diff --git a/linux-6.6/rk3568_patch/kernel.patch b/linux-6.6/rk3568_patch/kernel.patch index 1ff4793f37a0f70791e6fe27773220d9080b4add..1e12a66bb8706a40a3e4796bdff677eaebe4e272 100644 --- a/linux-6.6/rk3568_patch/kernel.patch +++ b/linux-6.6/rk3568_patch/kernel.patch @@ -473,11 +473,11 @@ index 000000000..0722d0afc +}; diff --git a/Documentation/devicetree/bindings/arm/mali-utgard.txt b/Documentation/devicetree/bindings/arm/mali-utgard.txt new file mode 100644 -index 000000000..ea315345e +index 000000000..dbc69fa11 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mali-utgard.txt @@ -0,0 +1,68 @@ -+/* ++/* + * Copyright (C) 2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 @@ -485,66 +485,66 @@ index 000000000..ea315345e + * + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+* ARM Mali-300/400/450 GPU -+ -+Required properties: -+- compatible: -+ At least one of these: "arm,mali-300", "arm,mali-400", "arm,mali-450" -+ Always: "arm,mali-utgard" -+ Mali-450 can also include "arm,mali-400" as it is compatible. -+ - "arm,mali-400", "arm,mali-utgard" for any Mali-400 GPU. -+ - "arm,mali-450", "arm,mali-400", "arm,mali-utgard" for any Mali-450 GPU. -+- reg: -+ Physical base address and length of the GPU's registers. -+- interrupts: -+ - List of all Mali interrupts. -+ - This list must match the number of and the order of entries in -+ interrupt-names. -+- interrupt-names: -+ - IRQPP - Name for PP interrupts. -+ - IRQPPMMU - Name for interrupts from the PP MMU. -+ - IRQPP - Name for the PP broadcast interrupt (Mali-450 only). -+ - IRQGP - Name for the GP interrupt. -+ - IRQGPMMU - Name for the interrupt from the GP MMU. -+ - IRQPMU - Name for the PMU interrupt (If pmu is implemented in HW, it must be contained). -+ -+Optional properties: -+- pmu_domain_config: -+ - If the Mali internal PMU is present and the PMU IRQ is specified in -+ interrupt/interrupt-names ("IRQPMU").This contains the mapping of -+ Mali HW units to the PMU power domain. -+ -Mali Dynamic power domain configuration in sequence from 0-11, like: -+ . -+- pmu-switch-delay: -+ - Only needed if the power gates are connected to the PMU in a high fanout -+ network. This value is the number of Mali clock cycles it takes to -+ enable the power gates and turn on the power mesh. This value will -+ have no effect if a daisy chain implementation is used. -+ -+Platform related properties: -+- clocks: Phandle to clock for Mali utgard device. -+- clock-names: the corresponding names of clock in clocks property. -+- regulator: Phandle to regulator which is power supplier of mali device. -+ -+Example for a Mali400_MP1_PMU device: -+ -+/ { -+ ... 
-+ -+ gpu@12300000 { -+ compatible = "arm,mali-400", "arm,mali-utgard"; -+ reg = <0x12300000 0x30000>; -+ interrupts = <0 55 4>, <0 56 4>, <0 57 4>, <0 58 4>, <0 59 4>; -+ interrupt-names = "IRQGP", "IRQGPMMU", "IRQPP0", "IRQPPMMU0", "IRQPMU"; -+ -+ pmu_domain_config = <0x1 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x2 0x0 0x0>; -+ pmu_switch_delay = <0xff>; -+ clocks = , ; -+ clock-names = "mali_parent", "mali"; -+ vdd_g3d-supply = ; -+ }; -+} ++ */ ++* ARM Mali-300/400/450 GPU ++ ++Required properties: ++- compatible: ++ At least one of these: "arm,mali-300", "arm,mali-400", "arm,mali-450" ++ Always: "arm,mali-utgard" ++ Mali-450 can also include "arm,mali-400" as it is compatible. ++ - "arm,mali-400", "arm,mali-utgard" for any Mali-400 GPU. ++ - "arm,mali-450", "arm,mali-400", "arm,mali-utgard" for any Mali-450 GPU. ++- reg: ++ Physical base address and length of the GPU's registers. ++- interrupts: ++ - List of all Mali interrupts. ++ - This list must match the number of and the order of entries in ++ interrupt-names. ++- interrupt-names: ++ - IRQPP - Name for PP interrupts. ++ - IRQPPMMU - Name for interrupts from the PP MMU. ++ - IRQPP - Name for the PP broadcast interrupt (Mali-450 only). ++ - IRQGP - Name for the GP interrupt. ++ - IRQGPMMU - Name for the interrupt from the GP MMU. ++ - IRQPMU - Name for the PMU interrupt (If pmu is implemented in HW, it must be contained). ++ ++Optional properties: ++- pmu_domain_config: ++ - If the Mali internal PMU is present and the PMU IRQ is specified in ++ interrupt/interrupt-names ("IRQPMU").This contains the mapping of ++ Mali HW units to the PMU power domain. ++ -Mali Dynamic power domain configuration in sequence from 0-11, like: ++ . ++- pmu-switch-delay: ++ - Only needed if the power gates are connected to the PMU in a high fanout ++ network. This value is the number of Mali clock cycles it takes to ++ enable the power gates and turn on the power mesh. This value will ++ have no effect if a daisy chain implementation is used. ++ ++Platform related properties: ++- clocks: Phandle to clock for Mali utgard device. ++- clock-names: the corresponding names of clock in clocks property. ++- regulator: Phandle to regulator which is power supplier of mali device. ++ ++Example for a Mali400_MP1_PMU device: ++ ++/ { ++ ... 
++ ++ gpu@12300000 { ++ compatible = "arm,mali-400", "arm,mali-utgard"; ++ reg = <0x12300000 0x30000>; ++ interrupts = <0 55 4>, <0 56 4>, <0 57 4>, <0 58 4>, <0 59 4>; ++ interrupt-names = "IRQGP", "IRQGPMMU", "IRQPP0", "IRQPPMMU0", "IRQPMU"; ++ ++ pmu_domain_config = <0x1 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x2 0x0 0x0>; ++ pmu_switch_delay = <0xff>; ++ clocks = , ; ++ clock-names = "mali_parent", "mali"; ++ vdd_g3d-supply = ; ++ }; ++} diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3588-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3588-crypto.yaml new file mode 100644 index 000000000..c01963413 @@ -1312,7 +1312,7 @@ index 259e59594..da0640994 100644 dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-rock-5a.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3588s-orangepi-5.dtb diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts -index de0a1f2af..7d4c5324c 100644 +index 776c2236d..c8ceb5cdb 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -86,7 +86,7 @@ simple-audio-card,cpu { @@ -1345,10 +1345,10 @@ index 20955556b..42ce78beb 100644 pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts -index 4237f2ee8..5d7d56728 100644 +index f57d4acd9..21e030604 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts -@@ -26,11 +26,9 @@ yt8531c: ethernet-phy@0 { +@@ -28,11 +28,9 @@ yt8531c: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; @@ -1361,6 +1361,56 @@ index 4237f2ee8..5d7d56728 100644 pinctrl-0 = <ð_phy_reset_pin>; pinctrl-names = "default"; +diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts.orig b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts.orig +new file mode 100644 +index 000000000..f57d4acd9 +--- /dev/null ++++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts.orig +@@ -0,0 +1,44 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT ++/* ++ * Copyright (c) 2016 Xunlong Software. Co., Ltd. 
++ * (http://www.orangepi.org) ++ * ++ * Copyright (c) 2021-2023 Tianling Shen ++ */ ++ ++/dts-v1/; ++#include "rk3328-orangepi-r1-plus.dts" ++ ++/ { ++ model = "Xunlong Orange Pi R1 Plus LTS"; ++ compatible = "xunlong,orangepi-r1-plus-lts", "rockchip,rk3328"; ++}; ++ ++&gmac2io { ++ /delete-property/ tx_delay; ++ /delete-property/ rx_delay; ++ ++ phy-handle = <&yt8531c>; ++ phy-mode = "rgmii-id"; ++ ++ mdio { ++ /delete-node/ ethernet-phy@1; ++ ++ yt8531c: ethernet-phy@0 { ++ compatible = "ethernet-phy-ieee802.3-c22"; ++ reg = <0>; ++ ++ motorcomm,auto-sleep-disabled; ++ motorcomm,clk-out-frequency-hz = <125000000>; ++ motorcomm,keep-pll-enabled; ++ motorcomm,rx-clk-drv-microamp = <5020>; ++ motorcomm,rx-data-drv-microamp = <5020>; ++ ++ pinctrl-0 = <ð_phy_reset_pin>; ++ pinctrl-names = "default"; ++ reset-assert-us = <15000>; ++ reset-deassert-us = <50000>; ++ reset-gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; ++ }; ++ }; ++}; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi index e47d1398a..083452c67 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-evb.dtsi @@ -1824,7 +1874,7 @@ index fee5e7111..ea403de13 100644 &gpu { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi -index aba2748fe..e089e0c26 100644 +index 35a0fb73a..a6bae0a45 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -27,7 +27,7 @@ module_led: led-0 { @@ -4644,7 +4694,7 @@ index 000000000..459fc3b6d +}; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-toybrick-mipi-tx0-beiqicloud.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-toybrick-mipi-tx0-beiqicloud.dtsi new file mode 100644 -index 000000000..5267b377d +index 000000000..c9356f891 --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk3568-toybrick-mipi-tx0-beiqicloud.dtsi @@ -0,0 +1,395 @@ @@ -4927,7 +4977,7 @@ index 000000000..5267b377d + native-mode = <&dsi0_timing0>; + + dsi0_timing0: timing0 { -+ clock-frequency = <75000000>; ++ clock-frequency = <65000000>; + hactive = <720>; + vactive = <1280>; + hfront-porch = <40>; @@ -31007,10 +31057,10 @@ index 86fa5dc7d..7ead471bb 100644 return 0; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c -index 17a2f158a..0e87f8283 100644 +index 70320b8f1..3fb94b4d8 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c -@@ -149,6 +149,11 @@ static void hci_uart_write_work(struct work_struct *work) +@@ -151,6 +151,11 @@ static void hci_uart_write_work(struct work_struct *work) struct hci_dev *hdev = hu->hdev; struct sk_buff *skb; @@ -31022,6 +31072,941 @@ index 17a2f158a..0e87f8283 100644 /* REVISIT: should we cope with bad skbs or ->write() returning * and error value ? 
*/ +diff --git a/drivers/bluetooth/hci_ldisc.c.orig b/drivers/bluetooth/hci_ldisc.c.orig +new file mode 100644 +index 000000000..70320b8f1 +--- /dev/null ++++ b/drivers/bluetooth/hci_ldisc.c.orig +@@ -0,0 +1,929 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * ++ * Bluetooth HCI UART driver ++ * ++ * Copyright (C) 2000-2001 Qualcomm Incorporated ++ * Copyright (C) 2002-2003 Maxim Krasnyansky ++ * Copyright (C) 2004-2005 Marcel Holtmann ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "btintel.h" ++#include "btbcm.h" ++#include "hci_uart.h" ++ ++#define VERSION "2.3" ++ ++static const struct hci_uart_proto *hup[HCI_UART_MAX_PROTO]; ++ ++int hci_uart_register_proto(const struct hci_uart_proto *p) ++{ ++ if (p->id >= HCI_UART_MAX_PROTO) ++ return -EINVAL; ++ ++ if (hup[p->id]) ++ return -EEXIST; ++ ++ hup[p->id] = p; ++ ++ BT_INFO("HCI UART protocol %s registered", p->name); ++ ++ return 0; ++} ++ ++int hci_uart_unregister_proto(const struct hci_uart_proto *p) ++{ ++ if (p->id >= HCI_UART_MAX_PROTO) ++ return -EINVAL; ++ ++ if (!hup[p->id]) ++ return -EINVAL; ++ ++ hup[p->id] = NULL; ++ ++ return 0; ++} ++ ++static const struct hci_uart_proto *hci_uart_get_proto(unsigned int id) ++{ ++ if (id >= HCI_UART_MAX_PROTO) ++ return NULL; ++ ++ return hup[id]; ++} ++ ++static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type) ++{ ++ struct hci_dev *hdev = hu->hdev; ++ ++ /* Update HCI stat counters */ ++ switch (pkt_type) { ++ case HCI_COMMAND_PKT: ++ hdev->stat.cmd_tx++; ++ break; ++ ++ case HCI_ACLDATA_PKT: ++ hdev->stat.acl_tx++; ++ break; ++ ++ case HCI_SCODATA_PKT: ++ hdev->stat.sco_tx++; ++ break; ++ } ++} ++ ++static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) ++{ ++ struct sk_buff *skb = hu->tx_skb; ++ ++ if (!skb) { ++ percpu_down_read(&hu->proto_lock); ++ ++ if (test_bit(HCI_UART_PROTO_READY, &hu->flags) || ++ test_bit(HCI_UART_PROTO_INIT, &hu->flags)) ++ skb = hu->proto->dequeue(hu); ++ ++ percpu_up_read(&hu->proto_lock); ++ } else { ++ hu->tx_skb = NULL; ++ } ++ ++ return skb; ++} ++ ++int hci_uart_tx_wakeup(struct hci_uart *hu) ++{ ++ /* This may be called in an IRQ context, so we can't sleep. Therefore ++ * we try to acquire the lock only, and if that fails we assume the ++ * tty is being closed because that is the only time the write lock is ++ * acquired. If, however, at some point in the future the write lock ++ * is also acquired in other situations, then this must be revisited. ++ */ ++ if (!percpu_down_read_trylock(&hu->proto_lock)) ++ return 0; ++ ++ if (!test_bit(HCI_UART_PROTO_READY, &hu->flags) && ++ !test_bit(HCI_UART_PROTO_INIT, &hu->flags)) ++ goto no_schedule; ++ ++ set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); ++ if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) ++ goto no_schedule; ++ ++ BT_DBG(""); ++ ++ schedule_work(&hu->write_work); ++ ++no_schedule: ++ percpu_up_read(&hu->proto_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(hci_uart_tx_wakeup); ++ ++static void hci_uart_write_work(struct work_struct *work) ++{ ++ struct hci_uart *hu = container_of(work, struct hci_uart, write_work); ++ struct tty_struct *tty = hu->tty; ++ struct hci_dev *hdev = hu->hdev; ++ struct sk_buff *skb; ++ ++ /* REVISIT: should we cope with bad skbs or ->write() returning ++ * and error value ? 
++ */ ++ ++restart: ++ clear_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); ++ ++ while ((skb = hci_uart_dequeue(hu))) { ++ int len; ++ ++ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); ++ len = tty->ops->write(tty, skb->data, skb->len); ++ hdev->stat.byte_tx += len; ++ ++ skb_pull(skb, len); ++ if (skb->len) { ++ hu->tx_skb = skb; ++ break; ++ } ++ ++ hci_uart_tx_complete(hu, hci_skb_pkt_type(skb)); ++ kfree_skb(skb); ++ } ++ ++ clear_bit(HCI_UART_SENDING, &hu->tx_state); ++ if (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)) ++ goto restart; ++ ++ wake_up_bit(&hu->tx_state, HCI_UART_SENDING); ++} ++ ++void hci_uart_init_work(struct work_struct *work) ++{ ++ struct hci_uart *hu = container_of(work, struct hci_uart, init_ready); ++ int err; ++ struct hci_dev *hdev; ++ ++ if (!test_and_clear_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) ++ return; ++ ++ err = hci_register_dev(hu->hdev); ++ if (err < 0) { ++ BT_ERR("Can't register HCI device"); ++ clear_bit(HCI_UART_PROTO_READY, &hu->flags); ++ hu->proto->close(hu); ++ hdev = hu->hdev; ++ hu->hdev = NULL; ++ hci_free_dev(hdev); ++ return; ++ } ++ ++ set_bit(HCI_UART_REGISTERED, &hu->flags); ++} ++ ++int hci_uart_init_ready(struct hci_uart *hu) ++{ ++ if (!test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) ++ return -EALREADY; ++ ++ schedule_work(&hu->init_ready); ++ ++ return 0; ++} ++ ++int hci_uart_wait_until_sent(struct hci_uart *hu) ++{ ++ return wait_on_bit_timeout(&hu->tx_state, HCI_UART_SENDING, ++ TASK_INTERRUPTIBLE, ++ msecs_to_jiffies(2000)); ++} ++ ++/* ------- Interface to HCI layer ------ */ ++/* Reset device */ ++static int hci_uart_flush(struct hci_dev *hdev) ++{ ++ struct hci_uart *hu = hci_get_drvdata(hdev); ++ struct tty_struct *tty = hu->tty; ++ ++ BT_DBG("hdev %p tty %p", hdev, tty); ++ ++ if (hu->tx_skb) { ++ kfree_skb(hu->tx_skb); hu->tx_skb = NULL; ++ } ++ ++ /* Flush any pending characters in the driver and discipline. 
*/ ++ tty_ldisc_flush(tty); ++ tty_driver_flush_buffer(tty); ++ ++ percpu_down_read(&hu->proto_lock); ++ ++ if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) ++ hu->proto->flush(hu); ++ ++ percpu_up_read(&hu->proto_lock); ++ ++ return 0; ++} ++ ++/* Initialize device */ ++static int hci_uart_open(struct hci_dev *hdev) ++{ ++ BT_DBG("%s %p", hdev->name, hdev); ++ ++ /* Undo clearing this from hci_uart_close() */ ++ hdev->flush = hci_uart_flush; ++ ++ return 0; ++} ++ ++/* Close device */ ++static int hci_uart_close(struct hci_dev *hdev) ++{ ++ BT_DBG("hdev %p", hdev); ++ ++ hci_uart_flush(hdev); ++ hdev->flush = NULL; ++ return 0; ++} ++ ++/* Send frames from HCI layer */ ++static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) ++{ ++ struct hci_uart *hu = hci_get_drvdata(hdev); ++ ++ BT_DBG("%s: type %d len %d", hdev->name, hci_skb_pkt_type(skb), ++ skb->len); ++ ++ percpu_down_read(&hu->proto_lock); ++ ++ if (!test_bit(HCI_UART_PROTO_READY, &hu->flags) && ++ !test_bit(HCI_UART_PROTO_INIT, &hu->flags)) { ++ percpu_up_read(&hu->proto_lock); ++ return -EUNATCH; ++ } ++ ++ hu->proto->enqueue(hu, skb); ++ percpu_up_read(&hu->proto_lock); ++ ++ hci_uart_tx_wakeup(hu); ++ ++ return 0; ++} ++ ++/* Check the underlying device or tty has flow control support */ ++bool hci_uart_has_flow_control(struct hci_uart *hu) ++{ ++ /* serdev nodes check if the needed operations are present */ ++ if (hu->serdev) ++ return true; ++ ++ if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) ++ return true; ++ ++ return false; ++} ++ ++/* Flow control or un-flow control the device */ ++void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) ++{ ++ struct tty_struct *tty = hu->tty; ++ struct ktermios ktermios; ++ int status; ++ unsigned int set = 0; ++ unsigned int clear = 0; ++ ++ if (hu->serdev) { ++ serdev_device_set_flow_control(hu->serdev, !enable); ++ serdev_device_set_rts(hu->serdev, !enable); ++ return; ++ } ++ ++ if (enable) { ++ /* Disable hardware flow control */ ++ ktermios = tty->termios; ++ ktermios.c_cflag &= ~CRTSCTS; ++ tty_set_termios(tty, &ktermios); ++ BT_DBG("Disabling hardware flow control: %s", ++ (tty->termios.c_cflag & CRTSCTS) ? "failed" : "success"); ++ ++ /* Clear RTS to prevent the device from sending */ ++ /* Most UARTs need OUT2 to enable interrupts */ ++ status = tty->driver->ops->tiocmget(tty); ++ BT_DBG("Current tiocm 0x%x", status); ++ ++ set &= ~(TIOCM_OUT2 | TIOCM_RTS); ++ clear = ~set; ++ set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | ++ TIOCM_OUT2 | TIOCM_LOOP; ++ clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | ++ TIOCM_OUT2 | TIOCM_LOOP; ++ status = tty->driver->ops->tiocmset(tty, set, clear); ++ BT_DBG("Clearing RTS: %s", status ? "failed" : "success"); ++ } else { ++ /* Set RTS to allow the device to send again */ ++ status = tty->driver->ops->tiocmget(tty); ++ BT_DBG("Current tiocm 0x%x", status); ++ ++ set |= (TIOCM_OUT2 | TIOCM_RTS); ++ clear = ~set; ++ set &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | ++ TIOCM_OUT2 | TIOCM_LOOP; ++ clear &= TIOCM_DTR | TIOCM_RTS | TIOCM_OUT1 | ++ TIOCM_OUT2 | TIOCM_LOOP; ++ status = tty->driver->ops->tiocmset(tty, set, clear); ++ BT_DBG("Setting RTS: %s", status ? "failed" : "success"); ++ ++ /* Re-enable hardware flow control */ ++ ktermios = tty->termios; ++ ktermios.c_cflag |= CRTSCTS; ++ tty_set_termios(tty, &ktermios); ++ BT_DBG("Enabling hardware flow control: %s", ++ !(tty->termios.c_cflag & CRTSCTS) ? 
"failed" : "success"); ++ } ++} ++ ++void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed, ++ unsigned int oper_speed) ++{ ++ hu->init_speed = init_speed; ++ hu->oper_speed = oper_speed; ++} ++ ++void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed) ++{ ++ struct tty_struct *tty = hu->tty; ++ struct ktermios ktermios; ++ ++ ktermios = tty->termios; ++ ktermios.c_cflag &= ~CBAUD; ++ tty_termios_encode_baud_rate(&ktermios, speed, speed); ++ ++ /* tty_set_termios() return not checked as it is always 0 */ ++ tty_set_termios(tty, &ktermios); ++ ++ BT_DBG("%s: New tty speeds: %d/%d", hu->hdev->name, ++ tty->termios.c_ispeed, tty->termios.c_ospeed); ++} ++ ++static int hci_uart_setup(struct hci_dev *hdev) ++{ ++ struct hci_uart *hu = hci_get_drvdata(hdev); ++ struct hci_rp_read_local_version *ver; ++ struct sk_buff *skb; ++ unsigned int speed; ++ int err; ++ ++ /* Init speed if any */ ++ if (hu->init_speed) ++ speed = hu->init_speed; ++ else if (hu->proto->init_speed) ++ speed = hu->proto->init_speed; ++ else ++ speed = 0; ++ ++ if (speed) ++ hci_uart_set_baudrate(hu, speed); ++ ++ /* Operational speed if any */ ++ if (hu->oper_speed) ++ speed = hu->oper_speed; ++ else if (hu->proto->oper_speed) ++ speed = hu->proto->oper_speed; ++ else ++ speed = 0; ++ ++ if (hu->proto->set_baudrate && speed) { ++ err = hu->proto->set_baudrate(hu, speed); ++ if (!err) ++ hci_uart_set_baudrate(hu, speed); ++ } ++ ++ if (hu->proto->setup) ++ return hu->proto->setup(hu); ++ ++ if (!test_bit(HCI_UART_VND_DETECT, &hu->hdev_flags)) ++ return 0; ++ ++ skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, ++ HCI_INIT_TIMEOUT); ++ if (IS_ERR(skb)) { ++ BT_ERR("%s: Reading local version information failed (%ld)", ++ hdev->name, PTR_ERR(skb)); ++ return 0; ++ } ++ ++ if (skb->len != sizeof(*ver)) { ++ BT_ERR("%s: Event length mismatch for version information", ++ hdev->name); ++ goto done; ++ } ++ ++ ver = (struct hci_rp_read_local_version *)skb->data; ++ ++ switch (le16_to_cpu(ver->manufacturer)) { ++#ifdef CONFIG_BT_HCIUART_INTEL ++ case 2: ++ hdev->set_bdaddr = btintel_set_bdaddr; ++ btintel_check_bdaddr(hdev); ++ break; ++#endif ++#ifdef CONFIG_BT_HCIUART_BCM ++ case 15: ++ hdev->set_bdaddr = btbcm_set_bdaddr; ++ btbcm_check_bdaddr(hdev); ++ break; ++#endif ++ default: ++ break; ++ } ++ ++done: ++ kfree_skb(skb); ++ return 0; ++} ++ ++/* ------ LDISC part ------ */ ++/* hci_uart_tty_open ++ * ++ * Called when line discipline changed to HCI_UART. 
++ * ++ * Arguments: ++ * tty pointer to tty info structure ++ * Return Value: ++ * 0 if success, otherwise error code ++ */ ++static int hci_uart_tty_open(struct tty_struct *tty) ++{ ++ struct hci_uart *hu; ++ ++ BT_DBG("tty %p", tty); ++ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ /* Error if the tty has no write op instead of leaving an exploitable ++ * hole ++ */ ++ if (tty->ops->write == NULL) ++ return -EOPNOTSUPP; ++ ++ hu = kzalloc(sizeof(struct hci_uart), GFP_KERNEL); ++ if (!hu) { ++ BT_ERR("Can't allocate control structure"); ++ return -ENFILE; ++ } ++ if (percpu_init_rwsem(&hu->proto_lock)) { ++ BT_ERR("Can't allocate semaphore structure"); ++ kfree(hu); ++ return -ENOMEM; ++ } ++ ++ tty->disc_data = hu; ++ hu->tty = tty; ++ tty->receive_room = 65536; ++ ++ /* disable alignment support by default */ ++ hu->alignment = 1; ++ hu->padding = 0; ++ ++ INIT_WORK(&hu->init_ready, hci_uart_init_work); ++ INIT_WORK(&hu->write_work, hci_uart_write_work); ++ ++ /* Flush any pending characters in the driver */ ++ tty_driver_flush_buffer(tty); ++ ++ return 0; ++} ++ ++/* hci_uart_tty_close() ++ * ++ * Called when the line discipline is changed to something ++ * else, the tty is closed, or the tty detects a hangup. ++ */ ++static void hci_uart_tty_close(struct tty_struct *tty) ++{ ++ struct hci_uart *hu = tty->disc_data; ++ struct hci_dev *hdev; ++ ++ BT_DBG("tty %p", tty); ++ ++ /* Detach from the tty */ ++ tty->disc_data = NULL; ++ ++ if (!hu) ++ return; ++ ++ hdev = hu->hdev; ++ if (hdev) ++ hci_uart_close(hdev); ++ ++ if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { ++ percpu_down_write(&hu->proto_lock); ++ clear_bit(HCI_UART_PROTO_READY, &hu->flags); ++ percpu_up_write(&hu->proto_lock); ++ ++ cancel_work_sync(&hu->init_ready); ++ cancel_work_sync(&hu->write_work); ++ ++ if (hdev) { ++ if (test_bit(HCI_UART_REGISTERED, &hu->flags)) ++ hci_unregister_dev(hdev); ++ hci_free_dev(hdev); ++ } ++ hu->proto->close(hu); ++ } ++ clear_bit(HCI_UART_PROTO_SET, &hu->flags); ++ ++ percpu_free_rwsem(&hu->proto_lock); ++ ++ kfree(hu); ++} ++ ++/* hci_uart_tty_wakeup() ++ * ++ * Callback for transmit wakeup. Called when low level ++ * device driver can accept more send data. ++ * ++ * Arguments: tty pointer to associated tty instance data ++ * Return Value: None ++ */ ++static void hci_uart_tty_wakeup(struct tty_struct *tty) ++{ ++ struct hci_uart *hu = tty->disc_data; ++ ++ BT_DBG(""); ++ ++ if (!hu) ++ return; ++ ++ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); ++ ++ if (tty != hu->tty) ++ return; ++ ++ if (test_bit(HCI_UART_PROTO_READY, &hu->flags) || ++ test_bit(HCI_UART_PROTO_INIT, &hu->flags)) ++ hci_uart_tx_wakeup(hu); ++} ++ ++/* hci_uart_tty_receive() ++ * ++ * Called by tty low level driver when receive data is ++ * available. 
++ * ++ * Arguments: tty pointer to tty isntance data ++ * data pointer to received data ++ * flags pointer to flags for data ++ * count count of received data in bytes ++ * ++ * Return Value: None ++ */ ++static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, ++ const u8 *flags, size_t count) ++{ ++ struct hci_uart *hu = tty->disc_data; ++ ++ if (!hu || tty != hu->tty) ++ return; ++ ++ percpu_down_read(&hu->proto_lock); ++ ++ if (!test_bit(HCI_UART_PROTO_READY, &hu->flags) && ++ !test_bit(HCI_UART_PROTO_INIT, &hu->flags)) { ++ percpu_up_read(&hu->proto_lock); ++ return; ++ } ++ ++ /* It does not need a lock here as it is already protected by a mutex in ++ * tty caller ++ */ ++ hu->proto->recv(hu, data, count); ++ percpu_up_read(&hu->proto_lock); ++ ++ if (hu->hdev) ++ hu->hdev->stat.byte_rx += count; ++ ++ tty_unthrottle(tty); ++} ++ ++static int hci_uart_register_dev(struct hci_uart *hu) ++{ ++ struct hci_dev *hdev; ++ int err; ++ ++ BT_DBG(""); ++ ++ /* Initialize and register HCI device */ ++ hdev = hci_alloc_dev(); ++ if (!hdev) { ++ BT_ERR("Can't allocate HCI device"); ++ return -ENOMEM; ++ } ++ ++ hu->hdev = hdev; ++ ++ hdev->bus = HCI_UART; ++ hci_set_drvdata(hdev, hu); ++ ++ /* Only when vendor specific setup callback is provided, consider ++ * the manufacturer information valid. This avoids filling in the ++ * value for Ericsson when nothing is specified. ++ */ ++ if (hu->proto->setup) ++ hdev->manufacturer = hu->proto->manufacturer; ++ ++ hdev->open = hci_uart_open; ++ hdev->close = hci_uart_close; ++ hdev->flush = hci_uart_flush; ++ hdev->send = hci_uart_send_frame; ++ hdev->setup = hci_uart_setup; ++ SET_HCIDEV_DEV(hdev, hu->tty->dev); ++ ++ if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) ++ set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); ++ ++ if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) ++ set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); ++ ++ if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) ++ set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); ++ ++ /* Only call open() for the protocol after hdev is fully initialized as ++ * open() (or a timer/workqueue it starts) may attempt to reference it. ++ */ ++ err = hu->proto->open(hu); ++ if (err) { ++ hu->hdev = NULL; ++ hci_free_dev(hdev); ++ return err; ++ } ++ ++ if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) ++ return 0; ++ ++ if (hci_register_dev(hdev) < 0) { ++ BT_ERR("Can't register HCI device"); ++ hu->proto->close(hu); ++ hu->hdev = NULL; ++ hci_free_dev(hdev); ++ return -ENODEV; ++ } ++ ++ set_bit(HCI_UART_REGISTERED, &hu->flags); ++ ++ return 0; ++} ++ ++static int hci_uart_set_proto(struct hci_uart *hu, int id) ++{ ++ const struct hci_uart_proto *p; ++ int err; ++ ++ p = hci_uart_get_proto(id); ++ if (!p) ++ return -EPROTONOSUPPORT; ++ ++ hu->proto = p; ++ ++ set_bit(HCI_UART_PROTO_INIT, &hu->flags); ++ ++ err = hci_uart_register_dev(hu); ++ if (err) { ++ return err; ++ } ++ ++ set_bit(HCI_UART_PROTO_READY, &hu->flags); ++ clear_bit(HCI_UART_PROTO_INIT, &hu->flags); ++ ++ return 0; ++} ++ ++static int hci_uart_set_flags(struct hci_uart *hu, unsigned long flags) ++{ ++ unsigned long valid_flags = BIT(HCI_UART_RAW_DEVICE) | ++ BIT(HCI_UART_RESET_ON_INIT) | ++ BIT(HCI_UART_INIT_PENDING) | ++ BIT(HCI_UART_EXT_CONFIG) | ++ BIT(HCI_UART_VND_DETECT); ++ ++ if (flags & ~valid_flags) ++ return -EINVAL; ++ ++ hu->hdev_flags = flags; ++ ++ return 0; ++} ++ ++/* hci_uart_tty_ioctl() ++ * ++ * Process IOCTL system call for the tty device. 
++ * ++ * Arguments: ++ * ++ * tty pointer to tty instance data ++ * cmd IOCTL command code ++ * arg argument for IOCTL call (cmd dependent) ++ * ++ * Return Value: Command dependent ++ */ ++static int hci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, ++ unsigned long arg) ++{ ++ struct hci_uart *hu = tty->disc_data; ++ int err = 0; ++ ++ BT_DBG(""); ++ ++ /* Verify the status of the device */ ++ if (!hu) ++ return -EBADF; ++ ++ switch (cmd) { ++ case HCIUARTSETPROTO: ++ if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { ++ err = hci_uart_set_proto(hu, arg); ++ if (err) ++ clear_bit(HCI_UART_PROTO_SET, &hu->flags); ++ } else ++ err = -EBUSY; ++ break; ++ ++ case HCIUARTGETPROTO: ++ if (test_bit(HCI_UART_PROTO_SET, &hu->flags) && ++ test_bit(HCI_UART_PROTO_READY, &hu->flags)) ++ err = hu->proto->id; ++ else ++ err = -EUNATCH; ++ break; ++ ++ case HCIUARTGETDEVICE: ++ if (test_bit(HCI_UART_REGISTERED, &hu->flags)) ++ err = hu->hdev->id; ++ else ++ err = -EUNATCH; ++ break; ++ ++ case HCIUARTSETFLAGS: ++ if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) ++ err = -EBUSY; ++ else ++ err = hci_uart_set_flags(hu, arg); ++ break; ++ ++ case HCIUARTGETFLAGS: ++ err = hu->hdev_flags; ++ break; ++ ++ default: ++ err = n_tty_ioctl_helper(tty, cmd, arg); ++ break; ++ } ++ ++ return err; ++} ++ ++/* ++ * We don't provide read/write/poll interface for user space. ++ */ ++static ssize_t hci_uart_tty_read(struct tty_struct *tty, struct file *file, ++ u8 *buf, size_t nr, void **cookie, ++ unsigned long offset) ++{ ++ return 0; ++} ++ ++static ssize_t hci_uart_tty_write(struct tty_struct *tty, struct file *file, ++ const u8 *data, size_t count) ++{ ++ return 0; ++} ++ ++static struct tty_ldisc_ops hci_uart_ldisc = { ++ .owner = THIS_MODULE, ++ .num = N_HCI, ++ .name = "n_hci", ++ .open = hci_uart_tty_open, ++ .close = hci_uart_tty_close, ++ .read = hci_uart_tty_read, ++ .write = hci_uart_tty_write, ++ .ioctl = hci_uart_tty_ioctl, ++ .compat_ioctl = hci_uart_tty_ioctl, ++ .receive_buf = hci_uart_tty_receive, ++ .write_wakeup = hci_uart_tty_wakeup, ++}; ++ ++static int __init hci_uart_init(void) ++{ ++ int err; ++ ++ BT_INFO("HCI UART driver ver %s", VERSION); ++ ++ /* Register the tty discipline */ ++ err = tty_register_ldisc(&hci_uart_ldisc); ++ if (err) { ++ BT_ERR("HCI line discipline registration failed. 
(%d)", err); ++ return err; ++ } ++ ++#ifdef CONFIG_BT_HCIUART_H4 ++ h4_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_BCSP ++ bcsp_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_LL ++ ll_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_ATH3K ++ ath_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_3WIRE ++ h5_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_INTEL ++ intel_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_BCM ++ bcm_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_QCA ++ qca_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_AG6XX ++ ag6xx_init(); ++#endif ++#ifdef CONFIG_BT_HCIUART_MRVL ++ mrvl_init(); ++#endif ++ ++ return 0; ++} ++ ++static void __exit hci_uart_exit(void) ++{ ++#ifdef CONFIG_BT_HCIUART_H4 ++ h4_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_BCSP ++ bcsp_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_LL ++ ll_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_ATH3K ++ ath_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_3WIRE ++ h5_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_INTEL ++ intel_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_BCM ++ bcm_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_QCA ++ qca_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_AG6XX ++ ag6xx_deinit(); ++#endif ++#ifdef CONFIG_BT_HCIUART_MRVL ++ mrvl_deinit(); ++#endif ++ ++ tty_unregister_ldisc(&hci_uart_ldisc); ++} ++ ++module_init(hci_uart_init); ++module_exit(hci_uart_exit); ++ ++MODULE_AUTHOR("Marcel Holtmann "); ++MODULE_DESCRIPTION("Bluetooth HCI UART driver ver " VERSION); ++MODULE_VERSION(VERSION); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_LDISC(N_HCI); diff --git a/drivers/bluetooth/rtk_btusb.c b/drivers/bluetooth/rtk_btusb.c new file mode 100644 index 000000000..5db188416 @@ -37118,10 +38103,10 @@ index 642838076..5e3c95eed 100644 if (gate->lock) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c -index 5bbd036f5..7dedc37b5 100644 +index 8474099e2..22040720c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c -@@ -5545,3 +5545,274 @@ void __init of_clk_init(const struct of_device_id *matches) +@@ -5549,3 +5549,274 @@ void __init of_clk_init(const struct of_device_id *matches) } } #endif @@ -37396,11254 +38381,10431 @@ index 5bbd036f5..7dedc37b5 100644 +} +late_initcall_sync(clk_create_procfs); +#endif -diff --git a/drivers/clk/rockchip-oh/Kconfig b/drivers/clk/rockchip-oh/Kconfig +diff --git a/drivers/clk/clk.c.orig b/drivers/clk/clk.c.orig new file mode 100644 -index 000000000..c4704da18 +index 000000000..8474099e2 --- /dev/null -+++ b/drivers/clk/rockchip-oh/Kconfig -@@ -0,0 +1,212 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# common clock support for ROCKCHIP SoC family. ++++ b/drivers/clk/clk.c.orig +@@ -0,0 +1,5551 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2010-2011 Canonical Ltd ++ * Copyright (C) 2011-2012 Linaro Ltd ++ * ++ * Standard functionality for the common clock API. See Documentation/driver-api/clk.rst ++ */ + -+config COMMON_CLK_ROCKCHIP -+ tristate "Rockchip clock controller common support" -+ depends on ARCH_ROCKCHIP -+ default ARCH_ROCKCHIP -+ help -+ Say y here to enable common clock controller for Rockchip platforms. ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+if COMMON_CLK_ROCKCHIP -+config CLK_PX30 -+ tristate "Rockchip PX30 clock controller support" -+ depends on CPU_PX30 || COMPILE_TEST -+ default y -+ help -+ Build the driver for PX30 Clock Driver. 
++#include "clk.h" + -+config CLK_RV1106 -+ tristate "Rockchip RV1106 clock controller support" -+ depends on CPU_RV1106 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RV1106 Clock Driver. ++static DEFINE_SPINLOCK(enable_lock); ++static DEFINE_MUTEX(prepare_lock); + -+config CLK_RV1108 -+ tristate "Rockchip RV1108 clock controller support" -+ depends on CPU_RV1108 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RV1108 Clock Driver. ++static struct task_struct *prepare_owner; ++static struct task_struct *enable_owner; + -+config CLK_RV1126 -+ tristate "Rockchip RV1126 clock controller support" -+ depends on CPU_RV1126 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RV1126 Clock Driver. ++static int prepare_refcnt; ++static int enable_refcnt; + -+config CLK_RK1808 -+ tristate "Rockchip RK1808 clock controller support" -+ depends on CPU_RK1808 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK1808 Clock Driver. ++static HLIST_HEAD(clk_root_list); ++static HLIST_HEAD(clk_orphan_list); ++static LIST_HEAD(clk_notifier_list); + -+config CLK_RK3036 -+ tristate "Rockchip RK3036 clock controller support" -+ depends on CPU_RK3036 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3036 Clock Driver. ++/* List of registered clks that use runtime PM */ ++static HLIST_HEAD(clk_rpm_list); ++static DEFINE_MUTEX(clk_rpm_list_lock); + -+config CLK_RK312X -+ tristate "Rockchip RK312x clock controller support" -+ depends on CPU_RK312X || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK312x Clock Driver. ++static const struct hlist_head *all_lists[] = { ++ &clk_root_list, ++ &clk_orphan_list, ++ NULL, ++}; + -+config CLK_RK3188 -+ tristate "Rockchip RK3188 clock controller support" -+ depends on CPU_RK3188 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3188 Clock Driver. ++/*** private data structures ***/ + -+config CLK_RK322X -+ tristate "Rockchip RK322x clock controller support" -+ depends on CPU_RK322X || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK322x Clock Driver. ++struct clk_parent_map { ++ const struct clk_hw *hw; ++ struct clk_core *core; ++ const char *fw_name; ++ const char *name; ++ int index; ++}; + -+config CLK_RK3288 -+ tristate "Rockchip RK3288 clock controller support" -+ depends on CPU_RK3288 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3288 Clock Driver. 
++struct clk_core { ++ const char *name; ++ const struct clk_ops *ops; ++ struct clk_hw *hw; ++ struct module *owner; ++ struct device *dev; ++ struct hlist_node rpm_node; ++ struct device_node *of_node; ++ struct clk_core *parent; ++ struct clk_parent_map *parents; ++ u8 num_parents; ++ u8 new_parent_index; ++ unsigned long rate; ++ unsigned long req_rate; ++ unsigned long new_rate; ++ struct clk_core *new_parent; ++ struct clk_core *new_child; ++ unsigned long flags; ++ bool orphan; ++ bool rpm_enabled; ++ unsigned int enable_count; ++ unsigned int prepare_count; ++ unsigned int protect_count; ++ unsigned long min_rate; ++ unsigned long max_rate; ++ unsigned long accuracy; ++ int phase; ++ struct clk_duty duty; ++ struct hlist_head children; ++ struct hlist_node child_node; ++ struct hlist_head clks; ++ unsigned int notifier_count; ++#ifdef CONFIG_DEBUG_FS ++ struct dentry *dentry; ++ struct hlist_node debug_node; ++#endif ++ struct kref ref; ++}; + -+config CLK_RK3308 -+ tristate "Rockchip RK3308 clock controller support" -+ depends on CPU_RK3308 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3308 Clock Driver. ++#define CREATE_TRACE_POINTS ++#include + -+config CLK_RK3328 -+ tristate "Rockchip RK3328 clock controller support" -+ depends on CPU_RK3328 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3328 Clock Driver. ++struct clk { ++ struct clk_core *core; ++ struct device *dev; ++ const char *dev_id; ++ const char *con_id; ++ unsigned long min_rate; ++ unsigned long max_rate; ++ unsigned int exclusive_count; ++ struct hlist_node clks_node; ++}; + -+config CLK_RK3368 -+ tristate "Rockchip RK3368 clock controller support" -+ depends on CPU_RK3368 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3368 Clock Driver. ++/*** runtime pm ***/ ++static int clk_pm_runtime_get(struct clk_core *core) ++{ ++ if (!core->rpm_enabled) ++ return 0; + -+config CLK_RK3399 -+ tristate "Rockchip RK3399 clock controller support" -+ depends on CPU_RK3399 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3399 Clock Driver. ++ return pm_runtime_resume_and_get(core->dev); ++} + -+config CLK_RK3528 -+ tristate "Rockchip RK3528 clock controller support" -+ depends on CPU_RK3528 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3528 Clock Driver. ++static void clk_pm_runtime_put(struct clk_core *core) ++{ ++ if (!core->rpm_enabled) ++ return; + -+config CLK_RK3562 -+ tristate "Rockchip RK3562 clock controller support" -+ depends on CPU_RK3562 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3562 Clock Driver. ++ pm_runtime_put_sync(core->dev); ++} + -+config CLK_RK3568 -+ tristate "Rockchip RK3568 clock controller support" -+ depends on CPU_RK3568 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3568 Clock Driver. ++/** ++ * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices ++ * ++ * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so ++ * that disabling unused clks avoids a deadlock where a device is runtime PM ++ * resuming/suspending and the runtime PM callback is trying to grab the ++ * prepare_lock for something like clk_prepare_enable() while ++ * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime ++ * PM resume/suspend the device as well. ++ * ++ * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on ++ * success. Otherwise the lock is released on failure. ++ * ++ * Return: 0 on success, negative errno otherwise. 
++ */ ++static int clk_pm_runtime_get_all(void) ++{ ++ int ret; ++ struct clk_core *core, *failed; + -+config CLK_RK3588 -+ tristate "Rockchip RK3588 clock controller support" -+ depends on CPU_RK3588 || COMPILE_TEST -+ default y -+ help -+ Build the driver for RK3588 Clock Driver. ++ /* ++ * Grab the list lock to prevent any new clks from being registered ++ * or unregistered until clk_pm_runtime_put_all(). ++ */ ++ mutex_lock(&clk_rpm_list_lock); + -+config ROCKCHIP_CLK_COMPENSATION -+ bool "Rockchip Clk Compensation" -+ help -+ Say y here to enable clk compensation(+/- 1000 ppm). ++ /* ++ * Runtime PM "get" all the devices that are needed for the clks ++ * currently registered. Do this without holding the prepare_lock, to ++ * avoid the deadlock. ++ */ ++ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { ++ ret = clk_pm_runtime_get(core); ++ if (ret) { ++ failed = core; ++ pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", ++ dev_name(failed->dev), failed->name); ++ goto err; ++ } ++ } + -+config ROCKCHIP_CLK_LINK -+ tristate "Rockchip clock link support" -+ default CLK_RK3562 || CLK_RK3588 -+ help -+ Say y here to enable clock link for Rockchip. ++ return 0; + -+config ROCKCHIP_CLK_BOOST -+ bool "Rockchip Clk Boost" -+ default y if CPU_PX30 -+ help -+ Say y here to enable clk boost. ++err: ++ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { ++ if (core == failed) ++ break; + -+config ROCKCHIP_CLK_INV -+ bool "Rockchip Clk Inverter" -+ default y if !CPU_RV1126 && !CPU_RV1106 -+ help -+ Say y here to enable clk Inverter. ++ clk_pm_runtime_put(core); ++ } ++ mutex_unlock(&clk_rpm_list_lock); + -+config ROCKCHIP_CLK_OUT -+ tristate "Rockchip Clk Out / Input Switch" -+ default y if !ROCKCHIP_MINI_KERNEL -+ help -+ Say y here to enable clk out / input switch. ++ return ret; ++} + -+config ROCKCHIP_CLK_PVTM -+ bool "Rockchip Clk Pvtm" -+ default y if !CPU_RV1126 && !CPU_RV1106 -+ help -+ Say y here to enable clk pvtm. ++/** ++ * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices ++ * ++ * Put the runtime PM references taken in clk_pm_runtime_get_all() and release ++ * the 'clk_rpm_list_lock'. ++ */ ++static void clk_pm_runtime_put_all(void) ++{ ++ struct clk_core *core; + -+config ROCKCHIP_DDRCLK -+ bool ++ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) ++ clk_pm_runtime_put(core); ++ mutex_unlock(&clk_rpm_list_lock); ++} + -+config ROCKCHIP_DDRCLK_SIP -+ bool "Rockchip DDR Clk SIP" -+ default y if CPU_RK3399 -+ select ROCKCHIP_DDRCLK -+ help -+ Say y here to enable ddr clk sip. ++static void clk_pm_runtime_init(struct clk_core *core) ++{ ++ struct device *dev = core->dev; + -+config ROCKCHIP_DDRCLK_SIP_V2 -+ bool "Rockchip DDR Clk SIP V2" -+ default y if CPU_PX30 || CPU_RK1808 || CPU_RK312X || CPU_RK322X || \ -+ CPU_RK3288 || CPU_RK3308 || CPU_RK3328 || CPU_RV1126 -+ select ROCKCHIP_DDRCLK -+ help -+ Say y here to enable ddr clk sip v2. ++ if (dev && pm_runtime_enabled(dev)) { ++ core->rpm_enabled = true; + -+config ROCKCHIP_PLL_RK3066 -+ bool "Rockchip PLL Type RK3066" -+ default y if CPU_RK30XX || CPU_RK3188 || \ -+ CPU_RK3288 || CPU_RK3368 -+ help -+ Say y here to enable pll type is rk3066. ++ mutex_lock(&clk_rpm_list_lock); ++ hlist_add_head(&core->rpm_node, &clk_rpm_list); ++ mutex_unlock(&clk_rpm_list_lock); ++ } ++} + -+config ROCKCHIP_PLL_RK3399 -+ bool "Rockchip PLL Type RK3399" -+ default y if CPU_RK3399 || CPU_RV1108 -+ help -+ Say y here to enable pll type is rk3399. 
++/*** locking ***/ ++static void clk_prepare_lock(void) ++{ ++ if (!mutex_trylock(&prepare_lock)) { ++ if (prepare_owner == current) { ++ prepare_refcnt++; ++ return; ++ } ++ mutex_lock(&prepare_lock); ++ } ++ WARN_ON_ONCE(prepare_owner != NULL); ++ WARN_ON_ONCE(prepare_refcnt != 0); ++ prepare_owner = current; ++ prepare_refcnt = 1; ++} + -+config ROCKCHIP_PLL_RK3588 -+ bool "Rockchip PLL Type RK3588" -+ default y if CPU_RK3588 -+ help -+ Say y here to enable pll type is rk3588. ++static void clk_prepare_unlock(void) ++{ ++ WARN_ON_ONCE(prepare_owner != current); ++ WARN_ON_ONCE(prepare_refcnt == 0); + -+source "drivers/clk/rockchip-oh/regmap/Kconfig" ++ if (--prepare_refcnt) ++ return; ++ prepare_owner = NULL; ++ mutex_unlock(&prepare_lock); ++} + -+endif -diff --git a/drivers/clk/rockchip-oh/Makefile b/drivers/clk/rockchip-oh/Makefile -new file mode 100644 -index 000000000..d6aafb106 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/Makefile -@@ -0,0 +1,40 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# -+# Rockchip Clock specific Makefile -+# ++static unsigned long clk_enable_lock(void) ++ __acquires(enable_lock) ++{ ++ unsigned long flags; + -+obj-$(CONFIG_COMMON_CLK_ROCKCHIP) += clk-rockchip.o -+obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += regmap/ ++ /* ++ * On UP systems, spin_trylock_irqsave() always returns true, even if ++ * we already hold the lock. So, in that case, we rely only on ++ * reference counting. ++ */ ++ if (!IS_ENABLED(CONFIG_SMP) || ++ !spin_trylock_irqsave(&enable_lock, flags)) { ++ if (enable_owner == current) { ++ enable_refcnt++; ++ __acquire(enable_lock); ++ if (!IS_ENABLED(CONFIG_SMP)) ++ local_save_flags(flags); ++ return flags; ++ } ++ spin_lock_irqsave(&enable_lock, flags); ++ } ++ WARN_ON_ONCE(enable_owner != NULL); ++ WARN_ON_ONCE(enable_refcnt != 0); ++ enable_owner = current; ++ enable_refcnt = 1; ++ return flags; ++} + -+clk-rockchip-y += clk.o -+clk-rockchip-y += clk-pll.o -+clk-rockchip-y += clk-cpu.o -+clk-rockchip-y += clk-half-divider.o -+clk-rockchip-y += clk-mmc-phase.o -+clk-rockchip-y += clk-muxgrf.o -+clk-rockchip-$(CONFIG_ROCKCHIP_DDRCLK) += clk-ddr.o -+clk-rockchip-$(CONFIG_ROCKCHIP_CLK_INV) += clk-inverter.o -+clk-rockchip-$(CONFIG_ROCKCHIP_CLK_PVTM) += clk-pvtm.o -+clk-rockchip-$(CONFIG_RESET_CONTROLLER) += softrst.o ++static void clk_enable_unlock(unsigned long flags) ++ __releases(enable_lock) ++{ ++ WARN_ON_ONCE(enable_owner != current); ++ WARN_ON_ONCE(enable_refcnt == 0); + -+obj-$(CONFIG_ROCKCHIP_CLK_LINK) += clk-link.o -+obj-$(CONFIG_ROCKCHIP_CLK_OUT) += clk-out.o ++ if (--enable_refcnt) { ++ __release(enable_lock); ++ return; ++ } ++ enable_owner = NULL; ++ spin_unlock_irqrestore(&enable_lock, flags); ++} + -+obj-$(CONFIG_CLK_PX30) += clk-px30.o -+obj-$(CONFIG_CLK_RV1106) += clk-rv1106.o -+obj-$(CONFIG_CLK_RV1108) += clk-rv1108.o -+obj-$(CONFIG_CLK_RV1126) += clk-rv1126.o -+obj-$(CONFIG_CLK_RK1808) += clk-rk1808.o -+obj-$(CONFIG_CLK_RK3036) += clk-rk3036.o -+obj-$(CONFIG_CLK_RK312X) += clk-rk3128.o -+obj-$(CONFIG_CLK_RK3188) += clk-rk3188.o -+obj-$(CONFIG_CLK_RK322X) += clk-rk3228.o -+obj-$(CONFIG_CLK_RK3288) += clk-rk3288.o -+obj-$(CONFIG_CLK_RK3308) += clk-rk3308.o -+obj-$(CONFIG_CLK_RK3328) += clk-rk3328.o -+obj-$(CONFIG_CLK_RK3368) += clk-rk3368.o -+obj-$(CONFIG_CLK_RK3399) += clk-rk3399.o -+obj-$(CONFIG_CLK_RK3528) += clk-rk3528.o -+obj-$(CONFIG_CLK_RK3562) += clk-rk3562.o -+obj-$(CONFIG_CLK_RK3568) += clk-rk3568.o -+obj-$(CONFIG_CLK_RK3588) += clk-rk3588.o -diff --git a/drivers/clk/rockchip-oh/clk-cpu.c 
b/drivers/clk/rockchip-oh/clk-cpu.c -new file mode 100644 -index 000000000..9a9beeb8c ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-cpu.c -@@ -0,0 +1,593 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ * -+ * based on clk/samsung/clk-cpu.c -+ * Copyright (c) 2014 Samsung Electronics Co., Ltd. -+ * Author: Thomas Abraham -+ * -+ * A CPU clock is defined as a clock supplied to a CPU or a group of CPUs. -+ * The CPU clock is typically derived from a hierarchy of clock -+ * blocks which includes mux and divider blocks. There are a number of other -+ * auxiliary clocks supplied to the CPU domain such as the debug blocks and AXI -+ * clock for CPU domain. The rates of these auxiliary clocks are related to the -+ * CPU clock rate and this relation is usually specified in the hardware manual -+ * of the SoC or supplied after the SoC characterization. -+ * -+ * The below implementation of the CPU clock allows the rate changes of the CPU -+ * clock and the corresponding rate changes of the auxillary clocks of the CPU -+ * domain. The platform clock driver provides a clock register configuration -+ * for each configurable rate which is then used to program the clock hardware -+ * registers to acheive a fast co-oridinated rate change for all the CPU domain -+ * clocks. -+ * -+ * On a rate change request for the CPU clock, the rate change is propagated -+ * upto the PLL supplying the clock to the CPU domain clock blocks. While the -+ * CPU domain PLL is reconfigured, the CPU domain clocks are driven using an -+ * alternate clock source. If required, the alternate clock source is divided -+ * down in order to keep the output clock rate within the previous OPP limits. -+ */ ++static bool clk_core_rate_is_protected(struct clk_core *core) ++{ ++ return core->protect_count; ++} + -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++static bool clk_core_is_prepared(struct clk_core *core) ++{ ++ bool ret = false; + -+/** -+ * struct rockchip_cpuclk: information about clock supplied to a CPU core. -+ * @hw: handle between ccf and cpu clock. -+ * @alt_parent: alternate parent clock to use when switching the speed -+ * of the primary parent clock. -+ * @reg_base: base register for cpu-clock values. -+ * @clk_nb: clock notifier registered for changes in clock speed of the -+ * primary parent clock. 
-+ * @rate_count: number of rates in the rate_table -+ * @rate_table: pll-rates and their associated dividers -+ * @reg_data: cpu-specific register settings -+ * @lock: clock lock -+ */ -+struct rockchip_cpuclk { -+ struct clk_hw hw; -+ struct clk_hw *pll_hw; -+ struct clk *alt_parent; -+ void __iomem *reg_base; -+ struct notifier_block clk_nb; -+ unsigned int rate_count; -+ struct rockchip_cpuclk_rate_table *rate_table; -+ const struct rockchip_cpuclk_reg_data *reg_data; -+ spinlock_t *lock; -+}; ++ /* ++ * .is_prepared is optional for clocks that can prepare ++ * fall back to software usage counter if it is missing ++ */ ++ if (!core->ops->is_prepared) ++ return core->prepare_count; + -+#define to_rockchip_cpuclk_hw(hw) container_of(hw, struct rockchip_cpuclk, hw) -+#define to_rockchip_cpuclk_nb(nb) \ -+ container_of(nb, struct rockchip_cpuclk, clk_nb) ++ if (!clk_pm_runtime_get(core)) { ++ ret = core->ops->is_prepared(core->hw); ++ clk_pm_runtime_put(core); ++ } + -+static const struct rockchip_cpuclk_rate_table *rockchip_get_cpuclk_settings( -+ struct rockchip_cpuclk *cpuclk, unsigned long rate) ++ return ret; ++} ++ ++static bool clk_core_is_enabled(struct clk_core *core) +{ -+ const struct rockchip_cpuclk_rate_table *rate_table = -+ cpuclk->rate_table; -+ int i; ++ bool ret = false; + -+ for (i = 0; i < cpuclk->rate_count; i++) { -+ if (rate == rate_table[i].prate) -+ return &rate_table[i]; ++ /* ++ * .is_enabled is only mandatory for clocks that gate ++ * fall back to software usage counter if .is_enabled is missing ++ */ ++ if (!core->ops->is_enabled) ++ return core->enable_count; ++ ++ /* ++ * Check if clock controller's device is runtime active before ++ * calling .is_enabled callback. If not, assume that clock is ++ * disabled, because we might be called from atomic context, from ++ * which pm_runtime_get() is not allowed. ++ * This function is called mainly from clk_disable_unused_subtree, ++ * which ensures proper runtime pm activation of controller before ++ * taking enable spinlock, but the below check is needed if one tries ++ * to call it from other places. ++ */ ++ if (core->rpm_enabled) { ++ pm_runtime_get_noresume(core->dev); ++ if (!pm_runtime_active(core->dev)) { ++ ret = false; ++ goto done; ++ } + } + -+ return NULL; -+} ++ /* ++ * This could be called with the enable lock held, or from atomic ++ * context. If the parent isn't enabled already, we can't do ++ * anything here. We can also assume this clock isn't enabled. ++ */ ++ if ((core->flags & CLK_OPS_PARENT_ENABLE) && core->parent) ++ if (!clk_core_is_enabled(core->parent)) { ++ ret = false; ++ goto done; ++ } + -+static unsigned long rockchip_cpuclk_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_hw(hw); -+ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; -+ u32 clksel0 = readl_relaxed(cpuclk->reg_base + reg_data->core_reg[0]); ++ ret = core->ops->is_enabled(core->hw); ++done: ++ if (core->rpm_enabled) ++ pm_runtime_put(core->dev); + -+ clksel0 >>= reg_data->div_core_shift[0]; -+ clksel0 &= reg_data->div_core_mask[0]; -+ return parent_rate / (clksel0 + 1); ++ return ret; +} + -+static const struct clk_ops rockchip_cpuclk_ops = { -+ .recalc_rate = rockchip_cpuclk_recalc_rate, -+}; ++/*** helper functions ***/ + -+static void rockchip_cpuclk_set_dividers(struct rockchip_cpuclk *cpuclk, -+ const struct rockchip_cpuclk_rate_table *rate) ++const char *__clk_get_name(const struct clk *clk) +{ -+ int i; ++ return !clk ? 
NULL : clk->core->name; ++} ++EXPORT_SYMBOL_GPL(__clk_get_name); + -+ /* alternate parent is active now. set the dividers */ -+ for (i = 0; i < ARRAY_SIZE(rate->divs); i++) { -+ const struct rockchip_cpuclk_clksel *clksel = &rate->divs[i]; ++const char *clk_hw_get_name(const struct clk_hw *hw) ++{ ++ return hw->core->name; ++} ++EXPORT_SYMBOL_GPL(clk_hw_get_name); + -+ if (!clksel->reg) -+ continue; ++struct clk_hw *__clk_get_hw(struct clk *clk) ++{ ++ return !clk ? NULL : clk->core->hw; ++} ++EXPORT_SYMBOL_GPL(__clk_get_hw); + -+ pr_debug("%s: setting reg 0x%x to 0x%x\n", -+ __func__, clksel->reg, clksel->val); -+ writel(clksel->val, cpuclk->reg_base + clksel->reg); -+ } ++unsigned int clk_hw_get_num_parents(const struct clk_hw *hw) ++{ ++ return hw->core->num_parents; +} ++EXPORT_SYMBOL_GPL(clk_hw_get_num_parents); + -+static void rockchip_cpuclk_set_pre_muxs(struct rockchip_cpuclk *cpuclk, -+ const struct rockchip_cpuclk_rate_table *rate) ++struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw) +{ -+ int i; ++ return hw->core->parent ? hw->core->parent->hw : NULL; ++} ++EXPORT_SYMBOL_GPL(clk_hw_get_parent); + -+ /* alternate parent is active now. set the pre_muxs */ -+ for (i = 0; i < ARRAY_SIZE(rate->pre_muxs); i++) { -+ const struct rockchip_cpuclk_clksel *clksel = &rate->pre_muxs[i]; ++static struct clk_core *__clk_lookup_subtree(const char *name, ++ struct clk_core *core) ++{ ++ struct clk_core *child; ++ struct clk_core *ret; + -+ if (!clksel->reg) -+ break; ++ if (!strcmp(core->name, name)) ++ return core; + -+ pr_debug("%s: setting reg 0x%x to 0x%x\n", -+ __func__, clksel->reg, clksel->val); -+ writel(clksel->val, cpuclk->reg_base + clksel->reg); ++ hlist_for_each_entry(child, &core->children, child_node) { ++ ret = __clk_lookup_subtree(name, child); ++ if (ret) ++ return ret; + } ++ ++ return NULL; +} + -+static void rockchip_cpuclk_set_post_muxs(struct rockchip_cpuclk *cpuclk, -+ const struct rockchip_cpuclk_rate_table *rate) ++static struct clk_core *clk_core_lookup(const char *name) +{ -+ int i; ++ struct clk_core *root_clk; ++ struct clk_core *ret; + -+ /* alternate parent is active now. 
set the muxs */ -+ for (i = 0; i < ARRAY_SIZE(rate->post_muxs); i++) { -+ const struct rockchip_cpuclk_clksel *clksel = &rate->post_muxs[i]; ++ if (!name) ++ return NULL; + -+ if (!clksel->reg) -+ break; ++ /* search the 'proper' clk tree first */ ++ hlist_for_each_entry(root_clk, &clk_root_list, child_node) { ++ ret = __clk_lookup_subtree(name, root_clk); ++ if (ret) ++ return ret; ++ } + -+ pr_debug("%s: setting reg 0x%x to 0x%x\n", -+ __func__, clksel->reg, clksel->val); -+ writel(clksel->val, cpuclk->reg_base + clksel->reg); ++ /* if not found, then search the orphan tree */ ++ hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) { ++ ret = __clk_lookup_subtree(name, root_clk); ++ if (ret) ++ return ret; + } ++ ++ return NULL; +} + -+static int rockchip_cpuclk_pre_rate_change(struct rockchip_cpuclk *cpuclk, -+ struct clk_notifier_data *ndata) ++#ifdef CONFIG_OF ++static int of_parse_clkspec(const struct device_node *np, int index, ++ const char *name, struct of_phandle_args *out_args); ++static struct clk_hw * ++of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec); ++#else ++static inline int of_parse_clkspec(const struct device_node *np, int index, ++ const char *name, ++ struct of_phandle_args *out_args) +{ -+ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; -+ const struct rockchip_cpuclk_rate_table *rate; -+ unsigned long alt_prate, alt_div; -+ unsigned long flags; -+ int i = 0; ++ return -ENOENT; ++} ++static inline struct clk_hw * ++of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec) ++{ ++ return ERR_PTR(-ENOENT); ++} ++#endif + -+ /* check validity of the new rate */ -+ rate = rockchip_get_cpuclk_settings(cpuclk, ndata->new_rate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for cpuclk\n", -+ __func__, ndata->new_rate); -+ return -EINVAL; ++/** ++ * clk_core_get - Find the clk_core parent of a clk ++ * @core: clk to find parent of ++ * @p_index: parent index to search for ++ * ++ * This is the preferred method for clk providers to find the parent of a ++ * clk when that parent is external to the clk controller. The parent_names ++ * array is indexed and treated as a local name matching a string in the device ++ * node's 'clock-names' property or as the 'con_id' matching the device's ++ * dev_name() in a clk_lookup. This allows clk providers to use their own ++ * namespace instead of looking for a globally unique parent string. ++ * ++ * For example the following DT snippet would allow a clock registered by the ++ * clock-controller@c001 that has a clk_init_data::parent_data array ++ * with 'xtal' in the 'name' member to find the clock provided by the ++ * clock-controller@f00abcd without needing to get the globally unique name of ++ * the xtal clk. ++ * ++ * parent: clock-controller@f00abcd { ++ * reg = <0xf00abcd 0xabcd>; ++ * #clock-cells = <0>; ++ * }; ++ * ++ * clock-controller@c001 { ++ * reg = <0xc001 0xf00d>; ++ * clocks = <&parent>; ++ * clock-names = "xtal"; ++ * #clock-cells = <1>; ++ * }; ++ * ++ * Returns: -ENOENT when the provider can't be found or the clk doesn't ++ * exist in the provider or the name can't be found in the DT node or ++ * in a clkdev lookup. NULL when the provider knows about the clk but it ++ * isn't provided on this system. ++ * A valid clk_core pointer when the clk can be found in the provider. 
++ */ ++static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) ++{ ++ const char *name = core->parents[p_index].fw_name; ++ int index = core->parents[p_index].index; ++ struct clk_hw *hw = ERR_PTR(-ENOENT); ++ struct device *dev = core->dev; ++ const char *dev_id = dev ? dev_name(dev) : NULL; ++ struct device_node *np = core->of_node; ++ struct of_phandle_args clkspec; ++ ++ if (np && (name || index >= 0) && ++ !of_parse_clkspec(np, index, name, &clkspec)) { ++ hw = of_clk_get_hw_from_clkspec(&clkspec); ++ of_node_put(clkspec.np); ++ } else if (name) { ++ /* ++ * If the DT search above couldn't find the provider fallback to ++ * looking up via clkdev based clk_lookups. ++ */ ++ hw = clk_find_hw(dev_id, name); + } + -+ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) -+ rockchip_boost_enable_recovery_sw_low(cpuclk->pll_hw); ++ if (IS_ERR(hw)) ++ return ERR_CAST(hw); + -+ alt_prate = clk_get_rate(cpuclk->alt_parent); ++ if (!hw) ++ return NULL; + -+ spin_lock_irqsave(cpuclk->lock, flags); ++ return hw->core; ++} ++ ++static void clk_core_fill_parent_index(struct clk_core *core, u8 index) ++{ ++ struct clk_parent_map *entry = &core->parents[index]; ++ struct clk_core *parent; ++ ++ if (entry->hw) { ++ parent = entry->hw->core; ++ } else { ++ parent = clk_core_get(core, index); ++ if (PTR_ERR(parent) == -ENOENT && entry->name) ++ parent = clk_core_lookup(entry->name); ++ } + + /* -+ * If the old parent clock speed is less than the clock speed -+ * of the alternate parent, then it should be ensured that at no point -+ * the armclk speed is more than the old_rate until the dividers are -+ * set. ++ * We have a direct reference but it isn't registered yet? ++ * Orphan it and let clk_reparent() update the orphan status ++ * when the parent is registered. + */ -+ if (alt_prate > ndata->old_rate) { -+ /* calculate dividers */ -+ alt_div = DIV_ROUND_UP(alt_prate, ndata->old_rate) - 1; -+ if (alt_div > reg_data->div_core_mask[0]) { -+ pr_warn("%s: limiting alt-divider %lu to %d\n", -+ __func__, alt_div, reg_data->div_core_mask[0]); -+ alt_div = reg_data->div_core_mask[0]; -+ } ++ if (!parent) ++ parent = ERR_PTR(-EPROBE_DEFER); + -+ /* -+ * Change parents and add dividers in a single transaction. -+ * -+ * NOTE: we do this in a single transaction so we're never -+ * dividing the primary parent by the extra dividers that were -+ * needed for the alt. 
-+ */ -+ pr_debug("%s: setting div %lu as alt-rate %lu > old-rate %lu\n", -+ __func__, alt_div, alt_prate, ndata->old_rate); ++ /* Only cache it if it's not an error */ ++ if (!IS_ERR(parent)) ++ entry->core = parent; ++} + -+ for (i = 0; i < reg_data->num_cores; i++) { -+ writel(HIWORD_UPDATE(alt_div, reg_data->div_core_mask[i], -+ reg_data->div_core_shift[i]), -+ cpuclk->reg_base + reg_data->core_reg[i]); -+ } -+ } ++static struct clk_core *clk_core_get_parent_by_index(struct clk_core *core, ++ u8 index) ++{ ++ if (!core || index >= core->num_parents || !core->parents) ++ return NULL; + -+ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) -+ rockchip_boost_add_core_div(cpuclk->pll_hw, alt_prate); ++ if (!core->parents[index].core) ++ clk_core_fill_parent_index(core, index); + -+ rockchip_cpuclk_set_pre_muxs(cpuclk, rate); ++ return core->parents[index].core; ++} + -+ /* select alternate parent */ -+ if (reg_data->mux_core_reg) -+ writel(HIWORD_UPDATE(reg_data->mux_core_alt, -+ reg_data->mux_core_mask, -+ reg_data->mux_core_shift), -+ cpuclk->reg_base + reg_data->mux_core_reg); -+ else -+ writel(HIWORD_UPDATE(reg_data->mux_core_alt, -+ reg_data->mux_core_mask, -+ reg_data->mux_core_shift), -+ cpuclk->reg_base + reg_data->core_reg[0]); ++struct clk_hw * ++clk_hw_get_parent_by_index(const struct clk_hw *hw, unsigned int index) ++{ ++ struct clk_core *parent; + -+ spin_unlock_irqrestore(cpuclk->lock, flags); -+ return 0; ++ parent = clk_core_get_parent_by_index(hw->core, index); ++ ++ return !parent ? NULL : parent->hw; +} ++EXPORT_SYMBOL_GPL(clk_hw_get_parent_by_index); + -+static int rockchip_cpuclk_post_rate_change(struct rockchip_cpuclk *cpuclk, -+ struct clk_notifier_data *ndata) ++unsigned int __clk_get_enable_count(struct clk *clk) +{ -+ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; -+ const struct rockchip_cpuclk_rate_table *rate; -+ unsigned long flags; -+ int i = 0; -+ -+ rate = rockchip_get_cpuclk_settings(cpuclk, ndata->new_rate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for cpuclk\n", -+ __func__, ndata->new_rate); -+ return -EINVAL; -+ } ++ return !clk ? 0 : clk->core->enable_count; ++} + -+ spin_lock_irqsave(cpuclk->lock, flags); ++static unsigned long clk_core_get_rate_nolock(struct clk_core *core) ++{ ++ if (!core) ++ return 0; + -+ if (ndata->old_rate < ndata->new_rate) -+ rockchip_cpuclk_set_dividers(cpuclk, rate); ++ if (!core->num_parents || core->parent) ++ return core->rate; + + /* -+ * post-rate change event, re-mux to primary parent and remove dividers. -+ * -+ * NOTE: we do this in a single transaction so we're never dividing the -+ * primary parent by the extra dividers that were needed for the alt. ++ * Clk must have a parent because num_parents > 0 but the parent isn't ++ * known yet. Best to return 0 as the rate of this clk until we can ++ * properly recalc the rate based on the parent's rate. 
+ */ ++ return 0; ++} + -+ if (reg_data->mux_core_reg) -+ writel(HIWORD_UPDATE(reg_data->mux_core_main, -+ reg_data->mux_core_mask, -+ reg_data->mux_core_shift), -+ cpuclk->reg_base + reg_data->mux_core_reg); -+ else -+ writel(HIWORD_UPDATE(reg_data->mux_core_main, -+ reg_data->mux_core_mask, -+ reg_data->mux_core_shift), -+ cpuclk->reg_base + reg_data->core_reg[0]); ++unsigned long clk_hw_get_rate(const struct clk_hw *hw) ++{ ++ return clk_core_get_rate_nolock(hw->core); ++} ++EXPORT_SYMBOL_GPL(clk_hw_get_rate); + -+ rockchip_cpuclk_set_post_muxs(cpuclk, rate); ++static unsigned long clk_core_get_accuracy_no_lock(struct clk_core *core) ++{ ++ if (!core) ++ return 0; + -+ /* remove dividers */ -+ for (i = 0; i < reg_data->num_cores; i++) { -+ writel(HIWORD_UPDATE(0, reg_data->div_core_mask[i], -+ reg_data->div_core_shift[i]), -+ cpuclk->reg_base + reg_data->core_reg[i]); -+ } ++ return core->accuracy; ++} + -+ if (ndata->old_rate > ndata->new_rate) -+ rockchip_cpuclk_set_dividers(cpuclk, rate); ++unsigned long clk_hw_get_flags(const struct clk_hw *hw) ++{ ++ return hw->core->flags; ++} ++EXPORT_SYMBOL_GPL(clk_hw_get_flags); + -+ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) -+ rockchip_boost_disable_recovery_sw(cpuclk->pll_hw); ++bool clk_hw_is_prepared(const struct clk_hw *hw) ++{ ++ return clk_core_is_prepared(hw->core); ++} ++EXPORT_SYMBOL_GPL(clk_hw_is_prepared); + -+ spin_unlock_irqrestore(cpuclk->lock, flags); -+ return 0; ++bool clk_hw_rate_is_protected(const struct clk_hw *hw) ++{ ++ return clk_core_rate_is_protected(hw->core); +} ++EXPORT_SYMBOL_GPL(clk_hw_rate_is_protected); + -+/* -+ * This clock notifier is called when the frequency of the parent clock -+ * of cpuclk is to be changed. This notifier handles the setting up all -+ * the divider clocks, remux to temporary parent and handling the safe -+ * frequency levels when using temporary parent. 
-+ */ -+static int rockchip_cpuclk_notifier_cb(struct notifier_block *nb, -+ unsigned long event, void *data) ++bool clk_hw_is_enabled(const struct clk_hw *hw) +{ -+ struct clk_notifier_data *ndata = data; -+ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_nb(nb); -+ int ret = 0; ++ return clk_core_is_enabled(hw->core); ++} ++EXPORT_SYMBOL_GPL(clk_hw_is_enabled); + -+ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", -+ __func__, event, ndata->old_rate, ndata->new_rate); -+ if (event == PRE_RATE_CHANGE) -+ ret = rockchip_cpuclk_pre_rate_change(cpuclk, ndata); -+ else if (event == POST_RATE_CHANGE) -+ ret = rockchip_cpuclk_post_rate_change(cpuclk, ndata); ++bool __clk_is_enabled(struct clk *clk) ++{ ++ if (!clk) ++ return false; + -+ return notifier_from_errno(ret); ++ return clk_core_is_enabled(clk->core); +} ++EXPORT_SYMBOL_GPL(__clk_is_enabled); + -+struct clk *rockchip_clk_register_cpuclk(const char *name, -+ u8 num_parents, -+ struct clk *parent, struct clk *alt_parent, -+ const struct rockchip_cpuclk_reg_data *reg_data, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates, void __iomem *reg_base, spinlock_t *lock) ++static bool mux_is_better_rate(unsigned long rate, unsigned long now, ++ unsigned long best, unsigned long flags) +{ -+ struct rockchip_cpuclk *cpuclk; -+ struct clk_init_data init; -+ struct clk *clk, *cclk, *pll_clk; -+ const char *parent_name; -+ int ret; -+ -+ if (num_parents < 2) { -+ pr_err("%s: needs at least two parent clocks\n", __func__); -+ return ERR_PTR(-EINVAL); -+ } ++ if (flags & CLK_MUX_ROUND_CLOSEST) ++ return abs(now - rate) < abs(best - rate); + -+ if (IS_ERR(parent) || IS_ERR(alt_parent)) { -+ pr_err("%s: invalid parent clock(s)\n", __func__); -+ return ERR_PTR(-EINVAL); -+ } ++ return now <= rate && now > best; ++} + -+ cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL); -+ if (!cpuclk) -+ return ERR_PTR(-ENOMEM); ++static void clk_core_init_rate_req(struct clk_core * const core, ++ struct clk_rate_request *req, ++ unsigned long rate); + -+ parent_name = clk_hw_get_name(__clk_get_hw(parent)); -+ init.name = name; -+ init.parent_names = &parent_name; -+ init.num_parents = 1; -+ init.ops = &rockchip_cpuclk_ops; ++static int clk_core_round_rate_nolock(struct clk_core *core, ++ struct clk_rate_request *req); + -+ /* only allow rate changes when we have a rate table */ -+ init.flags = (nrates > 0) ? CLK_SET_RATE_PARENT : 0; ++static bool clk_core_has_parent(struct clk_core *core, const struct clk_core *parent) ++{ ++ struct clk_core *tmp; ++ unsigned int i; + -+ /* disallow automatic parent changes by ccf */ -+ init.flags |= CLK_SET_RATE_NO_REPARENT; ++ /* Optimize for the case where the parent is already the parent. 
*/ ++ if (core->parent == parent) ++ return true; + -+ init.flags |= CLK_GET_RATE_NOCACHE; ++ for (i = 0; i < core->num_parents; i++) { ++ tmp = clk_core_get_parent_by_index(core, i); ++ if (!tmp) ++ continue; + -+ cpuclk->reg_base = reg_base; -+ cpuclk->lock = lock; -+ cpuclk->reg_data = reg_data; -+ cpuclk->clk_nb.notifier_call = rockchip_cpuclk_notifier_cb; -+ cpuclk->hw.init = &init; -+ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST) && reg_data->pll_name) { -+ pll_clk = clk_get_parent(parent); -+ if (!pll_clk) { -+ pr_err("%s: could not lookup pll clock: (%s)\n", -+ __func__, reg_data->pll_name); -+ ret = -EINVAL; -+ goto free_cpuclk; -+ } -+ cpuclk->pll_hw = __clk_get_hw(pll_clk); -+ rockchip_boost_init(cpuclk->pll_hw); ++ if (tmp == parent) ++ return true; + } + -+ cpuclk->alt_parent = alt_parent; -+ if (!cpuclk->alt_parent) { -+ pr_err("%s: could not lookup alternate parent: (%d)\n", -+ __func__, reg_data->mux_core_alt); -+ ret = -EINVAL; -+ goto free_cpuclk; -+ } ++ return false; ++} + -+ ret = clk_prepare_enable(cpuclk->alt_parent); -+ if (ret) { -+ pr_err("%s: could not enable alternate parent\n", -+ __func__); -+ goto free_cpuclk; -+ } ++static void ++clk_core_forward_rate_req(struct clk_core *core, ++ const struct clk_rate_request *old_req, ++ struct clk_core *parent, ++ struct clk_rate_request *req, ++ unsigned long parent_rate) ++{ ++ if (WARN_ON(!clk_core_has_parent(core, parent))) ++ return; + -+ clk = parent; -+ if (!clk) { -+ pr_err("%s: could not lookup parent clock: (%d) %s\n", -+ __func__, reg_data->mux_core_main, -+ parent_name); -+ ret = -EINVAL; -+ goto free_alt_parent; -+ } ++ clk_core_init_rate_req(parent, req, parent_rate); + -+ ret = clk_notifier_register(clk, &cpuclk->clk_nb); -+ if (ret) { -+ pr_err("%s: failed to register clock notifier for %s\n", -+ __func__, name); -+ goto free_alt_parent; -+ } ++ if (req->min_rate < old_req->min_rate) ++ req->min_rate = old_req->min_rate; + -+ if (nrates > 0) { -+ cpuclk->rate_count = nrates; -+ cpuclk->rate_table = kmemdup(rates, -+ sizeof(*rates) * nrates, -+ GFP_KERNEL); -+ if (!cpuclk->rate_table) { -+ ret = -ENOMEM; -+ goto unregister_notifier; ++ if (req->max_rate > old_req->max_rate) ++ req->max_rate = old_req->max_rate; ++} ++ ++static int ++clk_core_determine_rate_no_reparent(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ struct clk_core *core = hw->core; ++ struct clk_core *parent = core->parent; ++ unsigned long best; ++ int ret; ++ ++ if (core->flags & CLK_SET_RATE_PARENT) { ++ struct clk_rate_request parent_req; ++ ++ if (!parent) { ++ req->rate = 0; ++ return 0; + } -+ } + -+ cclk = clk_register(NULL, &cpuclk->hw); -+ if (IS_ERR(cclk)) { -+ pr_err("%s: could not register cpuclk %s\n", __func__, name); -+ ret = PTR_ERR(cclk); -+ goto free_rate_table; -+ } ++ clk_core_forward_rate_req(core, req, parent, &parent_req, ++ req->rate); + -+ return cclk; ++ trace_clk_rate_request_start(&parent_req); + -+free_rate_table: -+ kfree(cpuclk->rate_table); -+unregister_notifier: -+ clk_notifier_unregister(clk, &cpuclk->clk_nb); -+free_alt_parent: -+ clk_disable_unprepare(cpuclk->alt_parent); -+free_cpuclk: -+ kfree(cpuclk); -+ return ERR_PTR(ret); -+} ++ ret = clk_core_round_rate_nolock(parent, &parent_req); ++ if (ret) ++ return ret; + -+static int rockchip_cpuclk_v2_pre_rate_change(struct rockchip_cpuclk *cpuclk, -+ struct clk_notifier_data *ndata) -+{ -+ unsigned long new_rate = roundup(ndata->new_rate, 1000); -+ const struct rockchip_cpuclk_rate_table *rate; -+ unsigned long flags; ++ 
trace_clk_rate_request_done(&parent_req); + -+ rate = rockchip_get_cpuclk_settings(cpuclk, new_rate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for cpuclk\n", -+ __func__, new_rate); -+ return -EINVAL; ++ best = parent_req.rate; ++ } else if (parent) { ++ best = clk_core_get_rate_nolock(parent); ++ } else { ++ best = clk_core_get_rate_nolock(core); + } + -+ if (new_rate > ndata->old_rate) { -+ spin_lock_irqsave(cpuclk->lock, flags); -+ rockchip_cpuclk_set_dividers(cpuclk, rate); -+ spin_unlock_irqrestore(cpuclk->lock, flags); -+ } ++ req->best_parent_rate = best; ++ req->rate = best; + + return 0; +} + -+static int rockchip_cpuclk_v2_post_rate_change(struct rockchip_cpuclk *cpuclk, -+ struct clk_notifier_data *ndata) ++int clk_mux_determine_rate_flags(struct clk_hw *hw, ++ struct clk_rate_request *req, ++ unsigned long flags) +{ -+ unsigned long new_rate = roundup(ndata->new_rate, 1000); -+ const struct rockchip_cpuclk_rate_table *rate; -+ unsigned long flags; ++ struct clk_core *core = hw->core, *parent, *best_parent = NULL; ++ int i, num_parents, ret; ++ unsigned long best = 0; + -+ rate = rockchip_get_cpuclk_settings(cpuclk, new_rate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for cpuclk\n", -+ __func__, new_rate); -+ return -EINVAL; -+ } ++ /* if NO_REPARENT flag set, pass through to current parent */ ++ if (core->flags & CLK_SET_RATE_NO_REPARENT) ++ return clk_core_determine_rate_no_reparent(hw, req); + -+ if (new_rate < ndata->old_rate) { -+ spin_lock_irqsave(cpuclk->lock, flags); -+ rockchip_cpuclk_set_dividers(cpuclk, rate); -+ spin_unlock_irqrestore(cpuclk->lock, flags); ++ /* find the parent that can provide the fastest rate <= rate */ ++ num_parents = core->num_parents; ++ for (i = 0; i < num_parents; i++) { ++ unsigned long parent_rate; ++ ++ parent = clk_core_get_parent_by_index(core, i); ++ if (!parent) ++ continue; ++ ++ if (core->flags & CLK_SET_RATE_PARENT) { ++ struct clk_rate_request parent_req; ++ ++ clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate); ++ ++ trace_clk_rate_request_start(&parent_req); ++ ++ ret = clk_core_round_rate_nolock(parent, &parent_req); ++ if (ret) ++ continue; ++ ++ trace_clk_rate_request_done(&parent_req); ++ ++ parent_rate = parent_req.rate; ++ } else { ++ parent_rate = clk_core_get_rate_nolock(parent); ++ } ++ ++ if (mux_is_better_rate(req->rate, parent_rate, ++ best, flags)) { ++ best_parent = parent; ++ best = parent_rate; ++ } + } + ++ if (!best_parent) ++ return -EINVAL; ++ ++ req->best_parent_hw = best_parent->hw; ++ req->best_parent_rate = best; ++ req->rate = best; ++ + return 0; +} ++EXPORT_SYMBOL_GPL(clk_mux_determine_rate_flags); + -+static int rockchip_cpuclk_v2_notifier_cb(struct notifier_block *nb, -+ unsigned long event, void *data) ++struct clk *__clk_lookup(const char *name) +{ -+ struct clk_notifier_data *ndata = data; -+ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_nb(nb); -+ int ret = 0; -+ -+ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", -+ __func__, event, ndata->old_rate, ndata->new_rate); -+ if (event == PRE_RATE_CHANGE) -+ ret = rockchip_cpuclk_v2_pre_rate_change(cpuclk, ndata); -+ else if (event == POST_RATE_CHANGE) -+ ret = rockchip_cpuclk_v2_post_rate_change(cpuclk, ndata); ++ struct clk_core *core = clk_core_lookup(name); + -+ return notifier_from_errno(ret); ++ return !core ? 
NULL : core->hw->clk; +} + -+struct clk *rockchip_clk_register_cpuclk_v2(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ unsigned long flags, spinlock_t *lock, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates) ++static void clk_core_get_boundaries(struct clk_core *core, ++ unsigned long *min_rate, ++ unsigned long *max_rate) +{ -+ struct rockchip_cpuclk *cpuclk; -+ struct clk_hw *hw; -+ struct clk_mux *mux = NULL; -+ struct clk_divider *div = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL; -+ int ret; ++ struct clk *clk_user; + -+ if (num_parents > 1) { -+ mux = kzalloc(sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); ++ lockdep_assert_held(&prepare_lock); + -+ mux->reg = base + muxdiv_offset; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux->flags = mux_flags; -+ mux->lock = lock; -+ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops -+ : &clk_mux_ops; -+ } ++ *min_rate = core->min_rate; ++ *max_rate = core->max_rate; + -+ if (div_width > 0) { -+ div = kzalloc(sizeof(*div), GFP_KERNEL); -+ if (!div) { -+ ret = -ENOMEM; -+ goto free_mux; -+ } ++ hlist_for_each_entry(clk_user, &core->clks, clks_node) ++ *min_rate = max(*min_rate, clk_user->min_rate); + -+ div->flags = div_flags; -+ if (div_offset) -+ div->reg = base + div_offset; -+ else -+ div->reg = base + muxdiv_offset; -+ div->shift = div_shift; -+ div->width = div_width; -+ div->lock = lock; -+ div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) -+ ? &clk_divider_ro_ops -+ : &clk_divider_ops; -+ } ++ hlist_for_each_entry(clk_user, &core->clks, clks_node) ++ *max_rate = min(*max_rate, clk_user->max_rate); ++} + -+ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, -+ mux ? &mux->hw : NULL, mux_ops, -+ div ? &div->hw : NULL, div_ops, -+ NULL, NULL, flags); -+ if (IS_ERR(hw)) { -+ ret = PTR_ERR(hw); -+ goto free_div; -+ } ++/* ++ * clk_hw_get_rate_range() - returns the clock rate range for a hw clk ++ * @hw: the hw clk we want to get the range from ++ * @min_rate: pointer to the variable that will hold the minimum ++ * @max_rate: pointer to the variable that will hold the maximum ++ * ++ * Fills the @min_rate and @max_rate variables with the minimum and ++ * maximum that clock can reach. 
++ */ ++void clk_hw_get_rate_range(struct clk_hw *hw, unsigned long *min_rate, ++ unsigned long *max_rate) ++{ ++ clk_core_get_boundaries(hw->core, min_rate, max_rate); ++} ++EXPORT_SYMBOL_GPL(clk_hw_get_rate_range); + -+ cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL); -+ if (!cpuclk) { -+ ret = -ENOMEM; -+ goto unregister_clk; -+ } ++static bool clk_core_check_boundaries(struct clk_core *core, ++ unsigned long min_rate, ++ unsigned long max_rate) ++{ ++ struct clk *user; + -+ cpuclk->reg_base = base; -+ cpuclk->lock = lock; -+ cpuclk->clk_nb.notifier_call = rockchip_cpuclk_v2_notifier_cb; -+ ret = clk_notifier_register(hw->clk, &cpuclk->clk_nb); -+ if (ret) { -+ pr_err("%s: failed to register clock notifier for %s\n", -+ __func__, name); -+ goto free_cpuclk; -+ } ++ lockdep_assert_held(&prepare_lock); + -+ if (nrates > 0) { -+ cpuclk->rate_count = nrates; -+ cpuclk->rate_table = kmemdup(rates, -+ sizeof(*rates) * nrates, -+ GFP_KERNEL); -+ if (!cpuclk->rate_table) { -+ ret = -ENOMEM; -+ goto free_cpuclk; -+ } -+ } ++ if (min_rate > core->max_rate || max_rate < core->min_rate) ++ return false; + -+ return hw->clk; ++ hlist_for_each_entry(user, &core->clks, clks_node) ++ if (min_rate > user->max_rate || max_rate < user->min_rate) ++ return false; + -+free_cpuclk: -+ kfree(cpuclk); -+unregister_clk: -+ clk_hw_unregister_composite(hw); -+free_div: -+ kfree(div); -+free_mux: -+ kfree(mux); ++ return true; ++} + -+ return ERR_PTR(ret); ++void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, ++ unsigned long max_rate) ++{ ++ hw->core->min_rate = min_rate; ++ hw->core->max_rate = max_rate; +} -diff --git a/drivers/clk/rockchip-oh/clk-dclk-divider.c b/drivers/clk/rockchip-oh/clk-dclk-divider.c -new file mode 100644 -index 000000000..88cf7ab82 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-dclk-divider.c -@@ -0,0 +1,168 @@ -+// SPDX-License-Identifier: GPL-2.0 ++EXPORT_SYMBOL_GPL(clk_hw_set_rate_range); ++ +/* -+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd ++ * __clk_mux_determine_rate - clk_ops::determine_rate implementation for a mux type clk ++ * @hw: mux type clk to determine rate on ++ * @req: rate request, also used to return preferred parent and frequencies ++ * ++ * Helper for finding best parent to provide a given frequency. This can be used ++ * directly as a determine_rate callback (e.g. for a mux), or from a more ++ * complex clock that may combine a mux with other operations. ++ * ++ * Returns: 0 on success, -EERROR value on error + */ ++int __clk_mux_determine_rate(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ return clk_mux_determine_rate_flags(hw, req, 0); ++} ++EXPORT_SYMBOL_GPL(__clk_mux_determine_rate); + -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++int __clk_mux_determine_rate_closest(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ return clk_mux_determine_rate_flags(hw, req, CLK_MUX_ROUND_CLOSEST); ++} ++EXPORT_SYMBOL_GPL(__clk_mux_determine_rate_closest); + -+#define div_mask(width) ((1 << (width)) - 1) ++/* ++ * clk_hw_determine_rate_no_reparent - clk_ops::determine_rate implementation for a clk that doesn't reparent ++ * @hw: mux type clk to determine rate on ++ * @req: rate request, also used to return preferred frequency ++ * ++ * Helper for finding best parent rate to provide a given frequency. ++ * This can be used directly as a determine_rate callback (e.g. for a ++ * mux), or from a more complex clock that may combine a mux with other ++ * operations. 
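++ *
++ * For instance (an illustrative sketch only; the 'my_*' callbacks are made-up
++ * names, not part of this patch), a clock whose parent selection must never
++ * be changed by the framework could plug the helper straight into its ops:
++ *
++ *   static const struct clk_ops my_fixed_parent_ops = {
++ *           .get_parent     = my_get_parent,
++ *           .recalc_rate    = my_recalc_rate,
++ *           .determine_rate = clk_hw_determine_rate_no_reparent,
++ *   };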
++ * ++ * Returns: 0 on success, -EERROR value on error ++ */ ++int clk_hw_determine_rate_no_reparent(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ return clk_core_determine_rate_no_reparent(hw, req); ++} ++EXPORT_SYMBOL_GPL(clk_hw_determine_rate_no_reparent); + -+static unsigned long clk_dclk_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) ++/*** clk api ***/ ++ ++static void clk_core_rate_unprotect(struct clk_core *core) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int val; ++ lockdep_assert_held(&prepare_lock); + -+ val = clk_readl(divider->reg) >> divider->shift; -+ val &= div_mask(divider->width); ++ if (!core) ++ return; + -+ return DIV_ROUND_UP_ULL(((u64)parent_rate), val + 1); -+} ++ if (WARN(core->protect_count == 0, ++ "%s already unprotected\n", core->name)) ++ return; + -+static long clk_dclk_round_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long *prate) -+{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ int div, maxdiv = div_mask(divider->width) + 1; ++ if (--core->protect_count > 0) ++ return; + -+ div = DIV_ROUND_UP_ULL(divider->max_prate, rate); -+ if (div % 2) -+ div = __rounddown_pow_of_two(div); -+ div = div > maxdiv ? maxdiv : div; -+ *prate = div * rate; -+ return rate; ++ clk_core_rate_unprotect(core->parent); +} + -+static int clk_dclk_set_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long parent_rate) ++static int clk_core_rate_nuke_protect(struct clk_core *core) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int value; -+ unsigned long flags = 0; -+ u32 val; ++ int ret; + -+ value = divider_get_val(rate, parent_rate, divider->table, -+ divider->width, divider->flags); ++ lockdep_assert_held(&prepare_lock); + -+ if (divider->lock) -+ spin_lock_irqsave(divider->lock, flags); -+ else -+ __acquire(divider->lock); ++ if (!core) ++ return -EINVAL; + -+ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { -+ val = div_mask(divider->width) << (divider->shift + 16); -+ } else { -+ val = clk_readl(divider->reg); -+ val &= ~(div_mask(divider->width) << divider->shift); -+ } -+ val |= value << divider->shift; -+ clk_writel(val, divider->reg); ++ if (core->protect_count == 0) ++ return 0; + -+ if (divider->lock) -+ spin_unlock_irqrestore(divider->lock, flags); -+ else -+ __release(divider->lock); ++ ret = core->protect_count; ++ core->protect_count = 1; ++ clk_core_rate_unprotect(core); + -+ return 0; ++ return ret; +} + -+const struct clk_ops clk_dclk_divider_ops = { -+ .recalc_rate = clk_dclk_recalc_rate, -+ .round_rate = clk_dclk_round_rate, -+ .set_rate = clk_dclk_set_rate, -+}; -+EXPORT_SYMBOL_GPL(clk_dclk_divider_ops); -+ +/** -+ * Register a clock branch. -+ * Most clock branches have a form like ++ * clk_rate_exclusive_put - release exclusivity over clock rate control ++ * @clk: the clk over which the exclusivity is released + * -+ * src1 --|--\ -+ * |M |--[GATE]-[DIV]- -+ * src2 --|--/ ++ * clk_rate_exclusive_put() completes a critical section during which a clock ++ * consumer cannot tolerate any other consumer making any operation on the ++ * clock which could result in a rate change or rate glitch. Exclusive clocks ++ * cannot have their rate changed, either directly or indirectly due to changes ++ * further up the parent chain of clocks. As a result, clocks up parent chain ++ * also get under exclusive control of the calling consumer. + * -+ * sometimes without one of those components. 
++ * If exlusivity is claimed more than once on clock, even by the same consumer, ++ * the rate effectively gets locked as exclusivity can't be preempted. ++ * ++ * Calls to clk_rate_exclusive_put() must be balanced with calls to ++ * clk_rate_exclusive_get(). Calls to this function may sleep, and do not return ++ * error status. + */ -+struct clk *rockchip_clk_register_dclk_branch(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, -+ void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ struct clk_div_table *div_table, -+ int gate_offset, -+ u8 gate_shift, u8 gate_flags, -+ unsigned long flags, -+ unsigned long max_prate, -+ spinlock_t *lock) ++void clk_rate_exclusive_put(struct clk *clk) +{ -+ struct clk *clk; -+ struct clk_mux *mux = NULL; -+ struct clk_gate *gate = NULL; -+ struct clk_divider *div = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, -+ *gate_ops = NULL; -+ -+ if (num_parents > 1) { -+ mux = kzalloc(sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); ++ if (!clk) ++ return; + -+ mux->reg = base + muxdiv_offset; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux->flags = mux_flags; -+ mux->lock = lock; -+ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops -+ : &clk_mux_ops; -+ } ++ clk_prepare_lock(); + -+ if (gate_offset >= 0) { -+ gate = kzalloc(sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ goto err_gate; ++ /* ++ * if there is something wrong with this consumer protect count, stop ++ * here before messing with the provider ++ */ ++ if (WARN_ON(clk->exclusive_count <= 0)) ++ goto out; + -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; -+ gate_ops = &clk_gate_ops; -+ } ++ clk_core_rate_unprotect(clk->core); ++ clk->exclusive_count--; ++out: ++ clk_prepare_unlock(); ++} ++EXPORT_SYMBOL_GPL(clk_rate_exclusive_put); + -+ if (div_width > 0) { -+ div = kzalloc(sizeof(*div), GFP_KERNEL); -+ if (!div) -+ goto err_div; ++static void clk_core_rate_protect(struct clk_core *core) ++{ ++ lockdep_assert_held(&prepare_lock); + -+ div->flags = div_flags; -+ if (div_offset) -+ div->reg = base + div_offset; -+ else -+ div->reg = base + muxdiv_offset; -+ div->shift = div_shift; -+ div->width = div_width; -+ div->lock = lock; -+ div->max_prate = max_prate; -+ div_ops = &clk_dclk_divider_ops; -+ } ++ if (!core) ++ return; + -+ clk = clk_register_composite(NULL, name, parent_names, num_parents, -+ mux ? &mux->hw : NULL, mux_ops, -+ div ? &div->hw : NULL, div_ops, -+ gate ? &gate->hw : NULL, gate_ops, -+ flags); ++ if (core->protect_count == 0) ++ clk_core_rate_protect(core->parent); + -+ return clk; -+err_div: -+ kfree(gate); -+err_gate: -+ kfree(mux); -+ return ERR_PTR(-ENOMEM); ++ core->protect_count++; +} -diff --git a/drivers/clk/rockchip-oh/clk-ddr.c b/drivers/clk/rockchip-oh/clk-ddr.c -new file mode 100644 -index 000000000..46df75f1a ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-ddr.c -@@ -0,0 +1,238 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
-+ * Author: Lin Huang -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_ARM -+#include -+#endif + -+#include "clk.h" ++static void clk_core_rate_restore_protect(struct clk_core *core, int count) ++{ ++ lockdep_assert_held(&prepare_lock); + -+struct rockchip_ddrclk { -+ struct clk_hw hw; -+ void __iomem *reg_base; -+ int mux_offset; -+ int mux_shift; -+ int mux_width; -+ int div_shift; -+ int div_width; -+ int ddr_flag; -+}; ++ if (!core) ++ return; + -+#define to_rockchip_ddrclk_hw(hw) container_of(hw, struct rockchip_ddrclk, hw) ++ if (count == 0) ++ return; + -+struct share_params_ddrclk { -+ u32 hz; -+ u32 lcdc_type; -+}; ++ clk_core_rate_protect(core); ++ core->protect_count = count; ++} + -+struct rockchip_ddrclk_data { -+ void __iomem *params; -+ int (*dmcfreq_wait_complete)(void); -+}; ++/** ++ * clk_rate_exclusive_get - get exclusivity over the clk rate control ++ * @clk: the clk over which the exclusity of rate control is requested ++ * ++ * clk_rate_exclusive_get() begins a critical section during which a clock ++ * consumer cannot tolerate any other consumer making any operation on the ++ * clock which could result in a rate change or rate glitch. Exclusive clocks ++ * cannot have their rate changed, either directly or indirectly due to changes ++ * further up the parent chain of clocks. As a result, clocks up parent chain ++ * also get under exclusive control of the calling consumer. ++ * ++ * If exlusivity is claimed more than once on clock, even by the same consumer, ++ * the rate effectively gets locked as exclusivity can't be preempted. ++ * ++ * Calls to clk_rate_exclusive_get() should be balanced with calls to ++ * clk_rate_exclusive_put(). Calls to this function may sleep. 
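++ *
++ * An illustrative consumer pattern (a sketch only; 'priv->bclk' is a made-up
++ * clock handle, not part of this patch):
++ *
++ *   ret = clk_rate_exclusive_get(priv->bclk);
++ *   if (ret)
++ *           return ret;
++ *
++ *   clk_set_rate(priv->bclk, rate);  /* the holder may still change the rate */
++ *   /* ... stream while the rate is guaranteed not to glitch ... */
++ *   clk_rate_exclusive_put(priv->bclk);
++ *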
++ * Returns 0 on success, -EERROR otherwise ++ */ ++int clk_rate_exclusive_get(struct clk *clk) ++{ ++ if (!clk) ++ return 0; + -+static struct rockchip_ddrclk_data ddr_data = {NULL, NULL}; ++ clk_prepare_lock(); ++ clk_core_rate_protect(clk->core); ++ clk->exclusive_count++; ++ clk_prepare_unlock(); + -+void rockchip_set_ddrclk_params(void __iomem *params) -+{ -+ ddr_data.params = params; ++ return 0; +} -+EXPORT_SYMBOL(rockchip_set_ddrclk_params); ++EXPORT_SYMBOL_GPL(clk_rate_exclusive_get); + -+void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)) ++static void clk_core_unprepare(struct clk_core *core) +{ -+ ddr_data.dmcfreq_wait_complete = func; -+} -+EXPORT_SYMBOL(rockchip_set_ddrclk_dmcfreq_wait_complete); ++ lockdep_assert_held(&prepare_lock); + -+static int rockchip_ddrclk_sip_set_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long prate) -+{ -+ struct arm_smccc_res res; ++ if (!core) ++ return; + -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, drate, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE, -+ 0, 0, 0, 0, &res); ++ if (WARN(core->prepare_count == 0, ++ "%s already unprepared\n", core->name)) ++ return; + -+ if (res.a0) -+ return 0; -+ else -+ return -EPERM; -+} ++ if (WARN(core->prepare_count == 1 && core->flags & CLK_IS_CRITICAL, ++ "Unpreparing critical %s\n", core->name)) ++ return; + -+static unsigned long -+rockchip_ddrclk_sip_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct arm_smccc_res res; ++ if (core->flags & CLK_SET_RATE_GATE) ++ clk_core_rate_unprotect(core); + -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE, -+ 0, 0, 0, 0, &res); ++ if (--core->prepare_count > 0) ++ return; + -+ return res.a0; ++ WARN(core->enable_count > 0, "Unpreparing enabled %s\n", core->name); ++ ++ trace_clk_unprepare(core); ++ ++ if (core->ops->unprepare) ++ core->ops->unprepare(core->hw); ++ ++ trace_clk_unprepare_complete(core); ++ clk_core_unprepare(core->parent); ++ clk_pm_runtime_put(core); +} + -+static long rockchip_ddrclk_sip_round_rate(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long *prate) ++static void clk_core_unprepare_lock(struct clk_core *core) +{ -+ struct arm_smccc_res res; ++ clk_prepare_lock(); ++ clk_core_unprepare(core); ++ clk_prepare_unlock(); ++} + -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, rate, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE, -+ 0, 0, 0, 0, &res); ++/** ++ * clk_unprepare - undo preparation of a clock source ++ * @clk: the clk being unprepared ++ * ++ * clk_unprepare may sleep, which differentiates it from clk_disable. In a ++ * simple case, clk_unprepare can be used instead of clk_disable to gate a clk ++ * if the operation may sleep. One example is a clk which is accessed over ++ * I2c. In the complex case a clk gate operation may require a fast and a slow ++ * part. It is this reason that clk_unprepare and clk_disable are not mutually ++ * exclusive. In fact clk_disable must be called before clk_unprepare. 
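++ *
++ * A typical tear-down therefore looks like this (an illustrative sketch;
++ * 'priv->clk' is a made-up consumer handle, not part of this patch):
++ *
++ *   clk_disable(priv->clk);    /* fast, non-sleeping part */
++ *   clk_unprepare(priv->clk);  /* slow part, may sleep */
++ *
++ * or simply clk_disable_unprepare(priv->clk), which combines both calls.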
++ */ ++void clk_unprepare(struct clk *clk) ++{ ++ if (IS_ERR_OR_NULL(clk)) ++ return; + -+ return res.a0; ++ clk_core_unprepare_lock(clk->core); +} ++EXPORT_SYMBOL_GPL(clk_unprepare); + -+static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw) ++static int clk_core_prepare(struct clk_core *core) +{ -+ struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw); -+ u32 val; ++ int ret = 0; + -+ val = readl(ddrclk->reg_base + -+ ddrclk->mux_offset) >> ddrclk->mux_shift; -+ val &= GENMASK(ddrclk->mux_width - 1, 0); ++ lockdep_assert_held(&prepare_lock); + -+ return val; -+} ++ if (!core) ++ return 0; + -+static const struct clk_ops rockchip_ddrclk_sip_ops = { -+ .recalc_rate = rockchip_ddrclk_sip_recalc_rate, -+ .set_rate = rockchip_ddrclk_sip_set_rate, -+ .round_rate = rockchip_ddrclk_sip_round_rate, -+ .get_parent = rockchip_ddrclk_get_parent, -+}; ++ if (core->prepare_count == 0) { ++ ret = clk_pm_runtime_get(core); ++ if (ret) ++ return ret; + -+static int rockchip_ddrclk_sip_set_rate_v2(struct clk_hw *hw, -+ unsigned long drate, -+ unsigned long prate) -+{ -+ struct share_params_ddrclk *p; -+ struct arm_smccc_res res; ++ ret = clk_core_prepare(core->parent); ++ if (ret) ++ goto runtime_put; + -+ p = (struct share_params_ddrclk *)ddr_data.params; -+ if (p) -+ p->hz = drate; ++ trace_clk_prepare(core); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE); ++ if (core->ops->prepare) ++ ret = core->ops->prepare(core->hw); + -+ if ((int)res.a1 == SIP_RET_SET_RATE_TIMEOUT) { -+ if (ddr_data.dmcfreq_wait_complete) -+ ddr_data.dmcfreq_wait_complete(); ++ trace_clk_prepare_complete(core); ++ ++ if (ret) ++ goto unprepare; + } + -+ return res.a0; -+} ++ core->prepare_count++; + -+static unsigned long rockchip_ddrclk_sip_recalc_rate_v2 -+ (struct clk_hw *hw, unsigned long parent_rate) -+{ -+ struct arm_smccc_res res; ++ /* ++ * CLK_SET_RATE_GATE is a special case of clock protection ++ * Instead of a consumer claiming exclusive rate control, it is ++ * actually the provider which prevents any consumer from making any ++ * operation which could result in a rate change or rate glitch while ++ * the clock is prepared. ++ */ ++ if (core->flags & CLK_SET_RATE_GATE) ++ clk_core_rate_protect(core); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE); -+ if (!res.a0) -+ return res.a1; -+ else -+ return 0; ++ return 0; ++unprepare: ++ clk_core_unprepare(core->parent); ++runtime_put: ++ clk_pm_runtime_put(core); ++ return ret; +} + -+static long rockchip_ddrclk_sip_round_rate_v2(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long *prate) ++static int clk_core_prepare_lock(struct clk_core *core) +{ -+ struct share_params_ddrclk *p; -+ struct arm_smccc_res res; ++ int ret; + -+ p = (struct share_params_ddrclk *)ddr_data.params; -+ if (p) -+ p->hz = rate; ++ clk_prepare_lock(); ++ ret = clk_core_prepare(core); ++ clk_prepare_unlock(); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE); -+ if (!res.a0) -+ return res.a1; -+ else -+ return 0; ++ return ret; +} + -+static const struct clk_ops rockchip_ddrclk_sip_ops_v2 = { -+ .recalc_rate = rockchip_ddrclk_sip_recalc_rate_v2, -+ .set_rate = rockchip_ddrclk_sip_set_rate_v2, -+ .round_rate = rockchip_ddrclk_sip_round_rate_v2, -+ .get_parent = rockchip_ddrclk_get_parent, -+}; ++/** ++ * clk_prepare - prepare a clock source ++ * @clk: the clk being prepared ++ * ++ * clk_prepare may sleep, which differentiates it from clk_enable. 
In a simple ++ * case, clk_prepare can be used instead of clk_enable to ungate a clk if the ++ * operation may sleep. One example is a clk which is accessed over I2c. In ++ * the complex case a clk ungate operation may require a fast and a slow part. ++ * It is this reason that clk_prepare and clk_enable are not mutually ++ * exclusive. In fact clk_prepare must be called before clk_enable. ++ * Returns 0 on success, -EERROR otherwise. ++ */ ++int clk_prepare(struct clk *clk) ++{ ++ if (!clk) ++ return 0; + -+struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, -+ const char *const *parent_names, -+ u8 num_parents, int mux_offset, -+ int mux_shift, int mux_width, -+ int div_shift, int div_width, -+ int ddr_flag, void __iomem *reg_base) ++ return clk_core_prepare_lock(clk->core); ++} ++EXPORT_SYMBOL_GPL(clk_prepare); ++ ++static void clk_core_disable(struct clk_core *core) +{ -+ struct rockchip_ddrclk *ddrclk; -+ struct clk_init_data init; -+ struct clk *clk; ++ lockdep_assert_held(&enable_lock); + -+#ifdef CONFIG_ARM -+ if (!psci_smp_available()) -+ return NULL; -+#endif ++ if (!core) ++ return; + -+ ddrclk = kzalloc(sizeof(*ddrclk), GFP_KERNEL); -+ if (!ddrclk) -+ return ERR_PTR(-ENOMEM); ++ if (WARN(core->enable_count == 0, "%s already disabled\n", core->name)) ++ return; + -+ init.name = name; -+ init.parent_names = parent_names; -+ init.num_parents = num_parents; ++ if (WARN(core->enable_count == 1 && core->flags & CLK_IS_CRITICAL, ++ "Disabling critical %s\n", core->name)) ++ return; + -+ init.flags = flags; -+ init.flags |= CLK_SET_RATE_NO_REPARENT; ++ if (--core->enable_count > 0) ++ return; + -+ switch (ddr_flag) { -+#ifdef CONFIG_ROCKCHIP_DDRCLK_SIP -+ case ROCKCHIP_DDRCLK_SIP: -+ init.ops = &rockchip_ddrclk_sip_ops; -+ break; -+#endif -+#ifdef CONFIG_ROCKCHIP_DDRCLK_SIP_V2 -+ case ROCKCHIP_DDRCLK_SIP_V2: -+ init.ops = &rockchip_ddrclk_sip_ops_v2; -+ break; -+#endif -+ default: -+ pr_err("%s: unsupported ddrclk type %d\n", __func__, ddr_flag); -+ kfree(ddrclk); -+ return ERR_PTR(-EINVAL); -+ } ++ trace_clk_disable(core); + -+ ddrclk->reg_base = reg_base; -+ ddrclk->hw.init = &init; -+ ddrclk->mux_offset = mux_offset; -+ ddrclk->mux_shift = mux_shift; -+ ddrclk->mux_width = mux_width; -+ ddrclk->div_shift = div_shift; -+ ddrclk->div_width = div_width; -+ ddrclk->ddr_flag = ddr_flag; ++ if (core->ops->disable) ++ core->ops->disable(core->hw); + -+ clk = clk_register(NULL, &ddrclk->hw); -+ if (IS_ERR(clk)) -+ kfree(ddrclk); ++ trace_clk_disable_complete(core); + -+ return clk; ++ clk_core_disable(core->parent); +} -+EXPORT_SYMBOL_GPL(rockchip_clk_register_ddrclk); -diff --git a/drivers/clk/rockchip-oh/clk-half-divider.c b/drivers/clk/rockchip-oh/clk-half-divider.c -new file mode 100644 -index 000000000..9e0d0fc00 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-half-divider.c -@@ -0,0 +1,224 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd -+ */ + -+#include -+#include -+#include -+#include "clk.h" ++static void clk_core_disable_lock(struct clk_core *core) ++{ ++ unsigned long flags; + -+#define div_mask(width) ((1 << (width)) - 1) ++ flags = clk_enable_lock(); ++ clk_core_disable(core); ++ clk_enable_unlock(flags); ++} + -+static bool _is_best_half_div(unsigned long rate, unsigned long now, -+ unsigned long best, unsigned long flags) ++/** ++ * clk_disable - gate a clock ++ * @clk: the clk being gated ++ * ++ * clk_disable must not sleep, which differentiates it from clk_unprepare. 
In ++ * a simple case, clk_disable can be used instead of clk_unprepare to gate a ++ * clk if the operation is fast and will never sleep. One example is a ++ * SoC-internal clk which is controlled via simple register writes. In the ++ * complex case a clk gate operation may require a fast and a slow part. It is ++ * this reason that clk_unprepare and clk_disable are not mutually exclusive. ++ * In fact clk_disable must be called before clk_unprepare. ++ */ ++void clk_disable(struct clk *clk) +{ -+ if (flags & CLK_DIVIDER_ROUND_CLOSEST) -+ return abs(rate - now) <= abs(rate - best); ++ if (IS_ERR_OR_NULL(clk)) ++ return; + -+ return now <= rate && now >= best; ++ clk_core_disable_lock(clk->core); +} ++EXPORT_SYMBOL_GPL(clk_disable); + -+static unsigned long clk_half_divider_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) ++static int clk_core_enable(struct clk_core *core) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int val; ++ int ret = 0; + -+ val = readl(divider->reg) >> divider->shift; -+ val &= div_mask(divider->width); -+ val = val * 2 + 3; ++ lockdep_assert_held(&enable_lock); + -+ return DIV_ROUND_UP_ULL(((u64)parent_rate * 2), val); -+} ++ if (!core) ++ return 0; + -+static int clk_half_divider_bestdiv(struct clk_hw *hw, unsigned long rate, -+ unsigned long *best_parent_rate, u8 width, -+ unsigned long flags) -+{ -+ unsigned int i, bestdiv = 0; -+ unsigned long parent_rate, best = 0, now, maxdiv; -+ bool is_bestdiv = false; ++ if (WARN(core->prepare_count == 0, ++ "Enabling unprepared %s\n", core->name)) ++ return -ESHUTDOWN; + -+ if (!rate) -+ rate = 1; ++ if (core->enable_count == 0) { ++ ret = clk_core_enable(core->parent); + -+ maxdiv = div_mask(width); ++ if (ret) ++ return ret; + -+ if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) { -+ parent_rate = *best_parent_rate; -+ bestdiv = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate); -+ if (bestdiv < 3) -+ bestdiv = 0; -+ else -+ bestdiv = DIV_ROUND_UP(bestdiv - 3, 2); -+ bestdiv = bestdiv > maxdiv ? 
maxdiv : bestdiv; -+ return bestdiv; -+ } ++ trace_clk_enable(core); + -+ /* -+ * The maximum divider we can use without overflowing -+ * unsigned long in rate * i below -+ */ -+ maxdiv = min(ULONG_MAX / rate, maxdiv); ++ if (core->ops->enable) ++ ret = core->ops->enable(core->hw); + -+ for (i = 0; i <= maxdiv; i++) { -+ parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), -+ ((u64)rate * (i * 2 + 3)) / 2); -+ now = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), -+ (i * 2 + 3)); ++ trace_clk_enable_complete(core); + -+ if (_is_best_half_div(rate, now, best, flags)) { -+ is_bestdiv = true; -+ bestdiv = i; -+ best = now; -+ *best_parent_rate = parent_rate; ++ if (ret) { ++ clk_core_disable(core->parent); ++ return ret; + } + } + -+ if (!is_bestdiv) { -+ bestdiv = div_mask(width); -+ *best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), 1); -+ } -+ -+ return bestdiv; ++ core->enable_count++; ++ return 0; +} + -+static long clk_half_divider_round_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long *prate) ++static int clk_core_enable_lock(struct clk_core *core) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ int div; ++ unsigned long flags; ++ int ret; + -+ div = clk_half_divider_bestdiv(hw, rate, prate, -+ divider->width, -+ divider->flags); ++ flags = clk_enable_lock(); ++ ret = clk_core_enable(core); ++ clk_enable_unlock(flags); + -+ return DIV_ROUND_UP_ULL(((u64)*prate * 2), div * 2 + 3); ++ return ret; +} + -+static int clk_half_divider_set_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long parent_rate) ++/** ++ * clk_gate_restore_context - restore context for poweroff ++ * @hw: the clk_hw pointer of clock whose state is to be restored ++ * ++ * The clock gate restore context function enables or disables ++ * the gate clocks based on the enable_count. This is done in cases ++ * where the clock context is lost and based on the enable_count ++ * the clock either needs to be enabled/disabled. This ++ * helps restore the state of gate clocks. 
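++ *
++ * An illustrative way for a provider to hook this up (a sketch only; the
++ * 'my_gate_*' callbacks are made-up names, not part of this patch):
++ *
++ *   static const struct clk_ops my_gate_ops = {
++ *           .enable          = my_gate_enable,
++ *           .disable         = my_gate_disable,
++ *           .is_enabled      = my_gate_is_enabled,
++ *           .restore_context = clk_gate_restore_context,
++ *   };
++ *
++ * clk_restore_context() will then invoke this helper on resume and re-apply
++ * the gate state implied by enable_count.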
++ */ ++void clk_gate_restore_context(struct clk_hw *hw) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int value; -+ unsigned long flags = 0; -+ u32 val; ++ struct clk_core *core = hw->core; + -+ value = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate); -+ value = DIV_ROUND_UP(value - 3, 2); -+ value = min_t(unsigned int, value, div_mask(divider->width)); -+ -+ if (divider->lock) -+ spin_lock_irqsave(divider->lock, flags); ++ if (core->enable_count) ++ core->ops->enable(hw); + else -+ __acquire(divider->lock); ++ core->ops->disable(hw); ++} ++EXPORT_SYMBOL_GPL(clk_gate_restore_context); + -+ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { -+ val = div_mask(divider->width) << (divider->shift + 16); -+ } else { -+ val = readl(divider->reg); -+ val &= ~(div_mask(divider->width) << divider->shift); ++static int clk_core_save_context(struct clk_core *core) ++{ ++ struct clk_core *child; ++ int ret = 0; ++ ++ hlist_for_each_entry(child, &core->children, child_node) { ++ ret = clk_core_save_context(child); ++ if (ret < 0) ++ return ret; + } -+ val |= value << divider->shift; -+ writel(val, divider->reg); + -+ if (divider->lock) -+ spin_unlock_irqrestore(divider->lock, flags); -+ else -+ __release(divider->lock); ++ if (core->ops && core->ops->save_context) ++ ret = core->ops->save_context(core->hw); + -+ return 0; ++ return ret; +} + -+static const struct clk_ops clk_half_divider_ops = { -+ .recalc_rate = clk_half_divider_recalc_rate, -+ .round_rate = clk_half_divider_round_rate, -+ .set_rate = clk_half_divider_set_rate, -+}; ++static void clk_core_restore_context(struct clk_core *core) ++{ ++ struct clk_core *child; + -+/* -+ * Register a clock branch. -+ * Most clock branches have a form like -+ * -+ * src1 --|--\ -+ * |M |--[GATE]-[DIV]- -+ * src2 --|--/ ++ if (core->ops && core->ops->restore_context) ++ core->ops->restore_context(core->hw); ++ ++ hlist_for_each_entry(child, &core->children, child_node) ++ clk_core_restore_context(child); ++} ++ ++/** ++ * clk_save_context - save clock context for poweroff + * -+ * sometimes without one of those components. ++ * Saves the context of the clock register for powerstates in which the ++ * contents of the registers will be lost. Occurs deep within the suspend ++ * code. Returns 0 on success. + */ -+struct clk *rockchip_clk_register_halfdiv(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ int gate_offset, u8 gate_shift, -+ u8 gate_flags, unsigned long flags, -+ spinlock_t *lock) ++int clk_save_context(void) +{ -+ struct clk_hw *hw = ERR_PTR(-ENOMEM); -+ struct clk_mux *mux = NULL; -+ struct clk_gate *gate = NULL; -+ struct clk_divider *div = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, -+ *gate_ops = NULL; ++ struct clk_core *clk; ++ int ret; + -+ if (num_parents > 1) { -+ mux = kzalloc(sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); ++ hlist_for_each_entry(clk, &clk_root_list, child_node) { ++ ret = clk_core_save_context(clk); ++ if (ret < 0) ++ return ret; ++ } + -+ mux->reg = base + muxdiv_offset; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux->flags = mux_flags; -+ mux->lock = lock; -+ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? 
&clk_mux_ro_ops -+ : &clk_mux_ops; ++ hlist_for_each_entry(clk, &clk_orphan_list, child_node) { ++ ret = clk_core_save_context(clk); ++ if (ret < 0) ++ return ret; + } + -+ if (gate_offset >= 0) { -+ gate = kzalloc(sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ goto err_gate; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(clk_save_context); + -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; -+ gate_ops = &clk_gate_ops; -+ } ++/** ++ * clk_restore_context - restore clock context after poweroff ++ * ++ * Restore the saved clock context upon resume. ++ * ++ */ ++void clk_restore_context(void) ++{ ++ struct clk_core *core; + -+ if (div_width > 0) { -+ div = kzalloc(sizeof(*div), GFP_KERNEL); -+ if (!div) -+ goto err_div; ++ hlist_for_each_entry(core, &clk_root_list, child_node) ++ clk_core_restore_context(core); + -+ div->flags = div_flags; -+ if (div_offset) -+ div->reg = base + div_offset; -+ else -+ div->reg = base + muxdiv_offset; -+ div->shift = div_shift; -+ div->width = div_width; -+ div->lock = lock; -+ div_ops = &clk_half_divider_ops; -+ } ++ hlist_for_each_entry(core, &clk_orphan_list, child_node) ++ clk_core_restore_context(core); ++} ++EXPORT_SYMBOL_GPL(clk_restore_context); + -+ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, -+ mux ? &mux->hw : NULL, mux_ops, -+ div ? &div->hw : NULL, div_ops, -+ gate ? &gate->hw : NULL, gate_ops, -+ flags); -+ if (IS_ERR(hw)) -+ goto err_div; ++/** ++ * clk_enable - ungate a clock ++ * @clk: the clk being ungated ++ * ++ * clk_enable must not sleep, which differentiates it from clk_prepare. In a ++ * simple case, clk_enable can be used instead of clk_prepare to ungate a clk ++ * if the operation will never sleep. One example is a SoC-internal clk which ++ * is controlled via simple register writes. In the complex case a clk ungate ++ * operation may require a fast and a slow part. It is this reason that ++ * clk_enable and clk_prepare are not mutually exclusive. In fact clk_prepare ++ * must be called before clk_enable. Returns 0 on success, -EERROR ++ * otherwise. ++ */ ++int clk_enable(struct clk *clk) ++{ ++ if (!clk) ++ return 0; + -+ return hw->clk; -+err_div: -+ kfree(gate); -+err_gate: -+ kfree(mux); -+ return ERR_CAST(hw); ++ return clk_core_enable_lock(clk->core); +} -diff --git a/drivers/clk/rockchip-oh/clk-inverter.c b/drivers/clk/rockchip-oh/clk-inverter.c -new file mode 100644 -index 000000000..5dfbdce18 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-inverter.c -@@ -0,0 +1,103 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright 2015 Heiko Stuebner ++EXPORT_SYMBOL_GPL(clk_enable); ++ ++/** ++ * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. ++ * @clk: clock source ++ * ++ * Returns true if clk_prepare() implicitly enables the clock, effectively ++ * making clk_enable()/clk_disable() no-ops, false otherwise. ++ * ++ * This is of interest mainly to power management code where actually ++ * disabling the clock also requires unpreparing it to have any material ++ * effect. ++ * ++ * Regardless of the value returned here, the caller must always invoke ++ * clk_enable() or clk_prepare_enable() and counterparts for usage counts ++ * to be right. 
+ */ ++bool clk_is_enabled_when_prepared(struct clk *clk) ++{ ++ return clk && !(clk->core->ops->enable && clk->core->ops->disable); ++} ++EXPORT_SYMBOL_GPL(clk_is_enabled_when_prepared); + -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++static int clk_core_prepare_enable(struct clk_core *core) ++{ ++ int ret; + -+struct rockchip_inv_clock { -+ struct clk_hw hw; -+ void __iomem *reg; -+ int shift; -+ int flags; -+ spinlock_t *lock; -+}; ++ ret = clk_core_prepare_lock(core); ++ if (ret) ++ return ret; + -+#define to_inv_clock(_hw) container_of(_hw, struct rockchip_inv_clock, hw) ++ ret = clk_core_enable_lock(core); ++ if (ret) ++ clk_core_unprepare_lock(core); + -+#define INVERTER_MASK 0x1 ++ return ret; ++} + -+static int rockchip_inv_get_phase(struct clk_hw *hw) ++static void clk_core_disable_unprepare(struct clk_core *core) +{ -+ struct rockchip_inv_clock *inv_clock = to_inv_clock(hw); -+ u32 val; -+ -+ val = readl(inv_clock->reg) >> inv_clock->shift; -+ val &= INVERTER_MASK; -+ return val ? 180 : 0; ++ clk_core_disable_lock(core); ++ clk_core_unprepare_lock(core); +} + -+static int rockchip_inv_set_phase(struct clk_hw *hw, int degrees) ++static void __init clk_unprepare_unused_subtree(struct clk_core *core) +{ -+ struct rockchip_inv_clock *inv_clock = to_inv_clock(hw); -+ u32 val; ++ struct clk_core *child; + -+ if (degrees % 180 == 0) { -+ val = !!degrees; -+ } else { -+ pr_err("%s: unsupported phase %d for %s\n", -+ __func__, degrees, clk_hw_get_name(hw)); -+ return -EINVAL; ++ lockdep_assert_held(&prepare_lock); ++ ++ hlist_for_each_entry(child, &core->children, child_node) ++ clk_unprepare_unused_subtree(child); ++ ++ if (core->prepare_count) ++ return; ++ ++ if (core->flags & CLK_IGNORE_UNUSED) ++ return; ++ ++ if (clk_core_is_prepared(core)) { ++ trace_clk_unprepare(core); ++ if (core->ops->unprepare_unused) ++ core->ops->unprepare_unused(core->hw); ++ else if (core->ops->unprepare) ++ core->ops->unprepare(core->hw); ++ trace_clk_unprepare_complete(core); + } ++} + -+ if (inv_clock->flags & ROCKCHIP_INVERTER_HIWORD_MASK) { -+ writel(HIWORD_UPDATE(val, INVERTER_MASK, inv_clock->shift), -+ inv_clock->reg); -+ } else { -+ unsigned long flags; -+ u32 reg; ++static void __init clk_disable_unused_subtree(struct clk_core *core) ++{ ++ struct clk_core *child; ++ unsigned long flags; + -+ spin_lock_irqsave(inv_clock->lock, flags); ++ lockdep_assert_held(&prepare_lock); + -+ reg = readl(inv_clock->reg); -+ reg &= ~BIT(inv_clock->shift); -+ reg |= val; -+ writel(reg, inv_clock->reg); ++ hlist_for_each_entry(child, &core->children, child_node) ++ clk_disable_unused_subtree(child); + -+ spin_unlock_irqrestore(inv_clock->lock, flags); ++ if (core->flags & CLK_OPS_PARENT_ENABLE) ++ clk_core_prepare_enable(core->parent); ++ ++ flags = clk_enable_lock(); ++ ++ if (core->enable_count) ++ goto unlock_out; ++ ++ if (core->flags & CLK_IGNORE_UNUSED) ++ goto unlock_out; ++ ++ /* ++ * some gate clocks have special needs during the disable-unused ++ * sequence. 
call .disable_unused if available, otherwise fall ++ * back to .disable ++ */ ++ if (clk_core_is_enabled(core)) { ++ trace_clk_disable(core); ++ if (core->ops->disable_unused) ++ core->ops->disable_unused(core->hw); ++ else if (core->ops->disable) ++ core->ops->disable(core->hw); ++ trace_clk_disable_complete(core); + } + -+ return 0; ++unlock_out: ++ clk_enable_unlock(flags); ++ if (core->flags & CLK_OPS_PARENT_ENABLE) ++ clk_core_disable_unprepare(core->parent); +} + -+static const struct clk_ops rockchip_inv_clk_ops = { -+ .get_phase = rockchip_inv_get_phase, -+ .set_phase = rockchip_inv_set_phase, -+}; -+ -+struct clk *rockchip_clk_register_inverter(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *reg, int shift, int flags, -+ spinlock_t *lock) ++static bool clk_ignore_unused __initdata; ++static int __init clk_ignore_unused_setup(char *__unused) +{ -+ struct clk_init_data init; -+ struct rockchip_inv_clock *inv_clock; -+ struct clk *clk; ++ clk_ignore_unused = true; ++ return 1; ++} ++__setup("clk_ignore_unused", clk_ignore_unused_setup); + -+ inv_clock = kmalloc(sizeof(*inv_clock), GFP_KERNEL); -+ if (!inv_clock) -+ return ERR_PTR(-ENOMEM); ++static int __init clk_disable_unused(void) ++{ ++ struct clk_core *core; ++ int ret; + -+ init.name = name; -+ init.num_parents = num_parents; -+ init.flags = CLK_SET_RATE_PARENT; -+ init.parent_names = parent_names; -+ init.ops = &rockchip_inv_clk_ops; ++ if (clk_ignore_unused) { ++ pr_warn("clk: Not disabling unused clocks\n"); ++ return 0; ++ } + -+ inv_clock->hw.init = &init; -+ inv_clock->reg = reg; -+ inv_clock->shift = shift; -+ inv_clock->flags = flags; -+ inv_clock->lock = lock; ++ pr_info("clk: Disabling unused clocks\n"); + -+ clk = clk_register(NULL, &inv_clock->hw); -+ if (IS_ERR(clk)) -+ kfree(inv_clock); ++ ret = clk_pm_runtime_get_all(); ++ if (ret) ++ return ret; ++ /* ++ * Grab the prepare lock to keep the clk topology stable while iterating ++ * over clks. 
++ */ ++ clk_prepare_lock(); + -+ return clk; -+} -diff --git a/drivers/clk/rockchip-oh/clk-link.c b/drivers/clk/rockchip-oh/clk-link.c -new file mode 100644 -index 000000000..78ff9b53c ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-link.c -@@ -0,0 +1,244 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2021 Rockchip Electronics Co., Ltd -+ */ ++ hlist_for_each_entry(core, &clk_root_list, child_node) ++ clk_disable_unused_subtree(core); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ hlist_for_each_entry(core, &clk_orphan_list, child_node) ++ clk_disable_unused_subtree(core); + -+struct rockchip_link_info { -+ u32 shift; -+ const char *name; -+ const char *pname; -+}; ++ hlist_for_each_entry(core, &clk_root_list, child_node) ++ clk_unprepare_unused_subtree(core); + -+struct rockchip_link { -+ int num; -+ const struct rockchip_link_info *info; -+}; ++ hlist_for_each_entry(core, &clk_orphan_list, child_node) ++ clk_unprepare_unused_subtree(core); + -+struct rockchip_link_clk { -+ void __iomem *base; -+ struct clk_gate *gate; -+ spinlock_t lock; -+ u32 shift; -+ u32 flag; -+ const char *name; -+ const char *pname; -+ const char *link_name; -+ const struct rockchip_link *link; -+}; ++ clk_prepare_unlock(); + -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ clk_pm_runtime_put_all(); + -+#define GATE_LINK(_name, _pname, _shift) \ -+{ \ -+ .name = _name, \ -+ .pname = _pname, \ -+ .shift = (_shift), \ ++ return 0; +} ++late_initcall_sync(clk_disable_unused); + -+static int register_clocks(struct rockchip_link_clk *priv, struct device *dev) ++static int clk_core_determine_round_nolock(struct clk_core *core, ++ struct clk_rate_request *req) +{ -+ struct clk_gate *gate; -+ struct clk_init_data init = {}; -+ struct clk *clk; ++ long rate; + -+ gate = devm_kzalloc(dev, sizeof(struct clk_gate), GFP_KERNEL); -+ if (!gate) -+ return -ENOMEM; ++ lockdep_assert_held(&prepare_lock); + -+ init.name = priv->name; -+ init.ops = &clk_gate_ops; -+ init.flags |= CLK_SET_RATE_PARENT; -+ init.parent_names = &priv->pname; -+ init.num_parents = 1; ++ if (!core) ++ return 0; + -+ /* struct clk_gate assignments */ -+ gate->reg = priv->base; -+ gate->bit_idx = priv->shift; -+ gate->flags = GFLAGS; -+ gate->lock = &priv->lock; -+ gate->hw.init = &init; ++ /* ++ * Some clock providers hand-craft their clk_rate_requests and ++ * might not fill min_rate and max_rate. ++ * ++ * If it's the case, clamping the rate is equivalent to setting ++ * the rate to 0 which is bad. Skip the clamping but complain so ++ * that it gets fixed, hopefully. 
++ */ ++ if (!req->min_rate && !req->max_rate) ++ pr_warn("%s: %s: clk_rate_request has initialized min or max rate.\n", ++ __func__, core->name); ++ else ++ req->rate = clamp(req->rate, req->min_rate, req->max_rate); + -+ clk = devm_clk_register(dev, &gate->hw); -+ if (IS_ERR(clk)) ++ /* ++ * At this point, core protection will be disabled ++ * - if the provider is not protected at all ++ * - if the calling consumer is the only one which has exclusivity ++ * over the provider ++ */ ++ if (clk_core_rate_is_protected(core)) { ++ req->rate = core->rate; ++ } else if (core->ops->determine_rate) { ++ return core->ops->determine_rate(core->hw, req); ++ } else if (core->ops->round_rate) { ++ rate = core->ops->round_rate(core->hw, req->rate, ++ &req->best_parent_rate); ++ if (rate < 0) ++ return rate; ++ ++ req->rate = rate; ++ } else { + return -EINVAL; ++ } + -+ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, clk); ++ return 0; +} + -+static const struct rockchip_link_info rk3562_clk_gate_link_info[] = { -+ GATE_LINK("aclk_rga_jdec", "aclk_rga_pre", 3), -+ GATE_LINK("aclk_vdpu", "aclk_vdpu_pre", 5), -+ GATE_LINK("aclk_vepu", "aclk_vepu_pre", 3), -+ GATE_LINK("aclk_vi_isp", "aclk_vi", 3), -+ GATE_LINK("aclk_vo", "aclk_vo_pre", 3), -+ GATE_LINK("hclk_vepu", "hclk_vepu_pre", 4), -+}; -+ -+static const struct rockchip_link rk3562_clk_gate_link = { -+ .num = ARRAY_SIZE(rk3562_clk_gate_link_info), -+ .info = rk3562_clk_gate_link_info, -+}; ++static void clk_core_init_rate_req(struct clk_core * const core, ++ struct clk_rate_request *req, ++ unsigned long rate) ++{ ++ struct clk_core *parent; + -+static const struct rockchip_link_info rk3588_clk_gate_link_info[] = { -+ GATE_LINK("aclk_isp1_pre", "aclk_isp1_root", 6), -+ GATE_LINK("hclk_isp1_pre", "hclk_isp1_root", 8), -+ GATE_LINK("hclk_nvm", "hclk_nvm_root", 2), -+ GATE_LINK("aclk_usb", "aclk_usb_root", 2), -+ GATE_LINK("hclk_usb", "hclk_usb_root", 3), -+ GATE_LINK("aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", 7), -+ GATE_LINK("aclk_vdpu_low_pre", "aclk_vdpu_low_root", 5), -+ GATE_LINK("aclk_rkvenc1_pre", "aclk_rkvenc1_root", 3), -+ GATE_LINK("hclk_rkvenc1_pre", "hclk_rkvenc1_root", 2), -+ GATE_LINK("hclk_rkvdec0_pre", "hclk_rkvdec0_root", 5), -+ GATE_LINK("aclk_rkvdec0_pre", "aclk_rkvdec0_root", 6), -+ GATE_LINK("hclk_rkvdec1_pre", "hclk_rkvdec1_root", 4), -+ GATE_LINK("aclk_rkvdec1_pre", "aclk_rkvdec1_root", 5), -+ GATE_LINK("aclk_hdcp0_pre", "aclk_vo0_root", 9), -+ GATE_LINK("hclk_vo0", "hclk_vo0_root", 5), -+ GATE_LINK("aclk_hdcp1_pre", "aclk_hdcp1_root", 6), -+ GATE_LINK("hclk_vo1", "hclk_vo1_root", 9), -+ GATE_LINK("aclk_av1_pre", "aclk_av1_root", 1), -+ GATE_LINK("pclk_av1_pre", "pclk_av1_root", 4), -+ GATE_LINK("hclk_sdio_pre", "hclk_sdio_root", 1), -+ GATE_LINK("pclk_vo0_grf", "pclk_vo0_root", 10), -+ GATE_LINK("pclk_vo1_grf", "pclk_vo1_root", 12), -+}; ++ if (WARN_ON(!req)) ++ return; + -+static const struct rockchip_link rk3588_clk_gate_link = { -+ .num = ARRAY_SIZE(rk3588_clk_gate_link_info), -+ .info = rk3588_clk_gate_link_info, -+}; ++ memset(req, 0, sizeof(*req)); ++ req->max_rate = ULONG_MAX; + -+static const struct of_device_id rockchip_clk_link_of_match[] = { -+ { -+ .compatible = "rockchip,rk3562-clock-gate-link", -+ .data = (void *)&rk3562_clk_gate_link, -+ }, -+ { -+ .compatible = "rockchip,rk3588-clock-gate-link", -+ .data = (void *)&rk3588_clk_gate_link, -+ }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, rockchip_clk_link_of_match); ++ if (!core) ++ return; + -+static const struct rockchip_link_info * 
-+rockchip_get_link_infos(const struct rockchip_link *link, const char *name) -+{ -+ const struct rockchip_link_info *info = link->info; -+ int i = 0; ++ req->core = core; ++ req->rate = rate; ++ clk_core_get_boundaries(core, &req->min_rate, &req->max_rate); + -+ for (i = 0; i < link->num; i++) { -+ if (strcmp(info->name, name) == 0) -+ break; -+ info++; ++ parent = core->parent; ++ if (parent) { ++ req->best_parent_hw = parent->hw; ++ req->best_parent_rate = parent->rate; ++ } else { ++ req->best_parent_hw = NULL; ++ req->best_parent_rate = 0; + } -+ return info; +} + -+static int rockchip_clk_link_probe(struct platform_device *pdev) ++/** ++ * clk_hw_init_rate_request - Initializes a clk_rate_request ++ * @hw: the clk for which we want to submit a rate request ++ * @req: the clk_rate_request structure we want to initialise ++ * @rate: the rate which is to be requested ++ * ++ * Initializes a clk_rate_request structure to submit to ++ * __clk_determine_rate() or similar functions. ++ */ ++void clk_hw_init_rate_request(const struct clk_hw *hw, ++ struct clk_rate_request *req, ++ unsigned long rate) +{ -+ struct rockchip_link_clk *priv; -+ struct device_node *node = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const char *clk_name; -+ const struct rockchip_link_info *link_info; -+ int ret; ++ if (WARN_ON(!hw || !req)) ++ return; + -+ match = of_match_node(rockchip_clk_link_of_match, node); -+ if (!match) -+ return -ENXIO; ++ clk_core_init_rate_req(hw->core, req, rate); ++} ++EXPORT_SYMBOL_GPL(clk_hw_init_rate_request); + -+ priv = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_link_clk), -+ GFP_KERNEL); -+ if (!priv) -+ return -ENOMEM; ++/** ++ * clk_hw_forward_rate_request - Forwards a clk_rate_request to a clock's parent ++ * @hw: the original clock that got the rate request ++ * @old_req: the original clk_rate_request structure we want to forward ++ * @parent: the clk we want to forward @old_req to ++ * @req: the clk_rate_request structure we want to initialise ++ * @parent_rate: The rate which is to be requested to @parent ++ * ++ * Initializes a clk_rate_request structure to submit to a clock parent ++ * in __clk_determine_rate() or similar functions. 
++ */ ++void clk_hw_forward_rate_request(const struct clk_hw *hw, ++ const struct clk_rate_request *old_req, ++ const struct clk_hw *parent, ++ struct clk_rate_request *req, ++ unsigned long parent_rate) ++{ ++ if (WARN_ON(!hw || !old_req || !parent || !req)) ++ return; + -+ priv->link = match->data; ++ clk_core_forward_rate_req(hw->core, old_req, ++ parent->core, req, ++ parent_rate); ++} ++EXPORT_SYMBOL_GPL(clk_hw_forward_rate_request); + -+ spin_lock_init(&priv->lock); -+ platform_set_drvdata(pdev, priv); ++static bool clk_core_can_round(struct clk_core * const core) ++{ ++ return core->ops->determine_rate || core->ops->round_rate; ++} + -+ priv->base = of_iomap(node, 0); -+ if (IS_ERR(priv->base)) -+ return PTR_ERR(priv->base); ++static int clk_core_round_rate_nolock(struct clk_core *core, ++ struct clk_rate_request *req) ++{ ++ int ret; + -+ if (of_property_read_string(node, "clock-output-names", &clk_name)) -+ priv->name = node->name; -+ else -+ priv->name = clk_name; ++ lockdep_assert_held(&prepare_lock); + -+ link_info = rockchip_get_link_infos(priv->link, priv->name); -+ priv->shift = link_info->shift; -+ priv->pname = link_info->pname; ++ if (!core) { ++ req->rate = 0; ++ return 0; ++ } + -+ pm_runtime_enable(&pdev->dev); -+ ret = pm_clk_create(&pdev->dev); -+ if (ret) -+ goto disable_pm_runtime; ++ if (clk_core_can_round(core)) ++ return clk_core_determine_round_nolock(core, req); + -+ ret = pm_clk_add(&pdev->dev, "link"); ++ if (core->flags & CLK_SET_RATE_PARENT) { ++ struct clk_rate_request parent_req; + -+ if (ret) -+ goto destroy_pm_clk; ++ clk_core_forward_rate_req(core, req, core->parent, &parent_req, req->rate); + -+ ret = register_clocks(priv, &pdev->dev); -+ if (ret) -+ goto destroy_pm_clk; ++ trace_clk_rate_request_start(&parent_req); + -+ return 0; ++ ret = clk_core_round_rate_nolock(core->parent, &parent_req); ++ if (ret) ++ return ret; + -+destroy_pm_clk: -+ pm_clk_destroy(&pdev->dev); -+disable_pm_runtime: -+ pm_runtime_disable(&pdev->dev); ++ trace_clk_rate_request_done(&parent_req); + -+ return ret; -+} ++ req->best_parent_rate = parent_req.rate; ++ req->rate = parent_req.rate; + -+static int rockchip_clk_link_remove(struct platform_device *pdev) -+{ -+ pm_clk_destroy(&pdev->dev); -+ pm_runtime_disable(&pdev->dev); ++ return 0; ++ } + ++ req->rate = core->rate; + return 0; +} + -+static const struct dev_pm_ops rockchip_clk_link_pm_ops = { -+ SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) -+}; -+ -+static struct platform_driver rockchip_clk_link_driver = { -+ .driver = { -+ .name = "clock-link", -+ .of_match_table = of_match_ptr(rockchip_clk_link_of_match), -+ .pm = &rockchip_clk_link_pm_ops, -+ }, -+ .probe = rockchip_clk_link_probe, -+ .remove = rockchip_clk_link_remove, -+}; -+ -+static int __init rockchip_clk_link_drv_register(void) ++/** ++ * __clk_determine_rate - get the closest rate actually supported by a clock ++ * @hw: determine the rate of this clock ++ * @req: target rate request ++ * ++ * Useful for clk_ops such as .set_rate and .determine_rate. 
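To make the two helpers above concrete, here is a hedged provider-side sketch (not part of the patch): a clock with a fixed divide-by-two on its output can implement .determine_rate by forwarding the request to its parent at twice the target rate. The foo_div2_* name and the /2 factor are assumptions for illustration.

#include <linux/clk-provider.h>

/* Hypothetical fixed /2 post-divider that lets its parent pick the rate. */
static int foo_div2_determine_rate(struct clk_hw *hw,
				   struct clk_rate_request *req)
{
	struct clk_hw *parent = clk_hw_get_parent(hw);
	struct clk_rate_request parent_req;
	int ret;

	if (!parent)
		return -EINVAL;

	/* ask the parent for twice what the consumer wants */
	clk_hw_forward_rate_request(hw, req, parent, &parent_req,
				    req->rate * 2);

	ret = __clk_determine_rate(parent, &parent_req);
	if (ret)
		return ret;

	req->best_parent_hw = parent;
	req->best_parent_rate = parent_req.rate;
	req->rate = parent_req.rate / 2;

	return 0;
}

Such a callback would be wired into the provider's struct clk_ops as .determine_rate, alongside .recalc_rate and .set_rate.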
++ */ ++int __clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) +{ -+ return platform_driver_register(&rockchip_clk_link_driver); -+} -+postcore_initcall_sync(rockchip_clk_link_drv_register); ++ if (!hw) { ++ req->rate = 0; ++ return 0; ++ } + -+static void __exit rockchip_clk_link_drv_unregister(void) -+{ -+ platform_driver_unregister(&rockchip_clk_link_driver); ++ return clk_core_round_rate_nolock(hw->core, req); +} -+module_exit(rockchip_clk_link_drv_unregister); ++EXPORT_SYMBOL_GPL(__clk_determine_rate); + -+MODULE_AUTHOR("Elaine Zhang "); -+MODULE_DESCRIPTION("Clock driver for Niu Dependencies"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-mmc-phase.c b/drivers/clk/rockchip-oh/clk-mmc-phase.c -new file mode 100644 -index 000000000..975454a3d ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-mmc-phase.c -@@ -0,0 +1,232 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright 2014 Google, Inc -+ * Author: Alexandru M Stan ++/** ++ * clk_hw_round_rate() - round the given rate for a hw clk ++ * @hw: the hw clk for which we are rounding a rate ++ * @rate: the rate which is to be rounded ++ * ++ * Takes in a rate as input and rounds it to a rate that the clk can actually ++ * use. ++ * ++ * Context: prepare_lock must be held. ++ * For clk providers to call from within clk_ops such as .round_rate, ++ * .determine_rate. ++ * ++ * Return: returns rounded rate of hw clk if clk supports round_rate operation ++ * else returns the parent rate. + */ ++unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate) ++{ ++ int ret; ++ struct clk_rate_request req; + -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++ clk_core_init_rate_req(hw->core, &req, rate); + -+struct rockchip_mmc_clock { -+ struct clk_hw hw; -+ void __iomem *reg; -+ int id; -+ int shift; -+ int cached_phase; -+ struct notifier_block clk_rate_change_nb; -+}; ++ trace_clk_rate_request_start(&req); + -+#define to_mmc_clock(_hw) container_of(_hw, struct rockchip_mmc_clock, hw) ++ ret = clk_core_round_rate_nolock(hw->core, &req); ++ if (ret) ++ return 0; + -+#define RK3288_MMC_CLKGEN_DIV 2 ++ trace_clk_rate_request_done(&req); + -+static unsigned long rockchip_mmc_recalc(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ return parent_rate / RK3288_MMC_CLKGEN_DIV; ++ return req.rate; +} ++EXPORT_SYMBOL_GPL(clk_hw_round_rate); + -+#define ROCKCHIP_MMC_DELAY_SEL BIT(10) -+#define ROCKCHIP_MMC_DEGREE_MASK 0x3 -+#define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 -+#define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) -+ -+#define PSECS_PER_SEC 1000000000000LL -+ -+/* -+ * Each fine delay is between 44ps-77ps. Assume each fine delay is 60ps to -+ * simplify calculations. So 45degs could be anywhere between 33deg and 57.8deg. ++/** ++ * clk_round_rate - round the given rate for a clk ++ * @clk: the clk for which we are rounding a rate ++ * @rate: the rate which is to be rounded ++ * ++ * Takes in a rate as input and rounds it to a rate that the clk can actually ++ * use which is then returned. If clk doesn't support round_rate operation ++ * then the parent rate is returned. 
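On the consumer side, clk_round_rate() is commonly used to probe what the framework would actually deliver before committing with clk_set_rate(); a minimal hedged sketch, with a made-up example_* helper name:

#include <linux/clk.h>

/* Round first, then set, so the caller can reject an unusable result. */
static int example_set_closest_rate(struct clk *clk, unsigned long target)
{
	long rounded = clk_round_rate(clk, target);

	if (rounded < 0)
		return rounded;
	if (!rounded)
		return -EINVAL;

	return clk_set_rate(clk, rounded);
}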
+ */ -+#define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC 60 -+ -+static int rockchip_mmc_get_phase(struct clk_hw *hw) ++long clk_round_rate(struct clk *clk, unsigned long rate) +{ -+ struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw); -+ unsigned long rate = clk_hw_get_rate(hw); -+ u32 raw_value; -+ u16 degrees; -+ u32 delay_num = 0; ++ struct clk_rate_request req; ++ int ret; + -+ /* Constant signal, no measurable phase shift */ -+ if (!rate) ++ if (!clk) + return 0; + -+ raw_value = readl(mmc_clock->reg) >> (mmc_clock->shift); -+ -+ degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90; -+ -+ if (raw_value & ROCKCHIP_MMC_DELAY_SEL) { -+ /* degrees/delaynum * 1000000 */ -+ unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) * -+ 36 * (rate / 10000); -+ -+ delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK); -+ delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET; -+ degrees += DIV_ROUND_CLOSEST(delay_num * factor, 1000000); -+ } -+ -+ return degrees % 360; -+} ++ clk_prepare_lock(); + -+static int rockchip_mmc_set_phase(struct clk_hw *hw, int degrees) -+{ -+ struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw); -+ unsigned long rate = clk_hw_get_rate(hw); -+ u8 nineties, remainder; -+ u8 delay_num; -+ u32 raw_value; -+ u32 delay; ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); + -+ /* -+ * The below calculation is based on the output clock from -+ * MMC host to the card, which expects the phase clock inherits -+ * the clock rate from its parent, namely the output clock -+ * provider of MMC host. However, things may go wrong if -+ * (1) It is orphan. -+ * (2) It is assigned to the wrong parent. -+ * -+ * This check help debug the case (1), which seems to be the -+ * most likely problem we often face and which makes it difficult -+ * for people to debug unstable mmc tuning results. -+ */ -+ if (!rate) { -+ pr_err("%s: invalid clk rate\n", __func__); -+ return -EINVAL; -+ } ++ clk_core_init_rate_req(clk->core, &req, rate); + -+ nineties = degrees / 90; -+ remainder = (degrees % 90); ++ trace_clk_rate_request_start(&req); + -+ /* -+ * Due to the inexact nature of the "fine" delay, we might -+ * actually go non-monotonic. We don't go _too_ monotonic -+ * though, so we should be OK. Here are options of how we may -+ * work: -+ * -+ * Ideally we end up with: -+ * 1.0, 2.0, ..., 69.0, 70.0, ..., 89.0, 90.0 -+ * -+ * On one extreme (if delay is actually 44ps): -+ * .73, 1.5, ..., 50.6, 51.3, ..., 65.3, 90.0 -+ * The other (if delay is actually 77ps): -+ * 1.3, 2.6, ..., 88.6. 89.8, ..., 114.0, 90 -+ * -+ * It's possible we might make a delay that is up to 25 -+ * degrees off from what we think we're making. That's OK -+ * though because we should be REALLY far from any bad range. -+ */ ++ ret = clk_core_round_rate_nolock(clk->core, &req); + -+ /* -+ * Convert to delay; do a little extra work to make sure we -+ * don't overflow 32-bit / 64-bit numbers. -+ */ -+ delay = 10000000; /* PSECS_PER_SEC / 10000 / 10 */ -+ delay *= remainder; -+ delay = DIV_ROUND_CLOSEST(delay, -+ (rate / 1000) * 36 * -+ (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10)); ++ trace_clk_rate_request_done(&req); + -+ delay_num = (u8) min_t(u32, delay, 255); ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); + -+ raw_value = delay_num ? 
ROCKCHIP_MMC_DELAY_SEL : 0; -+ raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET; -+ raw_value |= nineties; -+ writel(HIWORD_UPDATE(raw_value, 0x07ff, mmc_clock->shift), -+ mmc_clock->reg); ++ clk_prepare_unlock(); + -+ pr_debug("%s->set_phase(%d) delay_nums=%u reg[0x%p]=0x%03x actual_degrees=%d\n", -+ clk_hw_get_name(hw), degrees, delay_num, -+ mmc_clock->reg, raw_value>>(mmc_clock->shift), -+ rockchip_mmc_get_phase(hw) -+ ); ++ if (ret) ++ return ret; + -+ return 0; ++ return req.rate; +} ++EXPORT_SYMBOL_GPL(clk_round_rate); + -+static const struct clk_ops rockchip_mmc_clk_ops = { -+ .recalc_rate = rockchip_mmc_recalc, -+ .get_phase = rockchip_mmc_get_phase, -+ .set_phase = rockchip_mmc_set_phase, -+}; -+ -+#define to_rockchip_mmc_clock(x) \ -+ container_of(x, struct rockchip_mmc_clock, clk_rate_change_nb) -+static int rockchip_mmc_clk_rate_notify(struct notifier_block *nb, -+ unsigned long event, void *data) ++/** ++ * __clk_notify - call clk notifier chain ++ * @core: clk that is changing rate ++ * @msg: clk notifier type (see include/linux/clk.h) ++ * @old_rate: old clk rate ++ * @new_rate: new clk rate ++ * ++ * Triggers a notifier call chain on the clk rate-change notification ++ * for 'clk'. Passes a pointer to the struct clk and the previous ++ * and current rates to the notifier callback. Intended to be called by ++ * internal clock code only. Returns NOTIFY_DONE from the last driver ++ * called if all went well, or NOTIFY_STOP or NOTIFY_BAD immediately if ++ * a driver returns that. ++ */ ++static int __clk_notify(struct clk_core *core, unsigned long msg, ++ unsigned long old_rate, unsigned long new_rate) +{ -+ struct rockchip_mmc_clock *mmc_clock = to_rockchip_mmc_clock(nb); -+ struct clk_notifier_data *ndata = data; ++ struct clk_notifier *cn; ++ struct clk_notifier_data cnd; ++ int ret = NOTIFY_DONE; + -+ /* -+ * rockchip_mmc_clk is mostly used by mmc controllers to sample -+ * the intput data, which expects the fixed phase after the tuning -+ * process. However if the clock rate is changed, the phase is stale -+ * and may break the data sampling. So here we try to restore the phase -+ * for that case, except that -+ * (1) cached_phase is invaild since we inevitably cached it when the -+ * clock provider be reparented from orphan to its real parent in the -+ * first place. Otherwise we may mess up the initialization of MMC cards -+ * since we only set the default sample phase and drive phase later on. -+ * (2) the new coming rate is higher than the older one since mmc driver -+ * set the max-frequency to match the boards' ability but we can't go -+ * over the heads of that, otherwise the tests smoke out the issue. 
-+ */ -+ if (ndata->old_rate <= ndata->new_rate) -+ return NOTIFY_DONE; ++ cnd.old_rate = old_rate; ++ cnd.new_rate = new_rate; + -+ if (event == PRE_RATE_CHANGE) -+ mmc_clock->cached_phase = -+ rockchip_mmc_get_phase(&mmc_clock->hw); -+ else if (mmc_clock->cached_phase != -EINVAL && -+ event == POST_RATE_CHANGE) -+ rockchip_mmc_set_phase(&mmc_clock->hw, mmc_clock->cached_phase); ++ list_for_each_entry(cn, &clk_notifier_list, node) { ++ if (cn->clk->core == core) { ++ cnd.clk = cn->clk; ++ ret = srcu_notifier_call_chain(&cn->notifier_head, msg, ++ &cnd); ++ if (ret & NOTIFY_STOP_MASK) ++ return ret; ++ } ++ } + -+ return NOTIFY_DONE; ++ return ret; +} + -+struct clk *rockchip_clk_register_mmc(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *reg, int shift) ++/** ++ * __clk_recalc_accuracies ++ * @core: first clk in the subtree ++ * ++ * Walks the subtree of clks starting with clk and recalculates accuracies as ++ * it goes. Note that if a clk does not implement the .recalc_accuracy ++ * callback then it is assumed that the clock will take on the accuracy of its ++ * parent. ++ */ ++static void __clk_recalc_accuracies(struct clk_core *core) +{ -+ struct clk_init_data init; -+ struct rockchip_mmc_clock *mmc_clock; -+ struct clk *clk; -+ int ret; -+ -+ mmc_clock = kmalloc(sizeof(*mmc_clock), GFP_KERNEL); -+ if (!mmc_clock) -+ return ERR_PTR(-ENOMEM); -+ -+ init.name = name; -+ init.flags = 0; -+ init.num_parents = num_parents; -+ init.parent_names = parent_names; -+ init.ops = &rockchip_mmc_clk_ops; ++ unsigned long parent_accuracy = 0; ++ struct clk_core *child; + -+ mmc_clock->hw.init = &init; -+ mmc_clock->reg = reg; -+ mmc_clock->shift = shift; ++ lockdep_assert_held(&prepare_lock); + -+ clk = clk_register(NULL, &mmc_clock->hw); -+ if (IS_ERR(clk)) { -+ ret = PTR_ERR(clk); -+ goto err_register; -+ } ++ if (core->parent) ++ parent_accuracy = core->parent->accuracy; + -+ mmc_clock->clk_rate_change_nb.notifier_call = -+ &rockchip_mmc_clk_rate_notify; -+ ret = clk_notifier_register(clk, &mmc_clock->clk_rate_change_nb); -+ if (ret) -+ goto err_notifier; ++ if (core->ops->recalc_accuracy) ++ core->accuracy = core->ops->recalc_accuracy(core->hw, ++ parent_accuracy); ++ else ++ core->accuracy = parent_accuracy; + -+ return clk; -+err_notifier: -+ clk_unregister(clk); -+err_register: -+ kfree(mmc_clock); -+ return ERR_PTR(ret); ++ hlist_for_each_entry(child, &core->children, child_node) ++ __clk_recalc_accuracies(child); +} -diff --git a/drivers/clk/rockchip-oh/clk-muxgrf.c b/drivers/clk/rockchip-oh/clk-muxgrf.c -new file mode 100644 -index 000000000..4a335a5f4 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-muxgrf.c -@@ -0,0 +1,92 @@ -+// SPDX-License-Identifier: GPL-2.0-only + -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" -+ -+struct rockchip_muxgrf_clock { -+ struct clk_hw hw; -+ struct regmap *regmap; -+ u32 reg; -+ u32 shift; -+ u32 width; -+ int flags; -+}; ++static long clk_core_get_accuracy_recalc(struct clk_core *core) ++{ ++ if (core && (core->flags & CLK_GET_ACCURACY_NOCACHE)) ++ __clk_recalc_accuracies(core); + -+#define to_muxgrf_clock(_hw) container_of(_hw, struct rockchip_muxgrf_clock, hw) ++ return clk_core_get_accuracy_no_lock(core); ++} + -+static u8 rockchip_muxgrf_get_parent(struct clk_hw *hw) ++/** ++ * clk_get_accuracy - return the accuracy of clk ++ * @clk: the clk whose accuracy is being returned ++ * ++ * Simply returns the cached accuracy of the clk, unless ++ * CLK_GET_ACCURACY_NOCACHE flag is set, which 
means a recalc_rate will be ++ * issued. ++ * If clk is NULL then returns 0. ++ */ ++long clk_get_accuracy(struct clk *clk) +{ -+ struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw); -+ unsigned int mask = GENMASK(mux->width - 1, 0); -+ unsigned int val; ++ long accuracy; + -+ regmap_read(mux->regmap, mux->reg, &val); ++ if (!clk) ++ return 0; + -+ val >>= mux->shift; -+ val &= mask; ++ clk_prepare_lock(); ++ accuracy = clk_core_get_accuracy_recalc(clk->core); ++ clk_prepare_unlock(); + -+ return val; ++ return accuracy; +} ++EXPORT_SYMBOL_GPL(clk_get_accuracy); + -+static int rockchip_muxgrf_set_parent(struct clk_hw *hw, u8 index) ++static unsigned long clk_recalc(struct clk_core *core, ++ unsigned long parent_rate) +{ -+ struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw); -+ unsigned int mask = GENMASK(mux->width + mux->shift - 1, mux->shift); -+ unsigned int val; -+ -+ val = index; -+ val <<= mux->shift; ++ unsigned long rate = parent_rate; + -+ if (mux->flags & CLK_MUX_HIWORD_MASK) -+ return regmap_write(mux->regmap, mux->reg, val | (mask << 16)); -+ else -+ return regmap_update_bits(mux->regmap, mux->reg, mask, val); ++ if (core->ops->recalc_rate && !clk_pm_runtime_get(core)) { ++ rate = core->ops->recalc_rate(core->hw, parent_rate); ++ clk_pm_runtime_put(core); ++ } ++ return rate; +} + -+static const struct clk_ops rockchip_muxgrf_clk_ops = { -+ .get_parent = rockchip_muxgrf_get_parent, -+ .set_parent = rockchip_muxgrf_set_parent, -+ .determine_rate = __clk_mux_determine_rate, -+}; -+ -+struct clk *rockchip_clk_register_muxgrf(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ int flags, struct regmap *regmap, int reg, -+ int shift, int width, int mux_flags) ++/** ++ * __clk_recalc_rates ++ * @core: first clk in the subtree ++ * @update_req: Whether req_rate should be updated with the new rate ++ * @msg: notification type (see include/linux/clk.h) ++ * ++ * Walks the subtree of clks starting with clk and recalculates rates as it ++ * goes. Note that if a clk does not implement the .recalc_rate callback then ++ * it is assumed that the clock will take on the rate of its parent. ++ * ++ * clk_recalc_rates also propagates the POST_RATE_CHANGE notification, ++ * if necessary. 
++ */ ++static void __clk_recalc_rates(struct clk_core *core, bool update_req, ++ unsigned long msg) +{ -+ struct rockchip_muxgrf_clock *muxgrf_clock; -+ struct clk_init_data init; -+ struct clk *clk; ++ unsigned long old_rate; ++ unsigned long parent_rate = 0; ++ struct clk_core *child; + -+ if (IS_ERR(regmap)) { -+ pr_err("%s: regmap not available\n", __func__); -+ return ERR_PTR(-ENOTSUPP); -+ } ++ lockdep_assert_held(&prepare_lock); + -+ muxgrf_clock = kmalloc(sizeof(*muxgrf_clock), GFP_KERNEL); -+ if (!muxgrf_clock) -+ return ERR_PTR(-ENOMEM); ++ old_rate = core->rate; + -+ init.name = name; -+ init.flags = flags; -+ init.num_parents = num_parents; -+ init.parent_names = parent_names; -+ init.ops = &rockchip_muxgrf_clk_ops; ++ if (core->parent) ++ parent_rate = core->parent->rate; + -+ muxgrf_clock->hw.init = &init; -+ muxgrf_clock->regmap = regmap; -+ muxgrf_clock->reg = reg; -+ muxgrf_clock->shift = shift; -+ muxgrf_clock->width = width; -+ muxgrf_clock->flags = mux_flags; ++ core->rate = clk_recalc(core, parent_rate); ++ if (update_req) ++ core->req_rate = core->rate; + -+ clk = clk_register(NULL, &muxgrf_clock->hw); -+ if (IS_ERR(clk)) -+ kfree(muxgrf_clock); ++ /* ++ * ignore NOTIFY_STOP and NOTIFY_BAD return values for POST_RATE_CHANGE ++ * & ABORT_RATE_CHANGE notifiers ++ */ ++ if (core->notifier_count && msg) ++ __clk_notify(core, msg, old_rate, core->rate); + -+ return clk; ++ hlist_for_each_entry(child, &core->children, child_node) ++ __clk_recalc_rates(child, update_req, msg); +} -diff --git a/drivers/clk/rockchip-oh/clk-out.c b/drivers/clk/rockchip-oh/clk-out.c -new file mode 100644 -index 000000000..22dcd98fb ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-out.c -@@ -0,0 +1,99 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Rockchip Electronics Co., Ltd -+ */ + -+#include -+#include -+#include -+#include -+#include -+#include ++static unsigned long clk_core_get_rate_recalc(struct clk_core *core) ++{ ++ if (core && (core->flags & CLK_GET_RATE_NOCACHE)) ++ __clk_recalc_rates(core, false, 0); + -+static DEFINE_SPINLOCK(clk_out_lock); ++ return clk_core_get_rate_nolock(core); ++} + -+static int rockchip_clk_out_probe(struct platform_device *pdev) ++/** ++ * clk_get_rate - return the rate of clk ++ * @clk: the clk whose rate is being returned ++ * ++ * Simply returns the cached rate of the clk, unless CLK_GET_RATE_NOCACHE flag ++ * is set, which means a recalc_rate will be issued. Can be called regardless of ++ * the clock enabledness. If clk is NULL, or if an error occurred, then returns ++ * 0. 
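Although the kerneldoc above notes the call works regardless of enable state, a common consumer pattern is to read the rate once the clock is up; a hedged sketch with invented names, not taken from the patch:

#include <linux/clk.h>

/* Hypothetical consumer: enable a bus clock and report its rate. */
static int example_enable_and_get_rate(struct clk *bus_clk,
				       unsigned long *rate)
{
	int ret = clk_prepare_enable(bus_clk);

	if (ret)
		return ret;

	*rate = clk_get_rate(bus_clk);
	if (!*rate) {
		clk_disable_unprepare(bus_clk);
		return -EINVAL;
	}

	return 0;
}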
++ */ ++unsigned long clk_get_rate(struct clk *clk) +{ -+ struct device *dev = &pdev->dev; -+ struct device_node *node = pdev->dev.of_node; -+ struct clk_hw *hw; -+ struct resource *res; -+ const char *clk_name = node->name; -+ const char *parent_name; -+ void __iomem *reg; -+ u32 shift = 0; -+ u8 clk_gate_flags = CLK_GATE_HIWORD_MASK; -+ int ret; ++ unsigned long rate; + -+ ret = device_property_read_string(dev, "clock-output-names", &clk_name); -+ if (ret) -+ return ret; ++ if (!clk) ++ return 0; + -+ ret = device_property_read_u32(dev, "rockchip,bit-shift", &shift); -+ if (ret) -+ return ret; ++ clk_prepare_lock(); ++ rate = clk_core_get_rate_recalc(clk->core); ++ clk_prepare_unlock(); + -+ if (device_property_read_bool(dev, "rockchip,bit-set-to-disable")) -+ clk_gate_flags |= CLK_GATE_SET_TO_DISABLE; ++ return rate; ++} ++EXPORT_SYMBOL_GPL(clk_get_rate); + -+ ret = of_clk_parent_fill(node, &parent_name, 1); -+ if (ret != 1) ++static int clk_fetch_parent_index(struct clk_core *core, ++ struct clk_core *parent) ++{ ++ int i; ++ ++ if (!parent) + return -EINVAL; + -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ if (!res) -+ return -ENOMEM; ++ for (i = 0; i < core->num_parents; i++) { ++ /* Found it first try! */ ++ if (core->parents[i].core == parent) ++ return i; + -+ reg = devm_ioremap(dev, res->start, resource_size(res)); -+ if (!reg) -+ return -ENOMEM; ++ /* Something else is here, so keep looking */ ++ if (core->parents[i].core) ++ continue; + -+ pm_runtime_enable(dev); ++ /* Maybe core hasn't been cached but the hw is all we know? */ ++ if (core->parents[i].hw) { ++ if (core->parents[i].hw == parent->hw) ++ break; + -+ hw = clk_hw_register_gate(dev, clk_name, parent_name, CLK_SET_RATE_PARENT, -+ reg, shift, clk_gate_flags, &clk_out_lock); -+ if (IS_ERR(hw)) { -+ ret = -EINVAL; -+ goto err_disable_pm_runtime; -+ } ++ /* Didn't match, but we're expecting a clk_hw */ ++ continue; ++ } + -+ of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw); ++ /* Maybe it hasn't been cached (clk_set_parent() path) */ ++ if (parent == clk_core_get(core, i)) ++ break; + -+ return 0; ++ /* Fallback to comparing globally unique names */ ++ if (core->parents[i].name && ++ !strcmp(parent->name, core->parents[i].name)) ++ break; ++ } + -+err_disable_pm_runtime: -+ pm_runtime_disable(dev); ++ if (i == core->num_parents) ++ return -EINVAL; + -+ return ret; ++ core->parents[i].core = parent; ++ return i; +} + -+static int rockchip_clk_out_remove(struct platform_device *pdev) ++/** ++ * clk_hw_get_parent_index - return the index of the parent clock ++ * @hw: clk_hw associated with the clk being consumed ++ * ++ * Fetches and returns the index of parent clock. Returns -EINVAL if the given ++ * clock does not have a current parent. 
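A provider might use the helper above to re-program a mux select field from the framework's notion of the current parent, for example after register contents were lost across a power-domain cycle. The sketch below is illustrative only: the foo_* and FOO_* names are invented, and the hiword-mask write merely mirrors the register style used elsewhere in this patch.

#include <linux/clk-provider.h>
#include <linux/io.h>

struct foo_mux {
	struct clk_hw hw;
	void __iomem *reg;
};

#define FOO_MUX_SHIFT	8
#define FOO_MUX_MASK	0x3

/* Restore the hardware select field from the cached parent index. */
static int foo_mux_restore(struct foo_mux *mux)
{
	int idx = clk_hw_get_parent_index(&mux->hw);

	if (idx < 0)
		return idx;

	writel((FOO_MUX_MASK << (FOO_MUX_SHIFT + 16)) | (idx << FOO_MUX_SHIFT),
	       mux->reg);

	return 0;
}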
++ */ ++int clk_hw_get_parent_index(struct clk_hw *hw) +{ -+ struct device_node *node = pdev->dev.of_node; ++ struct clk_hw *parent = clk_hw_get_parent(hw); + -+ of_clk_del_provider(node); -+ pm_runtime_disable(&pdev->dev); ++ if (WARN_ON(parent == NULL)) ++ return -EINVAL; + -+ return 0; ++ return clk_fetch_parent_index(hw->core, parent->core); +} ++EXPORT_SYMBOL_GPL(clk_hw_get_parent_index); + -+static const struct of_device_id rockchip_clk_out_match[] = { -+ { .compatible = "rockchip,clk-out", }, -+ {}, -+}; -+ -+static struct platform_driver rockchip_clk_out_driver = { -+ .driver = { -+ .name = "rockchip-clk-out", -+ .of_match_table = rockchip_clk_out_match, -+ }, -+ .probe = rockchip_clk_out_probe, -+ .remove = rockchip_clk_out_remove, -+}; -+ -+module_platform_driver(rockchip_clk_out_driver); -+ -+MODULE_DESCRIPTION("Rockchip Clock Input-Output-Switch"); -+MODULE_AUTHOR("Sugar Zhang "); -+MODULE_LICENSE("GPL"); -+MODULE_DEVICE_TABLE(of, rockchip_clk_out_match); -diff --git a/drivers/clk/rockchip-oh/clk-pll.c b/drivers/clk/rockchip-oh/clk-pll.c -new file mode 100644 -index 000000000..8aa9c3014 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-pll.c -@@ -0,0 +1,2200 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ * -+ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. -+ * Author: Xing Zheng ++ * Update the orphan status of @core and all its children. + */ ++static void clk_core_update_orphan_status(struct clk_core *core, bool is_orphan) ++{ ++ struct clk_core *child; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++ core->orphan = is_orphan; + -+#define PLL_MODE_MASK 0x3 -+#define PLL_MODE_SLOW 0x0 -+#define PLL_MODE_NORM 0x1 -+#define PLL_MODE_DEEP 0x2 -+#define PLL_RK3328_MODE_MASK 0x1 ++ hlist_for_each_entry(child, &core->children, child_node) ++ clk_core_update_orphan_status(child, is_orphan); ++} + -+struct rockchip_clk_pll { -+ struct clk_hw hw; ++static void clk_reparent(struct clk_core *core, struct clk_core *new_parent) ++{ ++ bool was_orphan = core->orphan; + -+ struct clk_mux pll_mux; -+ const struct clk_ops *pll_mux_ops; ++ hlist_del(&core->child_node); + -+ struct notifier_block clk_nb; ++ if (new_parent) { ++ bool becomes_orphan = new_parent->orphan; + -+ void __iomem *reg_base; -+ int lock_offset; -+ unsigned int lock_shift; -+ enum rockchip_pll_type type; -+ u8 flags; -+ const struct rockchip_pll_rate_table *rate_table; -+ unsigned int rate_count; -+ int sel; -+ unsigned long scaling; -+ spinlock_t *lock; ++ /* avoid duplicate POST_RATE_CHANGE notifications */ ++ if (new_parent->new_child == core) ++ new_parent->new_child = NULL; + -+ struct rockchip_clk_provider *ctx; ++ hlist_add_head(&core->child_node, &new_parent->children); + -+#ifdef CONFIG_ROCKCHIP_CLK_BOOST -+ bool boost_enabled; -+ u32 boost_backup_pll_usage; -+ unsigned long boost_backup_pll_rate; -+ unsigned long boost_low_rate; -+ unsigned long boost_high_rate; -+ struct regmap *boost; -+#endif -+#ifdef CONFIG_DEBUG_FS -+ struct hlist_node debug_node; -+#endif -+}; ++ if (was_orphan != becomes_orphan) ++ clk_core_update_orphan_status(core, becomes_orphan); ++ } else { ++ hlist_add_head(&core->child_node, &clk_orphan_list); ++ if (!was_orphan) ++ clk_core_update_orphan_status(core, true); ++ } + -+#define to_rockchip_clk_pll(_hw) container_of(_hw, struct rockchip_clk_pll, hw) -+#define to_rockchip_clk_pll_nb(nb) \ -+ container_of(nb, 
struct rockchip_clk_pll, clk_nb) ++ core->parent = new_parent; ++} + -+#ifdef CONFIG_ROCKCHIP_CLK_BOOST -+static void rockchip_boost_disable_low(struct rockchip_clk_pll *pll); -+#ifdef CONFIG_DEBUG_FS -+static HLIST_HEAD(clk_boost_list); -+static DEFINE_MUTEX(clk_boost_lock); -+#endif -+#else -+static inline void rockchip_boost_disable_low(struct rockchip_clk_pll *pll) {} -+#endif ++static struct clk_core *__clk_set_parent_before(struct clk_core *core, ++ struct clk_core *parent) ++{ ++ unsigned long flags; ++ struct clk_core *old_parent = core->parent; + -+#define MHZ (1000UL * 1000UL) -+#define KHZ (1000UL) ++ /* ++ * 1. enable parents for CLK_OPS_PARENT_ENABLE clock ++ * ++ * 2. Migrate prepare state between parents and prevent race with ++ * clk_enable(). ++ * ++ * If the clock is not prepared, then a race with ++ * clk_enable/disable() is impossible since we already have the ++ * prepare lock (future calls to clk_enable() need to be preceded by ++ * a clk_prepare()). ++ * ++ * If the clock is prepared, migrate the prepared state to the new ++ * parent and also protect against a race with clk_enable() by ++ * forcing the clock and the new parent on. This ensures that all ++ * future calls to clk_enable() are practically NOPs with respect to ++ * hardware and software states. ++ * ++ * See also: Comment for clk_set_parent() below. ++ */ + -+/* CLK_PLL_TYPE_RK3066_AUTO type ops */ -+#define PLL_FREF_MIN (269 * KHZ) -+#define PLL_FREF_MAX (2200 * MHZ) ++ /* enable old_parent & parent if CLK_OPS_PARENT_ENABLE is set */ ++ if (core->flags & CLK_OPS_PARENT_ENABLE) { ++ clk_core_prepare_enable(old_parent); ++ clk_core_prepare_enable(parent); ++ } + -+#define PLL_FVCO_MIN (440 * MHZ) -+#define PLL_FVCO_MAX (2200 * MHZ) ++ /* migrate prepare count if > 0 */ ++ if (core->prepare_count) { ++ clk_core_prepare_enable(parent); ++ clk_core_enable_lock(core); ++ } + -+#define PLL_FOUT_MIN (27500 * KHZ) -+#define PLL_FOUT_MAX (2200 * MHZ) ++ /* update the clk tree topology */ ++ flags = clk_enable_lock(); ++ clk_reparent(core, parent); ++ clk_enable_unlock(flags); + -+#define PLL_NF_MAX (4096) -+#define PLL_NR_MAX (64) -+#define PLL_NO_MAX (16) ++ return old_parent; ++} + -+/* CLK_PLL_TYPE_RK3036/3366/3399_AUTO type ops */ -+#define MIN_FOUTVCO_FREQ (800 * MHZ) -+#define MAX_FOUTVCO_FREQ (2000 * MHZ) ++static void __clk_set_parent_after(struct clk_core *core, ++ struct clk_core *parent, ++ struct clk_core *old_parent) ++{ ++ /* ++ * Finish the migration of prepare state and undo the changes done ++ * for preventing a race with clk_enable(). 
++ */ ++ if (core->prepare_count) { ++ clk_core_disable_lock(core); ++ clk_core_disable_unprepare(old_parent); ++ } + -+static struct rockchip_pll_rate_table auto_table; ++ /* re-balance ref counting if CLK_OPS_PARENT_ENABLE is set */ ++ if (core->flags & CLK_OPS_PARENT_ENABLE) { ++ clk_core_disable_unprepare(parent); ++ clk_core_disable_unprepare(old_parent); ++ } ++} + -+int rockchip_pll_clk_adaptive_scaling(struct clk *clk, int sel) ++static int __clk_set_parent(struct clk_core *core, struct clk_core *parent, ++ u8 p_index) +{ -+ struct clk *parent = clk_get_parent(clk); -+ struct rockchip_clk_pll *pll; -+ -+ if (IS_ERR_OR_NULL(parent)) -+ return -EINVAL; ++ unsigned long flags; ++ int ret = 0; ++ struct clk_core *old_parent; + -+ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); -+ if (!pll) -+ return -EINVAL; ++ old_parent = __clk_set_parent_before(core, parent); + -+ pll->sel = sel; ++ trace_clk_set_parent(core, parent); + -+ return 0; -+} -+EXPORT_SYMBOL(rockchip_pll_clk_adaptive_scaling); ++ /* change clock input source */ ++ if (parent && core->ops->set_parent) ++ ret = core->ops->set_parent(core->hw, p_index); + -+int rockchip_pll_clk_rate_to_scale(struct clk *clk, unsigned long rate) -+{ -+ const struct rockchip_pll_rate_table *rate_table; -+ struct clk *parent = clk_get_parent(clk); -+ struct rockchip_clk_pll *pll; -+ unsigned int i; ++ trace_clk_set_parent_complete(core, parent); + -+ if (IS_ERR_OR_NULL(parent)) -+ return -EINVAL; ++ if (ret) { ++ flags = clk_enable_lock(); ++ clk_reparent(core, old_parent); ++ clk_enable_unlock(flags); + -+ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); -+ if (!pll) -+ return -EINVAL; ++ __clk_set_parent_after(core, old_parent, parent); + -+ rate_table = pll->rate_table; -+ for (i = 0; i < pll->rate_count; i++) { -+ if (rate >= rate_table[i].rate) -+ return i; ++ return ret; + } + -+ return -EINVAL; ++ __clk_set_parent_after(core, parent, old_parent); ++ ++ return 0; +} -+EXPORT_SYMBOL(rockchip_pll_clk_rate_to_scale); + -+int rockchip_pll_clk_scale_to_rate(struct clk *clk, unsigned int scale) ++/** ++ * __clk_speculate_rates ++ * @core: first clk in the subtree ++ * @parent_rate: the "future" rate of clk's parent ++ * ++ * Walks the subtree of clks starting with clk, speculating rates as it ++ * goes and firing off PRE_RATE_CHANGE notifications as necessary. ++ * ++ * Unlike clk_recalc_rates, clk_speculate_rates exists only for sending ++ * pre-rate change notifications and returns early if no clks in the ++ * subtree have subscribed to the notifications. Note that if a clk does not ++ * implement the .recalc_rate callback then it is assumed that the clock will ++ * take on the rate of its parent. 
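The PRE_RATE_CHANGE messages speculated here, together with the POST_RATE_CHANGE and ABORT_RATE_CHANGE messages sent from the rate-change path, are what consumer notifiers receive. A hedged sketch of subscribing to them follows; the example_* names are hypothetical. Returning NOTIFY_BAD from the PRE_RATE_CHANGE case is how a consumer vetoes the change.

#include <linux/clk.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int example_clk_notify(struct notifier_block *nb,
			      unsigned long event, void *data)
{
	struct clk_notifier_data *ndata = data;

	switch (event) {
	case PRE_RATE_CHANGE:
		pr_debug("clk rate %lu -> %lu\n",
			 ndata->old_rate, ndata->new_rate);
		/* return NOTIFY_BAD here to veto the change */
		break;
	case POST_RATE_CHANGE:
	case ABORT_RATE_CHANGE:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block example_clk_nb = {
	.notifier_call = example_clk_notify,
};

/* In a driver's probe: clk_notifier_register(clk, &example_clk_nb); */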
++ */ ++static int __clk_speculate_rates(struct clk_core *core, ++ unsigned long parent_rate) +{ -+ const struct rockchip_pll_rate_table *rate_table; -+ struct clk *parent = clk_get_parent(clk); -+ struct rockchip_clk_pll *pll; -+ unsigned int i; ++ struct clk_core *child; ++ unsigned long new_rate; ++ int ret = NOTIFY_DONE; + -+ if (IS_ERR_OR_NULL(parent)) -+ return -EINVAL; ++ lockdep_assert_held(&prepare_lock); + -+ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); -+ if (!pll) -+ return -EINVAL; ++ new_rate = clk_recalc(core, parent_rate); + -+ rate_table = pll->rate_table; -+ for (i = 0; i < pll->rate_count; i++) { -+ if (i == scale) -+ return rate_table[i].rate; ++ /* abort rate change if a driver returns NOTIFY_BAD or NOTIFY_STOP */ ++ if (core->notifier_count) ++ ret = __clk_notify(core, PRE_RATE_CHANGE, core->rate, new_rate); ++ ++ if (ret & NOTIFY_STOP_MASK) { ++ pr_debug("%s: clk notifier callback for clock %s aborted with error %d\n", ++ __func__, core->name, ret); ++ goto out; + } + -+ return -EINVAL; -+} -+EXPORT_SYMBOL(rockchip_pll_clk_scale_to_rate); ++ hlist_for_each_entry(child, &core->children, child_node) { ++ ret = __clk_speculate_rates(child, new_rate); ++ if (ret & NOTIFY_STOP_MASK) ++ break; ++ } + -+static struct rockchip_pll_rate_table *rk_pll_rate_table_get(void) -+{ -+ return &auto_table; ++out: ++ return ret; +} + -+static int rockchip_pll_clk_set_postdiv(unsigned long fout_hz, -+ u32 *postdiv1, -+ u32 *postdiv2, -+ u32 *foutvco) ++static void clk_calc_subtree(struct clk_core *core, unsigned long new_rate, ++ struct clk_core *new_parent, u8 p_index) +{ -+ unsigned long freq; ++ struct clk_core *child; + -+ if (fout_hz < MIN_FOUTVCO_FREQ) { -+ for (*postdiv1 = 1; *postdiv1 <= 7; (*postdiv1)++) { -+ for (*postdiv2 = 1; *postdiv2 <= 7; (*postdiv2)++) { -+ freq = fout_hz * (*postdiv1) * (*postdiv2); -+ if (freq >= MIN_FOUTVCO_FREQ && -+ freq <= MAX_FOUTVCO_FREQ) { -+ *foutvco = freq; -+ return 0; -+ } -+ } -+ } -+ pr_err("CANNOT FIND postdiv1/2 to make fout in range from 800M to 2000M,fout = %lu\n", -+ fout_hz); -+ } else { -+ *postdiv1 = 1; -+ *postdiv2 = 1; ++ core->new_rate = new_rate; ++ core->new_parent = new_parent; ++ core->new_parent_index = p_index; ++ /* include clk in new parent's PRE_RATE_CHANGE notifications */ ++ core->new_child = NULL; ++ if (new_parent && new_parent != core->parent) ++ new_parent->new_child = core; ++ ++ hlist_for_each_entry(child, &core->children, child_node) { ++ child->new_rate = clk_recalc(child, new_rate); ++ clk_calc_subtree(child, child->new_rate, NULL, 0); + } -+ return 0; +} + -+static struct rockchip_pll_rate_table * -+rockchip_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, -+ unsigned long fin_hz, -+ unsigned long fout_hz) ++/* ++ * calculate the new rates returning the topmost clock that has to be ++ * changed. 
++ */ ++static struct clk_core *clk_calc_new_rates(struct clk_core *core, ++ unsigned long rate) +{ -+ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); -+ /* FIXME set postdiv1/2 always 1*/ -+ u32 foutvco = fout_hz; -+ u64 fin_64, frac_64; -+ u32 f_frac, postdiv1, postdiv2; -+ unsigned long clk_gcd = 0; ++ struct clk_core *top = core; ++ struct clk_core *old_parent, *parent; ++ unsigned long best_parent_rate = 0; ++ unsigned long new_rate; ++ unsigned long min_rate; ++ unsigned long max_rate; ++ int p_index = 0; ++ long ret; + -+ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) ++ /* sanity */ ++ if (IS_ERR_OR_NULL(core)) + return NULL; + -+ rockchip_pll_clk_set_postdiv(fout_hz, &postdiv1, &postdiv2, &foutvco); -+ rate_table->postdiv1 = postdiv1; -+ rate_table->postdiv2 = postdiv2; -+ rate_table->dsmpd = 1; ++ /* save parent rate, if it exists */ ++ parent = old_parent = core->parent; ++ if (parent) ++ best_parent_rate = parent->rate; + -+ if (fin_hz / MHZ * MHZ == fin_hz && fout_hz / MHZ * MHZ == fout_hz) { -+ fin_hz /= MHZ; -+ foutvco /= MHZ; -+ clk_gcd = gcd(fin_hz, foutvco); -+ rate_table->refdiv = fin_hz / clk_gcd; -+ rate_table->fbdiv = foutvco / clk_gcd; ++ clk_core_get_boundaries(core, &min_rate, &max_rate); + -+ rate_table->frac = 0; ++ /* find the closest rate and parent clk/rate */ ++ if (clk_core_can_round(core)) { ++ struct clk_rate_request req; + -+ pr_debug("fin = %lu, fout = %lu, clk_gcd = %lu, refdiv = %u, fbdiv = %u, postdiv1 = %u, postdiv2 = %u, frac = %u\n", -+ fin_hz, fout_hz, clk_gcd, rate_table->refdiv, -+ rate_table->fbdiv, rate_table->postdiv1, -+ rate_table->postdiv2, rate_table->frac); -+ } else { -+ pr_debug("frac div running, fin_hz = %lu, fout_hz = %lu, fin_INT_mhz = %lu, fout_INT_mhz = %lu\n", -+ fin_hz, fout_hz, -+ fin_hz / MHZ * MHZ, -+ fout_hz / MHZ * MHZ); -+ pr_debug("frac get postdiv1 = %u, postdiv2 = %u, foutvco = %u\n", -+ rate_table->postdiv1, rate_table->postdiv2, foutvco); -+ clk_gcd = gcd(fin_hz / MHZ, foutvco / MHZ); -+ rate_table->refdiv = fin_hz / MHZ / clk_gcd; -+ rate_table->fbdiv = foutvco / MHZ / clk_gcd; -+ pr_debug("frac get refdiv = %u, fbdiv = %u\n", -+ rate_table->refdiv, rate_table->fbdiv); ++ clk_core_init_rate_req(core, &req, rate); + -+ rate_table->frac = 0; ++ trace_clk_rate_request_start(&req); + -+ f_frac = (foutvco % MHZ); -+ fin_64 = fin_hz; -+ do_div(fin_64, (u64)rate_table->refdiv); -+ frac_64 = (u64)f_frac << 24; -+ do_div(frac_64, fin_64); -+ rate_table->frac = (u32)frac_64; -+ if (rate_table->frac > 0) -+ rate_table->dsmpd = 0; -+ pr_debug("frac = %x\n", rate_table->frac); -+ } -+ return rate_table; -+} ++ ret = clk_core_determine_round_nolock(core, &req); ++ if (ret < 0) ++ return NULL; + -+static struct rockchip_pll_rate_table * -+rockchip_rk3066_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, -+ unsigned long fin_hz, -+ unsigned long fout_hz) -+{ -+ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); -+ u32 nr, nf, no, nonr; -+ u32 nr_out, nf_out, no_out; -+ u32 n; -+ u32 numerator, denominator; -+ u64 fref, fvco, fout; -+ unsigned long clk_gcd = 0; ++ trace_clk_rate_request_done(&req); + -+ nr_out = PLL_NR_MAX + 1; -+ no_out = 0; -+ nf_out = 0; ++ best_parent_rate = req.best_parent_rate; ++ new_rate = req.rate; ++ parent = req.best_parent_hw ? 
req.best_parent_hw->core : NULL; + -+ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) ++ if (new_rate < min_rate || new_rate > max_rate) ++ return NULL; ++ } else if (!parent || !(core->flags & CLK_SET_RATE_PARENT)) { ++ /* pass-through clock without adjustable parent */ ++ core->new_rate = core->rate; + return NULL; ++ } else { ++ /* pass-through clock with adjustable parent */ ++ top = clk_calc_new_rates(parent, rate); ++ new_rate = parent->new_rate; ++ goto out; ++ } + -+ clk_gcd = gcd(fin_hz, fout_hz); -+ -+ numerator = fout_hz / clk_gcd; -+ denominator = fin_hz / clk_gcd; ++ /* some clocks must be gated to change parent */ ++ if (parent != old_parent && ++ (core->flags & CLK_SET_PARENT_GATE) && core->prepare_count) { ++ pr_debug("%s: %s not gated but wants to reparent\n", ++ __func__, core->name); ++ return NULL; ++ } + -+ for (n = 1;; n++) { -+ nf = numerator * n; -+ nonr = denominator * n; -+ if (nf > PLL_NF_MAX || nonr > (PLL_NO_MAX * PLL_NR_MAX)) -+ break; ++ /* try finding the new parent index */ ++ if (parent && core->num_parents > 1) { ++ p_index = clk_fetch_parent_index(core, parent); ++ if (p_index < 0) { ++ pr_debug("%s: clk %s can not be parent of clk %s\n", ++ __func__, parent->name, core->name); ++ return NULL; ++ } ++ } + -+ for (no = 1; no <= PLL_NO_MAX; no++) { -+ if (!(no == 1 || !(no % 2))) -+ continue; ++ if ((core->flags & CLK_SET_RATE_PARENT) && parent && ++ best_parent_rate != parent->rate) ++ top = clk_calc_new_rates(parent, best_parent_rate); + -+ if (nonr % no) -+ continue; -+ nr = nonr / no; ++out: ++ clk_calc_subtree(core, new_rate, parent, p_index); + -+ if (nr > PLL_NR_MAX) -+ continue; ++ return top; ++} + -+ fref = fin_hz / nr; -+ if (fref < PLL_FREF_MIN || fref > PLL_FREF_MAX) -+ continue; ++/* ++ * Notify about rate changes in a subtree. Always walk down the whole tree ++ * so that in case of an error we can walk down the whole tree again and ++ * abort the change. 
++ */ ++static struct clk_core *clk_propagate_rate_change(struct clk_core *core, ++ unsigned long event) ++{ ++ struct clk_core *child, *tmp_clk, *fail_clk = NULL; ++ int ret = NOTIFY_DONE; + -+ fvco = fref * nf; -+ if (fvco < PLL_FVCO_MIN || fvco > PLL_FVCO_MAX) -+ continue; ++ if (core->rate == core->new_rate) ++ return NULL; + -+ fout = fvco / no; -+ if (fout < PLL_FOUT_MIN || fout > PLL_FOUT_MAX) -+ continue; ++ if (core->notifier_count) { ++ ret = __clk_notify(core, event, core->rate, core->new_rate); ++ if (ret & NOTIFY_STOP_MASK) ++ fail_clk = core; ++ } + -+ /* select the best from all available PLL settings */ -+ if ((no > no_out) || -+ ((no == no_out) && (nr < nr_out))) { -+ nr_out = nr; -+ nf_out = nf; -+ no_out = no; -+ } -+ } ++ hlist_for_each_entry(child, &core->children, child_node) { ++ /* Skip children who will be reparented to another clock */ ++ if (child->new_parent && child->new_parent != core) ++ continue; ++ tmp_clk = clk_propagate_rate_change(child, event); ++ if (tmp_clk) ++ fail_clk = tmp_clk; + } + -+ /* output the best PLL setting */ -+ if ((nr_out <= PLL_NR_MAX) && (no_out > 0)) { -+ rate_table->nr = nr_out; -+ rate_table->nf = nf_out; -+ rate_table->no = no_out; -+ } else { -+ return NULL; ++ /* handle the new child who might not be in core->children yet */ ++ if (core->new_child) { ++ tmp_clk = clk_propagate_rate_change(core->new_child, event); ++ if (tmp_clk) ++ fail_clk = tmp_clk; + } + -+ return rate_table; ++ return fail_clk; +} + -+static u32 -+rockchip_rk3588_pll_frac_get(u32 m, u32 p, u32 s, u64 fin_hz, u64 fvco) ++/* ++ * walk down a subtree and set the new rates notifying the rate ++ * change on the way ++ */ ++static void clk_change_rate(struct clk_core *core) +{ -+ u64 fref, fout, ffrac; -+ u32 k = 0; ++ struct clk_core *child; ++ struct hlist_node *tmp; ++ unsigned long old_rate; ++ unsigned long best_parent_rate = 0; ++ bool skip_set_rate = false; ++ struct clk_core *old_parent; ++ struct clk_core *parent = NULL; + -+ fref = fin_hz / p; -+ ffrac = fvco - (m * fref); -+ fout = ffrac * 65536; -+ k = fout / fref; -+ if (k > 32767) { -+ fref = fin_hz / p; -+ ffrac = ((m + 1) * fref) - fvco; -+ fout = ffrac * 65536; -+ k = ((fout * 10 / fref) + 7) / 10; -+ if (k > 32767) -+ k = 0; -+ else -+ k = ~k + 1; ++ old_rate = core->rate; ++ ++ if (core->new_parent) { ++ parent = core->new_parent; ++ best_parent_rate = core->new_parent->rate; ++ } else if (core->parent) { ++ parent = core->parent; ++ best_parent_rate = core->parent->rate; + } -+ return k; -+} + -+static struct rockchip_pll_rate_table * -+rockchip_rk3588_pll_frac_by_auto(unsigned long fin_hz, unsigned long fout_hz) -+{ -+ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); -+ u64 fvco_min = 2250 * MHZ, fvco_max = 4500 * MHZ; -+ u32 p, m, s, k; -+ u64 fvco; ++ if (clk_pm_runtime_get(core)) ++ return; + -+ for (s = 0; s <= 6; s++) { -+ fvco = (u64)fout_hz << s; -+ if (fvco < fvco_min || fvco > fvco_max) -+ continue; -+ for (p = 1; p <= 4; p++) { -+ for (m = 64; m <= 1023; m++) { -+ if ((fvco >= m * fin_hz / p) && (fvco < (m + 1) * fin_hz / p)) { -+ k = rockchip_rk3588_pll_frac_get(m, p, s, -+ (u64)fin_hz, -+ fvco); -+ if (!k) -+ continue; -+ rate_table->p = p; -+ rate_table->s = s; -+ rate_table->k = k; -+ if (k > 32767) -+ rate_table->m = m + 1; -+ else -+ rate_table->m = m; -+ return rate_table; -+ } -+ } ++ if (core->flags & CLK_SET_RATE_UNGATE) { ++ clk_core_prepare(core); ++ clk_core_enable_lock(core); ++ } ++ ++ if (core->new_parent && core->new_parent != core->parent) { ++ 
old_parent = __clk_set_parent_before(core, core->new_parent); ++ trace_clk_set_parent(core, core->new_parent); ++ ++ if (core->ops->set_rate_and_parent) { ++ skip_set_rate = true; ++ core->ops->set_rate_and_parent(core->hw, core->new_rate, ++ best_parent_rate, ++ core->new_parent_index); ++ } else if (core->ops->set_parent) { ++ core->ops->set_parent(core->hw, core->new_parent_index); + } ++ ++ trace_clk_set_parent_complete(core, core->new_parent); ++ __clk_set_parent_after(core, core->new_parent, old_parent); + } -+ return NULL; -+} + -+static struct rockchip_pll_rate_table * -+rockchip_rk3588_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, -+ unsigned long fin_hz, -+ unsigned long fout_hz) -+{ -+ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); -+ u64 fvco_min = 2250 * MHZ, fvco_max = 4500 * MHZ; -+ u64 fout_min = 37 * MHZ, fout_max = 4500 * MHZ; -+ u32 p, m, s; -+ u64 fvco; ++ if (core->flags & CLK_OPS_PARENT_ENABLE) ++ clk_core_prepare_enable(parent); + -+ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) -+ return NULL; ++ trace_clk_set_rate(core, core->new_rate); + -+ if (fout_hz > fout_max || fout_hz < fout_min) -+ return NULL; ++ if (!skip_set_rate && core->ops->set_rate) ++ core->ops->set_rate(core->hw, core->new_rate, best_parent_rate); + -+ if (fin_hz / MHZ * MHZ == fin_hz && fout_hz / MHZ * MHZ == fout_hz) { -+ for (s = 0; s <= 6; s++) { -+ fvco = (u64)fout_hz << s; -+ if (fvco < fvco_min || fvco > fvco_max) -+ continue; -+ for (p = 2; p <= 4; p++) { -+ for (m = 64; m <= 1023; m++) { -+ if (fvco == m * fin_hz / p) { -+ rate_table->p = p; -+ rate_table->m = m; -+ rate_table->s = s; -+ rate_table->k = 0; -+ return rate_table; -+ } -+ } -+ } -+ } -+ pr_err("CANNOT FIND Fout by auto,fout = %lu\n", fout_hz); -+ } else { -+ rate_table = rockchip_rk3588_pll_frac_by_auto(fin_hz, fout_hz); -+ if (!rate_table) -+ pr_err("CANNOT FIND Fout by auto,fout = %lu\n", fout_hz); -+ else -+ return rate_table; ++ trace_clk_set_rate_complete(core, core->new_rate); ++ ++ core->rate = clk_recalc(core, best_parent_rate); ++ ++ if (core->flags & CLK_SET_RATE_UNGATE) { ++ clk_core_disable_lock(core); ++ clk_core_unprepare(core); + } -+ return NULL; -+} + -+static const struct rockchip_pll_rate_table *rockchip_get_pll_settings( -+ struct rockchip_clk_pll *pll, unsigned long rate) -+{ -+ const struct rockchip_pll_rate_table *rate_table = pll->rate_table; -+ int i; ++ if (core->flags & CLK_OPS_PARENT_ENABLE) ++ clk_core_disable_unprepare(parent); + -+ for (i = 0; i < pll->rate_count; i++) { -+ if (rate == rate_table[i].rate) { -+ if (i < pll->sel) { -+ pll->scaling = rate; -+ return &rate_table[pll->sel]; -+ } -+ pll->scaling = 0; -+ return &rate_table[i]; -+ } ++ if (core->notifier_count && old_rate != core->rate) ++ __clk_notify(core, POST_RATE_CHANGE, old_rate, core->rate); ++ ++ if (core->flags & CLK_RECALC_NEW_RATES) ++ (void)clk_calc_new_rates(core, core->new_rate); ++ ++ /* ++ * Use safe iteration, as change_rate can actually swap parents ++ * for certain clock types. 
++ */ ++ hlist_for_each_entry_safe(child, tmp, &core->children, child_node) { ++ /* Skip children who will be reparented to another clock */ ++ if (child->new_parent && child->new_parent != core) ++ continue; ++ clk_change_rate(child); + } -+ pll->scaling = 0; + -+ if (pll->type == pll_rk3066) -+ return rockchip_rk3066_pll_clk_set_by_auto(pll, 24 * MHZ, rate); -+ else if (pll->type == pll_rk3588 || pll->type == pll_rk3588_core) -+ return rockchip_rk3588_pll_clk_set_by_auto(pll, 24 * MHZ, rate); -+ else -+ return rockchip_pll_clk_set_by_auto(pll, 24 * MHZ, rate); ++ /* handle the new child who might not be in core->children yet */ ++ if (core->new_child) ++ clk_change_rate(core->new_child); ++ ++ clk_pm_runtime_put(core); +} + -+static long rockchip_pll_round_rate(struct clk_hw *hw, -+ unsigned long drate, unsigned long *prate) ++static unsigned long clk_core_req_round_rate_nolock(struct clk_core *core, ++ unsigned long req_rate) +{ -+ return drate; ++ int ret, cnt; ++ struct clk_rate_request req; ++ ++ lockdep_assert_held(&prepare_lock); ++ ++ if (!core) ++ return 0; ++ ++ /* simulate what the rate would be if it could be freely set */ ++ cnt = clk_core_rate_nuke_protect(core); ++ if (cnt < 0) ++ return cnt; ++ ++ clk_core_init_rate_req(core, &req, req_rate); ++ ++ trace_clk_rate_request_start(&req); ++ ++ ret = clk_core_round_rate_nolock(core, &req); ++ ++ trace_clk_rate_request_done(&req); ++ ++ /* restore the protection */ ++ clk_core_rate_restore_protect(core, cnt); ++ ++ return ret ? 0 : req.rate; +} + -+/* -+ * Wait for the pll to reach the locked state. -+ * The calling set_rate function is responsible for making sure the -+ * grf regmap is available. -+ */ -+static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) ++static int clk_core_set_rate_nolock(struct clk_core *core, ++ unsigned long req_rate) +{ -+ struct regmap *grf = pll->ctx->grf; -+ unsigned int val; ++ struct clk_core *top, *fail_clk; ++ unsigned long rate; + int ret; + -+ ret = regmap_read_poll_timeout(grf, pll->lock_offset, val, -+ val & BIT(pll->lock_shift), 0, 1000); -+ if (ret) -+ pr_err("%s: timeout waiting for pll to lock\n", __func__); ++ if (!core) ++ return 0; + -+ return ret; -+} ++ rate = clk_core_req_round_rate_nolock(core, req_rate); + -+/* -+ * PLL used in RK3036 -+ */ ++ /* bail early if nothing to do */ ++ if (rate == clk_core_get_rate_nolock(core)) ++ return 0; + -+#define RK3036_PLLCON(i) (i * 0x4) -+#define RK3036_PLLCON0_FBDIV_MASK 0xfff -+#define RK3036_PLLCON0_FBDIV_SHIFT 0 -+#define RK3036_PLLCON0_POSTDIV1_MASK 0x7 -+#define RK3036_PLLCON0_POSTDIV1_SHIFT 12 -+#define RK3036_PLLCON1_REFDIV_MASK 0x3f -+#define RK3036_PLLCON1_REFDIV_SHIFT 0 -+#define RK3036_PLLCON1_POSTDIV2_MASK 0x7 -+#define RK3036_PLLCON1_POSTDIV2_SHIFT 6 -+#define RK3036_PLLCON1_LOCK_STATUS BIT(10) -+#define RK3036_PLLCON1_DSMPD_MASK 0x1 -+#define RK3036_PLLCON1_DSMPD_SHIFT 12 -+#define RK3036_PLLCON1_PWRDOWN BIT(13) -+#define RK3036_PLLCON1_PLLPDSEL BIT(15) -+#define RK3036_PLLCON2_FRAC_MASK 0xffffff -+#define RK3036_PLLCON2_FRAC_SHIFT 0 ++ /* fail on a direct rate set of a protected provider */ ++ if (clk_core_rate_is_protected(core)) ++ return -EBUSY; + -+static int rockchip_rk3036_pll_wait_lock(struct rockchip_clk_pll *pll) -+{ -+ u32 pllcon; -+ int ret; ++ /* calculate new rates and get the topmost changed clock */ ++ top = clk_calc_new_rates(core, req_rate); ++ if (!top) ++ return -EINVAL; + -+ /* -+ * Lock time typical 250, max 500 input clock cycles @24MHz -+ * So define a very safe maximum of 1000us, meaning 
24000 cycles. -+ */ -+ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3036_PLLCON(1), -+ pllcon, -+ pllcon & RK3036_PLLCON1_LOCK_STATUS, -+ 0, 1000); ++ ret = clk_pm_runtime_get(core); + if (ret) -+ pr_err("%s: timeout waiting for pll to lock\n", __func__); ++ return ret; ++ ++ /* notify that we are about to change rates */ ++ fail_clk = clk_propagate_rate_change(top, PRE_RATE_CHANGE); ++ if (fail_clk) { ++ pr_debug("%s: failed to set %s rate\n", __func__, ++ fail_clk->name); ++ clk_propagate_rate_change(top, ABORT_RATE_CHANGE); ++ ret = -EBUSY; ++ goto err; ++ } ++ ++ /* change the rates */ ++ clk_change_rate(top); ++ ++ core->req_rate = req_rate; ++err: ++ clk_pm_runtime_put(core); + + return ret; +} + -+static unsigned long __maybe_unused -+rockchip_rk3036_pll_con_to_rate(struct rockchip_clk_pll *pll, -+ u32 con0, u32 con1) ++/** ++ * clk_set_rate - specify a new rate for clk ++ * @clk: the clk whose rate is being changed ++ * @rate: the new rate for clk ++ * ++ * In the simplest case clk_set_rate will only adjust the rate of clk. ++ * ++ * Setting the CLK_SET_RATE_PARENT flag allows the rate change operation to ++ * propagate up to clk's parent; whether or not this happens depends on the ++ * outcome of clk's .round_rate implementation. If *parent_rate is unchanged ++ * after calling .round_rate then upstream parent propagation is ignored. If ++ * *parent_rate comes back with a new rate for clk's parent then we propagate ++ * up to clk's parent and set its rate. Upward propagation will continue ++ * until either a clk does not support the CLK_SET_RATE_PARENT flag or ++ * .round_rate stops requesting changes to clk's parent_rate. ++ * ++ * Rate changes are accomplished via tree traversal that also recalculates the ++ * rates for the clocks and fires off POST_RATE_CHANGE notifiers. ++ * ++ * Returns 0 on success, -EERROR otherwise. 
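++ *
++ * Minimal consumer sketch (illustrative; the "baud" con_id and the
++ * 48 MHz target are placeholders, error handling trimmed):
++ *
++ *   clk = devm_clk_get(dev, "baud");
++ *   ret = clk_set_rate(clk, 48000000);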
++ */ ++int clk_set_rate(struct clk *clk, unsigned long rate) +{ -+ unsigned int fbdiv, postdiv1, refdiv, postdiv2; -+ u64 rate64 = 24000000; ++ int ret; + -+ fbdiv = ((con0 >> RK3036_PLLCON0_FBDIV_SHIFT) & -+ RK3036_PLLCON0_FBDIV_MASK); -+ postdiv1 = ((con0 >> RK3036_PLLCON0_POSTDIV1_SHIFT) & -+ RK3036_PLLCON0_POSTDIV1_MASK); -+ refdiv = ((con1 >> RK3036_PLLCON1_REFDIV_SHIFT) & -+ RK3036_PLLCON1_REFDIV_MASK); -+ postdiv2 = ((con1 >> RK3036_PLLCON1_POSTDIV2_SHIFT) & -+ RK3036_PLLCON1_POSTDIV2_MASK); ++ if (!clk) ++ return 0; + -+ rate64 *= fbdiv; -+ do_div(rate64, refdiv); -+ do_div(rate64, postdiv1); -+ do_div(rate64, postdiv2); ++ /* prevent racing with updates to the clock topology */ ++ clk_prepare_lock(); + -+ return (unsigned long)rate64; -+} ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); + -+static void rockchip_rk3036_pll_get_params(struct rockchip_clk_pll *pll, -+ struct rockchip_pll_rate_table *rate) -+{ -+ u32 pllcon; ++ ret = clk_core_set_rate_nolock(clk->core, rate); + -+ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(0)); -+ rate->fbdiv = ((pllcon >> RK3036_PLLCON0_FBDIV_SHIFT) -+ & RK3036_PLLCON0_FBDIV_MASK); -+ rate->postdiv1 = ((pllcon >> RK3036_PLLCON0_POSTDIV1_SHIFT) -+ & RK3036_PLLCON0_POSTDIV1_MASK); ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); + -+ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(1)); -+ rate->refdiv = ((pllcon >> RK3036_PLLCON1_REFDIV_SHIFT) -+ & RK3036_PLLCON1_REFDIV_MASK); -+ rate->postdiv2 = ((pllcon >> RK3036_PLLCON1_POSTDIV2_SHIFT) -+ & RK3036_PLLCON1_POSTDIV2_MASK); -+ rate->dsmpd = ((pllcon >> RK3036_PLLCON1_DSMPD_SHIFT) -+ & RK3036_PLLCON1_DSMPD_MASK); ++ clk_prepare_unlock(); + -+ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(2)); -+ rate->frac = ((pllcon >> RK3036_PLLCON2_FRAC_SHIFT) -+ & RK3036_PLLCON2_FRAC_MASK); ++ return ret; +} ++EXPORT_SYMBOL_GPL(clk_set_rate); + -+static unsigned long rockchip_rk3036_pll_recalc_rate(struct clk_hw *hw, -+ unsigned long prate) ++/** ++ * clk_set_rate_exclusive - specify a new rate and get exclusive control ++ * @clk: the clk whose rate is being changed ++ * @rate: the new rate for clk ++ * ++ * This is a combination of clk_set_rate() and clk_rate_exclusive_get() ++ * within a critical section ++ * ++ * This can be used initially to ensure that at least 1 consumer is ++ * satisfied when several consumers are competing for exclusivity over the ++ * same clock provider. ++ * ++ * The exclusivity is not applied if setting the rate failed. ++ * ++ * Calls to clk_rate_exclusive_get() should be balanced with calls to ++ * clk_rate_exclusive_put(). ++ * ++ * Returns 0 on success, -EERROR otherwise. 
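++ *
++ * Illustrative pairing (the rate is a placeholder) showing the
++ * balanced put once exclusivity is no longer required:
++ *
++ *   ret = clk_set_rate_exclusive(clk, 100000000);
++ *   (clock in use at the guaranteed rate)
++ *   clk_rate_exclusive_put(clk);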
++ */ ++int clk_set_rate_exclusive(struct clk *clk, unsigned long rate) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ struct rockchip_pll_rate_table cur; -+ u64 rate64 = prate, frac_rate64 = prate; -+ -+ if (pll->sel && pll->scaling) -+ return pll->scaling; ++ int ret; + -+ rockchip_rk3036_pll_get_params(pll, &cur); ++ if (!clk) ++ return 0; + -+ rate64 *= cur.fbdiv; -+ do_div(rate64, cur.refdiv); ++ /* prevent racing with updates to the clock topology */ ++ clk_prepare_lock(); + -+ if (cur.dsmpd == 0) { -+ /* fractional mode */ -+ frac_rate64 *= cur.frac; ++ /* ++ * The temporary protection removal is not here, on purpose ++ * This function is meant to be used instead of clk_rate_protect, ++ * so before the consumer code path protect the clock provider ++ */ + -+ do_div(frac_rate64, cur.refdiv); -+ rate64 += frac_rate64 >> 24; ++ ret = clk_core_set_rate_nolock(clk->core, rate); ++ if (!ret) { ++ clk_core_rate_protect(clk->core); ++ clk->exclusive_count++; + } + -+ do_div(rate64, cur.postdiv1); -+ do_div(rate64, cur.postdiv2); ++ clk_prepare_unlock(); + -+ return (unsigned long)rate64; ++ return ret; +} ++EXPORT_SYMBOL_GPL(clk_set_rate_exclusive); + -+static int rockchip_rk3036_pll_set_params(struct rockchip_clk_pll *pll, -+ const struct rockchip_pll_rate_table *rate) ++static int clk_set_rate_range_nolock(struct clk *clk, ++ unsigned long min, ++ unsigned long max) +{ -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; -+ struct rockchip_pll_rate_table cur; -+ u32 pllcon; -+ int rate_change_remuxed = 0; -+ int cur_parent; -+ int ret; ++ int ret = 0; ++ unsigned long old_min, old_max, rate; + -+ pr_debug("%s: rate settings for %lu fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ __func__, rate->rate, rate->fbdiv, rate->postdiv1, rate->refdiv, -+ rate->postdiv2, rate->dsmpd, rate->frac); ++ lockdep_assert_held(&prepare_lock); + -+ rockchip_rk3036_pll_get_params(pll, &cur); -+ cur.rate = 0; ++ if (!clk) ++ return 0; + -+ if (!(pll->flags & ROCKCHIP_PLL_FIXED_MODE)) { -+ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); -+ if (cur_parent == PLL_MODE_NORM) { -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); -+ rate_change_remuxed = 1; -+ } ++ trace_clk_set_rate_range(clk->core, min, max); ++ ++ if (min > max) { ++ pr_err("%s: clk %s dev %s con %s: invalid range [%lu, %lu]\n", ++ __func__, clk->core->name, clk->dev_id, clk->con_id, ++ min, max); ++ return -EINVAL; + } + -+ /* update pll values */ -+ writel_relaxed(HIWORD_UPDATE(rate->fbdiv, RK3036_PLLCON0_FBDIV_MASK, -+ RK3036_PLLCON0_FBDIV_SHIFT) | -+ HIWORD_UPDATE(rate->postdiv1, RK3036_PLLCON0_POSTDIV1_MASK, -+ RK3036_PLLCON0_POSTDIV1_SHIFT), -+ pll->reg_base + RK3036_PLLCON(0)); ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); + -+ writel_relaxed(HIWORD_UPDATE(rate->refdiv, RK3036_PLLCON1_REFDIV_MASK, -+ RK3036_PLLCON1_REFDIV_SHIFT) | -+ HIWORD_UPDATE(rate->postdiv2, RK3036_PLLCON1_POSTDIV2_MASK, -+ RK3036_PLLCON1_POSTDIV2_SHIFT) | -+ HIWORD_UPDATE(rate->dsmpd, RK3036_PLLCON1_DSMPD_MASK, -+ RK3036_PLLCON1_DSMPD_SHIFT), -+ pll->reg_base + RK3036_PLLCON(1)); ++ /* Save the current values in case we need to rollback the change */ ++ old_min = clk->min_rate; ++ old_max = clk->max_rate; ++ clk->min_rate = min; ++ clk->max_rate = max; + -+ /* GPLL CON2 is not HIWORD_MASK */ -+ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(2)); -+ pllcon &= ~(RK3036_PLLCON2_FRAC_MASK << RK3036_PLLCON2_FRAC_SHIFT); -+ pllcon |= 
rate->frac << RK3036_PLLCON2_FRAC_SHIFT; -+ writel_relaxed(pllcon, pll->reg_base + RK3036_PLLCON(2)); ++ if (!clk_core_check_boundaries(clk->core, min, max)) { ++ ret = -EINVAL; ++ goto out; ++ } + -+ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) -+ rockchip_boost_disable_low(pll); ++ rate = clk->core->req_rate; ++ if (clk->core->flags & CLK_GET_RATE_NOCACHE) ++ rate = clk_core_get_rate_recalc(clk->core); + -+ /* wait for the pll to lock */ -+ ret = rockchip_rk3036_pll_wait_lock(pll); ++ /* ++ * Since the boundaries have been changed, let's give the ++ * opportunity to the provider to adjust the clock rate based on ++ * the new boundaries. ++ * ++ * We also need to handle the case where the clock is currently ++ * outside of the boundaries. Clamping the last requested rate ++ * to the current minimum and maximum will also handle this. ++ * ++ * FIXME: ++ * There is a catch. It may fail for the usual reason (clock ++ * broken, clock protected, etc) but also because: ++ * - round_rate() was not favorable and fell on the wrong ++ * side of the boundary ++ * - the determine_rate() callback does not really check for ++ * this corner case when determining the rate ++ */ ++ rate = clamp(rate, min, max); ++ ret = clk_core_set_rate_nolock(clk->core, rate); + if (ret) { -+ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", -+ __func__); -+ rockchip_rk3036_pll_set_params(pll, &cur); ++ /* rollback the changes */ ++ clk->min_rate = old_min; ++ clk->max_rate = old_max; + } + -+ if (rate_change_remuxed) -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++out: ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); + + return ret; +} + -+static int rockchip_rk3036_pll_set_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long prate) ++/** ++ * clk_set_rate_range - set a rate range for a clock source ++ * @clk: clock source ++ * @min: desired minimum clock rate in Hz, inclusive ++ * @max: desired maximum clock rate in Hz, inclusive ++ * ++ * Return: 0 for success or negative errno on failure. ++ */ ++int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; ++ int ret; + -+ pr_debug("%s: changing %s to %lu with a parent rate of %lu\n", -+ __func__, __clk_get_name(hw->clk), drate, prate); ++ if (!clk) ++ return 0; + -+ /* Get required rate settings from table */ -+ rate = rockchip_get_pll_settings(pll, drate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, -+ drate, __clk_get_name(hw->clk)); -+ return -EINVAL; -+ } ++ clk_prepare_lock(); + -+ return rockchip_rk3036_pll_set_params(pll, rate); ++ ret = clk_set_rate_range_nolock(clk, min, max); ++ ++ clk_prepare_unlock(); ++ ++ return ret; +} ++EXPORT_SYMBOL_GPL(clk_set_rate_range); + -+static int rockchip_rk3036_pll_enable(struct clk_hw *hw) ++/** ++ * clk_set_min_rate - set a minimum clock rate for a clock source ++ * @clk: clock source ++ * @rate: desired minimum clock rate in Hz, inclusive ++ * ++ * Returns success (0) or negative errno. 
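++ *
++ * For example (rates are placeholders), a consumer can pin an allowed
++ * window by pairing this with clk_set_max_rate():
++ *
++ *   clk_set_min_rate(clk, 200000000);
++ *   clk_set_max_rate(clk, 300000000);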
++ */ ++int clk_set_min_rate(struct clk *clk, unsigned long rate) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; -+ -+ writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3036_PLLCON(1)); -+ rockchip_rk3036_pll_wait_lock(pll); ++ if (!clk) ++ return 0; + -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++ trace_clk_set_min_rate(clk->core, rate); + -+ return 0; ++ return clk_set_rate_range(clk, rate, clk->max_rate); +} ++EXPORT_SYMBOL_GPL(clk_set_min_rate); + -+static void rockchip_rk3036_pll_disable(struct clk_hw *hw) ++/** ++ * clk_set_max_rate - set a maximum clock rate for a clock source ++ * @clk: clock source ++ * @rate: desired maximum clock rate in Hz, inclusive ++ * ++ * Returns success (0) or negative errno. ++ */ ++int clk_set_max_rate(struct clk *clk, unsigned long rate) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; ++ if (!clk) ++ return 0; + -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ trace_clk_set_max_rate(clk->core, rate); + -+ writel(HIWORD_UPDATE(RK3036_PLLCON1_PWRDOWN, -+ RK3036_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3036_PLLCON(1)); ++ return clk_set_rate_range(clk, clk->min_rate, rate); +} ++EXPORT_SYMBOL_GPL(clk_set_max_rate); + -+static int rockchip_rk3036_pll_is_enabled(struct clk_hw *hw) ++/** ++ * clk_get_parent - return the parent of a clk ++ * @clk: the clk whose parent gets returned ++ * ++ * Simply returns clk->parent. Returns NULL if clk is NULL. ++ */ ++struct clk *clk_get_parent(struct clk *clk) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ u32 pllcon = readl(pll->reg_base + RK3036_PLLCON(1)); ++ struct clk *parent; + -+ return !(pllcon & RK3036_PLLCON1_PWRDOWN); ++ if (!clk) ++ return NULL; ++ ++ clk_prepare_lock(); ++ /* TODO: Create a per-user clk and change callers to call clk_put */ ++ parent = !clk->core->parent ? 
NULL : clk->core->parent->hw->clk; ++ clk_prepare_unlock(); ++ ++ return parent; +} ++EXPORT_SYMBOL_GPL(clk_get_parent); + -+static int rockchip_rk3036_pll_init(struct clk_hw *hw) ++static struct clk_core *__clk_init_parent(struct clk_core *core) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ struct rockchip_pll_rate_table cur; -+ unsigned long drate; ++ u8 index = 0; + -+ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) -+ return 0; ++ if (core->num_parents > 1 && core->ops->get_parent) ++ index = core->ops->get_parent(core->hw); + -+ drate = clk_hw_get_rate(hw); -+ rate = rockchip_get_pll_settings(pll, drate); ++ return clk_core_get_parent_by_index(core, index); ++} + -+ /* when no rate setting for the current rate, rely on clk_set_rate */ -+ if (!rate) -+ return 0; ++static void clk_core_reparent(struct clk_core *core, ++ struct clk_core *new_parent) ++{ ++ clk_reparent(core, new_parent); ++ __clk_recalc_accuracies(core); ++ __clk_recalc_rates(core, true, POST_RATE_CHANGE); ++} + -+ rockchip_rk3036_pll_get_params(pll, &cur); ++void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent) ++{ ++ if (!hw) ++ return; + -+ pr_debug("%s: pll %s@%lu: Hz\n", __func__, __clk_get_name(hw->clk), -+ drate); -+ pr_debug("old - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ cur.fbdiv, cur.postdiv1, cur.refdiv, cur.postdiv2, -+ cur.dsmpd, cur.frac); -+ pr_debug("new - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ rate->fbdiv, rate->postdiv1, rate->refdiv, rate->postdiv2, -+ rate->dsmpd, rate->frac); ++ clk_core_reparent(hw->core, !new_parent ? NULL : new_parent->core); ++} + -+ if (rate->fbdiv != cur.fbdiv || rate->postdiv1 != cur.postdiv1 || -+ rate->refdiv != cur.refdiv || rate->postdiv2 != cur.postdiv2 || -+ rate->dsmpd != cur.dsmpd || -+ (!cur.dsmpd && (rate->frac != cur.frac))) { -+ struct clk *parent = clk_get_parent(hw->clk); ++/** ++ * clk_has_parent - check if a clock is a possible parent for another ++ * @clk: clock source ++ * @parent: parent clock source ++ * ++ * This function can be used in drivers that need to check that a clock can be ++ * the parent of another without actually changing the parent. ++ * ++ * Returns true if @parent is a possible parent for @clk, false otherwise. ++ */ ++bool clk_has_parent(const struct clk *clk, const struct clk *parent) ++{ ++ /* NULL clocks should be nops, so return success if either is NULL. 
*/ ++ if (!clk || !parent) ++ return true; + -+ if (!parent) { -+ pr_warn("%s: parent of %s not available\n", -+ __func__, __clk_get_name(hw->clk)); -+ return 0; -+ } ++ return clk_core_has_parent(clk->core, parent->core); ++} ++EXPORT_SYMBOL_GPL(clk_has_parent); + -+ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", -+ __func__, __clk_get_name(hw->clk)); -+ rockchip_rk3036_pll_set_params(pll, rate); -+ } ++static int clk_core_set_parent_nolock(struct clk_core *core, ++ struct clk_core *parent) ++{ ++ int ret = 0; ++ int p_index = 0; ++ unsigned long p_rate = 0; + -+ return 0; -+} ++ lockdep_assert_held(&prepare_lock); + -+static const struct clk_ops rockchip_rk3036_pll_clk_norate_ops = { -+ .recalc_rate = rockchip_rk3036_pll_recalc_rate, -+ .enable = rockchip_rk3036_pll_enable, -+ .disable = rockchip_rk3036_pll_disable, -+ .is_enabled = rockchip_rk3036_pll_is_enabled, -+}; ++ if (!core) ++ return 0; + -+static const struct clk_ops rockchip_rk3036_pll_clk_ops = { -+ .recalc_rate = rockchip_rk3036_pll_recalc_rate, -+ .round_rate = rockchip_pll_round_rate, -+ .set_rate = rockchip_rk3036_pll_set_rate, -+ .enable = rockchip_rk3036_pll_enable, -+ .disable = rockchip_rk3036_pll_disable, -+ .is_enabled = rockchip_rk3036_pll_is_enabled, -+ .init = rockchip_rk3036_pll_init, -+}; ++ if (core->parent == parent) ++ return 0; + -+/* -+ * PLL used in RK3066, RK3188 and RK3288 -+ */ ++ /* verify ops for multi-parent clks */ ++ if (core->num_parents > 1 && !core->ops->set_parent) ++ return -EPERM; + -+#define RK3066_PLL_RESET_DELAY(nr) ((nr * 500) / 24 + 1) ++ /* check that we are allowed to re-parent if the clock is in use */ ++ if ((core->flags & CLK_SET_PARENT_GATE) && core->prepare_count) ++ return -EBUSY; + -+#define RK3066_PLLCON(i) (i * 0x4) -+#define RK3066_PLLCON0_OD_MASK 0xf -+#define RK3066_PLLCON0_OD_SHIFT 0 -+#define RK3066_PLLCON0_NR_MASK 0x3f -+#define RK3066_PLLCON0_NR_SHIFT 8 -+#define RK3066_PLLCON1_NF_MASK 0x1fff -+#define RK3066_PLLCON1_NF_SHIFT 0 -+#define RK3066_PLLCON2_NB_MASK 0xfff -+#define RK3066_PLLCON2_NB_SHIFT 0 -+#define RK3066_PLLCON3_RESET (1 << 5) -+#define RK3066_PLLCON3_PWRDOWN (1 << 1) -+#define RK3066_PLLCON3_BYPASS (1 << 0) ++ if (clk_core_rate_is_protected(core)) ++ return -EBUSY; + -+static void rockchip_rk3066_pll_get_params(struct rockchip_clk_pll *pll, -+ struct rockchip_pll_rate_table *rate) -+{ -+ u32 pllcon; ++ /* try finding the new parent index */ ++ if (parent) { ++ p_index = clk_fetch_parent_index(core, parent); ++ if (p_index < 0) { ++ pr_debug("%s: clk %s can not be parent of clk %s\n", ++ __func__, parent->name, core->name); ++ return p_index; ++ } ++ p_rate = parent->rate; ++ } + -+ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(0)); -+ rate->nr = ((pllcon >> RK3066_PLLCON0_NR_SHIFT) -+ & RK3066_PLLCON0_NR_MASK) + 1; -+ rate->no = ((pllcon >> RK3066_PLLCON0_OD_SHIFT) -+ & RK3066_PLLCON0_OD_MASK) + 1; ++ ret = clk_pm_runtime_get(core); ++ if (ret) ++ return ret; + -+ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(1)); -+ rate->nf = ((pllcon >> RK3066_PLLCON1_NF_SHIFT) -+ & RK3066_PLLCON1_NF_MASK) + 1; ++ /* propagate PRE_RATE_CHANGE notifications */ ++ ret = __clk_speculate_rates(core, p_rate); + -+ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(2)); -+ rate->nb = ((pllcon >> RK3066_PLLCON2_NB_SHIFT) -+ & RK3066_PLLCON2_NB_MASK) + 1; -+} ++ /* abort if a driver objects */ ++ if (ret & NOTIFY_STOP_MASK) ++ goto runtime_put; + -+static unsigned long rockchip_rk3066_pll_recalc_rate(struct clk_hw *hw, -+ unsigned 
long prate) -+{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ struct rockchip_pll_rate_table cur; -+ u64 rate64 = prate; -+ u32 pllcon; ++ /* do the re-parent */ ++ ret = __clk_set_parent(core, parent, p_index); + -+ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(3)); -+ if (pllcon & RK3066_PLLCON3_BYPASS) { -+ pr_debug("%s: pll %s is bypassed\n", __func__, -+ clk_hw_get_name(hw)); -+ return prate; ++ /* propagate rate an accuracy recalculation accordingly */ ++ if (ret) { ++ __clk_recalc_rates(core, true, ABORT_RATE_CHANGE); ++ } else { ++ __clk_recalc_rates(core, true, POST_RATE_CHANGE); ++ __clk_recalc_accuracies(core); + } + -+ if (pll->sel && pll->scaling) -+ return pll->scaling; -+ -+ rockchip_rk3066_pll_get_params(pll, &cur); ++runtime_put: ++ clk_pm_runtime_put(core); + -+ rate64 *= cur.nf; -+ do_div(rate64, cur.nr); -+ do_div(rate64, cur.no); ++ return ret; ++} + -+ return (unsigned long)rate64; ++int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *parent) ++{ ++ return clk_core_set_parent_nolock(hw->core, parent->core); +} ++EXPORT_SYMBOL_GPL(clk_hw_set_parent); + -+static int rockchip_rk3066_pll_set_params(struct rockchip_clk_pll *pll, -+ const struct rockchip_pll_rate_table *rate) ++/** ++ * clk_set_parent - switch the parent of a mux clk ++ * @clk: the mux clk whose input we are switching ++ * @parent: the new input to clk ++ * ++ * Re-parent clk to use parent as its new input source. If clk is in ++ * prepared state, the clk will get enabled for the duration of this call. If ++ * that's not acceptable for a specific clk (Eg: the consumer can't handle ++ * that, the reparenting is glitchy in hardware, etc), use the ++ * CLK_SET_PARENT_GATE flag to allow reparenting only when clk is unprepared. ++ * ++ * After successfully changing clk's parent clk_set_parent will update the ++ * clk topology, sysfs topology and propagate rate recalculation via ++ * __clk_recalc_rates. ++ * ++ * Returns 0 on success, -EERROR otherwise. ++ */ ++int clk_set_parent(struct clk *clk, struct clk *parent) +{ -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; -+ struct rockchip_pll_rate_table cur; -+ int rate_change_remuxed = 0; -+ int cur_parent; + int ret; + -+ pr_debug("%s: rate settings for %lu (nr, no, nf): (%d, %d, %d)\n", -+ __func__, rate->rate, rate->nr, rate->no, rate->nf); ++ if (!clk) ++ return 0; + -+ rockchip_rk3066_pll_get_params(pll, &cur); -+ cur.rate = 0; ++ clk_prepare_lock(); + -+ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); -+ if (cur_parent == PLL_MODE_NORM) { -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); -+ rate_change_remuxed = 1; -+ } ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); + -+ /* enter reset mode */ -+ writel(HIWORD_UPDATE(RK3066_PLLCON3_RESET, RK3066_PLLCON3_RESET, 0), -+ pll->reg_base + RK3066_PLLCON(3)); ++ ret = clk_core_set_parent_nolock(clk->core, ++ parent ? 
parent->core : NULL); + -+ /* update pll values */ -+ writel(HIWORD_UPDATE(rate->nr - 1, RK3066_PLLCON0_NR_MASK, -+ RK3066_PLLCON0_NR_SHIFT) | -+ HIWORD_UPDATE(rate->no - 1, RK3066_PLLCON0_OD_MASK, -+ RK3066_PLLCON0_OD_SHIFT), -+ pll->reg_base + RK3066_PLLCON(0)); ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); + -+ writel_relaxed(HIWORD_UPDATE(rate->nf - 1, RK3066_PLLCON1_NF_MASK, -+ RK3066_PLLCON1_NF_SHIFT), -+ pll->reg_base + RK3066_PLLCON(1)); -+ writel_relaxed(HIWORD_UPDATE(rate->nb - 1, RK3066_PLLCON2_NB_MASK, -+ RK3066_PLLCON2_NB_SHIFT), -+ pll->reg_base + RK3066_PLLCON(2)); ++ clk_prepare_unlock(); + -+ /* leave reset and wait the reset_delay */ -+ writel(HIWORD_UPDATE(0, RK3066_PLLCON3_RESET, 0), -+ pll->reg_base + RK3066_PLLCON(3)); -+ udelay(RK3066_PLL_RESET_DELAY(rate->nr)); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(clk_set_parent); + -+ /* wait for the pll to lock */ -+ ret = rockchip_pll_wait_lock(pll); -+ if (ret) { -+ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", -+ __func__); -+ rockchip_rk3066_pll_set_params(pll, &cur); ++static int clk_core_set_phase_nolock(struct clk_core *core, int degrees) ++{ ++ int ret = -EINVAL; ++ ++ lockdep_assert_held(&prepare_lock); ++ ++ if (!core) ++ return 0; ++ ++ if (clk_core_rate_is_protected(core)) ++ return -EBUSY; ++ ++ trace_clk_set_phase(core, degrees); ++ ++ if (core->ops->set_phase) { ++ ret = core->ops->set_phase(core->hw, degrees); ++ if (!ret) ++ core->phase = degrees; + } + -+ if (rate_change_remuxed) -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++ trace_clk_set_phase_complete(core, degrees); + + return ret; +} + -+static int rockchip_rk3066_pll_set_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long prate) ++/** ++ * clk_set_phase - adjust the phase shift of a clock signal ++ * @clk: clock signal source ++ * @degrees: number of degrees the signal is shifted ++ * ++ * Shifts the phase of a clock signal by the specified ++ * degrees. Returns 0 on success, -EERROR otherwise. ++ * ++ * This function makes no distinction about the input or reference ++ * signal that we adjust the clock signal phase against. For example ++ * phase locked-loop clock signal generators we may shift phase with ++ * respect to feedback clock signal input, but for other cases the ++ * clock phase may be shifted with respect to some other, unspecified ++ * signal. ++ * ++ * Additionally the concept of phase shift does not propagate through ++ * the clock tree hierarchy, which sets it apart from clock rates and ++ * clock accuracy. A parent clock phase attribute does not have an ++ * impact on the phase attribute of a child clock. 
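++ *
++ * Example (illustrative): shift a sampling clock by a quarter period
++ * relative to its reference:
++ *
++ *   clk_set_phase(sample_clk, 90);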
++ */ ++int clk_set_phase(struct clk *clk, int degrees) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ unsigned long old_rate = rockchip_rk3066_pll_recalc_rate(hw, prate); -+ struct regmap *grf = pll->ctx->grf; + int ret; + -+ if (IS_ERR(grf)) { -+ pr_debug("%s: grf regmap not available, aborting rate change\n", -+ __func__); -+ return PTR_ERR(grf); -+ } ++ if (!clk) ++ return 0; + -+ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", -+ __func__, clk_hw_get_name(hw), old_rate, drate, prate); ++ /* sanity check degrees */ ++ degrees %= 360; ++ if (degrees < 0) ++ degrees += 360; + -+ /* Get required rate settings from table */ -+ rate = rockchip_get_pll_settings(pll, drate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, -+ drate, clk_hw_get_name(hw)); -+ return -EINVAL; -+ } ++ clk_prepare_lock(); + -+ ret = rockchip_rk3066_pll_set_params(pll, rate); -+ if (ret) -+ pll->scaling = 0; ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); ++ ++ ret = clk_core_set_phase_nolock(clk->core, degrees); ++ ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); ++ ++ clk_prepare_unlock(); + + return ret; +} ++EXPORT_SYMBOL_GPL(clk_set_phase); + -+static int rockchip_rk3066_pll_enable(struct clk_hw *hw) ++static int clk_core_get_phase(struct clk_core *core) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ int ret; + -+ writel(HIWORD_UPDATE(0, RK3066_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3066_PLLCON(3)); -+ rockchip_pll_wait_lock(pll); ++ lockdep_assert_held(&prepare_lock); ++ if (!core->ops->get_phase) ++ return 0; + -+ return 0; ++ /* Always try to update cached phase if possible */ ++ ret = core->ops->get_phase(core->hw); ++ if (ret >= 0) ++ core->phase = ret; ++ ++ return ret; +} + -+static void rockchip_rk3066_pll_disable(struct clk_hw *hw) ++/** ++ * clk_get_phase - return the phase shift of a clock signal ++ * @clk: clock signal source ++ * ++ * Returns the phase shift of a clock node in degrees, otherwise returns ++ * -EERROR. 
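++ *
++ * A negative return value is an error code, not a phase, so callers
++ * should check it first, e.g.:
++ *
++ *   ret = clk_get_phase(clk);
++ *   if (ret < 0)
++ *           return ret;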
++ */ ++int clk_get_phase(struct clk *clk) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ int ret; + -+ writel(HIWORD_UPDATE(RK3066_PLLCON3_PWRDOWN, -+ RK3066_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3066_PLLCON(3)); -+} ++ if (!clk) ++ return 0; + -+static int rockchip_rk3066_pll_is_enabled(struct clk_hw *hw) -+{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ u32 pllcon = readl(pll->reg_base + RK3066_PLLCON(3)); ++ clk_prepare_lock(); ++ ret = clk_core_get_phase(clk->core); ++ clk_prepare_unlock(); + -+ return !(pllcon & RK3066_PLLCON3_PWRDOWN); ++ return ret; +} ++EXPORT_SYMBOL_GPL(clk_get_phase); + -+static int rockchip_rk3066_pll_init(struct clk_hw *hw) ++static void clk_core_reset_duty_cycle_nolock(struct clk_core *core) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ struct rockchip_pll_rate_table cur; -+ unsigned long drate; ++ /* Assume a default value of 50% */ ++ core->duty.num = 1; ++ core->duty.den = 2; ++} + -+ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) -+ return 0; ++static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core); + -+ drate = clk_hw_get_rate(hw); -+ rate = rockchip_get_pll_settings(pll, drate); ++static int clk_core_update_duty_cycle_nolock(struct clk_core *core) ++{ ++ struct clk_duty *duty = &core->duty; ++ int ret = 0; + -+ /* when no rate setting for the current rate, rely on clk_set_rate */ -+ if (!rate) -+ return 0; ++ if (!core->ops->get_duty_cycle) ++ return clk_core_update_duty_cycle_parent_nolock(core); + -+ rockchip_rk3066_pll_get_params(pll, &cur); ++ ret = core->ops->get_duty_cycle(core->hw, duty); ++ if (ret) ++ goto reset; + -+ pr_debug("%s: pll %s@%lu: nr (%d:%d); no (%d:%d); nf(%d:%d), nb(%d:%d)\n", -+ __func__, clk_hw_get_name(hw), drate, rate->nr, cur.nr, -+ rate->no, cur.no, rate->nf, cur.nf, rate->nb, cur.nb); -+ if (rate->nr != cur.nr || rate->no != cur.no || rate->nf != cur.nf -+ || rate->nb != cur.nb) { -+ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", -+ __func__, clk_hw_get_name(hw)); -+ rockchip_rk3066_pll_set_params(pll, rate); ++ /* Don't trust the clock provider too much */ ++ if (duty->den == 0 || duty->num > duty->den) { ++ ret = -EINVAL; ++ goto reset; + } + + return 0; ++ ++reset: ++ clk_core_reset_duty_cycle_nolock(core); ++ return ret; +} + -+static const struct clk_ops rockchip_rk3066_pll_clk_norate_ops = { -+ .recalc_rate = rockchip_rk3066_pll_recalc_rate, -+ .enable = rockchip_rk3066_pll_enable, -+ .disable = rockchip_rk3066_pll_disable, -+ .is_enabled = rockchip_rk3066_pll_is_enabled, -+}; ++static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core) ++{ ++ int ret = 0; + -+static const struct clk_ops rockchip_rk3066_pll_clk_ops = { -+ .recalc_rate = rockchip_rk3066_pll_recalc_rate, -+ .round_rate = rockchip_pll_round_rate, -+ .set_rate = rockchip_rk3066_pll_set_rate, -+ .enable = rockchip_rk3066_pll_enable, -+ .disable = rockchip_rk3066_pll_disable, -+ .is_enabled = rockchip_rk3066_pll_is_enabled, -+ .init = rockchip_rk3066_pll_init, -+}; ++ if (core->parent && ++ core->flags & CLK_DUTY_CYCLE_PARENT) { ++ ret = clk_core_update_duty_cycle_nolock(core->parent); ++ memcpy(&core->duty, &core->parent->duty, sizeof(core->duty)); ++ } else { ++ clk_core_reset_duty_cycle_nolock(core); ++ } + -+/* -+ * PLL used in RK3399 -+ */ ++ return ret; ++} + -+#define RK3399_PLLCON(i) (i * 0x4) -+#define RK3399_PLLCON0_FBDIV_MASK 0xfff -+#define RK3399_PLLCON0_FBDIV_SHIFT 0 -+#define 
RK3399_PLLCON1_REFDIV_MASK 0x3f -+#define RK3399_PLLCON1_REFDIV_SHIFT 0 -+#define RK3399_PLLCON1_POSTDIV1_MASK 0x7 -+#define RK3399_PLLCON1_POSTDIV1_SHIFT 8 -+#define RK3399_PLLCON1_POSTDIV2_MASK 0x7 -+#define RK3399_PLLCON1_POSTDIV2_SHIFT 12 -+#define RK3399_PLLCON2_FRAC_MASK 0xffffff -+#define RK3399_PLLCON2_FRAC_SHIFT 0 -+#define RK3399_PLLCON2_LOCK_STATUS BIT(31) -+#define RK3399_PLLCON3_PWRDOWN BIT(0) -+#define RK3399_PLLCON3_DSMPD_MASK 0x1 -+#define RK3399_PLLCON3_DSMPD_SHIFT 3 ++static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, ++ struct clk_duty *duty); + -+static int rockchip_rk3399_pll_wait_lock(struct rockchip_clk_pll *pll) ++static int clk_core_set_duty_cycle_nolock(struct clk_core *core, ++ struct clk_duty *duty) +{ -+ u32 pllcon; + int ret; + -+ /* -+ * Lock time typical 250, max 500 input clock cycles @24MHz -+ * So define a very safe maximum of 1000us, meaning 24000 cycles. -+ */ -+ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3399_PLLCON(2), -+ pllcon, -+ pllcon & RK3399_PLLCON2_LOCK_STATUS, -+ 0, 1000); -+ if (ret) -+ pr_err("%s: timeout waiting for pll to lock\n", __func__); ++ lockdep_assert_held(&prepare_lock); ++ ++ if (clk_core_rate_is_protected(core)) ++ return -EBUSY; ++ ++ trace_clk_set_duty_cycle(core, duty); ++ ++ if (!core->ops->set_duty_cycle) ++ return clk_core_set_duty_cycle_parent_nolock(core, duty); ++ ++ ret = core->ops->set_duty_cycle(core->hw, duty); ++ if (!ret) ++ memcpy(&core->duty, duty, sizeof(*duty)); ++ ++ trace_clk_set_duty_cycle_complete(core, duty); + + return ret; +} + -+static void rockchip_rk3399_pll_get_params(struct rockchip_clk_pll *pll, -+ struct rockchip_pll_rate_table *rate) ++static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, ++ struct clk_duty *duty) +{ -+ u32 pllcon; -+ -+ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(0)); -+ rate->fbdiv = ((pllcon >> RK3399_PLLCON0_FBDIV_SHIFT) -+ & RK3399_PLLCON0_FBDIV_MASK); -+ -+ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(1)); -+ rate->refdiv = ((pllcon >> RK3399_PLLCON1_REFDIV_SHIFT) -+ & RK3399_PLLCON1_REFDIV_MASK); -+ rate->postdiv1 = ((pllcon >> RK3399_PLLCON1_POSTDIV1_SHIFT) -+ & RK3399_PLLCON1_POSTDIV1_MASK); -+ rate->postdiv2 = ((pllcon >> RK3399_PLLCON1_POSTDIV2_SHIFT) -+ & RK3399_PLLCON1_POSTDIV2_MASK); ++ int ret = 0; + -+ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); -+ rate->frac = ((pllcon >> RK3399_PLLCON2_FRAC_SHIFT) -+ & RK3399_PLLCON2_FRAC_MASK); ++ if (core->parent && ++ core->flags & (CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT)) { ++ ret = clk_core_set_duty_cycle_nolock(core->parent, duty); ++ memcpy(&core->duty, &core->parent->duty, sizeof(core->duty)); ++ } + -+ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(3)); -+ rate->dsmpd = ((pllcon >> RK3399_PLLCON3_DSMPD_SHIFT) -+ & RK3399_PLLCON3_DSMPD_MASK); ++ return ret; +} + -+static unsigned long rockchip_rk3399_pll_recalc_rate(struct clk_hw *hw, -+ unsigned long prate) ++/** ++ * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal ++ * @clk: clock signal source ++ * @num: numerator of the duty cycle ratio to be applied ++ * @den: denominator of the duty cycle ratio to be applied ++ * ++ * Apply the duty cycle ratio if the ratio is valid and the clock can ++ * perform this operation ++ * ++ * Returns (0) on success, a negative errno otherwise. 
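++ *
++ * For instance, a 25% duty cycle is requested as num = 1, den = 4:
++ *
++ *   clk_set_duty_cycle(clk, 1, 4);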
++ */ ++int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ struct rockchip_pll_rate_table cur; -+ u64 rate64 = prate; ++ int ret; ++ struct clk_duty duty; + -+ if (pll->sel && pll->scaling) -+ return pll->scaling; ++ if (!clk) ++ return 0; + -+ rockchip_rk3399_pll_get_params(pll, &cur); ++ /* sanity check the ratio */ ++ if (den == 0 || num > den) ++ return -EINVAL; + -+ rate64 *= cur.fbdiv; -+ do_div(rate64, cur.refdiv); ++ duty.num = num; ++ duty.den = den; + -+ if (cur.dsmpd == 0) { -+ /* fractional mode */ -+ u64 frac_rate64 = prate * cur.frac; ++ clk_prepare_lock(); + -+ do_div(frac_rate64, cur.refdiv); -+ rate64 += frac_rate64 >> 24; -+ } ++ if (clk->exclusive_count) ++ clk_core_rate_unprotect(clk->core); + -+ do_div(rate64, cur.postdiv1); -+ do_div(rate64, cur.postdiv2); ++ ret = clk_core_set_duty_cycle_nolock(clk->core, &duty); + -+ return (unsigned long)rate64; ++ if (clk->exclusive_count) ++ clk_core_rate_protect(clk->core); ++ ++ clk_prepare_unlock(); ++ ++ return ret; +} ++EXPORT_SYMBOL_GPL(clk_set_duty_cycle); + -+static int rockchip_rk3399_pll_set_params(struct rockchip_clk_pll *pll, -+ const struct rockchip_pll_rate_table *rate) ++static int clk_core_get_scaled_duty_cycle(struct clk_core *core, ++ unsigned int scale) +{ -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; -+ struct rockchip_pll_rate_table cur; -+ u32 pllcon; -+ int rate_change_remuxed = 0; -+ int cur_parent; ++ struct clk_duty *duty = &core->duty; + int ret; + -+ pr_debug("%s: rate settings for %lu fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ __func__, rate->rate, rate->fbdiv, rate->postdiv1, rate->refdiv, -+ rate->postdiv2, rate->dsmpd, rate->frac); ++ clk_prepare_lock(); + -+ rockchip_rk3399_pll_get_params(pll, &cur); -+ cur.rate = 0; ++ ret = clk_core_update_duty_cycle_nolock(core); ++ if (!ret) ++ ret = mult_frac(scale, duty->num, duty->den); + -+ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); -+ if (cur_parent == PLL_MODE_NORM) { -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); -+ rate_change_remuxed = 1; -+ } ++ clk_prepare_unlock(); + -+ /* set pll power down */ -+ writel(HIWORD_UPDATE(RK3399_PLLCON3_PWRDOWN, -+ RK3399_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3399_PLLCON(3)); ++ return ret; ++} + -+ /* update pll values */ -+ writel_relaxed(HIWORD_UPDATE(rate->fbdiv, RK3399_PLLCON0_FBDIV_MASK, -+ RK3399_PLLCON0_FBDIV_SHIFT), -+ pll->reg_base + RK3399_PLLCON(0)); ++/** ++ * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal ++ * @clk: clock signal source ++ * @scale: scaling factor to be applied to represent the ratio as an integer ++ * ++ * Returns the duty cycle ratio of a clock node multiplied by the provided ++ * scaling factor, or negative errno on error. 
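++ *
++ * With scale = 100 a 1/4 ratio is reported as 25 (i.e. percent); the
++ * debugfs helpers below pass 100000 for finer resolution.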
++ */ ++int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale) ++{ ++ if (!clk) ++ return 0; + -+ writel_relaxed(HIWORD_UPDATE(rate->refdiv, RK3399_PLLCON1_REFDIV_MASK, -+ RK3399_PLLCON1_REFDIV_SHIFT) | -+ HIWORD_UPDATE(rate->postdiv1, RK3399_PLLCON1_POSTDIV1_MASK, -+ RK3399_PLLCON1_POSTDIV1_SHIFT) | -+ HIWORD_UPDATE(rate->postdiv2, RK3399_PLLCON1_POSTDIV2_MASK, -+ RK3399_PLLCON1_POSTDIV2_SHIFT), -+ pll->reg_base + RK3399_PLLCON(1)); ++ return clk_core_get_scaled_duty_cycle(clk->core, scale); ++} ++EXPORT_SYMBOL_GPL(clk_get_scaled_duty_cycle); + -+ /* xPLL CON2 is not HIWORD_MASK */ -+ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); -+ pllcon &= ~(RK3399_PLLCON2_FRAC_MASK << RK3399_PLLCON2_FRAC_SHIFT); -+ pllcon |= rate->frac << RK3399_PLLCON2_FRAC_SHIFT; -+ writel_relaxed(pllcon, pll->reg_base + RK3399_PLLCON(2)); ++/** ++ * clk_is_match - check if two clk's point to the same hardware clock ++ * @p: clk compared against q ++ * @q: clk compared against p ++ * ++ * Returns true if the two struct clk pointers both point to the same hardware ++ * clock node. Put differently, returns true if struct clk *p and struct clk *q ++ * share the same struct clk_core object. ++ * ++ * Returns false otherwise. Note that two NULL clks are treated as matching. ++ */ ++bool clk_is_match(const struct clk *p, const struct clk *q) ++{ ++ /* trivial case: identical struct clk's or both NULL */ ++ if (p == q) ++ return true; + -+ writel_relaxed(HIWORD_UPDATE(rate->dsmpd, RK3399_PLLCON3_DSMPD_MASK, -+ RK3399_PLLCON3_DSMPD_SHIFT), -+ pll->reg_base + RK3399_PLLCON(3)); ++ /* true if clk->core pointers match. Avoid dereferencing garbage */ ++ if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q)) ++ if (p->core == q->core) ++ return true; + -+ /* set pll power up */ -+ writel(HIWORD_UPDATE(0, -+ RK3399_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3399_PLLCON(3)); ++ return false; ++} ++EXPORT_SYMBOL_GPL(clk_is_match); + -+ /* wait for the pll to lock */ -+ ret = rockchip_rk3399_pll_wait_lock(pll); -+ if (ret) { -+ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", -+ __func__); -+ rockchip_rk3399_pll_set_params(pll, &cur); -+ } ++/*** debugfs support ***/ + -+ if (rate_change_remuxed) -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++#ifdef CONFIG_DEBUG_FS ++#include + -+ return ret; -+} ++static struct dentry *rootdir; ++static int inited = 0; ++static DEFINE_MUTEX(clk_debug_lock); ++static HLIST_HEAD(clk_debug_list); + -+static int rockchip_rk3399_pll_set_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long prate) ++static struct hlist_head *orphan_list[] = { ++ &clk_orphan_list, ++ NULL, ++}; ++ ++static void clk_summary_show_one(struct seq_file *s, struct clk_core *c, ++ int level) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ unsigned long old_rate = rockchip_rk3399_pll_recalc_rate(hw, prate); -+ int ret; ++ int phase; ++ struct clk *clk_user; ++ int multi_node = 0; + -+ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", -+ __func__, __clk_get_name(hw->clk), old_rate, drate, prate); ++ seq_printf(s, "%*s%-*s %-7d %-8d %-8d %-11lu %-10lu ", ++ level * 3 + 1, "", ++ 35 - level * 3, c->name, ++ c->enable_count, c->prepare_count, c->protect_count, ++ clk_core_get_rate_recalc(c), ++ clk_core_get_accuracy_recalc(c)); + -+ /* Get required rate settings from table */ -+ rate = rockchip_get_pll_settings(pll, drate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for pll clk %s\n", 
__func__, -+ drate, __clk_get_name(hw->clk)); -+ return -EINVAL; -+ } ++ phase = clk_core_get_phase(c); ++ if (phase >= 0) ++ seq_printf(s, "%-5d", phase); ++ else ++ seq_puts(s, "-----"); + -+ ret = rockchip_rk3399_pll_set_params(pll, rate); -+ if (ret) -+ pll->scaling = 0; ++ seq_printf(s, " %-6d", clk_core_get_scaled_duty_cycle(c, 100000)); ++ ++ if (c->ops->is_enabled) ++ seq_printf(s, " %5c ", clk_core_is_enabled(c) ? 'Y' : 'N'); ++ else if (!c->ops->enable) ++ seq_printf(s, " %5c ", 'Y'); ++ else ++ seq_printf(s, " %5c ", '?'); ++ ++ hlist_for_each_entry(clk_user, &c->clks, clks_node) { ++ seq_printf(s, "%*s%-*s %-25s\n", ++ level * 3 + 2 + 105 * multi_node, "", ++ 30, ++ clk_user->dev_id ? clk_user->dev_id : "deviceless", ++ clk_user->con_id ? clk_user->con_id : "no_connection_id"); ++ ++ multi_node = 1; ++ } + -+ return ret; +} + -+static int rockchip_rk3399_pll_enable(struct clk_hw *hw) ++static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, ++ int level) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct clk_core *child; + -+ writel(HIWORD_UPDATE(0, RK3399_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3399_PLLCON(3)); -+ rockchip_rk3399_pll_wait_lock(pll); ++ clk_summary_show_one(s, c, level); + -+ return 0; ++ hlist_for_each_entry(child, &c->children, child_node) ++ clk_summary_show_subtree(s, child, level + 1); +} + -+static void rockchip_rk3399_pll_disable(struct clk_hw *hw) ++static int clk_summary_show(struct seq_file *s, void *data) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct clk_core *c; ++ struct hlist_head **lists = s->private; ++ int ret; + -+ writel(HIWORD_UPDATE(RK3399_PLLCON3_PWRDOWN, -+ RK3399_PLLCON3_PWRDOWN, 0), -+ pll->reg_base + RK3399_PLLCON(3)); ++ seq_puts(s, " enable prepare protect duty hardware connection\n"); ++ seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); ++ seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); ++ ++ ret = clk_pm_runtime_get_all(); ++ if (ret) ++ return ret; ++ ++ clk_prepare_lock(); ++ ++ for (; *lists; lists++) ++ hlist_for_each_entry(c, *lists, child_node) ++ clk_summary_show_subtree(s, c, 0); ++ ++ clk_prepare_unlock(); ++ clk_pm_runtime_put_all(); ++ ++ return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_summary); + -+static int rockchip_rk3399_pll_is_enabled(struct clk_hw *hw) ++static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ u32 pllcon = readl(pll->reg_base + RK3399_PLLCON(3)); ++ int phase; ++ unsigned long min_rate, max_rate; + -+ return !(pllcon & RK3399_PLLCON3_PWRDOWN); ++ clk_core_get_boundaries(c, &min_rate, &max_rate); ++ ++ /* This should be JSON format, i.e. 
elements separated with a comma */ ++ seq_printf(s, "\"%s\": { ", c->name); ++ seq_printf(s, "\"enable_count\": %d,", c->enable_count); ++ seq_printf(s, "\"prepare_count\": %d,", c->prepare_count); ++ seq_printf(s, "\"protect_count\": %d,", c->protect_count); ++ seq_printf(s, "\"rate\": %lu,", clk_core_get_rate_recalc(c)); ++ seq_printf(s, "\"min_rate\": %lu,", min_rate); ++ seq_printf(s, "\"max_rate\": %lu,", max_rate); ++ seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy_recalc(c)); ++ phase = clk_core_get_phase(c); ++ if (phase >= 0) ++ seq_printf(s, "\"phase\": %d,", phase); ++ seq_printf(s, "\"duty_cycle\": %u", ++ clk_core_get_scaled_duty_cycle(c, 100000)); +} + -+static int rockchip_rk3399_pll_init(struct clk_hw *hw) ++static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ struct rockchip_pll_rate_table cur; -+ unsigned long drate; ++ struct clk_core *child; + -+ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) -+ return 0; ++ clk_dump_one(s, c, level); + -+ drate = clk_hw_get_rate(hw); -+ rate = rockchip_get_pll_settings(pll, drate); ++ hlist_for_each_entry(child, &c->children, child_node) { ++ seq_putc(s, ','); ++ clk_dump_subtree(s, child, level + 1); ++ } + -+ /* when no rate setting for the current rate, rely on clk_set_rate */ -+ if (!rate) -+ return 0; ++ seq_putc(s, '}'); ++} + -+ rockchip_rk3399_pll_get_params(pll, &cur); ++static int clk_dump_show(struct seq_file *s, void *data) ++{ ++ struct clk_core *c; ++ bool first_node = true; ++ struct hlist_head **lists = s->private; ++ int ret; + -+ pr_debug("%s: pll %s@%lu: Hz\n", __func__, __clk_get_name(hw->clk), -+ drate); -+ pr_debug("old - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ cur.fbdiv, cur.postdiv1, cur.refdiv, cur.postdiv2, -+ cur.dsmpd, cur.frac); -+ pr_debug("new - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", -+ rate->fbdiv, rate->postdiv1, rate->refdiv, rate->postdiv2, -+ rate->dsmpd, rate->frac); ++ ret = clk_pm_runtime_get_all(); ++ if (ret) ++ return ret; + -+ if (rate->fbdiv != cur.fbdiv || rate->postdiv1 != cur.postdiv1 || -+ rate->refdiv != cur.refdiv || rate->postdiv2 != cur.postdiv2 || -+ rate->dsmpd != cur.dsmpd || -+ (!cur.dsmpd && (rate->frac != cur.frac))) { -+ struct clk *parent = clk_get_parent(hw->clk); ++ seq_putc(s, '{'); + -+ if (!parent) { -+ pr_warn("%s: parent of %s not available\n", -+ __func__, __clk_get_name(hw->clk)); -+ return 0; -+ } ++ clk_prepare_lock(); + -+ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", -+ __func__, __clk_get_name(hw->clk)); -+ rockchip_rk3399_pll_set_params(pll, rate); ++ for (; *lists; lists++) { ++ hlist_for_each_entry(c, *lists, child_node) { ++ if (!first_node) ++ seq_putc(s, ','); ++ first_node = false; ++ clk_dump_subtree(s, c, 0); ++ } + } + ++ clk_prepare_unlock(); ++ clk_pm_runtime_put_all(); ++ ++ seq_puts(s, "}\n"); + return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_dump); + -+static const struct clk_ops rockchip_rk3399_pll_clk_norate_ops = { -+ .recalc_rate = rockchip_rk3399_pll_recalc_rate, -+ .enable = rockchip_rk3399_pll_enable, -+ .disable = rockchip_rk3399_pll_disable, -+ .is_enabled = rockchip_rk3399_pll_is_enabled, -+}; -+ -+static const struct clk_ops rockchip_rk3399_pll_clk_ops = { -+ .recalc_rate = rockchip_rk3399_pll_recalc_rate, -+ .round_rate = rockchip_pll_round_rate, -+ .set_rate = rockchip_rk3399_pll_set_rate, -+ .enable 
= rockchip_rk3399_pll_enable, -+ .disable = rockchip_rk3399_pll_disable, -+ .is_enabled = rockchip_rk3399_pll_is_enabled, -+ .init = rockchip_rk3399_pll_init, -+}; -+ -+/** -+ * PLL used in RK3588 ++#undef CLOCK_ALLOW_WRITE_DEBUGFS ++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS ++/* ++ * This can be dangerous, therefore don't provide any real compile time ++ * configuration option for this feature. ++ * People who want to use this will need to modify the source code directly. + */ -+ -+#define RK3588_PLLCON(i) (i * 0x4) -+#define RK3588_PLLCON0_M_MASK 0x3ff -+#define RK3588_PLLCON0_M_SHIFT 0 -+#define RK3588_PLLCON1_P_MASK 0x3f -+#define RK3588_PLLCON1_P_SHIFT 0 -+#define RK3588_PLLCON1_S_MASK 0x7 -+#define RK3588_PLLCON1_S_SHIFT 6 -+#define RK3588_PLLCON2_K_MASK 0xffff -+#define RK3588_PLLCON2_K_SHIFT 0 -+#define RK3588_PLLCON1_PWRDOWN BIT(13) -+#define RK3588_PLLCON6_LOCK_STATUS BIT(15) -+ -+static int rockchip_rk3588_pll_wait_lock(struct rockchip_clk_pll *pll) ++static int clk_rate_set(void *data, u64 val) +{ -+ u32 pllcon; ++ struct clk_core *core = data; + int ret; + -+ /* -+ * Lock time typical 250, max 500 input clock cycles @24MHz -+ * So define a very safe maximum of 1000us, meaning 24000 cycles. -+ */ -+ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3588_PLLCON(6), -+ pllcon, -+ pllcon & RK3588_PLLCON6_LOCK_STATUS, -+ 0, 1000); -+ if (ret) -+ pr_err("%s: timeout waiting for pll to lock\n", __func__); ++ clk_prepare_lock(); ++ ret = clk_core_set_rate_nolock(core, val); ++ clk_prepare_unlock(); + + return ret; +} + -+static long rockchip_rk3588_pll_round_rate(struct clk_hw *hw, -+ unsigned long drate, unsigned long *prate) ++#define clk_rate_mode 0644 ++ ++static int clk_prepare_enable_set(void *data, u64 val) +{ -+ if ((drate < 37 * MHZ) || (drate > 4500 * MHZ)) -+ return -EINVAL; ++ struct clk_core *core = data; ++ int ret = 0; ++ ++ if (val) ++ ret = clk_prepare_enable(core->hw->clk); + else -+ return drate; ++ clk_disable_unprepare(core->hw->clk); ++ ++ return ret; +} + -+static void rockchip_rk3588_pll_get_params(struct rockchip_clk_pll *pll, -+ struct rockchip_pll_rate_table *rate) ++static int clk_prepare_enable_get(void *data, u64 *val) +{ -+ u32 pllcon; ++ struct clk_core *core = data; + -+ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(0)); -+ rate->m = ((pllcon >> RK3588_PLLCON0_M_SHIFT) -+ & RK3588_PLLCON0_M_MASK); ++ *val = core->enable_count && core->prepare_count; ++ return 0; ++} + -+ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(1)); -+ rate->p = ((pllcon >> RK3588_PLLCON1_P_SHIFT) -+ & RK3588_PLLCON1_P_MASK); -+ rate->s = ((pllcon >> RK3588_PLLCON1_S_SHIFT) -+ & RK3588_PLLCON1_S_MASK); ++DEFINE_DEBUGFS_ATTRIBUTE(clk_prepare_enable_fops, clk_prepare_enable_get, ++ clk_prepare_enable_set, "%llu\n"); + -+ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(2)); -+ rate->k = ((pllcon >> RK3588_PLLCON2_K_SHIFT) -+ & RK3588_PLLCON2_K_MASK); -+} ++#else ++#define clk_rate_set NULL ++#define clk_rate_mode 0444 ++#endif + -+static unsigned long rockchip_rk3588_pll_recalc_rate(struct clk_hw *hw, -+ unsigned long prate) ++static int clk_rate_get(void *data, u64 *val) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ struct rockchip_pll_rate_table cur; -+ u64 rate64 = prate, postdiv; ++ struct clk_core *core = data; + -+ if (pll->sel && pll->scaling) -+ return pll->scaling; -+ -+ rockchip_rk3588_pll_get_params(pll, &cur); -+ if (cur.p == 0) -+ return prate; ++ clk_prepare_lock(); ++ *val = clk_core_get_rate_recalc(core); ++ clk_prepare_unlock(); + -+ 
rate64 *= cur.m; -+ do_div(rate64, cur.p); ++ return 0; ++} + -+ if (cur.k & BIT(15)) { -+ /* fractional mode */ -+ u64 frac_rate64; ++DEFINE_DEBUGFS_ATTRIBUTE(clk_rate_fops, clk_rate_get, clk_rate_set, "%llu\n"); + -+ cur.k = (~(cur.k - 1)) & RK3588_PLLCON2_K_MASK; -+ frac_rate64 = prate * cur.k; -+ postdiv = cur.p; -+ postdiv *= 65536; -+ do_div(frac_rate64, postdiv); -+ rate64 -= frac_rate64; -+ } else { -+ /* fractional mode */ -+ u64 frac_rate64 = prate * cur.k; ++static const struct { ++ unsigned long flag; ++ const char *name; ++} clk_flags[] = { ++#define ENTRY(f) { f, #f } ++ ENTRY(CLK_SET_RATE_GATE), ++ ENTRY(CLK_SET_PARENT_GATE), ++ ENTRY(CLK_SET_RATE_PARENT), ++ ENTRY(CLK_IGNORE_UNUSED), ++ ENTRY(CLK_GET_RATE_NOCACHE), ++ ENTRY(CLK_SET_RATE_NO_REPARENT), ++ ENTRY(CLK_GET_ACCURACY_NOCACHE), ++ ENTRY(CLK_RECALC_NEW_RATES), ++ ENTRY(CLK_SET_RATE_UNGATE), ++ ENTRY(CLK_IS_CRITICAL), ++ ENTRY(CLK_OPS_PARENT_ENABLE), ++ ENTRY(CLK_DUTY_CYCLE_PARENT), ++#undef ENTRY ++}; ++ ++static int clk_flags_show(struct seq_file *s, void *data) ++{ ++ struct clk_core *core = s->private; ++ unsigned long flags = core->flags; ++ unsigned int i; + -+ postdiv = cur.p; -+ postdiv *= 65536; -+ do_div(frac_rate64, postdiv); -+ rate64 += frac_rate64; ++ for (i = 0; flags && i < ARRAY_SIZE(clk_flags); i++) { ++ if (flags & clk_flags[i].flag) { ++ seq_printf(s, "%s\n", clk_flags[i].name); ++ flags &= ~clk_flags[i].flag; ++ } ++ } ++ if (flags) { ++ /* Unknown flags */ ++ seq_printf(s, "0x%lx\n", flags); + } -+ rate64 = rate64 >> cur.s; + -+ return (unsigned long)rate64; ++ return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_flags); + -+static int rockchip_rk3588_pll_set_params(struct rockchip_clk_pll *pll, -+ const struct rockchip_pll_rate_table *rate) ++static void possible_parent_show(struct seq_file *s, struct clk_core *core, ++ unsigned int i, char terminator) +{ -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; -+ struct rockchip_pll_rate_table cur; -+ int rate_change_remuxed = 0; -+ int cur_parent; -+ int ret; -+ -+ pr_debug("%s: rate settings for %lu p: %d, m: %d, s: %d, k: %d\n", -+ __func__, rate->rate, rate->p, rate->m, rate->s, rate->k); ++ struct clk_core *parent; ++ const char *name = NULL; + -+ rockchip_rk3588_pll_get_params(pll, &cur); -+ cur.rate = 0; ++ /* ++ * Go through the following options to fetch a parent's name. ++ * ++ * 1. Fetch the registered parent clock and use its name ++ * 2. Use the global (fallback) name if specified ++ * 3. Use the local fw_name if provided ++ * 4. Fetch parent clock's clock-output-name if DT index was set ++ * ++ * This may still fail in some cases, such as when the parent is ++ * specified directly via a struct clk_hw pointer, but it isn't ++ * registered (yet). 
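++ * When every lookup fails the entry is printed as "(missing)", while
++ * a firmware-only reference is shown as "<fw_name>(fw)".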
++ */ ++ parent = clk_core_get_parent_by_index(core, i); ++ if (parent) { ++ seq_puts(s, parent->name); ++ } else if (core->parents[i].name) { ++ seq_puts(s, core->parents[i].name); ++ } else if (core->parents[i].fw_name) { ++ seq_printf(s, "<%s>(fw)", core->parents[i].fw_name); ++ } else { ++ if (core->parents[i].index >= 0) ++ name = of_clk_get_parent_name(core->of_node, core->parents[i].index); ++ if (!name) ++ name = "(missing)"; + -+ if (pll->type == pll_rk3588) { -+ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); -+ if (cur_parent == PLL_MODE_NORM) { -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); -+ rate_change_remuxed = 1; -+ } ++ seq_puts(s, name); + } + -+ /* set pll power down */ -+ writel(HIWORD_UPDATE(RK3588_PLLCON1_PWRDOWN, -+ RK3588_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3588_PLLCON(1)); ++ seq_putc(s, terminator); ++} + -+ /* update pll values */ -+ writel_relaxed(HIWORD_UPDATE(rate->m, RK3588_PLLCON0_M_MASK, -+ RK3588_PLLCON0_M_SHIFT), -+ pll->reg_base + RK3588_PLLCON(0)); ++static int possible_parents_show(struct seq_file *s, void *data) ++{ ++ struct clk_core *core = s->private; ++ int i; + -+ writel_relaxed(HIWORD_UPDATE(rate->p, RK3588_PLLCON1_P_MASK, -+ RK3588_PLLCON1_P_SHIFT) | -+ HIWORD_UPDATE(rate->s, RK3588_PLLCON1_S_MASK, -+ RK3588_PLLCON1_S_SHIFT), -+ pll->reg_base + RK3588_PLLCON(1)); ++ for (i = 0; i < core->num_parents - 1; i++) ++ possible_parent_show(s, core, i, ' '); + -+ writel_relaxed(HIWORD_UPDATE(rate->k, RK3588_PLLCON2_K_MASK, -+ RK3588_PLLCON2_K_SHIFT), -+ pll->reg_base + RK3588_PLLCON(2)); ++ possible_parent_show(s, core, i, '\n'); + -+ /* set pll power up */ -+ writel(HIWORD_UPDATE(0, -+ RK3588_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3588_PLLCON(1)); ++ return 0; ++} ++DEFINE_SHOW_ATTRIBUTE(possible_parents); + -+ /* wait for the pll to lock */ -+ ret = rockchip_rk3588_pll_wait_lock(pll); -+ if (ret) { -+ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", -+ __func__); -+ rockchip_rk3588_pll_set_params(pll, &cur); -+ } ++static int current_parent_show(struct seq_file *s, void *data) ++{ ++ struct clk_core *core = s->private; + -+ if ((pll->type == pll_rk3588) && rate_change_remuxed) -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++ if (core->parent) ++ seq_printf(s, "%s\n", core->parent->name); + -+ return ret; ++ return 0; +} ++DEFINE_SHOW_ATTRIBUTE(current_parent); + -+static int rockchip_rk3588_pll_set_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long prate) ++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS ++static ssize_t current_parent_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct rockchip_pll_rate_table *rate; -+ unsigned long old_rate = rockchip_rk3588_pll_recalc_rate(hw, prate); -+ int ret; ++ struct seq_file *s = file->private_data; ++ struct clk_core *core = s->private; ++ struct clk_core *parent; ++ u8 idx; ++ int err; + -+ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", -+ __func__, __clk_get_name(hw->clk), old_rate, drate, prate); ++ err = kstrtou8_from_user(ubuf, count, 0, &idx); ++ if (err < 0) ++ return err; + -+ /* Get required rate settings from table */ -+ rate = rockchip_get_pll_settings(pll, drate); -+ if (!rate) { -+ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, -+ drate, __clk_get_name(hw->clk)); -+ return -EINVAL; -+ } ++ parent = clk_core_get_parent_by_index(core, idx); ++ if (!parent) ++ return -ENOENT; + -+ ret = 
rockchip_rk3588_pll_set_params(pll, rate); -+ if (ret) -+ pll->scaling = 0; ++ clk_prepare_lock(); ++ err = clk_core_set_parent_nolock(core, parent); ++ clk_prepare_unlock(); ++ if (err) ++ return err; + -+ return ret; ++ return count; +} + -+static int rockchip_rk3588_pll_enable(struct clk_hw *hw) -+{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; ++static const struct file_operations current_parent_rw_fops = { ++ .open = current_parent_open, ++ .write = current_parent_write, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++#endif + -+ writel(HIWORD_UPDATE(0, RK3588_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3588_PLLCON(1)); -+ rockchip_rk3588_pll_wait_lock(pll); ++static int clk_duty_cycle_show(struct seq_file *s, void *data) ++{ ++ struct clk_core *core = s->private; ++ struct clk_duty *duty = &core->duty; + -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++ seq_printf(s, "%u/%u\n", duty->num, duty->den); + + return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_duty_cycle); + -+static void rockchip_rk3588_pll_disable(struct clk_hw *hw) ++static int clk_min_rate_show(struct seq_file *s, void *data) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; -+ struct clk_mux *pll_mux = &pll->pll_mux; ++ struct clk_core *core = s->private; ++ unsigned long min_rate, max_rate; + -+ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ clk_prepare_lock(); ++ clk_core_get_boundaries(core, &min_rate, &max_rate); ++ clk_prepare_unlock(); ++ seq_printf(s, "%lu\n", min_rate); + -+ writel(HIWORD_UPDATE(RK3588_PLLCON1_PWRDOWN, -+ RK3588_PLLCON1_PWRDOWN, 0), -+ pll->reg_base + RK3588_PLLCON(1)); ++ return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_min_rate); + -+static int rockchip_rk3588_pll_is_enabled(struct clk_hw *hw) ++static int clk_max_rate_show(struct seq_file *s, void *data) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); -+ u32 pllcon = readl(pll->reg_base + RK3588_PLLCON(1)); ++ struct clk_core *core = s->private; ++ unsigned long min_rate, max_rate; + -+ return !(pllcon & RK3588_PLLCON1_PWRDOWN); ++ clk_prepare_lock(); ++ clk_core_get_boundaries(core, &min_rate, &max_rate); ++ clk_prepare_unlock(); ++ seq_printf(s, "%lu\n", max_rate); ++ ++ return 0; +} ++DEFINE_SHOW_ATTRIBUTE(clk_max_rate); + -+static int rockchip_rk3588_pll_init(struct clk_hw *hw) ++static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) +{ -+ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct dentry *root; + -+ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) -+ return 0; ++ if (!core || !pdentry) ++ return; + -+ return 0; ++ root = debugfs_create_dir(core->name, pdentry); ++ core->dentry = root; ++ ++ debugfs_create_file("clk_rate", clk_rate_mode, root, core, ++ &clk_rate_fops); ++ debugfs_create_file("clk_min_rate", 0444, root, core, &clk_min_rate_fops); ++ debugfs_create_file("clk_max_rate", 0444, root, core, &clk_max_rate_fops); ++ debugfs_create_ulong("clk_accuracy", 0444, root, &core->accuracy); ++ debugfs_create_u32("clk_phase", 0444, root, &core->phase); ++ debugfs_create_file("clk_flags", 0444, root, core, &clk_flags_fops); ++ debugfs_create_u32("clk_prepare_count", 0444, root, &core->prepare_count); ++ debugfs_create_u32("clk_enable_count", 0444, root, &core->enable_count); ++ debugfs_create_u32("clk_protect_count", 0444, root, &core->protect_count); ++ 
debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count); ++ debugfs_create_file("clk_duty_cycle", 0444, root, core, ++ &clk_duty_cycle_fops); ++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS ++ debugfs_create_file("clk_prepare_enable", 0644, root, core, ++ &clk_prepare_enable_fops); ++ ++ if (core->num_parents > 1) ++ debugfs_create_file("clk_parent", 0644, root, core, ++ ¤t_parent_rw_fops); ++ else ++#endif ++ if (core->num_parents > 0) ++ debugfs_create_file("clk_parent", 0444, root, core, ++ ¤t_parent_fops); ++ ++ if (core->num_parents > 1) ++ debugfs_create_file("clk_possible_parents", 0444, root, core, ++ &possible_parents_fops); ++ ++ if (core->ops->debug_init) ++ core->ops->debug_init(core->hw, core->dentry); +} + -+static const struct clk_ops rockchip_rk3588_pll_clk_norate_ops = { -+ .recalc_rate = rockchip_rk3588_pll_recalc_rate, -+ .enable = rockchip_rk3588_pll_enable, -+ .disable = rockchip_rk3588_pll_disable, -+ .is_enabled = rockchip_rk3588_pll_is_enabled, -+}; ++/** ++ * clk_debug_register - add a clk node to the debugfs clk directory ++ * @core: the clk being added to the debugfs clk directory ++ * ++ * Dynamically adds a clk to the debugfs clk directory if debugfs has been ++ * initialized. Otherwise it bails out early since the debugfs clk directory ++ * will be created lazily by clk_debug_init as part of a late_initcall. ++ */ ++static void clk_debug_register(struct clk_core *core) ++{ ++ mutex_lock(&clk_debug_lock); ++ hlist_add_head(&core->debug_node, &clk_debug_list); ++ if (inited) ++ clk_debug_create_one(core, rootdir); ++ mutex_unlock(&clk_debug_lock); ++} + -+static const struct clk_ops rockchip_rk3588_pll_clk_ops = { -+ .recalc_rate = rockchip_rk3588_pll_recalc_rate, -+ .round_rate = rockchip_rk3588_pll_round_rate, -+ .set_rate = rockchip_rk3588_pll_set_rate, -+ .enable = rockchip_rk3588_pll_enable, -+ .disable = rockchip_rk3588_pll_disable, -+ .is_enabled = rockchip_rk3588_pll_is_enabled, -+ .init = rockchip_rk3588_pll_init, -+}; ++ /** ++ * clk_debug_unregister - remove a clk node from the debugfs clk directory ++ * @core: the clk being removed from the debugfs clk directory ++ * ++ * Dynamically removes a clk and all its child nodes from the ++ * debugfs clk directory if clk->dentry points to debugfs created by ++ * clk_debug_register in __clk_core_init. ++ */ ++static void clk_debug_unregister(struct clk_core *core) ++{ ++ mutex_lock(&clk_debug_lock); ++ hlist_del_init(&core->debug_node); ++ debugfs_remove_recursive(core->dentry); ++ core->dentry = NULL; ++ mutex_unlock(&clk_debug_lock); ++} + -+#ifdef CONFIG_ROCKCHIP_CLK_COMPENSATION -+int rockchip_pll_clk_compensation(struct clk *clk, int ppm) ++/** ++ * clk_debug_init - lazily populate the debugfs clk directory ++ * ++ * clks are often initialized very early during boot before memory can be ++ * dynamically allocated and well before debugfs is setup. This function ++ * populates the debugfs clk directory once at boot-time when we know that ++ * debugfs is setup. It should only be called once at boot-time, all other clks ++ * added dynamically will be done so with clk_debug_register. 
++ */ ++static int __init clk_debug_init(void) +{ -+ struct clk *parent = clk_get_parent(clk); -+ struct rockchip_clk_pll *pll; -+ static u32 frac, fbdiv, s, p; -+ bool negative; -+ u32 pllcon, pllcon0, pllcon2, fbdiv_mask, frac_mask, frac_shift; -+ u64 fracdiv, m, n; ++ struct clk_core *core; + -+ if ((ppm > 1000) || (ppm < -1000)) -+ return -EINVAL; ++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS ++ pr_warn("\n"); ++ pr_warn("********************************************************************\n"); ++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** This means that this kernel is built to expose clk operations **\n"); ++ pr_warn("** such as parent or rate setting, enabling, disabling, etc. **\n"); ++ pr_warn("** to userspace, which may compromise security on your system. **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** If you see this message and you are not debugging the **\n"); ++ pr_warn("** kernel, report this immediately to your vendor! **\n"); ++ pr_warn("** **\n"); ++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); ++ pr_warn("********************************************************************\n"); ++#endif + -+ if (IS_ERR_OR_NULL(parent)) -+ return -EINVAL; ++ rootdir = debugfs_create_dir("clk", NULL); + -+ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); -+ if (!pll) -+ return -EINVAL; ++ debugfs_create_file("clk_summary", 0444, rootdir, &all_lists, ++ &clk_summary_fops); ++ debugfs_create_file("clk_dump", 0444, rootdir, &all_lists, ++ &clk_dump_fops); ++ debugfs_create_file("clk_orphan_summary", 0444, rootdir, &orphan_list, ++ &clk_summary_fops); ++ debugfs_create_file("clk_orphan_dump", 0444, rootdir, &orphan_list, ++ &clk_dump_fops); + -+ switch (pll->type) { -+ case pll_rk3036: -+ case pll_rk3328: -+ pllcon0 = RK3036_PLLCON(0); -+ pllcon2 = RK3036_PLLCON(2); -+ fbdiv_mask = RK3036_PLLCON0_FBDIV_MASK; -+ frac_mask = RK3036_PLLCON2_FRAC_MASK; -+ frac_shift = RK3036_PLLCON2_FRAC_SHIFT; -+ if (!frac) -+ writel(HIWORD_UPDATE(RK3036_PLLCON1_PLLPDSEL, -+ RK3036_PLLCON1_PLLPDSEL, 0), -+ pll->reg_base + RK3036_PLLCON(1)); -+ break; -+ case pll_rk3066: -+ return -EINVAL; -+ case pll_rk3399: -+ pllcon0 = RK3399_PLLCON(0); -+ pllcon2 = RK3399_PLLCON(2); -+ fbdiv_mask = RK3399_PLLCON0_FBDIV_MASK; -+ frac_mask = RK3399_PLLCON2_FRAC_MASK; -+ frac_shift = RK3399_PLLCON2_FRAC_SHIFT; -+ break; -+ case pll_rk3588: -+ pllcon0 = RK3588_PLLCON(0); -+ pllcon2 = RK3588_PLLCON(2); -+ fbdiv_mask = RK3588_PLLCON0_M_MASK; -+ frac_mask = RK3588_PLLCON2_K_MASK; -+ frac_shift = RK3588_PLLCON2_K_SHIFT; -+ break; -+ default: -+ return -EINVAL; -+ } ++ mutex_lock(&clk_debug_lock); ++ hlist_for_each_entry(core, &clk_debug_list, debug_node) ++ clk_debug_create_one(core, rootdir); + -+ negative = !!(ppm & BIT(31)); -+ ppm = negative ? 
~ppm + 1 : ppm; ++ inited = 1; ++ mutex_unlock(&clk_debug_lock); + -+ switch (pll->type) { -+ case pll_rk3036: -+ case pll_rk3328: -+ case pll_rk3066: -+ case pll_rk3399: -+ /* -+ * delta frac frac ppm -+ * -------------- = (fbdiv + ----------) * --------- -+ * 1 << 24 1 << 24 1000000 -+ * -+ */ -+ if (!frac) { -+ frac = readl_relaxed(pll->reg_base + pllcon2) & frac_mask; -+ fbdiv = readl_relaxed(pll->reg_base + pllcon0) & fbdiv_mask; -+ } -+ m = div64_u64((uint64_t)frac * ppm, 1000000); -+ n = div64_u64((uint64_t)ppm << 24, 1000000) * fbdiv; ++ return 0; ++} ++late_initcall(clk_debug_init); ++#else ++static inline void clk_debug_register(struct clk_core *core) { } ++static inline void clk_debug_unregister(struct clk_core *core) ++{ ++} ++#endif + -+ fracdiv = negative ? frac - (m + n) : frac + (m + n); ++static void clk_core_reparent_orphans_nolock(void) ++{ ++ struct clk_core *orphan; ++ struct hlist_node *tmp2; + -+ if (!frac || fracdiv > frac_mask) -+ return -EINVAL; ++ /* ++ * walk the list of orphan clocks and reparent any that newly finds a ++ * parent. ++ */ ++ hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { ++ struct clk_core *parent = __clk_init_parent(orphan); + -+ pllcon = readl_relaxed(pll->reg_base + pllcon2); -+ pllcon &= ~(frac_mask << frac_shift); -+ pllcon |= fracdiv << frac_shift; -+ writel_relaxed(pllcon, pll->reg_base + pllcon2); -+ break; -+ case pll_rk3588: -+ if (!fbdiv) { -+ frac = readl_relaxed(pll->reg_base + pllcon2) & frac_mask; -+ fbdiv = readl_relaxed(pll->reg_base + pllcon0) & fbdiv_mask; -+ } -+ if (!frac) { -+ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(1)); -+ s = ((pllcon >> RK3588_PLLCON1_S_SHIFT) -+ & RK3588_PLLCON1_S_MASK); -+ p = ((pllcon >> RK3588_PLLCON1_P_SHIFT) -+ & RK3588_PLLCON1_P_MASK); -+ m = div64_u64((uint64_t)clk_get_rate(clk) * ppm, 24000000); -+ n = div64_u64((uint64_t)m * 65536 * p * (1 << s), 1000000); ++ /* ++ * We need to use __clk_set_parent_before() and _after() to ++ * properly migrate any prepare/enable count of the orphan ++ * clock. This is important for CLK_IS_CRITICAL clocks, which ++ * are enabled during init but might not have a parent yet. ++ */ ++ if (parent) { ++ /* update the clk tree topology */ ++ __clk_set_parent_before(orphan, parent); ++ __clk_set_parent_after(orphan, parent, NULL); ++ __clk_recalc_accuracies(orphan); ++ __clk_recalc_rates(orphan, true, 0); + -+ if (n > 32767) -+ return -EINVAL; -+ fracdiv = negative ? 
~n + 1 : n; -+ } else if (frac & BIT(15)) { -+ frac = (~(frac - 1)) & RK3588_PLLCON2_K_MASK; -+ m = div64_u64((uint64_t)frac * ppm, 100000); -+ n = div64_u64((uint64_t)ppm * 65536 * fbdiv, 100000); -+ if (negative) { -+ fracdiv = frac + (div64_u64(m + n, 10)); -+ if (fracdiv > 32767) -+ return -EINVAL; -+ fracdiv = ~fracdiv + 1; -+ } else { -+ s = div64_u64(m + n, 10); -+ if (frac >= s) { -+ fracdiv = frac - s; -+ if (fracdiv > 32767) -+ return -EINVAL; -+ fracdiv = ~fracdiv + 1; -+ } else { -+ fracdiv = s - frac; -+ if (fracdiv > 32767) -+ return -EINVAL; -+ } -+ } -+ } else { -+ m = div64_u64((uint64_t)frac * ppm, 100000); -+ n = div64_u64((uint64_t)ppm * 65536 * fbdiv, 100000); -+ if (!negative) { -+ fracdiv = frac + (div64_u64(m + n, 10)); -+ if (fracdiv > 32767) -+ return -EINVAL; -+ } else { -+ s = div64_u64(m + n, 10); -+ if (frac >= s) { -+ fracdiv = frac - s; -+ if (fracdiv > 32767) -+ return -EINVAL; -+ } else { -+ fracdiv = s - frac; -+ if (fracdiv > 32767) -+ return -EINVAL; -+ fracdiv = ~fracdiv + 1; -+ } -+ } ++ /* ++ * __clk_init_parent() will set the initial req_rate to ++ * 0 if the clock doesn't have clk_ops::recalc_rate and ++ * is an orphan when it's registered. ++ * ++ * 'req_rate' is used by clk_set_rate_range() and ++ * clk_put() to trigger a clk_set_rate() call whenever ++ * the boundaries are modified. Let's make sure ++ * 'req_rate' is set to something non-zero so that ++ * clk_set_rate_range() doesn't drop the frequency. ++ */ ++ orphan->req_rate = orphan->rate; + } -+ -+ writel_relaxed(HIWORD_UPDATE(fracdiv, frac_mask, frac_shift), -+ pll->reg_base + pllcon2); -+ break; -+ default: -+ return -EINVAL; + } -+ -+ return 0; +} -+EXPORT_SYMBOL(rockchip_pll_clk_compensation); -+#endif + -+/* -+ * Common registering of pll clocks ++/** ++ * __clk_core_init - initialize the data structures in a struct clk_core ++ * @core: clk_core being initialized ++ * ++ * Initializes the lists in struct clk_core, queries the hardware for the ++ * parent and rate and sets them both. + */ -+ -+struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx, -+ enum rockchip_pll_type pll_type, -+ const char *name, const char *const *parent_names, -+ u8 num_parents, int con_offset, int grf_lock_offset, -+ int lock_shift, int mode_offset, int mode_shift, -+ struct rockchip_pll_rate_table *rate_table, -+ unsigned long flags, u8 clk_pll_flags) ++static int __clk_core_init(struct clk_core *core) +{ -+ const char *pll_parents[3]; -+ struct clk_init_data init; -+ struct rockchip_clk_pll *pll; -+ struct clk_mux *pll_mux; -+ struct clk *pll_clk, *mux_clk; -+ char pll_name[20]; ++ int ret; ++ struct clk_core *parent; ++ unsigned long rate; ++ int phase; + -+ if ((pll_type != pll_rk3328 && num_parents != 2) || -+ (pll_type == pll_rk3328 && num_parents != 1)) { -+ pr_err("%s: needs two parent clocks\n", __func__); -+ return ERR_PTR(-EINVAL); ++ clk_prepare_lock(); ++ ++ /* ++ * Set hw->core after grabbing the prepare_lock to synchronize with ++ * callers of clk_core_fill_parent_index() where we treat hw->core ++ * being NULL as the clk not being registered yet. This is crucial so ++ * that clks aren't parented until their parent is fully registered. 
++ */ ++ core->hw->core = core; ++ ++ ret = clk_pm_runtime_get(core); ++ if (ret) ++ goto unlock; ++ ++ /* check to see if a clock with this name is already registered */ ++ if (clk_core_lookup(core->name)) { ++ pr_debug("%s: clk %s already initialized\n", ++ __func__, core->name); ++ ret = -EEXIST; ++ goto out; + } + -+ /* name the actual pll */ -+ snprintf(pll_name, sizeof(pll_name), "pll_%s", name); ++ /* check that clk_ops are sane. See Documentation/driver-api/clk.rst */ ++ if (core->ops->set_rate && ++ !((core->ops->round_rate || core->ops->determine_rate) && ++ core->ops->recalc_rate)) { ++ pr_err("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n", ++ __func__, core->name); ++ ret = -EINVAL; ++ goto out; ++ } + -+ pll = kzalloc(sizeof(*pll), GFP_KERNEL); -+ if (!pll) -+ return ERR_PTR(-ENOMEM); ++ if (core->ops->set_parent && !core->ops->get_parent) { ++ pr_err("%s: %s must implement .get_parent & .set_parent\n", ++ __func__, core->name); ++ ret = -EINVAL; ++ goto out; ++ } + -+ /* create the mux on top of the real pll */ -+ pll->pll_mux_ops = &clk_mux_ops; -+ pll_mux = &pll->pll_mux; -+ pll_mux->reg = ctx->reg_base + mode_offset; -+ pll_mux->shift = mode_shift; -+ if (pll_type == pll_rk3328) -+ pll_mux->mask = PLL_RK3328_MODE_MASK; -+ else -+ pll_mux->mask = PLL_MODE_MASK; -+ pll_mux->flags = 0; -+ pll_mux->lock = &ctx->lock; -+ pll_mux->hw.init = &init; -+ pll_mux->flags |= CLK_MUX_HIWORD_MASK; ++ if (core->ops->set_parent && !core->ops->determine_rate) { ++ pr_err("%s: %s must implement .set_parent & .determine_rate\n", ++ __func__, core->name); ++ ret = -EINVAL; ++ goto out; ++ } + -+ /* the actual muxing is xin24m, pll-output, xin32k */ -+ pll_parents[0] = parent_names[0]; -+ pll_parents[1] = pll_name; -+ pll_parents[2] = parent_names[1]; ++ if (core->num_parents > 1 && !core->ops->get_parent) { ++ pr_err("%s: %s must implement .get_parent as it has multi parents\n", ++ __func__, core->name); ++ ret = -EINVAL; ++ goto out; ++ } + -+ init.name = name; -+ init.flags = CLK_SET_RATE_PARENT; -+ init.ops = pll->pll_mux_ops; -+ init.parent_names = pll_parents; -+ if (pll_type == pll_rk3328) -+ init.num_parents = 2; -+ else -+ init.num_parents = ARRAY_SIZE(pll_parents); ++ if (core->ops->set_rate_and_parent && ++ !(core->ops->set_parent && core->ops->set_rate)) { ++ pr_err("%s: %s must implement .set_parent & .set_rate\n", ++ __func__, core->name); ++ ret = -EINVAL; ++ goto out; ++ } + -+ mux_clk = clk_register(NULL, &pll_mux->hw); -+ if (IS_ERR(mux_clk)) -+ goto err_mux; ++ /* ++ * optional platform-specific magic ++ * ++ * The .init callback is not used by any of the basic clock types, but ++ * exists for weird hardware that must perform initialization magic for ++ * CCF to get an accurate view of clock for any other callbacks. It may ++ * also be used needs to perform dynamic allocations. Such allocation ++ * must be freed in the terminate() callback. ++ * This callback shall not be used to initialize the parameters state, ++ * such as rate, parent, etc ... 
++ * ++ * If it exist, this callback should called before any other callback of ++ * the clock ++ */ ++ if (core->ops->init) { ++ ret = core->ops->init(core->hw); ++ if (ret) ++ goto out; ++ } + -+ /* now create the actual pll */ -+ init.name = pll_name; ++ parent = core->parent = __clk_init_parent(core); + -+#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE -+ if (clk_pll_flags & ROCKCHIP_PLL_ALLOW_POWER_DOWN) -+ init.flags = flags; ++ /* ++ * Populate core->parent if parent has already been clk_core_init'd. If ++ * parent has not yet been clk_core_init'd then place clk in the orphan ++ * list. If clk doesn't have any parents then place it in the root ++ * clk list. ++ * ++ * Every time a new clk is clk_init'd then we walk the list of orphan ++ * clocks and re-parent any that are children of the clock currently ++ * being clk_init'd. ++ */ ++ if (parent) { ++ hlist_add_head(&core->child_node, &parent->children); ++ core->orphan = parent->orphan; ++ } else if (!core->num_parents) { ++ hlist_add_head(&core->child_node, &clk_root_list); ++ core->orphan = false; ++ } else { ++ hlist_add_head(&core->child_node, &clk_orphan_list); ++ core->orphan = true; ++ } ++ ++ /* ++ * Set clk's accuracy. The preferred method is to use ++ * .recalc_accuracy. For simple clocks and lazy developers the default ++ * fallback is to use the parent's accuracy. If a clock doesn't have a ++ * parent (or is orphaned) then accuracy is set to zero (perfect ++ * clock). ++ */ ++ if (core->ops->recalc_accuracy) ++ core->accuracy = core->ops->recalc_accuracy(core->hw, ++ clk_core_get_accuracy_no_lock(parent)); ++ else if (parent) ++ core->accuracy = parent->accuracy; + else -+ /* keep all plls untouched for now */ -+ init.flags = flags | CLK_IGNORE_UNUSED; -+#else -+ init.flags = flags; -+#endif ++ core->accuracy = 0; + -+ init.parent_names = &parent_names[0]; -+ init.num_parents = 1; ++ /* ++ * Set clk's phase by clk_core_get_phase() caching the phase. ++ * Since a phase is by definition relative to its parent, just ++ * query the current clock phase, or just assume it's in phase. ++ */ ++ phase = clk_core_get_phase(core); ++ if (phase < 0) { ++ ret = phase; ++ pr_warn("%s: Failed to get phase for clk '%s'\n", __func__, ++ core->name); ++ goto out; ++ } + -+ if (rate_table) { -+ int len; ++ /* ++ * Set clk's duty cycle. ++ */ ++ clk_core_update_duty_cycle_nolock(core); + -+ /* find count of rates in rate_table */ -+ for (len = 0; rate_table[len].rate != 0; ) -+ len++; ++ /* ++ * Set clk's rate. The preferred method is to use .recalc_rate. For ++ * simple clocks and lazy developers the default fallback is to use the ++ * parent's rate. If a clock doesn't have a parent (or is orphaned) ++ * then rate is set to zero. 
++ */ ++ if (core->ops->recalc_rate) ++ rate = core->ops->recalc_rate(core->hw, ++ clk_core_get_rate_nolock(parent)); ++ else if (parent) ++ rate = parent->rate; ++ else ++ rate = 0; ++ core->rate = core->req_rate = rate; + -+ pll->rate_count = len; -+ pll->rate_table = kmemdup(rate_table, -+ pll->rate_count * -+ sizeof(struct rockchip_pll_rate_table), -+ GFP_KERNEL); -+ WARN(!pll->rate_table, -+ "%s: could not allocate rate table for %s\n", -+ __func__, name); ++ /* ++ * Enable CLK_IS_CRITICAL clocks so newly added critical clocks ++ * don't get accidentally disabled when walking the orphan tree and ++ * reparenting clocks ++ */ ++ if (core->flags & CLK_IS_CRITICAL) { ++ ret = clk_core_prepare(core); ++ if (ret) { ++ pr_warn("%s: critical clk '%s' failed to prepare\n", ++ __func__, core->name); ++ goto out; ++ } ++ ++ ret = clk_core_enable_lock(core); ++ if (ret) { ++ pr_warn("%s: critical clk '%s' failed to enable\n", ++ __func__, core->name); ++ clk_core_unprepare(core); ++ goto out; ++ } + } + -+ switch (pll_type) { -+ case pll_rk3036: -+ case pll_rk3328: -+ if (!pll->rate_table) -+ init.ops = &rockchip_rk3036_pll_clk_norate_ops; -+ else -+ init.ops = &rockchip_rk3036_pll_clk_ops; -+ break; -+#ifdef CONFIG_ROCKCHIP_PLL_RK3066 -+ case pll_rk3066: -+ if (!pll->rate_table || IS_ERR(ctx->grf)) -+ init.ops = &rockchip_rk3066_pll_clk_norate_ops; -+ else -+ init.ops = &rockchip_rk3066_pll_clk_ops; -+ break; -+#endif -+#ifdef CONFIG_ROCKCHIP_PLL_RK3399 -+ case pll_rk3399: -+ if (!pll->rate_table) -+ init.ops = &rockchip_rk3399_pll_clk_norate_ops; -+ else -+ init.ops = &rockchip_rk3399_pll_clk_ops; -+ break; -+#endif -+#ifdef CONFIG_ROCKCHIP_PLL_RK3588 -+ case pll_rk3588: -+ case pll_rk3588_core: -+ if (!pll->rate_table) -+ init.ops = &rockchip_rk3588_pll_clk_norate_ops; -+ else -+ init.ops = &rockchip_rk3588_pll_clk_ops; -+ init.flags = flags; -+ break; -+#endif -+ default: -+ pr_warn("%s: Unknown pll type for pll clk %s\n", -+ __func__, name); ++ clk_core_reparent_orphans_nolock(); ++out: ++ clk_pm_runtime_put(core); ++unlock: ++ if (ret) { ++ hlist_del_init(&core->child_node); ++ core->hw->core = NULL; + } + -+ pll->hw.init = &init; -+ pll->type = pll_type; -+ pll->reg_base = ctx->reg_base + con_offset; -+ pll->lock_offset = grf_lock_offset; -+ pll->lock_shift = lock_shift; -+ pll->flags = clk_pll_flags; -+ pll->lock = &ctx->lock; -+ pll->ctx = ctx; ++ clk_prepare_unlock(); + -+ pll_clk = clk_register(NULL, &pll->hw); -+ if (IS_ERR(pll_clk)) { -+ pr_err("%s: failed to register pll clock %s : %ld\n", -+ __func__, name, PTR_ERR(pll_clk)); -+ goto err_pll; -+ } ++ if (!ret) ++ clk_debug_register(core); + -+ return mux_clk; ++ return ret; ++} + -+err_pll: -+ kfree(pll->rate_table); -+ clk_unregister(mux_clk); -+ mux_clk = pll_clk; -+err_mux: -+ kfree(pll); -+ return mux_clk; ++/** ++ * clk_core_link_consumer - Add a clk consumer to the list of consumers in a clk_core ++ * @core: clk to add consumer to ++ * @clk: consumer to link to a clk ++ */ ++static void clk_core_link_consumer(struct clk_core *core, struct clk *clk) ++{ ++ clk_prepare_lock(); ++ hlist_add_head(&clk->clks_node, &core->clks); ++ clk_prepare_unlock(); +} + -+#ifdef CONFIG_ROCKCHIP_CLK_BOOST -+static unsigned long rockchip_pll_con_to_rate(struct rockchip_clk_pll *pll, -+ u32 con0, u32 con1) ++/** ++ * clk_core_unlink_consumer - Remove a clk consumer from the list of consumers in a clk_core ++ * @clk: consumer to unlink ++ */ ++static void clk_core_unlink_consumer(struct clk *clk) +{ -+ switch (pll->type) { -+ case pll_rk3036: -+ 
case pll_rk3328: -+ return rockchip_rk3036_pll_con_to_rate(pll, con0, con1); -+ case pll_rk3066: -+ break; -+ case pll_rk3399: -+ break; -+ default: -+ pr_warn("%s: Unknown pll type\n", __func__); -+ } ++ lockdep_assert_held(&prepare_lock); ++ hlist_del(&clk->clks_node); ++} + -+ return 0; ++/** ++ * alloc_clk - Allocate a clk consumer, but leave it unlinked to the clk_core ++ * @core: clk to allocate a consumer for ++ * @dev_id: string describing device name ++ * @con_id: connection ID string on device ++ * ++ * Returns: clk consumer left unlinked from the consumer list ++ */ ++static struct clk *alloc_clk(struct clk_core *core, const char *dev_id, ++ const char *con_id) ++{ ++ struct clk *clk; ++ ++ clk = kzalloc(sizeof(*clk), GFP_KERNEL); ++ if (!clk) ++ return ERR_PTR(-ENOMEM); ++ ++ clk->core = core; ++ clk->dev_id = dev_id; ++ clk->con_id = kstrdup_const(con_id, GFP_KERNEL); ++ clk->max_rate = ULONG_MAX; ++ ++ return clk; +} + -+void rockchip_boost_init(struct clk_hw *hw) ++/** ++ * free_clk - Free a clk consumer ++ * @clk: clk consumer to free ++ * ++ * Note, this assumes the clk has been unlinked from the clk_core consumer ++ * list. ++ */ ++static void free_clk(struct clk *clk) +{ -+ struct rockchip_clk_pll *pll; -+ struct device_node *np; -+ u32 value, con0, con1; ++ kfree_const(clk->con_id); ++ kfree(clk); ++} + -+ if (!hw) -+ return; -+ pll = to_rockchip_clk_pll(hw); -+ np = of_parse_phandle(pll->ctx->cru_node, "rockchip,boost", 0); -+ if (!np) { -+ pr_debug("%s: failed to get boost np\n", __func__); -+ return; -+ } -+ pll->boost = syscon_node_to_regmap(np); -+ if (IS_ERR(pll->boost)) { -+ pr_debug("%s: failed to get boost regmap\n", __func__); -+ return; -+ } ++/** ++ * clk_hw_create_clk: Allocate and link a clk consumer to a clk_core given ++ * a clk_hw ++ * @dev: clk consumer device ++ * @hw: clk_hw associated with the clk being consumed ++ * @dev_id: string describing device name ++ * @con_id: connection ID string on device ++ * ++ * This is the main function used to create a clk pointer for use by clk ++ * consumers. It connects a consumer to the clk_core and clk_hw structures ++ * used by the framework and clk provider respectively. 
++ */ ++struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw, ++ const char *dev_id, const char *con_id) ++{ ++ struct clk *clk; ++ struct clk_core *core; + -+ if (!of_property_read_u32(np, "rockchip,boost-low-con0", &con0) && -+ !of_property_read_u32(np, "rockchip,boost-low-con1", &con1)) { -+ pr_debug("boost-low-con=0x%x 0x%x\n", con0, con1); -+ regmap_write(pll->boost, BOOST_PLL_L_CON(0), -+ HIWORD_UPDATE(con0, BOOST_PLL_CON_MASK, 0)); -+ regmap_write(pll->boost, BOOST_PLL_L_CON(1), -+ HIWORD_UPDATE(con1, BOOST_PLL_CON_MASK, 0)); -+ pll->boost_low_rate = rockchip_pll_con_to_rate(pll, con0, -+ con1); -+ pr_debug("boost-low-rate=%lu\n", pll->boost_low_rate); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-high-con0", &con0) && -+ !of_property_read_u32(np, "rockchip,boost-high-con1", &con1)) { -+ pr_debug("boost-high-con=0x%x 0x%x\n", con0, con1); -+ regmap_write(pll->boost, BOOST_PLL_H_CON(0), -+ HIWORD_UPDATE(con0, BOOST_PLL_CON_MASK, 0)); -+ regmap_write(pll->boost, BOOST_PLL_H_CON(1), -+ HIWORD_UPDATE(con1, BOOST_PLL_CON_MASK, 0)); -+ pll->boost_high_rate = rockchip_pll_con_to_rate(pll, con0, -+ con1); -+ pr_debug("boost-high-rate=%lu\n", pll->boost_high_rate); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-backup-pll", &value)) { -+ pr_debug("boost-backup-pll=0x%x\n", value); -+ regmap_write(pll->boost, BOOST_CLK_CON, -+ HIWORD_UPDATE(value, BOOST_BACKUP_PLL_MASK, -+ BOOST_BACKUP_PLL_SHIFT)); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-backup-pll-usage", -+ &pll->boost_backup_pll_usage)) { -+ pr_debug("boost-backup-pll-usage=0x%x\n", -+ pll->boost_backup_pll_usage); -+ regmap_write(pll->boost, BOOST_CLK_CON, -+ HIWORD_UPDATE(pll->boost_backup_pll_usage, -+ BOOST_BACKUP_PLL_USAGE_MASK, -+ BOOST_BACKUP_PLL_USAGE_SHIFT)); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-switch-threshold", -+ &value)) { -+ pr_debug("boost-switch-threshold=0x%x\n", value); -+ regmap_write(pll->boost, BOOST_SWITCH_THRESHOLD, value); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-statis-threshold", -+ &value)) { -+ pr_debug("boost-statis-threshold=0x%x\n", value); -+ regmap_write(pll->boost, BOOST_STATIS_THRESHOLD, value); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-statis-enable", -+ &value)) { -+ pr_debug("boost-statis-enable=0x%x\n", value); -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(value, BOOST_STATIS_ENABLE_MASK, -+ BOOST_STATIS_ENABLE_SHIFT)); -+ } -+ if (!of_property_read_u32(np, "rockchip,boost-enable", &value)) { -+ pr_debug("boost-enable=0x%x\n", value); -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(value, BOOST_ENABLE_MASK, -+ BOOST_ENABLE_SHIFT)); -+ if (value) -+ pll->boost_enabled = true; ++ /* This is to allow this function to be chained to others */ ++ if (IS_ERR_OR_NULL(hw)) ++ return ERR_CAST(hw); ++ ++ core = hw->core; ++ clk = alloc_clk(core, dev_id, con_id); ++ if (IS_ERR(clk)) ++ return clk; ++ clk->dev = dev; ++ ++ if (!try_module_get(core->owner)) { ++ free_clk(clk); ++ return ERR_PTR(-ENOENT); + } -+#ifdef CONFIG_DEBUG_FS -+ if (pll->boost_enabled) { -+ mutex_lock(&clk_boost_lock); -+ hlist_add_head(&pll->debug_node, &clk_boost_list); -+ mutex_unlock(&clk_boost_lock); ++ ++ kref_get(&core->ref); ++ clk_core_link_consumer(core, clk); ++ ++ return clk; ++} ++ ++/** ++ * clk_hw_get_clk - get clk consumer given an clk_hw ++ * @hw: clk_hw associated with the clk being consumed ++ * @con_id: connection ID string on device ++ * ++ * Returns: new clk consumer ++ * This is the function to be used 
by providers which need ++ * to get a consumer clk and act on the clock element ++ * Calls to this function must be balanced with calls clk_put() ++ */ ++struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id) ++{ ++ struct device *dev = hw->core->dev; ++ const char *name = dev ? dev_name(dev) : NULL; ++ ++ return clk_hw_create_clk(dev, hw, name, con_id); ++} ++EXPORT_SYMBOL(clk_hw_get_clk); ++ ++static int clk_cpy_name(const char **dst_p, const char *src, bool must_exist) ++{ ++ const char *dst; ++ ++ if (!src) { ++ if (must_exist) ++ return -EINVAL; ++ return 0; + } -+#endif ++ ++ *dst_p = dst = kstrdup_const(src, GFP_KERNEL); ++ if (!dst) ++ return -ENOMEM; ++ ++ return 0; +} + -+void rockchip_boost_enable_recovery_sw_low(struct clk_hw *hw) ++static int clk_core_populate_parent_map(struct clk_core *core, ++ const struct clk_init_data *init) +{ -+ struct rockchip_clk_pll *pll; -+ unsigned int val; ++ u8 num_parents = init->num_parents; ++ const char * const *parent_names = init->parent_names; ++ const struct clk_hw **parent_hws = init->parent_hws; ++ const struct clk_parent_data *parent_data = init->parent_data; ++ int i, ret = 0; ++ struct clk_parent_map *parents, *parent; + -+ if (!hw) -+ return; -+ pll = to_rockchip_clk_pll(hw); -+ if (!pll->boost_enabled) -+ return; ++ if (!num_parents) ++ return 0; + -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(1, BOOST_RECOVERY_MASK, -+ BOOST_RECOVERY_SHIFT)); -+ do { -+ regmap_read(pll->boost, BOOST_FSM_STATUS, &val); -+ } while (!(val & BOOST_BUSY_STATE)); ++ /* ++ * Avoid unnecessary string look-ups of clk_core's possible parents by ++ * having a cache of names/clk_hw pointers to clk_core pointers. ++ */ ++ parents = kcalloc(num_parents, sizeof(*parents), GFP_KERNEL); ++ core->parents = parents; ++ if (!parents) ++ return -ENOMEM; + -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(1, BOOST_SW_CTRL_MASK, -+ BOOST_SW_CTRL_SHIFT) | -+ HIWORD_UPDATE(1, BOOST_LOW_FREQ_EN_MASK, -+ BOOST_LOW_FREQ_EN_SHIFT)); ++ /* Copy everything over because it might be __initdata */ ++ for (i = 0, parent = parents; i < num_parents; i++, parent++) { ++ parent->index = -1; ++ if (parent_names) { ++ /* throw a WARN if any entries are NULL */ ++ WARN(!parent_names[i], ++ "%s: invalid NULL in %s's .parent_names\n", ++ __func__, core->name); ++ ret = clk_cpy_name(&parent->name, parent_names[i], ++ true); ++ } else if (parent_data) { ++ parent->hw = parent_data[i].hw; ++ parent->index = parent_data[i].index; ++ ret = clk_cpy_name(&parent->fw_name, ++ parent_data[i].fw_name, false); ++ if (!ret) ++ ret = clk_cpy_name(&parent->name, ++ parent_data[i].name, ++ false); ++ } else if (parent_hws) { ++ parent->hw = parent_hws[i]; ++ } else { ++ ret = -EINVAL; ++ WARN(1, "Must specify parents if num_parents > 0\n"); ++ } ++ ++ if (ret) { ++ do { ++ kfree_const(parents[i].name); ++ kfree_const(parents[i].fw_name); ++ } while (--i >= 0); ++ kfree(parents); ++ ++ return ret; ++ } ++ } ++ ++ return 0; +} + -+static void rockchip_boost_disable_low(struct rockchip_clk_pll *pll) ++static void clk_core_free_parent_map(struct clk_core *core) +{ -+ if (!pll->boost_enabled) ++ int i = core->num_parents; ++ ++ if (!core->num_parents) + return; + -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(0, BOOST_LOW_FREQ_EN_MASK, -+ BOOST_LOW_FREQ_EN_SHIFT)); ++ while (--i >= 0) { ++ kfree_const(core->parents[i].name); ++ kfree_const(core->parents[i].fw_name); ++ } ++ ++ kfree(core->parents); +} + -+void rockchip_boost_disable_recovery_sw(struct 
clk_hw *hw) ++/* Free memory allocated for a struct clk_core */ ++static void __clk_release(struct kref *ref) +{ -+ struct rockchip_clk_pll *pll; ++ struct clk_core *core = container_of(ref, struct clk_core, ref); + -+ if (!hw) -+ return; -+ pll = to_rockchip_clk_pll(hw); -+ if (!pll->boost_enabled) -+ return; ++ if (core->rpm_enabled) { ++ mutex_lock(&clk_rpm_list_lock); ++ hlist_del(&core->rpm_node); ++ mutex_unlock(&clk_rpm_list_lock); ++ } + -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(0, BOOST_RECOVERY_MASK, -+ BOOST_RECOVERY_SHIFT)); -+ regmap_write(pll->boost, BOOST_BOOST_CON, -+ HIWORD_UPDATE(0, BOOST_SW_CTRL_MASK, -+ BOOST_SW_CTRL_SHIFT)); ++ clk_core_free_parent_map(core); ++ kfree_const(core->name); ++ kfree(core); +} + -+void rockchip_boost_add_core_div(struct clk_hw *hw, unsigned long prate) ++static struct clk * ++__clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) +{ -+ struct rockchip_clk_pll *pll; -+ unsigned int div; ++ int ret; ++ struct clk_core *core; ++ const struct clk_init_data *init = hw->init; + -+ if (!hw) -+ return; -+ pll = to_rockchip_clk_pll(hw); -+ if (!pll->boost_enabled || pll->boost_backup_pll_rate == prate) -+ return; ++ /* ++ * The init data is not supposed to be used outside of registration path. ++ * Set it to NULL so that provider drivers can't use it either and so that ++ * we catch use of hw->init early on in the core. ++ */ ++ hw->init = NULL; ++ ++ core = kzalloc(sizeof(*core), GFP_KERNEL); ++ if (!core) { ++ ret = -ENOMEM; ++ goto fail_out; ++ } ++ ++ kref_init(&core->ref); ++ ++ core->name = kstrdup_const(init->name, GFP_KERNEL); ++ if (!core->name) { ++ ret = -ENOMEM; ++ goto fail_name; ++ } ++ ++ if (WARN_ON(!init->ops)) { ++ ret = -EINVAL; ++ goto fail_ops; ++ } ++ core->ops = init->ops; ++ ++ core->dev = dev; ++ clk_pm_runtime_init(core); ++ core->of_node = np; ++ if (dev && dev->driver) ++ core->owner = dev->driver->owner; ++ core->hw = hw; ++ core->flags = init->flags; ++ core->num_parents = init->num_parents; ++ core->min_rate = 0; ++ core->max_rate = ULONG_MAX; ++ ++ ret = clk_core_populate_parent_map(core, init); ++ if (ret) ++ goto fail_parents; ++ ++ INIT_HLIST_HEAD(&core->clks); + -+ /* todo */ -+ if (pll->boost_backup_pll_usage == BOOST_BACKUP_PLL_USAGE_TARGET) -+ return; + /* -+ * cpu clock rate should be less than or equal to -+ * low rate when change pll rate in boost module ++ * Don't call clk_hw_create_clk() here because that would pin the ++ * provider module to itself and prevent it from ever being removed. 
+ */ -+ if (pll->boost_low_rate && prate > pll->boost_low_rate) { -+ div = DIV_ROUND_UP(prate, pll->boost_low_rate) - 1; -+ regmap_write(pll->boost, BOOST_CLK_CON, -+ HIWORD_UPDATE(div, BOOST_CORE_DIV_MASK, -+ BOOST_CORE_DIV_SHIFT)); -+ pll->boost_backup_pll_rate = prate; ++ hw->clk = alloc_clk(core, NULL, NULL); ++ if (IS_ERR(hw->clk)) { ++ ret = PTR_ERR(hw->clk); ++ goto fail_create_clk; + } -+} + -+#ifdef CONFIG_DEBUG_FS -+#include ++ clk_core_link_consumer(core, hw->clk); + -+#ifndef MODULE -+static int boost_summary_show(struct seq_file *s, void *data) -+{ -+ struct rockchip_clk_pll *pll = (struct rockchip_clk_pll *)s->private; -+ u32 boost_count = 0; -+ u32 freq_cnt0 = 0, freq_cnt1 = 0; -+ u64 freq_cnt = 0, high_freq_time = 0; -+ u32 short_count = 0, short_threshold = 0; -+ u32 interval_time = 0; ++ ret = __clk_core_init(core); ++ if (!ret) ++ return hw->clk; + -+ seq_puts(s, " device boost_count high_freq_count high_freq_time short_count short_threshold interval_count\n"); -+ seq_puts(s, "------------------------------------------------------------------------------------------------------\n"); -+ seq_printf(s, " %s\n", clk_hw_get_name(&pll->hw)); ++ clk_prepare_lock(); ++ clk_core_unlink_consumer(hw->clk); ++ clk_prepare_unlock(); + -+ regmap_read(pll->boost, BOOST_SWITCH_CNT, &boost_count); ++ free_clk(hw->clk); ++ hw->clk = NULL; + -+ regmap_read(pll->boost, BOOST_HIGH_PERF_CNT0, &freq_cnt0); -+ regmap_read(pll->boost, BOOST_HIGH_PERF_CNT1, &freq_cnt1); -+ freq_cnt = ((u64)freq_cnt1 << 32) + (u64)freq_cnt0; -+ high_freq_time = freq_cnt; -+ do_div(high_freq_time, 24); ++fail_create_clk: ++fail_parents: ++fail_ops: ++fail_name: ++ kref_put(&core->ref, __clk_release); ++fail_out: ++ return ERR_PTR(ret); ++} + -+ regmap_read(pll->boost, BOOST_SHORT_SWITCH_CNT, &short_count); -+ regmap_read(pll->boost, BOOST_STATIS_THRESHOLD, &short_threshold); -+ regmap_read(pll->boost, BOOST_SWITCH_THRESHOLD, &interval_time); ++/** ++ * dev_or_parent_of_node() - Get device node of @dev or @dev's parent ++ * @dev: Device to get device node of ++ * ++ * Return: device node pointer of @dev, or the device node pointer of ++ * @dev->parent if dev doesn't have a device node, or NULL if neither ++ * @dev or @dev->parent have a device node. ++ */ ++static struct device_node *dev_or_parent_of_node(struct device *dev) ++{ ++ struct device_node *np; + -+ seq_printf(s, "%22u %17llu %15llu %12u %16u %15u\n", -+ boost_count, freq_cnt, high_freq_time, short_count, -+ short_threshold, interval_time); ++ if (!dev) ++ return NULL; + -+ return 0; ++ np = dev_of_node(dev); ++ if (!np) ++ np = dev_of_node(dev->parent); ++ ++ return np; +} + -+static int boost_summary_open(struct inode *inode, struct file *file) ++/** ++ * clk_register - allocate a new clock, register it and return an opaque cookie ++ * @dev: device that is registering this clock ++ * @hw: link to hardware-specific clock data ++ * ++ * clk_register is the *deprecated* interface for populating the clock tree with ++ * new clock nodes. Use clk_hw_register() instead. ++ * ++ * Returns: a pointer to the newly allocated struct clk which ++ * cannot be dereferenced by driver code but may be used in conjunction with the ++ * rest of the clock API. In the event of an error clk_register will return an ++ * error code; drivers must test for an error code after calling clk_register. 
++ */ ++struct clk *clk_register(struct device *dev, struct clk_hw *hw) +{ -+ return single_open(file, boost_summary_show, inode->i_private); ++ return __clk_register(dev, dev_or_parent_of_node(dev), hw); +} ++EXPORT_SYMBOL_GPL(clk_register); + -+static const struct file_operations boost_summary_fops = { -+ .open = boost_summary_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++/** ++ * clk_hw_register - register a clk_hw and return an error code ++ * @dev: device that is registering this clock ++ * @hw: link to hardware-specific clock data ++ * ++ * clk_hw_register is the primary interface for populating the clock tree with ++ * new clock nodes. It returns an integer equal to zero indicating success or ++ * less than zero indicating failure. Drivers must test for an error code after ++ * calling clk_hw_register(). ++ */ ++int clk_hw_register(struct device *dev, struct clk_hw *hw) ++{ ++ return PTR_ERR_OR_ZERO(__clk_register(dev, dev_or_parent_of_node(dev), ++ hw)); ++} ++EXPORT_SYMBOL_GPL(clk_hw_register); + -+static int boost_config_show(struct seq_file *s, void *data) ++/* ++ * of_clk_hw_register - register a clk_hw and return an error code ++ * @node: device_node of device that is registering this clock ++ * @hw: link to hardware-specific clock data ++ * ++ * of_clk_hw_register() is the primary interface for populating the clock tree ++ * with new clock nodes when a struct device is not available, but a struct ++ * device_node is. It returns an integer equal to zero indicating success or ++ * less than zero indicating failure. Drivers must test for an error code after ++ * calling of_clk_hw_register(). ++ */ ++int of_clk_hw_register(struct device_node *node, struct clk_hw *hw) +{ -+ struct rockchip_clk_pll *pll = (struct rockchip_clk_pll *)s->private; ++ return PTR_ERR_OR_ZERO(__clk_register(NULL, node, hw)); ++} ++EXPORT_SYMBOL_GPL(of_clk_hw_register); + -+ seq_printf(s, "boost_enabled: %d\n", pll->boost_enabled); -+ seq_printf(s, "boost_low_rate: %lu\n", pll->boost_low_rate); -+ seq_printf(s, "boost_high_rate: %lu\n", pll->boost_high_rate); ++/* ++ * Empty clk_ops for unregistered clocks. These are used temporarily ++ * after clk_unregister() was called on a clock and until last clock ++ * consumer calls clk_put() and the struct clk object is freed. 
++ */ ++static int clk_nodrv_prepare_enable(struct clk_hw *hw) ++{ ++ return -ENXIO; ++} + -+ return 0; ++static void clk_nodrv_disable_unprepare(struct clk_hw *hw) ++{ ++ WARN_ON_ONCE(1); +} + -+static int boost_config_open(struct inode *inode, struct file *file) ++static int clk_nodrv_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) +{ -+ return single_open(file, boost_config_show, inode->i_private); ++ return -ENXIO; +} + -+static const struct file_operations boost_config_fops = { -+ .open = boost_config_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, ++static int clk_nodrv_set_parent(struct clk_hw *hw, u8 index) ++{ ++ return -ENXIO; ++} ++ ++static int clk_nodrv_determine_rate(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ return -ENXIO; ++} ++ ++static const struct clk_ops clk_nodrv_ops = { ++ .enable = clk_nodrv_prepare_enable, ++ .disable = clk_nodrv_disable_unprepare, ++ .prepare = clk_nodrv_prepare_enable, ++ .unprepare = clk_nodrv_disable_unprepare, ++ .determine_rate = clk_nodrv_determine_rate, ++ .set_rate = clk_nodrv_set_rate, ++ .set_parent = clk_nodrv_set_parent, +}; + -+static int boost_debug_create_one(struct rockchip_clk_pll *pll, -+ struct dentry *rootdir) ++static void clk_core_evict_parent_cache_subtree(struct clk_core *root, ++ const struct clk_core *target) +{ -+ struct dentry *pdentry, *d; ++ int i; ++ struct clk_core *child; + -+ pdentry = debugfs_lookup(clk_hw_get_name(&pll->hw), rootdir); -+ if (!pdentry) { -+ pr_err("%s: failed to lookup %s dentry\n", __func__, -+ clk_hw_get_name(&pll->hw)); -+ return -ENOMEM; -+ } ++ for (i = 0; i < root->num_parents; i++) ++ if (root->parents[i].core == target) ++ root->parents[i].core = NULL; + -+ d = debugfs_create_file("boost_summary", 0444, pdentry, -+ pll, &boost_summary_fops); -+ if (!d) { -+ pr_err("%s: failed to create boost_summary file\n", __func__); -+ return -ENOMEM; -+ } ++ hlist_for_each_entry(child, &root->children, child_node) ++ clk_core_evict_parent_cache_subtree(child, target); ++} + -+ d = debugfs_create_file("boost_config", 0444, pdentry, -+ pll, &boost_config_fops); -+ if (!d) { -+ pr_err("%s: failed to create boost config file\n", __func__); -+ return -ENOMEM; -+ } ++/* Remove this clk from all parent caches */ ++static void clk_core_evict_parent_cache(struct clk_core *core) ++{ ++ const struct hlist_head **lists; ++ struct clk_core *root; ++ ++ lockdep_assert_held(&prepare_lock); ++ ++ for (lists = all_lists; *lists; lists++) ++ hlist_for_each_entry(root, *lists, child_node) ++ clk_core_evict_parent_cache_subtree(root, core); + -+ return 0; +} + -+static int __init boost_debug_init(void) ++/** ++ * clk_unregister - unregister a currently registered clock ++ * @clk: clock to unregister ++ */ ++void clk_unregister(struct clk *clk) +{ -+ struct rockchip_clk_pll *pll; -+ struct dentry *rootdir; ++ unsigned long flags; ++ const struct clk_ops *ops; + -+ rootdir = debugfs_lookup("clk", NULL); -+ if (!rootdir) { -+ pr_err("%s: failed to lookup clk dentry\n", __func__); -+ return -ENOMEM; -+ } ++ if (!clk || WARN_ON_ONCE(IS_ERR(clk))) ++ return; + -+ mutex_lock(&clk_boost_lock); ++ clk_debug_unregister(clk->core); + -+ hlist_for_each_entry(pll, &clk_boost_list, debug_node) -+ boost_debug_create_one(pll, rootdir); ++ clk_prepare_lock(); + -+ mutex_unlock(&clk_boost_lock); ++ ops = clk->core->ops; ++ if (ops == &clk_nodrv_ops) { ++ pr_err("%s: unregistered clock: %s\n", __func__, ++ clk->core->name); ++ clk_prepare_unlock(); ++ return; ++ 
} ++ /* ++ * Assign empty clock ops for consumers that might still hold ++ * a reference to this clock. ++ */ ++ flags = clk_enable_lock(); ++ clk->core->ops = &clk_nodrv_ops; ++ clk_enable_unlock(flags); + -+ return 0; -+} -+late_initcall(boost_debug_init); -+#endif /* MODULE */ -+#endif /* CONFIG_DEBUG_FS */ -+#endif /* CONFIG_ROCKCHIP_CLK_BOOST */ -diff --git a/drivers/clk/rockchip-oh/clk-pvtm.c b/drivers/clk/rockchip-oh/clk-pvtm.c -new file mode 100644 -index 000000000..c748589dd ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-pvtm.c -@@ -0,0 +1,311 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd -+ */ ++ if (ops->terminate) ++ ops->terminate(clk->core->hw); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ if (!hlist_empty(&clk->core->children)) { ++ struct clk_core *child; ++ struct hlist_node *t; + -+#define CLK_SEL_EXTERNAL_32K 0 -+#define CLK_SEL_INTERNAL_PVTM 1 ++ /* Reparent all children to the orphan list. */ ++ hlist_for_each_entry_safe(child, t, &clk->core->children, ++ child_node) ++ clk_core_set_parent_nolock(child, NULL); ++ } + -+#define wr_msk_bit(v, off, msk) ((v) << (off) | (msk << (16 + (off)))) ++ clk_core_evict_parent_cache(clk->core); + -+struct rockchip_clock_pvtm; ++ hlist_del_init(&clk->core->child_node); + -+struct rockchip_clock_pvtm_info { -+ u32 con; -+ u32 sta; -+ u32 sel_con; -+ u32 sel_shift; -+ u32 sel_value; -+ u32 sel_mask; -+ u32 div_shift; -+ u32 div_mask; ++ if (clk->core->prepare_count) ++ pr_warn("%s: unregistering prepared clock: %s\n", ++ __func__, clk->core->name); + -+ u32 (*get_value)(struct rockchip_clock_pvtm *pvtm, -+ unsigned int time_us); -+ int (*init_freq)(struct rockchip_clock_pvtm *pvtm); -+ int (*sel_enable)(struct rockchip_clock_pvtm *pvtm); -+}; ++ if (clk->core->protect_count) ++ pr_warn("%s: unregistering protected clock: %s\n", ++ __func__, clk->core->name); ++ clk_prepare_unlock(); + -+struct rockchip_clock_pvtm { -+ const struct rockchip_clock_pvtm_info *info; -+ struct regmap *grf; -+ struct clk *pvtm_clk; -+ struct clk *clk; -+ unsigned long rate; -+}; ++ kref_put(&clk->core->ref, __clk_release); ++ free_clk(clk); ++} ++EXPORT_SYMBOL_GPL(clk_unregister); + -+static unsigned long xin32k_pvtm_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) ++/** ++ * clk_hw_unregister - unregister a currently registered clk_hw ++ * @hw: hardware-specific clock data to unregister ++ */ ++void clk_hw_unregister(struct clk_hw *hw) +{ -+ return 32768; ++ clk_unregister(hw->clk); +} ++EXPORT_SYMBOL_GPL(clk_hw_unregister); + -+static const struct clk_ops xin32k_pvtm = { -+ .recalc_rate = xin32k_pvtm_recalc_rate, -+}; ++static void devm_clk_unregister_cb(struct device *dev, void *res) ++{ ++ clk_unregister(*(struct clk **)res); ++} + -+static void rockchip_clock_pvtm_delay(unsigned int delay) ++static void devm_clk_hw_unregister_cb(struct device *dev, void *res) +{ -+ unsigned int ms = delay / 1000; -+ unsigned int us = delay % 1000; ++ clk_hw_unregister(*(struct clk_hw **)res); ++} + -+ if (ms > 0) { -+ if (ms < 20) -+ us += ms * 1000; -+ else -+ msleep(ms); ++/** ++ * devm_clk_register - resource managed clk_register() ++ * @dev: device that is registering this clock ++ * @hw: link to hardware-specific clock data ++ * ++ * Managed clk_register(). This function is *deprecated*, use devm_clk_hw_register() instead. 
++ * ++ * Clocks returned from this function are automatically clk_unregister()ed on ++ * driver detach. See clk_register() for more information. ++ */ ++struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw) ++{ ++ struct clk *clk; ++ struct clk **clkp; ++ ++ clkp = devres_alloc(devm_clk_unregister_cb, sizeof(*clkp), GFP_KERNEL); ++ if (!clkp) ++ return ERR_PTR(-ENOMEM); ++ ++ clk = clk_register(dev, hw); ++ if (!IS_ERR(clk)) { ++ *clkp = clk; ++ devres_add(dev, clkp); ++ } else { ++ devres_free(clkp); + } + -+ if (us >= 10) -+ usleep_range(us, us + 100); -+ else -+ udelay(us); ++ return clk; +} ++EXPORT_SYMBOL_GPL(devm_clk_register); + -+static int rockchip_clock_sel_internal_pvtm(struct rockchip_clock_pvtm *pvtm) ++/** ++ * devm_clk_hw_register - resource managed clk_hw_register() ++ * @dev: device that is registering this clock ++ * @hw: link to hardware-specific clock data ++ * ++ * Managed clk_hw_register(). Clocks registered by this function are ++ * automatically clk_hw_unregister()ed on driver detach. See clk_hw_register() ++ * for more information. ++ */ ++int devm_clk_hw_register(struct device *dev, struct clk_hw *hw) +{ -+ int ret = 0; ++ struct clk_hw **hwp; ++ int ret; + -+ ret = regmap_write(pvtm->grf, pvtm->info->sel_con, -+ wr_msk_bit(pvtm->info->sel_value, -+ pvtm->info->sel_shift, -+ pvtm->info->sel_mask)); -+ if (ret != 0) -+ pr_err("%s: fail to write register\n", __func__); ++ hwp = devres_alloc(devm_clk_hw_unregister_cb, sizeof(*hwp), GFP_KERNEL); ++ if (!hwp) ++ return -ENOMEM; ++ ++ ret = clk_hw_register(dev, hw); ++ if (!ret) { ++ *hwp = hw; ++ devres_add(dev, hwp); ++ } else { ++ devres_free(hwp); ++ } + + return ret; +} ++EXPORT_SYMBOL_GPL(devm_clk_hw_register); + -+/* get pmu pvtm value */ -+static u32 rockchip_clock_pvtm_get_value(struct rockchip_clock_pvtm *pvtm, -+ u32 time_us) ++static void devm_clk_release(struct device *dev, void *res) +{ -+ const struct rockchip_clock_pvtm_info *info = pvtm->info; -+ u32 val = 0, sta = 0; -+ u32 clk_cnt, check_cnt; -+ -+ /* 24m clk ,24cnt=1us */ -+ clk_cnt = time_us * 24; ++ clk_put(*(struct clk **)res); ++} + -+ regmap_write(pvtm->grf, info->con + 0x4, clk_cnt); -+ regmap_write(pvtm->grf, info->con, wr_msk_bit(3, 0, 0x3)); ++/** ++ * devm_clk_hw_get_clk - resource managed clk_hw_get_clk() ++ * @dev: device that is registering this clock ++ * @hw: clk_hw associated with the clk being consumed ++ * @con_id: connection ID string on device ++ * ++ * Managed clk_hw_get_clk(). Clocks got with this function are ++ * automatically clk_put() on driver detach. See clk_put() ++ * for more information. 
++ */ ++struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw, ++ const char *con_id) ++{ ++ struct clk *clk; ++ struct clk **clkp; + -+ rockchip_clock_pvtm_delay(time_us); ++ /* This should not happen because it would mean we have drivers ++ * passing around clk_hw pointers instead of having the caller use ++ * proper clk_get() style APIs ++ */ ++ WARN_ON_ONCE(dev != hw->core->dev); + -+ check_cnt = 100; -+ while (check_cnt) { -+ regmap_read(pvtm->grf, info->sta, &sta); -+ if (sta & 0x1) -+ break; -+ udelay(4); -+ check_cnt--; -+ } ++ clkp = devres_alloc(devm_clk_release, sizeof(*clkp), GFP_KERNEL); ++ if (!clkp) ++ return ERR_PTR(-ENOMEM); + -+ if (check_cnt) { -+ regmap_read(pvtm->grf, info->sta + 0x4, &val); ++ clk = clk_hw_get_clk(hw, con_id); ++ if (!IS_ERR(clk)) { ++ *clkp = clk; ++ devres_add(dev, clkp); + } else { -+ pr_err("%s: wait pvtm_done timeout!\n", __func__); -+ val = 0; ++ devres_free(clkp); + } + -+ regmap_write(pvtm->grf, info->con, wr_msk_bit(0, 0, 0x3)); -+ -+ return val; ++ return clk; +} ++EXPORT_SYMBOL_GPL(devm_clk_hw_get_clk); + -+static int rockchip_clock_pvtm_init_freq(struct rockchip_clock_pvtm *pvtm) ++/* ++ * clkdev helpers ++ */ ++ ++void __clk_put(struct clk *clk) +{ -+ u32 pvtm_cnt = 0; -+ u32 div, time_us; -+ int ret = 0; ++ struct module *owner; + -+ time_us = 1000; -+ pvtm_cnt = pvtm->info->get_value(pvtm, time_us); -+ pr_debug("get pvtm_cnt = %d\n", pvtm_cnt); ++ if (!clk || WARN_ON_ONCE(IS_ERR(clk))) ++ return; + -+ /* set pvtm_div to get rate */ -+ div = DIV_ROUND_UP(1000 * pvtm_cnt, pvtm->rate); -+ if (div > pvtm->info->div_mask) { -+ pr_err("pvtm_div out of bounary! set max instead\n"); -+ div = pvtm->info->div_mask; ++ clk_prepare_lock(); ++ ++ /* ++ * Before calling clk_put, all calls to clk_rate_exclusive_get() from a ++ * given user should be balanced with calls to clk_rate_exclusive_put() ++ * and by that same consumer ++ */ ++ if (WARN_ON(clk->exclusive_count)) { ++ /* We voiced our concern, let's sanitize the situation */ ++ clk->core->protect_count -= (clk->exclusive_count - 1); ++ clk_core_rate_unprotect(clk->core); ++ clk->exclusive_count = 0; + } + -+ pr_debug("set div %d, rate %luKHZ\n", div, pvtm->rate); -+ ret = regmap_write(pvtm->grf, pvtm->info->con, -+ wr_msk_bit(div, pvtm->info->div_shift, -+ pvtm->info->div_mask)); -+ if (ret != 0) -+ goto out; ++ hlist_del(&clk->clks_node); + -+ /* pmu pvtm oscilator enable */ -+ ret = regmap_write(pvtm->grf, pvtm->info->con, -+ wr_msk_bit(1, 1, 0x1)); -+ if (ret != 0) -+ goto out; ++ /* If we had any boundaries on that clock, let's drop them. */ ++ if (clk->min_rate > 0 || clk->max_rate < ULONG_MAX) ++ clk_set_rate_range_nolock(clk, 0, ULONG_MAX); + -+ ret = pvtm->info->sel_enable(pvtm); -+out: -+ if (ret != 0) -+ pr_err("%s: fail to write register\n", __func__); ++ clk_prepare_unlock(); + -+ return ret; ++ owner = clk->core->owner; ++ kref_put(&clk->core->ref, __clk_release); ++ module_put(owner); ++ free_clk(clk); +} + -+static int clock_pvtm_regitstor(struct device *dev, -+ struct rockchip_clock_pvtm *pvtm) ++/*** clk rate change notifiers ***/ ++ ++/** ++ * clk_notifier_register - add a clk rate change notifier ++ * @clk: struct clk * to watch ++ * @nb: struct notifier_block * with callback info ++ * ++ * Request notification when clk's rate changes. This uses an SRCU ++ * notifier because we want it to block and notifier unregistrations are ++ * uncommon. 
The callbacks associated with the notifier must not ++ * re-enter into the clk framework by calling any top-level clk APIs; ++ * this will cause a nested prepare_lock mutex. ++ * ++ * In all notification cases (pre, post and abort rate change) the original ++ * clock rate is passed to the callback via struct clk_notifier_data.old_rate ++ * and the new frequency is passed via struct clk_notifier_data.new_rate. ++ * ++ * clk_notifier_register() must be called from non-atomic context. ++ * Returns -EINVAL if called with null arguments, -ENOMEM upon ++ * allocation failure; otherwise, passes along the return value of ++ * srcu_notifier_chain_register(). ++ */ ++int clk_notifier_register(struct clk *clk, struct notifier_block *nb) +{ -+ struct clk_init_data init = {}; -+ struct clk_hw *clk_hw; ++ struct clk_notifier *cn; ++ int ret = -ENOMEM; + -+ /* Init the xin32k_pvtm */ -+ pvtm->info->init_freq(pvtm); ++ if (!clk || !nb) ++ return -EINVAL; + -+ init.parent_names = NULL; -+ init.num_parents = 0; -+ init.name = "xin32k_pvtm"; -+ init.ops = &xin32k_pvtm; ++ clk_prepare_lock(); + -+ clk_hw = devm_kzalloc(dev, sizeof(*clk_hw), GFP_KERNEL); -+ if (!clk_hw) -+ return -ENOMEM; -+ clk_hw->init = &init; ++ /* search the list of notifiers for this clk */ ++ list_for_each_entry(cn, &clk_notifier_list, node) ++ if (cn->clk == clk) ++ goto found; + -+ /* optional override of the clockname */ -+ of_property_read_string_index(dev->of_node, "clock-output-names", -+ 0, &init.name); -+ pvtm->clk = devm_clk_register(dev, clk_hw); -+ if (IS_ERR(pvtm->clk)) -+ return PTR_ERR(pvtm->clk); ++ /* if clk wasn't in the notifier list, allocate new clk_notifier */ ++ cn = kzalloc(sizeof(*cn), GFP_KERNEL); ++ if (!cn) ++ goto out; + -+ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, -+ pvtm->clk); -+} ++ cn->clk = clk; ++ srcu_init_notifier_head(&cn->notifier_head); + -+static const struct rockchip_clock_pvtm_info rk3368_pvtm_data = { -+ .con = 0x180, -+ .sta = 0x190, -+ .sel_con = 0x100, -+ .sel_shift = 6, -+ .sel_value = CLK_SEL_INTERNAL_PVTM, -+ .sel_mask = 0x1, -+ .div_shift = 2, -+ .div_mask = 0x3f, ++ list_add(&cn->node, &clk_notifier_list); + -+ .sel_enable = rockchip_clock_sel_internal_pvtm, -+ .get_value = rockchip_clock_pvtm_get_value, -+ .init_freq = rockchip_clock_pvtm_init_freq, -+}; ++found: ++ ret = srcu_notifier_chain_register(&cn->notifier_head, nb); + -+static const struct of_device_id rockchip_clock_pvtm_match[] = { -+ { -+ .compatible = "rockchip,rk3368-pvtm-clock", -+ .data = (void *)&rk3368_pvtm_data, -+ }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, rockchip_clock_pvtm_match); ++ clk->core->notifier_count++; + -+static int rockchip_clock_pvtm_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ struct rockchip_clock_pvtm *pvtm; -+ int error; -+ u32 rate; ++out: ++ clk_prepare_unlock(); + -+ pvtm = devm_kzalloc(dev, sizeof(*pvtm), GFP_KERNEL); -+ if (!pvtm) -+ return -ENOMEM; ++ return ret; ++} ++EXPORT_SYMBOL_GPL(clk_notifier_register); + -+ match = of_match_node(rockchip_clock_pvtm_match, np); -+ if (!match) -+ return -ENXIO; ++/** ++ * clk_notifier_unregister - remove a clk rate change notifier ++ * @clk: struct clk * ++ * @nb: struct notifier_block * with callback info ++ * ++ * Request no further notification for changes to 'clk' and frees memory ++ * allocated in clk_notifier_register. 
++ * ++ * Returns -EINVAL if called with null arguments; otherwise, passes ++ * along the return value of srcu_notifier_chain_unregister(). ++ */ ++int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb) ++{ ++ struct clk_notifier *cn; ++ int ret = -ENOENT; + -+ pvtm->info = (const struct rockchip_clock_pvtm_info *)match->data; -+ if (!pvtm->info) ++ if (!clk || !nb) + return -EINVAL; + -+ if (!dev->parent || !dev->parent->of_node) -+ return -EINVAL; ++ clk_prepare_lock(); + -+ pvtm->grf = syscon_node_to_regmap(dev->parent->of_node); -+ if (IS_ERR(pvtm->grf)) -+ return PTR_ERR(pvtm->grf); ++ list_for_each_entry(cn, &clk_notifier_list, node) { ++ if (cn->clk == clk) { ++ ret = srcu_notifier_chain_unregister(&cn->notifier_head, nb); + -+ if (!of_property_read_u32(np, "pvtm-rate", &rate)) -+ pvtm->rate = rate; -+ else -+ pvtm->rate = 32768; ++ clk->core->notifier_count--; + -+ pvtm->pvtm_clk = devm_clk_get(&pdev->dev, "pvtm_pmu_clk"); -+ if (IS_ERR(pvtm->pvtm_clk)) { -+ error = PTR_ERR(pvtm->pvtm_clk); -+ if (error != -EPROBE_DEFER) -+ dev_err(&pdev->dev, -+ "failed to get pvtm core clock: %d\n", -+ error); -+ goto out_probe; ++ /* XXX the notifier code should handle this better */ ++ if (!cn->notifier_head.head) { ++ srcu_cleanup_notifier_head(&cn->notifier_head); ++ list_del(&cn->node); ++ kfree(cn); ++ } ++ break; ++ } + } + -+ error = clk_prepare_enable(pvtm->pvtm_clk); -+ if (error) { -+ dev_err(&pdev->dev, "failed to enable the clock: %d\n", -+ error); -+ goto out_probe; -+ } ++ clk_prepare_unlock(); + -+ platform_set_drvdata(pdev, pvtm); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(clk_notifier_unregister); + -+ error = clock_pvtm_regitstor(&pdev->dev, pvtm); -+ if (error) { -+ dev_err(&pdev->dev, "failed to registor clock: %d\n", -+ error); -+ goto out_clk_put; -+ } ++struct clk_notifier_devres { ++ struct clk *clk; ++ struct notifier_block *nb; ++}; + -+ return error; ++static void devm_clk_notifier_release(struct device *dev, void *res) ++{ ++ struct clk_notifier_devres *devres = res; + -+out_clk_put: -+ clk_disable_unprepare(pvtm->pvtm_clk); -+out_probe: -+ return error; ++ clk_notifier_unregister(devres->clk, devres->nb); +} + -+static int rockchip_clock_pvtm_remove(struct platform_device *pdev) ++int devm_clk_notifier_register(struct device *dev, struct clk *clk, ++ struct notifier_block *nb) +{ -+ struct rockchip_clock_pvtm *pvtm = platform_get_drvdata(pdev); -+ struct device_node *np = pdev->dev.of_node; -+ -+ of_clk_del_provider(np); -+ clk_disable_unprepare(pvtm->pvtm_clk); -+ -+ return 0; -+} ++ struct clk_notifier_devres *devres; ++ int ret; + -+static struct platform_driver rockchip_clock_pvtm_driver = { -+ .driver = { -+ .name = "rockchip-clcok-pvtm", -+ .of_match_table = rockchip_clock_pvtm_match, -+ }, -+ .probe = rockchip_clock_pvtm_probe, -+ .remove = rockchip_clock_pvtm_remove, -+}; ++ devres = devres_alloc(devm_clk_notifier_release, ++ sizeof(*devres), GFP_KERNEL); + -+module_platform_driver(rockchip_clock_pvtm_driver); ++ if (!devres) ++ return -ENOMEM; + -+MODULE_DESCRIPTION("Rockchip Clock Pvtm Driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clk/rockchip-oh/clk-px30.c b/drivers/clk/rockchip-oh/clk-px30.c -new file mode 100644 -index 000000000..d76aaf04b ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-px30.c -@@ -0,0 +1,1139 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
-+ * Author: Elaine Zhang -+ */ ++ ret = clk_notifier_register(clk, nb); ++ if (!ret) { ++ devres->clk = clk; ++ devres->nb = nb; ++ devres_add(dev, devres); ++ } else { ++ devres_free(devres); ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++ return ret; ++} ++EXPORT_SYMBOL_GPL(devm_clk_notifier_register); + -+#define PX30_GRF_SOC_STATUS0 0x480 ++#ifdef CONFIG_OF ++static void clk_core_reparent_orphans(void) ++{ ++ clk_prepare_lock(); ++ clk_core_reparent_orphans_nolock(); ++ clk_prepare_unlock(); ++} + -+enum px30_plls { -+ apll, dpll, cpll, npll, apll_b_h, apll_b_l, -+}; ++/** ++ * struct of_clk_provider - Clock provider registration structure ++ * @link: Entry in global list of clock providers ++ * @node: Pointer to device tree node of clock provider ++ * @get: Get clock callback. Returns NULL or a struct clk for the ++ * given clock specifier ++ * @get_hw: Get clk_hw callback. Returns NULL, ERR_PTR or a ++ * struct clk_hw for the given clock specifier ++ * @data: context pointer to be passed into @get callback ++ */ ++struct of_clk_provider { ++ struct list_head link; + -+enum px30_pmu_plls { -+ gpll, ++ struct device_node *node; ++ struct clk *(*get)(struct of_phandle_args *clkspec, void *data); ++ struct clk_hw *(*get_hw)(struct of_phandle_args *clkspec, void *data); ++ void *data; +}; + -+static struct rockchip_pll_rate_table px30_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ 
RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++extern struct of_device_id __clk_of_table; ++static const struct of_device_id __clk_of_table_sentinel ++ __used __section("__clk_of_table_end"); + -+#define PX30_DIV_ACLKM_MASK 0x7 -+#define PX30_DIV_ACLKM_SHIFT 12 -+#define PX30_DIV_PCLK_DBG_MASK 0xf -+#define PX30_DIV_PCLK_DBG_SHIFT 8 ++static LIST_HEAD(of_clk_providers); ++static DEFINE_MUTEX(of_clk_mutex); + -+#define PX30_CLKSEL0(_aclk_core, _pclk_dbg) \ -+{ \ -+ .reg = PX30_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, PX30_DIV_ACLKM_MASK, \ -+ PX30_DIV_ACLKM_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, PX30_DIV_PCLK_DBG_MASK, \ -+ PX30_DIV_PCLK_DBG_SHIFT), \ ++struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, ++ void *data) ++{ ++ return data; +} ++EXPORT_SYMBOL_GPL(of_clk_src_simple_get); + -+#define PX30_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ PX30_CLKSEL0(_aclk_core, _pclk_dbg), \ -+ }, \ ++struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, void *data) ++{ ++ return data; +} ++EXPORT_SYMBOL_GPL(of_clk_hw_simple_get); + -+static struct rockchip_cpuclk_rate_table px30_cpuclk_rates[] __initdata = { -+ PX30_CPUCLK_RATE(1608000000, 1, 7), -+ PX30_CPUCLK_RATE(1584000000, 1, 7), -+ PX30_CPUCLK_RATE(1560000000, 1, 7), -+ PX30_CPUCLK_RATE(1536000000, 1, 7), -+ PX30_CPUCLK_RATE(1512000000, 1, 7), -+ PX30_CPUCLK_RATE(1488000000, 1, 5), -+ PX30_CPUCLK_RATE(1464000000, 1, 5), -+ PX30_CPUCLK_RATE(1440000000, 1, 5), -+ PX30_CPUCLK_RATE(1416000000, 1, 5), -+ PX30_CPUCLK_RATE(1392000000, 1, 5), -+ PX30_CPUCLK_RATE(1368000000, 1, 5), -+ PX30_CPUCLK_RATE(1344000000, 1, 5), -+ PX30_CPUCLK_RATE(1320000000, 1, 5), -+ PX30_CPUCLK_RATE(1296000000, 1, 5), -+ PX30_CPUCLK_RATE(1272000000, 1, 5), -+ PX30_CPUCLK_RATE(1248000000, 1, 5), -+ PX30_CPUCLK_RATE(1224000000, 1, 5), -+ PX30_CPUCLK_RATE(1200000000, 1, 5), -+ PX30_CPUCLK_RATE(1104000000, 1, 5), -+ PX30_CPUCLK_RATE(1008000000, 1, 5), -+ PX30_CPUCLK_RATE(912000000, 1, 5), -+ PX30_CPUCLK_RATE(816000000, 1, 3), -+ PX30_CPUCLK_RATE(696000000, 1, 3), -+ PX30_CPUCLK_RATE(600000000, 1, 3), -+ PX30_CPUCLK_RATE(408000000, 1, 1), -+ PX30_CPUCLK_RATE(312000000, 1, 1), -+ PX30_CPUCLK_RATE(216000000, 1, 1), -+ PX30_CPUCLK_RATE(96000000, 1, 1), -+}; ++struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data) ++{ ++ struct clk_onecell_data *clk_data = data; ++ unsigned int idx = clkspec->args[0]; + -+static const struct rockchip_cpuclk_reg_data px30_cpuclk_data = { -+ .core_reg[0] = PX30_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0xf, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 7, -+ .mux_core_mask = 0x1, -+ .pll_name = "pll_apll", -+}; ++ if (idx >= clk_data->clk_num) { ++ pr_err("%s: invalid clock index %u\n", __func__, idx); ++ return ERR_PTR(-EINVAL); ++ } + -+PNAME(mux_pll_p) = { "xin24m"}; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k_pmu" }; -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_ddrstdby_p) = { "clk_ddrphy1x", "clk_stdby_2wrap" }; -+PNAME(mux_gpll_dmycpll_usb480m_npll_p) = { "gpll", "dummy_cpll", "usb480m", "npll" }; -+PNAME(mux_gpll_dmycpll_usb480m_dmynpll_p) = { "gpll", "dummy_cpll", "usb480m", "dummy_npll" }; -+PNAME(mux_cpll_npll_p) = { "cpll", "npll" }; -+PNAME(mux_npll_cpll_p) = { "npll", "cpll" }; 
-+PNAME(mux_gpll_cpll_p) = { "gpll", "dummy_cpll" }; -+PNAME(mux_gpll_npll_p) = { "gpll", "dummy_npll" }; -+PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m"}; -+PNAME(mux_xin24m_gpll_p) = { "xin24m", "gpll"}; -+PNAME(mux_gpll_cpll_npll_p) = { "gpll", "dummy_cpll", "dummy_npll" }; -+PNAME(mux_gpll_cpll_npll_xin24m_p) = { "gpll", "dummy_cpll", "dummy_npll", "xin24m" }; -+PNAME(mux_gpll_xin24m_npll_p) = { "gpll", "xin24m", "dummy_npll"}; -+PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; -+PNAME(mux_i2s0_tx_p) = { "clk_i2s0_tx_src", "clk_i2s0_tx_frac", "mclk_i2s0_tx_in", "xin12m"}; -+PNAME(mux_i2s0_rx_p) = { "clk_i2s0_rx_src", "clk_i2s0_rx_frac", "mclk_i2s0_rx_in", "xin12m"}; -+PNAME(mux_i2s1_p) = { "clk_i2s1_src", "clk_i2s1_frac", "i2s1_clkin", "xin12m"}; -+PNAME(mux_i2s2_p) = { "clk_i2s2_src", "clk_i2s2_frac", "i2s2_clkin", "xin12m"}; -+PNAME(mux_i2s0_tx_out_p) = { "clk_i2s0_tx", "xin12m", "clk_i2s0_rx"}; -+PNAME(mux_i2s0_rx_out_p) = { "clk_i2s0_rx", "xin12m", "clk_i2s0_tx"}; -+PNAME(mux_i2s1_out_p) = { "clk_i2s1", "xin12m"}; -+PNAME(mux_i2s2_out_p) = { "clk_i2s2", "xin12m"}; -+PNAME(mux_i2s0_tx_rx_p) = { "clk_i2s0_tx_mux", "clk_i2s0_rx_mux"}; -+PNAME(mux_i2s0_rx_tx_p) = { "clk_i2s0_rx_mux", "clk_i2s0_tx_mux"}; -+PNAME(mux_uart_src_p) = { "gpll", "xin24m", "usb480m", "dummy_npll" }; -+PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac" }; -+PNAME(mux_uart2_p) = { "clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac" }; -+PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac" }; -+PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac" }; -+PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac" }; -+PNAME(mux_cif_out_p) = { "xin24m", "dummy_cpll", "dummy_npll", "usb480m" }; -+PNAME(mux_dclk_vopb_p) = { "dclk_vopb_src", "dummy", "xin24m" }; -+PNAME(mux_dclk_vopl_p) = { "dclk_vopl_src", "dummy", "xin24m" }; -+PNAME(mux_nandc_p) = { "clk_nandc_div", "clk_nandc_div50" }; -+PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; -+PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; -+PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; -+PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; -+PNAME(mux_gmac_rmii_sel_p) = { "clk_gmac_rx_tx_div20", "clk_gmac_rx_tx_div2" }; -+PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac", }; -+PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; -+PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac" }; -+PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_gpu_p) = { "clk_gpu_div", "clk_gpu_np5" }; ++ return clk_data->clks[idx]; ++} ++EXPORT_SYMBOL_GPL(of_clk_src_onecell_get); + -+static struct rockchip_pll_clock px30_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IS_CRITICAL, PX30_PLL_CON(0), -+ PX30_MODE_CON, 0, 0, 0, px30_pll_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IS_CRITICAL, PX30_PLL_CON(8), -+ PX30_MODE_CON, 4, 1, 0, NULL), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, PX30_PLL_CON(16), -+ PX30_MODE_CON, 2, 2, 0, px30_pll_rates), -+ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, -+ CLK_IS_CRITICAL, PX30_PLL_CON(24), -+ PX30_MODE_CON, 6, 4, 0, px30_pll_rates), -+}; ++struct clk_hw * ++of_clk_hw_onecell_get(struct of_phandle_args *clkspec, void *data) ++{ ++ struct clk_hw_onecell_data *hw_data = data; ++ unsigned int idx = 
clkspec->args[0]; + -+static struct rockchip_pll_clock px30_pmu_pll_clks[] __initdata = { -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, 0, PX30_PMU_PLL_CON(0), -+ PX30_PMU_MODE, 0, 3, 0, px30_pll_rates), -+}; ++ if (idx >= hw_data->num) { ++ pr_err("%s: invalid index %u\n", __func__, idx); ++ return ERR_PTR(-EINVAL); ++ } + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ return hw_data->hws[idx]; ++} ++EXPORT_SYMBOL_GPL(of_clk_hw_onecell_get); + -+static struct rockchip_clk_branch px30_pdm_fracmux __initdata = -+ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(26), 15, 1, MFLAGS); ++/** ++ * of_clk_add_provider() - Register a clock provider for a node ++ * @np: Device node pointer associated with clock provider ++ * @clk_src_get: callback for decoding clock ++ * @data: context pointer for @clk_src_get callback. ++ * ++ * This function is *deprecated*. Use of_clk_add_hw_provider() instead. ++ */ ++int of_clk_add_provider(struct device_node *np, ++ struct clk *(*clk_src_get)(struct of_phandle_args *clkspec, ++ void *data), ++ void *data) ++{ ++ struct of_clk_provider *cp; ++ int ret; + -+static struct rockchip_clk_branch px30_i2s0_tx_fracmux __initdata = -+ MUX(SCLK_I2S0_TX_MUX, "clk_i2s0_tx_mux", mux_i2s0_tx_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(28), 10, 2, MFLAGS); ++ if (!np) ++ return 0; + -+static struct rockchip_clk_branch px30_i2s0_rx_fracmux __initdata = -+ MUX(SCLK_I2S0_RX_MUX, "clk_i2s0_rx_mux", mux_i2s0_rx_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(58), 10, 2, MFLAGS); ++ cp = kzalloc(sizeof(*cp), GFP_KERNEL); ++ if (!cp) ++ return -ENOMEM; + -+static struct rockchip_clk_branch px30_i2s1_fracmux __initdata = -+ MUX(0, "clk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(30), 10, 2, MFLAGS); ++ cp->node = of_node_get(np); ++ cp->data = data; ++ cp->get = clk_src_get; + -+static struct rockchip_clk_branch px30_i2s2_fracmux __initdata = -+ MUX(0, "clk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(32), 10, 2, MFLAGS); ++ mutex_lock(&of_clk_mutex); ++ list_add(&cp->link, &of_clk_providers); ++ mutex_unlock(&of_clk_mutex); ++ pr_debug("Added clock from %pOF\n", np); + -+static struct rockchip_clk_branch px30_uart1_fracmux __initdata = -+ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(35), 14, 2, MFLAGS); ++ clk_core_reparent_orphans(); + -+static struct rockchip_clk_branch px30_uart2_fracmux __initdata = -+ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(38), 14, 2, MFLAGS); ++ ret = of_clk_set_defaults(np, true); ++ if (ret < 0) ++ of_clk_del_provider(np); + -+static struct rockchip_clk_branch px30_uart3_fracmux __initdata = -+ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(41), 14, 2, MFLAGS); ++ fwnode_dev_initialized(&np->fwnode, true); + -+static struct rockchip_clk_branch px30_uart4_fracmux __initdata = -+ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(44), 14, 2, MFLAGS); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(of_clk_add_provider); + -+static struct rockchip_clk_branch px30_uart5_fracmux __initdata = -+ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(47), 14, 2, MFLAGS); ++/** ++ * of_clk_add_hw_provider() - Register a clock provider for a node ++ * @np: Device node pointer associated with clock provider ++ * @get: callback for decoding clk_hw ++ * @data: context 
pointer for @get callback. ++ */ ++int of_clk_add_hw_provider(struct device_node *np, ++ struct clk_hw *(*get)(struct of_phandle_args *clkspec, ++ void *data), ++ void *data) ++{ ++ struct of_clk_provider *cp; ++ int ret; + -+static struct rockchip_clk_branch px30_rtc32k_pmu_fracmux __initdata = -+ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, -+ PX30_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); ++ if (!np) ++ return 0; + -+static struct rockchip_clk_branch px30_uart0_pmu_fracmux __initdata = -+ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); ++ cp = kzalloc(sizeof(*cp), GFP_KERNEL); ++ if (!cp) ++ return -ENOMEM; + -+static struct rockchip_clk_branch px30_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++ cp->node = of_node_get(np); ++ cp->data = data; ++ cp->get_hw = get; + -+ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ PX30_MODE_CON, 8, 2, MFLAGS), -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ mutex_lock(&of_clk_mutex); ++ list_add(&cp->link, &of_clk_providers); ++ mutex_unlock(&of_clk_mutex); ++ pr_debug("Added clk_hw provider from %pOF\n", np); + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ clk_core_reparent_orphans(); + -+ /* PD_CORE */ -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ PX30_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ PX30_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "aclk_core_prf", "aclk_core", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(17), 5, GFLAGS), -+ GATE(0, "pclk_dbg_niu", "pclk_dbg", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(0, "pclk_core_dbg", "pclk_dbg", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "pclk_core_grf", "pclk_dbg", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(17), 6, GFLAGS), ++ ret = of_clk_set_defaults(np, true); ++ if (ret < 0) ++ of_clk_del_provider(np); + -+ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(SCLK_PVTM, "clk_pvtm", "xin24m", 0, -+ PX30_CLKGATE_CON(17), 4, GFLAGS), ++ fwnode_dev_initialized(&np->fwnode, true); + -+ /* PD_GPU */ -+ GATE(SCLK_GPU, "clk_gpu", "clk_gpu_src", 0, -+ PX30_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_gpu", "clk_gpu", CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(1), 13, 2, DFLAGS, -+ PX30_CLKGATE_CON(17), 10, GFLAGS), -+ GATE(0, "aclk_gpu_niu", "aclk_gpu", CLK_IS_CRITICAL, -+ PX30_CLKGATE_CON(0), 11, GFLAGS), -+ GATE(0, "aclk_gpu_prf", "aclk_gpu", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(0, "pclk_gpu_grf", "aclk_gpu", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(17), 9, GFLAGS), ++ return ret; ++} ++EXPORT_SYMBOL_GPL(of_clk_add_hw_provider); + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ ++static void devm_of_clk_release_provider(struct device *dev, void *res) ++{ ++ of_clk_del_provider(*(struct device_node **)res); ++} + -+ /* PD_DDR */ -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 13, GFLAGS), 
-+ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, -+ CLK_IGNORE_UNUSED, PX30_CLKSEL_CON(2), 7, 1, 0, 3, -+ ROCKCHIP_DDRCLK_SIP_V2), -+ COMPOSITE_NODIV(0, "clk_ddrstdby", mux_ddrstdby_p, CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(2), 4, 1, MFLAGS, -+ PX30_CLKGATE_CON(1), 13, GFLAGS), -+ GATE(0, "aclk_split", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(0, "clk_msch", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(0, "aclk_ddrc", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(0, "clk_core_ddrc", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(0, "aclk_cmd_buff", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(0, "clk_ddrmon", "clk_ddrphy1x", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 11, GFLAGS), ++/* ++ * We allow a child device to use its parent device as the clock provider node ++ * for cases like MFD sub-devices where the child device driver wants to use ++ * devm_*() APIs but not list the device in DT as a sub-node. ++ */ ++static struct device_node *get_clk_provider_node(struct device *dev) ++{ ++ struct device_node *np, *parent_np; + -+ GATE(0, "clk_ddrmon_timer", "xin24m", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(0), 15, GFLAGS), ++ np = dev->of_node; ++ parent_np = dev->parent ? dev->parent->of_node : NULL; + -+ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(2), 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(0, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(0, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(0, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(0, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 14, GFLAGS), -+ GATE(0, "pclk_cmdbuff", "pclk_ddr", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(1), 3, GFLAGS), ++ if (!of_property_present(np, "#clock-cells")) ++ if (of_property_present(parent_np, "#clock-cells")) ++ np = parent_np; + -+ /* -+ * Clock-Architecture Diagram 5 -+ */ ++ return np; ++} + -+ /* PD_VI */ -+ COMPOSITE(ACLK_VI_PRE, "aclk_vi_pre", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(4), 8, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VI_PRE, "hclk_vi_pre", "aclk_vi_pre", 0, -+ PX30_CLKSEL_CON(11), 8, 4, DFLAGS, -+ PX30_CLKGATE_CON(4), 12, GFLAGS), -+ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(4), 9, GFLAGS), -+ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_cif_out_p, 0, -+ PX30_CLKSEL_CON(13), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ PX30_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(PCLK_ISP, "pclkin_isp", "ext_pclkin", 0, -+ PX30_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(PCLK_CIF, "pclkin_cif", "ext_pclkin", 0, -+ PX30_CLKGATE_CON(4), 14, GFLAGS), ++/** ++ * devm_of_clk_add_hw_provider() - Managed clk provider node registration ++ * @dev: Device acting as the clock provider (used for DT node and lifetime) ++ * @get: callback for decoding clk_hw ++ * @data: context pointer for @get callback ++ * ++ * Registers clock provider for given device's node. If the device has no DT ++ * node or if the device node lacks of clock provider information (#clock-cells) ++ * then the parent device's node is scanned for this information. 
If parent node ++ * has the #clock-cells then it is used in registration. Provider is ++ * automatically released at device exit. ++ * ++ * Return: 0 on success or an errno on failure. ++ */ ++int devm_of_clk_add_hw_provider(struct device *dev, ++ struct clk_hw *(*get)(struct of_phandle_args *clkspec, ++ void *data), ++ void *data) ++{ ++ struct device_node **ptr, *np; ++ int ret; + -+ /* -+ * Clock-Architecture Diagram 6 -+ */ ++ ptr = devres_alloc(devm_of_clk_release_provider, sizeof(*ptr), ++ GFP_KERNEL); ++ if (!ptr) ++ return -ENOMEM; + -+ /* PD_VO */ -+ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(3), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo_pre", 0, -+ PX30_CLKSEL_CON(3), 8, 4, DFLAGS, -+ PX30_CLKGATE_CON(2), 12, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VO_PRE, "pclk_vo_pre", "aclk_vo_pre", 0, -+ PX30_CLKSEL_CON(3), 12, 4, DFLAGS, -+ PX30_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE(SCLK_RGA_CORE, "clk_rga_core", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(2), 1, GFLAGS), ++ np = get_clk_provider_node(dev); ++ ret = of_clk_add_hw_provider(np, get, data); ++ if (!ret) { ++ *ptr = np; ++ devres_add(dev, ptr); ++ } else { ++ devres_free(ptr); ++ } + -+ COMPOSITE(SCLK_VOPB_PWM, "clk_vopb_pwm", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(7), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE(0, "dclk_vopb_src", mux_cpll_npll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(5), 11, 1, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOPB, "dclk_vopb", mux_dclk_vopb_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(5), 14, 2, MFLAGS, -+ PX30_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE(0, "dclk_vopl_src", mux_npll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(8), 11, 1, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOPL, "dclk_vopl", mux_dclk_vopl_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(8), 14, 2, MFLAGS, -+ PX30_CLKGATE_CON(2), 8, GFLAGS), ++ return ret; ++} ++EXPORT_SYMBOL_GPL(devm_of_clk_add_hw_provider); + -+ /* PD_VPU */ -+ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, -+ PX30_CLKSEL_CON(10), 8, 4, DFLAGS, -+ PX30_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE(SCLK_CORE_VPU, "sclk_core_vpu", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(4), 1, GFLAGS), ++/** ++ * of_clk_del_provider() - Remove a previously registered clock provider ++ * @np: Device node pointer associated with clock provider ++ */ ++void of_clk_del_provider(struct device_node *np) ++{ ++ struct of_clk_provider *cp; + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ ++ if (!np) ++ return; + -+ COMPOSITE_NODIV(ACLK_PERI_SRC, "aclk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(14), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(5), 7, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_PERI_PRE, "aclk_peri_pre", "aclk_peri_src", CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(14), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(5), 8, GFLAGS), -+ DIV(HCLK_PERI_PRE, "hclk_peri_pre", "aclk_peri_src", CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(14), 8, 5, DFLAGS), ++ mutex_lock(&of_clk_mutex); ++ list_for_each_entry(cp, &of_clk_providers, 
link) { ++ if (cp->node == np) { ++ list_del(&cp->link); ++ fwnode_dev_initialized(&np->fwnode, false); ++ of_node_put(cp->node); ++ kfree(cp); ++ break; ++ } ++ } ++ mutex_unlock(&of_clk_mutex); ++} ++EXPORT_SYMBOL_GPL(of_clk_del_provider); + -+ /* PD_MMC_NAND */ -+ GATE(HCLK_MMC_NAND, "hclk_mmc_nand", "hclk_peri_pre", 0, -+ PX30_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(5), 11, GFLAGS), -+ COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(5), 12, GFLAGS), -+ COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(15), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(5), 13, GFLAGS), ++/** ++ * of_parse_clkspec() - Parse a DT clock specifier for a given device node ++ * @np: device node to parse clock specifier from ++ * @index: index of phandle to parse clock out of. If index < 0, @name is used ++ * @name: clock name to find and parse. If name is NULL, the index is used ++ * @out_args: Result of parsing the clock specifier ++ * ++ * Parses a device node's "clocks" and "clock-names" properties to find the ++ * phandle and cells for the index or name that is desired. The resulting clock ++ * specifier is placed into @out_args, or an errno is returned when there's a ++ * parsing error. The @index argument is ignored if @name is non-NULL. ++ * ++ * Example: ++ * ++ * phandle1: clock-controller@1 { ++ * #clock-cells = <2>; ++ * } ++ * ++ * phandle2: clock-controller@2 { ++ * #clock-cells = <1>; ++ * } ++ * ++ * clock-consumer@3 { ++ * clocks = <&phandle1 1 2 &phandle2 3>; ++ * clock-names = "name1", "name2"; ++ * } ++ * ++ * To get a device_node for `clock-controller@2' node you may call this ++ * function a few different ways: ++ * ++ * of_parse_clkspec(clock-consumer@3, -1, "name2", &args); ++ * of_parse_clkspec(clock-consumer@3, 1, NULL, &args); ++ * of_parse_clkspec(clock-consumer@3, 1, "name2", &args); ++ * ++ * Return: 0 upon successfully parsing the clock specifier. Otherwise, -ENOENT ++ * if @name is NULL or -EINVAL if @name is non-NULL and it can't be found in ++ * the "clock-names" property of @np. ++ */ ++static int of_parse_clkspec(const struct device_node *np, int index, ++ const char *name, struct of_phandle_args *out_args) ++{ ++ int ret = -ENOENT; + -+ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", -+ mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(18), 14, 2, MFLAGS, -+ PX30_CLKSEL_CON(19), 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(19), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(6), 3, GFLAGS), ++ /* Walk up the tree of devices looking for a clock property that matches */ ++ while (np) { ++ /* ++ * For named clocks, first look up the name in the ++ * "clock-names" property. If it cannot be found, then index ++ * will be an error code and of_parse_phandle_with_args() will ++ * return -EINVAL. 
++ */ ++ if (name) ++ index = of_property_match_string(np, "clock-names", name); ++ ret = of_parse_phandle_with_args(np, "clocks", "#clock-cells", ++ index, out_args); ++ if (!ret) ++ break; ++ if (name && index >= 0) ++ break; + -+ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(20), 14, 2, MFLAGS, -+ PX30_CLKSEL_CON(21), 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 5, GFLAGS), -+ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(21), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(6), 6, GFLAGS), ++ /* ++ * No matching clock found on this node. If the parent node ++ * has a "clock-ranges" property, then we can try one of its ++ * clocks. ++ */ ++ np = np->parent; ++ if (np && !of_get_property(np, "clock-ranges", NULL)) ++ break; ++ index = 0; ++ } + -+ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, -+ PX30_CLKSEL_CON(22), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(6), 7, GFLAGS), ++ return ret; ++} + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", -+ PX30_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", -+ PX30_SDMMC_CON1, 1), ++static struct clk_hw * ++__of_clk_get_hw_from_provider(struct of_clk_provider *provider, ++ struct of_phandle_args *clkspec) ++{ ++ struct clk *clk; + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", -+ PX30_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", -+ PX30_SDIO_CON1, 1), ++ if (provider->get_hw) ++ return provider->get_hw(clkspec, provider->data); + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", -+ PX30_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", -+ PX30_EMMC_CON1, 1), ++ clk = provider->get(clkspec, provider->data); ++ if (IS_ERR(clk)) ++ return ERR_CAST(clk); ++ return __clk_get_hw(clk); ++} + -+ /* PD_SDCARD */ -+ GATE(0, "hclk_sdmmc_pre", "hclk_peri_pre", 0, -+ PX30_CLKGATE_CON(6), 12, GFLAGS), -+ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 13, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_gpll_cpll_npll_xin24m_p, 0, -+ PX30_CLKSEL_CON(16), 14, 2, MFLAGS, -+ PX30_CLKSEL_CON(17), 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(6), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(17), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(6), 15, GFLAGS), ++static struct clk_hw * ++of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec) ++{ ++ struct of_clk_provider *provider; ++ struct clk_hw *hw = ERR_PTR(-EPROBE_DEFER); + -+ /* PD_USB */ -+ GATE(HCLK_USB, "hclk_usb", "hclk_peri_pre", CLK_IS_CRITICAL, -+ PX30_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(SCLK_OTG_ADP, "clk_otg_adp", "clk_rtc32k_pmu", 0, -+ PX30_CLKGATE_CON(7), 3, GFLAGS), ++ if (!clkspec) ++ return ERR_PTR(-EINVAL); + -+ /* PD_GMAC */ -+ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(22), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(7), 11, GFLAGS), -+ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(23), 6, 1, MFLAGS), -+ GATE(SCLK_MAC_REF, "clk_mac_ref", "clk_gmac", 0, -+ PX30_CLKGATE_CON(7), 15, GFLAGS), -+ GATE(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", "clk_gmac", 0, -+ PX30_CLKGATE_CON(7), 13, 
GFLAGS), -+ FACTOR(0, "clk_gmac_rx_tx_div2", "clk_gmac_rx_tx", 0, 1, 2), -+ FACTOR(0, "clk_gmac_rx_tx_div20", "clk_gmac_rx_tx", 0, 1, 20), -+ MUX(SCLK_GMAC_RMII, "clk_gmac_rmii_sel", mux_gmac_rmii_sel_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(23), 7, 1, MFLAGS), ++ /* Check if node in clkspec is in disabled/fail state */ ++ if (!of_device_is_available(clkspec->np)) ++ return ERR_PTR(-ENOENT); + -+ GATE(0, "aclk_gmac_pre", "aclk_peri_pre", 0, -+ PX30_CLKGATE_CON(7), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_gmac_pre", "aclk_gmac_pre", 0, -+ PX30_CLKSEL_CON(23), 0, 4, DFLAGS, -+ PX30_CLKGATE_CON(7), 12, GFLAGS), ++ mutex_lock(&of_clk_mutex); ++ list_for_each_entry(provider, &of_clk_providers, link) { ++ if (provider->node == clkspec->np) { ++ hw = __of_clk_get_hw_from_provider(provider, clkspec); ++ if (!IS_ERR(hw)) ++ break; ++ } ++ } ++ mutex_unlock(&of_clk_mutex); + -+ COMPOSITE(SCLK_MAC_OUT, "clk_mac_out", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(8), 5, GFLAGS), ++ return hw; ++} + -+ /* -+ * Clock-Architecture Diagram 8 -+ */ ++/** ++ * of_clk_get_from_provider() - Lookup a clock from a clock provider ++ * @clkspec: pointer to a clock specifier data structure ++ * ++ * This function looks up a struct clk from the registered list of clock ++ * providers, an input is a clock specifier data structure as returned ++ * from the of_parse_phandle_with_args() function call. ++ */ ++struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec) ++{ ++ struct clk_hw *hw = of_clk_get_hw_from_clkspec(clkspec); + -+ /* PD_BUS */ -+ COMPOSITE_NODIV(ACLK_BUS_SRC, "aclk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(23), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(8), 6, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_BUS_PRE, "hclk_bus_pre", "aclk_bus_src", CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(24), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_BUS_PRE, "aclk_bus_pre", "aclk_bus_src", CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(23), 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(8), 7, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_BUS_PRE, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, -+ PX30_CLKSEL_CON(24), 8, 2, DFLAGS, -+ PX30_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(0, "pclk_top_pre", "pclk_bus_pre", CLK_IS_CRITICAL, -+ PX30_CLKGATE_CON(8), 10, GFLAGS), ++ return clk_hw_create_clk(NULL, hw, NULL, __func__); ++} ++EXPORT_SYMBOL_GPL(of_clk_get_from_provider); + -+ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_npll_p, 0, -+ PX30_CLKSEL_CON(26), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(9), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(27), 0, -+ PX30_CLKGATE_CON(9), 10, GFLAGS, -+ &px30_pdm_fracmux), -+ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(9), 11, GFLAGS), ++struct clk_hw *of_clk_get_hw(struct device_node *np, int index, ++ const char *con_id) ++{ ++ int ret; ++ struct clk_hw *hw; ++ struct of_phandle_args clkspec; + -+ COMPOSITE(0, "clk_i2s0_tx_src", mux_gpll_npll_p, 0, -+ PX30_CLKSEL_CON(28), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(9), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_tx_frac", "clk_i2s0_tx_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(29), 0, -+ PX30_CLKGATE_CON(9), 13, GFLAGS, -+ &px30_i2s0_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_TX, "clk_i2s0_tx", mux_i2s0_tx_rx_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(28), 12, 1, MFLAGS, -+ PX30_CLKGATE_CON(9), 14, GFLAGS), -+ COMPOSITE_NODIV(0, "clk_i2s0_tx_out_pre", 
mux_i2s0_tx_out_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(28), 14, 2, MFLAGS, -+ PX30_CLKGATE_CON(9), 15, GFLAGS), -+ GATE(SCLK_I2S0_TX_OUT, "clk_i2s0_tx_out", "clk_i2s0_tx_out_pre", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 8, CLK_GATE_HIWORD_MASK), ++ ret = of_parse_clkspec(np, index, con_id, &clkspec); ++ if (ret) ++ return ERR_PTR(ret); + -+ COMPOSITE(0, "clk_i2s0_rx_src", mux_gpll_npll_p, 0, -+ PX30_CLKSEL_CON(58), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(17), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_rx_frac", "clk_i2s0_rx_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(59), 0, -+ PX30_CLKGATE_CON(17), 1, GFLAGS, -+ &px30_i2s0_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_RX, "clk_i2s0_rx", mux_i2s0_rx_tx_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(58), 12, 1, MFLAGS, -+ PX30_CLKGATE_CON(17), 2, GFLAGS), -+ COMPOSITE_NODIV(0, "clk_i2s0_rx_out_pre", mux_i2s0_rx_out_p, 0, -+ PX30_CLKSEL_CON(58), 14, 2, MFLAGS, -+ PX30_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(SCLK_I2S0_RX_OUT, "clk_i2s0_rx_out", "clk_i2s0_rx_out_pre", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 11, CLK_GATE_HIWORD_MASK), ++ hw = of_clk_get_hw_from_clkspec(&clkspec); ++ of_node_put(clkspec.np); + -+ COMPOSITE(0, "clk_i2s1_src", mux_gpll_npll_p, 0, -+ PX30_CLKSEL_CON(30), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(31), 0, -+ PX30_CLKGATE_CON(10), 1, GFLAGS, -+ &px30_i2s1_fracmux), -+ GATE(SCLK_I2S1, "clk_i2s1", "clk_i2s1_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 2, GFLAGS), -+ COMPOSITE_NODIV(0, "clk_i2s1_out_pre", mux_i2s1_out_p, CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(30), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(10), 3, GFLAGS), -+ GATE(SCLK_I2S1_OUT, "clk_i2s1_out", "clk_i2s1_out_pre", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 9, CLK_GATE_HIWORD_MASK), ++ return hw; ++} + -+ COMPOSITE(0, "clk_i2s2_src", mux_gpll_npll_p, 0, -+ PX30_CLKSEL_CON(32), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(10), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(33), 0, -+ PX30_CLKGATE_CON(10), 5, GFLAGS, -+ &px30_i2s2_fracmux), -+ GATE(SCLK_I2S2, "clk_i2s2", "clk_i2s2_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE_NODIV(0, "clk_i2s2_out_pre", mux_i2s2_out_p, 0, -+ PX30_CLKSEL_CON(32), 15, 1, MFLAGS, -+ PX30_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(SCLK_I2S2_OUT, "clk_i2s2_out", "clk_i2s2_out_pre", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 10, CLK_GATE_HIWORD_MASK), ++static struct clk *__of_clk_get(struct device_node *np, ++ int index, const char *dev_id, ++ const char *con_id) ++{ ++ struct clk_hw *hw = of_clk_get_hw(np, index, con_id); + -+ COMPOSITE(SCLK_UART1_SRC, "clk_uart1_src", mux_uart_src_p, CLK_SET_RATE_NO_REPARENT, -+ PX30_CLKSEL_CON(34), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(10), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, -+ PX30_CLKSEL_CON(35), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(10), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(36), 0, -+ PX30_CLKGATE_CON(10), 14, GFLAGS, -+ &px30_uart1_fracmux), -+ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(10), 15, GFLAGS), ++ return clk_hw_create_clk(NULL, hw, dev_id, con_id); ++} + -+ COMPOSITE(SCLK_UART2_SRC, "clk_uart2_src", mux_uart_src_p, 0, -+ PX30_CLKSEL_CON(37), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ 
PX30_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, -+ PX30_CLKSEL_CON(38), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(39), 0, -+ PX30_CLKGATE_CON(11), 2, GFLAGS, -+ &px30_uart2_fracmux), -+ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, -+ PX30_CLKGATE_CON(11), 3, GFLAGS), ++struct clk *of_clk_get(struct device_node *np, int index) ++{ ++ return __of_clk_get(np, index, np->full_name, NULL); ++} ++EXPORT_SYMBOL(of_clk_get); + -+ COMPOSITE(0, "clk_uart3_src", mux_uart_src_p, 0, -+ PX30_CLKSEL_CON(40), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 4, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, -+ PX30_CLKSEL_CON(41), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(42), 0, -+ PX30_CLKGATE_CON(11), 6, GFLAGS, -+ &px30_uart3_fracmux), -+ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(11), 7, GFLAGS), ++/** ++ * of_clk_get_by_name() - Parse and lookup a clock referenced by a device node ++ * @np: pointer to clock consumer node ++ * @name: name of consumer's clock input, or NULL for the first clock reference ++ * ++ * This function parses the clocks and clock-names properties, ++ * and uses them to look up the struct clk from the registered list of clock ++ * providers. ++ */ ++struct clk *of_clk_get_by_name(struct device_node *np, const char *name) ++{ ++ if (!np) ++ return ERR_PTR(-ENOENT); + -+ COMPOSITE(0, "clk_uart4_src", mux_uart_src_p, 0, -+ PX30_CLKSEL_CON(43), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 8, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, -+ PX30_CLKSEL_CON(44), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(45), 0, -+ PX30_CLKGATE_CON(11), 10, GFLAGS, -+ &px30_uart4_fracmux), -+ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(11), 11, GFLAGS), ++ return __of_clk_get(np, 0, np->full_name, name); ++} ++EXPORT_SYMBOL(of_clk_get_by_name); + -+ COMPOSITE(0, "clk_uart5_src", mux_uart_src_p, 0, -+ PX30_CLKSEL_CON(46), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, -+ PX30_CLKSEL_CON(47), 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(11), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ PX30_CLKSEL_CON(48), 0, -+ PX30_CLKGATE_CON(11), 14, GFLAGS, -+ &px30_uart5_fracmux), -+ GATE(SCLK_UART5, "clk_uart5", "clk_uart5_mux", CLK_SET_RATE_PARENT, -+ PX30_CLKGATE_CON(11), 15, GFLAGS), ++/** ++ * of_clk_get_parent_count() - Count the number of clocks a device node has ++ * @np: device node to count ++ * ++ * Returns: The number of clocks that are possible parents of this node ++ */ ++unsigned int of_clk_get_parent_count(const struct device_node *np) ++{ ++ int count; + -+ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(49), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(49), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 1, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, -+ 
PX30_CLKSEL_CON(50), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 2, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(50), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 3, GFLAGS), -+ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(52), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 5, GFLAGS), -+ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(52), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 6, GFLAGS), -+ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(53), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 7, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, -+ PX30_CLKSEL_CON(53), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ PX30_CLKGATE_CON(12), 8, GFLAGS), ++ count = of_count_phandle_with_args(np, "clocks", "#clock-cells"); ++ if (count < 0) ++ return 0; + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 2, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ PX30_CLKGATE_CON(13), 5, GFLAGS), ++ return count; ++} ++EXPORT_SYMBOL_GPL(of_clk_get_parent_count); + -+ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, -+ PX30_CLKSEL_CON(54), 0, 11, DFLAGS, -+ PX30_CLKGATE_CON(12), 9, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, -+ PX30_CLKSEL_CON(55), 0, 11, DFLAGS, -+ PX30_CLKGATE_CON(12), 10, GFLAGS), ++const char *of_clk_get_parent_name(const struct device_node *np, int index) ++{ ++ struct of_phandle_args clkspec; ++ const char *clk_name; ++ bool found = false; ++ u32 pv; ++ int rc; ++ int count; ++ struct clk *clk; + -+ GATE(0, "clk_cpu_boost", "xin24m", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(12), 12, GFLAGS), ++ rc = of_parse_phandle_with_args(np, "clocks", "#clock-cells", index, ++ &clkspec); ++ if (rc) ++ return NULL; + -+ /* PD_CRYPTO */ -+ GATE(0, "aclk_crypto_pre", "aclk_bus_pre", 0, -+ PX30_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(0, "hclk_crypto_pre", "hclk_bus_pre", 0, -+ PX30_CLKGATE_CON(8), 13, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_CLKGATE_CON(8), 14, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_npll_p, 0, -+ PX30_CLKSEL_CON(25), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ PX30_CLKGATE_CON(8), 15, GFLAGS), ++ index = clkspec.args_count ? clkspec.args[0] : 0; ++ count = 0; + -+ /* -+ * Clock-Architecture Diagram 9 ++ /* if there is an indices property, use it to transfer the index ++ * specified into an array offset for the clock-output-names property. 
+ */ ++ of_property_for_each_u32(clkspec.np, "clock-indices", pv) { ++ if (index == pv) { ++ index = count; ++ found = true; ++ break; ++ } ++ count++; ++ } ++ /* We went off the end of 'clock-indices' without finding it */ ++ if (of_property_present(clkspec.np, "clock-indices") && !found) { ++ of_node_put(clkspec.np); ++ return NULL; ++ } + -+ /* PD_BUS_TOP */ -+ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(PCLK_OTP_PHY, "pclk_otp_phy", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_top_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(0, "pclk_cpu_hoost", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 7, GFLAGS), ++ if (of_property_read_string_index(clkspec.np, "clock-output-names", ++ index, ++ &clk_name) < 0) { ++ /* ++ * Best effort to get the name if the clock has been ++ * registered with the framework. If the clock isn't ++ * registered, we return the node name as the name of ++ * the clock as long as #clock-cells = 0. ++ */ ++ clk = of_clk_get_from_provider(&clkspec); ++ if (IS_ERR(clk)) { ++ if (clkspec.args_count == 0) ++ clk_name = clkspec.np->name; ++ else ++ clk_name = NULL; ++ } else { ++ clk_name = __clk_get_name(clk); ++ clk_put(clk); ++ } ++ } + -+ /* PD_VI */ -+ GATE(0, "aclk_vi_niu", "aclk_vi_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(4), 15, GFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(0, "hclk_vi_niu", "hclk_vi_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 4, GFLAGS), + -+ /* PD_VO */ -+ GATE(0, "aclk_vo_niu", "aclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 0, GFLAGS), -+ GATE(ACLK_VOPB, "aclk_vopb", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 3, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 7, GFLAGS), -+ GATE(ACLK_VOPL, "aclk_vopl", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 5, GFLAGS), ++ of_node_put(clkspec.np); ++ return clk_name; ++} ++EXPORT_SYMBOL_GPL(of_clk_get_parent_name); + -+ GATE(0, "hclk_vo_niu", "hclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 1, GFLAGS), -+ GATE(HCLK_VOPB, "hclk_vopb", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 4, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 8, GFLAGS), -+ GATE(HCLK_VOPL, "hclk_vopl", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 6, GFLAGS), ++/** ++ * of_clk_parent_fill() - Fill @parents with names of @np's parents and return ++ * number of parents ++ * @np: Device node pointer associated with clock provider ++ * @parents: pointer to char array that hold the parents' names ++ * @size: size of the @parents array ++ * ++ * Return: number of parents for the clock node. 
++ */ ++int of_clk_parent_fill(struct device_node *np, const char **parents, ++ unsigned int size) ++{ ++ unsigned int i = 0; + -+ GATE(0, "pclk_vo_niu", "pclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 2, GFLAGS), -+ GATE(PCLK_MIPI_DSI, "pclk_mipi_dsi", "pclk_vo_pre", 0, PX30_CLKGATE_CON(3), 9, GFLAGS), ++ while (i < size && (parents[i] = of_clk_get_parent_name(np, i)) != NULL) ++ i++; + -+ /* PD_BUS */ -+ GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 8, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 12, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, PX30_CLKGATE_CON(13), 15, GFLAGS), ++ return i; ++} ++EXPORT_SYMBOL_GPL(of_clk_parent_fill); + -+ /* aclk_dmac is controlled by sgrf_soc_con1[11]. */ -+ SGRF_GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre"), ++struct clock_provider { ++ void (*clk_init_cb)(struct device_node *); ++ struct device_node *np; ++ struct list_head node; ++}; + -+ GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 9, GFLAGS), -+ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 14, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 1, GFLAGS), -+ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(HCLK_I2S1, "hclk_i2s1", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(HCLK_I2S2, "hclk_i2s2", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 4, GFLAGS), ++/* ++ * This function looks for a parent clock. If there is one, then it ++ * checks that the provider for this parent clock was initialized, in ++ * this case the parent clock will be ready. 
++ */ ++static int parent_ready(struct device_node *np) ++{ ++ int i = 0; + -+ GATE(0, "pclk_bus_niu", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 12, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 13, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 14, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 15, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(PCLK_OTP_NS, "pclk_otp_ns", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 11, GFLAGS), -+ GATE(0, "pclk_sgrf", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 12, GFLAGS), ++ while (true) { ++ struct clk *clk = of_clk_get(np, i); + -+ /* PD_VPU */ -+ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, PX30_CLKGATE_CON(4), 6, GFLAGS), -+ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, PX30_CLKGATE_CON(4), 4, GFLAGS), ++ /* this parent is ready we can check the next one */ ++ if (!IS_ERR(clk)) { ++ clk_put(clk); ++ i++; ++ continue; ++ } + -+ /* PD_CRYPTO */ -+ GATE(0, "hclk_crypto_niu", "hclk_crypto_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_crypto_pre", 0, PX30_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(0, "aclk_crypto_niu", "aclk_crypto_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_crypto_pre", 0, PX30_CLKGATE_CON(9), 4, GFLAGS), ++ /* at least one parent is not ready, we exit now */ ++ if (PTR_ERR(clk) == -EPROBE_DEFER) ++ return 0; + -+ /* PD_SDCARD */ -+ GATE(0, "hclk_sdmmc_niu", "hclk_sdmmc_pre", CLK_IGNORE_UNUSED, 
PX30_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sdmmc_pre", 0, PX30_CLKGATE_CON(7), 1, GFLAGS), ++ /* ++ * Here we make assumption that the device tree is ++ * written correctly. So an error means that there is ++ * no more parent. As we didn't exit yet, then the ++ * previous parent are ready. If there is no clock ++ * parent, no need to wait for them, then we can ++ * consider their absence as being ready ++ */ ++ return 1; ++ } ++} + -+ /* PD_PERI */ -+ GATE(0, "aclk_peri_niu", "aclk_peri_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(5), 9, GFLAGS), ++/** ++ * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree ++ * @np: Device node pointer associated with clock provider ++ * @index: clock index ++ * @flags: pointer to top-level framework flags ++ * ++ * Detects if the clock-critical property exists and, if so, sets the ++ * corresponding CLK_IS_CRITICAL flag. ++ * ++ * Do not use this function. It exists only for legacy Device Tree ++ * bindings, such as the one-clock-per-node style that are outdated. ++ * Those bindings typically put all clock data into .dts and the Linux ++ * driver has no clock data, thus making it impossible to set this flag ++ * correctly from the driver. Only those drivers may call ++ * of_clk_detect_critical from their setup functions. ++ * ++ * Return: error code or zero on success ++ */ ++int of_clk_detect_critical(struct device_node *np, int index, ++ unsigned long *flags) ++{ ++ uint32_t idx; + -+ /* PD_MMC_NAND */ -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(5), 15, GFLAGS), -+ GATE(0, "hclk_mmc_nand_niu", "hclk_mmc_nand", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(6), 8, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 11, GFLAGS), ++ if (!np || !flags) ++ return -EINVAL; + -+ /* PD_USB */ -+ GATE(0, "hclk_usb_niu", "hclk_usb", CLK_IS_CRITICAL, PX30_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_usb", 0, PX30_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(HCLK_HOST, "hclk_host", "hclk_usb", 0, PX30_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(HCLK_HOST_ARB, "hclk_host_arb", "hclk_usb", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(7), 8, GFLAGS), ++ of_property_for_each_u32(np, "clock-critical", idx) ++ if (index == idx) ++ *flags |= CLK_IS_CRITICAL; + -+ /* PD_GMAC */ -+ GATE(0, "aclk_gmac_niu", "aclk_gmac_pre", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_gmac_pre", 0, -+ PX30_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(0, "pclk_gmac_niu", "pclk_gmac_pre", CLK_IGNORE_UNUSED, -+ PX30_CLKGATE_CON(8), 1, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_gmac_pre", 0, -+ PX30_CLKGATE_CON(8), 3, GFLAGS), -+}; ++ return 0; ++} + -+static struct rockchip_clk_branch px30_gpu_src_clk[] __initdata = { -+ COMPOSITE(0, "clk_gpu_src", mux_gpll_dmycpll_usb480m_dmynpll_p, 0, -+ PX30_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ PX30_CLKGATE_CON(0), 8, GFLAGS), -+}; ++/** ++ * of_clk_init() - Scan and init clock providers from the DT ++ * @matches: array of compatible values and init functions for providers. ++ * ++ * This function scans the device tree for matching clock providers ++ * and calls their initialization functions. It also does it by trying ++ * to follow the dependencies. 
++ */ ++void __init of_clk_init(const struct of_device_id *matches) ++{ ++ const struct of_device_id *match; ++ struct device_node *np; ++ struct clock_provider *clk_provider, *next; ++ bool is_init_done; ++ bool force = false; ++ LIST_HEAD(clk_provider_list); + -+static struct rockchip_clk_branch rk3326_gpu_src_clk[] __initdata = { -+ COMPOSITE(0, "clk_gpu_src", mux_gpll_dmycpll_usb480m_npll_p, 0, -+ PX30_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ PX30_CLKGATE_CON(0), 8, GFLAGS), -+}; ++ if (!matches) ++ matches = &__clk_of_table; + -+static struct rockchip_clk_branch px30_clk_pmu_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 2 -+ */ ++ /* First prepare the list of the clocks providers */ ++ for_each_matching_node_and_match(np, matches, &match) { ++ struct clock_provider *parent; + -+ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ PX30_PMU_CLKSEL_CON(1), 0, -+ PX30_PMU_CLKGATE_CON(0), 13, GFLAGS, -+ &px30_rtc32k_pmu_fracmux), ++ if (!of_device_is_available(np)) ++ continue; + -+ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, -+ PX30_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, -+ PX30_PMU_CLKGATE_CON(0), 12, GFLAGS), ++ parent = kzalloc(sizeof(*parent), GFP_KERNEL); ++ if (!parent) { ++ list_for_each_entry_safe(clk_provider, next, ++ &clk_provider_list, node) { ++ list_del(&clk_provider->node); ++ of_node_put(clk_provider->np); ++ kfree(clk_provider); ++ } ++ of_node_put(np); ++ return; ++ } + -+ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "gpll", 0, -+ PX30_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, -+ PX30_PMU_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, -+ PX30_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ parent->clk_init_cb = match->data; ++ parent->np = of_node_get(np); ++ list_add_tail(&parent->node, &clk_provider_list); ++ } + -+ COMPOSITE(0, "clk_uart0_pmu_src", mux_uart_src_p, 0, -+ PX30_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ PX30_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, -+ PX30_PMU_CLKSEL_CON(4), 0, 5, DFLAGS, -+ PX30_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKSEL_CON(5), 0, -+ PX30_PMU_CLKGATE_CON(1), 2, GFLAGS, -+ &px30_uart0_pmu_fracmux), -+ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ while (!list_empty(&clk_provider_list)) { ++ is_init_done = false; ++ list_for_each_entry_safe(clk_provider, next, ++ &clk_provider_list, node) { ++ if (force || parent_ready(clk_provider->np)) { + -+ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ PX30_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ /* Don't populate platform devices */ ++ of_node_set_flag(clk_provider->np, ++ OF_POPULATED); + -+ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "gpll", CLK_IS_CRITICAL, -+ PX30_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, -+ PX30_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ clk_provider->clk_init_cb(clk_provider->np); ++ of_clk_set_defaults(clk_provider->np, true); + -+ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "gpll", 0, -+ PX30_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, -+ PX30_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, -+ PX30_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", 
mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, -+ PX30_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, -+ PX30_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ list_del(&clk_provider->node); ++ of_node_put(clk_provider->np); ++ kfree(clk_provider); ++ is_init_done = true; ++ } ++ } + -+ /* -+ * Clock-Architecture Diagram 9 -+ */ ++ /* ++ * We didn't manage to initialize any of the ++ * remaining providers during the last loop, so now we ++ * initialize all the remaining ones unconditionally ++ * in case the clock parent was not mandatory ++ */ ++ if (!is_init_done) ++ force = true; ++ } ++} ++#endif +diff --git a/drivers/clk/rockchip-oh/Kconfig b/drivers/clk/rockchip-oh/Kconfig +new file mode 100644 +index 000000000..c4704da18 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/Kconfig +@@ -0,0 +1,212 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# common clock support for ROCKCHIP SoC family. + -+ /* PD_PMU */ -+ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, PX30_PMU_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, PX30_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 8, GFLAGS), -+}; ++config COMMON_CLK_ROCKCHIP ++ tristate "Rockchip clock controller common support" ++ depends on ARCH_ROCKCHIP ++ default ARCH_ROCKCHIP ++ help ++ Say y here to enable common clock controller for Rockchip platforms. + -+static struct rockchip_clk_branch px30_clk_ddrphy_otp[] __initdata = { -+ COMPOSITE_NOGATE(0, "clk_ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 3, DFLAGS), -+ FACTOR_GATE(0, "clk_ddrphy1x", "clk_ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, -+ PX30_CLKGATE_CON(0), 14, GFLAGS), -+ FACTOR_GATE(0, "clk_stdby_2wrap", "clk_ddrphy4x", -+ CLK_IGNORE_UNUSED, 1, 4, -+ PX30_CLKGATE_CON(1), 0, GFLAGS), ++if COMMON_CLK_ROCKCHIP ++config CLK_PX30 ++ tristate "Rockchip PX30 clock controller support" ++ depends on CPU_PX30 || COMPILE_TEST ++ default y ++ help ++ Build the driver for PX30 Clock Driver. + -+ COMPOSITE_NOMUX(SCLK_OTP, "clk_otp", "xin24m", 0, -+ PX30_CLKSEL_CON(56), 0, 3, DFLAGS, -+ PX30_CLKGATE_CON(12), 11, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_OTP_USR, "clk_otp_usr", "clk_otp", 0, -+ PX30_CLKSEL_CON(56), 4, 2, DFLAGS, -+ PX30_CLKGATE_CON(13), 6, GFLAGS), -+}; ++config CLK_RV1106 ++ tristate "Rockchip RV1106 clock controller support" ++ depends on CPU_RV1106 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RV1106 Clock Driver. + -+static struct rockchip_clk_branch px30s_clk_ddrphy_otp[] __initdata = { -+ COMPOSITE(0, "clk_ddrphy1x", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ PX30_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 3, DFLAGS, -+ PX30_CLKGATE_CON(0), 14, GFLAGS), -+ FACTOR_GATE(0, "clk_stdby_2wrap", "clk_ddrphy1x", -+ CLK_IGNORE_UNUSED, 1, 4, -+ PX30_CLKGATE_CON(1), 0, GFLAGS), ++config CLK_RV1108 ++ tristate "Rockchip RV1108 clock controller support" ++ depends on CPU_RV1108 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RV1108 Clock Driver. 
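[Illustrative aside, not part of the patch] The of_clk_init() walk shown above only considers providers that registered themselves in __clk_of_table, which is what the CLK_OF_DECLARE() macro does; the PX30 clock code elsewhere in this patch hooks in exactly that way. A minimal, hypothetical sketch of the pattern follows; the "vendor,example-cru" compatible string and the example_clk_init name are placeholders invented for the sketch, not part of this patch.

#include <linux/clk-provider.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/printk.h>

/* Placeholder provider init; a real driver registers its clocks here. */
static void __init example_clk_init(struct device_node *np)
{
        void __iomem *base = of_iomap(np, 0);

        if (!base) {
                pr_err("%s: could not map registers\n", __func__);
                return;
        }

        /* clk_hw_register_*() / of_clk_add_hw_provider() calls go here. */
}

/*
 * Adds an entry to __clk_of_table so of_clk_init() can match the node by
 * compatible string and call example_clk_init() in dependency order, once
 * parent_ready() reports that all of the node's parent clocks exist.
 */
CLK_OF_DECLARE(example_cru, "vendor,example-cru", example_clk_init);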
+ -+ COMPOSITE(SCLK_OTP_USR, "clk_otp_usr", mux_xin24m_gpll_p, 0, -+ PX30_CLKSEL_CON(56), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ PX30_CLKGATE_CON(12), 11, GFLAGS), -+}; ++config CLK_RV1126 ++ tristate "Rockchip RV1126 clock controller support" ++ depends on CPU_RV1126 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RV1126 Clock Driver. + -+static __initdata struct rockchip_clk_provider *cru_ctx, *pmucru_ctx; -+static void __init px30_register_armclk(void) -+{ -+ rockchip_clk_register_armclk(cru_ctx, ARMCLK, "armclk", 2, -+ cru_ctx->clk_data.clks[PLL_APLL], -+ pmucru_ctx->clk_data.clks[PLL_GPLL], -+ &px30_cpuclk_data, -+ px30_cpuclk_rates, -+ ARRAY_SIZE(px30_cpuclk_rates)); -+} ++config CLK_RK1808 ++ tristate "Rockchip RK1808 clock controller support" ++ depends on CPU_RK1808 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK1808 Clock Driver. + -+static void __init px30_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++config CLK_RK3036 ++ tristate "Rockchip RK3036 clock controller support" ++ depends on CPU_RK3036 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3036 Clock Driver. + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++config CLK_RK312X ++ tristate "Rockchip RK312x clock controller support" ++ depends on CPU_RK312X || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK312x Clock Driver. + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ cru_ctx = ctx; ++config CLK_RK3188 ++ tristate "Rockchip RK3188 clock controller support" ++ depends on CPU_RK3188 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3188 Clock Driver. + -+ rockchip_clk_register_plls(ctx, px30_pll_clks, -+ ARRAY_SIZE(px30_pll_clks), -+ PX30_GRF_SOC_STATUS0); ++config CLK_RK322X ++ tristate "Rockchip RK322x clock controller support" ++ depends on CPU_RK322X || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK322x Clock Driver. + -+ if (pmucru_ctx) -+ px30_register_armclk(); ++config CLK_RK3288 ++ tristate "Rockchip RK3288 clock controller support" ++ depends on CPU_RK3288 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3288 Clock Driver. + -+ rockchip_clk_register_branches(ctx, px30_clk_branches, -+ ARRAY_SIZE(px30_clk_branches)); -+ if (of_machine_is_compatible("rockchip,px30")) -+ rockchip_clk_register_branches(ctx, px30_gpu_src_clk, -+ ARRAY_SIZE(px30_gpu_src_clk)); -+ else -+ rockchip_clk_register_branches(ctx, rk3326_gpu_src_clk, -+ ARRAY_SIZE(rk3326_gpu_src_clk)); ++config CLK_RK3308 ++ tristate "Rockchip RK3308 clock controller support" ++ depends on CPU_RK3308 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3308 Clock Driver. + -+ rockchip_soc_id_init(); -+ if (soc_is_px30s()) -+ rockchip_clk_register_branches(ctx, px30s_clk_ddrphy_otp, -+ ARRAY_SIZE(px30s_clk_ddrphy_otp)); -+ else -+ rockchip_clk_register_branches(ctx, px30_clk_ddrphy_otp, -+ ARRAY_SIZE(px30_clk_ddrphy_otp)); ++config CLK_RK3328 ++ tristate "Rockchip RK3328 clock controller support" ++ depends on CPU_RK3328 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3328 Clock Driver. 
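[Illustrative aside, not part of the patch] Each of these Kconfig symbols becomes a CONFIG_* macro that the C code in this patch can test at build time; clk-cpu.c below does this with CONFIG_ROCKCHIP_CLK_BOOST through IS_ENABLED(). A minimal, hypothetical sketch of that pattern is shown here; the helper name is invented for illustration only.

#include <linux/kconfig.h>
#include <linux/printk.h>

/*
 * IS_ENABLED() expands to 1 when the symbol is =y or =m and to 0 when it
 * is unset, so the unused branch is discarded at compile time.
 */
static void example_report_boost(void)
{
        if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST))
                pr_debug("clk boost support is compiled in\n");
        else
                pr_debug("clk boost support is compiled out\n");
}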
+ -+ rockchip_register_softrst(np, 12, reg_base + PX30_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++config CLK_RK3368 ++ tristate "Rockchip RK3368 clock controller support" ++ depends on CPU_RK3368 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3368 Clock Driver. + -+ rockchip_register_restart_notifier(ctx, PX30_GLB_SRST_FST, NULL); ++config CLK_RK3399 ++ tristate "Rockchip RK3399 clock controller support" ++ depends on CPU_RK3399 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3399 Clock Driver. + -+ rockchip_clk_of_add_provider(np, ctx); -+} -+CLK_OF_DECLARE(px30_cru, "rockchip,px30-cru", px30_clk_init); ++config CLK_RK3528 ++ tristate "Rockchip RK3528 clock controller support" ++ depends on CPU_RK3528 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3528 Clock Driver. + -+static void __init px30_pmu_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++config CLK_RK3562 ++ tristate "Rockchip RK3562 clock controller support" ++ depends on CPU_RK3562 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3562 Clock Driver. + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru pmu region\n", __func__); -+ return; -+ } ++config CLK_RK3568 ++ tristate "Rockchip RK3568 clock controller support" ++ depends on CPU_RK3568 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3568 Clock Driver. + -+ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip pmu clk init failed\n", __func__); -+ return; -+ } -+ pmucru_ctx = ctx; ++config CLK_RK3588 ++ tristate "Rockchip RK3588 clock controller support" ++ depends on CPU_RK3588 || COMPILE_TEST ++ default y ++ help ++ Build the driver for RK3588 Clock Driver. + -+ rockchip_clk_register_plls(ctx, px30_pmu_pll_clks, -+ ARRAY_SIZE(px30_pmu_pll_clks), PX30_GRF_SOC_STATUS0); ++config ROCKCHIP_CLK_COMPENSATION ++ bool "Rockchip Clk Compensation" ++ help ++ Say y here to enable clk compensation(+/- 1000 ppm). + -+ if (cru_ctx) -+ px30_register_armclk(); ++config ROCKCHIP_CLK_LINK ++ tristate "Rockchip clock link support" ++ default CLK_RK3562 || CLK_RK3588 ++ help ++ Say y here to enable clock link for Rockchip. + -+ rockchip_clk_register_branches(ctx, px30_clk_pmu_branches, -+ ARRAY_SIZE(px30_clk_pmu_branches)); ++config ROCKCHIP_CLK_BOOST ++ bool "Rockchip Clk Boost" ++ default y if CPU_PX30 ++ help ++ Say y here to enable clk boost. + -+ rockchip_clk_of_add_provider(np, ctx); -+} -+CLK_OF_DECLARE(px30_cru_pmu, "rockchip,px30-pmucru", px30_pmu_clk_init); ++config ROCKCHIP_CLK_INV ++ bool "Rockchip Clk Inverter" ++ default y if !CPU_RV1126 && !CPU_RV1106 ++ help ++ Say y here to enable clk Inverter. + -+#ifdef MODULE -+struct clk_px30_inits { -+ void (*inits)(struct device_node *np); -+}; ++config ROCKCHIP_CLK_OUT ++ tristate "Rockchip Clk Out / Input Switch" ++ default y if !ROCKCHIP_MINI_KERNEL ++ help ++ Say y here to enable clk out / input switch. + -+static const struct clk_px30_inits clk_px30_init = { -+ .inits = px30_clk_init, -+}; ++config ROCKCHIP_CLK_PVTM ++ bool "Rockchip Clk Pvtm" ++ default y if !CPU_RV1126 && !CPU_RV1106 ++ help ++ Say y here to enable clk pvtm. 
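[Illustrative aside, not part of the patch] From a consumer driver's point of view, the options above only decide which clock controller gets built; the clocks themselves are then requested through the generic clk API. A short, hypothetical consumer-side sketch follows; the "aclk" clock name, the 150 MHz rate and the function name are placeholders and are not taken from any binding in this patch.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int example_enable_bus_clock(struct device *dev)
{
        struct clk *clk;
        int ret;

        /* Looks up the "aclk" entry in this device's clock-names list. */
        clk = devm_clk_get(dev, "aclk");
        if (IS_ERR(clk))
                return PTR_ERR(clk);

        ret = clk_prepare_enable(clk);
        if (ret)
                return ret;

        /* Rate requests propagate through the mux/div/gate tree registered above. */
        return clk_set_rate(clk, 150000000);
}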
+ -+static const struct clk_px30_inits clk_px30_pmu_init = { -+ .inits = px30_pmu_clk_init, -+}; ++config ROCKCHIP_DDRCLK ++ bool + -+static const struct of_device_id clk_px30_match_table[] = { -+ { -+ .compatible = "rockchip,px30-cru", -+ .data = &clk_px30_init, -+ }, { -+ .compatible = "rockchip,px30-pmucru", -+ .data = &clk_px30_pmu_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_px30_match_table); ++config ROCKCHIP_DDRCLK_SIP ++ bool "Rockchip DDR Clk SIP" ++ default y if CPU_RK3399 ++ select ROCKCHIP_DDRCLK ++ help ++ Say y here to enable ddr clk sip. + -+static int clk_px30_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_px30_inits *init_data; ++config ROCKCHIP_DDRCLK_SIP_V2 ++ bool "Rockchip DDR Clk SIP V2" ++ default y if CPU_PX30 || CPU_RK1808 || CPU_RK312X || CPU_RK322X || \ ++ CPU_RK3288 || CPU_RK3308 || CPU_RK3328 || CPU_RV1126 ++ select ROCKCHIP_DDRCLK ++ help ++ Say y here to enable ddr clk sip v2. + -+ match = of_match_device(clk_px30_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; ++config ROCKCHIP_PLL_RK3066 ++ bool "Rockchip PLL Type RK3066" ++ default y if CPU_RK30XX || CPU_RK3188 || \ ++ CPU_RK3288 || CPU_RK3368 ++ help ++ Say y here to enable pll type is rk3066. + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++config ROCKCHIP_PLL_RK3399 ++ bool "Rockchip PLL Type RK3399" ++ default y if CPU_RK3399 || CPU_RV1108 ++ help ++ Say y here to enable pll type is rk3399. + -+ return 0; -+} ++config ROCKCHIP_PLL_RK3588 ++ bool "Rockchip PLL Type RK3588" ++ default y if CPU_RK3588 ++ help ++ Say y here to enable pll type is rk3588. + -+static struct platform_driver clk_px30_driver = { -+ .probe = clk_px30_probe, -+ .driver = { -+ .name = "clk-px30", -+ .of_match_table = clk_px30_match_table, -+ }, -+}; -+module_platform_driver(clk_px30_driver); ++source "drivers/clk/rockchip-oh/regmap/Kconfig" + -+MODULE_DESCRIPTION("Rockchip PX30 Clock Driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rk1808.c b/drivers/clk/rockchip-oh/clk-rk1808.c ++endif +diff --git a/drivers/clk/rockchip-oh/Makefile b/drivers/clk/rockchip-oh/Makefile new file mode 100644 -index 000000000..e177a3dd6 +index 000000000..d6aafb106 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk1808.c -@@ -0,0 +1,1249 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/Makefile +@@ -0,0 +1,40 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Rockchip Clock specific Makefile ++# ++ ++obj-$(CONFIG_COMMON_CLK_ROCKCHIP) += clk-rockchip.o ++obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += regmap/ ++ ++clk-rockchip-y += clk.o ++clk-rockchip-y += clk-pll.o ++clk-rockchip-y += clk-cpu.o ++clk-rockchip-y += clk-half-divider.o ++clk-rockchip-y += clk-mmc-phase.o ++clk-rockchip-y += clk-muxgrf.o ++clk-rockchip-$(CONFIG_ROCKCHIP_DDRCLK) += clk-ddr.o ++clk-rockchip-$(CONFIG_ROCKCHIP_CLK_INV) += clk-inverter.o ++clk-rockchip-$(CONFIG_ROCKCHIP_CLK_PVTM) += clk-pvtm.o ++clk-rockchip-$(CONFIG_RESET_CONTROLLER) += softrst.o ++ ++obj-$(CONFIG_ROCKCHIP_CLK_LINK) += clk-link.o ++obj-$(CONFIG_ROCKCHIP_CLK_OUT) += clk-out.o ++ ++obj-$(CONFIG_CLK_PX30) += clk-px30.o ++obj-$(CONFIG_CLK_RV1106) += clk-rv1106.o ++obj-$(CONFIG_CLK_RV1108) += clk-rv1108.o ++obj-$(CONFIG_CLK_RV1126) += clk-rv1126.o ++obj-$(CONFIG_CLK_RK1808) += clk-rk1808.o ++obj-$(CONFIG_CLK_RK3036) += clk-rk3036.o ++obj-$(CONFIG_CLK_RK312X) += clk-rk3128.o 
++obj-$(CONFIG_CLK_RK3188) += clk-rk3188.o ++obj-$(CONFIG_CLK_RK322X) += clk-rk3228.o ++obj-$(CONFIG_CLK_RK3288) += clk-rk3288.o ++obj-$(CONFIG_CLK_RK3308) += clk-rk3308.o ++obj-$(CONFIG_CLK_RK3328) += clk-rk3328.o ++obj-$(CONFIG_CLK_RK3368) += clk-rk3368.o ++obj-$(CONFIG_CLK_RK3399) += clk-rk3399.o ++obj-$(CONFIG_CLK_RK3528) += clk-rk3528.o ++obj-$(CONFIG_CLK_RK3562) += clk-rk3562.o ++obj-$(CONFIG_CLK_RK3568) += clk-rk3568.o ++obj-$(CONFIG_CLK_RK3588) += clk-rk3588.o +diff --git a/drivers/clk/rockchip-oh/clk-cpu.c b/drivers/clk/rockchip-oh/clk-cpu.c +new file mode 100644 +index 000000000..9a9beeb8c +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-cpu.c +@@ -0,0 +1,593 @@ ++// SPDX-License-Identifier: GPL-2.0-only +/* -+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd -+ * Author: Elaine Zhang ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * based on clk/samsung/clk-cpu.c ++ * Copyright (c) 2014 Samsung Electronics Co., Ltd. ++ * Author: Thomas Abraham ++ * ++ * A CPU clock is defined as a clock supplied to a CPU or a group of CPUs. ++ * The CPU clock is typically derived from a hierarchy of clock ++ * blocks which includes mux and divider blocks. There are a number of other ++ * auxiliary clocks supplied to the CPU domain such as the debug blocks and AXI ++ * clock for CPU domain. The rates of these auxiliary clocks are related to the ++ * CPU clock rate and this relation is usually specified in the hardware manual ++ * of the SoC or supplied after the SoC characterization. ++ * ++ * The below implementation of the CPU clock allows the rate changes of the CPU ++ * clock and the corresponding rate changes of the auxillary clocks of the CPU ++ * domain. The platform clock driver provides a clock register configuration ++ * for each configurable rate which is then used to program the clock hardware ++ * registers to acheive a fast co-oridinated rate change for all the CPU domain ++ * clocks. ++ * ++ * On a rate change request for the CPU clock, the rate change is propagated ++ * upto the PLL supplying the clock to the CPU domain clock blocks. While the ++ * CPU domain PLL is reconfigured, the CPU domain clocks are driven using an ++ * alternate clock source. If required, the alternate clock source is divided ++ * down in order to keep the output clock rate within the previous OPP limits. + */ -+#include -+#include ++ +#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include +#include "clk.h" + -+#define RK1808_GRF_SOC_STATUS0 0x480 -+#define RK1808_PMUGRF_SOC_CON0 0x100 -+#define RK1808_UART_FRAC_MAX_PRATE 800000000 -+#define RK1808_PDM_FRAC_MAX_PRATE 300000000 -+#define RK1808_I2S_FRAC_MAX_PRATE 600000000 -+#define RK1808_VOP_RAW_FRAC_MAX_PRATE 300000000 -+#define RK1808_VOP_LITE_FRAC_MAX_PRATE 400000000 -+ -+enum rk1808_plls { -+ apll, dpll, cpll, gpll, npll, ppll, ++/** ++ * struct rockchip_cpuclk: information about clock supplied to a CPU core. ++ * @hw: handle between ccf and cpu clock. ++ * @alt_parent: alternate parent clock to use when switching the speed ++ * of the primary parent clock. ++ * @reg_base: base register for cpu-clock values. ++ * @clk_nb: clock notifier registered for changes in clock speed of the ++ * primary parent clock. 
++ * @rate_count: number of rates in the rate_table ++ * @rate_table: pll-rates and their associated dividers ++ * @reg_data: cpu-specific register settings ++ * @lock: clock lock ++ */ ++struct rockchip_cpuclk { ++ struct clk_hw hw; ++ struct clk_hw *pll_hw; ++ struct clk *alt_parent; ++ void __iomem *reg_base; ++ struct notifier_block clk_nb; ++ unsigned int rate_count; ++ struct rockchip_cpuclk_rate_table *rate_table; ++ const struct rockchip_cpuclk_reg_data *reg_data; ++ spinlock_t *lock; +}; + -+static struct rockchip_pll_rate_table rk1808_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 2, 275, 3, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 1, 100, 3, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 1, 175, 2, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(416000000, 1, 52, 3, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(200000000, 1, 200, 6, 4, 1, 0), -+ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++#define to_rockchip_cpuclk_hw(hw) container_of(hw, struct rockchip_cpuclk, hw) ++#define to_rockchip_cpuclk_nb(nb) \ ++ container_of(nb, struct rockchip_cpuclk, clk_nb) + -+#define RK1808_DIV_ACLKM_MASK 0x7 -+#define RK1808_DIV_ACLKM_SHIFT 12 -+#define RK1808_DIV_PCLK_DBG_MASK 0xf -+#define RK1808_DIV_PCLK_DBG_SHIFT 8 ++static const struct rockchip_cpuclk_rate_table *rockchip_get_cpuclk_settings( ++ struct rockchip_cpuclk *cpuclk, unsigned long rate) ++{ ++ const struct rockchip_cpuclk_rate_table *rate_table = ++ cpuclk->rate_table; ++ int i; + -+#define 
RK1808_CLKSEL0(_aclk_core, _pclk_dbg) \ -+{ \ -+ .reg = RK1808_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK1808_DIV_ACLKM_MASK, \ -+ RK1808_DIV_ACLKM_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, RK1808_DIV_PCLK_DBG_MASK, \ -+ RK1808_DIV_PCLK_DBG_SHIFT), \ -+} ++ for (i = 0; i < cpuclk->rate_count; i++) { ++ if (rate == rate_table[i].prate) ++ return &rate_table[i]; ++ } + -+#define RK1808_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK1808_CLKSEL0(_aclk_core, _pclk_dbg), \ -+ }, \ ++ return NULL; +} + -+static struct rockchip_cpuclk_rate_table rk1808_cpuclk_rates[] __initdata = { -+ RK1808_CPUCLK_RATE(1608000000, 1, 7), -+ RK1808_CPUCLK_RATE(1512000000, 1, 7), -+ RK1808_CPUCLK_RATE(1488000000, 1, 5), -+ RK1808_CPUCLK_RATE(1416000000, 1, 5), -+ RK1808_CPUCLK_RATE(1392000000, 1, 5), -+ RK1808_CPUCLK_RATE(1296000000, 1, 5), -+ RK1808_CPUCLK_RATE(1200000000, 1, 5), -+ RK1808_CPUCLK_RATE(1104000000, 1, 5), -+ RK1808_CPUCLK_RATE(1008000000, 1, 5), -+ RK1808_CPUCLK_RATE(912000000, 1, 5), -+ RK1808_CPUCLK_RATE(816000000, 1, 3), -+ RK1808_CPUCLK_RATE(696000000, 1, 3), -+ RK1808_CPUCLK_RATE(600000000, 1, 3), -+ RK1808_CPUCLK_RATE(408000000, 1, 1), -+ RK1808_CPUCLK_RATE(312000000, 1, 1), -+ RK1808_CPUCLK_RATE(216000000, 1, 1), -+ RK1808_CPUCLK_RATE(96000000, 1, 1), -+}; -+ -+static const struct rockchip_cpuclk_reg_data rk1808_cpuclk_data = { -+ .core_reg[0] = RK1808_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0xf, -+ .num_cores = 1, -+ .mux_core_alt = 2, -+ .mux_core_main = 0, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, -+}; ++static unsigned long rockchip_cpuclk_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_hw(hw); ++ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; ++ u32 clksel0 = readl_relaxed(cpuclk->reg_base + reg_data->core_reg[0]); + -+PNAME(mux_pll_p) = { "xin24m", "xin32k"}; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "xin32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_cpll_apll_p) = { "gpll", "cpll", "apll" }; -+PNAME(mux_npu_p) = { "clk_npu_div", "clk_npu_np5" }; -+PNAME(mux_ddr_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; -+PNAME(mux_gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; -+PNAME(mux_dclk_vopraw_p) = { "dclk_vopraw_src", "dclk_vopraw_frac", "xin24m" }; -+PNAME(mux_dclk_voplite_p) = { "dclk_voplite_src", "dclk_voplite_frac", "xin24m" }; -+PNAME(mux_24m_npll_gpll_usb480m_p) = { "xin24m", "npll", "gpll", "usb480m" }; -+PNAME(mux_usb3_otg0_suspend_p) = { "xin32k", "xin24m" }; -+PNAME(mux_pcie_aux_p) = { "xin24m", "clk_pcie_src" }; -+PNAME(mux_gpll_cpll_npll_24m_p) = { "gpll", "cpll", "npll", "xin24m" }; -+PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; -+PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; -+PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; -+PNAME(mux_cpll_npll_ppll_p) = { "cpll", "npll", "ppll" }; -+PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; -+PNAME(mux_gmac_rgmii_speed_p) = { "clk_gmac_tx_src", "clk_gmac_tx_src", "clk_gmac_tx_div50", "clk_gmac_tx_div5" }; -+PNAME(mux_gmac_rmii_speed_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; -+PNAME(mux_gmac_rx_tx_p) = { "clk_gmac_rgmii_speed", "clk_gmac_rmii_speed" }; -+PNAME(mux_gpll_usb480m_cpll_npll_p) = { "gpll", "usb480m", "cpll", "npll" }; -+PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac", "xin24m" }; 
-+PNAME(mux_uart2_p) = { "clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac", "xin24m" }; -+PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac", "xin24m" }; -+PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac", "xin24m" }; -+PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac", "xin24m" }; -+PNAME(mux_uart6_p) = { "clk_uart6_src", "clk_uart6_np5", "clk_uart6_frac", "xin24m" }; -+PNAME(mux_uart7_p) = { "clk_uart7_src", "clk_uart7_np5", "clk_uart7_frac", "xin24m" }; -+PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m" }; -+PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(mux_gpll_xin24m_cpll_npll_p) = { "gpll", "xin24m", "cpll", "npll" }; -+PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; -+PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in", "xin12m" }; -+PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; -+PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m", "clk_i2s0_8ch_rx" }; -+PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in", "xin12m" }; -+PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; -+PNAME(mux_i2s0_8ch_rx_out_p) = { "clk_i2s0_8ch_rx", "xin12m", "clk_i2s0_8ch_tx" }; -+PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in", "xin12m" }; -+PNAME(mux_i2s1_2ch_out_p) = { "clk_i2s1_2ch", "xin12m" }; -+PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac" }; -+PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; -+PNAME(mux_gpll_usb480m_cpll_ppll_p) = { "gpll", "usb480m", "cpll", "ppll" }; -+PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac", "xin24m" }; -+PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_pciephy_ref_p) = { "xin24m", "clk_pciephy_src" }; -+PNAME(mux_ppll_xin24m_p) = { "ppll", "xin24m" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", "xin32k" }; -+PNAME(mux_clk_32k_ioe_p) = { "clk_rtc32k_pmu", "xin32k" }; ++ clksel0 >>= reg_data->div_core_shift[0]; ++ clksel0 &= reg_data->div_core_mask[0]; ++ return parent_rate / (clksel0 + 1); ++} + -+static struct rockchip_pll_clock rk1808_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, -+ 0, RK1808_PLL_CON(0), -+ RK1808_MODE_CON, 0, 0, 0, rk1808_pll_rates), -+ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, -+ 0, RK1808_PLL_CON(8), -+ RK1808_MODE_CON, 2, 1, 0, NULL), -+ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RK1808_PLL_CON(16), -+ RK1808_MODE_CON, 4, 2, 0, rk1808_pll_rates), -+ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK1808_PLL_CON(24), -+ RK1808_MODE_CON, 6, 3, 0, rk1808_pll_rates), -+ [npll] = PLL(pll_rk3036, PLL_NPLL, "npll", mux_pll_p, -+ 0, RK1808_PLL_CON(32), -+ RK1808_MODE_CON, 8, 5, 0, rk1808_pll_rates), -+ [ppll] = PLL(pll_rk3036, PLL_PPLL, "ppll", mux_pll_p, -+ 0, RK1808_PMU_PLL_CON(0), -+ RK1808_PMU_MODE_CON, 0, 4, 0, rk1808_pll_rates), ++static const struct clk_ops rockchip_cpuclk_ops = { ++ .recalc_rate = rockchip_cpuclk_recalc_rate, +}; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+ -+static struct rockchip_clk_branch rk1808_uart1_fracmux __initdata = -+ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(39), 14, 2, 
MFLAGS); ++static void rockchip_cpuclk_set_dividers(struct rockchip_cpuclk *cpuclk, ++ const struct rockchip_cpuclk_rate_table *rate) ++{ ++ int i; + -+static struct rockchip_clk_branch rk1808_uart2_fracmux __initdata = -+ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(42), 14, 2, MFLAGS); ++ /* alternate parent is active now. set the dividers */ ++ for (i = 0; i < ARRAY_SIZE(rate->divs); i++) { ++ const struct rockchip_cpuclk_clksel *clksel = &rate->divs[i]; + -+static struct rockchip_clk_branch rk1808_uart3_fracmux __initdata = -+ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(45), 14, 2, MFLAGS); ++ if (!clksel->reg) ++ continue; + -+static struct rockchip_clk_branch rk1808_uart4_fracmux __initdata = -+ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(48), 14, 2, MFLAGS); ++ pr_debug("%s: setting reg 0x%x to 0x%x\n", ++ __func__, clksel->reg, clksel->val); ++ writel(clksel->val, cpuclk->reg_base + clksel->reg); ++ } ++} + -+static struct rockchip_clk_branch rk1808_uart5_fracmux __initdata = -+ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(51), 14, 2, MFLAGS); ++static void rockchip_cpuclk_set_pre_muxs(struct rockchip_cpuclk *cpuclk, ++ const struct rockchip_cpuclk_rate_table *rate) ++{ ++ int i; + -+static struct rockchip_clk_branch rk1808_uart6_fracmux __initdata = -+ MUX(0, "clk_uart6_mux", mux_uart6_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(54), 14, 2, MFLAGS); ++ /* alternate parent is active now. set the pre_muxs */ ++ for (i = 0; i < ARRAY_SIZE(rate->pre_muxs); i++) { ++ const struct rockchip_cpuclk_clksel *clksel = &rate->pre_muxs[i]; + -+static struct rockchip_clk_branch rk1808_uart7_fracmux __initdata = -+ MUX(0, "clk_uart7_mux", mux_uart7_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(57), 14, 2, MFLAGS); ++ if (!clksel->reg) ++ break; + -+static struct rockchip_clk_branch rk1808_dclk_vopraw_fracmux __initdata = -+ MUX(0, "dclk_vopraw_mux", mux_dclk_vopraw_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(5), 14, 2, MFLAGS); ++ pr_debug("%s: setting reg 0x%x to 0x%x\n", ++ __func__, clksel->reg, clksel->val); ++ writel(clksel->val, cpuclk->reg_base + clksel->reg); ++ } ++} + -+static struct rockchip_clk_branch rk1808_dclk_voplite_fracmux __initdata = -+ MUX(0, "dclk_voplite_mux", mux_dclk_voplite_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(7), 14, 2, MFLAGS); ++static void rockchip_cpuclk_set_post_muxs(struct rockchip_cpuclk *cpuclk, ++ const struct rockchip_cpuclk_rate_table *rate) ++{ ++ int i; + -+static struct rockchip_clk_branch rk1808_pdm_fracmux __initdata = -+ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(30), 15, 1, MFLAGS); ++ /* alternate parent is active now. 
set the muxs */ ++ for (i = 0; i < ARRAY_SIZE(rate->post_muxs); i++) { ++ const struct rockchip_cpuclk_clksel *clksel = &rate->post_muxs[i]; + -+static struct rockchip_clk_branch rk1808_i2s0_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 10, 2, MFLAGS); ++ if (!clksel->reg) ++ break; + -+static struct rockchip_clk_branch rk1808_i2s0_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 10, 2, MFLAGS); ++ pr_debug("%s: setting reg 0x%x to 0x%x\n", ++ __func__, clksel->reg, clksel->val); ++ writel(clksel->val, cpuclk->reg_base + clksel->reg); ++ } ++} + -+static struct rockchip_clk_branch rk1808_i2s1_2ch_fracmux __initdata = -+ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(36), 10, 2, MFLAGS); ++static int rockchip_cpuclk_pre_rate_change(struct rockchip_cpuclk *cpuclk, ++ struct clk_notifier_data *ndata) ++{ ++ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; ++ const struct rockchip_cpuclk_rate_table *rate; ++ unsigned long alt_prate, alt_div; ++ unsigned long flags; ++ int i = 0; + -+static struct rockchip_clk_branch rk1808_rtc32k_pmu_fracmux __initdata = -+ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); ++ /* check validity of the new rate */ ++ rate = rockchip_get_cpuclk_settings(cpuclk, ndata->new_rate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for cpuclk\n", ++ __func__, ndata->new_rate); ++ return -EINVAL; ++ } + -+static struct rockchip_clk_branch rk1808_uart0_pmu_fracmux __initdata = -+ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); ++ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) ++ rockchip_boost_enable_recovery_sw_low(cpuclk->pll_hw); + -+static struct rockchip_clk_branch rk1808_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++ alt_prate = clk_get_rate(cpuclk->alt_parent); + -+ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK1808_MODE_CON, 10, 2, MFLAGS), -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ spin_lock_irqsave(cpuclk->lock, flags); + + /* -+ * Clock-Architecture Diagram 2 ++ * If the old parent clock speed is less than the clock speed ++ * of the alternate parent, then it should be ensured that at no point ++ * the armclk speed is more than the old_rate until the dividers are ++ * set. 
+ */ ++ if (alt_prate > ndata->old_rate) { ++ /* calculate dividers */ ++ alt_div = DIV_ROUND_UP(alt_prate, ndata->old_rate) - 1; ++ if (alt_div > reg_data->div_core_mask[0]) { ++ pr_warn("%s: limiting alt-divider %lu to %d\n", ++ __func__, alt_div, reg_data->div_core_mask[0]); ++ alt_div = reg_data->div_core_mask[0]; ++ } + -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "cpll_core", "cpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK1808_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK1808_CLKGATE_CON(0), 2, GFLAGS), -+ -+ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 4, GFLAGS), -+ -+ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RK1808_CLKGATE_CON(0), 5, GFLAGS), ++ /* ++ * Change parents and add dividers in a single transaction. ++ * ++ * NOTE: we do this in a single transaction so we're never ++ * dividing the primary parent by the extra dividers that were ++ * needed for the alt. ++ */ ++ pr_debug("%s: setting div %lu as alt-rate %lu > old-rate %lu\n", ++ __func__, alt_div, alt_prate, ndata->old_rate); + -+ COMPOSITE_NOMUX(MSCLK_CORE_NIU, "msclk_core_niu", "gpll", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(18), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(0), 1, GFLAGS), ++ for (i = 0; i < reg_data->num_cores; i++) { ++ writel(HIWORD_UPDATE(alt_div, reg_data->div_core_mask[i], ++ reg_data->div_core_shift[i]), ++ cpuclk->reg_base + reg_data->core_reg[i]); ++ } ++ } + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) ++ rockchip_boost_add_core_div(cpuclk->pll_hw, alt_prate); + -+ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(15), 11, 1, MFLAGS, 12, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(0, "aclk_gic_niu", "aclk_gic_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(0, "aclk_core2gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(0, "aclk_gic2core", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(0, "aclk_spinlock", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 4, GFLAGS), ++ rockchip_cpuclk_set_pre_muxs(cpuclk, rate); + -+ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, -+ RK1808_CLKSEL_CON(16), 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, -+ RK1808_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, -+ RK1808_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 11, GFLAGS), ++ /* select alternate parent */ ++ if (reg_data->mux_core_reg) ++ writel(HIWORD_UPDATE(reg_data->mux_core_alt, ++ reg_data->mux_core_mask, ++ reg_data->mux_core_shift), ++ cpuclk->reg_base + 
reg_data->mux_core_reg); ++ else ++ writel(HIWORD_UPDATE(reg_data->mux_core_alt, ++ reg_data->mux_core_mask, ++ reg_data->mux_core_shift), ++ cpuclk->reg_base + reg_data->core_reg[0]); + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ -+ COMPOSITE_NOGATE(0, "clk_npu_div", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, -+ RK1808_CLKSEL_CON(1), 8, 2, MFLAGS, 0, 4, DFLAGS), -+ COMPOSITE_NOGATE_HALFDIV(0, "clk_npu_np5", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, -+ RK1808_CLKSEL_CON(1), 10, 2, MFLAGS, 4, 4, DFLAGS), -+ MUX(0, "clk_npu_pre", mux_npu_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(1), 15, 1, MFLAGS), -+ FACTOR(0, "clk_npu_scan", "clk_npu_pre", 0, 1, 2), -+ GATE(SCLK_NPU, "clk_npu", "clk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 10, GFLAGS), ++ spin_unlock_irqrestore(cpuclk->lock, flags); ++ return 0; ++} + -+ COMPOSITE(0, "aclk_npu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(2), 14, 1, MFLAGS, 0, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE(0, "hclk_npu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 11, GFLAGS), -+ GATE(0, "aclk_npu_niu", "aclk_npu_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_npu2mem", "aclk_npu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(2), 4, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(0, "hclk_npu_niu", "hclk_npu_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 14, GFLAGS), ++static int rockchip_cpuclk_post_rate_change(struct rockchip_cpuclk *cpuclk, ++ struct clk_notifier_data *ndata) ++{ ++ const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data; ++ const struct rockchip_cpuclk_rate_table *rate; ++ unsigned long flags; ++ int i = 0; + -+ GATE(SCLK_PVTM_NPU, "clk_pvtm_npu", "xin24m", 0, -+ RK1808_CLKGATE_CON(0), 15, GFLAGS), ++ rate = rockchip_get_cpuclk_settings(cpuclk, ndata->new_rate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for cpuclk\n", ++ __func__, ndata->new_rate); ++ return -EINVAL; ++ } + -+ COMPOSITE(ACLK_IMEM_PRE, "aclk_imem_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(17), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(ACLK_IMEM0, "aclk_imem0", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(0, "aclk_imem0_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(ACLK_IMEM1, "aclk_imem1", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(0, "aclk_imem1_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(ACLK_IMEM2, "aclk_imem2", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 8, GFLAGS), -+ GATE(0, "aclk_imem2_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(ACLK_IMEM3, "aclk_imem3", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(0, "aclk_imem3_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 13, GFLAGS), ++ spin_lock_irqsave(cpuclk->lock, flags); + -+ COMPOSITE(HSCLK_IMEM, "hsclk_imem", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(17), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(7), 5, GFLAGS), ++ if (ndata->old_rate < ndata->new_rate) ++ rockchip_cpuclk_set_dividers(cpuclk, rate); + + /* -+ * Clock-Architecture Diagram 5 ++ * post-rate change 
event, re-mux to primary parent and remove dividers. ++ * ++ * NOTE: we do this in a single transaction so we're never dividing the ++ * primary parent by the extra dividers that were needed for the alt. + */ -+ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 0, GFLAGS), + -+ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 11, GFLAGS), -+ GATE(0, "aclk_split", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 15, GFLAGS), -+ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(0, "clk_ddrdfi_ctl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(0, "clk_stdby", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 13, GFLAGS), -+ GATE(0, "aclk_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(0, "clk_core_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 6, GFLAGS), ++ if (reg_data->mux_core_reg) ++ writel(HIWORD_UPDATE(reg_data->mux_core_main, ++ reg_data->mux_core_mask, ++ reg_data->mux_core_shift), ++ cpuclk->reg_base + reg_data->mux_core_reg); ++ else ++ writel(HIWORD_UPDATE(reg_data->mux_core_main, ++ reg_data->mux_core_mask, ++ reg_data->mux_core_shift), ++ cpuclk->reg_base + reg_data->core_reg[0]); + -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 6, GFLAGS), ++ rockchip_cpuclk_set_post_muxs(cpuclk, rate); + -+ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_ddr_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS), -+ FACTOR(0, "clk_ddrphy1x_out", "sclk_ddrc", CLK_IGNORE_UNUSED, 1, 1), ++ /* remove dividers */ ++ for (i = 0; i < reg_data->num_cores; i++) { ++ writel(HIWORD_UPDATE(0, reg_data->div_core_mask[i], ++ reg_data->div_core_shift[i]), ++ cpuclk->reg_base + reg_data->core_reg[i]); ++ } + -+ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(3), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 10, GFLAGS), -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_MSCH, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(PCLK_STDBY, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 12, GFLAGS), -+ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(0, "pclk_ddrdfi_ctl", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 2, GFLAGS), ++ if (ndata->old_rate > ndata->new_rate) ++ rockchip_cpuclk_set_dividers(cpuclk, rate); + -+ /* -+ * Clock-Architecture Diagram 6 -+ */ ++ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) ++ rockchip_boost_disable_recovery_sw(cpuclk->pll_hw); + -+ COMPOSITE(HSCLK_VIO, "hsclk_vio", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_VIO, "lsclk_vio", "hsclk_vio", 0, -+ RK1808_CLKSEL_CON(4), 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(3), 12, GFLAGS), -+ GATE(0, "hsclk_vio_niu", "hsclk_vio", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(0, "lsclk_vio_niu", "lsclk_vio", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(ACLK_VOPRAW, "aclk_vopraw", "hsclk_vio", 0, -+ 
RK1808_CLKGATE_CON(4), 2, GFLAGS), -+ GATE(HCLK_VOPRAW, "hclk_vopraw", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(ACLK_VOPLITE, "aclk_voplite", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 4, GFLAGS), -+ GATE(HCLK_VOPLITE, "hclk_voplite", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(PCLK_DSI_TX, "pclk_dsi_tx", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 6, GFLAGS), -+ GATE(PCLK_CSI_TX, "pclk_csi_tx", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 9, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 14, GFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 10, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(PCLK_CSI2HOST, "pclk_csi2host", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 12, GFLAGS), ++ spin_unlock_irqrestore(cpuclk->lock, flags); ++ return 0; ++} + -+ COMPOSITE(0, "dclk_vopraw_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "dclk_vopraw_frac", "dclk_vopraw_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(6), 0, -+ RK1808_CLKGATE_CON(3), 2, GFLAGS, -+ &rk1808_dclk_vopraw_fracmux), -+ GATE(DCLK_VOPRAW, "dclk_vopraw", "dclk_vopraw_mux", 0, -+ RK1808_CLKGATE_CON(3), 3, GFLAGS), ++/* ++ * This clock notifier is called when the frequency of the parent clock ++ * of cpuclk is to be changed. This notifier handles the setting up all ++ * the divider clocks, remux to temporary parent and handling the safe ++ * frequency levels when using temporary parent. 
++ */ ++static int rockchip_cpuclk_notifier_cb(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct clk_notifier_data *ndata = data; ++ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_nb(nb); ++ int ret = 0; + -+ COMPOSITE(0, "dclk_voplite_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(7), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "dclk_voplite_frac", "dclk_voplite_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(8), 0, -+ RK1808_CLKGATE_CON(3), 5, GFLAGS, -+ &rk1808_dclk_voplite_fracmux), -+ GATE(DCLK_VOPLITE, "dclk_voplite", "dclk_voplite_mux", 0, -+ RK1808_CLKGATE_CON(3), 6, GFLAGS), ++ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", ++ __func__, event, ndata->old_rate, ndata->new_rate); ++ if (event == PRE_RATE_CHANGE) ++ ret = rockchip_cpuclk_pre_rate_change(cpuclk, ndata); ++ else if (event == POST_RATE_CHANGE) ++ ret = rockchip_cpuclk_post_rate_change(cpuclk, ndata); + -+ COMPOSITE_NOMUX(SCLK_TXESC, "clk_txesc", "gpll", 0, -+ RK1808_CLKSEL_CON(9), 0, 12, DFLAGS, -+ RK1808_CLKGATE_CON(3), 7, GFLAGS), ++ return notifier_from_errno(ret); ++} + -+ COMPOSITE(SCLK_RGA, "clk_rga", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 8, GFLAGS), ++struct clk *rockchip_clk_register_cpuclk(const char *name, ++ u8 num_parents, ++ struct clk *parent, struct clk *alt_parent, ++ const struct rockchip_cpuclk_reg_data *reg_data, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates, void __iomem *reg_base, spinlock_t *lock) ++{ ++ struct rockchip_cpuclk *cpuclk; ++ struct clk_init_data init; ++ struct clk *clk, *cclk, *pll_clk; ++ const char *parent_name; ++ int ret; + -+ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(10), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 10, GFLAGS), ++ if (num_parents < 2) { ++ pr_err("%s: needs at least two parent clocks\n", __func__); ++ return ERR_PTR(-EINVAL); ++ } + -+ COMPOSITE(DCLK_CIF, "dclk_cif", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 11, GFLAGS), ++ if (IS_ERR(parent) || IS_ERR(alt_parent)) { ++ pr_err("%s: invalid parent clock(s)\n", __func__); ++ return ERR_PTR(-EINVAL); ++ } + -+ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_24m_npll_gpll_usb480m_p, 0, -+ RK1808_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK1808_CLKGATE_CON(3), 9, GFLAGS), ++ cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL); ++ if (!cpuclk) ++ return ERR_PTR(-ENOMEM); + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ ++ parent_name = clk_hw_get_name(__clk_get_hw(parent)); ++ init.name = name; ++ init.parent_names = &parent_name; ++ init.num_parents = 1; ++ init.ops = &rockchip_cpuclk_ops; + -+ /* PD_PCIE */ -+ COMPOSITE_NODIV(0, "clk_pcie_src", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(12), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(5), 0, GFLAGS), -+ DIV(HSCLK_PCIE, "hsclk_pcie", "clk_pcie_src", 0, -+ RK1808_CLKSEL_CON(12), 0, 5, DFLAGS), -+ DIV(LSCLK_PCIE, "lsclk_pcie", "clk_pcie_src", 0, -+ RK1808_CLKSEL_CON(12), 8, 5, DFLAGS), -+ GATE(0, "hsclk_pcie_niu", "hsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(0, "lsclk_pcie_niu", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(0, "pclk_pcie_grf", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(ACLK_USB3OTG, "aclk_usb3otg", "hsclk_pcie", 0, -+ RK1808_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(HCLK_HOST, 
"hclk_host", "lsclk_pcie", 0, -+ RK1808_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(HCLK_HOST_ARB, "hclk_host_arb", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 8, GFLAGS), ++ /* only allow rate changes when we have a rate table */ ++ init.flags = (nrates > 0) ? CLK_SET_RATE_PARENT : 0; + -+ COMPOSITE(ACLK_PCIE, "aclk_pcie", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(15), 8, 1, MFLAGS, 0, 4, DFLAGS, -+ RK1808_CLKGATE_CON(5), 5, GFLAGS), -+ DIV(0, "pclk_pcie_pre", "aclk_pcie", 0, -+ RK1808_CLKSEL_CON(15), 4, 4, DFLAGS), -+ GATE(0, "aclk_pcie_niu", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(ACLK_PCIE_MST, "aclk_pcie_mst", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(ACLK_PCIE_SLV, "aclk_pcie_slv", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(0, "pclk_pcie_niu", "pclk_pcie_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(0, "pclk_pcie_dbi", "pclk_pcie_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(PCLK_PCIE, "pclk_pcie", "pclk_pcie_pre", 0, -+ RK1808_CLKGATE_CON(6), 9, GFLAGS), ++ /* disallow automatic parent changes by ccf */ ++ init.flags |= CLK_SET_RATE_NO_REPARENT; + -+ COMPOSITE(0, "clk_pcie_aux_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(5), 3, GFLAGS), -+ COMPOSITE_NODIV(SCLK_PCIE_AUX, "clk_pcie_aux", mux_pcie_aux_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(14), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(5), 4, GFLAGS), ++ init.flags |= CLK_GET_RATE_NOCACHE; + -+ GATE(SCLK_USB3_OTG0_REF, "clk_usb3_otg0_ref", "xin24m", 0, -+ RK1808_CLKGATE_CON(5), 1, GFLAGS), ++ cpuclk->reg_base = reg_base; ++ cpuclk->lock = lock; ++ cpuclk->reg_data = reg_data; ++ cpuclk->clk_nb.notifier_call = rockchip_cpuclk_notifier_cb; ++ cpuclk->hw.init = &init; ++ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST) && reg_data->pll_name) { ++ pll_clk = clk_get_parent(parent); ++ if (!pll_clk) { ++ pr_err("%s: could not lookup pll clock: (%s)\n", ++ __func__, reg_data->pll_name); ++ ret = -EINVAL; ++ goto free_cpuclk; ++ } ++ cpuclk->pll_hw = __clk_get_hw(pll_clk); ++ rockchip_boost_init(cpuclk->pll_hw); ++ } + -+ COMPOSITE(SCLK_USB3_OTG0_SUSPEND, "clk_usb3_otg0_suspend", mux_usb3_otg0_suspend_p, 0, -+ RK1808_CLKSEL_CON(13), 12, 1, MFLAGS, 0, 10, DFLAGS, -+ RK1808_CLKGATE_CON(5), 2, GFLAGS), ++ cpuclk->alt_parent = alt_parent; ++ if (!cpuclk->alt_parent) { ++ pr_err("%s: could not lookup alternate parent: (%d)\n", ++ __func__, reg_data->mux_core_alt); ++ ret = -EINVAL; ++ goto free_cpuclk; ++ } + -+ /* -+ * Clock-Architecture Diagram 8 -+ */ ++ ret = clk_prepare_enable(cpuclk->alt_parent); ++ if (ret) { ++ pr_err("%s: could not enable alternate parent\n", ++ __func__); ++ goto free_cpuclk; ++ } + -+ /* PD_PHP */ ++ clk = parent; ++ if (!clk) { ++ pr_err("%s: could not lookup parent clock: (%d) %s\n", ++ __func__, reg_data->mux_core_main, ++ parent_name); ++ ret = -EINVAL; ++ goto free_alt_parent; ++ } + -+ COMPOSITE_NODIV(0, "clk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(MSCLK_PERI, "msclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_PERI, "lsclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(0, "msclk_peri_niu", "msclk_peri", CLK_IS_CRITICAL, 
-+ RK1808_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(0, "lsclk_peri_niu", "lsclk_peri", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(8), 4, GFLAGS), ++ ret = clk_notifier_register(clk, &cpuclk->clk_nb); ++ if (ret) { ++ pr_err("%s: failed to register clock notifier for %s\n", ++ __func__, name); ++ goto free_alt_parent; ++ } + -+ /* PD_MMC */ ++ if (nrates > 0) { ++ cpuclk->rate_count = nrates; ++ cpuclk->rate_table = kmemdup(rates, ++ sizeof(*rates) * nrates, ++ GFP_KERNEL); ++ if (!cpuclk->rate_table) { ++ ret = -ENOMEM; ++ goto unregister_notifier; ++ } ++ } + -+ GATE(0, "hclk_mmc_sfc", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(9), 0, GFLAGS), -+ GATE(0, "hclk_mmc_sfc_niu", "hclk_mmc_sfc", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(9), 11, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_sfc", 0, -+ RK1808_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_sfc", 0, -+ RK1808_CLKGATE_CON(9), 13, GFLAGS), ++ cclk = clk_register(NULL, &cpuclk->hw); ++ if (IS_ERR(cclk)) { ++ pr_err("%s: could not register cpuclk %s\n", __func__, name); ++ ret = PTR_ERR(cclk); ++ goto free_rate_table; ++ } + -+ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(23), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(23), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 3, GFLAGS), ++ return cclk; + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK1808_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK1808_SDIO_CON1, 1), ++free_rate_table: ++ kfree(cpuclk->rate_table); ++unregister_notifier: ++ clk_notifier_unregister(clk, &cpuclk->clk_nb); ++free_alt_parent: ++ clk_disable_unprepare(cpuclk->alt_parent); ++free_cpuclk: ++ kfree(cpuclk); ++ return ERR_PTR(ret); ++} + -+ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 4, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(25), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 5, GFLAGS), -+ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(25), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 6, GFLAGS), -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK1808_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK1808_EMMC_CON1, 1), ++static int rockchip_cpuclk_v2_pre_rate_change(struct rockchip_cpuclk *cpuclk, ++ struct clk_notifier_data *ndata) ++{ ++ unsigned long new_rate = roundup(ndata->new_rate, 1000); ++ const struct rockchip_cpuclk_rate_table *rate; ++ unsigned long flags; + -+ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 7, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(21), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDMMC, 
"clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(21), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 9, GFLAGS), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK1808_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK1808_SDMMC_CON1, 1), ++ rate = rockchip_get_cpuclk_settings(cpuclk, new_rate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for cpuclk\n", ++ __func__, new_rate); ++ return -EINVAL; ++ } + -+ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(9), 10, GFLAGS), ++ if (new_rate > ndata->old_rate) { ++ spin_lock_irqsave(cpuclk->lock, flags); ++ rockchip_cpuclk_set_dividers(cpuclk, rate); ++ spin_unlock_irqrestore(cpuclk->lock, flags); ++ } + -+ /* PD_MAC */ ++ return 0; ++} + -+ GATE(0, "pclk_sd_gmac", "lsclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(0, "aclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(0, "hclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(0, "pclk_gmac_niu", "pclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(0, "aclk_gmac_niu", "aclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 11, GFLAGS), -+ GATE(0, "hclk_gmac_niu", "hclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 14, GFLAGS), ++static int rockchip_cpuclk_v2_post_rate_change(struct rockchip_cpuclk *cpuclk, ++ struct clk_notifier_data *ndata) ++{ ++ unsigned long new_rate = roundup(ndata->new_rate, 1000); ++ const struct rockchip_cpuclk_rate_table *rate; ++ unsigned long flags; + -+ COMPOSITE(SCLK_GMAC_OUT, "clk_gmac_out", mux_cpll_npll_ppll_p, 0, -+ RK1808_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(10), 15, GFLAGS), ++ rate = rockchip_get_cpuclk_settings(cpuclk, new_rate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for cpuclk\n", ++ __func__, new_rate); ++ return -EINVAL; ++ } + -+ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_cpll_npll_ppll_p, 0, -+ RK1808_CLKSEL_CON(26), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(10), 3, GFLAGS), -+ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(27), 0, 1, MFLAGS), -+ GATE(SCLK_GMAC_REF, "clk_gmac_ref", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(0, "clk_gmac_tx_src", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(0, "clk_gmac_rx_src", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 6, GFLAGS), -+ GATE(SCLK_GMAC_REFOUT, "clk_gmac_refout", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 5, GFLAGS), -+ FACTOR(0, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), -+ FACTOR(0, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), -+ FACTOR(0, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), -+ FACTOR(0, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), -+ MUX(SCLK_GMAC_RGMII_SPEED, "clk_gmac_rgmii_speed", mux_gmac_rgmii_speed_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(27), 2, 2, MFLAGS), -+ MUX(SCLK_GMAC_RMII_SPEED, "clk_gmac_rmii_speed", mux_gmac_rmii_speed_p, CLK_SET_RATE_PARENT, -+ 
RK1808_CLKSEL_CON(27), 1, 1, MFLAGS), -+ MUX(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", mux_gmac_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(27), 4, 1, MFLAGS), ++ if (new_rate < ndata->old_rate) { ++ spin_lock_irqsave(cpuclk->lock, flags); ++ rockchip_cpuclk_set_dividers(cpuclk, rate); ++ spin_unlock_irqrestore(cpuclk->lock, flags); ++ } + -+ /* -+ * Clock-Architecture Diagram 9 -+ */ ++ return 0; ++} + -+ /* PD_BUS */ ++static int rockchip_cpuclk_v2_notifier_cb(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct clk_notifier_data *ndata = data; ++ struct rockchip_cpuclk *cpuclk = to_rockchip_cpuclk_nb(nb); ++ int ret = 0; + -+ COMPOSITE_NODIV(0, "clk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(27), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX(HSCLK_BUS_PRE, "hsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(27), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_NOMUX(MSCLK_BUS_PRE, "msclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(28), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 2, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_BUS_PRE, "lsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(28), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(0, "hsclk_bus_niu", "hsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(0, "msclk_bus_niu", "msclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(0, "msclk_sub", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(14), 15, GFLAGS), -+ GATE(HCLK_ROM, "hclk_rom", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(0, "lsclk_bus_niu", "lsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 12, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 13, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 14, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 15, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 5, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "lsclk_bus_pre", 0, -+ 
RK1808_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(PCLK_EFUSE, "pclk_efuse", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 13, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 14, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 1, GFLAGS), -+ GATE(0, "pclk_grf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(0, "pclk_sgrf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(0, "hclk_audio_pre", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(0, "pclk_top_pre", "lsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(11), 4, GFLAGS), ++ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", ++ __func__, event, ndata->old_rate, ndata->new_rate); ++ if (event == PRE_RATE_CHANGE) ++ ret = rockchip_cpuclk_v2_pre_rate_change(cpuclk, ndata); ++ else if (event == POST_RATE_CHANGE) ++ ret = rockchip_cpuclk_v2_post_rate_change(cpuclk, ndata); + -+ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(29), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 6, GFLAGS), ++ return notifier_from_errno(ret); ++} + -+ COMPOSITE(0, "clk_uart1_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 8, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, -+ RK1808_CLKSEL_CON(39), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(40), 0, -+ RK1808_CLKGATE_CON(11), 10, GFLAGS, -+ &rk1808_uart1_fracmux), -+ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, -+ RK1808_CLKGATE_CON(11), 11, GFLAGS), ++struct clk *rockchip_clk_register_cpuclk_v2(const char *name, ++ const char *const *parent_names, ++ u8 num_parents, void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 div_width, u8 div_flags, ++ unsigned long flags, spinlock_t *lock, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates) ++{ ++ struct rockchip_cpuclk *cpuclk; ++ struct clk_hw *hw; ++ struct clk_mux *mux = NULL; ++ struct clk_divider *div = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL; ++ int ret; + -+ COMPOSITE(0, "clk_uart2_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(41), 14, 2, MFLAGS, 0, 
7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, -+ RK1808_CLKSEL_CON(42), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(43), 0, -+ RK1808_CLKGATE_CON(11), 14, GFLAGS, -+ &rk1808_uart2_fracmux), -+ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", 0, -+ RK1808_CLKGATE_CON(11), 15, GFLAGS), ++ if (num_parents > 1) { ++ mux = kzalloc(sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+ COMPOSITE(0, "clk_uart3_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(44), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, -+ RK1808_CLKSEL_CON(45), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(46), 0, -+ RK1808_CLKGATE_CON(12), 2, GFLAGS, -+ &rk1808_uart3_fracmux), -+ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, -+ RK1808_CLKGATE_CON(12), 3, GFLAGS), ++ mux->reg = base + muxdiv_offset; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux->flags = mux_flags; ++ mux->lock = lock; ++ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops ++ : &clk_mux_ops; ++ } + -+ COMPOSITE(0, "clk_uart4_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(47), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 4, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, -+ RK1808_CLKSEL_CON(48), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 5, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(49), 0, -+ RK1808_CLKGATE_CON(12), 6, GFLAGS, -+ &rk1808_uart4_fracmux), -+ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, -+ RK1808_CLKGATE_CON(12), 7, GFLAGS), ++ if (div_width > 0) { ++ div = kzalloc(sizeof(*div), GFP_KERNEL); ++ if (!div) { ++ ret = -ENOMEM; ++ goto free_mux; ++ } + -+ COMPOSITE(0, "clk_uart5_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(50), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 8, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, -+ RK1808_CLKSEL_CON(51), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(52), 0, -+ RK1808_CLKGATE_CON(12), 10, GFLAGS, -+ &rk1808_uart5_fracmux), -+ GATE(SCLK_UART5, "clk_uart5", "clk_uart5_mux", 0, -+ RK1808_CLKGATE_CON(12), 11, GFLAGS), ++ div->flags = div_flags; ++ if (div_offset) ++ div->reg = base + div_offset; ++ else ++ div->reg = base + muxdiv_offset; ++ div->shift = div_shift; ++ div->width = div_width; ++ div->lock = lock; ++ div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) ++ ? 
&clk_divider_ro_ops ++ : &clk_divider_ops; ++ } + -+ COMPOSITE(0, "clk_uart6_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(53), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart6_np5", "clk_uart6_src", 0, -+ RK1808_CLKSEL_CON(54), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(55), 0, -+ RK1808_CLKGATE_CON(12), 14, GFLAGS, -+ &rk1808_uart6_fracmux), -+ GATE(SCLK_UART6, "clk_uart6", "clk_uart6_mux", 0, -+ RK1808_CLKGATE_CON(12), 15, GFLAGS), ++ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, ++ mux ? &mux->hw : NULL, mux_ops, ++ div ? &div->hw : NULL, div_ops, ++ NULL, NULL, flags); ++ if (IS_ERR(hw)) { ++ ret = PTR_ERR(hw); ++ goto free_div; ++ } + -+ COMPOSITE(0, "clk_uart7_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart7_np5", "clk_uart7_src", 0, -+ RK1808_CLKSEL_CON(57), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(58), 0, -+ RK1808_CLKGATE_CON(13), 2, GFLAGS, -+ &rk1808_uart7_fracmux), -+ GATE(SCLK_UART7, "clk_uart7", "clk_uart7_mux", 0, -+ RK1808_CLKGATE_CON(13), 3, GFLAGS), ++ cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL); ++ if (!cpuclk) { ++ ret = -ENOMEM; ++ goto unregister_clk; ++ } + -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 4, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 5, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 6, GFLAGS), -+ COMPOSITE(SCLK_I2C4, "clk_i2c4", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(14), 6, GFLAGS), -+ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(71), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(14), 7, GFLAGS), ++ cpuclk->reg_base = base; ++ cpuclk->lock = lock; ++ cpuclk->clk_nb.notifier_call = rockchip_cpuclk_v2_notifier_cb; ++ ret = clk_notifier_register(hw->clk, &cpuclk->clk_nb); ++ if (ret) { ++ pr_err("%s: failed to register clock notifier for %s\n", ++ __func__, name); ++ goto free_cpuclk; ++ } + -+ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 7, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 9, GFLAGS), ++ if (nrates > 0) { ++ cpuclk->rate_count = nrates; ++ cpuclk->rate_table = kmemdup(rates, ++ sizeof(*rates) * nrates, ++ GFP_KERNEL); ++ if (!cpuclk->rate_table) { ++ ret = -ENOMEM; ++ goto free_cpuclk; ++ } ++ } + -+ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK1808_CLKSEL_CON(62), 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(13), 13, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK1808_CLKSEL_CON(63), 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(13), 14, GFLAGS), 
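
A minimal sketch of the ordering enforced by rockchip_cpuclk_v2_pre_rate_change() and rockchip_cpuclk_v2_post_rate_change() above: dividers derived from armclk are widened before the PLL speeds up and only relaxed after it has slowed down, so the derived clocks never overshoot while the PLL relocks. This is a standalone illustration, not kernel code or part of this patch; soc_write_div() and the example rates are hypothetical stand-ins for rockchip_cpuclk_set_dividers() and a real rate table.

#include <stdio.h>

static unsigned long cpu_hz = 408000000UL;	/* pretend current armclk rate */

/* hypothetical stand-in for rockchip_cpuclk_set_dividers() */
static void soc_write_div(unsigned int div)
{
	printf("bus divider set to /%u while armclk is %lu Hz\n", div, cpu_hz);
}

static void set_cpu_rate(unsigned long new_hz, unsigned int new_div)
{
	unsigned long old_hz = cpu_hz;

	if (new_hz > old_hz)		/* PRE_RATE_CHANGE: widen divider first */
		soc_write_div(new_div);

	cpu_hz = new_hz;		/* stands in for the PLL/mux switch */

	if (new_hz < old_hz)		/* POST_RATE_CHANGE: relax divider afterwards */
		soc_write_div(new_div);
}

int main(void)
{
	set_cpu_rate(1200000000UL, 3);	/* scaling up: divider written before the switch */
	set_cpu_rate(408000000UL, 1);	/* scaling down: divider written after the switch */
	return 0;
}
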
++ return hw->clk; + -+ COMPOSITE(SCLK_EFUSE_S, "clk_efuse_s", mux_gpll_cpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK1808_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE(SCLK_EFUSE_NS, "clk_efuse_ns", mux_gpll_cpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(64), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK1808_CLKGATE_CON(14), 1, GFLAGS), ++free_cpuclk: ++ kfree(cpuclk); ++unregister_clk: ++ clk_hw_unregister_composite(hw); ++free_div: ++ kfree(div); ++free_mux: ++ kfree(mux); + -+ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(65), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 2, GFLAGS), -+ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(66), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 3, GFLAGS), -+ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(67), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 4, GFLAGS), -+ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(68), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 5, GFLAGS), ++ return ERR_PTR(ret); ++} +diff --git a/drivers/clk/rockchip-oh/clk-dclk-divider.c b/drivers/clk/rockchip-oh/clk-dclk-divider.c +new file mode 100644 +index 000000000..88cf7ab82 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-dclk-divider.c +@@ -0,0 +1,168 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd ++ */ + -+ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(69), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 10, GFLAGS), -+ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(69), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 11, GFLAGS), -+ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 12, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 12, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 13, GFLAGS), ++#define div_mask(width) ((1 << (width)) - 1) + -+ /* -+ * Clock-Architecture Diagram 10 -+ */ ++static unsigned long clk_dclk_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int val; + -+ /* PD_AUDIO */ ++ val = clk_readl(divider->reg) >> divider->shift; ++ val &= div_mask(divider->width); + -+ GATE(0, "hclk_audio_niu", "hclk_audio_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(18), 11, GFLAGS), -+ GATE(HCLK_VAD, "hclk_vad", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 12, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 13, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 14, GFLAGS), -+ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 15, GFLAGS), ++ return DIV_ROUND_UP_ULL(((u64)parent_rate), val + 1); ++} + -+ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_cpll_npll_p, 0, 
-+ RK1808_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(17), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(31), 0, -+ RK1808_CLKGATE_CON(17), 10, GFLAGS, -+ &rk1808_pdm_fracmux), -+ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, -+ RK1808_CLKGATE_CON(17), 11, GFLAGS), ++static long clk_dclk_round_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long *prate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ int div, maxdiv = div_mask(divider->width) + 1; + -+ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(17), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(33), 0, -+ RK1808_CLKGATE_CON(17), 13, GFLAGS, -+ &rk1808_i2s0_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(17), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 14, 2, MFLAGS, -+ RK1808_CLKGATE_CON(17), 15, GFLAGS), ++ div = DIV_ROUND_UP_ULL(divider->max_prate, rate); ++ if (div % 2) ++ div = __rounddown_pow_of_two(div); ++ div = div > maxdiv ? maxdiv : div; ++ *prate = div * rate; ++ return rate; ++} + -+ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(18), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(35), 0, -+ RK1808_CLKGATE_CON(18), 1, GFLAGS, -+ &rk1808_i2s0_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(18), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", mux_i2s0_8ch_rx_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 14, 2, MFLAGS, -+ RK1808_CLKGATE_CON(18), 3, GFLAGS), ++static int clk_dclk_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int value; ++ unsigned long flags = 0; ++ u32 val; + -+ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(36), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(18), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(37), 0, -+ RK1808_CLKGATE_CON(18), 5, GFLAGS, -+ &rk1808_i2s1_2ch_fracmux), -+ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, -+ RK1808_CLKGATE_CON(18), 6, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(36), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(18), 7, GFLAGS), ++ value = divider_get_val(rate, parent_rate, divider->table, ++ divider->width, divider->flags); + -+ /* -+ * Clock-Architecture Diagram 10 -+ */ ++ if (divider->lock) ++ spin_lock_irqsave(divider->lock, flags); ++ else ++ __acquire(divider->lock); + -+ /* PD_BUS */ ++ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { ++ val = div_mask(divider->width) << (divider->shift + 16); ++ } else { ++ val = clk_readl(divider->reg); ++ val &= ~(div_mask(divider->width) << 
divider->shift); ++ } ++ val |= value << divider->shift; ++ clk_writel(val, divider->reg); + -+ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 4, GFLAGS), ++ if (divider->lock) ++ spin_unlock_irqrestore(divider->lock, flags); ++ else ++ __release(divider->lock); + -+ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(0, "pclk_usb3_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(0, "pclk_usb_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 8, GFLAGS), ++ return 0; ++} + -+ /* -+ * Clock-Architecture Diagram 11 -+ */ ++const struct clk_ops clk_dclk_divider_ops = { ++ .recalc_rate = clk_dclk_recalc_rate, ++ .round_rate = clk_dclk_round_rate, ++ .set_rate = clk_dclk_set_rate, ++}; ++EXPORT_SYMBOL_GPL(clk_dclk_divider_ops); + -+ /* PD_PMU */ ++/** ++ * Register a clock branch. ++ * Most clock branches have a form like ++ * ++ * src1 --|--\ ++ * |M |--[GATE]-[DIV]- ++ * src2 --|--/ ++ * ++ * sometimes without one of those components. ++ */ ++struct clk *rockchip_clk_register_dclk_branch(const char *name, ++ const char *const *parent_names, ++ u8 num_parents, ++ void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 div_width, u8 div_flags, ++ struct clk_div_table *div_table, ++ int gate_offset, ++ u8 gate_shift, u8 gate_flags, ++ unsigned long flags, ++ unsigned long max_prate, ++ spinlock_t *lock) ++{ ++ struct clk *clk; ++ struct clk_mux *mux = NULL; ++ struct clk_gate *gate = NULL; ++ struct clk_divider *div = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, ++ *gate_ops = NULL; + -+ COMPOSITE_FRACMUX(SCLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_PMU_CLKSEL_CON(1), 0, -+ RK1808_PMU_CLKGATE_CON(0), 13, GFLAGS, -+ &rk1808_rtc32k_pmu_fracmux), ++ if (num_parents > 1) { ++ mux = kzalloc(sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 12, GFLAGS), ++ mux->reg = base + muxdiv_offset; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux->flags = mux_flags; ++ mux->lock = lock; ++ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? 
&clk_mux_ro_ops ++ : &clk_mux_ops; ++ } + -+ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "ppll", 0, -+ RK1808_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ if (gate_offset >= 0) { ++ gate = kzalloc(sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ goto err_gate; + -+ COMPOSITE(0, "clk_uart0_pmu_src", mux_gpll_usb480m_cpll_ppll_p, 0, -+ RK1808_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, -+ RK1808_PMU_CLKSEL_CON(4), 0, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(5), 0, -+ RK1808_PMU_CLKGATE_CON(1), 2, GFLAGS, -+ &rk1808_uart0_pmu_fracmux), -+ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; ++ gate_ops = &clk_gate_ops; ++ } + -+ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RK1808_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ if (div_width > 0) { ++ div = kzalloc(sizeof(*div), GFP_KERNEL); ++ if (!div) ++ goto err_div; + -+ COMPOSITE(SCLK_PMU_I2C0, "clk_pmu_i2c0", mux_ppll_xin24m_p, 0, -+ RK1808_PMU_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ div->flags = div_flags; ++ if (div_offset) ++ div->reg = base + div_offset; ++ else ++ div->reg = base + muxdiv_offset; ++ div->shift = div_shift; ++ div->width = div_width; ++ div->lock = lock; ++ div->max_prate = max_prate; ++ div_ops = &clk_dclk_divider_ops; ++ } + -+ COMPOSITE(DBCLK_PMU_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, -+ RK1808_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ clk = clk_register_composite(NULL, name, parent_names, num_parents, ++ mux ? &mux->hw : NULL, mux_ops, ++ div ? &div->hw : NULL, div_ops, ++ gate ? &gate->hw : NULL, gate_ops, ++ flags); + -+ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "ppll", 0, -+ RK1808_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ return clk; ++err_div: ++ kfree(gate); ++err_gate: ++ kfree(mux); ++ return ERR_PTR(-ENOMEM); ++} +diff --git a/drivers/clk/rockchip-oh/clk-ddr.c b/drivers/clk/rockchip-oh/clk-ddr.c +new file mode 100644 +index 000000000..46df75f1a +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-ddr.c +@@ -0,0 +1,238 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
++ * Author: Lin Huang ++ */ + -+ FACTOR(0, "clk_ppll_ph0", "ppll", 0, 1, 2), -+ COMPOSITE_NOMUX(0, "clk_pciephy_src", "clk_ppll_ph0", 0, -+ RK1808_PMU_CLKSEL_CON(7), 0, 2, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_NODIV(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pciephy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(7), 4, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 12, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_ARM ++#include ++#endif + -+ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "ppll", CLK_IS_CRITICAL, -+ RK1808_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 0, GFLAGS), ++#include "clk.h" + -+ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK1808_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 9, GFLAGS), ++struct rockchip_ddrclk { ++ struct clk_hw hw; ++ void __iomem *reg_base; ++ int mux_offset; ++ int mux_shift; ++ int mux_width; ++ int div_shift; ++ int div_width; ++ int ddr_flag; ++}; + -+ MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, -+ RK1808_PMUGRF_SOC_CON0, 0, 1, MFLAGS) ++#define to_rockchip_ddrclk_hw(hw) container_of(hw, struct rockchip_ddrclk, hw) ++ ++struct share_params_ddrclk { ++ u32 hz; ++ u32 lcdc_type; +}; + -+static void __iomem *rk1808_cru_base; ++struct rockchip_ddrclk_data { ++ void __iomem *params; ++ int (*dmcfreq_wait_complete)(void); ++}; + -+void rk1808_dump_cru(void) ++static struct rockchip_ddrclk_data ddr_data = {NULL, NULL}; ++ ++void rockchip_set_ddrclk_params(void __iomem *params) +{ -+ if (rk1808_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk1808_cru_base, -+ 0x500, false); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk1808_cru_base + 0x4000, -+ 0x100, false); -+ } ++ ddr_data.params = params; +} -+EXPORT_SYMBOL_GPL(rk1808_dump_cru); ++EXPORT_SYMBOL(rockchip_set_ddrclk_params); + -+static int rk1808_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) ++void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)) +{ -+ rk1808_dump_cru(); -+ return NOTIFY_DONE; ++ ddr_data.dmcfreq_wait_complete = func; +} ++EXPORT_SYMBOL(rockchip_set_ddrclk_dmcfreq_wait_complete); + -+static struct notifier_block rk1808_clk_panic_block = { -+ .notifier_call = rk1808_clk_panic, -+}; ++static int rockchip_ddrclk_sip_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) ++{ ++ struct arm_smccc_res res; + -+static void __init rk1808_clk_init(struct device_node *np) ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, drate, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE, ++ 0, 0, 0, 0, &res); ++ ++ if (res.a0) ++ return 0; ++ else ++ return -EPERM; ++} ++ ++static unsigned long 
++rockchip_ddrclk_sip_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) +{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **clks; ++ struct arm_smccc_res res; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE, ++ 0, 0, 0, 0, &res); + -+ rk1808_cru_base = reg_base; ++ return res.a0; ++} + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ clks = ctx->clk_data.clks; ++static long rockchip_ddrclk_sip_round_rate(struct clk_hw *hw, ++ unsigned long rate, ++ unsigned long *prate) ++{ ++ struct arm_smccc_res res; + -+ rockchip_clk_register_plls(ctx, rk1808_pll_clks, -+ ARRAY_SIZE(rk1808_pll_clks), -+ RK1808_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk1808_clk_branches, -+ ARRAY_SIZE(rk1808_clk_branches)); ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, rate, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE, ++ 0, 0, 0, 0, &res); + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk1808_cpuclk_data, rk1808_cpuclk_rates, -+ ARRAY_SIZE(rk1808_cpuclk_rates)); ++ return res.a0; ++} + -+ rockchip_register_softrst(np, 16, reg_base + RK1808_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw) ++{ ++ struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw); ++ u32 val; + -+ rockchip_register_restart_notifier(ctx, RK1808_GLB_SRST_FST, NULL); ++ val = readl(ddrclk->reg_base + ++ ddrclk->mux_offset) >> ddrclk->mux_shift; ++ val &= GENMASK(ddrclk->mux_width - 1, 0); + -+ rockchip_clk_of_add_provider(np, ctx); ++ return val; ++} + -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rk1808_clk_panic_block); ++static const struct clk_ops rockchip_ddrclk_sip_ops = { ++ .recalc_rate = rockchip_ddrclk_sip_recalc_rate, ++ .set_rate = rockchip_ddrclk_sip_set_rate, ++ .round_rate = rockchip_ddrclk_sip_round_rate, ++ .get_parent = rockchip_ddrclk_get_parent, ++}; ++ ++static int rockchip_ddrclk_sip_set_rate_v2(struct clk_hw *hw, ++ unsigned long drate, ++ unsigned long prate) ++{ ++ struct share_params_ddrclk *p; ++ struct arm_smccc_res res; ++ ++ p = (struct share_params_ddrclk *)ddr_data.params; ++ if (p) ++ p->hz = drate; ++ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE); ++ ++ if ((int)res.a1 == SIP_RET_SET_RATE_TIMEOUT) { ++ if (ddr_data.dmcfreq_wait_complete) ++ ddr_data.dmcfreq_wait_complete(); ++ } ++ ++ return res.a0; +} + -+CLK_OF_DECLARE(rk1808_cru, "rockchip,rk1808-cru", rk1808_clk_init); ++static unsigned long rockchip_ddrclk_sip_recalc_rate_v2 ++ (struct clk_hw *hw, unsigned long parent_rate) ++{ ++ struct arm_smccc_res res; + -+static int __init clk_rk1808_probe(struct platform_device *pdev) ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE); ++ if (!res.a0) ++ return res.a1; ++ else ++ return 0; ++} ++ ++static long rockchip_ddrclk_sip_round_rate_v2(struct clk_hw *hw, ++ unsigned long rate, ++ unsigned long *prate) +{ -+ struct device_node *np = pdev->dev.of_node; ++ struct share_params_ddrclk *p; ++ struct arm_smccc_res res; + -+ rk1808_clk_init(np); ++ p = (struct share_params_ddrclk *)ddr_data.params; ++ if (p) ++ p->hz = rate; + -+ return 0; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ 
ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE); ++ if (!res.a0) ++ return res.a1; ++ else ++ return 0; +} + -+static const struct of_device_id clk_rk1808_match_table[] = { -+ { -+ .compatible = "rockchip,rk1808-cru", -+ }, -+ { } ++static const struct clk_ops rockchip_ddrclk_sip_ops_v2 = { ++ .recalc_rate = rockchip_ddrclk_sip_recalc_rate_v2, ++ .set_rate = rockchip_ddrclk_sip_set_rate_v2, ++ .round_rate = rockchip_ddrclk_sip_round_rate_v2, ++ .get_parent = rockchip_ddrclk_get_parent, +}; -+MODULE_DEVICE_TABLE(of, clk_rk1808_match_table); + -+static struct platform_driver clk_rk1808_driver = { -+ .driver = { -+ .name = "clk-rk1808", -+ .of_match_table = clk_rk1808_match_table, -+ }, -+}; -+builtin_platform_driver_probe(clk_rk1808_driver, clk_rk1808_probe); ++struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, ++ const char *const *parent_names, ++ u8 num_parents, int mux_offset, ++ int mux_shift, int mux_width, ++ int div_shift, int div_width, ++ int ddr_flag, void __iomem *reg_base) ++{ ++ struct rockchip_ddrclk *ddrclk; ++ struct clk_init_data init; ++ struct clk *clk; + -+MODULE_DESCRIPTION("Rockchip RK1808 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3036.c b/drivers/clk/rockchip-oh/clk-rk3036.c ++#ifdef CONFIG_ARM ++ if (!psci_smp_available()) ++ return NULL; ++#endif ++ ++ ddrclk = kzalloc(sizeof(*ddrclk), GFP_KERNEL); ++ if (!ddrclk) ++ return ERR_PTR(-ENOMEM); ++ ++ init.name = name; ++ init.parent_names = parent_names; ++ init.num_parents = num_parents; ++ ++ init.flags = flags; ++ init.flags |= CLK_SET_RATE_NO_REPARENT; ++ ++ switch (ddr_flag) { ++#ifdef CONFIG_ROCKCHIP_DDRCLK_SIP ++ case ROCKCHIP_DDRCLK_SIP: ++ init.ops = &rockchip_ddrclk_sip_ops; ++ break; ++#endif ++#ifdef CONFIG_ROCKCHIP_DDRCLK_SIP_V2 ++ case ROCKCHIP_DDRCLK_SIP_V2: ++ init.ops = &rockchip_ddrclk_sip_ops_v2; ++ break; ++#endif ++ default: ++ pr_err("%s: unsupported ddrclk type %d\n", __func__, ddr_flag); ++ kfree(ddrclk); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ ddrclk->reg_base = reg_base; ++ ddrclk->hw.init = &init; ++ ddrclk->mux_offset = mux_offset; ++ ddrclk->mux_shift = mux_shift; ++ ddrclk->mux_width = mux_width; ++ ddrclk->div_shift = div_shift; ++ ddrclk->div_width = div_width; ++ ddrclk->ddr_flag = ddr_flag; ++ ++ clk = clk_register(NULL, &ddrclk->hw); ++ if (IS_ERR(clk)) ++ kfree(ddrclk); ++ ++ return clk; ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_register_ddrclk); +diff --git a/drivers/clk/rockchip-oh/clk-half-divider.c b/drivers/clk/rockchip-oh/clk-half-divider.c new file mode 100644 -index 000000000..ed05eb6c5 +index 000000000..9e0d0fc00 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3036.c -@@ -0,0 +1,530 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later ++++ b/drivers/clk/rockchip-oh/clk-half-divider.c +@@ -0,0 +1,224 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ * -+ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. 
-+ * Author: Xing Zheng ++ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd + */ + +#include +#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include +#include "clk.h" + -+#define RK3036_GRF_SOC_STATUS0 0x14c ++#define div_mask(width) ((1 << (width)) - 1) + -+enum rk3036_plls { -+ apll, dpll, gpll, -+}; ++static bool _is_best_half_div(unsigned long rate, unsigned long now, ++ unsigned long best, unsigned long flags) ++{ ++ if (flags & CLK_DIVIDER_ROUND_CLOSEST) ++ return abs(rate - now) <= abs(rate - best); + -+static struct rockchip_pll_rate_table rk3036_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++ return now <= rate && now >= best; ++} + -+#define RK3036_DIV_CPU_MASK 0x1f -+#define RK3036_DIV_CPU_SHIFT 8 ++static unsigned long clk_half_divider_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int val; + -+#define RK3036_DIV_PERI_MASK 0xf -+#define RK3036_DIV_PERI_SHIFT 0 -+#define RK3036_DIV_ACLK_MASK 0x7 -+#define RK3036_DIV_ACLK_SHIFT 4 -+#define RK3036_DIV_HCLK_MASK 0x3 -+#define RK3036_DIV_HCLK_SHIFT 8 -+#define RK3036_DIV_PCLK_MASK 0x7 -+#define RK3036_DIV_PCLK_SHIFT 12 ++ val = readl(divider->reg) >> divider->shift; ++ val &= div_mask(divider->width); ++ val = val * 2 + 3; + -+#define 
RK3036_CLKSEL1(_core_periph_div) \ -+ { \ -+ .reg = RK2928_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_core_periph_div, RK3036_DIV_PERI_MASK, \ -+ RK3036_DIV_PERI_SHIFT) \ ++ return DIV_ROUND_UP_ULL(((u64)parent_rate * 2), val); ++} ++ ++static int clk_half_divider_bestdiv(struct clk_hw *hw, unsigned long rate, ++ unsigned long *best_parent_rate, u8 width, ++ unsigned long flags) ++{ ++ unsigned int i, bestdiv = 0; ++ unsigned long parent_rate, best = 0, now, maxdiv; ++ bool is_bestdiv = false; ++ ++ if (!rate) ++ rate = 1; ++ ++ maxdiv = div_mask(width); ++ ++ if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) { ++ parent_rate = *best_parent_rate; ++ bestdiv = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate); ++ if (bestdiv < 3) ++ bestdiv = 0; ++ else ++ bestdiv = DIV_ROUND_UP(bestdiv - 3, 2); ++ bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv; ++ return bestdiv; + } + -+#define RK3036_CPUCLK_RATE(_prate, _core_periph_div) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3036_CLKSEL1(_core_periph_div), \ -+ }, \ ++ /* ++ * The maximum divider we can use without overflowing ++ * unsigned long in rate * i below ++ */ ++ maxdiv = min(ULONG_MAX / rate, maxdiv); ++ ++ for (i = 0; i <= maxdiv; i++) { ++ parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), ++ ((u64)rate * (i * 2 + 3)) / 2); ++ now = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), ++ (i * 2 + 3)); ++ ++ if (_is_best_half_div(rate, now, best, flags)) { ++ is_bestdiv = true; ++ bestdiv = i; ++ best = now; ++ *best_parent_rate = parent_rate; ++ } + } + -+static struct rockchip_cpuclk_rate_table rk3036_cpuclk_rates[] __initdata = { -+ RK3036_CPUCLK_RATE(1200000000, 4), -+ RK3036_CPUCLK_RATE(1008000000, 4), -+ RK3036_CPUCLK_RATE(816000000, 4), -+ RK3036_CPUCLK_RATE(600000000, 4), -+ RK3036_CPUCLK_RATE(408000000, 4), -+ RK3036_CPUCLK_RATE(312000000, 4), -+}; ++ if (!is_bestdiv) { ++ bestdiv = div_mask(width); ++ *best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), 1); ++ } + -+static const struct rockchip_cpuclk_reg_data rk3036_cpuclk_data = { -+ .core_reg[0] = RK2928_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 7, -+ .mux_core_mask = 0x1, -+}; ++ return bestdiv; ++} + -+PNAME(mux_pll_p) = { "xin24m", "xin24m" }; ++static long clk_half_divider_round_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long *prate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ int div; + -+PNAME(mux_busclk_p) = { "dummy_apll", "dpll_cpu", "gpll_cpu" }; -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_pll_src_apll_dpll_gpll_p) = { "apll", "dpll", "gpll" }; -+PNAME(mux_pll_src_dmyapll_dpll_gpll_p) = { "dummy_apll", "dpll", "gpll" }; ++ div = clk_half_divider_bestdiv(hw, rate, prate, ++ divider->width, ++ divider->flags); + -+PNAME(mux_timer_p) = { "xin24m", "pclk_peri_src" }; ++ return DIV_ROUND_UP_ULL(((u64)*prate * 2), div * 2 + 3); ++} + -+PNAME(mux_pll_src_dmyapll_dpll_gpll_usb480m_p) = { "dummy_apll", "dpll", "gpll", "usb480m" }; -+PNAME(mux_pll_src_dmyapll_dpll_gpll_xin24_p) = { "dummy_apll", "dpll", "gpll", "xin24m" }; ++static int clk_half_divider_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int value; ++ unsigned long flags = 0; ++ u32 val; + -+PNAME(mux_mmc_src_p) = { "dummy_apll", "dpll", "gpll", "xin24m" }; -+PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; 
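
The clk-half-divider.c code being added in this hunk encodes a register value v as the divisor (2*v + 3)/2, so the reachable divisors are 1.5, 2.5, 3.5 and so on. The short standalone sketch below replays both directions of that conversion, mirroring clk_half_divider_recalc_rate() and the value calculation in clk_half_divider_set_rate(); the parent and target rates are arbitrary example numbers, not values taken from this patch.

#include <stdio.h>

/* rate produced by register value v, mirroring clk_half_divider_recalc_rate() */
static unsigned long half_div_rate(unsigned long parent, unsigned int v)
{
	unsigned long d = 2UL * v + 3;		/* divisor in half-steps: 3/2, 5/2, 7/2, ... */

	return (parent * 2 + d - 1) / d;	/* DIV_ROUND_UP(parent * 2, d) */
}

/* register value chosen for a target rate, mirroring clk_half_divider_set_rate();
 * assumes rate <= parent * 2 / 3 so the subtraction below cannot underflow */
static unsigned int half_div_val(unsigned long parent, unsigned long rate)
{
	unsigned long d = (parent * 2 + rate - 1) / rate;	/* DIV_ROUND_UP(parent * 2, rate) */

	return (unsigned int)((d - 3 + 1) / 2);			/* DIV_ROUND_UP(d - 3, 2) */
}

int main(void)
{
	unsigned long parent = 400000000UL;			/* arbitrary example parent */
	unsigned int v = half_div_val(parent, 160000000UL);	/* ask for parent / 2.5 */

	printf("reg value %u -> %lu Hz\n", v, half_div_rate(parent, v));
	return 0;
}
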
-+PNAME(mux_i2s_clkout_p) = { "i2s_pre", "xin12m" }; -+PNAME(mux_spdif_p) = { "spdif_src", "spdif_frac", "xin12m" }; -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; -+PNAME(mux_mac_p) = { "mac_pll_src", "rmii_clkin" }; -+PNAME(mux_dclk_p) = { "dclk_lcdc", "dclk_cru" }; ++ value = DIV_ROUND_UP_ULL(((u64)parent_rate * 2), rate); ++ value = DIV_ROUND_UP(value - 3, 2); ++ value = min_t(unsigned int, value, div_mask(divider->width)); + -+static struct rockchip_pll_clock rk3036_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), -+ RK2928_MODE_CON, 0, 5, 0, rk3036_pll_rates), -+ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), -+ RK2928_MODE_CON, 4, 4, 0, NULL), -+ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), -+ RK2928_MODE_CON, 12, 6, ROCKCHIP_PLL_SYNC_RATE, rk3036_pll_rates), -+}; ++ if (divider->lock) ++ spin_lock_irqsave(divider->lock, flags); ++ else ++ __acquire(divider->lock); + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { ++ val = div_mask(divider->width) << (divider->shift + 16); ++ } else { ++ val = readl(divider->reg); ++ val &= ~(div_mask(divider->width) << divider->shift); ++ } ++ val |= value << divider->shift; ++ writel(val, divider->reg); + -+static struct rockchip_clk_branch rk3036_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); ++ if (divider->lock) ++ spin_unlock_irqrestore(divider->lock, flags); ++ else ++ __release(divider->lock); + -+static struct rockchip_clk_branch rk3036_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); ++ return 0; ++} + -+static struct rockchip_clk_branch rk3036_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); ++static const struct clk_ops clk_half_divider_ops = { ++ .recalc_rate = clk_half_divider_recalc_rate, ++ .round_rate = clk_half_divider_round_rate, ++ .set_rate = clk_half_divider_set_rate, ++}; + -+static struct rockchip_clk_branch rk3036_i2s_fracmux __initdata = -+ MUX(SCLK_I2S_PRE, "i2s_pre", mux_i2s_pre_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); ++/* ++ * Register a clock branch. ++ * Most clock branches have a form like ++ * ++ * src1 --|--\ ++ * |M |--[GATE]-[DIV]- ++ * src2 --|--/ ++ * ++ * sometimes without one of those components. 
++ */ ++struct clk *rockchip_clk_register_halfdiv(const char *name, ++ const char *const *parent_names, ++ u8 num_parents, void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 div_width, u8 div_flags, ++ int gate_offset, u8 gate_shift, ++ u8 gate_flags, unsigned long flags, ++ spinlock_t *lock) ++{ ++ struct clk_hw *hw = ERR_PTR(-ENOMEM); ++ struct clk_mux *mux = NULL; ++ struct clk_gate *gate = NULL; ++ struct clk_divider *div = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, ++ *gate_ops = NULL; + -+static struct rockchip_clk_branch rk3036_spdif_fracmux __initdata = -+ MUX(SCLK_SPDIF, "sclk_spdif", mux_spdif_p, 0, -+ RK2928_CLKSEL_CON(5), 8, 2, MFLAGS); ++ if (num_parents > 1) { ++ mux = kzalloc(sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++ mux->reg = base + muxdiv_offset; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux->flags = mux_flags; ++ mux->lock = lock; ++ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops ++ : &clk_mux_ops; ++ } + -+ GATE(0, "gpll_armclk", "gpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ if (gate_offset >= 0) { ++ gate = kzalloc(sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ goto err_gate; + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; ++ gate_ops = &clk_gate_ops; ++ } + -+ /* -+ * Clock-Architecture Diagram 2 -+ */ ++ if (div_width > 0) { ++ div = kzalloc(sizeof(*div), GFP_KERNEL); ++ if (!div) ++ goto err_div; + -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOGATE(0, "ddrphy2x", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ FACTOR(0, "ddrphy", "ddrphy2x", 0, 1, 2), ++ div->flags = div_flags; ++ if (div_offset) ++ div->reg = base + div_offset; ++ else ++ div->reg = base + muxdiv_offset; ++ div->shift = div_shift; ++ div->width = div_width; ++ div->lock = lock; ++ div_ops = &clk_half_divider_ops; ++ } + -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core_pre", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 7, GFLAGS), ++ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, ++ mux ? &mux->hw : NULL, mux_ops, ++ div ? &div->hw : NULL, div_ops, ++ gate ? 
&gate->hw : NULL, gate_ops, ++ flags); ++ if (IS_ERR(hw)) ++ goto err_div; + -+ GATE(0, "dpll_cpu", "dpll", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(0, "gpll_cpu", "gpll", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_busclk_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(0), 14, 2, MFLAGS, 8, 5, DFLAGS), -+ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 4, GFLAGS), ++ return hw->clk; ++err_div: ++ kfree(gate); ++err_gate: ++ kfree(mux); ++ return ERR_CAST(hw); ++} +diff --git a/drivers/clk/rockchip-oh/clk-inverter.c b/drivers/clk/rockchip-oh/clk-inverter.c +new file mode 100644 +index 000000000..5dfbdce18 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-inverter.c +@@ -0,0 +1,103 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright 2015 Heiko Stuebner ++ */ + -+ COMPOSITE(0, "aclk_peri_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 1, GFLAGS), -+ DIV(0, "pclk_peri_src", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ GATE(PCLK_PERI, "pclk_peri", "pclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 3, GFLAGS), -+ DIV(0, "hclk_peri_src", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ GATE(HCLK_PERI, "hclk_peri", "hclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 2, GFLAGS), ++struct rockchip_inv_clock { ++ struct clk_hw hw; ++ void __iomem *reg; ++ int shift; ++ int flags; ++ spinlock_t *lock; ++}; + -+ COMPOSITE_NODIV(SCLK_TIMER0, "sclk_timer0", mux_timer_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(2), 4, 1, MFLAGS, -+ RK2928_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NODIV(SCLK_TIMER1, "sclk_timer1", mux_timer_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(2), 5, 1, MFLAGS, -+ RK2928_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NODIV(SCLK_TIMER2, "sclk_timer2", mux_timer_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(2), 6, 1, MFLAGS, -+ RK2928_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NODIV(SCLK_TIMER3, "sclk_timer3", mux_timer_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(2), 7, 1, MFLAGS, -+ RK2928_CLKGATE_CON(2), 5, GFLAGS), ++#define to_inv_clock(_hw) container_of(_hw, struct rockchip_inv_clock, hw) + -+ MUX(0, "uart_pll_clk", mux_pll_src_dmyapll_dpll_gpll_usb480m_p, 0, -+ RK2928_CLKSEL_CON(13), 10, 2, MFLAGS), -+ COMPOSITE_NOMUX(0, "uart0_src", "uart_pll_clk", 0, -+ RK2928_CLKSEL_CON(13), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "uart1_src", "uart_pll_clk", 0, -+ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "uart2_src", "uart_pll_clk", 0, -+ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(17), 0, -+ RK2928_CLKGATE_CON(1), 9, GFLAGS, -+ 
&rk3036_uart0_fracmux), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(18), 0, -+ RK2928_CLKGATE_CON(1), 11, GFLAGS, -+ &rk3036_uart1_fracmux), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(19), 0, -+ RK2928_CLKGATE_CON(1), 13, GFLAGS, -+ &rk3036_uart2_fracmux), ++#define INVERTER_MASK 0x1 + -+ COMPOSITE(ACLK_VCODEC, "aclk_vcodec", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 11, GFLAGS), -+ FACTOR_GATE(HCLK_VCODEC, "hclk_vcodec", "aclk_vcodec", 0, 1, 4, -+ RK2928_CLKGATE_CON(3), 12, GFLAGS), ++static int rockchip_inv_get_phase(struct clk_hw *hw) ++{ ++ struct rockchip_inv_clock *inv_clock = to_inv_clock(hw); ++ u32 val; + -+ COMPOSITE(ACLK_HEVC, "aclk_hevc", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(20), 0, 2, MFLAGS, 2, 5, DFLAGS, -+ RK2928_CLKGATE_CON(10), 6, GFLAGS), ++ val = readl(inv_clock->reg) >> inv_clock->shift; ++ val &= INVERTER_MASK; ++ return val ? 180 : 0; ++} + -+ COMPOSITE(0, "aclk_disp1_pre", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE(0, "hclk_disp_pre", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE(SCLK_LCDC, "dclk_lcdc", mux_pll_src_apll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(28), 0, 2, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++static int rockchip_inv_set_phase(struct clk_hw *hw, int degrees) ++{ ++ struct rockchip_inv_clock *inv_clock = to_inv_clock(hw); ++ u32 val; + -+ COMPOSITE_NODIV(0, "sclk_sdmmc_src", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(12), 8, 2, MFLAGS, -+ RK2928_CLKGATE_CON(2), 11, GFLAGS), -+ DIV(SCLK_SDMMC, "sclk_sdmmc", "sclk_sdmmc_src", 0, -+ RK2928_CLKSEL_CON(11), 0, 7, DFLAGS), ++ if (degrees % 180 == 0) { ++ val = !!degrees; ++ } else { ++ pr_err("%s: unsupported phase %d for %s\n", ++ __func__, degrees, clk_hw_get_name(hw)); ++ return -EINVAL; ++ } + -+ COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(12), 10, 2, MFLAGS, -+ RK2928_CLKGATE_CON(2), 13, GFLAGS), -+ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, -+ RK2928_CLKSEL_CON(11), 8, 7, DFLAGS), ++ if (inv_clock->flags & ROCKCHIP_INVERTER_HIWORD_MASK) { ++ writel(HIWORD_UPDATE(val, INVERTER_MASK, inv_clock->shift), ++ inv_clock->reg); ++ } else { ++ unsigned long flags; ++ u32 reg; + -+ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(12), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 14, GFLAGS), ++ spin_lock_irqsave(inv_clock->lock, flags); + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3036_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3036_SDMMC_CON1, 0), ++ reg = readl(inv_clock->reg); ++ reg &= ~BIT(inv_clock->shift); ++ reg |= val; ++ writel(reg, inv_clock->reg); + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3036_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3036_SDIO_CON1, 0), ++ spin_unlock_irqrestore(inv_clock->lock, flags); ++ } + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3036_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3036_EMMC_CON1, 0), ++ return 0; ++} + -+ COMPOSITE(0, "i2s_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_I2S_FRAC, 
"i2s_frac", "i2s_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(7), 0, -+ RK2928_CLKGATE_CON(0), 10, GFLAGS, -+ &rk3036_i2s_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_clkout", mux_i2s_clkout_p, 0, -+ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, -+ RK2928_CLKGATE_CON(0), 13, GFLAGS), -+ GATE(SCLK_I2S, "sclk_i2s", "i2s_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(0), 14, GFLAGS), ++static const struct clk_ops rockchip_inv_clk_ops = { ++ .get_phase = rockchip_inv_get_phase, ++ .set_phase = rockchip_inv_set_phase, ++}; + -+ COMPOSITE(0, "spdif_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_src", 0, -+ RK2928_CLKSEL_CON(9), 0, -+ RK2928_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3036_spdif_fracmux), ++struct clk *rockchip_clk_register_inverter(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *reg, int shift, int flags, ++ spinlock_t *lock) ++{ ++ struct clk_init_data init; ++ struct rockchip_inv_clock *inv_clock; ++ struct clk *clk; + -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin12m", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 5, GFLAGS), ++ inv_clock = kmalloc(sizeof(*inv_clock), GFP_KERNEL); ++ if (!inv_clock) ++ return ERR_PTR(-ENOMEM); + -+ COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 13, GFLAGS), ++ init.name = name; ++ init.num_parents = num_parents; ++ init.flags = CLK_SET_RATE_PARENT; ++ init.parent_names = parent_names; ++ init.ops = &rockchip_inv_clk_ops; + -+ COMPOSITE(SCLK_SPI, "sclk_spi", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 9, GFLAGS), ++ inv_clock->hw.init = &init; ++ inv_clock->reg = reg; ++ inv_clock->shift = shift; ++ inv_clock->flags = flags; ++ inv_clock->lock = lock; + -+ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_dmyapll_dpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS, 10, 5, DFLAGS, -+ RK2928_CLKGATE_CON(10), 4, GFLAGS), ++ clk = clk_register(NULL, &inv_clock->hw); ++ if (IS_ERR(clk)) ++ kfree(inv_clock); + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_dmyapll_dpll_gpll_xin24_p, 0, -+ RK2928_CLKSEL_CON(16), 0, 2, MFLAGS, 2, 5, DFLAGS, -+ RK2928_CLKGATE_CON(10), 5, GFLAGS), ++ return clk; ++} +diff --git a/drivers/clk/rockchip-oh/clk-link.c b/drivers/clk/rockchip-oh/clk-link.c +new file mode 100644 +index 000000000..78ff9b53c +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-link.c +@@ -0,0 +1,244 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2021 Rockchip Electronics Co., Ltd ++ */ + -+ COMPOSITE_NOGATE(SCLK_MACPLL, "mac_pll_src", mux_pll_src_apll_dpll_gpll_p, CLK_SET_RATE_NO_REPARENT, -+ RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 9, 5, DFLAGS), -+ MUX(SCLK_MACREF, "mac_clk_ref", mux_mac_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(21), 3, 1, MFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0, -+ RK2928_CLKSEL_CON(21), 4, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 6, GFLAGS), -+ FACTOR(0, "sclk_macref_out", "hclk_peri_src", 0, 1, 2), ++struct rockchip_link_info { ++ u32 shift; ++ const char *name; ++ const char *pname; ++}; + -+ MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0, -+ RK2928_CLKSEL_CON(31), 0, 1, MFLAGS), ++struct rockchip_link { ++ int num; ++ const struct rockchip_link_info *info; ++}; + -+ /* -+ * 
Clock-Architecture Diagram 3 -+ */ ++struct rockchip_link_clk { ++ void __iomem *base; ++ struct clk_gate *gate; ++ spinlock_t lock; ++ u32 shift; ++ u32 flag; ++ const char *name; ++ const char *pname; ++ const char *link_name; ++ const struct rockchip_link *link; ++}; + -+ /* aclk_cpu gates */ -+ GATE(0, "sclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ /* hclk_cpu gates */ -+ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 6, GFLAGS), ++#define GATE_LINK(_name, _pname, _shift) \ ++{ \ ++ .name = _name, \ ++ .pname = _pname, \ ++ .shift = (_shift), \ ++} + -+ /* pclk_cpu gates */ -+ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 14, GFLAGS), -+ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_cpu", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), ++static int register_clocks(struct rockchip_link_clk *priv, struct device *dev) ++{ ++ struct clk_gate *gate; ++ struct clk_init_data init = {}; ++ struct clk *clk; + -+ /* aclk_vio gates */ -+ GATE(ACLK_VIO, "aclk_vio", "aclk_disp1_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS), -+ GATE(ACLK_LCDC, "aclk_lcdc", "aclk_disp1_pre", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), ++ gate = devm_kzalloc(dev, sizeof(struct clk_gate), GFP_KERNEL); ++ if (!gate) ++ return -ENOMEM; + -+ GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(HCLK_LCDC, "hclk_lcdc", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(9), 5, GFLAGS), ++ init.name = priv->name; ++ init.ops = &clk_gate_ops; ++ init.flags |= CLK_SET_RATE_PARENT; ++ init.parent_names = &priv->pname; ++ init.num_parents = 1; + ++ /* struct clk_gate assignments */ ++ gate->reg = priv->base; ++ gate->bit_idx = priv->shift; ++ gate->flags = GFLAGS; ++ gate->lock = &priv->lock; ++ gate->hw.init = &init; + -+ /* xin24m gates */ -+ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK2928_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, RK2928_CLKGATE_CON(10), 1, GFLAGS), ++ clk = devm_clk_register(dev, &gate->hw); ++ if (IS_ERR(clk)) ++ return -EINVAL; + -+ /* aclk_peri gates */ -+ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(0, "aclk_cpu_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), -+ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 15, GFLAGS), ++ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, clk); ++} + -+ /* hclk_peri gates */ -+ GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), -+ GATE(0, "hclk_peri_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), -+ 
GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(HCLK_OTG1, "hclk_otg1", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(HCLK_I2S, "hclk_i2s", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 14, GFLAGS), -+ GATE(HCLK_MAC, "hclk_mac", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), ++static const struct rockchip_link_info rk3562_clk_gate_link_info[] = { ++ GATE_LINK("aclk_rga_jdec", "aclk_rga_pre", 3), ++ GATE_LINK("aclk_vdpu", "aclk_vdpu_pre", 5), ++ GATE_LINK("aclk_vepu", "aclk_vepu_pre", 3), ++ GATE_LINK("aclk_vi_isp", "aclk_vi", 3), ++ GATE_LINK("aclk_vo", "aclk_vo_pre", 3), ++ GATE_LINK("hclk_vepu", "hclk_vepu_pre", 4), ++}; + -+ /* pclk_peri gates */ -+ GATE(0, "pclk_peri_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(0, "pclk_efuse", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(PCLK_PWM, "pclk_pwm", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(PCLK_SPI, "pclk_spi", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), ++static const struct rockchip_link rk3562_clk_gate_link = { ++ .num = ARRAY_SIZE(rk3562_clk_gate_link_info), ++ .info = rk3562_clk_gate_link_info, +}; + -+static void __iomem *rk3036_cru_base; ++static const struct rockchip_link_info rk3588_clk_gate_link_info[] = { ++ GATE_LINK("aclk_isp1_pre", "aclk_isp1_root", 6), ++ GATE_LINK("hclk_isp1_pre", "hclk_isp1_root", 8), ++ GATE_LINK("hclk_nvm", "hclk_nvm_root", 2), ++ GATE_LINK("aclk_usb", "aclk_usb_root", 2), ++ GATE_LINK("hclk_usb", "hclk_usb_root", 3), ++ GATE_LINK("aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", 7), ++ GATE_LINK("aclk_vdpu_low_pre", "aclk_vdpu_low_root", 5), ++ GATE_LINK("aclk_rkvenc1_pre", "aclk_rkvenc1_root", 3), ++ GATE_LINK("hclk_rkvenc1_pre", "hclk_rkvenc1_root", 2), ++ GATE_LINK("hclk_rkvdec0_pre", "hclk_rkvdec0_root", 5), ++ GATE_LINK("aclk_rkvdec0_pre", "aclk_rkvdec0_root", 6), ++ GATE_LINK("hclk_rkvdec1_pre", "hclk_rkvdec1_root", 4), ++ GATE_LINK("aclk_rkvdec1_pre", "aclk_rkvdec1_root", 5), ++ GATE_LINK("aclk_hdcp0_pre", "aclk_vo0_root", 9), ++ GATE_LINK("hclk_vo0", "hclk_vo0_root", 5), ++ GATE_LINK("aclk_hdcp1_pre", "aclk_hdcp1_root", 6), ++ GATE_LINK("hclk_vo1", "hclk_vo1_root", 9), ++ GATE_LINK("aclk_av1_pre", "aclk_av1_root", 1), ++ GATE_LINK("pclk_av1_pre", "pclk_av1_root", 4), ++ GATE_LINK("hclk_sdio_pre", "hclk_sdio_root", 1), ++ 
GATE_LINK("pclk_vo0_grf", "pclk_vo0_root", 10), ++ GATE_LINK("pclk_vo1_grf", "pclk_vo1_root", 12), ++}; + -+static void rk3036_dump_cru(void) ++static const struct rockchip_link rk3588_clk_gate_link = { ++ .num = ARRAY_SIZE(rk3588_clk_gate_link_info), ++ .info = rk3588_clk_gate_link_info, ++}; ++ ++static const struct of_device_id rockchip_clk_link_of_match[] = { ++ { ++ .compatible = "rockchip,rk3562-clock-gate-link", ++ .data = (void *)&rk3562_clk_gate_link, ++ }, ++ { ++ .compatible = "rockchip,rk3588-clock-gate-link", ++ .data = (void *)&rk3588_clk_gate_link, ++ }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rockchip_clk_link_of_match); ++ ++static const struct rockchip_link_info * ++rockchip_get_link_infos(const struct rockchip_link *link, const char *name) +{ -+ if (rk3036_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3036_cru_base, -+ 0x1f8, false); ++ const struct rockchip_link_info *info = link->info; ++ int i = 0; ++ ++ for (i = 0; i < link->num; i++) { ++ if (strcmp(info->name, name) == 0) ++ break; ++ info++; + } ++ return info; +} + -+static void __init rk3036_clk_init(struct device_node *np) ++static int rockchip_clk_link_probe(struct platform_device *pdev) +{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk *clk; -+ struct clk **clks; ++ struct rockchip_link_clk *priv; ++ struct device_node *node = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const char *clk_name; ++ const struct rockchip_link_info *link_info; ++ int ret; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++ match = of_match_node(rockchip_clk_link_of_match, node); ++ if (!match) ++ return -ENXIO; + -+ /* -+ * Make uart_pll_clk a child of the gpll, as all other sources are -+ * not that usable / stable. 
-+ */ -+ writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10), -+ reg_base + RK2928_CLKSEL_CON(13)); ++ priv = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_link_clk), ++ GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ clks = ctx->clk_data.clks; ++ priv->link = match->data; + -+ clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1); -+ if (IS_ERR(clk)) -+ pr_warn("%s: could not register clock usb480m: %ld\n", -+ __func__, PTR_ERR(clk)); ++ spin_lock_init(&priv->lock); ++ platform_set_drvdata(pdev, priv); + -+ rockchip_clk_register_plls(ctx, rk3036_pll_clks, -+ ARRAY_SIZE(rk3036_pll_clks), -+ RK3036_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk3036_clk_branches, -+ ARRAY_SIZE(rk3036_clk_branches)); ++ priv->base = of_iomap(node, 0); ++ if (IS_ERR(priv->base)) ++ return PTR_ERR(priv->base); + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3036_cpuclk_data, rk3036_cpuclk_rates, -+ ARRAY_SIZE(rk3036_cpuclk_rates)); ++ if (of_property_read_string(node, "clock-output-names", &clk_name)) ++ priv->name = node->name; ++ else ++ priv->name = clk_name; + -+ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ link_info = rockchip_get_link_infos(priv->link, priv->name); ++ priv->shift = link_info->shift; ++ priv->pname = link_info->pname; + -+ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); ++ pm_runtime_enable(&pdev->dev); ++ ret = pm_clk_create(&pdev->dev); ++ if (ret) ++ goto disable_pm_runtime; + -+ rockchip_clk_of_add_provider(np, ctx); ++ ret = pm_clk_add(&pdev->dev, "link"); + -+ if (!rk_dump_cru) { -+ rk3036_cru_base = reg_base; -+ rk_dump_cru = rk3036_dump_cru; -+ } ++ if (ret) ++ goto destroy_pm_clk; ++ ++ ret = register_clocks(priv, &pdev->dev); ++ if (ret) ++ goto destroy_pm_clk; ++ ++ return 0; ++ ++destroy_pm_clk: ++ pm_clk_destroy(&pdev->dev); ++disable_pm_runtime: ++ pm_runtime_disable(&pdev->dev); ++ ++ return ret; +} -+CLK_OF_DECLARE(rk3036_cru, "rockchip,rk3036-cru", rk3036_clk_init); + -+static int __init clk_rk3036_probe(struct platform_device *pdev) ++static int rockchip_clk_link_remove(struct platform_device *pdev) +{ -+ struct device_node *np = pdev->dev.of_node; -+ -+ rk3036_clk_init(np); ++ pm_clk_destroy(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); + + return 0; +} + -+static const struct of_device_id clk_rk3036_match_table[] = { -+ { -+ .compatible = "rockchip,rk3036-cru", -+ }, -+ { } ++static const struct dev_pm_ops rockchip_clk_link_pm_ops = { ++ SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; -+MODULE_DEVICE_TABLE(of, clk_rk3036_match_table); + -+static struct platform_driver clk_rk3036_driver = { -+ .driver = { -+ .name = "clk-rk3036", -+ .of_match_table = clk_rk3036_match_table, ++static struct platform_driver rockchip_clk_link_driver = { ++ .driver = { ++ .name = "clock-link", ++ .of_match_table = of_match_ptr(rockchip_clk_link_of_match), ++ .pm = &rockchip_clk_link_pm_ops, + }, ++ .probe = rockchip_clk_link_probe, ++ .remove = rockchip_clk_link_remove, +}; -+builtin_platform_driver_probe(clk_rk3036_driver, clk_rk3036_probe); + -+MODULE_DESCRIPTION("Rockchip RK3036 Clock Driver"); ++static int __init rockchip_clk_link_drv_register(void) ++{ ++ return platform_driver_register(&rockchip_clk_link_driver); ++} 
++postcore_initcall_sync(rockchip_clk_link_drv_register); ++ ++static void __exit rockchip_clk_link_drv_unregister(void) ++{ ++ platform_driver_unregister(&rockchip_clk_link_driver); ++} ++module_exit(rockchip_clk_link_drv_unregister); ++ ++MODULE_AUTHOR("Elaine Zhang "); ++MODULE_DESCRIPTION("Clock driver for Niu Dependencies"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3128.c b/drivers/clk/rockchip-oh/clk-rk3128.c +diff --git a/drivers/clk/rockchip-oh/clk-mmc-phase.c b/drivers/clk/rockchip-oh/clk-mmc-phase.c new file mode 100644 -index 000000000..1b1111e88 +index 000000000..975454a3d --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3128.c -@@ -0,0 +1,708 @@ ++++ b/drivers/clk/rockchip-oh/clk-mmc-phase.c +@@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * Author: Elaine ++ * Copyright 2014 Google, Inc ++ * Author: Alexandru M Stan + */ + ++#include ++#include +#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include +#include "clk.h" + -+#define RK3128_GRF_SOC_STATUS0 0x14c -+ -+enum rk3128_plls { -+ apll, dpll, cpll, gpll, -+}; -+ -+static struct rockchip_pll_rate_table rk3128_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, ++struct rockchip_mmc_clock { ++ struct clk_hw hw; ++ void __iomem 
*reg; ++ int id; ++ int shift; ++ int cached_phase; ++ struct notifier_block clk_rate_change_nb; +}; + -+#define RK3128_DIV_CPU_MASK 0x1f -+#define RK3128_DIV_CPU_SHIFT 8 ++#define to_mmc_clock(_hw) container_of(_hw, struct rockchip_mmc_clock, hw) + -+#define RK3128_DIV_PERI_MASK 0xf -+#define RK3128_DIV_PERI_SHIFT 0 -+#define RK3128_DIV_ACLK_MASK 0x7 -+#define RK3128_DIV_ACLK_SHIFT 4 -+#define RK3128_DIV_HCLK_MASK 0x3 -+#define RK3128_DIV_HCLK_SHIFT 8 -+#define RK3128_DIV_PCLK_MASK 0x7 -+#define RK3128_DIV_PCLK_SHIFT 12 ++#define RK3288_MMC_CLKGEN_DIV 2 + -+#define RK3128_CLKSEL1(_core_aclk_div, _pclk_dbg_div) \ -+{ \ -+ .reg = RK2928_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_pclk_dbg_div, RK3128_DIV_PERI_MASK, \ -+ RK3128_DIV_PERI_SHIFT) | \ -+ HIWORD_UPDATE(_core_aclk_div, RK3128_DIV_ACLK_MASK, \ -+ RK3128_DIV_ACLK_SHIFT), \ ++static unsigned long rockchip_mmc_recalc(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ return parent_rate / RK3288_MMC_CLKGEN_DIV; +} + -+#define RK3128_CPUCLK_RATE(_prate, _core_aclk_div, _pclk_dbg_div) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3128_CLKSEL1(_core_aclk_div, _pclk_dbg_div), \ -+ }, \ -+} ++#define ROCKCHIP_MMC_DELAY_SEL BIT(10) ++#define ROCKCHIP_MMC_DEGREE_MASK 0x3 ++#define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 ++#define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) + -+static struct rockchip_cpuclk_rate_table rk3128_cpuclk_rates[] __initdata = { -+ RK3128_CPUCLK_RATE(1800000000, 1, 7), -+ RK3128_CPUCLK_RATE(1704000000, 1, 7), -+ RK3128_CPUCLK_RATE(1608000000, 1, 7), -+ RK3128_CPUCLK_RATE(1512000000, 1, 7), -+ RK3128_CPUCLK_RATE(1488000000, 1, 5), -+ RK3128_CPUCLK_RATE(1416000000, 1, 5), -+ RK3128_CPUCLK_RATE(1392000000, 1, 5), -+ RK3128_CPUCLK_RATE(1296000000, 1, 5), -+ RK3128_CPUCLK_RATE(1200000000, 1, 5), -+ RK3128_CPUCLK_RATE(1104000000, 1, 5), -+ RK3128_CPUCLK_RATE(1008000000, 1, 5), -+ RK3128_CPUCLK_RATE(912000000, 1, 5), -+ RK3128_CPUCLK_RATE(816000000, 1, 3), -+ RK3128_CPUCLK_RATE(696000000, 1, 3), -+ RK3128_CPUCLK_RATE(600000000, 1, 3), -+ RK3128_CPUCLK_RATE(408000000, 1, 1), -+ RK3128_CPUCLK_RATE(312000000, 1, 1), -+ RK3128_CPUCLK_RATE(216000000, 1, 1), -+ RK3128_CPUCLK_RATE(96000000, 1, 1), -+}; ++#define PSECS_PER_SEC 1000000000000LL + -+static const struct rockchip_cpuclk_reg_data rk3128_cpuclk_data = { -+ .core_reg[0] = RK2928_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 7, -+ .mux_core_mask = 0x1, -+}; ++/* ++ * Each fine delay is between 44ps-77ps. Assume each fine delay is 60ps to ++ * simplify calculations. So 45degs could be anywhere between 33deg and 57.8deg. 
++ */ ++#define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC 60 + -+PNAME(mux_pll_p) = { "clk_24m", "xin24m" }; ++static int rockchip_mmc_get_phase(struct clk_hw *hw) ++{ ++ struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw); ++ unsigned long rate = clk_hw_get_rate(hw); ++ u32 raw_value; ++ u16 degrees; ++ u32 delay_num = 0; + -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_div2_ddr" }; -+PNAME(mux_usb480m_p) = { "usb480m_phy", "xin24m" }; -+PNAME(mux_aclk_cpu_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; ++ /* Constant signal, no measurable phase shift */ ++ if (!rate) ++ return 0; + -+PNAME(mux_pll_src_5plls_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3", "usb480m" }; -+PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "gpll_div2", "usb480m" }; -+PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "gpll_div2" }; ++ raw_value = readl(mmc_clock->reg) >> (mmc_clock->shift); + -+PNAME(mux_aclk_peri_src_p) = { "gpll", "cpll", "gpll_div2", "gpll_div3" }; -+PNAME(mux_mmc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; -+PNAME(mux_clk_cif_out_src_p) = { "sclk_cif_src", "xin24m" }; -+PNAME(mux_sclk_vop_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; ++ degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90; + -+PNAME(mux_i2s0_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s1_pre_p) = { "i2s1_src", "i2s1_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s_out_p) = { "i2s1_pre", "xin12m" }; -+PNAME(mux_sclk_spdif_p) = { "sclk_spdif_src", "spdif_frac", "xin12m" }; ++ if (raw_value & ROCKCHIP_MMC_DELAY_SEL) { ++ /* degrees/delaynum * 1000000 */ ++ unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) * ++ 36 * (rate / 10000); + -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++ delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK); ++ delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET; ++ degrees += DIV_ROUND_CLOSEST(delay_num * factor, 1000000); ++ } + -+PNAME(mux_sclk_gmac_p) = { "sclk_gmac_src", "gmac_clkin" }; -+PNAME(mux_sclk_sfc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; ++ return degrees % 360; ++} + -+static struct rockchip_pll_clock rk3128_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), -+ RK2928_MODE_CON, 0, 1, 0, rk3128_pll_rates), -+ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), -+ RK2928_MODE_CON, 4, 0, 0, NULL), -+ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), -+ RK2928_MODE_CON, 8, 2, 0, rk3128_pll_rates), -+ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), -+ RK2928_MODE_CON, 12, 3, ROCKCHIP_PLL_SYNC_RATE, rk3128_pll_rates), -+}; ++static int rockchip_mmc_set_phase(struct clk_hw *hw, int degrees) ++{ ++ struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw); ++ unsigned long rate = clk_hw_get_rate(hw); ++ u8 nineties, remainder; ++ u8 delay_num; ++ u32 raw_value; ++ u32 delay; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ /* ++ * The below calculation is based on the output clock from ++ * MMC host to the card, which expects the phase clock inherits ++ * the clock rate from its parent, namely the output clock ++ * provider of MMC host. However, things may go wrong if ++ * (1) It is orphan. ++ * (2) It is assigned to the wrong parent. 
++ * ++ * This check help debug the case (1), which seems to be the ++ * most likely problem we often face and which makes it difficult ++ * for people to debug unstable mmc tuning results. ++ */ ++ if (!rate) { ++ pr_err("%s: invalid clk rate\n", __func__); ++ return -EINVAL; ++ } + -+static struct rockchip_clk_branch rk3128_i2s0_fracmux __initdata = -+ MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(9), 8, 2, MFLAGS); ++ nineties = degrees / 90; ++ remainder = (degrees % 90); + -+static struct rockchip_clk_branch rk3128_i2s1_fracmux __initdata = -+ MUX(0, "i2s1_pre", mux_i2s1_pre_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); ++ /* ++ * Due to the inexact nature of the "fine" delay, we might ++ * actually go non-monotonic. We don't go _too_ monotonic ++ * though, so we should be OK. Here are options of how we may ++ * work: ++ * ++ * Ideally we end up with: ++ * 1.0, 2.0, ..., 69.0, 70.0, ..., 89.0, 90.0 ++ * ++ * On one extreme (if delay is actually 44ps): ++ * .73, 1.5, ..., 50.6, 51.3, ..., 65.3, 90.0 ++ * The other (if delay is actually 77ps): ++ * 1.3, 2.6, ..., 88.6. 89.8, ..., 114.0, 90 ++ * ++ * It's possible we might make a delay that is up to 25 ++ * degrees off from what we think we're making. That's OK ++ * though because we should be REALLY far from any bad range. ++ */ + -+static struct rockchip_clk_branch rk3128_spdif_fracmux __initdata = -+ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(6), 8, 2, MFLAGS); ++ /* ++ * Convert to delay; do a little extra work to make sure we ++ * don't overflow 32-bit / 64-bit numbers. ++ */ ++ delay = 10000000; /* PSECS_PER_SEC / 10000 / 10 */ ++ delay *= remainder; ++ delay = DIV_ROUND_CLOSEST(delay, ++ (rate / 1000) * 36 * ++ (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10)); + -+static struct rockchip_clk_branch rk3128_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); ++ delay_num = (u8) min_t(u32, delay, 255); + -+static struct rockchip_clk_branch rk3128_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); ++ raw_value = delay_num ? 
ROCKCHIP_MMC_DELAY_SEL : 0; ++ raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET; ++ raw_value |= nineties; ++ writel(HIWORD_UPDATE(raw_value, 0x07ff, mmc_clock->shift), ++ mmc_clock->reg); + -+static struct rockchip_clk_branch rk3128_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); ++ pr_debug("%s->set_phase(%d) delay_nums=%u reg[0x%p]=0x%03x actual_degrees=%d\n", ++ clk_hw_get_name(hw), degrees, delay_num, ++ mmc_clock->reg, raw_value>>(mmc_clock->shift), ++ rockchip_mmc_get_phase(hw) ++ ); ++ ++ return 0; ++} ++ ++static const struct clk_ops rockchip_mmc_clk_ops = { ++ .recalc_rate = rockchip_mmc_recalc, ++ .get_phase = rockchip_mmc_get_phase, ++ .set_phase = rockchip_mmc_set_phase, ++}; ++ ++#define to_rockchip_mmc_clock(x) \ ++ container_of(x, struct rockchip_mmc_clock, clk_rate_change_nb) ++static int rockchip_mmc_clk_rate_notify(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct rockchip_mmc_clock *mmc_clock = to_rockchip_mmc_clock(nb); ++ struct clk_notifier_data *ndata = data; + -+static struct rockchip_clk_branch common_clk_branches[] __initdata = { + /* -+ * Clock-Architecture Diagram 1 ++ * rockchip_mmc_clk is mostly used by mmc controllers to sample ++ * the intput data, which expects the fixed phase after the tuning ++ * process. However if the clock rate is changed, the phase is stale ++ * and may break the data sampling. So here we try to restore the phase ++ * for that case, except that ++ * (1) cached_phase is invaild since we inevitably cached it when the ++ * clock provider be reparented from orphan to its real parent in the ++ * first place. Otherwise we may mess up the initialization of MMC cards ++ * since we only set the default sample phase and drive phase later on. ++ * (2) the new coming rate is higher than the older one since mmc driver ++ * set the max-frequency to match the boards' ability but we can't go ++ * over the heads of that, otherwise the tests smoke out the issue. 
+ */ ++ if (ndata->old_rate <= ndata->new_rate) ++ return NOTIFY_DONE; + -+ FACTOR(PLL_GPLL_DIV2, "gpll_div2", "gpll", 0, 1, 2), -+ FACTOR(PLL_GPLL_DIV3, "gpll_div3", "gpll", 0, 1, 3), ++ if (event == PRE_RATE_CHANGE) ++ mmc_clock->cached_phase = ++ rockchip_mmc_get_phase(&mmc_clock->hw); ++ else if (mmc_clock->cached_phase != -EINVAL && ++ event == POST_RATE_CHANGE) ++ rockchip_mmc_set_phase(&mmc_clock->hw, mmc_clock->cached_phase); + -+ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(4), 8, 5, DFLAGS), ++ return NOTIFY_DONE; ++} + -+ /* PD_DDR */ -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "gpll_div2_ddr", "gpll_div2", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_DDRCLK(SCLK_DDRC, "clk_ddrc", mux_ddrphy_p, 0, -+ RK2928_CLKSEL_CON(26), 8, 2, 0, 2, -+ ROCKCHIP_DDRCLK_SIP_V2), -+ FACTOR(0, "clk_ddrphy", "ddrphy2x", 0, 1, 2), ++struct clk *rockchip_clk_register_mmc(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *reg, int shift) ++{ ++ struct clk_init_data init; ++ struct rockchip_mmc_clock *mmc_clock; ++ struct clk *clk; ++ int ret; + -+ /* PD_CORE */ -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "gpll_div2_core", "gpll_div2", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "armcore", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(0), 7, GFLAGS), ++ mmc_clock = kmalloc(sizeof(*mmc_clock), GFP_KERNEL); ++ if (!mmc_clock) ++ return ERR_PTR(-ENOMEM); + -+ /* PD_MISC */ -+ MUX(SCLK_USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK2928_MISC_CON, 15, 1, MFLAGS), ++ init.name = name; ++ init.flags = 0; ++ init.num_parents = num_parents; ++ init.parent_names = parent_names; ++ init.ops = &rockchip_mmc_clk_ops; + -+ /* PD_CPU */ -+ COMPOSITE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS, -+ RK2928_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS, -+ RK2928_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_CRYPTO, "clk_crypto", "aclk_cpu_src", 0, -+ RK2928_CLKSEL_CON(24), 0, 2, DFLAGS, -+ RK2928_CLKGATE_CON(0), 12, GFLAGS), ++ mmc_clock->hw.init = &init; ++ mmc_clock->reg = reg; ++ mmc_clock->shift = shift; + -+ /* PD_VIDEO */ -+ COMPOSITE(ACLK_VEPU, "aclk_vepu", mux_pll_src_5plls_p, 0, -+ RK2928_CLKSEL_CON(32), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 9, GFLAGS), -+ FACTOR(HCLK_VEPU, "hclk_vepu", "aclk_vepu", 0, 1, 4), ++ clk = clk_register(NULL, &mmc_clock->hw); ++ if (IS_ERR(clk)) { ++ ret = PTR_ERR(clk); ++ goto err_register; ++ } + -+ COMPOSITE(ACLK_VDPU, "aclk_vdpu", mux_pll_src_5plls_p, 0, -+ RK2928_CLKSEL_CON(32), 13, 3, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 11, GFLAGS), -+ FACTOR_GATE(HCLK_VDPU, "hclk_vdpu", "aclk_vdpu", 0, 1, 4, -+ RK2928_CLKGATE_CON(3), 12, GFLAGS), ++ 
mmc_clock->clk_rate_change_nb.notifier_call = ++ &rockchip_mmc_clk_rate_notify; ++ ret = clk_notifier_register(clk, &mmc_clock->clk_rate_change_nb); ++ if (ret) ++ goto err_notifier; + -+ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_5plls_p, 0, -+ RK2928_CLKSEL_CON(34), 13, 3, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 10, GFLAGS), ++ return clk; ++err_notifier: ++ clk_unregister(clk); ++err_register: ++ kfree(mmc_clock); ++ return ERR_PTR(ret); ++} +diff --git a/drivers/clk/rockchip-oh/clk-muxgrf.c b/drivers/clk/rockchip-oh/clk-muxgrf.c +new file mode 100644 +index 000000000..4a335a5f4 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-muxgrf.c +@@ -0,0 +1,92 @@ ++// SPDX-License-Identifier: GPL-2.0-only + -+ /* PD_VIO */ -+ COMPOSITE(ACLK_VIO0, "aclk_vio0", mux_pll_src_5plls_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(31), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE(ACLK_VIO1, "aclk_vio1", mux_pll_src_5plls_p, 0, -+ RK2928_CLKSEL_CON(31), 13, 3, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 4, GFLAGS), -+ FACTOR_GATE(HCLK_VIO, "hclk_vio", "aclk_vio0", CLK_IS_CRITICAL, 1, 4, -+ RK2928_CLKGATE_CON(0), 11, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ /* PD_PERI */ -+ COMPOSITE(0, "aclk_peri_src", mux_aclk_peri_src_p, 0, -+ RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++struct rockchip_muxgrf_clock { ++ struct clk_hw hw; ++ struct regmap *regmap; ++ u32 reg; ++ u32 shift; ++ u32 width; ++ int flags; ++}; + -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(2), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 1, GFLAGS), ++#define to_muxgrf_clock(_hw) container_of(_hw, struct rockchip_muxgrf_clock, hw) + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 3, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 6, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(10), 8, GFLAGS), ++static u8 rockchip_muxgrf_get_parent(struct clk_hw *hw) ++{ ++ struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw); ++ unsigned int mask = GENMASK(mux->width - 1, 0); ++ unsigned int val; + -+ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(SCLK_PVTM_GPU, "clk_pvtm_gpu", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(SCLK_PVTM_FUNC, "clk_pvtm_func", "xin24m", 0, -+ RK2928_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", 0, -+ RK2928_CLKGATE_CON(2), 15, GFLAGS), ++ regmap_read(mux->regmap, mux->reg, &val); + -+ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 11, GFLAGS), ++ val >>= mux->shift; ++ val &= mask; + -+ COMPOSITE(SCLK_SDIO, "sclk_sdio", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(12), 6, 2, 
MFLAGS, 0, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 13, GFLAGS), ++ return val; ++} + -+ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 14, GFLAGS), ++static int rockchip_muxgrf_set_parent(struct clk_hw *hw, u8 index) ++{ ++ struct rockchip_muxgrf_clock *mux = to_muxgrf_clock(hw); ++ unsigned int mask = GENMASK(mux->width + mux->shift - 1, mux->shift); ++ unsigned int val; + -+ DIV(SCLK_PVTM, "clk_pvtm", "clk_pvtm_func", 0, -+ RK2928_CLKSEL_CON(2), 0, 7, DFLAGS), ++ val = index; ++ val <<= mux->shift; + -+ /* -+ * Clock-Architecture Diagram 2 -+ */ -+ COMPOSITE(DCLK_VOP, "dclk_vop", mux_sclk_vop_src_p, 0, -+ RK2928_CLKSEL_CON(27), 0, 2, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE(SCLK_VOP, "sclk_vop", mux_sclk_vop_src_p, 0, -+ RK2928_CLKSEL_CON(28), 0, 2, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), -+ COMPOSITE(DCLK_EBC, "dclk_ebc", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(23), 0, 2, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 4, GFLAGS), ++ if (mux->flags & CLK_MUX_HIWORD_MASK) ++ return regmap_write(mux->regmap, mux->reg, val | (mask << 16)); ++ else ++ return regmap_update_bits(mux->regmap, mux->reg, mask, val); ++} + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++static const struct clk_ops rockchip_muxgrf_clk_ops = { ++ .get_parent = rockchip_muxgrf_get_parent, ++ .set_parent = rockchip_muxgrf_set_parent, ++ .determine_rate = __clk_mux_determine_rate, ++}; + -+ COMPOSITE_NODIV(SCLK_CIF_SRC, "sclk_cif_src", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(29), 0, 2, MFLAGS, -+ RK2928_CLKGATE_CON(3), 7, GFLAGS), -+ MUX(SCLK_CIF_OUT_SRC, "sclk_cif_out_src", mux_clk_cif_out_src_p, 0, -+ RK2928_CLKSEL_CON(29), 7, 1, MFLAGS), -+ DIV(SCLK_CIF_OUT, "sclk_cif_out", "sclk_cif_out_src", 0, -+ RK2928_CLKSEL_CON(29), 2, 5, DFLAGS), ++struct clk *rockchip_clk_register_muxgrf(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ int flags, struct regmap *regmap, int reg, ++ int shift, int width, int mux_flags) ++{ ++ struct rockchip_muxgrf_clock *muxgrf_clock; ++ struct clk_init_data init; ++ struct clk *clk; + -+ COMPOSITE(0, "i2s0_src", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(9), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(4), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(8), 0, -+ RK2928_CLKGATE_CON(4), 5, GFLAGS, -+ &rk3128_i2s0_fracmux), -+ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(4), 6, GFLAGS), ++ if (IS_ERR(regmap)) { ++ pr_err("%s: regmap not available\n", __func__); ++ return ERR_PTR(-ENOTSUPP); ++ } + -+ COMPOSITE(0, "i2s1_src", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(7), 0, -+ RK2928_CLKGATE_CON(0), 10, GFLAGS, -+ &rk3128_i2s1_fracmux), -+ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_out", mux_i2s_out_p, 0, -+ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, -+ RK2928_CLKGATE_CON(0), 13, GFLAGS), ++ muxgrf_clock = kmalloc(sizeof(*muxgrf_clock), GFP_KERNEL); ++ if (!muxgrf_clock) ++ return ERR_PTR(-ENOMEM); + -+ COMPOSITE(0, "sclk_spdif_src", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(6), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, 
"spdif_frac", "sclk_spdif_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(20), 0, -+ RK2928_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3128_spdif_fracmux), ++ init.name = name; ++ init.flags = flags; ++ init.num_parents = num_parents; ++ init.parent_names = parent_names; ++ init.ops = &rockchip_muxgrf_clk_ops; + -+ GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 3, GFLAGS), ++ muxgrf_clock->hw.init = &init; ++ muxgrf_clock->regmap = regmap; ++ muxgrf_clock->reg = reg; ++ muxgrf_clock->shift = shift; ++ muxgrf_clock->width = width; ++ muxgrf_clock->flags = mux_flags; + -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin12m", 0, -+ RK2928_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin12m", 0, -+ RK2928_CLKGATE_CON(1), 6, GFLAGS), ++ clk = clk_register(NULL, &muxgrf_clock->hw); ++ if (IS_ERR(clk)) ++ kfree(muxgrf_clock); + -+ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, -+ RK2928_CLKSEL_CON(24), 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(2), 8, GFLAGS), ++ return clk; ++} +diff --git a/drivers/clk/rockchip-oh/clk-out.c b/drivers/clk/rockchip-oh/clk-out.c +new file mode 100644 +index 000000000..22dcd98fb +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-out.c +@@ -0,0 +1,99 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2023 Rockchip Electronics Co., Ltd ++ */ + -+ COMPOSITE(ACLK_GPU, "aclk_gpu", mux_pll_src_5plls_p, 0, -+ RK2928_CLKSEL_CON(34), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 13, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include + -+ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 9, GFLAGS), ++static DEFINE_SPINLOCK(clk_out_lock); + -+ /* PD_UART */ -+ COMPOSITE(0, "uart0_src", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 8, GFLAGS), -+ MUX(0, "uart12_src", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(13), 14, 2, MFLAGS), -+ COMPOSITE_NOMUX(0, "uart1_src", "uart12_src", 0, -+ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "uart2_src", "uart12_src", 0, -+ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(17), 0, -+ RK2928_CLKGATE_CON(1), 9, GFLAGS, -+ &rk3128_uart0_fracmux), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(18), 0, -+ RK2928_CLKGATE_CON(1), 11, GFLAGS, -+ &rk3128_uart1_fracmux), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(19), 0, -+ RK2928_CLKGATE_CON(1), 13, GFLAGS, -+ &rk3128_uart2_fracmux), ++static int rockchip_clk_out_probe(struct platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct device_node *node = pdev->dev.of_node; ++ struct clk_hw *hw; ++ struct resource *res; ++ const char *clk_name = node->name; ++ const char *parent_name; ++ void __iomem *reg; ++ u32 shift = 0; ++ u8 clk_gate_flags = CLK_GATE_HIWORD_MASK; ++ int ret; + -+ COMPOSITE(SCLK_MAC_SRC, "sclk_gmac_src", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 7, GFLAGS), -+ MUX(SCLK_MAC, "sclk_gmac", mux_sclk_gmac_p, 0, -+ RK2928_CLKSEL_CON(5), 15, 1, MFLAGS), -+ GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_gmac", 0, -+ RK2928_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_gmac", 0, -+ 
RK2928_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_gmac", 0, -+ RK2928_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(SCLK_MAC_TX, "sclk_mac_tx", "sclk_gmac", 0, -+ RK2928_CLKGATE_CON(2), 7, GFLAGS), ++ ret = device_property_read_string(dev, "clock-output-names", &clk_name); ++ if (ret) ++ return ret; + -+ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 14, GFLAGS), -+ GATE(SCLK_HSADC_TSP, "sclk_hsadc_tsp", "ext_hsadc_tsp", 0, -+ RK2928_CLKGATE_CON(10), 13, GFLAGS), ++ ret = device_property_read_u32(dev, "rockchip,bit-shift", &shift); ++ if (ret) ++ return ret; + -+ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(2), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(10), 15, GFLAGS), ++ if (device_property_read_bool(dev, "rockchip,bit-set-to-disable")) ++ clk_gate_flags |= CLK_GATE_SET_TO_DISABLE; + -+ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "cpll", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(29), 8, 6, DFLAGS, -+ RK2928_CLKGATE_CON(1), 0, GFLAGS), ++ ret = of_clk_parent_fill(node, &parent_name, 1); ++ if (ret != 1) ++ return -EINVAL; + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENOMEM; + -+ /* PD_VOP */ -+ GATE(ACLK_LCDC0, "aclk_lcdc0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(0, "aclk_vio0_niu", "aclk_vio0", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS), ++ reg = devm_ioremap(dev, res->start, resource_size(res)); ++ if (!reg) ++ return -ENOMEM; + -+ GATE(ACLK_IEP, "aclk_iep", "aclk_vio1", 0, RK2928_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(0, "aclk_vio1_niu", "aclk_vio1", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 10, GFLAGS), ++ pm_runtime_enable(dev); + -+ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(PCLK_MIPI, "pclk_mipi", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(HCLK_LCDC0, "hclk_lcdc0", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(0, "hclk_vio_niu", "hclk_vio", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(HCLK_EBC, "hclk_ebc", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 9, GFLAGS), ++ hw = clk_hw_register_gate(dev, clk_name, parent_name, CLK_SET_RATE_PARENT, ++ reg, shift, clk_gate_flags, &clk_out_lock); ++ if (IS_ERR(hw)) { ++ ret = -EINVAL; ++ goto err_disable_pm_runtime; ++ } + -+ /* PD_PERI */ -+ GATE(0, "aclk_peri_axi", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK2928_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 15, GFLAGS), -+ GATE(0, "aclk_cpu_to_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), ++ of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw); + -+ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(0, "hclk_peri_matrix", "hclk_peri", 
CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), -+ GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(0, "hclk_peri_ahb", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(0, "hclk_emmc_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 6, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 14, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS), ++ return 0; + -+ GATE(PCLK_SIM_CARD, "pclk_sim_card", "pclk_peri", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK2928_CLKGATE_CON(10), 11, GFLAGS), -+ GATE(0, "pclk_peri_axi", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(PCLK_PWM, "pclk_pwm", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 14, GFLAGS), -+ GATE(PCLK_EFUSE, "pclk_efuse", "pclk_peri", 0, RK2928_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), -+ -+ /* PD_BUS */ -+ GATE(0, "aclk_initmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), -+ -+ GATE(0, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_cpu", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), ++err_disable_pm_runtime: ++ pm_runtime_disable(dev); + -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 14, GFLAGS), -+ 
GATE(0, "pclk_ddrupctl", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(0, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(PCLK_MIPIPHY, "pclk_mipiphy", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 0, GFLAGS), ++ return ret; ++} + -+ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 3, GFLAGS), ++static int rockchip_clk_out_remove(struct platform_device *pdev) ++{ ++ struct device_node *node = pdev->dev.of_node; + -+ /* PD_MMC */ -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3228_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 0), ++ of_clk_del_provider(node); ++ pm_runtime_disable(&pdev->dev); + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3228_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3228_SDIO_CON1, 0), ++ return 0; ++} + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3228_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3228_EMMC_CON1, 0), ++static const struct of_device_id rockchip_clk_out_match[] = { ++ { .compatible = "rockchip,clk-out", }, ++ {}, +}; + -+static struct rockchip_clk_branch rk3126_clk_branches[] __initdata = { -+ GATE(0, "pclk_stimer", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 15, GFLAGS), -+ GATE(0, "pclk_s_efuse", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 14, GFLAGS), -+ GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 8, GFLAGS), ++static struct platform_driver rockchip_clk_out_driver = { ++ .driver = { ++ .name = "rockchip-clk-out", ++ .of_match_table = rockchip_clk_out_match, ++ }, ++ .probe = rockchip_clk_out_probe, ++ .remove = rockchip_clk_out_remove, +}; + -+static struct rockchip_clk_branch rk3128_clk_branches[] __initdata = { -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_sclk_sfc_src_p, 0, -+ RK2928_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 15, GFLAGS), ++module_platform_driver(rockchip_clk_out_driver); + -+ GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK2928_CLKGATE_CON(3), 14, GFLAGS), -+ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_cpu", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), -+}; ++MODULE_DESCRIPTION("Rockchip Clock Input-Output-Switch"); ++MODULE_AUTHOR("Sugar Zhang "); ++MODULE_LICENSE("GPL"); ++MODULE_DEVICE_TABLE(of, rockchip_clk_out_match); +diff --git a/drivers/clk/rockchip-oh/clk-pll.c b/drivers/clk/rockchip-oh/clk-pll.c +new file mode 100644 +index 000000000..8aa9c3014 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-pll.c +@@ -0,0 +1,2200 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. 
++ * Author: Xing Zheng ++ */ + -+static void __iomem *rk312x_reg_base; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+void rkclk_cpuclk_div_setting(int div) -+{ -+ if (cpu_is_rk312x()) -+ writel_relaxed((0x001f0000 | (div - 1)), -+ rk312x_reg_base + RK2928_CLKSEL_CON(0)); -+} ++#define PLL_MODE_MASK 0x3 ++#define PLL_MODE_SLOW 0x0 ++#define PLL_MODE_NORM 0x1 ++#define PLL_MODE_DEEP 0x2 ++#define PLL_RK3328_MODE_MASK 0x1 + -+static void rk3128_dump_cru(void) -+{ -+ if (rk312x_reg_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk312x_reg_base, -+ 0x1f8, false); -+ } -+} ++struct rockchip_clk_pll { ++ struct clk_hw hw; + -+static struct rockchip_clk_provider *__init rk3128_common_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **clks; ++ struct clk_mux pll_mux; ++ const struct clk_ops *pll_mux_ops; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return ERR_PTR(-ENOMEM); -+ } ++ struct notifier_block clk_nb; + -+ rk312x_reg_base = reg_base; -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return ERR_PTR(-ENOMEM); -+ } -+ clks = ctx->clk_data.clks; ++ void __iomem *reg_base; ++ int lock_offset; ++ unsigned int lock_shift; ++ enum rockchip_pll_type type; ++ u8 flags; ++ const struct rockchip_pll_rate_table *rate_table; ++ unsigned int rate_count; ++ int sel; ++ unsigned long scaling; ++ spinlock_t *lock; + -+ rockchip_clk_register_plls(ctx, rk3128_pll_clks, -+ ARRAY_SIZE(rk3128_pll_clks), -+ RK3128_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, common_clk_branches, -+ ARRAY_SIZE(common_clk_branches)); ++ struct rockchip_clk_provider *ctx; + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL_DIV2], -+ &rk3128_cpuclk_data, rk3128_cpuclk_rates, -+ ARRAY_SIZE(rk3128_cpuclk_rates)); ++#ifdef CONFIG_ROCKCHIP_CLK_BOOST ++ bool boost_enabled; ++ u32 boost_backup_pll_usage; ++ unsigned long boost_backup_pll_rate; ++ unsigned long boost_low_rate; ++ unsigned long boost_high_rate; ++ struct regmap *boost; ++#endif ++#ifdef CONFIG_DEBUG_FS ++ struct hlist_node debug_node; ++#endif ++}; + -+ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++#define to_rockchip_clk_pll(_hw) container_of(_hw, struct rockchip_clk_pll, hw) ++#define to_rockchip_clk_pll_nb(nb) \ ++ container_of(nb, struct rockchip_clk_pll, clk_nb) + -+ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); ++#ifdef CONFIG_ROCKCHIP_CLK_BOOST ++static void rockchip_boost_disable_low(struct rockchip_clk_pll *pll); ++#ifdef CONFIG_DEBUG_FS ++static HLIST_HEAD(clk_boost_list); ++static DEFINE_MUTEX(clk_boost_lock); ++#endif ++#else ++static inline void rockchip_boost_disable_low(struct rockchip_clk_pll *pll) {} ++#endif + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3128_dump_cru; ++#define MHZ (1000UL * 1000UL) ++#define KHZ (1000UL) + -+ return ctx; -+} ++/* CLK_PLL_TYPE_RK3066_AUTO type ops */ ++#define PLL_FREF_MIN (269 * KHZ) ++#define PLL_FREF_MAX (2200 * MHZ) + -+static void __init rk3126_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; ++#define PLL_FVCO_MIN (440 * MHZ) ++#define PLL_FVCO_MAX (2200 * MHZ) + -+ ctx = 
rk3128_common_clk_init(np); -+ if (IS_ERR(ctx)) -+ return; ++#define PLL_FOUT_MIN (27500 * KHZ) ++#define PLL_FOUT_MAX (2200 * MHZ) + -+ rockchip_clk_register_branches(ctx, rk3126_clk_branches, -+ ARRAY_SIZE(rk3126_clk_branches)); ++#define PLL_NF_MAX (4096) ++#define PLL_NR_MAX (64) ++#define PLL_NO_MAX (16) + -+ rockchip_clk_of_add_provider(np, ctx); -+} ++/* CLK_PLL_TYPE_RK3036/3366/3399_AUTO type ops */ ++#define MIN_FOUTVCO_FREQ (800 * MHZ) ++#define MAX_FOUTVCO_FREQ (2000 * MHZ) + -+CLK_OF_DECLARE(rk3126_cru, "rockchip,rk3126-cru", rk3126_clk_init); ++static struct rockchip_pll_rate_table auto_table; + -+static void __init rk3128_clk_init(struct device_node *np) ++int rockchip_pll_clk_adaptive_scaling(struct clk *clk, int sel) +{ -+ struct rockchip_clk_provider *ctx; ++ struct clk *parent = clk_get_parent(clk); ++ struct rockchip_clk_pll *pll; + -+ ctx = rk3128_common_clk_init(np); -+ if (IS_ERR(ctx)) -+ return; ++ if (IS_ERR_OR_NULL(parent)) ++ return -EINVAL; + -+ rockchip_clk_register_branches(ctx, rk3128_clk_branches, -+ ARRAY_SIZE(rk3128_clk_branches)); ++ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); ++ if (!pll) ++ return -EINVAL; + -+ rockchip_clk_of_add_provider(np, ctx); ++ pll->sel = sel; ++ ++ return 0; +} ++EXPORT_SYMBOL(rockchip_pll_clk_adaptive_scaling); + -+CLK_OF_DECLARE(rk3128_cru, "rockchip,rk3128-cru", rk3128_clk_init); ++int rockchip_pll_clk_rate_to_scale(struct clk *clk, unsigned long rate) ++{ ++ const struct rockchip_pll_rate_table *rate_table; ++ struct clk *parent = clk_get_parent(clk); ++ struct rockchip_clk_pll *pll; ++ unsigned int i; + -+struct clk_rk3128_inits { -+ void (*inits)(struct device_node *np); -+}; ++ if (IS_ERR_OR_NULL(parent)) ++ return -EINVAL; + -+static const struct clk_rk3128_inits clk_rk3126_init = { -+ .inits = rk3126_clk_init, -+}; ++ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); ++ if (!pll) ++ return -EINVAL; + -+static const struct clk_rk3128_inits clk_rk3128_init = { -+ .inits = rk3128_clk_init, -+}; ++ rate_table = pll->rate_table; ++ for (i = 0; i < pll->rate_count; i++) { ++ if (rate >= rate_table[i].rate) ++ return i; ++ } + -+static const struct of_device_id clk_rk3128_match_table[] = { -+ { -+ .compatible = "rockchip,rk3126-cru", -+ .data = &clk_rk3126_init, -+ }, { -+ .compatible = "rockchip,rk3128-cru", -+ .data = &clk_rk3128_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rk3128_match_table); ++ return -EINVAL; ++} ++EXPORT_SYMBOL(rockchip_pll_clk_rate_to_scale); + -+static int __init clk_rk3128_probe(struct platform_device *pdev) ++int rockchip_pll_clk_scale_to_rate(struct clk *clk, unsigned int scale) +{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rk3128_inits *init_data; ++ const struct rockchip_pll_rate_table *rate_table; ++ struct clk *parent = clk_get_parent(clk); ++ struct rockchip_clk_pll *pll; ++ unsigned int i; + -+ match = of_match_device(clk_rk3128_match_table, &pdev->dev); -+ if (!match || !match->data) ++ if (IS_ERR_OR_NULL(parent)) + return -EINVAL; + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); ++ if (!pll) ++ return -EINVAL; + -+ return 0; ++ rate_table = pll->rate_table; ++ for (i = 0; i < pll->rate_count; i++) { ++ if (i == scale) ++ return rate_table[i].rate; ++ } ++ ++ return -EINVAL; +} ++EXPORT_SYMBOL(rockchip_pll_clk_scale_to_rate); + -+static struct platform_driver clk_rk3128_driver = { -+ .driver = { -+ .name = "clk-rk3128", -+ 
.of_match_table = clk_rk3128_match_table, -+ }, -+}; -+builtin_platform_driver_probe(clk_rk3128_driver, clk_rk3128_probe); ++static struct rockchip_pll_rate_table *rk_pll_rate_table_get(void) ++{ ++ return &auto_table; ++} + -+MODULE_DESCRIPTION("Rockchip RK3128 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3188.c b/drivers/clk/rockchip-oh/clk-rk3188.c -new file mode 100644 -index 000000000..d891b80d6 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3188.c -@@ -0,0 +1,921 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ */ ++static int rockchip_pll_clk_set_postdiv(unsigned long fout_hz, ++ u32 *postdiv1, ++ u32 *postdiv2, ++ u32 *foutvco) ++{ ++ unsigned long freq; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++ if (fout_hz < MIN_FOUTVCO_FREQ) { ++ for (*postdiv1 = 1; *postdiv1 <= 7; (*postdiv1)++) { ++ for (*postdiv2 = 1; *postdiv2 <= 7; (*postdiv2)++) { ++ freq = fout_hz * (*postdiv1) * (*postdiv2); ++ if (freq >= MIN_FOUTVCO_FREQ && ++ freq <= MAX_FOUTVCO_FREQ) { ++ *foutvco = freq; ++ return 0; ++ } ++ } ++ } ++ pr_err("CANNOT FIND postdiv1/2 to make fout in range from 800M to 2000M,fout = %lu\n", ++ fout_hz); ++ } else { ++ *postdiv1 = 1; ++ *postdiv2 = 1; ++ } ++ return 0; ++} + -+#define RK3066_GRF_SOC_STATUS 0x15c -+#define RK3188_GRF_SOC_STATUS 0xac ++static struct rockchip_pll_rate_table * ++rockchip_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, ++ unsigned long fin_hz, ++ unsigned long fout_hz) ++{ ++ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); ++ /* FIXME set postdiv1/2 always 1*/ ++ u32 foutvco = fout_hz; ++ u64 fin_64, frac_64; ++ u32 f_frac, postdiv1, postdiv2; ++ unsigned long clk_gcd = 0; + -+enum rk3188_plls { -+ apll, cpll, dpll, gpll, -+}; ++ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) ++ return NULL; + -+static struct rockchip_pll_rate_table rk3188_pll_rates[] = { -+ RK3066_PLL_RATE(2208000000, 1, 92, 1), -+ RK3066_PLL_RATE(2184000000, 1, 91, 1), -+ RK3066_PLL_RATE(2160000000, 1, 90, 1), -+ RK3066_PLL_RATE(2136000000, 1, 89, 1), -+ RK3066_PLL_RATE(2112000000, 1, 88, 1), -+ RK3066_PLL_RATE(2088000000, 1, 87, 1), -+ RK3066_PLL_RATE(2064000000, 1, 86, 1), -+ RK3066_PLL_RATE(2040000000, 1, 85, 1), -+ RK3066_PLL_RATE(2016000000, 1, 84, 1), -+ RK3066_PLL_RATE(1992000000, 1, 83, 1), -+ RK3066_PLL_RATE(1968000000, 1, 82, 1), -+ RK3066_PLL_RATE(1944000000, 1, 81, 1), -+ RK3066_PLL_RATE(1920000000, 1, 80, 1), -+ RK3066_PLL_RATE(1896000000, 1, 79, 1), -+ RK3066_PLL_RATE(1872000000, 1, 78, 1), -+ RK3066_PLL_RATE(1848000000, 1, 77, 1), -+ RK3066_PLL_RATE(1824000000, 1, 76, 1), -+ RK3066_PLL_RATE(1800000000, 1, 75, 1), -+ RK3066_PLL_RATE(1776000000, 1, 74, 1), -+ RK3066_PLL_RATE(1752000000, 1, 73, 1), -+ RK3066_PLL_RATE(1728000000, 1, 72, 1), -+ RK3066_PLL_RATE(1704000000, 1, 71, 1), -+ RK3066_PLL_RATE(1680000000, 1, 70, 1), -+ RK3066_PLL_RATE(1656000000, 1, 69, 1), -+ RK3066_PLL_RATE(1632000000, 1, 68, 1), -+ RK3066_PLL_RATE(1608000000, 1, 67, 1), -+ RK3066_PLL_RATE(1560000000, 1, 65, 1), -+ RK3066_PLL_RATE(1512000000, 1, 63, 1), -+ RK3066_PLL_RATE(1488000000, 1, 62, 1), -+ RK3066_PLL_RATE(1464000000, 1, 61, 1), -+ RK3066_PLL_RATE(1440000000, 1, 60, 1), -+ RK3066_PLL_RATE(1416000000, 1, 59, 1), -+ RK3066_PLL_RATE(1392000000, 1, 58, 1), -+ RK3066_PLL_RATE(1368000000, 1, 57, 1), -+ RK3066_PLL_RATE(1344000000, 1, 56, 1), -+ RK3066_PLL_RATE(1320000000, 1, 55, 1), -+ 
RK3066_PLL_RATE(1296000000, 1, 54, 1), -+ RK3066_PLL_RATE(1272000000, 1, 53, 1), -+ RK3066_PLL_RATE(1248000000, 1, 52, 1), -+ RK3066_PLL_RATE(1224000000, 1, 51, 1), -+ RK3066_PLL_RATE(1200000000, 1, 50, 1), -+ RK3066_PLL_RATE(1188000000, 2, 99, 1), -+ RK3066_PLL_RATE(1176000000, 1, 49, 1), -+ RK3066_PLL_RATE(1128000000, 1, 47, 1), -+ RK3066_PLL_RATE(1104000000, 1, 46, 1), -+ RK3066_PLL_RATE(1008000000, 1, 84, 2), -+ RK3066_PLL_RATE( 912000000, 1, 76, 2), -+ RK3066_PLL_RATE( 891000000, 8, 594, 2), -+ RK3066_PLL_RATE( 888000000, 1, 74, 2), -+ RK3066_PLL_RATE( 816000000, 1, 68, 2), -+ RK3066_PLL_RATE( 798000000, 2, 133, 2), -+ RK3066_PLL_RATE( 792000000, 1, 66, 2), -+ RK3066_PLL_RATE( 768000000, 1, 64, 2), -+ RK3066_PLL_RATE( 742500000, 8, 495, 2), -+ RK3066_PLL_RATE( 696000000, 1, 58, 2), -+ RK3066_PLL_RATE( 600000000, 1, 50, 2), -+ RK3066_PLL_RATE( 594000000, 2, 198, 4), -+ RK3066_PLL_RATE( 552000000, 1, 46, 2), -+ RK3066_PLL_RATE( 504000000, 1, 84, 4), -+ RK3066_PLL_RATE( 456000000, 1, 76, 4), -+ RK3066_PLL_RATE( 408000000, 1, 68, 4), -+ RK3066_PLL_RATE( 400000000, 3, 100, 2), -+ RK3066_PLL_RATE( 384000000, 2, 128, 4), -+ RK3066_PLL_RATE( 360000000, 1, 60, 4), -+ RK3066_PLL_RATE( 312000000, 1, 52, 4), -+ RK3066_PLL_RATE( 300000000, 1, 50, 4), -+ RK3066_PLL_RATE( 297000000, 2, 198, 8), -+ RK3066_PLL_RATE( 252000000, 1, 84, 8), -+ RK3066_PLL_RATE( 216000000, 1, 72, 8), -+ RK3066_PLL_RATE( 148500000, 2, 99, 8), -+ RK3066_PLL_RATE( 126000000, 1, 84, 16), -+ RK3066_PLL_RATE( 48000000, 1, 64, 32), -+ { /* sentinel */ }, -+}; ++ rockchip_pll_clk_set_postdiv(fout_hz, &postdiv1, &postdiv2, &foutvco); ++ rate_table->postdiv1 = postdiv1; ++ rate_table->postdiv2 = postdiv2; ++ rate_table->dsmpd = 1; + -+#define RK3066_DIV_CORE_PERIPH_MASK 0x3 -+#define RK3066_DIV_CORE_PERIPH_SHIFT 6 -+#define RK3066_DIV_ACLK_CORE_MASK 0x7 -+#define RK3066_DIV_ACLK_CORE_SHIFT 0 -+#define RK3066_DIV_ACLK_HCLK_MASK 0x3 -+#define RK3066_DIV_ACLK_HCLK_SHIFT 8 -+#define RK3066_DIV_ACLK_PCLK_MASK 0x3 -+#define RK3066_DIV_ACLK_PCLK_SHIFT 12 -+#define RK3066_DIV_AHB2APB_MASK 0x3 -+#define RK3066_DIV_AHB2APB_SHIFT 14 ++ if (fin_hz / MHZ * MHZ == fin_hz && fout_hz / MHZ * MHZ == fout_hz) { ++ fin_hz /= MHZ; ++ foutvco /= MHZ; ++ clk_gcd = gcd(fin_hz, foutvco); ++ rate_table->refdiv = fin_hz / clk_gcd; ++ rate_table->fbdiv = foutvco / clk_gcd; + -+#define RK3066_CLKSEL0(_core_peri) \ -+ { \ -+ .reg = RK2928_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_core_peri, RK3066_DIV_CORE_PERIPH_MASK, \ -+ RK3066_DIV_CORE_PERIPH_SHIFT) \ -+ } -+#define RK3066_CLKSEL1(_aclk_core, _aclk_hclk, _aclk_pclk, _ahb2apb) \ -+ { \ -+ .reg = RK2928_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK3066_DIV_ACLK_CORE_MASK, \ -+ RK3066_DIV_ACLK_CORE_SHIFT) | \ -+ HIWORD_UPDATE(_aclk_hclk, RK3066_DIV_ACLK_HCLK_MASK, \ -+ RK3066_DIV_ACLK_HCLK_SHIFT) | \ -+ HIWORD_UPDATE(_aclk_pclk, RK3066_DIV_ACLK_PCLK_MASK, \ -+ RK3066_DIV_ACLK_PCLK_SHIFT) | \ -+ HIWORD_UPDATE(_ahb2apb, RK3066_DIV_AHB2APB_MASK, \ -+ RK3066_DIV_AHB2APB_SHIFT), \ -+ } ++ rate_table->frac = 0; + -+#define RK3066_CPUCLK_RATE(_prate, _core_peri, _acore, _ahclk, _apclk, _h2p) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3066_CLKSEL0(_core_peri), \ -+ RK3066_CLKSEL1(_acore, _ahclk, _apclk, _h2p), \ -+ }, \ -+ } ++ pr_debug("fin = %lu, fout = %lu, clk_gcd = %lu, refdiv = %u, fbdiv = %u, postdiv1 = %u, postdiv2 = %u, frac = %u\n", ++ fin_hz, fout_hz, clk_gcd, rate_table->refdiv, ++ rate_table->fbdiv, rate_table->postdiv1, ++ rate_table->postdiv2, rate_table->frac); ++ } else { ++ 
pr_debug("frac div running, fin_hz = %lu, fout_hz = %lu, fin_INT_mhz = %lu, fout_INT_mhz = %lu\n", ++ fin_hz, fout_hz, ++ fin_hz / MHZ * MHZ, ++ fout_hz / MHZ * MHZ); ++ pr_debug("frac get postdiv1 = %u, postdiv2 = %u, foutvco = %u\n", ++ rate_table->postdiv1, rate_table->postdiv2, foutvco); ++ clk_gcd = gcd(fin_hz / MHZ, foutvco / MHZ); ++ rate_table->refdiv = fin_hz / MHZ / clk_gcd; ++ rate_table->fbdiv = foutvco / MHZ / clk_gcd; ++ pr_debug("frac get refdiv = %u, fbdiv = %u\n", ++ rate_table->refdiv, rate_table->fbdiv); + -+static struct rockchip_cpuclk_rate_table rk3066_cpuclk_rates[] __initdata = { -+ RK3066_CPUCLK_RATE(1416000000, 2, 3, 1, 2, 1), -+ RK3066_CPUCLK_RATE(1200000000, 2, 3, 1, 2, 1), -+ RK3066_CPUCLK_RATE(1008000000, 2, 2, 1, 2, 1), -+ RK3066_CPUCLK_RATE( 816000000, 2, 2, 1, 2, 1), -+ RK3066_CPUCLK_RATE( 600000000, 1, 2, 1, 2, 1), -+ RK3066_CPUCLK_RATE( 504000000, 1, 1, 1, 2, 1), -+ RK3066_CPUCLK_RATE( 312000000, 0, 1, 1, 1, 0), -+}; ++ rate_table->frac = 0; + -+static const struct rockchip_cpuclk_reg_data rk3066_cpuclk_data = { -+ .core_reg[0] = RK2928_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 8, -+ .mux_core_mask = 0x1, -+}; ++ f_frac = (foutvco % MHZ); ++ fin_64 = fin_hz; ++ do_div(fin_64, (u64)rate_table->refdiv); ++ frac_64 = (u64)f_frac << 24; ++ do_div(frac_64, fin_64); ++ rate_table->frac = (u32)frac_64; ++ if (rate_table->frac > 0) ++ rate_table->dsmpd = 0; ++ pr_debug("frac = %x\n", rate_table->frac); ++ } ++ return rate_table; ++} + -+#define RK3188_DIV_ACLK_CORE_MASK 0x7 -+#define RK3188_DIV_ACLK_CORE_SHIFT 3 ++static struct rockchip_pll_rate_table * ++rockchip_rk3066_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, ++ unsigned long fin_hz, ++ unsigned long fout_hz) ++{ ++ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); ++ u32 nr, nf, no, nonr; ++ u32 nr_out, nf_out, no_out; ++ u32 n; ++ u32 numerator, denominator; ++ u64 fref, fvco, fout; ++ unsigned long clk_gcd = 0; + -+#define RK3188_CLKSEL1(_aclk_core) \ -+ { \ -+ .reg = RK2928_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK3188_DIV_ACLK_CORE_MASK,\ -+ RK3188_DIV_ACLK_CORE_SHIFT) \ -+ } -+#define RK3188_CPUCLK_RATE(_prate, _core_peri, _aclk_core) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3066_CLKSEL0(_core_peri), \ -+ RK3188_CLKSEL1(_aclk_core), \ -+ }, \ -+ } ++ nr_out = PLL_NR_MAX + 1; ++ no_out = 0; ++ nf_out = 0; + -+static struct rockchip_cpuclk_rate_table rk3188_cpuclk_rates[] __initdata = { -+ RK3188_CPUCLK_RATE(1608000000, 2, 3), -+ RK3188_CPUCLK_RATE(1416000000, 2, 3), -+ RK3188_CPUCLK_RATE(1200000000, 2, 3), -+ RK3188_CPUCLK_RATE(1008000000, 2, 3), -+ RK3188_CPUCLK_RATE( 816000000, 2, 3), -+ RK3188_CPUCLK_RATE( 600000000, 1, 3), -+ RK3188_CPUCLK_RATE( 504000000, 1, 3), -+ RK3188_CPUCLK_RATE( 312000000, 0, 1), -+}; ++ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) ++ return NULL; + -+static const struct rockchip_cpuclk_reg_data rk3188_cpuclk_data = { -+ .core_reg[0] = RK2928_CLKSEL_CON(0), -+ .div_core_shift[0] = 9, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 8, -+ .mux_core_mask = 0x1, -+}; ++ clk_gcd = gcd(fin_hz, fout_hz); + -+PNAME(mux_pll_p) = { "xin24m", "xin32k" }; -+PNAME(mux_ddrphy_p) = { "dpll", "gpll_ddr" }; -+PNAME(mux_pll_src_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(mux_aclk_cpu_p) = { "apll", "gpll" 
}; -+PNAME(mux_sclk_cif0_p) = { "cif0_pre", "xin24m" }; -+PNAME(mux_sclk_i2s0_p) = { "i2s0_pre", "i2s0_frac", "xin12m" }; -+PNAME(mux_sclk_spdif_p) = { "spdif_pre", "spdif_frac", "xin12m" }; -+PNAME(mux_sclk_uart0_p) = { "uart0_pre", "uart0_frac", "xin24m" }; -+PNAME(mux_sclk_uart1_p) = { "uart1_pre", "uart1_frac", "xin24m" }; -+PNAME(mux_sclk_uart2_p) = { "uart2_pre", "uart2_frac", "xin24m" }; -+PNAME(mux_sclk_uart3_p) = { "uart3_pre", "uart3_frac", "xin24m" }; -+PNAME(mux_sclk_hsadc_p) = { "hsadc_src", "hsadc_frac", "ext_hsadc" }; -+PNAME(mux_mac_p) = { "gpll", "dpll" }; -+PNAME(mux_sclk_macref_p) = { "mac_src", "ext_rmii" }; ++ numerator = fout_hz / clk_gcd; ++ denominator = fin_hz / clk_gcd; + -+static struct rockchip_pll_clock rk3066_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), -+ RK2928_MODE_CON, 0, 5, 0, rk3188_pll_rates), -+ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), -+ RK2928_MODE_CON, 4, 4, 0, NULL), -+ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), -+ RK2928_MODE_CON, 8, 6, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), -+ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), -+ RK2928_MODE_CON, 12, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), -+}; ++ for (n = 1;; n++) { ++ nf = numerator * n; ++ nonr = denominator * n; ++ if (nf > PLL_NF_MAX || nonr > (PLL_NO_MAX * PLL_NR_MAX)) ++ break; + -+static struct rockchip_pll_clock rk3188_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), -+ RK2928_MODE_CON, 0, 6, 0, rk3188_pll_rates), -+ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), -+ RK2928_MODE_CON, 4, 5, 0, NULL), -+ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), -+ RK2928_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), -+ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), -+ RK2928_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), -+}; ++ for (no = 1; no <= PLL_NO_MAX; no++) { ++ if (!(no == 1 || !(no % 2))) ++ continue; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK ++ if (nonr % no) ++ continue; ++ nr = nonr / no; + -+/* 2 ^ (val + 1) */ -+static struct clk_div_table div_core_peri_t[] = { -+ { .val = 0, .div = 2 }, -+ { .val = 1, .div = 4 }, -+ { .val = 2, .div = 8 }, -+ { .val = 3, .div = 16 }, -+ { /* sentinel */ }, -+}; ++ if (nr > PLL_NR_MAX) ++ continue; + -+static struct rockchip_clk_branch common_hsadc_out_fracmux __initdata = -+ MUX(0, "sclk_hsadc_out", mux_sclk_hsadc_p, 0, -+ RK2928_CLKSEL_CON(22), 4, 2, MFLAGS); ++ fref = fin_hz / nr; ++ if (fref < PLL_FREF_MIN || fref > PLL_FREF_MAX) ++ continue; + -+static struct rockchip_clk_branch common_spdif_fracmux __initdata = -+ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(5), 8, 2, MFLAGS); ++ fvco = fref * nf; ++ if (fvco < PLL_FVCO_MIN || fvco > PLL_FVCO_MAX) ++ continue; + -+static struct rockchip_clk_branch common_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); ++ fout = fvco / no; ++ if (fout < PLL_FOUT_MIN || fout > PLL_FOUT_MAX) ++ continue; + -+static struct rockchip_clk_branch common_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", 
mux_sclk_uart1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); ++ /* select the best from all available PLL settings */ ++ if ((no > no_out) || ++ ((no == no_out) && (nr < nr_out))) { ++ nr_out = nr; ++ nf_out = nf; ++ no_out = no; ++ } ++ } ++ } + -+static struct rockchip_clk_branch common_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); ++ /* output the best PLL setting */ ++ if ((nr_out <= PLL_NR_MAX) && (no_out > 0)) { ++ rate_table->nr = nr_out; ++ rate_table->nf = nf_out; ++ rate_table->no = no_out; ++ } else { ++ return NULL; ++ } + -+static struct rockchip_clk_branch common_uart3_fracmux __initdata = -+ MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS); ++ return rate_table; ++} + -+static struct rockchip_clk_branch common_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 2 -+ */ ++static u32 ++rockchip_rk3588_pll_frac_get(u32 m, u32 p, u32 s, u64 fin_hz, u64 fvco) ++{ ++ u64 fref, fout, ffrac; ++ u32 k = 0; + -+ GATE(0, "gpll_armclk", "gpll", 0, RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ fref = fin_hz / p; ++ ffrac = fvco - (m * fref); ++ fout = ffrac * 65536; ++ k = fout / fref; ++ if (k > 32767) { ++ fref = fin_hz / p; ++ ffrac = ((m + 1) * fref) - fvco; ++ fout = ffrac * 65536; ++ k = ((fout * 10 / fref) + 7) / 10; ++ if (k > 32767) ++ k = 0; ++ else ++ k = ~k + 1; ++ } ++ return k; ++} + -+ /* these two are set by the cpuclk and should not be changed */ -+ COMPOSITE_NOMUX_DIVTBL(CORE_PERI, "core_peri", "armclk", 0, -+ RK2928_CLKSEL_CON(0), 6, 2, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ div_core_peri_t, RK2928_CLKGATE_CON(0), 0, GFLAGS), ++static struct rockchip_pll_rate_table * ++rockchip_rk3588_pll_frac_by_auto(unsigned long fin_hz, unsigned long fout_hz) ++{ ++ struct rockchip_pll_rate_table *rate_table = rk_pll_rate_table_get(); ++ u64 fvco_min = 2250 * MHZ, fvco_max = 4500 * MHZ; ++ u32 p, m, s, k; ++ u64 fvco; + -+ COMPOSITE(ACLK_VEPU, "aclk_vepu", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(32), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 9, GFLAGS), -+ GATE(HCLK_VEPU, "hclk_vepu", "aclk_vepu", 0, -+ RK2928_CLKGATE_CON(3), 10, GFLAGS), -+ COMPOSITE(ACLK_VDPU, "aclk_vdpu", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(32), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(HCLK_VDPU, "hclk_vdpu", "aclk_vdpu", 0, -+ RK2928_CLKGATE_CON(3), 12, GFLAGS), ++ for (s = 0; s <= 6; s++) { ++ fvco = (u64)fout_hz << s; ++ if (fvco < fvco_min || fvco > fvco_max) ++ continue; ++ for (p = 1; p <= 4; p++) { ++ for (m = 64; m <= 1023; m++) { ++ if ((fvco >= m * fin_hz / p) && (fvco < (m + 1) * fin_hz / p)) { ++ k = rockchip_rk3588_pll_frac_get(m, p, s, ++ (u64)fin_hz, ++ fvco); ++ if (!k) ++ continue; ++ rate_table->p = p; ++ rate_table->s = s; ++ rate_table->k = k; ++ if (k > 32767) ++ rate_table->m = m + 1; ++ else ++ rate_table->m = m; ++ return rate_table; ++ } ++ } ++ } ++ } ++ return NULL; ++} + -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 7, GFLAGS), -+ COMPOSITE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++static struct rockchip_pll_rate_table * ++rockchip_rk3588_pll_clk_set_by_auto(struct rockchip_clk_pll *pll, ++ unsigned long fin_hz, ++ unsigned long fout_hz) ++{ ++ struct rockchip_pll_rate_table *rate_table = 
rk_pll_rate_table_get(); ++ u64 fvco_min = 2250 * MHZ, fvco_max = 4500 * MHZ; ++ u64 fout_min = 37 * MHZ, fout_max = 4500 * MHZ; ++ u32 p, m, s; ++ u64 fvco; + -+ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 3, GFLAGS), ++ if (fin_hz == 0 || fout_hz == 0 || fout_hz == fin_hz) ++ return NULL; + -+ GATE(0, "atclk_cpu", "pclk_cpu_pre", 0, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_CPU, "pclk_cpu", "pclk_cpu_pre", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(HCLK_CPU, "hclk_cpu", "hclk_cpu_pre", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 4, GFLAGS), ++ if (fout_hz > fout_max || fout_hz < fout_min) ++ return NULL; + -+ COMPOSITE(0, "aclk_lcdc0_pre", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(31), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE(0, "aclk_lcdc1_pre", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(31), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 4, GFLAGS), ++ if (fin_hz / MHZ * MHZ == fin_hz && fout_hz / MHZ * MHZ == fout_hz) { ++ for (s = 0; s <= 6; s++) { ++ fvco = (u64)fout_hz << s; ++ if (fvco < fvco_min || fvco > fvco_max) ++ continue; ++ for (p = 2; p <= 4; p++) { ++ for (m = 64; m <= 1023; m++) { ++ if (fvco == m * fin_hz / p) { ++ rate_table->p = p; ++ rate_table->m = m; ++ rate_table->s = s; ++ rate_table->k = 0; ++ return rate_table; ++ } ++ } ++ } ++ } ++ pr_err("CANNOT FIND Fout by auto,fout = %lu\n", fout_hz); ++ } else { ++ rate_table = rockchip_rk3588_pll_frac_by_auto(fin_hz, fout_hz); ++ if (!rate_table) ++ pr_err("CANNOT FIND Fout by auto,fout = %lu\n", fout_hz); ++ else ++ return rate_table; ++ } ++ return NULL; ++} + -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(2), 3, GFLAGS), ++static const struct rockchip_pll_rate_table *rockchip_get_pll_settings( ++ struct rockchip_clk_pll *pll, unsigned long rate) ++{ ++ const struct rockchip_pll_rate_table *rate_table = pll->rate_table; ++ int i; + -+ MUX(0, "cif_src", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(29), 0, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "cif0_pre", "cif_src", 0, -+ RK2928_CLKSEL_CON(29), 1, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 7, GFLAGS), -+ MUX(SCLK_CIF0, "sclk_cif0", mux_sclk_cif0_p, 0, -+ RK2928_CLKSEL_CON(29), 7, 1, MFLAGS), ++ for (i = 0; i < pll->rate_count; i++) { ++ if (rate == rate_table[i].rate) { ++ if (i < pll->sel) { ++ pll->scaling = rate; ++ return &rate_table[pll->sel]; ++ } ++ pll->scaling = 0; ++ return &rate_table[i]; ++ } ++ } ++ pll->scaling = 0; + -+ GATE(0, "pclkin_cif0", "ext_cif0", 0, -+ RK2928_CLKGATE_CON(3), 3, GFLAGS), -+ INVERTER(0, "pclk_cif0", "pclkin_cif0", -+ RK2928_CLKSEL_CON(30), 8, IFLAGS), ++ if (pll->type == pll_rk3066) ++ return rockchip_rk3066_pll_clk_set_by_auto(pll, 24 * MHZ, rate); ++ else if (pll->type == pll_rk3588 || pll->type == pll_rk3588_core) ++ return rockchip_rk3588_pll_clk_set_by_auto(pll, 24 * MHZ, rate); ++ else ++ return rockchip_pll_clk_set_by_auto(pll, 24 * MHZ, rate); ++} + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++static long rockchip_pll_round_rate(struct clk_hw *hw, ++ unsigned long drate, unsigned long 
*prate) ++{ ++ return drate; ++} + -+ /* -+ * the 480m are generated inside the usb block from these clocks, -+ * but they are also a source for the hsicphy clock. -+ */ -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 6, GFLAGS), ++/* ++ * Wait for the pll to reach the locked state. ++ * The calling set_rate function is responsible for making sure the ++ * grf regmap is available. ++ */ ++static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) ++{ ++ struct regmap *grf = pll->ctx->grf; ++ unsigned int val; ++ int ret; + -+ COMPOSITE(0, "mac_src", mux_mac_p, 0, -+ RK2928_CLKSEL_CON(21), 0, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 5, GFLAGS), -+ MUX(SCLK_MAC, "sclk_macref", mux_sclk_macref_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(21), 4, 1, MFLAGS), -+ GATE(0, "sclk_mac_lbtest", "sclk_macref", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 12, GFLAGS), ++ ret = regmap_read_poll_timeout(grf, pll->lock_offset, val, ++ val & BIT(pll->lock_shift), 0, 1000); ++ if (ret) ++ pr_err("%s: timeout waiting for pll to lock\n", __func__); + -+ COMPOSITE(0, "hsadc_src", mux_pll_src_gpll_cpll_p, 0, -+ RK2928_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_FRACMUX(0, "hsadc_frac", "hsadc_src", 0, -+ RK2928_CLKSEL_CON(23), 0, -+ RK2928_CLKGATE_CON(2), 7, GFLAGS, -+ &common_hsadc_out_fracmux), -+ INVERTER(SCLK_HSADC, "sclk_hsadc", "sclk_hsadc_out", -+ RK2928_CLKSEL_CON(22), 7, IFLAGS), ++ return ret; ++} + -+ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, -+ RK2928_CLKSEL_CON(24), 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(2), 8, GFLAGS), ++/* ++ * PLL used in RK3036 ++ */ + -+ COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0, -+ RK2928_CLKSEL_CON(5), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(9), 0, -+ RK2928_CLKGATE_CON(0), 14, GFLAGS, -+ &common_spdif_fracmux), ++#define RK3036_PLLCON(i) (i * 0x4) ++#define RK3036_PLLCON0_FBDIV_MASK 0xfff ++#define RK3036_PLLCON0_FBDIV_SHIFT 0 ++#define RK3036_PLLCON0_POSTDIV1_MASK 0x7 ++#define RK3036_PLLCON0_POSTDIV1_SHIFT 12 ++#define RK3036_PLLCON1_REFDIV_MASK 0x3f ++#define RK3036_PLLCON1_REFDIV_SHIFT 0 ++#define RK3036_PLLCON1_POSTDIV2_MASK 0x7 ++#define RK3036_PLLCON1_POSTDIV2_SHIFT 6 ++#define RK3036_PLLCON1_LOCK_STATUS BIT(10) ++#define RK3036_PLLCON1_DSMPD_MASK 0x1 ++#define RK3036_PLLCON1_DSMPD_SHIFT 12 ++#define RK3036_PLLCON1_PWRDOWN BIT(13) ++#define RK3036_PLLCON1_PLLPDSEL BIT(15) ++#define RK3036_PLLCON2_FRAC_MASK 0xffffff ++#define RK3036_PLLCON2_FRAC_SHIFT 0 ++ ++static int rockchip_rk3036_pll_wait_lock(struct rockchip_clk_pll *pll) ++{ ++ u32 pllcon; ++ int ret; + + /* -+ * Clock-Architecture Diagram 4 ++ * Lock time typical 250, max 500 input clock cycles @24MHz ++ * So define a very safe maximum of 1000us, meaning 24000 cycles. 
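+	 * At 24MHz the PLL sees 24 cycles per microsecond, so the 1000us poll
+	 * below covers those 24000 cycles, well above the 500 cycle worst case.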
+ */ ++ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3036_PLLCON(1), ++ pllcon, ++ pllcon & RK3036_PLLCON1_LOCK_STATUS, ++ 0, 1000); ++ if (ret) ++ pr_err("%s: timeout waiting for pll to lock\n", __func__); + -+ GATE(SCLK_SMC, "sclk_smc", "hclk_peri", 0, -+ RK2928_CLKGATE_CON(2), 4, GFLAGS), ++ return ret; ++} + -+ COMPOSITE_NOMUX(SCLK_SPI0, "sclk_spi0", "pclk_peri", 0, -+ RK2928_CLKSEL_CON(25), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SPI1, "sclk_spi1", "pclk_peri", 0, -+ RK2928_CLKSEL_CON(25), 8, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 10, GFLAGS), ++static unsigned long __maybe_unused ++rockchip_rk3036_pll_con_to_rate(struct rockchip_clk_pll *pll, ++ u32 con0, u32 con1) ++{ ++ unsigned int fbdiv, postdiv1, refdiv, postdiv2; ++ u64 rate64 = 24000000; + -+ COMPOSITE_NOMUX(SCLK_SDMMC, "sclk_sdmmc", "hclk_peri", 0, -+ RK2928_CLKSEL_CON(11), 0, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SDIO, "sclk_sdio", "hclk_peri", 0, -+ RK2928_CLKSEL_CON(12), 0, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_EMMC, "sclk_emmc", "hclk_peri", 0, -+ RK2928_CLKSEL_CON(12), 8, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 14, GFLAGS), ++ fbdiv = ((con0 >> RK3036_PLLCON0_FBDIV_SHIFT) & ++ RK3036_PLLCON0_FBDIV_MASK); ++ postdiv1 = ((con0 >> RK3036_PLLCON0_POSTDIV1_SHIFT) & ++ RK3036_PLLCON0_POSTDIV1_MASK); ++ refdiv = ((con1 >> RK3036_PLLCON1_REFDIV_SHIFT) & ++ RK3036_PLLCON1_REFDIV_MASK); ++ postdiv2 = ((con1 >> RK3036_PLLCON1_POSTDIV2_SHIFT) & ++ RK3036_PLLCON1_POSTDIV2_MASK); + -+ MUX(0, "uart_src", mux_pll_src_gpll_cpll_p, 0, -+ RK2928_CLKSEL_CON(12), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "uart0_pre", "uart_src", 0, -+ RK2928_CLKSEL_CON(13), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(17), 0, -+ RK2928_CLKGATE_CON(1), 9, GFLAGS, -+ &common_uart0_fracmux), -+ COMPOSITE_NOMUX(0, "uart1_pre", "uart_src", 0, -+ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(18), 0, -+ RK2928_CLKGATE_CON(1), 11, GFLAGS, -+ &common_uart1_fracmux), -+ COMPOSITE_NOMUX(0, "uart2_pre", "uart_src", 0, -+ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(19), 0, -+ RK2928_CLKGATE_CON(1), 13, GFLAGS, -+ &common_uart2_fracmux), -+ COMPOSITE_NOMUX(0, "uart3_pre", "uart_src", 0, -+ RK2928_CLKSEL_CON(16), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(20), 0, -+ RK2928_CLKGATE_CON(1), 15, GFLAGS, -+ &common_uart3_fracmux), ++ rate64 *= fbdiv; ++ do_div(rate64, refdiv); ++ do_div(rate64, postdiv1); ++ do_div(rate64, postdiv2); + -+ GATE(SCLK_JTAG, "jtag", "ext_jtag", 0, RK2928_CLKGATE_CON(1), 3, GFLAGS), ++ return (unsigned long)rate64; ++} + -+ GATE(SCLK_TIMER0, "timer0", "xin24m", 0, RK2928_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(SCLK_TIMER1, "timer1", "xin24m", 0, RK2928_CLKGATE_CON(1), 1, GFLAGS), ++static void rockchip_rk3036_pll_get_params(struct rockchip_clk_pll *pll, ++ struct rockchip_pll_rate_table *rate) ++{ ++ u32 pllcon; + -+ /* clk_core_pre gates */ -+ GATE(0, "core_dbg", "armclk", 0, RK2928_CLKGATE_CON(9), 0, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(0)); ++ rate->fbdiv = ((pllcon >> 
RK3036_PLLCON0_FBDIV_SHIFT) ++ & RK3036_PLLCON0_FBDIV_MASK); ++ rate->postdiv1 = ((pllcon >> RK3036_PLLCON0_POSTDIV1_SHIFT) ++ & RK3036_PLLCON0_POSTDIV1_MASK); + -+ /* aclk_cpu gates */ -+ GATE(ACLK_DMA1, "aclk_dma1", "aclk_cpu", 0, RK2928_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(1)); ++ rate->refdiv = ((pllcon >> RK3036_PLLCON1_REFDIV_SHIFT) ++ & RK3036_PLLCON1_REFDIV_MASK); ++ rate->postdiv2 = ((pllcon >> RK3036_PLLCON1_POSTDIV2_SHIFT) ++ & RK3036_PLLCON1_POSTDIV2_MASK); ++ rate->dsmpd = ((pllcon >> RK3036_PLLCON1_DSMPD_SHIFT) ++ & RK3036_PLLCON1_DSMPD_MASK); + -+ /* hclk_cpu gates */ -+ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK2928_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(HCLK_I2S0_2CH, "hclk_i2s0_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(0, "hclk_cpubus", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(4), 8, GFLAGS), -+ /* hclk_ahb2apb is part of a clk branch */ -+ GATE(0, "hclk_vio_bus", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(HCLK_LCDC0, "hclk_lcdc0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(HCLK_LCDC1, "hclk_lcdc1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(HCLK_CIF0, "hclk_cif0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(HCLK_IPP, "hclk_ipp", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 10, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(2)); ++ rate->frac = ((pllcon >> RK3036_PLLCON2_FRAC_SHIFT) ++ & RK3036_PLLCON2_FRAC_MASK); ++} + -+ /* hclk_peri gates */ -+ GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 6, GFLAGS), -+ GATE(0, "hclk_emem_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(HCLK_EMAC, "hclk_emac", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(HCLK_OTG0, "hclk_usbotg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(HCLK_PIDF, "hclk_pidfilter", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 12, GFLAGS), ++static unsigned long rockchip_rk3036_pll_recalc_rate(struct clk_hw *hw, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct rockchip_pll_rate_table cur; ++ u64 rate64 = prate, frac_rate64 = prate; + -+ /* aclk_lcdc0_pre gates */ -+ GATE(0, "aclk_vio0", "aclk_lcdc0_pre", 0, RK2928_CLKGATE_CON(6), 13, GFLAGS), -+ GATE(ACLK_LCDC0, "aclk_lcdc0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(ACLK_CIF0, "aclk_cif0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(ACLK_IPP, "aclk_ipp", 
"aclk_vio0", 0, RK2928_CLKGATE_CON(6), 8, GFLAGS), ++ if (pll->sel && pll->scaling) ++ return pll->scaling; + -+ /* aclk_lcdc1_pre gates */ -+ GATE(0, "aclk_vio1", "aclk_lcdc1_pre", 0, RK2928_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(ACLK_LCDC1, "aclk_lcdc1", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 11, GFLAGS), ++ rockchip_rk3036_pll_get_params(pll, &cur); + -+ /* atclk_cpu gates */ -+ GATE(0, "atclk", "atclk_cpu", 0, RK2928_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(0, "trace", "atclk_cpu", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), ++ rate64 *= cur.fbdiv; ++ do_div(rate64, cur.refdiv); + -+ /* pclk_cpu gates */ -+ GATE(PCLK_PWM01, "pclk_pwm01", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), -+ GATE(PCLK_EFUSE, "pclk_efuse", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(PCLK_PUBL, "pclk_ddrpubl", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(0, "pclk_dbg", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 1, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 5, GFLAGS), ++ if (cur.dsmpd == 0) { ++ /* fractional mode */ ++ frac_rate64 *= cur.frac; + -+ /* aclk_peri */ -+ GATE(ACLK_DMA2, "aclk_dma2", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(ACLK_SMC, "aclk_smc", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 8, GFLAGS), -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 4, GFLAGS), -+ GATE(0, "aclk_cpu_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), -+ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS), ++ do_div(frac_rate64, cur.refdiv); ++ rate64 += frac_rate64 >> 24; ++ } + -+ /* pclk_peri gates */ -+ GATE(0, "pclk_peri_axi_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(PCLK_PWM23, "pclk_pwm23", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 13, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 8, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 14, 
GFLAGS), -+}; ++ do_div(rate64, cur.postdiv1); ++ do_div(rate64, cur.postdiv2); + -+PNAME(mux_rk3066_lcdc0_p) = { "dclk_lcdc0_src", "xin27m" }; -+PNAME(mux_rk3066_lcdc1_p) = { "dclk_lcdc1_src", "xin27m" }; -+PNAME(mux_sclk_cif1_p) = { "cif1_pre", "xin24m" }; -+PNAME(mux_sclk_i2s1_p) = { "i2s1_pre", "i2s1_frac", "xin12m" }; -+PNAME(mux_sclk_i2s2_p) = { "i2s2_pre", "i2s2_frac", "xin12m" }; ++ return (unsigned long)rate64; ++} + -+static struct clk_div_table div_aclk_cpu_t[] = { -+ { .val = 0, .div = 1 }, -+ { .val = 1, .div = 2 }, -+ { .val = 2, .div = 3 }, -+ { .val = 3, .div = 4 }, -+ { .val = 4, .div = 8 }, -+ { /* sentinel */ }, -+}; ++static int rockchip_rk3036_pll_set_params(struct rockchip_clk_pll *pll, ++ const struct rockchip_pll_rate_table *rate) ++{ ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; ++ struct rockchip_pll_rate_table cur; ++ u32 pllcon; ++ int rate_change_remuxed = 0; ++ int cur_parent; ++ int ret; + -+static struct rockchip_clk_branch rk3066a_i2s0_fracmux __initdata = -+ MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(2), 8, 2, MFLAGS); ++ pr_debug("%s: rate settings for %lu fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ __func__, rate->rate, rate->fbdiv, rate->postdiv1, rate->refdiv, ++ rate->postdiv2, rate->dsmpd, rate->frac); + -+static struct rockchip_clk_branch rk3066a_i2s1_fracmux __initdata = -+ MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); ++ rockchip_rk3036_pll_get_params(pll, &cur); ++ cur.rate = 0; + -+static struct rockchip_clk_branch rk3066a_i2s2_fracmux __initdata = -+ MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(4), 8, 2, MFLAGS); ++ if (!(pll->flags & ROCKCHIP_PLL_FIXED_MODE)) { ++ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); ++ if (cur_parent == PLL_MODE_NORM) { ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ rate_change_remuxed = 1; ++ } ++ } + -+static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = { -+ DIVTBL(0, "aclk_cpu_pre", "armclk", 0, -+ RK2928_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, div_aclk_cpu_t), -+ DIV(0, "pclk_cpu_pre", "aclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO -+ | CLK_DIVIDER_READ_ONLY), -+ DIV(0, "hclk_cpu_pre", "aclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO -+ | CLK_DIVIDER_READ_ONLY), -+ COMPOSITE_NOMUX(0, "hclk_ahb2apb", "hclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 14, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO -+ | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(4), 9, GFLAGS), ++ /* update pll values */ ++ writel_relaxed(HIWORD_UPDATE(rate->fbdiv, RK3036_PLLCON0_FBDIV_MASK, ++ RK3036_PLLCON0_FBDIV_SHIFT) | ++ HIWORD_UPDATE(rate->postdiv1, RK3036_PLLCON0_POSTDIV1_MASK, ++ RK3036_PLLCON0_POSTDIV1_SHIFT), ++ pll->reg_base + RK3036_PLLCON(0)); + -+ GATE(CORE_L2C, "core_l2c", "aclk_cpu", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(9), 4, GFLAGS), ++ writel_relaxed(HIWORD_UPDATE(rate->refdiv, RK3036_PLLCON1_REFDIV_MASK, ++ RK3036_PLLCON1_REFDIV_SHIFT) | ++ HIWORD_UPDATE(rate->postdiv2, RK3036_PLLCON1_POSTDIV2_MASK, ++ RK3036_PLLCON1_POSTDIV2_SHIFT) | ++ HIWORD_UPDATE(rate->dsmpd, RK3036_PLLCON1_DSMPD_MASK, ++ RK3036_PLLCON1_DSMPD_SHIFT), ++ pll->reg_base + RK3036_PLLCON(1)); + -+ COMPOSITE(0, "aclk_peri_pre", mux_pll_src_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, 
DFLAGS, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++ /* GPLL CON2 is not HIWORD_MASK */ ++ pllcon = readl_relaxed(pll->reg_base + RK3036_PLLCON(2)); ++ pllcon &= ~(RK3036_PLLCON2_FRAC_MASK << RK3036_PLLCON2_FRAC_SHIFT); ++ pllcon |= rate->frac << RK3036_PLLCON2_FRAC_SHIFT; ++ writel_relaxed(pllcon, pll->reg_base + RK3036_PLLCON(2)); + -+ COMPOSITE(0, "dclk_lcdc0_src", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 1, GFLAGS), -+ MUX(DCLK_LCDC0, "dclk_lcdc0", mux_rk3066_lcdc0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(27), 4, 1, MFLAGS), -+ COMPOSITE(0, "dclk_lcdc1_src", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(28), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), -+ MUX(DCLK_LCDC1, "dclk_lcdc1", mux_rk3066_lcdc1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(28), 4, 1, MFLAGS), ++ if (IS_ENABLED(CONFIG_ROCKCHIP_CLK_BOOST)) ++ rockchip_boost_disable_low(pll); + -+ COMPOSITE_NOMUX(0, "cif1_pre", "cif_src", 0, -+ RK2928_CLKSEL_CON(29), 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 8, GFLAGS), -+ MUX(SCLK_CIF1, "sclk_cif1", mux_sclk_cif1_p, 0, -+ RK2928_CLKSEL_CON(29), 15, 1, MFLAGS), ++ /* wait for the pll to lock */ ++ ret = rockchip_rk3036_pll_wait_lock(pll); ++ if (ret) { ++ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", ++ __func__); ++ rockchip_rk3036_pll_set_params(pll, &cur); ++ } + -+ GATE(0, "pclkin_cif1", "ext_cif1", 0, -+ RK2928_CLKGATE_CON(3), 4, GFLAGS), -+ INVERTER(0, "pclk_cif1", "pclkin_cif1", -+ RK2928_CLKSEL_CON(30), 12, IFLAGS), ++ if (rate_change_remuxed) ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); + -+ COMPOSITE(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 13, GFLAGS), -+ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_src", 0, -+ RK2928_CLKGATE_CON(5), 15, GFLAGS), ++ return ret; ++} + -+ GATE(SCLK_TIMER2, "timer2", "xin24m", 0, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++static int rockchip_rk3036_pll_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; + -+ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, -+ RK2928_CLKSEL_CON(34), 0, 16, DFLAGS, -+ RK2928_CLKGATE_CON(2), 15, GFLAGS), ++ pr_debug("%s: changing %s to %lu with a parent rate of %lu\n", ++ __func__, __clk_get_name(hw->clk), drate, prate); + -+ MUX(0, "i2s_src", mux_pll_src_gpll_cpll_p, 0, -+ RK2928_CLKSEL_CON(2), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0, -+ RK2928_CLKSEL_CON(2), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(6), 0, -+ RK2928_CLKGATE_CON(0), 8, GFLAGS, -+ &rk3066a_i2s0_fracmux), -+ COMPOSITE_NOMUX(0, "i2s1_pre", "i2s_src", 0, -+ RK2928_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(7), 0, -+ RK2928_CLKGATE_CON(0), 10, GFLAGS, -+ &rk3066a_i2s1_fracmux), -+ COMPOSITE_NOMUX(0, "i2s2_pre", "i2s_src", 0, -+ RK2928_CLKSEL_CON(4), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(8), 0, -+ RK2928_CLKGATE_CON(0), 12, GFLAGS, -+ &rk3066a_i2s2_fracmux), ++ /* Get required rate settings from table */ ++ rate = rockchip_get_pll_settings(pll, drate); ++ if (!rate) { ++ 
pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, ++ drate, __clk_get_name(hw->clk)); ++ return -EINVAL; ++ } + -+ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(HCLK_CIF1, "hclk_cif1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(HCLK_HDMI, "hclk_hdmi", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), ++ return rockchip_rk3036_pll_set_params(pll, rate); ++} + -+ GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(5), 14, GFLAGS), ++static int rockchip_rk3036_pll_enable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; + -+ GATE(ACLK_CIF1, "aclk_cif1", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 7, GFLAGS), ++ writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3036_PLLCON(1)); ++ rockchip_rk3036_pll_wait_lock(pll); + -+ GATE(PCLK_TIMER1, "pclk_timer1", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 8, GFLAGS), -+ GATE(PCLK_TIMER2, "pclk_timer2", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 15, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); + -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK2928_CLKGATE_CON(4), 13, GFLAGS), -+}; ++ return 0; ++} + -+static struct clk_div_table div_rk3188_aclk_core_t[] = { -+ { .val = 0, .div = 1 }, -+ { .val = 1, .div = 2 }, -+ { .val = 2, .div = 3 }, -+ { .val = 3, .div = 4 }, -+ { .val = 4, .div = 8 }, -+ { /* sentinel */ }, -+}; ++static void rockchip_rk3036_pll_disable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; + -+PNAME(mux_hsicphy_p) = { "sclk_otgphy0_480m", "sclk_otgphy1_480m", -+ "gpll", "cpll" }; ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); + -+static struct rockchip_clk_branch rk3188_i2s0_fracmux __initdata = -+ MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); ++ writel(HIWORD_UPDATE(RK3036_PLLCON1_PWRDOWN, ++ RK3036_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3036_PLLCON(1)); ++} + -+static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = { -+ COMPOSITE_NOMUX_DIVTBL(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 3, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ div_rk3188_aclk_core_t, RK2928_CLKGATE_CON(0), 7, GFLAGS), ++static int rockchip_rk3036_pll_is_enabled(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ u32 pllcon = readl(pll->reg_base + RK3036_PLLCON(1)); + -+ /* do not source aclk_cpu_pre from the apll, to keep complexity down */ -+ COMPOSITE_NOGATE(ACLK_CPU_PRE, "aclk_cpu_pre", mux_aclk_cpu_p, CLK_SET_RATE_NO_REPARENT, -+ RK2928_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS), -+ DIV(0, "pclk_cpu_pre", "aclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ DIV(0, "hclk_cpu_pre", "aclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ 
COMPOSITE_NOMUX(0, "hclk_ahb2apb", "hclk_cpu_pre", 0, -+ RK2928_CLKSEL_CON(1), 14, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(4), 9, GFLAGS), ++ return !(pllcon & RK3036_PLLCON1_PWRDOWN); ++} + -+ GATE(CORE_L2C, "core_l2c", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(9), 4, GFLAGS), ++static int rockchip_rk3036_pll_init(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ struct rockchip_pll_rate_table cur; ++ unsigned long drate; + -+ COMPOSITE(0, "aclk_peri_pre", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) ++ return 0; + -+ COMPOSITE(DCLK_LCDC0, "dclk_lcdc0", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE(DCLK_LCDC1, "dclk_lcdc1", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(28), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++ drate = clk_hw_get_rate(hw); ++ rate = rockchip_get_pll_settings(pll, drate); + -+ COMPOSITE(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, -+ RK2928_CLKSEL_CON(34), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 15, GFLAGS), -+ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_src", 0, -+ RK2928_CLKGATE_CON(9), 7, GFLAGS), ++ /* when no rate setting for the current rate, rely on clk_set_rate */ ++ if (!rate) ++ return 0; + -+ GATE(SCLK_TIMER2, "timer2", "xin24m", 0, RK2928_CLKGATE_CON(3), 4, GFLAGS), -+ GATE(SCLK_TIMER3, "timer3", "xin24m", 0, RK2928_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(SCLK_TIMER4, "timer4", "xin24m", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), -+ GATE(SCLK_TIMER5, "timer5", "xin24m", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), -+ GATE(SCLK_TIMER6, "timer6", "xin24m", 0, RK2928_CLKGATE_CON(3), 14, GFLAGS), ++ rockchip_rk3036_pll_get_params(pll, &cur); + -+ COMPOSITE_NODIV(0, "sclk_hsicphy_480m", mux_hsicphy_p, 0, -+ RK2928_CLKSEL_CON(30), 0, 2, DFLAGS, -+ RK2928_CLKGATE_CON(3), 6, GFLAGS), -+ DIV(0, "sclk_hsicphy_12m", "sclk_hsicphy_480m", 0, -+ RK2928_CLKSEL_CON(11), 8, 6, DFLAGS), ++ pr_debug("%s: pll %s@%lu: Hz\n", __func__, __clk_get_name(hw->clk), ++ drate); ++ pr_debug("old - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ cur.fbdiv, cur.postdiv1, cur.refdiv, cur.postdiv2, ++ cur.dsmpd, cur.frac); ++ pr_debug("new - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ rate->fbdiv, rate->postdiv1, rate->refdiv, rate->postdiv2, ++ rate->dsmpd, rate->frac); + -+ MUX(0, "i2s_src", mux_pll_src_gpll_cpll_p, 0, -+ RK2928_CLKSEL_CON(2), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0, -+ RK2928_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(7), 0, -+ RK2928_CLKGATE_CON(0), 10, GFLAGS, -+ &rk3188_i2s0_fracmux), ++ if (rate->fbdiv != cur.fbdiv || rate->postdiv1 != cur.postdiv1 || ++ rate->refdiv != cur.refdiv || rate->postdiv2 != cur.postdiv2 || ++ rate->dsmpd != cur.dsmpd || ++ (!cur.dsmpd && (rate->frac != cur.frac))) { ++ struct clk *parent = clk_get_parent(hw->clk); + -+ GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), -+ GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS), ++ if (!parent) { ++ pr_warn("%s: parent of %s not available\n", ++ __func__, __clk_get_name(hw->clk)); ++ return 0; ++ } + -+ 
GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), ++ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", ++ __func__, __clk_get_name(hw->clk)); ++ rockchip_rk3036_pll_set_params(pll, rate); ++ } + -+ GATE(PCLK_TIMER3, "pclk_timer3", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 9, GFLAGS), ++ return 0; ++} + -+ GATE(PCLK_UART0, "pclk_uart0", "hclk_ahb2apb", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "hclk_ahb2apb", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), ++static const struct clk_ops rockchip_rk3036_pll_clk_norate_ops = { ++ .recalc_rate = rockchip_rk3036_pll_recalc_rate, ++ .enable = rockchip_rk3036_pll_enable, ++ .disable = rockchip_rk3036_pll_disable, ++ .is_enabled = rockchip_rk3036_pll_is_enabled, ++}; + -+ GATE(ACLK_GPS, "aclk_gps", "aclk_peri", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), ++static const struct clk_ops rockchip_rk3036_pll_clk_ops = { ++ .recalc_rate = rockchip_rk3036_pll_recalc_rate, ++ .round_rate = rockchip_pll_round_rate, ++ .set_rate = rockchip_rk3036_pll_set_rate, ++ .enable = rockchip_rk3036_pll_enable, ++ .disable = rockchip_rk3036_pll_disable, ++ .is_enabled = rockchip_rk3036_pll_is_enabled, ++ .init = rockchip_rk3036_pll_init, +}; + -+static struct rockchip_clk_provider *__init rk3188_common_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++/* ++ * PLL used in RK3066, RK3188 and RK3288 ++ */ + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return ERR_PTR(-ENOMEM); -+ } ++#define RK3066_PLL_RESET_DELAY(nr) ((nr * 500) / 24 + 1) + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return ERR_PTR(-ENOMEM); -+ } ++#define RK3066_PLLCON(i) (i * 0x4) ++#define RK3066_PLLCON0_OD_MASK 0xf ++#define RK3066_PLLCON0_OD_SHIFT 0 ++#define RK3066_PLLCON0_NR_MASK 0x3f ++#define RK3066_PLLCON0_NR_SHIFT 8 ++#define RK3066_PLLCON1_NF_MASK 0x1fff ++#define RK3066_PLLCON1_NF_SHIFT 0 ++#define RK3066_PLLCON2_NB_MASK 0xfff ++#define RK3066_PLLCON2_NB_SHIFT 0 ++#define RK3066_PLLCON3_RESET (1 << 5) ++#define RK3066_PLLCON3_PWRDOWN (1 << 1) ++#define RK3066_PLLCON3_BYPASS (1 << 0) + -+ rockchip_clk_register_branches(ctx, common_clk_branches, -+ ARRAY_SIZE(common_clk_branches)); ++static void rockchip_rk3066_pll_get_params(struct rockchip_clk_pll *pll, ++ struct rockchip_pll_rate_table *rate) ++{ ++ u32 pllcon; + -+ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(0)); ++ rate->nr = ((pllcon >> RK3066_PLLCON0_NR_SHIFT) ++ & RK3066_PLLCON0_NR_MASK) + 1; ++ rate->no = ((pllcon >> RK3066_PLLCON0_OD_SHIFT) ++ & RK3066_PLLCON0_OD_MASK) + 1; + -+ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); ++ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(1)); ++ rate->nf = ((pllcon >> RK3066_PLLCON1_NF_SHIFT) ++ & RK3066_PLLCON1_NF_MASK) + 1; + -+ return ctx; ++ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(2)); ++ rate->nb = ((pllcon >> RK3066_PLLCON2_NB_SHIFT) ++ & RK3066_PLLCON2_NB_MASK) + 1; +} + -+static void __init rk3066a_clk_init(struct device_node *np) ++static unsigned long rockchip_rk3066_pll_recalc_rate(struct clk_hw *hw, ++ unsigned long prate) +{ -+ struct 
rockchip_clk_provider *ctx; -+ struct clk **clks; ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct rockchip_pll_rate_table cur; ++ u64 rate64 = prate; ++ u32 pllcon; + -+ ctx = rk3188_common_clk_init(np); -+ if (IS_ERR(ctx)) -+ return; -+ clks = ctx->clk_data.clks; ++ pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(3)); ++ if (pllcon & RK3066_PLLCON3_BYPASS) { ++ pr_debug("%s: pll %s is bypassed\n", __func__, ++ clk_hw_get_name(hw)); ++ return prate; ++ } + -+ rockchip_clk_register_plls(ctx, rk3066_pll_clks, -+ ARRAY_SIZE(rk3066_pll_clks), -+ RK3066_GRF_SOC_STATUS); -+ rockchip_clk_register_branches(ctx, rk3066a_clk_branches, -+ ARRAY_SIZE(rk3066a_clk_branches)); -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3066_cpuclk_data, rk3066_cpuclk_rates, -+ ARRAY_SIZE(rk3066_cpuclk_rates)); -+ rockchip_clk_of_add_provider(np, ctx); ++ if (pll->sel && pll->scaling) ++ return pll->scaling; ++ ++ rockchip_rk3066_pll_get_params(pll, &cur); ++ ++ rate64 *= cur.nf; ++ do_div(rate64, cur.nr); ++ do_div(rate64, cur.no); ++ ++ return (unsigned long)rate64; +} -+CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); + -+static void __init rk3188a_clk_init(struct device_node *np) ++static int rockchip_rk3066_pll_set_params(struct rockchip_clk_pll *pll, ++ const struct rockchip_pll_rate_table *rate) +{ -+ struct rockchip_clk_provider *ctx; -+ struct clk **clks; -+ unsigned long rate; ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; ++ struct rockchip_pll_rate_table cur; ++ int rate_change_remuxed = 0; ++ int cur_parent; + int ret; + -+ ctx = rk3188_common_clk_init(np); -+ if (IS_ERR(ctx)) -+ return; -+ clks = ctx->clk_data.clks; ++ pr_debug("%s: rate settings for %lu (nr, no, nf): (%d, %d, %d)\n", ++ __func__, rate->rate, rate->nr, rate->no, rate->nf); + -+ rockchip_clk_register_plls(ctx, rk3188_pll_clks, -+ ARRAY_SIZE(rk3188_pll_clks), -+ RK3188_GRF_SOC_STATUS); -+ rockchip_clk_register_branches(ctx, rk3188_clk_branches, -+ ARRAY_SIZE(rk3188_clk_branches)); -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3188_cpuclk_data, rk3188_cpuclk_rates, -+ ARRAY_SIZE(rk3188_cpuclk_rates)); ++ rockchip_rk3066_pll_get_params(pll, &cur); ++ cur.rate = 0; + -+ /* reparent aclk_cpu_pre from apll */ -+ if (clks[ACLK_CPU_PRE] && clks[PLL_GPLL]) { -+ rate = clk_get_rate(clks[ACLK_CPU_PRE]); ++ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); ++ if (cur_parent == PLL_MODE_NORM) { ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ rate_change_remuxed = 1; ++ } + -+ ret = clk_set_parent(clks[ACLK_CPU_PRE], clks[PLL_GPLL]); -+ if (ret < 0) -+ pr_warn("%s: could not reparent aclk_cpu_pre to gpll\n", -+ __func__); ++ /* enter reset mode */ ++ writel(HIWORD_UPDATE(RK3066_PLLCON3_RESET, RK3066_PLLCON3_RESET, 0), ++ pll->reg_base + RK3066_PLLCON(3)); + -+ clk_set_rate(clks[ACLK_CPU_PRE], rate); -+ } else { -+ pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n", ++ /* update pll values */ ++ writel(HIWORD_UPDATE(rate->nr - 1, RK3066_PLLCON0_NR_MASK, ++ RK3066_PLLCON0_NR_SHIFT) | ++ HIWORD_UPDATE(rate->no - 1, RK3066_PLLCON0_OD_MASK, ++ RK3066_PLLCON0_OD_SHIFT), ++ pll->reg_base + RK3066_PLLCON(0)); ++ ++ writel_relaxed(HIWORD_UPDATE(rate->nf - 1, RK3066_PLLCON1_NF_MASK, ++ RK3066_PLLCON1_NF_SHIFT), ++ pll->reg_base + RK3066_PLLCON(1)); ++ writel_relaxed(HIWORD_UPDATE(rate->nb - 1, RK3066_PLLCON2_NB_MASK, ++ 
RK3066_PLLCON2_NB_SHIFT), ++ pll->reg_base + RK3066_PLLCON(2)); ++ ++ /* leave reset and wait the reset_delay */ ++ writel(HIWORD_UPDATE(0, RK3066_PLLCON3_RESET, 0), ++ pll->reg_base + RK3066_PLLCON(3)); ++ udelay(RK3066_PLL_RESET_DELAY(rate->nr)); ++ ++ /* wait for the pll to lock */ ++ ret = rockchip_pll_wait_lock(pll); ++ if (ret) { ++ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", + __func__); ++ rockchip_rk3066_pll_set_params(pll, &cur); + } + -+ rockchip_clk_of_add_provider(np, ctx); ++ if (rate_change_remuxed) ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); ++ ++ return ret; +} -+CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); + -+static void __init rk3188_clk_init(struct device_node *np) ++static int rockchip_rk3066_pll_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) +{ -+ int i; ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ unsigned long old_rate = rockchip_rk3066_pll_recalc_rate(hw, prate); ++ struct regmap *grf = pll->ctx->grf; ++ int ret; + -+ for (i = 0; i < ARRAY_SIZE(rk3188_pll_clks); i++) { -+ struct rockchip_pll_clock *pll = &rk3188_pll_clks[i]; -+ struct rockchip_pll_rate_table *rate; ++ if (IS_ERR(grf)) { ++ pr_debug("%s: grf regmap not available, aborting rate change\n", ++ __func__); ++ return PTR_ERR(grf); ++ } + -+ if (!pll->rate_table) -+ continue; ++ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", ++ __func__, clk_hw_get_name(hw), old_rate, drate, prate); + -+ rate = pll->rate_table; -+ while (rate->rate > 0) { -+ rate->nb = 1; -+ rate++; -+ } ++ /* Get required rate settings from table */ ++ rate = rockchip_get_pll_settings(pll, drate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, ++ drate, clk_hw_get_name(hw)); ++ return -EINVAL; + } + -+ rk3188a_clk_init(np); ++ ret = rockchip_rk3066_pll_set_params(pll, rate); ++ if (ret) ++ pll->scaling = 0; ++ ++ return ret; +} -+CLK_OF_DECLARE(rk3188_cru, "rockchip,rk3188-cru", rk3188_clk_init); + -+struct clk_rk3188_inits { -+ void (*inits)(struct device_node *np); -+}; ++static int rockchip_rk3066_pll_enable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); + -+static const struct clk_rk3188_inits clk_rk3066a_init = { -+ .inits = rk3066a_clk_init, -+}; ++ writel(HIWORD_UPDATE(0, RK3066_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3066_PLLCON(3)); ++ rockchip_pll_wait_lock(pll); + -+static const struct clk_rk3188_inits clk_rk3188a_init = { -+ .inits = rk3188a_clk_init, -+}; ++ return 0; ++} + -+static const struct clk_rk3188_inits clk_rk3188_init = { -+ .inits = rk3188_clk_init, -+}; ++static void rockchip_rk3066_pll_disable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); + -+static const struct of_device_id clk_rk3188_match_table[] = { -+ { -+ .compatible = "rockchip,rk3066a-cru", -+ .data = &clk_rk3066a_init, -+ }, { -+ .compatible = "rockchip,rk3188a-cru", -+ .data = &clk_rk3188a_init, -+ }, { -+ .compatible = "rockchip,rk3188-cru", -+ .data = &rk3188_clk_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rk3188_match_table); ++ writel(HIWORD_UPDATE(RK3066_PLLCON3_PWRDOWN, ++ RK3066_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3066_PLLCON(3)); ++} + -+static int __init clk_rk3188_probe(struct platform_device *pdev) ++static int rockchip_rk3066_pll_is_enabled(struct clk_hw *hw) +{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; 
-+ const struct clk_rk3188_inits *init_data; ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ u32 pllcon = readl(pll->reg_base + RK3066_PLLCON(3)); + -+ match = of_match_device(clk_rk3188_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; ++ return !(pllcon & RK3066_PLLCON3_PWRDOWN); ++} + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++static int rockchip_rk3066_pll_init(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ struct rockchip_pll_rate_table cur; ++ unsigned long drate; ++ ++ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) ++ return 0; ++ ++ drate = clk_hw_get_rate(hw); ++ rate = rockchip_get_pll_settings(pll, drate); ++ ++ /* when no rate setting for the current rate, rely on clk_set_rate */ ++ if (!rate) ++ return 0; ++ ++ rockchip_rk3066_pll_get_params(pll, &cur); ++ ++ pr_debug("%s: pll %s@%lu: nr (%d:%d); no (%d:%d); nf(%d:%d), nb(%d:%d)\n", ++ __func__, clk_hw_get_name(hw), drate, rate->nr, cur.nr, ++ rate->no, cur.no, rate->nf, cur.nf, rate->nb, cur.nb); ++ if (rate->nr != cur.nr || rate->no != cur.no || rate->nf != cur.nf ++ || rate->nb != cur.nb) { ++ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", ++ __func__, clk_hw_get_name(hw)); ++ rockchip_rk3066_pll_set_params(pll, rate); ++ } + + return 0; +} + -+static struct platform_driver clk_rk3188_driver = { -+ .driver = { -+ .name = "clk-rk3188", -+ .of_match_table = clk_rk3188_match_table, -+ }, ++static const struct clk_ops rockchip_rk3066_pll_clk_norate_ops = { ++ .recalc_rate = rockchip_rk3066_pll_recalc_rate, ++ .enable = rockchip_rk3066_pll_enable, ++ .disable = rockchip_rk3066_pll_disable, ++ .is_enabled = rockchip_rk3066_pll_is_enabled, ++}; ++ ++static const struct clk_ops rockchip_rk3066_pll_clk_ops = { ++ .recalc_rate = rockchip_rk3066_pll_recalc_rate, ++ .round_rate = rockchip_pll_round_rate, ++ .set_rate = rockchip_rk3066_pll_set_rate, ++ .enable = rockchip_rk3066_pll_enable, ++ .disable = rockchip_rk3066_pll_disable, ++ .is_enabled = rockchip_rk3066_pll_is_enabled, ++ .init = rockchip_rk3066_pll_init, +}; -+builtin_platform_driver_probe(clk_rk3188_driver, clk_rk3188_probe); + -+MODULE_DESCRIPTION("Rockchip RK3188 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3228.c b/drivers/clk/rockchip-oh/clk-rk3228.c -new file mode 100644 -index 000000000..18c94343b ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3228.c -@@ -0,0 +1,732 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. 
-+ * Author: Xing Zheng -+ * Jeffy Chen ++ * PLL used in RK3399 + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++#define RK3399_PLLCON(i) (i * 0x4) ++#define RK3399_PLLCON0_FBDIV_MASK 0xfff ++#define RK3399_PLLCON0_FBDIV_SHIFT 0 ++#define RK3399_PLLCON1_REFDIV_MASK 0x3f ++#define RK3399_PLLCON1_REFDIV_SHIFT 0 ++#define RK3399_PLLCON1_POSTDIV1_MASK 0x7 ++#define RK3399_PLLCON1_POSTDIV1_SHIFT 8 ++#define RK3399_PLLCON1_POSTDIV2_MASK 0x7 ++#define RK3399_PLLCON1_POSTDIV2_SHIFT 12 ++#define RK3399_PLLCON2_FRAC_MASK 0xffffff ++#define RK3399_PLLCON2_FRAC_SHIFT 0 ++#define RK3399_PLLCON2_LOCK_STATUS BIT(31) ++#define RK3399_PLLCON3_PWRDOWN BIT(0) ++#define RK3399_PLLCON3_DSMPD_MASK 0x1 ++#define RK3399_PLLCON3_DSMPD_SHIFT 3 + -+#define RK3228_GRF_SOC_STATUS0 0x480 ++static int rockchip_rk3399_pll_wait_lock(struct rockchip_clk_pll *pll) ++{ ++ u32 pllcon; ++ int ret; + -+enum rk3228_plls { -+ apll, dpll, cpll, gpll, -+}; ++ /* ++ * Lock time typical 250, max 500 input clock cycles @24MHz ++ * So define a very safe maximum of 1000us, meaning 24000 cycles. ++ */ ++ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3399_PLLCON(2), ++ pllcon, ++ pllcon & RK3399_PLLCON2_LOCK_STATUS, ++ 0, 1000); ++ if (ret) ++ pr_err("%s: timeout waiting for pll to lock\n", __func__); + -+static struct rockchip_pll_rate_table rk3228_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 
216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++ return ret; ++} + -+#define RK3228_DIV_CPU_MASK 0x1f -+#define RK3228_DIV_CPU_SHIFT 8 ++static void rockchip_rk3399_pll_get_params(struct rockchip_clk_pll *pll, ++ struct rockchip_pll_rate_table *rate) ++{ ++ u32 pllcon; + -+#define RK3228_DIV_PERI_MASK 0xf -+#define RK3228_DIV_PERI_SHIFT 0 -+#define RK3228_DIV_ACLK_MASK 0x7 -+#define RK3228_DIV_ACLK_SHIFT 4 -+#define RK3228_DIV_HCLK_MASK 0x3 -+#define RK3228_DIV_HCLK_SHIFT 8 -+#define RK3228_DIV_PCLK_MASK 0x7 -+#define RK3228_DIV_PCLK_SHIFT 12 ++ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(0)); ++ rate->fbdiv = ((pllcon >> RK3399_PLLCON0_FBDIV_SHIFT) ++ & RK3399_PLLCON0_FBDIV_MASK); + -+#define RK3228_CLKSEL1(_core_aclk_div, _core_peri_div) \ -+{ \ -+ .reg = RK2928_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_core_peri_div, RK3228_DIV_PERI_MASK, \ -+ RK3228_DIV_PERI_SHIFT) | \ -+ HIWORD_UPDATE(_core_aclk_div, RK3228_DIV_ACLK_MASK, \ -+ RK3228_DIV_ACLK_SHIFT), \ -+} ++ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(1)); ++ rate->refdiv = ((pllcon >> RK3399_PLLCON1_REFDIV_SHIFT) ++ & RK3399_PLLCON1_REFDIV_MASK); ++ rate->postdiv1 = ((pllcon >> RK3399_PLLCON1_POSTDIV1_SHIFT) ++ & RK3399_PLLCON1_POSTDIV1_MASK); ++ rate->postdiv2 = ((pllcon >> RK3399_PLLCON1_POSTDIV2_SHIFT) ++ & RK3399_PLLCON1_POSTDIV2_MASK); + -+#define RK3228_CPUCLK_RATE(_prate, _core_aclk_div, _core_peri_div) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3228_CLKSEL1(_core_aclk_div, _core_peri_div), \ -+ }, \ ++ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); ++ rate->frac = ((pllcon >> RK3399_PLLCON2_FRAC_SHIFT) ++ & RK3399_PLLCON2_FRAC_MASK); ++ ++ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(3)); ++ rate->dsmpd = ((pllcon >> RK3399_PLLCON3_DSMPD_SHIFT) ++ & RK3399_PLLCON3_DSMPD_MASK); +} + -+static struct rockchip_cpuclk_rate_table rk3228_cpuclk_rates[] __initdata = { -+ RK3228_CPUCLK_RATE(1800000000, 1, 7), -+ RK3228_CPUCLK_RATE(1704000000, 1, 7), -+ RK3228_CPUCLK_RATE(1608000000, 1, 7), -+ RK3228_CPUCLK_RATE(1512000000, 1, 7), -+ RK3228_CPUCLK_RATE(1488000000, 1, 5), -+ RK3228_CPUCLK_RATE(1464000000, 1, 5), -+ RK3228_CPUCLK_RATE(1416000000, 1, 5), -+ RK3228_CPUCLK_RATE(1392000000, 1, 5), -+ RK3228_CPUCLK_RATE(1296000000, 1, 5), -+ RK3228_CPUCLK_RATE(1200000000, 1, 5), -+ RK3228_CPUCLK_RATE(1104000000, 1, 5), -+ RK3228_CPUCLK_RATE(1008000000, 1, 5), -+ RK3228_CPUCLK_RATE(912000000, 1, 5), -+ RK3228_CPUCLK_RATE(816000000, 1, 3), -+ RK3228_CPUCLK_RATE(696000000, 1, 3), -+ RK3228_CPUCLK_RATE(600000000, 1, 3), -+ RK3228_CPUCLK_RATE(408000000, 1, 1), -+ RK3228_CPUCLK_RATE(312000000, 1, 1), -+ RK3228_CPUCLK_RATE(216000000, 1, 1), -+ RK3228_CPUCLK_RATE(96000000, 1, 1), -+}; ++static unsigned long rockchip_rk3399_pll_recalc_rate(struct clk_hw *hw, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct rockchip_pll_rate_table cur; ++ u64 rate64 = prate; + -+static const struct rockchip_cpuclk_reg_data rk3228_cpuclk_data = { -+ .core_reg[0] = RK2928_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x1, -+}; ++ if (pll->sel && pll->scaling) ++ return pll->scaling; + -+PNAME(mux_pll_p) = { "clk_24m", "xin24m" }; ++ rockchip_rk3399_pll_get_params(pll, &cur); + -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr", "apll_ddr" }; -+PNAME(mux_usb480m_phy_p) = { "usb480m_phy0", 
"usb480m_phy1" }; -+PNAME(mux_usb480m_p) = { "usb480m_phy", "xin24m" }; -+PNAME(mux_hdmiphy_p) = { "hdmiphy_phy", "xin24m" }; -+PNAME(mux_aclk_cpu_src_p) = { "cpll_aclk_cpu", "gpll_aclk_cpu", "hdmiphy_aclk_cpu" }; ++ rate64 *= cur.fbdiv; ++ do_div(rate64, cur.refdiv); + -+PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy", "usb480m" }; -+PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "hdmiphy" }; -+PNAME(mux_pll_src_2plls_p) = { "cpll", "gpll" }; -+PNAME(mux_sclk_hdmi_cec_p) = { "cpll", "gpll", "xin24m" }; -+PNAME(mux_aclk_peri_src_p) = { "cpll_peri", "gpll_peri", "hdmiphy_peri" }; -+PNAME(mux_mmc_src_p) = { "cpll", "gpll", "xin24m", "usb480m" }; -+PNAME(mux_pll_src_cpll_gpll_usb480m_p) = { "cpll", "gpll", "usb480m" }; ++ if (cur.dsmpd == 0) { ++ /* fractional mode */ ++ u64 frac_rate64 = prate * cur.frac; + -+PNAME(mux_sclk_rga_p) = { "gpll", "cpll", "sclk_rga_src" }; ++ do_div(frac_rate64, cur.refdiv); ++ rate64 += frac_rate64 >> 24; ++ } + -+PNAME(mux_sclk_vop_src_p) = { "gpll_vop", "cpll_vop" }; -+PNAME(mux_dclk_vop_p) = { "hdmiphy", "sclk_vop_pre" }; ++ do_div(rate64, cur.postdiv1); ++ do_div(rate64, cur.postdiv2); + -+PNAME(mux_i2s0_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s1_pre_p) = { "i2s1_src", "i2s1_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s_out_p) = { "i2s1_pre", "xin12m" }; -+PNAME(mux_i2s2_p) = { "i2s2_src", "i2s2_frac", "xin12m" }; -+PNAME(mux_sclk_spdif_p) = { "sclk_spdif_src", "spdif_frac", "xin12m" }; ++ return (unsigned long)rate64; ++} + -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++static int rockchip_rk3399_pll_set_params(struct rockchip_clk_pll *pll, ++ const struct rockchip_pll_rate_table *rate) ++{ ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; ++ struct rockchip_pll_rate_table cur; ++ u32 pllcon; ++ int rate_change_remuxed = 0; ++ int cur_parent; ++ int ret; + -+PNAME(mux_sclk_mac_extclk_p) = { "ext_gmac", "phy_50m_out" }; -+PNAME(mux_sclk_gmac_pre_p) = { "sclk_gmac_src", "sclk_mac_extclk" }; -+PNAME(mux_sclk_macphy_p) = { "sclk_gmac_src", "ext_gmac" }; ++ pr_debug("%s: rate settings for %lu fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ __func__, rate->rate, rate->fbdiv, rate->postdiv1, rate->refdiv, ++ rate->postdiv2, rate->dsmpd, rate->frac); + -+static struct rockchip_pll_clock rk3228_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), -+ RK2928_MODE_CON, 0, 7, 0, rk3228_pll_rates), -+ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(3), -+ RK2928_MODE_CON, 4, 6, 0, NULL), -+ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(6), -+ RK2928_MODE_CON, 8, 8, 0, NULL), -+ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(9), -+ RK2928_MODE_CON, 12, 9, ROCKCHIP_PLL_SYNC_RATE, rk3228_pll_rates), -+}; ++ rockchip_rk3399_pll_get_params(pll, &cur); ++ cur.rate = 0; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); ++ if (cur_parent == PLL_MODE_NORM) { ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ rate_change_remuxed = 1; ++ } + -+static struct rockchip_clk_branch rk3228_i2s0_fracmux __initdata = -+ MUX(0, "i2s0_pre", 
mux_i2s0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(9), 8, 2, MFLAGS); ++ /* set pll power down */ ++ writel(HIWORD_UPDATE(RK3399_PLLCON3_PWRDOWN, ++ RK3399_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3399_PLLCON(3)); + -+static struct rockchip_clk_branch rk3228_i2s1_fracmux __initdata = -+ MUX(0, "i2s1_pre", mux_i2s1_pre_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); ++ /* update pll values */ ++ writel_relaxed(HIWORD_UPDATE(rate->fbdiv, RK3399_PLLCON0_FBDIV_MASK, ++ RK3399_PLLCON0_FBDIV_SHIFT), ++ pll->reg_base + RK3399_PLLCON(0)); + -+static struct rockchip_clk_branch rk3228_i2s2_fracmux __initdata = -+ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS); ++ writel_relaxed(HIWORD_UPDATE(rate->refdiv, RK3399_PLLCON1_REFDIV_MASK, ++ RK3399_PLLCON1_REFDIV_SHIFT) | ++ HIWORD_UPDATE(rate->postdiv1, RK3399_PLLCON1_POSTDIV1_MASK, ++ RK3399_PLLCON1_POSTDIV1_SHIFT) | ++ HIWORD_UPDATE(rate->postdiv2, RK3399_PLLCON1_POSTDIV2_MASK, ++ RK3399_PLLCON1_POSTDIV2_SHIFT), ++ pll->reg_base + RK3399_PLLCON(1)); + -+static struct rockchip_clk_branch rk3228_spdif_fracmux __initdata = -+ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(6), 8, 2, MFLAGS); ++ /* xPLL CON2 is not HIWORD_MASK */ ++ pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); ++ pllcon &= ~(RK3399_PLLCON2_FRAC_MASK << RK3399_PLLCON2_FRAC_SHIFT); ++ pllcon |= rate->frac << RK3399_PLLCON2_FRAC_SHIFT; ++ writel_relaxed(pllcon, pll->reg_base + RK3399_PLLCON(2)); + -+static struct rockchip_clk_branch rk3228_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); ++ writel_relaxed(HIWORD_UPDATE(rate->dsmpd, RK3399_PLLCON3_DSMPD_MASK, ++ RK3399_PLLCON3_DSMPD_SHIFT), ++ pll->reg_base + RK3399_PLLCON(3)); + -+static struct rockchip_clk_branch rk3228_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); ++ /* set pll power up */ ++ writel(HIWORD_UPDATE(0, ++ RK3399_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3399_PLLCON(3)); + -+static struct rockchip_clk_branch rk3228_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); ++ /* wait for the pll to lock */ ++ ret = rockchip_rk3399_pll_wait_lock(pll); ++ if (ret) { ++ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", ++ __func__); ++ rockchip_rk3399_pll_set_params(pll, &cur); ++ } + -+static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++ if (rate_change_remuxed) ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); + -+ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(4), 8, 5, DFLAGS), ++ return ret; ++} + -+ /* PD_DDR */ -+ GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(0, "ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(26), 8, 2, MFLAGS, 0, 3, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK2928_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(0, "ddrc", "ddrphy_pre", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(8), 5, GFLAGS), -+ FACTOR_GATE(0, "ddrphy", "ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, -+ RK2928_CLKGATE_CON(7), 0, GFLAGS), ++static 
int rockchip_rk3399_pll_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ unsigned long old_rate = rockchip_rk3399_pll_recalc_rate(hw, prate); ++ int ret; + -+ /* PD_CORE */ -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(4), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "armcore", "armclk", CLK_IGNORE_UNUSED, -+ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK2928_CLKGATE_CON(4), 0, GFLAGS), ++ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", ++ __func__, __clk_get_name(hw->clk), old_rate, drate, prate); + -+ /* PD_MISC */ -+ MUX(SCLK_HDMI_PHY, "hdmiphy", mux_hdmiphy_p, CLK_SET_RATE_PARENT, -+ RK2928_MISC_CON, 13, 1, MFLAGS), -+ MUX(0, "usb480m_phy", mux_usb480m_phy_p, CLK_SET_RATE_PARENT, -+ RK2928_MISC_CON, 14, 1, MFLAGS), -+ MUX(0, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK2928_MISC_CON, 15, 1, MFLAGS), ++ /* Get required rate settings from table */ ++ rate = rockchip_get_pll_settings(pll, drate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, ++ drate, __clk_get_name(hw->clk)); ++ return -EINVAL; ++ } + -+ /* PD_BUS */ -+ GATE(0, "hdmiphy_aclk_cpu", "hdmiphy", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "gpll_aclk_cpu", "gpll", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "cpll_aclk_cpu", "cpll", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS), -+ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS, -+ RK2928_CLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_bus_src", "aclk_cpu_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(1), 12, 3, DFLAGS, -+ RK2928_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(PCLK_CPU, "pclk_cpu", "pclk_bus_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(0, "pclk_phy_pre", "pclk_bus_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(0, "pclk_ddr_pre", "pclk_bus_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(6), 13, GFLAGS), ++ ret = rockchip_rk3399_pll_set_params(pll, rate); ++ if (ret) ++ pll->scaling = 0; + -+ /* PD_VIDEO */ -+ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(32), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 11, GFLAGS), -+ FACTOR_GATE(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4, -+ RK2928_CLKGATE_CON(4), 4, GFLAGS), ++ return ret; ++} + -+ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 2, GFLAGS), -+ FACTOR_GATE(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4, -+ RK2928_CLKGATE_CON(4), 5, GFLAGS), ++static int rockchip_rk3399_pll_enable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); + -+ COMPOSITE(SCLK_VDEC_CABAC, 
"sclk_vdec_cabac", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(28), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 3, GFLAGS), ++ writel(HIWORD_UPDATE(0, RK3399_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3399_PLLCON(3)); ++ rockchip_rk3399_pll_wait_lock(pll); + -+ COMPOSITE(SCLK_VDEC_CORE, "sclk_vdec_core", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(34), 13, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 4, GFLAGS), ++ return 0; ++} + -+ /* PD_VIO */ -+ COMPOSITE(ACLK_IEP_PRE, "aclk_iep_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(31), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 0, GFLAGS), -+ DIV(HCLK_VIO_PRE, "hclk_vio_pre", "aclk_iep_pre", 0, -+ RK2928_CLKSEL_CON(2), 0, 5, DFLAGS), ++static void rockchip_rk3399_pll_disable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); + -+ COMPOSITE(ACLK_HDCP_PRE, "aclk_hdcp_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(31), 13, 2, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 4, GFLAGS), ++ writel(HIWORD_UPDATE(RK3399_PLLCON3_PWRDOWN, ++ RK3399_PLLCON3_PWRDOWN, 0), ++ pll->reg_base + RK3399_PLLCON(3)); ++} + -+ MUX(0, "sclk_rga_src", mux_pll_src_4plls_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(33), 13, 2, MFLAGS), -+ COMPOSITE_NOMUX(ACLK_RGA_PRE, "aclk_rga_pre", "sclk_rga_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(33), 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE(SCLK_RGA, "sclk_rga", mux_sclk_rga_p, 0, -+ RK2928_CLKSEL_CON(22), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 6, GFLAGS), ++static int rockchip_rk3399_pll_is_enabled(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ u32 pllcon = readl(pll->reg_base + RK3399_PLLCON(3)); + -+ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(33), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 1, GFLAGS), ++ return !(pllcon & RK3399_PLLCON3_PWRDOWN); ++} + -+ COMPOSITE(SCLK_HDCP, "sclk_hdcp", mux_pll_src_3plls_p, 0, -+ RK2928_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK2928_CLKGATE_CON(3), 5, GFLAGS), ++static int rockchip_rk3399_pll_init(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ struct rockchip_pll_rate_table cur; ++ unsigned long drate; + -+ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, -+ RK2928_CLKGATE_CON(3), 7, GFLAGS), ++ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) ++ return 0; + -+ COMPOSITE(SCLK_HDMI_CEC, "sclk_hdmi_cec", mux_sclk_hdmi_cec_p, 0, -+ RK2928_CLKSEL_CON(21), 14, 2, MFLAGS, 0, 14, DFLAGS, -+ RK2928_CLKGATE_CON(3), 8, GFLAGS), ++ drate = clk_hw_get_rate(hw); ++ rate = rockchip_get_pll_settings(pll, drate); + -+ /* PD_PERI */ -+ GATE(0, "cpll_peri", "cpll", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(0, "gpll_peri", "gpll", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(0, "hdmiphy_peri", "hdmiphy", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_NOGATE(0, "aclk_peri_src", mux_aclk_peri_src_p, CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 10, 2, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 12, 3, DFLAGS, -+ RK2928_CLKGATE_CON(5), 2, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS, -+ RK2928_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK2928_CLKGATE_CON(5), 0, 
GFLAGS), ++ /* when no rate setting for the current rate, rely on clk_set_rate */ ++ if (!rate) ++ return 0; + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 8, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK2928_CLKGATE_CON(6), 10, GFLAGS), ++ rockchip_rk3399_pll_get_params(pll, &cur); + -+ COMPOSITE(SCLK_CRYPTO, "sclk_crypto", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(24), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 7, GFLAGS), ++ pr_debug("%s: pll %s@%lu: Hz\n", __func__, __clk_get_name(hw->clk), ++ drate); ++ pr_debug("old - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ cur.fbdiv, cur.postdiv1, cur.refdiv, cur.postdiv2, ++ cur.dsmpd, cur.frac); ++ pr_debug("new - fbdiv: %d, postdiv1: %d, refdiv: %d, postdiv2: %d, dsmpd: %d, frac: %d\n", ++ rate->fbdiv, rate->postdiv1, rate->refdiv, rate->postdiv2, ++ rate->dsmpd, rate->frac); + -+ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(22), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 6, GFLAGS), ++ if (rate->fbdiv != cur.fbdiv || rate->postdiv1 != cur.postdiv1 || ++ rate->refdiv != cur.refdiv || rate->postdiv2 != cur.postdiv2 || ++ rate->dsmpd != cur.dsmpd || ++ (!cur.dsmpd && (rate->frac != cur.frac))) { ++ struct clk *parent = clk_get_parent(hw->clk); + -+ GATE(SCLK_HSADC, "sclk_hsadc", "ext_hsadc", 0, -+ RK2928_CLKGATE_CON(10), 12, GFLAGS), ++ if (!parent) { ++ pr_warn("%s: parent of %s not available\n", ++ __func__, __clk_get_name(hw->clk)); ++ return 0; ++ } + -+ COMPOSITE(SCLK_WIFI, "sclk_wifi", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK2928_CLKSEL_CON(23), 5, 2, MFLAGS, 0, 6, DFLAGS, -+ RK2928_CLKGATE_CON(2), 15, GFLAGS), ++ pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n", ++ __func__, __clk_get_name(hw->clk)); ++ rockchip_rk3399_pll_set_params(pll, rate); ++ } + -+ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK2928_CLKGATE_CON(2), 11, GFLAGS), ++ return 0; ++} + -+ COMPOSITE_NODIV(SCLK_SDIO_SRC, "sclk_sdio_src", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(11), 10, 2, MFLAGS, -+ RK2928_CLKGATE_CON(2), 13, GFLAGS), -+ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, -+ RK2928_CLKSEL_CON(12), 0, 8, DFLAGS), ++static const struct clk_ops rockchip_rk3399_pll_clk_norate_ops = { ++ .recalc_rate = rockchip_rk3399_pll_recalc_rate, ++ .enable = rockchip_rk3399_pll_enable, ++ .disable = rockchip_rk3399_pll_disable, ++ .is_enabled = rockchip_rk3399_pll_is_enabled, ++}; + -+ COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0, -+ RK2928_CLKSEL_CON(11), 12, 2, MFLAGS, -+ RK2928_CLKGATE_CON(2), 14, GFLAGS), -+ DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0, -+ RK2928_CLKSEL_CON(12), 8, 8, DFLAGS), ++static const struct clk_ops rockchip_rk3399_pll_clk_ops = { ++ .recalc_rate = rockchip_rk3399_pll_recalc_rate, ++ .round_rate = rockchip_pll_round_rate, ++ .set_rate = rockchip_rk3399_pll_set_rate, ++ .enable = rockchip_rk3399_pll_enable, ++ .disable = rockchip_rk3399_pll_disable, ++ .is_enabled = rockchip_rk3399_pll_is_enabled, ++ .init = rockchip_rk3399_pll_init, ++}; + -+ /* -+ * Clock-Architecture 
Diagram 2 -+ */ ++/** ++ * PLL used in RK3588 ++ */ + -+ GATE(0, "gpll_vop", "gpll", 0, -+ RK2928_CLKGATE_CON(3), 1, GFLAGS), -+ GATE(0, "cpll_vop", "cpll", 0, -+ RK2928_CLKGATE_CON(3), 1, GFLAGS), -+ MUX(0, "sclk_vop_src", mux_sclk_vop_src_p, 0, -+ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS), -+ DIV(DCLK_HDMI_PHY, "dclk_hdmiphy", "sclk_vop_src", 0, -+ RK2928_CLKSEL_CON(29), 0, 3, DFLAGS), -+ DIV(0, "sclk_vop_pre", "sclk_vop_src", 0, -+ RK2928_CLKSEL_CON(27), 8, 8, DFLAGS), -+ MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, 0, -+ RK2928_CLKSEL_CON(27), 1, 1, MFLAGS), ++#define RK3588_PLLCON(i) (i * 0x4) ++#define RK3588_PLLCON0_M_MASK 0x3ff ++#define RK3588_PLLCON0_M_SHIFT 0 ++#define RK3588_PLLCON1_P_MASK 0x3f ++#define RK3588_PLLCON1_P_SHIFT 0 ++#define RK3588_PLLCON1_S_MASK 0x7 ++#define RK3588_PLLCON1_S_SHIFT 6 ++#define RK3588_PLLCON2_K_MASK 0xffff ++#define RK3588_PLLCON2_K_SHIFT 0 ++#define RK3588_PLLCON1_PWRDOWN BIT(13) ++#define RK3588_PLLCON6_LOCK_STATUS BIT(15) + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++static int rockchip_rk3588_pll_wait_lock(struct rockchip_clk_pll *pll) ++{ ++ u32 pllcon; ++ int ret; + -+ COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(9), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(8), 0, -+ RK2928_CLKGATE_CON(0), 4, GFLAGS, -+ &rk3228_i2s0_fracmux), -+ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(0), 5, GFLAGS), ++ /* ++ * Lock time typical 250, max 500 input clock cycles @24MHz ++ * So define a very safe maximum of 1000us, meaning 24000 cycles. ++ */ ++ ret = readl_relaxed_poll_timeout(pll->reg_base + RK3588_PLLCON(6), ++ pllcon, ++ pllcon & RK3588_PLLCON6_LOCK_STATUS, ++ 0, 1000); ++ if (ret) ++ pr_err("%s: timeout waiting for pll to lock\n", __func__); + -+ COMPOSITE(0, "i2s1_src", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(3), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(7), 0, -+ RK2928_CLKGATE_CON(0), 11, GFLAGS, -+ &rk3228_i2s1_fracmux), -+ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_out", mux_i2s_out_p, 0, -+ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, -+ RK2928_CLKGATE_CON(0), 13, GFLAGS), ++ return ret; ++} + -+ COMPOSITE(0, "i2s2_src", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(16), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(30), 0, -+ RK2928_CLKGATE_CON(0), 8, GFLAGS, -+ &rk3228_i2s2_fracmux), -+ GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, -+ RK2928_CLKGATE_CON(0), 9, GFLAGS), ++static long rockchip_rk3588_pll_round_rate(struct clk_hw *hw, ++ unsigned long drate, unsigned long *prate) ++{ ++ if ((drate < 37 * MHZ) || (drate > 4500 * MHZ)) ++ return -EINVAL; ++ else ++ return drate; ++} + -+ COMPOSITE(0, "sclk_spdif_src", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_frac", "sclk_spdif_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(20), 0, -+ RK2928_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3228_spdif_fracmux), ++static void rockchip_rk3588_pll_get_params(struct rockchip_clk_pll *pll, ++ struct rockchip_pll_rate_table *rate) ++{ ++ u32 pllcon; + -+ GATE(0, 
"jtag", "ext_jtag", CLK_IGNORE_UNUSED, -+ RK2928_CLKGATE_CON(1), 3, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(0)); ++ rate->m = ((pllcon >> RK3588_PLLCON0_M_SHIFT) ++ & RK3588_PLLCON0_M_MASK); + -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", 0, -+ RK2928_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", 0, -+ RK2928_CLKGATE_CON(1), 6, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(1)); ++ rate->p = ((pllcon >> RK3588_PLLCON1_P_SHIFT) ++ & RK3588_PLLCON1_P_MASK); ++ rate->s = ((pllcon >> RK3588_PLLCON1_S_SHIFT) ++ & RK3588_PLLCON1_S_MASK); + -+ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, -+ RK2928_CLKSEL_CON(24), 6, 10, DFLAGS, -+ RK2928_CLKGATE_CON(2), 8, GFLAGS), ++ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(2)); ++ rate->k = ((pllcon >> RK3588_PLLCON2_K_SHIFT) ++ & RK3588_PLLCON2_K_MASK); ++} + -+ COMPOSITE(0, "aclk_gpu_pre", mux_pll_src_4plls_p, 0, -+ RK2928_CLKSEL_CON(34), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(3), 13, GFLAGS), ++static unsigned long rockchip_rk3588_pll_recalc_rate(struct clk_hw *hw, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ struct rockchip_pll_rate_table cur; ++ u64 rate64 = prate, postdiv; + -+ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(2), 9, GFLAGS), ++ if (pll->sel && pll->scaling) ++ return pll->scaling; + -+ /* PD_UART */ -+ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK2928_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE(0, "uart1_src", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK2928_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK2928_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE(0, "uart2_src", mux_pll_src_cpll_gpll_usb480m_p, -+ 0, RK2928_CLKSEL_CON(15), 12, 2, -+ MFLAGS, 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(17), 0, -+ RK2928_CLKGATE_CON(1), 9, GFLAGS, -+ &rk3228_uart0_fracmux), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(18), 0, -+ RK2928_CLKGATE_CON(1), 11, GFLAGS, -+ &rk3228_uart1_fracmux), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(19), 0, -+ RK2928_CLKGATE_CON(1), 13, GFLAGS, -+ &rk3228_uart2_fracmux), ++ rockchip_rk3588_pll_get_params(pll, &cur); ++ if (cur.p == 0) ++ return prate; + -+ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(2), 14, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 0, GFLAGS), ++ rate64 *= cur.m; ++ do_div(rate64, cur.p); + -+ COMPOSITE(SCLK_MAC_SRC, "sclk_gmac_src", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(5), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK2928_CLKGATE_CON(1), 7, GFLAGS), -+ MUX(SCLK_MAC_EXTCLK, "sclk_mac_extclk", mux_sclk_mac_extclk_p, 0, -+ RK2928_CLKSEL_CON(29), 10, 1, MFLAGS), -+ MUX(SCLK_MAC, "sclk_gmac_pre", mux_sclk_gmac_pre_p, 0, -+ RK2928_CLKSEL_CON(5), 5, 1, MFLAGS), -+ GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_gmac_pre", 0, -+ RK2928_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_gmac_pre", 0, -+ RK2928_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_gmac_pre", 0, -+ RK2928_CLKGATE_CON(5), 5, GFLAGS), -+ GATE(SCLK_MAC_TX, "sclk_mac_tx", "sclk_gmac_pre", 0, -+ RK2928_CLKGATE_CON(5), 6, GFLAGS), -+ COMPOSITE(SCLK_MAC_PHY, 
"sclk_macphy", mux_sclk_macphy_p, 0, -+ RK2928_CLKSEL_CON(29), 12, 1, MFLAGS, 8, 2, DFLAGS, -+ RK2928_CLKGATE_CON(5), 7, GFLAGS), -+ COMPOSITE(SCLK_MAC_OUT, "sclk_gmac_out", mux_pll_src_2plls_p, 0, -+ RK2928_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK2928_CLKGATE_CON(2), 2, GFLAGS), ++ if (cur.k & BIT(15)) { ++ /* fractional mode */ ++ u64 frac_rate64; + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ cur.k = (~(cur.k - 1)) & RK3588_PLLCON2_K_MASK; ++ frac_rate64 = prate * cur.k; ++ postdiv = cur.p; ++ postdiv *= 65536; ++ do_div(frac_rate64, postdiv); ++ rate64 -= frac_rate64; ++ } else { ++ /* fractional mode */ ++ u64 frac_rate64 = prate * cur.k; + -+ /* PD_VOP */ -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK2928_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(0, "aclk_rga_noc", "aclk_rga_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 2, GFLAGS), -+ GATE(0, "aclk_iep_noc", "aclk_iep_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 9, GFLAGS), ++ postdiv = cur.p; ++ postdiv *= 65536; ++ do_div(frac_rate64, postdiv); ++ rate64 += frac_rate64; ++ } ++ rate64 = rate64 >> cur.s; + -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(0, "aclk_vop_noc", "aclk_vop_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 12, GFLAGS), ++ return (unsigned long)rate64; ++} + -+ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_hdcp_pre", 0, RK2928_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(0, "aclk_hdcp_noc", "aclk_hdcp_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 10, GFLAGS), ++static int rockchip_rk3588_pll_set_params(struct rockchip_clk_pll *pll, ++ const struct rockchip_pll_rate_table *rate) ++{ ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; ++ struct rockchip_pll_rate_table cur; ++ int rate_change_remuxed = 0; ++ int cur_parent; ++ int ret; + -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(0, "hclk_vio_ahb_arbi", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 7, GFLAGS), -+ GATE(0, "hclk_vio_noc", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 8, GFLAGS), -+ GATE(0, "hclk_vop_noc", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 13, GFLAGS), -+ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(HCLK_HDCP_MMU, "hclk_hdcp_mmu", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 12, GFLAGS), -+ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_VIO_H2P, "pclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 11, GFLAGS), ++ pr_debug("%s: rate settings for %lu p: %d, m: %d, s: %d, k: %d\n", ++ __func__, rate->rate, rate->p, rate->m, rate->s, rate->k); + -+ /* PD_PERI */ -+ GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK2928_CLKGATE_CON(11), 4, GFLAGS), ++ rockchip_rk3588_pll_get_params(pll, &cur); ++ cur.rate = 0; + -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, 
RK2928_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(0, "hclk_host0_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 7, GFLAGS), -+ GATE(HCLK_HOST1, "hclk_host1", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 8, GFLAGS), -+ GATE(0, "hclk_host1_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 12, GFLAGS), -+ GATE(0, "hclk_otg_pmu", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 13, GFLAGS), -+ GATE(0, "hclk_host2_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 14, GFLAGS), -+ GATE(0, "hclk_peri_noc", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(12), 1, GFLAGS), ++ if (pll->type == pll_rk3588) { ++ cur_parent = pll_mux_ops->get_parent(&pll_mux->hw); ++ if (cur_parent == PLL_MODE_NORM) { ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); ++ rate_change_remuxed = 1; ++ } ++ } + -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK2928_CLKGATE_CON(11), 5, GFLAGS), -+ GATE(0, "pclk_peri_noc", "pclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(12), 2, GFLAGS), ++ /* set pll power down */ ++ writel(HIWORD_UPDATE(RK3588_PLLCON1_PWRDOWN, ++ RK3588_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3588_PLLCON(1)); + -+ /* PD_GPU */ -+ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", 0, RK2928_CLKGATE_CON(7), 14, GFLAGS), -+ GATE(0, "aclk_gpu_noc", "aclk_gpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(7), 15, GFLAGS), ++ /* update pll values */ ++ writel_relaxed(HIWORD_UPDATE(rate->m, RK3588_PLLCON0_M_MASK, ++ RK3588_PLLCON0_M_SHIFT), ++ pll->reg_base + RK3588_PLLCON(0)); + -+ /* PD_BUS */ -+ GATE(0, "sclk_initmem_mbist", "aclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 1, GFLAGS), -+ GATE(0, "aclk_initmem", "aclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac_bus", "aclk_cpu", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(0, "aclk_bus_noc", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS), ++ writel_relaxed(HIWORD_UPDATE(rate->p, RK3588_PLLCON1_P_MASK, ++ RK3588_PLLCON1_P_SHIFT) | ++ HIWORD_UPDATE(rate->s, RK3588_PLLCON1_S_MASK, ++ RK3588_PLLCON1_S_SHIFT), ++ pll->reg_base + RK3588_PLLCON(1)); + -+ GATE(0, "hclk_rom", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 8, GFLAGS), -+ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(HCLK_TSP, "hclk_tsp", "hclk_cpu", 0, RK2928_CLKGATE_CON(10), 11, GFLAGS), -+ GATE(HCLK_M_CRYPTO, "hclk_crypto_mst", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), -+ GATE(HCLK_S_CRYPTO, "hclk_crypto_slv", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), ++ writel_relaxed(HIWORD_UPDATE(rate->k, RK3588_PLLCON2_K_MASK, ++ RK3588_PLLCON2_K_SHIFT), ++ pll->reg_base + RK3588_PLLCON(2)); + -+ GATE(0, "pclk_ddrupctl", "pclk_ddr_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(0, "pclk_ddrmon", "pclk_ddr_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(0, "pclk_msch_noc", "pclk_ddr_pre", CLK_IS_CRITICAL, 
RK2928_CLKGATE_CON(10), 2, GFLAGS), ++ /* set pll power up */ ++ writel(HIWORD_UPDATE(0, ++ RK3588_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3588_PLLCON(1)); + -+ GATE(PCLK_EFUSE_1024, "pclk_efuse_1024", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(PCLK_EFUSE_256, "pclk_efuse_256", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 14, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 15, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(0, "pclk_stimer", "pclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(PCLK_PWM, "pclk_rk_pwm", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 9, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 11, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 13, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 14, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(0, "pclk_cru", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(0, "pclk_sim", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 3, GFLAGS), ++ /* wait for the pll to lock */ ++ ret = rockchip_rk3588_pll_wait_lock(pll); ++ if (ret) { ++ pr_warn("%s: pll update unsuccessful, trying to restore old params\n", ++ __func__); ++ rockchip_rk3588_pll_set_params(pll, &cur); ++ } + -+ GATE(0, "pclk_ddrphy", "pclk_phy_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 3, GFLAGS), -+ GATE(0, "pclk_acodecphy", "pclk_phy_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(PCLK_HDMI_PHY, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(0, "pclk_phy_noc", "pclk_phy_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 9, GFLAGS), ++ if ((pll->type == pll_rk3588) && rate_change_remuxed) ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); + -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, RK2928_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(0, "aclk_vpu_noc", "aclk_vpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, RK2928_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(0, "aclk_rkvdec_noc", "aclk_rkvdec_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, RK2928_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(0, "hclk_vpu_noc", "hclk_vpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, RK2928_CLKGATE_CON(15), 3, GFLAGS), -+ 
GATE(0, "hclk_rkvdec_noc", "hclk_rkvdec_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 7, GFLAGS), ++ return ret; ++} + -+ /* PD_MMC */ -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3228_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 1), ++static int rockchip_rk3588_pll_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct rockchip_pll_rate_table *rate; ++ unsigned long old_rate = rockchip_rk3588_pll_recalc_rate(hw, prate); ++ int ret; + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3228_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3228_SDIO_CON1, 1), ++ pr_debug("%s: changing %s from %lu to %lu with a parent rate of %lu\n", ++ __func__, __clk_get_name(hw->clk), old_rate, drate, prate); + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3228_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3228_EMMC_CON1, 1), -+}; ++ /* Get required rate settings from table */ ++ rate = rockchip_get_pll_settings(pll, drate); ++ if (!rate) { ++ pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__, ++ drate, __clk_get_name(hw->clk)); ++ return -EINVAL; ++ } + -+static void __iomem *rk3228_cru_base; ++ ret = rockchip_rk3588_pll_set_params(pll, rate); ++ if (ret) ++ pll->scaling = 0; + -+static void rk3228_dump_cru(void) -+{ -+ if (rk3228_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3228_cru_base, -+ 0x1f8, false); -+ } ++ return ret; +} + -+static void __init rk3228_clk_init(struct device_node *np) ++static int rockchip_rk3588_pll_enable(struct clk_hw *hw) +{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **clks; ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++ writel(HIWORD_UPDATE(0, RK3588_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3588_PLLCON(1)); ++ rockchip_rk3588_pll_wait_lock(pll); + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ clks = ctx->clk_data.clks; ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_NORM); + -+ rockchip_clk_register_plls(ctx, rk3228_pll_clks, -+ ARRAY_SIZE(rk3228_pll_clks), -+ RK3228_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk3228_clk_branches, -+ ARRAY_SIZE(rk3228_clk_branches)); ++ return 0; ++} + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3228_cpuclk_data, rk3228_cpuclk_rates, -+ ARRAY_SIZE(rk3228_cpuclk_rates)); ++static void rockchip_rk3588_pll_disable(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ const struct clk_ops *pll_mux_ops = pll->pll_mux_ops; ++ struct clk_mux *pll_mux = &pll->pll_mux; + -+ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ pll_mux_ops->set_parent(&pll_mux->hw, PLL_MODE_SLOW); + -+ rockchip_register_restart_notifier(ctx, RK3228_GLB_SRST_FST, NULL); ++ writel(HIWORD_UPDATE(RK3588_PLLCON1_PWRDOWN, ++ RK3588_PLLCON1_PWRDOWN, 0), ++ pll->reg_base + RK3588_PLLCON(1)); ++} + -+ rockchip_clk_of_add_provider(np, ctx); ++static int 
rockchip_rk3588_pll_is_enabled(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); ++ u32 pllcon = readl(pll->reg_base + RK3588_PLLCON(1)); + -+ if (!rk_dump_cru) { -+ rk3228_cru_base = reg_base; -+ rk_dump_cru = rk3228_dump_cru; -+ } ++ return !(pllcon & RK3588_PLLCON1_PWRDOWN); +} -+CLK_OF_DECLARE(rk3228_cru, "rockchip,rk3228-cru", rk3228_clk_init); + -+static int __init clk_rk3228_probe(struct platform_device *pdev) ++static int rockchip_rk3588_pll_init(struct clk_hw *hw) +{ -+ struct device_node *np = pdev->dev.of_node; ++ struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw); + -+ rk3228_clk_init(np); ++ if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE)) ++ return 0; + + return 0; +} + -+static const struct of_device_id clk_rk3228_match_table[] = { -+ { -+ .compatible = "rockchip,rk3228-cru", -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rk3228_match_table); -+ -+static struct platform_driver clk_rk3228_driver = { -+ .driver = { -+ .name = "clk-rk3228", -+ .of_match_table = clk_rk3228_match_table, -+ }, ++static const struct clk_ops rockchip_rk3588_pll_clk_norate_ops = { ++ .recalc_rate = rockchip_rk3588_pll_recalc_rate, ++ .enable = rockchip_rk3588_pll_enable, ++ .disable = rockchip_rk3588_pll_disable, ++ .is_enabled = rockchip_rk3588_pll_is_enabled, +}; -+builtin_platform_driver_probe(clk_rk3228_driver, clk_rk3228_probe); -+ -+MODULE_DESCRIPTION("Rockchip RK3228 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3288.c b/drivers/clk/rockchip-oh/clk-rk3288.c -new file mode 100644 -index 000000000..f9c4678fc ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3288.c -@@ -0,0 +1,1063 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" -+#include -+ -+#define RK3288_GRF_SOC_CON(x) (0x244 + x * 4) -+#define RK3288_GRF_SOC_STATUS1 0x284 + -+enum rk3288_variant { -+ RK3288_CRU, -+ RK3288W_CRU, ++static const struct clk_ops rockchip_rk3588_pll_clk_ops = { ++ .recalc_rate = rockchip_rk3588_pll_recalc_rate, ++ .round_rate = rockchip_rk3588_pll_round_rate, ++ .set_rate = rockchip_rk3588_pll_set_rate, ++ .enable = rockchip_rk3588_pll_enable, ++ .disable = rockchip_rk3588_pll_disable, ++ .is_enabled = rockchip_rk3588_pll_is_enabled, ++ .init = rockchip_rk3588_pll_init, +}; + -+enum rk3288_plls { -+ apll, dpll, cpll, gpll, npll, -+}; ++#ifdef CONFIG_ROCKCHIP_CLK_COMPENSATION ++int rockchip_pll_clk_compensation(struct clk *clk, int ppm) ++{ ++ struct clk *parent = clk_get_parent(clk); ++ struct rockchip_clk_pll *pll; ++ static u32 frac, fbdiv, s, p; ++ bool negative; ++ u32 pllcon, pllcon0, pllcon2, fbdiv_mask, frac_mask, frac_shift; ++ u64 fracdiv, m, n; + -+static struct rockchip_pll_rate_table rk3288_pll_rates[] = { -+ RK3066_PLL_RATE(2208000000, 1, 92, 1), -+ RK3066_PLL_RATE(2184000000, 1, 91, 1), -+ RK3066_PLL_RATE(2160000000, 1, 90, 1), -+ RK3066_PLL_RATE(2136000000, 1, 89, 1), -+ RK3066_PLL_RATE(2112000000, 1, 88, 1), -+ RK3066_PLL_RATE(2088000000, 1, 87, 1), -+ RK3066_PLL_RATE(2064000000, 1, 86, 1), -+ RK3066_PLL_RATE(2040000000, 1, 85, 1), -+ RK3066_PLL_RATE(2016000000, 1, 84, 1), -+ RK3066_PLL_RATE(1992000000, 1, 83, 1), -+ RK3066_PLL_RATE(1968000000, 1, 82, 1), -+ RK3066_PLL_RATE(1944000000, 1, 81, 1), -+ RK3066_PLL_RATE(1920000000, 1, 80, 1), -+ RK3066_PLL_RATE(1896000000, 1, 79, 1), -+ RK3066_PLL_RATE(1872000000, 1, 78, 1), -+ 
RK3066_PLL_RATE(1848000000, 1, 77, 1), -+ RK3066_PLL_RATE(1824000000, 1, 76, 1), -+ RK3066_PLL_RATE(1800000000, 1, 75, 1), -+ RK3066_PLL_RATE(1776000000, 1, 74, 1), -+ RK3066_PLL_RATE(1752000000, 1, 73, 1), -+ RK3066_PLL_RATE(1728000000, 1, 72, 1), -+ RK3066_PLL_RATE(1704000000, 1, 71, 1), -+ RK3066_PLL_RATE(1680000000, 1, 70, 1), -+ RK3066_PLL_RATE(1656000000, 1, 69, 1), -+ RK3066_PLL_RATE(1632000000, 1, 68, 1), -+ RK3066_PLL_RATE(1608000000, 1, 67, 1), -+ RK3066_PLL_RATE(1560000000, 1, 65, 1), -+ RK3066_PLL_RATE(1512000000, 1, 63, 1), -+ RK3066_PLL_RATE(1488000000, 1, 62, 1), -+ RK3066_PLL_RATE(1464000000, 1, 61, 1), -+ RK3066_PLL_RATE(1440000000, 1, 60, 1), -+ RK3066_PLL_RATE(1416000000, 1, 59, 1), -+ RK3066_PLL_RATE(1392000000, 1, 58, 1), -+ RK3066_PLL_RATE(1368000000, 1, 57, 1), -+ RK3066_PLL_RATE(1344000000, 1, 56, 1), -+ RK3066_PLL_RATE(1320000000, 1, 55, 1), -+ RK3066_PLL_RATE(1296000000, 1, 54, 1), -+ RK3066_PLL_RATE(1272000000, 1, 53, 1), -+ RK3066_PLL_RATE(1248000000, 1, 52, 1), -+ RK3066_PLL_RATE(1224000000, 1, 51, 1), -+ RK3066_PLL_RATE(1200000000, 1, 50, 1), -+ RK3066_PLL_RATE(1188000000, 1, 99, 2), -+ RK3066_PLL_RATE(1176000000, 1, 49, 1), -+ RK3066_PLL_RATE(1128000000, 1, 47, 1), -+ RK3066_PLL_RATE(1104000000, 1, 46, 1), -+ RK3066_PLL_RATE(1008000000, 1, 84, 2), -+ RK3066_PLL_RATE( 912000000, 1, 76, 2), -+ RK3066_PLL_RATE( 891000000, 2, 297, 4), -+ RK3066_PLL_RATE( 888000000, 1, 74, 2), -+ RK3066_PLL_RATE( 816000000, 1, 68, 2), -+ RK3066_PLL_RATE( 798000000, 1, 133, 4), -+ RK3066_PLL_RATE( 792000000, 1, 66, 2), -+ RK3066_PLL_RATE( 768000000, 1, 64, 2), -+ RK3066_PLL_RATE( 742500000, 4, 495, 4), -+ RK3066_PLL_RATE( 696000000, 1, 58, 2), -+ RK3066_PLL_RATE_NB(621000000, 1, 207, 8, 1), -+ RK3066_PLL_RATE( 600000000, 1, 50, 2), -+ RK3066_PLL_RATE_NB(594000000, 1, 99, 4, 1), -+ RK3066_PLL_RATE( 552000000, 1, 46, 2), -+ RK3066_PLL_RATE( 504000000, 1, 84, 4), -+ RK3066_PLL_RATE( 500000000, 1, 125, 6), -+ RK3066_PLL_RATE( 456000000, 1, 76, 4), -+ RK3066_PLL_RATE( 428000000, 1, 107, 6), -+ RK3066_PLL_RATE( 408000000, 1, 68, 4), -+ RK3066_PLL_RATE( 400000000, 1, 100, 6), -+ RK3066_PLL_RATE_NB( 394000000, 1, 197, 12, 1), -+ RK3066_PLL_RATE( 384000000, 1, 64, 4), -+ RK3066_PLL_RATE( 360000000, 1, 60, 4), -+ RK3066_PLL_RATE_NB( 356000000, 1, 178, 12, 1), -+ RK3066_PLL_RATE_NB( 324000000, 1, 189, 14, 1), -+ RK3066_PLL_RATE( 312000000, 1, 52, 4), -+ RK3066_PLL_RATE_NB( 308000000, 1, 154, 12, 1), -+ RK3066_PLL_RATE_NB( 303000000, 1, 202, 16, 1), -+ RK3066_PLL_RATE( 300000000, 1, 75, 6), -+ RK3066_PLL_RATE_NB( 297750000, 2, 397, 16, 1), -+ RK3066_PLL_RATE( 297000000, 1, 99, 8), -+ RK3066_PLL_RATE_NB( 293250000, 2, 391, 16, 1), -+ RK3066_PLL_RATE_NB( 292500000, 1, 195, 16, 1), -+ RK3066_PLL_RATE( 273600000, 1, 114, 10), -+ RK3066_PLL_RATE_NB( 273000000, 1, 182, 16, 1), -+ RK3066_PLL_RATE_NB( 270000000, 1, 180, 16, 1), -+ RK3066_PLL_RATE_NB( 266250000, 2, 355, 16, 1), -+ RK3066_PLL_RATE_NB( 256500000, 1, 171, 16, 1), -+ RK3066_PLL_RATE( 252000000, 1, 84, 8), -+ RK3066_PLL_RATE_NB( 250500000, 1, 167, 16, 1), -+ RK3066_PLL_RATE_NB( 243428571, 1, 142, 14, 1), -+ RK3066_PLL_RATE( 238000000, 1, 119, 12), -+ RK3066_PLL_RATE_NB( 219750000, 2, 293, 16, 1), -+ RK3066_PLL_RATE_NB( 216000000, 1, 144, 16, 1), -+ RK3066_PLL_RATE_NB( 213000000, 1, 142, 16, 1), -+ RK3066_PLL_RATE( 195428571, 1, 114, 14), -+ RK3066_PLL_RATE( 160000000, 1, 80, 12), -+ RK3066_PLL_RATE( 157500000, 1, 105, 16), -+ RK3066_PLL_RATE( 148500000, 1, 99, 16), -+ RK3066_PLL_RATE( 126000000, 1, 84, 16), -+ { /* sentinel */ }, -+}; ++ if 
((ppm > 1000) || (ppm < -1000)) ++ return -EINVAL; + -+#define RK3288_DIV_ACLK_CORE_M0_MASK 0xf -+#define RK3288_DIV_ACLK_CORE_M0_SHIFT 0 -+#define RK3288_DIV_ACLK_CORE_MP_MASK 0xf -+#define RK3288_DIV_ACLK_CORE_MP_SHIFT 4 -+#define RK3288_DIV_L2RAM_MASK 0x7 -+#define RK3288_DIV_L2RAM_SHIFT 0 -+#define RK3288_DIV_ATCLK_MASK 0x1f -+#define RK3288_DIV_ATCLK_SHIFT 4 -+#define RK3288_DIV_PCLK_DBGPRE_MASK 0x1f -+#define RK3288_DIV_PCLK_DBGPRE_SHIFT 9 ++ if (IS_ERR_OR_NULL(parent)) ++ return -EINVAL; + -+#define RK3288_CLKSEL0(_core_m0, _core_mp) \ -+ { \ -+ .reg = RK3288_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_core_m0, RK3288_DIV_ACLK_CORE_M0_MASK, \ -+ RK3288_DIV_ACLK_CORE_M0_SHIFT) | \ -+ HIWORD_UPDATE(_core_mp, RK3288_DIV_ACLK_CORE_MP_MASK, \ -+ RK3288_DIV_ACLK_CORE_MP_SHIFT), \ -+ } -+#define RK3288_CLKSEL37(_l2ram, _atclk, _pclk_dbg_pre) \ -+ { \ -+ .reg = RK3288_CLKSEL_CON(37), \ -+ .val = HIWORD_UPDATE(_l2ram, RK3288_DIV_L2RAM_MASK, \ -+ RK3288_DIV_L2RAM_SHIFT) | \ -+ HIWORD_UPDATE(_atclk, RK3288_DIV_ATCLK_MASK, \ -+ RK3288_DIV_ATCLK_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg_pre, \ -+ RK3288_DIV_PCLK_DBGPRE_MASK, \ -+ RK3288_DIV_PCLK_DBGPRE_SHIFT), \ -+ } ++ pll = to_rockchip_clk_pll(__clk_get_hw(parent)); ++ if (!pll) ++ return -EINVAL; + -+#define RK3288_CPUCLK_RATE(_prate, _core_m0, _core_mp, _l2ram, _atclk, _pdbg) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3288_CLKSEL0(_core_m0, _core_mp), \ -+ RK3288_CLKSEL37(_l2ram, _atclk, _pdbg), \ -+ }, \ ++ switch (pll->type) { ++ case pll_rk3036: ++ case pll_rk3328: ++ pllcon0 = RK3036_PLLCON(0); ++ pllcon2 = RK3036_PLLCON(2); ++ fbdiv_mask = RK3036_PLLCON0_FBDIV_MASK; ++ frac_mask = RK3036_PLLCON2_FRAC_MASK; ++ frac_shift = RK3036_PLLCON2_FRAC_SHIFT; ++ if (!frac) ++ writel(HIWORD_UPDATE(RK3036_PLLCON1_PLLPDSEL, ++ RK3036_PLLCON1_PLLPDSEL, 0), ++ pll->reg_base + RK3036_PLLCON(1)); ++ break; ++ case pll_rk3066: ++ return -EINVAL; ++ case pll_rk3399: ++ pllcon0 = RK3399_PLLCON(0); ++ pllcon2 = RK3399_PLLCON(2); ++ fbdiv_mask = RK3399_PLLCON0_FBDIV_MASK; ++ frac_mask = RK3399_PLLCON2_FRAC_MASK; ++ frac_shift = RK3399_PLLCON2_FRAC_SHIFT; ++ break; ++ case pll_rk3588: ++ pllcon0 = RK3588_PLLCON(0); ++ pllcon2 = RK3588_PLLCON(2); ++ fbdiv_mask = RK3588_PLLCON0_M_MASK; ++ frac_mask = RK3588_PLLCON2_K_MASK; ++ frac_shift = RK3588_PLLCON2_K_SHIFT; ++ break; ++ default: ++ return -EINVAL; + } + -+static struct rockchip_cpuclk_rate_table rk3288_cpuclk_rates[] __initdata = { -+ RK3288_CPUCLK_RATE(1800000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1704000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1608000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1512000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1416000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1200000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE(1008000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 816000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 696000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 600000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 408000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 312000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 216000000, 1, 3, 1, 3, 3), -+ RK3288_CPUCLK_RATE( 126000000, 1, 3, 1, 3, 3), -+}; ++ negative = !!(ppm & BIT(31)); ++ ppm = negative ? 
~ppm + 1 : ppm; + -+static const struct rockchip_cpuclk_reg_data rk3288_cpuclk_data = { -+ .core_reg[0] = RK3288_CLKSEL_CON(0), -+ .div_core_shift[0] = 8, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 15, -+ .mux_core_mask = 0x1, -+}; ++ switch (pll->type) { ++ case pll_rk3036: ++ case pll_rk3328: ++ case pll_rk3066: ++ case pll_rk3399: ++ /* ++ * delta frac frac ppm ++ * -------------- = (fbdiv + ----------) * --------- ++ * 1 << 24 1 << 24 1000000 ++ * ++ */ ++ if (!frac) { ++ frac = readl_relaxed(pll->reg_base + pllcon2) & frac_mask; ++ fbdiv = readl_relaxed(pll->reg_base + pllcon0) & fbdiv_mask; ++ } ++ m = div64_u64((uint64_t)frac * ppm, 1000000); ++ n = div64_u64((uint64_t)ppm << 24, 1000000) * fbdiv; + -+PNAME(mux_pll_p) = { "xin24m", "xin32k" }; -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_aclk_cpu_src_p) = { "cpll_aclk_cpu", "gpll_aclk_cpu" }; ++ fracdiv = negative ? frac - (m + n) : frac + (m + n); + -+PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; -+PNAME(mux_pll_src_cpll_gpll_usb480m_p) = { "cpll", "gpll", "unstable:usbphy480m_src" }; -+PNAME(mux_pll_src_cpll_gll_usb_npll_p) = { "cpll", "gpll", "unstable:usbphy480m_src", "npll" }; ++ if (!frac || fracdiv > frac_mask) ++ return -EINVAL; + -+PNAME(mux_mmc_src_p) = { "cpll", "gpll", "xin24m", "xin24m" }; -+PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s_clkout_p) = { "i2s_pre", "xin12m" }; -+PNAME(mux_spdif_p) = { "spdif_pre", "spdif_frac", "xin12m" }; -+PNAME(mux_spdif_8ch_p) = { "spdif_8ch_pre", "spdif_8ch_frac", "xin12m" }; -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; -+PNAME(mux_uart3_p) = { "uart3_src", "uart3_frac", "xin24m" }; -+PNAME(mux_uart4_p) = { "uart4_src", "uart4_frac", "xin24m" }; -+PNAME(mux_vip_out_p) = { "vip_src", "xin24m" }; -+PNAME(mux_mac_p) = { "mac_pll_src", "ext_gmac" }; -+PNAME(mux_hsadcout_p) = { "hsadc_src", "ext_hsadc" }; -+PNAME(mux_edp_24m_p) = { "ext_edp_24m", "xin24m" }; -+PNAME(mux_tspout_p) = { "cpll", "gpll", "npll", "xin27m" }; ++ pllcon = readl_relaxed(pll->reg_base + pllcon2); ++ pllcon &= ~(frac_mask << frac_shift); ++ pllcon |= fracdiv << frac_shift; ++ writel_relaxed(pllcon, pll->reg_base + pllcon2); ++ break; ++ case pll_rk3588: ++ if (!fbdiv) { ++ frac = readl_relaxed(pll->reg_base + pllcon2) & frac_mask; ++ fbdiv = readl_relaxed(pll->reg_base + pllcon0) & fbdiv_mask; ++ } ++ if (!frac) { ++ pllcon = readl_relaxed(pll->reg_base + RK3588_PLLCON(1)); ++ s = ((pllcon >> RK3588_PLLCON1_S_SHIFT) ++ & RK3588_PLLCON1_S_MASK); ++ p = ((pllcon >> RK3588_PLLCON1_P_SHIFT) ++ & RK3588_PLLCON1_P_MASK); ++ m = div64_u64((uint64_t)clk_get_rate(clk) * ppm, 24000000); ++ n = div64_u64((uint64_t)m * 65536 * p * (1 << s), 1000000); + -+PNAME(mux_aclk_vcodec_pre_p) = { "aclk_vdpu", "aclk_vepu" }; -+PNAME(mux_testout_src_p) = { "aclk_peri", "armclk", "aclk_vio0", "ddrphy", -+ "aclk_vcodec", "aclk_gpu", "sclk_rga", "aclk_cpu", -+ "xin24m", "xin27m", "xin32k", "clk_wifi", -+ "dclk_vop0", "dclk_vop1", "sclk_isp_jpe", -+ "sclk_isp" }; ++ if (n > 32767) ++ return -EINVAL; ++ fracdiv = negative ? 
~n + 1 : n; ++ } else if (frac & BIT(15)) { ++ frac = (~(frac - 1)) & RK3588_PLLCON2_K_MASK; ++ m = div64_u64((uint64_t)frac * ppm, 100000); ++ n = div64_u64((uint64_t)ppm * 65536 * fbdiv, 100000); ++ if (negative) { ++ fracdiv = frac + (div64_u64(m + n, 10)); ++ if (fracdiv > 32767) ++ return -EINVAL; ++ fracdiv = ~fracdiv + 1; ++ } else { ++ s = div64_u64(m + n, 10); ++ if (frac >= s) { ++ fracdiv = frac - s; ++ if (fracdiv > 32767) ++ return -EINVAL; ++ fracdiv = ~fracdiv + 1; ++ } else { ++ fracdiv = s - frac; ++ if (fracdiv > 32767) ++ return -EINVAL; ++ } ++ } ++ } else { ++ m = div64_u64((uint64_t)frac * ppm, 100000); ++ n = div64_u64((uint64_t)ppm * 65536 * fbdiv, 100000); ++ if (!negative) { ++ fracdiv = frac + (div64_u64(m + n, 10)); ++ if (fracdiv > 32767) ++ return -EINVAL; ++ } else { ++ s = div64_u64(m + n, 10); ++ if (frac >= s) { ++ fracdiv = frac - s; ++ if (fracdiv > 32767) ++ return -EINVAL; ++ } else { ++ fracdiv = s - frac; ++ if (fracdiv > 32767) ++ return -EINVAL; ++ fracdiv = ~fracdiv + 1; ++ } ++ } ++ } + -+PNAME(mux_usbphy480m_p) = { "sclk_otgphy1_480m", "sclk_otgphy2_480m", -+ "sclk_otgphy0_480m" }; -+PNAME(mux_hsicphy480m_p) = { "cpll", "gpll", "usbphy480m_src" }; -+PNAME(mux_hsicphy12m_p) = { "hsicphy12m_xin12m", "hsicphy12m_usbphy" }; ++ writel_relaxed(HIWORD_UPDATE(fracdiv, frac_mask, frac_shift), ++ pll->reg_base + pllcon2); ++ break; ++ default: ++ return -EINVAL; ++ } + -+static struct rockchip_pll_clock rk3288_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK3288_PLL_CON(0), -+ RK3288_MODE_CON, 0, 6, 0, rk3288_pll_rates), -+ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK3288_PLL_CON(4), -+ RK3288_MODE_CON, 4, 5, 0, NULL), -+ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK3288_PLL_CON(8), -+ RK3288_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), -+ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK3288_PLL_CON(12), -+ RK3288_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), -+ [npll] = PLL(pll_rk3066, PLL_NPLL, "npll", mux_pll_p, 0, RK3288_PLL_CON(16), -+ RK3288_MODE_CON, 14, 9, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), -+}; ++ return 0; ++} ++EXPORT_SYMBOL(rockchip_pll_clk_compensation); ++#endif + -+static struct clk_div_table div_hclk_cpu_t[] = { -+ { .val = 0, .div = 1 }, -+ { .val = 1, .div = 2 }, -+ { .val = 3, .div = 4 }, -+ { /* sentinel */}, -+}; ++/* ++ * Common registering of pll clocks ++ */ + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK ++struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx, ++ enum rockchip_pll_type pll_type, ++ const char *name, const char *const *parent_names, ++ u8 num_parents, int con_offset, int grf_lock_offset, ++ int lock_shift, int mode_offset, int mode_shift, ++ struct rockchip_pll_rate_table *rate_table, ++ unsigned long flags, u8 clk_pll_flags) ++{ ++ const char *pll_parents[3]; ++ struct clk_init_data init; ++ struct rockchip_clk_pll *pll; ++ struct clk_mux *pll_mux; ++ struct clk *pll_clk, *mux_clk; ++ char pll_name[20]; + -+static struct rockchip_clk_branch rk3288_i2s_fracmux __initdata = -+ MUX(0, "i2s_pre", mux_i2s_pre_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(4), 8, 2, MFLAGS); ++ if ((pll_type != pll_rk3328 && num_parents != 2) || ++ (pll_type == pll_rk3328 && num_parents != 1)) { ++ pr_err("%s: needs two parent clocks\n", __func__); ++ return 
ERR_PTR(-EINVAL); ++ } + -+static struct rockchip_clk_branch rk3288_spdif_fracmux __initdata = -+ MUX(0, "spdif_mux", mux_spdif_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(5), 8, 2, MFLAGS); ++ /* name the actual pll */ ++ snprintf(pll_name, sizeof(pll_name), "pll_%s", name); + -+static struct rockchip_clk_branch rk3288_spdif_8ch_fracmux __initdata = -+ MUX(0, "spdif_8ch_mux", mux_spdif_8ch_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(40), 8, 2, MFLAGS); ++ pll = kzalloc(sizeof(*pll), GFP_KERNEL); ++ if (!pll) ++ return ERR_PTR(-ENOMEM); + -+static struct rockchip_clk_branch rk3288_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(13), 8, 2, MFLAGS); ++ /* create the mux on top of the real pll */ ++ pll->pll_mux_ops = &clk_mux_ops; ++ pll_mux = &pll->pll_mux; ++ pll_mux->reg = ctx->reg_base + mode_offset; ++ pll_mux->shift = mode_shift; ++ if (pll_type == pll_rk3328) ++ pll_mux->mask = PLL_RK3328_MODE_MASK; ++ else ++ pll_mux->mask = PLL_MODE_MASK; ++ pll_mux->flags = 0; ++ pll_mux->lock = &ctx->lock; ++ pll_mux->hw.init = &init; ++ pll_mux->flags |= CLK_MUX_HIWORD_MASK; + -+static struct rockchip_clk_branch rk3288_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(14), 8, 2, MFLAGS); ++ /* the actual muxing is xin24m, pll-output, xin32k */ ++ pll_parents[0] = parent_names[0]; ++ pll_parents[1] = pll_name; ++ pll_parents[2] = parent_names[1]; + -+static struct rockchip_clk_branch rk3288_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(15), 8, 2, MFLAGS); ++ init.name = name; ++ init.flags = CLK_SET_RATE_PARENT; ++ init.ops = pll->pll_mux_ops; ++ init.parent_names = pll_parents; ++ if (pll_type == pll_rk3328) ++ init.num_parents = 2; ++ else ++ init.num_parents = ARRAY_SIZE(pll_parents); + -+static struct rockchip_clk_branch rk3288_uart3_fracmux __initdata = -+ MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(16), 8, 2, MFLAGS); ++ mux_clk = clk_register(NULL, &pll_mux->hw); ++ if (IS_ERR(mux_clk)) ++ goto err_mux; + -+static struct rockchip_clk_branch rk3288_uart4_fracmux __initdata = -+ MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(3), 8, 2, MFLAGS); ++ /* now create the actual pll */ ++ init.name = pll_name; + -+static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++ if (clk_pll_flags & ROCKCHIP_PLL_ALLOW_POWER_DOWN) ++ init.flags = flags; ++ else ++ /* keep all plls untouched for now */ ++ init.flags = flags | CLK_IGNORE_UNUSED; ++#else ++ init.flags = flags; ++#endif + -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(0), 2, GFLAGS), ++ init.parent_names = &parent_names[0]; ++ init.num_parents = 1; + -+ COMPOSITE_NOMUX(0, "armcore0", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(36), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "armcore1", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(36), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "armcore2", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(36), 8, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 2, GFLAGS), -+ 
COMPOSITE_NOMUX(0, "armcore3", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(36), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "l2ram", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(37), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core_m0", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(0), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 5, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core_mp", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(0), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 6, GFLAGS), -+ COMPOSITE_NOMUX(0, "atclk", "armclk", 0, -+ RK3288_CLKSEL_CON(37), 4, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 7, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "armclk", CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(37), 9, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3288_CLKGATE_CON(12), 8, GFLAGS), -+ GATE(0, "pclk_dbg", "pclk_dbg_pre", 0, -+ RK3288_CLKGATE_CON(12), 9, GFLAGS), -+ GATE(0, "cs_dbg", "pclk_dbg_pre", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(12), 10, GFLAGS), -+ GATE(0, "pclk_core_niu", "pclk_dbg_pre", 0, -+ RK3288_CLKGATE_CON(12), 11, GFLAGS), ++ if (rate_table) { ++ int len; + -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, 0, -+ RK3288_CLKSEL_CON(26), 2, 1, 0, 0, -+ ROCKCHIP_DDRCLK_SIP_V2), -+ COMPOSITE_NOGATE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(26), 2, 1, MFLAGS, 0, 2, -+ DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ /* find count of rates in rate_table */ ++ for (len = 0; rate_table[len].rate != 0; ) ++ len++; + -+ GATE(0, "gpll_aclk_cpu", "gpll", CLK_IS_CRITICAL, -+ RK3288_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(0, "cpll_aclk_cpu", "cpll", CLK_IS_CRITICAL, -+ RK3288_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(1), 15, 1, MFLAGS, 3, 5, DFLAGS), -+ DIV(0, "aclk_cpu_pre", "aclk_cpu_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(1), 0, 3, DFLAGS), -+ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, -+ RK3288_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(1), 12, 3, DFLAGS, -+ RK3288_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX_DIVTBL(HCLK_CPU, "hclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(1), 8, 2, DFLAGS, div_hclk_cpu_t, -+ RK3288_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "c2c_host", "aclk_cpu_src", 0, -+ RK3288_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_CRYPTO, "crypto", "aclk_cpu_pre", 0, -+ RK3288_CLKSEL_CON(26), 6, 2, DFLAGS, -+ RK3288_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(0), 7, GFLAGS), ++ pll->rate_count = len; ++ pll->rate_table = kmemdup(rate_table, ++ pll->rate_count * ++ sizeof(struct rockchip_pll_rate_table), ++ GFP_KERNEL); ++ WARN(!pll->rate_table, ++ "%s: could not allocate rate table for %s\n", ++ __func__, name); ++ } + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ switch (pll_type) { ++ case pll_rk3036: ++ case pll_rk3328: ++ if (!pll->rate_table) ++ init.ops = &rockchip_rk3036_pll_clk_norate_ops; ++ else ++ init.ops = &rockchip_rk3036_pll_clk_ops; ++ break; ++#ifdef CONFIG_ROCKCHIP_PLL_RK3066 ++ case pll_rk3066: 
++ if (!pll->rate_table || IS_ERR(ctx->grf)) ++ init.ops = &rockchip_rk3066_pll_clk_norate_ops; ++ else ++ init.ops = &rockchip_rk3066_pll_clk_ops; ++ break; ++#endif ++#ifdef CONFIG_ROCKCHIP_PLL_RK3399 ++ case pll_rk3399: ++ if (!pll->rate_table) ++ init.ops = &rockchip_rk3399_pll_clk_norate_ops; ++ else ++ init.ops = &rockchip_rk3399_pll_clk_ops; ++ break; ++#endif ++#ifdef CONFIG_ROCKCHIP_PLL_RK3588 ++ case pll_rk3588: ++ case pll_rk3588_core: ++ if (!pll->rate_table) ++ init.ops = &rockchip_rk3588_pll_clk_norate_ops; ++ else ++ init.ops = &rockchip_rk3588_pll_clk_ops; ++ init.flags = flags; ++ break; ++#endif ++ default: ++ pr_warn("%s: Unknown pll type for pll clk %s\n", ++ __func__, name); ++ } + -+ COMPOSITE(SCLK_I2S_SRC, "i2s_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(4), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(4), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s_frac", "i2s_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(8), 0, -+ RK3288_CLKGATE_CON(4), 2, GFLAGS, -+ &rk3288_i2s_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_OUT, "i2s0_clkout", mux_i2s_clkout_p, 0, -+ RK3288_CLKSEL_CON(4), 12, 1, MFLAGS, -+ RK3288_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(SCLK_I2S0, "sclk_i2s0", "i2s_pre", CLK_SET_RATE_PARENT, -+ RK3288_CLKGATE_CON(4), 3, GFLAGS), ++ pll->hw.init = &init; ++ pll->type = pll_type; ++ pll->reg_base = ctx->reg_base + con_offset; ++ pll->lock_offset = grf_lock_offset; ++ pll->lock_shift = lock_shift; ++ pll->flags = clk_pll_flags; ++ pll->lock = &ctx->lock; ++ pll->ctx = ctx; + -+ MUX(0, "spdif_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(5), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "spdif_pre", "spdif_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(5), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(4), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(9), 0, -+ RK3288_CLKGATE_CON(4), 5, GFLAGS, -+ &rk3288_spdif_fracmux), -+ GATE(SCLK_SPDIF, "sclk_spdif", "spdif_mux", CLK_SET_RATE_PARENT, -+ RK3288_CLKGATE_CON(4), 6, GFLAGS), -+ COMPOSITE_NOMUX(0, "spdif_8ch_pre", "spdif_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(40), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(4), 7, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_pre", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(41), 0, -+ RK3288_CLKGATE_CON(4), 8, GFLAGS, -+ &rk3288_spdif_8ch_fracmux), -+ GATE(SCLK_SPDIF8CH, "sclk_spdif_8ch", "spdif_8ch_mux", CLK_SET_RATE_PARENT, -+ RK3288_CLKGATE_CON(4), 9, GFLAGS), ++ pll_clk = clk_register(NULL, &pll->hw); ++ if (IS_ERR(pll_clk)) { ++ pr_err("%s: failed to register pll clock %s : %ld\n", ++ __func__, name, PTR_ERR(pll_clk)); ++ goto err_pll; ++ } + -+ GATE(0, "sclk_acc_efuse", "xin24m", 0, -+ RK3288_CLKGATE_CON(0), 12, GFLAGS), ++ return mux_clk; + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK3288_CLKGATE_CON(1), 5, GFLAGS), ++err_pll: ++ kfree(pll->rate_table); ++ clk_unregister(mux_clk); ++ mux_clk = pll_clk; ++err_mux: ++ kfree(pll); ++ return mux_clk; ++} + -+ /* -+ * Clock-Architecture Diagram 2 -+ */ ++#ifdef CONFIG_ROCKCHIP_CLK_BOOST ++static unsigned long 
rockchip_pll_con_to_rate(struct rockchip_clk_pll *pll, ++ u32 con0, u32 con1) ++{ ++ switch (pll->type) { ++ case pll_rk3036: ++ case pll_rk3328: ++ return rockchip_rk3036_pll_con_to_rate(pll, con0, con1); ++ case pll_rk3066: ++ break; ++ case pll_rk3399: ++ break; ++ default: ++ pr_warn("%s: Unknown pll type\n", __func__); ++ } + -+ COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK3288_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 9, GFLAGS), -+ COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK3288_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 11, GFLAGS), -+ MUXGRF(0, "aclk_vcodec_pre", mux_aclk_vcodec_pre_p, CLK_SET_RATE_PARENT, -+ RK3288_GRF_SOC_CON(0), 7, 1, MFLAGS), -+ GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0, -+ RK3288_CLKGATE_CON(9), 0, GFLAGS), ++ return 0; ++} + -+ FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vcodec_pre", 0, 1, 4, -+ RK3288_CLKGATE_CON(3), 10, GFLAGS), ++void rockchip_boost_init(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll; ++ struct device_node *np; ++ u32 value, con0, con1; + -+ GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0, -+ RK3288_CLKGATE_CON(9), 1, GFLAGS), ++ if (!hw) ++ return; ++ pll = to_rockchip_clk_pll(hw); ++ np = of_parse_phandle(pll->ctx->cru_node, "rockchip,boost", 0); ++ if (!np) { ++ pr_debug("%s: failed to get boost np\n", __func__); ++ return; ++ } ++ pll->boost = syscon_node_to_regmap(np); ++ if (IS_ERR(pll->boost)) { ++ pr_debug("%s: failed to get boost regmap\n", __func__); ++ return; ++ } + -+ COMPOSITE(ACLK_VIO0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(31), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE(ACLK_VIO1, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, -+ RK3288_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 2, GFLAGS), ++ if (!of_property_read_u32(np, "rockchip,boost-low-con0", &con0) && ++ !of_property_read_u32(np, "rockchip,boost-low-con1", &con1)) { ++ pr_debug("boost-low-con=0x%x 0x%x\n", con0, con1); ++ regmap_write(pll->boost, BOOST_PLL_L_CON(0), ++ HIWORD_UPDATE(con0, BOOST_PLL_CON_MASK, 0)); ++ regmap_write(pll->boost, BOOST_PLL_L_CON(1), ++ HIWORD_UPDATE(con1, BOOST_PLL_CON_MASK, 0)); ++ pll->boost_low_rate = rockchip_pll_con_to_rate(pll, con0, ++ con1); ++ pr_debug("boost-low-rate=%lu\n", pll->boost_low_rate); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-high-con0", &con0) && ++ !of_property_read_u32(np, "rockchip,boost-high-con1", &con1)) { ++ pr_debug("boost-high-con=0x%x 0x%x\n", con0, con1); ++ regmap_write(pll->boost, BOOST_PLL_H_CON(0), ++ HIWORD_UPDATE(con0, BOOST_PLL_CON_MASK, 0)); ++ regmap_write(pll->boost, BOOST_PLL_H_CON(1), ++ HIWORD_UPDATE(con1, BOOST_PLL_CON_MASK, 0)); ++ pll->boost_high_rate = rockchip_pll_con_to_rate(pll, con0, ++ con1); ++ pr_debug("boost-high-rate=%lu\n", pll->boost_high_rate); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-backup-pll", &value)) { ++ pr_debug("boost-backup-pll=0x%x\n", value); ++ regmap_write(pll->boost, BOOST_CLK_CON, ++ HIWORD_UPDATE(value, BOOST_BACKUP_PLL_MASK, ++ BOOST_BACKUP_PLL_SHIFT)); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-backup-pll-usage", ++ &pll->boost_backup_pll_usage)) { ++ pr_debug("boost-backup-pll-usage=0x%x\n", ++ pll->boost_backup_pll_usage); ++ regmap_write(pll->boost, BOOST_CLK_CON, ++ HIWORD_UPDATE(pll->boost_backup_pll_usage, ++ BOOST_BACKUP_PLL_USAGE_MASK, ++ 
BOOST_BACKUP_PLL_USAGE_SHIFT)); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-switch-threshold", ++ &value)) { ++ pr_debug("boost-switch-threshold=0x%x\n", value); ++ regmap_write(pll->boost, BOOST_SWITCH_THRESHOLD, value); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-statis-threshold", ++ &value)) { ++ pr_debug("boost-statis-threshold=0x%x\n", value); ++ regmap_write(pll->boost, BOOST_STATIS_THRESHOLD, value); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-statis-enable", ++ &value)) { ++ pr_debug("boost-statis-enable=0x%x\n", value); ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(value, BOOST_STATIS_ENABLE_MASK, ++ BOOST_STATIS_ENABLE_SHIFT)); ++ } ++ if (!of_property_read_u32(np, "rockchip,boost-enable", &value)) { ++ pr_debug("boost-enable=0x%x\n", value); ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(value, BOOST_ENABLE_MASK, ++ BOOST_ENABLE_SHIFT)); ++ if (value) ++ pll->boost_enabled = true; ++ } ++#ifdef CONFIG_DEBUG_FS ++ if (pll->boost_enabled) { ++ mutex_lock(&clk_boost_lock); ++ hlist_add_head(&pll->debug_node, &clk_boost_list); ++ mutex_unlock(&clk_boost_lock); ++ } ++#endif ++} + -+ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK3288_CLKSEL_CON(30), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 5, GFLAGS), -+ COMPOSITE(SCLK_RGA, "sclk_rga", mux_pll_src_cpll_gpll_usb480m_p, 0, -+ RK3288_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(3), 4, GFLAGS), ++void rockchip_boost_enable_recovery_sw_low(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll; ++ unsigned int val; + -+ COMPOSITE(DCLK_VOP0, "dclk_vop0", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(27), 0, 2, MFLAGS, 8, 8, DFLAGS, -+ RK3288_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE(DCLK_VOP1, "dclk_vop1", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(29), 6, 2, MFLAGS, 8, 8, DFLAGS, -+ RK3288_CLKGATE_CON(3), 3, GFLAGS), ++ if (!hw) ++ return; ++ pll = to_rockchip_clk_pll(hw); ++ if (!pll->boost_enabled) ++ return; + -+ COMPOSITE_NODIV(SCLK_EDP_24M, "sclk_edp_24m", mux_edp_24m_p, 0, -+ RK3288_CLKSEL_CON(28), 15, 1, MFLAGS, -+ RK3288_CLKGATE_CON(3), 12, GFLAGS), -+ COMPOSITE(SCLK_EDP, "sclk_edp", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3288_CLKGATE_CON(3), 13, GFLAGS), ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(1, BOOST_RECOVERY_MASK, ++ BOOST_RECOVERY_SHIFT)); ++ do { ++ regmap_read(pll->boost, BOOST_FSM_STATUS, &val); ++ } while (!(val & BOOST_BUSY_STATE)); + -+ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3288_CLKGATE_CON(3), 14, GFLAGS), -+ COMPOSITE(SCLK_ISP_JPE, "sclk_isp_jpe", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3288_CLKGATE_CON(3), 15, GFLAGS), ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(1, BOOST_SW_CTRL_MASK, ++ BOOST_SW_CTRL_SHIFT) | ++ HIWORD_UPDATE(1, BOOST_LOW_FREQ_EN_MASK, ++ BOOST_LOW_FREQ_EN_SHIFT)); ++} + -+ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, -+ RK3288_CLKGATE_CON(5), 12, GFLAGS), -+ GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0, -+ RK3288_CLKGATE_CON(5), 11, GFLAGS), ++static void rockchip_boost_disable_low(struct rockchip_clk_pll *pll) ++{ ++ if (!pll->boost_enabled) ++ return; + -+ COMPOSITE(ACLK_HEVC, "aclk_hevc", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(39), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(13), 13, GFLAGS), -+ 
DIV(HCLK_HEVC, "hclk_hevc", "aclk_hevc", 0, -+ RK3288_CLKSEL_CON(40), 12, 2, DFLAGS), ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(0, BOOST_LOW_FREQ_EN_MASK, ++ BOOST_LOW_FREQ_EN_SHIFT)); ++} + -+ COMPOSITE(SCLK_HEVC_CABAC, "sclk_hevc_cabac", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(13), 14, GFLAGS), -+ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(13), 15, GFLAGS), ++void rockchip_boost_disable_recovery_sw(struct clk_hw *hw) ++{ ++ struct rockchip_clk_pll *pll; + -+ COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(26), 8, 1, MFLAGS, -+ RK3288_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_VIP_OUT, "sclk_vip_out", mux_vip_out_p, 0, -+ RK3288_CLKSEL_CON(26), 15, 1, MFLAGS, 9, 5, DFLAGS), ++ if (!hw) ++ return; ++ pll = to_rockchip_clk_pll(hw); ++ if (!pll->boost_enabled) ++ return; + -+ DIV(PCLK_PD_ALIVE, "pclk_pd_alive", "gpll", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(33), 8, 5, DFLAGS), -+ COMPOSITE_NOMUX(PCLK_PD_PMU, "pclk_pd_pmu", "gpll", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(33), 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(5), 8, GFLAGS), ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(0, BOOST_RECOVERY_MASK, ++ BOOST_RECOVERY_SHIFT)); ++ regmap_write(pll->boost, BOOST_BOOST_CON, ++ HIWORD_UPDATE(0, BOOST_SW_CTRL_MASK, ++ BOOST_SW_CTRL_SHIFT)); ++} + -+ COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_cpll_gll_usb_npll_p, 0, -+ RK3288_CLKSEL_CON(34), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(5), 7, GFLAGS), ++void rockchip_boost_add_core_div(struct clk_hw *hw, unsigned long prate) ++{ ++ struct rockchip_clk_pll *pll; ++ unsigned int div; + -+ COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK3288_CLKGATE_CON(2), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3288_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK3288_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3288_CLKGATE_CON(2), 1, GFLAGS), ++ if (!hw) ++ return; ++ pll = to_rockchip_clk_pll(hw); ++ if (!pll->boost_enabled || pll->boost_backup_pll_rate == prate) ++ return; + ++ /* todo */ ++ if (pll->boost_backup_pll_usage == BOOST_BACKUP_PLL_USAGE_TARGET) ++ return; + /* -+ * Clock-Architecture Diagram 3 ++ * cpu clock rate should be less than or equal to ++ * low rate when change pll rate in boost module + */ ++ if (pll->boost_low_rate && prate > pll->boost_low_rate) { ++ div = DIV_ROUND_UP(prate, pll->boost_low_rate) - 1; ++ regmap_write(pll->boost, BOOST_CLK_CON, ++ HIWORD_UPDATE(div, BOOST_CORE_DIV_MASK, ++ BOOST_CORE_DIV_SHIFT)); ++ pll->boost_backup_pll_rate = prate; ++ } ++} + -+ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "sclk_spi1", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(25), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3288_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE(SCLK_SPI2, "sclk_spi2", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(39), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ 
RK3288_CLKGATE_CON(2), 11, GFLAGS), -+ -+ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, -+ RK3288_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3288_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE(SCLK_SDIO0, "sclk_sdio0", mux_mmc_src_p, 0, -+ RK3288_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3288_CLKGATE_CON(13), 1, GFLAGS), -+ COMPOSITE(SCLK_SDIO1, "sclk_sdio1", mux_mmc_src_p, 0, -+ RK3288_CLKSEL_CON(34), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3288_CLKGATE_CON(13), 2, GFLAGS), -+ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, -+ RK3288_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3288_CLKGATE_CON(13), 3, GFLAGS), ++#ifdef CONFIG_DEBUG_FS ++#include + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3288_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3288_SDMMC_CON1, 0), ++#ifndef MODULE ++static int boost_summary_show(struct seq_file *s, void *data) ++{ ++ struct rockchip_clk_pll *pll = (struct rockchip_clk_pll *)s->private; ++ u32 boost_count = 0; ++ u32 freq_cnt0 = 0, freq_cnt1 = 0; ++ u64 freq_cnt = 0, high_freq_time = 0; ++ u32 short_count = 0, short_threshold = 0; ++ u32 interval_time = 0; + -+ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "sclk_sdio0", RK3288_SDIO0_CON0, 1), -+ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "sclk_sdio0", RK3288_SDIO0_CON1, 0), ++ seq_puts(s, " device boost_count high_freq_count high_freq_time short_count short_threshold interval_count\n"); ++ seq_puts(s, "------------------------------------------------------------------------------------------------------\n"); ++ seq_printf(s, " %s\n", clk_hw_get_name(&pll->hw)); + -+ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "sclk_sdio1", RK3288_SDIO1_CON0, 1), -+ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "sclk_sdio1", RK3288_SDIO1_CON1, 0), ++ regmap_read(pll->boost, BOOST_SWITCH_CNT, &boost_count); + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3288_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3288_EMMC_CON1, 0), ++ regmap_read(pll->boost, BOOST_HIGH_PERF_CNT0, &freq_cnt0); ++ regmap_read(pll->boost, BOOST_HIGH_PERF_CNT1, &freq_cnt1); ++ freq_cnt = ((u64)freq_cnt1 << 32) + (u64)freq_cnt0; ++ high_freq_time = freq_cnt; ++ do_div(high_freq_time, 24); + -+ COMPOSITE(SCLK_TSPOUT, "sclk_tspout", mux_tspout_p, 0, -+ RK3288_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(4), 11, GFLAGS), -+ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3288_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(4), 10, GFLAGS), ++ regmap_read(pll->boost, BOOST_SHORT_SWITCH_CNT, &short_count); ++ regmap_read(pll->boost, BOOST_STATIS_THRESHOLD, &short_threshold); ++ regmap_read(pll->boost, BOOST_SWITCH_THRESHOLD, &interval_time); + -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(SCLK_OTGPHY2, "sclk_otgphy2", "xin24m", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED, -+ RK3288_CLKGATE_CON(13), 7, GFLAGS), ++ seq_printf(s, "%22u %17llu %15llu %12u %16u %15u\n", ++ boost_count, freq_cnt, high_freq_time, short_count, ++ short_threshold, interval_time); + -+ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin32k", 0, -+ RK3288_CLKSEL_CON(2), 0, 6, DFLAGS, -+ RK3288_CLKGATE_CON(2), 7, GFLAGS), ++ return 0; ++} + -+ MUX(SCLK_TESTOUT_SRC, "sclk_testout_src", mux_testout_src_p, 0, -+ RK3288_MISC_CON, 8, 4, 
MFLAGS), -+ COMPOSITE_NOMUX(SCLK_TESTOUT, "sclk_testout", "sclk_testout_src", 0, -+ RK3288_CLKSEL_CON(2), 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(4), 15, GFLAGS), ++static int boost_summary_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, boost_summary_show, inode->i_private); ++} + -+ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, -+ RK3288_CLKSEL_CON(24), 8, 8, DFLAGS, -+ RK3288_CLKGATE_CON(2), 8, GFLAGS), ++static const struct file_operations boost_summary_fops = { ++ .open = boost_summary_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ GATE(SCLK_PS2C, "sclk_ps2c", "xin24m", 0, -+ RK3288_CLKGATE_CON(5), 13, GFLAGS), ++static int boost_config_show(struct seq_file *s, void *data) ++{ ++ struct rockchip_clk_pll *pll = (struct rockchip_clk_pll *)s->private; + -+ COMPOSITE(SCLK_NANDC0, "sclk_nandc0", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(38), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3288_CLKGATE_CON(5), 5, GFLAGS), -+ COMPOSITE(SCLK_NANDC1, "sclk_nandc1", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(38), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(5), 6, GFLAGS), ++ seq_printf(s, "boost_enabled: %d\n", pll->boost_enabled); ++ seq_printf(s, "boost_low_rate: %lu\n", pll->boost_low_rate); ++ seq_printf(s, "boost_high_rate: %lu\n", pll->boost_high_rate); + -+ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gll_usb_npll_p, 0, -+ RK3288_CLKSEL_CON(13), 13, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(17), 0, -+ RK3288_CLKGATE_CON(1), 9, GFLAGS, -+ &rk3288_uart0_fracmux), -+ MUX(0, "uart_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(13), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0, -+ RK3288_CLKSEL_CON(14), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(18), 0, -+ RK3288_CLKGATE_CON(1), 11, GFLAGS, -+ &rk3288_uart1_fracmux), -+ COMPOSITE_NOMUX(0, "uart2_src", "uart_src", 0, -+ RK3288_CLKSEL_CON(15), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(19), 0, -+ RK3288_CLKGATE_CON(1), 13, GFLAGS, -+ &rk3288_uart2_fracmux), -+ COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0, -+ RK3288_CLKSEL_CON(16), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(20), 0, -+ RK3288_CLKGATE_CON(1), 15, GFLAGS, -+ &rk3288_uart3_fracmux), -+ COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0, -+ RK3288_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RK3288_CLKGATE_CON(2), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(7), 0, -+ RK3288_CLKGATE_CON(2), 13, GFLAGS, -+ &rk3288_uart4_fracmux), ++ return 0; ++} + -+ COMPOSITE(SCLK_MAC_PLL, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(21), 0, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3288_CLKGATE_CON(2), 5, GFLAGS), -+ MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, -+ RK3288_CLKSEL_CON(21), 4, 1, MFLAGS), -+ GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, -+ RK3288_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(SCLK_MACREF, "sclk_macref", "mac_clk", 0, -+ RK3288_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(SCLK_MAC_RX, "sclk_mac_rx", "mac_clk", 0, -+ RK3288_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(SCLK_MAC_TX, "sclk_mac_tx", 
"mac_clk", 0, -+ RK3288_CLKGATE_CON(5), 1, GFLAGS), ++static int boost_config_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, boost_config_show, inode->i_private); ++} + -+ COMPOSITE(0, "hsadc_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3288_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK3288_CLKGATE_CON(2), 6, GFLAGS), -+ MUX(0, "sclk_hsadc_out", mux_hsadcout_p, 0, -+ RK3288_CLKSEL_CON(22), 4, 1, MFLAGS), -+ INVERTER(SCLK_HSADC, "sclk_hsadc", "sclk_hsadc_out", -+ RK3288_CLKSEL_CON(22), 7, IFLAGS), ++static const struct file_operations boost_config_fops = { ++ .open = boost_config_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ GATE(0, "jtag", "ext_jtag", 0, -+ RK3288_CLKGATE_CON(4), 14, GFLAGS), ++static int boost_debug_create_one(struct rockchip_clk_pll *pll, ++ struct dentry *rootdir) ++{ ++ struct dentry *pdentry, *d; + -+ COMPOSITE_NODIV(SCLK_USBPHY480M_SRC, "usbphy480m_src", mux_usbphy480m_p, 0, -+ RK3288_CLKSEL_CON(13), 11, 2, MFLAGS, -+ RK3288_CLKGATE_CON(5), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0, -+ RK3288_CLKSEL_CON(29), 0, 2, MFLAGS, -+ RK3288_CLKGATE_CON(3), 6, GFLAGS), -+ GATE(0, "hsicphy12m_xin12m", "xin12m", 0, -+ RK3288_CLKGATE_CON(13), 9, GFLAGS), -+ DIV(0, "hsicphy12m_usbphy", "sclk_hsicphy480m", 0, -+ RK3288_CLKSEL_CON(11), 8, 6, DFLAGS), -+ MUX(SCLK_HSICPHY12M, "sclk_hsicphy12m", mux_hsicphy12m_p, 0, -+ RK3288_CLKSEL_CON(22), 4, 1, MFLAGS), ++ pdentry = debugfs_lookup(clk_hw_get_name(&pll->hw), rootdir); ++ if (!pdentry) { ++ pr_err("%s: failed to lookup %s dentry\n", __func__, ++ clk_hw_get_name(&pll->hw)); ++ return -ENOMEM; ++ } + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ ++ d = debugfs_create_file("boost_summary", 0444, pdentry, ++ pll, &boost_summary_fops); ++ if (!d) { ++ pr_err("%s: failed to create boost_summary file\n", __func__); ++ return -ENOMEM; ++ } + -+ /* aclk_cpu gates */ -+ GATE(0, "sclk_intmem0", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(0, "sclk_intmem1", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 6, GFLAGS), -+ GATE(0, "sclk_intmem2", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_cpu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(0, "aclk_ccp", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 8, GFLAGS), ++ d = debugfs_create_file("boost_config", 0444, pdentry, ++ pll, &boost_config_fops); ++ if (!d) { ++ pr_err("%s: failed to create boost config file\n", __func__); ++ return -ENOMEM; ++ } + -+ /* hclk_cpu gates */ -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_cpu", 0, RK3288_CLKGATE_CON(11), 7, GFLAGS), -+ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(HCLK_SPDIF8CH, "hclk_spdif_8ch", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 11, GFLAGS), ++ return 0; ++} + -+ /* pclk_cpu gates */ -+ GATE(PCLK_PWM, "pclk_pwm", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_cpu", 0, 
RK3288_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 3, GFLAGS), -+ GATE(PCLK_DDRUPCTL0, "pclk_ddrupctl0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 14, GFLAGS), -+ GATE(PCLK_PUBL0, "pclk_publ0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 15, GFLAGS), -+ GATE(PCLK_DDRUPCTL1, "pclk_ddrupctl1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(PCLK_PUBL1, "pclk_publ1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(PCLK_EFUSE1024, "pclk_efuse_1024", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(PCLK_EFUSE256, "pclk_efuse_256", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(11), 11, GFLAGS), ++static int __init boost_debug_init(void) ++{ ++ struct rockchip_clk_pll *pll; ++ struct dentry *rootdir; + -+ /* ddrctrl [DDR Controller PHY clock] gates */ -+ GATE(0, "nclk_ddrupctl0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 4, GFLAGS), -+ GATE(0, "nclk_ddrupctl1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 5, GFLAGS), ++ rootdir = debugfs_lookup("clk", NULL); ++ if (!rootdir) { ++ pr_err("%s: failed to lookup clk dentry\n", __func__); ++ return -ENOMEM; ++ } + -+ /* ddrphy gates */ -+ GATE(0, "sclk_ddrphy0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(0, "sclk_ddrphy1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 13, GFLAGS), ++ mutex_lock(&clk_boost_lock); + -+ /* aclk_peri gates */ -+ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK3288_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(ACLK_MMU, "aclk_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 0, GFLAGS), -+ GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 2, GFLAGS), ++ hlist_for_each_entry(pll, &clk_boost_list, debug_node) ++ boost_debug_create_one(pll, rootdir); + -+ /* hclk_peri gates */ -+ GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(HCLK_USBHOST0, "hclk_host0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(HCLK_USBHOST1, "hclk_host1", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 8, GFLAGS), -+ GATE(HCLK_USB_PERI, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(0, "hclk_emem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(0, "hclk_mem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 13, GFLAGS), -+ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 14, GFLAGS), -+ GATE(HCLK_NANDC1, "hclk_nandc1", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 15, GFLAGS), -+ GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 8, 
GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(0, "pmu_hclk_otg0", "hclk_peri", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(7), 5, GFLAGS), ++ mutex_unlock(&clk_boost_lock); + -+ /* pclk_peri gates */ -+ GATE(0, "pclk_peri_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(PCLK_PS2C, "pclk_ps2c", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 8, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 15, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 13, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 14, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(PCLK_SIM, "pclk_sim", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK3288_CLKGATE_CON(8), 1, GFLAGS), -+ -+ GATE(SCLK_LCDC_PWM0, "sclk_lcdc_pwm0", "xin24m", 0, RK3288_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(SCLK_LCDC_PWM1, "sclk_lcdc_pwm1", "xin24m", 0, RK3288_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK3288_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, RK3288_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(SCLK_MIPIDSI_24M, "sclk_mipidsi_24m", "xin24m", 0, RK3288_CLKGATE_CON(5), 15, GFLAGS), -+ -+ /* sclk_gpu gates */ -+ GATE(ACLK_GPU, "aclk_gpu", "sclk_gpu", 0, RK3288_CLKGATE_CON(18), 0, GFLAGS), -+ -+ /* pclk_pd_alive gates */ -+ GATE(PCLK_GPIO8, "pclk_gpio8", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(PCLK_GPIO7, "pclk_gpio7", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 1, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 4, GFLAGS), -+ GATE(PCLK_GPIO5, "pclk_gpio5", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(14), 12, GFLAGS), 
-+ -+ /* Watchdog pclk is controlled by RK3288_SGRF_SOC_CON0[1]. */ -+ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_pd_alive"), -+ -+ /* pclk_pd_pmu gates */ -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 1, GFLAGS), -+ GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 4, GFLAGS), -+ -+ /* hclk_vio gates */ -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(HCLK_VOP0, "hclk_vop0", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(HCLK_VOP1, "hclk_vop1", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 15, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(HCLK_VIO2_H2P, "hclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(PCLK_MIPI_DSI1, "pclk_mipi_dsi1", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_MIPI_CSI, "pclk_mipi_csi", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_LVDS_PHY, "pclk_lvds_phy", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(PCLK_VIO2_H2P, "pclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 11, GFLAGS), -+ -+ /* aclk_vio0 gates */ -+ GATE(ACLK_VOP0, "aclk_vop0", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 11, GFLAGS), -+ GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 14, GFLAGS), ++ return 0; ++} ++late_initcall(boost_debug_init); ++#endif /* MODULE */ ++#endif /* CONFIG_DEBUG_FS */ ++#endif /* CONFIG_ROCKCHIP_CLK_BOOST */ +diff --git a/drivers/clk/rockchip-oh/clk-pvtm.c b/drivers/clk/rockchip-oh/clk-pvtm.c +new file mode 100644 +index 000000000..c748589dd +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-pvtm.c +@@ -0,0 +1,311 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd ++ */ + -+ /* aclk_vio1 gates */ -+ GATE(ACLK_VOP1, "aclk_vop1", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vio1", 0, RK3288_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 12, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* aclk_rga_pre gates */ -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 0, GFLAGS), 
-+ GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 13, GFLAGS), ++#define CLK_SEL_EXTERNAL_32K 0 ++#define CLK_SEL_INTERNAL_PVTM 1 + -+ /* -+ * Other ungrouped clocks. -+ */ ++#define wr_msk_bit(v, off, msk) ((v) << (off) | (msk << (16 + (off)))) + -+ GATE(PCLK_VIP_IN, "pclk_vip_in", "ext_vip", 0, RK3288_CLKGATE_CON(16), 0, GFLAGS), -+ INVERTER(PCLK_VIP, "pclk_vip", "pclk_vip_in", RK3288_CLKSEL_CON(29), 4, IFLAGS), -+ GATE(PCLK_ISP_IN, "pclk_isp_in", "ext_isp", 0, RK3288_CLKGATE_CON(16), 3, GFLAGS), -+ INVERTER(0, "pclk_isp", "pclk_isp_in", RK3288_CLKSEL_CON(29), 3, IFLAGS), ++struct rockchip_clock_pvtm; + -+ GATE(SCLK_HSADC0_TSP, "clk_hsadc0_tsp", "ext_hsadc0_tsp", 0, RK3288_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(SCLK_HSADC1_TSP, "clk_hsadc1_tsp", "ext_hsadc0_tsp", 0, RK3288_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(SCLK_27M_TSP, "clk_27m_tsp", "ext_27m_tsp", 0, RK3288_CLKGATE_CON(8), 11, GFLAGS), -+}; ++struct rockchip_clock_pvtm_info { ++ u32 con; ++ u32 sta; ++ u32 sel_con; ++ u32 sel_shift; ++ u32 sel_value; ++ u32 sel_mask; ++ u32 div_shift; ++ u32 div_mask; + -+static struct rockchip_clk_branch rk3288w_hclkvio_branch[] __initdata = { -+ DIV(0, "hclk_vio", "aclk_vio1", 0, -+ RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), ++ u32 (*get_value)(struct rockchip_clock_pvtm *pvtm, ++ unsigned int time_us); ++ int (*init_freq)(struct rockchip_clock_pvtm *pvtm); ++ int (*sel_enable)(struct rockchip_clock_pvtm *pvtm); +}; + -+static struct rockchip_clk_branch rk3288_hclkvio_branch[] __initdata = { -+ DIV(0, "hclk_vio", "aclk_vio0", 0, -+ RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), ++struct rockchip_clock_pvtm { ++ const struct rockchip_clock_pvtm_info *info; ++ struct regmap *grf; ++ struct clk *pvtm_clk; ++ struct clk *clk; ++ unsigned long rate; +}; + -+static void __iomem *rk3288_cru_base; -+ -+/* -+ * Some CRU registers will be reset in maskrom when the system -+ * wakes up from fastboot. -+ * So save them before suspend, restore them after resume. -+ */ -+static const int rk3288_saved_cru_reg_ids[] = { -+ RK3288_MODE_CON, -+ RK3288_CLKSEL_CON(0), -+ RK3288_CLKSEL_CON(1), -+ RK3288_CLKSEL_CON(10), -+ RK3288_CLKSEL_CON(33), -+ RK3288_CLKSEL_CON(37), ++static unsigned long xin32k_pvtm_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ return 32768; ++} + -+ /* We turn aclk_dmac1 on for suspend; this will restore it */ -+ RK3288_CLKGATE_CON(10), ++static const struct clk_ops xin32k_pvtm = { ++ .recalc_rate = xin32k_pvtm_recalc_rate, +}; + -+static u32 rk3288_saved_cru_regs[ARRAY_SIZE(rk3288_saved_cru_reg_ids)]; -+ -+static int rk3288_clk_suspend(void) ++static void rockchip_clock_pvtm_delay(unsigned int delay) +{ -+ int i, reg_id; -+ -+ for (i = 0; i < ARRAY_SIZE(rk3288_saved_cru_reg_ids); i++) { -+ reg_id = rk3288_saved_cru_reg_ids[i]; ++ unsigned int ms = delay / 1000; ++ unsigned int us = delay % 1000; + -+ rk3288_saved_cru_regs[i] = -+ readl_relaxed(rk3288_cru_base + reg_id); ++ if (ms > 0) { ++ if (ms < 20) ++ us += ms * 1000; ++ else ++ msleep(ms); + } + -+ /* -+ * Going into deep sleep (specifically setting PMU_CLR_DMA in -+ * RK3288_PMU_PWRMODE_CON1) appears to fail unless -+ * "aclk_dmac1" is on. -+ */ -+ writel_relaxed(1 << (12 + 16), -+ rk3288_cru_base + RK3288_CLKGATE_CON(10)); ++ if (us >= 10) ++ usleep_range(us, us + 100); ++ else ++ udelay(us); ++} + -+ /* -+ * Switch PLLs other than DPLL (for SDRAM) to slow mode to -+ * avoid crashes on resume. 
The Mask ROM on the system will -+ * put APLL, CPLL, and GPLL into slow mode at resume time -+ * anyway (which is why we restore them), but we might not -+ * even make it to the Mask ROM if this isn't done at suspend -+ * time. -+ * -+ * NOTE: only APLL truly matters here, but we'll do them all. -+ */ ++static int rockchip_clock_sel_internal_pvtm(struct rockchip_clock_pvtm *pvtm) ++{ ++ int ret = 0; + -+ writel_relaxed(0xf3030000, rk3288_cru_base + RK3288_MODE_CON); ++ ret = regmap_write(pvtm->grf, pvtm->info->sel_con, ++ wr_msk_bit(pvtm->info->sel_value, ++ pvtm->info->sel_shift, ++ pvtm->info->sel_mask)); ++ if (ret != 0) ++ pr_err("%s: fail to write register\n", __func__); + -+ return 0; ++ return ret; +} + -+static void rk3288_clk_resume(void) ++/* get pmu pvtm value */ ++static u32 rockchip_clock_pvtm_get_value(struct rockchip_clock_pvtm *pvtm, ++ u32 time_us) +{ -+ int i, reg_id; ++ const struct rockchip_clock_pvtm_info *info = pvtm->info; ++ u32 val = 0, sta = 0; ++ u32 clk_cnt, check_cnt; + -+ for (i = ARRAY_SIZE(rk3288_saved_cru_reg_ids) - 1; i >= 0; i--) { -+ reg_id = rk3288_saved_cru_reg_ids[i]; ++ /* 24m clk ,24cnt=1us */ ++ clk_cnt = time_us * 24; + -+ writel_relaxed(rk3288_saved_cru_regs[i] | 0xffff0000, -+ rk3288_cru_base + reg_id); -+ } -+} ++ regmap_write(pvtm->grf, info->con + 0x4, clk_cnt); ++ regmap_write(pvtm->grf, info->con, wr_msk_bit(3, 0, 0x3)); + -+static void rk3288_clk_shutdown(void) -+{ -+ writel_relaxed(0xf3030000, rk3288_cru_base + RK3288_MODE_CON); -+} ++ rockchip_clock_pvtm_delay(time_us); + -+static struct syscore_ops rk3288_clk_syscore_ops = { -+ .suspend = rk3288_clk_suspend, -+ .resume = rk3288_clk_resume, -+}; ++ check_cnt = 100; ++ while (check_cnt) { ++ regmap_read(pvtm->grf, info->sta, &sta); ++ if (sta & 0x1) ++ break; ++ udelay(4); ++ check_cnt--; ++ } + -+static void rk3288_dump_cru(void) -+{ -+ if (rk3288_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3288_cru_base, -+ 0x21c, false); ++ if (check_cnt) { ++ regmap_read(pvtm->grf, info->sta + 0x4, &val); ++ } else { ++ pr_err("%s: wait pvtm_done timeout!\n", __func__); ++ val = 0; + } ++ ++ regmap_write(pvtm->grf, info->con, wr_msk_bit(0, 0, 0x3)); ++ ++ return val; +} + -+static void __init rk3288_common_init(struct device_node *np, -+ enum rk3288_variant soc) ++static int rockchip_clock_pvtm_init_freq(struct rockchip_clock_pvtm *pvtm) +{ -+ struct rockchip_clk_provider *ctx; -+ struct clk **clks; ++ u32 pvtm_cnt = 0; ++ u32 div, time_us; ++ int ret = 0; + -+ rk3288_cru_base = of_iomap(np, 0); -+ if (!rk3288_cru_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++ time_us = 1000; ++ pvtm_cnt = pvtm->info->get_value(pvtm, time_us); ++ pr_debug("get pvtm_cnt = %d\n", pvtm_cnt); + -+ ctx = rockchip_clk_init(np, rk3288_cru_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(rk3288_cru_base); -+ return; ++ /* set pvtm_div to get rate */ ++ div = DIV_ROUND_UP(1000 * pvtm_cnt, pvtm->rate); ++ if (div > pvtm->info->div_mask) { ++ pr_err("pvtm_div out of bounary! 
set max instead\n"); ++ div = pvtm->info->div_mask; + } -+ clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3288_pll_clks, -+ ARRAY_SIZE(rk3288_pll_clks), -+ RK3288_GRF_SOC_STATUS1); -+ rockchip_clk_register_branches(ctx, rk3288_clk_branches, -+ ARRAY_SIZE(rk3288_clk_branches)); ++ pr_debug("set div %d, rate %luKHZ\n", div, pvtm->rate); ++ ret = regmap_write(pvtm->grf, pvtm->info->con, ++ wr_msk_bit(div, pvtm->info->div_shift, ++ pvtm->info->div_mask)); ++ if (ret != 0) ++ goto out; + -+ if (soc == RK3288W_CRU) -+ rockchip_clk_register_branches(ctx, rk3288w_hclkvio_branch, -+ ARRAY_SIZE(rk3288w_hclkvio_branch)); -+ else -+ rockchip_clk_register_branches(ctx, rk3288_hclkvio_branch, -+ ARRAY_SIZE(rk3288_hclkvio_branch)); ++ /* pmu pvtm oscilator enable */ ++ ret = regmap_write(pvtm->grf, pvtm->info->con, ++ wr_msk_bit(1, 1, 0x1)); ++ if (ret != 0) ++ goto out; + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3288_cpuclk_data, rk3288_cpuclk_rates, -+ ARRAY_SIZE(rk3288_cpuclk_rates)); ++ ret = pvtm->info->sel_enable(pvtm); ++out: ++ if (ret != 0) ++ pr_err("%s: fail to write register\n", __func__); + -+ rockchip_register_softrst(np, 12, -+ rk3288_cru_base + RK3288_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ return ret; ++} + -+ rockchip_register_restart_notifier(ctx, RK3288_GLB_SRST_FST, -+ rk3288_clk_shutdown); ++static int clock_pvtm_regitstor(struct device *dev, ++ struct rockchip_clock_pvtm *pvtm) ++{ ++ struct clk_init_data init = {}; ++ struct clk_hw *clk_hw; + -+ if (!psci_smp_available()) -+ register_syscore_ops(&rk3288_clk_syscore_ops); ++ /* Init the xin32k_pvtm */ ++ pvtm->info->init_freq(pvtm); + -+ rockchip_clk_of_add_provider(np, ctx); ++ init.parent_names = NULL; ++ init.num_parents = 0; ++ init.name = "xin32k_pvtm"; ++ init.ops = &xin32k_pvtm; + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3288_dump_cru; -+} ++ clk_hw = devm_kzalloc(dev, sizeof(*clk_hw), GFP_KERNEL); ++ if (!clk_hw) ++ return -ENOMEM; ++ clk_hw->init = &init; + -+static void __init rk3288_clk_init(struct device_node *np) -+{ -+ rk3288_common_init(np, RK3288_CRU); -+} -+CLK_OF_DECLARE(rk3288_cru, "rockchip,rk3288-cru", rk3288_clk_init); ++ /* optional override of the clockname */ ++ of_property_read_string_index(dev->of_node, "clock-output-names", ++ 0, &init.name); ++ pvtm->clk = devm_clk_register(dev, clk_hw); ++ if (IS_ERR(pvtm->clk)) ++ return PTR_ERR(pvtm->clk); + -+static void __init rk3288w_clk_init(struct device_node *np) -+{ -+ rk3288_common_init(np, RK3288W_CRU); ++ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, ++ pvtm->clk); +} -+CLK_OF_DECLARE(rk3288w_cru, "rockchip,rk3288w-cru", rk3288w_clk_init); -+ -+struct clk_rk3288_inits { -+ void (*inits)(struct device_node *np); -+}; + -+static const struct clk_rk3288_inits clk_rk3288_init = { -+ .inits = rk3288_clk_init, -+}; ++static const struct rockchip_clock_pvtm_info rk3368_pvtm_data = { ++ .con = 0x180, ++ .sta = 0x190, ++ .sel_con = 0x100, ++ .sel_shift = 6, ++ .sel_value = CLK_SEL_INTERNAL_PVTM, ++ .sel_mask = 0x1, ++ .div_shift = 2, ++ .div_mask = 0x3f, + -+static const struct clk_rk3288_inits clk_rk3288w_init = { -+ .inits = rk3288w_clk_init, ++ .sel_enable = rockchip_clock_sel_internal_pvtm, ++ .get_value = rockchip_clock_pvtm_get_value, ++ .init_freq = rockchip_clock_pvtm_init_freq, +}; + -+static const struct of_device_id clk_rk3288_match_table[] = { ++static const struct of_device_id rockchip_clock_pvtm_match[] = { + { -+ .compatible = 
"rockchip,rk3288-cru", -+ .data = &clk_rk3288_init, -+ }, { -+ .compatible = "rockchip,rk3288w-cru", -+ .data = &clk_rk3288w_init, ++ .compatible = "rockchip,rk3368-pvtm-clock", ++ .data = (void *)&rk3368_pvtm_data, + }, -+ { } ++ {} +}; -+MODULE_DEVICE_TABLE(of, clk_rk3288_match_table); ++MODULE_DEVICE_TABLE(of, rockchip_clock_pvtm_match); + -+static int __init clk_rk3288_probe(struct platform_device *pdev) ++static int rockchip_clock_pvtm_probe(struct platform_device *pdev) +{ ++ struct device *dev = &pdev->dev; + struct device_node *np = pdev->dev.of_node; + const struct of_device_id *match; -+ const struct clk_rk3288_inits *init_data; ++ struct rockchip_clock_pvtm *pvtm; ++ int error; ++ u32 rate; + -+ match = of_match_device(clk_rk3288_match_table, &pdev->dev); -+ if (!match || !match->data) ++ pvtm = devm_kzalloc(dev, sizeof(*pvtm), GFP_KERNEL); ++ if (!pvtm) ++ return -ENOMEM; ++ ++ match = of_match_node(rockchip_clock_pvtm_match, np); ++ if (!match) ++ return -ENXIO; ++ ++ pvtm->info = (const struct rockchip_clock_pvtm_info *)match->data; ++ if (!pvtm->info) + return -EINVAL; + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++ if (!dev->parent || !dev->parent->of_node) ++ return -EINVAL; ++ ++ pvtm->grf = syscon_node_to_regmap(dev->parent->of_node); ++ if (IS_ERR(pvtm->grf)) ++ return PTR_ERR(pvtm->grf); ++ ++ if (!of_property_read_u32(np, "pvtm-rate", &rate)) ++ pvtm->rate = rate; ++ else ++ pvtm->rate = 32768; ++ ++ pvtm->pvtm_clk = devm_clk_get(&pdev->dev, "pvtm_pmu_clk"); ++ if (IS_ERR(pvtm->pvtm_clk)) { ++ error = PTR_ERR(pvtm->pvtm_clk); ++ if (error != -EPROBE_DEFER) ++ dev_err(&pdev->dev, ++ "failed to get pvtm core clock: %d\n", ++ error); ++ goto out_probe; ++ } ++ ++ error = clk_prepare_enable(pvtm->pvtm_clk); ++ if (error) { ++ dev_err(&pdev->dev, "failed to enable the clock: %d\n", ++ error); ++ goto out_probe; ++ } ++ ++ platform_set_drvdata(pdev, pvtm); ++ ++ error = clock_pvtm_regitstor(&pdev->dev, pvtm); ++ if (error) { ++ dev_err(&pdev->dev, "failed to registor clock: %d\n", ++ error); ++ goto out_clk_put; ++ } ++ ++ return error; ++ ++out_clk_put: ++ clk_disable_unprepare(pvtm->pvtm_clk); ++out_probe: ++ return error; ++} ++ ++static int rockchip_clock_pvtm_remove(struct platform_device *pdev) ++{ ++ struct rockchip_clock_pvtm *pvtm = platform_get_drvdata(pdev); ++ struct device_node *np = pdev->dev.of_node; ++ ++ of_clk_del_provider(np); ++ clk_disable_unprepare(pvtm->pvtm_clk); + + return 0; +} + -+static struct platform_driver clk_rk3288_driver = { -+ .driver = { -+ .name = "clk-rk3288", -+ .of_match_table = clk_rk3288_match_table, ++static struct platform_driver rockchip_clock_pvtm_driver = { ++ .driver = { ++ .name = "rockchip-clcok-pvtm", ++ .of_match_table = rockchip_clock_pvtm_match, + }, ++ .probe = rockchip_clock_pvtm_probe, ++ .remove = rockchip_clock_pvtm_remove, +}; -+builtin_platform_driver_probe(clk_rk3288_driver, clk_rk3288_probe); + -+MODULE_DESCRIPTION("Rockchip RK3288 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3308.c b/drivers/clk/rockchip-oh/clk-rk3308.c ++module_platform_driver(rockchip_clock_pvtm_driver); ++ ++MODULE_DESCRIPTION("Rockchip Clock Pvtm Driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/clk/rockchip-oh/clk-px30.c b/drivers/clk/rockchip-oh/clk-px30.c new file mode 100644 -index 000000000..6a8e38f11 +index 000000000..d76aaf04b --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3308.c -@@ -0,0 +1,1013 @@ ++++ b/drivers/clk/rockchip-oh/clk-px30.c +@@ 
-0,0 +1,1139 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. -+ * Author: Finley Xiao ++ * Copyright (c) 2018 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang + */ + +#include +#include ++#include +#include +#include -+#include +#include +#include +#include -+#include ++#include +#include "clk.h" + -+#define RK3308_GRF_SOC_STATUS0 0x380 ++#define PX30_GRF_SOC_STATUS0 0x480 + -+enum rk3308_plls { -+ apll, dpll, vpll0, vpll1, ++enum px30_plls { ++ apll, dpll, cpll, npll, apll_b_h, apll_b_l, +}; + -+static struct rockchip_pll_rate_table rk3308_pll_rates[] = { ++enum px30_pmu_plls { ++ gpll, ++}; ++ ++static struct rockchip_pll_rate_table px30_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), + RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), @@ -48691,873 +48853,937 @@ index 000000000..6a8e38f11 + { /* sentinel */ }, +}; + -+#define RK3308_DIV_ACLKM_MASK 0x7 -+#define RK3308_DIV_ACLKM_SHIFT 12 -+#define RK3308_DIV_PCLK_DBG_MASK 0xf -+#define RK3308_DIV_PCLK_DBG_SHIFT 8 ++#define PX30_DIV_ACLKM_MASK 0x7 ++#define PX30_DIV_ACLKM_SHIFT 12 ++#define PX30_DIV_PCLK_DBG_MASK 0xf ++#define PX30_DIV_PCLK_DBG_SHIFT 8 + -+#define RK3308_CLKSEL0(_aclk_core, _pclk_dbg) \ ++#define PX30_CLKSEL0(_aclk_core, _pclk_dbg) \ +{ \ -+ .reg = RK3308_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK3308_DIV_ACLKM_MASK, \ -+ RK3308_DIV_ACLKM_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, RK3308_DIV_PCLK_DBG_MASK, \ -+ RK3308_DIV_PCLK_DBG_SHIFT), \ ++ .reg = PX30_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, PX30_DIV_ACLKM_MASK, \ ++ PX30_DIV_ACLKM_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, PX30_DIV_PCLK_DBG_MASK, \ ++ PX30_DIV_PCLK_DBG_SHIFT), \ +} + -+#define RK3308_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++#define PX30_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ +{ \ + .prate = _prate, \ + .divs = { \ -+ RK3308_CLKSEL0(_aclk_core, _pclk_dbg), \ ++ PX30_CLKSEL0(_aclk_core, _pclk_dbg), \ + }, \ +} + -+static struct rockchip_cpuclk_rate_table rk3308_cpuclk_rates[] __initdata = { -+ RK3308_CPUCLK_RATE(1608000000, 1, 7), -+ RK3308_CPUCLK_RATE(1512000000, 1, 7), -+ RK3308_CPUCLK_RATE(1488000000, 1, 5), -+ RK3308_CPUCLK_RATE(1416000000, 1, 5), -+ RK3308_CPUCLK_RATE(1392000000, 1, 5), -+ RK3308_CPUCLK_RATE(1296000000, 1, 5), -+ RK3308_CPUCLK_RATE(1200000000, 1, 5), -+ RK3308_CPUCLK_RATE(1104000000, 1, 5), -+ RK3308_CPUCLK_RATE(1008000000, 1, 5), -+ RK3308_CPUCLK_RATE(912000000, 1, 5), -+ RK3308_CPUCLK_RATE(816000000, 1, 3), -+ RK3308_CPUCLK_RATE(696000000, 1, 3), -+ RK3308_CPUCLK_RATE(600000000, 1, 3), -+ RK3308_CPUCLK_RATE(408000000, 1, 1), -+ RK3308_CPUCLK_RATE(312000000, 1, 1), -+ RK3308_CPUCLK_RATE(216000000, 1, 1), -+ RK3308_CPUCLK_RATE(96000000, 1, 1), ++static struct rockchip_cpuclk_rate_table px30_cpuclk_rates[] __initdata = { ++ PX30_CPUCLK_RATE(1608000000, 1, 7), ++ PX30_CPUCLK_RATE(1584000000, 1, 7), ++ PX30_CPUCLK_RATE(1560000000, 1, 7), ++ PX30_CPUCLK_RATE(1536000000, 1, 7), ++ PX30_CPUCLK_RATE(1512000000, 1, 7), ++ PX30_CPUCLK_RATE(1488000000, 1, 5), ++ PX30_CPUCLK_RATE(1464000000, 1, 5), ++ PX30_CPUCLK_RATE(1440000000, 1, 5), ++ PX30_CPUCLK_RATE(1416000000, 1, 5), ++ PX30_CPUCLK_RATE(1392000000, 1, 5), ++ PX30_CPUCLK_RATE(1368000000, 1, 5), ++ PX30_CPUCLK_RATE(1344000000, 1, 5), ++ PX30_CPUCLK_RATE(1320000000, 1, 5), ++ PX30_CPUCLK_RATE(1296000000, 1, 5), ++ PX30_CPUCLK_RATE(1272000000, 1, 5), ++ PX30_CPUCLK_RATE(1248000000, 1, 5), ++ 
PX30_CPUCLK_RATE(1224000000, 1, 5), ++ PX30_CPUCLK_RATE(1200000000, 1, 5), ++ PX30_CPUCLK_RATE(1104000000, 1, 5), ++ PX30_CPUCLK_RATE(1008000000, 1, 5), ++ PX30_CPUCLK_RATE(912000000, 1, 5), ++ PX30_CPUCLK_RATE(816000000, 1, 3), ++ PX30_CPUCLK_RATE(696000000, 1, 3), ++ PX30_CPUCLK_RATE(600000000, 1, 3), ++ PX30_CPUCLK_RATE(408000000, 1, 1), ++ PX30_CPUCLK_RATE(312000000, 1, 1), ++ PX30_CPUCLK_RATE(216000000, 1, 1), ++ PX30_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk3308_cpuclk_data = { -+ .core_reg[0] = RK3308_CLKSEL_CON(0), ++static const struct rockchip_cpuclk_reg_data px30_cpuclk_data = { ++ .core_reg[0] = PX30_CLKSEL_CON(0), + .div_core_shift[0] = 0, + .div_core_mask[0] = 0xf, + .num_cores = 1, + .mux_core_alt = 1, + .mux_core_main = 0, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, ++ .mux_core_shift = 7, ++ .mux_core_mask = 0x1, ++ .pll_name = "pll_apll", +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k" }; -+PNAME(mux_dpll_vpll0_p) = { "dpll", "vpll0" }; -+PNAME(mux_dpll_vpll0_xin24m_p) = { "dpll", "vpll0", "xin24m" }; -+PNAME(mux_dpll_vpll0_vpll1_p) = { "dpll", "vpll0", "vpll1" }; -+PNAME(mux_dpll_vpll0_vpll1_xin24m_p) = { "dpll", "vpll0", "vpll1", "xin24m" }; -+PNAME(mux_dpll_vpll0_vpll1_usb480m_xin24m_p) = { "dpll", "vpll0", "vpll1", "usb480m", "xin24m" }; -+PNAME(mux_vpll0_vpll1_p) = { "vpll0", "vpll1" }; -+PNAME(mux_vpll0_vpll1_xin24m_p) = { "vpll0", "vpll1", "xin24m" }; -+PNAME(mux_uart0_p) = { "clk_uart0_src", "dummy", "clk_uart0_frac" }; -+PNAME(mux_uart1_p) = { "clk_uart1_src", "dummy", "clk_uart1_frac" }; -+PNAME(mux_uart2_p) = { "clk_uart2_src", "dummy", "clk_uart2_frac" }; -+PNAME(mux_uart3_p) = { "clk_uart3_src", "dummy", "clk_uart3_frac" }; -+PNAME(mux_uart4_p) = { "clk_uart4_src", "dummy", "clk_uart4_frac" }; -+PNAME(mux_dclk_vop_p) = { "dclk_vop_src", "dclk_vop_frac", "xin24m" }; ++PNAME(mux_pll_p) = { "xin24m"}; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k_pmu" }; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_ddrstdby_p) = { "clk_ddrphy1x", "clk_stdby_2wrap" }; ++PNAME(mux_gpll_dmycpll_usb480m_npll_p) = { "gpll", "dummy_cpll", "usb480m", "npll" }; ++PNAME(mux_gpll_dmycpll_usb480m_dmynpll_p) = { "gpll", "dummy_cpll", "usb480m", "dummy_npll" }; ++PNAME(mux_cpll_npll_p) = { "cpll", "npll" }; ++PNAME(mux_npll_cpll_p) = { "npll", "cpll" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "dummy_cpll" }; ++PNAME(mux_gpll_npll_p) = { "gpll", "dummy_npll" }; ++PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m"}; ++PNAME(mux_xin24m_gpll_p) = { "xin24m", "gpll"}; ++PNAME(mux_gpll_cpll_npll_p) = { "gpll", "dummy_cpll", "dummy_npll" }; ++PNAME(mux_gpll_cpll_npll_xin24m_p) = { "gpll", "dummy_cpll", "dummy_npll", "xin24m" }; ++PNAME(mux_gpll_xin24m_npll_p) = { "gpll", "xin24m", "dummy_npll"}; ++PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; ++PNAME(mux_i2s0_tx_p) = { "clk_i2s0_tx_src", "clk_i2s0_tx_frac", "mclk_i2s0_tx_in", "xin12m"}; ++PNAME(mux_i2s0_rx_p) = { "clk_i2s0_rx_src", "clk_i2s0_rx_frac", "mclk_i2s0_rx_in", "xin12m"}; ++PNAME(mux_i2s1_p) = { "clk_i2s1_src", "clk_i2s1_frac", "i2s1_clkin", "xin12m"}; ++PNAME(mux_i2s2_p) = { "clk_i2s2_src", "clk_i2s2_frac", "i2s2_clkin", "xin12m"}; ++PNAME(mux_i2s0_tx_out_p) = { "clk_i2s0_tx", "xin12m", "clk_i2s0_rx"}; ++PNAME(mux_i2s0_rx_out_p) = { "clk_i2s0_rx", "xin12m", "clk_i2s0_tx"}; ++PNAME(mux_i2s1_out_p) = { "clk_i2s1", "xin12m"}; ++PNAME(mux_i2s2_out_p) = { "clk_i2s2", "xin12m"}; ++PNAME(mux_i2s0_tx_rx_p) 
= { "clk_i2s0_tx_mux", "clk_i2s0_rx_mux"}; ++PNAME(mux_i2s0_rx_tx_p) = { "clk_i2s0_rx_mux", "clk_i2s0_tx_mux"}; ++PNAME(mux_uart_src_p) = { "gpll", "xin24m", "usb480m", "dummy_npll" }; ++PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac" }; ++PNAME(mux_uart2_p) = { "clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac" }; ++PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac" }; ++PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac" }; ++PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac" }; ++PNAME(mux_cif_out_p) = { "xin24m", "dummy_cpll", "dummy_npll", "usb480m" }; ++PNAME(mux_dclk_vopb_p) = { "dclk_vopb_src", "dummy", "xin24m" }; ++PNAME(mux_dclk_vopl_p) = { "dclk_vopl_src", "dummy", "xin24m" }; +PNAME(mux_nandc_p) = { "clk_nandc_div", "clk_nandc_div50" }; -+PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; +PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; +PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; -+PNAME(mux_mac_p) = { "clk_mac_src", "mac_clkin" }; -+PNAME(mux_mac_rmii_sel_p) = { "clk_mac_rx_tx_div20", "clk_mac_rx_tx_div2" }; -+PNAME(mux_ddrstdby_p) = { "clk_ddrphy1x_out", "clk_ddr_stdby_div4" }; -+PNAME(mux_rtc32k_p) = { "xin32k", "clk_pvtm_32k", "clk_rtc32k_frac", "clk_rtc32k_div" }; -+PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_usbphy_ref_src" }; -+PNAME(mux_wifi_src_p) = { "clk_wifi_dpll", "clk_wifi_vpll0" }; -+PNAME(mux_wifi_p) = { "clk_wifi_osc", "clk_wifi_src" }; -+PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; -+PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in" }; -+PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; -+PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m" }; -+PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in" }; -+PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; -+PNAME(mux_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "mclk_i2s1_8ch_in" }; -+PNAME(mux_i2s1_8ch_tx_rx_p) = { "clk_i2s1_8ch_tx_mux", "clk_i2s1_8ch_rx_mux"}; -+PNAME(mux_i2s1_8ch_tx_out_p) = { "clk_i2s1_8ch_tx", "xin12m" }; -+PNAME(mux_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "mclk_i2s1_8ch_in" }; -+PNAME(mux_i2s1_8ch_rx_tx_p) = { "clk_i2s1_8ch_rx_mux", "clk_i2s1_8ch_tx_mux"}; -+PNAME(mux_i2s2_8ch_tx_p) = { "clk_i2s2_8ch_tx_src", "clk_i2s2_8ch_tx_frac", "mclk_i2s2_8ch_in" }; -+PNAME(mux_i2s2_8ch_tx_rx_p) = { "clk_i2s2_8ch_tx_mux", "clk_i2s2_8ch_rx_mux"}; -+PNAME(mux_i2s2_8ch_tx_out_p) = { "clk_i2s2_8ch_tx", "xin12m" }; -+PNAME(mux_i2s2_8ch_rx_p) = { "clk_i2s2_8ch_rx_src", "clk_i2s2_8ch_rx_frac", "mclk_i2s2_8ch_in" }; -+PNAME(mux_i2s2_8ch_rx_tx_p) = { "clk_i2s2_8ch_rx_mux", "clk_i2s2_8ch_tx_mux"}; -+PNAME(mux_i2s3_8ch_tx_p) = { "clk_i2s3_8ch_tx_src", "clk_i2s3_8ch_tx_frac", "mclk_i2s3_8ch_in" }; -+PNAME(mux_i2s3_8ch_tx_rx_p) = { "clk_i2s3_8ch_tx_mux", "clk_i2s3_8ch_rx_mux"}; -+PNAME(mux_i2s3_8ch_tx_out_p) = { "clk_i2s3_8ch_tx", "xin12m" }; -+PNAME(mux_i2s3_8ch_rx_p) = { "clk_i2s3_8ch_rx_src", "clk_i2s3_8ch_rx_frac", "mclk_i2s3_8ch_in" }; -+PNAME(mux_i2s3_8ch_rx_tx_p) = { "clk_i2s3_8ch_rx_mux", "clk_i2s3_8ch_tx_mux"}; -+PNAME(mux_i2s0_2ch_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "mclk_i2s0_2ch_in" }; -+PNAME(mux_i2s0_2ch_out_p) = { "clk_i2s0_2ch", "xin12m" }; -+PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in"}; -+PNAME(mux_i2s1_2ch_out_p) 
= { "clk_i2s1_2ch", "xin12m" }; -+PNAME(mux_spdif_tx_src_p) = { "clk_spdif_tx_div", "clk_spdif_tx_div50" }; -+PNAME(mux_spdif_tx_p) = { "clk_spdif_tx_src", "clk_spdif_tx_frac", "mclk_i2s0_2ch_in" }; -+PNAME(mux_spdif_rx_src_p) = { "clk_spdif_rx_div", "clk_spdif_rx_div50" }; -+PNAME(mux_spdif_rx_p) = { "clk_spdif_rx_src", "clk_spdif_rx_frac" }; -+PNAME(mux_uart_src_p) = { "usb480m", "xin24m", "dpll", "vpll0", "vpll1" }; -+static u32 uart_src_mux_idx[] = { 3, 4, 0, 1, 2 }; ++PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; ++PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; ++PNAME(mux_gmac_rmii_sel_p) = { "clk_gmac_rx_tx_div20", "clk_gmac_rx_tx_div2" }; ++PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac", }; ++PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; ++PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac" }; ++PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_gpu_p) = { "clk_gpu_div", "clk_gpu_np5" }; + -+static struct rockchip_pll_clock rk3308_pll_clks[] __initdata = { ++static struct rockchip_pll_clock px30_pll_clks[] __initdata = { + [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ 0, RK3308_PLL_CON(0), -+ RK3308_MODE_CON, 0, 0, 0, rk3308_pll_rates), ++ CLK_IS_CRITICAL, PX30_PLL_CON(0), ++ PX30_MODE_CON, 0, 0, 0, px30_pll_rates), + [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ 0, RK3308_PLL_CON(8), -+ RK3308_MODE_CON, 2, 1, 0, rk3308_pll_rates), -+ [vpll0] = PLL(pll_rk3328, PLL_VPLL0, "vpll0", mux_pll_p, -+ 0, RK3308_PLL_CON(16), -+ RK3308_MODE_CON, 4, 2, 0, rk3308_pll_rates), -+ [vpll1] = PLL(pll_rk3328, PLL_VPLL1, "vpll1", mux_pll_p, -+ 0, RK3308_PLL_CON(24), -+ RK3308_MODE_CON, 6, 3, 0, rk3308_pll_rates), ++ CLK_IS_CRITICAL, PX30_PLL_CON(8), ++ PX30_MODE_CON, 4, 1, 0, NULL), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, PX30_PLL_CON(16), ++ PX30_MODE_CON, 2, 2, 0, px30_pll_rates), ++ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, ++ CLK_IS_CRITICAL, PX30_PLL_CON(24), ++ PX30_MODE_CON, 6, 4, 0, px30_pll_rates), ++}; ++ ++static struct rockchip_pll_clock px30_pmu_pll_clks[] __initdata = { ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, 0, PX30_PMU_PLL_CON(0), ++ PX30_PMU_MODE, 0, 3, 0, px30_pll_rates), +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3308_uart0_fracmux __initdata = -+ MUX(0, "clk_uart0_mux", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(11), 14, 2, MFLAGS); ++static struct rockchip_clk_branch px30_pdm_fracmux __initdata = ++ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(26), 15, 1, MFLAGS); + -+static struct rockchip_clk_branch rk3308_uart1_fracmux __initdata = ++static struct rockchip_clk_branch px30_i2s0_tx_fracmux __initdata = ++ MUX(SCLK_I2S0_TX_MUX, "clk_i2s0_tx_mux", mux_i2s0_tx_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(28), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch px30_i2s0_rx_fracmux __initdata = ++ MUX(SCLK_I2S0_RX_MUX, "clk_i2s0_rx_mux", mux_i2s0_rx_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(58), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch px30_i2s1_fracmux __initdata = ++ MUX(0, "clk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(30), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch px30_i2s2_fracmux __initdata = ++ MUX(0, 
"clk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(32), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch px30_uart1_fracmux __initdata = + MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(14), 14, 2, MFLAGS); ++ PX30_CLKSEL_CON(35), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_uart2_fracmux __initdata = ++static struct rockchip_clk_branch px30_uart2_fracmux __initdata = + MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(17), 14, 2, MFLAGS); ++ PX30_CLKSEL_CON(38), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_uart3_fracmux __initdata = ++static struct rockchip_clk_branch px30_uart3_fracmux __initdata = + MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(20), 14, 2, MFLAGS); ++ PX30_CLKSEL_CON(41), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_uart4_fracmux __initdata = ++static struct rockchip_clk_branch px30_uart4_fracmux __initdata = + MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(23), 14, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3308_dclk_vop_fracmux __initdata = -+ MUX(0, "dclk_vop_mux", mux_dclk_vop_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(8), 14, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3308_rtc32k_fracmux __initdata = -+ MUX(SCLK_RTC32K, "clk_rtc32k", mux_rtc32k_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(2), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3308_pdm_fracmux __initdata = -+ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(46), 15, 1, MFLAGS); ++ PX30_CLKSEL_CON(44), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_i2s0_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(52), 10, 2, MFLAGS); ++static struct rockchip_clk_branch px30_uart5_fracmux __initdata = ++ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(47), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_i2s0_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(54), 10, 2, MFLAGS); ++static struct rockchip_clk_branch px30_rtc32k_pmu_fracmux __initdata = ++ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, ++ PX30_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_i2s1_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S1_8CH_TX_MUX, "clk_i2s1_8ch_tx_mux", mux_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(56), 10, 2, MFLAGS); ++static struct rockchip_clk_branch px30_uart0_pmu_fracmux __initdata = ++ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3308_i2s1_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S1_8CH_RX_MUX, "clk_i2s1_8ch_rx_mux", mux_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(58), 10, 2, MFLAGS); ++static struct rockchip_clk_branch px30_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ + -+static struct rockchip_clk_branch rk3308_i2s2_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S2_8CH_TX_MUX, "clk_i2s2_8ch_tx_mux", mux_i2s2_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(60), 10, 2, MFLAGS); ++ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ PX30_MODE_CON, 8, 2, MFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 
0, 1, 2), + -+static struct rockchip_clk_branch rk3308_i2s2_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S2_8CH_RX_MUX, "clk_i2s2_8ch_rx_mux", mux_i2s2_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(62), 10, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+static struct rockchip_clk_branch rk3308_i2s3_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S3_8CH_TX_MUX, "clk_i2s3_8ch_tx_mux", mux_i2s3_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(64), 10, 2, MFLAGS); ++ /* PD_CORE */ ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ PX30_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ PX30_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "aclk_core_prf", "aclk_core", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(17), 5, GFLAGS), ++ GATE(0, "pclk_dbg_niu", "pclk_dbg", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(0, "pclk_core_dbg", "pclk_dbg", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "pclk_core_grf", "pclk_dbg", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(17), 6, GFLAGS), + -+static struct rockchip_clk_branch rk3308_i2s3_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S3_8CH_RX_MUX, "clk_i2s3_8ch_rx_mux", mux_i2s3_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(66), 10, 2, MFLAGS); ++ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(SCLK_PVTM, "clk_pvtm", "xin24m", 0, ++ PX30_CLKGATE_CON(17), 4, GFLAGS), + -+static struct rockchip_clk_branch rk3308_i2s0_2ch_fracmux __initdata = -+ MUX(0, "clk_i2s0_2ch_mux", mux_i2s0_2ch_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(68), 10, 2, MFLAGS); ++ /* PD_GPU */ ++ GATE(SCLK_GPU, "clk_gpu", "clk_gpu_src", 0, ++ PX30_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_gpu", "clk_gpu", CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(1), 13, 2, DFLAGS, ++ PX30_CLKGATE_CON(17), 10, GFLAGS), ++ GATE(0, "aclk_gpu_niu", "aclk_gpu", CLK_IS_CRITICAL, ++ PX30_CLKGATE_CON(0), 11, GFLAGS), ++ GATE(0, "aclk_gpu_prf", "aclk_gpu", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(0, "pclk_gpu_grf", "aclk_gpu", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(17), 9, GFLAGS), + -+static struct rockchip_clk_branch rk3308_i2s1_2ch_fracmux __initdata = -+ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(70), 10, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 4 ++ */ + -+static struct rockchip_clk_branch rk3308_spdif_tx_fracmux __initdata = -+ MUX(0, "clk_spdif_tx_mux", mux_spdif_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(48), 14, 2, MFLAGS); ++ /* PD_DDR */ ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, ++ CLK_IGNORE_UNUSED, PX30_CLKSEL_CON(2), 7, 1, 0, 3, ++ ROCKCHIP_DDRCLK_SIP_V2), ++ COMPOSITE_NODIV(0, "clk_ddrstdby", mux_ddrstdby_p, CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(2), 4, 1, MFLAGS, ++ PX30_CLKGATE_CON(1), 13, GFLAGS), ++ GATE(0, "aclk_split", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 15, GFLAGS), ++ 
GATE(0, "clk_msch", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(0, "aclk_ddrc", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(0, "clk_core_ddrc", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(0, "aclk_cmd_buff", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(0, "clk_ddrmon", "clk_ddrphy1x", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 11, GFLAGS), + -+static struct rockchip_clk_branch rk3308_spdif_rx_fracmux __initdata = -+ MUX(0, "clk_spdif_rx_mux", mux_spdif_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(50), 15, 1, MFLAGS); ++ GATE(0, "clk_ddrmon_timer", "xin24m", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(0), 15, GFLAGS), + ++ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(2), 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(0, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(0, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(0, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(0, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(0, "pclk_cmdbuff", "pclk_ddr", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(1), 3, GFLAGS), + -+static struct rockchip_clk_branch rk3308_clk_branches[] __initdata = { + /* -+ * Clock-Architecture Diagram 1 ++ * Clock-Architecture Diagram 5 + */ + -+ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK3308_MODE_CON, 8, 2, MFLAGS), -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ /* PD_VI */ ++ COMPOSITE(ACLK_VI_PRE, "aclk_vi_pre", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(4), 8, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VI_PRE, "hclk_vi_pre", "aclk_vi_pre", 0, ++ PX30_CLKSEL_CON(11), 8, 4, DFLAGS, ++ PX30_CLKGATE_CON(4), 12, GFLAGS), ++ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(4), 9, GFLAGS), ++ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_cif_out_p, 0, ++ PX30_CLKSEL_CON(13), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ PX30_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(PCLK_ISP, "pclkin_isp", "ext_pclkin", 0, ++ PX30_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(PCLK_CIF, "pclkin_cif", "ext_pclkin", 0, ++ PX30_CLKGATE_CON(4), 14, GFLAGS), + + /* -+ * Clock-Architecture Diagram 2 ++ * Clock-Architecture Diagram 6 + */ + -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "vpll0_core", "vpll0", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "vpll1_core", "vpll1", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3308_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3308_CLKGATE_CON(0), 1, GFLAGS), ++ /* PD_VO */ ++ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(3), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo_pre", 0, ++ PX30_CLKSEL_CON(3), 8, 4, DFLAGS, ++ PX30_CLKGATE_CON(2), 12, GFLAGS), ++ 
COMPOSITE_NOMUX(PCLK_VO_PRE, "pclk_vo_pre", "aclk_vo_pre", 0, ++ PX30_CLKSEL_CON(3), 12, 4, DFLAGS, ++ PX30_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE(SCLK_RGA_CORE, "clk_rga_core", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(2), 1, GFLAGS), + -+ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE(SCLK_VOPB_PWM, "clk_vopb_pwm", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(7), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE(0, "dclk_vopb_src", mux_cpll_npll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(5), 11, 1, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(2), 2, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOPB, "dclk_vopb", mux_dclk_vopb_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(5), 14, 2, MFLAGS, ++ PX30_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE(0, "dclk_vopl_src", mux_npll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(8), 11, 1, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOPL, "dclk_vopl", mux_dclk_vopl_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(8), 14, 2, MFLAGS, ++ PX30_CLKGATE_CON(2), 8, GFLAGS), + -+ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RK3308_CLKGATE_CON(0), 4, GFLAGS), ++ /* PD_VPU */ ++ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(4), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, ++ PX30_CLKSEL_CON(10), 8, 4, DFLAGS, ++ PX30_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE(SCLK_CORE_VPU, "sclk_core_vpu", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(4), 1, GFLAGS), + + /* -+ * Clock-Architecture Diagram 3 ++ * Clock-Architecture Diagram 7 + */ + -+ COMPOSITE_NODIV(ACLK_BUS_SRC, "clk_bus_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(5), 6, 2, MFLAGS, -+ RK3308_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_BUS, "pclk_bus", "clk_bus_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(6), 8, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_DDR, "pclk_ddr", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 15, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus", "clk_bus_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(6), 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_BUS, "aclk_bus", "clk_bus_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(5), 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 1, GFLAGS), -+ -+ COMPOSITE_MUXTBL(0, "clk_uart0_src", mux_uart_src_p, 0, -+ RK3308_CLKSEL_CON(10), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3308_CLKGATE_CON(1), 11, GFLAGS, -+ &rk3308_uart0_fracmux), -+ GATE(SCLK_UART0, "clk_uart0", "clk_uart0_mux", 0, -+ RK3308_CLKGATE_CON(1), 12, GFLAGS), -+ -+ COMPOSITE_MUXTBL(0, "clk_uart1_src", mux_uart_src_p, 0, -+ RK3308_CLKSEL_CON(13), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(15), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3308_CLKGATE_CON(1), 15, GFLAGS, -+ &rk3308_uart1_fracmux), -+ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, -+ RK3308_CLKGATE_CON(2), 0, GFLAGS), ++ 
COMPOSITE_NODIV(ACLK_PERI_SRC, "aclk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(14), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(5), 7, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_PERI_PRE, "aclk_peri_pre", "aclk_peri_src", CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(14), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(5), 8, GFLAGS), ++ DIV(HCLK_PERI_PRE, "hclk_peri_pre", "aclk_peri_src", CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(14), 8, 5, DFLAGS), + -+ COMPOSITE_MUXTBL(0, "clk_uart2_src", mux_uart_src_p, 0, -+ RK3308_CLKSEL_CON(16), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(18), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3308_CLKGATE_CON(2), 3, GFLAGS, -+ &rk3308_uart2_fracmux), -+ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", CLK_SET_RATE_PARENT, -+ RK3308_CLKGATE_CON(2), 4, GFLAGS), ++ /* PD_MMC_NAND */ ++ GATE(HCLK_MMC_NAND, "hclk_mmc_nand", "hclk_peri_pre", 0, ++ PX30_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(5), 11, GFLAGS), ++ COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(15), 6, 2, MFLAGS, 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(5), 12, GFLAGS), ++ COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(15), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(5), 13, GFLAGS), + -+ COMPOSITE_MUXTBL(0, "clk_uart3_src", mux_uart_src_p, 0, -+ RK3308_CLKSEL_CON(19), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(21), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3308_CLKGATE_CON(2), 7, GFLAGS, -+ &rk3308_uart3_fracmux), -+ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, -+ RK3308_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", ++ mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(18), 14, 2, MFLAGS, ++ PX30_CLKSEL_CON(19), 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(19), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(6), 3, GFLAGS), + -+ COMPOSITE_MUXTBL(0, "clk_uart4_src", mux_uart_src_p, 0, -+ RK3308_CLKSEL_CON(22), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(24), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3308_CLKGATE_CON(2), 11, GFLAGS, -+ &rk3308_uart4_fracmux), -+ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, -+ RK3308_CLKGATE_CON(2), 12, GFLAGS), ++ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(20), 14, 2, MFLAGS, ++ PX30_CLKSEL_CON(21), 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 5, GFLAGS), ++ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(21), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(6), 6, GFLAGS), 
+ -+ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(25), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(26), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(2), 14, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(27), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(2), 15, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(28), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, ++ PX30_CLKSEL_CON(22), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(6), 7, GFLAGS), + -+ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(29), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(74), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(15), 0, GFLAGS), -+ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(75), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(15), 1, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", ++ PX30_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", ++ PX30_SDMMC_CON1, 1), + -+ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(3), 2, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(3), 3, GFLAGS), -+ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(32), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(3), 4, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", ++ PX30_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", ++ PX30_SDIO_CON1, 1), + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 10, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 12, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 13, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 14, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK3308_CLKGATE_CON(3), 15, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", ++ PX30_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", ++ PX30_EMMC_CON1, 1), + -+ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK3308_CLKSEL_CON(33), 0, 11, DFLAGS, -+ RK3308_CLKGATE_CON(3), 5, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3308_CLKSEL_CON(34), 0, 11, DFLAGS, -+ RK3308_CLKGATE_CON(3), 6, GFLAGS), ++ /* PD_SDCARD */ ++ GATE(0, "hclk_sdmmc_pre", "hclk_peri_pre", 0, ++ PX30_CLKGATE_CON(6), 12, GFLAGS), ++ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 13, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_gpll_cpll_npll_xin24m_p, 0, ++ PX30_CLKSEL_CON(16), 14, 2, MFLAGS, ++ PX30_CLKSEL_CON(17), 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(6), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, ++ CLK_SET_RATE_PARENT | 
CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(17), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(6), 15, GFLAGS), + -+ COMPOSITE_NOMUX(SCLK_OTP, "clk_otp", "xin24m", 0, -+ RK3308_CLKSEL_CON(35), 0, 4, DFLAGS, -+ RK3308_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_OTP_USR, "clk_otp_usr", "clk_otp", 0, -+ RK3308_CLKSEL_CON(35), 4, 2, DFLAGS, -+ RK3308_CLKGATE_CON(3), 8, GFLAGS), ++ /* PD_USB */ ++ GATE(HCLK_USB, "hclk_usb", "hclk_peri_pre", CLK_IS_CRITICAL, ++ PX30_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(SCLK_OTG_ADP, "clk_otg_adp", "clk_rtc32k_pmu", 0, ++ PX30_CLKGATE_CON(7), 3, GFLAGS), + -+ GATE(SCLK_CPU_BOOST, "clk_cpu_boost", "xin24m", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(3), 9, GFLAGS), ++ /* PD_GMAC */ ++ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(22), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(7), 11, GFLAGS), ++ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(23), 6, 1, MFLAGS), ++ GATE(SCLK_MAC_REF, "clk_mac_ref", "clk_gmac", 0, ++ PX30_CLKGATE_CON(7), 15, GFLAGS), ++ GATE(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", "clk_gmac", 0, ++ PX30_CLKGATE_CON(7), 13, GFLAGS), ++ FACTOR(0, "clk_gmac_rx_tx_div2", "clk_gmac_rx_tx", 0, 1, 2), ++ FACTOR(0, "clk_gmac_rx_tx_div20", "clk_gmac_rx_tx", 0, 1, 20), ++ MUX(SCLK_GMAC_RMII, "clk_gmac_rmii_sel", mux_gmac_rmii_sel_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(23), 7, 1, MFLAGS), + -+ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_dpll_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(7), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_dpll_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(7), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3308_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(0, "aclk_gmac_pre", "aclk_peri_pre", 0, ++ PX30_CLKGATE_CON(7), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_gmac_pre", "aclk_gmac_pre", 0, ++ PX30_CLKSEL_CON(23), 0, 4, DFLAGS, ++ PX30_CLKGATE_CON(7), 12, GFLAGS), + -+ COMPOSITE(0, "dclk_vop_src", mux_dpll_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(8), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(9), 0, -+ RK3308_CLKGATE_CON(1), 7, GFLAGS, -+ &rk3308_dclk_vop_fracmux), -+ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_mux", 0, -+ RK3308_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE(SCLK_MAC_OUT, "clk_mac_out", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(8), 5, GFLAGS), + + /* -+ * Clock-Architecture Diagram 4 ++ * Clock-Architecture Diagram 8 + */ + -+ COMPOSITE_NODIV(ACLK_PERI_SRC, "clk_peri_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(36), 6, 2, MFLAGS, -+ RK3308_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_PERI, "aclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(36), 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(37), 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 2, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(37), 8, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 3, GFLAGS), ++ /* PD_BUS */ ++ COMPOSITE_NODIV(ACLK_BUS_SRC, "aclk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(23), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(8), 6, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_BUS_PRE, "hclk_bus_pre", "aclk_bus_src", CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(24), 0, 5, DFLAGS, ++ 
PX30_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_BUS_PRE, "aclk_bus_pre", "aclk_bus_src", CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(23), 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(8), 7, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_BUS_PRE, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, ++ PX30_CLKSEL_CON(24), 8, 2, DFLAGS, ++ PX30_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(0, "pclk_top_pre", "pclk_bus_pre", CLK_IS_CRITICAL, ++ PX30_CLKGATE_CON(8), 10, GFLAGS), + -+ COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(38), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(8), 5, GFLAGS), ++ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_npll_p, 0, ++ PX30_CLKSEL_CON(26), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(9), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(27), 0, ++ PX30_CLKGATE_CON(9), 10, GFLAGS, ++ &px30_pdm_fracmux), ++ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(9), 11, GFLAGS), + -+ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 6, GFLAGS), -+ COMPOSITE(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 6, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(39), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(8), 7, GFLAGS), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3308_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK3308_SDMMC_CON1, 1), ++ COMPOSITE(0, "clk_i2s0_tx_src", mux_gpll_npll_p, 0, ++ PX30_CLKSEL_CON(28), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(9), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_tx_frac", "clk_i2s0_tx_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(29), 0, ++ PX30_CLKGATE_CON(9), 13, GFLAGS, ++ &px30_i2s0_tx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_TX, "clk_i2s0_tx", mux_i2s0_tx_rx_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(28), 12, 1, MFLAGS, ++ PX30_CLKGATE_CON(9), 14, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_i2s0_tx_out_pre", mux_i2s0_tx_out_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(28), 14, 2, MFLAGS, ++ PX30_CLKGATE_CON(9), 15, GFLAGS), ++ GATE(SCLK_I2S0_TX_OUT, "clk_i2s0_tx_out", "clk_i2s0_tx_out_pre", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 8, CLK_GATE_HIWORD_MASK), + -+ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE(SCLK_SDIO_DIV50, "clk_sdio_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(40), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(8), 9, GFLAGS), -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3308_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, 
"sdio_sample", "clk_sdio", RK3308_SDIO_CON1, 1), ++ COMPOSITE(0, "clk_i2s0_rx_src", mux_gpll_npll_p, 0, ++ PX30_CLKSEL_CON(58), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(17), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_rx_frac", "clk_i2s0_rx_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(59), 0, ++ PX30_CLKGATE_CON(17), 1, GFLAGS, ++ &px30_i2s0_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_RX, "clk_i2s0_rx", mux_i2s0_rx_tx_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(58), 12, 1, MFLAGS, ++ PX30_CLKGATE_CON(17), 2, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_i2s0_rx_out_pre", mux_i2s0_rx_out_p, 0, ++ PX30_CLKSEL_CON(58), 14, 2, MFLAGS, ++ PX30_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(SCLK_I2S0_RX_OUT, "clk_i2s0_rx_out", "clk_i2s0_rx_out_pre", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 11, CLK_GATE_HIWORD_MASK), + -+ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3308_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(41), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(8), 11, GFLAGS), -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK3308_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK3308_EMMC_CON1, 1), ++ COMPOSITE(0, "clk_i2s1_src", mux_gpll_npll_p, 0, ++ PX30_CLKSEL_CON(30), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(31), 0, ++ PX30_CLKGATE_CON(10), 1, GFLAGS, ++ &px30_i2s1_fracmux), ++ GATE(SCLK_I2S1, "clk_i2s1", "clk_i2s1_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 2, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_i2s1_out_pre", mux_i2s1_out_p, CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(30), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(SCLK_I2S1_OUT, "clk_i2s1_out", "clk_i2s1_out_pre", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 9, CLK_GATE_HIWORD_MASK), + -+ COMPOSITE(SCLK_SFC, "clk_sfc", mux_dpll_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(42), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(8), 12, GFLAGS), ++ COMPOSITE(0, "clk_i2s2_src", mux_gpll_npll_p, 0, ++ PX30_CLKSEL_CON(32), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(10), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(33), 0, ++ PX30_CLKGATE_CON(10), 5, GFLAGS, ++ &px30_i2s2_fracmux), ++ GATE(SCLK_I2S2, "clk_i2s2", "clk_i2s2_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_i2s2_out_pre", mux_i2s2_out_p, 0, ++ PX30_CLKSEL_CON(32), 15, 1, MFLAGS, ++ PX30_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(SCLK_I2S2_OUT, "clk_i2s2_out", "clk_i2s2_out_pre", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 10, CLK_GATE_HIWORD_MASK), + -+ GATE(SCLK_OTG_ADP, "clk_otg_adp", "clk_rtc32k", 0, -+ RK3308_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE(SCLK_UART1_SRC, "clk_uart1_src", mux_uart_src_p, CLK_SET_RATE_NO_REPARENT, ++ PX30_CLKSEL_CON(34), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(10), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, ++ PX30_CLKSEL_CON(35), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(10), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ 
PX30_CLKSEL_CON(36), 0, ++ PX30_CLKGATE_CON(10), 14, GFLAGS, ++ &px30_uart1_fracmux), ++ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(10), 15, GFLAGS), + -+ COMPOSITE(SCLK_MAC_SRC, "clk_mac_src", mux_dpll_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(8), 14, GFLAGS), -+ MUX(SCLK_MAC, "clk_mac", mux_mac_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(43), 14, 1, MFLAGS), -+ GATE(SCLK_MAC_REF, "clk_mac_ref", "clk_mac", 0, -+ RK3308_CLKGATE_CON(9), 1, GFLAGS), -+ GATE(SCLK_MAC_RX_TX, "clk_mac_rx_tx", "clk_mac", 0, -+ RK3308_CLKGATE_CON(9), 0, GFLAGS), -+ FACTOR(0, "clk_mac_rx_tx_div2", "clk_mac_rx_tx", 0, 1, 2), -+ FACTOR(0, "clk_mac_rx_tx_div20", "clk_mac_rx_tx", 0, 1, 20), -+ MUX(SCLK_MAC_RMII, "clk_mac_rmii_sel", mux_mac_rmii_sel_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(43), 15, 1, MFLAGS), ++ COMPOSITE(SCLK_UART2_SRC, "clk_uart2_src", mux_uart_src_p, 0, ++ PX30_CLKSEL_CON(37), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, ++ PX30_CLKSEL_CON(38), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(39), 0, ++ PX30_CLKGATE_CON(11), 2, GFLAGS, ++ &px30_uart2_fracmux), ++ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, ++ PX30_CLKGATE_CON(11), 3, GFLAGS), + -+ COMPOSITE(SCLK_OWIRE, "clk_owire", mux_dpll_vpll0_xin24m_p, 0, -+ RK3308_CLKSEL_CON(44), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3308_CLKGATE_CON(8), 15, GFLAGS), ++ COMPOSITE(0, "clk_uart3_src", mux_uart_src_p, 0, ++ PX30_CLKSEL_CON(40), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 4, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, ++ PX30_CLKSEL_CON(41), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(42), 0, ++ PX30_CLKGATE_CON(11), 6, GFLAGS, ++ &px30_uart3_fracmux), ++ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(11), 7, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 5 -+ */ ++ COMPOSITE(0, "clk_uart4_src", mux_uart_src_p, 0, ++ PX30_CLKSEL_CON(43), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, ++ PX30_CLKSEL_CON(44), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(45), 0, ++ PX30_CLKGATE_CON(11), 10, GFLAGS, ++ &px30_uart4_fracmux), ++ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(11), 11, GFLAGS), + -+ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE(0, "clk_uart5_src", mux_uart_src_p, 0, ++ PX30_CLKSEL_CON(46), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, ++ PX30_CLKSEL_CON(47), 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(11), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ PX30_CLKSEL_CON(48), 0, ++ PX30_CLKGATE_CON(11), 14, GFLAGS, ++ &px30_uart5_fracmux), ++ GATE(SCLK_UART5, "clk_uart5", "clk_uart5_mux", CLK_SET_RATE_PARENT, ++ PX30_CLKGATE_CON(11), 15, GFLAGS), + -+ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ 
RK3308_CLKGATE_CON(4), 10, GFLAGS), -+ GATE(0, "clk_ddr_upctrl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(0, "clk_ddr_msch_peribus", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 13, GFLAGS), ++ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(49), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(49), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(50), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 2, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(50), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 3, GFLAGS), ++ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(52), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 5, GFLAGS), ++ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(52), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 6, GFLAGS), ++ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(53), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 7, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, ++ PX30_CLKSEL_CON(53), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ PX30_CLKGATE_CON(12), 8, GFLAGS), + -+ COMPOSITE(SCLK_DDRCLK, "clk_ddrphy4x_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 3, DFLAGS, -+ RK3308_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(0, "clk_ddrphy4x", "clk_ddrphy4x_src", CLK_IS_CRITICAL, -+ RK3308_CLKGATE_CON(0), 11, GFLAGS), -+ FACTOR_GATE(0, "clk_ddr_stdby_div4", "clk_ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, -+ RK3308_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_NODIV(0, "clk_ddrstdby", mux_ddrstdby_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(1), 8, 1, MFLAGS, -+ RK3308_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 2, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ PX30_CLKGATE_CON(13), 5, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 6 -+ */ ++ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, ++ PX30_CLKSEL_CON(54), 0, 11, DFLAGS, ++ PX30_CLKGATE_CON(12), 9, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, ++ PX30_CLKSEL_CON(55), 0, 11, DFLAGS, ++ PX30_CLKGATE_CON(12), 10, GFLAGS), + -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(SCLK_PMU, "clk_pmu", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3308_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(0, "clk_cpu_boost", "xin24m", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(12), 12, GFLAGS), + -+ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(3), 0, -+ RK3308_CLKGATE_CON(4), 3, GFLAGS, -+ &rk3308_rtc32k_fracmux), -+ MUX(0, "clk_rtc32k_div_src", mux_vpll0_vpll1_p, 0, -+ RK3308_CLKSEL_CON(2), 10, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "clk_rtc32k_div", "clk_rtc32k_div_src", 
CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(4), 0, 16, DFLAGS, -+ RK3308_CLKGATE_CON(4), 2, GFLAGS), ++ /* PD_CRYPTO */ ++ GATE(0, "aclk_crypto_pre", "aclk_bus_pre", 0, ++ PX30_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(0, "hclk_crypto_pre", "hclk_bus_pre", 0, ++ PX30_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_CLKGATE_CON(8), 14, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_npll_p, 0, ++ PX30_CLKSEL_CON(25), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ PX30_CLKGATE_CON(8), 15, GFLAGS), + -+ COMPOSITE(0, "clk_usbphy_ref_src", mux_dpll_vpll0_p, 0, -+ RK3308_CLKSEL_CON(72), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3308_CLKGATE_CON(4), 7, GFLAGS), -+ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(72), 7, 1, MFLAGS, -+ RK3308_CLKGATE_CON(4), 8, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 9 ++ */ + -+ GATE(0, "clk_wifi_dpll", "dpll", 0, -+ RK3308_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(0, "clk_wifi_vpll0", "vpll0", 0, -+ RK3308_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(0, "clk_wifi_osc", "xin24m", 0, -+ RK3308_CLKGATE_CON(15), 4, GFLAGS), -+ COMPOSITE(0, "clk_wifi_src", mux_wifi_src_p, 0, -+ RK3308_CLKSEL_CON(44), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3308_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE_NODIV(SCLK_WIFI, "clk_wifi", mux_wifi_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(44), 7, 1, MFLAGS, -+ RK3308_CLKGATE_CON(4), 1, GFLAGS), ++ /* PD_BUS_TOP */ ++ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(PCLK_OTP_PHY, "pclk_otp_phy", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, PX30_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_top_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(0, "pclk_cpu_hoost", "pclk_top_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(16), 7, GFLAGS), + -+ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RK3308_CLKGATE_CON(4), 4, GFLAGS), ++ /* PD_VI */ ++ GATE(0, "aclk_vi_niu", "aclk_vi_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(4), 15, GFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_pre", 0, PX30_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(0, "hclk_vi_niu", "hclk_vi_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vi_pre", 0, PX30_CLKGATE_CON(5), 4, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ ++ /* PD_VO */ ++ GATE(0, "aclk_vo_niu", "aclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 0, GFLAGS), ++ GATE(ACLK_VOPB, "aclk_vopb", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 3, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 7, GFLAGS), ++ GATE(ACLK_VOPL, "aclk_vopl", "aclk_vo_pre", 0, PX30_CLKGATE_CON(3), 5, GFLAGS), + -+ COMPOSITE_NODIV(0, "clk_audio_src", mux_vpll0_vpll1_xin24m_p, CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(45), 6, 2, MFLAGS, -+ RK3308_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_AUDIO, 
"hclk_audio", "clk_audio_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(45), 0, 5, DFLAGS, -+ RK3308_CLKGATE_CON(10), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_AUDIO, "pclk_audio", "clk_audio_src", CLK_IS_CRITICAL, -+ RK3308_CLKSEL_CON(45), 8, 5, DFLAGS, -+ RK3308_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(0, "hclk_vo_niu", "hclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 1, GFLAGS), ++ GATE(HCLK_VOPB, "hclk_vopb", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 4, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 8, GFLAGS), ++ GATE(HCLK_VOPL, "hclk_vopl", "hclk_vo_pre", 0, PX30_CLKGATE_CON(3), 6, GFLAGS), + -+ COMPOSITE(0, "clk_pdm_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(46), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(47), 0, -+ RK3308_CLKGATE_CON(10), 4, GFLAGS, -+ &rk3308_pdm_fracmux), -+ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, -+ RK3308_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(0, "pclk_vo_niu", "pclk_vo_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(3), 2, GFLAGS), ++ GATE(PCLK_MIPI_DSI, "pclk_mipi_dsi", "pclk_vo_pre", 0, PX30_CLKGATE_CON(3), 9, GFLAGS), + -+ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(52), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(53), 0, -+ RK3308_CLKGATE_CON(10), 13, GFLAGS, -+ &rk3308_i2s0_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(52), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(10), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(52), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(10), 15, GFLAGS), ++ /* PD_BUS */ ++ GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 8, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 12, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, PX30_CLKGATE_CON(13), 15, GFLAGS), + -+ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(54), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(55), 0, -+ RK3308_CLKGATE_CON(11), 1, GFLAGS, -+ &rk3308_i2s0_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(54), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", "clk_i2s0_8ch_rx", 0, -+ RK3308_CLKGATE_CON(11), 3, GFLAGS), ++ /* aclk_dmac is controlled by sgrf_soc_con1[11]. 
*/ ++ SGRF_GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre"), + -+ COMPOSITE(SCLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(56), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(11), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(57), 0, -+ RK3308_CLKGATE_CON(11), 5, GFLAGS, -+ &rk3308_i2s1_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", mux_i2s1_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(56), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 6, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S1_8CH_TX_OUT, "clk_i2s1_8ch_tx_out", mux_i2s1_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(56), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 9, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 14, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(HCLK_I2S1, "hclk_i2s1", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(HCLK_I2S2, "hclk_i2s2", "hclk_bus_pre", 0, PX30_CLKGATE_CON(14), 4, GFLAGS), + -+ COMPOSITE(SCLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(58), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(11), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(59), 0, -+ RK3308_CLKGATE_CON(11), 9, GFLAGS, -+ &rk3308_i2s1_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", mux_i2s1_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(58), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(SCLK_I2S1_8CH_RX_OUT, "clk_i2s1_8ch_rx_out", "clk_i2s1_8ch_rx", 0, -+ RK3308_CLKGATE_CON(11), 11, GFLAGS), ++ GATE(0, "pclk_bus_niu", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 12, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 13, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 14, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(14), 15, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus_pre", 0, 
PX30_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(PCLK_OTP_NS, "pclk_otp_ns", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus_pre", 0, PX30_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 11, GFLAGS), ++ GATE(0, "pclk_sgrf", "pclk_bus_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(15), 12, GFLAGS), + -+ COMPOSITE(SCLK_I2S2_8CH_TX_SRC, "clk_i2s2_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(60), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(11), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_tx_frac", "clk_i2s2_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(61), 0, -+ RK3308_CLKGATE_CON(11), 13, GFLAGS, -+ &rk3308_i2s2_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S2_8CH_TX, "clk_i2s2_8ch_tx", mux_i2s2_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(60), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S2_8CH_TX_OUT, "clk_i2s2_8ch_tx_out", mux_i2s2_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(60), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(11), 15, GFLAGS), ++ /* PD_VPU */ ++ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, PX30_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, PX30_CLKGATE_CON(4), 4, GFLAGS), + -+ COMPOSITE(SCLK_I2S2_8CH_RX_SRC, "clk_i2s2_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(62), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_rx_frac", "clk_i2s2_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(63), 0, -+ RK3308_CLKGATE_CON(12), 1, GFLAGS, -+ &rk3308_i2s2_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S2_8CH_RX, "clk_i2s2_8ch_rx", mux_i2s2_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(62), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(SCLK_I2S2_8CH_RX_OUT, "clk_i2s2_8ch_rx_out", "clk_i2s2_8ch_rx", 0, -+ RK3308_CLKGATE_CON(12), 3, GFLAGS), ++ /* PD_CRYPTO */ ++ GATE(0, "hclk_crypto_niu", "hclk_crypto_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_crypto_pre", 0, PX30_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(0, "aclk_crypto_niu", "aclk_crypto_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_crypto_pre", 0, PX30_CLKGATE_CON(9), 4, GFLAGS), + -+ COMPOSITE(SCLK_I2S3_8CH_TX_SRC, "clk_i2s3_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(64), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(12), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_tx_frac", "clk_i2s3_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(65), 0, -+ RK3308_CLKGATE_CON(12), 5, GFLAGS, -+ &rk3308_i2s3_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S3_8CH_TX, "clk_i2s3_8ch_tx", mux_i2s3_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(64), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(12), 6, GFLAGS), -+ 
COMPOSITE_NODIV(SCLK_I2S3_8CH_TX_OUT, "clk_i2s3_8ch_tx_out", mux_i2s3_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(64), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(12), 7, GFLAGS), ++ /* PD_SDCARD */ ++ GATE(0, "hclk_sdmmc_niu", "hclk_sdmmc_pre", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sdmmc_pre", 0, PX30_CLKGATE_CON(7), 1, GFLAGS), + -+ COMPOSITE(SCLK_I2S3_8CH_RX_SRC, "clk_i2s3_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(66), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(12), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_rx_frac", "clk_i2s3_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(67), 0, -+ RK3308_CLKGATE_CON(12), 9, GFLAGS, -+ &rk3308_i2s3_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S3_8CH_RX, "clk_i2s3_8ch_rx", mux_i2s3_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(66), 12, 1, MFLAGS, -+ RK3308_CLKGATE_CON(12), 10, GFLAGS), -+ GATE(SCLK_I2S3_8CH_RX_OUT, "clk_i2s3_8ch_rx_out", "clk_i2s3_8ch_rx", 0, -+ RK3308_CLKGATE_CON(12), 11, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "aclk_peri_niu", "aclk_peri_pre", CLK_IS_CRITICAL, PX30_CLKGATE_CON(5), 9, GFLAGS), + -+ COMPOSITE(SCLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(68), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(12), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(69), 0, -+ RK3308_CLKGATE_CON(12), 13, GFLAGS, -+ &rk3308_i2s0_2ch_fracmux), -+ GATE(SCLK_I2S0_2CH, "clk_i2s0_2ch", "clk_i2s0_2ch_mux", 0, -+ RK3308_CLKGATE_CON(12), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_2CH_OUT, "clk_i2s0_2ch_out", mux_i2s0_2ch_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(68), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(12), 15, GFLAGS), ++ /* PD_MMC_NAND */ ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(5), 15, GFLAGS), ++ GATE(0, "hclk_mmc_nand_niu", "hclk_mmc_nand", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_nand", 0, PX30_CLKGATE_CON(6), 11, GFLAGS), + -+ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_vpll0_vpll1_xin24m_p, 0, -+ RK3308_CLKSEL_CON(70), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(71), 0, -+ RK3308_CLKGATE_CON(13), 1, GFLAGS, -+ &rk3308_i2s1_2ch_fracmux), -+ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, -+ RK3308_CLKGATE_CON(13), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(70), 15, 1, MFLAGS, -+ RK3308_CLKGATE_CON(13), 3, GFLAGS), ++ /* PD_USB */ ++ GATE(0, "hclk_usb_niu", "hclk_usb", CLK_IS_CRITICAL, PX30_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_usb", 0, PX30_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(HCLK_HOST, "hclk_host", "hclk_usb", 0, PX30_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(HCLK_HOST_ARB, "hclk_host_arb", "hclk_usb", CLK_IGNORE_UNUSED, PX30_CLKGATE_CON(7), 8, GFLAGS), + -+ COMPOSITE(SCLK_SPDIF_TX_DIV, "clk_spdif_tx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE(SCLK_SPDIF_TX_DIV50, "clk_spdif_tx_div50", 
mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 6, GFLAGS), -+ MUX(0, "clk_spdif_tx_src", mux_spdif_tx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(48), 12, 1, MFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_spdif_tx_frac", "clk_spdif_tx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(49), 0, -+ RK3308_CLKGATE_CON(10), 7, GFLAGS, -+ &rk3308_spdif_tx_fracmux), -+ GATE(SCLK_SPDIF_TX, "clk_spdif_tx", "clk_spdif_tx_mux", 0, -+ RK3308_CLKGATE_CON(10), 8, GFLAGS), ++ /* PD_GMAC */ ++ GATE(0, "aclk_gmac_niu", "aclk_gmac_pre", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_gmac_pre", 0, ++ PX30_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(0, "pclk_gmac_niu", "pclk_gmac_pre", CLK_IGNORE_UNUSED, ++ PX30_CLKGATE_CON(8), 1, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_gmac_pre", 0, ++ PX30_CLKGATE_CON(8), 3, GFLAGS), ++}; + -+ COMPOSITE(SCLK_SPDIF_RX_DIV, "clk_spdif_rx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 9, GFLAGS), -+ COMPOSITE(SCLK_SPDIF_RX_DIV50, "clk_spdif_rx_div50", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3308_CLKGATE_CON(10), 9, GFLAGS), -+ MUX(0, "clk_spdif_rx_src", mux_spdif_rx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3308_CLKSEL_CON(50), 14, 1, MFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_spdif_rx_frac", "clk_spdif_rx_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(51), 0, -+ RK3308_CLKGATE_CON(10), 10, GFLAGS, -+ &rk3308_spdif_rx_fracmux), -+ GATE(SCLK_SPDIF_RX, "clk_spdif_rx", "clk_spdif_rx_mux", 0, -+ RK3308_CLKGATE_CON(10), 11, GFLAGS), ++static struct rockchip_clk_branch px30_gpu_src_clk[] __initdata = { ++ COMPOSITE(0, "clk_gpu_src", mux_gpll_dmycpll_usb480m_dmynpll_p, 0, ++ PX30_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ PX30_CLKGATE_CON(0), 8, GFLAGS), ++}; ++ ++static struct rockchip_clk_branch rk3326_gpu_src_clk[] __initdata = { ++ COMPOSITE(0, "clk_gpu_src", mux_gpll_dmycpll_usb480m_npll_p, 0, ++ PX30_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ PX30_CLKGATE_CON(0), 8, GFLAGS), ++}; + ++static struct rockchip_clk_branch px30_clk_pmu_branches[] __initdata = { + /* -+ * Clock-Architecture Diagram 8 ++ * Clock-Architecture Diagram 2 + */ + -+ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(0, "pclk_core_dbg_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "pclk_core_dbg_daplite", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(0, "aclk_core_perf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(0, "pclk_core_grf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ PX30_PMU_CLKSEL_CON(1), 0, ++ PX30_PMU_CLKGATE_CON(0), 13, GFLAGS, ++ &px30_rtc32k_pmu_fracmux), + -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(0, "aclk_peribus_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(ACLK_MAC, "aclk_mac", "aclk_peri", 0, RK3308_CLKGATE_CON(9), 4, GFLAGS), ++ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, ++ PX30_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, ++ PX30_PMU_CLKGATE_CON(0), 12, GFLAGS), + -+ GATE(0, "hclk_peri_niu", "hclk_peri", 
CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 9, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 11, GFLAGS), -+ GATE(HCLK_HOST, "hclk_host", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(HCLK_HOST_ARB, "hclk_host_arb", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 13, GFLAGS), ++ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "gpll", 0, ++ PX30_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, ++ PX30_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, ++ PX30_PMU_CLKGATE_CON(0), 15, GFLAGS), + -+ GATE(0, "pclk_peri_niu", "pclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 14, GFLAGS), -+ GATE(PCLK_MAC, "pclk_mac", "pclk_peri", 0, RK3308_CLKGATE_CON(9), 15, GFLAGS), ++ COMPOSITE(0, "clk_uart0_pmu_src", mux_uart_src_p, 0, ++ PX30_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ PX30_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, ++ PX30_PMU_CLKSEL_CON(4), 0, 5, DFLAGS, ++ PX30_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKSEL_CON(5), 0, ++ PX30_PMU_CLKGATE_CON(1), 2, GFLAGS, ++ &px30_uart0_pmu_fracmux), ++ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKGATE_CON(1), 3, GFLAGS), + -+ GATE(0, "hclk_audio_niu", "hclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 1, GFLAGS), -+ GATE(HCLK_SPDIFTX, "hclk_spdiftx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(HCLK_SPDIFRX, "hclk_spdifrx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 4, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(HCLK_I2S2_8CH, "hclk_i2s2_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(HCLK_I2S3_8CH, "hclk_i2s3_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(HCLK_I2S0_2CH, "hclk_i2s0_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(HCLK_VAD, "hclk_vad", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ PX30_PMU_CLKGATE_CON(1), 4, GFLAGS), + -+ GATE(0, "pclk_audio_niu", "pclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_audio", 0, RK3308_CLKGATE_CON(14), 12, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "gpll", CLK_IS_CRITICAL, ++ PX30_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, ++ PX30_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ GATE(0, "aclk_bus_niu", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 2, GFLAGS), -+ 
GATE(ACLK_VOP, "aclk_vop", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(0, "aclk_gic", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 4, GFLAGS), -+ /* aclk_dmaci0 is controlled by sgrf_clkgat_con. */ -+ SGRF_GATE(ACLK_DMAC0, "aclk_dmac0", "aclk_bus"), -+ /* aclk_dmac1 is controlled by sgrf_clkgat_con. */ -+ SGRF_GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_bus"), -+ /* watchdog pclk is controlled by sgrf_clkgat_con. */ -+ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_bus"), ++ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "gpll", 0, ++ PX30_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, ++ PX30_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, ++ PX30_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, ++ PX30_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, ++ PX30_PMU_CLKGATE_CON(1), 10, GFLAGS), + -+ GATE(0, "hclk_bus_niu", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 5, GFLAGS), -+ GATE(0, "hclk_rom", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 8, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 9 ++ */ + -+ GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 11, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 12, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 14, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 15, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 8, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(PCLK_OTP_NS, "pclk_otp_ns", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 13, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 14, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 15, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_bus", 0, RK3308_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(PCLK_USBSD_DET, 
"pclk_usbsd_det", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(PCLK_DDR_STDBY, "pclk_ddr_stdby", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 8, GFLAGS), -+ GATE(PCLK_CRU, "pclk_cru", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(PCLK_OTP_PHY, "pclk_otp_phy", "pclk_bus", 0, RK3308_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(PCLK_CPU_BOOST, "pclk_cpu_boost", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 13, GFLAGS), -+ GATE(PCLK_CAN, "pclk_can", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 14, GFLAGS), -+ GATE(PCLK_OWIRE, "pclk_owire", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 15, GFLAGS), ++ /* PD_PMU */ ++ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, PX30_PMU_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, PX30_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, PX30_PMU_CLKGATE_CON(0), 8, GFLAGS), +}; + -+static struct rockchip_clk_branch rk3308_dclk_vop_frac[] __initdata = { -+ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(9), 0, -+ RK3308_CLKGATE_CON(1), 7, GFLAGS, -+ &rk3308_dclk_vop_fracmux), -+}; ++static struct rockchip_clk_branch px30_clk_ddrphy_otp[] __initdata = { ++ COMPOSITE_NOGATE(0, "clk_ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 3, DFLAGS), ++ FACTOR_GATE(0, "clk_ddrphy1x", "clk_ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, ++ PX30_CLKGATE_CON(0), 14, GFLAGS), ++ FACTOR_GATE(0, "clk_stdby_2wrap", "clk_ddrphy4x", ++ CLK_IGNORE_UNUSED, 1, 4, ++ PX30_CLKGATE_CON(1), 0, GFLAGS), + -+static struct rockchip_clk_branch rk3308b_dclk_vop_frac[] __initdata = { -+ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, -+ RK3308_CLKSEL_CON(9), 0, -+ RK3308_CLKGATE_CON(1), 7, GFLAGS, -+ &rk3308_dclk_vop_fracmux), ++ COMPOSITE_NOMUX(SCLK_OTP, "clk_otp", "xin24m", 0, ++ PX30_CLKSEL_CON(56), 0, 3, DFLAGS, ++ PX30_CLKGATE_CON(12), 11, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_OTP_USR, "clk_otp_usr", "clk_otp", 0, ++ PX30_CLKSEL_CON(56), 4, 2, DFLAGS, ++ PX30_CLKGATE_CON(13), 6, GFLAGS), +}; + -+static void __iomem *rk3308_cru_base; ++static struct rockchip_clk_branch px30s_clk_ddrphy_otp[] __initdata = { ++ COMPOSITE(0, "clk_ddrphy1x", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ PX30_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 3, DFLAGS, ++ 
PX30_CLKGATE_CON(0), 14, GFLAGS), ++ FACTOR_GATE(0, "clk_stdby_2wrap", "clk_ddrphy1x", ++ CLK_IGNORE_UNUSED, 1, 4, ++ PX30_CLKGATE_CON(1), 0, GFLAGS), + -+void rk3308_dump_cru(void) ++ COMPOSITE(SCLK_OTP_USR, "clk_otp_usr", mux_xin24m_gpll_p, 0, ++ PX30_CLKSEL_CON(56), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ PX30_CLKGATE_CON(12), 11, GFLAGS), ++}; ++ ++static __initdata struct rockchip_clk_provider *cru_ctx, *pmucru_ctx; ++static void __init px30_register_armclk(void) +{ -+ if (rk3308_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3308_cru_base, -+ 0x500, false); -+ } ++ rockchip_clk_register_armclk(cru_ctx, ARMCLK, "armclk", 2, ++ cru_ctx->clk_data.clks[PLL_APLL], ++ pmucru_ctx->clk_data.clks[PLL_GPLL], ++ &px30_cpuclk_data, ++ px30_cpuclk_rates, ++ ARRAY_SIZE(px30_cpuclk_rates)); +} + -+static void __init rk3308_clk_init(struct device_node *np) ++static void __init px30_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; -+ struct clk **clks; + + reg_base = of_iomap(np, 0); + if (!reg_base) { @@ -49571,100 +49797,159 @@ index 000000000..6a8e38f11 + iounmap(reg_base); + return; + } -+ clks = ctx->clk_data.clks; ++ cru_ctx = ctx; + -+ rockchip_clk_register_plls(ctx, rk3308_pll_clks, -+ ARRAY_SIZE(rk3308_pll_clks), -+ RK3308_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk3308_clk_branches, -+ ARRAY_SIZE(rk3308_clk_branches)); -+ rockchip_soc_id_init(); -+ if (soc_is_rk3308b()) -+ rockchip_clk_register_branches(ctx, rk3308b_dclk_vop_frac, -+ ARRAY_SIZE(rk3308b_dclk_vop_frac)); ++ rockchip_clk_register_plls(ctx, px30_pll_clks, ++ ARRAY_SIZE(px30_pll_clks), ++ PX30_GRF_SOC_STATUS0); ++ ++ if (pmucru_ctx) ++ px30_register_armclk(); ++ ++ rockchip_clk_register_branches(ctx, px30_clk_branches, ++ ARRAY_SIZE(px30_clk_branches)); ++ if (of_machine_is_compatible("rockchip,px30")) ++ rockchip_clk_register_branches(ctx, px30_gpu_src_clk, ++ ARRAY_SIZE(px30_gpu_src_clk)); + else -+ rockchip_clk_register_branches(ctx, rk3308_dclk_vop_frac, -+ ARRAY_SIZE(rk3308_dclk_vop_frac)); ++ rockchip_clk_register_branches(ctx, rk3326_gpu_src_clk, ++ ARRAY_SIZE(rk3326_gpu_src_clk)); + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, clks[PLL_APLL], clks[PLL_VPLL0], -+ &rk3308_cpuclk_data, rk3308_cpuclk_rates, -+ ARRAY_SIZE(rk3308_cpuclk_rates)); ++ rockchip_soc_id_init(); ++ if (soc_is_px30s()) ++ rockchip_clk_register_branches(ctx, px30s_clk_ddrphy_otp, ++ ARRAY_SIZE(px30s_clk_ddrphy_otp)); ++ else ++ rockchip_clk_register_branches(ctx, px30_clk_ddrphy_otp, ++ ARRAY_SIZE(px30_clk_ddrphy_otp)); + -+ rockchip_register_softrst(np, 10, reg_base + RK3308_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 12, reg_base + PX30_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK3308_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, PX30_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); ++} ++CLK_OF_DECLARE(px30_cru, "rockchip,px30-cru", px30_clk_init); + -+ if (!rk_dump_cru) { -+ rk3308_cru_base = reg_base; -+ rk_dump_cru = rk3308_dump_cru; ++static void __init px30_pmu_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru pmu region\n", __func__); ++ return; + } -+} + -+CLK_OF_DECLARE(rk3308_cru, "rockchip,rk3308-cru", rk3308_clk_init); ++ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); ++ if 
(IS_ERR(ctx)) { ++ pr_err("%s: rockchip pmu clk init failed\n", __func__); ++ return; ++ } ++ pmucru_ctx = ctx; + -+static int __init clk_rk3308_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; ++ rockchip_clk_register_plls(ctx, px30_pmu_pll_clks, ++ ARRAY_SIZE(px30_pmu_pll_clks), PX30_GRF_SOC_STATUS0); + -+ rk3308_clk_init(np); ++ if (cru_ctx) ++ px30_register_armclk(); + -+ return 0; ++ rockchip_clk_register_branches(ctx, px30_clk_pmu_branches, ++ ARRAY_SIZE(px30_clk_pmu_branches)); ++ ++ rockchip_clk_of_add_provider(np, ctx); +} ++CLK_OF_DECLARE(px30_cru_pmu, "rockchip,px30-pmucru", px30_pmu_clk_init); + -+static const struct of_device_id clk_rk3308_match_table[] = { ++#ifdef MODULE ++struct clk_px30_inits { ++ void (*inits)(struct device_node *np); ++}; ++ ++static const struct clk_px30_inits clk_px30_init = { ++ .inits = px30_clk_init, ++}; ++ ++static const struct clk_px30_inits clk_px30_pmu_init = { ++ .inits = px30_pmu_clk_init, ++}; ++ ++static const struct of_device_id clk_px30_match_table[] = { + { -+ .compatible = "rockchip,rk3308-cru", ++ .compatible = "rockchip,px30-cru", ++ .data = &clk_px30_init, ++ }, { ++ .compatible = "rockchip,px30-pmucru", ++ .data = &clk_px30_pmu_init, + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3308_match_table); ++MODULE_DEVICE_TABLE(of, clk_px30_match_table); + -+static struct platform_driver clk_rk3308_driver = { ++static int clk_px30_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_px30_inits *init_data; ++ ++ match = of_match_device(clk_px30_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; ++ ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); ++ ++ return 0; ++} ++ ++static struct platform_driver clk_px30_driver = { ++ .probe = clk_px30_probe, + .driver = { -+ .name = "clk-rk3308", -+ .of_match_table = clk_rk3308_match_table, ++ .name = "clk-px30", ++ .of_match_table = clk_px30_match_table, + }, +}; -+builtin_platform_driver_probe(clk_rk3308_driver, clk_rk3308_probe); ++module_platform_driver(clk_px30_driver); + -+MODULE_DESCRIPTION("Rockchip RK3308 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip PX30 Clock Driver"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3328.c b/drivers/clk/rockchip-oh/clk-rk3328.c ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk-rk1808.c b/drivers/clk/rockchip-oh/clk-rk1808.c new file mode 100644 -index 000000000..8ec63aaae +index 000000000..e177a3dd6 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3328.c -@@ -0,0 +1,900 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later ++++ b/drivers/clk/rockchip-oh/clk-rk1808.c +@@ -0,0 +1,1249 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
-+ * Author: Elaine ++ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd ++ * Author: Elaine Zhang + */ -+ +#include -+#include +#include +#include +#include +#include +#include -+#include ++#include +#include "clk.h" + -+#define RK3328_GRF_SOC_CON4 0x410 -+#define RK3328_GRF_SOC_STATUS0 0x480 -+#define RK3328_GRF_MAC_CON1 0x904 -+#define RK3328_GRF_MAC_CON2 0x908 ++#define RK1808_GRF_SOC_STATUS0 0x480 ++#define RK1808_PMUGRF_SOC_CON0 0x100 ++#define RK1808_UART_FRAC_MAX_PRATE 800000000 ++#define RK1808_PDM_FRAC_MAX_PRATE 300000000 ++#define RK1808_I2S_FRAC_MAX_PRATE 600000000 ++#define RK1808_VOP_RAW_FRAC_MAX_PRATE 300000000 ++#define RK1808_VOP_LITE_FRAC_MAX_PRATE 400000000 + -+enum rk3328_plls { -+ apll, dpll, cpll, gpll, npll, ++enum rk1808_plls { ++ apll, dpll, cpll, gpll, npll, ppll, +}; + -+static struct rockchip_pll_rate_table rk3328_pll_rates[] = { ++static struct rockchip_pll_rate_table rk1808_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), + RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), @@ -49683,796 +49968,1138 @@ index 000000000..8ec63aaae + RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), + RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), + RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), + RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 2, 275, 3, 1, 1, 0), + RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), + RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), + RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), + RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), + RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), + RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), + RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), + RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), + RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 1, 100, 3, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 1, 175, 2, 1, 1, 0), + RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), + RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), + RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(416000000, 1, 52, 3, 1, 1, 0), + RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), + RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), + RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(200000000, 1, 200, 6, 4, 1, 0), ++ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), + RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+static struct rockchip_pll_rate_table rk3328_pll_frac_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1016064000, 3, 127, 1, 1, 0, 134218), -+ /* vco = 1016064000 */ -+ RK3036_PLL_RATE(983040000, 24, 983, 1, 1, 0, 671089), -+ /* vco = 983040000 */ -+ RK3036_PLL_RATE(491520000, 24, 983, 2, 1, 0, 671089), 
-+ /* vco = 983040000 */ -+ RK3036_PLL_RATE(61440000, 6, 215, 7, 2, 0, 671089), -+ /* vco = 860156000 */ -+ RK3036_PLL_RATE(56448000, 12, 451, 4, 4, 0, 9797895), -+ /* vco = 903168000 */ -+ RK3036_PLL_RATE(40960000, 12, 409, 4, 5, 0, 10066330), -+ /* vco = 819200000 */ -+ { /* sentinel */ }, -+}; -+ -+#define RK3328_DIV_ACLKM_MASK 0x7 -+#define RK3328_DIV_ACLKM_SHIFT 4 -+#define RK3328_DIV_PCLK_DBG_MASK 0xf -+#define RK3328_DIV_PCLK_DBG_SHIFT 0 ++#define RK1808_DIV_ACLKM_MASK 0x7 ++#define RK1808_DIV_ACLKM_SHIFT 12 ++#define RK1808_DIV_PCLK_DBG_MASK 0xf ++#define RK1808_DIV_PCLK_DBG_SHIFT 8 + -+#define RK3328_CLKSEL1(_aclk_core, _pclk_dbg) \ ++#define RK1808_CLKSEL0(_aclk_core, _pclk_dbg) \ +{ \ -+ .reg = RK3328_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK3328_DIV_ACLKM_MASK, \ -+ RK3328_DIV_ACLKM_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, RK3328_DIV_PCLK_DBG_MASK, \ -+ RK3328_DIV_PCLK_DBG_SHIFT), \ ++ .reg = RK1808_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK1808_DIV_ACLKM_MASK, \ ++ RK1808_DIV_ACLKM_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, RK1808_DIV_PCLK_DBG_MASK, \ ++ RK1808_DIV_PCLK_DBG_SHIFT), \ +} + -+#define RK3328_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++#define RK1808_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ +{ \ + .prate = _prate, \ + .divs = { \ -+ RK3328_CLKSEL1(_aclk_core, _pclk_dbg), \ ++ RK1808_CLKSEL0(_aclk_core, _pclk_dbg), \ + }, \ +} + -+static struct rockchip_cpuclk_rate_table rk3328_cpuclk_rates[] __initdata = { -+ RK3328_CPUCLK_RATE(1800000000, 1, 7), -+ RK3328_CPUCLK_RATE(1704000000, 1, 7), -+ RK3328_CPUCLK_RATE(1608000000, 1, 7), -+ RK3328_CPUCLK_RATE(1512000000, 1, 7), -+ RK3328_CPUCLK_RATE(1488000000, 1, 5), -+ RK3328_CPUCLK_RATE(1416000000, 1, 5), -+ RK3328_CPUCLK_RATE(1392000000, 1, 5), -+ RK3328_CPUCLK_RATE(1296000000, 1, 5), -+ RK3328_CPUCLK_RATE(1200000000, 1, 5), -+ RK3328_CPUCLK_RATE(1104000000, 1, 5), -+ RK3328_CPUCLK_RATE(1008000000, 1, 5), -+ RK3328_CPUCLK_RATE(912000000, 1, 5), -+ RK3328_CPUCLK_RATE(816000000, 1, 3), -+ RK3328_CPUCLK_RATE(696000000, 1, 3), -+ RK3328_CPUCLK_RATE(600000000, 1, 3), -+ RK3328_CPUCLK_RATE(408000000, 1, 1), -+ RK3328_CPUCLK_RATE(312000000, 1, 1), -+ RK3328_CPUCLK_RATE(216000000, 1, 1), -+ RK3328_CPUCLK_RATE(96000000, 1, 1), ++static struct rockchip_cpuclk_rate_table rk1808_cpuclk_rates[] __initdata = { ++ RK1808_CPUCLK_RATE(1608000000, 1, 7), ++ RK1808_CPUCLK_RATE(1512000000, 1, 7), ++ RK1808_CPUCLK_RATE(1488000000, 1, 5), ++ RK1808_CPUCLK_RATE(1416000000, 1, 5), ++ RK1808_CPUCLK_RATE(1392000000, 1, 5), ++ RK1808_CPUCLK_RATE(1296000000, 1, 5), ++ RK1808_CPUCLK_RATE(1200000000, 1, 5), ++ RK1808_CPUCLK_RATE(1104000000, 1, 5), ++ RK1808_CPUCLK_RATE(1008000000, 1, 5), ++ RK1808_CPUCLK_RATE(912000000, 1, 5), ++ RK1808_CPUCLK_RATE(816000000, 1, 3), ++ RK1808_CPUCLK_RATE(696000000, 1, 3), ++ RK1808_CPUCLK_RATE(600000000, 1, 3), ++ RK1808_CPUCLK_RATE(408000000, 1, 1), ++ RK1808_CPUCLK_RATE(312000000, 1, 1), ++ RK1808_CPUCLK_RATE(216000000, 1, 1), ++ RK1808_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk3328_cpuclk_data = { -+ .core_reg[0] = RK3328_CLKSEL_CON(0), ++static const struct rockchip_cpuclk_reg_data rk1808_cpuclk_data = { ++ .core_reg[0] = RK1808_CLKSEL_CON(0), + .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, ++ .div_core_mask[0] = 0xf, + .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 3, ++ .mux_core_alt = 2, ++ .mux_core_main = 0, + .mux_core_shift = 6, + .mux_core_mask = 0x3, +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+ -+PNAME(mux_2plls_p) = { 
"cpll", "gpll" }; ++PNAME(mux_pll_p) = { "xin24m", "xin32k"}; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "xin32k" }; +PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_cpll_gpll_apll_p) = { "cpll", "gpll", "apll" }; -+PNAME(mux_2plls_xin24m_p) = { "cpll", "gpll", "xin24m" }; -+PNAME(mux_2plls_hdmiphy_p) = { "cpll", "gpll", -+ "dummy_hdmiphy" }; -+PNAME(mux_4plls_p) = { "cpll", "gpll", -+ "dummy_hdmiphy", -+ "usb480m" }; -+PNAME(mux_2plls_u480m_p) = { "cpll", "gpll", -+ "usb480m" }; -+PNAME(mux_2plls_24m_u480m_p) = { "cpll", "gpll", -+ "xin24m", "usb480m" }; -+ -+PNAME(mux_ddrphy_p) = { "dpll", "apll", "cpll" }; -+PNAME(mux_armclk_p) = { "apll_core", -+ "gpll_core", -+ "dpll_core", -+ "npll_core"}; -+PNAME(mux_hdmiphy_p) = { "hdmi_phy", "xin24m" }; -+PNAME(mux_usb480m_p) = { "usb480m_phy", -+ "xin24m" }; -+ -+PNAME(mux_i2s0_p) = { "clk_i2s0_div", -+ "clk_i2s0_frac", -+ "xin12m", -+ "xin12m" }; -+PNAME(mux_i2s1_p) = { "clk_i2s1_div", -+ "clk_i2s1_frac", -+ "clkin_i2s1", -+ "xin12m" }; -+PNAME(mux_i2s2_p) = { "clk_i2s2_div", -+ "clk_i2s2_frac", -+ "clkin_i2s2", -+ "xin12m" }; -+PNAME(mux_i2s1out_p) = { "clk_i2s1", "xin12m"}; -+PNAME(mux_i2s2out_p) = { "clk_i2s2", "xin12m" }; -+PNAME(mux_spdif_p) = { "clk_spdif_div", -+ "clk_spdif_frac", -+ "xin12m", -+ "xin12m" }; -+PNAME(mux_uart0_p) = { "clk_uart0_div", -+ "clk_uart0_frac", -+ "xin24m" }; -+PNAME(mux_uart1_p) = { "clk_uart1_div", -+ "clk_uart1_frac", -+ "xin24m" }; -+PNAME(mux_uart2_p) = { "clk_uart2_div", -+ "clk_uart2_frac", -+ "xin24m" }; -+ -+PNAME(mux_sclk_cif_p) = { "clk_cif_src", -+ "xin24m" }; -+PNAME(mux_dclk_lcdc_p) = { "hdmiphy", -+ "dclk_lcdc_src" }; -+PNAME(mux_aclk_peri_pre_p) = { "cpll_peri", -+ "gpll_peri", -+ "hdmiphy_peri" }; -+PNAME(mux_ref_usb3otg_src_p) = { "xin24m", -+ "clk_usb3otg_ref" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", -+ "clk_rtc32k" }; -+PNAME(mux_mac2io_src_p) = { "clk_mac2io_src", -+ "gmac_clkin" }; -+PNAME(mux_mac2phy_src_p) = { "clk_mac2phy_src", -+ "phy_50m_out" }; -+PNAME(mux_mac2io_ext_p) = { "clk_mac2io", -+ "gmac_clkin" }; ++PNAME(mux_gpll_cpll_apll_p) = { "gpll", "cpll", "apll" }; ++PNAME(mux_npu_p) = { "clk_npu_div", "clk_npu_np5" }; ++PNAME(mux_ddr_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; ++PNAME(mux_gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; ++PNAME(mux_dclk_vopraw_p) = { "dclk_vopraw_src", "dclk_vopraw_frac", "xin24m" }; ++PNAME(mux_dclk_voplite_p) = { "dclk_voplite_src", "dclk_voplite_frac", "xin24m" }; ++PNAME(mux_24m_npll_gpll_usb480m_p) = { "xin24m", "npll", "gpll", "usb480m" }; ++PNAME(mux_usb3_otg0_suspend_p) = { "xin32k", "xin24m" }; ++PNAME(mux_pcie_aux_p) = { "xin24m", "clk_pcie_src" }; ++PNAME(mux_gpll_cpll_npll_24m_p) = { "gpll", "cpll", "npll", "xin24m" }; ++PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; ++PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; ++PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; ++PNAME(mux_cpll_npll_ppll_p) = { "cpll", "npll", "ppll" }; ++PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; ++PNAME(mux_gmac_rgmii_speed_p) = { "clk_gmac_tx_src", "clk_gmac_tx_src", "clk_gmac_tx_div50", "clk_gmac_tx_div5" }; ++PNAME(mux_gmac_rmii_speed_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; ++PNAME(mux_gmac_rx_tx_p) = { "clk_gmac_rgmii_speed", "clk_gmac_rmii_speed" }; ++PNAME(mux_gpll_usb480m_cpll_npll_p) = { "gpll", "usb480m", "cpll", "npll" }; ++PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { 
"clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac", "xin24m" }; ++PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac", "xin24m" }; ++PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac", "xin24m" }; ++PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac", "xin24m" }; ++PNAME(mux_uart6_p) = { "clk_uart6_src", "clk_uart6_np5", "clk_uart6_frac", "xin24m" }; ++PNAME(mux_uart7_p) = { "clk_uart7_src", "clk_uart7_np5", "clk_uart7_frac", "xin24m" }; ++PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m" }; ++PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(mux_gpll_xin24m_cpll_npll_p) = { "gpll", "xin24m", "cpll", "npll" }; ++PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; ++PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in", "xin12m" }; ++PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; ++PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m", "clk_i2s0_8ch_rx" }; ++PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in", "xin12m" }; ++PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; ++PNAME(mux_i2s0_8ch_rx_out_p) = { "clk_i2s0_8ch_rx", "xin12m", "clk_i2s0_8ch_tx" }; ++PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in", "xin12m" }; ++PNAME(mux_i2s1_2ch_out_p) = { "clk_i2s1_2ch", "xin12m" }; ++PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac" }; ++PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; ++PNAME(mux_gpll_usb480m_cpll_ppll_p) = { "gpll", "usb480m", "cpll", "ppll" }; ++PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac", "xin24m" }; ++PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_pciephy_ref_p) = { "xin24m", "clk_pciephy_src" }; ++PNAME(mux_ppll_xin24m_p) = { "ppll", "xin24m" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", "xin32k" }; ++PNAME(mux_clk_32k_ioe_p) = { "clk_rtc32k_pmu", "xin32k" }; + -+static struct rockchip_pll_clock rk3328_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ 0, RK3328_PLL_CON(0), -+ RK3328_MODE_CON, 0, 4, 0, rk3328_pll_frac_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ 0, RK3328_PLL_CON(8), -+ RK3328_MODE_CON, 4, 3, 0, NULL), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RK3328_PLL_CON(16), -+ RK3328_MODE_CON, 8, 2, 0, rk3328_pll_rates), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK3328_PLL_CON(24), -+ RK3328_MODE_CON, 12, 1, 0, rk3328_pll_frac_rates), -+ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, -+ 0, RK3328_PLL_CON(40), -+ RK3328_MODE_CON, 1, 0, 0, rk3328_pll_rates), ++static struct rockchip_pll_clock rk1808_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, ++ 0, RK1808_PLL_CON(0), ++ RK1808_MODE_CON, 0, 0, 0, rk1808_pll_rates), ++ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, ++ 0, RK1808_PLL_CON(8), ++ RK1808_MODE_CON, 2, 1, 0, NULL), ++ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK1808_PLL_CON(16), ++ RK1808_MODE_CON, 4, 2, 0, rk1808_pll_rates), ++ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK1808_PLL_CON(24), ++ RK1808_MODE_CON, 6, 3, 0, rk1808_pll_rates), ++ [npll] = PLL(pll_rk3036, PLL_NPLL, "npll", mux_pll_p, ++ 0, RK1808_PLL_CON(32), ++ RK1808_MODE_CON, 8, 5, 0, 
rk1808_pll_rates), ++ [ppll] = PLL(pll_rk3036, PLL_PPLL, "ppll", mux_pll_p, ++ 0, RK1808_PMU_PLL_CON(0), ++ RK1808_PMU_MODE_CON, 0, 4, 0, rk1808_pll_rates), +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3328_i2s0_fracmux __initdata = -+ MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(6), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart1_fracmux __initdata = ++ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(39), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_i2s1_fracmux __initdata = -+ MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(8), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart2_fracmux __initdata = ++ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(42), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_i2s2_fracmux __initdata = -+ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(10), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart3_fracmux __initdata = ++ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(45), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_spdif_fracmux __initdata = -+ MUX(SCLK_SPDIF, "sclk_spdif", mux_spdif_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(12), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart4_fracmux __initdata = ++ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(48), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(14), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart5_fracmux __initdata = ++ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(51), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(16), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart6_fracmux __initdata = ++ MUX(0, "clk_uart6_mux", mux_uart6_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(54), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(18), 8, 2, MFLAGS); ++static struct rockchip_clk_branch rk1808_uart7_fracmux __initdata = ++ MUX(0, "clk_uart7_mux", mux_uart7_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(57), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { ++static struct rockchip_clk_branch rk1808_dclk_vopraw_fracmux __initdata = ++ MUX(0, "dclk_vopraw_mux", mux_dclk_vopraw_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(5), 14, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_dclk_voplite_fracmux __initdata = ++ MUX(0, "dclk_voplite_mux", mux_dclk_voplite_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(7), 14, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_pdm_fracmux __initdata = ++ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(30), 15, 1, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_i2s0_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 
10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_i2s0_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_i2s1_2ch_fracmux __initdata = ++ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(36), 10, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_rtc32k_pmu_fracmux __initdata = ++ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_uart0_pmu_fracmux __initdata = ++ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk1808_clk_branches[] __initdata = { + /* + * Clock-Architecture Diagram 1 + */ + -+ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, -+ RK3328_CLKSEL_CON(2), 8, 5, DFLAGS), -+ COMPOSITE(SCLK_RTC32K, "clk_rtc32k", mux_2plls_xin24m_p, 0, -+ RK3328_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 14, DFLAGS, -+ RK3328_CLKGATE_CON(0), 11, GFLAGS), -+ -+ /* PD_MISC */ -+ MUX(HDMIPHY, "hdmiphy", mux_hdmiphy_p, CLK_SET_RATE_PARENT, -+ RK3328_MISC_CON, 13, 1, MFLAGS), + MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK3328_MISC_CON, 15, 1, MFLAGS), ++ RK1808_MODE_CON, 10, 2, MFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + + /* + * Clock-Architecture Diagram 2 + */ + -+ /* PD_CORE */ + GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(0), 0, GFLAGS), ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "cpll_core", "cpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), + GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "npll_core", "npll", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3328_CLKGATE_CON(7), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3328_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(0, "aclk_core_niu", "aclk_core", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(0, "aclk_gic400", "aclk_core", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(13), 1, GFLAGS), -+ -+ GATE(0, "clk_jtag", "jtag_clkin", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(7), 2, GFLAGS), -+ -+ /* PD_GPU */ -+ COMPOSITE(0, "aclk_gpu_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(0, "aclk_gpu_niu", "aclk_gpu_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(14), 1, GFLAGS), ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK1808_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK1808_CLKGATE_CON(0), 2, GFLAGS), + -+ /* PD_DDR */ -+ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, 0, -+ RK3328_CLKSEL_CON(3), 8, 2, 0, 3, -+ ROCKCHIP_DDRCLK_SIP_V2), ++ GATE(0, "clk_jtag", "jtag_clkin", 
CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 4, GFLAGS), + -+ GATE(0, "clk_ddrmsch", "sclk_ddrc", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(18), 6, GFLAGS), -+ GATE(0, "clk_ddrupctl", "sclk_ddrc", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(18), 5, GFLAGS), -+ GATE(0, "aclk_ddrupctl", "sclk_ddrc", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(18), 4, GFLAGS), -+ GATE(0, "clk_ddrmon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RK1808_CLKGATE_CON(0), 5, GFLAGS), + -+ COMPOSITE(PCLK_DDR, "pclk_ddr", mux_2plls_hdmiphy_p, CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(4), 13, 2, MFLAGS, 8, 3, DFLAGS, -+ RK3328_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(0, "pclk_ddrupctl", "pclk_ddr", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(18), 1, GFLAGS), -+ GATE(0, "pclk_ddr_msch", "pclk_ddr", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(0, "pclk_ddr_mon", "pclk_ddr", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(18), 3, GFLAGS), -+ GATE(0, "pclk_ddrstdby", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(18), 7, GFLAGS), -+ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(18), 9, GFLAGS), ++ COMPOSITE_NOMUX(MSCLK_CORE_NIU, "msclk_core_niu", "gpll", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(18), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(0), 1, GFLAGS), + + /* + * Clock-Architecture Diagram 3 + */ + -+ /* PD_BUS */ -+ COMPOSITE(ACLK_BUS_PRE, "aclk_bus_pre", mux_2plls_hdmiphy_p, CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_BUS_PRE, "hclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(1), 8, 2, DFLAGS, -+ RK3328_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_BUS_PRE, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(1), 12, 3, DFLAGS, -+ RK3328_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(0, "pclk_bus", "pclk_bus_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(0, "pclk_phy_pre", "pclk_bus_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(15), 11, 1, MFLAGS, 12, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(0, "aclk_gic_niu", "aclk_gic_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(0, "aclk_core2gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(0, "aclk_gic2core", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(0, "aclk_spinlock", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 4, GFLAGS), + -+ COMPOSITE(SCLK_TSP, "clk_tsp", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(21), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(0, "clk_hsadc_tsp", "ext_gpio3a2", 0, -+ RK3328_CLKGATE_CON(17), 13, GFLAGS), ++ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, ++ RK1808_CLKSEL_CON(16), 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, ++ RK1808_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, ++ RK1808_CLKGATE_CON(8), 13, GFLAGS), ++ 
GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 11, GFLAGS), + -+ /* PD_I2S */ -+ COMPOSITE(0, "clk_i2s0_div", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(7), 0, -+ RK3328_CLKGATE_CON(1), 2, GFLAGS, -+ &rk3328_i2s0_fracmux), -+ GATE(SCLK_I2S0, "clk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(1), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ COMPOSITE_NOGATE(0, "clk_npu_div", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, ++ RK1808_CLKSEL_CON(1), 8, 2, MFLAGS, 0, 4, DFLAGS), ++ COMPOSITE_NOGATE_HALFDIV(0, "clk_npu_np5", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, ++ RK1808_CLKSEL_CON(1), 10, 2, MFLAGS, 4, 4, DFLAGS), ++ MUX(0, "clk_npu_pre", mux_npu_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(1), 15, 1, MFLAGS), ++ FACTOR(0, "clk_npu_scan", "clk_npu_pre", 0, 1, 2), ++ GATE(SCLK_NPU, "clk_npu", "clk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 10, GFLAGS), + -+ COMPOSITE(0, "clk_i2s1_div", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(8), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(9), 0, -+ RK3328_CLKGATE_CON(1), 5, GFLAGS, -+ &rk3328_i2s1_fracmux), -+ GATE(SCLK_I2S1, "clk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S1_OUT, "i2s1_out", mux_i2s1out_p, 0, -+ RK3328_CLKSEL_CON(8), 12, 1, MFLAGS, -+ RK3328_CLKGATE_CON(1), 7, GFLAGS), ++ COMPOSITE(0, "aclk_npu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(2), 14, 1, MFLAGS, 0, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE(0, "hclk_npu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(0, "aclk_npu_niu", "aclk_npu_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_npu2mem", "aclk_npu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(2), 4, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 15, GFLAGS), ++ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(0, "hclk_npu_niu", "hclk_npu_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 14, GFLAGS), + -+ COMPOSITE(0, "clk_i2s2_div", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(11), 0, -+ RK3328_CLKGATE_CON(1), 9, GFLAGS, -+ &rk3328_i2s2_fracmux), -+ GATE(SCLK_I2S2, "clk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S2_OUT, "i2s2_out", mux_i2s2out_p, 0, -+ RK3328_CLKSEL_CON(10), 12, 1, MFLAGS, -+ RK3328_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(SCLK_PVTM_NPU, "clk_pvtm_npu", "xin24m", 0, ++ RK1808_CLKGATE_CON(0), 15, GFLAGS), + -+ COMPOSITE(0, "clk_spdif_div", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(12), 15, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(13), 0, -+ RK3328_CLKGATE_CON(1), 13, GFLAGS, -+ &rk3328_spdif_fracmux), ++ COMPOSITE(ACLK_IMEM_PRE, "aclk_imem_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(17), 7, 1, MFLAGS, 0, 5, DFLAGS, 
++ RK1808_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(ACLK_IMEM0, "aclk_imem0", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(0, "aclk_imem0_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(ACLK_IMEM1, "aclk_imem1", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(0, "aclk_imem1_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(ACLK_IMEM2, "aclk_imem2", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 8, GFLAGS), ++ GATE(0, "aclk_imem2_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(ACLK_IMEM3, "aclk_imem3", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(0, "aclk_imem3_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 13, GFLAGS), + -+ /* PD_UART */ -+ COMPOSITE(0, "clk_uart0_div", mux_2plls_u480m_p, 0, -+ RK3328_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(0, "clk_uart1_div", mux_2plls_u480m_p, 0, -+ RK3328_CLKSEL_CON(16), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE(0, "clk_uart2_div", mux_2plls_u480m_p, 0, -+ RK3328_CLKSEL_CON(18), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(15), 0, -+ RK3328_CLKGATE_CON(1), 15, GFLAGS, -+ &rk3328_uart0_fracmux), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(17), 0, -+ RK3328_CLKGATE_CON(2), 1, GFLAGS, -+ &rk3328_uart1_fracmux), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(19), 0, -+ RK3328_CLKGATE_CON(2), 3, GFLAGS, -+ &rk3328_uart2_fracmux), ++ COMPOSITE(HSCLK_IMEM, "hsclk_imem", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(17), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(7), 5, GFLAGS), + + /* -+ * Clock-Architecture Diagram 4 ++ * Clock-Architecture Diagram 5 + */ ++ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 0, GFLAGS), + -+ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(34), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(34), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(35), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(35), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 12, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "clk_24m", 0, -+ RK3328_CLKSEL_CON(22), 0, 10, DFLAGS, -+ RK3328_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "clk_24m", 0, -+ RK3328_CLKSEL_CON(23), 0, 10, DFLAGS, -+ RK3328_CLKGATE_CON(2), 14, GFLAGS), -+ COMPOSITE(SCLK_SPI, "clk_spi", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(24), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 7, GFLAGS), -+ COMPOSITE(SCLK_PWM, "clk_pwm", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(24), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3328_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE(SCLK_OTP, "clk_otp", mux_2plls_xin24m_p, 
0, -+ RK3328_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3328_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE(SCLK_EFUSE, "clk_efuse", mux_2plls_xin24m_p, 0, -+ RK3328_CLKSEL_CON(5), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE(SCLK_PDM, "clk_pdm", mux_cpll_gpll_apll_p, CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(2), 15, GFLAGS), ++ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 11, GFLAGS), ++ GATE(0, "aclk_split", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 15, GFLAGS), ++ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(0, "clk_ddrdfi_ctl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(0, "clk_stdby", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 13, GFLAGS), ++ GATE(0, "aclk_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(0, "clk_core_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 6, GFLAGS), + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 8, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK3328_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 6, GFLAGS), + -+ COMPOSITE(SCLK_WIFI, "clk_wifi", mux_2plls_u480m_p, 0, -+ RK3328_CLKSEL_CON(52), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3328_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_ddr_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS), ++ FACTOR(0, "clk_ddrphy1x_out", "sclk_ddrc", CLK_IGNORE_UNUSED, 1, 1), ++ ++ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(3), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 10, GFLAGS), ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_MSCH, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(PCLK_STDBY, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 12, GFLAGS), ++ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(0, "pclk_ddrdfi_ctl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 2, GFLAGS), + + /* -+ * Clock-Architecture Diagram 5 ++ * Clock-Architecture Diagram 6 + */ + -+ /* PD_VIDEO */ -+ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 0, GFLAGS), -+ FACTOR_GATE(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4, -+ RK3328_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(24), 0, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", CLK_SET_RATE_PARENT, -+ 
RK3328_CLKGATE_CON(24), 1, GFLAGS), -+ GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(24), 2, GFLAGS), -+ GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(24), 3, GFLAGS), ++ COMPOSITE(HSCLK_VIO, "hsclk_vio", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_VIO, "lsclk_vio", "hsclk_vio", 0, ++ RK1808_CLKSEL_CON(4), 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(3), 12, GFLAGS), ++ GATE(0, "hsclk_vio_niu", "hsclk_vio", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(0, "lsclk_vio_niu", "lsclk_vio", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(ACLK_VOPRAW, "aclk_vopraw", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 2, GFLAGS), ++ GATE(HCLK_VOPRAW, "hclk_vopraw", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(ACLK_VOPLITE, "aclk_voplite", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 4, GFLAGS), ++ GATE(HCLK_VOPLITE, "hclk_voplite", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(PCLK_DSI_TX, "pclk_dsi_tx", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(PCLK_CSI_TX, "pclk_csi_tx", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 9, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 10, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(PCLK_CSI2HOST, "pclk_csi2host", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 12, GFLAGS), + -+ COMPOSITE(SCLK_VDEC_CABAC, "sclk_vdec_cabac", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(48), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE(0, "dclk_vopraw_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "dclk_vopraw_frac", "dclk_vopraw_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(6), 0, ++ RK1808_CLKGATE_CON(3), 2, GFLAGS, ++ &rk1808_dclk_vopraw_fracmux), ++ GATE(DCLK_VOPRAW, "dclk_vopraw", "dclk_vopraw_mux", 0, ++ RK1808_CLKGATE_CON(3), 3, GFLAGS), + -+ COMPOSITE(SCLK_VDEC_CORE, "sclk_vdec_core", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 2, GFLAGS), ++ COMPOSITE(0, "dclk_voplite_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(7), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "dclk_voplite_frac", "dclk_voplite_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(8), 0, ++ RK1808_CLKGATE_CON(3), 5, GFLAGS, ++ &rk1808_dclk_voplite_fracmux), ++ GATE(DCLK_VOPLITE, "dclk_voplite", "dclk_voplite_mux", 0, ++ RK1808_CLKGATE_CON(3), 6, GFLAGS), + -+ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(50), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 5, GFLAGS), -+ FACTOR_GATE(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4, -+ RK3328_CLKGATE_CON(11), 8, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(23), 0, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(0, 
"aclk_vpu_niu", "aclk_vpu_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(23), 3, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TXESC, "clk_txesc", "gpll", 0, ++ RK1808_CLKSEL_CON(9), 0, 12, DFLAGS, ++ RK1808_CLKGATE_CON(3), 7, GFLAGS), + -+ COMPOSITE(ACLK_RKVENC, "aclk_rkvenc", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE(SCLK_RGA, "clk_rga", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 8, GFLAGS), + -+ COMPOSITE(SCLK_VENC_CORE, "sclk_venc_core", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(51), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 4, GFLAGS), -+ FACTOR_GATE(0, "hclk_venc", "sclk_venc_core", 0, 1, 4, -+ RK3328_CLKGATE_CON(11), 4, GFLAGS), ++ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(10), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 10, GFLAGS), + -+ GATE(0, "aclk_rkvenc_niu", "sclk_venc_core", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(25), 0, GFLAGS), -+ GATE(0, "hclk_rkvenc_niu", "hclk_venc", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(25), 1, GFLAGS), -+ GATE(ACLK_H265, "aclk_h265", "sclk_venc_core", 0, -+ RK3328_CLKGATE_CON(25), 2, GFLAGS), -+ GATE(PCLK_H265, "pclk_h265", "hclk_venc", 0, -+ RK3328_CLKGATE_CON(25), 3, GFLAGS), -+ GATE(ACLK_H264, "aclk_h264", "sclk_venc_core", 0, -+ RK3328_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(HCLK_H264, "hclk_h264", "hclk_venc", 0, -+ RK3328_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(ACLK_AXISRAM, "aclk_axisram", "sclk_venc_core", CLK_IGNORE_UNUSED, -+ RK3328_CLKGATE_CON(25), 6, GFLAGS), ++ COMPOSITE(DCLK_CIF, "dclk_cif", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 11, GFLAGS), + -+ COMPOSITE(SCLK_VENC_DSP, "sclk_venc_dsp", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(52), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(6), 7, GFLAGS), ++ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_24m_npll_gpll_usb480m_p, 0, ++ RK1808_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK1808_CLKGATE_CON(3), 9, GFLAGS), + + /* -+ * Clock-Architecture Diagram 6 ++ * Clock-Architecture Diagram 7 + */ + -+ /* PD_VIO */ -+ COMPOSITE(ACLK_VIO_PRE, "aclk_vio_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(37), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(5), 2, GFLAGS), -+ DIV(HCLK_VIO_PRE, "hclk_vio_pre", "aclk_vio_pre", 0, -+ RK3328_CLKSEL_CON(37), 8, 5, DFLAGS), ++ /* PD_PCIE */ ++ COMPOSITE_NODIV(0, "clk_pcie_src", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(12), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(5), 0, GFLAGS), ++ DIV(HSCLK_PCIE, "hsclk_pcie", "clk_pcie_src", 0, ++ RK1808_CLKSEL_CON(12), 0, 5, DFLAGS), ++ DIV(LSCLK_PCIE, "lsclk_pcie", "clk_pcie_src", 0, ++ RK1808_CLKSEL_CON(12), 8, 5, DFLAGS), ++ GATE(0, "hsclk_pcie_niu", "hsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(0, "lsclk_pcie_niu", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(0, "pclk_pcie_grf", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(ACLK_USB3OTG, "aclk_usb3otg", "hsclk_pcie", 0, ++ RK1808_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(HCLK_HOST, "hclk_host", "lsclk_pcie", 0, ++ RK1808_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(HCLK_HOST_ARB, "hclk_host_arb", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 8, GFLAGS), + -+ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(36), 14, 2, 
MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(5), 0, GFLAGS), -+ COMPOSITE(SCLK_RGA, "clk_rga", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(5), 1, GFLAGS), -+ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_4plls_p, 0, -+ RK3328_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(5), 5, GFLAGS), -+ GATE(SCLK_HDMI_SFC, "sclk_hdmi_sfc", "xin24m", 0, -+ RK3328_CLKGATE_CON(5), 4, GFLAGS), ++ COMPOSITE(ACLK_PCIE, "aclk_pcie", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(15), 8, 1, MFLAGS, 0, 4, DFLAGS, ++ RK1808_CLKGATE_CON(5), 5, GFLAGS), ++ DIV(0, "pclk_pcie_pre", "aclk_pcie", 0, ++ RK1808_CLKSEL_CON(15), 4, 4, DFLAGS), ++ GATE(0, "aclk_pcie_niu", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(ACLK_PCIE_MST, "aclk_pcie_mst", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(ACLK_PCIE_SLV, "aclk_pcie_slv", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(0, "pclk_pcie_niu", "pclk_pcie_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(0, "pclk_pcie_dbi", "pclk_pcie_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(PCLK_PCIE, "pclk_pcie", "pclk_pcie_pre", 0, ++ RK1808_CLKGATE_CON(6), 9, GFLAGS), + -+ COMPOSITE_NODIV(0, "clk_cif_src", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(42), 7, 1, MFLAGS, -+ RK3328_CLKGATE_CON(5), 3, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cif_out", mux_sclk_cif_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(42), 5, 1, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE(0, "clk_pcie_aux_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(5), 3, GFLAGS), ++ COMPOSITE_NODIV(SCLK_PCIE_AUX, "clk_pcie_aux", mux_pcie_aux_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(14), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(5), 4, GFLAGS), + -+ COMPOSITE(DCLK_LCDC_SRC, "dclk_lcdc_src", mux_gpll_cpll_p, 0, -+ RK3328_CLKSEL_CON(40), 0, 1, MFLAGS, 8, 8, DFLAGS, -+ RK3328_CLKGATE_CON(5), 6, GFLAGS), -+ DIV(DCLK_HDMIPHY, "dclk_hdmiphy", "dclk_lcdc_src", 0, -+ RK3328_CLKSEL_CON(40), 3, 3, DFLAGS), -+ MUX(DCLK_LCDC, "dclk_lcdc", mux_dclk_lcdc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3328_CLKSEL_CON(40), 1, 1, MFLAGS), ++ GATE(SCLK_USB3_OTG0_REF, "clk_usb3_otg0_ref", "xin24m", 0, ++ RK1808_CLKGATE_CON(5), 1, GFLAGS), ++ ++ COMPOSITE(SCLK_USB3_OTG0_SUSPEND, "clk_usb3_otg0_suspend", mux_usb3_otg0_suspend_p, 0, ++ RK1808_CLKSEL_CON(13), 12, 1, MFLAGS, 0, 10, DFLAGS, ++ RK1808_CLKGATE_CON(5), 2, GFLAGS), + + /* -+ * Clock-Architecture Diagram 7 ++ * Clock-Architecture Diagram 8 + */ + -+ /* PD_PERI */ -+ GATE(0, "gpll_peri", "gpll", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(0, "cpll_peri", "cpll", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(0, "hdmiphy_peri", "hdmiphy", CLK_IS_CRITICAL, -+ RK3328_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE_NOGATE(ACLK_PERI_PRE, "aclk_peri_pre", mux_aclk_peri_pre_p, CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(29), 0, 2, DFLAGS, -+ RK3328_CLKGATE_CON(10), 2, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, -+ RK3328_CLKSEL_CON(29), 4, 3, DFLAGS, -+ RK3328_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, -+ RK3328_CLKGATE_CON(10), 0, GFLAGS), ++ /* PD_PHP */ + -+ 
COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_2plls_24m_u480m_p, 0, -+ RK3328_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3328_CLKGATE_CON(4), 3, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(MSCLK_PERI, "msclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_PERI, "lsclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(0, "msclk_peri_niu", "msclk_peri", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(0, "lsclk_peri_niu", "lsclk_peri", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(8), 4, GFLAGS), + -+ COMPOSITE(SCLK_SDIO, "clk_sdio", mux_2plls_24m_u480m_p, 0, -+ RK3328_CLKSEL_CON(31), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3328_CLKGATE_CON(4), 4, GFLAGS), ++ /* PD_MMC */ + -+ COMPOSITE(SCLK_EMMC, "clk_emmc", mux_2plls_24m_u480m_p, 0, -+ RK3328_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3328_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(0, "hclk_mmc_sfc", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(9), 0, GFLAGS), ++ GATE(0, "hclk_mmc_sfc_niu", "hclk_mmc_sfc", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(9), 11, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_sfc", 0, ++ RK1808_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_sfc", 0, ++ RK1808_CLKGATE_CON(9), 13, GFLAGS), + -+ COMPOSITE(SCLK_SDMMC_EXT, "clk_sdmmc_ext", mux_2plls_24m_u480m_p, 0, -+ RK3328_CLKSEL_CON(43), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3328_CLKGATE_CON(4), 10, GFLAGS), ++ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(23), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(23), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 3, GFLAGS), + -+ COMPOSITE(SCLK_REF_USB3OTG_SRC, "clk_ref_usb3otg_src", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3328_CLKGATE_CON(4), 9, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK1808_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK1808_SDIO_CON1, 1), + -+ MUX(SCLK_REF_USB3OTG, "clk_ref_usb3otg", mux_ref_usb3otg_src_p, CLK_SET_RATE_PARENT, -+ RK3328_CLKSEL_CON(45), 8, 1, MFLAGS), ++ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 4, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(25), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 5, GFLAGS), ++ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(25), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 6, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK1808_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK1808_EMMC_CON1, 1), + -+ GATE(SCLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, -+ RK3328_CLKGATE_CON(4), 7, GFLAGS), ++ 
COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 7, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(21), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(21), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 9, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK1808_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK1808_SDMMC_CON1, 1), + -+ COMPOSITE(SCLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, -+ RK3328_CLKSEL_CON(33), 15, 1, MFLAGS, 0, 10, DFLAGS, -+ RK3328_CLKGATE_CON(4), 8, GFLAGS), ++ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(9), 10, GFLAGS), ++ ++ /* PD_MAC */ ++ ++ GATE(0, "pclk_sd_gmac", "lsclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(0, "aclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(0, "hclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(0, "pclk_gmac_niu", "pclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(0, "aclk_gmac_niu", "aclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 11, GFLAGS), ++ GATE(0, "hclk_gmac_niu", "hclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 9, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 14, GFLAGS), ++ ++ COMPOSITE(SCLK_GMAC_OUT, "clk_gmac_out", mux_cpll_npll_ppll_p, 0, ++ RK1808_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(10), 15, GFLAGS), ++ ++ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_cpll_npll_ppll_p, 0, ++ RK1808_CLKSEL_CON(26), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(10), 3, GFLAGS), ++ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(27), 0, 1, MFLAGS), ++ GATE(SCLK_GMAC_REF, "clk_gmac_ref", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(0, "clk_gmac_tx_src", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(0, "clk_gmac_rx_src", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 6, GFLAGS), ++ GATE(SCLK_GMAC_REFOUT, "clk_gmac_refout", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 5, GFLAGS), ++ FACTOR(0, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), ++ FACTOR(0, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), ++ FACTOR(0, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), ++ FACTOR(0, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), ++ MUX(SCLK_GMAC_RGMII_SPEED, "clk_gmac_rgmii_speed", mux_gmac_rgmii_speed_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 2, 2, MFLAGS), ++ MUX(SCLK_GMAC_RMII_SPEED, "clk_gmac_rmii_speed", mux_gmac_rmii_speed_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 1, 1, MFLAGS), ++ MUX(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", mux_gmac_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 4, 1, MFLAGS), + + /* -+ * 
Clock-Architecture Diagram 8 ++ * Clock-Architecture Diagram 9 + */ + -+ /* PD_GMAC */ -+ COMPOSITE(ACLK_GMAC, "aclk_gmac", mux_2plls_hdmiphy_p, 0, -+ RK3328_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(3), 2, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_GMAC, "pclk_gmac", "aclk_gmac", 0, -+ RK3328_CLKSEL_CON(25), 8, 3, DFLAGS, -+ RK3328_CLKGATE_CON(9), 0, GFLAGS), ++ /* PD_BUS */ + -+ COMPOSITE(SCLK_MAC2IO_SRC, "clk_mac2io_src", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(3), 1, GFLAGS), -+ GATE(SCLK_MAC2IO_REF, "clk_mac2io_ref", "clk_mac2io", 0, -+ RK3328_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(SCLK_MAC2IO_RX, "clk_mac2io_rx", "clk_mac2io", 0, -+ RK3328_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(SCLK_MAC2IO_TX, "clk_mac2io_tx", "clk_mac2io", 0, -+ RK3328_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(SCLK_MAC2IO_REFOUT, "clk_mac2io_refout", "clk_mac2io", 0, -+ RK3328_CLKGATE_CON(9), 6, GFLAGS), -+ COMPOSITE(SCLK_MAC2IO_OUT, "clk_mac2io_out", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3328_CLKGATE_CON(3), 5, GFLAGS), -+ MUXGRF(SCLK_MAC2IO, "clk_mac2io", mux_mac2io_src_p, CLK_SET_RATE_NO_REPARENT, -+ RK3328_GRF_MAC_CON1, 10, 1, MFLAGS), -+ MUXGRF(SCLK_MAC2IO_EXT, "clk_mac2io_ext", mux_mac2io_ext_p, CLK_SET_RATE_NO_REPARENT, -+ RK3328_GRF_SOC_CON4, 14, 1, MFLAGS), ++ COMPOSITE_NODIV(0, "clk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(27), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX(HSCLK_BUS_PRE, "hsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(27), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NOMUX(MSCLK_BUS_PRE, "msclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(28), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 2, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_BUS_PRE, "lsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(28), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(0, "hsclk_bus_niu", "hsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(0, "msclk_bus_niu", "msclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(0, "msclk_sub", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(14), 15, GFLAGS), ++ GATE(HCLK_ROM, "hclk_rom", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(0, "lsclk_bus_niu", "lsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 12, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 13, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 14, GFLAGS), ++ GATE(PCLK_UART7, 
"pclk_uart7", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 15, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 5, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(PCLK_EFUSE, "pclk_efuse", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 13, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 14, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 1, GFLAGS), ++ GATE(0, "pclk_grf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(0, "pclk_sgrf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(0, "hclk_audio_pre", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(0, "pclk_top_pre", "lsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(11), 4, GFLAGS), + -+ COMPOSITE(SCLK_MAC2PHY_SRC, "clk_mac2phy_src", mux_2plls_p, 0, -+ RK3328_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3328_CLKGATE_CON(3), 0, GFLAGS), -+ GATE(SCLK_MAC2PHY_REF, "clk_mac2phy_ref", "clk_mac2phy", 0, -+ RK3328_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(SCLK_MAC2PHY_RXTX, "clk_mac2phy_rxtx", "clk_mac2phy", 0, -+ RK3328_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_MAC2PHY_OUT, "clk_mac2phy_out", "clk_mac2phy", 0, -+ RK3328_CLKSEL_CON(26), 8, 2, DFLAGS, -+ RK3328_CLKGATE_CON(9), 2, GFLAGS), -+ MUXGRF(SCLK_MAC2PHY, "clk_mac2phy", mux_mac2phy_src_p, CLK_SET_RATE_NO_REPARENT, -+ RK3328_GRF_MAC_CON2, 10, 1, MFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(29), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 6, GFLAGS), + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ COMPOSITE(0, "clk_uart1_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, ++ RK1808_CLKSEL_CON(39), 
0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(40), 0, ++ RK1808_CLKGATE_CON(11), 10, GFLAGS, ++ &rk1808_uart1_fracmux), ++ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, ++ RK1808_CLKGATE_CON(11), 11, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart2_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(41), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, ++ RK1808_CLKSEL_CON(42), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(43), 0, ++ RK1808_CLKGATE_CON(11), 14, GFLAGS, ++ &rk1808_uart2_fracmux), ++ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", 0, ++ RK1808_CLKGATE_CON(11), 15, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart3_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(44), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, ++ RK1808_CLKSEL_CON(45), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(46), 0, ++ RK1808_CLKGATE_CON(12), 2, GFLAGS, ++ &rk1808_uart3_fracmux), ++ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, ++ RK1808_CLKGATE_CON(12), 3, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart4_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(47), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 4, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, ++ RK1808_CLKSEL_CON(48), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 5, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(49), 0, ++ RK1808_CLKGATE_CON(12), 6, GFLAGS, ++ &rk1808_uart4_fracmux), ++ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, ++ RK1808_CLKGATE_CON(12), 7, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart5_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(50), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 8, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, ++ RK1808_CLKSEL_CON(51), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(52), 0, ++ RK1808_CLKGATE_CON(12), 10, GFLAGS, ++ &rk1808_uart5_fracmux), ++ GATE(SCLK_UART5, "clk_uart5", "clk_uart5_mux", 0, ++ RK1808_CLKGATE_CON(12), 11, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart6_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(53), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart6_np5", "clk_uart6_src", 0, ++ RK1808_CLKSEL_CON(54), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(55), 0, ++ RK1808_CLKGATE_CON(12), 14, GFLAGS, ++ &rk1808_uart6_fracmux), ++ GATE(SCLK_UART6, "clk_uart6", "clk_uart6_mux", 0, ++ RK1808_CLKGATE_CON(12), 15, GFLAGS), ++ ++ COMPOSITE(0, "clk_uart7_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart7_np5", "clk_uart7_src", 0, ++ RK1808_CLKSEL_CON(57), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, 
"clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(58), 0, ++ RK1808_CLKGATE_CON(13), 2, GFLAGS, ++ &rk1808_uart7_fracmux), ++ GATE(SCLK_UART7, "clk_uart7", "clk_uart7_mux", 0, ++ RK1808_CLKGATE_CON(13), 3, GFLAGS), ++ ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 4, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 5, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(SCLK_I2C4, "clk_i2c4", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(14), 6, GFLAGS), ++ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(71), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(14), 7, GFLAGS), ++ ++ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 7, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 9, GFLAGS), ++ ++ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK1808_CLKSEL_CON(62), 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(13), 13, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK1808_CLKSEL_CON(63), 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(13), 14, GFLAGS), ++ ++ COMPOSITE(SCLK_EFUSE_S, "clk_efuse_s", mux_gpll_cpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK1808_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE(SCLK_EFUSE_NS, "clk_efuse_ns", mux_gpll_cpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(64), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK1808_CLKGATE_CON(14), 1, GFLAGS), ++ ++ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(65), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 2, GFLAGS), ++ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(66), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 3, GFLAGS), ++ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(67), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 4, GFLAGS), ++ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(68), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 5, GFLAGS), ++ ++ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(69), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 10, GFLAGS), ++ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(69), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 11, GFLAGS), ++ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 12, GFLAGS), ++ ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", 
"xin24m", 0, ++ RK1808_CLKGATE_CON(14), 12, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 13, GFLAGS), + + /* -+ * Clock-Architecture Diagram 9 ++ * Clock-Architecture Diagram 10 + */ + -+ /* PD_VOP */ -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3328_CLKGATE_CON(21), 10, GFLAGS), -+ GATE(0, "aclk_rga_niu", "aclk_rga_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK3328_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(0, "aclk_vop_niu", "aclk_vop_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 4, GFLAGS), ++ /* PD_AUDIO */ + -+ GATE(ACLK_IEP, "aclk_iep", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 6, GFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 8, GFLAGS), -+ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 15, GFLAGS), -+ GATE(0, "aclk_vio_niu", "aclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 2, GFLAGS), ++ GATE(0, "hclk_audio_niu", "hclk_audio_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(18), 11, GFLAGS), ++ GATE(HCLK_VAD, "hclk_vad", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 12, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 13, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 14, GFLAGS), ++ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 15, GFLAGS), + -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(0, "hclk_vop_niu", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 7, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 9, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 11, GFLAGS), -+ GATE(0, "hclk_ahb1tom", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 12, GFLAGS), -+ GATE(0, "pclk_vio_h2p", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 13, GFLAGS), -+ GATE(0, "hclk_vio_h2p", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 14, GFLAGS), -+ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 0, GFLAGS), -+ GATE(0, "hclk_vio_niu", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 1, GFLAGS), -+ GATE(PCLK_HDMI, "pclk_hdmi", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 4, GFLAGS), -+ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 5, GFLAGS), ++ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(17), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(31), 0, ++ RK1808_CLKGATE_CON(17), 10, GFLAGS, ++ &rk1808_pdm_fracmux), ++ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, ++ RK1808_CLKGATE_CON(17), 11, GFLAGS), + -+ /* PD_PERI */ -+ GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 11, GFLAGS), -+ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_peri", 0, RK3328_CLKGATE_CON(19), 14, GFLAGS), ++ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(17), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(33), 0, ++ RK1808_CLKGATE_CON(17), 13, GFLAGS, ++ &rk1808_i2s0_8ch_tx_fracmux), ++ 
COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(17), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 14, 2, MFLAGS, ++ RK1808_CLKGATE_CON(17), 15, GFLAGS), + -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(HCLK_SDMMC_EXT, "hclk_sdmmc_ext", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 15, GFLAGS), -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 8, GFLAGS), -+ GATE(HCLK_OTG_PMU, "hclk_otg_pmu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 9, GFLAGS), -+ GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 12, GFLAGS), -+ GATE(0, "pclk_peri_niu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 13, GFLAGS), ++ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(18), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(35), 0, ++ RK1808_CLKGATE_CON(18), 1, GFLAGS, ++ &rk1808_i2s0_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(18), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", mux_i2s0_8ch_rx_out_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 14, 2, MFLAGS, ++ RK1808_CLKGATE_CON(18), 3, GFLAGS), + -+ /* PD_GMAC */ -+ GATE(ACLK_MAC2PHY, "aclk_mac2phy", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(ACLK_MAC2IO, "aclk_mac2io", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 2, GFLAGS), -+ GATE(0, "aclk_gmac_niu", "aclk_gmac", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(PCLK_MAC2PHY, "pclk_mac2phy", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 1, GFLAGS), -+ GATE(PCLK_MAC2IO, "pclk_mac2io", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(0, "pclk_gmac_niu", "pclk_gmac", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(26), 5, GFLAGS), ++ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(36), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(18), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(37), 0, ++ RK1808_CLKGATE_CON(18), 5, GFLAGS, ++ &rk1808_i2s1_2ch_fracmux), ++ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, ++ RK1808_CLKGATE_CON(18), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(36), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(18), 7, GFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 10 ++ */ + + /* PD_BUS */ -+ GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 12, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 11, GFLAGS), -+ GATE(ACLK_TSP, "aclk_tsp", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 12, 
GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac_bus", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 1, GFLAGS), + -+ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(HCLK_TSP, "hclk_tsp", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 11, GFLAGS), -+ GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 13, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(28), 0, GFLAGS), ++ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 4, GFLAGS), + -+ GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 14, GFLAGS), -+ GATE(0, "pclk_efuse", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(0, "pclk_otp", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(28), 4, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3328_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer0", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(0, "pclk_stimer", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(PCLK_SPI, "pclk_spi", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_PWM, "pclk_rk_pwm", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 13, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 14, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(0, "pclk_cru", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 4, GFLAGS), -+ 
GATE(0, "pclk_sgrf", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(0, "pclk_sim", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 10, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3328_CLKGATE_CON(17), 15, GFLAGS), -+ GATE(0, "pclk_pmu", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(28), 3, GFLAGS), ++ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(0, "pclk_usb3_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(0, "pclk_usb_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 8, GFLAGS), + -+ /* Watchdog pclk is controlled from the secure GRF */ -+ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_bus"), ++ /* ++ * Clock-Architecture Diagram 11 ++ */ + -+ GATE(PCLK_USB3PHY_OTG, "pclk_usb3phy_otg", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 1, GFLAGS), -+ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 2, GFLAGS), -+ GATE(PCLK_USB3_GRF, "pclk_usb3_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(PCLK_USB2_GRF, "pclk_usb2_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 14, GFLAGS), -+ GATE(0, "pclk_ddrphy", "pclk_phy_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 13, GFLAGS), -+ GATE(PCLK_ACODECPHY, "pclk_acodecphy", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(17), 5, GFLAGS), -+ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 7, GFLAGS), -+ GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(0, "pclk_phy_niu", "pclk_phy_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 15, GFLAGS), ++ /* PD_PMU */ + -+ /* PD_MMC */ -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", -+ RK3328_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", -+ RK3328_SDMMC_CON1, 1), ++ COMPOSITE_FRACMUX(SCLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_PMU_CLKSEL_CON(1), 0, ++ RK1808_PMU_CLKGATE_CON(0), 13, GFLAGS, ++ &rk1808_rtc32k_pmu_fracmux), + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", -+ RK3328_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", -+ RK3328_SDIO_CON1, 1), ++ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 12, GFLAGS), + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", -+ RK3328_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", -+ RK3328_EMMC_CON1, 1), ++ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "ppll", 0, ++ RK1808_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 15, GFLAGS), + -+ MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext", -+ RK3328_SDMMC_EXT_CON0, 1), -+ MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext", -+ RK3328_SDMMC_EXT_CON1, 1), ++ COMPOSITE(0, "clk_uart0_pmu_src", mux_gpll_usb480m_cpll_ppll_p, 0, ++ RK1808_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, ++ RK1808_PMU_CLKSEL_CON(4), 0, 7, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(5), 0, ++ 
RK1808_PMU_CLKGATE_CON(1), 2, GFLAGS, ++ &rk1808_uart0_pmu_fracmux), ++ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ ++ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RK1808_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ ++ COMPOSITE(SCLK_PMU_I2C0, "clk_pmu_i2c0", mux_ppll_xin24m_p, 0, ++ RK1808_PMU_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ ++ COMPOSITE(DBCLK_PMU_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, ++ RK1808_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ ++ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "ppll", 0, ++ RK1808_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ ++ FACTOR(0, "clk_ppll_ph0", "ppll", 0, 1, 2), ++ COMPOSITE_NOMUX(0, "clk_pciephy_src", "clk_ppll_ph0", 0, ++ RK1808_PMU_CLKSEL_CON(7), 0, 2, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_NODIV(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pciephy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(7), 4, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ ++ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "ppll", CLK_IS_CRITICAL, ++ RK1808_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ ++ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK1808_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 9, GFLAGS), ++ ++ MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, ++ RK1808_PMUGRF_SOC_CON0, 0, 1, MFLAGS) +}; + -+static void __init rk3328_clk_init(struct device_node *np) ++static void __iomem *rk1808_cru_base; ++ ++void rk1808_dump_cru(void) ++{ ++ if (rk1808_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk1808_cru_base, ++ 0x500, false); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk1808_cru_base + 0x4000, ++ 0x100, false); ++ } ++} ++EXPORT_SYMBOL_GPL(rk1808_dump_cru); ++ ++static int rk1808_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) ++{ ++ rk1808_dump_cru(); ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block rk1808_clk_panic_block = { ++ .notifier_call = rk1808_clk_panic, ++}; ++ ++static void __init rk1808_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; @@ -50484,6 +51111,8 
@@ index 000000000..8ec63aaae + return; + } + ++ rk1808_cru_base = reg_base; ++ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -50492,62 +51121,70 @@ index 000000000..8ec63aaae + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3328_pll_clks, -+ ARRAY_SIZE(rk3328_pll_clks), -+ RK3328_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk3328_clk_branches, -+ ARRAY_SIZE(rk3328_clk_branches)); ++ rockchip_clk_register_plls(ctx, rk1808_pll_clks, ++ ARRAY_SIZE(rk1808_pll_clks), ++ RK1808_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk1808_clk_branches, ++ ARRAY_SIZE(rk1808_clk_branches)); + + rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 4, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3328_cpuclk_data, rk3328_cpuclk_rates, -+ ARRAY_SIZE(rk3328_cpuclk_rates)); ++ 3, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk1808_cpuclk_data, rk1808_cpuclk_rates, ++ ARRAY_SIZE(rk1808_cpuclk_rates)); + -+ rockchip_register_softrst(np, 12, reg_base + RK3328_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 16, reg_base + RK1808_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK3328_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK1808_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); ++ ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rk1808_clk_panic_block); +} -+CLK_OF_DECLARE(rk3328_cru, "rockchip,rk3328-cru", rk3328_clk_init); + -+static int __init clk_rk3328_probe(struct platform_device *pdev) ++CLK_OF_DECLARE(rk1808_cru, "rockchip,rk1808-cru", rk1808_clk_init); ++ ++static int __init clk_rk1808_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + -+ rk3328_clk_init(np); ++ rk1808_clk_init(np); + + return 0; +} + -+static const struct of_device_id clk_rk3328_match_table[] = { ++static const struct of_device_id clk_rk1808_match_table[] = { + { -+ .compatible = "rockchip,rk3328-cru", ++ .compatible = "rockchip,rk1808-cru", + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3328_match_table); ++MODULE_DEVICE_TABLE(of, clk_rk1808_match_table); + -+static struct platform_driver clk_rk3328_driver = { ++static struct platform_driver clk_rk1808_driver = { + .driver = { -+ .name = "clk-rk3328", -+ .of_match_table = clk_rk3328_match_table, ++ .name = "clk-rk1808", ++ .of_match_table = clk_rk1808_match_table, + }, +}; -+builtin_platform_driver_probe(clk_rk3328_driver, clk_rk3328_probe); ++builtin_platform_driver_probe(clk_rk1808_driver, clk_rk1808_probe); + -+MODULE_DESCRIPTION("Rockchip RK3328 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK1808 Clock Driver"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3368.c b/drivers/clk/rockchip-oh/clk-rk3368.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3036.c b/drivers/clk/rockchip-oh/clk-rk3036.c new file mode 100644 -index 000000000..3ddff548e +index 000000000..ed05eb6c5 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3368.c -@@ -0,0 +1,956 @@ ++++ b/drivers/clk/rockchip-oh/clk-rk3036.c +@@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2015 Heiko Stuebner ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. 
++ * Author: Xing Zheng + */ + +#include @@ -50556,171 +51193,101 @@ index 000000000..3ddff548e +#include +#include +#include -+#include -+#include ++#include ++#include +#include "clk.h" + -+#define RK3368_GRF_SOC_STATUS0 0x480 ++#define RK3036_GRF_SOC_STATUS0 0x14c + -+enum rk3368_plls { -+ apllb, aplll, dpll, cpll, gpll, npll, ++enum rk3036_plls { ++ apll, dpll, gpll, +}; + -+static struct rockchip_pll_rate_table rk3368_pll_rates[] = { -+ RK3066_PLL_RATE(2208000000, 1, 92, 1), -+ RK3066_PLL_RATE(2184000000, 1, 91, 1), -+ RK3066_PLL_RATE(2160000000, 1, 90, 1), -+ RK3066_PLL_RATE(2136000000, 1, 89, 1), -+ RK3066_PLL_RATE(2112000000, 1, 88, 1), -+ RK3066_PLL_RATE(2088000000, 1, 87, 1), -+ RK3066_PLL_RATE(2064000000, 1, 86, 1), -+ RK3066_PLL_RATE(2040000000, 1, 85, 1), -+ RK3066_PLL_RATE(2016000000, 1, 84, 1), -+ RK3066_PLL_RATE(1992000000, 1, 83, 1), -+ RK3066_PLL_RATE(1968000000, 1, 82, 1), -+ RK3066_PLL_RATE(1944000000, 1, 81, 1), -+ RK3066_PLL_RATE(1920000000, 1, 80, 1), -+ RK3066_PLL_RATE(1896000000, 1, 79, 1), -+ RK3066_PLL_RATE(1872000000, 1, 78, 1), -+ RK3066_PLL_RATE(1848000000, 1, 77, 1), -+ RK3066_PLL_RATE(1824000000, 1, 76, 1), -+ RK3066_PLL_RATE(1800000000, 1, 75, 1), -+ RK3066_PLL_RATE(1776000000, 1, 74, 1), -+ RK3066_PLL_RATE(1752000000, 1, 73, 1), -+ RK3066_PLL_RATE(1728000000, 1, 72, 1), -+ RK3066_PLL_RATE(1704000000, 1, 71, 1), -+ RK3066_PLL_RATE(1680000000, 1, 70, 1), -+ RK3066_PLL_RATE(1656000000, 1, 69, 1), -+ RK3066_PLL_RATE(1632000000, 1, 68, 1), -+ RK3066_PLL_RATE(1608000000, 1, 67, 1), -+ RK3066_PLL_RATE(1560000000, 1, 65, 1), -+ RK3066_PLL_RATE(1512000000, 1, 63, 1), -+ RK3066_PLL_RATE(1488000000, 1, 62, 1), -+ RK3066_PLL_RATE(1464000000, 1, 61, 1), -+ RK3066_PLL_RATE(1440000000, 1, 60, 1), -+ RK3066_PLL_RATE(1416000000, 1, 59, 1), -+ RK3066_PLL_RATE(1392000000, 1, 58, 1), -+ RK3066_PLL_RATE(1368000000, 1, 57, 1), -+ RK3066_PLL_RATE(1344000000, 1, 56, 1), -+ RK3066_PLL_RATE(1320000000, 1, 55, 1), -+ RK3066_PLL_RATE(1296000000, 1, 54, 1), -+ RK3066_PLL_RATE(1272000000, 1, 53, 1), -+ RK3066_PLL_RATE(1248000000, 1, 52, 1), -+ RK3066_PLL_RATE(1224000000, 1, 51, 1), -+ RK3066_PLL_RATE(1200000000, 1, 50, 1), -+ RK3066_PLL_RATE(1176000000, 1, 49, 1), -+ RK3066_PLL_RATE(1128000000, 1, 47, 1), -+ RK3066_PLL_RATE(1104000000, 1, 46, 1), -+ RK3066_PLL_RATE(1008000000, 1, 84, 2), -+ RK3066_PLL_RATE( 912000000, 1, 76, 2), -+ RK3066_PLL_RATE( 888000000, 1, 74, 2), -+ RK3066_PLL_RATE( 816000000, 1, 68, 2), -+ RK3066_PLL_RATE( 792000000, 1, 66, 2), -+ RK3066_PLL_RATE( 696000000, 1, 58, 2), -+ RK3066_PLL_RATE( 672000000, 1, 56, 2), -+ RK3066_PLL_RATE( 648000000, 1, 54, 2), -+ RK3066_PLL_RATE( 624000000, 1, 52, 2), -+ RK3066_PLL_RATE( 600000000, 1, 50, 2), -+ RK3066_PLL_RATE( 576000000, 1, 48, 2), -+ RK3066_PLL_RATE( 552000000, 1, 46, 2), -+ RK3066_PLL_RATE( 528000000, 1, 88, 4), -+ RK3066_PLL_RATE( 504000000, 1, 84, 4), -+ RK3066_PLL_RATE( 480000000, 1, 80, 4), -+ RK3066_PLL_RATE( 456000000, 1, 76, 4), -+ RK3066_PLL_RATE( 408000000, 1, 68, 4), -+ RK3066_PLL_RATE( 312000000, 1, 52, 4), -+ RK3066_PLL_RATE( 252000000, 1, 84, 8), -+ RK3066_PLL_RATE( 216000000, 1, 72, 8), -+ RK3066_PLL_RATE( 126000000, 2, 84, 8), -+ RK3066_PLL_RATE( 48000000, 2, 32, 8), ++static struct rockchip_pll_rate_table rk3036_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ 
RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+static struct rockchip_pll_rate_table rk3368_npll_rates[] = { -+ RK3066_PLL_RATE_NB(594000000, 1, 99, 4, 32), -+ RK3066_PLL_RATE_NB(585000000, 6, 585, 4, 32), -+ RK3066_PLL_RATE_NB(432000000, 3, 216, 4, 32), -+ RK3066_PLL_RATE_NB(426000000, 3, 213, 4, 32), -+ RK3066_PLL_RATE_NB(400000000, 1, 100, 6, 32), -+ RK3066_PLL_RATE_NB(342000000, 3, 171, 4, 32), -+ RK3066_PLL_RATE_NB(297000000, 2, 198, 8, 16), -+ RK3066_PLL_RATE_NB(270000000, 1, 135, 12, 32), -+ RK3066_PLL_RATE_NB(260000000, 1, 130, 12, 32), -+ RK3066_PLL_RATE_NB(148500000, 1, 99, 16, 32), -+ RK3066_PLL_RATE_NB(146250000, 6, 585, 16, 32), -+ RK3066_PLL_RATE_NB(108000000, 1, 54, 12, 32), -+ RK3066_PLL_RATE_NB(106500000, 4, 213, 12, 32), -+ RK3066_PLL_RATE_NB(85500000, 4, 171, 12, 32), -+ RK3066_PLL_RATE_NB(74250000, 4, 198, 16, 32), -+}; ++#define RK3036_DIV_CPU_MASK 0x1f ++#define RK3036_DIV_CPU_SHIFT 8 + -+PNAME(mux_pll_p) = { "xin24m", "xin32k" }; -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_cs_src_p) = { "apllb_cs", "aplll_cs", "gpll_cs"}; -+PNAME(mux_aclk_bus_src_p) = { "cpll_aclk_bus", "gpll_aclk_bus" }; ++#define RK3036_DIV_PERI_MASK 0xf ++#define RK3036_DIV_PERI_SHIFT 0 ++#define RK3036_DIV_ACLK_MASK 0x7 ++#define RK3036_DIV_ACLK_SHIFT 4 ++#define RK3036_DIV_HCLK_MASK 0x3 ++#define RK3036_DIV_HCLK_SHIFT 8 ++#define RK3036_DIV_PCLK_MASK 0x7 ++#define RK3036_DIV_PCLK_SHIFT 12 + -+PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "dummy_npll" }; -+PNAME(mux_pll_src_dmycpll_dmygpll_npll_p) = { "dummy_cpll", 
"dummy_gpll", "npll" }; -+PNAME(mux_pll_src_npll_cpll_gpll_p) = { "dummy_npll", "cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_usb_p) = { "cpll", "gpll", "usbphy_480m" }; -+PNAME(mux_pll_src_cpll_gpll_usb_usb_p) = { "cpll", "gpll", "usbphy_480m", -+ "usbphy_480m" }; -+PNAME(mux_pll_src_cpll_gpll_usb_npll_p) = { "cpll", "gpll", "usbphy_480m", -+ "dummy_npll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_npll_p) = { "cpll", "gpll", "dummy_npll", "dummy_npll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_usb_p) = { "cpll", "gpll", "dummy_npll", -+ "usbphy_480m" }; ++#define RK3036_CLKSEL1(_core_periph_div) \ ++ { \ ++ .reg = RK2928_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_core_periph_div, RK3036_DIV_PERI_MASK, \ ++ RK3036_DIV_PERI_SHIFT) \ ++ } + -+PNAME(mux_i2s_8ch_pre_p) = { "i2s_8ch_src", "i2s_8ch_frac", -+ "ext_i2s", "xin12m" }; -+PNAME(mux_i2s_8ch_clkout_p) = { "i2s_8ch_pre", "xin12m" }; -+PNAME(mux_i2s_2ch_p) = { "i2s_2ch_src", "i2s_2ch_frac", -+ "dummy", "xin12m" }; -+PNAME(mux_spdif_8ch_p) = { "spdif_8ch_pre", "spdif_8ch_frac", -+ "ext_i2s", "xin12m" }; -+PNAME(mux_edp_24m_p) = { "xin24m", "dummy" }; -+PNAME(mux_vip_out_p) = { "vip_src", "xin24m" }; -+PNAME(mux_usbphy480m_p) = { "usbotg_out", "xin24m" }; -+PNAME(mux_hsic_usbphy480m_p) = { "usbotg_out", "dummy" }; -+PNAME(mux_hsicphy480m_p) = { "cpll", "gpll", "usbphy_480m" }; -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "xin24m" }; -+PNAME(mux_uart3_p) = { "uart3_src", "uart3_frac", "xin24m" }; -+PNAME(mux_uart4_p) = { "uart4_src", "uart4_frac", "xin24m" }; -+PNAME(mux_mac_p) = { "mac_pll_src", "ext_gmac" }; -+PNAME(mux_mmc_src_p) = { "cpll", "gpll", "usbphy_480m", "xin24m" }; -+ -+static struct rockchip_pll_clock rk3368_pll_clks[] __initdata = { -+ [apllb] = PLL(pll_rk3066, PLL_APLLB, "apllb", mux_pll_p, 0, RK3368_PLL_CON(0), -+ RK3368_PLL_CON(3), 8, 1, 0, rk3368_pll_rates), -+ [aplll] = PLL(pll_rk3066, PLL_APLLL, "aplll", mux_pll_p, 0, RK3368_PLL_CON(4), -+ RK3368_PLL_CON(7), 8, 0, 0, rk3368_pll_rates), -+ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK3368_PLL_CON(8), -+ RK3368_PLL_CON(11), 8, 2, 0, NULL), -+ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK3368_PLL_CON(12), -+ RK3368_PLL_CON(15), 8, 3, ROCKCHIP_PLL_SYNC_RATE, rk3368_pll_rates), -+ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK3368_PLL_CON(16), -+ RK3368_PLL_CON(19), 8, 4, ROCKCHIP_PLL_SYNC_RATE, rk3368_pll_rates), -+ [npll] = PLL(pll_rk3066, PLL_NPLL, "npll", mux_pll_p, 0, RK3368_PLL_CON(20), -+ RK3368_PLL_CON(23), 8, 5, 0, rk3368_npll_rates), -+}; ++#define RK3036_CPUCLK_RATE(_prate, _core_periph_div) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3036_CLKSEL1(_core_periph_div), \ ++ }, \ ++ } + -+static struct clk_div_table div_ddrphy_t[] = { -+ { .val = 0, .div = 1 }, -+ { .val = 1, .div = 2 }, -+ { .val = 3, .div = 4 }, -+ { /* sentinel */ }, ++static struct rockchip_cpuclk_rate_table rk3036_cpuclk_rates[] __initdata = { ++ RK3036_CPUCLK_RATE(1200000000, 4), ++ RK3036_CPUCLK_RATE(1008000000, 4), ++ RK3036_CPUCLK_RATE(816000000, 4), ++ RK3036_CPUCLK_RATE(600000000, 4), ++ RK3036_CPUCLK_RATE(408000000, 4), ++ RK3036_CPUCLK_RATE(312000000, 4), +}; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK -+ -+static const struct rockchip_cpuclk_reg_data rk3368_cpuclkb_data = { -+ 
.core_reg[0] = RK3368_CLKSEL_CON(0), ++static const struct rockchip_cpuclk_reg_data rk3036_cpuclk_data = { ++ .core_reg[0] = RK2928_CLKSEL_CON(0), + .div_core_shift[0] = 0, + .div_core_mask[0] = 0x1f, + .num_cores = 1, @@ -50730,704 +51297,337 @@ index 000000000..3ddff548e + .mux_core_mask = 0x1, +}; + -+static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = { -+ .core_reg[0] = RK3368_CLKSEL_CON(2), -+ .div_core_shift[0] = 0, -+ .mux_core_alt = 1, -+ .num_cores = 1, -+ .mux_core_main = 0, -+ .div_core_mask[0] = 0x1f, -+ .mux_core_shift = 7, -+ .mux_core_mask = 0x1, -+}; -+ -+#define RK3368_DIV_ACLKM_MASK 0x1f -+#define RK3368_DIV_ACLKM_SHIFT 8 -+#define RK3368_DIV_ATCLK_MASK 0x1f -+#define RK3368_DIV_ATCLK_SHIFT 0 -+#define RK3368_DIV_PCLK_DBG_MASK 0x1f -+#define RK3368_DIV_PCLK_DBG_SHIFT 8 ++PNAME(mux_pll_p) = { "xin24m", "xin24m" }; + -+#define RK3368_CLKSEL0(_offs, _aclkm) \ -+ { \ -+ .reg = RK3368_CLKSEL_CON(0 + _offs), \ -+ .val = HIWORD_UPDATE(_aclkm, RK3368_DIV_ACLKM_MASK, \ -+ RK3368_DIV_ACLKM_SHIFT), \ -+ } -+#define RK3368_CLKSEL1(_offs, _atclk, _pdbg) \ -+ { \ -+ .reg = RK3368_CLKSEL_CON(1 + _offs), \ -+ .val = HIWORD_UPDATE(_atclk, RK3368_DIV_ATCLK_MASK, \ -+ RK3368_DIV_ATCLK_SHIFT) | \ -+ HIWORD_UPDATE(_pdbg, RK3368_DIV_PCLK_DBG_MASK, \ -+ RK3368_DIV_PCLK_DBG_SHIFT), \ -+ } ++PNAME(mux_busclk_p) = { "dummy_apll", "dpll_cpu", "gpll_cpu" }; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_pll_src_apll_dpll_gpll_p) = { "apll", "dpll", "gpll" }; ++PNAME(mux_pll_src_dmyapll_dpll_gpll_p) = { "dummy_apll", "dpll", "gpll" }; + -+/* cluster_b: aclkm in clksel0, rest in clksel1 */ -+#define RK3368_CPUCLKB_RATE(_prate, _aclkm, _atclk, _pdbg) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3368_CLKSEL0(0, _aclkm), \ -+ RK3368_CLKSEL1(0, _atclk, _pdbg), \ -+ }, \ -+ } ++PNAME(mux_timer_p) = { "xin24m", "pclk_peri_src" }; + -+/* cluster_l: aclkm in clksel2, rest in clksel3 */ -+#define RK3368_CPUCLKL_RATE(_prate, _aclkm, _atclk, _pdbg) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3368_CLKSEL0(2, _aclkm), \ -+ RK3368_CLKSEL1(2, _atclk, _pdbg), \ -+ }, \ -+ } ++PNAME(mux_pll_src_dmyapll_dpll_gpll_usb480m_p) = { "dummy_apll", "dpll", "gpll", "usb480m" }; ++PNAME(mux_pll_src_dmyapll_dpll_gpll_xin24_p) = { "dummy_apll", "dpll", "gpll", "xin24m" }; + -+static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = { -+ RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5), -+ RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4), -+ RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4), -+ RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3), -+ RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3), -+ RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2), -+ RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2), -+ RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1), -+ RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1), -+ RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1), -+}; ++PNAME(mux_mmc_src_p) = { "dummy_apll", "dpll", "gpll", "xin24m" }; ++PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_clkout_p) = { "i2s_pre", "xin12m" }; ++PNAME(mux_spdif_p) = { "spdif_src", "spdif_frac", "xin12m" }; ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++PNAME(mux_mac_p) = { "mac_pll_src", "rmii_clkin" }; ++PNAME(mux_dclk_p) = { "dclk_lcdc", "dclk_cru" }; + -+static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = { -+ RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6), -+ 
RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5), -+ RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5), -+ RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4), -+ RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4), -+ RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3), -+ RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2), -+ RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2), -+ RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1), -+ RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1), ++static struct rockchip_pll_clock rk3036_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), ++ RK2928_MODE_CON, 0, 5, 0, rk3036_pll_rates), ++ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), ++ RK2928_MODE_CON, 4, 4, 0, NULL), ++ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), ++ RK2928_MODE_CON, 12, 6, ROCKCHIP_PLL_SYNC_RATE, rk3036_pll_rates), +}; + -+static struct rockchip_clk_branch rk3368_i2s_8ch_fracmux __initdata = -+ MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(27), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3368_spdif_8ch_fracmux __initdata = -+ MUX(0, "spdif_8ch_pre", mux_spdif_8ch_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(31), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3368_i2s_2ch_fracmux __initdata = -+ MUX(0, "i2s_2ch_pre", mux_i2s_2ch_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(53), 8, 2, MFLAGS); ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3368_uart0_fracmux __initdata = ++static struct rockchip_clk_branch rk3036_uart0_fracmux __initdata = + MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(33), 8, 2, MFLAGS); ++ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3368_uart1_fracmux __initdata = ++static struct rockchip_clk_branch rk3036_uart1_fracmux __initdata = + MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(35), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3368_uart3_fracmux __initdata = -+ MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(39), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3368_uart4_fracmux __initdata = -+ MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(41), 8, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 2 -+ */ -+ -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), -+ -+ MUX(SCLK_USBPHY480M, "usbphy_480m", mux_usbphy480m_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(13), 8, 1, MFLAGS), -+ -+ GATE(0, "apllb_core", "apllb", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "gpllb_core", "gpll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 1, GFLAGS), -+ -+ GATE(0, "aplll_core", "aplll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "gplll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 5, GFLAGS), -+ -+ DIV(0, "aclkm_core_b", "armclkb", 0, -+ RK3368_CLKSEL_CON(0), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ DIV(0, "atclk_core_b", "armclkb", 0, -+ RK3368_CLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ DIV(0, "pclk_dbg_b", "armclkb", 0, -+ RK3368_CLKSEL_CON(1), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ -+ DIV(0, "aclkm_core_l", "armclkl", 0, -+ RK3368_CLKSEL_CON(2), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ DIV(0, "atclk_core_l", 
"armclkl", 0, -+ RK3368_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ DIV(0, "pclk_dbg_l", "armclkl", 0, -+ RK3368_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), -+ -+ GATE(0, "apllb_cs", "apllb", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(0, "aplll_cs", "aplll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(0, "gpll_cs", "gpll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOGATE(0, "sclk_cs_pre", mux_cs_src_p, CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(0, "clkin_trace", "sclk_cs_pre", CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(4), 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(0), 13, GFLAGS), -+ -+ COMPOSITE(ACLK_CCI_PRE, "aclk_cci_pre", mux_pll_src_cpll_gpll_usb_npll_p, CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK3368_CLKGATE_CON(7), 10, GFLAGS), -+ -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", 0, -+ RK3368_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NOGATE_DIVTBL(0, "ddrphy_src", mux_ddrphy_p, CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(13), 4, 1, MFLAGS, 0, 2, DFLAGS, div_ddrphy_t), ++ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); + -+ FACTOR_GATE(0, "sclk_ddr", "ddrphy_src", CLK_IGNORE_UNUSED, 1, 4, -+ RK3368_CLKGATE_CON(6), 14, GFLAGS), -+ GATE(0, "sclk_ddr4x", "ddrphy_src", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(6), 15, GFLAGS), ++static struct rockchip_clk_branch rk3036_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); + -+ GATE(0, "gpll_aclk_bus", "gpll", CLK_IS_CRITICAL, -+ RK3368_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(0, "cpll_aclk_bus", "cpll", CLK_IS_CRITICAL, -+ RK3368_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_NOGATE(0, "aclk_bus_src", mux_aclk_bus_src_p, CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(8), 7, 1, MFLAGS, 0, 5, DFLAGS), ++static struct rockchip_clk_branch rk3036_i2s_fracmux __initdata = ++ MUX(SCLK_I2S_PRE, "i2s_pre", mux_i2s_pre_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); + -+ GATE(ACLK_BUS, "aclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, -+ RK3368_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_BUS, "pclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(8), 12, 3, DFLAGS, -+ RK3368_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(8), 8, 2, DFLAGS, -+ RK3368_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "sclk_crypto", "aclk_bus_src", 0, -+ RK3368_CLKSEL_CON(10), 14, 2, DFLAGS, -+ RK3368_CLKGATE_CON(7), 2, GFLAGS), ++static struct rockchip_clk_branch rk3036_spdif_fracmux __initdata = ++ MUX(SCLK_SPDIF, "sclk_spdif", mux_spdif_p, 0, ++ RK2928_CLKSEL_CON(5), 8, 2, MFLAGS); + -+ COMPOSITE(0, "fclk_mcu_src", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(1), 3, GFLAGS), ++static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = { + /* -+ * stclk_mcu is listed as child of fclk_mcu_src in diagram 5, -+ * but stclk_mcu has an additional own divider in diagram 2 ++ * Clock-Architecture Diagram 1 + */ -+ COMPOSITE_NOMUX(0, "stclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(12), 8, 3, DFLAGS, -+ RK3368_CLKGATE_CON(13), 13, GFLAGS), -+ -+ COMPOSITE(0, "i2s_8ch_src", mux_pll_src_cpll_gpll_p, 
0, -+ RK3368_CLKSEL_CON(27), 12, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(28), 0, -+ RK3368_CLKGATE_CON(6), 2, GFLAGS, -+ &rk3368_i2s_8ch_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "i2s_8ch_clkout", mux_i2s_8ch_clkout_p, 0, -+ RK3368_CLKSEL_CON(27), 15, 1, MFLAGS, -+ RK3368_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(SCLK_I2S_8CH, "sclk_i2s_8ch", "i2s_8ch_pre", CLK_SET_RATE_PARENT, -+ RK3368_CLKGATE_CON(6), 3, GFLAGS), -+ COMPOSITE(0, "spdif_8ch_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(31), 12, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(32), 0, -+ RK3368_CLKGATE_CON(6), 5, GFLAGS, -+ &rk3368_spdif_8ch_fracmux), -+ GATE(SCLK_SPDIF_8CH, "sclk_spdif_8ch", "spdif_8ch_pre", CLK_SET_RATE_PARENT, -+ RK3368_CLKGATE_CON(6), 6, GFLAGS), -+ COMPOSITE(0, "i2s_2ch_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(53), 12, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(5), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(54), 0, -+ RK3368_CLKGATE_CON(5), 14, GFLAGS, -+ &rk3368_i2s_2ch_fracmux), -+ GATE(SCLK_I2S_2CH, "sclk_i2s_2ch", "i2s_2ch_pre", CLK_SET_RATE_PARENT, -+ RK3368_CLKGATE_CON(5), 15, GFLAGS), -+ -+ COMPOSITE(0, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3368_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(0, "sclk_hsadc_tsp", "ext_hsadc_tsp", 0, -+ RK3368_CLKGATE_CON(13), 7, GFLAGS), -+ -+ MUX(0, "uart_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(35), 12, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "uart2_src", "uart_src", 0, -+ RK3368_CLKSEL_CON(37), 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(2), 4, GFLAGS), -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(37), 8, 1, MFLAGS), + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ GATE(0, "gpll_armclk", "gpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), + -+ COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0, -+ RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 6, GFLAGS), -+ COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0, -+ RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 7, GFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + + /* -+ * We use aclk_vdpu by default ---GRF_SOC_CON0[7] setting in system, -+ * so we ignore the mux and make clocks nodes as following, ++ * Clock-Architecture Diagram 2 + */ -+ FACTOR_GATE(0, "hclk_video_pre", "aclk_vdpu", 0, 1, 4, -+ RK3368_CLKGATE_CON(4), 8, GFLAGS), -+ -+ COMPOSITE(0, "sclk_hevc_cabac_src", mux_pll_src_cpll_gpll_npll_usb_p, 0, -+ RK3368_CLKSEL_CON(17), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(5), 1, GFLAGS), -+ COMPOSITE(0, "sclk_hevc_core_src", mux_pll_src_cpll_gpll_npll_usb_p, 0, -+ RK3368_CLKSEL_CON(17), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(5), 2, GFLAGS), -+ -+ COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb_p, CLK_IGNORE_UNUSED, -+ RK3368_CLKSEL_CON(19), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 0, GFLAGS), -+ DIV(0, "hclk_vio", "aclk_vio0", 0, -+ RK3368_CLKSEL_CON(21), 0, 5, DFLAGS), -+ -+ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_usb_p, 0, -+ RK3368_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 3, GFLAGS), -+ COMPOSITE(SCLK_RGA, 
"sclk_rga", mux_pll_src_cpll_gpll_usb_p, 0, -+ RK3368_CLKSEL_CON(18), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 4, GFLAGS), -+ -+ COMPOSITE(DCLK_VOP, "dclk_vop", mux_pll_src_dmycpll_dmygpll_npll_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3368_CLKGATE_CON(4), 1, GFLAGS), -+ -+ GATE(SCLK_VOP0_PWM, "sclk_vop0_pwm", "xin24m", 0, -+ RK3368_CLKGATE_CON(4), 2, GFLAGS), -+ -+ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_cpll_gpll_npll_npll_p, 0, -+ RK3368_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3368_CLKGATE_CON(4), 9, GFLAGS), -+ -+ GATE(0, "pclk_isp_in", "ext_isp", 0, -+ RK3368_CLKGATE_CON(17), 2, GFLAGS), -+ INVERTER(PCLK_ISP, "pclk_isp", "pclk_isp_in", -+ RK3368_CLKSEL_CON(21), 6, IFLAGS), -+ -+ GATE(0, "pclk_vip_in", "ext_vip", 0, -+ RK3368_CLKGATE_CON(16), 13, GFLAGS), -+ INVERTER(PCLK_VIP, "pclk_vip", "pclk_vip_in", -+ RK3368_CLKSEL_CON(21), 13, IFLAGS), -+ -+ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, -+ RK3368_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0, -+ RK3368_CLKGATE_CON(4), 12, GFLAGS), -+ -+ COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(21), 15, 1, MFLAGS, -+ RK3368_CLKGATE_CON(4), 5, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_VIP_OUT, "sclk_vip_out", mux_vip_out_p, 0, -+ RK3368_CLKSEL_CON(21), 14, 1, MFLAGS, 8, 5, DFLAGS), + -+ COMPOSITE_NODIV(SCLK_EDP_24M, "sclk_edp_24m", mux_edp_24m_p, 0, -+ RK3368_CLKSEL_CON(23), 8, 1, MFLAGS, -+ RK3368_CLKGATE_CON(5), 4, GFLAGS), -+ COMPOSITE(SCLK_EDP, "sclk_edp", mux_pll_src_cpll_gpll_npll_npll_p, 0, -+ RK3368_CLKSEL_CON(23), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3368_CLKGATE_CON(5), 3, GFLAGS), -+ -+ COMPOSITE(SCLK_HDCP, "sclk_hdcp", mux_pll_src_cpll_gpll_npll_npll_p, 0, -+ RK3368_CLKSEL_CON(55), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3368_CLKGATE_CON(5), 5, GFLAGS), ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOGATE(0, "ddrphy2x", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ FACTOR(0, "ddrphy", "ddrphy2x", 0, 1, 2), + -+ DIV(0, "pclk_pd_alive", "gpll", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(10), 8, 5, DFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core_pre", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 7, GFLAGS), + -+ /* sclk_timer has a gate in the sgrf */ ++ GATE(0, "dpll_cpu", "dpll", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(0, "gpll_cpu", "gpll", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_busclk_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(0), 14, 2, MFLAGS, 8, 5, DFLAGS), ++ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 4, GFLAGS), + -+ COMPOSITE_NOMUX(0, "pclk_pd_pmu", "gpll", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(10), 0, 5, DFLAGS, 
-+ RK3368_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(SCLK_PVTM_PMU, "sclk_pvtm_pmu", "xin24m", 0, -+ RK3368_CLKGATE_CON(7), 3, GFLAGS), -+ COMPOSITE(0, "sclk_gpu_core_src", mux_pll_src_cpll_gpll_usb_npll_p, 0, -+ RK3368_CLKSEL_CON(14), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(4), 11, GFLAGS), -+ MUX(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(14), 14, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_gpu_mem_pre", "aclk_gpu_src", 0, -+ RK3368_CLKSEL_CON(14), 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(5), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_gpu_cfg_pre", "aclk_gpu_src", 0, -+ RK3368_CLKSEL_CON(16), 8, 5, DFLAGS, -+ RK3368_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, -+ RK3368_CLKGATE_CON(7), 11, GFLAGS), ++ COMPOSITE(0, "aclk_peri_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), + -+ COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(9), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(9), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK3368_CLKGATE_CON(3), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3368_CLKSEL_CON(9), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, -+ RK3368_CLKGATE_CON(3), 2, GFLAGS), + GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, -+ RK3368_CLKGATE_CON(3), 1, GFLAGS), ++ RK2928_CLKGATE_CON(2), 1, GFLAGS), ++ DIV(0, "pclk_peri_src", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ GATE(PCLK_PERI, "pclk_peri", "pclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 3, GFLAGS), ++ DIV(0, "hclk_peri_src", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ GATE(HCLK_PERI, "hclk_peri", "hclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 2, GFLAGS), + -+ GATE(0, "sclk_mipidsi_24m", "xin24m", 0, RK3368_CLKGATE_CON(4), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_TIMER0, "sclk_timer0", mux_timer_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(2), 4, 1, MFLAGS, ++ RK2928_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NODIV(SCLK_TIMER1, "sclk_timer1", mux_timer_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(2), 5, 1, MFLAGS, ++ RK2928_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NODIV(SCLK_TIMER2, "sclk_timer2", mux_timer_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(2), 6, 1, MFLAGS, ++ RK2928_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NODIV(SCLK_TIMER3, "sclk_timer3", mux_timer_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(2), 7, 1, MFLAGS, ++ RK2928_CLKGATE_CON(2), 5, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ ++ MUX(0, "uart_pll_clk", mux_pll_src_dmyapll_dpll_gpll_usb480m_p, 0, ++ RK2928_CLKSEL_CON(13), 10, 2, MFLAGS), ++ COMPOSITE_NOMUX(0, "uart0_src", "uart_pll_clk", 0, ++ RK2928_CLKSEL_CON(13), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "uart1_src", "uart_pll_clk", 0, ++ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "uart2_src", "uart_pll_clk", 0, ++ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(17), 0, ++ RK2928_CLKGATE_CON(1), 9, GFLAGS, ++ &rk3036_uart0_fracmux), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", 
CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(18), 0, ++ RK2928_CLKGATE_CON(1), 11, GFLAGS, ++ &rk3036_uart1_fracmux), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(19), 0, ++ RK2928_CLKGATE_CON(1), 13, GFLAGS, ++ &rk3036_uart2_fracmux), + -+ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "sclk_spi1", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(45), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3368_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE(SCLK_SPI2, "sclk_spi2", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(46), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3368_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE(ACLK_VCODEC, "aclk_vcodec", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 11, GFLAGS), ++ FACTOR_GATE(HCLK_VCODEC, "hclk_vcodec", "aclk_vcodec", 0, 1, 4, ++ RK2928_CLKGATE_CON(3), 12, GFLAGS), + ++ COMPOSITE(ACLK_HEVC, "aclk_hevc", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(20), 0, 2, MFLAGS, 2, 5, DFLAGS, ++ RK2928_CLKGATE_CON(10), 6, GFLAGS), + -+ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, -+ RK3368_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(7), 12, GFLAGS), -+ COMPOSITE(SCLK_SDIO0, "sclk_sdio0", mux_mmc_src_p, 0, -+ RK3368_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(7), 13, GFLAGS), -+ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, -+ RK3368_CLKSEL_CON(51), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(7), 15, GFLAGS), ++ COMPOSITE(0, "aclk_disp1_pre", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE(0, "hclk_disp_pre", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE(SCLK_LCDC, "dclk_lcdc", mux_pll_src_apll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(28), 0, 2, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3368_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3368_SDMMC_CON1, 0), ++ COMPOSITE_NODIV(0, "sclk_sdmmc_src", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(12), 8, 2, MFLAGS, ++ RK2928_CLKGATE_CON(2), 11, GFLAGS), ++ DIV(SCLK_SDMMC, "sclk_sdmmc", "sclk_sdmmc_src", 0, ++ RK2928_CLKSEL_CON(11), 0, 7, DFLAGS), + -+ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "sclk_sdio0", RK3368_SDIO0_CON0, 1), -+ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "sclk_sdio0", RK3368_SDIO0_CON1, 0), ++ COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(12), 10, 2, MFLAGS, ++ RK2928_CLKGATE_CON(2), 13, GFLAGS), ++ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, ++ RK2928_CLKSEL_CON(11), 8, 7, DFLAGS), + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3368_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3368_EMMC_CON1, 0), ++ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(12), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 14, GFLAGS), + -+ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(8), 1, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3036_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3036_SDMMC_CON1, 0), + -+ /* pmu_grf_soc_con0[6] allows to select between xin32k and pvtm_pmu */ -+ 
GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(8), 4, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3036_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3036_SDIO_CON1, 0), + -+ /* pmu_grf_soc_con0[6] allows to select between xin32k and pvtm_pmu */ -+ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin32k", 0, -+ RK3368_CLKSEL_CON(25), 0, 6, DFLAGS, -+ RK3368_CLKGATE_CON(3), 5, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3036_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3036_EMMC_CON1, 0), + -+ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, -+ RK3368_CLKSEL_CON(25), 8, 8, DFLAGS, -+ RK3368_CLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE(0, "i2s_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_I2S_FRAC, "i2s_frac", "i2s_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(7), 0, ++ RK2928_CLKGATE_CON(0), 10, GFLAGS, ++ &rk3036_i2s_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_clkout", mux_i2s_clkout_p, 0, ++ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, ++ RK2928_CLKGATE_CON(0), 13, GFLAGS), ++ GATE(SCLK_I2S, "sclk_i2s", "i2s_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(0), 14, GFLAGS), + -+ COMPOSITE(SCLK_NANDC0, "sclk_nandc0", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(47), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE(0, "spdif_src", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_src", 0, ++ RK2928_CLKSEL_CON(9), 0, ++ RK2928_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3036_spdif_fracmux), + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(52), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin12m", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 5, GFLAGS), + -+ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb_usb_p, 0, -+ RK3368_CLKSEL_CON(33), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(34), 0, -+ RK3368_CLKGATE_CON(2), 1, GFLAGS, -+ &rk3368_uart0_fracmux), ++ COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 13, GFLAGS), + -+ COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0, -+ RK3368_CLKSEL_CON(35), 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(36), 0, -+ RK3368_CLKGATE_CON(2), 3, GFLAGS, -+ &rk3368_uart1_fracmux), ++ COMPOSITE(SCLK_SPI, "sclk_spi", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 9, GFLAGS), + -+ COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0, -+ RK3368_CLKSEL_CON(39), 0, 7, DFLAGS, -+ RK3368_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(40), 0, -+ RK3368_CLKGATE_CON(2), 7, GFLAGS, -+ &rk3368_uart3_fracmux), ++ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_dmyapll_dpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS, 10, 5, DFLAGS, ++ RK2928_CLKGATE_CON(10), 4, GFLAGS), + -+ COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0, -+ RK3368_CLKSEL_CON(41), 0, 7, DFLAGS, -+ 
RK3368_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(42), 0, -+ RK3368_CLKGATE_CON(2), 9, GFLAGS, -+ &rk3368_uart4_fracmux), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_dmyapll_dpll_gpll_xin24_p, 0, ++ RK2928_CLKSEL_CON(16), 0, 2, MFLAGS, 2, 5, DFLAGS, ++ RK2928_CLKGATE_CON(10), 5, GFLAGS), + -+ COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, -+ RK3368_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3368_CLKGATE_CON(3), 4, GFLAGS), -+ MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, -+ RK3368_CLKSEL_CON(43), 8, 1, MFLAGS), -+ GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, -+ RK3368_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(SCLK_MACREF, "sclk_macref", "mac_clk", 0, -+ RK3368_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(SCLK_MAC_RX, "sclk_mac_rx", "mac_clk", 0, -+ RK3368_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(SCLK_MAC_TX, "sclk_mac_tx", "mac_clk", 0, -+ RK3368_CLKGATE_CON(7), 5, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_MACPLL, "mac_pll_src", mux_pll_src_apll_dpll_gpll_p, CLK_SET_RATE_NO_REPARENT, ++ RK2928_CLKSEL_CON(21), 0, 2, MFLAGS, 9, 5, DFLAGS), ++ MUX(SCLK_MACREF, "mac_clk_ref", mux_mac_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(21), 3, 1, MFLAGS), + -+ GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, -+ RK3368_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0, ++ RK2928_CLKSEL_CON(21), 4, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 6, GFLAGS), ++ FACTOR(0, "sclk_macref_out", "hclk_peri_src", 0, 1, 2), + -+ COMPOSITE_NODIV(0, "hsic_usbphy_480m", mux_hsic_usbphy480m_p, 0, -+ RK3368_CLKSEL_CON(26), 8, 2, MFLAGS, -+ RK3368_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0, -+ RK3368_CLKSEL_CON(26), 12, 2, MFLAGS, -+ RK3368_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(SCLK_HSICPHY12M, "sclk_hsicphy12m", "xin12m", 0, -+ RK3368_CLKGATE_CON(8), 6, GFLAGS), ++ MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0, ++ RK2928_CLKSEL_CON(31), 0, 1, MFLAGS), + + /* -+ * Clock-Architecture Diagram 5 ++ * Clock-Architecture Diagram 3 + */ + -+ /* aclk_cci_pre gates */ -+ GATE(0, "aclk_core_niu_cpup", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 4, GFLAGS), -+ GATE(0, "aclk_core_niu_cci", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(0, "aclk_cci400", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(0, "aclk_adb400m_pd_core_b", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(0, "aclk_adb400m_pd_core_l", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 0, GFLAGS), -+ -+ /* aclkm_core_* gates */ -+ GATE(0, "aclk_adb400s_pd_core_b", "aclkm_core_b", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(0, "aclk_adb400s_pd_core_l", "aclkm_core_l", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(9), 0, GFLAGS), -+ -+ /* armclk* gates */ -+ GATE(0, "sclk_dbg_pd_core_b", "armclkb", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(0, "sclk_dbg_pd_core_l", "armclkl", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(9), 1, GFLAGS), -+ -+ /* sclk_cs_pre gates */ -+ GATE(0, "sclk_dbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 7, GFLAGS), -+ GATE(0, "pclk_core_niu_sdbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(0, "hclk_core_niu_dbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 5, GFLAGS), -+ -+ /* aclk_bus gates */ -+ GATE(0, "aclk_strc_sys", "aclk_bus", CLK_IGNORE_UNUSED, 
RK3368_CLKGATE_CON(12), 12, GFLAGS), -+ GATE(ACLK_DMAC_BUS, "aclk_dmac_bus", "aclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 11, GFLAGS), -+ GATE(0, "sclk_intmem1", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 6, GFLAGS), -+ GATE(0, "sclk_intmem0", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 5, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(0, "aclk_gic400", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 9, GFLAGS), -+ -+ /* sclk_ddr gates */ -+ GATE(0, "nclk_ddrupctl", "sclk_ddr", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 2, GFLAGS), -+ -+ /* clk_hsadc_tsp is part of diagram2 */ -+ -+ /* fclk_mcu_src gates */ -+ GATE(0, "hclk_noc_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 14, GFLAGS), -+ GATE(0, "fclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 12, GFLAGS), -+ GATE(0, "hclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 11, GFLAGS), ++ /* aclk_cpu gates */ ++ GATE(0, "sclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), + + /* hclk_cpu gates */ -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 10, GFLAGS), -+ GATE(HCLK_ROM, "hclk_rom", "hclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 9, GFLAGS), -+ GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 8, GFLAGS), -+ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 7, GFLAGS), -+ GATE(HCLK_TSP, "hclk_tsp", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(MCLK_CRYPTO, "mclk_crypto", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 6, GFLAGS), + + /* pclk_cpu gates */ -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 14, GFLAGS), -+ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 13, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 1, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(PCLK_SIM, "pclk_sim", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 8, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(PCLK_EFUSE256, "pclk_efuse_256", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(0, "pclk_efuse_1024", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 0, GFLAGS), -+ -+ /* -+ * video clk gates -+ * aclk_video(_pre) can actually select between parents of aclk_vdpu -+ * and aclk_vepu by setting bit GRF_SOC_CON0[7]. 
-+ */ -+ GATE(ACLK_VIDEO, "aclk_video", "aclk_vdpu", 0, RK3368_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(SCLK_HEVC_CABAC, "sclk_hevc_cabac", "sclk_hevc_cabac_src", 0, RK3368_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(SCLK_HEVC_CORE, "sclk_hevc_core", "sclk_hevc_core_src", 0, RK3368_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(HCLK_VIDEO, "hclk_video", "hclk_video_pre", 0, RK3368_CLKGATE_CON(15), 1, GFLAGS), -+ -+ /* aclk_rga_pre gates */ -+ GATE(ACLK_VIO1_NOC, "aclk_vio1_noc", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3368_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_rga_pre", 0, RK3368_CLKGATE_CON(17), 10, GFLAGS), -+ -+ /* aclk_vio0 gates */ -+ GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(ACLK_VIO0_NOC, "aclk_vio0_noc", "aclk_vio0", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(ACLK_VOP_IEP, "aclk_vop_iep", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 2, GFLAGS), -+ -+ /* sclk_isp gates */ -+ GATE(HCLK_ISP, "hclk_isp", "sclk_isp", 0, RK3368_CLKGATE_CON(16), 14, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "sclk_isp", 0, RK3368_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 14, GFLAGS), ++ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_cpu", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), + -+ /* hclk_vio gates */ -+ GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(HCLK_VIO_NOC, "hclk_vio_noc", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(HCLK_VIO_HDCPMMU, "hclk_hdcpmmu", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 12, GFLAGS), -+ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 7, GFLAGS), ++ /* aclk_vio gates */ ++ GATE(ACLK_VIO, "aclk_vio", "aclk_disp1_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS), ++ GATE(ACLK_LCDC, "aclk_lcdc", "aclk_disp1_pre", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), + -+ /* -+ * pclk_vio gates -+ * pclk_vio comes from the exactly same source as hclk_vio -+ */ -+ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 11, GFLAGS), -+ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 9, GFLAGS), -+ GATE(PCLK_VIO_H2P, "pclk_vio_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(PCLK_MIPI_CSI, "pclk_mipi_csi", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(HCLK_LCDC, "hclk_lcdc", "hclk_disp_pre", 0, 
RK2928_CLKGATE_CON(9), 5, GFLAGS), + -+ /* ext_vip gates in diagram3 */ + -+ /* gpu gates */ -+ GATE(SCLK_GPU_CORE, "sclk_gpu_core", "sclk_gpu_core_src", 0, RK3368_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(ACLK_GPU_MEM, "aclk_gpu_mem", "aclk_gpu_mem_pre", 0, RK3368_CLKGATE_CON(18), 1, GFLAGS), -+ GATE(ACLK_GPU_CFG, "aclk_gpu_cfg", "aclk_gpu_cfg_pre", 0, RK3368_CLKGATE_CON(18), 0, GFLAGS), ++ /* xin24m gates */ ++ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK2928_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, RK2928_CLKGATE_CON(10), 1, GFLAGS), + + /* aclk_peri gates */ -+ GATE(ACLK_DMAC_PERI, "aclk_dmac_peri", "aclk_peri", 0, RK3368_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "aclk_peri", 0, RK3368_CLKGATE_CON(20), 15, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3368_CLKGATE_CON(20), 13, GFLAGS), -+ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 8, GFLAGS), -+ GATE(ACLK_PERI_MMU, "aclk_peri_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(21), 4, GFLAGS), ++ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(0, "aclk_cpu_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), ++ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 15, GFLAGS), + + /* hclk_peri gates */ -+ GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 11, GFLAGS), -+ GATE(0, "hclk_mmc_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 10, GFLAGS), -+ GATE(0, "hclk_emem_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 9, GFLAGS), -+ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 7, GFLAGS), -+ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 5, GFLAGS), -+ GATE(HCLK_HOST1, "hclk_host1", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 4, GFLAGS), -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 3, GFLAGS), -+ GATE(0, "pmu_hclk_otg0", "hclk_peri", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(20), 2, GFLAGS), -+ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 1, GFLAGS), -+ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 1, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 0, GFLAGS), ++ GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), ++ GATE(0, "hclk_peri_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(HCLK_EMMC, 
"hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(HCLK_OTG1, "hclk_otg1", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(HCLK_I2S, "hclk_i2s", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 14, GFLAGS), ++ GATE(HCLK_MAC, "hclk_mac", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), + + /* pclk_peri gates */ -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 15, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 14, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 13, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 12, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 10, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 9, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 8, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 5, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 4, GFLAGS), -+ GATE(0, "pclk_peri_axi_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 14, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 0, GFLAGS), -+ -+ /* pclk_pd_alive gates */ -+ GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 13, GFLAGS), -+ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 12, GFLAGS), -+ GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(22), 9, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(22), 8, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 2, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 1, GFLAGS), -+ -+ /* Watchdog pclk is controlled by sgrf_soc_con3[7]. 
*/ -+ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_pd_alive"), -+ -+ /* -+ * pclk_vio gates -+ * pclk_vio comes from the exactly same source as hclk_vio -+ */ -+ GATE(PCLK_DPHYRX, "pclk_dphyrx", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 11, GFLAGS), -+ GATE(PCLK_DPHYTX0, "pclk_dphytx0", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 10, GFLAGS), -+ -+ /* pclk_pd_pmu gates */ -+ GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 5, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 3, GFLAGS), -+ GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS), -+ -+ /* timer gates */ -+ GATE(SCLK_TIMER15, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS), -+ GATE(SCLK_TIMER14, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS), -+ GATE(SCLK_TIMER13, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS), -+ GATE(SCLK_TIMER12, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS), -+ GATE(SCLK_TIMER11, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS), -+ GATE(SCLK_TIMER10, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS), -+ GATE(SCLK_TIMER05, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS), -+ GATE(SCLK_TIMER04, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS), -+ GATE(SCLK_TIMER03, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS), -+ GATE(SCLK_TIMER02, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS), -+ GATE(SCLK_TIMER01, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS), -+ GATE(SCLK_TIMER00, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS), ++ GATE(0, "pclk_peri_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(0, "pclk_efuse", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_PWM, "pclk_pwm", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(PCLK_SPI, "pclk_spi", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), +}; + -+static void __iomem 
*rk3368_cru_base; ++static void __iomem *rk3036_cru_base; + -+static void rk3368_dump_cru(void) ++static void rk3036_dump_cru(void) +{ -+ if (rk3368_cru_base) { ++ if (rk3036_cru_base) { + pr_warn("CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3368_cru_base, -+ 0x41c, false); ++ 32, 4, rk3036_cru_base, ++ 0x1f8, false); + } +} + -+static void __init rk3368_clk_init(struct device_node *np) ++static void __init rk3036_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; ++ struct clk *clk; + struct clk **clks; + + reg_base = of_iomap(np, 0); @@ -51436,6 +51636,13 @@ index 000000000..3ddff548e + return; + } + ++ /* ++ * Make uart_pll_clk a child of the gpll, as all other sources are ++ * not that usable / stable. ++ */ ++ writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10), ++ reg_base + RK2928_CLKSEL_CON(13)); ++ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -51444,123 +51651,95 @@ index 000000000..3ddff548e + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3368_pll_clks, -+ ARRAY_SIZE(rk3368_pll_clks), -+ RK3368_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk3368_clk_branches, -+ ARRAY_SIZE(rk3368_clk_branches)); ++ clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1); ++ if (IS_ERR(clk)) ++ pr_warn("%s: could not register clock usb480m: %ld\n", ++ __func__, PTR_ERR(clk)); + -+ rockchip_clk_register_armclk(ctx, ARMCLKB, "armclkb", -+ 2, clks[PLL_APLLB], clks[PLL_GPLL], -+ &rk3368_cpuclkb_data, rk3368_cpuclkb_rates, -+ ARRAY_SIZE(rk3368_cpuclkb_rates)); ++ rockchip_clk_register_plls(ctx, rk3036_pll_clks, ++ ARRAY_SIZE(rk3036_pll_clks), ++ RK3036_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk3036_clk_branches, ++ ARRAY_SIZE(rk3036_clk_branches)); + -+ rockchip_clk_register_armclk(ctx, ARMCLKL, "armclkl", -+ 2, clks[PLL_APLLL], clks[PLL_GPLL], -+ &rk3368_cpuclkl_data, rk3368_cpuclkl_rates, -+ ARRAY_SIZE(rk3368_cpuclkl_rates)); ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3036_cpuclk_data, rk3036_cpuclk_rates, ++ ARRAY_SIZE(rk3036_cpuclk_rates)); + -+ rockchip_register_softrst(np, 15, reg_base + RK3368_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK3368_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); + + if (!rk_dump_cru) { -+ rk3368_cru_base = reg_base; -+ rk_dump_cru = rk3368_dump_cru; ++ rk3036_cru_base = reg_base; ++ rk_dump_cru = rk3036_dump_cru; + } +} -+CLK_OF_DECLARE(rk3368_cru, "rockchip,rk3368-cru", rk3368_clk_init); ++CLK_OF_DECLARE(rk3036_cru, "rockchip,rk3036-cru", rk3036_clk_init); + -+static int __init clk_rk3368_probe(struct platform_device *pdev) ++static int __init clk_rk3036_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + -+ rk3368_clk_init(np); ++ rk3036_clk_init(np); + + return 0; +} + -+static const struct of_device_id clk_rk3368_match_table[] = { ++static const struct of_device_id clk_rk3036_match_table[] = { + { -+ .compatible = "rockchip,rk3368-cru", ++ .compatible = "rockchip,rk3036-cru", + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3368_match_table); ++MODULE_DEVICE_TABLE(of, clk_rk3036_match_table); + -+static struct platform_driver clk_rk3368_driver = { 
++static struct platform_driver clk_rk3036_driver = { + .driver = { -+ .name = "clk-rk3368", -+ .of_match_table = clk_rk3368_match_table, ++ .name = "clk-rk3036", ++ .of_match_table = clk_rk3036_match_table, + }, +}; -+builtin_platform_driver_probe(clk_rk3368_driver, clk_rk3368_probe); ++builtin_platform_driver_probe(clk_rk3036_driver, clk_rk3036_probe); + -+MODULE_DESCRIPTION("Rockchip RK3368 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3036 Clock Driver"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rk3399.c b/drivers/clk/rockchip-oh/clk-rk3399.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3128.c b/drivers/clk/rockchip-oh/clk-rk3128.c new file mode 100644 -index 000000000..e1b6c5267 +index 000000000..1b1111e88 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3399.c -@@ -0,0 +1,1752 @@ ++++ b/drivers/clk/rockchip-oh/clk-rk3128.c +@@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. -+ * Author: Xing Zheng ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * Author: Elaine + */ + +#include -+#include +#include ++#include +#include +#include +#include -+#include -+#include -+#include ++#include ++#include ++#include +#include "clk.h" + -+enum rk3399_plls { -+ lpll, bpll, dpll, cpll, gpll, npll, vpll, -+}; ++#define RK3128_GRF_SOC_STATUS0 0x14c + -+enum rk3399_pmu_plls { -+ ppll, ++enum rk3128_plls { ++ apll, dpll, cpll, gpll, +}; + -+static struct rockchip_pll_rate_table rk3399_pll_rates[] = { ++static struct rockchip_pll_rate_table rk3128_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2136000000, 1, 89, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2112000000, 1, 88, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1968000000, 1, 82, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1944000000, 1, 81, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1872000000, 1, 78, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1848000000, 1, 77, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1824000000, 1, 76, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1776000000, 1, 74, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1752000000, 1, 73, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1728000000, 1, 72, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1680000000, 1, 70, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1656000000, 1, 69, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1632000000, 1, 68, 1, 1, 1, 0), + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), + RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), + RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), + RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), @@ -51581,1660 +51760,1575 @@ index 000000000..e1b6c5267 + RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), + RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), + RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), -+ 
RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 800000000, 1, 100, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 676000000, 3, 169, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE( 533250000, 8, 711, 4, 1, 1, 0), -+ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 297000000, 1, 99, 4, 2, 1, 0), -+ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE( 148500000, 1, 99, 4, 4, 1, 0), -+ RK3036_PLL_RATE( 106500000, 1, 71, 4, 4, 1, 0), -+ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), -+ RK3036_PLL_RATE( 74250000, 2, 99, 4, 4, 1, 0), -+ RK3036_PLL_RATE( 65000000, 1, 65, 6, 4, 1, 0), -+ RK3036_PLL_RATE( 54000000, 1, 54, 6, 4, 1, 0), -+ RK3036_PLL_RATE( 27000000, 1, 27, 6, 4, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+static struct rockchip_pll_rate_table rk3399_vpll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE( 594000000, 1, 123, 5, 1, 0, 12582912), /* vco = 2970000000 */ -+ RK3036_PLL_RATE( 593406593, 1, 123, 5, 1, 0, 10508804), /* vco = 2967032965 */ -+ RK3036_PLL_RATE( 297000000, 1, 123, 5, 2, 0, 12582912), /* vco = 2970000000 */ -+ RK3036_PLL_RATE( 296703297, 1, 123, 5, 2, 0, 10508807), /* vco = 2967032970 */ -+ RK3036_PLL_RATE( 148500000, 1, 129, 7, 3, 0, 15728640), /* vco = 3118500000 */ -+ RK3036_PLL_RATE( 148351648, 1, 123, 5, 4, 0, 10508800), /* vco = 2967032960 */ -+ RK3036_PLL_RATE( 106500000, 1, 124, 7, 4, 0, 4194304), /* vco = 2982000000 */ -+ RK3036_PLL_RATE( 74250000, 1, 129, 7, 6, 0, 15728640), /* vco = 3118500000 */ -+ RK3036_PLL_RATE( 74175824, 1, 129, 7, 6, 0, 13550823), /* vco = 3115384608 */ -+ RK3036_PLL_RATE( 65000000, 1, 113, 7, 6, 0, 12582912), /* vco = 2730000000 */ -+ RK3036_PLL_RATE( 59340659, 1, 121, 7, 7, 0, 2581098), /* vco = 2907692291 */ -+ RK3036_PLL_RATE( 54000000, 1, 110, 7, 7, 0, 4194304), /* vco = 
2646000000 */ -+ RK3036_PLL_RATE( 27000000, 1, 55, 7, 7, 0, 2097152), /* vco = 1323000000 */ -+ RK3036_PLL_RATE( 26973027, 1, 55, 7, 7, 0, 1173232), /* vco = 1321678323 */ -+ { /* sentinel */ }, -+}; ++#define RK3128_DIV_CPU_MASK 0x1f ++#define RK3128_DIV_CPU_SHIFT 8 + -+/* CRU parents */ -+PNAME(mux_pll_p) = { "xin24m", "xin32k" }; ++#define RK3128_DIV_PERI_MASK 0xf ++#define RK3128_DIV_PERI_SHIFT 0 ++#define RK3128_DIV_ACLK_MASK 0x7 ++#define RK3128_DIV_ACLK_SHIFT 4 ++#define RK3128_DIV_HCLK_MASK 0x3 ++#define RK3128_DIV_HCLK_SHIFT 8 ++#define RK3128_DIV_PCLK_MASK 0x7 ++#define RK3128_DIV_PCLK_SHIFT 12 + -+PNAME(mux_ddrclk_p) = { "clk_ddrc_lpll_src", -+ "clk_ddrc_bpll_src", -+ "clk_ddrc_dpll_src", -+ "clk_ddrc_gpll_src" }; ++#define RK3128_CLKSEL1(_core_aclk_div, _pclk_dbg_div) \ ++{ \ ++ .reg = RK2928_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_pclk_dbg_div, RK3128_DIV_PERI_MASK, \ ++ RK3128_DIV_PERI_SHIFT) | \ ++ HIWORD_UPDATE(_core_aclk_div, RK3128_DIV_ACLK_MASK, \ ++ RK3128_DIV_ACLK_SHIFT), \ ++} + -+PNAME(mux_pll_src_vpll_cpll_gpll_p) = { "vpll", "cpll", "gpll" }; -+PNAME(mux_pll_src_dmyvpll_cpll_gpll_p) = { "dummy_vpll", "cpll", "gpll" }; ++#define RK3128_CPUCLK_RATE(_prate, _core_aclk_div, _pclk_dbg_div) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3128_CLKSEL1(_core_aclk_div, _pclk_dbg_div), \ ++ }, \ ++} + -+#ifdef RK3399_TWO_PLL_FOR_VOP -+PNAME(mux_aclk_cci_p) = { "dummy_cpll", -+ "gpll_aclk_cci_src", -+ "npll_aclk_cci_src", -+ "dummy_vpll" }; -+PNAME(mux_cci_trace_p) = { "dummy_cpll", -+ "gpll_cci_trace" }; -+PNAME(mux_cs_p) = { "dummy_cpll", "gpll_cs", -+ "npll_cs"}; -+PNAME(mux_aclk_perihp_p) = { "dummy_cpll", -+ "gpll_aclk_perihp_src" }; ++static struct rockchip_cpuclk_rate_table rk3128_cpuclk_rates[] __initdata = { ++ RK3128_CPUCLK_RATE(1800000000, 1, 7), ++ RK3128_CPUCLK_RATE(1704000000, 1, 7), ++ RK3128_CPUCLK_RATE(1608000000, 1, 7), ++ RK3128_CPUCLK_RATE(1512000000, 1, 7), ++ RK3128_CPUCLK_RATE(1488000000, 1, 5), ++ RK3128_CPUCLK_RATE(1416000000, 1, 5), ++ RK3128_CPUCLK_RATE(1392000000, 1, 5), ++ RK3128_CPUCLK_RATE(1296000000, 1, 5), ++ RK3128_CPUCLK_RATE(1200000000, 1, 5), ++ RK3128_CPUCLK_RATE(1104000000, 1, 5), ++ RK3128_CPUCLK_RATE(1008000000, 1, 5), ++ RK3128_CPUCLK_RATE(912000000, 1, 5), ++ RK3128_CPUCLK_RATE(816000000, 1, 3), ++ RK3128_CPUCLK_RATE(696000000, 1, 3), ++ RK3128_CPUCLK_RATE(600000000, 1, 3), ++ RK3128_CPUCLK_RATE(408000000, 1, 1), ++ RK3128_CPUCLK_RATE(312000000, 1, 1), ++ RK3128_CPUCLK_RATE(216000000, 1, 1), ++ RK3128_CPUCLK_RATE(96000000, 1, 1), ++}; + -+PNAME(mux_pll_src_cpll_gpll_p) = { "dummy_cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_p) = { "dummy_cpll", "gpll", "npll" }; -+PNAME(mux_pll_src_cpll_gpll_ppll_p) = { "dummy_cpll", "gpll", "ppll" }; -+PNAME(mux_pll_src_cpll_gpll_upll_p) = { "dummy_cpll", "gpll", "upll" }; -+PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "dummy_cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_ppll_p) = { "dummy_cpll", "gpll", "npll", -+ "ppll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_24m_p) = { "dummy_cpll", "gpll", "npll", -+ "xin24m" }; -+PNAME(mux_pll_src_cpll_gpll_npll_usbphy480m_p) = { "dummy_cpll", "gpll", "npll", -+ "clk_usbphy_480m" }; -+PNAME(mux_pll_src_ppll_cpll_gpll_npll_p) = { "ppll", "dummy_cpll", "gpll", -+ "npll", "upll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_upll_24m_p) = { "dummy_cpll", "gpll", "npll", -+ "upll", "xin24m" }; -+PNAME(mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p) = { "dummy_cpll", "gpll", "npll", -+ "ppll", "upll", "xin24m" }; -+/* -+ * We hope to be able to HDMI/DP can 
obtain better signal quality, -+ * therefore, we move VOP pwm and aclk clocks to other PLLs, let -+ * HDMI/DP phyclock can monopolize VPLL. -+ */ -+PNAME(mux_pll_src_dmyvpll_cpll_gpll_npll_p) = { "dummy_vpll", "dummy_cpll", "gpll", -+ "npll" }; -+PNAME(mux_pll_src_dmyvpll_cpll_gpll_gpll_p) = { "dummy_vpll", "dummy_cpll", "gpll", -+ "gpll" }; -+PNAME(mux_pll_src_24m_32k_cpll_gpll_p) = { "xin24m", "xin32k", -+ "dummy_cpll", "gpll" }; ++static const struct rockchip_cpuclk_reg_data rk3128_cpuclk_data = { ++ .core_reg[0] = RK2928_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 7, ++ .mux_core_mask = 0x1, ++}; + -+PNAME(mux_aclk_emmc_p) = { "dummy_cpll", -+ "gpll_aclk_emmc_src" }; ++PNAME(mux_pll_p) = { "clk_24m", "xin24m" }; + -+PNAME(mux_aclk_perilp0_p) = { "dummy_cpll", -+ "gpll_aclk_perilp0_src" }; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_div2_ddr" }; ++PNAME(mux_usb480m_p) = { "usb480m_phy", "xin24m" }; ++PNAME(mux_aclk_cpu_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; + -+PNAME(mux_fclk_cm0s_p) = { "dummy_cpll", -+ "gpll_fclk_cm0s_src" }; ++PNAME(mux_pll_src_5plls_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3", "usb480m" }; ++PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "gpll_div2", "usb480m" }; ++PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "gpll_div2" }; + -+PNAME(mux_hclk_perilp1_p) = { "dummy_cpll", -+ "gpll_hclk_perilp1_src" }; -+PNAME(mux_aclk_gmac_p) = { "dummy_cpll", -+ "gpll_aclk_gmac_src" }; -+#else -+PNAME(mux_aclk_cci_p) = { "cpll_aclk_cci_src", -+ "gpll_aclk_cci_src", -+ "npll_aclk_cci_src", -+ "dummy_vpll" }; -+PNAME(mux_cci_trace_p) = { "cpll_cci_trace", -+ "gpll_cci_trace" }; -+PNAME(mux_cs_p) = { "cpll_cs", "gpll_cs", -+ "npll_cs"}; -+PNAME(mux_aclk_perihp_p) = { "cpll_aclk_perihp_src", -+ "gpll_aclk_perihp_src" }; ++PNAME(mux_aclk_peri_src_p) = { "gpll", "cpll", "gpll_div2", "gpll_div3" }; ++PNAME(mux_mmc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; ++PNAME(mux_clk_cif_out_src_p) = { "sclk_cif_src", "xin24m" }; ++PNAME(mux_sclk_vop_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; + -+PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; -+PNAME(mux_pll_src_cpll_gpll_ppll_p) = { "cpll", "gpll", "ppll" }; -+PNAME(mux_pll_src_cpll_gpll_upll_p) = { "cpll", "gpll", "upll" }; -+PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "cpll", "gpll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_ppll_p) = { "cpll", "gpll", "npll", -+ "ppll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_24m_p) = { "cpll", "gpll", "npll", -+ "xin24m" }; -+PNAME(mux_pll_src_cpll_gpll_npll_usbphy480m_p) = { "cpll", "gpll", "npll", -+ "clk_usbphy_480m" }; -+PNAME(mux_pll_src_ppll_cpll_gpll_npll_p) = { "ppll", "cpll", "gpll", -+ "npll", "upll" }; -+PNAME(mux_pll_src_cpll_gpll_npll_upll_24m_p) = { "cpll", "gpll", "npll", -+ "upll", "xin24m" }; -+PNAME(mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p) = { "cpll", "gpll", "npll", -+ "ppll", "upll", "xin24m" }; -+/* -+ * We hope to be able to HDMI/DP can obtain better signal quality, -+ * therefore, we move VOP pwm and aclk clocks to other PLLs, let -+ * HDMI/DP phyclock can monopolize VPLL. 
-+ */ -+PNAME(mux_pll_src_dmyvpll_cpll_gpll_npll_p) = { "dummy_vpll", "cpll", "gpll", -+ "npll" }; -+PNAME(mux_pll_src_dmyvpll_cpll_gpll_gpll_p) = { "dummy_vpll", "cpll", "gpll", -+ "gpll" }; -+PNAME(mux_pll_src_24m_32k_cpll_gpll_p) = { "xin24m", "xin32k", -+ "cpll", "gpll" }; ++PNAME(mux_i2s0_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s1_pre_p) = { "i2s1_src", "i2s1_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_out_p) = { "i2s1_pre", "xin12m" }; ++PNAME(mux_sclk_spdif_p) = { "sclk_spdif_src", "spdif_frac", "xin12m" }; + -+PNAME(mux_aclk_emmc_p) = { "cpll_aclk_emmc_src", -+ "gpll_aclk_emmc_src" }; ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; + -+PNAME(mux_aclk_perilp0_p) = { "cpll_aclk_perilp0_src", -+ "gpll_aclk_perilp0_src" }; ++PNAME(mux_sclk_gmac_p) = { "sclk_gmac_src", "gmac_clkin" }; ++PNAME(mux_sclk_sfc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; + -+PNAME(mux_fclk_cm0s_p) = { "cpll_fclk_cm0s_src", -+ "gpll_fclk_cm0s_src" }; ++static struct rockchip_pll_clock rk3128_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), ++ RK2928_MODE_CON, 0, 1, 0, rk3128_pll_rates), ++ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), ++ RK2928_MODE_CON, 4, 0, 0, NULL), ++ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), ++ RK2928_MODE_CON, 8, 2, 0, rk3128_pll_rates), ++ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), ++ RK2928_MODE_CON, 12, 3, ROCKCHIP_PLL_SYNC_RATE, rk3128_pll_rates), ++}; + -+PNAME(mux_hclk_perilp1_p) = { "cpll_hclk_perilp1_src", -+ "gpll_hclk_perilp1_src" }; -+PNAME(mux_aclk_gmac_p) = { "cpll_aclk_gmac_src", -+ "gpll_aclk_gmac_src" }; -+#endif ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+PNAME(mux_dclk_vop0_p) = { "dclk_vop0_div", -+ "dummy_dclk_vop0_frac" }; -+PNAME(mux_dclk_vop1_p) = { "dclk_vop1_div", -+ "dummy_dclk_vop1_frac" }; ++static struct rockchip_clk_branch rk3128_i2s0_fracmux __initdata = ++ MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(9), 8, 2, MFLAGS); + -+PNAME(mux_clk_cif_p) = { "clk_cifout_src", "xin24m" }; ++static struct rockchip_clk_branch rk3128_i2s1_fracmux __initdata = ++ MUX(0, "i2s1_pre", mux_i2s1_pre_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); + -+PNAME(mux_pll_src_24m_usbphy480m_p) = { "xin24m", "clk_usbphy_480m" }; -+PNAME(mux_pll_src_24m_pciephy_p) = { "xin24m", "clk_pciephy_ref100m" }; -+PNAME(mux_pciecore_cru_phy_p) = { "clk_pcie_core_cru", -+ "clk_pcie_core_phy" }; -+PNAME(mux_clk_testout1_p) = { "clk_testout1_pll_src", "xin24m" }; -+PNAME(mux_clk_testout2_p) = { "clk_testout2_pll_src", "xin24m" }; ++static struct rockchip_clk_branch rk3128_spdif_fracmux __initdata = ++ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(6), 8, 2, MFLAGS); + -+PNAME(mux_usbphy_480m_p) = { "clk_usbphy0_480m_src", -+ "clk_usbphy1_480m_src" }; -+PNAME(mux_rmii_p) = { "clk_gmac", "clkin_gmac" }; -+PNAME(mux_spdif_p) = { "clk_spdif_div", "clk_spdif_frac", -+ "clkin_i2s", "xin12m" }; -+PNAME(mux_i2s0_p) = { "clk_i2s0_div", "clk_i2s0_frac", -+ "clkin_i2s", "xin12m" }; -+PNAME(mux_i2s1_p) = { "clk_i2s1_div", "clk_i2s1_frac", -+ "clkin_i2s", "xin12m" }; -+PNAME(mux_i2s2_p) = { 
"clk_i2s2_div", "clk_i2s2_frac", -+ "clkin_i2s", "xin12m" }; -+PNAME(mux_i2sch_p) = { "clk_i2s0", "clk_i2s1", -+ "clk_i2s2" }; -+PNAME(mux_i2sout_p) = { "clk_i2sout_src", "xin12m" }; ++static struct rockchip_clk_branch rk3128_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); + -+PNAME(mux_uart0_p) = { "xin24m", "clk_uart0_div", "clk_uart0_frac" }; -+PNAME(mux_uart1_p) = { "xin24m", "clk_uart1_div", "clk_uart1_frac" }; -+PNAME(mux_uart2_p) = { "xin24m", "clk_uart2_div", "clk_uart2_frac" }; -+PNAME(mux_uart3_p) = { "xin24m", "clk_uart3_div", "clk_uart3_frac" }; ++static struct rockchip_clk_branch rk3128_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); + -+/* PMU CRU parents */ -+PNAME(mux_ppll_24m_p) = { "ppll", "xin24m" }; -+PNAME(mux_24m_ppll_p) = { "xin24m", "ppll" }; -+PNAME(mux_fclk_cm0s_pmu_ppll_p) = { "fclk_cm0s_pmu_ppll_src", "xin24m" }; -+PNAME(mux_wifi_pmu_p) = { "clk_wifi_div", "clk_wifi_frac" }; -+PNAME(mux_uart4_pmu_p) = { "xin24m", "clk_uart4_div", -+ "clk_uart4_frac" }; -+PNAME(mux_clk_testout2_2io_p) = { "clk_testout2", "clk_32k_suspend_pmu" }; ++static struct rockchip_clk_branch rk3128_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); + -+static u32 uart_mux_idx[] = { 2, 0, 1 }; ++static struct rockchip_clk_branch common_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ + -+static struct rockchip_pll_clock rk3399_pll_clks[] __initdata = { -+ [lpll] = PLL(pll_rk3399, PLL_APLLL, "lpll", mux_pll_p, 0, RK3399_PLL_CON(0), -+ RK3399_PLL_CON(3), 8, 31, 0, rk3399_pll_rates), -+ [bpll] = PLL(pll_rk3399, PLL_APLLB, "bpll", mux_pll_p, 0, RK3399_PLL_CON(8), -+ RK3399_PLL_CON(11), 8, 31, 0, rk3399_pll_rates), -+ [dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RK3399_PLL_CON(16), -+ RK3399_PLL_CON(19), 8, 31, 0, NULL), -+#ifdef RK3399_TWO_PLL_FOR_VOP -+ [cpll] = PLL(pll_rk3399, PLL_CPLL, "cpll", mux_pll_p, 0, RK3399_PLL_CON(24), -+ RK3399_PLL_CON(27), 8, 31, 0, rk3399_pll_rates), -+#else -+ [cpll] = PLL(pll_rk3399, PLL_CPLL, "cpll", mux_pll_p, 0, RK3399_PLL_CON(24), -+ RK3399_PLL_CON(27), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), -+#endif -+ [gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RK3399_PLL_CON(32), -+ RK3399_PLL_CON(35), 8, 31, 0, rk3399_pll_rates), -+ [npll] = PLL(pll_rk3399, PLL_NPLL, "npll", mux_pll_p, 0, RK3399_PLL_CON(40), -+ RK3399_PLL_CON(43), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), -+ [vpll] = PLL(pll_rk3399, PLL_VPLL, "vpll", mux_pll_p, 0, RK3399_PLL_CON(48), -+ RK3399_PLL_CON(51), 8, 31, 0, rk3399_vpll_rates), -+}; ++ FACTOR(PLL_GPLL_DIV2, "gpll_div2", "gpll", 0, 1, 2), ++ FACTOR(PLL_GPLL_DIV3, "gpll_div3", "gpll", 0, 1, 3), + -+static struct rockchip_pll_clock rk3399_pmu_pll_clks[] __initdata = { -+ [ppll] = PLL(pll_rk3399, PLL_PPLL, "ppll", mux_pll_p, CLK_IS_CRITICAL, RK3399_PMU_PLL_CON(0), -+ RK3399_PMU_PLL_CON(3), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), -+}; ++ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(4), 8, 5, DFLAGS), + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK ++ /* PD_DDR */ ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ 
GATE(0, "gpll_div2_ddr", "gpll_div2", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_DDRCLK(SCLK_DDRC, "clk_ddrc", mux_ddrphy_p, 0, ++ RK2928_CLKSEL_CON(26), 8, 2, 0, 2, ++ ROCKCHIP_DDRCLK_SIP_V2), ++ FACTOR(0, "clk_ddrphy", "ddrphy2x", 0, 1, 2), + -+static struct rockchip_clk_branch rk3399_spdif_fracmux __initdata = -+ MUX(0, "clk_spdif_mux", mux_spdif_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(32), 13, 2, MFLAGS); ++ /* PD_CORE */ ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "gpll_div2_core", "gpll_div2", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(0), 7, GFLAGS), + -+static struct rockchip_clk_branch rk3399_i2s0_fracmux __initdata = -+ MUX(0, "clk_i2s0_mux", mux_i2s0_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(28), 8, 2, MFLAGS); ++ /* PD_MISC */ ++ MUX(SCLK_USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RK2928_MISC_CON, 15, 1, MFLAGS), + -+static struct rockchip_clk_branch rk3399_i2s1_fracmux __initdata = -+ MUX(0, "clk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(29), 8, 2, MFLAGS); ++ /* PD_CPU */ ++ COMPOSITE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS, ++ RK2928_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS, ++ RK2928_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_CRYPTO, "clk_crypto", "aclk_cpu_src", 0, ++ RK2928_CLKSEL_CON(24), 0, 2, DFLAGS, ++ RK2928_CLKGATE_CON(0), 12, GFLAGS), + -+static struct rockchip_clk_branch rk3399_i2s2_fracmux __initdata = -+ MUX(0, "clk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(30), 8, 2, MFLAGS); ++ /* PD_VIDEO */ ++ COMPOSITE(ACLK_VEPU, "aclk_vepu", mux_pll_src_5plls_p, 0, ++ RK2928_CLKSEL_CON(32), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 9, GFLAGS), ++ FACTOR(HCLK_VEPU, "hclk_vepu", "aclk_vepu", 0, 1, 4), + -+static struct rockchip_clk_branch rk3399_uart0_fracmux __initdata = -+ MUXTBL(SCLK_UART0, "clk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(33), 8, 2, MFLAGS, uart_mux_idx); ++ COMPOSITE(ACLK_VDPU, "aclk_vdpu", mux_pll_src_5plls_p, 0, ++ RK2928_CLKSEL_CON(32), 13, 3, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 11, GFLAGS), ++ FACTOR_GATE(HCLK_VDPU, "hclk_vdpu", "aclk_vdpu", 0, 1, 4, ++ RK2928_CLKGATE_CON(3), 12, GFLAGS), + -+static struct rockchip_clk_branch rk3399_uart1_fracmux __initdata = -+ MUXTBL(SCLK_UART1, "clk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(34), 8, 2, MFLAGS, uart_mux_idx); ++ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_5plls_p, 0, ++ RK2928_CLKSEL_CON(34), 13, 3, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 10, GFLAGS), + -+static struct rockchip_clk_branch rk3399_uart2_fracmux __initdata = -+ MUXTBL(SCLK_UART2, "clk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(35), 8, 2, 
MFLAGS, uart_mux_idx); ++ /* PD_VIO */ ++ COMPOSITE(ACLK_VIO0, "aclk_vio0", mux_pll_src_5plls_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(31), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE(ACLK_VIO1, "aclk_vio1", mux_pll_src_5plls_p, 0, ++ RK2928_CLKSEL_CON(31), 13, 3, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 4, GFLAGS), ++ FACTOR_GATE(HCLK_VIO, "hclk_vio", "aclk_vio0", CLK_IS_CRITICAL, 1, 4, ++ RK2928_CLKGATE_CON(0), 11, GFLAGS), + -+static struct rockchip_clk_branch rk3399_uart3_fracmux __initdata = -+ MUXTBL(SCLK_UART3, "clk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(36), 8, 2, MFLAGS, uart_mux_idx); ++ /* PD_PERI */ ++ COMPOSITE(0, "aclk_peri_src", mux_aclk_peri_src_p, 0, ++ RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), + -+static struct rockchip_clk_branch rk3399_uart4_pmu_fracmux __initdata = -+ MUXTBL(SCLK_UART4_PMU, "clk_uart4_pmu", mux_uart4_pmu_p, CLK_SET_RATE_PARENT, -+ RK3399_PMU_CLKSEL_CON(5), 8, 2, MFLAGS, uart_mux_idx); ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(2), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 1, GFLAGS), + -+static struct rockchip_clk_branch rk3399_dclk_vop0_fracmux __initdata = -+ MUX(DCLK_VOP0, "dclk_vop0", mux_dclk_vop0_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(49), 11, 1, MFLAGS); ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 6, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(10), 8, GFLAGS), + -+static struct rockchip_clk_branch rk3399_dclk_vop1_fracmux __initdata = -+ MUX(DCLK_VOP1, "dclk_vop1", mux_dclk_vop1_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(50), 11, 1, MFLAGS); ++ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(SCLK_PVTM_GPU, "clk_pvtm_gpu", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(SCLK_PVTM_FUNC, "clk_pvtm_func", "xin24m", 0, ++ RK2928_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", 0, ++ RK2928_CLKGATE_CON(2), 15, GFLAGS), + -+static struct rockchip_clk_branch rk3399_pmuclk_wifi_fracmux __initdata = -+ MUX(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, -+ RK3399_PMU_CLKSEL_CON(1), 14, 1, MFLAGS); ++ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 11, GFLAGS), + -+static const struct rockchip_cpuclk_reg_data rk3399_cpuclkl_data = { -+ .core_reg[0] = RK3399_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 3, -+ .mux_core_main = 0, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, -+}; ++ COMPOSITE(SCLK_SDIO, "sclk_sdio", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ 
RK2928_CLKGATE_CON(2), 13, GFLAGS), + -+static const struct rockchip_cpuclk_reg_data rk3399_cpuclkb_data = { -+ .core_reg[0] = RK3399_CLKSEL_CON(2), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 3, -+ .mux_core_main = 1, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, -+}; ++ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 14, GFLAGS), + -+#define RK3399_DIV_ACLKM_MASK 0x1f -+#define RK3399_DIV_ACLKM_SHIFT 8 -+#define RK3399_DIV_ATCLK_MASK 0x1f -+#define RK3399_DIV_ATCLK_SHIFT 0 -+#define RK3399_DIV_PCLK_DBG_MASK 0x1f -+#define RK3399_DIV_PCLK_DBG_SHIFT 8 ++ DIV(SCLK_PVTM, "clk_pvtm", "clk_pvtm_func", 0, ++ RK2928_CLKSEL_CON(2), 0, 7, DFLAGS), + -+#define RK3399_CLKSEL0(_offs, _aclkm) \ -+ { \ -+ .reg = RK3399_CLKSEL_CON(0 + _offs), \ -+ .val = HIWORD_UPDATE(_aclkm, RK3399_DIV_ACLKM_MASK, \ -+ RK3399_DIV_ACLKM_SHIFT), \ -+ } -+#define RK3399_CLKSEL1(_offs, _atclk, _pdbg) \ -+ { \ -+ .reg = RK3399_CLKSEL_CON(1 + _offs), \ -+ .val = HIWORD_UPDATE(_atclk, RK3399_DIV_ATCLK_MASK, \ -+ RK3399_DIV_ATCLK_SHIFT) | \ -+ HIWORD_UPDATE(_pdbg, RK3399_DIV_PCLK_DBG_MASK, \ -+ RK3399_DIV_PCLK_DBG_SHIFT), \ -+ } ++ /* ++ * Clock-Architecture Diagram 2 ++ */ ++ COMPOSITE(DCLK_VOP, "dclk_vop", mux_sclk_vop_src_p, 0, ++ RK2928_CLKSEL_CON(27), 0, 2, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE(SCLK_VOP, "sclk_vop", mux_sclk_vop_src_p, 0, ++ RK2928_CLKSEL_CON(28), 0, 2, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++ COMPOSITE(DCLK_EBC, "dclk_ebc", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(23), 0, 2, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 4, GFLAGS), + -+/* cluster_l: aclkm in clksel0, rest in clksel1 */ -+#define RK3399_CPUCLKL_RATE(_prate, _aclkm, _atclk, _pdbg) \ -+ { \ -+ .prate = _prate##U, \ -+ .divs = { \ -+ RK3399_CLKSEL0(0, _aclkm), \ -+ RK3399_CLKSEL1(0, _atclk, _pdbg), \ -+ }, \ -+ } ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+/* cluster_b: aclkm in clksel2, rest in clksel3 */ -+#define RK3399_CPUCLKB_RATE(_prate, _aclkm, _atclk, _pdbg) \ -+ { \ -+ .prate = _prate##U, \ -+ .divs = { \ -+ RK3399_CLKSEL0(2, _aclkm), \ -+ RK3399_CLKSEL1(2, _atclk, _pdbg), \ -+ }, \ -+ } ++ COMPOSITE_NODIV(SCLK_CIF_SRC, "sclk_cif_src", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(29), 0, 2, MFLAGS, ++ RK2928_CLKGATE_CON(3), 7, GFLAGS), ++ MUX(SCLK_CIF_OUT_SRC, "sclk_cif_out_src", mux_clk_cif_out_src_p, 0, ++ RK2928_CLKSEL_CON(29), 7, 1, MFLAGS), ++ DIV(SCLK_CIF_OUT, "sclk_cif_out", "sclk_cif_out_src", 0, ++ RK2928_CLKSEL_CON(29), 2, 5, DFLAGS), + -+static struct rockchip_cpuclk_rate_table rk3399_cpuclkl_rates[] __initdata = { -+ RK3399_CPUCLKL_RATE(1800000000, 1, 8, 8), -+ RK3399_CPUCLKL_RATE(1704000000, 1, 8, 8), -+ RK3399_CPUCLKL_RATE(1608000000, 1, 7, 7), -+ RK3399_CPUCLKL_RATE(1512000000, 1, 7, 7), -+ RK3399_CPUCLKL_RATE(1488000000, 1, 6, 6), -+ RK3399_CPUCLKL_RATE(1416000000, 1, 6, 6), -+ RK3399_CPUCLKL_RATE(1200000000, 1, 5, 5), -+ RK3399_CPUCLKL_RATE(1008000000, 1, 5, 5), -+ RK3399_CPUCLKL_RATE( 816000000, 1, 4, 4), -+ RK3399_CPUCLKL_RATE( 696000000, 1, 3, 3), -+ RK3399_CPUCLKL_RATE( 600000000, 1, 3, 3), -+ RK3399_CPUCLKL_RATE( 408000000, 1, 2, 2), -+ RK3399_CPUCLKL_RATE( 312000000, 1, 1, 1), -+ RK3399_CPUCLKL_RATE( 216000000, 1, 1, 1), -+ RK3399_CPUCLKL_RATE( 96000000, 1, 1, 1), -+}; ++ COMPOSITE(0, "i2s0_src", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(9), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(4), 4, GFLAGS), 
++ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(8), 0, ++ RK2928_CLKGATE_CON(4), 5, GFLAGS, ++ &rk3128_i2s0_fracmux), ++ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(4), 6, GFLAGS), + -+static struct rockchip_cpuclk_rate_table rk3399_cpuclkb_rates[] __initdata = { -+ RK3399_CPUCLKB_RATE(2208000000, 1, 11, 11), -+ RK3399_CPUCLKB_RATE(2184000000, 1, 11, 11), -+ RK3399_CPUCLKB_RATE(2088000000, 1, 10, 10), -+ RK3399_CPUCLKB_RATE(2040000000, 1, 10, 10), -+ RK3399_CPUCLKB_RATE(2016000000, 1, 9, 9), -+ RK3399_CPUCLKB_RATE(1992000000, 1, 9, 9), -+ RK3399_CPUCLKB_RATE(1896000000, 1, 9, 9), -+ RK3399_CPUCLKB_RATE(1800000000, 1, 8, 8), -+ RK3399_CPUCLKB_RATE(1704000000, 1, 8, 8), -+ RK3399_CPUCLKB_RATE(1608000000, 1, 7, 7), -+ RK3399_CPUCLKB_RATE(1512000000, 1, 7, 7), -+ RK3399_CPUCLKB_RATE(1488000000, 1, 6, 6), -+ RK3399_CPUCLKB_RATE(1416000000, 1, 6, 6), -+ RK3399_CPUCLKB_RATE(1200000000, 1, 5, 5), -+ RK3399_CPUCLKB_RATE(1008000000, 1, 5, 5), -+ RK3399_CPUCLKB_RATE( 816000000, 1, 4, 4), -+ RK3399_CPUCLKB_RATE( 696000000, 1, 3, 3), -+ RK3399_CPUCLKB_RATE( 600000000, 1, 3, 3), -+ RK3399_CPUCLKB_RATE( 408000000, 1, 2, 2), -+ RK3399_CPUCLKB_RATE( 312000000, 1, 1, 1), -+ RK3399_CPUCLKB_RATE( 216000000, 1, 1, 1), -+ RK3399_CPUCLKB_RATE( 96000000, 1, 1, 1), -+}; ++ COMPOSITE(0, "i2s1_src", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(7), 0, ++ RK2928_CLKGATE_CON(0), 10, GFLAGS, ++ &rk3128_i2s1_fracmux), ++ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_out", mux_i2s_out_p, 0, ++ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, ++ RK2928_CLKGATE_CON(0), 13, GFLAGS), + -+static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { -+ /* -+ * CRU Clock-Architecture -+ */ ++ COMPOSITE(0, "sclk_spdif_src", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(6), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_frac", "sclk_spdif_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(20), 0, ++ RK2928_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3128_spdif_fracmux), + -+ /* usbphy */ -+ GATE(SCLK_USB2PHY0_REF, "clk_usb2phy0_ref", "xin24m", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(SCLK_USB2PHY1_REF, "clk_usb2phy1_ref", "xin24m", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 3, GFLAGS), + -+ GATE(SCLK_USBPHY0_480M_SRC, "clk_usbphy0_480m_src", "clk_usbphy0_480m", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(13), 12, GFLAGS), -+ GATE(SCLK_USBPHY1_480M_SRC, "clk_usbphy1_480m_src", "clk_usbphy1_480m", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(13), 12, GFLAGS), -+ MUX(0, "clk_usbphy_480m", mux_usbphy_480m_p, 0, -+ RK3399_CLKSEL_CON(14), 6, 1, MFLAGS), ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin12m", 0, ++ RK2928_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin12m", 0, ++ RK2928_CLKGATE_CON(1), 6, GFLAGS), + -+ MUX(0, "upll", mux_pll_src_24m_usbphy480m_p, 0, -+ RK3399_CLKSEL_CON(14), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, ++ RK2928_CLKSEL_CON(24), 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(2), 8, GFLAGS), + -+ COMPOSITE_NODIV(SCLK_HSICPHY, "clk_hsicphy", mux_pll_src_cpll_gpll_npll_usbphy480m_p, 0, -+ 
RK3399_CLKSEL_CON(19), 0, 2, MFLAGS, -+ RK3399_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE(ACLK_GPU, "aclk_gpu", mux_pll_src_5plls_p, 0, ++ RK2928_CLKSEL_CON(34), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 13, GFLAGS), + -+ COMPOSITE(ACLK_USB3, "aclk_usb3", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(ACLK_USB3_NOC, "aclk_usb3_noc", "aclk_usb3", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(30), 0, GFLAGS), -+ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_usb3", 0, -+ RK3399_CLKGATE_CON(30), 1, GFLAGS), -+ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_usb3", 0, -+ RK3399_CLKGATE_CON(30), 2, GFLAGS), -+ GATE(ACLK_USB3_RKSOC_AXI_PERF, "aclk_usb3_rksoc_axi_perf", "aclk_usb3", 0, -+ RK3399_CLKGATE_CON(30), 3, GFLAGS), -+ GATE(ACLK_USB3_GRF, "aclk_usb3_grf", "aclk_usb3", 0, -+ RK3399_CLKGATE_CON(30), 4, GFLAGS), ++ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 9, GFLAGS), + -+ GATE(SCLK_USB3OTG0_REF, "clk_usb3otg0_ref", "xin24m", 0, -+ RK3399_CLKGATE_CON(12), 1, GFLAGS), -+ GATE(SCLK_USB3OTG1_REF, "clk_usb3otg1_ref", "xin24m", 0, -+ RK3399_CLKGATE_CON(12), 2, GFLAGS), ++ /* PD_UART */ ++ COMPOSITE(0, "uart0_src", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 8, GFLAGS), ++ MUX(0, "uart12_src", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(13), 14, 2, MFLAGS), ++ COMPOSITE_NOMUX(0, "uart1_src", "uart12_src", 0, ++ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "uart2_src", "uart12_src", 0, ++ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(17), 0, ++ RK2928_CLKGATE_CON(1), 9, GFLAGS, ++ &rk3128_uart0_fracmux), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(18), 0, ++ RK2928_CLKGATE_CON(1), 11, GFLAGS, ++ &rk3128_uart1_fracmux), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(19), 0, ++ RK2928_CLKGATE_CON(1), 13, GFLAGS, ++ &rk3128_uart2_fracmux), + -+ COMPOSITE(SCLK_USB3OTG0_SUSPEND, "clk_usb3otg0_suspend", mux_pll_p, 0, -+ RK3399_CLKSEL_CON(40), 15, 1, MFLAGS, 0, 10, DFLAGS, -+ RK3399_CLKGATE_CON(12), 3, GFLAGS), ++ COMPOSITE(SCLK_MAC_SRC, "sclk_gmac_src", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 7, GFLAGS), ++ MUX(SCLK_MAC, "sclk_gmac", mux_sclk_gmac_p, 0, ++ RK2928_CLKSEL_CON(5), 15, 1, MFLAGS), ++ GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_gmac", 0, ++ RK2928_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_gmac", 0, ++ RK2928_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_gmac", 0, ++ RK2928_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(SCLK_MAC_TX, "sclk_mac_tx", "sclk_gmac", 0, ++ RK2928_CLKGATE_CON(2), 7, GFLAGS), + -+ COMPOSITE(SCLK_USB3OTG1_SUSPEND, "clk_usb3otg1_suspend", mux_pll_p, 0, -+ RK3399_CLKSEL_CON(41), 15, 1, MFLAGS, 0, 10, DFLAGS, -+ RK3399_CLKGATE_CON(12), 4, GFLAGS), ++ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(SCLK_HSADC_TSP, "sclk_hsadc_tsp", "ext_hsadc_tsp", 0, ++ RK2928_CLKGATE_CON(10), 13, GFLAGS), + -+ COMPOSITE(SCLK_UPHY0_TCPDPHY_REF, "clk_uphy0_tcpdphy_ref", 
mux_pll_p, 0, -+ RK3399_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 4, GFLAGS), ++ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(2), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(10), 15, GFLAGS), + -+ COMPOSITE(SCLK_UPHY0_TCPDCORE, "clk_uphy0_tcpdcore", mux_pll_src_24m_32k_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 5, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "cpll", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(29), 8, 6, DFLAGS, ++ RK2928_CLKGATE_CON(1), 0, GFLAGS), + -+ COMPOSITE(SCLK_UPHY1_TCPDPHY_REF, "clk_uphy1_tcpdphy_ref", mux_pll_p, 0, -+ RK3399_CLKSEL_CON(65), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 6, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+ COMPOSITE(SCLK_UPHY1_TCPDCORE, "clk_uphy1_tcpdcore", mux_pll_src_24m_32k_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(65), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 7, GFLAGS), ++ /* PD_VOP */ ++ GATE(ACLK_LCDC0, "aclk_lcdc0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(0, "aclk_vio0_niu", "aclk_vio0", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS), + -+ /* little core */ -+ GATE(0, "clk_core_l_lpll_src", "lpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "clk_core_l_bpll_src", "bpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "clk_core_l_dpll_src", "dpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "clk_core_l_gpll_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_vio1", 0, RK2928_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(0, "aclk_vio1_niu", "aclk_vio1", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 10, GFLAGS), + -+ COMPOSITE_NOMUX(0, "aclkm_core_l", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(0), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "atclk_core_l", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_COREDBG_L, "pclk_dbg_core_l", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(1), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(PCLK_MIPI, "pclk_mipi", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(HCLK_LCDC0, "hclk_lcdc0", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(0, "hclk_vio_niu", "hclk_vio", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "hclk_vio", 0, RK2928_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(HCLK_EBC, "hclk_ebc", "hclk_vio", 0, RK2928_CLKGATE_CON(9), 9, GFLAGS), + -+ GATE(ACLK_CORE_ADB400_CORE_L_2_CCI500, "aclk_core_adb400_core_l_2_cci500", "aclkm_core_l", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 12, GFLAGS), -+ GATE(ACLK_PERF_CORE_L, "aclk_perf_core_l", "aclkm_core_l", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 13, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "aclk_peri_axi", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, 
GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK2928_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 15, GFLAGS), ++ GATE(0, "aclk_cpu_to_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), + -+ GATE(0, "clk_dbg_pd_core_l", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(ACLK_GIC_ADB400_GIC_2_CORE_L, "aclk_core_adb400_gic_2_core_l", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(ACLK_GIC_ADB400_CORE_L_2_GIC, "aclk_core_adb400_core_l_2_gic", "armclkl", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(SCLK_PVTM_CORE_L, "clk_pvtm_core_l", "xin24m", 0, -+ RK3399_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), ++ GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(0, "hclk_peri_ahb", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS), ++ GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(0, "hclk_emmc_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 6, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 14, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS), + -+ /* big core */ -+ GATE(0, "clk_core_b_lpll_src", "lpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(0, "clk_core_b_bpll_src", "bpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(0, "clk_core_b_dpll_src", "dpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(0, "clk_core_b_gpll_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_SIM_CARD, "pclk_sim_card", "pclk_peri", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK2928_CLKGATE_CON(10), 11, GFLAGS), ++ GATE(0, "pclk_peri_axi", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(PCLK_PWM, "pclk_pwm", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(PCLK_I2C1, 
"pclk_i2c1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 14, GFLAGS), ++ GATE(PCLK_EFUSE, "pclk_efuse", "pclk_peri", 0, RK2928_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), + -+ COMPOSITE_NOMUX(0, "aclkm_core_b", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(2), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "atclk_core_b", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(1), 5, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg_core_b", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3399_CLKGATE_CON(1), 6, GFLAGS), ++ /* PD_BUS */ ++ GATE(0, "aclk_initmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), + -+ GATE(ACLK_CORE_ADB400_CORE_B_2_CCI500, "aclk_core_adb400_core_b_2_cci500", "aclkm_core_b", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(ACLK_PERF_CORE_B, "aclk_perf_core_b", "aclkm_core_b", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_cpu", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), + -+ GATE(0, "clk_dbg_pd_core_b", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 1, GFLAGS), -+ GATE(ACLK_GIC_ADB400_GIC_2_CORE_B, "aclk_core_adb400_gic_2_core_b", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(ACLK_GIC_ADB400_CORE_B_2_GIC, "aclk_core_adb400_core_b_2_gic", "armclkb", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 4, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 14, GFLAGS), ++ GATE(0, "pclk_ddrupctl", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(0, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(PCLK_MIPIPHY, "pclk_mipiphy", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 0, GFLAGS), + -+ DIV(PCLK_COREDBG_B, "pclken_dbg_core_b", "pclk_dbg_core_b", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(3), 13, 2, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 3, GFLAGS), + -+ GATE(0, "pclk_dbg_cxcs_pd_core_b", "pclk_dbg_core_b", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(14), 2, GFLAGS), ++ /* PD_MMC */ ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3228_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 0), + -+ GATE(SCLK_PVTM_CORE_B, "clk_pvtm_core_b", "xin24m", 0, -+ RK3399_CLKGATE_CON(1), 7, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3228_SDIO_CON0, 1), ++ 
MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3228_SDIO_CON1, 0), + -+ /* gmac */ -+ GATE(0, "cpll_aclk_gmac_src", "cpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(0, "gpll_aclk_gmac_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 8, GFLAGS), -+ COMPOSITE(0, "aclk_gmac_pre", mux_aclk_gmac_p, 0, -+ RK3399_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(6), 10, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3228_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3228_EMMC_CON1, 0), ++}; + -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_gmac_pre", 0, -+ RK3399_CLKGATE_CON(32), 0, GFLAGS), -+ GATE(ACLK_GMAC_NOC, "aclk_gmac_noc", "aclk_gmac_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(32), 1, GFLAGS), -+ GATE(ACLK_PERF_GMAC, "aclk_perf_gmac", "aclk_gmac_pre", 0, -+ RK3399_CLKGATE_CON(32), 4, GFLAGS), ++static struct rockchip_clk_branch rk3126_clk_branches[] __initdata = { ++ GATE(0, "pclk_stimer", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 15, GFLAGS), ++ GATE(0, "pclk_s_efuse", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 14, GFLAGS), ++ GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 8, GFLAGS), ++}; + -+ COMPOSITE_NOMUX(0, "pclk_gmac_pre", "aclk_gmac_pre", 0, -+ RK3399_CLKSEL_CON(19), 8, 3, DFLAGS, -+ RK3399_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_gmac_pre", 0, -+ RK3399_CLKGATE_CON(32), 2, GFLAGS), -+ GATE(PCLK_GMAC_NOC, "pclk_gmac_noc", "pclk_gmac_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(32), 3, GFLAGS), ++static struct rockchip_clk_branch rk3128_clk_branches[] __initdata = { ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_sclk_sfc_src_p, 0, ++ RK2928_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 15, GFLAGS), + -+ COMPOSITE(SCLK_MAC, "clk_gmac", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(5), 5, GFLAGS), ++ GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK2928_CLKGATE_CON(3), 14, GFLAGS), ++ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_cpu", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), ++}; + -+ MUX(SCLK_RMII_SRC, "clk_rmii_src", mux_rmii_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(19), 4, 1, MFLAGS), -+ GATE(SCLK_MACREF_OUT, "clk_mac_refout", "clk_rmii_src", 0, -+ RK3399_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(SCLK_MACREF, "clk_mac_ref", "clk_rmii_src", 0, -+ RK3399_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(SCLK_MAC_RX, "clk_rmii_rx", "clk_rmii_src", 0, -+ RK3399_CLKGATE_CON(5), 8, GFLAGS), -+ GATE(SCLK_MAC_TX, "clk_rmii_tx", "clk_rmii_src", 0, -+ RK3399_CLKGATE_CON(5), 9, GFLAGS), ++static void __iomem *rk312x_reg_base; + -+ /* spdif */ -+ COMPOSITE(SCLK_SPDIF_DIV, "clk_spdif_div", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(32), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(8), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(99), 0, -+ RK3399_CLKGATE_CON(8), 14, GFLAGS, -+ &rk3399_spdif_fracmux), -+ GATE(SCLK_SPDIF_8CH, "clk_spdif", "clk_spdif_mux", CLK_SET_RATE_PARENT, -+ RK3399_CLKGATE_CON(8), 15, GFLAGS), ++void rkclk_cpuclk_div_setting(int div) ++{ ++ if (cpu_is_rk312x()) ++ writel_relaxed((0x001f0000 | (div - 1)), ++ rk312x_reg_base + RK2928_CLKSEL_CON(0)); ++} + -+ COMPOSITE(SCLK_SPDIF_REC_DPTX, "clk_spdif_rec_dptx", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(32), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 6, GFLAGS), -+ /* i2s */ -+ COMPOSITE(SCLK_I2S0_DIV, "clk_i2s0_div", 
mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(28), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(8), 3, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(96), 0, -+ RK3399_CLKGATE_CON(8), 4, GFLAGS, -+ &rk3399_i2s0_fracmux), -+ GATE(SCLK_I2S0_8CH, "clk_i2s0", "clk_i2s0_mux", CLK_SET_RATE_PARENT, -+ RK3399_CLKGATE_CON(8), 5, GFLAGS), ++static void rk3128_dump_cru(void) ++{ ++ if (rk312x_reg_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk312x_reg_base, ++ 0x1f8, false); ++ } ++} + -+ COMPOSITE(SCLK_I2S1_DIV, "clk_i2s1_div", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(8), 6, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(97), 0, -+ RK3399_CLKGATE_CON(8), 7, GFLAGS, -+ &rk3399_i2s1_fracmux), -+ GATE(SCLK_I2S1_8CH, "clk_i2s1", "clk_i2s1_mux", CLK_SET_RATE_PARENT, -+ RK3399_CLKGATE_CON(8), 8, GFLAGS), ++static struct rockchip_clk_provider *__init rk3128_common_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+ COMPOSITE(SCLK_I2S2_DIV, "clk_i2s2_div", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(30), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(8), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(98), 0, -+ RK3399_CLKGATE_CON(8), 10, GFLAGS, -+ &rk3399_i2s2_fracmux), -+ GATE(SCLK_I2S2_8CH, "clk_i2s2", "clk_i2s2_mux", CLK_SET_RATE_PARENT, -+ RK3399_CLKGATE_CON(8), 11, GFLAGS), -+ -+ MUX(SCLK_I2SOUT_SRC, "clk_i2sout_src", mux_i2sch_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(31), 0, 2, MFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "clk_i2sout", mux_i2sout_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(31), 2, 1, MFLAGS, -+ RK3399_CLKGATE_CON(8), 12, GFLAGS), -+ -+ /* uart */ -+ MUX(SCLK_UART0_SRC, "clk_uart0_src", mux_pll_src_cpll_gpll_upll_p, 0, -+ RK3399_CLKSEL_CON(33), 12, 2, MFLAGS), -+ COMPOSITE_NOMUX(0, "clk_uart0_div", "clk_uart0_src", 0, -+ RK3399_CLKSEL_CON(33), 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(100), 0, -+ RK3399_CLKGATE_CON(9), 1, GFLAGS, -+ &rk3399_uart0_fracmux), -+ -+ MUX(SCLK_UART_SRC, "clk_uart_src", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(33), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(0, "clk_uart1_div", "clk_uart_src", 0, -+ RK3399_CLKSEL_CON(34), 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 2, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(101), 0, -+ RK3399_CLKGATE_CON(9), 3, GFLAGS, -+ &rk3399_uart1_fracmux), -+ -+ COMPOSITE_NOMUX(0, "clk_uart2_div", "clk_uart_src", 0, -+ RK3399_CLKSEL_CON(35), 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(102), 0, -+ RK3399_CLKGATE_CON(9), 5, GFLAGS, -+ &rk3399_uart2_fracmux), -+ -+ COMPOSITE_NOMUX(0, "clk_uart3_div", "clk_uart_src", 0, -+ RK3399_CLKSEL_CON(36), 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 6, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(103), 0, -+ RK3399_CLKGATE_CON(9), 7, GFLAGS, -+ &rk3399_uart3_fracmux), -+ -+ COMPOSITE(PCLK_DDR, "pclk_ddr", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, -+ 
RK3399_CLKSEL_CON(6), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(3), 4, GFLAGS), -+ -+ GATE(PCLK_CENTER_MAIN_NOC, "pclk_center_main_noc", "pclk_ddr", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(18), 10, GFLAGS), -+ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", 0, -+ RK3399_CLKGATE_CON(18), 12, GFLAGS), -+ GATE(PCLK_CIC, "pclk_cic", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(18), 15, GFLAGS), -+ GATE(PCLK_DDR_SGRF, "pclk_ddr_sgrf", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(19), 2, GFLAGS), ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return ERR_PTR(-ENOMEM); ++ } + -+ GATE(SCLK_PVTM_DDR, "clk_pvtm_ddr", "xin24m", 0, -+ RK3399_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(SCLK_DFIMON0_TIMER, "clk_dfimon0_timer", "xin24m", 0, -+ RK3399_CLKGATE_CON(3), 5, GFLAGS), -+ GATE(SCLK_DFIMON1_TIMER, "clk_dfimon1_timer", "xin24m", 0, -+ RK3399_CLKGATE_CON(3), 6, GFLAGS), ++ rk312x_reg_base = reg_base; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return ERR_PTR(-ENOMEM); ++ } ++ clks = ctx->clk_data.clks; + -+ /* cci */ -+ GATE(0, "cpll_aclk_cci_src", "cpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(0, "gpll_aclk_cci_src", "gpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(0, "npll_aclk_cci_src", "npll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(0, "vpll_aclk_cci_src", "vpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 3, GFLAGS), ++ rockchip_clk_register_plls(ctx, rk3128_pll_clks, ++ ARRAY_SIZE(rk3128_pll_clks), ++ RK3128_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, common_clk_branches, ++ ARRAY_SIZE(common_clk_branches)); + -+ COMPOSITE(0, "aclk_cci_pre", mux_aclk_cci_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(2), 4, GFLAGS), ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL_DIV2], ++ &rk3128_cpuclk_data, rk3128_cpuclk_rates, ++ ARRAY_SIZE(rk3128_cpuclk_rates)); + -+ GATE(ACLK_ADB400M_PD_CORE_L, "aclk_adb400m_pd_core_l", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(ACLK_ADB400M_PD_CORE_B, "aclk_adb400m_pd_core_b", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(ACLK_CCI, "aclk_cci", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(ACLK_CCI_NOC0, "aclk_cci_noc0", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(ACLK_CCI_NOC1, "aclk_cci_noc1", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(ACLK_CCI_GRF, "aclk_cci_grf", "aclk_cci_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 7, GFLAGS), ++ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ GATE(0, "cpll_cci_trace", "cpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(0, "gpll_cci_trace", "gpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE(SCLK_CCI_TRACE, "clk_cci_trace", mux_cci_trace_p, CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(2), 7, GFLAGS), ++ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); + -+ GATE(0, "cpll_cs", "cpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(0, "gpll_cs", "gpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 9, GFLAGS), -+ 
GATE(0, "npll_cs", "npll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_CS, "clk_cs", mux_cs_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS), -+ GATE(0, "clk_dbg_cxcs", "clk_cs", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(0, "clk_dbg_noc", "clk_cs", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(15), 6, GFLAGS), ++ if (!rk_dump_cru) ++ rk_dump_cru = rk3128_dump_cru; + -+ /* vcodec */ -+ COMPOSITE(0, "aclk_vcodec_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, -+ RK3399_CLKSEL_CON(7), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vcodec_pre", "aclk_vcodec_pre", 0, -+ RK3399_CLKSEL_CON(7), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0, -+ RK3399_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(0, "hclk_vcodec_noc", "hclk_vcodec_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(17), 3, GFLAGS), ++ return ctx; ++} + -+ GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0, -+ RK3399_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(0, "aclk_vcodec_noc", "aclk_vcodec_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(17), 1, GFLAGS), ++static void __init rk3126_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; + -+ /* vdu */ -+ COMPOSITE(SCLK_VDU_CORE, "clk_vdu_core", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 4, GFLAGS), -+ COMPOSITE(SCLK_VDU_CA, "clk_vdu_ca", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 5, GFLAGS), ++ ctx = rk3128_common_clk_init(np); ++ if (IS_ERR(ctx)) ++ return; + -+ COMPOSITE(0, "aclk_vdu_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, -+ RK3399_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vdu_pre", "aclk_vdu_pre", 0, -+ RK3399_CLKSEL_CON(8), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(HCLK_VDU, "hclk_vdu", "hclk_vdu_pre", 0, -+ RK3399_CLKGATE_CON(17), 10, GFLAGS), -+ GATE(HCLK_VDU_NOC, "hclk_vdu_noc", "hclk_vdu_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(17), 11, GFLAGS), ++ rockchip_clk_register_branches(ctx, rk3126_clk_branches, ++ ARRAY_SIZE(rk3126_clk_branches)); + -+ GATE(ACLK_VDU, "aclk_vdu", "aclk_vdu_pre", 0, -+ RK3399_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(ACLK_VDU_NOC, "aclk_vdu_noc", "aclk_vdu_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(17), 9, GFLAGS), ++ rockchip_clk_of_add_provider(np, ctx); ++} + -+ /* iep */ -+ COMPOSITE(0, "aclk_iep_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, -+ RK3399_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 6, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_iep_pre", "aclk_iep_pre", 0, -+ RK3399_CLKSEL_CON(10), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_iep_pre", 0, -+ RK3399_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(HCLK_IEP_NOC, "hclk_iep_noc", "hclk_iep_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(16), 3, GFLAGS), ++CLK_OF_DECLARE(rk3126_cru, "rockchip,rk3126-cru", rk3126_clk_init); + -+ GATE(ACLK_IEP, "aclk_iep", "aclk_iep_pre", 0, -+ RK3399_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(ACLK_IEP_NOC, "aclk_iep_noc", "aclk_iep_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(16), 1, GFLAGS), ++static void __init rk3128_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; + -+ /* rga */ -+ COMPOSITE(SCLK_RGA_CORE, "clk_rga_core", mux_pll_src_cpll_gpll_npll_ppll_p, 0, -+ 
RK3399_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 10, GFLAGS), ++ ctx = rk3128_common_clk_init(np); ++ if (IS_ERR(ctx)) ++ return; + -+ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, -+ RK3399_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_rga_pre", "aclk_rga_pre", 0, -+ RK3399_CLKSEL_CON(11), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(4), 9, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, -+ RK3399_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(HCLK_RGA_NOC, "hclk_rga_noc", "hclk_rga_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(16), 11, GFLAGS), ++ rockchip_clk_register_branches(ctx, rk3128_clk_branches, ++ ARRAY_SIZE(rk3128_clk_branches)); + -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, -+ RK3399_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(ACLK_RGA_NOC, "aclk_rga_noc", "aclk_rga_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(16), 9, GFLAGS), ++ rockchip_clk_of_add_provider(np, ctx); ++} + -+ /* center */ -+ COMPOSITE(ACLK_CENTER, "aclk_center", mux_pll_src_cpll_gpll_npll_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(3), 7, GFLAGS), -+ GATE(ACLK_CENTER_MAIN_NOC, "aclk_center_main_noc", "aclk_center", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(ACLK_CENTER_PERI_NOC, "aclk_center_peri_noc", "aclk_center", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(19), 1, GFLAGS), ++CLK_OF_DECLARE(rk3128_cru, "rockchip,rk3128-cru", rk3128_clk_init); + -+ /* gpu */ -+ COMPOSITE(0, "aclk_gpu_pre", mux_pll_src_ppll_cpll_gpll_npll_p, CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(13), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", 0, -+ RK3399_CLKGATE_CON(30), 8, GFLAGS), -+ GATE(ACLK_PERF_GPU, "aclk_perf_gpu", "aclk_gpu_pre", 0, -+ RK3399_CLKGATE_CON(30), 10, GFLAGS), -+ GATE(ACLK_GPU_GRF, "aclk_gpu_grf", "aclk_gpu_pre", 0, -+ RK3399_CLKGATE_CON(30), 11, GFLAGS), -+ GATE(SCLK_PVTM_GPU, "aclk_pvtm_gpu", "xin24m", 0, -+ RK3399_CLKGATE_CON(13), 1, GFLAGS), ++struct clk_rk3128_inits { ++ void (*inits)(struct device_node *np); ++}; + -+ /* perihp */ -+ GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(5), 0, GFLAGS), -+ COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(5), 2, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERIHP, "hclk_perihp", "aclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(14), 8, 2, DFLAGS, -+ RK3399_CLKGATE_CON(5), 3, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERIHP, "pclk_perihp", "aclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(14), 12, 3, DFLAGS, -+ RK3399_CLKGATE_CON(5), 4, GFLAGS), ++static const struct clk_rk3128_inits clk_rk3126_init = { ++ .inits = rk3126_clk_init, ++}; + -+ GATE(ACLK_PERF_PCIE, "aclk_perf_pcie", "aclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 2, GFLAGS), -+ GATE(ACLK_PCIE, "aclk_pcie", "aclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 10, GFLAGS), -+ GATE(0, "aclk_perihp_noc", "aclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(20), 12, GFLAGS), ++static const struct clk_rk3128_inits clk_rk3128_init = { ++ .inits = rk3128_clk_init, ++}; + -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 5, GFLAGS), -+ GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(HCLK_HOST1, 
"hclk_host1", "hclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 7, GFLAGS), -+ GATE(HCLK_HOST1_ARB, "hclk_host1_arb", "hclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 8, GFLAGS), -+ GATE(HCLK_HSIC, "hclk_hsic", "hclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 9, GFLAGS), -+ GATE(0, "hclk_perihp_noc", "hclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(20), 13, GFLAGS), -+ GATE(0, "hclk_ahb1tom", "hclk_perihp", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(20), 15, GFLAGS), ++static const struct of_device_id clk_rk3128_match_table[] = { ++ { ++ .compatible = "rockchip,rk3126-cru", ++ .data = &clk_rk3126_init, ++ }, { ++ .compatible = "rockchip,rk3128-cru", ++ .data = &clk_rk3128_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3128_match_table); + -+ GATE(PCLK_PERIHP_GRF, "pclk_perihp_grf", "pclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(20), 4, GFLAGS), -+ GATE(PCLK_PCIE, "pclk_pcie", "pclk_perihp", 0, -+ RK3399_CLKGATE_CON(20), 11, GFLAGS), -+ GATE(0, "pclk_perihp_noc", "pclk_perihp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(20), 14, GFLAGS), -+ GATE(PCLK_HSICPHY, "pclk_hsicphy", "pclk_perihp", 0, -+ RK3399_CLKGATE_CON(31), 8, GFLAGS), ++static int __init clk_rk3128_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3128_inits *init_data; + -+ /* sdio & sdmmc */ -+ COMPOSITE(HCLK_SD, "hclk_sd", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(13), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 13, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd", 0, -+ RK3399_CLKGATE_CON(33), 8, GFLAGS), -+ GATE(0, "hclk_sdmmc_noc", "hclk_sd", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(33), 9, GFLAGS), ++ match = of_match_device(clk_rk3128_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; + -+ COMPOSITE(SCLK_SDIO, "clk_sdio", mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p, 0, -+ RK3399_CLKSEL_CON(15), 8, 3, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(6), 0, GFLAGS), ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + -+ COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p, 0, -+ RK3399_CLKSEL_CON(16), 8, 3, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(6), 1, GFLAGS), ++ return 0; ++} + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3399_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK3399_SDMMC_CON1, 1), ++static struct platform_driver clk_rk3128_driver = { ++ .driver = { ++ .name = "clk-rk3128", ++ .of_match_table = clk_rk3128_match_table, ++ }, ++}; ++builtin_platform_driver_probe(clk_rk3128_driver, clk_rk3128_probe); + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3399_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK3399_SDIO_CON1, 1), ++MODULE_DESCRIPTION("Rockchip RK3128 Clock Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip-oh/clk-rk3188.c b/drivers/clk/rockchip-oh/clk-rk3188.c +new file mode 100644 +index 000000000..d891b80d6 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-rk3188.c +@@ -0,0 +1,921 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2014 MundoReader S.L. 
++ * Author: Heiko Stuebner ++ */ + -+ /* pcie */ -+ COMPOSITE(SCLK_PCIE_PM, "clk_pcie_pm", mux_pll_src_cpll_gpll_npll_24m_p, 0, -+ RK3399_CLKSEL_CON(17), 8, 3, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(6), 2, GFLAGS), ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ COMPOSITE_NOMUX(SCLK_PCIEPHY_REF100M, "clk_pciephy_ref100m", "npll", 0, -+ RK3399_CLKSEL_CON(18), 11, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 6, GFLAGS), -+ MUX(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pll_src_24m_pciephy_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(18), 10, 1, MFLAGS), ++#define RK3066_GRF_SOC_STATUS 0x15c ++#define RK3188_GRF_SOC_STATUS 0xac + -+ COMPOSITE(0, "clk_pcie_core_cru", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(6), 3, GFLAGS), -+ MUX(SCLK_PCIE_CORE, "clk_pcie_core", mux_pciecore_cru_phy_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(18), 7, 1, MFLAGS), ++enum rk3188_plls { ++ apll, cpll, dpll, gpll, ++}; + -+ /* emmc */ -+ COMPOSITE(SCLK_EMMC, "clk_emmc", mux_pll_src_cpll_gpll_npll_upll_24m_p, 0, -+ RK3399_CLKSEL_CON(22), 8, 3, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(6), 14, GFLAGS), ++static struct rockchip_pll_rate_table rk3188_pll_rates[] = { ++ RK3066_PLL_RATE(2208000000, 1, 92, 1), ++ RK3066_PLL_RATE(2184000000, 1, 91, 1), ++ RK3066_PLL_RATE(2160000000, 1, 90, 1), ++ RK3066_PLL_RATE(2136000000, 1, 89, 1), ++ RK3066_PLL_RATE(2112000000, 1, 88, 1), ++ RK3066_PLL_RATE(2088000000, 1, 87, 1), ++ RK3066_PLL_RATE(2064000000, 1, 86, 1), ++ RK3066_PLL_RATE(2040000000, 1, 85, 1), ++ RK3066_PLL_RATE(2016000000, 1, 84, 1), ++ RK3066_PLL_RATE(1992000000, 1, 83, 1), ++ RK3066_PLL_RATE(1968000000, 1, 82, 1), ++ RK3066_PLL_RATE(1944000000, 1, 81, 1), ++ RK3066_PLL_RATE(1920000000, 1, 80, 1), ++ RK3066_PLL_RATE(1896000000, 1, 79, 1), ++ RK3066_PLL_RATE(1872000000, 1, 78, 1), ++ RK3066_PLL_RATE(1848000000, 1, 77, 1), ++ RK3066_PLL_RATE(1824000000, 1, 76, 1), ++ RK3066_PLL_RATE(1800000000, 1, 75, 1), ++ RK3066_PLL_RATE(1776000000, 1, 74, 1), ++ RK3066_PLL_RATE(1752000000, 1, 73, 1), ++ RK3066_PLL_RATE(1728000000, 1, 72, 1), ++ RK3066_PLL_RATE(1704000000, 1, 71, 1), ++ RK3066_PLL_RATE(1680000000, 1, 70, 1), ++ RK3066_PLL_RATE(1656000000, 1, 69, 1), ++ RK3066_PLL_RATE(1632000000, 1, 68, 1), ++ RK3066_PLL_RATE(1608000000, 1, 67, 1), ++ RK3066_PLL_RATE(1560000000, 1, 65, 1), ++ RK3066_PLL_RATE(1512000000, 1, 63, 1), ++ RK3066_PLL_RATE(1488000000, 1, 62, 1), ++ RK3066_PLL_RATE(1464000000, 1, 61, 1), ++ RK3066_PLL_RATE(1440000000, 1, 60, 1), ++ RK3066_PLL_RATE(1416000000, 1, 59, 1), ++ RK3066_PLL_RATE(1392000000, 1, 58, 1), ++ RK3066_PLL_RATE(1368000000, 1, 57, 1), ++ RK3066_PLL_RATE(1344000000, 1, 56, 1), ++ RK3066_PLL_RATE(1320000000, 1, 55, 1), ++ RK3066_PLL_RATE(1296000000, 1, 54, 1), ++ RK3066_PLL_RATE(1272000000, 1, 53, 1), ++ RK3066_PLL_RATE(1248000000, 1, 52, 1), ++ RK3066_PLL_RATE(1224000000, 1, 51, 1), ++ RK3066_PLL_RATE(1200000000, 1, 50, 1), ++ RK3066_PLL_RATE(1188000000, 2, 99, 1), ++ RK3066_PLL_RATE(1176000000, 1, 49, 1), ++ RK3066_PLL_RATE(1128000000, 1, 47, 1), ++ RK3066_PLL_RATE(1104000000, 1, 46, 1), ++ RK3066_PLL_RATE(1008000000, 1, 84, 2), ++ RK3066_PLL_RATE( 912000000, 1, 76, 2), ++ RK3066_PLL_RATE( 891000000, 8, 594, 2), ++ RK3066_PLL_RATE( 888000000, 1, 74, 2), ++ RK3066_PLL_RATE( 816000000, 1, 68, 2), ++ RK3066_PLL_RATE( 798000000, 2, 133, 2), ++ RK3066_PLL_RATE( 792000000, 1, 66, 2), ++ RK3066_PLL_RATE( 768000000, 1, 64, 2), ++ RK3066_PLL_RATE( 742500000, 8, 495, 
2), ++ RK3066_PLL_RATE( 696000000, 1, 58, 2), ++ RK3066_PLL_RATE( 600000000, 1, 50, 2), ++ RK3066_PLL_RATE( 594000000, 2, 198, 4), ++ RK3066_PLL_RATE( 552000000, 1, 46, 2), ++ RK3066_PLL_RATE( 504000000, 1, 84, 4), ++ RK3066_PLL_RATE( 456000000, 1, 76, 4), ++ RK3066_PLL_RATE( 408000000, 1, 68, 4), ++ RK3066_PLL_RATE( 400000000, 3, 100, 2), ++ RK3066_PLL_RATE( 384000000, 2, 128, 4), ++ RK3066_PLL_RATE( 360000000, 1, 60, 4), ++ RK3066_PLL_RATE( 312000000, 1, 52, 4), ++ RK3066_PLL_RATE( 300000000, 1, 50, 4), ++ RK3066_PLL_RATE( 297000000, 2, 198, 8), ++ RK3066_PLL_RATE( 252000000, 1, 84, 8), ++ RK3066_PLL_RATE( 216000000, 1, 72, 8), ++ RK3066_PLL_RATE( 148500000, 2, 99, 8), ++ RK3066_PLL_RATE( 126000000, 1, 84, 16), ++ RK3066_PLL_RATE( 48000000, 1, 64, 32), ++ { /* sentinel */ }, ++}; + -+ GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 13, GFLAGS), -+ GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(6), 12, GFLAGS), -+ COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS), -+ GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(32), 8, GFLAGS), -+ GATE(ACLK_EMMC_NOC, "aclk_emmc_noc", "aclk_emmc", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(32), 9, GFLAGS), -+ GATE(ACLK_EMMC_GRF, "aclk_emmcgrf", "aclk_emmc", CLK_IGNORE_UNUSED, -+ RK3399_CLKGATE_CON(32), 10, GFLAGS), ++#define RK3066_DIV_CORE_PERIPH_MASK 0x3 ++#define RK3066_DIV_CORE_PERIPH_SHIFT 6 ++#define RK3066_DIV_ACLK_CORE_MASK 0x7 ++#define RK3066_DIV_ACLK_CORE_SHIFT 0 ++#define RK3066_DIV_ACLK_HCLK_MASK 0x3 ++#define RK3066_DIV_ACLK_HCLK_SHIFT 8 ++#define RK3066_DIV_ACLK_PCLK_MASK 0x3 ++#define RK3066_DIV_ACLK_PCLK_SHIFT 12 ++#define RK3066_DIV_AHB2APB_MASK 0x3 ++#define RK3066_DIV_AHB2APB_SHIFT 14 + -+ /* perilp0 */ -+ GATE(0, "cpll_aclk_perilp0_src", "cpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(0, "gpll_aclk_perilp0_src", "gpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(7), 0, GFLAGS), -+ COMPOSITE(ACLK_PERILP0, "aclk_perilp0", mux_aclk_perilp0_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(23), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(7), 2, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PERILP0, "hclk_perilp0", "aclk_perilp0", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(23), 8, 2, DFLAGS, -+ RK3399_CLKGATE_CON(7), 3, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERILP0, "pclk_perilp0", "aclk_perilp0", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(23), 12, 3, DFLAGS, -+ RK3399_CLKGATE_CON(7), 4, GFLAGS), ++#define RK3066_CLKSEL0(_core_peri) \ ++ { \ ++ .reg = RK2928_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_core_peri, RK3066_DIV_CORE_PERIPH_MASK, \ ++ RK3066_DIV_CORE_PERIPH_SHIFT) \ ++ } ++#define RK3066_CLKSEL1(_aclk_core, _aclk_hclk, _aclk_pclk, _ahb2apb) \ ++ { \ ++ .reg = RK2928_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK3066_DIV_ACLK_CORE_MASK, \ ++ RK3066_DIV_ACLK_CORE_SHIFT) | \ ++ HIWORD_UPDATE(_aclk_hclk, RK3066_DIV_ACLK_HCLK_MASK, \ ++ RK3066_DIV_ACLK_HCLK_SHIFT) | \ ++ HIWORD_UPDATE(_aclk_pclk, RK3066_DIV_ACLK_PCLK_MASK, \ ++ RK3066_DIV_ACLK_PCLK_SHIFT) | \ ++ HIWORD_UPDATE(_ahb2apb, RK3066_DIV_AHB2APB_MASK, \ ++ RK3066_DIV_AHB2APB_SHIFT), \ ++ } + -+ /* aclk_perilp0 gates */ -+ GATE(ACLK_INTMEM, "aclk_intmem", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 0, GFLAGS), -+ GATE(ACLK_TZMA, "aclk_tzma", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(SCLK_INTMEM0, "clk_intmem0", "aclk_perilp0", 
CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(SCLK_INTMEM1, "clk_intmem1", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 3, GFLAGS), -+ GATE(SCLK_INTMEM2, "clk_intmem2", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(SCLK_INTMEM3, "clk_intmem3", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 5, GFLAGS), -+ GATE(SCLK_INTMEM4, "clk_intmem4", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 6, GFLAGS), -+ GATE(SCLK_INTMEM5, "clk_intmem5", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 7, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_perilp0", 0, RK3399_CLKGATE_CON(23), 8, GFLAGS), -+ GATE(ACLK_DMAC0_PERILP, "aclk_dmac0_perilp", "aclk_perilp0", 0, RK3399_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(ACLK_DMAC1_PERILP, "aclk_dmac1_perilp", "aclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 6, GFLAGS), -+ GATE(ACLK_PERILP0_NOC, "aclk_perilp0_noc", "aclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 7, GFLAGS), ++#define RK3066_CPUCLK_RATE(_prate, _core_peri, _acore, _ahclk, _apclk, _h2p) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3066_CLKSEL0(_core_peri), \ ++ RK3066_CLKSEL1(_acore, _ahclk, _apclk, _h2p), \ ++ }, \ ++ } + -+ /* hclk_perilp0 gates */ -+ GATE(HCLK_ROM, "hclk_rom", "hclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(24), 4, GFLAGS), -+ GATE(HCLK_M_CRYPTO0, "hclk_m_crypto0", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 5, GFLAGS), -+ GATE(HCLK_S_CRYPTO0, "hclk_s_crypto0", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 6, GFLAGS), -+ GATE(HCLK_M_CRYPTO1, "hclk_m_crypto1", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 14, GFLAGS), -+ GATE(HCLK_S_CRYPTO1, "hclk_s_crypto1", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 15, GFLAGS), -+ GATE(HCLK_PERILP0_NOC, "hclk_perilp0_noc", "hclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 8, GFLAGS), ++static struct rockchip_cpuclk_rate_table rk3066_cpuclk_rates[] __initdata = { ++ RK3066_CPUCLK_RATE(1416000000, 2, 3, 1, 2, 1), ++ RK3066_CPUCLK_RATE(1200000000, 2, 3, 1, 2, 1), ++ RK3066_CPUCLK_RATE(1008000000, 2, 2, 1, 2, 1), ++ RK3066_CPUCLK_RATE( 816000000, 2, 2, 1, 2, 1), ++ RK3066_CPUCLK_RATE( 600000000, 1, 2, 1, 2, 1), ++ RK3066_CPUCLK_RATE( 504000000, 1, 1, 1, 2, 1), ++ RK3066_CPUCLK_RATE( 312000000, 0, 1, 1, 1, 0), ++}; + -+ /* pclk_perilp0 gates */ -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_perilp0", 0, RK3399_CLKGATE_CON(23), 9, GFLAGS), ++static const struct rockchip_cpuclk_reg_data rk3066_cpuclk_data = { ++ .core_reg[0] = RK2928_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 8, ++ .mux_core_mask = 0x1, ++}; + -+ /* crypto */ -+ COMPOSITE(SCLK_CRYPTO0, "clk_crypto0", mux_pll_src_cpll_gpll_ppll_p, 0, -+ RK3399_CLKSEL_CON(24), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(7), 7, GFLAGS), ++#define RK3188_DIV_ACLK_CORE_MASK 0x7 ++#define RK3188_DIV_ACLK_CORE_SHIFT 3 + -+ COMPOSITE(SCLK_CRYPTO1, "clk_crypto1", mux_pll_src_cpll_gpll_ppll_p, 0, -+ RK3399_CLKSEL_CON(26), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(7), 8, GFLAGS), ++#define RK3188_CLKSEL1(_aclk_core) \ ++ { \ ++ .reg = RK2928_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK3188_DIV_ACLK_CORE_MASK,\ ++ RK3188_DIV_ACLK_CORE_SHIFT) \ ++ } ++#define RK3188_CPUCLK_RATE(_prate, _core_peri, _aclk_core) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3066_CLKSEL0(_core_peri), \ ++ RK3188_CLKSEL1(_aclk_core), \ ++ }, \ ++ } + -+ /* cm0s_perilp */ -+ GATE(0, 
"cpll_fclk_cm0s_src", "cpll", 0, -+ RK3399_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(0, "gpll_fclk_cm0s_src", "gpll", 0, -+ RK3399_CLKGATE_CON(7), 5, GFLAGS), -+ COMPOSITE(FCLK_CM0S, "fclk_cm0s", mux_fclk_cm0s_p, 0, -+ RK3399_CLKSEL_CON(24), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(7), 9, GFLAGS), ++static struct rockchip_cpuclk_rate_table rk3188_cpuclk_rates[] __initdata = { ++ RK3188_CPUCLK_RATE(1608000000, 2, 3), ++ RK3188_CPUCLK_RATE(1416000000, 2, 3), ++ RK3188_CPUCLK_RATE(1200000000, 2, 3), ++ RK3188_CPUCLK_RATE(1008000000, 2, 3), ++ RK3188_CPUCLK_RATE( 816000000, 2, 3), ++ RK3188_CPUCLK_RATE( 600000000, 1, 3), ++ RK3188_CPUCLK_RATE( 504000000, 1, 3), ++ RK3188_CPUCLK_RATE( 312000000, 0, 1), ++}; + -+ /* fclk_cm0s gates */ -+ GATE(SCLK_M0_PERILP, "sclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 8, GFLAGS), -+ GATE(HCLK_M0_PERILP, "hclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 9, GFLAGS), -+ GATE(DCLK_M0_PERILP, "dclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 10, GFLAGS), -+ GATE(SCLK_M0_PERILP_DEC, "clk_m0_perilp_dec", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 11, GFLAGS), -+ GATE(HCLK_M0_PERILP_NOC, "hclk_m0_perilp_noc", "fclk_cm0s", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 11, GFLAGS), ++static const struct rockchip_cpuclk_reg_data rk3188_cpuclk_data = { ++ .core_reg[0] = RK2928_CLKSEL_CON(0), ++ .div_core_shift[0] = 9, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 8, ++ .mux_core_mask = 0x1, ++}; + -+ /* perilp1 */ -+ GATE(0, "cpll_hclk_perilp1_src", "cpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(8), 1, GFLAGS), -+ GATE(0, "gpll_hclk_perilp1_src", "gpll", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOGATE(HCLK_PERILP1, "hclk_perilp1", mux_hclk_perilp1_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(PCLK_PERILP1, "pclk_perilp1", "hclk_perilp1", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(25), 8, 3, DFLAGS, -+ RK3399_CLKGATE_CON(8), 2, GFLAGS), ++PNAME(mux_pll_p) = { "xin24m", "xin32k" }; ++PNAME(mux_ddrphy_p) = { "dpll", "gpll_ddr" }; ++PNAME(mux_pll_src_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; ++PNAME(mux_aclk_cpu_p) = { "apll", "gpll" }; ++PNAME(mux_sclk_cif0_p) = { "cif0_pre", "xin24m" }; ++PNAME(mux_sclk_i2s0_p) = { "i2s0_pre", "i2s0_frac", "xin12m" }; ++PNAME(mux_sclk_spdif_p) = { "spdif_pre", "spdif_frac", "xin12m" }; ++PNAME(mux_sclk_uart0_p) = { "uart0_pre", "uart0_frac", "xin24m" }; ++PNAME(mux_sclk_uart1_p) = { "uart1_pre", "uart1_frac", "xin24m" }; ++PNAME(mux_sclk_uart2_p) = { "uart2_pre", "uart2_frac", "xin24m" }; ++PNAME(mux_sclk_uart3_p) = { "uart3_pre", "uart3_frac", "xin24m" }; ++PNAME(mux_sclk_hsadc_p) = { "hsadc_src", "hsadc_frac", "ext_hsadc" }; ++PNAME(mux_mac_p) = { "gpll", "dpll" }; ++PNAME(mux_sclk_macref_p) = { "mac_src", "ext_rmii" }; + -+ /* hclk_perilp1 gates */ -+ GATE(0, "hclk_perilp1_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 9, GFLAGS), -+ GATE(0, "hclk_sdio_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 12, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 0, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 1, GFLAGS), -+ GATE(HCLK_I2S2_8CH, "hclk_i2s2", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 2, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 3, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", 
"hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 4, GFLAGS), -+ GATE(PCLK_SPI5, "pclk_spi5", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 5, GFLAGS), -+ GATE(0, "hclk_sdioaudio_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(34), 6, GFLAGS), ++static struct rockchip_pll_clock rk3066_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), ++ RK2928_MODE_CON, 0, 5, 0, rk3188_pll_rates), ++ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), ++ RK2928_MODE_CON, 4, 4, 0, NULL), ++ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), ++ RK2928_MODE_CON, 8, 6, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), ++ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), ++ RK2928_MODE_CON, 12, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), ++}; + -+ /* pclk_perilp1 gates */ -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 0, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 1, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 2, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(PCLK_I2C7, "pclk_rki2c7", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 5, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_rki2c1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 6, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_rki2c5", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 7, GFLAGS), -+ GATE(PCLK_I2C6, "pclk_rki2c6", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 8, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_rki2c2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 9, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_rki2c3", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 10, GFLAGS), -+ GATE(PCLK_MAILBOX0, "pclk_mailbox0", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 11, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 12, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 13, GFLAGS), -+ GATE(PCLK_EFUSE1024NS, "pclk_efuse1024ns", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 14, GFLAGS), -+ GATE(PCLK_EFUSE1024S, "pclk_efuse1024s", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 15, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 10, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 11, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 12, GFLAGS), -+ GATE(PCLK_SPI4, "pclk_spi4", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 13, GFLAGS), -+ GATE(PCLK_PERIHP_GRF, "pclk_perilp_sgrf", "pclk_perilp1", 0, RK3399_CLKGATE_CON(24), 13, GFLAGS), -+ GATE(0, "pclk_perilp1_noc", "pclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 10, GFLAGS), ++static struct rockchip_pll_clock rk3188_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), ++ RK2928_MODE_CON, 0, 6, 0, rk3188_pll_rates), ++ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4), ++ RK2928_MODE_CON, 4, 5, 0, NULL), ++ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8), ++ RK2928_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), ++ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12), ++ RK2928_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates), ++}; + -+ /* saradc */ -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3399_CLKSEL_CON(26), 8, 8, DFLAGS, -+ 
RK3399_CLKGATE_CON(9), 11, GFLAGS), ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK + -+ /* tsadc */ -+ COMPOSITE(SCLK_TSADC, "clk_tsadc", mux_pll_p, 0, -+ RK3399_CLKSEL_CON(27), 15, 1, MFLAGS, 0, 10, DFLAGS, -+ RK3399_CLKGATE_CON(9), 10, GFLAGS), ++/* 2 ^ (val + 1) */ ++static struct clk_div_table div_core_peri_t[] = { ++ { .val = 0, .div = 2 }, ++ { .val = 1, .div = 4 }, ++ { .val = 2, .div = 8 }, ++ { .val = 3, .div = 16 }, ++ { /* sentinel */ }, ++}; + -+ /* cif_testout */ -+ MUX(0, "clk_testout1_pll_src", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(38), 6, 2, MFLAGS), -+ COMPOSITE(SCLK_TESTCLKOUT1, "clk_testout1", mux_clk_testout1_p, 0, -+ RK3399_CLKSEL_CON(38), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 14, GFLAGS), ++static struct rockchip_clk_branch common_hsadc_out_fracmux __initdata = ++ MUX(0, "sclk_hsadc_out", mux_sclk_hsadc_p, 0, ++ RK2928_CLKSEL_CON(22), 4, 2, MFLAGS); + -+ MUX(0, "clk_testout2_pll_src", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(38), 14, 2, MFLAGS), -+ COMPOSITE(SCLK_TESTCLKOUT2, "clk_testout2", mux_clk_testout2_p, 0, -+ RK3399_CLKSEL_CON(38), 13, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 15, GFLAGS), ++static struct rockchip_clk_branch common_spdif_fracmux __initdata = ++ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(5), 8, 2, MFLAGS); + -+ /* vio */ -+ COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(43), 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 1, GFLAGS), ++static struct rockchip_clk_branch common_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); + -+ GATE(ACLK_VIO_NOC, "aclk_vio_noc", "aclk_vio", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(29), 0, GFLAGS), ++static struct rockchip_clk_branch common_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_sclk_uart1_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); + -+ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "pclk_vio", 0, -+ RK3399_CLKGATE_CON(29), 1, GFLAGS), -+ GATE(PCLK_MIPI_DSI1, "pclk_mipi_dsi1", "pclk_vio", 0, -+ RK3399_CLKGATE_CON(29), 2, GFLAGS), -+ GATE(PCLK_VIO_GRF, "pclk_vio_grf", "pclk_vio", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(29), 12, GFLAGS), ++static struct rockchip_clk_branch common_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); + -+ /* hdcp */ -+ COMPOSITE_NOGATE(ACLK_HDCP, "aclk_hdcp", mux_pll_src_cpll_gpll_ppll_p, 0, -+ RK3399_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS), -+ COMPOSITE_NOMUX(HCLK_HDCP, "hclk_hdcp", "aclk_hdcp", 0, -+ RK3399_CLKSEL_CON(43), 5, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 3, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_HDCP, "pclk_hdcp", "aclk_hdcp", 0, -+ RK3399_CLKSEL_CON(43), 10, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 10, GFLAGS), ++static struct rockchip_clk_branch common_uart3_fracmux __initdata = ++ MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS); + -+ GATE(ACLK_HDCP_NOC, "aclk_hdcp_noc", "aclk_hdcp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(29), 4, GFLAGS), -+ 
GATE(ACLK_HDCP22, "aclk_hdcp22", "aclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 10, GFLAGS), ++static struct rockchip_clk_branch common_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ GATE(HCLK_HDCP_NOC, "hclk_hdcp_noc", "hclk_hdcp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(29), 5, GFLAGS), -+ GATE(HCLK_HDCP22, "hclk_hdcp22", "hclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 9, GFLAGS), ++ GATE(0, "gpll_armclk", "gpll", 0, RK2928_CLKGATE_CON(0), 1, GFLAGS), + -+ GATE(PCLK_HDCP_NOC, "pclk_hdcp_noc", "pclk_hdcp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(29), 3, GFLAGS), -+ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "pclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 6, GFLAGS), -+ GATE(PCLK_DP_CTRL, "pclk_dp_ctrl", "pclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 7, GFLAGS), -+ GATE(PCLK_HDCP22, "pclk_hdcp22", "pclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 8, GFLAGS), -+ GATE(PCLK_GASKET, "pclk_gasket", "pclk_hdcp", 0, -+ RK3399_CLKGATE_CON(29), 11, GFLAGS), ++ /* these two are set by the cpuclk and should not be changed */ ++ COMPOSITE_NOMUX_DIVTBL(CORE_PERI, "core_peri", "armclk", 0, ++ RK2928_CLKSEL_CON(0), 6, 2, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ div_core_peri_t, RK2928_CLKGATE_CON(0), 0, GFLAGS), + -+ /* edp */ -+ COMPOSITE(SCLK_DP_CORE, "clk_dp_core", mux_pll_src_npll_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE(ACLK_VEPU, "aclk_vepu", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(32), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 9, GFLAGS), ++ GATE(HCLK_VEPU, "hclk_vepu", "aclk_vepu", 0, ++ RK2928_CLKGATE_CON(3), 10, GFLAGS), ++ COMPOSITE(ACLK_VDPU, "aclk_vdpu", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(32), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(HCLK_VDPU, "hclk_vdpu", "aclk_vdpu", 0, ++ RK2928_CLKGATE_CON(3), 12, GFLAGS), + -+ COMPOSITE(PCLK_EDP, "pclk_edp", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(44), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RK3399_CLKGATE_CON(11), 11, GFLAGS), -+ GATE(PCLK_EDP_NOC, "pclk_edp_noc", "pclk_edp", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(32), 12, GFLAGS), -+ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "pclk_edp", 0, -+ RK3399_CLKGATE_CON(32), 13, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 7, GFLAGS), ++ COMPOSITE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), + -+ /* hdmi */ -+ GATE(SCLK_HDMI_SFR, "clk_hdmi_sfr", "xin24m", 0, -+ RK3399_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 3, GFLAGS), + -+ COMPOSITE(SCLK_HDMI_CEC, "clk_hdmi_cec", mux_pll_p, 0, -+ RK3399_CLKSEL_CON(45), 15, 1, MFLAGS, 0, 10, DFLAGS, -+ RK3399_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(0, "atclk_cpu", "pclk_cpu_pre", 0, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_CPU, "pclk_cpu", "pclk_cpu_pre", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(HCLK_CPU, "hclk_cpu", "hclk_cpu_pre", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 4, GFLAGS), + -+ /* vop0 */ -+ COMPOSITE(ACLK_VOP0_PRE, "aclk_vop0_pre", mux_pll_src_dmyvpll_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(47), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vop0_pre", "aclk_vop0_pre", 0, -+ RK3399_CLKSEL_CON(47), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 9, GFLAGS), ++ COMPOSITE(0, "aclk_lcdc0_pre", mux_pll_src_cpll_gpll_p, 
CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(31), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE(0, "aclk_lcdc1_pre", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(31), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 4, GFLAGS), + -+ GATE(ACLK_VOP0, "aclk_vop0", "aclk_vop0_pre", 0, -+ RK3399_CLKGATE_CON(28), 3, GFLAGS), -+ GATE(ACLK_VOP0_NOC, "aclk_vop0_noc", "aclk_vop0_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(28), 1, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(2), 2, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(2), 3, GFLAGS), + -+ GATE(HCLK_VOP0, "hclk_vop0", "hclk_vop0_pre", 0, -+ RK3399_CLKGATE_CON(28), 2, GFLAGS), -+ GATE(HCLK_VOP0_NOC, "hclk_vop0_noc", "hclk_vop0_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(28), 0, GFLAGS), ++ MUX(0, "cif_src", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(29), 0, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "cif0_pre", "cif_src", 0, ++ RK2928_CLKSEL_CON(29), 1, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 7, GFLAGS), ++ MUX(SCLK_CIF0, "sclk_cif0", mux_sclk_cif0_p, 0, ++ RK2928_CLKSEL_CON(29), 7, 1, MFLAGS), + -+#ifdef RK3399_TWO_PLL_FOR_VOP -+ COMPOSITE(DCLK_VOP0_DIV, "dclk_vop0_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3399_CLKSEL_CON(49), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3399_CLKGATE_CON(10), 12, GFLAGS), -+#else -+ COMPOSITE(DCLK_VOP0_DIV, "dclk_vop0_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(49), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3399_CLKGATE_CON(10), 12, GFLAGS), -+#endif ++ GATE(0, "pclkin_cif0", "ext_cif0", 0, ++ RK2928_CLKGATE_CON(3), 3, GFLAGS), ++ INVERTER(0, "pclk_cif0", "pclkin_cif0", ++ RK2928_CLKSEL_CON(30), 8, IFLAGS), + -+ /* The VOP0 is main screen, it is able to re-set parent rate. */ -+ COMPOSITE_FRACMUX_NOGATE(0, "dclk_vop0_frac", "dclk_vop0_div", CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(106), 0, -+ &rk3399_dclk_vop0_fracmux), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ COMPOSITE(SCLK_VOP0_PWM, "clk_vop0_pwm", mux_pll_src_dmyvpll_cpll_gpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 14, GFLAGS), ++ /* ++ * the 480m are generated inside the usb block from these clocks, ++ * but they are also a source for the hsicphy clock. 
++ */ ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 6, GFLAGS), + -+ /* vop1 */ -+ COMPOSITE(ACLK_VOP1_PRE, "aclk_vop1_pre", mux_pll_src_dmyvpll_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vop1_pre", "aclk_vop1_pre", 0, -+ RK3399_CLKSEL_CON(48), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 11, GFLAGS), ++ COMPOSITE(0, "mac_src", mux_mac_p, 0, ++ RK2928_CLKSEL_CON(21), 0, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 5, GFLAGS), ++ MUX(SCLK_MAC, "sclk_macref", mux_sclk_macref_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(21), 4, 1, MFLAGS), ++ GATE(0, "sclk_mac_lbtest", "sclk_macref", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 12, GFLAGS), + -+ GATE(ACLK_VOP1, "aclk_vop1", "aclk_vop1_pre", 0, -+ RK3399_CLKGATE_CON(28), 7, GFLAGS), -+ GATE(ACLK_VOP1_NOC, "aclk_vop1_noc", "aclk_vop1_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(28), 5, GFLAGS), ++ COMPOSITE(0, "hsadc_src", mux_pll_src_gpll_cpll_p, 0, ++ RK2928_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_FRACMUX(0, "hsadc_frac", "hsadc_src", 0, ++ RK2928_CLKSEL_CON(23), 0, ++ RK2928_CLKGATE_CON(2), 7, GFLAGS, ++ &common_hsadc_out_fracmux), ++ INVERTER(SCLK_HSADC, "sclk_hsadc", "sclk_hsadc_out", ++ RK2928_CLKSEL_CON(22), 7, IFLAGS), + -+ GATE(HCLK_VOP1, "hclk_vop1", "hclk_vop1_pre", 0, -+ RK3399_CLKGATE_CON(28), 6, GFLAGS), -+ GATE(HCLK_VOP1_NOC, "hclk_vop1_noc", "hclk_vop1_pre", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(28), 4, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, ++ RK2928_CLKSEL_CON(24), 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(2), 8, GFLAGS), + -+ /* The VOP1 is sub screen, it is note able to re-set parent rate. 
*/ -+#ifdef RK3399_TWO_PLL_FOR_VOP -+ COMPOSITE(DCLK_VOP1_DIV, "dclk_vop1_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3399_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3399_CLKGATE_CON(10), 13, GFLAGS), -+#else -+ COMPOSITE(DCLK_VOP1_DIV, "dclk_vop1_div", mux_pll_src_dmyvpll_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3399_CLKGATE_CON(10), 13, GFLAGS), -+#endif ++ COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0, ++ RK2928_CLKSEL_CON(5), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(9), 0, ++ RK2928_CLKGATE_CON(0), 14, GFLAGS, ++ &common_spdif_fracmux), + -+ COMPOSITE_FRACMUX_NOGATE(DCLK_VOP1_FRAC, "dclk_vop1_frac", "dclk_vop1_div", 0, -+ RK3399_CLKSEL_CON(107), 0, -+ &rk3399_dclk_vop1_fracmux), ++ /* ++ * Clock-Architecture Diagram 4 ++ */ + -+ COMPOSITE(SCLK_VOP1_PWM, "clk_vop1_pwm", mux_pll_src_dmyvpll_cpll_gpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(52), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(10), 15, GFLAGS), ++ GATE(SCLK_SMC, "sclk_smc", "hclk_peri", 0, ++ RK2928_CLKGATE_CON(2), 4, GFLAGS), + -+ /* isp */ -+ COMPOSITE(ACLK_ISP0, "aclk_isp0", mux_pll_src_cpll_gpll_ppll_p, 0, -+ RK3399_CLKSEL_CON(53), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 8, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_ISP0, "hclk_isp0", "aclk_isp0", 0, -+ RK3399_CLKSEL_CON(53), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 9, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SPI0, "sclk_spi0", "pclk_peri", 0, ++ RK2928_CLKSEL_CON(25), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SPI1, "sclk_spi1", "pclk_peri", 0, ++ RK2928_CLKSEL_CON(25), 8, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 10, GFLAGS), + -+ GATE(ACLK_ISP0_NOC, "aclk_isp0_noc", "aclk_isp0", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(27), 1, GFLAGS), -+ GATE(ACLK_ISP0_WRAPPER, "aclk_isp0_wrapper", "aclk_isp0", 0, -+ RK3399_CLKGATE_CON(27), 5, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SDMMC, "sclk_sdmmc", "hclk_peri", 0, ++ RK2928_CLKSEL_CON(11), 0, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SDIO, "sclk_sdio", "hclk_peri", 0, ++ RK2928_CLKSEL_CON(12), 0, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_EMMC, "sclk_emmc", "hclk_peri", 0, ++ RK2928_CLKSEL_CON(12), 8, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 14, GFLAGS), + -+ GATE(HCLK_ISP0_NOC, "hclk_isp0_noc", "hclk_isp0", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(HCLK_ISP0_WRAPPER, "hclk_isp0_wrapper", "hclk_isp0", 0, -+ RK3399_CLKGATE_CON(27), 4, GFLAGS), ++ MUX(0, "uart_src", mux_pll_src_gpll_cpll_p, 0, ++ RK2928_CLKSEL_CON(12), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "uart0_pre", "uart_src", 0, ++ RK2928_CLKSEL_CON(13), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(17), 0, ++ RK2928_CLKGATE_CON(1), 9, GFLAGS, ++ &common_uart0_fracmux), ++ COMPOSITE_NOMUX(0, "uart1_pre", "uart_src", 0, ++ RK2928_CLKSEL_CON(14), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(18), 0, ++ RK2928_CLKGATE_CON(1), 11, GFLAGS, ++ &common_uart1_fracmux), ++ COMPOSITE_NOMUX(0, "uart2_pre", "uart_src", 0, ++ RK2928_CLKSEL_CON(15), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(19), 0, ++ 
RK2928_CLKGATE_CON(1), 13, GFLAGS, ++ &common_uart2_fracmux), ++ COMPOSITE_NOMUX(0, "uart3_pre", "uart_src", 0, ++ RK2928_CLKSEL_CON(16), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(20), 0, ++ RK2928_CLKGATE_CON(1), 15, GFLAGS, ++ &common_uart3_fracmux), + -+ COMPOSITE(SCLK_ISP0, "clk_isp0", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(55), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(SCLK_JTAG, "jtag", "ext_jtag", 0, RK2928_CLKGATE_CON(1), 3, GFLAGS), + -+ COMPOSITE(ACLK_ISP1, "aclk_isp1", mux_pll_src_cpll_gpll_ppll_p, 0, -+ RK3399_CLKSEL_CON(54), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 10, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_ISP1, "hclk_isp1", "aclk_isp1", 0, -+ RK3399_CLKSEL_CON(54), 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 11, GFLAGS), ++ GATE(SCLK_TIMER0, "timer0", "xin24m", 0, RK2928_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(SCLK_TIMER1, "timer1", "xin24m", 0, RK2928_CLKGATE_CON(1), 1, GFLAGS), + -+ GATE(ACLK_ISP1_NOC, "aclk_isp1_noc", "aclk_isp1", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(27), 3, GFLAGS), -+ GATE(ACLK_ISP1_WRAPPER, "aclk_isp1_wrapper", "aclk_isp1", 0, -+ RK3399_CLKGATE_CON(27), 8, GFLAGS), ++ /* clk_core_pre gates */ ++ GATE(0, "core_dbg", "armclk", 0, RK2928_CLKGATE_CON(9), 0, GFLAGS), + -+ GATE(HCLK_ISP1_NOC, "hclk_isp1_noc", "hclk_isp1", CLK_IS_CRITICAL, -+ RK3399_CLKGATE_CON(27), 2, GFLAGS), -+ GATE(HCLK_ISP1_WRAPPER, "hclk_isp1_wrapper", "hclk_isp1", 0, -+ RK3399_CLKGATE_CON(27), 7, GFLAGS), ++ /* aclk_cpu gates */ ++ GATE(ACLK_DMA1, "aclk_dma1", "aclk_cpu", 0, RK2928_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS), + -+ COMPOSITE(SCLK_ISP1, "clk_isp1", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(55), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(11), 5, GFLAGS), ++ /* hclk_cpu gates */ ++ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK2928_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(HCLK_I2S0_2CH, "hclk_i2s0_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(0, "hclk_cpubus", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(4), 8, GFLAGS), ++ /* hclk_ahb2apb is part of a clk branch */ ++ GATE(0, "hclk_vio_bus", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(HCLK_LCDC0, "hclk_lcdc0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(HCLK_LCDC1, "hclk_lcdc1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(HCLK_CIF0, "hclk_cif0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(HCLK_IPP, "hclk_ipp", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 10, GFLAGS), + -+ /* -+ * We use pclkin_cifinv by default GRF_SOC_CON20[9] (GSC20_9) setting in system, -+ * so we ignore the mux and make clocks nodes as following, -+ * -+ * pclkin_cifinv --|-------\ -+ * |GSC20_9|-- pclkin_cifmux -- |G27_6| -- pclkin_isp1_wrapper -+ * pclkin_cif --|-------/ -+ */ -+ GATE(PCLK_ISP1_WRAPPER, "pclkin_isp1_wrapper", "pclkin_cif", 0, -+ RK3399_CLKGATE_CON(27), 6, GFLAGS), ++ /* hclk_peri gates */ ++ GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, 
RK2928_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(0, "hclk_emem_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(HCLK_EMAC, "hclk_emac", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(HCLK_OTG0, "hclk_usbotg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(HCLK_PIDF, "hclk_pidfilter", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 12, GFLAGS), + -+ /* cif */ -+ COMPOSITE_NODIV(SCLK_CIF_OUT_SRC, "clk_cifout_src", mux_pll_src_cpll_gpll_npll_p, 0, -+ RK3399_CLKSEL_CON(56), 6, 2, MFLAGS, -+ RK3399_CLKGATE_CON(10), 7, GFLAGS), ++ /* aclk_lcdc0_pre gates */ ++ GATE(0, "aclk_vio0", "aclk_lcdc0_pre", 0, RK2928_CLKGATE_CON(6), 13, GFLAGS), ++ GATE(ACLK_LCDC0, "aclk_lcdc0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(ACLK_CIF0, "aclk_cif0", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(ACLK_IPP, "aclk_ipp", "aclk_vio0", 0, RK2928_CLKGATE_CON(6), 8, GFLAGS), + -+ COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cifout", mux_clk_cif_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(56), 5, 1, MFLAGS, 0, 5, DFLAGS), ++ /* aclk_lcdc1_pre gates */ ++ GATE(0, "aclk_vio1", "aclk_lcdc1_pre", 0, RK2928_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(ACLK_LCDC1, "aclk_lcdc1", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 11, GFLAGS), + -+ /* gic */ -+ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, -+ RK3399_CLKSEL_CON(56), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_CLKGATE_CON(12), 12, GFLAGS), ++ /* atclk_cpu gates */ ++ GATE(0, "atclk", "atclk_cpu", 0, RK2928_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(0, "trace", "atclk_cpu", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), + -+ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(33), 0, GFLAGS), -+ GATE(ACLK_GIC_NOC, "aclk_gic_noc", "aclk_gic_pre", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(33), 1, GFLAGS), -+ GATE(ACLK_GIC_ADB400_CORE_L_2_GIC, "aclk_gic_adb400_core_l_2_gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 2, GFLAGS), -+ GATE(ACLK_GIC_ADB400_CORE_B_2_GIC, "aclk_gic_adb400_core_b_2_gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 3, GFLAGS), -+ GATE(ACLK_GIC_ADB400_GIC_2_CORE_L, "aclk_gic_adb400_gic_2_core_l", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 4, GFLAGS), -+ GATE(ACLK_GIC_ADB400_GIC_2_CORE_B, "aclk_gic_adb400_gic_2_core_b", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 5, GFLAGS), ++ /* pclk_cpu gates */ ++ GATE(PCLK_PWM01, "pclk_pwm01", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), ++ 
GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), ++ GATE(PCLK_EFUSE, "pclk_efuse", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(PCLK_PUBL, "pclk_ddrpubl", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(0, "pclk_dbg", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 1, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 5, GFLAGS), + -+ /* alive */ -+ /* pclk_alive_gpll_src is controlled by PMUGRF_SOC_CON0[6] */ -+ DIV(PCLK_ALIVE, "pclk_alive", "gpll", 0, -+ RK3399_CLKSEL_CON(57), 0, 5, DFLAGS), ++ /* aclk_peri */ ++ GATE(ACLK_DMA2, "aclk_dma2", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(ACLK_SMC, "aclk_smc", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 8, GFLAGS), ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 4, GFLAGS), ++ GATE(0, "aclk_cpu_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS), ++ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS), + -+ GATE(PCLK_USBPHY_MUX_G, "pclk_usbphy_mux_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 4, GFLAGS), -+ GATE(PCLK_UPHY0_TCPHY_G, "pclk_uphy0_tcphy_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(PCLK_UPHY0_TCPD_G, "pclk_uphy0_tcpd_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 6, GFLAGS), -+ GATE(PCLK_UPHY1_TCPHY_G, "pclk_uphy1_tcphy_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 8, GFLAGS), -+ GATE(PCLK_UPHY1_TCPD_G, "pclk_uphy1_tcpd_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 9, GFLAGS), ++ /* pclk_peri gates */ ++ GATE(0, "pclk_peri_axi_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(PCLK_PWM23, "pclk_pwm23", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 13, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 14, GFLAGS), ++}; + -+ GATE(PCLK_GRF, "pclk_grf", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 1, GFLAGS), -+ GATE(PCLK_INTR_ARB, "pclk_intr_arb", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 2, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 3, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 4, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 5, GFLAGS), -+ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 6, GFLAGS), -+ 
GATE(PCLK_TIMER1, "pclk_timer1", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 7, GFLAGS), -+ GATE(PCLK_PMU_INTR_ARB, "pclk_pmu_intr_arb", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 9, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 10, GFLAGS), ++PNAME(mux_rk3066_lcdc0_p) = { "dclk_lcdc0_src", "xin27m" }; ++PNAME(mux_rk3066_lcdc1_p) = { "dclk_lcdc1_src", "xin27m" }; ++PNAME(mux_sclk_cif1_p) = { "cif1_pre", "xin24m" }; ++PNAME(mux_sclk_i2s1_p) = { "i2s1_pre", "i2s1_frac", "xin12m" }; ++PNAME(mux_sclk_i2s2_p) = { "i2s2_pre", "i2s2_frac", "xin12m" }; + -+ /* Watchdog pclk is controlled by RK3399 SECURE_GRF_SOC_CON3[8]. */ -+ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_alive"), ++static struct clk_div_table div_aclk_cpu_t[] = { ++ { .val = 0, .div = 1 }, ++ { .val = 1, .div = 2 }, ++ { .val = 2, .div = 3 }, ++ { .val = 3, .div = 4 }, ++ { .val = 4, .div = 8 }, ++ { /* sentinel */ }, ++}; + -+ GATE(SCLK_MIPIDPHY_REF, "clk_mipidphy_ref", "xin24m", 0, RK3399_CLKGATE_CON(11), 14, GFLAGS), -+ GATE(SCLK_DPHY_PLL, "clk_dphy_pll", "clk_mipidphy_ref", 0, RK3399_CLKGATE_CON(21), 0, GFLAGS), ++static struct rockchip_clk_branch rk3066a_i2s0_fracmux __initdata = ++ MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(2), 8, 2, MFLAGS); + -+ GATE(SCLK_MIPIDPHY_CFG, "clk_mipidphy_cfg", "xin24m", 0, RK3399_CLKGATE_CON(11), 15, GFLAGS), -+ GATE(SCLK_DPHY_TX0_CFG, "clk_dphy_tx0_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 1, GFLAGS), -+ GATE(SCLK_DPHY_TX1RX1_CFG, "clk_dphy_tx1rx1_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(SCLK_DPHY_RX0_CFG, "clk_dphy_rx0_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 3, GFLAGS), ++static struct rockchip_clk_branch rk3066a_i2s1_fracmux __initdata = ++ MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); + -+ /* testout */ -+ MUX(0, "clk_test_pre", mux_pll_src_cpll_gpll_p, CLK_SET_RATE_PARENT, -+ RK3399_CLKSEL_CON(58), 7, 1, MFLAGS), -+ COMPOSITE_FRAC(0, "clk_test_frac", "clk_test_pre", 0, -+ RK3399_CLKSEL_CON(105), 0, -+ RK3399_CLKGATE_CON(13), 9, GFLAGS), ++static struct rockchip_clk_branch rk3066a_i2s2_fracmux __initdata = ++ MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(4), 8, 2, MFLAGS); + -+ DIV(0, "clk_test_24m", "xin24m", 0, -+ RK3399_CLKSEL_CON(57), 6, 10, DFLAGS), ++static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = { ++ DIVTBL(0, "aclk_cpu_pre", "armclk", 0, ++ RK2928_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, div_aclk_cpu_t), ++ DIV(0, "pclk_cpu_pre", "aclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO ++ | CLK_DIVIDER_READ_ONLY), ++ DIV(0, "hclk_cpu_pre", "aclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO ++ | CLK_DIVIDER_READ_ONLY), ++ COMPOSITE_NOMUX(0, "hclk_ahb2apb", "hclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 14, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO ++ | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(4), 9, GFLAGS), + -+ /* spi */ -+ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(CORE_L2C, "core_l2c", "aclk_cpu", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(9), 4, GFLAGS), + -+ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 13, GFLAGS), ++ 
COMPOSITE(0, "aclk_peri_pre", mux_pll_src_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), + -+ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 14, GFLAGS), ++ COMPOSITE(0, "dclk_lcdc0_src", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 1, GFLAGS), ++ MUX(DCLK_LCDC0, "dclk_lcdc0", mux_rk3066_lcdc0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(27), 4, 1, MFLAGS), ++ COMPOSITE(0, "dclk_lcdc1_src", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(28), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++ MUX(DCLK_LCDC1, "dclk_lcdc1", mux_rk3066_lcdc1_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(28), 4, 1, MFLAGS), + -+ COMPOSITE(SCLK_SPI4, "clk_spi4", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(9), 15, GFLAGS), ++ COMPOSITE_NOMUX(0, "cif1_pre", "cif_src", 0, ++ RK2928_CLKSEL_CON(29), 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 8, GFLAGS), ++ MUX(SCLK_CIF1, "sclk_cif1", mux_sclk_cif1_p, 0, ++ RK2928_CLKSEL_CON(29), 15, 1, MFLAGS), + -+ COMPOSITE(SCLK_SPI5, "clk_spi5", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(58), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(13), 13, GFLAGS), ++ GATE(0, "pclkin_cif1", "ext_cif1", 0, ++ RK2928_CLKGATE_CON(3), 4, GFLAGS), ++ INVERTER(0, "pclk_cif1", "pclkin_cif1", ++ RK2928_CLKSEL_CON(30), 12, IFLAGS), + -+ /* i2c */ -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 13, GFLAGS), ++ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_src", 0, ++ RK2928_CLKGATE_CON(5), 15, GFLAGS), + -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(62), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(SCLK_TIMER2, "timer2", "xin24m", 0, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), + -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(63), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 4, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, ++ RK2928_CLKSEL_CON(34), 0, 16, DFLAGS, ++ RK2928_CLKGATE_CON(2), 15, GFLAGS), + -+ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 1, GFLAGS), ++ MUX(0, "i2s_src", mux_pll_src_gpll_cpll_p, 0, ++ RK2928_CLKSEL_CON(2), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0, ++ RK2928_CLKSEL_CON(2), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(6), 0, ++ RK2928_CLKGATE_CON(0), 8, GFLAGS, ++ &rk3066a_i2s0_fracmux), ++ COMPOSITE_NOMUX(0, "i2s1_pre", "i2s_src", 0, ++ RK2928_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(7), 0, ++ RK2928_CLKGATE_CON(0), 10, GFLAGS, ++ &rk3066a_i2s1_fracmux), ++ COMPOSITE_NOMUX(0, "i2s2_pre", "i2s_src", 0, ++ RK2928_CLKSEL_CON(4), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", CLK_SET_RATE_PARENT, ++ 
RK2928_CLKSEL_CON(8), 0, ++ RK2928_CLKGATE_CON(0), 12, GFLAGS, ++ &rk3066a_i2s2_fracmux), + -+ COMPOSITE(SCLK_I2C6, "clk_i2c6", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(62), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(HCLK_CIF1, "hclk_cif1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(HCLK_HDMI, "hclk_hdmi", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), + -+ COMPOSITE(SCLK_I2C7, "clk_i2c7", mux_pll_src_cpll_gpll_p, 0, -+ RK3399_CLKSEL_CON(63), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3399_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(5), 14, GFLAGS), + -+ /* timer */ -+ GATE(SCLK_TIMER00, "clk_timer00", "xin24m", 0, RK3399_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(SCLK_TIMER01, "clk_timer01", "xin24m", 0, RK3399_CLKGATE_CON(26), 1, GFLAGS), -+ GATE(SCLK_TIMER02, "clk_timer02", "xin24m", 0, RK3399_CLKGATE_CON(26), 2, GFLAGS), -+ GATE(SCLK_TIMER03, "clk_timer03", "xin24m", 0, RK3399_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(SCLK_TIMER04, "clk_timer04", "xin24m", 0, RK3399_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(SCLK_TIMER05, "clk_timer05", "xin24m", 0, RK3399_CLKGATE_CON(26), 5, GFLAGS), -+ GATE(SCLK_TIMER06, "clk_timer06", "xin24m", 0, RK3399_CLKGATE_CON(26), 6, GFLAGS), -+ GATE(SCLK_TIMER07, "clk_timer07", "xin24m", 0, RK3399_CLKGATE_CON(26), 7, GFLAGS), -+ GATE(SCLK_TIMER08, "clk_timer08", "xin24m", 0, RK3399_CLKGATE_CON(26), 8, GFLAGS), -+ GATE(SCLK_TIMER09, "clk_timer09", "xin24m", 0, RK3399_CLKGATE_CON(26), 9, GFLAGS), -+ GATE(SCLK_TIMER10, "clk_timer10", "xin24m", 0, RK3399_CLKGATE_CON(26), 10, GFLAGS), -+ GATE(SCLK_TIMER11, "clk_timer11", "xin24m", 0, RK3399_CLKGATE_CON(26), 11, GFLAGS), ++ GATE(ACLK_CIF1, "aclk_cif1", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 7, GFLAGS), + -+ /* clk_test */ -+ /* clk_test_pre is controlled by CRU_MISC_CON[3] */ -+ COMPOSITE_NOMUX(0, "clk_test", "clk_test_pre", CLK_IGNORE_UNUSED, -+ RK3399_CLKSEL_CON(58), 0, 5, DFLAGS, -+ RK3399_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(PCLK_TIMER1, "pclk_timer1", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 8, GFLAGS), ++ GATE(PCLK_TIMER2, "pclk_timer2", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 15, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), + -+ /* ddrc */ -+ GATE(0, "clk_ddrc_lpll_src", "lpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), -+ 0, GFLAGS), -+ GATE(0, "clk_ddrc_bpll_src", "bpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), -+ 1, GFLAGS), -+ GATE(0, "clk_ddrc_dpll_src", "dpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), -+ 2, GFLAGS), -+ GATE(0, "clk_ddrc_gpll_src", "gpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), -+ 3, GFLAGS), -+ COMPOSITE_DDRCLK(SCLK_DDRC, "sclk_ddrc", mux_ddrclk_p, 0, -+ RK3399_CLKSEL_CON(6), 4, 2, 0, 0, ROCKCHIP_DDRCLK_SIP), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK2928_CLKGATE_CON(4), 13, GFLAGS), +}; + -+static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = { -+ /* -+ * PMU CRU Clock-Architecture -+ */ -+ -+ GATE(0, "fclk_cm0s_pmu_ppll_src", "ppll", CLK_IS_CRITICAL, -+ RK3399_PMU_CLKGATE_CON(0), 1, 
GFLAGS), -+ -+ COMPOSITE_NOGATE(FCLK_CM0S_SRC_PMU, "fclk_cm0s_src_pmu", mux_fclk_cm0s_pmu_ppll_p, CLK_IS_CRITICAL, -+ RK3399_PMU_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 5, DFLAGS), -+ -+ COMPOSITE(SCLK_SPI3_PMU, "clk_spi3_pmu", mux_24m_ppll_p, 0, -+ RK3399_PMU_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ -+ COMPOSITE(0, "clk_wifi_div", mux_ppll_24m_p, CLK_IGNORE_UNUSED, -+ RK3399_PMU_CLKSEL_CON(1), 13, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ -+ COMPOSITE_FRACMUX_NOGATE(0, "clk_wifi_frac", "clk_wifi_div", CLK_SET_RATE_PARENT, -+ RK3399_PMU_CLKSEL_CON(7), 0, -+ &rk3399_pmuclk_wifi_fracmux), -+ -+ MUX(0, "clk_timer_src_pmu", mux_pll_p, CLK_IGNORE_UNUSED, -+ RK3399_PMU_CLKSEL_CON(1), 15, 1, MFLAGS), ++static struct clk_div_table div_rk3188_aclk_core_t[] = { ++ { .val = 0, .div = 1 }, ++ { .val = 1, .div = 2 }, ++ { .val = 2, .div = 3 }, ++ { .val = 3, .div = 4 }, ++ { .val = 4, .div = 8 }, ++ { /* sentinel */ }, ++}; + -+ COMPOSITE_NOMUX(SCLK_I2C0_PMU, "clk_i2c0_pmu", "ppll", 0, -+ RK3399_PMU_CLKSEL_CON(2), 0, 7, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 9, GFLAGS), ++PNAME(mux_hsicphy_p) = { "sclk_otgphy0_480m", "sclk_otgphy1_480m", ++ "gpll", "cpll" }; + -+ COMPOSITE_NOMUX(SCLK_I2C4_PMU, "clk_i2c4_pmu", "ppll", 0, -+ RK3399_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 10, GFLAGS), ++static struct rockchip_clk_branch rk3188_i2s0_fracmux __initdata = ++ MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); + -+ COMPOSITE_NOMUX(SCLK_I2C8_PMU, "clk_i2c8_pmu", "ppll", 0, -+ RK3399_PMU_CLKSEL_CON(2), 8, 7, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 11, GFLAGS), ++static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = { ++ COMPOSITE_NOMUX_DIVTBL(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 3, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ div_rk3188_aclk_core_t, RK2928_CLKGATE_CON(0), 7, GFLAGS), + -+ DIV(0, "clk_32k_suspend_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RK3399_PMU_CLKSEL_CON(4), 0, 10, DFLAGS), -+ MUX(0, "clk_testout_2io", mux_clk_testout2_2io_p, CLK_IGNORE_UNUSED, -+ RK3399_PMU_CLKSEL_CON(4), 15, 1, MFLAGS), ++ /* do not source aclk_cpu_pre from the apll, to keep complexity down */ ++ COMPOSITE_NOGATE(ACLK_CPU_PRE, "aclk_cpu_pre", mux_aclk_cpu_p, CLK_SET_RATE_NO_REPARENT, ++ RK2928_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS), ++ DIV(0, "pclk_cpu_pre", "aclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ DIV(0, "hclk_cpu_pre", "aclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ COMPOSITE_NOMUX(0, "hclk_ahb2apb", "hclk_cpu_pre", 0, ++ RK2928_CLKSEL_CON(1), 14, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(4), 9, GFLAGS), + -+ MUX(SCLK_UART4_SRC, "clk_uart4_src", mux_24m_ppll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3399_PMU_CLKSEL_CON(5), 10, 1, MFLAGS), ++ GATE(CORE_L2C, "core_l2c", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(9), 4, GFLAGS), + -+ COMPOSITE_NOMUX(0, "clk_uart4_div", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3399_PMU_CLKSEL_CON(5), 0, 7, DFLAGS, -+ RK3399_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(0, "aclk_peri_pre", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), + -+ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_div", CLK_SET_RATE_PARENT, -+ RK3399_PMU_CLKSEL_CON(6), 0, -+ RK3399_PMU_CLKGATE_CON(0), 6, GFLAGS, -+ &rk3399_uart4_pmu_fracmux), 
++ COMPOSITE(DCLK_LCDC0, "dclk_lcdc0", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE(DCLK_LCDC1, "dclk_lcdc1", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(28), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), + -+ DIV(PCLK_SRC_PMU, "pclk_pmu_src", "ppll", CLK_IS_CRITICAL, -+ RK3399_PMU_CLKSEL_CON(0), 0, 5, DFLAGS), ++ COMPOSITE(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, ++ RK2928_CLKSEL_CON(34), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 15, GFLAGS), ++ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_src", 0, ++ RK2928_CLKGATE_CON(9), 7, GFLAGS), + -+ /* pmu clock gates */ -+ GATE(SCLK_TIMER12_PMU, "clk_timer0_pmu", "clk_timer_src_pmu", 0, RK3399_PMU_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(SCLK_TIMER13_PMU, "clk_timer1_pmu", "clk_timer_src_pmu", 0, RK3399_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(SCLK_TIMER2, "timer2", "xin24m", 0, RK2928_CLKGATE_CON(3), 4, GFLAGS), ++ GATE(SCLK_TIMER3, "timer3", "xin24m", 0, RK2928_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(SCLK_TIMER4, "timer4", "xin24m", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(SCLK_TIMER5, "timer5", "xin24m", 0, RK2928_CLKGATE_CON(3), 8, GFLAGS), ++ GATE(SCLK_TIMER6, "timer6", "xin24m", 0, RK2928_CLKGATE_CON(3), 14, GFLAGS), + -+ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, RK3399_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NODIV(0, "sclk_hsicphy_480m", mux_hsicphy_p, 0, ++ RK2928_CLKSEL_CON(30), 0, 2, DFLAGS, ++ RK2928_CLKGATE_CON(3), 6, GFLAGS), ++ DIV(0, "sclk_hsicphy_12m", "sclk_hsicphy_480m", 0, ++ RK2928_CLKSEL_CON(11), 8, 6, DFLAGS), + -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(PCLK_PMUGRF_PMU, "pclk_pmugrf_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_INTMEM1_PMU, "pclk_intmem1_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_GPIO1_PMU, "pclk_gpio1_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(PCLK_SGRF_PMU, "pclk_sgrf_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_NOC_PMU, "pclk_noc_pmu", "pclk_pmu_src", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_I2C4_PMU, "pclk_i2c4_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_I2C8_PMU, "pclk_i2c8_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(PCLK_RKPWM_PMU, "pclk_rkpwm_pmu", "pclk_pmu_src", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(PCLK_SPI3_PMU, "pclk_spi3_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ GATE(PCLK_TIMER_PMU, "pclk_timer_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(PCLK_MAILBOX_PMU, "pclk_mailbox_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_UART4_PMU, "pclk_uart4_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 14, GFLAGS), -+ GATE(PCLK_WDT_M0_PMU, "pclk_wdt_m0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 15, GFLAGS), ++ MUX(0, "i2s_src", mux_pll_src_gpll_cpll_p, 0, ++ RK2928_CLKSEL_CON(2), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0, ++ RK2928_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, 
"i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(7), 0, ++ RK2928_CLKGATE_CON(0), 10, GFLAGS, ++ &rk3188_i2s0_fracmux), + -+ GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(HCLK_NOC_PMU, "hclk_noc_pmu", "fclk_cm0s_src_pmu", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(2), 5, GFLAGS), -+}; ++ GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS), + -+static void __iomem *rk3399_cru_base; -+static void __iomem *rk3399_pmucru_base; ++ GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS), + -+void rk3399_dump_cru(void) -+{ -+ if (rk3399_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3399_cru_base, -+ 0x594, false); -+ } -+ if (rk3399_pmucru_base) { -+ pr_warn("PMU CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3399_pmucru_base, -+ 0x134, false); -+ } -+} -+EXPORT_SYMBOL_GPL(rk3399_dump_cru); ++ GATE(PCLK_TIMER3, "pclk_timer3", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 9, GFLAGS), + -+static int rk3399_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) -+{ -+ rk3399_dump_cru(); -+ return NOTIFY_DONE; -+} ++ GATE(PCLK_UART0, "pclk_uart0", "hclk_ahb2apb", 0, RK2928_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "hclk_ahb2apb", 0, RK2928_CLKGATE_CON(8), 1, GFLAGS), + -+static struct notifier_block rk3399_clk_panic_block = { -+ .notifier_call = rk3399_clk_panic, ++ GATE(ACLK_GPS, "aclk_gps", "aclk_peri", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), +}; + -+static void __init rk3399_clk_init(struct device_node *np) ++static struct rockchip_clk_provider *__init rk3188_common_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; -+ struct clk **clks; + + reg_base = of_iomap(np, 0); + if (!reg_base) { + pr_err("%s: could not map cru region\n", __func__); -+ return; ++ return ERR_PTR(-ENOMEM); + } + -+ rk3399_cru_base = reg_base; -+ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); + iounmap(reg_base); -+ return; ++ return ERR_PTR(-ENOMEM); + } -+ clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3399_pll_clks, -+ ARRAY_SIZE(rk3399_pll_clks), -1); ++ rockchip_clk_register_branches(ctx, common_clk_branches, ++ ARRAY_SIZE(common_clk_branches)); + -+ rockchip_clk_register_branches(ctx, rk3399_clk_branches, -+ ARRAY_SIZE(rk3399_clk_branches)); ++ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_clk_register_armclk(ctx, ARMCLKL, "armclkl", -+ 4, clks[PLL_APLLL], clks[PLL_GPLL], -+ &rk3399_cpuclkl_data, rk3399_cpuclkl_rates, -+ ARRAY_SIZE(rk3399_cpuclkl_rates)); ++ rockchip_register_restart_notifier(ctx, RK2928_GLB_SRST_FST, NULL); + -+ rockchip_clk_register_armclk(ctx, ARMCLKB, "armclkb", -+ 4, clks[PLL_APLLB], clks[PLL_GPLL], -+ &rk3399_cpuclkb_data, rk3399_cpuclkb_rates, -+ 
ARRAY_SIZE(rk3399_cpuclkb_rates)); ++ return ctx; ++} + -+ rockchip_register_softrst(np, 21, reg_base + RK3399_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++static void __init rk3066a_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ struct clk **clks; + -+ rockchip_register_restart_notifier(ctx, RK3399_GLB_SRST_FST, NULL); ++ ctx = rk3188_common_clk_init(np); ++ if (IS_ERR(ctx)) ++ return; ++ clks = ctx->clk_data.clks; + ++ rockchip_clk_register_plls(ctx, rk3066_pll_clks, ++ ARRAY_SIZE(rk3066_pll_clks), ++ RK3066_GRF_SOC_STATUS); ++ rockchip_clk_register_branches(ctx, rk3066a_clk_branches, ++ ARRAY_SIZE(rk3066a_clk_branches)); ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3066_cpuclk_data, rk3066_cpuclk_rates, ++ ARRAY_SIZE(rk3066_cpuclk_rates)); + rockchip_clk_of_add_provider(np, ctx); +} -+CLK_OF_DECLARE(rk3399_cru, "rockchip,rk3399-cru", rk3399_clk_init); ++CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); + -+static void __init rk3399_pmu_clk_init(struct device_node *np) ++static void __init rk3188a_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++ struct clk **clks; ++ unsigned long rate; ++ int ret; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru pmu region\n", __func__); ++ ctx = rk3188_common_clk_init(np); ++ if (IS_ERR(ctx)) + return; -+ } ++ clks = ctx->clk_data.clks; + -+ rk3399_pmucru_base = reg_base; ++ rockchip_clk_register_plls(ctx, rk3188_pll_clks, ++ ARRAY_SIZE(rk3188_pll_clks), ++ RK3188_GRF_SOC_STATUS); ++ rockchip_clk_register_branches(ctx, rk3188_clk_branches, ++ ARRAY_SIZE(rk3188_clk_branches)); ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3188_cpuclk_data, rk3188_cpuclk_rates, ++ ARRAY_SIZE(rk3188_cpuclk_rates)); + -+ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip pmu clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; ++ /* reparent aclk_cpu_pre from apll */ ++ if (clks[ACLK_CPU_PRE] && clks[PLL_GPLL]) { ++ rate = clk_get_rate(clks[ACLK_CPU_PRE]); ++ ++ ret = clk_set_parent(clks[ACLK_CPU_PRE], clks[PLL_GPLL]); ++ if (ret < 0) ++ pr_warn("%s: could not reparent aclk_cpu_pre to gpll\n", ++ __func__); ++ ++ clk_set_rate(clks[ACLK_CPU_PRE], rate); ++ } else { ++ pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n", ++ __func__); + } + -+ rockchip_clk_register_plls(ctx, rk3399_pmu_pll_clks, -+ ARRAY_SIZE(rk3399_pmu_pll_clks), -1); ++ rockchip_clk_of_add_provider(np, ctx); ++} ++CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); + -+ rockchip_clk_register_branches(ctx, rk3399_clk_pmu_branches, -+ ARRAY_SIZE(rk3399_clk_pmu_branches)); ++static void __init rk3188_clk_init(struct device_node *np) ++{ ++ int i; + -+ rockchip_register_softrst(np, 2, reg_base + RK3399_PMU_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ for (i = 0; i < ARRAY_SIZE(rk3188_pll_clks); i++) { ++ struct rockchip_pll_clock *pll = &rk3188_pll_clks[i]; ++ struct rockchip_pll_rate_table *rate; + -+ rockchip_clk_of_add_provider(np, ctx); ++ if (!pll->rate_table) ++ continue; + -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rk3399_clk_panic_block); ++ rate = pll->rate_table; ++ while (rate->rate > 0) { ++ rate->nb = 1; ++ rate++; ++ } ++ } ++ ++ rk3188a_clk_init(np); +} -+CLK_OF_DECLARE(rk3399_cru_pmu, "rockchip,rk3399-pmucru", 
rk3399_pmu_clk_init); ++CLK_OF_DECLARE(rk3188_cru, "rockchip,rk3188-cru", rk3188_clk_init); + -+#ifdef MODULE -+struct clk_rk3399_inits { ++struct clk_rk3188_inits { + void (*inits)(struct device_node *np); +}; + -+static const struct clk_rk3399_inits clk_rk3399_pmucru_init = { -+ .inits = rk3399_pmu_clk_init, ++static const struct clk_rk3188_inits clk_rk3066a_init = { ++ .inits = rk3066a_clk_init, +}; + -+static const struct clk_rk3399_inits clk_rk3399_cru_init = { -+ .inits = rk3399_clk_init, ++static const struct clk_rk3188_inits clk_rk3188a_init = { ++ .inits = rk3188a_clk_init, +}; + -+static const struct of_device_id clk_rk3399_match_table[] = { ++static const struct clk_rk3188_inits clk_rk3188_init = { ++ .inits = rk3188_clk_init, ++}; ++ ++static const struct of_device_id clk_rk3188_match_table[] = { + { -+ .compatible = "rockchip,rk3399-cru", -+ .data = &clk_rk3399_cru_init, -+ }, { -+ .compatible = "rockchip,rk3399-pmucru", -+ .data = &clk_rk3399_pmucru_init, ++ .compatible = "rockchip,rk3066a-cru", ++ .data = &clk_rk3066a_init, ++ }, { ++ .compatible = "rockchip,rk3188a-cru", ++ .data = &clk_rk3188a_init, ++ }, { ++ .compatible = "rockchip,rk3188-cru", ++ .data = &rk3188_clk_init, + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3399_match_table); ++MODULE_DEVICE_TABLE(of, clk_rk3188_match_table); + -+static int clk_rk3399_probe(struct platform_device *pdev) ++static int __init clk_rk3188_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const struct of_device_id *match; -+ const struct clk_rk3399_inits *init_data; ++ const struct clk_rk3188_inits *init_data; + -+ match = of_match_device(clk_rk3399_match_table, &pdev->dev); ++ match = of_match_device(clk_rk3188_match_table, &pdev->dev); + if (!match || !match->data) + return -EINVAL; + @@ -53245,1130 +53339,683 @@ index 000000000..e1b6c5267 + return 0; +} + -+static struct platform_driver clk_rk3399_driver = { -+ .probe = clk_rk3399_probe, ++static struct platform_driver clk_rk3188_driver = { + .driver = { -+ .name = "clk-rk3399", -+ .of_match_table = clk_rk3399_match_table, -+ .suppress_bind_attrs = true, ++ .name = "clk-rk3188", ++ .of_match_table = clk_rk3188_match_table, + }, +}; -+module_platform_driver(clk_rk3399_driver); ++builtin_platform_driver_probe(clk_rk3188_driver, clk_rk3188_probe); + -+MODULE_DESCRIPTION("Rockchip RK3399 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3188 Clock Driver"); +MODULE_LICENSE("GPL"); -+MODULE_ALIAS("platform:clk-rk3399"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rk3528.c b/drivers/clk/rockchip-oh/clk-rk3528.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3228.c b/drivers/clk/rockchip-oh/clk-rk3228.c new file mode 100644 -index 000000000..1b14cd57b +index 000000000..18c94343b --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3528.c -@@ -0,0 +1,1174 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rk3228.c +@@ -0,0 +1,732 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Joseph Chen ++ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. ++ * Author: Xing Zheng ++ * Jeffy Chen + */ + +#include ++#include +#include +#include -+#include +#include ++#include +#include -+#include ++#include +#include "clk.h" + -+/* A placeholder for rk3066 pll type. 
We are rk3328 pll type */ -+#define RK3528_GRF_SOC_STATUS0 0x1a0 ++#define RK3228_GRF_SOC_STATUS0 0x480 + -+enum rk3528_plls { -+ apll, cpll, gpll, ppll, dpll, ++enum rk3228_plls { ++ apll, dpll, cpll, gpll, +}; + -+/* -+ * ## PLL attention. -+ * -+ * [FRAC PLL]: GPLL, PPLL, DPLL -+ * - frac mode: refdiv can be 1 or 2 only -+ * - int mode: refdiv has no special limit -+ * - VCO range: [950, 3800] MHZ -+ * -+ * [INT PLL]: CPLL, APLL -+ * - int mode: refdiv can be 1 or 2 only -+ * - VCO range: [475, 1900] MHZ -+ * -+ * [PPLL]: normal mode only. -+ * -+ * -+ * ## CRU access attention. -+ * -+ * pclk_cru => pclk_vo_root => aclk_vo_root -+ * pclk_cru_pcie => pclk_vpu_root => aclk_vpu_root -+ * pclk_cru_ddrphy => hclk_rkvdec_root => aclk_rkvdec_root -+ */ -+static struct rockchip_pll_rate_table rk3528_pll_rates[] = { ++static struct rockchip_pll_rate_table rk3228_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), + RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), + RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), + RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), + RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), /* GPLL */ -+ RK3036_PLL_RATE(1092000000, 2, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 42, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), /* PPLL */ -+ RK3036_PLL_RATE(996000000, 2, 83, 1, 1, 1, 0), /* CPLL */ -+ RK3036_PLL_RATE(960000000, 1, 40, 1, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 50, 2, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 24, 3, 2, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 
696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+#define RK3528_DIV_ACLK_M_CORE_MASK 0x1f -+#define RK3528_DIV_ACLK_M_CORE_SHIFT 11 -+#define RK3528_DIV_PCLK_DBG_MASK 0x1f -+#define RK3528_DIV_PCLK_DBG_SHIFT 1 ++#define RK3228_DIV_CPU_MASK 0x1f ++#define RK3228_DIV_CPU_SHIFT 8 + -+#define RK3528_CLKSEL39(_aclk_m_core) \ -+{ \ -+ .reg = RK3528_CLKSEL_CON(39), \ -+ .val = HIWORD_UPDATE(_aclk_m_core, RK3528_DIV_ACLK_M_CORE_MASK, \ -+ RK3528_DIV_ACLK_M_CORE_SHIFT), \ -+} ++#define RK3228_DIV_PERI_MASK 0xf ++#define RK3228_DIV_PERI_SHIFT 0 ++#define RK3228_DIV_ACLK_MASK 0x7 ++#define RK3228_DIV_ACLK_SHIFT 4 ++#define RK3228_DIV_HCLK_MASK 0x3 ++#define RK3228_DIV_HCLK_SHIFT 8 ++#define RK3228_DIV_PCLK_MASK 0x7 ++#define RK3228_DIV_PCLK_SHIFT 12 + -+#define RK3528_CLKSEL40(_pclk_dbg) \ ++#define RK3228_CLKSEL1(_core_aclk_div, _core_peri_div) \ +{ \ -+ .reg = RK3528_CLKSEL_CON(40), \ -+ .val = HIWORD_UPDATE(_pclk_dbg, RK3528_DIV_PCLK_DBG_MASK, \ -+ RK3528_DIV_PCLK_DBG_SHIFT), \ ++ .reg = RK2928_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_core_peri_div, RK3228_DIV_PERI_MASK, \ ++ RK3228_DIV_PERI_SHIFT) | \ ++ HIWORD_UPDATE(_core_aclk_div, RK3228_DIV_ACLK_MASK, \ ++ RK3228_DIV_ACLK_SHIFT), \ +} + -+/* SIGN-OFF: _aclk_m_core: 550M, _pclk_dbg: 137.5M, */ -+#define RK3528_CPUCLK_RATE(_prate, _aclk_m_core, _pclk_dbg) \ ++#define RK3228_CPUCLK_RATE(_prate, _core_aclk_div, _core_peri_div) \ +{ \ + .prate = _prate, \ + .divs = { \ -+ RK3528_CLKSEL39(_aclk_m_core), \ -+ RK3528_CLKSEL40(_pclk_dbg), \ ++ RK3228_CLKSEL1(_core_aclk_div, _core_peri_div), \ + }, \ +} + -+static struct rockchip_cpuclk_rate_table rk3528_cpuclk_rates[] __initdata = { -+ /* APLL(CPU) rate <= 1900M, due to APLL VCO limit */ -+ RK3528_CPUCLK_RATE(1896000000, 1, 13), -+ RK3528_CPUCLK_RATE(1800000000, 1, 12), -+ RK3528_CPUCLK_RATE(1704000000, 1, 11), -+ RK3528_CPUCLK_RATE(1608000000, 1, 11), -+ RK3528_CPUCLK_RATE(1512000000, 1, 11), -+ RK3528_CPUCLK_RATE(1416000000, 1, 9), -+ RK3528_CPUCLK_RATE(1296000000, 1, 8), -+ RK3528_CPUCLK_RATE(1200000000, 1, 8), -+ RK3528_CPUCLK_RATE(1188000000, 1, 8), -+ RK3528_CPUCLK_RATE(1092000000, 1, 7), -+ RK3528_CPUCLK_RATE(1008000000, 1, 6), -+ RK3528_CPUCLK_RATE(1000000000, 1, 6), -+ RK3528_CPUCLK_RATE(996000000, 1, 6), -+ RK3528_CPUCLK_RATE(960000000, 1, 6), -+ RK3528_CPUCLK_RATE(912000000, 1, 6), -+ RK3528_CPUCLK_RATE(816000000, 1, 5), -+ RK3528_CPUCLK_RATE(600000000, 1, 3), -+ RK3528_CPUCLK_RATE(594000000, 1, 3), -+ RK3528_CPUCLK_RATE(408000000, 1, 2), -+ RK3528_CPUCLK_RATE(312000000, 1, 2), -+ RK3528_CPUCLK_RATE(216000000, 1, 1), -+ RK3528_CPUCLK_RATE(96000000, 1, 0), ++static struct rockchip_cpuclk_rate_table rk3228_cpuclk_rates[] __initdata = { ++ RK3228_CPUCLK_RATE(1800000000, 1, 7), ++ RK3228_CPUCLK_RATE(1704000000, 1, 7), ++ RK3228_CPUCLK_RATE(1608000000, 1, 7), ++ RK3228_CPUCLK_RATE(1512000000, 1, 7), ++ RK3228_CPUCLK_RATE(1488000000, 1, 5), ++ RK3228_CPUCLK_RATE(1464000000, 1, 5), ++ RK3228_CPUCLK_RATE(1416000000, 1, 5), ++ RK3228_CPUCLK_RATE(1392000000, 1, 5), ++ RK3228_CPUCLK_RATE(1296000000, 1, 5), ++ RK3228_CPUCLK_RATE(1200000000, 1, 5), ++ RK3228_CPUCLK_RATE(1104000000, 1, 5), 
++ RK3228_CPUCLK_RATE(1008000000, 1, 5), ++ RK3228_CPUCLK_RATE(912000000, 1, 5), ++ RK3228_CPUCLK_RATE(816000000, 1, 3), ++ RK3228_CPUCLK_RATE(696000000, 1, 3), ++ RK3228_CPUCLK_RATE(600000000, 1, 3), ++ RK3228_CPUCLK_RATE(408000000, 1, 1), ++ RK3228_CPUCLK_RATE(312000000, 1, 1), ++ RK3228_CPUCLK_RATE(216000000, 1, 1), ++ RK3228_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk3528_cpuclk_data = { -+ .core_reg[0] = RK3528_CLKSEL_CON(39), -+ .div_core_shift[0] = 5, ++static const struct rockchip_cpuclk_reg_data rk3228_cpuclk_data = { ++ .core_reg[0] = RK2928_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, + .div_core_mask[0] = 0x1f, + .num_cores = 1, + .mux_core_alt = 1, + .mux_core_main = 0, -+ .mux_core_shift = 10, ++ .mux_core_shift = 6, + .mux_core_mask = 0x1, +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_24m_32k_p) = { "xin24m", "clk_32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_200m_100m_24m_p) = { "clk_500m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_600m_300m_200m_24m_p) = { "clk_600m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; -+PNAME(aclk_gpu_p) = { "aclk_gpu_root", "clk_gpu_pvtpll_src" }; -+PNAME(aclk_rkvdec_pvtmux_root_p) = { "aclk_rkvdec_root", "clk_rkvdec_pvtpll_src" }; -+PNAME(clk_i2c2_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_32k" }; -+PNAME(clk_ref_pcie_inner_phy_p) = { "clk_ppll_100m_src", "xin24m" }; -+PNAME(dclk_vop0_p) = { "dclk_vop_src0", "clk_hdmiphy_pixel_io" }; -+PNAME(mclk_i2s0_2ch_sai_src_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "xin12m" }; -+PNAME(mclk_i2s1_8ch_sai_src_p) = { "clk_i2s1_8ch_src", "clk_i2s1_8ch_frac", "xin12m" }; -+PNAME(mclk_i2s2_2ch_sai_src_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "xin12m" }; -+PNAME(mclk_i2s3_8ch_sai_src_p) = { "clk_i2s3_8ch_src", "clk_i2s3_8ch_frac", "xin12m" }; -+PNAME(mclk_sai_i2s0_p) = { "mclk_i2s0_2ch_sai_src", "i2s0_mclkin" }; -+PNAME(mclk_sai_i2s1_p) = { "mclk_i2s1_8ch_sai_src", "i2s1_mclkin" }; -+PNAME(mclk_spdif_src_p) = { "clk_spdif_src", "clk_spdif_frac", "xin12m" }; -+PNAME(sclk_uart0_src_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; -+PNAME(sclk_uart1_src_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(sclk_uart2_src_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(sclk_uart3_src_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(sclk_uart4_src_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(sclk_uart5_src_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(sclk_uart6_src_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(sclk_uart7_src_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(clk_32k_p) = { "xin_osc0_div", "clk_pvtm_32k" }; ++PNAME(mux_pll_p) = { "clk_24m", "xin24m" }; + -+/* Pass 0 to PLL() '_lshift' as a placeholder for rk3066 pll 
type. We are rk3328 pll type */ -+static struct rockchip_pll_clock rk3528_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(0), -+ RK3528_MODE_CON, 0, 0, 0, rk3528_pll_rates), ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr", "apll_ddr" }; ++PNAME(mux_usb480m_phy_p) = { "usb480m_phy0", "usb480m_phy1" }; ++PNAME(mux_usb480m_p) = { "usb480m_phy", "xin24m" }; ++PNAME(mux_hdmiphy_p) = { "hdmiphy_phy", "xin24m" }; ++PNAME(mux_aclk_cpu_src_p) = { "cpll_aclk_cpu", "gpll_aclk_cpu", "hdmiphy_aclk_cpu" }; + -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(8), -+ RK3528_MODE_CON, 2, 0, 0, rk3528_pll_rates), ++PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy", "usb480m" }; ++PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "hdmiphy" }; ++PNAME(mux_pll_src_2plls_p) = { "cpll", "gpll" }; ++PNAME(mux_sclk_hdmi_cec_p) = { "cpll", "gpll", "xin24m" }; ++PNAME(mux_aclk_peri_src_p) = { "cpll_peri", "gpll_peri", "hdmiphy_peri" }; ++PNAME(mux_mmc_src_p) = { "cpll", "gpll", "xin24m", "usb480m" }; ++PNAME(mux_pll_src_cpll_gpll_usb480m_p) = { "cpll", "gpll", "usb480m" }; + -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(24), -+ RK3528_MODE_CON, 4, 0, 0, rk3528_pll_rates), ++PNAME(mux_sclk_rga_p) = { "gpll", "cpll", "sclk_rga_src" }; + -+ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PCIE_PLL_CON(32), -+ RK3528_MODE_CON, 6, 0, -+ ROCKCHIP_PLL_FIXED_MODE, rk3528_pll_rates), ++PNAME(mux_sclk_vop_src_p) = { "gpll_vop", "cpll_vop" }; ++PNAME(mux_dclk_vop_p) = { "hdmiphy", "sclk_vop_pre" }; + -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3528_DDRPHY_PLL_CON(16), -+ RK3528_DDRPHY_MODE_CON, 0, 0, 0, rk3528_pll_rates), ++PNAME(mux_i2s0_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s1_pre_p) = { "i2s1_src", "i2s1_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_out_p) = { "i2s1_pre", "xin12m" }; ++PNAME(mux_i2s2_p) = { "i2s2_src", "i2s2_frac", "xin12m" }; ++PNAME(mux_sclk_spdif_p) = { "sclk_spdif_src", "spdif_frac", "xin12m" }; ++ ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++ ++PNAME(mux_sclk_mac_extclk_p) = { "ext_gmac", "phy_50m_out" }; ++PNAME(mux_sclk_gmac_pre_p) = { "sclk_gmac_src", "sclk_mac_extclk" }; ++PNAME(mux_sclk_macphy_p) = { "sclk_gmac_src", "ext_gmac" }; ++ ++static struct rockchip_pll_clock rk3228_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0), ++ RK2928_MODE_CON, 0, 7, 0, rk3228_pll_rates), ++ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(3), ++ RK2928_MODE_CON, 4, 6, 0, NULL), ++ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(6), ++ RK2928_MODE_CON, 8, 8, 0, NULL), ++ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(9), ++ RK2928_MODE_CON, 12, 9, ROCKCHIP_PLL_SYNC_RATE, rk3228_pll_rates), +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3528_uart0_fracmux __initdata = -+ MUX(CLK_UART0, "clk_uart0", sclk_uart0_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(6), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_i2s0_fracmux __initdata = ++ 
MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(9), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", sclk_uart1_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(8), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_i2s1_fracmux __initdata = ++ MUX(0, "i2s1_pre", mux_i2s1_pre_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(3), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", sclk_uart2_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(10), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_i2s2_fracmux __initdata = ++ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(16), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", sclk_uart3_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(12), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_spdif_fracmux __initdata = ++ MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(6), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", sclk_uart4_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(14), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(13), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", sclk_uart5_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(16), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(14), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart6_fracmux __initdata = -+ MUX(CLK_UART6, "clk_uart6", sclk_uart6_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(18), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(15), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3528_uart7_fracmux __initdata = -+ MUX(CLK_UART7, "clk_uart7", sclk_uart7_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(20), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ + -+static struct rockchip_clk_branch mclk_i2s0_2ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S0_2CH_SAI_SRC_PRE, "mclk_i2s0_2ch_sai_src_pre", mclk_i2s0_2ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(22), 0, 2, MFLAGS); ++ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(4), 8, 5, DFLAGS), + -+static struct rockchip_clk_branch mclk_i2s1_8ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S1_8CH_SAI_SRC_PRE, "mclk_i2s1_8ch_sai_src_pre", mclk_i2s1_8ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(26), 0, 2, MFLAGS); -+ -+static struct rockchip_clk_branch mclk_i2s2_2ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S2_2CH_SAI_SRC_PRE, "mclk_i2s2_2ch_sai_src_pre", mclk_i2s2_2ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(28), 0, 2, MFLAGS); -+ -+static struct rockchip_clk_branch mclk_i2s3_8ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S3_8CH_SAI_SRC_PRE, "mclk_i2s3_8ch_sai_src_pre", mclk_i2s3_8ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(24), 0, 2, 
MFLAGS); -+ -+static struct rockchip_clk_branch mclk_spdif_src_fracmux __initdata = -+ MUX(MCLK_SDPDIF_SRC_PRE, "mclk_spdif_src_pre", mclk_spdif_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(32), 0, 2, MFLAGS); -+ -+/* -+ * CRU Clock-Architecture -+ */ -+static struct rockchip_clk_branch rk3528_clk_branches[] __initdata = { -+ /* top */ -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), -+ -+ COMPOSITE(CLK_MATRIX_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 15, 1, MFLAGS, 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE(CLK_MATRIX_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_50M_SRC, "clk_50m_src", "cpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(0), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_100M_SRC, "clk_100m_src", "cpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(0), 7, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_150M_SRC, "clk_150m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_200M_SRC, "clk_200m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_300M_SRC, "clk_300m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(2), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(CLK_MATRIX_339M_SRC, "clk_339m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(2), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_400M_SRC, "clk_400m_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3528_CLKSEL_CON(2), 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_600M_SRC, "clk_600m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(4), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC0, "dclk_vop_src0", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(32), 10, 1, MFLAGS, 2, 8, DFLAGS, -+ RK3528_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC1, "dclk_vop_src1", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ RK3528_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_HSM, "clk_hsm", "xin24m", 0, -+ RK3528_CLKSEL_CON(36), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(3), 13, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "gpll", 0, -+ RK3528_CLKSEL_CON(4), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(5), 0, -+ RK3528_CLKGATE_CON(0), 13, GFLAGS, &rk3528_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, -+ RK3528_CLKGATE_CON(0), 14, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART1_SRC, "clk_uart1_src", "gpll", 0, -+ RK3528_CLKSEL_CON(6), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(7), 0, -+ RK3528_CLKGATE_CON(1), 0, GFLAGS, &rk3528_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RK3528_CLKGATE_CON(1), 1, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART2_SRC, "clk_uart2_src", "gpll", 0, -+ RK3528_CLKSEL_CON(8), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(9), 0, 
-+ RK3528_CLKGATE_CON(1), 3, GFLAGS, &rk3528_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RK3528_CLKGATE_CON(1), 4, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART3_SRC, "clk_uart3_src", "gpll", 0, -+ RK3528_CLKSEL_CON(10), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(11), 0, -+ RK3528_CLKGATE_CON(1), 6, GFLAGS, &rk3528_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RK3528_CLKGATE_CON(1), 7, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART4_SRC, "clk_uart4_src", "gpll", 0, -+ RK3528_CLKSEL_CON(12), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(13), 0, -+ RK3528_CLKGATE_CON(1), 9, GFLAGS, &rk3528_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RK3528_CLKGATE_CON(1), 10, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART5_SRC, "clk_uart5_src", "gpll", 0, -+ RK3528_CLKSEL_CON(14), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(15), 0, -+ RK3528_CLKGATE_CON(1), 12, GFLAGS, &rk3528_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RK3528_CLKGATE_CON(1), 13, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART6_SRC, "clk_uart6_src", "gpll", 0, -+ RK3528_CLKSEL_CON(16), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(17), 0, -+ RK3528_CLKGATE_CON(1), 15, GFLAGS, &rk3528_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, -+ RK3528_CLKGATE_CON(2), 0, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_UART7_SRC, "clk_uart7_src", "gpll", 0, -+ RK3528_CLKSEL_CON(18), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(19), 0, -+ RK3528_CLKGATE_CON(2), 2, GFLAGS, &rk3528_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, -+ RK3528_CLKGATE_CON(2), 3, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(20), 8, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_2CH_FRAC, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(21), 0, -+ RK3528_CLKGATE_CON(2), 6, GFLAGS, &mclk_i2s0_2ch_sai_src_fracmux), -+ GATE(MCLK_I2S0_2CH_SAI_SRC, "mclk_i2s0_2ch_sai_src", "mclk_i2s0_2ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 7, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_I2S1_8CH_SRC, "clk_i2s1_8ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(24), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_FRAC, "clk_i2s1_8ch_frac", "clk_i2s1_8ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(25), 0, -+ RK3528_CLKGATE_CON(2), 12, GFLAGS, &mclk_i2s1_8ch_sai_src_fracmux), -+ GATE(MCLK_I2S1_8CH_SAI_SRC, "mclk_i2s1_8ch_sai_src", "mclk_i2s1_8ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 13, GFLAGS), -+ -+ COMPOSITE_NOMUX(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(26), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(27), 0, -+ RK3528_CLKGATE_CON(2), 15, GFLAGS, &mclk_i2s2_2ch_sai_src_fracmux), -+ 
GATE(MCLK_I2S2_2CH_SAI_SRC, "mclk_i2s2_2ch_sai_src", "mclk_i2s2_2ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(3), 0, GFLAGS), ++ /* PD_DDR */ ++ GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(0, "ddrphy4x", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(26), 8, 2, MFLAGS, 0, 3, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK2928_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(0, "ddrc", "ddrphy_pre", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(8), 5, GFLAGS), ++ FACTOR_GATE(0, "ddrphy", "ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, ++ RK2928_CLKGATE_CON(7), 0, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_I2S3_8CH_SRC, "clk_i2s3_8ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(22), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S3_8CH_FRAC, "clk_i2s3_8ch_frac", "clk_i2s3_8ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(23), 0, -+ RK3528_CLKGATE_CON(2), 9, GFLAGS, &mclk_i2s3_8ch_sai_src_fracmux), -+ GATE(MCLK_I2S3_8CH_SAI_SRC, "mclk_i2s3_8ch_sai_src", "mclk_i2s3_8ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 10, GFLAGS), ++ /* PD_CORE */ ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(4), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore", "armclk", CLK_IGNORE_UNUSED, ++ RK2928_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK2928_CLKGATE_CON(4), 0, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_SPDIF_SRC, "clk_spdif_src", "gpll", 0, -+ RK3528_CLKSEL_CON(30), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(31), 0, -+ RK3528_CLKGATE_CON(3), 5, GFLAGS, &mclk_spdif_src_fracmux), -+ GATE(MCLK_SPDIF_SRC, "mclk_spdif_src", "mclk_spdif_src_pre", 0, -+ RK3528_CLKGATE_CON(3), 6, GFLAGS), ++ /* PD_MISC */ ++ MUX(SCLK_HDMI_PHY, "hdmiphy", mux_hdmiphy_p, CLK_SET_RATE_PARENT, ++ RK2928_MISC_CON, 13, 1, MFLAGS), ++ MUX(0, "usb480m_phy", mux_usb480m_phy_p, CLK_SET_RATE_PARENT, ++ RK2928_MISC_CON, 14, 1, MFLAGS), ++ MUX(0, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RK2928_MISC_CON, 15, 1, MFLAGS), + -+ /* bus */ -+ COMPOSITE_NODIV(ACLK_BUS_M_ROOT, "aclk_bus_m_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_m_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(9), 1, GFLAGS), ++ /* PD_BUS */ ++ GATE(0, "hdmiphy_aclk_cpu", "hdmiphy", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "gpll_aclk_cpu", "gpll", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "cpll_aclk_cpu", "cpll", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS), ++ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_CPU, "hclk_cpu", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 8, 2, DFLAGS, ++ 
RK2928_CLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_bus_src", "aclk_cpu_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(1), 12, 3, DFLAGS, ++ RK2928_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(PCLK_CPU, "pclk_cpu", "pclk_bus_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(0, "pclk_phy_pre", "pclk_bus_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(0, "pclk_ddr_pre", "pclk_bus_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(6), 13, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 11, GFLAGS), -+ COMPOSITE(ACLK_BUS_VOPGL_ROOT, "aclk_bus_vopgl_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 3, 1, MFLAGS, 0, 3, DFLAGS, -+ RK3528_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_BUS_H_ROOT, "aclk_bus_h_root", mux_500m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_bus_h_root", 0, -+ RK3528_CLKGATE_CON(10), 14, GFLAGS), ++ /* PD_VIDEO */ ++ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(32), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 11, GFLAGS), ++ FACTOR_GATE(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4, ++ RK2928_CLKGATE_CON(4), 4, GFLAGS), + -+ COMPOSITE_NODIV(HCLK_BUS_ROOT, "hclk_bus_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 5, GFLAGS), ++ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 2, GFLAGS), ++ FACTOR_GATE(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4, ++ RK2928_CLKGATE_CON(4), 5, GFLAGS), + -+ COMPOSITE_NODIV(PCLK_BUS_ROOT, "pclk_bus_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(PCLK_BUS_GRF, "pclk_bus_grf", "pclk_bus_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(8), 15, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(PCLK_JDBCK_DAP, "pclk_jdbck_dap", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 15, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 4, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 7, GFLAGS), -+ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(PCLK_SCR, "pclk_scr", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE(SCLK_VDEC_CABAC, "sclk_vdec_cabac", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(28), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 3, GFLAGS), + -+ 
COMPOSITE_NODIV(CLK_PWM0, "clk_pwm0", mux_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(44), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(44), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE(SCLK_VDEC_CORE, "sclk_vdec_core", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(34), 13, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 4, GFLAGS), + -+ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, -+ RK3528_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, -+ RK3528_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(CLK_JDBCK_DAP, "clk_jdbck_dap", "xin24m", 0, -+ RK3528_CLKGATE_CON(9), 13, GFLAGS), -+ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, -+ RK3528_CLKGATE_CON(10), 0, GFLAGS), ++ /* PD_VIO */ ++ COMPOSITE(ACLK_IEP_PRE, "aclk_iep_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(31), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 0, GFLAGS), ++ DIV(HCLK_VIO_PRE, "hclk_vio_pre", "aclk_iep_pre", 0, ++ RK2928_CLKSEL_CON(2), 0, 5, DFLAGS), + -+ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, -+ RK3528_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 9, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 11, GFLAGS), ++ COMPOSITE(ACLK_HDCP_PRE, "aclk_hdcp_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(31), 13, 2, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 4, GFLAGS), + -+ /* pmu */ -+ GATE(HCLK_PMU_ROOT, "hclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(PCLK_PMU_ROOT, "pclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ MUX(0, "sclk_rga_src", mux_pll_src_4plls_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(33), 13, 2, MFLAGS), ++ COMPOSITE_NOMUX(ACLK_RGA_PRE, "aclk_rga_pre", "sclk_rga_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(33), 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE(SCLK_RGA, "sclk_rga", mux_sclk_rga_p, 0, ++ RK2928_CLKSEL_CON(22), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 6, GFLAGS), + -+ GATE(FCLK_MCU, "fclk_mcu", "hclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(5), 4, GFLAGS), ++ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(33), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 1, GFLAGS), + -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU_HP_TIMER, "pclk_pmu_hp_timer", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PMU_IOC, "pclk_pmu_ioc", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_PMU_CRU, "pclk_pmu_cru", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(PCLK_PMU_GRF, "pclk_pmu_grf", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, -+ 
RK3528_PMU_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(0), 13, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_OSCCHK, "pclk_oscchk", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(5), 1, GFLAGS), ++ COMPOSITE(SCLK_HDCP, "sclk_hdcp", mux_pll_src_3plls_p, 0, ++ RK2928_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK2928_CLKGATE_CON(3), 5, GFLAGS), + -+ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", clk_i2c2_p, 0, -+ RK3528_PMU_CLKSEL_CON(0), 0, 2, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, ++ RK2928_CLKGATE_CON(3), 7, GFLAGS), + -+ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, -+ RK3528_PMU_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RK3528_PMU_CLKSEL_CON(5), 0, 5, DFLAGS, -+ RK3528_PMU_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE(SCLK_HDMI_CEC, "sclk_hdmi_cec", mux_sclk_hdmi_cec_p, 0, ++ RK2928_CLKSEL_CON(21), 14, 2, MFLAGS, 0, 14, DFLAGS, ++ RK2928_CLKGATE_CON(3), 8, GFLAGS), + -+ COMPOSITE_FRAC(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", 0, -+ RK3528_PMU_CLKSEL_CON(1), 0, -+ RK3528_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ /* clk_32k: internal! No path from external osc 32k */ -+ MUX(CLK_DEEPSLOW, "clk_32k", clk_32k_p, CLK_IS_CRITICAL, -+ RK3528_PMU_CLKSEL_CON(2), 0, 1, MFLAGS), -+ GATE(RTC_CLK_MCU, "rtc_clk_mcu", "clk_32k", 0, -+ RK3528_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "cpll_peri", "cpll", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(0, "gpll_peri", "gpll", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(0, "hdmiphy_peri", "hdmiphy", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOGATE(0, "aclk_peri_src", mux_aclk_peri_src_p, CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 10, 2, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 12, 3, DFLAGS, ++ RK2928_CLKGATE_CON(5), 2, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKSEL_CON(10), 8, 2, DFLAGS, ++ RK2928_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK2928_CLKGATE_CON(5), 0, GFLAGS), + -+ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, -+ RK3528_PMU_CLKSEL_CON(0), 2, 1, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, -+ RK3528_PMU_CLKSEL_CON(2), 1, 1, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(SCLK_TIMER5, 
"sclk_timer5", "xin24m", 0, ++ RK2928_CLKGATE_CON(6), 10, GFLAGS), + -+ /* core */ -+ COMPOSITE_NOMUX(ACLK_M_CORE_BIU, "aclk_m_core", "armclk", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(39), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3528_CLKGATE_CON(5), 12, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_DBG, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(40), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3528_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(PCLK_CPU_ROOT, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(PCLK_CORE_GRF, "pclk_core_grf", "pclk_cpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(6), 2, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "sclk_crypto", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(24), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 7, GFLAGS), + -+ /* ddr */ -+ GATE(CLK_DDRC_SRC, "clk_ddrc_src", "dpll", CLK_IS_CRITICAL, -+ RK3528_DDRPHY_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(CLK_DDR_PHY, "clk_ddr_phy", "dpll", CLK_IS_CRITICAL, -+ RK3528_DDRPHY_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(22), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 6, GFLAGS), + -+ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(90), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(45), 0, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 3, GFLAGS), -+ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 8, GFLAGS), -+ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 4, GFLAGS), ++ GATE(SCLK_HSADC, "sclk_hsadc", "ext_hsadc", 0, ++ RK2928_CLKGATE_CON(10), 12, GFLAGS), + -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 2, GFLAGS), -+ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 6, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 9, GFLAGS), ++ COMPOSITE(SCLK_WIFI, "sclk_wifi", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK2928_CLKSEL_CON(23), 5, 2, MFLAGS, 0, 6, DFLAGS, ++ RK2928_CLKGATE_CON(2), 15, GFLAGS), + -+ GATE(ACLK_DDR_UPCTL, "aclk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 11, GFLAGS), -+ GATE(CLK_DDR_UPCTL, "clk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 12, GFLAGS), -+ GATE(CLK_DDRMON, "clk_ddrmon", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 13, GFLAGS), -+ GATE(ACLK_DDR_SCRAMBLE, "aclk_ddr_scramble", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 14, GFLAGS), -+ GATE(ACLK_SPLIT, "aclk_split", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 15, GFLAGS), ++ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK2928_CLKGATE_CON(2), 11, GFLAGS), + -+ /* gpu */ -+ COMPOSITE_NODIV(ACLK_GPU_ROOT, "aclk_gpu_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(76), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(34), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_GPU, "aclk_gpu", aclk_gpu_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(76), 6, 1, MFLAGS, -+ RK3528_CLKGATE_CON(34), 7, GFLAGS), -+ GATE(ACLK_GPU_MALI, "aclk_gpu_mali", "aclk_gpu", 0, -+ RK3528_CLKGATE_CON(34), 8, GFLAGS), -+ COMPOSITE_NODIV(PCLK_GPU_ROOT, "pclk_gpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(76), 4, 2, 
MFLAGS, -+ RK3528_CLKGATE_CON(34), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDIO_SRC, "sclk_sdio_src", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(11), 10, 2, MFLAGS, ++ RK2928_CLKGATE_CON(2), 13, GFLAGS), ++ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, ++ RK2928_CLKSEL_CON(12), 0, 8, DFLAGS), + -+ /* rkvdec */ -+ COMPOSITE_NODIV(ACLK_RKVDEC_ROOT_NDFT, "aclk_rkvdec_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(88), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 3, GFLAGS), -+ COMPOSITE_NODIV(HCLK_RKVDEC_ROOT, "hclk_rkvdec_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(88), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 2, GFLAGS), -+ GATE(PCLK_DDRPHY_CRU, "pclk_ddrphy_cru", "hclk_rkvdec_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(44), 4, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_root", 0, -+ RK3528_CLKGATE_CON(44), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_HEVC_CA_RKVDEC, "clk_hevc_ca_rkvdec", mux_600m_300m_200m_24m_p, 0, -+ RK3528_CLKSEL_CON(88), 11, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 11, GFLAGS), -+ MUX(ACLK_RKVDEC_PVTMUX_ROOT, "aclk_rkvdec_pvtmux_root", aclk_rkvdec_pvtmux_root_p, CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(88), 13, 1, MFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pvtmux_root", 0, -+ RK3528_CLKGATE_CON(44), 8, GFLAGS), ++ COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0, ++ RK2928_CLKSEL_CON(11), 12, 2, MFLAGS, ++ RK2928_CLKGATE_CON(2), 14, GFLAGS), ++ DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0, ++ RK2928_CLKSEL_CON(12), 8, 8, DFLAGS), + -+ /* rkvenc */ -+ COMPOSITE_NODIV(ACLK_RKVENC_ROOT, "aclk_rkvenc_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 1, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 7, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ COMPOSITE_NODIV(PCLK_RKVENC_ROOT, "pclk_rkvenc_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 2, GFLAGS), -+ GATE(PCLK_RKVENC_IOC, "pclk_rkvenc_ioc", "pclk_rkvenc_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(37), 10, GFLAGS), -+ GATE(PCLK_RKVENC_GRF, "pclk_rkvenc_grf", "pclk_rkvenc_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(38), 6, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 11, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 13, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 2, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 8, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 2, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 4, GFLAGS), -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 7, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 9, GFLAGS), ++ GATE(0, "gpll_vop", "gpll", 0, ++ RK2928_CLKGATE_CON(3), 1, GFLAGS), ++ GATE(0, "cpll_vop", "cpll", 0, ++ RK2928_CLKGATE_CON(3), 1, GFLAGS), ++ MUX(0, "sclk_vop_src", mux_sclk_vop_src_p, 0, ++ RK2928_CLKSEL_CON(27), 0, 1, MFLAGS), ++ DIV(DCLK_HDMI_PHY, "dclk_hdmiphy", "sclk_vop_src", 0, ++ RK2928_CLKSEL_CON(29), 0, 3, DFLAGS), ++ DIV(0, "sclk_vop_pre", "sclk_vop_src", 0, ++ RK2928_CLKSEL_CON(27), 8, 8, DFLAGS), ++ MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, 0, ++ 
RK2928_CLKSEL_CON(27), 1, 1, MFLAGS), + -+ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mux_150m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(80), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(38), 1, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(38), 8, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RK3528_CLKGATE_CON(38), 10, GFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ COMPOSITE_NODIV(HCLK_RKVENC_ROOT, "hclk_rkvenc_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 0, GFLAGS), -+ GATE(HCLK_SAI_I2S1, "hclk_sai_i2s1", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 9, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 14, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 0, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 6, GFLAGS), ++ COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(9), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(8), 0, ++ RK2928_CLKGATE_CON(0), 4, GFLAGS, ++ &rk3228_i2s0_fracmux), ++ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(0), 5, GFLAGS), + -+ COMPOSITE_NODIV(CLK_CORE_RKVENC, "clk_core_rkvenc", mux_300m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 11, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 9, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 12, GFLAGS), -+#if 0 -+ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, -+ RK3528_CLKGATE_CON(37), 4, GFLAGS), -+ GATE(CLK_UART_JTAG, "clk_uart_jtag", "xin24m", 0, -+ RK3528_CLKGATE_CON(37), 0, GFLAGS), -+#endif -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 13, 2, MFLAGS, -+ RK3528_CLKGATE_CON(37), 3, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI_I2S1, "mclk_sai_i2s1", mclk_sai_i2s1_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(79), 8, 1, MFLAGS, -+ RK3528_CLKGATE_CON(36), 10, GFLAGS), -+ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, -+ RK3528_CLKGATE_CON(37), 9, GFLAGS), ++ COMPOSITE(0, "i2s1_src", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(3), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(7), 0, ++ RK2928_CLKGATE_CON(0), 11, GFLAGS, ++ &rk3228_i2s1_fracmux), ++ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S_OUT, "i2s_out", mux_i2s_out_p, 0, ++ RK2928_CLKSEL_CON(3), 12, 1, MFLAGS, ++ RK2928_CLKGATE_CON(0), 13, GFLAGS), + -+ /* vo */ -+ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 1, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 2, GFLAGS), -+ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 3, GFLAGS), -+ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", 
"hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 7, GFLAGS), -+ GATE(HCLK_VDPP, "hclk_vdpp", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 10, GFLAGS), -+ GATE(HCLK_CVBS, "hclk_cvbs", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 3, GFLAGS), -+ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 4, GFLAGS), -+ GATE(HCLK_SAI_I2S3, "hclk_sai_i2s3", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 1, GFLAGS), -+ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 1, GFLAGS), -+ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 7, GFLAGS), -+ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 9, GFLAGS), -+ GATE(HCLK_HDCP_KEY, "hclk_hdcp_key", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 15, GFLAGS), ++ COMPOSITE(0, "i2s2_src", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(16), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(30), 0, ++ RK2928_CLKGATE_CON(0), 8, GFLAGS, ++ &rk3228_i2s2_fracmux), ++ GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, ++ RK2928_CLKGATE_CON(0), 9, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_VO_L_ROOT, "aclk_vo_l_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(84), 1, 2, MFLAGS, -+ RK3528_CLKGATE_CON(41), 8, GFLAGS), -+ GATE(ACLK_MAC_VO, "aclk_gmac0", "aclk_vo_l_root", 0, -+ RK3528_CLKGATE_CON(41), 10, GFLAGS), ++ COMPOSITE(0, "sclk_spdif_src", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_frac", "sclk_spdif_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(20), 0, ++ RK2928_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3228_spdif_fracmux), + -+ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 2, GFLAGS), -+ GATE(PCLK_MAC_VO, "pclk_gmac0", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 11, GFLAGS), -+ GATE(PCLK_VCDCPHY, "pclk_vcdcphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 4, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 5, GFLAGS), -+ GATE(PCLK_VO_IOC, "pclk_vo_ioc", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(42), 7, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 7, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 9, GFLAGS), -+ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 11, GFLAGS), ++ GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, ++ RK2928_CLKGATE_CON(1), 3, GFLAGS), + -+ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 13, GFLAGS), ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", 0, ++ RK2928_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", 0, ++ RK2928_CLKGATE_CON(1), 6, GFLAGS), + -+ GATE(PCLK_VO_GRF, "pclk_vo_grf", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(39), 13, GFLAGS), -+ GATE(PCLK_CRU, "pclk_cru", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(39), 15, GFLAGS), -+ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 6, GFLAGS), -+ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 14, GFLAGS), -+ GATE(PCLK_HDCP, "pclk_hdcp", "pclk_vo_root", 
0, -+ RK3528_CLKGATE_CON(41), 2, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, ++ RK2928_CLKSEL_CON(24), 6, 10, DFLAGS, ++ RK2928_CLKGATE_CON(2), 8, GFLAGS), + -+ COMPOSITE_NODIV(CLK_CORE_VDPP, "clk_core_vdpp", mux_339m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(83), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_339m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(83), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 9, GFLAGS), -+ COMPOSITE_NODIV(ACLK_JPEG_ROOT, "aclk_jpeg_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(84), 9, 2, MFLAGS, -+ RK3528_CLKGATE_CON(41), 15, GFLAGS), -+ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_root", 0, -+ RK3528_CLKGATE_CON(41), 6, GFLAGS), ++ COMPOSITE(0, "aclk_gpu_pre", mux_pll_src_4plls_p, 0, ++ RK2928_CLKSEL_CON(34), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(3), 13, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 0, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 8, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_VDPP, "aclk_vdpp", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 11, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 0, GFLAGS), ++ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(2), 9, GFLAGS), + -+ COMPOSITE(CCLK_SRC_SDMMC0, "cclk_src_sdmmc0", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(85), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(42), 8, GFLAGS), ++ /* PD_UART */ ++ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK2928_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE(0, "uart1_src", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK2928_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK2928_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE(0, "uart2_src", mux_pll_src_cpll_gpll_usb480m_p, ++ 0, RK2928_CLKSEL_CON(15), 12, 2, ++ MFLAGS, 0, 7, DFLAGS, RK2928_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(17), 0, ++ RK2928_CLKGATE_CON(1), 9, GFLAGS, ++ &rk3228_uart0_fracmux), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(18), 0, ++ RK2928_CLKGATE_CON(1), 11, GFLAGS, ++ &rk3228_uart1_fracmux), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(19), 0, ++ RK2928_CLKGATE_CON(1), 13, GFLAGS, ++ &rk3228_uart2_fracmux), + -+ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 15, 1, MFLAGS, 12, 3, DFLAGS, -+ RK3528_CLKGATE_CON(40), 0, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, -+ RK3528_CLKGATE_CON(40), 5, GFLAGS), ++ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(2), 14, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 0, GFLAGS), + -+ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(85), 13, 2, MFLAGS, -+ RK3528_CLKGATE_CON(43), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(86), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(43), 12, GFLAGS), -+ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 6, GFLAGS), ++ 
COMPOSITE(SCLK_MAC_SRC, "sclk_gmac_src", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(5), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK2928_CLKGATE_CON(1), 7, GFLAGS), ++ MUX(SCLK_MAC_EXTCLK, "sclk_mac_extclk", mux_sclk_mac_extclk_p, 0, ++ RK2928_CLKSEL_CON(29), 10, 1, MFLAGS), ++ MUX(SCLK_MAC, "sclk_gmac_pre", mux_sclk_gmac_pre_p, 0, ++ RK2928_CLKSEL_CON(5), 5, 1, MFLAGS), ++ GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_gmac_pre", 0, ++ RK2928_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_gmac_pre", 0, ++ RK2928_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_gmac_pre", 0, ++ RK2928_CLKGATE_CON(5), 5, GFLAGS), ++ GATE(SCLK_MAC_TX, "sclk_mac_tx", "sclk_gmac_pre", 0, ++ RK2928_CLKGATE_CON(5), 6, GFLAGS), ++ COMPOSITE(SCLK_MAC_PHY, "sclk_macphy", mux_sclk_macphy_p, 0, ++ RK2928_CLKSEL_CON(29), 12, 1, MFLAGS, 8, 2, DFLAGS, ++ RK2928_CLKGATE_CON(5), 7, GFLAGS), ++ COMPOSITE(SCLK_MAC_OUT, "sclk_gmac_out", mux_pll_src_2plls_p, 0, ++ RK2928_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK2928_CLKGATE_CON(2), 2, GFLAGS), + -+ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, -+ RK3528_CLKGATE_CON(43), 2, GFLAGS), -+ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 3, GFLAGS), -+ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, -+ RK3528_CLKGATE_CON(43), 14, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 12, GFLAGS), -+ FACTOR(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "clk_sbpi_otpc_ns", 0, 1, 2), ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+ GATE(MCLK_SAI_I2S3, "mclk_sai_i2s3", "mclk_i2s3_8ch_sai_src", 0, -+ RK3528_CLKGATE_CON(42), 2, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3528_CLKSEL_CON(84), 0, 1, MFLAGS, -+ RK3528_CLKGATE_CON(40), 3, GFLAGS), -+ GATE(DCLK_VOP1, "dclk_vop1", "dclk_vop_src1", CLK_SET_RATE_PARENT, -+ RK3528_CLKGATE_CON(40), 4, GFLAGS), -+ FACTOR_GATE(DCLK_CVBS, "dclk_cvbs", "dclk_vop1", 0, 1, 4, -+ RK3528_CLKGATE_CON(41), 4, GFLAGS), -+ GATE(DCLK_4X_CVBS, "dclk_4x_cvbs", "dclk_vop1", 0, -+ RK3528_CLKGATE_CON(41), 5, GFLAGS), ++ /* PD_VOP */ ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK2928_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(0, "aclk_rga_noc", "aclk_rga_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 2, GFLAGS), ++ GATE(0, "aclk_iep_noc", "aclk_iep_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 9, GFLAGS), + -+ FACTOR_GATE(CLK_SFR_HDMI, "clk_sfr_hdmi", "dclk_vop_src1", 0, 1, 4, -+ RK3528_CLKGATE_CON(40), 7, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(0, "aclk_vop_noc", "aclk_vop_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 12, GFLAGS), + -+ GATE(CLK_SPDIF_HDMI, "clk_spdif_hdmi", "mclk_spdif_src", 0, -+ RK3528_CLKGATE_CON(40), 10, GFLAGS), -+ GATE(MCLK_SPDIF, "mclk_spdif", "mclk_spdif_src", 0, -+ RK3528_CLKGATE_CON(37), 15, GFLAGS), -+ GATE(CLK_CEC_HDMI, "clk_cec_hdmi", "clk_32k", 0, -+ RK3528_CLKGATE_CON(40), 8, GFLAGS), -+#if 0 -+ GATE(CLK_USBHOST_OHCI, "clk_usbhost_ohci", "clk_usbhost_ohci_io", 0, -+ RK3528_CLKGATE_CON(43), 5, GFLAGS), -+ GATE(CLK_USBHOST_UTMI, "clk_usbhost_utmi", "clk_usbhost_utmi_io", 0, -+ RK3528_CLKGATE_CON(43), 6, GFLAGS), -+ GATE(CLK_HDMIPHY_TMDSSRC, "clk_hdmiphy_tmdssrc", "clk_hdmiphy_tmdssrc_io", 0, -+ RK3528_CLKGATE_CON(40), 11, GFLAGS), -+ GATE(CLK_HDMIPHY_PREP, "clk_hdmiphy_prep", "clk_hdmiphy_prep_io", 0, -+ 
RK3528_CLKGATE_CON(40), 12, GFLAGS), -+#endif -+ /* vpu */ -+ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, -+ RK3528_CLKGATE_CON(26), 5, GFLAGS), -+ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, -+ RK3528_CLKGATE_CON(27), 1, GFLAGS), -+ GATE(CLK_SUSPEND_USB3OTG, "clk_suspend_usb3otg", "xin24m", 0, -+ RK3528_CLKGATE_CON(33), 4, GFLAGS), -+ GATE(CLK_PCIE_AUX, "clk_pcie_aux", "xin24m", 0, -+ RK3528_CLKGATE_CON(30), 2, GFLAGS), -+ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, -+ RK3528_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(CLK_REF_USB3OTG, "clk_ref_usb3otg", "xin24m", 0, -+ RK3528_CLKGATE_CON(33), 2, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO0, "cclk_src_sdio0", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(72), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 1, GFLAGS), ++ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_hdcp_pre", 0, RK2928_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(0, "aclk_hdcp_noc", "aclk_hdcp_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 10, GFLAGS), + -+ COMPOSITE_NODIV(PCLK_VPU_ROOT, "pclk_vpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(PCLK_VPU_GRF, "pclk_vpu_grf", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(25), 12, GFLAGS), -+ GATE(PCLK_CRU_PCIE, "pclk_cru_pcie", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(25), 11, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 11, GFLAGS), -+ GATE(PCLK_CAN2, "pclk_can2", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 7, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 4, GFLAGS), -+ GATE(PCLK_CAN3, "pclk_can3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 9, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 11, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 13, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 13, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 9, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 14, GFLAGS), -+ GATE(PCLK_PCIE, "pclk_pcie", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 1, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 7, GFLAGS), -+ GATE(PCLK_VPU_IOC, "pclk_vpu_ioc", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(26), 8, GFLAGS), -+ GATE(PCLK_PIPE_GRF, "pclk_pipe_grf", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(30), 7, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(28), 1, GFLAGS), -+ GATE(PCLK_PCIE_PHY, "pclk_pcie_phy", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 6, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 15, GFLAGS), -+ GATE(PCLK_MAC_VPU, "pclk_gmac1", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(28), 6, GFLAGS), -+ GATE(PCLK_I2C6, "pclk_i2c6", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(28), 3, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(0, 
"hclk_vio_ahb_arbi", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 7, GFLAGS), ++ GATE(0, "hclk_vio_noc", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 8, GFLAGS), ++ GATE(0, "hclk_vop_noc", "hclk_vio_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(13), 13, GFLAGS), ++ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(HCLK_HDCP_MMU, "hclk_hdcp_mmu", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 12, GFLAGS), ++ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_VIO_H2P, "pclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 11, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_VPU_L_ROOT, "aclk_vpu_l_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(60), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 0, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_EMMC, "aclk_emmc", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(26), 1, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_MAC_VPU, "aclk_gmac1", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(28), 5, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_PCIE, "aclk_pcie", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(30), 3, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK2928_CLKGATE_CON(11), 4, GFLAGS), + -+ GATE_NO_SET_RATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(33), 1, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(0, "hclk_host0_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(HCLK_HOST1, "hclk_host1", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 8, GFLAGS), ++ GATE(0, "hclk_host1_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(11), 12, GFLAGS), ++ GATE(0, "hclk_otg_pmu", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 13, GFLAGS), ++ GATE(0, "hclk_host2_arb", "hclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(11), 14, GFLAGS), ++ GATE(0, "hclk_peri_noc", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(12), 1, GFLAGS), + -+ COMPOSITE_NODIV(HCLK_VPU_ROOT, "hclk_vpu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 10, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 13, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(HCLK_SAI_I2S0, "hclk_sai_i2s0", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 9, GFLAGS), -+ GATE(HCLK_SAI_I2S2, "hclk_sai_i2s2", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 11, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK2928_CLKGATE_CON(11), 5, GFLAGS), ++ GATE(0, "pclk_peri_noc", "pclk_peri", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(12), 2, GFLAGS), + -+ 
GATE(HCLK_PCIE_SLV, "hclk_pcie_slv", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 4, GFLAGS), -+ GATE(HCLK_PCIE_DBI, "hclk_pcie_dbi", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 5, GFLAGS), -+ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 2, GFLAGS), -+ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 4, GFLAGS), ++ /* PD_GPU */ ++ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", 0, RK2928_CLKGATE_CON(7), 14, GFLAGS), ++ GATE(0, "aclk_gpu_noc", "aclk_gpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(7), 15, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_GMAC1_VPU_25M, "clk_gmac1_25m", "ppll", 0, -+ RK3528_CLKSEL_CON(60), 2, 8, DFLAGS, -+ RK3528_CLKGATE_CON(25), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PPLL_125M_MATRIX, "clk_ppll_125m_src", "ppll", 0, -+ RK3528_CLKSEL_CON(60), 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(25), 2, GFLAGS), ++ /* PD_BUS */ ++ GATE(0, "sclk_initmem_mbist", "aclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 1, GFLAGS), ++ GATE(0, "aclk_initmem", "aclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac_bus", "aclk_cpu", 0, RK2928_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(0, "aclk_bus_noc", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS), + -+ COMPOSITE(CLK_CAN3, "clk_can3", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(73), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(64), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 4, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(HCLK_TSP, "hclk_tsp", "hclk_cpu", 0, RK2928_CLKGATE_CON(10), 11, GFLAGS), ++ GATE(HCLK_M_CRYPTO, "hclk_crypto_mst", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 11, GFLAGS), ++ GATE(HCLK_S_CRYPTO, "hclk_crypto_slv", "hclk_cpu", 0, RK2928_CLKGATE_CON(8), 12, GFLAGS), + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(61), 12, 2, MFLAGS, 6, 6, DFLAGS, -+ RK3528_CLKGATE_CON(25), 14, GFLAGS), -+ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(62), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(25), 15, GFLAGS), ++ GATE(0, "pclk_ddrupctl", "pclk_ddr_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(0, "pclk_ddrmon", "pclk_ddr_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(0, "pclk_msch_noc", "pclk_ddr_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 2, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_VPU_ROOT, "aclk_vpu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 3, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 9, GFLAGS), ++ GATE(PCLK_EFUSE_1024, "pclk_efuse_1024", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 13, GFLAGS), ++ GATE(PCLK_EFUSE_256, "pclk_efuse_256", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 14, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK2928_CLKGATE_CON(8), 15, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_cpu", 
0, RK2928_CLKGATE_CON(9), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(0, "pclk_stimer", "pclk_cpu", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(PCLK_PWM, "pclk_rk_pwm", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 9, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 11, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 13, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 14, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(0, "pclk_cru", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(0, "pclk_sim", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 3, GFLAGS), + -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(27), 5, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO1, "cclk_src_sdio1", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(72), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 3, GFLAGS), -+ COMPOSITE(CLK_CAN2, "clk_can2", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(73), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(32), 15, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 0, 3, DFLAGS, -+ RK3528_CLKGATE_CON(32), 12, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 8, 5, DFLAGS, -+ RK3528_CLKGATE_CON(33), 0, GFLAGS), -+ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(62), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(26), 2, GFLAGS), -+ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s2_2ch_sai_src", 0, -+ RK3528_CLKSEL_CON(63), 0, 8, DFLAGS, -+ RK3528_CLKGATE_CON(26), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 14, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 2, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI_I2S0, "mclk_sai_i2s0", mclk_sai_i2s0_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(62), 10, 1, MFLAGS, -+ RK3528_CLKGATE_CON(26), 10, GFLAGS), -+ GATE(MCLK_SAI_I2S2, "mclk_sai_i2s2", "mclk_i2s2_2ch_sai_src", 0, -+ RK3528_CLKGATE_CON(26), 12, GFLAGS), -+#if 0 -+ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, -+ RK3528_CLKGATE_CON(27), 6, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_phy_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(0, "pclk_acodecphy", "pclk_phy_pre", CLK_IS_CRITICAL, 
RK2928_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(PCLK_HDMI_PHY, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(0, "pclk_phy_noc", "pclk_phy_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(10), 9, GFLAGS), + -+ /* vpuphy */ -+ GATE(CLK_PIPE_USB3OTG_COMBO, "clk_pipe_usb3otg_combo", "clk_pipe_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 0, GFLAGS), -+ GATE(CLK_UTMI_USB3OTG, "clk_utmi_usb3otg", "clk_utmi_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 1, GFLAGS), -+ GATE(CLK_PCIE_PIPE_PHY, "clk_pcie_pipe_phy", "clk_pipe_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 2, GFLAGS), -+#endif -+ /* pcie */ -+ COMPOSITE_NOMUX(CLK_PPLL_100M_MATRIX, "clk_ppll_100m_src", "ppll", CLK_IS_CRITICAL, -+ RK3528_PCIE_CLKSEL_CON(1), 2, 5, DFLAGS, -+ RK3528_PCIE_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PPLL_50M_MATRIX, "clk_ppll_50m_src", "ppll", CLK_IS_CRITICAL, -+ RK3528_PCIE_CLKSEL_CON(1), 7, 5, DFLAGS, -+ RK3528_PCIE_CLKGATE_CON(0), 2, GFLAGS), -+ MUX(CLK_REF_PCIE_INNER_PHY, "clk_ref_pcie_inner_phy", clk_ref_pcie_inner_phy_p, 0, -+ RK3528_PCIE_CLKSEL_CON(1), 13, 1, MFLAGS), -+ FACTOR(CLK_REF_PCIE_100M_PHY, "clk_ref_pcie_100m_phy", "clk_ppll_100m_src", 0, 1, 1), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, RK2928_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(0, "aclk_vpu_noc", "aclk_vpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, RK2928_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(0, "aclk_rkvdec_noc", "aclk_rkvdec_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, RK2928_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(0, "hclk_vpu_noc", "hclk_vpu_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, RK2928_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(0, "hclk_rkvdec_noc", "hclk_rkvdec_pre", CLK_IS_CRITICAL, RK2928_CLKGATE_CON(15), 7, GFLAGS), + -+ /* gmac */ -+ FACTOR(CLK_GMAC1_RMII_VPU, "clk_gmac1_50m", "clk_ppll_50m_src", 0, 1, 1), -+ FACTOR(CLK_GMAC1_SRC_VPU, "clk_gmac1_125m", "clk_ppll_125m_src", 0, 1, 1), ++ /* PD_MMC */ ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3228_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 1), + -+ /* they are orphans */ -+ DIV(CLK_GMAC0_SRC, "clk_gmac0_src", "clk_gmac0_io_i", 0, -+ RK3528_CLKSEL_CON(84), 3, 6, DFLAGS), -+ GATE(CLK_GMAC0_TX, "clk_gmac0_tx", "clk_gmac0_src", 0, -+ RK3528_CLKGATE_CON(41), 13, GFLAGS), -+ GATE(CLK_GMAC0_RX, "clk_gmac0_rx", "clk_gmac0_src", 0, -+ RK3528_CLKGATE_CON(41), 14, GFLAGS), -+ GATE(CLK_GMAC0_RMII_50M, "clk_gmac0_rmii_50m", "clk_gmac0_io_i", 0, -+ RK3528_CLKGATE_CON(41), 12, GFLAGS), -+ GATE(CLK_SCRKEYGEN, "clk_scrkeygen", "clk_pmupvtm_out", 0, -+ RK3528_PMU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PVTM_OSCCHK, "clk_pvtm_oscchk", "clk_pmupvtm_out", 0, -+ RK3528_PMU_CLKGATE_CON(2), 1, GFLAGS), -+}; ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RK3228_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RK3228_SDIO_CON1, 1), + -+static struct rockchip_clk_branch rk3528_grf_clk_branches[] __initdata = { -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc0", RK3528_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc0", RK3528_SDMMC_CON1, 1), -+ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "cclk_src_sdio0", RK3528_SDIO0_CON0, 1), -+ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "cclk_src_sdio0", 
RK3528_SDIO0_CON1, 1), -+ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "cclk_src_sdio1", RK3528_SDIO1_CON0, 1), -+ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "cclk_src_sdio1", RK3528_SDIO1_CON1, 1), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3228_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3228_EMMC_CON1, 1), +}; + -+static void __iomem *rk3528_cru_base; ++static void __iomem *rk3228_cru_base; + -+static void rk3528_dump_cru(void) ++static void rk3228_dump_cru(void) +{ -+ if (rk3528_cru_base) { ++ if (rk3228_cru_base) { + pr_warn("CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base, -+ 0x8b8, false); -+ pr_warn("PCIE CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base + RK3528_PCIE_CRU_BASE, -+ 0x804, false); -+ pr_warn("DDRPHY CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base + RK3528_DDRPHY_CRU_BASE, -+ 0x804, false); ++ 32, 4, rk3228_cru_base, ++ 0x1f8, false); + } +} + -+static void __init rk3528_clk_init(struct device_node *np) ++static void __init rk3228_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; @@ -54380,8 +54027,6 @@ index 000000000..1b14cd57b + return; + } + -+ rk3528_cru_base = reg_base; -+ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -54390,1171 +54035,1107 @@ index 000000000..1b14cd57b + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3528_pll_clks, -+ ARRAY_SIZE(rk3528_pll_clks), -+ RK3528_GRF_SOC_STATUS0); ++ rockchip_clk_register_plls(ctx, rk3228_pll_clks, ++ ARRAY_SIZE(rk3228_pll_clks), ++ RK3228_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk3228_clk_branches, ++ ARRAY_SIZE(rk3228_clk_branches)); + + rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3528_cpuclk_data, rk3528_cpuclk_rates, -+ ARRAY_SIZE(rk3528_cpuclk_rates)); -+ rockchip_clk_register_branches(ctx, rk3528_clk_branches, -+ ARRAY_SIZE(rk3528_clk_branches)); ++ 3, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3228_cpuclk_data, rk3228_cpuclk_rates, ++ ARRAY_SIZE(rk3228_cpuclk_rates)); + -+ rockchip_register_softrst(np, 47, reg_base + RK3528_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); -+ rockchip_register_restart_notifier(ctx, RK3528_GLB_SRST_FST, NULL); + -+ rockchip_clk_of_add_provider(np, ctx); ++ rockchip_register_restart_notifier(ctx, RK3228_GLB_SRST_FST, NULL); + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3528_dump_cru; ++ rockchip_clk_of_add_provider(np, ctx); + ++ if (!rk_dump_cru) { ++ rk3228_cru_base = reg_base; ++ rk_dump_cru = rk3228_dump_cru; ++ } +} ++CLK_OF_DECLARE(rk3228_cru, "rockchip,rk3228-cru", rk3228_clk_init); + -+CLK_OF_DECLARE(rk3528_cru, "rockchip,rk3528-cru", rk3528_clk_init); -+ -+static void __init rk3528_grf_clk_init(struct device_node *np) ++static int __init clk_rk3228_probe(struct platform_device *pdev) +{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ -+ reg_base = of_iomap(of_get_parent(np), 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru grf region\n", __func__); -+ return; -+ } -+ -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip grf clk init failed\n", __func__); -+ return; -+ } ++ struct device_node *np = pdev->dev.of_node; + -+ rockchip_clk_register_branches(ctx, 
rk3528_grf_clk_branches, -+ ARRAY_SIZE(rk3528_grf_clk_branches)); ++ rk3228_clk_init(np); + -+ rockchip_clk_of_add_provider(np, ctx); ++ return 0; +} + -+CLK_OF_DECLARE(rk3528_grf_cru, "rockchip,rk3528-grf-cru", rk3528_grf_clk_init); ++static const struct of_device_id clk_rk3228_match_table[] = { ++ { ++ .compatible = "rockchip,rk3228-cru", ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3228_match_table); + -diff --git a/drivers/clk/rockchip-oh/clk-rk3562.c b/drivers/clk/rockchip-oh/clk-rk3562.c ++static struct platform_driver clk_rk3228_driver = { ++ .driver = { ++ .name = "clk-rk3228", ++ .of_match_table = clk_rk3228_match_table, ++ }, ++}; ++builtin_platform_driver_probe(clk_rk3228_driver, clk_rk3228_probe); ++ ++MODULE_DESCRIPTION("Rockchip RK3228 Clock Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip-oh/clk-rk3288.c b/drivers/clk/rockchip-oh/clk-rk3288.c new file mode 100644 -index 000000000..3c6f78fec +index 000000000..f9c4678fc --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3562.c -@@ -0,0 +1,1134 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rk3288.c +@@ -0,0 +1,1063 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang -+ * Author: Finley Xiao ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner + */ + +#include ++#include +#include +#include -+#include +#include ++#include +#include -+#include ++#include +#include "clk.h" ++#include + -+#define RK3562_GRF_SOC_STATUS0 0x430 ++#define RK3288_GRF_SOC_CON(x) (0x244 + x * 4) ++#define RK3288_GRF_SOC_STATUS1 0x284 + -+enum rk3562_plls { -+ apll, gpll, vpll, hpll, cpll, dpll, ++enum rk3288_variant { ++ RK3288_CRU, ++ RK3288W_CRU, +}; + -+static struct rockchip_pll_rate_table rk3562_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ 
RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), -+ RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0), -+ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), -+ RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0), ++enum rk3288_plls { ++ apll, dpll, cpll, gpll, npll, ++}; ++ ++static struct rockchip_pll_rate_table rk3288_pll_rates[] = { ++ RK3066_PLL_RATE(2208000000, 1, 92, 1), ++ RK3066_PLL_RATE(2184000000, 1, 91, 1), ++ RK3066_PLL_RATE(2160000000, 1, 90, 1), ++ RK3066_PLL_RATE(2136000000, 1, 89, 1), ++ RK3066_PLL_RATE(2112000000, 1, 88, 1), ++ RK3066_PLL_RATE(2088000000, 1, 87, 1), ++ RK3066_PLL_RATE(2064000000, 1, 86, 1), ++ RK3066_PLL_RATE(2040000000, 1, 85, 1), ++ RK3066_PLL_RATE(2016000000, 1, 84, 1), ++ RK3066_PLL_RATE(1992000000, 1, 83, 1), ++ RK3066_PLL_RATE(1968000000, 1, 82, 1), ++ RK3066_PLL_RATE(1944000000, 1, 81, 1), ++ RK3066_PLL_RATE(1920000000, 1, 80, 1), ++ RK3066_PLL_RATE(1896000000, 1, 79, 1), ++ RK3066_PLL_RATE(1872000000, 1, 78, 1), ++ RK3066_PLL_RATE(1848000000, 1, 77, 1), ++ RK3066_PLL_RATE(1824000000, 1, 76, 1), ++ RK3066_PLL_RATE(1800000000, 1, 75, 1), ++ RK3066_PLL_RATE(1776000000, 1, 74, 1), ++ RK3066_PLL_RATE(1752000000, 1, 73, 1), ++ RK3066_PLL_RATE(1728000000, 1, 72, 1), ++ RK3066_PLL_RATE(1704000000, 1, 71, 1), ++ RK3066_PLL_RATE(1680000000, 1, 70, 1), ++ RK3066_PLL_RATE(1656000000, 1, 69, 1), ++ RK3066_PLL_RATE(1632000000, 1, 68, 1), ++ RK3066_PLL_RATE(1608000000, 1, 67, 1), ++ RK3066_PLL_RATE(1560000000, 1, 65, 1), ++ RK3066_PLL_RATE(1512000000, 1, 63, 1), ++ RK3066_PLL_RATE(1488000000, 1, 62, 1), ++ RK3066_PLL_RATE(1464000000, 1, 61, 1), ++ RK3066_PLL_RATE(1440000000, 1, 60, 1), ++ RK3066_PLL_RATE(1416000000, 1, 59, 1), ++ RK3066_PLL_RATE(1392000000, 1, 58, 1), ++ RK3066_PLL_RATE(1368000000, 1, 57, 1), ++ RK3066_PLL_RATE(1344000000, 1, 56, 1), ++ RK3066_PLL_RATE(1320000000, 1, 55, 1), ++ RK3066_PLL_RATE(1296000000, 1, 54, 1), ++ RK3066_PLL_RATE(1272000000, 1, 53, 1), ++ RK3066_PLL_RATE(1248000000, 1, 52, 1), ++ RK3066_PLL_RATE(1224000000, 1, 51, 1), ++ RK3066_PLL_RATE(1200000000, 1, 50, 1), ++ RK3066_PLL_RATE(1188000000, 1, 99, 2), ++ RK3066_PLL_RATE(1176000000, 1, 49, 1), ++ RK3066_PLL_RATE(1128000000, 1, 47, 1), ++ RK3066_PLL_RATE(1104000000, 1, 46, 1), ++ RK3066_PLL_RATE(1008000000, 1, 84, 2), ++ RK3066_PLL_RATE( 912000000, 1, 76, 2), ++ RK3066_PLL_RATE( 891000000, 2, 297, 4), ++ RK3066_PLL_RATE( 888000000, 1, 74, 2), ++ RK3066_PLL_RATE( 816000000, 1, 68, 2), ++ RK3066_PLL_RATE( 798000000, 1, 133, 4), ++ RK3066_PLL_RATE( 792000000, 1, 66, 2), ++ RK3066_PLL_RATE( 768000000, 1, 64, 2), ++ RK3066_PLL_RATE( 742500000, 4, 495, 4), ++ RK3066_PLL_RATE( 696000000, 1, 58, 2), ++ RK3066_PLL_RATE_NB(621000000, 1, 207, 8, 1), ++ RK3066_PLL_RATE( 600000000, 1, 50, 2), ++ 
RK3066_PLL_RATE_NB(594000000, 1, 99, 4, 1), ++ RK3066_PLL_RATE( 552000000, 1, 46, 2), ++ RK3066_PLL_RATE( 504000000, 1, 84, 4), ++ RK3066_PLL_RATE( 500000000, 1, 125, 6), ++ RK3066_PLL_RATE( 456000000, 1, 76, 4), ++ RK3066_PLL_RATE( 428000000, 1, 107, 6), ++ RK3066_PLL_RATE( 408000000, 1, 68, 4), ++ RK3066_PLL_RATE( 400000000, 1, 100, 6), ++ RK3066_PLL_RATE_NB( 394000000, 1, 197, 12, 1), ++ RK3066_PLL_RATE( 384000000, 1, 64, 4), ++ RK3066_PLL_RATE( 360000000, 1, 60, 4), ++ RK3066_PLL_RATE_NB( 356000000, 1, 178, 12, 1), ++ RK3066_PLL_RATE_NB( 324000000, 1, 189, 14, 1), ++ RK3066_PLL_RATE( 312000000, 1, 52, 4), ++ RK3066_PLL_RATE_NB( 308000000, 1, 154, 12, 1), ++ RK3066_PLL_RATE_NB( 303000000, 1, 202, 16, 1), ++ RK3066_PLL_RATE( 300000000, 1, 75, 6), ++ RK3066_PLL_RATE_NB( 297750000, 2, 397, 16, 1), ++ RK3066_PLL_RATE( 297000000, 1, 99, 8), ++ RK3066_PLL_RATE_NB( 293250000, 2, 391, 16, 1), ++ RK3066_PLL_RATE_NB( 292500000, 1, 195, 16, 1), ++ RK3066_PLL_RATE( 273600000, 1, 114, 10), ++ RK3066_PLL_RATE_NB( 273000000, 1, 182, 16, 1), ++ RK3066_PLL_RATE_NB( 270000000, 1, 180, 16, 1), ++ RK3066_PLL_RATE_NB( 266250000, 2, 355, 16, 1), ++ RK3066_PLL_RATE_NB( 256500000, 1, 171, 16, 1), ++ RK3066_PLL_RATE( 252000000, 1, 84, 8), ++ RK3066_PLL_RATE_NB( 250500000, 1, 167, 16, 1), ++ RK3066_PLL_RATE_NB( 243428571, 1, 142, 14, 1), ++ RK3066_PLL_RATE( 238000000, 1, 119, 12), ++ RK3066_PLL_RATE_NB( 219750000, 2, 293, 16, 1), ++ RK3066_PLL_RATE_NB( 216000000, 1, 144, 16, 1), ++ RK3066_PLL_RATE_NB( 213000000, 1, 142, 16, 1), ++ RK3066_PLL_RATE( 195428571, 1, 114, 14), ++ RK3066_PLL_RATE( 160000000, 1, 80, 12), ++ RK3066_PLL_RATE( 157500000, 1, 105, 16), ++ RK3066_PLL_RATE( 148500000, 1, 99, 16), ++ RK3066_PLL_RATE( 126000000, 1, 84, 16), + { /* sentinel */ }, +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; -+PNAME(gpll_cpll_pvtpll_dmyapll_p) = { "gpll", "cpll", "log_pvtpll", "dummy_apll" }; -+PNAME(gpll_cpll_hpll_xin24m_p) = { "gpll", "cpll", "hpll", "xin24m" }; -+PNAME(gpll_cpll_vpll_dmyhpll_p) = { "gpll", "cpll", "vpll", "dummy_hpll" }; -+PNAME(gpll_dmyhpll_vpll_apll_p) = { "gpll", "dummy_hpll", "vpll", "apll" }; -+PNAME(gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(gpll_cpll_xin24m_dmyapll_p) = { "gpll", "cpll", "xin24m", "dummy_apll" }; -+PNAME(gpll_cpll_xin24m_dmyhpll_p) = { "gpll", "cpll", "xin24m", "dummy_hpll" }; -+PNAME(vpll_dmyhpll_gpll_cpll_p) = { "vpll", "dummy_hpll", "gpll", "cpll" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; -+PNAME(mux_50m_xin24m_p) = { "clk_matrix_50m_src", "xin24m" }; -+PNAME(mux_100m_50m_xin24m_p) = { "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; -+PNAME(mux_125m_xin24m_p) = { "clk_matrix_125m_src", "xin24m" }; -+PNAME(mux_200m_xin24m_32k_p) = { "clk_200m_pmu", "xin24m", "clk_rtc_32k" }; -+PNAME(mux_200m_100m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src" }; -+PNAME(mux_200m_100m_50m_xin24m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; -+PNAME(clk_sai0_p) = { "clk_sai0_src", "clk_sai0_frac", "xin_osc0_half", "mclk_sai0_from_io" }; -+PNAME(mclk_sai0_out2io_p) = { "mclk_sai0", "xin_osc0_half" }; -+PNAME(clk_sai1_p) = { "clk_sai1_src", "clk_sai1_frac", "xin_osc0_half", "mclk_sai1_from_io" }; -+PNAME(mclk_sai1_out2io_p) = { "mclk_sai1", "xin_osc0_half" }; -+PNAME(clk_sai2_p) = { "clk_sai2_src", "clk_sai2_frac", "xin_osc0_half", "mclk_sai2_from_io" }; -+PNAME(mclk_sai2_out2io_p) = { "mclk_sai2", 
"xin_osc0_half" }; -+PNAME(clk_spdif_p) = { "clk_spdif_src", "clk_spdif_frac", "xin_osc0_half" }; -+PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; -+PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; -+PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_32k_pvtm" }; -+PNAME(clk_pmu1_uart0_p) = { "clk_pmu1_uart0_src", "clk_pmu1_uart0_frac", "xin24m" }; -+PNAME(clk_pipephy_ref_p) = { "clk_pipephy_div", "clk_pipephy_xin24m" }; -+PNAME(clk_usbphy_ref_p) = { "clk_usb2phy_xin24m", "clk_24m_sscsrc" }; -+PNAME(clk_mipidsi_ref_p) = { "clk_mipidsiphy_xin24m", "clk_24m_sscsrc" }; ++#define RK3288_DIV_ACLK_CORE_M0_MASK 0xf ++#define RK3288_DIV_ACLK_CORE_M0_SHIFT 0 ++#define RK3288_DIV_ACLK_CORE_MP_MASK 0xf ++#define RK3288_DIV_ACLK_CORE_MP_SHIFT 4 ++#define RK3288_DIV_L2RAM_MASK 0x7 ++#define RK3288_DIV_L2RAM_SHIFT 0 ++#define RK3288_DIV_ATCLK_MASK 0x1f ++#define RK3288_DIV_ATCLK_SHIFT 4 ++#define RK3288_DIV_PCLK_DBGPRE_MASK 0x1f ++#define RK3288_DIV_PCLK_DBGPRE_SHIFT 9 + -+static struct rockchip_pll_clock rk3562_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ 0, RK3562_PLL_CON(0), -+ RK3562_MODE_CON, 0, 0, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK3562_PLL_CON(24), -+ RK3562_MODE_CON, 2, 3, 0, rk3562_pll_rates), -+ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, -+ 0, RK3562_PLL_CON(32), -+ RK3562_MODE_CON, 6, 4, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, -+ 0, RK3562_PLL_CON(40), -+ RK3562_MODE_CON, 8, 5, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RK3562_PMU1_PLL_CON(0), -+ RK3562_PMU1_MODE_CON, 0, 2, 0, rk3562_pll_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3562_SUBDDR_PLL_CON(0), -+ RK3562_SUBDDR_MODE_CON, 0, 1, 0, NULL), ++#define RK3288_CLKSEL0(_core_m0, _core_mp) \ ++ { \ ++ .reg = RK3288_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_core_m0, RK3288_DIV_ACLK_CORE_M0_MASK, \ ++ RK3288_DIV_ACLK_CORE_M0_SHIFT) | \ ++ HIWORD_UPDATE(_core_mp, RK3288_DIV_ACLK_CORE_MP_MASK, \ ++ RK3288_DIV_ACLK_CORE_MP_SHIFT), \ ++ } ++#define RK3288_CLKSEL37(_l2ram, _atclk, _pclk_dbg_pre) \ ++ { \ ++ .reg = RK3288_CLKSEL_CON(37), \ ++ .val = HIWORD_UPDATE(_l2ram, RK3288_DIV_L2RAM_MASK, \ ++ RK3288_DIV_L2RAM_SHIFT) | \ ++ HIWORD_UPDATE(_atclk, RK3288_DIV_ATCLK_MASK, \ ++ RK3288_DIV_ATCLK_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg_pre, \ ++ RK3288_DIV_PCLK_DBGPRE_MASK, \ ++ RK3288_DIV_PCLK_DBGPRE_SHIFT), \ ++ } ++ ++#define RK3288_CPUCLK_RATE(_prate, _core_m0, _core_mp, _l2ram, _atclk, _pdbg) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3288_CLKSEL0(_core_m0, _core_mp), \ ++ RK3288_CLKSEL37(_l2ram, _atclk, _pdbg), \ ++ }, \ ++ } ++ ++static struct rockchip_cpuclk_rate_table rk3288_cpuclk_rates[] __initdata = { ++ RK3288_CPUCLK_RATE(1800000000, 1, 3, 1, 3, 3), ++ 
RK3288_CPUCLK_RATE(1704000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE(1608000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE(1512000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE(1416000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE(1200000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE(1008000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 816000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 696000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 600000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 408000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 312000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 216000000, 1, 3, 1, 3, 3), ++ RK3288_CPUCLK_RATE( 126000000, 1, 3, 1, 3, 3), +}; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++static const struct rockchip_cpuclk_reg_data rk3288_cpuclk_data = { ++ .core_reg[0] = RK3288_CLKSEL_CON(0), ++ .div_core_shift[0] = 8, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 15, ++ .mux_core_mask = 0x1, ++}; + -+static struct rockchip_clk_branch rk3562_clk_sai0_fracmux __initdata = -+ MUX(CLK_SAI0, "clk_sai0", clk_sai0_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(3), 6, 2, MFLAGS); ++PNAME(mux_pll_p) = { "xin24m", "xin32k" }; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_aclk_cpu_src_p) = { "cpll_aclk_cpu", "gpll_aclk_cpu" }; + -+static struct rockchip_clk_branch rk3562_clk_sai1_fracmux __initdata = -+ MUX(CLK_SAI1, "clk_sai1", clk_sai1_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(5), 6, 2, MFLAGS); ++PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; ++PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; ++PNAME(mux_pll_src_cpll_gpll_usb480m_p) = { "cpll", "gpll", "unstable:usbphy480m_src" }; ++PNAME(mux_pll_src_cpll_gll_usb_npll_p) = { "cpll", "gpll", "unstable:usbphy480m_src", "npll" }; + -+static struct rockchip_clk_branch rk3562_clk_sai2_fracmux __initdata = -+ MUX(CLK_SAI2, "clk_sai2", clk_sai2_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(8), 6, 2, MFLAGS); ++PNAME(mux_mmc_src_p) = { "cpll", "gpll", "xin24m", "xin24m" }; ++PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_clkout_p) = { "i2s_pre", "xin12m" }; ++PNAME(mux_spdif_p) = { "spdif_pre", "spdif_frac", "xin12m" }; ++PNAME(mux_spdif_8ch_p) = { "spdif_8ch_pre", "spdif_8ch_frac", "xin12m" }; ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++PNAME(mux_uart3_p) = { "uart3_src", "uart3_frac", "xin24m" }; ++PNAME(mux_uart4_p) = { "uart4_src", "uart4_frac", "xin24m" }; ++PNAME(mux_vip_out_p) = { "vip_src", "xin24m" }; ++PNAME(mux_mac_p) = { "mac_pll_src", "ext_gmac" }; ++PNAME(mux_hsadcout_p) = { "hsadc_src", "ext_hsadc" }; ++PNAME(mux_edp_24m_p) = { "ext_edp_24m", "xin24m" }; ++PNAME(mux_tspout_p) = { "cpll", "gpll", "npll", "xin27m" }; + -+static struct rockchip_clk_branch rk3562_clk_spdif_fracmux __initdata = -+ MUX(CLK_SPDIF, "clk_spdif", clk_spdif_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(15), 6, 2, MFLAGS); ++PNAME(mux_aclk_vcodec_pre_p) = { "aclk_vdpu", "aclk_vepu" }; ++PNAME(mux_testout_src_p) = { "aclk_peri", "armclk", "aclk_vio0", "ddrphy", ++ "aclk_vcodec", "aclk_gpu", "sclk_rga", "aclk_cpu", ++ "xin24m", "xin27m", "xin32k", "clk_wifi", ++ "dclk_vop0", "dclk_vop1", 
"sclk_isp_jpe", ++ "sclk_isp" }; + -+static struct rockchip_clk_branch rk3562_clk_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(21), 14, 2, MFLAGS); ++PNAME(mux_usbphy480m_p) = { "sclk_otgphy1_480m", "sclk_otgphy2_480m", ++ "sclk_otgphy0_480m" }; ++PNAME(mux_hsicphy480m_p) = { "cpll", "gpll", "usbphy480m_src" }; ++PNAME(mux_hsicphy12m_p) = { "hsicphy12m_xin12m", "hsicphy12m_usbphy" }; + -+static struct rockchip_clk_branch rk3562_clk_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(23), 14, 2, MFLAGS); ++static struct rockchip_pll_clock rk3288_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK3288_PLL_CON(0), ++ RK3288_MODE_CON, 0, 6, 0, rk3288_pll_rates), ++ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK3288_PLL_CON(4), ++ RK3288_MODE_CON, 4, 5, 0, NULL), ++ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK3288_PLL_CON(8), ++ RK3288_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), ++ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK3288_PLL_CON(12), ++ RK3288_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), ++ [npll] = PLL(pll_rk3066, PLL_NPLL, "npll", mux_pll_p, 0, RK3288_PLL_CON(16), ++ RK3288_MODE_CON, 14, 9, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates), ++}; + -+static struct rockchip_clk_branch rk3562_clk_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(25), 14, 2, MFLAGS); ++static struct clk_div_table div_hclk_cpu_t[] = { ++ { .val = 0, .div = 1 }, ++ { .val = 1, .div = 2 }, ++ { .val = 3, .div = 4 }, ++ { /* sentinel */}, ++}; + -+static struct rockchip_clk_branch rk3562_clk_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(27), 14, 2, MFLAGS); ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK + -+static struct rockchip_clk_branch rk3562_clk_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(29), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_i2s_fracmux __initdata = ++ MUX(0, "i2s_pre", mux_i2s_pre_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(4), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_uart6_fracmux __initdata = -+ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(31), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_spdif_fracmux __initdata = ++ MUX(0, "spdif_mux", mux_spdif_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(5), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_uart7_fracmux __initdata = -+ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(33), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_spdif_8ch_fracmux __initdata = ++ MUX(0, "spdif_8ch_mux", mux_spdif_8ch_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(40), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_uart8_fracmux __initdata = -+ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(35), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ 
RK3288_CLKSEL_CON(13), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_uart9_fracmux __initdata = -+ MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(37), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(14), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_rtc32k_pmu_fracmux __initdata = -+ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3562_PMU0_CLKSEL_CON(1), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(15), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_pmu1_uart0_fracmux __initdata = -+ MUX(CLK_PMU1_UART0, "clk_pmu1_uart0", clk_pmu1_uart0_p, CLK_SET_RATE_PARENT, -+ RK3562_PMU1_CLKSEL_CON(2), 6, 2, MFLAGS); ++static struct rockchip_clk_branch rk3288_uart3_fracmux __initdata = ++ MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(16), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3562_clk_branches[] __initdata = { ++static struct rockchip_clk_branch rk3288_uart4_fracmux __initdata = ++ MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(3), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { + /* -+ * CRU Clock-Architecture ++ * Clock-Architecture Diagram 1 + */ -+ /* PD_TOP */ -+ COMPOSITE(CLK_MATRIX_50M_SRC, "clk_matrix_50m_src", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE(CLK_MATRIX_100M_SRC, "clk_matrix_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE(CLK_MATRIX_125M_SRC, "clk_matrix_125m_src", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(CLK_MATRIX_200M_SRC, "clk_matrix_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE(CLK_MATRIX_300M_SRC, "clk_matrix_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE(ACLK_TOP, "aclk_top", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(5), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(ACLK_TOP_VIO, "aclk_top_vio", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE(CLK_24M_SSCSRC, "clk_24m_sscsrc", vpll_dmyhpll_gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE(CLK_CAM0_OUT2IO, "clk_cam0_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE(CLK_CAM1_OUT2IO, "clk_cam1_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(8), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE(CLK_CAM2_OUT2IO, "clk_cam2_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(CLK_CAM3_OUT2IO, "clk_cam3_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ 
RK3562_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 15, GFLAGS), -+ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), + -+ /* PD_BUS */ -+ COMPOSITE(ACLK_BUS, "aclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(40), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(18), 0, GFLAGS), -+ COMPOSITE(HCLK_BUS, "hclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(40), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(18), 1, GFLAGS), -+ COMPOSITE(PCLK_BUS, "pclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(41), 8, 2, MFLAGS, -+ RK3562_CLKGATE_CON(19), 5, GFLAGS), -+ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 8, GFLAGS), -+ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 9, GFLAGS), -+ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_BUS_GPIO, "dclk_bus_gpio", mux_xin24m_32k_p, 0, -+ RK3562_CLKSEL_CON(41), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(20), 4, GFLAGS), -+ GATE(DCLK_BUS_GPIO3, "dclk_bus_gpio3", "dclk_bus_gpio", 0, -+ RK3562_CLKGATE_CON(20), 5, GFLAGS), -+ GATE(DCLK_BUS_GPIO4, "dclk_bus_gpio4", "dclk_bus_gpio", 0, -+ RK3562_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(21), 0, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 1, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 4, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 6, GFLAGS), -+ GATE(PCLK_STIMER, "pclk_stimer", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 7, GFLAGS), -+ GATE(CLK_STIMER0, "clk_stimer0", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 8, GFLAGS), -+ GATE(CLK_STIMER1, "clk_stimer1", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 9, GFLAGS), -+ GATE(PCLK_WDTNS, "pclk_wdtns", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 0, GFLAGS), -+ GATE(CLK_WDTNS, "clk_wdtns", "xin24m", 0, -+ RK3562_CLKGATE_CON(22), 1, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 2, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 4, GFLAGS), -+ GATE(PCLK_INTC, "pclk_intc", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 5, GFLAGS), -+ GATE(ACLK_BUS_GIC400, "aclk_bus_gic400", "aclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 
6, GFLAGS), -+ GATE(ACLK_BUS_SPINLOCK, "aclk_bus_spinlock", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(23), 0, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(FCLK_BUS_CM0_CORE, "fclk_bus_cm0_core", "hclk_bus", 0, -+ RK3562_CLKGATE_CON(23), 3, GFLAGS), -+ GATE(CLK_BUS_CM0_RTC, "clk_bus_cm0_rtc", "clk_rtc_32k", 0, -+ RK3562_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(HCLK_ICACHE, "hclk_icache", "hclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 8, GFLAGS), -+ GATE(HCLK_DCACHE, "hclk_dcache", "hclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 9, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(24), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK3562_CLKSEL_CON(43), 0, 11, DFLAGS, -+ RK3562_CLKGATE_CON(24), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RK3562_CLKSEL_CON(43), 11, 5, DFLAGS, -+ RK3562_CLKGATE_CON(24), 3, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(24), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC_VCCIO156, "clk_saradc_vccio156", "xin24m", 0, -+ RK3562_CLKSEL_CON(44), 0, 12, DFLAGS, -+ RK3562_CLKGATE_CON(24), 9, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 0, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC_125M_CRU_I, "clk_gmac_125m_cru_i", mux_125m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(45), 8, 1, MFLAGS, -+ RK3562_CLKGATE_CON(25), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC_50M_CRU_I, "clk_gmac_50m_cru_i", mux_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(45), 7, 1, MFLAGS, -+ RK3562_CLKGATE_CON(25), 3, GFLAGS), -+ COMPOSITE(CLK_GMAC_ETH_OUT2IO, "clk_gmac_eth_out2io", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(PCLK_APB2ASB_VCCIO156, "pclk_apb2asb_vccio156", "pclk_bus", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(PCLK_TO_VCCIO156, "pclk_to_vccio156", "pclk_bus", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(25), 6, GFLAGS), -+ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 8, GFLAGS), -+ GATE(PCLK_DSITX, "pclk_dsitx", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 9, GFLAGS), -+ GATE(PCLK_CPU_EMA_DET, "pclk_cpu_ema_det", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(25), 10, GFLAGS), -+ GATE(PCLK_HASH, "pclk_hash", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 11, GFLAGS), -+ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(25), 15, GFLAGS), -+ GATE(PCLK_ASB2APB_VCCIO156, "pclk_asb2apb_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(PCLK_IOC_VCCIO156, "pclk_ioc_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(26), 1, GFLAGS), -+ GATE(PCLK_GPIO3_VCCIO156, "pclk_gpio3_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 2, GFLAGS), -+ GATE(PCLK_GPIO4_VCCIO156, "pclk_gpio4_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(PCLK_SARADC_VCCIO156, "pclk_saradc_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(PCLK_MAC100, "pclk_mac100", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(ACLK_MAC100, "aclk_mac100", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(27), 1, GFLAGS), -+ 
COMPOSITE_NODIV(CLK_MAC100_50M_MATRIX, "clk_mac100_50m_matrix", mux_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(47), 7, 1, MFLAGS, -+ RK3562_CLKGATE_CON(27), 2, GFLAGS), ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(0), 2, GFLAGS), + -+ /* PD_CORE */ -+ COMPOSITE_NOMUX(0, "aclk_core_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, -+ RK3562_CLKSEL_CON(11), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3562_CLKGATE_CON(4), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, -+ RK3562_CLKSEL_CON(12), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3562_CLKGATE_CON(4), 5, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_CORE, "hclk_core", "gpll", CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(13), 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(0, "pclk_dbg_daplite", "pclk_dbg_pre", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(4), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore0", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(36), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore1", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(36), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore2", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(36), 8, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "armcore3", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(36), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "l2ram", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(37), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core_m0", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(0), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 5, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core_mp", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(0), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 6, GFLAGS), ++ COMPOSITE_NOMUX(0, "atclk", "armclk", 0, ++ RK3288_CLKSEL_CON(37), 4, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 7, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "armclk", CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(37), 9, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3288_CLKGATE_CON(12), 8, GFLAGS), ++ GATE(0, "pclk_dbg", "pclk_dbg_pre", 0, ++ RK3288_CLKGATE_CON(12), 9, GFLAGS), ++ GATE(0, "cs_dbg", "pclk_dbg_pre", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(12), 10, GFLAGS), ++ GATE(0, "pclk_core_niu", "pclk_dbg_pre", 0, ++ RK3288_CLKGATE_CON(12), 11, GFLAGS), + -+ /* PD_DDR */ -+ FACTOR_GATE(0, "clk_gpll_mux_to_ddr", "gpll", 0, 1, 4, -+ RK3328_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, -+ RK3562_DDR_CLKSEL_CON(1), 8, 5, DFLAGS, -+ RK3562_DDR_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MSCH_BRG_BIU, "clk_msch_brg_biu", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, -+ RK3562_DDR_CLKSEL_CON(1), 0, 4, DFLAGS, -+ RK3562_DDR_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_DDR_PHY, "pclk_ddr_phy", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_DDR_DFICTL, "pclk_ddr_dfictl", 
"pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(PCLK_DDR_DMA2DDR, "pclk_ddr_dma2ddr", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_DDR_CRU, "pclk_ddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_SUBDDR_CRU, "pclk_subddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, 0, ++ RK3288_CLKSEL_CON(26), 2, 1, 0, 0, ++ ROCKCHIP_DDRCLK_SIP_V2), ++ COMPOSITE_NOGATE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(26), 2, 1, MFLAGS, 0, 2, ++ DFLAGS | CLK_DIVIDER_POWER_OF_TWO), + -+ /* PD_GPU */ -+ COMPOSITE(CLK_GPU_PRE, "clk_gpu_pre", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(18), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre", 0, -+ RK3562_CLKSEL_CON(19), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre", 0, -+ RK3562_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_GPU_BRG, "clk_gpu_brg", mux_200m_100m_p, 0, -+ RK3562_CLKSEL_CON(19), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(0, "gpll_aclk_cpu", "gpll", CLK_IS_CRITICAL, ++ RK3288_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(0, "cpll_aclk_cpu", "cpll", CLK_IS_CRITICAL, ++ RK3288_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(1), 15, 1, MFLAGS, 3, 5, DFLAGS), ++ DIV(0, "aclk_cpu_pre", "aclk_cpu_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(1), 0, 3, DFLAGS), ++ GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, ++ RK3288_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(1), 12, 3, DFLAGS, ++ RK3288_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX_DIVTBL(HCLK_CPU, "hclk_cpu", "aclk_cpu_pre", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(1), 8, 2, DFLAGS, div_hclk_cpu_t, ++ RK3288_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "c2c_host", "aclk_cpu_src", 0, ++ RK3288_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_CRYPTO, "crypto", "aclk_cpu_pre", 0, ++ RK3288_CLKSEL_CON(26), 6, 2, DFLAGS, ++ RK3288_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(0), 7, GFLAGS), + -+ /* PD_NPU */ -+ COMPOSITE(CLK_NPU_PRE, "clk_npu_pre", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu_pre", 0, -+ RK3562_CLKSEL_CON(16), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(ACLK_RKNN, "aclk_rknn", "clk_npu_pre", 0, -+ RK3562_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_pre", 0, -+ RK3562_CLKGATE_CON(6), 5, GFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ /* PD_PERI */ -+ COMPOSITE(ACLK_PERI, "aclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ 
RK3562_PERI_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(HCLK_PERI, "hclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_PERI_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE(PCLK_PERI, "pclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_PERI_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PERICRU, "pclk_pericru", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(HCLK_SAI0, "hclk_sai0", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE(CLK_SAI0_SRC, "clk_sai0_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(1), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI0_FRAC, "clk_sai0_frac", "clk_sai0_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(2), 0, -+ RK3562_PERI_CLKGATE_CON(2), 2, GFLAGS, -+ &rk3562_clk_sai0_fracmux), -+ GATE(MCLK_SAI0, "mclk_sai0", "clk_sai0", 0, -+ RK3562_PERI_CLKGATE_CON(2), 3, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI0_OUT2IO, "mclk_sai0_out2io", mclk_sai0_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(3), 5, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(HCLK_SAI1, "hclk_sai1", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE(CLK_SAI1_SRC, "clk_sai1_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI1_FRAC, "clk_sai1_frac", "clk_sai1_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(4), 0, -+ RK3562_PERI_CLKGATE_CON(2), 7, GFLAGS, -+ &rk3562_clk_sai1_fracmux), -+ GATE(MCLK_SAI1, "mclk_sai1", "clk_sai1", 0, -+ RK3562_PERI_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI1_OUT2IO, "mclk_sai1_out2io", mclk_sai1_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(5), 5, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(HCLK_SAI2, "hclk_sai2", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE(CLK_SAI2_SRC, "clk_sai2_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI2_FRAC, "clk_sai2_frac", "clk_sai2_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(7), 0, -+ RK3562_PERI_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3562_clk_sai2_fracmux), -+ GATE(MCLK_SAI2, "mclk_sai2", "clk_sai2", 0, -+ RK3562_PERI_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI2_OUT2IO, "mclk_sai2_out2io", mclk_sai2_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(8), 5, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(HCLK_DSM, "hclk_dsm", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 1, GFLAGS), -+ GATE(CLK_DSM, "clk_dsm", "mclk_sai1", 0, -+ RK3562_PERI_CLKGATE_CON(3), 2, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE(MCLK_PDM, "mclk_pdm", gpll_cpll_hpll_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(3), 5, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE(CLK_SPDIF_SRC, "clk_spdif_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(3), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(14), 0, 
-+ RK3562_PERI_CLKGATE_CON(3), 10, GFLAGS, -+ &rk3562_clk_spdif_fracmux), -+ GATE(MCLK_SPDIF, "mclk_spdif", "clk_spdif", 0, -+ RK3562_PERI_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE(CCLK_SDMMC0, "cclk_sdmmc0", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 1, GFLAGS), -+ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "cclk_sdmmc0", RK3562_SDMMC0_CON0, 1), -+ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "cclk_sdmmc0", RK3562_SDMMC0_CON1, 1), -+ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE(CCLK_SDMMC1, "cclk_sdmmc1", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(17), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 3, GFLAGS), -+ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "cclk_sdmmc1", RK3562_SDMMC1_CON0, 1), -+ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "cclk_sdmmc1", RK3562_SDMMC1_CON1, 1), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(ACLK_EMMC, "aclk_emmc", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 9, GFLAGS), -+ COMPOSITE(CCLK_EMMC, "cclk_emmc", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 10, GFLAGS), -+ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(4), 12, GFLAGS), -+ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 14, GFLAGS), -+ GATE(HCLK_USB2HOST, "hclk_usb2host", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(HCLK_USB2HOST_ARB, "hclk_usb2host_arb", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 12, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, -+ RK3562_PERI_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(6), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 14, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(SCLK_IN_SPI2, "sclk_in_spi2", "sclk_in_spi2_io", 0, -+ RK3562_PERI_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(PCLK_UART8, "pclk_uart8", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 7, GFLAGS), -+ 
GATE(PCLK_UART9, "pclk_uart9", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 8, GFLAGS), -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(21), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(22), 0, -+ RK3562_PERI_CLKGATE_CON(7), 10, GFLAGS, -+ &rk3562_clk_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RK3562_PERI_CLKGATE_CON(7), 11, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(23), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(24), 0, -+ RK3562_PERI_CLKGATE_CON(7), 13, GFLAGS, -+ &rk3562_clk_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RK3562_PERI_CLKGATE_CON(7), 14, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(26), 0, -+ RK3562_PERI_CLKGATE_CON(8), 0, GFLAGS, -+ &rk3562_clk_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RK3562_PERI_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(27), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 2, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(28), 0, -+ RK3562_PERI_CLKGATE_CON(8), 3, GFLAGS, -+ &rk3562_clk_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RK3562_PERI_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(29), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(30), 0, -+ RK3562_PERI_CLKGATE_CON(8), 6, GFLAGS, -+ &rk3562_clk_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RK3562_PERI_CLKGATE_CON(8), 7, GFLAGS), -+ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(31), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(32), 0, -+ RK3562_PERI_CLKGATE_CON(8), 9, GFLAGS, -+ &rk3562_clk_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, -+ RK3562_PERI_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(34), 0, -+ RK3562_PERI_CLKGATE_CON(8), 12, GFLAGS, -+ &rk3562_clk_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, -+ RK3562_PERI_CLKGATE_CON(8), 13, GFLAGS), -+ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(35), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, -+ 
RK3562_PERI_CLKSEL_CON(36), 0, -+ RK3562_PERI_CLKGATE_CON(8), 15, GFLAGS, -+ &rk3562_clk_uart8_fracmux), -+ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, -+ RK3562_PERI_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(37), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(38), 0, -+ RK3562_PERI_CLKGATE_CON(9), 2, GFLAGS, -+ &rk3562_clk_uart9_fracmux), -+ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, -+ RK3562_PERI_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 0, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 6, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(PCLK_PWM3_PERI, "pclk_pwm3_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM3_PERI, "clk_pwm3_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 8, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(CLK_CAPTURE_PWM3_PERI, "clk_capture_pwm3_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(11), 2, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(41), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(PCLK_PERI_WDT, "pclk_peri_wdt", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PERI_WDT, "tclk_peri_wdt", mux_xin24m_32k_p, 0, -+ RK3562_PERI_CLKSEL_CON(43), 15, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(ACLK_SYSMEM, "aclk_sysmem", "aclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 2, GFLAGS), -+ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(PCLK_PERI_GRF, "pclk_peri_grf", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(ACLK_RKDMAC, "aclk_rkdmac", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(14), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, -+ RK3562_PERI_CLKSEL_CON(44), 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(14), 3, GFLAGS), 
-+ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(14), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKSEL_CON(44), 8, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(PCLK_USB2PHY, "pclk_usb2phy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(PCLK_PIPEPHY, "pclk_pipephy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(16), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3562_PERI_CLKSEL_CON(46), 0, 12, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_IOC_VCCIO234, "pclk_ioc_vccio234", "pclk_peri", CLK_IS_CRITICAL, -+ RK3562_PERI_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_PERI_GPIO1, "pclk_peri_gpio1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(PCLK_PERI_GPIO2, "pclk_peri_gpio2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(17), 1, GFLAGS), -+ COMPOSITE_NODIV(DCLK_PERI_GPIO, "dclk_peri_gpio", mux_xin24m_32k_p, 0, -+ RK3562_PERI_CLKSEL_CON(47), 8, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(DCLK_PERI_GPIO1, "dclk_peri_gpio1", "dclk_peri_gpio", 0, -+ RK3562_PERI_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(DCLK_PERI_GPIO2, "dclk_peri_gpio2", "dclk_peri_gpio", 0, -+ RK3562_PERI_CLKGATE_CON(17), 3, GFLAGS), ++ COMPOSITE(SCLK_I2S_SRC, "i2s_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(4), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(4), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s_frac", "i2s_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(8), 0, ++ RK3288_CLKGATE_CON(4), 2, GFLAGS, ++ &rk3288_i2s_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_OUT, "i2s0_clkout", mux_i2s_clkout_p, 0, ++ RK3288_CLKSEL_CON(4), 12, 1, MFLAGS, ++ RK3288_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(SCLK_I2S0, "sclk_i2s0", "i2s_pre", CLK_SET_RATE_PARENT, ++ RK3288_CLKGATE_CON(4), 3, GFLAGS), + -+ /* PD_PHP */ -+ COMPOSITE(ACLK_PHP, "aclk_php", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(16), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", 0, -+ RK3562_CLKSEL_CON(36), 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_php", 0, -+ RK3562_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(CLK_PCIE20_AUX, "clk_pcie20_aux", "xin24m", 0, -+ RK3562_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, -+ RK3562_CLKSEL_CON(36), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(CLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, -+ RK3562_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(CLK_PIPEPHY_REF_FUNC, "clk_pipephy_ref_func", "pclk_pcie20", 0, -+ RK3562_CLKGATE_CON(17), 3, GFLAGS), ++ MUX(0, "spdif_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(5), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, 
"spdif_pre", "spdif_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(5), 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(4), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(9), 0, ++ RK3288_CLKGATE_CON(4), 5, GFLAGS, ++ &rk3288_spdif_fracmux), ++ GATE(SCLK_SPDIF, "sclk_spdif", "spdif_mux", CLK_SET_RATE_PARENT, ++ RK3288_CLKGATE_CON(4), 6, GFLAGS), ++ COMPOSITE_NOMUX(0, "spdif_8ch_pre", "spdif_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(40), 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(4), 7, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_pre", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(41), 0, ++ RK3288_CLKGATE_CON(4), 8, GFLAGS, ++ &rk3288_spdif_8ch_fracmux), ++ GATE(SCLK_SPDIF8CH, "sclk_spdif_8ch", "spdif_8ch_mux", CLK_SET_RATE_PARENT, ++ RK3288_CLKGATE_CON(4), 9, GFLAGS), + -+ /* PD_PMU1 */ -+ COMPOSITE_NOMUX(CLK_200M_PMU, "clk_200m_pmu", "cpll", CLK_IS_CRITICAL, -+ RK3562_PMU1_CLKSEL_CON(0), 0, 5, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(0), 1, GFLAGS), -+ /* PD_PMU0 */ -+ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKSEL_CON(0), 0, -+ RK3562_PMU0_CLKGATE_CON(0), 15, GFLAGS, -+ &rk3562_rtc32k_pmu_fracmux), -+ COMPOSITE_NOMUX(BUSCLK_PDPMU0, "busclk_pdpmu0", "clk_200m_pmu", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKSEL_CON(1), 3, 2, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_PMU0_CRU, "pclk_pmu0_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(PCLK_PMU0_PMU, "pclk_pmu0_pmu", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(CLK_PMU0_PMU, "clk_pmu0_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU0_HP_TIMER, "pclk_pmu0_hp_timer", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(CLK_PMU0_HP_TIMER, "clk_pmu0_hp_timer", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(CLK_PMU0_32K_HP_TIMER, "clk_pmu0_32k_hp_timer", "clk_rtc_32k", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_PMU0_PVTM, "pclk_pmu0_pvtm", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(CLK_PMU0_PVTM, "clk_pmu0_pvtm", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_IOC_PMUIO, "pclk_ioc_pmuio", "busclk_pdpmu0", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_PMU0_GPIO0, "pclk_pmu0_gpio0", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(DBCLK_PMU0_GPIO0, "dbclk_pmu0_gpio0", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_PMU0_GRF, "pclk_pmu0_grf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 11, GFLAGS), -+ GATE(PCLK_PMU0_SGRF, "pclk_pmu0_sgrf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(PCLK_PMU0_SCRKEYGEN, "pclk_pmu0_scrkeygen", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PIPEPHY_DIV, "clk_pipephy_div", "cpll", 0, -+ RK3562_PMU0_CLKSEL_CON(2), 0, 6, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PIPEPHY_XIN24M, "clk_pipephy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_PIPEPHY_REF, "clk_pipephy_ref", clk_pipephy_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 7, 1, MFLAGS, -+ 
RK3562_PMU0_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(CLK_USB2PHY_XIN24M, "clk_usb2phy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB2PHY_REF, "clk_usb2phy_ref", clk_usbphy_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 8, 1, MFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(CLK_MIPIDSIPHY_XIN24M, "clk_mipidsiphy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", clk_mipidsi_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 15, 1, MFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_PMU0_I2C0, "pclk_pmu0_i2c0", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE(CLK_PMU0_I2C0, "clk_pmu0_i2c0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU0_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 9, GFLAGS), -+ /* PD_PMU1 */ -+ GATE(PCLK_PMU1_CRU, "pclk_pmu1_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU1_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(HCLK_PMU1_MEM, "hclk_pmu1_mem", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU1_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU1_UART0, "pclk_pmu1_uart0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PMU1_UART0_SRC, "clk_pmu1_uart0_src", "cpll", 0, -+ RK3562_PMU1_CLKSEL_CON(2), 0, 4, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_PMU1_UART0_FRAC, "clk_pmu1_uart0_frac", "clk_pmu1_uart0_src", CLK_SET_RATE_PARENT, -+ RK3562_PMU1_CLKSEL_CON(3), 0, -+ RK3562_PMU1_CLKGATE_CON(0), 9, GFLAGS, -+ &rk3562_clk_pmu1_uart0_fracmux), -+ GATE(SCLK_PMU1_UART0, "sclk_pmu1_uart0", "clk_pmu1_uart0", 0, -+ RK3562_PMU1_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_PMU1_SPI0, "pclk_pmu1_spi0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(CLK_PMU1_SPI0, "clk_pmu1_spi0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU1_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 2, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(SCLK_IN_PMU1_SPI0, "sclk_in_pmu1_spi0", "sclk_in_pmu1_spi0_io", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PMU1_PWM0, "pclk_pmu1_pwm0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE(CLK_PMU1_PWM0, "clk_pmu1_pwm0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU1_CLKSEL_CON(4), 14, 2, MFLAGS, 8, 2, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PMU1_PWM0, "clk_capture_pmu1_pwm0", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(CLK_PMU1_WIFI, "clk_pmu1_wifi", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(FCLK_PMU1_CM0_CORE, "fclk_pmu1_cm0_core", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PMU1_CM0_RTC, "clk_pmu1_cm0_rtc", "clk_rtc_32k", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_PMU1_WDTNS, "pclk_pmu1_wdtns", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(CLK_PMU1_WDTNS, "clk_pmu1_wdtns", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(PCLK_PMU1_MAILBOX, "pclk_pmu1_mailbox", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(3), 8, GFLAGS), ++ GATE(0, "sclk_acc_efuse", "xin24m", 0, ++ RK3288_CLKGATE_CON(0), 12, GFLAGS), + -+ /* PD_RGA */ -+ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_jdec", 0, -+ RK3562_CLKSEL_CON(32), 8, 3, DFLAGS, -+ RK3562_CLKGATE_CON(14), 1, GFLAGS), -+ 
GATE(ACLK_RGA, "aclk_rga", "aclk_rga_jdec", 0, -+ RK3562_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, -+ RK3562_CLKGATE_CON(14), 7, GFLAGS), -+ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_jdec", 0, -+ RK3562_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, -+ RK3562_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ RK3288_CLKGATE_CON(1), 5, GFLAGS), + -+ /* PD_VDPU */ -+ COMPOSITE(ACLK_VDPU_PRE, "aclk_vdpu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VDPU_PRE, "hclk_vdpu_pre", "aclk_vdpu", 0, -+ RK3562_CLKSEL_CON(24), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_vdpu", 0, -+ RK3562_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_vdpu_pre", 0, -+ RK3562_CLKGATE_CON(10), 8, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ /* PD_VEPU */ -+ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(20), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE(ACLK_VEPU_PRE, "aclk_vepu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VEPU_PRE, "hclk_vepu_pre", "aclk_vepu", 0, -+ RK3562_CLKSEL_CON(21), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_vepu", 0, -+ RK3562_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_vepu", 0, -+ RK3562_CLKGATE_CON(9), 6, GFLAGS), ++ COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK3288_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK3288_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 11, GFLAGS), ++ MUXGRF(0, "aclk_vcodec_pre", mux_aclk_vcodec_pre_p, CLK_SET_RATE_PARENT, ++ RK3288_GRF_SOC_CON(0), 7, 1, MFLAGS), ++ GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0, ++ RK3288_CLKGATE_CON(9), 0, GFLAGS), + -+ /* PD_VI */ -+ COMPOSITE(ACLK_VI, "aclk_vi", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi_isp", 0, -+ RK3562_CLKSEL_CON(26), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi_isp", 0, -+ RK3562_CLKSEL_CON(26), 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_isp", 0, -+ RK3562_CLKGATE_CON(11), 6, GFLAGS), -+ 
GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, -+ RK3562_CLKGATE_CON(11), 7, GFLAGS), -+ COMPOSITE(CLK_ISP, "clk_isp", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(27), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 8, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_isp", 0, -+ RK3562_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, -+ RK3562_CLKGATE_CON(11), 10, GFLAGS), -+ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 11, GFLAGS), -+ GATE(CSIRX0_CLK_DATA, "csirx0_clk_data", "csirx0_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 12, GFLAGS), -+ GATE(CSIRX1_CLK_DATA, "csirx1_clk_data", "csirx1_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 13, GFLAGS), -+ GATE(CSIRX2_CLK_DATA, "csirx2_clk_data", "csirx2_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 14, GFLAGS), -+ GATE(CSIRX3_CLK_DATA, "csirx3_clk_data", "csirx3_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 15, GFLAGS), -+ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 1, GFLAGS), -+ GATE(PCLK_CSIHOST2, "pclk_csihost2", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(PCLK_CSIHOST3, "pclk_csihost3", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 5, GFLAGS), ++ FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vcodec_pre", 0, 1, 4, ++ RK3288_CLKGATE_CON(3), 10, GFLAGS), + -+ /* PD_VO */ -+ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", gpll_cpll_vpll_dmyhpll_p, 0, -+ RK3562_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo", 0, -+ RK3562_CLKSEL_CON(29), 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vo", 0, -+ RK3562_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_pre", 0, -+ RK3562_CLKGATE_CON(13), 7, GFLAGS), -+ COMPOSITE(DCLK_VOP, "dclk_vop", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3562_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE(DCLK_VOP1, "dclk_vop1", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3562_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_CLKGATE_CON(13), 9, GFLAGS), ++ GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0, ++ RK3288_CLKGATE_CON(9), 1, GFLAGS), ++ ++ COMPOSITE(ACLK_VIO0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(31), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE(ACLK_VIO1, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, ++ RK3288_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 2, GFLAGS), ++ ++ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK3288_CLKSEL_CON(30), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 5, GFLAGS), ++ COMPOSITE(SCLK_RGA, "sclk_rga", mux_pll_src_cpll_gpll_usb480m_p, 0, ++ RK3288_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(3), 4, GFLAGS), ++ ++ COMPOSITE(DCLK_VOP0, "dclk_vop0", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(27), 0, 2, MFLAGS, 8, 8, DFLAGS, ++ RK3288_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE(DCLK_VOP1, "dclk_vop1", mux_pll_src_cpll_gpll_npll_p, 0, 
++ RK3288_CLKSEL_CON(29), 6, 2, MFLAGS, 8, 8, DFLAGS, ++ RK3288_CLKGATE_CON(3), 3, GFLAGS), ++ ++ COMPOSITE_NODIV(SCLK_EDP_24M, "sclk_edp_24m", mux_edp_24m_p, 0, ++ RK3288_CLKSEL_CON(28), 15, 1, MFLAGS, ++ RK3288_CLKGATE_CON(3), 12, GFLAGS), ++ COMPOSITE(SCLK_EDP, "sclk_edp", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3288_CLKGATE_CON(3), 13, GFLAGS), ++ ++ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3288_CLKGATE_CON(3), 14, GFLAGS), ++ COMPOSITE(SCLK_ISP_JPE, "sclk_isp_jpe", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3288_CLKGATE_CON(3), 15, GFLAGS), ++ ++ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, ++ RK3288_CLKGATE_CON(5), 12, GFLAGS), ++ GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0, ++ RK3288_CLKGATE_CON(5), 11, GFLAGS), ++ ++ COMPOSITE(ACLK_HEVC, "aclk_hevc", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(39), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(13), 13, GFLAGS), ++ DIV(HCLK_HEVC, "hclk_hevc", "aclk_hevc", 0, ++ RK3288_CLKSEL_CON(40), 12, 2, DFLAGS), ++ ++ COMPOSITE(SCLK_HEVC_CABAC, "sclk_hevc_cabac", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(13), 14, GFLAGS), ++ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(13), 15, GFLAGS), ++ ++ COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(26), 8, 1, MFLAGS, ++ RK3288_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_VIP_OUT, "sclk_vip_out", mux_vip_out_p, 0, ++ RK3288_CLKSEL_CON(26), 15, 1, MFLAGS, 9, 5, DFLAGS), ++ ++ DIV(PCLK_PD_ALIVE, "pclk_pd_alive", "gpll", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(33), 8, 5, DFLAGS), ++ COMPOSITE_NOMUX(PCLK_PD_PMU, "pclk_pd_pmu", "gpll", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(33), 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(5), 8, GFLAGS), ++ ++ COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_cpll_gll_usb_npll_p, 0, ++ RK3288_CLKSEL_CON(34), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(5), 7, GFLAGS), ++ ++ COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK3288_CLKGATE_CON(2), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3288_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK3288_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3288_CLKGATE_CON(2), 1, GFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 3 ++ */ ++ ++ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "sclk_spi1", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(25), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3288_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "sclk_spi2", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(39), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(2), 11, GFLAGS), ++ ++ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, ++ RK3288_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3288_CLKGATE_CON(13), 0, GFLAGS), ++ 
COMPOSITE(SCLK_SDIO0, "sclk_sdio0", mux_mmc_src_p, 0, ++ RK3288_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3288_CLKGATE_CON(13), 1, GFLAGS), ++ COMPOSITE(SCLK_SDIO1, "sclk_sdio1", mux_mmc_src_p, 0, ++ RK3288_CLKSEL_CON(34), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3288_CLKGATE_CON(13), 2, GFLAGS), ++ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, ++ RK3288_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3288_CLKGATE_CON(13), 3, GFLAGS), ++ ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3288_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3288_SDMMC_CON1, 0), ++ ++ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "sclk_sdio0", RK3288_SDIO0_CON0, 1), ++ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "sclk_sdio0", RK3288_SDIO0_CON1, 0), ++ ++ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "sclk_sdio1", RK3288_SDIO1_CON0, 1), ++ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "sclk_sdio1", RK3288_SDIO1_CON1, 0), ++ ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3288_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3288_EMMC_CON1, 0), ++ ++ COMPOSITE(SCLK_TSPOUT, "sclk_tspout", mux_tspout_p, 0, ++ RK3288_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(4), 11, GFLAGS), ++ COMPOSITE(SCLK_TSP, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3288_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(4), 10, GFLAGS), ++ ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(SCLK_OTGPHY2, "sclk_otgphy2", "xin24m", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED, ++ RK3288_CLKGATE_CON(13), 7, GFLAGS), ++ ++ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin32k", 0, ++ RK3288_CLKSEL_CON(2), 0, 6, DFLAGS, ++ RK3288_CLKGATE_CON(2), 7, GFLAGS), ++ ++ MUX(SCLK_TESTOUT_SRC, "sclk_testout_src", mux_testout_src_p, 0, ++ RK3288_MISC_CON, 8, 4, MFLAGS), ++ COMPOSITE_NOMUX(SCLK_TESTOUT, "sclk_testout", "sclk_testout_src", 0, ++ RK3288_CLKSEL_CON(2), 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(4), 15, GFLAGS), ++ ++ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, ++ RK3288_CLKSEL_CON(24), 8, 8, DFLAGS, ++ RK3288_CLKGATE_CON(2), 8, GFLAGS), ++ ++ GATE(SCLK_PS2C, "sclk_ps2c", "xin24m", 0, ++ RK3288_CLKGATE_CON(5), 13, GFLAGS), ++ ++ COMPOSITE(SCLK_NANDC0, "sclk_nandc0", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(38), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3288_CLKGATE_CON(5), 5, GFLAGS), ++ COMPOSITE(SCLK_NANDC1, "sclk_nandc1", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(38), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(5), 6, GFLAGS), ++ ++ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gll_usb_npll_p, 0, ++ RK3288_CLKSEL_CON(13), 13, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(17), 0, ++ RK3288_CLKGATE_CON(1), 9, GFLAGS, ++ &rk3288_uart0_fracmux), ++ MUX(0, "uart_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(13), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0, ++ RK3288_CLKSEL_CON(14), 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(18), 0, ++ RK3288_CLKGATE_CON(1), 11, GFLAGS, ++ &rk3288_uart1_fracmux), ++ COMPOSITE_NOMUX(0, "uart2_src", "uart_src", 0, ++ RK3288_CLKSEL_CON(15), 0, 7, DFLAGS, ++ 
RK3288_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(19), 0, ++ RK3288_CLKGATE_CON(1), 13, GFLAGS, ++ &rk3288_uart2_fracmux), ++ COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0, ++ RK3288_CLKSEL_CON(16), 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(20), 0, ++ RK3288_CLKGATE_CON(1), 15, GFLAGS, ++ &rk3288_uart3_fracmux), ++ COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0, ++ RK3288_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RK3288_CLKGATE_CON(2), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(7), 0, ++ RK3288_CLKGATE_CON(2), 13, GFLAGS, ++ &rk3288_uart4_fracmux), ++ ++ COMPOSITE(SCLK_MAC_PLL, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(21), 0, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3288_CLKGATE_CON(2), 5, GFLAGS), ++ MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, ++ RK3288_CLKSEL_CON(21), 4, 1, MFLAGS), ++ GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, ++ RK3288_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(SCLK_MACREF, "sclk_macref", "mac_clk", 0, ++ RK3288_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(SCLK_MAC_RX, "sclk_mac_rx", "mac_clk", 0, ++ RK3288_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(SCLK_MAC_TX, "sclk_mac_tx", "mac_clk", 0, ++ RK3288_CLKGATE_CON(5), 1, GFLAGS), ++ ++ COMPOSITE(0, "hsadc_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3288_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK3288_CLKGATE_CON(2), 6, GFLAGS), ++ MUX(0, "sclk_hsadc_out", mux_hsadcout_p, 0, ++ RK3288_CLKSEL_CON(22), 4, 1, MFLAGS), ++ INVERTER(SCLK_HSADC, "sclk_hsadc", "sclk_hsadc_out", ++ RK3288_CLKSEL_CON(22), 7, IFLAGS), ++ ++ GATE(0, "jtag", "ext_jtag", 0, ++ RK3288_CLKGATE_CON(4), 14, GFLAGS), ++ ++ COMPOSITE_NODIV(SCLK_USBPHY480M_SRC, "usbphy480m_src", mux_usbphy480m_p, 0, ++ RK3288_CLKSEL_CON(13), 11, 2, MFLAGS, ++ RK3288_CLKGATE_CON(5), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0, ++ RK3288_CLKSEL_CON(29), 0, 2, MFLAGS, ++ RK3288_CLKGATE_CON(3), 6, GFLAGS), ++ GATE(0, "hsicphy12m_xin12m", "xin12m", 0, ++ RK3288_CLKGATE_CON(13), 9, GFLAGS), ++ DIV(0, "hsicphy12m_usbphy", "sclk_hsicphy480m", 0, ++ RK3288_CLKSEL_CON(11), 8, 6, DFLAGS), ++ MUX(SCLK_HSICPHY12M, "sclk_hsicphy12m", mux_hsicphy12m_p, 0, ++ RK3288_CLKSEL_CON(22), 4, 1, MFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ ++ /* aclk_cpu gates */ ++ GATE(0, "sclk_intmem0", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(0, "sclk_intmem1", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 6, GFLAGS), ++ GATE(0, "sclk_intmem2", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_cpu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(0, "aclk_ccp", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 8, GFLAGS), ++ ++ /* hclk_cpu gates */ ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_cpu", 0, RK3288_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 9, GFLAGS), ++ 
GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(HCLK_SPDIF8CH, "hclk_spdif_8ch", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 11, GFLAGS), ++ ++ /* pclk_cpu gates */ ++ GATE(PCLK_PWM, "pclk_pwm", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(PCLK_DDRUPCTL0, "pclk_ddrupctl0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 14, GFLAGS), ++ GATE(PCLK_PUBL0, "pclk_publ0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 15, GFLAGS), ++ GATE(PCLK_DDRUPCTL1, "pclk_ddrupctl1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(PCLK_PUBL1, "pclk_publ1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(PCLK_EFUSE1024, "pclk_efuse_1024", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(PCLK_EFUSE256, "pclk_efuse_256", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(11), 11, GFLAGS), ++ ++ /* ddrctrl [DDR Controller PHY clock] gates */ ++ GATE(0, "nclk_ddrupctl0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(0, "nclk_ddrupctl1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 5, GFLAGS), ++ ++ /* ddrphy gates */ ++ GATE(0, "sclk_ddrphy0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(0, "sclk_ddrphy1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 13, GFLAGS), ++ ++ /* aclk_peri gates */ ++ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK3288_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(ACLK_MMU, "aclk_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 0, GFLAGS), ++ GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 2, GFLAGS), ++ ++ /* hclk_peri gates */ ++ GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(HCLK_USBHOST0, "hclk_host0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(HCLK_USBHOST1, "hclk_host1", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 8, GFLAGS), ++ GATE(HCLK_USB_PERI, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(0, "hclk_emem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(0, "hclk_mem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 13, GFLAGS), ++ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 14, GFLAGS), ++ GATE(HCLK_NANDC1, "hclk_nandc1", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 15, GFLAGS), ++ GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", 
"hclk_peri", 0, RK3288_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(0, "pmu_hclk_otg0", "hclk_peri", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(7), 5, GFLAGS), ++ ++ /* pclk_peri gates */ ++ GATE(0, "pclk_peri_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(PCLK_PS2C, "pclk_ps2c", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 15, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 13, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 14, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(PCLK_SIM, "pclk_sim", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_peri", 0, RK3288_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK3288_CLKGATE_CON(8), 1, GFLAGS), ++ ++ GATE(SCLK_LCDC_PWM0, "sclk_lcdc_pwm0", "xin24m", 0, RK3288_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(SCLK_LCDC_PWM1, "sclk_lcdc_pwm1", "xin24m", 0, RK3288_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK3288_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, RK3288_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(SCLK_MIPIDSI_24M, "sclk_mipidsi_24m", "xin24m", 0, RK3288_CLKGATE_CON(5), 15, GFLAGS), ++ ++ /* sclk_gpu gates */ ++ GATE(ACLK_GPU, "aclk_gpu", "sclk_gpu", 0, RK3288_CLKGATE_CON(18), 0, GFLAGS), ++ ++ /* pclk_pd_alive gates */ ++ GATE(PCLK_GPIO8, "pclk_gpio8", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(PCLK_GPIO7, "pclk_gpio7", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 4, GFLAGS), ++ GATE(PCLK_GPIO5, "pclk_gpio5", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(14), 12, GFLAGS), ++ ++ /* Watchdog pclk is controlled by RK3288_SGRF_SOC_CON0[1]. 
*/ ++ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_pd_alive"), ++ ++ /* pclk_pd_pmu gates */ ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 1, GFLAGS), ++ GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 4, GFLAGS), ++ ++ /* hclk_vio gates */ ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(HCLK_VOP0, "hclk_vop0", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(HCLK_VOP1, "hclk_vop1", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 15, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(HCLK_VIO2_H2P, "hclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(PCLK_MIPI_DSI1, "pclk_mipi_dsi1", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_MIPI_CSI, "pclk_mipi_csi", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_LVDS_PHY, "pclk_lvds_phy", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(PCLK_VIO2_H2P, "pclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 11, GFLAGS), ++ ++ /* aclk_vio0 gates */ ++ GATE(ACLK_VOP0, "aclk_vop0", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 11, GFLAGS), ++ GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 14, GFLAGS), ++ ++ /* aclk_vio1 gates */ ++ GATE(ACLK_VOP1, "aclk_vop1", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vio1", 0, RK3288_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 12, GFLAGS), ++ ++ /* aclk_rga_pre gates */ ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", CLK_IS_CRITICAL, RK3288_CLKGATE_CON(15), 13, GFLAGS), ++ ++ /* ++ * Other ungrouped clocks. 
++ */ ++ ++ GATE(PCLK_VIP_IN, "pclk_vip_in", "ext_vip", 0, RK3288_CLKGATE_CON(16), 0, GFLAGS), ++ INVERTER(PCLK_VIP, "pclk_vip", "pclk_vip_in", RK3288_CLKSEL_CON(29), 4, IFLAGS), ++ GATE(PCLK_ISP_IN, "pclk_isp_in", "ext_isp", 0, RK3288_CLKGATE_CON(16), 3, GFLAGS), ++ INVERTER(0, "pclk_isp", "pclk_isp_in", RK3288_CLKSEL_CON(29), 3, IFLAGS), ++ ++ GATE(SCLK_HSADC0_TSP, "clk_hsadc0_tsp", "ext_hsadc0_tsp", 0, RK3288_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(SCLK_HSADC1_TSP, "clk_hsadc1_tsp", "ext_hsadc0_tsp", 0, RK3288_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(SCLK_27M_TSP, "clk_27m_tsp", "ext_27m_tsp", 0, RK3288_CLKGATE_CON(8), 11, GFLAGS), +}; + -+static void __iomem *rk3562_cru_base; ++static struct rockchip_clk_branch rk3288w_hclkvio_branch[] __initdata = { ++ DIV(0, "hclk_vio", "aclk_vio1", 0, ++ RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), ++}; + -+static void rk3562_dump_cru(void) ++static struct rockchip_clk_branch rk3288_hclkvio_branch[] __initdata = { ++ DIV(0, "hclk_vio", "aclk_vio0", 0, ++ RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), ++}; ++ ++static void __iomem *rk3288_cru_base; ++ ++/* ++ * Some CRU registers will be reset in maskrom when the system ++ * wakes up from fastboot. ++ * So save them before suspend, restore them after resume. ++ */ ++static const int rk3288_saved_cru_reg_ids[] = { ++ RK3288_MODE_CON, ++ RK3288_CLKSEL_CON(0), ++ RK3288_CLKSEL_CON(1), ++ RK3288_CLKSEL_CON(10), ++ RK3288_CLKSEL_CON(33), ++ RK3288_CLKSEL_CON(37), ++ ++ /* We turn aclk_dmac1 on for suspend; this will restore it */ ++ RK3288_CLKGATE_CON(10), ++}; ++ ++static u32 rk3288_saved_cru_regs[ARRAY_SIZE(rk3288_saved_cru_reg_ids)]; ++ ++static int rk3288_clk_suspend(void) +{ -+ if (rk3562_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3562_cru_base, -+ 0x600, false); ++ int i, reg_id; ++ ++ for (i = 0; i < ARRAY_SIZE(rk3288_saved_cru_reg_ids); i++) { ++ reg_id = rk3288_saved_cru_reg_ids[i]; ++ ++ rk3288_saved_cru_regs[i] = ++ readl_relaxed(rk3288_cru_base + reg_id); ++ } ++ ++ /* ++ * Going into deep sleep (specifically setting PMU_CLR_DMA in ++ * RK3288_PMU_PWRMODE_CON1) appears to fail unless ++ * "aclk_dmac1" is on. ++ */ ++ writel_relaxed(1 << (12 + 16), ++ rk3288_cru_base + RK3288_CLKGATE_CON(10)); ++ ++ /* ++ * Switch PLLs other than DPLL (for SDRAM) to slow mode to ++ * avoid crashes on resume. The Mask ROM on the system will ++ * put APLL, CPLL, and GPLL into slow mode at resume time ++ * anyway (which is why we restore them), but we might not ++ * even make it to the Mask ROM if this isn't done at suspend ++ * time. ++ * ++ * NOTE: only APLL truly matters here, but we'll do them all. 
++ */ ++ ++ writel_relaxed(0xf3030000, rk3288_cru_base + RK3288_MODE_CON); ++ ++ return 0; ++} ++ ++static void rk3288_clk_resume(void) ++{ ++ int i, reg_id; ++ ++ for (i = ARRAY_SIZE(rk3288_saved_cru_reg_ids) - 1; i >= 0; i--) { ++ reg_id = rk3288_saved_cru_reg_ids[i]; ++ ++ writel_relaxed(rk3288_saved_cru_regs[i] | 0xffff0000, ++ rk3288_cru_base + reg_id); + } +} + -+static int protect_clocks[] = { -+ ACLK_VO_PRE, -+ HCLK_VO_PRE, -+ ACLK_VOP, -+ HCLK_VOP, -+ DCLK_VOP, -+ DCLK_VOP1, ++static void rk3288_clk_shutdown(void) ++{ ++ writel_relaxed(0xf3030000, rk3288_cru_base + RK3288_MODE_CON); ++} ++ ++static struct syscore_ops rk3288_clk_syscore_ops = { ++ .suspend = rk3288_clk_suspend, ++ .resume = rk3288_clk_resume, +}; + -+static void __init rk3562_clk_init(struct device_node *np) ++static void rk3288_dump_cru(void) ++{ ++ if (rk3288_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3288_cru_base, ++ 0x21c, false); ++ } ++} ++ ++static void __init rk3288_common_init(struct device_node *np, ++ enum rk3288_variant soc) +{ + struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++ struct clk **clks; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { ++ rk3288_cru_base = of_iomap(np, 0); ++ if (!rk3288_cru_base) { + pr_err("%s: could not map cru region\n", __func__); + return; + } + -+ rk3562_cru_base = reg_base; -+ -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ ctx = rockchip_clk_init(np, rk3288_cru_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); ++ iounmap(rk3288_cru_base); + return; + } ++ clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3562_pll_clks, -+ ARRAY_SIZE(rk3562_pll_clks), -+ RK3562_GRF_SOC_STATUS0); ++ rockchip_clk_register_plls(ctx, rk3288_pll_clks, ++ ARRAY_SIZE(rk3288_pll_clks), ++ RK3288_GRF_SOC_STATUS1); ++ rockchip_clk_register_branches(ctx, rk3288_clk_branches, ++ ARRAY_SIZE(rk3288_clk_branches)); + -+ rockchip_clk_register_branches(ctx, rk3562_clk_branches, -+ ARRAY_SIZE(rk3562_clk_branches)); ++ if (soc == RK3288W_CRU) ++ rockchip_clk_register_branches(ctx, rk3288w_hclkvio_branch, ++ ARRAY_SIZE(rk3288w_hclkvio_branch)); ++ else ++ rockchip_clk_register_branches(ctx, rk3288_hclkvio_branch, ++ ARRAY_SIZE(rk3288_hclkvio_branch)); + -+ /* (0x30444 - 0x400) / 4 + 1 = 49170 */ -+ rockchip_register_softrst(np, 49170, reg_base + RK3562_SOFTRST_CON(0), ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3288_cpuclk_data, rk3288_cpuclk_rates, ++ ARRAY_SIZE(rk3288_cpuclk_rates)); ++ ++ rockchip_register_softrst(np, 12, ++ rk3288_cru_base + RK3288_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK3562_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK3288_GLB_SRST_FST, ++ rk3288_clk_shutdown); ++ ++ if (!psci_smp_available()) ++ register_syscore_ops(&rk3288_clk_syscore_ops); + + rockchip_clk_of_add_provider(np, ctx); + + if (!rk_dump_cru) -+ rk_dump_cru = rk3562_dump_cru; ++ rk_dump_cru = rk3288_dump_cru; ++} + -+ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); ++static void __init rk3288_clk_init(struct device_node *np) ++{ ++ rk3288_common_init(np, RK3288_CRU); +} ++CLK_OF_DECLARE(rk3288_cru, "rockchip,rk3288-cru", rk3288_clk_init); + -+CLK_OF_DECLARE(rk3562_cru, "rockchip,rk3562-cru", rk3562_clk_init); ++static void __init rk3288w_clk_init(struct device_node *np) ++{ ++ 
rk3288_common_init(np, RK3288W_CRU); ++} ++CLK_OF_DECLARE(rk3288w_cru, "rockchip,rk3288w-cru", rk3288w_clk_init); + -+#ifdef MODULE -+struct clk_rk3562_inits { ++struct clk_rk3288_inits { + void (*inits)(struct device_node *np); +}; + -+static const struct clk_rk3562_inits clk_3562_cru_init = { -+ .inits = rk3562_clk_init, ++static const struct clk_rk3288_inits clk_rk3288_init = { ++ .inits = rk3288_clk_init, +}; + -+static const struct of_device_id clk_rk3562_match_table[] = { ++static const struct clk_rk3288_inits clk_rk3288w_init = { ++ .inits = rk3288w_clk_init, ++}; ++ ++static const struct of_device_id clk_rk3288_match_table[] = { + { -+ .compatible = "rockchip,rk3562-cru", -+ .data = &clk_3562_cru_init, ++ .compatible = "rockchip,rk3288-cru", ++ .data = &clk_rk3288_init, ++ }, { ++ .compatible = "rockchip,rk3288w-cru", ++ .data = &clk_rk3288w_init, + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3562_match_table); ++MODULE_DEVICE_TABLE(of, clk_rk3288_match_table); + -+static int clk_rk3562_probe(struct platform_device *pdev) ++static int __init clk_rk3288_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const struct of_device_id *match; -+ const struct clk_rk3562_inits *init_data; ++ const struct clk_rk3288_inits *init_data; + -+ match = of_match_device(clk_rk3562_match_table, &pdev->dev); ++ match = of_match_device(clk_rk3288_match_table, &pdev->dev); + if (!match || !match->data) + return -EINVAL; + @@ -55565,1697 +55146,956 @@ index 000000000..3c6f78fec + return 0; +} + -+static struct platform_driver clk_rk3562_driver = { -+ .probe = clk_rk3562_probe, ++static struct platform_driver clk_rk3288_driver = { + .driver = { -+ .name = "clk-rk3562", -+ .of_match_table = clk_rk3562_match_table, -+ .suppress_bind_attrs = true, ++ .name = "clk-rk3288", ++ .of_match_table = clk_rk3288_match_table, + }, +}; -+module_platform_driver(clk_rk3562_driver); ++builtin_platform_driver_probe(clk_rk3288_driver, clk_rk3288_probe); + -+MODULE_DESCRIPTION("Rockchip RK3562 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3288 Clock Driver"); +MODULE_LICENSE("GPL"); -+MODULE_ALIAS("platform:clk-rk3562"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rk3568.c b/drivers/clk/rockchip-oh/clk-rk3568.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3308.c b/drivers/clk/rockchip-oh/clk-rk3308.c new file mode 100644 -index 000000000..3e6955066 +index 000000000..6a8e38f11 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3568.c -@@ -0,0 +1,1775 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rk3308.c +@@ -0,0 +1,1013 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang ++ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. 
++ * Author: Finley Xiao + */ + +#include -+#include ++#include +#include -+#include +#include ++#include ++#include ++#include +#include -+#include ++#include +#include "clk.h" + -+#define RK3568_GRF_SOC_CON1 0x504 -+#define RK3568_GRF_SOC_CON2 0x508 -+#define RK3568_GRF_SOC_STATUS0 0x580 -+#define RK3568_PMU_GRF_SOC_CON0 0x100 -+ -+#define RK3568_FRAC_MAX_PRATE 1000000000 -+#define RK3568_SPDIF_FRAC_MAX_PRATE 600000000 -+#define RK3568_UART_FRAC_MAX_PRATE 600000000 -+#define RK3568_DCLK_PARENT_MAX_PRATE 600000000 -+ -+enum rk3568_pmu_plls { -+ ppll, hpll, -+}; ++#define RK3308_GRF_SOC_STATUS0 0x380 + -+enum rk3568_plls { -+ apll, dpll, gpll, cpll, npll, vpll, ++enum rk3308_plls { ++ apll, dpll, vpll0, vpll1, +}; + -+static struct rockchip_pll_rate_table rk3568_pll_rates[] = { ++static struct rockchip_pll_rate_table rk3308_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ 
RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), + RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), + RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), + RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), + RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(297000000, 1, 99, 8, 1, 1, 0), -+ RK3036_PLL_RATE(241500000, 1, 161, 8, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), + RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), -+ RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0), -+ RK3036_PLL_RATE(135000000, 1, 45, 8, 1, 1, 0), -+ RK3036_PLL_RATE(119000000, 3, 119, 4, 2, 1, 0), -+ RK3036_PLL_RATE(108000000, 1, 45, 8, 1, 1, 0), -+ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), -+ RK3036_PLL_RATE(78750000, 1, 96, 6, 4, 1, 0), -+ RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+#define RK3568_DIV_ATCLK_CORE_MASK 0x1f -+#define RK3568_DIV_ATCLK_CORE_SHIFT 0 -+#define RK3568_DIV_GICCLK_CORE_MASK 0x1f -+#define RK3568_DIV_GICCLK_CORE_SHIFT 8 -+#define RK3568_DIV_PCLK_CORE_MASK 0x1f -+#define RK3568_DIV_PCLK_CORE_SHIFT 0 -+#define RK3568_DIV_PERIPHCLK_CORE_MASK 0x1f -+#define RK3568_DIV_PERIPHCLK_CORE_SHIFT 8 -+#define RK3568_DIV_ACLK_CORE_MASK 0x1f -+#define RK3568_DIV_ACLK_CORE_SHIFT 8 -+ -+#define RK3568_DIV_SCLK_CORE_MASK 0xf -+#define RK3568_DIV_SCLK_CORE_SHIFT 0 -+#define RK3568_MUX_SCLK_CORE_MASK 0x3 -+#define RK3568_MUX_SCLK_CORE_SHIFT 8 -+#define RK3568_MUX_SCLK_CORE_NPLL_MASK 0x1 -+#define RK3568_MUX_SCLK_CORE_NPLL_SHIFT 15 -+#define RK3568_MUX_CLK_CORE_APLL_MASK 0x1 -+#define RK3568_MUX_CLK_CORE_APLL_SHIFT 7 -+#define RK3568_MUX_CLK_PVTPLL_MASK 0x1 -+#define RK3568_MUX_CLK_PVTPLL_SHIFT 15 -+ -+#define RK3568_CLKSEL1(_sclk_core) \ -+{ \ -+ .reg = RK3568_CLKSEL_CON(2), \ -+ .val = HIWORD_UPDATE(_sclk_core, RK3568_MUX_SCLK_CORE_NPLL_MASK, \ -+ RK3568_MUX_SCLK_CORE_NPLL_SHIFT) | \ -+ HIWORD_UPDATE(_sclk_core, RK3568_MUX_SCLK_CORE_MASK, \ -+ RK3568_MUX_SCLK_CORE_SHIFT) | \ -+ HIWORD_UPDATE(1, RK3568_DIV_SCLK_CORE_MASK, \ -+ RK3568_DIV_SCLK_CORE_SHIFT), \ -+} -+ -+#define RK3568_CLKSEL2(_aclk_core) \ -+{ \ -+ .reg = RK3568_CLKSEL_CON(5), \ -+ .val = 
HIWORD_UPDATE(_aclk_core, RK3568_DIV_ACLK_CORE_MASK, \ -+ RK3568_DIV_ACLK_CORE_SHIFT), \ -+} -+ -+#define RK3568_CLKSEL3(_atclk_core, _gic_core) \ -+{ \ -+ .reg = RK3568_CLKSEL_CON(3), \ -+ .val = HIWORD_UPDATE(_atclk_core, RK3568_DIV_ATCLK_CORE_MASK, \ -+ RK3568_DIV_ATCLK_CORE_SHIFT) | \ -+ HIWORD_UPDATE(_gic_core, RK3568_DIV_GICCLK_CORE_MASK, \ -+ RK3568_DIV_GICCLK_CORE_SHIFT), \ -+} ++#define RK3308_DIV_ACLKM_MASK 0x7 ++#define RK3308_DIV_ACLKM_SHIFT 12 ++#define RK3308_DIV_PCLK_DBG_MASK 0xf ++#define RK3308_DIV_PCLK_DBG_SHIFT 8 + -+#define RK3568_CLKSEL4(_pclk_core, _periph_core) \ -+{ \ -+ .reg = RK3568_CLKSEL_CON(4), \ -+ .val = HIWORD_UPDATE(_pclk_core, RK3568_DIV_PCLK_CORE_MASK, \ -+ RK3568_DIV_PCLK_CORE_SHIFT) | \ -+ HIWORD_UPDATE(_periph_core, RK3568_DIV_PERIPHCLK_CORE_MASK, \ -+ RK3568_DIV_PERIPHCLK_CORE_SHIFT), \ ++#define RK3308_CLKSEL0(_aclk_core, _pclk_dbg) \ ++{ \ ++ .reg = RK3308_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK3308_DIV_ACLKM_MASK, \ ++ RK3308_DIV_ACLKM_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, RK3308_DIV_PCLK_DBG_MASK, \ ++ RK3308_DIV_PCLK_DBG_SHIFT), \ +} + -+#define RK3568_CPUCLK_RATE(_prate, _sclk, _acore, _atcore, _gicclk, _pclk, _periph) \ -+{ \ -+ .prate = _prate##U, \ -+ .divs = { \ -+ RK3568_CLKSEL1(_sclk), \ -+ RK3568_CLKSEL2(_acore), \ -+ RK3568_CLKSEL3(_atcore, _gicclk), \ -+ RK3568_CLKSEL4(_pclk, _periph), \ -+ }, \ ++#define RK3308_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3308_CLKSEL0(_aclk_core, _pclk_dbg), \ ++ }, \ +} + -+static struct rockchip_cpuclk_rate_table rk3568_cpuclk_rates[] __initdata = { -+ RK3568_CPUCLK_RATE(1800000000, 0, 1, 7, 7, 7, 7), -+ RK3568_CPUCLK_RATE(1704000000, 0, 1, 7, 7, 7, 7), -+ RK3568_CPUCLK_RATE(1608000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1584000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1560000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1536000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1512000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1488000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1464000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1440000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1416000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1392000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1368000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1344000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1320000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1296000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1272000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1248000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1224000000, 0, 1, 5, 5, 5, 5), -+ RK3568_CPUCLK_RATE(1200000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(1104000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(1008000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(912000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(816000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(696000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(600000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(408000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(312000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(216000000, 0, 1, 3, 3, 3, 3), -+ RK3568_CPUCLK_RATE(96000000, 0, 1, 3, 3, 3, 3), ++static struct rockchip_cpuclk_rate_table rk3308_cpuclk_rates[] __initdata = { ++ RK3308_CPUCLK_RATE(1608000000, 1, 7), ++ RK3308_CPUCLK_RATE(1512000000, 1, 7), ++ RK3308_CPUCLK_RATE(1488000000, 1, 5), ++ RK3308_CPUCLK_RATE(1416000000, 1, 5), ++ RK3308_CPUCLK_RATE(1392000000, 1, 5), ++ RK3308_CPUCLK_RATE(1296000000, 1, 5), ++ 
RK3308_CPUCLK_RATE(1200000000, 1, 5), ++ RK3308_CPUCLK_RATE(1104000000, 1, 5), ++ RK3308_CPUCLK_RATE(1008000000, 1, 5), ++ RK3308_CPUCLK_RATE(912000000, 1, 5), ++ RK3308_CPUCLK_RATE(816000000, 1, 3), ++ RK3308_CPUCLK_RATE(696000000, 1, 3), ++ RK3308_CPUCLK_RATE(600000000, 1, 3), ++ RK3308_CPUCLK_RATE(408000000, 1, 1), ++ RK3308_CPUCLK_RATE(312000000, 1, 1), ++ RK3308_CPUCLK_RATE(216000000, 1, 1), ++ RK3308_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk3568_cpuclk_data = { -+ .core_reg[0] = RK3568_CLKSEL_CON(0), ++static const struct rockchip_cpuclk_reg_data rk3308_cpuclk_data = { ++ .core_reg[0] = RK3308_CLKSEL_CON(0), + .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .core_reg[1] = RK3568_CLKSEL_CON(0), -+ .div_core_shift[1] = 8, -+ .div_core_mask[1] = 0x1f, -+ .core_reg[2] = RK3568_CLKSEL_CON(1), -+ .div_core_shift[2] = 0, -+ .div_core_mask[2] = 0x1f, -+ .core_reg[3] = RK3568_CLKSEL_CON(1), -+ .div_core_shift[3] = 8, -+ .div_core_mask[3] = 0x1f, -+ .num_cores = 4, ++ .div_core_mask[0] = 0xf, ++ .num_cores = 1, + .mux_core_alt = 1, + .mux_core_main = 0, + .mux_core_shift = 6, -+ .mux_core_mask = 0x1, ++ .mux_core_mask = 0x3, +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc_32k" }; -+PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "i2s1_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "i2s1_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s2_2ch_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "i2s2_mclkin", "xin_osc0_half "}; -+PNAME(clk_i2s3_2ch_tx_p) = { "clk_i2s3_2ch_tx_src", "clk_i2s3_2ch_tx_frac", "i2s3_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s3_2ch_rx_p) = { "clk_i2s3_2ch_rx_src", "clk_i2s3_2ch_rx_frac", "i2s3_mclkin", "xin_osc0_half" }; -+PNAME(mclk_spdif_8ch_p) = { "mclk_spdif_8ch_src", "mclk_spdif_8ch_frac" }; -+PNAME(sclk_audpwm_p) = { "sclk_audpwm_src", "sclk_audpwm_frac" }; -+PNAME(sclk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(sclk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(sclk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(sclk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(sclk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(sclk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(sclk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(sclk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; -+PNAME(sclk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; -+PNAME(sclk_uart0_p) = { "sclk_uart0_div", "sclk_uart0_frac", "xin24m" }; -+PNAME(clk_rtc32k_pmu_p) = { "clk_32k_pvtm", "xin32k", "clk_rtc32k_frac" }; -+PNAME(mpll_gpll_cpll_npll_p) = { "mpll", "gpll", "cpll", "npll" }; -+PNAME(gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; -+PNAME(npll_gpll_p) = { "npll", "gpll" }; -+PNAME(cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(gpll_cpll_npll_vpll_p) = { "gpll", "cpll", "npll", "vpll" }; -+PNAME(apll_gpll_npll_p) = { "apll", "gpll", "npll" }; -+PNAME(sclk_core_pre_p) = { "sclk_core_src", "npll" }; -+PNAME(gpll150_gpll100_gpll75_xin24m_p) = { "gpll_150m", "gpll_100m", "gpll_75m", 
"xin24m" }; -+PNAME(clk_gpu_pre_mux_p) = { "clk_gpu_src", "gpu_pvtpll_out" }; -+PNAME(clk_npu_pre_ndft_p) = { "clk_npu_src", "clk_npu_np5"}; -+PNAME(clk_npu_p) = { "clk_npu_pre_ndft", "npu_pvtpll_out" }; -+PNAME(dpll_gpll_cpll_p) = { "dpll", "gpll", "cpll" }; -+PNAME(clk_ddr1x_p) = { "clk_ddrphy1x_src", "dpll" }; -+PNAME(gpll200_gpll150_gpll100_xin24m_p) = { "gpll_200m", "gpll_150m", "gpll_100m", "xin24m" }; -+PNAME(gpll100_gpll75_gpll50_p) = { "gpll_100m", "gpll_75m", "cpll_50m" }; -+PNAME(i2s0_mclkout_tx_p) = { "mclk_i2s0_8ch_tx", "xin_osc0_half" }; -+PNAME(i2s0_mclkout_rx_p) = { "mclk_i2s0_8ch_rx", "xin_osc0_half" }; -+PNAME(i2s1_mclkout_tx_p) = { "mclk_i2s1_8ch_tx", "xin_osc0_half" }; -+PNAME(i2s1_mclkout_rx_p) = { "mclk_i2s1_8ch_rx", "xin_osc0_half" }; -+PNAME(i2s2_mclkout_p) = { "mclk_i2s2_2ch", "xin_osc0_half" }; -+PNAME(i2s3_mclkout_tx_p) = { "mclk_i2s3_2ch_tx", "xin_osc0_half" }; -+PNAME(i2s3_mclkout_rx_p) = { "mclk_i2s3_2ch_rx", "xin_osc0_half" }; -+PNAME(mclk_pdm_p) = { "gpll_300m", "cpll_250m", "gpll_200m", "gpll_100m" }; -+PNAME(clk_i2c_p) = { "gpll_200m", "gpll_100m", "xin24m", "cpll_100m" }; -+PNAME(gpll200_gpll150_gpll100_p) = { "gpll_200m", "gpll_150m", "gpll_100m" }; -+PNAME(gpll300_gpll200_gpll100_p) = { "gpll_300m", "gpll_200m", "gpll_100m" }; -+PNAME(clk_nandc_p) = { "gpll_200m", "gpll_150m", "cpll_100m", "xin24m" }; -+PNAME(sclk_sfc_p) = { "xin24m", "cpll_50m", "gpll_75m", "gpll_100m", "cpll_125m", "gpll_150m" }; -+PNAME(gpll200_gpll150_cpll125_p) = { "gpll_200m", "gpll_150m", "cpll_125m" }; -+PNAME(cclk_emmc_p) = { "xin24m", "gpll_200m", "gpll_150m", "cpll_100m", "cpll_50m", "clk_osc0_div_375k" }; -+PNAME(aclk_pipe_p) = { "gpll_400m", "gpll_300m", "gpll_200m", "xin24m" }; -+PNAME(gpll200_cpll125_p) = { "gpll_200m", "cpll_125m" }; -+PNAME(gpll300_gpll200_gpll100_xin24m_p) = { "gpll_300m", "gpll_200m", "gpll_100m", "xin24m" }; -+PNAME(clk_sdmmc_p) = { "xin24m", "gpll_400m", "gpll_300m", "cpll_100m", "cpll_50m", "clk_osc0_div_750k" }; -+PNAME(cpll125_cpll50_cpll25_xin24m_p) = { "cpll_125m", "cpll_50m", "cpll_25m", "xin24m" }; -+PNAME(clk_gmac_ptp_p) = { "cpll_62p5", "gpll_100m", "cpll_50m", "xin24m" }; -+PNAME(cpll333_gpll300_gpll200_p) = { "cpll_333m", "gpll_300m", "gpll_200m" }; -+PNAME(cpll_gpll_hpll_p) = { "cpll", "gpll", "hpll" }; -+PNAME(gpll_usb480m_xin24m_p) = { "gpll", "usb480m", "xin24m", "xin24m" }; -+PNAME(gpll300_cpll250_gpll100_xin24m_p) = { "gpll_300m", "cpll_250m", "gpll_100m", "xin24m" }; -+PNAME(cpll_gpll_hpll_vpll_p) = { "cpll", "gpll", "hpll", "vpll" }; -+PNAME(hpll_vpll_gpll_cpll_p) = { "hpll", "vpll", "gpll", "cpll" }; -+PNAME(gpll400_cpll333_gpll200_p) = { "gpll_400m", "cpll_333m", "gpll_200m" }; -+PNAME(gpll100_gpll75_cpll50_xin24m_p) = { "gpll_100m", "gpll_75m", "cpll_50m", "xin24m" }; -+PNAME(xin24m_gpll100_cpll100_p) = { "xin24m", "gpll_100m", "cpll_100m" }; -+PNAME(gpll_cpll_usb480m_p) = { "gpll", "cpll", "usb480m" }; -+PNAME(gpll100_xin24m_cpll100_p) = { "gpll_100m", "xin24m", "cpll_100m" }; -+PNAME(gpll200_xin24m_cpll100_p) = { "gpll_200m", "xin24m", "cpll_100m" }; -+PNAME(xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; -+PNAME(cpll500_gpll400_gpll300_xin24m_p) = { "cpll_500m", "gpll_400m", "gpll_300m", "xin24m" }; -+PNAME(gpll400_gpll300_gpll200_xin24m_p) = { "gpll_400m", "gpll_300m", "gpll_200m", "xin24m" }; -+PNAME(xin24m_cpll100_p) = { "xin24m", "cpll_100m" }; -+PNAME(ppll_usb480m_cpll_gpll_p) = { "ppll", "usb480m", "cpll", "gpll"}; -+PNAME(clk_usbphy0_ref_p) = { "clk_ref24m", "xin_osc0_usbphy0_g" }; -+PNAME(clk_usbphy1_ref_p) = { 
"clk_ref24m", "xin_osc0_usbphy1_g" }; -+PNAME(clk_mipidsiphy0_ref_p) = { "clk_ref24m", "xin_osc0_mipidsiphy0_g" }; -+PNAME(clk_mipidsiphy1_ref_p) = { "clk_ref24m", "xin_osc0_mipidsiphy1_g" }; -+PNAME(clk_wifi_p) = { "clk_wifi_osc0", "clk_wifi_div" }; -+PNAME(clk_pciephy0_ref_p) = { "clk_pciephy0_osc0", "clk_pciephy0_div" }; -+PNAME(clk_pciephy1_ref_p) = { "clk_pciephy1_osc0", "clk_pciephy1_div" }; -+PNAME(clk_pciephy2_ref_p) = { "clk_pciephy2_osc0", "clk_pciephy2_div" }; -+PNAME(mux_gmac0_p) = { "clk_mac0_2top", "gmac0_clkin" }; -+PNAME(mux_gmac0_rgmii_speed_p) = { "clk_gmac0", "clk_gmac0", "clk_gmac0_tx_div50", "clk_gmac0_tx_div5" }; -+PNAME(mux_gmac0_rmii_speed_p) = { "clk_gmac0_rx_div20", "clk_gmac0_rx_div2" }; -+PNAME(mux_gmac0_rx_tx_p) = { "clk_gmac0_rgmii_speed", "clk_gmac0_rmii_speed", "clk_gmac0_xpcs_mii" }; -+PNAME(mux_gmac1_p) = { "clk_mac1_2top", "gmac1_clkin" }; -+PNAME(mux_gmac1_rgmii_speed_p) = { "clk_gmac1", "clk_gmac1", "clk_gmac1_tx_div50", "clk_gmac1_tx_div5" }; -+PNAME(mux_gmac1_rmii_speed_p) = { "clk_gmac1_rx_div20", "clk_gmac1_rx_div2" }; -+PNAME(mux_gmac1_rx_tx_p) = { "clk_gmac1_rgmii_speed", "clk_gmac1_rmii_speed", "clk_gmac1_xpcs_mii" }; -+PNAME(clk_hdmi_ref_p) = { "hpll", "hpll_ph0" }; -+PNAME(clk_pdpmu_p) = { "ppll", "gpll" }; -+PNAME(clk_mac_2top_p) = { "cpll_125m", "cpll_50m", "cpll_25m", "ppll" }; -+PNAME(clk_pwm0_p) = { "xin24m", "clk_pdpmu" }; -+PNAME(aclk_rkvdec_pre_p) = { "gpll", "cpll" }; -+PNAME(clk_rkvdec_core_p) = { "gpll", "cpll", "dummy_npll", "dummy_vpll" }; -+PNAME(clk_32k_ioe_p) = { "clk_rtc_32k", "xin32k" }; -+PNAME(i2s1_mclkout_p) = { "i2s1_mclkout_rx", "i2s1_mclkout_tx" }; -+PNAME(i2s3_mclkout_p) = { "i2s3_mclkout_rx", "i2s3_mclkout_tx" }; -+PNAME(i2s1_mclk_rx_ioe_p) = { "i2s1_mclkin_rx", "i2s1_mclkout_rx" }; -+PNAME(i2s1_mclk_tx_ioe_p) = { "i2s1_mclkin_tx", "i2s1_mclkout_tx" }; -+PNAME(i2s2_mclk_ioe_p) = { "i2s2_mclkin", "i2s2_mclkout" }; -+PNAME(i2s3_mclk_ioe_p) = { "i2s3_mclkin", "i2s3_mclkout" }; -+ -+static struct rockchip_pll_clock rk3568_pmu_pll_clks[] __initdata = { -+ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, -+ 0, RK3568_PMU_PLL_CON(0), -+ RK3568_PMU_MODE_CON0, 0, 4, 0, rk3568_pll_rates), -+ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, -+ 0, RK3568_PMU_PLL_CON(16), -+ RK3568_PMU_MODE_CON0, 2, 7, 0, rk3568_pll_rates), -+}; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k" }; ++PNAME(mux_dpll_vpll0_p) = { "dpll", "vpll0" }; ++PNAME(mux_dpll_vpll0_xin24m_p) = { "dpll", "vpll0", "xin24m" }; ++PNAME(mux_dpll_vpll0_vpll1_p) = { "dpll", "vpll0", "vpll1" }; ++PNAME(mux_dpll_vpll0_vpll1_xin24m_p) = { "dpll", "vpll0", "vpll1", "xin24m" }; ++PNAME(mux_dpll_vpll0_vpll1_usb480m_xin24m_p) = { "dpll", "vpll0", "vpll1", "usb480m", "xin24m" }; ++PNAME(mux_vpll0_vpll1_p) = { "vpll0", "vpll1" }; ++PNAME(mux_vpll0_vpll1_xin24m_p) = { "vpll0", "vpll1", "xin24m" }; ++PNAME(mux_uart0_p) = { "clk_uart0_src", "dummy", "clk_uart0_frac" }; ++PNAME(mux_uart1_p) = { "clk_uart1_src", "dummy", "clk_uart1_frac" }; ++PNAME(mux_uart2_p) = { "clk_uart2_src", "dummy", "clk_uart2_frac" }; ++PNAME(mux_uart3_p) = { "clk_uart3_src", "dummy", "clk_uart3_frac" }; ++PNAME(mux_uart4_p) = { "clk_uart4_src", "dummy", "clk_uart4_frac" }; ++PNAME(mux_dclk_vop_p) = { "dclk_vop_src", "dclk_vop_frac", "xin24m" }; ++PNAME(mux_nandc_p) = { "clk_nandc_div", "clk_nandc_div50" }; ++PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; ++PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; ++PNAME(mux_emmc_p) 
= { "clk_emmc_div", "clk_emmc_div50" }; ++PNAME(mux_mac_p) = { "clk_mac_src", "mac_clkin" }; ++PNAME(mux_mac_rmii_sel_p) = { "clk_mac_rx_tx_div20", "clk_mac_rx_tx_div2" }; ++PNAME(mux_ddrstdby_p) = { "clk_ddrphy1x_out", "clk_ddr_stdby_div4" }; ++PNAME(mux_rtc32k_p) = { "xin32k", "clk_pvtm_32k", "clk_rtc32k_frac", "clk_rtc32k_div" }; ++PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_usbphy_ref_src" }; ++PNAME(mux_wifi_src_p) = { "clk_wifi_dpll", "clk_wifi_vpll0" }; ++PNAME(mux_wifi_p) = { "clk_wifi_osc", "clk_wifi_src" }; ++PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; ++PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in" }; ++PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; ++PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m" }; ++PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in" }; ++PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; ++PNAME(mux_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "mclk_i2s1_8ch_in" }; ++PNAME(mux_i2s1_8ch_tx_rx_p) = { "clk_i2s1_8ch_tx_mux", "clk_i2s1_8ch_rx_mux"}; ++PNAME(mux_i2s1_8ch_tx_out_p) = { "clk_i2s1_8ch_tx", "xin12m" }; ++PNAME(mux_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "mclk_i2s1_8ch_in" }; ++PNAME(mux_i2s1_8ch_rx_tx_p) = { "clk_i2s1_8ch_rx_mux", "clk_i2s1_8ch_tx_mux"}; ++PNAME(mux_i2s2_8ch_tx_p) = { "clk_i2s2_8ch_tx_src", "clk_i2s2_8ch_tx_frac", "mclk_i2s2_8ch_in" }; ++PNAME(mux_i2s2_8ch_tx_rx_p) = { "clk_i2s2_8ch_tx_mux", "clk_i2s2_8ch_rx_mux"}; ++PNAME(mux_i2s2_8ch_tx_out_p) = { "clk_i2s2_8ch_tx", "xin12m" }; ++PNAME(mux_i2s2_8ch_rx_p) = { "clk_i2s2_8ch_rx_src", "clk_i2s2_8ch_rx_frac", "mclk_i2s2_8ch_in" }; ++PNAME(mux_i2s2_8ch_rx_tx_p) = { "clk_i2s2_8ch_rx_mux", "clk_i2s2_8ch_tx_mux"}; ++PNAME(mux_i2s3_8ch_tx_p) = { "clk_i2s3_8ch_tx_src", "clk_i2s3_8ch_tx_frac", "mclk_i2s3_8ch_in" }; ++PNAME(mux_i2s3_8ch_tx_rx_p) = { "clk_i2s3_8ch_tx_mux", "clk_i2s3_8ch_rx_mux"}; ++PNAME(mux_i2s3_8ch_tx_out_p) = { "clk_i2s3_8ch_tx", "xin12m" }; ++PNAME(mux_i2s3_8ch_rx_p) = { "clk_i2s3_8ch_rx_src", "clk_i2s3_8ch_rx_frac", "mclk_i2s3_8ch_in" }; ++PNAME(mux_i2s3_8ch_rx_tx_p) = { "clk_i2s3_8ch_rx_mux", "clk_i2s3_8ch_tx_mux"}; ++PNAME(mux_i2s0_2ch_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "mclk_i2s0_2ch_in" }; ++PNAME(mux_i2s0_2ch_out_p) = { "clk_i2s0_2ch", "xin12m" }; ++PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in"}; ++PNAME(mux_i2s1_2ch_out_p) = { "clk_i2s1_2ch", "xin12m" }; ++PNAME(mux_spdif_tx_src_p) = { "clk_spdif_tx_div", "clk_spdif_tx_div50" }; ++PNAME(mux_spdif_tx_p) = { "clk_spdif_tx_src", "clk_spdif_tx_frac", "mclk_i2s0_2ch_in" }; ++PNAME(mux_spdif_rx_src_p) = { "clk_spdif_rx_div", "clk_spdif_rx_div50" }; ++PNAME(mux_spdif_rx_p) = { "clk_spdif_rx_src", "clk_spdif_rx_frac" }; ++PNAME(mux_uart_src_p) = { "usb480m", "xin24m", "dpll", "vpll0", "vpll1" }; ++static u32 uart_src_mux_idx[] = { 3, 4, 0, 1, 2 }; + -+static struct rockchip_pll_clock rk3568_pll_clks[] __initdata = { ++static struct rockchip_pll_clock rk3308_pll_clks[] __initdata = { + [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ 0, RK3568_PLL_CON(0), -+ RK3568_MODE_CON0, 0, 0, 0, rk3568_pll_rates), ++ 0, RK3308_PLL_CON(0), ++ RK3308_MODE_CON, 0, 0, 0, rk3308_pll_rates), + [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ 0, RK3568_PLL_CON(8), -+ RK3568_MODE_CON0, 2, 1, 0, NULL), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 
0, RK3568_PLL_CON(24), -+ RK3568_MODE_CON0, 4, 2, 0, rk3568_pll_rates), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK3568_PLL_CON(16), -+ RK3568_MODE_CON0, 6, 3, 0, rk3568_pll_rates), -+ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3568_PLL_CON(32), -+ RK3568_MODE_CON0, 10, 5, 0, rk3568_pll_rates), -+ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, -+ 0, RK3568_PLL_CON(40), -+ RK3568_MODE_CON0, 12, 6, 0, rk3568_pll_rates), ++ 0, RK3308_PLL_CON(8), ++ RK3308_MODE_CON, 2, 1, 0, rk3308_pll_rates), ++ [vpll0] = PLL(pll_rk3328, PLL_VPLL0, "vpll0", mux_pll_p, ++ 0, RK3308_PLL_CON(16), ++ RK3308_MODE_CON, 4, 2, 0, rk3308_pll_rates), ++ [vpll1] = PLL(pll_rk3328, PLL_VPLL1, "vpll1", mux_pll_p, ++ 0, RK3308_PLL_CON(24), ++ RK3308_MODE_CON, 6, 3, 0, rk3308_pll_rates), +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3568_i2s0_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(11), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_uart0_fracmux __initdata = ++ MUX(0, "clk_uart0_mux", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(11), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s0_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(13), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_uart1_fracmux __initdata = ++ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(14), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s1_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", clk_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(15), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_uart2_fracmux __initdata = ++ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(17), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s1_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", clk_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(17), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_uart3_fracmux __initdata = ++ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(20), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s2_2ch_fracmux __initdata = -+ MUX(CLK_I2S2_2CH, "clk_i2s2_2ch", clk_i2s2_2ch_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(19), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_uart4_fracmux __initdata = ++ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(23), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s3_2ch_tx_fracmux __initdata = -+ MUX(CLK_I2S3_2CH_TX, "clk_i2s3_2ch_tx", clk_i2s3_2ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(21), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_dclk_vop_fracmux __initdata = ++ MUX(0, "dclk_vop_mux", mux_dclk_vop_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(8), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_i2s3_2ch_rx_fracmux __initdata = -+ MUX(CLK_I2S3_2CH_RX, "clk_i2s3_2ch_rx", clk_i2s3_2ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(83), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_rtc32k_fracmux __initdata = ++ MUX(SCLK_RTC32K, "clk_rtc32k", mux_rtc32k_p, 
CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(2), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_spdif_8ch_fracmux __initdata = -+ MUX(MCLK_SPDIF_8CH, "mclk_spdif_8ch", mclk_spdif_8ch_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(23), 15, 1, MFLAGS); ++static struct rockchip_clk_branch rk3308_pdm_fracmux __initdata = ++ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(46), 15, 1, MFLAGS); + -+static struct rockchip_clk_branch rk3568_audpwm_fracmux __initdata = -+ MUX(SCLK_AUDPWM, "sclk_audpwm", sclk_audpwm_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(25), 15, 1, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s0_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(52), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart1_fracmux __initdata = -+ MUX(0, "sclk_uart1_mux", sclk_uart1_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(52), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s0_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(54), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart2_fracmux __initdata = -+ MUX(0, "sclk_uart2_mux", sclk_uart2_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(54), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s1_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S1_8CH_TX_MUX, "clk_i2s1_8ch_tx_mux", mux_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(56), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart3_fracmux __initdata = -+ MUX(0, "sclk_uart3_mux", sclk_uart3_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(56), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s1_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S1_8CH_RX_MUX, "clk_i2s1_8ch_rx_mux", mux_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(58), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart4_fracmux __initdata = -+ MUX(0, "sclk_uart4_mux", sclk_uart4_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(58), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s2_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S2_8CH_TX_MUX, "clk_i2s2_8ch_tx_mux", mux_i2s2_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(60), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart5_fracmux __initdata = -+ MUX(0, "sclk_uart5_mux", sclk_uart5_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(60), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s2_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S2_8CH_RX_MUX, "clk_i2s2_8ch_rx_mux", mux_i2s2_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(62), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart6_fracmux __initdata = -+ MUX(0, "sclk_uart6_mux", sclk_uart6_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(62), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s3_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S3_8CH_TX_MUX, "clk_i2s3_8ch_tx_mux", mux_i2s3_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(64), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart7_fracmux __initdata = -+ MUX(0, "sclk_uart7_mux", sclk_uart7_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(64), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s3_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S3_8CH_RX_MUX, "clk_i2s3_8ch_rx_mux", mux_i2s3_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(66), 10, 2, MFLAGS); + -+static struct 
rockchip_clk_branch rk3568_uart8_fracmux __initdata = -+ MUX(0, "sclk_uart8_mux", sclk_uart8_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(66), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s0_2ch_fracmux __initdata = ++ MUX(0, "clk_i2s0_2ch_mux", mux_i2s0_2ch_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(68), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart9_fracmux __initdata = -+ MUX(0, "sclk_uart9_mux", sclk_uart9_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(68), 12, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_i2s1_2ch_fracmux __initdata = ++ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(70), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_uart0_fracmux __initdata = -+ MUX(0, "sclk_uart0_mux", sclk_uart0_p, CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(4), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_spdif_tx_fracmux __initdata = ++ MUX(0, "clk_spdif_tx_mux", mux_spdif_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(48), 14, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3568_rtc32k_pmu_fracmux __initdata = -+ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3568_PMU_CLKSEL_CON(0), 6, 2, MFLAGS); ++static struct rockchip_clk_branch rk3308_spdif_rx_fracmux __initdata = ++ MUX(0, "clk_spdif_rx_mux", mux_spdif_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(50), 15, 1, MFLAGS); + -+static struct rockchip_clk_branch rk3568_clk_branches[] __initdata = { ++ ++static struct rockchip_clk_branch rk3308_clk_branches[] __initdata = { + /* + * Clock-Architecture Diagram 1 + */ -+ /* SRC_CLK */ -+ COMPOSITE_NOMUX(0, "gpll_400m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(75), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_300m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(75), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_200m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(76), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_150m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(76), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_100m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(77), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_75m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(77), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 5, GFLAGS), -+ COMPOSITE_NOMUX(0, "gpll_20m", "gpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(78), 0, 6, DFLAGS, -+ RK3568_CLKGATE_CON(35), 6, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_500M, "cpll_500m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(78), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 7, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_333M, "cpll_333m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(79), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 8, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_250M, "cpll_250m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(79), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 9, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_125M, "cpll_125m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(80), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 10, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_100M, "cpll_100m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(82), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 11, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_62P5M, "cpll_62p5", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(80), 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 12, 
GFLAGS), -+ COMPOSITE_NOMUX(CPLL_50M, "cpll_50m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(81), 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(35), 13, GFLAGS), -+ COMPOSITE_NOMUX(CPLL_25M, "cpll_25m", "cpll", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(81), 8, 6, DFLAGS, -+ RK3568_CLKGATE_CON(35), 14, GFLAGS), -+ COMPOSITE_NOMUX(0, "clk_osc0_div_750k", "xin24m", CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(82), 8, 6, DFLAGS, -+ RK3568_CLKGATE_CON(35), 15, GFLAGS), -+ FACTOR(0, "clk_osc0_div_375k", "clk_osc0_div_750k", 0, 1, 2), -+ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), ++ + MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK3568_MODE_CON0, 14, 2, MFLAGS), ++ RK3308_MODE_CON, 8, 2, MFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ /* PD_CORE */ -+ COMPOSITE(0, "sclk_core_src", apll_gpll_npll_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NODIV(0, "sclk_core", sclk_core_pre_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(2), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(0), 7, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ COMPOSITE_NOMUX(0, "atclk_core", "armclk", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "gicclk_core", "armclk", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_core_pre", "armclk", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(4), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NOMUX(0, "periphclk_core_pre", "armclk", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(4), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_NOMUX(0, "tsclk_core", "periphclk_core_pre", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(5), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NOMUX(0, "cntclk_core", "periphclk_core_pre", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(5), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "sclk_core", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(5), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "vpll0_core", "vpll0", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "vpll1_core", "vpll1", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3308_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3308_CLKGATE_CON(0), 1, GFLAGS), + -+ COMPOSITE_NODIV(ACLK_CORE_NIU2BUS, "aclk_core_niu2bus", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(5), 14, 2, MFLAGS, -+ RK3568_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(0), 3, GFLAGS), + -+ GATE(CLK_CORE_PVTM, "clk_core_pvtm", "xin24m", 0, -+ RK3568_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(CLK_CORE_PVTM_CORE, "clk_core_pvtm_core", "armclk", 0, -+ RK3568_CLKGATE_CON(1), 11, GFLAGS), -+ GATE(CLK_CORE_PVTPLL, "clk_core_pvtpll", "armclk", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(1), 12, GFLAGS), -+ 
GATE(PCLK_CORE_PVTM, "pclk_core_pvtm", "pclk_core_pre", 0, -+ RK3568_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RK3308_CLKGATE_CON(0), 4, GFLAGS), + -+ /* PD_GPU */ -+ COMPOSITE(CLK_GPU_SRC, "clk_gpu_src", mpll_gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(6), 6, 2, MFLAGS | CLK_MUX_READ_ONLY, 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3568_CLKGATE_CON(2), 0, GFLAGS), -+ MUX(CLK_GPU_PRE_MUX, "clk_gpu_pre_mux", clk_gpu_pre_mux_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(6), 11, 1, MFLAGS | CLK_MUX_READ_ONLY), -+ DIV(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre_mux", 0, -+ RK3568_CLKSEL_CON(6), 8, 2, DFLAGS), -+ DIV(PCLK_GPU_PRE, "pclk_gpu_pre", "clk_gpu_pre_mux", 0, -+ RK3568_CLKSEL_CON(6), 12, 4, DFLAGS), -+ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre_mux", 0, -+ RK3568_CLKGATE_CON(2), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+ GATE(PCLK_GPU_PVTM, "pclk_gpu_pvtm", "pclk_gpu_pre", 0, -+ RK3568_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(CLK_GPU_PVTM, "clk_gpu_pvtm", "xin24m", 0, -+ RK3568_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(CLK_GPU_PVTM_CORE, "clk_gpu_pvtm_core", "clk_gpu_src", 0, -+ RK3568_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(CLK_GPU_PVTPLL, "clk_gpu_pvtpll", "clk_gpu_src", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_SRC, "clk_bus_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(5), 6, 2, MFLAGS, ++ RK3308_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_BUS, "pclk_bus", "clk_bus_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(6), 8, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_DDR, "pclk_ddr", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 15, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus", "clk_bus_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(6), 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_BUS, "aclk_bus", "clk_bus_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(5), 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 1, GFLAGS), + -+ /* PD_NPU */ -+ COMPOSITE(CLK_NPU_SRC, "clk_npu_src", npll_gpll_p, 0, -+ RK3568_CLKSEL_CON(7), 6, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3568_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_NPU_NP5, "clk_npu_np5", npll_gpll_p, 0, -+ RK3568_CLKSEL_CON(7), 7, 1, MFLAGS, 4, 2, DFLAGS, -+ RK3568_CLKGATE_CON(3), 1, GFLAGS), -+ MUX(CLK_NPU_PRE_NDFT, "clk_npu_pre_ndft", clk_npu_pre_ndft_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RK3568_CLKSEL_CON(7), 8, 1, MFLAGS), -+ MUX(CLK_NPU, "clk_npu", clk_npu_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(7), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu", 0, -+ RK3568_CLKSEL_CON(8), 0, 4, DFLAGS, -+ RK3568_CLKGATE_CON(3), 2, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_NPU_PRE, "pclk_npu_pre", "clk_npu", 0, -+ RK3568_CLKSEL_CON(8), 4, 4, DFLAGS, -+ RK3568_CLKGATE_CON(3), 3, GFLAGS), -+ GATE(ACLK_NPU_PRE, "aclk_npu_pre", "clk_npu", 0, -+ RK3568_CLKGATE_CON(3), 4, GFLAGS), -+ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, -+ RK3568_CLKGATE_CON(3), 7, GFLAGS), -+ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, -+ RK3568_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE_MUXTBL(0, "clk_uart0_src", mux_uart_src_p, 0, ++ RK3308_CLKSEL_CON(10), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3308_CLKGATE_CON(1), 11, GFLAGS, ++ &rk3308_uart0_fracmux), ++ GATE(SCLK_UART0, "clk_uart0", "clk_uart0_mux", 0, ++ 
RK3308_CLKGATE_CON(1), 12, GFLAGS), + -+ GATE(PCLK_NPU_PVTM, "pclk_npu_pvtm", "pclk_npu_pre", 0, -+ RK3568_CLKGATE_CON(3), 9, GFLAGS), -+ GATE(CLK_NPU_PVTM, "clk_npu_pvtm", "xin24m", 0, -+ RK3568_CLKGATE_CON(3), 10, GFLAGS), -+ GATE(CLK_NPU_PVTM_CORE, "clk_npu_pvtm_core", "clk_npu_pre_ndft", 0, -+ RK3568_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(CLK_NPU_PVTPLL, "clk_npu_pvtpll", "clk_npu_pre_ndft", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(3), 12, GFLAGS), ++ COMPOSITE_MUXTBL(0, "clk_uart1_src", mux_uart_src_p, 0, ++ RK3308_CLKSEL_CON(13), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(15), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3308_CLKGATE_CON(1), 15, GFLAGS, ++ &rk3308_uart1_fracmux), ++ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, ++ RK3308_CLKGATE_CON(2), 0, GFLAGS), + -+ /* PD_DDR */ -+ COMPOSITE(CLK_DDRPHY1X_SRC, "clk_ddrphy1x_src", dpll_gpll_cpll_p, CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(4), 0, GFLAGS), -+ MUXGRF(CLK_DDR1X, "clk_ddr1x", clk_ddr1x_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(9), 15, 1, MFLAGS), ++ COMPOSITE_MUXTBL(0, "clk_uart2_src", mux_uart_src_p, 0, ++ RK3308_CLKSEL_CON(16), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(18), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3308_CLKGATE_CON(2), 3, GFLAGS, ++ &rk3308_uart2_fracmux), ++ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", CLK_SET_RATE_PARENT, ++ RK3308_CLKGATE_CON(2), 4, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_MSCH, "clk_msch", "clk_ddr1x", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(10), 0, 2, DFLAGS, -+ RK3568_CLKGATE_CON(4), 2, GFLAGS), -+ GATE(CLK24_DDRMON, "clk24_ddrmon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(4), 15, GFLAGS), ++ COMPOSITE_MUXTBL(0, "clk_uart3_src", mux_uart_src_p, 0, ++ RK3308_CLKSEL_CON(19), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(21), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3308_CLKGATE_CON(2), 7, GFLAGS, ++ &rk3308_uart3_fracmux), ++ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, ++ RK3308_CLKGATE_CON(2), 8, GFLAGS), + -+ /* PD_GIC_AUDIO */ -+ COMPOSITE_NODIV(ACLK_GIC_AUDIO, "aclk_gic_audio", gpll200_gpll150_gpll100_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(10), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(5), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_GIC_AUDIO, "hclk_gic_audio", gpll150_gpll100_gpll75_xin24m_p, CLK_IGNORE_UNUSED, -+ RK3568_CLKSEL_CON(10), 10, 2, MFLAGS, -+ RK3568_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(HCLK_SDMMC_BUFFER, "hclk_sdmmc_buffer", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 8, GFLAGS), -+ COMPOSITE_NODIV(DCLK_SDMMC_BUFFER, "dclk_sdmmc_buffer", gpll100_gpll75_gpll50_p, 0, -+ RK3568_CLKSEL_CON(10), 12, 2, MFLAGS, -+ RK3568_CLKGATE_CON(5), 9, GFLAGS), -+ GATE(ACLK_GIC600, "aclk_gic600", "aclk_gic_audio", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_gic_audio", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 10, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 11, GFLAGS), -+ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", 
"hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 12, GFLAGS), -+ GATE(HCLK_I2S3_2CH, "hclk_i2s3_2ch", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 13, GFLAGS), ++ COMPOSITE_MUXTBL(0, "clk_uart4_src", mux_uart_src_p, 0, ++ RK3308_CLKSEL_CON(22), 13, 3, MFLAGS, uart_src_mux_idx, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(24), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3308_CLKGATE_CON(2), 11, GFLAGS, ++ &rk3308_uart4_fracmux), ++ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, ++ RK3308_CLKGATE_CON(2), 12, GFLAGS), + -+ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(12), 0, -+ RK3568_CLKGATE_CON(6), 1, GFLAGS, -+ &rk3568_i2s0_8ch_tx_fracmux), -+ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, -+ RK3568_CLKGATE_CON(6), 2, GFLAGS), -+ COMPOSITE_NODIV(I2S0_MCLKOUT_TX, "i2s0_mclkout_tx", i2s0_mclkout_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(11), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(25), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(26), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(2), 14, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(27), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(2), 15, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(28), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(3), 0, GFLAGS), + -+ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(13), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(14), 0, -+ RK3568_CLKGATE_CON(6), 5, GFLAGS, -+ &rk3568_i2s0_8ch_rx_fracmux), -+ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, -+ RK3568_CLKGATE_CON(6), 6, GFLAGS), -+ COMPOSITE_NODIV(I2S0_MCLKOUT_RX, "i2s0_mclkout_rx", i2s0_mclkout_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(13), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(6), 7, GFLAGS), ++ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(29), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(74), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(15), 0, GFLAGS), ++ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(75), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(15), 1, GFLAGS), + -+ COMPOSITE(CLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(15), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(6), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_TX_FRAC, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(16), 0, -+ RK3568_CLKGATE_CON(6), 9, GFLAGS, -+ &rk3568_i2s1_8ch_tx_fracmux), -+ GATE(MCLK_I2S1_8CH_TX, "mclk_i2s1_8ch_tx", "clk_i2s1_8ch_tx", 0, -+ RK3568_CLKGATE_CON(6), 10, GFLAGS), -+ 
COMPOSITE_NODIV(I2S1_MCLKOUT_TX, "i2s1_mclkout_tx", i2s1_mclkout_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(15), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(6), 11, GFLAGS), ++ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(3), 2, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(32), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(3), 4, GFLAGS), + -+ COMPOSITE(CLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(17), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(6), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_RX_FRAC, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(18), 0, -+ RK3568_CLKGATE_CON(6), 13, GFLAGS, -+ &rk3568_i2s1_8ch_rx_fracmux), -+ GATE(MCLK_I2S1_8CH_RX, "mclk_i2s1_8ch_rx", "clk_i2s1_8ch_rx", 0, -+ RK3568_CLKGATE_CON(6), 14, GFLAGS), -+ COMPOSITE_NODIV(I2S1_MCLKOUT_RX, "i2s1_mclkout_rx", i2s1_mclkout_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(17), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(6), 15, GFLAGS), ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 10, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 12, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 13, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 14, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ RK3308_CLKGATE_CON(3), 15, GFLAGS), + -+ COMPOSITE(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(19), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(7), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(20), 0, -+ RK3568_CLKGATE_CON(7), 1, GFLAGS, -+ &rk3568_i2s2_2ch_fracmux), -+ GATE(MCLK_I2S2_2CH, "mclk_i2s2_2ch", "clk_i2s2_2ch", 0, -+ RK3568_CLKGATE_CON(7), 2, GFLAGS), -+ COMPOSITE_NODIV(I2S2_MCLKOUT, "i2s2_mclkout", i2s2_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(19), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(7), 3, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK3308_CLKSEL_CON(33), 0, 11, DFLAGS, ++ RK3308_CLKGATE_CON(3), 5, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3308_CLKSEL_CON(34), 0, 11, DFLAGS, ++ RK3308_CLKGATE_CON(3), 6, GFLAGS), + -+ COMPOSITE(CLK_I2S3_2CH_TX_SRC, "clk_i2s3_2ch_tx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(21), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(7), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S3_2CH_TX_FRAC, "clk_i2s3_2ch_tx_frac", "clk_i2s3_2ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(22), 0, -+ RK3568_CLKGATE_CON(7), 5, GFLAGS, -+ &rk3568_i2s3_2ch_tx_fracmux), -+ GATE(MCLK_I2S3_2CH_TX, "mclk_i2s3_2ch_tx", "clk_i2s3_2ch_tx", 0, -+ RK3568_CLKGATE_CON(7), 6, GFLAGS), -+ COMPOSITE_NODIV(I2S3_MCLKOUT_TX, "i2s3_mclkout_tx", i2s3_mclkout_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(21), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(7), 7, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_OTP, "clk_otp", "xin24m", 0, ++ RK3308_CLKSEL_CON(35), 0, 4, DFLAGS, ++ RK3308_CLKGATE_CON(3), 7, GFLAGS), ++ 
COMPOSITE_NOMUX(SCLK_OTP_USR, "clk_otp_usr", "clk_otp", 0, ++ RK3308_CLKSEL_CON(35), 4, 2, DFLAGS, ++ RK3308_CLKGATE_CON(3), 8, GFLAGS), + -+ COMPOSITE(CLK_I2S3_2CH_RX_SRC, "clk_i2s3_2ch_rx_src", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(83), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(7), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S3_2CH_RX_FRAC, "clk_i2s3_2ch_rx_frac", "clk_i2s3_2ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(84), 0, -+ RK3568_CLKGATE_CON(7), 9, GFLAGS, -+ &rk3568_i2s3_2ch_rx_fracmux), -+ GATE(MCLK_I2S3_2CH_RX, "mclk_i2s3_2ch_rx", "clk_i2s3_2ch_rx", 0, -+ RK3568_CLKGATE_CON(7), 10, GFLAGS), -+ COMPOSITE_NODIV(I2S3_MCLKOUT_RX, "i2s3_mclkout_rx", i2s3_mclkout_rx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(83), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(SCLK_CPU_BOOST, "clk_cpu_boost", "xin24m", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(3), 9, GFLAGS), + -+ MUXGRF(I2S1_MCLKOUT, "i2s1_mclkout", i2s1_mclkout_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3568_GRF_SOC_CON1, 5, 1, MFLAGS), -+ MUXGRF(I2S3_MCLKOUT, "i2s3_mclkout", i2s3_mclkout_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3568_GRF_SOC_CON2, 15, 1, MFLAGS), -+ MUXGRF(I2S1_MCLK_RX_IOE, "i2s1_mclk_rx_ioe", i2s1_mclk_rx_ioe_p, 0, -+ RK3568_GRF_SOC_CON2, 0, 1, MFLAGS), -+ MUXGRF(I2S1_MCLK_TX_IOE, "i2s1_mclk_tx_ioe", i2s1_mclk_tx_ioe_p, 0, -+ RK3568_GRF_SOC_CON2, 1, 1, MFLAGS), -+ MUXGRF(I2S2_MCLK_IOE, "i2s2_mclk_ioe", i2s2_mclk_ioe_p, 0, -+ RK3568_GRF_SOC_CON2, 2, 1, MFLAGS), -+ MUXGRF(I2S3_MCLK_IOE, "i2s3_mclk_ioe", i2s3_mclk_ioe_p, 0, -+ RK3568_GRF_SOC_CON2, 3, 1, MFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_dpll_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(7), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_dpll_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(7), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3308_CLKGATE_CON(1), 5, GFLAGS), + -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(5), 14, GFLAGS), -+ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mclk_pdm_p, 0, -+ RK3568_CLKSEL_CON(23), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(5), 15, GFLAGS), -+ GATE(HCLK_VAD, "hclk_vad", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(7), 13, GFLAGS), ++ COMPOSITE(0, "dclk_vop_src", mux_dpll_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(8), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(9), 0, ++ RK3308_CLKGATE_CON(1), 7, GFLAGS, ++ &rk3308_dclk_vop_fracmux), ++ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_mux", 0, ++ RK3308_CLKGATE_CON(1), 8, GFLAGS), + -+ COMPOSITE(MCLK_SPDIF_8CH_SRC, "mclk_spdif_8ch_src", cpll_gpll_p, 0, -+ RK3568_CLKSEL_CON(23), 14, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(7), 14, GFLAGS), -+ COMPOSITE_FRACMUX(MCLK_SPDIF_8CH_FRAC, "mclk_spdif_8ch_frac", "mclk_spdif_8ch_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(24), 0, -+ RK3568_CLKGATE_CON(7), 15, GFLAGS, -+ &rk3568_spdif_8ch_fracmux), ++ /* ++ * Clock-Architecture Diagram 4 ++ */ + -+ GATE(HCLK_AUDPWM, "hclk_audpwm", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE(SCLK_AUDPWM_SRC, "sclk_audpwm_src", gpll_cpll_p, 0, -+ RK3568_CLKSEL_CON(25), 14, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3568_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_AUDPWM_FRAC, "sclk_audpwm_frac", "sclk_audpwm_src", CLK_SET_RATE_PARENT, -+ 
RK3568_CLKSEL_CON(26), 0, -+ RK3568_CLKGATE_CON(8), 2, GFLAGS, -+ &rk3568_audpwm_fracmux), ++ COMPOSITE_NODIV(ACLK_PERI_SRC, "clk_peri_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(36), 6, 2, MFLAGS, ++ RK3308_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_PERI, "aclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(36), 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(37), 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 2, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(37), 8, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 3, GFLAGS), + -+ GATE(HCLK_ACDCDIG, "hclk_acdcdig", "hclk_gic_audio", 0, -+ RK3568_CLKGATE_CON(8), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_ACDCDIG_I2C, "clk_acdcdig_i2c", clk_i2c_p, 0, -+ RK3568_CLKSEL_CON(23), 10, 2, MFLAGS, -+ RK3568_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(CLK_ACDCDIG_DAC, "clk_acdcdig_dac", "mclk_i2s3_2ch_tx", 0, -+ RK3568_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(CLK_ACDCDIG_ADC, "clk_acdcdig_adc", "mclk_i2s3_2ch_rx", 0, -+ RK3568_CLKGATE_CON(8), 6, GFLAGS), ++ COMPOSITE(SCLK_NANDC_DIV, "clk_nandc_div", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE(SCLK_NANDC_DIV50, "clk_nandc_div50", mux_dpll_vpll0_vpll1_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE_NODIV(SCLK_NANDC, "clk_nandc", mux_nandc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(38), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(8), 5, GFLAGS), + -+ /* PD_SECURE_FLASH */ -+ COMPOSITE_NODIV(ACLK_SECURE_FLASH, "aclk_secure_flash", gpll200_gpll150_gpll100_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(27), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(8), 7, GFLAGS), -+ COMPOSITE_NODIV(HCLK_SECURE_FLASH, "hclk_secure_flash", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(27), 2, 2, MFLAGS, -+ RK3568_CLKGATE_CON(8), 8, GFLAGS), -+ GATE(ACLK_CRYPTO_NS, "aclk_crypto_ns", "aclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(8), 11, GFLAGS), -+ GATE(HCLK_CRYPTO_NS, "hclk_crypto_ns", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(8), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_CRYPTO_NS_CORE, "clk_crypto_ns_core", gpll200_gpll150_gpll100_p, 0, -+ RK3568_CLKSEL_CON(27), 4, 2, MFLAGS, -+ RK3568_CLKGATE_CON(8), 13, GFLAGS), -+ COMPOSITE_NODIV(CLK_CRYPTO_NS_PKA, "clk_crypto_ns_pka", gpll300_gpll200_gpll100_p, 0, -+ RK3568_CLKSEL_CON(27), 6, 2, MFLAGS, -+ RK3568_CLKGATE_CON(8), 14, GFLAGS), -+ GATE(CLK_CRYPTO_NS_RNG, "clk_crypto_ns_rng", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(8), 15, GFLAGS), -+ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_secure_flash", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(CLK_TRNG_NS, "clk_trng_ns", "hclk_secure_flash", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(9), 11, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(26), 9, GFLAGS), -+ GATE(CLK_OTPC_NS_SBPI, "clk_otpc_ns_sbpi", "xin24m", 0, -+ RK3568_CLKGATE_CON(26), 10, GFLAGS), -+ GATE(CLK_OTPC_NS_USR, "clk_otpc_ns_usr", "xin_osc0_half", 0, -+ RK3568_CLKGATE_CON(26), 11, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE_NODIV(NCLK_NANDC, "nclk_nandc", clk_nandc_p, 0, -+ RK3568_CLKSEL_CON(28), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(9), 1, 
GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(HCLK_SFC_XIP, "hclk_sfc_xip", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(9), 3, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SFC, "sclk_sfc", sclk_sfc_p, 0, -+ RK3568_CLKSEL_CON(28), 4, 3, MFLAGS, -+ RK3568_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(ACLK_EMMC, "aclk_emmc", "aclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_secure_flash", 0, -+ RK3568_CLKGATE_CON(9), 6, GFLAGS), -+ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", gpll200_gpll150_cpll125_p, 0, -+ RK3568_CLKSEL_CON(28), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(9), 7, GFLAGS), -+ COMPOSITE_NODIV(CCLK_EMMC, "cclk_emmc", cclk_emmc_p, 0, -+ RK3568_CLKSEL_CON(28), 12, 3, MFLAGS, -+ RK3568_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, -+ RK3568_CLKGATE_CON(9), 9, GFLAGS), -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_emmc", RK3568_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_emmc", RK3568_EMMC_CON1, 1), ++ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(8), 6, GFLAGS), ++ COMPOSITE(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(39), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(8), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(39), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(8), 7, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3308_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK3308_SDMMC_CON1, 1), + -+ /* PD_PIPE */ -+ COMPOSITE_NODIV(ACLK_PIPE, "aclk_pipe", aclk_pipe_p, 0, -+ RK3568_CLKSEL_CON(29), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PIPE, "pclk_pipe", "aclk_pipe", 0, -+ RK3568_CLKSEL_CON(29), 4, 4, DFLAGS, -+ RK3568_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 1, GFLAGS), -+ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(CLK_PCIE20_AUX_NDFT, "clk_pcie20_aux_ndft", "xin24m", 0, -+ RK3568_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(ACLK_PCIE30X1_MST, "aclk_pcie30x1_mst", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 8, GFLAGS), -+ GATE(ACLK_PCIE30X1_SLV, "aclk_pcie30x1_slv", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 9, GFLAGS), -+ GATE(ACLK_PCIE30X1_DBI, "aclk_pcie30x1_dbi", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 10, GFLAGS), -+ GATE(PCLK_PCIE30X1, "pclk_pcie30x1", "pclk_pipe", 0, -+ RK3568_CLKGATE_CON(12), 11, GFLAGS), -+ GATE(CLK_PCIE30X1_AUX_NDFT, "clk_pcie30x1_aux_ndft", "xin24m", 0, -+ RK3568_CLKGATE_CON(12), 12, GFLAGS), -+ GATE(ACLK_PCIE30X2_MST, "aclk_pcie30x2_mst", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(ACLK_PCIE30X2_SLV, "aclk_pcie30x2_slv", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(ACLK_PCIE30X2_DBI, "aclk_pcie30x2_dbi", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(13), 2, GFLAGS), -+ GATE(PCLK_PCIE30X2, "pclk_pcie30x2", "pclk_pipe", 0, -+ RK3568_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(CLK_PCIE30X2_AUX_NDFT, "clk_pcie30x2_aux_ndft", "xin24m", 0, -+ RK3568_CLKGATE_CON(13), 
4, GFLAGS), -+ GATE(ACLK_SATA0, "aclk_sata0", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(CLK_SATA0_PMALIVE, "clk_sata0_pmalive", "gpll_20m", 0, -+ RK3568_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(CLK_SATA0_RXOOB, "clk_sata0_rxoob", "cpll_50m", 0, -+ RK3568_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(ACLK_SATA1, "aclk_sata1", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(11), 4, GFLAGS), -+ GATE(CLK_SATA1_PMALIVE, "clk_sata1_pmalive", "gpll_20m", 0, -+ RK3568_CLKGATE_CON(11), 5, GFLAGS), -+ GATE(CLK_SATA1_RXOOB, "clk_sata1_rxoob", "cpll_50m", 0, -+ RK3568_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(ACLK_SATA2, "aclk_sata2", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(11), 8, GFLAGS), -+ GATE(CLK_SATA2_PMALIVE, "clk_sata2_pmalive", "gpll_20m", 0, -+ RK3568_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(CLK_SATA2_RXOOB, "clk_sata2_rxoob", "cpll_50m", 0, -+ RK3568_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(CLK_USB3OTG0_REF, "clk_usb3otg0_ref", "xin24m", 0, -+ RK3568_CLKGATE_CON(10), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB3OTG0_SUSPEND, "clk_usb3otg0_suspend", xin24m_32k_p, 0, -+ RK3568_CLKSEL_CON(29), 8, 1, MFLAGS, -+ RK3568_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_pipe", 0, -+ RK3568_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(CLK_USB3OTG1_REF, "clk_usb3otg1_ref", "xin24m", 0, -+ RK3568_CLKGATE_CON(10), 13, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB3OTG1_SUSPEND, "clk_usb3otg1_suspend", xin24m_32k_p, 0, -+ RK3568_CLKSEL_CON(29), 9, 1, MFLAGS, -+ RK3568_CLKGATE_CON(10), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_XPCS_EEE, "clk_xpcs_eee", gpll200_cpll125_p, 0, -+ RK3568_CLKSEL_CON(29), 13, 1, MFLAGS, -+ RK3568_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(PCLK_XPCS, "pclk_xpcs", "pclk_pipe", 0, -+ RK3568_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE(SCLK_SDIO_DIV50, "clk_sdio_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(40), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(40), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(8), 9, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3308_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK3308_SDIO_CON1, 1), + -+ /* PD_PHP */ -+ COMPOSITE_NODIV(ACLK_PHP, "aclk_php", gpll300_gpll200_gpll100_xin24m_p, 0, -+ RK3568_CLKSEL_CON(30), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(14), 8, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PHP, "hclk_php", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(30), 2, 2, MFLAGS, -+ RK3568_CLKGATE_CON(14), 9, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(30), 4, 4, DFLAGS, -+ RK3568_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_php", 0, -+ RK3568_CLKGATE_CON(15), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_SDMMC0, "clk_sdmmc0", clk_sdmmc_p, 0, -+ RK3568_CLKSEL_CON(30), 8, 3, MFLAGS, -+ RK3568_CLKGATE_CON(15), 1, GFLAGS), -+ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "clk_sdmmc0", RK3568_SDMMC0_CON0, 1), -+ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "clk_sdmmc0", RK3568_SDMMC0_CON1, 1), ++ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, 
DFLAGS, ++ RK3308_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_dpll_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(41), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3308_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(41), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(8), 11, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK3308_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK3308_EMMC_CON1, 1), + -+ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_php", 0, -+ RK3568_CLKGATE_CON(15), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_SDMMC1, "clk_sdmmc1", clk_sdmmc_p, 0, -+ RK3568_CLKSEL_CON(30), 12, 3, MFLAGS, -+ RK3568_CLKGATE_CON(15), 3, GFLAGS), -+ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "clk_sdmmc1", RK3568_SDMMC1_CON0, 1), -+ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "clk_sdmmc1", RK3568_SDMMC1_CON1, 1), ++ COMPOSITE(SCLK_SFC, "clk_sfc", mux_dpll_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(42), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(8), 12, GFLAGS), + -+ GATE(ACLK_GMAC0, "aclk_gmac0", "aclk_php", 0, -+ RK3568_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(PCLK_GMAC0, "pclk_gmac0", "pclk_php", 0, -+ RK3568_CLKGATE_CON(15), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_MAC0_2TOP, "clk_mac0_2top", clk_mac_2top_p, 0, -+ RK3568_CLKSEL_CON(31), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(15), 7, GFLAGS), -+ COMPOSITE_NODIV(CLK_MAC0_OUT, "clk_mac0_out", cpll125_cpll50_cpll25_xin24m_p, 0, -+ RK3568_CLKSEL_CON(31), 14, 2, MFLAGS, -+ RK3568_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(CLK_MAC0_REFOUT, "clk_mac0_refout", "clk_mac0_2top", 0, -+ RK3568_CLKGATE_CON(15), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC0_PTP_REF, "clk_gmac0_ptp_ref", clk_gmac_ptp_p, 0, -+ RK3568_CLKSEL_CON(31), 12, 2, MFLAGS, -+ RK3568_CLKGATE_CON(15), 4, GFLAGS), -+ MUX(SCLK_GMAC0, "clk_gmac0", mux_gmac0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(31), 2, 1, MFLAGS), -+ FACTOR(0, "clk_gmac0_tx_div5", "clk_gmac0", 0, 1, 5), -+ FACTOR(0, "clk_gmac0_tx_div50", "clk_gmac0", 0, 1, 50), -+ FACTOR(0, "clk_gmac0_rx_div2", "clk_gmac0", 0, 1, 2), -+ FACTOR(0, "clk_gmac0_rx_div20", "clk_gmac0", 0, 1, 20), -+ MUX(SCLK_GMAC0_RGMII_SPEED, "clk_gmac0_rgmii_speed", mux_gmac0_rgmii_speed_p, 0, -+ RK3568_CLKSEL_CON(31), 4, 2, MFLAGS), -+ MUX(SCLK_GMAC0_RMII_SPEED, "clk_gmac0_rmii_speed", mux_gmac0_rmii_speed_p, 0, -+ RK3568_CLKSEL_CON(31), 3, 1, MFLAGS), -+ MUX(SCLK_GMAC0_RX_TX, "clk_gmac0_rx_tx", mux_gmac0_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(31), 0, 2, MFLAGS), ++ GATE(SCLK_OTG_ADP, "clk_otg_adp", "clk_rtc32k", 0, ++ RK3308_CLKGATE_CON(8), 13, GFLAGS), + -+ /* PD_USB */ -+ COMPOSITE_NODIV(ACLK_USB, "aclk_usb", gpll300_gpll200_gpll100_xin24m_p, 0, -+ RK3568_CLKSEL_CON(32), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(16), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_USB, "hclk_usb", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(32), 2, 2, MFLAGS, -+ RK3568_CLKGATE_CON(16), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_USB, "pclk_usb", "aclk_usb", CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(32), 4, 4, DFLAGS, -+ RK3568_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(HCLK_USB2HOST0, "hclk_usb2host0", "hclk_usb", 0, -+ RK3568_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(HCLK_USB2HOST0_ARB, "hclk_usb2host0_arb", "hclk_usb", 0, -+ RK3568_CLKGATE_CON(16), 13, GFLAGS), -+ GATE(HCLK_USB2HOST1, "hclk_usb2host1", "hclk_usb", 0, -+ RK3568_CLKGATE_CON(16), 14, GFLAGS), -+ GATE(HCLK_USB2HOST1_ARB, "hclk_usb2host1_arb", "hclk_usb", 0, -+ 
RK3568_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(HCLK_SDMMC2, "hclk_sdmmc2", "hclk_usb", 0, -+ RK3568_CLKGATE_CON(17), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_SDMMC2, "clk_sdmmc2", clk_sdmmc_p, 0, -+ RK3568_CLKSEL_CON(32), 8, 3, MFLAGS, -+ RK3568_CLKGATE_CON(17), 1, GFLAGS), -+ MMC(SCLK_SDMMC2_DRV, "sdmmc2_drv", "clk_sdmmc2", RK3568_SDMMC2_CON0, 1), -+ MMC(SCLK_SDMMC2_SAMPLE, "sdmmc2_sample", "clk_sdmmc2", RK3568_SDMMC2_CON1, 1), ++ COMPOSITE(SCLK_MAC_SRC, "clk_mac_src", mux_dpll_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(8), 14, GFLAGS), ++ MUX(SCLK_MAC, "clk_mac", mux_mac_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(43), 14, 1, MFLAGS), ++ GATE(SCLK_MAC_REF, "clk_mac_ref", "clk_mac", 0, ++ RK3308_CLKGATE_CON(9), 1, GFLAGS), ++ GATE(SCLK_MAC_RX_TX, "clk_mac_rx_tx", "clk_mac", 0, ++ RK3308_CLKGATE_CON(9), 0, GFLAGS), ++ FACTOR(0, "clk_mac_rx_tx_div2", "clk_mac_rx_tx", 0, 1, 2), ++ FACTOR(0, "clk_mac_rx_tx_div20", "clk_mac_rx_tx", 0, 1, 20), ++ MUX(SCLK_MAC_RMII, "clk_mac_rmii_sel", mux_mac_rmii_sel_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(43), 15, 1, MFLAGS), + -+ GATE(ACLK_GMAC1, "aclk_gmac1", "aclk_usb", 0, -+ RK3568_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(PCLK_GMAC1, "pclk_gmac1", "pclk_usb", 0, -+ RK3568_CLKGATE_CON(17), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_MAC1_2TOP, "clk_mac1_2top", clk_mac_2top_p, 0, -+ RK3568_CLKSEL_CON(33), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(17), 5, GFLAGS), -+ COMPOSITE_NODIV(CLK_MAC1_OUT, "clk_mac1_out", cpll125_cpll50_cpll25_xin24m_p, 0, -+ RK3568_CLKSEL_CON(33), 14, 2, MFLAGS, -+ RK3568_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(CLK_MAC1_REFOUT, "clk_mac1_refout", "clk_mac1_2top", 0, -+ RK3568_CLKGATE_CON(17), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC1_PTP_REF, "clk_gmac1_ptp_ref", clk_gmac_ptp_p, 0, -+ RK3568_CLKSEL_CON(33), 12, 2, MFLAGS, -+ RK3568_CLKGATE_CON(17), 2, GFLAGS), -+ MUX(SCLK_GMAC1, "clk_gmac1", mux_gmac1_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(33), 2, 1, MFLAGS), -+ FACTOR(0, "clk_gmac1_tx_div5", "clk_gmac1", 0, 1, 5), -+ FACTOR(0, "clk_gmac1_tx_div50", "clk_gmac1", 0, 1, 50), -+ FACTOR(0, "clk_gmac1_rx_div2", "clk_gmac1", 0, 1, 2), -+ FACTOR(0, "clk_gmac1_rx_div20", "clk_gmac1", 0, 1, 20), -+ MUX(SCLK_GMAC1_RGMII_SPEED, "clk_gmac1_rgmii_speed", mux_gmac1_rgmii_speed_p, 0, -+ RK3568_CLKSEL_CON(33), 4, 2, MFLAGS), -+ MUX(SCLK_GMAC1_RMII_SPEED, "clk_gmac1_rmii_speed", mux_gmac1_rmii_speed_p, 0, -+ RK3568_CLKSEL_CON(33), 3, 1, MFLAGS), -+ MUX(SCLK_GMAC1_RX_TX, "clk_gmac1_rx_tx", mux_gmac1_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(33), 0, 2, MFLAGS), ++ COMPOSITE(SCLK_OWIRE, "clk_owire", mux_dpll_vpll0_xin24m_p, 0, ++ RK3308_CLKSEL_CON(44), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3308_CLKGATE_CON(8), 15, GFLAGS), + -+ /* PD_PERI */ -+ COMPOSITE_NODIV(ACLK_PERIMID, "aclk_perimid", gpll300_gpll200_gpll100_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(10), 4, 2, MFLAGS, -+ RK3568_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PERIMID, "hclk_perimid", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(10), 6, 2, MFLAGS, -+ RK3568_CLKGATE_CON(14), 1, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 5 ++ */ + -+ /* PD_VI */ -+ COMPOSITE_NODIV(ACLK_VI, "aclk_vi", gpll400_gpll300_gpll200_xin24m_p, 0, -+ RK3568_CLKSEL_CON(34), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(18), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi", 0, -+ RK3568_CLKSEL_CON(34), 4, 4, DFLAGS, -+ RK3568_CLKGATE_CON(18), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi", 0, -+ 
RK3568_CLKSEL_CON(34), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi", 0, -+ RK3568_CLKGATE_CON(18), 9, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, -+ RK3568_CLKGATE_CON(18), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", cpll333_gpll300_gpll200_p, 0, -+ RK3568_CLKSEL_CON(34), 14, 2, MFLAGS, -+ RK3568_CLKGATE_CON(18), 11, GFLAGS), -+ GATE(ICLK_VICAP_G, "iclk_vicap_g", "iclk_vicap", 0, -+ RK3568_CLKGATE_CON(18), 13, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vi", 0, -+ RK3568_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, -+ RK3568_CLKGATE_CON(19), 1, GFLAGS), -+ COMPOSITE(CLK_ISP, "clk_isp", cpll_gpll_hpll_p, 0, -+ RK3568_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(PCLK_CSI2HOST1, "pclk_csi2host1", "pclk_vi", 0, -+ RK3568_CLKGATE_CON(19), 4, GFLAGS), -+ COMPOSITE(CLK_CIF_OUT, "clk_cif_out", gpll_usb480m_xin24m_p, 0, -+ RK3568_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3568_CLKGATE_CON(19), 8, GFLAGS), -+ COMPOSITE(CLK_CAM0_OUT, "clk_cam0_out", gpll_usb480m_xin24m_p, 0, -+ RK3568_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3568_CLKGATE_CON(19), 9, GFLAGS), -+ COMPOSITE(CLK_CAM1_OUT, "clk_cam1_out", gpll_usb480m_xin24m_p, 0, -+ RK3568_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3568_CLKGATE_CON(19), 10, GFLAGS), ++ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(0), 12, GFLAGS), + -+ /* PD_VO */ -+ COMPOSITE_NODIV(ACLK_VO, "aclk_vo", gpll300_cpll250_gpll100_xin24m_p, 0, -+ RK3568_CLKSEL_CON(37), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(20), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VO, "hclk_vo", "aclk_vo", 0, -+ RK3568_CLKSEL_CON(37), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(20), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VO, "pclk_vo", "aclk_vo", 0, -+ RK3568_CLKSEL_CON(37), 12, 4, DFLAGS, -+ RK3568_CLKGATE_CON(20), 2, GFLAGS), -+ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", cpll_gpll_hpll_vpll_p, 0, -+ RK3568_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, -+ RK3568_CLKGATE_CON(20), 8, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo", 0, -+ RK3568_CLKGATE_CON(20), 9, GFLAGS), -+ COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(39), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3568_CLKGATE_CON(20), 10, GFLAGS), -+ COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(40), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3568_CLKGATE_CON(20), 11, GFLAGS), -+ COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(41), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3568_CLKGATE_CON(20), 12, GFLAGS), -+ GATE(CLK_VOP_PWM, "clk_vop_pwm", "xin24m", 0, -+ RK3568_CLKGATE_CON(20), 13, GFLAGS), -+ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 0, GFLAGS), -+ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 1, GFLAGS), -+ GATE(PCLK_HDCP, "pclk_hdcp", "pclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(PCLK_HDMI_HOST, "pclk_hdmi_host", "pclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(CLK_HDMI_SFR, "clk_hdmi_sfr", "xin24m", 0, -+ RK3568_CLKGATE_CON(21), 4, GFLAGS), -+ GATE(CLK_HDMI_CEC, "clk_hdmi_cec", "clk_rtc_32k", 0, -+ RK3568_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(PCLK_DSITX_0, "pclk_dsitx_0", "pclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 6, GFLAGS), -+ GATE(PCLK_DSITX_1, 
"pclk_dsitx_1", "pclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 7, GFLAGS), -+ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "pclk_vo", 0, -+ RK3568_CLKGATE_CON(21), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_EDP_200M, "clk_edp_200m", gpll200_gpll150_cpll125_p, 0, -+ RK3568_CLKSEL_CON(38), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(21), 9, GFLAGS), ++ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 10, GFLAGS), ++ GATE(0, "clk_ddr_upctrl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(0, "clk_ddr_msch_peribus", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 13, GFLAGS), + -+ /* PD_VPU */ -+ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", gpll_cpll_p, 0, -+ RK3568_CLKSEL_CON(42), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(22), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, -+ RK3568_CLKSEL_CON(42), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(22), 1, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, -+ RK3568_CLKGATE_CON(22), 4, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, -+ RK3568_CLKGATE_CON(22), 5, GFLAGS), ++ COMPOSITE(SCLK_DDRCLK, "clk_ddrphy4x_src", mux_dpll_vpll0_vpll1_p, CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(1), 6, 2, MFLAGS, 0, 3, DFLAGS, ++ RK3308_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(0, "clk_ddrphy4x", "clk_ddrphy4x_src", CLK_IS_CRITICAL, ++ RK3308_CLKGATE_CON(0), 11, GFLAGS), ++ FACTOR_GATE(0, "clk_ddr_stdby_div4", "clk_ddrphy4x", CLK_IGNORE_UNUSED, 1, 4, ++ RK3308_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_ddrstdby", mux_ddrstdby_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(1), 8, 1, MFLAGS, ++ RK3308_CLKGATE_CON(4), 14, GFLAGS), + -+ /* PD_RGA */ -+ COMPOSITE_NODIV(ACLK_RGA_PRE, "aclk_rga_pre", gpll300_cpll250_gpll100_xin24m_p, 0, -+ RK3568_CLKSEL_CON(43), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(23), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_pre", 0, -+ RK3568_CLKSEL_CON(43), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(23), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_RGA_PRE, "pclk_rga_pre", "aclk_rga_pre", 0, -+ RK3568_CLKSEL_CON(43), 12, 4, DFLAGS, -+ RK3568_CLKGATE_CON(22), 12, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 5, GFLAGS), -+ COMPOSITE_NODIV(CLK_RGA_CORE, "clk_rga_core", gpll300_gpll200_gpll100_p, 0, -+ RK3568_CLKSEL_CON(43), 2, 2, MFLAGS, -+ RK3568_CLKGATE_CON(23), 6, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 7, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_IEP_CORE, "clk_iep_core", gpll300_gpll200_gpll100_p, 0, -+ RK3568_CLKSEL_CON(43), 4, 2, MFLAGS, -+ RK3568_CLKGATE_CON(23), 9, GFLAGS), -+ GATE(HCLK_EBC, "hclk_ebc", "hclk_rga_pre", 0, RK3568_CLKGATE_CON(23), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_EBC, "dclk_ebc", gpll400_cpll333_gpll200_p, 0, -+ RK3568_CLKSEL_CON(43), 6, 2, MFLAGS, -+ RK3568_CLKGATE_CON(23), 11, GFLAGS), -+ GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 12, GFLAGS), -+ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 13, GFLAGS), -+ GATE(ACLK_JENC, "aclk_jenc", "aclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 14, GFLAGS), -+ GATE(HCLK_JENC, "hclk_jenc", "hclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(23), 15, GFLAGS), -+ 
GATE(PCLK_EINK, "pclk_eink", "pclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(22), 14, GFLAGS), -+ GATE(HCLK_EINK, "hclk_eink", "hclk_rga_pre", 0, -+ RK3568_CLKGATE_CON(22), 15, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 6 ++ */ + -+ /* PD_RKVENC */ -+ COMPOSITE(ACLK_RKVENC_PRE, "aclk_rkvenc_pre", gpll_cpll_npll_p, 0, -+ RK3568_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(24), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_RKVENC_PRE, "hclk_rkvenc_pre", "aclk_rkvenc_pre", 0, -+ RK3568_CLKSEL_CON(44), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(24), 1, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_pre", 0, -+ RK3568_CLKGATE_CON(24), 6, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_pre", 0, -+ RK3568_CLKGATE_CON(24), 7, GFLAGS), -+ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_npll_vpll_p, 0, -+ RK3568_CLKSEL_CON(45), 14, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(24), 8, GFLAGS), -+ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", aclk_rkvdec_pre_p, CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(47), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(25), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, -+ RK3568_CLKSEL_CON(47), 8, 4, DFLAGS, -+ RK3568_CLKGATE_CON(25), 1, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, -+ RK3568_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, -+ RK3568_CLKGATE_CON(25), 5, GFLAGS), -+ COMPOSITE(CLK_RKVDEC_CA, "clk_rkvdec_ca", gpll_cpll_npll_vpll_p, 0, -+ RK3568_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(25), 6, GFLAGS), -+ COMPOSITE(CLK_RKVDEC_CORE, "clk_rkvdec_core", clk_rkvdec_core_p, CLK_SET_RATE_NO_REPARENT, -+ RK3568_CLKSEL_CON(49), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(25), 7, GFLAGS), -+ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_npll_vpll_p, 0, -+ RK3568_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(25), 8, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(SCLK_PMU, "clk_pmu", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3308_CLKGATE_CON(4), 6, GFLAGS), + -+ /* PD_BUS */ -+ COMPOSITE_NODIV(ACLK_BUS, "aclk_bus", gpll200_gpll150_gpll100_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(50), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(26), 0, GFLAGS), -+ COMPOSITE_NODIV(PCLK_BUS, "pclk_bus", gpll100_gpll75_cpll50_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(50), 4, 2, MFLAGS, -+ RK3568_CLKGATE_CON(26), 1, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(26), 4, GFLAGS), -+ COMPOSITE(CLK_TSADC_TSEN, "clk_tsadc_tsen", xin24m_gpll100_cpll100_p, 0, -+ RK3568_CLKSEL_CON(51), 4, 2, MFLAGS, 0, 3, DFLAGS, -+ RK3568_CLKGATE_CON(26), 5, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "clk_tsadc_tsen", 0, -+ RK3568_CLKSEL_CON(51), 8, 7, DFLAGS, -+ RK3568_CLKGATE_CON(26), 6, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(26), 7, GFLAGS), -+ GATE(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3568_CLKGATE_CON(26), 8, GFLAGS), -+ GATE(PCLK_SCR, "pclk_scr", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(26), 12, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(26), 13, GFLAGS), -+ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, -+ RK3568_CLKGATE_CON(26), 14, GFLAGS), -+ GATE(ACLK_MCU, "aclk_mcu", "aclk_bus", CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(32), 13, GFLAGS), -+ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus", 
CLK_IGNORE_UNUSED, -+ RK3568_CLKGATE_CON(32), 14, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(32), 15, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(3), 0, ++ RK3308_CLKGATE_CON(4), 3, GFLAGS, ++ &rk3308_rtc32k_fracmux), ++ MUX(0, "clk_rtc32k_div_src", mux_vpll0_vpll1_p, 0, ++ RK3308_CLKSEL_CON(2), 10, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "clk_rtc32k_div", "clk_rtc32k_div_src", CLK_IGNORE_UNUSED | CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(4), 0, 16, DFLAGS, ++ RK3308_CLKGATE_CON(4), 2, GFLAGS), + -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(27), 12, GFLAGS), -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(52), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(27), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(53), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(27), 14, GFLAGS, -+ &rk3568_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "sclk_uart1_mux", 0, -+ RK3568_CLKGATE_CON(27), 15, GFLAGS), ++ COMPOSITE(0, "clk_usbphy_ref_src", mux_dpll_vpll0_p, 0, ++ RK3308_CLKSEL_CON(72), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3308_CLKGATE_CON(4), 7, GFLAGS), ++ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(72), 7, 1, MFLAGS, ++ RK3308_CLKGATE_CON(4), 8, GFLAGS), + -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(28), 0, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(54), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(28), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(55), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(28), 2, GFLAGS, -+ &rk3568_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "sclk_uart2_mux", 0, -+ RK3568_CLKGATE_CON(28), 3, GFLAGS), ++ GATE(0, "clk_wifi_dpll", "dpll", 0, ++ RK3308_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(0, "clk_wifi_vpll0", "vpll0", 0, ++ RK3308_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(0, "clk_wifi_osc", "xin24m", 0, ++ RK3308_CLKGATE_CON(15), 4, GFLAGS), ++ COMPOSITE(0, "clk_wifi_src", mux_wifi_src_p, 0, ++ RK3308_CLKSEL_CON(44), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3308_CLKGATE_CON(4), 0, GFLAGS), ++ COMPOSITE_NODIV(SCLK_WIFI, "clk_wifi", mux_wifi_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(44), 7, 1, MFLAGS, ++ RK3308_CLKGATE_CON(4), 1, GFLAGS), + -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(28), 4, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(56), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(28), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(57), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(28), 6, GFLAGS, -+ &rk3568_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "sclk_uart3_mux", 0, -+ RK3568_CLKGATE_CON(28), 7, GFLAGS), ++ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RK3308_CLKGATE_CON(4), 4, GFLAGS), + -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(28), 8, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(58), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(28), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", 
CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(59), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(28), 10, GFLAGS, -+ &rk3568_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "sclk_uart4_mux", 0, -+ RK3568_CLKGATE_CON(28), 11, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 7 ++ */ + -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(28), 12, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(60), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(28), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(61), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(28), 14, GFLAGS, -+ &rk3568_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "sclk_uart5_mux", 0, -+ RK3568_CLKGATE_CON(28), 15, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_audio_src", mux_vpll0_vpll1_xin24m_p, CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(45), 6, 2, MFLAGS, ++ RK3308_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_AUDIO, "hclk_audio", "clk_audio_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(45), 0, 5, DFLAGS, ++ RK3308_CLKGATE_CON(10), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_AUDIO, "pclk_audio", "clk_audio_src", CLK_IS_CRITICAL, ++ RK3308_CLKSEL_CON(45), 8, 5, DFLAGS, ++ RK3308_CLKGATE_CON(10), 2, GFLAGS), + -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(29), 0, GFLAGS), -+ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(62), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(29), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(63), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(29), 2, GFLAGS, -+ &rk3568_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "sclk_uart6_mux", 0, -+ RK3568_CLKGATE_CON(29), 3, GFLAGS), ++ COMPOSITE(0, "clk_pdm_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(46), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 3, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(47), 0, ++ RK3308_CLKGATE_CON(10), 4, GFLAGS, ++ &rk3308_pdm_fracmux), ++ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, ++ RK3308_CLKGATE_CON(10), 5, GFLAGS), + -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(29), 4, GFLAGS), -+ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(64), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(29), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(65), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(29), 6, GFLAGS, -+ &rk3568_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "sclk_uart7_mux", 0, -+ RK3568_CLKGATE_CON(29), 7, GFLAGS), ++ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(52), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(53), 0, ++ RK3308_CLKGATE_CON(10), 13, GFLAGS, ++ &rk3308_i2s0_8ch_tx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(52), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(10), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(52), 15, 1, MFLAGS, ++ 
RK3308_CLKGATE_CON(10), 15, GFLAGS), + -+ GATE(PCLK_UART8, "pclk_uart8", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(29), 8, GFLAGS), -+ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(66), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(29), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(67), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(29), 10, GFLAGS, -+ &rk3568_uart8_fracmux), -+ GATE(SCLK_UART8, "sclk_uart8", "sclk_uart8_mux", 0, -+ RK3568_CLKGATE_CON(29), 11, GFLAGS), ++ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(54), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(55), 0, ++ RK3308_CLKGATE_CON(11), 1, GFLAGS, ++ &rk3308_i2s0_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(54), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", "clk_i2s0_8ch_rx", 0, ++ RK3308_CLKGATE_CON(11), 3, GFLAGS), + -+ GATE(PCLK_UART9, "pclk_uart9", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(29), 12, GFLAGS), -+ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_usb480m_p, 0, -+ RK3568_CLKSEL_CON(68), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_CLKGATE_CON(29), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, -+ RK3568_CLKSEL_CON(69), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_CLKGATE_CON(29), 14, GFLAGS, -+ &rk3568_uart9_fracmux), -+ GATE(SCLK_UART9, "sclk_uart9", "sclk_uart9_mux", 0, -+ RK3568_CLKGATE_CON(29), 15, GFLAGS), ++ COMPOSITE(SCLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(56), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(11), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(57), 0, ++ RK3308_CLKGATE_CON(11), 5, GFLAGS, ++ &rk3308_i2s1_8ch_tx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", mux_i2s1_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(56), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S1_8CH_TX_OUT, "clk_i2s1_8ch_tx_out", mux_i2s1_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(56), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 7, GFLAGS), + -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(27), 5, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, -+ RK3568_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(27), 6, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(27), 7, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, -+ RK3568_CLKSEL_CON(70), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3568_CLKGATE_CON(27), 8, GFLAGS), -+ GATE(PCLK_CAN2, "pclk_can2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(27), 9, GFLAGS), -+ COMPOSITE(CLK_CAN2, "clk_can2", gpll_cpll_p, 0, -+ RK3568_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3568_CLKGATE_CON(27), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", clk_i2c_p, 0, -+ RK3568_CLKSEL_CON(71), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(32), 10, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 0, GFLAGS), -+ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, -+ RK3568_CLKGATE_CON(30), 1, 
GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 2, GFLAGS), -+ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, -+ RK3568_CLKGATE_CON(30), 3, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 4, GFLAGS), -+ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, -+ RK3568_CLKGATE_CON(30), 5, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 6, GFLAGS), -+ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, -+ RK3568_CLKGATE_CON(30), 7, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 8, GFLAGS), -+ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, -+ RK3568_CLKGATE_CON(30), 9, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", gpll200_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 0, 1, MFLAGS, -+ RK3568_CLKGATE_CON(30), 11, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", gpll200_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 2, 1, MFLAGS, -+ RK3568_CLKGATE_CON(30), 13, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(30), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", gpll200_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 4, 1, MFLAGS, -+ RK3568_CLKGATE_CON(30), 15, GFLAGS), -+ GATE(PCLK_SPI3, "pclk_spi3", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI3, "clk_spi3", gpll200_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 6, 1, MFLAGS, RK3568_CLKGATE_CON(31), 1, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", 0, RK3568_CLKGATE_CON(31), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", gpll100_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(31), 11, GFLAGS), -+ GATE(CLK_PWM1_CAPTURE, "clk_pwm1_capture", "xin24m", 0, -+ RK3568_CLKGATE_CON(31), 12, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 13, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2, "clk_pwm2", gpll100_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 10, 2, MFLAGS, -+ RK3568_CLKGATE_CON(31), 14, GFLAGS), -+ GATE(CLK_PWM2_CAPTURE, "clk_pwm2_capture", "xin24m", 0, -+ RK3568_CLKGATE_CON(31), 15, GFLAGS), -+ GATE(PCLK_PWM3, "pclk_pwm3", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(32), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM3, "clk_pwm3", gpll100_xin24m_cpll100_p, 0, -+ RK3568_CLKSEL_CON(72), 12, 2, MFLAGS, -+ RK3568_CLKGATE_CON(32), 1, GFLAGS), -+ GATE(CLK_PWM3_CAPTURE, "clk_pwm3_capture", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 2, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO, "dbclk_gpio", xin24m_32k_p, 0, -+ RK3568_CLKSEL_CON(72), 14, 1, MFLAGS, -+ RK3568_CLKGATE_CON(32), 11, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 2, GFLAGS), -+ GATE(DBCLK_GPIO1, "dbclk_gpio1", "dbclk_gpio", 0, -+ RK3568_CLKGATE_CON(31), 3, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 4, GFLAGS), -+ GATE(DBCLK_GPIO2, "dbclk_gpio2", "dbclk_gpio", 0, -+ RK3568_CLKGATE_CON(31), 5, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 6, GFLAGS), -+ GATE(DBCLK_GPIO3, "dbclk_gpio3", "dbclk_gpio", 0, -+ RK3568_CLKGATE_CON(31), 7, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_bus", 0, -+ RK3568_CLKGATE_CON(31), 8, GFLAGS), -+ GATE(DBCLK_GPIO4, "dbclk_gpio4", "dbclk_gpio", 0, -+ RK3568_CLKGATE_CON(31), 9, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, -+ 
RK3568_CLKGATE_CON(32), 3, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 4, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 5, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 6, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 7, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 8, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, -+ RK3568_CLKGATE_CON(32), 9, GFLAGS), ++ COMPOSITE(SCLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(58), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(59), 0, ++ RK3308_CLKGATE_CON(11), 9, GFLAGS, ++ &rk3308_i2s1_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", mux_i2s1_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(58), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(SCLK_I2S1_8CH_RX_OUT, "clk_i2s1_8ch_rx_out", "clk_i2s1_8ch_rx", 0, ++ RK3308_CLKGATE_CON(11), 11, GFLAGS), + -+ /* PD_TOP */ -+ COMPOSITE_NODIV(ACLK_TOP_HIGH, "aclk_top_high", cpll500_gpll400_gpll300_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(73), 0, 2, MFLAGS, -+ RK3568_CLKGATE_CON(33), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_LOW, "aclk_top_low", gpll400_gpll300_gpll200_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(73), 4, 2, MFLAGS, -+ RK3568_CLKGATE_CON(33), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_TOP, "hclk_top", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(73), 8, 2, MFLAGS, -+ RK3568_CLKGATE_CON(33), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_TOP, "pclk_top", gpll100_gpll75_cpll50_xin24m_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(73), 12, 2, MFLAGS, -+ RK3568_CLKGATE_CON(33), 3, GFLAGS), -+ GATE(PCLK_PCIE30PHY, "pclk_pcie30phy", "pclk_top", 0, -+ RK3568_CLKGATE_CON(33), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_OPTC_ARB, "clk_optc_arb", xin24m_cpll100_p, CLK_IS_CRITICAL, -+ RK3568_CLKSEL_CON(73), 15, 1, MFLAGS, -+ RK3568_CLKGATE_CON(33), 9, GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top", 0, -+ RK3568_CLKGATE_CON(33), 13, GFLAGS), -+ GATE(PCLK_MIPIDSIPHY0, "pclk_mipidsiphy0", "pclk_top", 0, -+ RK3568_CLKGATE_CON(33), 14, GFLAGS), -+ GATE(PCLK_MIPIDSIPHY1, "pclk_mipidsiphy1", "pclk_top", 0, -+ RK3568_CLKGATE_CON(33), 15, GFLAGS), -+ GATE(PCLK_PIPEPHY0, "pclk_pipephy0", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 4, GFLAGS), -+ GATE(PCLK_PIPEPHY1, "pclk_pipephy1", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 5, GFLAGS), -+ GATE(PCLK_PIPEPHY2, "pclk_pipephy2", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 6, GFLAGS), -+ GATE(PCLK_CPU_BOOST, "pclk_cpu_boost", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 11, GFLAGS), -+ GATE(CLK_CPU_BOOST, "clk_cpu_boost", "xin24m", 0, -+ RK3568_CLKGATE_CON(34), 12, GFLAGS), -+ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 13, GFLAGS), -+ GATE(PCLK_EDPPHY_GRF, "pclk_edpphy_grf", "pclk_top", 0, -+ RK3568_CLKGATE_CON(34), 14, GFLAGS), -+}; ++ COMPOSITE(SCLK_I2S2_8CH_TX_SRC, "clk_i2s2_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(60), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_tx_frac", "clk_i2s2_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(61), 0, ++ RK3308_CLKGATE_CON(11), 13, GFLAGS, ++ &rk3308_i2s2_8ch_tx_fracmux), ++ 
COMPOSITE_NODIV(SCLK_I2S2_8CH_TX, "clk_i2s2_8ch_tx", mux_i2s2_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(60), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S2_8CH_TX_OUT, "clk_i2s2_8ch_tx_out", mux_i2s2_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(60), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(11), 15, GFLAGS), + -+static struct rockchip_clk_branch rk3568_clk_pmu_branches[] __initdata = { -+ /* PD_PMU */ -+ FACTOR(0, "ppll_ph0", "ppll", 0, 1, 2), -+ FACTOR(0, "ppll_ph180", "ppll", 0, 1, 2), -+ FACTOR(0, "hpll_ph0", "hpll", 0, 1, 2), ++ COMPOSITE(SCLK_I2S2_8CH_RX_SRC, "clk_i2s2_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(62), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s2_8ch_rx_frac", "clk_i2s2_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(63), 0, ++ RK3308_CLKGATE_CON(12), 1, GFLAGS, ++ &rk3308_i2s2_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S2_8CH_RX, "clk_i2s2_8ch_rx", mux_i2s2_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(62), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(SCLK_I2S2_8CH_RX_OUT, "clk_i2s2_8ch_rx_out", "clk_i2s2_8ch_rx", 0, ++ RK3308_CLKGATE_CON(12), 3, GFLAGS), + -+ MUX(CLK_PDPMU, "clk_pdpmu", clk_pdpmu_p, 0, -+ RK3568_PMU_CLKSEL_CON(2), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(PCLK_PDPMU, "pclk_pdpmu", "clk_pdpmu", CLK_IS_CRITICAL, -+ RK3568_PMU_CLKSEL_CON(2), 0, 5, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pdpmu", CLK_IS_CRITICAL, -+ RK3568_PMU_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, -+ RK3568_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pdpmu", 0, -+ RK3568_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C0, "clk_i2c0", "clk_pdpmu", 0, -+ RK3568_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_pdpmu", 0, -+ RK3568_PMU_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE(SCLK_I2S3_8CH_TX_SRC, "clk_i2s3_8ch_tx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(64), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(12), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_tx_frac", "clk_i2s3_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(65), 0, ++ RK3308_CLKGATE_CON(12), 5, GFLAGS, ++ &rk3308_i2s3_8ch_tx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S3_8CH_TX, "clk_i2s3_8ch_tx", mux_i2s3_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(64), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(12), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S3_8CH_TX_OUT, "clk_i2s3_8ch_tx_out", mux_i2s3_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(64), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(12), 7, GFLAGS), + -+ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RK3568_PMU_CLKSEL_CON(1), 0, -+ RK3568_PMU_CLKGATE_CON(0), 1, GFLAGS, -+ &rk3568_rtc32k_pmu_fracmux), ++ COMPOSITE(SCLK_I2S3_8CH_RX_SRC, "clk_i2s3_8ch_rx_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(66), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(12), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s3_8ch_rx_frac", "clk_i2s3_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(67), 0, ++ RK3308_CLKGATE_CON(12), 9, GFLAGS, ++ &rk3308_i2s3_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S3_8CH_RX, "clk_i2s3_8ch_rx", mux_i2s3_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(66), 12, 1, MFLAGS, ++ RK3308_CLKGATE_CON(12), 10, GFLAGS), ++ GATE(SCLK_I2S3_8CH_RX_OUT, 
"clk_i2s3_8ch_rx_out", "clk_i2s3_8ch_rx", 0, ++ RK3308_CLKGATE_CON(12), 11, GFLAGS), + -+ COMPOSITE_NOMUX(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", CLK_IGNORE_UNUSED, -+ RK3568_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE(SCLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(68), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(12), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(69), 0, ++ RK3308_CLKGATE_CON(12), 13, GFLAGS, ++ &rk3308_i2s0_2ch_fracmux), ++ GATE(SCLK_I2S0_2CH, "clk_i2s0_2ch", "clk_i2s0_2ch_mux", 0, ++ RK3308_CLKGATE_CON(12), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_2CH_OUT, "clk_i2s0_2ch_out", mux_i2s0_2ch_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(68), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(12), 15, GFLAGS), + -+ COMPOSITE(CLK_UART0_DIV, "sclk_uart0_div", ppll_usb480m_cpll_gpll_p, 0, -+ RK3568_PMU_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "sclk_uart0_frac", "sclk_uart0_div", CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(5), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3568_PMU_CLKGATE_CON(1), 4, GFLAGS, -+ &rk3568_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "sclk_uart0_mux", 0, -+ RK3568_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_vpll0_vpll1_xin24m_p, 0, ++ RK3308_CLKSEL_CON(70), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(71), 0, ++ RK3308_CLKGATE_CON(13), 1, GFLAGS, ++ &rk3308_i2s1_2ch_fracmux), ++ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, ++ RK3308_CLKGATE_CON(13), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(70), 15, 1, MFLAGS, ++ RK3308_CLKGATE_CON(13), 3, GFLAGS), + -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pdpmu", 0, -+ RK3568_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", xin24m_32k_p, 0, -+ RK3568_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, -+ RK3568_PMU_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_pdpmu", 0, -+ RK3568_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE(CLK_PWM0, "clk_pwm0", clk_pwm0_p, 0, -+ RK3568_PMU_CLKSEL_CON(6), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(CLK_CAPTURE_PWM0_NDFT, "clk_capture_pwm0_ndft", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_PMUPVTM, "pclk_pmupvtm", "pclk_pdpmu", 0, -+ RK3568_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ GATE(CLK_PMUPVTM, "clk_pmupvtm", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(CLK_CORE_PMUPVTM, "clk_core_pmupvtm", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_NOMUX(CLK_REF24M, "clk_ref24m", "clk_pdpmu", 0, -+ RK3568_PMU_CLKSEL_CON(7), 0, 6, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(XIN_OSC0_USBPHY0_G, "xin_osc0_usbphy0_g", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 1, GFLAGS), -+ MUX(CLK_USBPHY0_REF, "clk_usbphy0_ref", clk_usbphy0_ref_p, 0, -+ RK3568_PMU_CLKSEL_CON(8), 0, 1, MFLAGS), -+ GATE(XIN_OSC0_USBPHY1_G, "xin_osc0_usbphy1_g", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 2, GFLAGS), -+ MUX(CLK_USBPHY1_REF, "clk_usbphy1_ref", clk_usbphy1_ref_p, 0, -+ RK3568_PMU_CLKSEL_CON(8), 1, 1, MFLAGS), -+ GATE(XIN_OSC0_MIPIDSIPHY0_G, "xin_osc0_mipidsiphy0_g", "xin24m", 0, 
-+ RK3568_PMU_CLKGATE_CON(2), 3, GFLAGS), -+ MUX(CLK_MIPIDSIPHY0_REF, "clk_mipidsiphy0_ref", clk_mipidsiphy0_ref_p, 0, -+ RK3568_PMU_CLKSEL_CON(8), 2, 1, MFLAGS), -+ GATE(XIN_OSC0_MIPIDSIPHY1_G, "xin_osc0_mipidsiphy1_g", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 4, GFLAGS), -+ MUX(CLK_MIPIDSIPHY1_REF, "clk_mipidsiphy1_ref", clk_mipidsiphy1_ref_p, 0, -+ RK3568_PMU_CLKSEL_CON(8), 3, 1, MFLAGS), -+ COMPOSITE_NOMUX(CLK_WIFI_DIV, "clk_wifi_div", "clk_pdpmu", 0, -+ RK3568_PMU_CLKSEL_CON(8), 8, 6, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(CLK_WIFI_OSC0, "clk_wifi_osc0", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 6, GFLAGS), -+ MUX(CLK_WIFI, "clk_wifi", clk_wifi_p, CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(8), 15, 1, MFLAGS), -+ COMPOSITE_NOMUX(CLK_PCIEPHY0_DIV, "clk_pciephy0_div", "ppll_ph0", 0, -+ RK3568_PMU_CLKSEL_CON(9), 0, 3, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(CLK_PCIEPHY0_OSC0, "clk_pciephy0_osc0", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 8, GFLAGS), -+ MUX(CLK_PCIEPHY0_REF, "clk_pciephy0_ref", clk_pciephy0_ref_p, CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(9), 3, 1, MFLAGS), -+ COMPOSITE_NOMUX(CLK_PCIEPHY1_DIV, "clk_pciephy1_div", "ppll_ph0", 0, -+ RK3568_PMU_CLKSEL_CON(9), 4, 3, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(CLK_PCIEPHY1_OSC0, "clk_pciephy1_osc0", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 10, GFLAGS), -+ MUX(CLK_PCIEPHY1_REF, "clk_pciephy1_ref", clk_pciephy1_ref_p, CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(9), 7, 1, MFLAGS), -+ COMPOSITE_NOMUX(CLK_PCIEPHY2_DIV, "clk_pciephy2_div", "ppll_ph0", 0, -+ RK3568_PMU_CLKSEL_CON(9), 8, 3, DFLAGS, -+ RK3568_PMU_CLKGATE_CON(2), 11, GFLAGS), -+ GATE(CLK_PCIEPHY2_OSC0, "clk_pciephy2_osc0", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 12, GFLAGS), -+ MUX(CLK_PCIEPHY2_REF, "clk_pciephy2_ref", clk_pciephy2_ref_p, CLK_SET_RATE_PARENT, -+ RK3568_PMU_CLKSEL_CON(9), 11, 1, MFLAGS), -+ GATE(CLK_PCIE30PHY_REF_M, "clk_pcie30phy_ref_m", "ppll_ph0", 0, -+ RK3568_PMU_CLKGATE_CON(2), 13, GFLAGS), -+ GATE(CLK_PCIE30PHY_REF_N, "clk_pcie30phy_ref_n", "ppll_ph180", 0, -+ RK3568_PMU_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(XIN_OSC0_EDPPHY_G, "xin_osc0_edpphy_g", "xin24m", 0, -+ RK3568_PMU_CLKGATE_CON(2), 15, GFLAGS), -+ MUX(CLK_HDMI_REF, "clk_hdmi_ref", clk_hdmi_ref_p, 0, -+ RK3568_PMU_CLKSEL_CON(8), 7, 1, MFLAGS), ++ COMPOSITE(SCLK_SPDIF_TX_DIV, "clk_spdif_tx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE(SCLK_SPDIF_TX_DIV50, "clk_spdif_tx_div50", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 6, GFLAGS), ++ MUX(0, "clk_spdif_tx_src", mux_spdif_tx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(48), 12, 1, MFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_spdif_tx_frac", "clk_spdif_tx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(49), 0, ++ RK3308_CLKGATE_CON(10), 7, GFLAGS, ++ &rk3308_spdif_tx_fracmux), ++ GATE(SCLK_SPDIF_TX, "clk_spdif_tx", "clk_spdif_tx_mux", 0, ++ RK3308_CLKGATE_CON(10), 8, GFLAGS), + -+ MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", clk_32k_ioe_p, 0, -+ RK3568_PMU_GRF_SOC_CON0, 0, 1, MFLAGS) -+}; ++ COMPOSITE(SCLK_SPDIF_RX_DIV, "clk_spdif_rx_div", mux_vpll0_vpll1_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 9, GFLAGS), ++ COMPOSITE(SCLK_SPDIF_RX_DIV50, "clk_spdif_rx_div50", mux_vpll0_vpll1_xin24m_p, 
CLK_IGNORE_UNUSED, ++ RK3308_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3308_CLKGATE_CON(10), 9, GFLAGS), ++ MUX(0, "clk_spdif_rx_src", mux_spdif_rx_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3308_CLKSEL_CON(50), 14, 1, MFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_spdif_rx_frac", "clk_spdif_rx_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(51), 0, ++ RK3308_CLKGATE_CON(10), 10, GFLAGS, ++ &rk3308_spdif_rx_fracmux), ++ GATE(SCLK_SPDIF_RX, "clk_spdif_rx", "clk_spdif_rx_mux", 0, ++ RK3308_CLKGATE_CON(10), 11, GFLAGS), + -+static void __iomem *rk3568_cru_base; -+static void __iomem *rk3568_pmucru_base; ++ /* ++ * Clock-Architecture Diagram 8 ++ */ + -+static void rk3568_dump_cru(void) -+{ -+ if (rk3568_pmucru_base) { -+ pr_warn("PMU CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3568_pmucru_base, -+ 0x248, false); -+ } -+ if (rk3568_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3568_cru_base, -+ 0x588, false); -+ } -+} ++ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(0, "pclk_core_dbg_niu", "aclk_core", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "pclk_core_dbg_daplite", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(0, "aclk_core_perf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(0, "pclk_core_grf", "pclk_core_dbg", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(0), 9, GFLAGS), + -+static int protect_clocks[] = { -+ ACLK_VO, -+ HCLK_VO, -+ ACLK_VOP, -+ HCLK_VOP, -+ DCLK_VOP0, -+ DCLK_VOP1, -+ DCLK_VOP2, -+}; ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(0, "aclk_peribus_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(ACLK_MAC, "aclk_mac", "aclk_peri", 0, RK3308_CLKGATE_CON(9), 4, GFLAGS), + -+static void __init rk3568_pmu_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++ GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 9, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 11, GFLAGS), ++ GATE(HCLK_HOST, "hclk_host", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(HCLK_HOST_ARB, "hclk_host_arb", "hclk_peri", 0, RK3308_CLKGATE_CON(9), 13, GFLAGS), + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru pmu region\n", __func__); -+ return; -+ } ++ GATE(0, "pclk_peri_niu", "pclk_peri", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(9), 14, GFLAGS), ++ GATE(PCLK_MAC, "pclk_mac", "pclk_peri", 0, RK3308_CLKGATE_CON(9), 15, GFLAGS), + -+ rk3568_pmucru_base = reg_base; ++ GATE(0, "hclk_audio_niu", "hclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(HCLK_SPDIFTX, "hclk_spdiftx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(HCLK_SPDIFRX, "hclk_spdifrx", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 3, GFLAGS), ++ 
GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 4, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(HCLK_I2S2_8CH, "hclk_i2s2_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(HCLK_I2S3_8CH, "hclk_i2s3_8ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(HCLK_I2S0_2CH, "hclk_i2s0_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(HCLK_VAD, "hclk_vad", "hclk_audio", 0, RK3308_CLKGATE_CON(14), 10, GFLAGS), + -+ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip pmu clk init failed\n", __func__); -+ return; -+ } ++ GATE(0, "pclk_audio_niu", "pclk_audio", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_audio", 0, RK3308_CLKGATE_CON(14), 12, GFLAGS), + -+ rockchip_clk_register_plls(ctx, rk3568_pmu_pll_clks, -+ ARRAY_SIZE(rk3568_pmu_pll_clks), -+ RK3568_GRF_SOC_STATUS0); ++ GATE(0, "aclk_bus_niu", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_bus", 0, RK3308_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(0, "aclk_gic", "aclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 4, GFLAGS), ++ /* aclk_dmaci0 is controlled by sgrf_clkgat_con. */ ++ SGRF_GATE(ACLK_DMAC0, "aclk_dmac0", "aclk_bus"), ++ /* aclk_dmac1 is controlled by sgrf_clkgat_con. */ ++ SGRF_GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_bus"), ++ /* watchdog pclk is controlled by sgrf_clkgat_con. 
*/ ++ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_bus"), + -+ rockchip_clk_register_branches(ctx, rk3568_clk_pmu_branches, -+ ARRAY_SIZE(rk3568_clk_pmu_branches)); ++ GATE(0, "hclk_bus_niu", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 5, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_bus", 0, RK3308_CLKGATE_CON(5), 8, GFLAGS), + -+ rockchip_register_softrst(np, 1, reg_base + RK3568_PMU_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 12, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 14, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3308_CLKGATE_CON(5), 15, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(PCLK_OTP_NS, "pclk_otp_ns", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 13, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 14, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3308_CLKGATE_CON(6), 15, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_bus", 0, RK3308_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(PCLK_USBSD_DET, "pclk_usbsd_det", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(PCLK_DDR_STDBY, "pclk_ddr_stdby", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_USB_GRF, "pclk_usb_grf", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 8, GFLAGS), ++ GATE(PCLK_CRU, "pclk_cru", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(PCLK_OTP_PHY, "pclk_otp_phy", "pclk_bus", 0, 
RK3308_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(PCLK_CPU_BOOST, "pclk_cpu_boost", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 13, GFLAGS), ++ GATE(PCLK_CAN, "pclk_can", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 14, GFLAGS), ++ GATE(PCLK_OWIRE, "pclk_owire", "pclk_bus", CLK_IGNORE_UNUSED, RK3308_CLKGATE_CON(7), 15, GFLAGS), ++}; + -+ rockchip_clk_of_add_provider(np, ctx); -+} ++static struct rockchip_clk_branch rk3308_dclk_vop_frac[] __initdata = { ++ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(9), 0, ++ RK3308_CLKGATE_CON(1), 7, GFLAGS, ++ &rk3308_dclk_vop_fracmux), ++}; + -+CLK_OF_DECLARE(rk3568_cru_pmu, "rockchip,rk3568-pmucru", rk3568_pmu_clk_init); ++static struct rockchip_clk_branch rk3308b_dclk_vop_frac[] __initdata = { ++ COMPOSITE_FRACMUX(0, "dclk_vop_frac", "dclk_vop_src", CLK_SET_RATE_PARENT, ++ RK3308_CLKSEL_CON(9), 0, ++ RK3308_CLKGATE_CON(1), 7, GFLAGS, ++ &rk3308_dclk_vop_fracmux), ++}; + -+static void __init rk3568_clk_init(struct device_node *np) ++static void __iomem *rk3308_cru_base; ++ ++void rk3308_dump_cru(void) ++{ ++ if (rk3308_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3308_cru_base, ++ 0x500, false); ++ } ++} ++ ++static void __init rk3308_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; @@ -57267,8 +56107,6 @@ index 000000000..3e6955066 + return; + } + -+ rk3568_cru_base = reg_base; -+ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -57277,2521 +56115,1858 @@ index 000000000..3e6955066 + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3568_pll_clks, -+ ARRAY_SIZE(rk3568_pll_clks), -+ RK3568_GRF_SOC_STATUS0); ++ rockchip_clk_register_plls(ctx, rk3308_pll_clks, ++ ARRAY_SIZE(rk3308_pll_clks), ++ RK3308_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk3308_clk_branches, ++ ARRAY_SIZE(rk3308_clk_branches)); ++ rockchip_soc_id_init(); ++ if (soc_is_rk3308b()) ++ rockchip_clk_register_branches(ctx, rk3308b_dclk_vop_frac, ++ ARRAY_SIZE(rk3308b_dclk_vop_frac)); ++ else ++ rockchip_clk_register_branches(ctx, rk3308_dclk_vop_frac, ++ ARRAY_SIZE(rk3308_dclk_vop_frac)); + + rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3568_cpuclk_data, rk3568_cpuclk_rates, -+ ARRAY_SIZE(rk3568_cpuclk_rates)); -+ -+ rockchip_clk_register_branches(ctx, rk3568_clk_branches, -+ ARRAY_SIZE(rk3568_clk_branches)); ++ 3, clks[PLL_APLL], clks[PLL_VPLL0], ++ &rk3308_cpuclk_data, rk3308_cpuclk_rates, ++ ARRAY_SIZE(rk3308_cpuclk_rates)); + -+ rockchip_register_softrst(np, 30, reg_base + RK3568_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 10, reg_base + RK3308_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK3568_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK3308_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3568_dump_cru; -+ -+ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); ++ if (!rk_dump_cru) { ++ rk3308_cru_base = reg_base; ++ rk_dump_cru = rk3308_dump_cru; ++ } +} + -+CLK_OF_DECLARE(rk3568_cru, 
"rockchip,rk3568-cru", rk3568_clk_init); ++CLK_OF_DECLARE(rk3308_cru, "rockchip,rk3308-cru", rk3308_clk_init); + -+#ifdef MODULE -+struct clk_rk3568_inits { -+ void (*inits)(struct device_node *np); -+}; ++static int __init clk_rk3308_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; + -+static const struct clk_rk3568_inits clk_rk3568_pmucru_init = { -+ .inits = rk3568_pmu_clk_init, -+}; ++ rk3308_clk_init(np); + -+static const struct clk_rk3568_inits clk_3568_cru_init = { -+ .inits = rk3568_clk_init, -+}; ++ return 0; ++} + -+static const struct of_device_id clk_rk3568_match_table[] = { ++static const struct of_device_id clk_rk3308_match_table[] = { + { -+ .compatible = "rockchip,rk3568-cru", -+ .data = &clk_3568_cru_init, -+ }, { -+ .compatible = "rockchip,rk3568-pmucru", -+ .data = &clk_rk3568_pmucru_init, ++ .compatible = "rockchip,rk3308-cru", + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3568_match_table); -+ -+static int clk_rk3568_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rk3568_inits *init_data; -+ -+ match = of_match_device(clk_rk3568_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; -+ -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); -+ -+ return 0; -+} ++MODULE_DEVICE_TABLE(of, clk_rk3308_match_table); + -+static struct platform_driver clk_rk3568_driver = { -+ .probe = clk_rk3568_probe, ++static struct platform_driver clk_rk3308_driver = { + .driver = { -+ .name = "clk-rk3568", -+ .of_match_table = clk_rk3568_match_table, -+ .suppress_bind_attrs = true, ++ .name = "clk-rk3308", ++ .of_match_table = clk_rk3308_match_table, + }, +}; -+module_platform_driver(clk_rk3568_driver); ++builtin_platform_driver_probe(clk_rk3308_driver, clk_rk3308_probe); + -+MODULE_DESCRIPTION("Rockchip RK3568 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3308 Clock Driver"); +MODULE_LICENSE("GPL"); -+MODULE_ALIAS("platform:clk-rk3568"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rk3588.c b/drivers/clk/rockchip-oh/clk-rk3588.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3328.c b/drivers/clk/rockchip-oh/clk-rk3328.c new file mode 100644 -index 000000000..c297e4e1e +index 000000000..8ec63aaae --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rk3588.c -@@ -0,0 +1,2528 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rk3328.c +@@ -0,0 +1,900 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang ++ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
++ * Author: Elaine + */ + +#include ++#include +#include +#include -+#include +#include ++#include +#include -+#include ++#include +#include "clk.h" + -+#define RK3588_GRF_SOC_STATUS0 0x600 -+#define RK3588_PHYREF_ALT_GATE 0xc38 -+#define RK3588_FRAC_MAX_PRATE 1500000000 -+#define RK3588_DCLK_MAX_PRATE 594000000 ++#define RK3328_GRF_SOC_CON4 0x410 ++#define RK3328_GRF_SOC_STATUS0 0x480 ++#define RK3328_GRF_MAC_CON1 0x904 ++#define RK3328_GRF_MAC_CON2 0x908 + -+enum rk3588_plls { -+ b0pll, b1pll, lpll, v0pll, aupll, cpll, gpll, npll, ppll, ++enum rk3328_plls { ++ apll, dpll, cpll, gpll, npll, +}; + -+static struct rockchip_pll_rate_table rk3588_pll_rates[] = { -+ /* _mhz, _p, _m, _s, _k */ -+ RK3588_PLL_RATE(2520000000, 2, 210, 0, 0), -+ RK3588_PLL_RATE(2496000000, 2, 208, 0, 0), -+ RK3588_PLL_RATE(2472000000, 2, 206, 0, 0), -+ RK3588_PLL_RATE(2448000000, 2, 204, 0, 0), -+ RK3588_PLL_RATE(2424000000, 2, 202, 0, 0), -+ RK3588_PLL_RATE(2400000000, 2, 200, 0, 0), -+ RK3588_PLL_RATE(2376000000, 2, 198, 0, 0), -+ RK3588_PLL_RATE(2352000000, 2, 196, 0, 0), -+ RK3588_PLL_RATE(2328000000, 2, 194, 0, 0), -+ RK3588_PLL_RATE(2304000000, 2, 192, 0, 0), -+ RK3588_PLL_RATE(2280000000, 2, 190, 0, 0), -+ RK3588_PLL_RATE(2256000000, 2, 376, 1, 0), -+ RK3588_PLL_RATE(2232000000, 2, 372, 1, 0), -+ RK3588_PLL_RATE(2208000000, 2, 368, 1, 0), -+ RK3588_PLL_RATE(2184000000, 2, 364, 1, 0), -+ RK3588_PLL_RATE(2160000000, 2, 360, 1, 0), -+ RK3588_PLL_RATE(2136000000, 2, 356, 1, 0), -+ RK3588_PLL_RATE(2112000000, 2, 352, 1, 0), -+ RK3588_PLL_RATE(2088000000, 2, 348, 1, 0), -+ RK3588_PLL_RATE(2064000000, 2, 344, 1, 0), -+ RK3588_PLL_RATE(2040000000, 2, 340, 1, 0), -+ RK3588_PLL_RATE(2016000000, 2, 336, 1, 0), -+ RK3588_PLL_RATE(1992000000, 2, 332, 1, 0), -+ RK3588_PLL_RATE(1968000000, 2, 328, 1, 0), -+ RK3588_PLL_RATE(1944000000, 2, 324, 1, 0), -+ RK3588_PLL_RATE(1920000000, 2, 320, 1, 0), -+ RK3588_PLL_RATE(1896000000, 2, 316, 1, 0), -+ RK3588_PLL_RATE(1872000000, 2, 312, 1, 0), -+ RK3588_PLL_RATE(1848000000, 2, 308, 1, 0), -+ RK3588_PLL_RATE(1824000000, 2, 304, 1, 0), -+ RK3588_PLL_RATE(1800000000, 2, 300, 1, 0), -+ RK3588_PLL_RATE(1776000000, 2, 296, 1, 0), -+ RK3588_PLL_RATE(1752000000, 2, 292, 1, 0), -+ RK3588_PLL_RATE(1728000000, 2, 288, 1, 0), -+ RK3588_PLL_RATE(1704000000, 2, 284, 1, 0), -+ RK3588_PLL_RATE(1680000000, 2, 280, 1, 0), -+ RK3588_PLL_RATE(1656000000, 2, 276, 1, 0), -+ RK3588_PLL_RATE(1632000000, 2, 272, 1, 0), -+ RK3588_PLL_RATE(1608000000, 2, 268, 1, 0), -+ RK3588_PLL_RATE(1584000000, 2, 264, 1, 0), -+ RK3588_PLL_RATE(1560000000, 2, 260, 1, 0), -+ RK3588_PLL_RATE(1536000000, 2, 256, 1, 0), -+ RK3588_PLL_RATE(1512000000, 2, 252, 1, 0), -+ RK3588_PLL_RATE(1488000000, 2, 248, 1, 0), -+ RK3588_PLL_RATE(1464000000, 2, 244, 1, 0), -+ RK3588_PLL_RATE(1440000000, 2, 240, 1, 0), -+ RK3588_PLL_RATE(1416000000, 2, 236, 1, 0), -+ RK3588_PLL_RATE(1392000000, 2, 232, 1, 0), -+ RK3588_PLL_RATE(1320000000, 2, 220, 1, 0), -+ RK3588_PLL_RATE(1200000000, 2, 200, 1, 0), -+ RK3588_PLL_RATE(1188000000, 2, 198, 1, 0), -+ RK3588_PLL_RATE(1100000000, 3, 550, 2, 0), -+ RK3588_PLL_RATE(1008000000, 2, 336, 2, 0), -+ RK3588_PLL_RATE(1000000000, 3, 500, 2, 0), -+ RK3588_PLL_RATE(983040000, 4, 655, 2, 23592), -+ RK3588_PLL_RATE(955520000, 3, 478, 2, 49807), -+ RK3588_PLL_RATE(903168000, 6, 903, 2, 11009), -+ RK3588_PLL_RATE(900000000, 2, 300, 2, 0), -+ RK3588_PLL_RATE(816000000, 2, 272, 2, 0), -+ RK3588_PLL_RATE(786432000, 2, 262, 2, 9437), -+ RK3588_PLL_RATE(786000000, 1, 131, 2, 0), -+ RK3588_PLL_RATE(785560000, 3, 393, 2, 
51119), -+ RK3588_PLL_RATE(722534400, 8, 963, 2, 24850), -+ RK3588_PLL_RATE(600000000, 2, 200, 2, 0), -+ RK3588_PLL_RATE(594000000, 1, 99, 2, 0), -+ RK3588_PLL_RATE(408000000, 2, 272, 3, 0), -+ RK3588_PLL_RATE(312000000, 2, 208, 3, 0), -+ RK3588_PLL_RATE(216000000, 2, 288, 4, 0), -+ RK3588_PLL_RATE(96000000, 2, 256, 5, 0), ++static struct rockchip_pll_rate_table rk3328_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), + { /* sentinel */ }, +}; + -+#define RK3588_CLK_CORE_B0_SEL_CLEAN_MASK 0x3 -+#define RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT 13 -+#define RK3588_CLK_CORE_B1_SEL_CLEAN_MASK 0x3 -+#define RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT 5 -+#define RK3588_CLK_CORE_B0_GPLL_DIV_MASK 0x1f -+#define RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT 1 -+#define RK3588_CLK_CORE_L_SEL_CLEAN_MASK 0x3 -+#define RK3588_CLK_CORE_L1_SEL_CLEAN_SHIFT 12 -+#define RK3588_CLK_CORE_L0_SEL_CLEAN_SHIFT 5 -+#define RK3588_CLK_DSU_SEL_DF_MASK 0x1 -+#define RK3588_CLK_DSU_SEL_DF_SHIFT 15 -+#define RK3588_CLK_DSU_DF_SRC_MASK 0x3 -+#define RK3588_CLK_DSU_DF_SRC_SHIFT 12 -+#define RK3588_CLK_DSU_DF_DIV_MASK 0x1f -+#define RK3588_CLK_DSU_DF_DIV_SHIFT 7 -+#define RK3588_ACLKM_DSU_DIV_MASK 0x1f -+#define RK3588_ACLKM_DSU_DIV_SHIFT 1 -+#define RK3588_ACLKS_DSU_DIV_MASK 0x1f -+#define RK3588_ACLKS_DSU_DIV_SHIFT 6 -+#define RK3588_ACLKMP_DSU_DIV_MASK 0x1f -+#define RK3588_ACLKMP_DSU_DIV_SHIFT 11 -+#define 
RK3588_PERIPH_DSU_DIV_MASK 0x1f -+#define RK3588_PERIPH_DSU_DIV_SHIFT 0 -+#define RK3588_ATCLK_DSU_DIV_MASK 0x1f -+#define RK3588_ATCLK_DSU_DIV_SHIFT 0 -+#define RK3588_GICCLK_DSU_DIV_MASK 0x1f -+#define RK3588_GICCLK_DSU_DIV_SHIFT 5 -+ -+#define RK3588_CORE_B0_SEL(_apllcore) \ -+{ \ -+ .reg = RK3588_BIGCORE0_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B0_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT) | \ -+ HIWORD_UPDATE(0, RK3588_CLK_CORE_B0_GPLL_DIV_MASK, \ -+ RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT), \ -+} -+ -+#define RK3588_CORE_B1_SEL(_apllcore) \ -+{ \ -+ .reg = RK3588_BIGCORE0_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B1_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT), \ -+} -+ -+#define RK3588_CORE_B2_SEL(_apllcore) \ -+{ \ -+ .reg = RK3588_BIGCORE1_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B0_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT) | \ -+ HIWORD_UPDATE(0, RK3588_CLK_CORE_B0_GPLL_DIV_MASK, \ -+ RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT), \ -+} -+ -+#define RK3588_CORE_B3_SEL(_apllcore) \ -+{ \ -+ .reg = RK3588_BIGCORE1_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B1_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT), \ -+} -+ -+#define RK3588_CORE_L_SEL0(_offs, _apllcore) \ -+{ \ -+ .reg = RK3588_DSU_CLKSEL_CON(6 + _offs), \ -+ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_L_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_L0_SEL_CLEAN_SHIFT) | \ -+ HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_L_SEL_CLEAN_MASK, \ -+ RK3588_CLK_CORE_L1_SEL_CLEAN_SHIFT), \ -+} -+ -+#define RK3588_CORE_L_SEL1(_seldsu, _divdsu) \ -+{ \ -+ .reg = RK3588_DSU_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_seldsu, RK3588_CLK_DSU_DF_SRC_MASK, \ -+ RK3588_CLK_DSU_DF_SRC_SHIFT) | \ -+ HIWORD_UPDATE(_divdsu - 1, RK3588_CLK_DSU_DF_DIV_MASK, \ -+ RK3588_CLK_DSU_DF_DIV_SHIFT), \ -+} ++static struct rockchip_pll_rate_table rk3328_pll_frac_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1016064000, 3, 127, 1, 1, 0, 134218), ++ /* vco = 1016064000 */ ++ RK3036_PLL_RATE(983040000, 24, 983, 1, 1, 0, 671089), ++ /* vco = 983040000 */ ++ RK3036_PLL_RATE(491520000, 24, 983, 2, 1, 0, 671089), ++ /* vco = 983040000 */ ++ RK3036_PLL_RATE(61440000, 6, 215, 7, 2, 0, 671089), ++ /* vco = 860156000 */ ++ RK3036_PLL_RATE(56448000, 12, 451, 4, 4, 0, 9797895), ++ /* vco = 903168000 */ ++ RK3036_PLL_RATE(40960000, 12, 409, 4, 5, 0, 10066330), ++ /* vco = 819200000 */ ++ { /* sentinel */ }, ++}; + -+#define RK3588_CORE_L_SEL2(_aclkm, _aclkmp, _aclks) \ -+{ \ -+ .reg = RK3588_DSU_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_aclkm - 1, RK3588_ACLKM_DSU_DIV_MASK, \ -+ RK3588_ACLKM_DSU_DIV_SHIFT) | \ -+ HIWORD_UPDATE(_aclkmp - 1, RK3588_ACLKMP_DSU_DIV_MASK, \ -+ RK3588_ACLKMP_DSU_DIV_SHIFT) | \ -+ HIWORD_UPDATE(_aclks - 1, RK3588_ACLKS_DSU_DIV_MASK, \ -+ RK3588_ACLKS_DSU_DIV_SHIFT), \ -+} ++#define RK3328_DIV_ACLKM_MASK 0x7 ++#define RK3328_DIV_ACLKM_SHIFT 4 ++#define RK3328_DIV_PCLK_DBG_MASK 0xf ++#define RK3328_DIV_PCLK_DBG_SHIFT 0 + -+#define RK3588_CORE_L_SEL3(_periph) \ ++#define RK3328_CLKSEL1(_aclk_core, _pclk_dbg) \ +{ \ -+ .reg = RK3588_DSU_CLKSEL_CON(2), \ -+ .val = HIWORD_UPDATE(_periph - 1, RK3588_PERIPH_DSU_DIV_MASK, \ -+ RK3588_PERIPH_DSU_DIV_SHIFT), \ ++ .reg = RK3328_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK3328_DIV_ACLKM_MASK, \ ++ RK3328_DIV_ACLKM_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, RK3328_DIV_PCLK_DBG_MASK, \ ++ RK3328_DIV_PCLK_DBG_SHIFT), \ +} + 
-+#define RK3588_CORE_L_SEL4(_gicclk, _atclk) \ ++#define RK3328_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ +{ \ -+ .reg = RK3588_DSU_CLKSEL_CON(3), \ -+ .val = HIWORD_UPDATE(_gicclk - 1, RK3588_GICCLK_DSU_DIV_MASK, \ -+ RK3588_GICCLK_DSU_DIV_SHIFT) | \ -+ HIWORD_UPDATE(_atclk - 1, RK3588_ATCLK_DSU_DIV_MASK, \ -+ RK3588_ATCLK_DSU_DIV_SHIFT), \ -+} -+ -+#define RK3588_CPUB01CLK_RATE(_prate, _apllcore) \ -+{ \ -+ .prate = _prate##U, \ -+ .pre_muxs = { \ -+ RK3588_CORE_B0_SEL(0), \ -+ RK3588_CORE_B1_SEL(0), \ -+ }, \ -+ .post_muxs = { \ -+ RK3588_CORE_B0_SEL(_apllcore), \ -+ RK3588_CORE_B1_SEL(_apllcore), \ -+ }, \ -+} -+ -+#define RK3588_CPUB23CLK_RATE(_prate, _apllcore) \ -+{ \ -+ .prate = _prate##U, \ -+ .pre_muxs = { \ -+ RK3588_CORE_B2_SEL(0), \ -+ RK3588_CORE_B3_SEL(0), \ -+ }, \ -+ .post_muxs = { \ -+ RK3588_CORE_B2_SEL(_apllcore), \ -+ RK3588_CORE_B3_SEL(_apllcore), \ -+ }, \ -+} -+ -+#define RK3588_CPULCLK_RATE(_prate, _apllcore, _seldsu, _divdsu) \ -+{ \ -+ .prate = _prate##U, \ -+ .pre_muxs = { \ -+ RK3588_CORE_L_SEL0(0, 0), \ -+ RK3588_CORE_L_SEL0(1, 0), \ -+ RK3588_CORE_L_SEL1(3, 2), \ -+ RK3588_CORE_L_SEL2(2, 3, 3), \ -+ RK3588_CORE_L_SEL3(4), \ -+ RK3588_CORE_L_SEL4(4, 4), \ -+ }, \ -+ .post_muxs = { \ -+ RK3588_CORE_L_SEL0(0, _apllcore), \ -+ RK3588_CORE_L_SEL0(1, _apllcore), \ -+ RK3588_CORE_L_SEL1(_seldsu, _divdsu), \ -+ }, \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3328_CLKSEL1(_aclk_core, _pclk_dbg), \ ++ }, \ +} + -+static struct rockchip_cpuclk_rate_table rk3588_cpub0clk_rates[] __initdata = { -+ RK3588_CPUB01CLK_RATE(2496000000, 1), -+ RK3588_CPUB01CLK_RATE(2400000000, 1), -+ RK3588_CPUB01CLK_RATE(2304000000, 1), -+ RK3588_CPUB01CLK_RATE(2208000000, 1), -+ RK3588_CPUB01CLK_RATE(2184000000, 1), -+ RK3588_CPUB01CLK_RATE(2088000000, 1), -+ RK3588_CPUB01CLK_RATE(2040000000, 1), -+ RK3588_CPUB01CLK_RATE(2016000000, 1), -+ RK3588_CPUB01CLK_RATE(1992000000, 1), -+ RK3588_CPUB01CLK_RATE(1896000000, 1), -+ RK3588_CPUB01CLK_RATE(1800000000, 1), -+ RK3588_CPUB01CLK_RATE(1704000000, 0), -+ RK3588_CPUB01CLK_RATE(1608000000, 0), -+ RK3588_CPUB01CLK_RATE(1584000000, 0), -+ RK3588_CPUB01CLK_RATE(1560000000, 0), -+ RK3588_CPUB01CLK_RATE(1536000000, 0), -+ RK3588_CPUB01CLK_RATE(1512000000, 0), -+ RK3588_CPUB01CLK_RATE(1488000000, 0), -+ RK3588_CPUB01CLK_RATE(1464000000, 0), -+ RK3588_CPUB01CLK_RATE(1440000000, 0), -+ RK3588_CPUB01CLK_RATE(1416000000, 0), -+ RK3588_CPUB01CLK_RATE(1392000000, 0), -+ RK3588_CPUB01CLK_RATE(1368000000, 0), -+ RK3588_CPUB01CLK_RATE(1344000000, 0), -+ RK3588_CPUB01CLK_RATE(1320000000, 0), -+ RK3588_CPUB01CLK_RATE(1296000000, 0), -+ RK3588_CPUB01CLK_RATE(1272000000, 0), -+ RK3588_CPUB01CLK_RATE(1248000000, 0), -+ RK3588_CPUB01CLK_RATE(1224000000, 0), -+ RK3588_CPUB01CLK_RATE(1200000000, 0), -+ RK3588_CPUB01CLK_RATE(1104000000, 0), -+ RK3588_CPUB01CLK_RATE(1008000000, 0), -+ RK3588_CPUB01CLK_RATE(912000000, 0), -+ RK3588_CPUB01CLK_RATE(816000000, 0), -+ RK3588_CPUB01CLK_RATE(696000000, 0), -+ RK3588_CPUB01CLK_RATE(600000000, 0), -+ RK3588_CPUB01CLK_RATE(408000000, 0), -+ RK3588_CPUB01CLK_RATE(312000000, 0), -+ RK3588_CPUB01CLK_RATE(216000000, 0), -+ RK3588_CPUB01CLK_RATE(96000000, 0), ++static struct rockchip_cpuclk_rate_table rk3328_cpuclk_rates[] __initdata = { ++ RK3328_CPUCLK_RATE(1800000000, 1, 7), ++ RK3328_CPUCLK_RATE(1704000000, 1, 7), ++ RK3328_CPUCLK_RATE(1608000000, 1, 7), ++ RK3328_CPUCLK_RATE(1512000000, 1, 7), ++ RK3328_CPUCLK_RATE(1488000000, 1, 5), ++ RK3328_CPUCLK_RATE(1416000000, 1, 5), ++ RK3328_CPUCLK_RATE(1392000000, 1, 5), ++ 
RK3328_CPUCLK_RATE(1296000000, 1, 5), ++ RK3328_CPUCLK_RATE(1200000000, 1, 5), ++ RK3328_CPUCLK_RATE(1104000000, 1, 5), ++ RK3328_CPUCLK_RATE(1008000000, 1, 5), ++ RK3328_CPUCLK_RATE(912000000, 1, 5), ++ RK3328_CPUCLK_RATE(816000000, 1, 3), ++ RK3328_CPUCLK_RATE(696000000, 1, 3), ++ RK3328_CPUCLK_RATE(600000000, 1, 3), ++ RK3328_CPUCLK_RATE(408000000, 1, 1), ++ RK3328_CPUCLK_RATE(312000000, 1, 1), ++ RK3328_CPUCLK_RATE(216000000, 1, 1), ++ RK3328_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk3588_cpub0clk_data = { -+ .core_reg[0] = RK3588_BIGCORE0_CLKSEL_CON(0), -+ .div_core_shift[0] = 8, ++static const struct rockchip_cpuclk_reg_data rk3328_cpuclk_data = { ++ .core_reg[0] = RK3328_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, + .div_core_mask[0] = 0x1f, -+ .core_reg[1] = RK3588_BIGCORE0_CLKSEL_CON(1), -+ .div_core_shift[1] = 0, -+ .div_core_mask[1] = 0x1f, -+ .num_cores = 2, ++ .num_cores = 1, + .mux_core_alt = 1, -+ .mux_core_main = 2, ++ .mux_core_main = 3, + .mux_core_shift = 6, + .mux_core_mask = 0x3, +}; + -+static struct rockchip_cpuclk_rate_table rk3588_cpub1clk_rates[] __initdata = { -+ RK3588_CPUB23CLK_RATE(2496000000, 1), -+ RK3588_CPUB23CLK_RATE(2400000000, 1), -+ RK3588_CPUB23CLK_RATE(2304000000, 1), -+ RK3588_CPUB23CLK_RATE(2208000000, 1), -+ RK3588_CPUB23CLK_RATE(2184000000, 1), -+ RK3588_CPUB23CLK_RATE(2088000000, 1), -+ RK3588_CPUB23CLK_RATE(2040000000, 1), -+ RK3588_CPUB23CLK_RATE(2016000000, 1), -+ RK3588_CPUB23CLK_RATE(1992000000, 1), -+ RK3588_CPUB23CLK_RATE(1896000000, 1), -+ RK3588_CPUB23CLK_RATE(1800000000, 1), -+ RK3588_CPUB23CLK_RATE(1704000000, 0), -+ RK3588_CPUB23CLK_RATE(1608000000, 0), -+ RK3588_CPUB23CLK_RATE(1584000000, 0), -+ RK3588_CPUB23CLK_RATE(1560000000, 0), -+ RK3588_CPUB23CLK_RATE(1536000000, 0), -+ RK3588_CPUB23CLK_RATE(1512000000, 0), -+ RK3588_CPUB23CLK_RATE(1488000000, 0), -+ RK3588_CPUB23CLK_RATE(1464000000, 0), -+ RK3588_CPUB23CLK_RATE(1440000000, 0), -+ RK3588_CPUB23CLK_RATE(1416000000, 0), -+ RK3588_CPUB23CLK_RATE(1392000000, 0), -+ RK3588_CPUB23CLK_RATE(1368000000, 0), -+ RK3588_CPUB23CLK_RATE(1344000000, 0), -+ RK3588_CPUB23CLK_RATE(1320000000, 0), -+ RK3588_CPUB23CLK_RATE(1296000000, 0), -+ RK3588_CPUB23CLK_RATE(1272000000, 0), -+ RK3588_CPUB23CLK_RATE(1248000000, 0), -+ RK3588_CPUB23CLK_RATE(1224000000, 0), -+ RK3588_CPUB23CLK_RATE(1200000000, 0), -+ RK3588_CPUB23CLK_RATE(1104000000, 0), -+ RK3588_CPUB23CLK_RATE(1008000000, 0), -+ RK3588_CPUB23CLK_RATE(912000000, 0), -+ RK3588_CPUB23CLK_RATE(816000000, 0), -+ RK3588_CPUB23CLK_RATE(696000000, 0), -+ RK3588_CPUB23CLK_RATE(600000000, 0), -+ RK3588_CPUB23CLK_RATE(408000000, 0), -+ RK3588_CPUB23CLK_RATE(312000000, 0), -+ RK3588_CPUB23CLK_RATE(216000000, 0), -+ RK3588_CPUB23CLK_RATE(96000000, 0), -+}; ++PNAME(mux_pll_p) = { "xin24m" }; + -+static const struct rockchip_cpuclk_reg_data rk3588_cpub1clk_data = { -+ .core_reg[0] = RK3588_BIGCORE1_CLKSEL_CON(0), -+ .div_core_shift[0] = 8, -+ .div_core_mask[0] = 0x1f, -+ .core_reg[1] = RK3588_BIGCORE1_CLKSEL_CON(1), -+ .div_core_shift[1] = 0, -+ .div_core_mask[1] = 0x1f, -+ .num_cores = 2, -+ .mux_core_alt = 1, -+ .mux_core_main = 2, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, -+}; ++PNAME(mux_2plls_p) = { "cpll", "gpll" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_cpll_gpll_apll_p) = { "cpll", "gpll", "apll" }; ++PNAME(mux_2plls_xin24m_p) = { "cpll", "gpll", "xin24m" }; ++PNAME(mux_2plls_hdmiphy_p) = { "cpll", "gpll", ++ "dummy_hdmiphy" }; ++PNAME(mux_4plls_p) = { "cpll", "gpll", ++ 
"dummy_hdmiphy", ++ "usb480m" }; ++PNAME(mux_2plls_u480m_p) = { "cpll", "gpll", ++ "usb480m" }; ++PNAME(mux_2plls_24m_u480m_p) = { "cpll", "gpll", ++ "xin24m", "usb480m" }; + -+static struct rockchip_cpuclk_rate_table rk3588_cpulclk_rates[] __initdata = { -+ RK3588_CPULCLK_RATE(2208000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(2184000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(2088000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(2040000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(2016000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(1992000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(1896000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(1800000000, 1, 3, 1), -+ RK3588_CPULCLK_RATE(1704000000, 0, 3, 1), -+ RK3588_CPULCLK_RATE(1608000000, 0, 3, 1), -+ RK3588_CPULCLK_RATE(1584000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1560000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1536000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1512000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1488000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1464000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1440000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1416000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1392000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1368000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1344000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1320000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1296000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1272000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1248000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1224000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1200000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1104000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(1008000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(912000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(816000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(696000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(600000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(408000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(312000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(216000000, 0, 2, 1), -+ RK3588_CPULCLK_RATE(96000000, 0, 2, 1), -+}; ++PNAME(mux_ddrphy_p) = { "dpll", "apll", "cpll" }; ++PNAME(mux_armclk_p) = { "apll_core", ++ "gpll_core", ++ "dpll_core", ++ "npll_core"}; ++PNAME(mux_hdmiphy_p) = { "hdmi_phy", "xin24m" }; ++PNAME(mux_usb480m_p) = { "usb480m_phy", ++ "xin24m" }; + -+static const struct rockchip_cpuclk_reg_data rk3588_cpulclk_data = { -+ .core_reg[0] = RK3588_DSU_CLKSEL_CON(6), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .core_reg[1] = RK3588_DSU_CLKSEL_CON(6), -+ .div_core_shift[1] = 7, -+ .div_core_mask[1] = 0x1f, -+ .core_reg[2] = RK3588_DSU_CLKSEL_CON(7), -+ .div_core_shift[2] = 0, -+ .div_core_mask[2] = 0x1f, -+ .core_reg[3] = RK3588_DSU_CLKSEL_CON(7), -+ .div_core_shift[3] = 7, -+ .div_core_mask[3] = 0x1f, -+ .num_cores = 4, -+ .mux_core_reg = RK3588_DSU_CLKSEL_CON(5), -+ .mux_core_alt = 1, -+ .mux_core_main = 2, -+ .mux_core_shift = 14, -+ .mux_core_mask = 0x3, -+}; ++PNAME(mux_i2s0_p) = { "clk_i2s0_div", ++ "clk_i2s0_frac", ++ "xin12m", ++ "xin12m" }; ++PNAME(mux_i2s1_p) = { "clk_i2s1_div", ++ "clk_i2s1_frac", ++ "clkin_i2s1", ++ "xin12m" }; ++PNAME(mux_i2s2_p) = { "clk_i2s2_div", ++ "clk_i2s2_frac", ++ "clkin_i2s2", ++ "xin12m" }; ++PNAME(mux_i2s1out_p) = { "clk_i2s1", "xin12m"}; ++PNAME(mux_i2s2out_p) = { "clk_i2s2", "xin12m" }; ++PNAME(mux_spdif_p) = { "clk_spdif_div", ++ "clk_spdif_frac", ++ "xin12m", ++ "xin12m" }; ++PNAME(mux_uart0_p) = { "clk_uart0_div", ++ "clk_uart0_frac", ++ "xin24m" }; ++PNAME(mux_uart1_p) = { "clk_uart1_div", ++ "clk_uart1_frac", ++ "xin24m" }; ++PNAME(mux_uart2_p) = { "clk_uart2_div", ++ "clk_uart2_frac", ++ "xin24m" }; + -+PNAME(mux_pll_p) = { "xin24m", "xin32k" }; 
-+PNAME(mux_armclkl_p) = { "xin24m", "gpll", "lpll" }; -+PNAME(mux_armclkb01_p) = { "xin24m", "gpll", "b0pll",}; -+PNAME(mux_armclkb23_p) = { "xin24m", "gpll", "b1pll",}; -+PNAME(b0pll_b1pll_lpll_gpll_p) = { "b0pll", "b1pll", "lpll", "gpll" }; -+PNAME(gpll_24m_p) = { "gpll", "xin24m" }; -+PNAME(gpll_aupll_p) = { "gpll", "aupll" }; -+PNAME(gpll_lpll_p) = { "gpll", "lpll" }; -+PNAME(gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(gpll_spll_p) = { "gpll", "spll" }; -+PNAME(gpll_cpll_24m_p) = { "gpll", "cpll", "xin24m"}; -+PNAME(gpll_cpll_aupll_p) = { "gpll", "cpll", "aupll"}; -+PNAME(gpll_cpll_npll_p) = { "gpll", "cpll", "npll"}; -+PNAME(gpll_cpll_npll_v0pll_p) = { "gpll", "cpll", "npll", "v0pll"}; -+PNAME(gpll_cpll_24m_spll_p) = { "gpll", "cpll", "xin24m", "spll" }; -+PNAME(gpll_cpll_aupll_spll_p) = { "gpll", "cpll", "aupll", "spll" }; -+PNAME(gpll_cpll_aupll_npll_p) = { "gpll", "cpll", "aupll", "npll" }; -+PNAME(gpll_cpll_v0pll_aupll_p) = { "gpll", "cpll", "v0pll", "aupll" }; -+PNAME(gpll_cpll_v0pll_spll_p) = { "gpll", "cpll", "v0pll", "spll" }; -+PNAME(gpll_cpll_aupll_npll_spll_p) = { "gpll", "cpll", "aupll", "npll", "spll" }; -+PNAME(gpll_cpll_dmyaupll_npll_spll_p) = { "gpll", "cpll", "dummy_aupll", "npll", "spll" }; -+PNAME(gpll_cpll_npll_aupll_spll_p) = { "gpll", "cpll", "npll", "aupll", "spll" }; -+PNAME(gpll_cpll_npll_1000m_p) = { "gpll", "cpll", "npll", "clk_1000m_src" }; -+PNAME(mux_24m_spll_gpll_cpll_p) = { "xin24m", "spll", "gpll", "cpll" }; -+PNAME(mux_24m_32k_p) = { "xin24m", "xin32k" }; -+PNAME(mux_24m_100m_p) = { "xin24m", "clk_100m_src" }; -+PNAME(mux_200m_100m_p) = { "clk_200m_src", "clk_100m_src" }; -+PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_50m_24m_p) = { "clk_150m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_150m_24m_p) = { "clk_200m_src", "clk_150m_src", "xin24m" }; -+PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_700m_400m_200m_24m_p) = { "clk_700m_src", "clk_400m_src", "clk_200m_src", "xin24m" }; -+PNAME(mux_500m_250m_100m_24m_p) = { "clk_500m_src", "clk_250m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_400m_200m_100m_24m_p) = {"clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(clk_i2s2_2ch_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "i2s2_mclkin", "xin12m" }; -+PNAME(i2s2_2ch_mclkout_p) = { "mclk_i2s2_2ch", "xin12m" }; -+PNAME(clk_i2s3_2ch_p) = { "clk_i2s3_2ch_src", "clk_i2s3_2ch_frac", "i2s3_mclkin", "xin12m" }; -+PNAME(i2s3_2ch_mclkout_p) = { "mclk_i2s3_2ch", "xin12m" }; -+PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin12m" }; -+PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin12m" }; -+PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin12m" }; -+PNAME(clk_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "i2s1_mclkin", "xin12m" }; -+PNAME(clk_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "i2s1_mclkin", "xin12m" }; -+PNAME(i2s1_8ch_mclkout_p) = { "mclk_i2s1_8ch_tx", "mclk_i2s1_8ch_rx", "xin12m" }; -+PNAME(clk_i2s4_8ch_tx_p) = 
{ "clk_i2s4_8ch_tx_src", "clk_i2s4_8ch_tx_frac", "i2s4_mclkin", "xin12m" }; -+PNAME(clk_i2s5_8ch_tx_p) = { "clk_i2s5_8ch_tx_src", "clk_i2s5_8ch_tx_frac", "i2s5_mclkin", "xin12m" }; -+PNAME(clk_i2s6_8ch_tx_p) = { "clk_i2s6_8ch_tx_src", "clk_i2s6_8ch_tx_frac", "i2s6_mclkin", "xin12m" }; -+PNAME(clk_i2s6_8ch_rx_p) = { "clk_i2s6_8ch_rx_src", "clk_i2s6_8ch_rx_frac", "i2s6_mclkin", "xin12m" }; -+PNAME(i2s6_8ch_mclkout_p) = { "mclk_i2s6_8ch_tx", "mclk_i2s6_8ch_rx", "xin12m" }; -+PNAME(clk_i2s7_8ch_rx_p) = { "clk_i2s7_8ch_rx_src", "clk_i2s7_8ch_rx_frac", "i2s7_mclkin", "xin12m" }; -+PNAME(clk_i2s8_8ch_tx_p) = { "clk_i2s8_8ch_tx_src", "clk_i2s8_8ch_tx_frac", "i2s8_mclkin", "xin12m" }; -+PNAME(clk_i2s9_8ch_rx_p) = { "clk_i2s9_8ch_rx_src", "clk_i2s9_8ch_rx_frac", "i2s9_mclkin", "xin12m" }; -+PNAME(clk_i2s10_8ch_rx_p) = { "clk_i2s10_8ch_rx_src", "clk_i2s10_8ch_rx_frac", "i2s10_mclkin", "xin12m" }; -+PNAME(clk_spdif0_p) = { "clk_spdif0_src", "clk_spdif0_frac", "xin12m" }; -+PNAME(clk_spdif1_p) = { "clk_spdif1_src", "clk_spdif1_frac", "xin12m" }; -+PNAME(clk_spdif2_dp0_p) = { "clk_spdif2_dp0_src", "clk_spdif2_dp0_frac", "xin12m" }; -+PNAME(clk_spdif3_p) = { "clk_spdif3_src", "clk_spdif3_frac", "xin12m" }; -+PNAME(clk_spdif4_p) = { "clk_spdif4_src", "clk_spdif4_frac", "xin12m" }; -+PNAME(clk_spdif5_dp1_p) = { "clk_spdif5_dp1_src", "clk_spdif5_dp1_frac", "xin12m" }; -+PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; -+PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; -+PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; -+PNAME(clk_gmac0_ptp_ref_p) = { "cpll", "clk_gmac0_ptpref_io" }; -+PNAME(clk_gmac1_ptp_ref_p) = { "cpll", "clk_gmac1_ptpref_io" }; -+PNAME(clk_hdmirx_aud_p) = { "clk_hdmirx_aud_src", "clk_hdmirx_aud_frac" }; -+PNAME(aclk_hdcp1_root_p) = { "gpll", "cpll", "clk_hdmitrx_refsrc" }; -+PNAME(aclk_vop_sub_src_p) = { "aclk_vop_root", "aclk_vop_div2_src" }; -+PNAME(dclk_vop0_p) = { "dclk_vop0_src", "clk_hdmiphy_pixel0", "clk_hdmiphy_pixel1" }; -+PNAME(dclk_vop1_p) = { "dclk_vop1_src", "clk_hdmiphy_pixel0", "clk_hdmiphy_pixel1" }; -+PNAME(dclk_vop2_p) = { "dclk_vop2_src", "clk_hdmiphy_pixel0", "clk_hdmiphy_pixel1" }; -+PNAME(pmu_200m_100m_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src" }; -+PNAME(pmu_300m_24m_p) = { "clk_300m_src", "xin24m" }; -+PNAME(pmu_400m_24m_p) = { "clk_400m_src", "xin24m" }; -+PNAME(pmu_100m_50m_24m_src_p) = { "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; -+PNAME(pmu_24m_32k_100m_src_p) = { "xin24m", "32k", "clk_pmu1_100m_src" }; -+PNAME(hclk_pmu1_root_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; -+PNAME(hclk_pmu_cm0_root_p) = { "clk_pmu1_400m_src", "clk_pmu1_200m_src", "clk_pmu1_100m_src", "xin24m" }; -+PNAME(mclk_pdm0_p) = { "clk_pmu1_300m_src", "clk_pmu1_200m_src" }; -+PNAME(mux_24m_ppll_spll_p) = { "xin24m", "ppll", "spll" }; -+PNAME(mux_24m_ppll_p) = { "xin24m", "ppll" }; -+PNAME(clk_ref_pipe_phy0_p) = { "clk_ref_pipe_phy0_osc_src", "clk_ref_pipe_phy0_pll_src" }; 
-+PNAME(clk_ref_pipe_phy1_p) = { "clk_ref_pipe_phy1_osc_src", "clk_ref_pipe_phy1_pll_src" }; -+PNAME(clk_ref_pipe_phy2_p) = { "clk_ref_pipe_phy2_osc_src", "clk_ref_pipe_phy2_pll_src" }; ++PNAME(mux_sclk_cif_p) = { "clk_cif_src", ++ "xin24m" }; ++PNAME(mux_dclk_lcdc_p) = { "hdmiphy", ++ "dclk_lcdc_src" }; ++PNAME(mux_aclk_peri_pre_p) = { "cpll_peri", ++ "gpll_peri", ++ "hdmiphy_peri" }; ++PNAME(mux_ref_usb3otg_src_p) = { "xin24m", ++ "clk_usb3otg_ref" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", ++ "clk_rtc32k" }; ++PNAME(mux_mac2io_src_p) = { "clk_mac2io_src", ++ "gmac_clkin" }; ++PNAME(mux_mac2phy_src_p) = { "clk_mac2phy_src", ++ "phy_50m_out" }; ++PNAME(mux_mac2io_ext_p) = { "clk_mac2io", ++ "gmac_clkin" }; ++ ++static struct rockchip_pll_clock rk3328_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ 0, RK3328_PLL_CON(0), ++ RK3328_MODE_CON, 0, 4, 0, rk3328_pll_frac_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ 0, RK3328_PLL_CON(8), ++ RK3328_MODE_CON, 4, 3, 0, NULL), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK3328_PLL_CON(16), ++ RK3328_MODE_CON, 8, 2, 0, rk3328_pll_rates), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK3328_PLL_CON(24), ++ RK3328_MODE_CON, 12, 1, 0, rk3328_pll_frac_rates), ++ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, ++ 0, RK3328_PLL_CON(40), ++ RK3328_MODE_CON, 1, 0, 0, rk3328_pll_rates), ++}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk3588_i2s0_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(26), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_i2s0_fracmux __initdata = ++ MUX(0, "i2s0_pre", mux_i2s0_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(6), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s0_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(28), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_i2s1_fracmux __initdata = ++ MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(8), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s1_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", clk_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(7), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_i2s2_fracmux __initdata = ++ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(10), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s1_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", clk_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(9), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_spdif_fracmux __initdata = ++ MUX(SCLK_SPDIF, "sclk_spdif", mux_spdif_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(12), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s2_2ch_fracmux __initdata = -+ MUX(CLK_I2S2_2CH, "clk_i2s2_2ch", clk_i2s2_2ch_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(30), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(14), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s3_2ch_fracmux __initdata = -+ MUX(CLK_I2S3_2CH, "clk_i2s3_2ch", 
clk_i2s3_2ch_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(32), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(16), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s4_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S4_8CH_TX, "clk_i2s4_8ch_tx", clk_i2s4_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(120), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(18), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk3588_i2s5_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S5_8CH_TX, "clk_i2s5_8ch_tx", clk_i2s5_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(142), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ + -+static struct rockchip_clk_branch rk3588_i2s6_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S6_8CH_TX, "clk_i2s6_8ch_tx", clk_i2s6_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(146), 0, 2, MFLAGS); ++ DIV(0, "clk_24m", "xin24m", CLK_IGNORE_UNUSED, ++ RK3328_CLKSEL_CON(2), 8, 5, DFLAGS), ++ COMPOSITE(SCLK_RTC32K, "clk_rtc32k", mux_2plls_xin24m_p, 0, ++ RK3328_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 14, DFLAGS, ++ RK3328_CLKGATE_CON(0), 11, GFLAGS), + -+static struct rockchip_clk_branch rk3588_i2s6_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S6_8CH_RX, "clk_i2s6_8ch_rx", clk_i2s6_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(148), 0, 2, MFLAGS); ++ /* PD_MISC */ ++ MUX(HDMIPHY, "hdmiphy", mux_hdmiphy_p, CLK_SET_RATE_PARENT, ++ RK3328_MISC_CON, 13, 1, MFLAGS), ++ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RK3328_MISC_CON, 15, 1, MFLAGS), + -+static struct rockchip_clk_branch rk3588_i2s7_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S7_8CH_RX, "clk_i2s7_8ch_rx", clk_i2s7_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(131), 0, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+static struct rockchip_clk_branch rk3588_i2s8_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S8_8CH_TX, "clk_i2s8_8ch_tx", clk_i2s8_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(122), 0, 2, MFLAGS); ++ /* PD_CORE */ ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "npll_core", "npll", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3328_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3328_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(0, "aclk_core_niu", "aclk_core", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(0, "aclk_gic400", "aclk_core", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(13), 1, GFLAGS), + -+static struct rockchip_clk_branch rk3588_i2s9_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S9_8CH_RX, "clk_i2s9_8ch_rx", clk_i2s9_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(155), 0, 2, MFLAGS); ++ GATE(0, "clk_jtag", "jtag_clkin", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(7), 2, GFLAGS), + -+static struct rockchip_clk_branch rk3588_i2s10_8ch_rx_fracmux __initdata = -+ 
MUX(CLK_I2S10_8CH_RX, "clk_i2s10_8ch_rx", clk_i2s10_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(157), 0, 2, MFLAGS); ++ /* PD_GPU */ ++ COMPOSITE(0, "aclk_gpu_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(0, "aclk_gpu_niu", "aclk_gpu_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(14), 1, GFLAGS), + -+static struct rockchip_clk_branch rk3588_spdif0_fracmux __initdata = -+ MUX(CLK_SPDIF0, "clk_spdif0", clk_spdif0_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(34), 0, 2, MFLAGS); ++ /* PD_DDR */ ++ COMPOSITE_DDRCLK(SCLK_DDRCLK, "sclk_ddrc", mux_ddrphy_p, 0, ++ RK3328_CLKSEL_CON(3), 8, 2, 0, 3, ++ ROCKCHIP_DDRCLK_SIP_V2), + -+static struct rockchip_clk_branch rk3588_spdif1_fracmux __initdata = -+ MUX(CLK_SPDIF1, "clk_spdif1", clk_spdif1_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(36), 0, 2, MFLAGS); ++ GATE(0, "clk_ddrmsch", "sclk_ddrc", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(18), 6, GFLAGS), ++ GATE(0, "clk_ddrupctl", "sclk_ddrc", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(18), 5, GFLAGS), ++ GATE(0, "aclk_ddrupctl", "sclk_ddrc", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(18), 4, GFLAGS), ++ GATE(0, "clk_ddrmon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(0), 6, GFLAGS), + -+static struct rockchip_clk_branch rk3588_spdif2_dp0_fracmux __initdata = -+ MUX(CLK_SPDIF2_DP0, "clk_spdif2_dp0", clk_spdif2_dp0_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(124), 0, 2, MFLAGS); ++ COMPOSITE(PCLK_DDR, "pclk_ddr", mux_2plls_hdmiphy_p, CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(4), 13, 2, MFLAGS, 8, 3, DFLAGS, ++ RK3328_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(0, "pclk_ddrupctl", "pclk_ddr", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(18), 1, GFLAGS), ++ GATE(0, "pclk_ddr_msch", "pclk_ddr", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(0, "pclk_ddr_mon", "pclk_ddr", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(18), 3, GFLAGS), ++ GATE(0, "pclk_ddrstdby", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(18), 7, GFLAGS), ++ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(18), 9, GFLAGS), + -+static struct rockchip_clk_branch rk3588_spdif3_fracmux __initdata = -+ MUX(CLK_SPDIF3, "clk_spdif3", clk_spdif3_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(150), 0, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+static struct rockchip_clk_branch rk3588_spdif4_fracmux __initdata = -+ MUX(CLK_SPDIF4, "clk_spdif4", clk_spdif4_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(152), 0, 2, MFLAGS); ++ /* PD_BUS */ ++ COMPOSITE(ACLK_BUS_PRE, "aclk_bus_pre", mux_2plls_hdmiphy_p, CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(0), 13, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_BUS_PRE, "hclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(1), 8, 2, DFLAGS, ++ RK3328_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_BUS_PRE, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(1), 12, 3, DFLAGS, ++ RK3328_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(0, "pclk_bus", "pclk_bus_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(0, "pclk_phy_pre", "pclk_bus_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(8), 4, GFLAGS), + -+static struct rockchip_clk_branch rk3588_spdif5_dp1_fracmux __initdata = -+ MUX(CLK_SPDIF5_DP1, "clk_spdif5_dp1", clk_spdif5_dp1_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(126), 0, 2, MFLAGS); ++ 
COMPOSITE(SCLK_TSP, "clk_tsp", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(21), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(0, "clk_hsadc_tsp", "ext_gpio3a2", 0, ++ RK3328_CLKGATE_CON(17), 13, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart0_fracmux __initdata = -+ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(5), 0, 2, MFLAGS); ++ /* PD_I2S */ ++ COMPOSITE(0, "clk_i2s0_div", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(7), 0, ++ RK3328_CLKGATE_CON(1), 2, GFLAGS, ++ &rk3328_i2s0_fracmux), ++ GATE(SCLK_I2S0, "clk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(1), 3, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(43), 0, 2, MFLAGS); ++ COMPOSITE(0, "clk_i2s1_div", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(8), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(9), 0, ++ RK3328_CLKGATE_CON(1), 5, GFLAGS, ++ &rk3328_i2s1_fracmux), ++ GATE(SCLK_I2S1, "clk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S1_OUT, "i2s1_out", mux_i2s1out_p, 0, ++ RK3328_CLKSEL_CON(8), 12, 1, MFLAGS, ++ RK3328_CLKGATE_CON(1), 7, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(45), 0, 2, MFLAGS); ++ COMPOSITE(0, "clk_i2s2_div", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(11), 0, ++ RK3328_CLKGATE_CON(1), 9, GFLAGS, ++ &rk3328_i2s2_fracmux), ++ GATE(SCLK_I2S2, "clk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S2_OUT, "i2s2_out", mux_i2s2out_p, 0, ++ RK3328_CLKSEL_CON(10), 12, 1, MFLAGS, ++ RK3328_CLKGATE_CON(1), 11, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(47), 0, 2, MFLAGS); ++ COMPOSITE(0, "clk_spdif_div", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(12), 15, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(13), 0, ++ RK3328_CLKGATE_CON(1), 13, GFLAGS, ++ &rk3328_spdif_fracmux), + -+static struct rockchip_clk_branch rk3588_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(49), 0, 2, MFLAGS); ++ /* PD_UART */ ++ COMPOSITE(0, "clk_uart0_div", mux_2plls_u480m_p, 0, ++ RK3328_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(0, "clk_uart1_div", mux_2plls_u480m_p, 0, ++ RK3328_CLKSEL_CON(16), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE(0, "clk_uart2_div", mux_2plls_u480m_p, 0, ++ RK3328_CLKSEL_CON(18), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 2, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(15), 0, ++ 
RK3328_CLKGATE_CON(1), 15, GFLAGS, ++ &rk3328_uart0_fracmux), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(17), 0, ++ RK3328_CLKGATE_CON(2), 1, GFLAGS, ++ &rk3328_uart1_fracmux), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(19), 0, ++ RK3328_CLKGATE_CON(2), 3, GFLAGS, ++ &rk3328_uart2_fracmux), + -+static struct rockchip_clk_branch rk3588_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(51), 0, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 4 ++ */ + -+static struct rockchip_clk_branch rk3588_uart6_fracmux __initdata = -+ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(53), 0, 2, MFLAGS); ++ COMPOSITE(SCLK_I2C0, "clk_i2c0", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(34), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(34), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(35), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(35), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 12, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "clk_24m", 0, ++ RK3328_CLKSEL_CON(22), 0, 10, DFLAGS, ++ RK3328_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "clk_24m", 0, ++ RK3328_CLKSEL_CON(23), 0, 10, DFLAGS, ++ RK3328_CLKGATE_CON(2), 14, GFLAGS), ++ COMPOSITE(SCLK_SPI, "clk_spi", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(24), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 7, GFLAGS), ++ COMPOSITE(SCLK_PWM, "clk_pwm", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(24), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3328_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE(SCLK_OTP, "clk_otp", mux_2plls_xin24m_p, 0, ++ RK3328_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3328_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE(SCLK_EFUSE, "clk_efuse", mux_2plls_xin24m_p, 0, ++ RK3328_CLKSEL_CON(5), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE(SCLK_PDM, "clk_pdm", mux_cpll_gpll_apll_p, CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(2), 15, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart7_fracmux __initdata = -+ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(55), 0, 2, MFLAGS); ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ RK3328_CLKGATE_CON(8), 10, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart8_fracmux __initdata = -+ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(57), 0, 2, MFLAGS); ++ COMPOSITE(SCLK_WIFI, "clk_wifi", mux_2plls_u480m_p, 0, ++ 
RK3328_CLKSEL_CON(52), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3328_CLKGATE_CON(0), 10, GFLAGS), + -+static struct rockchip_clk_branch rk3588_uart9_fracmux __initdata = -+ MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(59), 0, 2, MFLAGS); ++ /* ++ * Clock-Architecture Diagram 5 ++ */ + -+static struct rockchip_clk_branch rk3588_hdmirx_aud_fracmux __initdata = -+ MUX(CLK_HDMIRX_AUD_P_MUX, "clk_hdmirx_aud_mux", clk_hdmirx_aud_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(140), 0, 1, MFLAGS); ++ /* PD_VIDEO */ ++ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 0, GFLAGS), ++ FACTOR_GATE(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4, ++ RK3328_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(24), 0, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(24), 1, GFLAGS), ++ GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(24), 2, GFLAGS), ++ GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(24), 3, GFLAGS), + -+static struct rockchip_pll_clock rk3588_pll_clks[] __initdata = { -+ [b0pll] = PLL(pll_rk3588_core, PLL_B0PLL, "b0pll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_B0_PLL_CON(0), -+ RK3588_B0_PLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), -+ [b1pll] = PLL(pll_rk3588_core, PLL_B1PLL, "b1pll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_B1_PLL_CON(8), -+ RK3588_B1_PLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), -+ [lpll] = PLL(pll_rk3588_core, PLL_LPLL, "lpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_LPLL_CON(16), -+ RK3588_LPLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), -+ [v0pll] = PLL(pll_rk3588, PLL_V0PLL, "v0pll", mux_pll_p, -+ 0, RK3588_PLL_CON(88), -+ RK3588_MODE_CON0, 4, 15, 0, rk3588_pll_rates), -+ [aupll] = PLL(pll_rk3588, PLL_AUPLL, "aupll", mux_pll_p, -+ 0, RK3588_PLL_CON(96), -+ RK3588_MODE_CON0, 6, 15, 0, rk3588_pll_rates), -+ [cpll] = PLL(pll_rk3588, PLL_CPLL, "cpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_PLL_CON(104), -+ RK3588_MODE_CON0, 8, 15, 0, rk3588_pll_rates), -+ [gpll] = PLL(pll_rk3588, PLL_GPLL, "gpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_PLL_CON(112), -+ RK3588_MODE_CON0, 2, 15, 0, rk3588_pll_rates), -+ [npll] = PLL(pll_rk3588, PLL_NPLL, "npll", mux_pll_p, -+ 0, RK3588_PLL_CON(120), -+ RK3588_MODE_CON0, 0, 15, 0, rk3588_pll_rates), -+ [ppll] = PLL(pll_rk3588_core, PLL_PPLL, "ppll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3588_PMU_PLL_CON(128), -+ RK3588_MODE_CON0, 10, 15, 0, rk3588_pll_rates), -+}; ++ COMPOSITE(SCLK_VDEC_CABAC, "sclk_vdec_cabac", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(48), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 1, GFLAGS), ++ ++ COMPOSITE(SCLK_VDEC_CORE, "sclk_vdec_core", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 2, GFLAGS), ++ ++ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(50), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 5, GFLAGS), ++ FACTOR_GATE(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4, ++ RK3328_CLKGATE_CON(11), 8, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(23), 0, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IS_CRITICAL, ++ 
RK3328_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(23), 3, GFLAGS), ++ ++ COMPOSITE(ACLK_RKVENC, "aclk_rkvenc", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 3, GFLAGS), ++ ++ COMPOSITE(SCLK_VENC_CORE, "sclk_venc_core", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(51), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 4, GFLAGS), ++ FACTOR_GATE(0, "hclk_venc", "sclk_venc_core", 0, 1, 4, ++ RK3328_CLKGATE_CON(11), 4, GFLAGS), ++ ++ GATE(0, "aclk_rkvenc_niu", "sclk_venc_core", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(25), 0, GFLAGS), ++ GATE(0, "hclk_rkvenc_niu", "hclk_venc", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(25), 1, GFLAGS), ++ GATE(ACLK_H265, "aclk_h265", "sclk_venc_core", 0, ++ RK3328_CLKGATE_CON(25), 2, GFLAGS), ++ GATE(PCLK_H265, "pclk_h265", "hclk_venc", 0, ++ RK3328_CLKGATE_CON(25), 3, GFLAGS), ++ GATE(ACLK_H264, "aclk_h264", "sclk_venc_core", 0, ++ RK3328_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(HCLK_H264, "hclk_h264", "hclk_venc", 0, ++ RK3328_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(ACLK_AXISRAM, "aclk_axisram", "sclk_venc_core", CLK_IGNORE_UNUSED, ++ RK3328_CLKGATE_CON(25), 6, GFLAGS), ++ ++ COMPOSITE(SCLK_VENC_DSP, "sclk_venc_dsp", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(52), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(6), 7, GFLAGS), + -+static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + /* -+ * CRU Clock-Architecture ++ * Clock-Architecture Diagram 6 + */ -+ /* fixed */ -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ /* top */ -+ COMPOSITE(CLK_50M_SRC, "clk_50m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE(CLK_100M_SRC, "clk_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE(CLK_150M_SRC, "clk_150m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(CLK_200M_SRC, "clk_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE(CLK_250M_SRC, "clk_250m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE(CLK_300M_SRC, "clk_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE(CLK_350M_SRC, "clk_350m_src", gpll_spll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE(CLK_400M_SRC, "clk_400m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(4), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE(CLK_500M_SRC, "clk_500m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE(CLK_600M_SRC, "clk_600m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE(CLK_650M_SRC, "clk_650m_src", gpll_lpll_p, 0, -+ RK3588_CLKSEL_CON(5), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 11, GFLAGS), -+ 
COMPOSITE(CLK_700M_SRC, "clk_700m_src", gpll_spll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(6), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE(CLK_800M_SRC, "clk_800m_src", gpll_aupll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(6), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_1000M_SRC, "clk_1000m_src", gpll_cpll_npll_v0pll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(7), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE(CLK_1200M_SRC, "clk_1200m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(7), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_M300_ROOT, "aclk_top_m300_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(9), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_M500_ROOT, "aclk_top_m500_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(9), 2, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_M400_ROOT, "aclk_top_m400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(9), 4, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_S200_ROOT, "aclk_top_s200_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(9), 6, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_NODIV(ACLK_TOP_S400_ROOT, "aclk_top_s400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(9), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(ACLK_TOP_ROOT, "aclk_top_root", gpll_cpll_aupll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(8), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(8), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE(ACLK_LOW_TOP_ROOT, "aclk_low_top_root", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(8), 14, 1, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE(CLK_MIPI_CAMARAOUT_M0, "clk_mipi_camaraout_m0", mux_24m_spll_gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 9, GFLAGS), -+ COMPOSITE(CLK_MIPI_CAMARAOUT_M1, "clk_mipi_camaraout_m1", mux_24m_spll_gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(19), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 10, GFLAGS), -+ COMPOSITE(CLK_MIPI_CAMARAOUT_M2, "clk_mipi_camaraout_m2", mux_24m_spll_gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 11, GFLAGS), -+ COMPOSITE(CLK_MIPI_CAMARAOUT_M3, "clk_mipi_camaraout_m3", mux_24m_spll_gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(21), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 12, GFLAGS), -+ COMPOSITE(CLK_MIPI_CAMARAOUT_M4, "clk_mipi_camaraout_m4", mux_24m_spll_gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(22), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 13, GFLAGS), -+ COMPOSITE(MCLK_GMAC0_OUT, "mclk_gmac0_out", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(5), 3, GFLAGS), -+ COMPOSITE(REFCLKO25M_ETH0_OUT, "refclko25m_eth0_out", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(15), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3588_CLKGATE_CON(5), 4, GFLAGS), -+ COMPOSITE(REFCLKO25M_ETH1_OUT, "refclko25m_eth1_out", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(5), 5, GFLAGS), -+ COMPOSITE(CLK_CIFOUT_OUT, "clk_cifout_out", 
gpll_cpll_24m_spll_p, 0, -+ RK3588_CLKSEL_CON(17), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(PCLK_MIPI_DCPHY0, "pclk_mipi_dcphy0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(3), 14, GFLAGS), -+ GATE(PCLK_MIPI_DCPHY1, "pclk_mipi_dcphy1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_CRU, "pclk_cru", "pclk_top_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(5), 0, GFLAGS), ++ /* PD_VIO */ ++ COMPOSITE(ACLK_VIO_PRE, "aclk_vio_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(37), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(5), 2, GFLAGS), ++ DIV(HCLK_VIO_PRE, "hclk_vio_pre", "aclk_vio_pre", 0, ++ RK3328_CLKSEL_CON(37), 8, 5, DFLAGS), + -+ /* bigcore0 */ -+ COMPOSITE_NODIV(PCLK_BIGCORE0_ROOT, "pclk_bigcore0_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_BIGCORE0_CLKSEL_CON(2), 0, 2, MFLAGS, -+ RK3588_BIGCORE0_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_BIGCORE0_PVTM, "pclk_bigcore0_pvtm", "pclk_bigcore0_root", 0, -+ RK3588_BIGCORE0_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(CLK_BIGCORE0_PVTM, "clk_bigcore0_pvtm", "xin24m", 0, -+ RK3588_BIGCORE0_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_CORE_BIGCORE0_PVTM, "clk_core_bigcore0_pvtm", "armclk_b01", 0, -+ RK3588_BIGCORE0_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE(SCLK_RGA, "clk_rga", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(5), 1, GFLAGS), ++ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_4plls_p, 0, ++ RK3328_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(5), 5, GFLAGS), ++ GATE(SCLK_HDMI_SFC, "sclk_hdmi_sfc", "xin24m", 0, ++ RK3328_CLKGATE_CON(5), 4, GFLAGS), + -+ /* bigcore1 */ -+ COMPOSITE_NODIV(PCLK_BIGCORE1_ROOT, "pclk_bigcore1_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_BIGCORE1_CLKSEL_CON(2), 0, 2, MFLAGS, -+ RK3588_BIGCORE1_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_BIGCORE1_PVTM, "pclk_bigcore1_pvtm", "pclk_bigcore1_root", 0, -+ RK3588_BIGCORE1_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(CLK_BIGCORE1_PVTM, "clk_bigcore1_pvtm", "xin24m", 0, -+ RK3588_BIGCORE1_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_CORE_BIGCORE1_PVTM, "clk_core_bigcore1_pvtm", "armclk_b23", 0, -+ RK3588_BIGCORE1_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_NODIV(0, "clk_cif_src", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(42), 7, 1, MFLAGS, ++ RK3328_CLKGATE_CON(5), 3, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cif_out", mux_sclk_cif_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(42), 5, 1, MFLAGS, 0, 5, DFLAGS), + -+ /* dsu */ -+ COMPOSITE(0, "sclk_dsu", b0pll_b1pll_lpll_gpll_p, CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(0), 12, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_DSU_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "atclk_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "gicclk_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(3), 5, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclkmp_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(1), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 12, GFLAGS), -+ 
COMPOSITE_NOMUX(0, "aclkm_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(1), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclks_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(1), 6, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_NOMUX(0, "periph_dsu", "sclk_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(2), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_NOMUX(0, "cntclk_dsu", "periph_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(2), 5, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NOMUX(0, "tsclk_dsu", "periph_dsu", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(2), 10, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3588_DSU_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_NODIV(PCLK_DSU_S_ROOT, "pclk_dsu_s_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(4), 11, 2, MFLAGS, -+ RK3588_DSU_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE(PCLK_DSU_ROOT, "pclk_dsu_root", b0pll_b1pll_lpll_gpll_p, CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(4), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_DSU_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE_NODIV(PCLK_DSU_NS_ROOT, "pclk_dsu_ns_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_DSU_CLKSEL_CON(4), 7, 2, MFLAGS, -+ RK3588_DSU_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(PCLK_LITCORE_PVTM, "pclk_litcore_pvtm", "pclk_dsu_ns_root", 0, -+ RK3588_DSU_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(PCLK_DBG, "pclk_dbg", "pclk_dsu_root", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_DSU, "pclk_dsu", "pclk_dsu_root", CLK_IS_CRITICAL, -+ RK3588_DSU_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(PCLK_S_DAPLITE, "pclk_s_daplite", "pclk_dsu_ns_root", CLK_IGNORE_UNUSED, -+ RK3588_DSU_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_M_DAPLITE, "pclk_m_daplite", "pclk_dsu_root", CLK_IGNORE_UNUSED, -+ RK3588_DSU_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(CLK_LITCORE_PVTM, "clk_litcore_pvtm", "xin24m", 0, -+ RK3588_DSU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_CORE_LITCORE_PVTM, "clk_core_litcore_pvtm", "armclk_l", 0, -+ RK3588_DSU_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE(DCLK_LCDC_SRC, "dclk_lcdc_src", mux_gpll_cpll_p, 0, ++ RK3328_CLKSEL_CON(40), 0, 1, MFLAGS, 8, 8, DFLAGS, ++ RK3328_CLKGATE_CON(5), 6, GFLAGS), ++ DIV(DCLK_HDMIPHY, "dclk_hdmiphy", "dclk_lcdc_src", 0, ++ RK3328_CLKSEL_CON(40), 3, 3, DFLAGS), ++ MUX(DCLK_LCDC, "dclk_lcdc", mux_dclk_lcdc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3328_CLKSEL_CON(40), 1, 1, MFLAGS), + -+ /* audio */ -+ COMPOSITE_NODIV(HCLK_AUDIO_ROOT, "hclk_audio_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(24), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(7), 0, GFLAGS), -+ COMPOSITE_NODIV(PCLK_AUDIO_ROOT, "pclk_audio_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(24), 2, 2, MFLAGS, -+ RK3588_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(HCLK_I2S3_2CH, "hclk_i2s3_2ch", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(7), 13, GFLAGS), -+ COMPOSITE(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(28), 9, 1, MFLAGS, 4, 5, DFLAGS, -+ RK3588_CLKGATE_CON(7), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(29), 0, -+ RK3588_CLKGATE_CON(7), 15, GFLAGS, -+ &rk3588_i2s2_2ch_fracmux), -+ GATE(MCLK_I2S2_2CH, "mclk_i2s2_2ch", "clk_i2s2_2ch", 0, -+ 
RK3588_CLKGATE_CON(8), 0, GFLAGS), -+ MUX(I2S2_2CH_MCLKOUT, "i2s2_2ch_mclkout", i2s2_2ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(30), 2, 1, MFLAGS), ++ /* ++ * Clock-Architecture Diagram 7 ++ */ + -+ COMPOSITE(CLK_I2S3_2CH_SRC, "clk_i2s3_2ch_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(30), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S3_2CH_FRAC, "clk_i2s3_2ch_frac", "clk_i2s3_2ch_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(31), 0, -+ RK3588_CLKGATE_CON(8), 2, GFLAGS, -+ &rk3588_i2s3_2ch_fracmux), -+ GATE(MCLK_I2S3_2CH, "mclk_i2s3_2ch", "clk_i2s3_2ch", 0, -+ RK3588_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(CLK_DAC_ACDCDIG, "clk_dac_acdcdig", "mclk_i2s3_2ch", 0, -+ RK3588_CLKGATE_CON(8), 4, GFLAGS), -+ MUX(I2S3_2CH_MCLKOUT, "i2s3_2ch_mclkout", i2s3_2ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(32), 2, 1, MFLAGS), -+ GATE(PCLK_ACDCDIG, "pclk_acdcdig", "pclk_audio_root", 0, -+ RK3588_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(7), 4, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "gpll_peri", "gpll", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(0, "cpll_peri", "cpll", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(0, "hdmiphy_peri", "hdmiphy", CLK_IS_CRITICAL, ++ RK3328_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE_NOGATE(ACLK_PERI_PRE, "aclk_peri_pre", mux_aclk_peri_pre_p, CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(29), 0, 2, DFLAGS, ++ RK3328_CLKGATE_CON(10), 2, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL, ++ RK3328_CLKSEL_CON(29), 4, 3, DFLAGS, ++ RK3328_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_pre", CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, ++ RK3328_CLKGATE_CON(10), 0, GFLAGS), + -+ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(24), 9, 1, MFLAGS, 4, 5, DFLAGS, -+ RK3588_CLKGATE_CON(7), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(25), 0, -+ RK3588_CLKGATE_CON(7), 6, GFLAGS, -+ &rk3588_i2s0_8ch_tx_fracmux), -+ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, -+ RK3588_CLKGATE_CON(7), 7, GFLAGS), ++ COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_2plls_24m_u480m_p, 0, ++ RK3328_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3328_CLKGATE_CON(4), 3, GFLAGS), + -+ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(26), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(7), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(27), 0, -+ RK3588_CLKGATE_CON(7), 9, GFLAGS, -+ &rk3588_i2s0_8ch_rx_fracmux), -+ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, -+ RK3588_CLKGATE_CON(7), 10, GFLAGS), -+ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(28), 2, 2, MFLAGS), ++ COMPOSITE(SCLK_SDIO, "clk_sdio", mux_2plls_24m_u480m_p, 0, ++ RK3328_CLKSEL_CON(31), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3328_CLKGATE_CON(4), 4, GFLAGS), + -+ GATE(HCLK_PDM1, "hclk_pdm1", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(9), 6, GFLAGS), -+ COMPOSITE(MCLK_PDM1, "mclk_pdm1", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(36), 7, 2, 
MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(9), 7, GFLAGS), ++ COMPOSITE(SCLK_EMMC, "clk_emmc", mux_2plls_24m_u480m_p, 0, ++ RK3328_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3328_CLKGATE_CON(4), 5, GFLAGS), + -+ GATE(HCLK_SPDIF0, "hclk_spdif0", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(8), 14, GFLAGS), -+ COMPOSITE(CLK_SPDIF0_SRC, "clk_spdif0_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(32), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(8), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF0_FRAC, "clk_spdif0_frac", "clk_spdif0_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(33), 0, -+ RK3588_CLKGATE_CON(9), 0, GFLAGS, -+ &rk3588_spdif0_fracmux), -+ GATE(MCLK_SPDIF0, "mclk_spdif0", "clk_spdif0", 0, -+ RK3588_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE(SCLK_SDMMC_EXT, "clk_sdmmc_ext", mux_2plls_24m_u480m_p, 0, ++ RK3328_CLKSEL_CON(43), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3328_CLKGATE_CON(4), 10, GFLAGS), + -+ GATE(HCLK_SPDIF1, "hclk_spdif1", "hclk_audio_root", 0, -+ RK3588_CLKGATE_CON(9), 2, GFLAGS), -+ COMPOSITE(CLK_SPDIF1_SRC, "clk_spdif1_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(34), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(9), 3, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF1_FRAC, "clk_spdif1_frac", "clk_spdif1_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(35), 0, -+ RK3588_CLKGATE_CON(9), 4, GFLAGS, -+ &rk3588_spdif1_fracmux), -+ GATE(MCLK_SPDIF1, "mclk_spdif1", "clk_spdif1", 0, -+ RK3588_CLKGATE_CON(9), 5, GFLAGS), ++ COMPOSITE(SCLK_REF_USB3OTG_SRC, "clk_ref_usb3otg_src", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3328_CLKGATE_CON(4), 9, GFLAGS), + -+ COMPOSITE(ACLK_AV1_ROOT, "aclk_av1_root", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(163), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(68), 0, GFLAGS), -+ COMPOSITE_NODIV(PCLK_AV1_ROOT, "pclk_av1_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(163), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(68), 3, GFLAGS), ++ MUX(SCLK_REF_USB3OTG, "clk_ref_usb3otg", mux_ref_usb3otg_src_p, CLK_SET_RATE_PARENT, ++ RK3328_CLKSEL_CON(45), 8, 1, MFLAGS), + -+ /* bus */ -+ COMPOSITE(ACLK_BUS_ROOT, "aclk_bus_root", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(38), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(SCLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, ++ RK3328_CLKGATE_CON(4), 7, GFLAGS), + -+ GATE(PCLK_MAILBOX0, "pclk_mailbox0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(PCLK_MAILBOX1, "pclk_mailbox1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_MAILBOX2, "pclk_mailbox2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(16), 13, GFLAGS), -+ GATE(PCLK_PMU2, "pclk_pmu2", "pclk_top_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(PCLK_PMUCM0_INTMUX, "pclk_pmucm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(19), 4, GFLAGS), -+ GATE(PCLK_DDRCM0_INTMUX, "pclk_ddrcm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(19), 5, GFLAGS), ++ COMPOSITE(SCLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, ++ RK3328_CLKSEL_CON(33), 15, 1, MFLAGS, 0, 10, DFLAGS, ++ RK3328_CLKGATE_CON(4), 8, GFLAGS), + -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 12, 2, MFLAGS, -+ RK3588_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(CLK_PWM1_CAPTURE, "clk_pwm1_capture", "xin24m", 0, -+ RK3588_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", 
"pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2, "clk_pwm2", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 14, 2, MFLAGS, -+ RK3588_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(CLK_PWM2_CAPTURE, "clk_pwm2_capture", "xin24m", 0, -+ RK3588_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(PCLK_PWM3, "pclk_pwm3", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM3, "clk_pwm3", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(60), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(CLK_PWM3_CAPTURE, "clk_pwm3_capture", "xin24m", 0, -+ RK3588_CLKGATE_CON(15), 11, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 8 ++ */ + -+ GATE(PCLK_BUSTIMER0, "pclk_bustimer0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 12, GFLAGS), -+ GATE(PCLK_BUSTIMER1, "pclk_bustimer1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 13, GFLAGS), -+ COMPOSITE_NODIV(CLK_BUS_TIMER_ROOT, "clk_bus_timer_root", mux_24m_100m_p, 0, -+ RK3588_CLKSEL_CON(60), 2, 1, MFLAGS, -+ RK3588_CLKGATE_CON(15), 14, GFLAGS), -+ GATE(CLK_BUSTIMER0, "clk_bustimer0", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(15), 15, GFLAGS), -+ GATE(CLK_BUSTIMER1, "clk_bustimer1", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(CLK_BUSTIMER2, "clk_bustimer2", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(CLK_BUSTIMER3, "clk_bustimer3", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(CLK_BUSTIMER4, "clk_bustimer4", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(CLK_BUSTIMER5, "clk_bustimer5", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(CLK_BUSTIMER6, "clk_bustimer6", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(CLK_BUSTIMER7, "clk_bustimer7", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(CLK_BUSTIMER8, "clk_bustimer8", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(CLK_BUSTIMER9, "clk_bustimer9", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(CLK_BUSTIMER10, "clk_bustimer10", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(CLK_BUSTIMER11, "clk_bustimer11", "clk_bus_timer_root", 0, -+ RK3588_CLKGATE_CON(16), 10, GFLAGS), ++ /* PD_GMAC */ ++ COMPOSITE(ACLK_GMAC, "aclk_gmac", mux_2plls_hdmiphy_p, 0, ++ RK3328_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(3), 2, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_GMAC, "pclk_gmac", "aclk_gmac", 0, ++ RK3328_CLKSEL_CON(25), 8, 3, DFLAGS, ++ RK3328_CLKGATE_CON(9), 0, GFLAGS), + -+ GATE(PCLK_WDT0, "pclk_wdt0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(TCLK_WDT0, "tclk_wdt0", "xin24m", 0, -+ RK3588_CLKGATE_CON(15), 1, GFLAGS), ++ COMPOSITE(SCLK_MAC2IO_SRC, "clk_mac2io_src", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(3), 1, GFLAGS), ++ GATE(SCLK_MAC2IO_REF, "clk_mac2io_ref", "clk_mac2io", 0, ++ RK3328_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(SCLK_MAC2IO_RX, "clk_mac2io_rx", "clk_mac2io", 0, ++ RK3328_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(SCLK_MAC2IO_TX, "clk_mac2io_tx", "clk_mac2io", 0, ++ RK3328_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(SCLK_MAC2IO_REFOUT, "clk_mac2io_refout", "clk_mac2io", 0, ++ RK3328_CLKGATE_CON(9), 6, GFLAGS), ++ COMPOSITE(SCLK_MAC2IO_OUT, "clk_mac2io_out", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3328_CLKGATE_CON(3), 5, GFLAGS), ++ MUXGRF(SCLK_MAC2IO, "clk_mac2io", mux_mac2io_src_p, 
CLK_SET_RATE_NO_REPARENT, ++ RK3328_GRF_MAC_CON1, 10, 1, MFLAGS), ++ MUXGRF(SCLK_MAC2IO_EXT, "clk_mac2io_ext", mux_mac2io_ext_p, CLK_SET_RATE_NO_REPARENT, ++ RK3328_GRF_SOC_CON4, 14, 1, MFLAGS), + -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(11), 8, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(39), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(11), 10, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(39), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(11), 11, GFLAGS), -+ GATE(PCLK_CAN2, "pclk_can2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(11), 12, GFLAGS), -+ COMPOSITE(CLK_CAN2, "clk_can2", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(40), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(11), 13, GFLAGS), ++ COMPOSITE(SCLK_MAC2PHY_SRC, "clk_mac2phy_src", mux_2plls_p, 0, ++ RK3328_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3328_CLKGATE_CON(3), 0, GFLAGS), ++ GATE(SCLK_MAC2PHY_REF, "clk_mac2phy_ref", "clk_mac2phy", 0, ++ RK3328_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(SCLK_MAC2PHY_RXTX, "clk_mac2phy_rxtx", "clk_mac2phy", 0, ++ RK3328_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_MAC2PHY_OUT, "clk_mac2phy_out", "clk_mac2phy", 0, ++ RK3328_CLKSEL_CON(26), 8, 2, DFLAGS, ++ RK3328_CLKGATE_CON(9), 2, GFLAGS), ++ MUXGRF(SCLK_MAC2PHY, "clk_mac2phy", mux_mac2phy_src_p, CLK_SET_RATE_NO_REPARENT, ++ RK3328_GRF_MAC_CON2, 10, 1, MFLAGS), + -+ GATE(ACLK_DECOM, "aclk_decom", "aclk_bus_root", 0, -+ RK3588_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(PCLK_DECOM, "pclk_decom", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(17), 7, GFLAGS), -+ COMPOSITE(DCLK_DECOM, "dclk_decom", gpll_spll_p, 0, -+ RK3588_CLKSEL_CON(62), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(ACLK_DMAC0, "aclk_dmac0", "aclk_bus_root", 0, -+ RK3588_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_bus_root", 0, -+ RK3588_CLKGATE_CON(10), 6, GFLAGS), -+ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_bus_root", 0, -+ RK3588_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(10), 3, GFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(16), 14, GFLAGS), -+ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_24m_32k_p, 0, -+ RK3588_CLKSEL_CON(60), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(17), 0, GFLAGS), -+ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_24m_32k_p, 0, -+ RK3588_CLKSEL_CON(60), 14, 1, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(17), 1, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(17), 2, GFLAGS), -+ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_24m_32k_p, 0, -+ RK3588_CLKSEL_CON(61), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(17), 4, GFLAGS), -+ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_24m_32k_p, 0, -+ RK3588_CLKSEL_CON(61), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(17), 5, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 9 ++ */ + -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", 
"pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 11, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(PCLK_I2C6, "pclk_i2c6", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 14, GFLAGS), -+ GATE(PCLK_I2C8, "pclk_i2c8", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(10), 15, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 6, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 7, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 8, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 9, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 10, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 11, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 12, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C8, "clk_i2c8", mux_200m_100m_p, 0, -+ RK3588_CLKSEL_CON(38), 13, 1, MFLAGS, -+ RK3588_CLKGATE_CON(11), 7, GFLAGS), ++ /* PD_VOP */ ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3328_CLKGATE_CON(21), 10, GFLAGS), ++ GATE(0, "aclk_rga_niu", "aclk_rga_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK3328_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(0, "aclk_vop_niu", "aclk_vop_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 4, GFLAGS), + -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(18), 9, GFLAGS), -+ GATE(CLK_OTPC_NS, "clk_otpc_ns", "xin24m", 0, -+ RK3588_CLKGATE_CON(18), 10, GFLAGS), -+ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, -+ RK3588_CLKGATE_CON(18), 11, GFLAGS), -+ GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0, -+ RK3588_CLKGATE_CON(18), 13, GFLAGS), -+ GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0, -+ RK3588_CLKGATE_CON(18), 12, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 6, GFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 8, GFLAGS), ++ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 15, GFLAGS), ++ GATE(0, "aclk_vio_niu", "aclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 2, GFLAGS), + -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(11), 14, GFLAGS), -+ COMPOSITE(CLK_SARADC, "clk_saradc", gpll_24m_p, 0, -+ RK3588_CLKSEL_CON(40), 14, 1, MFLAGS, 6, 8, DFLAGS, -+ RK3588_CLKGATE_CON(11), 15, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(0, "hclk_vop_niu", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 7, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 9, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 11, GFLAGS), ++ GATE(0, 
"hclk_ahb1tom", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 12, GFLAGS), ++ GATE(0, "pclk_vio_h2p", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 13, GFLAGS), ++ GATE(0, "hclk_vio_h2p", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(21), 14, GFLAGS), ++ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 0, GFLAGS), ++ GATE(0, "hclk_vio_niu", "hclk_vio_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(22), 1, GFLAGS), ++ GATE(PCLK_HDMI, "pclk_hdmi", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 4, GFLAGS), ++ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 5, GFLAGS), + -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(PCLK_SPI3, "pclk_spi3", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(PCLK_SPI4, "pclk_spi4", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(14), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_150m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 2, 2, MFLAGS, -+ RK3588_CLKGATE_CON(14), 11, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_150m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 4, 2, MFLAGS, -+ RK3588_CLKGATE_CON(14), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_150m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 6, 2, MFLAGS, -+ RK3588_CLKGATE_CON(14), 13, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI3, "clk_spi3", mux_200m_150m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(14), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI4, "clk_spi4", mux_200m_150m_24m_p, 0, -+ RK3588_CLKSEL_CON(59), 10, 2, MFLAGS, -+ RK3588_CLKGATE_CON(14), 15, GFLAGS), ++ /* PD_PERI */ ++ GATE(0, "aclk_peri_noc", "aclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 11, GFLAGS), ++ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_peri", 0, RK3328_CLKGATE_CON(19), 14, GFLAGS), + -+ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(18), 6, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE(CLK_TSADC, "clk_tsadc", gpll_24m_p, 0, -+ RK3588_CLKSEL_CON(41), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(HCLK_SDMMC_EXT, "hclk_sdmmc_ext", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 15, GFLAGS), ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 8, GFLAGS), ++ GATE(HCLK_OTG_PMU, "hclk_otg_pmu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 9, GFLAGS), ++ GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 12, GFLAGS), ++ GATE(0, "pclk_peri_niu", "hclk_peri", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(19), 13, GFLAGS), + -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_top_root", 0, -+ 
RK3588_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 5, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 6, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 7, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 8, GFLAGS), -+ GATE(PCLK_UART8, "pclk_uart8", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 9, GFLAGS), -+ GATE(PCLK_UART9, "pclk_uart9", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(12), 10, GFLAGS), ++ /* PD_GMAC */ ++ GATE(ACLK_MAC2PHY, "aclk_mac2phy", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(ACLK_MAC2IO, "aclk_mac2io", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 2, GFLAGS), ++ GATE(0, "aclk_gmac_niu", "aclk_gmac", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(PCLK_MAC2PHY, "pclk_mac2phy", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 1, GFLAGS), ++ GATE(PCLK_MAC2IO, "pclk_mac2io", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(0, "pclk_gmac_niu", "pclk_gmac", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(26), 5, GFLAGS), + -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(41), 14, 1, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(12), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(42), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(12), 12, GFLAGS, -+ &rk3588_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RK3588_CLKGATE_CON(12), 13, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(43), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(12), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(44), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(12), 15, GFLAGS, -+ &rk3588_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RK3588_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(45), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(13), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(46), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(13), 2, GFLAGS, -+ &rk3588_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RK3588_CLKGATE_CON(13), 3, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(47), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(13), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(48), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(13), 5, GFLAGS, -+ &rk3588_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RK3588_CLKGATE_CON(13), 6, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(49), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(13), 7, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(50), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(13), 8, GFLAGS, -+ &rk3588_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RK3588_CLKGATE_CON(13), 9, GFLAGS), -+ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(51), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ 
RK3588_CLKGATE_CON(13), 10, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(52), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(13), 11, GFLAGS, -+ &rk3588_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, -+ RK3588_CLKGATE_CON(13), 12, GFLAGS), -+ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(53), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(13), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(54), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(13), 14, GFLAGS, -+ &rk3588_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, -+ RK3588_CLKGATE_CON(13), 15, GFLAGS), -+ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(55), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(56), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(14), 1, GFLAGS, -+ &rk3588_uart8_fracmux), -+ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, -+ RK3588_CLKGATE_CON(14), 2, GFLAGS), -+ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(57), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(14), 3, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(58), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_CLKGATE_CON(14), 4, GFLAGS, -+ &rk3588_uart9_fracmux), -+ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, -+ RK3588_CLKGATE_CON(14), 5, GFLAGS), ++ /* PD_BUS */ ++ GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 12, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 11, GFLAGS), ++ GATE(ACLK_TSP, "aclk_tsp", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 12, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac_bus", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 1, GFLAGS), + -+ /* center */ -+ COMPOSITE_NODIV(ACLK_CENTER_ROOT, "aclk_center_root", mux_700m_400m_200m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(69), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_CENTER_LOW_ROOT, "aclk_center_low_root", mux_500m_250m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 2, 2, MFLAGS, -+ RK3588_CLKGATE_CON(69), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_CENTER_ROOT, "hclk_center_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 4, 2, MFLAGS, -+ RK3588_CLKGATE_CON(69), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_CENTER_ROOT, "pclk_center_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 6, 2, MFLAGS | CLK_MUX_READ_ONLY, -+ RK3588_CLKGATE_CON(69), 3, GFLAGS), -+ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_center_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(69), 5, GFLAGS), -+ GATE(ACLK_DDR_SHAREMEM, "aclk_ddr_sharemem", "aclk_center_low_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(69), 6, GFLAGS), -+ COMPOSITE_NODIV(ACLK_CENTER_S200_ROOT, "aclk_center_s200_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(69), 8, GFLAGS), -+ COMPOSITE_NODIV(ACLK_CENTER_S400_ROOT, "aclk_center_s400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(165), 10, 2, MFLAGS, -+ 
RK3588_CLKGATE_CON(69), 9, GFLAGS), -+ GATE(FCLK_DDR_CM0_CORE, "fclk_ddr_cm0_core", "hclk_center_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(69), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_DDR_TIMER_ROOT, "clk_ddr_timer_root", mux_24m_100m_p, CLK_IGNORE_UNUSED, -+ RK3588_CLKSEL_CON(165), 12, 1, MFLAGS, -+ RK3588_CLKGATE_CON(69), 15, GFLAGS), -+ GATE(CLK_DDR_TIMER0, "clk_ddr_timer0", "clk_ddr_timer_root", 0, -+ RK3588_CLKGATE_CON(70), 0, GFLAGS), -+ GATE(CLK_DDR_TIMER1, "clk_ddr_timer1", "clk_ddr_timer_root", 0, -+ RK3588_CLKGATE_CON(70), 1, GFLAGS), -+ GATE(TCLK_WDT_DDR, "tclk_wdt_ddr", "xin24m", 0, -+ RK3588_CLKGATE_CON(70), 2, GFLAGS), -+ COMPOSITE(CLK_DDR_CM0_RTC, "clk_ddr_cm0_rtc", mux_24m_32k_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(166), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(70), 4, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_center_root", 0, -+ RK3588_CLKGATE_CON(70), 7, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_center_root", 0, -+ RK3588_CLKGATE_CON(70), 8, GFLAGS), -+ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_center_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(70), 9, GFLAGS), -+ GATE(PCLK_SHAREMEM, "pclk_sharemem", "pclk_center_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(70), 10, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(HCLK_TSP, "hclk_tsp", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 11, GFLAGS), ++ GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 13, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(28), 0, GFLAGS), + -+ /* gpu */ -+ COMPOSITE(CLK_GPU_SRC, "clk_gpu_src", gpll_cpll_aupll_npll_spll_p, 0, -+ RK3588_CLKSEL_CON(158), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(66), 1, GFLAGS), -+ GATE(CLK_GPU, "clk_gpu", "clk_gpu_src", 0, -+ RK3588_CLKGATE_CON(66), 4, GFLAGS), -+ GATE(CLK_GPU_COREGROUP, "clk_gpu_coregroup", "clk_gpu_src", 0, -+ RK3588_CLKGATE_CON(66), 6, GFLAGS), -+ COMPOSITE_NOMUX(CLK_GPU_STACKS, "clk_gpu_stacks", "clk_gpu_src", 0, -+ RK3588_CLKSEL_CON(159), 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(66), 7, GFLAGS), -+ GATE(CLK_GPU_PVTM, "clk_gpu_pvtm", "xin24m", 0, -+ RK3588_CLKGATE_CON(67), 0, GFLAGS), -+ GATE(CLK_CORE_GPU_PVTM, "clk_core_gpu_pvtm", "clk_gpu_src", 0, -+ RK3588_CLKGATE_CON(67), 1, GFLAGS), ++ GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 14, GFLAGS), ++ GATE(0, "pclk_efuse", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(0, "pclk_otp", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(28), 4, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3328_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer0", "pclk_bus", 
CLK_IS_CRITICAL, RK3328_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(0, "pclk_stimer", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(PCLK_SPI, "pclk_spi", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_PWM, "pclk_rk_pwm", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 13, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 14, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", 0, RK3328_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(0, "pclk_cru", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(0, "pclk_sgrf", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(0, "pclk_sim", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 10, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, RK3328_CLKGATE_CON(17), 15, GFLAGS), ++ GATE(0, "pclk_pmu", "pclk_bus", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(28), 3, GFLAGS), + -+ /* isp1 */ -+ COMPOSITE(ACLK_ISP1_ROOT, "aclk_isp1_root", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(67), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(26), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_ISP1_ROOT, "hclk_isp1_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(67), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(26), 1, GFLAGS), -+ COMPOSITE(CLK_ISP1_CORE, "clk_isp1_core", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(67), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(26), 2, GFLAGS), -+ GATE(CLK_ISP1_CORE_MARVIN, "clk_isp1_core_marvin", "clk_isp1_core", 0, -+ RK3588_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(CLK_ISP1_CORE_VICAP, "clk_isp1_core_vicap", "clk_isp1_core", 0, -+ RK3588_CLKGATE_CON(26), 4, GFLAGS), ++ /* Watchdog pclk is controlled from the secure GRF */ ++ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_bus"), + -+ /* npu */ -+ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(73), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(29), 0, GFLAGS), -+ COMPOSITE(CLK_NPU_DSU0, "clk_npu_dsu0", gpll_cpll_aupll_npll_spll_p, 0, -+ RK3588_CLKSEL_CON(73), 7, 3, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(29), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(74), 1, 2, MFLAGS, -+ RK3588_CLKGATE_CON(29), 4, GFLAGS), -+ GATE(ACLK_NPU1, "aclk_npu1", "clk_npu_dsu0", 0, -+ RK3588_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(HCLK_NPU1, "hclk_npu1", "hclk_npu_root", 0, -+ RK3588_CLKGATE_CON(27), 2, GFLAGS), -+ GATE(ACLK_NPU2, "aclk_npu2", "clk_npu_dsu0", 0, -+ RK3588_CLKGATE_CON(28), 0, GFLAGS), -+ GATE(HCLK_NPU2, "hclk_npu2", "hclk_npu_root", 0, -+ RK3588_CLKGATE_CON(28), 2, GFLAGS), -+ COMPOSITE_NODIV(HCLK_NPU_CM0_ROOT, "hclk_npu_cm0_root", mux_400m_200m_100m_24m_p, 0, -+ RK3588_CLKSEL_CON(74), 5, 2, MFLAGS, -+ RK3588_CLKGATE_CON(30), 1, GFLAGS), -+ GATE(FCLK_NPU_CM0_CORE, "fclk_npu_cm0_core", "hclk_npu_cm0_root", 0, -+ 
RK3588_CLKGATE_CON(30), 3, GFLAGS), -+ COMPOSITE(CLK_NPU_CM0_RTC, "clk_npu_cm0_rtc", mux_24m_32k_p, 0, -+ RK3588_CLKSEL_CON(74), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(30), 5, GFLAGS), -+ GATE(PCLK_NPU_PVTM, "pclk_npu_pvtm", "pclk_npu_root", 0, -+ RK3588_CLKGATE_CON(29), 12, GFLAGS), -+ GATE(PCLK_NPU_GRF, "pclk_npu_grf", "pclk_npu_root", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(29), 13, GFLAGS), -+ GATE(CLK_NPU_PVTM, "clk_npu_pvtm", "xin24m", 0, -+ RK3588_CLKGATE_CON(29), 14, GFLAGS), -+ GATE(CLK_CORE_NPU_PVTM, "clk_core_npu_pvtm", "clk_npu_dsu0", 0, -+ RK3588_CLKGATE_CON(29), 15, GFLAGS), -+ GATE(ACLK_NPU0, "aclk_npu0", "clk_npu_dsu0", 0, -+ RK3588_CLKGATE_CON(30), 6, GFLAGS), -+ GATE(HCLK_NPU0, "hclk_npu0", "hclk_npu_root", 0, -+ RK3588_CLKGATE_CON(30), 8, GFLAGS), -+ GATE(PCLK_NPU_TIMER, "pclk_npu_timer", "pclk_npu_root", 0, -+ RK3588_CLKGATE_CON(29), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_NPUTIMER_ROOT, "clk_nputimer_root", mux_24m_100m_p, 0, -+ RK3588_CLKSEL_CON(74), 3, 1, MFLAGS, -+ RK3588_CLKGATE_CON(29), 7, GFLAGS), -+ GATE(CLK_NPUTIMER0, "clk_nputimer0", "clk_nputimer_root", 0, -+ RK3588_CLKGATE_CON(29), 8, GFLAGS), -+ GATE(CLK_NPUTIMER1, "clk_nputimer1", "clk_nputimer_root", 0, -+ RK3588_CLKGATE_CON(29), 9, GFLAGS), -+ GATE(PCLK_NPU_WDT, "pclk_npu_wdt", "pclk_npu_root", 0, -+ RK3588_CLKGATE_CON(29), 10, GFLAGS), -+ GATE(TCLK_NPU_WDT, "tclk_npu_wdt", "xin24m", 0, -+ RK3588_CLKGATE_CON(29), 11, GFLAGS), ++ GATE(PCLK_USB3PHY_OTG, "pclk_usb3phy_otg", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 1, GFLAGS), ++ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(28), 2, GFLAGS), ++ GATE(PCLK_USB3_GRF, "pclk_usb3_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(PCLK_USB2_GRF, "pclk_usb2_grf", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 14, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_phy_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(17), 13, GFLAGS), ++ GATE(PCLK_ACODECPHY, "pclk_acodecphy", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(17), 5, GFLAGS), ++ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 7, GFLAGS), ++ GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(0, "pclk_phy_niu", "pclk_phy_pre", CLK_IS_CRITICAL, RK3328_CLKGATE_CON(15), 15, GFLAGS), + -+ /* nvm */ -+ COMPOSITE_NODIV(HCLK_NVM_ROOT, "hclk_nvm_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(77), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(31), 0, GFLAGS), -+ COMPOSITE(ACLK_NVM_ROOT, "aclk_nvm_root", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(77), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(31), 1, GFLAGS), -+ GATE(ACLK_EMMC, "aclk_emmc", "aclk_nvm_root", 0, -+ RK3588_CLKGATE_CON(31), 5, GFLAGS), -+ COMPOSITE(CCLK_EMMC, "cclk_emmc", gpll_cpll_24m_p, 0, -+ RK3588_CLKSEL_CON(77), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3588_CLKGATE_CON(31), 6, GFLAGS), -+ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(78), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(31), 7, GFLAGS), -+ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, -+ RK3588_CLKGATE_CON(31), 8, GFLAGS), ++ /* PD_MMC */ ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", ++ RK3328_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", ++ RK3328_SDMMC_CON1, 1), + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_24m_p, 0, -+ RK3588_CLKSEL_CON(78), 12, 2, MFLAGS, 6, 6, DFLAGS, -+ RK3588_CLKGATE_CON(31), 9, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", ++ 
RK3328_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", ++ RK3328_SDIO_CON1, 1), + -+ /* php */ -+ COMPOSITE(CLK_GMAC0_PTP_REF, "clk_gmac0_ptp_ref", clk_gmac0_ptp_ref_p, 0, -+ RK3588_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3588_CLKGATE_CON(34), 10, GFLAGS), -+ COMPOSITE(CLK_GMAC1_PTP_REF, "clk_gmac1_ptp_ref", clk_gmac1_ptp_ref_p, 0, -+ RK3588_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RK3588_CLKGATE_CON(34), 11, GFLAGS), -+ COMPOSITE(CLK_GMAC_125M, "clk_gmac_125m", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(83), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3588_CLKGATE_CON(35), 5, GFLAGS), -+ COMPOSITE(CLK_GMAC_50M, "clk_gmac_50m", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(84), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(35), 6, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", ++ RK3328_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", ++ RK3328_EMMC_CON1, 1), + -+ COMPOSITE(ACLK_PCIE_ROOT, "aclk_pcie_root", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(80), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(32), 6, GFLAGS), -+ COMPOSITE(ACLK_PHP_ROOT, "aclk_php_root", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(80), 13, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3588_CLKGATE_CON(32), 7, GFLAGS), -+ COMPOSITE_NODIV(PCLK_PHP_ROOT, "pclk_php_root", mux_150m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(80), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(32), 0, GFLAGS), -+ GATE(ACLK_PHP_GIC_ITS, "aclk_php_gic_its", "aclk_pcie_root", CLK_IS_CRITICAL, -+ RK3588_CLKGATE_CON(34), 6, GFLAGS), -+ GATE(ACLK_PCIE_BRIDGE, "aclk_pcie_bridge", "aclk_pcie_root", 0, -+ RK3588_CLKGATE_CON(32), 8, GFLAGS), -+ GATE(ACLK_MMU_PCIE, "aclk_mmu_pcie", "aclk_pcie_bridge", 0, -+ RK3588_CLKGATE_CON(34), 7, GFLAGS), -+ GATE(ACLK_MMU_PHP, "aclk_mmu_php", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(34), 8, GFLAGS), -+ GATE(ACLK_PCIE_4L_DBI, "aclk_pcie_4l_dbi", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(32), 13, GFLAGS), -+ GATE(ACLK_PCIE_2L_DBI, "aclk_pcie_2l_dbi", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(32), 14, GFLAGS), -+ GATE(ACLK_PCIE_1L0_DBI, "aclk_pcie_1l0_dbi", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(32), 15, GFLAGS), -+ GATE(ACLK_PCIE_1L1_DBI, "aclk_pcie_1l1_dbi", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 0, GFLAGS), -+ GATE(ACLK_PCIE_1L2_DBI, "aclk_pcie_1l2_dbi", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 1, GFLAGS), -+ GATE(ACLK_PCIE_4L_MSTR, "aclk_pcie_4l_mstr", "aclk_mmu_pcie", 0, -+ RK3588_CLKGATE_CON(33), 2, GFLAGS), -+ GATE(ACLK_PCIE_2L_MSTR, "aclk_pcie_2l_mstr", "aclk_mmu_pcie", 0, -+ RK3588_CLKGATE_CON(33), 3, GFLAGS), -+ GATE(ACLK_PCIE_1L0_MSTR, "aclk_pcie_1l0_mstr", "aclk_mmu_pcie", 0, -+ RK3588_CLKGATE_CON(33), 4, GFLAGS), -+ GATE(ACLK_PCIE_1L1_MSTR, "aclk_pcie_1l1_mstr", "aclk_mmu_pcie", 0, -+ RK3588_CLKGATE_CON(33), 5, GFLAGS), -+ GATE(ACLK_PCIE_1L2_MSTR, "aclk_pcie_1l2_mstr", "aclk_mmu_pcie", 0, -+ RK3588_CLKGATE_CON(33), 6, GFLAGS), -+ GATE(ACLK_PCIE_4L_SLV, "aclk_pcie_4l_slv", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 7, GFLAGS), -+ GATE(ACLK_PCIE_2L_SLV, "aclk_pcie_2l_slv", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 8, GFLAGS), -+ GATE(ACLK_PCIE_1L0_SLV, "aclk_pcie_1l0_slv", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 9, GFLAGS), -+ GATE(ACLK_PCIE_1L1_SLV, "aclk_pcie_1l1_slv", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 10, GFLAGS), -+ GATE(ACLK_PCIE_1L2_SLV, "aclk_pcie_1l2_slv", "aclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 11, GFLAGS), -+ GATE(PCLK_PCIE_4L, "pclk_pcie_4l", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 12, GFLAGS), -+ 
GATE(PCLK_PCIE_2L, "pclk_pcie_2l", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 13, GFLAGS), -+ GATE(PCLK_PCIE_1L0, "pclk_pcie_1l0", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 14, GFLAGS), -+ GATE(PCLK_PCIE_1L1, "pclk_pcie_1l1", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(33), 15, GFLAGS), -+ GATE(PCLK_PCIE_1L2, "pclk_pcie_1l2", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(34), 0, GFLAGS), -+ GATE(CLK_PCIE_AUX0, "clk_pcie_aux0", "xin24m", 0, -+ RK3588_CLKGATE_CON(34), 1, GFLAGS), -+ GATE(CLK_PCIE_AUX1, "clk_pcie_aux1", "xin24m", 0, -+ RK3588_CLKGATE_CON(34), 2, GFLAGS), -+ GATE(CLK_PCIE_AUX2, "clk_pcie_aux2", "xin24m", 0, -+ RK3588_CLKGATE_CON(34), 3, GFLAGS), -+ GATE(CLK_PCIE_AUX3, "clk_pcie_aux3", "xin24m", 0, -+ RK3588_CLKGATE_CON(34), 4, GFLAGS), -+ GATE(CLK_PCIE_AUX4, "clk_pcie_aux4", "xin24m", 0, -+ RK3588_CLKGATE_CON(34), 5, GFLAGS), -+ GATE(CLK_PIPEPHY0_REF, "clk_pipephy0_ref", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 0, GFLAGS), -+ GATE(CLK_PIPEPHY1_REF, "clk_pipephy1_ref", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 1, GFLAGS), -+ GATE(CLK_PIPEPHY2_REF, "clk_pipephy2_ref", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 2, GFLAGS), -+ GATE(PCLK_GMAC0, "pclk_gmac0", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(32), 3, GFLAGS), -+ GATE(PCLK_GMAC1, "pclk_gmac1", "pclk_php_root", 0, -+ RK3588_CLKGATE_CON(32), 4, GFLAGS), -+ GATE(ACLK_GMAC0, "aclk_gmac0", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(32), 10, GFLAGS), -+ GATE(ACLK_GMAC1, "aclk_gmac1", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(32), 11, GFLAGS), -+ GATE(CLK_PMALIVE0, "clk_pmalive0", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 4, GFLAGS), -+ GATE(CLK_PMALIVE1, "clk_pmalive1", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 5, GFLAGS), -+ GATE(CLK_PMALIVE2, "clk_pmalive2", "xin24m", 0, -+ RK3588_CLKGATE_CON(37), 6, GFLAGS), -+ GATE(ACLK_SATA0, "aclk_sata0", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(37), 7, GFLAGS), -+ GATE(ACLK_SATA1, "aclk_sata1", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(37), 8, GFLAGS), -+ GATE(ACLK_SATA2, "aclk_sata2", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(37), 9, GFLAGS), -+ COMPOSITE(CLK_RXOOB0, "clk_rxoob0", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(82), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(37), 10, GFLAGS), -+ COMPOSITE(CLK_RXOOB1, "clk_rxoob1", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(82), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3588_CLKGATE_CON(37), 11, GFLAGS), -+ COMPOSITE(CLK_RXOOB2, "clk_rxoob2", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(83), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(37), 12, GFLAGS), -+ GATE(ACLK_USB3OTG2, "aclk_usb3otg2", "aclk_mmu_php", 0, -+ RK3588_CLKGATE_CON(35), 7, GFLAGS), -+ GATE(SUSPEND_CLK_USB3OTG2, "suspend_clk_usb3otg2", "xin24m", 0, -+ RK3588_CLKGATE_CON(35), 8, GFLAGS), -+ GATE(REF_CLK_USB3OTG2, "ref_clk_usb3otg2", "xin24m", 0, -+ RK3588_CLKGATE_CON(35), 9, GFLAGS), -+ COMPOSITE(CLK_UTMI_OTG2, "clk_utmi_otg2", mux_150m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(84), 12, 2, MFLAGS, 8, 4, DFLAGS, -+ RK3588_CLKGATE_CON(35), 10, GFLAGS), -+ GATE(PCLK_PCIE_COMBO_PIPE_PHY0, "pclk_pcie_combo_pipe_phy0", "pclk_top_root", 0, -+ RK3588_PHP_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_PCIE_COMBO_PIPE_PHY1, "pclk_pcie_combo_pipe_phy1", "pclk_top_root", 0, -+ RK3588_PHP_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_PCIE_COMBO_PIPE_PHY2, "pclk_pcie_combo_pipe_phy2", "pclk_top_root", 0, -+ RK3588_PHP_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_PCIE_COMBO_PIPE_PHY, "pclk_pcie_combo_pipe_phy", "pclk_top_root", 0, -+ RK3588_PHP_CLKGATE_CON(0), 8, GFLAGS), ++ MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext", ++ 
RK3328_SDMMC_EXT_CON0, 1), ++ MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext", ++ RK3328_SDMMC_EXT_CON1, 1), ++}; + -+ /* rga */ -+ COMPOSITE(CLK_RGA3_1_CORE, "clk_rga3_1_core", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(174), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(76), 6, GFLAGS), -+ COMPOSITE(ACLK_RGA3_ROOT, "aclk_rga3_root", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(174), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(76), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_RGA3_ROOT, "hclk_rga3_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(174), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(76), 1, GFLAGS), -+ GATE(HCLK_RGA3_1, "hclk_rga3_1", "hclk_rga3_root", 0, -+ RK3588_CLKGATE_CON(76), 4, GFLAGS), -+ GATE(ACLK_RGA3_1, "aclk_rga3_1", "aclk_rga3_root", 0, -+ RK3588_CLKGATE_CON(76), 5, GFLAGS), ++static void __init rk3328_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+ /* vdec */ -+ COMPOSITE_NODIV(0, "hclk_rkvdec0_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(89), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(40), 0, GFLAGS), -+ COMPOSITE(0, "aclk_rkvdec0_root", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(89), 7, 2, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(40), 1, GFLAGS), -+ COMPOSITE(ACLK_RKVDEC_CCU, "aclk_rkvdec_ccu", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(89), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(40), 2, GFLAGS), -+ COMPOSITE(CLK_RKVDEC0_CA, "clk_rkvdec0_ca", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(90), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(40), 7, GFLAGS), -+ COMPOSITE(CLK_RKVDEC0_HEVC_CA, "clk_rkvdec0_hevc_ca", gpll_cpll_npll_1000m_p, 0, -+ RK3588_CLKSEL_CON(90), 11, 2, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(40), 8, GFLAGS), -+ COMPOSITE(CLK_RKVDEC0_CORE, "clk_rkvdec0_core", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(91), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(40), 9, GFLAGS), -+ COMPOSITE_NODIV(0, "hclk_rkvdec1_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(93), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(41), 0, GFLAGS), -+ COMPOSITE(0, "aclk_rkvdec1_root", gpll_cpll_aupll_npll_p, 0, -+ RK3588_CLKSEL_CON(93), 7, 2, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(41), 1, GFLAGS), -+ COMPOSITE(CLK_RKVDEC1_CA, "clk_rkvdec1_ca", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(93), 14, 1, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(41), 6, GFLAGS), -+ COMPOSITE(CLK_RKVDEC1_HEVC_CA, "clk_rkvdec1_hevc_ca", gpll_cpll_npll_1000m_p, 0, -+ RK3588_CLKSEL_CON(94), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(41), 7, GFLAGS), -+ COMPOSITE(CLK_RKVDEC1_CORE, "clk_rkvdec1_core", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(94), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(41), 8, GFLAGS), ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; ++ } + -+ /* sdio */ -+ COMPOSITE_NODIV(0, "hclk_sdio_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(172), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(75), 0, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", gpll_cpll_24m_p, 0, -+ RK3588_CLKSEL_CON(172), 8, 2, MFLAGS, 2, 6, DFLAGS, -+ RK3588_CLKGATE_CON(75), 3, GFLAGS), -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RK3588_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RK3588_SDIO_CON1, 1), ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ clks = 
ctx->clk_data.clks; + -+ /* usb */ -+ COMPOSITE(ACLK_USB_ROOT, "aclk_usb_root", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(96), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(42), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_USB_ROOT, "hclk_usb_root", mux_150m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(96), 6, 2, MFLAGS, -+ RK3588_CLKGATE_CON(42), 1, GFLAGS), -+ GATE(SUSPEND_CLK_USB3OTG0, "suspend_clk_usb3otg0", "xin24m", 0, -+ RK3588_CLKGATE_CON(42), 5, GFLAGS), -+ GATE(REF_CLK_USB3OTG0, "ref_clk_usb3otg0", "xin24m", 0, -+ RK3588_CLKGATE_CON(42), 6, GFLAGS), -+ GATE(SUSPEND_CLK_USB3OTG1, "suspend_clk_usb3otg1", "xin24m", 0, -+ RK3588_CLKGATE_CON(42), 8, GFLAGS), -+ GATE(REF_CLK_USB3OTG1, "ref_clk_usb3otg1", "xin24m", 0, -+ RK3588_CLKGATE_CON(42), 9, GFLAGS), ++ rockchip_clk_register_plls(ctx, rk3328_pll_clks, ++ ARRAY_SIZE(rk3328_pll_clks), ++ RK3328_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk3328_clk_branches, ++ ARRAY_SIZE(rk3328_clk_branches)); + -+ /* vdpu */ -+ COMPOSITE(ACLK_VDPU_ROOT, "aclk_vdpu_root", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(98), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(44), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VDPU_LOW_ROOT, "aclk_vdpu_low_root", mux_400m_200m_100m_24m_p, 0, -+ RK3588_CLKSEL_CON(98), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(44), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VDPU_ROOT, "hclk_vdpu_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(98), 9, 2, MFLAGS, -+ RK3588_CLKGATE_CON(44), 2, GFLAGS), -+ COMPOSITE(ACLK_JPEG_DECODER_ROOT, "aclk_jpeg_decoder_root", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(99), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(44), 3, GFLAGS), -+ GATE(HCLK_IEP2P0, "hclk_iep2p0", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 4, GFLAGS), -+ COMPOSITE(CLK_IEP2P0_CORE, "clk_iep2p0_core", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(99), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(45), 6, GFLAGS), -+ GATE(HCLK_JPEG_ENCODER0, "hclk_jpeg_encoder0", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(44), 11, GFLAGS), -+ GATE(HCLK_JPEG_ENCODER1, "hclk_jpeg_encoder1", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(44), 13, GFLAGS), -+ GATE(HCLK_JPEG_ENCODER2, "hclk_jpeg_encoder2", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(44), 15, GFLAGS), -+ GATE(HCLK_JPEG_ENCODER3, "hclk_jpeg_encoder3", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 1, GFLAGS), -+ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 3, GFLAGS), -+ GATE(HCLK_RGA2, "hclk_rga2", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 7, GFLAGS), -+ GATE(ACLK_RGA2, "aclk_rga2", "aclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 8, GFLAGS), -+ COMPOSITE(CLK_RGA2_CORE, "clk_rga2_core", gpll_cpll_npll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(100), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(45), 9, GFLAGS), -+ GATE(HCLK_RGA3_0, "hclk_rga3_0", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 10, GFLAGS), -+ GATE(ACLK_RGA3_0, "aclk_rga3_0", "aclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(45), 11, GFLAGS), -+ COMPOSITE(CLK_RGA3_0_CORE, "clk_rga3_0_core", gpll_cpll_npll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(100), 13, 3, MFLAGS, 8, 5, DFLAGS, -+ RK3588_CLKGATE_CON(45), 12, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vdpu_root", 0, -+ RK3588_CLKGATE_CON(44), 9, GFLAGS), ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 4, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3328_cpuclk_data, rk3328_cpuclk_rates, ++ ARRAY_SIZE(rk3328_cpuclk_rates)); + -+ /* venc */ -+ COMPOSITE_NODIV(HCLK_RKVENC1_ROOT, "hclk_rkvenc1_root", mux_200m_100m_50m_24m_p, 0, 
-+ RK3588_CLKSEL_CON(104), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(48), 0, GFLAGS), -+ COMPOSITE(ACLK_RKVENC1_ROOT, "aclk_rkvenc1_root", gpll_cpll_npll_p, 0, -+ RK3588_CLKSEL_CON(104), 7, 2, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(48), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_RKVENC0_ROOT, "hclk_rkvenc0_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(102), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(47), 0, GFLAGS), -+ COMPOSITE(ACLK_RKVENC0_ROOT, "aclk_rkvenc0_root", gpll_cpll_npll_p, 0, -+ RK3588_CLKSEL_CON(102), 7, 2, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(47), 1, GFLAGS), -+ GATE(HCLK_RKVENC0, "hclk_rkvenc0", "hclk_rkvenc0_root", 0, -+ RK3588_CLKGATE_CON(47), 4, GFLAGS), -+ GATE(ACLK_RKVENC0, "aclk_rkvenc0", "aclk_rkvenc0_root", 0, -+ RK3588_CLKGATE_CON(47), 5, GFLAGS), -+ COMPOSITE(CLK_RKVENC0_CORE, "clk_rkvenc0_core", gpll_cpll_aupll_npll_p, 0, -+ RK3588_CLKSEL_CON(102), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(47), 6, GFLAGS), -+ COMPOSITE(CLK_RKVENC1_CORE, "clk_rkvenc1_core", gpll_cpll_aupll_npll_p, 0, -+ RK3588_CLKSEL_CON(104), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(48), 6, GFLAGS), ++ rockchip_register_softrst(np, 12, reg_base + RK3328_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ /* vi */ -+ COMPOSITE(ACLK_VI_ROOT, "aclk_vi_root", gpll_cpll_npll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(106), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(49), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(106), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(49), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(106), 10, 2, MFLAGS, -+ RK3588_CLKGATE_CON(49), 2, GFLAGS), -+ COMPOSITE_NODIV(ICLK_CSIHOST01, "iclk_csihost01", mux_400m_200m_100m_24m_p, 0, -+ RK3588_CLKSEL_CON(108), 14, 2, MFLAGS, -+ RK3588_CLKGATE_CON(51), 10, GFLAGS), -+ GATE(ICLK_CSIHOST0, "iclk_csihost0", "iclk_csihost01", 0, -+ RK3588_CLKGATE_CON(51), 11, GFLAGS), -+ GATE(ICLK_CSIHOST1, "iclk_csihost1", "iclk_csihost01", 0, -+ RK3588_CLKGATE_CON(51), 12, GFLAGS), -+ GATE(PCLK_CSI_HOST_0, "pclk_csi_host_0", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 4, GFLAGS), -+ GATE(PCLK_CSI_HOST_1, "pclk_csi_host_1", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 5, GFLAGS), -+ GATE(PCLK_CSI_HOST_2, "pclk_csi_host_2", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 6, GFLAGS), -+ GATE(PCLK_CSI_HOST_3, "pclk_csi_host_3", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 7, GFLAGS), -+ GATE(PCLK_CSI_HOST_4, "pclk_csi_host_4", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 8, GFLAGS), -+ GATE(PCLK_CSI_HOST_5, "pclk_csi_host_5", "pclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 9, GFLAGS), -+ GATE(ACLK_FISHEYE0, "aclk_fisheye0", "aclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 14, GFLAGS), -+ GATE(HCLK_FISHEYE0, "hclk_fisheye0", "hclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 15, GFLAGS), -+ COMPOSITE(CLK_FISHEYE0_CORE, "clk_fisheye0_core", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(108), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(50), 0, GFLAGS), -+ GATE(ACLK_FISHEYE1, "aclk_fisheye1", "aclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 1, GFLAGS), -+ GATE(HCLK_FISHEYE1, "hclk_fisheye1", "hclk_vi_root", 0, -+ RK3588_CLKGATE_CON(50), 2, GFLAGS), -+ COMPOSITE(CLK_FISHEYE1_CORE, "clk_fisheye1_core", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(108), 12, 2, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(50), 3, GFLAGS), -+ COMPOSITE(CLK_ISP0_CORE, "clk_isp0_core", gpll_cpll_aupll_spll_p, 0, -+ RK3588_CLKSEL_CON(107), 11, 
2, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(49), 9, GFLAGS), -+ GATE(CLK_ISP0_CORE_MARVIN, "clk_isp0_core_marvin", "clk_isp0_core", 0, -+ RK3588_CLKGATE_CON(49), 10, GFLAGS), -+ GATE(CLK_ISP0_CORE_VICAP, "clk_isp0_core_vicap", "clk_isp0_core", 0, -+ RK3588_CLKGATE_CON(49), 11, GFLAGS), -+ GATE(ACLK_ISP0, "aclk_isp0", "aclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 12, GFLAGS), -+ GATE(HCLK_ISP0, "hclk_isp0", "hclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 13, GFLAGS), -+ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(107), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(49), 6, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 7, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, -+ RK3588_CLKGATE_CON(49), 8, GFLAGS), ++ rockchip_register_restart_notifier(ctx, RK3328_GLB_SRST_FST, NULL); + -+ /* vo0 */ -+ COMPOSITE(ACLK_VO0_ROOT, "aclk_vo0_root", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(116), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(55), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO0_ROOT, "hclk_vo0_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(116), 6, 2, MFLAGS, -+ RK3588_CLKGATE_CON(55), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO0_S_ROOT, "hclk_vo0_s_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(116), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(55), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO0_ROOT, "pclk_vo0_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(116), 10, 2, MFLAGS, -+ RK3588_CLKGATE_CON(55), 3, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO0_S_ROOT, "pclk_vo0_s_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(116), 12, 2, MFLAGS, -+ RK3588_CLKGATE_CON(55), 4, GFLAGS), -+ GATE(PCLK_DP0, "pclk_dp0", "pclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 4, GFLAGS), -+ GATE(PCLK_DP1, "pclk_dp1", "pclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 5, GFLAGS), -+ GATE(PCLK_S_DP0, "pclk_s_dp0", "pclk_vo0_s_root", 0, -+ RK3588_CLKGATE_CON(56), 6, GFLAGS), -+ GATE(PCLK_S_DP1, "pclk_s_dp1", "pclk_vo0_s_root", 0, -+ RK3588_CLKGATE_CON(56), 7, GFLAGS), -+ GATE(CLK_DP0, "clk_dp0", "aclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 8, GFLAGS), -+ GATE(CLK_DP1, "clk_dp1", "aclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 9, GFLAGS), -+ GATE(HCLK_HDCP_KEY0, "hclk_hdcp_key0", "hclk_vo0_s_root", 0, -+ RK3588_CLKGATE_CON(55), 11, GFLAGS), -+ GATE(PCLK_HDCP0, "pclk_hdcp0", "pclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(55), 14, GFLAGS), -+ GATE(ACLK_TRNG0, "aclk_trng0", "aclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 0, GFLAGS), -+ GATE(PCLK_TRNG0, "pclk_trng0", "pclk_vo0_root", 0, -+ RK3588_CLKGATE_CON(56), 1, GFLAGS), -+ GATE(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(55), 10, GFLAGS), -+ COMPOSITE(CLK_I2S4_8CH_TX_SRC, "clk_i2s4_8ch_tx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(118), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(56), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S4_8CH_TX_FRAC, "clk_i2s4_8ch_tx_frac", "clk_i2s4_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(119), 0, -+ RK3588_CLKGATE_CON(56), 12, GFLAGS, -+ &rk3588_i2s4_8ch_tx_fracmux), -+ GATE(MCLK_I2S4_8CH_TX, "mclk_i2s4_8ch_tx", "clk_i2s4_8ch_tx", 0, -+ RK3588_CLKGATE_CON(56), 13, GFLAGS), -+ COMPOSITE(CLK_I2S8_8CH_TX_SRC, "clk_i2s8_8ch_tx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(120), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(56), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S8_8CH_TX_FRAC, "clk_i2s8_8ch_tx_frac", "clk_i2s8_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(121), 0, -+ RK3588_CLKGATE_CON(57), 0, 
GFLAGS, -+ &rk3588_i2s8_8ch_tx_fracmux), -+ GATE(MCLK_I2S8_8CH_TX, "mclk_i2s8_8ch_tx", "clk_i2s8_8ch_tx", 0, -+ RK3588_CLKGATE_CON(57), 1, GFLAGS), -+ COMPOSITE(CLK_SPDIF2_DP0_SRC, "clk_spdif2_dp0_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(122), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(57), 3, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF2_DP0_FRAC, "clk_spdif2_dp0_frac", "clk_spdif2_dp0_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(123), 0, -+ RK3588_CLKGATE_CON(57), 4, GFLAGS, -+ &rk3588_spdif2_dp0_fracmux), -+ GATE(MCLK_SPDIF2_DP0, "mclk_spdif2_dp0", "clk_spdif2_dp0", 0, -+ RK3588_CLKGATE_CON(57), 5, GFLAGS), -+ GATE(MCLK_SPDIF2, "mclk_spdif2", "clk_spdif2_dp0", 0, -+ RK3588_CLKGATE_CON(57), 6, GFLAGS), -+ COMPOSITE(CLK_SPDIF5_DP1_SRC, "clk_spdif5_dp1_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(124), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(57), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF5_DP1_FRAC, "clk_spdif5_dp1_frac", "clk_spdif5_dp1_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(125), 0, -+ RK3588_CLKGATE_CON(57), 9, GFLAGS, -+ &rk3588_spdif5_dp1_fracmux), -+ GATE(MCLK_SPDIF5_DP1, "mclk_spdif5_dp1", "clk_spdif5_dp1", 0, -+ RK3588_CLKGATE_CON(57), 10, GFLAGS), -+ GATE(MCLK_SPDIF5, "mclk_spdif5", "clk_spdif5_dp1", 0, -+ RK3588_CLKGATE_CON(57), 11, GFLAGS), -+ COMPOSITE_NOMUX(CLK_AUX16M_0, "clk_aux16m_0", "gpll", 0, -+ RK3588_CLKSEL_CON(117), 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(56), 2, GFLAGS), -+ COMPOSITE_NOMUX(CLK_AUX16M_1, "clk_aux16m_1", "gpll", 0, -+ RK3588_CLKSEL_CON(117), 8, 8, DFLAGS, -+ RK3588_CLKGATE_CON(56), 3, GFLAGS), ++ rockchip_clk_of_add_provider(np, ctx); ++} ++CLK_OF_DECLARE(rk3328_cru, "rockchip,rk3328-cru", rk3328_clk_init); + -+ /* vo1 */ -+ COMPOSITE_HALFDIV(CLK_HDMITRX_REFSRC, "clk_hdmitrx_refsrc", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(157), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(65), 9, GFLAGS), -+ COMPOSITE(ACLK_HDCP1_ROOT, "aclk_hdcp1_root", aclk_hdcp1_root_p, 0, -+ RK3588_CLKSEL_CON(128), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(59), 0, GFLAGS), -+ COMPOSITE(ACLK_HDMIRX_ROOT, "aclk_hdmirx_root", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(128), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(59), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO1_ROOT, "hclk_vo1_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(128), 13, 2, MFLAGS, -+ RK3588_CLKGATE_CON(59), 2, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO1_S_ROOT, "hclk_vo1_s_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(129), 0, 2, MFLAGS, -+ RK3588_CLKGATE_CON(59), 3, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO1_ROOT, "pclk_vo1_root", mux_150m_100m_24m_p, 0, -+ RK3588_CLKSEL_CON(129), 2, 2, MFLAGS, -+ RK3588_CLKGATE_CON(59), 4, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO1_S_ROOT, "pclk_vo1_s_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(129), 4, 2, MFLAGS, -+ RK3588_CLKGATE_CON(59), 5, GFLAGS), -+ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", gpll_cpll_dmyaupll_npll_spll_p, 0, -+ RK3588_CLKSEL_CON(110), 5, 3, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(52), 0, GFLAGS), -+ FACTOR(ACLK_VOP_DIV2_SRC, "aclk_vop_div2_src", "aclk_vop_root", 0, 1, 2), -+ COMPOSITE_NODIV(ACLK_VOP_LOW_ROOT, "aclk_vop_low_root", mux_400m_200m_100m_24m_p, 0, -+ RK3588_CLKSEL_CON(110), 8, 2, MFLAGS, -+ RK3588_CLKGATE_CON(52), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VOP_ROOT, "hclk_vop_root", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(110), 10, 2, MFLAGS, -+ RK3588_CLKGATE_CON(52), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VOP_ROOT, "pclk_vop_root", mux_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(110), 12, 2, MFLAGS, -+ 
RK3588_CLKGATE_CON(52), 3, GFLAGS), -+ COMPOSITE(ACLK_VO1USB_TOP_ROOT, "aclk_vo1usb_top_root", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(170), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(74), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO1USB_TOP_ROOT, "hclk_vo1usb_top_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3588_CLKSEL_CON(170), 6, 2, MFLAGS, -+ RK3588_CLKGATE_CON(74), 2, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VOP, "aclk_vop", aclk_vop_sub_src_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(115), 9, 1, MFLAGS, -+ RK3588_CLKGATE_CON(52), 9, GFLAGS), -+ GATE(PCLK_EDP0, "pclk_edp0", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(62), 0, GFLAGS), -+ GATE(CLK_EDP0_24M, "clk_edp0_24m", "xin24m", 0, -+ RK3588_CLKGATE_CON(62), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_EDP0_200M, "clk_edp0_200m", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(140), 1, 2, MFLAGS, -+ RK3588_CLKGATE_CON(62), 2, GFLAGS), -+ GATE(PCLK_EDP1, "pclk_edp1", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(62), 3, GFLAGS), -+ GATE(CLK_EDP1_24M, "clk_edp1_24m", "xin24m", 0, -+ RK3588_CLKGATE_CON(62), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_EDP1_200M, "clk_edp1_200m", mux_200m_100m_50m_24m_p, 0, -+ RK3588_CLKSEL_CON(140), 3, 2, MFLAGS, -+ RK3588_CLKGATE_CON(62), 5, GFLAGS), -+ GATE(HCLK_HDCP_KEY1, "hclk_hdcp_key1", "hclk_vo1_s_root", 0, -+ RK3588_CLKGATE_CON(60), 4, GFLAGS), -+ GATE(PCLK_HDCP1, "pclk_hdcp1", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(60), 7, GFLAGS), -+ GATE(ACLK_HDMIRX, "aclk_hdmirx", "aclk_hdmirx_root", 0, -+ RK3588_CLKGATE_CON(61), 9, GFLAGS), -+ GATE(PCLK_HDMIRX, "pclk_hdmirx", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(61), 10, GFLAGS), -+ GATE(CLK_HDMIRX_REF, "clk_hdmirx_ref", "aclk_hdcp1_root", 0, -+ RK3588_CLKGATE_CON(61), 11, GFLAGS), -+ COMPOSITE(CLK_HDMIRX_AUD_SRC, "clk_hdmirx_aud_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(138), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ RK3588_CLKGATE_CON(61), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_HDMIRX_AUD_FRAC, "clk_hdmirx_aud_frac", "clk_hdmirx_aud_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(139), 0, -+ RK3588_CLKGATE_CON(61), 13, GFLAGS, -+ &rk3588_hdmirx_aud_fracmux), -+ GATE(CLK_HDMIRX_AUD, "clk_hdmirx_aud", "clk_hdmirx_aud_mux", 0, -+ RK3588_CLKGATE_CON(61), 14, GFLAGS), -+ GATE(PCLK_HDMITX0, "pclk_hdmitx0", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(60), 11, GFLAGS), -+ COMPOSITE(CLK_HDMITX0_EARC, "clk_hdmitx0_earc", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(133), 6, 1, MFLAGS, 1, 5, DFLAGS, -+ RK3588_CLKGATE_CON(60), 15, GFLAGS), -+ GATE(CLK_HDMITX0_REF, "clk_hdmitx0_ref", "aclk_hdcp1_root", 0, -+ RK3588_CLKGATE_CON(61), 0, GFLAGS), -+ GATE(PCLK_HDMITX1, "pclk_hdmitx1", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(61), 2, GFLAGS), -+ COMPOSITE(CLK_HDMITX1_EARC, "clk_hdmitx1_earc", gpll_cpll_p, 0, -+ RK3588_CLKSEL_CON(136), 6, 1, MFLAGS, 1, 5, DFLAGS, -+ RK3588_CLKGATE_CON(61), 6, GFLAGS), -+ GATE(CLK_HDMITX1_REF, "clk_hdmitx1_ref", "aclk_hdcp1_root", 0, -+ RK3588_CLKGATE_CON(61), 7, GFLAGS), -+ GATE(ACLK_TRNG1, "aclk_trng1", "aclk_hdcp1_root", 0, -+ RK3588_CLKGATE_CON(60), 9, GFLAGS), -+ GATE(PCLK_TRNG1, "pclk_trng1", "pclk_vo1_root", 0, -+ RK3588_CLKGATE_CON(60), 10, GFLAGS), -+ GATE(0, "pclk_vo1grf", "pclk_vo1_root", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(59), 12, GFLAGS), -+ GATE(PCLK_S_EDP0, "pclk_s_edp0", "pclk_vo1_s_root", 0, -+ RK3588_CLKGATE_CON(59), 14, GFLAGS), -+ GATE(PCLK_S_EDP1, "pclk_s_edp1", "pclk_vo1_s_root", 0, -+ RK3588_CLKGATE_CON(59), 15, GFLAGS), -+ GATE(PCLK_S_HDMIRX, "pclk_s_hdmirx", "pclk_vo1_s_root", 0, -+ RK3588_CLKGATE_CON(65), 8, GFLAGS), -+ 
COMPOSITE(CLK_I2S10_8CH_RX_SRC, "clk_i2s10_8ch_rx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(155), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(65), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S10_8CH_RX_FRAC, "clk_i2s10_8ch_rx_frac", "clk_i2s10_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(156), 0, -+ RK3588_CLKGATE_CON(65), 6, GFLAGS, -+ &rk3588_i2s10_8ch_rx_fracmux), -+ GATE(MCLK_I2S10_8CH_RX, "mclk_i2s10_8ch_rx", "clk_i2s10_8ch_rx", 0, -+ RK3588_CLKGATE_CON(65), 7, GFLAGS), -+ COMPOSITE(CLK_I2S7_8CH_RX_SRC, "clk_i2s7_8ch_rx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(129), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3588_CLKGATE_CON(60), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S7_8CH_RX_FRAC, "clk_i2s7_8ch_rx_frac", "clk_i2s7_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(130), 0, -+ RK3588_CLKGATE_CON(60), 2, GFLAGS, -+ &rk3588_i2s7_8ch_rx_fracmux), -+ GATE(MCLK_I2S7_8CH_RX, "mclk_i2s7_8ch_rx", "clk_i2s7_8ch_rx", 0, -+ RK3588_CLKGATE_CON(60), 3, GFLAGS), -+ COMPOSITE(CLK_I2S9_8CH_RX_SRC, "clk_i2s9_8ch_rx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(153), 12, 1, MFLAGS, 7, 5, DFLAGS, -+ RK3588_CLKGATE_CON(65), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S9_8CH_RX_FRAC, "clk_i2s9_8ch_rx_frac", "clk_i2s9_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(154), 0, -+ RK3588_CLKGATE_CON(65), 2, GFLAGS, -+ &rk3588_i2s9_8ch_rx_fracmux), -+ GATE(MCLK_I2S9_8CH_RX, "mclk_i2s9_8ch_rx", "clk_i2s9_8ch_rx", 0, -+ RK3588_CLKGATE_CON(65), 3, GFLAGS), -+ COMPOSITE(CLK_I2S5_8CH_TX_SRC, "clk_i2s5_8ch_tx_src", gpll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(140), 10, 1, MFLAGS, 5, 5, DFLAGS, -+ RK3588_CLKGATE_CON(62), 6, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S5_8CH_TX_FRAC, "clk_i2s5_8ch_tx_frac", "clk_i2s5_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(141), 0, -+ RK3588_CLKGATE_CON(62), 7, GFLAGS, -+ &rk3588_i2s5_8ch_tx_fracmux), -+ GATE(MCLK_I2S5_8CH_TX, "mclk_i2s5_8ch_tx", "clk_i2s5_8ch_tx", 0, -+ RK3588_CLKGATE_CON(62), 8, GFLAGS), -+ COMPOSITE(CLK_I2S6_8CH_TX_SRC, "clk_i2s6_8ch_tx_src", gpll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(144), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RK3588_CLKGATE_CON(62), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S6_8CH_TX_FRAC, "clk_i2s6_8ch_tx_frac", "clk_i2s6_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(145), 0, -+ RK3588_CLKGATE_CON(62), 14, GFLAGS, -+ &rk3588_i2s6_8ch_tx_fracmux), -+ GATE(MCLK_I2S6_8CH_TX, "mclk_i2s6_8ch_tx", "clk_i2s6_8ch_tx", 0, -+ RK3588_CLKGATE_CON(62), 15, GFLAGS), -+ COMPOSITE(CLK_I2S6_8CH_RX_SRC, "clk_i2s6_8ch_rx_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(146), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(63), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S6_8CH_RX_FRAC, "clk_i2s6_8ch_rx_frac", "clk_i2s6_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(147), 0, -+ RK3588_CLKGATE_CON(63), 1, GFLAGS, -+ &rk3588_i2s6_8ch_rx_fracmux), -+ GATE(MCLK_I2S6_8CH_RX, "mclk_i2s6_8ch_rx", "clk_i2s6_8ch_rx", 0, -+ RK3588_CLKGATE_CON(63), 2, GFLAGS), -+ MUX(I2S6_8CH_MCLKOUT, "i2s6_8ch_mclkout", i2s6_8ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(148), 2, 2, MFLAGS), -+ COMPOSITE(CLK_SPDIF3_SRC, "clk_spdif3_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(148), 9, 1, MFLAGS, 4, 5, DFLAGS, -+ RK3588_CLKGATE_CON(63), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF3_FRAC, "clk_spdif3_frac", "clk_spdif3_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(149), 0, -+ RK3588_CLKGATE_CON(63), 6, GFLAGS, -+ &rk3588_spdif3_fracmux), -+ GATE(MCLK_SPDIF3, "mclk_spdif3", "clk_spdif3", 0, -+ RK3588_CLKGATE_CON(63), 7, GFLAGS), -+ 
COMPOSITE(CLK_SPDIF4_SRC, "clk_spdif4_src", gpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(150), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(63), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF4_FRAC, "clk_spdif4_frac", "clk_spdif4_src", CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(151), 0, -+ RK3588_CLKGATE_CON(63), 10, GFLAGS, -+ &rk3588_spdif4_fracmux), -+ GATE(MCLK_SPDIF4, "mclk_spdif4", "clk_spdif4", 0, -+ RK3588_CLKGATE_CON(63), 11, GFLAGS), -+ COMPOSITE(MCLK_SPDIFRX0, "mclk_spdifrx0", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(152), 7, 2, MFLAGS, 2, 5, DFLAGS, -+ RK3588_CLKGATE_CON(63), 13, GFLAGS), -+ COMPOSITE(MCLK_SPDIFRX1, "mclk_spdifrx1", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(152), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(63), 15, GFLAGS), -+ COMPOSITE(MCLK_SPDIFRX2, "mclk_spdifrx2", gpll_cpll_aupll_p, 0, -+ RK3588_CLKSEL_CON(153), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(64), 1, GFLAGS), -+ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, -+ RK3588_CLKGATE_CON(73), 12, GFLAGS), -+ GATE(CLK_HDMIHDP1, "clk_hdmihdp1", "xin24m", 0, -+ RK3588_CLKGATE_CON(73), 13, GFLAGS), -+ GATE(PCLK_HDPTX0, "pclk_hdptx0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(72), 5, GFLAGS), -+ GATE(PCLK_HDPTX1, "pclk_hdptx1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(72), 6, GFLAGS), -+ GATE(PCLK_USBDPPHY0, "pclk_usbdpphy0", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(72), 2, GFLAGS), -+ GATE(PCLK_USBDPPHY1, "pclk_usbdpphy1", "pclk_top_root", 0, -+ RK3588_CLKGATE_CON(72), 4, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vop_root", 0, -+ RK3588_CLKGATE_CON(52), 8, GFLAGS), -+ COMPOSITE(DCLK_VOP0_SRC, "dclk_vop0_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(111), 7, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(52), 10, GFLAGS), -+ COMPOSITE(DCLK_VOP1_SRC, "dclk_vop1_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(111), 14, 2, MFLAGS, 9, 5, DFLAGS, -+ RK3588_CLKGATE_CON(52), 11, GFLAGS), -+ COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(112), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_CLKGATE_CON(52), 12, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(112), 7, 2, MFLAGS, -+ RK3588_CLKGATE_CON(52), 13, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOP1, "dclk_vop1", dclk_vop1_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(112), 9, 2, MFLAGS, -+ RK3588_CLKGATE_CON(53), 0, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOP2, "dclk_vop2", dclk_vop2_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(112), 11, 2, MFLAGS, -+ RK3588_CLKGATE_CON(53), 1, GFLAGS), -+ COMPOSITE(DCLK_VOP3, "dclk_vop3", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3588_CLKSEL_CON(113), 7, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(53), 2, GFLAGS), -+ GATE(PCLK_DSIHOST0, "pclk_dsihost0", "pclk_vop_root", 0, -+ RK3588_CLKGATE_CON(53), 4, GFLAGS), -+ GATE(PCLK_DSIHOST1, "pclk_dsihost1", "pclk_vop_root", 0, -+ RK3588_CLKGATE_CON(53), 5, GFLAGS), -+ COMPOSITE(CLK_DSIHOST0, "clk_dsihost0", gpll_cpll_v0pll_spll_p, 0, -+ RK3588_CLKSEL_CON(114), 7, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(53), 6, GFLAGS), -+ COMPOSITE(CLK_DSIHOST1, "clk_dsihost1", gpll_cpll_v0pll_spll_p, 0, -+ RK3588_CLKSEL_CON(115), 7, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3588_CLKGATE_CON(53), 7, GFLAGS), -+ GATE(CLK_VOP_PMU, "clk_vop_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(53), 8, GFLAGS), -+ GATE(ACLK_VOP_DOBY, 
"aclk_vop_doby", "aclk_vop_root", 0, -+ RK3588_CLKGATE_CON(53), 10, GFLAGS), -+ GATE(CLK_USBDP_PHY0_IMMORTAL, "clk_usbdp_phy0_immortal", "xin24m", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(CLK_USBDP_PHY1_IMMORTAL, "clk_usbdp_phy1_immortal", "xin24m", CLK_IGNORE_UNUSED, -+ RK3588_CLKGATE_CON(2), 15, GFLAGS), ++static int __init clk_rk3328_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; + -+ GATE(CLK_REF_PIPE_PHY0_OSC_SRC, "clk_ref_pipe_phy0_osc_src", "xin24m", 0, -+ RK3588_CLKGATE_CON(77), 0, GFLAGS), -+ GATE(CLK_REF_PIPE_PHY1_OSC_SRC, "clk_ref_pipe_phy1_osc_src", "xin24m", 0, -+ RK3588_CLKGATE_CON(77), 1, GFLAGS), -+ GATE(CLK_REF_PIPE_PHY2_OSC_SRC, "clk_ref_pipe_phy2_osc_src", "xin24m", 0, -+ RK3588_CLKGATE_CON(77), 2, GFLAGS), -+ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY0_PLL_SRC, "clk_ref_pipe_phy0_pll_src", "ppll", 0, -+ RK3588_CLKSEL_CON(176), 0, 6, DFLAGS, -+ RK3588_CLKGATE_CON(77), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY1_PLL_SRC, "clk_ref_pipe_phy1_pll_src", "ppll", 0, -+ RK3588_CLKSEL_CON(176), 6, 6, DFLAGS, -+ RK3588_CLKGATE_CON(77), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY2_PLL_SRC, "clk_ref_pipe_phy2_pll_src", "ppll", 0, -+ RK3588_CLKSEL_CON(177), 0, 6, DFLAGS, -+ RK3588_CLKGATE_CON(77), 5, GFLAGS), -+ MUX(CLK_REF_PIPE_PHY0, "clk_ref_pipe_phy0", clk_ref_pipe_phy0_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(177), 6, 1, MFLAGS), -+ MUX(CLK_REF_PIPE_PHY1, "clk_ref_pipe_phy1", clk_ref_pipe_phy1_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(177), 7, 1, MFLAGS), -+ MUX(CLK_REF_PIPE_PHY2, "clk_ref_pipe_phy2", clk_ref_pipe_phy2_p, CLK_SET_RATE_PARENT, -+ RK3588_CLKSEL_CON(177), 8, 1, MFLAGS), ++ rk3328_clk_init(np); + -+ /* pmu */ -+ COMPOSITE(CLK_PMU1_300M_SRC, "clk_pmu1_300m_src", pmu_300m_24m_p, 0, -+ RK3588_PMU_CLKSEL_CON(0), 15, 1, MFLAGS, 10, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE(CLK_PMU1_400M_SRC, "clk_pmu1_400m_src", pmu_400m_24m_p, 0, -+ RK3588_PMU_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PMU1_50M_SRC, "clk_pmu1_50m_src", "clk_pmu1_400m_src", 0, -+ RK3588_PMU_CLKSEL_CON(0), 0, 4, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PMU1_100M_SRC, "clk_pmu1_100m_src", "clk_pmu1_400m_src", 0, -+ RK3588_PMU_CLKSEL_CON(0), 4, 3, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PMU1_200M_SRC, "clk_pmu1_200m_src", "clk_pmu1_400m_src", 0, -+ RK3588_PMU_CLKSEL_CON(0), 7, 3, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PMU1_ROOT, "hclk_pmu1_root", hclk_pmu1_root_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(1), 6, 2, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NODIV(PCLK_PMU1_ROOT, "pclk_pmu1_root", pmu_100m_50m_24m_src_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(1), 8, 2, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_PMU0_ROOT, "pclk_pmu0_root", "pclk_pmu1_root", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(5), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PMU_CM0_ROOT, "hclk_pmu_cm0_root", hclk_pmu_cm0_root_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(1), 10, 2, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(CLK_PMU0, "clk_pmu0", "xin24m", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(PCLK_PMU0, "pclk_pmu0", "pclk_pmu0_root", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_PMU0IOC, "pclk_pmu0ioc", "pclk_pmu0_root", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(5), 4, GFLAGS), 
-+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(5), 5, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, -+ RK3588_PMU_CLKSEL_CON(17), 0, 1, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(5), 6, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", pmu_200m_100m_p, 0, -+ RK3588_PMU_CLKSEL_CON(3), 6, 1, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_pmu1_root", 0, -+ RK3588_PMU_CLKGATE_CON(2), 7, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", "cpll", 0, -+ RK3588_PMU_CLKSEL_CON(5), 2, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_TX_FRAC, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(6), 0, -+ RK3588_PMU_CLKGATE_CON(2), 9, GFLAGS, -+ &rk3588_i2s1_8ch_tx_fracmux), -+ GATE(MCLK_I2S1_8CH_TX, "mclk_i2s1_8ch_tx", "clk_i2s1_8ch_tx", 0, -+ RK3588_PMU_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", "cpll", 0, -+ RK3588_PMU_CLKSEL_CON(7), 2, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_RX_FRAC, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(8), 0, -+ RK3588_PMU_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3588_i2s1_8ch_rx_fracmux), -+ GATE(MCLK_I2S1_8CH_RX, "mclk_i2s1_8ch_rx", "clk_i2s1_8ch_rx", 0, -+ RK3588_PMU_CLKGATE_CON(2), 13, GFLAGS), -+ MUX(I2S1_8CH_MCLKOUT, "i2s1_8ch_mclkout", i2s1_8ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(9), 2, 2, MFLAGS), -+ GATE(PCLK_PMU1, "pclk_pmu1", "pclk_pmu0_root", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu0", CLK_IGNORE_UNUSED, -+ RK3588_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(CLK_PMU1, "clk_pmu1", "clk_pmu0", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(HCLK_PDM0, "hclk_pdm0", "hclk_pmu1_root", 0, -+ RK3588_PMU_CLKGATE_CON(2), 14, GFLAGS), -+ COMPOSITE_NODIV(MCLK_PDM0, "mclk_pdm0", mclk_pdm0_p, 0, -+ RK3588_PMU_CLKSEL_CON(9), 4, 1, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(2), 15, GFLAGS), -+ GATE(HCLK_VAD, "hclk_vad", "hclk_pmu1_root", 0, -+ RK3588_PMU_CLKGATE_CON(3), 0, GFLAGS), -+ GATE(FCLK_PMU_CM0_CORE, "fclk_pmu_cm0_core", "hclk_pmu_cm0_root", CLK_IS_CRITICAL, -+ RK3588_PMU_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE(CLK_PMU_CM0_RTC, "clk_pmu_cm0_rtc", mux_24m_32k_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(0), 15, GFLAGS), -+ GATE(PCLK_PMU1_IOC, "pclk_pmu1_ioc", "pclk_pmu0_root", CLK_IGNORE_UNUSED, -+ RK3588_PMU_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_PMU1PWM, "pclk_pmu1pwm", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_PMU1PWM, "clk_pmu1pwm", pmu_100m_50m_24m_src_p, 0, -+ RK3588_PMU_CLKSEL_CON(2), 9, 2, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(1), 13, GFLAGS), -+ GATE(CLK_PMU1PWM_CAPTURE, "clk_pmu1pwm_capture", "xin24m", 0, -+ RK3588_PMU_CLKGATE_CON(1), 14, GFLAGS), -+ GATE(PCLK_PMU1TIMER, "pclk_pmu1timer", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_PMU1TIMER_ROOT, "clk_pmu1timer_root", pmu_24m_32k_100m_src_p, 0, -+ RK3588_PMU_CLKSEL_CON(2), 7, 2, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(CLK_PMU1TIMER0, "clk_pmu1timer0", "clk_pmu1timer_root", 0, -+ RK3588_PMU_CLKGATE_CON(1), 10, GFLAGS), -+ 
GATE(CLK_PMU1TIMER1, "clk_pmu1timer1", "clk_pmu1timer_root", 0, -+ RK3588_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "cpll", 0, -+ RK3588_PMU_CLKSEL_CON(3), 7, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(2), 3, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RK3588_PMU_CLKSEL_CON(4), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RK3588_PMU_CLKGATE_CON(2), 4, GFLAGS, -+ &rk3588_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, -+ RK3588_PMU_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(PCLK_PMU1WDT, "pclk_pmu1wdt", "pclk_pmu0_root", 0, -+ RK3588_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PMU1WDT, "tclk_pmu1wdt", mux_24m_32k_p, 0, -+ RK3588_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, -+ RK3588_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ COMPOSITE(CLK_CR_PARA, "clk_cr_para", mux_24m_ppll_spll_p, 0, -+ RK3588_PMU_CLKSEL_CON(15), 5, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(4), 11, GFLAGS), -+ COMPOSITE(CLK_USB2PHY_HDPTXRXPHY_REF, "clk_usb2phy_hdptxrxphy_ref", mux_24m_ppll_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(14), 14, 1, MFLAGS, 9, 5, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(4), 7, GFLAGS), -+ COMPOSITE(CLK_USBDPPHY_MIPIDCPPHY_REF, "clk_usbdpphy_mipidcpphy_ref", mux_24m_ppll_spll_p, CLK_IS_CRITICAL, -+ RK3588_PMU_CLKSEL_CON(14), 7, 2, MFLAGS, 0, 7, DFLAGS, -+ RK3588_PMU_CLKGATE_CON(4), 3, GFLAGS), ++ return 0; ++} + -+ GATE(CLK_PHY0_REF_ALT_P, "clk_phy0_ref_alt_p", "ppll", 0, -+ RK3588_PHYREF_ALT_GATE, 0, GFLAGS), -+ GATE(CLK_PHY0_REF_ALT_M, "clk_phy0_ref_alt_m", "ppll", 0, -+ RK3588_PHYREF_ALT_GATE, 1, GFLAGS), -+ GATE(CLK_PHY1_REF_ALT_P, "clk_phy1_ref_alt_p", "ppll", 0, -+ RK3588_PHYREF_ALT_GATE, 2, GFLAGS), -+ GATE(CLK_PHY1_REF_ALT_M, "clk_phy1_ref_alt_m", "ppll", 0, -+ RK3588_PHYREF_ALT_GATE, 3, GFLAGS), ++static const struct of_device_id clk_rk3328_match_table[] = { ++ { ++ .compatible = "rockchip,rk3328-cru", ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3328_match_table); + -+ GATE(HCLK_SPDIFRX0, "hclk_spdifrx0", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(63), 12, GFLAGS), -+ GATE(HCLK_SPDIFRX1, "hclk_spdifrx1", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(63), 14, GFLAGS), -+ GATE(HCLK_SPDIFRX2, "hclk_spdifrx2", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(64), 0, GFLAGS), -+ GATE(HCLK_SPDIF4, "hclk_spdif4", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(63), 8, GFLAGS), -+ GATE(HCLK_SPDIF3, "hclk_spdif3", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(63), 4, GFLAGS), -+ GATE(HCLK_I2S6_8CH, "hclk_i2s6_8ch", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(63), 3, GFLAGS), -+ GATE(HCLK_I2S5_8CH, "hclk_i2s5_8ch", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(62), 12, GFLAGS), -+ GATE(HCLK_I2S9_8CH, "hclk_i2s9_8ch", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(65), 0, GFLAGS), -+ GATE(HCLK_I2S7_8CH, "hclk_i2s7_8ch", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(60), 0, GFLAGS), -+ GATE(HCLK_I2S10_8CH, "hclk_i2s10_8ch", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(65), 4, GFLAGS), -+ GATE(ACLK_HDCP1, "aclk_hdcp1", "aclk_hdcp1_pre", 0, -+ RK3588_CLKGATE_CON(60), 5, GFLAGS), -+ GATE(HCLK_HDCP1, "hclk_hdcp1", "hclk_vo1", 0, -+ RK3588_CLKGATE_CON(60), 6, GFLAGS), -+ GATE(HCLK_SPDIF5_DP1, "hclk_spdif5_dp1", "hclk_vo0", 0, -+ RK3588_CLKGATE_CON(57), 7, GFLAGS), -+ GATE(HCLK_SPDIF2_DP0, "hclk_spdif2_dp0", "hclk_vo0", 0, -+ RK3588_CLKGATE_CON(57), 2, GFLAGS), -+ GATE(HCLK_I2S8_8CH, "hclk_i2s8_8ch", "hclk_vo0", 0, -+ RK3588_CLKGATE_CON(56), 14, GFLAGS), -+ GATE(HCLK_I2S4_8CH, "hclk_i2s4_8ch", 
"hclk_vo0", 0, -+ RK3588_CLKGATE_CON(56), 10, GFLAGS), -+ GATE(ACLK_HDCP0, "aclk_hdcp0", "aclk_hdcp0_pre", 0, -+ RK3588_CLKGATE_CON(55), 12, GFLAGS), -+ GATE(HCLK_HDCP0, "hclk_hdcp0", "hclk_vo0", 0, -+ RK3588_CLKGATE_CON(55), 13, GFLAGS), -+ GATE(HCLK_RKVENC1, "hclk_rkvenc1", "hclk_rkvenc1_pre", 0, -+ RK3588_CLKGATE_CON(48), 4, GFLAGS), -+ GATE(ACLK_RKVENC1, "aclk_rkvenc1", "aclk_rkvenc1_pre", 0, -+ RK3588_CLKGATE_CON(48), 5, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(44), 8, GFLAGS), -+ GATE(ACLK_IEP2P0, "aclk_iep2p0", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(45), 5, GFLAGS), -+ GATE(ACLK_JPEG_ENCODER0, "aclk_jpeg_encoder0", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(44), 10, GFLAGS), -+ GATE(ACLK_JPEG_ENCODER1, "aclk_jpeg_encoder1", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(44), 12, GFLAGS), -+ GATE(ACLK_JPEG_ENCODER2, "aclk_jpeg_encoder2", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(44), 14, GFLAGS), -+ GATE(ACLK_JPEG_ENCODER3, "aclk_jpeg_encoder3", "aclk_vdpu_low_pre", 0, -+ RK3588_CLKGATE_CON(45), 0, GFLAGS), -+ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_decoder_pre", 0, -+ RK3588_CLKGATE_CON(45), 2, GFLAGS), -+ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 7, GFLAGS), -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 10, GFLAGS), -+ GATE(HCLK_HOST_ARB0, "hclk_host_arb0", "hclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 11, GFLAGS), -+ GATE(HCLK_HOST1, "hclk_host1", "hclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 12, GFLAGS), -+ GATE(HCLK_HOST_ARB1, "hclk_host_arb1", "hclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 13, GFLAGS), -+ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_usb", 0, -+ RK3588_CLKGATE_CON(42), 4, GFLAGS), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "scmi_cclk_sd", RK3588_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "scmi_cclk_sd", RK3588_SDMMC_CON1, 1), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sdio_pre", 0, -+ RK3588_CLKGATE_CON(75), 2, GFLAGS), -+ GATE(HCLK_RKVDEC1, "hclk_rkvdec1", "hclk_rkvdec1_pre", 0, -+ RK3588_CLKGATE_CON(41), 2, GFLAGS), -+ GATE(ACLK_RKVDEC1, "aclk_rkvdec1", "aclk_rkvdec1_pre", 0, -+ RK3588_CLKGATE_CON(41), 3, GFLAGS), -+ GATE(HCLK_RKVDEC0, "hclk_rkvdec0", "hclk_rkvdec0_pre", 0, -+ RK3588_CLKGATE_CON(40), 3, GFLAGS), -+ GATE(ACLK_RKVDEC0, "aclk_rkvdec0", "aclk_rkvdec0_pre", 0, -+ RK3588_CLKGATE_CON(40), 4, GFLAGS), -+ GATE(CLK_PCIE4L_PIPE, "clk_pcie4l_pipe", "clk_pipe30phy_pipe0_i", 0, -+ RK3588_CLKGATE_CON(39), 0, GFLAGS), -+ GATE(CLK_PCIE2L_PIPE, "clk_pcie2l_pipe", "clk_pipe30phy_pipe2_i", 0, -+ RK3588_CLKGATE_CON(39), 1, GFLAGS), -+ GATE(CLK_PIPEPHY0_PIPE_G, "clk_pipephy0_pipe_g", "clk_pipephy0_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 3, GFLAGS), -+ GATE(CLK_PIPEPHY1_PIPE_G, "clk_pipephy1_pipe_g", "clk_pipephy1_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 4, GFLAGS), -+ GATE(CLK_PIPEPHY2_PIPE_G, "clk_pipephy2_pipe_g", "clk_pipephy2_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 5, GFLAGS), -+ GATE(CLK_PIPEPHY0_PIPE_ASIC_G, "clk_pipephy0_pipe_asic_g", "clk_pipephy0_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 6, GFLAGS), -+ GATE(CLK_PIPEPHY1_PIPE_ASIC_G, "clk_pipephy1_pipe_asic_g", "clk_pipephy1_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 7, GFLAGS), -+ GATE(CLK_PIPEPHY2_PIPE_ASIC_G, "clk_pipephy2_pipe_asic_g", "clk_pipephy2_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 8, GFLAGS), -+ GATE(CLK_PIPEPHY2_PIPE_U3_G, "clk_pipephy2_pipe_u3_g", "clk_pipephy2_pipe_i", 0, -+ RK3588_CLKGATE_CON(38), 9, GFLAGS), -+ GATE(CLK_PCIE1L2_PIPE, "clk_pcie1l2_pipe", "clk_pipephy0_pipe_g", 
0, -+ RK3588_CLKGATE_CON(38), 13, GFLAGS), -+ GATE(CLK_PCIE1L0_PIPE, "clk_pcie1l0_pipe", "clk_pipephy1_pipe_g", 0, -+ RK3588_CLKGATE_CON(38), 14, GFLAGS), -+ GATE(CLK_PCIE1L1_PIPE, "clk_pcie1l1_pipe", "clk_pipephy2_pipe_g", 0, -+ RK3588_CLKGATE_CON(38), 15, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_nvm", 0, -+ RK3588_CLKGATE_CON(31), 10, GFLAGS), -+ GATE(HCLK_SFC_XIP, "hclk_sfc_xip", "hclk_nvm", 0, -+ RK3588_CLKGATE_CON(31), 11, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_nvm", 0, -+ RK3588_CLKGATE_CON(31), 4, GFLAGS), -+ GATE(ACLK_ISP1, "aclk_isp1", "aclk_isp1_pre", 0, -+ RK3588_CLKGATE_CON(26), 5, GFLAGS), -+ GATE(HCLK_ISP1, "hclk_isp1", "hclk_isp1_pre", 0, -+ RK3588_CLKGATE_CON(26), 7, GFLAGS), -+ GATE(PCLK_AV1, "pclk_av1", "pclk_av1_pre", 0, -+ RK3588_CLKGATE_CON(68), 5, GFLAGS), -+ GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0, -+ RK3588_CLKGATE_CON(68), 2, GFLAGS), ++static struct platform_driver clk_rk3328_driver = { ++ .driver = { ++ .name = "clk-rk3328", ++ .of_match_table = clk_rk3328_match_table, ++ }, +}; ++builtin_platform_driver_probe(clk_rk3328_driver, clk_rk3328_probe); + -+static void __iomem *rk3588_cru_base; ++MODULE_DESCRIPTION("Rockchip RK3328 Clock Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip-oh/clk-rk3368.c b/drivers/clk/rockchip-oh/clk-rk3368.c +new file mode 100644 +index 000000000..3ddff548e +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk-rk3368.c +@@ -0,0 +1,956 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2015 Heiko Stuebner ++ */ + -+static void dump_offset(const char *name, u32 offset, u32 len) -+{ -+ int i = 0, cnt = 0; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ if (!offset) -+ return; ++#define RK3368_GRF_SOC_STATUS0 0x480 + -+ cnt = DIV_ROUND_UP(len, 32); -+ for (i = 0; i < cnt; i++) { -+ pr_warn("%-12s 0x%05x: ", name, offset + i * 32); -+ print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 32, 4, -+ rk3588_cru_base + offset + i * 0x10, 32, false); ++enum rk3368_plls { ++ apllb, aplll, dpll, cpll, gpll, npll, ++}; ++ ++static struct rockchip_pll_rate_table rk3368_pll_rates[] = { ++ RK3066_PLL_RATE(2208000000, 1, 92, 1), ++ RK3066_PLL_RATE(2184000000, 1, 91, 1), ++ RK3066_PLL_RATE(2160000000, 1, 90, 1), ++ RK3066_PLL_RATE(2136000000, 1, 89, 1), ++ RK3066_PLL_RATE(2112000000, 1, 88, 1), ++ RK3066_PLL_RATE(2088000000, 1, 87, 1), ++ RK3066_PLL_RATE(2064000000, 1, 86, 1), ++ RK3066_PLL_RATE(2040000000, 1, 85, 1), ++ RK3066_PLL_RATE(2016000000, 1, 84, 1), ++ RK3066_PLL_RATE(1992000000, 1, 83, 1), ++ RK3066_PLL_RATE(1968000000, 1, 82, 1), ++ RK3066_PLL_RATE(1944000000, 1, 81, 1), ++ RK3066_PLL_RATE(1920000000, 1, 80, 1), ++ RK3066_PLL_RATE(1896000000, 1, 79, 1), ++ RK3066_PLL_RATE(1872000000, 1, 78, 1), ++ RK3066_PLL_RATE(1848000000, 1, 77, 1), ++ RK3066_PLL_RATE(1824000000, 1, 76, 1), ++ RK3066_PLL_RATE(1800000000, 1, 75, 1), ++ RK3066_PLL_RATE(1776000000, 1, 74, 1), ++ RK3066_PLL_RATE(1752000000, 1, 73, 1), ++ RK3066_PLL_RATE(1728000000, 1, 72, 1), ++ RK3066_PLL_RATE(1704000000, 1, 71, 1), ++ RK3066_PLL_RATE(1680000000, 1, 70, 1), ++ RK3066_PLL_RATE(1656000000, 1, 69, 1), ++ RK3066_PLL_RATE(1632000000, 1, 68, 1), ++ RK3066_PLL_RATE(1608000000, 1, 67, 1), ++ RK3066_PLL_RATE(1560000000, 1, 65, 1), ++ RK3066_PLL_RATE(1512000000, 1, 63, 1), ++ RK3066_PLL_RATE(1488000000, 1, 62, 1), ++ RK3066_PLL_RATE(1464000000, 1, 61, 1), ++ RK3066_PLL_RATE(1440000000, 1, 60, 1), ++ RK3066_PLL_RATE(1416000000, 1, 59, 1), ++ RK3066_PLL_RATE(1392000000, 1, 
58, 1), ++ RK3066_PLL_RATE(1368000000, 1, 57, 1), ++ RK3066_PLL_RATE(1344000000, 1, 56, 1), ++ RK3066_PLL_RATE(1320000000, 1, 55, 1), ++ RK3066_PLL_RATE(1296000000, 1, 54, 1), ++ RK3066_PLL_RATE(1272000000, 1, 53, 1), ++ RK3066_PLL_RATE(1248000000, 1, 52, 1), ++ RK3066_PLL_RATE(1224000000, 1, 51, 1), ++ RK3066_PLL_RATE(1200000000, 1, 50, 1), ++ RK3066_PLL_RATE(1176000000, 1, 49, 1), ++ RK3066_PLL_RATE(1128000000, 1, 47, 1), ++ RK3066_PLL_RATE(1104000000, 1, 46, 1), ++ RK3066_PLL_RATE(1008000000, 1, 84, 2), ++ RK3066_PLL_RATE( 912000000, 1, 76, 2), ++ RK3066_PLL_RATE( 888000000, 1, 74, 2), ++ RK3066_PLL_RATE( 816000000, 1, 68, 2), ++ RK3066_PLL_RATE( 792000000, 1, 66, 2), ++ RK3066_PLL_RATE( 696000000, 1, 58, 2), ++ RK3066_PLL_RATE( 672000000, 1, 56, 2), ++ RK3066_PLL_RATE( 648000000, 1, 54, 2), ++ RK3066_PLL_RATE( 624000000, 1, 52, 2), ++ RK3066_PLL_RATE( 600000000, 1, 50, 2), ++ RK3066_PLL_RATE( 576000000, 1, 48, 2), ++ RK3066_PLL_RATE( 552000000, 1, 46, 2), ++ RK3066_PLL_RATE( 528000000, 1, 88, 4), ++ RK3066_PLL_RATE( 504000000, 1, 84, 4), ++ RK3066_PLL_RATE( 480000000, 1, 80, 4), ++ RK3066_PLL_RATE( 456000000, 1, 76, 4), ++ RK3066_PLL_RATE( 408000000, 1, 68, 4), ++ RK3066_PLL_RATE( 312000000, 1, 52, 4), ++ RK3066_PLL_RATE( 252000000, 1, 84, 8), ++ RK3066_PLL_RATE( 216000000, 1, 72, 8), ++ RK3066_PLL_RATE( 126000000, 2, 84, 8), ++ RK3066_PLL_RATE( 48000000, 2, 32, 8), ++ { /* sentinel */ }, ++}; ++ ++static struct rockchip_pll_rate_table rk3368_npll_rates[] = { ++ RK3066_PLL_RATE_NB(594000000, 1, 99, 4, 32), ++ RK3066_PLL_RATE_NB(585000000, 6, 585, 4, 32), ++ RK3066_PLL_RATE_NB(432000000, 3, 216, 4, 32), ++ RK3066_PLL_RATE_NB(426000000, 3, 213, 4, 32), ++ RK3066_PLL_RATE_NB(400000000, 1, 100, 6, 32), ++ RK3066_PLL_RATE_NB(342000000, 3, 171, 4, 32), ++ RK3066_PLL_RATE_NB(297000000, 2, 198, 8, 16), ++ RK3066_PLL_RATE_NB(270000000, 1, 135, 12, 32), ++ RK3066_PLL_RATE_NB(260000000, 1, 130, 12, 32), ++ RK3066_PLL_RATE_NB(148500000, 1, 99, 16, 32), ++ RK3066_PLL_RATE_NB(146250000, 6, 585, 16, 32), ++ RK3066_PLL_RATE_NB(108000000, 1, 54, 12, 32), ++ RK3066_PLL_RATE_NB(106500000, 4, 213, 12, 32), ++ RK3066_PLL_RATE_NB(85500000, 4, 171, 12, 32), ++ RK3066_PLL_RATE_NB(74250000, 4, 198, 16, 32), ++}; ++ ++PNAME(mux_pll_p) = { "xin24m", "xin32k" }; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_cs_src_p) = { "apllb_cs", "aplll_cs", "gpll_cs"}; ++PNAME(mux_aclk_bus_src_p) = { "cpll_aclk_bus", "gpll_aclk_bus" }; ++ ++PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "dummy_npll" }; ++PNAME(mux_pll_src_dmycpll_dmygpll_npll_p) = { "dummy_cpll", "dummy_gpll", "npll" }; ++PNAME(mux_pll_src_npll_cpll_gpll_p) = { "dummy_npll", "cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_usb_p) = { "cpll", "gpll", "usbphy_480m" }; ++PNAME(mux_pll_src_cpll_gpll_usb_usb_p) = { "cpll", "gpll", "usbphy_480m", ++ "usbphy_480m" }; ++PNAME(mux_pll_src_cpll_gpll_usb_npll_p) = { "cpll", "gpll", "usbphy_480m", ++ "dummy_npll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_npll_p) = { "cpll", "gpll", "dummy_npll", "dummy_npll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_usb_p) = { "cpll", "gpll", "dummy_npll", ++ "usbphy_480m" }; ++ ++PNAME(mux_i2s_8ch_pre_p) = { "i2s_8ch_src", "i2s_8ch_frac", ++ "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_8ch_clkout_p) = { "i2s_8ch_pre", "xin12m" }; ++PNAME(mux_i2s_2ch_p) = { "i2s_2ch_src", "i2s_2ch_frac", ++ "dummy", "xin12m" }; ++PNAME(mux_spdif_8ch_p) = { "spdif_8ch_pre", "spdif_8ch_frac", ++ "ext_i2s", "xin12m" }; ++PNAME(mux_edp_24m_p) 
= { "xin24m", "dummy" }; ++PNAME(mux_vip_out_p) = { "vip_src", "xin24m" }; ++PNAME(mux_usbphy480m_p) = { "usbotg_out", "xin24m" }; ++PNAME(mux_hsic_usbphy480m_p) = { "usbotg_out", "dummy" }; ++PNAME(mux_hsicphy480m_p) = { "cpll", "gpll", "usbphy_480m" }; ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "xin24m" }; ++PNAME(mux_uart3_p) = { "uart3_src", "uart3_frac", "xin24m" }; ++PNAME(mux_uart4_p) = { "uart4_src", "uart4_frac", "xin24m" }; ++PNAME(mux_mac_p) = { "mac_pll_src", "ext_gmac" }; ++PNAME(mux_mmc_src_p) = { "cpll", "gpll", "usbphy_480m", "xin24m" }; ++ ++static struct rockchip_pll_clock rk3368_pll_clks[] __initdata = { ++ [apllb] = PLL(pll_rk3066, PLL_APLLB, "apllb", mux_pll_p, 0, RK3368_PLL_CON(0), ++ RK3368_PLL_CON(3), 8, 1, 0, rk3368_pll_rates), ++ [aplll] = PLL(pll_rk3066, PLL_APLLL, "aplll", mux_pll_p, 0, RK3368_PLL_CON(4), ++ RK3368_PLL_CON(7), 8, 0, 0, rk3368_pll_rates), ++ [dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK3368_PLL_CON(8), ++ RK3368_PLL_CON(11), 8, 2, 0, NULL), ++ [cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK3368_PLL_CON(12), ++ RK3368_PLL_CON(15), 8, 3, ROCKCHIP_PLL_SYNC_RATE, rk3368_pll_rates), ++ [gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK3368_PLL_CON(16), ++ RK3368_PLL_CON(19), 8, 4, ROCKCHIP_PLL_SYNC_RATE, rk3368_pll_rates), ++ [npll] = PLL(pll_rk3066, PLL_NPLL, "npll", mux_pll_p, 0, RK3368_PLL_CON(20), ++ RK3368_PLL_CON(23), 8, 5, 0, rk3368_npll_rates), ++}; ++ ++static struct clk_div_table div_ddrphy_t[] = { ++ { .val = 0, .div = 1 }, ++ { .val = 1, .div = 2 }, ++ { .val = 3, .div = 4 }, ++ { /* sentinel */ }, ++}; ++ ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK ++ ++static const struct rockchip_cpuclk_reg_data rk3368_cpuclkb_data = { ++ .core_reg[0] = RK3368_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 7, ++ .mux_core_mask = 0x1, ++}; ++ ++static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = { ++ .core_reg[0] = RK3368_CLKSEL_CON(2), ++ .div_core_shift[0] = 0, ++ .mux_core_alt = 1, ++ .num_cores = 1, ++ .mux_core_main = 0, ++ .div_core_mask[0] = 0x1f, ++ .mux_core_shift = 7, ++ .mux_core_mask = 0x1, ++}; ++ ++#define RK3368_DIV_ACLKM_MASK 0x1f ++#define RK3368_DIV_ACLKM_SHIFT 8 ++#define RK3368_DIV_ATCLK_MASK 0x1f ++#define RK3368_DIV_ATCLK_SHIFT 0 ++#define RK3368_DIV_PCLK_DBG_MASK 0x1f ++#define RK3368_DIV_PCLK_DBG_SHIFT 8 ++ ++#define RK3368_CLKSEL0(_offs, _aclkm) \ ++ { \ ++ .reg = RK3368_CLKSEL_CON(0 + _offs), \ ++ .val = HIWORD_UPDATE(_aclkm, RK3368_DIV_ACLKM_MASK, \ ++ RK3368_DIV_ACLKM_SHIFT), \ ++ } ++#define RK3368_CLKSEL1(_offs, _atclk, _pdbg) \ ++ { \ ++ .reg = RK3368_CLKSEL_CON(1 + _offs), \ ++ .val = HIWORD_UPDATE(_atclk, RK3368_DIV_ATCLK_MASK, \ ++ RK3368_DIV_ATCLK_SHIFT) | \ ++ HIWORD_UPDATE(_pdbg, RK3368_DIV_PCLK_DBG_MASK, \ ++ RK3368_DIV_PCLK_DBG_SHIFT), \ + } -+} + -+static void rk3588_dump_cru(void) ++/* cluster_b: aclkm in clksel0, rest in clksel1 */ ++#define RK3368_CPUCLKB_RATE(_prate, _aclkm, _atclk, _pdbg) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3368_CLKSEL0(0, _aclkm), \ ++ RK3368_CLKSEL1(0, _atclk, _pdbg), \ ++ }, \ ++ } ++ ++/* cluster_l: aclkm in clksel2, rest in clksel3 */ ++#define 
RK3368_CPUCLKL_RATE(_prate, _aclkm, _atclk, _pdbg) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3368_CLKSEL0(2, _aclkm), \ ++ RK3368_CLKSEL1(2, _atclk, _pdbg), \ ++ }, \ ++ } ++ ++static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = { ++ RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5), ++ RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4), ++ RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4), ++ RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3), ++ RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3), ++ RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2), ++ RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2), ++ RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1), ++ RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1), ++ RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1), ++}; ++ ++static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = { ++ RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6), ++ RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5), ++ RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5), ++ RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4), ++ RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4), ++ RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3), ++ RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2), ++ RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2), ++ RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1), ++ RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1), ++}; ++ ++static struct rockchip_clk_branch rk3368_i2s_8ch_fracmux __initdata = ++ MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(27), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_spdif_8ch_fracmux __initdata = ++ MUX(0, "spdif_8ch_pre", mux_spdif_8ch_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(31), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_i2s_2ch_fracmux __initdata = ++ MUX(0, "i2s_2ch_pre", mux_i2s_2ch_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(53), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(33), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(35), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_uart3_fracmux __initdata = ++ MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(39), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_uart4_fracmux __initdata = ++ MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(41), 8, 2, MFLAGS); ++ ++static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 2 ++ */ ++ ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ ++ MUX(SCLK_USBPHY480M, "usbphy_480m", mux_usbphy480m_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(13), 8, 1, MFLAGS), ++ ++ GATE(0, "apllb_core", "apllb", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "gpllb_core", "gpll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 1, GFLAGS), ++ ++ GATE(0, "aplll_core", "aplll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "gplll_core", "gpll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 5, GFLAGS), ++ ++ DIV(0, "aclkm_core_b", "armclkb", 0, ++ RK3368_CLKSEL_CON(0), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ DIV(0, "atclk_core_b", "armclkb", 0, ++ RK3368_CLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ DIV(0, "pclk_dbg_b", "armclkb", 0, ++ RK3368_CLKSEL_CON(1), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ ++ DIV(0, "aclkm_core_l", "armclkl", 0, ++ 
RK3368_CLKSEL_CON(2), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ DIV(0, "atclk_core_l", "armclkl", 0, ++ RK3368_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ DIV(0, "pclk_dbg_l", "armclkl", 0, ++ RK3368_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY), ++ ++ GATE(0, "apllb_cs", "apllb", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(0, "aplll_cs", "aplll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(0, "gpll_cs", "gpll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOGATE(0, "sclk_cs_pre", mux_cs_src_p, CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(0, "clkin_trace", "sclk_cs_pre", CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(4), 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(0), 13, GFLAGS), ++ ++ COMPOSITE(ACLK_CCI_PRE, "aclk_cci_pre", mux_pll_src_cpll_gpll_usb_npll_p, CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK3368_CLKGATE_CON(7), 10, GFLAGS), ++ ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", 0, ++ RK3368_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NOGATE_DIVTBL(0, "ddrphy_src", mux_ddrphy_p, CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(13), 4, 1, MFLAGS, 0, 2, DFLAGS, div_ddrphy_t), ++ ++ FACTOR_GATE(0, "sclk_ddr", "ddrphy_src", CLK_IGNORE_UNUSED, 1, 4, ++ RK3368_CLKGATE_CON(6), 14, GFLAGS), ++ GATE(0, "sclk_ddr4x", "ddrphy_src", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(6), 15, GFLAGS), ++ ++ GATE(0, "gpll_aclk_bus", "gpll", CLK_IS_CRITICAL, ++ RK3368_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(0, "cpll_aclk_bus", "cpll", CLK_IS_CRITICAL, ++ RK3368_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_NOGATE(0, "aclk_bus_src", mux_aclk_bus_src_p, CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(8), 7, 1, MFLAGS, 0, 5, DFLAGS), ++ ++ GATE(ACLK_BUS, "aclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, ++ RK3368_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_BUS, "pclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(8), 12, 3, DFLAGS, ++ RK3368_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus", "aclk_bus_src", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(8), 8, 2, DFLAGS, ++ RK3368_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "sclk_crypto", "aclk_bus_src", 0, ++ RK3368_CLKSEL_CON(10), 14, 2, DFLAGS, ++ RK3368_CLKGATE_CON(7), 2, GFLAGS), ++ ++ COMPOSITE(0, "fclk_mcu_src", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(1), 3, GFLAGS), ++ /* ++ * stclk_mcu is listed as child of fclk_mcu_src in diagram 5, ++ * but stclk_mcu has an additional own divider in diagram 2 ++ */ ++ COMPOSITE_NOMUX(0, "stclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(12), 8, 3, DFLAGS, ++ RK3368_CLKGATE_CON(13), 13, GFLAGS), ++ ++ COMPOSITE(0, "i2s_8ch_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(27), 12, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(28), 0, ++ RK3368_CLKGATE_CON(6), 2, GFLAGS, ++ &rk3368_i2s_8ch_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "i2s_8ch_clkout", mux_i2s_8ch_clkout_p, 0, ++ RK3368_CLKSEL_CON(27), 15, 1, MFLAGS, ++ RK3368_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(SCLK_I2S_8CH, "sclk_i2s_8ch", "i2s_8ch_pre", CLK_SET_RATE_PARENT, ++ RK3368_CLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE(0, 
"spdif_8ch_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(31), 12, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(32), 0, ++ RK3368_CLKGATE_CON(6), 5, GFLAGS, ++ &rk3368_spdif_8ch_fracmux), ++ GATE(SCLK_SPDIF_8CH, "sclk_spdif_8ch", "spdif_8ch_pre", CLK_SET_RATE_PARENT, ++ RK3368_CLKGATE_CON(6), 6, GFLAGS), ++ COMPOSITE(0, "i2s_2ch_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(53), 12, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(5), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(54), 0, ++ RK3368_CLKGATE_CON(5), 14, GFLAGS, ++ &rk3368_i2s_2ch_fracmux), ++ GATE(SCLK_I2S_2CH, "sclk_i2s_2ch", "i2s_2ch_pre", CLK_SET_RATE_PARENT, ++ RK3368_CLKGATE_CON(5), 15, GFLAGS), ++ ++ COMPOSITE(0, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3368_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(0, "sclk_hsadc_tsp", "ext_hsadc_tsp", 0, ++ RK3368_CLKGATE_CON(13), 7, GFLAGS), ++ ++ MUX(0, "uart_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(35), 12, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "uart2_src", "uart_src", 0, ++ RK3368_CLKSEL_CON(37), 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(2), 4, GFLAGS), ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(37), 8, 1, MFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 3 ++ */ ++ ++ COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0, ++ RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 6, GFLAGS), ++ COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0, ++ RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 7, GFLAGS), ++ ++ /* ++ * We use aclk_vdpu by default ---GRF_SOC_CON0[7] setting in system, ++ * so we ignore the mux and make clocks nodes as following, ++ */ ++ FACTOR_GATE(0, "hclk_video_pre", "aclk_vdpu", 0, 1, 4, ++ RK3368_CLKGATE_CON(4), 8, GFLAGS), ++ ++ COMPOSITE(0, "sclk_hevc_cabac_src", mux_pll_src_cpll_gpll_npll_usb_p, 0, ++ RK3368_CLKSEL_CON(17), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(5), 1, GFLAGS), ++ COMPOSITE(0, "sclk_hevc_core_src", mux_pll_src_cpll_gpll_npll_usb_p, 0, ++ RK3368_CLKSEL_CON(17), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(5), 2, GFLAGS), ++ ++ COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb_p, CLK_IGNORE_UNUSED, ++ RK3368_CLKSEL_CON(19), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 0, GFLAGS), ++ DIV(0, "hclk_vio", "aclk_vio0", 0, ++ RK3368_CLKSEL_CON(21), 0, 5, DFLAGS), ++ ++ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_usb_p, 0, ++ RK3368_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 3, GFLAGS), ++ COMPOSITE(SCLK_RGA, "sclk_rga", mux_pll_src_cpll_gpll_usb_p, 0, ++ RK3368_CLKSEL_CON(18), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 4, GFLAGS), ++ ++ COMPOSITE(DCLK_VOP, "dclk_vop", mux_pll_src_dmycpll_dmygpll_npll_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3368_CLKGATE_CON(4), 1, GFLAGS), ++ ++ GATE(SCLK_VOP0_PWM, "sclk_vop0_pwm", "xin24m", 0, ++ RK3368_CLKGATE_CON(4), 2, GFLAGS), ++ ++ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_cpll_gpll_npll_npll_p, 0, ++ RK3368_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3368_CLKGATE_CON(4), 9, GFLAGS), ++ ++ GATE(0, "pclk_isp_in", "ext_isp", 0, ++ RK3368_CLKGATE_CON(17), 2, GFLAGS), ++ INVERTER(PCLK_ISP, "pclk_isp", "pclk_isp_in", ++ 
RK3368_CLKSEL_CON(21), 6, IFLAGS), ++ ++ GATE(0, "pclk_vip_in", "ext_vip", 0, ++ RK3368_CLKGATE_CON(16), 13, GFLAGS), ++ INVERTER(PCLK_VIP, "pclk_vip", "pclk_vip_in", ++ RK3368_CLKSEL_CON(21), 13, IFLAGS), ++ ++ GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, ++ RK3368_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0, ++ RK3368_CLKGATE_CON(4), 12, GFLAGS), ++ ++ COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(21), 15, 1, MFLAGS, ++ RK3368_CLKGATE_CON(4), 5, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_VIP_OUT, "sclk_vip_out", mux_vip_out_p, 0, ++ RK3368_CLKSEL_CON(21), 14, 1, MFLAGS, 8, 5, DFLAGS), ++ ++ COMPOSITE_NODIV(SCLK_EDP_24M, "sclk_edp_24m", mux_edp_24m_p, 0, ++ RK3368_CLKSEL_CON(23), 8, 1, MFLAGS, ++ RK3368_CLKGATE_CON(5), 4, GFLAGS), ++ COMPOSITE(SCLK_EDP, "sclk_edp", mux_pll_src_cpll_gpll_npll_npll_p, 0, ++ RK3368_CLKSEL_CON(23), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3368_CLKGATE_CON(5), 3, GFLAGS), ++ ++ COMPOSITE(SCLK_HDCP, "sclk_hdcp", mux_pll_src_cpll_gpll_npll_npll_p, 0, ++ RK3368_CLKSEL_CON(55), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3368_CLKGATE_CON(5), 5, GFLAGS), ++ ++ DIV(0, "pclk_pd_alive", "gpll", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(10), 8, 5, DFLAGS), ++ ++ /* sclk_timer has a gate in the sgrf */ ++ ++ COMPOSITE_NOMUX(0, "pclk_pd_pmu", "gpll", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(10), 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(SCLK_PVTM_PMU, "sclk_pvtm_pmu", "xin24m", 0, ++ RK3368_CLKGATE_CON(7), 3, GFLAGS), ++ COMPOSITE(0, "sclk_gpu_core_src", mux_pll_src_cpll_gpll_usb_npll_p, 0, ++ RK3368_CLKSEL_CON(14), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(4), 11, GFLAGS), ++ MUX(0, "aclk_gpu_src", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(14), 14, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_gpu_mem_pre", "aclk_gpu_src", 0, ++ RK3368_CLKSEL_CON(14), 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(5), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_gpu_cfg_pre", "aclk_gpu_src", 0, ++ RK3368_CLKSEL_CON(16), 8, 5, DFLAGS, ++ RK3368_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(SCLK_PVTM_GPU, "sclk_pvtm_gpu", "xin24m", 0, ++ RK3368_CLKGATE_CON(7), 11, GFLAGS), ++ ++ COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(9), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(9), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK3368_CLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3368_CLKSEL_CON(9), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, ++ RK3368_CLKGATE_CON(3), 2, GFLAGS), ++ GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IS_CRITICAL, ++ RK3368_CLKGATE_CON(3), 1, GFLAGS), ++ ++ GATE(0, "sclk_mipidsi_24m", "xin24m", 0, RK3368_CLKGATE_CON(4), 14, GFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ ++ COMPOSITE(SCLK_SPI0, "sclk_spi0", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "sclk_spi1", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(45), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3368_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "sclk_spi2", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(46), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3368_CLKGATE_CON(3), 9, GFLAGS), ++ ++ ++ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, ++ RK3368_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(7), 12, GFLAGS), 
++ COMPOSITE(SCLK_SDIO0, "sclk_sdio0", mux_mmc_src_p, 0, ++ RK3368_CLKSEL_CON(48), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(7), 13, GFLAGS), ++ COMPOSITE(SCLK_EMMC, "sclk_emmc", mux_mmc_src_p, 0, ++ RK3368_CLKSEL_CON(51), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(7), 15, GFLAGS), ++ ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RK3368_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3368_SDMMC_CON1, 0), ++ ++ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "sclk_sdio0", RK3368_SDIO0_CON0, 1), ++ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "sclk_sdio0", RK3368_SDIO0_CON1, 0), ++ ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RK3368_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RK3368_EMMC_CON1, 0), ++ ++ GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(8), 1, GFLAGS), ++ ++ /* pmu_grf_soc_con0[6] allows to select between xin32k and pvtm_pmu */ ++ GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(8), 4, GFLAGS), ++ ++ /* pmu_grf_soc_con0[6] allows to select between xin32k and pvtm_pmu */ ++ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin32k", 0, ++ RK3368_CLKSEL_CON(25), 0, 6, DFLAGS, ++ RK3368_CLKGATE_CON(3), 5, GFLAGS), ++ ++ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, ++ RK3368_CLKSEL_CON(25), 8, 8, DFLAGS, ++ RK3368_CLKGATE_CON(3), 6, GFLAGS), ++ ++ COMPOSITE(SCLK_NANDC0, "sclk_nandc0", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(47), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(7), 8, GFLAGS), ++ ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(52), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(6), 7, GFLAGS), ++ ++ COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb_usb_p, 0, ++ RK3368_CLKSEL_CON(33), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(34), 0, ++ RK3368_CLKGATE_CON(2), 1, GFLAGS, ++ &rk3368_uart0_fracmux), ++ ++ COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0, ++ RK3368_CLKSEL_CON(35), 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(2), 2, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(36), 0, ++ RK3368_CLKGATE_CON(2), 3, GFLAGS, ++ &rk3368_uart1_fracmux), ++ ++ COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0, ++ RK3368_CLKSEL_CON(39), 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(40), 0, ++ RK3368_CLKGATE_CON(2), 7, GFLAGS, ++ &rk3368_uart3_fracmux), ++ ++ COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0, ++ RK3368_CLKSEL_CON(41), 0, 7, DFLAGS, ++ RK3368_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(42), 0, ++ RK3368_CLKGATE_CON(2), 9, GFLAGS, ++ &rk3368_uart4_fracmux), ++ ++ COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, ++ RK3368_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3368_CLKGATE_CON(3), 4, GFLAGS), ++ MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, ++ RK3368_CLKSEL_CON(43), 8, 1, MFLAGS), ++ GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, ++ RK3368_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(SCLK_MACREF, "sclk_macref", "mac_clk", 0, ++ RK3368_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(SCLK_MAC_RX, "sclk_mac_rx", "mac_clk", 0, ++ RK3368_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(SCLK_MAC_TX, "sclk_mac_tx", "mac_clk", 0, ++ RK3368_CLKGATE_CON(7), 5, GFLAGS), 
++ ++ GATE(0, "jtag", "ext_jtag", CLK_IGNORE_UNUSED, ++ RK3368_CLKGATE_CON(7), 0, GFLAGS), ++ ++ COMPOSITE_NODIV(0, "hsic_usbphy_480m", mux_hsic_usbphy480m_p, 0, ++ RK3368_CLKSEL_CON(26), 8, 2, MFLAGS, ++ RK3368_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0, ++ RK3368_CLKSEL_CON(26), 12, 2, MFLAGS, ++ RK3368_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(SCLK_HSICPHY12M, "sclk_hsicphy12m", "xin12m", 0, ++ RK3368_CLKGATE_CON(8), 6, GFLAGS), ++ ++ /* ++ * Clock-Architecture Diagram 5 ++ */ ++ ++ /* aclk_cci_pre gates */ ++ GATE(0, "aclk_core_niu_cpup", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(0, "aclk_core_niu_cci", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(0, "aclk_cci400", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(0, "aclk_adb400m_pd_core_b", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(0, "aclk_adb400m_pd_core_l", "aclk_cci_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 0, GFLAGS), ++ ++ /* aclkm_core_* gates */ ++ GATE(0, "aclk_adb400s_pd_core_b", "aclkm_core_b", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(0, "aclk_adb400s_pd_core_l", "aclkm_core_l", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(9), 0, GFLAGS), ++ ++ /* armclk* gates */ ++ GATE(0, "sclk_dbg_pd_core_b", "armclkb", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(0, "sclk_dbg_pd_core_l", "armclkl", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(9), 1, GFLAGS), ++ ++ /* sclk_cs_pre gates */ ++ GATE(0, "sclk_dbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(0, "pclk_core_niu_sdbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(0, "hclk_core_niu_dbg", "sclk_cs_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(11), 5, GFLAGS), ++ ++ /* aclk_bus gates */ ++ GATE(0, "aclk_strc_sys", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 12, GFLAGS), ++ GATE(ACLK_DMAC_BUS, "aclk_dmac_bus", "aclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 11, GFLAGS), ++ GATE(0, "sclk_intmem1", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 6, GFLAGS), ++ GATE(0, "sclk_intmem0", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 5, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(0, "aclk_gic400", "aclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 9, GFLAGS), ++ ++ /* sclk_ddr gates */ ++ GATE(0, "nclk_ddrupctl", "sclk_ddr", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 2, GFLAGS), ++ ++ /* clk_hsadc_tsp is part of diagram2 */ ++ ++ /* fclk_mcu_src gates */ ++ GATE(0, "hclk_noc_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 14, GFLAGS), ++ GATE(0, "fclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 12, GFLAGS), ++ GATE(0, "hclk_mcu", "fclk_mcu_src", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(13), 11, GFLAGS), ++ ++ /* hclk_cpu gates */ ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 10, GFLAGS), ++ GATE(HCLK_ROM, "hclk_rom", "hclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 9, GFLAGS), ++ GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 8, GFLAGS), ++ GATE(HCLK_I2S_8CH, "hclk_i2s_8ch", "hclk_bus", 0, RK3368_CLKGATE_CON(12), 7, GFLAGS), ++ GATE(HCLK_TSP, "hclk_tsp", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 4, GFLAGS), ++ 
GATE(MCLK_CRYPTO, "mclk_crypto", "hclk_bus", 0, RK3368_CLKGATE_CON(13), 3, GFLAGS), ++ ++ /* pclk_cpu gates */ ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 14, GFLAGS), ++ GATE(PCLK_DDRUPCTL, "pclk_ddrupctl", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(12), 13, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, RK3368_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(PCLK_SIM, "pclk_sim", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 8, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(PCLK_EFUSE256, "pclk_efuse_256", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(0, "pclk_efuse_1024", "pclk_bus", 0, RK3368_CLKGATE_CON(13), 0, GFLAGS), ++ ++ /* ++ * video clk gates ++ * aclk_video(_pre) can actually select between parents of aclk_vdpu ++ * and aclk_vepu by setting bit GRF_SOC_CON0[7]. ++ */ ++ GATE(ACLK_VIDEO, "aclk_video", "aclk_vdpu", 0, RK3368_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(SCLK_HEVC_CABAC, "sclk_hevc_cabac", "sclk_hevc_cabac_src", 0, RK3368_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(SCLK_HEVC_CORE, "sclk_hevc_core", "sclk_hevc_core_src", 0, RK3368_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(HCLK_VIDEO, "hclk_video", "hclk_video_pre", 0, RK3368_CLKGATE_CON(15), 1, GFLAGS), ++ ++ /* aclk_rga_pre gates */ ++ GATE(ACLK_VIO1_NOC, "aclk_vio1_noc", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3368_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_rga_pre", 0, RK3368_CLKGATE_CON(17), 10, GFLAGS), ++ ++ /* aclk_vio0 gates */ ++ GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(ACLK_VIO0_NOC, "aclk_vio0_noc", "aclk_vio0", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(ACLK_VOP_IEP, "aclk_vop_iep", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3368_CLKGATE_CON(16), 2, GFLAGS), ++ ++ /* sclk_isp gates */ ++ GATE(HCLK_ISP, "hclk_isp", "sclk_isp", 0, RK3368_CLKGATE_CON(16), 14, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "sclk_isp", 0, RK3368_CLKGATE_CON(17), 0, GFLAGS), ++ ++ /* hclk_vio gates */ ++ GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(HCLK_VIO_NOC, "hclk_vio_noc", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK3368_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(HCLK_VIO_HDCPMMU, "hclk_hdcpmmu", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 12, GFLAGS), ++ GATE(HCLK_VIO_H2P, "hclk_vio_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 7, GFLAGS), ++ ++ /* ++ * pclk_vio gates ++ * pclk_vio comes from the exactly same source as hclk_vio ++ */ ++ GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio", 0, 
RK3368_CLKGATE_CON(17), 11, GFLAGS), ++ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 9, GFLAGS), ++ GATE(PCLK_VIO_H2P, "pclk_vio_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(PCLK_MIPI_CSI, "pclk_mipi_csi", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "hclk_vio", 0, RK3368_CLKGATE_CON(17), 3, GFLAGS), ++ ++ /* ext_vip gates in diagram3 */ ++ ++ /* gpu gates */ ++ GATE(SCLK_GPU_CORE, "sclk_gpu_core", "sclk_gpu_core_src", 0, RK3368_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(ACLK_GPU_MEM, "aclk_gpu_mem", "aclk_gpu_mem_pre", 0, RK3368_CLKGATE_CON(18), 1, GFLAGS), ++ GATE(ACLK_GPU_CFG, "aclk_gpu_cfg", "aclk_gpu_cfg_pre", 0, RK3368_CLKGATE_CON(18), 0, GFLAGS), ++ ++ /* aclk_peri gates */ ++ GATE(ACLK_DMAC_PERI, "aclk_dmac_peri", "aclk_peri", 0, RK3368_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "aclk_peri", 0, RK3368_CLKGATE_CON(20), 15, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3368_CLKGATE_CON(20), 13, GFLAGS), ++ GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 8, GFLAGS), ++ GATE(ACLK_PERI_MMU, "aclk_peri_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(21), 4, GFLAGS), ++ ++ /* hclk_peri gates */ ++ GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 11, GFLAGS), ++ GATE(0, "hclk_mmc_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 10, GFLAGS), ++ GATE(0, "hclk_emem_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 9, GFLAGS), ++ GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 7, GFLAGS), ++ GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 5, GFLAGS), ++ GATE(HCLK_HOST1, "hclk_host1", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 4, GFLAGS), ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_peri", 0, RK3368_CLKGATE_CON(20), 3, GFLAGS), ++ GATE(0, "pmu_hclk_otg0", "hclk_peri", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(20), 2, GFLAGS), ++ GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(20), 1, GFLAGS), ++ GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 1, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK3368_CLKGATE_CON(21), 0, GFLAGS), ++ ++ /* pclk_peri gates */ ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 15, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 14, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 13, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 12, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 10, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 9, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", 
"pclk_peri", 0, RK3368_CLKGATE_CON(19), 8, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 5, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK3368_CLKGATE_CON(19), 4, GFLAGS), ++ GATE(0, "pclk_peri_axi_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 14, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_peri", 0, RK3368_CLKGATE_CON(20), 0, GFLAGS), ++ ++ /* pclk_pd_alive gates */ ++ GATE(PCLK_TIMER1, "pclk_timer1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 13, GFLAGS), ++ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 12, GFLAGS), ++ GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(22), 9, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IS_CRITICAL, RK3368_CLKGATE_CON(22), 8, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 2, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pd_alive", 0, RK3368_CLKGATE_CON(22), 1, GFLAGS), ++ ++ /* Watchdog pclk is controlled by sgrf_soc_con3[7]. */ ++ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_pd_alive"), ++ ++ /* ++ * pclk_vio gates ++ * pclk_vio comes from the exactly same source as hclk_vio ++ */ ++ GATE(PCLK_DPHYRX, "pclk_dphyrx", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 11, GFLAGS), ++ GATE(PCLK_DPHYTX0, "pclk_dphytx0", "hclk_vio", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(22), 10, GFLAGS), ++ ++ /* pclk_pd_pmu gates */ ++ GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 5, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3368_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 3, GFLAGS), ++ GATE(0, "pclk_pmu_noc", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(23), 0, GFLAGS), ++ ++ /* timer gates */ ++ GATE(SCLK_TIMER15, "sclk_timer15", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 11, GFLAGS), ++ GATE(SCLK_TIMER14, "sclk_timer14", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 10, GFLAGS), ++ GATE(SCLK_TIMER13, "sclk_timer13", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 9, GFLAGS), ++ GATE(SCLK_TIMER12, "sclk_timer12", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 8, GFLAGS), ++ GATE(SCLK_TIMER11, "sclk_timer11", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 7, GFLAGS), ++ GATE(SCLK_TIMER10, "sclk_timer10", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 6, GFLAGS), ++ GATE(SCLK_TIMER05, "sclk_timer05", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 5, GFLAGS), ++ GATE(SCLK_TIMER04, "sclk_timer04", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 4, GFLAGS), ++ GATE(SCLK_TIMER03, "sclk_timer03", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 3, GFLAGS), ++ GATE(SCLK_TIMER02, "sclk_timer02", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 2, GFLAGS), ++ GATE(SCLK_TIMER01, "sclk_timer01", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 1, GFLAGS), ++ 
GATE(SCLK_TIMER00, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS), ++}; ++ ++static void __iomem *rk3368_cru_base; ++ ++static void rk3368_dump_cru(void) +{ -+ if (rk3588_cru_base) { -+ pr_warn("CRU REGS:\n"); -+ dump_offset("LPLL", RK3588_LPLL_CON(16), 0x10); -+ dump_offset("B0PLL", RK3588_B0_PLL_CON(0), 0x10); -+ dump_offset("B1PLL", RK3588_B1_PLL_CON(8), 0x10); -+ dump_offset("GPLL", RK3588_PLL_CON(112), 0x10); -+ dump_offset("CPLL", RK3588_PLL_CON(104), 0x10); -+ dump_offset("V0PLL", RK3588_PLL_CON(88), 0x10); -+ dump_offset("AUPLL", RK3588_PLL_CON(96), 0x10); -+ dump_offset("PPLL", RK3588_PMU_PLL_CON(128), 0x10); -+ dump_offset("DSUCRU_SEL", RK3588_DSU_CLKSEL_CON(0), 0x20); -+ dump_offset("DSUCRU_GATE", RK3588_DSU_CLKGATE_CON(0), 0x10); -+ dump_offset("BIG0CRU_SEL", RK3588_BIGCORE0_CLKSEL_CON(0), 0x10); -+ dump_offset("BIG0CRU_GATE", RK3588_BIGCORE0_CLKGATE_CON(0), 0x10); -+ dump_offset("BIG1CRU_SEL", RK3588_BIGCORE1_CLKSEL_CON(0), 0x10); -+ dump_offset("BIG1CRU_GATE", RK3588_BIGCORE1_CLKGATE_CON(0), 0x10); -+ dump_offset("CRU_SEL", RK3588_CLKSEL_CON(0), 0x2d0); -+ dump_offset("CRU_GATE", RK3588_CLKGATE_CON(0), 0x140); -+ dump_offset("PMUCRU_SEL", RK3588_PMU_CLKSEL_CON(0), 0x50); -+ dump_offset("PMUCRU_GATE", RK3588_PMU_CLKGATE_CON(0), 0x20); ++ if (rk3368_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3368_cru_base, ++ 0x41c, false); + } +} + -+static void __init rk3588_clk_init(struct device_node *np) ++static void __init rk3368_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; @@ -59803,8 +57978,6 @@ index 000000000..c297e4e1e + return; + } + -+ rk3588_cru_base = reg_base; -+ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -59813,2163 +57986,1683 @@ index 000000000..c297e4e1e + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk3588_pll_clks, -+ ARRAY_SIZE(rk3588_pll_clks), -+ RK3588_GRF_SOC_STATUS0); ++ rockchip_clk_register_plls(ctx, rk3368_pll_clks, ++ ARRAY_SIZE(rk3368_pll_clks), ++ RK3368_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk3368_clk_branches, ++ ARRAY_SIZE(rk3368_clk_branches)); + -+ rockchip_clk_register_armclk(ctx, ARMCLK_L, "armclk_l", -+ 3, clks[PLL_LPLL], clks[PLL_GPLL], -+ &rk3588_cpulclk_data, rk3588_cpulclk_rates, -+ ARRAY_SIZE(rk3588_cpulclk_rates)); -+ rockchip_clk_register_armclk(ctx, ARMCLK_B01, "armclk_b01", -+ 3, clks[PLL_B0PLL], clks[PLL_GPLL], -+ &rk3588_cpub0clk_data, rk3588_cpub0clk_rates, -+ ARRAY_SIZE(rk3588_cpub0clk_rates)); -+ rockchip_clk_register_armclk(ctx, ARMCLK_B23, "armclk_b23", -+ 3, clks[PLL_B1PLL], clks[PLL_GPLL], -+ &rk3588_cpub1clk_data, rk3588_cpub1clk_rates, -+ ARRAY_SIZE(rk3588_cpub1clk_rates)); ++ rockchip_clk_register_armclk(ctx, ARMCLKB, "armclkb", ++ 2, clks[PLL_APLLB], clks[PLL_GPLL], ++ &rk3368_cpuclkb_data, rk3368_cpuclkb_rates, ++ ARRAY_SIZE(rk3368_cpuclkb_rates)); + -+ rockchip_clk_register_branches(ctx, rk3588_clk_branches, -+ ARRAY_SIZE(rk3588_clk_branches)); ++ rockchip_clk_register_armclk(ctx, ARMCLKL, "armclkl", ++ 2, clks[PLL_APLLL], clks[PLL_GPLL], ++ &rk3368_cpuclkl_data, rk3368_cpuclkl_rates, ++ ARRAY_SIZE(rk3368_cpuclkl_rates)); + -+ rockchip_register_softrst(np, 49158, reg_base + RK3588_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 15, reg_base + RK3368_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, 
RK3588_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK3368_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3588_dump_cru; ++ if (!rk_dump_cru) { ++ rk3368_cru_base = reg_base; ++ rk_dump_cru = rk3368_dump_cru; ++ } +} ++CLK_OF_DECLARE(rk3368_cru, "rockchip,rk3368-cru", rk3368_clk_init); + -+CLK_OF_DECLARE(rk3588_cru, "rockchip,rk3588-cru", rk3588_clk_init); ++static int __init clk_rk3368_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; + -+#ifdef MODULE -+struct clk_rk3588_inits { -+ void (*inits)(struct device_node *np); -+}; ++ rk3368_clk_init(np); + -+static const struct clk_rk3588_inits clk_3588_cru_init = { -+ .inits = rk3588_clk_init, -+}; ++ return 0; ++} + -+static const struct of_device_id clk_rk3588_match_table[] = { ++static const struct of_device_id clk_rk3368_match_table[] = { + { -+ .compatible = "rockchip,rk3588-cru", -+ .data = &clk_3588_cru_init, ++ .compatible = "rockchip,rk3368-cru", + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk3588_match_table); -+ -+static int clk_rk3588_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rk3588_inits *init_data; -+ -+ match = of_match_device(clk_rk3588_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; -+ -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); -+ -+ return 0; -+} ++MODULE_DEVICE_TABLE(of, clk_rk3368_match_table); + -+static struct platform_driver clk_rk3588_driver = { -+ .probe = clk_rk3588_probe, ++static struct platform_driver clk_rk3368_driver = { + .driver = { -+ .name = "clk-rk3588", -+ .of_match_table = clk_rk3588_match_table, -+ .suppress_bind_attrs = true, ++ .name = "clk-rk3368", ++ .of_match_table = clk_rk3368_match_table, + }, +}; -+module_platform_driver(clk_rk3588_driver); ++builtin_platform_driver_probe(clk_rk3368_driver, clk_rk3368_probe); + -+MODULE_DESCRIPTION("Rockchip RK3588 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3368 Clock Driver"); +MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rv1106.c b/drivers/clk/rockchip-oh/clk-rv1106.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3399.c b/drivers/clk/rockchip-oh/clk-rk3399.c new file mode 100644 -index 000000000..0833bf2ad +index 000000000..e1b6c5267 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rv1106.c -@@ -0,0 +1,1280 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rk3399.c +@@ -0,0 +1,1752 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang ++ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
++ * Author: Xing Zheng + */ + -+#include +#include +#include ++#include +#include +#include +#include ++#include +#include -+#include -+#include ++#include +#include "clk.h" + -+#define CRU_PVTPLL0_CON0_L 0x11000 -+#define CRU_PVTPLL0_CON0_H 0x11004 -+#define CRU_PVTPLL0_CON1_L 0x11008 -+#define CRU_PVTPLL0_CON1_H 0x1100c -+#define CRU_PVTPLL0_CON2_L 0x11010 -+#define CRU_PVTPLL0_CON2_H 0x11014 -+#define CRU_PVTPLL0_CON3_L 0x11018 -+#define CRU_PVTPLL0_CON3_H 0x1101c -+#define CRU_PVTPLL0_OSC_CNT 0x11020 -+#define CRU_PVTPLL0_OSC_CNT_AVG 0x11024 -+ -+#define CRU_PVTPLL1_CON0_L 0x11030 -+#define CRU_PVTPLL1_CON0_H 0x11034 -+#define CRU_PVTPLL1_CON1_L 0x11038 -+#define CRU_PVTPLL1_CON1_H 0x1103c -+#define CRU_PVTPLL1_CON2_L 0x11040 -+#define CRU_PVTPLL1_CON2_H 0x11044 -+#define CRU_PVTPLL1_CON3_L 0x11048 -+#define CRU_PVTPLL1_CON3_H 0x1104c -+#define CRU_PVTPLL1_OSC_CNT 0x11050 -+#define CRU_PVTPLL1_OSC_CNT_AVG 0x11054 -+ -+#define RV1106_GRF_SOC_STATUS0 0x10 -+#define CPU_PVTPLL_CON0_L 0x40000 -+#define CPU_PVTPLL_CON0_H 0x40004 -+#define CPU_PVTPLL_CON1 0x40008 -+#define CPU_PVTPLL_CON2 0x4000c -+#define CPU_PVTPLL_CON3 0x40010 -+#define CPU_PVTPLL_OSC_CNT 0x40018 -+#define CPU_PVTPLL_OSC_CNT_AVG 0x4001c -+ -+#define PVTPLL_RING_SEL_MASK 0x7 -+#define PVTPLL_RING_SEL_SHIFT 8 -+#define PVTPLL_EN_MASK 0x3 -+#define PVTPLL_EN_SHIFT 0 -+#define PVTPLL_LENGTH_SEL_MASK 0x7f -+#define PVTPLL_LENGTH_SEL_SHIFT 0 -+ -+#define CPU_CLK_PATH_BASE (0x18300) -+#define CPU_PVTPLL_PATH_CORE ((1 << 12) | (1 << 28)) -+ -+#define RV1106_FRAC_MAX_PRATE 1200000000 ++enum rk3399_plls { ++ lpll, bpll, dpll, cpll, gpll, npll, vpll, ++}; + -+enum rv1106_plls { -+ apll, dpll, cpll, gpll, ++enum rk3399_pmu_plls { ++ ppll, +}; + -+static struct rockchip_pll_rate_table rv1106_pll_rates[] = { ++static struct rockchip_pll_rate_table rk3399_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2136000000, 1, 89, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2112000000, 1, 88, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1968000000, 1, 82, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1944000000, 1, 81, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1872000000, 1, 78, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1848000000, 1, 77, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1824000000, 1, 76, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1776000000, 1, 74, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1752000000, 1, 73, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1728000000, 1, 72, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1680000000, 1, 70, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1656000000, 1, 69, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1632000000, 1, 68, 1, 1, 1, 0), + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), + RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 
1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), + RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(993484800, 1, 124, 3, 1, 0, 3113851), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(983040000, 1, 81, 2, 1, 0, 15435038), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), -+ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), ++ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), ++ 
RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 800000000, 1, 100, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 676000000, 3, 169, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE( 533250000, 8, 711, 4, 1, 1, 0), ++ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 297000000, 1, 99, 4, 2, 1, 0), ++ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE( 148500000, 1, 99, 4, 4, 1, 0), ++ RK3036_PLL_RATE( 106500000, 1, 71, 4, 4, 1, 0), ++ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), ++ RK3036_PLL_RATE( 74250000, 2, 99, 4, 4, 1, 0), ++ RK3036_PLL_RATE( 65000000, 1, 65, 6, 4, 1, 0), ++ RK3036_PLL_RATE( 54000000, 1, 54, 6, 4, 1, 0), ++ RK3036_PLL_RATE( 27000000, 1, 27, 6, 4, 1, 0), + { /* sentinel */ }, +}; + -+#define RV1106_DIV_ACLK_CORE_MASK 0x1f -+#define RV1106_DIV_ACLK_CORE_SHIFT 7 -+#define RV1106_DIV_PCLK_DBG_MASK 0x1f -+#define RV1106_DIV_PCLK_DBG_SHIFT 0 -+#define RV1106_CORE_SEL_MASK 0x3 -+#define RV1106_CORE_SEL_SHIFT 5 -+#define RV1106_ALT_DIV_MASK 0x1f -+#define RV1106_ALT_DIV_SHIFT 0 ++static struct rockchip_pll_rate_table rk3399_vpll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE( 594000000, 1, 123, 5, 1, 0, 12582912), /* vco = 2970000000 */ ++ RK3036_PLL_RATE( 593406593, 1, 123, 5, 1, 0, 10508804), /* vco = 2967032965 */ ++ RK3036_PLL_RATE( 297000000, 1, 123, 5, 2, 0, 12582912), /* vco = 2970000000 */ ++ RK3036_PLL_RATE( 296703297, 1, 123, 5, 2, 0, 10508807), /* vco = 2967032970 */ ++ RK3036_PLL_RATE( 148500000, 1, 129, 7, 3, 0, 15728640), /* vco = 3118500000 */ ++ RK3036_PLL_RATE( 148351648, 1, 123, 5, 4, 0, 10508800), /* vco = 2967032960 */ ++ RK3036_PLL_RATE( 106500000, 1, 124, 7, 4, 0, 4194304), /* vco = 2982000000 */ ++ RK3036_PLL_RATE( 74250000, 1, 129, 7, 6, 0, 15728640), /* vco = 3118500000 */ ++ RK3036_PLL_RATE( 74175824, 1, 129, 7, 6, 0, 13550823), /* vco = 3115384608 */ ++ RK3036_PLL_RATE( 65000000, 1, 113, 7, 6, 0, 12582912), /* vco = 2730000000 */ ++ RK3036_PLL_RATE( 59340659, 1, 121, 7, 7, 0, 2581098), /* vco = 2907692291 */ ++ RK3036_PLL_RATE( 54000000, 1, 110, 7, 7, 0, 4194304), /* vco = 2646000000 */ ++ RK3036_PLL_RATE( 27000000, 1, 55, 7, 7, 0, 2097152), /* vco = 1323000000 */ ++ RK3036_PLL_RATE( 26973027, 1, 55, 7, 7, 0, 1173232), /* vco = 1321678323 */ ++ { /* sentinel */ }, ++}; + -+#define RV1106_CLKSEL0(_aclk_core) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, RV1106_DIV_ACLK_CORE_MASK, \ -+ RV1106_DIV_ACLK_CORE_SHIFT), \ -+} ++/* CRU parents */ ++PNAME(mux_pll_p) = { "xin24m", "xin32k" }; + -+#define RV1106_CLKSEL1(_pclk_dbg) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_pclk_dbg, RV1106_DIV_PCLK_DBG_MASK, \ -+ RV1106_DIV_PCLK_DBG_SHIFT), \ -+} ++PNAME(mux_ddrclk_p) = { "clk_ddrc_lpll_src", ++ "clk_ddrc_bpll_src", ++ "clk_ddrc_dpll_src", ++ "clk_ddrc_gpll_src" }; + -+#define RV1106_CLKSEL2(_is_pvtpll) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_is_pvtpll, RV1106_CORE_SEL_MASK, \ -+ RV1106_CORE_SEL_SHIFT), \ -+} ++PNAME(mux_pll_src_vpll_cpll_gpll_p) = { "vpll", "cpll", "gpll" }; 
++PNAME(mux_pll_src_dmyvpll_cpll_gpll_p) = { "dummy_vpll", "cpll", "gpll" }; + ++#ifdef RK3399_TWO_PLL_FOR_VOP ++PNAME(mux_aclk_cci_p) = { "dummy_cpll", ++ "gpll_aclk_cci_src", ++ "npll_aclk_cci_src", ++ "dummy_vpll" }; ++PNAME(mux_cci_trace_p) = { "dummy_cpll", ++ "gpll_cci_trace" }; ++PNAME(mux_cs_p) = { "dummy_cpll", "gpll_cs", ++ "npll_cs"}; ++PNAME(mux_aclk_perihp_p) = { "dummy_cpll", ++ "gpll_aclk_perihp_src" }; + ++PNAME(mux_pll_src_cpll_gpll_p) = { "dummy_cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_p) = { "dummy_cpll", "gpll", "npll" }; ++PNAME(mux_pll_src_cpll_gpll_ppll_p) = { "dummy_cpll", "gpll", "ppll" }; ++PNAME(mux_pll_src_cpll_gpll_upll_p) = { "dummy_cpll", "gpll", "upll" }; ++PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "dummy_cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_ppll_p) = { "dummy_cpll", "gpll", "npll", ++ "ppll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_24m_p) = { "dummy_cpll", "gpll", "npll", ++ "xin24m" }; ++PNAME(mux_pll_src_cpll_gpll_npll_usbphy480m_p) = { "dummy_cpll", "gpll", "npll", ++ "clk_usbphy_480m" }; ++PNAME(mux_pll_src_ppll_cpll_gpll_npll_p) = { "ppll", "dummy_cpll", "gpll", ++ "npll", "upll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_upll_24m_p) = { "dummy_cpll", "gpll", "npll", ++ "upll", "xin24m" }; ++PNAME(mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p) = { "dummy_cpll", "gpll", "npll", ++ "ppll", "upll", "xin24m" }; ++/* ++ * We hope that HDMI/DP can obtain better signal quality, so we ++ * move the VOP pwm and aclk clocks to other PLLs and let the ++ * HDMI/DP phyclock monopolize VPLL.
++ */ ++PNAME(mux_pll_src_dmyvpll_cpll_gpll_npll_p) = { "dummy_vpll", "dummy_cpll", "gpll", ++ "npll" }; ++PNAME(mux_pll_src_dmyvpll_cpll_gpll_gpll_p) = { "dummy_vpll", "dummy_cpll", "gpll", ++ "gpll" }; ++PNAME(mux_pll_src_24m_32k_cpll_gpll_p) = { "xin24m", "xin32k", ++ "dummy_cpll", "gpll" }; + -+static struct rockchip_cpuclk_rate_table rv1106_cpuclk_rates[] __initdata = { -+ RV1106_CPUCLK_RATE(1608000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1584000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1560000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1536000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1512000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1488000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1464000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1440000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1416000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1392000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1368000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1344000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1320000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1296000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1272000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1248000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1224000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1200000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1104000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1096000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1008000000, 1, 5, 1), -+ RV1106_CPUCLK_RATE(912000000, 1, 5, 1), -+ RV1106_CPUCLK_RATE(816000000, 1, 3, 1), -+ RV1106_CPUCLK_RATE(696000000, 1, 3, 0), -+ RV1106_CPUCLK_RATE(600000000, 1, 3, 0), -+ RV1106_CPUCLK_RATE(408000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(312000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(216000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(96000000, 1, 1, 0), -+}; ++PNAME(mux_aclk_emmc_p) = { "dummy_cpll", ++ "gpll_aclk_emmc_src" }; + -+static const struct rockchip_cpuclk_reg_data rv1106_cpuclk_data = { -+ .core_reg[0] = RV1106_CORECLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 2, -+ .mux_core_main = 2, -+ .mux_core_shift = 5, -+ .mux_core_mask = 0x3, -+}; ++PNAME(mux_aclk_perilp0_p) = { "dummy_cpll", ++ "gpll_aclk_perilp0_src" }; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_24m_32k_p) = { "xin24m", "clk_rtc_32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_24m_p) = { "gpll", "xin24m" }; -+PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_400m_300m_pvtpll0_pvtpll1_p) = { "clk_400m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_500m_300m_pvtpll0_pvtpll1_p) = { "clk_500m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_339m_200m_pvtpll0_pvtpll1_p) = { "clk_339m_src", "clk_200m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_400m_200m_100m_24m_p) = { "clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_300m_200m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; -+PNAME(mux_50m_24m_p) = { "clk_50m_src", "xin24m" }; -+PNAME(mux_400m_24m_p) = { "clk_400m_src", "xin24m" }; -+PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_pvtm_32k" }; -+PNAME(mux_200m_100m_24m_32k_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_rtc_32k" }; 
-+PNAME(mux_100m_pmu_24m_p) = { "clk_100m_pmu", "xin24m" }; -+PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_pmu", "xin24m" }; -+PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_pmu", "xin24m" }; -+PNAME(mux_dpll_300m_p) = { "dpll", "clk_300m_src" }; -+PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin_osc0_half" }; -+PNAME(clk_ref_mipi0_p) = { "clk_ref_mipi0_src", "clk_ref_mipi0_frac", "xin24m" }; -+PNAME(clk_ref_mipi1_p) = { "clk_ref_mipi1_src", "clk_ref_mipi1_frac", "xin24m" }; -+PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; -+PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(clk_vicap_m0_p) = { "clk_vicap_m0_src", "clk_vicap_m0_frac", "xin24m" }; -+PNAME(clk_vicap_m1_p) = { "clk_vicap_m1_src", "clk_vicap_m1_frac", "xin24m" }; ++PNAME(mux_fclk_cm0s_p) = { "dummy_cpll", ++ "gpll_fclk_cm0s_src" }; + -+static struct rockchip_pll_clock rv1106_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1106_PLL_CON(0), -+ RV1106_MODE_CON, 0, 10, 0, rv1106_pll_rates), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RV1106_PLL_CON(8), -+ RV1106_MODE_CON, 2, 10, 0, rv1106_pll_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1106_PLL_CON(16), -+ RV1106_SUBDDRMODE_CON, 0, 10, 0, NULL), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RV1106_PLL_CON(24), -+ RV1106_MODE_CON, 4, 10, 0, rv1106_pll_rates), -+}; ++PNAME(mux_hclk_perilp1_p) = { "dummy_cpll", ++ "gpll_hclk_perilp1_src" }; ++PNAME(mux_aclk_gmac_p) = { "dummy_cpll", ++ "gpll_aclk_gmac_src" }; ++#else ++PNAME(mux_aclk_cci_p) = { "cpll_aclk_cci_src", ++ "gpll_aclk_cci_src", ++ "npll_aclk_cci_src", ++ "dummy_vpll" }; ++PNAME(mux_cci_trace_p) = { "cpll_cci_trace", ++ "gpll_cci_trace" }; ++PNAME(mux_cs_p) = { "cpll_cs", "gpll_cs", ++ "npll_cs"}; ++PNAME(mux_aclk_perihp_p) = { "cpll_aclk_perihp_src", ++ "gpll_aclk_perihp_src" }; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++PNAME(mux_pll_src_cpll_gpll_p) = { "cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; ++PNAME(mux_pll_src_cpll_gpll_ppll_p) = { "cpll", "gpll", "ppll" }; ++PNAME(mux_pll_src_cpll_gpll_upll_p) = { "cpll", "gpll", "upll" }; ++PNAME(mux_pll_src_npll_cpll_gpll_p) = { "npll", "cpll", "gpll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_ppll_p) = { "cpll", "gpll", "npll", ++ "ppll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_24m_p) = { "cpll", "gpll", "npll", ++ "xin24m" }; ++PNAME(mux_pll_src_cpll_gpll_npll_usbphy480m_p) = { "cpll", "gpll", "npll", ++ "clk_usbphy_480m" }; ++PNAME(mux_pll_src_ppll_cpll_gpll_npll_p) = { "ppll", "cpll", "gpll", ++ "npll", "upll" }; ++PNAME(mux_pll_src_cpll_gpll_npll_upll_24m_p) = { "cpll", "gpll", "npll", ++ "upll", "xin24m" }; ++PNAME(mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p) = { "cpll", "gpll", 
"npll", ++ "ppll", "upll", "xin24m" }; ++/* ++ * We hope to be able to HDMI/DP can obtain better signal quality, ++ * therefore, we move VOP pwm and aclk clocks to other PLLs, let ++ * HDMI/DP phyclock can monopolize VPLL. ++ */ ++PNAME(mux_pll_src_dmyvpll_cpll_gpll_npll_p) = { "dummy_vpll", "cpll", "gpll", ++ "npll" }; ++PNAME(mux_pll_src_dmyvpll_cpll_gpll_gpll_p) = { "dummy_vpll", "cpll", "gpll", ++ "gpll" }; ++PNAME(mux_pll_src_24m_32k_cpll_gpll_p) = { "xin24m", "xin32k", ++ "cpll", "gpll" }; + -+static struct rockchip_clk_branch rv1106_rtc32k_pmu_fracmux __initdata = -+ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS); ++PNAME(mux_aclk_emmc_p) = { "cpll_aclk_emmc_src", ++ "gpll_aclk_emmc_src" }; + -+static struct rockchip_clk_branch rv1106_i2s0_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(19), 0, 2, MFLAGS); ++PNAME(mux_aclk_perilp0_p) = { "cpll_aclk_perilp0_src", ++ "gpll_aclk_perilp0_src" }; + -+static struct rockchip_clk_branch rv1106_i2s0_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(21), 0, 2, MFLAGS); ++PNAME(mux_fclk_cm0s_p) = { "cpll_fclk_cm0s_src", ++ "gpll_fclk_cm0s_src" }; + -+static struct rockchip_clk_branch rv1106_clk_ref_mipi0_fracmux __initdata = -+ MUX(CLK_REF_MIPI0, "clk_ref_mipi0", clk_ref_mipi0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(27), 0, 2, MFLAGS); ++PNAME(mux_hclk_perilp1_p) = { "cpll_hclk_perilp1_src", ++ "gpll_hclk_perilp1_src" }; ++PNAME(mux_aclk_gmac_p) = { "cpll_aclk_gmac_src", ++ "gpll_aclk_gmac_src" }; ++#endif + -+static struct rockchip_clk_branch rv1106_clk_ref_mipi1_fracmux __initdata = -+ MUX(CLK_REF_MIPI1, "clk_ref_mipi1", clk_ref_mipi1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(29), 0, 2, MFLAGS); ++PNAME(mux_dclk_vop0_p) = { "dclk_vop0_div", ++ "dummy_dclk_vop0_frac" }; ++PNAME(mux_dclk_vop1_p) = { "dclk_vop1_div", ++ "dummy_dclk_vop1_frac" }; + -+static struct rockchip_clk_branch rv1106_clk_uart0_fracmux __initdata = -+ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(7), 0, 2, MFLAGS); ++PNAME(mux_clk_cif_p) = { "clk_cifout_src", "xin24m" }; + -+static struct rockchip_clk_branch rv1106_clk_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(9), 0, 2, MFLAGS); ++PNAME(mux_pll_src_24m_usbphy480m_p) = { "xin24m", "clk_usbphy_480m" }; ++PNAME(mux_pll_src_24m_pciephy_p) = { "xin24m", "clk_pciephy_ref100m" }; ++PNAME(mux_pciecore_cru_phy_p) = { "clk_pcie_core_cru", ++ "clk_pcie_core_phy" }; ++PNAME(mux_clk_testout1_p) = { "clk_testout1_pll_src", "xin24m" }; ++PNAME(mux_clk_testout2_p) = { "clk_testout2_pll_src", "xin24m" }; + -+static struct rockchip_clk_branch rv1106_clk_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(11), 0, 2, MFLAGS); ++PNAME(mux_usbphy_480m_p) = { "clk_usbphy0_480m_src", ++ "clk_usbphy1_480m_src" }; ++PNAME(mux_rmii_p) = { "clk_gmac", "clkin_gmac" }; ++PNAME(mux_spdif_p) = { "clk_spdif_div", "clk_spdif_frac", ++ "clkin_i2s", "xin12m" }; ++PNAME(mux_i2s0_p) = { "clk_i2s0_div", "clk_i2s0_frac", ++ "clkin_i2s", "xin12m" }; ++PNAME(mux_i2s1_p) = { "clk_i2s1_div", "clk_i2s1_frac", ++ "clkin_i2s", "xin12m" }; ++PNAME(mux_i2s2_p) = { "clk_i2s2_div", "clk_i2s2_frac", ++ "clkin_i2s", "xin12m" }; 
++PNAME(mux_i2sch_p) = { "clk_i2s0", "clk_i2s1", ++ "clk_i2s2" }; ++PNAME(mux_i2sout_p) = { "clk_i2sout_src", "xin12m" }; + -+static struct rockchip_clk_branch rv1106_clk_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(13), 0, 2, MFLAGS); ++PNAME(mux_uart0_p) = { "xin24m", "clk_uart0_div", "clk_uart0_frac" }; ++PNAME(mux_uart1_p) = { "xin24m", "clk_uart1_div", "clk_uart1_frac" }; ++PNAME(mux_uart2_p) = { "xin24m", "clk_uart2_div", "clk_uart2_frac" }; ++PNAME(mux_uart3_p) = { "xin24m", "clk_uart3_div", "clk_uart3_frac" }; + -+static struct rockchip_clk_branch rv1106_clk_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(15), 0, 2, MFLAGS); ++/* PMU CRU parents */ ++PNAME(mux_ppll_24m_p) = { "ppll", "xin24m" }; ++PNAME(mux_24m_ppll_p) = { "xin24m", "ppll" }; ++PNAME(mux_fclk_cm0s_pmu_ppll_p) = { "fclk_cm0s_pmu_ppll_src", "xin24m" }; ++PNAME(mux_wifi_pmu_p) = { "clk_wifi_div", "clk_wifi_frac" }; ++PNAME(mux_uart4_pmu_p) = { "xin24m", "clk_uart4_div", ++ "clk_uart4_frac" }; ++PNAME(mux_clk_testout2_2io_p) = { "clk_testout2", "clk_32k_suspend_pmu" }; + -+static struct rockchip_clk_branch rv1106_clk_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(17), 0, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rv1106_clk_vicap_m0_fracmux __initdata = -+ MUX(CLK_VICAP_M0, "clk_vicap_m0", clk_vicap_m0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(31), 0, 2, MFLAGS); ++static u32 uart_mux_idx[] = { 2, 0, 1 }; + -+static struct rockchip_clk_branch rv1106_clk_vicap_m1_fracmux __initdata = -+ MUX(CLK_VICAP_M1, "clk_vicap_m1", clk_vicap_m1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(33), 0, 2, MFLAGS); ++static struct rockchip_pll_clock rk3399_pll_clks[] __initdata = { ++ [lpll] = PLL(pll_rk3399, PLL_APLLL, "lpll", mux_pll_p, 0, RK3399_PLL_CON(0), ++ RK3399_PLL_CON(3), 8, 31, 0, rk3399_pll_rates), ++ [bpll] = PLL(pll_rk3399, PLL_APLLB, "bpll", mux_pll_p, 0, RK3399_PLL_CON(8), ++ RK3399_PLL_CON(11), 8, 31, 0, rk3399_pll_rates), ++ [dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RK3399_PLL_CON(16), ++ RK3399_PLL_CON(19), 8, 31, 0, NULL), ++#ifdef RK3399_TWO_PLL_FOR_VOP ++ [cpll] = PLL(pll_rk3399, PLL_CPLL, "cpll", mux_pll_p, 0, RK3399_PLL_CON(24), ++ RK3399_PLL_CON(27), 8, 31, 0, rk3399_pll_rates), ++#else ++ [cpll] = PLL(pll_rk3399, PLL_CPLL, "cpll", mux_pll_p, 0, RK3399_PLL_CON(24), ++ RK3399_PLL_CON(27), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), ++#endif ++ [gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RK3399_PLL_CON(32), ++ RK3399_PLL_CON(35), 8, 31, 0, rk3399_pll_rates), ++ [npll] = PLL(pll_rk3399, PLL_NPLL, "npll", mux_pll_p, 0, RK3399_PLL_CON(40), ++ RK3399_PLL_CON(43), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), ++ [vpll] = PLL(pll_rk3399, PLL_VPLL, "vpll", mux_pll_p, 0, RK3399_PLL_CON(48), ++ RK3399_PLL_CON(51), 8, 31, 0, rk3399_vpll_rates), ++}; + -+static struct rockchip_clk_branch rv1106_clk_branches[] __initdata = { ++static struct rockchip_pll_clock rk3399_pmu_pll_clks[] __initdata = { ++ [ppll] = PLL(pll_rk3399, PLL_PPLL, "ppll", mux_pll_p, CLK_IS_CRITICAL, RK3399_PMU_PLL_CON(0), ++ RK3399_PMU_PLL_CON(3), 8, 31, ROCKCHIP_PLL_SYNC_RATE, rk3399_pll_rates), ++}; + -+ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define IFLAGS 
ROCKCHIP_INVERTER_HIWORD_MASK + -+ /* PD_CORE */ -+ GATE(CLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RV1106_CORECLKGATE_CON(0), 14, GFLAGS), -+ GATE(CLK_CORE_MCU_RTC, "clk_core_mcu_rtc", "xin24m", 0, -+ RV1106_CORECLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE(HCLK_CPU, "hclk_cpu", mux_gpll_24m_p, CLK_IS_CRITICAL, -+ RV1106_CORECLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CORECLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE(CLK_CORE_MCU, "clk_core_mcu", mux_gpll_24m_p, 0, -+ RV1106_CORECLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CORECLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RV1106_CORECLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1106_CORECLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, -+ RV1106_CORECLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_cpu_root", 0, -+ RV1106_CORECLKGATE_CON(1), 8, GFLAGS), ++static struct rockchip_clk_branch rk3399_spdif_fracmux __initdata = ++ MUX(0, "clk_spdif_mux", mux_spdif_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(32), 13, 2, MFLAGS); + -+ /* PD _TOP */ -+ COMPOSITE(CLK_50M_SRC, "clk_50m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE(CLK_100M_SRC, "clk_100m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(CLK_150M_SRC, "clk_150m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE(CLK_200M_SRC, "clk_200m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE(CLK_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE(CLK_300M_SRC, "clk_300m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_339M_SRC, "clk_339m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE(CLK_400M_SRC, "clk_400m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(4), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE(CLK_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 10, GFLAGS), ++static struct rockchip_clk_branch rk3399_i2s0_fracmux __initdata = ++ MUX(0, "clk_i2s0_mux", mux_i2s0_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(28), 8, 2, MFLAGS); + -+ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(24), 5, 2, MFLAGS, -+ RV1106_CLKGATE_CON(2), 9, GFLAGS), ++static struct rockchip_clk_branch rk3399_i2s1_fracmux __initdata = ++ MUX(0, "clk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(29), 8, 2, MFLAGS); + -+ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(17), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 13, GFLAGS), -+ 
COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(18), 0, -+ RV1106_CLKGATE_CON(1), 14, GFLAGS, -+ &rv1106_i2s0_8ch_tx_fracmux), -+ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, -+ RV1106_CLKGATE_CON(1), 15, GFLAGS), -+ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(19), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(20), 0, -+ RV1106_CLKGATE_CON(2), 1, GFLAGS, -+ &rv1106_i2s0_8ch_rx_fracmux), -+ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, -+ RV1106_CLKGATE_CON(2), 2, GFLAGS), -+ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(21), 2, 2, MFLAGS), -+ COMPOSITE(CLK_REF_MIPI0_SRC, "clk_ref_mipi0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(25), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_REF_MIPI0_FRAC, "clk_ref_mipi0_frac", "clk_ref_mipi0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(26), 0, -+ RV1106_CLKGATE_CON(3), 5, GFLAGS, -+ &rv1106_clk_ref_mipi0_fracmux), -+ GATE(MCLK_REF_MIPI0, "mclk_ref_mipi0", "clk_ref_mipi0", 0, -+ RV1106_CLKGATE_CON(3), 6, GFLAGS), -+ COMPOSITE(CLK_REF_MIPI1_SRC, "clk_ref_mipi1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(27), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_REF_MIPI1_FRAC, "clk_ref_mipi1_frac", "clk_ref_mipi1_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(28), 0, -+ RV1106_CLKGATE_CON(3), 8, GFLAGS, -+ &rv1106_clk_ref_mipi1_fracmux), -+ GATE(MCLK_REF_MIPI1, "mclk_ref_mipi1", "clk_ref_mipi1", 0, -+ RV1106_CLKGATE_CON(3), 9, GFLAGS), -+ COMPOSITE(CLK_UART0_SRC, "clk_uart0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(6), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(0), 12, GFLAGS, -+ &rv1106_clk_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, -+ RV1106_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(7), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(8), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(0), 15, GFLAGS, -+ &rv1106_clk_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RV1106_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(9), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(10), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 2, GFLAGS, -+ &rv1106_clk_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RV1106_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(11), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, -+ 
RV1106_CLKGATE_CON(1), 5, GFLAGS, -+ &rv1106_clk_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RV1106_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(13), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 7, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(14), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 8, GFLAGS, -+ &rv1106_clk_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RV1106_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(15), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(16), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 11, GFLAGS, -+ &rv1106_clk_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RV1106_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE(CLK_VICAP_M0_SRC, "clk_vicap_m0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(29), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 10, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_VICAP_M0_FRAC, "clk_vicap_m0_frac", "clk_vicap_m0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(30), 0, -+ RV1106_CLKGATE_CON(3), 11, GFLAGS, -+ &rv1106_clk_vicap_m0_fracmux), -+ GATE(SCLK_VICAP_M0, "sclk_vicap_m0", "clk_vicap_m0", 0, -+ RV1106_CLKGATE_CON(3), 12, GFLAGS), -+ COMPOSITE(CLK_VICAP_M1_SRC, "clk_vicap_m1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(31), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_VICAP_M1_FRAC, "clk_vicap_m1_frac", "clk_vicap_m1_src", 0, -+ RV1106_CLKSEL_CON(32), 0, -+ RV1106_CLKGATE_CON(3), 14, GFLAGS, -+ &rv1106_clk_vicap_m1_fracmux), -+ GATE(SCLK_VICAP_M1, "sclk_vicap_m1", "clk_vicap_m1", 0, -+ RV1106_CLKGATE_CON(3), 15, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC, "dclk_vop_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(23), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RV1106_CLKGATE_CON(2), 6, GFLAGS), ++static struct rockchip_clk_branch rk3399_i2s2_fracmux __initdata = ++ MUX(0, "clk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(30), 8, 2, MFLAGS); + -+ /* PD_DDR */ -+ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_DDRCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_DDRCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_DDR_ROOT, "aclk_ddr_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_DDRCLKSEL_CON(0), 8, 2, MFLAGS, -+ RV1106_DDRCLKGATE_CON(0), 12, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", 0, -+ RV1106_DDRCLKGATE_CON(0), 7, GFLAGS), -+ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", 0, -+ RV1106_DDRCLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_DFICTRL, "pclk_dfictrl", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RV1106_DDRCLKGATE_CON(0), 11, GFLAGS), -+ GATE(ACLK_SYS_SHRM, "aclk_sys_shrm", "aclk_ddr_root", CLK_IS_CRITICAL, -+ RV1106_DDRCLKGATE_CON(0), 13, GFLAGS), ++static struct rockchip_clk_branch rk3399_uart0_fracmux __initdata = ++ MUXTBL(SCLK_UART0, "clk_uart0", 
mux_uart0_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(33), 8, 2, MFLAGS, uart_mux_idx); + -+ /* PD_NPU */ -+ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_NPU_ROOT, "aclk_npu_root", mux_500m_300m_pvtpll0_pvtpll1_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 2, GFLAGS), -+ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_root", 0, -+ RV1106_NPUCLKGATE_CON(0), 9, GFLAGS), -+ GATE(ACLK_RKNN, "aclk_rknn", "aclk_npu_root", 0, -+ RV1106_NPUCLKGATE_CON(0), 10, GFLAGS), ++static struct rockchip_clk_branch rk3399_uart1_fracmux __initdata = ++ MUXTBL(SCLK_UART1, "clk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(34), 8, 2, MFLAGS, uart_mux_idx); + -+ /* PD_PERI */ -+ COMPOSITE_NODIV(PCLK_PERI_ROOT, "pclk_peri_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_PERI_ROOT, "aclk_peri_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 2, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PERI_ROOT, "hclk_peri_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 4, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(9), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(6), 8, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 3, GFLAGS), -+ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKSEL_CON(8), 0, 8, DFLAGS, -+ RV1106_PERICLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_CRYPTO, "clk_core_crypto", mux_300m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 5, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 11, GFLAGS), -+ COMPOSITE_NODIV(CLK_PKA_CRYPTO, "clk_pka_crypto", mux_300m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 7, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 12, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(3), 13, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 14, GFLAGS), -+ GATE(ACLK_DECOM, "aclk_decom", "aclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 9, GFLAGS), -+ GATE(PCLK_DECOM, "pclk_decom", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_DECOM, "dclk_decom", mux_400m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 14, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(5), 11, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(5), 8, GFLAGS), -+ GATE(PCLK_DSM, "pclk_dsm", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 2, GFLAGS), -+ GATE(MCLK_DSM, "mclk_dsm", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_400m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RV1106_PERICLKGATE_CON(4), 12, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 13, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 0, GFLAGS), -+ 
GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 8, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 12, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 11, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 14, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(2), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 15, GFLAGS), -+ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 0, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_peri_root", CLK_IGNORE_UNUSED, -+ RV1106_PERICLKGATE_CON(6), 7, GFLAGS), -+ GATE(HCLK_IVE, "hclk_ive", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 9, GFLAGS), -+ GATE(ACLK_IVE, "aclk_ive", "aclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 10, GFLAGS), -+ GATE(PCLK_PWM0_PERI, "pclk_pwm0_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(7), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM0_PERI, "clk_pwm0_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(11), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(7), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PWM0_PERI, "clk_capture_pwm0_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(7), 5, GFLAGS), -+ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(0), 3, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 14, GFLAGS), -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_500m_300m_200m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 12, 2, MFLAGS, 7, 5, DFLAGS, -+ RV1106_PERICLKGATE_CON(5), 0, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 11, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 15, GFLAGS), -+ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 15, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 9, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(4), 0, GFLAGS), -+ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 1, GFLAGS), -+ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 11, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(4), 3, GFLAGS), -+ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 4, GFLAGS), -+ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 7, GFLAGS), -+ GATE(HCLK_SAI, "hclk_sai", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 13, GFLAGS), -+ GATE(MCLK_SAI, "mclk_sai", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKGATE_CON(5), 14, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 3, 
GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RV1106_PERICLKSEL_CON(6), 0, 3, DFLAGS, -+ RV1106_PERICLKGATE_CON(3), 4, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 3, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 7, GFLAGS), -+ GATE(PCLK_STIMER, "pclk_stimer", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 15, GFLAGS), -+ GATE(CLK_STIMER0, "clk_stimer0", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(1), 0, GFLAGS), -+ GATE(CLK_STIMER1, "clk_stimer1", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 8, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 9, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 10, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 11, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 13, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 14, GFLAGS), -+ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 9, GFLAGS), -+ GATE(HCLK_TRNG_S, "hclk_trng_s", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 10, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 3, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 11, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 15, GFLAGS), -+ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(4), 7, GFLAGS), -+ GATE(CLK_REF_USBOTG, "clk_ref_usbotg", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 8, GFLAGS), -+ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 1, GFLAGS), -+ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 2, GFLAGS), -+ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_WDT_S, "pclk_wdt_s", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 4, GFLAGS), -+ GATE(TCLK_WDT_S, "tclk_wdt_s", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(1), 5, GFLAGS), ++static struct rockchip_clk_branch rk3399_uart2_fracmux __initdata = ++ MUXTBL(SCLK_UART2, "clk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(35), 8, 2, MFLAGS, uart_mux_idx); + -+ /* PD_PMU */ -+ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RV1106_PMUCLKSEL_CON(6), 0, -+ RV1106_PMUCLKGATE_CON(1), 14, GFLAGS, -+ &rv1106_rtc32k_pmu_fracmux), -+ DIV(CLK_100M_PMU, "clk_100m_pmu", "clk_200m_src", 0, -+ RV1106_PMUCLKSEL_CON(0), 0, 3, DFLAGS), -+ COMPOSITE_NODIV(PCLK_PMU_ROOT, "pclk_pmu_root", mux_100m_pmu_24m_p, CLK_IS_CRITICAL, -+ RV1106_PMUCLKSEL_CON(0), 3, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PMU_ROOT, "hclk_pmu_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PMUCLKSEL_CON(0), 4, 2, MFLAGS, -+ 
RV1106_PMUCLKGATE_CON(0), 2, GFLAGS), -+ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, -+ RV1106_PMUCLKGATE_CON(1), 0, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RV1106_PMUCLKGATE_CON(1), 1, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu", 0, -+ RV1106_PMUCLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_PMU_GPIO0, "pclk_pmu_gpio0", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_PMU_GPIO0, "dbclk_pmu_gpio0", mux_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(0), 15, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS, -+ RV1106_PMUCLKGATE_CON(0), 4, GFLAGS), -+ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(2), 10, GFLAGS), -+ GATE(CLK_PMU_MCU, "clk_pmu_mcu", "hclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(0), 9, GFLAGS), -+ GATE(CLK_PMU_MCU_RTC, "clk_pmu_mcu_rtc", "xin24m", 0, -+ RV1106_PMUCLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RV1106_PMUCLKSEL_CON(1), 0, 5, DFLAGS, -+ RV1106_PMUCLKGATE_CON(1), 4, GFLAGS), -+ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(1), 5, GFLAGS), -+ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, -+ RV1106_PMUCLKGATE_CON(2), 13, GFLAGS), -+ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IGNORE_UNUSED, -+ RV1106_PMUCLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(7), 2, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(2), 9, GFLAGS), ++static struct rockchip_clk_branch rk3399_uart3_fracmux __initdata = ++ MUXTBL(SCLK_UART3, "clk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(36), 8, 2, MFLAGS, uart_mux_idx); + -+ /* PD_SUBDDR */ -+ COMPOSITE(CLK_CORE_DDRC_SRC, "clk_core_ddrc_src", mux_dpll_300m_p, CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_SUBDDRCLKGATE_CON(0), 2, GFLAGS), -+ GATE(CLK_DFICTRL, "clk_dfictrl", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 5, GFLAGS), -+ GATE(CLK_DDRMON, "clk_ddrmon", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 4, GFLAGS), -+ GATE(CLK_DDR_PHY, "clk_ddr_phy", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 6, GFLAGS), -+ GATE(ACLK_DDRC, "aclk_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, -+ RV1106_SUBDDRCLKGATE_CON(0), 1, GFLAGS), -+ GATE(CLK_CORE_DDRC, "clk_core_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, -+ RV1106_SUBDDRCLKGATE_CON(0), 3, GFLAGS), ++static struct rockchip_clk_branch rk3399_uart4_pmu_fracmux __initdata = ++ MUXTBL(SCLK_UART4_PMU, "clk_uart4_pmu", mux_uart4_pmu_p, CLK_SET_RATE_PARENT, ++ RK3399_PMU_CLKSEL_CON(5), 8, 2, MFLAGS, uart_mux_idx); + ++static struct rockchip_clk_branch rk3399_dclk_vop0_fracmux __initdata = ++ MUX(DCLK_VOP0, "dclk_vop0", mux_dclk_vop0_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(49), 11, 1, MFLAGS); + -+ /* PD_VEPU */ -+ COMPOSITE_NODIV(HCLK_VEPU_ROOT, "hclk_vepu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VEPU_COM_ROOT, "aclk_vepu_com_root", mux_400m_200m_100m_24m_p, 
CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VEPU_ROOT, "aclk_vepu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VEPU_ROOT, "pclk_vepu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 6, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 3, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 12, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(1), 3, GFLAGS), -+ GATE(CLK_UART_DETN_FLT, "clk_uart_detn_flt", "xin24m", 0, -+ RV1106_VEPUCLKGATE_CON(1), 8, GFLAGS), -+ GATE(HCLK_VEPU, "hclk_vepu", "hclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 8, GFLAGS), -+ GATE(ACLK_VEPU, "aclk_vepu", "aclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_VEPU, "clk_core_vepu", mux_400m_300m_pvtpll0_pvtpll1_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 8, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_VEPU_DVBM, "clk_core_vepu_dvbm", mux_200m_100m_50m_24m_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 10, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 13, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 15, GFLAGS), -+ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, -+ RV1106_VEPUCLKGATE_CON(1), 0, GFLAGS), -+ GATE(HCLK_VEPU_PP, "hclk_vepu_pp", "hclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 11, GFLAGS), -+ GATE(ACLK_VEPU_PP, "aclk_vepu_pp", "aclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 12, GFLAGS), ++static struct rockchip_clk_branch rk3399_dclk_vop1_fracmux __initdata = ++ MUX(DCLK_VOP1, "dclk_vop1", mux_dclk_vop1_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(50), 11, 1, MFLAGS); + -+ /* PD_VI */ -+ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VI_ROOT, "aclk_vi_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VI_RTC_ROOT, "pclk_vi_rtc_root", mux_50m_24m_p, 0, -+ RV1106_VICLKSEL_CON(0), 6, 1, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 3, GFLAGS), ++static struct rockchip_clk_branch rk3399_pmuclk_wifi_fracmux __initdata = ++ MUX(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, ++ RK3399_PMU_CLKSEL_CON(1), 14, 1, MFLAGS); + -+ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 15, GFLAGS), -+ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, -+ RV1106_VICLKGATE_CON(2), 0, GFLAGS), -+ GATE(HCLK_ISP3P2, "hclk_isp3p2", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 7, GFLAGS), -+ GATE(ACLK_ISP3P2, "aclk_isp3p2", "aclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_ISP3P2, "clk_core_isp3p2", mux_339m_200m_pvtpll0_pvtpll1_p, 0, -+ RV1106_VICLKSEL_CON(0), 7, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 9, 
GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDMMC, "cclk_src_sdmmc", mux_400m_24m_p, 0, -+ RV1106_VICLKSEL_CON(1), 14, 1, MFLAGS, 8, 6, DFLAGS, -+ RV1106_VICLKGATE_CON(1), 11, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 12, GFLAGS), -+ GATE(CLK_SDMMC_DETN_FLT, "clk_sdmmc_detn_flt", "xin24m", 0, -+ RV1106_VICLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_VI_RTC_TEST, "pclk_vi_rtc_test", "pclk_vi_rtc_root", 0, -+ RV1106_VICLKGATE_CON(2), 5, GFLAGS), -+ GATE(PCLK_VI_RTC_PHY, "pclk_vi_rtc_phy", "pclk_vi_rtc_root", 0, -+ RV1106_VICLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", mux_339m_200m_100m_24m_p, 0, -+ RV1106_VICLKSEL_CON(0), 9, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 10, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 12, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 13, GFLAGS), ++static const struct rockchip_cpuclk_reg_data rk3399_cpuclkl_data = { ++ .core_reg[0] = RK3399_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 3, ++ .mux_core_main = 0, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x3, ++}; + -+ /* PD_VO */ -+ COMPOSITE_NODIV(ACLK_MAC_ROOT, "aclk_mac_root", mux_300m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 12, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VOP_ROOT, "aclk_vop_root", mux_300m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 10, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 11, GFLAGS), ++static const struct rockchip_cpuclk_reg_data rk3399_cpuclkb_data = { ++ .core_reg[0] = RK3399_CLKSEL_CON(2), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 3, ++ .mux_core_main = 1, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x3, ++}; + -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(3), 0, GFLAGS), -+ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(3), 1, GFLAGS), -+ GATE(ACLK_MAC, "aclk_mac", "aclk_mac_root", 0, -+ RV1106_VOCLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_MAC, "pclk_mac", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(1), 9, GFLAGS), -+ FACTOR(CLK_GMAC0_50M_O, "clk_gmac0_50m_o", "clk_50m_src", 0, 1, 1), -+ FACTOR(CLK_GMAC0_REF_50M, "clk_gmac0_ref_50m", "clk_gmac0_50m_o", 0, 1, 1), -+ DIV(CLK_GMAC0_TX_50M_O, "clk_gmac0_tx_50m_o", "clk_gmac0_50m_o", 0, -+ RV1106_VOCLKSEL_CON(2), 1, 6, DFLAGS), -+ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 13, GFLAGS), -+ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 11, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 3, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, -+ 
RV1106_VOCLKSEL_CON(3), 10, 3, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 6, GFLAGS), -+ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 7, GFLAGS), -+ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 13, 3, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 10, GFLAGS), -+ GATE(PCLK_OTP_MASK, "pclk_otp_mask", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 14, GFLAGS), -+ GATE(CLK_PMC_OTP, "clk_pmc_otp", "clk_sbpi_otpc_s", 0, -+ RV1106_VOCLKGATE_CON(2), 15, GFLAGS), -+ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 7, GFLAGS), -+ GATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_400m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 8, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", mux_400m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(2), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RV1106_VOCLKGATE_CON(1), 14, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 0, 5, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 5, 5, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 2, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 13, GFLAGS), -+ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_src", 0, -+ RV1106_VOCLKGATE_CON(0), 14, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, -+ RV1106_VOCLKGATE_CON(0), 15, GFLAGS), ++#define RK3399_DIV_ACLKM_MASK 0x1f ++#define RK3399_DIV_ACLKM_SHIFT 8 ++#define RK3399_DIV_ATCLK_MASK 0x1f ++#define RK3399_DIV_ATCLK_SHIFT 0 ++#define RK3399_DIV_PCLK_DBG_MASK 0x1f ++#define RK3399_DIV_PCLK_DBG_SHIFT 8 + -+ /* IO CLK */ -+ GATE(RX0PCLK_VICAP, "rx0pclk_vicap", "rx0pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 0, GFLAGS), -+ GATE(RX1PCLK_VICAP, "rx1pclk_vicap", "rx1pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 1, GFLAGS), -+ GATE(ISP0CLK_VICAP, "isp0clk_vicap", "isp0clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 2, GFLAGS), -+ GATE(I0CLK_VICAP, "i0clk_vicap", "i0clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 14, GFLAGS), -+ GATE(I1CLK_VICAP, "i1clk_vicap", "i1clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 15, GFLAGS), -+ GATE(PCLK_VICAP, "pclk_vicap", "pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 11, GFLAGS), -+ GATE(CLK_RXBYTECLKHS_0, "clk_rxbyteclkhs_0", "clk_rxbyteclkhs_0_io", 0, -+ RV1106_VICLKGATE_CON(1), 4, GFLAGS), -+ GATE(CLK_RXBYTECLKHS_1, "clk_rxbyteclkhs_1", "clk_rxbyteclkhs_1_io", 0, -+ RV1106_VICLKGATE_CON(1), 6, GFLAGS), ++#define RK3399_CLKSEL0(_offs, _aclkm) \ ++ { \ ++ .reg = RK3399_CLKSEL_CON(0 + _offs), \ ++ .val = HIWORD_UPDATE(_aclkm, RK3399_DIV_ACLKM_MASK, \ ++ RK3399_DIV_ACLKM_SHIFT), \ ++ } ++#define RK3399_CLKSEL1(_offs, _atclk, _pdbg) \ ++ { \ ++ .reg = RK3399_CLKSEL_CON(1 + _offs), \ ++ .val = HIWORD_UPDATE(_atclk, RK3399_DIV_ATCLK_MASK, \ ++ RK3399_DIV_ATCLK_SHIFT) | \ ++ HIWORD_UPDATE(_pdbg, RK3399_DIV_PCLK_DBG_MASK, \ ++ RK3399_DIV_PCLK_DBG_SHIFT), \ ++ } + -+ GATE(PCLK_VICAP_VEPU, "pclk_vicap_vepu", "pclk_vicap_vepu_io", 0, -+ RV1106_VEPUCLKGATE_CON(0), 14, GFLAGS), -+ GATE(SCLK_IN_SPI0, "sclk_in_spi0", 
"sclk_in_spi0_io", 0, -+ RV1106_VEPUCLKGATE_CON(1), 4, GFLAGS), ++/* cluster_l: aclkm in clksel0, rest in clksel1 */ ++#define RK3399_CPUCLKL_RATE(_prate, _aclkm, _atclk, _pdbg) \ ++ { \ ++ .prate = _prate##U, \ ++ .divs = { \ ++ RK3399_CLKSEL0(0, _aclkm), \ ++ RK3399_CLKSEL1(0, _atclk, _pdbg), \ ++ }, \ ++ } + -+ GATE(CLK_UTMI_USBOTG, "clk_utmi_usbotg", "clk_utmi_usbotg_io", 0, -+ RV1106_PERICLKGATE_CON(4), 9, GFLAGS), ++/* cluster_b: aclkm in clksel2, rest in clksel3 */ ++#define RK3399_CPUCLKB_RATE(_prate, _aclkm, _atclk, _pdbg) \ ++ { \ ++ .prate = _prate##U, \ ++ .divs = { \ ++ RK3399_CLKSEL0(2, _aclkm), \ ++ RK3399_CLKSEL1(2, _atclk, _pdbg), \ ++ }, \ ++ } + ++static struct rockchip_cpuclk_rate_table rk3399_cpuclkl_rates[] __initdata = { ++ RK3399_CPUCLKL_RATE(1800000000, 1, 8, 8), ++ RK3399_CPUCLKL_RATE(1704000000, 1, 8, 8), ++ RK3399_CPUCLKL_RATE(1608000000, 1, 7, 7), ++ RK3399_CPUCLKL_RATE(1512000000, 1, 7, 7), ++ RK3399_CPUCLKL_RATE(1488000000, 1, 6, 6), ++ RK3399_CPUCLKL_RATE(1416000000, 1, 6, 6), ++ RK3399_CPUCLKL_RATE(1200000000, 1, 5, 5), ++ RK3399_CPUCLKL_RATE(1008000000, 1, 5, 5), ++ RK3399_CPUCLKL_RATE( 816000000, 1, 4, 4), ++ RK3399_CPUCLKL_RATE( 696000000, 1, 3, 3), ++ RK3399_CPUCLKL_RATE( 600000000, 1, 3, 3), ++ RK3399_CPUCLKL_RATE( 408000000, 1, 2, 2), ++ RK3399_CPUCLKL_RATE( 312000000, 1, 1, 1), ++ RK3399_CPUCLKL_RATE( 216000000, 1, 1, 1), ++ RK3399_CPUCLKL_RATE( 96000000, 1, 1, 1), +}; + -+static struct rockchip_clk_branch rv1106_grf_clk_branches[] __initdata = { -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_src_emmc", RV1106_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_src_emmc", RV1106_EMMC_CON1, 1), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc", RV1106_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc", RV1106_SDMMC_CON1, 1), -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RV1106_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RV1106_SDIO_CON1, 1), ++static struct rockchip_cpuclk_rate_table rk3399_cpuclkb_rates[] __initdata = { ++ RK3399_CPUCLKB_RATE(2208000000, 1, 11, 11), ++ RK3399_CPUCLKB_RATE(2184000000, 1, 11, 11), ++ RK3399_CPUCLKB_RATE(2088000000, 1, 10, 10), ++ RK3399_CPUCLKB_RATE(2040000000, 1, 10, 10), ++ RK3399_CPUCLKB_RATE(2016000000, 1, 9, 9), ++ RK3399_CPUCLKB_RATE(1992000000, 1, 9, 9), ++ RK3399_CPUCLKB_RATE(1896000000, 1, 9, 9), ++ RK3399_CPUCLKB_RATE(1800000000, 1, 8, 8), ++ RK3399_CPUCLKB_RATE(1704000000, 1, 8, 8), ++ RK3399_CPUCLKB_RATE(1608000000, 1, 7, 7), ++ RK3399_CPUCLKB_RATE(1512000000, 1, 7, 7), ++ RK3399_CPUCLKB_RATE(1488000000, 1, 6, 6), ++ RK3399_CPUCLKB_RATE(1416000000, 1, 6, 6), ++ RK3399_CPUCLKB_RATE(1200000000, 1, 5, 5), ++ RK3399_CPUCLKB_RATE(1008000000, 1, 5, 5), ++ RK3399_CPUCLKB_RATE( 816000000, 1, 4, 4), ++ RK3399_CPUCLKB_RATE( 696000000, 1, 3, 3), ++ RK3399_CPUCLKB_RATE( 600000000, 1, 3, 3), ++ RK3399_CPUCLKB_RATE( 408000000, 1, 2, 2), ++ RK3399_CPUCLKB_RATE( 312000000, 1, 1, 1), ++ RK3399_CPUCLKB_RATE( 216000000, 1, 1, 1), ++ RK3399_CPUCLKB_RATE( 96000000, 1, 1, 1), +}; + -+static void __iomem *rv1106_cru_base; -+static struct rockchip_clk_provider *grf_ctx, *cru_ctx; -+ -+void rv1106_dump_cru(void) -+{ -+ if (rv1106_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rv1106_cru_base, -+ 0x588, false); -+ } -+} -+EXPORT_SYMBOL_GPL(rv1106_dump_cru); -+ -+static void _cru_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) -+{ -+ unsigned int rate0, rate1, delta, length_ori, length, step, val, 
i = 0; -+ -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); -+ if (rate0 < target_rate) -+ return; -+ /* delta < (3.125% * target_rate) */ -+ if ((rate0 - target_rate) < (target_rate >> 5)) -+ return; ++static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { ++ /* ++ * CRU Clock-Architecture ++ */ + -+ length_ori = readl_relaxed(rv1106_cru_base + length_offset) & PVTPLL_LENGTH_SEL_MASK; -+ length = length_ori; -+ length++; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate1 = readl_relaxed(rv1106_cru_base + count_offset); -+ if ((rate1 < target_rate) || (rate1 >= rate0)) -+ return; -+ if (abs(rate1 - target_rate) < (target_rate >> 5)) -+ return; ++ /* usbphy */ ++ GATE(SCLK_USB2PHY0_REF, "clk_usb2phy0_ref", "xin24m", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(SCLK_USB2PHY1_REF, "clk_usb2phy1_ref", "xin24m", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 6, GFLAGS), + -+ step = rate0 - rate1; -+ delta = rate1 - target_rate; -+ length += delta / step; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); ++ GATE(SCLK_USBPHY0_480M_SRC, "clk_usbphy0_480m_src", "clk_usbphy0_480m", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(13), 12, GFLAGS), ++ GATE(SCLK_USBPHY1_480M_SRC, "clk_usbphy1_480m_src", "clk_usbphy1_480m", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(13), 12, GFLAGS), ++ MUX(0, "clk_usbphy_480m", mux_usbphy_480m_p, 0, ++ RK3399_CLKSEL_CON(14), 6, 1, MFLAGS), + -+ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { -+ if (i++ > 20) -+ break; -+ if (rate0 > target_rate) -+ length++; -+ else -+ length--; -+ if (length <= length_ori) -+ break; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); -+ } -+} ++ MUX(0, "upll", mux_pll_src_24m_usbphy480m_p, 0, ++ RK3399_CLKSEL_CON(14), 15, 1, MFLAGS), + -+static void _grf_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) -+{ -+ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; ++ COMPOSITE_NODIV(SCLK_HSICPHY, "clk_hsicphy", mux_pll_src_cpll_gpll_npll_usbphy480m_p, 0, ++ RK3399_CLKSEL_CON(19), 0, 2, MFLAGS, ++ RK3399_CLKGATE_CON(6), 4, GFLAGS), + -+ regmap_read(cru_ctx->grf, count_offset, &rate0); -+ if (rate0 < target_rate) -+ return; -+ /* delta < (3.125% * target_rate) */ -+ if ((rate0 - target_rate) < (target_rate >> 5)) -+ return; ++ COMPOSITE(ACLK_USB3, "aclk_usb3", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(ACLK_USB3_NOC, "aclk_usb3_noc", "aclk_usb3", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(30), 0, GFLAGS), ++ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_usb3", 0, ++ RK3399_CLKGATE_CON(30), 1, GFLAGS), ++ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_usb3", 0, ++ RK3399_CLKGATE_CON(30), 2, GFLAGS), ++ GATE(ACLK_USB3_RKSOC_AXI_PERF, "aclk_usb3_rksoc_axi_perf", "aclk_usb3", 0, ++ RK3399_CLKGATE_CON(30), 3, GFLAGS), ++ GATE(ACLK_USB3_GRF, "aclk_usb3_grf", "aclk_usb3", 0, ++ RK3399_CLKGATE_CON(30), 4, GFLAGS), + -+ regmap_read(cru_ctx->grf, length_offset, &length_ori); -+ length = length_ori; -+ length_ori = length; -+ 
length &= PVTPLL_LENGTH_SEL_MASK; -+ length++; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate1); -+ if ((rate1 < target_rate) || (rate1 >= rate0)) -+ return; -+ if (abs(rate1 - target_rate) < (target_rate >> 5)) -+ return; ++ GATE(SCLK_USB3OTG0_REF, "clk_usb3otg0_ref", "xin24m", 0, ++ RK3399_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(SCLK_USB3OTG1_REF, "clk_usb3otg1_ref", "xin24m", 0, ++ RK3399_CLKGATE_CON(12), 2, GFLAGS), + -+ step = rate0 - rate1; -+ delta = rate1 - target_rate; -+ length += delta / step; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate0); ++ COMPOSITE(SCLK_USB3OTG0_SUSPEND, "clk_usb3otg0_suspend", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(40), 15, 1, MFLAGS, 0, 10, DFLAGS, ++ RK3399_CLKGATE_CON(12), 3, GFLAGS), + -+ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { -+ if (i++ > 20) -+ break; -+ if (rate0 > target_rate) -+ length++; -+ else -+ length--; -+ if (length <= length_ori) -+ break; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate0); -+ } -+} ++ COMPOSITE(SCLK_USB3OTG1_SUSPEND, "clk_usb3otg1_suspend", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(41), 15, 1, MFLAGS, 0, 10, DFLAGS, ++ RK3399_CLKGATE_CON(12), 4, GFLAGS), + -+static void rockchip_rv1106_pvtpll_calibrate(struct work_struct *w) -+{ -+ struct clk *clk; -+ unsigned long rate; ++ COMPOSITE(SCLK_UPHY0_TCPDPHY_REF, "clk_uphy0_tcpdphy_ref", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 4, GFLAGS), + -+ clk = __clk_lookup("clk_pvtpll_0"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _cru_pvtpll_calibrate(CRU_PVTPLL0_OSC_CNT_AVG, -+ CRU_PVTPLL0_CON0_H, rate / 1000000); -+ } ++ COMPOSITE(SCLK_UPHY0_TCPDCORE, "clk_uphy0_tcpdcore", mux_pll_src_24m_32k_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 5, GFLAGS), + -+ clk = __clk_lookup("clk_pvtpll_1"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _cru_pvtpll_calibrate(CRU_PVTPLL1_OSC_CNT_AVG, -+ CRU_PVTPLL1_CON0_H, rate / 1000000); -+ } ++ COMPOSITE(SCLK_UPHY1_TCPDPHY_REF, "clk_uphy1_tcpdphy_ref", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(65), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 6, GFLAGS), + -+ clk = __clk_lookup("cpu_pvtpll"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _grf_pvtpll_calibrate(CPU_PVTPLL_OSC_CNT_AVG, -+ CPU_PVTPLL_CON0_H, rate / 1000000); -+ } -+} -+static DECLARE_DEFERRABLE_WORK(pvtpll_calibrate_work, rockchip_rv1106_pvtpll_calibrate); ++ COMPOSITE(SCLK_UPHY1_TCPDCORE, "clk_uphy1_tcpdcore", mux_pll_src_24m_32k_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(65), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 7, GFLAGS), + -+static void rockchip_rv1106_pvtpll_init(struct rockchip_clk_provider *ctx) -+{ -+ /* set pvtpll ref clk mux */ -+ writel_relaxed(CPU_PVTPLL_PATH_CORE, ctx->reg_base + CPU_CLK_PATH_BASE); ++ /* little core */ ++ GATE(0, "clk_core_l_lpll_src", "lpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "clk_core_l_bpll_src", "bpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "clk_core_l_dpll_src", "dpll", 
CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "clk_core_l_gpll_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(0), 3, GFLAGS), + -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_H, HIWORD_UPDATE(0x7, PVTPLL_LENGTH_SEL_MASK, -+ PVTPLL_LENGTH_SEL_SHIFT)); -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x1, PVTPLL_RING_SEL_MASK, -+ PVTPLL_RING_SEL_SHIFT)); -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x3, PVTPLL_EN_MASK, -+ PVTPLL_EN_SHIFT)); ++ COMPOSITE_NOMUX(0, "aclkm_core_l", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(0), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "atclk_core_l", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_COREDBG_L, "pclk_dbg_core_l", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(1), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(0), 6, GFLAGS), + -+ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL0_CON0_H); -+ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL0_CON1_L); -+ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL0_CON2_H); -+ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL0_CON0_L); ++ GATE(ACLK_CORE_ADB400_CORE_L_2_CCI500, "aclk_core_adb400_core_l_2_cci500", "aclkm_core_l", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 12, GFLAGS), ++ GATE(ACLK_PERF_CORE_L, "aclk_perf_core_l", "aclkm_core_l", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 13, GFLAGS), + -+ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL1_CON0_H); -+ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL1_CON1_L); -+ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL1_CON2_H); -+ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL1_CON0_L); ++ GATE(0, "clk_dbg_pd_core_l", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(ACLK_GIC_ADB400_GIC_2_CORE_L, "aclk_core_adb400_gic_2_core_l", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(ACLK_GIC_ADB400_CORE_L_2_GIC, "aclk_core_adb400_core_l_2_gic", "armclkl", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(SCLK_PVTM_CORE_L, "clk_pvtm_core_l", "xin24m", 0, ++ RK3399_CLKGATE_CON(0), 7, GFLAGS), + -+ schedule_delayed_work(&pvtpll_calibrate_work, msecs_to_jiffies(3000)); -+} ++ /* big core */ ++ GATE(0, "clk_core_b_lpll_src", "lpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(0, "clk_core_b_bpll_src", "bpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(0, "clk_core_b_dpll_src", "dpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(0, "clk_core_b_gpll_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(1), 3, GFLAGS), + -+static int rv1106_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) -+{ -+ rv1106_dump_cru(); -+ return NOTIFY_DONE; -+} ++ COMPOSITE_NOMUX(0, "aclkm_core_b", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(2), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "atclk_core_b", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(1), 5, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg_core_b", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3399_CLKGATE_CON(1), 6, GFLAGS), + -+static struct notifier_block rv1106_clk_panic_block = { -+ .notifier_call = 
rv1106_clk_panic, -+}; ++ GATE(ACLK_CORE_ADB400_CORE_B_2_CCI500, "aclk_core_adb400_core_b_2_cci500", "aclkm_core_b", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(ACLK_PERF_CORE_B, "aclk_perf_core_b", "aclkm_core_b", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 6, GFLAGS), + -+static void __init rv1106_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **cru_clks; ++ GATE(0, "clk_dbg_pd_core_b", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(ACLK_GIC_ADB400_GIC_2_CORE_B, "aclk_core_adb400_gic_2_core_b", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(ACLK_GIC_ADB400_CORE_B_2_GIC, "aclk_core_adb400_core_b_2_gic", "armclkb", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 4, GFLAGS), + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } ++ DIV(PCLK_COREDBG_B, "pclken_dbg_core_b", "pclk_dbg_core_b", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(3), 13, 2, DFLAGS | CLK_DIVIDER_READ_ONLY), + -+ rv1106_cru_base = reg_base; ++ GATE(0, "pclk_dbg_cxcs_pd_core_b", "pclk_dbg_core_b", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(14), 2, GFLAGS), + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ cru_ctx = ctx; ++ GATE(SCLK_PVTM_CORE_B, "clk_pvtm_core_b", "xin24m", 0, ++ RK3399_CLKGATE_CON(1), 7, GFLAGS), + -+ rockchip_rv1106_pvtpll_init(ctx); ++ /* gmac */ ++ GATE(0, "cpll_aclk_gmac_src", "cpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(0, "gpll_aclk_gmac_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 8, GFLAGS), ++ COMPOSITE(0, "aclk_gmac_pre", mux_aclk_gmac_p, 0, ++ RK3399_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(6), 10, GFLAGS), + -+ cru_clks = ctx->clk_data.clks; ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_gmac_pre", 0, ++ RK3399_CLKGATE_CON(32), 0, GFLAGS), ++ GATE(ACLK_GMAC_NOC, "aclk_gmac_noc", "aclk_gmac_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(32), 1, GFLAGS), ++ GATE(ACLK_PERF_GMAC, "aclk_perf_gmac", "aclk_gmac_pre", 0, ++ RK3399_CLKGATE_CON(32), 4, GFLAGS), + -+ rockchip_clk_register_plls(ctx, rv1106_pll_clks, -+ ARRAY_SIZE(rv1106_pll_clks), -+ RV1106_GRF_SOC_STATUS0); ++ COMPOSITE_NOMUX(0, "pclk_gmac_pre", "aclk_gmac_pre", 0, ++ RK3399_CLKSEL_CON(19), 8, 3, DFLAGS, ++ RK3399_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_gmac_pre", 0, ++ RK3399_CLKGATE_CON(32), 2, GFLAGS), ++ GATE(PCLK_GMAC_NOC, "pclk_gmac_noc", "pclk_gmac_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(32), 3, GFLAGS), + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, cru_clks[PLL_APLL], cru_clks[PLL_GPLL], -+ &rv1106_cpuclk_data, rv1106_cpuclk_rates, -+ ARRAY_SIZE(rv1106_cpuclk_rates)); ++ COMPOSITE(SCLK_MAC, "clk_gmac", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(5), 5, GFLAGS), + -+ rockchip_clk_register_branches(ctx, rv1106_clk_branches, -+ ARRAY_SIZE(rv1106_clk_branches)); ++ MUX(SCLK_RMII_SRC, "clk_rmii_src", mux_rmii_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(19), 4, 1, MFLAGS), ++ GATE(SCLK_MACREF_OUT, "clk_mac_refout", "clk_rmii_src", 0, ++ RK3399_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(SCLK_MACREF, "clk_mac_ref", "clk_rmii_src", 0, ++ RK3399_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(SCLK_MAC_RX, "clk_rmii_rx", "clk_rmii_src", 0, ++ 
RK3399_CLKGATE_CON(5), 8, GFLAGS), ++ GATE(SCLK_MAC_TX, "clk_rmii_tx", "clk_rmii_src", 0, ++ RK3399_CLKGATE_CON(5), 9, GFLAGS), + -+ rockchip_clk_register_branches(grf_ctx, rv1106_grf_clk_branches, -+ ARRAY_SIZE(rv1106_grf_clk_branches)); ++ /* spdif */ ++ COMPOSITE(SCLK_SPDIF_DIV, "clk_spdif_div", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(32), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(99), 0, ++ RK3399_CLKGATE_CON(8), 14, GFLAGS, ++ &rk3399_spdif_fracmux), ++ GATE(SCLK_SPDIF_8CH, "clk_spdif", "clk_spdif_mux", CLK_SET_RATE_PARENT, ++ RK3399_CLKGATE_CON(8), 15, GFLAGS), + -+ rockchip_register_softrst(np, 31745, reg_base + RV1106_PMUSOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ COMPOSITE(SCLK_SPDIF_REC_DPTX, "clk_spdif_rec_dptx", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(32), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 6, GFLAGS), ++ /* i2s */ ++ COMPOSITE(SCLK_I2S0_DIV, "clk_i2s0_div", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(28), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(8), 3, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(96), 0, ++ RK3399_CLKGATE_CON(8), 4, GFLAGS, ++ &rk3399_i2s0_fracmux), ++ GATE(SCLK_I2S0_8CH, "clk_i2s0", "clk_i2s0_mux", CLK_SET_RATE_PARENT, ++ RK3399_CLKGATE_CON(8), 5, GFLAGS), + -+ rockchip_register_restart_notifier(ctx, RV1106_GLB_SRST_FST, NULL); ++ COMPOSITE(SCLK_I2S1_DIV, "clk_i2s1_div", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(8), 6, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(97), 0, ++ RK3399_CLKGATE_CON(8), 7, GFLAGS, ++ &rk3399_i2s1_fracmux), ++ GATE(SCLK_I2S1_8CH, "clk_i2s1", "clk_i2s1_mux", CLK_SET_RATE_PARENT, ++ RK3399_CLKGATE_CON(8), 8, GFLAGS), + -+ rockchip_clk_of_add_provider(np, ctx); ++ COMPOSITE(SCLK_I2S2_DIV, "clk_i2s2_div", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(30), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(8), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(98), 0, ++ RK3399_CLKGATE_CON(8), 10, GFLAGS, ++ &rk3399_i2s2_fracmux), ++ GATE(SCLK_I2S2_8CH, "clk_i2s2", "clk_i2s2_mux", CLK_SET_RATE_PARENT, ++ RK3399_CLKGATE_CON(8), 11, GFLAGS), + -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rv1106_clk_panic_block); -+} ++ MUX(SCLK_I2SOUT_SRC, "clk_i2sout_src", mux_i2sch_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(31), 0, 2, MFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "clk_i2sout", mux_i2sout_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(31), 2, 1, MFLAGS, ++ RK3399_CLKGATE_CON(8), 12, GFLAGS), + -+CLK_OF_DECLARE(rv1106_cru, "rockchip,rv1106-cru", rv1106_clk_init); ++ /* uart */ ++ MUX(SCLK_UART0_SRC, "clk_uart0_src", mux_pll_src_cpll_gpll_upll_p, 0, ++ RK3399_CLKSEL_CON(33), 12, 2, MFLAGS), ++ COMPOSITE_NOMUX(0, "clk_uart0_div", "clk_uart0_src", 0, ++ RK3399_CLKSEL_CON(33), 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(100), 0, ++ RK3399_CLKGATE_CON(9), 1, GFLAGS, ++ &rk3399_uart0_fracmux), + -+static void __init rv1106_grf_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++ MUX(SCLK_UART_SRC, "clk_uart_src", mux_pll_src_cpll_gpll_p, 0, ++ 
RK3399_CLKSEL_CON(33), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(0, "clk_uart1_div", "clk_uart_src", 0, ++ RK3399_CLKSEL_CON(34), 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 2, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(101), 0, ++ RK3399_CLKGATE_CON(9), 3, GFLAGS, ++ &rk3399_uart1_fracmux), + -+ reg_base = of_iomap(of_get_parent(np), 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru grf region\n", __func__); -+ return; -+ } ++ COMPOSITE_NOMUX(0, "clk_uart2_div", "clk_uart_src", 0, ++ RK3399_CLKSEL_CON(35), 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(102), 0, ++ RK3399_CLKGATE_CON(9), 5, GFLAGS, ++ &rk3399_uart2_fracmux), + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip grf clk init failed\n", __func__); -+ return; -+ } -+ grf_ctx = ctx; ++ COMPOSITE_NOMUX(0, "clk_uart3_div", "clk_uart_src", 0, ++ RK3399_CLKSEL_CON(36), 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 6, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(103), 0, ++ RK3399_CLKGATE_CON(9), 7, GFLAGS, ++ &rk3399_uart3_fracmux), + -+ rockchip_clk_of_add_provider(np, ctx); -+} -+CLK_OF_DECLARE(rv1106_grf_cru, "rockchip,rv1106-grf-cru", rv1106_grf_clk_init); ++ COMPOSITE(PCLK_DDR, "pclk_ddr", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(6), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(3), 4, GFLAGS), + -+#ifdef MODULE -+struct clk_rv1106_inits { -+ void (*inits)(struct device_node *np); -+}; ++ GATE(PCLK_CENTER_MAIN_NOC, "pclk_center_main_noc", "pclk_ddr", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(18), 10, GFLAGS), ++ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", 0, ++ RK3399_CLKGATE_CON(18), 12, GFLAGS), ++ GATE(PCLK_CIC, "pclk_cic", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(18), 15, GFLAGS), ++ GATE(PCLK_DDR_SGRF, "pclk_ddr_sgrf", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(19), 2, GFLAGS), + -+static const struct clk_rv1106_inits clk_rv1106_init = { -+ .inits = rv1106_clk_init, -+}; ++ GATE(SCLK_PVTM_DDR, "clk_pvtm_ddr", "xin24m", 0, ++ RK3399_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(SCLK_DFIMON0_TIMER, "clk_dfimon0_timer", "xin24m", 0, ++ RK3399_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(SCLK_DFIMON1_TIMER, "clk_dfimon1_timer", "xin24m", 0, ++ RK3399_CLKGATE_CON(3), 6, GFLAGS), + -+static const struct clk_rv1106_inits clk_rv1106_grf_init = { -+ .inits = rv1106_grf_clk_init, -+}; ++ /* cci */ ++ GATE(0, "cpll_aclk_cci_src", "cpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(0, "gpll_aclk_cci_src", "gpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(0, "npll_aclk_cci_src", "npll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(0, "vpll_aclk_cci_src", "vpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 3, GFLAGS), + -+static const struct of_device_id clk_rv1106_match_table[] = { -+ { -+ .compatible = "rockchip,rv1106-cru", -+ .data = &clk_rv1106_init, -+ }, { -+ .compatible = "rockchip,rv1106-grf-cru", -+ .data = &clk_rv1106_grf_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rv1106_match_table); ++ COMPOSITE(0, "aclk_cci_pre", mux_aclk_cci_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(5), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(2), 4, GFLAGS), + -+static int __init clk_rv1106_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = 
pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rv1106_inits *init_data; ++ GATE(ACLK_ADB400M_PD_CORE_L, "aclk_adb400m_pd_core_l", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(ACLK_ADB400M_PD_CORE_B, "aclk_adb400m_pd_core_b", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(ACLK_CCI, "aclk_cci", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(ACLK_CCI_NOC0, "aclk_cci_noc0", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(ACLK_CCI_NOC1, "aclk_cci_noc1", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(ACLK_CCI_GRF, "aclk_cci_grf", "aclk_cci_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 7, GFLAGS), + -+ match = of_match_device(clk_rv1106_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; ++ GATE(0, "cpll_cci_trace", "cpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(0, "gpll_cci_trace", "gpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE(SCLK_CCI_TRACE, "clk_cci_trace", mux_cci_trace_p, CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(2), 7, GFLAGS), + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++ GATE(0, "cpll_cs", "cpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(0, "gpll_cs", "gpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(0, "npll_cs", "npll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_CS, "clk_cs", mux_cs_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 5, DFLAGS), ++ GATE(0, "clk_dbg_cxcs", "clk_cs", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(0, "clk_dbg_noc", "clk_cs", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(15), 6, GFLAGS), + -+ return 0; -+} ++ /* vcodec */ ++ COMPOSITE(0, "aclk_vcodec_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, ++ RK3399_CLKSEL_CON(7), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vcodec_pre", "aclk_vcodec_pre", 0, ++ RK3399_CLKSEL_CON(7), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0, ++ RK3399_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(0, "hclk_vcodec_noc", "hclk_vcodec_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(17), 3, GFLAGS), + -+static struct platform_driver clk_rv1106_driver = { -+ .driver = { -+ .name = "clk-rv1106", -+ .of_match_table = clk_rv1106_match_table, -+ }, -+}; -+builtin_platform_driver_probe(clk_rv1106_driver, clk_rv1106_probe); ++ GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vcodec_pre", 0, ++ RK3399_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(0, "aclk_vcodec_noc", "aclk_vcodec_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(17), 1, GFLAGS), + -+MODULE_DESCRIPTION("Rockchip RV1106 Clock Driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip-oh/clk-rv1108.c b/drivers/clk/rockchip-oh/clk-rv1108.c -new file mode 100644 -index 000000000..0a93b9333 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rv1108.c -@@ -0,0 +1,855 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. 
-+ * Author: Shawn Lin -+ * Andy Yan -+ */ ++ /* vdu */ ++ COMPOSITE(SCLK_VDU_CORE, "clk_vdu_core", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 4, GFLAGS), ++ COMPOSITE(SCLK_VDU_CA, "clk_vdu_ca", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 5, GFLAGS), + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++ COMPOSITE(0, "aclk_vdu_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, ++ RK3399_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vdu_pre", "aclk_vdu_pre", 0, ++ RK3399_CLKSEL_CON(8), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(HCLK_VDU, "hclk_vdu", "hclk_vdu_pre", 0, ++ RK3399_CLKGATE_CON(17), 10, GFLAGS), ++ GATE(HCLK_VDU_NOC, "hclk_vdu_noc", "hclk_vdu_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(17), 11, GFLAGS), + -+#define RV1108_GRF_SOC_STATUS0 0x480 ++ GATE(ACLK_VDU, "aclk_vdu", "aclk_vdu_pre", 0, ++ RK3399_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(ACLK_VDU_NOC, "aclk_vdu_noc", "aclk_vdu_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(17), 9, GFLAGS), + -+enum rv1108_plls { -+ apll, dpll, gpll, -+}; ++ /* iep */ ++ COMPOSITE(0, "aclk_iep_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, ++ RK3399_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 6, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_iep_pre", "aclk_iep_pre", 0, ++ RK3399_CLKSEL_CON(10), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_iep_pre", 0, ++ RK3399_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(HCLK_IEP_NOC, "hclk_iep_noc", "hclk_iep_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(16), 3, GFLAGS), + -+static struct rockchip_pll_rate_table rv1108_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 
0), -+ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), -+ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), -+ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++ GATE(ACLK_IEP, "aclk_iep", "aclk_iep_pre", 0, ++ RK3399_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(ACLK_IEP_NOC, "aclk_iep_noc", "aclk_iep_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(16), 1, GFLAGS), + -+#define RV1108_DIV_CORE_MASK 0xf -+#define RV1108_DIV_CORE_SHIFT 4 ++ /* rga */ ++ COMPOSITE(SCLK_RGA_CORE, "clk_rga_core", mux_pll_src_cpll_gpll_npll_ppll_p, 0, ++ RK3399_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 10, GFLAGS), + -+#define RV1108_CLKSEL0(_core_peri_div) \ -+ { \ -+ .reg = RV1108_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_core_peri_div, RV1108_DIV_CORE_MASK,\ -+ RV1108_DIV_CORE_SHIFT) \ -+ } ++ COMPOSITE(0, "aclk_rga_pre", mux_pll_src_cpll_gpll_npll_ppll_p, 0, ++ RK3399_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_rga_pre", "aclk_rga_pre", 0, ++ RK3399_CLKSEL_CON(11), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(4), 9, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, ++ RK3399_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(HCLK_RGA_NOC, "hclk_rga_noc", "hclk_rga_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(16), 11, GFLAGS), + -+#define RV1108_CPUCLK_RATE(_prate, _core_peri_div) \ -+ { \ -+ .prate = _prate, \ -+ .divs = { \ -+ RV1108_CLKSEL0(_core_peri_div), \ -+ }, \ -+ } ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, ++ RK3399_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(ACLK_RGA_NOC, "aclk_rga_noc", "aclk_rga_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(16), 9, GFLAGS), + -+static struct rockchip_cpuclk_rate_table rv1108_cpuclk_rates[] __initdata = { -+ RV1108_CPUCLK_RATE(1608000000, 7), -+ RV1108_CPUCLK_RATE(1512000000, 7), -+ RV1108_CPUCLK_RATE(1488000000, 5), -+ RV1108_CPUCLK_RATE(1416000000, 5), -+ RV1108_CPUCLK_RATE(1392000000, 5), -+ RV1108_CPUCLK_RATE(1296000000, 5), -+ RV1108_CPUCLK_RATE(1200000000, 5), -+ RV1108_CPUCLK_RATE(1104000000, 5), -+ RV1108_CPUCLK_RATE(1008000000, 5), -+ RV1108_CPUCLK_RATE(912000000, 5), -+ RV1108_CPUCLK_RATE(816000000, 3), -+ RV1108_CPUCLK_RATE(696000000, 3), -+ RV1108_CPUCLK_RATE(600000000, 3), -+ RV1108_CPUCLK_RATE(500000000, 3), -+ RV1108_CPUCLK_RATE(408000000, 1), -+ RV1108_CPUCLK_RATE(312000000, 1), -+ RV1108_CPUCLK_RATE(216000000, 1), -+ RV1108_CPUCLK_RATE(96000000, 1), -+}; ++ /* center */ ++ COMPOSITE(ACLK_CENTER, "aclk_center", mux_pll_src_cpll_gpll_npll_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(3), 7, GFLAGS), ++ GATE(ACLK_CENTER_MAIN_NOC, "aclk_center_main_noc", "aclk_center", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(ACLK_CENTER_PERI_NOC, "aclk_center_peri_noc", "aclk_center", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(19), 1, GFLAGS), + -+static const struct rockchip_cpuclk_reg_data rv1108_cpuclk_data = { -+ .core_reg[0] = RV1108_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 8, -+ .mux_core_mask = 0x3, -+}; ++ /* gpu */ ++ COMPOSITE(0, 
"aclk_gpu_pre", mux_pll_src_ppll_cpll_gpll_npll_p, CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(13), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", 0, ++ RK3399_CLKGATE_CON(30), 8, GFLAGS), ++ GATE(ACLK_PERF_GPU, "aclk_perf_gpu", "aclk_gpu_pre", 0, ++ RK3399_CLKGATE_CON(30), 10, GFLAGS), ++ GATE(ACLK_GPU_GRF, "aclk_gpu_grf", "aclk_gpu_pre", 0, ++ RK3399_CLKGATE_CON(30), 11, GFLAGS), ++ GATE(SCLK_PVTM_GPU, "aclk_pvtm_gpu", "xin24m", 0, ++ RK3399_CLKGATE_CON(13), 1, GFLAGS), + -+PNAME(mux_pll_p) = { "xin24m", "xin24m"}; -+PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr", "apll_ddr" }; -+PNAME(mux_usb480m_pre_p) = { "usbphy", "xin24m" }; -+PNAME(mux_hdmiphy_phy_p) = { "hdmiphy", "xin24m" }; -+PNAME(mux_dclk_hdmiphy_pre_p) = { "dclk_hdmiphy_src_gpll", "dclk_hdmiphy_src_dpll" }; -+PNAME(mux_pll_src_4plls_p) = { "dpll", "gpll", "hdmiphy", "usb480m" }; -+PNAME(mux_pll_src_2plls_p) = { "dpll", "gpll" }; -+PNAME(mux_pll_src_apll_gpll_p) = { "apll", "gpll" }; -+PNAME(mux_aclk_peri_src_p) = { "aclk_peri_src_gpll", "aclk_peri_src_dpll" }; -+PNAME(mux_aclk_bus_src_p) = { "aclk_bus_src_gpll", "aclk_bus_src_apll", "aclk_bus_src_dpll" }; -+PNAME(mux_mmc_src_p) = { "dpll", "gpll", "xin24m", "usb480m" }; -+PNAME(mux_pll_src_dpll_gpll_usb480m_p) = { "dpll", "gpll", "usb480m" }; -+PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; -+PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; -+PNAME(mux_sclk_mac_p) = { "sclk_mac_pre", "ext_gmac" }; -+PNAME(mux_i2s0_pre_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; -+PNAME(mux_i2s_out_p) = { "i2s0_pre", "xin12m" }; -+PNAME(mux_i2s1_p) = { "i2s1_src", "i2s1_frac", "dummy", "xin12m" }; -+PNAME(mux_i2s2_p) = { "i2s2_src", "i2s2_frac", "dummy", "xin12m" }; -+PNAME(mux_wifi_src_p) = { "gpll", "xin24m" }; -+PNAME(mux_cifout_src_p) = { "hdmiphy", "gpll" }; -+PNAME(mux_cifout_p) = { "sclk_cifout_src", "xin24m" }; -+PNAME(mux_sclk_cif0_src_p) = { "pclk_vip", "clk_cif0_chn_out", "pclkin_cvbs2cif" }; -+PNAME(mux_sclk_cif1_src_p) = { "pclk_vip", "clk_cif1_chn_out", "pclkin_cvbs2cif" }; -+PNAME(mux_sclk_cif2_src_p) = { "pclk_vip", "clk_cif2_chn_out", "pclkin_cvbs2cif" }; -+PNAME(mux_sclk_cif3_src_p) = { "pclk_vip", "clk_cif3_chn_out", "pclkin_cvbs2cif" }; -+PNAME(mux_dsp_src_p) = { "dpll", "gpll", "apll", "usb480m" }; -+PNAME(mux_dclk_hdmiphy_p) = { "hdmiphy", "xin24m" }; -+PNAME(mux_dclk_vop_p) = { "dclk_hdmiphy", "dclk_vop_src" }; -+PNAME(mux_hdmi_cec_src_p) = { "dpll", "gpll", "xin24m" }; -+PNAME(mux_cvbs_src_p) = { "apll", "io_cvbs_clkin", "hdmiphy", "gpll" }; ++ /* perihp */ ++ GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(5), 2, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERIHP, "hclk_perihp", "aclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(14), 8, 2, DFLAGS, ++ RK3399_CLKGATE_CON(5), 3, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERIHP, "pclk_perihp", "aclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(14), 12, 3, DFLAGS, ++ RK3399_CLKGATE_CON(5), 4, GFLAGS), + -+static struct rockchip_pll_clock rv1108_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RV1108_PLL_CON(0), -+ RV1108_PLL_CON(3), 8, 0, 0, 
rv1108_pll_rates), -+ [dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RV1108_PLL_CON(8), -+ RV1108_PLL_CON(11), 8, 1, 0, NULL), -+ [gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RV1108_PLL_CON(16), -+ RV1108_PLL_CON(19), 8, 2, 0, rv1108_pll_rates), -+}; ++ GATE(ACLK_PERF_PCIE, "aclk_perf_pcie", "aclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 2, GFLAGS), ++ GATE(ACLK_PCIE, "aclk_pcie", "aclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 10, GFLAGS), ++ GATE(0, "aclk_perihp_noc", "aclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(20), 12, GFLAGS), + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 5, GFLAGS), ++ GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(HCLK_HOST1, "hclk_host1", "hclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 7, GFLAGS), ++ GATE(HCLK_HOST1_ARB, "hclk_host1_arb", "hclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 8, GFLAGS), ++ GATE(HCLK_HSIC, "hclk_hsic", "hclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 9, GFLAGS), ++ GATE(0, "hclk_perihp_noc", "hclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(20), 13, GFLAGS), ++ GATE(0, "hclk_ahb1tom", "hclk_perihp", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(20), 15, GFLAGS), + -+static struct rockchip_clk_branch rv1108_uart0_fracmux __initdata = -+ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(13), 8, 2, MFLAGS); ++ GATE(PCLK_PERIHP_GRF, "pclk_perihp_grf", "pclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(20), 4, GFLAGS), ++ GATE(PCLK_PCIE, "pclk_pcie", "pclk_perihp", 0, ++ RK3399_CLKGATE_CON(20), 11, GFLAGS), ++ GATE(0, "pclk_perihp_noc", "pclk_perihp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(20), 14, GFLAGS), ++ GATE(PCLK_HSICPHY, "pclk_hsicphy", "pclk_perihp", 0, ++ RK3399_CLKGATE_CON(31), 8, GFLAGS), + -+static struct rockchip_clk_branch rv1108_uart1_fracmux __initdata = -+ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(14), 8, 2, MFLAGS); ++ /* sdio & sdmmc */ ++ COMPOSITE(HCLK_SD, "hclk_sd", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(13), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 13, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd", 0, ++ RK3399_CLKGATE_CON(33), 8, GFLAGS), ++ GATE(0, "hclk_sdmmc_noc", "hclk_sd", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(33), 9, GFLAGS), + -+static struct rockchip_clk_branch rv1108_uart2_fracmux __initdata = -+ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(15), 8, 2, MFLAGS); ++ COMPOSITE(SCLK_SDIO, "clk_sdio", mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p, 0, ++ RK3399_CLKSEL_CON(15), 8, 3, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(6), 0, GFLAGS), + -+static struct rockchip_clk_branch rv1108_i2s0_fracmux __initdata = -+ MUX(0, "i2s0_pre", mux_i2s0_pre_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(5), 12, 2, MFLAGS); ++ COMPOSITE(SCLK_SDMMC, "clk_sdmmc", mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p, 0, ++ RK3399_CLKSEL_CON(16), 8, 3, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(6), 1, GFLAGS), + -+static struct rockchip_clk_branch rv1108_i2s1_fracmux __initdata = -+ MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(6), 12, 2, MFLAGS); ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3399_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", 
RK3399_SDMMC_CON1, 1), + -+static struct rockchip_clk_branch rv1108_i2s2_fracmux __initdata = -+ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(7), 12, 2, MFLAGS); ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3399_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK3399_SDIO_CON1, 1), + -+static struct rockchip_clk_branch rv1108_clk_branches[] __initdata = { -+ MUX(0, "hdmiphy", mux_hdmiphy_phy_p, CLK_SET_RATE_PARENT, -+ RV1108_MISC_CON, 13, 1, MFLAGS), -+ MUX(0, "usb480m", mux_usb480m_pre_p, CLK_SET_RATE_PARENT, -+ RV1108_MISC_CON, 15, 1, MFLAGS), -+ /* -+ * Clock-Architecture Diagram 2 -+ */ ++ /* pcie */ ++ COMPOSITE(SCLK_PCIE_PM, "clk_pcie_pm", mux_pll_src_cpll_gpll_npll_24m_p, 0, ++ RK3399_CLKSEL_CON(17), 8, 3, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(6), 2, GFLAGS), + -+ /* PD_CORE */ -+ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclken_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1108_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_ENMCORE, "aclkenm_core", "armclk", CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1108_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(ACLK_CORE, "aclk_core", "aclkenm_core", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(0, "pclk_dbg", "pclken_dbg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_PCIEPHY_REF100M, "clk_pciephy_ref100m", "npll", 0, ++ RK3399_CLKSEL_CON(18), 11, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 6, GFLAGS), ++ MUX(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pll_src_24m_pciephy_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(18), 10, 1, MFLAGS), + -+ /* PD_RKVENC */ -+ COMPOSITE(0, "aclk_rkvenc_pre", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(37), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 8, GFLAGS), -+ FACTOR_GATE(0, "hclk_rkvenc_pre", "aclk_rkvenc_pre", 0, 1, 4, -+ RV1108_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE(SCLK_VENC_CORE, "clk_venc_core", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(37), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_pre", 0, -+ RV1108_CLKGATE_CON(19), 8, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_pre", 0, -+ RV1108_CLKGATE_CON(19), 9, GFLAGS), -+ GATE(0, "aclk_rkvenc_niu", "aclk_rkvenc_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(19), 11, GFLAGS), -+ GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(19), 10, GFLAGS), ++ COMPOSITE(0, "clk_pcie_core_cru", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(6), 3, GFLAGS), ++ MUX(SCLK_PCIE_CORE, "clk_pcie_core", mux_pciecore_cru_phy_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(18), 7, 1, MFLAGS), + -+ /* PD_RKVDEC */ -+ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 2, GFLAGS), -+ FACTOR_GATE(0, "hclk_rkvdec_pre", "sclk_hevc_core", 0, 1, 4, -+ RV1108_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE(SCLK_HEVC_CABAC, "clk_hevc_cabac", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 1, GFLAGS), ++ /* emmc */ 
++ COMPOSITE(SCLK_EMMC, "clk_emmc", mux_pll_src_cpll_gpll_npll_upll_24m_p, 0, ++ RK3399_CLKSEL_CON(22), 8, 3, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(6), 14, GFLAGS), + -+ COMPOSITE(0, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE(0, "aclk_vpu_pre", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, -+ RV1108_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, -+ RV1108_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, -+ RV1108_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_rkvdec_pre", 0, -+ RV1108_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(19), 4, GFLAGS), -+ GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(19), 5, GFLAGS), -+ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 13, GFLAGS), ++ GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(6), 12, GFLAGS), ++ COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS), ++ GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(32), 8, GFLAGS), ++ GATE(ACLK_EMMC_NOC, "aclk_emmc_noc", "aclk_emmc", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(32), 9, GFLAGS), ++ GATE(ACLK_EMMC_GRF, "aclk_emmcgrf", "aclk_emmc", CLK_IGNORE_UNUSED, ++ RK3399_CLKGATE_CON(32), 10, GFLAGS), + -+ /* PD_PMU_wrapper */ -+ COMPOSITE_NOMUX(0, "pmu_24m_ena", "gpll", CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(38), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(0, "pclk_pmu", "pmu_24m_ena", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(0, "pclk_intmem1", "pmu_24m_ena", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pmu_24m_ena", 0, -+ RV1108_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(0, "pclk_pmugrf", "pmu_24m_ena", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(10), 3, GFLAGS), -+ GATE(0, "pclk_pmu_niu", "pmu_24m_ena", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pmu_24m_ena", 0, -+ RV1108_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(PCLK_PWM0_PMU, "pclk_pwm0_pmu", "pmu_24m_ena", 0, -+ RV1108_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE(SCLK_PWM0_PMU, "sclk_pwm0_pmu", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(8), 15, GFLAGS), -+ COMPOSITE(SCLK_I2C0_PMU, "sclk_i2c0_pmu", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(8), 14, GFLAGS), -+ GATE(0, "pvtm_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(8), 13, GFLAGS), ++ /* perilp0 */ ++ GATE(0, "cpll_aclk_perilp0_src", "cpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(0, "gpll_aclk_perilp0_src", "gpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE(ACLK_PERILP0, "aclk_perilp0", mux_aclk_perilp0_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(23), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(7), 2, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PERILP0, "hclk_perilp0", "aclk_perilp0", 
CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(23), 8, 2, DFLAGS, ++ RK3399_CLKGATE_CON(7), 3, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERILP0, "pclk_perilp0", "aclk_perilp0", CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(23), 12, 3, DFLAGS, ++ RK3399_CLKGATE_CON(7), 4, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ -+ COMPOSITE(SCLK_WIFI, "sclk_wifi", mux_wifi_src_p, 0, -+ RV1108_CLKSEL_CON(28), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RV1108_CLKGATE_CON(9), 8, GFLAGS), -+ COMPOSITE_NODIV(0, "sclk_cifout_src", mux_cifout_src_p, 0, -+ RV1108_CLKSEL_CON(40), 8, 1, MFLAGS, -+ RV1108_CLKGATE_CON(9), 11, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_CIFOUT, "sclk_cifout", mux_cifout_p, 0, -+ RV1108_CLKSEL_CON(40), 12, 1, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(SCLK_MIPI_CSI_OUT, "sclk_mipi_csi_out", "xin24m", 0, -+ RV1108_CLKSEL_CON(41), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 12, GFLAGS), ++ /* aclk_perilp0 gates */ ++ GATE(ACLK_INTMEM, "aclk_intmem", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 0, GFLAGS), ++ GATE(ACLK_TZMA, "aclk_tzma", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(SCLK_INTMEM0, "clk_intmem0", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(SCLK_INTMEM1, "clk_intmem1", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 3, GFLAGS), ++ GATE(SCLK_INTMEM2, "clk_intmem2", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(SCLK_INTMEM3, "clk_intmem3", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 5, GFLAGS), ++ GATE(SCLK_INTMEM4, "clk_intmem4", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 6, GFLAGS), ++ GATE(SCLK_INTMEM5, "clk_intmem5", "aclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(23), 7, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_perilp0", 0, RK3399_CLKGATE_CON(23), 8, GFLAGS), ++ GATE(ACLK_DMAC0_PERILP, "aclk_dmac0_perilp", "aclk_perilp0", 0, RK3399_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(ACLK_DMAC1_PERILP, "aclk_dmac1_perilp", "aclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 6, GFLAGS), ++ GATE(ACLK_PERILP0_NOC, "aclk_perilp0_noc", "aclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 7, GFLAGS), + -+ GATE(0, "pclk_acodecphy", "pclk_top_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(0, "pclk_usbgrf", "pclk_top_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 14, GFLAGS), ++ /* hclk_perilp0 gates */ ++ GATE(HCLK_ROM, "hclk_rom", "hclk_perilp0", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(24), 4, GFLAGS), ++ GATE(HCLK_M_CRYPTO0, "hclk_m_crypto0", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 5, GFLAGS), ++ GATE(HCLK_S_CRYPTO0, "hclk_s_crypto0", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 6, GFLAGS), ++ GATE(HCLK_M_CRYPTO1, "hclk_m_crypto1", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 14, GFLAGS), ++ GATE(HCLK_S_CRYPTO1, "hclk_s_crypto1", "hclk_perilp0", 0, RK3399_CLKGATE_CON(24), 15, GFLAGS), ++ GATE(HCLK_PERILP0_NOC, "hclk_perilp0_noc", "hclk_perilp0", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 8, GFLAGS), + -+ GATE(ACLK_CIF0, "aclk_cif0", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(18), 10, GFLAGS), -+ GATE(HCLK_CIF0, "hclk_cif0", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 10, GFLAGS), -+ COMPOSITE_NODIV(SCLK_CIF0, "sclk_cif0", mux_sclk_cif0_src_p, 0, -+ RV1108_CLKSEL_CON(31), 0, 2, MFLAGS, -+ RV1108_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(ACLK_CIF1, "aclk_cif1", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(HCLK_CIF1, "hclk_cif1", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(17), 7, GFLAGS), -+ COMPOSITE_NODIV(SCLK_CIF1, 
"sclk_cif1", mux_sclk_cif1_src_p, 0, -+ RV1108_CLKSEL_CON(31), 2, 2, MFLAGS, -+ RV1108_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(ACLK_CIF2, "aclk_cif2", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(HCLK_CIF2, "hclk_cif2", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(17), 9, GFLAGS), -+ COMPOSITE_NODIV(SCLK_CIF2, "sclk_cif2", mux_sclk_cif2_src_p, 0, -+ RV1108_CLKSEL_CON(31), 4, 2, MFLAGS, -+ RV1108_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(ACLK_CIF3, "aclk_cif3", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(17), 10, GFLAGS), -+ GATE(HCLK_CIF3, "hclk_cif3", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(17), 11, GFLAGS), -+ COMPOSITE_NODIV(SCLK_CIF3, "sclk_cif3", mux_sclk_cif3_src_p, 0, -+ RV1108_CLKSEL_CON(31), 6, 2, MFLAGS, -+ RV1108_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(0, "pclk_cif1to4", "pclk_vip", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(7), 8, GFLAGS), ++ /* pclk_perilp0 gates */ ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_perilp0", 0, RK3399_CLKGATE_CON(23), 9, GFLAGS), + -+ /* PD_DSP_wrapper */ -+ COMPOSITE(SCLK_DSP, "sclk_dsp", mux_dsp_src_p, 0, -+ RV1108_CLKSEL_CON(42), 8, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 0, GFLAGS), -+ GATE(0, "clk_dsp_sys_wd", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(0, "clk_dsp_epp_wd", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(0, "clk_dsp_edp_wd", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(0, "clk_dsp_iop_wd", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(0, "clk_dsp_free", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 13, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_DSP_IOP, "sclk_dsp_iop", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(44), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_DSP_EPP, "sclk_dsp_epp", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(44), 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 2, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_DSP_EDP, "sclk_dsp_edp", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(45), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 3, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_DSP_EDAP, "sclk_dsp_edap", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(45), 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(0, "pclk_dsp_iop_niu", "sclk_dsp_iop", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(0, "aclk_dsp_epp_niu", "sclk_dsp_epp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(0, "aclk_dsp_edp_niu", "sclk_dsp_edp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(0, "pclk_dsp_dbg_niu", "sclk_dsp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(0, "aclk_dsp_edap_niu", "sclk_dsp_edap", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 14, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_DSP_PFM, "sclk_dsp_pfm", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(43), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 5, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_DSP_CFG, "pclk_dsp_cfg", "sclk_dsp", 0, -+ RV1108_CLKSEL_CON(43), 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(0, "pclk_dsp_cfg_niu", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(0, "pclk_dsp_pfm_mon", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(0, "pclk_intc", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(0, "pclk_dsp_grf", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(0, "pclk_mailbox", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(0, 
"aclk_dsp_epp_perf", "sclk_dsp_epp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(0, "aclk_dsp_edp_perf", "sclk_dsp_edp", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(11), 8, GFLAGS), ++ /* crypto */ ++ COMPOSITE(SCLK_CRYPTO0, "clk_crypto0", mux_pll_src_cpll_gpll_ppll_p, 0, ++ RK3399_CLKSEL_CON(24), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(7), 7, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ -+ COMPOSITE(0, "aclk_vio0_pre", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(ACLK_VIO0, "aclk_vio0", "aclk_vio0_pre", 0, -+ RV1108_CLKGATE_CON(17), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vio_pre", "aclk_vio0_pre", 0, -+ RV1108_CLKSEL_CON(29), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(HCLK_VIO, "hclk_vio", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(17), 2, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_vio_pre", "aclk_vio0_pre", 0, -+ RV1108_CLKSEL_CON(29), 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_VIO, "pclk_vio", "pclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(17), 3, GFLAGS), -+ COMPOSITE(0, "aclk_vio1_pre", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(28), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(ACLK_VIO1, "aclk_vio1", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(17), 1, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO1, "clk_crypto1", mux_pll_src_cpll_gpll_ppll_p, 0, ++ RK3399_CLKSEL_CON(26), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(7), 8, GFLAGS), + -+ INVERTER(0, "pclk_vip", "ext_vip", -+ RV1108_CLKSEL_CON(31), 8, IFLAGS), -+ GATE(0, "pclk_isp_pre", "pclk_vip", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(0, "pclk_isp", "pclk_isp_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(18), 10, GFLAGS), -+ GATE(0, "dclk_hdmiphy_src_gpll", "gpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(0, "dclk_hdmiphy_src_dpll", "dpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_NOGATE(0, "dclk_hdmiphy_pre", mux_dclk_hdmiphy_pre_p, 0, -+ RV1108_CLKSEL_CON(32), 6, 1, MFLAGS, 8, 6, DFLAGS), -+ COMPOSITE_NOGATE(DCLK_VOP_SRC, "dclk_vop_src", mux_dclk_hdmiphy_pre_p, 0, -+ RV1108_CLKSEL_CON(32), 6, 1, MFLAGS, 0, 6, DFLAGS), -+ MUX(DCLK_HDMIPHY, "dclk_hdmiphy", mux_dclk_hdmiphy_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(32), 15, 1, MFLAGS), -+ MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(32), 7, 1, MFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vio0_pre", 0, -+ RV1108_CLKGATE_CON(18), 0, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 1, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0_pre", 0, -+ RV1108_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 3, GFLAGS), ++ /* cm0s_perilp */ ++ GATE(0, "cpll_fclk_cm0s_src", "cpll", 0, ++ RK3399_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(0, "gpll_fclk_cm0s_src", "gpll", 0, ++ RK3399_CLKGATE_CON(7), 5, GFLAGS), ++ COMPOSITE(FCLK_CM0S, "fclk_cm0s", mux_fclk_cm0s_p, 0, ++ RK3399_CLKSEL_CON(24), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(7), 9, GFLAGS), + -+ GATE(ACLK_RGA, "aclk_rga", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(18), 4, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 5, GFLAGS), -+ COMPOSITE(SCLK_RGA, "sclk_rga", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(6), 6, GFLAGS), ++ /* fclk_cm0s 
gates */ ++ GATE(SCLK_M0_PERILP, "sclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 8, GFLAGS), ++ GATE(HCLK_M0_PERILP, "hclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 9, GFLAGS), ++ GATE(DCLK_M0_PERILP, "dclk_m0_perilp", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 10, GFLAGS), ++ GATE(SCLK_M0_PERILP_DEC, "clk_m0_perilp_dec", "fclk_cm0s", 0, RK3399_CLKGATE_CON(24), 11, GFLAGS), ++ GATE(HCLK_M0_PERILP_NOC, "hclk_m0_perilp_noc", "fclk_cm0s", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 11, GFLAGS), + -+ COMPOSITE(SCLK_CVBS_HOST, "sclk_cvbs_host", mux_cvbs_src_p, 0, -+ RV1108_CLKSEL_CON(33), 13, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(6), 7, GFLAGS), -+ FACTOR(0, "sclk_cvbs_27m", "sclk_cvbs_host", 0, 1, 2), ++ /* perilp1 */ ++ GATE(0, "cpll_hclk_perilp1_src", "cpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(8), 1, GFLAGS), ++ GATE(0, "gpll_hclk_perilp1_src", "gpll", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOGATE(HCLK_PERILP1, "hclk_perilp1", mux_hclk_perilp1_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(PCLK_PERILP1, "pclk_perilp1", "hclk_perilp1", CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(25), 8, 3, DFLAGS, ++ RK3399_CLKGATE_CON(8), 2, GFLAGS), + -+ GATE(SCLK_HDMI_SFR, "sclk_hdmi_sfr", "xin24m", 0, -+ RV1108_CLKGATE_CON(6), 8, GFLAGS), ++ /* hclk_perilp1 gates */ ++ GATE(0, "hclk_perilp1_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 9, GFLAGS), ++ GATE(0, "hclk_sdio_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 12, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 0, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 1, GFLAGS), ++ GATE(HCLK_I2S2_8CH, "hclk_i2s2", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 2, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 3, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 4, GFLAGS), ++ GATE(PCLK_SPI5, "pclk_spi5", "hclk_perilp1", 0, RK3399_CLKGATE_CON(34), 5, GFLAGS), ++ GATE(0, "hclk_sdioaudio_noc", "hclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(34), 6, GFLAGS), + -+ COMPOSITE(SCLK_HDMI_CEC, "sclk_hdmi_cec", mux_hdmi_cec_src_p, 0, -+ RV1108_CLKSEL_CON(34), 14, 2, MFLAGS, 0, 14, DFLAGS, -+ RV1108_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(PCLK_MIPI_DSI, "pclk_mipi_dsi", "pclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 8, GFLAGS), -+ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "pclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 9, GFLAGS), ++ /* pclk_perilp1 gates */ ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 0, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 1, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 2, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(PCLK_I2C7, "pclk_rki2c7", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 5, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_rki2c1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 6, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_rki2c5", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 7, GFLAGS), ++ GATE(PCLK_I2C6, "pclk_rki2c6", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 8, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_rki2c2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 9, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_rki2c3", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 10, GFLAGS), ++ GATE(PCLK_MAILBOX0, "pclk_mailbox0", "pclk_perilp1", 0, 
RK3399_CLKGATE_CON(22), 11, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 12, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 13, GFLAGS), ++ GATE(PCLK_EFUSE1024NS, "pclk_efuse1024ns", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 14, GFLAGS), ++ GATE(PCLK_EFUSE1024S, "pclk_efuse1024s", "pclk_perilp1", 0, RK3399_CLKGATE_CON(22), 15, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 10, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 11, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 12, GFLAGS), ++ GATE(PCLK_SPI4, "pclk_spi4", "pclk_perilp1", 0, RK3399_CLKGATE_CON(23), 13, GFLAGS), ++ GATE(PCLK_PERIHP_GRF, "pclk_perilp_sgrf", "pclk_perilp1", 0, RK3399_CLKGATE_CON(24), 13, GFLAGS), ++ GATE(0, "pclk_perilp1_noc", "pclk_perilp1", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(25), 10, GFLAGS), + -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vio1_pre", 0, -+ RV1108_CLKGATE_CON(18), 12, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_vio_pre", 0, -+ RV1108_CLKGATE_CON(18), 11, GFLAGS), -+ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_4plls_p, 0, -+ RV1108_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(6), 3, GFLAGS), ++ /* saradc */ ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3399_CLKSEL_CON(26), 8, 8, DFLAGS, ++ RK3399_CLKGATE_CON(9), 11, GFLAGS), + -+ GATE(0, "clk_dsiphy24m", "xin24m", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(0, "pclk_vdacphy", "pclk_top_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(0, "pclk_mipi_dsiphy", "pclk_top_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(0, "pclk_mipi_csiphy", "pclk_top_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 12, GFLAGS), ++ /* tsadc */ ++ COMPOSITE(SCLK_TSADC, "clk_tsadc", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(27), 15, 1, MFLAGS, 0, 10, DFLAGS, ++ RK3399_CLKGATE_CON(9), 10, GFLAGS), ++ ++ /* cif_testout */ ++ MUX(0, "clk_testout1_pll_src", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(38), 6, 2, MFLAGS), ++ COMPOSITE(SCLK_TESTCLKOUT1, "clk_testout1", mux_clk_testout1_p, 0, ++ RK3399_CLKSEL_CON(38), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 14, GFLAGS), ++ ++ MUX(0, "clk_testout2_pll_src", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(38), 14, 2, MFLAGS), ++ COMPOSITE(SCLK_TESTCLKOUT2, "clk_testout2", mux_clk_testout2_p, 0, ++ RK3399_CLKSEL_CON(38), 13, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 15, GFLAGS), ++ ++ /* vio */ ++ COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(43), 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 1, GFLAGS), ++ ++ GATE(ACLK_VIO_NOC, "aclk_vio_noc", "aclk_vio", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(29), 0, GFLAGS), ++ ++ GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "pclk_vio", 0, ++ RK3399_CLKGATE_CON(29), 1, GFLAGS), ++ GATE(PCLK_MIPI_DSI1, "pclk_mipi_dsi1", "pclk_vio", 0, ++ RK3399_CLKGATE_CON(29), 2, GFLAGS), ++ GATE(PCLK_VIO_GRF, "pclk_vio_grf", "pclk_vio", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(29), 12, GFLAGS), ++ ++ /* hdcp */ ++ COMPOSITE_NOGATE(ACLK_HDCP, "aclk_hdcp", mux_pll_src_cpll_gpll_ppll_p, 0, ++ RK3399_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS), ++ COMPOSITE_NOMUX(HCLK_HDCP, "hclk_hdcp", 
"aclk_hdcp", 0, ++ RK3399_CLKSEL_CON(43), 5, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 3, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_HDCP, "pclk_hdcp", "aclk_hdcp", 0, ++ RK3399_CLKSEL_CON(43), 10, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 10, GFLAGS), ++ ++ GATE(ACLK_HDCP_NOC, "aclk_hdcp_noc", "aclk_hdcp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(29), 4, GFLAGS), ++ GATE(ACLK_HDCP22, "aclk_hdcp22", "aclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 10, GFLAGS), ++ ++ GATE(HCLK_HDCP_NOC, "hclk_hdcp_noc", "hclk_hdcp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(29), 5, GFLAGS), ++ GATE(HCLK_HDCP22, "hclk_hdcp22", "hclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 9, GFLAGS), ++ ++ GATE(PCLK_HDCP_NOC, "pclk_hdcp_noc", "pclk_hdcp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(29), 3, GFLAGS), ++ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "pclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 6, GFLAGS), ++ GATE(PCLK_DP_CTRL, "pclk_dp_ctrl", "pclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 7, GFLAGS), ++ GATE(PCLK_HDCP22, "pclk_hdcp22", "pclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 8, GFLAGS), ++ GATE(PCLK_GASKET, "pclk_gasket", "pclk_hdcp", 0, ++ RK3399_CLKGATE_CON(29), 11, GFLAGS), ++ ++ /* edp */ ++ COMPOSITE(SCLK_DP_CORE, "clk_dp_core", mux_pll_src_npll_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 8, GFLAGS), ++ ++ COMPOSITE(PCLK_EDP, "pclk_edp", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(44), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RK3399_CLKGATE_CON(11), 11, GFLAGS), ++ GATE(PCLK_EDP_NOC, "pclk_edp_noc", "pclk_edp", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(32), 12, GFLAGS), ++ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "pclk_edp", 0, ++ RK3399_CLKGATE_CON(32), 13, GFLAGS), ++ ++ /* hdmi */ ++ GATE(SCLK_HDMI_SFR, "clk_hdmi_sfr", "xin24m", 0, ++ RK3399_CLKGATE_CON(11), 6, GFLAGS), ++ ++ COMPOSITE(SCLK_HDMI_CEC, "clk_hdmi_cec", mux_pll_p, 0, ++ RK3399_CLKSEL_CON(45), 15, 1, MFLAGS, 0, 10, DFLAGS, ++ RK3399_CLKGATE_CON(11), 7, GFLAGS), ++ ++ /* vop0 */ ++ COMPOSITE(ACLK_VOP0_PRE, "aclk_vop0_pre", mux_pll_src_dmyvpll_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(47), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vop0_pre", "aclk_vop0_pre", 0, ++ RK3399_CLKSEL_CON(47), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 9, GFLAGS), ++ ++ GATE(ACLK_VOP0, "aclk_vop0", "aclk_vop0_pre", 0, ++ RK3399_CLKGATE_CON(28), 3, GFLAGS), ++ GATE(ACLK_VOP0_NOC, "aclk_vop0_noc", "aclk_vop0_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(28), 1, GFLAGS), ++ ++ GATE(HCLK_VOP0, "hclk_vop0", "hclk_vop0_pre", 0, ++ RK3399_CLKGATE_CON(28), 2, GFLAGS), ++ GATE(HCLK_VOP0_NOC, "hclk_vop0_noc", "hclk_vop0_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(28), 0, GFLAGS), ++ ++#ifdef RK3399_TWO_PLL_FOR_VOP ++ COMPOSITE(DCLK_VOP0_DIV, "dclk_vop0_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3399_CLKSEL_CON(49), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3399_CLKGATE_CON(10), 12, GFLAGS), ++#else ++ COMPOSITE(DCLK_VOP0_DIV, "dclk_vop0_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(49), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3399_CLKGATE_CON(10), 12, GFLAGS), ++#endif ++ ++ /* The VOP0 is main screen, it is able to re-set parent rate. 
*/ ++ COMPOSITE_FRACMUX_NOGATE(0, "dclk_vop0_frac", "dclk_vop0_div", CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(106), 0, ++ &rk3399_dclk_vop0_fracmux), ++ ++ COMPOSITE(SCLK_VOP0_PWM, "clk_vop0_pwm", mux_pll_src_dmyvpll_cpll_gpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 14, GFLAGS), ++ ++ /* vop1 */ ++ COMPOSITE(ACLK_VOP1_PRE, "aclk_vop1_pre", mux_pll_src_dmyvpll_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vop1_pre", "aclk_vop1_pre", 0, ++ RK3399_CLKSEL_CON(48), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 11, GFLAGS), ++ ++ GATE(ACLK_VOP1, "aclk_vop1", "aclk_vop1_pre", 0, ++ RK3399_CLKGATE_CON(28), 7, GFLAGS), ++ GATE(ACLK_VOP1_NOC, "aclk_vop1_noc", "aclk_vop1_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(28), 5, GFLAGS), ++ ++ GATE(HCLK_VOP1, "hclk_vop1", "hclk_vop1_pre", 0, ++ RK3399_CLKGATE_CON(28), 6, GFLAGS), ++ GATE(HCLK_VOP1_NOC, "hclk_vop1_noc", "hclk_vop1_pre", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(28), 4, GFLAGS), ++ ++ /* The VOP1 is sub screen, it is not able to re-set parent rate. */ ++#ifdef RK3399_TWO_PLL_FOR_VOP ++ COMPOSITE(DCLK_VOP1_DIV, "dclk_vop1_div", mux_pll_src_vpll_cpll_gpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3399_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3399_CLKGATE_CON(10), 13, GFLAGS), ++#else ++ COMPOSITE(DCLK_VOP1_DIV, "dclk_vop1_div", mux_pll_src_dmyvpll_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3399_CLKGATE_CON(10), 13, GFLAGS), ++#endif ++ ++ COMPOSITE_FRACMUX_NOGATE(DCLK_VOP1_FRAC, "dclk_vop1_frac", "dclk_vop1_div", 0, ++ RK3399_CLKSEL_CON(107), 0, ++ &rk3399_dclk_vop1_fracmux), ++ ++ COMPOSITE(SCLK_VOP1_PWM, "clk_vop1_pwm", mux_pll_src_dmyvpll_cpll_gpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(52), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(10), 15, GFLAGS), ++ ++ /* isp */ ++ COMPOSITE(ACLK_ISP0, "aclk_isp0", mux_pll_src_cpll_gpll_ppll_p, 0, ++ RK3399_CLKSEL_CON(53), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 8, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_ISP0, "hclk_isp0", "aclk_isp0", 0, ++ RK3399_CLKSEL_CON(53), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 9, GFLAGS), ++ ++ GATE(ACLK_ISP0_NOC, "aclk_isp0_noc", "aclk_isp0", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(27), 1, GFLAGS), ++ GATE(ACLK_ISP0_WRAPPER, "aclk_isp0_wrapper", "aclk_isp0", 0, ++ RK3399_CLKGATE_CON(27), 5, GFLAGS), ++ ++ GATE(HCLK_ISP0_NOC, "hclk_isp0_noc", "hclk_isp0", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(HCLK_ISP0_WRAPPER, "hclk_isp0_wrapper", "hclk_isp0", 0, ++ RK3399_CLKGATE_CON(27), 4, GFLAGS), ++ ++ COMPOSITE(SCLK_ISP0, "clk_isp0", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(55), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 4, GFLAGS), ++ ++ COMPOSITE(ACLK_ISP1, "aclk_isp1", mux_pll_src_cpll_gpll_ppll_p, 0, ++ RK3399_CLKSEL_CON(54), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 10, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_ISP1, "hclk_isp1", "aclk_isp1", 0, ++ RK3399_CLKSEL_CON(54), 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 11, GFLAGS), ++ ++ GATE(ACLK_ISP1_NOC, "aclk_isp1_noc", "aclk_isp1", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(27), 3, GFLAGS), ++ GATE(ACLK_ISP1_WRAPPER, "aclk_isp1_wrapper", "aclk_isp1", 0, ++ RK3399_CLKGATE_CON(27), 8, GFLAGS), ++ ++ GATE(HCLK_ISP1_NOC, "hclk_isp1_noc", "hclk_isp1", CLK_IS_CRITICAL, ++ RK3399_CLKGATE_CON(27), 2, GFLAGS), ++ GATE(HCLK_ISP1_WRAPPER, "hclk_isp1_wrapper", "hclk_isp1", 0, ++ 
RK3399_CLKGATE_CON(27), 7, GFLAGS), ++ ++ COMPOSITE(SCLK_ISP1, "clk_isp1", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(55), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(11), 5, GFLAGS), + + /* -+ * Clock-Architecture Diagram 5 ++ * We use pclkin_cifinv by default GRF_SOC_CON20[9] (GSC20_9) setting in system, ++ * so we ignore the mux and make clocks nodes as following, ++ * ++ * pclkin_cifinv --|-------\ ++ * |GSC20_9|-- pclkin_cifmux -- |G27_6| -- pclkin_isp1_wrapper ++ * pclkin_cif --|-------/ + */ ++ GATE(PCLK_ISP1_WRAPPER, "pclkin_isp1_wrapper", "pclkin_cif", 0, ++ RK3399_CLKGATE_CON(27), 6, GFLAGS), + -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ /* cif */ ++ COMPOSITE_NODIV(SCLK_CIF_OUT_SRC, "clk_cifout_src", mux_pll_src_cpll_gpll_npll_p, 0, ++ RK3399_CLKSEL_CON(56), 6, 2, MFLAGS, ++ RK3399_CLKGATE_CON(10), 7, GFLAGS), + ++ COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cifout", mux_clk_cif_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(56), 5, 1, MFLAGS, 0, 5, DFLAGS), + -+ COMPOSITE(SCLK_I2S0_SRC, "i2s0_src", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(8), 0, -+ RV1108_CLKGATE_CON(2), 1, GFLAGS, -+ &rv1108_i2s0_fracmux), -+ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, -+ RV1108_CLKGATE_CON(2), 2, GFLAGS), -+ COMPOSITE_NODIV(0, "i2s_out", mux_i2s_out_p, 0, -+ RV1108_CLKSEL_CON(5), 15, 1, MFLAGS, -+ RV1108_CLKGATE_CON(2), 3, GFLAGS), ++ /* gic */ ++ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_pll_src_cpll_gpll_p, CLK_IS_CRITICAL, ++ RK3399_CLKSEL_CON(56), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_CLKGATE_CON(12), 12, GFLAGS), + -+ COMPOSITE(SCLK_I2S1_SRC, "i2s1_src", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, -+ RK2928_CLKSEL_CON(9), 0, -+ RK2928_CLKGATE_CON(2), 5, GFLAGS, -+ &rv1108_i2s1_fracmux), -+ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, -+ RV1108_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(33), 0, GFLAGS), ++ GATE(ACLK_GIC_NOC, "aclk_gic_noc", "aclk_gic_pre", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(33), 1, GFLAGS), ++ GATE(ACLK_GIC_ADB400_CORE_L_2_GIC, "aclk_gic_adb400_core_l_2_gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 2, GFLAGS), ++ GATE(ACLK_GIC_ADB400_CORE_B_2_GIC, "aclk_gic_adb400_core_b_2_gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 3, GFLAGS), ++ GATE(ACLK_GIC_ADB400_GIC_2_CORE_L, "aclk_gic_adb400_gic_2_core_l", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 4, GFLAGS), ++ GATE(ACLK_GIC_ADB400_GIC_2_CORE_B, "aclk_gic_adb400_gic_2_core_b", "aclk_gic_pre", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(33), 5, GFLAGS), + -+ COMPOSITE(SCLK_I2S2_SRC, "i2s2_src", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(10), 0, -+ RV1108_CLKGATE_CON(2), 9, GFLAGS, -+ &rv1108_i2s2_fracmux), -+ GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, -+ RV1108_CLKGATE_CON(2), 10, GFLAGS), ++ /* alive */ ++ /* pclk_alive_gpll_src is controlled by PMUGRF_SOC_CON0[6] */ ++ DIV(PCLK_ALIVE, "pclk_alive", "gpll", 0, ++ RK3399_CLKSEL_CON(57), 0, 5, DFLAGS), + -+ /* PD_BUS */ -+ GATE(0, 
"aclk_bus_src_gpll", "gpll", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(0, "aclk_bus_src_apll", "apll", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(0, "aclk_bus_src_dpll", "dpll", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NOGATE(ACLK_PRE, "aclk_bus_pre", mux_aclk_bus_src_p, CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS), -+ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(3), 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(3), 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_BUS, "pclk_bus", "pclk_bus_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(0, "pclk_top_pre", "pclk_bus_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(0, "pclk_ddr_pre", "pclk_bus_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(1), 8, GFLAGS), -+ GATE(SCLK_TIMER0, "clk_timer0", "xin24m", 0, -+ RV1108_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(SCLK_TIMER1, "clk_timer1", "xin24m", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(PCLK_USBPHY_MUX_G, "pclk_usbphy_mux_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 4, GFLAGS), ++ GATE(PCLK_UPHY0_TCPHY_G, "pclk_uphy0_tcphy_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(PCLK_UPHY0_TCPD_G, "pclk_uphy0_tcpd_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 6, GFLAGS), ++ GATE(PCLK_UPHY1_TCPHY_G, "pclk_uphy1_tcphy_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 8, GFLAGS), ++ GATE(PCLK_UPHY1_TCPD_G, "pclk_uphy1_tcpd_g", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(21), 9, GFLAGS), + -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 7, GFLAGS), -+ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 8, GFLAGS), -+ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 9, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 1, GFLAGS), ++ GATE(PCLK_INTR_ARB, "pclk_intr_arb", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 2, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 3, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 4, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 5, GFLAGS), ++ GATE(PCLK_TIMER0, "pclk_timer0", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 6, GFLAGS), ++ GATE(PCLK_TIMER1, "pclk_timer1", "pclk_alive", 0, RK3399_CLKGATE_CON(31), 7, GFLAGS), ++ GATE(PCLK_PMU_INTR_ARB, "pclk_pmu_intr_arb", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 9, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_alive", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(31), 10, GFLAGS), + -+ GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 10, GFLAGS), -+ GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 11, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO, "sclk_crypto", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(11), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(2), 12, GFLAGS), ++ /* Watchdog pclk is controlled by RK3399 SECURE_GRF_SOC_CON3[8]. 
*/ ++ SGRF_GATE(PCLK_WDT, "pclk_wdt", "pclk_alive"), + -+ COMPOSITE(SCLK_SPI, "sclk_spi", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(11), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(3), 0, GFLAGS), -+ GATE(PCLK_SPI, "pclk_spi", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(SCLK_MIPIDPHY_REF, "clk_mipidphy_ref", "xin24m", 0, RK3399_CLKGATE_CON(11), 14, GFLAGS), ++ GATE(SCLK_DPHY_PLL, "clk_dphy_pll", "clk_mipidphy_ref", 0, RK3399_CLKGATE_CON(21), 0, GFLAGS), + -+ COMPOSITE(SCLK_UART0_SRC, "uart0_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE(SCLK_UART1_SRC, "uart1_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 3, GFLAGS), -+ COMPOSITE(SCLK_UART2_SRC, "uart2_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, -+ RV1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(SCLK_MIPIDPHY_CFG, "clk_mipidphy_cfg", "xin24m", 0, RK3399_CLKGATE_CON(11), 15, GFLAGS), ++ GATE(SCLK_DPHY_TX0_CFG, "clk_dphy_tx0_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 1, GFLAGS), ++ GATE(SCLK_DPHY_TX1RX1_CFG, "clk_dphy_tx1rx1_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(SCLK_DPHY_RX0_CFG, "clk_dphy_rx0_cfg", "clk_mipidphy_cfg", 0, RK3399_CLKGATE_CON(21), 3, GFLAGS), + -+ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(16), 0, -+ RV1108_CLKGATE_CON(3), 2, GFLAGS, -+ &rv1108_uart0_fracmux), -+ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(17), 0, -+ RV1108_CLKGATE_CON(3), 4, GFLAGS, -+ &rv1108_uart1_fracmux), -+ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(18), 0, -+ RV1108_CLKGATE_CON(3), 6, GFLAGS, -+ &rv1108_uart2_fracmux), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 12, GFLAGS), ++ /* testout */ ++ MUX(0, "clk_test_pre", mux_pll_src_cpll_gpll_p, CLK_SET_RATE_PARENT, ++ RK3399_CLKSEL_CON(58), 7, 1, MFLAGS), ++ COMPOSITE_FRAC(0, "clk_test_frac", "clk_test_pre", 0, ++ RK3399_CLKSEL_CON(105), 0, ++ RK3399_CLKGATE_CON(13), 9, GFLAGS), + -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(20), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 9, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 2, GFLAGS), -+ COMPOSITE(SCLK_PWM, "clk_pwm", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS, -+ RV1108_CLKGATE_CON(3), 10, GFLAGS), -+ GATE(PCLK_PWM, "pclk_pwm", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 3, 
GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 7, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 8, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 9, GFLAGS), ++ DIV(0, "clk_test_24m", "xin24m", 0, ++ RK3399_CLKSEL_CON(57), 6, 10, DFLAGS), + -+ GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(PCLK_EFUSE0, "pclk_efuse0", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 12, GFLAGS), -+ GATE(PCLK_EFUSE1, "pclk_efuse1", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 13, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 13, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, -+ RV1108_CLKSEL_CON(21), 0, 10, DFLAGS, -+ RV1108_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(13), 14, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, -+ RV1108_CLKSEL_CON(22), 0, 10, DFLAGS, -+ RV1108_CLKGATE_CON(3), 12, GFLAGS), ++ /* spi */ ++ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 12, GFLAGS), + -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre", 0, -+ RV1108_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 13, GFLAGS), + -+ /* PD_DDR */ -+ GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NOGATE(0, "clk_ddrphy_src", mux_ddrphy_p, CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3, -+ DFLAGS | CLK_DIVIDER_POWER_OF_TWO), -+ FACTOR(0, "clk_ddr", "clk_ddrphy_src", 0, 1, 2), -+ GATE(0, "clk_ddrphy4x", "clk_ddr", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(0, "pclk_ddrupctl", "pclk_ddr_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(0, "nclk_ddrupctl", "clk_ddr", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(12), 5, GFLAGS), -+ GATE(0, "pclk_ddrmon", "pclk_ddr_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(12), 6, GFLAGS), -+ GATE(0, "timer_clk", "xin24m", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(0), 11, GFLAGS), -+ GATE(0, "pclk_mschniu", "pclk_ddr_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(0, "pclk_ddrphy", "pclk_ddr_pre", CLK_IGNORE_UNUSED, -+ RV1108_CLKGATE_CON(14), 4, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 14, GFLAGS), ++ ++ COMPOSITE(SCLK_SPI4, "clk_spi4", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(9), 15, GFLAGS), ++ ++ COMPOSITE(SCLK_SPI5, "clk_spi5", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(58), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(13), 13, GFLAGS), ++ ++ /* i2c */ ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 
0, GFLAGS), + ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(62), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 2, GFLAGS), ++ ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(63), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 4, GFLAGS), ++ ++ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 1, GFLAGS), ++ ++ COMPOSITE(SCLK_I2C6, "clk_i2c6", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(62), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 3, GFLAGS), ++ ++ COMPOSITE(SCLK_I2C7, "clk_i2c7", mux_pll_src_cpll_gpll_p, 0, ++ RK3399_CLKSEL_CON(63), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3399_CLKGATE_CON(10), 5, GFLAGS), ++ ++ /* timer */ ++ GATE(SCLK_TIMER00, "clk_timer00", "xin24m", 0, RK3399_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(SCLK_TIMER01, "clk_timer01", "xin24m", 0, RK3399_CLKGATE_CON(26), 1, GFLAGS), ++ GATE(SCLK_TIMER02, "clk_timer02", "xin24m", 0, RK3399_CLKGATE_CON(26), 2, GFLAGS), ++ GATE(SCLK_TIMER03, "clk_timer03", "xin24m", 0, RK3399_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(SCLK_TIMER04, "clk_timer04", "xin24m", 0, RK3399_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(SCLK_TIMER05, "clk_timer05", "xin24m", 0, RK3399_CLKGATE_CON(26), 5, GFLAGS), ++ GATE(SCLK_TIMER06, "clk_timer06", "xin24m", 0, RK3399_CLKGATE_CON(26), 6, GFLAGS), ++ GATE(SCLK_TIMER07, "clk_timer07", "xin24m", 0, RK3399_CLKGATE_CON(26), 7, GFLAGS), ++ GATE(SCLK_TIMER08, "clk_timer08", "xin24m", 0, RK3399_CLKGATE_CON(26), 8, GFLAGS), ++ GATE(SCLK_TIMER09, "clk_timer09", "xin24m", 0, RK3399_CLKGATE_CON(26), 9, GFLAGS), ++ GATE(SCLK_TIMER10, "clk_timer10", "xin24m", 0, RK3399_CLKGATE_CON(26), 10, GFLAGS), ++ GATE(SCLK_TIMER11, "clk_timer11", "xin24m", 0, RK3399_CLKGATE_CON(26), 11, GFLAGS), ++ ++ /* clk_test */ ++ /* clk_test_pre is controlled by CRU_MISC_CON[3] */ ++ COMPOSITE_NOMUX(0, "clk_test", "clk_test_pre", CLK_IGNORE_UNUSED, ++ RK3399_CLKSEL_CON(58), 0, 5, DFLAGS, ++ RK3399_CLKGATE_CON(13), 11, GFLAGS), ++ ++ /* ddrc */ ++ GATE(0, "clk_ddrc_lpll_src", "lpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), ++ 0, GFLAGS), ++ GATE(0, "clk_ddrc_bpll_src", "bpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), ++ 1, GFLAGS), ++ GATE(0, "clk_ddrc_dpll_src", "dpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), ++ 2, GFLAGS), ++ GATE(0, "clk_ddrc_gpll_src", "gpll", CLK_IS_CRITICAL, RK3399_CLKGATE_CON(3), ++ 3, GFLAGS), ++ COMPOSITE_DDRCLK(SCLK_DDRC, "sclk_ddrc", mux_ddrclk_p, 0, ++ RK3399_CLKSEL_CON(6), 4, 2, 0, 0, ROCKCHIP_DDRCLK_SIP), ++}; ++ ++static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = { + /* -+ * Clock-Architecture Diagram 6 ++ * PMU CRU Clock-Architecture + */ + -+ /* PD_PERI */ -+ COMPOSITE_NOMUX(0, "pclk_periph_pre", "gpll", CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(23), 10, 5, DFLAGS, -+ RV1108_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(PCLK_PERI, "pclk_periph", "pclk_periph_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(15), 13, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_periph_pre", "gpll", CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(23), 5, 5, DFLAGS, -+ RV1108_CLKGATE_CON(4), 4, GFLAGS), -+ GATE(HCLK_PERI, "hclk_periph", "hclk_periph_pre", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(15), 12, GFLAGS), ++ GATE(0, "fclk_cm0s_pmu_ppll_src", "ppll", CLK_IS_CRITICAL, ++ RK3399_PMU_CLKGATE_CON(0), 1, GFLAGS), + -+ GATE(0, "aclk_peri_src_dpll", "dpll", CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(0, "aclk_peri_src_gpll", "gpll", 
CLK_IS_CRITICAL, -+ RV1108_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE(ACLK_PERI, "aclk_periph", mux_aclk_peri_src_p, CLK_IS_CRITICAL, -+ RV1108_CLKSEL_CON(23), 15, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(15), 11, GFLAGS), ++ COMPOSITE_NOGATE(FCLK_CM0S_SRC_PMU, "fclk_cm0s_src_pmu", mux_fclk_cm0s_pmu_ppll_p, CLK_IS_CRITICAL, ++ RK3399_PMU_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 5, DFLAGS), + -+ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, -+ RV1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RV1108_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE(SCLK_SPI3_PMU, "clk_spi3_pmu", mux_24m_ppll_p, 0, ++ RK3399_PMU_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 2, GFLAGS), + -+ COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0, -+ RV1108_CLKSEL_CON(25), 10, 2, MFLAGS, -+ RV1108_CLKGATE_CON(5), 2, GFLAGS), -+ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, -+ RV1108_CLKSEL_CON(26), 0, 8, DFLAGS), ++ COMPOSITE(0, "clk_wifi_div", mux_ppll_24m_p, CLK_IGNORE_UNUSED, ++ RK3399_PMU_CLKSEL_CON(1), 13, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 8, GFLAGS), + -+ COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0, -+ RV1108_CLKSEL_CON(25), 12, 2, MFLAGS, -+ RV1108_CLKGATE_CON(5), 1, GFLAGS), -+ DIV(SCLK_EMMC, "sclk_emmc", "sclk_emmc_src", 0, -+ RK2928_CLKSEL_CON(26), 8, 8, DFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 2, GFLAGS), ++ COMPOSITE_FRACMUX_NOGATE(0, "clk_wifi_frac", "clk_wifi_div", CLK_SET_RATE_PARENT, ++ RK3399_PMU_CLKSEL_CON(7), 0, ++ &rk3399_pmuclk_wifi_fracmux), + -+ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(27), 14, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1108_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 3, GFLAGS), ++ MUX(0, "clk_timer_src_pmu", mux_pll_p, CLK_IGNORE_UNUSED, ++ RK3399_PMU_CLKSEL_CON(1), 15, 1, MFLAGS), + -+ GATE(HCLK_HOST0, "hclk_host0", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(0, "hclk_host0_arb", "hclk_periph", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(HCLK_OTG, "hclk_otg", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(0, "hclk_otg_pmu", "hclk_periph", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(SCLK_USBPHY, "clk_usbphy", "xin24m", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(5), 5, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_I2C0_PMU, "clk_i2c0_pmu", "ppll", 0, ++ RK3399_PMU_CLKSEL_CON(2), 0, 7, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 9, GFLAGS), + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_2plls_p, 0, -+ RV1108_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1108_CLKGATE_CON(5), 4, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 10, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_I2C4_PMU, "clk_i2c4_pmu", "ppll", 0, ++ RK3399_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 10, GFLAGS), + -+ COMPOSITE(SCLK_MAC_PRE, "sclk_mac_pre", mux_pll_src_apll_gpll_p, 0, -+ RV1108_CLKSEL_CON(24), 12, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1108_CLKGATE_CON(4), 10, GFLAGS), -+ MUX(SCLK_MAC, "sclk_mac", mux_sclk_mac_p, CLK_SET_RATE_PARENT, -+ RV1108_CLKSEL_CON(24), 8, 1, MFLAGS), -+ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 6, GFLAGS), -+ 
GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_periph", 0, RV1108_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_periph", 0, RV1108_CLKGATE_CON(15), 5, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_I2C8_PMU, "clk_i2c8_pmu", "ppll", 0, ++ RK3399_PMU_CLKSEL_CON(2), 8, 7, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 11, GFLAGS), + -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RV1108_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RV1108_SDMMC_CON1, 1), ++ DIV(0, "clk_32k_suspend_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RK3399_PMU_CLKSEL_CON(4), 0, 10, DFLAGS), ++ MUX(0, "clk_testout_2io", mux_clk_testout2_2io_p, CLK_IGNORE_UNUSED, ++ RK3399_PMU_CLKSEL_CON(4), 15, 1, MFLAGS), + -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RV1108_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RV1108_SDIO_CON1, 1), ++ MUX(SCLK_UART4_SRC, "clk_uart4_src", mux_24m_ppll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3399_PMU_CLKSEL_CON(5), 10, 1, MFLAGS), + -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RV1108_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RV1108_EMMC_CON1, 1), ++ COMPOSITE_NOMUX(0, "clk_uart4_div", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3399_PMU_CLKSEL_CON(5), 0, 7, DFLAGS, ++ RK3399_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ ++ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_div", CLK_SET_RATE_PARENT, ++ RK3399_PMU_CLKSEL_CON(6), 0, ++ RK3399_PMU_CLKGATE_CON(0), 6, GFLAGS, ++ &rk3399_uart4_pmu_fracmux), ++ ++ DIV(PCLK_SRC_PMU, "pclk_pmu_src", "ppll", CLK_IS_CRITICAL, ++ RK3399_PMU_CLKSEL_CON(0), 0, 5, DFLAGS), ++ ++ /* pmu clock gates */ ++ GATE(SCLK_TIMER12_PMU, "clk_timer0_pmu", "clk_timer_src_pmu", 0, RK3399_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(SCLK_TIMER13_PMU, "clk_timer1_pmu", "clk_timer_src_pmu", 0, RK3399_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ ++ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, RK3399_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(PCLK_PMUGRF_PMU, "pclk_pmugrf_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_INTMEM1_PMU, "pclk_intmem1_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_GPIO1_PMU, "pclk_gpio1_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(PCLK_SGRF_PMU, "pclk_sgrf_pmu", "pclk_pmu_src", CLK_IGNORE_UNUSED, RK3399_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_NOC_PMU, "pclk_noc_pmu", "pclk_pmu_src", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_I2C4_PMU, "pclk_i2c4_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_I2C8_PMU, "pclk_i2c8_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(PCLK_RKPWM_PMU, "pclk_rkpwm_pmu", "pclk_pmu_src", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_SPI3_PMU, "pclk_spi3_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(PCLK_TIMER_PMU, "pclk_timer_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(PCLK_MAILBOX_PMU, "pclk_mailbox_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 13, GFLAGS), ++ GATE(PCLK_UART4_PMU, "pclk_uart4_pmu", "pclk_pmu_src", 0, 
RK3399_PMU_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(PCLK_WDT_M0_PMU, "pclk_wdt_m0_pmu", "pclk_pmu_src", 0, RK3399_PMU_CLKGATE_CON(1), 15, GFLAGS), ++ ++ GATE(FCLK_CM0S_PMU, "fclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(SCLK_CM0S_PMU, "sclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(HCLK_CM0S_PMU, "hclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(DCLK_CM0S_PMU, "dclk_cm0s_pmu", "fclk_cm0s_src_pmu", 0, RK3399_PMU_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(HCLK_NOC_PMU, "hclk_noc_pmu", "fclk_cm0s_src_pmu", CLK_IS_CRITICAL, RK3399_PMU_CLKGATE_CON(2), 5, GFLAGS), +}; + -+static void __iomem *rv1108_cru_base; ++static void __iomem *rk3399_cru_base; ++static void __iomem *rk3399_pmucru_base; + -+static void rv1108_dump_cru(void) ++void rk3399_dump_cru(void) +{ -+ if (rv1108_cru_base) { ++ if (rk3399_cru_base) { + pr_warn("CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rv1108_cru_base, -+ 0x1f8, false); ++ 32, 4, rk3399_cru_base, ++ 0x594, false); ++ } ++ if (rk3399_pmucru_base) { ++ pr_warn("PMU CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3399_pmucru_base, ++ 0x134, false); + } +} ++EXPORT_SYMBOL_GPL(rk3399_dump_cru); + -+static void __init rv1108_clk_init(struct device_node *np) ++static int rk3399_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) ++{ ++ rk3399_dump_cru(); ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block rk3399_clk_panic_block = { ++ .notifier_call = rk3399_clk_panic, ++}; ++ ++static void __init rk3399_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; @@ -61981,6 +59674,8 @@ index 000000000..0a93b9333 + return; + } + ++ rk3399_cru_base = reg_base; ++ + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); @@ -61989,7454 +59684,8968 @@ index 000000000..0a93b9333 + } + clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rv1108_pll_clks, -+ ARRAY_SIZE(rv1108_pll_clks), -+ RV1108_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rv1108_clk_branches, -+ ARRAY_SIZE(rv1108_clk_branches)); ++ rockchip_clk_register_plls(ctx, rk3399_pll_clks, ++ ARRAY_SIZE(rk3399_pll_clks), -1); + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, clks[PLL_APLL], clks[PLL_GPLL], -+ &rv1108_cpuclk_data, rv1108_cpuclk_rates, -+ ARRAY_SIZE(rv1108_cpuclk_rates)); ++ rockchip_clk_register_branches(ctx, rk3399_clk_branches, ++ ARRAY_SIZE(rk3399_clk_branches)); + -+ rockchip_register_softrst(np, 13, reg_base + RV1108_SOFTRST_CON(0), ++ rockchip_clk_register_armclk(ctx, ARMCLKL, "armclkl", ++ 4, clks[PLL_APLLL], clks[PLL_GPLL], ++ &rk3399_cpuclkl_data, rk3399_cpuclkl_rates, ++ ARRAY_SIZE(rk3399_cpuclkl_rates)); ++ ++ rockchip_clk_register_armclk(ctx, ARMCLKB, "armclkb", ++ 4, clks[PLL_APLLB], clks[PLL_GPLL], ++ &rk3399_cpuclkb_data, rk3399_cpuclkb_rates, ++ ARRAY_SIZE(rk3399_cpuclkb_rates)); ++ ++ rockchip_register_softrst(np, 21, reg_base + RK3399_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RV1108_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RK3399_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); -+ -+ if (!rk_dump_cru) { -+ rv1108_cru_base = reg_base; -+ rk_dump_cru = rv1108_dump_cru; -+ } +} -+CLK_OF_DECLARE(rv1108_cru, "rockchip,rv1108-cru", rv1108_clk_init); 
++CLK_OF_DECLARE(rk3399_cru, "rockchip,rk3399-cru", rk3399_clk_init); + -+static int __init clk_rv1108_probe(struct platform_device *pdev) ++static void __init rk3399_pmu_clk_init(struct device_node *np) +{ -+ struct device_node *np = pdev->dev.of_node; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ rv1108_clk_init(np); ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru pmu region\n", __func__); ++ return; ++ } + -+ return 0; ++ rk3399_pmucru_base = reg_base; ++ ++ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip pmu clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ ++ rockchip_clk_register_plls(ctx, rk3399_pmu_pll_clks, ++ ARRAY_SIZE(rk3399_pmu_pll_clks), -1); ++ ++ rockchip_clk_register_branches(ctx, rk3399_clk_pmu_branches, ++ ARRAY_SIZE(rk3399_clk_pmu_branches)); ++ ++ rockchip_register_softrst(np, 2, reg_base + RK3399_PMU_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ ++ rockchip_clk_of_add_provider(np, ctx); ++ ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rk3399_clk_panic_block); +} ++CLK_OF_DECLARE(rk3399_cru_pmu, "rockchip,rk3399-pmucru", rk3399_pmu_clk_init); + -+static const struct of_device_id clk_rv1108_match_table[] = { ++#ifdef MODULE ++struct clk_rk3399_inits { ++ void (*inits)(struct device_node *np); ++}; ++ ++static const struct clk_rk3399_inits clk_rk3399_pmucru_init = { ++ .inits = rk3399_pmu_clk_init, ++}; ++ ++static const struct clk_rk3399_inits clk_rk3399_cru_init = { ++ .inits = rk3399_clk_init, ++}; ++ ++static const struct of_device_id clk_rk3399_match_table[] = { + { -+ .compatible = "rockchip,rv1108-cru", ++ .compatible = "rockchip,rk3399-cru", ++ .data = &clk_rk3399_cru_init, ++ }, { ++ .compatible = "rockchip,rk3399-pmucru", ++ .data = &clk_rk3399_pmucru_init, + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rv1108_match_table); ++MODULE_DEVICE_TABLE(of, clk_rk3399_match_table); + -+static struct platform_driver clk_rv1108_driver = { ++static int clk_rk3399_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3399_inits *init_data; ++ ++ match = of_match_device(clk_rk3399_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; ++ ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); ++ ++ return 0; ++} ++ ++static struct platform_driver clk_rk3399_driver = { ++ .probe = clk_rk3399_probe, + .driver = { -+ .name = "clk-rv1108", -+ .of_match_table = clk_rv1108_match_table, ++ .name = "clk-rk3399", ++ .of_match_table = clk_rk3399_match_table, ++ .suppress_bind_attrs = true, + }, +}; -+builtin_platform_driver_probe(clk_rv1108_driver, clk_rv1108_probe); ++module_platform_driver(clk_rk3399_driver); + -+MODULE_DESCRIPTION("Rockchip RV1108 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RK3399 Clock Driver"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk-rv1126.c b/drivers/clk/rockchip-oh/clk-rv1126.c ++MODULE_ALIAS("platform:clk-rk3399"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk-rk3528.c b/drivers/clk/rockchip-oh/clk-rk3528.c new file mode 100644 -index 000000000..62b204d89 +index 000000000..1b14cd57b --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk-rv1126.c -@@ -0,0 +1,1566 @@ ++++ b/drivers/clk/rockchip-oh/clk-rk3528.c +@@ -0,0 +1,1174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. 
-+ * Author: Finley Xiao ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Joseph Chen + */ + +#include +#include +#include -+#include +#include ++#include +#include -+#include ++#include +#include "clk.h" + -+#define RV1126_GMAC_CON 0x460 -+#define RV1126_GRF_IOFUNC_CON1 0x10264 -+#define RV1126_GRF_SOC_STATUS0 0x10 -+#define RV1126_PMUGRF_SOC_CON0 0x100 -+ -+#define RV1126_FRAC_MAX_PRATE 1200000000 -+#define RV1126_CSIOUT_FRAC_MAX_PRATE 300000000 -+ -+enum rv1126_pmu_plls { -+ gpll, -+}; ++/* A placeholder for rk3066 pll type. We are rk3328 pll type */ ++#define RK3528_GRF_SOC_STATUS0 0x1a0 + -+enum rv1126_plls { -+ apll, dpll, cpll, hpll, ++enum rk3528_plls { ++ apll, cpll, gpll, ppll, dpll, +}; + -+static struct rockchip_pll_rate_table rv1126_pll_rates[] = { ++/* ++ * ## PLL attention. ++ * ++ * [FRAC PLL]: GPLL, PPLL, DPLL ++ * - frac mode: refdiv can be 1 or 2 only ++ * - int mode: refdiv has no special limit ++ * - VCO range: [950, 3800] MHZ ++ * ++ * [INT PLL]: CPLL, APLL ++ * - int mode: refdiv can be 1 or 2 only ++ * - VCO range: [475, 1900] MHZ ++ * ++ * [PPLL]: normal mode only. ++ * ++ * ++ * ## CRU access attention. ++ * ++ * pclk_cru => pclk_vo_root => aclk_vo_root ++ * pclk_cru_pcie => pclk_vpu_root => aclk_vpu_root ++ * pclk_cru_ddrphy => hclk_rkvdec_root => aclk_rkvdec_root ++ */ ++static struct rockchip_pll_rate_table rk3528_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), /* GPLL */ ++ RK3036_PLL_RATE(1092000000, 2, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 42, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), /* PPLL */ ++ RK3036_PLL_RATE(996000000, 2, 83, 1, 1, 1, 0), /* CPLL */ ++ RK3036_PLL_RATE(960000000, 1, 40, 1, 1, 1, 0), 
+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), + RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), -+#ifdef CONFIG_ROCKCHIP_LOW_PERFORMANCE + RK3036_PLL_RATE(600000000, 1, 50, 2, 1, 1, 0), -+#else -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+#endif -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), -+ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), ++ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), + RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), + RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), + RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 24, 3, 2, 1, 0), + { /* sentinel */ }, +}; + -+#define RV1126_DIV_ACLK_CORE_MASK 0xf -+#define RV1126_DIV_ACLK_CORE_SHIFT 4 -+#define RV1126_DIV_PCLK_DBG_MASK 0x7 -+#define RV1126_DIV_PCLK_DBG_SHIFT 0 ++#define RK3528_DIV_ACLK_M_CORE_MASK 0x1f ++#define RK3528_DIV_ACLK_M_CORE_SHIFT 11 ++#define RK3528_DIV_PCLK_DBG_MASK 0x1f ++#define RK3528_DIV_PCLK_DBG_SHIFT 1 + -+#define RV1126_CLKSEL1(_aclk_core, _pclk_dbg) \ ++#define RK3528_CLKSEL39(_aclk_m_core) \ +{ \ -+ .reg = RV1126_CLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_aclk_core, RV1126_DIV_ACLK_CORE_MASK, \ -+ RV1126_DIV_ACLK_CORE_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, RV1126_DIV_PCLK_DBG_MASK, \ -+ RV1126_DIV_PCLK_DBG_SHIFT), \ ++ .reg = RK3528_CLKSEL_CON(39), \ ++ .val = HIWORD_UPDATE(_aclk_m_core, RK3528_DIV_ACLK_M_CORE_MASK, \ ++ RK3528_DIV_ACLK_M_CORE_SHIFT), \ +} + -+#define RV1126_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++#define RK3528_CLKSEL40(_pclk_dbg) \ ++{ \ ++ .reg = RK3528_CLKSEL_CON(40), \ ++ .val = HIWORD_UPDATE(_pclk_dbg, RK3528_DIV_PCLK_DBG_MASK, \ ++ RK3528_DIV_PCLK_DBG_SHIFT), \ ++} ++ ++/* SIGN-OFF: _aclk_m_core: 550M, _pclk_dbg: 137.5M, */ ++#define RK3528_CPUCLK_RATE(_prate, _aclk_m_core, _pclk_dbg) \ +{ \ + .prate = _prate, \ + .divs = { \ -+ RV1126_CLKSEL1(_aclk_core, _pclk_dbg), \ ++ RK3528_CLKSEL39(_aclk_m_core), \ ++ RK3528_CLKSEL40(_pclk_dbg), \ + }, \ +} + -+static struct rockchip_cpuclk_rate_table rv1126_cpuclk_rates[] __initdata = { -+ RV1126_CPUCLK_RATE(1608000000, 1, 7), -+ RV1126_CPUCLK_RATE(1584000000, 1, 7), -+ RV1126_CPUCLK_RATE(1560000000, 1, 7), -+ RV1126_CPUCLK_RATE(1536000000, 1, 7), -+ RV1126_CPUCLK_RATE(1512000000, 1, 7), -+ RV1126_CPUCLK_RATE(1488000000, 1, 5), -+ RV1126_CPUCLK_RATE(1464000000, 1, 5), -+ RV1126_CPUCLK_RATE(1440000000, 1, 5), -+ RV1126_CPUCLK_RATE(1416000000, 1, 5), -+ RV1126_CPUCLK_RATE(1392000000, 1, 5), -+ RV1126_CPUCLK_RATE(1368000000, 1, 5), -+ RV1126_CPUCLK_RATE(1344000000, 1, 5), -+ RV1126_CPUCLK_RATE(1320000000, 1, 5), -+ RV1126_CPUCLK_RATE(1296000000, 1, 5), -+ RV1126_CPUCLK_RATE(1272000000, 1, 5), -+ RV1126_CPUCLK_RATE(1248000000, 1, 5), -+ RV1126_CPUCLK_RATE(1224000000, 1, 5), -+ RV1126_CPUCLK_RATE(1200000000, 1, 5), -+ RV1126_CPUCLK_RATE(1104000000, 1, 5), -+ RV1126_CPUCLK_RATE(1008000000, 1, 5), -+ RV1126_CPUCLK_RATE(912000000, 1, 5), -+ RV1126_CPUCLK_RATE(816000000, 1, 3), -+ 
RV1126_CPUCLK_RATE(696000000, 1, 3), -+ RV1126_CPUCLK_RATE(600000000, 1, 3), -+ RV1126_CPUCLK_RATE(408000000, 1, 1), -+ RV1126_CPUCLK_RATE(312000000, 1, 1), -+ RV1126_CPUCLK_RATE(216000000, 1, 1), -+ RV1126_CPUCLK_RATE(96000000, 1, 1), ++static struct rockchip_cpuclk_rate_table rk3528_cpuclk_rates[] __initdata = { ++ /* APLL(CPU) rate <= 1900M, due to APLL VCO limit */ ++ RK3528_CPUCLK_RATE(1896000000, 1, 13), ++ RK3528_CPUCLK_RATE(1800000000, 1, 12), ++ RK3528_CPUCLK_RATE(1704000000, 1, 11), ++ RK3528_CPUCLK_RATE(1608000000, 1, 11), ++ RK3528_CPUCLK_RATE(1512000000, 1, 11), ++ RK3528_CPUCLK_RATE(1416000000, 1, 9), ++ RK3528_CPUCLK_RATE(1296000000, 1, 8), ++ RK3528_CPUCLK_RATE(1200000000, 1, 8), ++ RK3528_CPUCLK_RATE(1188000000, 1, 8), ++ RK3528_CPUCLK_RATE(1092000000, 1, 7), ++ RK3528_CPUCLK_RATE(1008000000, 1, 6), ++ RK3528_CPUCLK_RATE(1000000000, 1, 6), ++ RK3528_CPUCLK_RATE(996000000, 1, 6), ++ RK3528_CPUCLK_RATE(960000000, 1, 6), ++ RK3528_CPUCLK_RATE(912000000, 1, 6), ++ RK3528_CPUCLK_RATE(816000000, 1, 5), ++ RK3528_CPUCLK_RATE(600000000, 1, 3), ++ RK3528_CPUCLK_RATE(594000000, 1, 3), ++ RK3528_CPUCLK_RATE(408000000, 1, 2), ++ RK3528_CPUCLK_RATE(312000000, 1, 2), ++ RK3528_CPUCLK_RATE(216000000, 1, 1), ++ RK3528_CPUCLK_RATE(96000000, 1, 0), +}; + -+static const struct rockchip_cpuclk_reg_data rv1126_cpuclk_data = { -+ .core_reg[0] = RV1126_CLKSEL_CON(0), -+ .div_core_shift[0] = 0, ++static const struct rockchip_cpuclk_reg_data rk3528_cpuclk_data = { ++ .core_reg[0] = RK3528_CLKSEL_CON(39), ++ .div_core_shift[0] = 5, + .div_core_mask[0] = 0x1f, + .num_cores = 1, -+ .mux_core_alt = 0, -+ .mux_core_main = 2, -+ .mux_core_shift = 6, -+ .mux_core_mask = 0x3, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 10, ++ .mux_core_mask = 0x1, +}; + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_rtc32k_p) = { "clk_pmupvtm_divout", "xin32k", "clk_osc0_div32k" }; -+PNAME(mux_clk_32k_ioe_p) = { "xin32k", "clk_rtc32k" }; -+PNAME(mux_wifi_p) = { "clk_wifi_osc0", "clk_wifi_div" }; -+PNAME(mux_uart1_p) = { "sclk_uart1_div", "sclk_uart1_fracdiv", "xin24m" }; -+PNAME(mux_xin24m_gpll_p) = { "xin24m", "gpll" }; -+PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc32k" }; -+PNAME(mux_usbphy_otg_ref_p) = { "clk_ref12m", "xin_osc0_div2_usbphyref_otg" }; -+PNAME(mux_usbphy_host_ref_p) = { "clk_ref12m", "xin_osc0_div2_usbphyref_host" }; -+PNAME(mux_mipidsiphy_ref_p) = { "clk_ref24m", "xin_osc0_mipiphyref" }; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k" }; -+PNAME(mux_hclk_pclk_pdbus_p) = { "gpll", "dummy_cpll" }; -+PNAME(mux_uart0_p) = { "sclk_uart0_div", "sclk_uart0_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "sclk_uart2_div", "sclk_uart2_frac", "xin24m" }; -+PNAME(mux_uart3_p) = { "sclk_uart3_div", "sclk_uart3_frac", "xin24m" }; -+PNAME(mux_uart4_p) = { "sclk_uart4_div", "sclk_uart4_frac", "xin24m" }; -+PNAME(mux_uart5_p) = { "sclk_uart5_div", "sclk_uart5_frac", "xin24m" }; -+PNAME(mux_i2s0_tx_p) = { "mclk_i2s0_tx_div", "mclk_i2s0_tx_fracdiv", "i2s0_mclkin", "xin12m" }; -+PNAME(mux_i2s0_rx_p) = { "mclk_i2s0_rx_div", "mclk_i2s0_rx_fracdiv", "i2s0_mclkin", "xin12m" }; -+PNAME(mux_i2s0_tx_out2io_p) = { "mclk_i2s0_tx", "xin12m" }; -+PNAME(mux_i2s0_rx_out2io_p) = { "mclk_i2s0_rx", "xin12m" }; -+PNAME(mux_i2s1_p) = { "mclk_i2s1_div", "mclk_i2s1_fracdiv", "i2s1_mclkin", "xin12m" }; -+PNAME(mux_i2s1_out2io_p) = { "mclk_i2s1", "xin12m" }; -+PNAME(mux_i2s2_p) = { "mclk_i2s2_div", "mclk_i2s2_fracdiv", "i2s2_mclkin", "xin12m" }; 
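The RK3036_PLL_RATE() tuples above, together with the VCO limits called out in the rk3528 PLL comment, follow the usual Rockchip RK3036/RK3328 PLL relation: rate = 24 MHz * (fbdiv + frac / 2^24) / (refdiv * postdiv1 * postdiv2). A small user-space sketch of that arithmetic, checked against two entries from the rk3528 table (the helper name and the standalone main() are illustrative only, not part of the driver):

#include <stdint.h>
#include <stdio.h>

/* Rate implied by one RK3036_PLL_RATE() tuple, derived from the 24 MHz crystal. */
static uint64_t pll_rate(uint32_t refdiv, uint32_t fbdiv, uint32_t postdiv1,
			 uint32_t postdiv2, uint32_t frac)
{
	uint64_t rate = 24000000ULL * fbdiv / refdiv;	/* integer part; this is also the VCO rate */

	rate += (24000000ULL * frac / refdiv) >> 24;	/* fractional part; frac is a 24-bit value */
	return rate / postdiv1 / postdiv2;
}

int main(void)
{
	/* RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0) -- the GPLL entry above */
	printf("%llu\n", (unsigned long long)pll_rate(1, 99, 2, 1, 0));	/* prints 1188000000 */
	/* RK3036_PLL_RATE(996000000, 2, 83, 1, 1, 1, 0) -- the CPLL entry above */
	printf("%llu\n", (unsigned long long)pll_rate(2, 83, 1, 1, 0));	/* prints 996000000 */
	return 0;
}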
-+PNAME(mux_i2s2_out2io_p) = { "mclk_i2s2", "xin12m" }; -+PNAME(mux_audpwm_p) = { "sclk_audpwm_div", "sclk_audpwm_fracdiv", "xin24m" }; -+PNAME(mux_dclk_vop_p) = { "dclk_vop_div", "dclk_vop_fracdiv", "xin24m" }; -+PNAME(mux_aclk_pdvi_p) = { "aclk_pdvi_div", "aclk_pdvi_np5" }; -+PNAME(mux_clk_isp_p) = { "clk_isp_div", "clk_isp_np5" }; -+PNAME(mux_gpll_usb480m_p) = { "gpll", "usb480m" }; -+PNAME(mux_cif_out2io_p) = { "xin24m", "clk_cif_out2io_div", "clk_cif_out2io_fracdiv" }; -+PNAME(mux_mipicsi_out2io_p) = { "xin24m", "clk_mipicsi_out2io_div", "clk_mipicsi_out2io_fracdiv" }; -+PNAME(mux_aclk_pdispp_p) = { "aclk_pdispp_div", "aclk_pdispp_np5" }; -+PNAME(mux_clk_ispp_p) = { "clk_ispp_div", "clk_ispp_np5" }; -+PNAME(mux_usb480m_gpll_p) = { "usb480m", "gpll" }; -+PNAME(clk_gmac_src_m0_p) = { "clk_gmac_div", "clk_gmac_rgmii_m0" }; -+PNAME(clk_gmac_src_m1_p) = { "clk_gmac_div", "clk_gmac_rgmii_m1" }; -+PNAME(mux_clk_gmac_src_p) = { "clk_gmac_src_m0", "clk_gmac_src_m1" }; -+PNAME(mux_rgmii_clk_p) = { "clk_gmac_tx_div50", "clk_gmac_tx_div5", "clk_gmac_tx_src", "clk_gmac_tx_src"}; -+PNAME(mux_rmii_clk_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; -+PNAME(mux_gmac_tx_rx_p) = { "rgmii_mode_clk", "rmii_mode_clk" }; -+PNAME(mux_dpll_gpll_p) = { "dpll", "gpll" }; -+PNAME(mux_aclk_pdnpu_p) = { "aclk_pdnpu_div", "aclk_pdnpu_np5" }; -+PNAME(mux_clk_npu_p) = { "clk_npu_div", "clk_npu_np5" }; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_24m_32k_p) = { "xin24m", "clk_32k" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_200m_100m_24m_p) = { "clk_500m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_600m_300m_200m_24m_p) = { "clk_600m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; ++PNAME(aclk_gpu_p) = { "aclk_gpu_root", "clk_gpu_pvtpll_src" }; ++PNAME(aclk_rkvdec_pvtmux_root_p) = { "aclk_rkvdec_root", "clk_rkvdec_pvtpll_src" }; ++PNAME(clk_i2c2_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_32k" }; ++PNAME(clk_ref_pcie_inner_phy_p) = { "clk_ppll_100m_src", "xin24m" }; ++PNAME(dclk_vop0_p) = { "dclk_vop_src0", "clk_hdmiphy_pixel_io" }; ++PNAME(mclk_i2s0_2ch_sai_src_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "xin12m" }; ++PNAME(mclk_i2s1_8ch_sai_src_p) = { "clk_i2s1_8ch_src", "clk_i2s1_8ch_frac", "xin12m" }; ++PNAME(mclk_i2s2_2ch_sai_src_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "xin12m" }; ++PNAME(mclk_i2s3_8ch_sai_src_p) = { "clk_i2s3_8ch_src", "clk_i2s3_8ch_frac", "xin12m" }; ++PNAME(mclk_sai_i2s0_p) = { "mclk_i2s0_2ch_sai_src", "i2s0_mclkin" }; ++PNAME(mclk_sai_i2s1_p) = { "mclk_i2s1_8ch_sai_src", "i2s1_mclkin" }; ++PNAME(mclk_spdif_src_p) = { "clk_spdif_src", "clk_spdif_frac", "xin12m" }; ++PNAME(sclk_uart0_src_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; ++PNAME(sclk_uart1_src_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(sclk_uart2_src_p) = { 
"clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(sclk_uart3_src_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(sclk_uart4_src_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(sclk_uart5_src_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(sclk_uart6_src_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(sclk_uart7_src_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(clk_32k_p) = { "xin_osc0_div", "clk_pvtm_32k" }; + ++/* Pass 0 to PLL() '_lshift' as a placeholder for rk3066 pll type. We are rk3328 pll type */ ++static struct rockchip_pll_clock rk3528_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(0), ++ RK3528_MODE_CON, 0, 0, 0, rk3528_pll_rates), + -+#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE -+PNAME(mux_gpll_usb480m_cpll_xin24m_p) = { "gpll", "usb480m", "cpll", "xin24m" }; -+PNAME(mux_gpll_cpll_dpll_p) = { "gpll", "cpll", "dummy_dpll" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_cpll_usb480m_xin24m_p) = { "gpll", "cpll", "usb480m", "xin24m" }; -+PNAME(mux_cpll_gpll_p) = { "cpll", "gpll" }; -+PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(mux_cpll_hpll_gpll_p) = { "cpll", "hpll", "gpll" }; -+PNAME(mux_cpll_gpll_hpll_p) = { "cpll", "gpll", "hpll" }; -+PNAME(mux_gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; -+PNAME(mux_gpll_cpll_apll_hpll_p) = { "gpll", "cpll", "dummy_apll", "hpll" }; -+#else -+PNAME(mux_gpll_usb480m_cpll_xin24m_p) = { "gpll", "usb480m", "dummy_cpll", "xin24m" }; -+PNAME(mux_gpll_cpll_dpll_p) = { "gpll", "dummy_cpll", "dummy_dpll" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "dummy_cpll" }; -+PNAME(mux_gpll_cpll_usb480m_xin24m_p) = { "gpll", "dummy_cpll", "usb480m", "xin24m" }; -+PNAME(mux_cpll_gpll_p) = { "dummy_cpll", "gpll" }; -+PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "dummy_cpll", "xin24m" }; -+PNAME(mux_cpll_hpll_gpll_p) = { "dummy_cpll", "dummy_hpll", "gpll" }; -+PNAME(mux_cpll_gpll_hpll_p) = { "dummy_cpll", "gpll", "dummy_hpll" }; -+PNAME(mux_gpll_cpll_hpll_p) = { "gpll", "dummy_cpll", "dummy_hpll" }; -+PNAME(mux_gpll_cpll_apll_hpll_p) = { "gpll", "dummy_cpll", "dummy_apll", "dummy_hpll" }; -+#endif ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(8), ++ RK3528_MODE_CON, 2, 0, 0, rk3528_pll_rates), + -+static u32 rgmii_mux_idx[] = { 2, 3, 0, 1 }; ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(24), ++ RK3528_MODE_CON, 4, 0, 0, rk3528_pll_rates), + -+static struct rockchip_pll_clock rv1126_pmu_pll_clks[] __initdata = { -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ CLK_IS_CRITICAL, RV1126_PMU_PLL_CON(0), -+ RV1126_PMU_MODE, 0, 3, 0, rv1126_pll_rates), -+}; ++ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PCIE_PLL_CON(32), ++ RK3528_MODE_CON, 6, 0, ++ ROCKCHIP_PLL_FIXED_MODE, rk3528_pll_rates), + -+static struct rockchip_pll_clock rv1126_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1126_PLL_CON(0), -+ RV1126_MODE_CON, 0, 0, 0, rv1126_pll_rates), + [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1126_PLL_CON(8), -+ RV1126_MODE_CON, 2, 1, 0, NULL), -+#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ CLK_IS_CRITICAL, RV1126_PLL_CON(16), -+ RV1126_MODE_CON, 4, 2, 0, rv1126_pll_rates), -+ [hpll] = PLL(pll_rk3328, PLL_HPLL, 
"hpll", mux_pll_p, -+ CLK_IS_CRITICAL, RV1126_PLL_CON(24), -+ RV1126_MODE_CON, 6, 4, 0, rv1126_pll_rates), -+#else -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RV1126_PLL_CON(16), -+ RV1126_MODE_CON, 4, 2, 0, rv1126_pll_rates), -+ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, -+ 0, RV1126_PLL_CON(24), -+ RV1126_MODE_CON, 6, 4, 0, rv1126_pll_rates), -+#endif ++ CLK_IGNORE_UNUSED, RK3528_DDRPHY_PLL_CON(16), ++ RK3528_DDRPHY_MODE_CON, 0, 0, 0, rk3528_pll_rates), +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rv1126_rtc32k_fracmux __initdata = -+ MUX(CLK_RTC32K, "clk_rtc32k", mux_rtc32k_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(0), 7, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart0_fracmux __initdata = ++ MUX(CLK_UART0, "clk_uart0", sclk_uart0_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(6), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart1_fracmux __initdata = -+ MUX(SCLK_UART1_MUX, "sclk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(4), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", sclk_uart1_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(8), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart0_fracmux __initdata = -+ MUX(SCLK_UART0_MUX, "sclk_uart0_mux", mux_uart0_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(10), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", sclk_uart2_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(10), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart2_fracmux __initdata = -+ MUX(SCLK_UART2_MUX, "sclk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(12), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", sclk_uart3_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(12), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart3_fracmux __initdata = -+ MUX(SCLK_UART3_MUX, "sclk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(14), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", sclk_uart4_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(14), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart4_fracmux __initdata = -+ MUX(SCLK_UART4_MUX, "sclk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(16), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", sclk_uart5_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(16), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_uart5_fracmux __initdata = -+ MUX(SCLK_UART5_MUX, "sclk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(18), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart6_fracmux __initdata = ++ MUX(CLK_UART6, "clk_uart6", sclk_uart6_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(18), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_i2s0_tx_fracmux __initdata = -+ MUX(MCLK_I2S0_TX_MUX, "mclk_i2s0_tx_mux", mux_i2s0_tx_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(30), 0, 2, MFLAGS); ++static struct rockchip_clk_branch rk3528_uart7_fracmux __initdata = ++ MUX(CLK_UART7, "clk_uart7", sclk_uart7_src_p, 
CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(20), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_i2s0_rx_fracmux __initdata = -+ MUX(MCLK_I2S0_RX_MUX, "mclk_i2s0_rx_mux", mux_i2s0_rx_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(30), 2, 2, MFLAGS); ++static struct rockchip_clk_branch mclk_i2s0_2ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S0_2CH_SAI_SRC_PRE, "mclk_i2s0_2ch_sai_src_pre", mclk_i2s0_2ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(22), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_i2s1_fracmux __initdata = -+ MUX(MCLK_I2S1_MUX, "mclk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(31), 8, 2, MFLAGS); ++static struct rockchip_clk_branch mclk_i2s1_8ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S1_8CH_SAI_SRC_PRE, "mclk_i2s1_8ch_sai_src_pre", mclk_i2s1_8ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(26), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_i2s2_fracmux __initdata = -+ MUX(MCLK_I2S2_MUX, "mclk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(33), 8, 2, MFLAGS); ++static struct rockchip_clk_branch mclk_i2s2_2ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S2_2CH_SAI_SRC_PRE, "mclk_i2s2_2ch_sai_src_pre", mclk_i2s2_2ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(28), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_audpwm_fracmux __initdata = -+ MUX(SCLK_AUDPWM_MUX, "mclk_audpwm_mux", mux_audpwm_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(36), 8, 2, MFLAGS); ++static struct rockchip_clk_branch mclk_i2s3_8ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S3_8CH_SAI_SRC_PRE, "mclk_i2s3_8ch_sai_src_pre", mclk_i2s3_8ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(24), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_dclk_vop_fracmux __initdata = -+ MUX(DCLK_VOP_MUX, "dclk_vop_mux", mux_dclk_vop_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(47), 10, 2, MFLAGS); ++static struct rockchip_clk_branch mclk_spdif_src_fracmux __initdata = ++ MUX(MCLK_SDPDIF_SRC_PRE, "mclk_spdif_src_pre", mclk_spdif_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(32), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rv1126_cif_out2io_fracmux __initdata = -+ MUX(CLK_CIF_OUT_MUX, "clk_cif_out2io_mux", mux_cif_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(50), 14, 2, MFLAGS); ++/* ++ * CRU Clock-Architecture ++ */ ++static struct rockchip_clk_branch rk3528_clk_branches[] __initdata = { ++ /* top */ ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+static struct rockchip_clk_branch rv1126_mipicsi_out2io_fracmux __initdata = -+ MUX(CLK_MIPICSI_OUT_MUX, "clk_mipicsi_out2io_mux", mux_mipicsi_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(73), 10, 2, MFLAGS); ++ COMPOSITE(CLK_MATRIX_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 15, 1, MFLAGS, 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(CLK_MATRIX_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_50M_SRC, "clk_50m_src", "cpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(0), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_100M_SRC, "clk_100m_src", "cpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(0), 7, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_150M_SRC, "clk_150m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 3, GFLAGS), ++ 
COMPOSITE_NOMUX(CLK_MATRIX_200M_SRC, "clk_200m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_300M_SRC, "clk_300m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(2), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(CLK_MATRIX_339M_SRC, "clk_339m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(2), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_400M_SRC, "clk_400m_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3528_CLKSEL_CON(2), 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_600M_SRC, "clk_600m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(4), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC0, "dclk_vop_src0", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(32), 10, 1, MFLAGS, 2, 8, DFLAGS, ++ RK3528_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC1, "dclk_vop_src1", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ RK3528_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_HSM, "clk_hsm", "xin24m", 0, ++ RK3528_CLKSEL_CON(36), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(3), 13, GFLAGS), + -+static struct rockchip_clk_branch rv1126_clk_pmu_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 2 -+ */ -+ /* PD_PMU */ -+ COMPOSITE_NOMUX(PCLK_PDPMU, "pclk_pdpmu", "gpll", CLK_IS_CRITICAL, -+ RV1126_PMU_CLKSEL_CON(1), 0, 5, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "gpll", 0, ++ RK3528_CLKSEL_CON(4), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(5), 0, ++ RK3528_CLKGATE_CON(0), 13, GFLAGS, &rk3528_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, ++ RK3528_CLKGATE_CON(0), 14, GFLAGS), + -+ COMPOSITE_FRACMUX(CLK_OSC0_DIV32K, "clk_osc0_div32k", "xin24m", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKSEL_CON(13), 0, -+ RV1126_PMU_CLKGATE_CON(2), 9, GFLAGS, -+ &rv1126_rtc32k_fracmux), ++ COMPOSITE_NOMUX(CLK_UART1_SRC, "clk_uart1_src", "gpll", 0, ++ RK3528_CLKSEL_CON(6), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(7), 0, ++ RK3528_CLKGATE_CON(1), 0, GFLAGS, &rk3528_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RK3528_CLKGATE_CON(1), 1, GFLAGS), + -+ MUXPMUGRF(CLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, -+ RV1126_PMUGRF_SOC_CON0, 0, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_UART2_SRC, "clk_uart2_src", "gpll", 0, ++ RK3528_CLKSEL_CON(8), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(9), 0, ++ RK3528_CLKGATE_CON(1), 3, GFLAGS, &rk3528_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RK3528_CLKGATE_CON(1), 4, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_WIFI_DIV, "clk_wifi_div", "gpll", 0, -+ RV1126_PMU_CLKSEL_CON(12), 0, 6, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(2), 10, GFLAGS), -+ GATE(CLK_WIFI_OSC0, "clk_wifi_osc0", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(2), 11, GFLAGS), -+ MUX(CLK_WIFI, "clk_wifi", mux_wifi_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(12), 8, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_UART3_SRC, "clk_uart3_src", "gpll", 0, ++ RK3528_CLKSEL_CON(10), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 5, 
GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(11), 0, ++ RK3528_CLKGATE_CON(1), 6, GFLAGS, &rk3528_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RK3528_CLKGATE_CON(1), 7, GFLAGS), + -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART4_SRC, "clk_uart4_src", "gpll", 0, ++ RK3528_CLKSEL_CON(12), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(13), 0, ++ RK3528_CLKGATE_CON(1), 9, GFLAGS, &rk3528_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RK3528_CLKGATE_CON(1), 10, GFLAGS), + -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE(SCLK_UART1_DIV, "sclk_uart1_div", mux_gpll_usb480m_cpll_xin24m_p, 0, -+ RV1126_PMU_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART1_FRACDIV, "sclk_uart1_fracdiv", "sclk_uart1_div", CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(5), 0, -+ RV1126_PMU_CLKGATE_CON(0), 13, GFLAGS, -+ &rv1126_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "sclk_uart1_mux", 0, -+ RV1126_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART5_SRC, "clk_uart5_src", "gpll", 0, ++ RK3528_CLKSEL_CON(14), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(15), 0, ++ RK3528_CLKGATE_CON(1), 12, GFLAGS, &rk3528_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RK3528_CLKGATE_CON(1), 13, GFLAGS), + -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C0, "clk_i2c0", "gpll", 0, -+ RV1126_PMU_CLKSEL_CON(2), 0, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C2, "clk_i2c2", "gpll", 0, -+ RV1126_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART6_SRC, "clk_uart6_src", "gpll", 0, ++ RK3528_CLKSEL_CON(16), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(17), 0, ++ RK3528_CLKGATE_CON(1), 15, GFLAGS, &rk3528_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, ++ RK3528_CLKGATE_CON(2), 0, GFLAGS), + -+ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(CLK_PWM0, "clk_pwm0", mux_xin24m_gpll_p, 0, -+ RV1126_PMU_CLKSEL_CON(6), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE(CLK_PWM1, "clk_pwm1", mux_xin24m_gpll_p, 0, -+ RV1126_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART7_SRC, "clk_uart7_src", "gpll", 0, ++ RK3528_CLKSEL_CON(18), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, 
"clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(19), 0, ++ RK3528_CLKGATE_CON(2), 2, GFLAGS, &rk3528_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, ++ RK3528_CLKGATE_CON(2), 3, GFLAGS), + -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE(CLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, -+ RV1126_PMU_CLKSEL_CON(9), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(20), 8, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_2CH_FRAC, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(21), 0, ++ RK3528_CLKGATE_CON(2), 6, GFLAGS, &mclk_i2s0_2ch_sai_src_fracmux), ++ GATE(MCLK_I2S0_2CH_SAI_SRC, "mclk_i2s0_2ch_sai_src", "mclk_i2s0_2ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 7, GFLAGS), + -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, -+ RV1126_PMU_CLKSEL_CON(8), 15, 1, MFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S1_8CH_SRC, "clk_i2s1_8ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(24), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_FRAC, "clk_i2s1_8ch_frac", "clk_i2s1_8ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(25), 0, ++ RK3528_CLKGATE_CON(2), 12, GFLAGS, &mclk_i2s1_8ch_sai_src_fracmux), ++ GATE(MCLK_I2S1_8CH_SAI_SRC, "mclk_i2s1_8ch_sai_src", "mclk_i2s1_8ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 13, GFLAGS), + -+ GATE(PCLK_PMUPVTM, "pclk_pmupvtm", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(CLK_PMUPVTM, "clk_pmupvtm", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(CLK_CORE_PMUPVTM, "clk_core_pmupvtm", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(2), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(26), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(27), 0, ++ RK3528_CLKGATE_CON(2), 15, GFLAGS, &mclk_i2s2_2ch_sai_src_fracmux), ++ GATE(MCLK_I2S2_2CH_SAI_SRC, "mclk_i2s2_2ch_sai_src", "mclk_i2s2_2ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(3), 0, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_REF12M, "clk_ref12m", "gpll", 0, -+ RV1126_PMU_CLKSEL_CON(7), 8, 7, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(0, "xin_osc0_usbphyref_otg", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(0, "xin_osc0_usbphyref_host", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ FACTOR(0, "xin_osc0_div2_usbphyref_otg", "xin_osc0_usbphyref_otg", 0, 1, 2), -+ FACTOR(0, "xin_osc0_div2_usbphyref_host", "xin_osc0_usbphyref_host", 0, 1, 2), -+ MUX(CLK_USBPHY_OTG_REF, "clk_usbphy_otg_ref", mux_usbphy_otg_ref_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(7), 6, 1, MFLAGS), -+ MUX(CLK_USBPHY_HOST_REF, "clk_usbphy_host_ref", mux_usbphy_host_ref_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(7), 7, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S3_8CH_SRC, "clk_i2s3_8ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(22), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S3_8CH_FRAC, "clk_i2s3_8ch_frac", "clk_i2s3_8ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(23), 0, ++ RK3528_CLKGATE_CON(2), 9, GFLAGS, 
&mclk_i2s3_8ch_sai_src_fracmux), ++ GATE(MCLK_I2S3_8CH_SAI_SRC, "mclk_i2s3_8ch_sai_src", "mclk_i2s3_8ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 10, GFLAGS), + -+ COMPOSITE_NOMUX(CLK_REF24M, "clk_ref24m", "gpll", 0, -+ RV1126_PMU_CLKSEL_CON(7), 0, 6, DFLAGS, -+ RV1126_PMU_CLKGATE_CON(1), 14, GFLAGS), -+ GATE(0, "xin_osc0_mipiphyref", "xin24m", 0, -+ RV1126_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ MUX(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, -+ RV1126_PMU_CLKSEL_CON(7), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_SPDIF_SRC, "clk_spdif_src", "gpll", 0, ++ RK3528_CLKSEL_CON(30), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(31), 0, ++ RK3528_CLKGATE_CON(3), 5, GFLAGS, &mclk_spdif_src_fracmux), ++ GATE(MCLK_SPDIF_SRC, "mclk_spdif_src", "mclk_spdif_src_pre", 0, ++ RK3528_CLKGATE_CON(3), 6, GFLAGS), + -+#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE -+ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ /* bus */ ++ COMPOSITE_NODIV(ACLK_BUS_M_ROOT, "aclk_bus_m_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_m_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(9), 1, GFLAGS), + -+ GATE(PCLK_PMUSGRF, "pclk_pmusgrf", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_PMUCRU, "pclk_pmucru", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(PCLK_CHIPVEROTP, "pclk_chipverotp", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(PCLK_PDPMU_NIU, "pclk_pdpmu_niu", "pclk_pdpmu", CLK_IGNORE_UNUSED, -+ RV1126_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 11, GFLAGS), ++ COMPOSITE(ACLK_BUS_VOPGL_ROOT, "aclk_bus_vopgl_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 3, 1, MFLAGS, 0, 3, DFLAGS, ++ RK3528_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_H_ROOT, "aclk_bus_h_root", mux_500m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_bus_h_root", 0, ++ RK3528_CLKGATE_CON(10), 14, GFLAGS), + -+ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pdpmu", 0, -+ RV1126_PMU_CLKGATE_CON(0), 7, GFLAGS), -+#endif -+}; ++ COMPOSITE_NODIV(HCLK_BUS_ROOT, "hclk_bus_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 5, GFLAGS), + -+static struct rockchip_clk_branch rv1126_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ -+ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RV1126_MODE_CON, 10, 2, MFLAGS), -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ COMPOSITE_NODIV(PCLK_BUS_ROOT, "pclk_bus_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 10, 2, 
MFLAGS, ++ RK3528_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(8), 13, GFLAGS), ++ GATE(PCLK_BUS_GRF, "pclk_bus_grf", "pclk_bus_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(8), 15, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(PCLK_JDBCK_DAP, "pclk_jdbck_dap", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 15, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(PCLK_SCR, "pclk_scr", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(11), 12, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ -+ /* PD_CORE */ -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1126_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(CLK_CORE_CPUPVTM, "clk_core_cpupvtm", "armclk", 0, -+ RV1126_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(PCLK_CPUPVTM, "pclk_cpupvtm", "pclk_dbg", 0, -+ RV1126_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(CLK_CPUPVTM, "clk_cpupvtm", "xin24m", 0, -+ RV1126_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDCORE_NIU, "hclk_pdcore_niu", "gpll", CLK_IGNORE_UNUSED, -+ RV1126_CLKSEL_CON(0), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM0, "clk_pwm0", mux_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(44), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(44), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(11), 8, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ -+ /* PD_BUS */ -+ COMPOSITE(0, "aclk_pdbus_pre", mux_gpll_cpll_dpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(2), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(ACLK_PDBUS, "aclk_pdbus", "aclk_pdbus_pre", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE(0, "hclk_pdbus_pre", mux_hclk_pclk_pdbus_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(HCLK_PDBUS, "hclk_pdbus", "hclk_pdbus_pre", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(2), 12, GFLAGS), -+ COMPOSITE(0, "pclk_pdbus_pre", mux_hclk_pclk_pdbus_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(PCLK_PDBUS, "pclk_pdbus", "pclk_pdbus_pre", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(2), 13, GFLAGS), -+ /* aclk_dmac is controlled by sgrf_clkgat_con. 
*/ -+ SGRF_GATE(ACLK_DMAC, "aclk_dmac", "hclk_pdbus"), -+ GATE(ACLK_DCF, "aclk_dcf", "hclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(3), 6, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(3), 7, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(6), 14, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, ++ RK3528_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, ++ RK3528_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(CLK_JDBCK_DAP, "clk_jdbck_dap", "xin24m", 0, ++ RK3528_CLKGATE_CON(9), 13, GFLAGS), ++ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, ++ RK3528_CLKGATE_CON(10), 0, GFLAGS), + -+ COMPOSITE(CLK_SCR1, "clk_scr1", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(3), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(0, "clk_scr1_niu", "clk_scr1", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(CLK_SCR1_CORE, "clk_scr1_core", "clk_scr1", 0, -+ RV1126_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(CLK_SCR1_RTC, "clk_scr1_rtc", "xin24m", 0, -+ RV1126_CLKGATE_CON(4), 9, GFLAGS), -+ GATE(CLK_SCR1_JTAG, "clk_scr1_jtag", "clk_scr1_jtag_io", 0, -+ RV1126_CLKGATE_CON(4), 10, GFLAGS), ++ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, ++ RK3528_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 9, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 11, GFLAGS), + -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(5), 0, GFLAGS), -+ COMPOSITE(SCLK_UART0_DIV, "sclk_uart0_div", mux_gpll_cpll_usb480m_xin24m_p, 0, -+ RV1126_CLKSEL_CON(10), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(5), 1, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART0_FRAC, "sclk_uart0_frac", "sclk_uart0_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(11), 0, -+ RV1126_CLKGATE_CON(5), 2, GFLAGS, -+ &rv1126_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "sclk_uart0_mux", 0, -+ RV1126_CLKGATE_CON(5), 3, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(5), 4, GFLAGS), -+ COMPOSITE(SCLK_UART2_DIV, "sclk_uart2_div", mux_gpll_cpll_usb480m_xin24m_p, 0, -+ RV1126_CLKSEL_CON(12), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(5), 5, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART2_FRAC, "sclk_uart2_frac", "sclk_uart2_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(13), 0, -+ RV1126_CLKGATE_CON(5), 6, GFLAGS, -+ &rv1126_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "sclk_uart2_mux", 0, -+ RV1126_CLKGATE_CON(5), 7, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(5), 8, GFLAGS), -+ COMPOSITE(SCLK_UART3_DIV, "sclk_uart3_div", mux_gpll_cpll_usb480m_xin24m_p, 0, -+ RV1126_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(5), 9, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART3_FRAC, "sclk_uart3_frac", "sclk_uart3_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(15), 0, -+ RV1126_CLKGATE_CON(5), 10, GFLAGS, -+ &rv1126_uart3_fracmux), -+ 
GATE(SCLK_UART3, "sclk_uart3", "sclk_uart3_mux", 0, -+ RV1126_CLKGATE_CON(5), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(5), 12, GFLAGS), -+ COMPOSITE(SCLK_UART4_DIV, "sclk_uart4_div", mux_gpll_cpll_usb480m_xin24m_p, 0, -+ RV1126_CLKSEL_CON(16), 8, 2, MFLAGS, 0, 7, -+ DFLAGS, RV1126_CLKGATE_CON(5), 13, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART4_FRAC, "sclk_uart4_frac", "sclk_uart4_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(17), 0, -+ RV1126_CLKGATE_CON(5), 14, GFLAGS, -+ &rv1126_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "sclk_uart4_mux", 0, -+ RV1126_CLKGATE_CON(5), 15, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE(SCLK_UART5_DIV, "sclk_uart5_div", mux_gpll_cpll_usb480m_xin24m_p, 0, -+ RV1126_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 7, -+ DFLAGS, RV1126_CLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_UART5_FRAC, "sclk_uart5_frac", "sclk_uart5_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(19), 0, -+ RV1126_CLKGATE_CON(6), 2, GFLAGS, -+ &rv1126_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "sclk_uart5_mux", 0, -+ RV1126_CLKGATE_CON(6), 3, GFLAGS), ++ /* pmu */ ++ GATE(HCLK_PMU_ROOT, "hclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(PCLK_PMU_ROOT, "pclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(3), 10, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C1, "clk_i2c1", "gpll", 0, -+ RV1126_CLKSEL_CON(5), 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(3), 12, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C3, "clk_i2c3", "gpll", 0, -+ RV1126_CLKSEL_CON(5), 8, 7, DFLAGS, -+ RV1126_CLKGATE_CON(3), 13, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(3), 14, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C4, "clk_i2c4", "gpll", 0, -+ RV1126_CLKSEL_CON(6), 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(3), 15, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_I2C5, "clk_i2c5", "gpll", 0, -+ RV1126_CLKSEL_CON(6), 8, 7, DFLAGS, -+ RV1126_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(FCLK_MCU, "fclk_mcu", "hclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(5), 4, GFLAGS), + -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE(CLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(8), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU_HP_TIMER, "pclk_pmu_hp_timer", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PMU_IOC, "pclk_pmu_ioc", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_PMU_CRU, "pclk_pmu_cru", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(PCLK_PMU_GRF, "pclk_pmu_grf", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(0), 13, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu_root", 
0, ++ RK3528_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_OSCCHK, "pclk_oscchk", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(5), 1, GFLAGS), + -+ GATE(CLK_CAPTURE_PWM2, "clk_capture_pwm2", "xin24m", 0, -+ RV1126_CLKGATE_CON(4), 6, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(4), 4, GFLAGS), -+ COMPOSITE(CLK_PWM2, "clk_pwm2", mux_xin24m_gpll_p, 0, -+ RV1126_CLKSEL_CON(9), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1126_CLKGATE_CON(4), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", clk_i2c2_p, 0, ++ RK3528_PMU_CLKSEL_CON(0), 0, 2, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(0), 3, GFLAGS), + -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 0, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, -+ RV1126_CLKSEL_CON(21), 15, 1, MFLAGS, -+ RV1126_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 2, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, -+ RV1126_CLKSEL_CON(22), 15, 1, MFLAGS, -+ RV1126_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 4, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, -+ RV1126_CLKSEL_CON(23), 15, 1, MFLAGS, -+ RV1126_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 6, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, -+ RV1126_CLKSEL_CON(24), 15, 1, MFLAGS, -+ RV1126_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, ++ RK3528_PMU_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RK3528_PMU_CLKSEL_CON(5), 0, 5, DFLAGS, ++ RK3528_PMU_CLKGATE_CON(5), 0, GFLAGS), + -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RV1126_CLKSEL_CON(20), 0, 11, DFLAGS, -+ RV1126_CLKGATE_CON(6), 5, GFLAGS), ++ COMPOSITE_FRAC(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", 0, ++ RK3528_PMU_CLKSEL_CON(1), 0, ++ RK3528_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ /* clk_32k: internal! 
No path from external osc 32k */ ++ MUX(CLK_DEEPSLOW, "clk_32k", clk_32k_p, CLK_IS_CRITICAL, ++ RK3528_PMU_CLKSEL_CON(2), 0, 1, MFLAGS), ++ GATE(RTC_CLK_MCU, "rtc_clk_mcu", "clk_32k", 0, ++ RK3528_PMU_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(1), 1, GFLAGS), + -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 8, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 9, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 12, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, -+ RV1126_CLKGATE_CON(6), 13, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, ++ RK3528_PMU_CLKSEL_CON(0), 2, 1, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, ++ RK3528_PMU_CLKSEL_CON(2), 1, 1, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(1), 11, GFLAGS), + -+ GATE(ACLK_SPINLOCK, "aclk_spinlock", "hclk_pdbus", 0, -+ RV1126_CLKGATE_CON(6), 6, GFLAGS), ++ /* core */ ++ COMPOSITE_NOMUX(ACLK_M_CORE_BIU, "aclk_m_core", "armclk", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(39), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3528_CLKGATE_CON(5), 12, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_DBG, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(40), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3528_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(PCLK_CPU_ROOT, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(PCLK_CORE_GRF, "pclk_core_grf", "pclk_cpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(6), 2, GFLAGS), + -+ GATE(ACLK_DECOM, "aclk_decom", "aclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(PCLK_DECOM, "pclk_decom", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 12, GFLAGS), -+ COMPOSITE(DCLK_DECOM, "dclk_decom", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(25), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1126_CLKGATE_CON(7), 13, GFLAGS), ++ /* ddr */ ++ GATE(CLK_DDRC_SRC, "clk_ddrc_src", "dpll", CLK_IS_CRITICAL, ++ RK3528_DDRPHY_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(CLK_DDR_PHY, "clk_ddr_phy", "dpll", CLK_IS_CRITICAL, ++ RK3528_DDRPHY_CLKGATE_CON(0), 1, GFLAGS), + -+ GATE(PCLK_CAN, "pclk_can", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(7), 8, GFLAGS), -+ COMPOSITE(CLK_CAN, "clk_can", mux_gpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(7), 9, GFLAGS), -+ /* pclk_otp and clk_otp are controlled by sgrf_clkgat_con. 
*/ -+ SGRF_GATE(CLK_OTP, "clk_otp", "xin24m"), -+ SGRF_GATE(PCLK_OTP, "pclk_otp", "pclk_pdbus"), ++ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(90), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(45), 0, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 3, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 8, GFLAGS), ++ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 4, GFLAGS), + -+ GATE(PCLK_NPU_TSADC, "pclk_npu_tsadc", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(24), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_NPU_TSADC, "clk_npu_tsadc", "xin24m", 0, -+ RV1126_CLKSEL_CON(71), 0, 11, DFLAGS, -+ RV1126_CLKGATE_CON(24), 4, GFLAGS), -+ GATE(CLK_NPU_TSADCPHY, "clk_npu_tsadcphy", "clk_npu_tsadc", 0, -+ RV1126_CLKGATE_CON(24), 5, GFLAGS), -+ GATE(PCLK_CPU_TSADC, "pclk_cpu_tsadc", "pclk_pdbus", 0, -+ RV1126_CLKGATE_CON(24), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_CPU_TSADC, "clk_cpu_tsadc", "xin24m", 0, -+ RV1126_CLKSEL_CON(70), 0, 11, DFLAGS, -+ RV1126_CLKGATE_CON(24), 1, GFLAGS), -+ GATE(CLK_CPU_TSADCPHY, "clk_cpu_tsadcphy", "clk_cpu_tsadc", 0, -+ RV1126_CLKGATE_CON(24), 2, GFLAGS), ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 2, GFLAGS), ++ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 6, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 9, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 5 -+ */ -+ /* PD_CRYPTO */ -+ COMPOSITE(ACLK_PDCRYPTO, "aclk_pdcrypto", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(4), 11, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDCRYPTO, "hclk_pdcrypto", "aclk_pdcrypto", 0, -+ RV1126_CLKSEL_CON(4), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(4), 12, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_pdcrypto", 0, -+ RV1126_CLKGATE_CON(3), 2, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_pdcrypto", 0, -+ RV1126_CLKGATE_CON(3), 3, GFLAGS), -+ COMPOSITE(CLK_CRYPTO_CORE, "aclk_crypto_core", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(7), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE(CLK_CRYPTO_PKA, "aclk_crypto_pka", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(ACLK_DDR_UPCTL, "aclk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 11, GFLAGS), ++ GATE(CLK_DDR_UPCTL, "clk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 12, GFLAGS), ++ GATE(CLK_DDRMON, "clk_ddrmon", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 13, GFLAGS), ++ GATE(ACLK_DDR_SCRAMBLE, "aclk_ddr_scramble", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 14, GFLAGS), ++ GATE(ACLK_SPLIT, "aclk_split", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 15, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 6 -+ */ -+ /* PD_AUDIO */ -+ COMPOSITE_NOMUX(HCLK_PDAUDIO, "hclk_pdaudio", "gpll", 0, -+ RV1126_CLKSEL_CON(26), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(9), 0, GFLAGS), ++ /* gpu */ ++ COMPOSITE_NODIV(ACLK_GPU_ROOT, "aclk_gpu_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(76), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(34), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_GPU, "aclk_gpu", aclk_gpu_p, CLK_SET_RATE_PARENT, ++ 
RK3528_CLKSEL_CON(76), 6, 1, MFLAGS, ++ RK3528_CLKGATE_CON(34), 7, GFLAGS), ++ GATE(ACLK_GPU_MALI, "aclk_gpu_mali", "aclk_gpu", 0, ++ RK3528_CLKGATE_CON(34), 8, GFLAGS), ++ COMPOSITE_NODIV(PCLK_GPU_ROOT, "pclk_gpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(76), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(34), 2, GFLAGS), + -+ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_pdaudio", 0, -+ RV1126_CLKGATE_CON(9), 4, GFLAGS), -+ COMPOSITE(MCLK_I2S0_TX_DIV, "mclk_i2s0_tx_div", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(9), 5, GFLAGS), -+ COMPOSITE_FRACMUX(MCLK_I2S0_TX_FRACDIV, "mclk_i2s0_tx_fracdiv", "mclk_i2s0_tx_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(28), 0, -+ RV1126_CLKGATE_CON(9), 6, GFLAGS, -+ &rv1126_i2s0_tx_fracmux), -+ GATE(MCLK_I2S0_TX, "mclk_i2s0_tx", "mclk_i2s0_tx_mux", 0, -+ RV1126_CLKGATE_CON(9), 9, GFLAGS), -+ COMPOSITE(MCLK_I2S0_RX_DIV, "mclk_i2s0_rx_div", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RV1126_CLKGATE_CON(9), 7, GFLAGS), -+ COMPOSITE_FRACMUX(MCLK_I2S0_RX_FRACDIV, "mclk_i2s0_rx_fracdiv", "mclk_i2s0_rx_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(29), 0, -+ RV1126_CLKGATE_CON(9), 8, GFLAGS, -+ &rv1126_i2s0_rx_fracmux), -+ GATE(MCLK_I2S0_RX, "mclk_i2s0_rx", "mclk_i2s0_rx_mux", 0, -+ RV1126_CLKGATE_CON(9), 10, GFLAGS), -+ COMPOSITE_NODIV(MCLK_I2S0_TX_OUT2IO, "mclk_i2s0_tx_out2io", mux_i2s0_tx_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(30), 6, 1, MFLAGS, -+ RV1126_CLKGATE_CON(9), 13, GFLAGS), -+ COMPOSITE_NODIV(MCLK_I2S0_RX_OUT2IO, "mclk_i2s0_rx_out2io", mux_i2s0_rx_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(30), 8, 1, MFLAGS, -+ RV1126_CLKGATE_CON(9), 14, GFLAGS), ++ /* rkvdec */ ++ COMPOSITE_NODIV(ACLK_RKVDEC_ROOT_NDFT, "aclk_rkvdec_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(88), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 3, GFLAGS), ++ COMPOSITE_NODIV(HCLK_RKVDEC_ROOT, "hclk_rkvdec_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(88), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 2, GFLAGS), ++ GATE(PCLK_DDRPHY_CRU, "pclk_ddrphy_cru", "hclk_rkvdec_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(44), 4, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_root", 0, ++ RK3528_CLKGATE_CON(44), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_HEVC_CA_RKVDEC, "clk_hevc_ca_rkvdec", mux_600m_300m_200m_24m_p, 0, ++ RK3528_CLKSEL_CON(88), 11, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 11, GFLAGS), ++ MUX(ACLK_RKVDEC_PVTMUX_ROOT, "aclk_rkvdec_pvtmux_root", aclk_rkvdec_pvtmux_root_p, CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(88), 13, 1, MFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pvtmux_root", 0, ++ RK3528_CLKGATE_CON(44), 8, GFLAGS), + -+ GATE(HCLK_I2S1, "hclk_i2s1", "hclk_pdaudio", 0, -+ RV1126_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE(MCLK_I2S1_DIV, "mclk_i2s1_div", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(31), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(10), 1, GFLAGS), -+ COMPOSITE_FRACMUX(MCLK_I2S1_FRACDIV, "mclk_i2s1_fracdiv", "mclk_i2s1_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(32), 0, -+ RV1126_CLKGATE_CON(10), 2, GFLAGS, -+ &rv1126_i2s1_fracmux), -+ GATE(MCLK_I2S1, "mclk_i2s1", "mclk_i2s1_mux", 0, -+ RV1126_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_NODIV(MCLK_I2S1_OUT2IO, "mclk_i2s1_out2io", mux_i2s1_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(31), 12, 1, MFLAGS, -+ RV1126_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(HCLK_I2S2, "hclk_i2s2", "hclk_pdaudio", 0, -+ 
RV1126_CLKGATE_CON(10), 5, GFLAGS), -+ COMPOSITE(MCLK_I2S2_DIV, "mclk_i2s2_div", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(33), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE_FRACMUX(MCLK_I2S2_FRACDIV, "mclk_i2s2_fracdiv", "mclk_i2s2_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(34), 0, -+ RV1126_CLKGATE_CON(10), 7, GFLAGS, -+ &rv1126_i2s2_fracmux), -+ GATE(MCLK_I2S2, "mclk_i2s2", "mclk_i2s2_mux", 0, -+ RV1126_CLKGATE_CON(10), 8, GFLAGS), -+ COMPOSITE_NODIV(MCLK_I2S2_OUT2IO, "mclk_i2s2_out2io", mux_i2s2_out2io_p, CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(33), 10, 1, MFLAGS, -+ RV1126_CLKGATE_CON(10), 9, GFLAGS), -+ -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_pdaudio", 0, -+ RV1126_CLKGATE_CON(10), 10, GFLAGS), -+ COMPOSITE(MCLK_PDM, "mclk_pdm", mux_gpll_cpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(35), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(10), 11, GFLAGS), ++ /* rkvenc */ ++ COMPOSITE_NODIV(ACLK_RKVENC_ROOT, "aclk_rkvenc_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 1, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 7, GFLAGS), + -+ GATE(HCLK_AUDPWM, "hclk_audpwm", "hclk_pdaudio", 0, -+ RV1126_CLKGATE_CON(10), 12, GFLAGS), -+ COMPOSITE(SCLK_ADUPWM_DIV, "sclk_audpwm_div", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(10), 13, GFLAGS), -+ COMPOSITE_FRACMUX(SCLK_AUDPWM_FRACDIV, "sclk_audpwm_fracdiv", "sclk_audpwm_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(37), 0, -+ RV1126_CLKGATE_CON(10), 14, GFLAGS, -+ &rv1126_audpwm_fracmux), -+ GATE(SCLK_AUDPWM, "sclk_audpwm", "mclk_audpwm_mux", 0, -+ RV1126_CLKGATE_CON(10), 15, GFLAGS), ++ COMPOSITE_NODIV(PCLK_RKVENC_ROOT, "pclk_rkvenc_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 2, GFLAGS), ++ GATE(PCLK_RKVENC_IOC, "pclk_rkvenc_ioc", "pclk_rkvenc_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(37), 10, GFLAGS), ++ GATE(PCLK_RKVENC_GRF, "pclk_rkvenc_grf", "pclk_rkvenc_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(38), 6, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 11, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 13, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 2, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 8, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 2, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 4, GFLAGS), ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 7, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 9, GFLAGS), + -+ GATE(PCLK_ACDCDIG, "pclk_acdcdig", "hclk_pdaudio", 0, -+ RV1126_CLKGATE_CON(11), 0, GFLAGS), -+ GATE(CLK_ACDCDIG_ADC, "clk_acdcdig_adc", "mclk_i2s0_rx", 0, -+ RV1126_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(CLK_ACDCDIG_DAC, "clk_acdcdig_dac", "mclk_i2s0_tx", 0, -+ RV1126_CLKGATE_CON(11), 3, GFLAGS), -+ COMPOSITE(CLK_ACDCDIG_I2C, "clk_acdcdig_i2c", mux_gpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(72), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RV1126_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mux_150m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(80), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(38), 1, GFLAGS), ++ 
COMPOSITE(CLK_CAN0, "clk_can0", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(38), 8, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RK3528_CLKGATE_CON(38), 10, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ -+ /* PD_VEPU */ -+ COMPOSITE(ACLK_PDVEPU, "aclk_pdvepu", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(40), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDVEPU, "hclk_pdvepu", "aclk_pdvepu", 0, -+ RV1126_CLKSEL_CON(41), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(ACLK_VENC, "aclk_venc", "aclk_pdvepu", 0, -+ RV1126_CLKGATE_CON(12), 5, GFLAGS), -+ GATE(HCLK_VENC, "hclk_venc", "hclk_pdvepu", 0, -+ RV1126_CLKGATE_CON(12), 6, GFLAGS), -+ COMPOSITE(CLK_VENC_CORE, "clk_venc_core", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(40), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_RKVENC_ROOT, "hclk_rkvenc_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 0, GFLAGS), ++ GATE(HCLK_SAI_I2S1, "hclk_sai_i2s1", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 9, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 14, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 0, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 6, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 8 -+ */ -+ /* PD_VDPU */ -+#if IS_ENABLED(CONFIG_ROCKCHIP_MPP_VDPU2) || IS_ENABLED(CONFIG_ROCKCHIP_MPP_RKVDEC) -+ COMPOSITE(ACLK_PDVDEC, "aclk_pdvdec", mux_cpll_hpll_gpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDVDEC, "hclk_pdvdec", "aclk_pdvdec", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(41), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(0, "aclk_pdvdec_niu", "aclk_pdvdec", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(0, "hclk_pdvdec_niu", "hclk_pdvdec", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(13), 6, GFLAGS), -+ COMPOSITE(ACLK_PDJPEG, "aclk_pdjpeg", mux_cpll_hpll_gpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 9, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDJPEG, "hclk_pdjpeg", "aclk_pdjpeg", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(44), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(0, "aclk_pdjpeg_niu", "aclk_pdjpeg", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(0, "hclk_pdjpeg_niu", "hclk_pdjpeg", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(13), 12, GFLAGS), -+#else -+ COMPOSITE(ACLK_PDVDEC, "aclk_pdvdec", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDVDEC, "hclk_pdvdec", "aclk_pdvdec", 0, -+ RV1126_CLKSEL_CON(41), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(0, "aclk_pdvdec_niu", "aclk_pdvdec", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(0, "hclk_pdvdec_niu", "hclk_pdvdec", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(13), 6, GFLAGS), -+ COMPOSITE(ACLK_PDJPEG, "aclk_pdjpeg", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 9, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDJPEG, "hclk_pdjpeg", "aclk_pdjpeg", 0, -+ 
RV1126_CLKSEL_CON(44), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 10, GFLAGS), -+ GATE(0, "aclk_pdjpeg_niu", "aclk_pdjpeg", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(13), 11, GFLAGS), -+ GATE(0, "hclk_pdjpeg_niu", "hclk_pdjpeg", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(13), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_RKVENC, "clk_core_rkvenc", mux_300m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 11, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 9, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 12, GFLAGS), ++#if 0 ++ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, ++ RK3528_CLKGATE_CON(37), 4, GFLAGS), ++ GATE(CLK_UART_JTAG, "clk_uart_jtag", "xin24m", 0, ++ RK3528_CLKGATE_CON(37), 0, GFLAGS), +#endif -+ GATE(ACLK_VDEC, "aclk_vdec", "aclk_pdvdec", 0, -+ RV1126_CLKGATE_CON(13), 7, GFLAGS), -+ GATE(HCLK_VDEC, "hclk_vdec", "hclk_pdvdec", 0, -+ RV1126_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE(CLK_VDEC_CORE, "clk_vdec_core", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 1, GFLAGS), -+ COMPOSITE(CLK_VDEC_CA, "clk_vdec_ca", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 2, GFLAGS), -+ COMPOSITE(CLK_VDEC_HEVC_CA, "clk_vdec_hevc_ca", mux_cpll_hpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(43), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(ACLK_JPEG, "aclk_jpeg", "aclk_pdjpeg", 0, -+ RV1126_CLKGATE_CON(13), 13, GFLAGS), -+ GATE(HCLK_JPEG, "hclk_jpeg", "hclk_pdjpeg", 0, -+ RV1126_CLKGATE_CON(13), 14, GFLAGS), -+ -+ /* -+ * Clock-Architecture Diagram 9 -+ */ -+ /* PD_VO */ -+ COMPOSITE(ACLK_PDVO, "aclk_pdvo", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDVO, "hclk_pdvo", "aclk_pdvo", 0, -+ RV1126_CLKSEL_CON(45), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(14), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PDVO, "pclk_pdvo", "aclk_pdvo", 0, -+ RV1126_CLKSEL_CON(46), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_pdvo", 0, -+ RV1126_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_pdvo", 0, -+ RV1126_CLKGATE_CON(14), 7, GFLAGS), -+ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_pdvo", 0, -+ RV1126_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_pdvo", 0, -+ RV1126_CLKGATE_CON(14), 10, GFLAGS), -+ COMPOSITE(DCLK_VOP_DIV, "dclk_vop_div", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(47), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ RV1126_CLKGATE_CON(14), 11, GFLAGS), -+ COMPOSITE_FRACMUX(DCLK_VOP_FRACDIV, "dclk_vop_fracdiv", "dclk_vop_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(48), 0, -+ RV1126_CLKGATE_CON(14), 12, GFLAGS, -+ &rv1126_dclk_vop_fracmux), -+ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_mux", 0, -+ RV1126_CLKGATE_CON(14), 13, GFLAGS), -+ GATE(PCLK_DSIHOST, "pclk_dsihost", "pclk_pdvo", 0, -+ RV1126_CLKGATE_CON(14), 14, GFLAGS), -+ GATE(ACLK_IEP, "aclk_iep", "aclk_pdvo", 0, -+ RV1126_CLKGATE_CON(12), 7, GFLAGS), -+ GATE(HCLK_IEP, "hclk_iep", "hclk_pdvo", 0, -+ RV1126_CLKGATE_CON(12), 8, GFLAGS), -+ COMPOSITE(CLK_IEP_CORE, "clk_iep_core", 
mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(54), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(12), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 13, 2, MFLAGS, ++ RK3528_CLKGATE_CON(37), 3, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI_I2S1, "mclk_sai_i2s1", mclk_sai_i2s1_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(79), 8, 1, MFLAGS, ++ RK3528_CLKGATE_CON(36), 10, GFLAGS), ++ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, ++ RK3528_CLKGATE_CON(37), 9, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 10 -+ */ -+ /* PD_VI */ -+ COMPOSITE(ACLK_PDVI_DIV, "aclk_pdvi_div", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(15), 0, GFLAGS), -+ COMPOSITE_HALFDIV_OFFSET(ACLK_PDVI_NP5, "aclk_pdvi_np5", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(49), 6, 2, MFLAGS, -+ RV1126_CLKSEL_CON(76), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 13, GFLAGS), -+ MUX(ACLK_PDVI, "aclk_pdvi", mux_aclk_pdvi_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(76), 5, 1, MFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDVI, "hclk_pdvi", "aclk_pdvi", 0, -+ RV1126_CLKSEL_CON(49), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(15), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PDVI, "pclk_pdvi", "aclk_pdvi", 0, -+ RV1126_CLKSEL_CON(50), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_pdvi", 0, -+ RV1126_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_pdvi", 0, -+ RV1126_CLKGATE_CON(15), 7, GFLAGS), -+ COMPOSITE(CLK_ISP_DIV, "clk_isp_div", mux_gpll_cpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(50), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(15), 8, GFLAGS), -+ COMPOSITE_HALFDIV_OFFSET(CLK_ISP_NP5, "clk_isp_np5", mux_gpll_cpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(50), 6, 2, MFLAGS, -+ RV1126_CLKSEL_CON(76), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 14, GFLAGS), -+ MUX(CLK_ISP, "clk_isp", mux_clk_isp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(76), 13, 1, MFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "aclk_pdvi", 0, -+ RV1126_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "hclk_pdvi", 0, -+ RV1126_CLKGATE_CON(15), 10, GFLAGS), -+ COMPOSITE(DCLK_CIF, "dclk_cif", mux_gpll_cpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(15), 11, GFLAGS), -+ COMPOSITE(CLK_CIF_OUT_DIV, "clk_cif_out2io_div", mux_gpll_usb480m_p, 0, -+ RV1126_CLKSEL_CON(51), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RV1126_CLKGATE_CON(15), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_CIF_OUT_FRACDIV, "clk_cif_out2io_fracdiv", "clk_cif_out2io_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(52), 0, -+ RV1126_CLKGATE_CON(15), 13, GFLAGS, -+ &rv1126_cif_out2io_fracmux), -+ GATE(CLK_CIF_OUT, "clk_cif_out2io", "clk_cif_out2io_mux", 0, -+ RV1126_CLKGATE_CON(15), 14, GFLAGS), -+ COMPOSITE(CLK_MIPICSI_OUT_DIV, "clk_mipicsi_out2io_div", mux_gpll_usb480m_p, 0, -+ RV1126_CLKSEL_CON(73), 8, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(23), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_MIPICSI_OUT_FRACDIV, "clk_mipicsi_out2io_fracdiv", "clk_mipicsi_out2io_div", CLK_SET_RATE_PARENT, -+ RV1126_CLKSEL_CON(74), 0, -+ RV1126_CLKGATE_CON(23), 6, GFLAGS, -+ &rv1126_mipicsi_out2io_fracmux), -+ GATE(CLK_MIPICSI_OUT, "clk_mipicsi_out2io", "clk_mipicsi_out2io_mux", 0, -+ RV1126_CLKGATE_CON(23), 7, GFLAGS), -+ GATE(PCLK_CSIHOST, "pclk_csihost", "pclk_pdvi", 0, -+ RV1126_CLKGATE_CON(15), 15, GFLAGS), -+ GATE(ACLK_CIFLITE, "aclk_ciflite", "aclk_pdvi", 0, -+ RV1126_CLKGATE_CON(16), 10, GFLAGS), -+ 
GATE(HCLK_CIFLITE, "hclk_ciflite", "hclk_pdvi", 0, -+ RV1126_CLKGATE_CON(16), 11, GFLAGS), -+ COMPOSITE(DCLK_CIFLITE, "dclk_ciflite", mux_gpll_cpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(54), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 12, GFLAGS), ++ /* vo */ ++ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 1, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 2, GFLAGS), ++ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 3, GFLAGS), ++ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 7, GFLAGS), ++ GATE(HCLK_VDPP, "hclk_vdpp", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 10, GFLAGS), ++ GATE(HCLK_CVBS, "hclk_cvbs", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 3, GFLAGS), ++ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 4, GFLAGS), ++ GATE(HCLK_SAI_I2S3, "hclk_sai_i2s3", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 1, GFLAGS), ++ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 1, GFLAGS), ++ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 7, GFLAGS), ++ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 9, GFLAGS), ++ GATE(HCLK_HDCP_KEY, "hclk_hdcp_key", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 15, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 11 -+ */ -+ /* PD_ISPP */ -+ COMPOSITE(ACLK_PDISPP_DIV, "aclk_pdispp_div", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(68), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 0, GFLAGS), -+ COMPOSITE_HALFDIV_OFFSET(ACLK_PDISPP_NP5, "aclk_pdispp_np5", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(68), 6, 2, MFLAGS, -+ RV1126_CLKSEL_CON(77), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 8, GFLAGS), -+ MUX(ACLK_PDISPP, "aclk_pdispp", mux_aclk_pdispp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(77), 5, 1, MFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDISPP, "hclk_pdispp", "aclk_pdispp", 0, -+ RV1126_CLKSEL_CON(69), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(ACLK_ISPP, "aclk_ispp", "aclk_pdispp", 0, -+ RV1126_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(HCLK_ISPP, "hclk_ispp", "hclk_pdispp", 0, -+ RV1126_CLKGATE_CON(16), 5, GFLAGS), -+ COMPOSITE(CLK_ISPP_DIV, "clk_ispp_div", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(69), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 6, GFLAGS), -+ COMPOSITE_HALFDIV_OFFSET(CLK_ISPP_NP5, "clk_ispp_np5", mux_cpll_gpll_hpll_p, 0, -+ RV1126_CLKSEL_CON(69), 6, 2, MFLAGS, -+ RV1126_CLKSEL_CON(77), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(16), 7, GFLAGS), -+ MUX(CLK_ISPP, "clk_ispp", mux_clk_ispp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(77), 13, 1, MFLAGS), ++ COMPOSITE_NODIV(ACLK_VO_L_ROOT, "aclk_vo_l_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(84), 1, 2, MFLAGS, ++ RK3528_CLKGATE_CON(41), 8, GFLAGS), ++ GATE(ACLK_MAC_VO, "aclk_gmac0", "aclk_vo_l_root", 0, ++ RK3528_CLKGATE_CON(41), 10, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 12 -+ */ -+ /* PD_PHP */ -+ COMPOSITE(ACLK_PDPHP, "aclk_pdphp", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(53), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(17), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDPHP, "hclk_pdphp", "gpll", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(53), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(17), 1, GFLAGS), -+ /* PD_SDCARD */ -+ 
GATE(HCLK_PDSDMMC, "hclk_pdsdmmc", "hclk_pdphp", 0, -+ RV1126_CLKGATE_CON(17), 6, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_pdsdmmc", 0, -+ RV1126_CLKGATE_CON(18), 4, GFLAGS), -+ COMPOSITE(CLK_SDMMC, "clk_sdmmc", mux_gpll_cpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(55), 14, 2, MFLAGS, 0, 8, -+ DFLAGS, RV1126_CLKGATE_CON(18), 5, GFLAGS), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RV1126_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RV1126_SDMMC_CON1, 1), ++ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 2, GFLAGS), ++ GATE(PCLK_MAC_VO, "pclk_gmac0", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 11, GFLAGS), ++ GATE(PCLK_VCDCPHY, "pclk_vcdcphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 4, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 5, GFLAGS), ++ GATE(PCLK_VO_IOC, "pclk_vo_ioc", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(42), 7, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 7, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 9, GFLAGS), ++ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 11, GFLAGS), + -+ /* PD_SDIO */ -+ GATE(HCLK_PDSDIO, "hclk_pdsdio", "hclk_pdphp", 0, -+ RV1126_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_pdsdio", 0, -+ RV1126_CLKGATE_CON(18), 6, GFLAGS), -+ COMPOSITE(CLK_SDIO, "clk_sdio", mux_gpll_cpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RV1126_CLKGATE_CON(18), 7, GFLAGS), -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RV1126_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RV1126_SDIO_CON1, 1), ++ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 13, GFLAGS), + -+ /* PD_NVM */ -+ GATE(HCLK_PDNVM, "hclk_pdnvm", "hclk_pdphp", 0, -+ RV1126_CLKGATE_CON(18), 1, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_pdnvm", 0, -+ RV1126_CLKGATE_CON(18), 8, GFLAGS), -+ COMPOSITE(CLK_EMMC, "clk_emmc", mux_gpll_cpll_xin24m_p, 0, -+ RV1126_CLKSEL_CON(57), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RV1126_CLKGATE_CON(18), 9, GFLAGS), -+ GATE(HCLK_NANDC, "hclk_nandc", "hclk_pdnvm", 0, -+ RV1126_CLKGATE_CON(18), 13, GFLAGS), -+ COMPOSITE(CLK_NANDC, "clk_nandc", mux_gpll_cpll_p, 0, -+ RV1126_CLKSEL_CON(59), 15, 1, MFLAGS, 0, 8, DFLAGS, -+ RV1126_CLKGATE_CON(18), 14, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_pdnvm", 0, -+ RV1126_CLKGATE_CON(18), 10, GFLAGS), -+ GATE(HCLK_SFCXIP, "hclk_sfcxip", "hclk_pdnvm", 0, -+ RV1126_CLKGATE_CON(18), 11, GFLAGS), -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(58), 15, 1, MFLAGS, 0, 8, DFLAGS, -+ RV1126_CLKGATE_CON(18), 12, GFLAGS), -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RV1126_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RV1126_EMMC_CON1, 1), ++ GATE(PCLK_VO_GRF, "pclk_vo_grf", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(39), 13, GFLAGS), ++ GATE(PCLK_CRU, "pclk_cru", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(39), 15, GFLAGS), ++ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 6, GFLAGS), ++ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 14, GFLAGS), ++ GATE(PCLK_HDCP, "pclk_hdcp", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 
2, GFLAGS), + -+ /* PD_USB */ -+ GATE(ACLK_PDUSB, "aclk_pdusb", "aclk_pdphp", 0, -+ RV1126_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(HCLK_PDUSB, "hclk_pdusb", "hclk_pdphp", 0, -+ RV1126_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_pdusb", 0, -+ RV1126_CLKGATE_CON(19), 4, GFLAGS), -+ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_pdusb", 0, -+ RV1126_CLKGATE_CON(19), 5, GFLAGS), -+#if IS_ENABLED(CONFIG_USB_EHCI_HCD_PLATFORM) || IS_ENABLED(CONFIG_USB_OHCI_HCD_PLATFORM) -+ COMPOSITE(CLK_USBHOST_UTMI_OHCI, "clk_usbhost_utmi_ohci", mux_usb480m_gpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(19), 6, GFLAGS), -+#else -+ COMPOSITE(CLK_USBHOST_UTMI_OHCI, "clk_usbhost_utmi_ohci", mux_usb480m_gpll_p, 0, -+ RV1126_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(19), 6, GFLAGS), -+#endif -+ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_pdusb", 0, -+ RV1126_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(CLK_USBOTG_REF, "clk_usbotg_ref", "xin24m", 0, -+ RV1126_CLKGATE_CON(19), 8, GFLAGS), -+ /* PD_GMAC */ -+ GATE(ACLK_PDGMAC, "aclk_pdgmac", "aclk_pdphp", 0, -+ RV1126_CLKGATE_CON(20), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PDGMAC, "pclk_pdgmac", "aclk_pdgmac", 0, -+ RV1126_CLKSEL_CON(63), 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(20), 1, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_pdgmac", 0, -+ RV1126_CLKGATE_CON(20), 4, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_pdgmac", 0, -+ RV1126_CLKGATE_CON(20), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_VDPP, "clk_core_vdpp", mux_339m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(83), 10, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_339m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(83), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 9, GFLAGS), ++ COMPOSITE_NODIV(ACLK_JPEG_ROOT, "aclk_jpeg_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(84), 9, 2, MFLAGS, ++ RK3528_CLKGATE_CON(41), 15, GFLAGS), ++ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_root", 0, ++ RK3528_CLKGATE_CON(41), 6, GFLAGS), + -+ COMPOSITE(CLK_GMAC_DIV, "clk_gmac_div", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(63), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(CLK_GMAC_RGMII_M0, "clk_gmac_rgmii_m0", "clk_gmac_rgmii_clkin_m0", 0, -+ RV1126_CLKGATE_CON(20), 12, GFLAGS), -+ MUX(CLK_GMAC_SRC_M0, "clk_gmac_src_m0", clk_gmac_src_m0_p, CLK_SET_RATE_PARENT, -+ RV1126_GMAC_CON, 0, 1, MFLAGS), -+ GATE(CLK_GMAC_RGMII_M1, "clk_gmac_rgmii_m1", "clk_gmac_rgmii_clkin_m1", 0, -+ RV1126_CLKGATE_CON(20), 13, GFLAGS), -+ MUX(CLK_GMAC_SRC_M1, "clk_gmac_src_m1", clk_gmac_src_m1_p, CLK_SET_RATE_PARENT, -+ RV1126_GMAC_CON, 5, 1, MFLAGS), -+ MUXGRF(CLK_GMAC_SRC, "clk_gmac_src", mux_clk_gmac_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RV1126_GRF_IOFUNC_CON1, 12, 1, MFLAGS), ++ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 0, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 8, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_VDPP, "aclk_vdpp", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 11, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 0, GFLAGS), + -+ GATE(CLK_GMAC_REF, "clk_gmac_ref", "clk_gmac_src", 0, -+ RV1126_CLKGATE_CON(20), 7, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDMMC0, "cclk_src_sdmmc0", mux_gpll_cpll_xin24m_p, 0, ++ 
RK3528_CLKSEL_CON(85), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(42), 8, GFLAGS), + -+ GATE(CLK_GMAC_TX_SRC, "clk_gmac_tx_src", "clk_gmac_src", 0, -+ RV1126_CLKGATE_CON(20), 9, GFLAGS), -+ FACTOR(CLK_GMAC_TX_DIV5, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), -+ FACTOR(CLK_GMAC_TX_DIV50, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), -+ MUXTBL(RGMII_MODE_CLK, "rgmii_mode_clk", mux_rgmii_clk_p, CLK_SET_RATE_PARENT, -+ RV1126_GMAC_CON, 2, 2, MFLAGS, rgmii_mux_idx), -+ GATE(CLK_GMAC_RX_SRC, "clk_gmac_rx_src", "clk_gmac_src", 0, -+ RV1126_CLKGATE_CON(20), 8, GFLAGS), -+ FACTOR(CLK_GMAC_RX_DIV2, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), -+ FACTOR(CLK_GMAC_RX_DIV20, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), -+ MUX(RMII_MODE_CLK, "rmii_mode_clk", mux_rmii_clk_p, CLK_SET_RATE_PARENT, -+ RV1126_GMAC_CON, 1, 1, MFLAGS), -+ MUX(CLK_GMAC_TX_RX, "clk_gmac_tx_rx", mux_gmac_tx_rx_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RV1126_GMAC_CON, 4, 1, MFLAGS), ++ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 15, 1, MFLAGS, 12, 3, DFLAGS, ++ RK3528_CLKGATE_CON(40), 0, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, ++ RK3528_CLKGATE_CON(40), 5, GFLAGS), + -+ GATE(CLK_GMAC_PTPREF, "clk_gmac_ptpref", "xin24m", 0, -+ RV1126_CLKGATE_CON(20), 10, GFLAGS), -+ COMPOSITE(CLK_GMAC_ETHERNET_OUT, "clk_gmac_ethernet_out2io", mux_cpll_gpll_p, 0, -+ RV1126_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(20), 11, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(85), 13, 2, MFLAGS, ++ RK3528_CLKGATE_CON(43), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(86), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(43), 12, GFLAGS), ++ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 6, GFLAGS), + ++ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, ++ RK3528_CLKGATE_CON(43), 2, GFLAGS), ++ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 3, GFLAGS), ++ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, ++ RK3528_CLKGATE_CON(43), 14, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 12, GFLAGS), ++ FACTOR(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "clk_sbpi_otpc_ns", 0, 1, 2), + -+ /* -+ * Clock-Architecture Diagram 14 -+ */ -+ /* PD_NPU */ -+ COMPOSITE(ACLK_PDNPU_DIV, "aclk_pdnpu_div", mux_gpll_cpll_apll_hpll_p, 0, -+ RV1126_CLKSEL_CON(65), 8, 2, MFLAGS, 0, 4, DFLAGS, -+ RV1126_CLKGATE_CON(22), 0, GFLAGS), -+ COMPOSITE_HALFDIV(ACLK_PDNPU_NP5, "aclk_pdnpu_np5", mux_gpll_cpll_apll_hpll_p, 0, -+ RV1126_CLKSEL_CON(65), 8, 2, MFLAGS, 4, 4, DFLAGS, -+ RV1126_CLKGATE_CON(22), 1, GFLAGS), -+ MUX(ACLK_PDNPU, "aclk_pdnpu", mux_aclk_pdnpu_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(65), 12, 1, MFLAGS), -+ COMPOSITE_NOMUX(HCLK_PDNPU, "hclk_pdnpu", "gpll", 0, -+ RV1126_CLKSEL_CON(66), 8, 4, DFLAGS, -+ RV1126_CLKGATE_CON(22), 2, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PDNPU, "pclk_pdnpu", "hclk_pdnpu", 0, -+ RV1126_CLKSEL_CON(66), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(ACLK_NPU, "aclk_npu", "aclk_pdnpu", 0, -+ RV1126_CLKGATE_CON(22), 7, GFLAGS), -+ GATE(HCLK_NPU, "hclk_npu", "hclk_pdnpu", 0, -+ RV1126_CLKGATE_CON(22), 8, GFLAGS), -+ COMPOSITE(CLK_NPU_DIV, "clk_npu_div", mux_gpll_cpll_apll_hpll_p, 0, -+ RV1126_CLKSEL_CON(67), 8, 2, MFLAGS, 0, 4, DFLAGS, -+ RV1126_CLKGATE_CON(22), 9, GFLAGS), -+ 
COMPOSITE_HALFDIV(CLK_NPU_NP5, "clk_npu_np5", mux_gpll_cpll_apll_hpll_p, 0, -+ RV1126_CLKSEL_CON(67), 8, 2, MFLAGS, 4, 4, DFLAGS, -+ RV1126_CLKGATE_CON(22), 10, GFLAGS), -+ MUX(CLK_CORE_NPU, "clk_core_npu", mux_clk_npu_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, -+ RV1126_CLKSEL_CON(67), 12, 1, MFLAGS), -+ GATE(CLK_CORE_NPUPVTM, "clk_core_npupvtm", "clk_core_npu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(22), 14, GFLAGS), -+ GATE(CLK_NPUPVTM, "clk_npupvtm", "xin24m", 0, -+ RV1126_CLKGATE_CON(22), 13, GFLAGS), -+ GATE(PCLK_NPUPVTM, "pclk_npupvtm", "pclk_pdnpu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(22), 12, GFLAGS), ++ GATE(MCLK_SAI_I2S3, "mclk_sai_i2s3", "mclk_i2s3_8ch_sai_src", 0, ++ RK3528_CLKGATE_CON(42), 2, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3528_CLKSEL_CON(84), 0, 1, MFLAGS, ++ RK3528_CLKGATE_CON(40), 3, GFLAGS), ++ GATE(DCLK_VOP1, "dclk_vop1", "dclk_vop_src1", CLK_SET_RATE_PARENT, ++ RK3528_CLKGATE_CON(40), 4, GFLAGS), ++ FACTOR_GATE(DCLK_CVBS, "dclk_cvbs", "dclk_vop1", 0, 1, 4, ++ RK3528_CLKGATE_CON(41), 4, GFLAGS), ++ GATE(DCLK_4X_CVBS, "dclk_4x_cvbs", "dclk_vop1", 0, ++ RK3528_CLKGATE_CON(41), 5, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 15 -+ */ -+ GATE(PCLK_PDTOP, "pclk_pdtop", "pclk_pdbus", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(23), 8, GFLAGS), -+ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_pdtop", 0, -+ RV1126_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_pdtop", 0, -+ RV1126_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_pdtop", 0, -+ RV1126_CLKGATE_CON(23), 3, GFLAGS), -+ GATE(PCLK_USBPHY_HOST, "pclk_usbphy_host", "pclk_pdtop", 0, -+ RV1126_CLKGATE_CON(19), 13, GFLAGS), -+ GATE(PCLK_USBPHY_OTG, "pclk_usbphy_otg", "pclk_pdtop", 0, -+ RV1126_CLKGATE_CON(19), 12, GFLAGS), ++ FACTOR_GATE(CLK_SFR_HDMI, "clk_sfr_hdmi", "dclk_vop_src1", 0, 1, 4, ++ RK3528_CLKGATE_CON(40), 7, GFLAGS), + -+#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE -+ /* -+ * Clock-Architecture Diagram 3 -+ */ -+ /* PD_CORE */ -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1126_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "pclk_dbg_daplite", "pclk_dbg", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(0, "clk_a7_jtag", "clk_jtag_ori", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(0, "pclk_dbg_niu", "pclk_dbg", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(0), 4, GFLAGS), -+ /* -+ * Clock-Architecture Diagram 4 -+ */ -+ /* PD_BUS */ -+ GATE(0, "aclk_pdbus_hold_niu1", "aclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 10, GFLAGS), -+ GATE(0, "aclk_pdbus_niu1", "aclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(0, "hclk_pdbus_niu1", "hclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(0, "pclk_pdbus_niu1", "pclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(0, "aclk_pdbus_niu2", "aclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 6, GFLAGS), -+ GATE(0, "hclk_pdbus_niu2", "hclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(0, "aclk_pdbus_niu3", "aclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(0, "hclk_pdbus_niu3", "hclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(0, "pclk_grf", "pclk_pdbus", 
CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(6), 15, GFLAGS), -+ GATE(0, "pclk_sgrf", "pclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(0, "aclk_sysram", "hclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(3), 9, GFLAGS), -+ GATE(0, "pclk_intmux", "pclk_pdbus", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(7), 14, GFLAGS), ++ GATE(CLK_SPDIF_HDMI, "clk_spdif_hdmi", "mclk_spdif_src", 0, ++ RK3528_CLKGATE_CON(40), 10, GFLAGS), ++ GATE(MCLK_SPDIF, "mclk_spdif", "mclk_spdif_src", 0, ++ RK3528_CLKGATE_CON(37), 15, GFLAGS), ++ GATE(CLK_CEC_HDMI, "clk_cec_hdmi", "clk_32k", 0, ++ RK3528_CLKGATE_CON(40), 8, GFLAGS), ++#if 0 ++ GATE(CLK_USBHOST_OHCI, "clk_usbhost_ohci", "clk_usbhost_ohci_io", 0, ++ RK3528_CLKGATE_CON(43), 5, GFLAGS), ++ GATE(CLK_USBHOST_UTMI, "clk_usbhost_utmi", "clk_usbhost_utmi_io", 0, ++ RK3528_CLKGATE_CON(43), 6, GFLAGS), ++ GATE(CLK_HDMIPHY_TMDSSRC, "clk_hdmiphy_tmdssrc", "clk_hdmiphy_tmdssrc_io", 0, ++ RK3528_CLKGATE_CON(40), 11, GFLAGS), ++ GATE(CLK_HDMIPHY_PREP, "clk_hdmiphy_prep", "clk_hdmiphy_prep_io", 0, ++ RK3528_CLKGATE_CON(40), 12, GFLAGS), ++#endif ++ /* vpu */ ++ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, ++ RK3528_CLKGATE_CON(26), 5, GFLAGS), ++ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, ++ RK3528_CLKGATE_CON(27), 1, GFLAGS), ++ GATE(CLK_SUSPEND_USB3OTG, "clk_suspend_usb3otg", "xin24m", 0, ++ RK3528_CLKGATE_CON(33), 4, GFLAGS), ++ GATE(CLK_PCIE_AUX, "clk_pcie_aux", "xin24m", 0, ++ RK3528_CLKGATE_CON(30), 2, GFLAGS), ++ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, ++ RK3528_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(CLK_REF_USB3OTG, "clk_ref_usb3otg", "xin24m", 0, ++ RK3528_CLKGATE_CON(33), 2, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO0, "cclk_src_sdio0", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(72), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 1, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 5 -+ */ -+ /* PD_CRYPTO */ -+ GATE(0, "aclk_pdcrypto_niu", "aclk_pdcrypto", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(0, "hclk_pdcrypto_niu", "hclk_pdcrypto", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(4), 14, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VPU_ROOT, "pclk_vpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(PCLK_VPU_GRF, "pclk_vpu_grf", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(25), 12, GFLAGS), ++ GATE(PCLK_CRU_PCIE, "pclk_cru_pcie", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(25), 11, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 11, GFLAGS), ++ GATE(PCLK_CAN2, "pclk_can2", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 7, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 4, GFLAGS), ++ GATE(PCLK_CAN3, "pclk_can3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 9, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 11, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 13, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 13, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 9, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 14, GFLAGS), ++ 
GATE(PCLK_PCIE, "pclk_pcie", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 1, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 7, GFLAGS), ++ GATE(PCLK_VPU_IOC, "pclk_vpu_ioc", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(26), 8, GFLAGS), ++ GATE(PCLK_PIPE_GRF, "pclk_pipe_grf", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(30), 7, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(28), 1, GFLAGS), ++ GATE(PCLK_PCIE_PHY, "pclk_pcie_phy", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 6, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 15, GFLAGS), ++ GATE(PCLK_MAC_VPU, "pclk_gmac1", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(28), 6, GFLAGS), ++ GATE(PCLK_I2C6, "pclk_i2c6", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(28), 3, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 6 -+ */ -+ /* PD_AUDIO */ -+ GATE(0, "hclk_pdaudio_niu", "hclk_pdaudio", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(0, "pclk_pdaudio_niu", "hclk_pdaudio", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(9), 3, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VPU_L_ROOT, "aclk_vpu_l_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(60), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 0, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_EMMC, "aclk_emmc", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(26), 1, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_MAC_VPU, "aclk_gmac1", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(28), 5, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_PCIE, "aclk_pcie", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(30), 3, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ -+ /* PD_VEPU */ -+ GATE(0, "aclk_pdvepu_niu", "aclk_pdvepu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(0, "hclk_pdvepu_niu", "hclk_pdvepu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(12), 4, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(33), 1, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 9 -+ */ -+ /* PD_VO */ -+ GATE(0, "aclk_pdvo_niu", "aclk_pdvo", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(0, "hclk_pdvo_niu", "hclk_pdvo", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(14), 4, GFLAGS), -+ GATE(0, "pclk_pdvo_niu", "pclk_pdvo", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(14), 5, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VPU_ROOT, "hclk_vpu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 10, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 13, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(HCLK_SAI_I2S0, "hclk_sai_i2s0", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 9, GFLAGS), ++ GATE(HCLK_SAI_I2S2, "hclk_sai_i2s2", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 11, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 10 -+ */ -+ /* PD_VI */ -+ GATE(0, "aclk_pdvi_niu", "aclk_pdvi", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(0, "hclk_pdvi_niu", "hclk_pdvi", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(0, "pclk_pdvi_niu", "pclk_pdvi", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(15), 5, GFLAGS), -+ /* -+ * Clock-Architecture Diagram 11 -+ */ -+ /* PD_ISPP */ -+ GATE(0, "aclk_pdispp_niu", "aclk_pdispp", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(16), 2, 
GFLAGS), -+ GATE(0, "hclk_pdispp_niu", "hclk_pdispp", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(HCLK_PCIE_SLV, "hclk_pcie_slv", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 4, GFLAGS), ++ GATE(HCLK_PCIE_DBI, "hclk_pcie_dbi", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 5, GFLAGS), ++ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 2, GFLAGS), ++ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 4, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 12 -+ */ -+ /* PD_PHP */ -+ GATE(0, "aclk_pdphpmid", "aclk_pdphp", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(0, "hclk_pdphpmid", "hclk_pdphp", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(0, "aclk_pdphpmid_niu", "aclk_pdphpmid", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(0, "hclk_pdphpmid_niu", "hclk_pdphpmid", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 5, GFLAGS), ++ COMPOSITE_NOMUX(CLK_GMAC1_VPU_25M, "clk_gmac1_25m", "ppll", 0, ++ RK3528_CLKSEL_CON(60), 2, 8, DFLAGS, ++ RK3528_CLKGATE_CON(25), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PPLL_125M_MATRIX, "clk_ppll_125m_src", "ppll", 0, ++ RK3528_CLKSEL_CON(60), 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(25), 2, GFLAGS), + -+ /* PD_SDCARD */ -+ GATE(0, "hclk_pdsdmmc_niu", "hclk_pdsdmmc", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 7, GFLAGS), ++ COMPOSITE(CLK_CAN3, "clk_can3", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(73), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(64), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 4, GFLAGS), + -+ /* PD_SDIO */ -+ GATE(0, "hclk_pdsdio_niu", "hclk_pdsdio", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(17), 9, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(61), 12, 2, MFLAGS, 6, 6, DFLAGS, ++ RK3528_CLKGATE_CON(25), 14, GFLAGS), ++ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(62), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(25), 15, GFLAGS), + -+ /* PD_NVM */ -+ GATE(0, "hclk_pdnvm_niu", "hclk_pdnvm", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(18), 3, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VPU_ROOT, "aclk_vpu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 3, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 9, GFLAGS), + -+ /* PD_USB */ -+ GATE(0, "aclk_pdusb_niu", "aclk_pdusb", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(0, "hclk_pdusb_niu", "hclk_pdusb", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(19), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 10, 2, MFLAGS, ++ RK3528_CLKGATE_CON(27), 5, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO1, "cclk_src_sdio1", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(72), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 3, GFLAGS), ++ COMPOSITE(CLK_CAN2, "clk_can2", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(73), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK3528_CLKSEL_CON(74), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(32), 15, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3528_CLKSEL_CON(74), 0, 3, DFLAGS, ++ RK3528_CLKGATE_CON(32), 12, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ 
RK3528_CLKSEL_CON(74), 8, 5, DFLAGS, ++ RK3528_CLKGATE_CON(33), 0, GFLAGS), ++ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(62), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(26), 2, GFLAGS), ++ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s2_2ch_sai_src", 0, ++ RK3528_CLKSEL_CON(63), 0, 8, DFLAGS, ++ RK3528_CLKGATE_CON(26), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 14, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 2, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI_I2S0, "mclk_sai_i2s0", mclk_sai_i2s0_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(62), 10, 1, MFLAGS, ++ RK3528_CLKGATE_CON(26), 10, GFLAGS), ++ GATE(MCLK_SAI_I2S2, "mclk_sai_i2s2", "mclk_i2s2_2ch_sai_src", 0, ++ RK3528_CLKGATE_CON(26), 12, GFLAGS), ++#if 0 ++ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, ++ RK3528_CLKGATE_CON(27), 6, GFLAGS), + -+ /* PD_GMAC */ -+ GATE(0, "aclk_pdgmac_niu", "aclk_pdgmac", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(20), 2, GFLAGS), -+ GATE(0, "pclk_pdgmac_niu", "pclk_pdgmac", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(20), 3, GFLAGS), ++ /* vpuphy */ ++ GATE(CLK_PIPE_USB3OTG_COMBO, "clk_pipe_usb3otg_combo", "clk_pipe_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 0, GFLAGS), ++ GATE(CLK_UTMI_USB3OTG, "clk_utmi_usb3otg", "clk_utmi_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 1, GFLAGS), ++ GATE(CLK_PCIE_PIPE_PHY, "clk_pcie_pipe_phy", "clk_pipe_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 2, GFLAGS), ++#endif ++ /* pcie */ ++ COMPOSITE_NOMUX(CLK_PPLL_100M_MATRIX, "clk_ppll_100m_src", "ppll", CLK_IS_CRITICAL, ++ RK3528_PCIE_CLKSEL_CON(1), 2, 5, DFLAGS, ++ RK3528_PCIE_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PPLL_50M_MATRIX, "clk_ppll_50m_src", "ppll", CLK_IS_CRITICAL, ++ RK3528_PCIE_CLKSEL_CON(1), 7, 5, DFLAGS, ++ RK3528_PCIE_CLKGATE_CON(0), 2, GFLAGS), ++ MUX(CLK_REF_PCIE_INNER_PHY, "clk_ref_pcie_inner_phy", clk_ref_pcie_inner_phy_p, 0, ++ RK3528_PCIE_CLKSEL_CON(1), 13, 1, MFLAGS), ++ FACTOR(CLK_REF_PCIE_100M_PHY, "clk_ref_pcie_100m_phy", "clk_ppll_100m_src", 0, 1, 1), + -+ /* -+ * Clock-Architecture Diagram 13 -+ */ -+ /* PD_DDR */ -+ COMPOSITE_NOMUX(0, "pclk_pdddr_pre", "gpll", CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(64), 0, 5, DFLAGS, -+ RV1126_CLKGATE_CON(21), 0, GFLAGS), -+ GATE(PCLK_PDDDR, "pclk_pdddr", "pclk_pdddr_pre", CLK_IS_CRITICAL, -+ RV1126_CLKGATE_CON(21), 15, GFLAGS), -+ GATE(0, "pclk_ddr_msch", "pclk_pdddr", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 6, GFLAGS), -+ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_dpll_gpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS), -+ COMPOSITE(CLK_DDRPHY, "clk_ddrphy", mux_dpll_gpll_p, CLK_IS_CRITICAL, -+ RV1126_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RV1126_CLKGATE_CON(21), 8, GFLAGS), -+ GATE(0, "clk1x_phy", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(0, "clk_ddr_msch", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 10, GFLAGS), -+ GATE(0, "pclk_ddr_dfictl", "pclk_pdddr", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(0, "clk_ddr_dfictl", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 13, GFLAGS), -+ GATE(0, "pclk_ddr_standby", "pclk_pdddr", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 4, GFLAGS), -+ GATE(0, "clk_ddr_standby", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ 
RV1126_CLKGATE_CON(21), 14, GFLAGS), -+ GATE(0, "aclk_ddr_split", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 9, GFLAGS), -+ GATE(0, "pclk_ddr_grf", "pclk_pdddr", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_pdddr", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(CLK_DDR_MON, "clk_ddr_mon", "clk_ddrphy", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(20), 15, GFLAGS), -+ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(21), 7, GFLAGS), ++ /* gmac */ ++ FACTOR(CLK_GMAC1_RMII_VPU, "clk_gmac1_50m", "clk_ppll_50m_src", 0, 1, 1), ++ FACTOR(CLK_GMAC1_SRC_VPU, "clk_gmac1_125m", "clk_ppll_125m_src", 0, 1, 1), + -+ /* -+ * Clock-Architecture Diagram 14 -+ */ -+ /* PD_NPU */ -+ GATE(0, "aclk_pdnpu_niu", "aclk_pdnpu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(22), 4, GFLAGS), -+ GATE(0, "hclk_pdnpu_niu", "hclk_pdnpu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(22), 5, GFLAGS), -+ GATE(0, "pclk_pdnpu_niu", "pclk_pdnpu", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(22), 6, GFLAGS), ++ /* they are orphans */ ++ DIV(CLK_GMAC0_SRC, "clk_gmac0_src", "clk_gmac0_io_i", 0, ++ RK3528_CLKSEL_CON(84), 3, 6, DFLAGS), ++ GATE(CLK_GMAC0_TX, "clk_gmac0_tx", "clk_gmac0_src", 0, ++ RK3528_CLKGATE_CON(41), 13, GFLAGS), ++ GATE(CLK_GMAC0_RX, "clk_gmac0_rx", "clk_gmac0_src", 0, ++ RK3528_CLKGATE_CON(41), 14, GFLAGS), ++ GATE(CLK_GMAC0_RMII_50M, "clk_gmac0_rmii_50m", "clk_gmac0_io_i", 0, ++ RK3528_CLKGATE_CON(41), 12, GFLAGS), ++ GATE(CLK_SCRKEYGEN, "clk_scrkeygen", "clk_pmupvtm_out", 0, ++ RK3528_PMU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PVTM_OSCCHK, "clk_pvtm_oscchk", "clk_pmupvtm_out", 0, ++ RK3528_PMU_CLKGATE_CON(2), 1, GFLAGS), ++}; + -+ /* -+ * Clock-Architecture Diagram 15 -+ */ -+ GATE(0, "pclk_topniu", "pclk_pdtop", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 9, GFLAGS), -+ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_pdtop", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 10, GFLAGS), -+ GATE(PCLK_TOPGRF, "pclk_topgrf", "pclk_pdtop", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 11, GFLAGS), -+ GATE(PCLK_CPUEMADET, "pclk_cpuemadet", "pclk_pdtop", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 12, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_pdtop", CLK_IGNORE_UNUSED, -+ RV1126_CLKGATE_CON(23), 0, GFLAGS), -+#endif ++static struct rockchip_clk_branch rk3528_grf_clk_branches[] __initdata = { ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc0", RK3528_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc0", RK3528_SDMMC_CON1, 1), ++ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "cclk_src_sdio0", RK3528_SDIO0_CON0, 1), ++ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "cclk_src_sdio0", RK3528_SDIO0_CON1, 1), ++ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "cclk_src_sdio1", RK3528_SDIO1_CON0, 1), ++ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "cclk_src_sdio1", RK3528_SDIO1_CON1, 1), +}; + -+static void __iomem *rv1126_cru_base; -+static void __iomem *rv1126_pmucru_base; ++static void __iomem *rk3528_cru_base; + -+void rv1126_dump_cru(void) ++static void rk3528_dump_cru(void) +{ -+ if (rv1126_pmucru_base) { -+ pr_warn("PMU CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rv1126_pmucru_base, -+ 0x248, false); -+ } -+ if (rv1126_cru_base) { ++ if (rk3528_cru_base) { + pr_warn("CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rv1126_cru_base, -+ 0x588, false); ++ 32, 4, rk3528_cru_base, ++ 0x8b8, false); ++ pr_warn("PCIE CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", 
DUMP_PREFIX_OFFSET, ++ 32, 4, rk3528_cru_base + RK3528_PCIE_CRU_BASE, ++ 0x804, false); ++ pr_warn("DDRPHY CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3528_cru_base + RK3528_DDRPHY_CRU_BASE, ++ 0x804, false); + } +} -+EXPORT_SYMBOL_GPL(rv1126_dump_cru); -+ -+static int rv1126_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) -+{ -+ rv1126_dump_cru(); -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block rv1126_clk_panic_block = { -+ .notifier_call = rv1126_clk_panic, -+}; + -+static struct rockchip_clk_provider *pmucru_ctx; -+static void __init rv1126_pmu_clk_init(struct device_node *np) ++static void __init rk3528_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; ++ struct clk **clks; + + reg_base = of_iomap(np, 0); + if (!reg_base) { -+ pr_err("%s: could not map cru pmu region\n", __func__); ++ pr_err("%s: could not map cru region\n", __func__); + return; + } + -+ rv1126_pmucru_base = reg_base; ++ rk3528_cru_base = reg_base; + -+ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip pmu clk init failed\n", __func__); ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); + return; + } ++ clks = ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rv1126_pmu_pll_clks, -+ ARRAY_SIZE(rv1126_pmu_pll_clks), -+ RV1126_GRF_SOC_STATUS0); ++ rockchip_clk_register_plls(ctx, rk3528_pll_clks, ++ ARRAY_SIZE(rk3528_pll_clks), ++ RK3528_GRF_SOC_STATUS0); + -+ rockchip_clk_register_branches(ctx, rv1126_clk_pmu_branches, -+ ARRAY_SIZE(rv1126_clk_pmu_branches)); ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3528_cpuclk_data, rk3528_cpuclk_rates, ++ ARRAY_SIZE(rk3528_cpuclk_rates)); ++ rockchip_clk_register_branches(ctx, rk3528_clk_branches, ++ ARRAY_SIZE(rk3528_clk_branches)); + -+ rockchip_register_softrst(np, 2, reg_base + RV1126_PMU_SOFTRST_CON(0), ++ rockchip_register_softrst(np, 47, reg_base + RK3528_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); ++ rockchip_register_restart_notifier(ctx, RK3528_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); + -+ pmucru_ctx = ctx; ++ if (!rk_dump_cru) ++ rk_dump_cru = rk3528_dump_cru; ++ +} + -+CLK_OF_DECLARE(rv1126_cru_pmu, "rockchip,rv1126-pmucru", rv1126_pmu_clk_init); ++CLK_OF_DECLARE(rk3528_cru, "rockchip,rk3528-cru", rk3528_clk_init); + -+static void __init rv1126_clk_init(struct device_node *np) ++static void __init rk3528_grf_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; -+ struct clk **cru_clks, **pmucru_clks; + -+ reg_base = of_iomap(np, 0); ++ reg_base = of_iomap(of_get_parent(np), 0); + if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); ++ pr_err("%s: could not map cru grf region\n", __func__); + return; + } + -+ rv1126_cru_base = reg_base; -+ -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); + if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); ++ pr_err("%s: rockchip grf clk init failed\n", __func__); + return; + } -+ cru_clks = ctx->clk_data.clks; -+ pmucru_clks = pmucru_ctx->clk_data.clks; -+ -+ rockchip_clk_register_plls(ctx, rv1126_pll_clks, -+ ARRAY_SIZE(rv1126_pll_clks), -+ RV1126_GRF_SOC_STATUS0); -+ -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, 
cru_clks[PLL_APLL], pmucru_clks[PLL_GPLL], -+ &rv1126_cpuclk_data, rv1126_cpuclk_rates, -+ ARRAY_SIZE(rv1126_cpuclk_rates)); -+ -+ rockchip_clk_register_branches(ctx, rv1126_clk_branches, -+ ARRAY_SIZE(rv1126_clk_branches)); -+ -+ rockchip_register_softrst(np, 15, reg_base + RV1126_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RV1126_GLB_SRST_FST, NULL); ++ rockchip_clk_register_branches(ctx, rk3528_grf_clk_branches, ++ ARRAY_SIZE(rk3528_grf_clk_branches)); + + rockchip_clk_of_add_provider(np, ctx); -+ -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rv1126_clk_panic_block); -+} -+ -+CLK_OF_DECLARE(rv1126_cru, "rockchip,rv1126-cru", rv1126_clk_init); -+ -+struct clk_rv1126_inits { -+ void (*inits)(struct device_node *np); -+}; -+ -+static const struct clk_rv1126_inits clk_rv1126_pmucru_init = { -+ .inits = rv1126_pmu_clk_init, -+}; -+ -+static const struct clk_rv1126_inits clk_rv1126_cru_init = { -+ .inits = rv1126_clk_init, -+}; -+ -+static const struct of_device_id clk_rv1126_match_table[] = { -+ { -+ .compatible = "rockchip,rv1126-cru", -+ .data = &clk_rv1126_cru_init, -+ }, { -+ .compatible = "rockchip,rv1126-pmucru", -+ .data = &clk_rv1126_pmucru_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rv1126_match_table); -+ -+static int __init clk_rv1126_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct clk_rv1126_inits *init_data; -+ -+ init_data = (struct clk_rv1126_inits *)of_device_get_match_data(&pdev->dev); -+ if (!init_data) -+ return -EINVAL; -+ -+ if (init_data->inits) -+ init_data->inits(np); -+ -+ return 0; +} + -+static struct platform_driver clk_rv1126_driver = { -+ .driver = { -+ .name = "clk-rv1126", -+ .of_match_table = clk_rv1126_match_table, -+ }, -+}; -+builtin_platform_driver_probe(clk_rv1126_driver, clk_rv1126_probe); ++CLK_OF_DECLARE(rk3528_grf_cru, "rockchip,rk3528-grf-cru", rk3528_grf_clk_init); + -+MODULE_DESCRIPTION("Rockchip RV1126 Clock Driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/clk.c b/drivers/clk/rockchip-oh/clk.c +diff --git a/drivers/clk/rockchip-oh/clk-rk3562.c b/drivers/clk/rockchip-oh/clk-rk3562.c new file mode 100644 -index 000000000..96984dea0 +index 000000000..3c6f78fec --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk.c -@@ -0,0 +1,828 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later ++++ b/drivers/clk/rockchip-oh/clk-rk3562.c +@@ -0,0 +1,1134 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ * -+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. -+ * Author: Xing Zheng -+ * -+ * based on -+ * -+ * samsung/clk.c -+ * Copyright (c) 2013 Samsung Electronics Co., Ltd. -+ * Copyright (c) 2013 Linaro Ltd. -+ * Author: Thomas Abraham ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang ++ * Author: Finley Xiao + */ + -+#include -+#include +#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "../clk-fractional-divider.h" ++#include ++#include ++#include ++#include ++#include ++#include +#include "clk.h" + -+#ifdef MODULE -+static HLIST_HEAD(clk_ctx_list); -+#endif -+ -+/* -+ * Register a clock branch. -+ * Most clock branches have a form like -+ * -+ * src1 --|--\ -+ * |M |--[GATE]-[DIV]- -+ * src2 --|--/ -+ * -+ * sometimes without one of those components. 
-+ */ -+static struct clk *rockchip_clk_register_branch(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, u8 mux_width, u8 mux_flags, -+ u32 *mux_table, -+ int div_offset, u8 div_shift, u8 div_width, u8 div_flags, -+ struct clk_div_table *div_table, int gate_offset, -+ u8 gate_shift, u8 gate_flags, unsigned long flags, -+ spinlock_t *lock) -+{ -+ struct clk_hw *hw; -+ struct clk_mux *mux = NULL; -+ struct clk_gate *gate = NULL; -+ struct clk_divider *div = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, -+ *gate_ops = NULL; -+ int ret; -+ -+ if (num_parents > 1) { -+ mux = kzalloc(sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); -+ -+ mux->reg = base + muxdiv_offset; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux->flags = mux_flags; -+ mux->table = mux_table; -+ mux->lock = lock; -+ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops -+ : &clk_mux_ops; -+ } -+ -+ if (gate_offset >= 0) { -+ gate = kzalloc(sizeof(*gate), GFP_KERNEL); -+ if (!gate) { -+ ret = -ENOMEM; -+ goto err_gate; -+ } -+ -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; -+ gate_ops = &clk_gate_ops; -+ } -+ -+ if (div_width > 0) { -+ div = kzalloc(sizeof(*div), GFP_KERNEL); -+ if (!div) { -+ ret = -ENOMEM; -+ goto err_div; -+ } -+ -+ div->flags = div_flags; -+ if (div_offset) -+ div->reg = base + div_offset; -+ else -+ div->reg = base + muxdiv_offset; -+ div->shift = div_shift; -+ div->width = div_width; -+ div->lock = lock; -+ div->table = div_table; -+ div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) -+ ? &clk_divider_ro_ops -+ : &clk_divider_ops; -+ } -+ -+ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, -+ mux ? &mux->hw : NULL, mux_ops, -+ div ? &div->hw : NULL, div_ops, -+ gate ? &gate->hw : NULL, gate_ops, -+ flags); -+ if (IS_ERR(hw)) { -+ kfree(div); -+ kfree(gate); -+ return ERR_CAST(hw); -+ } -+ -+ return hw->clk; -+err_div: -+ kfree(gate); -+err_gate: -+ kfree(mux); -+ return ERR_PTR(ret); -+} -+ -+struct rockchip_clk_frac { -+ struct notifier_block clk_nb; -+ struct clk_fractional_divider div; -+ struct clk_gate gate; -+ -+ struct clk_mux mux; -+ const struct clk_ops *mux_ops; -+ int mux_frac_idx; ++#define RK3562_GRF_SOC_STATUS0 0x430 + -+ bool rate_change_remuxed; -+ int rate_change_idx; ++enum rk3562_plls { ++ apll, gpll, vpll, hpll, cpll, dpll, +}; + -+#define to_rockchip_clk_frac_nb(nb) \ -+ container_of(nb, struct rockchip_clk_frac, clk_nb) -+ -+static int rockchip_clk_frac_notifier_cb(struct notifier_block *nb, -+ unsigned long event, void *data) -+{ -+ struct clk_notifier_data *ndata = data; -+ struct rockchip_clk_frac *frac = to_rockchip_clk_frac_nb(nb); -+ struct clk_mux *frac_mux = &frac->mux; -+ int ret = 0; -+ -+ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", -+ __func__, event, ndata->old_rate, ndata->new_rate); -+ if (event == PRE_RATE_CHANGE) { -+ frac->rate_change_idx = -+ frac->mux_ops->get_parent(&frac_mux->hw); -+ if (frac->rate_change_idx != frac->mux_frac_idx) { -+ frac->mux_ops->set_parent(&frac_mux->hw, -+ frac->mux_frac_idx); -+ frac->rate_change_remuxed = 1; -+ } -+ } else if (event == POST_RATE_CHANGE) { -+ /* -+ * The POST_RATE_CHANGE notifier runs directly after the -+ * divider clock is set in clk_change_rate, so we'll have -+ * remuxed back to the original parent before clk_change_rate -+ * reaches the mux itself. 
-+ */ -+ if (frac->rate_change_remuxed) { -+ frac->mux_ops->set_parent(&frac_mux->hw, -+ frac->rate_change_idx); -+ frac->rate_change_remuxed = 0; -+ } -+ } -+ -+ return notifier_from_errno(ret); -+} -+ -+/* -+ * fractional divider must set that denominator is 20 times larger than -+ * numerator to generate precise clock frequency. -+ */ -+static void rockchip_fractional_approximation(struct clk_hw *hw, -+ unsigned long rate, unsigned long *parent_rate, -+ unsigned long *m, unsigned long *n) -+{ -+ struct clk_fractional_divider *fd = to_clk_fd(hw); -+ unsigned long p_rate, p_parent_rate; -+ struct clk_hw *p_parent; -+ -+ if (rate == 0) { -+ pr_warn("%s p_rate(%ld), rate(%ld), maybe invalid frequency setting!\n", -+ clk_hw_get_name(hw), *parent_rate, rate); -+ *m = 0; -+ *n = 1; -+ return; -+ } -+ -+ p_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); -+ if ((rate * 20 > p_rate) && (p_rate % rate != 0)) { -+ p_parent = clk_hw_get_parent(clk_hw_get_parent(hw)); -+ if (!p_parent) { -+ *parent_rate = p_rate; -+ } else { -+ p_parent_rate = clk_hw_get_rate(p_parent); -+ *parent_rate = p_parent_rate; -+ } -+ -+ if (*parent_rate == 0) { -+ pr_warn("%s p_rate(%ld), rate(%ld), maybe invalid frequency setting!\n", -+ clk_hw_get_name(hw), *parent_rate, rate); -+ *m = 0; -+ *n = 1; -+ return; -+ } -+ -+ if (*parent_rate < rate * 20) { -+ /* -+ * Fractional frequency divider to do -+ * integer frequency divider does not -+ * need 20 times the limit. -+ */ -+ if (!(*parent_rate % rate)) { -+ *m = 1; -+ *n = *parent_rate / rate; -+ return; -+ } else if (!(fd->flags & CLK_FRAC_DIVIDER_NO_LIMIT)) { -+ pr_warn("%s p_rate(%ld) is low than rate(%ld)*20, use integer or half-div\n", -+ clk_hw_get_name(hw), -+ *parent_rate, rate); -+ *m = 0; -+ *n = 1; -+ return; -+ } -+ } -+ } -+ -+ fd->flags |= CLK_FRAC_DIVIDER_POWER_OF_TWO_PS; -+ -+ clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n); -+} -+ -+static struct clk *rockchip_clk_register_frac_branch( -+ struct rockchip_clk_provider *ctx, const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *base, int muxdiv_offset, u8 div_flags, -+ int gate_offset, u8 gate_shift, u8 gate_flags, -+ unsigned long flags, struct rockchip_clk_branch *child, -+ spinlock_t *lock) -+{ -+ struct clk_hw *hw; -+ struct rockchip_clk_frac *frac; -+ struct clk_gate *gate = NULL; -+ struct clk_fractional_divider *div = NULL; -+ const struct clk_ops *div_ops = NULL, *gate_ops = NULL; -+ -+ if (muxdiv_offset < 0) -+ return ERR_PTR(-EINVAL); -+ -+ if (child && child->branch_type != branch_mux) { -+ pr_err("%s: fractional child clock for %s can only be a mux\n", -+ __func__, name); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ frac = kzalloc(sizeof(*frac), GFP_KERNEL); -+ if (!frac) -+ return ERR_PTR(-ENOMEM); -+ -+ if (gate_offset >= 0) { -+ gate = &frac->gate; -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; -+ gate_ops = &clk_gate_ops; -+ } -+ -+ div = &frac->div; -+ div->flags = div_flags; -+ div->reg = base + muxdiv_offset; -+ div->mshift = 16; -+ div->mwidth = 16; -+ div->mmask = GENMASK(div->mwidth - 1, 0) << div->mshift; -+ div->nshift = 0; -+ div->nwidth = 16; -+ div->nmask = GENMASK(div->nwidth - 1, 0) << div->nshift; -+ div->lock = lock; -+ div->approximation = rockchip_fractional_approximation; -+ div_ops = &clk_fractional_divider_ops; -+ -+ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, -+ NULL, NULL, -+ &div->hw, div_ops, -+ gate ? 
&gate->hw : NULL, gate_ops, -+ flags | CLK_SET_RATE_UNGATE); -+ if (IS_ERR(hw)) { -+ kfree(frac); -+ return ERR_CAST(hw); -+ } -+ -+ if (child) { -+ struct clk_mux *frac_mux = &frac->mux; -+ struct clk_init_data init; -+ struct clk *mux_clk; -+ int ret; -+ -+ frac->mux_frac_idx = match_string(child->parent_names, -+ child->num_parents, name); -+ frac->mux_ops = &clk_mux_ops; -+ frac->clk_nb.notifier_call = rockchip_clk_frac_notifier_cb; ++static struct rockchip_pll_rate_table rk3562_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), ++ RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0), ++ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), ++ RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0), ++ { /* sentinel */ }, ++}; + -+ frac_mux->reg = base + child->muxdiv_offset; -+ frac_mux->shift = child->mux_shift; -+ frac_mux->mask = BIT(child->mux_width) - 1; -+ frac_mux->flags = child->mux_flags; -+ if (child->mux_table) -+ frac_mux->table = child->mux_table; -+ frac_mux->lock = lock; -+ frac_mux->hw.init = 
&init; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; ++PNAME(gpll_cpll_pvtpll_dmyapll_p) = { "gpll", "cpll", "log_pvtpll", "dummy_apll" }; ++PNAME(gpll_cpll_hpll_xin24m_p) = { "gpll", "cpll", "hpll", "xin24m" }; ++PNAME(gpll_cpll_vpll_dmyhpll_p) = { "gpll", "cpll", "vpll", "dummy_hpll" }; ++PNAME(gpll_dmyhpll_vpll_apll_p) = { "gpll", "dummy_hpll", "vpll", "apll" }; ++PNAME(gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(gpll_cpll_xin24m_dmyapll_p) = { "gpll", "cpll", "xin24m", "dummy_apll" }; ++PNAME(gpll_cpll_xin24m_dmyhpll_p) = { "gpll", "cpll", "xin24m", "dummy_hpll" }; ++PNAME(vpll_dmyhpll_gpll_cpll_p) = { "vpll", "dummy_hpll", "gpll", "cpll" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; ++PNAME(mux_50m_xin24m_p) = { "clk_matrix_50m_src", "xin24m" }; ++PNAME(mux_100m_50m_xin24m_p) = { "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; ++PNAME(mux_125m_xin24m_p) = { "clk_matrix_125m_src", "xin24m" }; ++PNAME(mux_200m_xin24m_32k_p) = { "clk_200m_pmu", "xin24m", "clk_rtc_32k" }; ++PNAME(mux_200m_100m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src" }; ++PNAME(mux_200m_100m_50m_xin24m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; ++PNAME(clk_sai0_p) = { "clk_sai0_src", "clk_sai0_frac", "xin_osc0_half", "mclk_sai0_from_io" }; ++PNAME(mclk_sai0_out2io_p) = { "mclk_sai0", "xin_osc0_half" }; ++PNAME(clk_sai1_p) = { "clk_sai1_src", "clk_sai1_frac", "xin_osc0_half", "mclk_sai1_from_io" }; ++PNAME(mclk_sai1_out2io_p) = { "mclk_sai1", "xin_osc0_half" }; ++PNAME(clk_sai2_p) = { "clk_sai2_src", "clk_sai2_frac", "xin_osc0_half", "mclk_sai2_from_io" }; ++PNAME(mclk_sai2_out2io_p) = { "mclk_sai2", "xin_osc0_half" }; ++PNAME(clk_spdif_p) = { "clk_spdif_src", "clk_spdif_frac", "xin_osc0_half" }; ++PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; ++PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; ++PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_32k_pvtm" }; ++PNAME(clk_pmu1_uart0_p) = { "clk_pmu1_uart0_src", "clk_pmu1_uart0_frac", "xin24m" }; ++PNAME(clk_pipephy_ref_p) = { "clk_pipephy_div", "clk_pipephy_xin24m" }; ++PNAME(clk_usbphy_ref_p) = { "clk_usb2phy_xin24m", "clk_24m_sscsrc" }; ++PNAME(clk_mipidsi_ref_p) = { "clk_mipidsiphy_xin24m", "clk_24m_sscsrc" }; + -+ init.name = child->name; -+ init.flags = child->flags | CLK_SET_RATE_PARENT; -+ init.ops = frac->mux_ops; -+ init.parent_names = child->parent_names; -+ init.num_parents = child->num_parents; ++static struct rockchip_pll_clock rk3562_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ 0, RK3562_PLL_CON(0), ++ RK3562_MODE_CON, 0, 0, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK3562_PLL_CON(24), ++ RK3562_MODE_CON, 2, 3, 0, rk3562_pll_rates), ++ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, ++ 0, RK3562_PLL_CON(32), ++ 
RK3562_MODE_CON, 6, 4, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, ++ 0, RK3562_PLL_CON(40), ++ RK3562_MODE_CON, 8, 5, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK3562_PMU1_PLL_CON(0), ++ RK3562_PMU1_MODE_CON, 0, 2, 0, rk3562_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3562_SUBDDR_PLL_CON(0), ++ RK3562_SUBDDR_MODE_CON, 0, 1, 0, NULL), ++}; + -+ mux_clk = clk_register(NULL, &frac_mux->hw); -+ if (IS_ERR(mux_clk)) { -+ kfree(frac); -+ return mux_clk; -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ rockchip_clk_add_lookup(ctx, mux_clk, child->id); ++static struct rockchip_clk_branch rk3562_clk_sai0_fracmux __initdata = ++ MUX(CLK_SAI0, "clk_sai0", clk_sai0_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(3), 6, 2, MFLAGS); + -+ /* notifier on the fraction divider to catch rate changes */ -+ if (frac->mux_frac_idx >= 0) { -+ pr_debug("%s: found fractional parent in mux at pos %d\n", -+ __func__, frac->mux_frac_idx); -+ ret = clk_notifier_register(hw->clk, &frac->clk_nb); -+ if (ret) -+ pr_err("%s: failed to register clock notifier for %s\n", -+ __func__, name); -+ } else { -+ pr_warn("%s: could not find %s as parent of %s, rate changes may not work\n", -+ __func__, name, child->name); -+ } -+ } ++static struct rockchip_clk_branch rk3562_clk_sai1_fracmux __initdata = ++ MUX(CLK_SAI1, "clk_sai1", clk_sai1_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(5), 6, 2, MFLAGS); + -+ return hw->clk; -+} ++static struct rockchip_clk_branch rk3562_clk_sai2_fracmux __initdata = ++ MUX(CLK_SAI2, "clk_sai2", clk_sai2_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(8), 6, 2, MFLAGS); + -+static struct clk *rockchip_clk_register_factor_branch(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *base, unsigned int mult, unsigned int div, -+ int gate_offset, u8 gate_shift, u8 gate_flags, -+ unsigned long flags, spinlock_t *lock) -+{ -+ struct clk_hw *hw; -+ struct clk_gate *gate = NULL; -+ struct clk_fixed_factor *fix = NULL; ++static struct rockchip_clk_branch rk3562_clk_spdif_fracmux __initdata = ++ MUX(CLK_SPDIF, "clk_spdif", clk_spdif_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(15), 6, 2, MFLAGS); + -+ /* without gate, register a simple factor clock */ -+ if (gate_offset == 0) { -+ return clk_register_fixed_factor(NULL, name, -+ parent_names[0], flags, mult, -+ div); -+ } ++static struct rockchip_clk_branch rk3562_clk_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(21), 14, 2, MFLAGS); + -+ gate = kzalloc(sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ return ERR_PTR(-ENOMEM); ++static struct rockchip_clk_branch rk3562_clk_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(23), 14, 2, MFLAGS); + -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; ++static struct rockchip_clk_branch rk3562_clk_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(25), 14, 2, MFLAGS); + -+ fix = kzalloc(sizeof(*fix), GFP_KERNEL); -+ if (!fix) { -+ kfree(gate); -+ return ERR_PTR(-ENOMEM); -+ } ++static struct rockchip_clk_branch 
rk3562_clk_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(27), 14, 2, MFLAGS); + -+ fix->mult = mult; -+ fix->div = div; ++static struct rockchip_clk_branch rk3562_clk_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(29), 14, 2, MFLAGS); + -+ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, -+ NULL, NULL, -+ &fix->hw, &clk_fixed_factor_ops, -+ &gate->hw, &clk_gate_ops, flags); -+ if (IS_ERR(hw)) { -+ kfree(fix); -+ kfree(gate); -+ return ERR_CAST(hw); -+ } ++static struct rockchip_clk_branch rk3562_clk_uart6_fracmux __initdata = ++ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(31), 14, 2, MFLAGS); + -+ return hw->clk; -+} ++static struct rockchip_clk_branch rk3562_clk_uart7_fracmux __initdata = ++ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(33), 14, 2, MFLAGS); + -+struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, -+ void __iomem *base, -+ unsigned long nr_clks) -+{ -+ struct rockchip_clk_provider *ctx; -+ struct clk **clk_table; -+ int i; ++static struct rockchip_clk_branch rk3562_clk_uart8_fracmux __initdata = ++ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(35), 14, 2, MFLAGS); + -+ ctx = kzalloc(sizeof(struct rockchip_clk_provider), GFP_KERNEL); -+ if (!ctx) -+ return ERR_PTR(-ENOMEM); ++static struct rockchip_clk_branch rk3562_clk_uart9_fracmux __initdata = ++ MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(37), 14, 2, MFLAGS); + -+ clk_table = kcalloc(nr_clks, sizeof(struct clk *), GFP_KERNEL); -+ if (!clk_table) -+ goto err_free; ++static struct rockchip_clk_branch rk3562_rtc32k_pmu_fracmux __initdata = ++ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3562_PMU0_CLKSEL_CON(1), 0, 2, MFLAGS); + -+ for (i = 0; i < nr_clks; ++i) -+ clk_table[i] = ERR_PTR(-ENOENT); ++static struct rockchip_clk_branch rk3562_clk_pmu1_uart0_fracmux __initdata = ++ MUX(CLK_PMU1_UART0, "clk_pmu1_uart0", clk_pmu1_uart0_p, CLK_SET_RATE_PARENT, ++ RK3562_PMU1_CLKSEL_CON(2), 6, 2, MFLAGS); + -+ ctx->reg_base = base; -+ ctx->clk_data.clks = clk_table; -+ ctx->clk_data.clk_num = nr_clks; -+ ctx->cru_node = np; -+ spin_lock_init(&ctx->lock); ++static struct rockchip_clk_branch rk3562_clk_branches[] __initdata = { ++ /* ++ * CRU Clock-Architecture ++ */ ++ /* PD_TOP */ ++ COMPOSITE(CLK_MATRIX_50M_SRC, "clk_matrix_50m_src", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE(CLK_MATRIX_100M_SRC, "clk_matrix_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(CLK_MATRIX_125M_SRC, "clk_matrix_125m_src", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(CLK_MATRIX_200M_SRC, "clk_matrix_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE(CLK_MATRIX_300M_SRC, "clk_matrix_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE(ACLK_TOP, "aclk_top", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(5), 7, 1, MFLAGS, 
0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(ACLK_TOP_VIO, "aclk_top_vio", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE(CLK_24M_SSCSRC, "clk_24m_sscsrc", vpll_dmyhpll_gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE(CLK_CAM0_OUT2IO, "clk_cam0_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE(CLK_CAM1_OUT2IO, "clk_cam1_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(8), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE(CLK_CAM2_OUT2IO, "clk_cam2_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(CLK_CAM3_OUT2IO, "clk_cam3_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 15, GFLAGS), ++ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), + -+ ctx->grf = syscon_regmap_lookup_by_phandle(ctx->cru_node, -+ "rockchip,grf"); -+ ctx->pmugrf = syscon_regmap_lookup_by_phandle(ctx->cru_node, -+ "rockchip,pmugrf"); ++ /* PD_BUS */ ++ COMPOSITE(ACLK_BUS, "aclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(40), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(18), 0, GFLAGS), ++ COMPOSITE(HCLK_BUS, "hclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(40), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(18), 1, GFLAGS), ++ COMPOSITE(PCLK_BUS, "pclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(41), 8, 2, MFLAGS, ++ RK3562_CLKGATE_CON(19), 5, GFLAGS), ++ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 8, GFLAGS), ++ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 9, GFLAGS), ++ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_BUS_GPIO, "dclk_bus_gpio", mux_xin24m_32k_p, 0, ++ RK3562_CLKSEL_CON(41), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(20), 4, GFLAGS), ++ GATE(DCLK_BUS_GPIO3, "dclk_bus_gpio3", "dclk_bus_gpio", 0, ++ RK3562_CLKGATE_CON(20), 5, GFLAGS), ++ GATE(DCLK_BUS_GPIO4, "dclk_bus_gpio4", "dclk_bus_gpio", 0, ++ RK3562_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(21), 0, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 1, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 4, GFLAGS), ++ 
GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 6, GFLAGS), ++ GATE(PCLK_STIMER, "pclk_stimer", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 7, GFLAGS), ++ GATE(CLK_STIMER0, "clk_stimer0", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 8, GFLAGS), ++ GATE(CLK_STIMER1, "clk_stimer1", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 9, GFLAGS), ++ GATE(PCLK_WDTNS, "pclk_wdtns", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 0, GFLAGS), ++ GATE(CLK_WDTNS, "clk_wdtns", "xin24m", 0, ++ RK3562_CLKGATE_CON(22), 1, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 2, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 4, GFLAGS), ++ GATE(PCLK_INTC, "pclk_intc", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 5, GFLAGS), ++ GATE(ACLK_BUS_GIC400, "aclk_bus_gic400", "aclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 6, GFLAGS), ++ GATE(ACLK_BUS_SPINLOCK, "aclk_bus_spinlock", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(23), 0, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(FCLK_BUS_CM0_CORE, "fclk_bus_cm0_core", "hclk_bus", 0, ++ RK3562_CLKGATE_CON(23), 3, GFLAGS), ++ GATE(CLK_BUS_CM0_RTC, "clk_bus_cm0_rtc", "clk_rtc_32k", 0, ++ RK3562_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(HCLK_ICACHE, "hclk_icache", "hclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 8, GFLAGS), ++ GATE(HCLK_DCACHE, "hclk_dcache", "hclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 9, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(24), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK3562_CLKSEL_CON(43), 0, 11, DFLAGS, ++ RK3562_CLKGATE_CON(24), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ RK3562_CLKSEL_CON(43), 11, 5, DFLAGS, ++ RK3562_CLKGATE_CON(24), 3, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(24), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC_VCCIO156, "clk_saradc_vccio156", "xin24m", 0, ++ RK3562_CLKSEL_CON(44), 0, 12, DFLAGS, ++ RK3562_CLKGATE_CON(24), 9, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 0, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC_125M_CRU_I, "clk_gmac_125m_cru_i", mux_125m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(45), 8, 1, MFLAGS, ++ RK3562_CLKGATE_CON(25), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC_50M_CRU_I, "clk_gmac_50m_cru_i", mux_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(45), 7, 1, MFLAGS, ++ RK3562_CLKGATE_CON(25), 3, GFLAGS), ++ COMPOSITE(CLK_GMAC_ETH_OUT2IO, "clk_gmac_eth_out2io", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(PCLK_APB2ASB_VCCIO156, "pclk_apb2asb_vccio156", "pclk_bus", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(PCLK_TO_VCCIO156, "pclk_to_vccio156", "pclk_bus", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(25), 6, GFLAGS), ++ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 8, GFLAGS), ++ GATE(PCLK_DSITX, "pclk_dsitx", "pclk_bus", 0, ++ 
RK3562_CLKGATE_CON(25), 9, GFLAGS), ++ GATE(PCLK_CPU_EMA_DET, "pclk_cpu_ema_det", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(25), 10, GFLAGS), ++ GATE(PCLK_HASH, "pclk_hash", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 11, GFLAGS), ++ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(25), 15, GFLAGS), ++ GATE(PCLK_ASB2APB_VCCIO156, "pclk_asb2apb_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(PCLK_IOC_VCCIO156, "pclk_ioc_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(26), 1, GFLAGS), ++ GATE(PCLK_GPIO3_VCCIO156, "pclk_gpio3_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 2, GFLAGS), ++ GATE(PCLK_GPIO4_VCCIO156, "pclk_gpio4_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(PCLK_SARADC_VCCIO156, "pclk_saradc_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(PCLK_MAC100, "pclk_mac100", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(ACLK_MAC100, "aclk_mac100", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(27), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC100_50M_MATRIX, "clk_mac100_50m_matrix", mux_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(47), 7, 1, MFLAGS, ++ RK3562_CLKGATE_CON(27), 2, GFLAGS), + -+#ifdef MODULE -+ hlist_add_head(&ctx->list_node, &clk_ctx_list); -+#endif ++ /* PD_CORE */ ++ COMPOSITE_NOMUX(0, "aclk_core_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, ++ RK3562_CLKSEL_CON(11), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3562_CLKGATE_CON(4), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, ++ RK3562_CLKSEL_CON(12), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3562_CLKGATE_CON(4), 5, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_CORE, "hclk_core", "gpll", CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(13), 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(0, "pclk_dbg_daplite", "pclk_dbg_pre", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(4), 10, GFLAGS), + -+ return ctx; ++ /* PD_DDR */ ++ FACTOR_GATE(0, "clk_gpll_mux_to_ddr", "gpll", 0, 1, 4, ++ RK3328_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, ++ RK3562_DDR_CLKSEL_CON(1), 8, 5, DFLAGS, ++ RK3562_DDR_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MSCH_BRG_BIU, "clk_msch_brg_biu", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, ++ RK3562_DDR_CLKSEL_CON(1), 0, 4, DFLAGS, ++ RK3562_DDR_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_DDR_PHY, "pclk_ddr_phy", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_DDR_DFICTL, "pclk_ddr_dfictl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_DDR_DMA2DDR, "pclk_ddr_dma2ddr", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_DDR_CRU, "pclk_ddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_SUBDDR_CRU, "pclk_subddr_cru", "pclk_ddr", 
CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 4, GFLAGS), + -+err_free: -+ kfree(ctx); -+ return ERR_PTR(-ENOMEM); -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_init); ++ /* PD_GPU */ ++ COMPOSITE(CLK_GPU_PRE, "clk_gpu_pre", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(18), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre", 0, ++ RK3562_CLKSEL_CON(19), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre", 0, ++ RK3562_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_GPU_BRG, "clk_gpu_brg", mux_200m_100m_p, 0, ++ RK3562_CLKSEL_CON(19), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(8), 8, GFLAGS), + -+void rockchip_clk_of_add_provider(struct device_node *np, -+ struct rockchip_clk_provider *ctx) -+{ -+ if (of_clk_add_provider(np, of_clk_src_onecell_get, -+ &ctx->clk_data)) -+ pr_err("%s: could not register clk provider\n", __func__); -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_of_add_provider); ++ /* PD_NPU */ ++ COMPOSITE(CLK_NPU_PRE, "clk_npu_pre", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu_pre", 0, ++ RK3562_CLKSEL_CON(16), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(ACLK_RKNN, "aclk_rknn", "clk_npu_pre", 0, ++ RK3562_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_pre", 0, ++ RK3562_CLKGATE_CON(6), 5, GFLAGS), + -+void rockchip_clk_add_lookup(struct rockchip_clk_provider *ctx, -+ struct clk *clk, unsigned int id) -+{ -+ if (ctx->clk_data.clks && id) -+ ctx->clk_data.clks[id] = clk; -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_add_lookup); -+ -+void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, -+ struct rockchip_pll_clock *list, -+ unsigned int nr_pll, int grf_lock_offset) -+{ -+ struct clk *clk; -+ int idx; -+ -+ for (idx = 0; idx < nr_pll; idx++, list++) { -+ clk = rockchip_clk_register_pll(ctx, list->type, list->name, -+ list->parent_names, list->num_parents, -+ list->con_offset, grf_lock_offset, -+ list->lock_shift, list->mode_offset, -+ list->mode_shift, list->rate_table, -+ list->flags, list->pll_flags); -+ if (IS_ERR(clk)) { -+ pr_err("%s: failed to register clock %s\n", __func__, -+ list->name); -+ continue; -+ } ++ /* PD_PERI */ ++ COMPOSITE(ACLK_PERI, "aclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(HCLK_PERI, "hclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE(PCLK_PERI, "pclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PERICRU, "pclk_pericru", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(HCLK_SAI0, "hclk_sai0", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE(CLK_SAI0_SRC, "clk_sai0_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(1), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI0_FRAC, "clk_sai0_frac", "clk_sai0_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(2), 0, ++ RK3562_PERI_CLKGATE_CON(2), 2, GFLAGS, ++ &rk3562_clk_sai0_fracmux), ++ GATE(MCLK_SAI0, "mclk_sai0", "clk_sai0", 0, ++ RK3562_PERI_CLKGATE_CON(2), 3, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI0_OUT2IO, "mclk_sai0_out2io", 
mclk_sai0_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(3), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(HCLK_SAI1, "hclk_sai1", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE(CLK_SAI1_SRC, "clk_sai1_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI1_FRAC, "clk_sai1_frac", "clk_sai1_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(4), 0, ++ RK3562_PERI_CLKGATE_CON(2), 7, GFLAGS, ++ &rk3562_clk_sai1_fracmux), ++ GATE(MCLK_SAI1, "mclk_sai1", "clk_sai1", 0, ++ RK3562_PERI_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI1_OUT2IO, "mclk_sai1_out2io", mclk_sai1_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(5), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(HCLK_SAI2, "hclk_sai2", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE(CLK_SAI2_SRC, "clk_sai2_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI2_FRAC, "clk_sai2_frac", "clk_sai2_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(7), 0, ++ RK3562_PERI_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3562_clk_sai2_fracmux), ++ GATE(MCLK_SAI2, "mclk_sai2", "clk_sai2", 0, ++ RK3562_PERI_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI2_OUT2IO, "mclk_sai2_out2io", mclk_sai2_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(8), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(HCLK_DSM, "hclk_dsm", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 1, GFLAGS), ++ GATE(CLK_DSM, "clk_dsm", "mclk_sai1", 0, ++ RK3562_PERI_CLKGATE_CON(3), 2, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE(MCLK_PDM, "mclk_pdm", gpll_cpll_hpll_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE(CLK_SPDIF_SRC, "clk_spdif_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(14), 0, ++ RK3562_PERI_CLKGATE_CON(3), 10, GFLAGS, ++ &rk3562_clk_spdif_fracmux), ++ GATE(MCLK_SPDIF, "mclk_spdif", "clk_spdif", 0, ++ RK3562_PERI_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 0, GFLAGS), ++ COMPOSITE(CCLK_SDMMC0, "cclk_sdmmc0", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 1, GFLAGS), ++ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "cclk_sdmmc0", RK3562_SDMMC0_CON0, 1), ++ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "cclk_sdmmc0", RK3562_SDMMC0_CON1, 1), ++ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE(CCLK_SDMMC1, "cclk_sdmmc1", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(17), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 3, GFLAGS), ++ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "cclk_sdmmc1", RK3562_SDMMC1_CON0, 1), ++ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "cclk_sdmmc1", RK3562_SDMMC1_CON1, 1), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(ACLK_EMMC, 
"aclk_emmc", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 9, GFLAGS), ++ COMPOSITE(CCLK_EMMC, "cclk_emmc", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 10, GFLAGS), ++ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(4), 12, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(HCLK_USB2HOST, "hclk_usb2host", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(HCLK_USB2HOST_ARB, "hclk_usb2host_arb", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 12, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, ++ RK3562_PERI_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 14, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(SCLK_IN_SPI2, "sclk_in_spi2", "sclk_in_spi2_io", 0, ++ RK3562_PERI_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(PCLK_UART8, "pclk_uart8", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_UART9, "pclk_uart9", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(21), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(7), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(22), 0, ++ RK3562_PERI_CLKGATE_CON(7), 10, GFLAGS, ++ &rk3562_clk_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RK3562_PERI_CLKGATE_CON(7), 11, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(23), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(7), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(24), 0, ++ RK3562_PERI_CLKGATE_CON(7), 13, GFLAGS, ++ &rk3562_clk_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RK3562_PERI_CLKGATE_CON(7), 14, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ 
RK3562_PERI_CLKGATE_CON(7), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(26), 0, ++ RK3562_PERI_CLKGATE_CON(8), 0, GFLAGS, ++ &rk3562_clk_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RK3562_PERI_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(27), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 2, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(28), 0, ++ RK3562_PERI_CLKGATE_CON(8), 3, GFLAGS, ++ &rk3562_clk_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RK3562_PERI_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(29), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(30), 0, ++ RK3562_PERI_CLKGATE_CON(8), 6, GFLAGS, ++ &rk3562_clk_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RK3562_PERI_CLKGATE_CON(8), 7, GFLAGS), ++ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(31), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(32), 0, ++ RK3562_PERI_CLKGATE_CON(8), 9, GFLAGS, ++ &rk3562_clk_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, ++ RK3562_PERI_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(34), 0, ++ RK3562_PERI_CLKGATE_CON(8), 12, GFLAGS, ++ &rk3562_clk_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, ++ RK3562_PERI_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(35), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(36), 0, ++ RK3562_PERI_CLKGATE_CON(8), 15, GFLAGS, ++ &rk3562_clk_uart8_fracmux), ++ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, ++ RK3562_PERI_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(37), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(38), 0, ++ RK3562_PERI_CLKGATE_CON(9), 2, GFLAGS, ++ &rk3562_clk_uart9_fracmux), ++ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, ++ RK3562_PERI_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 0, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 3, 
GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 6, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(PCLK_PWM3_PERI, "pclk_pwm3_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM3_PERI, "clk_pwm3_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 8, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(CLK_CAPTURE_PWM3_PERI, "clk_capture_pwm3_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(11), 2, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(41), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(PCLK_PERI_WDT, "pclk_peri_wdt", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PERI_WDT, "tclk_peri_wdt", mux_xin24m_32k_p, 0, ++ RK3562_PERI_CLKSEL_CON(43), 15, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(ACLK_SYSMEM, "aclk_sysmem", "aclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 2, GFLAGS), ++ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(PCLK_PERI_GRF, "pclk_peri_grf", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(ACLK_RKDMAC, "aclk_rkdmac", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(14), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, ++ RK3562_PERI_CLKSEL_CON(44), 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(14), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKSEL_CON(44), 8, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(PCLK_USB2PHY, "pclk_usb2phy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(PCLK_PIPEPHY, "pclk_pipephy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(16), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3562_PERI_CLKSEL_CON(46), 0, 12, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_IOC_VCCIO234, "pclk_ioc_vccio234", "pclk_peri", CLK_IS_CRITICAL, ++ RK3562_PERI_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_PERI_GPIO1, "pclk_peri_gpio1", "pclk_peri", 0, ++ 
RK3562_PERI_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(PCLK_PERI_GPIO2, "pclk_peri_gpio2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(17), 1, GFLAGS), ++ COMPOSITE_NODIV(DCLK_PERI_GPIO, "dclk_peri_gpio", mux_xin24m_32k_p, 0, ++ RK3562_PERI_CLKSEL_CON(47), 8, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(DCLK_PERI_GPIO1, "dclk_peri_gpio1", "dclk_peri_gpio", 0, ++ RK3562_PERI_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(DCLK_PERI_GPIO2, "dclk_peri_gpio2", "dclk_peri_gpio", 0, ++ RK3562_PERI_CLKGATE_CON(17), 3, GFLAGS), + -+ rockchip_clk_add_lookup(ctx, clk, list->id); -+ } -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_register_plls); ++ /* PD_PHP */ ++ COMPOSITE(ACLK_PHP, "aclk_php", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(16), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", 0, ++ RK3562_CLKSEL_CON(36), 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_php", 0, ++ RK3562_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(CLK_PCIE20_AUX, "clk_pcie20_aux", "xin24m", 0, ++ RK3562_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, ++ RK3562_CLKSEL_CON(36), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(CLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, ++ RK3562_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(CLK_PIPEPHY_REF_FUNC, "clk_pipephy_ref_func", "pclk_pcie20", 0, ++ RK3562_CLKGATE_CON(17), 3, GFLAGS), + -+void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, -+ struct rockchip_clk_branch *list, -+ unsigned int nr_clk) -+{ -+ struct clk *clk = NULL; -+ unsigned int idx; -+ unsigned long flags; ++ /* PD_PMU1 */ ++ COMPOSITE_NOMUX(CLK_200M_PMU, "clk_200m_pmu", "cpll", CLK_IS_CRITICAL, ++ RK3562_PMU1_CLKSEL_CON(0), 0, 5, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(0), 1, GFLAGS), ++ /* PD_PMU0 */ ++ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKSEL_CON(0), 0, ++ RK3562_PMU0_CLKGATE_CON(0), 15, GFLAGS, ++ &rk3562_rtc32k_pmu_fracmux), ++ COMPOSITE_NOMUX(BUSCLK_PDPMU0, "busclk_pdpmu0", "clk_200m_pmu", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKSEL_CON(1), 3, 2, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_PMU0_CRU, "pclk_pmu0_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(PCLK_PMU0_PMU, "pclk_pmu0_pmu", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(CLK_PMU0_PMU, "clk_pmu0_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU0_HP_TIMER, "pclk_pmu0_hp_timer", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(CLK_PMU0_HP_TIMER, "clk_pmu0_hp_timer", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(CLK_PMU0_32K_HP_TIMER, "clk_pmu0_32k_hp_timer", "clk_rtc_32k", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_PMU0_PVTM, "pclk_pmu0_pvtm", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(CLK_PMU0_PVTM, "clk_pmu0_pvtm", "xin24m", 0, ++ 
RK3562_PMU0_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_IOC_PMUIO, "pclk_ioc_pmuio", "busclk_pdpmu0", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_PMU0_GPIO0, "pclk_pmu0_gpio0", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(DBCLK_PMU0_GPIO0, "dbclk_pmu0_gpio0", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_PMU0_GRF, "pclk_pmu0_grf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 11, GFLAGS), ++ GATE(PCLK_PMU0_SGRF, "pclk_pmu0_sgrf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(PCLK_PMU0_SCRKEYGEN, "pclk_pmu0_scrkeygen", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PIPEPHY_DIV, "clk_pipephy_div", "cpll", 0, ++ RK3562_PMU0_CLKSEL_CON(2), 0, 6, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PIPEPHY_XIN24M, "clk_pipephy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_PIPEPHY_REF, "clk_pipephy_ref", clk_pipephy_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 7, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(CLK_USB2PHY_XIN24M, "clk_usb2phy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB2PHY_REF, "clk_usb2phy_ref", clk_usbphy_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 8, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(CLK_MIPIDSIPHY_XIN24M, "clk_mipidsiphy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", clk_mipidsi_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 15, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_PMU0_I2C0, "pclk_pmu0_i2c0", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE(CLK_PMU0_I2C0, "clk_pmu0_i2c0", mux_200m_xin24m_32k_p, 0, ++ RK3562_PMU0_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 9, GFLAGS), ++ /* PD_PMU1 */ ++ GATE(PCLK_PMU1_CRU, "pclk_pmu1_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU1_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(HCLK_PMU1_MEM, "hclk_pmu1_mem", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU1_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU1_UART0, "pclk_pmu1_uart0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PMU1_UART0_SRC, "clk_pmu1_uart0_src", "cpll", 0, ++ RK3562_PMU1_CLKSEL_CON(2), 0, 4, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_PMU1_UART0_FRAC, "clk_pmu1_uart0_frac", "clk_pmu1_uart0_src", CLK_SET_RATE_PARENT, ++ RK3562_PMU1_CLKSEL_CON(3), 0, ++ RK3562_PMU1_CLKGATE_CON(0), 9, GFLAGS, ++ &rk3562_clk_pmu1_uart0_fracmux), ++ GATE(SCLK_PMU1_UART0, "sclk_pmu1_uart0", "clk_pmu1_uart0", 0, ++ RK3562_PMU1_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_PMU1_SPI0, "pclk_pmu1_spi0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(CLK_PMU1_SPI0, "clk_pmu1_spi0", mux_200m_xin24m_32k_p, 0, ++ RK3562_PMU1_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 2, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(SCLK_IN_PMU1_SPI0, "sclk_in_pmu1_spi0", "sclk_in_pmu1_spi0_io", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PMU1_PWM0, "pclk_pmu1_pwm0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE(CLK_PMU1_PWM0, "clk_pmu1_pwm0", mux_200m_xin24m_32k_p, 0, ++ 
RK3562_PMU1_CLKSEL_CON(4), 14, 2, MFLAGS, 8, 2, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PMU1_PWM0, "clk_capture_pmu1_pwm0", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(CLK_PMU1_WIFI, "clk_pmu1_wifi", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(FCLK_PMU1_CM0_CORE, "fclk_pmu1_cm0_core", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PMU1_CM0_RTC, "clk_pmu1_cm0_rtc", "clk_rtc_32k", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_PMU1_WDTNS, "pclk_pmu1_wdtns", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(CLK_PMU1_WDTNS, "clk_pmu1_wdtns", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(PCLK_PMU1_MAILBOX, "pclk_pmu1_mailbox", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(3), 8, GFLAGS), + -+ for (idx = 0; idx < nr_clk; idx++, list++) { -+ flags = list->flags; ++ /* PD_RGA */ ++ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_jdec", 0, ++ RK3562_CLKSEL_CON(32), 8, 3, DFLAGS, ++ RK3562_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_jdec", 0, ++ RK3562_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, ++ RK3562_CLKGATE_CON(14), 7, GFLAGS), ++ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_jdec", 0, ++ RK3562_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, ++ RK3562_CLKGATE_CON(14), 10, GFLAGS), + -+ /* catch simple muxes */ -+ switch (list->branch_type) { -+ case branch_mux: -+ if (list->mux_table) -+ clk = clk_register_mux_table(NULL, list->name, -+ list->parent_names, list->num_parents, -+ flags, -+ ctx->reg_base + list->muxdiv_offset, -+ list->mux_shift, -+ BIT(list->mux_width) - 1, -+ list->mux_flags, list->mux_table, -+ &ctx->lock); -+ else -+ clk = clk_register_mux(NULL, list->name, -+ list->parent_names, list->num_parents, -+ flags, -+ ctx->reg_base + list->muxdiv_offset, -+ list->mux_shift, list->mux_width, -+ list->mux_flags, &ctx->lock); -+ break; -+ case branch_muxgrf: -+ clk = rockchip_clk_register_muxgrf(list->name, -+ list->parent_names, list->num_parents, -+ flags, ctx->grf, list->muxdiv_offset, -+ list->mux_shift, list->mux_width, -+ list->mux_flags); -+ break; -+ case branch_muxpmugrf: -+ clk = rockchip_clk_register_muxgrf(list->name, -+ list->parent_names, list->num_parents, -+ flags, ctx->pmugrf, list->muxdiv_offset, -+ list->mux_shift, list->mux_width, -+ list->mux_flags); -+ break; -+ case branch_divider: -+ if (list->div_table) -+ clk = clk_register_divider_table(NULL, -+ list->name, list->parent_names[0], -+ flags, -+ ctx->reg_base + list->muxdiv_offset, -+ list->div_shift, list->div_width, -+ list->div_flags, list->div_table, -+ &ctx->lock); -+ else -+ clk = clk_register_divider(NULL, list->name, -+ list->parent_names[0], flags, -+ ctx->reg_base + list->muxdiv_offset, -+ list->div_shift, list->div_width, -+ list->div_flags, &ctx->lock); -+ break; -+ case branch_fraction_divider: -+ clk = rockchip_clk_register_frac_branch(ctx, list->name, -+ list->parent_names, list->num_parents, -+ ctx->reg_base, list->muxdiv_offset, -+ list->div_flags, -+ list->gate_offset, list->gate_shift, -+ list->gate_flags, flags, list->child, 
-+ &ctx->lock); -+ break; -+ case branch_half_divider: -+ clk = rockchip_clk_register_halfdiv(list->name, -+ list->parent_names, list->num_parents, -+ ctx->reg_base, list->muxdiv_offset, -+ list->mux_shift, list->mux_width, -+ list->mux_flags, list->div_offset, -+ list->div_shift, list->div_width, -+ list->div_flags, list->gate_offset, -+ list->gate_shift, list->gate_flags, -+ flags, &ctx->lock); -+ break; -+ case branch_gate: -+ flags |= CLK_SET_RATE_PARENT; ++ /* PD_VDPU */ ++ COMPOSITE(ACLK_VDPU_PRE, "aclk_vdpu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_CLKGATE_CON(10), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VDPU_PRE, "hclk_vdpu_pre", "aclk_vdpu", 0, ++ RK3562_CLKSEL_CON(24), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_vdpu", 0, ++ RK3562_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_vdpu_pre", 0, ++ RK3562_CLKGATE_CON(10), 8, GFLAGS), + -+ clk = clk_register_gate(NULL, list->name, -+ list->parent_names[0], flags, -+ ctx->reg_base + list->gate_offset, -+ list->gate_shift, list->gate_flags, &ctx->lock); -+ break; -+ case branch_gate_no_set_rate: -+ flags &= ~CLK_SET_RATE_PARENT; ++ /* PD_VEPU */ ++ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(20), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE(ACLK_VEPU_PRE, "aclk_vepu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VEPU_PRE, "hclk_vepu_pre", "aclk_vepu", 0, ++ RK3562_CLKSEL_CON(21), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_vepu", 0, ++ RK3562_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_vepu", 0, ++ RK3562_CLKGATE_CON(9), 6, GFLAGS), + -+ clk = clk_register_gate(NULL, list->name, -+ list->parent_names[0], flags, -+ ctx->reg_base + list->gate_offset, -+ list->gate_shift, list->gate_flags, &ctx->lock); -+ break; -+ case branch_composite: -+ clk = rockchip_clk_register_branch(list->name, -+ list->parent_names, list->num_parents, -+ ctx->reg_base, list->muxdiv_offset, -+ list->mux_shift, -+ list->mux_width, list->mux_flags, -+ list->mux_table, list->div_offset, -+ list->div_shift, list->div_width, -+ list->div_flags, list->div_table, -+ list->gate_offset, list->gate_shift, -+ list->gate_flags, flags, &ctx->lock); -+ break; -+ case branch_mmc: -+ clk = rockchip_clk_register_mmc( -+ list->name, -+ list->parent_names, list->num_parents, -+ ctx->reg_base + list->muxdiv_offset, -+ list->div_shift -+ ); -+ break; -+ case branch_inverter: -+#ifdef CONFIG_ROCKCHIP_CLK_INV -+ clk = rockchip_clk_register_inverter( -+ list->name, list->parent_names, -+ list->num_parents, -+ ctx->reg_base + list->muxdiv_offset, -+ list->div_shift, list->div_flags, &ctx->lock); -+#endif -+ break; -+ case branch_factor: -+ clk = rockchip_clk_register_factor_branch( -+ list->name, list->parent_names, -+ list->num_parents, ctx->reg_base, -+ list->div_shift, list->div_width, -+ list->gate_offset, list->gate_shift, -+ list->gate_flags, flags, &ctx->lock); -+ break; -+ case branch_ddrclk: -+ clk = rockchip_clk_register_ddrclk( -+ list->name, list->flags, -+ list->parent_names, list->num_parents, -+ 
list->muxdiv_offset, list->mux_shift, -+ list->mux_width, list->div_shift, -+ list->div_width, list->div_flags, -+ ctx->reg_base); -+ break; -+ } ++ /* PD_VI */ ++ COMPOSITE(ACLK_VI, "aclk_vi", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi_isp", 0, ++ RK3562_CLKSEL_CON(26), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi_isp", 0, ++ RK3562_CLKSEL_CON(26), 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_isp", 0, ++ RK3562_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, ++ RK3562_CLKGATE_CON(11), 7, GFLAGS), ++ COMPOSITE(CLK_ISP, "clk_isp", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(27), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 8, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_isp", 0, ++ RK3562_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, ++ RK3562_CLKGATE_CON(11), 10, GFLAGS), ++ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 11, GFLAGS), ++ GATE(CSIRX0_CLK_DATA, "csirx0_clk_data", "csirx0_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 12, GFLAGS), ++ GATE(CSIRX1_CLK_DATA, "csirx1_clk_data", "csirx1_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 13, GFLAGS), ++ GATE(CSIRX2_CLK_DATA, "csirx2_clk_data", "csirx2_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 14, GFLAGS), ++ GATE(CSIRX3_CLK_DATA, "csirx3_clk_data", "csirx3_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 15, GFLAGS), ++ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(PCLK_CSIHOST2, "pclk_csihost2", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(PCLK_CSIHOST3, "pclk_csihost3", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 5, GFLAGS), + -+ /* none of the cases above matched */ -+ if (!clk) { -+ pr_err("%s: unknown clock type %d\n", -+ __func__, list->branch_type); -+ continue; -+ } ++ /* PD_VO */ ++ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", gpll_cpll_vpll_dmyhpll_p, 0, ++ RK3562_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo", 0, ++ RK3562_CLKSEL_CON(29), 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vo", 0, ++ RK3562_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_pre", 0, ++ RK3562_CLKGATE_CON(13), 7, GFLAGS), ++ COMPOSITE(DCLK_VOP, "dclk_vop", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3562_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE(DCLK_VOP1, "dclk_vop1", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3562_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_CLKGATE_CON(13), 9, GFLAGS), ++}; + -+ if (IS_ERR(clk)) { -+ pr_err("%s: failed to register clock %s: %ld\n", -+ __func__, list->name, PTR_ERR(clk)); -+ continue; -+ } ++static void __iomem *rk3562_cru_base; + -+ rockchip_clk_add_lookup(ctx, clk, list->id); ++static void rk3562_dump_cru(void) ++{ ++ if (rk3562_cru_base) { ++ 
pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3562_cru_base, ++ 0x600, false); + } +} -+EXPORT_SYMBOL_GPL(rockchip_clk_register_branches); + -+void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx, -+ unsigned int lookup_id, -+ const char *name, -+ u8 num_parents, -+ struct clk *parent, struct clk *alt_parent, -+ const struct rockchip_cpuclk_reg_data *reg_data, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates) ++static int protect_clocks[] = { ++ ACLK_VO_PRE, ++ HCLK_VO_PRE, ++ ACLK_VOP, ++ HCLK_VOP, ++ DCLK_VOP, ++ DCLK_VOP1, ++}; ++ ++static void __init rk3562_clk_init(struct device_node *np) +{ -+ struct clk *clk; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ clk = rockchip_clk_register_cpuclk(name, num_parents, -+ parent, alt_parent, -+ reg_data, rates, nrates, -+ ctx->reg_base, &ctx->lock); -+ if (IS_ERR(clk)) { -+ pr_err("%s: failed to register clock %s: %ld\n", -+ __func__, name, PTR_ERR(clk)); ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); + return; + } + -+ rockchip_clk_add_lookup(ctx, clk, lookup_id); -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_register_armclk); -+ -+void rockchip_clk_register_armclk_v2(struct rockchip_clk_provider *ctx, -+ struct rockchip_clk_branch *list, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates) -+{ -+ struct clk *clk; ++ rk3562_cru_base = reg_base; + -+ clk = rockchip_clk_register_cpuclk_v2(list->name, list->parent_names, -+ list->num_parents, ctx->reg_base, -+ list->muxdiv_offset, list->mux_shift, -+ list->mux_width, list->mux_flags, -+ list->div_offset, list->div_shift, -+ list->div_width, list->div_flags, -+ list->flags, &ctx->lock, rates, nrates); -+ if (IS_ERR(clk)) { -+ pr_err("%s: failed to register clock %s: %ld\n", -+ __func__, list->name, PTR_ERR(clk)); ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); + return; + } + -+ rockchip_clk_add_lookup(ctx, clk, list->id); -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_register_armclk_v2); -+ -+void (*rk_dump_cru)(void); -+EXPORT_SYMBOL(rk_dump_cru); -+ -+static int rk_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) -+{ -+ if (rk_dump_cru) -+ rk_dump_cru(); -+ return NOTIFY_DONE; -+} ++ rockchip_clk_register_plls(ctx, rk3562_pll_clks, ++ ARRAY_SIZE(rk3562_pll_clks), ++ RK3562_GRF_SOC_STATUS0); + -+static struct notifier_block rk_clk_panic_block = { -+ .notifier_call = rk_clk_panic, -+}; ++ rockchip_clk_register_branches(ctx, rk3562_clk_branches, ++ ARRAY_SIZE(rk3562_clk_branches)); + -+static void __iomem *rst_base; -+static unsigned int reg_restart; -+static void (*cb_restart)(void); -+static int rockchip_restart_notify(struct notifier_block *this, -+ unsigned long mode, void *cmd) -+{ -+ if (cb_restart) -+ cb_restart(); ++ /* (0x30444 - 0x400) / 4 + 1 = 49170 */ ++ rockchip_register_softrst(np, 49170, reg_base + RK3562_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ writel(0xfdb9, rst_base + reg_restart); -+ return NOTIFY_DONE; -+} ++ rockchip_register_restart_notifier(ctx, RK3562_GLB_SRST_FST, NULL); + -+static struct notifier_block rockchip_restart_handler = { -+ .notifier_call = rockchip_restart_notify, -+ .priority = 128, -+}; ++ rockchip_clk_of_add_provider(np, ctx); + -+void -+rockchip_register_restart_notifier(struct rockchip_clk_provider *ctx, -+ unsigned int reg, -+ void (*cb)(void)) -+{ -+ int ret; ++ if 
(!rk_dump_cru) ++ rk_dump_cru = rk3562_dump_cru; + -+ rst_base = ctx->reg_base; -+ reg_restart = reg; -+ cb_restart = cb; -+ ret = register_restart_handler(&rockchip_restart_handler); -+ if (ret) -+ pr_err("%s: cannot register restart handler, %d\n", -+ __func__, ret); -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rk_clk_panic_block); ++ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); +} -+EXPORT_SYMBOL_GPL(rockchip_register_restart_notifier); -+ -+#ifdef MODULE -+static struct clk **protect_clocks; -+static unsigned int protect_nclocks; -+ -+int rockchip_clk_protect(struct rockchip_clk_provider *ctx, -+ unsigned int *clocks, unsigned int nclocks) -+{ -+ struct clk *clk = NULL; -+ int i = 0; + -+ if (protect_clocks || !ctx || !clocks || !ctx->clk_data.clks) -+ return 0; ++CLK_OF_DECLARE(rk3562_cru, "rockchip,rk3562-cru", rk3562_clk_init); + -+ protect_clocks = kcalloc(nclocks, sizeof(void *), GFP_KERNEL); -+ if (!protect_clocks) -+ return -ENOMEM; ++#ifdef MODULE ++struct clk_rk3562_inits { ++ void (*inits)(struct device_node *np); ++}; + -+ for (i = 0; i < nclocks; i++) { -+ if (clocks[i] >= ctx->clk_data.clk_num) { -+ pr_err("%s: invalid clock id %u\n", __func__, clocks[i]); -+ continue; -+ } -+ clk = ctx->clk_data.clks[clocks[i]]; -+ if (clk) { -+ clk_prepare_enable(clk); -+ protect_clocks[i] = clk; -+ } -+ } -+ protect_nclocks = nclocks; ++static const struct clk_rk3562_inits clk_3562_cru_init = { ++ .inits = rk3562_clk_init, ++}; + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_protect); ++static const struct of_device_id clk_rk3562_match_table[] = { ++ { ++ .compatible = "rockchip,rk3562-cru", ++ .data = &clk_3562_cru_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3562_match_table); + -+void rockchip_clk_unprotect(void) ++static int clk_rk3562_probe(struct platform_device *pdev) +{ -+ int i = 0; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3562_inits *init_data; + -+ if (!protect_clocks || !protect_nclocks) -+ return; ++ match = of_match_device(clk_rk3562_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; + -+ for (i = 0; i < protect_nclocks; i++) { -+ if (protect_clocks[i]) -+ clk_disable_unprepare(protect_clocks[i]); -+ } -+ protect_nclocks = 0; -+ kfree(protect_clocks); -+ protect_clocks = NULL; ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + ++ return 0; +} -+EXPORT_SYMBOL_GPL(rockchip_clk_unprotect); + -+void rockchip_clk_disable_unused(void) -+{ -+ struct rockchip_clk_provider *ctx; -+ struct clk *clk; -+ struct clk_hw *hw; -+ int i = 0, flag = 0; ++static struct platform_driver clk_rk3562_driver = { ++ .probe = clk_rk3562_probe, ++ .driver = { ++ .name = "clk-rk3562", ++ .of_match_table = clk_rk3562_match_table, ++ .suppress_bind_attrs = true, ++ }, ++}; ++module_platform_driver(clk_rk3562_driver); + -+ hlist_for_each_entry(ctx, &clk_ctx_list, list_node) { -+ for (i = 0; i < ctx->clk_data.clk_num; i++) { -+ clk = ctx->clk_data.clks[i]; -+ if (clk && !IS_ERR(clk)) { -+ hw = __clk_get_hw(clk); -+ if (hw) -+ flag = clk_hw_get_flags(hw); -+ if (flag & CLK_IGNORE_UNUSED) -+ continue; -+ if (flag & CLK_IS_CRITICAL) -+ continue; -+ clk_prepare_enable(clk); -+ clk_disable_unprepare(clk); -+ } -+ } -+ } -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_disable_unused); ++MODULE_DESCRIPTION("Rockchip RK3562 Clock Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("platform:clk-rk3562"); +#endif /* MODULE */ -diff --git 
a/drivers/clk/rockchip-oh/clk.h b/drivers/clk/rockchip-oh/clk.h +diff --git a/drivers/clk/rockchip-oh/clk-rk3568.c b/drivers/clk/rockchip-oh/clk-rk3568.c new file mode 100644 -index 000000000..82345742c +index 000000000..3e6955066 --- /dev/null -+++ b/drivers/clk/rockchip-oh/clk.h -@@ -0,0 +1,1336 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ ++++ b/drivers/clk/rockchip-oh/clk-rk3568.c +@@ -0,0 +1,1775 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ * -+ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. -+ * Author: Xing Zheng -+ * -+ * based on -+ * -+ * samsung/clk.h -+ * Copyright (c) 2013 Samsung Electronics Co., Ltd. -+ * Copyright (c) 2013 Linaro Ltd. -+ * Author: Thomas Abraham ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang + */ + -+#ifndef CLK_ROCKCHIP_CLK_H -+#define CLK_ROCKCHIP_CLK_H -+ -+#include +#include -+#include -+ -+struct clk; -+ -+#define HIWORD_UPDATE(val, mask, shift) \ -+ ((val) << (shift) | (mask) << ((shift) + 16)) -+ -+/* register positions shared by PX30, RV1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */ -+#define BOOST_PLL_H_CON(x) ((x) * 0x4) -+#define BOOST_CLK_CON 0x0008 -+#define BOOST_BOOST_CON 0x000c -+#define BOOST_SWITCH_CNT 0x0010 -+#define BOOST_HIGH_PERF_CNT0 0x0014 -+#define BOOST_HIGH_PERF_CNT1 0x0018 -+#define BOOST_STATIS_THRESHOLD 0x001c -+#define BOOST_SHORT_SWITCH_CNT 0x0020 -+#define BOOST_SWITCH_THRESHOLD 0x0024 -+#define BOOST_FSM_STATUS 0x0028 -+#define BOOST_PLL_L_CON(x) ((x) * 0x4 + 0x2c) -+#define BOOST_PLL_CON_MASK 0xffff -+#define BOOST_CORE_DIV_MASK 0x1f -+#define BOOST_CORE_DIV_SHIFT 0 -+#define BOOST_BACKUP_PLL_MASK 0x3 -+#define BOOST_BACKUP_PLL_SHIFT 8 -+#define BOOST_BACKUP_PLL_USAGE_MASK 0x1 -+#define BOOST_BACKUP_PLL_USAGE_SHIFT 12 -+#define BOOST_BACKUP_PLL_USAGE_BORROW 0 -+#define BOOST_BACKUP_PLL_USAGE_TARGET 1 -+#define BOOST_ENABLE_MASK 0x1 -+#define BOOST_ENABLE_SHIFT 0 -+#define BOOST_RECOVERY_MASK 0x1 -+#define BOOST_RECOVERY_SHIFT 1 -+#define BOOST_SW_CTRL_MASK 0x1 -+#define BOOST_SW_CTRL_SHIFT 2 -+#define BOOST_LOW_FREQ_EN_MASK 0x1 -+#define BOOST_LOW_FREQ_EN_SHIFT 3 -+#define BOOST_STATIS_ENABLE_MASK 0x1 -+#define BOOST_STATIS_ENABLE_SHIFT 4 -+#define BOOST_BUSY_STATE BIT(8) -+ -+#define PX30_PLL_CON(x) ((x) * 0x4) -+#define PX30_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define PX30_CLKGATE_CON(x) ((x) * 0x4 + 0x200) -+#define PX30_GLB_SRST_FST 0xb8 -+#define PX30_GLB_SRST_SND 0xbc -+#define PX30_SOFTRST_CON(x) ((x) * 0x4 + 0x300) -+#define PX30_MODE_CON 0xa0 -+#define PX30_MISC_CON 0xa4 -+#define PX30_SDMMC_CON0 0x380 -+#define PX30_SDMMC_CON1 0x384 -+#define PX30_SDIO_CON0 0x388 -+#define PX30_SDIO_CON1 0x38c -+#define PX30_EMMC_CON0 0x390 -+#define PX30_EMMC_CON1 0x394 -+ -+#define PX30_PMU_PLL_CON(x) ((x) * 0x4) -+#define PX30_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x40) -+#define PX30_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x80) -+#define PX30_PMU_MODE 0x0020 -+ -+#define RV1106_TOPCRU_BASE 0x10000 -+#define RV1106_PERICRU_BASE 0x12000 -+#define RV1106_VICRU_BASE 0x14000 -+#define RV1106_NPUCRU_BASE 0x16000 -+#define RV1106_CORECRU_BASE 0x18000 -+#define RV1106_VEPUCRU_BASE 0x1A000 -+#define RV1106_VOCRU_BASE 0x1C000 -+#define RV1106_DDRCRU_BASE 0x1E000 -+#define RV1106_SUBDDRCRU_BASE 0x1F000 -+ -+#define RV1106_VI_GRF_BASE 0x50000 -+#define RV1106_VO_GRF_BASE 0x60000 -+ -+#define RV1106_PMUCLKSEL_CON(x) ((x) * 0x4 + 0x300) -+#define RV1106_PMUCLKGATE_CON(x) ((x) * 0x4 + 0x800) -+#define RV1106_PMUSOFTRST_CON(x) ((x) * 
0x4 + 0xa00) -+#define RV1106_PLL_CON(x) ((x) * 0x4 + RV1106_TOPCRU_BASE) -+#define RV1106_MODE_CON (0x280 + RV1106_TOPCRU_BASE) -+#define RV1106_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_TOPCRU_BASE) -+#define RV1106_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_TOPCRU_BASE) -+#define RV1106_SOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_TOPCRU_BASE) -+#define RV1106_GLB_SRST_FST (0xc08 + RV1106_TOPCRU_BASE) -+#define RV1106_GLB_SRST_SND (0xc0c + RV1106_TOPCRU_BASE) -+#define RV1106_SDIO_CON0 (0x1c + RV1106_VO_GRF_BASE) -+#define RV1106_SDIO_CON1 (0x20 + RV1106_VO_GRF_BASE) -+#define RV1106_SDMMC_CON0 (0x4 + RV1106_VI_GRF_BASE) -+#define RV1106_SDMMC_CON1 (0x8 + RV1106_VI_GRF_BASE) -+#define RV1106_EMMC_CON0 (0x20) -+#define RV1106_EMMC_CON1 (0x24) -+#define RV1106_PERICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_PERICRU_BASE) -+#define RV1106_PERICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_PERICRU_BASE) -+#define RV1106_PERISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_PERICRU_BASE) -+#define RV1106_VICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VICRU_BASE) -+#define RV1106_VICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VICRU_BASE) -+#define RV1106_VISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VICRU_BASE) -+#define RV1106_VICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VICRU_BASE) -+#define RV1106_VICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VICRU_BASE) -+#define RV1106_VISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VICRU_BASE) -+#define RV1106_NPUCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_NPUCRU_BASE) -+#define RV1106_NPUCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_NPUCRU_BASE) -+#define RV1106_NPUSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_NPUCRU_BASE) -+#define RV1106_CORECLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_CORECRU_BASE) -+#define RV1106_CORECLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_CORECRU_BASE) -+#define RV1106_CORESOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_CORECRU_BASE) -+#define RV1106_VEPUCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VEPUCRU_BASE) -+#define RV1106_VEPUCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VEPUCRU_BASE) -+#define RV1106_VEPUSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VEPUCRU_BASE) -+#define RV1106_VOCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VOCRU_BASE) -+#define RV1106_VOCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VOCRU_BASE) -+#define RV1106_VOSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VOCRU_BASE) -+#define RV1106_DDRCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_DDRCRU_BASE) -+#define RV1106_DDRCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_DDRCRU_BASE) -+#define RV1106_DDRSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_DDRCRU_BASE) -+#define RV1106_SUBDDRCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_SUBDDRCRU_BASE) -+#define RV1106_SUBDDRCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_SUBDDRCRU_BASE) -+#define RV1106_SUBDDRSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_SUBDDRCRU_BASE) -+#define RV1106_SUBDDRMODE_CON (0x280 + RV1106_SUBDDRCRU_BASE) -+ -+#define RV1108_PLL_CON(x) ((x) * 0x4) -+#define RV1108_CLKSEL_CON(x) ((x) * 0x4 + 0x60) -+#define RV1108_CLKGATE_CON(x) ((x) * 0x4 + 0x120) -+#define RV1108_SOFTRST_CON(x) ((x) * 0x4 + 0x180) -+#define RV1108_GLB_SRST_FST 0x1c0 -+#define RV1108_GLB_SRST_SND 0x1c4 -+#define RV1108_MISC_CON 0x1cc -+#define RV1108_SDMMC_CON0 0x1d8 -+#define RV1108_SDMMC_CON1 0x1dc -+#define RV1108_SDIO_CON0 0x1e0 -+#define RV1108_SDIO_CON1 0x1e4 -+#define RV1108_EMMC_CON0 0x1e8 -+#define RV1108_EMMC_CON1 0x1ec -+ -+#define RV1126_PMU_MODE 0x0 -+#define RV1126_PMU_PLL_CON(x) ((x) * 0x4 + 0x10) -+#define RV1126_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RV1126_PMU_CLKGATE_CON(x) 
((x) * 0x4 + 0x180) -+#define RV1126_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x200) -+#define RV1126_PLL_CON(x) ((x) * 0x4) -+#define RV1126_MODE_CON 0x90 -+#define RV1126_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RV1126_CLKGATE_CON(x) ((x) * 0x4 + 0x280) -+#define RV1126_SOFTRST_CON(x) ((x) * 0x4 + 0x300) -+#define RV1126_GLB_SRST_FST 0x408 -+#define RV1126_GLB_SRST_SND 0x40c -+#define RV1126_SDMMC_CON0 0x440 -+#define RV1126_SDMMC_CON1 0x444 -+#define RV1126_SDIO_CON0 0x448 -+#define RV1126_SDIO_CON1 0x44c -+#define RV1126_EMMC_CON0 0x450 -+#define RV1126_EMMC_CON1 0x454 -+ -+/* -+ * register positions shared by RK1808 RK2928, RK3036, -+ * RK3066, RK3188 and RK3228 -+ */ -+ -+#define RK1808_PLL_CON(x) ((x) * 0x4) -+#define RK1808_MODE_CON 0xa0 -+#define RK1808_MISC_CON 0xa4 -+#define RK1808_MISC1_CON 0xa8 -+#define RK1808_GLB_SRST_FST 0xb8 -+#define RK1808_GLB_SRST_SND 0xbc -+#define RK1808_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK1808_CLKGATE_CON(x) ((x) * 0x4 + 0x230) -+#define RK1808_SOFTRST_CON(x) ((x) * 0x4 + 0x300) -+#define RK1808_SDMMC_CON0 0x380 -+#define RK1808_SDMMC_CON1 0x384 -+#define RK1808_SDIO_CON0 0x388 -+#define RK1808_SDIO_CON1 0x38c -+#define RK1808_EMMC_CON0 0x390 -+#define RK1808_EMMC_CON1 0x394 -+ -+#define RK1808_PMU_PLL_CON(x) ((x) * 0x4 + 0x4000) -+#define RK1808_PMU_MODE_CON 0x4020 -+#define RK1808_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x4040) -+#define RK1808_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x4080) -+ -+#define RK2928_PLL_CON(x) ((x) * 0x4) -+#define RK2928_MODE_CON 0x40 -+#define RK2928_CLKSEL_CON(x) ((x) * 0x4 + 0x44) -+#define RK2928_CLKGATE_CON(x) ((x) * 0x4 + 0xd0) -+#define RK2928_GLB_SRST_FST 0x100 -+#define RK2928_GLB_SRST_SND 0x104 -+#define RK2928_SOFTRST_CON(x) ((x) * 0x4 + 0x110) -+#define RK2928_MISC_CON 0x134 -+ -+#define RK3036_SDMMC_CON0 0x144 -+#define RK3036_SDMMC_CON1 0x148 -+#define RK3036_SDIO_CON0 0x14c -+#define RK3036_SDIO_CON1 0x150 -+#define RK3036_EMMC_CON0 0x154 -+#define RK3036_EMMC_CON1 0x158 -+ -+#define RK3228_GLB_SRST_FST 0x1f0 -+#define RK3228_GLB_SRST_SND 0x1f4 -+#define RK3228_SDMMC_CON0 0x1c0 -+#define RK3228_SDMMC_CON1 0x1c4 -+#define RK3228_SDIO_CON0 0x1c8 -+#define RK3228_SDIO_CON1 0x1cc -+#define RK3228_EMMC_CON0 0x1d8 -+#define RK3228_EMMC_CON1 0x1dc -+ -+#define RK3288_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3288_MODE_CON 0x50 -+#define RK3288_CLKSEL_CON(x) ((x) * 0x4 + 0x60) -+#define RK3288_CLKGATE_CON(x) ((x) * 0x4 + 0x160) -+#define RK3288_GLB_SRST_FST 0x1b0 -+#define RK3288_GLB_SRST_SND 0x1b4 -+#define RK3288_SOFTRST_CON(x) ((x) * 0x4 + 0x1b8) -+#define RK3288_MISC_CON 0x1e8 -+#define RK3288_SDMMC_CON0 0x200 -+#define RK3288_SDMMC_CON1 0x204 -+#define RK3288_SDIO0_CON0 0x208 -+#define RK3288_SDIO0_CON1 0x20c -+#define RK3288_SDIO1_CON0 0x210 -+#define RK3288_SDIO1_CON1 0x214 -+#define RK3288_EMMC_CON0 0x218 -+#define RK3288_EMMC_CON1 0x21c -+ -+#define RK3308_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3308_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3308_CLKGATE_CON(x) ((x) * 0x4 + 0x300) -+#define RK3308_GLB_SRST_FST 0xb8 -+#define RK3308_SOFTRST_CON(x) ((x) * 0x4 + 0x400) -+#define RK3308_MODE_CON 0xa0 -+#define RK3308_SDMMC_CON0 0x480 -+#define RK3308_SDMMC_CON1 0x484 -+#define RK3308_SDIO_CON0 0x488 -+#define RK3308_SDIO_CON1 0x48c -+#define RK3308_EMMC_CON0 0x490 -+#define RK3308_EMMC_CON1 0x494 -+ -+#define RK3328_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3328_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3328_CLKGATE_CON(x) ((x) * 0x4 + 0x200) -+#define RK3328_GRFCLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define 
RK3328_GLB_SRST_FST 0x9c -+#define RK3328_GLB_SRST_SND 0x98 -+#define RK3328_SOFTRST_CON(x) ((x) * 0x4 + 0x300) -+#define RK3328_MODE_CON 0x80 -+#define RK3328_MISC_CON 0x84 -+#define RK3328_SDMMC_CON0 0x380 -+#define RK3328_SDMMC_CON1 0x384 -+#define RK3328_SDIO_CON0 0x388 -+#define RK3328_SDIO_CON1 0x38c -+#define RK3328_EMMC_CON0 0x390 -+#define RK3328_EMMC_CON1 0x394 -+#define RK3328_SDMMC_EXT_CON0 0x398 -+#define RK3328_SDMMC_EXT_CON1 0x39C -+ -+#define RK3368_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3368_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3368_CLKGATE_CON(x) ((x) * 0x4 + 0x200) -+#define RK3368_GLB_SRST_FST 0x280 -+#define RK3368_GLB_SRST_SND 0x284 -+#define RK3368_SOFTRST_CON(x) ((x) * 0x4 + 0x300) -+#define RK3368_MISC_CON 0x380 -+#define RK3368_SDMMC_CON0 0x400 -+#define RK3368_SDMMC_CON1 0x404 -+#define RK3368_SDIO0_CON0 0x408 -+#define RK3368_SDIO0_CON1 0x40c -+#define RK3368_SDIO1_CON0 0x410 -+#define RK3368_SDIO1_CON1 0x414 -+#define RK3368_EMMC_CON0 0x418 -+#define RK3368_EMMC_CON1 0x41c -+ -+#define RK3399_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3399_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3399_CLKGATE_CON(x) ((x) * 0x4 + 0x300) -+#define RK3399_SOFTRST_CON(x) ((x) * 0x4 + 0x400) -+#define RK3399_GLB_SRST_FST 0x500 -+#define RK3399_GLB_SRST_SND 0x504 -+#define RK3399_GLB_CNT_TH 0x508 -+#define RK3399_MISC_CON 0x50c -+#define RK3399_RST_CON 0x510 -+#define RK3399_RST_ST 0x514 -+#define RK3399_SDMMC_CON0 0x580 -+#define RK3399_SDMMC_CON1 0x584 -+#define RK3399_SDIO_CON0 0x588 -+#define RK3399_SDIO_CON1 0x58c -+ -+#define RK3399_PMU_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3399_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x80) -+#define RK3399_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x100) -+#define RK3399_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x110) -+ -+#define RK3528_PMU_CRU_BASE 0x10000 -+#define RK3528_PCIE_CRU_BASE 0x20000 -+#define RK3528_DDRPHY_CRU_BASE 0x28000 -+#define RK3528_VPU_GRF_BASE 0x40000 -+#define RK3528_VO_GRF_BASE 0x60000 -+#define RK3528_SDMMC_CON0 (RK3528_VO_GRF_BASE + 0x24) -+#define RK3528_SDMMC_CON1 (RK3528_VO_GRF_BASE + 0x28) -+#define RK3528_SDIO0_CON0 (RK3528_VPU_GRF_BASE + 0x4) -+#define RK3528_SDIO0_CON1 (RK3528_VPU_GRF_BASE + 0x8) -+#define RK3528_SDIO1_CON0 (RK3528_VPU_GRF_BASE + 0xc) -+#define RK3528_SDIO1_CON1 (RK3528_VPU_GRF_BASE + 0x10) -+#define RK3528_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3528_PCIE_PLL_CON(x) ((x) * 0x4 + RK3528_PCIE_CRU_BASE) -+#define RK3528_DDRPHY_PLL_CON(x) ((x) * 0x4 + RK3528_DDRPHY_CRU_BASE) -+#define RK3528_MODE_CON 0x280 -+#define RK3528_CLKSEL_CON(x) ((x) * 0x4 + 0x300) -+#define RK3528_CLKGATE_CON(x) ((x) * 0x4 + 0x800) -+#define RK3528_SOFTRST_CON(x) ((x) * 0x4 + 0xa00) -+#define RK3528_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RK3528_PMU_CRU_BASE) -+#define RK3528_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_PMU_CRU_BASE) -+#define RK3528_PCIE_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RK3528_PCIE_CRU_BASE) -+#define RK3528_PCIE_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_PCIE_CRU_BASE) -+#define RK3528_DDRPHY_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_DDRPHY_CRU_BASE) -+#define RK3528_DDRPHY_MODE_CON (0x280 + RK3528_DDRPHY_CRU_BASE) -+#define RK3528_GLB_CNT_TH 0xc00 -+#define RK3528_GLB_SRST_FST 0xc08 -+#define RK3528_GLB_SRST_SND 0xc0c -+ -+#define RK3562_PMU0_CRU_BASE 0x10000 -+#define RK3562_PMU1_CRU_BASE 0x18000 -+#define RK3562_DDR_CRU_BASE 0x20000 -+#define RK3562_SUBDDR_CRU_BASE 0x28000 -+#define RK3562_PERI_CRU_BASE 0x30000 -+ -+#define RK3562_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3562_PMU1_PLL_CON(x) ((x) * 0x4 + 
RK3562_PMU1_CRU_BASE + 0x40) -+#define RK3562_SUBDDR_PLL_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x20) -+#define RK3562_MODE_CON 0x600 -+#define RK3562_PMU1_MODE_CON (RK3562_PMU1_CRU_BASE + 0x380) -+#define RK3562_SUBDDR_MODE_CON (RK3562_SUBDDR_CRU_BASE + 0x380) -+#define RK3562_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3562_CLKGATE_CON(x) ((x) * 0x4 + 0x300) -+#define RK3562_SOFTRST_CON(x) ((x) * 0x4 + 0x400) -+#define RK3562_DDR_CLKSEL_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x100) -+#define RK3562_DDR_CLKGATE_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x180) -+#define RK3562_DDR_SOFTRST_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x200) -+#define RK3562_SUBDDR_CLKSEL_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x100) -+#define RK3562_SUBDDR_CLKGATE_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x180) -+#define RK3562_SUBDDR_SOFTRST_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x200) -+#define RK3562_PERI_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x100) -+#define RK3562_PERI_CLKGATE_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x300) -+#define RK3562_PERI_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x400) -+#define RK3562_PMU0_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x100) -+#define RK3562_PMU0_CLKGATE_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x180) -+#define RK3562_PMU0_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x200) -+#define RK3562_PMU1_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x100) -+#define RK3562_PMU1_CLKGATE_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x180) -+#define RK3562_PMU1_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x200) -+#define RK3562_GLB_SRST_FST 0x614 -+#define RK3562_GLB_SRST_SND 0x618 -+#define RK3562_GLB_RST_CON 0x61c -+#define RK3562_GLB_RST_ST 0x620 -+#define RK3562_SDMMC0_CON0 0x624 -+#define RK3562_SDMMC0_CON1 0x628 -+#define RK3562_SDMMC1_CON0 0x62c -+#define RK3562_SDMMC1_CON1 0x630 ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+#define RK3568_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3568_MODE_CON0 0xc0 -+#define RK3568_MISC_CON0 0xc4 -+#define RK3568_MISC_CON1 0xc8 -+#define RK3568_MISC_CON2 0xcc -+#define RK3568_GLB_CNT_TH 0xd0 -+#define RK3568_GLB_SRST_FST 0xd4 -+#define RK3568_GLB_SRST_SND 0xd8 -+#define RK3568_GLB_RST_CON 0xdc -+#define RK3568_GLB_RST_ST 0xe0 -+#define RK3568_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3568_CLKGATE_CON(x) ((x) * 0x4 + 0x300) -+#define RK3568_SOFTRST_CON(x) ((x) * 0x4 + 0x400) -+#define RK3568_SDMMC0_CON0 0x580 -+#define RK3568_SDMMC0_CON1 0x584 -+#define RK3568_SDMMC1_CON0 0x588 -+#define RK3568_SDMMC1_CON1 0x58c -+#define RK3568_SDMMC2_CON0 0x590 -+#define RK3568_SDMMC2_CON1 0x594 -+#define RK3568_EMMC_CON0 0x598 -+#define RK3568_EMMC_CON1 0x59c ++#define RK3568_GRF_SOC_CON1 0x504 ++#define RK3568_GRF_SOC_CON2 0x508 ++#define RK3568_GRF_SOC_STATUS0 0x580 ++#define RK3568_PMU_GRF_SOC_CON0 0x100 + -+#define RK3568_PMU_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3568_PMU_MODE_CON0 0x80 -+#define RK3568_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x100) -+#define RK3568_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x180) -+#define RK3568_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x200) ++#define RK3568_FRAC_MAX_PRATE 1000000000 ++#define RK3568_SPDIF_FRAC_MAX_PRATE 600000000 ++#define RK3568_UART_FRAC_MAX_PRATE 600000000 ++#define RK3568_DCLK_PARENT_MAX_PRATE 600000000 + -+#define RK3588_PHP_CRU_BASE 0x8000 -+#define RK3588_PMU_CRU_BASE 0x30000 -+#define RK3588_BIGCORE0_CRU_BASE 0x50000 -+#define RK3588_BIGCORE1_CRU_BASE 0x52000 -+#define RK3588_DSU_CRU_BASE 
0x58000 ++enum rk3568_pmu_plls { ++ ppll, hpll, ++}; + -+#define RK3588_PLL_CON(x) RK2928_PLL_CON(x) -+#define RK3588_MODE_CON0 0x280 -+#define RK3588_B0_PLL_MODE_CON0 (RK3588_BIGCORE0_CRU_BASE + 0x280) -+#define RK3588_B1_PLL_MODE_CON0 (RK3588_BIGCORE1_CRU_BASE + 0x280) -+#define RK3588_LPLL_MODE_CON0 (RK3588_DSU_CRU_BASE + 0x280) -+#define RK3588_CLKSEL_CON(x) ((x) * 0x4 + 0x300) -+#define RK3588_CLKGATE_CON(x) ((x) * 0x4 + 0x800) -+#define RK3588_SOFTRST_CON(x) ((x) * 0x4 + 0xa00) -+#define RK3588_GLB_CNT_TH 0xc00 -+#define RK3588_GLB_SRST_FST 0xc08 -+#define RK3588_GLB_SRST_SND 0xc0c -+#define RK3588_GLB_RST_CON 0xc10 -+#define RK3588_GLB_RST_ST 0xc04 -+#define RK3588_SDIO_CON0 0xC24 -+#define RK3588_SDIO_CON1 0xC28 -+#define RK3588_SDMMC_CON0 0xC30 -+#define RK3588_SDMMC_CON1 0xC34 ++enum rk3568_plls { ++ apll, dpll, gpll, cpll, npll, vpll, ++}; + -+#define RK3588_PHP_CLKGATE_CON(x) ((x) * 0x4 + RK3588_PHP_CRU_BASE + 0x800) -+#define RK3588_PHP_SOFTRST_CON(x) ((x) * 0x4 + RK3588_PHP_CRU_BASE + 0xa00) ++static struct rockchip_pll_rate_table rk3568_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(297000000, 1, 99, 8, 
1, 1, 0),
++	RK3036_PLL_RATE(241500000, 1, 161, 8, 2, 1, 0),
++	RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0),
++	RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0),
++	RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0),
++	RK3036_PLL_RATE(135000000, 1, 45, 8, 1, 1, 0),
++	RK3036_PLL_RATE(119000000, 3, 119, 4, 2, 1, 0),
++	RK3036_PLL_RATE(108000000, 2, 45, 5, 1, 1, 0),
++	RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0),
++	RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0),
++	RK3036_PLL_RATE(78750000, 2, 105, 8, 2, 1, 0),
++	RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0),
++	{ /* sentinel */ },
++};
+
++#define RK3568_DIV_ATCLK_CORE_MASK 0x1f
++#define RK3568_DIV_ATCLK_CORE_SHIFT 0
++#define RK3568_DIV_GICCLK_CORE_MASK 0x1f
++#define RK3568_DIV_GICCLK_CORE_SHIFT 8
++#define RK3568_DIV_PCLK_CORE_MASK 0x1f
++#define RK3568_DIV_PCLK_CORE_SHIFT 0
++#define RK3568_DIV_PERIPHCLK_CORE_MASK 0x1f
++#define RK3568_DIV_PERIPHCLK_CORE_SHIFT 8
++#define RK3568_DIV_ACLK_CORE_MASK 0x1f
++#define RK3568_DIV_ACLK_CORE_SHIFT 8
+
++#define RK3568_DIV_SCLK_CORE_MASK 0xf
++#define RK3568_DIV_SCLK_CORE_SHIFT 0
++#define RK3568_MUX_SCLK_CORE_MASK 0x3
++#define RK3568_MUX_SCLK_CORE_SHIFT 8
++#define RK3568_MUX_SCLK_CORE_NPLL_MASK 0x1
++#define RK3568_MUX_SCLK_CORE_NPLL_SHIFT 15
++#define RK3568_MUX_CLK_CORE_APLL_MASK 0x1
++#define RK3568_MUX_CLK_CORE_APLL_SHIFT 7
++#define RK3568_MUX_CLK_PVTPLL_MASK 0x1
++#define RK3568_MUX_CLK_PVTPLL_SHIFT 15
+
++#define RK3568_CLKSEL1(_sclk_core) \
++{ \
++	.reg = RK3568_CLKSEL_CON(2), \
++	.val = HIWORD_UPDATE(_sclk_core, RK3568_MUX_SCLK_CORE_NPLL_MASK, \
++			RK3568_MUX_SCLK_CORE_NPLL_SHIFT) | \
++		HIWORD_UPDATE(_sclk_core, RK3568_MUX_SCLK_CORE_MASK, \
++			RK3568_MUX_SCLK_CORE_SHIFT) | \
++		HIWORD_UPDATE(1, RK3568_DIV_SCLK_CORE_MASK, \
++			RK3568_DIV_SCLK_CORE_SHIFT), \
++}
+
++#define RK3568_CLKSEL2(_aclk_core) \
++{ \
++	.reg = RK3568_CLKSEL_CON(5), \
++	.val = 
HIWORD_UPDATE(_aclk_core, RK3568_DIV_ACLK_CORE_MASK, \ ++ RK3568_DIV_ACLK_CORE_SHIFT), \ +} + -+#define RK3066_PLL_RATE(_rate, _nr, _nf, _no) \ -+{ \ -+ .rate = _rate##U, \ -+ .nr = _nr, \ -+ .nf = _nf, \ -+ .no = _no, \ -+ .nb = ((_nf) < 2) ? 1 : (_nf) >> 1, \ ++#define RK3568_CLKSEL3(_atclk_core, _gic_core) \ ++{ \ ++ .reg = RK3568_CLKSEL_CON(3), \ ++ .val = HIWORD_UPDATE(_atclk_core, RK3568_DIV_ATCLK_CORE_MASK, \ ++ RK3568_DIV_ATCLK_CORE_SHIFT) | \ ++ HIWORD_UPDATE(_gic_core, RK3568_DIV_GICCLK_CORE_MASK, \ ++ RK3568_DIV_GICCLK_CORE_SHIFT), \ +} + -+#define RK3066_PLL_RATE_NB(_rate, _nr, _nf, _no, _nb) \ ++#define RK3568_CLKSEL4(_pclk_core, _periph_core) \ +{ \ -+ .rate = _rate##U, \ -+ .nr = _nr, \ -+ .nf = _nf, \ -+ .no = _no, \ -+ .nb = _nb, \ ++ .reg = RK3568_CLKSEL_CON(4), \ ++ .val = HIWORD_UPDATE(_pclk_core, RK3568_DIV_PCLK_CORE_MASK, \ ++ RK3568_DIV_PCLK_CORE_SHIFT) | \ ++ HIWORD_UPDATE(_periph_core, RK3568_DIV_PERIPHCLK_CORE_MASK, \ ++ RK3568_DIV_PERIPHCLK_CORE_SHIFT), \ +} + -+#define RK3588_PLL_RATE(_rate, _p, _m, _s, _k) \ ++#define RK3568_CPUCLK_RATE(_prate, _sclk, _acore, _atcore, _gicclk, _pclk, _periph) \ +{ \ -+ .rate = _rate##U, \ -+ .p = _p, \ -+ .m = _m, \ -+ .s = _s, \ -+ .k = _k, \ ++ .prate = _prate##U, \ ++ .divs = { \ ++ RK3568_CLKSEL1(_sclk), \ ++ RK3568_CLKSEL2(_acore), \ ++ RK3568_CLKSEL3(_atcore, _gicclk), \ ++ RK3568_CLKSEL4(_pclk, _periph), \ ++ }, \ +} + -+/** -+ * struct rockchip_clk_provider - information about clock provider -+ * @reg_base: virtual address for the register base. -+ * @clk_data: holds clock related data like clk* and number of clocks. -+ * @cru_node: device-node of the clock-provider -+ * @grf: regmap of the general-register-files syscon -+ * @list_node: node in the global ctx list -+ * @lock: maintains exclusion between callbacks for a given clock-provider. 
-+ */ -+struct rockchip_clk_provider { -+ void __iomem *reg_base; -+ struct clk_onecell_data clk_data; -+ struct device_node *cru_node; -+ struct regmap *grf; -+ struct regmap *pmugrf; -+ struct hlist_node list_node; -+ spinlock_t lock; ++static struct rockchip_cpuclk_rate_table rk3568_cpuclk_rates[] __initdata = { ++ RK3568_CPUCLK_RATE(1800000000, 0, 1, 7, 7, 7, 7), ++ RK3568_CPUCLK_RATE(1704000000, 0, 1, 7, 7, 7, 7), ++ RK3568_CPUCLK_RATE(1608000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1584000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1560000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1536000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1512000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1488000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1464000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1440000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1416000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1392000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1368000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1344000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1320000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1296000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1272000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1248000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1224000000, 0, 1, 5, 5, 5, 5), ++ RK3568_CPUCLK_RATE(1200000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(1104000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(1008000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(912000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(816000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(696000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(600000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(408000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(312000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(216000000, 0, 1, 3, 3, 3, 3), ++ RK3568_CPUCLK_RATE(96000000, 0, 1, 3, 3, 3, 3), +}; + -+struct rockchip_pll_rate_table { -+ unsigned long rate; -+ union { -+ struct { -+ /* for RK3066 */ -+ unsigned int nr; -+ unsigned int nf; -+ unsigned int no; -+ unsigned int nb; -+ }; -+ struct { -+ /* for RK3036/RK3399 */ -+ unsigned int fbdiv; -+ unsigned int postdiv1; -+ unsigned int refdiv; -+ unsigned int postdiv2; -+ unsigned int dsmpd; -+ unsigned int frac; -+ }; -+ struct { -+ /* for RK3588 */ -+ unsigned int m; -+ unsigned int p; -+ unsigned int s; -+ unsigned int k; -+ }; -+ }; ++static const struct rockchip_cpuclk_reg_data rk3568_cpuclk_data = { ++ .core_reg[0] = RK3568_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .core_reg[1] = RK3568_CLKSEL_CON(0), ++ .div_core_shift[1] = 8, ++ .div_core_mask[1] = 0x1f, ++ .core_reg[2] = RK3568_CLKSEL_CON(1), ++ .div_core_shift[2] = 0, ++ .div_core_mask[2] = 0x1f, ++ .core_reg[3] = RK3568_CLKSEL_CON(1), ++ .div_core_shift[3] = 8, ++ .div_core_mask[3] = 0x1f, ++ .num_cores = 4, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x1, +}; + -+/** -+ * struct rockchip_pll_clock - information about pll clock -+ * @id: platform specific id of the clock. -+ * @name: name of this pll clock. -+ * @parent_names: name of the parent clock. -+ * @num_parents: number of parents -+ * @flags: optional flags for basic clock. -+ * @con_offset: offset of the register for configuring the PLL. -+ * @mode_offset: offset of the register for configuring the PLL-mode. -+ * @mode_shift: offset inside the mode-register for the mode of this pll. -+ * @lock_shift: offset inside the lock register for the lock status. 
-+ * @type: Type of PLL to be registered. -+ * @pll_flags: hardware-specific flags -+ * @rate_table: Table of usable pll rates -+ * -+ * Flags: -+ * ROCKCHIP_PLL_SYNC_RATE - check rate parameters to match against the -+ * rate_table parameters and ajust them if necessary. -+ */ -+struct rockchip_pll_clock { -+ unsigned int id; -+ const char *name; -+ const char *const *parent_names; -+ u8 num_parents; -+ unsigned long flags; -+ int con_offset; -+ int mode_offset; -+ int mode_shift; -+ int lock_shift; -+ enum rockchip_pll_type type; -+ u8 pll_flags; -+ struct rockchip_pll_rate_table *rate_table; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc_32k" }; ++PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "i2s1_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "i2s1_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s2_2ch_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "i2s2_mclkin", "xin_osc0_half "}; ++PNAME(clk_i2s3_2ch_tx_p) = { "clk_i2s3_2ch_tx_src", "clk_i2s3_2ch_tx_frac", "i2s3_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s3_2ch_rx_p) = { "clk_i2s3_2ch_rx_src", "clk_i2s3_2ch_rx_frac", "i2s3_mclkin", "xin_osc0_half" }; ++PNAME(mclk_spdif_8ch_p) = { "mclk_spdif_8ch_src", "mclk_spdif_8ch_frac" }; ++PNAME(sclk_audpwm_p) = { "sclk_audpwm_src", "sclk_audpwm_frac" }; ++PNAME(sclk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(sclk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(sclk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(sclk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(sclk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(sclk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(sclk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(sclk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; ++PNAME(sclk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; ++PNAME(sclk_uart0_p) = { "sclk_uart0_div", "sclk_uart0_frac", "xin24m" }; ++PNAME(clk_rtc32k_pmu_p) = { "clk_32k_pvtm", "xin32k", "clk_rtc32k_frac" }; ++PNAME(mpll_gpll_cpll_npll_p) = { "mpll", "gpll", "cpll", "npll" }; ++PNAME(gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; ++PNAME(npll_gpll_p) = { "npll", "gpll" }; ++PNAME(cpll_gpll_p) = { "cpll", "gpll" }; ++PNAME(gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(gpll_cpll_npll_vpll_p) = { "gpll", "cpll", "npll", "vpll" }; ++PNAME(apll_gpll_npll_p) = { "apll", "gpll", "npll" }; ++PNAME(sclk_core_pre_p) = { "sclk_core_src", "npll" }; ++PNAME(gpll150_gpll100_gpll75_xin24m_p) = { "gpll_150m", "gpll_100m", "gpll_75m", "xin24m" }; ++PNAME(clk_gpu_pre_mux_p) = { "clk_gpu_src", "gpu_pvtpll_out" }; ++PNAME(clk_npu_pre_ndft_p) = { "clk_npu_src", "clk_npu_np5"}; ++PNAME(clk_npu_p) = { "clk_npu_pre_ndft", "npu_pvtpll_out" }; ++PNAME(dpll_gpll_cpll_p) = { "dpll", "gpll", "cpll" }; ++PNAME(clk_ddr1x_p) = { "clk_ddrphy1x_src", "dpll" }; ++PNAME(gpll200_gpll150_gpll100_xin24m_p) = { "gpll_200m", "gpll_150m", "gpll_100m", "xin24m" }; ++PNAME(gpll100_gpll75_gpll50_p) = { "gpll_100m", "gpll_75m", "cpll_50m" }; ++PNAME(i2s0_mclkout_tx_p) = { "mclk_i2s0_8ch_tx", "xin_osc0_half" }; ++PNAME(i2s0_mclkout_rx_p) = { 
"mclk_i2s0_8ch_rx", "xin_osc0_half" }; ++PNAME(i2s1_mclkout_tx_p) = { "mclk_i2s1_8ch_tx", "xin_osc0_half" }; ++PNAME(i2s1_mclkout_rx_p) = { "mclk_i2s1_8ch_rx", "xin_osc0_half" }; ++PNAME(i2s2_mclkout_p) = { "mclk_i2s2_2ch", "xin_osc0_half" }; ++PNAME(i2s3_mclkout_tx_p) = { "mclk_i2s3_2ch_tx", "xin_osc0_half" }; ++PNAME(i2s3_mclkout_rx_p) = { "mclk_i2s3_2ch_rx", "xin_osc0_half" }; ++PNAME(mclk_pdm_p) = { "gpll_300m", "cpll_250m", "gpll_200m", "gpll_100m" }; ++PNAME(clk_i2c_p) = { "gpll_200m", "gpll_100m", "xin24m", "cpll_100m" }; ++PNAME(gpll200_gpll150_gpll100_p) = { "gpll_200m", "gpll_150m", "gpll_100m" }; ++PNAME(gpll300_gpll200_gpll100_p) = { "gpll_300m", "gpll_200m", "gpll_100m" }; ++PNAME(clk_nandc_p) = { "gpll_200m", "gpll_150m", "cpll_100m", "xin24m" }; ++PNAME(sclk_sfc_p) = { "xin24m", "cpll_50m", "gpll_75m", "gpll_100m", "cpll_125m", "gpll_150m" }; ++PNAME(gpll200_gpll150_cpll125_p) = { "gpll_200m", "gpll_150m", "cpll_125m" }; ++PNAME(cclk_emmc_p) = { "xin24m", "gpll_200m", "gpll_150m", "cpll_100m", "cpll_50m", "clk_osc0_div_375k" }; ++PNAME(aclk_pipe_p) = { "gpll_400m", "gpll_300m", "gpll_200m", "xin24m" }; ++PNAME(gpll200_cpll125_p) = { "gpll_200m", "cpll_125m" }; ++PNAME(gpll300_gpll200_gpll100_xin24m_p) = { "gpll_300m", "gpll_200m", "gpll_100m", "xin24m" }; ++PNAME(clk_sdmmc_p) = { "xin24m", "gpll_400m", "gpll_300m", "cpll_100m", "cpll_50m", "clk_osc0_div_750k" }; ++PNAME(cpll125_cpll50_cpll25_xin24m_p) = { "cpll_125m", "cpll_50m", "cpll_25m", "xin24m" }; ++PNAME(clk_gmac_ptp_p) = { "cpll_62p5", "gpll_100m", "cpll_50m", "xin24m" }; ++PNAME(cpll333_gpll300_gpll200_p) = { "cpll_333m", "gpll_300m", "gpll_200m" }; ++PNAME(cpll_gpll_hpll_p) = { "cpll", "gpll", "hpll" }; ++PNAME(gpll_usb480m_xin24m_p) = { "gpll", "usb480m", "xin24m", "xin24m" }; ++PNAME(gpll300_cpll250_gpll100_xin24m_p) = { "gpll_300m", "cpll_250m", "gpll_100m", "xin24m" }; ++PNAME(cpll_gpll_hpll_vpll_p) = { "cpll", "gpll", "hpll", "vpll" }; ++PNAME(hpll_vpll_gpll_cpll_p) = { "hpll", "vpll", "gpll", "cpll" }; ++PNAME(gpll400_cpll333_gpll200_p) = { "gpll_400m", "cpll_333m", "gpll_200m" }; ++PNAME(gpll100_gpll75_cpll50_xin24m_p) = { "gpll_100m", "gpll_75m", "cpll_50m", "xin24m" }; ++PNAME(xin24m_gpll100_cpll100_p) = { "xin24m", "gpll_100m", "cpll_100m" }; ++PNAME(gpll_cpll_usb480m_p) = { "gpll", "cpll", "usb480m" }; ++PNAME(gpll100_xin24m_cpll100_p) = { "gpll_100m", "xin24m", "cpll_100m" }; ++PNAME(gpll200_xin24m_cpll100_p) = { "gpll_200m", "xin24m", "cpll_100m" }; ++PNAME(xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; ++PNAME(cpll500_gpll400_gpll300_xin24m_p) = { "cpll_500m", "gpll_400m", "gpll_300m", "xin24m" }; ++PNAME(gpll400_gpll300_gpll200_xin24m_p) = { "gpll_400m", "gpll_300m", "gpll_200m", "xin24m" }; ++PNAME(xin24m_cpll100_p) = { "xin24m", "cpll_100m" }; ++PNAME(ppll_usb480m_cpll_gpll_p) = { "ppll", "usb480m", "cpll", "gpll"}; ++PNAME(clk_usbphy0_ref_p) = { "clk_ref24m", "xin_osc0_usbphy0_g" }; ++PNAME(clk_usbphy1_ref_p) = { "clk_ref24m", "xin_osc0_usbphy1_g" }; ++PNAME(clk_mipidsiphy0_ref_p) = { "clk_ref24m", "xin_osc0_mipidsiphy0_g" }; ++PNAME(clk_mipidsiphy1_ref_p) = { "clk_ref24m", "xin_osc0_mipidsiphy1_g" }; ++PNAME(clk_wifi_p) = { "clk_wifi_osc0", "clk_wifi_div" }; ++PNAME(clk_pciephy0_ref_p) = { "clk_pciephy0_osc0", "clk_pciephy0_div" }; ++PNAME(clk_pciephy1_ref_p) = { "clk_pciephy1_osc0", "clk_pciephy1_div" }; ++PNAME(clk_pciephy2_ref_p) = { "clk_pciephy2_osc0", "clk_pciephy2_div" }; ++PNAME(mux_gmac0_p) = { "clk_mac0_2top", "gmac0_clkin" }; ++PNAME(mux_gmac0_rgmii_speed_p) = { "clk_gmac0", "clk_gmac0", 
"clk_gmac0_tx_div50", "clk_gmac0_tx_div5" }; ++PNAME(mux_gmac0_rmii_speed_p) = { "clk_gmac0_rx_div20", "clk_gmac0_rx_div2" }; ++PNAME(mux_gmac0_rx_tx_p) = { "clk_gmac0_rgmii_speed", "clk_gmac0_rmii_speed", "clk_gmac0_xpcs_mii" }; ++PNAME(mux_gmac1_p) = { "clk_mac1_2top", "gmac1_clkin" }; ++PNAME(mux_gmac1_rgmii_speed_p) = { "clk_gmac1", "clk_gmac1", "clk_gmac1_tx_div50", "clk_gmac1_tx_div5" }; ++PNAME(mux_gmac1_rmii_speed_p) = { "clk_gmac1_rx_div20", "clk_gmac1_rx_div2" }; ++PNAME(mux_gmac1_rx_tx_p) = { "clk_gmac1_rgmii_speed", "clk_gmac1_rmii_speed", "clk_gmac1_xpcs_mii" }; ++PNAME(clk_hdmi_ref_p) = { "hpll", "hpll_ph0" }; ++PNAME(clk_pdpmu_p) = { "ppll", "gpll" }; ++PNAME(clk_mac_2top_p) = { "cpll_125m", "cpll_50m", "cpll_25m", "ppll" }; ++PNAME(clk_pwm0_p) = { "xin24m", "clk_pdpmu" }; ++PNAME(aclk_rkvdec_pre_p) = { "gpll", "cpll" }; ++PNAME(clk_rkvdec_core_p) = { "gpll", "cpll", "dummy_npll", "dummy_vpll" }; ++PNAME(clk_32k_ioe_p) = { "clk_rtc_32k", "xin32k" }; ++PNAME(i2s1_mclkout_p) = { "i2s1_mclkout_rx", "i2s1_mclkout_tx" }; ++PNAME(i2s3_mclkout_p) = { "i2s3_mclkout_rx", "i2s3_mclkout_tx" }; ++PNAME(i2s1_mclk_rx_ioe_p) = { "i2s1_mclkin_rx", "i2s1_mclkout_rx" }; ++PNAME(i2s1_mclk_tx_ioe_p) = { "i2s1_mclkin_tx", "i2s1_mclkout_tx" }; ++PNAME(i2s2_mclk_ioe_p) = { "i2s2_mclkin", "i2s2_mclkout" }; ++PNAME(i2s3_mclk_ioe_p) = { "i2s3_mclkin", "i2s3_mclkout" }; ++ ++static struct rockchip_pll_clock rk3568_pmu_pll_clks[] __initdata = { ++ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, ++ 0, RK3568_PMU_PLL_CON(0), ++ RK3568_PMU_MODE_CON0, 0, 4, 0, rk3568_pll_rates), ++ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, ++ 0, RK3568_PMU_PLL_CON(16), ++ RK3568_PMU_MODE_CON0, 2, 7, 0, rk3568_pll_rates), +}; + -+/* -+ * PLL flags -+ */ -+#define ROCKCHIP_PLL_SYNC_RATE BIT(0) -+/* normal mode only. 
now only for pll_rk3036, pll_rk3328 type */ -+#define ROCKCHIP_PLL_FIXED_MODE BIT(1) -+#define ROCKCHIP_PLL_ALLOW_POWER_DOWN BIT(2) ++static struct rockchip_pll_clock rk3568_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ 0, RK3568_PLL_CON(0), ++ RK3568_MODE_CON0, 0, 0, 0, rk3568_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ 0, RK3568_PLL_CON(8), ++ RK3568_MODE_CON0, 2, 1, 0, NULL), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK3568_PLL_CON(24), ++ RK3568_MODE_CON0, 4, 2, 0, rk3568_pll_rates), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK3568_PLL_CON(16), ++ RK3568_MODE_CON0, 6, 3, 0, rk3568_pll_rates), ++ [npll] = PLL(pll_rk3328, PLL_NPLL, "npll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3568_PLL_CON(32), ++ RK3568_MODE_CON0, 10, 5, 0, rk3568_pll_rates), ++ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, ++ 0, RK3568_PLL_CON(40), ++ RK3568_MODE_CON0, 12, 6, 0, rk3568_pll_rates), ++}; + -+#define PLL(_type, _id, _name, _pnames, _flags, _con, _mode, _mshift, \ -+ _lshift, _pflags, _rtable) \ -+ { \ -+ .id = _id, \ -+ .type = _type, \ -+ .name = _name, \ -+ .parent_names = _pnames, \ -+ .num_parents = ARRAY_SIZE(_pnames), \ -+ .flags = CLK_GET_RATE_NOCACHE | _flags, \ -+ .con_offset = _con, \ -+ .mode_offset = _mode, \ -+ .mode_shift = _mshift, \ -+ .lock_shift = _lshift, \ -+ .pll_flags = _pflags, \ -+ .rate_table = _rtable, \ -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx, -+ enum rockchip_pll_type pll_type, -+ const char *name, const char *const *parent_names, -+ u8 num_parents, int con_offset, int grf_lock_offset, -+ int lock_shift, int mode_offset, int mode_shift, -+ struct rockchip_pll_rate_table *rate_table, -+ unsigned long flags, u8 clk_pll_flags); ++static struct rockchip_clk_branch rk3568_i2s0_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(11), 10, 2, MFLAGS); + -+void rockchip_boost_init(struct clk_hw *hw); ++static struct rockchip_clk_branch rk3568_i2s0_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(13), 10, 2, MFLAGS); + -+void rockchip_boost_enable_recovery_sw_low(struct clk_hw *hw); ++static struct rockchip_clk_branch rk3568_i2s1_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", clk_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(15), 10, 2, MFLAGS); + -+void rockchip_boost_disable_recovery_sw(struct clk_hw *hw); ++static struct rockchip_clk_branch rk3568_i2s1_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", clk_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(17), 10, 2, MFLAGS); + -+void rockchip_boost_add_core_div(struct clk_hw *hw, unsigned long prate); ++static struct rockchip_clk_branch rk3568_i2s2_2ch_fracmux __initdata = ++ MUX(CLK_I2S2_2CH, "clk_i2s2_2ch", clk_i2s2_2ch_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(19), 10, 2, MFLAGS); + -+struct rockchip_cpuclk_clksel { -+ int reg; -+ u32 val; -+}; ++static struct rockchip_clk_branch rk3568_i2s3_2ch_tx_fracmux __initdata = ++ MUX(CLK_I2S3_2CH_TX, "clk_i2s3_2ch_tx", clk_i2s3_2ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(21), 10, 2, MFLAGS); + -+#define ROCKCHIP_CPUCLK_NUM_DIVIDERS 6 -+#define 
ROCKCHIP_CPUCLK_MAX_CORES 4 -+struct rockchip_cpuclk_rate_table { -+ unsigned long prate; -+ struct rockchip_cpuclk_clksel divs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; -+ struct rockchip_cpuclk_clksel pre_muxs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; -+ struct rockchip_cpuclk_clksel post_muxs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; -+}; ++static struct rockchip_clk_branch rk3568_i2s3_2ch_rx_fracmux __initdata = ++ MUX(CLK_I2S3_2CH_RX, "clk_i2s3_2ch_rx", clk_i2s3_2ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(83), 10, 2, MFLAGS); + -+/** -+ * struct rockchip_cpuclk_reg_data - register offsets and masks of the cpuclock -+ * @core_reg[]: register offset of the cores setting register -+ * @div_core_shift[]: cores divider offset used to divide the pll value -+ * @div_core_mask[]: cores divider mask -+ * @num_cores: number of cpu cores -+ * @mux_core_reg: register offset of the cores select parent -+ * @mux_core_alt: mux value to select alternate parent -+ * @mux_core_main: mux value to select main parent of core -+ * @mux_core_shift: offset of the core multiplexer -+ * @mux_core_mask: core multiplexer mask -+ */ -+struct rockchip_cpuclk_reg_data { -+ int core_reg[ROCKCHIP_CPUCLK_MAX_CORES]; -+ u8 div_core_shift[ROCKCHIP_CPUCLK_MAX_CORES]; -+ u32 div_core_mask[ROCKCHIP_CPUCLK_MAX_CORES]; -+ int num_cores; -+ int mux_core_reg; -+ u8 mux_core_alt; -+ u8 mux_core_main; -+ u8 mux_core_shift; -+ u32 mux_core_mask; -+ const char *pll_name; -+}; ++static struct rockchip_clk_branch rk3568_spdif_8ch_fracmux __initdata = ++ MUX(MCLK_SPDIF_8CH, "mclk_spdif_8ch", mclk_spdif_8ch_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(23), 15, 1, MFLAGS); + -+struct clk *rockchip_clk_register_cpuclk(const char *name, -+ u8 num_parents, -+ struct clk *parent, struct clk *alt_parent, -+ const struct rockchip_cpuclk_reg_data *reg_data, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates, void __iomem *reg_base, spinlock_t *lock); ++static struct rockchip_clk_branch rk3568_audpwm_fracmux __initdata = ++ MUX(SCLK_AUDPWM, "sclk_audpwm", sclk_audpwm_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(25), 15, 1, MFLAGS); + -+struct clk *rockchip_clk_register_cpuclk_v2(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ unsigned long flags, spinlock_t *lock, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates); ++static struct rockchip_clk_branch rk3568_uart1_fracmux __initdata = ++ MUX(0, "sclk_uart1_mux", sclk_uart1_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(52), 12, 2, MFLAGS); + -+struct clk *rockchip_clk_register_mmc(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *reg, int shift); ++static struct rockchip_clk_branch rk3568_uart2_fracmux __initdata = ++ MUX(0, "sclk_uart2_mux", sclk_uart2_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(54), 12, 2, MFLAGS); + -+/* -+ * DDRCLK flags, including method of setting the rate -+ * ROCKCHIP_DDRCLK_SIP: use SIP call to bl31 to change ddrclk rate. 
-+ */ -+#define ROCKCHIP_DDRCLK_SIP BIT(0) -+#define ROCKCHIP_DDRCLK_SIP_V2 0x03 ++static struct rockchip_clk_branch rk3568_uart3_fracmux __initdata = ++ MUX(0, "sclk_uart3_mux", sclk_uart3_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(56), 12, 2, MFLAGS); + -+#ifdef CONFIG_ROCKCHIP_DDRCLK -+void rockchip_set_ddrclk_params(void __iomem *params); -+void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)); ++static struct rockchip_clk_branch rk3568_uart4_fracmux __initdata = ++ MUX(0, "sclk_uart4_mux", sclk_uart4_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(58), 12, 2, MFLAGS); + -+struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, -+ const char *const *parent_names, -+ u8 num_parents, int mux_offset, -+ int mux_shift, int mux_width, -+ int div_shift, int div_width, -+ int ddr_flags, void __iomem *reg_base); -+#else -+static inline void rockchip_set_ddrclk_params(void __iomem *params) {} -+static inline void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)) {} -+static inline -+struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, -+ const char *const *parent_names, -+ u8 num_parents, int mux_offset, -+ int mux_shift, int mux_width, -+ int div_shift, int div_width, -+ int ddr_flags, void __iomem *reg_base) -+{ -+ return NULL; -+} -+#endif ++static struct rockchip_clk_branch rk3568_uart5_fracmux __initdata = ++ MUX(0, "sclk_uart5_mux", sclk_uart5_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(60), 12, 2, MFLAGS); + -+#define ROCKCHIP_INVERTER_HIWORD_MASK BIT(0) ++static struct rockchip_clk_branch rk3568_uart6_fracmux __initdata = ++ MUX(0, "sclk_uart6_mux", sclk_uart6_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(62), 12, 2, MFLAGS); + -+struct clk *rockchip_clk_register_inverter(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ void __iomem *reg, int shift, int flags, -+ spinlock_t *lock); ++static struct rockchip_clk_branch rk3568_uart7_fracmux __initdata = ++ MUX(0, "sclk_uart7_mux", sclk_uart7_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(64), 12, 2, MFLAGS); + -+struct clk *rockchip_clk_register_muxgrf(const char *name, -+ const char *const *parent_names, u8 num_parents, -+ int flags, struct regmap *grf, int reg, -+ int shift, int width, int mux_flags); ++static struct rockchip_clk_branch rk3568_uart8_fracmux __initdata = ++ MUX(0, "sclk_uart8_mux", sclk_uart8_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(66), 12, 2, MFLAGS); + -+#define PNAME(x) static const char *const x[] __initconst ++static struct rockchip_clk_branch rk3568_uart9_fracmux __initdata = ++ MUX(0, "sclk_uart9_mux", sclk_uart9_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(68), 12, 2, MFLAGS); + -+enum rockchip_clk_branch_type { -+ branch_composite, -+ branch_mux, -+ branch_muxgrf, -+ branch_muxpmugrf, -+ branch_divider, -+ branch_fraction_divider, -+ branch_gate, -+ branch_gate_no_set_rate, -+ branch_mmc, -+ branch_inverter, -+ branch_factor, -+ branch_ddrclk, -+ branch_half_divider, -+}; ++static struct rockchip_clk_branch rk3568_uart0_fracmux __initdata = ++ MUX(0, "sclk_uart0_mux", sclk_uart0_p, CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(4), 10, 2, MFLAGS); + -+struct rockchip_clk_branch { -+ unsigned int id; -+ enum rockchip_clk_branch_type branch_type; -+ const char *name; -+ const char *const *parent_names; -+ u8 num_parents; -+ unsigned long flags; -+ int muxdiv_offset; -+ u8 mux_shift; -+ u8 mux_width; -+ u8 mux_flags; -+ u32 *mux_table; -+ int div_offset; -+ u8 div_shift; -+ u8 div_width; -+ u8 div_flags; -+ struct clk_div_table 
*div_table; -+ int gate_offset; -+ u8 gate_shift; -+ u8 gate_flags; -+ struct rockchip_clk_branch *child; -+}; ++static struct rockchip_clk_branch rk3568_rtc32k_pmu_fracmux __initdata = ++ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3568_PMU_CLKSEL_CON(0), 6, 2, MFLAGS); + -+#define COMPOSITE(_id, cname, pnames, f, mo, ms, mw, mf, ds, dw,\ -+ df, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++static struct rockchip_clk_branch rk3568_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ ++ /* SRC_CLK */ ++ COMPOSITE_NOMUX(0, "gpll_400m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(75), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_300m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(75), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_200m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(76), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_150m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(76), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_100m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(77), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_75m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(77), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 5, GFLAGS), ++ COMPOSITE_NOMUX(0, "gpll_20m", "gpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(78), 0, 6, DFLAGS, ++ RK3568_CLKGATE_CON(35), 6, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_500M, "cpll_500m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(78), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 7, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_333M, "cpll_333m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(79), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 8, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_250M, "cpll_250m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(79), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 9, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_125M, "cpll_125m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(80), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 10, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_100M, "cpll_100m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(82), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 11, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_62P5M, "cpll_62p5", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(80), 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 12, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_50M, "cpll_50m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(81), 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(35), 13, GFLAGS), ++ COMPOSITE_NOMUX(CPLL_25M, "cpll_25m", "cpll", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(81), 8, 6, DFLAGS, ++ RK3568_CLKGATE_CON(35), 14, GFLAGS), ++ COMPOSITE_NOMUX(0, "clk_osc0_div_750k", "xin24m", CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(82), 8, 6, DFLAGS, ++ RK3568_CLKGATE_CON(35), 15, GFLAGS), ++ FACTOR(0, "clk_osc0_div_375k", "clk_osc0_div_750k", 0, 1, 2), ++ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), ++ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RK3568_MODE_CON0, 14, 2, MFLAGS), + -+#define COMPOSITE_MUXTBL(_id, cname, pnames, 
f, mo, ms, mw, mf, \ -+ mt, ds, dw, df, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .mux_table = mt, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ /* PD_CORE */ ++ COMPOSITE(0, "sclk_core_src", apll_gpll_npll_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NODIV(0, "sclk_core", sclk_core_pre_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(2), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(0), 7, GFLAGS), + -+#define COMPOSITE_DIV_OFFSET(_id, cname, pnames, f, mo, ms, mw, \ -+ mf, do, ds, dw, df, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_offset = do, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ COMPOSITE_NOMUX(0, "atclk_core", "armclk", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "gicclk_core", "armclk", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(3), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_core_pre", "armclk", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(4), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOMUX(0, "periphclk_core_pre", "armclk", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(4), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE_NOMUX(0, "tsclk_core", "periphclk_core_pre", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(5), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NOMUX(0, "cntclk_core", "periphclk_core_pre", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(5), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "sclk_core", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(5), 8, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(1), 0, GFLAGS), + -+#define COMPOSITE_NOMUX(_id, cname, pname, f, mo, ds, dw, df, \ -+ go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ COMPOSITE_NODIV(ACLK_CORE_NIU2BUS, "aclk_core_niu2bus", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(5), 14, 2, MFLAGS, ++ RK3568_CLKGATE_CON(1), 2, GFLAGS), + -+#define COMPOSITE_NOMUX_DIVTBL(_id, cname, pname, f, mo, ds, dw,\ -+ df, dt, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .div_table = dt, \ -+ .gate_offset = go, \ 
-+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ GATE(CLK_CORE_PVTM, "clk_core_pvtm", "xin24m", 0, ++ RK3568_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(CLK_CORE_PVTM_CORE, "clk_core_pvtm_core", "armclk", 0, ++ RK3568_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(CLK_CORE_PVTPLL, "clk_core_pvtpll", "armclk", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(PCLK_CORE_PVTM, "pclk_core_pvtm", "pclk_core_pre", 0, ++ RK3568_CLKGATE_CON(1), 9, GFLAGS), + -+#define COMPOSITE_NODIV(_id, cname, pnames, f, mo, ms, mw, mf, \ -+ go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ /* PD_GPU */ ++ COMPOSITE(CLK_GPU_SRC, "clk_gpu_src", mpll_gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(6), 6, 2, MFLAGS | CLK_MUX_READ_ONLY, 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3568_CLKGATE_CON(2), 0, GFLAGS), ++ MUX(CLK_GPU_PRE_MUX, "clk_gpu_pre_mux", clk_gpu_pre_mux_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(6), 11, 1, MFLAGS | CLK_MUX_READ_ONLY), ++ DIV(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre_mux", 0, ++ RK3568_CLKSEL_CON(6), 8, 2, DFLAGS), ++ DIV(PCLK_GPU_PRE, "pclk_gpu_pre", "clk_gpu_pre_mux", 0, ++ RK3568_CLKSEL_CON(6), 12, 4, DFLAGS), ++ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre_mux", 0, ++ RK3568_CLKGATE_CON(2), 3, GFLAGS), + -+#define COMPOSITE_NOGATE(_id, cname, pnames, f, mo, ms, mw, mf, \ -+ ds, dw, df) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ } ++ GATE(PCLK_GPU_PVTM, "pclk_gpu_pvtm", "pclk_gpu_pre", 0, ++ RK3568_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(CLK_GPU_PVTM, "clk_gpu_pvtm", "xin24m", 0, ++ RK3568_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(CLK_GPU_PVTM_CORE, "clk_gpu_pvtm_core", "clk_gpu_src", 0, ++ RK3568_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(CLK_GPU_PVTPLL, "clk_gpu_pvtpll", "clk_gpu_src", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(2), 9, GFLAGS), + -+#define COMPOSITE_NOGATE_DIVTBL(_id, cname, pnames, f, mo, ms, \ -+ mw, mf, ds, dw, df, dt) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_composite, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .div_table = dt, \ -+ .gate_offset = -1, \ -+ } ++ /* PD_NPU */ ++ COMPOSITE(CLK_NPU_SRC, "clk_npu_src", npll_gpll_p, 0, ++ RK3568_CLKSEL_CON(7), 6, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3568_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_NPU_NP5, "clk_npu_np5", npll_gpll_p, 0, ++ RK3568_CLKSEL_CON(7), 7, 1, MFLAGS, 4, 2, DFLAGS, ++ RK3568_CLKGATE_CON(3), 1, GFLAGS), ++ MUX(CLK_NPU_PRE_NDFT, "clk_npu_pre_ndft", clk_npu_pre_ndft_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RK3568_CLKSEL_CON(7), 8, 1, MFLAGS), ++ MUX(CLK_NPU, "clk_npu", clk_npu_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(7), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu", 0, ++ RK3568_CLKSEL_CON(8), 0, 4, DFLAGS, ++ RK3568_CLKGATE_CON(3), 2, GFLAGS), 
++ COMPOSITE_NOMUX(PCLK_NPU_PRE, "pclk_npu_pre", "clk_npu", 0, ++ RK3568_CLKSEL_CON(8), 4, 4, DFLAGS, ++ RK3568_CLKGATE_CON(3), 3, GFLAGS), ++ GATE(ACLK_NPU_PRE, "aclk_npu_pre", "clk_npu", 0, ++ RK3568_CLKGATE_CON(3), 4, GFLAGS), ++ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, ++ RK3568_CLKGATE_CON(3), 7, GFLAGS), ++ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, ++ RK3568_CLKGATE_CON(3), 8, GFLAGS), + -+#define COMPOSITE_FRAC(_id, cname, pname, f, mo, df, go, gs, gf)\ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_fraction_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = 16, \ -+ .div_width = 16, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ GATE(PCLK_NPU_PVTM, "pclk_npu_pvtm", "pclk_npu_pre", 0, ++ RK3568_CLKGATE_CON(3), 9, GFLAGS), ++ GATE(CLK_NPU_PVTM, "clk_npu_pvtm", "xin24m", 0, ++ RK3568_CLKGATE_CON(3), 10, GFLAGS), ++ GATE(CLK_NPU_PVTM_CORE, "clk_npu_pvtm_core", "clk_npu_pre_ndft", 0, ++ RK3568_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(CLK_NPU_PVTPLL, "clk_npu_pvtpll", "clk_npu_pre_ndft", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(3), 12, GFLAGS), + -+#define COMPOSITE_FRACMUX(_id, cname, pname, f, mo, df, go, gs, gf, ch) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_fraction_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = 16, \ -+ .div_width = 16, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ .child = ch, \ -+ } ++ /* PD_DDR */ ++ COMPOSITE(CLK_DDRPHY1X_SRC, "clk_ddrphy1x_src", dpll_gpll_cpll_p, CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(4), 0, GFLAGS), ++ MUXGRF(CLK_DDR1X, "clk_ddr1x", clk_ddr1x_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(9), 15, 1, MFLAGS), + -+#define COMPOSITE_FRACMUX_NOGATE(_id, cname, pname, f, mo, df, ch) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_fraction_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = 16, \ -+ .div_width = 16, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ .child = ch, \ -+ } ++ COMPOSITE_NOMUX(CLK_MSCH, "clk_msch", "clk_ddr1x", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(10), 0, 2, DFLAGS, ++ RK3568_CLKGATE_CON(4), 2, GFLAGS), ++ GATE(CLK24_DDRMON, "clk24_ddrmon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(4), 15, GFLAGS), + -+#define COMPOSITE_DDRCLK(_id, cname, pnames, f, mo, ms, mw, \ -+ ds, dw, df) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_ddrclk, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ } ++ /* PD_GIC_AUDIO */ ++ COMPOSITE_NODIV(ACLK_GIC_AUDIO, "aclk_gic_audio", gpll200_gpll150_gpll100_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(10), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_GIC_AUDIO, "hclk_gic_audio", gpll150_gpll100_gpll75_xin24m_p, CLK_IGNORE_UNUSED, ++ RK3568_CLKSEL_CON(10), 10, 2, MFLAGS, ++ RK3568_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(HCLK_SDMMC_BUFFER, "hclk_sdmmc_buffer", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 8, GFLAGS), ++ 
COMPOSITE_NODIV(DCLK_SDMMC_BUFFER, "dclk_sdmmc_buffer", gpll100_gpll75_gpll50_p, 0, ++ RK3568_CLKSEL_CON(10), 12, 2, MFLAGS, ++ RK3568_CLKGATE_CON(5), 9, GFLAGS), ++ GATE(ACLK_GIC600, "aclk_gic600", "aclk_gic_audio", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_gic_audio", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 10, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 12, GFLAGS), ++ GATE(HCLK_I2S3_2CH, "hclk_i2s3_2ch", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 13, GFLAGS), + -+#define MUX(_id, cname, pnames, f, o, s, w, mf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_mux, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .mux_shift = s, \ -+ .mux_width = w, \ -+ .mux_flags = mf, \ -+ .gate_offset = -1, \ -+ } ++ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(12), 0, ++ RK3568_CLKGATE_CON(6), 1, GFLAGS, ++ &rk3568_i2s0_8ch_tx_fracmux), ++ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, ++ RK3568_CLKGATE_CON(6), 2, GFLAGS), ++ COMPOSITE_NODIV(I2S0_MCLKOUT_TX, "i2s0_mclkout_tx", i2s0_mclkout_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(11), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(6), 3, GFLAGS), + -+#define MUXTBL(_id, cname, pnames, f, o, s, w, mf, mt) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_mux, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .mux_shift = s, \ -+ .mux_width = w, \ -+ .mux_flags = mf, \ -+ .gate_offset = -1, \ -+ .mux_table = mt, \ -+ } ++ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(13), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(14), 0, ++ RK3568_CLKGATE_CON(6), 5, GFLAGS, ++ &rk3568_i2s0_8ch_rx_fracmux), ++ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, ++ RK3568_CLKGATE_CON(6), 6, GFLAGS), ++ COMPOSITE_NODIV(I2S0_MCLKOUT_RX, "i2s0_mclkout_rx", i2s0_mclkout_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(13), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(6), 7, GFLAGS), + -+#define MUXGRF(_id, cname, pnames, f, o, s, w, mf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_muxgrf, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .mux_shift = s, \ -+ .mux_width = w, \ -+ .mux_flags = mf, \ -+ .gate_offset = -1, \ -+ } ++ COMPOSITE(CLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(15), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(6), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_TX_FRAC, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(16), 0, ++ RK3568_CLKGATE_CON(6), 9, GFLAGS, ++ &rk3568_i2s1_8ch_tx_fracmux), ++ GATE(MCLK_I2S1_8CH_TX, 
"mclk_i2s1_8ch_tx", "clk_i2s1_8ch_tx", 0, ++ RK3568_CLKGATE_CON(6), 10, GFLAGS), ++ COMPOSITE_NODIV(I2S1_MCLKOUT_TX, "i2s1_mclkout_tx", i2s1_mclkout_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(15), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(6), 11, GFLAGS), + -+#define MUXPMUGRF(_id, cname, pnames, f, o, s, w, mf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_muxpmugrf, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .mux_shift = s, \ -+ .mux_width = w, \ -+ .mux_flags = mf, \ -+ .gate_offset = -1, \ -+ } ++ COMPOSITE(CLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(17), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(6), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_RX_FRAC, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(18), 0, ++ RK3568_CLKGATE_CON(6), 13, GFLAGS, ++ &rk3568_i2s1_8ch_rx_fracmux), ++ GATE(MCLK_I2S1_8CH_RX, "mclk_i2s1_8ch_rx", "clk_i2s1_8ch_rx", 0, ++ RK3568_CLKGATE_CON(6), 14, GFLAGS), ++ COMPOSITE_NODIV(I2S1_MCLKOUT_RX, "i2s1_mclkout_rx", i2s1_mclkout_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(17), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(6), 15, GFLAGS), + -+#define DIV(_id, cname, pname, f, o, s, w, df) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .div_shift = s, \ -+ .div_width = w, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ } ++ COMPOSITE(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(19), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(20), 0, ++ RK3568_CLKGATE_CON(7), 1, GFLAGS, ++ &rk3568_i2s2_2ch_fracmux), ++ GATE(MCLK_I2S2_2CH, "mclk_i2s2_2ch", "clk_i2s2_2ch", 0, ++ RK3568_CLKGATE_CON(7), 2, GFLAGS), ++ COMPOSITE_NODIV(I2S2_MCLKOUT, "i2s2_mclkout", i2s2_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(19), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(7), 3, GFLAGS), + -+#define DIVTBL(_id, cname, pname, f, o, s, w, df, dt) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .div_shift = s, \ -+ .div_width = w, \ -+ .div_flags = df, \ -+ .div_table = dt, \ -+ } ++ COMPOSITE(CLK_I2S3_2CH_TX_SRC, "clk_i2s3_2ch_tx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(21), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(7), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S3_2CH_TX_FRAC, "clk_i2s3_2ch_tx_frac", "clk_i2s3_2ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(22), 0, ++ RK3568_CLKGATE_CON(7), 5, GFLAGS, ++ &rk3568_i2s3_2ch_tx_fracmux), ++ GATE(MCLK_I2S3_2CH_TX, "mclk_i2s3_2ch_tx", "clk_i2s3_2ch_tx", 0, ++ RK3568_CLKGATE_CON(7), 6, GFLAGS), ++ COMPOSITE_NODIV(I2S3_MCLKOUT_TX, "i2s3_mclkout_tx", i2s3_mclkout_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(21), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(7), 7, GFLAGS), + -+#define GATE(_id, cname, pname, f, o, b, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_gate, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .gate_offset = o, \ -+ .gate_shift = b, \ -+ .gate_flags = gf, \ -+ } ++ COMPOSITE(CLK_I2S3_2CH_RX_SRC, 
"clk_i2s3_2ch_rx_src", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(83), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S3_2CH_RX_FRAC, "clk_i2s3_2ch_rx_frac", "clk_i2s3_2ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(84), 0, ++ RK3568_CLKGATE_CON(7), 9, GFLAGS, ++ &rk3568_i2s3_2ch_rx_fracmux), ++ GATE(MCLK_I2S3_2CH_RX, "mclk_i2s3_2ch_rx", "clk_i2s3_2ch_rx", 0, ++ RK3568_CLKGATE_CON(7), 10, GFLAGS), ++ COMPOSITE_NODIV(I2S3_MCLKOUT_RX, "i2s3_mclkout_rx", i2s3_mclkout_rx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(83), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(7), 11, GFLAGS), + -+#define GATE_NO_SET_RATE(_id, cname, pname, f, o, b, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_gate_no_set_rate, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .gate_offset = o, \ -+ .gate_shift = b, \ -+ .gate_flags = gf, \ -+ } ++ MUXGRF(I2S1_MCLKOUT, "i2s1_mclkout", i2s1_mclkout_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3568_GRF_SOC_CON1, 5, 1, MFLAGS), ++ MUXGRF(I2S3_MCLKOUT, "i2s3_mclkout", i2s3_mclkout_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3568_GRF_SOC_CON2, 15, 1, MFLAGS), ++ MUXGRF(I2S1_MCLK_RX_IOE, "i2s1_mclk_rx_ioe", i2s1_mclk_rx_ioe_p, 0, ++ RK3568_GRF_SOC_CON2, 0, 1, MFLAGS), ++ MUXGRF(I2S1_MCLK_TX_IOE, "i2s1_mclk_tx_ioe", i2s1_mclk_tx_ioe_p, 0, ++ RK3568_GRF_SOC_CON2, 1, 1, MFLAGS), ++ MUXGRF(I2S2_MCLK_IOE, "i2s2_mclk_ioe", i2s2_mclk_ioe_p, 0, ++ RK3568_GRF_SOC_CON2, 2, 1, MFLAGS), ++ MUXGRF(I2S3_MCLK_IOE, "i2s3_mclk_ioe", i2s3_mclk_ioe_p, 0, ++ RK3568_GRF_SOC_CON2, 3, 1, MFLAGS), + -+#define MMC(_id, cname, pname, offset, shift) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_mmc, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .muxdiv_offset = offset, \ -+ .div_shift = shift, \ -+ } ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(5), 14, GFLAGS), ++ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mclk_pdm_p, 0, ++ RK3568_CLKSEL_CON(23), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(5), 15, GFLAGS), ++ GATE(HCLK_VAD, "hclk_vad", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(HCLK_SPDIF_8CH, "hclk_spdif_8ch", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(7), 13, GFLAGS), + -+#define INVERTER(_id, cname, pname, io, is, if) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_inverter, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .muxdiv_offset = io, \ -+ .div_shift = is, \ -+ .div_flags = if, \ -+ } ++ COMPOSITE(MCLK_SPDIF_8CH_SRC, "mclk_spdif_8ch_src", cpll_gpll_p, 0, ++ RK3568_CLKSEL_CON(23), 14, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(7), 14, GFLAGS), ++ COMPOSITE_FRACMUX(MCLK_SPDIF_8CH_FRAC, "mclk_spdif_8ch_frac", "mclk_spdif_8ch_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(24), 0, ++ RK3568_CLKGATE_CON(7), 15, GFLAGS, ++ &rk3568_spdif_8ch_fracmux), + -+#define FACTOR(_id, cname, pname, f, fm, fd) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_factor, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .div_shift = fm, \ -+ .div_width = fd, \ -+ } ++ GATE(HCLK_AUDPWM, "hclk_audpwm", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE(SCLK_AUDPWM_SRC, "sclk_audpwm_src", gpll_cpll_p, 0, ++ RK3568_CLKSEL_CON(25), 14, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3568_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_AUDPWM_FRAC, "sclk_audpwm_frac", 
"sclk_audpwm_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(26), 0, ++ RK3568_CLKGATE_CON(8), 2, GFLAGS, ++ &rk3568_audpwm_fracmux), + -+#define FACTOR_GATE(_id, cname, pname, f, fm, fd, go, gb, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_factor, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .div_shift = fm, \ -+ .div_width = fd, \ -+ .gate_offset = go, \ -+ .gate_shift = gb, \ -+ .gate_flags = gf, \ -+ } ++ GATE(HCLK_ACDCDIG, "hclk_acdcdig", "hclk_gic_audio", 0, ++ RK3568_CLKGATE_CON(8), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_ACDCDIG_I2C, "clk_acdcdig_i2c", clk_i2c_p, 0, ++ RK3568_CLKSEL_CON(23), 10, 2, MFLAGS, ++ RK3568_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(CLK_ACDCDIG_DAC, "clk_acdcdig_dac", "mclk_i2s3_2ch_tx", 0, ++ RK3568_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(CLK_ACDCDIG_ADC, "clk_acdcdig_adc", "mclk_i2s3_2ch_rx", 0, ++ RK3568_CLKGATE_CON(8), 6, GFLAGS), + -+#define COMPOSITE_HALFDIV(_id, cname, pnames, f, mo, ms, mw, mf, ds, dw,\ -+ df, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_half_divider, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ /* PD_SECURE_FLASH */ ++ COMPOSITE_NODIV(ACLK_SECURE_FLASH, "aclk_secure_flash", gpll200_gpll150_gpll100_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(27), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(8), 7, GFLAGS), ++ COMPOSITE_NODIV(HCLK_SECURE_FLASH, "hclk_secure_flash", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(27), 2, 2, MFLAGS, ++ RK3568_CLKGATE_CON(8), 8, GFLAGS), ++ GATE(ACLK_CRYPTO_NS, "aclk_crypto_ns", "aclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(8), 11, GFLAGS), ++ GATE(HCLK_CRYPTO_NS, "hclk_crypto_ns", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(8), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_CRYPTO_NS_CORE, "clk_crypto_ns_core", gpll200_gpll150_gpll100_p, 0, ++ RK3568_CLKSEL_CON(27), 4, 2, MFLAGS, ++ RK3568_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE_NODIV(CLK_CRYPTO_NS_PKA, "clk_crypto_ns_pka", gpll300_gpll200_gpll100_p, 0, ++ RK3568_CLKSEL_CON(27), 6, 2, MFLAGS, ++ RK3568_CLKGATE_CON(8), 14, GFLAGS), ++ GATE(CLK_CRYPTO_NS_RNG, "clk_crypto_ns_rng", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(8), 15, GFLAGS), ++ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_secure_flash", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(CLK_TRNG_NS, "clk_trng_ns", "hclk_secure_flash", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(9), 11, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(26), 9, GFLAGS), ++ GATE(CLK_OTPC_NS_SBPI, "clk_otpc_ns_sbpi", "xin24m", 0, ++ RK3568_CLKGATE_CON(26), 10, GFLAGS), ++ GATE(CLK_OTPC_NS_USR, "clk_otpc_ns_usr", "xin_osc0_half", 0, ++ RK3568_CLKGATE_CON(26), 11, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE_NODIV(NCLK_NANDC, "nclk_nandc", clk_nandc_p, 0, ++ RK3568_CLKSEL_CON(28), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(9), 1, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(HCLK_SFC_XIP, "hclk_sfc_xip", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(9), 3, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SFC, "sclk_sfc", sclk_sfc_p, 0, ++ RK3568_CLKSEL_CON(28), 4, 3, 
MFLAGS, ++ RK3568_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(ACLK_EMMC, "aclk_emmc", "aclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_secure_flash", 0, ++ RK3568_CLKGATE_CON(9), 6, GFLAGS), ++ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", gpll200_gpll150_cpll125_p, 0, ++ RK3568_CLKSEL_CON(28), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(9), 7, GFLAGS), ++ COMPOSITE_NODIV(CCLK_EMMC, "cclk_emmc", cclk_emmc_p, 0, ++ RK3568_CLKSEL_CON(28), 12, 3, MFLAGS, ++ RK3568_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, ++ RK3568_CLKGATE_CON(9), 9, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_emmc", RK3568_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_emmc", RK3568_EMMC_CON1, 1), + -+#define COMPOSITE_HALFDIV_OFFSET(_id, cname, pnames, f, mo, ms, mw, mf, do,\ -+ ds, dw, df, go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_half_divider, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_offset = do, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ /* PD_PIPE */ ++ COMPOSITE_NODIV(ACLK_PIPE, "aclk_pipe", aclk_pipe_p, 0, ++ RK3568_CLKSEL_CON(29), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PIPE, "pclk_pipe", "aclk_pipe", 0, ++ RK3568_CLKSEL_CON(29), 4, 4, DFLAGS, ++ RK3568_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(CLK_PCIE20_AUX_NDFT, "clk_pcie20_aux_ndft", "xin24m", 0, ++ RK3568_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(ACLK_PCIE30X1_MST, "aclk_pcie30x1_mst", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 8, GFLAGS), ++ GATE(ACLK_PCIE30X1_SLV, "aclk_pcie30x1_slv", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 9, GFLAGS), ++ GATE(ACLK_PCIE30X1_DBI, "aclk_pcie30x1_dbi", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 10, GFLAGS), ++ GATE(PCLK_PCIE30X1, "pclk_pcie30x1", "pclk_pipe", 0, ++ RK3568_CLKGATE_CON(12), 11, GFLAGS), ++ GATE(CLK_PCIE30X1_AUX_NDFT, "clk_pcie30x1_aux_ndft", "xin24m", 0, ++ RK3568_CLKGATE_CON(12), 12, GFLAGS), ++ GATE(ACLK_PCIE30X2_MST, "aclk_pcie30x2_mst", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(ACLK_PCIE30X2_SLV, "aclk_pcie30x2_slv", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(ACLK_PCIE30X2_DBI, "aclk_pcie30x2_dbi", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(13), 2, GFLAGS), ++ GATE(PCLK_PCIE30X2, "pclk_pcie30x2", "pclk_pipe", 0, ++ RK3568_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(CLK_PCIE30X2_AUX_NDFT, "clk_pcie30x2_aux_ndft", "xin24m", 0, ++ RK3568_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(ACLK_SATA0, "aclk_sata0", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(CLK_SATA0_PMALIVE, "clk_sata0_pmalive", "gpll_20m", 0, ++ RK3568_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(CLK_SATA0_RXOOB, "clk_sata0_rxoob", "cpll_50m", 0, ++ RK3568_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(ACLK_SATA1, "aclk_sata1", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(CLK_SATA1_PMALIVE, "clk_sata1_pmalive", "gpll_20m", 0, ++ 
RK3568_CLKGATE_CON(11), 5, GFLAGS), ++ GATE(CLK_SATA1_RXOOB, "clk_sata1_rxoob", "cpll_50m", 0, ++ RK3568_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(ACLK_SATA2, "aclk_sata2", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(11), 8, GFLAGS), ++ GATE(CLK_SATA2_PMALIVE, "clk_sata2_pmalive", "gpll_20m", 0, ++ RK3568_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(CLK_SATA2_RXOOB, "clk_sata2_rxoob", "cpll_50m", 0, ++ RK3568_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(CLK_USB3OTG0_REF, "clk_usb3otg0_ref", "xin24m", 0, ++ RK3568_CLKGATE_CON(10), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB3OTG0_SUSPEND, "clk_usb3otg0_suspend", xin24m_32k_p, 0, ++ RK3568_CLKSEL_CON(29), 8, 1, MFLAGS, ++ RK3568_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_pipe", 0, ++ RK3568_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(CLK_USB3OTG1_REF, "clk_usb3otg1_ref", "xin24m", 0, ++ RK3568_CLKGATE_CON(10), 13, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB3OTG1_SUSPEND, "clk_usb3otg1_suspend", xin24m_32k_p, 0, ++ RK3568_CLKSEL_CON(29), 9, 1, MFLAGS, ++ RK3568_CLKGATE_CON(10), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_XPCS_EEE, "clk_xpcs_eee", gpll200_cpll125_p, 0, ++ RK3568_CLKSEL_CON(29), 13, 1, MFLAGS, ++ RK3568_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(PCLK_XPCS, "pclk_xpcs", "pclk_pipe", 0, ++ RK3568_CLKGATE_CON(13), 6, GFLAGS), + -+#define COMPOSITE_NOGATE_HALFDIV(_id, cname, pnames, f, mo, ms, mw, mf, \ -+ ds, dw, df) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_half_divider, \ -+ .name = cname, \ -+ .parent_names = pnames, \ -+ .num_parents = ARRAY_SIZE(pnames), \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .mux_shift = ms, \ -+ .mux_width = mw, \ -+ .mux_flags = mf, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ } ++ /* PD_PHP */ ++ COMPOSITE_NODIV(ACLK_PHP, "aclk_php", gpll300_gpll200_gpll100_xin24m_p, 0, ++ RK3568_CLKSEL_CON(30), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(14), 8, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PHP, "hclk_php", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(30), 2, 2, MFLAGS, ++ RK3568_CLKGATE_CON(14), 9, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(30), 4, 4, DFLAGS, ++ RK3568_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_php", 0, ++ RK3568_CLKGATE_CON(15), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_SDMMC0, "clk_sdmmc0", clk_sdmmc_p, 0, ++ RK3568_CLKSEL_CON(30), 8, 3, MFLAGS, ++ RK3568_CLKGATE_CON(15), 1, GFLAGS), ++ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "clk_sdmmc0", RK3568_SDMMC0_CON0, 1), ++ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "clk_sdmmc0", RK3568_SDMMC0_CON1, 1), + -+#define COMPOSITE_NOMUX_HALFDIV(_id, cname, pname, f, mo, ds, dw, df, \ -+ go, gs, gf) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_half_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = mo, \ -+ .div_shift = ds, \ -+ .div_width = dw, \ -+ .div_flags = df, \ -+ .gate_offset = go, \ -+ .gate_shift = gs, \ -+ .gate_flags = gf, \ -+ } ++ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_php", 0, ++ RK3568_CLKGATE_CON(15), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_SDMMC1, "clk_sdmmc1", clk_sdmmc_p, 0, ++ RK3568_CLKSEL_CON(30), 12, 3, MFLAGS, ++ RK3568_CLKGATE_CON(15), 3, GFLAGS), ++ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "clk_sdmmc1", RK3568_SDMMC1_CON0, 1), ++ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "clk_sdmmc1", RK3568_SDMMC1_CON1, 1), + -+#define DIV_HALF(_id, 
cname, pname, f, o, s, w, df) \ -+ { \ -+ .id = _id, \ -+ .branch_type = branch_half_divider, \ -+ .name = cname, \ -+ .parent_names = (const char *[]){ pname }, \ -+ .num_parents = 1, \ -+ .flags = f, \ -+ .muxdiv_offset = o, \ -+ .div_shift = s, \ -+ .div_width = w, \ -+ .div_flags = df, \ -+ .gate_offset = -1, \ -+ } ++ GATE(ACLK_GMAC0, "aclk_gmac0", "aclk_php", 0, ++ RK3568_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(PCLK_GMAC0, "pclk_gmac0", "pclk_php", 0, ++ RK3568_CLKGATE_CON(15), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC0_2TOP, "clk_mac0_2top", clk_mac_2top_p, 0, ++ RK3568_CLKSEL_CON(31), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(15), 7, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC0_OUT, "clk_mac0_out", cpll125_cpll50_cpll25_xin24m_p, 0, ++ RK3568_CLKSEL_CON(31), 14, 2, MFLAGS, ++ RK3568_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(CLK_MAC0_REFOUT, "clk_mac0_refout", "clk_mac0_2top", 0, ++ RK3568_CLKGATE_CON(15), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC0_PTP_REF, "clk_gmac0_ptp_ref", clk_gmac_ptp_p, 0, ++ RK3568_CLKSEL_CON(31), 12, 2, MFLAGS, ++ RK3568_CLKGATE_CON(15), 4, GFLAGS), ++ MUX(SCLK_GMAC0, "clk_gmac0", mux_gmac0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(31), 2, 1, MFLAGS), ++ FACTOR(0, "clk_gmac0_tx_div5", "clk_gmac0", 0, 1, 5), ++ FACTOR(0, "clk_gmac0_tx_div50", "clk_gmac0", 0, 1, 50), ++ FACTOR(0, "clk_gmac0_rx_div2", "clk_gmac0", 0, 1, 2), ++ FACTOR(0, "clk_gmac0_rx_div20", "clk_gmac0", 0, 1, 20), ++ MUX(SCLK_GMAC0_RGMII_SPEED, "clk_gmac0_rgmii_speed", mux_gmac0_rgmii_speed_p, 0, ++ RK3568_CLKSEL_CON(31), 4, 2, MFLAGS), ++ MUX(SCLK_GMAC0_RMII_SPEED, "clk_gmac0_rmii_speed", mux_gmac0_rmii_speed_p, 0, ++ RK3568_CLKSEL_CON(31), 3, 1, MFLAGS), ++ MUX(SCLK_GMAC0_RX_TX, "clk_gmac0_rx_tx", mux_gmac0_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(31), 0, 2, MFLAGS), + -+/* SGRF clocks are only accessible from secure mode, so not controllable */ -+#define SGRF_GATE(_id, cname, pname) \ -+ FACTOR(_id, cname, pname, 0, 1, 1) ++ /* PD_USB */ ++ COMPOSITE_NODIV(ACLK_USB, "aclk_usb", gpll300_gpll200_gpll100_xin24m_p, 0, ++ RK3568_CLKSEL_CON(32), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(16), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_USB, "hclk_usb", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(32), 2, 2, MFLAGS, ++ RK3568_CLKGATE_CON(16), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_USB, "pclk_usb", "aclk_usb", CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(32), 4, 4, DFLAGS, ++ RK3568_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(HCLK_USB2HOST0, "hclk_usb2host0", "hclk_usb", 0, ++ RK3568_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(HCLK_USB2HOST0_ARB, "hclk_usb2host0_arb", "hclk_usb", 0, ++ RK3568_CLKGATE_CON(16), 13, GFLAGS), ++ GATE(HCLK_USB2HOST1, "hclk_usb2host1", "hclk_usb", 0, ++ RK3568_CLKGATE_CON(16), 14, GFLAGS), ++ GATE(HCLK_USB2HOST1_ARB, "hclk_usb2host1_arb", "hclk_usb", 0, ++ RK3568_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(HCLK_SDMMC2, "hclk_sdmmc2", "hclk_usb", 0, ++ RK3568_CLKGATE_CON(17), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_SDMMC2, "clk_sdmmc2", clk_sdmmc_p, 0, ++ RK3568_CLKSEL_CON(32), 8, 3, MFLAGS, ++ RK3568_CLKGATE_CON(17), 1, GFLAGS), ++ MMC(SCLK_SDMMC2_DRV, "sdmmc2_drv", "clk_sdmmc2", RK3568_SDMMC2_CON0, 1), ++ MMC(SCLK_SDMMC2_SAMPLE, "sdmmc2_sample", "clk_sdmmc2", RK3568_SDMMC2_CON1, 1), + -+struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, -+ void __iomem *base, unsigned long nr_clks); -+void rockchip_clk_of_add_provider(struct device_node *np, -+ struct rockchip_clk_provider *ctx); -+void rockchip_clk_add_lookup(struct rockchip_clk_provider *ctx, -+ struct 
clk *clk, unsigned int id); -+void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, -+ struct rockchip_clk_branch *list, -+ unsigned int nr_clk); -+void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, -+ struct rockchip_pll_clock *pll_list, -+ unsigned int nr_pll, int grf_lock_offset); -+void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx, -+ unsigned int lookup_id, -+ const char *name, -+ u8 num_parents, -+ struct clk *parent, struct clk *alt_parent, -+ const struct rockchip_cpuclk_reg_data *reg_data, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates); -+void rockchip_clk_register_armclk_v2(struct rockchip_clk_provider *ctx, -+ struct rockchip_clk_branch *list, -+ const struct rockchip_cpuclk_rate_table *rates, -+ int nrates); -+int rockchip_pll_clk_rate_to_scale(struct clk *clk, unsigned long rate); -+int rockchip_pll_clk_scale_to_rate(struct clk *clk, unsigned int scale); -+int rockchip_pll_clk_adaptive_scaling(struct clk *clk, int sel); -+void rockchip_register_restart_notifier(struct rockchip_clk_provider *ctx, -+ unsigned int reg, void (*cb)(void)); ++ GATE(ACLK_GMAC1, "aclk_gmac1", "aclk_usb", 0, ++ RK3568_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(PCLK_GMAC1, "pclk_gmac1", "pclk_usb", 0, ++ RK3568_CLKGATE_CON(17), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC1_2TOP, "clk_mac1_2top", clk_mac_2top_p, 0, ++ RK3568_CLKSEL_CON(33), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(17), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC1_OUT, "clk_mac1_out", cpll125_cpll50_cpll25_xin24m_p, 0, ++ RK3568_CLKSEL_CON(33), 14, 2, MFLAGS, ++ RK3568_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(CLK_MAC1_REFOUT, "clk_mac1_refout", "clk_mac1_2top", 0, ++ RK3568_CLKGATE_CON(17), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC1_PTP_REF, "clk_gmac1_ptp_ref", clk_gmac_ptp_p, 0, ++ RK3568_CLKSEL_CON(33), 12, 2, MFLAGS, ++ RK3568_CLKGATE_CON(17), 2, GFLAGS), ++ MUX(SCLK_GMAC1, "clk_gmac1", mux_gmac1_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(33), 2, 1, MFLAGS), ++ FACTOR(0, "clk_gmac1_tx_div5", "clk_gmac1", 0, 1, 5), ++ FACTOR(0, "clk_gmac1_tx_div50", "clk_gmac1", 0, 1, 50), ++ FACTOR(0, "clk_gmac1_rx_div2", "clk_gmac1", 0, 1, 2), ++ FACTOR(0, "clk_gmac1_rx_div20", "clk_gmac1", 0, 1, 20), ++ MUX(SCLK_GMAC1_RGMII_SPEED, "clk_gmac1_rgmii_speed", mux_gmac1_rgmii_speed_p, 0, ++ RK3568_CLKSEL_CON(33), 4, 2, MFLAGS), ++ MUX(SCLK_GMAC1_RMII_SPEED, "clk_gmac1_rmii_speed", mux_gmac1_rmii_speed_p, 0, ++ RK3568_CLKSEL_CON(33), 3, 1, MFLAGS), ++ MUX(SCLK_GMAC1_RX_TX, "clk_gmac1_rx_tx", mux_gmac1_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(33), 0, 2, MFLAGS), + -+#define ROCKCHIP_SOFTRST_HIWORD_MASK BIT(0) ++ /* PD_PERI */ ++ COMPOSITE_NODIV(ACLK_PERIMID, "aclk_perimid", gpll300_gpll200_gpll100_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(10), 4, 2, MFLAGS, ++ RK3568_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PERIMID, "hclk_perimid", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(10), 6, 2, MFLAGS, ++ RK3568_CLKGATE_CON(14), 1, GFLAGS), + -+struct clk *rockchip_clk_register_halfdiv(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ int gate_offset, u8 gate_shift, -+ u8 gate_flags, unsigned long flags, -+ spinlock_t *lock); ++ /* PD_VI */ ++ COMPOSITE_NODIV(ACLK_VI, "aclk_vi", gpll400_gpll300_gpll200_xin24m_p, 0, ++ RK3568_CLKSEL_CON(34), 0, 2, MFLAGS, ++ 
RK3568_CLKGATE_CON(18), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi", 0, ++ RK3568_CLKSEL_CON(34), 4, 4, DFLAGS, ++ RK3568_CLKGATE_CON(18), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi", 0, ++ RK3568_CLKSEL_CON(34), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi", 0, ++ RK3568_CLKGATE_CON(18), 9, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, ++ RK3568_CLKGATE_CON(18), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", cpll333_gpll300_gpll200_p, 0, ++ RK3568_CLKSEL_CON(34), 14, 2, MFLAGS, ++ RK3568_CLKGATE_CON(18), 11, GFLAGS), ++ GATE(ICLK_VICAP_G, "iclk_vicap_g", "iclk_vicap", 0, ++ RK3568_CLKGATE_CON(18), 13, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vi", 0, ++ RK3568_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, ++ RK3568_CLKGATE_CON(19), 1, GFLAGS), ++ COMPOSITE(CLK_ISP, "clk_isp", cpll_gpll_hpll_p, 0, ++ RK3568_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(PCLK_CSI2HOST1, "pclk_csi2host1", "pclk_vi", 0, ++ RK3568_CLKGATE_CON(19), 4, GFLAGS), ++ COMPOSITE(CLK_CIF_OUT, "clk_cif_out", gpll_usb480m_xin24m_p, 0, ++ RK3568_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3568_CLKGATE_CON(19), 8, GFLAGS), ++ COMPOSITE(CLK_CAM0_OUT, "clk_cam0_out", gpll_usb480m_xin24m_p, 0, ++ RK3568_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3568_CLKGATE_CON(19), 9, GFLAGS), ++ COMPOSITE(CLK_CAM1_OUT, "clk_cam1_out", gpll_usb480m_xin24m_p, 0, ++ RK3568_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3568_CLKGATE_CON(19), 10, GFLAGS), + -+#ifdef CONFIG_RESET_CONTROLLER -+void rockchip_register_softrst(struct device_node *np, -+ unsigned int num_regs, -+ void __iomem *base, u8 flags); -+#else -+static inline void rockchip_register_softrst(struct device_node *np, -+ unsigned int num_regs, -+ void __iomem *base, u8 flags) -+{ -+} -+#endif -+extern void (*rk_dump_cru)(void); ++ /* PD_VO */ ++ COMPOSITE_NODIV(ACLK_VO, "aclk_vo", gpll300_cpll250_gpll100_xin24m_p, 0, ++ RK3568_CLKSEL_CON(37), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(20), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VO, "hclk_vo", "aclk_vo", 0, ++ RK3568_CLKSEL_CON(37), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(20), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_VO, "pclk_vo", "aclk_vo", 0, ++ RK3568_CLKSEL_CON(37), 12, 4, DFLAGS, ++ RK3568_CLKGATE_CON(20), 2, GFLAGS), ++ COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", cpll_gpll_hpll_vpll_p, 0, ++ RK3568_CLKSEL_CON(38), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, ++ RK3568_CLKGATE_CON(20), 8, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo", 0, ++ RK3568_CLKGATE_CON(20), 9, GFLAGS), ++ COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(39), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3568_CLKGATE_CON(20), 10, GFLAGS), ++ COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(40), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3568_CLKGATE_CON(20), 11, GFLAGS), ++ COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(41), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3568_CLKGATE_CON(20), 12, GFLAGS), ++ GATE(CLK_VOP_PWM, "clk_vop_pwm", "xin24m", 0, ++ RK3568_CLKGATE_CON(20), 13, GFLAGS), ++ GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 0, GFLAGS), ++ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 1, GFLAGS), ++ GATE(PCLK_HDCP, 
"pclk_hdcp", "pclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(PCLK_HDMI_HOST, "pclk_hdmi_host", "pclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(CLK_HDMI_SFR, "clk_hdmi_sfr", "xin24m", 0, ++ RK3568_CLKGATE_CON(21), 4, GFLAGS), ++ GATE(CLK_HDMI_CEC, "clk_hdmi_cec", "clk_rtc_32k", 0, ++ RK3568_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(PCLK_DSITX_0, "pclk_dsitx_0", "pclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 6, GFLAGS), ++ GATE(PCLK_DSITX_1, "pclk_dsitx_1", "pclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 7, GFLAGS), ++ GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "pclk_vo", 0, ++ RK3568_CLKGATE_CON(21), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_EDP_200M, "clk_edp_200m", gpll200_gpll150_cpll125_p, 0, ++ RK3568_CLKSEL_CON(38), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(21), 9, GFLAGS), + -+#if IS_MODULE(CONFIG_COMMON_CLK_ROCKCHIP) -+int rockchip_clk_protect(struct rockchip_clk_provider *ctx, -+ unsigned int *clocks, unsigned int nclocks); -+void rockchip_clk_unprotect(void); -+void rockchip_clk_disable_unused(void); -+#else -+static inline int rockchip_clk_protect(struct rockchip_clk_provider *ctx, -+ unsigned int *clocks, -+ unsigned int nclocks) -+{ -+ return -EOPNOTSUPP; -+} ++ /* PD_VPU */ ++ COMPOSITE(ACLK_VPU_PRE, "aclk_vpu_pre", gpll_cpll_p, 0, ++ RK3568_CLKSEL_CON(42), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(22), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VPU_PRE, "hclk_vpu_pre", "aclk_vpu_pre", 0, ++ RK3568_CLKSEL_CON(42), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(22), 1, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, ++ RK3568_CLKGATE_CON(22), 4, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, ++ RK3568_CLKGATE_CON(22), 5, GFLAGS), + -+static inline void rockchip_clk_unprotect(void) -+{ -+} ++ /* PD_RGA */ ++ COMPOSITE_NODIV(ACLK_RGA_PRE, "aclk_rga_pre", gpll300_cpll250_gpll100_xin24m_p, 0, ++ RK3568_CLKSEL_CON(43), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(23), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_pre", 0, ++ RK3568_CLKSEL_CON(43), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(23), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_RGA_PRE, "pclk_rga_pre", "aclk_rga_pre", 0, ++ RK3568_CLKSEL_CON(43), 12, 4, DFLAGS, ++ RK3568_CLKGATE_CON(22), 12, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_RGA_CORE, "clk_rga_core", gpll300_gpll200_gpll100_p, 0, ++ RK3568_CLKSEL_CON(43), 2, 2, MFLAGS, ++ RK3568_CLKGATE_CON(23), 6, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 7, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_IEP_CORE, "clk_iep_core", gpll300_gpll200_gpll100_p, 0, ++ RK3568_CLKSEL_CON(43), 4, 2, MFLAGS, ++ RK3568_CLKGATE_CON(23), 9, GFLAGS), ++ GATE(HCLK_EBC, "hclk_ebc", "hclk_rga_pre", 0, RK3568_CLKGATE_CON(23), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_EBC, "dclk_ebc", gpll400_cpll333_gpll200_p, 0, ++ RK3568_CLKSEL_CON(43), 6, 2, MFLAGS, ++ RK3568_CLKGATE_CON(23), 11, GFLAGS), ++ GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 12, GFLAGS), ++ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 13, GFLAGS), ++ GATE(ACLK_JENC, "aclk_jenc", "aclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 14, GFLAGS), ++ GATE(HCLK_JENC, "hclk_jenc", "hclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(23), 15, GFLAGS), ++ GATE(PCLK_EINK, "pclk_eink", "pclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(22), 
14, GFLAGS), ++ GATE(HCLK_EINK, "hclk_eink", "hclk_rga_pre", 0, ++ RK3568_CLKGATE_CON(22), 15, GFLAGS), + -+static inline void rockchip_clk_disable_unused(void) -+{ -+} -+#endif -+#endif -diff --git a/drivers/clk/rockchip-oh/regmap/Kconfig b/drivers/clk/rockchip-oh/regmap/Kconfig -new file mode 100644 -index 000000000..65f691bc4 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/Kconfig -@@ -0,0 +1,16 @@ -+# SPDX-License-Identifier: GPL-2.0 ++ /* PD_RKVENC */ ++ COMPOSITE(ACLK_RKVENC_PRE, "aclk_rkvenc_pre", gpll_cpll_npll_p, 0, ++ RK3568_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(24), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_RKVENC_PRE, "hclk_rkvenc_pre", "aclk_rkvenc_pre", 0, ++ RK3568_CLKSEL_CON(44), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(24), 1, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_pre", 0, ++ RK3568_CLKGATE_CON(24), 6, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_pre", 0, ++ RK3568_CLKGATE_CON(24), 7, GFLAGS), ++ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_npll_vpll_p, 0, ++ RK3568_CLKSEL_CON(45), 14, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(24), 8, GFLAGS), ++ COMPOSITE(ACLK_RKVDEC_PRE, "aclk_rkvdec_pre", aclk_rkvdec_pre_p, CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(47), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(25), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_RKVDEC_PRE, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, ++ RK3568_CLKSEL_CON(47), 8, 4, DFLAGS, ++ RK3568_CLKGATE_CON(25), 1, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, ++ RK3568_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, ++ RK3568_CLKGATE_CON(25), 5, GFLAGS), ++ COMPOSITE(CLK_RKVDEC_CA, "clk_rkvdec_ca", gpll_cpll_npll_vpll_p, 0, ++ RK3568_CLKSEL_CON(48), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(25), 6, GFLAGS), ++ COMPOSITE(CLK_RKVDEC_CORE, "clk_rkvdec_core", clk_rkvdec_core_p, CLK_SET_RATE_NO_REPARENT, ++ RK3568_CLKSEL_CON(49), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(25), 7, GFLAGS), ++ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_npll_vpll_p, 0, ++ RK3568_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(25), 8, GFLAGS), + -+config COMMON_CLK_ROCKCHIP_REGMAP -+ tristate ++ /* PD_BUS */ ++ COMPOSITE_NODIV(ACLK_BUS, "aclk_bus", gpll200_gpll150_gpll100_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(50), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(26), 0, GFLAGS), ++ COMPOSITE_NODIV(PCLK_BUS, "pclk_bus", gpll100_gpll75_cpll50_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(50), 4, 2, MFLAGS, ++ RK3568_CLKGATE_CON(26), 1, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(26), 4, GFLAGS), ++ COMPOSITE(CLK_TSADC_TSEN, "clk_tsadc_tsen", xin24m_gpll100_cpll100_p, 0, ++ RK3568_CLKSEL_CON(51), 4, 2, MFLAGS, 0, 3, DFLAGS, ++ RK3568_CLKGATE_CON(26), 5, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "clk_tsadc_tsen", 0, ++ RK3568_CLKSEL_CON(51), 8, 7, DFLAGS, ++ RK3568_CLKGATE_CON(26), 6, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(26), 7, GFLAGS), ++ GATE(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3568_CLKGATE_CON(26), 8, GFLAGS), ++ GATE(PCLK_SCR, "pclk_scr", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(26), 12, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(26), 13, GFLAGS), ++ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, ++ RK3568_CLKGATE_CON(26), 14, GFLAGS), ++ GATE(ACLK_MCU, "aclk_mcu", "aclk_bus", CLK_IGNORE_UNUSED, ++ 
RK3568_CLKGATE_CON(32), 13, GFLAGS), ++ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3568_CLKGATE_CON(32), 14, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(32), 15, GFLAGS), + -+config CLK_RK618 -+ tristate "Clock driver for Rockchip RK618" -+ depends on MFD_RK618 -+ default MFD_RK618 -+ select COMMON_CLK_ROCKCHIP_REGMAP ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(27), 12, GFLAGS), ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(52), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(27), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(53), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(27), 14, GFLAGS, ++ &rk3568_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "sclk_uart1_mux", 0, ++ RK3568_CLKGATE_CON(27), 15, GFLAGS), + -+config CLK_RK628 -+ tristate "Clock driver for Rockchip RK628" -+ depends on MFD_RK628 -+ default MFD_RK628 -+ select COMMON_CLK_ROCKCHIP_REGMAP -diff --git a/drivers/clk/rockchip-oh/regmap/Makefile b/drivers/clk/rockchip-oh/regmap/Makefile -new file mode 100644 -index 000000000..18d075d09 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/Makefile -@@ -0,0 +1,13 @@ -+# SPDX-License-Identifier: GPL-2.0 ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(28), 0, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(54), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(28), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(55), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(28), 2, GFLAGS, ++ &rk3568_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "sclk_uart2_mux", 0, ++ RK3568_CLKGATE_CON(28), 3, GFLAGS), + -+obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += clk-rockchip-regmap.o ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(28), 4, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(56), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(28), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(57), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(28), 6, GFLAGS, ++ &rk3568_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "sclk_uart3_mux", 0, ++ RK3568_CLKGATE_CON(28), 7, GFLAGS), + -+clk-rockchip-regmap-objs := clk-regmap-mux.o \ -+ clk-regmap-divider.o \ -+ clk-regmap-gate.o \ -+ clk-regmap-fractional-divider.o \ -+ clk-regmap-composite.o \ -+ clk-regmap-pll.o ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(28), 8, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(58), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(28), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(59), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(28), 10, GFLAGS, ++ &rk3568_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "sclk_uart4_mux", 0, ++ RK3568_CLKGATE_CON(28), 11, GFLAGS), + -+obj-$(CONFIG_CLK_RK618) += clk-rk618.o -+obj-$(CONFIG_CLK_RK628) += clk-rk628.o -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c -new file mode 100644 -index 000000000..43d2b9a45 ---- /dev/null 
-+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c -@@ -0,0 +1,400 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-composite.c. -+ * See clk-composite.c for further copyright information. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(28), 12, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(60), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(28), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(61), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(28), 14, GFLAGS, ++ &rk3568_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "sclk_uart5_mux", 0, ++ RK3568_CLKGATE_CON(28), 15, GFLAGS), + -+#include "clk-regmap.h" ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(29), 0, GFLAGS), ++ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(62), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(29), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(63), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(29), 2, GFLAGS, ++ &rk3568_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "sclk_uart6_mux", 0, ++ RK3568_CLKGATE_CON(29), 3, GFLAGS), + -+struct clk_regmap_composite { -+ struct device *dev; -+ struct clk_hw hw; -+ struct clk_ops ops; ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(29), 4, GFLAGS), ++ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(64), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(29), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(65), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(29), 6, GFLAGS, ++ &rk3568_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "sclk_uart7_mux", 0, ++ RK3568_CLKGATE_CON(29), 7, GFLAGS), + -+ struct clk_hw *mux_hw; -+ struct clk_hw *rate_hw; -+ struct clk_hw *gate_hw; ++ GATE(PCLK_UART8, "pclk_uart8", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(29), 8, GFLAGS), ++ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(66), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_CLKGATE_CON(29), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(67), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(29), 10, GFLAGS, ++ &rk3568_uart8_fracmux), ++ GATE(SCLK_UART8, "sclk_uart8", "sclk_uart8_mux", 0, ++ RK3568_CLKGATE_CON(29), 11, GFLAGS), + -+ const struct clk_ops *mux_ops; -+ const struct clk_ops *rate_ops; -+ const struct clk_ops *gate_ops; -+}; ++ GATE(PCLK_UART9, "pclk_uart9", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(29), 12, GFLAGS), ++ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_usb480m_p, 0, ++ RK3568_CLKSEL_CON(68), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ 
RK3568_CLKGATE_CON(29), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, ++ RK3568_CLKSEL_CON(69), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_CLKGATE_CON(29), 14, GFLAGS, ++ &rk3568_uart9_fracmux), ++ GATE(SCLK_UART9, "sclk_uart9", "sclk_uart9_mux", 0, ++ RK3568_CLKGATE_CON(29), 15, GFLAGS), + -+#define to_clk_regmap_composite(_hw) \ -+ container_of(_hw, struct clk_regmap_composite, hw) ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(27), 5, GFLAGS), ++ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, ++ RK3568_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(27), 6, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(27), 7, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, ++ RK3568_CLKSEL_CON(70), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3568_CLKGATE_CON(27), 8, GFLAGS), ++ GATE(PCLK_CAN2, "pclk_can2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(27), 9, GFLAGS), ++ COMPOSITE(CLK_CAN2, "clk_can2", gpll_cpll_p, 0, ++ RK3568_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3568_CLKGATE_CON(27), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", clk_i2c_p, 0, ++ RK3568_CLKSEL_CON(71), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(32), 10, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 0, GFLAGS), ++ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, ++ RK3568_CLKGATE_CON(30), 1, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 2, GFLAGS), ++ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, ++ RK3568_CLKGATE_CON(30), 3, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 4, GFLAGS), ++ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, ++ RK3568_CLKGATE_CON(30), 5, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 6, GFLAGS), ++ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, ++ RK3568_CLKGATE_CON(30), 7, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 8, GFLAGS), ++ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, ++ RK3568_CLKGATE_CON(30), 9, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", gpll200_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 0, 1, MFLAGS, ++ RK3568_CLKGATE_CON(30), 11, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", gpll200_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 2, 1, MFLAGS, ++ RK3568_CLKGATE_CON(30), 13, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(30), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", gpll200_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 4, 1, MFLAGS, ++ RK3568_CLKGATE_CON(30), 15, GFLAGS), ++ GATE(PCLK_SPI3, "pclk_spi3", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI3, "clk_spi3", gpll200_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 6, 1, MFLAGS, RK3568_CLKGATE_CON(31), 1, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus", 0, RK3568_CLKGATE_CON(31), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", gpll100_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(31), 11, GFLAGS), ++ GATE(CLK_PWM1_CAPTURE, "clk_pwm1_capture", "xin24m", 0, ++ RK3568_CLKGATE_CON(31), 12, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 13, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2, "clk_pwm2", gpll100_xin24m_cpll100_p, 0, ++ 
RK3568_CLKSEL_CON(72), 10, 2, MFLAGS, ++ RK3568_CLKGATE_CON(31), 14, GFLAGS), ++ GATE(CLK_PWM2_CAPTURE, "clk_pwm2_capture", "xin24m", 0, ++ RK3568_CLKGATE_CON(31), 15, GFLAGS), ++ GATE(PCLK_PWM3, "pclk_pwm3", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(32), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM3, "clk_pwm3", gpll100_xin24m_cpll100_p, 0, ++ RK3568_CLKSEL_CON(72), 12, 2, MFLAGS, ++ RK3568_CLKGATE_CON(32), 1, GFLAGS), ++ GATE(CLK_PWM3_CAPTURE, "clk_pwm3_capture", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 2, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO, "dbclk_gpio", xin24m_32k_p, 0, ++ RK3568_CLKSEL_CON(72), 14, 1, MFLAGS, ++ RK3568_CLKGATE_CON(32), 11, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 2, GFLAGS), ++ GATE(DBCLK_GPIO1, "dbclk_gpio1", "dbclk_gpio", 0, ++ RK3568_CLKGATE_CON(31), 3, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 4, GFLAGS), ++ GATE(DBCLK_GPIO2, "dbclk_gpio2", "dbclk_gpio", 0, ++ RK3568_CLKGATE_CON(31), 5, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 6, GFLAGS), ++ GATE(DBCLK_GPIO3, "dbclk_gpio3", "dbclk_gpio", 0, ++ RK3568_CLKGATE_CON(31), 7, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(31), 8, GFLAGS), ++ GATE(DBCLK_GPIO4, "dbclk_gpio4", "dbclk_gpio", 0, ++ RK3568_CLKGATE_CON(31), 9, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, ++ RK3568_CLKGATE_CON(32), 3, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 4, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 5, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 6, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 7, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 8, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, ++ RK3568_CLKGATE_CON(32), 9, GFLAGS), + -+static u8 clk_regmap_composite_get_parent(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *mux_hw = composite->mux_hw; ++ /* PD_TOP */ ++ COMPOSITE_NODIV(ACLK_TOP_HIGH, "aclk_top_high", cpll500_gpll400_gpll300_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(73), 0, 2, MFLAGS, ++ RK3568_CLKGATE_CON(33), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_LOW, "aclk_top_low", gpll400_gpll300_gpll200_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(73), 4, 2, MFLAGS, ++ RK3568_CLKGATE_CON(33), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_TOP, "hclk_top", gpll150_gpll100_gpll75_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(73), 8, 2, MFLAGS, ++ RK3568_CLKGATE_CON(33), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_TOP, "pclk_top", gpll100_gpll75_cpll50_xin24m_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(73), 12, 2, MFLAGS, ++ RK3568_CLKGATE_CON(33), 3, GFLAGS), ++ GATE(PCLK_PCIE30PHY, "pclk_pcie30phy", "pclk_top", 0, ++ RK3568_CLKGATE_CON(33), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_OPTC_ARB, "clk_optc_arb", xin24m_cpll100_p, CLK_IS_CRITICAL, ++ RK3568_CLKSEL_CON(73), 15, 1, MFLAGS, ++ RK3568_CLKGATE_CON(33), 9, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top", 0, ++ RK3568_CLKGATE_CON(33), 13, GFLAGS), ++ GATE(PCLK_MIPIDSIPHY0, "pclk_mipidsiphy0", "pclk_top", 0, ++ RK3568_CLKGATE_CON(33), 14, GFLAGS), ++ GATE(PCLK_MIPIDSIPHY1, "pclk_mipidsiphy1", "pclk_top", 0, ++ RK3568_CLKGATE_CON(33), 15, GFLAGS), ++ GATE(PCLK_PIPEPHY0, "pclk_pipephy0", "pclk_top", 0, 
++ RK3568_CLKGATE_CON(34), 4, GFLAGS), ++ GATE(PCLK_PIPEPHY1, "pclk_pipephy1", "pclk_top", 0, ++ RK3568_CLKGATE_CON(34), 5, GFLAGS), ++ GATE(PCLK_PIPEPHY2, "pclk_pipephy2", "pclk_top", 0, ++ RK3568_CLKGATE_CON(34), 6, GFLAGS), ++ GATE(PCLK_CPU_BOOST, "pclk_cpu_boost", "pclk_top", 0, ++ RK3568_CLKGATE_CON(34), 11, GFLAGS), ++ GATE(CLK_CPU_BOOST, "clk_cpu_boost", "xin24m", 0, ++ RK3568_CLKGATE_CON(34), 12, GFLAGS), ++ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_top", 0, ++ RK3568_CLKGATE_CON(34), 13, GFLAGS), ++ GATE(PCLK_EDPPHY_GRF, "pclk_edpphy_grf", "pclk_top", 0, ++ RK3568_CLKGATE_CON(34), 14, GFLAGS), ++}; + -+ __clk_hw_set_clk(mux_hw, hw); ++static struct rockchip_clk_branch rk3568_clk_pmu_branches[] __initdata = { ++ /* PD_PMU */ ++ FACTOR(0, "ppll_ph0", "ppll", 0, 1, 2), ++ FACTOR(0, "ppll_ph180", "ppll", 0, 1, 2), ++ FACTOR(0, "hpll_ph0", "hpll", 0, 1, 2), + -+ return mux_ops->get_parent(mux_hw); -+} ++ MUX(CLK_PDPMU, "clk_pdpmu", clk_pdpmu_p, 0, ++ RK3568_PMU_CLKSEL_CON(2), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(PCLK_PDPMU, "pclk_pdpmu", "clk_pdpmu", CLK_IS_CRITICAL, ++ RK3568_PMU_CLKSEL_CON(2), 0, 5, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pdpmu", CLK_IS_CRITICAL, ++ RK3568_PMU_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, ++ RK3568_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pdpmu", 0, ++ RK3568_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C0, "clk_i2c0", "clk_pdpmu", 0, ++ RK3568_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_pdpmu", 0, ++ RK3568_PMU_CLKGATE_CON(1), 2, GFLAGS), + -+static int clk_regmap_composite_set_parent(struct clk_hw *hw, u8 index) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *mux_hw = composite->mux_hw; ++ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RK3568_PMU_CLKSEL_CON(1), 0, ++ RK3568_PMU_CLKGATE_CON(0), 1, GFLAGS, ++ &rk3568_rtc32k_pmu_fracmux), + -+ __clk_hw_set_clk(mux_hw, hw); ++ COMPOSITE_NOMUX(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", CLK_IGNORE_UNUSED, ++ RK3568_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ return mux_ops->set_parent(mux_hw, index); -+} ++ COMPOSITE(CLK_UART0_DIV, "sclk_uart0_div", ppll_usb480m_cpll_gpll_p, 0, ++ RK3568_PMU_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "sclk_uart0_frac", "sclk_uart0_div", CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(5), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3568_PMU_CLKGATE_CON(1), 4, GFLAGS, ++ &rk3568_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "sclk_uart0_mux", 0, ++ RK3568_PMU_CLKGATE_CON(1), 5, GFLAGS), + -+static unsigned long clk_regmap_composite_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pdpmu", 0, ++ RK3568_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", xin24m_32k_p, 0, ++ RK3568_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, ++ RK3568_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_pdpmu", 0, ++ RK3568_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE(CLK_PWM0, "clk_pwm0", clk_pwm0_p, 0, 
++ RK3568_PMU_CLKSEL_CON(6), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0_NDFT, "clk_capture_pwm0_ndft", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_PMUPVTM, "pclk_pmupvtm", "pclk_pdpmu", 0, ++ RK3568_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(CLK_PMUPVTM, "clk_pmupvtm", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(CLK_CORE_PMUPVTM, "clk_core_pmupvtm", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF24M, "clk_ref24m", "clk_pdpmu", 0, ++ RK3568_PMU_CLKSEL_CON(7), 0, 6, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(XIN_OSC0_USBPHY0_G, "xin_osc0_usbphy0_g", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 1, GFLAGS), ++ MUX(CLK_USBPHY0_REF, "clk_usbphy0_ref", clk_usbphy0_ref_p, 0, ++ RK3568_PMU_CLKSEL_CON(8), 0, 1, MFLAGS), ++ GATE(XIN_OSC0_USBPHY1_G, "xin_osc0_usbphy1_g", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 2, GFLAGS), ++ MUX(CLK_USBPHY1_REF, "clk_usbphy1_ref", clk_usbphy1_ref_p, 0, ++ RK3568_PMU_CLKSEL_CON(8), 1, 1, MFLAGS), ++ GATE(XIN_OSC0_MIPIDSIPHY0_G, "xin_osc0_mipidsiphy0_g", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 3, GFLAGS), ++ MUX(CLK_MIPIDSIPHY0_REF, "clk_mipidsiphy0_ref", clk_mipidsiphy0_ref_p, 0, ++ RK3568_PMU_CLKSEL_CON(8), 2, 1, MFLAGS), ++ GATE(XIN_OSC0_MIPIDSIPHY1_G, "xin_osc0_mipidsiphy1_g", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 4, GFLAGS), ++ MUX(CLK_MIPIDSIPHY1_REF, "clk_mipidsiphy1_ref", clk_mipidsiphy1_ref_p, 0, ++ RK3568_PMU_CLKSEL_CON(8), 3, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_WIFI_DIV, "clk_wifi_div", "clk_pdpmu", 0, ++ RK3568_PMU_CLKSEL_CON(8), 8, 6, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(CLK_WIFI_OSC0, "clk_wifi_osc0", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 6, GFLAGS), ++ MUX(CLK_WIFI, "clk_wifi", clk_wifi_p, CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(8), 15, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_PCIEPHY0_DIV, "clk_pciephy0_div", "ppll_ph0", 0, ++ RK3568_PMU_CLKSEL_CON(9), 0, 3, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(CLK_PCIEPHY0_OSC0, "clk_pciephy0_osc0", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 8, GFLAGS), ++ MUX(CLK_PCIEPHY0_REF, "clk_pciephy0_ref", clk_pciephy0_ref_p, CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(9), 3, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_PCIEPHY1_DIV, "clk_pciephy1_div", "ppll_ph0", 0, ++ RK3568_PMU_CLKSEL_CON(9), 4, 3, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(CLK_PCIEPHY1_OSC0, "clk_pciephy1_osc0", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 10, GFLAGS), ++ MUX(CLK_PCIEPHY1_REF, "clk_pciephy1_ref", clk_pciephy1_ref_p, CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(9), 7, 1, MFLAGS), ++ COMPOSITE_NOMUX(CLK_PCIEPHY2_DIV, "clk_pciephy2_div", "ppll_ph0", 0, ++ RK3568_PMU_CLKSEL_CON(9), 8, 3, DFLAGS, ++ RK3568_PMU_CLKGATE_CON(2), 11, GFLAGS), ++ GATE(CLK_PCIEPHY2_OSC0, "clk_pciephy2_osc0", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 12, GFLAGS), ++ MUX(CLK_PCIEPHY2_REF, "clk_pciephy2_ref", clk_pciephy2_ref_p, CLK_SET_RATE_PARENT, ++ RK3568_PMU_CLKSEL_CON(9), 11, 1, MFLAGS), ++ GATE(CLK_PCIE30PHY_REF_M, "clk_pcie30phy_ref_m", "ppll_ph0", 0, ++ RK3568_PMU_CLKGATE_CON(2), 13, GFLAGS), ++ GATE(CLK_PCIE30PHY_REF_N, "clk_pcie30phy_ref_n", "ppll_ph180", 0, ++ RK3568_PMU_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(XIN_OSC0_EDPPHY_G, "xin_osc0_edpphy_g", "xin24m", 0, ++ RK3568_PMU_CLKGATE_CON(2), 15, GFLAGS), ++ MUX(CLK_HDMI_REF, "clk_hdmi_ref", clk_hdmi_ref_p, 0, ++ RK3568_PMU_CLKSEL_CON(8), 7, 1, MFLAGS), + -+ __clk_hw_set_clk(rate_hw, hw); ++ 
MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", clk_32k_ioe_p, 0, ++ RK3568_PMU_GRF_SOC_CON0, 0, 1, MFLAGS) ++}; + -+ return rate_ops->recalc_rate(rate_hw, parent_rate); -+} ++static void __iomem *rk3568_cru_base; ++static void __iomem *rk3568_pmucru_base; + -+static int clk_regmap_composite_determine_rate(struct clk_hw *hw, -+ struct clk_rate_request *req) ++static void rk3568_dump_cru(void) +{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; -+ struct clk_hw *mux_hw = composite->mux_hw; -+ struct clk_hw *parent; -+ unsigned long parent_rate; -+ long tmp_rate, best_rate = 0; -+ unsigned long rate_diff; -+ unsigned long best_rate_diff = ULONG_MAX; -+ long rate; -+ unsigned int i; ++ if (rk3568_pmucru_base) { ++ pr_warn("PMU CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3568_pmucru_base, ++ 0x248, false); ++ } ++ if (rk3568_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3568_cru_base, ++ 0x588, false); ++ } ++} + -+ if (rate_hw && rate_ops && rate_ops->determine_rate) { -+ __clk_hw_set_clk(rate_hw, hw); -+ return rate_ops->determine_rate(rate_hw, req); -+ } else if (rate_hw && rate_ops && rate_ops->round_rate && -+ mux_hw && mux_ops && mux_ops->set_parent) { -+ req->best_parent_hw = NULL; ++static int protect_clocks[] = { ++ ACLK_VO, ++ HCLK_VO, ++ ACLK_VOP, ++ HCLK_VOP, ++ DCLK_VOP0, ++ DCLK_VOP1, ++ DCLK_VOP2, ++}; + -+ if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { -+ parent = clk_hw_get_parent(mux_hw); -+ req->best_parent_hw = parent; -+ req->best_parent_rate = clk_hw_get_rate(parent); ++static void __init rk3568_pmu_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ rate = rate_ops->round_rate(rate_hw, req->rate, -+ &req->best_parent_rate); -+ if (rate < 0) -+ return rate; ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru pmu region\n", __func__); ++ return; ++ } + -+ req->rate = rate; -+ return 0; -+ } ++ rk3568_pmucru_base = reg_base; + -+ for (i = 0; i < clk_hw_get_num_parents(mux_hw); i++) { -+ parent = clk_hw_get_parent_by_index(mux_hw, i); -+ if (!parent) -+ continue; ++ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip pmu clk init failed\n", __func__); ++ return; ++ } + -+ parent_rate = clk_hw_get_rate(parent); ++ rockchip_clk_register_plls(ctx, rk3568_pmu_pll_clks, ++ ARRAY_SIZE(rk3568_pmu_pll_clks), ++ RK3568_GRF_SOC_STATUS0); + -+ tmp_rate = rate_ops->round_rate(rate_hw, req->rate, -+ &parent_rate); -+ if (tmp_rate < 0) -+ continue; ++ rockchip_clk_register_branches(ctx, rk3568_clk_pmu_branches, ++ ARRAY_SIZE(rk3568_clk_pmu_branches)); + -+ rate_diff = abs(req->rate - tmp_rate); ++ rockchip_register_softrst(np, 1, reg_base + RK3568_PMU_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ if (!rate_diff || !req->best_parent_hw || -+ best_rate_diff > rate_diff) { -+ req->best_parent_hw = parent; -+ req->best_parent_rate = parent_rate; -+ best_rate_diff = rate_diff; -+ best_rate = tmp_rate; -+ } ++ rockchip_clk_of_add_provider(np, ctx); ++} + -+ if (!rate_diff) -+ return 0; -+ } ++CLK_OF_DECLARE(rk3568_cru_pmu, "rockchip,rk3568-pmucru", rk3568_pmu_clk_init); + -+ req->rate = best_rate; -+ return 0; -+ } else if (mux_hw && mux_ops && mux_ops->determine_rate) { -+ 
__clk_hw_set_clk(mux_hw, hw); -+ return mux_ops->determine_rate(mux_hw, req); -+ } else { -+ return -EINVAL; ++static void __init rk3568_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; ++ ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; + } + -+ return 0; -+} ++ rk3568_cru_base = reg_base; + -+static long clk_regmap_composite_round_rate(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long *prate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ clks = ctx->clk_data.clks; + -+ __clk_hw_set_clk(rate_hw, hw); ++ rockchip_clk_register_plls(ctx, rk3568_pll_clks, ++ ARRAY_SIZE(rk3568_pll_clks), ++ RK3568_GRF_SOC_STATUS0); + -+ return rate_ops->round_rate(rate_hw, rate, prate); -+} ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3568_cpuclk_data, rk3568_cpuclk_rates, ++ ARRAY_SIZE(rk3568_cpuclk_rates)); + -+static int clk_regmap_composite_set_rate(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; ++ rockchip_clk_register_branches(ctx, rk3568_clk_branches, ++ ARRAY_SIZE(rk3568_clk_branches)); + -+ __clk_hw_set_clk(rate_hw, hw); ++ rockchip_register_softrst(np, 30, reg_base + RK3568_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ return rate_ops->set_rate(rate_hw, rate, parent_rate); -+} ++ rockchip_register_restart_notifier(ctx, RK3568_GLB_SRST_FST, NULL); + -+static int clk_regmap_composite_is_prepared(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; ++ rockchip_clk_of_add_provider(np, ctx); + -+ __clk_hw_set_clk(gate_hw, hw); ++ if (!rk_dump_cru) ++ rk_dump_cru = rk3568_dump_cru; + -+ return gate_ops->is_prepared(gate_hw); ++ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); +} + -+static int clk_regmap_composite_prepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; -+ -+ __clk_hw_set_clk(gate_hw, hw); ++CLK_OF_DECLARE(rk3568_cru, "rockchip,rk3568-cru", rk3568_clk_init); + -+ return gate_ops->prepare(gate_hw); -+} ++#ifdef MODULE ++struct clk_rk3568_inits { ++ void (*inits)(struct device_node *np); ++}; + -+static void clk_regmap_composite_unprepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; ++static const struct clk_rk3568_inits clk_rk3568_pmucru_init = { ++ .inits = rk3568_pmu_clk_init, ++}; + -+ __clk_hw_set_clk(gate_hw, hw); ++static const struct clk_rk3568_inits clk_3568_cru_init = { ++ .inits = rk3568_clk_init, ++}; + -+ gate_ops->unprepare(gate_hw); -+} ++static const struct of_device_id 
clk_rk3568_match_table[] = { ++ { ++ .compatible = "rockchip,rk3568-cru", ++ .data = &clk_3568_cru_init, ++ }, { ++ .compatible = "rockchip,rk3568-pmucru", ++ .data = &clk_rk3568_pmucru_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3568_match_table); + -+struct clk * -+devm_clk_regmap_register_composite(struct device *dev, const char *name, -+ const char *const *parent_names, -+ u8 num_parents, struct regmap *regmap, -+ u32 mux_reg, u8 mux_shift, u8 mux_width, -+ u32 div_reg, u8 div_shift, u8 div_width, -+ u8 div_flags, -+ u32 gate_reg, u8 gate_shift, -+ unsigned long flags) ++static int clk_rk3568_probe(struct platform_device *pdev) +{ -+ struct clk_regmap_gate *gate = NULL; -+ struct clk_regmap_mux *mux = NULL; -+ struct clk_regmap_divider *div = NULL; -+ struct clk_regmap_fractional_divider *fd = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, *gate_ops = NULL; -+ const struct clk_ops *fd_ops = NULL; -+ struct clk_hw *mux_hw = NULL, *div_hw = NULL, *gate_hw = NULL; -+ struct clk_hw *fd_hw = NULL; -+ struct clk *clk; -+ struct clk_init_data init = {}; -+ struct clk_regmap_composite *composite; -+ struct clk_ops *clk_composite_ops; -+ -+ if (num_parents > 1) { -+ mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3568_inits *init_data; + -+ mux->dev = dev; -+ mux->regmap = regmap; -+ mux->reg = mux_reg; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux_ops = &clk_regmap_mux_ops; -+ mux_hw = &mux->hw; -+ } ++ match = of_match_device(clk_rk3568_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; + -+ if (gate_reg > 0) { -+ gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ return ERR_PTR(-ENOMEM); ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + -+ gate->dev = dev; -+ gate->regmap = regmap; -+ gate->reg = gate_reg; -+ gate->shift = gate_shift; -+ gate_ops = &clk_regmap_gate_ops; -+ gate_hw = &gate->hw; -+ } ++ return 0; ++} + -+ if (div_reg > 0) { -+ if (div_flags & CLK_DIVIDER_HIWORD_MASK) { -+ div = devm_kzalloc(dev, sizeof(*div), GFP_KERNEL); -+ if (!div) -+ return ERR_PTR(-ENOMEM); ++static struct platform_driver clk_rk3568_driver = { ++ .probe = clk_rk3568_probe, ++ .driver = { ++ .name = "clk-rk3568", ++ .of_match_table = clk_rk3568_match_table, ++ .suppress_bind_attrs = true, ++ }, ++}; ++module_platform_driver(clk_rk3568_driver); + -+ div->dev = dev; -+ div->regmap = regmap; -+ div->reg = div_reg; -+ div->shift = div_shift; -+ div->width = div_width; -+ div_ops = &clk_regmap_divider_ops; -+ div_hw = &div->hw; -+ } else { -+ fd = devm_kzalloc(dev, sizeof(*fd), GFP_KERNEL); -+ if (!fd) -+ return ERR_PTR(-ENOMEM); -+ -+ fd->dev = dev; -+ fd->regmap = regmap; -+ fd->reg = div_reg; -+ fd->mshift = 16; -+ fd->mwidth = 16; -+ fd->mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift; -+ fd->nshift = 0; -+ fd->nwidth = 16; -+ fd->nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift; -+ fd_ops = &clk_regmap_fractional_divider_ops; -+ fd_hw = &fd->hw; -+ } -+ } -+ -+ composite = devm_kzalloc(dev, sizeof(*composite), GFP_KERNEL); -+ if (!composite) -+ return ERR_PTR(-ENOMEM); -+ -+ init.name = name; -+ init.flags = flags; -+ init.parent_names = parent_names; -+ init.num_parents = num_parents; -+ -+ clk_composite_ops = &composite->ops; -+ -+ if (mux_hw && mux_ops) { -+ if (!mux_ops->get_parent) -+ return ERR_PTR(-EINVAL); -+ -+ 
composite->mux_hw = mux_hw; -+ composite->mux_ops = mux_ops; -+ clk_composite_ops->get_parent = -+ clk_regmap_composite_get_parent; -+ if (mux_ops->set_parent) -+ clk_composite_ops->set_parent = -+ clk_regmap_composite_set_parent; -+ if (mux_ops->determine_rate) -+ clk_composite_ops->determine_rate = -+ clk_regmap_composite_determine_rate; -+ } -+ -+ if (div_hw && div_ops) { -+ if (!div_ops->recalc_rate) -+ return ERR_PTR(-EINVAL); -+ -+ clk_composite_ops->recalc_rate = -+ clk_regmap_composite_recalc_rate; -+ -+ if (div_ops->determine_rate) -+ clk_composite_ops->determine_rate = -+ clk_regmap_composite_determine_rate; -+ else if (div_ops->round_rate) -+ clk_composite_ops->round_rate = -+ clk_regmap_composite_round_rate; -+ -+ /* .set_rate requires either .round_rate or .determine_rate */ -+ if (div_ops->set_rate) { -+ if (div_ops->determine_rate || div_ops->round_rate) -+ clk_composite_ops->set_rate = -+ clk_regmap_composite_set_rate; -+ else -+ WARN(1, "missing round_rate op\n"); -+ } -+ -+ composite->rate_hw = div_hw; -+ composite->rate_ops = div_ops; -+ } -+ -+ if (fd_hw && fd_ops) { -+ if (!fd_ops->recalc_rate) -+ return ERR_PTR(-EINVAL); -+ -+ clk_composite_ops->recalc_rate = -+ clk_regmap_composite_recalc_rate; -+ -+ if (fd_ops->determine_rate) -+ clk_composite_ops->determine_rate = -+ clk_regmap_composite_determine_rate; -+ else if (fd_ops->round_rate) -+ clk_composite_ops->round_rate = -+ clk_regmap_composite_round_rate; -+ -+ /* .set_rate requires either .round_rate or .determine_rate */ -+ if (fd_ops->set_rate) { -+ if (fd_ops->determine_rate || fd_ops->round_rate) -+ clk_composite_ops->set_rate = -+ clk_regmap_composite_set_rate; -+ else -+ WARN(1, "missing round_rate op\n"); -+ } -+ -+ composite->rate_hw = fd_hw; -+ composite->rate_ops = fd_ops; -+ } -+ -+ if (gate_hw && gate_ops) { -+ if (!gate_ops->is_prepared || !gate_ops->prepare || -+ !gate_ops->unprepare) -+ return ERR_PTR(-EINVAL); -+ -+ composite->gate_hw = gate_hw; -+ composite->gate_ops = gate_ops; -+ clk_composite_ops->is_prepared = -+ clk_regmap_composite_is_prepared; -+ clk_composite_ops->prepare = clk_regmap_composite_prepare; -+ clk_composite_ops->unprepare = clk_regmap_composite_unprepare; -+ } -+ -+ init.ops = clk_composite_ops; -+ composite->dev = dev; -+ composite->hw.init = &init; -+ -+ clk = devm_clk_register(dev, &composite->hw); -+ if (IS_ERR(clk)) -+ return clk; -+ -+ if (composite->mux_hw) -+ composite->mux_hw->clk = clk; -+ -+ if (composite->rate_hw) -+ composite->rate_hw->clk = clk; -+ -+ if (composite->gate_hw) -+ composite->gate_hw->clk = clk; -+ -+ return clk; -+} -+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_composite); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c ++MODULE_DESCRIPTION("Rockchip RK3568 Clock Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("platform:clk-rk3568"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk-rk3588.c b/drivers/clk/rockchip-oh/clk-rk3588.c new file mode 100644 -index 000000000..d57f2c7f8 +index 000000000..c297e4e1e --- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c -@@ -0,0 +1,117 @@ ++++ b/drivers/clk/rockchip-oh/clk-rk3588.c +@@ -0,0 +1,2528 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-divider.c. -+ * See clk-divider.c for further copyright information. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang + */ + -+#include "clk-regmap.h" -+ -+#define div_mask(width) ((1 << (width)) - 1) ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+#define to_clk_regmap_divider(_hw) \ -+ container_of(_hw, struct clk_regmap_divider, hw) ++#define RK3588_GRF_SOC_STATUS0 0x600 ++#define RK3588_PHYREF_ALT_GATE 0xc38 ++#define RK3588_FRAC_MAX_PRATE 1500000000 ++#define RK3588_DCLK_MAX_PRATE 594000000 + -+static unsigned long -+clk_regmap_divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) -+{ -+ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); -+ unsigned int val, div; ++enum rk3588_plls { ++ b0pll, b1pll, lpll, v0pll, aupll, cpll, gpll, npll, ppll, ++}; + -+ regmap_read(divider->regmap, divider->reg, &val); ++static struct rockchip_pll_rate_table rk3588_pll_rates[] = { ++ /* _mhz, _p, _m, _s, _k */ ++ RK3588_PLL_RATE(2520000000, 2, 210, 0, 0), ++ RK3588_PLL_RATE(2496000000, 2, 208, 0, 0), ++ RK3588_PLL_RATE(2472000000, 2, 206, 0, 0), ++ RK3588_PLL_RATE(2448000000, 2, 204, 0, 0), ++ RK3588_PLL_RATE(2424000000, 2, 202, 0, 0), ++ RK3588_PLL_RATE(2400000000, 2, 200, 0, 0), ++ RK3588_PLL_RATE(2376000000, 2, 198, 0, 0), ++ RK3588_PLL_RATE(2352000000, 2, 196, 0, 0), ++ RK3588_PLL_RATE(2328000000, 2, 194, 0, 0), ++ RK3588_PLL_RATE(2304000000, 2, 192, 0, 0), ++ RK3588_PLL_RATE(2280000000, 2, 190, 0, 0), ++ RK3588_PLL_RATE(2256000000, 2, 376, 1, 0), ++ RK3588_PLL_RATE(2232000000, 2, 372, 1, 0), ++ RK3588_PLL_RATE(2208000000, 2, 368, 1, 0), ++ RK3588_PLL_RATE(2184000000, 2, 364, 1, 0), ++ RK3588_PLL_RATE(2160000000, 2, 360, 1, 0), ++ RK3588_PLL_RATE(2136000000, 2, 356, 1, 0), ++ RK3588_PLL_RATE(2112000000, 2, 352, 1, 0), ++ RK3588_PLL_RATE(2088000000, 2, 348, 1, 0), ++ RK3588_PLL_RATE(2064000000, 2, 344, 1, 0), ++ RK3588_PLL_RATE(2040000000, 2, 340, 1, 0), ++ RK3588_PLL_RATE(2016000000, 2, 336, 1, 0), ++ RK3588_PLL_RATE(1992000000, 2, 332, 1, 0), ++ RK3588_PLL_RATE(1968000000, 2, 328, 1, 0), ++ RK3588_PLL_RATE(1944000000, 2, 324, 1, 0), ++ RK3588_PLL_RATE(1920000000, 2, 320, 1, 0), ++ RK3588_PLL_RATE(1896000000, 2, 316, 1, 0), ++ RK3588_PLL_RATE(1872000000, 2, 312, 1, 0), ++ RK3588_PLL_RATE(1848000000, 2, 308, 1, 0), ++ RK3588_PLL_RATE(1824000000, 2, 304, 1, 0), ++ RK3588_PLL_RATE(1800000000, 2, 300, 1, 0), ++ RK3588_PLL_RATE(1776000000, 2, 296, 1, 0), ++ RK3588_PLL_RATE(1752000000, 2, 292, 1, 0), ++ RK3588_PLL_RATE(1728000000, 2, 288, 1, 0), ++ RK3588_PLL_RATE(1704000000, 2, 284, 1, 0), ++ RK3588_PLL_RATE(1680000000, 2, 280, 1, 0), ++ RK3588_PLL_RATE(1656000000, 2, 276, 1, 0), ++ RK3588_PLL_RATE(1632000000, 2, 272, 1, 0), ++ RK3588_PLL_RATE(1608000000, 2, 268, 1, 0), ++ RK3588_PLL_RATE(1584000000, 2, 264, 1, 0), ++ RK3588_PLL_RATE(1560000000, 2, 260, 1, 0), ++ RK3588_PLL_RATE(1536000000, 2, 256, 1, 0), ++ RK3588_PLL_RATE(1512000000, 2, 252, 1, 0), ++ RK3588_PLL_RATE(1488000000, 2, 248, 1, 0), ++ RK3588_PLL_RATE(1464000000, 2, 244, 1, 0), ++ 
RK3588_PLL_RATE(1440000000, 2, 240, 1, 0), ++ RK3588_PLL_RATE(1416000000, 2, 236, 1, 0), ++ RK3588_PLL_RATE(1392000000, 2, 232, 1, 0), ++ RK3588_PLL_RATE(1320000000, 2, 220, 1, 0), ++ RK3588_PLL_RATE(1200000000, 2, 200, 1, 0), ++ RK3588_PLL_RATE(1188000000, 2, 198, 1, 0), ++ RK3588_PLL_RATE(1100000000, 3, 550, 2, 0), ++ RK3588_PLL_RATE(1008000000, 2, 336, 2, 0), ++ RK3588_PLL_RATE(1000000000, 3, 500, 2, 0), ++ RK3588_PLL_RATE(983040000, 4, 655, 2, 23592), ++ RK3588_PLL_RATE(955520000, 3, 478, 2, 49807), ++ RK3588_PLL_RATE(903168000, 6, 903, 2, 11009), ++ RK3588_PLL_RATE(900000000, 2, 300, 2, 0), ++ RK3588_PLL_RATE(816000000, 2, 272, 2, 0), ++ RK3588_PLL_RATE(786432000, 2, 262, 2, 9437), ++ RK3588_PLL_RATE(786000000, 1, 131, 2, 0), ++ RK3588_PLL_RATE(785560000, 3, 393, 2, 51119), ++ RK3588_PLL_RATE(722534400, 8, 963, 2, 24850), ++ RK3588_PLL_RATE(600000000, 2, 200, 2, 0), ++ RK3588_PLL_RATE(594000000, 1, 99, 2, 0), ++ RK3588_PLL_RATE(408000000, 2, 272, 3, 0), ++ RK3588_PLL_RATE(312000000, 2, 208, 3, 0), ++ RK3588_PLL_RATE(216000000, 2, 288, 4, 0), ++ RK3588_PLL_RATE(96000000, 2, 256, 5, 0), ++ { /* sentinel */ }, ++}; + -+ div = val >> divider->shift; -+ div &= div_mask(divider->width); ++#define RK3588_CLK_CORE_B0_SEL_CLEAN_MASK 0x3 ++#define RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT 13 ++#define RK3588_CLK_CORE_B1_SEL_CLEAN_MASK 0x3 ++#define RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT 5 ++#define RK3588_CLK_CORE_B0_GPLL_DIV_MASK 0x1f ++#define RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT 1 ++#define RK3588_CLK_CORE_L_SEL_CLEAN_MASK 0x3 ++#define RK3588_CLK_CORE_L1_SEL_CLEAN_SHIFT 12 ++#define RK3588_CLK_CORE_L0_SEL_CLEAN_SHIFT 5 ++#define RK3588_CLK_DSU_SEL_DF_MASK 0x1 ++#define RK3588_CLK_DSU_SEL_DF_SHIFT 15 ++#define RK3588_CLK_DSU_DF_SRC_MASK 0x3 ++#define RK3588_CLK_DSU_DF_SRC_SHIFT 12 ++#define RK3588_CLK_DSU_DF_DIV_MASK 0x1f ++#define RK3588_CLK_DSU_DF_DIV_SHIFT 7 ++#define RK3588_ACLKM_DSU_DIV_MASK 0x1f ++#define RK3588_ACLKM_DSU_DIV_SHIFT 1 ++#define RK3588_ACLKS_DSU_DIV_MASK 0x1f ++#define RK3588_ACLKS_DSU_DIV_SHIFT 6 ++#define RK3588_ACLKMP_DSU_DIV_MASK 0x1f ++#define RK3588_ACLKMP_DSU_DIV_SHIFT 11 ++#define RK3588_PERIPH_DSU_DIV_MASK 0x1f ++#define RK3588_PERIPH_DSU_DIV_SHIFT 0 ++#define RK3588_ATCLK_DSU_DIV_MASK 0x1f ++#define RK3588_ATCLK_DSU_DIV_SHIFT 0 ++#define RK3588_GICCLK_DSU_DIV_MASK 0x1f ++#define RK3588_GICCLK_DSU_DIV_SHIFT 5 + -+ return divider_recalc_rate(hw, parent_rate, div, NULL, -+ CLK_DIVIDER_ROUND_CLOSEST, divider->width); ++#define RK3588_CORE_B0_SEL(_apllcore) \ ++{ \ ++ .reg = RK3588_BIGCORE0_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B0_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT) | \ ++ HIWORD_UPDATE(0, RK3588_CLK_CORE_B0_GPLL_DIV_MASK, \ ++ RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT), \ +} + -+static long -+clk_regmap_divider_round_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long *prate) -+{ -+ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); -+ -+ return divider_round_rate(hw, rate, prate, NULL, divider->width, -+ CLK_DIVIDER_ROUND_CLOSEST); ++#define RK3588_CORE_B1_SEL(_apllcore) \ ++{ \ ++ .reg = RK3588_BIGCORE0_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B1_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT), \ +} + -+static int div_round_closest(unsigned long parent_rate, unsigned long rate) -+{ -+ int up, down; -+ unsigned long up_rate, down_rate; -+ -+ up = DIV_ROUND_UP_ULL((u64)parent_rate, rate); -+ down = parent_rate / rate; -+ -+ up_rate = DIV_ROUND_UP_ULL((u64)parent_rate, up); -+ 
down_rate = DIV_ROUND_UP_ULL((u64)parent_rate, down); -+ -+ return (rate - up_rate) <= (down_rate - rate) ? up : down; ++#define RK3588_CORE_B2_SEL(_apllcore) \ ++{ \ ++ .reg = RK3588_BIGCORE1_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B0_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_B0_SEL_CLEAN_SHIFT) | \ ++ HIWORD_UPDATE(0, RK3588_CLK_CORE_B0_GPLL_DIV_MASK, \ ++ RK3588_CLK_CORE_B0_GPLL_DIV_SHIFT), \ +} + -+static int -+clk_regmap_divider_set_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); -+ u32 val, div; -+ -+ div = div_round_closest(parent_rate, rate); -+ -+ dev_dbg(divider->dev, "%s: parent_rate=%ld, div=%d, rate=%ld\n", -+ clk_hw_get_name(hw), parent_rate, div, rate); -+ -+ val = div_mask(divider->width) << (divider->shift + 16); -+ val |= (div - 1) << divider->shift; -+ -+ return regmap_write(divider->regmap, divider->reg, val); ++#define RK3588_CORE_B3_SEL(_apllcore) \ ++{ \ ++ .reg = RK3588_BIGCORE1_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_B1_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_B1_SEL_CLEAN_SHIFT), \ +} + -+const struct clk_ops clk_regmap_divider_ops = { -+ .recalc_rate = clk_regmap_divider_recalc_rate, -+ .round_rate = clk_regmap_divider_round_rate, -+ .set_rate = clk_regmap_divider_set_rate, -+}; -+EXPORT_SYMBOL_GPL(clk_regmap_divider_ops); -+ -+struct clk * -+devm_clk_regmap_register_divider(struct device *dev, const char *name, -+ const char *parent_name, struct regmap *regmap, -+ u32 reg, u8 shift, u8 width, -+ unsigned long flags) -+{ -+ struct clk_regmap_divider *divider; -+ struct clk_init_data init = {}; -+ -+ divider = devm_kzalloc(dev, sizeof(*divider), GFP_KERNEL); -+ if (!divider) -+ return ERR_PTR(-ENOMEM); -+ -+ init.name = name; -+ init.ops = &clk_regmap_divider_ops; -+ init.flags = flags; -+ init.parent_names = (parent_name ? &parent_name : NULL); -+ init.num_parents = (parent_name ? 1 : 0); -+ -+ divider->dev = dev; -+ divider->regmap = regmap; -+ divider->reg = reg; -+ divider->shift = shift; -+ divider->width = width; -+ divider->hw.init = &init; -+ -+ return devm_clk_register(dev, ÷r->hw); ++#define RK3588_CORE_L_SEL0(_offs, _apllcore) \ ++{ \ ++ .reg = RK3588_DSU_CLKSEL_CON(6 + _offs), \ ++ .val = HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_L_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_L0_SEL_CLEAN_SHIFT) | \ ++ HIWORD_UPDATE(_apllcore, RK3588_CLK_CORE_L_SEL_CLEAN_MASK, \ ++ RK3588_CLK_CORE_L1_SEL_CLEAN_SHIFT), \ +} -+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_divider); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c -new file mode 100644 -index 000000000..1acbc16e7 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c -@@ -0,0 +1,167 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-fractional-divider.c. -+ * See clk-fractional-divider.c for further copyright information. 
-+ */ -+ -+#include -+ -+#include "clk-regmap.h" -+ -+#define to_clk_regmap_fractional_divider(_hw) \ -+ container_of(_hw, struct clk_regmap_fractional_divider, hw) -+ -+static unsigned long -+clk_regmap_fractional_divider_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_fractional_divider *fd = -+ to_clk_regmap_fractional_divider(hw); -+ unsigned long m, n; -+ u32 val; -+ u64 ret; -+ -+ regmap_read(fd->regmap, fd->reg, &val); -+ -+ m = (val & fd->mmask) >> fd->mshift; -+ n = (val & fd->nmask) >> fd->nshift; -+ -+ if (!n || !m) -+ return parent_rate; + -+ ret = (u64)parent_rate * m; -+ do_div(ret, n); -+ -+ return ret; ++#define RK3588_CORE_L_SEL1(_seldsu, _divdsu) \ ++{ \ ++ .reg = RK3588_DSU_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_seldsu, RK3588_CLK_DSU_DF_SRC_MASK, \ ++ RK3588_CLK_DSU_DF_SRC_SHIFT) | \ ++ HIWORD_UPDATE(_divdsu - 1, RK3588_CLK_DSU_DF_DIV_MASK, \ ++ RK3588_CLK_DSU_DF_DIV_SHIFT), \ +} + -+static void clk_regmap_fractional_divider_approximation(struct clk_hw *hw, -+ unsigned long rate, unsigned long *parent_rate, -+ unsigned long *m, unsigned long *n) -+{ -+ struct clk_regmap_fractional_divider *fd = -+ to_clk_regmap_fractional_divider(hw); -+ unsigned long p_rate, p_parent_rate; -+ struct clk_hw *p_parent; -+ unsigned long scale; -+ -+ if (!rate) { -+ *m = 0; -+ *n = 1; -+ -+ dev_dbg(fd->dev, "%s rate:(%ld) maybe invalid frequency setting!\n", -+ clk_hw_get_name(hw), rate); -+ -+ return; -+ } -+ -+ p_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); -+ if ((rate * 20 > p_rate) && (p_rate % rate != 0)) { -+ p_parent = clk_hw_get_parent(clk_hw_get_parent(hw)); -+ p_parent_rate = clk_hw_get_rate(p_parent); -+ *parent_rate = p_parent_rate; -+ } -+ -+ /* -+ * Get rate closer to *parent_rate to guarantee there is no overflow -+ * for m and n. In the result it will be the nearest rate left shifted -+ * by (scale - fd->nwidth) bits. 
-+ */ -+ scale = fls_long(*parent_rate / rate - 1); -+ if (scale > fd->nwidth) -+ rate <<= scale - fd->nwidth; -+ -+ rational_best_approximation(rate, *parent_rate, -+ GENMASK(fd->mwidth - 1, 0), -+ GENMASK(fd->nwidth - 1, 0), -+ m, n); ++#define RK3588_CORE_L_SEL2(_aclkm, _aclkmp, _aclks) \ ++{ \ ++ .reg = RK3588_DSU_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_aclkm - 1, RK3588_ACLKM_DSU_DIV_MASK, \ ++ RK3588_ACLKM_DSU_DIV_SHIFT) | \ ++ HIWORD_UPDATE(_aclkmp - 1, RK3588_ACLKMP_DSU_DIV_MASK, \ ++ RK3588_ACLKMP_DSU_DIV_SHIFT) | \ ++ HIWORD_UPDATE(_aclks - 1, RK3588_ACLKS_DSU_DIV_MASK, \ ++ RK3588_ACLKS_DSU_DIV_SHIFT), \ +} + -+static long -+clk_regmap_fractional_divider_round_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long *parent_rate) -+{ -+ unsigned long m, n; -+ u64 ret; -+ -+ if (!rate) -+ return *parent_rate; -+ -+ if (rate >= *parent_rate) -+ return *parent_rate; -+ -+ clk_regmap_fractional_divider_approximation(hw, rate, parent_rate, -+ &m, &n); -+ -+ ret = (u64)*parent_rate * m; -+ do_div(ret, n); -+ -+ return ret; ++#define RK3588_CORE_L_SEL3(_periph) \ ++{ \ ++ .reg = RK3588_DSU_CLKSEL_CON(2), \ ++ .val = HIWORD_UPDATE(_periph - 1, RK3588_PERIPH_DSU_DIV_MASK, \ ++ RK3588_PERIPH_DSU_DIV_SHIFT), \ +} + -+static int -+clk_regmap_fractional_divider_set_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_fractional_divider *fd = -+ to_clk_regmap_fractional_divider(hw); -+ unsigned long m, n; -+ u32 val; -+ -+ rational_best_approximation(rate, parent_rate, -+ GENMASK(fd->mwidth - 1, 0), GENMASK(fd->nwidth - 1, 0), -+ &m, &n); -+ -+ dev_dbg(fd->dev, "%s: parent_rate=%ld, m=%ld, n=%ld, rate=%ld\n", -+ clk_hw_get_name(hw), parent_rate, m, n, rate); -+ -+ regmap_read(fd->regmap, fd->reg, &val); -+ val &= ~(fd->mmask | fd->nmask); -+ val |= (m << fd->mshift) | (n << fd->nshift); -+ -+ return regmap_write(fd->regmap, fd->reg, val); ++#define RK3588_CORE_L_SEL4(_gicclk, _atclk) \ ++{ \ ++ .reg = RK3588_DSU_CLKSEL_CON(3), \ ++ .val = HIWORD_UPDATE(_gicclk - 1, RK3588_GICCLK_DSU_DIV_MASK, \ ++ RK3588_GICCLK_DSU_DIV_SHIFT) | \ ++ HIWORD_UPDATE(_atclk - 1, RK3588_ATCLK_DSU_DIV_MASK, \ ++ RK3588_ATCLK_DSU_DIV_SHIFT), \ +} + -+const struct clk_ops clk_regmap_fractional_divider_ops = { -+ .recalc_rate = clk_regmap_fractional_divider_recalc_rate, -+ .round_rate = clk_regmap_fractional_divider_round_rate, -+ .set_rate = clk_regmap_fractional_divider_set_rate, -+}; -+EXPORT_SYMBOL_GPL(clk_regmap_fractional_divider_ops); -+ -+struct clk * -+devm_clk_regmap_register_fractional_divider(struct device *dev, -+ const char *name, -+ const char *parent_name, -+ struct regmap *regmap, -+ u32 reg, unsigned long flags) -+{ -+ struct clk_regmap_fractional_divider *fd; -+ struct clk_init_data init; -+ -+ fd = devm_kzalloc(dev, sizeof(*fd), GFP_KERNEL); -+ if (!fd) -+ return ERR_PTR(-ENOMEM); -+ -+ init.name = name; -+ init.ops = &clk_regmap_fractional_divider_ops; -+ init.flags = flags; -+ init.parent_names = (parent_name ? &parent_name : NULL); -+ init.num_parents = (parent_name ? 
1 : 0); -+ -+ fd->dev = dev; -+ fd->regmap = regmap; -+ fd->reg = reg; -+ fd->mshift = 16; -+ fd->mwidth = 16; -+ fd->mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift; -+ fd->nshift = 0; -+ fd->nwidth = 16; -+ fd->nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift; -+ fd->hw.init = &init; -+ -+ return devm_clk_register(dev, &fd->hw); ++#define RK3588_CPUB01CLK_RATE(_prate, _apllcore) \ ++{ \ ++ .prate = _prate##U, \ ++ .pre_muxs = { \ ++ RK3588_CORE_B0_SEL(0), \ ++ RK3588_CORE_B1_SEL(0), \ ++ }, \ ++ .post_muxs = { \ ++ RK3588_CORE_B0_SEL(_apllcore), \ ++ RK3588_CORE_B1_SEL(_apllcore), \ ++ }, \ +} -+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_fractional_divider); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c -new file mode 100644 -index 000000000..36549b912 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-gate.c. -+ * See clk-gate.c for further copyright information. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include "clk-regmap.h" + -+#define to_clk_regmap_gate(_hw) container_of(_hw, struct clk_regmap_gate, hw) -+ -+static int clk_regmap_gate_prepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); -+ -+ return regmap_write(gate->regmap, gate->reg, -+ 0 | BIT(gate->shift + 16)); ++#define RK3588_CPUB23CLK_RATE(_prate, _apllcore) \ ++{ \ ++ .prate = _prate##U, \ ++ .pre_muxs = { \ ++ RK3588_CORE_B2_SEL(0), \ ++ RK3588_CORE_B3_SEL(0), \ ++ }, \ ++ .post_muxs = { \ ++ RK3588_CORE_B2_SEL(_apllcore), \ ++ RK3588_CORE_B3_SEL(_apllcore), \ ++ }, \ +} + -+static void clk_regmap_gate_unprepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); -+ -+ regmap_write(gate->regmap, gate->reg, -+ BIT(gate->shift) | BIT(gate->shift + 16)); ++#define RK3588_CPULCLK_RATE(_prate, _apllcore, _seldsu, _divdsu) \ ++{ \ ++ .prate = _prate##U, \ ++ .pre_muxs = { \ ++ RK3588_CORE_L_SEL0(0, 0), \ ++ RK3588_CORE_L_SEL0(1, 0), \ ++ RK3588_CORE_L_SEL1(3, 2), \ ++ RK3588_CORE_L_SEL2(2, 3, 3), \ ++ RK3588_CORE_L_SEL3(4), \ ++ RK3588_CORE_L_SEL4(4, 4), \ ++ }, \ ++ .post_muxs = { \ ++ RK3588_CORE_L_SEL0(0, _apllcore), \ ++ RK3588_CORE_L_SEL0(1, _apllcore), \ ++ RK3588_CORE_L_SEL1(_seldsu, _divdsu), \ ++ }, \ +} + -+static int clk_regmap_gate_is_prepared(struct clk_hw *hw) -+{ -+ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); -+ u32 val; -+ -+ regmap_read(gate->regmap, gate->reg, &val); -+ -+ return !(val & BIT(gate->shift)); -+} ++static struct rockchip_cpuclk_rate_table rk3588_cpub0clk_rates[] __initdata = { ++ RK3588_CPUB01CLK_RATE(2496000000, 1), ++ RK3588_CPUB01CLK_RATE(2400000000, 1), ++ RK3588_CPUB01CLK_RATE(2304000000, 1), ++ RK3588_CPUB01CLK_RATE(2208000000, 1), ++ RK3588_CPUB01CLK_RATE(2184000000, 1), ++ RK3588_CPUB01CLK_RATE(2088000000, 1), ++ RK3588_CPUB01CLK_RATE(2040000000, 1), ++ RK3588_CPUB01CLK_RATE(2016000000, 1), ++ RK3588_CPUB01CLK_RATE(1992000000, 
1), ++ RK3588_CPUB01CLK_RATE(1896000000, 1), ++ RK3588_CPUB01CLK_RATE(1800000000, 1), ++ RK3588_CPUB01CLK_RATE(1704000000, 0), ++ RK3588_CPUB01CLK_RATE(1608000000, 0), ++ RK3588_CPUB01CLK_RATE(1584000000, 0), ++ RK3588_CPUB01CLK_RATE(1560000000, 0), ++ RK3588_CPUB01CLK_RATE(1536000000, 0), ++ RK3588_CPUB01CLK_RATE(1512000000, 0), ++ RK3588_CPUB01CLK_RATE(1488000000, 0), ++ RK3588_CPUB01CLK_RATE(1464000000, 0), ++ RK3588_CPUB01CLK_RATE(1440000000, 0), ++ RK3588_CPUB01CLK_RATE(1416000000, 0), ++ RK3588_CPUB01CLK_RATE(1392000000, 0), ++ RK3588_CPUB01CLK_RATE(1368000000, 0), ++ RK3588_CPUB01CLK_RATE(1344000000, 0), ++ RK3588_CPUB01CLK_RATE(1320000000, 0), ++ RK3588_CPUB01CLK_RATE(1296000000, 0), ++ RK3588_CPUB01CLK_RATE(1272000000, 0), ++ RK3588_CPUB01CLK_RATE(1248000000, 0), ++ RK3588_CPUB01CLK_RATE(1224000000, 0), ++ RK3588_CPUB01CLK_RATE(1200000000, 0), ++ RK3588_CPUB01CLK_RATE(1104000000, 0), ++ RK3588_CPUB01CLK_RATE(1008000000, 0), ++ RK3588_CPUB01CLK_RATE(912000000, 0), ++ RK3588_CPUB01CLK_RATE(816000000, 0), ++ RK3588_CPUB01CLK_RATE(696000000, 0), ++ RK3588_CPUB01CLK_RATE(600000000, 0), ++ RK3588_CPUB01CLK_RATE(408000000, 0), ++ RK3588_CPUB01CLK_RATE(312000000, 0), ++ RK3588_CPUB01CLK_RATE(216000000, 0), ++ RK3588_CPUB01CLK_RATE(96000000, 0), ++}; + -+const struct clk_ops clk_regmap_gate_ops = { -+ .prepare = clk_regmap_gate_prepare, -+ .unprepare = clk_regmap_gate_unprepare, -+ .is_prepared = clk_regmap_gate_is_prepared, ++static const struct rockchip_cpuclk_reg_data rk3588_cpub0clk_data = { ++ .core_reg[0] = RK3588_BIGCORE0_CLKSEL_CON(0), ++ .div_core_shift[0] = 8, ++ .div_core_mask[0] = 0x1f, ++ .core_reg[1] = RK3588_BIGCORE0_CLKSEL_CON(1), ++ .div_core_shift[1] = 0, ++ .div_core_mask[1] = 0x1f, ++ .num_cores = 2, ++ .mux_core_alt = 1, ++ .mux_core_main = 2, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x3, +}; -+EXPORT_SYMBOL_GPL(clk_regmap_gate_ops); + -+struct clk * -+devm_clk_regmap_register_gate(struct device *dev, const char *name, -+ const char *parent_name, -+ struct regmap *regmap, u32 reg, u8 shift, -+ unsigned long flags) -+{ -+ struct clk_regmap_gate *gate; -+ struct clk_init_data init = {}; ++static struct rockchip_cpuclk_rate_table rk3588_cpub1clk_rates[] __initdata = { ++ RK3588_CPUB23CLK_RATE(2496000000, 1), ++ RK3588_CPUB23CLK_RATE(2400000000, 1), ++ RK3588_CPUB23CLK_RATE(2304000000, 1), ++ RK3588_CPUB23CLK_RATE(2208000000, 1), ++ RK3588_CPUB23CLK_RATE(2184000000, 1), ++ RK3588_CPUB23CLK_RATE(2088000000, 1), ++ RK3588_CPUB23CLK_RATE(2040000000, 1), ++ RK3588_CPUB23CLK_RATE(2016000000, 1), ++ RK3588_CPUB23CLK_RATE(1992000000, 1), ++ RK3588_CPUB23CLK_RATE(1896000000, 1), ++ RK3588_CPUB23CLK_RATE(1800000000, 1), ++ RK3588_CPUB23CLK_RATE(1704000000, 0), ++ RK3588_CPUB23CLK_RATE(1608000000, 0), ++ RK3588_CPUB23CLK_RATE(1584000000, 0), ++ RK3588_CPUB23CLK_RATE(1560000000, 0), ++ RK3588_CPUB23CLK_RATE(1536000000, 0), ++ RK3588_CPUB23CLK_RATE(1512000000, 0), ++ RK3588_CPUB23CLK_RATE(1488000000, 0), ++ RK3588_CPUB23CLK_RATE(1464000000, 0), ++ RK3588_CPUB23CLK_RATE(1440000000, 0), ++ RK3588_CPUB23CLK_RATE(1416000000, 0), ++ RK3588_CPUB23CLK_RATE(1392000000, 0), ++ RK3588_CPUB23CLK_RATE(1368000000, 0), ++ RK3588_CPUB23CLK_RATE(1344000000, 0), ++ RK3588_CPUB23CLK_RATE(1320000000, 0), ++ RK3588_CPUB23CLK_RATE(1296000000, 0), ++ RK3588_CPUB23CLK_RATE(1272000000, 0), ++ RK3588_CPUB23CLK_RATE(1248000000, 0), ++ RK3588_CPUB23CLK_RATE(1224000000, 0), ++ RK3588_CPUB23CLK_RATE(1200000000, 0), ++ RK3588_CPUB23CLK_RATE(1104000000, 0), ++ RK3588_CPUB23CLK_RATE(1008000000, 0), ++ 
RK3588_CPUB23CLK_RATE(912000000, 0), ++ RK3588_CPUB23CLK_RATE(816000000, 0), ++ RK3588_CPUB23CLK_RATE(696000000, 0), ++ RK3588_CPUB23CLK_RATE(600000000, 0), ++ RK3588_CPUB23CLK_RATE(408000000, 0), ++ RK3588_CPUB23CLK_RATE(312000000, 0), ++ RK3588_CPUB23CLK_RATE(216000000, 0), ++ RK3588_CPUB23CLK_RATE(96000000, 0), ++}; + -+ gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ return ERR_PTR(-ENOMEM); ++static const struct rockchip_cpuclk_reg_data rk3588_cpub1clk_data = { ++ .core_reg[0] = RK3588_BIGCORE1_CLKSEL_CON(0), ++ .div_core_shift[0] = 8, ++ .div_core_mask[0] = 0x1f, ++ .core_reg[1] = RK3588_BIGCORE1_CLKSEL_CON(1), ++ .div_core_shift[1] = 0, ++ .div_core_mask[1] = 0x1f, ++ .num_cores = 2, ++ .mux_core_alt = 1, ++ .mux_core_main = 2, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x3, ++}; + -+ init.name = name; -+ init.ops = &clk_regmap_gate_ops; -+ init.flags = flags; -+ init.parent_names = (parent_name ? &parent_name : NULL); -+ init.num_parents = (parent_name ? 1 : 0); ++static struct rockchip_cpuclk_rate_table rk3588_cpulclk_rates[] __initdata = { ++ RK3588_CPULCLK_RATE(2208000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(2184000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(2088000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(2040000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(2016000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(1992000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(1896000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(1800000000, 1, 3, 1), ++ RK3588_CPULCLK_RATE(1704000000, 0, 3, 1), ++ RK3588_CPULCLK_RATE(1608000000, 0, 3, 1), ++ RK3588_CPULCLK_RATE(1584000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1560000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1536000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1512000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1488000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1464000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1440000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1416000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1392000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1368000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1344000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1320000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1296000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1272000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1248000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1224000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1200000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1104000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(1008000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(912000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(816000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(696000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(600000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(408000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(312000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(216000000, 0, 2, 1), ++ RK3588_CPULCLK_RATE(96000000, 0, 2, 1), ++}; + -+ gate->dev = dev; -+ gate->regmap = regmap; -+ gate->reg = reg; -+ gate->shift = shift; -+ gate->hw.init = &init; ++static const struct rockchip_cpuclk_reg_data rk3588_cpulclk_data = { ++ .core_reg[0] = RK3588_DSU_CLKSEL_CON(6), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .core_reg[1] = RK3588_DSU_CLKSEL_CON(6), ++ .div_core_shift[1] = 7, ++ .div_core_mask[1] = 0x1f, ++ .core_reg[2] = RK3588_DSU_CLKSEL_CON(7), ++ .div_core_shift[2] = 0, ++ .div_core_mask[2] = 0x1f, ++ .core_reg[3] = RK3588_DSU_CLKSEL_CON(7), ++ .div_core_shift[3] = 7, ++ .div_core_mask[3] = 0x1f, ++ .num_cores = 4, ++ .mux_core_reg = RK3588_DSU_CLKSEL_CON(5), ++ .mux_core_alt = 1, ++ .mux_core_main = 2, ++ .mux_core_shift = 14, ++ .mux_core_mask = 0x3, ++}; + -+ return devm_clk_register(dev, &gate->hw); -+} 
-+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_gate); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c -new file mode 100644 -index 000000000..eb37b5f95 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-mux.c. -+ * See clk-mux.c for further copyright information. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ ++PNAME(mux_pll_p) = { "xin24m", "xin32k" }; ++PNAME(mux_armclkl_p) = { "xin24m", "gpll", "lpll" }; ++PNAME(mux_armclkb01_p) = { "xin24m", "gpll", "b0pll",}; ++PNAME(mux_armclkb23_p) = { "xin24m", "gpll", "b1pll",}; ++PNAME(b0pll_b1pll_lpll_gpll_p) = { "b0pll", "b1pll", "lpll", "gpll" }; ++PNAME(gpll_24m_p) = { "gpll", "xin24m" }; ++PNAME(gpll_aupll_p) = { "gpll", "aupll" }; ++PNAME(gpll_lpll_p) = { "gpll", "lpll" }; ++PNAME(gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(gpll_spll_p) = { "gpll", "spll" }; ++PNAME(gpll_cpll_24m_p) = { "gpll", "cpll", "xin24m"}; ++PNAME(gpll_cpll_aupll_p) = { "gpll", "cpll", "aupll"}; ++PNAME(gpll_cpll_npll_p) = { "gpll", "cpll", "npll"}; ++PNAME(gpll_cpll_npll_v0pll_p) = { "gpll", "cpll", "npll", "v0pll"}; ++PNAME(gpll_cpll_24m_spll_p) = { "gpll", "cpll", "xin24m", "spll" }; ++PNAME(gpll_cpll_aupll_spll_p) = { "gpll", "cpll", "aupll", "spll" }; ++PNAME(gpll_cpll_aupll_npll_p) = { "gpll", "cpll", "aupll", "npll" }; ++PNAME(gpll_cpll_v0pll_aupll_p) = { "gpll", "cpll", "v0pll", "aupll" }; ++PNAME(gpll_cpll_v0pll_spll_p) = { "gpll", "cpll", "v0pll", "spll" }; ++PNAME(gpll_cpll_aupll_npll_spll_p) = { "gpll", "cpll", "aupll", "npll", "spll" }; ++PNAME(gpll_cpll_dmyaupll_npll_spll_p) = { "gpll", "cpll", "dummy_aupll", "npll", "spll" }; ++PNAME(gpll_cpll_npll_aupll_spll_p) = { "gpll", "cpll", "npll", "aupll", "spll" }; ++PNAME(gpll_cpll_npll_1000m_p) = { "gpll", "cpll", "npll", "clk_1000m_src" }; ++PNAME(mux_24m_spll_gpll_cpll_p) = { "xin24m", "spll", "gpll", "cpll" }; ++PNAME(mux_24m_32k_p) = { "xin24m", "xin32k" }; ++PNAME(mux_24m_100m_p) = { "xin24m", "clk_100m_src" }; ++PNAME(mux_200m_100m_p) = { "clk_200m_src", "clk_100m_src" }; ++PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_50m_24m_p) = { "clk_150m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_150m_24m_p) = { "clk_200m_src", "clk_150m_src", "xin24m" }; ++PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_700m_400m_200m_24m_p) = { "clk_700m_src", "clk_400m_src", "clk_200m_src", "xin24m" }; ++PNAME(mux_500m_250m_100m_24m_p) = { "clk_500m_src", "clk_250m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", 
"xin24m" }; ++PNAME(mux_400m_200m_100m_24m_p) = {"clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(clk_i2s2_2ch_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "i2s2_mclkin", "xin12m" }; ++PNAME(i2s2_2ch_mclkout_p) = { "mclk_i2s2_2ch", "xin12m" }; ++PNAME(clk_i2s3_2ch_p) = { "clk_i2s3_2ch_src", "clk_i2s3_2ch_frac", "i2s3_mclkin", "xin12m" }; ++PNAME(i2s3_2ch_mclkout_p) = { "mclk_i2s3_2ch", "xin12m" }; ++PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin12m" }; ++PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin12m" }; ++PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin12m" }; ++PNAME(clk_i2s1_8ch_tx_p) = { "clk_i2s1_8ch_tx_src", "clk_i2s1_8ch_tx_frac", "i2s1_mclkin", "xin12m" }; ++PNAME(clk_i2s1_8ch_rx_p) = { "clk_i2s1_8ch_rx_src", "clk_i2s1_8ch_rx_frac", "i2s1_mclkin", "xin12m" }; ++PNAME(i2s1_8ch_mclkout_p) = { "mclk_i2s1_8ch_tx", "mclk_i2s1_8ch_rx", "xin12m" }; ++PNAME(clk_i2s4_8ch_tx_p) = { "clk_i2s4_8ch_tx_src", "clk_i2s4_8ch_tx_frac", "i2s4_mclkin", "xin12m" }; ++PNAME(clk_i2s5_8ch_tx_p) = { "clk_i2s5_8ch_tx_src", "clk_i2s5_8ch_tx_frac", "i2s5_mclkin", "xin12m" }; ++PNAME(clk_i2s6_8ch_tx_p) = { "clk_i2s6_8ch_tx_src", "clk_i2s6_8ch_tx_frac", "i2s6_mclkin", "xin12m" }; ++PNAME(clk_i2s6_8ch_rx_p) = { "clk_i2s6_8ch_rx_src", "clk_i2s6_8ch_rx_frac", "i2s6_mclkin", "xin12m" }; ++PNAME(i2s6_8ch_mclkout_p) = { "mclk_i2s6_8ch_tx", "mclk_i2s6_8ch_rx", "xin12m" }; ++PNAME(clk_i2s7_8ch_rx_p) = { "clk_i2s7_8ch_rx_src", "clk_i2s7_8ch_rx_frac", "i2s7_mclkin", "xin12m" }; ++PNAME(clk_i2s8_8ch_tx_p) = { "clk_i2s8_8ch_tx_src", "clk_i2s8_8ch_tx_frac", "i2s8_mclkin", "xin12m" }; ++PNAME(clk_i2s9_8ch_rx_p) = { "clk_i2s9_8ch_rx_src", "clk_i2s9_8ch_rx_frac", "i2s9_mclkin", "xin12m" }; ++PNAME(clk_i2s10_8ch_rx_p) = { "clk_i2s10_8ch_rx_src", "clk_i2s10_8ch_rx_frac", "i2s10_mclkin", "xin12m" }; ++PNAME(clk_spdif0_p) = { "clk_spdif0_src", "clk_spdif0_frac", "xin12m" }; ++PNAME(clk_spdif1_p) = { "clk_spdif1_src", "clk_spdif1_frac", "xin12m" }; ++PNAME(clk_spdif2_dp0_p) = { "clk_spdif2_dp0_src", "clk_spdif2_dp0_frac", "xin12m" }; ++PNAME(clk_spdif3_p) = { "clk_spdif3_src", "clk_spdif3_frac", "xin12m" }; ++PNAME(clk_spdif4_p) = { "clk_spdif4_src", "clk_spdif4_frac", "xin12m" }; ++PNAME(clk_spdif5_dp1_p) = { "clk_spdif5_dp1_src", "clk_spdif5_dp1_frac", "xin12m" }; ++PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; ++PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; ++PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; ++PNAME(clk_gmac0_ptp_ref_p) = { "cpll", "clk_gmac0_ptpref_io" }; ++PNAME(clk_gmac1_ptp_ref_p) = { "cpll", "clk_gmac1_ptpref_io" }; ++PNAME(clk_hdmirx_aud_p) = { "clk_hdmirx_aud_src", "clk_hdmirx_aud_frac" }; ++PNAME(aclk_hdcp1_root_p) = { "gpll", "cpll", "clk_hdmitrx_refsrc" }; ++PNAME(aclk_vop_sub_src_p) = { "aclk_vop_root", "aclk_vop_div2_src" }; ++PNAME(dclk_vop0_p) = { "dclk_vop0_src", "clk_hdmiphy_pixel0", 
"clk_hdmiphy_pixel1" }; ++PNAME(dclk_vop1_p) = { "dclk_vop1_src", "clk_hdmiphy_pixel0", "clk_hdmiphy_pixel1" }; ++PNAME(dclk_vop2_p) = { "dclk_vop2_src", "clk_hdmiphy_pixel0", "clk_hdmiphy_pixel1" }; ++PNAME(pmu_200m_100m_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src" }; ++PNAME(pmu_300m_24m_p) = { "clk_300m_src", "xin24m" }; ++PNAME(pmu_400m_24m_p) = { "clk_400m_src", "xin24m" }; ++PNAME(pmu_100m_50m_24m_src_p) = { "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; ++PNAME(pmu_24m_32k_100m_src_p) = { "xin24m", "32k", "clk_pmu1_100m_src" }; ++PNAME(hclk_pmu1_root_p) = { "clk_pmu1_200m_src", "clk_pmu1_100m_src", "clk_pmu1_50m_src", "xin24m" }; ++PNAME(hclk_pmu_cm0_root_p) = { "clk_pmu1_400m_src", "clk_pmu1_200m_src", "clk_pmu1_100m_src", "xin24m" }; ++PNAME(mclk_pdm0_p) = { "clk_pmu1_300m_src", "clk_pmu1_200m_src" }; ++PNAME(mux_24m_ppll_spll_p) = { "xin24m", "ppll", "spll" }; ++PNAME(mux_24m_ppll_p) = { "xin24m", "ppll" }; ++PNAME(clk_ref_pipe_phy0_p) = { "clk_ref_pipe_phy0_osc_src", "clk_ref_pipe_phy0_pll_src" }; ++PNAME(clk_ref_pipe_phy1_p) = { "clk_ref_pipe_phy1_osc_src", "clk_ref_pipe_phy1_pll_src" }; ++PNAME(clk_ref_pipe_phy2_p) = { "clk_ref_pipe_phy2_osc_src", "clk_ref_pipe_phy2_pll_src" }; + -+#include "clk-regmap.h" ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+#define to_clk_regmap_mux(_hw) container_of(_hw, struct clk_regmap_mux, hw) ++static struct rockchip_clk_branch rk3588_i2s0_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(26), 0, 2, MFLAGS); + -+static u8 clk_regmap_mux_get_parent(struct clk_hw *hw) -+{ -+ struct clk_regmap_mux *mux = to_clk_regmap_mux(hw); -+ u8 index; -+ u32 val; ++static struct rockchip_clk_branch rk3588_i2s0_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(28), 0, 2, MFLAGS); + -+ regmap_read(mux->regmap, mux->reg, &val); ++static struct rockchip_clk_branch rk3588_i2s1_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S1_8CH_TX, "clk_i2s1_8ch_tx", clk_i2s1_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(7), 0, 2, MFLAGS); + -+ index = val >> mux->shift; -+ index &= mux->mask; ++static struct rockchip_clk_branch rk3588_i2s1_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S1_8CH_RX, "clk_i2s1_8ch_rx", clk_i2s1_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(9), 0, 2, MFLAGS); + -+ return index; -+} ++static struct rockchip_clk_branch rk3588_i2s2_2ch_fracmux __initdata = ++ MUX(CLK_I2S2_2CH, "clk_i2s2_2ch", clk_i2s2_2ch_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(30), 0, 2, MFLAGS); + -+static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index) -+{ -+ struct clk_regmap_mux *mux = to_clk_regmap_mux(hw); ++static struct rockchip_clk_branch rk3588_i2s3_2ch_fracmux __initdata = ++ MUX(CLK_I2S3_2CH, "clk_i2s3_2ch", clk_i2s3_2ch_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(32), 0, 2, MFLAGS); + -+ return regmap_write(mux->regmap, mux->reg, (index << mux->shift) | -+ (mux->mask << (mux->shift + 16))); -+} ++static struct rockchip_clk_branch rk3588_i2s4_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S4_8CH_TX, "clk_i2s4_8ch_tx", clk_i2s4_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(120), 0, 2, MFLAGS); + -+const struct clk_ops clk_regmap_mux_ops = { -+ .set_parent = clk_regmap_mux_set_parent, -+ .get_parent = clk_regmap_mux_get_parent, -+ .determine_rate = __clk_mux_determine_rate, -+}; 
-+EXPORT_SYMBOL_GPL(clk_regmap_mux_ops); ++static struct rockchip_clk_branch rk3588_i2s5_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S5_8CH_TX, "clk_i2s5_8ch_tx", clk_i2s5_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(142), 0, 2, MFLAGS); + -+struct clk * -+devm_clk_regmap_register_mux(struct device *dev, const char *name, -+ const char * const *parent_names, u8 num_parents, -+ struct regmap *regmap, u32 reg, u8 shift, u8 width, -+ unsigned long flags) -+{ -+ struct clk_regmap_mux *mux; -+ struct clk_init_data init = {}; ++static struct rockchip_clk_branch rk3588_i2s6_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S6_8CH_TX, "clk_i2s6_8ch_tx", clk_i2s6_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(146), 0, 2, MFLAGS); + -+ mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); ++static struct rockchip_clk_branch rk3588_i2s6_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S6_8CH_RX, "clk_i2s6_8ch_rx", clk_i2s6_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(148), 0, 2, MFLAGS); + -+ init.name = name; -+ init.ops = &clk_regmap_mux_ops; -+ init.flags = flags; -+ init.parent_names = parent_names; -+ init.num_parents = num_parents; ++static struct rockchip_clk_branch rk3588_i2s7_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S7_8CH_RX, "clk_i2s7_8ch_rx", clk_i2s7_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(131), 0, 2, MFLAGS); + -+ mux->dev = dev; -+ mux->regmap = regmap; -+ mux->reg = reg; -+ mux->shift = shift; -+ mux->mask = BIT(width) - 1; -+ mux->hw.init = &init; ++static struct rockchip_clk_branch rk3588_i2s8_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S8_8CH_TX, "clk_i2s8_8ch_tx", clk_i2s8_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(122), 0, 2, MFLAGS); + -+ return devm_clk_register(dev, &mux->hw); -+} -+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_mux); ++static struct rockchip_clk_branch rk3588_i2s9_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S9_8CH_RX, "clk_i2s9_8ch_rx", clk_i2s9_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(155), 0, 2, MFLAGS); + -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c -new file mode 100644 -index 000000000..24ad7eda9 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c -@@ -0,0 +1,363 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ ++static struct rockchip_clk_branch rk3588_i2s10_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S10_8CH_RX, "clk_i2s10_8ch_rx", clk_i2s10_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(157), 0, 2, MFLAGS); + -+#include "clk-regmap.h" ++static struct rockchip_clk_branch rk3588_spdif0_fracmux __initdata = ++ MUX(CLK_SPDIF0, "clk_spdif0", clk_spdif0_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(34), 0, 2, MFLAGS); + -+#define PLLCON_OFFSET(x) (x * 4) ++static struct rockchip_clk_branch rk3588_spdif1_fracmux __initdata = ++ MUX(CLK_SPDIF1, "clk_spdif1", clk_spdif1_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(36), 0, 2, MFLAGS); + -+#define PLL_BYPASS(x) HIWORD_UPDATE(x, 15, 15) -+#define PLL_BYPASS_MASK BIT(15) -+#define PLL_BYPASS_SHIFT 15 -+#define PLL_POSTDIV1(x) HIWORD_UPDATE(x, 14, 12) -+#define PLL_POSTDIV1_MASK GENMASK(14, 12) -+#define PLL_POSTDIV1_SHIFT 12 -+#define PLL_FBDIV(x) HIWORD_UPDATE(x, 11, 0) -+#define PLL_FBDIV_MASK GENMASK(11, 0) -+#define PLL_FBDIV_SHIFT 0 ++static struct rockchip_clk_branch rk3588_spdif2_dp0_fracmux __initdata = ++ MUX(CLK_SPDIF2_DP0, "clk_spdif2_dp0", clk_spdif2_dp0_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(124), 0, 2, MFLAGS); + -+#define PLL_POSTDIV2(x) HIWORD_UPDATE(x, 8, 6) -+#define PLL_POSTDIV2_MASK GENMASK(8, 6) -+#define PLL_POSTDIV2_SHIFT 6 -+#define PLL_REFDIV(x) HIWORD_UPDATE(x, 5, 0) -+#define PLL_REFDIV_MASK GENMASK(5, 0) -+#define PLL_REFDIV_SHIFT 0 ++static struct rockchip_clk_branch rk3588_spdif3_fracmux __initdata = ++ MUX(CLK_SPDIF3, "clk_spdif3", clk_spdif3_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(150), 0, 2, MFLAGS); + -+#define PLL_FOUT_4PHASE_CLK_POWER_DOWN BIT(27) -+#define PLL_FOUT_VCO_CLK_POWER_DOWN BIT(26) -+#define PLL_FOUT_POST_DIV_POWER_DOWN BIT(25) -+#define PLL_DAC_POWER_DOWN BIT(24) -+#define PLL_FRAC(x) UPDATE(x, 23, 0) -+#define PLL_FRAC_MASK GENMASK(23, 0) -+#define PLL_FRAC_SHIFT 0 ++static struct rockchip_clk_branch rk3588_spdif4_fracmux __initdata = ++ MUX(CLK_SPDIF4, "clk_spdif4", clk_spdif4_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(152), 0, 2, MFLAGS); + -+#define MIN_FREF_RATE 10000000UL -+#define MAX_FREF_RATE 800000000UL -+#define MIN_FREFDIV_RATE 1000000UL -+#define MAX_FREFDIV_RATE 40000000UL -+#define MIN_FVCO_RATE 400000000UL -+#define MAX_FVCO_RATE 1600000000UL -+#define MIN_FOUTPOSTDIV_RATE 8000000UL -+#define MAX_FOUTPOSTDIV_RATE 1600000000UL ++static struct rockchip_clk_branch rk3588_spdif5_dp1_fracmux __initdata = ++ MUX(CLK_SPDIF5_DP1, "clk_spdif5_dp1", clk_spdif5_dp1_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(126), 0, 2, MFLAGS); + -+struct clk_regmap_pll { -+ struct clk_hw hw; -+ struct device *dev; -+ struct regmap *regmap; -+ unsigned int reg; -+ u8 pd_shift; -+ u8 dsmpd_shift; -+ u8 lock_shift; -+}; ++static struct rockchip_clk_branch rk3588_uart0_fracmux __initdata = ++ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(5), 0, 2, MFLAGS); + -+#define to_clk_regmap_pll(_hw) container_of(_hw, struct clk_regmap_pll, hw) ++static struct rockchip_clk_branch rk3588_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(43), 0, 2, MFLAGS); + -+static unsigned long -+clk_regmap_pll_recalc_rate(struct clk_hw *hw, unsigned long prate) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); -+ unsigned int postdiv1, fbdiv, dsmpd, postdiv2, refdiv, frac, bypass; -+ unsigned int con0, con1, con2; -+ u64 foutvco, foutpostdiv; ++static struct rockchip_clk_branch rk3588_uart2_fracmux __initdata = ++ 
MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(45), 0, 2, MFLAGS); + -+ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(0), &con0); -+ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(1), &con1); -+ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(2), &con2); ++static struct rockchip_clk_branch rk3588_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(47), 0, 2, MFLAGS); + -+ bypass = (con0 & PLL_BYPASS_MASK) >> PLL_BYPASS_SHIFT; -+ postdiv1 = (con0 & PLL_POSTDIV1_MASK) >> PLL_POSTDIV1_SHIFT; -+ fbdiv = (con0 & PLL_FBDIV_MASK) >> PLL_FBDIV_SHIFT; -+ dsmpd = (con1 & BIT(pll->dsmpd_shift)) >> pll->dsmpd_shift; -+ postdiv2 = (con1 & PLL_POSTDIV2_MASK) >> PLL_POSTDIV2_SHIFT; -+ refdiv = (con1 & PLL_REFDIV_MASK) >> PLL_REFDIV_SHIFT; -+ frac = (con2 & PLL_FRAC_MASK) >> PLL_FRAC_SHIFT; ++static struct rockchip_clk_branch rk3588_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(49), 0, 2, MFLAGS); + -+ if (bypass) -+ return prate; ++static struct rockchip_clk_branch rk3588_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(51), 0, 2, MFLAGS); + -+ foutvco = prate * fbdiv; -+ do_div(foutvco, refdiv); ++static struct rockchip_clk_branch rk3588_uart6_fracmux __initdata = ++ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(53), 0, 2, MFLAGS); + -+ if (!dsmpd) { -+ u64 frac_rate = (u64)prate * frac; ++static struct rockchip_clk_branch rk3588_uart7_fracmux __initdata = ++ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(55), 0, 2, MFLAGS); + -+ do_div(frac_rate, refdiv); -+ foutvco += frac_rate >> 24; -+ } ++static struct rockchip_clk_branch rk3588_uart8_fracmux __initdata = ++ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(57), 0, 2, MFLAGS); + -+ foutpostdiv = foutvco; -+ do_div(foutpostdiv, postdiv1); -+ do_div(foutpostdiv, postdiv2); ++static struct rockchip_clk_branch rk3588_uart9_fracmux __initdata = ++ MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(59), 0, 2, MFLAGS); + -+ return foutpostdiv; -+} ++static struct rockchip_clk_branch rk3588_hdmirx_aud_fracmux __initdata = ++ MUX(CLK_HDMIRX_AUD_P_MUX, "clk_hdmirx_aud_mux", clk_hdmirx_aud_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(140), 0, 1, MFLAGS); + -+static long clk_pll_round_rate(unsigned long fin, unsigned long fout, -+ u8 *refdiv, u16 *fbdiv, -+ u8 *postdiv1, u8 *postdiv2, -+ u32 *frac, u8 *dsmpd, u8 *bypass) -+{ -+ u8 min_refdiv, max_refdiv, postdiv; -+ u8 _dsmpd = 1, _postdiv1 = 0, _postdiv2 = 0, _refdiv = 0; -+ u16 _fbdiv = 0; -+ u32 _frac = 0; -+ u64 foutvco, foutpostdiv; ++static struct rockchip_pll_clock rk3588_pll_clks[] __initdata = { ++ [b0pll] = PLL(pll_rk3588_core, PLL_B0PLL, "b0pll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_B0_PLL_CON(0), ++ RK3588_B0_PLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), ++ [b1pll] = PLL(pll_rk3588_core, PLL_B1PLL, "b1pll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_B1_PLL_CON(8), ++ RK3588_B1_PLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), ++ [lpll] = PLL(pll_rk3588_core, PLL_LPLL, "lpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_LPLL_CON(16), ++ RK3588_LPLL_MODE_CON0, 0, 15, 0, rk3588_pll_rates), ++ [v0pll] = PLL(pll_rk3588, PLL_V0PLL, "v0pll", mux_pll_p, ++ 0, RK3588_PLL_CON(88), ++ RK3588_MODE_CON0, 4, 15, 0, rk3588_pll_rates), ++ [aupll] = 
PLL(pll_rk3588, PLL_AUPLL, "aupll", mux_pll_p, ++ 0, RK3588_PLL_CON(96), ++ RK3588_MODE_CON0, 6, 15, 0, rk3588_pll_rates), ++ [cpll] = PLL(pll_rk3588, PLL_CPLL, "cpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_PLL_CON(104), ++ RK3588_MODE_CON0, 8, 15, 0, rk3588_pll_rates), ++ [gpll] = PLL(pll_rk3588, PLL_GPLL, "gpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_PLL_CON(112), ++ RK3588_MODE_CON0, 2, 15, 0, rk3588_pll_rates), ++ [npll] = PLL(pll_rk3588, PLL_NPLL, "npll", mux_pll_p, ++ 0, RK3588_PLL_CON(120), ++ RK3588_MODE_CON0, 0, 15, 0, rk3588_pll_rates), ++ [ppll] = PLL(pll_rk3588_core, PLL_PPLL, "ppll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3588_PMU_PLL_CON(128), ++ RK3588_MODE_CON0, 10, 15, 0, rk3588_pll_rates), ++}; + ++static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + /* -+ * FREF : 10MHz ~ 800MHz -+ * FREFDIV : 1MHz ~ 40MHz -+ * FOUTVCO : 400MHz ~ 1.6GHz -+ * FOUTPOSTDIV : 8MHz ~ 1.6GHz ++ * CRU Clock-Architecture + */ -+ if (fin < MIN_FREF_RATE || fin > MAX_FREF_RATE) -+ return -EINVAL; -+ -+ if (fout < MIN_FOUTPOSTDIV_RATE || fout > MAX_FOUTPOSTDIV_RATE) -+ return -EINVAL; -+ -+ if (fin == fout) { -+ if (bypass) -+ *bypass = true; -+ return fin; -+ } ++ /* fixed */ ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ min_refdiv = DIV_ROUND_UP(fin, MAX_FREFDIV_RATE); -+ max_refdiv = fin / MIN_FREFDIV_RATE; -+ if (max_refdiv > 64) -+ max_refdiv = 64; ++ /* top */ ++ COMPOSITE(CLK_50M_SRC, "clk_50m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE(CLK_100M_SRC, "clk_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(CLK_150M_SRC, "clk_150m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(CLK_200M_SRC, "clk_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE(CLK_250M_SRC, "clk_250m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE(CLK_300M_SRC, "clk_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(CLK_350M_SRC, "clk_350m_src", gpll_spll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE(CLK_400M_SRC, "clk_400m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(4), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE(CLK_500M_SRC, "clk_500m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE(CLK_600M_SRC, "clk_600m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE(CLK_650M_SRC, "clk_650m_src", gpll_lpll_p, 0, ++ RK3588_CLKSEL_CON(5), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE(CLK_700M_SRC, "clk_700m_src", gpll_spll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(6), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE(CLK_800M_SRC, 
"clk_800m_src", gpll_aupll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(6), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_1000M_SRC, "clk_1000m_src", gpll_cpll_npll_v0pll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(7), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE(CLK_1200M_SRC, "clk_1200m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(7), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_M300_ROOT, "aclk_top_m300_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(9), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_M500_ROOT, "aclk_top_m500_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(9), 2, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_M400_ROOT, "aclk_top_m400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(9), 4, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_S200_ROOT, "aclk_top_s200_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(9), 6, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_NODIV(ACLK_TOP_S400_ROOT, "aclk_top_s400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(9), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(ACLK_TOP_ROOT, "aclk_top_root", gpll_cpll_aupll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(8), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(8), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE(ACLK_LOW_TOP_ROOT, "aclk_low_top_root", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(8), 14, 1, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE(CLK_MIPI_CAMARAOUT_M0, "clk_mipi_camaraout_m0", mux_24m_spll_gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 9, GFLAGS), ++ COMPOSITE(CLK_MIPI_CAMARAOUT_M1, "clk_mipi_camaraout_m1", mux_24m_spll_gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(19), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 10, GFLAGS), ++ COMPOSITE(CLK_MIPI_CAMARAOUT_M2, "clk_mipi_camaraout_m2", mux_24m_spll_gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 11, GFLAGS), ++ COMPOSITE(CLK_MIPI_CAMARAOUT_M3, "clk_mipi_camaraout_m3", mux_24m_spll_gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(21), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 12, GFLAGS), ++ COMPOSITE(CLK_MIPI_CAMARAOUT_M4, "clk_mipi_camaraout_m4", mux_24m_spll_gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(22), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 13, GFLAGS), ++ COMPOSITE(MCLK_GMAC0_OUT, "mclk_gmac0_out", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(5), 3, GFLAGS), ++ COMPOSITE(REFCLKO25M_ETH0_OUT, "refclko25m_eth0_out", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(15), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3588_CLKGATE_CON(5), 4, GFLAGS), ++ COMPOSITE(REFCLKO25M_ETH1_OUT, "refclko25m_eth1_out", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(5), 5, GFLAGS), ++ COMPOSITE(CLK_CIFOUT_OUT, "clk_cifout_out", gpll_cpll_24m_spll_p, 0, ++ RK3588_CLKSEL_CON(17), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(PCLK_MIPI_DCPHY0, "pclk_mipi_dcphy0", "pclk_top_root", 0, ++ 
RK3588_CLKGATE_CON(3), 14, GFLAGS), ++ GATE(PCLK_MIPI_DCPHY1, "pclk_mipi_dcphy1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_CRU, "pclk_cru", "pclk_top_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(5), 0, GFLAGS), + -+ if (fout < MIN_FVCO_RATE) { -+ postdiv = DIV_ROUND_UP_ULL(MIN_FVCO_RATE, fout); ++ /* bigcore0 */ ++ COMPOSITE_NODIV(PCLK_BIGCORE0_ROOT, "pclk_bigcore0_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_BIGCORE0_CLKSEL_CON(2), 0, 2, MFLAGS, ++ RK3588_BIGCORE0_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_BIGCORE0_PVTM, "pclk_bigcore0_pvtm", "pclk_bigcore0_root", 0, ++ RK3588_BIGCORE0_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(CLK_BIGCORE0_PVTM, "clk_bigcore0_pvtm", "xin24m", 0, ++ RK3588_BIGCORE0_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_CORE_BIGCORE0_PVTM, "clk_core_bigcore0_pvtm", "armclk_b01", 0, ++ RK3588_BIGCORE0_CLKGATE_CON(0), 13, GFLAGS), + -+ for (_postdiv2 = 1; _postdiv2 < 8; _postdiv2++) { -+ if (postdiv % _postdiv2) -+ continue; ++ /* bigcore1 */ ++ COMPOSITE_NODIV(PCLK_BIGCORE1_ROOT, "pclk_bigcore1_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_BIGCORE1_CLKSEL_CON(2), 0, 2, MFLAGS, ++ RK3588_BIGCORE1_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_BIGCORE1_PVTM, "pclk_bigcore1_pvtm", "pclk_bigcore1_root", 0, ++ RK3588_BIGCORE1_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(CLK_BIGCORE1_PVTM, "clk_bigcore1_pvtm", "xin24m", 0, ++ RK3588_BIGCORE1_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_CORE_BIGCORE1_PVTM, "clk_core_bigcore1_pvtm", "armclk_b23", 0, ++ RK3588_BIGCORE1_CLKGATE_CON(0), 13, GFLAGS), + -+ _postdiv1 = postdiv / _postdiv2; ++ /* dsu */ ++ COMPOSITE(0, "sclk_dsu", b0pll_b1pll_lpll_gpll_p, CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(0), 12, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_DSU_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "atclk_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(3), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "gicclk_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(3), 5, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclkmp_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(1), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclkm_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(1), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclks_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(1), 6, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_NOMUX(0, "periph_dsu", "sclk_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(2), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_NOMUX(0, "cntclk_dsu", "periph_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(2), 5, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NOMUX(0, "tsclk_dsu", "periph_dsu", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(2), 10, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3588_DSU_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_NODIV(PCLK_DSU_S_ROOT, "pclk_dsu_s_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(4), 11, 2, MFLAGS, ++ RK3588_DSU_CLKGATE_CON(2), 2, GFLAGS), ++ 
COMPOSITE(PCLK_DSU_ROOT, "pclk_dsu_root", b0pll_b1pll_lpll_gpll_p, CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(4), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_DSU_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE_NODIV(PCLK_DSU_NS_ROOT, "pclk_dsu_ns_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_DSU_CLKSEL_CON(4), 7, 2, MFLAGS, ++ RK3588_DSU_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(PCLK_LITCORE_PVTM, "pclk_litcore_pvtm", "pclk_dsu_ns_root", 0, ++ RK3588_DSU_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(PCLK_DBG, "pclk_dbg", "pclk_dsu_root", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_DSU, "pclk_dsu", "pclk_dsu_root", CLK_IS_CRITICAL, ++ RK3588_DSU_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(PCLK_S_DAPLITE, "pclk_s_daplite", "pclk_dsu_ns_root", CLK_IGNORE_UNUSED, ++ RK3588_DSU_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_M_DAPLITE, "pclk_m_daplite", "pclk_dsu_root", CLK_IGNORE_UNUSED, ++ RK3588_DSU_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(CLK_LITCORE_PVTM, "clk_litcore_pvtm", "xin24m", 0, ++ RK3588_DSU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_CORE_LITCORE_PVTM, "clk_core_litcore_pvtm", "armclk_l", 0, ++ RK3588_DSU_CLKGATE_CON(2), 1, GFLAGS), + -+ if (_postdiv1 > 0 && _postdiv1 < 8) -+ break; -+ } ++ /* audio */ ++ COMPOSITE_NODIV(HCLK_AUDIO_ROOT, "hclk_audio_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(24), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE_NODIV(PCLK_AUDIO_ROOT, "pclk_audio_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(24), 2, 2, MFLAGS, ++ RK3588_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(HCLK_I2S3_2CH, "hclk_i2s3_2ch", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(7), 13, GFLAGS), ++ COMPOSITE(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(28), 9, 1, MFLAGS, 4, 5, DFLAGS, ++ RK3588_CLKGATE_CON(7), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(29), 0, ++ RK3588_CLKGATE_CON(7), 15, GFLAGS, ++ &rk3588_i2s2_2ch_fracmux), ++ GATE(MCLK_I2S2_2CH, "mclk_i2s2_2ch", "clk_i2s2_2ch", 0, ++ RK3588_CLKGATE_CON(8), 0, GFLAGS), ++ MUX(I2S2_2CH_MCLKOUT, "i2s2_2ch_mclkout", i2s2_2ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(30), 2, 1, MFLAGS), + -+ if (_postdiv2 > 7) -+ return -EINVAL; ++ COMPOSITE(CLK_I2S3_2CH_SRC, "clk_i2s3_2ch_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(30), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S3_2CH_FRAC, "clk_i2s3_2ch_frac", "clk_i2s3_2ch_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(31), 0, ++ RK3588_CLKGATE_CON(8), 2, GFLAGS, ++ &rk3588_i2s3_2ch_fracmux), ++ GATE(MCLK_I2S3_2CH, "mclk_i2s3_2ch", "clk_i2s3_2ch", 0, ++ RK3588_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(CLK_DAC_ACDCDIG, "clk_dac_acdcdig", "mclk_i2s3_2ch", 0, ++ RK3588_CLKGATE_CON(8), 4, GFLAGS), ++ MUX(I2S3_2CH_MCLKOUT, "i2s3_2ch_mclkout", i2s3_2ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(32), 2, 1, MFLAGS), ++ GATE(PCLK_ACDCDIG, "pclk_acdcdig", "pclk_audio_root", 0, ++ RK3588_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(7), 4, GFLAGS), + -+ fout *= _postdiv1 * _postdiv2; -+ } else { -+ _postdiv1 = 1; -+ _postdiv2 = 1; -+ } ++ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(24), 9, 1, MFLAGS, 4, 5, DFLAGS, ++ RK3588_CLKGATE_CON(7), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, 
"clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(25), 0, ++ RK3588_CLKGATE_CON(7), 6, GFLAGS, ++ &rk3588_i2s0_8ch_tx_fracmux), ++ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, ++ RK3588_CLKGATE_CON(7), 7, GFLAGS), + -+ for (_refdiv = min_refdiv; _refdiv <= max_refdiv; _refdiv++) { -+ u64 tmp, frac_rate; ++ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(26), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(27), 0, ++ RK3588_CLKGATE_CON(7), 9, GFLAGS, ++ &rk3588_i2s0_8ch_rx_fracmux), ++ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, ++ RK3588_CLKGATE_CON(7), 10, GFLAGS), ++ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(28), 2, 2, MFLAGS), + -+ if (fin % _refdiv) -+ continue; ++ GATE(HCLK_PDM1, "hclk_pdm1", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(9), 6, GFLAGS), ++ COMPOSITE(MCLK_PDM1, "mclk_pdm1", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(36), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(9), 7, GFLAGS), + -+ tmp = (u64)fout * _refdiv; -+ do_div(tmp, fin); -+ _fbdiv = tmp; -+ if (_fbdiv < 10 || _fbdiv > 1600) -+ continue; ++ GATE(HCLK_SPDIF0, "hclk_spdif0", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(8), 14, GFLAGS), ++ COMPOSITE(CLK_SPDIF0_SRC, "clk_spdif0_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(32), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(8), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF0_FRAC, "clk_spdif0_frac", "clk_spdif0_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(33), 0, ++ RK3588_CLKGATE_CON(9), 0, GFLAGS, ++ &rk3588_spdif0_fracmux), ++ GATE(MCLK_SPDIF0, "mclk_spdif0", "clk_spdif0", 0, ++ RK3588_CLKGATE_CON(9), 1, GFLAGS), + -+ tmp = (u64)_fbdiv * fin; -+ do_div(tmp, _refdiv); -+ if (fout < MIN_FVCO_RATE || fout > MAX_FVCO_RATE) -+ continue; ++ GATE(HCLK_SPDIF1, "hclk_spdif1", "hclk_audio_root", 0, ++ RK3588_CLKGATE_CON(9), 2, GFLAGS), ++ COMPOSITE(CLK_SPDIF1_SRC, "clk_spdif1_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(34), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(9), 3, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF1_FRAC, "clk_spdif1_frac", "clk_spdif1_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(35), 0, ++ RK3588_CLKGATE_CON(9), 4, GFLAGS, ++ &rk3588_spdif1_fracmux), ++ GATE(MCLK_SPDIF1, "mclk_spdif1", "clk_spdif1", 0, ++ RK3588_CLKGATE_CON(9), 5, GFLAGS), + -+ frac_rate = fout - tmp; ++ COMPOSITE(ACLK_AV1_ROOT, "aclk_av1_root", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(163), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(68), 0, GFLAGS), ++ COMPOSITE_NODIV(PCLK_AV1_ROOT, "pclk_av1_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(163), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(68), 3, GFLAGS), + -+ if (frac_rate) { -+ tmp = (u64)frac_rate * _refdiv; -+ tmp <<= 24; -+ do_div(tmp, fin); -+ _frac = tmp; -+ _dsmpd = 0; -+ } ++ /* bus */ ++ COMPOSITE(ACLK_BUS_ROOT, "aclk_bus_root", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(38), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(10), 0, GFLAGS), + -+ break; -+ } ++ GATE(PCLK_MAILBOX0, "pclk_mailbox0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(PCLK_MAILBOX1, "pclk_mailbox1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_MAILBOX2, "pclk_mailbox2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(16), 13, GFLAGS), ++ 
GATE(PCLK_PMU2, "pclk_pmu2", "pclk_top_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(PCLK_PMUCM0_INTMUX, "pclk_pmucm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(19), 4, GFLAGS), ++ GATE(PCLK_DDRCM0_INTMUX, "pclk_ddrcm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(19), 5, GFLAGS), + -+ /* -+ * If DSMPD = 1 (DSM is disabled, "integer mode") -+ * FOUTVCO = FREF / REFDIV * FBDIV -+ * FOUTPOSTDIV = FOUTVCO / POSTDIV1 / POSTDIV2 -+ * -+ * If DSMPD = 0 (DSM is enabled, "fractional mode") -+ * FOUTVCO = FREF / REFDIV * (FBDIV + FRAC / 2^24) -+ * FOUTPOSTDIV = FOUTVCO / POSTDIV1 / POSTDIV2 -+ */ -+ foutvco = fin * _fbdiv; -+ do_div(foutvco, _refdiv); ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 12, 2, MFLAGS, ++ RK3588_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(CLK_PWM1_CAPTURE, "clk_pwm1_capture", "xin24m", 0, ++ RK3588_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2, "clk_pwm2", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 14, 2, MFLAGS, ++ RK3588_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(CLK_PWM2_CAPTURE, "clk_pwm2_capture", "xin24m", 0, ++ RK3588_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(PCLK_PWM3, "pclk_pwm3", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM3, "clk_pwm3", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(60), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(CLK_PWM3_CAPTURE, "clk_pwm3_capture", "xin24m", 0, ++ RK3588_CLKGATE_CON(15), 11, GFLAGS), + -+ if (!_dsmpd) { -+ u64 frac_rate = (u64)fin * _frac; ++ GATE(PCLK_BUSTIMER0, "pclk_bustimer0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 12, GFLAGS), ++ GATE(PCLK_BUSTIMER1, "pclk_bustimer1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 13, GFLAGS), ++ COMPOSITE_NODIV(CLK_BUS_TIMER_ROOT, "clk_bus_timer_root", mux_24m_100m_p, 0, ++ RK3588_CLKSEL_CON(60), 2, 1, MFLAGS, ++ RK3588_CLKGATE_CON(15), 14, GFLAGS), ++ GATE(CLK_BUSTIMER0, "clk_bustimer0", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(15), 15, GFLAGS), ++ GATE(CLK_BUSTIMER1, "clk_bustimer1", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(CLK_BUSTIMER2, "clk_bustimer2", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(CLK_BUSTIMER3, "clk_bustimer3", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(CLK_BUSTIMER4, "clk_bustimer4", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(CLK_BUSTIMER5, "clk_bustimer5", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(CLK_BUSTIMER6, "clk_bustimer6", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(CLK_BUSTIMER7, "clk_bustimer7", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(CLK_BUSTIMER8, "clk_bustimer8", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(CLK_BUSTIMER9, "clk_bustimer9", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(CLK_BUSTIMER10, "clk_bustimer10", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(CLK_BUSTIMER11, "clk_bustimer11", "clk_bus_timer_root", 0, ++ RK3588_CLKGATE_CON(16), 10, GFLAGS), + -+ do_div(frac_rate, _refdiv); -+ foutvco += frac_rate >> 24; -+ } ++ GATE(PCLK_WDT0, "pclk_wdt0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(15), 0, GFLAGS), ++ 
GATE(TCLK_WDT0, "tclk_wdt0", "xin24m", 0, ++ RK3588_CLKGATE_CON(15), 1, GFLAGS), + -+ foutpostdiv = foutvco; -+ do_div(foutpostdiv, _postdiv1); -+ do_div(foutpostdiv, _postdiv2); ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(39), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(11), 10, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(39), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(11), 11, GFLAGS), ++ GATE(PCLK_CAN2, "pclk_can2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE(CLK_CAN2, "clk_can2", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(40), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(11), 13, GFLAGS), + -+ if (refdiv) -+ *refdiv = _refdiv; -+ if (fbdiv) -+ *fbdiv = _fbdiv; -+ if (postdiv1) -+ *postdiv1 = _postdiv1; -+ if (postdiv2) -+ *postdiv2 = _postdiv2; -+ if (frac) -+ *frac = _frac; -+ if (dsmpd) -+ *dsmpd = _dsmpd; -+ if (bypass) -+ *bypass = false; ++ GATE(ACLK_DECOM, "aclk_decom", "aclk_bus_root", 0, ++ RK3588_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(PCLK_DECOM, "pclk_decom", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(17), 7, GFLAGS), ++ COMPOSITE(DCLK_DECOM, "dclk_decom", gpll_spll_p, 0, ++ RK3588_CLKSEL_CON(62), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(ACLK_DMAC0, "aclk_dmac0", "aclk_bus_root", 0, ++ RK3588_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_bus_root", 0, ++ RK3588_CLKGATE_CON(10), 6, GFLAGS), ++ GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_bus_root", 0, ++ RK3588_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(10), 3, GFLAGS), + -+ return (unsigned long)foutpostdiv; -+} ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(16), 14, GFLAGS), ++ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_24m_32k_p, 0, ++ RK3588_CLKSEL_CON(60), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(17), 0, GFLAGS), ++ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_24m_32k_p, 0, ++ RK3588_CLKSEL_CON(60), 14, 1, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(17), 1, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(17), 2, GFLAGS), ++ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_24m_32k_p, 0, ++ RK3588_CLKSEL_CON(61), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(17), 4, GFLAGS), ++ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_24m_32k_p, 0, ++ RK3588_CLKSEL_CON(61), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(17), 5, GFLAGS), + -+static long -+clk_regmap_pll_round_rate(struct clk_hw *hw, unsigned long drate, -+ unsigned long *prate) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); -+ long rate; ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 9, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 11, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(PCLK_I2C6, 
"pclk_i2c6", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 14, GFLAGS), ++ GATE(PCLK_I2C8, "pclk_i2c8", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(10), 15, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 6, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 7, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 8, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 9, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 10, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 11, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 12, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C8, "clk_i2c8", mux_200m_100m_p, 0, ++ RK3588_CLKSEL_CON(38), 13, 1, MFLAGS, ++ RK3588_CLKGATE_CON(11), 7, GFLAGS), + -+ rate = clk_pll_round_rate(*prate, drate, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL); ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(18), 9, GFLAGS), ++ GATE(CLK_OTPC_NS, "clk_otpc_ns", "xin24m", 0, ++ RK3588_CLKGATE_CON(18), 10, GFLAGS), ++ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, ++ RK3588_CLKGATE_CON(18), 11, GFLAGS), ++ GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0, ++ RK3588_CLKGATE_CON(18), 13, GFLAGS), ++ GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0, ++ RK3588_CLKGATE_CON(18), 12, GFLAGS), + -+ dev_dbg(pll->dev, "%s: prate=%ld, drate=%ld, rate=%ld\n", -+ clk_hw_get_name(hw), *prate, drate, rate); ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(11), 14, GFLAGS), ++ COMPOSITE(CLK_SARADC, "clk_saradc", gpll_24m_p, 0, ++ RK3588_CLKSEL_CON(40), 14, 1, MFLAGS, 6, 8, DFLAGS, ++ RK3588_CLKGATE_CON(11), 15, GFLAGS), + -+ return rate; -+} ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(PCLK_SPI3, "pclk_spi3", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(PCLK_SPI4, "pclk_spi4", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(14), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_150m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 2, 2, MFLAGS, ++ RK3588_CLKGATE_CON(14), 11, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_150m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 4, 2, MFLAGS, ++ RK3588_CLKGATE_CON(14), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_150m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 6, 2, MFLAGS, ++ RK3588_CLKGATE_CON(14), 13, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI3, "clk_spi3", mux_200m_150m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(14), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI4, "clk_spi4", mux_200m_150m_24m_p, 0, ++ RK3588_CLKSEL_CON(59), 10, 2, MFLAGS, ++ RK3588_CLKGATE_CON(14), 15, GFLAGS), + -+static int -+clk_regmap_pll_set_rate(struct 
clk_hw *hw, unsigned long drate, -+ unsigned long prate) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); -+ u8 refdiv, postdiv1, postdiv2, dsmpd, bypass; -+ u16 fbdiv; -+ u32 frac; -+ long rate; ++ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(18), 6, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE(CLK_TSADC, "clk_tsadc", gpll_24m_p, 0, ++ RK3588_CLKSEL_CON(41), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(12), 1, GFLAGS), + -+ rate = clk_pll_round_rate(prate, drate, &refdiv, &fbdiv, &postdiv1, -+ &postdiv2, &frac, &dsmpd, &bypass); -+ if (rate < 0) -+ return rate; ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 5, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 6, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 7, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 8, GFLAGS), ++ GATE(PCLK_UART8, "pclk_uart8", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 9, GFLAGS), ++ GATE(PCLK_UART9, "pclk_uart9", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(12), 10, GFLAGS), + -+ dev_dbg(pll->dev, "%s: rate=%ld, bypass=%d\n", -+ clk_hw_get_name(hw), drate, bypass); ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(41), 14, 1, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(12), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(42), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(12), 12, GFLAGS, ++ &rk3588_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RK3588_CLKGATE_CON(12), 13, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(43), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(12), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(44), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(12), 15, GFLAGS, ++ &rk3588_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RK3588_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(45), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(13), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(46), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(13), 2, GFLAGS, ++ &rk3588_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RK3588_CLKGATE_CON(13), 3, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(47), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(13), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(48), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(13), 5, GFLAGS, ++ &rk3588_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RK3588_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(49), 7, 1, 
MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(13), 7, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(50), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(13), 8, GFLAGS, ++ &rk3588_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RK3588_CLKGATE_CON(13), 9, GFLAGS), ++ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(51), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(13), 10, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(52), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(13), 11, GFLAGS, ++ &rk3588_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, ++ RK3588_CLKGATE_CON(13), 12, GFLAGS), ++ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(53), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(13), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(54), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(13), 14, GFLAGS, ++ &rk3588_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, ++ RK3588_CLKGATE_CON(13), 15, GFLAGS), ++ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(55), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(56), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(14), 1, GFLAGS, ++ &rk3588_uart8_fracmux), ++ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, ++ RK3588_CLKGATE_CON(14), 2, GFLAGS), ++ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(57), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(14), 3, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(58), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_CLKGATE_CON(14), 4, GFLAGS, ++ &rk3588_uart9_fracmux), ++ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, ++ RK3588_CLKGATE_CON(14), 5, GFLAGS), + -+ if (bypass) { -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(0), -+ PLL_BYPASS(1)); -+ } else { -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(0), -+ PLL_BYPASS(0) | PLL_POSTDIV1(postdiv1) | -+ PLL_FBDIV(fbdiv)); -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), -+ HIWORD_UPDATE(dsmpd, pll->dsmpd_shift, pll->dsmpd_shift) | -+ PLL_POSTDIV2(postdiv2) | PLL_REFDIV(refdiv)); -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(2), -+ PLL_FRAC(frac)); ++ /* center */ ++ COMPOSITE_NODIV(ACLK_CENTER_ROOT, "aclk_center_root", mux_700m_400m_200m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(69), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_CENTER_LOW_ROOT, "aclk_center_low_root", mux_500m_250m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 2, 2, MFLAGS, ++ RK3588_CLKGATE_CON(69), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_CENTER_ROOT, "hclk_center_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 4, 2, MFLAGS, ++ RK3588_CLKGATE_CON(69), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_CENTER_ROOT, "pclk_center_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 6, 2, MFLAGS | CLK_MUX_READ_ONLY, ++ RK3588_CLKGATE_CON(69), 3, GFLAGS), ++ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_center_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(69), 5, GFLAGS), ++ 
GATE(ACLK_DDR_SHAREMEM, "aclk_ddr_sharemem", "aclk_center_low_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(69), 6, GFLAGS), ++ COMPOSITE_NODIV(ACLK_CENTER_S200_ROOT, "aclk_center_s200_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(69), 8, GFLAGS), ++ COMPOSITE_NODIV(ACLK_CENTER_S400_ROOT, "aclk_center_s400_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(165), 10, 2, MFLAGS, ++ RK3588_CLKGATE_CON(69), 9, GFLAGS), ++ GATE(FCLK_DDR_CM0_CORE, "fclk_ddr_cm0_core", "hclk_center_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(69), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_DDR_TIMER_ROOT, "clk_ddr_timer_root", mux_24m_100m_p, CLK_IGNORE_UNUSED, ++ RK3588_CLKSEL_CON(165), 12, 1, MFLAGS, ++ RK3588_CLKGATE_CON(69), 15, GFLAGS), ++ GATE(CLK_DDR_TIMER0, "clk_ddr_timer0", "clk_ddr_timer_root", 0, ++ RK3588_CLKGATE_CON(70), 0, GFLAGS), ++ GATE(CLK_DDR_TIMER1, "clk_ddr_timer1", "clk_ddr_timer_root", 0, ++ RK3588_CLKGATE_CON(70), 1, GFLAGS), ++ GATE(TCLK_WDT_DDR, "tclk_wdt_ddr", "xin24m", 0, ++ RK3588_CLKGATE_CON(70), 2, GFLAGS), ++ COMPOSITE(CLK_DDR_CM0_RTC, "clk_ddr_cm0_rtc", mux_24m_32k_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(166), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(70), 4, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_center_root", 0, ++ RK3588_CLKGATE_CON(70), 7, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_center_root", 0, ++ RK3588_CLKGATE_CON(70), 8, GFLAGS), ++ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_center_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(70), 9, GFLAGS), ++ GATE(PCLK_SHAREMEM, "pclk_sharemem", "pclk_center_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(70), 10, GFLAGS), + -+ dev_dbg(pll->dev, "refdiv=%d, fbdiv=%d, frac=%d\n", -+ refdiv, fbdiv, frac); -+ dev_dbg(pll->dev, "postdiv1=%d, postdiv2=%d\n", -+ postdiv1, postdiv2); -+ } ++ /* gpu */ ++ COMPOSITE(CLK_GPU_SRC, "clk_gpu_src", gpll_cpll_aupll_npll_spll_p, 0, ++ RK3588_CLKSEL_CON(158), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(66), 1, GFLAGS), ++ GATE(CLK_GPU, "clk_gpu", "clk_gpu_src", 0, ++ RK3588_CLKGATE_CON(66), 4, GFLAGS), ++ GATE(CLK_GPU_COREGROUP, "clk_gpu_coregroup", "clk_gpu_src", 0, ++ RK3588_CLKGATE_CON(66), 6, GFLAGS), ++ COMPOSITE_NOMUX(CLK_GPU_STACKS, "clk_gpu_stacks", "clk_gpu_src", 0, ++ RK3588_CLKSEL_CON(159), 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(66), 7, GFLAGS), ++ GATE(CLK_GPU_PVTM, "clk_gpu_pvtm", "xin24m", 0, ++ RK3588_CLKGATE_CON(67), 0, GFLAGS), ++ GATE(CLK_CORE_GPU_PVTM, "clk_core_gpu_pvtm", "clk_gpu_src", 0, ++ RK3588_CLKGATE_CON(67), 1, GFLAGS), + -+ return 0; -+} -+ -+static int clk_regmap_pll_prepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); -+ u32 v; -+ int ret; ++ /* isp1 */ ++ COMPOSITE(ACLK_ISP1_ROOT, "aclk_isp1_root", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(67), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(26), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_ISP1_ROOT, "hclk_isp1_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(67), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(26), 1, GFLAGS), ++ COMPOSITE(CLK_ISP1_CORE, "clk_isp1_core", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(67), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(26), 2, GFLAGS), ++ GATE(CLK_ISP1_CORE_MARVIN, "clk_isp1_core_marvin", "clk_isp1_core", 0, ++ RK3588_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(CLK_ISP1_CORE_VICAP, "clk_isp1_core_vicap", "clk_isp1_core", 0, ++ RK3588_CLKGATE_CON(26), 4, GFLAGS), + -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), -+ HIWORD_UPDATE(0, 
pll->pd_shift, pll->pd_shift)); ++ /* npu */ ++ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(73), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(29), 0, GFLAGS), ++ COMPOSITE(CLK_NPU_DSU0, "clk_npu_dsu0", gpll_cpll_aupll_npll_spll_p, 0, ++ RK3588_CLKSEL_CON(73), 7, 3, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(29), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(74), 1, 2, MFLAGS, ++ RK3588_CLKGATE_CON(29), 4, GFLAGS), ++ GATE(ACLK_NPU1, "aclk_npu1", "clk_npu_dsu0", 0, ++ RK3588_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(HCLK_NPU1, "hclk_npu1", "hclk_npu_root", 0, ++ RK3588_CLKGATE_CON(27), 2, GFLAGS), ++ GATE(ACLK_NPU2, "aclk_npu2", "clk_npu_dsu0", 0, ++ RK3588_CLKGATE_CON(28), 0, GFLAGS), ++ GATE(HCLK_NPU2, "hclk_npu2", "hclk_npu_root", 0, ++ RK3588_CLKGATE_CON(28), 2, GFLAGS), ++ COMPOSITE_NODIV(HCLK_NPU_CM0_ROOT, "hclk_npu_cm0_root", mux_400m_200m_100m_24m_p, 0, ++ RK3588_CLKSEL_CON(74), 5, 2, MFLAGS, ++ RK3588_CLKGATE_CON(30), 1, GFLAGS), ++ GATE(FCLK_NPU_CM0_CORE, "fclk_npu_cm0_core", "hclk_npu_cm0_root", 0, ++ RK3588_CLKGATE_CON(30), 3, GFLAGS), ++ COMPOSITE(CLK_NPU_CM0_RTC, "clk_npu_cm0_rtc", mux_24m_32k_p, 0, ++ RK3588_CLKSEL_CON(74), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(30), 5, GFLAGS), ++ GATE(PCLK_NPU_PVTM, "pclk_npu_pvtm", "pclk_npu_root", 0, ++ RK3588_CLKGATE_CON(29), 12, GFLAGS), ++ GATE(PCLK_NPU_GRF, "pclk_npu_grf", "pclk_npu_root", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(29), 13, GFLAGS), ++ GATE(CLK_NPU_PVTM, "clk_npu_pvtm", "xin24m", 0, ++ RK3588_CLKGATE_CON(29), 14, GFLAGS), ++ GATE(CLK_CORE_NPU_PVTM, "clk_core_npu_pvtm", "clk_npu_dsu0", 0, ++ RK3588_CLKGATE_CON(29), 15, GFLAGS), ++ GATE(ACLK_NPU0, "aclk_npu0", "clk_npu_dsu0", 0, ++ RK3588_CLKGATE_CON(30), 6, GFLAGS), ++ GATE(HCLK_NPU0, "hclk_npu0", "hclk_npu_root", 0, ++ RK3588_CLKGATE_CON(30), 8, GFLAGS), ++ GATE(PCLK_NPU_TIMER, "pclk_npu_timer", "pclk_npu_root", 0, ++ RK3588_CLKGATE_CON(29), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_NPUTIMER_ROOT, "clk_nputimer_root", mux_24m_100m_p, 0, ++ RK3588_CLKSEL_CON(74), 3, 1, MFLAGS, ++ RK3588_CLKGATE_CON(29), 7, GFLAGS), ++ GATE(CLK_NPUTIMER0, "clk_nputimer0", "clk_nputimer_root", 0, ++ RK3588_CLKGATE_CON(29), 8, GFLAGS), ++ GATE(CLK_NPUTIMER1, "clk_nputimer1", "clk_nputimer_root", 0, ++ RK3588_CLKGATE_CON(29), 9, GFLAGS), ++ GATE(PCLK_NPU_WDT, "pclk_npu_wdt", "pclk_npu_root", 0, ++ RK3588_CLKGATE_CON(29), 10, GFLAGS), ++ GATE(TCLK_NPU_WDT, "tclk_npu_wdt", "xin24m", 0, ++ RK3588_CLKGATE_CON(29), 11, GFLAGS), + -+ ret = regmap_read_poll_timeout(pll->regmap, -+ pll->reg + PLLCON_OFFSET(1), -+ v, v & BIT(pll->lock_shift), 50, 50000); -+ if (ret) -+ dev_err(pll->dev, "%s is not lock\n", clk_hw_get_name(hw)); ++ /* nvm */ ++ COMPOSITE_NODIV(HCLK_NVM_ROOT, "hclk_nvm_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(77), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(31), 0, GFLAGS), ++ COMPOSITE(ACLK_NVM_ROOT, "aclk_nvm_root", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(77), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(31), 1, GFLAGS), ++ GATE(ACLK_EMMC, "aclk_emmc", "aclk_nvm_root", 0, ++ RK3588_CLKGATE_CON(31), 5, GFLAGS), ++ COMPOSITE(CCLK_EMMC, "cclk_emmc", gpll_cpll_24m_p, 0, ++ RK3588_CLKSEL_CON(77), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3588_CLKGATE_CON(31), 6, GFLAGS), ++ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(78), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(31), 7, GFLAGS), ++ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, ++ 
RK3588_CLKGATE_CON(31), 8, GFLAGS), + -+ return 0; -+} ++ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_24m_p, 0, ++ RK3588_CLKSEL_CON(78), 12, 2, MFLAGS, 6, 6, DFLAGS, ++ RK3588_CLKGATE_CON(31), 9, GFLAGS), + -+static void clk_regmap_pll_unprepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ /* php */ ++ COMPOSITE(CLK_GMAC0_PTP_REF, "clk_gmac0_ptp_ref", clk_gmac0_ptp_ref_p, 0, ++ RK3588_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3588_CLKGATE_CON(34), 10, GFLAGS), ++ COMPOSITE(CLK_GMAC1_PTP_REF, "clk_gmac1_ptp_ref", clk_gmac1_ptp_ref_p, 0, ++ RK3588_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RK3588_CLKGATE_CON(34), 11, GFLAGS), ++ COMPOSITE(CLK_GMAC_125M, "clk_gmac_125m", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(83), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3588_CLKGATE_CON(35), 5, GFLAGS), ++ COMPOSITE(CLK_GMAC_50M, "clk_gmac_50m", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(84), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(35), 6, GFLAGS), + -+ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), -+ HIWORD_UPDATE(1, pll->pd_shift, pll->pd_shift)); -+} ++ COMPOSITE(ACLK_PCIE_ROOT, "aclk_pcie_root", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(80), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(32), 6, GFLAGS), ++ COMPOSITE(ACLK_PHP_ROOT, "aclk_php_root", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(80), 13, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3588_CLKGATE_CON(32), 7, GFLAGS), ++ COMPOSITE_NODIV(PCLK_PHP_ROOT, "pclk_php_root", mux_150m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(80), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(32), 0, GFLAGS), ++ GATE(ACLK_PHP_GIC_ITS, "aclk_php_gic_its", "aclk_pcie_root", CLK_IS_CRITICAL, ++ RK3588_CLKGATE_CON(34), 6, GFLAGS), ++ GATE(ACLK_PCIE_BRIDGE, "aclk_pcie_bridge", "aclk_pcie_root", 0, ++ RK3588_CLKGATE_CON(32), 8, GFLAGS), ++ GATE(ACLK_MMU_PCIE, "aclk_mmu_pcie", "aclk_pcie_bridge", 0, ++ RK3588_CLKGATE_CON(34), 7, GFLAGS), ++ GATE(ACLK_MMU_PHP, "aclk_mmu_php", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(34), 8, GFLAGS), ++ GATE(ACLK_PCIE_4L_DBI, "aclk_pcie_4l_dbi", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(32), 13, GFLAGS), ++ GATE(ACLK_PCIE_2L_DBI, "aclk_pcie_2l_dbi", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(32), 14, GFLAGS), ++ GATE(ACLK_PCIE_1L0_DBI, "aclk_pcie_1l0_dbi", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(32), 15, GFLAGS), ++ GATE(ACLK_PCIE_1L1_DBI, "aclk_pcie_1l1_dbi", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 0, GFLAGS), ++ GATE(ACLK_PCIE_1L2_DBI, "aclk_pcie_1l2_dbi", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 1, GFLAGS), ++ GATE(ACLK_PCIE_4L_MSTR, "aclk_pcie_4l_mstr", "aclk_mmu_pcie", 0, ++ RK3588_CLKGATE_CON(33), 2, GFLAGS), ++ GATE(ACLK_PCIE_2L_MSTR, "aclk_pcie_2l_mstr", "aclk_mmu_pcie", 0, ++ RK3588_CLKGATE_CON(33), 3, GFLAGS), ++ GATE(ACLK_PCIE_1L0_MSTR, "aclk_pcie_1l0_mstr", "aclk_mmu_pcie", 0, ++ RK3588_CLKGATE_CON(33), 4, GFLAGS), ++ GATE(ACLK_PCIE_1L1_MSTR, "aclk_pcie_1l1_mstr", "aclk_mmu_pcie", 0, ++ RK3588_CLKGATE_CON(33), 5, GFLAGS), ++ GATE(ACLK_PCIE_1L2_MSTR, "aclk_pcie_1l2_mstr", "aclk_mmu_pcie", 0, ++ RK3588_CLKGATE_CON(33), 6, GFLAGS), ++ GATE(ACLK_PCIE_4L_SLV, "aclk_pcie_4l_slv", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 7, GFLAGS), ++ GATE(ACLK_PCIE_2L_SLV, "aclk_pcie_2l_slv", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 8, GFLAGS), ++ GATE(ACLK_PCIE_1L0_SLV, "aclk_pcie_1l0_slv", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 9, GFLAGS), ++ GATE(ACLK_PCIE_1L1_SLV, "aclk_pcie_1l1_slv", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 10, GFLAGS), ++ GATE(ACLK_PCIE_1L2_SLV, 
"aclk_pcie_1l2_slv", "aclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 11, GFLAGS), ++ GATE(PCLK_PCIE_4L, "pclk_pcie_4l", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 12, GFLAGS), ++ GATE(PCLK_PCIE_2L, "pclk_pcie_2l", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 13, GFLAGS), ++ GATE(PCLK_PCIE_1L0, "pclk_pcie_1l0", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 14, GFLAGS), ++ GATE(PCLK_PCIE_1L1, "pclk_pcie_1l1", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(33), 15, GFLAGS), ++ GATE(PCLK_PCIE_1L2, "pclk_pcie_1l2", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(34), 0, GFLAGS), ++ GATE(CLK_PCIE_AUX0, "clk_pcie_aux0", "xin24m", 0, ++ RK3588_CLKGATE_CON(34), 1, GFLAGS), ++ GATE(CLK_PCIE_AUX1, "clk_pcie_aux1", "xin24m", 0, ++ RK3588_CLKGATE_CON(34), 2, GFLAGS), ++ GATE(CLK_PCIE_AUX2, "clk_pcie_aux2", "xin24m", 0, ++ RK3588_CLKGATE_CON(34), 3, GFLAGS), ++ GATE(CLK_PCIE_AUX3, "clk_pcie_aux3", "xin24m", 0, ++ RK3588_CLKGATE_CON(34), 4, GFLAGS), ++ GATE(CLK_PCIE_AUX4, "clk_pcie_aux4", "xin24m", 0, ++ RK3588_CLKGATE_CON(34), 5, GFLAGS), ++ GATE(CLK_PIPEPHY0_REF, "clk_pipephy0_ref", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 0, GFLAGS), ++ GATE(CLK_PIPEPHY1_REF, "clk_pipephy1_ref", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 1, GFLAGS), ++ GATE(CLK_PIPEPHY2_REF, "clk_pipephy2_ref", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 2, GFLAGS), ++ GATE(PCLK_GMAC0, "pclk_gmac0", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(32), 3, GFLAGS), ++ GATE(PCLK_GMAC1, "pclk_gmac1", "pclk_php_root", 0, ++ RK3588_CLKGATE_CON(32), 4, GFLAGS), ++ GATE(ACLK_GMAC0, "aclk_gmac0", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(32), 10, GFLAGS), ++ GATE(ACLK_GMAC1, "aclk_gmac1", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(32), 11, GFLAGS), ++ GATE(CLK_PMALIVE0, "clk_pmalive0", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 4, GFLAGS), ++ GATE(CLK_PMALIVE1, "clk_pmalive1", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 5, GFLAGS), ++ GATE(CLK_PMALIVE2, "clk_pmalive2", "xin24m", 0, ++ RK3588_CLKGATE_CON(37), 6, GFLAGS), ++ GATE(ACLK_SATA0, "aclk_sata0", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(37), 7, GFLAGS), ++ GATE(ACLK_SATA1, "aclk_sata1", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(37), 8, GFLAGS), ++ GATE(ACLK_SATA2, "aclk_sata2", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(37), 9, GFLAGS), ++ COMPOSITE(CLK_RXOOB0, "clk_rxoob0", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(82), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(37), 10, GFLAGS), ++ COMPOSITE(CLK_RXOOB1, "clk_rxoob1", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(82), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3588_CLKGATE_CON(37), 11, GFLAGS), ++ COMPOSITE(CLK_RXOOB2, "clk_rxoob2", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(83), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(37), 12, GFLAGS), ++ GATE(ACLK_USB3OTG2, "aclk_usb3otg2", "aclk_mmu_php", 0, ++ RK3588_CLKGATE_CON(35), 7, GFLAGS), ++ GATE(SUSPEND_CLK_USB3OTG2, "suspend_clk_usb3otg2", "xin24m", 0, ++ RK3588_CLKGATE_CON(35), 8, GFLAGS), ++ GATE(REF_CLK_USB3OTG2, "ref_clk_usb3otg2", "xin24m", 0, ++ RK3588_CLKGATE_CON(35), 9, GFLAGS), ++ COMPOSITE(CLK_UTMI_OTG2, "clk_utmi_otg2", mux_150m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(84), 12, 2, MFLAGS, 8, 4, DFLAGS, ++ RK3588_CLKGATE_CON(35), 10, GFLAGS), ++ GATE(PCLK_PCIE_COMBO_PIPE_PHY0, "pclk_pcie_combo_pipe_phy0", "pclk_top_root", 0, ++ RK3588_PHP_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_PCIE_COMBO_PIPE_PHY1, "pclk_pcie_combo_pipe_phy1", "pclk_top_root", 0, ++ RK3588_PHP_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_PCIE_COMBO_PIPE_PHY2, "pclk_pcie_combo_pipe_phy2", "pclk_top_root", 0, ++ RK3588_PHP_CLKGATE_CON(0), 7, GFLAGS), ++ 
GATE(PCLK_PCIE_COMBO_PIPE_PHY, "pclk_pcie_combo_pipe_phy", "pclk_top_root", 0, ++ RK3588_PHP_CLKGATE_CON(0), 8, GFLAGS), + -+static int clk_regmap_pll_is_prepared(struct clk_hw *hw) -+{ -+ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); -+ unsigned int con1; ++ /* rga */ ++ COMPOSITE(CLK_RGA3_1_CORE, "clk_rga3_1_core", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(174), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(76), 6, GFLAGS), ++ COMPOSITE(ACLK_RGA3_ROOT, "aclk_rga3_root", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(174), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(76), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_RGA3_ROOT, "hclk_rga3_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(174), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(76), 1, GFLAGS), ++ GATE(HCLK_RGA3_1, "hclk_rga3_1", "hclk_rga3_root", 0, ++ RK3588_CLKGATE_CON(76), 4, GFLAGS), ++ GATE(ACLK_RGA3_1, "aclk_rga3_1", "aclk_rga3_root", 0, ++ RK3588_CLKGATE_CON(76), 5, GFLAGS), + -+ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(1), &con1); ++ /* vdec */ ++ COMPOSITE_NODIV(0, "hclk_rkvdec0_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(89), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(40), 0, GFLAGS), ++ COMPOSITE(0, "aclk_rkvdec0_root", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(89), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(40), 1, GFLAGS), ++ COMPOSITE(ACLK_RKVDEC_CCU, "aclk_rkvdec_ccu", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(89), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(40), 2, GFLAGS), ++ COMPOSITE(CLK_RKVDEC0_CA, "clk_rkvdec0_ca", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(90), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(40), 7, GFLAGS), ++ COMPOSITE(CLK_RKVDEC0_HEVC_CA, "clk_rkvdec0_hevc_ca", gpll_cpll_npll_1000m_p, 0, ++ RK3588_CLKSEL_CON(90), 11, 2, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(40), 8, GFLAGS), ++ COMPOSITE(CLK_RKVDEC0_CORE, "clk_rkvdec0_core", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(91), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(40), 9, GFLAGS), ++ COMPOSITE_NODIV(0, "hclk_rkvdec1_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(93), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(41), 0, GFLAGS), ++ COMPOSITE(0, "aclk_rkvdec1_root", gpll_cpll_aupll_npll_p, 0, ++ RK3588_CLKSEL_CON(93), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(41), 1, GFLAGS), ++ COMPOSITE(CLK_RKVDEC1_CA, "clk_rkvdec1_ca", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(93), 14, 1, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(41), 6, GFLAGS), ++ COMPOSITE(CLK_RKVDEC1_HEVC_CA, "clk_rkvdec1_hevc_ca", gpll_cpll_npll_1000m_p, 0, ++ RK3588_CLKSEL_CON(94), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(41), 7, GFLAGS), ++ COMPOSITE(CLK_RKVDEC1_CORE, "clk_rkvdec1_core", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(94), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(41), 8, GFLAGS), + -+ return !(con1 & BIT(pll->pd_shift)); -+} ++ /* sdio */ ++ COMPOSITE_NODIV(0, "hclk_sdio_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(172), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(75), 0, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", gpll_cpll_24m_p, 0, ++ RK3588_CLKSEL_CON(172), 8, 2, MFLAGS, 2, 6, DFLAGS, ++ RK3588_CLKGATE_CON(75), 3, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RK3588_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RK3588_SDIO_CON1, 1), + -+static const struct clk_ops clk_regmap_pll_ops = { -+ .recalc_rate = clk_regmap_pll_recalc_rate, -+ .round_rate = clk_regmap_pll_round_rate, -+ .set_rate = clk_regmap_pll_set_rate, -+ .prepare = 
clk_regmap_pll_prepare, -+ .unprepare = clk_regmap_pll_unprepare, -+ .is_prepared = clk_regmap_pll_is_prepared, -+}; ++ /* usb */ ++ COMPOSITE(ACLK_USB_ROOT, "aclk_usb_root", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(96), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(42), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_USB_ROOT, "hclk_usb_root", mux_150m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(96), 6, 2, MFLAGS, ++ RK3588_CLKGATE_CON(42), 1, GFLAGS), ++ GATE(SUSPEND_CLK_USB3OTG0, "suspend_clk_usb3otg0", "xin24m", 0, ++ RK3588_CLKGATE_CON(42), 5, GFLAGS), ++ GATE(REF_CLK_USB3OTG0, "ref_clk_usb3otg0", "xin24m", 0, ++ RK3588_CLKGATE_CON(42), 6, GFLAGS), ++ GATE(SUSPEND_CLK_USB3OTG1, "suspend_clk_usb3otg1", "xin24m", 0, ++ RK3588_CLKGATE_CON(42), 8, GFLAGS), ++ GATE(REF_CLK_USB3OTG1, "ref_clk_usb3otg1", "xin24m", 0, ++ RK3588_CLKGATE_CON(42), 9, GFLAGS), + -+struct clk * -+devm_clk_regmap_register_pll(struct device *dev, const char *name, -+ const char *parent_name, -+ struct regmap *regmap, u32 reg, u8 pd_shift, -+ u8 dsmpd_shift, u8 lock_shift, -+ unsigned long flags) -+{ -+ struct clk_regmap_pll *pll; -+ struct clk_init_data init = {}; ++ /* vdpu */ ++ COMPOSITE(ACLK_VDPU_ROOT, "aclk_vdpu_root", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(98), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(44), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VDPU_LOW_ROOT, "aclk_vdpu_low_root", mux_400m_200m_100m_24m_p, 0, ++ RK3588_CLKSEL_CON(98), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(44), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VDPU_ROOT, "hclk_vdpu_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(98), 9, 2, MFLAGS, ++ RK3588_CLKGATE_CON(44), 2, GFLAGS), ++ COMPOSITE(ACLK_JPEG_DECODER_ROOT, "aclk_jpeg_decoder_root", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(99), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(44), 3, GFLAGS), ++ GATE(HCLK_IEP2P0, "hclk_iep2p0", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 4, GFLAGS), ++ COMPOSITE(CLK_IEP2P0_CORE, "clk_iep2p0_core", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(99), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(45), 6, GFLAGS), ++ GATE(HCLK_JPEG_ENCODER0, "hclk_jpeg_encoder0", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(44), 11, GFLAGS), ++ GATE(HCLK_JPEG_ENCODER1, "hclk_jpeg_encoder1", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(44), 13, GFLAGS), ++ GATE(HCLK_JPEG_ENCODER2, "hclk_jpeg_encoder2", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(44), 15, GFLAGS), ++ GATE(HCLK_JPEG_ENCODER3, "hclk_jpeg_encoder3", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 1, GFLAGS), ++ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 3, GFLAGS), ++ GATE(HCLK_RGA2, "hclk_rga2", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 7, GFLAGS), ++ GATE(ACLK_RGA2, "aclk_rga2", "aclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 8, GFLAGS), ++ COMPOSITE(CLK_RGA2_CORE, "clk_rga2_core", gpll_cpll_npll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(100), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(45), 9, GFLAGS), ++ GATE(HCLK_RGA3_0, "hclk_rga3_0", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 10, GFLAGS), ++ GATE(ACLK_RGA3_0, "aclk_rga3_0", "aclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(45), 11, GFLAGS), ++ COMPOSITE(CLK_RGA3_0_CORE, "clk_rga3_0_core", gpll_cpll_npll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(100), 13, 3, MFLAGS, 8, 5, DFLAGS, ++ RK3588_CLKGATE_CON(45), 12, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vdpu_root", 0, ++ RK3588_CLKGATE_CON(44), 9, GFLAGS), + -+ pll = devm_kzalloc(dev, sizeof(*pll), GFP_KERNEL); -+ if (!pll) -+ return ERR_PTR(-ENOMEM); ++ /* 
venc */ ++ COMPOSITE_NODIV(HCLK_RKVENC1_ROOT, "hclk_rkvenc1_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(104), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(48), 0, GFLAGS), ++ COMPOSITE(ACLK_RKVENC1_ROOT, "aclk_rkvenc1_root", gpll_cpll_npll_p, 0, ++ RK3588_CLKSEL_CON(104), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(48), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_RKVENC0_ROOT, "hclk_rkvenc0_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(102), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(47), 0, GFLAGS), ++ COMPOSITE(ACLK_RKVENC0_ROOT, "aclk_rkvenc0_root", gpll_cpll_npll_p, 0, ++ RK3588_CLKSEL_CON(102), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(47), 1, GFLAGS), ++ GATE(HCLK_RKVENC0, "hclk_rkvenc0", "hclk_rkvenc0_root", 0, ++ RK3588_CLKGATE_CON(47), 4, GFLAGS), ++ GATE(ACLK_RKVENC0, "aclk_rkvenc0", "aclk_rkvenc0_root", 0, ++ RK3588_CLKGATE_CON(47), 5, GFLAGS), ++ COMPOSITE(CLK_RKVENC0_CORE, "clk_rkvenc0_core", gpll_cpll_aupll_npll_p, 0, ++ RK3588_CLKSEL_CON(102), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(47), 6, GFLAGS), ++ COMPOSITE(CLK_RKVENC1_CORE, "clk_rkvenc1_core", gpll_cpll_aupll_npll_p, 0, ++ RK3588_CLKSEL_CON(104), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(48), 6, GFLAGS), + -+ init.name = name; -+ init.ops = &clk_regmap_pll_ops; -+ init.flags = flags; -+ init.parent_names = (parent_name ? &parent_name : NULL); -+ init.num_parents = (parent_name ? 1 : 0); ++ /* vi */ ++ COMPOSITE(ACLK_VI_ROOT, "aclk_vi_root", gpll_cpll_npll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(106), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(49), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(106), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(49), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(106), 10, 2, MFLAGS, ++ RK3588_CLKGATE_CON(49), 2, GFLAGS), ++ COMPOSITE_NODIV(ICLK_CSIHOST01, "iclk_csihost01", mux_400m_200m_100m_24m_p, 0, ++ RK3588_CLKSEL_CON(108), 14, 2, MFLAGS, ++ RK3588_CLKGATE_CON(51), 10, GFLAGS), ++ GATE(ICLK_CSIHOST0, "iclk_csihost0", "iclk_csihost01", 0, ++ RK3588_CLKGATE_CON(51), 11, GFLAGS), ++ GATE(ICLK_CSIHOST1, "iclk_csihost1", "iclk_csihost01", 0, ++ RK3588_CLKGATE_CON(51), 12, GFLAGS), ++ GATE(PCLK_CSI_HOST_0, "pclk_csi_host_0", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 4, GFLAGS), ++ GATE(PCLK_CSI_HOST_1, "pclk_csi_host_1", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 5, GFLAGS), ++ GATE(PCLK_CSI_HOST_2, "pclk_csi_host_2", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 6, GFLAGS), ++ GATE(PCLK_CSI_HOST_3, "pclk_csi_host_3", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 7, GFLAGS), ++ GATE(PCLK_CSI_HOST_4, "pclk_csi_host_4", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 8, GFLAGS), ++ GATE(PCLK_CSI_HOST_5, "pclk_csi_host_5", "pclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 9, GFLAGS), ++ GATE(ACLK_FISHEYE0, "aclk_fisheye0", "aclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 14, GFLAGS), ++ GATE(HCLK_FISHEYE0, "hclk_fisheye0", "hclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 15, GFLAGS), ++ COMPOSITE(CLK_FISHEYE0_CORE, "clk_fisheye0_core", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(108), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(50), 0, GFLAGS), ++ GATE(ACLK_FISHEYE1, "aclk_fisheye1", "aclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 1, GFLAGS), ++ GATE(HCLK_FISHEYE1, "hclk_fisheye1", "hclk_vi_root", 0, ++ RK3588_CLKGATE_CON(50), 2, GFLAGS), ++ COMPOSITE(CLK_FISHEYE1_CORE, "clk_fisheye1_core", gpll_cpll_aupll_spll_p, 0, ++ 
RK3588_CLKSEL_CON(108), 12, 2, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(50), 3, GFLAGS), ++ COMPOSITE(CLK_ISP0_CORE, "clk_isp0_core", gpll_cpll_aupll_spll_p, 0, ++ RK3588_CLKSEL_CON(107), 11, 2, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(49), 9, GFLAGS), ++ GATE(CLK_ISP0_CORE_MARVIN, "clk_isp0_core_marvin", "clk_isp0_core", 0, ++ RK3588_CLKGATE_CON(49), 10, GFLAGS), ++ GATE(CLK_ISP0_CORE_VICAP, "clk_isp0_core_vicap", "clk_isp0_core", 0, ++ RK3588_CLKGATE_CON(49), 11, GFLAGS), ++ GATE(ACLK_ISP0, "aclk_isp0", "aclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 12, GFLAGS), ++ GATE(HCLK_ISP0, "hclk_isp0", "hclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 13, GFLAGS), ++ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(107), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(49), 6, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 7, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, ++ RK3588_CLKGATE_CON(49), 8, GFLAGS), + -+ pll->dev = dev; -+ pll->regmap = regmap; -+ pll->reg = reg; -+ pll->pd_shift = pd_shift; -+ pll->dsmpd_shift = dsmpd_shift; -+ pll->lock_shift = lock_shift; -+ pll->hw.init = &init; ++ /* vo0 */ ++ COMPOSITE(ACLK_VO0_ROOT, "aclk_vo0_root", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(116), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(55), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO0_ROOT, "hclk_vo0_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(116), 6, 2, MFLAGS, ++ RK3588_CLKGATE_CON(55), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO0_S_ROOT, "hclk_vo0_s_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(116), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(55), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO0_ROOT, "pclk_vo0_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(116), 10, 2, MFLAGS, ++ RK3588_CLKGATE_CON(55), 3, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO0_S_ROOT, "pclk_vo0_s_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(116), 12, 2, MFLAGS, ++ RK3588_CLKGATE_CON(55), 4, GFLAGS), ++ GATE(PCLK_DP0, "pclk_dp0", "pclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 4, GFLAGS), ++ GATE(PCLK_DP1, "pclk_dp1", "pclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 5, GFLAGS), ++ GATE(PCLK_S_DP0, "pclk_s_dp0", "pclk_vo0_s_root", 0, ++ RK3588_CLKGATE_CON(56), 6, GFLAGS), ++ GATE(PCLK_S_DP1, "pclk_s_dp1", "pclk_vo0_s_root", 0, ++ RK3588_CLKGATE_CON(56), 7, GFLAGS), ++ GATE(CLK_DP0, "clk_dp0", "aclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 8, GFLAGS), ++ GATE(CLK_DP1, "clk_dp1", "aclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 9, GFLAGS), ++ GATE(HCLK_HDCP_KEY0, "hclk_hdcp_key0", "hclk_vo0_s_root", 0, ++ RK3588_CLKGATE_CON(55), 11, GFLAGS), ++ GATE(PCLK_HDCP0, "pclk_hdcp0", "pclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(55), 14, GFLAGS), ++ GATE(ACLK_TRNG0, "aclk_trng0", "aclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 0, GFLAGS), ++ GATE(PCLK_TRNG0, "pclk_trng0", "pclk_vo0_root", 0, ++ RK3588_CLKGATE_CON(56), 1, GFLAGS), ++ GATE(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(55), 10, GFLAGS), ++ COMPOSITE(CLK_I2S4_8CH_TX_SRC, "clk_i2s4_8ch_tx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(118), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(56), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S4_8CH_TX_FRAC, "clk_i2s4_8ch_tx_frac", "clk_i2s4_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(119), 0, ++ RK3588_CLKGATE_CON(56), 12, GFLAGS, ++ &rk3588_i2s4_8ch_tx_fracmux), ++ GATE(MCLK_I2S4_8CH_TX, "mclk_i2s4_8ch_tx", "clk_i2s4_8ch_tx", 0, ++ RK3588_CLKGATE_CON(56), 13, GFLAGS), ++ COMPOSITE(CLK_I2S8_8CH_TX_SRC, 
"clk_i2s8_8ch_tx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(120), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(56), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S8_8CH_TX_FRAC, "clk_i2s8_8ch_tx_frac", "clk_i2s8_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(121), 0, ++ RK3588_CLKGATE_CON(57), 0, GFLAGS, ++ &rk3588_i2s8_8ch_tx_fracmux), ++ GATE(MCLK_I2S8_8CH_TX, "mclk_i2s8_8ch_tx", "clk_i2s8_8ch_tx", 0, ++ RK3588_CLKGATE_CON(57), 1, GFLAGS), ++ COMPOSITE(CLK_SPDIF2_DP0_SRC, "clk_spdif2_dp0_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(122), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(57), 3, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF2_DP0_FRAC, "clk_spdif2_dp0_frac", "clk_spdif2_dp0_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(123), 0, ++ RK3588_CLKGATE_CON(57), 4, GFLAGS, ++ &rk3588_spdif2_dp0_fracmux), ++ GATE(MCLK_SPDIF2_DP0, "mclk_spdif2_dp0", "clk_spdif2_dp0", 0, ++ RK3588_CLKGATE_CON(57), 5, GFLAGS), ++ GATE(MCLK_SPDIF2, "mclk_spdif2", "clk_spdif2_dp0", 0, ++ RK3588_CLKGATE_CON(57), 6, GFLAGS), ++ COMPOSITE(CLK_SPDIF5_DP1_SRC, "clk_spdif5_dp1_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(124), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(57), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF5_DP1_FRAC, "clk_spdif5_dp1_frac", "clk_spdif5_dp1_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(125), 0, ++ RK3588_CLKGATE_CON(57), 9, GFLAGS, ++ &rk3588_spdif5_dp1_fracmux), ++ GATE(MCLK_SPDIF5_DP1, "mclk_spdif5_dp1", "clk_spdif5_dp1", 0, ++ RK3588_CLKGATE_CON(57), 10, GFLAGS), ++ GATE(MCLK_SPDIF5, "mclk_spdif5", "clk_spdif5_dp1", 0, ++ RK3588_CLKGATE_CON(57), 11, GFLAGS), ++ COMPOSITE_NOMUX(CLK_AUX16M_0, "clk_aux16m_0", "gpll", 0, ++ RK3588_CLKSEL_CON(117), 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(56), 2, GFLAGS), ++ COMPOSITE_NOMUX(CLK_AUX16M_1, "clk_aux16m_1", "gpll", 0, ++ RK3588_CLKSEL_CON(117), 8, 8, DFLAGS, ++ RK3588_CLKGATE_CON(56), 3, GFLAGS), + -+ return devm_clk_register(dev, &pll->hw); -+} -+EXPORT_SYMBOL_GPL(devm_clk_regmap_register_pll); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap.h b/drivers/clk/rockchip-oh/regmap/clk-regmap.h -new file mode 100644 -index 000000000..4626e1982 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-regmap.h -@@ -0,0 +1,308 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ ++ /* vo1 */ ++ COMPOSITE_HALFDIV(CLK_HDMITRX_REFSRC, "clk_hdmitrx_refsrc", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(157), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(65), 9, GFLAGS), ++ COMPOSITE(ACLK_HDCP1_ROOT, "aclk_hdcp1_root", aclk_hdcp1_root_p, 0, ++ RK3588_CLKSEL_CON(128), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(59), 0, GFLAGS), ++ COMPOSITE(ACLK_HDMIRX_ROOT, "aclk_hdmirx_root", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(128), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(59), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO1_ROOT, "hclk_vo1_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(128), 13, 2, MFLAGS, ++ RK3588_CLKGATE_CON(59), 2, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO1_S_ROOT, "hclk_vo1_s_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(129), 0, 2, MFLAGS, ++ RK3588_CLKGATE_CON(59), 3, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO1_ROOT, "pclk_vo1_root", mux_150m_100m_24m_p, 0, ++ RK3588_CLKSEL_CON(129), 2, 2, MFLAGS, ++ RK3588_CLKGATE_CON(59), 4, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO1_S_ROOT, "pclk_vo1_s_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(129), 4, 2, MFLAGS, ++ RK3588_CLKGATE_CON(59), 5, GFLAGS), ++ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", gpll_cpll_dmyaupll_npll_spll_p, 0, ++ RK3588_CLKSEL_CON(110), 5, 3, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(52), 0, GFLAGS), ++ FACTOR(ACLK_VOP_DIV2_SRC, "aclk_vop_div2_src", "aclk_vop_root", 0, 1, 2), ++ COMPOSITE_NODIV(ACLK_VOP_LOW_ROOT, "aclk_vop_low_root", mux_400m_200m_100m_24m_p, 0, ++ RK3588_CLKSEL_CON(110), 8, 2, MFLAGS, ++ RK3588_CLKGATE_CON(52), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VOP_ROOT, "hclk_vop_root", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(110), 10, 2, MFLAGS, ++ RK3588_CLKGATE_CON(52), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VOP_ROOT, "pclk_vop_root", mux_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(110), 12, 2, MFLAGS, ++ RK3588_CLKGATE_CON(52), 3, GFLAGS), ++ COMPOSITE(ACLK_VO1USB_TOP_ROOT, "aclk_vo1usb_top_root", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(170), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(74), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO1USB_TOP_ROOT, "hclk_vo1usb_top_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3588_CLKSEL_CON(170), 6, 2, MFLAGS, ++ RK3588_CLKGATE_CON(74), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VOP, "aclk_vop", aclk_vop_sub_src_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(115), 9, 1, MFLAGS, ++ RK3588_CLKGATE_CON(52), 9, GFLAGS), ++ GATE(PCLK_EDP0, "pclk_edp0", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(62), 0, GFLAGS), ++ GATE(CLK_EDP0_24M, "clk_edp0_24m", "xin24m", 0, ++ RK3588_CLKGATE_CON(62), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_EDP0_200M, "clk_edp0_200m", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(140), 1, 2, MFLAGS, ++ RK3588_CLKGATE_CON(62), 2, GFLAGS), ++ GATE(PCLK_EDP1, "pclk_edp1", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(62), 3, GFLAGS), ++ GATE(CLK_EDP1_24M, "clk_edp1_24m", "xin24m", 0, ++ RK3588_CLKGATE_CON(62), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_EDP1_200M, "clk_edp1_200m", mux_200m_100m_50m_24m_p, 0, ++ RK3588_CLKSEL_CON(140), 3, 2, MFLAGS, ++ RK3588_CLKGATE_CON(62), 5, GFLAGS), ++ GATE(HCLK_HDCP_KEY1, "hclk_hdcp_key1", "hclk_vo1_s_root", 0, ++ RK3588_CLKGATE_CON(60), 4, GFLAGS), ++ GATE(PCLK_HDCP1, "pclk_hdcp1", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(60), 7, GFLAGS), ++ GATE(ACLK_HDMIRX, "aclk_hdmirx", "aclk_hdmirx_root", 0, ++ RK3588_CLKGATE_CON(61), 9, GFLAGS), ++ GATE(PCLK_HDMIRX, "pclk_hdmirx", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(61), 10, GFLAGS), ++ GATE(CLK_HDMIRX_REF, "clk_hdmirx_ref", "aclk_hdcp1_root", 0, 
++ RK3588_CLKGATE_CON(61), 11, GFLAGS), ++ COMPOSITE(CLK_HDMIRX_AUD_SRC, "clk_hdmirx_aud_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(138), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ RK3588_CLKGATE_CON(61), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_HDMIRX_AUD_FRAC, "clk_hdmirx_aud_frac", "clk_hdmirx_aud_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(139), 0, ++ RK3588_CLKGATE_CON(61), 13, GFLAGS, ++ &rk3588_hdmirx_aud_fracmux), ++ GATE(CLK_HDMIRX_AUD, "clk_hdmirx_aud", "clk_hdmirx_aud_mux", 0, ++ RK3588_CLKGATE_CON(61), 14, GFLAGS), ++ GATE(PCLK_HDMITX0, "pclk_hdmitx0", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(60), 11, GFLAGS), ++ COMPOSITE(CLK_HDMITX0_EARC, "clk_hdmitx0_earc", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(133), 6, 1, MFLAGS, 1, 5, DFLAGS, ++ RK3588_CLKGATE_CON(60), 15, GFLAGS), ++ GATE(CLK_HDMITX0_REF, "clk_hdmitx0_ref", "aclk_hdcp1_root", 0, ++ RK3588_CLKGATE_CON(61), 0, GFLAGS), ++ GATE(PCLK_HDMITX1, "pclk_hdmitx1", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(61), 2, GFLAGS), ++ COMPOSITE(CLK_HDMITX1_EARC, "clk_hdmitx1_earc", gpll_cpll_p, 0, ++ RK3588_CLKSEL_CON(136), 6, 1, MFLAGS, 1, 5, DFLAGS, ++ RK3588_CLKGATE_CON(61), 6, GFLAGS), ++ GATE(CLK_HDMITX1_REF, "clk_hdmitx1_ref", "aclk_hdcp1_root", 0, ++ RK3588_CLKGATE_CON(61), 7, GFLAGS), ++ GATE(ACLK_TRNG1, "aclk_trng1", "aclk_hdcp1_root", 0, ++ RK3588_CLKGATE_CON(60), 9, GFLAGS), ++ GATE(PCLK_TRNG1, "pclk_trng1", "pclk_vo1_root", 0, ++ RK3588_CLKGATE_CON(60), 10, GFLAGS), ++ GATE(0, "pclk_vo1grf", "pclk_vo1_root", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(59), 12, GFLAGS), ++ GATE(PCLK_S_EDP0, "pclk_s_edp0", "pclk_vo1_s_root", 0, ++ RK3588_CLKGATE_CON(59), 14, GFLAGS), ++ GATE(PCLK_S_EDP1, "pclk_s_edp1", "pclk_vo1_s_root", 0, ++ RK3588_CLKGATE_CON(59), 15, GFLAGS), ++ GATE(PCLK_S_HDMIRX, "pclk_s_hdmirx", "pclk_vo1_s_root", 0, ++ RK3588_CLKGATE_CON(65), 8, GFLAGS), ++ COMPOSITE(CLK_I2S10_8CH_RX_SRC, "clk_i2s10_8ch_rx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(155), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(65), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S10_8CH_RX_FRAC, "clk_i2s10_8ch_rx_frac", "clk_i2s10_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(156), 0, ++ RK3588_CLKGATE_CON(65), 6, GFLAGS, ++ &rk3588_i2s10_8ch_rx_fracmux), ++ GATE(MCLK_I2S10_8CH_RX, "mclk_i2s10_8ch_rx", "clk_i2s10_8ch_rx", 0, ++ RK3588_CLKGATE_CON(65), 7, GFLAGS), ++ COMPOSITE(CLK_I2S7_8CH_RX_SRC, "clk_i2s7_8ch_rx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(129), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RK3588_CLKGATE_CON(60), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S7_8CH_RX_FRAC, "clk_i2s7_8ch_rx_frac", "clk_i2s7_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(130), 0, ++ RK3588_CLKGATE_CON(60), 2, GFLAGS, ++ &rk3588_i2s7_8ch_rx_fracmux), ++ GATE(MCLK_I2S7_8CH_RX, "mclk_i2s7_8ch_rx", "clk_i2s7_8ch_rx", 0, ++ RK3588_CLKGATE_CON(60), 3, GFLAGS), ++ COMPOSITE(CLK_I2S9_8CH_RX_SRC, "clk_i2s9_8ch_rx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(153), 12, 1, MFLAGS, 7, 5, DFLAGS, ++ RK3588_CLKGATE_CON(65), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S9_8CH_RX_FRAC, "clk_i2s9_8ch_rx_frac", "clk_i2s9_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(154), 0, ++ RK3588_CLKGATE_CON(65), 2, GFLAGS, ++ &rk3588_i2s9_8ch_rx_fracmux), ++ GATE(MCLK_I2S9_8CH_RX, "mclk_i2s9_8ch_rx", "clk_i2s9_8ch_rx", 0, ++ RK3588_CLKGATE_CON(65), 3, GFLAGS), ++ COMPOSITE(CLK_I2S5_8CH_TX_SRC, "clk_i2s5_8ch_tx_src", gpll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(140), 10, 1, MFLAGS, 5, 5, DFLAGS, ++ RK3588_CLKGATE_CON(62), 6, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S5_8CH_TX_FRAC, 
"clk_i2s5_8ch_tx_frac", "clk_i2s5_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(141), 0, ++ RK3588_CLKGATE_CON(62), 7, GFLAGS, ++ &rk3588_i2s5_8ch_tx_fracmux), ++ GATE(MCLK_I2S5_8CH_TX, "mclk_i2s5_8ch_tx", "clk_i2s5_8ch_tx", 0, ++ RK3588_CLKGATE_CON(62), 8, GFLAGS), ++ COMPOSITE(CLK_I2S6_8CH_TX_SRC, "clk_i2s6_8ch_tx_src", gpll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(144), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RK3588_CLKGATE_CON(62), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S6_8CH_TX_FRAC, "clk_i2s6_8ch_tx_frac", "clk_i2s6_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(145), 0, ++ RK3588_CLKGATE_CON(62), 14, GFLAGS, ++ &rk3588_i2s6_8ch_tx_fracmux), ++ GATE(MCLK_I2S6_8CH_TX, "mclk_i2s6_8ch_tx", "clk_i2s6_8ch_tx", 0, ++ RK3588_CLKGATE_CON(62), 15, GFLAGS), ++ COMPOSITE(CLK_I2S6_8CH_RX_SRC, "clk_i2s6_8ch_rx_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(146), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(63), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S6_8CH_RX_FRAC, "clk_i2s6_8ch_rx_frac", "clk_i2s6_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(147), 0, ++ RK3588_CLKGATE_CON(63), 1, GFLAGS, ++ &rk3588_i2s6_8ch_rx_fracmux), ++ GATE(MCLK_I2S6_8CH_RX, "mclk_i2s6_8ch_rx", "clk_i2s6_8ch_rx", 0, ++ RK3588_CLKGATE_CON(63), 2, GFLAGS), ++ MUX(I2S6_8CH_MCLKOUT, "i2s6_8ch_mclkout", i2s6_8ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(148), 2, 2, MFLAGS), ++ COMPOSITE(CLK_SPDIF3_SRC, "clk_spdif3_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(148), 9, 1, MFLAGS, 4, 5, DFLAGS, ++ RK3588_CLKGATE_CON(63), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF3_FRAC, "clk_spdif3_frac", "clk_spdif3_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(149), 0, ++ RK3588_CLKGATE_CON(63), 6, GFLAGS, ++ &rk3588_spdif3_fracmux), ++ GATE(MCLK_SPDIF3, "mclk_spdif3", "clk_spdif3", 0, ++ RK3588_CLKGATE_CON(63), 7, GFLAGS), ++ COMPOSITE(CLK_SPDIF4_SRC, "clk_spdif4_src", gpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(150), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(63), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF4_FRAC, "clk_spdif4_frac", "clk_spdif4_src", CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(151), 0, ++ RK3588_CLKGATE_CON(63), 10, GFLAGS, ++ &rk3588_spdif4_fracmux), ++ GATE(MCLK_SPDIF4, "mclk_spdif4", "clk_spdif4", 0, ++ RK3588_CLKGATE_CON(63), 11, GFLAGS), ++ COMPOSITE(MCLK_SPDIFRX0, "mclk_spdifrx0", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(152), 7, 2, MFLAGS, 2, 5, DFLAGS, ++ RK3588_CLKGATE_CON(63), 13, GFLAGS), ++ COMPOSITE(MCLK_SPDIFRX1, "mclk_spdifrx1", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(152), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(63), 15, GFLAGS), ++ COMPOSITE(MCLK_SPDIFRX2, "mclk_spdifrx2", gpll_cpll_aupll_p, 0, ++ RK3588_CLKSEL_CON(153), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(64), 1, GFLAGS), ++ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, ++ RK3588_CLKGATE_CON(73), 12, GFLAGS), ++ GATE(CLK_HDMIHDP1, "clk_hdmihdp1", "xin24m", 0, ++ RK3588_CLKGATE_CON(73), 13, GFLAGS), ++ GATE(PCLK_HDPTX0, "pclk_hdptx0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(72), 5, GFLAGS), ++ GATE(PCLK_HDPTX1, "pclk_hdptx1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(72), 6, GFLAGS), ++ GATE(PCLK_USBDPPHY0, "pclk_usbdpphy0", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(72), 2, GFLAGS), ++ GATE(PCLK_USBDPPHY1, "pclk_usbdpphy1", "pclk_top_root", 0, ++ RK3588_CLKGATE_CON(72), 4, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vop_root", 0, ++ RK3588_CLKGATE_CON(52), 8, GFLAGS), ++ COMPOSITE(DCLK_VOP0_SRC, "dclk_vop0_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ 
RK3588_CLKSEL_CON(111), 7, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(52), 10, GFLAGS), ++ COMPOSITE(DCLK_VOP1_SRC, "dclk_vop1_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(111), 14, 2, MFLAGS, 9, 5, DFLAGS, ++ RK3588_CLKGATE_CON(52), 11, GFLAGS), ++ COMPOSITE(DCLK_VOP2_SRC, "dclk_vop2_src", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(112), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_CLKGATE_CON(52), 12, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(112), 7, 2, MFLAGS, ++ RK3588_CLKGATE_CON(52), 13, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOP1, "dclk_vop1", dclk_vop1_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(112), 9, 2, MFLAGS, ++ RK3588_CLKGATE_CON(53), 0, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOP2, "dclk_vop2", dclk_vop2_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(112), 11, 2, MFLAGS, ++ RK3588_CLKGATE_CON(53), 1, GFLAGS), ++ COMPOSITE(DCLK_VOP3, "dclk_vop3", gpll_cpll_v0pll_aupll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3588_CLKSEL_CON(113), 7, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(53), 2, GFLAGS), ++ GATE(PCLK_DSIHOST0, "pclk_dsihost0", "pclk_vop_root", 0, ++ RK3588_CLKGATE_CON(53), 4, GFLAGS), ++ GATE(PCLK_DSIHOST1, "pclk_dsihost1", "pclk_vop_root", 0, ++ RK3588_CLKGATE_CON(53), 5, GFLAGS), ++ COMPOSITE(CLK_DSIHOST0, "clk_dsihost0", gpll_cpll_v0pll_spll_p, 0, ++ RK3588_CLKSEL_CON(114), 7, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(53), 6, GFLAGS), ++ COMPOSITE(CLK_DSIHOST1, "clk_dsihost1", gpll_cpll_v0pll_spll_p, 0, ++ RK3588_CLKSEL_CON(115), 7, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3588_CLKGATE_CON(53), 7, GFLAGS), ++ GATE(CLK_VOP_PMU, "clk_vop_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(53), 8, GFLAGS), ++ GATE(ACLK_VOP_DOBY, "aclk_vop_doby", "aclk_vop_root", 0, ++ RK3588_CLKGATE_CON(53), 10, GFLAGS), ++ GATE(CLK_USBDP_PHY0_IMMORTAL, "clk_usbdp_phy0_immortal", "xin24m", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(CLK_USBDP_PHY1_IMMORTAL, "clk_usbdp_phy1_immortal", "xin24m", CLK_IGNORE_UNUSED, ++ RK3588_CLKGATE_CON(2), 15, GFLAGS), + -+#ifndef __CLK_REGMAP_H__ -+#define __CLK_REGMAP_H__ ++ GATE(CLK_REF_PIPE_PHY0_OSC_SRC, "clk_ref_pipe_phy0_osc_src", "xin24m", 0, ++ RK3588_CLKGATE_CON(77), 0, GFLAGS), ++ GATE(CLK_REF_PIPE_PHY1_OSC_SRC, "clk_ref_pipe_phy1_osc_src", "xin24m", 0, ++ RK3588_CLKGATE_CON(77), 1, GFLAGS), ++ GATE(CLK_REF_PIPE_PHY2_OSC_SRC, "clk_ref_pipe_phy2_osc_src", "xin24m", 0, ++ RK3588_CLKGATE_CON(77), 2, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY0_PLL_SRC, "clk_ref_pipe_phy0_pll_src", "ppll", 0, ++ RK3588_CLKSEL_CON(176), 0, 6, DFLAGS, ++ RK3588_CLKGATE_CON(77), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY1_PLL_SRC, "clk_ref_pipe_phy1_pll_src", "ppll", 0, ++ RK3588_CLKSEL_CON(176), 6, 6, DFLAGS, ++ RK3588_CLKGATE_CON(77), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF_PIPE_PHY2_PLL_SRC, "clk_ref_pipe_phy2_pll_src", "ppll", 0, ++ RK3588_CLKSEL_CON(177), 0, 6, DFLAGS, ++ RK3588_CLKGATE_CON(77), 5, GFLAGS), ++ MUX(CLK_REF_PIPE_PHY0, "clk_ref_pipe_phy0", clk_ref_pipe_phy0_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(177), 6, 1, MFLAGS), ++ MUX(CLK_REF_PIPE_PHY1, "clk_ref_pipe_phy1", clk_ref_pipe_phy1_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(177), 7, 1, MFLAGS), ++ MUX(CLK_REF_PIPE_PHY2, "clk_ref_pipe_phy2", clk_ref_pipe_phy2_p, CLK_SET_RATE_PARENT, ++ RK3588_CLKSEL_CON(177), 8, 1, MFLAGS), + -+#include -+#include -+#include -+#include -+#include 
-+#include -+#include ++ /* pmu */ ++ COMPOSITE(CLK_PMU1_300M_SRC, "clk_pmu1_300m_src", pmu_300m_24m_p, 0, ++ RK3588_PMU_CLKSEL_CON(0), 15, 1, MFLAGS, 10, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE(CLK_PMU1_400M_SRC, "clk_pmu1_400m_src", pmu_400m_24m_p, 0, ++ RK3588_PMU_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PMU1_50M_SRC, "clk_pmu1_50m_src", "clk_pmu1_400m_src", 0, ++ RK3588_PMU_CLKSEL_CON(0), 0, 4, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PMU1_100M_SRC, "clk_pmu1_100m_src", "clk_pmu1_400m_src", 0, ++ RK3588_PMU_CLKSEL_CON(0), 4, 3, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PMU1_200M_SRC, "clk_pmu1_200m_src", "clk_pmu1_400m_src", 0, ++ RK3588_PMU_CLKSEL_CON(0), 7, 3, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PMU1_ROOT, "hclk_pmu1_root", hclk_pmu1_root_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(1), 6, 2, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NODIV(PCLK_PMU1_ROOT, "pclk_pmu1_root", pmu_100m_50m_24m_src_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(1), 8, 2, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_PMU0_ROOT, "pclk_pmu0_root", "pclk_pmu1_root", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PMU_CM0_ROOT, "hclk_pmu_cm0_root", hclk_pmu_cm0_root_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(1), 10, 2, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(CLK_PMU0, "clk_pmu0", "xin24m", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(PCLK_PMU0, "pclk_pmu0", "pclk_pmu0_root", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_PMU0IOC, "pclk_pmu0ioc", "pclk_pmu0_root", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(5), 5, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, ++ RK3588_PMU_CLKSEL_CON(17), 0, 1, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(5), 6, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", pmu_200m_100m_p, 0, ++ RK3588_PMU_CLKSEL_CON(3), 6, 1, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(HCLK_I2S1_8CH, "hclk_i2s1_8ch", "hclk_pmu1_root", 0, ++ RK3588_PMU_CLKGATE_CON(2), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S1_8CH_TX_SRC, "clk_i2s1_8ch_tx_src", "cpll", 0, ++ RK3588_PMU_CLKSEL_CON(5), 2, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_TX_FRAC, "clk_i2s1_8ch_tx_frac", "clk_i2s1_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(6), 0, ++ RK3588_PMU_CLKGATE_CON(2), 9, GFLAGS, ++ &rk3588_i2s1_8ch_tx_fracmux), ++ GATE(MCLK_I2S1_8CH_TX, "mclk_i2s1_8ch_tx", "clk_i2s1_8ch_tx", 0, ++ RK3588_PMU_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2S1_8CH_RX_SRC, "clk_i2s1_8ch_rx_src", "cpll", 0, ++ RK3588_PMU_CLKSEL_CON(7), 2, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_RX_FRAC, "clk_i2s1_8ch_rx_frac", "clk_i2s1_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(8), 0, ++ RK3588_PMU_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3588_i2s1_8ch_rx_fracmux), ++ GATE(MCLK_I2S1_8CH_RX, "mclk_i2s1_8ch_rx", "clk_i2s1_8ch_rx", 0, ++ RK3588_PMU_CLKGATE_CON(2), 13, GFLAGS), ++ MUX(I2S1_8CH_MCLKOUT, "i2s1_8ch_mclkout", i2s1_8ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(9), 2, 2, 
MFLAGS), ++ GATE(PCLK_PMU1, "pclk_pmu1", "pclk_pmu0_root", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu0", CLK_IGNORE_UNUSED, ++ RK3588_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(CLK_PMU1, "clk_pmu1", "clk_pmu0", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(HCLK_PDM0, "hclk_pdm0", "hclk_pmu1_root", 0, ++ RK3588_PMU_CLKGATE_CON(2), 14, GFLAGS), ++ COMPOSITE_NODIV(MCLK_PDM0, "mclk_pdm0", mclk_pdm0_p, 0, ++ RK3588_PMU_CLKSEL_CON(9), 4, 1, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(2), 15, GFLAGS), ++ GATE(HCLK_VAD, "hclk_vad", "hclk_pmu1_root", 0, ++ RK3588_PMU_CLKGATE_CON(3), 0, GFLAGS), ++ GATE(FCLK_PMU_CM0_CORE, "fclk_pmu_cm0_core", "hclk_pmu_cm0_root", CLK_IS_CRITICAL, ++ RK3588_PMU_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE(CLK_PMU_CM0_RTC, "clk_pmu_cm0_rtc", mux_24m_32k_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ GATE(PCLK_PMU1_IOC, "pclk_pmu1_ioc", "pclk_pmu0_root", CLK_IGNORE_UNUSED, ++ RK3588_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_PMU1PWM, "pclk_pmu1pwm", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_PMU1PWM, "clk_pmu1pwm", pmu_100m_50m_24m_src_p, 0, ++ RK3588_PMU_CLKSEL_CON(2), 9, 2, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(1), 13, GFLAGS), ++ GATE(CLK_PMU1PWM_CAPTURE, "clk_pmu1pwm_capture", "xin24m", 0, ++ RK3588_PMU_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(PCLK_PMU1TIMER, "pclk_pmu1timer", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_PMU1TIMER_ROOT, "clk_pmu1timer_root", pmu_24m_32k_100m_src_p, 0, ++ RK3588_PMU_CLKSEL_CON(2), 7, 2, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(CLK_PMU1TIMER0, "clk_pmu1timer0", "clk_pmu1timer_root", 0, ++ RK3588_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(CLK_PMU1TIMER1, "clk_pmu1timer1", "clk_pmu1timer_root", 0, ++ RK3588_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "cpll", 0, ++ RK3588_PMU_CLKSEL_CON(3), 7, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(2), 3, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RK3588_PMU_CLKSEL_CON(4), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RK3588_PMU_CLKGATE_CON(2), 4, GFLAGS, ++ &rk3588_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, ++ RK3588_PMU_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(PCLK_PMU1WDT, "pclk_pmu1wdt", "pclk_pmu0_root", 0, ++ RK3588_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PMU1WDT, "tclk_pmu1wdt", mux_24m_32k_p, 0, ++ RK3588_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, ++ RK3588_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ COMPOSITE(CLK_CR_PARA, "clk_cr_para", mux_24m_ppll_spll_p, 0, ++ RK3588_PMU_CLKSEL_CON(15), 5, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(4), 11, GFLAGS), ++ COMPOSITE(CLK_USB2PHY_HDPTXRXPHY_REF, "clk_usb2phy_hdptxrxphy_ref", mux_24m_ppll_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(14), 14, 1, MFLAGS, 9, 5, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(4), 7, GFLAGS), ++ COMPOSITE(CLK_USBDPPHY_MIPIDCPPHY_REF, "clk_usbdpphy_mipidcpphy_ref", mux_24m_ppll_spll_p, CLK_IS_CRITICAL, ++ RK3588_PMU_CLKSEL_CON(14), 7, 2, MFLAGS, 0, 7, DFLAGS, ++ RK3588_PMU_CLKGATE_CON(4), 3, GFLAGS), + -+#define UPDATE(x, h, l) (((x) << (l)) & GENMASK((h), (l))) -+#define HIWORD_UPDATE(v, h, l) (((v) << (l)) | (GENMASK((h), (l)) << 16)) ++ GATE(CLK_PHY0_REF_ALT_P, "clk_phy0_ref_alt_p", 
"ppll", 0, ++ RK3588_PHYREF_ALT_GATE, 0, GFLAGS), ++ GATE(CLK_PHY0_REF_ALT_M, "clk_phy0_ref_alt_m", "ppll", 0, ++ RK3588_PHYREF_ALT_GATE, 1, GFLAGS), ++ GATE(CLK_PHY1_REF_ALT_P, "clk_phy1_ref_alt_p", "ppll", 0, ++ RK3588_PHYREF_ALT_GATE, 2, GFLAGS), ++ GATE(CLK_PHY1_REF_ALT_M, "clk_phy1_ref_alt_m", "ppll", 0, ++ RK3588_PHYREF_ALT_GATE, 3, GFLAGS), + -+struct clk_pll_data { -+ unsigned int id; -+ const char *name; -+ const char *parent_name; -+ u32 reg; -+ u8 pd_shift; -+ u8 dsmpd_shift; -+ u8 lock_shift; -+ unsigned long flags; ++ GATE(HCLK_SPDIFRX0, "hclk_spdifrx0", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(63), 12, GFLAGS), ++ GATE(HCLK_SPDIFRX1, "hclk_spdifrx1", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(63), 14, GFLAGS), ++ GATE(HCLK_SPDIFRX2, "hclk_spdifrx2", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(64), 0, GFLAGS), ++ GATE(HCLK_SPDIF4, "hclk_spdif4", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(63), 8, GFLAGS), ++ GATE(HCLK_SPDIF3, "hclk_spdif3", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(63), 4, GFLAGS), ++ GATE(HCLK_I2S6_8CH, "hclk_i2s6_8ch", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(63), 3, GFLAGS), ++ GATE(HCLK_I2S5_8CH, "hclk_i2s5_8ch", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(62), 12, GFLAGS), ++ GATE(HCLK_I2S9_8CH, "hclk_i2s9_8ch", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(65), 0, GFLAGS), ++ GATE(HCLK_I2S7_8CH, "hclk_i2s7_8ch", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(60), 0, GFLAGS), ++ GATE(HCLK_I2S10_8CH, "hclk_i2s10_8ch", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(65), 4, GFLAGS), ++ GATE(ACLK_HDCP1, "aclk_hdcp1", "aclk_hdcp1_pre", 0, ++ RK3588_CLKGATE_CON(60), 5, GFLAGS), ++ GATE(HCLK_HDCP1, "hclk_hdcp1", "hclk_vo1", 0, ++ RK3588_CLKGATE_CON(60), 6, GFLAGS), ++ GATE(HCLK_SPDIF5_DP1, "hclk_spdif5_dp1", "hclk_vo0", 0, ++ RK3588_CLKGATE_CON(57), 7, GFLAGS), ++ GATE(HCLK_SPDIF2_DP0, "hclk_spdif2_dp0", "hclk_vo0", 0, ++ RK3588_CLKGATE_CON(57), 2, GFLAGS), ++ GATE(HCLK_I2S8_8CH, "hclk_i2s8_8ch", "hclk_vo0", 0, ++ RK3588_CLKGATE_CON(56), 14, GFLAGS), ++ GATE(HCLK_I2S4_8CH, "hclk_i2s4_8ch", "hclk_vo0", 0, ++ RK3588_CLKGATE_CON(56), 10, GFLAGS), ++ GATE(ACLK_HDCP0, "aclk_hdcp0", "aclk_hdcp0_pre", 0, ++ RK3588_CLKGATE_CON(55), 12, GFLAGS), ++ GATE(HCLK_HDCP0, "hclk_hdcp0", "hclk_vo0", 0, ++ RK3588_CLKGATE_CON(55), 13, GFLAGS), ++ GATE(HCLK_RKVENC1, "hclk_rkvenc1", "hclk_rkvenc1_pre", 0, ++ RK3588_CLKGATE_CON(48), 4, GFLAGS), ++ GATE(ACLK_RKVENC1, "aclk_rkvenc1", "aclk_rkvenc1_pre", 0, ++ RK3588_CLKGATE_CON(48), 5, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(44), 8, GFLAGS), ++ GATE(ACLK_IEP2P0, "aclk_iep2p0", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(45), 5, GFLAGS), ++ GATE(ACLK_JPEG_ENCODER0, "aclk_jpeg_encoder0", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(44), 10, GFLAGS), ++ GATE(ACLK_JPEG_ENCODER1, "aclk_jpeg_encoder1", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(44), 12, GFLAGS), ++ GATE(ACLK_JPEG_ENCODER2, "aclk_jpeg_encoder2", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(44), 14, GFLAGS), ++ GATE(ACLK_JPEG_ENCODER3, "aclk_jpeg_encoder3", "aclk_vdpu_low_pre", 0, ++ RK3588_CLKGATE_CON(45), 0, GFLAGS), ++ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_decoder_pre", 0, ++ RK3588_CLKGATE_CON(45), 2, GFLAGS), ++ GATE(ACLK_USB3OTG1, "aclk_usb3otg1", "aclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 7, GFLAGS), ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 10, GFLAGS), ++ GATE(HCLK_HOST_ARB0, "hclk_host_arb0", "hclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 11, GFLAGS), ++ GATE(HCLK_HOST1, "hclk_host1", "hclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 12, GFLAGS), 
++ GATE(HCLK_HOST_ARB1, "hclk_host_arb1", "hclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 13, GFLAGS), ++ GATE(ACLK_USB3OTG0, "aclk_usb3otg0", "aclk_usb", 0, ++ RK3588_CLKGATE_CON(42), 4, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "scmi_cclk_sd", RK3588_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "scmi_cclk_sd", RK3588_SDMMC_CON1, 1), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sdio_pre", 0, ++ RK3588_CLKGATE_CON(75), 2, GFLAGS), ++ GATE(HCLK_RKVDEC1, "hclk_rkvdec1", "hclk_rkvdec1_pre", 0, ++ RK3588_CLKGATE_CON(41), 2, GFLAGS), ++ GATE(ACLK_RKVDEC1, "aclk_rkvdec1", "aclk_rkvdec1_pre", 0, ++ RK3588_CLKGATE_CON(41), 3, GFLAGS), ++ GATE(HCLK_RKVDEC0, "hclk_rkvdec0", "hclk_rkvdec0_pre", 0, ++ RK3588_CLKGATE_CON(40), 3, GFLAGS), ++ GATE(ACLK_RKVDEC0, "aclk_rkvdec0", "aclk_rkvdec0_pre", 0, ++ RK3588_CLKGATE_CON(40), 4, GFLAGS), ++ GATE(CLK_PCIE4L_PIPE, "clk_pcie4l_pipe", "clk_pipe30phy_pipe0_i", 0, ++ RK3588_CLKGATE_CON(39), 0, GFLAGS), ++ GATE(CLK_PCIE2L_PIPE, "clk_pcie2l_pipe", "clk_pipe30phy_pipe2_i", 0, ++ RK3588_CLKGATE_CON(39), 1, GFLAGS), ++ GATE(CLK_PIPEPHY0_PIPE_G, "clk_pipephy0_pipe_g", "clk_pipephy0_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 3, GFLAGS), ++ GATE(CLK_PIPEPHY1_PIPE_G, "clk_pipephy1_pipe_g", "clk_pipephy1_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 4, GFLAGS), ++ GATE(CLK_PIPEPHY2_PIPE_G, "clk_pipephy2_pipe_g", "clk_pipephy2_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 5, GFLAGS), ++ GATE(CLK_PIPEPHY0_PIPE_ASIC_G, "clk_pipephy0_pipe_asic_g", "clk_pipephy0_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 6, GFLAGS), ++ GATE(CLK_PIPEPHY1_PIPE_ASIC_G, "clk_pipephy1_pipe_asic_g", "clk_pipephy1_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 7, GFLAGS), ++ GATE(CLK_PIPEPHY2_PIPE_ASIC_G, "clk_pipephy2_pipe_asic_g", "clk_pipephy2_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 8, GFLAGS), ++ GATE(CLK_PIPEPHY2_PIPE_U3_G, "clk_pipephy2_pipe_u3_g", "clk_pipephy2_pipe_i", 0, ++ RK3588_CLKGATE_CON(38), 9, GFLAGS), ++ GATE(CLK_PCIE1L2_PIPE, "clk_pcie1l2_pipe", "clk_pipephy0_pipe_g", 0, ++ RK3588_CLKGATE_CON(38), 13, GFLAGS), ++ GATE(CLK_PCIE1L0_PIPE, "clk_pcie1l0_pipe", "clk_pipephy1_pipe_g", 0, ++ RK3588_CLKGATE_CON(38), 14, GFLAGS), ++ GATE(CLK_PCIE1L1_PIPE, "clk_pcie1l1_pipe", "clk_pipephy2_pipe_g", 0, ++ RK3588_CLKGATE_CON(38), 15, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_nvm", 0, ++ RK3588_CLKGATE_CON(31), 10, GFLAGS), ++ GATE(HCLK_SFC_XIP, "hclk_sfc_xip", "hclk_nvm", 0, ++ RK3588_CLKGATE_CON(31), 11, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_nvm", 0, ++ RK3588_CLKGATE_CON(31), 4, GFLAGS), ++ GATE(ACLK_ISP1, "aclk_isp1", "aclk_isp1_pre", 0, ++ RK3588_CLKGATE_CON(26), 5, GFLAGS), ++ GATE(HCLK_ISP1, "hclk_isp1", "hclk_isp1_pre", 0, ++ RK3588_CLKGATE_CON(26), 7, GFLAGS), ++ GATE(PCLK_AV1, "pclk_av1", "pclk_av1_pre", 0, ++ RK3588_CLKGATE_CON(68), 5, GFLAGS), ++ GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0, ++ RK3588_CLKGATE_CON(68), 2, GFLAGS), +}; + -+#define PLL(_id, _name, _parent_name, _reg, _pd_shift, _dsmpd_shift, \ -+ _lock_shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_name = _parent_name, \ -+ .reg = _reg, \ -+ .pd_shift = _pd_shift, \ -+ .dsmpd_shift = _dsmpd_shift, \ -+ .lock_shift = _lock_shift, \ -+ .flags = _flags, \ -+} ++static void __iomem *rk3588_cru_base; + -+#define RK618_PLL(_id, _name, _parent_name, _reg, _flags) \ -+ PLL(_id, _name, _parent_name, _reg, 10, 9, 15, _flags) ++static void dump_offset(const char *name, u32 offset, u32 len) ++{ ++ int i = 0, cnt = 0; + -+struct clk_mux_data { -+ unsigned int id; -+ const char *name; -+ const char *const *parent_names; -+ u8 
num_parents; -+ u32 reg; -+ u8 shift; -+ u8 width; -+ unsigned long flags; -+}; ++ if (!offset) ++ return; + -+#define MUX(_id, _name, _parent_names, _reg, _shift, _width, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_names = _parent_names, \ -+ .num_parents = ARRAY_SIZE(_parent_names), \ -+ .reg = _reg, \ -+ .shift = _shift, \ -+ .width = _width, \ -+ .flags = _flags, \ ++ cnt = DIV_ROUND_UP(len, 32); ++ for (i = 0; i < cnt; i++) { ++ pr_warn("%-12s 0x%05x: ", name, offset + i * 32); ++ print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 32, 4, ++ rk3588_cru_base + offset + i * 0x10, 32, false); ++ } +} + -+struct clk_gate_data { -+ unsigned int id; -+ const char *name; -+ const char *parent_name; -+ u32 reg; -+ u8 shift; -+ unsigned long flags; -+}; -+ -+#define GATE(_id, _name, _parent_name, _reg, _shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_name = _parent_name, \ -+ .reg = _reg, \ -+ .shift = _shift, \ -+ .flags = _flags, \ ++static void rk3588_dump_cru(void) ++{ ++ if (rk3588_cru_base) { ++ pr_warn("CRU REGS:\n"); ++ dump_offset("LPLL", RK3588_LPLL_CON(16), 0x10); ++ dump_offset("B0PLL", RK3588_B0_PLL_CON(0), 0x10); ++ dump_offset("B1PLL", RK3588_B1_PLL_CON(8), 0x10); ++ dump_offset("GPLL", RK3588_PLL_CON(112), 0x10); ++ dump_offset("CPLL", RK3588_PLL_CON(104), 0x10); ++ dump_offset("V0PLL", RK3588_PLL_CON(88), 0x10); ++ dump_offset("AUPLL", RK3588_PLL_CON(96), 0x10); ++ dump_offset("PPLL", RK3588_PMU_PLL_CON(128), 0x10); ++ dump_offset("DSUCRU_SEL", RK3588_DSU_CLKSEL_CON(0), 0x20); ++ dump_offset("DSUCRU_GATE", RK3588_DSU_CLKGATE_CON(0), 0x10); ++ dump_offset("BIG0CRU_SEL", RK3588_BIGCORE0_CLKSEL_CON(0), 0x10); ++ dump_offset("BIG0CRU_GATE", RK3588_BIGCORE0_CLKGATE_CON(0), 0x10); ++ dump_offset("BIG1CRU_SEL", RK3588_BIGCORE1_CLKSEL_CON(0), 0x10); ++ dump_offset("BIG1CRU_GATE", RK3588_BIGCORE1_CLKGATE_CON(0), 0x10); ++ dump_offset("CRU_SEL", RK3588_CLKSEL_CON(0), 0x2d0); ++ dump_offset("CRU_GATE", RK3588_CLKGATE_CON(0), 0x140); ++ dump_offset("PMUCRU_SEL", RK3588_PMU_CLKSEL_CON(0), 0x50); ++ dump_offset("PMUCRU_GATE", RK3588_PMU_CLKGATE_CON(0), 0x20); ++ } +} + -+struct clk_divider_data { -+ unsigned int id; -+ const char *name; -+ const char *parent_name; -+ u32 reg; -+ u8 shift; -+ u8 width; -+ unsigned long flags; -+}; ++static void __init rk3588_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+#define DIV(_id, _name, _parent_name, _reg, _shift, _width, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_name = _parent_name, \ -+ .reg = _reg, \ -+ .shift = _shift, \ -+ .width = _width, \ -+ .flags = _flags, \ -+} ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; ++ } + -+struct clk_composite_data { -+ unsigned int id; -+ const char *name; -+ const char *const *parent_names; -+ u8 num_parents; -+ u32 mux_reg; -+ u8 mux_shift; -+ u8 mux_width; -+ u32 div_reg; -+ u8 div_shift; -+ u8 div_width; -+ u8 div_flags; -+ u32 gate_reg; -+ u8 gate_shift; -+ unsigned long flags; -+}; ++ rk3588_cru_base = reg_base; + -+#define COMPOSITE(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ _div_reg, _div_shift, _div_width, \ -+ _gate_reg, _gate_shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_names = _parent_names, \ -+ .num_parents = ARRAY_SIZE(_parent_names), \ -+ .mux_reg = _mux_reg, \ -+ .mux_shift = _mux_shift, \ -+ .mux_width = _mux_width, \ -+ .div_reg = 
_div_reg, \ -+ .div_shift = _div_shift, \ -+ .div_width = _div_width, \ -+ .div_flags = CLK_DIVIDER_HIWORD_MASK, \ -+ .gate_reg = _gate_reg, \ -+ .gate_shift = _gate_shift, \ -+ .flags = _flags, \ -+} ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ clks = ctx->clk_data.clks; + -+#define COMPOSITE_NOMUX(_id, _name, _parent_name, \ -+ _div_reg, _div_shift, _div_width, \ -+ _gate_reg, _gate_shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_names = (const char *[]){ _parent_name }, \ -+ .num_parents = 1, \ -+ .div_reg = _div_reg, \ -+ .div_shift = _div_shift, \ -+ .div_width = _div_width, \ -+ .div_flags = CLK_DIVIDER_HIWORD_MASK, \ -+ .gate_reg = _gate_reg, \ -+ .gate_shift = _gate_shift, \ -+ .flags = _flags, \ -+} ++ rockchip_clk_register_plls(ctx, rk3588_pll_clks, ++ ARRAY_SIZE(rk3588_pll_clks), ++ RK3588_GRF_SOC_STATUS0); + -+#define COMPOSITE_NODIV(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ _gate_reg, _gate_shift, _flags) \ -+ COMPOSITE(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ 0, 0, 0, \ -+ _gate_reg, _gate_shift, _flags) ++ rockchip_clk_register_armclk(ctx, ARMCLK_L, "armclk_l", ++ 3, clks[PLL_LPLL], clks[PLL_GPLL], ++ &rk3588_cpulclk_data, rk3588_cpulclk_rates, ++ ARRAY_SIZE(rk3588_cpulclk_rates)); ++ rockchip_clk_register_armclk(ctx, ARMCLK_B01, "armclk_b01", ++ 3, clks[PLL_B0PLL], clks[PLL_GPLL], ++ &rk3588_cpub0clk_data, rk3588_cpub0clk_rates, ++ ARRAY_SIZE(rk3588_cpub0clk_rates)); ++ rockchip_clk_register_armclk(ctx, ARMCLK_B23, "armclk_b23", ++ 3, clks[PLL_B1PLL], clks[PLL_GPLL], ++ &rk3588_cpub1clk_data, rk3588_cpub1clk_rates, ++ ARRAY_SIZE(rk3588_cpub1clk_rates)); + -+#define COMPOSITE_FRAC(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ _div_reg, \ -+ _gate_reg, _gate_shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_names = _parent_names, \ -+ .num_parents = ARRAY_SIZE(_parent_names), \ -+ .mux_reg = _mux_reg, \ -+ .mux_shift = _mux_shift, \ -+ .mux_width = _mux_width, \ -+ .div_reg = _div_reg, \ -+ .gate_reg = _gate_reg, \ -+ .gate_shift = _gate_shift, \ -+ .flags = _flags, \ -+} ++ rockchip_clk_register_branches(ctx, rk3588_clk_branches, ++ ARRAY_SIZE(rk3588_clk_branches)); + -+#define COMPOSITE_FRAC_NOMUX(_id, _name, _parent_name, \ -+ _div_reg, \ -+ _gate_reg, _gate_shift, _flags) \ -+{ \ -+ .id = _id, \ -+ .name = _name, \ -+ .parent_names = (const char *[]){ _parent_name }, \ -+ .num_parents = 1, \ -+ .div_reg = _div_reg, \ -+ .gate_reg = _gate_reg, \ -+ .gate_shift = _gate_shift, \ -+ .flags = _flags, \ -+} ++ rockchip_register_softrst(np, 49158, reg_base + RK3588_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+#define COMPOSITE_FRAC_NOGATE(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ _div_reg, \ -+ _flags) \ -+ COMPOSITE_FRAC(_id, _name, _parent_names, \ -+ _mux_reg, _mux_shift, _mux_width, \ -+ _div_reg, 0, 0, _flags) ++ rockchip_register_restart_notifier(ctx, RK3588_GLB_SRST_FST, NULL); + -+struct clk_regmap_fractional_divider { -+ struct clk_hw hw; -+ struct device *dev; -+ struct regmap *regmap; -+ u32 reg; -+ u8 mshift; -+ u8 mwidth; -+ u32 mmask; -+ u8 nshift; -+ u8 nwidth; -+ u32 nmask; -+}; ++ rockchip_clk_of_add_provider(np, ctx); + -+struct clk_regmap_divider { -+ struct clk_hw hw; -+ struct device *dev; -+ struct regmap *regmap; -+ u32 reg; -+ u8 shift; -+ u8 width; -+}; ++ if 
(!rk_dump_cru) ++ rk_dump_cru = rk3588_dump_cru; ++} + -+struct clk_regmap_gate { -+ struct clk_hw hw; -+ struct device *dev; -+ struct regmap *regmap; -+ u32 reg; -+ u8 shift; -+}; ++CLK_OF_DECLARE(rk3588_cru, "rockchip,rk3588-cru", rk3588_clk_init); + -+struct clk_regmap_mux { -+ struct clk_hw hw; -+ struct device *dev; -+ struct regmap *regmap; -+ u32 reg; -+ u32 mask; -+ u8 shift; ++#ifdef MODULE ++struct clk_rk3588_inits { ++ void (*inits)(struct device_node *np); +}; + -+extern const struct clk_ops clk_regmap_mux_ops; -+extern const struct clk_ops clk_regmap_divider_ops; -+extern const struct clk_ops clk_regmap_gate_ops; -+extern const struct clk_ops clk_regmap_fractional_divider_ops; ++static const struct clk_rk3588_inits clk_3588_cru_init = { ++ .inits = rk3588_clk_init, ++}; + -+struct clk * -+devm_clk_regmap_register_pll(struct device *dev, const char *name, -+ const char *parent_name, -+ struct regmap *regmap, u32 reg, u8 pd_shift, -+ u8 dsmpd_shift, u8 lock_shift, -+ unsigned long flags); ++static const struct of_device_id clk_rk3588_match_table[] = { ++ { ++ .compatible = "rockchip,rk3588-cru", ++ .data = &clk_3588_cru_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3588_match_table); + -+struct clk * -+devm_clk_regmap_register_mux(struct device *dev, const char *name, -+ const char * const *parent_names, u8 num_parents, -+ struct regmap *regmap, u32 reg, u8 shift, u8 width, -+ unsigned long flags); ++static int clk_rk3588_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3588_inits *init_data; + -+struct clk * -+devm_clk_regmap_register_divider(struct device *dev, const char *name, -+ const char *parent_name, struct regmap *regmap, -+ u32 reg, u8 shift, u8 width, -+ unsigned long flags); ++ match = of_match_device(clk_rk3588_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; + -+struct clk * -+devm_clk_regmap_register_gate(struct device *dev, const char *name, -+ const char *parent_name, -+ struct regmap *regmap, u32 reg, u8 shift, -+ unsigned long flags); ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + -+struct clk * -+devm_clk_regmap_register_fractional_divider(struct device *dev, -+ const char *name, -+ const char *parent_name, -+ struct regmap *regmap, -+ u32 reg, unsigned long flags); ++ return 0; ++} + -+struct clk * -+devm_clk_regmap_register_composite(struct device *dev, const char *name, -+ const char *const *parent_names, -+ u8 num_parents, struct regmap *regmap, -+ u32 mux_reg, u8 mux_shift, u8 mux_width, -+ u32 div_reg, u8 div_shift, u8 div_width, -+ u8 div_flags, -+ u32 gate_reg, u8 gate_shift, -+ unsigned long flags); ++static struct platform_driver clk_rk3588_driver = { ++ .probe = clk_rk3588_probe, ++ .driver = { ++ .name = "clk-rk3588", ++ .of_match_table = clk_rk3588_match_table, ++ .suppress_bind_attrs = true, ++ }, ++}; ++module_platform_driver(clk_rk3588_driver); + -+#endif -diff --git a/drivers/clk/rockchip-oh/regmap/clk-rk618.c b/drivers/clk/rockchip-oh/regmap/clk-rk618.c ++MODULE_DESCRIPTION("Rockchip RK3588 Clock Driver"); ++MODULE_LICENSE("GPL"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk-rv1106.c b/drivers/clk/rockchip-oh/clk-rv1106.c new file mode 100644 -index 000000000..c780f502b +index 000000000..0833bf2ad --- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-rk618.c -@@ -0,0 +1,408 @@ ++++ b/drivers/clk/rockchip-oh/clk-rv1106.c +@@ -0,0 +1,1280 @@ ++// 
SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang + */ + +#include -+#include -+#include +#include -+#include -+ -+#include "clk-regmap.h" -+ -+#define RK618_CRU_CLKSEL0 0x0058 -+#define RK618_CRU_CLKSEL1 0x005c -+#define RK618_CRU_CLKSEL2 0x0060 -+#define RK618_CRU_CLKSEL3 0x0064 -+#define RK618_CRU_PLL0_CON0 0x0068 -+#define RK618_CRU_PLL0_CON1 0x006c -+#define RK618_CRU_PLL0_CON2 0x0070 -+#define RK618_CRU_PLL1_CON0 0x0074 -+#define RK618_CRU_PLL1_CON1 0x0078 -+#define RK618_CRU_PLL1_CON2 0x007c -+ -+enum { -+ LCDC0_CLK = 1, -+ LCDC1_CLK, -+ VIF_PLLIN_CLK, -+ SCALER_PLLIN_CLK, -+ VIF_PLL_CLK, -+ SCALER_PLL_CLK, -+ VIF0_CLK, -+ VIF1_CLK, -+ SCALER_IN_CLK, -+ SCALER_CLK, -+ DITHER_CLK, -+ HDMI_CLK, -+ MIPI_CLK, -+ LVDS_CLK, -+ LVTTL_CLK, -+ RGB_CLK, -+ VIF0_PRE_CLK, -+ VIF1_PRE_CLK, -+ CODEC_CLK, -+ NR_CLKS, -+}; -+ -+struct rk618_cru { -+ struct device *dev; -+ struct rk618 *parent; -+ struct regmap *regmap; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ struct clk_onecell_data clk_data; -+}; ++#define CRU_PVTPLL0_CON0_L 0x11000 ++#define CRU_PVTPLL0_CON0_H 0x11004 ++#define CRU_PVTPLL0_CON1_L 0x11008 ++#define CRU_PVTPLL0_CON1_H 0x1100c ++#define CRU_PVTPLL0_CON2_L 0x11010 ++#define CRU_PVTPLL0_CON2_H 0x11014 ++#define CRU_PVTPLL0_CON3_L 0x11018 ++#define CRU_PVTPLL0_CON3_H 0x1101c ++#define CRU_PVTPLL0_OSC_CNT 0x11020 ++#define CRU_PVTPLL0_OSC_CNT_AVG 0x11024 + -+static char clkin_name[32] = "dummy"; -+static char lcdc0_dclkp_name[32] = "dummy"; -+static char lcdc1_dclkp_name[32] = "dummy"; ++#define CRU_PVTPLL1_CON0_L 0x11030 ++#define CRU_PVTPLL1_CON0_H 0x11034 ++#define CRU_PVTPLL1_CON1_L 0x11038 ++#define CRU_PVTPLL1_CON1_H 0x1103c ++#define CRU_PVTPLL1_CON2_L 0x11040 ++#define CRU_PVTPLL1_CON2_H 0x11044 ++#define CRU_PVTPLL1_CON3_L 0x11048 ++#define CRU_PVTPLL1_CON3_H 0x1104c ++#define CRU_PVTPLL1_OSC_CNT 0x11050 ++#define CRU_PVTPLL1_OSC_CNT_AVG 0x11054 + -+#define PNAME(x) static const char *const x[] ++#define RV1106_GRF_SOC_STATUS0 0x10 ++#define CPU_PVTPLL_CON0_L 0x40000 ++#define CPU_PVTPLL_CON0_H 0x40004 ++#define CPU_PVTPLL_CON1 0x40008 ++#define CPU_PVTPLL_CON2 0x4000c ++#define CPU_PVTPLL_CON3 0x40010 ++#define CPU_PVTPLL_OSC_CNT 0x40018 ++#define CPU_PVTPLL_OSC_CNT_AVG 0x4001c + -+PNAME(mux_pll_in_p) = { "lcdc0_clk", "lcdc1_clk", clkin_name }; -+PNAME(mux_pll_src_p) = { "vif_pll_clk", "scaler_pll_clk", }; -+PNAME(mux_scaler_in_src_p) = { "vif0_clk", "vif1_clk" }; -+PNAME(mux_hdmi_src_p) = { "vif1_clk", "scaler_clk", "vif0_clk" }; -+PNAME(mux_dither_src_p) = { "vif0_clk", "scaler_clk" }; -+PNAME(mux_vif0_src_p) = { "vif0_pre_clk", lcdc0_dclkp_name }; -+PNAME(mux_vif1_src_p) = { "vif1_pre_clk", lcdc1_dclkp_name }; -+PNAME(mux_codec_src_p) = { "codec_pre_clk", clkin_name }; ++#define PVTPLL_RING_SEL_MASK 0x7 ++#define PVTPLL_RING_SEL_SHIFT 8 ++#define PVTPLL_EN_MASK 0x3 ++#define 
PVTPLL_EN_SHIFT 0 ++#define PVTPLL_LENGTH_SEL_MASK 0x7f ++#define PVTPLL_LENGTH_SEL_SHIFT 0 + -+/* Two PLL, one for dual datarate input logic, the other for scaler */ -+static const struct clk_pll_data rk618_clk_plls[] = { -+ RK618_PLL(VIF_PLL_CLK, "vif_pll_clk", "vif_pllin_clk", -+ RK618_CRU_PLL0_CON0, -+ 0), -+ RK618_PLL(SCALER_PLL_CLK, "scaler_pll_clk", "scaler_pllin_clk", -+ RK618_CRU_PLL1_CON0, -+ 0), -+}; ++#define CPU_CLK_PATH_BASE (0x18300) ++#define CPU_PVTPLL_PATH_CORE ((1 << 12) | (1 << 28)) + -+static const struct clk_mux_data rk618_clk_muxes[] = { -+ MUX(VIF_PLLIN_CLK, "vif_pllin_clk", mux_pll_in_p, -+ RK618_CRU_CLKSEL0, 6, 2, -+ 0), -+ MUX(SCALER_PLLIN_CLK, "scaler_pllin_clk", mux_pll_in_p, -+ RK618_CRU_CLKSEL0, 8, 2, -+ 0), -+ MUX(SCALER_IN_CLK, "scaler_in_clk", mux_scaler_in_src_p, -+ RK618_CRU_CLKSEL3, 15, 1, -+ 0), -+ MUX(DITHER_CLK, "dither_clk", mux_dither_src_p, -+ RK618_CRU_CLKSEL3, 14, 1, -+ 0), -+ MUX(VIF0_CLK, "vif0_clk", mux_vif0_src_p, -+ RK618_CRU_CLKSEL3, 1, 1, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), -+ MUX(VIF1_CLK, "vif1_clk", mux_vif1_src_p, -+ RK618_CRU_CLKSEL3, 7, 1, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), -+ MUX(CODEC_CLK, "codec_clk", mux_codec_src_p, -+ RK618_CRU_CLKSEL1, 1, 1, -+ CLK_SET_RATE_PARENT), -+}; ++#define RV1106_FRAC_MAX_PRATE 1200000000 + -+static const struct clk_divider_data rk618_clk_dividers[] = { -+ DIV(LCDC0_CLK, "lcdc0_clk", lcdc0_dclkp_name, -+ RK618_CRU_CLKSEL0, 0, 3, -+ 0), -+ DIV(LCDC1_CLK, "lcdc1_clk", lcdc1_dclkp_name, -+ RK618_CRU_CLKSEL0, 3, 3, -+ 0), ++enum rv1106_plls { ++ apll, dpll, cpll, gpll, +}; + -+static const struct clk_gate_data rk618_clk_gates[] = { -+ GATE(MIPI_CLK, "mipi_clk", "dither_clk", -+ RK618_CRU_CLKSEL1, 10, -+ CLK_IGNORE_UNUSED), -+ GATE(LVDS_CLK, "lvds_clk", "dither_clk", -+ RK618_CRU_CLKSEL1, 9, -+ CLK_IGNORE_UNUSED), -+ GATE(LVTTL_CLK, "lvttl_clk", "dither_clk", -+ RK618_CRU_CLKSEL1, 12, -+ 0), -+ GATE(RGB_CLK, "rgb_clk", "dither_clk", -+ RK618_CRU_CLKSEL1, 11, -+ 0), ++static struct rockchip_pll_rate_table rv1106_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(993484800, 1, 124, 3, 1, 0, 3113851), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(983040000, 1, 81, 2, 1, 0, 15435038), 
++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), ++ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), ++ { /* sentinel */ }, +}; + -+static const struct clk_composite_data rk618_clk_composites[] = { -+ COMPOSITE(SCALER_CLK, "scaler_clk", mux_pll_src_p, -+ RK618_CRU_CLKSEL1, 3, 1, -+ RK618_CRU_CLKSEL1, 5, 3, -+ RK618_CRU_CLKSEL1, 4, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), -+ COMPOSITE_NODIV(HDMI_CLK, "hdmi_clk", mux_hdmi_src_p, -+ RK618_CRU_CLKSEL3, 12, 2, -+ RK618_CRU_CLKSEL1, 8, -+ 0), -+ COMPOSITE(VIF0_PRE_CLK, "vif0_pre_clk", mux_pll_src_p, -+ RK618_CRU_CLKSEL3, 0, 1, -+ RK618_CRU_CLKSEL3, 3, 3, -+ RK618_CRU_CLKSEL3, 2, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), -+ COMPOSITE(VIF1_PRE_CLK, "vif1_pre_clk", mux_pll_src_p, -+ RK618_CRU_CLKSEL3, 6, 1, -+ RK618_CRU_CLKSEL3, 9, 3, -+ RK618_CRU_CLKSEL3, 8, -+ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), -+ COMPOSITE_FRAC_NOGATE(0, "codec_pre_clk", mux_pll_src_p, -+ RK618_CRU_CLKSEL1, 0, 1, -+ RK618_CRU_CLKSEL2, -+ 0), -+}; ++#define RV1106_DIV_ACLK_CORE_MASK 0x1f ++#define RV1106_DIV_ACLK_CORE_SHIFT 7 ++#define RV1106_DIV_PCLK_DBG_MASK 0x1f ++#define RV1106_DIV_PCLK_DBG_SHIFT 0 ++#define RV1106_CORE_SEL_MASK 0x3 ++#define RV1106_CORE_SEL_SHIFT 5 ++#define RV1106_ALT_DIV_MASK 0x1f ++#define RV1106_ALT_DIV_SHIFT 0 + -+static void rk618_clk_add_lookup(struct rk618_cru *cru, struct clk *clk, -+ unsigned int id) -+{ -+ if (cru->clk_data.clks && id) -+ cru->clk_data.clks[id] = clk; ++#define RV1106_CLKSEL0(_aclk_core) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, RV1106_DIV_ACLK_CORE_MASK, \ ++ RV1106_DIV_ACLK_CORE_SHIFT), \ +} + -+static void rk618_clk_register_muxes(struct rk618_cru *cru) -+{ -+ struct clk *clk; -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(rk618_clk_muxes); i++) { -+ const struct clk_mux_data *data = &rk618_clk_muxes[i]; -+ -+ clk = devm_clk_regmap_register_mux(cru->dev, data->name, -+ data->parent_names, -+ data->num_parents, -+ cru->regmap, data->reg, -+ data->shift, data->width, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++#define RV1106_CLKSEL1(_pclk_dbg) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_pclk_dbg, RV1106_DIV_PCLK_DBG_MASK, \ ++ RV1106_DIV_PCLK_DBG_SHIFT), \ ++} + -+ rk618_clk_add_lookup(cru, clk, data->id); -+ } ++#define RV1106_CLKSEL2(_is_pvtpll) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_is_pvtpll, 
RV1106_CORE_SEL_MASK, \ ++ RV1106_CORE_SEL_SHIFT), \ +} + -+static void rk618_clk_register_dividers(struct rk618_cru *cru) -+{ -+ struct clk *clk; -+ unsigned int i; ++#define RV1106_CLKSEL3(_alt_div) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_alt_div, RV1106_ALT_DIV_MASK, \ ++ RV1106_ALT_DIV_SHIFT), \ ++} + -+ for (i = 0; i < ARRAY_SIZE(rk618_clk_dividers); i++) { -+ const struct clk_divider_data *data = &rk618_clk_dividers[i]; ++#define RV1106_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg, _is_pvtpll) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RV1106_CLKSEL0(_aclk_core), \ ++ RV1106_CLKSEL1(_pclk_dbg), \ ++ }, \ ++ .pre_muxs = { \ ++ RV1106_CLKSEL3(1), \ ++ RV1106_CLKSEL2(2), \ ++ }, \ ++ .post_muxs = { \ ++ RV1106_CLKSEL2(_is_pvtpll), \ ++ RV1106_CLKSEL3(0), \ ++ }, \ ++} + -+ clk = devm_clk_regmap_register_divider(cru->dev, data->name, -+ data->parent_name, -+ cru->regmap, data->reg, -+ data->shift, data->width, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++static struct rockchip_cpuclk_rate_table rv1106_cpuclk_rates[] __initdata = { ++ RV1106_CPUCLK_RATE(1608000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1584000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1560000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1536000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1512000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1488000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1464000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1440000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1416000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1392000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1368000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1344000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1320000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1296000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1272000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1248000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1224000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1200000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1104000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1096000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1008000000, 1, 5, 1), ++ RV1106_CPUCLK_RATE(912000000, 1, 5, 1), ++ RV1106_CPUCLK_RATE(816000000, 1, 3, 1), ++ RV1106_CPUCLK_RATE(696000000, 1, 3, 0), ++ RV1106_CPUCLK_RATE(600000000, 1, 3, 0), ++ RV1106_CPUCLK_RATE(408000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(312000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(216000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(96000000, 1, 1, 0), ++}; + -+ rk618_clk_add_lookup(cru, clk, data->id); -+ } -+} ++static const struct rockchip_cpuclk_reg_data rv1106_cpuclk_data = { ++ .core_reg[0] = RV1106_CORECLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 2, ++ .mux_core_main = 2, ++ .mux_core_shift = 5, ++ .mux_core_mask = 0x3, ++}; + -+static void rk618_clk_register_gates(struct rk618_cru *cru) -+{ -+ struct clk *clk; -+ unsigned int i; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_24m_32k_p) = { "xin24m", "clk_rtc_32k" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_24m_p) = { "gpll", "xin24m" }; ++PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_400m_300m_pvtpll0_pvtpll1_p) = { "clk_400m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; ++PNAME(mux_500m_300m_pvtpll0_pvtpll1_p) = { "clk_500m_src", "clk_300m_src", "clk_pvtpll_0", 
"clk_pvtpll_1" }; ++PNAME(mux_339m_200m_pvtpll0_pvtpll1_p) = { "clk_339m_src", "clk_200m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; ++PNAME(mux_400m_200m_100m_24m_p) = { "clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_300m_200m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; ++PNAME(mux_50m_24m_p) = { "clk_50m_src", "xin24m" }; ++PNAME(mux_400m_24m_p) = { "clk_400m_src", "xin24m" }; ++PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_pvtm_32k" }; ++PNAME(mux_200m_100m_24m_32k_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_rtc_32k" }; ++PNAME(mux_100m_pmu_24m_p) = { "clk_100m_pmu", "xin24m" }; ++PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_pmu", "xin24m" }; ++PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_pmu", "xin24m" }; ++PNAME(mux_dpll_300m_p) = { "dpll", "clk_300m_src" }; ++PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin_osc0_half" }; ++PNAME(clk_ref_mipi0_p) = { "clk_ref_mipi0_src", "clk_ref_mipi0_frac", "xin24m" }; ++PNAME(clk_ref_mipi1_p) = { "clk_ref_mipi1_src", "clk_ref_mipi1_frac", "xin24m" }; ++PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; ++PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(clk_vicap_m0_p) = { "clk_vicap_m0_src", "clk_vicap_m0_frac", "xin24m" }; ++PNAME(clk_vicap_m1_p) = { "clk_vicap_m1_src", "clk_vicap_m1_frac", "xin24m" }; + -+ for (i = 0; i < ARRAY_SIZE(rk618_clk_gates); i++) { -+ const struct clk_gate_data *data = &rk618_clk_gates[i]; ++static struct rockchip_pll_clock rv1106_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1106_PLL_CON(0), ++ RV1106_MODE_CON, 0, 10, 0, rv1106_pll_rates), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RV1106_PLL_CON(8), ++ RV1106_MODE_CON, 2, 10, 0, rv1106_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1106_PLL_CON(16), ++ RV1106_SUBDDRMODE_CON, 0, 10, 0, NULL), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RV1106_PLL_CON(24), ++ RV1106_MODE_CON, 4, 10, 0, rv1106_pll_rates), ++}; + -+ clk = devm_clk_regmap_register_gate(cru->dev, data->name, -+ data->parent_name, -+ cru->regmap, -+ data->reg, data->shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ rk618_clk_add_lookup(cru, clk, data->id); -+ } -+} ++static struct rockchip_clk_branch rv1106_rtc32k_pmu_fracmux __initdata = ++ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ 
RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS); + -+static void rk618_clk_register_composites(struct rk618_cru *cru) -+{ -+ struct clk *clk; -+ unsigned int i; ++static struct rockchip_clk_branch rv1106_i2s0_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(19), 0, 2, MFLAGS); + -+ for (i = 0; i < ARRAY_SIZE(rk618_clk_composites); i++) { -+ const struct clk_composite_data *data = -+ &rk618_clk_composites[i]; ++static struct rockchip_clk_branch rv1106_i2s0_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(21), 0, 2, MFLAGS); + -+ clk = devm_clk_regmap_register_composite(cru->dev, data->name, -+ data->parent_names, -+ data->num_parents, -+ cru->regmap, -+ data->mux_reg, -+ data->mux_shift, -+ data->mux_width, -+ data->div_reg, -+ data->div_shift, -+ data->div_width, -+ data->div_flags, -+ data->gate_reg, -+ data->gate_shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++static struct rockchip_clk_branch rv1106_clk_ref_mipi0_fracmux __initdata = ++ MUX(CLK_REF_MIPI0, "clk_ref_mipi0", clk_ref_mipi0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(27), 0, 2, MFLAGS); + -+ rk618_clk_add_lookup(cru, clk, data->id); -+ } -+} ++static struct rockchip_clk_branch rv1106_clk_ref_mipi1_fracmux __initdata = ++ MUX(CLK_REF_MIPI1, "clk_ref_mipi1", clk_ref_mipi1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(29), 0, 2, MFLAGS); + -+static void rk618_clk_register_plls(struct rk618_cru *cru) -+{ -+ struct clk *clk; -+ unsigned int i; ++static struct rockchip_clk_branch rv1106_clk_uart0_fracmux __initdata = ++ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(7), 0, 2, MFLAGS); + -+ for (i = 0; i < ARRAY_SIZE(rk618_clk_plls); i++) { -+ const struct clk_pll_data *data = &rk618_clk_plls[i]; ++static struct rockchip_clk_branch rv1106_clk_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(9), 0, 2, MFLAGS); + -+ clk = devm_clk_regmap_register_pll(cru->dev, data->name, -+ data->parent_name, -+ cru->regmap, -+ data->reg, -+ data->pd_shift, -+ data->dsmpd_shift, -+ data->lock_shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++static struct rockchip_clk_branch rv1106_clk_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(11), 0, 2, MFLAGS); + -+ rk618_clk_add_lookup(cru, clk, data->id); -+ } -+} ++static struct rockchip_clk_branch rv1106_clk_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(13), 0, 2, MFLAGS); + -+static int rk618_cru_probe(struct platform_device *pdev) -+{ -+ struct rk618 *rk618 = dev_get_drvdata(pdev->dev.parent); -+ struct device *dev = &pdev->dev; -+ struct rk618_cru *cru; -+ struct clk **clk_table; -+ const char *parent_name; -+ struct clk *clk; -+ int ret, i; ++static struct rockchip_clk_branch rv1106_clk_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(15), 0, 2, MFLAGS); + -+ if (!of_device_is_available(dev->of_node)) -+ return -ENODEV; ++static struct rockchip_clk_branch rv1106_clk_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(17), 0, 
2, MFLAGS); + -+ cru = devm_kzalloc(dev, sizeof(*cru), GFP_KERNEL); -+ if (!cru) -+ return -ENOMEM; ++static struct rockchip_clk_branch rv1106_clk_vicap_m0_fracmux __initdata = ++ MUX(CLK_VICAP_M0, "clk_vicap_m0", clk_vicap_m0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(31), 0, 2, MFLAGS); + -+ clk_table = devm_kcalloc(dev, NR_CLKS, sizeof(struct clk *), -+ GFP_KERNEL); -+ if (!clk_table) -+ return -ENOMEM; ++static struct rockchip_clk_branch rv1106_clk_vicap_m1_fracmux __initdata = ++ MUX(CLK_VICAP_M1, "clk_vicap_m1", clk_vicap_m1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(33), 0, 2, MFLAGS); + -+ for (i = 0; i < NR_CLKS; i++) -+ clk_table[i] = ERR_PTR(-ENOENT); ++static struct rockchip_clk_branch rv1106_clk_branches[] __initdata = { + -+ cru->dev = dev; -+ cru->parent = rk618; -+ cru->regmap = rk618->regmap; -+ cru->clk_data.clks = clk_table; -+ cru->clk_data.clk_num = NR_CLKS; -+ platform_set_drvdata(pdev, cru); ++ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), + -+ clk = devm_clk_get(dev, "clkin"); -+ if (IS_ERR(clk)) { -+ ret = PTR_ERR(clk); -+ dev_err(dev, "failed to get clkin: %d\n", ret); -+ return ret; -+ } -+ -+ strlcpy(clkin_name, __clk_get_name(clk), sizeof(clkin_name)); -+ -+ clk = devm_clk_get(dev, "lcdc0_dclkp"); -+ if (IS_ERR(clk)) { -+ if (PTR_ERR(clk) != -ENOENT) { -+ ret = PTR_ERR(clk); -+ dev_err(dev, "failed to get lcdc0_dclkp: %d\n", ret); -+ return ret; -+ } -+ -+ clk = NULL; -+ } -+ -+ parent_name = __clk_get_name(clk); -+ if (parent_name) -+ strlcpy(lcdc0_dclkp_name, parent_name, -+ sizeof(lcdc0_dclkp_name)); -+ -+ clk = devm_clk_get(dev, "lcdc1_dclkp"); -+ if (IS_ERR(clk)) { -+ if (PTR_ERR(clk) != -ENOENT) { -+ ret = PTR_ERR(clk); -+ dev_err(dev, "failed to get lcdc1_dclkp: %d\n", ret); -+ return ret; -+ } -+ -+ clk = NULL; -+ } -+ -+ parent_name = __clk_get_name(clk); -+ if (parent_name) -+ strlcpy(lcdc1_dclkp_name, parent_name, -+ sizeof(lcdc1_dclkp_name)); -+ -+ rk618_clk_register_plls(cru); -+ rk618_clk_register_muxes(cru); -+ rk618_clk_register_dividers(cru); -+ rk618_clk_register_gates(cru); -+ rk618_clk_register_composites(cru); -+ -+ return of_clk_add_provider(dev->of_node, of_clk_src_onecell_get, -+ &cru->clk_data); -+} -+ -+static int rk618_cru_remove(struct platform_device *pdev) -+{ -+ of_clk_del_provider(pdev->dev.of_node); ++ /* PD_CORE */ ++ GATE(CLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RV1106_CORECLKGATE_CON(0), 14, GFLAGS), ++ GATE(CLK_CORE_MCU_RTC, "clk_core_mcu_rtc", "xin24m", 0, ++ RV1106_CORECLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE(HCLK_CPU, "hclk_cpu", mux_gpll_24m_p, CLK_IS_CRITICAL, ++ RV1106_CORECLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CORECLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE(CLK_CORE_MCU, "clk_core_mcu", mux_gpll_24m_p, 0, ++ RV1106_CORECLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CORECLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RV1106_CORECLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1106_CORECLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, ++ RV1106_CORECLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_cpu_root", 0, ++ RV1106_CORECLKGATE_CON(1), 8, GFLAGS), + -+ return 0; -+} ++ /* PD _TOP */ ++ COMPOSITE(CLK_50M_SRC, "clk_50m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(CLK_100M_SRC, "clk_100m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, 
++ RV1106_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(CLK_150M_SRC, "clk_150m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE(CLK_200M_SRC, "clk_200m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE(CLK_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(CLK_300M_SRC, "clk_300m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_339M_SRC, "clk_339m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE(CLK_400M_SRC, "clk_400m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(4), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE(CLK_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 10, GFLAGS), + -+static const struct of_device_id rk618_cru_of_match[] = { -+ { .compatible = "rockchip,rk618-cru", }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(of, rk618_cru_of_match); ++ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(24), 5, 2, MFLAGS, ++ RV1106_CLKGATE_CON(2), 9, GFLAGS), + -+static struct platform_driver rk618_cru_driver = { -+ .driver = { -+ .name = "rk618-cru", -+ .of_match_table = of_match_ptr(rk618_cru_of_match), -+ }, -+ .probe = rk618_cru_probe, -+ .remove = rk618_cru_remove, -+}; -+module_platform_driver(rk618_cru_driver); ++ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(17), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(18), 0, ++ RV1106_CLKGATE_CON(1), 14, GFLAGS, ++ &rv1106_i2s0_8ch_tx_fracmux), ++ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, ++ RV1106_CLKGATE_CON(1), 15, GFLAGS), ++ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(19), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(20), 0, ++ RV1106_CLKGATE_CON(2), 1, GFLAGS, ++ &rv1106_i2s0_8ch_rx_fracmux), ++ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, ++ RV1106_CLKGATE_CON(2), 2, GFLAGS), ++ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(21), 2, 2, MFLAGS), ++ COMPOSITE(CLK_REF_MIPI0_SRC, "clk_ref_mipi0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(25), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_REF_MIPI0_FRAC, "clk_ref_mipi0_frac", "clk_ref_mipi0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(26), 0, ++ RV1106_CLKGATE_CON(3), 5, GFLAGS, ++ &rv1106_clk_ref_mipi0_fracmux), ++ GATE(MCLK_REF_MIPI0, "mclk_ref_mipi0", "clk_ref_mipi0", 0, ++ 
RV1106_CLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE(CLK_REF_MIPI1_SRC, "clk_ref_mipi1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(27), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_REF_MIPI1_FRAC, "clk_ref_mipi1_frac", "clk_ref_mipi1_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(28), 0, ++ RV1106_CLKGATE_CON(3), 8, GFLAGS, ++ &rv1106_clk_ref_mipi1_fracmux), ++ GATE(MCLK_REF_MIPI1, "mclk_ref_mipi1", "clk_ref_mipi1", 0, ++ RV1106_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE(CLK_UART0_SRC, "clk_uart0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(6), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(0), 12, GFLAGS, ++ &rv1106_clk_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, ++ RV1106_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(7), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(8), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(0), 15, GFLAGS, ++ &rv1106_clk_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RV1106_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(9), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(10), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 2, GFLAGS, ++ &rv1106_clk_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RV1106_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(11), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 5, GFLAGS, ++ &rv1106_clk_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RV1106_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(13), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 7, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(14), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 8, GFLAGS, ++ &rv1106_clk_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RV1106_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(15), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(16), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 11, GFLAGS, ++ &rv1106_clk_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RV1106_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE(CLK_VICAP_M0_SRC, "clk_vicap_m0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(29), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 10, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_VICAP_M0_FRAC, "clk_vicap_m0_frac", "clk_vicap_m0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(30), 
0, ++ RV1106_CLKGATE_CON(3), 11, GFLAGS, ++ &rv1106_clk_vicap_m0_fracmux), ++ GATE(SCLK_VICAP_M0, "sclk_vicap_m0", "clk_vicap_m0", 0, ++ RV1106_CLKGATE_CON(3), 12, GFLAGS), ++ COMPOSITE(CLK_VICAP_M1_SRC, "clk_vicap_m1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(31), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_VICAP_M1_FRAC, "clk_vicap_m1_frac", "clk_vicap_m1_src", 0, ++ RV1106_CLKSEL_CON(32), 0, ++ RV1106_CLKGATE_CON(3), 14, GFLAGS, ++ &rv1106_clk_vicap_m1_fracmux), ++ GATE(SCLK_VICAP_M1, "sclk_vicap_m1", "clk_vicap_m1", 0, ++ RV1106_CLKGATE_CON(3), 15, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC, "dclk_vop_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(23), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RV1106_CLKGATE_CON(2), 6, GFLAGS), + -+MODULE_AUTHOR("Wyon Bi "); -+MODULE_DESCRIPTION("Rockchip rk618 CRU driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clk/rockchip-oh/regmap/clk-rk628.c b/drivers/clk/rockchip-oh/regmap/clk-rk628.c -new file mode 100644 -index 000000000..7f501db66 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/regmap/clk-rk628.c -@@ -0,0 +1,609 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2020 Rockchip Electronics Co. Ltd. -+ * -+ * Author: Wyon Bi -+ */ ++ /* PD_DDR */ ++ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_DDRCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_DDRCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_DDR_ROOT, "aclk_ddr_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_DDRCLKSEL_CON(0), 8, 2, MFLAGS, ++ RV1106_DDRCLKGATE_CON(0), 12, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", 0, ++ RV1106_DDRCLKGATE_CON(0), 7, GFLAGS), ++ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", 0, ++ RV1106_DDRCLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_DFICTRL, "pclk_dfictrl", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RV1106_DDRCLKGATE_CON(0), 11, GFLAGS), ++ GATE(ACLK_SYS_SHRM, "aclk_sys_shrm", "aclk_ddr_root", CLK_IS_CRITICAL, ++ RV1106_DDRCLKGATE_CON(0), 13, GFLAGS), + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ /* PD_NPU */ ++ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_NPU_ROOT, "aclk_npu_root", mux_500m_300m_pvtpll0_pvtpll1_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 2, GFLAGS), ++ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_root", 0, ++ RV1106_NPUCLKGATE_CON(0), 9, GFLAGS), ++ GATE(ACLK_RKNN, "aclk_rknn", "aclk_npu_root", 0, ++ RV1106_NPUCLKGATE_CON(0), 10, GFLAGS), + -+#include "clk-regmap.h" ++ /* PD_PERI */ ++ COMPOSITE_NODIV(PCLK_PERI_ROOT, "pclk_peri_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_PERI_ROOT, "aclk_peri_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 2, 
2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PERI_ROOT, "hclk_peri_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 4, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(9), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(6), 8, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKSEL_CON(8), 0, 8, DFLAGS, ++ RV1106_PERICLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_CRYPTO, "clk_core_crypto", mux_300m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 5, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 11, GFLAGS), ++ COMPOSITE_NODIV(CLK_PKA_CRYPTO, "clk_pka_crypto", mux_300m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 7, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 12, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(3), 13, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 14, GFLAGS), ++ GATE(ACLK_DECOM, "aclk_decom", "aclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 9, GFLAGS), ++ GATE(PCLK_DECOM, "pclk_decom", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_DECOM, "dclk_decom", mux_400m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 14, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(5), 11, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(5), 8, GFLAGS), ++ GATE(PCLK_DSM, "pclk_dsm", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 2, GFLAGS), ++ GATE(MCLK_DSM, "mclk_dsm", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_400m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RV1106_PERICLKGATE_CON(4), 12, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 13, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 0, GFLAGS), ++ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 8, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 12, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 11, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 14, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 13, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(2), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 15, GFLAGS), ++ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 0, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_peri_root", CLK_IGNORE_UNUSED, ++ RV1106_PERICLKGATE_CON(6), 7, GFLAGS), ++ GATE(HCLK_IVE, "hclk_ive", 
"hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 9, GFLAGS), ++ GATE(ACLK_IVE, "aclk_ive", "aclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 10, GFLAGS), ++ GATE(PCLK_PWM0_PERI, "pclk_pwm0_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(7), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM0_PERI, "clk_pwm0_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(11), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(7), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0_PERI, "clk_capture_pwm0_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(7), 5, GFLAGS), ++ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(0), 3, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 14, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_500m_300m_200m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 12, 2, MFLAGS, 7, 5, DFLAGS, ++ RV1106_PERICLKGATE_CON(5), 0, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 11, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 15, GFLAGS), ++ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 15, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 9, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(4), 0, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 1, GFLAGS), ++ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 11, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(4), 3, GFLAGS), ++ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 4, GFLAGS), ++ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 7, GFLAGS), ++ GATE(HCLK_SAI, "hclk_sai", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 13, GFLAGS), ++ GATE(MCLK_SAI, "mclk_sai", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKGATE_CON(5), 14, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RV1106_PERICLKSEL_CON(6), 0, 3, DFLAGS, ++ RV1106_PERICLKGATE_CON(3), 4, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 3, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 7, GFLAGS), ++ GATE(PCLK_STIMER, "pclk_stimer", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 15, GFLAGS), ++ GATE(CLK_STIMER0, "clk_stimer0", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(1), 0, GFLAGS), ++ GATE(CLK_STIMER1, "clk_stimer1", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 8, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 9, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 10, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 11, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 13, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 
0, ++ RV1106_PERICLKGATE_CON(0), 14, GFLAGS), ++ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 9, GFLAGS), ++ GATE(HCLK_TRNG_S, "hclk_trng_s", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 10, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 3, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 11, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 15, GFLAGS), ++ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(4), 7, GFLAGS), ++ GATE(CLK_REF_USBOTG, "clk_ref_usbotg", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 8, GFLAGS), ++ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 1, GFLAGS), ++ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 2, GFLAGS), ++ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_WDT_S, "pclk_wdt_s", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 4, GFLAGS), ++ GATE(TCLK_WDT_S, "tclk_wdt_s", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(1), 5, GFLAGS), + -+#define RK628_PLL(_id, _name, _parent_name, _reg, _flags) \ -+ PLL(_id, _name, _parent_name, _reg, 13, 12, 10, _flags) ++ /* PD_PMU */ ++ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RV1106_PMUCLKSEL_CON(6), 0, ++ RV1106_PMUCLKGATE_CON(1), 14, GFLAGS, ++ &rv1106_rtc32k_pmu_fracmux), ++ DIV(CLK_100M_PMU, "clk_100m_pmu", "clk_200m_src", 0, ++ RV1106_PMUCLKSEL_CON(0), 0, 3, DFLAGS), ++ COMPOSITE_NODIV(PCLK_PMU_ROOT, "pclk_pmu_root", mux_100m_pmu_24m_p, CLK_IS_CRITICAL, ++ RV1106_PMUCLKSEL_CON(0), 3, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PMU_ROOT, "hclk_pmu_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PMUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 2, GFLAGS), ++ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, ++ RV1106_PMUCLKGATE_CON(1), 0, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RV1106_PMUCLKGATE_CON(1), 1, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu", 0, ++ RV1106_PMUCLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_PMU_GPIO0, "pclk_pmu_gpio0", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_PMU_GPIO0, "dbclk_pmu_gpio0", mux_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(0), 15, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 4, GFLAGS), ++ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(2), 10, GFLAGS), ++ GATE(CLK_PMU_MCU, "clk_pmu_mcu", "hclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(0), 9, GFLAGS), ++ GATE(CLK_PMU_MCU_RTC, "clk_pmu_mcu_rtc", "xin24m", 0, ++ RV1106_PMUCLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RV1106_PMUCLKSEL_CON(1), 0, 5, DFLAGS, ++ RV1106_PMUCLKGATE_CON(1), 4, GFLAGS), ++ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(1), 5, GFLAGS), ++ GATE(CLK_REFOUT, "clk_refout", 
"xin24m", 0, ++ RV1106_PMUCLKGATE_CON(2), 13, GFLAGS), ++ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IGNORE_UNUSED, ++ RV1106_PMUCLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(7), 2, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(2), 9, GFLAGS), + -+#define REG(x) ((x) + 0xc0000) ++ /* PD_SUBDDR */ ++ COMPOSITE(CLK_CORE_DDRC_SRC, "clk_core_ddrc_src", mux_dpll_300m_p, CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_SUBDDRCLKGATE_CON(0), 2, GFLAGS), ++ GATE(CLK_DFICTRL, "clk_dfictrl", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKGATE_CON(0), 5, GFLAGS), ++ GATE(CLK_DDRMON, "clk_ddrmon", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKGATE_CON(0), 4, GFLAGS), ++ GATE(CLK_DDR_PHY, "clk_ddr_phy", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKGATE_CON(0), 6, GFLAGS), ++ GATE(ACLK_DDRC, "aclk_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, ++ RV1106_SUBDDRCLKGATE_CON(0), 1, GFLAGS), ++ GATE(CLK_CORE_DDRC, "clk_core_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, ++ RV1106_SUBDDRCLKGATE_CON(0), 3, GFLAGS), + -+#define CRU_CPLL_CON0 REG(0x0000) -+#define CRU_CPLL_CON1 REG(0x0004) -+#define CRU_CPLL_CON2 REG(0x0008) -+#define CRU_CPLL_CON3 REG(0x000c) -+#define CRU_CPLL_CON4 REG(0x0010) -+#define CRU_GPLL_CON0 REG(0x0020) -+#define CRU_GPLL_CON1 REG(0x0024) -+#define CRU_GPLL_CON2 REG(0x0028) -+#define CRU_GPLL_CON3 REG(0x002c) -+#define CRU_GPLL_CON4 REG(0x0030) -+#define CRU_MODE_CON REG(0x0060) -+#define CRU_CLKSEL_CON00 REG(0x0080) -+#define CRU_CLKSEL_CON01 REG(0x0084) -+#define CRU_CLKSEL_CON02 REG(0x0088) -+#define CRU_CLKSEL_CON03 REG(0x008c) -+#define CRU_CLKSEL_CON04 REG(0x0090) -+#define CRU_CLKSEL_CON05 REG(0x0094) -+#define CRU_CLKSEL_CON06 REG(0x0098) -+#define CRU_CLKSEL_CON07 REG(0x009c) -+#define CRU_CLKSEL_CON08 REG(0x00a0) -+#define CRU_CLKSEL_CON09 REG(0x00a4) -+#define CRU_CLKSEL_CON10 REG(0x00a8) -+#define CRU_CLKSEL_CON11 REG(0x00ac) -+#define CRU_CLKSEL_CON12 REG(0x00b0) -+#define CRU_CLKSEL_CON13 REG(0x00b4) -+#define CRU_CLKSEL_CON14 REG(0x00b8) -+#define CRU_CLKSEL_CON15 REG(0x00bc) -+#define CRU_CLKSEL_CON16 REG(0x00c0) -+#define CRU_CLKSEL_CON17 REG(0x00c4) -+#define CRU_CLKSEL_CON18 REG(0x00c8) -+#define CRU_CLKSEL_CON20 REG(0x00d0) -+#define CRU_CLKSEL_CON21 REG(0x00d4) -+#define CRU_GATE_CON00 REG(0x0180) -+#define CRU_GATE_CON01 REG(0x0184) -+#define CRU_GATE_CON02 REG(0x0188) -+#define CRU_GATE_CON03 REG(0x018c) -+#define CRU_GATE_CON04 REG(0x0190) -+#define CRU_GATE_CON05 REG(0x0194) -+#define CRU_SOFTRST_CON00 REG(0x0200) -+#define CRU_SOFTRST_CON01 REG(0x0204) -+#define CRU_SOFTRST_CON02 REG(0x0208) -+#define CRU_SOFTRST_CON04 REG(0x0210) -+#define CRU_MAX_REGISTER CRU_SOFTRST_CON04 + -+#define reset_to_cru(_rst) container_of(_rst, struct rk628_cru, rcdev) ++ /* PD_VEPU */ ++ COMPOSITE_NODIV(HCLK_VEPU_ROOT, "hclk_vepu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VEPU_COM_ROOT, "aclk_vepu_com_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VEPU_ROOT, "aclk_vepu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 2, GFLAGS), ++ 
COMPOSITE_NODIV(PCLK_VEPU_ROOT, "pclk_vepu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 6, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 3, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 12, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(1), 3, GFLAGS), ++ GATE(CLK_UART_DETN_FLT, "clk_uart_detn_flt", "xin24m", 0, ++ RV1106_VEPUCLKGATE_CON(1), 8, GFLAGS), ++ GATE(HCLK_VEPU, "hclk_vepu", "hclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 8, GFLAGS), ++ GATE(ACLK_VEPU, "aclk_vepu", "aclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_VEPU, "clk_core_vepu", mux_400m_300m_pvtpll0_pvtpll1_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 8, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_VEPU_DVBM, "clk_core_vepu_dvbm", mux_200m_100m_50m_24m_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 10, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 13, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 15, GFLAGS), ++ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, ++ RV1106_VEPUCLKGATE_CON(1), 0, GFLAGS), ++ GATE(HCLK_VEPU_PP, "hclk_vepu_pp", "hclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 11, GFLAGS), ++ GATE(ACLK_VEPU_PP, "aclk_vepu_pp", "aclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 12, GFLAGS), + -+struct rk628_cru { -+ struct device *dev; -+ struct rk628 *parent; -+ struct regmap *regmap; -+ struct reset_controller_dev rcdev; -+ struct clk_onecell_data clk_data; -+}; ++ /* PD_VI */ ++ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VI_ROOT, "aclk_vi_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VI_RTC_ROOT, "pclk_vi_rtc_root", mux_50m_24m_p, 0, ++ RV1106_VICLKSEL_CON(0), 6, 1, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 3, GFLAGS), + -+#define CNAME(x) "rk628_" x ++ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 15, GFLAGS), ++ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, ++ RV1106_VICLKGATE_CON(2), 0, GFLAGS), ++ GATE(HCLK_ISP3P2, "hclk_isp3p2", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 7, GFLAGS), ++ GATE(ACLK_ISP3P2, "aclk_isp3p2", "aclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_ISP3P2, "clk_core_isp3p2", mux_339m_200m_pvtpll0_pvtpll1_p, 0, ++ RV1106_VICLKSEL_CON(0), 7, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDMMC, "cclk_src_sdmmc", mux_400m_24m_p, 0, ++ RV1106_VICLKSEL_CON(1), 14, 1, MFLAGS, 8, 6, DFLAGS, ++ RV1106_VICLKGATE_CON(1), 11, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 12, GFLAGS), ++ GATE(CLK_SDMMC_DETN_FLT, "clk_sdmmc_detn_flt", "xin24m", 0, ++ RV1106_VICLKGATE_CON(1), 13, 
GFLAGS), ++ GATE(PCLK_VI_RTC_TEST, "pclk_vi_rtc_test", "pclk_vi_rtc_root", 0, ++ RV1106_VICLKGATE_CON(2), 5, GFLAGS), ++ GATE(PCLK_VI_RTC_PHY, "pclk_vi_rtc_phy", "pclk_vi_rtc_root", 0, ++ RV1106_VICLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", mux_339m_200m_100m_24m_p, 0, ++ RV1106_VICLKSEL_CON(0), 9, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 10, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 12, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 13, GFLAGS), + -+#define PNAME(x) static const char *const x[] ++ /* PD_VO */ ++ COMPOSITE_NODIV(ACLK_MAC_ROOT, "aclk_mac_root", mux_300m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 12, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VOP_ROOT, "aclk_vop_root", mux_300m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 10, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 11, GFLAGS), + -+PNAME(mux_cpll_osc_p) = { CNAME("xin_osc0_func"), CNAME("clk_cpll") }; -+PNAME(mux_gpll_osc_p) = { CNAME("xin_osc0_func"), CNAME("clk_gpll") }; -+PNAME(mux_cpll_gpll_mux_p) = { CNAME("clk_cpll_mux"), CNAME("clk_gpll_mux") }; -+PNAME(mux_mclk_i2s_8ch_p) = { CNAME("clk_i2s_8ch_src"), -+ CNAME("clk_i2s_8ch_frac"), CNAME("i2s_mclkin"), -+ CNAME("xin_osc0_half") }; -+PNAME(mux_i2s_mclkout_p) = { CNAME("mclk_i2s_8ch"), CNAME("xin_osc0_half") }; -+PNAME(mux_clk_testout_p) = { CNAME("xin_osc0_func"), CNAME("xin_osc0_half"), -+ CNAME("clk_gpll"), CNAME("clk_gpll_mux"), -+ CNAME("clk_cpll"), CNAME("clk_gpll_mux"), -+ CNAME("pclk_logic"), CNAME("sclk_vop"), -+ CNAME("mclk_i2s_8ch"), CNAME("i2s_mclkout"), -+ CNAME("dummy"), CNAME("clk_hdmirx_aud"), -+ CNAME("clk_hdmirx_cec"), CNAME("clk_imodet"), -+ CNAME("clk_txesc"), CNAME("clk_gpio_db0") }; ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(3), 0, GFLAGS), ++ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(3), 1, GFLAGS), ++ GATE(ACLK_MAC, "aclk_mac", "aclk_mac_root", 0, ++ RV1106_VOCLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_MAC, "pclk_mac", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(1), 9, GFLAGS), ++ FACTOR(CLK_GMAC0_50M_O, "clk_gmac0_50m_o", "clk_50m_src", 0, 1, 1), ++ FACTOR(CLK_GMAC0_REF_50M, "clk_gmac0_ref_50m", "clk_gmac0_50m_o", 0, 1, 1), ++ DIV(CLK_GMAC0_TX_50M_O, "clk_gmac0_tx_50m_o", "clk_gmac0_50m_o", 0, ++ RV1106_VOCLKSEL_CON(2), 1, 6, DFLAGS), ++ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 13, GFLAGS), ++ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 11, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 3, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 10, 3, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 6, GFLAGS), ++ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_vo_root", 0, ++ 
RV1106_VOCLKGATE_CON(2), 7, GFLAGS), ++ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 13, 3, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 10, GFLAGS), ++ GATE(PCLK_OTP_MASK, "pclk_otp_mask", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 14, GFLAGS), ++ GATE(CLK_PMC_OTP, "clk_pmc_otp", "clk_sbpi_otpc_s", 0, ++ RV1106_VOCLKGATE_CON(2), 15, GFLAGS), ++ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 7, GFLAGS), ++ GATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_400m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 8, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", mux_400m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(2), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RV1106_VOCLKGATE_CON(1), 14, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 0, 5, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 5, 5, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 2, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 13, GFLAGS), ++ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_src", 0, ++ RV1106_VOCLKGATE_CON(0), 14, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, ++ RV1106_VOCLKGATE_CON(0), 15, GFLAGS), + -+static const struct clk_pll_data rk628_clk_plls[] = { -+ RK628_PLL(CGU_CLK_CPLL, CNAME("clk_cpll"), CNAME("xin_osc0_func"), -+ CRU_CPLL_CON0, -+ 0), -+ RK628_PLL(CGU_CLK_GPLL, CNAME("clk_gpll"), CNAME("xin_osc0_func"), -+ CRU_GPLL_CON0, -+ 0), -+}; ++ /* IO CLK */ ++ GATE(RX0PCLK_VICAP, "rx0pclk_vicap", "rx0pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 0, GFLAGS), ++ GATE(RX1PCLK_VICAP, "rx1pclk_vicap", "rx1pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 1, GFLAGS), ++ GATE(ISP0CLK_VICAP, "isp0clk_vicap", "isp0clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 2, GFLAGS), ++ GATE(I0CLK_VICAP, "i0clk_vicap", "i0clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 14, GFLAGS), ++ GATE(I1CLK_VICAP, "i1clk_vicap", "i1clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 15, GFLAGS), ++ GATE(PCLK_VICAP, "pclk_vicap", "pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 11, GFLAGS), ++ GATE(CLK_RXBYTECLKHS_0, "clk_rxbyteclkhs_0", "clk_rxbyteclkhs_0_io", 0, ++ RV1106_VICLKGATE_CON(1), 4, GFLAGS), ++ GATE(CLK_RXBYTECLKHS_1, "clk_rxbyteclkhs_1", "clk_rxbyteclkhs_1_io", 0, ++ RV1106_VICLKGATE_CON(1), 6, GFLAGS), + -+static const struct clk_mux_data rk628_clk_muxes[] = { -+ MUX(CGU_CLK_CPLL_MUX, CNAME("clk_cpll_mux"), mux_cpll_osc_p, -+ CRU_MODE_CON, 0, 1, -+ 0), -+ MUX(CGU_CLK_GPLL_MUX, CNAME("clk_gpll_mux"), mux_gpll_osc_p, -+ CRU_MODE_CON, 2, 1, -+ CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT), -+}; ++ GATE(PCLK_VICAP_VEPU, "pclk_vicap_vepu", "pclk_vicap_vepu_io", 0, ++ RV1106_VEPUCLKGATE_CON(0), 14, GFLAGS), ++ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, ++ RV1106_VEPUCLKGATE_CON(1), 4, GFLAGS), + -+static const struct clk_gate_data rk628_clk_gates[] = { -+ GATE(CGU_PCLK_GPIO0, CNAME("pclk_gpio0"), CNAME("pclk_logic"), -+ CRU_GATE_CON01, 0, -+ 0), -+ GATE(CGU_PCLK_GPIO1, CNAME("pclk_gpio1"), 
CNAME("pclk_logic"), -+ CRU_GATE_CON01, 1, -+ 0), -+ GATE(CGU_PCLK_GPIO2, CNAME("pclk_gpio2"), CNAME("pclk_logic"), -+ CRU_GATE_CON01, 2, -+ 0), -+ GATE(CGU_PCLK_GPIO3, CNAME("pclk_gpio3"), CNAME("pclk_logic"), -+ CRU_GATE_CON01, 3, -+ 0), ++ GATE(CLK_UTMI_USBOTG, "clk_utmi_usbotg", "clk_utmi_usbotg_io", 0, ++ RV1106_PERICLKGATE_CON(4), 9, GFLAGS), + -+ GATE(CGU_PCLK_TXPHY_CON, CNAME("pclk_txphy_con"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 3, -+ CLK_IGNORE_UNUSED), -+ GATE(CGU_PCLK_EFUSE, CNAME("pclk_efuse"), CNAME("pclk_logic"), -+ CRU_GATE_CON00, 5, -+ 0), -+ GATE(0, CNAME("pclk_i2c2apb"), CNAME("pclk_logic"), -+ CRU_GATE_CON00, 3, -+ CLK_IGNORE_UNUSED), -+ GATE(0, CNAME("pclk_cru"), CNAME("pclk_logic"), -+ CRU_GATE_CON00, 1, -+ CLK_IGNORE_UNUSED), -+ GATE(0, CNAME("pclk_adapter"), CNAME("pclk_logic"), -+ CRU_GATE_CON00, 7, -+ CLK_IGNORE_UNUSED), -+ GATE(0, CNAME("pclk_regfile"), CNAME("pclk_logic"), -+ CRU_GATE_CON00, 2, -+ CLK_IGNORE_UNUSED), -+ GATE(CGU_PCLK_DSI0, CNAME("pclk_dsi0"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 6, -+ 0), -+ GATE(CGU_PCLK_DSI1, CNAME("pclk_dsi1"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 7, -+ 0), -+ GATE(CGU_PCLK_CSI, CNAME("pclk_csi"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 8, -+ 0), -+ GATE(CGU_PCLK_HDMITX, CNAME("pclk_hdmitx"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 4, -+ 0), -+ GATE(CGU_PCLK_RXPHY, CNAME("pclk_rxphy"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 0, -+ 0), -+ GATE(CGU_PCLK_HDMIRX, CNAME("pclk_hdmirx"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 2, -+ 0), -+ GATE(CGU_PCLK_GVIHOST, CNAME("pclk_gvihost"), CNAME("pclk_logic"), -+ CRU_GATE_CON02, 5, -+ 0), -+ GATE(CGU_CLK_CFG_DPHY0, CNAME("clk_cfg_dphy0"), CNAME("xin_osc0_func"), -+ CRU_GATE_CON02, 13, -+ 0), -+ GATE(CGU_CLK_CFG_DPHY1, CNAME("clk_cfg_dphy1"), CNAME("xin_osc0_func"), -+ CRU_GATE_CON02, 14, -+ 0), -+ GATE(CGU_CLK_TXESC, CNAME("clk_txesc"), CNAME("xin_osc0_func"), -+ CRU_GATE_CON02, 12, -+ 0), +}; + -+static const struct clk_composite_data rk628_clk_composites[] = { -+ COMPOSITE(CGU_CLK_IMODET, CNAME("clk_imodet"), mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON05, 5, 1, -+ CRU_CLKSEL_CON05, 0, 5, -+ CRU_GATE_CON02, 11, -+ 0), -+ COMPOSITE(CGU_CLK_HDMIRX_AUD, CNAME("clk_hdmirx_aud"), -+ mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON05, 15, 1, -+ CRU_CLKSEL_CON05, 6, 8, -+ CRU_GATE_CON02, 10, -+ CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT), -+ COMPOSITE_FRAC_NOMUX(CGU_CLK_HDMIRX_CEC, CNAME("clk_hdmirx_cec"), -+ CNAME("xin_osc0_func"), -+ CRU_CLKSEL_CON12, -+ CRU_GATE_CON01, 15, -+ 0), -+ COMPOSITE_FRAC(CGU_CLK_RX_READ, CNAME("clk_rx_read"), -+ mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON02, 8, 1, -+ CRU_CLKSEL_CON14, -+ CRU_GATE_CON00, 11, -+ 0), -+ COMPOSITE_FRAC(CGU_SCLK_VOP, CNAME("sclk_vop"), mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON02, 9, 1, -+ CRU_CLKSEL_CON13, -+ CRU_GATE_CON00, 13, -+ CLK_SET_RATE_NO_REPARENT), -+ COMPOSITE(CGU_PCLK_LOGIC, CNAME("pclk_logic"), mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON00, 7, 1, -+ CRU_CLKSEL_CON00, 0, 5, -+ CRU_GATE_CON00, 0, -+ 0), -+ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB0, CNAME("clk_gpio_db0"), -+ CNAME("xin_osc0_func"), -+ CRU_CLKSEL_CON08, 0, 10, -+ CRU_GATE_CON01, 4, -+ 0), -+ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB1, CNAME("clk_gpio_db1"), -+ CNAME("xin_osc0_func"), -+ CRU_CLKSEL_CON09, 0, 10, -+ CRU_GATE_CON01, 5, -+ 0), -+ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB2, CNAME("clk_gpio_db2"), -+ CNAME("xin_osc0_func"), -+ CRU_CLKSEL_CON10, 0, 10, -+ CRU_GATE_CON01, 6, -+ 0), -+ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB3, CNAME("clk_gpio_db3"), -+ CNAME("xin_osc0_func"), -+ CRU_CLKSEL_CON11, 0, 
10, -+ CRU_GATE_CON01, 7, -+ 0), -+ COMPOSITE(CGU_CLK_I2S_8CH_SRC, CNAME("clk_i2s_8ch_src"), -+ mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON03, 13, 1, -+ CRU_CLKSEL_CON03, 8, 5, -+ CRU_GATE_CON03, 9, -+ 0), -+ COMPOSITE_FRAC_NOMUX(CGU_CLK_I2S_8CH_FRAC, CNAME("clk_i2s_8ch_frac"), -+ CNAME("clk_i2s_8ch_src"), -+ CRU_CLKSEL_CON04, -+ CRU_GATE_CON03, 10, -+ 0), -+ COMPOSITE_NODIV(CGU_MCLK_I2S_8CH, CNAME("mclk_i2s_8ch"), -+ mux_mclk_i2s_8ch_p, -+ CRU_CLKSEL_CON03, 14, 2, -+ CRU_GATE_CON03, 11, -+ CLK_SET_RATE_PARENT), -+ COMPOSITE_NODIV(CGU_I2S_MCLKOUT, CNAME("i2s_mclkout"), -+ mux_i2s_mclkout_p, -+ CRU_CLKSEL_CON03, 7, 1, -+ CRU_GATE_CON03, 12, -+ CLK_SET_RATE_PARENT), -+ COMPOSITE(CGU_BT1120DEC, CNAME("clk_bt1120dec"), mux_cpll_gpll_mux_p, -+ CRU_CLKSEL_CON02, 7, 1, -+ CRU_CLKSEL_CON02, 0, 5, -+ CRU_GATE_CON00, 12, -+ 0), -+ COMPOSITE(CGU_CLK_TESTOUT, CNAME("clk_testout"), mux_clk_testout_p, -+ CRU_CLKSEL_CON06, 0, 4, -+ CRU_CLKSEL_CON06, 8, 6, -+ CRU_GATE_CON04, 7, -+ 0), ++static struct rockchip_clk_branch rv1106_grf_clk_branches[] __initdata = { ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_src_emmc", RV1106_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_src_emmc", RV1106_EMMC_CON1, 1), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc", RV1106_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc", RV1106_SDMMC_CON1, 1), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RV1106_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RV1106_SDIO_CON1, 1), +}; + -+static void rk628_clk_add_lookup(struct rk628_cru *cru, struct clk *clk, -+ unsigned int id) -+{ -+ if (cru->clk_data.clks && id) -+ cru->clk_data.clks[id] = clk; -+} ++static void __iomem *rv1106_cru_base; ++static struct rockchip_clk_provider *grf_ctx, *cru_ctx; + -+static void rk628_clk_register_muxes(struct rk628_cru *cru) ++void rv1106_dump_cru(void) +{ -+ struct clk *clk; -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(rk628_clk_muxes); i++) { -+ const struct clk_mux_data *data = &rk628_clk_muxes[i]; -+ -+ clk = devm_clk_regmap_register_mux(cru->dev, data->name, -+ data->parent_names, -+ data->num_parents, -+ cru->regmap, data->reg, -+ data->shift, data->width, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } -+ -+ rk628_clk_add_lookup(cru, clk, data->id); ++ if (rv1106_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rv1106_cru_base, ++ 0x588, false); + } +} ++EXPORT_SYMBOL_GPL(rv1106_dump_cru); + -+static void rk628_clk_register_gates(struct rk628_cru *cru) ++static void _cru_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) +{ -+ struct clk *clk; -+ unsigned int i; ++ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; + -+ for (i = 0; i < ARRAY_SIZE(rk628_clk_gates); i++) { -+ const struct clk_gate_data *data = &rk628_clk_gates[i]; ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); ++ if (rate0 < target_rate) ++ return; ++ /* delta < (3.125% * target_rate) */ ++ if ((rate0 - target_rate) < (target_rate >> 5)) ++ return; + -+ clk = devm_clk_regmap_register_gate(cru->dev, data->name, -+ data->parent_name, -+ cru->regmap, -+ data->reg, data->shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++ length_ori = readl_relaxed(rv1106_cru_base + length_offset) & PVTPLL_LENGTH_SEL_MASK; ++ length = length_ori; ++ length++; ++ val = 
HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate1 = readl_relaxed(rv1106_cru_base + count_offset); ++ if ((rate1 < target_rate) || (rate1 >= rate0)) ++ return; ++ if (abs(rate1 - target_rate) < (target_rate >> 5)) ++ return; + -+ rk628_clk_add_lookup(cru, clk, data->id); ++ step = rate0 - rate1; ++ delta = rate1 - target_rate; ++ length += delta / step; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); ++ ++ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { ++ if (i++ > 20) ++ break; ++ if (rate0 > target_rate) ++ length++; ++ else ++ length--; ++ if (length <= length_ori) ++ break; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); + } +} + -+static void rk628_clk_register_composites(struct rk628_cru *cru) ++static void _grf_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) +{ -+ struct clk *clk; -+ unsigned int i; ++ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; + -+ for (i = 0; i < ARRAY_SIZE(rk628_clk_composites); i++) { -+ const struct clk_composite_data *data = -+ &rk628_clk_composites[i]; ++ regmap_read(cru_ctx->grf, count_offset, &rate0); ++ if (rate0 < target_rate) ++ return; ++ /* delta < (3.125% * target_rate) */ ++ if ((rate0 - target_rate) < (target_rate >> 5)) ++ return; + -+ clk = devm_clk_regmap_register_composite(cru->dev, data->name, -+ data->parent_names, -+ data->num_parents, -+ cru->regmap, -+ data->mux_reg, -+ data->mux_shift, -+ data->mux_width, -+ data->div_reg, -+ data->div_shift, -+ data->div_width, -+ data->div_flags, -+ data->gate_reg, -+ data->gate_shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++ regmap_read(cru_ctx->grf, length_offset, &length_ori); ++ length = length_ori; ++ length_ori = length; ++ length &= PVTPLL_LENGTH_SEL_MASK; ++ length++; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate1); ++ if ((rate1 < target_rate) || (rate1 >= rate0)) ++ return; ++ if (abs(rate1 - target_rate) < (target_rate >> 5)) ++ return; + -+ rk628_clk_add_lookup(cru, clk, data->id); ++ step = rate0 - rate1; ++ delta = rate1 - target_rate; ++ length += delta / step; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate0); ++ ++ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { ++ if (i++ > 20) ++ break; ++ if (rate0 > target_rate) ++ length++; ++ else ++ length--; ++ if (length <= length_ori) ++ break; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate0); + } +} + -+static void rk628_clk_register_plls(struct rk628_cru *cru) ++static void rockchip_rv1106_pvtpll_calibrate(struct work_struct *w) +{ + struct clk 
*clk; -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(rk628_clk_plls); i++) { -+ const struct clk_pll_data *data = &rk628_clk_plls[i]; -+ -+ clk = devm_clk_regmap_register_pll(cru->dev, data->name, -+ data->parent_name, -+ cru->regmap, -+ data->reg, -+ data->pd_shift, -+ data->dsmpd_shift, -+ data->lock_shift, -+ data->flags); -+ if (IS_ERR(clk)) { -+ dev_err(cru->dev, "failed to register clock %s\n", -+ data->name); -+ continue; -+ } ++ unsigned long rate; + -+ rk628_clk_add_lookup(cru, clk, data->id); ++ clk = __clk_lookup("clk_pvtpll_0"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _cru_pvtpll_calibrate(CRU_PVTPLL0_OSC_CNT_AVG, ++ CRU_PVTPLL0_CON0_H, rate / 1000000); + } -+} -+ -+struct rk628_rgu_data { -+ unsigned int id; -+ unsigned int reg; -+ unsigned int bit; -+}; + -+#define RSTGEN(_id, _reg, _bit) \ -+ { \ -+ .id = (_id), \ -+ .reg = (_reg), \ -+ .bit = (_bit), \ ++ clk = __clk_lookup("clk_pvtpll_1"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _cru_pvtpll_calibrate(CRU_PVTPLL1_OSC_CNT_AVG, ++ CRU_PVTPLL1_CON0_H, rate / 1000000); + } + -+static const struct rk628_rgu_data rk628_rgu_data[] = { -+ RSTGEN(RGU_LOGIC, CRU_SOFTRST_CON00, 0), -+ RSTGEN(RGU_CRU, CRU_SOFTRST_CON00, 1), -+ RSTGEN(RGU_REGFILE, CRU_SOFTRST_CON00, 2), -+ RSTGEN(RGU_I2C2APB, CRU_SOFTRST_CON00, 3), -+ RSTGEN(RGU_EFUSE, CRU_SOFTRST_CON00, 5), -+ RSTGEN(RGU_ADAPTER, CRU_SOFTRST_CON00, 7), -+ RSTGEN(RGU_CLK_RX, CRU_SOFTRST_CON00, 11), -+ RSTGEN(RGU_BT1120DEC, CRU_SOFTRST_CON00, 12), -+ RSTGEN(RGU_VOP, CRU_SOFTRST_CON00, 13), -+ -+ RSTGEN(RGU_GPIO0, CRU_SOFTRST_CON01, 0), -+ RSTGEN(RGU_GPIO1, CRU_SOFTRST_CON01, 1), -+ RSTGEN(RGU_GPIO2, CRU_SOFTRST_CON01, 2), -+ RSTGEN(RGU_GPIO3, CRU_SOFTRST_CON01, 3), -+ RSTGEN(RGU_GPIO_DB0, CRU_SOFTRST_CON01, 4), -+ RSTGEN(RGU_GPIO_DB1, CRU_SOFTRST_CON01, 5), -+ RSTGEN(RGU_GPIO_DB2, CRU_SOFTRST_CON01, 6), -+ RSTGEN(RGU_GPIO_DB3, CRU_SOFTRST_CON01, 7), -+ -+ RSTGEN(RGU_RXPHY, CRU_SOFTRST_CON02, 0), -+ RSTGEN(RGU_HDMIRX, CRU_SOFTRST_CON02, 2), -+ RSTGEN(RGU_TXPHY_CON, CRU_SOFTRST_CON02, 3), -+ RSTGEN(RGU_HDMITX, CRU_SOFTRST_CON02, 4), -+ RSTGEN(RGU_GVIHOST, CRU_SOFTRST_CON02, 5), -+ RSTGEN(RGU_DSI0, CRU_SOFTRST_CON02, 6), -+ RSTGEN(RGU_DSI1, CRU_SOFTRST_CON02, 7), -+ RSTGEN(RGU_CSI, CRU_SOFTRST_CON02, 8), -+ RSTGEN(RGU_TXDATA, CRU_SOFTRST_CON02, 9), -+ RSTGEN(RGU_DECODER, CRU_SOFTRST_CON02, 10), -+ RSTGEN(RGU_ENCODER, CRU_SOFTRST_CON02, 11), -+ RSTGEN(RGU_HDMIRX_PON, CRU_SOFTRST_CON02, 12), -+ RSTGEN(RGU_TXBYTEHS, CRU_SOFTRST_CON02, 13), -+ RSTGEN(RGU_TXESC, CRU_SOFTRST_CON02, 14), -+}; -+ -+static int rk628_rgu_update(struct rk628_cru *cru, unsigned long id, int assert) -+{ -+ const struct rk628_rgu_data *data = &rk628_rgu_data[id]; -+ -+ return regmap_write(cru->regmap, data->reg, -+ BIT(data->bit + 16) | (assert << data->bit)); ++ clk = __clk_lookup("cpu_pvtpll"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _grf_pvtpll_calibrate(CPU_PVTPLL_OSC_CNT_AVG, ++ CPU_PVTPLL_CON0_H, rate / 1000000); ++ } +} ++static DECLARE_DEFERRABLE_WORK(pvtpll_calibrate_work, rockchip_rv1106_pvtpll_calibrate); + -+static int rk628_rgu_assert(struct reset_controller_dev *rcdev, -+ unsigned long id) ++static void rockchip_rv1106_pvtpll_init(struct rockchip_clk_provider *ctx) +{ -+ struct rk628_cru *cru = reset_to_cru(rcdev); ++ /* set pvtpll ref clk mux */ ++ writel_relaxed(CPU_PVTPLL_PATH_CORE, ctx->reg_base + CPU_CLK_PATH_BASE); + -+ return rk628_rgu_update(cru, id, 1); -+} ++ regmap_write(ctx->grf, CPU_PVTPLL_CON0_H, HIWORD_UPDATE(0x7, PVTPLL_LENGTH_SEL_MASK, ++ PVTPLL_LENGTH_SEL_SHIFT)); ++ 
regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x1, PVTPLL_RING_SEL_MASK, ++ PVTPLL_RING_SEL_SHIFT)); ++ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x3, PVTPLL_EN_MASK, ++ PVTPLL_EN_SHIFT)); + -+static int rk628_rgu_deassert(struct reset_controller_dev *rcdev, -+ unsigned long id) -+{ -+ struct rk628_cru *cru = reset_to_cru(rcdev); ++ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL0_CON0_H); ++ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL0_CON1_L); ++ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL0_CON2_H); ++ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL0_CON0_L); + -+ return rk628_rgu_update(cru, id, 0); -+} ++ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL1_CON0_H); ++ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL1_CON1_L); ++ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL1_CON2_H); ++ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL1_CON0_L); + -+static struct reset_control_ops rk628_rgu_ops = { -+ .assert = rk628_rgu_assert, -+ .deassert = rk628_rgu_deassert, -+}; ++ schedule_delayed_work(&pvtpll_calibrate_work, msecs_to_jiffies(3000)); ++} + -+static int rk628_reset_controller_register(struct rk628_cru *cru) ++static int rv1106_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) +{ -+ struct device *dev = cru->dev; -+ -+ cru->rcdev.owner = THIS_MODULE; -+ cru->rcdev.nr_resets = ARRAY_SIZE(rk628_rgu_data); -+ cru->rcdev.of_node = dev->of_node; -+ cru->rcdev.ops = &rk628_rgu_ops; -+ -+ return devm_reset_controller_register(dev, &cru->rcdev); ++ rv1106_dump_cru(); ++ return NOTIFY_DONE; +} + -+static const struct regmap_range rk628_cru_readable_ranges[] = { -+ regmap_reg_range(CRU_CPLL_CON0, CRU_CPLL_CON4), -+ regmap_reg_range(CRU_GPLL_CON0, CRU_GPLL_CON4), -+ regmap_reg_range(CRU_MODE_CON, CRU_MODE_CON), -+ regmap_reg_range(CRU_CLKSEL_CON00, CRU_CLKSEL_CON21), -+ regmap_reg_range(CRU_GATE_CON00, CRU_GATE_CON05), -+ regmap_reg_range(CRU_SOFTRST_CON00, CRU_SOFTRST_CON04), -+}; -+ -+static const struct regmap_access_table rk628_cru_readable_table = { -+ .yes_ranges = rk628_cru_readable_ranges, -+ .n_yes_ranges = ARRAY_SIZE(rk628_cru_readable_ranges), -+}; -+ -+static const struct regmap_config rk628_cru_regmap_config = { -+ .name = "cru", -+ .reg_bits = 32, -+ .val_bits = 32, -+ .reg_stride = 4, -+ .max_register = CRU_MAX_REGISTER, -+ .reg_format_endian = REGMAP_ENDIAN_LITTLE, -+ .val_format_endian = REGMAP_ENDIAN_LITTLE, -+ .rd_table = &rk628_cru_readable_table, ++static struct notifier_block rv1106_clk_panic_block = { ++ .notifier_call = rv1106_clk_panic, +}; + -+static void rk628_cru_init(struct rk628_cru *cru) ++static void __init rv1106_clk_init(struct device_node *np) +{ -+ u32 val = 0; -+ u8 mcu_mode; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **cru_clks; + -+ regmap_read(cru->parent->grf, GRF_SYSTEM_STATUS0, &val); -+ mcu_mode = (val & I2C_ONLY_FLAG) ? 
0 : 1; -+ if (mcu_mode) ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); + return; -+ -+ /* clock switch and first set gpll almost 99MHz */ -+ regmap_write(cru->regmap, CRU_GPLL_CON0, 0xffff701d); -+ usleep_range(1000, 1100); -+ /* set clk_gpll_mux from gpll */ -+ regmap_write(cru->regmap, CRU_MODE_CON, 0xffff0004); -+ usleep_range(1000, 1100); -+ /* set pclk_logic from clk_gpll_mux and set pclk div 4 */ -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0080); -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0083); -+ /* set cpll almost 400MHz */ -+ regmap_write(cru->regmap, CRU_CPLL_CON0, 0xffff3063); -+ usleep_range(1000, 1100); -+ /* set clk_cpll_mux from clk_cpll */ -+ regmap_write(cru->regmap, CRU_MODE_CON, 0xffff0005); -+ /* set pclk use cpll, now div is 4 */ -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0003); -+ /* set pclk use cpll, now div is 12 */ -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff000b); -+ /* gpll 983.04MHz */ -+ regmap_write(cru->regmap, CRU_GPLL_CON0, 0xffff1028); -+ usleep_range(1000, 1100); -+ /* set pclk use gpll, nuw div is 0xb */ -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff008b); -+ /* set cpll 1188MHz */ -+ regmap_write(cru->regmap, CRU_CPLL_CON0, 0xffff1063); -+ usleep_range(1000, 1100); -+ /* set pclk use cpll, and set pclk 99MHz */ -+ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff000b); -+} -+ -+static int rk628_cru_probe(struct platform_device *pdev) -+{ -+ struct rk628 *rk628 = dev_get_drvdata(pdev->dev.parent); -+ struct device *dev = &pdev->dev; -+ struct rk628_cru *cru; -+ struct clk **clk_table; -+ unsigned int i; -+ int ret; -+ -+ cru = devm_kzalloc(dev, sizeof(*cru), GFP_KERNEL); -+ if (!cru) -+ return -ENOMEM; -+ -+ cru->dev = dev; -+ cru->parent = rk628; -+ platform_set_drvdata(pdev, cru); -+ -+ cru->regmap = devm_regmap_init_i2c(rk628->client, -+ &rk628_cru_regmap_config); -+ if (IS_ERR(cru->regmap)) { -+ ret = PTR_ERR(cru->regmap); -+ dev_err(dev, "failed to allocate register map: %d\n", ret); -+ return ret; + } + -+ rk628_cru_init(cru); -+ -+ clk_table = devm_kcalloc(dev, CGU_NR_CLKS, sizeof(struct clk *), -+ GFP_KERNEL); -+ if (!clk_table) -+ return -ENOMEM; ++ rv1106_cru_base = reg_base; + -+ for (i = 0; i < CGU_NR_CLKS; i++) -+ clk_table[i] = ERR_PTR(-ENOENT); ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ cru_ctx = ctx; + -+ cru->clk_data.clks = clk_table; -+ cru->clk_data.clk_num = CGU_NR_CLKS; ++ rockchip_rv1106_pvtpll_init(ctx); + -+ rk628_clk_register_plls(cru); -+ rk628_clk_register_muxes(cru); -+ rk628_clk_register_gates(cru); -+ rk628_clk_register_composites(cru); -+ rk628_reset_controller_register(cru); ++ cru_clks = ctx->clk_data.clks; + -+ clk_prepare_enable(clk_table[CGU_PCLK_LOGIC]); ++ rockchip_clk_register_plls(ctx, rv1106_pll_clks, ++ ARRAY_SIZE(rv1106_pll_clks), ++ RV1106_GRF_SOC_STATUS0); + -+ return of_clk_add_provider(dev->of_node, of_clk_src_onecell_get, -+ &cru->clk_data); -+} ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 3, cru_clks[PLL_APLL], cru_clks[PLL_GPLL], ++ &rv1106_cpuclk_data, rv1106_cpuclk_rates, ++ ARRAY_SIZE(rv1106_cpuclk_rates)); + -+static int rk628_cru_remove(struct platform_device *pdev) -+{ -+ of_clk_del_provider(pdev->dev.of_node); ++ rockchip_clk_register_branches(ctx, rv1106_clk_branches, ++ ARRAY_SIZE(rv1106_clk_branches)); + -+ return 0; -+} ++ 
rockchip_clk_register_branches(grf_ctx, rv1106_grf_clk_branches, ++ ARRAY_SIZE(rv1106_grf_clk_branches)); + -+static const struct of_device_id rk628_cru_of_match[] = { -+ { .compatible = "rockchip,rk628-cru", }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(of, rk628_cru_of_match); ++ rockchip_register_softrst(np, 31745, reg_base + RV1106_PMUSOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+static struct platform_driver rk628_cru_driver = { -+ .driver = { -+ .name = "rk628-cru", -+ .of_match_table = of_match_ptr(rk628_cru_of_match), -+ }, -+ .probe = rk628_cru_probe, -+ .remove = rk628_cru_remove, -+}; -+module_platform_driver(rk628_cru_driver); ++ rockchip_register_restart_notifier(ctx, RV1106_GLB_SRST_FST, NULL); + -+MODULE_AUTHOR("Wyon Bi "); -+MODULE_DESCRIPTION("Rockchip RK628 CRU driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clk/rockchip-oh/softrst.c b/drivers/clk/rockchip-oh/softrst.c -new file mode 100644 -index 000000000..5d0726674 ---- /dev/null -+++ b/drivers/clk/rockchip-oh/softrst.c -@@ -0,0 +1,110 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2014 MundoReader S.L. -+ * Author: Heiko Stuebner -+ */ ++ rockchip_clk_of_add_provider(np, ctx); + -+#include -+#include -+#include -+#include -+#include "clk.h" ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rv1106_clk_panic_block); ++} + -+struct rockchip_softrst { -+ struct reset_controller_dev rcdev; -+ void __iomem *reg_base; -+ int num_regs; -+ int num_per_reg; -+ u8 flags; -+ spinlock_t lock; -+}; ++CLK_OF_DECLARE(rv1106_cru, "rockchip,rv1106-cru", rv1106_clk_init); + -+static int rockchip_softrst_assert(struct reset_controller_dev *rcdev, -+ unsigned long id) ++static void __init rv1106_grf_clk_init(struct device_node *np) +{ -+ struct rockchip_softrst *softrst = container_of(rcdev, -+ struct rockchip_softrst, -+ rcdev); -+ int bank = id / softrst->num_per_reg; -+ int offset = id % softrst->num_per_reg; -+ -+ if (softrst->flags & ROCKCHIP_SOFTRST_HIWORD_MASK) { -+ writel(BIT(offset) | (BIT(offset) << 16), -+ softrst->reg_base + (bank * 4)); -+ } else { -+ unsigned long flags; -+ u32 reg; -+ -+ spin_lock_irqsave(&softrst->lock, flags); -+ -+ reg = readl(softrst->reg_base + (bank * 4)); -+ writel(reg | BIT(offset), softrst->reg_base + (bank * 4)); ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ spin_unlock_irqrestore(&softrst->lock, flags); ++ reg_base = of_iomap(of_get_parent(np), 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru grf region\n", __func__); ++ return; + } + -+ return 0; -+} -+ -+static int rockchip_softrst_deassert(struct reset_controller_dev *rcdev, -+ unsigned long id) -+{ -+ struct rockchip_softrst *softrst = container_of(rcdev, -+ struct rockchip_softrst, -+ rcdev); -+ int bank = id / softrst->num_per_reg; -+ int offset = id % softrst->num_per_reg; -+ -+ if (softrst->flags & ROCKCHIP_SOFTRST_HIWORD_MASK) { -+ writel((BIT(offset) << 16), softrst->reg_base + (bank * 4)); -+ } else { -+ unsigned long flags; -+ u32 reg; -+ -+ spin_lock_irqsave(&softrst->lock, flags); -+ -+ reg = readl(softrst->reg_base + (bank * 4)); -+ writel(reg & ~BIT(offset), softrst->reg_base + (bank * 4)); -+ -+ spin_unlock_irqrestore(&softrst->lock, flags); ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip grf clk init failed\n", __func__); ++ return; + } ++ grf_ctx = ctx; + -+ return 0; ++ rockchip_clk_of_add_provider(np, ctx); +} ++CLK_OF_DECLARE(rv1106_grf_cru, "rockchip,rv1106-grf-cru", rv1106_grf_clk_init); 
+ -+static const struct reset_control_ops rockchip_softrst_ops = { -+ .assert = rockchip_softrst_assert, -+ .deassert = rockchip_softrst_deassert, ++#ifdef MODULE ++struct clk_rv1106_inits { ++ void (*inits)(struct device_node *np); +}; + -+void rockchip_register_softrst(struct device_node *np, -+ unsigned int num_regs, -+ void __iomem *base, u8 flags) -+{ -+ struct rockchip_softrst *softrst; -+ int ret; -+ -+ softrst = kzalloc(sizeof(*softrst), GFP_KERNEL); -+ if (!softrst) -+ return; -+ -+ spin_lock_init(&softrst->lock); -+ -+ softrst->reg_base = base; -+ softrst->flags = flags; -+ softrst->num_regs = num_regs; -+ softrst->num_per_reg = (flags & ROCKCHIP_SOFTRST_HIWORD_MASK) ? 16 -+ : 32; -+ -+ softrst->rcdev.owner = THIS_MODULE; -+ softrst->rcdev.nr_resets = num_regs * softrst->num_per_reg; -+ softrst->rcdev.ops = &rockchip_softrst_ops; -+ softrst->rcdev.of_node = np; -+ ret = reset_controller_register(&softrst->rcdev); -+ if (ret) { -+ pr_err("%s: could not register reset controller, %d\n", -+ __func__, ret); -+ kfree(softrst); -+ } ++static const struct clk_rv1106_inits clk_rv1106_init = { ++ .inits = rv1106_clk_init, +}; -+EXPORT_SYMBOL_GPL(rockchip_register_softrst); -diff --git a/drivers/clk/rockchip/clk-dclk-divider.c b/drivers/clk/rockchip/clk-dclk-divider.c -new file mode 100644 -index 000000000..88cf7ab82 ---- /dev/null -+++ b/drivers/clk/rockchip/clk-dclk-divider.c -@@ -0,0 +1,168 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" -+ -+#define div_mask(width) ((1 << (width)) - 1) -+ -+static unsigned long clk_dclk_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int val; -+ -+ val = clk_readl(divider->reg) >> divider->shift; -+ val &= div_mask(divider->width); -+ -+ return DIV_ROUND_UP_ULL(((u64)parent_rate), val + 1); -+} + -+static long clk_dclk_round_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long *prate) -+{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ int div, maxdiv = div_mask(divider->width) + 1; ++static const struct clk_rv1106_inits clk_rv1106_grf_init = { ++ .inits = rv1106_grf_clk_init, ++}; + -+ div = DIV_ROUND_UP_ULL(divider->max_prate, rate); -+ if (div % 2) -+ div = __rounddown_pow_of_two(div); -+ div = div > maxdiv ? 
maxdiv : div; -+ *prate = div * rate; -+ return rate; -+} ++static const struct of_device_id clk_rv1106_match_table[] = { ++ { ++ .compatible = "rockchip,rv1106-cru", ++ .data = &clk_rv1106_init, ++ }, { ++ .compatible = "rockchip,rv1106-grf-cru", ++ .data = &clk_rv1106_grf_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rv1106_match_table); + -+static int clk_dclk_set_rate(struct clk_hw *hw, unsigned long rate, -+ unsigned long parent_rate) ++static int __init clk_rv1106_probe(struct platform_device *pdev) +{ -+ struct clk_divider *divider = to_clk_divider(hw); -+ unsigned int value; -+ unsigned long flags = 0; -+ u32 val; -+ -+ value = divider_get_val(rate, parent_rate, divider->table, -+ divider->width, divider->flags); -+ -+ if (divider->lock) -+ spin_lock_irqsave(divider->lock, flags); -+ else -+ __acquire(divider->lock); ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rv1106_inits *init_data; + -+ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { -+ val = div_mask(divider->width) << (divider->shift + 16); -+ } else { -+ val = clk_readl(divider->reg); -+ val &= ~(div_mask(divider->width) << divider->shift); -+ } -+ val |= value << divider->shift; -+ clk_writel(val, divider->reg); ++ match = of_match_device(clk_rv1106_match_table, &pdev->dev); ++ if (!match || !match->data) ++ return -EINVAL; + -+ if (divider->lock) -+ spin_unlock_irqrestore(divider->lock, flags); -+ else -+ __release(divider->lock); ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + + return 0; +} + -+const struct clk_ops clk_dclk_divider_ops = { -+ .recalc_rate = clk_dclk_recalc_rate, -+ .round_rate = clk_dclk_round_rate, -+ .set_rate = clk_dclk_set_rate, ++static struct platform_driver clk_rv1106_driver = { ++ .driver = { ++ .name = "clk-rv1106", ++ .of_match_table = clk_rv1106_match_table, ++ }, +}; -+EXPORT_SYMBOL_GPL(clk_dclk_divider_ops); -+ -+/** -+ * Register a clock branch. -+ * Most clock branches have a form like -+ * -+ * src1 --|--\ -+ * |M |--[GATE]-[DIV]- -+ * src2 --|--/ -+ * -+ * sometimes without one of those components. -+ */ -+struct clk *rockchip_clk_register_dclk_branch(const char *name, -+ const char *const *parent_names, -+ u8 num_parents, -+ void __iomem *base, -+ int muxdiv_offset, u8 mux_shift, -+ u8 mux_width, u8 mux_flags, -+ int div_offset, u8 div_shift, -+ u8 div_width, u8 div_flags, -+ struct clk_div_table *div_table, -+ int gate_offset, -+ u8 gate_shift, u8 gate_flags, -+ unsigned long flags, -+ unsigned long max_prate, -+ spinlock_t *lock) -+{ -+ struct clk *clk; -+ struct clk_mux *mux = NULL; -+ struct clk_gate *gate = NULL; -+ struct clk_divider *div = NULL; -+ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, -+ *gate_ops = NULL; -+ -+ if (num_parents > 1) { -+ mux = kzalloc(sizeof(*mux), GFP_KERNEL); -+ if (!mux) -+ return ERR_PTR(-ENOMEM); -+ -+ mux->reg = base + muxdiv_offset; -+ mux->shift = mux_shift; -+ mux->mask = BIT(mux_width) - 1; -+ mux->flags = mux_flags; -+ mux->lock = lock; -+ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? 
&clk_mux_ro_ops -+ : &clk_mux_ops; -+ } -+ -+ if (gate_offset >= 0) { -+ gate = kzalloc(sizeof(*gate), GFP_KERNEL); -+ if (!gate) -+ goto err_gate; -+ -+ gate->flags = gate_flags; -+ gate->reg = base + gate_offset; -+ gate->bit_idx = gate_shift; -+ gate->lock = lock; -+ gate_ops = &clk_gate_ops; -+ } -+ -+ if (div_width > 0) { -+ div = kzalloc(sizeof(*div), GFP_KERNEL); -+ if (!div) -+ goto err_div; -+ -+ div->flags = div_flags; -+ if (div_offset) -+ div->reg = base + div_offset; -+ else -+ div->reg = base + muxdiv_offset; -+ div->shift = div_shift; -+ div->width = div_width; -+ div->lock = lock; -+ div->max_prate = max_prate; -+ div_ops = &clk_dclk_divider_ops; -+ } -+ -+ clk = clk_register_composite(NULL, name, parent_names, num_parents, -+ mux ? &mux->hw : NULL, mux_ops, -+ div ? &div->hw : NULL, div_ops, -+ gate ? &gate->hw : NULL, gate_ops, -+ flags); ++builtin_platform_driver_probe(clk_rv1106_driver, clk_rv1106_probe); + -+ return clk; -+err_div: -+ kfree(gate); -+err_gate: -+ kfree(mux); -+ return ERR_PTR(-ENOMEM); -+} -diff --git a/drivers/clk/rockchip/clk-link.c b/drivers/clk/rockchip/clk-link.c ++MODULE_DESCRIPTION("Rockchip RV1106 Clock Driver"); ++MODULE_LICENSE("GPL"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk-rv1108.c b/drivers/clk/rockchip-oh/clk-rv1108.c new file mode 100644 -index 000000000..78ff9b53c +index 000000000..0a93b9333 --- /dev/null -+++ b/drivers/clk/rockchip/clk-link.c -@@ -0,0 +1,244 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk-rv1108.c +@@ -0,0 +1,855 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2021 Rockchip Electronics Co., Ltd ++ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. ++ * Author: Shawn Lin ++ * Andy Yan + */ + +#include +#include +#include ++#include +#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include "clk.h" + -+struct rockchip_link_info { -+ u32 shift; -+ const char *name; -+ const char *pname; -+}; ++#define RV1108_GRF_SOC_STATUS0 0x480 + -+struct rockchip_link { -+ int num; -+ const struct rockchip_link_info *info; ++enum rv1108_plls { ++ apll, dpll, gpll, +}; + -+struct rockchip_link_clk { -+ void __iomem *base; -+ struct clk_gate *gate; -+ spinlock_t lock; -+ u32 shift; -+ u32 flag; -+ const char *name; -+ const char *pname; -+ const char *link_name; -+ const struct rockchip_link *link; ++static struct rockchip_pll_rate_table rv1108_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 2, 99, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 12, 550, 1, 1, 1, 
0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 6, 500, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 900000000, 4, 300, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 800000000, 6, 400, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 700000000, 6, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE( 500000000, 6, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE( 408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE( 96000000, 1, 64, 4, 4, 1, 0), ++ { /* sentinel */ }, +}; + -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) -+ -+#define GATE_LINK(_name, _pname, _shift) \ -+{ \ -+ .name = _name, \ -+ .pname = _pname, \ -+ .shift = (_shift), \ -+} -+ -+static int register_clocks(struct rockchip_link_clk *priv, struct device *dev) -+{ -+ struct clk_gate *gate; -+ struct clk_init_data init = {}; -+ struct clk *clk; -+ -+ gate = devm_kzalloc(dev, sizeof(struct clk_gate), GFP_KERNEL); -+ if (!gate) -+ return -ENOMEM; -+ -+ init.name = priv->name; -+ init.ops = &clk_gate_ops; -+ init.flags |= CLK_SET_RATE_PARENT; -+ init.parent_names = &priv->pname; -+ init.num_parents = 1; -+ -+ /* struct clk_gate assignments */ -+ gate->reg = priv->base; -+ gate->bit_idx = priv->shift; -+ gate->flags = GFLAGS; -+ gate->lock = &priv->lock; -+ gate->hw.init = &init; ++#define RV1108_DIV_CORE_MASK 0xf ++#define RV1108_DIV_CORE_SHIFT 4 + -+ clk = devm_clk_register(dev, &gate->hw); -+ if (IS_ERR(clk)) -+ return -EINVAL; ++#define RV1108_CLKSEL0(_core_peri_div) \ ++ { \ ++ .reg = RV1108_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_core_peri_div, RV1108_DIV_CORE_MASK,\ ++ RV1108_DIV_CORE_SHIFT) \ ++ } + -+ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, clk); -+} ++#define RV1108_CPUCLK_RATE(_prate, _core_peri_div) \ ++ { \ ++ .prate = _prate, \ ++ .divs = { \ ++ RV1108_CLKSEL0(_core_peri_div), \ ++ }, \ ++ } + -+static const struct rockchip_link_info rk3562_clk_gate_link_info[] = { -+ GATE_LINK("aclk_rga_jdec", "aclk_rga_pre", 3), -+ GATE_LINK("aclk_vdpu", "aclk_vdpu_pre", 5), -+ GATE_LINK("aclk_vepu", "aclk_vepu_pre", 3), -+ GATE_LINK("aclk_vi_isp", "aclk_vi", 3), -+ GATE_LINK("aclk_vo", "aclk_vo_pre", 3), -+ GATE_LINK("hclk_vepu", "hclk_vepu_pre", 4), ++static struct rockchip_cpuclk_rate_table rv1108_cpuclk_rates[] __initdata = { ++ RV1108_CPUCLK_RATE(1608000000, 7), ++ RV1108_CPUCLK_RATE(1512000000, 7), ++ RV1108_CPUCLK_RATE(1488000000, 5), ++ RV1108_CPUCLK_RATE(1416000000, 5), ++ RV1108_CPUCLK_RATE(1392000000, 5), ++ RV1108_CPUCLK_RATE(1296000000, 5), ++ RV1108_CPUCLK_RATE(1200000000, 5), ++ RV1108_CPUCLK_RATE(1104000000, 5), ++ RV1108_CPUCLK_RATE(1008000000, 5), ++ RV1108_CPUCLK_RATE(912000000, 5), ++ RV1108_CPUCLK_RATE(816000000, 3), ++ RV1108_CPUCLK_RATE(696000000, 3), ++ RV1108_CPUCLK_RATE(600000000, 3), ++ RV1108_CPUCLK_RATE(500000000, 3), ++ RV1108_CPUCLK_RATE(408000000, 1), ++ RV1108_CPUCLK_RATE(312000000, 1), 
++ RV1108_CPUCLK_RATE(216000000, 1), ++ RV1108_CPUCLK_RATE(96000000, 1), +}; + -+static const struct rockchip_link rk3562_clk_gate_link = { -+ .num = ARRAY_SIZE(rk3562_clk_gate_link_info), -+ .info = rk3562_clk_gate_link_info, ++static const struct rockchip_cpuclk_reg_data rv1108_cpuclk_data = { ++ .core_reg[0] = RV1108_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 8, ++ .mux_core_mask = 0x3, +}; + -+static const struct rockchip_link_info rk3588_clk_gate_link_info[] = { -+ GATE_LINK("aclk_isp1_pre", "aclk_isp1_root", 6), -+ GATE_LINK("hclk_isp1_pre", "hclk_isp1_root", 8), -+ GATE_LINK("hclk_nvm", "hclk_nvm_root", 2), -+ GATE_LINK("aclk_usb", "aclk_usb_root", 2), -+ GATE_LINK("hclk_usb", "hclk_usb_root", 3), -+ GATE_LINK("aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", 7), -+ GATE_LINK("aclk_vdpu_low_pre", "aclk_vdpu_low_root", 5), -+ GATE_LINK("aclk_rkvenc1_pre", "aclk_rkvenc1_root", 3), -+ GATE_LINK("hclk_rkvenc1_pre", "hclk_rkvenc1_root", 2), -+ GATE_LINK("hclk_rkvdec0_pre", "hclk_rkvdec0_root", 5), -+ GATE_LINK("aclk_rkvdec0_pre", "aclk_rkvdec0_root", 6), -+ GATE_LINK("hclk_rkvdec1_pre", "hclk_rkvdec1_root", 4), -+ GATE_LINK("aclk_rkvdec1_pre", "aclk_rkvdec1_root", 5), -+ GATE_LINK("aclk_hdcp0_pre", "aclk_vo0_root", 9), -+ GATE_LINK("hclk_vo0", "hclk_vo0_root", 5), -+ GATE_LINK("aclk_hdcp1_pre", "aclk_hdcp1_root", 6), -+ GATE_LINK("hclk_vo1", "hclk_vo1_root", 9), -+ GATE_LINK("aclk_av1_pre", "aclk_av1_root", 1), -+ GATE_LINK("pclk_av1_pre", "pclk_av1_root", 4), -+ GATE_LINK("hclk_sdio_pre", "hclk_sdio_root", 1), -+ GATE_LINK("pclk_vo0_grf", "pclk_vo0_root", 10), -+ GATE_LINK("pclk_vo1_grf", "pclk_vo1_root", 12), -+}; ++PNAME(mux_pll_p) = { "xin24m", "xin24m"}; ++PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr", "apll_ddr" }; ++PNAME(mux_usb480m_pre_p) = { "usbphy", "xin24m" }; ++PNAME(mux_hdmiphy_phy_p) = { "hdmiphy", "xin24m" }; ++PNAME(mux_dclk_hdmiphy_pre_p) = { "dclk_hdmiphy_src_gpll", "dclk_hdmiphy_src_dpll" }; ++PNAME(mux_pll_src_4plls_p) = { "dpll", "gpll", "hdmiphy", "usb480m" }; ++PNAME(mux_pll_src_2plls_p) = { "dpll", "gpll" }; ++PNAME(mux_pll_src_apll_gpll_p) = { "apll", "gpll" }; ++PNAME(mux_aclk_peri_src_p) = { "aclk_peri_src_gpll", "aclk_peri_src_dpll" }; ++PNAME(mux_aclk_bus_src_p) = { "aclk_bus_src_gpll", "aclk_bus_src_apll", "aclk_bus_src_dpll" }; ++PNAME(mux_mmc_src_p) = { "dpll", "gpll", "xin24m", "usb480m" }; ++PNAME(mux_pll_src_dpll_gpll_usb480m_p) = { "dpll", "gpll", "usb480m" }; ++PNAME(mux_uart0_p) = { "uart0_src", "uart0_frac", "xin24m" }; ++PNAME(mux_uart1_p) = { "uart1_src", "uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "uart2_src", "uart2_frac", "xin24m" }; ++PNAME(mux_sclk_mac_p) = { "sclk_mac_pre", "ext_gmac" }; ++PNAME(mux_i2s0_pre_p) = { "i2s0_src", "i2s0_frac", "ext_i2s", "xin12m" }; ++PNAME(mux_i2s_out_p) = { "i2s0_pre", "xin12m" }; ++PNAME(mux_i2s1_p) = { "i2s1_src", "i2s1_frac", "dummy", "xin12m" }; ++PNAME(mux_i2s2_p) = { "i2s2_src", "i2s2_frac", "dummy", "xin12m" }; ++PNAME(mux_wifi_src_p) = { "gpll", "xin24m" }; ++PNAME(mux_cifout_src_p) = { "hdmiphy", "gpll" }; ++PNAME(mux_cifout_p) = { "sclk_cifout_src", "xin24m" }; ++PNAME(mux_sclk_cif0_src_p) = { "pclk_vip", "clk_cif0_chn_out", "pclkin_cvbs2cif" }; ++PNAME(mux_sclk_cif1_src_p) = { "pclk_vip", "clk_cif1_chn_out", "pclkin_cvbs2cif" }; ++PNAME(mux_sclk_cif2_src_p) = { "pclk_vip", "clk_cif2_chn_out", "pclkin_cvbs2cif" }; ++PNAME(mux_sclk_cif3_src_p) = { "pclk_vip", "clk_cif3_chn_out", 
"pclkin_cvbs2cif" }; ++PNAME(mux_dsp_src_p) = { "dpll", "gpll", "apll", "usb480m" }; ++PNAME(mux_dclk_hdmiphy_p) = { "hdmiphy", "xin24m" }; ++PNAME(mux_dclk_vop_p) = { "dclk_hdmiphy", "dclk_vop_src" }; ++PNAME(mux_hdmi_cec_src_p) = { "dpll", "gpll", "xin24m" }; ++PNAME(mux_cvbs_src_p) = { "apll", "io_cvbs_clkin", "hdmiphy", "gpll" }; + -+static const struct rockchip_link rk3588_clk_gate_link = { -+ .num = ARRAY_SIZE(rk3588_clk_gate_link_info), -+ .info = rk3588_clk_gate_link_info, ++static struct rockchip_pll_clock rv1108_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3399, PLL_APLL, "apll", mux_pll_p, 0, RV1108_PLL_CON(0), ++ RV1108_PLL_CON(3), 8, 0, 0, rv1108_pll_rates), ++ [dpll] = PLL(pll_rk3399, PLL_DPLL, "dpll", mux_pll_p, 0, RV1108_PLL_CON(8), ++ RV1108_PLL_CON(11), 8, 1, 0, NULL), ++ [gpll] = PLL(pll_rk3399, PLL_GPLL, "gpll", mux_pll_p, 0, RV1108_PLL_CON(16), ++ RV1108_PLL_CON(19), 8, 2, 0, rv1108_pll_rates), +}; + -+static const struct of_device_id rockchip_clk_link_of_match[] = { -+ { -+ .compatible = "rockchip,rk3562-clock-gate-link", -+ .data = (void *)&rk3562_clk_gate_link, -+ }, -+ { -+ .compatible = "rockchip,rk3588-clock-gate-link", -+ .data = (void *)&rk3588_clk_gate_link, -+ }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, rockchip_clk_link_of_match); ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define IFLAGS ROCKCHIP_INVERTER_HIWORD_MASK + -+static const struct rockchip_link_info * -+rockchip_get_link_infos(const struct rockchip_link *link, const char *name) -+{ -+ const struct rockchip_link_info *info = link->info; -+ int i = 0; ++static struct rockchip_clk_branch rv1108_uart0_fracmux __initdata = ++ MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(13), 8, 2, MFLAGS); + -+ for (i = 0; i < link->num; i++) { -+ if (strcmp(info->name, name) == 0) -+ break; -+ info++; -+ } -+ return info; -+} ++static struct rockchip_clk_branch rv1108_uart1_fracmux __initdata = ++ MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(14), 8, 2, MFLAGS); + -+static int rockchip_clk_link_probe(struct platform_device *pdev) -+{ -+ struct rockchip_link_clk *priv; -+ struct device_node *node = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const char *clk_name; -+ const struct rockchip_link_info *link_info; -+ int ret; ++static struct rockchip_clk_branch rv1108_uart2_fracmux __initdata = ++ MUX(SCLK_UART2, "sclk_uart2", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(15), 8, 2, MFLAGS); + -+ match = of_match_node(rockchip_clk_link_of_match, node); -+ if (!match) -+ return -ENXIO; ++static struct rockchip_clk_branch rv1108_i2s0_fracmux __initdata = ++ MUX(0, "i2s0_pre", mux_i2s0_pre_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(5), 12, 2, MFLAGS); + -+ priv = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_link_clk), -+ GFP_KERNEL); -+ if (!priv) -+ return -ENOMEM; ++static struct rockchip_clk_branch rv1108_i2s1_fracmux __initdata = ++ MUX(0, "i2s1_pre", mux_i2s1_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(6), 12, 2, MFLAGS); + -+ priv->link = match->data; ++static struct rockchip_clk_branch rv1108_i2s2_fracmux __initdata = ++ MUX(0, "i2s2_pre", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(7), 12, 2, MFLAGS); + -+ spin_lock_init(&priv->lock); -+ platform_set_drvdata(pdev, priv); ++static struct rockchip_clk_branch rv1108_clk_branches[] __initdata = { ++ MUX(0, "hdmiphy", mux_hdmiphy_phy_p, CLK_SET_RATE_PARENT, 
++ RV1108_MISC_CON, 13, 1, MFLAGS), ++ MUX(0, "usb480m", mux_usb480m_pre_p, CLK_SET_RATE_PARENT, ++ RV1108_MISC_CON, 15, 1, MFLAGS), ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ priv->base = of_iomap(node, 0); -+ if (IS_ERR(priv->base)) -+ return PTR_ERR(priv->base); ++ /* PD_CORE */ ++ GATE(0, "dpll_core", "dpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclken_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1108_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_ENMCORE, "aclkenm_core", "armclk", CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1108_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(ACLK_CORE, "aclk_core", "aclkenm_core", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(0, "pclk_dbg", "pclken_dbg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(11), 1, GFLAGS), + -+ if (of_property_read_string(node, "clock-output-names", &clk_name)) -+ priv->name = node->name; -+ else -+ priv->name = clk_name; ++ /* PD_RKVENC */ ++ COMPOSITE(0, "aclk_rkvenc_pre", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(37), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 8, GFLAGS), ++ FACTOR_GATE(0, "hclk_rkvenc_pre", "aclk_rkvenc_pre", 0, 1, 4, ++ RV1108_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE(SCLK_VENC_CORE, "clk_venc_core", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(37), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_pre", 0, ++ RV1108_CLKGATE_CON(19), 8, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_pre", 0, ++ RV1108_CLKGATE_CON(19), 9, GFLAGS), ++ GATE(0, "aclk_rkvenc_niu", "aclk_rkvenc_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(19), 11, GFLAGS), ++ GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(19), 10, GFLAGS), + -+ link_info = rockchip_get_link_infos(priv->link, priv->name); -+ priv->shift = link_info->shift; -+ priv->pname = link_info->pname; ++ /* PD_RKVDEC */ ++ COMPOSITE(SCLK_HEVC_CORE, "sclk_hevc_core", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(36), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 2, GFLAGS), ++ FACTOR_GATE(0, "hclk_rkvdec_pre", "sclk_hevc_core", 0, 1, 4, ++ RV1108_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE(SCLK_HEVC_CABAC, "clk_hevc_cabac", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 1, GFLAGS), + -+ pm_runtime_enable(&pdev->dev); -+ ret = pm_clk_create(&pdev->dev); -+ if (ret) -+ goto disable_pm_runtime; ++ COMPOSITE(0, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE(0, "aclk_vpu_pre", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(36), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pre", 0, ++ RV1108_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, ++ RV1108_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", 0, ++ RV1108_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_rkvdec_pre", 0, ++ RV1108_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(19), 4, 
GFLAGS), ++ GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(19), 5, GFLAGS), ++ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(19), 6, GFLAGS), + -+ ret = pm_clk_add(&pdev->dev, "link"); ++ /* PD_PMU_wrapper */ ++ COMPOSITE_NOMUX(0, "pmu_24m_ena", "gpll", CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(38), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(0, "pclk_pmu", "pmu_24m_ena", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(0, "pclk_intmem1", "pmu_24m_ena", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pmu_24m_ena", 0, ++ RV1108_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(0, "pclk_pmugrf", "pmu_24m_ena", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(10), 3, GFLAGS), ++ GATE(0, "pclk_pmu_niu", "pmu_24m_ena", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pmu_24m_ena", 0, ++ RV1108_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(PCLK_PWM0_PMU, "pclk_pwm0_pmu", "pmu_24m_ena", 0, ++ RV1108_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE(SCLK_PWM0_PMU, "sclk_pwm0_pmu", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(12), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(8), 15, GFLAGS), ++ COMPOSITE(SCLK_I2C0_PMU, "sclk_i2c0_pmu", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(19), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(8), 14, GFLAGS), ++ GATE(0, "pvtm_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(8), 13, GFLAGS), + -+ if (ret) -+ goto destroy_pm_clk; ++ /* ++ * Clock-Architecture Diagram 3 ++ */ ++ COMPOSITE(SCLK_WIFI, "sclk_wifi", mux_wifi_src_p, 0, ++ RV1108_CLKSEL_CON(28), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RV1108_CLKGATE_CON(9), 8, GFLAGS), ++ COMPOSITE_NODIV(0, "sclk_cifout_src", mux_cifout_src_p, 0, ++ RV1108_CLKSEL_CON(40), 8, 1, MFLAGS, ++ RV1108_CLKGATE_CON(9), 11, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_CIFOUT, "sclk_cifout", mux_cifout_p, 0, ++ RV1108_CLKSEL_CON(40), 12, 1, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(SCLK_MIPI_CSI_OUT, "sclk_mipi_csi_out", "xin24m", 0, ++ RV1108_CLKSEL_CON(41), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 12, GFLAGS), + -+ ret = register_clocks(priv, &pdev->dev); -+ if (ret) -+ goto destroy_pm_clk; ++ GATE(0, "pclk_acodecphy", "pclk_top_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(0, "pclk_usbgrf", "pclk_top_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 14, GFLAGS), + -+ return 0; ++ GATE(ACLK_CIF0, "aclk_cif0", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(18), 10, GFLAGS), ++ GATE(HCLK_CIF0, "hclk_cif0", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 10, GFLAGS), ++ COMPOSITE_NODIV(SCLK_CIF0, "sclk_cif0", mux_sclk_cif0_src_p, 0, ++ RV1108_CLKSEL_CON(31), 0, 2, MFLAGS, ++ RV1108_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(ACLK_CIF1, "aclk_cif1", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(HCLK_CIF1, "hclk_cif1", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(17), 7, GFLAGS), ++ COMPOSITE_NODIV(SCLK_CIF1, "sclk_cif1", mux_sclk_cif1_src_p, 0, ++ RV1108_CLKSEL_CON(31), 2, 2, MFLAGS, ++ RV1108_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(ACLK_CIF2, "aclk_cif2", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(HCLK_CIF2, "hclk_cif2", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(17), 9, GFLAGS), ++ COMPOSITE_NODIV(SCLK_CIF2, "sclk_cif2", mux_sclk_cif2_src_p, 0, ++ RV1108_CLKSEL_CON(31), 4, 2, MFLAGS, ++ RV1108_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(ACLK_CIF3, "aclk_cif3", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(17), 10, GFLAGS), ++ 
GATE(HCLK_CIF3, "hclk_cif3", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(17), 11, GFLAGS), ++ COMPOSITE_NODIV(SCLK_CIF3, "sclk_cif3", mux_sclk_cif3_src_p, 0, ++ RV1108_CLKSEL_CON(31), 6, 2, MFLAGS, ++ RV1108_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(0, "pclk_cif1to4", "pclk_vip", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(7), 8, GFLAGS), + -+destroy_pm_clk: -+ pm_clk_destroy(&pdev->dev); -+disable_pm_runtime: -+ pm_runtime_disable(&pdev->dev); ++ /* PD_DSP_wrapper */ ++ COMPOSITE(SCLK_DSP, "sclk_dsp", mux_dsp_src_p, 0, ++ RV1108_CLKSEL_CON(42), 8, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 0, GFLAGS), ++ GATE(0, "clk_dsp_sys_wd", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(0, "clk_dsp_epp_wd", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(0, "clk_dsp_edp_wd", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(0, "clk_dsp_iop_wd", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(0, "clk_dsp_free", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 13, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_DSP_IOP, "sclk_dsp_iop", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(44), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_DSP_EPP, "sclk_dsp_epp", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(44), 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 2, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_DSP_EDP, "sclk_dsp_edp", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(45), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 3, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_DSP_EDAP, "sclk_dsp_edap", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(45), 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(0, "pclk_dsp_iop_niu", "sclk_dsp_iop", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(0, "aclk_dsp_epp_niu", "sclk_dsp_epp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(0, "aclk_dsp_edp_niu", "sclk_dsp_edp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(0, "pclk_dsp_dbg_niu", "sclk_dsp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(0, "aclk_dsp_edap_niu", "sclk_dsp_edap", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 14, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_DSP_PFM, "sclk_dsp_pfm", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(43), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 5, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_DSP_CFG, "pclk_dsp_cfg", "sclk_dsp", 0, ++ RV1108_CLKSEL_CON(43), 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(0, "pclk_dsp_cfg_niu", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(0, "pclk_dsp_pfm_mon", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(0, "pclk_intc", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(0, "pclk_dsp_grf", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(0, "pclk_mailbox", "pclk_dsp_cfg", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(0, "aclk_dsp_epp_perf", "sclk_dsp_epp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(0, "aclk_dsp_edp_perf", "sclk_dsp_edp", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(11), 8, GFLAGS), + -+ return ret; -+} ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ COMPOSITE(0, "aclk_vio0_pre", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(ACLK_VIO0, "aclk_vio0", "aclk_vio0_pre", 0, ++ RV1108_CLKGATE_CON(17), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, 
"hclk_vio_pre", "aclk_vio0_pre", 0, ++ RV1108_CLKSEL_CON(29), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(HCLK_VIO, "hclk_vio", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(17), 2, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_vio_pre", "aclk_vio0_pre", 0, ++ RV1108_CLKSEL_CON(29), 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_VIO, "pclk_vio", "pclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(17), 3, GFLAGS), ++ COMPOSITE(0, "aclk_vio1_pre", mux_pll_src_4plls_p, CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(28), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(ACLK_VIO1, "aclk_vio1", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(17), 1, GFLAGS), + -+static int rockchip_clk_link_remove(struct platform_device *pdev) -+{ -+ pm_clk_destroy(&pdev->dev); -+ pm_runtime_disable(&pdev->dev); ++ INVERTER(0, "pclk_vip", "ext_vip", ++ RV1108_CLKSEL_CON(31), 8, IFLAGS), ++ GATE(0, "pclk_isp_pre", "pclk_vip", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(0, "pclk_isp", "pclk_isp_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(18), 10, GFLAGS), ++ GATE(0, "dclk_hdmiphy_src_gpll", "gpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(0, "dclk_hdmiphy_src_dpll", "dpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_NOGATE(0, "dclk_hdmiphy_pre", mux_dclk_hdmiphy_pre_p, 0, ++ RV1108_CLKSEL_CON(32), 6, 1, MFLAGS, 8, 6, DFLAGS), ++ COMPOSITE_NOGATE(DCLK_VOP_SRC, "dclk_vop_src", mux_dclk_hdmiphy_pre_p, 0, ++ RV1108_CLKSEL_CON(32), 6, 1, MFLAGS, 0, 6, DFLAGS), ++ MUX(DCLK_HDMIPHY, "dclk_hdmiphy", mux_dclk_hdmiphy_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(32), 15, 1, MFLAGS), ++ MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(32), 7, 1, MFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vio0_pre", 0, ++ RV1108_CLKGATE_CON(18), 0, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 1, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_vio0_pre", 0, ++ RV1108_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 3, GFLAGS), + -+ return 0; -+} ++ GATE(ACLK_RGA, "aclk_rga", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(18), 4, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 5, GFLAGS), ++ COMPOSITE(SCLK_RGA, "sclk_rga", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(6), 6, GFLAGS), + -+static const struct dev_pm_ops rockchip_clk_link_pm_ops = { -+ SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) -+}; ++ COMPOSITE(SCLK_CVBS_HOST, "sclk_cvbs_host", mux_cvbs_src_p, 0, ++ RV1108_CLKSEL_CON(33), 13, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(6), 7, GFLAGS), ++ FACTOR(0, "sclk_cvbs_27m", "sclk_cvbs_host", 0, 1, 2), + -+static struct platform_driver rockchip_clk_link_driver = { -+ .driver = { -+ .name = "clock-link", -+ .of_match_table = of_match_ptr(rockchip_clk_link_of_match), -+ .pm = &rockchip_clk_link_pm_ops, -+ }, -+ .probe = rockchip_clk_link_probe, -+ .remove = rockchip_clk_link_remove, -+}; ++ GATE(SCLK_HDMI_SFR, "sclk_hdmi_sfr", "xin24m", 0, ++ RV1108_CLKGATE_CON(6), 8, GFLAGS), + -+static int __init rockchip_clk_link_drv_register(void) -+{ -+ return platform_driver_register(&rockchip_clk_link_driver); -+} -+postcore_initcall_sync(rockchip_clk_link_drv_register); ++ COMPOSITE(SCLK_HDMI_CEC, "sclk_hdmi_cec", mux_hdmi_cec_src_p, 0, ++ RV1108_CLKSEL_CON(34), 14, 2, MFLAGS, 0, 14, DFLAGS, ++ RV1108_CLKGATE_CON(6), 9, GFLAGS), 
++ GATE(PCLK_MIPI_DSI, "pclk_mipi_dsi", "pclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 8, GFLAGS), ++ GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "pclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 9, GFLAGS), + -+static void __exit rockchip_clk_link_drv_unregister(void) -+{ -+ platform_driver_unregister(&rockchip_clk_link_driver); -+} -+module_exit(rockchip_clk_link_drv_unregister); ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vio1_pre", 0, ++ RV1108_CLKGATE_CON(18), 12, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vio_pre", 0, ++ RV1108_CLKGATE_CON(18), 11, GFLAGS), ++ COMPOSITE(SCLK_ISP, "sclk_isp", mux_pll_src_4plls_p, 0, ++ RV1108_CLKSEL_CON(30), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(6), 3, GFLAGS), + -+MODULE_AUTHOR("Elaine Zhang "); -+MODULE_DESCRIPTION("Clock driver for Niu Dependencies"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip/clk-out.c b/drivers/clk/rockchip/clk-out.c -new file mode 100644 -index 000000000..22dcd98fb ---- /dev/null -+++ b/drivers/clk/rockchip/clk-out.c -@@ -0,0 +1,99 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Copyright (c) 2023 Rockchip Electronics Co., Ltd -+ */ ++ GATE(0, "clk_dsiphy24m", "xin24m", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(0, "pclk_vdacphy", "pclk_top_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(0, "pclk_mipi_dsiphy", "pclk_top_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(0, "pclk_mipi_csiphy", "pclk_top_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 12, GFLAGS), + -+#include -+#include -+#include -+#include -+#include -+#include ++ /* ++ * Clock-Architecture Diagram 5 ++ */ + -+static DEFINE_SPINLOCK(clk_out_lock); ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+static int rockchip_clk_out_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *node = pdev->dev.of_node; -+ struct clk_hw *hw; -+ struct resource *res; -+ const char *clk_name = node->name; -+ const char *parent_name; -+ void __iomem *reg; -+ u32 shift = 0; -+ u8 clk_gate_flags = CLK_GATE_HIWORD_MASK; -+ int ret; + -+ ret = device_property_read_string(dev, "clock-output-names", &clk_name); -+ if (ret) -+ return ret; ++ COMPOSITE(SCLK_I2S0_SRC, "i2s0_src", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(5), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_src", CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(8), 0, ++ RV1108_CLKGATE_CON(2), 1, GFLAGS, ++ &rv1108_i2s0_fracmux), ++ GATE(SCLK_I2S0, "sclk_i2s0", "i2s0_pre", CLK_SET_RATE_PARENT, ++ RV1108_CLKGATE_CON(2), 2, GFLAGS), ++ COMPOSITE_NODIV(0, "i2s_out", mux_i2s_out_p, 0, ++ RV1108_CLKSEL_CON(5), 15, 1, MFLAGS, ++ RV1108_CLKGATE_CON(2), 3, GFLAGS), + -+ ret = device_property_read_u32(dev, "rockchip,bit-shift", &shift); -+ if (ret) -+ return ret; ++ COMPOSITE(SCLK_I2S1_SRC, "i2s1_src", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(6), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_src", CLK_SET_RATE_PARENT, ++ RK2928_CLKSEL_CON(9), 0, ++ RK2928_CLKGATE_CON(2), 5, GFLAGS, ++ &rv1108_i2s1_fracmux), ++ GATE(SCLK_I2S1, "sclk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, ++ RV1108_CLKGATE_CON(2), 6, GFLAGS), + -+ if (device_property_read_bool(dev, "rockchip,bit-set-to-disable")) -+ clk_gate_flags |= CLK_GATE_SET_TO_DISABLE; ++ COMPOSITE(SCLK_I2S2_SRC, "i2s2_src", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(7), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 8, 
GFLAGS), ++ COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_src", CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(10), 0, ++ RV1108_CLKGATE_CON(2), 9, GFLAGS, ++ &rv1108_i2s2_fracmux), ++ GATE(SCLK_I2S2, "sclk_i2s2", "i2s2_pre", CLK_SET_RATE_PARENT, ++ RV1108_CLKGATE_CON(2), 10, GFLAGS), + -+ ret = of_clk_parent_fill(node, &parent_name, 1); -+ if (ret != 1) -+ return -EINVAL; ++ /* PD_BUS */ ++ GATE(0, "aclk_bus_src_gpll", "gpll", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(0, "aclk_bus_src_apll", "apll", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(0, "aclk_bus_src_dpll", "dpll", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NOGATE(ACLK_PRE, "aclk_bus_pre", mux_aclk_bus_src_p, CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(2), 8, 2, MFLAGS, 0, 5, DFLAGS), ++ COMPOSITE_NOMUX(HCLK_BUS, "hclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(3), 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_bus_pre", "aclk_bus_pre", CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(3), 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_BUS, "pclk_bus", "pclk_bus_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(0, "pclk_top_pre", "pclk_bus_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(0, "pclk_ddr_pre", "pclk_bus_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(1), 8, GFLAGS), ++ GATE(SCLK_TIMER0, "clk_timer0", "xin24m", 0, ++ RV1108_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(SCLK_TIMER1, "clk_timer1", "xin24m", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(13), 4, GFLAGS), + -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ if (!res) -+ return -ENOMEM; ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 7, GFLAGS), ++ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 8, GFLAGS), ++ GATE(HCLK_I2S2_2CH, "hclk_i2s2_2ch", "hclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 9, GFLAGS), + -+ reg = devm_ioremap(dev, res->start, resource_size(res)); -+ if (!reg) -+ return -ENOMEM; ++ GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 10, GFLAGS), ++ GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 11, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO, "sclk_crypto", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(11), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(2), 12, GFLAGS), + -+ pm_runtime_enable(dev); ++ COMPOSITE(SCLK_SPI, "sclk_spi", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(11), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(3), 0, GFLAGS), ++ GATE(PCLK_SPI, "pclk_spi", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 5, GFLAGS), + -+ hw = clk_hw_register_gate(dev, clk_name, parent_name, CLK_SET_RATE_PARENT, -+ reg, shift, clk_gate_flags, &clk_out_lock); -+ if (IS_ERR(hw)) { -+ ret = -EINVAL; -+ goto err_disable_pm_runtime; -+ } ++ COMPOSITE(SCLK_UART0_SRC, "uart0_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(13), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE(SCLK_UART1_SRC, "uart1_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(14), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE(SCLK_UART2_SRC, "uart2_src", mux_pll_src_dpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, ++ RV1108_CLKSEL_CON(15), 12, 2, MFLAGS, 0, 7, DFLAGS, ++ 
RV1108_CLKGATE_CON(3), 5, GFLAGS), + -+ of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw); ++ COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(16), 0, ++ RV1108_CLKGATE_CON(3), 2, GFLAGS, ++ &rv1108_uart0_fracmux), ++ COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(17), 0, ++ RV1108_CLKGATE_CON(3), 4, GFLAGS, ++ &rv1108_uart1_fracmux), ++ COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_src", CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(18), 0, ++ RV1108_CLKGATE_CON(3), 6, GFLAGS, ++ &rv1108_uart2_fracmux), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 12, GFLAGS), + -+ return 0; ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(20), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(20), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 9, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 2, GFLAGS), ++ COMPOSITE(SCLK_PWM, "clk_pwm", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(12), 15, 2, MFLAGS, 8, 7, DFLAGS, ++ RV1108_CLKGATE_CON(3), 10, GFLAGS), ++ GATE(PCLK_PWM, "pclk_pwm", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 7, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 8, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 9, GFLAGS), + -+err_disable_pm_runtime: -+ pm_runtime_disable(dev); ++ GATE(0, "pclk_grf", "pclk_bus_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(PCLK_EFUSE0, "pclk_efuse0", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 12, GFLAGS), ++ GATE(PCLK_EFUSE1, "pclk_efuse1", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 13, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 13, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0, ++ RV1108_CLKSEL_CON(21), 0, 10, DFLAGS, ++ RV1108_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(13), 14, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "sclk_saradc", "xin24m", 0, ++ RV1108_CLKSEL_CON(22), 0, 10, DFLAGS, ++ RV1108_CLKGATE_CON(3), 12, GFLAGS), + -+ return ret; -+} ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_pre", 0, ++ RV1108_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(0, "hclk_rom", "hclk_bus_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(12), 1, GFLAGS), + -+static int rockchip_clk_out_remove(struct platform_device *pdev) -+{ -+ struct device_node *node = pdev->dev.of_node; ++ /* PD_DDR */ ++ GATE(0, "apll_ddr", "apll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 8, 
GFLAGS), ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOGATE(0, "clk_ddrphy_src", mux_ddrphy_p, CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 3, ++ DFLAGS | CLK_DIVIDER_POWER_OF_TWO), ++ FACTOR(0, "clk_ddr", "clk_ddrphy_src", 0, 1, 2), ++ GATE(0, "clk_ddrphy4x", "clk_ddr", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(10), 9, GFLAGS), ++ GATE(0, "pclk_ddrupctl", "pclk_ddr_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(0, "nclk_ddrupctl", "clk_ddr", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(12), 5, GFLAGS), ++ GATE(0, "pclk_ddrmon", "pclk_ddr_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(12), 6, GFLAGS), ++ GATE(0, "timer_clk", "xin24m", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(0), 11, GFLAGS), ++ GATE(0, "pclk_mschniu", "pclk_ddr_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_ddr_pre", CLK_IGNORE_UNUSED, ++ RV1108_CLKGATE_CON(14), 4, GFLAGS), + -+ of_clk_del_provider(node); -+ pm_runtime_disable(&pdev->dev); ++ /* ++ * Clock-Architecture Diagram 6 ++ */ + -+ return 0; -+} ++ /* PD_PERI */ ++ COMPOSITE_NOMUX(0, "pclk_periph_pre", "gpll", CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(23), 10, 5, DFLAGS, ++ RV1108_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(PCLK_PERI, "pclk_periph", "pclk_periph_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(15), 13, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_periph_pre", "gpll", CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(23), 5, 5, DFLAGS, ++ RV1108_CLKGATE_CON(4), 4, GFLAGS), ++ GATE(HCLK_PERI, "hclk_periph", "hclk_periph_pre", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(15), 12, GFLAGS), + -+static const struct of_device_id rockchip_clk_out_match[] = { -+ { .compatible = "rockchip,clk-out", }, -+ {}, -+}; ++ GATE(0, "aclk_peri_src_dpll", "dpll", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(0, "aclk_peri_src_gpll", "gpll", CLK_IS_CRITICAL, ++ RV1108_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE(ACLK_PERI, "aclk_periph", mux_aclk_peri_src_p, CLK_IS_CRITICAL, ++ RV1108_CLKSEL_CON(23), 15, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(15), 11, GFLAGS), + -+static struct platform_driver rockchip_clk_out_driver = { -+ .driver = { -+ .name = "rockchip-clk-out", -+ .of_match_table = rockchip_clk_out_match, -+ }, -+ .probe = rockchip_clk_out_probe, -+ .remove = rockchip_clk_out_remove, -+}; ++ COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, ++ RV1108_CLKSEL_CON(25), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RV1108_CLKGATE_CON(5), 0, GFLAGS), + -+module_platform_driver(rockchip_clk_out_driver); ++ COMPOSITE_NODIV(0, "sclk_sdio_src", mux_mmc_src_p, 0, ++ RV1108_CLKSEL_CON(25), 10, 2, MFLAGS, ++ RV1108_CLKGATE_CON(5), 2, GFLAGS), ++ DIV(SCLK_SDIO, "sclk_sdio", "sclk_sdio_src", 0, ++ RV1108_CLKSEL_CON(26), 0, 8, DFLAGS), + -+MODULE_DESCRIPTION("Rockchip Clock Input-Output-Switch"); -+MODULE_AUTHOR("Sugar Zhang "); -+MODULE_LICENSE("GPL"); -+MODULE_DEVICE_TABLE(of, rockchip_clk_out_match); -diff --git a/drivers/clk/rockchip/clk-pvtm.c b/drivers/clk/rockchip/clk-pvtm.c -new file mode 100644 -index 000000000..c748589dd ---- /dev/null -+++ b/drivers/clk/rockchip/clk-pvtm.c -@@ -0,0 +1,311 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd -+ */ ++ COMPOSITE_NODIV(0, "sclk_emmc_src", mux_mmc_src_p, 0, ++ RV1108_CLKSEL_CON(25), 12, 2, MFLAGS, ++ RV1108_CLKGATE_CON(5), 1, GFLAGS), ++ DIV(SCLK_EMMC, "sclk_emmc", 
"sclk_emmc_src", 0, ++ RK2928_CLKSEL_CON(26), 8, 8, DFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 2, GFLAGS), + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ COMPOSITE(SCLK_NANDC, "sclk_nandc", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(27), 14, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1108_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 3, GFLAGS), + -+#define CLK_SEL_EXTERNAL_32K 0 -+#define CLK_SEL_INTERNAL_PVTM 1 ++ GATE(HCLK_HOST0, "hclk_host0", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(0, "hclk_host0_arb", "hclk_periph", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(HCLK_OTG, "hclk_otg", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(0, "hclk_otg_pmu", "hclk_periph", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(SCLK_USBPHY, "clk_usbphy", "xin24m", CLK_IGNORE_UNUSED, RV1108_CLKGATE_CON(5), 5, GFLAGS), + -+#define wr_msk_bit(v, off, msk) ((v) << (off) | (msk << (16 + (off)))) ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_2plls_p, 0, ++ RV1108_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1108_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_periph", 0, RV1108_CLKGATE_CON(15), 10, GFLAGS), + -+struct rockchip_clock_pvtm; ++ COMPOSITE(SCLK_MAC_PRE, "sclk_mac_pre", mux_pll_src_apll_gpll_p, 0, ++ RV1108_CLKSEL_CON(24), 12, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1108_CLKGATE_CON(4), 10, GFLAGS), ++ MUX(SCLK_MAC, "sclk_mac", mux_sclk_mac_p, CLK_SET_RATE_PARENT, ++ RV1108_CLKSEL_CON(24), 8, 1, MFLAGS), ++ GATE(SCLK_MAC_RX, "sclk_mac_rx", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(SCLK_MAC_REF, "sclk_mac_ref", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(SCLK_MAC_REFOUT, "sclk_mac_refout", "sclk_mac", 0, RV1108_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_periph", 0, RV1108_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_periph", 0, RV1108_CLKGATE_CON(15), 5, GFLAGS), + -+struct rockchip_clock_pvtm_info { -+ u32 con; -+ u32 sta; -+ u32 sel_con; -+ u32 sel_shift; -+ u32 sel_value; -+ u32 sel_mask; -+ u32 div_shift; -+ u32 div_mask; ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc", RV1108_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RV1108_SDMMC_CON1, 1), + -+ u32 (*get_value)(struct rockchip_clock_pvtm *pvtm, -+ unsigned int time_us); -+ int (*init_freq)(struct rockchip_clock_pvtm *pvtm); -+ int (*sel_enable)(struct rockchip_clock_pvtm *pvtm); -+}; ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio", RV1108_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio", RV1108_SDIO_CON1, 1), + -+struct rockchip_clock_pvtm { -+ const struct rockchip_clock_pvtm_info *info; -+ struct regmap *grf; -+ struct clk *pvtm_clk; -+ struct clk *clk; -+ unsigned long rate; ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc", RV1108_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc", RV1108_EMMC_CON1, 1), +}; + -+static unsigned long xin32k_pvtm_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ return 32768; -+} -+ -+static const struct clk_ops xin32k_pvtm = { -+ .recalc_rate = xin32k_pvtm_recalc_rate, -+}; ++static void __iomem *rv1108_cru_base; + -+static void 
rockchip_clock_pvtm_delay(unsigned int delay) ++static void rv1108_dump_cru(void) +{ -+ unsigned int ms = delay / 1000; -+ unsigned int us = delay % 1000; -+ -+ if (ms > 0) { -+ if (ms < 20) -+ us += ms * 1000; -+ else -+ msleep(ms); ++ if (rv1108_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rv1108_cru_base, ++ 0x1f8, false); + } -+ -+ if (us >= 10) -+ usleep_range(us, us + 100); -+ else -+ udelay(us); -+} -+ -+static int rockchip_clock_sel_internal_pvtm(struct rockchip_clock_pvtm *pvtm) -+{ -+ int ret = 0; -+ -+ ret = regmap_write(pvtm->grf, pvtm->info->sel_con, -+ wr_msk_bit(pvtm->info->sel_value, -+ pvtm->info->sel_shift, -+ pvtm->info->sel_mask)); -+ if (ret != 0) -+ pr_err("%s: fail to write register\n", __func__); -+ -+ return ret; +} + -+/* get pmu pvtm value */ -+static u32 rockchip_clock_pvtm_get_value(struct rockchip_clock_pvtm *pvtm, -+ u32 time_us) ++static void __init rv1108_clk_init(struct device_node *np) +{ -+ const struct rockchip_clock_pvtm_info *info = pvtm->info; -+ u32 val = 0, sta = 0; -+ u32 clk_cnt, check_cnt; -+ -+ /* 24m clk ,24cnt=1us */ -+ clk_cnt = time_us * 24; -+ -+ regmap_write(pvtm->grf, info->con + 0x4, clk_cnt); -+ regmap_write(pvtm->grf, info->con, wr_msk_bit(3, 0, 0x3)); -+ -+ rockchip_clock_pvtm_delay(time_us); -+ -+ check_cnt = 100; -+ while (check_cnt) { -+ regmap_read(pvtm->grf, info->sta, &sta); -+ if (sta & 0x1) -+ break; -+ udelay(4); -+ check_cnt--; -+ } ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+ if (check_cnt) { -+ regmap_read(pvtm->grf, info->sta + 0x4, &val); -+ } else { -+ pr_err("%s: wait pvtm_done timeout!\n", __func__); -+ val = 0; ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; + } + -+ regmap_write(pvtm->grf, info->con, wr_msk_bit(0, 0, 0x3)); -+ -+ return val; -+} -+ -+static int rockchip_clock_pvtm_init_freq(struct rockchip_clock_pvtm *pvtm) -+{ -+ u32 pvtm_cnt = 0; -+ u32 div, time_us; -+ int ret = 0; -+ -+ time_us = 1000; -+ pvtm_cnt = pvtm->info->get_value(pvtm, time_us); -+ pr_debug("get pvtm_cnt = %d\n", pvtm_cnt); -+ -+ /* set pvtm_div to get rate */ -+ div = DIV_ROUND_UP(1000 * pvtm_cnt, pvtm->rate); -+ if (div > pvtm->info->div_mask) { -+ pr_err("pvtm_div out of bounary! 
set max instead\n"); -+ div = pvtm->info->div_mask; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; + } ++ clks = ctx->clk_data.clks; + -+ pr_debug("set div %d, rate %luKHZ\n", div, pvtm->rate); -+ ret = regmap_write(pvtm->grf, pvtm->info->con, -+ wr_msk_bit(div, pvtm->info->div_shift, -+ pvtm->info->div_mask)); -+ if (ret != 0) -+ goto out; -+ -+ /* pmu pvtm oscilator enable */ -+ ret = regmap_write(pvtm->grf, pvtm->info->con, -+ wr_msk_bit(1, 1, 0x1)); -+ if (ret != 0) -+ goto out; -+ -+ ret = pvtm->info->sel_enable(pvtm); -+out: -+ if (ret != 0) -+ pr_err("%s: fail to write register\n", __func__); -+ -+ return ret; -+} -+ -+static int clock_pvtm_regitstor(struct device *dev, -+ struct rockchip_clock_pvtm *pvtm) -+{ -+ struct clk_init_data init = {}; -+ struct clk_hw *clk_hw; -+ -+ /* Init the xin32k_pvtm */ -+ pvtm->info->init_freq(pvtm); -+ -+ init.parent_names = NULL; -+ init.num_parents = 0; -+ init.name = "xin32k_pvtm"; -+ init.ops = &xin32k_pvtm; -+ -+ clk_hw = devm_kzalloc(dev, sizeof(*clk_hw), GFP_KERNEL); -+ if (!clk_hw) -+ return -ENOMEM; -+ clk_hw->init = &init; -+ -+ /* optional override of the clockname */ -+ of_property_read_string_index(dev->of_node, "clock-output-names", -+ 0, &init.name); -+ pvtm->clk = devm_clk_register(dev, clk_hw); -+ if (IS_ERR(pvtm->clk)) -+ return PTR_ERR(pvtm->clk); -+ -+ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, -+ pvtm->clk); -+} -+ -+static const struct rockchip_clock_pvtm_info rk3368_pvtm_data = { -+ .con = 0x180, -+ .sta = 0x190, -+ .sel_con = 0x100, -+ .sel_shift = 6, -+ .sel_value = CLK_SEL_INTERNAL_PVTM, -+ .sel_mask = 0x1, -+ .div_shift = 2, -+ .div_mask = 0x3f, -+ -+ .sel_enable = rockchip_clock_sel_internal_pvtm, -+ .get_value = rockchip_clock_pvtm_get_value, -+ .init_freq = rockchip_clock_pvtm_init_freq, -+}; -+ -+static const struct of_device_id rockchip_clock_pvtm_match[] = { -+ { -+ .compatible = "rockchip,rk3368-pvtm-clock", -+ .data = (void *)&rk3368_pvtm_data, -+ }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, rockchip_clock_pvtm_match); -+ -+static int rockchip_clock_pvtm_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ struct rockchip_clock_pvtm *pvtm; -+ int error; -+ u32 rate; -+ -+ pvtm = devm_kzalloc(dev, sizeof(*pvtm), GFP_KERNEL); -+ if (!pvtm) -+ return -ENOMEM; -+ -+ match = of_match_node(rockchip_clock_pvtm_match, np); -+ if (!match) -+ return -ENXIO; -+ -+ pvtm->info = (const struct rockchip_clock_pvtm_info *)match->data; -+ if (!pvtm->info) -+ return -EINVAL; -+ -+ if (!dev->parent || !dev->parent->of_node) -+ return -EINVAL; -+ -+ pvtm->grf = syscon_node_to_regmap(dev->parent->of_node); -+ if (IS_ERR(pvtm->grf)) -+ return PTR_ERR(pvtm->grf); ++ rockchip_clk_register_plls(ctx, rv1108_pll_clks, ++ ARRAY_SIZE(rv1108_pll_clks), ++ RV1108_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rv1108_clk_branches, ++ ARRAY_SIZE(rv1108_clk_branches)); + -+ if (!of_property_read_u32(np, "pvtm-rate", &rate)) -+ pvtm->rate = rate; -+ else -+ pvtm->rate = 32768; ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 3, clks[PLL_APLL], clks[PLL_GPLL], ++ &rv1108_cpuclk_data, rv1108_cpuclk_rates, ++ ARRAY_SIZE(rv1108_cpuclk_rates)); + -+ pvtm->pvtm_clk = devm_clk_get(&pdev->dev, "pvtm_pmu_clk"); -+ if (IS_ERR(pvtm->pvtm_clk)) { -+ error = PTR_ERR(pvtm->pvtm_clk); -+ if (error != 
-EPROBE_DEFER) -+ dev_err(&pdev->dev, -+ "failed to get pvtm core clock: %d\n", -+ error); -+ goto out_probe; -+ } ++ rockchip_register_softrst(np, 13, reg_base + RV1108_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ error = clk_prepare_enable(pvtm->pvtm_clk); -+ if (error) { -+ dev_err(&pdev->dev, "failed to enable the clock: %d\n", -+ error); -+ goto out_probe; -+ } ++ rockchip_register_restart_notifier(ctx, RV1108_GLB_SRST_FST, NULL); + -+ platform_set_drvdata(pdev, pvtm); ++ rockchip_clk_of_add_provider(np, ctx); + -+ error = clock_pvtm_regitstor(&pdev->dev, pvtm); -+ if (error) { -+ dev_err(&pdev->dev, "failed to registor clock: %d\n", -+ error); -+ goto out_clk_put; ++ if (!rk_dump_cru) { ++ rv1108_cru_base = reg_base; ++ rk_dump_cru = rv1108_dump_cru; + } -+ -+ return error; -+ -+out_clk_put: -+ clk_disable_unprepare(pvtm->pvtm_clk); -+out_probe: -+ return error; +} ++CLK_OF_DECLARE(rv1108_cru, "rockchip,rv1108-cru", rv1108_clk_init); + -+static int rockchip_clock_pvtm_remove(struct platform_device *pdev) ++static int __init clk_rv1108_probe(struct platform_device *pdev) +{ -+ struct rockchip_clock_pvtm *pvtm = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + -+ of_clk_del_provider(np); -+ clk_disable_unprepare(pvtm->pvtm_clk); ++ rv1108_clk_init(np); + + return 0; +} + -+static struct platform_driver rockchip_clock_pvtm_driver = { -+ .driver = { -+ .name = "rockchip-clcok-pvtm", -+ .of_match_table = rockchip_clock_pvtm_match, ++static const struct of_device_id clk_rv1108_match_table[] = { ++ { ++ .compatible = "rockchip,rv1108-cru", + }, -+ .probe = rockchip_clock_pvtm_probe, -+ .remove = rockchip_clock_pvtm_remove, ++ { } +}; ++MODULE_DEVICE_TABLE(of, clk_rv1108_match_table); + -+module_platform_driver(rockchip_clock_pvtm_driver); ++static struct platform_driver clk_rv1108_driver = { ++ .driver = { ++ .name = "clk-rv1108", ++ .of_match_table = clk_rv1108_match_table, ++ }, ++}; ++builtin_platform_driver_probe(clk_rv1108_driver, clk_rv1108_probe); + -+MODULE_DESCRIPTION("Rockchip Clock Pvtm Driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clk/rockchip/clk-rk1808.c b/drivers/clk/rockchip/clk-rk1808.c ++MODULE_DESCRIPTION("Rockchip RV1108 Clock Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip-oh/clk-rv1126.c b/drivers/clk/rockchip-oh/clk-rv1126.c new file mode 100644 -index 000000000..e177a3dd6 +index 000000000..62b204d89 --- /dev/null -+++ b/drivers/clk/rockchip/clk-rk1808.c -@@ -0,0 +1,1249 @@ ++++ b/drivers/clk/rockchip-oh/clk-rv1126.c +@@ -0,0 +1,1566 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd -+ * Author: Elaine Zhang ++ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. 
++ * Author: Finley Xiao + */ ++ +#include +#include +#include +#include +#include +#include -+#include ++#include +#include "clk.h" + -+#define RK1808_GRF_SOC_STATUS0 0x480 -+#define RK1808_PMUGRF_SOC_CON0 0x100 -+#define RK1808_UART_FRAC_MAX_PRATE 800000000 -+#define RK1808_PDM_FRAC_MAX_PRATE 300000000 -+#define RK1808_I2S_FRAC_MAX_PRATE 600000000 -+#define RK1808_VOP_RAW_FRAC_MAX_PRATE 300000000 -+#define RK1808_VOP_LITE_FRAC_MAX_PRATE 400000000 ++#define RV1126_GMAC_CON 0x460 ++#define RV1126_GRF_IOFUNC_CON1 0x10264 ++#define RV1126_GRF_SOC_STATUS0 0x10 ++#define RV1126_PMUGRF_SOC_CON0 0x100 + -+enum rk1808_plls { -+ apll, dpll, cpll, gpll, npll, ppll, ++#define RV1126_FRAC_MAX_PRATE 1200000000 ++#define RV1126_CSIOUT_FRAC_MAX_PRATE 300000000 ++ ++enum rv1126_pmu_plls { ++ gpll, +}; + -+static struct rockchip_pll_rate_table rk1808_pll_rates[] = { ++enum rv1126_plls { ++ apll, dpll, cpll, hpll, ++}; ++ ++static struct rockchip_pll_rate_table rv1126_pll_rates[] = { + /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ + RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), + RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 2, 275, 3, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), + RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), + RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), + RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), + RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), @@ -69446,1128 +68655,1414 @@ index 000000000..e177a3dd6 + RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), + RK3036_PLL_RATE(840000000, 1, 70, 2, 
1, 1, 0), + RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 1, 100, 3, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 1, 175, 2, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), ++#ifdef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++ RK3036_PLL_RATE(600000000, 1, 50, 2, 1, 1, 0), ++#else ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++#endif + RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), + RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(416000000, 1, 52, 3, 1, 1, 0), ++ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), ++ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), + RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), + RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(200000000, 1, 200, 6, 4, 1, 0), -+ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), + { /* sentinel */ }, +}; + -+#define RK1808_DIV_ACLKM_MASK 0x7 -+#define RK1808_DIV_ACLKM_SHIFT 12 -+#define RK1808_DIV_PCLK_DBG_MASK 0xf -+#define RK1808_DIV_PCLK_DBG_SHIFT 8 ++#define RV1126_DIV_ACLK_CORE_MASK 0xf ++#define RV1126_DIV_ACLK_CORE_SHIFT 4 ++#define RV1126_DIV_PCLK_DBG_MASK 0x7 ++#define RV1126_DIV_PCLK_DBG_SHIFT 0 + -+#define RK1808_CLKSEL0(_aclk_core, _pclk_dbg) \ ++#define RV1126_CLKSEL1(_aclk_core, _pclk_dbg) \ +{ \ -+ .reg = RK1808_CLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, RK1808_DIV_ACLKM_MASK, \ -+ RK1808_DIV_ACLKM_SHIFT) | \ -+ HIWORD_UPDATE(_pclk_dbg, RK1808_DIV_PCLK_DBG_MASK, \ -+ RK1808_DIV_PCLK_DBG_SHIFT), \ ++ .reg = RV1126_CLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_aclk_core, RV1126_DIV_ACLK_CORE_MASK, \ ++ RV1126_DIV_ACLK_CORE_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, RV1126_DIV_PCLK_DBG_MASK, \ ++ RV1126_DIV_PCLK_DBG_SHIFT), \ +} + -+#define RK1808_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++#define RV1126_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ +{ \ + .prate = _prate, \ + .divs = { \ -+ RK1808_CLKSEL0(_aclk_core, _pclk_dbg), \ ++ RV1126_CLKSEL1(_aclk_core, _pclk_dbg), \ + }, \ +} + -+static struct rockchip_cpuclk_rate_table rk1808_cpuclk_rates[] __initdata = { -+ RK1808_CPUCLK_RATE(1608000000, 1, 7), -+ RK1808_CPUCLK_RATE(1512000000, 1, 7), -+ RK1808_CPUCLK_RATE(1488000000, 1, 5), -+ RK1808_CPUCLK_RATE(1416000000, 1, 5), -+ RK1808_CPUCLK_RATE(1392000000, 1, 5), -+ RK1808_CPUCLK_RATE(1296000000, 1, 5), -+ RK1808_CPUCLK_RATE(1200000000, 1, 5), -+ RK1808_CPUCLK_RATE(1104000000, 1, 5), -+ RK1808_CPUCLK_RATE(1008000000, 1, 5), -+ RK1808_CPUCLK_RATE(912000000, 1, 5), -+ RK1808_CPUCLK_RATE(816000000, 1, 3), -+ RK1808_CPUCLK_RATE(696000000, 1, 3), -+ RK1808_CPUCLK_RATE(600000000, 1, 3), -+ RK1808_CPUCLK_RATE(408000000, 1, 1), -+ RK1808_CPUCLK_RATE(312000000, 1, 1), -+ RK1808_CPUCLK_RATE(216000000, 1, 1), -+ RK1808_CPUCLK_RATE(96000000, 1, 1), ++static struct rockchip_cpuclk_rate_table rv1126_cpuclk_rates[] __initdata = { ++ RV1126_CPUCLK_RATE(1608000000, 1, 7), ++ RV1126_CPUCLK_RATE(1584000000, 1, 7), ++ RV1126_CPUCLK_RATE(1560000000, 1, 7), 
++ RV1126_CPUCLK_RATE(1536000000, 1, 7), ++ RV1126_CPUCLK_RATE(1512000000, 1, 7), ++ RV1126_CPUCLK_RATE(1488000000, 1, 5), ++ RV1126_CPUCLK_RATE(1464000000, 1, 5), ++ RV1126_CPUCLK_RATE(1440000000, 1, 5), ++ RV1126_CPUCLK_RATE(1416000000, 1, 5), ++ RV1126_CPUCLK_RATE(1392000000, 1, 5), ++ RV1126_CPUCLK_RATE(1368000000, 1, 5), ++ RV1126_CPUCLK_RATE(1344000000, 1, 5), ++ RV1126_CPUCLK_RATE(1320000000, 1, 5), ++ RV1126_CPUCLK_RATE(1296000000, 1, 5), ++ RV1126_CPUCLK_RATE(1272000000, 1, 5), ++ RV1126_CPUCLK_RATE(1248000000, 1, 5), ++ RV1126_CPUCLK_RATE(1224000000, 1, 5), ++ RV1126_CPUCLK_RATE(1200000000, 1, 5), ++ RV1126_CPUCLK_RATE(1104000000, 1, 5), ++ RV1126_CPUCLK_RATE(1008000000, 1, 5), ++ RV1126_CPUCLK_RATE(912000000, 1, 5), ++ RV1126_CPUCLK_RATE(816000000, 1, 3), ++ RV1126_CPUCLK_RATE(696000000, 1, 3), ++ RV1126_CPUCLK_RATE(600000000, 1, 3), ++ RV1126_CPUCLK_RATE(408000000, 1, 1), ++ RV1126_CPUCLK_RATE(312000000, 1, 1), ++ RV1126_CPUCLK_RATE(216000000, 1, 1), ++ RV1126_CPUCLK_RATE(96000000, 1, 1), +}; + -+static const struct rockchip_cpuclk_reg_data rk1808_cpuclk_data = { -+ .core_reg[0] = RK1808_CLKSEL_CON(0), ++static const struct rockchip_cpuclk_reg_data rv1126_cpuclk_data = { ++ .core_reg[0] = RV1126_CLKSEL_CON(0), + .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0xf, ++ .div_core_mask[0] = 0x1f, + .num_cores = 1, -+ .mux_core_alt = 2, -+ .mux_core_main = 0, ++ .mux_core_alt = 0, ++ .mux_core_main = 2, + .mux_core_shift = 6, + .mux_core_mask = 0x3, +}; + -+PNAME(mux_pll_p) = { "xin24m", "xin32k"}; -+PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "xin32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_cpll_apll_p) = { "gpll", "cpll", "apll" }; -+PNAME(mux_npu_p) = { "clk_npu_div", "clk_npu_np5" }; -+PNAME(mux_ddr_p) = { "dpll_ddr", "gpll_ddr" }; -+PNAME(mux_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; -+PNAME(mux_gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; -+PNAME(mux_dclk_vopraw_p) = { "dclk_vopraw_src", "dclk_vopraw_frac", "xin24m" }; -+PNAME(mux_dclk_voplite_p) = { "dclk_voplite_src", "dclk_voplite_frac", "xin24m" }; -+PNAME(mux_24m_npll_gpll_usb480m_p) = { "xin24m", "npll", "gpll", "usb480m" }; -+PNAME(mux_usb3_otg0_suspend_p) = { "xin32k", "xin24m" }; -+PNAME(mux_pcie_aux_p) = { "xin24m", "clk_pcie_src" }; -+PNAME(mux_gpll_cpll_npll_24m_p) = { "gpll", "cpll", "npll", "xin24m" }; -+PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; -+PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; -+PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; -+PNAME(mux_cpll_npll_ppll_p) = { "cpll", "npll", "ppll" }; -+PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; -+PNAME(mux_gmac_rgmii_speed_p) = { "clk_gmac_tx_src", "clk_gmac_tx_src", "clk_gmac_tx_div50", "clk_gmac_tx_div5" }; -+PNAME(mux_gmac_rmii_speed_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; -+PNAME(mux_gmac_rx_tx_p) = { "clk_gmac_rgmii_speed", "clk_gmac_rmii_speed" }; -+PNAME(mux_gpll_usb480m_cpll_npll_p) = { "gpll", "usb480m", "cpll", "npll" }; -+PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac", "xin24m" }; -+PNAME(mux_uart2_p) = { "clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac", "xin24m" }; -+PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac", "xin24m" }; -+PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac", "xin24m" }; -+PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac", "xin24m" }; -+PNAME(mux_uart6_p) = { "clk_uart6_src", "clk_uart6_np5", "clk_uart6_frac", "xin24m" }; 
-+PNAME(mux_uart7_p) = { "clk_uart7_src", "clk_uart7_np5", "clk_uart7_frac", "xin24m" }; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_rtc32k_p) = { "clk_pmupvtm_divout", "xin32k", "clk_osc0_div32k" }; ++PNAME(mux_clk_32k_ioe_p) = { "xin32k", "clk_rtc32k" }; ++PNAME(mux_wifi_p) = { "clk_wifi_osc0", "clk_wifi_div" }; ++PNAME(mux_uart1_p) = { "sclk_uart1_div", "sclk_uart1_fracdiv", "xin24m" }; ++PNAME(mux_xin24m_gpll_p) = { "xin24m", "gpll" }; +PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc32k" }; ++PNAME(mux_usbphy_otg_ref_p) = { "clk_ref12m", "xin_osc0_div2_usbphyref_otg" }; ++PNAME(mux_usbphy_host_ref_p) = { "clk_ref12m", "xin_osc0_div2_usbphyref_host" }; ++PNAME(mux_mipidsiphy_ref_p) = { "clk_ref24m", "xin_osc0_mipiphyref" }; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "clk_rtc32k" }; ++PNAME(mux_hclk_pclk_pdbus_p) = { "gpll", "dummy_cpll" }; ++PNAME(mux_uart0_p) = { "sclk_uart0_div", "sclk_uart0_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "sclk_uart2_div", "sclk_uart2_frac", "xin24m" }; ++PNAME(mux_uart3_p) = { "sclk_uart3_div", "sclk_uart3_frac", "xin24m" }; ++PNAME(mux_uart4_p) = { "sclk_uart4_div", "sclk_uart4_frac", "xin24m" }; ++PNAME(mux_uart5_p) = { "sclk_uart5_div", "sclk_uart5_frac", "xin24m" }; ++PNAME(mux_i2s0_tx_p) = { "mclk_i2s0_tx_div", "mclk_i2s0_tx_fracdiv", "i2s0_mclkin", "xin12m" }; ++PNAME(mux_i2s0_rx_p) = { "mclk_i2s0_rx_div", "mclk_i2s0_rx_fracdiv", "i2s0_mclkin", "xin12m" }; ++PNAME(mux_i2s0_tx_out2io_p) = { "mclk_i2s0_tx", "xin12m" }; ++PNAME(mux_i2s0_rx_out2io_p) = { "mclk_i2s0_rx", "xin12m" }; ++PNAME(mux_i2s1_p) = { "mclk_i2s1_div", "mclk_i2s1_fracdiv", "i2s1_mclkin", "xin12m" }; ++PNAME(mux_i2s1_out2io_p) = { "mclk_i2s1", "xin12m" }; ++PNAME(mux_i2s2_p) = { "mclk_i2s2_div", "mclk_i2s2_fracdiv", "i2s2_mclkin", "xin12m" }; ++PNAME(mux_i2s2_out2io_p) = { "mclk_i2s2", "xin12m" }; ++PNAME(mux_audpwm_p) = { "sclk_audpwm_div", "sclk_audpwm_fracdiv", "xin24m" }; ++PNAME(mux_dclk_vop_p) = { "dclk_vop_div", "dclk_vop_fracdiv", "xin24m" }; ++PNAME(mux_aclk_pdvi_p) = { "aclk_pdvi_div", "aclk_pdvi_np5" }; ++PNAME(mux_clk_isp_p) = { "clk_isp_div", "clk_isp_np5" }; ++PNAME(mux_gpll_usb480m_p) = { "gpll", "usb480m" }; ++PNAME(mux_cif_out2io_p) = { "xin24m", "clk_cif_out2io_div", "clk_cif_out2io_fracdiv" }; ++PNAME(mux_mipicsi_out2io_p) = { "xin24m", "clk_mipicsi_out2io_div", "clk_mipicsi_out2io_fracdiv" }; ++PNAME(mux_aclk_pdispp_p) = { "aclk_pdispp_div", "aclk_pdispp_np5" }; ++PNAME(mux_clk_ispp_p) = { "clk_ispp_div", "clk_ispp_np5" }; ++PNAME(mux_usb480m_gpll_p) = { "usb480m", "gpll" }; ++PNAME(clk_gmac_src_m0_p) = { "clk_gmac_div", "clk_gmac_rgmii_m0" }; ++PNAME(clk_gmac_src_m1_p) = { "clk_gmac_div", "clk_gmac_rgmii_m1" }; ++PNAME(mux_clk_gmac_src_p) = { "clk_gmac_src_m0", "clk_gmac_src_m1" }; ++PNAME(mux_rgmii_clk_p) = { "clk_gmac_tx_div50", "clk_gmac_tx_div5", "clk_gmac_tx_src", "clk_gmac_tx_src"}; ++PNAME(mux_rmii_clk_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; ++PNAME(mux_gmac_tx_rx_p) = { "rgmii_mode_clk", "rmii_mode_clk" }; ++PNAME(mux_dpll_gpll_p) = { "dpll", "gpll" }; ++PNAME(mux_aclk_pdnpu_p) = { "aclk_pdnpu_div", "aclk_pdnpu_np5" }; ++PNAME(mux_clk_npu_p) = { "clk_npu_div", "clk_npu_np5" }; ++ ++ ++#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++PNAME(mux_gpll_usb480m_cpll_xin24m_p) = { "gpll", "usb480m", "cpll", "xin24m" }; ++PNAME(mux_gpll_cpll_dpll_p) = { "gpll", "cpll", "dummy_dpll" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_cpll_usb480m_xin24m_p) = { "gpll", "cpll", 
"usb480m", "xin24m" }; ++PNAME(mux_cpll_gpll_p) = { "cpll", "gpll" }; +PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(mux_gpll_xin24m_cpll_npll_p) = { "gpll", "xin24m", "cpll", "npll" }; -+PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; -+PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in", "xin12m" }; -+PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; -+PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m", "clk_i2s0_8ch_rx" }; -+PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in", "xin12m" }; -+PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; -+PNAME(mux_i2s0_8ch_rx_out_p) = { "clk_i2s0_8ch_rx", "xin12m", "clk_i2s0_8ch_tx" }; -+PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in", "xin12m" }; -+PNAME(mux_i2s1_2ch_out_p) = { "clk_i2s1_2ch", "xin12m" }; -+PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac" }; -+PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; -+PNAME(mux_gpll_usb480m_cpll_ppll_p) = { "gpll", "usb480m", "cpll", "ppll" }; -+PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac", "xin24m" }; -+PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; -+PNAME(mux_pciephy_ref_p) = { "xin24m", "clk_pciephy_src" }; -+PNAME(mux_ppll_xin24m_p) = { "ppll", "xin24m" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", "xin32k" }; -+PNAME(mux_clk_32k_ioe_p) = { "clk_rtc32k_pmu", "xin32k" }; ++PNAME(mux_cpll_hpll_gpll_p) = { "cpll", "hpll", "gpll" }; ++PNAME(mux_cpll_gpll_hpll_p) = { "cpll", "gpll", "hpll" }; ++PNAME(mux_gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; ++PNAME(mux_gpll_cpll_apll_hpll_p) = { "gpll", "cpll", "dummy_apll", "hpll" }; ++#else ++PNAME(mux_gpll_usb480m_cpll_xin24m_p) = { "gpll", "usb480m", "dummy_cpll", "xin24m" }; ++PNAME(mux_gpll_cpll_dpll_p) = { "gpll", "dummy_cpll", "dummy_dpll" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "dummy_cpll" }; ++PNAME(mux_gpll_cpll_usb480m_xin24m_p) = { "gpll", "dummy_cpll", "usb480m", "xin24m" }; ++PNAME(mux_cpll_gpll_p) = { "dummy_cpll", "gpll" }; ++PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "dummy_cpll", "xin24m" }; ++PNAME(mux_cpll_hpll_gpll_p) = { "dummy_cpll", "dummy_hpll", "gpll" }; ++PNAME(mux_cpll_gpll_hpll_p) = { "dummy_cpll", "gpll", "dummy_hpll" }; ++PNAME(mux_gpll_cpll_hpll_p) = { "gpll", "dummy_cpll", "dummy_hpll" }; ++PNAME(mux_gpll_cpll_apll_hpll_p) = { "gpll", "dummy_cpll", "dummy_apll", "dummy_hpll" }; ++#endif + -+static struct rockchip_pll_clock rk1808_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, -+ 0, RK1808_PLL_CON(0), -+ RK1808_MODE_CON, 0, 0, 0, rk1808_pll_rates), -+ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, -+ 0, RK1808_PLL_CON(8), -+ RK1808_MODE_CON, 2, 1, 0, NULL), -+ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RK1808_PLL_CON(16), -+ RK1808_MODE_CON, 4, 2, 0, rk1808_pll_rates), -+ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK1808_PLL_CON(24), -+ RK1808_MODE_CON, 6, 3, 0, rk1808_pll_rates), -+ [npll] = PLL(pll_rk3036, PLL_NPLL, "npll", mux_pll_p, -+ 0, RK1808_PLL_CON(32), -+ RK1808_MODE_CON, 8, 5, 0, rk1808_pll_rates), -+ [ppll] = PLL(pll_rk3036, PLL_PPLL, "ppll", mux_pll_p, -+ 0, RK1808_PMU_PLL_CON(0), -+ RK1808_PMU_MODE_CON, 0, 4, 0, rk1808_pll_rates), ++static u32 rgmii_mux_idx[] = { 2, 3, 0, 1 }; ++ 
++static struct rockchip_pll_clock rv1126_pmu_pll_clks[] __initdata = { ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ CLK_IS_CRITICAL, RV1126_PMU_PLL_CON(0), ++ RV1126_PMU_MODE, 0, 3, 0, rv1126_pll_rates), ++}; ++ ++static struct rockchip_pll_clock rv1126_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1126_PLL_CON(0), ++ RV1126_MODE_CON, 0, 0, 0, rv1126_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1126_PLL_CON(8), ++ RV1126_MODE_CON, 2, 1, 0, NULL), ++#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ CLK_IS_CRITICAL, RV1126_PLL_CON(16), ++ RV1126_MODE_CON, 4, 2, 0, rv1126_pll_rates), ++ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, ++ CLK_IS_CRITICAL, RV1126_PLL_CON(24), ++ RV1126_MODE_CON, 6, 4, 0, rv1126_pll_rates), ++#else ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RV1126_PLL_CON(16), ++ RV1126_MODE_CON, 4, 2, 0, rv1126_pll_rates), ++ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, ++ 0, RV1126_PLL_CON(24), ++ RV1126_MODE_CON, 6, 4, 0, rv1126_pll_rates), ++#endif +}; + +#define MFLAGS CLK_MUX_HIWORD_MASK +#define DFLAGS CLK_DIVIDER_HIWORD_MASK +#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+static struct rockchip_clk_branch rk1808_uart1_fracmux __initdata = -+ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(39), 14, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk1808_uart2_fracmux __initdata = -+ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(42), 14, 2, MFLAGS); -+ -+static struct rockchip_clk_branch rk1808_uart3_fracmux __initdata = -+ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(45), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_rtc32k_fracmux __initdata = ++ MUX(CLK_RTC32K, "clk_rtc32k", mux_rtc32k_p, CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(0), 7, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_uart4_fracmux __initdata = -+ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(48), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart1_fracmux __initdata = ++ MUX(SCLK_UART1_MUX, "sclk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(4), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_uart5_fracmux __initdata = -+ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(51), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart0_fracmux __initdata = ++ MUX(SCLK_UART0_MUX, "sclk_uart0_mux", mux_uart0_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(10), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_uart6_fracmux __initdata = -+ MUX(0, "clk_uart6_mux", mux_uart6_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(54), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart2_fracmux __initdata = ++ MUX(SCLK_UART2_MUX, "sclk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(12), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_uart7_fracmux __initdata = -+ MUX(0, "clk_uart7_mux", mux_uart7_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(57), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart3_fracmux __initdata = ++ MUX(SCLK_UART3_MUX, "sclk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(14), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch 
rk1808_dclk_vopraw_fracmux __initdata = -+ MUX(0, "dclk_vopraw_mux", mux_dclk_vopraw_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(5), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart4_fracmux __initdata = ++ MUX(SCLK_UART4_MUX, "sclk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(16), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_dclk_voplite_fracmux __initdata = -+ MUX(0, "dclk_voplite_mux", mux_dclk_voplite_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(7), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_uart5_fracmux __initdata = ++ MUX(SCLK_UART5_MUX, "sclk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(18), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_pdm_fracmux __initdata = -+ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(30), 15, 1, MFLAGS); ++static struct rockchip_clk_branch rv1126_i2s0_tx_fracmux __initdata = ++ MUX(MCLK_I2S0_TX_MUX, "mclk_i2s0_tx_mux", mux_i2s0_tx_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(30), 0, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_i2s0_8ch_tx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_i2s0_rx_fracmux __initdata = ++ MUX(MCLK_I2S0_RX_MUX, "mclk_i2s0_rx_mux", mux_i2s0_rx_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(30), 2, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_i2s0_8ch_rx_fracmux __initdata = -+ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_i2s1_fracmux __initdata = ++ MUX(MCLK_I2S1_MUX, "mclk_i2s1_mux", mux_i2s1_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(31), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_i2s1_2ch_fracmux __initdata = -+ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(36), 10, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_i2s2_fracmux __initdata = ++ MUX(MCLK_I2S2_MUX, "mclk_i2s2_mux", mux_i2s2_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(33), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_rtc32k_pmu_fracmux __initdata = -+ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_audpwm_fracmux __initdata = ++ MUX(SCLK_AUDPWM_MUX, "mclk_audpwm_mux", mux_audpwm_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(36), 8, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_uart0_pmu_fracmux __initdata = -+ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); ++static struct rockchip_clk_branch rv1126_dclk_vop_fracmux __initdata = ++ MUX(DCLK_VOP_MUX, "dclk_vop_mux", mux_dclk_vop_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(47), 10, 2, MFLAGS); + -+static struct rockchip_clk_branch rk1808_clk_branches[] __initdata = { -+ /* -+ * Clock-Architecture Diagram 1 -+ */ ++static struct rockchip_clk_branch rv1126_cif_out2io_fracmux __initdata = ++ MUX(CLK_CIF_OUT_MUX, "clk_cif_out2io_mux", mux_cif_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(50), 14, 2, MFLAGS); + -+ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, -+ RK1808_MODE_CON, 10, 2, MFLAGS), -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++static struct rockchip_clk_branch rv1126_mipicsi_out2io_fracmux 
__initdata = ++ MUX(CLK_MIPICSI_OUT_MUX, "clk_mipicsi_out2io_mux", mux_mipicsi_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(73), 10, 2, MFLAGS); + ++static struct rockchip_clk_branch rv1126_clk_pmu_branches[] __initdata = { + /* + * Clock-Architecture Diagram 2 + */ ++ /* PD_PMU */ ++ COMPOSITE_NOMUX(PCLK_PDPMU, "pclk_pdpmu", "gpll", CLK_IS_CRITICAL, ++ RV1126_PMU_CLKSEL_CON(1), 0, 5, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "cpll_core", "cpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK1808_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK1808_CLKGATE_CON(0), 2, GFLAGS), -+ -+ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(0), 4, GFLAGS), -+ -+ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RK1808_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_OSC0_DIV32K, "clk_osc0_div32k", "xin24m", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKSEL_CON(13), 0, ++ RV1126_PMU_CLKGATE_CON(2), 9, GFLAGS, ++ &rv1126_rtc32k_fracmux), + -+ COMPOSITE_NOMUX(MSCLK_CORE_NIU, "msclk_core_niu", "gpll", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(18), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(0), 1, GFLAGS), ++ MUXPMUGRF(CLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, ++ RV1126_PMUGRF_SOC_CON0, 0, 1, MFLAGS), + -+ /* -+ * Clock-Architecture Diagram 3 -+ */ ++ COMPOSITE_NOMUX(CLK_WIFI_DIV, "clk_wifi_div", "gpll", 0, ++ RV1126_PMU_CLKSEL_CON(12), 0, 6, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(2), 10, GFLAGS), ++ GATE(CLK_WIFI_OSC0, "clk_wifi_osc0", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(2), 11, GFLAGS), ++ MUX(CLK_WIFI, "clk_wifi", mux_wifi_p, CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(12), 8, 1, MFLAGS), + -+ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(15), 11, 1, MFLAGS, 12, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(0, "aclk_gic_niu", "aclk_gic_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(0, "aclk_core2gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(0, "aclk_gic2core", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(0, "aclk_spinlock", "aclk_gic_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(0), 1, GFLAGS), + -+ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, -+ RK1808_CLKSEL_CON(16), 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, -+ RK1808_CLKGATE_CON(8), 12, GFLAGS), -+ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 10, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, -+ RK1808_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 11, GFLAGS), ++ GATE(PCLK_UART1, 
"pclk_uart1", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE(SCLK_UART1_DIV, "sclk_uart1_div", mux_gpll_usb480m_cpll_xin24m_p, 0, ++ RV1126_PMU_CLKSEL_CON(4), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART1_FRACDIV, "sclk_uart1_fracdiv", "sclk_uart1_div", CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(5), 0, ++ RV1126_PMU_CLKGATE_CON(0), 13, GFLAGS, ++ &rv1126_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "sclk_uart1_mux", 0, ++ RV1126_PMU_CLKGATE_CON(0), 14, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 4 -+ */ -+ COMPOSITE_NOGATE(0, "clk_npu_div", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, -+ RK1808_CLKSEL_CON(1), 8, 2, MFLAGS, 0, 4, DFLAGS), -+ COMPOSITE_NOGATE_HALFDIV(0, "clk_npu_np5", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, -+ RK1808_CLKSEL_CON(1), 10, 2, MFLAGS, 4, 4, DFLAGS), -+ MUX(0, "clk_npu_pre", mux_npu_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(1), 15, 1, MFLAGS), -+ FACTOR(0, "clk_npu_scan", "clk_npu_pre", 0, 1, 2), -+ GATE(SCLK_NPU, "clk_npu", "clk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C0, "clk_i2c0", "gpll", 0, ++ RV1126_PMU_CLKSEL_CON(2), 0, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C2, "clk_i2c2", "gpll", 0, ++ RV1126_PMU_CLKSEL_CON(3), 0, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(0), 10, GFLAGS), + -+ COMPOSITE(0, "aclk_npu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(2), 14, 1, MFLAGS, 0, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE(0, "hclk_npu_pre", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 9, GFLAGS), -+ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 11, GFLAGS), -+ GATE(0, "aclk_npu_niu", "aclk_npu_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_NOMUX(0, "aclk_npu2mem", "aclk_npu_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(2), 4, 4, DFLAGS, -+ RK1808_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, -+ RK1808_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(0, "hclk_npu_niu", "hclk_npu_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(CLK_PWM0, "clk_pwm0", mux_xin24m_gpll_p, 0, ++ RV1126_PMU_CLKSEL_CON(6), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE(CLK_PWM1, "clk_pwm1", mux_xin24m_gpll_p, 0, ++ RV1126_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 4, GFLAGS), + -+ GATE(SCLK_PVTM_NPU, "clk_pvtm_npu", "xin24m", 0, -+ RK1808_CLKGATE_CON(0), 15, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE(CLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, ++ RV1126_PMU_CLKSEL_CON(9), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 12, GFLAGS), + -+ COMPOSITE(ACLK_IMEM_PRE, "aclk_imem_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(17), 7, 1, MFLAGS, 0, 5, 
DFLAGS, -+ RK1808_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(ACLK_IMEM0, "aclk_imem0", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(0, "aclk_imem0_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 10, GFLAGS), -+ GATE(ACLK_IMEM1, "aclk_imem1", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(0, "aclk_imem1_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 11, GFLAGS), -+ GATE(ACLK_IMEM2, "aclk_imem2", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 8, GFLAGS), -+ GATE(0, "aclk_imem2_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 12, GFLAGS), -+ GATE(ACLK_IMEM3, "aclk_imem3", "aclk_imem_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(7), 9, GFLAGS), -+ GATE(0, "aclk_imem3_niu", "aclk_imem_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(7), 13, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, ++ RV1126_PMU_CLKSEL_CON(8), 15, 1, MFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 10, GFLAGS), + -+ COMPOSITE(HSCLK_IMEM, "hsclk_imem", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(17), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(PCLK_PMUPVTM, "pclk_pmupvtm", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(CLK_PMUPVTM, "clk_pmupvtm", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(CLK_CORE_PMUPVTM, "clk_core_pmupvtm", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(2), 7, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 5 -+ */ -+ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF12M, "clk_ref12m", "gpll", 0, ++ RV1126_PMU_CLKSEL_CON(7), 8, 7, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 15, GFLAGS), ++ GATE(0, "xin_osc0_usbphyref_otg", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(0, "xin_osc0_usbphyref_host", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ FACTOR(0, "xin_osc0_div2_usbphyref_otg", "xin_osc0_usbphyref_otg", 0, 1, 2), ++ FACTOR(0, "xin_osc0_div2_usbphyref_host", "xin_osc0_usbphyref_host", 0, 1, 2), ++ MUX(CLK_USBPHY_OTG_REF, "clk_usbphy_otg_ref", mux_usbphy_otg_ref_p, CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(7), 6, 1, MFLAGS), ++ MUX(CLK_USBPHY_HOST_REF, "clk_usbphy_host_ref", mux_usbphy_host_ref_p, CLK_SET_RATE_PARENT, ++ RV1126_PMU_CLKSEL_CON(7), 7, 1, MFLAGS), + -+ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 11, GFLAGS), -+ GATE(0, "aclk_split", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 15, GFLAGS), -+ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 8, GFLAGS), -+ GATE(0, "clk_ddrdfi_ctl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(0, "clk_stdby", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 13, GFLAGS), -+ GATE(0, "aclk_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(0, "clk_core_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NOMUX(CLK_REF24M, "clk_ref24m", "gpll", 0, ++ RV1126_PMU_CLKSEL_CON(7), 0, 6, DFLAGS, ++ RV1126_PMU_CLKGATE_CON(1), 14, GFLAGS), ++ GATE(0, "xin_osc0_mipiphyref", "xin24m", 0, ++ RV1126_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ MUX(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, ++ 
RV1126_PMU_CLKSEL_CON(7), 15, 1, MFLAGS), + -+ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 5, GFLAGS), -+ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(8), 6, GFLAGS), ++#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(0), 15, GFLAGS), + -+ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_ddr_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS), -+ FACTOR(0, "clk_ddrphy1x_out", "sclk_ddrc", CLK_IGNORE_UNUSED, 1, 1), ++ GATE(PCLK_PMUSGRF, "pclk_pmusgrf", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(PCLK_PMUGRF, "pclk_pmugrf", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(1), 13, GFLAGS), ++ GATE(PCLK_PMUCRU, "pclk_pmucru", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(PCLK_CHIPVEROTP, "pclk_chipverotp", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(PCLK_PDPMU_NIU, "pclk_pdpmu_niu", "pclk_pdpmu", CLK_IGNORE_UNUSED, ++ RV1126_PMU_CLKGATE_CON(0), 2, GFLAGS), + -+ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(3), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 10, GFLAGS), -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_MSCH, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(PCLK_STDBY, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 12, GFLAGS), -+ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(0, "pclk_ddrdfi_ctl", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pdpmu", 0, ++ RV1126_PMU_CLKGATE_CON(0), 7, GFLAGS), ++#endif ++}; + ++static struct rockchip_clk_branch rv1126_clk_branches[] __initdata = { + /* -+ * Clock-Architecture Diagram 6 ++ * Clock-Architecture Diagram 1 + */ ++ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RV1126_MODE_CON, 10, 2, MFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ COMPOSITE(HSCLK_VIO, "hsclk_vio", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 0, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_VIO, "lsclk_vio", "hsclk_vio", 0, -+ RK1808_CLKSEL_CON(4), 8, 4, DFLAGS, -+ RK1808_CLKGATE_CON(3), 12, GFLAGS), -+ GATE(0, "hsclk_vio_niu", "hsclk_vio", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(4), 0, GFLAGS), -+ GATE(0, "lsclk_vio_niu", "lsclk_vio", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(4), 1, GFLAGS), -+ GATE(ACLK_VOPRAW, "aclk_vopraw", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 2, GFLAGS), -+ GATE(HCLK_VOPRAW, "hclk_vopraw", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 3, GFLAGS), -+ GATE(ACLK_VOPLITE, "aclk_voplite", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 4, GFLAGS), -+ GATE(HCLK_VOPLITE, "hclk_voplite", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 5, GFLAGS), -+ GATE(PCLK_DSI_TX, "pclk_dsi_tx", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 6, GFLAGS), -+ GATE(PCLK_CSI_TX, "pclk_csi_tx", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 7, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 9, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "hsclk_vio", 0, -+ 
RK1808_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 14, GFLAGS), -+ GATE(ACLK_CIF, "aclk_cif", "hsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 10, GFLAGS), -+ GATE(HCLK_CIF, "hclk_cif", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(PCLK_CSI2HOST, "pclk_csi2host", "lsclk_vio", 0, -+ RK1808_CLKGATE_CON(4), 12, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 3 ++ */ ++ /* PD_CORE */ ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(1), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1126_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(CLK_CORE_CPUPVTM, "clk_core_cpupvtm", "armclk", 0, ++ RV1126_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(PCLK_CPUPVTM, "pclk_cpupvtm", "pclk_dbg", 0, ++ RV1126_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(CLK_CPUPVTM, "clk_cpupvtm", "xin24m", 0, ++ RV1126_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDCORE_NIU, "hclk_pdcore_niu", "gpll", CLK_IGNORE_UNUSED, ++ RV1126_CLKSEL_CON(0), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(0), 8, GFLAGS), + -+ COMPOSITE(0, "dclk_vopraw_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(3), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "dclk_vopraw_frac", "dclk_vopraw_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(6), 0, -+ RK1808_CLKGATE_CON(3), 2, GFLAGS, -+ &rk1808_dclk_vopraw_fracmux), -+ GATE(DCLK_VOPRAW, "dclk_vopraw", "dclk_vopraw_mux", 0, -+ RK1808_CLKGATE_CON(3), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ /* PD_BUS */ ++ COMPOSITE(0, "aclk_pdbus_pre", mux_gpll_cpll_dpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(2), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(ACLK_PDBUS, "aclk_pdbus", "aclk_pdbus_pre", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE(0, "hclk_pdbus_pre", mux_hclk_pclk_pdbus_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(HCLK_PDBUS, "hclk_pdbus", "hclk_pdbus_pre", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(2), 12, GFLAGS), ++ COMPOSITE(0, "pclk_pdbus_pre", mux_hclk_pclk_pdbus_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(PCLK_PDBUS, "pclk_pdbus", "pclk_pdbus_pre", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(2), 13, GFLAGS), ++ /* aclk_dmac is controlled by sgrf_clkgat_con. 
*/ ++ SGRF_GATE(ACLK_DMAC, "aclk_dmac", "hclk_pdbus"), ++ GATE(ACLK_DCF, "aclk_dcf", "hclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(3), 6, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(3), 7, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(6), 14, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 10, GFLAGS), + -+ COMPOSITE(0, "dclk_voplite_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(7), 10, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "dclk_voplite_frac", "dclk_voplite_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(8), 0, -+ RK1808_CLKGATE_CON(3), 5, GFLAGS, -+ &rk1808_dclk_voplite_fracmux), -+ GATE(DCLK_VOPLITE, "dclk_voplite", "dclk_voplite_mux", 0, -+ RK1808_CLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE(CLK_SCR1, "clk_scr1", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(3), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(0, "clk_scr1_niu", "clk_scr1", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(CLK_SCR1_CORE, "clk_scr1_core", "clk_scr1", 0, ++ RV1126_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(CLK_SCR1_RTC, "clk_scr1_rtc", "xin24m", 0, ++ RV1126_CLKGATE_CON(4), 9, GFLAGS), ++ GATE(CLK_SCR1_JTAG, "clk_scr1_jtag", "clk_scr1_jtag_io", 0, ++ RV1126_CLKGATE_CON(4), 10, GFLAGS), + -+ COMPOSITE_NOMUX(SCLK_TXESC, "clk_txesc", "gpll", 0, -+ RK1808_CLKSEL_CON(9), 0, 12, DFLAGS, -+ RK1808_CLKGATE_CON(3), 7, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(5), 0, GFLAGS), ++ COMPOSITE(SCLK_UART0_DIV, "sclk_uart0_div", mux_gpll_cpll_usb480m_xin24m_p, 0, ++ RV1126_CLKSEL_CON(10), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(5), 1, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART0_FRAC, "sclk_uart0_frac", "sclk_uart0_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(11), 0, ++ RV1126_CLKGATE_CON(5), 2, GFLAGS, ++ &rv1126_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "sclk_uart0_mux", 0, ++ RV1126_CLKGATE_CON(5), 3, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(5), 4, GFLAGS), ++ COMPOSITE(SCLK_UART2_DIV, "sclk_uart2_div", mux_gpll_cpll_usb480m_xin24m_p, 0, ++ RV1126_CLKSEL_CON(12), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(5), 5, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART2_FRAC, "sclk_uart2_frac", "sclk_uart2_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(13), 0, ++ RV1126_CLKGATE_CON(5), 6, GFLAGS, ++ &rv1126_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "sclk_uart2_mux", 0, ++ RV1126_CLKGATE_CON(5), 7, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(5), 8, GFLAGS), ++ COMPOSITE(SCLK_UART3_DIV, "sclk_uart3_div", mux_gpll_cpll_usb480m_xin24m_p, 0, ++ RV1126_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(5), 9, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART3_FRAC, "sclk_uart3_frac", "sclk_uart3_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(15), 0, ++ RV1126_CLKGATE_CON(5), 10, GFLAGS, ++ &rv1126_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "sclk_uart3_mux", 0, ++ RV1126_CLKGATE_CON(5), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(5), 12, GFLAGS), ++ COMPOSITE(SCLK_UART4_DIV, "sclk_uart4_div", mux_gpll_cpll_usb480m_xin24m_p, 0, ++ RV1126_CLKSEL_CON(16), 8, 2, MFLAGS, 0, 7, ++ DFLAGS, RV1126_CLKGATE_CON(5), 13, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART4_FRAC, "sclk_uart4_frac", "sclk_uart4_div", CLK_SET_RATE_PARENT, ++ 
RV1126_CLKSEL_CON(17), 0, ++ RV1126_CLKGATE_CON(5), 14, GFLAGS, ++ &rv1126_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "sclk_uart4_mux", 0, ++ RV1126_CLKGATE_CON(5), 15, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE(SCLK_UART5_DIV, "sclk_uart5_div", mux_gpll_cpll_usb480m_xin24m_p, 0, ++ RV1126_CLKSEL_CON(18), 8, 2, MFLAGS, 0, 7, ++ DFLAGS, RV1126_CLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_UART5_FRAC, "sclk_uart5_frac", "sclk_uart5_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(19), 0, ++ RV1126_CLKGATE_CON(6), 2, GFLAGS, ++ &rv1126_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "sclk_uart5_mux", 0, ++ RV1126_CLKGATE_CON(6), 3, GFLAGS), + -+ COMPOSITE(SCLK_RGA, "clk_rga", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 8, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(3), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C1, "clk_i2c1", "gpll", 0, ++ RV1126_CLKSEL_CON(5), 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(3), 12, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C3, "clk_i2c3", "gpll", 0, ++ RV1126_CLKSEL_CON(5), 8, 7, DFLAGS, ++ RV1126_CLKGATE_CON(3), 13, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(3), 14, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C4, "clk_i2c4", "gpll", 0, ++ RV1126_CLKSEL_CON(6), 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(3), 15, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(4), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_I2C5, "clk_i2c5", "gpll", 0, ++ RV1126_CLKSEL_CON(6), 8, 7, DFLAGS, ++ RV1126_CLKGATE_CON(4), 1, GFLAGS), + -+ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(10), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 10, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE(CLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(8), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(4), 3, GFLAGS), + -+ COMPOSITE(DCLK_CIF, "dclk_cif", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(CLK_CAPTURE_PWM2, "clk_capture_pwm2", "xin24m", 0, ++ RV1126_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(4), 4, GFLAGS), ++ COMPOSITE(CLK_PWM2, "clk_pwm2", mux_xin24m_gpll_p, 0, ++ RV1126_CLKSEL_CON(9), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RV1126_CLKGATE_CON(4), 5, GFLAGS), + -+ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_24m_npll_gpll_usb480m_p, 0, -+ RK1808_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK1808_CLKGATE_CON(3), 9, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 0, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, ++ RV1126_CLKSEL_CON(21), 15, 1, MFLAGS, ++ RV1126_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 2, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, ++ RV1126_CLKSEL_CON(22), 15, 1, MFLAGS, ++ RV1126_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 4, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, ++ RV1126_CLKSEL_CON(23), 15, 1, MFLAGS, ++ RV1126_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", 
"pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 6, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, ++ RV1126_CLKSEL_CON(24), 15, 1, MFLAGS, ++ RV1126_CLKGATE_CON(7), 7, GFLAGS), + -+ /* -+ * Clock-Architecture Diagram 7 -+ */ ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RV1126_CLKSEL_CON(20), 0, 11, DFLAGS, ++ RV1126_CLKGATE_CON(6), 5, GFLAGS), + -+ /* PD_PCIE */ -+ COMPOSITE_NODIV(0, "clk_pcie_src", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(12), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(5), 0, GFLAGS), -+ DIV(HSCLK_PCIE, "hsclk_pcie", "clk_pcie_src", 0, -+ RK1808_CLKSEL_CON(12), 0, 5, DFLAGS), -+ DIV(LSCLK_PCIE, "lsclk_pcie", "clk_pcie_src", 0, -+ RK1808_CLKSEL_CON(12), 8, 5, DFLAGS), -+ GATE(0, "hsclk_pcie_niu", "hsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 0, GFLAGS), -+ GATE(0, "lsclk_pcie_niu", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(0, "pclk_pcie_grf", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(ACLK_USB3OTG, "aclk_usb3otg", "hsclk_pcie", 0, -+ RK1808_CLKGATE_CON(6), 6, GFLAGS), -+ GATE(HCLK_HOST, "hclk_host", "lsclk_pcie", 0, -+ RK1808_CLKGATE_CON(6), 7, GFLAGS), -+ GATE(HCLK_HOST_ARB, "hclk_host_arb", "lsclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 8, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 12, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, ++ RV1126_CLKGATE_CON(6), 13, GFLAGS), + -+ COMPOSITE(ACLK_PCIE, "aclk_pcie", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(15), 8, 1, MFLAGS, 0, 4, DFLAGS, -+ RK1808_CLKGATE_CON(5), 5, GFLAGS), -+ DIV(0, "pclk_pcie_pre", "aclk_pcie", 0, -+ RK1808_CLKSEL_CON(15), 4, 4, DFLAGS), -+ GATE(0, "aclk_pcie_niu", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 10, GFLAGS), -+ GATE(ACLK_PCIE_MST, "aclk_pcie_mst", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(ACLK_PCIE_SLV, "aclk_pcie_slv", "aclk_pcie", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 3, GFLAGS), -+ GATE(0, "pclk_pcie_niu", "pclk_pcie_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 11, GFLAGS), -+ GATE(0, "pclk_pcie_dbi", "pclk_pcie_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(PCLK_PCIE, "pclk_pcie", "pclk_pcie_pre", 0, -+ RK1808_CLKGATE_CON(6), 9, GFLAGS), ++ GATE(ACLK_SPINLOCK, "aclk_spinlock", "hclk_pdbus", 0, ++ RV1126_CLKGATE_CON(6), 6, GFLAGS), + -+ COMPOSITE(0, "clk_pcie_aux_src", mux_cpll_gpll_npll_p, 0, -+ RK1808_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(5), 3, GFLAGS), -+ COMPOSITE_NODIV(SCLK_PCIE_AUX, "clk_pcie_aux", mux_pcie_aux_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(14), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(5), 4, GFLAGS), ++ GATE(ACLK_DECOM, "aclk_decom", "aclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(PCLK_DECOM, "pclk_decom", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 12, GFLAGS), ++ COMPOSITE(DCLK_DECOM, "dclk_decom", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(25), 15, 1, 
MFLAGS, 8, 7, DFLAGS, ++ RV1126_CLKGATE_CON(7), 13, GFLAGS), + -+ GATE(SCLK_USB3_OTG0_REF, "clk_usb3_otg0_ref", "xin24m", 0, -+ RK1808_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(PCLK_CAN, "pclk_can", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE(CLK_CAN, "clk_can", mux_gpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(25), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(7), 9, GFLAGS), ++ /* pclk_otp and clk_otp are controlled by sgrf_clkgat_con. */ ++ SGRF_GATE(CLK_OTP, "clk_otp", "xin24m"), ++ SGRF_GATE(PCLK_OTP, "pclk_otp", "pclk_pdbus"), + -+ COMPOSITE(SCLK_USB3_OTG0_SUSPEND, "clk_usb3_otg0_suspend", mux_usb3_otg0_suspend_p, 0, -+ RK1808_CLKSEL_CON(13), 12, 1, MFLAGS, 0, 10, DFLAGS, -+ RK1808_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_NPU_TSADC, "pclk_npu_tsadc", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(24), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_NPU_TSADC, "clk_npu_tsadc", "xin24m", 0, ++ RV1126_CLKSEL_CON(71), 0, 11, DFLAGS, ++ RV1126_CLKGATE_CON(24), 4, GFLAGS), ++ GATE(CLK_NPU_TSADCPHY, "clk_npu_tsadcphy", "clk_npu_tsadc", 0, ++ RV1126_CLKGATE_CON(24), 5, GFLAGS), ++ GATE(PCLK_CPU_TSADC, "pclk_cpu_tsadc", "pclk_pdbus", 0, ++ RV1126_CLKGATE_CON(24), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_CPU_TSADC, "clk_cpu_tsadc", "xin24m", 0, ++ RV1126_CLKSEL_CON(70), 0, 11, DFLAGS, ++ RV1126_CLKGATE_CON(24), 1, GFLAGS), ++ GATE(CLK_CPU_TSADCPHY, "clk_cpu_tsadcphy", "clk_cpu_tsadc", 0, ++ RV1126_CLKGATE_CON(24), 2, GFLAGS), + + /* -+ * Clock-Architecture Diagram 8 ++ * Clock-Architecture Diagram 5 + */ ++ /* PD_CRYPTO */ ++ COMPOSITE(ACLK_PDCRYPTO, "aclk_pdcrypto", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(4), 11, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDCRYPTO, "hclk_pdcrypto", "aclk_pdcrypto", 0, ++ RV1126_CLKSEL_CON(4), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(4), 12, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_pdcrypto", 0, ++ RV1126_CLKGATE_CON(3), 2, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_pdcrypto", 0, ++ RV1126_CLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE(CLK_CRYPTO_CORE, "aclk_crypto_core", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(7), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE(CLK_CRYPTO_PKA, "aclk_crypto_pka", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(3), 5, GFLAGS), + -+ /* PD_PHP */ -+ -+ COMPOSITE_NODIV(0, "clk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(MSCLK_PERI, "msclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_PERI, "lsclk_peri", "clk_peri_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(19), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(0, "msclk_peri_niu", "msclk_peri", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(8), 3, GFLAGS), -+ GATE(0, "lsclk_peri_niu", "lsclk_peri", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(8), 4, GFLAGS), -+ -+ /* PD_MMC */ -+ -+ GATE(0, "hclk_mmc_sfc", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(9), 0, GFLAGS), -+ GATE(0, "hclk_mmc_sfc_niu", "hclk_mmc_sfc", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(9), 11, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_sfc", 0, -+ RK1808_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_sfc", 0, -+ RK1808_CLKGATE_CON(9), 13, GFLAGS), -+ -+ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(22), 14, 
2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(23), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(23), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 3, GFLAGS), -+ -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK1808_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK1808_SDIO_CON1, 1), ++ /* ++ * Clock-Architecture Diagram 6 ++ */ ++ /* PD_AUDIO */ ++ COMPOSITE_NOMUX(HCLK_PDAUDIO, "hclk_pdaudio", "gpll", 0, ++ RV1126_CLKSEL_CON(26), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(9), 0, GFLAGS), + -+ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 4, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(25), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 5, GFLAGS), -+ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(25), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 6, GFLAGS), -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK1808_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK1808_EMMC_CON1, 1), ++ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(9), 4, GFLAGS), ++ COMPOSITE(MCLK_I2S0_TX_DIV, "mclk_i2s0_tx_div", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(27), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(9), 5, GFLAGS), ++ COMPOSITE_FRACMUX(MCLK_I2S0_TX_FRACDIV, "mclk_i2s0_tx_fracdiv", "mclk_i2s0_tx_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(28), 0, ++ RV1126_CLKGATE_CON(9), 6, GFLAGS, ++ &rv1126_i2s0_tx_fracmux), ++ GATE(MCLK_I2S0_TX, "mclk_i2s0_tx", "mclk_i2s0_tx_mux", 0, ++ RV1126_CLKGATE_CON(9), 9, GFLAGS), ++ COMPOSITE(MCLK_I2S0_RX_DIV, "mclk_i2s0_rx_div", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(27), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RV1126_CLKGATE_CON(9), 7, GFLAGS), ++ COMPOSITE_FRACMUX(MCLK_I2S0_RX_FRACDIV, "mclk_i2s0_rx_fracdiv", "mclk_i2s0_rx_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(29), 0, ++ RV1126_CLKGATE_CON(9), 8, GFLAGS, ++ &rv1126_i2s0_rx_fracmux), ++ GATE(MCLK_I2S0_RX, "mclk_i2s0_rx", "mclk_i2s0_rx_mux", 0, ++ RV1126_CLKGATE_CON(9), 10, GFLAGS), ++ COMPOSITE_NODIV(MCLK_I2S0_TX_OUT2IO, "mclk_i2s0_tx_out2io", mux_i2s0_tx_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(30), 6, 1, MFLAGS, ++ RV1126_CLKGATE_CON(9), 13, GFLAGS), ++ COMPOSITE_NODIV(MCLK_I2S0_RX_OUT2IO, "mclk_i2s0_rx_out2io", mux_i2s0_rx_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(30), 8, 1, MFLAGS, ++ RV1126_CLKGATE_CON(9), 14, GFLAGS), + -+ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 7, GFLAGS), -+ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", -+ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, -+ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, -+ RK1808_CLKSEL_CON(21), 0, 8, DFLAGS, -+ RK1808_CLKGATE_CON(9), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(21), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(9), 9, GFLAGS), -+ 
MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK1808_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK1808_SDMMC_CON1, 1), ++ GATE(HCLK_I2S1, "hclk_i2s1", "hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE(MCLK_I2S1_DIV, "mclk_i2s1_div", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(31), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(10), 1, GFLAGS), ++ COMPOSITE_FRACMUX(MCLK_I2S1_FRACDIV, "mclk_i2s1_fracdiv", "mclk_i2s1_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(32), 0, ++ RV1126_CLKGATE_CON(10), 2, GFLAGS, ++ &rv1126_i2s1_fracmux), ++ GATE(MCLK_I2S1, "mclk_i2s1", "mclk_i2s1_mux", 0, ++ RV1126_CLKGATE_CON(10), 3, GFLAGS), ++ COMPOSITE_NODIV(MCLK_I2S1_OUT2IO, "mclk_i2s1_out2io", mux_i2s1_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(31), 12, 1, MFLAGS, ++ RV1126_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(HCLK_I2S2, "hclk_i2s2", "hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(10), 5, GFLAGS), ++ COMPOSITE(MCLK_I2S2_DIV, "mclk_i2s2_div", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(33), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE_FRACMUX(MCLK_I2S2_FRACDIV, "mclk_i2s2_fracdiv", "mclk_i2s2_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(34), 0, ++ RV1126_CLKGATE_CON(10), 7, GFLAGS, ++ &rv1126_i2s2_fracmux), ++ GATE(MCLK_I2S2, "mclk_i2s2", "mclk_i2s2_mux", 0, ++ RV1126_CLKGATE_CON(10), 8, GFLAGS), ++ COMPOSITE_NODIV(MCLK_I2S2_OUT2IO, "mclk_i2s2_out2io", mux_i2s2_out2io_p, CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(33), 10, 1, MFLAGS, ++ RV1126_CLKGATE_CON(10), 9, GFLAGS), + -+ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(10), 10, GFLAGS), ++ COMPOSITE(MCLK_PDM, "mclk_pdm", mux_gpll_cpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(35), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(10), 11, GFLAGS), + -+ /* PD_MAC */ ++ GATE(HCLK_AUDPWM, "hclk_audpwm", "hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(10), 12, GFLAGS), ++ COMPOSITE(SCLK_ADUPWM_DIV, "sclk_audpwm_div", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(10), 13, GFLAGS), ++ COMPOSITE_FRACMUX(SCLK_AUDPWM_FRACDIV, "sclk_audpwm_fracdiv", "sclk_audpwm_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(37), 0, ++ RV1126_CLKGATE_CON(10), 14, GFLAGS, ++ &rv1126_audpwm_fracmux), ++ GATE(SCLK_AUDPWM, "sclk_audpwm", "mclk_audpwm_mux", 0, ++ RV1126_CLKGATE_CON(10), 15, GFLAGS), + -+ GATE(0, "pclk_sd_gmac", "lsclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(0, "aclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 0, GFLAGS), -+ GATE(0, "hclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(0, "pclk_gmac_niu", "pclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 10, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 12, GFLAGS), -+ GATE(0, "aclk_gmac_niu", "aclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 11, GFLAGS), -+ GATE(0, "hclk_gmac_niu", "hclk_sd_gmac", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(10), 9, GFLAGS), -+ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd_gmac", 0, -+ RK1808_CLKGATE_CON(10), 14, GFLAGS), ++ GATE(PCLK_ACDCDIG, "pclk_acdcdig", 
"hclk_pdaudio", 0, ++ RV1126_CLKGATE_CON(11), 0, GFLAGS), ++ GATE(CLK_ACDCDIG_ADC, "clk_acdcdig_adc", "mclk_i2s0_rx", 0, ++ RV1126_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(CLK_ACDCDIG_DAC, "clk_acdcdig_dac", "mclk_i2s0_tx", 0, ++ RV1126_CLKGATE_CON(11), 3, GFLAGS), ++ COMPOSITE(CLK_ACDCDIG_I2C, "clk_acdcdig_i2c", mux_gpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(72), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RV1126_CLKGATE_CON(11), 1, GFLAGS), + -+ COMPOSITE(SCLK_GMAC_OUT, "clk_gmac_out", mux_cpll_npll_ppll_p, 0, -+ RK1808_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(10), 15, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 7 ++ */ ++ /* PD_VEPU */ ++ COMPOSITE(ACLK_PDVEPU, "aclk_pdvepu", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(40), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDVEPU, "hclk_pdvepu", "aclk_pdvepu", 0, ++ RV1126_CLKSEL_CON(41), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(ACLK_VENC, "aclk_venc", "aclk_pdvepu", 0, ++ RV1126_CLKGATE_CON(12), 5, GFLAGS), ++ GATE(HCLK_VENC, "hclk_venc", "hclk_pdvepu", 0, ++ RV1126_CLKGATE_CON(12), 6, GFLAGS), ++ COMPOSITE(CLK_VENC_CORE, "clk_venc_core", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(40), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(12), 1, GFLAGS), + -+ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_cpll_npll_ppll_p, 0, -+ RK1808_CLKSEL_CON(26), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(10), 3, GFLAGS), -+ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK1808_CLKSEL_CON(27), 0, 1, MFLAGS), -+ GATE(SCLK_GMAC_REF, "clk_gmac_ref", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(0, "clk_gmac_tx_src", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(0, "clk_gmac_rx_src", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 6, GFLAGS), -+ GATE(SCLK_GMAC_REFOUT, "clk_gmac_refout", "clk_gmac", 0, -+ RK1808_CLKGATE_CON(10), 5, GFLAGS), -+ FACTOR(0, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), -+ FACTOR(0, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), -+ FACTOR(0, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), -+ FACTOR(0, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), -+ MUX(SCLK_GMAC_RGMII_SPEED, "clk_gmac_rgmii_speed", mux_gmac_rgmii_speed_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(27), 2, 2, MFLAGS), -+ MUX(SCLK_GMAC_RMII_SPEED, "clk_gmac_rmii_speed", mux_gmac_rmii_speed_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(27), 1, 1, MFLAGS), -+ MUX(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", mux_gmac_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(27), 4, 1, MFLAGS), ++ /* ++ * Clock-Architecture Diagram 8 ++ */ ++ /* PD_VDPU */ ++#if IS_ENABLED(CONFIG_ROCKCHIP_MPP_VDPU2) || IS_ENABLED(CONFIG_ROCKCHIP_MPP_RKVDEC) ++ COMPOSITE(ACLK_PDVDEC, "aclk_pdvdec", mux_cpll_hpll_gpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDVDEC, "hclk_pdvdec", "aclk_pdvdec", CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(41), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(0, "aclk_pdvdec_niu", "aclk_pdvdec", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(0, "hclk_pdvdec_niu", "hclk_pdvdec", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(ACLK_PDJPEG, "aclk_pdjpeg", mux_cpll_hpll_gpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 9, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDJPEG, "hclk_pdjpeg", "aclk_pdjpeg", CLK_IS_CRITICAL, ++ 
RV1126_CLKSEL_CON(44), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(0, "aclk_pdjpeg_niu", "aclk_pdjpeg", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(0, "hclk_pdjpeg_niu", "hclk_pdjpeg", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(13), 12, GFLAGS), ++#else ++ COMPOSITE(ACLK_PDVDEC, "aclk_pdvdec", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDVDEC, "hclk_pdvdec", "aclk_pdvdec", 0, ++ RV1126_CLKSEL_CON(41), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(0, "aclk_pdvdec_niu", "aclk_pdvdec", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(0, "hclk_pdvdec_niu", "hclk_pdvdec", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(ACLK_PDJPEG, "aclk_pdjpeg", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(44), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 9, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDJPEG, "hclk_pdjpeg", "aclk_pdjpeg", 0, ++ RV1126_CLKSEL_CON(44), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 10, GFLAGS), ++ GATE(0, "aclk_pdjpeg_niu", "aclk_pdjpeg", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(13), 11, GFLAGS), ++ GATE(0, "hclk_pdjpeg_niu", "hclk_pdjpeg", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(13), 12, GFLAGS), ++#endif ++ GATE(ACLK_VDEC, "aclk_vdec", "aclk_pdvdec", 0, ++ RV1126_CLKGATE_CON(13), 7, GFLAGS), ++ GATE(HCLK_VDEC, "hclk_vdec", "hclk_pdvdec", 0, ++ RV1126_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE(CLK_VDEC_CORE, "clk_vdec_core", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(42), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 1, GFLAGS), ++ COMPOSITE(CLK_VDEC_CA, "clk_vdec_ca", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 2, GFLAGS), ++ COMPOSITE(CLK_VDEC_HEVC_CA, "clk_vdec_hevc_ca", mux_cpll_hpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(43), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(ACLK_JPEG, "aclk_jpeg", "aclk_pdjpeg", 0, ++ RV1126_CLKGATE_CON(13), 13, GFLAGS), ++ GATE(HCLK_JPEG, "hclk_jpeg", "hclk_pdjpeg", 0, ++ RV1126_CLKGATE_CON(13), 14, GFLAGS), + + /* + * Clock-Architecture Diagram 9 + */ ++ /* PD_VO */ ++ COMPOSITE(ACLK_PDVO, "aclk_pdvo", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(45), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDVO, "hclk_pdvo", "aclk_pdvo", 0, ++ RV1126_CLKSEL_CON(45), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(14), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PDVO, "pclk_pdvo", "aclk_pdvo", 0, ++ RV1126_CLKSEL_CON(46), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_pdvo", 0, ++ RV1126_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_pdvo", 0, ++ RV1126_CLKGATE_CON(14), 7, GFLAGS), ++ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_pdvo", 0, ++ RV1126_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_pdvo", 0, ++ RV1126_CLKGATE_CON(14), 10, GFLAGS), ++ COMPOSITE(DCLK_VOP_DIV, "dclk_vop_div", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(47), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ RV1126_CLKGATE_CON(14), 11, GFLAGS), ++ COMPOSITE_FRACMUX(DCLK_VOP_FRACDIV, "dclk_vop_fracdiv", "dclk_vop_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(48), 0, ++ RV1126_CLKGATE_CON(14), 12, GFLAGS, ++ &rv1126_dclk_vop_fracmux), ++ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_mux", 0, 
++ RV1126_CLKGATE_CON(14), 13, GFLAGS), ++ GATE(PCLK_DSIHOST, "pclk_dsihost", "pclk_pdvo", 0, ++ RV1126_CLKGATE_CON(14), 14, GFLAGS), ++ GATE(ACLK_IEP, "aclk_iep", "aclk_pdvo", 0, ++ RV1126_CLKGATE_CON(12), 7, GFLAGS), ++ GATE(HCLK_IEP, "hclk_iep", "hclk_pdvo", 0, ++ RV1126_CLKGATE_CON(12), 8, GFLAGS), ++ COMPOSITE(CLK_IEP_CORE, "clk_iep_core", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(54), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(12), 9, GFLAGS), + -+ /* PD_BUS */ -+ -+ COMPOSITE_NODIV(0, "clk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(27), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX(HSCLK_BUS_PRE, "hsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(27), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_NOMUX(MSCLK_BUS_PRE, "msclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(28), 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 2, GFLAGS), -+ COMPOSITE_NOMUX(LSCLK_BUS_PRE, "lsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, -+ RK1808_CLKSEL_CON(28), 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(0, "hsclk_bus_niu", "hsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(0, "msclk_bus_niu", "msclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 1, GFLAGS), -+ GATE(0, "msclk_sub", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(15), 2, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(14), 15, GFLAGS), -+ GATE(HCLK_ROM, "hclk_rom", "msclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(15), 4, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 5, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 6, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(0, "lsclk_bus_niu", "lsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(15), 3, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 8, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 9, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 10, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 12, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 13, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 14, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(15), 15, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 2, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 5, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 3, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "lsclk_bus_pre", 0, -+ 
RK1808_CLKGATE_CON(16), 9, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 10, GFLAGS), -+ GATE(PCLK_EFUSE, "pclk_efuse", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 13, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 14, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 15, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(PCLK_PWM2, "pclk_pwm2", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(PCLK_WDT, "pclk_wdt", "lsclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 1, GFLAGS), -+ GATE(0, "pclk_grf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(0, "pclk_sgrf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(17), 3, GFLAGS), -+ GATE(0, "hclk_audio_pre", "msclk_bus_pre", 0, -+ RK1808_CLKGATE_CON(17), 8, GFLAGS), -+ GATE(0, "pclk_top_pre", "lsclk_bus_pre", CLK_IS_CRITICAL, -+ RK1808_CLKGATE_CON(11), 4, GFLAGS), -+ -+ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_p, 0, -+ RK1808_CLKSEL_CON(29), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK1808_CLKGATE_CON(11), 6, GFLAGS), -+ -+ COMPOSITE(0, "clk_uart1_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 8, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, -+ RK1808_CLKSEL_CON(39), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(40), 0, -+ RK1808_CLKGATE_CON(11), 10, GFLAGS, -+ &rk1808_uart1_fracmux), -+ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, -+ RK1808_CLKGATE_CON(11), 11, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 10 ++ */ ++ /* PD_VI */ ++ COMPOSITE(ACLK_PDVI_DIV, "aclk_pdvi_div", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(49), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(15), 0, GFLAGS), ++ COMPOSITE_HALFDIV_OFFSET(ACLK_PDVI_NP5, "aclk_pdvi_np5", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(49), 6, 2, MFLAGS, ++ RV1126_CLKSEL_CON(76), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 13, GFLAGS), ++ MUX(ACLK_PDVI, "aclk_pdvi", mux_aclk_pdvi_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(76), 5, 1, MFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDVI, "hclk_pdvi", "aclk_pdvi", 0, ++ RV1126_CLKSEL_CON(49), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(15), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PDVI, "pclk_pdvi", "aclk_pdvi", 0, ++ RV1126_CLKSEL_CON(50), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_pdvi", 0, ++ RV1126_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_pdvi", 0, ++ RV1126_CLKGATE_CON(15), 7, GFLAGS), ++ COMPOSITE(CLK_ISP_DIV, "clk_isp_div", mux_gpll_cpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(50), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(15), 8, GFLAGS), ++ COMPOSITE_HALFDIV_OFFSET(CLK_ISP_NP5, 
"clk_isp_np5", mux_gpll_cpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(50), 6, 2, MFLAGS, ++ RV1126_CLKSEL_CON(76), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 14, GFLAGS), ++ MUX(CLK_ISP, "clk_isp", mux_clk_isp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(76), 13, 1, MFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "aclk_pdvi", 0, ++ RV1126_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "hclk_pdvi", 0, ++ RV1126_CLKGATE_CON(15), 10, GFLAGS), ++ COMPOSITE(DCLK_CIF, "dclk_cif", mux_gpll_cpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(51), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(15), 11, GFLAGS), ++ COMPOSITE(CLK_CIF_OUT_DIV, "clk_cif_out2io_div", mux_gpll_usb480m_p, 0, ++ RV1126_CLKSEL_CON(51), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RV1126_CLKGATE_CON(15), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_CIF_OUT_FRACDIV, "clk_cif_out2io_fracdiv", "clk_cif_out2io_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(52), 0, ++ RV1126_CLKGATE_CON(15), 13, GFLAGS, ++ &rv1126_cif_out2io_fracmux), ++ GATE(CLK_CIF_OUT, "clk_cif_out2io", "clk_cif_out2io_mux", 0, ++ RV1126_CLKGATE_CON(15), 14, GFLAGS), ++ COMPOSITE(CLK_MIPICSI_OUT_DIV, "clk_mipicsi_out2io_div", mux_gpll_usb480m_p, 0, ++ RV1126_CLKSEL_CON(73), 8, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(23), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_MIPICSI_OUT_FRACDIV, "clk_mipicsi_out2io_fracdiv", "clk_mipicsi_out2io_div", CLK_SET_RATE_PARENT, ++ RV1126_CLKSEL_CON(74), 0, ++ RV1126_CLKGATE_CON(23), 6, GFLAGS, ++ &rv1126_mipicsi_out2io_fracmux), ++ GATE(CLK_MIPICSI_OUT, "clk_mipicsi_out2io", "clk_mipicsi_out2io_mux", 0, ++ RV1126_CLKGATE_CON(23), 7, GFLAGS), ++ GATE(PCLK_CSIHOST, "pclk_csihost", "pclk_pdvi", 0, ++ RV1126_CLKGATE_CON(15), 15, GFLAGS), ++ GATE(ACLK_CIFLITE, "aclk_ciflite", "aclk_pdvi", 0, ++ RV1126_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(HCLK_CIFLITE, "hclk_ciflite", "hclk_pdvi", 0, ++ RV1126_CLKGATE_CON(16), 11, GFLAGS), ++ COMPOSITE(DCLK_CIFLITE, "dclk_ciflite", mux_gpll_cpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(54), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 12, GFLAGS), + -+ COMPOSITE(0, "clk_uart2_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(41), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, -+ RK1808_CLKSEL_CON(42), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(11), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(43), 0, -+ RK1808_CLKGATE_CON(11), 14, GFLAGS, -+ &rk1808_uart2_fracmux), -+ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", 0, -+ RK1808_CLKGATE_CON(11), 15, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 11 ++ */ ++ /* PD_ISPP */ ++ COMPOSITE(ACLK_PDISPP_DIV, "aclk_pdispp_div", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(68), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 0, GFLAGS), ++ COMPOSITE_HALFDIV_OFFSET(ACLK_PDISPP_NP5, "aclk_pdispp_np5", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(68), 6, 2, MFLAGS, ++ RV1126_CLKSEL_CON(77), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 8, GFLAGS), ++ MUX(ACLK_PDISPP, "aclk_pdispp", mux_aclk_pdispp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(77), 5, 1, MFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDISPP, "hclk_pdispp", "aclk_pdispp", 0, ++ RV1126_CLKSEL_CON(69), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(ACLK_ISPP, "aclk_ispp", "aclk_pdispp", 0, ++ RV1126_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(HCLK_ISPP, "hclk_ispp", "hclk_pdispp", 0, ++ RV1126_CLKGATE_CON(16), 5, GFLAGS), ++ 
COMPOSITE(CLK_ISPP_DIV, "clk_ispp_div", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(69), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 6, GFLAGS), ++ COMPOSITE_HALFDIV_OFFSET(CLK_ISPP_NP5, "clk_ispp_np5", mux_cpll_gpll_hpll_p, 0, ++ RV1126_CLKSEL_CON(69), 6, 2, MFLAGS, ++ RV1126_CLKSEL_CON(77), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(16), 7, GFLAGS), ++ MUX(CLK_ISPP, "clk_ispp", mux_clk_ispp_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(77), 13, 1, MFLAGS), + -+ COMPOSITE(0, "clk_uart3_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(44), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, -+ RK1808_CLKSEL_CON(45), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(46), 0, -+ RK1808_CLKGATE_CON(12), 2, GFLAGS, -+ &rk1808_uart3_fracmux), -+ GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, -+ RK1808_CLKGATE_CON(12), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 12 ++ */ ++ /* PD_PHP */ ++ COMPOSITE(ACLK_PDPHP, "aclk_pdphp", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(53), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(17), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDPHP, "hclk_pdphp", "gpll", CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(53), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(17), 1, GFLAGS), ++ /* PD_SDCARD */ ++ GATE(HCLK_PDSDMMC, "hclk_pdsdmmc", "hclk_pdphp", 0, ++ RV1126_CLKGATE_CON(17), 6, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_pdsdmmc", 0, ++ RV1126_CLKGATE_CON(18), 4, GFLAGS), ++ COMPOSITE(CLK_SDMMC, "clk_sdmmc", mux_gpll_cpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(55), 14, 2, MFLAGS, 0, 8, ++ DFLAGS, RV1126_CLKGATE_CON(18), 5, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RV1126_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RV1126_SDMMC_CON1, 1), + -+ COMPOSITE(0, "clk_uart4_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(47), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 4, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, -+ RK1808_CLKSEL_CON(48), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 5, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(49), 0, -+ RK1808_CLKGATE_CON(12), 6, GFLAGS, -+ &rk1808_uart4_fracmux), -+ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, -+ RK1808_CLKGATE_CON(12), 7, GFLAGS), ++ /* PD_SDIO */ ++ GATE(HCLK_PDSDIO, "hclk_pdsdio", "hclk_pdphp", 0, ++ RV1126_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_pdsdio", 0, ++ RV1126_CLKGATE_CON(18), 6, GFLAGS), ++ COMPOSITE(CLK_SDIO, "clk_sdio", mux_gpll_cpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RV1126_CLKGATE_CON(18), 7, GFLAGS), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RV1126_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RV1126_SDIO_CON1, 1), + -+ COMPOSITE(0, "clk_uart5_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(50), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 8, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, -+ RK1808_CLKSEL_CON(51), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(52), 0, -+ RK1808_CLKGATE_CON(12), 10, GFLAGS, -+ &rk1808_uart5_fracmux), -+ GATE(SCLK_UART5, "clk_uart5", 
"clk_uart5_mux", 0, -+ RK1808_CLKGATE_CON(12), 11, GFLAGS), ++ /* PD_NVM */ ++ GATE(HCLK_PDNVM, "hclk_pdnvm", "hclk_pdphp", 0, ++ RV1126_CLKGATE_CON(18), 1, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_pdnvm", 0, ++ RV1126_CLKGATE_CON(18), 8, GFLAGS), ++ COMPOSITE(CLK_EMMC, "clk_emmc", mux_gpll_cpll_xin24m_p, 0, ++ RV1126_CLKSEL_CON(57), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RV1126_CLKGATE_CON(18), 9, GFLAGS), ++ GATE(HCLK_NANDC, "hclk_nandc", "hclk_pdnvm", 0, ++ RV1126_CLKGATE_CON(18), 13, GFLAGS), ++ COMPOSITE(CLK_NANDC, "clk_nandc", mux_gpll_cpll_p, 0, ++ RV1126_CLKSEL_CON(59), 15, 1, MFLAGS, 0, 8, DFLAGS, ++ RV1126_CLKGATE_CON(18), 14, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_pdnvm", 0, ++ RV1126_CLKGATE_CON(18), 10, GFLAGS), ++ GATE(HCLK_SFCXIP, "hclk_sfcxip", "hclk_pdnvm", 0, ++ RV1126_CLKGATE_CON(18), 11, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(58), 15, 1, MFLAGS, 0, 8, DFLAGS, ++ RV1126_CLKGATE_CON(18), 12, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RV1126_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RV1126_EMMC_CON1, 1), + -+ COMPOSITE(0, "clk_uart6_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(53), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 12, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart6_np5", "clk_uart6_src", 0, -+ RK1808_CLKSEL_CON(54), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(12), 13, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(55), 0, -+ RK1808_CLKGATE_CON(12), 14, GFLAGS, -+ &rk1808_uart6_fracmux), -+ GATE(SCLK_UART6, "clk_uart6", "clk_uart6_mux", 0, -+ RK1808_CLKGATE_CON(12), 15, GFLAGS), ++ /* PD_USB */ ++ GATE(ACLK_PDUSB, "aclk_pdusb", "aclk_pdphp", 0, ++ RV1126_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(HCLK_PDUSB, "hclk_pdusb", "hclk_pdphp", 0, ++ RV1126_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_pdusb", 0, ++ RV1126_CLKGATE_CON(19), 4, GFLAGS), ++ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_pdusb", 0, ++ RV1126_CLKGATE_CON(19), 5, GFLAGS), ++#if IS_ENABLED(CONFIG_USB_EHCI_HCD_PLATFORM) || IS_ENABLED(CONFIG_USB_OHCI_HCD_PLATFORM) ++ COMPOSITE(CLK_USBHOST_UTMI_OHCI, "clk_usbhost_utmi_ohci", mux_usb480m_gpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(19), 6, GFLAGS), ++#else ++ COMPOSITE(CLK_USBHOST_UTMI_OHCI, "clk_usbhost_utmi_ohci", mux_usb480m_gpll_p, 0, ++ RV1126_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(19), 6, GFLAGS), ++#endif ++ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_pdusb", 0, ++ RV1126_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(CLK_USBOTG_REF, "clk_usbotg_ref", "xin24m", 0, ++ RV1126_CLKGATE_CON(19), 8, GFLAGS), ++ /* PD_GMAC */ ++ GATE(ACLK_PDGMAC, "aclk_pdgmac", "aclk_pdphp", 0, ++ RV1126_CLKGATE_CON(20), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PDGMAC, "pclk_pdgmac", "aclk_pdgmac", 0, ++ RV1126_CLKSEL_CON(63), 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(20), 1, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_pdgmac", 0, ++ RV1126_CLKGATE_CON(20), 4, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_pdgmac", 0, ++ RV1126_CLKGATE_CON(20), 5, GFLAGS), + -+ COMPOSITE(0, "clk_uart7_src", mux_gpll_usb480m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart7_np5", "clk_uart7_src", 0, -+ RK1808_CLKSEL_CON(57), 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart7_frac", "clk_uart7_src", 
CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(58), 0, -+ RK1808_CLKGATE_CON(13), 2, GFLAGS, -+ &rk1808_uart7_fracmux), -+ GATE(SCLK_UART7, "clk_uart7", "clk_uart7_mux", 0, -+ RK1808_CLKGATE_CON(13), 3, GFLAGS), ++ COMPOSITE(CLK_GMAC_DIV, "clk_gmac_div", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(63), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(CLK_GMAC_RGMII_M0, "clk_gmac_rgmii_m0", "clk_gmac_rgmii_clkin_m0", 0, ++ RV1126_CLKGATE_CON(20), 12, GFLAGS), ++ MUX(CLK_GMAC_SRC_M0, "clk_gmac_src_m0", clk_gmac_src_m0_p, CLK_SET_RATE_PARENT, ++ RV1126_GMAC_CON, 0, 1, MFLAGS), ++ GATE(CLK_GMAC_RGMII_M1, "clk_gmac_rgmii_m1", "clk_gmac_rgmii_clkin_m1", 0, ++ RV1126_CLKGATE_CON(20), 13, GFLAGS), ++ MUX(CLK_GMAC_SRC_M1, "clk_gmac_src_m1", clk_gmac_src_m1_p, CLK_SET_RATE_PARENT, ++ RV1126_GMAC_CON, 5, 1, MFLAGS), ++ MUXGRF(CLK_GMAC_SRC, "clk_gmac_src", mux_clk_gmac_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RV1126_GRF_IOFUNC_CON1, 12, 1, MFLAGS), + -+ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 4, GFLAGS), -+ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 5, GFLAGS), -+ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 6, GFLAGS), -+ COMPOSITE(SCLK_I2C4, "clk_i2c4", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(14), 6, GFLAGS), -+ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(71), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(CLK_GMAC_REF, "clk_gmac_ref", "clk_gmac_src", 0, ++ RV1126_CLKGATE_CON(20), 7, GFLAGS), + -+ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 7, GFLAGS), -+ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 9, GFLAGS), ++ GATE(CLK_GMAC_TX_SRC, "clk_gmac_tx_src", "clk_gmac_src", 0, ++ RV1126_CLKGATE_CON(20), 9, GFLAGS), ++ FACTOR(CLK_GMAC_TX_DIV5, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), ++ FACTOR(CLK_GMAC_TX_DIV50, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), ++ MUXTBL(RGMII_MODE_CLK, "rgmii_mode_clk", mux_rgmii_clk_p, CLK_SET_RATE_PARENT, ++ RV1126_GMAC_CON, 2, 2, MFLAGS, rgmii_mux_idx), ++ GATE(CLK_GMAC_RX_SRC, "clk_gmac_rx_src", "clk_gmac_src", 0, ++ RV1126_CLKGATE_CON(20), 8, GFLAGS), ++ FACTOR(CLK_GMAC_RX_DIV2, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), ++ FACTOR(CLK_GMAC_RX_DIV20, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), ++ MUX(RMII_MODE_CLK, "rmii_mode_clk", mux_rmii_clk_p, CLK_SET_RATE_PARENT, ++ RV1126_GMAC_CON, 1, 1, MFLAGS), ++ MUX(CLK_GMAC_TX_RX, "clk_gmac_tx_rx", mux_gmac_tx_rx_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RV1126_GMAC_CON, 4, 1, MFLAGS), + -+ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK1808_CLKSEL_CON(62), 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(13), 13, GFLAGS), -+ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK1808_CLKSEL_CON(63), 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(13), 14, GFLAGS), ++ GATE(CLK_GMAC_PTPREF, "clk_gmac_ptpref", "xin24m", 0, ++ 
RV1126_CLKGATE_CON(20), 10, GFLAGS), ++ COMPOSITE(CLK_GMAC_ETHERNET_OUT, "clk_gmac_ethernet_out2io", mux_cpll_gpll_p, 0, ++ RV1126_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(20), 11, GFLAGS), + -+ COMPOSITE(SCLK_EFUSE_S, "clk_efuse_s", mux_gpll_cpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK1808_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE(SCLK_EFUSE_NS, "clk_efuse_ns", mux_gpll_cpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(64), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK1808_CLKGATE_CON(14), 1, GFLAGS), + -+ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(65), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 2, GFLAGS), -+ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(66), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 3, GFLAGS), -+ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(67), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 4, GFLAGS), -+ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, -+ RK1808_CLKSEL_CON(68), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_CLKGATE_CON(14), 5, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 14 ++ */ ++ /* PD_NPU */ ++ COMPOSITE(ACLK_PDNPU_DIV, "aclk_pdnpu_div", mux_gpll_cpll_apll_hpll_p, 0, ++ RV1126_CLKSEL_CON(65), 8, 2, MFLAGS, 0, 4, DFLAGS, ++ RV1126_CLKGATE_CON(22), 0, GFLAGS), ++ COMPOSITE_HALFDIV(ACLK_PDNPU_NP5, "aclk_pdnpu_np5", mux_gpll_cpll_apll_hpll_p, 0, ++ RV1126_CLKSEL_CON(65), 8, 2, MFLAGS, 4, 4, DFLAGS, ++ RV1126_CLKGATE_CON(22), 1, GFLAGS), ++ MUX(ACLK_PDNPU, "aclk_pdnpu", mux_aclk_pdnpu_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(65), 12, 1, MFLAGS), ++ COMPOSITE_NOMUX(HCLK_PDNPU, "hclk_pdnpu", "gpll", 0, ++ RV1126_CLKSEL_CON(66), 8, 4, DFLAGS, ++ RV1126_CLKGATE_CON(22), 2, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PDNPU, "pclk_pdnpu", "hclk_pdnpu", 0, ++ RV1126_CLKSEL_CON(66), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(ACLK_NPU, "aclk_npu", "aclk_pdnpu", 0, ++ RV1126_CLKGATE_CON(22), 7, GFLAGS), ++ GATE(HCLK_NPU, "hclk_npu", "hclk_pdnpu", 0, ++ RV1126_CLKGATE_CON(22), 8, GFLAGS), ++ COMPOSITE(CLK_NPU_DIV, "clk_npu_div", mux_gpll_cpll_apll_hpll_p, 0, ++ RV1126_CLKSEL_CON(67), 8, 2, MFLAGS, 0, 4, DFLAGS, ++ RV1126_CLKGATE_CON(22), 9, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_NPU_NP5, "clk_npu_np5", mux_gpll_cpll_apll_hpll_p, 0, ++ RV1126_CLKSEL_CON(67), 8, 2, MFLAGS, 4, 4, DFLAGS, ++ RV1126_CLKGATE_CON(22), 10, GFLAGS), ++ MUX(CLK_CORE_NPU, "clk_core_npu", mux_clk_npu_p, CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, ++ RV1126_CLKSEL_CON(67), 12, 1, MFLAGS), ++ GATE(CLK_CORE_NPUPVTM, "clk_core_npupvtm", "clk_core_npu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(22), 14, GFLAGS), ++ GATE(CLK_NPUPVTM, "clk_npupvtm", "xin24m", 0, ++ RV1126_CLKGATE_CON(22), 13, GFLAGS), ++ GATE(PCLK_NPUPVTM, "pclk_npupvtm", "pclk_pdnpu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(22), 12, GFLAGS), + -+ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(69), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 10, GFLAGS), -+ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(69), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 11, GFLAGS), -+ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_gpll_xin24m_p, 0, -+ RK1808_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(13), 12, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 15 ++ */ ++ GATE(PCLK_PDTOP, "pclk_pdtop", "pclk_pdbus", CLK_IS_CRITICAL, ++ 
RV1126_CLKGATE_CON(23), 8, GFLAGS), ++ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_pdtop", 0, ++ RV1126_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_pdtop", 0, ++ RV1126_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_pdtop", 0, ++ RV1126_CLKGATE_CON(23), 3, GFLAGS), ++ GATE(PCLK_USBPHY_HOST, "pclk_usbphy_host", "pclk_pdtop", 0, ++ RV1126_CLKGATE_CON(19), 13, GFLAGS), ++ GATE(PCLK_USBPHY_OTG, "pclk_usbphy_otg", "pclk_pdtop", 0, ++ RV1126_CLKGATE_CON(19), 12, GFLAGS), + -+ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 8, GFLAGS), -+ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 10, GFLAGS), -+ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 11, GFLAGS), -+ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 12, GFLAGS), -+ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, -+ RK1808_CLKGATE_CON(14), 13, GFLAGS), ++#ifndef CONFIG_ROCKCHIP_LOW_PERFORMANCE ++ /* ++ * Clock-Architecture Diagram 3 ++ */ ++ /* PD_CORE */ ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(1), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1126_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "pclk_dbg_daplite", "pclk_dbg", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(0, "clk_a7_jtag", "clk_jtag_ori", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(0, "pclk_dbg_niu", "pclk_dbg", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(0), 4, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ /* PD_BUS */ ++ GATE(0, "aclk_pdbus_hold_niu1", "aclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 10, GFLAGS), ++ GATE(0, "aclk_pdbus_niu1", "aclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(0, "hclk_pdbus_niu1", "hclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(0, "pclk_pdbus_niu1", "pclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(0, "aclk_pdbus_niu2", "aclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 6, GFLAGS), ++ GATE(0, "hclk_pdbus_niu2", "hclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(0, "aclk_pdbus_niu3", "aclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(0, "hclk_pdbus_niu3", "hclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(0, "pclk_grf", "pclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(6), 15, GFLAGS), ++ GATE(0, "pclk_sgrf", "pclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(0, "aclk_sysram", "hclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(3), 9, GFLAGS), ++ GATE(0, "pclk_intmux", "pclk_pdbus", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(7), 14, GFLAGS), + + /* -+ * Clock-Architecture Diagram 10 ++ * Clock-Architecture Diagram 5 + */ ++ /* PD_CRYPTO */ ++ GATE(0, "aclk_pdcrypto_niu", "aclk_pdcrypto", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(0, "hclk_pdcrypto_niu", "hclk_pdcrypto", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(4), 14, GFLAGS), + ++ /* ++ * Clock-Architecture Diagram 6 ++ */ + /* PD_AUDIO */ ++ GATE(0, "hclk_pdaudio_niu", "hclk_pdaudio", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(0, "pclk_pdaudio_niu", "hclk_pdaudio", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(9), 3, 
GFLAGS), + -+ GATE(0, "hclk_audio_niu", "hclk_audio_pre", CLK_IGNORE_UNUSED, -+ RK1808_CLKGATE_CON(18), 11, GFLAGS), -+ GATE(HCLK_VAD, "hclk_vad", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 12, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 13, GFLAGS), -+ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 14, GFLAGS), -+ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio_pre", 0, -+ RK1808_CLKGATE_CON(18), 15, GFLAGS), -+ -+ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(17), 9, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(31), 0, -+ RK1808_CLKGATE_CON(17), 10, GFLAGS, -+ &rk1808_pdm_fracmux), -+ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, -+ RK1808_CLKGATE_CON(17), 11, GFLAGS), -+ -+ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(17), 12, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(33), 0, -+ RK1808_CLKGATE_CON(17), 13, GFLAGS, -+ &rk1808_i2s0_8ch_tx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(17), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(32), 14, 2, MFLAGS, -+ RK1808_CLKGATE_CON(17), 15, GFLAGS), -+ -+ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(18), 0, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(35), 0, -+ RK1808_CLKGATE_CON(18), 1, GFLAGS, -+ &rk1808_i2s0_8ch_rx_fracmux), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 12, 1, MFLAGS, -+ RK1808_CLKGATE_CON(18), 2, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", mux_i2s0_8ch_rx_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(34), 14, 2, MFLAGS, -+ RK1808_CLKGATE_CON(18), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 7 ++ */ ++ /* PD_VEPU */ ++ GATE(0, "aclk_pdvepu_niu", "aclk_pdvepu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(0, "hclk_pdvepu_niu", "hclk_pdvepu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(12), 4, GFLAGS), + -+ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_gpll_cpll_npll_p, 0, -+ RK1808_CLKSEL_CON(36), 8, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_CLKGATE_CON(18), 4, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(37), 0, -+ RK1808_CLKGATE_CON(18), 5, GFLAGS, -+ &rk1808_i2s1_2ch_fracmux), -+ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, -+ RK1808_CLKGATE_CON(18), 6, GFLAGS), -+ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, CLK_SET_RATE_PARENT, -+ RK1808_CLKSEL_CON(36), 15, 1, MFLAGS, -+ RK1808_CLKGATE_CON(18), 7, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 9 ++ */ ++ /* PD_VO */ ++ GATE(0, "aclk_pdvo_niu", "aclk_pdvo", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(0, "hclk_pdvo_niu", "hclk_pdvo", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(14), 4, 
GFLAGS), ++ GATE(0, "pclk_pdvo_niu", "pclk_pdvo", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(14), 5, GFLAGS), + + /* + * Clock-Architecture Diagram 10 + */ -+ -+ /* PD_BUS */ -+ -+ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 4, GFLAGS), -+ -+ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(0, "pclk_usb3_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(0, "pclk_usb_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 8, GFLAGS), -+ ++ /* PD_VI */ ++ GATE(0, "aclk_pdvi_niu", "aclk_pdvi", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(0, "hclk_pdvi_niu", "hclk_pdvi", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(0, "pclk_pdvi_niu", "pclk_pdvi", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(15), 5, GFLAGS), + /* + * Clock-Architecture Diagram 11 + */ ++ /* PD_ISPP */ ++ GATE(0, "aclk_pdispp_niu", "aclk_pdispp", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(0, "hclk_pdispp_niu", "hclk_pdispp", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(16), 3, GFLAGS), + -+ /* PD_PMU */ -+ -+ COMPOSITE_FRACMUX(SCLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_PMU_CLKSEL_CON(1), 0, -+ RK1808_PMU_CLKGATE_CON(0), 13, GFLAGS, -+ &rk1808_rtc32k_pmu_fracmux), -+ -+ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, -+ RK1808_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 12, GFLAGS), -+ -+ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "ppll", 0, -+ RK1808_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 15, GFLAGS), -+ -+ COMPOSITE(0, "clk_uart0_pmu_src", mux_gpll_usb480m_cpll_ppll_p, 0, -+ RK1808_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, -+ RK1808_PMU_CLKSEL_CON(4), 0, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(5), 0, -+ RK1808_PMU_CLKGATE_CON(1), 2, GFLAGS, -+ &rk1808_uart0_pmu_fracmux), -+ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKGATE_CON(1), 3, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 12 ++ */ ++ /* PD_PHP */ ++ GATE(0, "aclk_pdphpmid", "aclk_pdphp", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(0, "hclk_pdphpmid", "hclk_pdphp", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(0, "aclk_pdphpmid_niu", "aclk_pdphpmid", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(0, "hclk_pdphpmid_niu", "hclk_pdphpmid", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 5, GFLAGS), + -+ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RK1808_PMU_CLKGATE_CON(1), 4, GFLAGS), ++ /* PD_SDCARD */ ++ GATE(0, "hclk_pdsdmmc_niu", "hclk_pdsdmmc", 
CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 7, GFLAGS), + -+ COMPOSITE(SCLK_PMU_I2C0, "clk_pmu_i2c0", mux_ppll_xin24m_p, 0, -+ RK1808_PMU_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ /* PD_SDIO */ ++ GATE(0, "hclk_pdsdio_niu", "hclk_pdsdio", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(17), 9, GFLAGS), + -+ COMPOSITE(DBCLK_PMU_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, -+ RK1808_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 11, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ /* PD_NVM */ ++ GATE(0, "hclk_pdnvm_niu", "hclk_pdnvm", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(18), 3, GFLAGS), + -+ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "ppll", 0, -+ RK1808_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ /* PD_USB */ ++ GATE(0, "aclk_pdusb_niu", "aclk_pdusb", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(0, "hclk_pdusb_niu", "hclk_pdusb", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(19), 3, GFLAGS), + -+ FACTOR(0, "clk_ppll_ph0", "ppll", 0, 1, 2), -+ COMPOSITE_NOMUX(0, "clk_pciephy_src", "clk_ppll_ph0", 0, -+ RK1808_PMU_CLKSEL_CON(7), 0, 2, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_NODIV(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pciephy_ref_p, CLK_SET_RATE_PARENT, -+ RK1808_PMU_CLKSEL_CON(7), 4, 1, MFLAGS, -+ RK1808_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ /* PD_GMAC */ ++ GATE(0, "aclk_pdgmac_niu", "aclk_pdgmac", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(20), 2, GFLAGS), ++ GATE(0, "pclk_pdgmac_niu", "pclk_pdgmac", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(20), 3, GFLAGS), + -+ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "ppll", CLK_IS_CRITICAL, -+ RK1808_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, -+ RK1808_PMU_CLKGATE_CON(0), 0, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 13 ++ */ ++ /* PD_DDR */ ++ COMPOSITE_NOMUX(0, "pclk_pdddr_pre", "gpll", CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(64), 0, 5, DFLAGS, ++ RV1126_CLKGATE_CON(21), 0, GFLAGS), ++ GATE(PCLK_PDDDR, "pclk_pdddr", "pclk_pdddr_pre", CLK_IS_CRITICAL, ++ RV1126_CLKGATE_CON(21), 15, GFLAGS), ++ GATE(0, "pclk_ddr_msch", "pclk_pdddr", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 6, GFLAGS), ++ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_dpll_gpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS), ++ COMPOSITE(CLK_DDRPHY, "clk_ddrphy", mux_dpll_gpll_p, CLK_IS_CRITICAL, ++ RV1126_CLKSEL_CON(64), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RV1126_CLKGATE_CON(21), 8, GFLAGS), ++ GATE(0, "clk1x_phy", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(0, "clk_ddr_msch", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 10, GFLAGS), ++ GATE(0, "pclk_ddr_dfictl", "pclk_pdddr", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(0, "clk_ddr_dfictl", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 13, GFLAGS), ++ GATE(0, "pclk_ddr_standby", "pclk_pdddr", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 4, GFLAGS), ++ GATE(0, "clk_ddr_standby", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 14, GFLAGS), ++ GATE(0, "aclk_ddr_split", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 9, 
GFLAGS), ++ GATE(0, "pclk_ddr_grf", "pclk_pdddr", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_pdddr", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(CLK_DDR_MON, "clk_ddr_mon", "clk_ddrphy", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(20), 15, GFLAGS), ++ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(21), 7, GFLAGS), + -+ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK1808_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 9, GFLAGS), ++ /* ++ * Clock-Architecture Diagram 14 ++ */ ++ /* PD_NPU */ ++ GATE(0, "aclk_pdnpu_niu", "aclk_pdnpu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(22), 4, GFLAGS), ++ GATE(0, "hclk_pdnpu_niu", "hclk_pdnpu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(22), 5, GFLAGS), ++ GATE(0, "pclk_pdnpu_niu", "pclk_pdnpu", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(22), 6, GFLAGS), + -+ MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, -+ RK1808_PMUGRF_SOC_CON0, 0, 1, MFLAGS) ++ /* ++ * Clock-Architecture Diagram 15 ++ */ ++ GATE(0, "pclk_topniu", "pclk_pdtop", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 9, GFLAGS), ++ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_pdtop", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 10, GFLAGS), ++ GATE(PCLK_TOPGRF, "pclk_topgrf", "pclk_pdtop", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 11, GFLAGS), ++ GATE(PCLK_CPUEMADET, "pclk_cpuemadet", "pclk_pdtop", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 12, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_pdtop", CLK_IGNORE_UNUSED, ++ RV1126_CLKGATE_CON(23), 0, GFLAGS), ++#endif +}; + -+static void __iomem *rk1808_cru_base; ++static void __iomem *rv1126_cru_base; ++static void __iomem *rv1126_pmucru_base; + -+void rk1808_dump_cru(void) ++void rv1126_dump_cru(void) +{ -+ if (rk1808_cru_base) { -+ pr_warn("CRU:\n"); ++ if (rv1126_pmucru_base) { ++ pr_warn("PMU CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk1808_cru_base, -+ 0x500, false); ++ 32, 4, rv1126_pmucru_base, ++ 0x248, false); ++ } ++ if (rv1126_cru_base) { ++ pr_warn("CRU:\n"); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk1808_cru_base + 0x4000, -+ 0x100, false); ++ 32, 4, rv1126_cru_base, ++ 0x588, false); + } +} -+EXPORT_SYMBOL_GPL(rk1808_dump_cru); ++EXPORT_SYMBOL_GPL(rv1126_dump_cru); + -+static int rk1808_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) ++static int rv1126_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) +{ -+ rk1808_dump_cru(); ++ rv1126_dump_cru(); + return NOTIFY_DONE; +} + -+static struct notifier_block rk1808_clk_panic_block = { -+ .notifier_call = rk1808_clk_panic, ++static struct notifier_block rv1126_clk_panic_block = { ++ 
.notifier_call = rv1126_clk_panic, +}; + -+static void __init rk1808_clk_init(struct device_node *np) ++static struct rockchip_clk_provider *pmucru_ctx; ++static void __init rv1126_pmu_clk_init(struct device_node *np) +{ + struct rockchip_clk_provider *ctx; + void __iomem *reg_base; -+ struct clk **clks; ++ ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru pmu region\n", __func__); ++ return; ++ } ++ ++ rv1126_pmucru_base = reg_base; ++ ++ ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip pmu clk init failed\n", __func__); ++ return; ++ } ++ ++ rockchip_clk_register_plls(ctx, rv1126_pmu_pll_clks, ++ ARRAY_SIZE(rv1126_pmu_pll_clks), ++ RV1126_GRF_SOC_STATUS0); ++ ++ rockchip_clk_register_branches(ctx, rv1126_clk_pmu_branches, ++ ARRAY_SIZE(rv1126_clk_pmu_branches)); ++ ++ rockchip_register_softrst(np, 2, reg_base + RV1126_PMU_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ ++ rockchip_clk_of_add_provider(np, ctx); ++ ++ pmucru_ctx = ctx; ++} ++ ++CLK_OF_DECLARE(rv1126_cru_pmu, "rockchip,rv1126-pmucru", rv1126_pmu_clk_init); ++ ++static void __init rv1126_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **cru_clks, **pmucru_clks; + + reg_base = of_iomap(np, 0); + if (!reg_base) { @@ -70575,7 +70070,7 @@ index 000000000..e177a3dd6 + return; + } + -+ rk1808_cru_base = reg_base; ++ rv1126_cru_base = reg_base; + + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); + if (IS_ERR(ctx)) { @@ -70583,3912 +70078,2506 @@ index 000000000..e177a3dd6 + iounmap(reg_base); + return; + } -+ clks = ctx->clk_data.clks; ++ cru_clks = ctx->clk_data.clks; ++ pmucru_clks = pmucru_ctx->clk_data.clks; + -+ rockchip_clk_register_plls(ctx, rk1808_pll_clks, -+ ARRAY_SIZE(rk1808_pll_clks), -+ RK1808_GRF_SOC_STATUS0); -+ rockchip_clk_register_branches(ctx, rk1808_clk_branches, -+ ARRAY_SIZE(rk1808_clk_branches)); ++ rockchip_clk_register_plls(ctx, rv1126_pll_clks, ++ ARRAY_SIZE(rv1126_pll_clks), ++ RV1126_GRF_SOC_STATUS0); + + rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk1808_cpuclk_data, rk1808_cpuclk_rates, -+ ARRAY_SIZE(rk1808_cpuclk_rates)); ++ 3, cru_clks[PLL_APLL], pmucru_clks[PLL_GPLL], ++ &rv1126_cpuclk_data, rv1126_cpuclk_rates, ++ ARRAY_SIZE(rv1126_cpuclk_rates)); + -+ rockchip_register_softrst(np, 16, reg_base + RK1808_SOFTRST_CON(0), ++ rockchip_clk_register_branches(ctx, rv1126_clk_branches, ++ ARRAY_SIZE(rv1126_clk_branches)); ++ ++ rockchip_register_softrst(np, 15, reg_base + RV1126_SOFTRST_CON(0), + ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ rockchip_register_restart_notifier(ctx, RK1808_GLB_SRST_FST, NULL); ++ rockchip_register_restart_notifier(ctx, RV1126_GLB_SRST_FST, NULL); + + rockchip_clk_of_add_provider(np, ctx); + + atomic_notifier_chain_register(&panic_notifier_list, -+ &rk1808_clk_panic_block); ++ &rv1126_clk_panic_block); +} + -+CLK_OF_DECLARE(rk1808_cru, "rockchip,rk1808-cru", rk1808_clk_init); ++CLK_OF_DECLARE(rv1126_cru, "rockchip,rv1126-cru", rv1126_clk_init); + -+static int __init clk_rk1808_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; ++struct clk_rv1126_inits { ++ void (*inits)(struct device_node *np); ++}; + -+ rk1808_clk_init(np); ++static const struct clk_rv1126_inits clk_rv1126_pmucru_init = { ++ .inits = rv1126_pmu_clk_init, ++}; + -+ return 0; -+} ++static const struct clk_rv1126_inits clk_rv1126_cru_init = { ++ .inits = rv1126_clk_init, ++}; 
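As an illustrative aside (not part of the patch): the providers registered above only expose the RV1126 clock tree; peripheral drivers then consume individual clocks through the generic Linux clk API. The sketch below is a minimal, hypothetical consumer, assuming a device-tree node that references the CRU with clock-names = "isp"; the helper function and the clock name are assumptions, chosen to mirror the CLK_ISP branch defined earlier in this hunk.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical helper: look up and ungate the "isp" clock of a consumer device */
static int example_enable_isp_clk(struct device *dev)
{
	struct clk *clk;
	int ret;

	/* Resolves the "isp" entry of the node's clocks/clock-names properties */
	clk = devm_clk_get(dev, "isp");
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* Prepare and enable the clock before touching the IP block's registers */
	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	dev_info(dev, "isp clock running at %lu Hz\n", clk_get_rate(clk));
	return 0;
}

Because CLK_ISP is registered with CLK_SET_RATE_PARENT, a clk_set_rate() call from such a consumer can propagate up into its parent branch (the clk_isp_div/clk_isp_np5 composites defined earlier in this hunk, as the mux_clk_isp_p name suggests); whether a requested rate is reachable still depends on the parent PLL configuration.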
+ -+static const struct of_device_id clk_rk1808_match_table[] = { ++static const struct of_device_id clk_rv1126_match_table[] = { + { -+ .compatible = "rockchip,rk1808-cru", ++ .compatible = "rockchip,rv1126-cru", ++ .data = &clk_rv1126_cru_init, ++ }, { ++ .compatible = "rockchip,rv1126-pmucru", ++ .data = &clk_rv1126_pmucru_init, + }, + { } +}; -+MODULE_DEVICE_TABLE(of, clk_rk1808_match_table); ++MODULE_DEVICE_TABLE(of, clk_rv1126_match_table); + -+static struct platform_driver clk_rk1808_driver = { ++static int __init clk_rv1126_probe(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct clk_rv1126_inits *init_data; ++ ++ init_data = (struct clk_rv1126_inits *)of_device_get_match_data(&pdev->dev); ++ if (!init_data) ++ return -EINVAL; ++ ++ if (init_data->inits) ++ init_data->inits(np); ++ ++ return 0; ++} ++ ++static struct platform_driver clk_rv1126_driver = { + .driver = { -+ .name = "clk-rk1808", -+ .of_match_table = clk_rk1808_match_table, ++ .name = "clk-rv1126", ++ .of_match_table = clk_rv1126_match_table, + }, +}; -+builtin_platform_driver_probe(clk_rk1808_driver, clk_rk1808_probe); ++builtin_platform_driver_probe(clk_rv1126_driver, clk_rv1126_probe); + -+MODULE_DESCRIPTION("Rockchip RK1808 Clock Driver"); ++MODULE_DESCRIPTION("Rockchip RV1126 Clock Driver"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip/clk-rk3528.c b/drivers/clk/rockchip/clk-rk3528.c +diff --git a/drivers/clk/rockchip-oh/clk.c b/drivers/clk/rockchip-oh/clk.c new file mode 100644 -index 000000000..1b14cd57b +index 000000000..96984dea0 --- /dev/null -+++ b/drivers/clk/rockchip/clk-rk3528.c -@@ -0,0 +1,1174 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/clk.c +@@ -0,0 +1,828 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Joseph Chen ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * Copyright (c) 2016 Rockchip Electronics Co. Ltd. ++ * Author: Xing Zheng ++ * ++ * based on ++ * ++ * samsung/clk.c ++ * Copyright (c) 2013 Samsung Electronics Co., Ltd. ++ * Copyright (c) 2013 Linaro Ltd. ++ * Author: Thomas Abraham + */ + ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++#include ++#include ++#include ++#include ++#include + -+/* A placeholder for rk3066 pll type. We are rk3328 pll type */ -+#define RK3528_GRF_SOC_STATUS0 0x1a0 ++#include "../clk-fractional-divider.h" ++#include "clk.h" + -+enum rk3528_plls { -+ apll, cpll, gpll, ppll, dpll, -+}; ++#ifdef MODULE ++static HLIST_HEAD(clk_ctx_list); ++#endif + +/* -+ * ## PLL attention. -+ * -+ * [FRAC PLL]: GPLL, PPLL, DPLL -+ * - frac mode: refdiv can be 1 or 2 only -+ * - int mode: refdiv has no special limit -+ * - VCO range: [950, 3800] MHZ -+ * -+ * [INT PLL]: CPLL, APLL -+ * - int mode: refdiv can be 1 or 2 only -+ * - VCO range: [475, 1900] MHZ -+ * -+ * [PPLL]: normal mode only. -+ * ++ * Register a clock branch. ++ * Most clock branches have a form like + * -+ * ## CRU access attention. ++ * src1 --|--\ ++ * |M |--[GATE]-[DIV]- ++ * src2 --|--/ + * -+ * pclk_cru => pclk_vo_root => aclk_vo_root -+ * pclk_cru_pcie => pclk_vpu_root => aclk_vpu_root -+ * pclk_cru_ddrphy => hclk_rkvdec_root => aclk_rkvdec_root ++ * sometimes without one of those components. 
+ */ -+static struct rockchip_pll_rate_table rk3528_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), /* GPLL */ -+ RK3036_PLL_RATE(1092000000, 2, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 42, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), /* PPLL */ -+ RK3036_PLL_RATE(996000000, 2, 83, 1, 1, 1, 0), /* CPLL */ -+ RK3036_PLL_RATE(960000000, 1, 40, 1, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 50, 2, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 24, 3, 2, 1, 0), -+ { /* sentinel */ }, -+}; -+ -+#define RK3528_DIV_ACLK_M_CORE_MASK 0x1f -+#define RK3528_DIV_ACLK_M_CORE_SHIFT 11 -+#define RK3528_DIV_PCLK_DBG_MASK 0x1f -+#define RK3528_DIV_PCLK_DBG_SHIFT 1 ++static struct clk *rockchip_clk_register_branch(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, u8 mux_width, u8 mux_flags, ++ u32 *mux_table, ++ int div_offset, u8 div_shift, u8 div_width, u8 div_flags, ++ struct clk_div_table *div_table, int gate_offset, ++ u8 gate_shift, u8 gate_flags, unsigned long flags, ++ spinlock_t *lock) ++{ ++ struct clk_hw *hw; ++ struct clk_mux *mux = NULL; ++ struct clk_gate *gate = NULL; ++ struct clk_divider *div = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, ++ *gate_ops = NULL; ++ int ret; + -+#define RK3528_CLKSEL39(_aclk_m_core) \ -+{ \ -+ .reg = RK3528_CLKSEL_CON(39), \ -+ .val = HIWORD_UPDATE(_aclk_m_core, RK3528_DIV_ACLK_M_CORE_MASK, \ -+ RK3528_DIV_ACLK_M_CORE_SHIFT), \ -+} ++ if (num_parents > 1) { ++ mux = kzalloc(sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+#define RK3528_CLKSEL40(_pclk_dbg) \ -+{ \ -+ .reg = RK3528_CLKSEL_CON(40), \ -+ .val = HIWORD_UPDATE(_pclk_dbg, RK3528_DIV_PCLK_DBG_MASK, \ -+ RK3528_DIV_PCLK_DBG_SHIFT), \ -+} ++ mux->reg = base + muxdiv_offset; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux->flags = mux_flags; ++ mux->table = mux_table; ++ mux->lock = lock; ++ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? 
&clk_mux_ro_ops ++ : &clk_mux_ops; ++ } + -+/* SIGN-OFF: _aclk_m_core: 550M, _pclk_dbg: 137.5M, */ -+#define RK3528_CPUCLK_RATE(_prate, _aclk_m_core, _pclk_dbg) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ RK3528_CLKSEL39(_aclk_m_core), \ -+ RK3528_CLKSEL40(_pclk_dbg), \ -+ }, \ -+} ++ if (gate_offset >= 0) { ++ gate = kzalloc(sizeof(*gate), GFP_KERNEL); ++ if (!gate) { ++ ret = -ENOMEM; ++ goto err_gate; ++ } + -+static struct rockchip_cpuclk_rate_table rk3528_cpuclk_rates[] __initdata = { -+ /* APLL(CPU) rate <= 1900M, due to APLL VCO limit */ -+ RK3528_CPUCLK_RATE(1896000000, 1, 13), -+ RK3528_CPUCLK_RATE(1800000000, 1, 12), -+ RK3528_CPUCLK_RATE(1704000000, 1, 11), -+ RK3528_CPUCLK_RATE(1608000000, 1, 11), -+ RK3528_CPUCLK_RATE(1512000000, 1, 11), -+ RK3528_CPUCLK_RATE(1416000000, 1, 9), -+ RK3528_CPUCLK_RATE(1296000000, 1, 8), -+ RK3528_CPUCLK_RATE(1200000000, 1, 8), -+ RK3528_CPUCLK_RATE(1188000000, 1, 8), -+ RK3528_CPUCLK_RATE(1092000000, 1, 7), -+ RK3528_CPUCLK_RATE(1008000000, 1, 6), -+ RK3528_CPUCLK_RATE(1000000000, 1, 6), -+ RK3528_CPUCLK_RATE(996000000, 1, 6), -+ RK3528_CPUCLK_RATE(960000000, 1, 6), -+ RK3528_CPUCLK_RATE(912000000, 1, 6), -+ RK3528_CPUCLK_RATE(816000000, 1, 5), -+ RK3528_CPUCLK_RATE(600000000, 1, 3), -+ RK3528_CPUCLK_RATE(594000000, 1, 3), -+ RK3528_CPUCLK_RATE(408000000, 1, 2), -+ RK3528_CPUCLK_RATE(312000000, 1, 2), -+ RK3528_CPUCLK_RATE(216000000, 1, 1), -+ RK3528_CPUCLK_RATE(96000000, 1, 0), -+}; ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; ++ gate_ops = &clk_gate_ops; ++ } + -+static const struct rockchip_cpuclk_reg_data rk3528_cpuclk_data = { -+ .core_reg[0] = RK3528_CLKSEL_CON(39), -+ .div_core_shift[0] = 5, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 1, -+ .mux_core_main = 0, -+ .mux_core_shift = 10, -+ .mux_core_mask = 0x1, -+}; ++ if (div_width > 0) { ++ div = kzalloc(sizeof(*div), GFP_KERNEL); ++ if (!div) { ++ ret = -ENOMEM; ++ goto err_div; ++ } + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(mux_24m_32k_p) = { "xin24m", "clk_32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_200m_100m_24m_p) = { "clk_500m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_600m_300m_200m_24m_p) = { "clk_600m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; -+PNAME(aclk_gpu_p) = { "aclk_gpu_root", "clk_gpu_pvtpll_src" }; -+PNAME(aclk_rkvdec_pvtmux_root_p) = { "aclk_rkvdec_root", "clk_rkvdec_pvtpll_src" }; -+PNAME(clk_i2c2_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_32k" }; -+PNAME(clk_ref_pcie_inner_phy_p) = { "clk_ppll_100m_src", "xin24m" }; -+PNAME(dclk_vop0_p) = { "dclk_vop_src0", "clk_hdmiphy_pixel_io" }; -+PNAME(mclk_i2s0_2ch_sai_src_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "xin12m" }; -+PNAME(mclk_i2s1_8ch_sai_src_p) = { "clk_i2s1_8ch_src", 
"clk_i2s1_8ch_frac", "xin12m" }; -+PNAME(mclk_i2s2_2ch_sai_src_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "xin12m" }; -+PNAME(mclk_i2s3_8ch_sai_src_p) = { "clk_i2s3_8ch_src", "clk_i2s3_8ch_frac", "xin12m" }; -+PNAME(mclk_sai_i2s0_p) = { "mclk_i2s0_2ch_sai_src", "i2s0_mclkin" }; -+PNAME(mclk_sai_i2s1_p) = { "mclk_i2s1_8ch_sai_src", "i2s1_mclkin" }; -+PNAME(mclk_spdif_src_p) = { "clk_spdif_src", "clk_spdif_frac", "xin12m" }; -+PNAME(sclk_uart0_src_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; -+PNAME(sclk_uart1_src_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(sclk_uart2_src_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(sclk_uart3_src_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(sclk_uart4_src_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(sclk_uart5_src_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(sclk_uart6_src_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(sclk_uart7_src_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(clk_32k_p) = { "xin_osc0_div", "clk_pvtm_32k" }; ++ div->flags = div_flags; ++ if (div_offset) ++ div->reg = base + div_offset; ++ else ++ div->reg = base + muxdiv_offset; ++ div->shift = div_shift; ++ div->width = div_width; ++ div->lock = lock; ++ div->table = div_table; ++ div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) ++ ? &clk_divider_ro_ops ++ : &clk_divider_ops; ++ } + -+/* Pass 0 to PLL() '_lshift' as a placeholder for rk3066 pll type. We are rk3328 pll type */ -+static struct rockchip_pll_clock rk3528_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(0), -+ RK3528_MODE_CON, 0, 0, 0, rk3528_pll_rates), ++ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, ++ mux ? &mux->hw : NULL, mux_ops, ++ div ? &div->hw : NULL, div_ops, ++ gate ? 
&gate->hw : NULL, gate_ops, ++ flags); ++ if (IS_ERR(hw)) { ++ kfree(div); ++ kfree(gate); ++ return ERR_CAST(hw); ++ } + -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(8), -+ RK3528_MODE_CON, 2, 0, 0, rk3528_pll_rates), ++ return hw->clk; ++err_div: ++ kfree(gate); ++err_gate: ++ kfree(mux); ++ return ERR_PTR(ret); ++} + -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PLL_CON(24), -+ RK3528_MODE_CON, 4, 0, 0, rk3528_pll_rates), ++struct rockchip_clk_frac { ++ struct notifier_block clk_nb; ++ struct clk_fractional_divider div; ++ struct clk_gate gate; + -+ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3528_PCIE_PLL_CON(32), -+ RK3528_MODE_CON, 6, 0, -+ ROCKCHIP_PLL_FIXED_MODE, rk3528_pll_rates), ++ struct clk_mux mux; ++ const struct clk_ops *mux_ops; ++ int mux_frac_idx; + -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RK3528_DDRPHY_PLL_CON(16), -+ RK3528_DDRPHY_MODE_CON, 0, 0, 0, rk3528_pll_rates), ++ bool rate_change_remuxed; ++ int rate_change_idx; +}; + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define to_rockchip_clk_frac_nb(nb) \ ++ container_of(nb, struct rockchip_clk_frac, clk_nb) + -+static struct rockchip_clk_branch rk3528_uart0_fracmux __initdata = -+ MUX(CLK_UART0, "clk_uart0", sclk_uart0_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(6), 0, 2, MFLAGS); ++static int rockchip_clk_frac_notifier_cb(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct clk_notifier_data *ndata = data; ++ struct rockchip_clk_frac *frac = to_rockchip_clk_frac_nb(nb); ++ struct clk_mux *frac_mux = &frac->mux; ++ int ret = 0; + -+static struct rockchip_clk_branch rk3528_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", sclk_uart1_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(8), 0, 2, MFLAGS); ++ pr_debug("%s: event %lu, old_rate %lu, new_rate: %lu\n", ++ __func__, event, ndata->old_rate, ndata->new_rate); ++ if (event == PRE_RATE_CHANGE) { ++ frac->rate_change_idx = ++ frac->mux_ops->get_parent(&frac_mux->hw); ++ if (frac->rate_change_idx != frac->mux_frac_idx) { ++ frac->mux_ops->set_parent(&frac_mux->hw, ++ frac->mux_frac_idx); ++ frac->rate_change_remuxed = 1; ++ } ++ } else if (event == POST_RATE_CHANGE) { ++ /* ++ * The POST_RATE_CHANGE notifier runs directly after the ++ * divider clock is set in clk_change_rate, so we'll have ++ * remuxed back to the original parent before clk_change_rate ++ * reaches the mux itself. ++ */ ++ if (frac->rate_change_remuxed) { ++ frac->mux_ops->set_parent(&frac_mux->hw, ++ frac->rate_change_idx); ++ frac->rate_change_remuxed = 0; ++ } ++ } + -+static struct rockchip_clk_branch rk3528_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", sclk_uart2_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(10), 0, 2, MFLAGS); ++ return notifier_from_errno(ret); ++} + -+static struct rockchip_clk_branch rk3528_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", sclk_uart3_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(12), 0, 2, MFLAGS); ++/* ++ * fractional divider must set that denominator is 20 times larger than ++ * numerator to generate precise clock frequency. 
++ */ ++static void rockchip_fractional_approximation(struct clk_hw *hw, ++ unsigned long rate, unsigned long *parent_rate, ++ unsigned long *m, unsigned long *n) ++{ ++ struct clk_fractional_divider *fd = to_clk_fd(hw); ++ unsigned long p_rate, p_parent_rate; ++ struct clk_hw *p_parent; + -+static struct rockchip_clk_branch rk3528_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", sclk_uart4_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(14), 0, 2, MFLAGS); ++ if (rate == 0) { ++ pr_warn("%s p_rate(%ld), rate(%ld), maybe invalid frequency setting!\n", ++ clk_hw_get_name(hw), *parent_rate, rate); ++ *m = 0; ++ *n = 1; ++ return; ++ } + -+static struct rockchip_clk_branch rk3528_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", sclk_uart5_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(16), 0, 2, MFLAGS); ++ p_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); ++ if ((rate * 20 > p_rate) && (p_rate % rate != 0)) { ++ p_parent = clk_hw_get_parent(clk_hw_get_parent(hw)); ++ if (!p_parent) { ++ *parent_rate = p_rate; ++ } else { ++ p_parent_rate = clk_hw_get_rate(p_parent); ++ *parent_rate = p_parent_rate; ++ } + -+static struct rockchip_clk_branch rk3528_uart6_fracmux __initdata = -+ MUX(CLK_UART6, "clk_uart6", sclk_uart6_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(18), 0, 2, MFLAGS); ++ if (*parent_rate == 0) { ++ pr_warn("%s p_rate(%ld), rate(%ld), maybe invalid frequency setting!\n", ++ clk_hw_get_name(hw), *parent_rate, rate); ++ *m = 0; ++ *n = 1; ++ return; ++ } + -+static struct rockchip_clk_branch rk3528_uart7_fracmux __initdata = -+ MUX(CLK_UART7, "clk_uart7", sclk_uart7_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(20), 0, 2, MFLAGS); ++ if (*parent_rate < rate * 20) { ++ /* ++ * Fractional frequency divider to do ++ * integer frequency divider does not ++ * need 20 times the limit. 
++ */ ++ if (!(*parent_rate % rate)) { ++ *m = 1; ++ *n = *parent_rate / rate; ++ return; ++ } else if (!(fd->flags & CLK_FRAC_DIVIDER_NO_LIMIT)) { ++ pr_warn("%s p_rate(%ld) is low than rate(%ld)*20, use integer or half-div\n", ++ clk_hw_get_name(hw), ++ *parent_rate, rate); ++ *m = 0; ++ *n = 1; ++ return; ++ } ++ } ++ } + -+static struct rockchip_clk_branch mclk_i2s0_2ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S0_2CH_SAI_SRC_PRE, "mclk_i2s0_2ch_sai_src_pre", mclk_i2s0_2ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(22), 0, 2, MFLAGS); ++ fd->flags |= CLK_FRAC_DIVIDER_POWER_OF_TWO_PS; + -+static struct rockchip_clk_branch mclk_i2s1_8ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S1_8CH_SAI_SRC_PRE, "mclk_i2s1_8ch_sai_src_pre", mclk_i2s1_8ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(26), 0, 2, MFLAGS); ++ clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n); ++} + -+static struct rockchip_clk_branch mclk_i2s2_2ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S2_2CH_SAI_SRC_PRE, "mclk_i2s2_2ch_sai_src_pre", mclk_i2s2_2ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(28), 0, 2, MFLAGS); ++static struct clk *rockchip_clk_register_frac_branch( ++ struct rockchip_clk_provider *ctx, const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *base, int muxdiv_offset, u8 div_flags, ++ int gate_offset, u8 gate_shift, u8 gate_flags, ++ unsigned long flags, struct rockchip_clk_branch *child, ++ spinlock_t *lock) ++{ ++ struct clk_hw *hw; ++ struct rockchip_clk_frac *frac; ++ struct clk_gate *gate = NULL; ++ struct clk_fractional_divider *div = NULL; ++ const struct clk_ops *div_ops = NULL, *gate_ops = NULL; + -+static struct rockchip_clk_branch mclk_i2s3_8ch_sai_src_fracmux __initdata = -+ MUX(MCLK_I2S3_8CH_SAI_SRC_PRE, "mclk_i2s3_8ch_sai_src_pre", mclk_i2s3_8ch_sai_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(24), 0, 2, MFLAGS); ++ if (muxdiv_offset < 0) ++ return ERR_PTR(-EINVAL); + -+static struct rockchip_clk_branch mclk_spdif_src_fracmux __initdata = -+ MUX(MCLK_SDPDIF_SRC_PRE, "mclk_spdif_src_pre", mclk_spdif_src_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(32), 0, 2, MFLAGS); ++ if (child && child->branch_type != branch_mux) { ++ pr_err("%s: fractional child clock for %s can only be a mux\n", ++ __func__, name); ++ return ERR_PTR(-EINVAL); ++ } + -+/* -+ * CRU Clock-Architecture -+ */ -+static struct rockchip_clk_branch rk3528_clk_branches[] __initdata = { -+ /* top */ -+ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), ++ frac = kzalloc(sizeof(*frac), GFP_KERNEL); ++ if (!frac) ++ return ERR_PTR(-ENOMEM); + -+ COMPOSITE(CLK_MATRIX_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 15, 1, MFLAGS, 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE(CLK_MATRIX_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_50M_SRC, "clk_50m_src", "cpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(0), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_100M_SRC, "clk_100m_src", "cpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(0), 7, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_150M_SRC, "clk_150m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_200M_SRC, "clk_200m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(1), 5, 
5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_300M_SRC, "clk_300m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(2), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_NOMUX_HALFDIV(CLK_MATRIX_339M_SRC, "clk_339m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(2), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_400M_SRC, "clk_400m_src", "gpll", CLK_IGNORE_UNUSED, -+ RK3528_CLKSEL_CON(2), 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MATRIX_600M_SRC, "clk_600m_src", "gpll", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(4), 0, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC0, "dclk_vop_src0", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(32), 10, 1, MFLAGS, 2, 8, DFLAGS, -+ RK3528_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC1, "dclk_vop_src1", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 8, DFLAGS, -+ RK3528_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_HSM, "clk_hsm", "xin24m", 0, -+ RK3528_CLKSEL_CON(36), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(3), 13, GFLAGS), ++ if (gate_offset >= 0) { ++ gate = &frac->gate; ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; ++ gate_ops = &clk_gate_ops; ++ } + -+ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "gpll", 0, -+ RK3528_CLKSEL_CON(4), 5, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(5), 0, -+ RK3528_CLKGATE_CON(0), 13, GFLAGS, &rk3528_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, -+ RK3528_CLKGATE_CON(0), 14, GFLAGS), ++ div = &frac->div; ++ div->flags = div_flags; ++ div->reg = base + muxdiv_offset; ++ div->mshift = 16; ++ div->mwidth = 16; ++ div->mmask = GENMASK(div->mwidth - 1, 0) << div->mshift; ++ div->nshift = 0; ++ div->nwidth = 16; ++ div->nmask = GENMASK(div->nwidth - 1, 0) << div->nshift; ++ div->lock = lock; ++ div->approximation = rockchip_fractional_approximation; ++ div_ops = &clk_fractional_divider_ops; + -+ COMPOSITE_NOMUX(CLK_UART1_SRC, "clk_uart1_src", "gpll", 0, -+ RK3528_CLKSEL_CON(6), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(7), 0, -+ RK3528_CLKGATE_CON(1), 0, GFLAGS, &rk3528_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RK3528_CLKGATE_CON(1), 1, GFLAGS), ++ hw = clk_hw_register_composite(NULL, name, parent_names, num_parents, ++ NULL, NULL, ++ &div->hw, div_ops, ++ gate ? 
&gate->hw : NULL, gate_ops, ++ flags | CLK_SET_RATE_UNGATE); ++ if (IS_ERR(hw)) { ++ kfree(frac); ++ return ERR_CAST(hw); ++ } + -+ COMPOSITE_NOMUX(CLK_UART2_SRC, "clk_uart2_src", "gpll", 0, -+ RK3528_CLKSEL_CON(8), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(9), 0, -+ RK3528_CLKGATE_CON(1), 3, GFLAGS, &rk3528_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RK3528_CLKGATE_CON(1), 4, GFLAGS), ++ if (child) { ++ struct clk_mux *frac_mux = &frac->mux; ++ struct clk_init_data init; ++ struct clk *mux_clk; ++ int ret; + -+ COMPOSITE_NOMUX(CLK_UART3_SRC, "clk_uart3_src", "gpll", 0, -+ RK3528_CLKSEL_CON(10), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(11), 0, -+ RK3528_CLKGATE_CON(1), 6, GFLAGS, &rk3528_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RK3528_CLKGATE_CON(1), 7, GFLAGS), ++ frac->mux_frac_idx = match_string(child->parent_names, ++ child->num_parents, name); ++ frac->mux_ops = &clk_mux_ops; ++ frac->clk_nb.notifier_call = rockchip_clk_frac_notifier_cb; + -+ COMPOSITE_NOMUX(CLK_UART4_SRC, "clk_uart4_src", "gpll", 0, -+ RK3528_CLKSEL_CON(12), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(13), 0, -+ RK3528_CLKGATE_CON(1), 9, GFLAGS, &rk3528_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RK3528_CLKGATE_CON(1), 10, GFLAGS), ++ frac_mux->reg = base + child->muxdiv_offset; ++ frac_mux->shift = child->mux_shift; ++ frac_mux->mask = BIT(child->mux_width) - 1; ++ frac_mux->flags = child->mux_flags; ++ if (child->mux_table) ++ frac_mux->table = child->mux_table; ++ frac_mux->lock = lock; ++ frac_mux->hw.init = &init; + -+ COMPOSITE_NOMUX(CLK_UART5_SRC, "clk_uart5_src", "gpll", 0, -+ RK3528_CLKSEL_CON(14), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(15), 0, -+ RK3528_CLKGATE_CON(1), 12, GFLAGS, &rk3528_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RK3528_CLKGATE_CON(1), 13, GFLAGS), ++ init.name = child->name; ++ init.flags = child->flags | CLK_SET_RATE_PARENT; ++ init.ops = frac->mux_ops; ++ init.parent_names = child->parent_names; ++ init.num_parents = child->num_parents; + -+ COMPOSITE_NOMUX(CLK_UART6_SRC, "clk_uart6_src", "gpll", 0, -+ RK3528_CLKSEL_CON(16), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(17), 0, -+ RK3528_CLKGATE_CON(1), 15, GFLAGS, &rk3528_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, -+ RK3528_CLKGATE_CON(2), 0, GFLAGS), ++ mux_clk = clk_register(NULL, &frac_mux->hw); ++ if (IS_ERR(mux_clk)) { ++ kfree(frac); ++ return mux_clk; ++ } + -+ COMPOSITE_NOMUX(CLK_UART7_SRC, "clk_uart7_src", "gpll", 0, -+ RK3528_CLKSEL_CON(18), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(19), 0, -+ RK3528_CLKGATE_CON(2), 2, GFLAGS, &rk3528_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, -+ RK3528_CLKGATE_CON(2), 3, GFLAGS), ++ rockchip_clk_add_lookup(ctx, 
mux_clk, child->id); + -+ COMPOSITE_NOMUX(CLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(20), 8, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_2CH_FRAC, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(21), 0, -+ RK3528_CLKGATE_CON(2), 6, GFLAGS, &mclk_i2s0_2ch_sai_src_fracmux), -+ GATE(MCLK_I2S0_2CH_SAI_SRC, "mclk_i2s0_2ch_sai_src", "mclk_i2s0_2ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 7, GFLAGS), ++ /* notifier on the fraction divider to catch rate changes */ ++ if (frac->mux_frac_idx >= 0) { ++ pr_debug("%s: found fractional parent in mux at pos %d\n", ++ __func__, frac->mux_frac_idx); ++ ret = clk_notifier_register(hw->clk, &frac->clk_nb); ++ if (ret) ++ pr_err("%s: failed to register clock notifier for %s\n", ++ __func__, name); ++ } else { ++ pr_warn("%s: could not find %s as parent of %s, rate changes may not work\n", ++ __func__, name, child->name); ++ } ++ } + -+ COMPOSITE_NOMUX(CLK_I2S1_8CH_SRC, "clk_i2s1_8ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(24), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S1_8CH_FRAC, "clk_i2s1_8ch_frac", "clk_i2s1_8ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(25), 0, -+ RK3528_CLKGATE_CON(2), 12, GFLAGS, &mclk_i2s1_8ch_sai_src_fracmux), -+ GATE(MCLK_I2S1_8CH_SAI_SRC, "mclk_i2s1_8ch_sai_src", "mclk_i2s1_8ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 13, GFLAGS), ++ return hw->clk; ++} + -+ COMPOSITE_NOMUX(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(26), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(27), 0, -+ RK3528_CLKGATE_CON(2), 15, GFLAGS, &mclk_i2s2_2ch_sai_src_fracmux), -+ GATE(MCLK_I2S2_2CH_SAI_SRC, "mclk_i2s2_2ch_sai_src", "mclk_i2s2_2ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(3), 0, GFLAGS), ++static struct clk *rockchip_clk_register_factor_branch(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *base, unsigned int mult, unsigned int div, ++ int gate_offset, u8 gate_shift, u8 gate_flags, ++ unsigned long flags, spinlock_t *lock) ++{ ++ struct clk_hw *hw; ++ struct clk_gate *gate = NULL; ++ struct clk_fixed_factor *fix = NULL; + -+ COMPOSITE_NOMUX(CLK_I2S3_8CH_SRC, "clk_i2s3_8ch_src", "gpll", 0, -+ RK3528_CLKSEL_CON(22), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S3_8CH_FRAC, "clk_i2s3_8ch_frac", "clk_i2s3_8ch_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(23), 0, -+ RK3528_CLKGATE_CON(2), 9, GFLAGS, &mclk_i2s3_8ch_sai_src_fracmux), -+ GATE(MCLK_I2S3_8CH_SAI_SRC, "mclk_i2s3_8ch_sai_src", "mclk_i2s3_8ch_sai_src_pre", 0, -+ RK3528_CLKGATE_CON(2), 10, GFLAGS), ++ /* without gate, register a simple factor clock */ ++ if (gate_offset == 0) { ++ return clk_register_fixed_factor(NULL, name, ++ parent_names[0], flags, mult, ++ div); ++ } + -+ COMPOSITE_NOMUX(CLK_SPDIF_SRC, "clk_spdif_src", "gpll", 0, -+ RK3528_CLKSEL_CON(30), 2, 5, DFLAGS, -+ RK3528_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(31), 0, -+ RK3528_CLKGATE_CON(3), 5, GFLAGS, &mclk_spdif_src_fracmux), -+ GATE(MCLK_SPDIF_SRC, "mclk_spdif_src", "mclk_spdif_src_pre", 0, -+ RK3528_CLKGATE_CON(3), 6, GFLAGS), ++ gate = kzalloc(sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ return ERR_PTR(-ENOMEM); + -+ /* bus */ -+ COMPOSITE_NODIV(ACLK_BUS_M_ROOT, 
"aclk_bus_m_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 7, GFLAGS), -+ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_m_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(9), 1, GFLAGS), ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; + -+ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 4, GFLAGS), -+ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 4, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 11, GFLAGS), -+ COMPOSITE(ACLK_BUS_VOPGL_ROOT, "aclk_bus_vopgl_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 3, 1, MFLAGS, 0, 3, DFLAGS, -+ RK3528_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_BUS_H_ROOT, "aclk_bus_h_root", mux_500m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_bus_h_root", 0, -+ RK3528_CLKGATE_CON(10), 14, GFLAGS), -+ -+ COMPOSITE_NODIV(HCLK_BUS_ROOT, "hclk_bus_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 5, GFLAGS), -+ -+ COMPOSITE_NODIV(PCLK_BUS_ROOT, "pclk_bus_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(43), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(8), 6, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(8), 13, GFLAGS), -+ GATE(PCLK_BUS_GRF, "pclk_bus_grf", "pclk_bus_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(8), 15, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(PCLK_JDBCK_DAP, "pclk_jdbck_dap", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 12, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(9), 15, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 4, GFLAGS), -+ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 7, GFLAGS), -+ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(10), 13, GFLAGS), -+ GATE(PCLK_SCR, "pclk_scr", "pclk_bus_root", 0, -+ RK3528_CLKGATE_CON(11), 10, GFLAGS), -+ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(11), 12, GFLAGS), ++ fix = kzalloc(sizeof(*fix), GFP_KERNEL); ++ if (!fix) { ++ kfree(gate); ++ return ERR_PTR(-ENOMEM); ++ } + -+ COMPOSITE_NODIV(CLK_PWM0, "clk_pwm0", mux_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(44), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(11), 5, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(44), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(11), 8, GFLAGS), ++ fix->mult = mult; ++ fix->div = div; + -+ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, -+ RK3528_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, -+ RK3528_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(CLK_JDBCK_DAP, "clk_jdbck_dap", "xin24m", 0, -+ RK3528_CLKGATE_CON(9), 13, GFLAGS), -+ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, -+ RK3528_CLKGATE_CON(10), 0, GFLAGS), ++ hw = clk_hw_register_composite(NULL, name, 
parent_names, num_parents, ++ NULL, NULL, ++ &fix->hw, &clk_fixed_factor_ops, ++ &gate->hw, &clk_gate_ops, flags); ++ if (IS_ERR(hw)) { ++ kfree(fix); ++ kfree(gate); ++ return ERR_CAST(hw); ++ } + -+ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, -+ RK3528_CLKGATE_CON(8), 9, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 6, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 7, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 8, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 9, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 10, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, -+ RK3528_CLKGATE_CON(9), 11, GFLAGS), ++ return hw->clk; ++} + -+ /* pmu */ -+ GATE(HCLK_PMU_ROOT, "hclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(PCLK_PMU_ROOT, "pclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(0), 0, GFLAGS), ++struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, ++ void __iomem *base, ++ unsigned long nr_clks) ++{ ++ struct rockchip_clk_provider *ctx; ++ struct clk **clk_table; ++ int i; + -+ GATE(FCLK_MCU, "fclk_mcu", "hclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(5), 4, GFLAGS), ++ ctx = kzalloc(sizeof(struct rockchip_clk_provider), GFP_KERNEL); ++ if (!ctx) ++ return ERR_PTR(-ENOMEM); + -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU_HP_TIMER, "pclk_pmu_hp_timer", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PMU_IOC, "pclk_pmu_ioc", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_PMU_CRU, "pclk_pmu_cru", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(PCLK_PMU_GRF, "pclk_pmu_grf", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 10, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RK3528_PMU_CLKGATE_CON(0), 13, GFLAGS), -+ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_OSCCHK, "pclk_oscchk", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 12, GFLAGS), -+ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, -+ RK3528_PMU_CLKGATE_CON(5), 1, GFLAGS), ++ clk_table = kcalloc(nr_clks, sizeof(struct clk *), GFP_KERNEL); ++ if (!clk_table) ++ goto err_free; + -+ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", clk_i2c2_p, 0, -+ RK3528_PMU_CLKSEL_CON(0), 0, 2, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ for (i = 0; i < nr_clks; ++i) ++ clk_table[i] = ERR_PTR(-ENOENT); + -+ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, -+ RK3528_PMU_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RK3528_PMU_CLKSEL_CON(5), 0, 5, DFLAGS, -+ RK3528_PMU_CLKGATE_CON(5), 0, GFLAGS), ++ ctx->reg_base = base; ++ ctx->clk_data.clks = clk_table; ++ ctx->clk_data.clk_num = 
nr_clks; ++ ctx->cru_node = np; ++ spin_lock_init(&ctx->lock); + -+ COMPOSITE_FRAC(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", 0, -+ RK3528_PMU_CLKSEL_CON(1), 0, -+ RK3528_PMU_CLKGATE_CON(1), 0, GFLAGS), -+ /* clk_32k: internal! No path from external osc 32k */ -+ MUX(CLK_DEEPSLOW, "clk_32k", clk_32k_p, CLK_IS_CRITICAL, -+ RK3528_PMU_CLKSEL_CON(2), 0, 1, MFLAGS), -+ GATE(RTC_CLK_MCU, "rtc_clk_mcu", "clk_32k", 0, -+ RK3528_PMU_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, -+ RK3528_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ ctx->grf = syscon_regmap_lookup_by_phandle(ctx->cru_node, ++ "rockchip,grf"); ++ ctx->pmugrf = syscon_regmap_lookup_by_phandle(ctx->cru_node, ++ "rockchip,pmugrf"); + -+ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, -+ RK3528_PMU_CLKSEL_CON(0), 2, 1, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(0), 15, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, -+ RK3528_PMU_CLKSEL_CON(2), 1, 1, MFLAGS, -+ RK3528_PMU_CLKGATE_CON(1), 11, GFLAGS), ++#ifdef MODULE ++ hlist_add_head(&ctx->list_node, &clk_ctx_list); ++#endif + -+ /* core */ -+ COMPOSITE_NOMUX(ACLK_M_CORE_BIU, "aclk_m_core", "armclk", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(39), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3528_CLKGATE_CON(5), 12, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_DBG, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(40), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3528_CLKGATE_CON(5), 13, GFLAGS), -+ GATE(PCLK_CPU_ROOT, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(PCLK_CORE_GRF, "pclk_core_grf", "pclk_cpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(6), 2, GFLAGS), ++ return ctx; + -+ /* ddr */ -+ GATE(CLK_DDRC_SRC, "clk_ddrc_src", "dpll", CLK_IS_CRITICAL, -+ RK3528_DDRPHY_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(CLK_DDR_PHY, "clk_ddr_phy", "dpll", CLK_IS_CRITICAL, -+ RK3528_DDRPHY_CLKGATE_CON(0), 1, GFLAGS), ++err_free: ++ kfree(ctx); ++ return ERR_PTR(-ENOMEM); ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_init); + -+ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(90), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(45), 0, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 3, GFLAGS), -+ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 8, GFLAGS), -+ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3528_CLKGATE_CON(45), 4, GFLAGS), ++void rockchip_clk_of_add_provider(struct device_node *np, ++ struct rockchip_clk_provider *ctx) ++{ ++ if (of_clk_add_provider(np, of_clk_src_onecell_get, ++ &ctx->clk_data)) ++ pr_err("%s: could not register clk provider\n", __func__); ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_of_add_provider); + -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 2, GFLAGS), -+ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 6, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 9, GFLAGS), ++void rockchip_clk_add_lookup(struct rockchip_clk_provider *ctx, ++ struct clk *clk, unsigned int id) ++{ ++ if (ctx->clk_data.clks && id) ++ ctx->clk_data.clks[id] = clk; ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_add_lookup); + -+ GATE(ACLK_DDR_UPCTL, "aclk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 11, GFLAGS), -+ GATE(CLK_DDR_UPCTL, 
"clk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 12, GFLAGS), -+ GATE(CLK_DDRMON, "clk_ddrmon", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 13, GFLAGS), -+ GATE(ACLK_DDR_SCRAMBLE, "aclk_ddr_scramble", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 14, GFLAGS), -+ GATE(ACLK_SPLIT, "aclk_split", "clk_ddrc_src", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(45), 15, GFLAGS), ++void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, ++ struct rockchip_pll_clock *list, ++ unsigned int nr_pll, int grf_lock_offset) ++{ ++ struct clk *clk; ++ int idx; + -+ /* gpu */ -+ COMPOSITE_NODIV(ACLK_GPU_ROOT, "aclk_gpu_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(76), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(34), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_GPU, "aclk_gpu", aclk_gpu_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(76), 6, 1, MFLAGS, -+ RK3528_CLKGATE_CON(34), 7, GFLAGS), -+ GATE(ACLK_GPU_MALI, "aclk_gpu_mali", "aclk_gpu", 0, -+ RK3528_CLKGATE_CON(34), 8, GFLAGS), -+ COMPOSITE_NODIV(PCLK_GPU_ROOT, "pclk_gpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(76), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(34), 2, GFLAGS), ++ for (idx = 0; idx < nr_pll; idx++, list++) { ++ clk = rockchip_clk_register_pll(ctx, list->type, list->name, ++ list->parent_names, list->num_parents, ++ list->con_offset, grf_lock_offset, ++ list->lock_shift, list->mode_offset, ++ list->mode_shift, list->rate_table, ++ list->flags, list->pll_flags); ++ if (IS_ERR(clk)) { ++ pr_err("%s: failed to register clock %s\n", __func__, ++ list->name); ++ continue; ++ } + -+ /* rkvdec */ -+ COMPOSITE_NODIV(ACLK_RKVDEC_ROOT_NDFT, "aclk_rkvdec_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(88), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 3, GFLAGS), -+ COMPOSITE_NODIV(HCLK_RKVDEC_ROOT, "hclk_rkvdec_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(88), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 2, GFLAGS), -+ GATE(PCLK_DDRPHY_CRU, "pclk_ddrphy_cru", "hclk_rkvdec_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(44), 4, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_root", 0, -+ RK3528_CLKGATE_CON(44), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_HEVC_CA_RKVDEC, "clk_hevc_ca_rkvdec", mux_600m_300m_200m_24m_p, 0, -+ RK3528_CLKSEL_CON(88), 11, 2, MFLAGS, -+ RK3528_CLKGATE_CON(44), 11, GFLAGS), -+ MUX(ACLK_RKVDEC_PVTMUX_ROOT, "aclk_rkvdec_pvtmux_root", aclk_rkvdec_pvtmux_root_p, CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(88), 13, 1, MFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pvtmux_root", 0, -+ RK3528_CLKGATE_CON(44), 8, GFLAGS), ++ rockchip_clk_add_lookup(ctx, clk, list->id); ++ } ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_register_plls); + -+ /* rkvenc */ -+ COMPOSITE_NODIV(ACLK_RKVENC_ROOT, "aclk_rkvenc_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 1, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 7, GFLAGS), ++void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, ++ struct rockchip_clk_branch *list, ++ unsigned int nr_clk) ++{ ++ struct clk *clk = NULL; ++ unsigned int idx; ++ unsigned long flags; + -+ COMPOSITE_NODIV(PCLK_RKVENC_ROOT, "pclk_rkvenc_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 2, GFLAGS), -+ GATE(PCLK_RKVENC_IOC, "pclk_rkvenc_ioc", "pclk_rkvenc_root", CLK_IS_CRITICAL, -+ 
RK3528_CLKGATE_CON(37), 10, GFLAGS), -+ GATE(PCLK_RKVENC_GRF, "pclk_rkvenc_grf", "pclk_rkvenc_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(38), 6, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 11, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 13, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 2, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 8, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 2, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 4, GFLAGS), -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 7, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 9, GFLAGS), ++ for (idx = 0; idx < nr_clk; idx++, list++) { ++ flags = list->flags; + -+ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mux_150m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(80), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(38), 1, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(38), 8, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RK3528_CLKGATE_CON(38), 10, GFLAGS), ++ /* catch simple muxes */ ++ switch (list->branch_type) { ++ case branch_mux: ++ if (list->mux_table) ++ clk = clk_register_mux_table(NULL, list->name, ++ list->parent_names, list->num_parents, ++ flags, ++ ctx->reg_base + list->muxdiv_offset, ++ list->mux_shift, ++ BIT(list->mux_width) - 1, ++ list->mux_flags, list->mux_table, ++ &ctx->lock); ++ else ++ clk = clk_register_mux(NULL, list->name, ++ list->parent_names, list->num_parents, ++ flags, ++ ctx->reg_base + list->muxdiv_offset, ++ list->mux_shift, list->mux_width, ++ list->mux_flags, &ctx->lock); ++ break; ++ case branch_muxgrf: ++ clk = rockchip_clk_register_muxgrf(list->name, ++ list->parent_names, list->num_parents, ++ flags, ctx->grf, list->muxdiv_offset, ++ list->mux_shift, list->mux_width, ++ list->mux_flags); ++ break; ++ case branch_muxpmugrf: ++ clk = rockchip_clk_register_muxgrf(list->name, ++ list->parent_names, list->num_parents, ++ flags, ctx->pmugrf, list->muxdiv_offset, ++ list->mux_shift, list->mux_width, ++ list->mux_flags); ++ break; ++ case branch_divider: ++ if (list->div_table) ++ clk = clk_register_divider_table(NULL, ++ list->name, list->parent_names[0], ++ flags, ++ ctx->reg_base + list->muxdiv_offset, ++ list->div_shift, list->div_width, ++ list->div_flags, list->div_table, ++ &ctx->lock); ++ else ++ clk = clk_register_divider(NULL, list->name, ++ list->parent_names[0], flags, ++ ctx->reg_base + list->muxdiv_offset, ++ list->div_shift, list->div_width, ++ list->div_flags, &ctx->lock); ++ break; ++ case branch_fraction_divider: ++ clk = rockchip_clk_register_frac_branch(ctx, list->name, ++ list->parent_names, list->num_parents, ++ ctx->reg_base, list->muxdiv_offset, ++ list->div_flags, ++ list->gate_offset, list->gate_shift, ++ list->gate_flags, flags, list->child, ++ &ctx->lock); ++ break; ++ case branch_half_divider: ++ clk = rockchip_clk_register_halfdiv(list->name, ++ list->parent_names, list->num_parents, ++ ctx->reg_base, list->muxdiv_offset, ++ list->mux_shift, list->mux_width, ++ list->mux_flags, list->div_offset, ++ list->div_shift, list->div_width, ++ list->div_flags, list->gate_offset, ++ 
list->gate_shift, list->gate_flags, ++ flags, &ctx->lock); ++ break; ++ case branch_gate: ++ flags |= CLK_SET_RATE_PARENT; + -+ COMPOSITE_NODIV(HCLK_RKVENC_ROOT, "hclk_rkvenc_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(79), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 0, GFLAGS), -+ GATE(HCLK_SAI_I2S1, "hclk_sai_i2s1", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 9, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(37), 14, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(38), 0, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_root", 0, -+ RK3528_CLKGATE_CON(36), 6, GFLAGS), ++ clk = clk_register_gate(NULL, list->name, ++ list->parent_names[0], flags, ++ ctx->reg_base + list->gate_offset, ++ list->gate_shift, list->gate_flags, &ctx->lock); ++ break; ++ case branch_gate_no_set_rate: ++ flags &= ~CLK_SET_RATE_PARENT; + -+ COMPOSITE_NODIV(CLK_CORE_RKVENC, "clk_core_rkvenc", mux_300m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 6, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 11, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 9, 2, MFLAGS, -+ RK3528_CLKGATE_CON(36), 12, GFLAGS), -+#if 0 -+ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, -+ RK3528_CLKGATE_CON(37), 4, GFLAGS), -+ GATE(CLK_UART_JTAG, "clk_uart_jtag", "xin24m", 0, -+ RK3528_CLKGATE_CON(37), 0, GFLAGS), ++ clk = clk_register_gate(NULL, list->name, ++ list->parent_names[0], flags, ++ ctx->reg_base + list->gate_offset, ++ list->gate_shift, list->gate_flags, &ctx->lock); ++ break; ++ case branch_composite: ++ clk = rockchip_clk_register_branch(list->name, ++ list->parent_names, list->num_parents, ++ ctx->reg_base, list->muxdiv_offset, ++ list->mux_shift, ++ list->mux_width, list->mux_flags, ++ list->mux_table, list->div_offset, ++ list->div_shift, list->div_width, ++ list->div_flags, list->div_table, ++ list->gate_offset, list->gate_shift, ++ list->gate_flags, flags, &ctx->lock); ++ break; ++ case branch_mmc: ++ clk = rockchip_clk_register_mmc( ++ list->name, ++ list->parent_names, list->num_parents, ++ ctx->reg_base + list->muxdiv_offset, ++ list->div_shift ++ ); ++ break; ++ case branch_inverter: ++#ifdef CONFIG_ROCKCHIP_CLK_INV ++ clk = rockchip_clk_register_inverter( ++ list->name, list->parent_names, ++ list->num_parents, ++ ctx->reg_base + list->muxdiv_offset, ++ list->div_shift, list->div_flags, &ctx->lock); +#endif -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(79), 13, 2, MFLAGS, -+ RK3528_CLKGATE_CON(37), 3, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI_I2S1, "mclk_sai_i2s1", mclk_sai_i2s1_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(79), 8, 1, MFLAGS, -+ RK3528_CLKGATE_CON(36), 10, GFLAGS), -+ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, -+ RK3528_CLKGATE_CON(37), 9, GFLAGS), -+ -+ /* vo */ -+ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 1, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 2, GFLAGS), -+ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 3, GFLAGS), -+ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 7, GFLAGS), -+ GATE(HCLK_VDPP, "hclk_vdpp", 
"hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 10, GFLAGS), -+ GATE(HCLK_CVBS, "hclk_cvbs", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 3, GFLAGS), -+ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 4, GFLAGS), -+ GATE(HCLK_SAI_I2S3, "hclk_sai_i2s3", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 1, GFLAGS), -+ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 1, GFLAGS), -+ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 7, GFLAGS), -+ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 9, GFLAGS), -+ GATE(HCLK_HDCP_KEY, "hclk_hdcp_key", "hclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 15, GFLAGS), -+ -+ COMPOSITE_NODIV(ACLK_VO_L_ROOT, "aclk_vo_l_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(84), 1, 2, MFLAGS, -+ RK3528_CLKGATE_CON(41), 8, GFLAGS), -+ GATE(ACLK_MAC_VO, "aclk_gmac0", "aclk_vo_l_root", 0, -+ RK3528_CLKGATE_CON(41), 10, GFLAGS), -+ -+ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 2, GFLAGS), -+ GATE(PCLK_MAC_VO, "pclk_gmac0", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 11, GFLAGS), -+ GATE(PCLK_VCDCPHY, "pclk_vcdcphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 4, GFLAGS), -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 5, GFLAGS), -+ GATE(PCLK_VO_IOC, "pclk_vo_ioc", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(42), 7, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(42), 11, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 7, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 9, GFLAGS), -+ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 11, GFLAGS), -+ -+ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(43), 13, GFLAGS), ++ break; ++ case branch_factor: ++ clk = rockchip_clk_register_factor_branch( ++ list->name, list->parent_names, ++ list->num_parents, ctx->reg_base, ++ list->div_shift, list->div_width, ++ list->gate_offset, list->gate_shift, ++ list->gate_flags, flags, &ctx->lock); ++ break; ++ case branch_ddrclk: ++ clk = rockchip_clk_register_ddrclk( ++ list->name, list->flags, ++ list->parent_names, list->num_parents, ++ list->muxdiv_offset, list->mux_shift, ++ list->mux_width, list->div_shift, ++ list->div_width, list->div_flags, ++ ctx->reg_base); ++ break; ++ } + -+ GATE(PCLK_VO_GRF, "pclk_vo_grf", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(39), 13, GFLAGS), -+ GATE(PCLK_CRU, "pclk_cru", "pclk_vo_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(39), 15, GFLAGS), -+ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 6, GFLAGS), -+ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(40), 14, GFLAGS), -+ GATE(PCLK_HDCP, "pclk_hdcp", "pclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 2, GFLAGS), ++ /* none of the cases above matched */ ++ if (!clk) { ++ pr_err("%s: unknown clock type %d\n", ++ __func__, list->branch_type); ++ continue; ++ } + -+ COMPOSITE_NODIV(CLK_CORE_VDPP, "clk_core_vdpp", mux_339m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(83), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_339m_200m_100m_24m_p, 0, -+ RK3528_CLKSEL_CON(83), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 9, GFLAGS), -+ 
COMPOSITE_NODIV(ACLK_JPEG_ROOT, "aclk_jpeg_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(84), 9, 2, MFLAGS, -+ RK3528_CLKGATE_CON(41), 15, GFLAGS), -+ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_root", 0, -+ RK3528_CLKGATE_CON(41), 6, GFLAGS), ++ if (IS_ERR(clk)) { ++ pr_err("%s: failed to register clock %s: %ld\n", ++ __func__, list->name, PTR_ERR(clk)); ++ continue; ++ } + -+ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(39), 0, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 8, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_VDPP, "aclk_vdpp", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(39), 11, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo_root", 0, -+ RK3528_CLKGATE_CON(41), 0, GFLAGS), ++ rockchip_clk_add_lookup(ctx, clk, list->id); ++ } ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_register_branches); + -+ COMPOSITE(CCLK_SRC_SDMMC0, "cclk_src_sdmmc0", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(85), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(42), 8, GFLAGS), ++void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx, ++ unsigned int lookup_id, ++ const char *name, ++ u8 num_parents, ++ struct clk *parent, struct clk *alt_parent, ++ const struct rockchip_cpuclk_reg_data *reg_data, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates) ++{ ++ struct clk *clk; + -+ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(83), 15, 1, MFLAGS, 12, 3, DFLAGS, -+ RK3528_CLKGATE_CON(40), 0, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, -+ RK3528_CLKGATE_CON(40), 5, GFLAGS), ++ clk = rockchip_clk_register_cpuclk(name, num_parents, ++ parent, alt_parent, ++ reg_data, rates, nrates, ++ ctx->reg_base, &ctx->lock); ++ if (IS_ERR(clk)) { ++ pr_err("%s: failed to register clock %s: %ld\n", ++ __func__, name, PTR_ERR(clk)); ++ return; ++ } + -+ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(85), 13, 2, MFLAGS, -+ RK3528_CLKGATE_CON(43), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(86), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(43), 12, GFLAGS), -+ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 6, GFLAGS), ++ rockchip_clk_add_lookup(ctx, clk, lookup_id); ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_register_armclk); + -+ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, -+ RK3528_CLKGATE_CON(43), 2, GFLAGS), -+ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 3, GFLAGS), -+ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, -+ RK3528_CLKGATE_CON(43), 14, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RK3528_CLKGATE_CON(42), 12, GFLAGS), -+ FACTOR(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "clk_sbpi_otpc_ns", 0, 1, 2), ++void rockchip_clk_register_armclk_v2(struct rockchip_clk_provider *ctx, ++ struct rockchip_clk_branch *list, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates) ++{ ++ struct clk *clk; + -+ GATE(MCLK_SAI_I2S3, "mclk_sai_i2s3", "mclk_i2s3_8ch_sai_src", 0, -+ RK3528_CLKGATE_CON(42), 2, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3528_CLKSEL_CON(84), 0, 1, MFLAGS, -+ RK3528_CLKGATE_CON(40), 3, GFLAGS), -+ GATE(DCLK_VOP1, "dclk_vop1", "dclk_vop_src1", CLK_SET_RATE_PARENT, -+ 
RK3528_CLKGATE_CON(40), 4, GFLAGS), -+ FACTOR_GATE(DCLK_CVBS, "dclk_cvbs", "dclk_vop1", 0, 1, 4, -+ RK3528_CLKGATE_CON(41), 4, GFLAGS), -+ GATE(DCLK_4X_CVBS, "dclk_4x_cvbs", "dclk_vop1", 0, -+ RK3528_CLKGATE_CON(41), 5, GFLAGS), ++ clk = rockchip_clk_register_cpuclk_v2(list->name, list->parent_names, ++ list->num_parents, ctx->reg_base, ++ list->muxdiv_offset, list->mux_shift, ++ list->mux_width, list->mux_flags, ++ list->div_offset, list->div_shift, ++ list->div_width, list->div_flags, ++ list->flags, &ctx->lock, rates, nrates); ++ if (IS_ERR(clk)) { ++ pr_err("%s: failed to register clock %s: %ld\n", ++ __func__, list->name, PTR_ERR(clk)); ++ return; ++ } + -+ FACTOR_GATE(CLK_SFR_HDMI, "clk_sfr_hdmi", "dclk_vop_src1", 0, 1, 4, -+ RK3528_CLKGATE_CON(40), 7, GFLAGS), ++ rockchip_clk_add_lookup(ctx, clk, list->id); ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_register_armclk_v2); + -+ GATE(CLK_SPDIF_HDMI, "clk_spdif_hdmi", "mclk_spdif_src", 0, -+ RK3528_CLKGATE_CON(40), 10, GFLAGS), -+ GATE(MCLK_SPDIF, "mclk_spdif", "mclk_spdif_src", 0, -+ RK3528_CLKGATE_CON(37), 15, GFLAGS), -+ GATE(CLK_CEC_HDMI, "clk_cec_hdmi", "clk_32k", 0, -+ RK3528_CLKGATE_CON(40), 8, GFLAGS), -+#if 0 -+ GATE(CLK_USBHOST_OHCI, "clk_usbhost_ohci", "clk_usbhost_ohci_io", 0, -+ RK3528_CLKGATE_CON(43), 5, GFLAGS), -+ GATE(CLK_USBHOST_UTMI, "clk_usbhost_utmi", "clk_usbhost_utmi_io", 0, -+ RK3528_CLKGATE_CON(43), 6, GFLAGS), -+ GATE(CLK_HDMIPHY_TMDSSRC, "clk_hdmiphy_tmdssrc", "clk_hdmiphy_tmdssrc_io", 0, -+ RK3528_CLKGATE_CON(40), 11, GFLAGS), -+ GATE(CLK_HDMIPHY_PREP, "clk_hdmiphy_prep", "clk_hdmiphy_prep_io", 0, -+ RK3528_CLKGATE_CON(40), 12, GFLAGS), -+#endif -+ /* vpu */ -+ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, -+ RK3528_CLKGATE_CON(26), 5, GFLAGS), -+ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, -+ RK3528_CLKGATE_CON(27), 1, GFLAGS), -+ GATE(CLK_SUSPEND_USB3OTG, "clk_suspend_usb3otg", "xin24m", 0, -+ RK3528_CLKGATE_CON(33), 4, GFLAGS), -+ GATE(CLK_PCIE_AUX, "clk_pcie_aux", "xin24m", 0, -+ RK3528_CLKGATE_CON(30), 2, GFLAGS), -+ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, -+ RK3528_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(CLK_REF_USB3OTG, "clk_ref_usb3otg", "xin24m", 0, -+ RK3528_CLKGATE_CON(33), 2, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO0, "cclk_src_sdio0", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(72), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 1, GFLAGS), ++void (*rk_dump_cru)(void); ++EXPORT_SYMBOL(rk_dump_cru); + -+ COMPOSITE_NODIV(PCLK_VPU_ROOT, "pclk_vpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 4, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(PCLK_VPU_GRF, "pclk_vpu_grf", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(25), 12, GFLAGS), -+ GATE(PCLK_CRU_PCIE, "pclk_cru_pcie", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(25), 11, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 11, GFLAGS), -+ GATE(PCLK_CAN2, "pclk_can2", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 7, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 4, GFLAGS), -+ GATE(PCLK_CAN3, "pclk_can3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 9, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 11, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_vpu_root", 0, -+ 
RK3528_CLKGATE_CON(26), 13, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 13, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 9, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 14, GFLAGS), -+ GATE(PCLK_PCIE, "pclk_pcie", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 1, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 7, GFLAGS), -+ GATE(PCLK_VPU_IOC, "pclk_vpu_ioc", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(26), 8, GFLAGS), -+ GATE(PCLK_PIPE_GRF, "pclk_pipe_grf", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(30), 7, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(28), 1, GFLAGS), -+ GATE(PCLK_PCIE_PHY, "pclk_pcie_phy", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 6, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(27), 15, GFLAGS), -+ GATE(PCLK_MAC_VPU, "pclk_gmac1", "pclk_vpu_root", CLK_IS_CRITICAL, -+ RK3528_CLKGATE_CON(28), 6, GFLAGS), -+ GATE(PCLK_I2C6, "pclk_i2c6", "pclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(28), 3, GFLAGS), ++static int rk_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) ++{ ++ if (rk_dump_cru) ++ rk_dump_cru(); ++ return NOTIFY_DONE; ++} + -+ COMPOSITE_NODIV(ACLK_VPU_L_ROOT, "aclk_vpu_l_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(60), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 0, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_EMMC, "aclk_emmc", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(26), 1, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_MAC_VPU, "aclk_gmac1", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(28), 5, GFLAGS), -+ GATE_NO_SET_RATE(ACLK_PCIE, "aclk_pcie", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(30), 3, GFLAGS), ++static struct notifier_block rk_clk_panic_block = { ++ .notifier_call = rk_clk_panic, ++}; + -+ GATE_NO_SET_RATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_vpu_l_root", 0, -+ RK3528_CLKGATE_CON(33), 1, GFLAGS), ++static void __iomem *rst_base; ++static unsigned int reg_restart; ++static void (*cb_restart)(void); ++static int rockchip_restart_notify(struct notifier_block *this, ++ unsigned long mode, void *cmd) ++{ ++ if (cb_restart) ++ cb_restart(); + -+ COMPOSITE_NODIV(HCLK_VPU_ROOT, "hclk_vpu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 2, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 10, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 13, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(HCLK_SAI_I2S0, "hclk_sai_i2s0", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 9, GFLAGS), -+ GATE(HCLK_SAI_I2S2, "hclk_sai_i2s2", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(26), 11, GFLAGS), ++ writel(0xfdb9, rst_base + reg_restart); ++ return NOTIFY_DONE; ++} + -+ GATE(HCLK_PCIE_SLV, "hclk_pcie_slv", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 4, GFLAGS), -+ GATE(HCLK_PCIE_DBI, "hclk_pcie_dbi", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(30), 5, GFLAGS), -+ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 2, GFLAGS), -+ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(32), 4, GFLAGS), ++static struct notifier_block rockchip_restart_handler = { ++ .notifier_call = rockchip_restart_notify, ++ .priority = 128, ++}; + -+ 
COMPOSITE_NOMUX(CLK_GMAC1_VPU_25M, "clk_gmac1_25m", "ppll", 0, -+ RK3528_CLKSEL_CON(60), 2, 8, DFLAGS, -+ RK3528_CLKGATE_CON(25), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PPLL_125M_MATRIX, "clk_ppll_125m_src", "ppll", 0, -+ RK3528_CLKSEL_CON(60), 10, 5, DFLAGS, -+ RK3528_CLKGATE_CON(25), 2, GFLAGS), ++void ++rockchip_register_restart_notifier(struct rockchip_clk_provider *ctx, ++ unsigned int reg, ++ void (*cb)(void)) ++{ ++ int ret; + -+ COMPOSITE(CLK_CAN3, "clk_can3", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(73), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(64), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 4, GFLAGS), ++ rst_base = ctx->reg_base; ++ reg_restart = reg; ++ cb_restart = cb; ++ ret = register_restart_handler(&rockchip_restart_handler); ++ if (ret) ++ pr_err("%s: cannot register restart handler, %d\n", ++ __func__, ret); ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rk_clk_panic_block); ++} ++EXPORT_SYMBOL_GPL(rockchip_register_restart_notifier); + -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(61), 12, 2, MFLAGS, 6, 6, DFLAGS, -+ RK3528_CLKGATE_CON(25), 14, GFLAGS), -+ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(62), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(25), 15, GFLAGS), ++#ifdef MODULE ++static struct clk **protect_clocks; ++static unsigned int protect_nclocks; + -+ COMPOSITE_NODIV(ACLK_VPU_ROOT, "aclk_vpu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RK3528_CLKSEL_CON(61), 0, 2, MFLAGS, -+ RK3528_CLKGATE_CON(25), 3, GFLAGS), -+ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_root", 0, -+ RK3528_CLKGATE_CON(25), 9, GFLAGS), ++int rockchip_clk_protect(struct rockchip_clk_provider *ctx, ++ unsigned int *clocks, unsigned int nclocks) ++{ ++ struct clk *clk = NULL; ++ int i = 0; + -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 10, 2, MFLAGS, -+ RK3528_CLKGATE_CON(27), 5, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO1, "cclk_src_sdio1", mux_gpll_cpll_xin24m_p, 0, -+ RK3528_CLKSEL_CON(72), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 3, GFLAGS), -+ COMPOSITE(CLK_CAN2, "clk_can2", mux_gpll_cpll_p, 0, -+ RK3528_CLKSEL_CON(73), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RK3528_CLKGATE_CON(32), 8, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 3, 5, DFLAGS, -+ RK3528_CLKGATE_CON(32), 15, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 0, 3, DFLAGS, -+ RK3528_CLKGATE_CON(32), 12, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RK3528_CLKSEL_CON(74), 8, 5, DFLAGS, -+ RK3528_CLKGATE_CON(33), 0, GFLAGS), -+ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(62), 8, 2, MFLAGS, -+ RK3528_CLKGATE_CON(26), 2, GFLAGS), -+ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s2_2ch_sai_src", 0, -+ RK3528_CLKSEL_CON(63), 0, 8, DFLAGS, -+ RK3528_CLKGATE_CON(26), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 12, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_50m_24m_p, 0, -+ RK3528_CLKSEL_CON(63), 14, 2, MFLAGS, -+ RK3528_CLKGATE_CON(28), 2, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI_I2S0, "mclk_sai_i2s0", mclk_sai_i2s0_p, CLK_SET_RATE_PARENT, -+ RK3528_CLKSEL_CON(62), 10, 1, MFLAGS, 
-+ RK3528_CLKGATE_CON(26), 10, GFLAGS), -+ GATE(MCLK_SAI_I2S2, "mclk_sai_i2s2", "mclk_i2s2_2ch_sai_src", 0, -+ RK3528_CLKGATE_CON(26), 12, GFLAGS), -+#if 0 -+ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, -+ RK3528_CLKGATE_CON(27), 6, GFLAGS), ++ if (protect_clocks || !ctx || !clocks || !ctx->clk_data.clks) ++ return 0; + -+ /* vpuphy */ -+ GATE(CLK_PIPE_USB3OTG_COMBO, "clk_pipe_usb3otg_combo", "clk_pipe_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 0, GFLAGS), -+ GATE(CLK_UTMI_USB3OTG, "clk_utmi_usb3otg", "clk_utmi_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 1, GFLAGS), -+ GATE(CLK_PCIE_PIPE_PHY, "clk_pcie_pipe_phy", "clk_pipe_usb3otg_io", 0, -+ RK3528_CLKGATE_CON(31), 2, GFLAGS), -+#endif -+ /* pcie */ -+ COMPOSITE_NOMUX(CLK_PPLL_100M_MATRIX, "clk_ppll_100m_src", "ppll", CLK_IS_CRITICAL, -+ RK3528_PCIE_CLKSEL_CON(1), 2, 5, DFLAGS, -+ RK3528_PCIE_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PPLL_50M_MATRIX, "clk_ppll_50m_src", "ppll", CLK_IS_CRITICAL, -+ RK3528_PCIE_CLKSEL_CON(1), 7, 5, DFLAGS, -+ RK3528_PCIE_CLKGATE_CON(0), 2, GFLAGS), -+ MUX(CLK_REF_PCIE_INNER_PHY, "clk_ref_pcie_inner_phy", clk_ref_pcie_inner_phy_p, 0, -+ RK3528_PCIE_CLKSEL_CON(1), 13, 1, MFLAGS), -+ FACTOR(CLK_REF_PCIE_100M_PHY, "clk_ref_pcie_100m_phy", "clk_ppll_100m_src", 0, 1, 1), ++ protect_clocks = kcalloc(nclocks, sizeof(void *), GFP_KERNEL); ++ if (!protect_clocks) ++ return -ENOMEM; + -+ /* gmac */ -+ FACTOR(CLK_GMAC1_RMII_VPU, "clk_gmac1_50m", "clk_ppll_50m_src", 0, 1, 1), -+ FACTOR(CLK_GMAC1_SRC_VPU, "clk_gmac1_125m", "clk_ppll_125m_src", 0, 1, 1), ++ for (i = 0; i < nclocks; i++) { ++ if (clocks[i] >= ctx->clk_data.clk_num) { ++ pr_err("%s: invalid clock id %u\n", __func__, clocks[i]); ++ continue; ++ } ++ clk = ctx->clk_data.clks[clocks[i]]; ++ if (clk) { ++ clk_prepare_enable(clk); ++ protect_clocks[i] = clk; ++ } ++ } ++ protect_nclocks = nclocks; + -+ /* they are orphans */ -+ DIV(CLK_GMAC0_SRC, "clk_gmac0_src", "clk_gmac0_io_i", 0, -+ RK3528_CLKSEL_CON(84), 3, 6, DFLAGS), -+ GATE(CLK_GMAC0_TX, "clk_gmac0_tx", "clk_gmac0_src", 0, -+ RK3528_CLKGATE_CON(41), 13, GFLAGS), -+ GATE(CLK_GMAC0_RX, "clk_gmac0_rx", "clk_gmac0_src", 0, -+ RK3528_CLKGATE_CON(41), 14, GFLAGS), -+ GATE(CLK_GMAC0_RMII_50M, "clk_gmac0_rmii_50m", "clk_gmac0_io_i", 0, -+ RK3528_CLKGATE_CON(41), 12, GFLAGS), -+ GATE(CLK_SCRKEYGEN, "clk_scrkeygen", "clk_pmupvtm_out", 0, -+ RK3528_PMU_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PVTM_OSCCHK, "clk_pvtm_oscchk", "clk_pmupvtm_out", 0, -+ RK3528_PMU_CLKGATE_CON(2), 1, GFLAGS), -+}; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_protect); + -+static struct rockchip_clk_branch rk3528_grf_clk_branches[] __initdata = { -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc0", RK3528_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc0", RK3528_SDMMC_CON1, 1), -+ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "cclk_src_sdio0", RK3528_SDIO0_CON0, 1), -+ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "cclk_src_sdio0", RK3528_SDIO0_CON1, 1), -+ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "cclk_src_sdio1", RK3528_SDIO1_CON0, 1), -+ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "cclk_src_sdio1", RK3528_SDIO1_CON1, 1), -+}; ++void rockchip_clk_unprotect(void) ++{ ++ int i = 0; + -+static void __iomem *rk3528_cru_base; ++ if (!protect_clocks || !protect_nclocks) ++ return; + -+static void rk3528_dump_cru(void) -+{ -+ if (rk3528_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base, -+ 0x8b8, false); -+ pr_warn("PCIE CRU:\n"); -+ 
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base + RK3528_PCIE_CRU_BASE, -+ 0x804, false); -+ pr_warn("DDRPHY CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3528_cru_base + RK3528_DDRPHY_CRU_BASE, -+ 0x804, false); ++ for (i = 0; i < protect_nclocks; i++) { ++ if (protect_clocks[i]) ++ clk_disable_unprepare(protect_clocks[i]); + } ++ protect_nclocks = 0; ++ kfree(protect_clocks); ++ protect_clocks = NULL; ++ +} ++EXPORT_SYMBOL_GPL(rockchip_clk_unprotect); + -+static void __init rk3528_clk_init(struct device_node *np) ++void rockchip_clk_disable_unused(void) +{ + struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **clks; -+ -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } -+ -+ rk3528_cru_base = reg_base; ++ struct clk *clk; ++ struct clk_hw *hw; ++ int i = 0, flag = 0; + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; ++ hlist_for_each_entry(ctx, &clk_ctx_list, list_node) { ++ for (i = 0; i < ctx->clk_data.clk_num; i++) { ++ clk = ctx->clk_data.clks[i]; ++ if (clk && !IS_ERR(clk)) { ++ hw = __clk_get_hw(clk); ++ if (hw) ++ flag = clk_hw_get_flags(hw); ++ if (flag & CLK_IGNORE_UNUSED) ++ continue; ++ if (flag & CLK_IS_CRITICAL) ++ continue; ++ clk_prepare_enable(clk); ++ clk_disable_unprepare(clk); ++ } ++ } + } -+ clks = ctx->clk_data.clks; -+ -+ rockchip_clk_register_plls(ctx, rk3528_pll_clks, -+ ARRAY_SIZE(rk3528_pll_clks), -+ RK3528_GRF_SOC_STATUS0); ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_disable_unused); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip-oh/clk.h b/drivers/clk/rockchip-oh/clk.h +new file mode 100644 +index 000000000..82345742c +--- /dev/null ++++ b/drivers/clk/rockchip-oh/clk.h +@@ -0,0 +1,1336 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * Copyright (c) 2015 Rockchip Electronics Co. Ltd. ++ * Author: Xing Zheng ++ * ++ * based on ++ * ++ * samsung/clk.h ++ * Copyright (c) 2013 Samsung Electronics Co., Ltd. ++ * Copyright (c) 2013 Linaro Ltd. 
++ * Author: Thomas Abraham ++ */ + -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 2, clks[PLL_APLL], clks[PLL_GPLL], -+ &rk3528_cpuclk_data, rk3528_cpuclk_rates, -+ ARRAY_SIZE(rk3528_cpuclk_rates)); -+ rockchip_clk_register_branches(ctx, rk3528_clk_branches, -+ ARRAY_SIZE(rk3528_clk_branches)); ++#ifndef CLK_ROCKCHIP_CLK_H ++#define CLK_ROCKCHIP_CLK_H + -+ rockchip_register_softrst(np, 47, reg_base + RK3528_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); -+ rockchip_register_restart_notifier(ctx, RK3528_GLB_SRST_FST, NULL); ++#include ++#include ++#include + -+ rockchip_clk_of_add_provider(np, ctx); ++struct clk; + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3528_dump_cru; ++#define HIWORD_UPDATE(val, mask, shift) \ ++ ((val) << (shift) | (mask) << ((shift) + 16)) + -+} ++/* register positions shared by PX30, RV1108, RK2928, RK3036, RK3066, RK3188 and RK3228 */ ++#define BOOST_PLL_H_CON(x) ((x) * 0x4) ++#define BOOST_CLK_CON 0x0008 ++#define BOOST_BOOST_CON 0x000c ++#define BOOST_SWITCH_CNT 0x0010 ++#define BOOST_HIGH_PERF_CNT0 0x0014 ++#define BOOST_HIGH_PERF_CNT1 0x0018 ++#define BOOST_STATIS_THRESHOLD 0x001c ++#define BOOST_SHORT_SWITCH_CNT 0x0020 ++#define BOOST_SWITCH_THRESHOLD 0x0024 ++#define BOOST_FSM_STATUS 0x0028 ++#define BOOST_PLL_L_CON(x) ((x) * 0x4 + 0x2c) ++#define BOOST_PLL_CON_MASK 0xffff ++#define BOOST_CORE_DIV_MASK 0x1f ++#define BOOST_CORE_DIV_SHIFT 0 ++#define BOOST_BACKUP_PLL_MASK 0x3 ++#define BOOST_BACKUP_PLL_SHIFT 8 ++#define BOOST_BACKUP_PLL_USAGE_MASK 0x1 ++#define BOOST_BACKUP_PLL_USAGE_SHIFT 12 ++#define BOOST_BACKUP_PLL_USAGE_BORROW 0 ++#define BOOST_BACKUP_PLL_USAGE_TARGET 1 ++#define BOOST_ENABLE_MASK 0x1 ++#define BOOST_ENABLE_SHIFT 0 ++#define BOOST_RECOVERY_MASK 0x1 ++#define BOOST_RECOVERY_SHIFT 1 ++#define BOOST_SW_CTRL_MASK 0x1 ++#define BOOST_SW_CTRL_SHIFT 2 ++#define BOOST_LOW_FREQ_EN_MASK 0x1 ++#define BOOST_LOW_FREQ_EN_SHIFT 3 ++#define BOOST_STATIS_ENABLE_MASK 0x1 ++#define BOOST_STATIS_ENABLE_SHIFT 4 ++#define BOOST_BUSY_STATE BIT(8) + -+CLK_OF_DECLARE(rk3528_cru, "rockchip,rk3528-cru", rk3528_clk_init); ++#define PX30_PLL_CON(x) ((x) * 0x4) ++#define PX30_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define PX30_CLKGATE_CON(x) ((x) * 0x4 + 0x200) ++#define PX30_GLB_SRST_FST 0xb8 ++#define PX30_GLB_SRST_SND 0xbc ++#define PX30_SOFTRST_CON(x) ((x) * 0x4 + 0x300) ++#define PX30_MODE_CON 0xa0 ++#define PX30_MISC_CON 0xa4 ++#define PX30_SDMMC_CON0 0x380 ++#define PX30_SDMMC_CON1 0x384 ++#define PX30_SDIO_CON0 0x388 ++#define PX30_SDIO_CON1 0x38c ++#define PX30_EMMC_CON0 0x390 ++#define PX30_EMMC_CON1 0x394 + -+static void __init rk3528_grf_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++#define PX30_PMU_PLL_CON(x) ((x) * 0x4) ++#define PX30_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x40) ++#define PX30_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x80) ++#define PX30_PMU_MODE 0x0020 + -+ reg_base = of_iomap(of_get_parent(np), 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru grf region\n", __func__); -+ return; -+ } ++#define RV1106_TOPCRU_BASE 0x10000 ++#define RV1106_PERICRU_BASE 0x12000 ++#define RV1106_VICRU_BASE 0x14000 ++#define RV1106_NPUCRU_BASE 0x16000 ++#define RV1106_CORECRU_BASE 0x18000 ++#define RV1106_VEPUCRU_BASE 0x1A000 ++#define RV1106_VOCRU_BASE 0x1C000 ++#define RV1106_DDRCRU_BASE 0x1E000 ++#define RV1106_SUBDDRCRU_BASE 0x1F000 + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip grf clk init failed\n", __func__); -+ return; -+ } 
++#define RV1106_VI_GRF_BASE 0x50000 ++#define RV1106_VO_GRF_BASE 0x60000 + -+ rockchip_clk_register_branches(ctx, rk3528_grf_clk_branches, -+ ARRAY_SIZE(rk3528_grf_clk_branches)); ++#define RV1106_PMUCLKSEL_CON(x) ((x) * 0x4 + 0x300) ++#define RV1106_PMUCLKGATE_CON(x) ((x) * 0x4 + 0x800) ++#define RV1106_PMUSOFTRST_CON(x) ((x) * 0x4 + 0xa00) ++#define RV1106_PLL_CON(x) ((x) * 0x4 + RV1106_TOPCRU_BASE) ++#define RV1106_MODE_CON (0x280 + RV1106_TOPCRU_BASE) ++#define RV1106_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_TOPCRU_BASE) ++#define RV1106_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_TOPCRU_BASE) ++#define RV1106_SOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_TOPCRU_BASE) ++#define RV1106_GLB_SRST_FST (0xc08 + RV1106_TOPCRU_BASE) ++#define RV1106_GLB_SRST_SND (0xc0c + RV1106_TOPCRU_BASE) ++#define RV1106_SDIO_CON0 (0x1c + RV1106_VO_GRF_BASE) ++#define RV1106_SDIO_CON1 (0x20 + RV1106_VO_GRF_BASE) ++#define RV1106_SDMMC_CON0 (0x4 + RV1106_VI_GRF_BASE) ++#define RV1106_SDMMC_CON1 (0x8 + RV1106_VI_GRF_BASE) ++#define RV1106_EMMC_CON0 (0x20) ++#define RV1106_EMMC_CON1 (0x24) ++#define RV1106_PERICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_PERICRU_BASE) ++#define RV1106_PERICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_PERICRU_BASE) ++#define RV1106_PERISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_PERICRU_BASE) ++#define RV1106_VICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VICRU_BASE) ++#define RV1106_VICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VICRU_BASE) ++#define RV1106_VISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VICRU_BASE) ++#define RV1106_VICLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VICRU_BASE) ++#define RV1106_VICLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VICRU_BASE) ++#define RV1106_VISOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VICRU_BASE) ++#define RV1106_NPUCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_NPUCRU_BASE) ++#define RV1106_NPUCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_NPUCRU_BASE) ++#define RV1106_NPUSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_NPUCRU_BASE) ++#define RV1106_CORECLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_CORECRU_BASE) ++#define RV1106_CORECLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_CORECRU_BASE) ++#define RV1106_CORESOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_CORECRU_BASE) ++#define RV1106_VEPUCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VEPUCRU_BASE) ++#define RV1106_VEPUCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VEPUCRU_BASE) ++#define RV1106_VEPUSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VEPUCRU_BASE) ++#define RV1106_VOCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_VOCRU_BASE) ++#define RV1106_VOCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_VOCRU_BASE) ++#define RV1106_VOSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_VOCRU_BASE) ++#define RV1106_DDRCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_DDRCRU_BASE) ++#define RV1106_DDRCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_DDRCRU_BASE) ++#define RV1106_DDRSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_DDRCRU_BASE) ++#define RV1106_SUBDDRCLKSEL_CON(x) ((x) * 0x4 + 0x300 + RV1106_SUBDDRCRU_BASE) ++#define RV1106_SUBDDRCLKGATE_CON(x) ((x) * 0x4 + 0x800 + RV1106_SUBDDRCRU_BASE) ++#define RV1106_SUBDDRSOFTRST_CON(x) ((x) * 0x4 + 0xa00 + RV1106_SUBDDRCRU_BASE) ++#define RV1106_SUBDDRMODE_CON (0x280 + RV1106_SUBDDRCRU_BASE) + -+ rockchip_clk_of_add_provider(np, ctx); -+} ++#define RV1108_PLL_CON(x) ((x) * 0x4) ++#define RV1108_CLKSEL_CON(x) ((x) * 0x4 + 0x60) ++#define RV1108_CLKGATE_CON(x) ((x) * 0x4 + 0x120) ++#define RV1108_SOFTRST_CON(x) ((x) * 0x4 + 0x180) ++#define RV1108_GLB_SRST_FST 0x1c0 ++#define RV1108_GLB_SRST_SND 0x1c4 ++#define RV1108_MISC_CON 
0x1cc ++#define RV1108_SDMMC_CON0 0x1d8 ++#define RV1108_SDMMC_CON1 0x1dc ++#define RV1108_SDIO_CON0 0x1e0 ++#define RV1108_SDIO_CON1 0x1e4 ++#define RV1108_EMMC_CON0 0x1e8 ++#define RV1108_EMMC_CON1 0x1ec + -+CLK_OF_DECLARE(rk3528_grf_cru, "rockchip,rk3528-grf-cru", rk3528_grf_clk_init); ++#define RV1126_PMU_MODE 0x0 ++#define RV1126_PMU_PLL_CON(x) ((x) * 0x4 + 0x10) ++#define RV1126_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RV1126_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x180) ++#define RV1126_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x200) ++#define RV1126_PLL_CON(x) ((x) * 0x4) ++#define RV1126_MODE_CON 0x90 ++#define RV1126_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RV1126_CLKGATE_CON(x) ((x) * 0x4 + 0x280) ++#define RV1126_SOFTRST_CON(x) ((x) * 0x4 + 0x300) ++#define RV1126_GLB_SRST_FST 0x408 ++#define RV1126_GLB_SRST_SND 0x40c ++#define RV1126_SDMMC_CON0 0x440 ++#define RV1126_SDMMC_CON1 0x444 ++#define RV1126_SDIO_CON0 0x448 ++#define RV1126_SDIO_CON1 0x44c ++#define RV1126_EMMC_CON0 0x450 ++#define RV1126_EMMC_CON1 0x454 + -diff --git a/drivers/clk/rockchip/clk-rk3562.c b/drivers/clk/rockchip/clk-rk3562.c -new file mode 100644 -index 000000000..3c6f78fec ---- /dev/null -+++ b/drivers/clk/rockchip/clk-rk3562.c -@@ -0,0 +1,1134 @@ -+// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang -+ * Author: Finley Xiao ++ * register positions shared by RK1808 RK2928, RK3036, ++ * RK3066, RK3188 and RK3228 + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++#define RK1808_PLL_CON(x) ((x) * 0x4) ++#define RK1808_MODE_CON 0xa0 ++#define RK1808_MISC_CON 0xa4 ++#define RK1808_MISC1_CON 0xa8 ++#define RK1808_GLB_SRST_FST 0xb8 ++#define RK1808_GLB_SRST_SND 0xbc ++#define RK1808_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK1808_CLKGATE_CON(x) ((x) * 0x4 + 0x230) ++#define RK1808_SOFTRST_CON(x) ((x) * 0x4 + 0x300) ++#define RK1808_SDMMC_CON0 0x380 ++#define RK1808_SDMMC_CON1 0x384 ++#define RK1808_SDIO_CON0 0x388 ++#define RK1808_SDIO_CON1 0x38c ++#define RK1808_EMMC_CON0 0x390 ++#define RK1808_EMMC_CON1 0x394 + -+#define RK3562_GRF_SOC_STATUS0 0x430 ++#define RK1808_PMU_PLL_CON(x) ((x) * 0x4 + 0x4000) ++#define RK1808_PMU_MODE_CON 0x4020 ++#define RK1808_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x4040) ++#define RK1808_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x4080) + -+enum rk3562_plls { -+ apll, gpll, vpll, hpll, cpll, dpll, -+}; ++#define RK2928_PLL_CON(x) ((x) * 0x4) ++#define RK2928_MODE_CON 0x40 ++#define RK2928_CLKSEL_CON(x) ((x) * 0x4 + 0x44) ++#define RK2928_CLKGATE_CON(x) ((x) * 0x4 + 0xd0) ++#define RK2928_GLB_SRST_FST 0x100 ++#define RK2928_GLB_SRST_SND 0x104 ++#define RK2928_SOFTRST_CON(x) ((x) * 0x4 + 0x110) ++#define RK2928_MISC_CON 0x134 + -+static struct rockchip_pll_rate_table rk3562_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), -+ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), -+ 
RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), -+ RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0), -+ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), -+ RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++#define RK3036_SDMMC_CON0 0x144 ++#define RK3036_SDMMC_CON1 0x148 ++#define RK3036_SDIO_CON0 0x14c ++#define RK3036_SDIO_CON1 0x150 ++#define RK3036_EMMC_CON0 0x154 ++#define RK3036_EMMC_CON1 0x158 + -+PNAME(mux_pll_p) = { "xin24m" }; -+PNAME(gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; -+PNAME(gpll_cpll_pvtpll_dmyapll_p) = { "gpll", "cpll", "log_pvtpll", "dummy_apll" }; -+PNAME(gpll_cpll_hpll_xin24m_p) = { "gpll", "cpll", "hpll", "xin24m" }; -+PNAME(gpll_cpll_vpll_dmyhpll_p) = { "gpll", "cpll", "vpll", "dummy_hpll" }; -+PNAME(gpll_dmyhpll_vpll_apll_p) = { "gpll", "dummy_hpll", "vpll", "apll" }; -+PNAME(gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; -+PNAME(gpll_cpll_xin24m_dmyapll_p) = { "gpll", "cpll", "xin24m", "dummy_apll" }; -+PNAME(gpll_cpll_xin24m_dmyhpll_p) = { "gpll", "cpll", "xin24m", "dummy_hpll" }; -+PNAME(vpll_dmyhpll_gpll_cpll_p) = { "vpll", "dummy_hpll", "gpll", "cpll" }; -+PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; -+PNAME(mux_50m_xin24m_p) = { "clk_matrix_50m_src", "xin24m" }; -+PNAME(mux_100m_50m_xin24m_p) = { "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; -+PNAME(mux_125m_xin24m_p) = { "clk_matrix_125m_src", "xin24m" }; -+PNAME(mux_200m_xin24m_32k_p) = { "clk_200m_pmu", "xin24m", "clk_rtc_32k" }; -+PNAME(mux_200m_100m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src" }; -+PNAME(mux_200m_100m_50m_xin24m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src", 
"clk_matrix_50m_src", "xin24m" }; -+PNAME(clk_sai0_p) = { "clk_sai0_src", "clk_sai0_frac", "xin_osc0_half", "mclk_sai0_from_io" }; -+PNAME(mclk_sai0_out2io_p) = { "mclk_sai0", "xin_osc0_half" }; -+PNAME(clk_sai1_p) = { "clk_sai1_src", "clk_sai1_frac", "xin_osc0_half", "mclk_sai1_from_io" }; -+PNAME(mclk_sai1_out2io_p) = { "mclk_sai1", "xin_osc0_half" }; -+PNAME(clk_sai2_p) = { "clk_sai2_src", "clk_sai2_frac", "xin_osc0_half", "mclk_sai2_from_io" }; -+PNAME(mclk_sai2_out2io_p) = { "mclk_sai2", "xin_osc0_half" }; -+PNAME(clk_spdif_p) = { "clk_spdif_src", "clk_spdif_frac", "xin_osc0_half" }; -+PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; -+PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; -+PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; -+PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; -+PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_32k_pvtm" }; -+PNAME(clk_pmu1_uart0_p) = { "clk_pmu1_uart0_src", "clk_pmu1_uart0_frac", "xin24m" }; -+PNAME(clk_pipephy_ref_p) = { "clk_pipephy_div", "clk_pipephy_xin24m" }; -+PNAME(clk_usbphy_ref_p) = { "clk_usb2phy_xin24m", "clk_24m_sscsrc" }; -+PNAME(clk_mipidsi_ref_p) = { "clk_mipidsiphy_xin24m", "clk_24m_sscsrc" }; ++#define RK3228_GLB_SRST_FST 0x1f0 ++#define RK3228_GLB_SRST_SND 0x1f4 ++#define RK3228_SDMMC_CON0 0x1c0 ++#define RK3228_SDMMC_CON1 0x1c4 ++#define RK3228_SDIO_CON0 0x1c8 ++#define RK3228_SDIO_CON1 0x1cc ++#define RK3228_EMMC_CON0 0x1d8 ++#define RK3228_EMMC_CON1 0x1dc + -+static struct rockchip_pll_clock rk3562_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ 0, RK3562_PLL_CON(0), -+ RK3562_MODE_CON, 0, 0, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RK3562_PLL_CON(24), -+ RK3562_MODE_CON, 2, 3, 0, rk3562_pll_rates), -+ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, -+ 0, RK3562_PLL_CON(32), -+ RK3562_MODE_CON, 6, 4, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, -+ 0, RK3562_PLL_CON(40), -+ RK3562_MODE_CON, 8, 5, -+ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RK3562_PMU1_PLL_CON(0), -+ RK3562_PMU1_MODE_CON, 0, 2, 0, rk3562_pll_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IS_CRITICAL, RK3562_SUBDDR_PLL_CON(0), -+ RK3562_SUBDDR_MODE_CON, 0, 1, 0, NULL), -+}; ++#define RK3288_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3288_MODE_CON 0x50 ++#define RK3288_CLKSEL_CON(x) ((x) * 0x4 + 0x60) ++#define RK3288_CLKGATE_CON(x) ((x) * 0x4 + 0x160) ++#define RK3288_GLB_SRST_FST 0x1b0 ++#define RK3288_GLB_SRST_SND 0x1b4 ++#define RK3288_SOFTRST_CON(x) ((x) * 0x4 + 0x1b8) ++#define RK3288_MISC_CON 0x1e8 ++#define RK3288_SDMMC_CON0 0x200 ++#define RK3288_SDMMC_CON1 0x204 ++#define RK3288_SDIO0_CON0 0x208 ++#define RK3288_SDIO0_CON1 0x20c ++#define RK3288_SDIO1_CON0 0x210 ++#define RK3288_SDIO1_CON1 0x214 ++#define RK3288_EMMC_CON0 0x218 ++#define RK3288_EMMC_CON1 0x21c + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS 
CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++#define RK3308_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3308_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3308_CLKGATE_CON(x) ((x) * 0x4 + 0x300) ++#define RK3308_GLB_SRST_FST 0xb8 ++#define RK3308_SOFTRST_CON(x) ((x) * 0x4 + 0x400) ++#define RK3308_MODE_CON 0xa0 ++#define RK3308_SDMMC_CON0 0x480 ++#define RK3308_SDMMC_CON1 0x484 ++#define RK3308_SDIO_CON0 0x488 ++#define RK3308_SDIO_CON1 0x48c ++#define RK3308_EMMC_CON0 0x490 ++#define RK3308_EMMC_CON1 0x494 + -+static struct rockchip_clk_branch rk3562_clk_sai0_fracmux __initdata = -+ MUX(CLK_SAI0, "clk_sai0", clk_sai0_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(3), 6, 2, MFLAGS); ++#define RK3328_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3328_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3328_CLKGATE_CON(x) ((x) * 0x4 + 0x200) ++#define RK3328_GRFCLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3328_GLB_SRST_FST 0x9c ++#define RK3328_GLB_SRST_SND 0x98 ++#define RK3328_SOFTRST_CON(x) ((x) * 0x4 + 0x300) ++#define RK3328_MODE_CON 0x80 ++#define RK3328_MISC_CON 0x84 ++#define RK3328_SDMMC_CON0 0x380 ++#define RK3328_SDMMC_CON1 0x384 ++#define RK3328_SDIO_CON0 0x388 ++#define RK3328_SDIO_CON1 0x38c ++#define RK3328_EMMC_CON0 0x390 ++#define RK3328_EMMC_CON1 0x394 ++#define RK3328_SDMMC_EXT_CON0 0x398 ++#define RK3328_SDMMC_EXT_CON1 0x39C + -+static struct rockchip_clk_branch rk3562_clk_sai1_fracmux __initdata = -+ MUX(CLK_SAI1, "clk_sai1", clk_sai1_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(5), 6, 2, MFLAGS); ++#define RK3368_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3368_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3368_CLKGATE_CON(x) ((x) * 0x4 + 0x200) ++#define RK3368_GLB_SRST_FST 0x280 ++#define RK3368_GLB_SRST_SND 0x284 ++#define RK3368_SOFTRST_CON(x) ((x) * 0x4 + 0x300) ++#define RK3368_MISC_CON 0x380 ++#define RK3368_SDMMC_CON0 0x400 ++#define RK3368_SDMMC_CON1 0x404 ++#define RK3368_SDIO0_CON0 0x408 ++#define RK3368_SDIO0_CON1 0x40c ++#define RK3368_SDIO1_CON0 0x410 ++#define RK3368_SDIO1_CON1 0x414 ++#define RK3368_EMMC_CON0 0x418 ++#define RK3368_EMMC_CON1 0x41c + -+static struct rockchip_clk_branch rk3562_clk_sai2_fracmux __initdata = -+ MUX(CLK_SAI2, "clk_sai2", clk_sai2_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(8), 6, 2, MFLAGS); ++#define RK3399_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3399_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3399_CLKGATE_CON(x) ((x) * 0x4 + 0x300) ++#define RK3399_SOFTRST_CON(x) ((x) * 0x4 + 0x400) ++#define RK3399_GLB_SRST_FST 0x500 ++#define RK3399_GLB_SRST_SND 0x504 ++#define RK3399_GLB_CNT_TH 0x508 ++#define RK3399_MISC_CON 0x50c ++#define RK3399_RST_CON 0x510 ++#define RK3399_RST_ST 0x514 ++#define RK3399_SDMMC_CON0 0x580 ++#define RK3399_SDMMC_CON1 0x584 ++#define RK3399_SDIO_CON0 0x588 ++#define RK3399_SDIO_CON1 0x58c + -+static struct rockchip_clk_branch rk3562_clk_spdif_fracmux __initdata = -+ MUX(CLK_SPDIF, "clk_spdif", clk_spdif_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(15), 6, 2, MFLAGS); ++#define RK3399_PMU_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3399_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x80) ++#define RK3399_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x100) ++#define RK3399_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x110) + -+static struct rockchip_clk_branch rk3562_clk_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(21), 14, 2, MFLAGS); ++#define RK3528_PMU_CRU_BASE 0x10000 ++#define RK3528_PCIE_CRU_BASE 0x20000 ++#define 
RK3528_DDRPHY_CRU_BASE 0x28000 ++#define RK3528_VPU_GRF_BASE 0x40000 ++#define RK3528_VO_GRF_BASE 0x60000 ++#define RK3528_SDMMC_CON0 (RK3528_VO_GRF_BASE + 0x24) ++#define RK3528_SDMMC_CON1 (RK3528_VO_GRF_BASE + 0x28) ++#define RK3528_SDIO0_CON0 (RK3528_VPU_GRF_BASE + 0x4) ++#define RK3528_SDIO0_CON1 (RK3528_VPU_GRF_BASE + 0x8) ++#define RK3528_SDIO1_CON0 (RK3528_VPU_GRF_BASE + 0xc) ++#define RK3528_SDIO1_CON1 (RK3528_VPU_GRF_BASE + 0x10) ++#define RK3528_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3528_PCIE_PLL_CON(x) ((x) * 0x4 + RK3528_PCIE_CRU_BASE) ++#define RK3528_DDRPHY_PLL_CON(x) ((x) * 0x4 + RK3528_DDRPHY_CRU_BASE) ++#define RK3528_MODE_CON 0x280 ++#define RK3528_CLKSEL_CON(x) ((x) * 0x4 + 0x300) ++#define RK3528_CLKGATE_CON(x) ((x) * 0x4 + 0x800) ++#define RK3528_SOFTRST_CON(x) ((x) * 0x4 + 0xa00) ++#define RK3528_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RK3528_PMU_CRU_BASE) ++#define RK3528_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_PMU_CRU_BASE) ++#define RK3528_PCIE_CLKSEL_CON(x) ((x) * 0x4 + 0x300 + RK3528_PCIE_CRU_BASE) ++#define RK3528_PCIE_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_PCIE_CRU_BASE) ++#define RK3528_DDRPHY_CLKGATE_CON(x) ((x) * 0x4 + 0x800 + RK3528_DDRPHY_CRU_BASE) ++#define RK3528_DDRPHY_MODE_CON (0x280 + RK3528_DDRPHY_CRU_BASE) ++#define RK3528_GLB_CNT_TH 0xc00 ++#define RK3528_GLB_SRST_FST 0xc08 ++#define RK3528_GLB_SRST_SND 0xc0c + -+static struct rockchip_clk_branch rk3562_clk_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(23), 14, 2, MFLAGS); ++#define RK3562_PMU0_CRU_BASE 0x10000 ++#define RK3562_PMU1_CRU_BASE 0x18000 ++#define RK3562_DDR_CRU_BASE 0x20000 ++#define RK3562_SUBDDR_CRU_BASE 0x28000 ++#define RK3562_PERI_CRU_BASE 0x30000 + -+static struct rockchip_clk_branch rk3562_clk_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(25), 14, 2, MFLAGS); ++#define RK3562_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3562_PMU1_PLL_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x40) ++#define RK3562_SUBDDR_PLL_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x20) ++#define RK3562_MODE_CON 0x600 ++#define RK3562_PMU1_MODE_CON (RK3562_PMU1_CRU_BASE + 0x380) ++#define RK3562_SUBDDR_MODE_CON (RK3562_SUBDDR_CRU_BASE + 0x380) ++#define RK3562_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3562_CLKGATE_CON(x) ((x) * 0x4 + 0x300) ++#define RK3562_SOFTRST_CON(x) ((x) * 0x4 + 0x400) ++#define RK3562_DDR_CLKSEL_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x100) ++#define RK3562_DDR_CLKGATE_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x180) ++#define RK3562_DDR_SOFTRST_CON(x) ((x) * 0x4 + RK3562_DDR_CRU_BASE + 0x200) ++#define RK3562_SUBDDR_CLKSEL_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x100) ++#define RK3562_SUBDDR_CLKGATE_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x180) ++#define RK3562_SUBDDR_SOFTRST_CON(x) ((x) * 0x4 + RK3562_SUBDDR_CRU_BASE + 0x200) ++#define RK3562_PERI_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x100) ++#define RK3562_PERI_CLKGATE_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x300) ++#define RK3562_PERI_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PERI_CRU_BASE + 0x400) ++#define RK3562_PMU0_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x100) ++#define RK3562_PMU0_CLKGATE_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x180) ++#define RK3562_PMU0_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PMU0_CRU_BASE + 0x200) ++#define RK3562_PMU1_CLKSEL_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x100) ++#define RK3562_PMU1_CLKGATE_CON(x) 
((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x180) ++#define RK3562_PMU1_SOFTRST_CON(x) ((x) * 0x4 + RK3562_PMU1_CRU_BASE + 0x200) ++#define RK3562_GLB_SRST_FST 0x614 ++#define RK3562_GLB_SRST_SND 0x618 ++#define RK3562_GLB_RST_CON 0x61c ++#define RK3562_GLB_RST_ST 0x620 ++#define RK3562_SDMMC0_CON0 0x624 ++#define RK3562_SDMMC0_CON1 0x628 ++#define RK3562_SDMMC1_CON0 0x62c ++#define RK3562_SDMMC1_CON1 0x630 + -+static struct rockchip_clk_branch rk3562_clk_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(27), 14, 2, MFLAGS); ++#define RK3568_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3568_MODE_CON0 0xc0 ++#define RK3568_MISC_CON0 0xc4 ++#define RK3568_MISC_CON1 0xc8 ++#define RK3568_MISC_CON2 0xcc ++#define RK3568_GLB_CNT_TH 0xd0 ++#define RK3568_GLB_SRST_FST 0xd4 ++#define RK3568_GLB_SRST_SND 0xd8 ++#define RK3568_GLB_RST_CON 0xdc ++#define RK3568_GLB_RST_ST 0xe0 ++#define RK3568_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3568_CLKGATE_CON(x) ((x) * 0x4 + 0x300) ++#define RK3568_SOFTRST_CON(x) ((x) * 0x4 + 0x400) ++#define RK3568_SDMMC0_CON0 0x580 ++#define RK3568_SDMMC0_CON1 0x584 ++#define RK3568_SDMMC1_CON0 0x588 ++#define RK3568_SDMMC1_CON1 0x58c ++#define RK3568_SDMMC2_CON0 0x590 ++#define RK3568_SDMMC2_CON1 0x594 ++#define RK3568_EMMC_CON0 0x598 ++#define RK3568_EMMC_CON1 0x59c + -+static struct rockchip_clk_branch rk3562_clk_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(29), 14, 2, MFLAGS); ++#define RK3568_PMU_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3568_PMU_MODE_CON0 0x80 ++#define RK3568_PMU_CLKSEL_CON(x) ((x) * 0x4 + 0x100) ++#define RK3568_PMU_CLKGATE_CON(x) ((x) * 0x4 + 0x180) ++#define RK3568_PMU_SOFTRST_CON(x) ((x) * 0x4 + 0x200) + -+static struct rockchip_clk_branch rk3562_clk_uart6_fracmux __initdata = -+ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(31), 14, 2, MFLAGS); ++#define RK3588_PHP_CRU_BASE 0x8000 ++#define RK3588_PMU_CRU_BASE 0x30000 ++#define RK3588_BIGCORE0_CRU_BASE 0x50000 ++#define RK3588_BIGCORE1_CRU_BASE 0x52000 ++#define RK3588_DSU_CRU_BASE 0x58000 + -+static struct rockchip_clk_branch rk3562_clk_uart7_fracmux __initdata = -+ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(33), 14, 2, MFLAGS); ++#define RK3588_PLL_CON(x) RK2928_PLL_CON(x) ++#define RK3588_MODE_CON0 0x280 ++#define RK3588_B0_PLL_MODE_CON0 (RK3588_BIGCORE0_CRU_BASE + 0x280) ++#define RK3588_B1_PLL_MODE_CON0 (RK3588_BIGCORE1_CRU_BASE + 0x280) ++#define RK3588_LPLL_MODE_CON0 (RK3588_DSU_CRU_BASE + 0x280) ++#define RK3588_CLKSEL_CON(x) ((x) * 0x4 + 0x300) ++#define RK3588_CLKGATE_CON(x) ((x) * 0x4 + 0x800) ++#define RK3588_SOFTRST_CON(x) ((x) * 0x4 + 0xa00) ++#define RK3588_GLB_CNT_TH 0xc00 ++#define RK3588_GLB_SRST_FST 0xc08 ++#define RK3588_GLB_SRST_SND 0xc0c ++#define RK3588_GLB_RST_CON 0xc10 ++#define RK3588_GLB_RST_ST 0xc04 ++#define RK3588_SDIO_CON0 0xC24 ++#define RK3588_SDIO_CON1 0xC28 ++#define RK3588_SDMMC_CON0 0xC30 ++#define RK3588_SDMMC_CON1 0xC34 + -+static struct rockchip_clk_branch rk3562_clk_uart8_fracmux __initdata = -+ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(35), 14, 2, MFLAGS); ++#define RK3588_PHP_CLKGATE_CON(x) ((x) * 0x4 + RK3588_PHP_CRU_BASE + 0x800) ++#define RK3588_PHP_SOFTRST_CON(x) ((x) * 0x4 + RK3588_PHP_CRU_BASE + 0xa00) + -+static struct rockchip_clk_branch rk3562_clk_uart9_fracmux __initdata = -+ 
MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(37), 14, 2, MFLAGS); ++#define RK3588_PMU_PLL_CON(x) ((x) * 0x4 + RK3588_PHP_CRU_BASE) ++#define RK3588_PMU_CLKSEL_CON(x) ((x) * 0x4 + RK3588_PMU_CRU_BASE + 0x300) ++#define RK3588_PMU_CLKGATE_CON(x) ((x) * 0x4 + RK3588_PMU_CRU_BASE + 0x800) ++#define RK3588_PMU_SOFTRST_CON(x) ((x) * 0x4 + RK3588_PMU_CRU_BASE + 0xa00) + -+static struct rockchip_clk_branch rk3562_rtc32k_pmu_fracmux __initdata = -+ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RK3562_PMU0_CLKSEL_CON(1), 0, 2, MFLAGS); ++#define RK3588_B0_PLL_CON(x) ((x) * 0x4 + RK3588_BIGCORE0_CRU_BASE) ++#define RK3588_BIGCORE0_CLKSEL_CON(x) ((x) * 0x4 + RK3588_BIGCORE0_CRU_BASE + 0x300) ++#define RK3588_BIGCORE0_CLKGATE_CON(x) ((x) * 0x4 + RK3588_BIGCORE0_CRU_BASE + 0x800) ++#define RK3588_BIGCORE0_SOFTRST_CON(x) ((x) * 0x4 + RK3588_BIGCORE0_CRU_BASE + 0xa00) ++#define RK3588_B1_PLL_CON(x) ((x) * 0x4 + RK3588_BIGCORE1_CRU_BASE) ++#define RK3588_BIGCORE1_CLKSEL_CON(x) ((x) * 0x4 + RK3588_BIGCORE1_CRU_BASE + 0x300) ++#define RK3588_BIGCORE1_CLKGATE_CON(x) ((x) * 0x4 + RK3588_BIGCORE1_CRU_BASE + 0x800) ++#define RK3588_BIGCORE1_SOFTRST_CON(x) ((x) * 0x4 + RK3588_BIGCORE1_CRU_BASE + 0xa00) ++#define RK3588_LPLL_CON(x) ((x) * 0x4 + RK3588_DSU_CRU_BASE) ++#define RK3588_DSU_CLKSEL_CON(x) ((x) * 0x4 + RK3588_DSU_CRU_BASE + 0x300) ++#define RK3588_DSU_CLKGATE_CON(x) ((x) * 0x4 + RK3588_DSU_CRU_BASE + 0x800) ++#define RK3588_DSU_SOFTRST_CON(x) ((x) * 0x4 + RK3588_DSU_CRU_BASE + 0xa00) + -+static struct rockchip_clk_branch rk3562_clk_pmu1_uart0_fracmux __initdata = -+ MUX(CLK_PMU1_UART0, "clk_pmu1_uart0", clk_pmu1_uart0_p, CLK_SET_RATE_PARENT, -+ RK3562_PMU1_CLKSEL_CON(2), 6, 2, MFLAGS); ++enum rockchip_pll_type { ++ pll_rk3036, ++ pll_rk3066, ++ pll_rk3328, ++ pll_rk3399, ++ pll_rk3588, ++ pll_rk3588_core, ++}; + -+static struct rockchip_clk_branch rk3562_clk_branches[] __initdata = { -+ /* -+ * CRU Clock-Architecture -+ */ -+ /* PD_TOP */ -+ COMPOSITE(CLK_MATRIX_50M_SRC, "clk_matrix_50m_src", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE(CLK_MATRIX_100M_SRC, "clk_matrix_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE(CLK_MATRIX_125M_SRC, "clk_matrix_125m_src", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(CLK_MATRIX_200M_SRC, "clk_matrix_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE(CLK_MATRIX_300M_SRC, "clk_matrix_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE(ACLK_TOP, "aclk_top", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(5), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(ACLK_TOP_VIO, "aclk_top_vio", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE(CLK_24M_SSCSRC, "clk_24m_sscsrc", vpll_dmyhpll_gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE(CLK_CAM0_OUT2IO, "clk_cam0_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 12, 
GFLAGS), -+ COMPOSITE(CLK_CAM1_OUT2IO, "clk_cam1_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(8), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE(CLK_CAM2_OUT2IO, "clk_cam2_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(CLK_CAM3_OUT2IO, "clk_cam3_out2io", gpll_cpll_xin24m_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(1), 15, GFLAGS), -+ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), ++#define RK3036_PLL_RATE(_rate, _refdiv, _fbdiv, _postdiv1, \ ++ _postdiv2, _dsmpd, _frac) \ ++{ \ ++ .rate = _rate##U, \ ++ .fbdiv = _fbdiv, \ ++ .postdiv1 = _postdiv1, \ ++ .refdiv = _refdiv, \ ++ .postdiv2 = _postdiv2, \ ++ .dsmpd = _dsmpd, \ ++ .frac = _frac, \ ++} + -+ /* PD_BUS */ -+ COMPOSITE(ACLK_BUS, "aclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(40), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(18), 0, GFLAGS), -+ COMPOSITE(HCLK_BUS, "hclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(40), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RK3562_CLKGATE_CON(18), 1, GFLAGS), -+ COMPOSITE(PCLK_BUS, "pclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(18), 2, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 0, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 1, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 2, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 3, GFLAGS), -+ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(19), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(41), 8, 2, MFLAGS, -+ RK3562_CLKGATE_CON(19), 5, GFLAGS), -+ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 6, GFLAGS), -+ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 7, GFLAGS), -+ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 8, GFLAGS), -+ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 9, GFLAGS), -+ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, -+ RK3562_CLKGATE_CON(19), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_BUS_GPIO, "dclk_bus_gpio", mux_xin24m_32k_p, 0, -+ RK3562_CLKSEL_CON(41), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(20), 4, GFLAGS), -+ GATE(DCLK_BUS_GPIO3, "dclk_bus_gpio3", "dclk_bus_gpio", 0, -+ RK3562_CLKGATE_CON(20), 5, GFLAGS), -+ GATE(DCLK_BUS_GPIO4, "dclk_bus_gpio4", "dclk_bus_gpio", 0, -+ RK3562_CLKGATE_CON(20), 6, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(21), 0, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 1, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 2, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 3, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 4, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 5, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, -+ RK3562_CLKGATE_CON(21), 6, GFLAGS), -+ GATE(PCLK_STIMER, "pclk_stimer", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 7, GFLAGS), -+ GATE(CLK_STIMER0, "clk_stimer0", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 8, GFLAGS), -+ GATE(CLK_STIMER1, "clk_stimer1", "xin24m", 
CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(21), 9, GFLAGS), -+ GATE(PCLK_WDTNS, "pclk_wdtns", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 0, GFLAGS), -+ GATE(CLK_WDTNS, "clk_wdtns", "xin24m", 0, -+ RK3562_CLKGATE_CON(22), 1, GFLAGS), -+ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 2, GFLAGS), -+ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 3, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 4, GFLAGS), -+ GATE(PCLK_INTC, "pclk_intc", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(22), 5, GFLAGS), -+ GATE(ACLK_BUS_GIC400, "aclk_bus_gic400", "aclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(22), 6, GFLAGS), -+ GATE(ACLK_BUS_SPINLOCK, "aclk_bus_spinlock", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(23), 0, GFLAGS), -+ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 1, GFLAGS), -+ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 2, GFLAGS), -+ GATE(FCLK_BUS_CM0_CORE, "fclk_bus_cm0_core", "hclk_bus", 0, -+ RK3562_CLKGATE_CON(23), 3, GFLAGS), -+ GATE(CLK_BUS_CM0_RTC, "clk_bus_cm0_rtc", "clk_rtc_32k", 0, -+ RK3562_CLKGATE_CON(23), 4, GFLAGS), -+ GATE(HCLK_ICACHE, "hclk_icache", "hclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 8, GFLAGS), -+ GATE(HCLK_DCACHE, "hclk_dcache", "hclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(23), 9, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(24), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RK3562_CLKSEL_CON(43), 0, 11, DFLAGS, -+ RK3562_CLKGATE_CON(24), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RK3562_CLKSEL_CON(43), 11, 5, DFLAGS, -+ RK3562_CLKGATE_CON(24), 3, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(24), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC_VCCIO156, "clk_saradc_vccio156", "xin24m", 0, -+ RK3562_CLKSEL_CON(44), 0, 12, DFLAGS, -+ RK3562_CLKGATE_CON(24), 9, GFLAGS), -+ GATE(PCLK_GMAC, "pclk_gmac", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 0, GFLAGS), -+ GATE(ACLK_GMAC, "aclk_gmac", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC_125M_CRU_I, "clk_gmac_125m_cru_i", mux_125m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(45), 8, 1, MFLAGS, -+ RK3562_CLKGATE_CON(25), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_GMAC_50M_CRU_I, "clk_gmac_50m_cru_i", mux_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(45), 7, 1, MFLAGS, -+ RK3562_CLKGATE_CON(25), 3, GFLAGS), -+ COMPOSITE(CLK_GMAC_ETH_OUT2IO, "clk_gmac_eth_out2io", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_CLKGATE_CON(25), 4, GFLAGS), -+ GATE(PCLK_APB2ASB_VCCIO156, "pclk_apb2asb_vccio156", "pclk_bus", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(25), 5, GFLAGS), -+ GATE(PCLK_TO_VCCIO156, "pclk_to_vccio156", "pclk_bus", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(25), 6, GFLAGS), -+ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 8, GFLAGS), -+ GATE(PCLK_DSITX, "pclk_dsitx", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 9, GFLAGS), -+ GATE(PCLK_CPU_EMA_DET, "pclk_cpu_ema_det", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(25), 10, GFLAGS), -+ GATE(PCLK_HASH, "pclk_hash", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(25), 11, GFLAGS), -+ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_bus", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(25), 15, GFLAGS), -+ GATE(PCLK_ASB2APB_VCCIO156, "pclk_asb2apb_vccio156", "pclk_to_vccio156", 
CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(26), 0, GFLAGS), -+ GATE(PCLK_IOC_VCCIO156, "pclk_ioc_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, -+ RK3562_CLKGATE_CON(26), 1, GFLAGS), -+ GATE(PCLK_GPIO3_VCCIO156, "pclk_gpio3_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 2, GFLAGS), -+ GATE(PCLK_GPIO4_VCCIO156, "pclk_gpio4_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 3, GFLAGS), -+ GATE(PCLK_SARADC_VCCIO156, "pclk_saradc_vccio156", "pclk_to_vccio156", 0, -+ RK3562_CLKGATE_CON(26), 4, GFLAGS), -+ GATE(PCLK_MAC100, "pclk_mac100", "pclk_bus", 0, -+ RK3562_CLKGATE_CON(27), 0, GFLAGS), -+ GATE(ACLK_MAC100, "aclk_mac100", "aclk_bus", 0, -+ RK3562_CLKGATE_CON(27), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_MAC100_50M_MATRIX, "clk_mac100_50m_matrix", mux_50m_xin24m_p, 0, -+ RK3562_CLKSEL_CON(47), 7, 1, MFLAGS, -+ RK3562_CLKGATE_CON(27), 2, GFLAGS), ++#define RK3066_PLL_RATE(_rate, _nr, _nf, _no) \ ++{ \ ++ .rate = _rate##U, \ ++ .nr = _nr, \ ++ .nf = _nf, \ ++ .no = _no, \ ++ .nb = ((_nf) < 2) ? 1 : (_nf) >> 1, \ ++} + -+ /* PD_CORE */ -+ COMPOSITE_NOMUX(0, "aclk_core_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, -+ RK3562_CLKSEL_CON(11), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3562_CLKGATE_CON(4), 3, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, -+ RK3562_CLKSEL_CON(12), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RK3562_CLKGATE_CON(4), 5, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_CORE, "hclk_core", "gpll", CLK_IS_CRITICAL, -+ RK3562_CLKSEL_CON(13), 0, 6, DFLAGS, -+ RK3562_CLKGATE_CON(5), 2, GFLAGS), -+ GATE(0, "pclk_dbg_daplite", "pclk_dbg_pre", CLK_IGNORE_UNUSED, -+ RK3562_CLKGATE_CON(4), 10, GFLAGS), ++#define RK3066_PLL_RATE_NB(_rate, _nr, _nf, _no, _nb) \ ++{ \ ++ .rate = _rate##U, \ ++ .nr = _nr, \ ++ .nf = _nf, \ ++ .no = _no, \ ++ .nb = _nb, \ ++} + -+ /* PD_DDR */ -+ FACTOR_GATE(0, "clk_gpll_mux_to_ddr", "gpll", 0, 1, 4, -+ RK3328_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, -+ RK3562_DDR_CLKSEL_CON(1), 8, 5, DFLAGS, -+ RK3562_DDR_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_MSCH_BRG_BIU, "clk_msch_brg_biu", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, -+ RK3562_DDR_CLKSEL_CON(1), 0, 4, DFLAGS, -+ RK3562_DDR_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_DDR_PHY, "pclk_ddr_phy", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_DDR_DFICTL, "pclk_ddr_dfictl", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(PCLK_DDR_DMA2DDR, "pclk_ddr_dma2ddr", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_DDR_CRU, "pclk_ddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_SUBDDR_CRU, "pclk_subddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, -+ RK3562_DDR_CLKGATE_CON(1), 4, GFLAGS), ++#define RK3588_PLL_RATE(_rate, _p, _m, _s, _k) \ ++{ \ ++ .rate = _rate##U, \ ++ .p = _p, \ ++ .m = _m, \ ++ .s = _s, \ ++ .k = _k, \ 
++} + -+ /* PD_GPU */ -+ COMPOSITE(CLK_GPU_PRE, "clk_gpu_pre", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(18), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(8), 0, GFLAGS), -+ COMPOSITE_NOMUX(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre", 0, -+ RK3562_CLKSEL_CON(19), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(8), 2, GFLAGS), -+ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre", 0, -+ RK3562_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_GPU_BRG, "clk_gpu_brg", mux_200m_100m_p, 0, -+ RK3562_CLKSEL_CON(19), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(8), 8, GFLAGS), ++/** ++ * struct rockchip_clk_provider - information about clock provider ++ * @reg_base: virtual address for the register base. ++ * @clk_data: holds clock related data like clk* and number of clocks. ++ * @cru_node: device-node of the clock-provider ++ * @grf: regmap of the general-register-files syscon ++ * @list_node: node in the global ctx list ++ * @lock: maintains exclusion between callbacks for a given clock-provider. ++ */ ++struct rockchip_clk_provider { ++ void __iomem *reg_base; ++ struct clk_onecell_data clk_data; ++ struct device_node *cru_node; ++ struct regmap *grf; ++ struct regmap *pmugrf; ++ struct hlist_node list_node; ++ spinlock_t lock; ++}; + -+ /* PD_NPU */ -+ COMPOSITE(CLK_NPU_PRE, "clk_npu_pre", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu_pre", 0, -+ RK3562_CLKSEL_CON(16), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(ACLK_RKNN, "aclk_rknn", "clk_npu_pre", 0, -+ RK3562_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_pre", 0, -+ RK3562_CLKGATE_CON(6), 5, GFLAGS), ++struct rockchip_pll_rate_table { ++ unsigned long rate; ++ union { ++ struct { ++ /* for RK3066 */ ++ unsigned int nr; ++ unsigned int nf; ++ unsigned int no; ++ unsigned int nb; ++ }; ++ struct { ++ /* for RK3036/RK3399 */ ++ unsigned int fbdiv; ++ unsigned int postdiv1; ++ unsigned int refdiv; ++ unsigned int postdiv2; ++ unsigned int dsmpd; ++ unsigned int frac; ++ }; ++ struct { ++ /* for RK3588 */ ++ unsigned int m; ++ unsigned int p; ++ unsigned int s; ++ unsigned int k; ++ }; ++ }; ++}; + -+ /* PD_PERI */ -+ COMPOSITE(ACLK_PERI, "aclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_PERI_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(HCLK_PERI, "hclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_PERI_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE(PCLK_PERI, "pclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, -+ RK3562_PERI_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PERICRU, "pclk_pericru", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(HCLK_SAI0, "hclk_sai0", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE(CLK_SAI0_SRC, "clk_sai0_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(1), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI0_FRAC, "clk_sai0_frac", "clk_sai0_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(2), 0, -+ RK3562_PERI_CLKGATE_CON(2), 2, GFLAGS, -+ &rk3562_clk_sai0_fracmux), -+ GATE(MCLK_SAI0, "mclk_sai0", "clk_sai0", 0, -+ RK3562_PERI_CLKGATE_CON(2), 3, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI0_OUT2IO, "mclk_sai0_out2io", mclk_sai0_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(3), 5, 1, MFLAGS, -+ 
RK3562_PERI_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(HCLK_SAI1, "hclk_sai1", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE(CLK_SAI1_SRC, "clk_sai1_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI1_FRAC, "clk_sai1_frac", "clk_sai1_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(4), 0, -+ RK3562_PERI_CLKGATE_CON(2), 7, GFLAGS, -+ &rk3562_clk_sai1_fracmux), -+ GATE(MCLK_SAI1, "mclk_sai1", "clk_sai1", 0, -+ RK3562_PERI_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI1_OUT2IO, "mclk_sai1_out2io", mclk_sai1_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(5), 5, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 9, GFLAGS), -+ GATE(HCLK_SAI2, "hclk_sai2", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(2), 10, GFLAGS), -+ COMPOSITE(CLK_SAI2_SRC, "clk_sai2_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SAI2_FRAC, "clk_sai2_frac", "clk_sai2_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(7), 0, -+ RK3562_PERI_CLKGATE_CON(2), 12, GFLAGS, -+ &rk3562_clk_sai2_fracmux), -+ GATE(MCLK_SAI2, "mclk_sai2", "clk_sai2", 0, -+ RK3562_PERI_CLKGATE_CON(2), 13, GFLAGS), -+ COMPOSITE_NODIV(MCLK_SAI2_OUT2IO, "mclk_sai2_out2io", mclk_sai2_out2io_p, CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(8), 5, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(2), 14, GFLAGS), -+ GATE(HCLK_DSM, "hclk_dsm", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 1, GFLAGS), -+ GATE(CLK_DSM, "clk_dsm", "mclk_sai1", 0, -+ RK3562_PERI_CLKGATE_CON(3), 2, GFLAGS), -+ GATE(HCLK_PDM, "hclk_pdm", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE(MCLK_PDM, "mclk_pdm", gpll_cpll_hpll_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(3), 5, GFLAGS), -+ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(3), 8, GFLAGS), -+ COMPOSITE(CLK_SPDIF_SRC, "clk_spdif_src", gpll_cpll_hpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 6, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(3), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(14), 0, -+ RK3562_PERI_CLKGATE_CON(3), 10, GFLAGS, -+ &rk3562_clk_spdif_fracmux), -+ GATE(MCLK_SPDIF, "mclk_spdif", "clk_spdif", 0, -+ RK3562_PERI_CLKGATE_CON(3), 11, GFLAGS), -+ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 0, GFLAGS), -+ COMPOSITE(CCLK_SDMMC0, "cclk_sdmmc0", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 1, GFLAGS), -+ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "cclk_sdmmc0", RK3562_SDMMC0_CON0, 1), -+ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "cclk_sdmmc0", RK3562_SDMMC0_CON1, 1), -+ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE(CCLK_SDMMC1, "cclk_sdmmc1", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(17), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 3, GFLAGS), -+ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "cclk_sdmmc1", RK3562_SDMMC1_CON0, 1), -+ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "cclk_sdmmc1", RK3562_SDMMC1_CON1, 1), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 8, GFLAGS), -+ GATE(ACLK_EMMC, "aclk_emmc", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 9, GFLAGS), -+ COMPOSITE(CCLK_EMMC, 
"cclk_emmc", gpll_cpll_xin24m_dmyhpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 10, GFLAGS), -+ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 11, GFLAGS), -+ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(4), 12, GFLAGS), -+ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(4), 13, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(4), 14, GFLAGS), -+ GATE(HCLK_USB2HOST, "hclk_usb2host", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(5), 0, GFLAGS), -+ GATE(HCLK_USB2HOST_ARB, "hclk_usb2host_arb", "hclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(5), 1, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(6), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 12, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(6), 1, GFLAGS), -+ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, -+ RK3562_PERI_CLKGATE_CON(6), 2, GFLAGS), -+ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(6), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(20), 14, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(6), 4, GFLAGS), -+ GATE(SCLK_IN_SPI2, "sclk_in_spi2", "sclk_in_spi2_io", 0, -+ RK3562_PERI_CLKGATE_CON(6), 5, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 0, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 1, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 2, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 3, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 4, GFLAGS), -+ GATE(PCLK_UART6, "pclk_uart6", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 5, GFLAGS), -+ GATE(PCLK_UART7, "pclk_uart7", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 6, GFLAGS), -+ GATE(PCLK_UART8, "pclk_uart8", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 7, GFLAGS), -+ GATE(PCLK_UART9, "pclk_uart9", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(7), 8, GFLAGS), -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(21), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 9, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(22), 0, -+ RK3562_PERI_CLKGATE_CON(7), 10, GFLAGS, -+ &rk3562_clk_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RK3562_PERI_CLKGATE_CON(7), 11, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(23), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 12, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(24), 0, -+ RK3562_PERI_CLKGATE_CON(7), 13, GFLAGS, -+ &rk3562_clk_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RK3562_PERI_CLKGATE_CON(7), 14, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(7), 15, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", 
CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(26), 0, -+ RK3562_PERI_CLKGATE_CON(8), 0, GFLAGS, -+ &rk3562_clk_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RK3562_PERI_CLKGATE_CON(8), 1, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(27), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 2, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(28), 0, -+ RK3562_PERI_CLKGATE_CON(8), 3, GFLAGS, -+ &rk3562_clk_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RK3562_PERI_CLKGATE_CON(8), 4, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(29), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 5, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(30), 0, -+ RK3562_PERI_CLKGATE_CON(8), 6, GFLAGS, -+ &rk3562_clk_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RK3562_PERI_CLKGATE_CON(8), 7, GFLAGS), -+ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(31), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(32), 0, -+ RK3562_PERI_CLKGATE_CON(8), 9, GFLAGS, -+ &rk3562_clk_uart6_fracmux), -+ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, -+ RK3562_PERI_CLKGATE_CON(8), 10, GFLAGS), -+ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(34), 0, -+ RK3562_PERI_CLKGATE_CON(8), 12, GFLAGS, -+ &rk3562_clk_uart7_fracmux), -+ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, -+ RK3562_PERI_CLKGATE_CON(8), 13, GFLAGS), -+ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(35), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(8), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(36), 0, -+ RK3562_PERI_CLKGATE_CON(8), 15, GFLAGS, -+ &rk3562_clk_uart8_fracmux), -+ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, -+ RK3562_PERI_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(37), 8, 1, MFLAGS, 0, 7, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, -+ RK3562_PERI_CLKSEL_CON(38), 0, -+ RK3562_PERI_CLKGATE_CON(9), 2, GFLAGS, -+ &rk3562_clk_uart9_fracmux), -+ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, -+ RK3562_PERI_CLKGATE_CON(9), 3, GFLAGS), -+ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 0, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 1, GFLAGS), -+ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 2, GFLAGS), -+ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 
6, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 5, GFLAGS), -+ GATE(PCLK_PWM3_PERI, "pclk_pwm3_peri", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(10), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM3_PERI, "clk_pwm3_peri", mux_100m_50m_xin24m_p, 0, -+ RK3562_PERI_CLKSEL_CON(40), 8, 2, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(CLK_CAPTURE_PWM3_PERI, "clk_capture_pwm3_peri", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(10), 8, GFLAGS), -+ GATE(PCLK_CAN0, "pclk_can0", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(11), 1, GFLAGS), -+ GATE(PCLK_CAN1, "pclk_can1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(11), 2, GFLAGS), -+ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, -+ RK3562_PERI_CLKSEL_CON(41), 15, 1, MFLAGS, 8, 5, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(11), 3, GFLAGS), -+ GATE(PCLK_PERI_WDT, "pclk_peri_wdt", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PERI_WDT, "tclk_peri_wdt", mux_xin24m_32k_p, 0, -+ RK3562_PERI_CLKSEL_CON(43), 15, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(ACLK_SYSMEM, "aclk_sysmem", "aclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 2, GFLAGS), -+ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 3, GFLAGS), -+ GATE(PCLK_PERI_GRF, "pclk_peri_grf", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(13), 4, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 5, GFLAGS), -+ GATE(ACLK_RKDMAC, "aclk_rkdmac", "aclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(14), 0, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(14), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, -+ RK3562_PERI_CLKSEL_CON(44), 0, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(14), 2, GFLAGS), -+ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_peri", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(14), 3, GFLAGS), -+ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKGATE_CON(14), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PERI_CLKSEL_CON(44), 8, 8, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(14), 5, GFLAGS), -+ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, -+ RK3562_PERI_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(14), 7, GFLAGS), -+ GATE(PCLK_USB2PHY, "pclk_usb2phy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(15), 0, GFLAGS), -+ GATE(PCLK_PIPEPHY, "pclk_pipephy", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(15), 7, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(16), 4, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RK3562_PERI_CLKSEL_CON(46), 0, 12, DFLAGS, -+ RK3562_PERI_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(PCLK_IOC_VCCIO234, "pclk_ioc_vccio234", "pclk_peri", CLK_IS_CRITICAL, -+ RK3562_PERI_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(PCLK_PERI_GPIO1, "pclk_peri_gpio1", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(17), 0, GFLAGS), -+ GATE(PCLK_PERI_GPIO2, "pclk_peri_gpio2", "pclk_peri", 0, -+ RK3562_PERI_CLKGATE_CON(17), 1, 
GFLAGS), -+ COMPOSITE_NODIV(DCLK_PERI_GPIO, "dclk_peri_gpio", mux_xin24m_32k_p, 0, -+ RK3562_PERI_CLKSEL_CON(47), 8, 1, MFLAGS, -+ RK3562_PERI_CLKGATE_CON(17), 4, GFLAGS), -+ GATE(DCLK_PERI_GPIO1, "dclk_peri_gpio1", "dclk_peri_gpio", 0, -+ RK3562_PERI_CLKGATE_CON(17), 2, GFLAGS), -+ GATE(DCLK_PERI_GPIO2, "dclk_peri_gpio2", "dclk_peri_gpio", 0, -+ RK3562_PERI_CLKGATE_CON(17), 3, GFLAGS), ++/** ++ * struct rockchip_pll_clock - information about pll clock ++ * @id: platform specific id of the clock. ++ * @name: name of this pll clock. ++ * @parent_names: name of the parent clock. ++ * @num_parents: number of parents ++ * @flags: optional flags for basic clock. ++ * @con_offset: offset of the register for configuring the PLL. ++ * @mode_offset: offset of the register for configuring the PLL-mode. ++ * @mode_shift: offset inside the mode-register for the mode of this pll. ++ * @lock_shift: offset inside the lock register for the lock status. ++ * @type: Type of PLL to be registered. ++ * @pll_flags: hardware-specific flags ++ * @rate_table: Table of usable pll rates ++ * ++ * Flags: ++ * ROCKCHIP_PLL_SYNC_RATE - check rate parameters to match against the ++ * rate_table parameters and adjust them if necessary. ++ */ ++struct rockchip_pll_clock { ++ unsigned int id; ++ const char *name; ++ const char *const *parent_names; ++ u8 num_parents; ++ unsigned long flags; ++ int con_offset; ++ int mode_offset; ++ int mode_shift; ++ int lock_shift; ++ enum rockchip_pll_type type; ++ u8 pll_flags; ++ struct rockchip_pll_rate_table *rate_table; ++}; + -+ /* PD_PHP */ -+ COMPOSITE(ACLK_PHP, "aclk_php", gpll_cpll_p, 0, -+ RK3562_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(16), 0, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", 0, -+ RK3562_CLKSEL_CON(36), 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(16), 1, GFLAGS), -+ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 4, GFLAGS), -+ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 5, GFLAGS), -+ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 6, GFLAGS), -+ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_php", 0, -+ RK3562_CLKGATE_CON(16), 7, GFLAGS), -+ GATE(CLK_PCIE20_AUX, "clk_pcie20_aux", "xin24m", 0, -+ RK3562_CLKGATE_CON(16), 8, GFLAGS), -+ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_php", 0, -+ RK3562_CLKGATE_CON(16), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, -+ RK3562_CLKSEL_CON(36), 15, 1, MFLAGS, -+ RK3562_CLKGATE_CON(16), 11, GFLAGS), -+ GATE(CLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, -+ RK3562_CLKGATE_CON(16), 12, GFLAGS), -+ GATE(CLK_PIPEPHY_REF_FUNC, "clk_pipephy_ref_func", "pclk_pcie20", 0, -+ RK3562_CLKGATE_CON(17), 3, GFLAGS), ++/* ++ * PLL flags ++ */ ++#define ROCKCHIP_PLL_SYNC_RATE BIT(0) ++/* normal mode only. 
now only for pll_rk3036, pll_rk3328 type */ ++#define ROCKCHIP_PLL_FIXED_MODE BIT(1) ++#define ROCKCHIP_PLL_ALLOW_POWER_DOWN BIT(2) + -+ /* PD_PMU1 */ -+ COMPOSITE_NOMUX(CLK_200M_PMU, "clk_200m_pmu", "cpll", CLK_IS_CRITICAL, -+ RK3562_PMU1_CLKSEL_CON(0), 0, 5, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(0), 1, GFLAGS), -+ /* PD_PMU0 */ -+ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKSEL_CON(0), 0, -+ RK3562_PMU0_CLKGATE_CON(0), 15, GFLAGS, -+ &rk3562_rtc32k_pmu_fracmux), -+ COMPOSITE_NOMUX(BUSCLK_PDPMU0, "busclk_pdpmu0", "clk_200m_pmu", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKSEL_CON(1), 3, 2, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(0), 14, GFLAGS), -+ GATE(PCLK_PMU0_CRU, "pclk_pmu0_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(PCLK_PMU0_PMU, "pclk_pmu0_pmu", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 1, GFLAGS), -+ GATE(CLK_PMU0_PMU, "clk_pmu0_pmu", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU0_HP_TIMER, "pclk_pmu0_hp_timer", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 3, GFLAGS), -+ GATE(CLK_PMU0_HP_TIMER, "clk_pmu0_hp_timer", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 4, GFLAGS), -+ GATE(CLK_PMU0_32K_HP_TIMER, "clk_pmu0_32k_hp_timer", "clk_rtc_32k", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_PMU0_PVTM, "pclk_pmu0_pvtm", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 6, GFLAGS), -+ GATE(CLK_PMU0_PVTM, "clk_pmu0_pvtm", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 7, GFLAGS), -+ GATE(PCLK_IOC_PMUIO, "pclk_ioc_pmuio", "busclk_pdpmu0", CLK_IS_CRITICAL, -+ RK3562_PMU0_CLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_PMU0_GPIO0, "pclk_pmu0_gpio0", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 9, GFLAGS), -+ GATE(DBCLK_PMU0_GPIO0, "dbclk_pmu0_gpio0", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_PMU0_GRF, "pclk_pmu0_grf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 11, GFLAGS), -+ GATE(PCLK_PMU0_SGRF, "pclk_pmu0_sgrf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(1), 0, GFLAGS), -+ GATE(PCLK_PMU0_SCRKEYGEN, "pclk_pmu0_scrkeygen", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU0_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PIPEPHY_DIV, "clk_pipephy_div", "cpll", 0, -+ RK3562_PMU0_CLKSEL_CON(2), 0, 6, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PIPEPHY_XIN24M, "clk_pipephy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NODIV(CLK_PIPEPHY_REF, "clk_pipephy_ref", clk_pipephy_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 7, 1, MFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 2, GFLAGS), -+ GATE(CLK_USB2PHY_XIN24M, "clk_usb2phy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_USB2PHY_REF, "clk_usb2phy_ref", clk_usbphy_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 8, 1, MFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 5, GFLAGS), -+ GATE(CLK_MIPIDSIPHY_XIN24M, "clk_mipidsiphy_xin24m", "xin24m", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", clk_mipidsi_ref_p, 0, -+ RK3562_PMU0_CLKSEL_CON(2), 15, 1, MFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_PMU0_I2C0, "pclk_pmu0_i2c0", "busclk_pdpmu0", 0, -+ RK3562_PMU0_CLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE(CLK_PMU0_I2C0, 
"clk_pmu0_i2c0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU0_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_PMU0_CLKGATE_CON(2), 9, GFLAGS), -+ /* PD_PMU1 */ -+ GATE(PCLK_PMU1_CRU, "pclk_pmu1_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU1_CLKGATE_CON(0), 0, GFLAGS), -+ GATE(HCLK_PMU1_MEM, "hclk_pmu1_mem", "busclk_pdpmu0", CLK_IGNORE_UNUSED, -+ RK3562_PMU1_CLKGATE_CON(0), 2, GFLAGS), -+ GATE(PCLK_PMU1_UART0, "pclk_pmu1_uart0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PMU1_UART0_SRC, "clk_pmu1_uart0_src", "cpll", 0, -+ RK3562_PMU1_CLKSEL_CON(2), 0, 4, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_PMU1_UART0_FRAC, "clk_pmu1_uart0_frac", "clk_pmu1_uart0_src", CLK_SET_RATE_PARENT, -+ RK3562_PMU1_CLKSEL_CON(3), 0, -+ RK3562_PMU1_CLKGATE_CON(0), 9, GFLAGS, -+ &rk3562_clk_pmu1_uart0_fracmux), -+ GATE(SCLK_PMU1_UART0, "sclk_pmu1_uart0", "clk_pmu1_uart0", 0, -+ RK3562_PMU1_CLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_PMU1_SPI0, "pclk_pmu1_spi0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(CLK_PMU1_SPI0, "clk_pmu1_spi0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU1_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 2, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(1), 1, GFLAGS), -+ GATE(SCLK_IN_PMU1_SPI0, "sclk_in_pmu1_spi0", "sclk_in_pmu1_spi0_io", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_PMU1_PWM0, "pclk_pmu1_pwm0", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE(CLK_PMU1_PWM0, "clk_pmu1_pwm0", mux_200m_xin24m_32k_p, 0, -+ RK3562_PMU1_CLKSEL_CON(4), 14, 2, MFLAGS, 8, 2, DFLAGS, -+ RK3562_PMU1_CLKGATE_CON(1), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PMU1_PWM0, "clk_capture_pmu1_pwm0", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 5, GFLAGS), -+ GATE(CLK_PMU1_WIFI, "clk_pmu1_wifi", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(1), 6, GFLAGS), -+ GATE(FCLK_PMU1_CM0_CORE, "fclk_pmu1_cm0_core", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 0, GFLAGS), -+ GATE(CLK_PMU1_CM0_RTC, "clk_pmu1_cm0_rtc", "clk_rtc_32k", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_PMU1_WDTNS, "pclk_pmu1_wdtns", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 3, GFLAGS), -+ GATE(CLK_PMU1_WDTNS, "clk_pmu1_wdtns", "xin24m", 0, -+ RK3562_PMU1_CLKGATE_CON(2), 4, GFLAGS), -+ GATE(PCLK_PMU1_MAILBOX, "pclk_pmu1_mailbox", "busclk_pdpmu0", 0, -+ RK3562_PMU1_CLKGATE_CON(3), 8, GFLAGS), ++#define PLL(_type, _id, _name, _pnames, _flags, _con, _mode, _mshift, \ ++ _lshift, _pflags, _rtable) \ ++ { \ ++ .id = _id, \ ++ .type = _type, \ ++ .name = _name, \ ++ .parent_names = _pnames, \ ++ .num_parents = ARRAY_SIZE(_pnames), \ ++ .flags = CLK_GET_RATE_NOCACHE | _flags, \ ++ .con_offset = _con, \ ++ .mode_offset = _mode, \ ++ .mode_shift = _mshift, \ ++ .lock_shift = _lshift, \ ++ .pll_flags = _pflags, \ ++ .rate_table = _rtable, \ ++ } + -+ /* PD_RGA */ -+ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(14), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_jdec", 0, -+ RK3562_CLKSEL_CON(32), 8, 3, DFLAGS, -+ RK3562_CLKGATE_CON(14), 1, GFLAGS), -+ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_jdec", 0, -+ RK3562_CLKGATE_CON(14), 6, GFLAGS), -+ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, -+ RK3562_CLKGATE_CON(14), 7, GFLAGS), -+ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(14), 8, GFLAGS), -+ 
GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_jdec", 0, -+ RK3562_CLKGATE_CON(14), 9, GFLAGS), -+ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, -+ RK3562_CLKGATE_CON(14), 10, GFLAGS), ++struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx, ++ enum rockchip_pll_type pll_type, ++ const char *name, const char *const *parent_names, ++ u8 num_parents, int con_offset, int grf_lock_offset, ++ int lock_shift, int mode_offset, int mode_shift, ++ struct rockchip_pll_rate_table *rate_table, ++ unsigned long flags, u8 clk_pll_flags); + -+ /* PD_VDPU */ -+ COMPOSITE(ACLK_VDPU_PRE, "aclk_vdpu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(10), 0, GFLAGS), -+ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_CLKGATE_CON(10), 3, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VDPU_PRE, "hclk_vdpu_pre", "aclk_vdpu", 0, -+ RK3562_CLKSEL_CON(24), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(10), 4, GFLAGS), -+ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_vdpu", 0, -+ RK3562_CLKGATE_CON(10), 7, GFLAGS), -+ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_vdpu_pre", 0, -+ RK3562_CLKGATE_CON(10), 8, GFLAGS), ++void rockchip_boost_init(struct clk_hw *hw); + -+ /* PD_VEPU */ -+ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(20), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(9), 0, GFLAGS), -+ COMPOSITE(ACLK_VEPU_PRE, "aclk_vepu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, -+ RK3562_CLKGATE_CON(9), 1, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VEPU_PRE, "hclk_vepu_pre", "aclk_vepu", 0, -+ RK3562_CLKSEL_CON(21), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(9), 2, GFLAGS), -+ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_vepu", 0, -+ RK3562_CLKGATE_CON(9), 5, GFLAGS), -+ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_vepu", 0, -+ RK3562_CLKGATE_CON(9), 6, GFLAGS), ++void rockchip_boost_enable_recovery_sw_low(struct clk_hw *hw); + -+ /* PD_VI */ -+ COMPOSITE(ACLK_VI, "aclk_vi", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi_isp", 0, -+ RK3562_CLKSEL_CON(26), 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 1, GFLAGS), -+ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi_isp", 0, -+ RK3562_CLKSEL_CON(26), 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 2, GFLAGS), -+ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_isp", 0, -+ RK3562_CLKGATE_CON(11), 6, GFLAGS), -+ GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, -+ RK3562_CLKGATE_CON(11), 7, GFLAGS), -+ COMPOSITE(CLK_ISP, "clk_isp", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(27), 6, 2, MFLAGS, 0, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 8, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_isp", 0, -+ RK3562_CLKGATE_CON(11), 9, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, -+ RK3562_CLKGATE_CON(11), 10, GFLAGS), -+ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_pvtpll_dmyapll_p, 0, -+ RK3562_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 4, DFLAGS, -+ RK3562_CLKGATE_CON(11), 11, GFLAGS), -+ GATE(CSIRX0_CLK_DATA, "csirx0_clk_data", "csirx0_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 12, GFLAGS), -+ GATE(CSIRX1_CLK_DATA, "csirx1_clk_data", "csirx1_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 13, GFLAGS), -+ GATE(CSIRX2_CLK_DATA, "csirx2_clk_data", "csirx2_clk_data_io", 0, -+ RK3562_CLKGATE_CON(11), 14, GFLAGS), -+ GATE(CSIRX3_CLK_DATA, "csirx3_clk_data", "csirx3_clk_data_io", 0, -+ 
RK3562_CLKGATE_CON(11), 15, GFLAGS), -+ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 0, GFLAGS), -+ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 1, GFLAGS), -+ GATE(PCLK_CSIHOST2, "pclk_csihost2", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 2, GFLAGS), -+ GATE(PCLK_CSIHOST3, "pclk_csihost3", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 3, GFLAGS), -+ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 4, GFLAGS), -+ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_vi", 0, -+ RK3562_CLKGATE_CON(12), 5, GFLAGS), ++void rockchip_boost_disable_recovery_sw(struct clk_hw *hw); + -+ /* PD_VO */ -+ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", gpll_cpll_vpll_dmyhpll_p, 0, -+ RK3562_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(13), 0, GFLAGS), -+ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo", 0, -+ RK3562_CLKSEL_CON(29), 0, 5, DFLAGS, -+ RK3562_CLKGATE_CON(13), 1, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vo", 0, -+ RK3562_CLKGATE_CON(13), 6, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_pre", 0, -+ RK3562_CLKGATE_CON(13), 7, GFLAGS), -+ COMPOSITE(DCLK_VOP, "dclk_vop", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3562_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_CLKGATE_CON(13), 8, GFLAGS), -+ COMPOSITE(DCLK_VOP1, "dclk_vop1", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, -+ RK3562_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 8, DFLAGS, -+ RK3562_CLKGATE_CON(13), 9, GFLAGS), ++void rockchip_boost_add_core_div(struct clk_hw *hw, unsigned long prate); ++ ++struct rockchip_cpuclk_clksel { ++ int reg; ++ u32 val; +}; + -+static void __iomem *rk3562_cru_base; ++#define ROCKCHIP_CPUCLK_NUM_DIVIDERS 6 ++#define ROCKCHIP_CPUCLK_MAX_CORES 4 ++struct rockchip_cpuclk_rate_table { ++ unsigned long prate; ++ struct rockchip_cpuclk_clksel divs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; ++ struct rockchip_cpuclk_clksel pre_muxs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; ++ struct rockchip_cpuclk_clksel post_muxs[ROCKCHIP_CPUCLK_NUM_DIVIDERS]; ++}; + -+static void rk3562_dump_cru(void) ++/** ++ * struct rockchip_cpuclk_reg_data - register offsets and masks of the cpuclock ++ * @core_reg[]: register offset of the cores setting register ++ * @div_core_shift[]: cores divider offset used to divide the pll value ++ * @div_core_mask[]: cores divider mask ++ * @num_cores: number of cpu cores ++ * @mux_core_reg: register offset of the cores select parent ++ * @mux_core_alt: mux value to select alternate parent ++ * @mux_core_main: mux value to select main parent of core ++ * @mux_core_shift: offset of the core multiplexer ++ * @mux_core_mask: core multiplexer mask ++ */ ++struct rockchip_cpuclk_reg_data { ++ int core_reg[ROCKCHIP_CPUCLK_MAX_CORES]; ++ u8 div_core_shift[ROCKCHIP_CPUCLK_MAX_CORES]; ++ u32 div_core_mask[ROCKCHIP_CPUCLK_MAX_CORES]; ++ int num_cores; ++ int mux_core_reg; ++ u8 mux_core_alt; ++ u8 mux_core_main; ++ u8 mux_core_shift; ++ u32 mux_core_mask; ++ const char *pll_name; ++}; ++ ++struct clk *rockchip_clk_register_cpuclk(const char *name, ++ u8 num_parents, ++ struct clk *parent, struct clk *alt_parent, ++ const struct rockchip_cpuclk_reg_data *reg_data, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates, void __iomem *reg_base, spinlock_t *lock); ++ ++struct clk *rockchip_clk_register_cpuclk_v2(const char *name, ++ const char *const *parent_names, ++ u8 num_parents, void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 
div_width, u8 div_flags, ++ unsigned long flags, spinlock_t *lock, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates); ++ ++struct clk *rockchip_clk_register_mmc(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *reg, int shift); ++ ++/* ++ * DDRCLK flags, including method of setting the rate ++ * ROCKCHIP_DDRCLK_SIP: use SIP call to bl31 to change ddrclk rate. ++ */ ++#define ROCKCHIP_DDRCLK_SIP BIT(0) ++#define ROCKCHIP_DDRCLK_SIP_V2 0x03 ++ ++#ifdef CONFIG_ROCKCHIP_DDRCLK ++void rockchip_set_ddrclk_params(void __iomem *params); ++void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)); ++ ++struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, ++ const char *const *parent_names, ++ u8 num_parents, int mux_offset, ++ int mux_shift, int mux_width, ++ int div_shift, int div_width, ++ int ddr_flags, void __iomem *reg_base); ++#else ++static inline void rockchip_set_ddrclk_params(void __iomem *params) {} ++static inline void rockchip_set_ddrclk_dmcfreq_wait_complete(int (*func)(void)) {} ++static inline ++struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, ++ const char *const *parent_names, ++ u8 num_parents, int mux_offset, ++ int mux_shift, int mux_width, ++ int div_shift, int div_width, ++ int ddr_flags, void __iomem *reg_base) +{ -+ if (rk3562_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rk3562_cru_base, -+ 0x600, false); -+ } ++ return NULL; +} ++#endif + -+static int protect_clocks[] = { -+ ACLK_VO_PRE, -+ HCLK_VO_PRE, -+ ACLK_VOP, -+ HCLK_VOP, -+ DCLK_VOP, -+ DCLK_VOP1, ++#define ROCKCHIP_INVERTER_HIWORD_MASK BIT(0) ++ ++struct clk *rockchip_clk_register_inverter(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ void __iomem *reg, int shift, int flags, ++ spinlock_t *lock); ++ ++struct clk *rockchip_clk_register_muxgrf(const char *name, ++ const char *const *parent_names, u8 num_parents, ++ int flags, struct regmap *grf, int reg, ++ int shift, int width, int mux_flags); ++ ++#define PNAME(x) static const char *const x[] __initconst ++ ++enum rockchip_clk_branch_type { ++ branch_composite, ++ branch_mux, ++ branch_muxgrf, ++ branch_muxpmugrf, ++ branch_divider, ++ branch_fraction_divider, ++ branch_gate, ++ branch_gate_no_set_rate, ++ branch_mmc, ++ branch_inverter, ++ branch_factor, ++ branch_ddrclk, ++ branch_half_divider, +}; + -+static void __init rk3562_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; ++struct rockchip_clk_branch { ++ unsigned int id; ++ enum rockchip_clk_branch_type branch_type; ++ const char *name; ++ const char *const *parent_names; ++ u8 num_parents; ++ unsigned long flags; ++ int muxdiv_offset; ++ u8 mux_shift; ++ u8 mux_width; ++ u8 mux_flags; ++ u32 *mux_table; ++ int div_offset; ++ u8 div_shift; ++ u8 div_width; ++ u8 div_flags; ++ struct clk_div_table *div_table; ++ int gate_offset; ++ u8 gate_shift; ++ u8 gate_flags; ++ struct rockchip_clk_branch *child; ++}; + -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; ++#define COMPOSITE(_id, cname, pnames, f, mo, ms, mw, mf, ds, dw,\ ++ df, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_shift 
= ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ + } + -+ rk3562_cru_base = reg_base; ++#define COMPOSITE_MUXTBL(_id, cname, pnames, f, mo, ms, mw, mf, \ ++ mt, ds, dw, df, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .mux_table = mt, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } + -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; ++#define COMPOSITE_DIV_OFFSET(_id, cname, pnames, f, mo, ms, mw, \ ++ mf, do, ds, dw, df, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_offset = do, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ + } + -+ rockchip_clk_register_plls(ctx, rk3562_pll_clks, -+ ARRAY_SIZE(rk3562_pll_clks), -+ RK3562_GRF_SOC_STATUS0); ++#define COMPOSITE_NOMUX(_id, cname, pname, f, mo, ds, dw, df, \ ++ go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } + -+ rockchip_clk_register_branches(ctx, rk3562_clk_branches, -+ ARRAY_SIZE(rk3562_clk_branches)); ++#define COMPOSITE_NOMUX_DIVTBL(_id, cname, pname, f, mo, ds, dw,\ ++ df, dt, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .div_table = dt, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } + -+ /* (0x30444 - 0x400) / 4 + 1 = 49170 */ -+ rockchip_register_softrst(np, 49170, reg_base + RK3562_SOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); ++#define COMPOSITE_NODIV(_id, cname, pnames, f, mo, ms, mw, mf, \ ++ go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } + -+ rockchip_register_restart_notifier(ctx, RK3562_GLB_SRST_FST, NULL); ++#define COMPOSITE_NOGATE(_id, cname, pnames, f, mo, ms, mw, mf, \ ++ ds, dw, df) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ } + -+ 
rockchip_clk_of_add_provider(np, ctx); ++#define COMPOSITE_NOGATE_DIVTBL(_id, cname, pnames, f, mo, ms, \ ++ mw, mf, ds, dw, df, dt) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_composite, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .div_table = dt, \ ++ .gate_offset = -1, \ ++ } + -+ if (!rk_dump_cru) -+ rk_dump_cru = rk3562_dump_cru; ++#define COMPOSITE_FRAC(_id, cname, pname, f, mo, df, go, gs, gf)\ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_fraction_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = 16, \ ++ .div_width = 16, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } + -+ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); -+} ++#define COMPOSITE_FRACMUX(_id, cname, pname, f, mo, df, go, gs, gf, ch) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_fraction_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = 16, \ ++ .div_width = 16, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ .child = ch, \ ++ } + -+CLK_OF_DECLARE(rk3562_cru, "rockchip,rk3562-cru", rk3562_clk_init); ++#define COMPOSITE_FRACMUX_NOGATE(_id, cname, pname, f, mo, df, ch) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_fraction_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = 16, \ ++ .div_width = 16, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ .child = ch, \ ++ } + -+#ifdef MODULE -+struct clk_rk3562_inits { -+ void (*inits)(struct device_node *np); -+}; ++#define COMPOSITE_DDRCLK(_id, cname, pnames, f, mo, ms, mw, \ ++ ds, dw, df) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_ddrclk, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ } + -+static const struct clk_rk3562_inits clk_3562_cru_init = { -+ .inits = rk3562_clk_init, -+}; ++#define MUX(_id, cname, pnames, f, o, s, w, mf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_mux, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .mux_shift = s, \ ++ .mux_width = w, \ ++ .mux_flags = mf, \ ++ .gate_offset = -1, \ ++ } + -+static const struct of_device_id clk_rk3562_match_table[] = { -+ { -+ .compatible = "rockchip,rk3562-cru", -+ .data = &clk_3562_cru_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rk3562_match_table); ++#define MUXTBL(_id, cname, pnames, f, o, s, w, mf, mt) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_mux, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .mux_shift = s, \ ++ .mux_width = w, \ ++ .mux_flags = mf, \ ++ .gate_offset = -1, \ ++ .mux_table = mt, \ ++ } + -+static int clk_rk3562_probe(struct platform_device *pdev) ++#define MUXGRF(_id, cname, pnames, 
f, o, s, w, mf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_muxgrf, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .mux_shift = s, \ ++ .mux_width = w, \ ++ .mux_flags = mf, \ ++ .gate_offset = -1, \ ++ } ++ ++#define MUXPMUGRF(_id, cname, pnames, f, o, s, w, mf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_muxpmugrf, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .mux_shift = s, \ ++ .mux_width = w, \ ++ .mux_flags = mf, \ ++ .gate_offset = -1, \ ++ } ++ ++#define DIV(_id, cname, pname, f, o, s, w, df) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .div_shift = s, \ ++ .div_width = w, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ } ++ ++#define DIVTBL(_id, cname, pname, f, o, s, w, df, dt) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .div_shift = s, \ ++ .div_width = w, \ ++ .div_flags = df, \ ++ .div_table = dt, \ ++ } ++ ++#define GATE(_id, cname, pname, f, o, b, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_gate, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .gate_offset = o, \ ++ .gate_shift = b, \ ++ .gate_flags = gf, \ ++ } ++ ++#define GATE_NO_SET_RATE(_id, cname, pname, f, o, b, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_gate_no_set_rate, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .gate_offset = o, \ ++ .gate_shift = b, \ ++ .gate_flags = gf, \ ++ } ++ ++#define MMC(_id, cname, pname, offset, shift) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_mmc, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .muxdiv_offset = offset, \ ++ .div_shift = shift, \ ++ } ++ ++#define INVERTER(_id, cname, pname, io, is, if) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_inverter, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .muxdiv_offset = io, \ ++ .div_shift = is, \ ++ .div_flags = if, \ ++ } ++ ++#define FACTOR(_id, cname, pname, f, fm, fd) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_factor, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .div_shift = fm, \ ++ .div_width = fd, \ ++ } ++ ++#define FACTOR_GATE(_id, cname, pname, f, fm, fd, go, gb, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_factor, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .div_shift = fm, \ ++ .div_width = fd, \ ++ .gate_offset = go, \ ++ .gate_shift = gb, \ ++ .gate_flags = gf, \ ++ } ++ ++#define COMPOSITE_HALFDIV(_id, cname, pnames, f, mo, ms, mw, mf, ds, dw,\ ++ df, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_half_divider, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ 
.gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } ++ ++#define COMPOSITE_HALFDIV_OFFSET(_id, cname, pnames, f, mo, ms, mw, mf, do,\ ++ ds, dw, df, go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_half_divider, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_offset = do, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } ++ ++#define COMPOSITE_NOGATE_HALFDIV(_id, cname, pnames, f, mo, ms, mw, mf, \ ++ ds, dw, df) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_half_divider, \ ++ .name = cname, \ ++ .parent_names = pnames, \ ++ .num_parents = ARRAY_SIZE(pnames), \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .mux_shift = ms, \ ++ .mux_width = mw, \ ++ .mux_flags = mf, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ } ++ ++#define COMPOSITE_NOMUX_HALFDIV(_id, cname, pname, f, mo, ds, dw, df, \ ++ go, gs, gf) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_half_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = mo, \ ++ .div_shift = ds, \ ++ .div_width = dw, \ ++ .div_flags = df, \ ++ .gate_offset = go, \ ++ .gate_shift = gs, \ ++ .gate_flags = gf, \ ++ } ++ ++#define DIV_HALF(_id, cname, pname, f, o, s, w, df) \ ++ { \ ++ .id = _id, \ ++ .branch_type = branch_half_divider, \ ++ .name = cname, \ ++ .parent_names = (const char *[]){ pname }, \ ++ .num_parents = 1, \ ++ .flags = f, \ ++ .muxdiv_offset = o, \ ++ .div_shift = s, \ ++ .div_width = w, \ ++ .div_flags = df, \ ++ .gate_offset = -1, \ ++ } ++ ++/* SGRF clocks are only accessible from secure mode, so not controllable */ ++#define SGRF_GATE(_id, cname, pname) \ ++ FACTOR(_id, cname, pname, 0, 1, 1) ++ ++struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, ++ void __iomem *base, unsigned long nr_clks); ++void rockchip_clk_of_add_provider(struct device_node *np, ++ struct rockchip_clk_provider *ctx); ++void rockchip_clk_add_lookup(struct rockchip_clk_provider *ctx, ++ struct clk *clk, unsigned int id); ++void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, ++ struct rockchip_clk_branch *list, ++ unsigned int nr_clk); ++void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, ++ struct rockchip_pll_clock *pll_list, ++ unsigned int nr_pll, int grf_lock_offset); ++void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx, ++ unsigned int lookup_id, ++ const char *name, ++ u8 num_parents, ++ struct clk *parent, struct clk *alt_parent, ++ const struct rockchip_cpuclk_reg_data *reg_data, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates); ++void rockchip_clk_register_armclk_v2(struct rockchip_clk_provider *ctx, ++ struct rockchip_clk_branch *list, ++ const struct rockchip_cpuclk_rate_table *rates, ++ int nrates); ++int rockchip_pll_clk_rate_to_scale(struct clk *clk, unsigned long rate); ++int rockchip_pll_clk_scale_to_rate(struct clk *clk, unsigned int scale); ++int rockchip_pll_clk_adaptive_scaling(struct clk *clk, int sel); ++void rockchip_register_restart_notifier(struct rockchip_clk_provider *ctx, ++ unsigned int reg, void (*cb)(void)); ++ ++#define ROCKCHIP_SOFTRST_HIWORD_MASK BIT(0) ++ ++struct clk *rockchip_clk_register_halfdiv(const char *name, ++ const char *const 
*parent_names, ++ u8 num_parents, void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 div_width, u8 div_flags, ++ int gate_offset, u8 gate_shift, ++ u8 gate_flags, unsigned long flags, ++ spinlock_t *lock); ++ ++#ifdef CONFIG_RESET_CONTROLLER ++void rockchip_register_softrst(struct device_node *np, ++ unsigned int num_regs, ++ void __iomem *base, u8 flags); ++#else ++static inline void rockchip_register_softrst(struct device_node *np, ++ unsigned int num_regs, ++ void __iomem *base, u8 flags) +{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rk3562_inits *init_data; ++} ++#endif ++extern void (*rk_dump_cru)(void); + -+ match = of_match_device(clk_rk3562_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; ++#if IS_MODULE(CONFIG_COMMON_CLK_ROCKCHIP) ++int rockchip_clk_protect(struct rockchip_clk_provider *ctx, ++ unsigned int *clocks, unsigned int nclocks); ++void rockchip_clk_unprotect(void); ++void rockchip_clk_disable_unused(void); ++#else ++static inline int rockchip_clk_protect(struct rockchip_clk_provider *ctx, ++ unsigned int *clocks, ++ unsigned int nclocks) ++{ ++ return -EOPNOTSUPP; ++} + -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); ++static inline void rockchip_clk_unprotect(void) ++{ ++} + -+ return 0; ++static inline void rockchip_clk_disable_unused(void) ++{ +} ++#endif ++#endif +diff --git a/drivers/clk/rockchip-oh/regmap/Kconfig b/drivers/clk/rockchip-oh/regmap/Kconfig +new file mode 100644 +index 000000000..65f691bc4 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/regmap/Kconfig +@@ -0,0 +1,16 @@ ++# SPDX-License-Identifier: GPL-2.0 + -+static struct platform_driver clk_rk3562_driver = { -+ .probe = clk_rk3562_probe, -+ .driver = { -+ .name = "clk-rk3562", -+ .of_match_table = clk_rk3562_match_table, -+ .suppress_bind_attrs = true, -+ }, -+}; -+module_platform_driver(clk_rk3562_driver); ++config COMMON_CLK_ROCKCHIP_REGMAP ++ tristate + -+MODULE_DESCRIPTION("Rockchip RK3562 Clock Driver"); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("platform:clk-rk3562"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip/clk-rv1106.c b/drivers/clk/rockchip/clk-rv1106.c ++config CLK_RK618 ++ tristate "Clock driver for Rockchip RK618" ++ depends on MFD_RK618 ++ default MFD_RK618 ++ select COMMON_CLK_ROCKCHIP_REGMAP ++ ++config CLK_RK628 ++ tristate "Clock driver for Rockchip RK628" ++ depends on MFD_RK628 ++ default MFD_RK628 ++ select COMMON_CLK_ROCKCHIP_REGMAP +diff --git a/drivers/clk/rockchip-oh/regmap/Makefile b/drivers/clk/rockchip-oh/regmap/Makefile new file mode 100644 -index 000000000..0833bf2ad +index 000000000..18d075d09 --- /dev/null -+++ b/drivers/clk/rockchip/clk-rv1106.c -@@ -0,0 +1,1280 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/clk/rockchip-oh/regmap/Makefile +@@ -0,0 +1,13 @@ ++# SPDX-License-Identifier: GPL-2.0 ++ ++obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += clk-rockchip-regmap.o ++ ++clk-rockchip-regmap-objs := clk-regmap-mux.o \ ++ clk-regmap-divider.o \ ++ clk-regmap-gate.o \ ++ clk-regmap-fractional-divider.o \ ++ clk-regmap-composite.o \ ++ clk-regmap-pll.o ++ ++obj-$(CONFIG_CLK_RK618) += clk-rk618.o ++obj-$(CONFIG_CLK_RK628) += clk-rk628.o +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c +new file mode 100644 +index 000000000..43d2b9a45 +--- /dev/null ++++ 
b/drivers/clk/rockchip-oh/regmap/clk-regmap-composite.c +@@ -0,0 +1,400 @@ +/* -+ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Elaine Zhang ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * Base on code in drivers/clk/clk-composite.c. ++ * See clk-composite.c for further copyright information. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "clk.h" ++#include "clk-regmap.h" + -+#define CRU_PVTPLL0_CON0_L 0x11000 -+#define CRU_PVTPLL0_CON0_H 0x11004 -+#define CRU_PVTPLL0_CON1_L 0x11008 -+#define CRU_PVTPLL0_CON1_H 0x1100c -+#define CRU_PVTPLL0_CON2_L 0x11010 -+#define CRU_PVTPLL0_CON2_H 0x11014 -+#define CRU_PVTPLL0_CON3_L 0x11018 -+#define CRU_PVTPLL0_CON3_H 0x1101c -+#define CRU_PVTPLL0_OSC_CNT 0x11020 -+#define CRU_PVTPLL0_OSC_CNT_AVG 0x11024 ++struct clk_regmap_composite { ++ struct device *dev; ++ struct clk_hw hw; ++ struct clk_ops ops; + -+#define CRU_PVTPLL1_CON0_L 0x11030 -+#define CRU_PVTPLL1_CON0_H 0x11034 -+#define CRU_PVTPLL1_CON1_L 0x11038 -+#define CRU_PVTPLL1_CON1_H 0x1103c -+#define CRU_PVTPLL1_CON2_L 0x11040 -+#define CRU_PVTPLL1_CON2_H 0x11044 -+#define CRU_PVTPLL1_CON3_L 0x11048 -+#define CRU_PVTPLL1_CON3_H 0x1104c -+#define CRU_PVTPLL1_OSC_CNT 0x11050 -+#define CRU_PVTPLL1_OSC_CNT_AVG 0x11054 ++ struct clk_hw *mux_hw; ++ struct clk_hw *rate_hw; ++ struct clk_hw *gate_hw; + -+#define RV1106_GRF_SOC_STATUS0 0x10 -+#define CPU_PVTPLL_CON0_L 0x40000 -+#define CPU_PVTPLL_CON0_H 0x40004 -+#define CPU_PVTPLL_CON1 0x40008 -+#define CPU_PVTPLL_CON2 0x4000c -+#define CPU_PVTPLL_CON3 0x40010 -+#define CPU_PVTPLL_OSC_CNT 0x40018 -+#define CPU_PVTPLL_OSC_CNT_AVG 0x4001c ++ const struct clk_ops *mux_ops; ++ const struct clk_ops *rate_ops; ++ const struct clk_ops *gate_ops; ++}; + -+#define PVTPLL_RING_SEL_MASK 0x7 -+#define PVTPLL_RING_SEL_SHIFT 8 -+#define PVTPLL_EN_MASK 0x3 -+#define PVTPLL_EN_SHIFT 0 -+#define PVTPLL_LENGTH_SEL_MASK 0x7f -+#define PVTPLL_LENGTH_SEL_SHIFT 0 ++#define to_clk_regmap_composite(_hw) \ ++ container_of(_hw, struct clk_regmap_composite, hw) + -+#define CPU_CLK_PATH_BASE (0x18300) -+#define CPU_PVTPLL_PATH_CORE ((1 << 12) | (1 << 28)) ++static u8 clk_regmap_composite_get_parent(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *mux_hw = composite->mux_hw; + -+#define RV1106_FRAC_MAX_PRATE 1200000000 ++ __clk_hw_set_clk(mux_hw, hw); + -+enum rv1106_plls { -+ apll, dpll, cpll, gpll, -+}; ++ return mux_ops->get_parent(mux_hw); ++} + -+static struct rockchip_pll_rate_table rv1106_pll_rates[] = { -+ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ -+ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), -+ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), -+ 
RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), -+ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), -+ RK3036_PLL_RATE(993484800, 1, 124, 3, 1, 0, 3113851), -+ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), -+ RK3036_PLL_RATE(983040000, 1, 81, 2, 1, 0, 15435038), -+ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), -+ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), -+ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), -+ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), -+ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), -+ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), -+ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), -+ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), -+ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), -+ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), -+ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), -+ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), -+ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), -+ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), -+ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), -+ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), -+ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), -+ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), -+ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), -+ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), -+ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), -+ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), -+ { /* sentinel */ }, -+}; ++static int clk_regmap_composite_set_parent(struct clk_hw *hw, u8 index) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *mux_hw = composite->mux_hw; + -+#define RV1106_DIV_ACLK_CORE_MASK 0x1f -+#define RV1106_DIV_ACLK_CORE_SHIFT 7 -+#define RV1106_DIV_PCLK_DBG_MASK 0x1f -+#define RV1106_DIV_PCLK_DBG_SHIFT 0 -+#define RV1106_CORE_SEL_MASK 0x3 -+#define RV1106_CORE_SEL_SHIFT 5 -+#define RV1106_ALT_DIV_MASK 0x1f -+#define RV1106_ALT_DIV_SHIFT 0 ++ __clk_hw_set_clk(mux_hw, hw); + -+#define RV1106_CLKSEL0(_aclk_core) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_aclk_core, RV1106_DIV_ACLK_CORE_MASK, \ -+ RV1106_DIV_ACLK_CORE_SHIFT), \ ++ return mux_ops->set_parent(mux_hw, index); +} + -+#define RV1106_CLKSEL1(_pclk_dbg) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(1), \ -+ .val = HIWORD_UPDATE(_pclk_dbg, RV1106_DIV_PCLK_DBG_MASK, \ -+ RV1106_DIV_PCLK_DBG_SHIFT), \ -+} ++static unsigned long clk_regmap_composite_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw 
*rate_hw = composite->rate_hw; + -+#define RV1106_CLKSEL2(_is_pvtpll) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_is_pvtpll, RV1106_CORE_SEL_MASK, \ -+ RV1106_CORE_SEL_SHIFT), \ -+} ++ __clk_hw_set_clk(rate_hw, hw); + -+#define RV1106_CLKSEL3(_alt_div) \ -+{ \ -+ .reg = RV1106_CORECLKSEL_CON(0), \ -+ .val = HIWORD_UPDATE(_alt_div, RV1106_ALT_DIV_MASK, \ -+ RV1106_ALT_DIV_SHIFT), \ ++ return rate_ops->recalc_rate(rate_hw, parent_rate); +} + -+#define RV1106_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg, _is_pvtpll) \ -+{ \ -+ .prate = _prate, \ -+ .divs = { \ -+ RV1106_CLKSEL0(_aclk_core), \ -+ RV1106_CLKSEL1(_pclk_dbg), \ -+ }, \ -+ .pre_muxs = { \ -+ RV1106_CLKSEL3(1), \ -+ RV1106_CLKSEL2(2), \ -+ }, \ -+ .post_muxs = { \ -+ RV1106_CLKSEL2(_is_pvtpll), \ -+ RV1106_CLKSEL3(0), \ -+ }, \ -+} ++static int clk_regmap_composite_determine_rate(struct clk_hw *hw, ++ struct clk_rate_request *req) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; ++ struct clk_hw *mux_hw = composite->mux_hw; ++ struct clk_hw *parent; ++ unsigned long parent_rate; ++ long tmp_rate, best_rate = 0; ++ unsigned long rate_diff; ++ unsigned long best_rate_diff = ULONG_MAX; ++ long rate; ++ unsigned int i; + -+static struct rockchip_cpuclk_rate_table rv1106_cpuclk_rates[] __initdata = { -+ RV1106_CPUCLK_RATE(1608000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1584000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1560000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1536000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1512000000, 3, 7, 1), -+ RV1106_CPUCLK_RATE(1488000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1464000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1440000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1416000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1392000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1368000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1344000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1320000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1296000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1272000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1248000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1224000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1200000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1104000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1096000000, 2, 5, 1), -+ RV1106_CPUCLK_RATE(1008000000, 1, 5, 1), -+ RV1106_CPUCLK_RATE(912000000, 1, 5, 1), -+ RV1106_CPUCLK_RATE(816000000, 1, 3, 1), -+ RV1106_CPUCLK_RATE(696000000, 1, 3, 0), -+ RV1106_CPUCLK_RATE(600000000, 1, 3, 0), -+ RV1106_CPUCLK_RATE(408000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(312000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(216000000, 1, 1, 0), -+ RV1106_CPUCLK_RATE(96000000, 1, 1, 0), -+}; ++ if (rate_hw && rate_ops && rate_ops->determine_rate) { ++ __clk_hw_set_clk(rate_hw, hw); ++ return rate_ops->determine_rate(rate_hw, req); ++ } else if (rate_hw && rate_ops && rate_ops->round_rate && ++ mux_hw && mux_ops && mux_ops->set_parent) { ++ req->best_parent_hw = NULL; + -+static const struct rockchip_cpuclk_reg_data rv1106_cpuclk_data = { -+ .core_reg[0] = RV1106_CORECLKSEL_CON(0), -+ .div_core_shift[0] = 0, -+ .div_core_mask[0] = 0x1f, -+ .num_cores = 1, -+ .mux_core_alt = 2, -+ .mux_core_main = 2, -+ .mux_core_shift = 5, -+ .mux_core_mask = 0x3, -+}; ++ if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { ++ parent = clk_hw_get_parent(mux_hw); ++ req->best_parent_hw = parent; ++ req->best_parent_rate = clk_hw_get_rate(parent); + -+PNAME(mux_pll_p) = { "xin24m" }; 
-+PNAME(mux_24m_32k_p) = { "xin24m", "clk_rtc_32k" }; -+PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; -+PNAME(mux_gpll_24m_p) = { "gpll", "xin24m" }; -+PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_400m_300m_pvtpll0_pvtpll1_p) = { "clk_400m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_500m_300m_pvtpll0_pvtpll1_p) = { "clk_500m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_339m_200m_pvtpll0_pvtpll1_p) = { "clk_339m_src", "clk_200m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; -+PNAME(mux_400m_200m_100m_24m_p) = { "clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; -+PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; -+PNAME(mux_500m_300m_200m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; -+PNAME(mux_50m_24m_p) = { "clk_50m_src", "xin24m" }; -+PNAME(mux_400m_24m_p) = { "clk_400m_src", "xin24m" }; -+PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_pvtm_32k" }; -+PNAME(mux_200m_100m_24m_32k_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_rtc_32k" }; -+PNAME(mux_100m_pmu_24m_p) = { "clk_100m_pmu", "xin24m" }; -+PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_pmu", "xin24m" }; -+PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_pmu", "xin24m" }; -+PNAME(mux_dpll_300m_p) = { "dpll", "clk_300m_src" }; -+PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; -+PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin_osc0_half" }; -+PNAME(clk_ref_mipi0_p) = { "clk_ref_mipi0_src", "clk_ref_mipi0_frac", "xin24m" }; -+PNAME(clk_ref_mipi1_p) = { "clk_ref_mipi1_src", "clk_ref_mipi1_frac", "xin24m" }; -+PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; -+PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; -+PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; -+PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; -+PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; -+PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; -+PNAME(clk_vicap_m0_p) = { "clk_vicap_m0_src", "clk_vicap_m0_frac", "xin24m" }; -+PNAME(clk_vicap_m1_p) = { "clk_vicap_m1_src", "clk_vicap_m1_frac", "xin24m" }; ++ rate = rate_ops->round_rate(rate_hw, req->rate, ++ &req->best_parent_rate); ++ if (rate < 0) ++ return rate; + -+static struct rockchip_pll_clock rv1106_pll_clks[] __initdata = { -+ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1106_PLL_CON(0), -+ RV1106_MODE_CON, 0, 10, 0, rv1106_pll_rates), -+ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, -+ 0, RV1106_PLL_CON(8), -+ RV1106_MODE_CON, 2, 10, 0, rv1106_pll_rates), -+ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, -+ CLK_IGNORE_UNUSED, RV1106_PLL_CON(16), -+ RV1106_SUBDDRMODE_CON, 0, 10, 0, NULL), -+ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, -+ 0, RV1106_PLL_CON(24), -+ RV1106_MODE_CON, 4, 10, 0, rv1106_pll_rates), -+}; ++ req->rate = rate; ++ 
return 0; ++ } + -+#define MFLAGS CLK_MUX_HIWORD_MASK -+#define DFLAGS CLK_DIVIDER_HIWORD_MASK -+#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) ++ for (i = 0; i < clk_hw_get_num_parents(mux_hw); i++) { ++ parent = clk_hw_get_parent_by_index(mux_hw, i); ++ if (!parent) ++ continue; + -+static struct rockchip_clk_branch rv1106_rtc32k_pmu_fracmux __initdata = -+ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, -+ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS); ++ parent_rate = clk_hw_get_rate(parent); + -+static struct rockchip_clk_branch rv1106_i2s0_8ch_tx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(19), 0, 2, MFLAGS); ++ tmp_rate = rate_ops->round_rate(rate_hw, req->rate, ++ &parent_rate); ++ if (tmp_rate < 0) ++ continue; + -+static struct rockchip_clk_branch rv1106_i2s0_8ch_rx_fracmux __initdata = -+ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(21), 0, 2, MFLAGS); ++ rate_diff = abs(req->rate - tmp_rate); + -+static struct rockchip_clk_branch rv1106_clk_ref_mipi0_fracmux __initdata = -+ MUX(CLK_REF_MIPI0, "clk_ref_mipi0", clk_ref_mipi0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(27), 0, 2, MFLAGS); ++ if (!rate_diff || !req->best_parent_hw || ++ best_rate_diff > rate_diff) { ++ req->best_parent_hw = parent; ++ req->best_parent_rate = parent_rate; ++ best_rate_diff = rate_diff; ++ best_rate = tmp_rate; ++ } + -+static struct rockchip_clk_branch rv1106_clk_ref_mipi1_fracmux __initdata = -+ MUX(CLK_REF_MIPI1, "clk_ref_mipi1", clk_ref_mipi1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(29), 0, 2, MFLAGS); ++ if (!rate_diff) ++ return 0; ++ } + -+static struct rockchip_clk_branch rv1106_clk_uart0_fracmux __initdata = -+ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(7), 0, 2, MFLAGS); ++ req->rate = best_rate; ++ return 0; ++ } else if (mux_hw && mux_ops && mux_ops->determine_rate) { ++ __clk_hw_set_clk(mux_hw, hw); ++ return mux_ops->determine_rate(mux_hw, req); ++ } else { ++ return -EINVAL; ++ } + -+static struct rockchip_clk_branch rv1106_clk_uart1_fracmux __initdata = -+ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(9), 0, 2, MFLAGS); ++ return 0; ++} + -+static struct rockchip_clk_branch rv1106_clk_uart2_fracmux __initdata = -+ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(11), 0, 2, MFLAGS); ++static long clk_regmap_composite_round_rate(struct clk_hw *hw, ++ unsigned long rate, ++ unsigned long *prate) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; + -+static struct rockchip_clk_branch rv1106_clk_uart3_fracmux __initdata = -+ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(13), 0, 2, MFLAGS); ++ __clk_hw_set_clk(rate_hw, hw); + -+static struct rockchip_clk_branch rv1106_clk_uart4_fracmux __initdata = -+ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(15), 0, 2, MFLAGS); ++ return rate_ops->round_rate(rate_hw, rate, prate); ++} + -+static struct rockchip_clk_branch rv1106_clk_uart5_fracmux __initdata = -+ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(17), 0, 2, MFLAGS); ++static int clk_regmap_composite_set_rate(struct clk_hw *hw, ++ unsigned long 
rate, ++ unsigned long parent_rate) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; + -+static struct rockchip_clk_branch rv1106_clk_vicap_m0_fracmux __initdata = -+ MUX(CLK_VICAP_M0, "clk_vicap_m0", clk_vicap_m0_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(31), 0, 2, MFLAGS); ++ __clk_hw_set_clk(rate_hw, hw); + -+static struct rockchip_clk_branch rv1106_clk_vicap_m1_fracmux __initdata = -+ MUX(CLK_VICAP_M1, "clk_vicap_m1", clk_vicap_m1_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(33), 0, 2, MFLAGS); ++ return rate_ops->set_rate(rate_hw, rate, parent_rate); ++} + -+static struct rockchip_clk_branch rv1106_clk_branches[] __initdata = { ++static int clk_regmap_composite_is_prepared(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ struct clk_hw *gate_hw = composite->gate_hw; + -+ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), ++ __clk_hw_set_clk(gate_hw, hw); + -+ /* PD_CORE */ -+ GATE(CLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, -+ RV1106_CORECLKGATE_CON(0), 14, GFLAGS), -+ GATE(CLK_CORE_MCU_RTC, "clk_core_mcu_rtc", "xin24m", 0, -+ RV1106_CORECLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE(HCLK_CPU, "hclk_cpu", mux_gpll_24m_p, CLK_IS_CRITICAL, -+ RV1106_CORECLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CORECLKGATE_CON(0), 12, GFLAGS), -+ COMPOSITE(CLK_CORE_MCU, "clk_core_mcu", mux_gpll_24m_p, 0, -+ RV1106_CORECLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CORECLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, -+ RV1106_CORECLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, -+ RV1106_CORECLKGATE_CON(0), 6, GFLAGS), -+ GATE(0, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, -+ RV1106_CORECLKGATE_CON(0), 10, GFLAGS), -+ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_cpu_root", 0, -+ RV1106_CORECLKGATE_CON(1), 8, GFLAGS), ++ return gate_ops->is_prepared(gate_hw); ++} + -+ /* PD _TOP */ -+ COMPOSITE(CLK_50M_SRC, "clk_50m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE(CLK_100M_SRC, "clk_100m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE(CLK_150M_SRC, "clk_150m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE(CLK_200M_SRC, "clk_200m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 4, GFLAGS), -+ COMPOSITE(CLK_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 5, GFLAGS), -+ COMPOSITE(CLK_300M_SRC, "clk_300m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 6, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_339M_SRC, "clk_339m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 7, GFLAGS), -+ COMPOSITE(CLK_400M_SRC, "clk_400m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(4), 
5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE(CLK_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 10, GFLAGS), ++static int clk_regmap_composite_prepare(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ struct clk_hw *gate_hw = composite->gate_hw; + -+ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_CLKSEL_CON(24), 5, 2, MFLAGS, -+ RV1106_CLKGATE_CON(2), 9, GFLAGS), ++ __clk_hw_set_clk(gate_hw, hw); + -+ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(17), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(18), 0, -+ RV1106_CLKGATE_CON(1), 14, GFLAGS, -+ &rv1106_i2s0_8ch_tx_fracmux), -+ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, -+ RV1106_CLKGATE_CON(1), 15, GFLAGS), -+ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(19), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(20), 0, -+ RV1106_CLKGATE_CON(2), 1, GFLAGS, -+ &rv1106_i2s0_8ch_rx_fracmux), -+ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, -+ RV1106_CLKGATE_CON(2), 2, GFLAGS), -+ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(21), 2, 2, MFLAGS), -+ COMPOSITE(CLK_REF_MIPI0_SRC, "clk_ref_mipi0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(25), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_REF_MIPI0_FRAC, "clk_ref_mipi0_frac", "clk_ref_mipi0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(26), 0, -+ RV1106_CLKGATE_CON(3), 5, GFLAGS, -+ &rv1106_clk_ref_mipi0_fracmux), -+ GATE(MCLK_REF_MIPI0, "mclk_ref_mipi0", "clk_ref_mipi0", 0, -+ RV1106_CLKGATE_CON(3), 6, GFLAGS), -+ COMPOSITE(CLK_REF_MIPI1_SRC, "clk_ref_mipi1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(27), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 7, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_REF_MIPI1_FRAC, "clk_ref_mipi1_frac", "clk_ref_mipi1_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(28), 0, -+ RV1106_CLKGATE_CON(3), 8, GFLAGS, -+ &rv1106_clk_ref_mipi1_fracmux), -+ GATE(MCLK_REF_MIPI1, "mclk_ref_mipi1", "clk_ref_mipi1", 0, -+ RV1106_CLKGATE_CON(3), 9, GFLAGS), -+ COMPOSITE(CLK_UART0_SRC, "clk_uart0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 11, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(6), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(0), 12, GFLAGS, -+ &rv1106_clk_uart0_fracmux), -+ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, -+ RV1106_CLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(7), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(0), 14, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(8), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(0), 15, GFLAGS, -+ 
&rv1106_clk_uart1_fracmux), -+ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, -+ RV1106_CLKGATE_CON(1), 0, GFLAGS), -+ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(9), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 1, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(10), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 2, GFLAGS, -+ &rv1106_clk_uart2_fracmux), -+ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, -+ RV1106_CLKGATE_CON(1), 3, GFLAGS), -+ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(11), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 5, GFLAGS, -+ &rv1106_clk_uart3_fracmux), -+ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, -+ RV1106_CLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(13), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 7, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(14), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 8, GFLAGS, -+ &rv1106_clk_uart4_fracmux), -+ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, -+ RV1106_CLKGATE_CON(1), 9, GFLAGS), -+ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(15), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(16), CLK_FRAC_DIVIDER_NO_LIMIT, -+ RV1106_CLKGATE_CON(1), 11, GFLAGS, -+ &rv1106_clk_uart5_fracmux), -+ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, -+ RV1106_CLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE(CLK_VICAP_M0_SRC, "clk_vicap_m0_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(29), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 10, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_VICAP_M0_FRAC, "clk_vicap_m0_frac", "clk_vicap_m0_src", CLK_SET_RATE_PARENT, -+ RV1106_CLKSEL_CON(30), 0, -+ RV1106_CLKGATE_CON(3), 11, GFLAGS, -+ &rv1106_clk_vicap_m0_fracmux), -+ GATE(SCLK_VICAP_M0, "sclk_vicap_m0", "clk_vicap_m0", 0, -+ RV1106_CLKGATE_CON(3), 12, GFLAGS), -+ COMPOSITE(CLK_VICAP_M1_SRC, "clk_vicap_m1_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(31), 7, 1, MFLAGS, 2, 5, DFLAGS, -+ RV1106_CLKGATE_CON(3), 13, GFLAGS), -+ COMPOSITE_FRACMUX(CLK_VICAP_M1_FRAC, "clk_vicap_m1_frac", "clk_vicap_m1_src", 0, -+ RV1106_CLKSEL_CON(32), 0, -+ RV1106_CLKGATE_CON(3), 14, GFLAGS, -+ &rv1106_clk_vicap_m1_fracmux), -+ GATE(SCLK_VICAP_M1, "sclk_vicap_m1", "clk_vicap_m1", 0, -+ RV1106_CLKGATE_CON(3), 15, GFLAGS), -+ COMPOSITE(DCLK_VOP_SRC, "dclk_vop_src", mux_gpll_cpll_p, 0, -+ RV1106_CLKSEL_CON(23), 8, 1, MFLAGS, 3, 5, DFLAGS, -+ RV1106_CLKGATE_CON(2), 6, GFLAGS), ++ return gate_ops->prepare(gate_hw); ++} + -+ /* PD_DDR */ -+ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_DDRCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_DDRCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_DDR_ROOT, "aclk_ddr_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_DDRCLKSEL_CON(0), 8, 2, MFLAGS, -+ RV1106_DDRCLKGATE_CON(0), 12, GFLAGS), -+ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(1), 3, GFLAGS), -+ 
GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(1), 2, GFLAGS), -+ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", 0, -+ RV1106_DDRCLKGATE_CON(0), 7, GFLAGS), -+ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", 0, -+ RV1106_DDRCLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IGNORE_UNUSED, -+ RV1106_DDRCLKGATE_CON(0), 5, GFLAGS), -+ GATE(PCLK_DFICTRL, "pclk_dfictrl", "pclk_ddr_root", CLK_IS_CRITICAL, -+ RV1106_DDRCLKGATE_CON(0), 11, GFLAGS), -+ GATE(ACLK_SYS_SHRM, "aclk_sys_shrm", "aclk_ddr_root", CLK_IS_CRITICAL, -+ RV1106_DDRCLKGATE_CON(0), 13, GFLAGS), ++static void clk_regmap_composite_unprepare(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ struct clk_hw *gate_hw = composite->gate_hw; + -+ /* PD_NPU */ -+ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_NPU_ROOT, "aclk_npu_root", mux_500m_300m_pvtpll0_pvtpll1_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_NPUCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_NPUCLKGATE_CON(0), 2, GFLAGS), -+ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_root", 0, -+ RV1106_NPUCLKGATE_CON(0), 9, GFLAGS), -+ GATE(ACLK_RKNN, "aclk_rknn", "aclk_npu_root", 0, -+ RV1106_NPUCLKGATE_CON(0), 10, GFLAGS), -+ -+ /* PD_PERI */ -+ COMPOSITE_NODIV(PCLK_PERI_ROOT, "pclk_peri_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_PERI_ROOT, "aclk_peri_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 2, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PERI_ROOT, "hclk_peri_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(1), 4, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PERICLKSEL_CON(9), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(6), 8, GFLAGS), -+ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 3, GFLAGS), -+ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKSEL_CON(8), 0, 8, DFLAGS, -+ RV1106_PERICLKGATE_CON(6), 4, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_CRYPTO, "clk_core_crypto", mux_300m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 5, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 11, GFLAGS), -+ COMPOSITE_NODIV(CLK_PKA_CRYPTO, "clk_pka_crypto", mux_300m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 7, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 12, GFLAGS), -+ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(3), 13, GFLAGS), -+ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 14, GFLAGS), -+ GATE(ACLK_DECOM, "aclk_decom", "aclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 9, GFLAGS), -+ GATE(PCLK_DECOM, "pclk_decom", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 10, GFLAGS), -+ COMPOSITE_NODIV(DCLK_DECOM, "dclk_decom", mux_400m_200m_100m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 14, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(5), 11, GFLAGS), -+ GATE(ACLK_DMAC, "aclk_dmac", 
"aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(5), 8, GFLAGS), -+ GATE(PCLK_DSM, "pclk_dsm", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 2, GFLAGS), -+ GATE(MCLK_DSM, "mclk_dsm", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKGATE_CON(6), 1, GFLAGS), -+ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_400m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 6, 1, MFLAGS, 0, 6, DFLAGS, -+ RV1106_PERICLKGATE_CON(4), 12, GFLAGS), -+ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 13, GFLAGS), -+ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 0, GFLAGS), -+ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(2), 1, GFLAGS), -+ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 8, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 7, GFLAGS), -+ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 12, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 11, GFLAGS), -+ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 12, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(1), 14, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(2), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(1), 15, GFLAGS), -+ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 0, GFLAGS), -+ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_peri_root", CLK_IGNORE_UNUSED, -+ RV1106_PERICLKGATE_CON(6), 7, GFLAGS), -+ GATE(HCLK_IVE, "hclk_ive", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 9, GFLAGS), -+ GATE(ACLK_IVE, "aclk_ive", "aclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 10, GFLAGS), -+ GATE(PCLK_PWM0_PERI, "pclk_pwm0_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(7), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM0_PERI, "clk_pwm0_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(11), 0, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(7), 4, GFLAGS), -+ GATE(CLK_CAPTURE_PWM0_PERI, "clk_capture_pwm0_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(7), 5, GFLAGS), -+ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(0), 3, GFLAGS), -+ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 14, GFLAGS), -+ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_500m_300m_200m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(7), 12, 2, MFLAGS, 7, 5, DFLAGS, -+ RV1106_PERICLKGATE_CON(5), 0, GFLAGS), -+ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 11, GFLAGS), -+ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(6), 15, GFLAGS), -+ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 15, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 9, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(4), 0, GFLAGS), -+ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 1, GFLAGS), -+ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(4), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_PWM2_PERI, 
"clk_pwm2_peri", mux_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 11, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(4), 3, GFLAGS), -+ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 4, GFLAGS), -+ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 7, GFLAGS), -+ GATE(HCLK_SAI, "hclk_sai", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 13, GFLAGS), -+ GATE(MCLK_SAI, "mclk_sai", "mclk_i2s0_8ch_tx", 0, -+ RV1106_PERICLKGATE_CON(5), 14, GFLAGS), -+ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 3, GFLAGS), -+ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, -+ RV1106_PERICLKSEL_CON(6), 0, 3, DFLAGS, -+ RV1106_PERICLKGATE_CON(3), 4, GFLAGS), -+ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 6, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, -+ RV1106_PERICLKSEL_CON(6), 3, 2, MFLAGS, -+ RV1106_PERICLKGATE_CON(3), 7, GFLAGS), -+ GATE(PCLK_STIMER, "pclk_stimer", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 15, GFLAGS), -+ GATE(CLK_STIMER0, "clk_stimer0", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(1), 0, GFLAGS), -+ GATE(CLK_STIMER1, "clk_stimer1", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(1), 1, GFLAGS), -+ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(0), 8, GFLAGS), -+ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 9, GFLAGS), -+ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 10, GFLAGS), -+ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 11, GFLAGS), -+ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 12, GFLAGS), -+ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 13, GFLAGS), -+ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, -+ RV1106_PERICLKGATE_CON(0), 14, GFLAGS), -+ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 9, GFLAGS), -+ GATE(HCLK_TRNG_S, "hclk_trng_s", "hclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(3), 10, GFLAGS), -+ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 3, GFLAGS), -+ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 7, GFLAGS), -+ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 11, GFLAGS), -+ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(2), 15, GFLAGS), -+ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_bus_root", 0, -+ RV1106_PERICLKGATE_CON(4), 7, GFLAGS), -+ GATE(CLK_REF_USBOTG, "clk_ref_usbotg", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(4), 8, GFLAGS), -+ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(5), 1, GFLAGS), -+ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(5), 2, GFLAGS), -+ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 2, GFLAGS), -+ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_WDT_S, "pclk_wdt_s", "pclk_peri_root", 0, -+ RV1106_PERICLKGATE_CON(1), 4, GFLAGS), -+ GATE(TCLK_WDT_S, "tclk_wdt_s", "xin24m", 0, -+ RV1106_PERICLKGATE_CON(1), 5, GFLAGS), -+ -+ /* PD_PMU */ -+ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, -+ RV1106_PMUCLKSEL_CON(6), 0, -+ RV1106_PMUCLKGATE_CON(1), 14, GFLAGS, -+ &rv1106_rtc32k_pmu_fracmux), 
-+ DIV(CLK_100M_PMU, "clk_100m_pmu", "clk_200m_src", 0, -+ RV1106_PMUCLKSEL_CON(0), 0, 3, DFLAGS), -+ COMPOSITE_NODIV(PCLK_PMU_ROOT, "pclk_pmu_root", mux_100m_pmu_24m_p, CLK_IS_CRITICAL, -+ RV1106_PMUCLKSEL_CON(0), 3, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(HCLK_PMU_ROOT, "hclk_pmu_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_PMUCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_PMUCLKGATE_CON(0), 2, GFLAGS), -+ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, -+ RV1106_PMUCLKGATE_CON(1), 0, GFLAGS), -+ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, -+ RV1106_PMUCLKGATE_CON(1), 1, GFLAGS), -+ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu", 0, -+ RV1106_PMUCLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_PMU_GPIO0, "pclk_pmu_gpio0", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NODIV(DBCLK_PMU_GPIO0, "dbclk_pmu_gpio0", mux_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(0), 15, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(0), 3, GFLAGS), -+ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS, -+ RV1106_PMUCLKGATE_CON(0), 4, GFLAGS), -+ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(2), 10, GFLAGS), -+ GATE(CLK_PMU_MCU, "clk_pmu_mcu", "hclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(0), 9, GFLAGS), -+ GATE(CLK_PMU_MCU_RTC, "clk_pmu_mcu_rtc", "xin24m", 0, -+ RV1106_PMUCLKGATE_CON(0), 13, GFLAGS), -+ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, -+ RV1106_PMUCLKSEL_CON(1), 0, 5, DFLAGS, -+ RV1106_PMUCLKGATE_CON(1), 4, GFLAGS), -+ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(1), 5, GFLAGS), -+ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, -+ RV1106_PMUCLKGATE_CON(2), 13, GFLAGS), -+ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IGNORE_UNUSED, -+ RV1106_PMUCLKGATE_CON(0), 8, GFLAGS), -+ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, -+ RV1106_PMUCLKGATE_CON(2), 8, GFLAGS), -+ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, -+ RV1106_PMUCLKSEL_CON(7), 2, 1, MFLAGS, -+ RV1106_PMUCLKGATE_CON(2), 9, GFLAGS), -+ -+ /* PD_SUBDDR */ -+ COMPOSITE(CLK_CORE_DDRC_SRC, "clk_core_ddrc_src", mux_dpll_300m_p, CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, -+ RV1106_SUBDDRCLKGATE_CON(0), 2, GFLAGS), -+ GATE(CLK_DFICTRL, "clk_dfictrl", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 5, GFLAGS), -+ GATE(CLK_DDRMON, "clk_ddrmon", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 4, GFLAGS), -+ GATE(CLK_DDR_PHY, "clk_ddr_phy", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, -+ RV1106_SUBDDRCLKGATE_CON(0), 6, GFLAGS), -+ GATE(ACLK_DDRC, "aclk_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, -+ RV1106_SUBDDRCLKGATE_CON(0), 1, GFLAGS), -+ GATE(CLK_CORE_DDRC, "clk_core_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, -+ RV1106_SUBDDRCLKGATE_CON(0), 3, GFLAGS), -+ -+ -+ /* PD_VEPU */ -+ COMPOSITE_NODIV(HCLK_VEPU_ROOT, "hclk_vepu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VEPU_COM_ROOT, "aclk_vepu_com_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VEPU_ROOT, "aclk_vepu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, 
-+ RV1106_VEPUCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VEPU_ROOT, "pclk_vepu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VEPUCLKSEL_CON(0), 6, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 3, GFLAGS), -+ GATE(PCLK_SPI0, "pclk_spi0", "pclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(1), 2, GFLAGS), -+ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 12, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(1), 3, GFLAGS), -+ GATE(CLK_UART_DETN_FLT, "clk_uart_detn_flt", "xin24m", 0, -+ RV1106_VEPUCLKGATE_CON(1), 8, GFLAGS), -+ GATE(HCLK_VEPU, "hclk_vepu", "hclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 8, GFLAGS), -+ GATE(ACLK_VEPU, "aclk_vepu", "aclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_VEPU, "clk_core_vepu", mux_400m_300m_pvtpll0_pvtpll1_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 8, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 10, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_VEPU_DVBM, "clk_core_vepu_dvbm", mux_200m_100m_50m_24m_p, 0, -+ RV1106_VEPUCLKSEL_CON(0), 10, 2, MFLAGS, -+ RV1106_VEPUCLKGATE_CON(0), 13, GFLAGS), -+ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 15, GFLAGS), -+ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, -+ RV1106_VEPUCLKGATE_CON(1), 0, GFLAGS), -+ GATE(HCLK_VEPU_PP, "hclk_vepu_pp", "hclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 11, GFLAGS), -+ GATE(ACLK_VEPU_PP, "aclk_vepu_pp", "aclk_vepu_root", 0, -+ RV1106_VEPUCLKGATE_CON(0), 12, GFLAGS), -+ -+ /* PD_VI */ -+ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VI_ROOT, "aclk_vi_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VICLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VI_RTC_ROOT, "pclk_vi_rtc_root", mux_50m_24m_p, 0, -+ RV1106_VICLKSEL_CON(0), 6, 1, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 3, GFLAGS), -+ -+ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 3, GFLAGS), -+ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 5, GFLAGS), -+ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 15, GFLAGS), -+ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, -+ RV1106_VICLKGATE_CON(2), 0, GFLAGS), -+ GATE(HCLK_ISP3P2, "hclk_isp3p2", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 7, GFLAGS), -+ GATE(ACLK_ISP3P2, "aclk_isp3p2", "aclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_ISP3P2, "clk_core_isp3p2", mux_339m_200m_pvtpll0_pvtpll1_p, 0, -+ RV1106_VICLKSEL_CON(0), 7, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 9, GFLAGS), -+ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 14, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDMMC, "cclk_src_sdmmc", mux_400m_24m_p, 0, -+ RV1106_VICLKSEL_CON(1), 14, 1, MFLAGS, 8, 6, DFLAGS, -+ RV1106_VICLKGATE_CON(1), 11, GFLAGS), -+ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(1), 12, GFLAGS), -+ GATE(CLK_SDMMC_DETN_FLT, "clk_sdmmc_detn_flt", "xin24m", 0, -+ RV1106_VICLKGATE_CON(1), 13, GFLAGS), -+ GATE(PCLK_VI_RTC_TEST, "pclk_vi_rtc_test", "pclk_vi_rtc_root", 0, -+ RV1106_VICLKGATE_CON(2), 5, 
GFLAGS), -+ GATE(PCLK_VI_RTC_PHY, "pclk_vi_rtc_phy", "pclk_vi_rtc_root", 0, -+ RV1106_VICLKGATE_CON(2), 6, GFLAGS), -+ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", mux_339m_200m_100m_24m_p, 0, -+ RV1106_VICLKSEL_CON(0), 9, 2, MFLAGS, -+ RV1106_VICLKGATE_CON(0), 10, GFLAGS), -+ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 12, GFLAGS), -+ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, -+ RV1106_VICLKGATE_CON(0), 13, GFLAGS), -+ -+ /* PD_VO */ -+ COMPOSITE_NODIV(ACLK_MAC_ROOT, "aclk_mac_root", mux_300m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 12, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(1), 4, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 0, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 0, GFLAGS), -+ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 2, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 1, GFLAGS), -+ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, -+ RV1106_VOCLKSEL_CON(0), 4, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 2, GFLAGS), -+ COMPOSITE_NODIV(ACLK_VOP_ROOT, "aclk_vop_root", mux_300m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 10, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 11, GFLAGS), -+ -+ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(3), 0, GFLAGS), -+ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(3), 1, GFLAGS), -+ GATE(ACLK_MAC, "aclk_mac", "aclk_mac_root", 0, -+ RV1106_VOCLKGATE_CON(1), 8, GFLAGS), -+ GATE(PCLK_MAC, "pclk_mac", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(1), 9, GFLAGS), -+ FACTOR(CLK_GMAC0_50M_O, "clk_gmac0_50m_o", "clk_50m_src", 0, 1, 1), -+ FACTOR(CLK_GMAC0_REF_50M, "clk_gmac0_ref_50m", "clk_gmac0_50m_o", 0, 1, 1), -+ DIV(CLK_GMAC0_TX_50M_O, "clk_gmac0_tx_50m_o", "clk_gmac0_50m_o", 0, -+ RV1106_VOCLKSEL_CON(2), 1, 6, DFLAGS), -+ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 13, GFLAGS), -+ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 11, GFLAGS), -+ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 3, GFLAGS), -+ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 5, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 10, 3, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 6, GFLAGS), -+ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 7, GFLAGS), -+ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", 0, -+ RV1106_VOCLKGATE_CON(2), 9, GFLAGS), -+ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 13, 3, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 10, GFLAGS), -+ GATE(PCLK_OTP_MASK, "pclk_otp_mask", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 14, GFLAGS), -+ GATE(CLK_PMC_OTP, "clk_pmc_otp", "clk_sbpi_otpc_s", 0, -+ RV1106_VOCLKGATE_CON(2), 15, GFLAGS), -+ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 7, GFLAGS), -+ GATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 8, GFLAGS), -+ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_400m_200m_100m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(1), 8, 2, MFLAGS, -+ RV1106_VOCLKGATE_CON(0), 9, GFLAGS), -+ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", mux_400m_24m_p, 0, -+ RV1106_VOCLKSEL_CON(2), 13, 1, MFLAGS, 7, 6, DFLAGS, -+ RV1106_VOCLKGATE_CON(1), 14, GFLAGS), -+ GATE(HCLK_SDIO, 
"hclk_sdio", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(1), 15, GFLAGS), -+ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(2), 0, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 0, 5, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 1, GFLAGS), -+ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, -+ RV1106_VOCLKSEL_CON(3), 5, 5, DFLAGS, -+ RV1106_VOCLKGATE_CON(2), 2, GFLAGS), -+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, -+ RV1106_VOCLKGATE_CON(0), 13, GFLAGS), -+ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_src", 0, -+ RV1106_VOCLKGATE_CON(0), 14, GFLAGS), -+ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, -+ RV1106_VOCLKGATE_CON(0), 15, GFLAGS), -+ -+ /* IO CLK */ -+ GATE(RX0PCLK_VICAP, "rx0pclk_vicap", "rx0pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 0, GFLAGS), -+ GATE(RX1PCLK_VICAP, "rx1pclk_vicap", "rx1pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 1, GFLAGS), -+ GATE(ISP0CLK_VICAP, "isp0clk_vicap", "isp0clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(1), 2, GFLAGS), -+ GATE(I0CLK_VICAP, "i0clk_vicap", "i0clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 14, GFLAGS), -+ GATE(I1CLK_VICAP, "i1clk_vicap", "i1clk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 15, GFLAGS), -+ GATE(PCLK_VICAP, "pclk_vicap", "pclk_vicap_io", 0, -+ RV1106_VICLKGATE_CON(0), 11, GFLAGS), -+ GATE(CLK_RXBYTECLKHS_0, "clk_rxbyteclkhs_0", "clk_rxbyteclkhs_0_io", 0, -+ RV1106_VICLKGATE_CON(1), 4, GFLAGS), -+ GATE(CLK_RXBYTECLKHS_1, "clk_rxbyteclkhs_1", "clk_rxbyteclkhs_1_io", 0, -+ RV1106_VICLKGATE_CON(1), 6, GFLAGS), -+ -+ GATE(PCLK_VICAP_VEPU, "pclk_vicap_vepu", "pclk_vicap_vepu_io", 0, -+ RV1106_VEPUCLKGATE_CON(0), 14, GFLAGS), -+ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, -+ RV1106_VEPUCLKGATE_CON(1), 4, GFLAGS), -+ -+ GATE(CLK_UTMI_USBOTG, "clk_utmi_usbotg", "clk_utmi_usbotg_io", 0, -+ RV1106_PERICLKGATE_CON(4), 9, GFLAGS), -+ -+}; -+ -+static struct rockchip_clk_branch rv1106_grf_clk_branches[] __initdata = { -+ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_src_emmc", RV1106_EMMC_CON0, 1), -+ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_src_emmc", RV1106_EMMC_CON1, 1), -+ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc", RV1106_SDMMC_CON0, 1), -+ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc", RV1106_SDMMC_CON1, 1), -+ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RV1106_SDIO_CON0, 1), -+ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RV1106_SDIO_CON1, 1), -+}; -+ -+static void __iomem *rv1106_cru_base; -+static struct rockchip_clk_provider *grf_ctx, *cru_ctx; -+ -+void rv1106_dump_cru(void) -+{ -+ if (rv1106_cru_base) { -+ pr_warn("CRU:\n"); -+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, -+ 32, 4, rv1106_cru_base, -+ 0x588, false); -+ } -+} -+EXPORT_SYMBOL_GPL(rv1106_dump_cru); -+ -+static void _cru_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) -+{ -+ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; -+ -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); -+ if (rate0 < target_rate) -+ return; -+ /* delta < (3.125% * target_rate) */ -+ if ((rate0 - target_rate) < (target_rate >> 5)) -+ return; -+ -+ length_ori = readl_relaxed(rv1106_cru_base + length_offset) & PVTPLL_LENGTH_SEL_MASK; -+ length = length_ori; -+ length++; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate1 = readl_relaxed(rv1106_cru_base + count_offset); -+ if ((rate1 < target_rate) || 
(rate1 >= rate0)) -+ return; -+ if (abs(rate1 - target_rate) < (target_rate >> 5)) -+ return; -+ -+ step = rate0 - rate1; -+ delta = rate1 - target_rate; -+ length += delta / step; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); -+ -+ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { -+ if (i++ > 20) -+ break; -+ if (rate0 > target_rate) -+ length++; -+ else -+ length--; -+ if (length <= length_ori) -+ break; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ writel_relaxed(val, rv1106_cru_base + length_offset); -+ usleep_range(2000, 2100); -+ rate0 = readl_relaxed(rv1106_cru_base + count_offset); -+ } -+} -+ -+static void _grf_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) -+{ -+ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; -+ -+ regmap_read(cru_ctx->grf, count_offset, &rate0); -+ if (rate0 < target_rate) -+ return; -+ /* delta < (3.125% * target_rate) */ -+ if ((rate0 - target_rate) < (target_rate >> 5)) -+ return; -+ -+ regmap_read(cru_ctx->grf, length_offset, &length_ori); -+ length = length_ori; -+ length_ori = length; -+ length &= PVTPLL_LENGTH_SEL_MASK; -+ length++; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate1); -+ if ((rate1 < target_rate) || (rate1 >= rate0)) -+ return; -+ if (abs(rate1 - target_rate) < (target_rate >> 5)) -+ return; -+ -+ step = rate0 - rate1; -+ delta = rate1 - target_rate; -+ length += delta / step; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate0); -+ -+ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { -+ if (i++ > 20) -+ break; -+ if (rate0 > target_rate) -+ length++; -+ else -+ length--; -+ if (length <= length_ori) -+ break; -+ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); -+ regmap_write(cru_ctx->grf, length_offset, val); -+ usleep_range(2000, 2100); -+ regmap_read(cru_ctx->grf, count_offset, &rate0); -+ } -+} -+ -+static void rockchip_rv1106_pvtpll_calibrate(struct work_struct *w) -+{ -+ struct clk *clk; -+ unsigned long rate; -+ -+ clk = __clk_lookup("clk_pvtpll_0"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _cru_pvtpll_calibrate(CRU_PVTPLL0_OSC_CNT_AVG, -+ CRU_PVTPLL0_CON0_H, rate / 1000000); -+ } -+ -+ clk = __clk_lookup("clk_pvtpll_1"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _cru_pvtpll_calibrate(CRU_PVTPLL1_OSC_CNT_AVG, -+ CRU_PVTPLL1_CON0_H, rate / 1000000); -+ } -+ -+ clk = __clk_lookup("cpu_pvtpll"); -+ if (clk) { -+ rate = clk_get_rate(clk); -+ _grf_pvtpll_calibrate(CPU_PVTPLL_OSC_CNT_AVG, -+ CPU_PVTPLL_CON0_H, rate / 1000000); -+ } -+} -+static DECLARE_DEFERRABLE_WORK(pvtpll_calibrate_work, rockchip_rv1106_pvtpll_calibrate); -+ -+static void rockchip_rv1106_pvtpll_init(struct rockchip_clk_provider *ctx) -+{ -+ /* set pvtpll ref clk mux */ -+ writel_relaxed(CPU_PVTPLL_PATH_CORE, ctx->reg_base + CPU_CLK_PATH_BASE); -+ -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_H, HIWORD_UPDATE(0x7, PVTPLL_LENGTH_SEL_MASK, -+ PVTPLL_LENGTH_SEL_SHIFT)); -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x1, 
PVTPLL_RING_SEL_MASK, -+ PVTPLL_RING_SEL_SHIFT)); -+ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x3, PVTPLL_EN_MASK, -+ PVTPLL_EN_SHIFT)); -+ -+ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL0_CON0_H); -+ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL0_CON1_L); -+ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL0_CON2_H); -+ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL0_CON0_L); -+ -+ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL1_CON0_H); -+ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL1_CON1_L); -+ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL1_CON2_H); -+ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL1_CON0_L); -+ -+ schedule_delayed_work(&pvtpll_calibrate_work, msecs_to_jiffies(3000)); -+} -+ -+static int rv1106_clk_panic(struct notifier_block *this, -+ unsigned long ev, void *ptr) -+{ -+ rv1106_dump_cru(); -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block rv1106_clk_panic_block = { -+ .notifier_call = rv1106_clk_panic, -+}; -+ -+static void __init rv1106_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ struct clk **cru_clks; -+ -+ reg_base = of_iomap(np, 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru region\n", __func__); -+ return; -+ } -+ -+ rv1106_cru_base = reg_base; -+ -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip clk init failed\n", __func__); -+ iounmap(reg_base); -+ return; -+ } -+ cru_ctx = ctx; -+ -+ rockchip_rv1106_pvtpll_init(ctx); -+ -+ cru_clks = ctx->clk_data.clks; -+ -+ rockchip_clk_register_plls(ctx, rv1106_pll_clks, -+ ARRAY_SIZE(rv1106_pll_clks), -+ RV1106_GRF_SOC_STATUS0); -+ -+ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", -+ 3, cru_clks[PLL_APLL], cru_clks[PLL_GPLL], -+ &rv1106_cpuclk_data, rv1106_cpuclk_rates, -+ ARRAY_SIZE(rv1106_cpuclk_rates)); -+ -+ rockchip_clk_register_branches(ctx, rv1106_clk_branches, -+ ARRAY_SIZE(rv1106_clk_branches)); -+ -+ rockchip_clk_register_branches(grf_ctx, rv1106_grf_clk_branches, -+ ARRAY_SIZE(rv1106_grf_clk_branches)); -+ -+ rockchip_register_softrst(np, 31745, reg_base + RV1106_PMUSOFTRST_CON(0), -+ ROCKCHIP_SOFTRST_HIWORD_MASK); -+ -+ rockchip_register_restart_notifier(ctx, RV1106_GLB_SRST_FST, NULL); -+ -+ rockchip_clk_of_add_provider(np, ctx); -+ -+ atomic_notifier_chain_register(&panic_notifier_list, -+ &rv1106_clk_panic_block); -+} -+ -+CLK_OF_DECLARE(rv1106_cru, "rockchip,rv1106-cru", rv1106_clk_init); -+ -+static void __init rv1106_grf_clk_init(struct device_node *np) -+{ -+ struct rockchip_clk_provider *ctx; -+ void __iomem *reg_base; -+ -+ reg_base = of_iomap(of_get_parent(np), 0); -+ if (!reg_base) { -+ pr_err("%s: could not map cru grf region\n", __func__); -+ return; -+ } -+ -+ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); -+ if (IS_ERR(ctx)) { -+ pr_err("%s: rockchip grf clk init failed\n", __func__); -+ return; -+ } -+ grf_ctx = ctx; -+ -+ rockchip_clk_of_add_provider(np, ctx); -+} -+CLK_OF_DECLARE(rv1106_grf_cru, "rockchip,rv1106-grf-cru", rv1106_grf_clk_init); -+ -+#ifdef MODULE -+struct clk_rv1106_inits { -+ void (*inits)(struct device_node *np); -+}; -+ -+static const struct clk_rv1106_inits clk_rv1106_init = { -+ .inits = rv1106_clk_init, -+}; -+ -+static const struct clk_rv1106_inits clk_rv1106_grf_init = { -+ .inits = rv1106_grf_clk_init, -+}; -+ -+static const struct of_device_id clk_rv1106_match_table[] = { -+ { -+ .compatible = "rockchip,rv1106-cru", -+ .data = &clk_rv1106_init, -+ }, { 
-+ .compatible = "rockchip,rv1106-grf-cru", -+ .data = &clk_rv1106_grf_init, -+ }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, clk_rv1106_match_table); -+ -+static int __init clk_rv1106_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ const struct clk_rv1106_inits *init_data; -+ -+ match = of_match_device(clk_rv1106_match_table, &pdev->dev); -+ if (!match || !match->data) -+ return -EINVAL; -+ -+ init_data = match->data; -+ if (init_data->inits) -+ init_data->inits(np); -+ -+ return 0; -+} -+ -+static struct platform_driver clk_rv1106_driver = { -+ .driver = { -+ .name = "clk-rv1106", -+ .of_match_table = clk_rv1106_match_table, -+ }, -+}; -+builtin_platform_driver_probe(clk_rv1106_driver, clk_rv1106_probe); -+ -+MODULE_DESCRIPTION("Rockchip RV1106 Clock Driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -diff --git a/drivers/clk/rockchip/regmap/Kconfig b/drivers/clk/rockchip/regmap/Kconfig -new file mode 100644 -index 000000000..65f691bc4 ---- /dev/null -+++ b/drivers/clk/rockchip/regmap/Kconfig -@@ -0,0 +1,16 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+config COMMON_CLK_ROCKCHIP_REGMAP -+ tristate -+ -+config CLK_RK618 -+ tristate "Clock driver for Rockchip RK618" -+ depends on MFD_RK618 -+ default MFD_RK618 -+ select COMMON_CLK_ROCKCHIP_REGMAP -+ -+config CLK_RK628 -+ tristate "Clock driver for Rockchip RK628" -+ depends on MFD_RK628 -+ default MFD_RK628 -+ select COMMON_CLK_ROCKCHIP_REGMAP -diff --git a/drivers/clk/rockchip/regmap/Makefile b/drivers/clk/rockchip/regmap/Makefile -new file mode 100644 -index 000000000..18d075d09 ---- /dev/null -+++ b/drivers/clk/rockchip/regmap/Makefile -@@ -0,0 +1,13 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += clk-rockchip-regmap.o -+ -+clk-rockchip-regmap-objs := clk-regmap-mux.o \ -+ clk-regmap-divider.o \ -+ clk-regmap-gate.o \ -+ clk-regmap-fractional-divider.o \ -+ clk-regmap-composite.o \ -+ clk-regmap-pll.o -+ -+obj-$(CONFIG_CLK_RK618) += clk-rk618.o -+obj-$(CONFIG_CLK_RK628) += clk-rk628.o -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-composite.c b/drivers/clk/rockchip/regmap/clk-regmap-composite.c -new file mode 100644 -index 000000000..43d2b9a45 ---- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-composite.c -@@ -0,0 +1,400 @@ -+/* -+ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. -+ * -+ * Base on code in drivers/clk/clk-composite.c. -+ * See clk-composite.c for further copyright information. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include "clk-regmap.h" -+ -+struct clk_regmap_composite { -+ struct device *dev; -+ struct clk_hw hw; -+ struct clk_ops ops; -+ -+ struct clk_hw *mux_hw; -+ struct clk_hw *rate_hw; -+ struct clk_hw *gate_hw; -+ -+ const struct clk_ops *mux_ops; -+ const struct clk_ops *rate_ops; -+ const struct clk_ops *gate_ops; -+}; -+ -+#define to_clk_regmap_composite(_hw) \ -+ container_of(_hw, struct clk_regmap_composite, hw) -+ -+static u8 clk_regmap_composite_get_parent(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *mux_hw = composite->mux_hw; -+ -+ __clk_hw_set_clk(mux_hw, hw); -+ -+ return mux_ops->get_parent(mux_hw); -+} -+ -+static int clk_regmap_composite_set_parent(struct clk_hw *hw, u8 index) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *mux_hw = composite->mux_hw; -+ -+ __clk_hw_set_clk(mux_hw, hw); -+ -+ return mux_ops->set_parent(mux_hw, index); -+} -+ -+static unsigned long clk_regmap_composite_recalc_rate(struct clk_hw *hw, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; -+ -+ __clk_hw_set_clk(rate_hw, hw); -+ -+ return rate_ops->recalc_rate(rate_hw, parent_rate); -+} -+ -+static int clk_regmap_composite_determine_rate(struct clk_hw *hw, -+ struct clk_rate_request *req) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ const struct clk_ops *mux_ops = composite->mux_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; -+ struct clk_hw *mux_hw = composite->mux_hw; -+ struct clk_hw *parent; -+ unsigned long parent_rate; -+ long tmp_rate, best_rate = 0; -+ unsigned long rate_diff; -+ unsigned long best_rate_diff = ULONG_MAX; -+ long rate; -+ unsigned int i; -+ -+ if (rate_hw && rate_ops && rate_ops->determine_rate) { -+ __clk_hw_set_clk(rate_hw, hw); -+ return rate_ops->determine_rate(rate_hw, req); -+ } else if (rate_hw && rate_ops && rate_ops->round_rate && -+ mux_hw && mux_ops && mux_ops->set_parent) { -+ req->best_parent_hw = NULL; -+ -+ if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { -+ parent = clk_hw_get_parent(mux_hw); -+ req->best_parent_hw = parent; -+ req->best_parent_rate = clk_hw_get_rate(parent); -+ -+ rate = rate_ops->round_rate(rate_hw, req->rate, -+ &req->best_parent_rate); -+ if (rate < 0) -+ return rate; -+ -+ req->rate = rate; -+ return 0; -+ } -+ -+ for (i = 0; i < clk_hw_get_num_parents(mux_hw); i++) { -+ parent = clk_hw_get_parent_by_index(mux_hw, i); -+ if (!parent) -+ continue; -+ -+ parent_rate = clk_hw_get_rate(parent); -+ -+ tmp_rate = rate_ops->round_rate(rate_hw, req->rate, -+ &parent_rate); -+ if (tmp_rate < 0) -+ continue; -+ -+ rate_diff = abs(req->rate - tmp_rate); -+ -+ if (!rate_diff || !req->best_parent_hw || -+ best_rate_diff > rate_diff) { -+ req->best_parent_hw = parent; -+ req->best_parent_rate = parent_rate; -+ best_rate_diff = rate_diff; -+ best_rate = tmp_rate; -+ } -+ -+ if (!rate_diff) -+ return 0; -+ } -+ -+ req->rate = best_rate; -+ return 0; -+ } else if (mux_hw && mux_ops && mux_ops->determine_rate) { -+ __clk_hw_set_clk(mux_hw, hw); -+ return mux_ops->determine_rate(mux_hw, req); -+ } else { -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static long 
clk_regmap_composite_round_rate(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long *prate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; -+ -+ __clk_hw_set_clk(rate_hw, hw); -+ -+ return rate_ops->round_rate(rate_hw, rate, prate); -+} -+ -+static int clk_regmap_composite_set_rate(struct clk_hw *hw, -+ unsigned long rate, -+ unsigned long parent_rate) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *rate_ops = composite->rate_ops; -+ struct clk_hw *rate_hw = composite->rate_hw; -+ -+ __clk_hw_set_clk(rate_hw, hw); -+ -+ return rate_ops->set_rate(rate_hw, rate, parent_rate); -+} -+ -+static int clk_regmap_composite_is_prepared(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; -+ -+ __clk_hw_set_clk(gate_hw, hw); -+ -+ return gate_ops->is_prepared(gate_hw); -+} -+ -+static int clk_regmap_composite_prepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; -+ -+ __clk_hw_set_clk(gate_hw, hw); -+ -+ return gate_ops->prepare(gate_hw); -+} -+ -+static void clk_regmap_composite_unprepare(struct clk_hw *hw) -+{ -+ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); -+ const struct clk_ops *gate_ops = composite->gate_ops; -+ struct clk_hw *gate_hw = composite->gate_hw; -+ -+ __clk_hw_set_clk(gate_hw, hw); ++ __clk_hw_set_clk(gate_hw, hw); + + gate_ops->unprepare(gate_hw); +} @@ -74689,11 +72778,11 @@ index 000000000..43d2b9a45 + return clk; +} +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_composite); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-divider.c b/drivers/clk/rockchip/regmap/clk-regmap-divider.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c new file mode 100644 index 000000000..d57f2c7f8 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-divider.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-divider.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. 
@@ -74812,11 +72901,11 @@ index 000000000..d57f2c7f8 + return devm_clk_register(dev, ÷r->hw); +} +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_divider); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c b/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c new file mode 100644 index 000000000..1acbc16e7 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-fractional-divider.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -74985,11 +73074,11 @@ index 000000000..1acbc16e7 + return devm_clk_register(dev, &fd->hw); +} +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_fractional_divider); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-gate.c b/drivers/clk/rockchip/regmap/clk-regmap-gate.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c new file mode 100644 index 000000000..36549b912 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-gate.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-gate.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. @@ -75073,11 +73162,11 @@ index 000000000..36549b912 + return devm_clk_register(dev, &gate->hw); +} +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_gate); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-mux.c b/drivers/clk/rockchip/regmap/clk-regmap-mux.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c new file mode 100644 index 000000000..eb37b5f95 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-mux.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-mux.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. @@ -75160,11 +73249,11 @@ index 000000000..eb37b5f95 +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_mux); + +MODULE_LICENSE("GPL"); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap-pll.c b/drivers/clk/rockchip/regmap/clk-regmap-pll.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c b/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c new file mode 100644 index 000000000..24ad7eda9 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap-pll.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap-pll.c @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. @@ -75529,11 +73618,11 @@ index 000000000..24ad7eda9 + return devm_clk_register(dev, &pll->hw); +} +EXPORT_SYMBOL_GPL(devm_clk_regmap_register_pll); -diff --git a/drivers/clk/rockchip/regmap/clk-regmap.h b/drivers/clk/rockchip/regmap/clk-regmap.h +diff --git a/drivers/clk/rockchip-oh/regmap/clk-regmap.h b/drivers/clk/rockchip-oh/regmap/clk-regmap.h new file mode 100644 index 000000000..4626e1982 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-regmap.h ++++ b/drivers/clk/rockchip-oh/regmap/clk-regmap.h @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. 
@@ -75843,11 +73932,11 @@ index 000000000..4626e1982 + unsigned long flags); + +#endif -diff --git a/drivers/clk/rockchip/regmap/clk-rk618.c b/drivers/clk/rockchip/regmap/clk-rk618.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-rk618.c b/drivers/clk/rockchip-oh/regmap/clk-rk618.c new file mode 100644 index 000000000..c780f502b --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-rk618.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-rk618.c @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2017 Rockchip Electronics Co. Ltd. @@ -76257,11 +74346,11 @@ index 000000000..c780f502b +MODULE_AUTHOR("Wyon Bi "); +MODULE_DESCRIPTION("Rockchip rk618 CRU driver"); +MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clk/rockchip/regmap/clk-rk628.c b/drivers/clk/rockchip/regmap/clk-rk628.c +diff --git a/drivers/clk/rockchip-oh/regmap/clk-rk628.c b/drivers/clk/rockchip-oh/regmap/clk-rk628.c new file mode 100644 index 000000000..7f501db66 --- /dev/null -+++ b/drivers/clk/rockchip/regmap/clk-rk628.c ++++ b/drivers/clk/rockchip-oh/regmap/clk-rk628.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -76872,47038 +74961,49117 @@ index 000000000..7f501db66 +MODULE_AUTHOR("Wyon Bi "); +MODULE_DESCRIPTION("Rockchip RK628 CRU driver"); +MODULE_LICENSE("GPL v2"); -diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig -index 8208a3d89..b7022db25 100644 ---- a/drivers/clocksource/Kconfig -+++ b/drivers/clocksource/Kconfig -@@ -92,7 +92,9 @@ config IXP4XX_TIMER - Enables support for the Intel XScale IXP4xx SoC timer. - - config ROCKCHIP_TIMER -- bool "Rockchip timer driver" if COMPILE_TEST -+ tristate "Rockchip timer driver" -+ default ARCH_ROCKCHIP -+ depends on ARCH_ROCKCHIP || COMPILE_TEST - depends on ARM || ARM64 - select TIMER_OF - select CLKSRC_MMIO -diff --git a/drivers/clocksource/timer-rockchip.c b/drivers/clocksource/timer-rockchip.c -index 1f95d0aca..2f4e970d7 100644 ---- a/drivers/clocksource/timer-rockchip.c -+++ b/drivers/clocksource/timer-rockchip.c -@@ -8,11 +8,13 @@ - #include - #include - #include -+#include - #include - #include - #include - #include - #include -+#include - - #define TIMER_NAME "rk_timer" - -@@ -45,7 +47,9 @@ struct rk_clkevt { - }; - - static struct rk_clkevt *rk_clkevt; -+#ifndef MODULE - static struct rk_timer *rk_clksrc; -+#endif - - static inline struct rk_timer *rk_timer(struct clock_event_device *ce) - { -@@ -119,10 +123,12 @@ static irqreturn_t rk_timer_interrupt(int irq, void *dev_id) - return IRQ_HANDLED; - } - -+#ifndef MODULE - static u64 notrace rk_timer_sched_read(void) - { - return ~readl_relaxed(rk_clksrc->base + TIMER_CURRENT_VALUE0); - } -+#endif - - static int __init - rk_timer_probe(struct rk_timer *timer, struct device_node *np) -@@ -250,6 +256,7 @@ static int __init rk_clkevt_init(struct device_node *np) - return ret; - } - -+#ifndef MODULE - static int __init rk_clksrc_init(struct device_node *np) - { - int ret = -EINVAL; -@@ -287,14 +294,17 @@ static int __init rk_clksrc_init(struct device_node *np) - rk_clksrc = ERR_PTR(ret); - return ret; - } -+#endif - - static int __init rk_timer_init(struct device_node *np) - { - if (!rk_clkevt) - return rk_clkevt_init(np); - -+#ifndef MODULE - if (!rk_clksrc) - return rk_clksrc_init(np); -+#endif - - pr_err("Too many timer definitions for '%s'\n", TIMER_NAME); - return -EINVAL; -@@ -302,3 +312,26 @@ static int __init rk_timer_init(struct device_node *np) - - TIMER_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", rk_timer_init); - TIMER_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", rk_timer_init); 
-+ -+#ifdef MODULE -+static int __init rk_timer_driver_probe(struct platform_device *pdev) -+{ -+ return rk_timer_init(pdev->dev.of_node); -+} +diff --git a/drivers/clk/rockchip-oh/softrst.c b/drivers/clk/rockchip-oh/softrst.c +new file mode 100644 +index 000000000..5d0726674 +--- /dev/null ++++ b/drivers/clk/rockchip-oh/softrst.c +@@ -0,0 +1,110 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (c) 2014 MundoReader S.L. ++ * Author: Heiko Stuebner ++ */ + -+static const struct of_device_id rk_timer_match_table[] = { -+ { .compatible = "rockchip,rk3288-timer" }, -+ { .compatible = "rockchip,rk3399-timer" }, -+ { /* sentinel */ }, -+}; ++#include ++#include ++#include ++#include ++#include "clk.h" + -+static struct platform_driver rk_timer_driver = { -+ .driver = { -+ .name = TIMER_NAME, -+ .of_match_table = rk_timer_match_table, -+ }, ++struct rockchip_softrst { ++ struct reset_controller_dev rcdev; ++ void __iomem *reg_base; ++ int num_regs; ++ int num_per_reg; ++ u8 flags; ++ spinlock_t lock; +}; -+module_platform_driver_probe(rk_timer_driver, rk_timer_driver_probe); -+ -+MODULE_LICENSE("GPL"); -+#endif -diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig -index f429b9b37..2533b01bd 100644 ---- a/drivers/cpufreq/Kconfig -+++ b/drivers/cpufreq/Kconfig -@@ -105,6 +105,17 @@ config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL - have a look at the help section of that governor. The fallback - governor will be 'performance'. - -+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE -+ bool "interactive" -+ depends on NO_GKI -+ select CPU_FREQ_GOV_INTERACTIVE -+ select CPU_FREQ_GOV_PERFORMANCE -+ help -+ Use the CPUFreq governor 'interactive' as default. This allows -+ you to get a full dynamic cpu frequency capable system by simply -+ loading your cpufreq low-level hardware driver, using the -+ 'interactive' governor for latency-sensitive workloads. -+ - endchoice - - config CPU_FREQ_GOV_PERFORMANCE -@@ -203,12 +214,33 @@ config CPU_FREQ_GOV_SCHEDUTIL - - If in doubt, say N. - -+config CPU_FREQ_GOV_INTERACTIVE -+ tristate "'interactive' cpufreq policy governor" -+ depends on NO_GKI -+ depends on CPU_FREQ -+ select CPU_FREQ_GOV_ATTR_SET -+ select IRQ_WORK -+ help -+ 'interactive' - This driver adds a dynamic cpufreq policy governor -+ designed for latency-sensitive workloads. -+ -+ This governor attempts to reduce the latency of clock -+ increases so that the system is more responsive to -+ interactive workloads. -+ -+ To compile this driver as a module, choose M here: the -+ module will be called cpufreq_interactive. -+ -+ For details, take a look at linux/Documentation/cpu-freq. + -+ If in doubt, say N. -+ - comment "CPU frequency scaling drivers" - - config CPUFREQ_DT - tristate "Generic DT based cpufreq driver" - depends on HAVE_CLK && OF -- select CPUFREQ_DT_PLATDEV -+ select CPUFREQ_DT_PLATDEV if !ARM_ROCKCHIP_CPUFREQ - select PM_OPP - help - This adds a generic DT based cpufreq driver for frequency management. -diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm -index c5cecbd89..52127fa44 100644 ---- a/drivers/cpufreq/Kconfig.arm -+++ b/drivers/cpufreq/Kconfig.arm -@@ -190,6 +190,16 @@ config ARM_RASPBERRYPI_CPUFREQ - - If in doubt, say N. - -+config ARM_ROCKCHIP_CPUFREQ -+ tristate "Rockchip CPUfreq driver" -+ depends on ARCH_ROCKCHIP && CPUFREQ_DT -+ select PM_OPP -+ help -+ This adds the CPUFreq driver support for Rockchip SoCs, -+ based on cpufreq-dt. 
++static int rockchip_softrst_assert(struct reset_controller_dev *rcdev, ++ unsigned long id) ++{ ++ struct rockchip_softrst *softrst = container_of(rcdev, ++ struct rockchip_softrst, ++ rcdev); ++ int bank = id / softrst->num_per_reg; ++ int offset = id % softrst->num_per_reg; + -+ If in doubt, say N. ++ if (softrst->flags & ROCKCHIP_SOFTRST_HIWORD_MASK) { ++ writel(BIT(offset) | (BIT(offset) << 16), ++ softrst->reg_base + (bank * 4)); ++ } else { ++ unsigned long flags; ++ u32 reg; + - config ARM_S3C64XX_CPUFREQ - bool "Samsung S3C64XX" - depends on CPU_S3C6410 -diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile -index ef8510774..fab0394cf 100644 ---- a/drivers/cpufreq/Makefile -+++ b/drivers/cpufreq/Makefile -@@ -8,14 +8,15 @@ obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o - # CPUfreq governors - obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o - obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o --obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o -+obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace_rk.o - obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o - obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o -+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o - obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o - obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o - --obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o --obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o -+obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt-rk.o -+obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev-rk.o - - # Traces - CFLAGS_amd-pstate-trace.o := -I$(src) -@@ -71,6 +72,7 @@ obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o - obj-$(CONFIG_ARM_QCOM_CPUFREQ_HW) += qcom-cpufreq-hw.o - obj-$(CONFIG_ARM_QCOM_CPUFREQ_NVMEM) += qcom-cpufreq-nvmem.o - obj-$(CONFIG_ARM_RASPBERRYPI_CPUFREQ) += raspberrypi-cpufreq.o -+obj-$(CONFIG_ARM_ROCKCHIP_CPUFREQ) += rockchip-cpufreq.o - obj-$(CONFIG_ARM_S3C64XX_CPUFREQ) += s3c64xx-cpufreq.o - obj-$(CONFIG_ARM_S5PV210_CPUFREQ) += s5pv210-cpufreq.o - obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o -diff --git a/drivers/cpufreq/cpufreq-dt-platdev-rk.c b/drivers/cpufreq/cpufreq-dt-platdev-rk.c -new file mode 100644 -index 000000000..52cdde09e ---- /dev/null -+++ b/drivers/cpufreq/cpufreq-dt-platdev-rk.c -@@ -0,0 +1,243 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Copyright (C) 2016 Linaro. -+ * Viresh Kumar -+ */ ++ spin_lock_irqsave(&softrst->lock, flags); + -+#include -+#include -+#include -+#include ++ reg = readl(softrst->reg_base + (bank * 4)); ++ writel(reg | BIT(offset), softrst->reg_base + (bank * 4)); + -+#include "cpufreq-dt.h" ++ spin_unlock_irqrestore(&softrst->lock, flags); ++ } + -+/* -+ * Machines for which the cpufreq device is *always* created, mostly used for -+ * platforms using "operating-points" (V1) property. 
-+ */ -+static const struct of_device_id allowlist[] __initconst = { -+ { .compatible = "allwinner,sun4i-a10", }, -+ { .compatible = "allwinner,sun5i-a10s", }, -+ { .compatible = "allwinner,sun5i-a13", }, -+ { .compatible = "allwinner,sun5i-r8", }, -+ { .compatible = "allwinner,sun6i-a31", }, -+ { .compatible = "allwinner,sun6i-a31s", }, -+ { .compatible = "allwinner,sun7i-a20", }, -+ { .compatible = "allwinner,sun8i-a23", }, -+ { .compatible = "allwinner,sun8i-a83t", }, -+ { .compatible = "allwinner,sun8i-h3", }, ++ return 0; ++} + -+ { .compatible = "apm,xgene-shadowcat", }, ++static int rockchip_softrst_deassert(struct reset_controller_dev *rcdev, ++ unsigned long id) ++{ ++ struct rockchip_softrst *softrst = container_of(rcdev, ++ struct rockchip_softrst, ++ rcdev); ++ int bank = id / softrst->num_per_reg; ++ int offset = id % softrst->num_per_reg; + -+ { .compatible = "arm,integrator-ap", }, -+ { .compatible = "arm,integrator-cp", }, ++ if (softrst->flags & ROCKCHIP_SOFTRST_HIWORD_MASK) { ++ writel((BIT(offset) << 16), softrst->reg_base + (bank * 4)); ++ } else { ++ unsigned long flags; ++ u32 reg; + -+ { .compatible = "hisilicon,hi3660", }, ++ spin_lock_irqsave(&softrst->lock, flags); + -+ { .compatible = "fsl,imx27", }, -+ { .compatible = "fsl,imx51", }, -+ { .compatible = "fsl,imx53", }, ++ reg = readl(softrst->reg_base + (bank * 4)); ++ writel(reg & ~BIT(offset), softrst->reg_base + (bank * 4)); + -+ { .compatible = "marvell,berlin", }, -+ { .compatible = "marvell,pxa250", }, -+ { .compatible = "marvell,pxa270", }, ++ spin_unlock_irqrestore(&softrst->lock, flags); ++ } + -+ { .compatible = "samsung,exynos3250", }, -+ { .compatible = "samsung,exynos4210", }, -+ { .compatible = "samsung,exynos5250", }, -+#ifndef CONFIG_BL_SWITCHER -+ { .compatible = "samsung,exynos5800", }, -+#endif ++ return 0; ++} + -+ { .compatible = "renesas,emev2", }, -+ { .compatible = "renesas,r7s72100", }, -+ { .compatible = "renesas,r8a73a4", }, -+ { .compatible = "renesas,r8a7740", }, -+ { .compatible = "renesas,r8a7742", }, -+ { .compatible = "renesas,r8a7743", }, -+ { .compatible = "renesas,r8a7744", }, -+ { .compatible = "renesas,r8a7745", }, -+ { .compatible = "renesas,r8a7778", }, -+ { .compatible = "renesas,r8a7779", }, -+ { .compatible = "renesas,r8a7790", }, -+ { .compatible = "renesas,r8a7791", }, -+ { .compatible = "renesas,r8a7792", }, -+ { .compatible = "renesas,r8a7793", }, -+ { .compatible = "renesas,r8a7794", }, -+ { .compatible = "renesas,sh73a0", }, ++static const struct reset_control_ops rockchip_softrst_ops = { ++ .assert = rockchip_softrst_assert, ++ .deassert = rockchip_softrst_deassert, ++}; + -+ { .compatible = "st-ericsson,u8500", }, -+ { .compatible = "st-ericsson,u8540", }, -+ { .compatible = "st-ericsson,u9500", }, -+ { .compatible = "st-ericsson,u9540", }, ++void rockchip_register_softrst(struct device_node *np, ++ unsigned int num_regs, ++ void __iomem *base, u8 flags) ++{ ++ struct rockchip_softrst *softrst; ++ int ret; + -+ { .compatible = "starfive,jh7110", }, ++ softrst = kzalloc(sizeof(*softrst), GFP_KERNEL); ++ if (!softrst) ++ return; + -+ { .compatible = "ti,omap2", }, -+ { .compatible = "ti,omap4", }, -+ { .compatible = "ti,omap5", }, ++ spin_lock_init(&softrst->lock); + -+ { .compatible = "xlnx,zynq-7000", }, -+ { .compatible = "xlnx,zynqmp", }, ++ softrst->reg_base = base; ++ softrst->flags = flags; ++ softrst->num_regs = num_regs; ++ softrst->num_per_reg = (flags & ROCKCHIP_SOFTRST_HIWORD_MASK) ? 
16 ++ : 32; + -+ { } ++ softrst->rcdev.owner = THIS_MODULE; ++ softrst->rcdev.nr_resets = num_regs * softrst->num_per_reg; ++ softrst->rcdev.ops = &rockchip_softrst_ops; ++ softrst->rcdev.of_node = np; ++ ret = reset_controller_register(&softrst->rcdev); ++ if (ret) { ++ pr_err("%s: could not register reset controller, %d\n", ++ __func__, ret); ++ kfree(softrst); ++ } +}; -+ ++EXPORT_SYMBOL_GPL(rockchip_register_softrst); +diff --git a/drivers/clk/rockchip/clk-dclk-divider.c b/drivers/clk/rockchip/clk-dclk-divider.c +new file mode 100644 +index 000000000..88cf7ab82 +--- /dev/null ++++ b/drivers/clk/rockchip/clk-dclk-divider.c +@@ -0,0 +1,168 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Machines for which the cpufreq device is *not* created, mostly used for -+ * platforms using "operating-points-v2" property. ++ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd + */ -+static const struct of_device_id blocklist[] __initconst = { -+ { .compatible = "allwinner,sun50i-h6", }, + -+ { .compatible = "apple,arm-platform", }, ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ { .compatible = "arm,vexpress", }, ++#define div_mask(width) ((1 << (width)) - 1) + -+ { .compatible = "calxeda,highbank", }, -+ { .compatible = "calxeda,ecx-2000", }, ++static unsigned long clk_dclk_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int val; + -+ { .compatible = "fsl,imx7ulp", }, -+ { .compatible = "fsl,imx7d", }, -+ { .compatible = "fsl,imx7s", }, -+ { .compatible = "fsl,imx8mq", }, -+ { .compatible = "fsl,imx8mm", }, -+ { .compatible = "fsl,imx8mn", }, -+ { .compatible = "fsl,imx8mp", }, ++ val = clk_readl(divider->reg) >> divider->shift; ++ val &= div_mask(divider->width); + -+ { .compatible = "marvell,armadaxp", }, ++ return DIV_ROUND_UP_ULL(((u64)parent_rate), val + 1); ++} + -+ { .compatible = "mediatek,mt2701", }, -+ { .compatible = "mediatek,mt2712", }, -+ { .compatible = "mediatek,mt7622", }, -+ { .compatible = "mediatek,mt7623", }, -+ { .compatible = "mediatek,mt8167", }, -+ { .compatible = "mediatek,mt817x", }, -+ { .compatible = "mediatek,mt8173", }, -+ { .compatible = "mediatek,mt8176", }, -+ { .compatible = "mediatek,mt8183", }, -+ { .compatible = "mediatek,mt8186", }, -+ { .compatible = "mediatek,mt8365", }, -+ { .compatible = "mediatek,mt8516", }, ++static long clk_dclk_round_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long *prate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ int div, maxdiv = div_mask(divider->width) + 1; + -+ { .compatible = "nvidia,tegra20", }, -+ { .compatible = "nvidia,tegra30", }, -+ { .compatible = "nvidia,tegra124", }, -+ { .compatible = "nvidia,tegra210", }, -+ { .compatible = "nvidia,tegra234", }, ++ div = DIV_ROUND_UP_ULL(divider->max_prate, rate); ++ if (div % 2) ++ div = __rounddown_pow_of_two(div); ++ div = div > maxdiv ? 
maxdiv : div; ++ *prate = div * rate; ++ return rate; ++} + -+ { .compatible = "qcom,apq8096", }, -+ { .compatible = "qcom,msm8996", }, -+ { .compatible = "qcom,msm8998", }, -+ { .compatible = "qcom,qcm2290", }, -+ { .compatible = "qcom,qcs404", }, -+ { .compatible = "qcom,qdu1000", }, -+ { .compatible = "qcom,sa8155p" }, -+ { .compatible = "qcom,sa8540p" }, -+ { .compatible = "qcom,sa8775p" }, -+ { .compatible = "qcom,sc7180", }, -+ { .compatible = "qcom,sc7280", }, -+ { .compatible = "qcom,sc8180x", }, -+ { .compatible = "qcom,sc8280xp", }, -+ { .compatible = "qcom,sdm845", }, -+ { .compatible = "qcom,sdx75", }, -+ { .compatible = "qcom,sm6115", }, -+ { .compatible = "qcom,sm6350", }, -+ { .compatible = "qcom,sm6375", }, -+ { .compatible = "qcom,sm7225", }, -+ { .compatible = "qcom,sm8150", }, -+ { .compatible = "qcom,sm8250", }, -+ { .compatible = "qcom,sm8350", }, -+ { .compatible = "qcom,sm8450", }, -+ { .compatible = "qcom,sm8550", }, ++static int clk_dclk_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) ++{ ++ struct clk_divider *divider = to_clk_divider(hw); ++ unsigned int value; ++ unsigned long flags = 0; ++ u32 val; + -+ { .compatible = "rockchip,px30", }, -+ { .compatible = "rockchip,rk2928", }, -+ { .compatible = "rockchip,rk3036", }, -+ { .compatible = "rockchip,rk3066a", }, -+ { .compatible = "rockchip,rk3066b", }, -+ { .compatible = "rockchip,rk3126", }, -+ { .compatible = "rockchip,rk3128", }, -+ { .compatible = "rockchip,rk3188", }, -+ { .compatible = "rockchip,rk3228", }, -+ { .compatible = "rockchip,rk3229", }, -+ { .compatible = "rockchip,rk3288", }, -+ { .compatible = "rockchip,rk3288w", }, -+ { .compatible = "rockchip,rk3308", }, -+ { .compatible = "rockchip,rk3326", }, -+ { .compatible = "rockchip,rk3328", }, -+ { .compatible = "rockchip,rk3366", }, -+ { .compatible = "rockchip,rk3368", }, -+ { .compatible = "rockchip,rk3399", }, -+ { .compatible = "rockchip,rk3399pro", }, -+ { .compatible = "rockchip,rk3528", }, -+ { .compatible = "rockchip,rk3562", }, -+ { .compatible = "rockchip,rk3566", }, -+ { .compatible = "rockchip,rk3567", }, -+ { .compatible = "rockchip,rk3568", }, -+ { .compatible = "rockchip,rk3588", }, -+ { .compatible = "rockchip,rv1103", }, -+ { .compatible = "rockchip,rv1106", }, -+ { .compatible = "rockchip,rv1109", }, -+ { .compatible = "rockchip,rv1126", }, ++ value = divider_get_val(rate, parent_rate, divider->table, ++ divider->width, divider->flags); + -+ { .compatible = "st,stih407", }, -+ { .compatible = "st,stih410", }, -+ { .compatible = "st,stih418", }, ++ if (divider->lock) ++ spin_lock_irqsave(divider->lock, flags); ++ else ++ __acquire(divider->lock); + -+ { .compatible = "ti,am33xx", }, -+ { .compatible = "ti,am43", }, -+ { .compatible = "ti,dra7", }, -+ { .compatible = "ti,omap3", }, -+ { .compatible = "ti,am625", }, -+ { .compatible = "ti,am62a7", }, ++ if (divider->flags & CLK_DIVIDER_HIWORD_MASK) { ++ val = div_mask(divider->width) << (divider->shift + 16); ++ } else { ++ val = clk_readl(divider->reg); ++ val &= ~(div_mask(divider->width) << divider->shift); ++ } ++ val |= value << divider->shift; ++ clk_writel(val, divider->reg); + -+ { .compatible = "qcom,ipq8064", }, -+ { .compatible = "qcom,apq8064", }, -+ { .compatible = "qcom,msm8974", }, -+ { .compatible = "qcom,msm8960", }, ++ if (divider->lock) ++ spin_unlock_irqrestore(divider->lock, flags); ++ else ++ __release(divider->lock); + -+ { } ++ return 0; ++} ++ ++const struct clk_ops clk_dclk_divider_ops = { ++ .recalc_rate = clk_dclk_recalc_rate, 
++ .round_rate = clk_dclk_round_rate, ++ .set_rate = clk_dclk_set_rate, +}; ++EXPORT_SYMBOL_GPL(clk_dclk_divider_ops); + -+static bool __init cpu0_node_has_opp_v2_prop(void) ++/** ++ * Register a clock branch. ++ * Most clock branches have a form like ++ * ++ * src1 --|--\ ++ * |M |--[GATE]-[DIV]- ++ * src2 --|--/ ++ * ++ * sometimes without one of those components. ++ */ ++struct clk *rockchip_clk_register_dclk_branch(const char *name, ++ const char *const *parent_names, ++ u8 num_parents, ++ void __iomem *base, ++ int muxdiv_offset, u8 mux_shift, ++ u8 mux_width, u8 mux_flags, ++ int div_offset, u8 div_shift, ++ u8 div_width, u8 div_flags, ++ struct clk_div_table *div_table, ++ int gate_offset, ++ u8 gate_shift, u8 gate_flags, ++ unsigned long flags, ++ unsigned long max_prate, ++ spinlock_t *lock) +{ -+ struct device_node *np = of_cpu_device_node_get(0); -+ bool ret = false; -+ -+ if (of_property_present(np, "operating-points-v2")) -+ ret = true; ++ struct clk *clk; ++ struct clk_mux *mux = NULL; ++ struct clk_gate *gate = NULL; ++ struct clk_divider *div = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, ++ *gate_ops = NULL; + -+ of_node_put(np); -+ return ret; -+} ++ if (num_parents > 1) { ++ mux = kzalloc(sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+static int __init cpufreq_dt_platdev_init(void) -+{ -+ struct device_node *np = of_find_node_by_path("/"); -+ const struct of_device_id *match; -+ const void *data = NULL; ++ mux->reg = base + muxdiv_offset; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux->flags = mux_flags; ++ mux->lock = lock; ++ mux_ops = (mux_flags & CLK_MUX_READ_ONLY) ? &clk_mux_ro_ops ++ : &clk_mux_ops; ++ } + -+ if (!np) -+ return -ENODEV; ++ if (gate_offset >= 0) { ++ gate = kzalloc(sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ goto err_gate; + -+ match = of_match_node(allowlist, np); -+ if (match) { -+ data = match->data; -+ goto create_pdev; ++ gate->flags = gate_flags; ++ gate->reg = base + gate_offset; ++ gate->bit_idx = gate_shift; ++ gate->lock = lock; ++ gate_ops = &clk_gate_ops; + } + -+ if (cpu0_node_has_opp_v2_prop() && !of_match_node(blocklist, np)) -+ goto create_pdev; ++ if (div_width > 0) { ++ div = kzalloc(sizeof(*div), GFP_KERNEL); ++ if (!div) ++ goto err_div; + -+ of_node_put(np); -+ return -ENODEV; ++ div->flags = div_flags; ++ if (div_offset) ++ div->reg = base + div_offset; ++ else ++ div->reg = base + muxdiv_offset; ++ div->shift = div_shift; ++ div->width = div_width; ++ div->lock = lock; ++ div->max_prate = max_prate; ++ div_ops = &clk_dclk_divider_ops; ++ } + -+create_pdev: -+ of_node_put(np); -+ return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", -+ -1, data, -+ sizeof(struct cpufreq_dt_platform_data))); ++ clk = clk_register_composite(NULL, name, parent_names, num_parents, ++ mux ? &mux->hw : NULL, mux_ops, ++ div ? &div->hw : NULL, div_ops, ++ gate ? 
&gate->hw : NULL, gate_ops, ++ flags); ++ ++ return clk; ++err_div: ++ kfree(gate); ++err_gate: ++ kfree(mux); ++ return ERR_PTR(-ENOMEM); +} -+core_initcall(cpufreq_dt_platdev_init); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/cpufreq/cpufreq-dt-rk.c b/drivers/cpufreq/cpufreq-dt-rk.c +diff --git a/drivers/clk/rockchip/clk-link.c b/drivers/clk/rockchip/clk-link.c new file mode 100644 -index 000000000..914488f8e +index 000000000..78ff9b53c --- /dev/null -+++ b/drivers/cpufreq/cpufreq-dt-rk.c -@@ -0,0 +1,382 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++++ b/drivers/clk/rockchip/clk-link.c +@@ -0,0 +1,244 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (C) 2012 Freescale Semiconductor, Inc. -+ * -+ * Copyright (C) 2014 Linaro. -+ * Viresh Kumar ++ * Copyright (c) 2021 Rockchip Electronics Co., Ltd + */ + -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include +#include -+#include -+#include ++#include +#include -+#include ++#include ++#include +#include -+#include -+ -+#include "cpufreq-dt.h" -+#ifdef CONFIG_ARCH_ROCKCHIP -+#include "rockchip-cpufreq.h" -+#endif -+ -+struct private_data { -+ struct list_head node; + -+ cpumask_var_t cpus; -+ struct device *cpu_dev; -+ struct cpufreq_frequency_table *freq_table; -+ bool have_static_opps; -+ int opp_token; ++struct rockchip_link_info { ++ u32 shift; ++ const char *name; ++ const char *pname; +}; + -+static LIST_HEAD(priv_list); -+ -+static struct freq_attr *cpufreq_dt_attr[] = { -+ &cpufreq_freq_attr_scaling_available_freqs, -+ NULL, /* Extra space for boost-attr if required */ -+ NULL, ++struct rockchip_link { ++ int num; ++ const struct rockchip_link_info *info; +}; + -+static struct private_data *cpufreq_dt_find_data(int cpu) -+{ -+ struct private_data *priv; -+ -+ list_for_each_entry(priv, &priv_list, node) { -+ if (cpumask_test_cpu(cpu, priv->cpus)) -+ return priv; -+ } -+ -+ return NULL; -+} ++struct rockchip_link_clk { ++ void __iomem *base; ++ struct clk_gate *gate; ++ spinlock_t lock; ++ u32 shift; ++ u32 flag; ++ const char *name; ++ const char *pname; ++ const char *link_name; ++ const struct rockchip_link *link; ++}; + -+static int set_target(struct cpufreq_policy *policy, unsigned int index) -+{ -+ struct private_data *priv = policy->driver_data; -+ unsigned long freq = policy->freq_table[index].frequency; ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+#ifdef CONFIG_ARCH_ROCKCHIP -+ return rockchip_cpufreq_opp_set_rate(priv->cpu_dev, freq * 1000); -+#else -+ return dev_pm_opp_set_rate(priv->cpu_dev, freq * 1000); -+#endif ++#define GATE_LINK(_name, _pname, _shift) \ ++{ \ ++ .name = _name, \ ++ .pname = _pname, \ ++ .shift = (_shift), \ +} + -+/* -+ * An earlier version of opp-v1 bindings used to name the regulator -+ * "cpu0-supply", we still need to handle that for backwards compatibility. 
-+ */ -+static const char *find_supply_name(struct device *dev) ++static int register_clocks(struct rockchip_link_clk *priv, struct device *dev) +{ -+ struct device_node *np; -+ struct property *pp; -+ int cpu = dev->id; -+ const char *name = NULL; ++ struct clk_gate *gate; ++ struct clk_init_data init = {}; ++ struct clk *clk; + -+ np = of_node_get(dev->of_node); ++ gate = devm_kzalloc(dev, sizeof(struct clk_gate), GFP_KERNEL); ++ if (!gate) ++ return -ENOMEM; + -+ /* This must be valid for sure */ -+ if (WARN_ON(!np)) -+ return NULL; ++ init.name = priv->name; ++ init.ops = &clk_gate_ops; ++ init.flags |= CLK_SET_RATE_PARENT; ++ init.parent_names = &priv->pname; ++ init.num_parents = 1; + -+ /* Try "cpu0" for older DTs */ -+ if (!cpu) { -+ pp = of_find_property(np, "cpu0-supply", NULL); -+ if (pp) { -+ name = "cpu0"; -+ goto node_put; -+ } -+ } ++ /* struct clk_gate assignments */ ++ gate->reg = priv->base; ++ gate->bit_idx = priv->shift; ++ gate->flags = GFLAGS; ++ gate->lock = &priv->lock; ++ gate->hw.init = &init; + -+ pp = of_find_property(np, "cpu-supply", NULL); -+ if (pp) { -+ name = "cpu"; -+ goto node_put; -+ } ++ clk = devm_clk_register(dev, &gate->hw); ++ if (IS_ERR(clk)) ++ return -EINVAL; + -+ dev_dbg(dev, "no regulator for cpu%d\n", cpu); -+node_put: -+ of_node_put(np); -+ return name; ++ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, clk); +} + -+static int cpufreq_init(struct cpufreq_policy *policy) -+{ -+ struct private_data *priv; -+ struct device *cpu_dev; -+ struct clk *cpu_clk; -+ unsigned int transition_latency; -+ int ret; -+ -+ priv = cpufreq_dt_find_data(policy->cpu); -+ if (!priv) { -+ pr_err("failed to find data for cpu%d\n", policy->cpu); -+ return -ENODEV; -+ } -+ cpu_dev = priv->cpu_dev; -+ -+ cpu_clk = clk_get(cpu_dev, NULL); -+ if (IS_ERR(cpu_clk)) { -+ ret = PTR_ERR(cpu_clk); -+ dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); -+ return ret; -+ } -+ -+ transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); -+ if (!transition_latency) -+ transition_latency = CPUFREQ_ETERNAL; -+ -+ cpumask_copy(policy->cpus, priv->cpus); -+ policy->driver_data = priv; -+ policy->clk = cpu_clk; -+ policy->freq_table = priv->freq_table; -+ policy->suspend_freq = dev_pm_opp_get_suspend_opp_freq(cpu_dev) / 1000; -+ policy->cpuinfo.transition_latency = transition_latency; -+ policy->dvfs_possible_from_any_cpu = true; -+ -+ /* Support turbo/boost mode */ -+ if (policy_has_boost_freq(policy)) { -+ /* This gets disabled by core on driver unregister */ -+ ret = cpufreq_enable_boost_support(); -+ if (ret) -+ goto out_clk_put; -+ cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; -+ } ++static const struct rockchip_link_info rk3562_clk_gate_link_info[] = { ++ GATE_LINK("aclk_rga_jdec", "aclk_rga_pre", 3), ++ GATE_LINK("aclk_vdpu", "aclk_vdpu_pre", 5), ++ GATE_LINK("aclk_vepu", "aclk_vepu_pre", 3), ++ GATE_LINK("aclk_vi_isp", "aclk_vi", 3), ++ GATE_LINK("aclk_vo", "aclk_vo_pre", 3), ++ GATE_LINK("hclk_vepu", "hclk_vepu_pre", 4), ++}; + -+ return 0; ++static const struct rockchip_link rk3562_clk_gate_link = { ++ .num = ARRAY_SIZE(rk3562_clk_gate_link_info), ++ .info = rk3562_clk_gate_link_info, ++}; + -+out_clk_put: -+ clk_put(cpu_clk); ++static const struct rockchip_link_info rk3588_clk_gate_link_info[] = { ++ GATE_LINK("aclk_isp1_pre", "aclk_isp1_root", 6), ++ GATE_LINK("hclk_isp1_pre", "hclk_isp1_root", 8), ++ GATE_LINK("hclk_nvm", "hclk_nvm_root", 2), ++ GATE_LINK("aclk_usb", "aclk_usb_root", 2), ++ GATE_LINK("hclk_usb", 
"hclk_usb_root", 3), ++ GATE_LINK("aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", 7), ++ GATE_LINK("aclk_vdpu_low_pre", "aclk_vdpu_low_root", 5), ++ GATE_LINK("aclk_rkvenc1_pre", "aclk_rkvenc1_root", 3), ++ GATE_LINK("hclk_rkvenc1_pre", "hclk_rkvenc1_root", 2), ++ GATE_LINK("hclk_rkvdec0_pre", "hclk_rkvdec0_root", 5), ++ GATE_LINK("aclk_rkvdec0_pre", "aclk_rkvdec0_root", 6), ++ GATE_LINK("hclk_rkvdec1_pre", "hclk_rkvdec1_root", 4), ++ GATE_LINK("aclk_rkvdec1_pre", "aclk_rkvdec1_root", 5), ++ GATE_LINK("aclk_hdcp0_pre", "aclk_vo0_root", 9), ++ GATE_LINK("hclk_vo0", "hclk_vo0_root", 5), ++ GATE_LINK("aclk_hdcp1_pre", "aclk_hdcp1_root", 6), ++ GATE_LINK("hclk_vo1", "hclk_vo1_root", 9), ++ GATE_LINK("aclk_av1_pre", "aclk_av1_root", 1), ++ GATE_LINK("pclk_av1_pre", "pclk_av1_root", 4), ++ GATE_LINK("hclk_sdio_pre", "hclk_sdio_root", 1), ++ GATE_LINK("pclk_vo0_grf", "pclk_vo0_root", 10), ++ GATE_LINK("pclk_vo1_grf", "pclk_vo1_root", 12), ++}; + -+ return ret; -+} ++static const struct rockchip_link rk3588_clk_gate_link = { ++ .num = ARRAY_SIZE(rk3588_clk_gate_link_info), ++ .info = rk3588_clk_gate_link_info, ++}; + -+static int cpufreq_online(struct cpufreq_policy *policy) -+{ -+ /* We did light-weight tear down earlier, nothing to do here */ -+ return 0; -+} ++static const struct of_device_id rockchip_clk_link_of_match[] = { ++ { ++ .compatible = "rockchip,rk3562-clock-gate-link", ++ .data = (void *)&rk3562_clk_gate_link, ++ }, ++ { ++ .compatible = "rockchip,rk3588-clock-gate-link", ++ .data = (void *)&rk3588_clk_gate_link, ++ }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rockchip_clk_link_of_match); + -+static int cpufreq_offline(struct cpufreq_policy *policy) ++static const struct rockchip_link_info * ++rockchip_get_link_infos(const struct rockchip_link *link, const char *name) +{ -+ /* -+ * Preserve policy->driver_data and don't free resources on light-weight -+ * tear down. 
-+ */ -+ return 0; -+} ++ const struct rockchip_link_info *info = link->info; ++ int i = 0; + -+static int cpufreq_exit(struct cpufreq_policy *policy) -+{ -+ clk_put(policy->clk); -+ return 0; ++ for (i = 0; i < link->num; i++) { ++ if (strcmp(info->name, name) == 0) ++ break; ++ info++; ++ } ++ return info; +} + -+static struct cpufreq_driver dt_cpufreq_driver = { -+ .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | -+ CPUFREQ_IS_COOLING_DEV, -+ .verify = cpufreq_generic_frequency_table_verify, -+ .target_index = set_target, -+ .get = cpufreq_generic_get, -+ .init = cpufreq_init, -+ .exit = cpufreq_exit, -+ .online = cpufreq_online, -+ .offline = cpufreq_offline, -+ .register_em = cpufreq_register_em_with_opp, -+ .name = "cpufreq-dt", -+ .attr = cpufreq_dt_attr, -+ .suspend = cpufreq_generic_suspend, -+}; -+ -+static int dt_cpufreq_early_init(struct device *dev, int cpu) ++static int rockchip_clk_link_probe(struct platform_device *pdev) +{ -+ struct private_data *priv; -+ struct device *cpu_dev; -+ bool fallback = false; -+ const char *reg_name[] = { NULL, NULL }; ++ struct rockchip_link_clk *priv; ++ struct device_node *node = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const char *clk_name; ++ const struct rockchip_link_info *link_info; + int ret; + -+ /* Check if this CPU is already covered by some other policy */ -+ if (cpufreq_dt_find_data(cpu)) -+ return 0; -+ -+ cpu_dev = get_cpu_device(cpu); -+ if (!cpu_dev) -+ return -EPROBE_DEFER; ++ match = of_match_node(rockchip_clk_link_of_match, node); ++ if (!match) ++ return -ENXIO; + -+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); ++ priv = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_link_clk), ++ GFP_KERNEL); + if (!priv) + return -ENOMEM; + -+ if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ cpumask_set_cpu(cpu, priv->cpus); -+ priv->cpu_dev = cpu_dev; ++ priv->link = match->data; + -+ /* -+ * OPP layer will be taking care of regulators now, but it needs to know -+ * the name of the regulator first. -+ */ -+ reg_name[0] = find_supply_name(cpu_dev); -+ if (reg_name[0]) { -+ priv->opp_token = dev_pm_opp_set_regulators(cpu_dev, reg_name); -+ if (priv->opp_token < 0) { -+ ret = dev_err_probe(cpu_dev, priv->opp_token, -+ "failed to set regulators\n"); -+ goto free_cpumask; -+ } -+ } ++ spin_lock_init(&priv->lock); ++ platform_set_drvdata(pdev, priv); + -+ /* Get OPP-sharing information from "operating-points-v2" bindings */ -+ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->cpus); -+ if (ret) { -+ if (ret != -ENOENT) -+ goto out; ++ priv->base = of_iomap(node, 0); ++ if (IS_ERR(priv->base)) ++ return PTR_ERR(priv->base); + -+ /* -+ * operating-points-v2 not supported, fallback to all CPUs share -+ * OPP for backward compatibility if the platform hasn't set -+ * sharing CPUs. -+ */ -+ if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) -+ fallback = true; -+ } ++ if (of_property_read_string(node, "clock-output-names", &clk_name)) ++ priv->name = node->name; ++ else ++ priv->name = clk_name; + -+ /* -+ * Initialize OPP tables for all priv->cpus. They will be shared by -+ * all CPUs which have marked their CPUs shared with OPP bindings. -+ * -+ * For platforms not using operating-points-v2 bindings, we do this -+ * before updating priv->cpus. Otherwise, we will end up creating -+ * duplicate OPPs for the CPUs. -+ * -+ * OPPs might be populated at runtime, don't fail for error here unless -+ * it is -EPROBE_DEFER. 
-+ */ -+ ret = dev_pm_opp_of_cpumask_add_table(priv->cpus); -+ if (!ret) { -+ priv->have_static_opps = true; -+ } else if (ret == -EPROBE_DEFER) { -+ goto out; -+ } ++ link_info = rockchip_get_link_infos(priv->link, priv->name); ++ priv->shift = link_info->shift; ++ priv->pname = link_info->pname; + -+ /* -+ * The OPP table must be initialized, statically or dynamically, by this -+ * point. -+ */ -+ ret = dev_pm_opp_get_opp_count(cpu_dev); -+ if (ret <= 0) { -+ dev_err(cpu_dev, "OPP table can't be empty\n"); -+ ret = -ENODEV; -+ goto out; -+ } ++ pm_runtime_enable(&pdev->dev); ++ ret = pm_clk_create(&pdev->dev); ++ if (ret) ++ goto disable_pm_runtime; + -+ if (fallback) { -+ cpumask_setall(priv->cpus); -+ ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus); -+ if (ret) -+ dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", -+ __func__, ret); -+ } ++ ret = pm_clk_add(&pdev->dev, "link"); + -+#ifdef CONFIG_ARCH_ROCKCHIP -+ rockchip_cpufreq_adjust_table(cpu_dev); -+#endif ++ if (ret) ++ goto destroy_pm_clk; + -+ ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &priv->freq_table); -+ if (ret) { -+ dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); -+ goto out; -+ } ++ ret = register_clocks(priv, &pdev->dev); ++ if (ret) ++ goto destroy_pm_clk; + -+ list_add(&priv->node, &priv_list); + return 0; + -+out: -+ if (priv->have_static_opps) -+ dev_pm_opp_of_cpumask_remove_table(priv->cpus); -+ dev_pm_opp_put_regulators(priv->opp_token); -+free_cpumask: -+ free_cpumask_var(priv->cpus); ++destroy_pm_clk: ++ pm_clk_destroy(&pdev->dev); ++disable_pm_runtime: ++ pm_runtime_disable(&pdev->dev); ++ + return ret; +} + -+static void dt_cpufreq_release(void) ++static int rockchip_clk_link_remove(struct platform_device *pdev) +{ -+ struct private_data *priv, *tmp; ++ pm_clk_destroy(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); + -+ list_for_each_entry_safe(priv, tmp, &priv_list, node) { -+ dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table); -+ if (priv->have_static_opps) -+ dev_pm_opp_of_cpumask_remove_table(priv->cpus); -+ dev_pm_opp_put_regulators(priv->opp_token); -+ free_cpumask_var(priv->cpus); -+ list_del(&priv->node); -+ } ++ return 0; +} + -+static int dt_cpufreq_probe(struct platform_device *pdev) -+{ -+ struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); -+ int ret, cpu; -+ -+ /* Request resources early so we can return in case of -EPROBE_DEFER */ -+ for_each_possible_cpu(cpu) { -+ ret = dt_cpufreq_early_init(&pdev->dev, cpu); -+ if (ret) -+ goto err; -+ } -+ -+ if (data) { -+ if (data->have_governor_per_policy) -+ dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; -+ -+ dt_cpufreq_driver.resume = data->resume; -+ if (data->suspend) -+ dt_cpufreq_driver.suspend = data->suspend; -+ if (data->get_intermediate) { -+ dt_cpufreq_driver.target_intermediate = data->target_intermediate; -+ dt_cpufreq_driver.get_intermediate = data->get_intermediate; -+ } -+ } ++static const struct dev_pm_ops rockchip_clk_link_pm_ops = { ++ SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) ++}; + -+ ret = cpufreq_register_driver(&dt_cpufreq_driver); -+ if (ret) { -+ dev_err(&pdev->dev, "failed register driver: %d\n", ret); -+ goto err; -+ } ++static struct platform_driver rockchip_clk_link_driver = { ++ .driver = { ++ .name = "clock-link", ++ .of_match_table = of_match_ptr(rockchip_clk_link_of_match), ++ .pm = &rockchip_clk_link_pm_ops, ++ }, ++ .probe = rockchip_clk_link_probe, ++ .remove = rockchip_clk_link_remove, ++}; + -+ return 0; -+err: -+ 
dt_cpufreq_release(); -+ return ret; ++static int __init rockchip_clk_link_drv_register(void) ++{ ++ return platform_driver_register(&rockchip_clk_link_driver); +} ++postcore_initcall_sync(rockchip_clk_link_drv_register); + -+static void dt_cpufreq_remove(struct platform_device *pdev) ++static void __exit rockchip_clk_link_drv_unregister(void) +{ -+ cpufreq_unregister_driver(&dt_cpufreq_driver); -+ dt_cpufreq_release(); ++ platform_driver_unregister(&rockchip_clk_link_driver); +} ++module_exit(rockchip_clk_link_drv_unregister); + -+static struct platform_driver dt_cpufreq_platdrv = { -+ .driver = { -+ .name = "cpufreq-dt", -+ }, -+ .probe = dt_cpufreq_probe, -+ .remove_new = dt_cpufreq_remove, -+}; -+module_platform_driver(dt_cpufreq_platdrv); -+ -+MODULE_ALIAS("platform:cpufreq-dt"); -+MODULE_AUTHOR("Viresh Kumar "); -+MODULE_AUTHOR("Shawn Guo "); -+MODULE_DESCRIPTION("Generic cpufreq driver"); ++MODULE_AUTHOR("Elaine Zhang "); ++MODULE_DESCRIPTION("Clock driver for Niu Dependencies"); +MODULE_LICENSE("GPL"); -diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c +diff --git a/drivers/clk/rockchip/clk-out.c b/drivers/clk/rockchip/clk-out.c new file mode 100644 -index 000000000..a128216fc +index 000000000..22dcd98fb --- /dev/null -+++ b/drivers/cpufreq/cpufreq_interactive.c -@@ -0,0 +1,1643 @@ ++++ b/drivers/clk/rockchip/clk-out.c +@@ -0,0 +1,99 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* -+ * drivers/cpufreq/cpufreq_interactive.c -+ * -+ * Copyright (C) 2010-2016 Google, Inc. -+ * -+ * This software is licensed under the terms of the GNU General Public -+ * License version 2, as published by the Free Software Foundation, and -+ * may be copied, distributed, and modified under those terms. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * Author: Mike Chan (mike@android.com) ++ * Copyright (c) 2023 Rockchip Electronics Co., Ltd + */ + -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#ifdef CONFIG_ARCH_ROCKCHIP -+#include -+#endif -+#include ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+#define gov_attr_ro(_name) \ -+static struct governor_attr _name = \ -+__ATTR(_name, 0444, show_##_name, NULL) ++#include ++#include ++#include + -+#define gov_attr_wo(_name) \ -+static struct governor_attr _name = \ -+__ATTR(_name, 0200, NULL, store_##_name) ++static DEFINE_SPINLOCK(clk_out_lock); + -+#define gov_attr_rw(_name) \ -+static struct governor_attr _name = \ -+__ATTR(_name, 0644, show_##_name, store_##_name) ++static int rockchip_clk_out_probe(struct platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct device_node *node = pdev->dev.of_node; ++ struct clk_hw *hw; ++ struct resource *res; ++ const char *clk_name = node->name; ++ const char *parent_name; ++ void __iomem *reg; ++ u32 shift = 0; ++ u8 clk_gate_flags = CLK_GATE_HIWORD_MASK; ++ int ret; + -+/* Separate instance required for each 'interactive' directory in sysfs */ -+struct interactive_tunables { -+ struct gov_attr_set attr_set; ++ ret = device_property_read_string(dev, "clock-output-names", &clk_name); ++ if (ret) ++ return ret; + -+ /* Hi speed to bump to from lo speed when load burst (default max) */ -+ unsigned int hispeed_freq; ++ ret = device_property_read_u32(dev, "rockchip,bit-shift", &shift); ++ if (ret) ++ return ret; + -+ /* Go to hi speed when CPU load at or above this value. */ -+#define DEFAULT_GO_HISPEED_LOAD 99 -+ unsigned long go_hispeed_load; ++ if (device_property_read_bool(dev, "rockchip,bit-set-to-disable")) ++ clk_gate_flags |= CLK_GATE_SET_TO_DISABLE; + -+ /* Target load. Lower values result in higher CPU speeds. */ -+ spinlock_t target_loads_lock; -+ unsigned int *target_loads; -+ int ntarget_loads; ++ ret = of_clk_parent_fill(node, &parent_name, 1); ++ if (ret != 1) ++ return -EINVAL; + -+ /* -+ * The minimum amount of time to spend at a frequency before we can ramp -+ * down. -+ */ -+#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) -+ unsigned long min_sample_time; ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENOMEM; + -+ /* The sample rate of the timer used to increase frequency */ -+ unsigned long sampling_rate; ++ reg = devm_ioremap(dev, res->start, resource_size(res)); ++ if (!reg) ++ return -ENOMEM; + -+ /* -+ * Wait this long before raising speed above hispeed, by default a -+ * single timer interval. 
-+ */ -+ spinlock_t above_hispeed_delay_lock; -+ unsigned int *above_hispeed_delay; -+ int nabove_hispeed_delay; ++ pm_runtime_enable(dev); + -+ /* Non-zero means indefinite speed boost active */ -+ int boost; -+ /* Duration of a boot pulse in usecs */ -+ int boostpulse_duration; -+ /* End time of boost pulse in ktime converted to usecs */ -+ u64 boostpulse_endtime; -+#ifdef CONFIG_ARCH_ROCKCHIP -+ /* Frequency to which a touch boost takes the cpus to */ -+ unsigned long touchboost_freq; -+ /* Duration of a touchboost pulse in usecs */ -+ int touchboostpulse_duration_val; -+ /* End time of touchboost pulse in ktime converted to usecs */ -+ u64 touchboostpulse_endtime; -+#endif -+ bool boosted; ++ hw = clk_hw_register_gate(dev, clk_name, parent_name, CLK_SET_RATE_PARENT, ++ reg, shift, clk_gate_flags, &clk_out_lock); ++ if (IS_ERR(hw)) { ++ ret = -EINVAL; ++ goto err_disable_pm_runtime; ++ } + -+ /* -+ * Max additional time to wait in idle, beyond sampling_rate, at speeds -+ * above minimum before wakeup to reduce speed, or -1 if unnecessary. -+ */ -+#define DEFAULT_TIMER_SLACK (4 * DEFAULT_SAMPLING_RATE) -+ unsigned long timer_slack_delay; -+ unsigned long timer_slack; -+ bool io_is_busy; -+}; ++ of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw); + -+/* Separate instance required for each 'struct cpufreq_policy' */ -+struct interactive_policy { -+ struct cpufreq_policy *policy; -+ struct interactive_tunables *tunables; -+ struct list_head tunables_hook; -+}; ++ return 0; + -+/* Separate instance required for each CPU */ -+struct interactive_cpu { -+ struct update_util_data update_util; -+ struct interactive_policy *ipolicy; ++err_disable_pm_runtime: ++ pm_runtime_disable(dev); + -+ struct irq_work irq_work; -+ u64 last_sample_time; -+ unsigned long next_sample_jiffies; -+ bool work_in_progress; ++ return ret; ++} + -+ struct rw_semaphore enable_sem; -+ struct timer_list slack_timer; ++static int rockchip_clk_out_remove(struct platform_device *pdev) ++{ ++ struct device_node *node = pdev->dev.of_node; + -+ spinlock_t load_lock; /* protects the next 4 fields */ -+ u64 time_in_idle; -+ u64 time_in_idle_timestamp; -+ u64 cputime_speedadj; -+ u64 cputime_speedadj_timestamp; ++ of_clk_del_provider(node); ++ pm_runtime_disable(&pdev->dev); + -+ spinlock_t target_freq_lock; /*protects target freq */ -+ unsigned int target_freq; ++ return 0; ++} + -+ unsigned int floor_freq; -+ u64 pol_floor_val_time; /* policy floor_validate_time */ -+ u64 loc_floor_val_time; /* per-cpu floor_validate_time */ -+ u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ -+ u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ -+ int cpu; ++static const struct of_device_id rockchip_clk_out_match[] = { ++ { .compatible = "rockchip,clk-out", }, ++ {}, +}; + -+static DEFINE_PER_CPU(struct interactive_cpu, interactive_cpu); -+ -+/* Realtime thread handles frequency scaling */ -+static struct task_struct *speedchange_task; -+static cpumask_t speedchange_cpumask; -+static spinlock_t speedchange_cpumask_lock; -+ -+/* Target load. Lower values result in higher CPU speeds. 
*/ -+#define DEFAULT_TARGET_LOAD 90 -+static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; -+ -+#define DEFAULT_SAMPLING_RATE (20 * USEC_PER_MSEC) -+#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_SAMPLING_RATE -+static unsigned int default_above_hispeed_delay[] = { -+ DEFAULT_ABOVE_HISPEED_DELAY ++static struct platform_driver rockchip_clk_out_driver = { ++ .driver = { ++ .name = "rockchip-clk-out", ++ .of_match_table = rockchip_clk_out_match, ++ }, ++ .probe = rockchip_clk_out_probe, ++ .remove = rockchip_clk_out_remove, +}; + -+/* Iterate over interactive policies for tunables */ -+#define for_each_ipolicy(__ip) \ -+ list_for_each_entry(__ip, &tunables->attr_set.policy_list, tunables_hook) ++module_platform_driver(rockchip_clk_out_driver); + -+static struct interactive_tunables *global_tunables; -+static DEFINE_MUTEX(global_tunables_lock); -+#ifdef CONFIG_ARCH_ROCKCHIP -+static struct interactive_tunables backup_tunables[2]; -+#endif ++MODULE_DESCRIPTION("Rockchip Clock Input-Output-Switch"); ++MODULE_AUTHOR("Sugar Zhang "); ++MODULE_LICENSE("GPL"); ++MODULE_DEVICE_TABLE(of, rockchip_clk_out_match); +diff --git a/drivers/clk/rockchip/clk-pvtm.c b/drivers/clk/rockchip/clk-pvtm.c +new file mode 100644 +index 000000000..c748589dd +--- /dev/null ++++ b/drivers/clk/rockchip/clk-pvtm.c +@@ -0,0 +1,311 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd ++ */ + -+static inline void update_slack_delay(struct interactive_tunables *tunables) -+{ -+ tunables->timer_slack_delay = usecs_to_jiffies(tunables->timer_slack + -+ tunables->sampling_rate); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static bool timer_slack_required(struct interactive_cpu *icpu) -+{ -+ struct interactive_policy *ipolicy = icpu->ipolicy; -+ struct interactive_tunables *tunables = ipolicy->tunables; ++#define CLK_SEL_EXTERNAL_32K 0 ++#define CLK_SEL_INTERNAL_PVTM 1 + -+ if (tunables->timer_slack == 0) -+ return false; ++#define wr_msk_bit(v, off, msk) ((v) << (off) | (msk << (16 + (off)))) + -+ if (icpu->target_freq > ipolicy->policy->min) -+ return true; ++struct rockchip_clock_pvtm; + -+ return false; -+} ++struct rockchip_clock_pvtm_info { ++ u32 con; ++ u32 sta; ++ u32 sel_con; ++ u32 sel_shift; ++ u32 sel_value; ++ u32 sel_mask; ++ u32 div_shift; ++ u32 div_mask; + -+static void gov_slack_timer_start(struct interactive_cpu *icpu, int cpu) -+{ -+ struct interactive_tunables *tunables = icpu->ipolicy->tunables; ++ u32 (*get_value)(struct rockchip_clock_pvtm *pvtm, ++ unsigned int time_us); ++ int (*init_freq)(struct rockchip_clock_pvtm *pvtm); ++ int (*sel_enable)(struct rockchip_clock_pvtm *pvtm); ++}; + -+ icpu->slack_timer.expires = jiffies + tunables->timer_slack_delay; -+ add_timer_on(&icpu->slack_timer, cpu); -+} ++struct rockchip_clock_pvtm { ++ const struct rockchip_clock_pvtm_info *info; ++ struct regmap *grf; ++ struct clk *pvtm_clk; ++ struct clk *clk; ++ unsigned long rate; ++}; + -+static void gov_slack_timer_modify(struct interactive_cpu *icpu) ++static unsigned long xin32k_pvtm_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) +{ -+ struct interactive_tunables *tunables = icpu->ipolicy->tunables; -+ -+ mod_timer(&icpu->slack_timer, jiffies + tunables->timer_slack_delay); ++ return 32768; +} + -+static void slack_timer_resched(struct interactive_cpu *icpu, int cpu, -+ bool modify) -+{ -+ struct interactive_tunables *tunables = 
icpu->ipolicy->tunables; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&icpu->load_lock, flags); ++static const struct clk_ops xin32k_pvtm = { ++ .recalc_rate = xin32k_pvtm_recalc_rate, ++}; + -+ icpu->time_in_idle = get_cpu_idle_time(cpu, -+ &icpu->time_in_idle_timestamp, -+ tunables->io_is_busy); -+ icpu->cputime_speedadj = 0; -+ icpu->cputime_speedadj_timestamp = icpu->time_in_idle_timestamp; ++static void rockchip_clock_pvtm_delay(unsigned int delay) ++{ ++ unsigned int ms = delay / 1000; ++ unsigned int us = delay % 1000; + -+ if (timer_slack_required(icpu)) { -+ if (modify) -+ gov_slack_timer_modify(icpu); ++ if (ms > 0) { ++ if (ms < 20) ++ us += ms * 1000; + else -+ gov_slack_timer_start(icpu, cpu); ++ msleep(ms); + } + -+ spin_unlock_irqrestore(&icpu->load_lock, flags); -+} -+ -+static unsigned int -+freq_to_above_hispeed_delay(struct interactive_tunables *tunables, -+ unsigned int freq) -+{ -+ unsigned long flags; -+ unsigned int ret; -+ int i; -+ -+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); -+ -+ for (i = 0; i < tunables->nabove_hispeed_delay - 1 && -+ freq >= tunables->above_hispeed_delay[i + 1]; i += 2) -+ ; -+ -+ ret = tunables->above_hispeed_delay[i]; -+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); -+ -+ return ret; ++ if (us >= 10) ++ usleep_range(us, us + 100); ++ else ++ udelay(us); +} + -+static unsigned int freq_to_targetload(struct interactive_tunables *tunables, -+ unsigned int freq) ++static int rockchip_clock_sel_internal_pvtm(struct rockchip_clock_pvtm *pvtm) +{ -+ unsigned long flags; -+ unsigned int ret; -+ int i; -+ -+ spin_lock_irqsave(&tunables->target_loads_lock, flags); ++ int ret = 0; + -+ for (i = 0; i < tunables->ntarget_loads - 1 && -+ freq >= tunables->target_loads[i + 1]; i += 2) -+ ; ++ ret = regmap_write(pvtm->grf, pvtm->info->sel_con, ++ wr_msk_bit(pvtm->info->sel_value, ++ pvtm->info->sel_shift, ++ pvtm->info->sel_mask)); ++ if (ret != 0) ++ pr_err("%s: fail to write register\n", __func__); + -+ ret = tunables->target_loads[i]; -+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + -+/* -+ * If increasing frequencies never map to a lower target load then -+ * choose_freq() will find the minimum frequency that does not exceed its -+ * target load given the current load. -+ */ -+static unsigned int choose_freq(struct interactive_cpu *icpu, -+ unsigned int loadadjfreq) ++/* get pmu pvtm value */ ++static u32 rockchip_clock_pvtm_get_value(struct rockchip_clock_pvtm *pvtm, ++ u32 time_us) +{ -+ struct cpufreq_policy *policy = icpu->ipolicy->policy; -+ struct cpufreq_frequency_table *freq_table = policy->freq_table; -+ unsigned int prevfreq, freqmin = 0, freqmax = UINT_MAX, tl; -+ unsigned int freq = policy->cur; -+ int index; -+ -+ do { -+ prevfreq = freq; -+ tl = freq_to_targetload(icpu->ipolicy->tunables, freq); ++ const struct rockchip_clock_pvtm_info *info = pvtm->info; ++ u32 val = 0, sta = 0; ++ u32 clk_cnt, check_cnt; + -+ /* -+ * Find the lowest frequency where the computed load is less -+ * than or equal to the target load. 
-+ */ ++ /* 24m clk ,24cnt=1us */ ++ clk_cnt = time_us * 24; + -+ index = cpufreq_frequency_table_target(policy, loadadjfreq / tl, -+ CPUFREQ_RELATION_L); ++ regmap_write(pvtm->grf, info->con + 0x4, clk_cnt); ++ regmap_write(pvtm->grf, info->con, wr_msk_bit(3, 0, 0x3)); + -+ freq = freq_table[index].frequency; ++ rockchip_clock_pvtm_delay(time_us); + -+ if (freq > prevfreq) { -+ /* The previous frequency is too low */ -+ freqmin = prevfreq; ++ check_cnt = 100; ++ while (check_cnt) { ++ regmap_read(pvtm->grf, info->sta, &sta); ++ if (sta & 0x1) ++ break; ++ udelay(4); ++ check_cnt--; ++ } + -+ if (freq < freqmax) -+ continue; ++ if (check_cnt) { ++ regmap_read(pvtm->grf, info->sta + 0x4, &val); ++ } else { ++ pr_err("%s: wait pvtm_done timeout!\n", __func__); ++ val = 0; ++ } + -+ /* Find highest frequency that is less than freqmax */ -+ index = cpufreq_frequency_table_target(policy, -+ freqmax - 1, CPUFREQ_RELATION_H); ++ regmap_write(pvtm->grf, info->con, wr_msk_bit(0, 0, 0x3)); + -+ freq = freq_table[index].frequency; ++ return val; ++} + -+ if (freq == freqmin) { -+ /* -+ * The first frequency below freqmax has already -+ * been found to be too low. freqmax is the -+ * lowest speed we found that is fast enough. -+ */ -+ freq = freqmax; -+ break; -+ } -+ } else if (freq < prevfreq) { -+ /* The previous frequency is high enough. */ -+ freqmax = prevfreq; ++static int rockchip_clock_pvtm_init_freq(struct rockchip_clock_pvtm *pvtm) ++{ ++ u32 pvtm_cnt = 0; ++ u32 div, time_us; ++ int ret = 0; + -+ if (freq > freqmin) -+ continue; ++ time_us = 1000; ++ pvtm_cnt = pvtm->info->get_value(pvtm, time_us); ++ pr_debug("get pvtm_cnt = %d\n", pvtm_cnt); + -+ /* Find lowest frequency that is higher than freqmin */ -+ index = cpufreq_frequency_table_target(policy, -+ freqmin + 1, CPUFREQ_RELATION_L); ++ /* set pvtm_div to get rate */ ++ div = DIV_ROUND_UP(1000 * pvtm_cnt, pvtm->rate); ++ if (div > pvtm->info->div_mask) { ++ pr_err("pvtm_div out of bounary! set max instead\n"); ++ div = pvtm->info->div_mask; ++ } + -+ freq = freq_table[index].frequency; ++ pr_debug("set div %d, rate %luKHZ\n", div, pvtm->rate); ++ ret = regmap_write(pvtm->grf, pvtm->info->con, ++ wr_msk_bit(div, pvtm->info->div_shift, ++ pvtm->info->div_mask)); ++ if (ret != 0) ++ goto out; + -+ /* -+ * If freqmax is the first frequency above -+ * freqmin then we have already found that -+ * this speed is fast enough. -+ */ -+ if (freq == freqmax) -+ break; -+ } ++ /* pmu pvtm oscilator enable */ ++ ret = regmap_write(pvtm->grf, pvtm->info->con, ++ wr_msk_bit(1, 1, 0x1)); ++ if (ret != 0) ++ goto out; + -+ /* If same frequency chosen as previous then done. 
*/ -+ } while (freq != prevfreq); ++ ret = pvtm->info->sel_enable(pvtm); ++out: ++ if (ret != 0) ++ pr_err("%s: fail to write register\n", __func__); + -+ return freq; ++ return ret; +} + -+static u64 update_load(struct interactive_cpu *icpu, int cpu) ++static int clock_pvtm_regitstor(struct device *dev, ++ struct rockchip_clock_pvtm *pvtm) +{ -+ struct interactive_tunables *tunables = icpu->ipolicy->tunables; -+ u64 now_idle, now, active_time, delta_idle, delta_time; ++ struct clk_init_data init = {}; ++ struct clk_hw *clk_hw; + -+ now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); -+ delta_idle = (now_idle - icpu->time_in_idle); -+ delta_time = (now - icpu->time_in_idle_timestamp); ++ /* Init the xin32k_pvtm */ ++ pvtm->info->init_freq(pvtm); + -+ if (delta_time <= delta_idle) -+ active_time = 0; -+ else -+ active_time = delta_time - delta_idle; ++ init.parent_names = NULL; ++ init.num_parents = 0; ++ init.name = "xin32k_pvtm"; ++ init.ops = &xin32k_pvtm; + -+ icpu->cputime_speedadj += active_time * icpu->ipolicy->policy->cur; ++ clk_hw = devm_kzalloc(dev, sizeof(*clk_hw), GFP_KERNEL); ++ if (!clk_hw) ++ return -ENOMEM; ++ clk_hw->init = &init; + -+ icpu->time_in_idle = now_idle; -+ icpu->time_in_idle_timestamp = now; ++ /* optional override of the clockname */ ++ of_property_read_string_index(dev->of_node, "clock-output-names", ++ 0, &init.name); ++ pvtm->clk = devm_clk_register(dev, clk_hw); ++ if (IS_ERR(pvtm->clk)) ++ return PTR_ERR(pvtm->clk); + -+ return now; ++ return of_clk_add_provider(dev->of_node, of_clk_src_simple_get, ++ pvtm->clk); +} + -+/* Re-evaluate load to see if a frequency change is required or not */ -+static void eval_target_freq(struct interactive_cpu *icpu) -+{ -+ struct interactive_tunables *tunables = icpu->ipolicy->tunables; -+ struct cpufreq_policy *policy = icpu->ipolicy->policy; -+ struct cpufreq_frequency_table *freq_table = policy->freq_table; -+ u64 cputime_speedadj, now, max_fvtime; -+ unsigned int new_freq, loadadjfreq, index, delta_time; -+ unsigned long flags; -+ int cpu_load; -+ int cpu = smp_processor_id(); -+ -+ spin_lock_irqsave(&icpu->load_lock, flags); -+ now = update_load(icpu, smp_processor_id()); -+ delta_time = (unsigned int)(now - icpu->cputime_speedadj_timestamp); -+ cputime_speedadj = icpu->cputime_speedadj; -+ spin_unlock_irqrestore(&icpu->load_lock, flags); ++static const struct rockchip_clock_pvtm_info rk3368_pvtm_data = { ++ .con = 0x180, ++ .sta = 0x190, ++ .sel_con = 0x100, ++ .sel_shift = 6, ++ .sel_value = CLK_SEL_INTERNAL_PVTM, ++ .sel_mask = 0x1, ++ .div_shift = 2, ++ .div_mask = 0x3f, + -+ if (!delta_time) -+ return; ++ .sel_enable = rockchip_clock_sel_internal_pvtm, ++ .get_value = rockchip_clock_pvtm_get_value, ++ .init_freq = rockchip_clock_pvtm_init_freq, ++}; + -+ spin_lock_irqsave(&icpu->target_freq_lock, flags); -+ do_div(cputime_speedadj, delta_time); -+ loadadjfreq = (unsigned int)cputime_speedadj * 100; -+ cpu_load = loadadjfreq / policy->cur; -+ tunables->boosted = tunables->boost || -+ now < tunables->boostpulse_endtime; ++static const struct of_device_id rockchip_clock_pvtm_match[] = { ++ { ++ .compatible = "rockchip,rk3368-pvtm-clock", ++ .data = (void *)&rk3368_pvtm_data, ++ }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rockchip_clock_pvtm_match); + -+ if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { -+ if (policy->cur < tunables->hispeed_freq) { -+ new_freq = tunables->hispeed_freq; -+ } else { -+ new_freq = choose_freq(icpu, loadadjfreq); ++static int rockchip_clock_pvtm_probe(struct 
platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ struct rockchip_clock_pvtm *pvtm; ++ int error; ++ u32 rate; + -+ if (new_freq < tunables->hispeed_freq) -+ new_freq = tunables->hispeed_freq; -+ } -+ } else { -+ new_freq = choose_freq(icpu, loadadjfreq); -+ if (new_freq > tunables->hispeed_freq && -+ policy->cur < tunables->hispeed_freq) -+ new_freq = tunables->hispeed_freq; -+ } ++ pvtm = devm_kzalloc(dev, sizeof(*pvtm), GFP_KERNEL); ++ if (!pvtm) ++ return -ENOMEM; + -+#ifdef CONFIG_ARCH_ROCKCHIP -+ if (now < tunables->touchboostpulse_endtime && -+ new_freq < tunables->touchboost_freq) { -+ new_freq = tunables->touchboost_freq; -+ } -+#endif -+ if (policy->cur >= tunables->hispeed_freq && -+ new_freq > policy->cur && -+ now - icpu->pol_hispeed_val_time < freq_to_above_hispeed_delay(tunables, policy->cur)) { -+ trace_cpufreq_interactive_notyet(cpu, cpu_load, -+ icpu->target_freq, policy->cur, new_freq); -+ goto exit; -+ } ++ match = of_match_node(rockchip_clock_pvtm_match, np); ++ if (!match) ++ return -ENXIO; + -+ icpu->loc_hispeed_val_time = now; ++ pvtm->info = (const struct rockchip_clock_pvtm_info *)match->data; ++ if (!pvtm->info) ++ return -EINVAL; + -+ index = cpufreq_frequency_table_target(policy, new_freq, -+ CPUFREQ_RELATION_L); -+ new_freq = freq_table[index].frequency; ++ if (!dev->parent || !dev->parent->of_node) ++ return -EINVAL; + -+ /* -+ * Do not scale below floor_freq unless we have been at or above the -+ * floor frequency for the minimum sample time since last validated. -+ */ -+ max_fvtime = max(icpu->pol_floor_val_time, icpu->loc_floor_val_time); -+ if (new_freq < icpu->floor_freq && icpu->target_freq >= policy->cur) { -+ if (now - max_fvtime < tunables->min_sample_time) { -+ trace_cpufreq_interactive_notyet(cpu, cpu_load, -+ icpu->target_freq, policy->cur, new_freq); -+ goto exit; -+ } -+ } ++ pvtm->grf = syscon_node_to_regmap(dev->parent->of_node); ++ if (IS_ERR(pvtm->grf)) ++ return PTR_ERR(pvtm->grf); + -+ /* -+ * Update the timestamp for checking whether speed has been held at -+ * or above the selected frequency for a minimum of min_sample_time, -+ * if not boosted to hispeed_freq. If boosted to hispeed_freq then we -+ * allow the speed to drop as soon as the boostpulse duration expires -+ * (or the indefinite boost is turned off). 
-+ */ ++ if (!of_property_read_u32(np, "pvtm-rate", &rate)) ++ pvtm->rate = rate; ++ else ++ pvtm->rate = 32768; + -+ if (!tunables->boosted || new_freq > tunables->hispeed_freq) { -+ icpu->floor_freq = new_freq; -+ if (icpu->target_freq >= policy->cur || new_freq >= policy->cur) -+ icpu->loc_floor_val_time = now; ++ pvtm->pvtm_clk = devm_clk_get(&pdev->dev, "pvtm_pmu_clk"); ++ if (IS_ERR(pvtm->pvtm_clk)) { ++ error = PTR_ERR(pvtm->pvtm_clk); ++ if (error != -EPROBE_DEFER) ++ dev_err(&pdev->dev, ++ "failed to get pvtm core clock: %d\n", ++ error); ++ goto out_probe; + } + -+ if (icpu->target_freq == new_freq && -+ icpu->target_freq <= policy->cur) { -+ trace_cpufreq_interactive_already(cpu, cpu_load, -+ icpu->target_freq, policy->cur, new_freq); -+ goto exit; ++ error = clk_prepare_enable(pvtm->pvtm_clk); ++ if (error) { ++ dev_err(&pdev->dev, "failed to enable the clock: %d\n", ++ error); ++ goto out_probe; + } + -+ trace_cpufreq_interactive_target(cpu, cpu_load, icpu->target_freq, -+ policy->cur, new_freq); -+ -+ icpu->target_freq = new_freq; -+ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); -+ -+ spin_lock_irqsave(&speedchange_cpumask_lock, flags); -+ cpumask_set_cpu(cpu, &speedchange_cpumask); -+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); ++ platform_set_drvdata(pdev, pvtm); + -+ wake_up_process(speedchange_task); -+ return; ++ error = clock_pvtm_regitstor(&pdev->dev, pvtm); ++ if (error) { ++ dev_err(&pdev->dev, "failed to registor clock: %d\n", ++ error); ++ goto out_clk_put; ++ } + -+exit: -+ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); -+} ++ return error; + -+static void cpufreq_interactive_update(struct interactive_cpu *icpu) -+{ -+ eval_target_freq(icpu); -+ slack_timer_resched(icpu, smp_processor_id(), true); ++out_clk_put: ++ clk_disable_unprepare(pvtm->pvtm_clk); ++out_probe: ++ return error; +} + -+static void cpufreq_interactive_idle_end(void) ++static int rockchip_clock_pvtm_remove(struct platform_device *pdev) +{ -+ struct interactive_cpu *icpu = &per_cpu(interactive_cpu, -+ smp_processor_id()); -+ unsigned long sampling_rate; -+ -+ if (!down_read_trylock(&icpu->enable_sem)) -+ return; ++ struct rockchip_clock_pvtm *pvtm = platform_get_drvdata(pdev); ++ struct device_node *np = pdev->dev.of_node; + -+ if (icpu->ipolicy) { -+ /* -+ * We haven't sampled load for more than sampling_rate time, do -+ * it right now. 
-+ */ -+ if (time_after_eq(jiffies, icpu->next_sample_jiffies)) { -+ sampling_rate = icpu->ipolicy->tunables->sampling_rate; -+ icpu->last_sample_time = local_clock(); -+ icpu->next_sample_jiffies = usecs_to_jiffies(sampling_rate) + jiffies; -+ cpufreq_interactive_update(icpu); -+ } -+ } ++ of_clk_del_provider(np); ++ clk_disable_unprepare(pvtm->pvtm_clk); + -+ up_read(&icpu->enable_sem); ++ return 0; +} + -+static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, -+ unsigned int *pmax_freq, -+ u64 *phvt, u64 *pfvt) -+{ -+ struct interactive_cpu *icpu; -+ u64 hvt = ~0ULL, fvt = 0; -+ unsigned int max_freq = 0, i; ++static struct platform_driver rockchip_clock_pvtm_driver = { ++ .driver = { ++ .name = "rockchip-clcok-pvtm", ++ .of_match_table = rockchip_clock_pvtm_match, ++ }, ++ .probe = rockchip_clock_pvtm_probe, ++ .remove = rockchip_clock_pvtm_remove, ++}; + -+ for_each_cpu(i, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, i); ++module_platform_driver(rockchip_clock_pvtm_driver); + -+ fvt = max(fvt, icpu->loc_floor_val_time); -+ if (icpu->target_freq > max_freq) { -+ max_freq = icpu->target_freq; -+ hvt = icpu->loc_hispeed_val_time; -+ } else if (icpu->target_freq == max_freq) { -+ hvt = min(hvt, icpu->loc_hispeed_val_time); -+ } -+ } ++MODULE_DESCRIPTION("Rockchip Clock Pvtm Driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/clk/rockchip/clk-rk1808.c b/drivers/clk/rockchip/clk-rk1808.c +new file mode 100644 +index 000000000..e177a3dd6 +--- /dev/null ++++ b/drivers/clk/rockchip/clk-rk1808.c +@@ -0,0 +1,1249 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2018 Fuzhou Rockchip Electronics Co., Ltd ++ * Author: Elaine Zhang ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ *pmax_freq = max_freq; -+ *phvt = hvt; -+ *pfvt = fvt; -+} ++#define RK1808_GRF_SOC_STATUS0 0x480 ++#define RK1808_PMUGRF_SOC_CON0 0x100 ++#define RK1808_UART_FRAC_MAX_PRATE 800000000 ++#define RK1808_PDM_FRAC_MAX_PRATE 300000000 ++#define RK1808_I2S_FRAC_MAX_PRATE 600000000 ++#define RK1808_VOP_RAW_FRAC_MAX_PRATE 300000000 ++#define RK1808_VOP_LITE_FRAC_MAX_PRATE 400000000 + -+static void cpufreq_interactive_adjust_cpu(unsigned int cpu, -+ struct cpufreq_policy *policy) -+{ -+ struct interactive_cpu *icpu; -+ u64 hvt, fvt; -+ unsigned int max_freq; -+ int i; ++enum rk1808_plls { ++ apll, dpll, cpll, gpll, npll, ppll, ++}; + -+ cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); ++static struct rockchip_pll_rate_table rk1808_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 62, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 61, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 60, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 58, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 57, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 56, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 55, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 53, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 52, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 
1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 46, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 2, 275, 3, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 1, 100, 3, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 1, 175, 2, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 58, 2, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 52, 2, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 75, 3, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 63, 3, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(416000000, 1, 52, 3, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 52, 2, 2, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(200000000, 1, 200, 6, 4, 1, 0), ++ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 64, 4, 4, 1, 0), ++ { /* sentinel */ }, ++}; + -+ for_each_cpu(i, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, i); -+ icpu->pol_floor_val_time = fvt; -+ } ++#define RK1808_DIV_ACLKM_MASK 0x7 ++#define RK1808_DIV_ACLKM_SHIFT 12 ++#define RK1808_DIV_PCLK_DBG_MASK 0xf ++#define RK1808_DIV_PCLK_DBG_SHIFT 8 + -+ if (max_freq != policy->cur) { -+ __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); -+ for_each_cpu(i, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, i); -+ icpu->pol_hispeed_val_time = hvt; -+ } -+ } ++#define RK1808_CLKSEL0(_aclk_core, _pclk_dbg) \ ++{ \ ++ .reg = RK1808_CLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, RK1808_DIV_ACLKM_MASK, \ ++ RK1808_DIV_ACLKM_SHIFT) | \ ++ HIWORD_UPDATE(_pclk_dbg, RK1808_DIV_PCLK_DBG_MASK, \ ++ RK1808_DIV_PCLK_DBG_SHIFT), \ ++} + -+ trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur); ++#define RK1808_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK1808_CLKSEL0(_aclk_core, _pclk_dbg), \ ++ }, \ +} + -+static int cpufreq_interactive_speedchange_task(void *data) -+{ -+ unsigned int cpu; -+ cpumask_t tmp_mask; -+ unsigned long flags; -+ -+again: -+ set_current_state(TASK_INTERRUPTIBLE); -+ spin_lock_irqsave(&speedchange_cpumask_lock, flags); -+ -+ if (cpumask_empty(&speedchange_cpumask)) { -+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); -+ schedule(); -+ -+ if (kthread_should_stop()) -+ return 0; -+ -+ spin_lock_irqsave(&speedchange_cpumask_lock, flags); -+ } -+ -+ set_current_state(TASK_RUNNING); -+ tmp_mask = speedchange_cpumask; -+ cpumask_clear(&speedchange_cpumask); -+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); ++static struct rockchip_cpuclk_rate_table rk1808_cpuclk_rates[] __initdata = { ++ RK1808_CPUCLK_RATE(1608000000, 1, 7), ++ RK1808_CPUCLK_RATE(1512000000, 1, 7), ++ RK1808_CPUCLK_RATE(1488000000, 1, 5), ++ RK1808_CPUCLK_RATE(1416000000, 1, 5), ++ RK1808_CPUCLK_RATE(1392000000, 1, 5), ++ RK1808_CPUCLK_RATE(1296000000, 1, 5), ++ RK1808_CPUCLK_RATE(1200000000, 1, 5), ++ RK1808_CPUCLK_RATE(1104000000, 1, 5), ++ RK1808_CPUCLK_RATE(1008000000, 1, 5), ++ 
RK1808_CPUCLK_RATE(912000000, 1, 5), ++ RK1808_CPUCLK_RATE(816000000, 1, 3), ++ RK1808_CPUCLK_RATE(696000000, 1, 3), ++ RK1808_CPUCLK_RATE(600000000, 1, 3), ++ RK1808_CPUCLK_RATE(408000000, 1, 1), ++ RK1808_CPUCLK_RATE(312000000, 1, 1), ++ RK1808_CPUCLK_RATE(216000000, 1, 1), ++ RK1808_CPUCLK_RATE(96000000, 1, 1), ++}; + -+ for_each_cpu(cpu, &tmp_mask) { -+ struct interactive_cpu *icpu = &per_cpu(interactive_cpu, cpu); -+ struct cpufreq_policy *policy; ++static const struct rockchip_cpuclk_reg_data rk1808_cpuclk_data = { ++ .core_reg[0] = RK1808_CLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0xf, ++ .num_cores = 1, ++ .mux_core_alt = 2, ++ .mux_core_main = 0, ++ .mux_core_shift = 6, ++ .mux_core_mask = 0x3, ++}; + -+ policy = cpufreq_cpu_get(cpu); -+ if (!policy) -+ continue; ++PNAME(mux_pll_p) = { "xin24m", "xin32k"}; ++PNAME(mux_usb480m_p) = { "xin24m", "usb480m_phy", "xin32k" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_cpll_apll_p) = { "gpll", "cpll", "apll" }; ++PNAME(mux_npu_p) = { "clk_npu_div", "clk_npu_np5" }; ++PNAME(mux_ddr_p) = { "dpll_ddr", "gpll_ddr" }; ++PNAME(mux_cpll_gpll_npll_p) = { "cpll", "gpll", "npll" }; ++PNAME(mux_gpll_cpll_npll_p) = { "gpll", "cpll", "npll" }; ++PNAME(mux_dclk_vopraw_p) = { "dclk_vopraw_src", "dclk_vopraw_frac", "xin24m" }; ++PNAME(mux_dclk_voplite_p) = { "dclk_voplite_src", "dclk_voplite_frac", "xin24m" }; ++PNAME(mux_24m_npll_gpll_usb480m_p) = { "xin24m", "npll", "gpll", "usb480m" }; ++PNAME(mux_usb3_otg0_suspend_p) = { "xin32k", "xin24m" }; ++PNAME(mux_pcie_aux_p) = { "xin24m", "clk_pcie_src" }; ++PNAME(mux_gpll_cpll_npll_24m_p) = { "gpll", "cpll", "npll", "xin24m" }; ++PNAME(mux_sdio_p) = { "clk_sdio_div", "clk_sdio_div50" }; ++PNAME(mux_sdmmc_p) = { "clk_sdmmc_div", "clk_sdmmc_div50" }; ++PNAME(mux_emmc_p) = { "clk_emmc_div", "clk_emmc_div50" }; ++PNAME(mux_cpll_npll_ppll_p) = { "cpll", "npll", "ppll" }; ++PNAME(mux_gmac_p) = { "clk_gmac_src", "gmac_clkin" }; ++PNAME(mux_gmac_rgmii_speed_p) = { "clk_gmac_tx_src", "clk_gmac_tx_src", "clk_gmac_tx_div50", "clk_gmac_tx_div5" }; ++PNAME(mux_gmac_rmii_speed_p) = { "clk_gmac_rx_div20", "clk_gmac_rx_div2" }; ++PNAME(mux_gmac_rx_tx_p) = { "clk_gmac_rgmii_speed", "clk_gmac_rmii_speed" }; ++PNAME(mux_gpll_usb480m_cpll_npll_p) = { "gpll", "usb480m", "cpll", "npll" }; ++PNAME(mux_uart1_p) = { "clk_uart1_src", "clk_uart1_np5", "clk_uart1_frac", "xin24m" }; ++PNAME(mux_uart2_p) = { "clk_uart2_src", "clk_uart2_np5", "clk_uart2_frac", "xin24m" }; ++PNAME(mux_uart3_p) = { "clk_uart3_src", "clk_uart3_np5", "clk_uart3_frac", "xin24m" }; ++PNAME(mux_uart4_p) = { "clk_uart4_src", "clk_uart4_np5", "clk_uart4_frac", "xin24m" }; ++PNAME(mux_uart5_p) = { "clk_uart5_src", "clk_uart5_np5", "clk_uart5_frac", "xin24m" }; ++PNAME(mux_uart6_p) = { "clk_uart6_src", "clk_uart6_np5", "clk_uart6_frac", "xin24m" }; ++PNAME(mux_uart7_p) = { "clk_uart7_src", "clk_uart7_np5", "clk_uart7_frac", "xin24m" }; ++PNAME(mux_gpll_xin24m_p) = { "gpll", "xin24m" }; ++PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(mux_gpll_xin24m_cpll_npll_p) = { "gpll", "xin24m", "cpll", "npll" }; ++PNAME(mux_pdm_p) = { "clk_pdm_src", "clk_pdm_frac" }; ++PNAME(mux_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "mclk_i2s0_8ch_in", "xin12m" }; ++PNAME(mux_i2s0_8ch_tx_rx_p) = { "clk_i2s0_8ch_tx_mux", "clk_i2s0_8ch_rx_mux"}; ++PNAME(mux_i2s0_8ch_tx_out_p) = { "clk_i2s0_8ch_tx", "xin12m", "clk_i2s0_8ch_rx" }; ++PNAME(mux_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", 
"clk_i2s0_8ch_rx_frac", "mclk_i2s0_8ch_in", "xin12m" }; ++PNAME(mux_i2s0_8ch_rx_tx_p) = { "clk_i2s0_8ch_rx_mux", "clk_i2s0_8ch_tx_mux"}; ++PNAME(mux_i2s0_8ch_rx_out_p) = { "clk_i2s0_8ch_rx", "xin12m", "clk_i2s0_8ch_tx" }; ++PNAME(mux_i2s1_2ch_p) = { "clk_i2s1_2ch_src", "clk_i2s1_2ch_frac", "mclk_i2s1_2ch_in", "xin12m" }; ++PNAME(mux_i2s1_2ch_out_p) = { "clk_i2s1_2ch", "xin12m" }; ++PNAME(mux_rtc32k_pmu_p) = { "xin32k", "pmu_pvtm_32k", "clk_rtc32k_frac" }; ++PNAME(mux_wifi_pmu_p) = { "xin24m", "clk_wifi_pmu_src" }; ++PNAME(mux_gpll_usb480m_cpll_ppll_p) = { "gpll", "usb480m", "cpll", "ppll" }; ++PNAME(mux_uart0_pmu_p) = { "clk_uart0_pmu_src", "clk_uart0_np5", "clk_uart0_frac", "xin24m" }; ++PNAME(mux_usbphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_mipidsiphy_ref_p) = { "xin24m", "clk_ref24m_pmu" }; ++PNAME(mux_pciephy_ref_p) = { "xin24m", "clk_pciephy_src" }; ++PNAME(mux_ppll_xin24m_p) = { "ppll", "xin24m" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", "xin32k" }; ++PNAME(mux_clk_32k_ioe_p) = { "clk_rtc32k_pmu", "xin32k" }; + -+ down_write(&policy->rwsem); ++static struct rockchip_pll_clock rk1808_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3036, PLL_APLL, "apll", mux_pll_p, ++ 0, RK1808_PLL_CON(0), ++ RK1808_MODE_CON, 0, 0, 0, rk1808_pll_rates), ++ [dpll] = PLL(pll_rk3036, PLL_DPLL, "dpll", mux_pll_p, ++ 0, RK1808_PLL_CON(8), ++ RK1808_MODE_CON, 2, 1, 0, NULL), ++ [cpll] = PLL(pll_rk3036, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK1808_PLL_CON(16), ++ RK1808_MODE_CON, 4, 2, 0, rk1808_pll_rates), ++ [gpll] = PLL(pll_rk3036, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK1808_PLL_CON(24), ++ RK1808_MODE_CON, 6, 3, 0, rk1808_pll_rates), ++ [npll] = PLL(pll_rk3036, PLL_NPLL, "npll", mux_pll_p, ++ 0, RK1808_PLL_CON(32), ++ RK1808_MODE_CON, 8, 5, 0, rk1808_pll_rates), ++ [ppll] = PLL(pll_rk3036, PLL_PPLL, "ppll", mux_pll_p, ++ 0, RK1808_PMU_PLL_CON(0), ++ RK1808_PMU_MODE_CON, 0, 4, 0, rk1808_pll_rates), ++}; + -+ if (likely(down_read_trylock(&icpu->enable_sem))) { -+ if (likely(icpu->ipolicy)) -+ cpufreq_interactive_adjust_cpu(cpu, policy); -+ up_read(&icpu->enable_sem); -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ up_write(&policy->rwsem); -+ cpufreq_cpu_put(policy); -+ } ++static struct rockchip_clk_branch rk1808_uart1_fracmux __initdata = ++ MUX(0, "clk_uart1_mux", mux_uart1_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(39), 14, 2, MFLAGS); + -+ goto again; -+} ++static struct rockchip_clk_branch rk1808_uart2_fracmux __initdata = ++ MUX(0, "clk_uart2_mux", mux_uart2_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(42), 14, 2, MFLAGS); + -+static void cpufreq_interactive_boost(struct interactive_tunables *tunables) -+{ -+ struct interactive_policy *ipolicy; -+ struct cpufreq_policy *policy; -+ struct interactive_cpu *icpu; -+ unsigned long flags[2]; -+ bool wakeup = false; -+ int i; ++static struct rockchip_clk_branch rk1808_uart3_fracmux __initdata = ++ MUX(0, "clk_uart3_mux", mux_uart3_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(45), 14, 2, MFLAGS); + -+ tunables->boosted = true; ++static struct rockchip_clk_branch rk1808_uart4_fracmux __initdata = ++ MUX(0, "clk_uart4_mux", mux_uart4_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(48), 14, 2, MFLAGS); + -+ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); ++static struct rockchip_clk_branch rk1808_uart5_fracmux __initdata = ++ MUX(0, "clk_uart5_mux", mux_uart5_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(51), 14, 2, MFLAGS); + -+ 
for_each_ipolicy(ipolicy) { -+ policy = ipolicy->policy; ++static struct rockchip_clk_branch rk1808_uart6_fracmux __initdata = ++ MUX(0, "clk_uart6_mux", mux_uart6_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(54), 14, 2, MFLAGS); + -+ for_each_cpu(i, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, i); ++static struct rockchip_clk_branch rk1808_uart7_fracmux __initdata = ++ MUX(0, "clk_uart7_mux", mux_uart7_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(57), 14, 2, MFLAGS); + -+ if (!down_read_trylock(&icpu->enable_sem)) -+ continue; ++static struct rockchip_clk_branch rk1808_dclk_vopraw_fracmux __initdata = ++ MUX(0, "dclk_vopraw_mux", mux_dclk_vopraw_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(5), 14, 2, MFLAGS); + -+ if (!icpu->ipolicy) { -+ up_read(&icpu->enable_sem); -+ continue; -+ } ++static struct rockchip_clk_branch rk1808_dclk_voplite_fracmux __initdata = ++ MUX(0, "dclk_voplite_mux", mux_dclk_voplite_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(7), 14, 2, MFLAGS); + -+ spin_lock_irqsave(&icpu->target_freq_lock, flags[1]); -+ if (icpu->target_freq < tunables->hispeed_freq) { -+ icpu->target_freq = tunables->hispeed_freq; -+ cpumask_set_cpu(i, &speedchange_cpumask); -+ icpu->pol_hispeed_val_time = ktime_to_us(ktime_get()); -+ wakeup = true; -+ } -+ spin_unlock_irqrestore(&icpu->target_freq_lock, flags[1]); ++static struct rockchip_clk_branch rk1808_pdm_fracmux __initdata = ++ MUX(0, "clk_pdm_mux", mux_pdm_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(30), 15, 1, MFLAGS); + -+ up_read(&icpu->enable_sem); -+ } -+ } ++static struct rockchip_clk_branch rk1808_i2s0_8ch_tx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_TX_MUX, "clk_i2s0_8ch_tx_mux", mux_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 10, 2, MFLAGS); + -+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); ++static struct rockchip_clk_branch rk1808_i2s0_8ch_rx_fracmux __initdata = ++ MUX(SCLK_I2S0_8CH_RX_MUX, "clk_i2s0_8ch_rx_mux", mux_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 10, 2, MFLAGS); + -+ if (wakeup) -+ wake_up_process(speedchange_task); -+} ++static struct rockchip_clk_branch rk1808_i2s1_2ch_fracmux __initdata = ++ MUX(0, "clk_i2s1_2ch_mux", mux_i2s1_2ch_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(36), 10, 2, MFLAGS); + -+static int cpufreq_interactive_notifier(struct notifier_block *nb, -+ unsigned long val, void *data) -+{ -+ struct cpufreq_freqs *freq = data; -+ struct cpufreq_policy *policy = freq->policy; -+ struct interactive_cpu *icpu; -+ unsigned long flags; -+ int cpu; ++static struct rockchip_clk_branch rk1808_rtc32k_pmu_fracmux __initdata = ++ MUX(SCLK_RTC32K_PMU, "clk_rtc32k_pmu", mux_rtc32k_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(0), 14, 2, MFLAGS); + -+ if (val != CPUFREQ_POSTCHANGE) -+ return 0; ++static struct rockchip_clk_branch rk1808_uart0_pmu_fracmux __initdata = ++ MUX(0, "clk_uart0_pmu_mux", mux_uart0_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(4), 14, 2, MFLAGS); + -+ for_each_cpu(cpu, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, cpu); ++static struct rockchip_clk_branch rk1808_clk_branches[] __initdata = { ++ /* ++ * Clock-Architecture Diagram 1 ++ */ + -+ if (!down_read_trylock(&icpu->enable_sem)) -+ continue; ++ MUX(USB480M, "usb480m", mux_usb480m_p, CLK_SET_RATE_PARENT, ++ RK1808_MODE_CON, 10, 2, MFLAGS), ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ if (!icpu->ipolicy) { -+ up_read(&icpu->enable_sem); -+ continue; -+ } ++ /* ++ * Clock-Architecture Diagram 2 ++ */ + -+ spin_lock_irqsave(&icpu->load_lock, flags); 
-+ update_load(icpu, cpu); -+ spin_unlock_irqrestore(&icpu->load_lock, flags); ++ GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "cpll_core", "cpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_core_dbg", "armclk", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(0), 8, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK1808_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(0), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK1808_CLKGATE_CON(0), 2, GFLAGS), + -+ up_read(&icpu->enable_sem); -+ } ++ GATE(0, "clk_jtag", "jtag_clkin", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(0), 4, GFLAGS), + -+ return 0; -+} ++ GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RK1808_CLKGATE_CON(0), 5, GFLAGS), + -+static struct notifier_block cpufreq_notifier_block = { -+ .notifier_call = cpufreq_interactive_notifier, -+}; ++ COMPOSITE_NOMUX(MSCLK_CORE_NIU, "msclk_core_niu", "gpll", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(18), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(0), 1, GFLAGS), + -+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) -+{ -+ const char *cp = buf; -+ int ntokens = 1, i = 0; -+ unsigned int *tokenized_data; -+ int err = -EINVAL; ++ /* ++ * Clock-Architecture Diagram 3 ++ */ + -+ while ((cp = strpbrk(cp + 1, " :"))) -+ ntokens++; ++ COMPOSITE(ACLK_GIC_PRE, "aclk_gic_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(15), 11, 1, MFLAGS, 12, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(0, "aclk_gic_niu", "aclk_gic_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_gic_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(0, "aclk_core2gic", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(0, "aclk_gic2core", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(0, "aclk_spinlock", "aclk_gic_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(1), 4, GFLAGS), + -+ if (!(ntokens & 0x1)) -+ goto err; ++ COMPOSITE(0, "aclk_vpu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(16), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_NOMUX(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, ++ RK1808_CLKSEL_CON(16), 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_pre", 0, ++ RK1808_CLKGATE_CON(8), 12, GFLAGS), ++ GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 10, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", 0, ++ RK1808_CLKGATE_CON(8), 13, GFLAGS), ++ GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 11, GFLAGS), + -+ tokenized_data = kcalloc(ntokens, sizeof(*tokenized_data), GFP_KERNEL); -+ if (!tokenized_data) { -+ err = -ENOMEM; -+ goto err; -+ } ++ /* ++ * Clock-Architecture Diagram 4 ++ */ ++ COMPOSITE_NOGATE(0, "clk_npu_div", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, ++ RK1808_CLKSEL_CON(1), 8, 2, MFLAGS, 0, 4, DFLAGS), ++ COMPOSITE_NOGATE_HALFDIV(0, "clk_npu_np5", mux_gpll_cpll_p, CLK_OPS_PARENT_ENABLE, ++ RK1808_CLKSEL_CON(1), 10, 2, MFLAGS, 4, 4, DFLAGS), ++ MUX(0, "clk_npu_pre", mux_npu_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(1), 15, 1, MFLAGS), ++ FACTOR(0, "clk_npu_scan", "clk_npu_pre", 0, 1, 2), ++ GATE(SCLK_NPU, "clk_npu", "clk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 10, GFLAGS), + 
-+ cp = buf; -+ while (i < ntokens) { -+ if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) -+ goto err_kfree; ++ COMPOSITE(0, "aclk_npu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(2), 14, 1, MFLAGS, 0, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE(0, "hclk_npu_pre", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(2), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 9, GFLAGS), ++ GATE(ACLK_NPU, "aclk_npu", "aclk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 11, GFLAGS), ++ GATE(0, "aclk_npu_niu", "aclk_npu_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_NOMUX(0, "aclk_npu2mem", "aclk_npu_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(2), 4, 4, DFLAGS, ++ RK1808_CLKGATE_CON(1), 15, GFLAGS), ++ GATE(HCLK_NPU, "hclk_npu", "hclk_npu_pre", 0, ++ RK1808_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(0, "hclk_npu_niu", "hclk_npu_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(1), 14, GFLAGS), + -+ cp = strpbrk(cp, " :"); -+ if (!cp) -+ break; -+ cp++; -+ } ++ GATE(SCLK_PVTM_NPU, "clk_pvtm_npu", "xin24m", 0, ++ RK1808_CLKGATE_CON(0), 15, GFLAGS), + -+ if (i != ntokens) -+ goto err_kfree; ++ COMPOSITE(ACLK_IMEM_PRE, "aclk_imem_pre", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(17), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(ACLK_IMEM0, "aclk_imem0", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(0, "aclk_imem0_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 10, GFLAGS), ++ GATE(ACLK_IMEM1, "aclk_imem1", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(0, "aclk_imem1_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 11, GFLAGS), ++ GATE(ACLK_IMEM2, "aclk_imem2", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 8, GFLAGS), ++ GATE(0, "aclk_imem2_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 12, GFLAGS), ++ GATE(ACLK_IMEM3, "aclk_imem3", "aclk_imem_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(7), 9, GFLAGS), ++ GATE(0, "aclk_imem3_niu", "aclk_imem_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(7), 13, GFLAGS), + -+ *num_tokens = ntokens; -+ return tokenized_data; ++ COMPOSITE(HSCLK_IMEM, "hsclk_imem", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(17), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(7), 5, GFLAGS), + -+err_kfree: -+ kfree(tokenized_data); -+err: -+ return ERR_PTR(err); -+} ++ /* ++ * Clock-Architecture Diagram 5 ++ */ ++ GATE(0, "clk_ddr_mon_timer", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 0, GFLAGS), + -+/* Interactive governor sysfs interface */ -+static struct interactive_tunables *to_tunables(struct gov_attr_set *attr_set) -+{ -+ return container_of(attr_set, struct interactive_tunables, attr_set); -+} ++ GATE(0, "clk_ddr_mon", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 11, GFLAGS), ++ GATE(0, "aclk_split", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 15, GFLAGS), ++ GATE(0, "clk_ddr_msch", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 8, GFLAGS), ++ GATE(0, "clk_ddrdfi_ctl", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(0, "clk_stdby", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 13, GFLAGS), ++ GATE(0, "aclk_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(0, "clk_core_ddrc", "clk_ddrphy1x_out", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 6, GFLAGS), + -+#define show_one(file_name, type) \ -+static 
ssize_t show_##file_name(struct gov_attr_set *attr_set, char *buf) \ -+{ \ -+ struct interactive_tunables *tunables = to_tunables(attr_set); \ -+ return sprintf(buf, type "\n", tunables->file_name); \ -+} ++ GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 5, GFLAGS), ++ GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(8), 6, GFLAGS), + -+static ssize_t show_target_loads(struct gov_attr_set *attr_set, char *buf) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long flags; -+ ssize_t ret = 0; -+ int i; ++ COMPOSITE_NOGATE(SCLK_DDRCLK, "sclk_ddrc", mux_ddr_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 5, DFLAGS), ++ FACTOR(0, "clk_ddrphy1x_out", "sclk_ddrc", CLK_IGNORE_UNUSED, 1, 1), + -+ spin_lock_irqsave(&tunables->target_loads_lock, flags); ++ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "gpll", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(3), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 10, GFLAGS), ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_MSCH, "pclk_msch", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(PCLK_STDBY, "pclk_stdby", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 12, GFLAGS), ++ GATE(0, "pclk_ddr_grf", "pclk_ddr", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(0, "pclk_ddrdfi_ctl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(2), 2, GFLAGS), + -+ for (i = 0; i < tunables->ntarget_loads; i++) -+ ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], -+ i & 0x1 ? ":" : " "); ++ /* ++ * Clock-Architecture Diagram 6 ++ */ + -+ sprintf(buf + ret - 1, "\n"); -+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); ++ COMPOSITE(HSCLK_VIO, "hsclk_vio", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(4), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 0, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_VIO, "lsclk_vio", "hsclk_vio", 0, ++ RK1808_CLKSEL_CON(4), 8, 4, DFLAGS, ++ RK1808_CLKGATE_CON(3), 12, GFLAGS), ++ GATE(0, "hsclk_vio_niu", "hsclk_vio", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(4), 0, GFLAGS), ++ GATE(0, "lsclk_vio_niu", "lsclk_vio", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(4), 1, GFLAGS), ++ GATE(ACLK_VOPRAW, "aclk_vopraw", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 2, GFLAGS), ++ GATE(HCLK_VOPRAW, "hclk_vopraw", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 3, GFLAGS), ++ GATE(ACLK_VOPLITE, "aclk_voplite", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 4, GFLAGS), ++ GATE(HCLK_VOPLITE, "hclk_voplite", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 5, GFLAGS), ++ GATE(PCLK_DSI_TX, "pclk_dsi_tx", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 6, GFLAGS), ++ GATE(PCLK_CSI_TX, "pclk_csi_tx", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 7, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 9, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(ACLK_CIF, "aclk_cif", "hsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 10, GFLAGS), ++ GATE(HCLK_CIF, "hclk_cif", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(PCLK_CSI2HOST, "pclk_csi2host", "lsclk_vio", 0, ++ RK1808_CLKGATE_CON(4), 12, GFLAGS), + -+ return ret; -+} ++ COMPOSITE(0, 
"dclk_vopraw_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(5), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(3), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "dclk_vopraw_frac", "dclk_vopraw_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(6), 0, ++ RK1808_CLKGATE_CON(3), 2, GFLAGS, ++ &rk1808_dclk_vopraw_fracmux), ++ GATE(DCLK_VOPRAW, "dclk_vopraw", "dclk_vopraw_mux", 0, ++ RK1808_CLKGATE_CON(3), 3, GFLAGS), + -+static ssize_t store_target_loads(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned int *new_target_loads; -+ unsigned long flags; -+ int ntokens; ++ COMPOSITE(0, "dclk_voplite_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(7), 10, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "dclk_voplite_frac", "dclk_voplite_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(8), 0, ++ RK1808_CLKGATE_CON(3), 5, GFLAGS, ++ &rk1808_dclk_voplite_fracmux), ++ GATE(DCLK_VOPLITE, "dclk_voplite", "dclk_voplite_mux", 0, ++ RK1808_CLKGATE_CON(3), 6, GFLAGS), + -+ new_target_loads = get_tokenized_data(buf, &ntokens); -+ if (IS_ERR(new_target_loads)) -+ return PTR_ERR(new_target_loads); ++ COMPOSITE_NOMUX(SCLK_TXESC, "clk_txesc", "gpll", 0, ++ RK1808_CLKSEL_CON(9), 0, 12, DFLAGS, ++ RK1808_CLKGATE_CON(3), 7, GFLAGS), + -+ spin_lock_irqsave(&tunables->target_loads_lock, flags); -+ if (tunables->target_loads != default_target_loads) -+ kfree(tunables->target_loads); -+ tunables->target_loads = new_target_loads; -+ tunables->ntarget_loads = ntokens; -+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); ++ COMPOSITE(SCLK_RGA, "clk_rga", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(10), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 8, GFLAGS), + -+ return count; -+} ++ COMPOSITE(SCLK_ISP, "clk_isp", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(10), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 10, GFLAGS), + -+static ssize_t show_above_hispeed_delay(struct gov_attr_set *attr_set, -+ char *buf) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long flags; -+ ssize_t ret = 0; -+ int i; ++ COMPOSITE(DCLK_CIF, "dclk_cif", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(11), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(3), 11, GFLAGS), + -+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); ++ COMPOSITE(SCLK_CIF_OUT, "clk_cif_out", mux_24m_npll_gpll_usb480m_p, 0, ++ RK1808_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK1808_CLKGATE_CON(3), 9, GFLAGS), + -+ for (i = 0; i < tunables->nabove_hispeed_delay; i++) -+ ret += sprintf(buf + ret, "%u%s", -+ tunables->above_hispeed_delay[i], -+ i & 0x1 ? 
":" : " "); ++ /* ++ * Clock-Architecture Diagram 7 ++ */ + -+ sprintf(buf + ret - 1, "\n"); -+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); ++ /* PD_PCIE */ ++ COMPOSITE_NODIV(0, "clk_pcie_src", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(12), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(5), 0, GFLAGS), ++ DIV(HSCLK_PCIE, "hsclk_pcie", "clk_pcie_src", 0, ++ RK1808_CLKSEL_CON(12), 0, 5, DFLAGS), ++ DIV(LSCLK_PCIE, "lsclk_pcie", "clk_pcie_src", 0, ++ RK1808_CLKSEL_CON(12), 8, 5, DFLAGS), ++ GATE(0, "hsclk_pcie_niu", "hsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 0, GFLAGS), ++ GATE(0, "lsclk_pcie_niu", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(0, "pclk_pcie_grf", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(ACLK_USB3OTG, "aclk_usb3otg", "hsclk_pcie", 0, ++ RK1808_CLKGATE_CON(6), 6, GFLAGS), ++ GATE(HCLK_HOST, "hclk_host", "lsclk_pcie", 0, ++ RK1808_CLKGATE_CON(6), 7, GFLAGS), ++ GATE(HCLK_HOST_ARB, "hclk_host_arb", "lsclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 8, GFLAGS), + -+ return ret; -+} ++ COMPOSITE(ACLK_PCIE, "aclk_pcie", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(15), 8, 1, MFLAGS, 0, 4, DFLAGS, ++ RK1808_CLKGATE_CON(5), 5, GFLAGS), ++ DIV(0, "pclk_pcie_pre", "aclk_pcie", 0, ++ RK1808_CLKSEL_CON(15), 4, 4, DFLAGS), ++ GATE(0, "aclk_pcie_niu", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 10, GFLAGS), ++ GATE(ACLK_PCIE_MST, "aclk_pcie_mst", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(ACLK_PCIE_SLV, "aclk_pcie_slv", "aclk_pcie", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 3, GFLAGS), ++ GATE(0, "pclk_pcie_niu", "pclk_pcie_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 11, GFLAGS), ++ GATE(0, "pclk_pcie_dbi", "pclk_pcie_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(PCLK_PCIE, "pclk_pcie", "pclk_pcie_pre", 0, ++ RK1808_CLKGATE_CON(6), 9, GFLAGS), + -+static ssize_t store_above_hispeed_delay(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned int *new_above_hispeed_delay = NULL; -+ unsigned long flags; -+ int ntokens; ++ COMPOSITE(0, "clk_pcie_aux_src", mux_cpll_gpll_npll_p, 0, ++ RK1808_CLKSEL_CON(14), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(5), 3, GFLAGS), ++ COMPOSITE_NODIV(SCLK_PCIE_AUX, "clk_pcie_aux", mux_pcie_aux_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(14), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(5), 4, GFLAGS), + -+ new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); -+ if (IS_ERR(new_above_hispeed_delay)) -+ return PTR_ERR(new_above_hispeed_delay); ++ GATE(SCLK_USB3_OTG0_REF, "clk_usb3_otg0_ref", "xin24m", 0, ++ RK1808_CLKGATE_CON(5), 1, GFLAGS), + -+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); -+ if (tunables->above_hispeed_delay != default_above_hispeed_delay) -+ kfree(tunables->above_hispeed_delay); -+ tunables->above_hispeed_delay = new_above_hispeed_delay; -+ tunables->nabove_hispeed_delay = ntokens; -+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); ++ COMPOSITE(SCLK_USB3_OTG0_SUSPEND, "clk_usb3_otg0_suspend", mux_usb3_otg0_suspend_p, 0, ++ RK1808_CLKSEL_CON(13), 12, 1, MFLAGS, 0, 10, DFLAGS, ++ RK1808_CLKGATE_CON(5), 2, GFLAGS), + -+ return count; -+} ++ /* ++ * Clock-Architecture Diagram 8 ++ */ + -+static ssize_t store_hispeed_freq(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables 
= to_tunables(attr_set); -+ unsigned long int val; -+ int ret; ++ /* PD_PHP */ + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE_NODIV(0, "clk_peri_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(MSCLK_PERI, "msclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_PERI, "lsclk_peri", "clk_peri_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(19), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(0, "msclk_peri_niu", "msclk_peri", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(8), 3, GFLAGS), ++ GATE(0, "lsclk_peri_niu", "lsclk_peri", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(8), 4, GFLAGS), + -+ tunables->hispeed_freq = val; ++ /* PD_MMC */ + -+ return count; -+} ++ GATE(0, "hclk_mmc_sfc", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(9), 0, GFLAGS), ++ GATE(0, "hclk_mmc_sfc_niu", "hclk_mmc_sfc", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(9), 11, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_mmc_sfc", 0, ++ RK1808_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_mmc_sfc", 0, ++ RK1808_CLKGATE_CON(9), 13, GFLAGS), + -+static ssize_t store_go_hispeed_load(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(SCLK_SDIO_DIV, "clk_sdio_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDIO_DIV50, "clk_sdio_div50", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(22), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(23), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDIO, "clk_sdio", mux_sdio_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(23), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 3, GFLAGS), + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK1808_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", RK1808_SDIO_CON1, 1), + -+ tunables->go_hispeed_load = val; ++ COMPOSITE(SCLK_EMMC_DIV, "clk_emmc_div", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 4, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_EMMC_DIV50, "clk_emmc_div50", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(24), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(25), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 5, GFLAGS), ++ COMPOSITE_NODIV(SCLK_EMMC, "clk_emmc", mux_emmc_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(25), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 6, GFLAGS), ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK1808_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", RK1808_EMMC_CON1, 1), + -+ return count; -+} ++ COMPOSITE(SCLK_SDMMC_DIV, "clk_sdmmc_div", mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 7, GFLAGS), ++ COMPOSITE_DIV_OFFSET(SCLK_SDMMC_DIV50, "clk_sdmmc_div50", ++ mux_gpll_cpll_npll_24m_p, CLK_IGNORE_UNUSED, ++ RK1808_CLKSEL_CON(20), 14, 2, MFLAGS, ++ RK1808_CLKSEL_CON(21), 0, 8, DFLAGS, ++ RK1808_CLKGATE_CON(9), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_SDMMC, "clk_sdmmc", mux_sdmmc_p, CLK_SET_RATE_PARENT | 
CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(21), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(9), 9, GFLAGS), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK1808_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", RK1808_SDMMC_CON1, 1), + -+static ssize_t store_min_sample_time(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(SCLK_SFC, "clk_sfc", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(26), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(9), 10, GFLAGS), + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ /* PD_MAC */ + -+ tunables->min_sample_time = val; ++ GATE(0, "pclk_sd_gmac", "lsclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(0, "aclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 0, GFLAGS), ++ GATE(0, "hclk_sd_gmac", "msclk_peri", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(0, "pclk_gmac_niu", "pclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 10, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 12, GFLAGS), ++ GATE(0, "aclk_gmac_niu", "aclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 11, GFLAGS), ++ GATE(0, "hclk_gmac_niu", "hclk_sd_gmac", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(10), 9, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd_gmac", 0, ++ RK1808_CLKGATE_CON(10), 14, GFLAGS), + -+ return count; -+} ++ COMPOSITE(SCLK_GMAC_OUT, "clk_gmac_out", mux_cpll_npll_ppll_p, 0, ++ RK1808_CLKSEL_CON(18), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(10), 15, GFLAGS), + -+static ssize_t show_timer_rate(struct gov_attr_set *attr_set, char *buf) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); ++ COMPOSITE(SCLK_GMAC_SRC, "clk_gmac_src", mux_cpll_npll_ppll_p, 0, ++ RK1808_CLKSEL_CON(26), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(10), 3, GFLAGS), ++ MUX(SCLK_GMAC, "clk_gmac", mux_gmac_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK1808_CLKSEL_CON(27), 0, 1, MFLAGS), ++ GATE(SCLK_GMAC_REF, "clk_gmac_ref", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(0, "clk_gmac_tx_src", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(0, "clk_gmac_rx_src", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 6, GFLAGS), ++ GATE(SCLK_GMAC_REFOUT, "clk_gmac_refout", "clk_gmac", 0, ++ RK1808_CLKGATE_CON(10), 5, GFLAGS), ++ FACTOR(0, "clk_gmac_tx_div5", "clk_gmac_tx_src", 0, 1, 5), ++ FACTOR(0, "clk_gmac_tx_div50", "clk_gmac_tx_src", 0, 1, 50), ++ FACTOR(0, "clk_gmac_rx_div2", "clk_gmac_rx_src", 0, 1, 2), ++ FACTOR(0, "clk_gmac_rx_div20", "clk_gmac_rx_src", 0, 1, 20), ++ MUX(SCLK_GMAC_RGMII_SPEED, "clk_gmac_rgmii_speed", mux_gmac_rgmii_speed_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 2, 2, MFLAGS), ++ MUX(SCLK_GMAC_RMII_SPEED, "clk_gmac_rmii_speed", mux_gmac_rmii_speed_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 1, 1, MFLAGS), ++ MUX(SCLK_GMAC_RX_TX, "clk_gmac_rx_tx", mux_gmac_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(27), 4, 1, MFLAGS), + -+ return sprintf(buf, "%lu\n", tunables->sampling_rate); -+} ++ /* ++ * Clock-Architecture Diagram 9 ++ */ + -+static ssize_t store_timer_rate(struct gov_attr_set *attr_set, const char *buf, -+ size_t count) 
-+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val, val_round; -+ int ret; ++ /* PD_BUS */ + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE_NODIV(0, "clk_bus_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(27), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX(HSCLK_BUS_PRE, "hsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(27), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NOMUX(MSCLK_BUS_PRE, "msclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(28), 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 2, GFLAGS), ++ COMPOSITE_NOMUX(LSCLK_BUS_PRE, "lsclk_bus_pre", "clk_bus_src", CLK_IS_CRITICAL, ++ RK1808_CLKSEL_CON(28), 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(0, "hsclk_bus_niu", "hsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(0, "msclk_bus_niu", "msclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 1, GFLAGS), ++ GATE(0, "msclk_sub", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(15), 2, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(14), 15, GFLAGS), ++ GATE(HCLK_ROM, "hclk_rom", "msclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(15), 4, GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 5, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 6, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(0, "lsclk_bus_niu", "lsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(15), 3, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 8, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 9, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 10, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 12, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 13, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 14, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(15), 15, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 2, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 5, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 3, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 9, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 10, GFLAGS), ++ GATE(PCLK_EFUSE, "pclk_efuse", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "lsclk_bus_pre", 0, ++ 
RK1808_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 13, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 14, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 15, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(PCLK_PWM2, "pclk_pwm2", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(PCLK_WDT, "pclk_wdt", "lsclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 1, GFLAGS), ++ GATE(0, "pclk_grf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(0, "pclk_sgrf", "lsclk_bus_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(17), 3, GFLAGS), ++ GATE(0, "hclk_audio_pre", "msclk_bus_pre", 0, ++ RK1808_CLKGATE_CON(17), 8, GFLAGS), ++ GATE(0, "pclk_top_pre", "lsclk_bus_pre", CLK_IS_CRITICAL, ++ RK1808_CLKGATE_CON(11), 4, GFLAGS), + -+ val_round = jiffies_to_usecs(usecs_to_jiffies(val)); -+ if (val != val_round) -+ pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n", -+ val_round); ++ COMPOSITE(SCLK_CRYPTO, "clk_crypto", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE(SCLK_CRYPTO_APK, "clk_crypto_apk", mux_gpll_cpll_p, 0, ++ RK1808_CLKSEL_CON(29), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK1808_CLKGATE_CON(11), 6, GFLAGS), + -+ tunables->sampling_rate = val_round; ++ COMPOSITE(0, "clk_uart1_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(38), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 8, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart1_np5", "clk_uart1_src", 0, ++ RK1808_CLKSEL_CON(39), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(40), 0, ++ RK1808_CLKGATE_CON(11), 10, GFLAGS, ++ &rk1808_uart1_fracmux), ++ GATE(SCLK_UART1, "clk_uart1", "clk_uart1_mux", 0, ++ RK1808_CLKGATE_CON(11), 11, GFLAGS), + -+ return count; -+} ++ COMPOSITE(0, "clk_uart2_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(41), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart2_np5", "clk_uart2_src", 0, ++ RK1808_CLKSEL_CON(42), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(11), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(43), 0, ++ RK1808_CLKGATE_CON(11), 14, GFLAGS, ++ &rk1808_uart2_fracmux), ++ GATE(SCLK_UART2, "clk_uart2", "clk_uart2_mux", 0, ++ RK1808_CLKGATE_CON(11), 15, GFLAGS), + -+static ssize_t store_timer_slack(struct gov_attr_set *attr_set, const char *buf, -+ size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(0, "clk_uart3_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(44), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart3_np5", "clk_uart3_src", 0, ++ RK1808_CLKSEL_CON(45), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(46), 0, ++ RK1808_CLKGATE_CON(12), 2, GFLAGS, ++ &rk1808_uart3_fracmux), ++ 
GATE(SCLK_UART3, "clk_uart3", "clk_uart3_mux", 0, ++ RK1808_CLKGATE_CON(12), 3, GFLAGS), + -+ ret = kstrtol(buf, 10, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE(0, "clk_uart4_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(47), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 4, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart4_np5", "clk_uart4_src", 0, ++ RK1808_CLKSEL_CON(48), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 5, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(49), 0, ++ RK1808_CLKGATE_CON(12), 6, GFLAGS, ++ &rk1808_uart4_fracmux), ++ GATE(SCLK_UART4, "clk_uart4", "clk_uart4_mux", 0, ++ RK1808_CLKGATE_CON(12), 7, GFLAGS), + -+ tunables->timer_slack = val; -+ update_slack_delay(tunables); ++ COMPOSITE(0, "clk_uart5_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(50), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 8, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart5_np5", "clk_uart5_src", 0, ++ RK1808_CLKSEL_CON(51), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(52), 0, ++ RK1808_CLKGATE_CON(12), 10, GFLAGS, ++ &rk1808_uart5_fracmux), ++ GATE(SCLK_UART5, "clk_uart5", "clk_uart5_mux", 0, ++ RK1808_CLKGATE_CON(12), 11, GFLAGS), + -+ return count; -+} ++ COMPOSITE(0, "clk_uart6_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(53), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 12, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart6_np5", "clk_uart6_src", 0, ++ RK1808_CLKSEL_CON(54), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(12), 13, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(55), 0, ++ RK1808_CLKGATE_CON(12), 14, GFLAGS, ++ &rk1808_uart6_fracmux), ++ GATE(SCLK_UART6, "clk_uart6", "clk_uart6_mux", 0, ++ RK1808_CLKGATE_CON(12), 15, GFLAGS), + -+static ssize_t store_boost(struct gov_attr_set *attr_set, const char *buf, -+ size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(0, "clk_uart7_src", mux_gpll_usb480m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(56), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart7_np5", "clk_uart7_src", 0, ++ RK1808_CLKSEL_CON(57), 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(58), 0, ++ RK1808_CLKGATE_CON(13), 2, GFLAGS, ++ &rk1808_uart7_fracmux), ++ GATE(SCLK_UART7, "clk_uart7", "clk_uart7_mux", 0, ++ RK1808_CLKGATE_CON(13), 3, GFLAGS), + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE(SCLK_I2C1, "clk_i2c1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(59), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 4, GFLAGS), ++ COMPOSITE(SCLK_I2C2, "clk_i2c2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(59), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 5, GFLAGS), ++ COMPOSITE(SCLK_I2C3, "clk_i2c3", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(60), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 6, GFLAGS), ++ COMPOSITE(SCLK_I2C4, "clk_i2c4", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(71), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(14), 6, GFLAGS), ++ COMPOSITE(SCLK_I2C5, "clk_i2c5", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(71), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(14), 7, 
GFLAGS), + -+ tunables->boost = val; ++ COMPOSITE(SCLK_SPI0, "clk_spi0", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(60), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 7, GFLAGS), ++ COMPOSITE(SCLK_SPI1, "clk_spi1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(61), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE(SCLK_SPI2, "clk_spi2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(61), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 9, GFLAGS), + -+ if (tunables->boost) { -+ trace_cpufreq_interactive_boost("on"); -+ if (!tunables->boosted) -+ cpufreq_interactive_boost(tunables); -+ } else { -+ tunables->boostpulse_endtime = ktime_to_us(ktime_get()); -+ trace_cpufreq_interactive_unboost("off"); -+ } ++ COMPOSITE_NOMUX(SCLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK1808_CLKSEL_CON(62), 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(13), 13, GFLAGS), ++ COMPOSITE_NOMUX(SCLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK1808_CLKSEL_CON(63), 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(13), 14, GFLAGS), + -+ return count; -+} ++ COMPOSITE(SCLK_EFUSE_S, "clk_efuse_s", mux_gpll_cpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(64), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK1808_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE(SCLK_EFUSE_NS, "clk_efuse_ns", mux_gpll_cpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(64), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK1808_CLKGATE_CON(14), 1, GFLAGS), + -+static ssize_t store_boostpulse(struct gov_attr_set *attr_set, const char *buf, -+ size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(DBCLK_GPIO1, "dbclk_gpio1", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(65), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 2, GFLAGS), ++ COMPOSITE(DBCLK_GPIO2, "dbclk_gpio2", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(66), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 3, GFLAGS), ++ COMPOSITE(DBCLK_GPIO3, "dbclk_gpio3", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(67), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 4, GFLAGS), ++ COMPOSITE(DBCLK_GPIO4, "dbclk_gpio4", mux_xin24m_32k_p, 0, ++ RK1808_CLKSEL_CON(68), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_CLKGATE_CON(14), 5, GFLAGS), + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE(SCLK_PWM0, "clk_pwm0", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(69), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 10, GFLAGS), ++ COMPOSITE(SCLK_PWM1, "clk_pwm1", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(69), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 11, GFLAGS), ++ COMPOSITE(SCLK_PWM2, "clk_pwm2", mux_gpll_xin24m_p, 0, ++ RK1808_CLKSEL_CON(70), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(13), 12, GFLAGS), + -+ tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + -+ tunables->boostpulse_duration; -+ trace_cpufreq_interactive_boost("pulse"); -+ if (!tunables->boosted) -+ cpufreq_interactive_boost(tunables); ++ GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(SCLK_TIMER1, "sclk_timer1", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(SCLK_TIMER2, "sclk_timer2", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 10, GFLAGS), ++ GATE(SCLK_TIMER3, "sclk_timer3", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 11, GFLAGS), ++ GATE(SCLK_TIMER4, "sclk_timer4", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 12, GFLAGS), ++ GATE(SCLK_TIMER5, "sclk_timer5", "xin24m", 0, ++ RK1808_CLKGATE_CON(14), 13, GFLAGS), + -+ return count; -+} ++ /* ++ * Clock-Architecture Diagram 10 ++ */ + 
-+static ssize_t store_boostpulse_duration(struct gov_attr_set *attr_set, -+ const char *buf, size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ /* PD_AUDIO */ + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ GATE(0, "hclk_audio_niu", "hclk_audio_pre", CLK_IGNORE_UNUSED, ++ RK1808_CLKGATE_CON(18), 11, GFLAGS), ++ GATE(HCLK_VAD, "hclk_vad", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 12, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 13, GFLAGS), ++ GATE(HCLK_I2S0_8CH, "hclk_i2s0_8ch", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 14, GFLAGS), ++ GATE(HCLK_I2S1_2CH, "hclk_i2s1_2ch", "hclk_audio_pre", 0, ++ RK1808_CLKGATE_CON(18), 15, GFLAGS), + -+ tunables->boostpulse_duration = val; ++ COMPOSITE(0, "clk_pdm_src", mux_gpll_xin24m_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(30), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(17), 9, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_pdm_frac", "clk_pdm_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(31), 0, ++ RK1808_CLKGATE_CON(17), 10, GFLAGS, ++ &rk1808_pdm_fracmux), ++ GATE(SCLK_PDM, "clk_pdm", "clk_pdm_mux", 0, ++ RK1808_CLKGATE_CON(17), 11, GFLAGS), + -+ return count; -+} ++ COMPOSITE(SCLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(32), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(17), 12, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(33), 0, ++ RK1808_CLKGATE_CON(17), 13, GFLAGS, ++ &rk1808_i2s0_8ch_tx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", mux_i2s0_8ch_tx_rx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(17), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_TX_OUT, "clk_i2s0_8ch_tx_out", mux_i2s0_8ch_tx_out_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(32), 14, 2, MFLAGS, ++ RK1808_CLKGATE_CON(17), 15, GFLAGS), + -+static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, -+ size_t count) -+{ -+ struct interactive_tunables *tunables = to_tunables(attr_set); -+ unsigned long val; -+ int ret; ++ COMPOSITE(SCLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(34), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(18), 0, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(35), 0, ++ RK1808_CLKGATE_CON(18), 1, GFLAGS, ++ &rk1808_i2s0_8ch_rx_fracmux), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", mux_i2s0_8ch_rx_tx_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 12, 1, MFLAGS, ++ RK1808_CLKGATE_CON(18), 2, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S0_8CH_RX_OUT, "clk_i2s0_8ch_rx_out", mux_i2s0_8ch_rx_out_p, CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(34), 14, 2, MFLAGS, ++ RK1808_CLKGATE_CON(18), 3, GFLAGS), + -+ ret = kstrtoul(buf, 0, &val); -+ if (ret < 0) -+ return ret; ++ COMPOSITE(SCLK_I2S1_2CH_SRC, "clk_i2s1_2ch_src", mux_gpll_cpll_npll_p, 0, ++ RK1808_CLKSEL_CON(36), 8, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_CLKGATE_CON(18), 4, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_i2s1_2ch_frac", "clk_i2s1_2ch_src", CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(37), 0, ++ RK1808_CLKGATE_CON(18), 5, GFLAGS, ++ &rk1808_i2s1_2ch_fracmux), ++ GATE(SCLK_I2S1_2CH, "clk_i2s1_2ch", "clk_i2s1_2ch_mux", 0, ++ RK1808_CLKGATE_CON(18), 6, GFLAGS), ++ COMPOSITE_NODIV(SCLK_I2S1_2CH_OUT, "clk_i2s1_2ch_out", mux_i2s1_2ch_out_p, 
CLK_SET_RATE_PARENT, ++ RK1808_CLKSEL_CON(36), 15, 1, MFLAGS, ++ RK1808_CLKGATE_CON(18), 7, GFLAGS), + -+ tunables->io_is_busy = val; ++ /* ++ * Clock-Architecture Diagram 10 ++ */ + -+ return count; -+} ++ /* PD_BUS */ + -+show_one(hispeed_freq, "%u"); -+show_one(go_hispeed_load, "%lu"); -+show_one(min_sample_time, "%lu"); -+show_one(timer_slack, "%lu"); -+show_one(boost, "%u"); -+show_one(boostpulse_duration, "%u"); -+show_one(io_is_busy, "%u"); ++ GATE(0, "pclk_top_niu", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(0, "pclk_top_cru", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(0, "pclk_ddrphy", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(PCLK_MIPIDSIPHY, "pclk_mipidsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_top_pre", 0, RK1808_CLKGATE_CON(19), 4, GFLAGS), + -+gov_attr_rw(target_loads); -+gov_attr_rw(above_hispeed_delay); -+gov_attr_rw(hispeed_freq); -+gov_attr_rw(go_hispeed_load); -+gov_attr_rw(min_sample_time); -+gov_attr_rw(timer_rate); -+gov_attr_rw(timer_slack); -+gov_attr_rw(boost); -+gov_attr_wo(boostpulse); -+gov_attr_rw(boostpulse_duration); -+gov_attr_rw(io_is_busy); ++ GATE(PCLK_USB3PHY_PIPE, "pclk_usb3phy_pipe", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 6, GFLAGS), ++ GATE(0, "pclk_usb3_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(0, "pclk_usb_grf", "pclk_top_pre", CLK_IGNORE_UNUSED, RK1808_CLKGATE_CON(19), 8, GFLAGS), + -+static struct attribute *interactive_attrs[] = { -+ &target_loads.attr, -+ &above_hispeed_delay.attr, -+ &hispeed_freq.attr, -+ &go_hispeed_load.attr, -+ &min_sample_time.attr, -+ &timer_rate.attr, -+ &timer_slack.attr, -+ &boost.attr, -+ &boostpulse.attr, -+ &boostpulse_duration.attr, -+ &io_is_busy.attr, -+ NULL -+}; -+ATTRIBUTE_GROUPS(interactive); ++ /* ++ * Clock-Architecture Diagram 11 ++ */ + -+static struct kobj_type interactive_tunables_ktype = { -+ .default_groups = interactive_groups, -+ .sysfs_ops = &governor_sysfs_ops, -+}; ++ /* PD_PMU */ + -+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, -+ unsigned long val, void *data) -+{ -+ if (val == IDLE_END) -+ cpufreq_interactive_idle_end(); ++ COMPOSITE_FRACMUX(SCLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_PMU_CLKSEL_CON(1), 0, ++ RK1808_PMU_CLKGATE_CON(0), 13, GFLAGS, ++ &rk1808_rtc32k_pmu_fracmux), + -+ return 0; -+} ++ COMPOSITE_NOMUX(XIN24M_DIV, "xin24m_div", "xin24m", CLK_IGNORE_UNUSED, ++ RK1808_PMU_CLKSEL_CON(0), 8, 5, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 12, GFLAGS), + -+static struct notifier_block cpufreq_interactive_idle_nb = { -+ .notifier_call = cpufreq_interactive_idle_notifier, -+}; ++ COMPOSITE_NOMUX(0, "clk_wifi_pmu_src", "ppll", 0, ++ RK1808_PMU_CLKSEL_CON(2), 8, 6, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_NODIV(SCLK_WIFI_PMU, "clk_wifi_pmu", mux_wifi_pmu_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 15, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 15, GFLAGS), + -+/* Interactive Governor callbacks */ -+struct interactive_governor { -+ struct cpufreq_governor gov; -+ unsigned int usage_count; -+}; ++ COMPOSITE(0, "clk_uart0_pmu_src", mux_gpll_usb480m_cpll_ppll_p, 0, ++ RK1808_PMU_CLKSEL_CON(3), 14, 2, MFLAGS, 0, 7, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(0, "clk_uart0_np5", "clk_uart0_pmu_src", 0, ++ RK1808_PMU_CLKSEL_CON(4), 0, 7, DFLAGS, 
++ RK1808_PMU_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_pmu_src", CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(5), 0, ++ RK1808_PMU_CLKGATE_CON(1), 2, GFLAGS, ++ &rk1808_uart0_pmu_fracmux), ++ GATE(SCLK_UART0_PMU, "clk_uart0_pmu", "clk_uart0_pmu_mux", CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKGATE_CON(1), 3, GFLAGS), + -+static struct interactive_governor interactive_gov; ++ GATE(SCLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RK1808_PMU_CLKGATE_CON(1), 4, GFLAGS), + -+#define CPU_FREQ_GOV_INTERACTIVE (&interactive_gov.gov) ++ COMPOSITE(SCLK_PMU_I2C0, "clk_pmu_i2c0", mux_ppll_xin24m_p, 0, ++ RK1808_PMU_CLKSEL_CON(7), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 5, GFLAGS), + -+static void irq_work(struct irq_work *irq_work) -+{ -+ struct interactive_cpu *icpu = container_of(irq_work, struct -+ interactive_cpu, irq_work); ++ COMPOSITE(DBCLK_PMU_GPIO0, "dbclk_gpio0", mux_xin24m_32k_p, 0, ++ RK1808_PMU_CLKSEL_CON(6), 15, 1, MFLAGS, 0, 11, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 6, GFLAGS), + -+ cpufreq_interactive_update(icpu); -+ icpu->work_in_progress = false; -+} ++ COMPOSITE_NOMUX(SCLK_REF24M_PMU, "clk_ref24m_pmu", "ppll", 0, ++ RK1808_PMU_CLKSEL_CON(2), 0, 6, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_NODIV(SCLK_USBPHY_REF, "clk_usbphy_ref", mux_usbphy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 6, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE_NODIV(SCLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", mux_mipidsiphy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(2), 7, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 10, GFLAGS), + -+static void update_util_handler(struct update_util_data *data, u64 time, -+ unsigned int flags) -+{ -+ struct interactive_cpu *icpu = container_of(data, -+ struct interactive_cpu, update_util); -+ struct interactive_policy *ipolicy = icpu->ipolicy; -+ struct interactive_tunables *tunables = ipolicy->tunables; -+ u64 delta_ns; ++ FACTOR(0, "clk_ppll_ph0", "ppll", 0, 1, 2), ++ COMPOSITE_NOMUX(0, "clk_pciephy_src", "clk_ppll_ph0", 0, ++ RK1808_PMU_CLKSEL_CON(7), 0, 2, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_NODIV(SCLK_PCIEPHY_REF, "clk_pciephy_ref", mux_pciephy_ref_p, CLK_SET_RATE_PARENT, ++ RK1808_PMU_CLKSEL_CON(7), 4, 1, MFLAGS, ++ RK1808_PMU_CLKGATE_CON(1), 12, GFLAGS), + -+ /* -+ * The irq-work may not be allowed to be queued up right now. -+ * Possible reasons: -+ * - Work has already been queued up or is in progress. -+ * - It is too early (too little time from the previous sample). 
-+ */ -+ if (icpu->work_in_progress) -+ return; ++ COMPOSITE_NOMUX(PCLK_PMU_PRE, "pclk_pmu_pre", "ppll", CLK_IS_CRITICAL, ++ RK1808_PMU_CLKSEL_CON(0), 0, 5, DFLAGS, ++ RK1808_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ delta_ns = time - icpu->last_sample_time; -+ if ((s64)delta_ns < tunables->sampling_rate * NSEC_PER_USEC) -+ return; ++ GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IS_CRITICAL, RK1808_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(0, "pclk_pmu_sgrf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(0, "pclk_pmu_grf", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(0, "pclk_pmu_mem", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_GPIO0_PMU, "pclk_gpio0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_UART0_PMU, "pclk_uart0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(0, "pclk_cru_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK1808_PMU_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_I2C0_PMU, "pclk_i2c0_pmu", "pclk_pmu_pre", 0, RK1808_PMU_CLKGATE_CON(0), 9, GFLAGS), + -+ icpu->last_sample_time = time; -+ icpu->next_sample_jiffies = usecs_to_jiffies(tunables->sampling_rate) + -+ jiffies; ++ MUXPMUGRF(SCLK_32K_IOE, "clk_32k_ioe", mux_clk_32k_ioe_p, 0, ++ RK1808_PMUGRF_SOC_CON0, 0, 1, MFLAGS) ++}; + -+ icpu->work_in_progress = true; -+ irq_work_queue_on(&icpu->irq_work, icpu->cpu); -+} ++static void __iomem *rk1808_cru_base; + -+static void gov_set_update_util(struct interactive_policy *ipolicy) ++void rk1808_dump_cru(void) +{ -+ struct cpufreq_policy *policy = ipolicy->policy; -+ struct interactive_cpu *icpu; -+ int cpu; -+ -+ for_each_cpu(cpu, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, cpu); -+ -+ icpu->last_sample_time = 0; -+ icpu->next_sample_jiffies = 0; -+ cpufreq_add_update_util_hook(cpu, &icpu->update_util, -+ update_util_handler); ++ if (rk1808_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk1808_cru_base, ++ 0x500, false); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk1808_cru_base + 0x4000, ++ 0x100, false); + } +} ++EXPORT_SYMBOL_GPL(rk1808_dump_cru); + -+static inline void gov_clear_update_util(struct cpufreq_policy *policy) ++static int rk1808_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) +{ -+ int i; -+ -+ for_each_cpu(i, policy->cpus) -+ cpufreq_remove_update_util_hook(i); -+ -+ synchronize_rcu(); ++ rk1808_dump_cru(); ++ return NOTIFY_DONE; +} + -+static void icpu_cancel_work(struct interactive_cpu *icpu) -+{ -+ irq_work_sync(&icpu->irq_work); -+ icpu->work_in_progress = false; -+ del_timer_sync(&icpu->slack_timer); -+} ++static struct notifier_block rk1808_clk_panic_block = { ++ .notifier_call = rk1808_clk_panic, ++}; + -+static struct interactive_policy * -+interactive_policy_alloc(struct cpufreq_policy *policy) ++static void __init rk1808_clk_init(struct device_node *np) +{ -+ struct interactive_policy *ipolicy; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+ ipolicy = kzalloc(sizeof(*ipolicy), GFP_KERNEL); -+ if (!ipolicy) -+ return NULL; ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; ++ } + -+ ipolicy->policy = policy; ++ rk1808_cru_base = reg_base; + -+ return ipolicy; -+} ++ ctx = rockchip_clk_init(np, 
reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; ++ } ++ clks = ctx->clk_data.clks; + -+static void interactive_policy_free(struct interactive_policy *ipolicy) -+{ -+ kfree(ipolicy); -+} ++ rockchip_clk_register_plls(ctx, rk1808_pll_clks, ++ ARRAY_SIZE(rk1808_pll_clks), ++ RK1808_GRF_SOC_STATUS0); ++ rockchip_clk_register_branches(ctx, rk1808_clk_branches, ++ ARRAY_SIZE(rk1808_clk_branches)); + -+static struct interactive_tunables * -+interactive_tunables_alloc(struct interactive_policy *ipolicy) -+{ -+ struct interactive_tunables *tunables; ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 3, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk1808_cpuclk_data, rk1808_cpuclk_rates, ++ ARRAY_SIZE(rk1808_cpuclk_rates)); + -+ tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); -+ if (!tunables) -+ return NULL; ++ rockchip_register_softrst(np, 16, reg_base + RK1808_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ gov_attr_set_init(&tunables->attr_set, &ipolicy->tunables_hook); -+ if (!have_governor_per_policy()) -+ global_tunables = tunables; ++ rockchip_register_restart_notifier(ctx, RK1808_GLB_SRST_FST, NULL); + -+ ipolicy->tunables = tunables; ++ rockchip_clk_of_add_provider(np, ctx); + -+ return tunables; ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rk1808_clk_panic_block); +} + -+static void interactive_tunables_free(struct interactive_tunables *tunables) -+{ -+ if (!have_governor_per_policy()) -+ global_tunables = NULL; -+ -+ kfree(tunables); -+} ++CLK_OF_DECLARE(rk1808_cru, "rockchip,rk1808-cru", rk1808_clk_init); + -+#ifdef CONFIG_ARCH_ROCKCHIP -+static void cpufreq_interactive_input_event(struct input_handle *handle, -+ unsigned int type, -+ unsigned int code, -+ int value) ++static int __init clk_rk1808_probe(struct platform_device *pdev) +{ -+ u64 now, endtime; -+ int i; -+ int anyboost = 0; -+ unsigned long flags[2]; -+ struct interactive_cpu *pcpu; -+ struct interactive_tunables *tunables; -+ -+ if (type != EV_ABS && type != EV_KEY && type != EV_REL) -+ return; ++ struct device_node *np = pdev->dev.of_node; + -+ trace_cpufreq_interactive_boost("touch"); -+ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); ++ rk1808_clk_init(np); + -+ now = ktime_to_us(ktime_get()); -+ for_each_online_cpu(i) { -+ pcpu = &per_cpu(interactive_cpu, i); -+ if (!down_read_trylock(&pcpu->enable_sem)) -+ continue; ++ return 0; ++} + -+ if (!pcpu->ipolicy) { -+ up_read(&pcpu->enable_sem); -+ continue; -+ } ++static const struct of_device_id clk_rk1808_match_table[] = { ++ { ++ .compatible = "rockchip,rk1808-cru", ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk1808_match_table); + -+ tunables = pcpu->ipolicy->tunables; -+ if (!tunables) { -+ up_read(&pcpu->enable_sem); -+ continue; -+ } ++static struct platform_driver clk_rk1808_driver = { ++ .driver = { ++ .name = "clk-rk1808", ++ .of_match_table = clk_rk1808_match_table, ++ }, ++}; ++builtin_platform_driver_probe(clk_rk1808_driver, clk_rk1808_probe); + -+ endtime = now + tunables->touchboostpulse_duration_val; -+ if (endtime < (tunables->touchboostpulse_endtime + -+ 10 * USEC_PER_MSEC)) { -+ up_read(&pcpu->enable_sem); -+ continue; -+ } -+ tunables->touchboostpulse_endtime = endtime; ++MODULE_DESCRIPTION("Rockchip RK1808 Clock Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip/clk-rk3528.c b/drivers/clk/rockchip/clk-rk3528.c +new file mode 100644 +index 000000000..1b14cd57b +--- /dev/null ++++ b/drivers/clk/rockchip/clk-rk3528.c 
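The clk-rk1808.c provider added above hands every PLL, mux, divider and gate to the common clock framework, so peripheral drivers consume these clocks by name through the generic clk API instead of touching CRU registers. The sketch below is not part of this patch: it is a minimal, hypothetical consumer (the compatible string "example,rk1808-clk-demo" and the clock name "sclk_uart" are made up for illustration) showing the usual devm_clk_get() / clk_prepare_enable() / clk_set_rate() pattern against a provider registered this way.

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int rk1808_clk_demo_probe(struct platform_device *pdev)
{
	struct clk *sclk;
	int ret;

	/* Look up the clock by the name listed in this node's "clock-names". */
	sclk = devm_clk_get(&pdev->dev, "sclk_uart");
	if (IS_ERR(sclk))
		return PTR_ERR(sclk);

	/* Ungate the branch (and implicitly its parents) before use. */
	ret = clk_prepare_enable(sclk);
	if (ret)
		return ret;

	/* Let the clk core pick mux/divider settings for the requested rate. */
	ret = clk_set_rate(sclk, 24000000);
	if (ret)
		dev_warn(&pdev->dev, "clk_set_rate failed: %d\n", ret);

	dev_info(&pdev->dev, "demo clock running at %lu Hz\n", clk_get_rate(sclk));

	clk_disable_unprepare(sclk);
	return 0;
}

static const struct of_device_id rk1808_clk_demo_of_match[] = {
	{ .compatible = "example,rk1808-clk-demo" },	/* hypothetical */
	{ }
};
MODULE_DEVICE_TABLE(of, rk1808_clk_demo_of_match);

static struct platform_driver rk1808_clk_demo_driver = {
	.probe = rk1808_clk_demo_probe,
	.driver = {
		.name = "rk1808-clk-demo",
		.of_match_table = rk1808_clk_demo_of_match,
	},
};
module_platform_driver(rk1808_clk_demo_driver);

MODULE_DESCRIPTION("Illustrative consumer of a Rockchip CRU clock (not part of this patch)");
MODULE_LICENSE("GPL");

A consumer like this can probe at any point: if the CRU provider is not registered yet, the clk core returns -EPROBE_DEFER from the lookup and the probe is retried later.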
+@@ -0,0 +1,1174 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Joseph Chen ++ */ + -+ spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); -+ if (pcpu->target_freq < tunables->touchboost_freq) { -+ pcpu->target_freq = tunables->touchboost_freq; -+ cpumask_set_cpu(i, &speedchange_cpumask); -+ pcpu->loc_hispeed_val_time = -+ ktime_to_us(ktime_get()); -+ anyboost = 1; -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ pcpu->floor_freq = tunables->touchboost_freq; -+ pcpu->loc_floor_val_time = ktime_to_us(ktime_get()); ++/* A placeholder for rk3066 pll type. We are rk3328 pll type */ ++#define RK3528_GRF_SOC_STATUS0 0x1a0 + -+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); ++enum rk3528_plls { ++ apll, cpll, gpll, ppll, dpll, ++}; + -+ up_read(&pcpu->enable_sem); -+ } ++/* ++ * ## PLL attention. ++ * ++ * [FRAC PLL]: GPLL, PPLL, DPLL ++ * - frac mode: refdiv can be 1 or 2 only ++ * - int mode: refdiv has no special limit ++ * - VCO range: [950, 3800] MHZ ++ * ++ * [INT PLL]: CPLL, APLL ++ * - int mode: refdiv can be 1 or 2 only ++ * - VCO range: [475, 1900] MHZ ++ * ++ * [PPLL]: normal mode only. ++ * ++ * ++ * ## CRU access attention. ++ * ++ * pclk_cru => pclk_vo_root => aclk_vo_root ++ * pclk_cru_pcie => pclk_vpu_root => aclk_vpu_root ++ * pclk_cru_ddrphy => hclk_rkvdec_root => aclk_rkvdec_root ++ */ ++static struct rockchip_pll_rate_table rk3528_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 63, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 59, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 54, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 50, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), /* GPLL */ ++ RK3036_PLL_RATE(1092000000, 2, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 42, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 1, 125, 3, 1, 1, 0), /* PPLL */ ++ RK3036_PLL_RATE(996000000, 2, 83, 1, 1, 1, 0), /* CPLL */ ++ RK3036_PLL_RATE(960000000, 1, 40, 1, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 50, 2, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 2, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 24, 3, 2, 1, 0), ++ { /* sentinel */ }, ++}; + -+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); ++#define RK3528_DIV_ACLK_M_CORE_MASK 0x1f ++#define RK3528_DIV_ACLK_M_CORE_SHIFT 11 ++#define RK3528_DIV_PCLK_DBG_MASK 0x1f ++#define RK3528_DIV_PCLK_DBG_SHIFT 1 + -+ if (anyboost) -+ wake_up_process(speedchange_task); ++#define RK3528_CLKSEL39(_aclk_m_core) \ ++{ \ ++ .reg = RK3528_CLKSEL_CON(39), \ ++ .val = HIWORD_UPDATE(_aclk_m_core, RK3528_DIV_ACLK_M_CORE_MASK, \ ++ RK3528_DIV_ACLK_M_CORE_SHIFT), \ +} + -+static int cpufreq_interactive_input_connect(struct input_handler *handler, -+ struct input_dev *dev, -+ const struct input_device_id *id) -+{ -+ struct input_handle *handle; -+ int error; -+ -+ handle = kzalloc(sizeof(*handle), GFP_KERNEL); -+ if (!handle) -+ return -ENOMEM; -+ -+ handle->dev = dev; -+ 
handle->handler = handler; -+ handle->name = "cpufreq"; -+ -+ error = input_register_handle(handle); -+ if (error) -+ goto err2; -+ -+ error = input_open_device(handle); -+ if (error) -+ goto err1; -+ -+ return 0; -+err1: -+ input_unregister_handle(handle); -+err2: -+ kfree(handle); -+ return error; ++#define RK3528_CLKSEL40(_pclk_dbg) \ ++{ \ ++ .reg = RK3528_CLKSEL_CON(40), \ ++ .val = HIWORD_UPDATE(_pclk_dbg, RK3528_DIV_PCLK_DBG_MASK, \ ++ RK3528_DIV_PCLK_DBG_SHIFT), \ +} + -+static void cpufreq_interactive_input_disconnect(struct input_handle *handle) -+{ -+ input_close_device(handle); -+ input_unregister_handle(handle); -+ kfree(handle); ++/* SIGN-OFF: _aclk_m_core: 550M, _pclk_dbg: 137.5M, */ ++#define RK3528_CPUCLK_RATE(_prate, _aclk_m_core, _pclk_dbg) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RK3528_CLKSEL39(_aclk_m_core), \ ++ RK3528_CLKSEL40(_pclk_dbg), \ ++ }, \ +} + -+static const struct input_device_id cpufreq_interactive_ids[] = { -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | -+ INPUT_DEVICE_ID_MATCH_ABSBIT, -+ .evbit = { BIT_MASK(EV_ABS) }, -+ .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = -+ BIT_MASK(ABS_MT_POSITION_X) | -+ BIT_MASK(ABS_MT_POSITION_Y) }, -+ }, -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_KEYBIT | -+ INPUT_DEVICE_ID_MATCH_ABSBIT, -+ .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, -+ .absbit = { [BIT_WORD(ABS_X)] = -+ BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) }, -+ }, -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, -+ .evbit = { BIT_MASK(EV_KEY) }, -+ }, -+ {/* A mouse like device, at least one button,two relative axes */ -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | -+ INPUT_DEVICE_ID_MATCH_KEYBIT | -+ INPUT_DEVICE_ID_MATCH_RELBIT, -+ .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) }, -+ .keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) }, -+ .relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) }, -+ }, -+ {/* A separate scrollwheel */ -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | -+ INPUT_DEVICE_ID_MATCH_RELBIT, -+ .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) }, -+ .relbit = { BIT_MASK(REL_WHEEL) }, -+ }, -+ { }, ++static struct rockchip_cpuclk_rate_table rk3528_cpuclk_rates[] __initdata = { ++ /* APLL(CPU) rate <= 1900M, due to APLL VCO limit */ ++ RK3528_CPUCLK_RATE(1896000000, 1, 13), ++ RK3528_CPUCLK_RATE(1800000000, 1, 12), ++ RK3528_CPUCLK_RATE(1704000000, 1, 11), ++ RK3528_CPUCLK_RATE(1608000000, 1, 11), ++ RK3528_CPUCLK_RATE(1512000000, 1, 11), ++ RK3528_CPUCLK_RATE(1416000000, 1, 9), ++ RK3528_CPUCLK_RATE(1296000000, 1, 8), ++ RK3528_CPUCLK_RATE(1200000000, 1, 8), ++ RK3528_CPUCLK_RATE(1188000000, 1, 8), ++ RK3528_CPUCLK_RATE(1092000000, 1, 7), ++ RK3528_CPUCLK_RATE(1008000000, 1, 6), ++ RK3528_CPUCLK_RATE(1000000000, 1, 6), ++ RK3528_CPUCLK_RATE(996000000, 1, 6), ++ RK3528_CPUCLK_RATE(960000000, 1, 6), ++ RK3528_CPUCLK_RATE(912000000, 1, 6), ++ RK3528_CPUCLK_RATE(816000000, 1, 5), ++ RK3528_CPUCLK_RATE(600000000, 1, 3), ++ RK3528_CPUCLK_RATE(594000000, 1, 3), ++ RK3528_CPUCLK_RATE(408000000, 1, 2), ++ RK3528_CPUCLK_RATE(312000000, 1, 2), ++ RK3528_CPUCLK_RATE(216000000, 1, 1), ++ RK3528_CPUCLK_RATE(96000000, 1, 0), +}; + -+static struct input_handler cpufreq_interactive_input_handler = { -+ .event = cpufreq_interactive_input_event, -+ .connect = cpufreq_interactive_input_connect, -+ .disconnect = cpufreq_interactive_input_disconnect, -+ .name = "cpufreq_interactive", -+ .id_table = cpufreq_interactive_ids, ++static const struct rockchip_cpuclk_reg_data rk3528_cpuclk_data = { ++ .core_reg[0] = RK3528_CLKSEL_CON(39), ++ .div_core_shift[0] = 5, ++ 
.div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 1, ++ .mux_core_main = 0, ++ .mux_core_shift = 10, ++ .mux_core_mask = 0x1, +}; + -+static void rockchip_cpufreq_policy_init(struct interactive_policy *ipolicy) -+{ -+ struct interactive_tunables *tunables = ipolicy->tunables; -+ struct gov_attr_set attr_set; -+ int index; -+ -+ tunables->min_sample_time = 40 * USEC_PER_MSEC; -+ tunables->boostpulse_duration = 40 * USEC_PER_MSEC; -+ if (ipolicy->policy->cpu == 0) { -+ tunables->hispeed_freq = 1008000; -+ tunables->touchboostpulse_duration_val = 500 * USEC_PER_MSEC; -+ tunables->touchboost_freq = 1200000; -+ } else { -+ tunables->hispeed_freq = 816000; -+ } ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_24m_32k_p) = { "xin24m", "clk_32k" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_100m_24m_p) = { "clk_150m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_200m_100m_24m_p) = { "clk_500m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_600m_300m_200m_24m_p) = { "clk_600m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; ++PNAME(aclk_gpu_p) = { "aclk_gpu_root", "clk_gpu_pvtpll_src" }; ++PNAME(aclk_rkvdec_pvtmux_root_p) = { "aclk_rkvdec_root", "clk_rkvdec_pvtpll_src" }; ++PNAME(clk_i2c2_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_32k" }; ++PNAME(clk_ref_pcie_inner_phy_p) = { "clk_ppll_100m_src", "xin24m" }; ++PNAME(dclk_vop0_p) = { "dclk_vop_src0", "clk_hdmiphy_pixel_io" }; ++PNAME(mclk_i2s0_2ch_sai_src_p) = { "clk_i2s0_2ch_src", "clk_i2s0_2ch_frac", "xin12m" }; ++PNAME(mclk_i2s1_8ch_sai_src_p) = { "clk_i2s1_8ch_src", "clk_i2s1_8ch_frac", "xin12m" }; ++PNAME(mclk_i2s2_2ch_sai_src_p) = { "clk_i2s2_2ch_src", "clk_i2s2_2ch_frac", "xin12m" }; ++PNAME(mclk_i2s3_8ch_sai_src_p) = { "clk_i2s3_8ch_src", "clk_i2s3_8ch_frac", "xin12m" }; ++PNAME(mclk_sai_i2s0_p) = { "mclk_i2s0_2ch_sai_src", "i2s0_mclkin" }; ++PNAME(mclk_sai_i2s1_p) = { "mclk_i2s1_8ch_sai_src", "i2s1_mclkin" }; ++PNAME(mclk_spdif_src_p) = { "clk_spdif_src", "clk_spdif_frac", "xin12m" }; ++PNAME(sclk_uart0_src_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; ++PNAME(sclk_uart1_src_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(sclk_uart2_src_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(sclk_uart3_src_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(sclk_uart4_src_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(sclk_uart5_src_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(sclk_uart6_src_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(sclk_uart7_src_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(clk_32k_p) = { "xin_osc0_div", "clk_pvtm_32k" }; + -+ index = (ipolicy->policy->cpu == 0) ? 
0 : 1; -+ if (!backup_tunables[index].sampling_rate) { -+ backup_tunables[index] = *tunables; -+ } else { -+ attr_set = tunables->attr_set; -+ *tunables = backup_tunables[index]; -+ tunables->attr_set = attr_set; -+ } -+} -+#endif ++/* Pass 0 to PLL() '_lshift' as a placeholder for rk3066 pll type. We are rk3328 pll type */ ++static struct rockchip_pll_clock rk3528_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(0), ++ RK3528_MODE_CON, 0, 0, 0, rk3528_pll_rates), + -+int cpufreq_interactive_init(struct cpufreq_policy *policy) -+{ -+ struct interactive_policy *ipolicy; -+ struct interactive_tunables *tunables; -+ int ret; ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(8), ++ RK3528_MODE_CON, 2, 0, 0, rk3528_pll_rates), + -+ /* State should be equivalent to EXIT */ -+ if (policy->governor_data) -+ return -EBUSY; ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PLL_CON(24), ++ RK3528_MODE_CON, 4, 0, 0, rk3528_pll_rates), + -+ ipolicy = interactive_policy_alloc(policy); -+ if (!ipolicy) -+ return -ENOMEM; ++ [ppll] = PLL(pll_rk3328, PLL_PPLL, "ppll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3528_PCIE_PLL_CON(32), ++ RK3528_MODE_CON, 6, 0, ++ ROCKCHIP_PLL_FIXED_MODE, rk3528_pll_rates), + -+ mutex_lock(&global_tunables_lock); ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RK3528_DDRPHY_PLL_CON(16), ++ RK3528_DDRPHY_MODE_CON, 0, 0, 0, rk3528_pll_rates), ++}; + -+ if (global_tunables) { -+ if (WARN_ON(have_governor_per_policy())) { -+ ret = -EINVAL; -+ goto free_int_policy; -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ policy->governor_data = ipolicy; -+ ipolicy->tunables = global_tunables; ++static struct rockchip_clk_branch rk3528_uart0_fracmux __initdata = ++ MUX(CLK_UART0, "clk_uart0", sclk_uart0_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(6), 0, 2, MFLAGS); + -+ gov_attr_set_get(&global_tunables->attr_set, -+ &ipolicy->tunables_hook); -+ goto out; -+ } ++static struct rockchip_clk_branch rk3528_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", sclk_uart1_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(8), 0, 2, MFLAGS); + -+ tunables = interactive_tunables_alloc(ipolicy); -+ if (!tunables) { -+ ret = -ENOMEM; -+ goto free_int_policy; -+ } ++static struct rockchip_clk_branch rk3528_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", sclk_uart2_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(10), 0, 2, MFLAGS); + -+ tunables->hispeed_freq = policy->max; -+ tunables->above_hispeed_delay = default_above_hispeed_delay; -+ tunables->nabove_hispeed_delay = -+ ARRAY_SIZE(default_above_hispeed_delay); -+ tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; -+ tunables->target_loads = default_target_loads; -+ tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); -+ tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; -+ tunables->boostpulse_duration = DEFAULT_MIN_SAMPLE_TIME; -+ tunables->sampling_rate = DEFAULT_SAMPLING_RATE; -+ tunables->timer_slack = DEFAULT_TIMER_SLACK; -+ update_slack_delay(tunables); ++static struct rockchip_clk_branch rk3528_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", sclk_uart3_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(12), 0, 2, MFLAGS); + -+ spin_lock_init(&tunables->target_loads_lock); -+ spin_lock_init(&tunables->above_hispeed_delay_lock); ++static 
struct rockchip_clk_branch rk3528_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", sclk_uart4_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(14), 0, 2, MFLAGS); + -+ policy->governor_data = ipolicy; ++static struct rockchip_clk_branch rk3528_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", sclk_uart5_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(16), 0, 2, MFLAGS); + -+#ifdef CONFIG_ARCH_ROCKCHIP -+ rockchip_cpufreq_policy_init(ipolicy); -+#endif -+ ret = kobject_init_and_add(&tunables->attr_set.kobj, -+ &interactive_tunables_ktype, -+ get_governor_parent_kobj(policy), "%s", -+ interactive_gov.gov.name); -+ if (ret) -+ goto fail; ++static struct rockchip_clk_branch rk3528_uart6_fracmux __initdata = ++ MUX(CLK_UART6, "clk_uart6", sclk_uart6_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(18), 0, 2, MFLAGS); + -+ /* One time initialization for governor */ -+ if (!interactive_gov.usage_count++) { -+ idle_notifier_register(&cpufreq_interactive_idle_nb); -+ cpufreq_register_notifier(&cpufreq_notifier_block, -+ CPUFREQ_TRANSITION_NOTIFIER); -+#ifdef CONFIG_ARCH_ROCKCHIP -+ ret = input_register_handler(&cpufreq_interactive_input_handler); -+#endif -+ } ++static struct rockchip_clk_branch rk3528_uart7_fracmux __initdata = ++ MUX(CLK_UART7, "clk_uart7", sclk_uart7_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(20), 0, 2, MFLAGS); + -+ out: -+ mutex_unlock(&global_tunables_lock); -+ return 0; ++static struct rockchip_clk_branch mclk_i2s0_2ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S0_2CH_SAI_SRC_PRE, "mclk_i2s0_2ch_sai_src_pre", mclk_i2s0_2ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(22), 0, 2, MFLAGS); + -+ fail: -+ policy->governor_data = NULL; -+ interactive_tunables_free(tunables); ++static struct rockchip_clk_branch mclk_i2s1_8ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S1_8CH_SAI_SRC_PRE, "mclk_i2s1_8ch_sai_src_pre", mclk_i2s1_8ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(26), 0, 2, MFLAGS); + -+ free_int_policy: -+ mutex_unlock(&global_tunables_lock); ++static struct rockchip_clk_branch mclk_i2s2_2ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S2_2CH_SAI_SRC_PRE, "mclk_i2s2_2ch_sai_src_pre", mclk_i2s2_2ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(28), 0, 2, MFLAGS); + -+ interactive_policy_free(ipolicy); -+ pr_err("governor initialization failed (%d)\n", ret); ++static struct rockchip_clk_branch mclk_i2s3_8ch_sai_src_fracmux __initdata = ++ MUX(MCLK_I2S3_8CH_SAI_SRC_PRE, "mclk_i2s3_8ch_sai_src_pre", mclk_i2s3_8ch_sai_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(24), 0, 2, MFLAGS); + -+ return ret; -+} ++static struct rockchip_clk_branch mclk_spdif_src_fracmux __initdata = ++ MUX(MCLK_SDPDIF_SRC_PRE, "mclk_spdif_src_pre", mclk_spdif_src_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(32), 0, 2, MFLAGS); + -+void cpufreq_interactive_exit(struct cpufreq_policy *policy) -+{ -+ struct interactive_policy *ipolicy = policy->governor_data; -+ struct interactive_tunables *tunables = ipolicy->tunables; -+ unsigned int count; ++/* ++ * CRU Clock-Architecture ++ */ ++static struct rockchip_clk_branch rk3528_clk_branches[] __initdata = { ++ /* top */ ++ FACTOR(0, "xin12m", "xin24m", 0, 1, 2), + -+ mutex_lock(&global_tunables_lock); ++ COMPOSITE(CLK_MATRIX_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 15, 1, MFLAGS, 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(CLK_MATRIX_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, 
++ RK3528_CLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_50M_SRC, "clk_50m_src", "cpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(0), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_100M_SRC, "clk_100m_src", "cpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(0), 7, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_150M_SRC, "clk_150m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_200M_SRC, "clk_200m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(1), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_300M_SRC, "clk_300m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(2), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_NOMUX_HALFDIV(CLK_MATRIX_339M_SRC, "clk_339m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(2), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_400M_SRC, "clk_400m_src", "gpll", CLK_IGNORE_UNUSED, ++ RK3528_CLKSEL_CON(2), 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MATRIX_600M_SRC, "clk_600m_src", "gpll", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(4), 0, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 11, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC0, "dclk_vop_src0", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(32), 10, 1, MFLAGS, 2, 8, DFLAGS, ++ RK3528_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC1, "dclk_vop_src1", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 8, DFLAGS, ++ RK3528_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_HSM, "clk_hsm", "xin24m", 0, ++ RK3528_CLKSEL_CON(36), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(3), 13, GFLAGS), + -+ /* Last policy using the governor ? 
*/ -+ if (!--interactive_gov.usage_count) { -+ cpufreq_unregister_notifier(&cpufreq_notifier_block, -+ CPUFREQ_TRANSITION_NOTIFIER); -+ idle_notifier_unregister(&cpufreq_interactive_idle_nb); -+#ifdef CONFIG_ARCH_ROCKCHIP -+ input_unregister_handler(&cpufreq_interactive_input_handler); -+#endif -+ } ++ COMPOSITE_NOMUX(CLK_UART0_SRC, "clk_uart0_src", "gpll", 0, ++ RK3528_CLKSEL_CON(4), 5, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(5), 0, ++ RK3528_CLKGATE_CON(0), 13, GFLAGS, &rk3528_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, ++ RK3528_CLKGATE_CON(0), 14, GFLAGS), + -+ count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook); -+ policy->governor_data = NULL; -+ if (!count) { -+#ifdef CONFIG_ARCH_ROCKCHIP -+ if (policy->cpu == 0) -+ backup_tunables[0] = *tunables; -+ else -+ backup_tunables[1] = *tunables; -+#endif -+ interactive_tunables_free(tunables); -+ } ++ COMPOSITE_NOMUX(CLK_UART1_SRC, "clk_uart1_src", "gpll", 0, ++ RK3528_CLKSEL_CON(6), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(7), 0, ++ RK3528_CLKGATE_CON(1), 0, GFLAGS, &rk3528_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RK3528_CLKGATE_CON(1), 1, GFLAGS), + -+ mutex_unlock(&global_tunables_lock); ++ COMPOSITE_NOMUX(CLK_UART2_SRC, "clk_uart2_src", "gpll", 0, ++ RK3528_CLKSEL_CON(8), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(9), 0, ++ RK3528_CLKGATE_CON(1), 3, GFLAGS, &rk3528_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RK3528_CLKGATE_CON(1), 4, GFLAGS), + -+ interactive_policy_free(ipolicy); -+} ++ COMPOSITE_NOMUX(CLK_UART3_SRC, "clk_uart3_src", "gpll", 0, ++ RK3528_CLKSEL_CON(10), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(11), 0, ++ RK3528_CLKGATE_CON(1), 6, GFLAGS, &rk3528_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RK3528_CLKGATE_CON(1), 7, GFLAGS), + -+int cpufreq_interactive_start(struct cpufreq_policy *policy) -+{ -+ struct interactive_policy *ipolicy = policy->governor_data; -+ struct interactive_cpu *icpu; -+ unsigned int cpu; -+ -+ for_each_cpu(cpu, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, cpu); -+ -+ icpu->target_freq = policy->cur; -+ icpu->floor_freq = icpu->target_freq; -+ icpu->pol_floor_val_time = ktime_to_us(ktime_get()); -+ icpu->loc_floor_val_time = icpu->pol_floor_val_time; -+ icpu->pol_hispeed_val_time = icpu->pol_floor_val_time; -+ icpu->loc_hispeed_val_time = icpu->pol_floor_val_time; -+ icpu->cpu = cpu; -+ -+ down_write(&icpu->enable_sem); -+ icpu->ipolicy = ipolicy; -+ slack_timer_resched(icpu, cpu, false); -+ up_write(&icpu->enable_sem); -+ } -+ -+ gov_set_update_util(ipolicy); -+ return 0; -+} -+ -+void cpufreq_interactive_stop(struct cpufreq_policy *policy) -+{ -+ struct interactive_policy *ipolicy = policy->governor_data; -+ struct interactive_cpu *icpu; -+ unsigned int cpu; -+ -+ gov_clear_update_util(ipolicy->policy); -+ -+ for_each_cpu(cpu, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, cpu); ++ COMPOSITE_NOMUX(CLK_UART4_SRC, "clk_uart4_src", "gpll", 0, ++ RK3528_CLKSEL_CON(12), 2, 5, DFLAGS, ++ 
RK3528_CLKGATE_CON(1), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(13), 0, ++ RK3528_CLKGATE_CON(1), 9, GFLAGS, &rk3528_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RK3528_CLKGATE_CON(1), 10, GFLAGS), + -+ down_write(&icpu->enable_sem); -+ icpu_cancel_work(icpu); -+ icpu->ipolicy = NULL; -+ up_write(&icpu->enable_sem); -+ } -+} ++ COMPOSITE_NOMUX(CLK_UART5_SRC, "clk_uart5_src", "gpll", 0, ++ RK3528_CLKSEL_CON(14), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(15), 0, ++ RK3528_CLKGATE_CON(1), 12, GFLAGS, &rk3528_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RK3528_CLKGATE_CON(1), 13, GFLAGS), + -+void cpufreq_interactive_limits(struct cpufreq_policy *policy) -+{ -+ struct interactive_cpu *icpu; -+ unsigned int cpu; -+ unsigned long flags; ++ COMPOSITE_NOMUX(CLK_UART6_SRC, "clk_uart6_src", "gpll", 0, ++ RK3528_CLKSEL_CON(16), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(17), 0, ++ RK3528_CLKGATE_CON(1), 15, GFLAGS, &rk3528_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, ++ RK3528_CLKGATE_CON(2), 0, GFLAGS), + -+ cpufreq_policy_apply_limits(policy); ++ COMPOSITE_NOMUX(CLK_UART7_SRC, "clk_uart7_src", "gpll", 0, ++ RK3528_CLKSEL_CON(18), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(19), 0, ++ RK3528_CLKGATE_CON(2), 2, GFLAGS, &rk3528_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, ++ RK3528_CLKGATE_CON(2), 3, GFLAGS), + -+ for_each_cpu(cpu, policy->cpus) { -+ icpu = &per_cpu(interactive_cpu, cpu); ++ COMPOSITE_NOMUX(CLK_I2S0_2CH_SRC, "clk_i2s0_2ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(20), 8, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_2CH_FRAC, "clk_i2s0_2ch_frac", "clk_i2s0_2ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(21), 0, ++ RK3528_CLKGATE_CON(2), 6, GFLAGS, &mclk_i2s0_2ch_sai_src_fracmux), ++ GATE(MCLK_I2S0_2CH_SAI_SRC, "mclk_i2s0_2ch_sai_src", "mclk_i2s0_2ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 7, GFLAGS), + -+ spin_lock_irqsave(&icpu->target_freq_lock, flags); ++ COMPOSITE_NOMUX(CLK_I2S1_8CH_SRC, "clk_i2s1_8ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(24), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S1_8CH_FRAC, "clk_i2s1_8ch_frac", "clk_i2s1_8ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(25), 0, ++ RK3528_CLKGATE_CON(2), 12, GFLAGS, &mclk_i2s1_8ch_sai_src_fracmux), ++ GATE(MCLK_I2S1_8CH_SAI_SRC, "mclk_i2s1_8ch_sai_src", "mclk_i2s1_8ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 13, GFLAGS), + -+ if (policy->max < icpu->target_freq) -+ icpu->target_freq = policy->max; -+ else if (policy->min > icpu->target_freq) -+ icpu->target_freq = policy->min; ++ COMPOSITE_NOMUX(CLK_I2S2_2CH_SRC, "clk_i2s2_2ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(26), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S2_2CH_FRAC, "clk_i2s2_2ch_frac", "clk_i2s2_2ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(27), 0, ++ RK3528_CLKGATE_CON(2), 15, GFLAGS, &mclk_i2s2_2ch_sai_src_fracmux), ++ GATE(MCLK_I2S2_2CH_SAI_SRC, "mclk_i2s2_2ch_sai_src", "mclk_i2s2_2ch_sai_src_pre", 0, ++ 
RK3528_CLKGATE_CON(3), 0, GFLAGS), + -+ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); -+ } -+} ++ COMPOSITE_NOMUX(CLK_I2S3_8CH_SRC, "clk_i2s3_8ch_src", "gpll", 0, ++ RK3528_CLKSEL_CON(22), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S3_8CH_FRAC, "clk_i2s3_8ch_frac", "clk_i2s3_8ch_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(23), 0, ++ RK3528_CLKGATE_CON(2), 9, GFLAGS, &mclk_i2s3_8ch_sai_src_fracmux), ++ GATE(MCLK_I2S3_8CH_SAI_SRC, "mclk_i2s3_8ch_sai_src", "mclk_i2s3_8ch_sai_src_pre", 0, ++ RK3528_CLKGATE_CON(2), 10, GFLAGS), + -+static struct interactive_governor interactive_gov = { -+ .gov = { -+ .name = "interactive", -+ .owner = THIS_MODULE, -+ .init = cpufreq_interactive_init, -+ .exit = cpufreq_interactive_exit, -+ .start = cpufreq_interactive_start, -+ .stop = cpufreq_interactive_stop, -+ .limits = cpufreq_interactive_limits, -+ } -+}; ++ COMPOSITE_NOMUX(CLK_SPDIF_SRC, "clk_spdif_src", "gpll", 0, ++ RK3528_CLKSEL_CON(30), 2, 5, DFLAGS, ++ RK3528_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(31), 0, ++ RK3528_CLKGATE_CON(3), 5, GFLAGS, &mclk_spdif_src_fracmux), ++ GATE(MCLK_SPDIF_SRC, "mclk_spdif_src", "mclk_spdif_src_pre", 0, ++ RK3528_CLKGATE_CON(3), 6, GFLAGS), + -+static void cpufreq_interactive_nop_timer(struct timer_list *t) -+{ -+ /* -+ * The purpose of slack-timer is to wake up the CPU from IDLE, in order -+ * to decrease its frequency if it is not set to minimum already. -+ * -+ * This is important for platforms where CPU with higher frequencies -+ * consume higher power even at IDLE. -+ */ -+} ++ /* bus */ ++ COMPOSITE_NODIV(ACLK_BUS_M_ROOT, "aclk_bus_m_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 7, GFLAGS), ++ GATE(ACLK_GIC, "aclk_gic", "aclk_bus_m_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(9), 1, GFLAGS), + -+static int __init cpufreq_interactive_gov_init(void) -+{ -+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; -+ struct interactive_cpu *icpu; -+ unsigned int cpu; ++ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 4, GFLAGS), ++ GATE(ACLK_SPINLOCK, "aclk_spinlock", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 4, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 11, GFLAGS), ++ COMPOSITE(ACLK_BUS_VOPGL_ROOT, "aclk_bus_vopgl_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 3, 1, MFLAGS, 0, 3, DFLAGS, ++ RK3528_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_H_ROOT, "aclk_bus_h_root", mux_500m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(ACLK_DMA2DDR, "aclk_dma2ddr", "aclk_bus_h_root", 0, ++ RK3528_CLKGATE_CON(10), 14, GFLAGS), + -+ for_each_possible_cpu(cpu) { -+ icpu = &per_cpu(interactive_cpu, cpu); ++ COMPOSITE_NODIV(HCLK_BUS_ROOT, "hclk_bus_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 5, GFLAGS), + -+ init_irq_work(&icpu->irq_work, irq_work); -+ spin_lock_init(&icpu->load_lock); -+ spin_lock_init(&icpu->target_freq_lock); -+ init_rwsem(&icpu->enable_sem); ++ COMPOSITE_NODIV(PCLK_BUS_ROOT, "pclk_bus_root", mux_100m_50m_24m_p, 
CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(43), 10, 2, MFLAGS, ++ RK3528_CLKGATE_CON(8), 6, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(8), 13, GFLAGS), ++ GATE(PCLK_BUS_GRF, "pclk_bus_grf", "pclk_bus_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(8), 15, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(PCLK_JDBCK_DAP, "pclk_jdbck_dap", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 12, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(9), 15, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(PCLK_PWM0, "pclk_pwm0", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 4, GFLAGS), ++ GATE(PCLK_PWM1, "pclk_pwm1", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 7, GFLAGS), ++ GATE(PCLK_DMA2DDR, "pclk_dma2ddr", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(10), 13, GFLAGS), ++ GATE(PCLK_SCR, "pclk_scr", "pclk_bus_root", 0, ++ RK3528_CLKGATE_CON(11), 10, GFLAGS), ++ GATE(PCLK_INTMUX, "pclk_intmux", "pclk_bus_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(11), 12, GFLAGS), + -+ /* Initialize per-cpu slack-timer */ -+ timer_setup(&icpu->slack_timer, cpufreq_interactive_nop_timer, -+ TIMER_PINNED); -+ } ++ COMPOSITE_NODIV(CLK_PWM0, "clk_pwm0", mux_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(44), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(11), 5, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1, "clk_pwm1", mux_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(44), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(11), 8, GFLAGS), + -+ spin_lock_init(&speedchange_cpumask_lock); -+ speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, -+ NULL, "cfinteractive"); -+ if (IS_ERR(speedchange_task)) -+ return PTR_ERR(speedchange_task); ++ GATE(CLK_CAPTURE_PWM1, "clk_capture_pwm1", "xin24m", 0, ++ RK3528_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0, "clk_capture_pwm0", "xin24m", 0, ++ RK3528_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(CLK_JDBCK_DAP, "clk_jdbck_dap", "xin24m", 0, ++ RK3528_CLKGATE_CON(9), 13, GFLAGS), ++ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, ++ RK3528_CLKGATE_CON(10), 0, GFLAGS), + -+ sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); -+ get_task_struct(speedchange_task); ++ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, ++ RK3528_CLKGATE_CON(8), 9, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 6, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 7, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 8, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 9, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 10, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, ++ RK3528_CLKGATE_CON(9), 11, GFLAGS), + -+ /* wake up so the thread does not look hung to the freezer */ -+ wake_up_process(speedchange_task); ++ /* pmu */ ++ GATE(HCLK_PMU_ROOT, "hclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(0), 1, GFLAGS), ++ GATE(PCLK_PMU_ROOT, "pclk_pmu_root", "clk_100m_src", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(0), 0, GFLAGS), + -+ return cpufreq_register_governor(CPU_FREQ_GOV_INTERACTIVE); -+} ++ GATE(FCLK_MCU, "fclk_mcu", "hclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(5), 
4, GFLAGS), + -+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE -+struct cpufreq_governor *cpufreq_default_governor(void) -+{ -+ return CPU_FREQ_GOV_INTERACTIVE; -+} ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU_HP_TIMER, "pclk_pmu_hp_timer", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PMU_IOC, "pclk_pmu_ioc", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_PMU_CRU, "pclk_pmu_cru", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(PCLK_PMU_GRF, "pclk_pmu_grf", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 10, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RK3528_PMU_CLKGATE_CON(0), 13, GFLAGS), ++ GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_OSCCHK, "pclk_oscchk", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 12, GFLAGS), ++ GATE(PCLK_SCRKEYGEN, "pclk_scrkeygen", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, ++ RK3528_PMU_CLKGATE_CON(5), 1, GFLAGS), + -+fs_initcall(cpufreq_interactive_gov_init); -+#else -+module_init(cpufreq_interactive_gov_init); -+#endif ++ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", clk_i2c2_p, 0, ++ RK3528_PMU_CLKSEL_CON(0), 0, 2, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(0), 3, GFLAGS), + -+static void __exit cpufreq_interactive_gov_exit(void) -+{ -+ cpufreq_unregister_governor(CPU_FREQ_GOV_INTERACTIVE); -+ kthread_stop(speedchange_task); -+ put_task_struct(speedchange_task); -+} -+module_exit(cpufreq_interactive_gov_exit); ++ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, ++ RK3528_PMU_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RK3528_PMU_CLKSEL_CON(5), 0, 5, DFLAGS, ++ RK3528_PMU_CLKGATE_CON(5), 0, GFLAGS), + -+MODULE_AUTHOR("Mike Chan "); -+MODULE_DESCRIPTION("'cpufreq_interactive' - A dynamic cpufreq governor for Latency sensitive workloads"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/cpufreq/cpufreq_userspace_rk.c b/drivers/cpufreq/cpufreq_userspace_rk.c -new file mode 100644 -index 000000000..1f001d281 ---- /dev/null -+++ b/drivers/cpufreq/cpufreq_userspace_rk.c -@@ -0,0 +1,140 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++ COMPOSITE_FRAC(XIN_OSC0_DIV, "xin_osc0_div", "xin24m", 0, ++ RK3528_PMU_CLKSEL_CON(1), 0, ++ RK3528_PMU_CLKGATE_CON(1), 0, GFLAGS), ++ /* clk_32k: internal! 
No path from external osc 32k */ ++ MUX(CLK_DEEPSLOW, "clk_32k", clk_32k_p, CLK_IS_CRITICAL, ++ RK3528_PMU_CLKSEL_CON(2), 0, 1, MFLAGS), ++ GATE(RTC_CLK_MCU, "rtc_clk_mcu", "clk_32k", 0, ++ RK3528_PMU_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, ++ RK3528_PMU_CLKGATE_CON(1), 1, GFLAGS), + -+/* -+ * linux/drivers/cpufreq/cpufreq_userspace.c -+ * -+ * Copyright (C) 2001 Russell King -+ * (C) 2002 - 2004 Dominik Brodowski -+ */ ++ COMPOSITE_NODIV(DBCLK_GPIO0, "dbclk_gpio0", mux_24m_32k_p, 0, ++ RK3528_PMU_CLKSEL_CON(0), 2, 1, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(0), 15, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, ++ RK3528_PMU_CLKSEL_CON(2), 1, 1, MFLAGS, ++ RK3528_PMU_CLKGATE_CON(1), 11, GFLAGS), + -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ /* core */ ++ COMPOSITE_NOMUX(ACLK_M_CORE_BIU, "aclk_m_core", "armclk", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(39), 11, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3528_CLKGATE_CON(5), 12, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_DBG, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(40), 1, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3528_CLKGATE_CON(5), 13, GFLAGS), ++ GATE(PCLK_CPU_ROOT, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(PCLK_CORE_GRF, "pclk_core_grf", "pclk_cpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(6), 2, GFLAGS), + -+#include -+#include -+#include -+#include -+#include ++ /* ddr */ ++ GATE(CLK_DDRC_SRC, "clk_ddrc_src", "dpll", CLK_IS_CRITICAL, ++ RK3528_DDRPHY_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(CLK_DDR_PHY, "clk_ddr_phy", "dpll", CLK_IS_CRITICAL, ++ RK3528_DDRPHY_CLKGATE_CON(0), 1, GFLAGS), + -+static DEFINE_PER_CPU(unsigned int, cpu_is_managed); -+static DEFINE_MUTEX(userspace_mutex); ++ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(90), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(45), 0, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 3, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 8, GFLAGS), ++ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3528_CLKGATE_CON(45), 4, GFLAGS), + -+/** -+ * cpufreq_set - set the CPU frequency -+ * @policy: pointer to policy struct where freq is being set -+ * @freq: target frequency in kHz -+ * -+ * Sets the CPU frequency to freq. 
-+ */ -+static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) -+{ -+ int ret = -EINVAL; -+ unsigned int *setspeed = policy->governor_data; ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 2, GFLAGS), ++ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 6, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 9, GFLAGS), + -+ pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); ++ GATE(ACLK_DDR_UPCTL, "aclk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 11, GFLAGS), ++ GATE(CLK_DDR_UPCTL, "clk_ddr_upctl", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 12, GFLAGS), ++ GATE(CLK_DDRMON, "clk_ddrmon", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 13, GFLAGS), ++ GATE(ACLK_DDR_SCRAMBLE, "aclk_ddr_scramble", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 14, GFLAGS), ++ GATE(ACLK_SPLIT, "aclk_split", "clk_ddrc_src", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(45), 15, GFLAGS), + -+ mutex_lock(&userspace_mutex); -+ if (!per_cpu(cpu_is_managed, policy->cpu)) -+ goto err; ++ /* gpu */ ++ COMPOSITE_NODIV(ACLK_GPU_ROOT, "aclk_gpu_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(76), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(34), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_GPU, "aclk_gpu", aclk_gpu_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(76), 6, 1, MFLAGS, ++ RK3528_CLKGATE_CON(34), 7, GFLAGS), ++ GATE(ACLK_GPU_MALI, "aclk_gpu_mali", "aclk_gpu", 0, ++ RK3528_CLKGATE_CON(34), 8, GFLAGS), ++ COMPOSITE_NODIV(PCLK_GPU_ROOT, "pclk_gpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(76), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(34), 2, GFLAGS), + -+ *setspeed = freq; ++ /* rkvdec */ ++ COMPOSITE_NODIV(ACLK_RKVDEC_ROOT_NDFT, "aclk_rkvdec_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(88), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 3, GFLAGS), ++ COMPOSITE_NODIV(HCLK_RKVDEC_ROOT, "hclk_rkvdec_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(88), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 2, GFLAGS), ++ GATE(PCLK_DDRPHY_CRU, "pclk_ddrphy_cru", "hclk_rkvdec_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(44), 4, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_root", 0, ++ RK3528_CLKGATE_CON(44), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_HEVC_CA_RKVDEC, "clk_hevc_ca_rkvdec", mux_600m_300m_200m_24m_p, 0, ++ RK3528_CLKSEL_CON(88), 11, 2, MFLAGS, ++ RK3528_CLKGATE_CON(44), 11, GFLAGS), ++ MUX(ACLK_RKVDEC_PVTMUX_ROOT, "aclk_rkvdec_pvtmux_root", aclk_rkvdec_pvtmux_root_p, CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(88), 13, 1, MFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_rkvdec_pvtmux_root", 0, ++ RK3528_CLKGATE_CON(44), 8, GFLAGS), + -+ ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); -+ err: -+ mutex_unlock(&userspace_mutex); -+ return ret; -+} ++ /* rkvenc */ ++ COMPOSITE_NODIV(ACLK_RKVENC_ROOT, "aclk_rkvenc_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 1, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 7, GFLAGS), + -+static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) -+{ -+ return sprintf(buf, "%u\n", policy->cur); -+} ++ COMPOSITE_NODIV(PCLK_RKVENC_ROOT, "pclk_rkvenc_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 4, 2, 
MFLAGS, ++ RK3528_CLKGATE_CON(36), 2, GFLAGS), ++ GATE(PCLK_RKVENC_IOC, "pclk_rkvenc_ioc", "pclk_rkvenc_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(37), 10, GFLAGS), ++ GATE(PCLK_RKVENC_GRF, "pclk_rkvenc_grf", "pclk_rkvenc_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(38), 6, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 11, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 13, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 2, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 8, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 2, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 4, GFLAGS), ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 7, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 9, GFLAGS), + -+static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) -+{ -+ unsigned int *setspeed; ++ COMPOSITE_NODIV(MCLK_PDM, "mclk_pdm", mux_150m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(80), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(38), 1, GFLAGS), ++ COMPOSITE(CLK_CAN0, "clk_can0", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(81), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(38), 8, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(81), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RK3528_CLKGATE_CON(38), 10, GFLAGS), + -+ setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL); -+ if (!setspeed) -+ return -ENOMEM; ++ COMPOSITE_NODIV(HCLK_RKVENC_ROOT, "hclk_rkvenc_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(79), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 0, GFLAGS), ++ GATE(HCLK_SAI_I2S1, "hclk_sai_i2s1", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 9, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(37), 14, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(38), 0, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_rkvenc_root", 0, ++ RK3528_CLKGATE_CON(36), 6, GFLAGS), + -+ policy->governor_data = setspeed; -+ return 0; -+} ++ COMPOSITE_NODIV(CLK_CORE_RKVENC, "clk_core_rkvenc", mux_300m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 6, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 11, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 9, 2, MFLAGS, ++ RK3528_CLKGATE_CON(36), 12, GFLAGS), ++#if 0 ++ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, ++ RK3528_CLKGATE_CON(37), 4, GFLAGS), ++ GATE(CLK_UART_JTAG, "clk_uart_jtag", "xin24m", 0, ++ RK3528_CLKGATE_CON(37), 0, GFLAGS), ++#endif ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(79), 13, 2, MFLAGS, ++ RK3528_CLKGATE_CON(37), 3, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI_I2S1, "mclk_sai_i2s1", mclk_sai_i2s1_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(79), 8, 1, MFLAGS, ++ RK3528_CLKGATE_CON(36), 10, GFLAGS), ++ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, ++ RK3528_CLKGATE_CON(37), 9, GFLAGS), + -+static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy) -+{ -+ mutex_lock(&userspace_mutex); -+ kfree(policy->governor_data); -+ 
policy->governor_data = NULL; -+ mutex_unlock(&userspace_mutex); -+} ++ /* vo */ ++ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 1, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 2, GFLAGS), ++ GATE(HCLK_USBHOST, "hclk_usbhost", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 3, GFLAGS), ++ GATE(HCLK_JPEG_DECODER, "hclk_jpeg_decoder", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 7, GFLAGS), ++ GATE(HCLK_VDPP, "hclk_vdpp", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 10, GFLAGS), ++ GATE(HCLK_CVBS, "hclk_cvbs", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 3, GFLAGS), ++ GATE(HCLK_USBHOST_ARB, "hclk_usbhost_arb", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 4, GFLAGS), ++ GATE(HCLK_SAI_I2S3, "hclk_sai_i2s3", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 1, GFLAGS), ++ GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 1, GFLAGS), ++ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 7, GFLAGS), ++ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 9, GFLAGS), ++ GATE(HCLK_HDCP_KEY, "hclk_hdcp_key", "hclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 15, GFLAGS), + -+static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy) -+{ -+ unsigned int *setspeed = policy->governor_data; ++ COMPOSITE_NODIV(ACLK_VO_L_ROOT, "aclk_vo_l_root", mux_150m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(84), 1, 2, MFLAGS, ++ RK3528_CLKGATE_CON(41), 8, GFLAGS), ++ GATE(ACLK_MAC_VO, "aclk_gmac0", "aclk_vo_l_root", 0, ++ RK3528_CLKGATE_CON(41), 10, GFLAGS), + -+ BUG_ON(!policy->cur); -+ pr_debug("started managing cpu %u\n", policy->cpu); ++ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 2, GFLAGS), ++ GATE(PCLK_MAC_VO, "pclk_gmac0", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 11, GFLAGS), ++ GATE(PCLK_VCDCPHY, "pclk_vcdcphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 4, GFLAGS), ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 5, GFLAGS), ++ GATE(PCLK_VO_IOC, "pclk_vo_ioc", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(42), 7, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(42), 11, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 7, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 9, GFLAGS), ++ GATE(PCLK_I2C7, "pclk_i2c7", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 11, GFLAGS), + -+ mutex_lock(&userspace_mutex); -+ per_cpu(cpu_is_managed, policy->cpu) = 1; -+ if (!*setspeed) -+ *setspeed = policy->cur; -+ mutex_unlock(&userspace_mutex); -+ return 0; -+} ++ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(43), 13, GFLAGS), + -+static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy) -+{ -+ pr_debug("managing cpu %u stopped\n", policy->cpu); ++ GATE(PCLK_VO_GRF, "pclk_vo_grf", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(39), 13, GFLAGS), ++ GATE(PCLK_CRU, "pclk_cru", "pclk_vo_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(39), 15, GFLAGS), ++ GATE(PCLK_HDMI, "pclk_hdmi", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 6, GFLAGS), ++ GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(40), 14, GFLAGS), ++ GATE(PCLK_HDCP, "pclk_hdcp", 
"pclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 2, GFLAGS), + -+ mutex_lock(&userspace_mutex); -+ per_cpu(cpu_is_managed, policy->cpu) = 0; -+ mutex_unlock(&userspace_mutex); -+} ++ COMPOSITE_NODIV(CLK_CORE_VDPP, "clk_core_vdpp", mux_339m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(83), 10, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_339m_200m_100m_24m_p, 0, ++ RK3528_CLKSEL_CON(83), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 9, GFLAGS), ++ COMPOSITE_NODIV(ACLK_JPEG_ROOT, "aclk_jpeg_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(84), 9, 2, MFLAGS, ++ RK3528_CLKGATE_CON(41), 15, GFLAGS), ++ GATE(ACLK_JPEG_DECODER, "aclk_jpeg_decoder", "aclk_jpeg_root", 0, ++ RK3528_CLKGATE_CON(41), 6, GFLAGS), + -+static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) -+{ -+ unsigned int *setspeed = policy->governor_data; ++ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(39), 0, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 8, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_VDPP, "aclk_vdpp", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(39), 11, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_HDCP, "aclk_hdcp", "aclk_vo_root", 0, ++ RK3528_CLKGATE_CON(41), 0, GFLAGS), + -+ mutex_lock(&userspace_mutex); ++ COMPOSITE(CCLK_SRC_SDMMC0, "cclk_src_sdmmc0", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(85), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(42), 8, GFLAGS), + -+ pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", -+ policy->cpu, policy->min, policy->max, policy->cur, *setspeed); ++ COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(83), 15, 1, MFLAGS, 12, 3, DFLAGS, ++ RK3528_CLKGATE_CON(40), 0, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, ++ RK3528_CLKGATE_CON(40), 5, GFLAGS), + -+ if (policy->max < *setspeed) -+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); -+ else if (policy->min > *setspeed) -+ __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); -+ else -+ __cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L); ++ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(85), 13, 2, MFLAGS, ++ RK3528_CLKGATE_CON(43), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C7, "clk_i2c7", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(86), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(43), 12, GFLAGS), ++ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 6, GFLAGS), + -+ mutex_unlock(&userspace_mutex); -+} ++ GATE(CLK_HDMIHDP0, "clk_hdmihdp0", "xin24m", 0, ++ RK3528_CLKGATE_CON(43), 2, GFLAGS), ++ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 3, GFLAGS), ++ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, ++ RK3528_CLKGATE_CON(43), 14, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RK3528_CLKGATE_CON(42), 12, GFLAGS), ++ FACTOR(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "clk_sbpi_otpc_ns", 0, 1, 2), + -+static struct cpufreq_governor cpufreq_gov_userspace = { -+ .name = "userspace", -+ .init = cpufreq_userspace_policy_init, -+ .exit = cpufreq_userspace_policy_exit, -+ .start = cpufreq_userspace_policy_start, -+ .stop = cpufreq_userspace_policy_stop, -+ .limits = cpufreq_userspace_policy_limits, -+ .store_setspeed = cpufreq_set, -+ .show_setspeed = show_speed, -+ 
.owner = THIS_MODULE, -+}; ++ GATE(MCLK_SAI_I2S3, "mclk_sai_i2s3", "mclk_i2s3_8ch_sai_src", 0, ++ RK3528_CLKGATE_CON(42), 2, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VOP0, "dclk_vop0", dclk_vop0_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3528_CLKSEL_CON(84), 0, 1, MFLAGS, ++ RK3528_CLKGATE_CON(40), 3, GFLAGS), ++ GATE(DCLK_VOP1, "dclk_vop1", "dclk_vop_src1", CLK_SET_RATE_PARENT, ++ RK3528_CLKGATE_CON(40), 4, GFLAGS), ++ FACTOR_GATE(DCLK_CVBS, "dclk_cvbs", "dclk_vop1", 0, 1, 4, ++ RK3528_CLKGATE_CON(41), 4, GFLAGS), ++ GATE(DCLK_4X_CVBS, "dclk_4x_cvbs", "dclk_vop1", 0, ++ RK3528_CLKGATE_CON(41), 5, GFLAGS), + -+MODULE_AUTHOR("Dominik Brodowski , " -+ "Russell King "); -+MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); -+MODULE_LICENSE("GPL"); ++ FACTOR_GATE(CLK_SFR_HDMI, "clk_sfr_hdmi", "dclk_vop_src1", 0, 1, 4, ++ RK3528_CLKGATE_CON(40), 7, GFLAGS), + -+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -+struct cpufreq_governor *cpufreq_default_governor(void) -+{ -+ return &cpufreq_gov_userspace; -+} ++ GATE(CLK_SPDIF_HDMI, "clk_spdif_hdmi", "mclk_spdif_src", 0, ++ RK3528_CLKGATE_CON(40), 10, GFLAGS), ++ GATE(MCLK_SPDIF, "mclk_spdif", "mclk_spdif_src", 0, ++ RK3528_CLKGATE_CON(37), 15, GFLAGS), ++ GATE(CLK_CEC_HDMI, "clk_cec_hdmi", "clk_32k", 0, ++ RK3528_CLKGATE_CON(40), 8, GFLAGS), ++#if 0 ++ GATE(CLK_USBHOST_OHCI, "clk_usbhost_ohci", "clk_usbhost_ohci_io", 0, ++ RK3528_CLKGATE_CON(43), 5, GFLAGS), ++ GATE(CLK_USBHOST_UTMI, "clk_usbhost_utmi", "clk_usbhost_utmi_io", 0, ++ RK3528_CLKGATE_CON(43), 6, GFLAGS), ++ GATE(CLK_HDMIPHY_TMDSSRC, "clk_hdmiphy_tmdssrc", "clk_hdmiphy_tmdssrc_io", 0, ++ RK3528_CLKGATE_CON(40), 11, GFLAGS), ++ GATE(CLK_HDMIPHY_PREP, "clk_hdmiphy_prep", "clk_hdmiphy_prep_io", 0, ++ RK3528_CLKGATE_CON(40), 12, GFLAGS), +#endif ++ /* vpu */ ++ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, ++ RK3528_CLKGATE_CON(26), 5, GFLAGS), ++ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, ++ RK3528_CLKGATE_CON(27), 1, GFLAGS), ++ GATE(CLK_SUSPEND_USB3OTG, "clk_suspend_usb3otg", "xin24m", 0, ++ RK3528_CLKGATE_CON(33), 4, GFLAGS), ++ GATE(CLK_PCIE_AUX, "clk_pcie_aux", "xin24m", 0, ++ RK3528_CLKGATE_CON(30), 2, GFLAGS), ++ GATE(TCLK_EMMC, "tclk_emmc", "xin24m", 0, ++ RK3528_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(CLK_REF_USB3OTG, "clk_ref_usb3otg", "xin24m", 0, ++ RK3528_CLKGATE_CON(33), 2, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO0, "cclk_src_sdio0", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(72), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 1, GFLAGS), + -+cpufreq_governor_init(cpufreq_gov_userspace); -+cpufreq_governor_exit(cpufreq_gov_userspace); -diff --git a/drivers/cpufreq/rockchip-cpufreq.c b/drivers/cpufreq/rockchip-cpufreq.c -new file mode 100644 -index 000000000..dfed7d6b7 ---- /dev/null -+++ b/drivers/cpufreq/rockchip-cpufreq.c -@@ -0,0 +1,852 @@ -+/* -+ * Rockchip CPUFreq Driver -+ * -+ * Copyright (C) 2017 Fuzhou Rockchip Electronics Co., Ltd -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any -+ * kind, whether express or implied; without even the implied warranty -+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "cpufreq-dt.h" -+#include "rockchip-cpufreq.h" -+ -+struct cluster_info { -+ struct list_head list_head; -+ struct monitor_dev_info *mdev_info; -+ struct rockchip_opp_info opp_info; -+ struct freq_qos_request dsu_qos_req; -+ cpumask_t cpus; -+ unsigned int idle_threshold_freq; -+ bool is_idle_disabled; -+ bool is_opp_shared_dsu; -+ unsigned long rate; -+ unsigned long volt, mem_volt; -+}; -+static LIST_HEAD(cluster_info_list); -+ -+static struct cluster_info *rockchip_cluster_info_lookup(int cpu); -+ -+static int px30_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) -+{ -+ int ret = 0; -+ u8 value = 0; -+ -+ if (!bin) -+ return 0; ++ COMPOSITE_NODIV(PCLK_VPU_ROOT, "pclk_vpu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 4, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(PCLK_VPU_GRF, "pclk_vpu_grf", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(25), 12, GFLAGS), ++ GATE(PCLK_CRU_PCIE, "pclk_cru_pcie", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(25), 11, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 11, GFLAGS), ++ GATE(PCLK_CAN2, "pclk_can2", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 7, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 4, GFLAGS), ++ GATE(PCLK_CAN3, "pclk_can3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 9, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 11, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 13, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 13, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 9, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 14, GFLAGS), ++ GATE(PCLK_PCIE, "pclk_pcie", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 1, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 7, GFLAGS), ++ GATE(PCLK_VPU_IOC, "pclk_vpu_ioc", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(26), 8, GFLAGS), ++ GATE(PCLK_PIPE_GRF, "pclk_pipe_grf", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(30), 7, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(28), 1, GFLAGS), ++ GATE(PCLK_PCIE_PHY, "pclk_pcie_phy", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 6, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(27), 15, GFLAGS), ++ GATE(PCLK_MAC_VPU, "pclk_gmac1", "pclk_vpu_root", CLK_IS_CRITICAL, ++ RK3528_CLKGATE_CON(28), 6, GFLAGS), ++ GATE(PCLK_I2C6, "pclk_i2c6", "pclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(28), 3, GFLAGS), + -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "performance") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, "performance", &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc performance value\n"); -+ return ret; -+ } -+ *bin = value; -+ } -+ if (*bin >= 0) -+ dev_info(dev, 
"bin=%d\n", *bin); ++ COMPOSITE_NODIV(ACLK_VPU_L_ROOT, "aclk_vpu_l_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(60), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 0, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_EMMC, "aclk_emmc", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(26), 1, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_MAC_VPU, "aclk_gmac1", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(28), 5, GFLAGS), ++ GATE_NO_SET_RATE(ACLK_PCIE, "aclk_pcie", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(30), 3, GFLAGS), + -+ return ret; -+} ++ GATE_NO_SET_RATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_vpu_l_root", 0, ++ RK3528_CLKGATE_CON(33), 1, GFLAGS), + -+static int rk3288_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) -+{ -+ int ret = 0; -+ u8 value = 0; -+ char *name; ++ COMPOSITE_NODIV(HCLK_VPU_ROOT, "hclk_vpu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 2, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 10, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 13, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(HCLK_SAI_I2S0, "hclk_sai_i2s0", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 9, GFLAGS), ++ GATE(HCLK_SAI_I2S2, "hclk_sai_i2s2", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(26), 11, GFLAGS), + -+ if (!bin) -+ goto next; -+ if (of_property_match_string(np, "nvmem-cell-names", "special") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, "special", &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc special value\n"); -+ goto out; -+ } -+ if (value == 0xc) -+ *bin = 0; -+ else -+ *bin = 1; -+ } ++ GATE(HCLK_PCIE_SLV, "hclk_pcie_slv", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 4, GFLAGS), ++ GATE(HCLK_PCIE_DBI, "hclk_pcie_dbi", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(30), 5, GFLAGS), ++ GATE(HCLK_SDIO0, "hclk_sdio0", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 2, GFLAGS), ++ GATE(HCLK_SDIO1, "hclk_sdio1", "hclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(32), 4, GFLAGS), + -+ if (soc_is_rk3288w()) -+ name = "performance-w"; -+ else -+ name = "performance"; ++ COMPOSITE_NOMUX(CLK_GMAC1_VPU_25M, "clk_gmac1_25m", "ppll", 0, ++ RK3528_CLKSEL_CON(60), 2, 8, DFLAGS, ++ RK3528_CLKGATE_CON(25), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PPLL_125M_MATRIX, "clk_ppll_125m_src", "ppll", 0, ++ RK3528_CLKSEL_CON(60), 10, 5, DFLAGS, ++ RK3528_CLKGATE_CON(25), 2, GFLAGS), + -+ if (of_property_match_string(np, "nvmem-cell-names", name) >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, name, &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc performance value\n"); -+ goto out; -+ } -+ if (value & 0x2) -+ *bin = 3; -+ else if (value & 0x01) -+ *bin = 2; -+ } -+ if (*bin >= 0) -+ dev_info(dev, "bin=%d\n", *bin); ++ COMPOSITE(CLK_CAN3, "clk_can3", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(73), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C6, "clk_i2c6", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(64), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 4, GFLAGS), + -+next: -+ if (!process) -+ goto out; -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "process") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, "process", &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc process version\n"); -+ goto out; -+ } -+ if (soc_is_rk3288() && (value == 0 || value == 1)) -+ *process = 0; -+ } -+ if (*process >= 0) -+ dev_info(dev, 
"process=%d\n", *process); ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(61), 12, 2, MFLAGS, 6, 6, DFLAGS, ++ RK3528_CLKGATE_CON(25), 14, GFLAGS), ++ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(62), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(25), 15, GFLAGS), + -+out: -+ return ret; -+} ++ COMPOSITE_NODIV(ACLK_VPU_ROOT, "aclk_vpu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RK3528_CLKSEL_CON(61), 0, 2, MFLAGS, ++ RK3528_CLKGATE_CON(25), 3, GFLAGS), ++ GATE(ACLK_VPU, "aclk_vpu", "aclk_vpu_root", 0, ++ RK3528_CLKGATE_CON(25), 9, GFLAGS), + -+static int rk3399_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) -+{ -+ int ret = 0; -+ u8 value = 0; ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 10, 2, MFLAGS, ++ RK3528_CLKGATE_CON(27), 5, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO1, "cclk_src_sdio1", mux_gpll_cpll_xin24m_p, 0, ++ RK3528_CLKSEL_CON(72), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 3, GFLAGS), ++ COMPOSITE(CLK_CAN2, "clk_can2", mux_gpll_cpll_p, 0, ++ RK3528_CLKSEL_CON(73), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RK3528_CLKGATE_CON(32), 8, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK3528_CLKSEL_CON(74), 3, 5, DFLAGS, ++ RK3528_CLKGATE_CON(32), 15, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3528_CLKSEL_CON(74), 0, 3, DFLAGS, ++ RK3528_CLKGATE_CON(32), 12, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ RK3528_CLKSEL_CON(74), 8, 5, DFLAGS, ++ RK3528_CLKGATE_CON(33), 0, GFLAGS), ++ COMPOSITE_NODIV(BCLK_EMMC, "bclk_emmc", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(62), 8, 2, MFLAGS, ++ RK3528_CLKGATE_CON(26), 2, GFLAGS), ++ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s2_2ch_sai_src", 0, ++ RK3528_CLKSEL_CON(63), 0, 8, DFLAGS, ++ RK3528_CLKGATE_CON(26), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 12, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C5, "clk_i2c5", mux_200m_100m_50m_24m_p, 0, ++ RK3528_CLKSEL_CON(63), 14, 2, MFLAGS, ++ RK3528_CLKGATE_CON(28), 2, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI_I2S0, "mclk_sai_i2s0", mclk_sai_i2s0_p, CLK_SET_RATE_PARENT, ++ RK3528_CLKSEL_CON(62), 10, 1, MFLAGS, ++ RK3528_CLKGATE_CON(26), 10, GFLAGS), ++ GATE(MCLK_SAI_I2S2, "mclk_sai_i2s2", "mclk_i2s2_2ch_sai_src", 0, ++ RK3528_CLKGATE_CON(26), 12, GFLAGS), ++#if 0 ++ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, ++ RK3528_CLKGATE_CON(27), 6, GFLAGS), + -+ if (!bin) -+ return 0; ++ /* vpuphy */ ++ GATE(CLK_PIPE_USB3OTG_COMBO, "clk_pipe_usb3otg_combo", "clk_pipe_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 0, GFLAGS), ++ GATE(CLK_UTMI_USB3OTG, "clk_utmi_usb3otg", "clk_utmi_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 1, GFLAGS), ++ GATE(CLK_PCIE_PIPE_PHY, "clk_pcie_pipe_phy", "clk_pipe_usb3otg_io", 0, ++ RK3528_CLKGATE_CON(31), 2, GFLAGS), ++#endif ++ /* pcie */ ++ COMPOSITE_NOMUX(CLK_PPLL_100M_MATRIX, "clk_ppll_100m_src", "ppll", CLK_IS_CRITICAL, ++ RK3528_PCIE_CLKSEL_CON(1), 2, 5, DFLAGS, ++ RK3528_PCIE_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PPLL_50M_MATRIX, "clk_ppll_50m_src", "ppll", CLK_IS_CRITICAL, ++ RK3528_PCIE_CLKSEL_CON(1), 7, 5, DFLAGS, ++ RK3528_PCIE_CLKGATE_CON(0), 2, GFLAGS), ++ MUX(CLK_REF_PCIE_INNER_PHY, "clk_ref_pcie_inner_phy", clk_ref_pcie_inner_phy_p, 0, ++ RK3528_PCIE_CLKSEL_CON(1), 13, 1, MFLAGS), 
++ FACTOR(CLK_REF_PCIE_100M_PHY, "clk_ref_pcie_100m_phy", "clk_ppll_100m_src", 0, 1, 1), + -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "specification_serial_number") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, -+ "specification_serial_number", -+ &value); -+ if (ret) { -+ dev_err(dev, -+ "Failed to get specification_serial_number\n"); -+ goto out; -+ } ++ /* gmac */ ++ FACTOR(CLK_GMAC1_RMII_VPU, "clk_gmac1_50m", "clk_ppll_50m_src", 0, 1, 1), ++ FACTOR(CLK_GMAC1_SRC_VPU, "clk_gmac1_125m", "clk_ppll_125m_src", 0, 1, 1), + -+ if (value == 0xb) { -+ *bin = 0; -+ } else if (value == 0x1) { -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "customer_demand") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, -+ "customer_demand", -+ &value); -+ if (ret) { -+ dev_err(dev, "Failed to get customer_demand\n"); -+ goto out; -+ } -+ if (value == 0x0) -+ *bin = 0; -+ else -+ *bin = 1; -+ } -+ } else if (value == 0x10) { -+ *bin = 1; -+ } -+ } ++ /* they are orphans */ ++ DIV(CLK_GMAC0_SRC, "clk_gmac0_src", "clk_gmac0_io_i", 0, ++ RK3528_CLKSEL_CON(84), 3, 6, DFLAGS), ++ GATE(CLK_GMAC0_TX, "clk_gmac0_tx", "clk_gmac0_src", 0, ++ RK3528_CLKGATE_CON(41), 13, GFLAGS), ++ GATE(CLK_GMAC0_RX, "clk_gmac0_rx", "clk_gmac0_src", 0, ++ RK3528_CLKGATE_CON(41), 14, GFLAGS), ++ GATE(CLK_GMAC0_RMII_50M, "clk_gmac0_rmii_50m", "clk_gmac0_io_i", 0, ++ RK3528_CLKGATE_CON(41), 12, GFLAGS), ++ GATE(CLK_SCRKEYGEN, "clk_scrkeygen", "clk_pmupvtm_out", 0, ++ RK3528_PMU_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PVTM_OSCCHK, "clk_pvtm_oscchk", "clk_pmupvtm_out", 0, ++ RK3528_PMU_CLKGATE_CON(2), 1, GFLAGS), ++}; + -+out: -+ if (*bin >= 0) -+ dev_info(dev, "bin=%d\n", *bin); ++static struct rockchip_clk_branch rk3528_grf_clk_branches[] __initdata = { ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc0", RK3528_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc0", RK3528_SDMMC_CON1, 1), ++ MMC(SCLK_SDIO0_DRV, "sdio0_drv", "cclk_src_sdio0", RK3528_SDIO0_CON0, 1), ++ MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "cclk_src_sdio0", RK3528_SDIO0_CON1, 1), ++ MMC(SCLK_SDIO1_DRV, "sdio1_drv", "cclk_src_sdio1", RK3528_SDIO1_CON0, 1), ++ MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "cclk_src_sdio1", RK3528_SDIO1_CON1, 1), ++}; + -+ return ret; -+} ++static void __iomem *rk3528_cru_base; + -+static int rk3588_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) ++static void rk3528_dump_cru(void) +{ -+ int ret = 0; -+ u8 value = 0; -+ -+ if (!bin) -+ return 0; -+ -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "specification_serial_number") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, -+ "specification_serial_number", -+ &value); -+ if (ret) { -+ dev_err(dev, -+ "Failed to get specification_serial_number\n"); -+ return ret; -+ } -+ /* RK3588M */ -+ if (value == 0xd) -+ *bin = 1; -+ /* RK3588J */ -+ else if (value == 0xa) -+ *bin = 2; ++ if (rk3528_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3528_cru_base, ++ 0x8b8, false); ++ pr_warn("PCIE CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3528_cru_base + RK3528_PCIE_CRU_BASE, ++ 0x804, false); ++ pr_warn("DDRPHY CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3528_cru_base + RK3528_DDRPHY_CRU_BASE, ++ 0x804, false); + } -+ if (*bin < 0) -+ *bin = 0; -+ dev_info(dev, "bin=%d\n", *bin); -+ -+ return ret; +} + -+static int rk3588_change_length(struct device *dev, struct device_node *np, -+ struct 
rockchip_opp_info *opp_info) ++static void __init rk3528_clk_init(struct device_node *np) +{ -+ struct clk *clk; -+ unsigned long old_rate; -+ unsigned int low_len_sel; -+ u32 opp_flag = 0; -+ int ret = 0; -+ -+ if (opp_info->volt_sel < 0) -+ return 0; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **clks; + -+ clk = clk_get(dev, NULL); -+ if (IS_ERR(clk)) { -+ dev_warn(dev, "failed to get cpu clk\n"); -+ return PTR_ERR(clk); ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; + } + -+ /* RK3588 low speed grade should change to low length */ -+ if (of_property_read_u32(np, "rockchip,pvtm-low-len-sel", -+ &low_len_sel)) -+ goto out; -+ if (opp_info->volt_sel > low_len_sel) -+ goto out; -+ opp_flag = OPP_LENGTH_LOW; ++ rk3528_cru_base = reg_base; + -+ old_rate = clk_get_rate(clk); -+ ret = clk_set_rate(clk, old_rate | opp_flag); -+ if (ret) { -+ dev_err(dev, "failed to change length\n"); -+ goto out; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; + } -+ clk_set_rate(clk, old_rate); -+out: -+ clk_put(clk); ++ clks = ctx->clk_data.clks; + -+ return ret; -+} ++ rockchip_clk_register_plls(ctx, rk3528_pll_clks, ++ ARRAY_SIZE(rk3528_pll_clks), ++ RK3528_GRF_SOC_STATUS0); + -+static int rk3588_set_supported_hw(struct device *dev, struct device_node *np, -+ struct rockchip_opp_info *opp_info) -+{ -+ int bin = opp_info->bin; ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 2, clks[PLL_APLL], clks[PLL_GPLL], ++ &rk3528_cpuclk_data, rk3528_cpuclk_rates, ++ ARRAY_SIZE(rk3528_cpuclk_rates)); ++ rockchip_clk_register_branches(ctx, rk3528_clk_branches, ++ ARRAY_SIZE(rk3528_clk_branches)); + -+ if (!of_property_read_bool(np, "rockchip,supported-hw")) -+ return 0; ++ rockchip_register_softrst(np, 47, reg_base + RK3528_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); ++ rockchip_register_restart_notifier(ctx, RK3528_GLB_SRST_FST, NULL); + -+ if (bin < 0) -+ bin = 0; ++ rockchip_clk_of_add_provider(np, ctx); + -+ /* SoC Version */ -+ opp_info->supported_hw[0] = BIT(bin); -+ /* Speed Grade */ -+ opp_info->supported_hw[1] = BIT(opp_info->volt_sel); ++ if (!rk_dump_cru) ++ rk_dump_cru = rk3528_dump_cru; + -+ return 0; +} + -+static int rk3588_set_soc_info(struct device *dev, struct device_node *np, -+ struct rockchip_opp_info *opp_info) -+{ -+ rk3588_change_length(dev, np, opp_info); -+ rk3588_set_supported_hw(dev, np, opp_info); -+ -+ return 0; -+} ++CLK_OF_DECLARE(rk3528_cru, "rockchip,rk3528-cru", rk3528_clk_init); + -+static int rk3588_cpu_set_read_margin(struct device *dev, -+ struct rockchip_opp_info *opp_info, -+ u32 rm) ++static void __init rk3528_grf_clk_init(struct device_node *np) +{ -+ if (!opp_info->volt_rm_tbl) -+ return 0; -+ if (rm == opp_info->current_rm || rm == UINT_MAX) -+ return 0; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ dev_dbg(dev, "set rm to %d\n", rm); -+ if (opp_info->grf) { -+ regmap_write(opp_info->grf, 0x20, 0x001c0000 | (rm << 2)); -+ regmap_write(opp_info->grf, 0x28, 0x003c0000 | (rm << 2)); -+ regmap_write(opp_info->grf, 0x2c, 0x003c0000 | (rm << 2)); -+ regmap_write(opp_info->grf, 0x30, 0x00200020); -+ udelay(1); -+ regmap_write(opp_info->grf, 0x30, 0x00200000); -+ } -+ if (opp_info->dsu_grf) { -+ regmap_write(opp_info->dsu_grf, 0x20, 0x001c0000 | (rm << 2)); -+ regmap_write(opp_info->dsu_grf, 0x28, 0x003c0000 | (rm << 2)); -+ 
regmap_write(opp_info->dsu_grf, 0x2c, 0x003c0000 | (rm << 2)); -+ regmap_write(opp_info->dsu_grf, 0x30, 0x001c0000 | (rm << 2)); -+ regmap_write(opp_info->dsu_grf, 0x38, 0x001c0000 | (rm << 2)); -+ regmap_write(opp_info->dsu_grf, 0x18, 0x40004000); -+ udelay(1); -+ regmap_write(opp_info->dsu_grf, 0x18, 0x40000000); ++ reg_base = of_iomap(of_get_parent(np), 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru grf region\n", __func__); ++ return; + } + -+ opp_info->current_rm = rm; -+ -+ return 0; -+} -+ -+static int cpu_opp_config_regulators(struct device *dev, -+ struct dev_pm_opp *old_opp, -+ struct dev_pm_opp *new_opp, -+ struct regulator **regulators, -+ unsigned int count) -+{ -+ struct cluster_info *cluster; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip grf clk init failed\n", __func__); ++ return; ++ } + -+ cluster = rockchip_cluster_info_lookup(dev->id); -+ if (!cluster) -+ return -EINVAL; ++ rockchip_clk_register_branches(ctx, rk3528_grf_clk_branches, ++ ARRAY_SIZE(rk3528_grf_clk_branches)); + -+ return rockchip_opp_config_regulators(dev, old_opp, new_opp, regulators, -+ count, &cluster->opp_info); ++ rockchip_clk_of_add_provider(np, ctx); +} + -+static int rv1126_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) -+{ -+ int ret = 0; -+ u8 value = 0; -+ -+ if (of_property_match_string(np, "nvmem-cell-names", "performance") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, "performance", &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc performance value\n"); -+ return ret; -+ } -+ if (value == 0x1) -+ *bin = 1; -+ else -+ *bin = 0; -+ } -+ if (*bin >= 0) -+ dev_info(dev, "bin=%d\n", *bin); ++CLK_OF_DECLARE(rk3528_grf_cru, "rockchip,rk3528-grf-cru", rk3528_grf_clk_init); + -+ return ret; -+} +diff --git a/drivers/clk/rockchip/clk-rk3562.c b/drivers/clk/rockchip/clk-rk3562.c +new file mode 100644 +index 000000000..3c6f78fec +--- /dev/null ++++ b/drivers/clk/rockchip/clk-rk3562.c +@@ -0,0 +1,1134 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
++ * Author: Elaine Zhang ++ * Author: Finley Xiao ++ */ + -+static const struct rockchip_opp_data px30_cpu_opp_data = { -+ .get_soc_info = px30_get_soc_info, -+}; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+static const struct rockchip_opp_data rk3288_cpu_opp_data = { -+ .get_soc_info = rk3288_get_soc_info, -+}; ++#define RK3562_GRF_SOC_STATUS0 0x430 + -+static const struct rockchip_opp_data rk3399_cpu_opp_data = { -+ .get_soc_info = rk3399_get_soc_info, ++enum rk3562_plls { ++ apll, gpll, vpll, hpll, cpll, dpll, +}; + -+static const struct rockchip_opp_data rk3588_cpu_opp_data = { -+ .get_soc_info = rk3588_get_soc_info, -+ .set_soc_info = rk3588_set_soc_info, -+ .set_read_margin = rk3588_cpu_set_read_margin, -+ .config_regulators = cpu_opp_config_regulators, ++static struct rockchip_pll_rate_table rk3562_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(2208000000, 1, 92, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2184000000, 1, 91, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2160000000, 1, 90, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2088000000, 1, 87, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2064000000, 1, 86, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2040000000, 1, 85, 1, 1, 1, 0), ++ RK3036_PLL_RATE(2016000000, 1, 84, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1992000000, 1, 83, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1920000000, 1, 80, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1896000000, 1, 79, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1800000000, 1, 75, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1704000000, 1, 71, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), ++ RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0), ++ RK3036_PLL_RATE(100000000, 1, 150, 6, 6, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 
96, 6, 4, 1, 0), ++ RK3036_PLL_RATE(74250000, 2, 99, 4, 4, 1, 0), ++ { /* sentinel */ }, +}; + -+static const struct rockchip_opp_data rv1126_cpu_opp_data = { -+ .get_soc_info = rv1126_get_soc_info, -+}; ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(gpll_cpll_hpll_p) = { "gpll", "cpll", "hpll" }; ++PNAME(gpll_cpll_pvtpll_dmyapll_p) = { "gpll", "cpll", "log_pvtpll", "dummy_apll" }; ++PNAME(gpll_cpll_hpll_xin24m_p) = { "gpll", "cpll", "hpll", "xin24m" }; ++PNAME(gpll_cpll_vpll_dmyhpll_p) = { "gpll", "cpll", "vpll", "dummy_hpll" }; ++PNAME(gpll_dmyhpll_vpll_apll_p) = { "gpll", "dummy_hpll", "vpll", "apll" }; ++PNAME(gpll_cpll_xin24m_p) = { "gpll", "cpll", "xin24m" }; ++PNAME(gpll_cpll_xin24m_dmyapll_p) = { "gpll", "cpll", "xin24m", "dummy_apll" }; ++PNAME(gpll_cpll_xin24m_dmyhpll_p) = { "gpll", "cpll", "xin24m", "dummy_hpll" }; ++PNAME(vpll_dmyhpll_gpll_cpll_p) = { "vpll", "dummy_hpll", "gpll", "cpll" }; ++PNAME(mux_xin24m_32k_p) = { "xin24m", "clk_rtc_32k" }; ++PNAME(mux_50m_xin24m_p) = { "clk_matrix_50m_src", "xin24m" }; ++PNAME(mux_100m_50m_xin24m_p) = { "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; ++PNAME(mux_125m_xin24m_p) = { "clk_matrix_125m_src", "xin24m" }; ++PNAME(mux_200m_xin24m_32k_p) = { "clk_200m_pmu", "xin24m", "clk_rtc_32k" }; ++PNAME(mux_200m_100m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src" }; ++PNAME(mux_200m_100m_50m_xin24m_p) = { "clk_matrix_200m_src", "clk_matrix_100m_src", "clk_matrix_50m_src", "xin24m" }; ++PNAME(clk_sai0_p) = { "clk_sai0_src", "clk_sai0_frac", "xin_osc0_half", "mclk_sai0_from_io" }; ++PNAME(mclk_sai0_out2io_p) = { "mclk_sai0", "xin_osc0_half" }; ++PNAME(clk_sai1_p) = { "clk_sai1_src", "clk_sai1_frac", "xin_osc0_half", "mclk_sai1_from_io" }; ++PNAME(mclk_sai1_out2io_p) = { "mclk_sai1", "xin_osc0_half" }; ++PNAME(clk_sai2_p) = { "clk_sai2_src", "clk_sai2_frac", "xin_osc0_half", "mclk_sai2_from_io" }; ++PNAME(mclk_sai2_out2io_p) = { "mclk_sai2", "xin_osc0_half" }; ++PNAME(clk_spdif_p) = { "clk_spdif_src", "clk_spdif_frac", "xin_osc0_half" }; ++PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(clk_uart6_p) = { "clk_uart6_src", "clk_uart6_frac", "xin24m" }; ++PNAME(clk_uart7_p) = { "clk_uart7_src", "clk_uart7_frac", "xin24m" }; ++PNAME(clk_uart8_p) = { "clk_uart8_src", "clk_uart8_frac", "xin24m" }; ++PNAME(clk_uart9_p) = { "clk_uart9_src", "clk_uart9_frac", "xin24m" }; ++PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_32k_pvtm" }; ++PNAME(clk_pmu1_uart0_p) = { "clk_pmu1_uart0_src", "clk_pmu1_uart0_frac", "xin24m" }; ++PNAME(clk_pipephy_ref_p) = { "clk_pipephy_div", "clk_pipephy_xin24m" }; ++PNAME(clk_usbphy_ref_p) = { "clk_usb2phy_xin24m", "clk_24m_sscsrc" }; ++PNAME(clk_mipidsi_ref_p) = { "clk_mipidsiphy_xin24m", "clk_24m_sscsrc" }; + -+static const struct of_device_id rockchip_cpufreq_of_match[] = { -+ { -+ .compatible = "rockchip,px30", -+ .data = (void *)&px30_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rk3288", -+ .data = (void *)&rk3288_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rk3288w", -+ .data = (void *)&rk3288_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rk3326", -+ .data = (void *)&px30_cpu_opp_data, -+ }, -+ { -+ .compatible = 
"rockchip,rk3399", -+ .data = (void *)&rk3399_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rk3588", -+ .data = (void *)&rk3588_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rv1109", -+ .data = (void *)&rv1126_cpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rv1126", -+ .data = (void *)&rv1126_cpu_opp_data, -+ }, -+ {}, ++static struct rockchip_pll_clock rk3562_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ 0, RK3562_PLL_CON(0), ++ RK3562_MODE_CON, 0, 0, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RK3562_PLL_CON(24), ++ RK3562_MODE_CON, 2, 3, 0, rk3562_pll_rates), ++ [vpll] = PLL(pll_rk3328, PLL_VPLL, "vpll", mux_pll_p, ++ 0, RK3562_PLL_CON(32), ++ RK3562_MODE_CON, 6, 4, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [hpll] = PLL(pll_rk3328, PLL_HPLL, "hpll", mux_pll_p, ++ 0, RK3562_PLL_CON(40), ++ RK3562_MODE_CON, 8, 5, ++ ROCKCHIP_PLL_ALLOW_POWER_DOWN, rk3562_pll_rates), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RK3562_PMU1_PLL_CON(0), ++ RK3562_PMU1_MODE_CON, 0, 2, 0, rk3562_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IS_CRITICAL, RK3562_SUBDDR_PLL_CON(0), ++ RK3562_SUBDDR_MODE_CON, 0, 1, 0, NULL), +}; + -+static struct cluster_info *rockchip_cluster_info_lookup(int cpu) -+{ -+ struct cluster_info *cluster; -+ -+ list_for_each_entry(cluster, &cluster_info_list, list_head) { -+ if (cpumask_test_cpu(cpu, &cluster->cpus)) -+ return cluster; -+ } -+ -+ return NULL; -+} -+ -+static int rockchip_cpufreq_cluster_init(int cpu, struct cluster_info *cluster) -+{ -+ struct rockchip_opp_info *opp_info = &cluster->opp_info; -+ struct device_node *np; -+ struct device *dev; -+ char *reg_name; -+ int ret = 0; -+ u32 freq = 0; -+ -+ dev = get_cpu_device(cpu); -+ if (!dev) -+ return -ENODEV; -+ -+ np = of_parse_phandle(dev->of_node, "operating-points-v2", 0); -+ if (!np) { -+ dev_warn(dev, "OPP-v2 not supported\n"); -+ return -ENOENT; -+ } -+ ret = dev_pm_opp_of_get_sharing_cpus(dev, &cluster->cpus); -+ if (ret) { -+ dev_err(dev, "Failed to get sharing cpus\n"); -+ of_node_put(np); -+ return ret; -+ } -+ cluster->is_opp_shared_dsu = of_property_read_bool(np, "rockchip,opp-shared-dsu"); -+ if (!of_property_read_u32(np, "rockchip,idle-threshold-freq", &freq)) -+ cluster->idle_threshold_freq = freq; -+ of_node_put(np); -+ -+ if (of_find_property(dev->of_node, "cpu-supply", NULL)) -+ reg_name = "cpu"; -+ else if (of_find_property(dev->of_node, "cpu0-supply", NULL)) -+ reg_name = "cpu0"; -+ else -+ return -ENOENT; -+ rockchip_get_opp_data(rockchip_cpufreq_of_match, opp_info); -+ ret = rockchip_init_opp_info(dev, opp_info, NULL, reg_name); -+ if (ret) -+ dev_err(dev, "failed to init opp info\n"); -+ -+ return ret; -+} -+ -+int rockchip_cpufreq_adjust_table(struct device *dev) -+{ -+ struct cluster_info *cluster; -+ -+ cluster = rockchip_cluster_info_lookup(dev->id); -+ if (!cluster) -+ return -EINVAL; -+ -+ return rockchip_adjust_opp_table(dev, &cluster->opp_info); -+} -+EXPORT_SYMBOL_GPL(rockchip_cpufreq_adjust_table); -+ -+int rockchip_cpufreq_opp_set_rate(struct device *dev, unsigned long target_freq) -+{ -+ struct cluster_info *cluster; -+ struct dev_pm_opp *opp; -+ struct rockchip_opp_info *opp_info; -+ struct dev_pm_opp_supply supplies[2] = {0}; -+ unsigned long freq; -+ int ret = 0; -+ -+ cluster = rockchip_cluster_info_lookup(dev->id); -+ if (!cluster) -+ return -EINVAL; -+ opp_info = &cluster->opp_info; -+ -+ 
rockchip_opp_dvfs_lock(opp_info); -+ ret = dev_pm_opp_set_rate(dev, target_freq); -+ if (!ret) { -+ cluster->rate = target_freq; -+ opp = dev_pm_opp_find_freq_ceil(dev, &freq); -+ if (!IS_ERR(opp)) { -+ dev_pm_opp_get_supplies(opp, supplies); -+ cluster->volt = supplies[0].u_volt; -+ if (opp_info->regulator_count > 1) -+ cluster->mem_volt = supplies[1].u_volt; -+ dev_pm_opp_put(opp); -+ } -+ } -+ rockchip_opp_dvfs_unlock(opp_info); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(rockchip_cpufreq_opp_set_rate); -+ -+static int rockchip_cpufreq_suspend(struct cpufreq_policy *policy) -+{ -+ int ret = 0; -+ -+ ret = cpufreq_generic_suspend(policy); -+ if (!ret) -+ rockchip_monitor_suspend_low_temp_adjust(policy->cpu); -+ -+ return ret; -+} -+ -+static int rockchip_cpufreq_add_monitor(struct cluster_info *cluster, -+ struct cpufreq_policy *policy) -+{ -+ struct device *dev = cluster->opp_info.dev; -+ struct monitor_dev_profile *mdevp = NULL; -+ struct monitor_dev_info *mdev_info = NULL; -+ -+ mdevp = kzalloc(sizeof(*mdevp), GFP_KERNEL); -+ if (!mdevp) -+ return -ENOMEM; -+ -+ mdevp->type = MONITOR_TYPE_CPU; -+ mdevp->low_temp_adjust = rockchip_monitor_cpu_low_temp_adjust; -+ mdevp->high_temp_adjust = rockchip_monitor_cpu_high_temp_adjust; -+ mdevp->check_rate_volt = rockchip_monitor_check_rate_volt; -+ mdevp->data = (void *)policy; -+ mdevp->opp_info = &cluster->opp_info; -+ cpumask_copy(&mdevp->allowed_cpus, policy->cpus); -+ mdev_info = rockchip_system_monitor_register(dev, mdevp); -+ if (IS_ERR(mdev_info)) { -+ kfree(mdevp); -+ dev_err(dev, "failed to register system monitor\n"); -+ return -EINVAL; -+ } -+ mdev_info->devp = mdevp; -+ cluster->mdev_info = mdev_info; -+ -+ return 0; -+} -+ -+static int rockchip_cpufreq_remove_monitor(struct cluster_info *cluster) -+{ -+ if (cluster->mdev_info) { -+ kfree(cluster->mdev_info->devp); -+ rockchip_system_monitor_unregister(cluster->mdev_info); -+ cluster->mdev_info = NULL; -+ } -+ -+ return 0; -+} -+ -+static int rockchip_cpufreq_remove_dsu_qos(struct cluster_info *cluster) -+{ -+ struct cluster_info *ci; ++#define MFLAGS CLK_MUX_HIWORD_MASK ++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ if (!cluster->is_opp_shared_dsu) -+ return 0; ++static struct rockchip_clk_branch rk3562_clk_sai0_fracmux __initdata = ++ MUX(CLK_SAI0, "clk_sai0", clk_sai0_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(3), 6, 2, MFLAGS); + -+ list_for_each_entry(ci, &cluster_info_list, list_head) { -+ if (ci->is_opp_shared_dsu) -+ continue; -+ if (freq_qos_request_active(&ci->dsu_qos_req)) -+ freq_qos_remove_request(&ci->dsu_qos_req); -+ } ++static struct rockchip_clk_branch rk3562_clk_sai1_fracmux __initdata = ++ MUX(CLK_SAI1, "clk_sai1", clk_sai1_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(5), 6, 2, MFLAGS); + -+ return 0; -+} ++static struct rockchip_clk_branch rk3562_clk_sai2_fracmux __initdata = ++ MUX(CLK_SAI2, "clk_sai2", clk_sai2_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(8), 6, 2, MFLAGS); + -+static int rockchip_cpufreq_add_dsu_qos_req(struct cluster_info *cluster, -+ struct cpufreq_policy *policy) -+{ -+ struct device *dev = cluster->opp_info.dev; -+ struct cluster_info *ci; -+ int ret; ++static struct rockchip_clk_branch rk3562_clk_spdif_fracmux __initdata = ++ MUX(CLK_SPDIF, "clk_spdif", clk_spdif_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(15), 6, 2, MFLAGS); + -+ if (!cluster->is_opp_shared_dsu) -+ return 0; ++static struct rockchip_clk_branch rk3562_clk_uart1_fracmux __initdata = ++ 
MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(21), 14, 2, MFLAGS); + -+ list_for_each_entry(ci, &cluster_info_list, list_head) { -+ if (ci->is_opp_shared_dsu) -+ continue; -+ ret = freq_qos_add_request(&policy->constraints, -+ &ci->dsu_qos_req, -+ FREQ_QOS_MIN, -+ FREQ_QOS_MIN_DEFAULT_VALUE); -+ if (ret < 0) { -+ dev_err(dev, "failed to add dsu freq constraint\n"); -+ goto error; -+ } -+ } ++static struct rockchip_clk_branch rk3562_clk_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(23), 14, 2, MFLAGS); + -+ return 0; ++static struct rockchip_clk_branch rk3562_clk_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(25), 14, 2, MFLAGS); + -+error: -+ rockchip_cpufreq_remove_dsu_qos(cluster); ++static struct rockchip_clk_branch rk3562_clk_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(27), 14, 2, MFLAGS); + -+ return ret; -+} ++static struct rockchip_clk_branch rk3562_clk_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(29), 14, 2, MFLAGS); + -+static int rockchip_cpufreq_notifier(struct notifier_block *nb, -+ unsigned long event, void *data) -+{ -+ struct cpufreq_policy *policy = data; -+ struct cluster_info *cluster; ++static struct rockchip_clk_branch rk3562_clk_uart6_fracmux __initdata = ++ MUX(CLK_UART6, "clk_uart6", clk_uart6_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(31), 14, 2, MFLAGS); + -+ cluster = rockchip_cluster_info_lookup(policy->cpu); -+ if (!cluster) -+ return NOTIFY_BAD; ++static struct rockchip_clk_branch rk3562_clk_uart7_fracmux __initdata = ++ MUX(CLK_UART7, "clk_uart7", clk_uart7_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(33), 14, 2, MFLAGS); + -+ if (event == CPUFREQ_CREATE_POLICY) { -+ if (rockchip_cpufreq_add_monitor(cluster, policy)) -+ return NOTIFY_BAD; -+ if (rockchip_cpufreq_add_dsu_qos_req(cluster, policy)) -+ return NOTIFY_BAD; -+ } else if (event == CPUFREQ_REMOVE_POLICY) { -+ rockchip_cpufreq_remove_monitor(cluster); -+ rockchip_cpufreq_remove_dsu_qos(cluster); -+ } ++static struct rockchip_clk_branch rk3562_clk_uart8_fracmux __initdata = ++ MUX(CLK_UART8, "clk_uart8", clk_uart8_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(35), 14, 2, MFLAGS); + -+ return NOTIFY_OK; -+} ++static struct rockchip_clk_branch rk3562_clk_uart9_fracmux __initdata = ++ MUX(CLK_UART9, "clk_uart9", clk_uart9_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(37), 14, 2, MFLAGS); + -+static struct notifier_block rockchip_cpufreq_notifier_block = { -+ .notifier_call = rockchip_cpufreq_notifier, -+}; ++static struct rockchip_clk_branch rk3562_rtc32k_pmu_fracmux __initdata = ++ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RK3562_PMU0_CLKSEL_CON(1), 0, 2, MFLAGS); + -+#ifdef MODULE -+static struct pm_qos_request idle_pm_qos; -+static int idle_disable_refcnt; -+static DEFINE_MUTEX(idle_disable_lock); ++static struct rockchip_clk_branch rk3562_clk_pmu1_uart0_fracmux __initdata = ++ MUX(CLK_PMU1_UART0, "clk_pmu1_uart0", clk_pmu1_uart0_p, CLK_SET_RATE_PARENT, ++ RK3562_PMU1_CLKSEL_CON(2), 6, 2, MFLAGS); + -+static int rockchip_cpufreq_idle_state_disable(struct cpumask *cpumask, -+ int index, bool disable) -+{ -+ mutex_lock(&idle_disable_lock); ++static struct rockchip_clk_branch rk3562_clk_branches[] 
__initdata = { ++ /* ++ * CRU Clock-Architecture ++ */ ++ /* PD_TOP */ ++ COMPOSITE(CLK_MATRIX_50M_SRC, "clk_matrix_50m_src", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE(CLK_MATRIX_100M_SRC, "clk_matrix_100m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(CLK_MATRIX_125M_SRC, "clk_matrix_125m_src", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(CLK_MATRIX_200M_SRC, "clk_matrix_200m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(2), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE(CLK_MATRIX_300M_SRC, "clk_matrix_300m_src", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(3), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE(ACLK_TOP, "aclk_top", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(5), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(ACLK_TOP_VIO, "aclk_top_vio", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(5), 15, 1, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE(CLK_24M_SSCSRC, "clk_24m_sscsrc", vpll_dmyhpll_gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(6), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE(CLK_CAM0_OUT2IO, "clk_cam0_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(8), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE(CLK_CAM1_OUT2IO, "clk_cam1_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(8), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE(CLK_CAM2_OUT2IO, "clk_cam2_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(9), 6, 2, MFLAGS, 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(CLK_CAM3_OUT2IO, "clk_cam3_out2io", gpll_cpll_xin24m_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(9), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(1), 15, GFLAGS), ++ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), + -+ if (disable) { -+ if (idle_disable_refcnt == 0) -+ cpu_latency_qos_update_request(&idle_pm_qos, 0); -+ idle_disable_refcnt++; -+ } else { -+ if (--idle_disable_refcnt == 0) -+ cpu_latency_qos_update_request(&idle_pm_qos, -+ PM_QOS_DEFAULT_VALUE); -+ } ++ /* PD_BUS */ ++ COMPOSITE(ACLK_BUS, "aclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(40), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(18), 0, GFLAGS), ++ COMPOSITE(HCLK_BUS, "hclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(40), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RK3562_CLKGATE_CON(18), 1, GFLAGS), ++ COMPOSITE(PCLK_BUS, "pclk_bus", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(18), 2, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 0, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 1, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 2, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 3, GFLAGS), ++ GATE(PCLK_I2C5, "pclk_i2c5", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(19), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C, "clk_i2c", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(41), 8, 2, MFLAGS, ++ RK3562_CLKGATE_CON(19), 5, GFLAGS), ++ GATE(CLK_I2C1, "clk_i2c1", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 6, 
GFLAGS), ++ GATE(CLK_I2C2, "clk_i2c2", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 7, GFLAGS), ++ GATE(CLK_I2C3, "clk_i2c3", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 8, GFLAGS), ++ GATE(CLK_I2C4, "clk_i2c4", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 9, GFLAGS), ++ GATE(CLK_I2C5, "clk_i2c5", "clk_i2c", 0, ++ RK3562_CLKGATE_CON(19), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_BUS_GPIO, "dclk_bus_gpio", mux_xin24m_32k_p, 0, ++ RK3562_CLKSEL_CON(41), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(20), 4, GFLAGS), ++ GATE(DCLK_BUS_GPIO3, "dclk_bus_gpio3", "dclk_bus_gpio", 0, ++ RK3562_CLKGATE_CON(20), 5, GFLAGS), ++ GATE(DCLK_BUS_GPIO4, "dclk_bus_gpio4", "dclk_bus_gpio", 0, ++ RK3562_CLKGATE_CON(20), 6, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(21), 0, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 1, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 2, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 3, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 4, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 5, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "xin24m", 0, ++ RK3562_CLKGATE_CON(21), 6, GFLAGS), ++ GATE(PCLK_STIMER, "pclk_stimer", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 7, GFLAGS), ++ GATE(CLK_STIMER0, "clk_stimer0", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 8, GFLAGS), ++ GATE(CLK_STIMER1, "clk_stimer1", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(21), 9, GFLAGS), ++ GATE(PCLK_WDTNS, "pclk_wdtns", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 0, GFLAGS), ++ GATE(CLK_WDTNS, "clk_wdtns", "xin24m", 0, ++ RK3562_CLKGATE_CON(22), 1, GFLAGS), ++ GATE(PCLK_GRF, "pclk_grf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 2, GFLAGS), ++ GATE(PCLK_SGRF, "pclk_sgrf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 3, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 4, GFLAGS), ++ GATE(PCLK_INTC, "pclk_intc", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(22), 5, GFLAGS), ++ GATE(ACLK_BUS_GIC400, "aclk_bus_gic400", "aclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(22), 6, GFLAGS), ++ GATE(ACLK_BUS_SPINLOCK, "aclk_bus_spinlock", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(23), 0, GFLAGS), ++ GATE(ACLK_DCF, "aclk_dcf", "aclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 1, GFLAGS), ++ GATE(PCLK_DCF, "pclk_dcf", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 2, GFLAGS), ++ GATE(FCLK_BUS_CM0_CORE, "fclk_bus_cm0_core", "hclk_bus", 0, ++ RK3562_CLKGATE_CON(23), 3, GFLAGS), ++ GATE(CLK_BUS_CM0_RTC, "clk_bus_cm0_rtc", "clk_rtc_32k", 0, ++ RK3562_CLKGATE_CON(23), 4, GFLAGS), ++ GATE(HCLK_ICACHE, "hclk_icache", "hclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 8, GFLAGS), ++ GATE(HCLK_DCACHE, "hclk_dcache", "hclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(23), 9, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(24), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RK3562_CLKSEL_CON(43), 0, 11, DFLAGS, ++ RK3562_CLKGATE_CON(24), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ RK3562_CLKSEL_CON(43), 11, 5, DFLAGS, ++ RK3562_CLKGATE_CON(24), 3, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(24), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC_VCCIO156, "clk_saradc_vccio156", "xin24m", 0, ++ 
RK3562_CLKSEL_CON(44), 0, 12, DFLAGS, ++ RK3562_CLKGATE_CON(24), 9, GFLAGS), ++ GATE(PCLK_GMAC, "pclk_gmac", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 0, GFLAGS), ++ GATE(ACLK_GMAC, "aclk_gmac", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC_125M_CRU_I, "clk_gmac_125m_cru_i", mux_125m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(45), 8, 1, MFLAGS, ++ RK3562_CLKGATE_CON(25), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_GMAC_50M_CRU_I, "clk_gmac_50m_cru_i", mux_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(45), 7, 1, MFLAGS, ++ RK3562_CLKGATE_CON(25), 3, GFLAGS), ++ COMPOSITE(CLK_GMAC_ETH_OUT2IO, "clk_gmac_eth_out2io", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(46), 7, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_CLKGATE_CON(25), 4, GFLAGS), ++ GATE(PCLK_APB2ASB_VCCIO156, "pclk_apb2asb_vccio156", "pclk_bus", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(25), 5, GFLAGS), ++ GATE(PCLK_TO_VCCIO156, "pclk_to_vccio156", "pclk_bus", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(25), 6, GFLAGS), ++ GATE(PCLK_DSIPHY, "pclk_dsiphy", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 8, GFLAGS), ++ GATE(PCLK_DSITX, "pclk_dsitx", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 9, GFLAGS), ++ GATE(PCLK_CPU_EMA_DET, "pclk_cpu_ema_det", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(25), 10, GFLAGS), ++ GATE(PCLK_HASH, "pclk_hash", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(25), 11, GFLAGS), ++ GATE(PCLK_TOPCRU, "pclk_topcru", "pclk_bus", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(25), 15, GFLAGS), ++ GATE(PCLK_ASB2APB_VCCIO156, "pclk_asb2apb_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(26), 0, GFLAGS), ++ GATE(PCLK_IOC_VCCIO156, "pclk_ioc_vccio156", "pclk_to_vccio156", CLK_IS_CRITICAL, ++ RK3562_CLKGATE_CON(26), 1, GFLAGS), ++ GATE(PCLK_GPIO3_VCCIO156, "pclk_gpio3_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 2, GFLAGS), ++ GATE(PCLK_GPIO4_VCCIO156, "pclk_gpio4_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 3, GFLAGS), ++ GATE(PCLK_SARADC_VCCIO156, "pclk_saradc_vccio156", "pclk_to_vccio156", 0, ++ RK3562_CLKGATE_CON(26), 4, GFLAGS), ++ GATE(PCLK_MAC100, "pclk_mac100", "pclk_bus", 0, ++ RK3562_CLKGATE_CON(27), 0, GFLAGS), ++ GATE(ACLK_MAC100, "aclk_mac100", "aclk_bus", 0, ++ RK3562_CLKGATE_CON(27), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_MAC100_50M_MATRIX, "clk_mac100_50m_matrix", mux_50m_xin24m_p, 0, ++ RK3562_CLKSEL_CON(47), 7, 1, MFLAGS, ++ RK3562_CLKGATE_CON(27), 2, GFLAGS), + -+ mutex_unlock(&idle_disable_lock); ++ /* PD_CORE */ ++ COMPOSITE_NOMUX(0, "aclk_core_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, ++ RK3562_CLKSEL_CON(11), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3562_CLKGATE_CON(4), 3, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg_pre", "scmi_clk_cpu", CLK_IGNORE_UNUSED, ++ RK3562_CLKSEL_CON(12), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RK3562_CLKGATE_CON(4), 5, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_CORE, "hclk_core", "gpll", CLK_IS_CRITICAL, ++ RK3562_CLKSEL_CON(13), 0, 6, DFLAGS, ++ RK3562_CLKGATE_CON(5), 2, GFLAGS), ++ GATE(0, "pclk_dbg_daplite", "pclk_dbg_pre", CLK_IGNORE_UNUSED, ++ RK3562_CLKGATE_CON(4), 10, GFLAGS), + -+ return 0; -+} -+#else -+static int rockchip_cpufreq_idle_state_disable(struct cpumask *cpumask, -+ int index, bool disable) -+{ -+ unsigned int cpu; ++ /* PD_DDR */ ++ FACTOR_GATE(0, "clk_gpll_mux_to_ddr", "gpll", 0, 1, 4, ++ RK3328_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_DDR, "pclk_ddr", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, ++ RK3562_DDR_CLKSEL_CON(1), 8, 5, DFLAGS, ++ RK3562_DDR_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_MSCH_BRG_BIU, 
"clk_msch_brg_biu", "clk_gpll_mux_to_ddr", CLK_IS_CRITICAL, ++ RK3562_DDR_CLKSEL_CON(1), 0, 4, DFLAGS, ++ RK3562_DDR_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(PCLK_DDR_UPCTL, "pclk_ddr_upctl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_DDR_PHY, "pclk_ddr_phy", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_DDR_DFICTL, "pclk_ddr_dfictl", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_DDR_DMA2DDR, "pclk_ddr_dma2ddr", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_DDR_MON, "pclk_ddr_mon", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(TMCLK_DDR_MON, "tmclk_ddr_mon", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_DDR_GRF, "pclk_ddr_grf", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_DDR_CRU, "pclk_ddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_SUBDDR_CRU, "pclk_subddr_cru", "pclk_ddr", CLK_IGNORE_UNUSED, ++ RK3562_DDR_CLKGATE_CON(1), 4, GFLAGS), + -+ for_each_cpu(cpu, cpumask) { -+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); -+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ /* PD_GPU */ ++ COMPOSITE(CLK_GPU_PRE, "clk_gpu_pre", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(18), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(8), 0, GFLAGS), ++ COMPOSITE_NOMUX(ACLK_GPU_PRE, "aclk_gpu_pre", "clk_gpu_pre", 0, ++ RK3562_CLKSEL_CON(19), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(8), 2, GFLAGS), ++ GATE(CLK_GPU, "clk_gpu", "clk_gpu_pre", 0, ++ RK3562_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_GPU_BRG, "clk_gpu_brg", mux_200m_100m_p, 0, ++ RK3562_CLKSEL_CON(19), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(8), 8, GFLAGS), + -+ if (!dev || !drv) -+ continue; -+ if (index >= drv->state_count) -+ continue; -+ cpuidle_driver_state_disabled(drv, index, disable); -+ } ++ /* PD_NPU */ ++ COMPOSITE(CLK_NPU_PRE, "clk_npu_pre", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(15), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_NPU_PRE, "hclk_npu_pre", "clk_npu_pre", 0, ++ RK3562_CLKSEL_CON(16), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(ACLK_RKNN, "aclk_rknn", "clk_npu_pre", 0, ++ RK3562_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_pre", 0, ++ RK3562_CLKGATE_CON(6), 5, GFLAGS), + -+ if (disable) { -+ preempt_disable(); -+ for_each_cpu(cpu, cpumask) { -+ if (cpu != smp_processor_id() && cpu_online(cpu)) -+ wake_up_if_idle(cpu); -+ } -+ preempt_enable(); -+ } -+ -+ return 0; -+} -+#endif -+ -+#define cpu_to_dsu_freq(freq) ((freq) * 4 / 5) -+ -+static int rockchip_cpufreq_update_dsu_req(struct cluster_info *cluster, -+ unsigned int freq) -+{ -+ struct device *dev = cluster->opp_info.dev; -+ unsigned int dsu_freq = rounddown(cpu_to_dsu_freq(freq), 100000); -+ -+ if (cluster->is_opp_shared_dsu || -+ !freq_qos_request_active(&cluster->dsu_qos_req)) -+ return 0; ++ /* PD_PERI */ ++ COMPOSITE(ACLK_PERI, "aclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(0), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(HCLK_PERI, "hclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(0), 15, 1, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 1, GFLAGS), 
++ COMPOSITE(PCLK_PERI, "pclk_peri", gpll_cpll_p, CLK_IS_CRITICAL, ++ RK3562_PERI_CLKSEL_CON(1), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PERICRU, "pclk_pericru", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(HCLK_SAI0, "hclk_sai0", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE(CLK_SAI0_SRC, "clk_sai0_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(1), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI0_FRAC, "clk_sai0_frac", "clk_sai0_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(2), 0, ++ RK3562_PERI_CLKGATE_CON(2), 2, GFLAGS, ++ &rk3562_clk_sai0_fracmux), ++ GATE(MCLK_SAI0, "mclk_sai0", "clk_sai0", 0, ++ RK3562_PERI_CLKGATE_CON(2), 3, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI0_OUT2IO, "mclk_sai0_out2io", mclk_sai0_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(3), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(HCLK_SAI1, "hclk_sai1", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE(CLK_SAI1_SRC, "clk_sai1_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI1_FRAC, "clk_sai1_frac", "clk_sai1_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(4), 0, ++ RK3562_PERI_CLKGATE_CON(2), 7, GFLAGS, ++ &rk3562_clk_sai1_fracmux), ++ GATE(MCLK_SAI1, "mclk_sai1", "clk_sai1", 0, ++ RK3562_PERI_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI1_OUT2IO, "mclk_sai1_out2io", mclk_sai1_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(5), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 9, GFLAGS), ++ GATE(HCLK_SAI2, "hclk_sai2", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(2), 10, GFLAGS), ++ COMPOSITE(CLK_SAI2_SRC, "clk_sai2_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(6), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SAI2_FRAC, "clk_sai2_frac", "clk_sai2_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(7), 0, ++ RK3562_PERI_CLKGATE_CON(2), 12, GFLAGS, ++ &rk3562_clk_sai2_fracmux), ++ GATE(MCLK_SAI2, "mclk_sai2", "clk_sai2", 0, ++ RK3562_PERI_CLKGATE_CON(2), 13, GFLAGS), ++ COMPOSITE_NODIV(MCLK_SAI2_OUT2IO, "mclk_sai2_out2io", mclk_sai2_out2io_p, CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(8), 5, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(2), 14, GFLAGS), ++ GATE(HCLK_DSM, "hclk_dsm", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 1, GFLAGS), ++ GATE(CLK_DSM, "clk_dsm", "mclk_sai1", 0, ++ RK3562_PERI_CLKGATE_CON(3), 2, GFLAGS), ++ GATE(HCLK_PDM, "hclk_pdm", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE(MCLK_PDM, "mclk_pdm", gpll_cpll_hpll_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(12), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(3), 5, GFLAGS), ++ GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(3), 8, GFLAGS), ++ COMPOSITE(CLK_SPDIF_SRC, "clk_spdif_src", gpll_cpll_hpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(13), 14, 2, MFLAGS, 8, 6, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_SPDIF_FRAC, "clk_spdif_frac", "clk_spdif_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(14), 0, ++ RK3562_PERI_CLKGATE_CON(3), 10, GFLAGS, ++ &rk3562_clk_spdif_fracmux), ++ GATE(MCLK_SPDIF, "mclk_spdif", "clk_spdif", 0, ++ RK3562_PERI_CLKGATE_CON(3), 11, GFLAGS), ++ GATE(HCLK_SDMMC0, "hclk_sdmmc0", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 0, GFLAGS), ++ 
COMPOSITE(CCLK_SDMMC0, "cclk_sdmmc0", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(16), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 1, GFLAGS), ++ MMC(SCLK_SDMMC0_DRV, "sdmmc0_drv", "cclk_sdmmc0", RK3562_SDMMC0_CON0, 1), ++ MMC(SCLK_SDMMC0_SAMPLE, "sdmmc0_sample", "cclk_sdmmc0", RK3562_SDMMC0_CON1, 1), ++ GATE(HCLK_SDMMC1, "hclk_sdmmc1", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE(CCLK_SDMMC1, "cclk_sdmmc1", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(17), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 3, GFLAGS), ++ MMC(SCLK_SDMMC1_DRV, "sdmmc1_drv", "cclk_sdmmc1", RK3562_SDMMC1_CON0, 1), ++ MMC(SCLK_SDMMC1_SAMPLE, "sdmmc1_sample", "cclk_sdmmc1", RK3562_SDMMC1_CON1, 1), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 8, GFLAGS), ++ GATE(ACLK_EMMC, "aclk_emmc", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 9, GFLAGS), ++ COMPOSITE(CCLK_EMMC, "cclk_emmc", gpll_cpll_xin24m_dmyhpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(18), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 10, GFLAGS), ++ COMPOSITE(BCLK_EMMC, "bclk_emmc", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(19), 15, 1, MFLAGS, 8, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 11, GFLAGS), ++ GATE(TMCLK_EMMC, "tmclk_emmc", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(4), 12, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", gpll_cpll_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 8, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(4), 13, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(4), 14, GFLAGS), ++ GATE(HCLK_USB2HOST, "hclk_usb2host", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(5), 0, GFLAGS), ++ GATE(HCLK_USB2HOST_ARB, "hclk_usb2host_arb", "hclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(5), 1, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(6), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 12, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(6), 1, GFLAGS), ++ GATE(SCLK_IN_SPI1, "sclk_in_spi1", "sclk_in_spi1_io", 0, ++ RK3562_PERI_CLKGATE_CON(6), 2, GFLAGS), ++ GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI2, "clk_spi2", mux_200m_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(20), 14, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(6), 4, GFLAGS), ++ GATE(SCLK_IN_SPI2, "sclk_in_spi2", "sclk_in_spi2_io", 0, ++ RK3562_PERI_CLKGATE_CON(6), 5, GFLAGS), ++ GATE(PCLK_UART1, "pclk_uart1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 0, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 1, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 2, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 3, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 4, GFLAGS), ++ GATE(PCLK_UART6, "pclk_uart6", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 5, GFLAGS), ++ GATE(PCLK_UART7, "pclk_uart7", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 6, GFLAGS), ++ GATE(PCLK_UART8, "pclk_uart8", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 7, GFLAGS), ++ GATE(PCLK_UART9, "pclk_uart9", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(7), 8, GFLAGS), ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(21), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(7), 9, GFLAGS), ++ 
COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(22), 0, ++ RK3562_PERI_CLKGATE_CON(7), 10, GFLAGS, ++ &rk3562_clk_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RK3562_PERI_CLKGATE_CON(7), 11, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(23), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(7), 12, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(24), 0, ++ RK3562_PERI_CLKGATE_CON(7), 13, GFLAGS, ++ &rk3562_clk_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RK3562_PERI_CLKGATE_CON(7), 14, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(25), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(7), 15, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(26), 0, ++ RK3562_PERI_CLKGATE_CON(8), 0, GFLAGS, ++ &rk3562_clk_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RK3562_PERI_CLKGATE_CON(8), 1, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(27), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 2, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(28), 0, ++ RK3562_PERI_CLKGATE_CON(8), 3, GFLAGS, ++ &rk3562_clk_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RK3562_PERI_CLKGATE_CON(8), 4, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(29), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 5, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(30), 0, ++ RK3562_PERI_CLKGATE_CON(8), 6, GFLAGS, ++ &rk3562_clk_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RK3562_PERI_CLKGATE_CON(8), 7, GFLAGS), ++ COMPOSITE(CLK_UART6_SRC, "clk_uart6_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(31), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART6_FRAC, "clk_uart6_frac", "clk_uart6_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(32), 0, ++ RK3562_PERI_CLKGATE_CON(8), 9, GFLAGS, ++ &rk3562_clk_uart6_fracmux), ++ GATE(SCLK_UART6, "sclk_uart6", "clk_uart6", 0, ++ RK3562_PERI_CLKGATE_CON(8), 10, GFLAGS), ++ COMPOSITE(CLK_UART7_SRC, "clk_uart7_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(33), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 11, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART7_FRAC, "clk_uart7_frac", "clk_uart7_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(34), 0, ++ RK3562_PERI_CLKGATE_CON(8), 12, GFLAGS, ++ &rk3562_clk_uart7_fracmux), ++ GATE(SCLK_UART7, "sclk_uart7", "clk_uart7", 0, ++ RK3562_PERI_CLKGATE_CON(8), 13, GFLAGS), ++ COMPOSITE(CLK_UART8_SRC, "clk_uart8_src", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(35), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(8), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART8_FRAC, "clk_uart8_frac", "clk_uart8_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(36), 0, ++ RK3562_PERI_CLKGATE_CON(8), 15, GFLAGS, ++ &rk3562_clk_uart8_fracmux), ++ GATE(SCLK_UART8, "sclk_uart8", "clk_uart8", 0, ++ RK3562_PERI_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE(CLK_UART9_SRC, "clk_uart9_src", gpll_cpll_p, 0, 
++ RK3562_PERI_CLKSEL_CON(37), 8, 1, MFLAGS, 0, 7, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART9_FRAC, "clk_uart9_frac", "clk_uart9_src", CLK_SET_RATE_PARENT, ++ RK3562_PERI_CLKSEL_CON(38), 0, ++ RK3562_PERI_CLKGATE_CON(9), 2, GFLAGS, ++ &rk3562_clk_uart9_fracmux), ++ GATE(SCLK_UART9, "sclk_uart9", "clk_uart9", 0, ++ RK3562_PERI_CLKGATE_CON(9), 3, GFLAGS), ++ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 0, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 1, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 2, GFLAGS), ++ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 6, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 5, GFLAGS), ++ GATE(PCLK_PWM3_PERI, "pclk_pwm3_peri", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(10), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM3_PERI, "clk_pwm3_peri", mux_100m_50m_xin24m_p, 0, ++ RK3562_PERI_CLKSEL_CON(40), 8, 2, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(CLK_CAPTURE_PWM3_PERI, "clk_capture_pwm3_peri", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(10), 8, GFLAGS), ++ GATE(PCLK_CAN0, "pclk_can0", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE(CLK_CAN0, "clk_can0", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(41), 7, 1, MFLAGS, 0, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(11), 1, GFLAGS), ++ GATE(PCLK_CAN1, "pclk_can1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(11), 2, GFLAGS), ++ COMPOSITE(CLK_CAN1, "clk_can1", gpll_cpll_p, 0, ++ RK3562_PERI_CLKSEL_CON(41), 15, 1, MFLAGS, 8, 5, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(11), 3, GFLAGS), ++ GATE(PCLK_PERI_WDT, "pclk_peri_wdt", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PERI_WDT, "tclk_peri_wdt", mux_xin24m_32k_p, 0, ++ RK3562_PERI_CLKSEL_CON(43), 15, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(ACLK_SYSMEM, "aclk_sysmem", "aclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 2, GFLAGS), ++ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 3, GFLAGS), ++ GATE(PCLK_PERI_GRF, "pclk_peri_grf", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(13), 4, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 5, GFLAGS), ++ GATE(ACLK_RKDMAC, "aclk_rkdmac", "aclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(14), 0, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(14), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, ++ RK3562_PERI_CLKSEL_CON(44), 0, 8, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(14), 2, GFLAGS), ++ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_peri", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(14), 3, GFLAGS), ++ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKGATE_CON(14), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PERI_CLKSEL_CON(44), 8, 8, DFLAGS, ++ 
RK3562_PERI_CLKGATE_CON(14), 5, GFLAGS), ++ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, ++ RK3562_PERI_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(PCLK_OTPPHY, "pclk_otpphy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(14), 7, GFLAGS), ++ GATE(PCLK_USB2PHY, "pclk_usb2phy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(15), 0, GFLAGS), ++ GATE(PCLK_PIPEPHY, "pclk_pipephy", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(15), 7, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(16), 4, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RK3562_PERI_CLKSEL_CON(46), 0, 12, DFLAGS, ++ RK3562_PERI_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(PCLK_IOC_VCCIO234, "pclk_ioc_vccio234", "pclk_peri", CLK_IS_CRITICAL, ++ RK3562_PERI_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(PCLK_PERI_GPIO1, "pclk_peri_gpio1", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(17), 0, GFLAGS), ++ GATE(PCLK_PERI_GPIO2, "pclk_peri_gpio2", "pclk_peri", 0, ++ RK3562_PERI_CLKGATE_CON(17), 1, GFLAGS), ++ COMPOSITE_NODIV(DCLK_PERI_GPIO, "dclk_peri_gpio", mux_xin24m_32k_p, 0, ++ RK3562_PERI_CLKSEL_CON(47), 8, 1, MFLAGS, ++ RK3562_PERI_CLKGATE_CON(17), 4, GFLAGS), ++ GATE(DCLK_PERI_GPIO1, "dclk_peri_gpio1", "dclk_peri_gpio", 0, ++ RK3562_PERI_CLKGATE_CON(17), 2, GFLAGS), ++ GATE(DCLK_PERI_GPIO2, "dclk_peri_gpio2", "dclk_peri_gpio", 0, ++ RK3562_PERI_CLKGATE_CON(17), 3, GFLAGS), + -+ dev_dbg(dev, "cpu to dsu: %u -> %u\n", freq, dsu_freq); ++ /* PD_PHP */ ++ COMPOSITE(ACLK_PHP, "aclk_php", gpll_cpll_p, 0, ++ RK3562_CLKSEL_CON(36), 7, 1, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(16), 0, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_PHP, "pclk_php", "aclk_php", 0, ++ RK3562_CLKSEL_CON(36), 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(16), 1, GFLAGS), ++ GATE(ACLK_PCIE20_MST, "aclk_pcie20_mst", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 4, GFLAGS), ++ GATE(ACLK_PCIE20_SLV, "aclk_pcie20_slv", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 5, GFLAGS), ++ GATE(ACLK_PCIE20_DBI, "aclk_pcie20_dbi", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 6, GFLAGS), ++ GATE(PCLK_PCIE20, "pclk_pcie20", "pclk_php", 0, ++ RK3562_CLKGATE_CON(16), 7, GFLAGS), ++ GATE(CLK_PCIE20_AUX, "clk_pcie20_aux", "xin24m", 0, ++ RK3562_CLKGATE_CON(16), 8, GFLAGS), ++ GATE(ACLK_USB3OTG, "aclk_usb3otg", "aclk_php", 0, ++ RK3562_CLKGATE_CON(16), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB3OTG_SUSPEND, "clk_usb3otg_suspend", mux_xin24m_32k_p, 0, ++ RK3562_CLKSEL_CON(36), 15, 1, MFLAGS, ++ RK3562_CLKGATE_CON(16), 11, GFLAGS), ++ GATE(CLK_USB3OTG_REF, "clk_usb3otg_ref", "xin24m", 0, ++ RK3562_CLKGATE_CON(16), 12, GFLAGS), ++ GATE(CLK_PIPEPHY_REF_FUNC, "clk_pipephy_ref_func", "pclk_pcie20", 0, ++ RK3562_CLKGATE_CON(17), 3, GFLAGS), + -+ return freq_qos_update_request(&cluster->dsu_qos_req, dsu_freq); -+} ++ /* PD_PMU1 */ ++ COMPOSITE_NOMUX(CLK_200M_PMU, "clk_200m_pmu", "cpll", CLK_IS_CRITICAL, ++ RK3562_PMU1_CLKSEL_CON(0), 0, 5, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(0), 1, GFLAGS), ++ /* PD_PMU0 */ ++ COMPOSITE_FRACMUX(CLK_RTC32K_FRAC, "clk_rtc32k_frac", "xin24m", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKSEL_CON(0), 0, ++ RK3562_PMU0_CLKGATE_CON(0), 15, GFLAGS, ++ &rk3562_rtc32k_pmu_fracmux), ++ COMPOSITE_NOMUX(BUSCLK_PDPMU0, "busclk_pdpmu0", "clk_200m_pmu", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKSEL_CON(1), 3, 2, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(0), 14, GFLAGS), ++ GATE(PCLK_PMU0_CRU, "pclk_pmu0_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(PCLK_PMU0_PMU, "pclk_pmu0_pmu", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 1, GFLAGS), 
++ GATE(CLK_PMU0_PMU, "clk_pmu0_pmu", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU0_HP_TIMER, "pclk_pmu0_hp_timer", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 3, GFLAGS), ++ GATE(CLK_PMU0_HP_TIMER, "clk_pmu0_hp_timer", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 4, GFLAGS), ++ GATE(CLK_PMU0_32K_HP_TIMER, "clk_pmu0_32k_hp_timer", "clk_rtc_32k", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_PMU0_PVTM, "pclk_pmu0_pvtm", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 6, GFLAGS), ++ GATE(CLK_PMU0_PVTM, "clk_pmu0_pvtm", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 7, GFLAGS), ++ GATE(PCLK_IOC_PMUIO, "pclk_ioc_pmuio", "busclk_pdpmu0", CLK_IS_CRITICAL, ++ RK3562_PMU0_CLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_PMU0_GPIO0, "pclk_pmu0_gpio0", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 9, GFLAGS), ++ GATE(DBCLK_PMU0_GPIO0, "dbclk_pmu0_gpio0", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_PMU0_GRF, "pclk_pmu0_grf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 11, GFLAGS), ++ GATE(PCLK_PMU0_SGRF, "pclk_pmu0_sgrf", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "xin24m", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(1), 0, GFLAGS), ++ GATE(PCLK_PMU0_SCRKEYGEN, "pclk_pmu0_scrkeygen", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU0_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PIPEPHY_DIV, "clk_pipephy_div", "cpll", 0, ++ RK3562_PMU0_CLKSEL_CON(2), 0, 6, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PIPEPHY_XIN24M, "clk_pipephy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NODIV(CLK_PIPEPHY_REF, "clk_pipephy_ref", clk_pipephy_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 7, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 2, GFLAGS), ++ GATE(CLK_USB2PHY_XIN24M, "clk_usb2phy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_USB2PHY_REF, "clk_usb2phy_ref", clk_usbphy_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 8, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 5, GFLAGS), ++ GATE(CLK_MIPIDSIPHY_XIN24M, "clk_mipidsiphy_xin24m", "xin24m", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_MIPIDSIPHY_REF, "clk_mipidsiphy_ref", clk_mipidsi_ref_p, 0, ++ RK3562_PMU0_CLKSEL_CON(2), 15, 1, MFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_PMU0_I2C0, "pclk_pmu0_i2c0", "busclk_pdpmu0", 0, ++ RK3562_PMU0_CLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE(CLK_PMU0_I2C0, "clk_pmu0_i2c0", mux_200m_xin24m_32k_p, 0, ++ RK3562_PMU0_CLKSEL_CON(3), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_PMU0_CLKGATE_CON(2), 9, GFLAGS), ++ /* PD_PMU1 */ ++ GATE(PCLK_PMU1_CRU, "pclk_pmu1_cru", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU1_CLKGATE_CON(0), 0, GFLAGS), ++ GATE(HCLK_PMU1_MEM, "hclk_pmu1_mem", "busclk_pdpmu0", CLK_IGNORE_UNUSED, ++ RK3562_PMU1_CLKGATE_CON(0), 2, GFLAGS), ++ GATE(PCLK_PMU1_UART0, "pclk_pmu1_uart0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PMU1_UART0_SRC, "clk_pmu1_uart0_src", "cpll", 0, ++ RK3562_PMU1_CLKSEL_CON(2), 0, 4, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_PMU1_UART0_FRAC, "clk_pmu1_uart0_frac", "clk_pmu1_uart0_src", CLK_SET_RATE_PARENT, ++ RK3562_PMU1_CLKSEL_CON(3), 0, ++ RK3562_PMU1_CLKGATE_CON(0), 9, GFLAGS, ++ &rk3562_clk_pmu1_uart0_fracmux), ++ GATE(SCLK_PMU1_UART0, "sclk_pmu1_uart0", 
"clk_pmu1_uart0", 0, ++ RK3562_PMU1_CLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_PMU1_SPI0, "pclk_pmu1_spi0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(CLK_PMU1_SPI0, "clk_pmu1_spi0", mux_200m_xin24m_32k_p, 0, ++ RK3562_PMU1_CLKSEL_CON(4), 6, 2, MFLAGS, 0, 2, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(1), 1, GFLAGS), ++ GATE(SCLK_IN_PMU1_SPI0, "sclk_in_pmu1_spi0", "sclk_in_pmu1_spi0_io", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_PMU1_PWM0, "pclk_pmu1_pwm0", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE(CLK_PMU1_PWM0, "clk_pmu1_pwm0", mux_200m_xin24m_32k_p, 0, ++ RK3562_PMU1_CLKSEL_CON(4), 14, 2, MFLAGS, 8, 2, DFLAGS, ++ RK3562_PMU1_CLKGATE_CON(1), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PMU1_PWM0, "clk_capture_pmu1_pwm0", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 5, GFLAGS), ++ GATE(CLK_PMU1_WIFI, "clk_pmu1_wifi", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(1), 6, GFLAGS), ++ GATE(FCLK_PMU1_CM0_CORE, "fclk_pmu1_cm0_core", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 0, GFLAGS), ++ GATE(CLK_PMU1_CM0_RTC, "clk_pmu1_cm0_rtc", "clk_rtc_32k", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_PMU1_WDTNS, "pclk_pmu1_wdtns", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 3, GFLAGS), ++ GATE(CLK_PMU1_WDTNS, "clk_pmu1_wdtns", "xin24m", 0, ++ RK3562_PMU1_CLKGATE_CON(2), 4, GFLAGS), ++ GATE(PCLK_PMU1_MAILBOX, "pclk_pmu1_mailbox", "busclk_pdpmu0", 0, ++ RK3562_PMU1_CLKGATE_CON(3), 8, GFLAGS), + -+static int rockchip_cpufreq_transition_notifier(struct notifier_block *nb, -+ unsigned long event, void *data) -+{ -+ struct cpufreq_freqs *freqs = data; -+ struct cpufreq_policy *policy = freqs->policy; -+ struct cluster_info *cluster; ++ /* PD_RGA */ ++ COMPOSITE(ACLK_RGA_PRE, "aclk_rga_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(32), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(14), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_RGA_PRE, "hclk_rga_pre", "aclk_rga_jdec", 0, ++ RK3562_CLKSEL_CON(32), 8, 3, DFLAGS, ++ RK3562_CLKGATE_CON(14), 1, GFLAGS), ++ GATE(ACLK_RGA, "aclk_rga", "aclk_rga_jdec", 0, ++ RK3562_CLKGATE_CON(14), 6, GFLAGS), ++ GATE(HCLK_RGA, "hclk_rga", "hclk_rga_pre", 0, ++ RK3562_CLKGATE_CON(14), 7, GFLAGS), ++ COMPOSITE(CLK_RGA_CORE, "clk_rga_core", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(33), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(14), 8, GFLAGS), ++ GATE(ACLK_JDEC, "aclk_jdec", "aclk_rga_jdec", 0, ++ RK3562_CLKGATE_CON(14), 9, GFLAGS), ++ GATE(HCLK_JDEC, "hclk_jdec", "hclk_rga_pre", 0, ++ RK3562_CLKGATE_CON(14), 10, GFLAGS), + -+ cluster = rockchip_cluster_info_lookup(policy->cpu); -+ if (!cluster) -+ return NOTIFY_BAD; ++ /* PD_VDPU */ ++ COMPOSITE(ACLK_VDPU_PRE, "aclk_vdpu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(22), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(10), 0, GFLAGS), ++ COMPOSITE(CLK_RKVDEC_HEVC_CA, "clk_rkvdec_hevc_ca", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_CLKGATE_CON(10), 3, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VDPU_PRE, "hclk_vdpu_pre", "aclk_vdpu", 0, ++ RK3562_CLKSEL_CON(24), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(10), 4, GFLAGS), ++ GATE(ACLK_RKVDEC, "aclk_rkvdec", "aclk_vdpu", 0, ++ RK3562_CLKGATE_CON(10), 7, GFLAGS), ++ GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_vdpu_pre", 0, ++ RK3562_CLKGATE_CON(10), 8, GFLAGS), + -+ if (event == CPUFREQ_PRECHANGE) { -+ if (cluster->idle_threshold_freq && -+ freqs->new >= cluster->idle_threshold_freq && -+ !cluster->is_idle_disabled) { -+ 
rockchip_cpufreq_idle_state_disable(policy->cpus, 1, -+ true); -+ cluster->is_idle_disabled = true; -+ } -+ } else if (event == CPUFREQ_POSTCHANGE) { -+ if (cluster->idle_threshold_freq && -+ freqs->new < cluster->idle_threshold_freq && -+ cluster->is_idle_disabled) { -+ rockchip_cpufreq_idle_state_disable(policy->cpus, 1, -+ false); -+ cluster->is_idle_disabled = false; -+ } -+ rockchip_cpufreq_update_dsu_req(cluster, freqs->new); -+ } ++ /* PD_VEPU */ ++ COMPOSITE(CLK_RKVENC_CORE, "clk_rkvenc_core", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(20), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(9), 0, GFLAGS), ++ COMPOSITE(ACLK_VEPU_PRE, "aclk_vepu_pre", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(20), 14, 2, MFLAGS, 8, 5, DFLAGS, ++ RK3562_CLKGATE_CON(9), 1, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VEPU_PRE, "hclk_vepu_pre", "aclk_vepu", 0, ++ RK3562_CLKSEL_CON(21), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(9), 2, GFLAGS), ++ GATE(ACLK_RKVENC, "aclk_rkvenc", "aclk_vepu", 0, ++ RK3562_CLKGATE_CON(9), 5, GFLAGS), ++ GATE(HCLK_RKVENC, "hclk_rkvenc", "hclk_vepu", 0, ++ RK3562_CLKGATE_CON(9), 6, GFLAGS), + -+ return NOTIFY_OK; -+} ++ /* PD_VI */ ++ COMPOSITE(ACLK_VI, "aclk_vi", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(25), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VI, "hclk_vi", "aclk_vi_isp", 0, ++ RK3562_CLKSEL_CON(26), 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 1, GFLAGS), ++ COMPOSITE_NOMUX(PCLK_VI, "pclk_vi", "aclk_vi_isp", 0, ++ RK3562_CLKSEL_CON(26), 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 2, GFLAGS), ++ GATE(ACLK_ISP, "aclk_isp", "aclk_vi_isp", 0, ++ RK3562_CLKGATE_CON(11), 6, GFLAGS), ++ GATE(HCLK_ISP, "hclk_isp", "hclk_vi", 0, ++ RK3562_CLKGATE_CON(11), 7, GFLAGS), ++ COMPOSITE(CLK_ISP, "clk_isp", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(27), 6, 2, MFLAGS, 0, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 8, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_isp", 0, ++ RK3562_CLKGATE_CON(11), 9, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi", 0, ++ RK3562_CLKGATE_CON(11), 10, GFLAGS), ++ COMPOSITE(DCLK_VICAP, "dclk_vicap", gpll_cpll_pvtpll_dmyapll_p, 0, ++ RK3562_CLKSEL_CON(27), 14, 2, MFLAGS, 8, 4, DFLAGS, ++ RK3562_CLKGATE_CON(11), 11, GFLAGS), ++ GATE(CSIRX0_CLK_DATA, "csirx0_clk_data", "csirx0_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 12, GFLAGS), ++ GATE(CSIRX1_CLK_DATA, "csirx1_clk_data", "csirx1_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 13, GFLAGS), ++ GATE(CSIRX2_CLK_DATA, "csirx2_clk_data", "csirx2_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 14, GFLAGS), ++ GATE(CSIRX3_CLK_DATA, "csirx3_clk_data", "csirx3_clk_data_io", 0, ++ RK3562_CLKGATE_CON(11), 15, GFLAGS), ++ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 0, GFLAGS), ++ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 1, GFLAGS), ++ GATE(PCLK_CSIHOST2, "pclk_csihost2", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 2, GFLAGS), ++ GATE(PCLK_CSIHOST3, "pclk_csihost3", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 3, GFLAGS), ++ GATE(PCLK_CSIPHY0, "pclk_csiphy0", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 4, GFLAGS), ++ GATE(PCLK_CSIPHY1, "pclk_csiphy1", "pclk_vi", 0, ++ RK3562_CLKGATE_CON(12), 5, GFLAGS), + -+static struct notifier_block rockchip_cpufreq_transition_notifier_block = { -+ .notifier_call = rockchip_cpufreq_transition_notifier, ++ /* PD_VO */ ++ COMPOSITE(ACLK_VO_PRE, "aclk_vo_pre", gpll_cpll_vpll_dmyhpll_p, 0, ++ RK3562_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS, ++ 
RK3562_CLKGATE_CON(13), 0, GFLAGS), ++ COMPOSITE_NOMUX(HCLK_VO_PRE, "hclk_vo_pre", "aclk_vo", 0, ++ RK3562_CLKSEL_CON(29), 0, 5, DFLAGS, ++ RK3562_CLKGATE_CON(13), 1, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vo", 0, ++ RK3562_CLKGATE_CON(13), 6, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_pre", 0, ++ RK3562_CLKGATE_CON(13), 7, GFLAGS), ++ COMPOSITE(DCLK_VOP, "dclk_vop", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3562_CLKSEL_CON(30), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_CLKGATE_CON(13), 8, GFLAGS), ++ COMPOSITE(DCLK_VOP1, "dclk_vop1", gpll_dmyhpll_vpll_apll_p, CLK_SET_RATE_NO_REPARENT, ++ RK3562_CLKSEL_CON(31), 14, 2, MFLAGS, 0, 8, DFLAGS, ++ RK3562_CLKGATE_CON(13), 9, GFLAGS), +}; + -+static int rockchip_cpufreq_panic_notifier(struct notifier_block *nb, -+ unsigned long v, void *p) -+{ -+ struct cluster_info *ci; -+ struct rockchip_opp_info *opp_info; -+ -+ list_for_each_entry(ci, &cluster_info_list, list_head) { -+ opp_info = &ci->opp_info; ++static void __iomem *rk3562_cru_base; + -+ if (opp_info->regulator_count > 1) -+ dev_info(opp_info->dev, -+ "cur_freq: %lu Hz, volt_vdd: %lu uV, volt_mem: %lu uV\n", -+ ci->rate, ci->volt, ci->mem_volt); -+ else -+ dev_info(opp_info->dev, "cur_freq: %lu Hz, volt: %lu uV\n", -+ ci->rate, ci->volt); ++static void rk3562_dump_cru(void) ++{ ++ if (rk3562_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rk3562_cru_base, ++ 0x600, false); + } -+ -+ return 0; +} + -+static struct notifier_block rockchip_cpufreq_panic_notifier_block = { -+ .notifier_call = rockchip_cpufreq_panic_notifier, ++static int protect_clocks[] = { ++ ACLK_VO_PRE, ++ HCLK_VO_PRE, ++ ACLK_VOP, ++ HCLK_VOP, ++ DCLK_VOP, ++ DCLK_VOP1, +}; + -+static int __init rockchip_cpufreq_driver_init(void) ++static void __init rk3562_clk_init(struct device_node *np) +{ -+ struct cluster_info *cluster, *pos; -+ struct cpufreq_dt_platform_data pdata = {0}; -+ int cpu, ret; -+ -+ for_each_possible_cpu(cpu) { -+ cluster = rockchip_cluster_info_lookup(cpu); -+ if (cluster) -+ continue; -+ -+ cluster = kzalloc(sizeof(*cluster), GFP_KERNEL); -+ if (!cluster) { -+ ret = -ENOMEM; -+ goto release_cluster_info; -+ } ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ ret = rockchip_cpufreq_cluster_init(cpu, cluster); -+ if (ret) { -+ pr_err("Failed to initialize dvfs info cpu%d\n", cpu); -+ goto release_cluster_info; -+ } -+ list_add(&cluster->list_head, &cluster_info_list); ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; + } + -+ pdata.have_governor_per_policy = true; -+ pdata.suspend = rockchip_cpufreq_suspend; ++ rk3562_cru_base = reg_base; + -+ ret = cpufreq_register_notifier(&rockchip_cpufreq_notifier_block, -+ CPUFREQ_POLICY_NOTIFIER); -+ if (ret) { -+ pr_err("failed to register cpufreq notifier\n"); -+ goto release_cluster_info; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; + } + -+ if (of_machine_is_compatible("rockchip,rk3588")) { -+ ret = cpufreq_register_notifier(&rockchip_cpufreq_transition_notifier_block, -+ CPUFREQ_TRANSITION_NOTIFIER); -+ if (ret) { -+ cpufreq_unregister_notifier(&rockchip_cpufreq_notifier_block, -+ CPUFREQ_POLICY_NOTIFIER); -+ pr_err("failed to register cpufreq notifier\n"); -+ goto release_cluster_info; -+ } -+#ifdef MODULE -+ cpu_latency_qos_add_request(&idle_pm_qos, PM_QOS_DEFAULT_VALUE); -+#endif 
-+ } ++ rockchip_clk_register_plls(ctx, rk3562_pll_clks, ++ ARRAY_SIZE(rk3562_pll_clks), ++ RK3562_GRF_SOC_STATUS0); + -+ ret = atomic_notifier_chain_register(&panic_notifier_list, -+ &rockchip_cpufreq_panic_notifier_block); -+ if (ret) -+ pr_err("failed to register cpufreq panic notifier\n"); ++ rockchip_clk_register_branches(ctx, rk3562_clk_branches, ++ ARRAY_SIZE(rk3562_clk_branches)); + -+ return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", -+ -1, (void *)&pdata, -+ sizeof(struct cpufreq_dt_platform_data))); ++ /* (0x30444 - 0x400) / 4 + 1 = 49170 */ ++ rockchip_register_softrst(np, 49170, reg_base + RK3562_SOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+release_cluster_info: -+ list_for_each_entry_safe(cluster, pos, &cluster_info_list, list_head) { -+ list_del(&cluster->list_head); -+ kfree(cluster); -+ } -+ return ret; -+} -+module_init(rockchip_cpufreq_driver_init); ++ rockchip_register_restart_notifier(ctx, RK3562_GLB_SRST_FST, NULL); + -+MODULE_AUTHOR("Finley Xiao "); -+MODULE_DESCRIPTION("Rockchip cpufreq driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/cpufreq/rockchip-cpufreq.h b/drivers/cpufreq/rockchip-cpufreq.h -new file mode 100644 -index 000000000..2eb920606 ---- /dev/null -+++ b/drivers/cpufreq/rockchip-cpufreq.h -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd -+ */ -+#ifndef __ROCKCHIP_CPUFREQ_H -+#define __ROCKCHIP_CPUFREQ_H ++ rockchip_clk_of_add_provider(np, ctx); + -+#if IS_ENABLED(CONFIG_ARM_ROCKCHIP_CPUFREQ) -+int rockchip_cpufreq_adjust_table(struct device *dev); -+int rockchip_cpufreq_opp_set_rate(struct device *dev, unsigned long target_freq); -+#else -+static inline int rockchip_cpufreq_adjust_table(struct device *dev) -+{ -+ return -EOPNOTSUPP; -+} ++ if (!rk_dump_cru) ++ rk_dump_cru = rk3562_dump_cru; + -+static inline int rockchip_cpufreq_opp_set_rate(struct device *dev, -+ unsigned long target_freq) -+{ -+ return -EOPNOTSUPP; ++ rockchip_clk_protect(ctx, protect_clocks, ARRAY_SIZE(protect_clocks)); +} -+#endif /* CONFIG_ARM_ROCKCHIP_CPUFREQ */ -+ -+#endif -diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig -index c761952f0..5f1c9813b 100644 ---- a/drivers/crypto/Kconfig -+++ b/drivers/crypto/Kconfig -@@ -638,26 +638,27 @@ config CRYPTO_DEV_ROCKCHIP - select CRYPTO_DES - select CRYPTO_AES - select CRYPTO_ENGINE -+ select CRYPTO_XTS -+ select CRYPTO_CFB -+ select CRYPTO_OFB -+ select CRYPTO_CTR -+ select CRYPTO_GCM - select CRYPTO_LIB_DES - select CRYPTO_MD5 - select CRYPTO_SHA1 -+ select CRYPTO_SM3 -+ select CRYPTO_SM4 - select CRYPTO_SHA256 -+ select CRTPTO_SHA512 - select CRYPTO_HASH - select CRYPTO_SKCIPHER -+ select CRYPTO_RSA - - help - This driver interfaces with the hardware crypto accelerator. - Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. - --config CRYPTO_DEV_ROCKCHIP_DEBUG -- bool "Enable Rockchip crypto stats" -- depends on CRYPTO_DEV_ROCKCHIP -- depends on DEBUG_FS -- help -- Say y to enable Rockchip crypto debug stats. -- This will create /sys/kernel/debug/rk3288_crypto/stats for displaying -- the number of requests per algorithm and other internal stats. 
-- -+source "drivers/crypto/rockchip/Kconfig" - - config CRYPTO_DEV_ZYNQMP_AES - tristate "Support for Xilinx ZynqMP AES hw accelerator" -diff --git a/drivers/crypto/rockchip/Kconfig b/drivers/crypto/rockchip/Kconfig -new file mode 100644 -index 000000000..b7402e8a5 ---- /dev/null -+++ b/drivers/crypto/rockchip/Kconfig -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0-only -+if CRYPTO_DEV_ROCKCHIP -+ -+config CRYPTO_DEV_ROCKCHIP_V1 -+ bool "crypto v1 for RV1108 RK3288 RK3368 RK3399" -+ default y if CPU_RV1108 || CPU_RK3288 || CPU_RK3368 || CPU_RK3399 -+ -+config CRYPTO_DEV_ROCKCHIP_V2 -+ bool "crypto v2 for RV1109/RV1126 RK1808 RK3308 PX30/RK3326 RK356X RK3588" -+ default y if CPU_RV1126 || CPU_RK1808 || CPU_RK3308 || CPU_PX30 || CPU_RK3568 || CPU_RK3588 -+ -+config CRYPTO_DEV_ROCKCHIP_V3 -+ bool "crypto v3/v4 for RV1106/RK3528/RK3562" -+ default y if CPU_RV1106 || CPU_RK3528 || CPU_RK3562 -+ -+endif -+ -+config CRYPTO_DEV_ROCKCHIP_DEV -+ tristate "Export rockchip crypto device for user space" -+ depends on CRYPTO_DEV_ROCKCHIP -+ default n -+ help -+ This is a /dev/crypto device driver.The main idea is to -+ access existing ciphers in kernel space from userspace, -+ thus enabling the re-use of a hardware implementation of a cipher. -diff --git a/drivers/crypto/rockchip/Makefile b/drivers/crypto/rockchip/Makefile -index 785277aca..53e34aa47 100644 ---- a/drivers/crypto/rockchip/Makefile -+++ b/drivers/crypto/rockchip/Makefile -@@ -1,5 +1,30 @@ - # SPDX-License-Identifier: GPL-2.0-only - obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rk_crypto.o --rk_crypto-objs := rk3288_crypto.o \ -- rk3288_crypto_skcipher.o \ -- rk3288_crypto_ahash.o -+rk_crypto-objs := rk_crypto_core.o \ -+ rk_crypto_utils.o \ -+ rk_crypto_ahash_utils.o \ -+ rk_crypto_skcipher_utils.o \ -+ procfs.o -+ -+rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) += \ -+ rk_crypto_v1.o \ -+ rk_crypto_v1_ahash.o \ -+ rk_crypto_v1_skcipher.o -+ -+rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) += \ -+ rk_crypto_v2.o \ -+ rk_crypto_v2_ahash.o \ -+ rk_crypto_v2_skcipher.o \ -+ rk_crypto_v2_akcipher.o \ -+ rk_crypto_v2_pka.o \ -+ rk_crypto_bignum.o -+ -+rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) += \ -+ rk_crypto_v3.o \ -+ rk_crypto_v3_ahash.o \ -+ rk_crypto_v3_skcipher.o \ -+ rk_crypto_v2_akcipher.o \ -+ rk_crypto_v2_pka.o \ -+ rk_crypto_bignum.o -+ -+obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) += cryptodev_linux/ -diff --git a/drivers/crypto/rockchip/cryptodev_linux/Makefile b/drivers/crypto/rockchip/cryptodev_linux/Makefile -new file mode 100644 -index 000000000..628262fb2 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/Makefile -@@ -0,0 +1,10 @@ -+# SPDX-License-Identifier: GPL-2.0+ -+obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) += cryptodev.o -+cryptodev-objs := ioctl.o \ -+ main.o \ -+ cryptlib.o \ -+ authenc.o \ -+ zc.o \ -+ util.o \ -+ rk_cryptodev.o + -diff --git a/drivers/crypto/rockchip/cryptodev_linux/authenc.c b/drivers/crypto/rockchip/cryptodev_linux/authenc.c -new file mode 100644 -index 000000000..afca7f76d ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/authenc.c -@@ -0,0 +1,1106 @@ -+/* -+ * Driver for /dev/crypto device (aka CryptoDev) -+ * -+ * Copyright (c) 2011, 2012 OpenSSL Software Foundation, Inc. -+ * -+ * Author: Nikos Mavrogiannopoulos -+ * -+ * This file is part of linux cryptodev. 
-+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++CLK_OF_DECLARE(rk3562_cru, "rockchip,rk3562-cru", rk3562_clk_init); + -+/* -+ * This file handles the AEAD part of /dev/crypto. -+ * -+ */ ++#ifdef MODULE ++struct clk_rk3562_inits { ++ void (*inits)(struct device_node *np); ++}; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "cryptodev.h" -+#include "zc.h" -+#include "util.h" -+#include "cryptlib.h" -+#include "version.h" ++static const struct clk_rk3562_inits clk_3562_cru_init = { ++ .inits = rk3562_clk_init, ++}; + ++static const struct of_device_id clk_rk3562_match_table[] = { ++ { ++ .compatible = "rockchip,rk3562-cru", ++ .data = &clk_3562_cru_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rk3562_match_table); + -+/* make caop->dst available in scatterlist. -+ * (caop->src is assumed to be equal to caop->dst) -+ */ -+static int get_userbuf_tls(struct csession *ses, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist **dst_sg) ++static int clk_rk3562_probe(struct platform_device *pdev) +{ -+ int pagecount = 0; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int rc; -+ -+ if (caop->dst == NULL) -+ return -EINVAL; -+ -+ if (ses->alignmask) { -+ if (!IS_ALIGNED((unsigned long)caop->dst, ses->alignmask + 1)) -+ dwarning(2, "careful - source address %p is not %d byte aligned", -+ caop->dst, ses->alignmask + 1); -+ } -+ -+ if (kcaop->dst_len == 0) { -+ dwarning(1, "Destination length cannot be zero"); -+ return -EINVAL; -+ } -+ -+ pagecount = PAGECOUNT(caop->dst, kcaop->dst_len); -+ -+ ses->used_pages = pagecount; -+ ses->readonly_pages = 0; -+ -+ rc = cryptodev_adjust_sg_array(ses, pagecount); -+ if (rc) -+ return rc; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rk3562_inits *init_data; + -+ rc = __cryptodev_get_userbuf(caop->dst, kcaop->dst_len, 1, pagecount, -+ ses->pages, ses->sg, kcaop->task, kcaop->mm); -+ if (unlikely(rc)) { -+ derr(1, "failed to get user pages for data input"); ++ match = of_match_device(clk_rk3562_match_table, &pdev->dev); ++ if (!match || !match->data) + return -EINVAL; -+ } + -+ (*dst_sg) = ses->sg; ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + + return 0; +} + ++static struct platform_driver clk_rk3562_driver = { ++ .probe = clk_rk3562_probe, ++ .driver = { ++ .name = "clk-rk3562", ++ .of_match_table = clk_rk3562_match_table, ++ .suppress_bind_attrs = true, ++ }, ++}; ++module_platform_driver(clk_rk3562_driver); + -+#define MAX_SRTP_AUTH_DATA_DIFF 256 -+ -+/* Makes caop->auth_src available as scatterlist. -+ * It also provides a pointer to caop->dst, which however, -+ * is assumed to be within the caop->auth_src buffer. 
If not -+ * (if their difference exceeds MAX_SRTP_AUTH_DATA_DIFF) it -+ * returns error. ++MODULE_DESCRIPTION("Rockchip RK3562 Clock Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("platform:clk-rk3562"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip/clk-rv1106.c b/drivers/clk/rockchip/clk-rv1106.c +new file mode 100644 +index 000000000..0833bf2ad +--- /dev/null ++++ b/drivers/clk/rockchip/clk-rv1106.c +@@ -0,0 +1,1280 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Elaine Zhang + */ -+static int get_userbuf_srtp(struct csession *ses, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist **auth_sg, struct scatterlist **dst_sg) -+{ -+ int pagecount, diff; -+ int auth_pagecount = 0; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int rc; -+ -+ if (caop->dst == NULL && caop->auth_src == NULL) { -+ derr(1, "dst and auth_src cannot be both null"); -+ return -EINVAL; -+ } + -+ if (ses->alignmask) { -+ if (!IS_ALIGNED((unsigned long)caop->dst, ses->alignmask + 1)) -+ dwarning(2, "careful - source address %p is not %d byte aligned", -+ caop->dst, ses->alignmask + 1); -+ if (!IS_ALIGNED((unsigned long)caop->auth_src, ses->alignmask + 1)) -+ dwarning(2, "careful - source address %p is not %d byte aligned", -+ caop->auth_src, ses->alignmask + 1); -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "clk.h" + -+ if (unlikely(kcaop->dst_len == 0 || caop->auth_len == 0)) { -+ dwarning(1, "Destination length cannot be zero"); -+ return -EINVAL; -+ } ++#define CRU_PVTPLL0_CON0_L 0x11000 ++#define CRU_PVTPLL0_CON0_H 0x11004 ++#define CRU_PVTPLL0_CON1_L 0x11008 ++#define CRU_PVTPLL0_CON1_H 0x1100c ++#define CRU_PVTPLL0_CON2_L 0x11010 ++#define CRU_PVTPLL0_CON2_H 0x11014 ++#define CRU_PVTPLL0_CON3_L 0x11018 ++#define CRU_PVTPLL0_CON3_H 0x1101c ++#define CRU_PVTPLL0_OSC_CNT 0x11020 ++#define CRU_PVTPLL0_OSC_CNT_AVG 0x11024 + -+ /* Note that in SRTP auth data overlap with data to be encrypted (dst) -+ */ ++#define CRU_PVTPLL1_CON0_L 0x11030 ++#define CRU_PVTPLL1_CON0_H 0x11034 ++#define CRU_PVTPLL1_CON1_L 0x11038 ++#define CRU_PVTPLL1_CON1_H 0x1103c ++#define CRU_PVTPLL1_CON2_L 0x11040 ++#define CRU_PVTPLL1_CON2_H 0x11044 ++#define CRU_PVTPLL1_CON3_L 0x11048 ++#define CRU_PVTPLL1_CON3_H 0x1104c ++#define CRU_PVTPLL1_OSC_CNT 0x11050 ++#define CRU_PVTPLL1_OSC_CNT_AVG 0x11054 + -+ auth_pagecount = PAGECOUNT(caop->auth_src, caop->auth_len); -+ diff = (int)(caop->src - caop->auth_src); -+ if (diff > MAX_SRTP_AUTH_DATA_DIFF || diff < 0) { -+ dwarning(1, "auth_src must overlap with src (diff: %d).", diff); -+ return -EINVAL; -+ } ++#define RV1106_GRF_SOC_STATUS0 0x10 ++#define CPU_PVTPLL_CON0_L 0x40000 ++#define CPU_PVTPLL_CON0_H 0x40004 ++#define CPU_PVTPLL_CON1 0x40008 ++#define CPU_PVTPLL_CON2 0x4000c ++#define CPU_PVTPLL_CON3 0x40010 ++#define CPU_PVTPLL_OSC_CNT 0x40018 ++#define CPU_PVTPLL_OSC_CNT_AVG 0x4001c + -+ pagecount = auth_pagecount; ++#define PVTPLL_RING_SEL_MASK 0x7 ++#define PVTPLL_RING_SEL_SHIFT 8 ++#define PVTPLL_EN_MASK 0x3 ++#define PVTPLL_EN_SHIFT 0 ++#define PVTPLL_LENGTH_SEL_MASK 0x7f ++#define PVTPLL_LENGTH_SEL_SHIFT 0 + -+ rc = cryptodev_adjust_sg_array(ses, pagecount*2); /* double pages to have pages for dst(=auth_src) */ -+ if (rc) { -+ derr(1, "cannot adjust sg array"); -+ return rc; -+ } ++#define CPU_CLK_PATH_BASE (0x18300) ++#define CPU_PVTPLL_PATH_CORE ((1 << 12) | (1 << 28)) + -+ rc = __cryptodev_get_userbuf(caop->auth_src, caop->auth_len, 1, 
auth_pagecount, -+ ses->pages, ses->sg, kcaop->task, kcaop->mm); -+ if (unlikely(rc)) { -+ derr(1, "failed to get user pages for data input"); -+ return -EINVAL; -+ } ++#define RV1106_FRAC_MAX_PRATE 1200000000 + -+ ses->used_pages = pagecount; -+ ses->readonly_pages = 0; ++enum rv1106_plls { ++ apll, dpll, cpll, gpll, ++}; + -+ (*auth_sg) = ses->sg; ++static struct rockchip_pll_rate_table rv1106_pll_rates[] = { ++ /* _mhz, _refdiv, _fbdiv, _postdiv1, _postdiv2, _dsmpd, _frac */ ++ RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0), ++ RK3036_PLL_RATE(1584000000, 1, 132, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1560000000, 1, 130, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1536000000, 1, 128, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1512000000, 1, 126, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1488000000, 1, 124, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1464000000, 1, 122, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1440000000, 1, 120, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1416000000, 1, 118, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1400000000, 3, 350, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1392000000, 1, 116, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1368000000, 1, 114, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1344000000, 1, 112, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1320000000, 1, 110, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1296000000, 1, 108, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1272000000, 1, 106, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1248000000, 1, 104, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1200000000, 1, 100, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1188000000, 1, 99, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1104000000, 1, 92, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1100000000, 3, 275, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1008000000, 1, 84, 2, 1, 1, 0), ++ RK3036_PLL_RATE(1000000000, 3, 250, 2, 1, 1, 0), ++ RK3036_PLL_RATE(993484800, 1, 124, 3, 1, 0, 3113851), ++ RK3036_PLL_RATE(984000000, 1, 82, 2, 1, 1, 0), ++ RK3036_PLL_RATE(983040000, 1, 81, 2, 1, 0, 15435038), ++ RK3036_PLL_RATE(960000000, 1, 80, 2, 1, 1, 0), ++ RK3036_PLL_RATE(936000000, 1, 78, 2, 1, 1, 0), ++ RK3036_PLL_RATE(912000000, 1, 76, 2, 1, 1, 0), ++ RK3036_PLL_RATE(900000000, 1, 75, 2, 1, 1, 0), ++ RK3036_PLL_RATE(888000000, 1, 74, 2, 1, 1, 0), ++ RK3036_PLL_RATE(864000000, 1, 72, 2, 1, 1, 0), ++ RK3036_PLL_RATE(840000000, 1, 70, 2, 1, 1, 0), ++ RK3036_PLL_RATE(816000000, 1, 68, 2, 1, 1, 0), ++ RK3036_PLL_RATE(800000000, 3, 200, 2, 1, 1, 0), ++ RK3036_PLL_RATE(700000000, 3, 350, 4, 1, 1, 0), ++ RK3036_PLL_RATE(696000000, 1, 116, 4, 1, 1, 0), ++ RK3036_PLL_RATE(624000000, 1, 104, 4, 1, 1, 0), ++ RK3036_PLL_RATE(600000000, 1, 100, 4, 1, 1, 0), ++ RK3036_PLL_RATE(594000000, 1, 99, 4, 1, 1, 0), ++ RK3036_PLL_RATE(504000000, 1, 84, 4, 1, 1, 0), ++ RK3036_PLL_RATE(500000000, 1, 125, 6, 1, 1, 0), ++ RK3036_PLL_RATE(496742400, 1, 124, 6, 1, 0, 3113851), ++ RK3036_PLL_RATE(491520000, 1, 40, 2, 1, 0, 16106127), ++ RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), ++ RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), ++ RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), ++ RK3036_PLL_RATE(96000000, 1, 96, 6, 4, 1, 0), ++ { /* sentinel */ }, ++}; + -+ (*dst_sg) = ses->sg + auth_pagecount; -+ sg_init_table(*dst_sg, auth_pagecount); -+ cryptodev_sg_copy(ses->sg, (*dst_sg), caop->auth_len); -+ (*dst_sg) = cryptodev_sg_advance(*dst_sg, diff); -+ if (*dst_sg == NULL) { -+ cryptodev_release_user_pages(ses); -+ derr(1, "failed to get enough pages for auth data"); -+ return -EINVAL; -+ } ++#define RV1106_DIV_ACLK_CORE_MASK 0x1f ++#define RV1106_DIV_ACLK_CORE_SHIFT 7 ++#define RV1106_DIV_PCLK_DBG_MASK 0x1f ++#define RV1106_DIV_PCLK_DBG_SHIFT 0 ++#define 
RV1106_CORE_SEL_MASK 0x3 ++#define RV1106_CORE_SEL_SHIFT 5 ++#define RV1106_ALT_DIV_MASK 0x1f ++#define RV1106_ALT_DIV_SHIFT 0 + -+ return 0; ++#define RV1106_CLKSEL0(_aclk_core) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_aclk_core, RV1106_DIV_ACLK_CORE_MASK, \ ++ RV1106_DIV_ACLK_CORE_SHIFT), \ +} + -+/* -+ * Return tag (digest) length for authenticated encryption -+ * If the cipher and digest are separate, hdata.init is set - just return -+ * digest length. Otherwise return digest length for aead ciphers -+ */ -+static int cryptodev_get_tag_len(struct csession *ses_ptr) -+{ -+ if (ses_ptr->hdata.init) -+ return ses_ptr->hdata.digestsize; -+ else -+ return cryptodev_cipher_get_tag_size(&ses_ptr->cdata); ++#define RV1106_CLKSEL1(_pclk_dbg) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(1), \ ++ .val = HIWORD_UPDATE(_pclk_dbg, RV1106_DIV_PCLK_DBG_MASK, \ ++ RV1106_DIV_PCLK_DBG_SHIFT), \ +} + -+/* -+ * Calculate destination buffer length for authenticated encryption. The -+ * expectation is that user-space code allocates exactly the same space for -+ * destination buffer before calling cryptodev. The result is cipher-dependent. -+ */ -+static int cryptodev_get_dst_len(struct crypt_auth_op *caop, struct csession *ses_ptr) -+{ -+ int dst_len = caop->len; -+ if (caop->op == COP_DECRYPT) -+ return dst_len; -+ -+ if (caop->flags & COP_FLAG_AEAD_RK_TYPE) -+ return dst_len; -+ -+ dst_len += caop->tag_len; -+ -+ /* for TLS always add some padding so the total length is rounded to -+ * cipher block size */ -+ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE) { -+ int bs = ses_ptr->cdata.blocksize; -+ dst_len += bs - (dst_len % bs); -+ } -+ -+ return dst_len; ++#define RV1106_CLKSEL2(_is_pvtpll) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_is_pvtpll, RV1106_CORE_SEL_MASK, \ ++ RV1106_CORE_SEL_SHIFT), \ +} + -+static int fill_kcaop_from_caop(struct kernel_crypt_auth_op *kcaop, struct fcrypt *fcr) -+{ -+ struct crypt_auth_op *caop = &kcaop->caop; -+ struct csession *ses_ptr; -+ int ret; -+ -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", caop->ses); -+ return -EINVAL; -+ } -+ -+ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE || caop->flags & COP_FLAG_AEAD_SRTP_TYPE) { -+ if (caop->src != caop->dst) { -+ derr(1, "Non-inplace encryption and decryption is not efficient and not implemented"); -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ } -+ -+ if (caop->tag_len == 0) -+ caop->tag_len = cryptodev_get_tag_len(ses_ptr); -+ -+ kcaop->ivlen = caop->iv ? 
ses_ptr->cdata.ivsize : 0; -+ kcaop->dst_len = cryptodev_get_dst_len(caop, ses_ptr); -+ kcaop->task = current; -+ kcaop->mm = current->mm; -+ -+ if (caop->iv) { -+ ret = copy_from_user(kcaop->iv, caop->iv, kcaop->ivlen); -+ if (unlikely(ret)) { -+ derr(1, "error copying IV (%d bytes), copy_from_user returned %d for address %p", -+ kcaop->ivlen, ret, caop->iv); -+ ret = -EFAULT; -+ goto out_unlock; -+ } -+ } -+ -+ ret = 0; -+ -+out_unlock: -+ crypto_put_session(ses_ptr); -+ return ret; -+ ++#define RV1106_CLKSEL3(_alt_div) \ ++{ \ ++ .reg = RV1106_CORECLKSEL_CON(0), \ ++ .val = HIWORD_UPDATE(_alt_div, RV1106_ALT_DIV_MASK, \ ++ RV1106_ALT_DIV_SHIFT), \ +} + -+static int fill_caop_from_kcaop(struct kernel_crypt_auth_op *kcaop, struct fcrypt *fcr) -+{ -+ int ret; -+ -+ kcaop->caop.len = kcaop->dst_len; -+ -+ if (kcaop->ivlen && kcaop->caop.flags & COP_FLAG_WRITE_IV) { -+ ret = copy_to_user(kcaop->caop.iv, -+ kcaop->iv, kcaop->ivlen); -+ if (unlikely(ret)) { -+ derr(1, "Error in copying to userspace"); -+ return -EFAULT; -+ } -+ } -+ return 0; ++#define RV1106_CPUCLK_RATE(_prate, _aclk_core, _pclk_dbg, _is_pvtpll) \ ++{ \ ++ .prate = _prate, \ ++ .divs = { \ ++ RV1106_CLKSEL0(_aclk_core), \ ++ RV1106_CLKSEL1(_pclk_dbg), \ ++ }, \ ++ .pre_muxs = { \ ++ RV1106_CLKSEL3(1), \ ++ RV1106_CLKSEL2(2), \ ++ }, \ ++ .post_muxs = { \ ++ RV1106_CLKSEL2(_is_pvtpll), \ ++ RV1106_CLKSEL3(0), \ ++ }, \ +} + ++static struct rockchip_cpuclk_rate_table rv1106_cpuclk_rates[] __initdata = { ++ RV1106_CPUCLK_RATE(1608000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1584000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1560000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1536000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1512000000, 3, 7, 1), ++ RV1106_CPUCLK_RATE(1488000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1464000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1440000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1416000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1392000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1368000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1344000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1320000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1296000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1272000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1248000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1224000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1200000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1104000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1096000000, 2, 5, 1), ++ RV1106_CPUCLK_RATE(1008000000, 1, 5, 1), ++ RV1106_CPUCLK_RATE(912000000, 1, 5, 1), ++ RV1106_CPUCLK_RATE(816000000, 1, 3, 1), ++ RV1106_CPUCLK_RATE(696000000, 1, 3, 0), ++ RV1106_CPUCLK_RATE(600000000, 1, 3, 0), ++ RV1106_CPUCLK_RATE(408000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(312000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(216000000, 1, 1, 0), ++ RV1106_CPUCLK_RATE(96000000, 1, 1, 0), ++}; + -+int cryptodev_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ if (unlikely(copy_from_user(&kcaop->caop, arg, sizeof(kcaop->caop)))) { -+ derr(1, "Error in copying from userspace"); -+ return -EFAULT; -+ } ++static const struct rockchip_cpuclk_reg_data rv1106_cpuclk_data = { ++ .core_reg[0] = RV1106_CORECLKSEL_CON(0), ++ .div_core_shift[0] = 0, ++ .div_core_mask[0] = 0x1f, ++ .num_cores = 1, ++ .mux_core_alt = 2, ++ .mux_core_main = 2, ++ .mux_core_shift = 5, ++ .mux_core_mask = 0x3, ++}; + -+ return fill_kcaop_from_caop(kcaop, fcr); -+} ++PNAME(mux_pll_p) = { "xin24m" }; ++PNAME(mux_24m_32k_p) = { "xin24m", "clk_rtc_32k" }; ++PNAME(mux_gpll_cpll_p) = { "gpll", "cpll" }; ++PNAME(mux_gpll_24m_p) = { "gpll", "xin24m" }; 
++PNAME(mux_100m_50m_24m_p) = { "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_150m_100m_50m_24m_p) = { "clk_150m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_500m_300m_100m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_400m_300m_pvtpll0_pvtpll1_p) = { "clk_400m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; ++PNAME(mux_500m_300m_pvtpll0_pvtpll1_p) = { "clk_500m_src", "clk_300m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; ++PNAME(mux_339m_200m_pvtpll0_pvtpll1_p) = { "clk_339m_src", "clk_200m_src", "clk_pvtpll_0", "clk_pvtpll_1" }; ++PNAME(mux_400m_200m_100m_24m_p) = { "clk_400m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_200m_100m_50m_24m_p) = { "clk_200m_src", "clk_100m_src", "clk_50m_src", "xin24m" }; ++PNAME(mux_300m_200m_100m_24m_p) = { "clk_300m_src", "clk_200m_src", "clk_100m_src", "xin24m" }; ++PNAME(mux_500m_300m_200m_24m_p) = { "clk_500m_src", "clk_300m_src", "clk_200m_src", "xin24m" }; ++PNAME(mux_50m_24m_p) = { "clk_50m_src", "xin24m" }; ++PNAME(mux_400m_24m_p) = { "clk_400m_src", "xin24m" }; ++PNAME(clk_rtc32k_pmu_p) = { "clk_rtc32k_frac", "xin32k", "clk_pvtm_32k" }; ++PNAME(mux_200m_100m_24m_32k_p) = { "clk_200m_src", "clk_100m_src", "xin24m", "clk_rtc_32k" }; ++PNAME(mux_100m_pmu_24m_p) = { "clk_100m_pmu", "xin24m" }; ++PNAME(mux_200m_100m_24m_p) = { "clk_200m_src", "clk_100m_pmu", "xin24m" }; ++PNAME(mux_339m_200m_100m_24m_p) = { "clk_339m_src", "clk_200m_src", "clk_100m_pmu", "xin24m" }; ++PNAME(mux_dpll_300m_p) = { "dpll", "clk_300m_src" }; ++PNAME(clk_i2s0_8ch_tx_p) = { "clk_i2s0_8ch_tx_src", "clk_i2s0_8ch_tx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(clk_i2s0_8ch_rx_p) = { "clk_i2s0_8ch_rx_src", "clk_i2s0_8ch_rx_frac", "i2s0_mclkin", "xin_osc0_half" }; ++PNAME(i2s0_8ch_mclkout_p) = { "mclk_i2s0_8ch_tx", "mclk_i2s0_8ch_rx", "xin_osc0_half" }; ++PNAME(clk_ref_mipi0_p) = { "clk_ref_mipi0_src", "clk_ref_mipi0_frac", "xin24m" }; ++PNAME(clk_ref_mipi1_p) = { "clk_ref_mipi1_src", "clk_ref_mipi1_frac", "xin24m" }; ++PNAME(clk_uart0_p) = { "clk_uart0_src", "clk_uart0_frac", "xin24m" }; ++PNAME(clk_uart1_p) = { "clk_uart1_src", "clk_uart1_frac", "xin24m" }; ++PNAME(clk_uart2_p) = { "clk_uart2_src", "clk_uart2_frac", "xin24m" }; ++PNAME(clk_uart3_p) = { "clk_uart3_src", "clk_uart3_frac", "xin24m" }; ++PNAME(clk_uart4_p) = { "clk_uart4_src", "clk_uart4_frac", "xin24m" }; ++PNAME(clk_uart5_p) = { "clk_uart5_src", "clk_uart5_frac", "xin24m" }; ++PNAME(clk_vicap_m0_p) = { "clk_vicap_m0_src", "clk_vicap_m0_frac", "xin24m" }; ++PNAME(clk_vicap_m1_p) = { "clk_vicap_m1_src", "clk_vicap_m1_frac", "xin24m" }; + -+int cryptodev_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ int ret; ++static struct rockchip_pll_clock rv1106_pll_clks[] __initdata = { ++ [apll] = PLL(pll_rk3328, PLL_APLL, "apll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1106_PLL_CON(0), ++ RV1106_MODE_CON, 0, 10, 0, rv1106_pll_rates), ++ [cpll] = PLL(pll_rk3328, PLL_CPLL, "cpll", mux_pll_p, ++ 0, RV1106_PLL_CON(8), ++ RV1106_MODE_CON, 2, 10, 0, rv1106_pll_rates), ++ [dpll] = PLL(pll_rk3328, PLL_DPLL, "dpll", mux_pll_p, ++ CLK_IGNORE_UNUSED, RV1106_PLL_CON(16), ++ RV1106_SUBDDRMODE_CON, 0, 10, 0, NULL), ++ [gpll] = PLL(pll_rk3328, PLL_GPLL, "gpll", mux_pll_p, ++ 0, RV1106_PLL_CON(24), ++ RV1106_MODE_CON, 4, 10, 0, rv1106_pll_rates), ++}; + -+ ret = fill_caop_from_kcaop(kcaop, fcr); -+ if (unlikely(ret)) { -+ derr(1, "fill_caop_from_kcaop"); -+ return ret; -+ } ++#define MFLAGS CLK_MUX_HIWORD_MASK 
++#define DFLAGS CLK_DIVIDER_HIWORD_MASK ++#define GFLAGS (CLK_GATE_HIWORD_MASK | CLK_GATE_SET_TO_DISABLE) + -+ if (unlikely(copy_to_user(arg, &kcaop->caop, sizeof(kcaop->caop)))) { -+ derr(1, "Error in copying to userspace"); -+ return -EFAULT; -+ } -+ return 0; -+} ++static struct rockchip_clk_branch rv1106_rtc32k_pmu_fracmux __initdata = ++ MUX(CLK_RTC_32K, "clk_rtc_32k", clk_rtc32k_pmu_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, ++ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS); + -+/* compatibility code for 32bit userlands */ -+#ifdef CONFIG_COMPAT ++static struct rockchip_clk_branch rv1106_i2s0_8ch_tx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_TX, "clk_i2s0_8ch_tx", clk_i2s0_8ch_tx_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(19), 0, 2, MFLAGS); + -+static inline void -+compat_to_crypt_auth_op(struct compat_crypt_auth_op *compat, struct crypt_auth_op *caop) -+{ -+ caop->ses = compat->ses; -+ caop->op = compat->op; -+ caop->flags = compat->flags; -+ caop->len = compat->len; -+ caop->auth_len = compat->auth_len; -+ caop->tag_len = compat->tag_len; -+ caop->iv_len = compat->iv_len; ++static struct rockchip_clk_branch rv1106_i2s0_8ch_rx_fracmux __initdata = ++ MUX(CLK_I2S0_8CH_RX, "clk_i2s0_8ch_rx", clk_i2s0_8ch_rx_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(21), 0, 2, MFLAGS); + -+ caop->auth_src = compat_ptr(compat->auth_src); -+ caop->src = compat_ptr(compat->src); -+ caop->dst = compat_ptr(compat->dst); -+ caop->tag = compat_ptr(compat->tag); -+ caop->iv = compat_ptr(compat->iv); -+} ++static struct rockchip_clk_branch rv1106_clk_ref_mipi0_fracmux __initdata = ++ MUX(CLK_REF_MIPI0, "clk_ref_mipi0", clk_ref_mipi0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(27), 0, 2, MFLAGS); + -+static inline void -+crypt_auth_op_to_compat(struct crypt_auth_op *caop, struct compat_crypt_auth_op *compat) -+{ -+ compat->ses = caop->ses; -+ compat->op = caop->op; -+ compat->flags = caop->flags; -+ compat->len = caop->len; -+ compat->auth_len = caop->auth_len; -+ compat->tag_len = caop->tag_len; -+ compat->iv_len = caop->iv_len; ++static struct rockchip_clk_branch rv1106_clk_ref_mipi1_fracmux __initdata = ++ MUX(CLK_REF_MIPI1, "clk_ref_mipi1", clk_ref_mipi1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(29), 0, 2, MFLAGS); + -+ compat->auth_src = ptr_to_compat(caop->auth_src); -+ compat->src = ptr_to_compat(caop->src); -+ compat->dst = ptr_to_compat(caop->dst); -+ compat->tag = ptr_to_compat(caop->tag); -+ compat->iv = ptr_to_compat(caop->iv); -+} ++static struct rockchip_clk_branch rv1106_clk_uart0_fracmux __initdata = ++ MUX(CLK_UART0, "clk_uart0", clk_uart0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(7), 0, 2, MFLAGS); + -+int compat_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ int ret; -+ struct compat_crypt_auth_op compat_auth_cop; ++static struct rockchip_clk_branch rv1106_clk_uart1_fracmux __initdata = ++ MUX(CLK_UART1, "clk_uart1", clk_uart1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(9), 0, 2, MFLAGS); + -+ ret = copy_from_user(&compat_auth_cop, arg, sizeof(compat_auth_cop)); -+ if (unlikely(ret)) { -+ derr(1, "Error in copying from userspace"); -+ return -EFAULT; -+ } ++static struct rockchip_clk_branch rv1106_clk_uart2_fracmux __initdata = ++ MUX(CLK_UART2, "clk_uart2", clk_uart2_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(11), 0, 2, MFLAGS); + -+ compat_to_crypt_auth_op(&compat_auth_cop, &kcaop->caop); ++static struct rockchip_clk_branch rv1106_clk_uart3_fracmux __initdata = ++ MUX(CLK_UART3, "clk_uart3", clk_uart3_p, CLK_SET_RATE_PARENT, 
++ RV1106_CLKSEL_CON(13), 0, 2, MFLAGS); + -+ return fill_kcaop_from_caop(kcaop, fcr); -+} ++static struct rockchip_clk_branch rv1106_clk_uart4_fracmux __initdata = ++ MUX(CLK_UART4, "clk_uart4", clk_uart4_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(15), 0, 2, MFLAGS); + -+int compat_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ int ret; -+ struct compat_crypt_auth_op compat_auth_cop; ++static struct rockchip_clk_branch rv1106_clk_uart5_fracmux __initdata = ++ MUX(CLK_UART5, "clk_uart5", clk_uart5_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(17), 0, 2, MFLAGS); + -+ ret = fill_caop_from_kcaop(kcaop, fcr); -+ if (unlikely(ret)) { -+ derr(1, "fill_caop_from_kcaop"); -+ return ret; -+ } ++static struct rockchip_clk_branch rv1106_clk_vicap_m0_fracmux __initdata = ++ MUX(CLK_VICAP_M0, "clk_vicap_m0", clk_vicap_m0_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(31), 0, 2, MFLAGS); + -+ crypt_auth_op_to_compat(&kcaop->caop, &compat_auth_cop); ++static struct rockchip_clk_branch rv1106_clk_vicap_m1_fracmux __initdata = ++ MUX(CLK_VICAP_M1, "clk_vicap_m1", clk_vicap_m1_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(33), 0, 2, MFLAGS); + -+ if (unlikely(copy_to_user(arg, &compat_auth_cop, sizeof(compat_auth_cop)))) { -+ derr(1, "Error in copying to userspace"); -+ return -EFAULT; -+ } -+ return 0; -+} ++static struct rockchip_clk_branch rv1106_clk_branches[] __initdata = { + -+#endif /* CONFIG_COMPAT */ ++ FACTOR(0, "xin_osc0_half", "xin24m", 0, 1, 2), + -+static void copy_tls_hash(struct scatterlist *dst_sg, int len, void *hash, int hash_len) -+{ -+ scatterwalk_map_and_copy(hash, dst_sg, len, hash_len, 1); -+} ++ /* PD_CORE */ ++ GATE(CLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, ++ RV1106_CORECLKGATE_CON(0), 14, GFLAGS), ++ GATE(CLK_CORE_MCU_RTC, "clk_core_mcu_rtc", "xin24m", 0, ++ RV1106_CORECLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE(HCLK_CPU, "hclk_cpu", mux_gpll_24m_p, CLK_IS_CRITICAL, ++ RV1106_CORECLKSEL_CON(2), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CORECLKGATE_CON(0), 12, GFLAGS), ++ COMPOSITE(CLK_CORE_MCU, "clk_core_mcu", mux_gpll_24m_p, 0, ++ RV1106_CORECLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CORECLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IS_CRITICAL, ++ RV1106_CORECLKSEL_CON(1), 0, 5, DFLAGS | CLK_DIVIDER_READ_ONLY, ++ RV1106_CORECLKGATE_CON(0), 6, GFLAGS), ++ GATE(0, "pclk_cpu_root", "pclk_dbg", CLK_IS_CRITICAL, ++ RV1106_CORECLKGATE_CON(0), 10, GFLAGS), ++ GATE(PCLK_MAILBOX, "pclk_mailbox", "pclk_cpu_root", 0, ++ RV1106_CORECLKGATE_CON(1), 8, GFLAGS), + -+static void read_tls_hash(struct scatterlist *dst_sg, int len, void *hash, int hash_len) -+{ -+ scatterwalk_map_and_copy(hash, dst_sg, len - hash_len, hash_len, 0); -+} ++ /* PD _TOP */ ++ COMPOSITE(CLK_50M_SRC, "clk_50m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE(CLK_100M_SRC, "clk_100m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(0), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE(CLK_150M_SRC, "clk_150m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(1), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE(CLK_200M_SRC, "clk_200m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(1), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 4, GFLAGS), ++ COMPOSITE(CLK_250M_SRC, "clk_250m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(2), 
5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 5, GFLAGS), ++ COMPOSITE(CLK_300M_SRC, "clk_300m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(2), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 6, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_339M_SRC, "clk_339m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(3), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 7, GFLAGS), ++ COMPOSITE(CLK_400M_SRC, "clk_400m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(3), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_HALFDIV(CLK_450M_SRC, "clk_450m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(4), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE(CLK_500M_SRC, "clk_500m_src", mux_gpll_cpll_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(4), 11, 1, MFLAGS, 6, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 10, GFLAGS), + -+#define TLS_MAX_PADDING_SIZE 256 -+static int pad_record(struct scatterlist *dst_sg, int len, int block_size) -+{ -+ uint8_t pad[TLS_MAX_PADDING_SIZE]; -+ int pad_size = block_size - (len % block_size); ++ COMPOSITE_NODIV(PCLK_TOP_ROOT, "pclk_top_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_CLKSEL_CON(24), 5, 2, MFLAGS, ++ RV1106_CLKGATE_CON(2), 9, GFLAGS), + -+ memset(pad, pad_size - 1, pad_size); ++ COMPOSITE(CLK_I2S0_8CH_TX_SRC, "clk_i2s0_8ch_tx_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(17), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_TX_FRAC, "clk_i2s0_8ch_tx_frac", "clk_i2s0_8ch_tx_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(18), 0, ++ RV1106_CLKGATE_CON(1), 14, GFLAGS, ++ &rv1106_i2s0_8ch_tx_fracmux), ++ GATE(MCLK_I2S0_8CH_TX, "mclk_i2s0_8ch_tx", "clk_i2s0_8ch_tx", 0, ++ RV1106_CLKGATE_CON(1), 15, GFLAGS), ++ COMPOSITE(CLK_I2S0_8CH_RX_SRC, "clk_i2s0_8ch_rx_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(19), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_I2S0_8CH_RX_FRAC, "clk_i2s0_8ch_rx_frac", "clk_i2s0_8ch_rx_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(20), 0, ++ RV1106_CLKGATE_CON(2), 1, GFLAGS, ++ &rv1106_i2s0_8ch_rx_fracmux), ++ GATE(MCLK_I2S0_8CH_RX, "mclk_i2s0_8ch_rx", "clk_i2s0_8ch_rx", 0, ++ RV1106_CLKGATE_CON(2), 2, GFLAGS), ++ MUX(I2S0_8CH_MCLKOUT, "i2s0_8ch_mclkout", i2s0_8ch_mclkout_p, CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(21), 2, 2, MFLAGS), ++ COMPOSITE(CLK_REF_MIPI0_SRC, "clk_ref_mipi0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(25), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_REF_MIPI0_FRAC, "clk_ref_mipi0_frac", "clk_ref_mipi0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(26), 0, ++ RV1106_CLKGATE_CON(3), 5, GFLAGS, ++ &rv1106_clk_ref_mipi0_fracmux), ++ GATE(MCLK_REF_MIPI0, "mclk_ref_mipi0", "clk_ref_mipi0", 0, ++ RV1106_CLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE(CLK_REF_MIPI1_SRC, "clk_ref_mipi1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(27), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 7, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_REF_MIPI1_FRAC, "clk_ref_mipi1_frac", "clk_ref_mipi1_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(28), 0, ++ RV1106_CLKGATE_CON(3), 8, GFLAGS, ++ &rv1106_clk_ref_mipi1_fracmux), ++ GATE(MCLK_REF_MIPI1, "mclk_ref_mipi1", "clk_ref_mipi1", 0, ++ RV1106_CLKGATE_CON(3), 9, GFLAGS), ++ COMPOSITE(CLK_UART0_SRC, "clk_uart0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(5), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 11, GFLAGS), ++ 
COMPOSITE_FRACMUX(CLK_UART0_FRAC, "clk_uart0_frac", "clk_uart0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(6), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(0), 12, GFLAGS, ++ &rv1106_clk_uart0_fracmux), ++ GATE(SCLK_UART0, "sclk_uart0", "clk_uart0", 0, ++ RV1106_CLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE(CLK_UART1_SRC, "clk_uart1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(7), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(0), 14, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART1_FRAC, "clk_uart1_frac", "clk_uart1_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(8), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(0), 15, GFLAGS, ++ &rv1106_clk_uart1_fracmux), ++ GATE(SCLK_UART1, "sclk_uart1", "clk_uart1", 0, ++ RV1106_CLKGATE_CON(1), 0, GFLAGS), ++ COMPOSITE(CLK_UART2_SRC, "clk_uart2_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(9), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 1, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART2_FRAC, "clk_uart2_frac", "clk_uart2_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(10), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 2, GFLAGS, ++ &rv1106_clk_uart2_fracmux), ++ GATE(SCLK_UART2, "sclk_uart2", "clk_uart2", 0, ++ RV1106_CLKGATE_CON(1), 3, GFLAGS), ++ COMPOSITE(CLK_UART3_SRC, "clk_uart3_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(11), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART3_FRAC, "clk_uart3_frac", "clk_uart3_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(12), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 5, GFLAGS, ++ &rv1106_clk_uart3_fracmux), ++ GATE(SCLK_UART3, "sclk_uart3", "clk_uart3", 0, ++ RV1106_CLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE(CLK_UART4_SRC, "clk_uart4_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(13), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 7, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART4_FRAC, "clk_uart4_frac", "clk_uart4_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(14), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 8, GFLAGS, ++ &rv1106_clk_uart4_fracmux), ++ GATE(SCLK_UART4, "sclk_uart4", "clk_uart4", 0, ++ RV1106_CLKGATE_CON(1), 9, GFLAGS), ++ COMPOSITE(CLK_UART5_SRC, "clk_uart5_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(15), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_UART5_FRAC, "clk_uart5_frac", "clk_uart5_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(16), CLK_FRAC_DIVIDER_NO_LIMIT, ++ RV1106_CLKGATE_CON(1), 11, GFLAGS, ++ &rv1106_clk_uart5_fracmux), ++ GATE(SCLK_UART5, "sclk_uart5", "clk_uart5", 0, ++ RV1106_CLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE(CLK_VICAP_M0_SRC, "clk_vicap_m0_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(29), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 10, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_VICAP_M0_FRAC, "clk_vicap_m0_frac", "clk_vicap_m0_src", CLK_SET_RATE_PARENT, ++ RV1106_CLKSEL_CON(30), 0, ++ RV1106_CLKGATE_CON(3), 11, GFLAGS, ++ &rv1106_clk_vicap_m0_fracmux), ++ GATE(SCLK_VICAP_M0, "sclk_vicap_m0", "clk_vicap_m0", 0, ++ RV1106_CLKGATE_CON(3), 12, GFLAGS), ++ COMPOSITE(CLK_VICAP_M1_SRC, "clk_vicap_m1_src", mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(31), 7, 1, MFLAGS, 2, 5, DFLAGS, ++ RV1106_CLKGATE_CON(3), 13, GFLAGS), ++ COMPOSITE_FRACMUX(CLK_VICAP_M1_FRAC, "clk_vicap_m1_frac", "clk_vicap_m1_src", 0, ++ RV1106_CLKSEL_CON(32), 0, ++ RV1106_CLKGATE_CON(3), 14, GFLAGS, ++ &rv1106_clk_vicap_m1_fracmux), ++ GATE(SCLK_VICAP_M1, "sclk_vicap_m1", "clk_vicap_m1", 0, ++ RV1106_CLKGATE_CON(3), 15, GFLAGS), ++ COMPOSITE(DCLK_VOP_SRC, "dclk_vop_src", 
mux_gpll_cpll_p, 0, ++ RV1106_CLKSEL_CON(23), 8, 1, MFLAGS, 3, 5, DFLAGS, ++ RV1106_CLKGATE_CON(2), 6, GFLAGS), + -+ scatterwalk_map_and_copy(pad, dst_sg, len, pad_size, 1); ++ /* PD_DDR */ ++ COMPOSITE_NODIV(PCLK_DDR_ROOT, "pclk_ddr_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_DDRCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_DDRCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_DDR_ROOT, "aclk_ddr_root", mux_500m_300m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_DDRCLKSEL_CON(0), 8, 2, MFLAGS, ++ RV1106_DDRCLKGATE_CON(0), 12, GFLAGS), ++ GATE(PCLK_DDRPHY, "pclk_ddrphy", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_DDR_HWLP, "pclk_ddr_hwlp", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(1), 2, GFLAGS), ++ GATE(PCLK_DDRMON, "pclk_ddrmon", "pclk_ddr_root", 0, ++ RV1106_DDRCLKGATE_CON(0), 7, GFLAGS), ++ GATE(CLK_TIMER_DDRMON, "clk_timer_ddrmon", "xin24m", 0, ++ RV1106_DDRCLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_DDRC, "pclk_ddrc", "pclk_ddr_root", CLK_IGNORE_UNUSED, ++ RV1106_DDRCLKGATE_CON(0), 5, GFLAGS), ++ GATE(PCLK_DFICTRL, "pclk_dfictrl", "pclk_ddr_root", CLK_IS_CRITICAL, ++ RV1106_DDRCLKGATE_CON(0), 11, GFLAGS), ++ GATE(ACLK_SYS_SHRM, "aclk_sys_shrm", "aclk_ddr_root", CLK_IS_CRITICAL, ++ RV1106_DDRCLKGATE_CON(0), 13, GFLAGS), + -+ return pad_size; -+} ++ /* PD_NPU */ ++ COMPOSITE_NODIV(HCLK_NPU_ROOT, "hclk_npu_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_NPU_ROOT, "aclk_npu_root", mux_500m_300m_pvtpll0_pvtpll1_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_NPU_ROOT, "pclk_npu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_NPUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_NPUCLKGATE_CON(0), 2, GFLAGS), ++ GATE(HCLK_RKNN, "hclk_rknn", "hclk_npu_root", 0, ++ RV1106_NPUCLKGATE_CON(0), 9, GFLAGS), ++ GATE(ACLK_RKNN, "aclk_rknn", "aclk_npu_root", 0, ++ RV1106_NPUCLKGATE_CON(0), 10, GFLAGS), + -+static int verify_tls_record_pad(struct scatterlist *dst_sg, int len, int block_size) -+{ -+ uint8_t pad[TLS_MAX_PADDING_SIZE]; -+ uint8_t pad_size; -+ int i; ++ /* PD_PERI */ ++ COMPOSITE_NODIV(PCLK_PERI_ROOT, "pclk_peri_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_PERI_ROOT, "aclk_peri_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 2, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PERI_ROOT, "hclk_peri_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(1), 4, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_BUS_ROOT, "aclk_bus_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PERICLKSEL_CON(9), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(6), 8, GFLAGS), ++ GATE(PCLK_ACODEC, "pclk_acodec", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 3, GFLAGS), ++ COMPOSITE_NOMUX(MCLK_ACODEC_TX, "mclk_acodec_tx", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKSEL_CON(8), 0, 8, DFLAGS, ++ RV1106_PERICLKGATE_CON(6), 4, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_CRYPTO, "clk_core_crypto", mux_300m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 5, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 11, GFLAGS), ++ COMPOSITE_NODIV(CLK_PKA_CRYPTO, "clk_pka_crypto", mux_300m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 7, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 12, 
GFLAGS), ++ GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(3), 13, GFLAGS), ++ GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 14, GFLAGS), ++ GATE(ACLK_DECOM, "aclk_decom", "aclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 9, GFLAGS), ++ GATE(PCLK_DECOM, "pclk_decom", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 10, GFLAGS), ++ COMPOSITE_NODIV(DCLK_DECOM, "dclk_decom", mux_400m_200m_100m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 14, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(5), 11, GFLAGS), ++ GATE(ACLK_DMAC, "aclk_dmac", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(5), 8, GFLAGS), ++ GATE(PCLK_DSM, "pclk_dsm", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 2, GFLAGS), ++ GATE(MCLK_DSM, "mclk_dsm", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKGATE_CON(6), 1, GFLAGS), ++ COMPOSITE(CCLK_SRC_EMMC, "cclk_src_emmc", mux_400m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 6, 1, MFLAGS, 0, 6, DFLAGS, ++ RV1106_PERICLKGATE_CON(4), 12, GFLAGS), ++ GATE(HCLK_EMMC, "hclk_emmc", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 13, GFLAGS), ++ GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 0, GFLAGS), ++ GATE(DBCLK_GPIO4, "dbclk_gpio4", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(2), 1, GFLAGS), ++ GATE(PCLK_I2C0, "pclk_i2c0", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C0, "clk_i2c0", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 8, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 7, GFLAGS), ++ GATE(PCLK_I2C2, "pclk_i2c2", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C2, "clk_i2c2", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 12, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 11, GFLAGS), ++ GATE(PCLK_I2C3, "pclk_i2c3", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 12, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C3, "clk_i2c3", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(1), 14, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 13, GFLAGS), ++ GATE(PCLK_I2C4, "pclk_i2c4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C4, "clk_i2c4", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(2), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(1), 15, GFLAGS), ++ GATE(HCLK_I2S0, "hclk_i2s0", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 0, GFLAGS), ++ GATE(PCLK_DFT2APB, "pclk_dft2apb", "pclk_peri_root", CLK_IGNORE_UNUSED, ++ RV1106_PERICLKGATE_CON(6), 7, GFLAGS), ++ GATE(HCLK_IVE, "hclk_ive", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 9, GFLAGS), ++ GATE(ACLK_IVE, "aclk_ive", "aclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 10, GFLAGS), ++ GATE(PCLK_PWM0_PERI, "pclk_pwm0_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(7), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM0_PERI, "clk_pwm0_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(11), 0, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(7), 4, GFLAGS), ++ GATE(CLK_CAPTURE_PWM0_PERI, "clk_capture_pwm0_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(7), 5, GFLAGS), ++ GATE(CLK_TIMER_ROOT, "clk_timer_root", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(0), 3, GFLAGS), ++ GATE(HCLK_SFC, "hclk_sfc", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 14, GFLAGS), ++ COMPOSITE(SCLK_SFC, "sclk_sfc", mux_500m_300m_200m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(7), 12, 2, MFLAGS, 7, 5, DFLAGS, ++ RV1106_PERICLKGATE_CON(5), 0, GFLAGS), ++ GATE(PCLK_UART0, "pclk_uart0", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 11, GFLAGS), ++ GATE(PCLK_UART1, 
"pclk_uart1", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(6), 15, GFLAGS), ++ GATE(PCLK_PWM1_PERI, "pclk_pwm1_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 15, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM1_PERI, "clk_pwm1_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 9, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(4), 0, GFLAGS), ++ GATE(CLK_CAPTURE_PWM1_PERI, "clk_capture_pwm1_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 1, GFLAGS), ++ GATE(PCLK_PWM2_PERI, "pclk_pwm2_peri", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(4), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_PWM2_PERI, "clk_pwm2_peri", mux_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 11, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(4), 3, GFLAGS), ++ GATE(CLK_CAPTURE_PWM2_PERI, "clk_capture_pwm2_peri", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 4, GFLAGS), ++ GATE(HCLK_BOOTROM, "hclk_bootrom", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 7, GFLAGS), ++ GATE(HCLK_SAI, "hclk_sai", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 13, GFLAGS), ++ GATE(MCLK_SAI, "mclk_sai", "mclk_i2s0_8ch_tx", 0, ++ RV1106_PERICLKGATE_CON(5), 14, GFLAGS), ++ GATE(PCLK_SARADC, "pclk_saradc", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 3, GFLAGS), ++ COMPOSITE_NOMUX(CLK_SARADC, "clk_saradc", "xin24m", 0, ++ RV1106_PERICLKSEL_CON(6), 0, 3, DFLAGS, ++ RV1106_PERICLKGATE_CON(3), 4, GFLAGS), ++ GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 6, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI1, "clk_spi1", mux_200m_100m_50m_24m_p, 0, ++ RV1106_PERICLKSEL_CON(6), 3, 2, MFLAGS, ++ RV1106_PERICLKGATE_CON(3), 7, GFLAGS), ++ GATE(PCLK_STIMER, "pclk_stimer", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 15, GFLAGS), ++ GATE(CLK_STIMER0, "clk_stimer0", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(1), 0, GFLAGS), ++ GATE(CLK_STIMER1, "clk_stimer1", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(1), 1, GFLAGS), ++ GATE(PCLK_TIMER, "pclk_timer", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(0), 8, GFLAGS), ++ GATE(CLK_TIMER0, "clk_timer0", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 9, GFLAGS), ++ GATE(CLK_TIMER1, "clk_timer1", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 10, GFLAGS), ++ GATE(CLK_TIMER2, "clk_timer2", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 11, GFLAGS), ++ GATE(CLK_TIMER3, "clk_timer3", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 12, GFLAGS), ++ GATE(CLK_TIMER4, "clk_timer4", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 13, GFLAGS), ++ GATE(CLK_TIMER5, "clk_timer5", "clk_timer_root", 0, ++ RV1106_PERICLKGATE_CON(0), 14, GFLAGS), ++ GATE(HCLK_TRNG_NS, "hclk_trng_ns", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 9, GFLAGS), ++ GATE(HCLK_TRNG_S, "hclk_trng_s", "hclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(3), 10, GFLAGS), ++ GATE(PCLK_UART2, "pclk_uart2", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 3, GFLAGS), ++ GATE(PCLK_UART3, "pclk_uart3", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 7, GFLAGS), ++ GATE(PCLK_UART4, "pclk_uart4", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 11, GFLAGS), ++ GATE(PCLK_UART5, "pclk_uart5", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(2), 15, GFLAGS), ++ GATE(ACLK_USBOTG, "aclk_usbotg", "aclk_bus_root", 0, ++ RV1106_PERICLKGATE_CON(4), 7, GFLAGS), ++ GATE(CLK_REF_USBOTG, "clk_ref_usbotg", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(4), 8, GFLAGS), ++ GATE(PCLK_USBPHY, "pclk_usbphy", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(5), 1, GFLAGS), ++ GATE(CLK_REF_USBPHY, "clk_ref_usbphy", "xin24m", 0, ++ 
RV1106_PERICLKGATE_CON(5), 2, GFLAGS), ++ GATE(PCLK_WDT_NS, "pclk_wdt_ns", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 2, GFLAGS), ++ GATE(TCLK_WDT_NS, "tclk_wdt_ns", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_WDT_S, "pclk_wdt_s", "pclk_peri_root", 0, ++ RV1106_PERICLKGATE_CON(1), 4, GFLAGS), ++ GATE(TCLK_WDT_S, "tclk_wdt_s", "xin24m", 0, ++ RV1106_PERICLKGATE_CON(1), 5, GFLAGS), + -+ scatterwalk_map_and_copy(&pad_size, dst_sg, len - 1, 1, 0); ++ /* PD_PMU */ ++ COMPOSITE_FRACMUX(0, "clk_rtc32k_frac", "xin24m", CLK_IGNORE_UNUSED, ++ RV1106_PMUCLKSEL_CON(6), 0, ++ RV1106_PMUCLKGATE_CON(1), 14, GFLAGS, ++ &rv1106_rtc32k_pmu_fracmux), ++ DIV(CLK_100M_PMU, "clk_100m_pmu", "clk_200m_src", 0, ++ RV1106_PMUCLKSEL_CON(0), 0, 3, DFLAGS), ++ COMPOSITE_NODIV(PCLK_PMU_ROOT, "pclk_pmu_root", mux_100m_pmu_24m_p, CLK_IS_CRITICAL, ++ RV1106_PMUCLKSEL_CON(0), 3, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(HCLK_PMU_ROOT, "hclk_pmu_root", mux_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_PMUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 2, GFLAGS), ++ GATE(CLK_PMU, "clk_pmu", "xin24m", CLK_IS_CRITICAL, ++ RV1106_PMUCLKGATE_CON(1), 0, GFLAGS), ++ GATE(PCLK_PMU, "pclk_pmu", "pclk_pmu_root", CLK_IS_CRITICAL, ++ RV1106_PMUCLKGATE_CON(1), 1, GFLAGS), ++ GATE(CLK_DDR_FAIL_SAFE, "clk_ddr_fail_safe", "clk_pmu", 0, ++ RV1106_PMUCLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_PMU_GPIO0, "pclk_pmu_gpio0", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NODIV(DBCLK_PMU_GPIO0, "dbclk_pmu_gpio0", mux_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(0), 15, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_I2C1, "pclk_i2c1", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(0), 3, GFLAGS), ++ COMPOSITE_NODIV(CLK_I2C1, "clk_i2c1", mux_200m_100m_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(0), 6, 2, MFLAGS, ++ RV1106_PMUCLKGATE_CON(0), 4, GFLAGS), ++ GATE(PCLK_PMU_MAILBOX, "pclk_pmu_mailbox", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(2), 10, GFLAGS), ++ GATE(CLK_PMU_MCU, "clk_pmu_mcu", "hclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(0), 9, GFLAGS), ++ GATE(CLK_PMU_MCU_RTC, "clk_pmu_mcu_rtc", "xin24m", 0, ++ RV1106_PMUCLKGATE_CON(0), 13, GFLAGS), ++ COMPOSITE_NOMUX(CLK_PVTM_PMU, "clk_pvtm_pmu", "xin24m", 0, ++ RV1106_PMUCLKSEL_CON(1), 0, 5, DFLAGS, ++ RV1106_PMUCLKGATE_CON(1), 4, GFLAGS), ++ GATE(PCLK_PVTM_PMU, "pclk_pvtm_pmu", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(1), 5, GFLAGS), ++ GATE(CLK_REFOUT, "clk_refout", "xin24m", 0, ++ RV1106_PMUCLKGATE_CON(2), 13, GFLAGS), ++ GATE(HCLK_PMU_SRAM, "hclk_pmu_sram", "hclk_pmu_root", CLK_IGNORE_UNUSED, ++ RV1106_PMUCLKGATE_CON(0), 8, GFLAGS), ++ GATE(PCLK_PMU_WDT, "pclk_pmu_wdt", "pclk_pmu_root", 0, ++ RV1106_PMUCLKGATE_CON(2), 8, GFLAGS), ++ COMPOSITE_NODIV(TCLK_PMU_WDT, "tclk_pmu_wdt", mux_24m_32k_p, 0, ++ RV1106_PMUCLKSEL_CON(7), 2, 1, MFLAGS, ++ RV1106_PMUCLKGATE_CON(2), 9, GFLAGS), + -+ if (pad_size + 1 > len) { -+ derr(1, "Pad size: %d", pad_size); -+ return -EBADMSG; -+ } ++ /* PD_SUBDDR */ ++ COMPOSITE(CLK_CORE_DDRC_SRC, "clk_core_ddrc_src", mux_dpll_300m_p, CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKSEL_CON(0), 5, 1, MFLAGS, 0, 5, DFLAGS, ++ RV1106_SUBDDRCLKGATE_CON(0), 2, GFLAGS), ++ GATE(CLK_DFICTRL, "clk_dfictrl", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKGATE_CON(0), 5, GFLAGS), ++ GATE(CLK_DDRMON, "clk_ddrmon", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ RV1106_SUBDDRCLKGATE_CON(0), 4, GFLAGS), ++ GATE(CLK_DDR_PHY, "clk_ddr_phy", "clk_core_ddrc_src", CLK_IGNORE_UNUSED, ++ 
RV1106_SUBDDRCLKGATE_CON(0), 6, GFLAGS), ++ GATE(ACLK_DDRC, "aclk_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, ++ RV1106_SUBDDRCLKGATE_CON(0), 1, GFLAGS), ++ GATE(CLK_CORE_DDRC, "clk_core_ddrc", "clk_core_ddrc_src", CLK_IS_CRITICAL, ++ RV1106_SUBDDRCLKGATE_CON(0), 3, GFLAGS), + -+ scatterwalk_map_and_copy(pad, dst_sg, len - pad_size - 1, pad_size + 1, 0); + -+ for (i = 0; i < pad_size; i++) -+ if (pad[i] != pad_size) { -+ derr(1, "Pad size: %u, pad: %d", pad_size, pad[i]); -+ return -EBADMSG; -+ } ++ /* PD_VEPU */ ++ COMPOSITE_NODIV(HCLK_VEPU_ROOT, "hclk_vepu_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VEPU_COM_ROOT, "aclk_vepu_com_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VEPU_ROOT, "aclk_vepu_root", mux_300m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VEPU_ROOT, "pclk_vepu_root", mux_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VEPUCLKSEL_CON(0), 6, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 3, GFLAGS), ++ GATE(PCLK_SPI0, "pclk_spi0", "pclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(1), 2, GFLAGS), ++ COMPOSITE_NODIV(CLK_SPI0, "clk_spi0", mux_200m_100m_50m_24m_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 12, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(1), 3, GFLAGS), ++ GATE(CLK_UART_DETN_FLT, "clk_uart_detn_flt", "xin24m", 0, ++ RV1106_VEPUCLKGATE_CON(1), 8, GFLAGS), ++ GATE(HCLK_VEPU, "hclk_vepu", "hclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 8, GFLAGS), ++ GATE(ACLK_VEPU, "aclk_vepu", "aclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_VEPU, "clk_core_vepu", mux_400m_300m_pvtpll0_pvtpll1_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 8, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 10, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_VEPU_DVBM, "clk_core_vepu_dvbm", mux_200m_100m_50m_24m_p, 0, ++ RV1106_VEPUCLKSEL_CON(0), 10, 2, MFLAGS, ++ RV1106_VEPUCLKGATE_CON(0), 13, GFLAGS), ++ GATE(PCLK_GPIO1, "pclk_gpio1", "pclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 15, GFLAGS), ++ GATE(DBCLK_GPIO1, "dbclk_gpio1", "xin24m", 0, ++ RV1106_VEPUCLKGATE_CON(1), 0, GFLAGS), ++ GATE(HCLK_VEPU_PP, "hclk_vepu_pp", "hclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 11, GFLAGS), ++ GATE(ACLK_VEPU_PP, "aclk_vepu_pp", "aclk_vepu_root", 0, ++ RV1106_VEPUCLKGATE_CON(0), 12, GFLAGS), + -+ return pad_size + 1; -+} ++ /* PD_VI */ ++ COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VI_ROOT, "aclk_vi_root", mux_339m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VICLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VI_RTC_ROOT, "pclk_vi_rtc_root", mux_50m_24m_p, 0, ++ RV1106_VICLKSEL_CON(0), 6, 1, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 3, GFLAGS), + -+/* Authenticate and encrypt the TLS way (also perform padding). -+ * During decryption it verifies the pad and tag and returns -EBADMSG on error. 
-+ */ -+static int -+tls_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist *auth_sg, uint32_t auth_len, -+ struct scatterlist *dst_sg, uint32_t len) -+{ -+ int ret, fail = 0; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ uint8_t vhash[AALG_MAX_RESULT_LEN]; -+ uint8_t hash_output[AALG_MAX_RESULT_LEN]; ++ GATE(PCLK_CSIHOST0, "pclk_csihost0", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 3, GFLAGS), ++ GATE(PCLK_CSIHOST1, "pclk_csihost1", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 5, GFLAGS), ++ GATE(PCLK_GPIO3, "pclk_gpio3", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 15, GFLAGS), ++ GATE(DBCLK_GPIO3, "dbclk_gpio3", "xin24m", 0, ++ RV1106_VICLKGATE_CON(2), 0, GFLAGS), ++ GATE(HCLK_ISP3P2, "hclk_isp3p2", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 7, GFLAGS), ++ GATE(ACLK_ISP3P2, "aclk_isp3p2", "aclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_ISP3P2, "clk_core_isp3p2", mux_339m_200m_pvtpll0_pvtpll1_p, 0, ++ RV1106_VICLKSEL_CON(0), 7, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 9, GFLAGS), ++ GATE(PCLK_MIPICSIPHY, "pclk_mipicsiphy", "pclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 14, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDMMC, "cclk_src_sdmmc", mux_400m_24m_p, 0, ++ RV1106_VICLKSEL_CON(1), 14, 1, MFLAGS, 8, 6, DFLAGS, ++ RV1106_VICLKGATE_CON(1), 11, GFLAGS), ++ GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(1), 12, GFLAGS), ++ GATE(CLK_SDMMC_DETN_FLT, "clk_sdmmc_detn_flt", "xin24m", 0, ++ RV1106_VICLKGATE_CON(1), 13, GFLAGS), ++ GATE(PCLK_VI_RTC_TEST, "pclk_vi_rtc_test", "pclk_vi_rtc_root", 0, ++ RV1106_VICLKGATE_CON(2), 5, GFLAGS), ++ GATE(PCLK_VI_RTC_PHY, "pclk_vi_rtc_phy", "pclk_vi_rtc_root", 0, ++ RV1106_VICLKGATE_CON(2), 6, GFLAGS), ++ COMPOSITE_NODIV(DCLK_VICAP, "dclk_vicap", mux_339m_200m_100m_24m_p, 0, ++ RV1106_VICLKSEL_CON(0), 9, 2, MFLAGS, ++ RV1106_VICLKGATE_CON(0), 10, GFLAGS), ++ GATE(ACLK_VICAP, "aclk_vicap", "aclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 12, GFLAGS), ++ GATE(HCLK_VICAP, "hclk_vicap", "hclk_vi_root", 0, ++ RV1106_VICLKGATE_CON(0), 13, GFLAGS), + -+ /* TLS authenticates the plaintext except for the padding. 
-+ */ -+ if (caop->op == COP_ENCRYPT) { -+ if (ses_ptr->hdata.init != 0) { -+ if (auth_len > 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ auth_sg, auth_len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } -+ } ++ /* PD_VO */ ++ COMPOSITE_NODIV(ACLK_MAC_ROOT, "aclk_mac_root", mux_300m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 12, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(1), 4, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VO_ROOT, "aclk_vo_root", mux_400m_200m_100m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 0, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 0, GFLAGS), ++ COMPOSITE_NODIV(HCLK_VO_ROOT, "hclk_vo_root", mux_200m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 2, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 1, GFLAGS), ++ COMPOSITE_NODIV(PCLK_VO_ROOT, "pclk_vo_root", mux_150m_100m_50m_24m_p, CLK_IS_CRITICAL, ++ RV1106_VOCLKSEL_CON(0), 4, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 2, GFLAGS), ++ COMPOSITE_NODIV(ACLK_VOP_ROOT, "aclk_vop_root", mux_300m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 10, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 11, GFLAGS), + -+ if (len > 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } -+ } ++ GATE(PCLK_GPIO2, "pclk_gpio2", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(3), 0, GFLAGS), ++ GATE(DBCLK_GPIO2, "dbclk_gpio2", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(3), 1, GFLAGS), ++ GATE(ACLK_MAC, "aclk_mac", "aclk_mac_root", 0, ++ RV1106_VOCLKGATE_CON(1), 8, GFLAGS), ++ GATE(PCLK_MAC, "pclk_mac", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(1), 9, GFLAGS), ++ FACTOR(CLK_GMAC0_50M_O, "clk_gmac0_50m_o", "clk_50m_src", 0, 1, 1), ++ FACTOR(CLK_GMAC0_REF_50M, "clk_gmac0_ref_50m", "clk_gmac0_50m_o", 0, 1, 1), ++ DIV(CLK_GMAC0_TX_50M_O, "clk_gmac0_tx_50m_o", "clk_gmac0_50m_o", 0, ++ RV1106_VOCLKSEL_CON(2), 1, 6, DFLAGS), ++ GATE(CLK_MACPHY, "clk_macphy", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 13, GFLAGS), ++ GATE(CLK_OTPC_ARB, "clk_otpc_arb", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 11, GFLAGS), ++ GATE(PCLK_OTPC_NS, "pclk_otpc_ns", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 3, GFLAGS), ++ GATE(CLK_SBPI_OTPC_NS, "clk_sbpi_otpc_ns", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 5, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_NS, "clk_user_otpc_ns", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 10, 3, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 6, GFLAGS), ++ GATE(PCLK_OTPC_S, "pclk_otpc_s", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 7, GFLAGS), ++ GATE(CLK_SBPI_OTPC_S, "clk_sbpi_otpc_s", "xin24m", 0, ++ RV1106_VOCLKGATE_CON(2), 9, GFLAGS), ++ COMPOSITE_NOMUX(CLK_USER_OTPC_S, "clk_user_otpc_s", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 13, 3, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 10, GFLAGS), ++ GATE(PCLK_OTP_MASK, "pclk_otp_mask", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 14, GFLAGS), ++ GATE(CLK_PMC_OTP, "clk_pmc_otp", "clk_sbpi_otpc_s", 0, ++ RV1106_VOCLKGATE_CON(2), 15, GFLAGS), ++ GATE(HCLK_RGA2E, "hclk_rga2e", "hclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 7, GFLAGS), ++ GATE(ACLK_RGA2E, "aclk_rga2e", "aclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 8, GFLAGS), ++ COMPOSITE_NODIV(CLK_CORE_RGA2E, "clk_core_rga2e", mux_400m_200m_100m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(1), 8, 2, MFLAGS, ++ RV1106_VOCLKGATE_CON(0), 9, GFLAGS), ++ COMPOSITE(CCLK_SRC_SDIO, "cclk_src_sdio", mux_400m_24m_p, 0, ++ RV1106_VOCLKSEL_CON(2), 13, 1, MFLAGS, 7, 6, DFLAGS, ++ RV1106_VOCLKGATE_CON(1), 14, GFLAGS), ++ GATE(HCLK_SDIO, "hclk_sdio", "hclk_vo_root", 
0, ++ RV1106_VOCLKGATE_CON(1), 15, GFLAGS), ++ GATE(PCLK_TSADC, "pclk_tsadc", "pclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(2), 0, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC, "clk_tsadc", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 0, 5, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 1, GFLAGS), ++ COMPOSITE_NOMUX(CLK_TSADC_TSEN, "clk_tsadc_tsen", "xin24m", 0, ++ RV1106_VOCLKSEL_CON(3), 5, 5, DFLAGS, ++ RV1106_VOCLKGATE_CON(2), 2, GFLAGS), ++ GATE(HCLK_VOP, "hclk_vop", "hclk_vo_root", 0, ++ RV1106_VOCLKGATE_CON(0), 13, GFLAGS), ++ GATE(DCLK_VOP, "dclk_vop", "dclk_vop_src", 0, ++ RV1106_VOCLKGATE_CON(0), 14, GFLAGS), ++ GATE(ACLK_VOP, "aclk_vop", "aclk_vop_root", 0, ++ RV1106_VOCLKGATE_CON(0), 15, GFLAGS), + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_final: %d", ret); -+ return ret; -+ } ++ /* IO CLK */ ++ GATE(RX0PCLK_VICAP, "rx0pclk_vicap", "rx0pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 0, GFLAGS), ++ GATE(RX1PCLK_VICAP, "rx1pclk_vicap", "rx1pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 1, GFLAGS), ++ GATE(ISP0CLK_VICAP, "isp0clk_vicap", "isp0clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(1), 2, GFLAGS), ++ GATE(I0CLK_VICAP, "i0clk_vicap", "i0clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 14, GFLAGS), ++ GATE(I1CLK_VICAP, "i1clk_vicap", "i1clk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 15, GFLAGS), ++ GATE(PCLK_VICAP, "pclk_vicap", "pclk_vicap_io", 0, ++ RV1106_VICLKGATE_CON(0), 11, GFLAGS), ++ GATE(CLK_RXBYTECLKHS_0, "clk_rxbyteclkhs_0", "clk_rxbyteclkhs_0_io", 0, ++ RV1106_VICLKGATE_CON(1), 4, GFLAGS), ++ GATE(CLK_RXBYTECLKHS_1, "clk_rxbyteclkhs_1", "clk_rxbyteclkhs_1_io", 0, ++ RV1106_VICLKGATE_CON(1), 6, GFLAGS), + -+ copy_tls_hash(dst_sg, len, hash_output, caop->tag_len); -+ len += caop->tag_len; -+ } ++ GATE(PCLK_VICAP_VEPU, "pclk_vicap_vepu", "pclk_vicap_vepu_io", 0, ++ RV1106_VEPUCLKGATE_CON(0), 14, GFLAGS), ++ GATE(SCLK_IN_SPI0, "sclk_in_spi0", "sclk_in_spi0_io", 0, ++ RV1106_VEPUCLKGATE_CON(1), 4, GFLAGS), + -+ if (ses_ptr->cdata.init != 0) { -+ if (ses_ptr->cdata.blocksize > 1) { -+ ret = pad_record(dst_sg, len, ses_ptr->cdata.blocksize); -+ len += ret; -+ } ++ GATE(CLK_UTMI_USBOTG, "clk_utmi_usbotg", "clk_utmi_usbotg_io", 0, ++ RV1106_PERICLKGATE_CON(4), 9, GFLAGS), + -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ dst_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_encrypt: %d", ret); -+ return ret; -+ } -+ } -+ } else { -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ dst_sg, dst_sg, len); ++}; + -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_decrypt: %d", ret); -+ return ret; -+ } ++static struct rockchip_clk_branch rv1106_grf_clk_branches[] __initdata = { ++ MMC(SCLK_EMMC_DRV, "emmc_drv", "cclk_src_emmc", RV1106_EMMC_CON0, 1), ++ MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "cclk_src_emmc", RV1106_EMMC_CON1, 1), ++ MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "cclk_src_sdmmc", RV1106_SDMMC_CON0, 1), ++ MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "cclk_src_sdmmc", RV1106_SDMMC_CON1, 1), ++ MMC(SCLK_SDIO_DRV, "sdio_drv", "cclk_src_sdio", RV1106_SDIO_CON0, 1), ++ MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "cclk_src_sdio", RV1106_SDIO_CON1, 1), ++}; + -+ if (ses_ptr->cdata.blocksize > 1) { -+ ret = verify_tls_record_pad(dst_sg, len, ses_ptr->cdata.blocksize); -+ if (unlikely(ret < 0)) { -+ derr(2, "verify_record_pad: %d", ret); -+ fail = 1; -+ } else { -+ len -= ret; -+ } -+ } -+ } ++static void __iomem *rv1106_cru_base; ++static struct rockchip_clk_provider *grf_ctx, *cru_ctx; + -+ if 
(ses_ptr->hdata.init != 0) { -+ if (unlikely(caop->tag_len > sizeof(vhash) || caop->tag_len > len)) { -+ derr(1, "Illegal tag len size"); -+ return -EINVAL; -+ } ++void rv1106_dump_cru(void) ++{ ++ if (rv1106_cru_base) { ++ pr_warn("CRU:\n"); ++ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, ++ 32, 4, rv1106_cru_base, ++ 0x588, false); ++ } ++} ++EXPORT_SYMBOL_GPL(rv1106_dump_cru); + -+ read_tls_hash(dst_sg, len, vhash, caop->tag_len); -+ len -= caop->tag_len; ++static void _cru_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) ++{ ++ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; + -+ if (auth_len > 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ auth_sg, auth_len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } -+ } ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); ++ if (rate0 < target_rate) ++ return; ++ /* delta < (3.125% * target_rate) */ ++ if ((rate0 - target_rate) < (target_rate >> 5)) ++ return; + -+ if (len > 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } -+ } ++ length_ori = readl_relaxed(rv1106_cru_base + length_offset) & PVTPLL_LENGTH_SEL_MASK; ++ length = length_ori; ++ length++; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate1 = readl_relaxed(rv1106_cru_base + count_offset); ++ if ((rate1 < target_rate) || (rate1 >= rate0)) ++ return; ++ if (abs(rate1 - target_rate) < (target_rate >> 5)) ++ return; + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_final: %d", ret); -+ return ret; -+ } ++ step = rate0 - rate1; ++ delta = rate1 - target_rate; ++ length += delta / step; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); + -+ if (memcmp(vhash, hash_output, caop->tag_len) != 0 || fail != 0) { -+ derr(2, "MAC verification failed (tag_len: %d)", caop->tag_len); -+ return -EBADMSG; -+ } -+ } ++ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { ++ if (i++ > 20) ++ break; ++ if (rate0 > target_rate) ++ length++; ++ else ++ length--; ++ if (length <= length_ori) ++ break; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ writel_relaxed(val, rv1106_cru_base + length_offset); ++ usleep_range(2000, 2100); ++ rate0 = readl_relaxed(rv1106_cru_base + count_offset); + } -+ kcaop->dst_len = len; -+ return 0; +} + -+/* Authenticate and encrypt the SRTP way. During decryption -+ * it verifies the tag and returns -EBADMSG on error. -+ */ -+static int -+srtp_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist *auth_sg, uint32_t auth_len, -+ struct scatterlist *dst_sg, uint32_t len) ++static void _grf_pvtpll_calibrate(int count_offset, int length_offset, int target_rate) +{ -+ int ret, fail = 0; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ uint8_t vhash[AALG_MAX_RESULT_LEN]; -+ uint8_t hash_output[AALG_MAX_RESULT_LEN]; ++ unsigned int rate0, rate1, delta, length_ori, length, step, val, i = 0; + -+ /* SRTP authenticates the encrypted data. 
-+ */ -+ if (caop->op == COP_ENCRYPT) { -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ dst_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_encrypt: %d", ret); -+ return ret; -+ } -+ } ++ regmap_read(cru_ctx->grf, count_offset, &rate0); ++ if (rate0 < target_rate) ++ return; ++ /* delta < (3.125% * target_rate) */ ++ if ((rate0 - target_rate) < (target_rate >> 5)) ++ return; + -+ if (ses_ptr->hdata.init != 0) { -+ if (auth_len > 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ auth_sg, auth_len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } -+ } ++ regmap_read(cru_ctx->grf, length_offset, &length_ori); ++ length = length_ori; ++ length_ori = length; ++ length &= PVTPLL_LENGTH_SEL_MASK; ++ length++; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate1); ++ if ((rate1 < target_rate) || (rate1 >= rate0)) ++ return; ++ if (abs(rate1 - target_rate) < (target_rate >> 5)) ++ return; + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_final: %d", ret); -+ return ret; -+ } ++ step = rate0 - rate1; ++ delta = rate1 - target_rate; ++ length += delta / step; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate0); + -+ if (unlikely(copy_to_user(caop->tag, hash_output, caop->tag_len))) -+ return -EFAULT; -+ } ++ while (abs(rate0 - target_rate) >= (target_rate >> 5)) { ++ if (i++ > 20) ++ break; ++ if (rate0 > target_rate) ++ length++; ++ else ++ length--; ++ if (length <= length_ori) ++ break; ++ val = HIWORD_UPDATE(length, PVTPLL_LENGTH_SEL_MASK, PVTPLL_LENGTH_SEL_SHIFT); ++ regmap_write(cru_ctx->grf, length_offset, val); ++ usleep_range(2000, 2100); ++ regmap_read(cru_ctx->grf, count_offset, &rate0); ++ } ++} + -+ } else { -+ if (ses_ptr->hdata.init != 0) { -+ if (unlikely(caop->tag_len > sizeof(vhash) || caop->tag_len > len)) { -+ derr(1, "Illegal tag len size"); -+ return -EINVAL; -+ } ++static void rockchip_rv1106_pvtpll_calibrate(struct work_struct *w) ++{ ++ struct clk *clk; ++ unsigned long rate; + -+ if (unlikely(copy_from_user(vhash, caop->tag, caop->tag_len))) -+ return -EFAULT; ++ clk = __clk_lookup("clk_pvtpll_0"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _cru_pvtpll_calibrate(CRU_PVTPLL0_OSC_CNT_AVG, ++ CRU_PVTPLL0_CON0_H, rate / 1000000); ++ } + -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ auth_sg, auth_len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_update: %d", ret); -+ return ret; -+ } ++ clk = __clk_lookup("clk_pvtpll_1"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _cru_pvtpll_calibrate(CRU_PVTPLL1_OSC_CNT_AVG, ++ CRU_PVTPLL1_CON0_H, rate / 1000000); ++ } + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_hash_final: %d", ret); -+ return ret; -+ } ++ clk = __clk_lookup("cpu_pvtpll"); ++ if (clk) { ++ rate = clk_get_rate(clk); ++ _grf_pvtpll_calibrate(CPU_PVTPLL_OSC_CNT_AVG, ++ CPU_PVTPLL_CON0_H, rate / 1000000); ++ } ++} ++static DECLARE_DEFERRABLE_WORK(pvtpll_calibrate_work, rockchip_rv1106_pvtpll_calibrate); + -+ if (memcmp(vhash, hash_output, caop->tag_len) != 0 || fail != 0) { -+ derr(2, "MAC verification failed"); -+ 
return -EBADMSG; -+ } -+ } ++static void rockchip_rv1106_pvtpll_init(struct rockchip_clk_provider *ctx) ++{ ++ /* set pvtpll ref clk mux */ ++ writel_relaxed(CPU_PVTPLL_PATH_CORE, ctx->reg_base + CPU_CLK_PATH_BASE); + -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ dst_sg, dst_sg, len); ++ regmap_write(ctx->grf, CPU_PVTPLL_CON0_H, HIWORD_UPDATE(0x7, PVTPLL_LENGTH_SEL_MASK, ++ PVTPLL_LENGTH_SEL_SHIFT)); ++ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x1, PVTPLL_RING_SEL_MASK, ++ PVTPLL_RING_SEL_SHIFT)); ++ regmap_write(ctx->grf, CPU_PVTPLL_CON0_L, HIWORD_UPDATE(0x3, PVTPLL_EN_MASK, ++ PVTPLL_EN_SHIFT)); + -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_decrypt: %d", ret); -+ return ret; -+ } -+ } ++ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL0_CON0_H); ++ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL0_CON1_L); ++ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL0_CON2_H); ++ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL0_CON0_L); + -+ } -+ kcaop->dst_len = len; -+ return 0; ++ writel_relaxed(0x007f0000, ctx->reg_base + CRU_PVTPLL1_CON0_H); ++ writel_relaxed(0xffff0018, ctx->reg_base + CRU_PVTPLL1_CON1_L); ++ writel_relaxed(0xffff0004, ctx->reg_base + CRU_PVTPLL1_CON2_H); ++ writel_relaxed(0x00030003, ctx->reg_base + CRU_PVTPLL1_CON0_L); ++ ++ schedule_delayed_work(&pvtpll_calibrate_work, msecs_to_jiffies(3000)); +} + -+static int rk_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist *auth_sg, uint32_t auth_len, -+ struct scatterlist *src_sg, -+ struct scatterlist *dst_sg, uint32_t len) ++static int rv1106_clk_panic(struct notifier_block *this, ++ unsigned long ev, void *ptr) +{ -+ int ret; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int max_tag_len; ++ rv1106_dump_cru(); ++ return NOTIFY_DONE; ++} + -+ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); -+ if (unlikely(caop->tag_len > max_tag_len)) { -+ derr(0, "Illegal tag length: %d", caop->tag_len); -+ return -EINVAL; -+ } ++static struct notifier_block rv1106_clk_panic_block = { ++ .notifier_call = rv1106_clk_panic, ++}; + -+ if (caop->tag_len) -+ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); -+ else -+ caop->tag_len = max_tag_len; ++static void __init rv1106_clk_init(struct device_node *np) ++{ ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; ++ struct clk **cru_clks; + -+ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); ++ reg_base = of_iomap(np, 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru region\n", __func__); ++ return; ++ } + -+ if (caop->op == COP_ENCRYPT) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, src_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_encrypt: %d", ret); -+ return ret; -+ } -+ } else { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, src_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_decrypt: %d", ret); -+ return ret; -+ } ++ rv1106_cru_base = reg_base; ++ ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip clk init failed\n", __func__); ++ iounmap(reg_base); ++ return; + } ++ cru_ctx = ctx; + -+ return 0; -+} ++ rockchip_rv1106_pvtpll_init(ctx); + -+/* Typical AEAD (i.e. GCM) encryption/decryption. -+ * During decryption the tag is verified. 
-+ */ -+static int -+auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, -+ struct scatterlist *auth_sg, uint32_t auth_len, -+ struct scatterlist *src_sg, -+ struct scatterlist *dst_sg, uint32_t len) -+{ -+ int ret; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int max_tag_len; ++ cru_clks = ctx->clk_data.clks; + -+ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); -+ if (unlikely(caop->tag_len > max_tag_len)) { -+ derr(0, "Illegal tag length: %d", caop->tag_len); -+ return -EINVAL; -+ } ++ rockchip_clk_register_plls(ctx, rv1106_pll_clks, ++ ARRAY_SIZE(rv1106_pll_clks), ++ RV1106_GRF_SOC_STATUS0); + -+ if (caop->tag_len) -+ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); -+ else -+ caop->tag_len = max_tag_len; ++ rockchip_clk_register_armclk(ctx, ARMCLK, "armclk", ++ 3, cru_clks[PLL_APLL], cru_clks[PLL_GPLL], ++ &rv1106_cpuclk_data, rv1106_cpuclk_rates, ++ ARRAY_SIZE(rv1106_cpuclk_rates)); + -+ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); ++ rockchip_clk_register_branches(ctx, rv1106_clk_branches, ++ ARRAY_SIZE(rv1106_clk_branches)); + -+ if (caop->op == COP_ENCRYPT) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_encrypt: %d", ret); -+ return ret; -+ } -+ kcaop->dst_len = len + caop->tag_len; -+ caop->tag = caop->dst + len; -+ } else { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); ++ rockchip_clk_register_branches(grf_ctx, rv1106_grf_clk_branches, ++ ARRAY_SIZE(rv1106_grf_clk_branches)); + -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_decrypt: %d", ret); -+ return ret; -+ } -+ kcaop->dst_len = len - caop->tag_len; -+ caop->tag = caop->dst + len - caop->tag_len; -+ } ++ rockchip_register_softrst(np, 31745, reg_base + RV1106_PMUSOFTRST_CON(0), ++ ROCKCHIP_SOFTRST_HIWORD_MASK); + -+ return 0; ++ rockchip_register_restart_notifier(ctx, RV1106_GLB_SRST_FST, NULL); ++ ++ rockchip_clk_of_add_provider(np, ctx); ++ ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &rv1106_clk_panic_block); +} + -+static int crypto_auth_zc_srtp(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) ++CLK_OF_DECLARE(rv1106_cru, "rockchip,rv1106-cru", rv1106_clk_init); ++ ++static void __init rv1106_grf_clk_init(struct device_node *np) +{ -+ struct scatterlist *dst_sg, *auth_sg; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int ret; ++ struct rockchip_clk_provider *ctx; ++ void __iomem *reg_base; + -+ if (unlikely(ses_ptr->cdata.init != 0 && -+ (ses_ptr->cdata.stream == 0 || ses_ptr->cdata.aead != 0))) { -+ derr(0, "Only stream modes are allowed in SRTP mode (but not AEAD)"); -+ return -EINVAL; ++ reg_base = of_iomap(of_get_parent(np), 0); ++ if (!reg_base) { ++ pr_err("%s: could not map cru grf region\n", __func__); ++ return; + } + -+ ret = get_userbuf_srtp(ses_ptr, kcaop, &auth_sg, &dst_sg); -+ if (unlikely(ret)) { -+ derr(1, "get_userbuf_srtp(): Error getting user pages."); -+ return ret; ++ ctx = rockchip_clk_init(np, reg_base, CLK_NR_GRF_CLKS); ++ if (IS_ERR(ctx)) { ++ pr_err("%s: rockchip grf clk init failed\n", __func__); ++ return; + } ++ grf_ctx = ctx; + -+ ret = srtp_auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, -+ dst_sg, caop->len); ++ rockchip_clk_of_add_provider(np, ctx); ++} ++CLK_OF_DECLARE(rv1106_grf_cru, "rockchip,rv1106-grf-cru", rv1106_grf_clk_init); + -+ cryptodev_release_user_pages(ses_ptr); ++#ifdef MODULE ++struct clk_rv1106_inits { ++ void (*inits)(struct device_node *np); ++}; + 
-+ return ret; -+} ++static const struct clk_rv1106_inits clk_rv1106_init = { ++ .inits = rv1106_clk_init, ++}; + -+static int crypto_auth_zc_tls(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) ++static const struct clk_rv1106_inits clk_rv1106_grf_init = { ++ .inits = rv1106_grf_clk_init, ++}; ++ ++static const struct of_device_id clk_rv1106_match_table[] = { ++ { ++ .compatible = "rockchip,rv1106-cru", ++ .data = &clk_rv1106_init, ++ }, { ++ .compatible = "rockchip,rv1106-grf-cru", ++ .data = &clk_rv1106_grf_init, ++ }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, clk_rv1106_match_table); ++ ++static int __init clk_rv1106_probe(struct platform_device *pdev) +{ -+ struct crypt_auth_op *caop = &kcaop->caop; -+ struct scatterlist *dst_sg, *auth_sg; -+ unsigned char *auth_buf = NULL; -+ struct scatterlist tmp; -+ int ret; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ const struct clk_rv1106_inits *init_data; + -+ if (unlikely(caop->auth_len > PAGE_SIZE)) { -+ derr(1, "auth data len is excessive."); ++ match = of_match_device(clk_rv1106_match_table, &pdev->dev); ++ if (!match || !match->data) + return -EINVAL; -+ } + -+ auth_buf = (char *)__get_free_page(GFP_KERNEL); -+ if (unlikely(!auth_buf)) { -+ derr(1, "unable to get a free page."); -+ return -ENOMEM; -+ } ++ init_data = match->data; ++ if (init_data->inits) ++ init_data->inits(np); + -+ if (caop->auth_src && caop->auth_len > 0) { -+ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { -+ derr(1, "unable to copy auth data from userspace."); -+ ret = -EFAULT; -+ goto free_auth_buf; -+ } ++ return 0; ++} + -+ sg_init_one(&tmp, auth_buf, caop->auth_len); -+ auth_sg = &tmp; -+ } else { -+ auth_sg = NULL; -+ } ++static struct platform_driver clk_rv1106_driver = { ++ .driver = { ++ .name = "clk-rv1106", ++ .of_match_table = clk_rv1106_match_table, ++ }, ++}; ++builtin_platform_driver_probe(clk_rv1106_driver, clk_rv1106_probe); + -+ ret = get_userbuf_tls(ses_ptr, kcaop, &dst_sg); -+ if (unlikely(ret)) { -+ derr(1, "get_userbuf_tls(): Error getting user pages."); -+ goto free_auth_buf; -+ } ++MODULE_DESCRIPTION("Rockchip RV1106 Clock Driver"); ++MODULE_LICENSE("GPL"); ++#endif /* MODULE */ +diff --git a/drivers/clk/rockchip/regmap/Kconfig b/drivers/clk/rockchip/regmap/Kconfig +new file mode 100644 +index 000000000..65f691bc4 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/Kconfig +@@ -0,0 +1,16 @@ ++# SPDX-License-Identifier: GPL-2.0 + -+ ret = tls_auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, -+ dst_sg, caop->len); -+ cryptodev_release_user_pages(ses_ptr); ++config COMMON_CLK_ROCKCHIP_REGMAP ++ tristate + -+free_auth_buf: -+ free_page((unsigned long)auth_buf); -+ return ret; -+} ++config CLK_RK618 ++ tristate "Clock driver for Rockchip RK618" ++ depends on MFD_RK618 ++ default MFD_RK618 ++ select COMMON_CLK_ROCKCHIP_REGMAP + -+static int crypto_auth_zc_aead(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) -+{ -+ struct scatterlist *dst_sg; -+ struct scatterlist *src_sg; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ unsigned char *auth_buf = NULL; -+ int ret; ++config CLK_RK628 ++ tristate "Clock driver for Rockchip RK628" ++ depends on MFD_RK628 ++ default MFD_RK628 ++ select COMMON_CLK_ROCKCHIP_REGMAP +diff --git a/drivers/clk/rockchip/regmap/Makefile b/drivers/clk/rockchip/regmap/Makefile +new file mode 100644 +index 000000000..18d075d09 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/Makefile +@@ -0,0 +1,13 @@ ++# SPDX-License-Identifier: GPL-2.0 
+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)) -+ struct scatterlist tmp; -+ struct scatterlist *auth_sg; -+#else -+ struct scatterlist auth1[2]; -+ struct scatterlist auth2[2]; -+#endif ++obj-$(CONFIG_COMMON_CLK_ROCKCHIP_REGMAP) += clk-rockchip-regmap.o + -+ if (unlikely(ses_ptr->cdata.init == 0 || -+ (ses_ptr->cdata.stream == 0 && ses_ptr->cdata.aead == 0))) { -+ derr(0, "Only stream and AEAD ciphers are allowed for authenc"); -+ return -EINVAL; -+ } ++clk-rockchip-regmap-objs := clk-regmap-mux.o \ ++ clk-regmap-divider.o \ ++ clk-regmap-gate.o \ ++ clk-regmap-fractional-divider.o \ ++ clk-regmap-composite.o \ ++ clk-regmap-pll.o + -+ if (unlikely(caop->auth_len > PAGE_SIZE)) { -+ derr(1, "auth data len is excessive."); -+ return -EINVAL; -+ } ++obj-$(CONFIG_CLK_RK618) += clk-rk618.o ++obj-$(CONFIG_CLK_RK628) += clk-rk628.o +diff --git a/drivers/clk/rockchip/regmap/clk-regmap-composite.c b/drivers/clk/rockchip/regmap/clk-regmap-composite.c +new file mode 100644 +index 000000000..43d2b9a45 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-regmap-composite.c +@@ -0,0 +1,400 @@ ++/* ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * Base on code in drivers/clk/clk-composite.c. ++ * See clk-composite.c for further copyright information. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ + -+ auth_buf = (char *)__get_free_page(GFP_KERNEL); -+ if (unlikely(!auth_buf)) { -+ derr(1, "unable to get a free page."); -+ return -ENOMEM; -+ } ++#include "clk-regmap.h" + -+ ret = cryptodev_get_userbuf(ses_ptr, caop->src, caop->len, caop->dst, kcaop->dst_len, -+ kcaop->task, kcaop->mm, &src_sg, &dst_sg); -+ if (unlikely(ret)) { -+ derr(1, "get_userbuf(): Error getting user pages."); -+ goto free_auth_buf; -+ } ++struct clk_regmap_composite { ++ struct device *dev; ++ struct clk_hw hw; ++ struct clk_ops ops; + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)) -+ if (caop->auth_src && caop->auth_len > 0) { -+ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { -+ derr(1, "unable to copy auth data from userspace."); -+ ret = -EFAULT; -+ goto free_pages; -+ } ++ struct clk_hw *mux_hw; ++ struct clk_hw *rate_hw; ++ struct clk_hw *gate_hw; + -+ sg_init_one(&tmp, auth_buf, caop->auth_len); -+ auth_sg = &tmp; -+ } else { -+ auth_sg = NULL; -+ } ++ const struct clk_ops *mux_ops; ++ const struct clk_ops *rate_ops; ++ const struct clk_ops *gate_ops; ++}; + -+ ret = auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, -+ src_sg, dst_sg, caop->len); -+#else -+ if (caop->auth_src && caop->auth_len > 0) { -+ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { -+ derr(1, "unable to copy auth data from userspace."); -+ ret = -EFAULT; -+ goto free_pages; -+ } ++#define to_clk_regmap_composite(_hw) \ ++ container_of(_hw, struct clk_regmap_composite, hw) + -+ sg_init_table(auth1, 2); -+ sg_set_buf(auth1, auth_buf, caop->auth_len); -+ sg_chain(auth1, 2, src_sg); ++static u8 clk_regmap_composite_get_parent(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *mux_hw = composite->mux_hw; + -+ if (src_sg == dst_sg) { -+ src_sg = auth1; -+ dst_sg = auth1; -+ } else { -+ sg_init_table(auth2, 2); -+ sg_set_buf(auth2, auth_buf, caop->auth_len); -+ sg_chain(auth2, 2, dst_sg); -+ src_sg = auth1; -+ dst_sg = auth2; -+ } -+ } ++ __clk_hw_set_clk(mux_hw, hw); + -+ ret = auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, -+ src_sg, dst_sg, caop->len); -+#endif ++ return mux_ops->get_parent(mux_hw); ++} + -+free_pages: -+ cryptodev_release_user_pages(ses_ptr); ++static int clk_regmap_composite_set_parent(struct clk_hw *hw, u8 index) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *mux_hw = composite->mux_hw; + -+free_auth_buf: -+ free_page((unsigned long)auth_buf); ++ __clk_hw_set_clk(mux_hw, hw); + -+ return ret; ++ return mux_ops->set_parent(mux_hw, index); +} + -+/* Chain two sglists together. 
It will keep the last nent of priv -+ * and invalidate the first nent of sgl -+ */ -+static struct scatterlist *sg_copy_chain(struct scatterlist *prv, -+ unsigned int prv_nents, -+ struct scatterlist *sgl) ++static unsigned long clk_regmap_composite_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) +{ -+ struct scatterlist *sg_tmp = sg_last(prv, prv_nents); ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; + -+ sg_set_page(sgl, sg_page(sg_tmp), sg_tmp->length, sg_tmp->offset); ++ __clk_hw_set_clk(rate_hw, hw); + -+ if (prv_nents > 1) { -+ sg_chain(prv, prv_nents, sgl); -+ return prv; -+ } else { -+ return sgl; -+ } ++ return rate_ops->recalc_rate(rate_hw, parent_rate); +} + -+static int crypto_auth_zc_rk(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) ++static int clk_regmap_composite_determine_rate(struct clk_hw *hw, ++ struct clk_rate_request *req) +{ -+ struct scatterlist *dst; -+ struct scatterlist *src; -+ struct scatterlist *dst_sg; -+ struct scatterlist *src_sg; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ unsigned char *auth_buf = NULL, *tag_buf = NULL; -+ struct scatterlist auth_src[2], auth_dst[2], tag[3]; -+ int ret; -+ -+ if (unlikely(ses_ptr->cdata.init == 0 || -+ (ses_ptr->cdata.stream == 0 && ses_ptr->cdata.aead == 0))) { -+ derr(0, "Only stream and AEAD ciphers are allowed for authenc"); -+ return -EINVAL; -+ } ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ const struct clk_ops *mux_ops = composite->mux_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; ++ struct clk_hw *mux_hw = composite->mux_hw; ++ struct clk_hw *parent; ++ unsigned long parent_rate; ++ long tmp_rate, best_rate = 0; ++ unsigned long rate_diff; ++ unsigned long best_rate_diff = ULONG_MAX; ++ long rate; ++ unsigned int i; + -+ if (unlikely(caop->auth_len > PAGE_SIZE)) { -+ derr(1, "auth data len is excessive."); -+ return -EINVAL; -+ } ++ if (rate_hw && rate_ops && rate_ops->determine_rate) { ++ __clk_hw_set_clk(rate_hw, hw); ++ return rate_ops->determine_rate(rate_hw, req); ++ } else if (rate_hw && rate_ops && rate_ops->round_rate && ++ mux_hw && mux_ops && mux_ops->set_parent) { ++ req->best_parent_hw = NULL; + -+ ret = cryptodev_get_userbuf(ses_ptr, caop->src, caop->len, -+ caop->dst, kcaop->dst_len, -+ kcaop->task, kcaop->mm, &src_sg, &dst_sg); -+ if (unlikely(ret)) { -+ derr(1, "get_userbuf(): Error getting user pages."); -+ ret = -EFAULT; -+ goto exit; -+ } ++ if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { ++ parent = clk_hw_get_parent(mux_hw); ++ req->best_parent_hw = parent; ++ req->best_parent_rate = clk_hw_get_rate(parent); + -+ dst = dst_sg; -+ src = src_sg; ++ rate = rate_ops->round_rate(rate_hw, req->rate, ++ &req->best_parent_rate); ++ if (rate < 0) ++ return rate; + -+ /* chain tag */ -+ if (caop->tag && caop->tag_len > 0) { -+ tag_buf = kcalloc(caop->tag_len, sizeof(*tag_buf), GFP_KERNEL); -+ if (unlikely(!tag_buf)) { -+ derr(1, "unable to kcalloc %d.", caop->tag_len); -+ ret = -EFAULT; -+ goto free_pages; ++ req->rate = rate; ++ return 0; + } + -+ if (unlikely(copy_from_user(tag_buf, caop->tag, caop->tag_len))) { -+ derr(1, "unable to copy tag data from userspace."); -+ ret = -EFAULT; -+ goto free_pages; -+ } ++ for (i = 0; i < clk_hw_get_num_parents(mux_hw); i++) { ++ parent = clk_hw_get_parent_by_index(mux_hw, i); ++ if (!parent) ++ 
continue; + -+ sg_init_table(tag, ARRAY_SIZE(tag)); -+ sg_set_buf(&tag[1], tag_buf, caop->tag_len); ++ parent_rate = clk_hw_get_rate(parent); + -+ /* Since the sg_chain() requires the last sg in the list is empty and -+ * used for link information, we can not directly link src/dst_sg to tags -+ */ -+ if (caop->op == COP_ENCRYPT) -+ dst = sg_copy_chain(dst_sg, sg_nents(dst_sg), tag); -+ else -+ src = sg_copy_chain(src_sg, sg_nents(src_sg), tag); -+ } ++ tmp_rate = rate_ops->round_rate(rate_hw, req->rate, ++ &parent_rate); ++ if (tmp_rate < 0) ++ continue; + -+ /* chain auth */ -+ auth_buf = (char *)__get_free_page(GFP_KERNEL); -+ if (unlikely(!auth_buf)) { -+ derr(1, "unable to get a free page."); -+ ret = -EFAULT; -+ goto free_pages; -+ } ++ rate_diff = abs(req->rate - tmp_rate); + -+ if (caop->auth_src && caop->auth_len > 0) { -+ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { -+ derr(1, "unable to copy auth data from userspace."); -+ ret = -EFAULT; -+ goto free_pages; -+ } ++ if (!rate_diff || !req->best_parent_hw || ++ best_rate_diff > rate_diff) { ++ req->best_parent_hw = parent; ++ req->best_parent_rate = parent_rate; ++ best_rate_diff = rate_diff; ++ best_rate = tmp_rate; ++ } + -+ sg_init_table(auth_src, ARRAY_SIZE(auth_src)); -+ sg_set_buf(auth_src, auth_buf, caop->auth_len); -+ sg_init_table(auth_dst, ARRAY_SIZE(auth_dst)); -+ sg_set_buf(auth_dst, auth_buf, caop->auth_len); ++ if (!rate_diff) ++ return 0; ++ } + -+ sg_chain(auth_src, 2, src); -+ sg_chain(auth_dst, 2, dst); -+ src = auth_src; -+ dst = auth_dst; ++ req->rate = best_rate; ++ return 0; ++ } else if (mux_hw && mux_ops && mux_ops->determine_rate) { ++ __clk_hw_set_clk(mux_hw, hw); ++ return mux_ops->determine_rate(mux_hw, req); ++ } else { ++ return -EINVAL; + } + -+ if (caop->op == COP_ENCRYPT) -+ ret = rk_auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, -+ src, dst, caop->len); -+ else -+ ret = rk_auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, -+ src, dst, caop->len + caop->tag_len); ++ return 0; ++} + -+ if (!ret && caop->op == COP_ENCRYPT) { -+ if (unlikely(copy_to_user(kcaop->caop.tag, tag_buf, caop->tag_len))) { -+ derr(1, "Error in copying to userspace"); -+ ret = -EFAULT; -+ goto free_pages; -+ } -+ } ++static long clk_regmap_composite_round_rate(struct clk_hw *hw, ++ unsigned long rate, ++ unsigned long *prate) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; + -+free_pages: -+ cryptodev_release_user_pages(ses_ptr); ++ __clk_hw_set_clk(rate_hw, hw); + -+exit: -+ if (auth_buf) -+ free_page((unsigned long)auth_buf); ++ return rate_ops->round_rate(rate_hw, rate, prate); ++} + -+ kfree(tag_buf); ++static int clk_regmap_composite_set_rate(struct clk_hw *hw, ++ unsigned long rate, ++ unsigned long parent_rate) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *rate_ops = composite->rate_ops; ++ struct clk_hw *rate_hw = composite->rate_hw; + -+ return ret; ++ __clk_hw_set_clk(rate_hw, hw); ++ ++ return rate_ops->set_rate(rate_hw, rate, parent_rate); +} + -+static int -+__crypto_auth_run_zc(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) ++static int clk_regmap_composite_is_prepared(struct clk_hw *hw) +{ -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int ret; ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ 
struct clk_hw *gate_hw = composite->gate_hw; + -+ if (caop->flags & COP_FLAG_AEAD_SRTP_TYPE) { -+ ret = crypto_auth_zc_srtp(ses_ptr, kcaop); -+ } else if (caop->flags & COP_FLAG_AEAD_TLS_TYPE && -+ ses_ptr->cdata.aead == 0) { -+ ret = crypto_auth_zc_tls(ses_ptr, kcaop); -+ } else if (caop->flags & COP_FLAG_AEAD_RK_TYPE && -+ ses_ptr->cdata.aead) { -+ ret = crypto_auth_zc_rk(ses_ptr, kcaop); -+ } else if (ses_ptr->cdata.aead) { -+ ret = crypto_auth_zc_aead(ses_ptr, kcaop); -+ } else { -+ ret = -EINVAL; -+ } ++ __clk_hw_set_clk(gate_hw, hw); + -+ return ret; ++ return gate_ops->is_prepared(gate_hw); +} + ++static int clk_regmap_composite_prepare(struct clk_hw *hw) ++{ ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ struct clk_hw *gate_hw = composite->gate_hw; + -+int crypto_auth_run(struct fcrypt *fcr, struct kernel_crypt_auth_op *kcaop) ++ __clk_hw_set_clk(gate_hw, hw); ++ ++ return gate_ops->prepare(gate_hw); ++} ++ ++static void clk_regmap_composite_unprepare(struct clk_hw *hw) +{ -+ struct csession *ses_ptr; -+ struct crypt_auth_op *caop = &kcaop->caop; -+ int ret; ++ struct clk_regmap_composite *composite = to_clk_regmap_composite(hw); ++ const struct clk_ops *gate_ops = composite->gate_ops; ++ struct clk_hw *gate_hw = composite->gate_hw; + -+ if (unlikely(caop->op != COP_ENCRYPT && caop->op != COP_DECRYPT)) { -+ ddebug(1, "invalid operation op=%u", caop->op); -+ return -EINVAL; -+ } ++ __clk_hw_set_clk(gate_hw, hw); + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", caop->ses); -+ return -EINVAL; ++ gate_ops->unprepare(gate_hw); ++} ++ ++struct clk * ++devm_clk_regmap_register_composite(struct device *dev, const char *name, ++ const char *const *parent_names, ++ u8 num_parents, struct regmap *regmap, ++ u32 mux_reg, u8 mux_shift, u8 mux_width, ++ u32 div_reg, u8 div_shift, u8 div_width, ++ u8 div_flags, ++ u32 gate_reg, u8 gate_shift, ++ unsigned long flags) ++{ ++ struct clk_regmap_gate *gate = NULL; ++ struct clk_regmap_mux *mux = NULL; ++ struct clk_regmap_divider *div = NULL; ++ struct clk_regmap_fractional_divider *fd = NULL; ++ const struct clk_ops *mux_ops = NULL, *div_ops = NULL, *gate_ops = NULL; ++ const struct clk_ops *fd_ops = NULL; ++ struct clk_hw *mux_hw = NULL, *div_hw = NULL, *gate_hw = NULL; ++ struct clk_hw *fd_hw = NULL; ++ struct clk *clk; ++ struct clk_init_data init = {}; ++ struct clk_regmap_composite *composite; ++ struct clk_ops *clk_composite_ops; ++ ++ if (num_parents > 1) { ++ mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); ++ ++ mux->dev = dev; ++ mux->regmap = regmap; ++ mux->reg = mux_reg; ++ mux->shift = mux_shift; ++ mux->mask = BIT(mux_width) - 1; ++ mux_ops = &clk_regmap_mux_ops; ++ mux_hw = &mux->hw; + } + -+ if (unlikely(ses_ptr->cdata.init == 0)) { -+ derr(1, "cipher context not initialized"); -+ ret = -EINVAL; -+ goto out_unlock; ++ if (gate_reg > 0) { ++ gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ return ERR_PTR(-ENOMEM); ++ ++ gate->dev = dev; ++ gate->regmap = regmap; ++ gate->reg = gate_reg; ++ gate->shift = gate_shift; ++ gate_ops = &clk_regmap_gate_ops; ++ gate_hw = &gate->hw; + } + -+ /* If we have a hash/mac handle reset its state */ -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_reset(&ses_ptr->hdata); -+ if (unlikely(ret)) { -+ derr(1, "error in 
cryptodev_hash_reset()"); -+ goto out_unlock; ++ if (div_reg > 0) { ++ if (div_flags & CLK_DIVIDER_HIWORD_MASK) { ++ div = devm_kzalloc(dev, sizeof(*div), GFP_KERNEL); ++ if (!div) ++ return ERR_PTR(-ENOMEM); ++ ++ div->dev = dev; ++ div->regmap = regmap; ++ div->reg = div_reg; ++ div->shift = div_shift; ++ div->width = div_width; ++ div_ops = &clk_regmap_divider_ops; ++ div_hw = &div->hw; ++ } else { ++ fd = devm_kzalloc(dev, sizeof(*fd), GFP_KERNEL); ++ if (!fd) ++ return ERR_PTR(-ENOMEM); ++ ++ fd->dev = dev; ++ fd->regmap = regmap; ++ fd->reg = div_reg; ++ fd->mshift = 16; ++ fd->mwidth = 16; ++ fd->mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift; ++ fd->nshift = 0; ++ fd->nwidth = 16; ++ fd->nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift; ++ fd_ops = &clk_regmap_fractional_divider_ops; ++ fd_hw = &fd->hw; + } + } + -+ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcaop->iv, -+ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); ++ composite = devm_kzalloc(dev, sizeof(*composite), GFP_KERNEL); ++ if (!composite) ++ return ERR_PTR(-ENOMEM); + -+ ret = __crypto_auth_run_zc(ses_ptr, kcaop); -+ if (unlikely(ret)) { -+ derr(1, "error in __crypto_auth_run_zc()"); -+ goto out_unlock; ++ init.name = name; ++ init.flags = flags; ++ init.parent_names = parent_names; ++ init.num_parents = num_parents; ++ ++ clk_composite_ops = &composite->ops; ++ ++ if (mux_hw && mux_ops) { ++ if (!mux_ops->get_parent) ++ return ERR_PTR(-EINVAL); ++ ++ composite->mux_hw = mux_hw; ++ composite->mux_ops = mux_ops; ++ clk_composite_ops->get_parent = ++ clk_regmap_composite_get_parent; ++ if (mux_ops->set_parent) ++ clk_composite_ops->set_parent = ++ clk_regmap_composite_set_parent; ++ if (mux_ops->determine_rate) ++ clk_composite_ops->determine_rate = ++ clk_regmap_composite_determine_rate; + } + -+ ret = 0; ++ if (div_hw && div_ops) { ++ if (!div_ops->recalc_rate) ++ return ERR_PTR(-EINVAL); + -+ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcaop->iv, -+ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); ++ clk_composite_ops->recalc_rate = ++ clk_regmap_composite_recalc_rate; + -+out_unlock: -+ crypto_put_session(ses_ptr); -+ return ret; -+} -diff --git a/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h b/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h -new file mode 100644 -index 000000000..7073588e1 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h -@@ -0,0 +1,58 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++ if (div_ops->determine_rate) ++ clk_composite_ops->determine_rate = ++ clk_regmap_composite_determine_rate; ++ else if (div_ops->round_rate) ++ clk_composite_ops->round_rate = ++ clk_regmap_composite_round_rate; + -+#ifndef CIPHERAPI_H -+# define CIPHERAPI_H ++ /* .set_rate requires either .round_rate or .determine_rate */ ++ if (div_ops->set_rate) { ++ if (div_ops->determine_rate || div_ops->round_rate) ++ clk_composite_ops->set_rate = ++ clk_regmap_composite_set_rate; ++ else ++ WARN(1, "missing round_rate op\n"); ++ } + -+#include ++ composite->rate_hw = div_hw; ++ composite->rate_ops = div_ops; ++ } + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+# include ++ if (fd_hw && fd_ops) { ++ if (!fd_ops->recalc_rate) ++ return ERR_PTR(-EINVAL); + -+typedef struct crypto_ablkcipher cryptodev_crypto_blkcipher_t; -+typedef struct ablkcipher_request cryptodev_blkcipher_request_t; ++ clk_composite_ops->recalc_rate = ++ clk_regmap_composite_recalc_rate; + -+# define cryptodev_crypto_alloc_blkcipher crypto_alloc_ablkcipher -+# define cryptodev_crypto_blkcipher_blocksize crypto_ablkcipher_blocksize 
-+# define cryptodev_crypto_blkcipher_ivsize crypto_ablkcipher_ivsize -+# define cryptodev_crypto_blkcipher_alignmask crypto_ablkcipher_alignmask -+# define cryptodev_crypto_blkcipher_setkey crypto_ablkcipher_setkey ++ if (fd_ops->determine_rate) ++ clk_composite_ops->determine_rate = ++ clk_regmap_composite_determine_rate; ++ else if (fd_ops->round_rate) ++ clk_composite_ops->round_rate = ++ clk_regmap_composite_round_rate; + -+static inline void cryptodev_crypto_free_blkcipher(cryptodev_crypto_blkcipher_t *c) { -+ if (c) -+ crypto_free_ablkcipher(c); -+} ++ /* .set_rate requires either .round_rate or .determine_rate */ ++ if (fd_ops->set_rate) { ++ if (fd_ops->determine_rate || fd_ops->round_rate) ++ clk_composite_ops->set_rate = ++ clk_regmap_composite_set_rate; ++ else ++ WARN(1, "missing round_rate op\n"); ++ } + -+# define cryptodev_blkcipher_request_alloc ablkcipher_request_alloc -+# define cryptodev_blkcipher_request_set_callback ablkcipher_request_set_callback ++ composite->rate_hw = fd_hw; ++ composite->rate_ops = fd_ops; ++ } + -+static inline void cryptodev_blkcipher_request_free(cryptodev_blkcipher_request_t *r) { -+ if (r) -+ ablkcipher_request_free(r); -+} ++ if (gate_hw && gate_ops) { ++ if (!gate_ops->is_prepared || !gate_ops->prepare || ++ !gate_ops->unprepare) ++ return ERR_PTR(-EINVAL); + -+# define cryptodev_blkcipher_request_set_crypt ablkcipher_request_set_crypt -+# define cryptodev_crypto_blkcipher_encrypt crypto_ablkcipher_encrypt -+# define cryptodev_crypto_blkcipher_decrypt crypto_ablkcipher_decrypt -+# define cryptodev_crypto_blkcipher_tfm crypto_ablkcipher_tfm -+#else -+#include ++ composite->gate_hw = gate_hw; ++ composite->gate_ops = gate_ops; ++ clk_composite_ops->is_prepared = ++ clk_regmap_composite_is_prepared; ++ clk_composite_ops->prepare = clk_regmap_composite_prepare; ++ clk_composite_ops->unprepare = clk_regmap_composite_unprepare; ++ } + -+typedef struct crypto_skcipher cryptodev_crypto_blkcipher_t; -+typedef struct skcipher_request cryptodev_blkcipher_request_t; ++ init.ops = clk_composite_ops; ++ composite->dev = dev; ++ composite->hw.init = &init; + -+# define cryptodev_crypto_alloc_blkcipher crypto_alloc_skcipher -+# define cryptodev_crypto_blkcipher_blocksize crypto_skcipher_blocksize -+# define cryptodev_crypto_blkcipher_ivsize crypto_skcipher_ivsize -+# define cryptodev_crypto_blkcipher_alignmask crypto_skcipher_alignmask -+# define cryptodev_crypto_blkcipher_setkey crypto_skcipher_setkey -+# define cryptodev_crypto_free_blkcipher crypto_free_skcipher -+# define cryptodev_blkcipher_request_alloc skcipher_request_alloc -+# define cryptodev_blkcipher_request_set_callback skcipher_request_set_callback -+# define cryptodev_blkcipher_request_free skcipher_request_free -+# define cryptodev_blkcipher_request_set_crypt skcipher_request_set_crypt -+# define cryptodev_crypto_blkcipher_encrypt crypto_skcipher_encrypt -+# define cryptodev_crypto_blkcipher_decrypt crypto_skcipher_decrypt -+# define cryptodev_crypto_blkcipher_tfm crypto_skcipher_tfm -+#endif ++ clk = devm_clk_register(dev, &composite->hw); ++ if (IS_ERR(clk)) ++ return clk; + -+#endif -diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c ++ if (composite->mux_hw) ++ composite->mux_hw->clk = clk; ++ ++ if (composite->rate_hw) ++ composite->rate_hw->clk = clk; ++ ++ if (composite->gate_hw) ++ composite->gate_hw->clk = clk; ++ ++ return clk; ++} ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_composite); +diff --git 
a/drivers/clk/rockchip/regmap/clk-regmap-divider.c b/drivers/clk/rockchip/regmap/clk-regmap-divider.c new file mode 100644 -index 000000000..8b2a28c73 +index 000000000..d57f2c7f8 --- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c -@@ -0,0 +1,492 @@ ++++ b/drivers/clk/rockchip/regmap/clk-regmap-divider.c +@@ -0,0 +1,117 @@ +/* -+ * Driver for /dev/crypto device (aka CryptoDev) -+ * -+ * Copyright (c) 2010,2011 Nikos Mavrogiannopoulos -+ * Portions Copyright (c) 2010 Michael Weiser -+ * Portions Copyright (c) 2010 Phil Sutter ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. + * -+ * This file is part of linux cryptodev. ++ * Base on code in drivers/clk/clk-divider.c. ++ * See clk-divider.c for further copyright information. + * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "cryptodev.h" -+#include "cipherapi.h" ++#include "clk-regmap.h" + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) -+extern const struct crypto_type crypto_givcipher_type; -+#endif ++#define div_mask(width) ((1 << (width)) - 1) + -+static void cryptodev_complete(struct crypto_async_request *req, int err) ++#define to_clk_regmap_divider(_hw) \ ++ container_of(_hw, struct clk_regmap_divider, hw) ++ ++static unsigned long ++clk_regmap_divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +{ -+ struct cryptodev_result *res = req->data; ++ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); ++ unsigned int val, div; + -+ if (err == -EINPROGRESS) -+ return; ++ regmap_read(divider->regmap, divider->reg, &val); + -+ res->err = err; -+ complete(&res->completion); ++ div = val >> divider->shift; ++ div &= div_mask(divider->width); ++ ++ return divider_recalc_rate(hw, parent_rate, div, NULL, ++ CLK_DIVIDER_ROUND_CLOSEST, divider->width); +} + -+int cryptodev_get_cipher_keylen(unsigned int *keylen, struct session_op *sop, -+ int aead) ++static long ++clk_regmap_divider_round_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long *prate) +{ -+ /* -+ * For blockciphers (AES-CBC) or non-composite aead ciphers (like AES-GCM), -+ * the key length is simply the cipher keylen obtained from userspace. If -+ * the cipher is composite aead, the keylen is the sum of cipher keylen, -+ * hmac keylen and a key header length. 
This key format is the one used in -+ * Linux kernel for composite aead ciphers (crypto/authenc.c) -+ */ -+ unsigned int klen = sop->keylen; -+ -+ if (unlikely(sop->keylen > CRYPTO_CIPHER_MAX_KEY_LEN)) -+ return -EINVAL; -+ -+ if (aead && sop->mackeylen) { -+ if (unlikely(sop->mackeylen > CRYPTO_HMAC_MAX_KEY_LEN)) -+ return -EINVAL; -+ klen += sop->mackeylen; -+ klen += RTA_SPACE(sizeof(struct crypto_authenc_key_param)); -+ } ++ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); + -+ *keylen = klen; -+ return 0; ++ return divider_round_rate(hw, rate, prate, NULL, divider->width, ++ CLK_DIVIDER_ROUND_CLOSEST); +} + -+int cryptodev_get_cipher_key(uint8_t *key, struct session_op *sop, int aead) ++static int div_round_closest(unsigned long parent_rate, unsigned long rate) +{ -+ /* -+ * Get cipher key from user-space. For blockciphers just copy it from -+ * user-space. For composite aead ciphers combine it with the hmac key in -+ * the format used by Linux kernel in crypto/authenc.c: -+ * -+ * [[AUTHENC_KEY_HEADER + CIPHER_KEYLEN] [AUTHENTICATION KEY] [CIPHER KEY]] -+ */ -+ struct crypto_authenc_key_param *param; -+ struct rtattr *rta; -+ int ret = 0; -+ -+ if (aead && sop->mackeylen) { -+ /* -+ * Composite aead ciphers. The first four bytes are the header type and -+ * header length for aead keys -+ */ -+ rta = (void *)key; -+ rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; -+ rta->rta_len = RTA_LENGTH(sizeof(*param)); ++ int up, down; ++ unsigned long up_rate, down_rate; + -+ /* -+ * The next four bytes hold the length of the encryption key -+ */ -+ param = RTA_DATA(rta); -+ param->enckeylen = cpu_to_be32(sop->keylen); ++ up = DIV_ROUND_UP_ULL((u64)parent_rate, rate); ++ down = parent_rate / rate; + -+ /* Advance key pointer eight bytes and copy the hmac key */ -+ key += RTA_SPACE(sizeof(*param)); -+ if (unlikely(copy_from_user(key, sop->mackey, sop->mackeylen))) { -+ ret = -EFAULT; -+ goto error; -+ } -+ /* Advance key pointer past the hmac key */ -+ key += sop->mackeylen; -+ } -+ /* now copy the blockcipher key */ -+ if (unlikely(copy_from_user(key, sop->key, sop->keylen))) -+ ret = -EFAULT; ++ up_rate = DIV_ROUND_UP_ULL((u64)parent_rate, up); ++ down_rate = DIV_ROUND_UP_ULL((u64)parent_rate, down); + -+error: -+ return ret; ++ return (rate - up_rate) <= (down_rate - rate) ? up : down; +} + -+/* Was correct key length supplied? */ -+static int check_key_size(size_t keylen, const char *alg_name, -+ unsigned int min_keysize, unsigned int max_keysize) ++static int ++clk_regmap_divider_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) +{ -+ if (max_keysize > 0 && unlikely((keylen < min_keysize) || -+ (keylen > max_keysize))) { -+ ddebug(1, "Wrong keylen '%zu' for algorithm '%s'. 
Use %u to %u.", -+ keylen, alg_name, min_keysize, max_keysize); -+ return -EINVAL; -+ } ++ struct clk_regmap_divider *divider = to_clk_regmap_divider(hw); ++ u32 val, div; + -+ return 0; -+} ++ div = div_round_closest(parent_rate, rate); + -+int cryptodev_cipher_init(struct cipher_data *out, const char *alg_name, -+ uint8_t *keyp, size_t keylen, int stream, int aead) -+{ -+ int ret; ++ dev_dbg(divider->dev, "%s: parent_rate=%ld, div=%d, rate=%ld\n", ++ clk_hw_get_name(hw), parent_rate, div, rate); + -+ if (aead == 0) { -+ unsigned int min_keysize, max_keysize; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ struct crypto_tfm *tfm; -+#else -+ struct ablkcipher_alg *alg; -+#endif ++ val = div_mask(divider->width) << (divider->shift + 16); ++ val |= (div - 1) << divider->shift; + -+ out->async.s = cryptodev_crypto_alloc_blkcipher(alg_name, 0, 0); -+ if (unlikely(IS_ERR(out->async.s))) { -+ ddebug(1, "Failed to load cipher %s", alg_name); -+ return PTR_ERR(out->async.s); -+ } ++ return regmap_write(divider->regmap, divider->reg, val); ++} + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ tfm = crypto_skcipher_tfm(out->async.s); -+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(5, 4, 0)) -+ if ((tfm->__crt_alg->cra_type == &crypto_ablkcipher_type) -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) -+ || (tfm->__crt_alg->cra_type == &crypto_givcipher_type) -+#endif -+ ) { -+ struct ablkcipher_alg *alg; ++const struct clk_ops clk_regmap_divider_ops = { ++ .recalc_rate = clk_regmap_divider_recalc_rate, ++ .round_rate = clk_regmap_divider_round_rate, ++ .set_rate = clk_regmap_divider_set_rate, ++}; ++EXPORT_SYMBOL_GPL(clk_regmap_divider_ops); + -+ alg = &tfm->__crt_alg->cra_ablkcipher; -+ min_keysize = alg->min_keysize; -+ max_keysize = alg->max_keysize; -+ } else -+#endif -+ { -+ struct skcipher_alg *alg; ++struct clk * ++devm_clk_regmap_register_divider(struct device *dev, const char *name, ++ const char *parent_name, struct regmap *regmap, ++ u32 reg, u8 shift, u8 width, ++ unsigned long flags) ++{ ++ struct clk_regmap_divider *divider; ++ struct clk_init_data init = {}; + -+ alg = crypto_skcipher_alg(out->async.s); -+ min_keysize = alg->min_keysize; -+ max_keysize = alg->max_keysize; -+ } -+#else -+ alg = crypto_ablkcipher_alg(out->async.s); -+ min_keysize = alg->min_keysize; -+ max_keysize = alg->max_keysize; -+#endif -+ ret = check_key_size(keylen, alg_name, min_keysize, -+ max_keysize); -+ if (ret) -+ goto error; ++ divider = devm_kzalloc(dev, sizeof(*divider), GFP_KERNEL); ++ if (!divider) ++ return ERR_PTR(-ENOMEM); + -+ out->blocksize = cryptodev_crypto_blkcipher_blocksize(out->async.s); -+ out->ivsize = cryptodev_crypto_blkcipher_ivsize(out->async.s); -+ out->alignmask = cryptodev_crypto_blkcipher_alignmask(out->async.s); ++ init.name = name; ++ init.ops = &clk_regmap_divider_ops; ++ init.flags = flags; ++ init.parent_names = (parent_name ? &parent_name : NULL); ++ init.num_parents = (parent_name ? 
1 : 0); + -+ ret = cryptodev_crypto_blkcipher_setkey(out->async.s, keyp, keylen); -+ } else { -+ out->async.as = crypto_alloc_aead(alg_name, 0, 0); -+ if (unlikely(IS_ERR(out->async.as))) { -+ ddebug(1, "Failed to load cipher %s", alg_name); -+ return PTR_ERR(out->async.as); -+ } ++ divider->dev = dev; ++ divider->regmap = regmap; ++ divider->reg = reg; ++ divider->shift = shift; ++ divider->width = width; ++ divider->hw.init = &init; + -+ out->blocksize = crypto_aead_blocksize(out->async.as); -+ out->ivsize = crypto_aead_ivsize(out->async.as); -+ out->alignmask = crypto_aead_alignmask(out->async.as); ++ return devm_clk_register(dev, ÷r->hw); ++} ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_divider); +diff --git a/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c b/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c +new file mode 100644 +index 000000000..1acbc16e7 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-regmap-fractional-divider.c +@@ -0,0 +1,167 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2019 Rockchip Electronics Co. Ltd. ++ * ++ * Base on code in drivers/clk/clk-fractional-divider.c. ++ * See clk-fractional-divider.c for further copyright information. ++ */ + -+ ret = crypto_aead_setkey(out->async.as, keyp, keylen); -+ } ++#include + -+ if (unlikely(ret)) { -+ ddebug(1, "Setting key failed for %s-%zu.", alg_name, keylen*8); -+ ret = -EINVAL; -+ goto error; -+ } ++#include "clk-regmap.h" + -+ out->stream = stream; -+ out->aead = aead; ++#define to_clk_regmap_fractional_divider(_hw) \ ++ container_of(_hw, struct clk_regmap_fractional_divider, hw) + -+ init_completion(&out->async.result.completion); ++static unsigned long ++clk_regmap_fractional_divider_recalc_rate(struct clk_hw *hw, ++ unsigned long parent_rate) ++{ ++ struct clk_regmap_fractional_divider *fd = ++ to_clk_regmap_fractional_divider(hw); ++ unsigned long m, n; ++ u32 val; ++ u64 ret; + -+ if (aead == 0) { -+ out->async.request = cryptodev_blkcipher_request_alloc(out->async.s, GFP_KERNEL); -+ if (unlikely(!out->async.request)) { -+ derr(1, "error allocating async crypto request"); -+ ret = -ENOMEM; -+ goto error; -+ } ++ regmap_read(fd->regmap, fd->reg, &val); + -+ cryptodev_blkcipher_request_set_callback(out->async.request, -+ CRYPTO_TFM_REQ_MAY_BACKLOG, -+ cryptodev_complete, &out->async.result); -+ } else { -+ out->async.arequest = aead_request_alloc(out->async.as, GFP_KERNEL); -+ if (unlikely(!out->async.arequest)) { -+ derr(1, "error allocating async crypto request"); -+ ret = -ENOMEM; -+ goto error; -+ } ++ m = (val & fd->mmask) >> fd->mshift; ++ n = (val & fd->nmask) >> fd->nshift; + -+ aead_request_set_callback(out->async.arequest, -+ CRYPTO_TFM_REQ_MAY_BACKLOG, -+ cryptodev_complete, &out->async.result); -+ } ++ if (!n || !m) ++ return parent_rate; + -+ out->init = 1; -+ return 0; -+error: -+ if (aead == 0) { -+ cryptodev_blkcipher_request_free(out->async.request); -+ cryptodev_crypto_free_blkcipher(out->async.s); -+ } else { -+ if (out->async.arequest) -+ aead_request_free(out->async.arequest); -+ if (out->async.as) -+ crypto_free_aead(out->async.as); -+ } ++ ret = (u64)parent_rate * m; ++ do_div(ret, n); + + return ret; +} + -+void cryptodev_cipher_deinit(struct cipher_data *cdata) ++static void clk_regmap_fractional_divider_approximation(struct clk_hw *hw, ++ unsigned long rate, unsigned long *parent_rate, ++ unsigned long *m, unsigned long *n) +{ -+ if (cdata->init) { -+ if (cdata->aead == 0) { -+ cryptodev_blkcipher_request_free(cdata->async.request); -+ 
cryptodev_crypto_free_blkcipher(cdata->async.s); -+ } else { -+ if (cdata->async.arequest) -+ aead_request_free(cdata->async.arequest); -+ if (cdata->async.as) -+ crypto_free_aead(cdata->async.as); -+ } ++ struct clk_regmap_fractional_divider *fd = ++ to_clk_regmap_fractional_divider(hw); ++ unsigned long p_rate, p_parent_rate; ++ struct clk_hw *p_parent; ++ unsigned long scale; + -+ cdata->init = 0; -+ } -+} ++ if (!rate) { ++ *m = 0; ++ *n = 1; + -+static inline int waitfor(struct cryptodev_result *cr, ssize_t ret) -+{ -+ switch (ret) { -+ case 0: -+ break; -+ case -EINPROGRESS: -+ case -EBUSY: -+ wait_for_completion(&cr->completion); -+ /* At this point we known for sure the request has finished, -+ * because wait_for_completion above was not interruptible. -+ * This is important because otherwise hardware or driver -+ * might try to access memory which will be freed or reused for -+ * another request. */ ++ dev_dbg(fd->dev, "%s rate:(%ld) maybe invalid frequency setting!\n", ++ clk_hw_get_name(hw), rate); + -+ if (unlikely(cr->err)) { -+ derr(0, "error from async request: %d", cr->err); -+ return cr->err; -+ } ++ return; ++ } + -+ break; -+ default: -+ return ret; ++ p_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); ++ if ((rate * 20 > p_rate) && (p_rate % rate != 0)) { ++ p_parent = clk_hw_get_parent(clk_hw_get_parent(hw)); ++ p_parent_rate = clk_hw_get_rate(p_parent); ++ *parent_rate = p_parent_rate; + } + -+ return 0; ++ /* ++ * Get rate closer to *parent_rate to guarantee there is no overflow ++ * for m and n. In the result it will be the nearest rate left shifted ++ * by (scale - fd->nwidth) bits. ++ */ ++ scale = fls_long(*parent_rate / rate - 1); ++ if (scale > fd->nwidth) ++ rate <<= scale - fd->nwidth; ++ ++ rational_best_approximation(rate, *parent_rate, ++ GENMASK(fd->mwidth - 1, 0), ++ GENMASK(fd->nwidth - 1, 0), ++ m, n); +} + -+ssize_t cryptodev_cipher_encrypt(struct cipher_data *cdata, -+ const struct scatterlist *src, struct scatterlist *dst, -+ size_t len) ++static long ++clk_regmap_fractional_divider_round_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long *parent_rate) +{ -+ int ret; ++ unsigned long m, n; ++ u64 ret; + -+ reinit_completion(&cdata->async.result.completion); ++ if (!rate) ++ return *parent_rate; + -+ if (cdata->aead == 0) { -+ cryptodev_blkcipher_request_set_crypt(cdata->async.request, -+ (struct scatterlist *)src, dst, -+ len, cdata->async.iv); -+ ret = cryptodev_crypto_blkcipher_encrypt(cdata->async.request); -+ } else { -+ aead_request_set_crypt(cdata->async.arequest, -+ (struct scatterlist *)src, dst, -+ len, cdata->async.iv); -+ ret = crypto_aead_encrypt(cdata->async.arequest); -+ } ++ if (rate >= *parent_rate) ++ return *parent_rate; + -+ return waitfor(&cdata->async.result, ret); ++ clk_regmap_fractional_divider_approximation(hw, rate, parent_rate, ++ &m, &n); ++ ++ ret = (u64)*parent_rate * m; ++ do_div(ret, n); ++ ++ return ret; +} + -+ssize_t cryptodev_cipher_decrypt(struct cipher_data *cdata, -+ const struct scatterlist *src, struct scatterlist *dst, -+ size_t len) ++static int ++clk_regmap_fractional_divider_set_rate(struct clk_hw *hw, unsigned long rate, ++ unsigned long parent_rate) +{ -+ int ret; ++ struct clk_regmap_fractional_divider *fd = ++ to_clk_regmap_fractional_divider(hw); ++ unsigned long m, n; ++ u32 val; + -+ reinit_completion(&cdata->async.result.completion); -+ if (cdata->aead == 0) { -+ cryptodev_blkcipher_request_set_crypt(cdata->async.request, -+ (struct scatterlist *)src, dst, -+ len, cdata->async.iv); -+ ret 
= cryptodev_crypto_blkcipher_decrypt(cdata->async.request); -+ } else { -+ aead_request_set_crypt(cdata->async.arequest, -+ (struct scatterlist *)src, dst, -+ len, cdata->async.iv); -+ ret = crypto_aead_decrypt(cdata->async.arequest); -+ } ++ rational_best_approximation(rate, parent_rate, ++ GENMASK(fd->mwidth - 1, 0), GENMASK(fd->nwidth - 1, 0), ++ &m, &n); + -+ return waitfor(&cdata->async.result, ret); ++ dev_dbg(fd->dev, "%s: parent_rate=%ld, m=%ld, n=%ld, rate=%ld\n", ++ clk_hw_get_name(hw), parent_rate, m, n, rate); ++ ++ regmap_read(fd->regmap, fd->reg, &val); ++ val &= ~(fd->mmask | fd->nmask); ++ val |= (m << fd->mshift) | (n << fd->nshift); ++ ++ return regmap_write(fd->regmap, fd->reg, val); +} + -+/* Hash functions */ ++const struct clk_ops clk_regmap_fractional_divider_ops = { ++ .recalc_rate = clk_regmap_fractional_divider_recalc_rate, ++ .round_rate = clk_regmap_fractional_divider_round_rate, ++ .set_rate = clk_regmap_fractional_divider_set_rate, ++}; ++EXPORT_SYMBOL_GPL(clk_regmap_fractional_divider_ops); + -+int cryptodev_hash_init(struct hash_data *hdata, const char *alg_name, -+ int hmac_mode, void *mackey, size_t mackeylen) ++struct clk * ++devm_clk_regmap_register_fractional_divider(struct device *dev, ++ const char *name, ++ const char *parent_name, ++ struct regmap *regmap, ++ u32 reg, unsigned long flags) +{ -+ int ret; ++ struct clk_regmap_fractional_divider *fd; ++ struct clk_init_data init; + -+ hdata->async.s = crypto_alloc_ahash(alg_name, 0, 0); -+ if (unlikely(IS_ERR(hdata->async.s))) { -+ ddebug(1, "Failed to load transform for %s", alg_name); -+ return PTR_ERR(hdata->async.s); -+ } ++ fd = devm_kzalloc(dev, sizeof(*fd), GFP_KERNEL); ++ if (!fd) ++ return ERR_PTR(-ENOMEM); + -+ /* Copy the key from user and set to TFM. */ -+ if (hmac_mode != 0) { -+ ret = crypto_ahash_setkey(hdata->async.s, mackey, mackeylen); -+ if (unlikely(ret)) { -+ ddebug(1, "Setting hmac key failed for %s-%zu.", -+ alg_name, mackeylen*8); -+ ret = -EINVAL; -+ goto error; -+ } -+ } ++ init.name = name; ++ init.ops = &clk_regmap_fractional_divider_ops; ++ init.flags = flags; ++ init.parent_names = (parent_name ? &parent_name : NULL); ++ init.num_parents = (parent_name ? 1 : 0); + -+ hdata->digestsize = crypto_ahash_digestsize(hdata->async.s); -+ hdata->alignmask = crypto_ahash_alignmask(hdata->async.s); ++ fd->dev = dev; ++ fd->regmap = regmap; ++ fd->reg = reg; ++ fd->mshift = 16; ++ fd->mwidth = 16; ++ fd->mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift; ++ fd->nshift = 0; ++ fd->nwidth = 16; ++ fd->nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift; ++ fd->hw.init = &init; + -+ init_completion(&hdata->async.result.completion); ++ return devm_clk_register(dev, &fd->hw); ++} ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_fractional_divider); +diff --git a/drivers/clk/rockchip/regmap/clk-regmap-gate.c b/drivers/clk/rockchip/regmap/clk-regmap-gate.c +new file mode 100644 +index 000000000..36549b912 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-regmap-gate.c +@@ -0,0 +1,82 @@ ++/* ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * Base on code in drivers/clk/clk-gate.c. ++ * See clk-gate.c for further copyright information. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ + -+ hdata->async.request = ahash_request_alloc(hdata->async.s, GFP_KERNEL); -+ if (unlikely(!hdata->async.request)) { -+ derr(0, "error allocating async crypto request"); -+ ret = -ENOMEM; -+ goto error; -+ } ++#include "clk-regmap.h" + -+ ahash_request_set_callback(hdata->async.request, -+ CRYPTO_TFM_REQ_MAY_BACKLOG, -+ cryptodev_complete, &hdata->async.result); -+ hdata->init = 1; -+ return 0; ++#define to_clk_regmap_gate(_hw) container_of(_hw, struct clk_regmap_gate, hw) + -+error: -+ crypto_free_ahash(hdata->async.s); -+ return ret; ++static int clk_regmap_gate_prepare(struct clk_hw *hw) ++{ ++ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); ++ ++ return regmap_write(gate->regmap, gate->reg, ++ 0 | BIT(gate->shift + 16)); +} + -+void cryptodev_hash_deinit(struct hash_data *hdata) ++static void clk_regmap_gate_unprepare(struct clk_hw *hw) +{ -+ if (hdata->init) { -+ ahash_request_free(hdata->async.request); -+ crypto_free_ahash(hdata->async.s); -+ hdata->init = 0; -+ } ++ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); ++ ++ regmap_write(gate->regmap, gate->reg, ++ BIT(gate->shift) | BIT(gate->shift + 16)); +} + -+int cryptodev_hash_reset(struct hash_data *hdata) ++static int clk_regmap_gate_is_prepared(struct clk_hw *hw) +{ -+ int ret; -+ -+ ret = crypto_ahash_init(hdata->async.request); -+ if (unlikely(ret)) { -+ derr(0, "error in crypto_hash_init()"); -+ return ret; -+ } ++ struct clk_regmap_gate *gate = to_clk_regmap_gate(hw); ++ u32 val; + -+ return 0; ++ regmap_read(gate->regmap, gate->reg, &val); + ++ return !(val & BIT(gate->shift)); +} + -+ssize_t cryptodev_hash_update(struct hash_data *hdata, -+ struct scatterlist *sg, size_t len) ++const struct clk_ops clk_regmap_gate_ops = { ++ .prepare = clk_regmap_gate_prepare, ++ .unprepare = clk_regmap_gate_unprepare, ++ .is_prepared = clk_regmap_gate_is_prepared, ++}; ++EXPORT_SYMBOL_GPL(clk_regmap_gate_ops); ++ ++struct clk * ++devm_clk_regmap_register_gate(struct device *dev, const char *name, ++ const char *parent_name, ++ struct regmap *regmap, u32 reg, u8 shift, ++ unsigned long flags) +{ -+ int ret; ++ struct clk_regmap_gate *gate; ++ struct clk_init_data init = {}; + -+ reinit_completion(&hdata->async.result.completion); -+ ahash_request_set_crypt(hdata->async.request, sg, NULL, len); ++ gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL); ++ if (!gate) ++ return ERR_PTR(-ENOMEM); + -+ ret = crypto_ahash_update(hdata->async.request); ++ init.name = name; ++ init.ops = &clk_regmap_gate_ops; ++ init.flags = flags; ++ init.parent_names = (parent_name ? &parent_name : NULL); ++ init.num_parents = (parent_name ? 1 : 0); + -+ return waitfor(&hdata->async.result, ret); ++ gate->dev = dev; ++ gate->regmap = regmap; ++ gate->reg = reg; ++ gate->shift = shift; ++ gate->hw.init = &init; ++ ++ return devm_clk_register(dev, &gate->hw); +} ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_gate); +diff --git a/drivers/clk/rockchip/regmap/clk-regmap-mux.c b/drivers/clk/rockchip/regmap/clk-regmap-mux.c +new file mode 100644 +index 000000000..eb37b5f95 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-regmap-mux.c +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * Base on code in drivers/clk/clk-mux.c. 
++ * See clk-mux.c for further copyright information. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ + -+int cryptodev_hash_final(struct hash_data *hdata, void *output) ++#include "clk-regmap.h" ++ ++#define to_clk_regmap_mux(_hw) container_of(_hw, struct clk_regmap_mux, hw) ++ ++static u8 clk_regmap_mux_get_parent(struct clk_hw *hw) +{ -+ int ret; ++ struct clk_regmap_mux *mux = to_clk_regmap_mux(hw); ++ u8 index; ++ u32 val; + -+ reinit_completion(&hdata->async.result.completion); -+ ahash_request_set_crypt(hdata->async.request, NULL, output, 0); ++ regmap_read(mux->regmap, mux->reg, &val); + -+ ret = crypto_ahash_final(hdata->async.request); ++ index = val >> mux->shift; ++ index &= mux->mask; + -+ return waitfor(&hdata->async.result, ret); ++ return index; +} + -+#ifdef CIOCCPHASH -+/* import the current hash state of src to dst */ -+int cryptodev_hash_copy(struct hash_data *dst, struct hash_data *src) ++static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index) +{ -+ int ret, statesize; -+ void *statedata = NULL; -+ struct crypto_tfm *tfm; ++ struct clk_regmap_mux *mux = to_clk_regmap_mux(hw); + -+ if (unlikely(src == NULL || !src->init || -+ dst == NULL || !dst->init)) { -+ return -EINVAL; -+ } ++ return regmap_write(mux->regmap, mux->reg, (index << mux->shift) | ++ (mux->mask << (mux->shift + 16))); ++} + -+ reinit_completion(&src->async.result.completion); ++const struct clk_ops clk_regmap_mux_ops = { ++ .set_parent = clk_regmap_mux_set_parent, ++ .get_parent = clk_regmap_mux_get_parent, ++ .determine_rate = __clk_mux_determine_rate, ++}; ++EXPORT_SYMBOL_GPL(clk_regmap_mux_ops); + -+ statesize = crypto_ahash_statesize(src->async.s); -+ if (unlikely(statesize <= 0)) { -+ return -EINVAL; -+ } ++struct clk * ++devm_clk_regmap_register_mux(struct device *dev, const char *name, ++ const char * const *parent_names, u8 num_parents, ++ struct regmap *regmap, u32 reg, u8 shift, u8 width, ++ unsigned long flags) ++{ ++ struct clk_regmap_mux *mux; ++ struct clk_init_data init = {}; + -+ statedata = kzalloc(statesize, GFP_KERNEL); -+ if (unlikely(statedata == NULL)) { -+ return -ENOMEM; -+ } ++ mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL); ++ if (!mux) ++ return ERR_PTR(-ENOMEM); + -+ ret = crypto_ahash_export(src->async.request, statedata); -+ if (unlikely(ret < 0)) { -+ if (unlikely(ret == -ENOSYS)) { -+ tfm = crypto_ahash_tfm(src->async.s); -+ derr(0, "cryptodev_hash_copy: crypto_ahash_export not implemented for " -+ "alg='%s', driver='%s'", crypto_tfm_alg_name(tfm), -+ crypto_tfm_alg_driver_name(tfm)); -+ } -+ goto out; -+ } ++ init.name = name; ++ init.ops = &clk_regmap_mux_ops; ++ init.flags = flags; ++ init.parent_names = parent_names; ++ init.num_parents = num_parents; + -+ ret = crypto_ahash_import(dst->async.request, statedata); -+ if (unlikely(ret == -ENOSYS)) { -+ tfm = crypto_ahash_tfm(dst->async.s); -+ derr(0, "cryptodev_hash_copy: crypto_ahash_import not implemented for " -+ "alg='%s', driver='%s'", crypto_tfm_alg_name(tfm), -+ crypto_tfm_alg_driver_name(tfm)); -+ } -+out: -+ kfree(statedata); 
-+ return ret; ++ mux->dev = dev; ++ mux->regmap = regmap; ++ mux->reg = reg; ++ mux->shift = shift; ++ mux->mask = BIT(width) - 1; ++ mux->hw.init = &init; ++ ++ return devm_clk_register(dev, &mux->hw); +} -+#endif /* CIOCCPHASH */ -diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_mux); ++ ++MODULE_LICENSE("GPL"); +diff --git a/drivers/clk/rockchip/regmap/clk-regmap-pll.c b/drivers/clk/rockchip/regmap/clk-regmap-pll.c new file mode 100644 -index 000000000..b8867d91b +index 000000000..24ad7eda9 --- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h -@@ -0,0 +1,111 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++++ b/drivers/clk/rockchip/regmap/clk-regmap-pll.c +@@ -0,0 +1,363 @@ ++/* ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ + -+#ifndef CRYPTLIB_H -+# define CRYPTLIB_H ++#include "clk-regmap.h" + -+#include ++#define PLLCON_OFFSET(x) (x * 4) + -+struct cryptodev_result { -+ struct completion completion; -+ int err; -+}; ++#define PLL_BYPASS(x) HIWORD_UPDATE(x, 15, 15) ++#define PLL_BYPASS_MASK BIT(15) ++#define PLL_BYPASS_SHIFT 15 ++#define PLL_POSTDIV1(x) HIWORD_UPDATE(x, 14, 12) ++#define PLL_POSTDIV1_MASK GENMASK(14, 12) ++#define PLL_POSTDIV1_SHIFT 12 ++#define PLL_FBDIV(x) HIWORD_UPDATE(x, 11, 0) ++#define PLL_FBDIV_MASK GENMASK(11, 0) ++#define PLL_FBDIV_SHIFT 0 + -+#include "cipherapi.h" ++#define PLL_POSTDIV2(x) HIWORD_UPDATE(x, 8, 6) ++#define PLL_POSTDIV2_MASK GENMASK(8, 6) ++#define PLL_POSTDIV2_SHIFT 6 ++#define PLL_REFDIV(x) HIWORD_UPDATE(x, 5, 0) ++#define PLL_REFDIV_MASK GENMASK(5, 0) ++#define PLL_REFDIV_SHIFT 0 + -+struct cipher_data { -+ int init; /* 0 uninitialized */ -+ int blocksize; -+ int aead; -+ int stream; -+ int ivsize; -+ int alignmask; -+ struct { -+ /* block ciphers */ -+ cryptodev_crypto_blkcipher_t *s; -+ cryptodev_blkcipher_request_t *request; ++#define PLL_FOUT_4PHASE_CLK_POWER_DOWN BIT(27) ++#define PLL_FOUT_VCO_CLK_POWER_DOWN BIT(26) ++#define PLL_FOUT_POST_DIV_POWER_DOWN BIT(25) ++#define PLL_DAC_POWER_DOWN BIT(24) ++#define PLL_FRAC(x) UPDATE(x, 23, 0) ++#define PLL_FRAC_MASK GENMASK(23, 0) ++#define PLL_FRAC_SHIFT 0 + -+ /* AEAD ciphers */ -+ struct crypto_aead *as; -+ struct aead_request *arequest; ++#define MIN_FREF_RATE 10000000UL ++#define MAX_FREF_RATE 800000000UL ++#define MIN_FREFDIV_RATE 1000000UL ++#define MAX_FREFDIV_RATE 40000000UL ++#define MIN_FVCO_RATE 400000000UL ++#define MAX_FVCO_RATE 1600000000UL ++#define MIN_FOUTPOSTDIV_RATE 8000000UL ++#define MAX_FOUTPOSTDIV_RATE 1600000000UL + -+ struct cryptodev_result result; -+ uint8_t iv[EALG_MAX_BLOCK_LEN]; -+ } async; ++struct clk_regmap_pll { ++ struct clk_hw hw; ++ struct device *dev; ++ struct regmap *regmap; ++ unsigned int reg; ++ u8 pd_shift; ++ u8 dsmpd_shift; ++ u8 lock_shift; +}; + -+int cryptodev_cipher_init(struct cipher_data *out, const char *alg_name, -+ uint8_t *key, size_t keylen, int stream, int aead); -+void 
cryptodev_cipher_deinit(struct cipher_data *cdata); -+int cryptodev_get_cipher_key(uint8_t *key, struct session_op *sop, int aead); -+int cryptodev_get_cipher_keylen(unsigned int *keylen, struct session_op *sop, -+ int aead); -+ssize_t cryptodev_cipher_decrypt(struct cipher_data *cdata, -+ const struct scatterlist *sg1, -+ struct scatterlist *sg2, size_t len); -+ssize_t cryptodev_cipher_encrypt(struct cipher_data *cdata, -+ const struct scatterlist *sg1, -+ struct scatterlist *sg2, size_t len); ++#define to_clk_regmap_pll(_hw) container_of(_hw, struct clk_regmap_pll, hw) + -+/* AEAD */ -+static inline void cryptodev_cipher_auth(struct cipher_data *cdata, -+ struct scatterlist *sg1, size_t len) ++static unsigned long ++clk_regmap_pll_recalc_rate(struct clk_hw *hw, unsigned long prate) +{ -+ /* for some reason we _have_ to call that even for zero length sgs */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) -+ aead_request_set_assoc(cdata->async.arequest, len ? sg1 : NULL, len); -+#else -+ aead_request_set_ad(cdata->async.arequest, len); -+#endif -+} ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ unsigned int postdiv1, fbdiv, dsmpd, postdiv2, refdiv, frac, bypass; ++ unsigned int con0, con1, con2; ++ u64 foutvco, foutpostdiv; + -+static inline void cryptodev_cipher_set_tag_size(struct cipher_data *cdata, int size) -+{ -+ if (likely(cdata->aead != 0)) -+ crypto_aead_setauthsize(cdata->async.as, size); -+} ++ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(0), &con0); ++ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(1), &con1); ++ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(2), &con2); + -+static inline int cryptodev_cipher_get_tag_size(struct cipher_data *cdata) -+{ -+ if (likely(cdata->init && cdata->aead != 0)) -+ return crypto_aead_authsize(cdata->async.as); -+ else -+ return 0; -+} ++ bypass = (con0 & PLL_BYPASS_MASK) >> PLL_BYPASS_SHIFT; ++ postdiv1 = (con0 & PLL_POSTDIV1_MASK) >> PLL_POSTDIV1_SHIFT; ++ fbdiv = (con0 & PLL_FBDIV_MASK) >> PLL_FBDIV_SHIFT; ++ dsmpd = (con1 & BIT(pll->dsmpd_shift)) >> pll->dsmpd_shift; ++ postdiv2 = (con1 & PLL_POSTDIV2_MASK) >> PLL_POSTDIV2_SHIFT; ++ refdiv = (con1 & PLL_REFDIV_MASK) >> PLL_REFDIV_SHIFT; ++ frac = (con2 & PLL_FRAC_MASK) >> PLL_FRAC_SHIFT; + -+static inline void cryptodev_cipher_set_iv(struct cipher_data *cdata, -+ void *iv, size_t iv_size) -+{ -+ memcpy(cdata->async.iv, iv, min(iv_size, sizeof(cdata->async.iv))); -+} ++ if (bypass) ++ return prate; + -+static inline void cryptodev_cipher_get_iv(struct cipher_data *cdata, -+ void *iv, size_t iv_size) -+{ -+ memcpy(iv, cdata->async.iv, min(iv_size, sizeof(cdata->async.iv))); ++ foutvco = prate * fbdiv; ++ do_div(foutvco, refdiv); ++ ++ if (!dsmpd) { ++ u64 frac_rate = (u64)prate * frac; ++ ++ do_div(frac_rate, refdiv); ++ foutvco += frac_rate >> 24; ++ } ++ ++ foutpostdiv = foutvco; ++ do_div(foutpostdiv, postdiv1); ++ do_div(foutpostdiv, postdiv2); ++ ++ return foutpostdiv; +} + -+/* Hash */ -+struct hash_data { -+ int init; /* 0 uninitialized */ -+ int digestsize; -+ int alignmask; -+ struct { -+ struct crypto_ahash *s; -+ struct cryptodev_result result; -+ struct ahash_request *request; -+ } async; -+}; ++static long clk_pll_round_rate(unsigned long fin, unsigned long fout, ++ u8 *refdiv, u16 *fbdiv, ++ u8 *postdiv1, u8 *postdiv2, ++ u32 *frac, u8 *dsmpd, u8 *bypass) ++{ ++ u8 min_refdiv, max_refdiv, postdiv; ++ u8 _dsmpd = 1, _postdiv1 = 0, _postdiv2 = 0, _refdiv = 0; ++ u16 _fbdiv = 0; ++ u32 _frac = 0; ++ u64 foutvco, foutpostdiv; + -+int 
cryptodev_hash_final(struct hash_data *hdata, void *output); -+ssize_t cryptodev_hash_update(struct hash_data *hdata, -+ struct scatterlist *sg, size_t len); -+int cryptodev_hash_reset(struct hash_data *hdata); -+void cryptodev_hash_deinit(struct hash_data *hdata); -+int cryptodev_hash_init(struct hash_data *hdata, const char *alg_name, -+ int hmac_mode, void *mackey, size_t mackeylen); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)) -+int cryptodev_hash_copy(struct hash_data *dst, struct hash_data *src); -+#endif ++ /* ++ * FREF : 10MHz ~ 800MHz ++ * FREFDIV : 1MHz ~ 40MHz ++ * FOUTVCO : 400MHz ~ 1.6GHz ++ * FOUTPOSTDIV : 8MHz ~ 1.6GHz ++ */ ++ if (fin < MIN_FREF_RATE || fin > MAX_FREF_RATE) ++ return -EINVAL; + ++ if (fout < MIN_FOUTPOSTDIV_RATE || fout > MAX_FOUTPOSTDIV_RATE) ++ return -EINVAL; + -+#endif -diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h b/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h -new file mode 100644 -index 000000000..fd8619db6 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h -@@ -0,0 +1,188 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++ if (fin == fout) { ++ if (bypass) ++ *bypass = true; ++ return fin; ++ } + -+/* cipher stuff */ -+#ifndef CRYPTODEV_H -+# define CRYPTODEV_H ++ min_refdiv = DIV_ROUND_UP(fin, MAX_FREFDIV_RATE); ++ max_refdiv = fin / MIN_FREFDIV_RATE; ++ if (max_refdiv > 64) ++ max_refdiv = 64; + -+#include ++ if (fout < MIN_FVCO_RATE) { ++ postdiv = DIV_ROUND_UP_ULL(MIN_FVCO_RATE, fout); + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) -+# define reinit_completion(x) INIT_COMPLETION(*(x)) -+#endif ++ for (_postdiv2 = 1; _postdiv2 < 8; _postdiv2++) { ++ if (postdiv % _postdiv2) ++ continue; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ _postdiv1 = postdiv / _postdiv2; + -+#define PFX "cryptodev: " -+#define dprintk(level, severity, format, a...) \ -+ do { \ -+ if (level <= cryptodev_verbosity) \ -+ printk(severity PFX "%s[%u] (%s:%u): " format "\n", \ -+ current->comm, current->pid, \ -+ __func__, __LINE__, \ -+ ##a); \ -+ } while (0) -+#define derr(level, format, a...) dprintk(level, KERN_ERR, format, ##a) -+#define dwarning(level, format, a...) dprintk(level, KERN_WARNING, format, ##a) -+#define dinfo(level, format, a...) dprintk(level, KERN_INFO, format, ##a) -+#define ddebug(level, format, a...) 
dprintk(level, KERN_DEBUG, format, ##a) ++ if (_postdiv1 > 0 && _postdiv1 < 8) ++ break; ++ } + ++ if (_postdiv2 > 7) ++ return -EINVAL; + -+extern int cryptodev_verbosity; ++ fout *= _postdiv1 * _postdiv2; ++ } else { ++ _postdiv1 = 1; ++ _postdiv2 = 1; ++ } + -+struct fcrypt { -+ struct list_head list; -+ struct list_head dma_map_list; -+ struct mutex sem; -+}; ++ for (_refdiv = min_refdiv; _refdiv <= max_refdiv; _refdiv++) { ++ u64 tmp, frac_rate; + -+/* compatibility stuff */ -+#ifdef CONFIG_COMPAT -+#include ++ if (fin % _refdiv) ++ continue; + -+/* input of CIOCGSESSION */ -+struct compat_session_op { -+ /* Specify either cipher or mac -+ */ -+ uint32_t cipher; /* cryptodev_crypto_op_t */ -+ uint32_t mac; /* cryptodev_crypto_op_t */ ++ tmp = (u64)fout * _refdiv; ++ do_div(tmp, fin); ++ _fbdiv = tmp; ++ if (_fbdiv < 10 || _fbdiv > 1600) ++ continue; + -+ uint32_t keylen; -+ compat_uptr_t key; /* pointer to key data */ -+ uint32_t mackeylen; -+ compat_uptr_t mackey; /* pointer to mac key data */ ++ tmp = (u64)_fbdiv * fin; ++ do_div(tmp, _refdiv); ++ if (fout < MIN_FVCO_RATE || fout > MAX_FVCO_RATE) ++ continue; + -+ uint32_t ses; /* session identifier */ -+}; ++ frac_rate = fout - tmp; + -+/* input of CIOCCRYPT */ -+struct compat_crypt_op { -+ uint32_t ses; /* session identifier */ -+ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ -+ uint16_t flags; /* see COP_FLAG_* */ -+ uint32_t len; /* length of source data */ -+ compat_uptr_t src; /* source data */ -+ compat_uptr_t dst; /* pointer to output data */ -+ compat_uptr_t mac;/* pointer to output data for hash/MAC operations */ -+ compat_uptr_t iv;/* initialization vector for encryption operations */ -+}; ++ if (frac_rate) { ++ tmp = (u64)frac_rate * _refdiv; ++ tmp <<= 24; ++ do_div(tmp, fin); ++ _frac = tmp; ++ _dsmpd = 0; ++ } + -+/* input of COMPAT_CIOCAUTHCRYPT */ -+struct compat_crypt_auth_op { -+ uint32_t ses; /* session identifier */ -+ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ -+ uint16_t flags; /* see COP_FLAG_AEAD_* */ -+ uint32_t len; /* length of source data */ -+ uint32_t auth_len; /* length of auth data */ -+ compat_uptr_t auth_src; /* authenticated-only data */ ++ break; ++ } + -+ /* The current implementation is more efficient if data are -+ * encrypted in-place (src== dst). ++ /* ++ * If DSMPD = 1 (DSM is disabled, "integer mode") ++ * FOUTVCO = FREF / REFDIV * FBDIV ++ * FOUTPOSTDIV = FOUTVCO / POSTDIV1 / POSTDIV2 ++ * ++ * If DSMPD = 0 (DSM is enabled, "fractional mode") ++ * FOUTVCO = FREF / REFDIV * (FBDIV + FRAC / 2^24) ++ * FOUTPOSTDIV = FOUTVCO / POSTDIV1 / POSTDIV2 + */ -+ compat_uptr_t src; /* data to be encrypted and authenticated */ -+ compat_uptr_t dst; /* pointer to output data. Must have -+ * space for tag. For TLS this should be at least -+ * len + tag_size + block_size for padding -+ */ ++ foutvco = fin * _fbdiv; ++ do_div(foutvco, _refdiv); + -+ compat_uptr_t tag; /* where the tag will be copied to. TLS mode -+ * doesn't use that as tag is copied to dst. -+ * SRTP mode copies tag there. -+ */ -+ uint32_t tag_len; /* the length of the tag. Use zero for digest size or max tag. 
*/ ++ if (!_dsmpd) { ++ u64 frac_rate = (u64)fin * _frac; + -+ /* initialization vector for encryption operations */ -+ compat_uptr_t iv; -+ uint32_t iv_len; -+}; ++ do_div(frac_rate, _refdiv); ++ foutvco += frac_rate >> 24; ++ } + -+/* compat ioctls, defined for the above structs */ -+#define COMPAT_CIOCGSESSION _IOWR('c', 102, struct compat_session_op) -+#define COMPAT_CIOCCRYPT _IOWR('c', 104, struct compat_crypt_op) -+#define COMPAT_CIOCASYNCCRYPT _IOW('c', 107, struct compat_crypt_op) -+#define COMPAT_CIOCASYNCFETCH _IOR('c', 108, struct compat_crypt_op) ++ foutpostdiv = foutvco; ++ do_div(foutpostdiv, _postdiv1); ++ do_div(foutpostdiv, _postdiv2); + -+#define COMPAT_CIOCAUTHCRYPT _IOWR('c', 109, struct compat_crypt_auth_op) ++ if (refdiv) ++ *refdiv = _refdiv; ++ if (fbdiv) ++ *fbdiv = _fbdiv; ++ if (postdiv1) ++ *postdiv1 = _postdiv1; ++ if (postdiv2) ++ *postdiv2 = _postdiv2; ++ if (frac) ++ *frac = _frac; ++ if (dsmpd) ++ *dsmpd = _dsmpd; ++ if (bypass) ++ *bypass = false; + -+#endif /* CONFIG_COMPAT */ ++ return (unsigned long)foutpostdiv; ++} + -+/* kernel-internal extension to struct crypt_op */ -+struct kernel_crypt_op { -+ struct crypt_op cop; ++static long ++clk_regmap_pll_round_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long *prate) ++{ ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ long rate; + -+ int ivlen; -+ __u8 iv[EALG_MAX_BLOCK_LEN]; ++ rate = clk_pll_round_rate(*prate, drate, NULL, NULL, NULL, NULL, NULL, ++ NULL, NULL); + -+ int digestsize; -+ uint8_t hash_output[AALG_MAX_RESULT_LEN]; ++ dev_dbg(pll->dev, "%s: prate=%ld, drate=%ld, rate=%ld\n", ++ clk_hw_get_name(hw), *prate, drate, rate); + -+ struct task_struct *task; -+ struct mm_struct *mm; -+}; ++ return rate; ++} + -+struct kernel_crypt_auth_op { -+ struct crypt_auth_op caop; ++static int ++clk_regmap_pll_set_rate(struct clk_hw *hw, unsigned long drate, ++ unsigned long prate) ++{ ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ u8 refdiv, postdiv1, postdiv2, dsmpd, bypass; ++ u16 fbdiv; ++ u32 frac; ++ long rate; + -+ int dst_len; /* based on src_len + pad + tag */ -+ int ivlen; -+ __u8 iv[EALG_MAX_BLOCK_LEN]; ++ rate = clk_pll_round_rate(prate, drate, &refdiv, &fbdiv, &postdiv1, ++ &postdiv2, &frac, &dsmpd, &bypass); ++ if (rate < 0) ++ return rate; + -+ struct task_struct *task; -+ struct mm_struct *mm; -+}; ++ dev_dbg(pll->dev, "%s: rate=%ld, bypass=%d\n", ++ clk_hw_get_name(hw), drate, bypass); + -+/* auth */ -+#ifdef CONFIG_COMPAT -+int compat_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg); -+int compat_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg); -+#endif /* CONFIG_COMPAT */ -+int cryptodev_kcaop_from_user(struct kernel_crypt_auth_op *kcop, -+ struct fcrypt *fcr, void __user *arg); -+int cryptodev_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, -+ struct fcrypt *fcr, void __user *arg); -+int crypto_auth_run(struct fcrypt *fcr, struct kernel_crypt_auth_op *kcaop); -+int crypto_run(struct fcrypt *fcr, struct kernel_crypt_op *kcop); ++ if (bypass) { ++ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(0), ++ PLL_BYPASS(1)); ++ } else { ++ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(0), ++ PLL_BYPASS(0) | PLL_POSTDIV1(postdiv1) | ++ PLL_FBDIV(fbdiv)); ++ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), ++ HIWORD_UPDATE(dsmpd, pll->dsmpd_shift, pll->dsmpd_shift) | ++ PLL_POSTDIV2(postdiv2) | PLL_REFDIV(refdiv)); ++ regmap_write(pll->regmap, pll->reg + 
PLLCON_OFFSET(2), ++ PLL_FRAC(frac)); + -+#include "cryptlib.h" ++ dev_dbg(pll->dev, "refdiv=%d, fbdiv=%d, frac=%d\n", ++ refdiv, fbdiv, frac); ++ dev_dbg(pll->dev, "postdiv1=%d, postdiv2=%d\n", ++ postdiv1, postdiv2); ++ } + -+/* other internal structs */ -+struct csession { -+ struct list_head entry; -+ struct mutex sem; -+ struct cipher_data cdata; -+ struct hash_data hdata; -+ uint32_t sid; -+ uint32_t alignmask; ++ return 0; ++} + -+ unsigned int array_size; -+ unsigned int used_pages; /* the number of pages that are used */ -+ /* the number of pages marked as NOT-writable; they preceed writeables */ -+ unsigned int readonly_pages; -+ struct page **pages; -+ struct scatterlist *sg; -+}; ++static int clk_regmap_pll_prepare(struct clk_hw *hw) ++{ ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ u32 v; ++ int ret; + -+struct csession *crypto_get_session_by_sid(struct fcrypt *fcr, uint32_t sid); -+int -+crypto_get_sessions_by_sid(struct fcrypt *fcr, -+ uint32_t sid_1, struct csession **ses_ptr_1, -+ uint32_t sid_2, struct csession **ses_ptr_2); ++ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), ++ HIWORD_UPDATE(0, pll->pd_shift, pll->pd_shift)); + -+static inline void crypto_put_session(struct csession *ses_ptr) ++ ret = regmap_read_poll_timeout(pll->regmap, ++ pll->reg + PLLCON_OFFSET(1), ++ v, v & BIT(pll->lock_shift), 50, 50000); ++ if (ret) ++ dev_err(pll->dev, "%s is not lock\n", clk_hw_get_name(hw)); ++ ++ return 0; ++} ++ ++static void clk_regmap_pll_unprepare(struct clk_hw *hw) +{ -+ mutex_unlock(&ses_ptr->sem); ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ ++ regmap_write(pll->regmap, pll->reg + PLLCON_OFFSET(1), ++ HIWORD_UPDATE(1, pll->pd_shift, pll->pd_shift)); +} -+int cryptodev_adjust_sg_array(struct csession *ses, int pagecount); + -+#endif /* CRYPTODEV_INT_H */ -diff --git a/drivers/crypto/rockchip/cryptodev_linux/ioctl.c b/drivers/crypto/rockchip/cryptodev_linux/ioctl.c ++static int clk_regmap_pll_is_prepared(struct clk_hw *hw) ++{ ++ struct clk_regmap_pll *pll = to_clk_regmap_pll(hw); ++ unsigned int con1; ++ ++ regmap_read(pll->regmap, pll->reg + PLLCON_OFFSET(1), &con1); ++ ++ return !(con1 & BIT(pll->pd_shift)); ++} ++ ++static const struct clk_ops clk_regmap_pll_ops = { ++ .recalc_rate = clk_regmap_pll_recalc_rate, ++ .round_rate = clk_regmap_pll_round_rate, ++ .set_rate = clk_regmap_pll_set_rate, ++ .prepare = clk_regmap_pll_prepare, ++ .unprepare = clk_regmap_pll_unprepare, ++ .is_prepared = clk_regmap_pll_is_prepared, ++}; ++ ++struct clk * ++devm_clk_regmap_register_pll(struct device *dev, const char *name, ++ const char *parent_name, ++ struct regmap *regmap, u32 reg, u8 pd_shift, ++ u8 dsmpd_shift, u8 lock_shift, ++ unsigned long flags) ++{ ++ struct clk_regmap_pll *pll; ++ struct clk_init_data init = {}; ++ ++ pll = devm_kzalloc(dev, sizeof(*pll), GFP_KERNEL); ++ if (!pll) ++ return ERR_PTR(-ENOMEM); ++ ++ init.name = name; ++ init.ops = &clk_regmap_pll_ops; ++ init.flags = flags; ++ init.parent_names = (parent_name ? &parent_name : NULL); ++ init.num_parents = (parent_name ? 
1 : 0); ++ ++ pll->dev = dev; ++ pll->regmap = regmap; ++ pll->reg = reg; ++ pll->pd_shift = pd_shift; ++ pll->dsmpd_shift = dsmpd_shift; ++ pll->lock_shift = lock_shift; ++ pll->hw.init = &init; ++ ++ return devm_clk_register(dev, &pll->hw); ++} ++EXPORT_SYMBOL_GPL(devm_clk_regmap_register_pll); +diff --git a/drivers/clk/rockchip/regmap/clk-regmap.h b/drivers/clk/rockchip/regmap/clk-regmap.h new file mode 100644 -index 000000000..032b016e4 +index 000000000..4626e1982 --- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/ioctl.c -@@ -0,0 +1,1329 @@ ++++ b/drivers/clk/rockchip/regmap/clk-regmap.h +@@ -0,0 +1,308 @@ +/* -+ * Driver for /dev/crypto device (aka CryptoDev) -+ * -+ * Copyright (c) 2004 Michal Ludvig , SuSE Labs -+ * Copyright (c) 2009,2010,2011 Nikos Mavrogiannopoulos -+ * Copyright (c) 2010 Phil Sutter -+ * -+ * This file is part of linux cryptodev. ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. + * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+/* -+ * Device /dev/crypto provides an interface for -+ * accessing kernel CryptoAPI algorithms (ciphers, -+ * hashes) from userspace programs. -+ * -+ * /dev/crypto interface was originally introduced in -+ * OpenBSD and this module attempts to keep the API. -+ * + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include ++#ifndef __CLK_REGMAP_H__ ++#define __CLK_REGMAP_H__ + -+#include "cryptodev.h" -+#include "zc.h" -+#include "version.h" -+#include "cipherapi.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#include "rk_cryptodev.h" ++#define UPDATE(x, h, l) (((x) << (l)) & GENMASK((h), (l))) ++#define HIWORD_UPDATE(v, h, l) (((v) << (l)) | (GENMASK((h), (l)) << 16)) + -+MODULE_AUTHOR("Nikos Mavrogiannopoulos "); -+MODULE_DESCRIPTION("CryptoDev driver"); -+MODULE_LICENSE("GPL"); ++struct clk_pll_data { ++ unsigned int id; ++ const char *name; ++ const char *parent_name; ++ u32 reg; ++ u8 pd_shift; ++ u8 dsmpd_shift; ++ u8 lock_shift; ++ unsigned long flags; ++}; + -+/* ====== Compile-time config ====== */ ++#define PLL(_id, _name, _parent_name, _reg, _pd_shift, _dsmpd_shift, \ ++ _lock_shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_name = _parent_name, \ ++ .reg = _reg, \ ++ .pd_shift = _pd_shift, \ ++ .dsmpd_shift = _dsmpd_shift, \ ++ .lock_shift = _lock_shift, \ ++ .flags = _flags, \ ++} + -+/* Default (pre-allocated) and maximum size of the job queue. -+ * These are free, pending and done items all together. 
*/ -+#define DEF_COP_RINGSIZE 16 -+#define MAX_COP_RINGSIZE 64 ++#define RK618_PLL(_id, _name, _parent_name, _reg, _flags) \ ++ PLL(_id, _name, _parent_name, _reg, 10, 9, 15, _flags) + -+/* ====== Module parameters ====== */ ++struct clk_mux_data { ++ unsigned int id; ++ const char *name; ++ const char *const *parent_names; ++ u8 num_parents; ++ u32 reg; ++ u8 shift; ++ u8 width; ++ unsigned long flags; ++}; + -+int cryptodev_verbosity; -+module_param(cryptodev_verbosity, int, 0644); -+MODULE_PARM_DESC(cryptodev_verbosity, "0: normal, 1: verbose, 2: debug"); ++#define MUX(_id, _name, _parent_names, _reg, _shift, _width, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_names = _parent_names, \ ++ .num_parents = ARRAY_SIZE(_parent_names), \ ++ .reg = _reg, \ ++ .shift = _shift, \ ++ .width = _width, \ ++ .flags = _flags, \ ++} + -+/* ====== CryptoAPI ====== */ -+struct todo_list_item { -+ struct list_head __hook; -+ struct kernel_crypt_op kcop; -+ int result; ++struct clk_gate_data { ++ unsigned int id; ++ const char *name; ++ const char *parent_name; ++ u32 reg; ++ u8 shift; ++ unsigned long flags; +}; + -+struct locked_list { -+ struct list_head list; -+ struct mutex lock; -+}; ++#define GATE(_id, _name, _parent_name, _reg, _shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_name = _parent_name, \ ++ .reg = _reg, \ ++ .shift = _shift, \ ++ .flags = _flags, \ ++} + -+struct crypt_priv { -+ struct fcrypt fcrypt; -+ struct locked_list free, todo, done; -+ int itemcount; -+ struct work_struct cryptask; -+ wait_queue_head_t user_waiter; ++struct clk_divider_data { ++ unsigned int id; ++ const char *name; ++ const char *parent_name; ++ u32 reg; ++ u8 shift; ++ u8 width; ++ unsigned long flags; +}; + -+#define FILL_SG(sg, ptr, len) \ -+ do { \ -+ (sg)->page = virt_to_page(ptr); \ -+ (sg)->offset = offset_in_page(ptr); \ -+ (sg)->length = len; \ -+ (sg)->dma_address = 0; \ -+ } while (0) ++#define DIV(_id, _name, _parent_name, _reg, _shift, _width, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_name = _parent_name, \ ++ .reg = _reg, \ ++ .shift = _shift, \ ++ .width = _width, \ ++ .flags = _flags, \ ++} + -+/* cryptodev's own workqueue, keeps crypto tasks from disturbing the force */ -+static struct workqueue_struct *cryptodev_wq; -+static atomic_t cryptodev_sess = ATOMIC_INIT(1); ++struct clk_composite_data { ++ unsigned int id; ++ const char *name; ++ const char *const *parent_names; ++ u8 num_parents; ++ u32 mux_reg; ++ u8 mux_shift; ++ u8 mux_width; ++ u32 div_reg; ++ u8 div_shift; ++ u8 div_width; ++ u8 div_flags; ++ u32 gate_reg; ++ u8 gate_shift; ++ unsigned long flags; ++}; + -+/* Prepare session for future use. 
*/ -+static int -+crypto_create_session(struct fcrypt *fcr, struct session_op *sop) -+{ -+ struct csession *ses_new = NULL, *ses_ptr; -+ int ret = 0; -+ const char *alg_name = NULL; -+ const char *hash_name = NULL; -+ int hmac_mode = 1, stream = 0, aead = 0; -+ /* -+ * With composite aead ciphers, only ckey is used and it can cover all the -+ * structure space; otherwise both keys may be used simultaneously but they -+ * are confined to their spaces -+ */ -+ struct { -+ uint8_t ckey[CRYPTO_CIPHER_MAX_KEY_LEN]; -+ uint8_t mkey[CRYPTO_HMAC_MAX_KEY_LEN]; -+ /* padding space for aead keys */ -+ uint8_t pad[RTA_SPACE(sizeof(struct crypto_authenc_key_param))]; -+ } keys; ++#define COMPOSITE(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ _div_reg, _div_shift, _div_width, \ ++ _gate_reg, _gate_shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_names = _parent_names, \ ++ .num_parents = ARRAY_SIZE(_parent_names), \ ++ .mux_reg = _mux_reg, \ ++ .mux_shift = _mux_shift, \ ++ .mux_width = _mux_width, \ ++ .div_reg = _div_reg, \ ++ .div_shift = _div_shift, \ ++ .div_width = _div_width, \ ++ .div_flags = CLK_DIVIDER_HIWORD_MASK, \ ++ .gate_reg = _gate_reg, \ ++ .gate_shift = _gate_shift, \ ++ .flags = _flags, \ ++} + -+ /* Does the request make sense? */ -+ if (unlikely(!sop->cipher && !sop->mac)) { -+ ddebug(1, "Both 'cipher' and 'mac' unset."); -+ return -EINVAL; -+ } ++#define COMPOSITE_NOMUX(_id, _name, _parent_name, \ ++ _div_reg, _div_shift, _div_width, \ ++ _gate_reg, _gate_shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_names = (const char *[]){ _parent_name }, \ ++ .num_parents = 1, \ ++ .div_reg = _div_reg, \ ++ .div_shift = _div_shift, \ ++ .div_width = _div_width, \ ++ .div_flags = CLK_DIVIDER_HIWORD_MASK, \ ++ .gate_reg = _gate_reg, \ ++ .gate_shift = _gate_shift, \ ++ .flags = _flags, \ ++} + -+ memset(&keys, 0x00, sizeof(keys)); ++#define COMPOSITE_NODIV(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ _gate_reg, _gate_shift, _flags) \ ++ COMPOSITE(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ 0, 0, 0, \ ++ _gate_reg, _gate_shift, _flags) + -+ switch (sop->cipher) { -+ case 0: -+ break; -+ case CRYPTO_DES_CBC: -+ alg_name = "cbc(des)"; -+ break; -+ case CRYPTO_3DES_CBC: -+ alg_name = "cbc(des3_ede)"; -+ break; -+ case CRYPTO_BLF_CBC: -+ alg_name = "cbc(blowfish)"; -+ break; -+ case CRYPTO_AES_CBC: -+ alg_name = "cbc(aes)"; -+ break; -+ case CRYPTO_AES_ECB: -+ alg_name = "ecb(aes)"; -+ break; -+ case CRYPTO_AES_XTS: -+ alg_name = "xts(aes)"; -+ break; -+ case CRYPTO_CAMELLIA_CBC: -+ alg_name = "cbc(camellia)"; -+ break; -+ case CRYPTO_AES_CTR: -+ alg_name = "ctr(aes)"; -+ stream = 1; -+ break; -+ case CRYPTO_AES_GCM: -+ alg_name = "gcm(aes)"; -+ stream = 1; -+ aead = 1; -+ break; -+ case CRYPTO_TLS11_AES_CBC_HMAC_SHA1: -+ alg_name = "tls11(hmac(sha1),cbc(aes))"; -+ stream = 0; -+ aead = 1; -+ break; -+ case CRYPTO_TLS12_AES_CBC_HMAC_SHA256: -+ alg_name = "tls12(hmac(sha256),cbc(aes))"; -+ stream = 0; -+ aead = 1; -+ break; -+ case CRYPTO_NULL: -+ alg_name = "ecb(cipher_null)"; -+ stream = 1; -+ break; -+ default: -+ alg_name = rk_get_cipher_name(sop->cipher, &stream, &aead); -+ if (!alg_name) { -+ ddebug(1, "bad cipher: %d", sop->cipher); -+ return -EINVAL; -+ } -+ break; -+ } ++#define COMPOSITE_FRAC(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ _div_reg, \ ++ _gate_reg, _gate_shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_names = 
_parent_names, \ ++ .num_parents = ARRAY_SIZE(_parent_names), \ ++ .mux_reg = _mux_reg, \ ++ .mux_shift = _mux_shift, \ ++ .mux_width = _mux_width, \ ++ .div_reg = _div_reg, \ ++ .gate_reg = _gate_reg, \ ++ .gate_shift = _gate_shift, \ ++ .flags = _flags, \ ++} + -+ switch (sop->mac) { -+ case 0: -+ break; -+ case CRYPTO_MD5_HMAC: -+ hash_name = "hmac(md5)"; -+ break; -+ case CRYPTO_RIPEMD160_HMAC: -+ hash_name = "hmac(rmd160)"; -+ break; -+ case CRYPTO_SHA1_HMAC: -+ hash_name = "hmac(sha1)"; -+ break; -+ case CRYPTO_SHA2_224_HMAC: -+ hash_name = "hmac(sha224)"; -+ break; ++#define COMPOSITE_FRAC_NOMUX(_id, _name, _parent_name, \ ++ _div_reg, \ ++ _gate_reg, _gate_shift, _flags) \ ++{ \ ++ .id = _id, \ ++ .name = _name, \ ++ .parent_names = (const char *[]){ _parent_name }, \ ++ .num_parents = 1, \ ++ .div_reg = _div_reg, \ ++ .gate_reg = _gate_reg, \ ++ .gate_shift = _gate_shift, \ ++ .flags = _flags, \ ++} + -+ case CRYPTO_SHA2_256_HMAC: -+ hash_name = "hmac(sha256)"; -+ break; -+ case CRYPTO_SHA2_384_HMAC: -+ hash_name = "hmac(sha384)"; -+ break; -+ case CRYPTO_SHA2_512_HMAC: -+ hash_name = "hmac(sha512)"; -+ break; ++#define COMPOSITE_FRAC_NOGATE(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ _div_reg, \ ++ _flags) \ ++ COMPOSITE_FRAC(_id, _name, _parent_names, \ ++ _mux_reg, _mux_shift, _mux_width, \ ++ _div_reg, 0, 0, _flags) + -+ /* non-hmac cases */ -+ case CRYPTO_MD5: -+ hash_name = "md5"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_RIPEMD160: -+ hash_name = "rmd160"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_SHA1: -+ hash_name = "sha1"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_SHA2_224: -+ hash_name = "sha224"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_SHA2_256: -+ hash_name = "sha256"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_SHA2_384: -+ hash_name = "sha384"; -+ hmac_mode = 0; -+ break; -+ case CRYPTO_SHA2_512: -+ hash_name = "sha512"; -+ hmac_mode = 0; -+ break; -+ default: -+ hash_name = rk_get_hash_name(sop->mac, &hmac_mode); -+ if (!hash_name) { -+ ddebug(1, "bad mac: %d", sop->mac); -+ return -EINVAL; -+ } -+ break; -+ } ++struct clk_regmap_fractional_divider { ++ struct clk_hw hw; ++ struct device *dev; ++ struct regmap *regmap; ++ u32 reg; ++ u8 mshift; ++ u8 mwidth; ++ u32 mmask; ++ u8 nshift; ++ u8 nwidth; ++ u32 nmask; ++}; + -+ /* Create a session and put it to the list. Zeroing the structure helps -+ * also with a single exit point in case of errors */ -+ ses_new = kzalloc(sizeof(*ses_new), GFP_KERNEL); -+ if (!ses_new) -+ return -ENOMEM; ++struct clk_regmap_divider { ++ struct clk_hw hw; ++ struct device *dev; ++ struct regmap *regmap; ++ u32 reg; ++ u8 shift; ++ u8 width; ++}; + -+ /* Set-up crypto transform. 
*/ -+ if (alg_name) { -+ unsigned int keylen; -+ ret = cryptodev_get_cipher_keylen(&keylen, sop, aead); -+ if (unlikely(ret < 0)) { -+ ddebug(1, "Setting key failed for %s-%zu.", -+ alg_name, (size_t)sop->keylen*8); -+ goto session_error; -+ } ++struct clk_regmap_gate { ++ struct clk_hw hw; ++ struct device *dev; ++ struct regmap *regmap; ++ u32 reg; ++ u8 shift; ++}; + -+ ret = cryptodev_get_cipher_key(keys.ckey, sop, aead); -+ if (unlikely(ret < 0)) -+ goto session_error; ++struct clk_regmap_mux { ++ struct clk_hw hw; ++ struct device *dev; ++ struct regmap *regmap; ++ u32 reg; ++ u32 mask; ++ u8 shift; ++}; + -+ ret = cryptodev_cipher_init(&ses_new->cdata, alg_name, keys.ckey, -+ keylen, stream, aead); -+ if (ret < 0) { -+ ddebug(1, "Failed to load cipher for %s", alg_name); -+ goto session_error; -+ } -+ } ++extern const struct clk_ops clk_regmap_mux_ops; ++extern const struct clk_ops clk_regmap_divider_ops; ++extern const struct clk_ops clk_regmap_gate_ops; ++extern const struct clk_ops clk_regmap_fractional_divider_ops; + -+ if (hash_name && aead == 0) { -+ if (unlikely(sop->mackeylen > CRYPTO_HMAC_MAX_KEY_LEN)) { -+ ddebug(1, "Setting key failed for %s-%zu.", -+ hash_name, (size_t)sop->mackeylen*8); -+ ret = -EINVAL; -+ goto session_error; -+ } ++struct clk * ++devm_clk_regmap_register_pll(struct device *dev, const char *name, ++ const char *parent_name, ++ struct regmap *regmap, u32 reg, u8 pd_shift, ++ u8 dsmpd_shift, u8 lock_shift, ++ unsigned long flags); + -+ if (sop->mackey && unlikely(copy_from_user(keys.mkey, sop->mackey, -+ sop->mackeylen))) { -+ ret = -EFAULT; -+ goto session_error; -+ } ++struct clk * ++devm_clk_regmap_register_mux(struct device *dev, const char *name, ++ const char * const *parent_names, u8 num_parents, ++ struct regmap *regmap, u32 reg, u8 shift, u8 width, ++ unsigned long flags); + -+ ret = cryptodev_hash_init(&ses_new->hdata, hash_name, hmac_mode, -+ keys.mkey, sop->mackeylen); -+ if (ret != 0) { -+ ddebug(1, "Failed to load hash for %s", hash_name); -+ goto session_error; -+ } ++struct clk * ++devm_clk_regmap_register_divider(struct device *dev, const char *name, ++ const char *parent_name, struct regmap *regmap, ++ u32 reg, u8 shift, u8 width, ++ unsigned long flags); + -+ ret = cryptodev_hash_reset(&ses_new->hdata); -+ if (ret != 0) { -+ goto session_error; -+ } -+ } ++struct clk * ++devm_clk_regmap_register_gate(struct device *dev, const char *name, ++ const char *parent_name, ++ struct regmap *regmap, u32 reg, u8 shift, ++ unsigned long flags); + -+ ses_new->alignmask = max(ses_new->cdata.alignmask, -+ ses_new->hdata.alignmask); -+ ddebug(2, "got alignmask %d", ses_new->alignmask); ++struct clk * ++devm_clk_regmap_register_fractional_divider(struct device *dev, ++ const char *name, ++ const char *parent_name, ++ struct regmap *regmap, ++ u32 reg, unsigned long flags); + -+ ses_new->array_size = DEFAULT_PREALLOC_PAGES; -+ ddebug(2, "preallocating for %d user pages", ses_new->array_size); -+ ses_new->pages = kzalloc(ses_new->array_size * -+ sizeof(struct page *), GFP_KERNEL); -+ ses_new->sg = kzalloc(ses_new->array_size * -+ sizeof(struct scatterlist), GFP_KERNEL); -+ if (ses_new->sg == NULL || ses_new->pages == NULL) { -+ ddebug(0, "Memory error"); -+ ret = -ENOMEM; -+ goto session_error; -+ } ++struct clk * ++devm_clk_regmap_register_composite(struct device *dev, const char *name, ++ const char *const *parent_names, ++ u8 num_parents, struct regmap *regmap, ++ u32 mux_reg, u8 mux_shift, u8 mux_width, ++ u32 div_reg, u8 div_shift, u8 div_width, ++ 
u8 div_flags, ++ u32 gate_reg, u8 gate_shift, ++ unsigned long flags); + -+ /* Non-multithreaded can only create one session */ -+ if (!rk_cryptodev_multi_thread(NULL) && -+ !atomic_dec_and_test(&cryptodev_sess)) { -+ atomic_inc(&cryptodev_sess); -+ ddebug(2, "Non-multithreaded can only create one session. sess = %d", -+ atomic_read(&cryptodev_sess)); -+ ret = -EBUSY; -+ goto session_error; -+ } ++#endif +diff --git a/drivers/clk/rockchip/regmap/clk-rk618.c b/drivers/clk/rockchip/regmap/clk-rk618.c +new file mode 100644 +index 000000000..c780f502b +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-rk618.c +@@ -0,0 +1,408 @@ ++/* ++ * Copyright (c) 2017 Rockchip Electronics Co. Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ + -+ /* put the new session to the list */ -+ get_random_bytes(&ses_new->sid, sizeof(ses_new->sid)); -+ mutex_init(&ses_new->sem); ++#include ++#include ++#include ++#include ++#include + -+ mutex_lock(&fcr->sem); -+restart: -+ list_for_each_entry(ses_ptr, &fcr->list, entry) { -+ /* Check for duplicate SID */ -+ if (unlikely(ses_new->sid == ses_ptr->sid)) { -+ get_random_bytes(&ses_new->sid, sizeof(ses_new->sid)); -+ /* Unless we have a broken RNG this -+ shouldn't loop forever... ;-) */ -+ goto restart; -+ } -+ } ++#include "clk-regmap.h" + -+ list_add(&ses_new->entry, &fcr->list); -+ mutex_unlock(&fcr->sem); ++#define RK618_CRU_CLKSEL0 0x0058 ++#define RK618_CRU_CLKSEL1 0x005c ++#define RK618_CRU_CLKSEL2 0x0060 ++#define RK618_CRU_CLKSEL3 0x0064 ++#define RK618_CRU_PLL0_CON0 0x0068 ++#define RK618_CRU_PLL0_CON1 0x006c ++#define RK618_CRU_PLL0_CON2 0x0070 ++#define RK618_CRU_PLL1_CON0 0x0074 ++#define RK618_CRU_PLL1_CON1 0x0078 ++#define RK618_CRU_PLL1_CON2 0x007c + -+ /* Fill in some values for the user. */ -+ sop->ses = ses_new->sid; -+ return 0; ++enum { ++ LCDC0_CLK = 1, ++ LCDC1_CLK, ++ VIF_PLLIN_CLK, ++ SCALER_PLLIN_CLK, ++ VIF_PLL_CLK, ++ SCALER_PLL_CLK, ++ VIF0_CLK, ++ VIF1_CLK, ++ SCALER_IN_CLK, ++ SCALER_CLK, ++ DITHER_CLK, ++ HDMI_CLK, ++ MIPI_CLK, ++ LVDS_CLK, ++ LVTTL_CLK, ++ RGB_CLK, ++ VIF0_PRE_CLK, ++ VIF1_PRE_CLK, ++ CODEC_CLK, ++ NR_CLKS, ++}; + -+ /* We count on ses_new to be initialized with zeroes -+ * Since hdata and cdata are embedded within ses_new, it follows that -+ * hdata->init and cdata->init are either zero or one as they have been -+ * initialized or not */ -+session_error: -+ cryptodev_hash_deinit(&ses_new->hdata); -+ cryptodev_cipher_deinit(&ses_new->cdata); -+ kfree(ses_new->sg); -+ kfree(ses_new->pages); -+ kfree(ses_new); -+ return ret; -+} ++struct rk618_cru { ++ struct device *dev; ++ struct rk618 *parent; ++ struct regmap *regmap; + -+/* Everything that needs to be done when removing a session. 
*/ -+static inline void -+crypto_destroy_session(struct csession *ses_ptr) -+{ -+ if (!mutex_trylock(&ses_ptr->sem)) { -+ ddebug(2, "Waiting for semaphore of sid=0x%08X", ses_ptr->sid); -+ mutex_lock(&ses_ptr->sem); -+ } -+ ddebug(2, "Removed session 0x%08X", ses_ptr->sid); -+ cryptodev_cipher_deinit(&ses_ptr->cdata); -+ cryptodev_hash_deinit(&ses_ptr->hdata); -+ ddebug(2, "freeing space for %d user pages", ses_ptr->array_size); -+ kfree(ses_ptr->pages); -+ kfree(ses_ptr->sg); -+ mutex_unlock(&ses_ptr->sem); -+ mutex_destroy(&ses_ptr->sem); -+ kfree(ses_ptr); ++ struct clk_onecell_data clk_data; ++}; + -+ /* Non-multithreaded can only create one session */ -+ if (!rk_cryptodev_multi_thread(NULL)) { -+ atomic_inc(&cryptodev_sess); -+ ddebug(2, "Release cryptodev_sess = %d", atomic_read(&cryptodev_sess)); -+ } -+} ++static char clkin_name[32] = "dummy"; ++static char lcdc0_dclkp_name[32] = "dummy"; ++static char lcdc1_dclkp_name[32] = "dummy"; + -+/* Look up a session by ID and remove. */ -+static int -+crypto_finish_session(struct fcrypt *fcr, uint32_t sid) -+{ -+ struct csession *tmp, *ses_ptr; -+ struct list_head *head; -+ int ret = 0; ++#define PNAME(x) static const char *const x[] + -+ mutex_lock(&fcr->sem); -+ head = &fcr->list; -+ list_for_each_entry_safe(ses_ptr, tmp, head, entry) { -+ if (ses_ptr->sid == sid) { -+ list_del(&ses_ptr->entry); -+ crypto_destroy_session(ses_ptr); -+ break; -+ } -+ } ++PNAME(mux_pll_in_p) = { "lcdc0_clk", "lcdc1_clk", clkin_name }; ++PNAME(mux_pll_src_p) = { "vif_pll_clk", "scaler_pll_clk", }; ++PNAME(mux_scaler_in_src_p) = { "vif0_clk", "vif1_clk" }; ++PNAME(mux_hdmi_src_p) = { "vif1_clk", "scaler_clk", "vif0_clk" }; ++PNAME(mux_dither_src_p) = { "vif0_clk", "scaler_clk" }; ++PNAME(mux_vif0_src_p) = { "vif0_pre_clk", lcdc0_dclkp_name }; ++PNAME(mux_vif1_src_p) = { "vif1_pre_clk", lcdc1_dclkp_name }; ++PNAME(mux_codec_src_p) = { "codec_pre_clk", clkin_name }; + -+ if (unlikely(!ses_ptr)) { -+ derr(1, "Session with sid=0x%08X not found!", sid); -+ ret = -ENOENT; -+ } -+ mutex_unlock(&fcr->sem); ++/* Two PLL, one for dual datarate input logic, the other for scaler */ ++static const struct clk_pll_data rk618_clk_plls[] = { ++ RK618_PLL(VIF_PLL_CLK, "vif_pll_clk", "vif_pllin_clk", ++ RK618_CRU_PLL0_CON0, ++ 0), ++ RK618_PLL(SCALER_PLL_CLK, "scaler_pll_clk", "scaler_pllin_clk", ++ RK618_CRU_PLL1_CON0, ++ 0), ++}; + -+ return ret; -+} ++static const struct clk_mux_data rk618_clk_muxes[] = { ++ MUX(VIF_PLLIN_CLK, "vif_pllin_clk", mux_pll_in_p, ++ RK618_CRU_CLKSEL0, 6, 2, ++ 0), ++ MUX(SCALER_PLLIN_CLK, "scaler_pllin_clk", mux_pll_in_p, ++ RK618_CRU_CLKSEL0, 8, 2, ++ 0), ++ MUX(SCALER_IN_CLK, "scaler_in_clk", mux_scaler_in_src_p, ++ RK618_CRU_CLKSEL3, 15, 1, ++ 0), ++ MUX(DITHER_CLK, "dither_clk", mux_dither_src_p, ++ RK618_CRU_CLKSEL3, 14, 1, ++ 0), ++ MUX(VIF0_CLK, "vif0_clk", mux_vif0_src_p, ++ RK618_CRU_CLKSEL3, 1, 1, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), ++ MUX(VIF1_CLK, "vif1_clk", mux_vif1_src_p, ++ RK618_CRU_CLKSEL3, 7, 1, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), ++ MUX(CODEC_CLK, "codec_clk", mux_codec_src_p, ++ RK618_CRU_CLKSEL1, 1, 1, ++ CLK_SET_RATE_PARENT), ++}; + -+/* Remove all sessions when closing the file */ -+static int -+crypto_finish_all_sessions(struct fcrypt *fcr) -+{ -+ struct csession *tmp, *ses_ptr; -+ struct list_head *head; ++static const struct clk_divider_data rk618_clk_dividers[] = { ++ DIV(LCDC0_CLK, "lcdc0_clk", lcdc0_dclkp_name, ++ RK618_CRU_CLKSEL0, 0, 3, ++ 0), ++ DIV(LCDC1_CLK, "lcdc1_clk", 
lcdc1_dclkp_name, ++ RK618_CRU_CLKSEL0, 3, 3, ++ 0), ++}; + -+ mutex_lock(&fcr->sem); ++static const struct clk_gate_data rk618_clk_gates[] = { ++ GATE(MIPI_CLK, "mipi_clk", "dither_clk", ++ RK618_CRU_CLKSEL1, 10, ++ CLK_IGNORE_UNUSED), ++ GATE(LVDS_CLK, "lvds_clk", "dither_clk", ++ RK618_CRU_CLKSEL1, 9, ++ CLK_IGNORE_UNUSED), ++ GATE(LVTTL_CLK, "lvttl_clk", "dither_clk", ++ RK618_CRU_CLKSEL1, 12, ++ 0), ++ GATE(RGB_CLK, "rgb_clk", "dither_clk", ++ RK618_CRU_CLKSEL1, 11, ++ 0), ++}; + -+ head = &fcr->list; -+ list_for_each_entry_safe(ses_ptr, tmp, head, entry) { -+ list_del(&ses_ptr->entry); -+ crypto_destroy_session(ses_ptr); -+ } -+ mutex_unlock(&fcr->sem); ++static const struct clk_composite_data rk618_clk_composites[] = { ++ COMPOSITE(SCALER_CLK, "scaler_clk", mux_pll_src_p, ++ RK618_CRU_CLKSEL1, 3, 1, ++ RK618_CRU_CLKSEL1, 5, 3, ++ RK618_CRU_CLKSEL1, 4, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), ++ COMPOSITE_NODIV(HDMI_CLK, "hdmi_clk", mux_hdmi_src_p, ++ RK618_CRU_CLKSEL3, 12, 2, ++ RK618_CRU_CLKSEL1, 8, ++ 0), ++ COMPOSITE(VIF0_PRE_CLK, "vif0_pre_clk", mux_pll_src_p, ++ RK618_CRU_CLKSEL3, 0, 1, ++ RK618_CRU_CLKSEL3, 3, 3, ++ RK618_CRU_CLKSEL3, 2, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), ++ COMPOSITE(VIF1_PRE_CLK, "vif1_pre_clk", mux_pll_src_p, ++ RK618_CRU_CLKSEL3, 6, 1, ++ RK618_CRU_CLKSEL3, 9, 3, ++ RK618_CRU_CLKSEL3, 8, ++ CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), ++ COMPOSITE_FRAC_NOGATE(0, "codec_pre_clk", mux_pll_src_p, ++ RK618_CRU_CLKSEL1, 0, 1, ++ RK618_CRU_CLKSEL2, ++ 0), ++}; + -+ return 0; ++static void rk618_clk_add_lookup(struct rk618_cru *cru, struct clk *clk, ++ unsigned int id) ++{ ++ if (cru->clk_data.clks && id) ++ cru->clk_data.clks[id] = clk; +} + -+/* Look up session by session ID. The returned session is locked. 
*/ -+struct csession * -+crypto_get_session_by_sid(struct fcrypt *fcr, uint32_t sid) ++static void rk618_clk_register_muxes(struct rk618_cru *cru) +{ -+ struct csession *ses_ptr, *retval = NULL; ++ struct clk *clk; ++ unsigned int i; + -+ if (unlikely(fcr == NULL)) -+ return NULL; ++ for (i = 0; i < ARRAY_SIZE(rk618_clk_muxes); i++) { ++ const struct clk_mux_data *data = &rk618_clk_muxes[i]; + -+ mutex_lock(&fcr->sem); -+ list_for_each_entry(ses_ptr, &fcr->list, entry) { -+ if (ses_ptr->sid == sid) { -+ mutex_lock(&ses_ptr->sem); -+ retval = ses_ptr; -+ break; ++ clk = devm_clk_regmap_register_mux(cru->dev, data->name, ++ data->parent_names, ++ data->num_parents, ++ cru->regmap, data->reg, ++ data->shift, data->width, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; + } -+ } -+ mutex_unlock(&fcr->sem); + -+ return retval; ++ rk618_clk_add_lookup(cru, clk, data->id); ++ } +} + -+static void mutex_lock_double(struct mutex *a, struct mutex *b) ++static void rk618_clk_register_dividers(struct rk618_cru *cru) +{ -+ if (b < a) -+ swap(a, b); -+ -+ mutex_lock(a); -+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING); -+} ++ struct clk *clk; ++ unsigned int i; + -+int -+crypto_get_sessions_by_sid(struct fcrypt *fcr, -+ uint32_t sid_1, struct csession **ses_ptr_1, -+ uint32_t sid_2, struct csession **ses_ptr_2) -+{ -+ struct csession *ses_ptr; -+ int retval; ++ for (i = 0; i < ARRAY_SIZE(rk618_clk_dividers); i++) { ++ const struct clk_divider_data *data = &rk618_clk_dividers[i]; + -+ if (unlikely(fcr == NULL)) { -+ retval = -ENOENT; -+ goto out; -+ } ++ clk = devm_clk_regmap_register_divider(cru->dev, data->name, ++ data->parent_name, ++ cru->regmap, data->reg, ++ data->shift, data->width, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } + -+ if (sid_1 == sid_2) { -+ retval = -EDEADLK; -+ goto out; ++ rk618_clk_add_lookup(cru, clk, data->id); + } ++} + -+ mutex_lock(&fcr->sem); -+ -+ list_for_each_entry(ses_ptr, &fcr->list, entry) { -+ if (ses_ptr->sid == sid_1) -+ *ses_ptr_1 = ses_ptr; -+ else if (ses_ptr->sid == sid_2) -+ *ses_ptr_2 = ses_ptr; -+ } ++static void rk618_clk_register_gates(struct rk618_cru *cru) ++{ ++ struct clk *clk; ++ unsigned int i; + -+ if (*ses_ptr_1 && *ses_ptr_2) { -+ mutex_lock_double(&(*ses_ptr_1)->sem, &(*ses_ptr_2)->sem); -+ retval = 0; -+ } else { -+ retval = -ENOENT; -+ } ++ for (i = 0; i < ARRAY_SIZE(rk618_clk_gates); i++) { ++ const struct clk_gate_data *data = &rk618_clk_gates[i]; + -+ mutex_unlock(&fcr->sem); ++ clk = devm_clk_regmap_register_gate(cru->dev, data->name, ++ data->parent_name, ++ cru->regmap, ++ data->reg, data->shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } + -+out: -+ if (retval) { -+ *ses_ptr_1 = NULL; -+ *ses_ptr_2 = NULL; ++ rk618_clk_add_lookup(cru, clk, data->id); + } -+ return retval; +} + -+#ifdef CIOCCPHASH -+/* Copy the hash state from one session to another */ -+static int -+crypto_copy_hash_state(struct fcrypt *fcr, uint32_t dst_sid, uint32_t src_sid) ++static void rk618_clk_register_composites(struct rk618_cru *cru) +{ -+ struct csession *src_ses, *dst_ses; -+ int ret; ++ struct clk *clk; ++ unsigned int i; + -+ ret = crypto_get_sessions_by_sid(fcr, src_sid, &src_ses, -+ dst_sid, &dst_ses); -+ if (unlikely(ret)) { -+ derr(1, "Failed to get sesssions with sid=0x%08X sid=%0x08X!", -+ src_sid, dst_sid); -+ return 
ret; -+ } ++ for (i = 0; i < ARRAY_SIZE(rk618_clk_composites); i++) { ++ const struct clk_composite_data *data = ++ &rk618_clk_composites[i]; + -+ ret = cryptodev_hash_copy(&dst_ses->hdata, &src_ses->hdata); -+ crypto_put_session(src_ses); -+ crypto_put_session(dst_ses); -+ return ret; ++ clk = devm_clk_regmap_register_composite(cru->dev, data->name, ++ data->parent_names, ++ data->num_parents, ++ cru->regmap, ++ data->mux_reg, ++ data->mux_shift, ++ data->mux_width, ++ data->div_reg, ++ data->div_shift, ++ data->div_width, ++ data->div_flags, ++ data->gate_reg, ++ data->gate_shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } ++ ++ rk618_clk_add_lookup(cru, clk, data->id); ++ } +} -+#endif /* CIOCCPHASH */ + -+static void cryptask_routine(struct work_struct *work) ++static void rk618_clk_register_plls(struct rk618_cru *cru) +{ -+ struct crypt_priv *pcr = container_of(work, struct crypt_priv, cryptask); -+ struct todo_list_item *item; -+ LIST_HEAD(tmp); -+ -+ /* fetch all pending jobs into the temporary list */ -+ mutex_lock(&pcr->todo.lock); -+ list_cut_position(&tmp, &pcr->todo.list, pcr->todo.list.prev); -+ mutex_unlock(&pcr->todo.lock); ++ struct clk *clk; ++ unsigned int i; + -+ /* handle each job locklessly */ -+ list_for_each_entry(item, &tmp, __hook) { -+ item->result = crypto_run(&pcr->fcrypt, &item->kcop); -+ if (unlikely(item->result)) -+ derr(0, "crypto_run() failed: %d", item->result); -+ } ++ for (i = 0; i < ARRAY_SIZE(rk618_clk_plls); i++) { ++ const struct clk_pll_data *data = &rk618_clk_plls[i]; + -+ /* push all handled jobs to the done list at once */ -+ mutex_lock(&pcr->done.lock); -+ list_splice_tail(&tmp, &pcr->done.list); -+ mutex_unlock(&pcr->done.lock); ++ clk = devm_clk_regmap_register_pll(cru->dev, data->name, ++ data->parent_name, ++ cru->regmap, ++ data->reg, ++ data->pd_shift, ++ data->dsmpd_shift, ++ data->lock_shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } + -+ /* wake for POLLIN */ -+ wake_up_interruptible(&pcr->user_waiter); ++ rk618_clk_add_lookup(cru, clk, data->id); ++ } +} + -+/* ====== /dev/crypto ====== */ -+static atomic_t cryptodev_node = ATOMIC_INIT(1); -+ -+static int -+cryptodev_open(struct inode *inode, struct file *filp) ++static int rk618_cru_probe(struct platform_device *pdev) +{ -+ struct todo_list_item *tmp, *tmp_next; -+ struct crypt_priv *pcr; -+ int i; -+ -+ /* Non-multithreaded can only be opened once */ -+ if (!rk_cryptodev_multi_thread(NULL) && -+ !atomic_dec_and_test(&cryptodev_node)) { -+ atomic_inc(&cryptodev_node); -+ ddebug(2, "Non-multithreaded can only be opened once. 
node = %d", -+ atomic_read(&cryptodev_node)); -+ return -EBUSY; -+ } ++ struct rk618 *rk618 = dev_get_drvdata(pdev->dev.parent); ++ struct device *dev = &pdev->dev; ++ struct rk618_cru *cru; ++ struct clk **clk_table; ++ const char *parent_name; ++ struct clk *clk; ++ int ret, i; + -+ /* make sure sess == 1 after open */ -+ atomic_set(&cryptodev_sess, 1); ++ if (!of_device_is_available(dev->of_node)) ++ return -ENODEV; + -+ pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); -+ if (!pcr) ++ cru = devm_kzalloc(dev, sizeof(*cru), GFP_KERNEL); ++ if (!cru) + return -ENOMEM; -+ filp->private_data = pcr; -+ -+ mutex_init(&pcr->fcrypt.sem); -+ mutex_init(&pcr->free.lock); -+ mutex_init(&pcr->todo.lock); -+ mutex_init(&pcr->done.lock); + -+ INIT_LIST_HEAD(&pcr->fcrypt.list); -+ INIT_LIST_HEAD(&pcr->fcrypt.dma_map_list); -+ INIT_LIST_HEAD(&pcr->free.list); -+ INIT_LIST_HEAD(&pcr->todo.list); -+ INIT_LIST_HEAD(&pcr->done.list); -+ -+ INIT_WORK(&pcr->cryptask, cryptask_routine); -+ -+ init_waitqueue_head(&pcr->user_waiter); ++ clk_table = devm_kcalloc(dev, NR_CLKS, sizeof(struct clk *), ++ GFP_KERNEL); ++ if (!clk_table) ++ return -ENOMEM; + -+ for (i = 0; i < DEF_COP_RINGSIZE; i++) { -+ tmp = kzalloc(sizeof(struct todo_list_item), GFP_KERNEL); -+ if (!tmp) -+ goto err_ringalloc; -+ pcr->itemcount++; -+ ddebug(2, "allocated new item at %p", tmp); -+ list_add(&tmp->__hook, &pcr->free.list); -+ } ++ for (i = 0; i < NR_CLKS; i++) ++ clk_table[i] = ERR_PTR(-ENOENT); + -+ ddebug(2, "Cryptodev handle initialised, %d elements in queue", -+ DEF_COP_RINGSIZE); -+ return 0; ++ cru->dev = dev; ++ cru->parent = rk618; ++ cru->regmap = rk618->regmap; ++ cru->clk_data.clks = clk_table; ++ cru->clk_data.clk_num = NR_CLKS; ++ platform_set_drvdata(pdev, cru); + -+/* In case of errors, free any memory allocated so far */ -+err_ringalloc: -+ list_for_each_entry_safe(tmp, tmp_next, &pcr->free.list, __hook) { -+ list_del(&tmp->__hook); -+ kfree(tmp); ++ clk = devm_clk_get(dev, "clkin"); ++ if (IS_ERR(clk)) { ++ ret = PTR_ERR(clk); ++ dev_err(dev, "failed to get clkin: %d\n", ret); ++ return ret; + } -+ mutex_destroy(&pcr->done.lock); -+ mutex_destroy(&pcr->todo.lock); -+ mutex_destroy(&pcr->free.lock); -+ mutex_destroy(&pcr->fcrypt.sem); -+ kfree(pcr); -+ filp->private_data = NULL; -+ return -ENOMEM; -+} + -+static int -+cryptodev_release(struct inode *inode, struct file *filp) -+{ -+ struct crypt_priv *pcr = filp->private_data; -+ struct todo_list_item *item, *item_safe; -+ int items_freed = 0; ++ strlcpy(clkin_name, __clk_get_name(clk), sizeof(clkin_name)); + -+ if (!pcr) -+ return 0; ++ clk = devm_clk_get(dev, "lcdc0_dclkp"); ++ if (IS_ERR(clk)) { ++ if (PTR_ERR(clk) != -ENOENT) { ++ ret = PTR_ERR(clk); ++ dev_err(dev, "failed to get lcdc0_dclkp: %d\n", ret); ++ return ret; ++ } + -+ /* Non-multithreaded can only be opened once */ -+ if (!rk_cryptodev_multi_thread(NULL)) { -+ atomic_inc(&cryptodev_node); -+ ddebug(2, "Release cryptodev_node = %d", atomic_read(&cryptodev_node)); ++ clk = NULL; + } + -+ cancel_work_sync(&pcr->cryptask); -+ -+ list_splice_tail(&pcr->todo.list, &pcr->free.list); -+ list_splice_tail(&pcr->done.list, &pcr->free.list); ++ parent_name = __clk_get_name(clk); ++ if (parent_name) ++ strlcpy(lcdc0_dclkp_name, parent_name, ++ sizeof(lcdc0_dclkp_name)); + -+ list_for_each_entry_safe(item, item_safe, &pcr->free.list, __hook) { -+ ddebug(2, "freeing item at %p", item); -+ list_del(&item->__hook); -+ kfree(item); -+ items_freed++; -+ } ++ clk = devm_clk_get(dev, "lcdc1_dclkp"); ++ if (IS_ERR(clk)) { ++ if 
(PTR_ERR(clk) != -ENOENT) { ++ ret = PTR_ERR(clk); ++ dev_err(dev, "failed to get lcdc1_dclkp: %d\n", ret); ++ return ret; ++ } + -+ if (items_freed != pcr->itemcount) { -+ derr(0, "freed %d items, but %d should exist!", -+ items_freed, pcr->itemcount); ++ clk = NULL; + } + -+ crypto_finish_all_sessions(&pcr->fcrypt); -+ -+ mutex_destroy(&pcr->done.lock); -+ mutex_destroy(&pcr->todo.lock); -+ mutex_destroy(&pcr->free.lock); -+ mutex_destroy(&pcr->fcrypt.sem); ++ parent_name = __clk_get_name(clk); ++ if (parent_name) ++ strlcpy(lcdc1_dclkp_name, parent_name, ++ sizeof(lcdc1_dclkp_name)); + -+ kfree(pcr); -+ filp->private_data = NULL; ++ rk618_clk_register_plls(cru); ++ rk618_clk_register_muxes(cru); ++ rk618_clk_register_dividers(cru); ++ rk618_clk_register_gates(cru); ++ rk618_clk_register_composites(cru); + -+ ddebug(2, "Cryptodev handle deinitialised, %d elements freed", -+ items_freed); -+ return 0; ++ return of_clk_add_provider(dev->of_node, of_clk_src_onecell_get, ++ &cru->clk_data); +} + -+#ifdef ENABLE_ASYNC -+/* enqueue a job for asynchronous completion -+ * -+ * returns: -+ * -EBUSY when there are no free queue slots left -+ * (and the number of slots has reached it MAX_COP_RINGSIZE) -+ * -EFAULT when there was a memory allocation error -+ * 0 on success */ -+static int crypto_async_run(struct crypt_priv *pcr, struct kernel_crypt_op *kcop) ++static int rk618_cru_remove(struct platform_device *pdev) +{ -+ struct todo_list_item *item = NULL; -+ -+ if (unlikely(kcop->cop.flags & COP_FLAG_NO_ZC)) -+ return -EINVAL; -+ -+ mutex_lock(&pcr->free.lock); -+ if (likely(!list_empty(&pcr->free.list))) { -+ item = list_first_entry(&pcr->free.list, -+ struct todo_list_item, __hook); -+ list_del(&item->__hook); -+ } else if (pcr->itemcount < MAX_COP_RINGSIZE) { -+ pcr->itemcount++; -+ } else { -+ mutex_unlock(&pcr->free.lock); -+ return -EBUSY; -+ } -+ mutex_unlock(&pcr->free.lock); -+ -+ if (unlikely(!item)) { -+ item = kzalloc(sizeof(struct todo_list_item), GFP_KERNEL); -+ if (unlikely(!item)) -+ return -EFAULT; -+ dinfo(1, "increased item count to %d", pcr->itemcount); -+ } -+ -+ memcpy(&item->kcop, kcop, sizeof(struct kernel_crypt_op)); -+ -+ mutex_lock(&pcr->todo.lock); -+ list_add_tail(&item->__hook, &pcr->todo.list); -+ mutex_unlock(&pcr->todo.lock); ++ of_clk_del_provider(pdev->dev.of_node); + -+ queue_work(cryptodev_wq, &pcr->cryptask); + return 0; +} + -+/* get the first completed job from the "done" queue -+ * -+ * returns: -+ * -EBUSY if no completed jobs are ready (yet) -+ * the return value of crypto_run() otherwise */ -+static int crypto_async_fetch(struct crypt_priv *pcr, -+ struct kernel_crypt_op *kcop) -+{ -+ struct todo_list_item *item; -+ int retval; -+ -+ mutex_lock(&pcr->done.lock); -+ if (list_empty(&pcr->done.list)) { -+ mutex_unlock(&pcr->done.lock); -+ return -EBUSY; -+ } -+ item = list_first_entry(&pcr->done.list, struct todo_list_item, __hook); -+ list_del(&item->__hook); -+ mutex_unlock(&pcr->done.lock); ++static const struct of_device_id rk618_cru_of_match[] = { ++ { .compatible = "rockchip,rk618-cru", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, rk618_cru_of_match); + -+ memcpy(kcop, &item->kcop, sizeof(struct kernel_crypt_op)); -+ retval = item->result; ++static struct platform_driver rk618_cru_driver = { ++ .driver = { ++ .name = "rk618-cru", ++ .of_match_table = of_match_ptr(rk618_cru_of_match), ++ }, ++ .probe = rk618_cru_probe, ++ .remove = rk618_cru_remove, ++}; ++module_platform_driver(rk618_cru_driver); + -+ mutex_lock(&pcr->free.lock); -+ 
list_add_tail(&item->__hook, &pcr->free.list); -+ mutex_unlock(&pcr->free.lock); ++MODULE_AUTHOR("Wyon Bi "); ++MODULE_DESCRIPTION("Rockchip rk618 CRU driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/clk/rockchip/regmap/clk-rk628.c b/drivers/clk/rockchip/regmap/clk-rk628.c +new file mode 100644 +index 000000000..7f501db66 +--- /dev/null ++++ b/drivers/clk/rockchip/regmap/clk-rk628.c +@@ -0,0 +1,609 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2020 Rockchip Electronics Co. Ltd. ++ * ++ * Author: Wyon Bi ++ */ + -+ /* wake for POLLOUT */ -+ wake_up_interruptible(&pcr->user_waiter); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ return retval; -+} -+#endif ++#include "clk-regmap.h" + -+/* this function has to be called from process context */ -+static int fill_kcop_from_cop(struct kernel_crypt_op *kcop, struct fcrypt *fcr) -+{ -+ struct crypt_op *cop = &kcop->cop; -+ struct csession *ses_ptr; -+ int rc; ++#define RK628_PLL(_id, _name, _parent_name, _reg, _flags) \ ++ PLL(_id, _name, _parent_name, _reg, 13, 12, 10, _flags) + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", cop->ses); -+ return -EINVAL; -+ } -+ kcop->ivlen = cop->iv ? ses_ptr->cdata.ivsize : 0; -+ kcop->digestsize = 0; /* will be updated during operation */ ++#define REG(x) ((x) + 0xc0000) + -+ crypto_put_session(ses_ptr); ++#define CRU_CPLL_CON0 REG(0x0000) ++#define CRU_CPLL_CON1 REG(0x0004) ++#define CRU_CPLL_CON2 REG(0x0008) ++#define CRU_CPLL_CON3 REG(0x000c) ++#define CRU_CPLL_CON4 REG(0x0010) ++#define CRU_GPLL_CON0 REG(0x0020) ++#define CRU_GPLL_CON1 REG(0x0024) ++#define CRU_GPLL_CON2 REG(0x0028) ++#define CRU_GPLL_CON3 REG(0x002c) ++#define CRU_GPLL_CON4 REG(0x0030) ++#define CRU_MODE_CON REG(0x0060) ++#define CRU_CLKSEL_CON00 REG(0x0080) ++#define CRU_CLKSEL_CON01 REG(0x0084) ++#define CRU_CLKSEL_CON02 REG(0x0088) ++#define CRU_CLKSEL_CON03 REG(0x008c) ++#define CRU_CLKSEL_CON04 REG(0x0090) ++#define CRU_CLKSEL_CON05 REG(0x0094) ++#define CRU_CLKSEL_CON06 REG(0x0098) ++#define CRU_CLKSEL_CON07 REG(0x009c) ++#define CRU_CLKSEL_CON08 REG(0x00a0) ++#define CRU_CLKSEL_CON09 REG(0x00a4) ++#define CRU_CLKSEL_CON10 REG(0x00a8) ++#define CRU_CLKSEL_CON11 REG(0x00ac) ++#define CRU_CLKSEL_CON12 REG(0x00b0) ++#define CRU_CLKSEL_CON13 REG(0x00b4) ++#define CRU_CLKSEL_CON14 REG(0x00b8) ++#define CRU_CLKSEL_CON15 REG(0x00bc) ++#define CRU_CLKSEL_CON16 REG(0x00c0) ++#define CRU_CLKSEL_CON17 REG(0x00c4) ++#define CRU_CLKSEL_CON18 REG(0x00c8) ++#define CRU_CLKSEL_CON20 REG(0x00d0) ++#define CRU_CLKSEL_CON21 REG(0x00d4) ++#define CRU_GATE_CON00 REG(0x0180) ++#define CRU_GATE_CON01 REG(0x0184) ++#define CRU_GATE_CON02 REG(0x0188) ++#define CRU_GATE_CON03 REG(0x018c) ++#define CRU_GATE_CON04 REG(0x0190) ++#define CRU_GATE_CON05 REG(0x0194) ++#define CRU_SOFTRST_CON00 REG(0x0200) ++#define CRU_SOFTRST_CON01 REG(0x0204) ++#define CRU_SOFTRST_CON02 REG(0x0208) ++#define CRU_SOFTRST_CON04 REG(0x0210) ++#define CRU_MAX_REGISTER CRU_SOFTRST_CON04 + -+ kcop->task = current; -+ kcop->mm = current->mm; ++#define reset_to_cru(_rst) container_of(_rst, struct rk628_cru, rcdev) + -+ if (cop->iv) { -+ rc = copy_from_user(kcop->iv, cop->iv, kcop->ivlen); -+ if (unlikely(rc)) { -+ derr(1, "error copying IV (%d bytes), copy_from_user returned %d for address %p", -+ kcop->ivlen, rc, cop->iv); -+ return -EFAULT; -+ } -+ } ++struct rk628_cru { ++ struct device *dev; ++ 
struct rk628 *parent; ++ struct regmap *regmap; ++ struct reset_controller_dev rcdev; ++ struct clk_onecell_data clk_data; ++}; + -+ return 0; -+} ++#define CNAME(x) "rk628_" x + -+/* this function has to be called from process context */ -+static int fill_cop_from_kcop(struct kernel_crypt_op *kcop, struct fcrypt *fcr) -+{ -+ int ret; ++#define PNAME(x) static const char *const x[] + -+ if (kcop->digestsize) { -+ ret = copy_to_user(kcop->cop.mac, -+ kcop->hash_output, kcop->digestsize); -+ if (unlikely(ret)) -+ return -EFAULT; -+ } -+ if (kcop->ivlen && kcop->cop.flags & COP_FLAG_WRITE_IV) { -+ ret = copy_to_user(kcop->cop.iv, -+ kcop->iv, kcop->ivlen); -+ if (unlikely(ret)) -+ return -EFAULT; -+ } -+ return 0; -+} ++PNAME(mux_cpll_osc_p) = { CNAME("xin_osc0_func"), CNAME("clk_cpll") }; ++PNAME(mux_gpll_osc_p) = { CNAME("xin_osc0_func"), CNAME("clk_gpll") }; ++PNAME(mux_cpll_gpll_mux_p) = { CNAME("clk_cpll_mux"), CNAME("clk_gpll_mux") }; ++PNAME(mux_mclk_i2s_8ch_p) = { CNAME("clk_i2s_8ch_src"), ++ CNAME("clk_i2s_8ch_frac"), CNAME("i2s_mclkin"), ++ CNAME("xin_osc0_half") }; ++PNAME(mux_i2s_mclkout_p) = { CNAME("mclk_i2s_8ch"), CNAME("xin_osc0_half") }; ++PNAME(mux_clk_testout_p) = { CNAME("xin_osc0_func"), CNAME("xin_osc0_half"), ++ CNAME("clk_gpll"), CNAME("clk_gpll_mux"), ++ CNAME("clk_cpll"), CNAME("clk_gpll_mux"), ++ CNAME("pclk_logic"), CNAME("sclk_vop"), ++ CNAME("mclk_i2s_8ch"), CNAME("i2s_mclkout"), ++ CNAME("dummy"), CNAME("clk_hdmirx_aud"), ++ CNAME("clk_hdmirx_cec"), CNAME("clk_imodet"), ++ CNAME("clk_txesc"), CNAME("clk_gpio_db0") }; + -+static int kcop_from_user(struct kernel_crypt_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ if (unlikely(copy_from_user(&kcop->cop, arg, sizeof(kcop->cop)))) -+ return -EFAULT; ++static const struct clk_pll_data rk628_clk_plls[] = { ++ RK628_PLL(CGU_CLK_CPLL, CNAME("clk_cpll"), CNAME("xin_osc0_func"), ++ CRU_CPLL_CON0, ++ 0), ++ RK628_PLL(CGU_CLK_GPLL, CNAME("clk_gpll"), CNAME("xin_osc0_func"), ++ CRU_GPLL_CON0, ++ 0), ++}; + -+ return fill_kcop_from_cop(kcop, fcr); -+} ++static const struct clk_mux_data rk628_clk_muxes[] = { ++ MUX(CGU_CLK_CPLL_MUX, CNAME("clk_cpll_mux"), mux_cpll_osc_p, ++ CRU_MODE_CON, 0, 1, ++ 0), ++ MUX(CGU_CLK_GPLL_MUX, CNAME("clk_gpll_mux"), mux_gpll_osc_p, ++ CRU_MODE_CON, 2, 1, ++ CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT), ++}; + -+static int kcop_to_user(struct kernel_crypt_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ int ret; ++static const struct clk_gate_data rk628_clk_gates[] = { ++ GATE(CGU_PCLK_GPIO0, CNAME("pclk_gpio0"), CNAME("pclk_logic"), ++ CRU_GATE_CON01, 0, ++ 0), ++ GATE(CGU_PCLK_GPIO1, CNAME("pclk_gpio1"), CNAME("pclk_logic"), ++ CRU_GATE_CON01, 1, ++ 0), ++ GATE(CGU_PCLK_GPIO2, CNAME("pclk_gpio2"), CNAME("pclk_logic"), ++ CRU_GATE_CON01, 2, ++ 0), ++ GATE(CGU_PCLK_GPIO3, CNAME("pclk_gpio3"), CNAME("pclk_logic"), ++ CRU_GATE_CON01, 3, ++ 0), + -+ ret = fill_cop_from_kcop(kcop, fcr); -+ if (unlikely(ret)) { -+ derr(1, "Error in fill_cop_from_kcop"); -+ return ret; -+ } ++ GATE(CGU_PCLK_TXPHY_CON, CNAME("pclk_txphy_con"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 3, ++ CLK_IGNORE_UNUSED), ++ GATE(CGU_PCLK_EFUSE, CNAME("pclk_efuse"), CNAME("pclk_logic"), ++ CRU_GATE_CON00, 5, ++ 0), ++ GATE(0, CNAME("pclk_i2c2apb"), CNAME("pclk_logic"), ++ CRU_GATE_CON00, 3, ++ CLK_IGNORE_UNUSED), ++ GATE(0, CNAME("pclk_cru"), CNAME("pclk_logic"), ++ CRU_GATE_CON00, 1, ++ CLK_IGNORE_UNUSED), ++ GATE(0, CNAME("pclk_adapter"), CNAME("pclk_logic"), ++ CRU_GATE_CON00, 7, ++ CLK_IGNORE_UNUSED), ++ 
GATE(0, CNAME("pclk_regfile"), CNAME("pclk_logic"), ++ CRU_GATE_CON00, 2, ++ CLK_IGNORE_UNUSED), ++ GATE(CGU_PCLK_DSI0, CNAME("pclk_dsi0"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 6, ++ 0), ++ GATE(CGU_PCLK_DSI1, CNAME("pclk_dsi1"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 7, ++ 0), ++ GATE(CGU_PCLK_CSI, CNAME("pclk_csi"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 8, ++ 0), ++ GATE(CGU_PCLK_HDMITX, CNAME("pclk_hdmitx"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 4, ++ 0), ++ GATE(CGU_PCLK_RXPHY, CNAME("pclk_rxphy"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 0, ++ 0), ++ GATE(CGU_PCLK_HDMIRX, CNAME("pclk_hdmirx"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 2, ++ 0), ++ GATE(CGU_PCLK_GVIHOST, CNAME("pclk_gvihost"), CNAME("pclk_logic"), ++ CRU_GATE_CON02, 5, ++ 0), ++ GATE(CGU_CLK_CFG_DPHY0, CNAME("clk_cfg_dphy0"), CNAME("xin_osc0_func"), ++ CRU_GATE_CON02, 13, ++ 0), ++ GATE(CGU_CLK_CFG_DPHY1, CNAME("clk_cfg_dphy1"), CNAME("xin_osc0_func"), ++ CRU_GATE_CON02, 14, ++ 0), ++ GATE(CGU_CLK_TXESC, CNAME("clk_txesc"), CNAME("xin_osc0_func"), ++ CRU_GATE_CON02, 12, ++ 0), ++}; + -+ if (unlikely(copy_to_user(arg, &kcop->cop, sizeof(kcop->cop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; -+ } -+ return 0; -+} ++static const struct clk_composite_data rk628_clk_composites[] = { ++ COMPOSITE(CGU_CLK_IMODET, CNAME("clk_imodet"), mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON05, 5, 1, ++ CRU_CLKSEL_CON05, 0, 5, ++ CRU_GATE_CON02, 11, ++ 0), ++ COMPOSITE(CGU_CLK_HDMIRX_AUD, CNAME("clk_hdmirx_aud"), ++ mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON05, 15, 1, ++ CRU_CLKSEL_CON05, 6, 8, ++ CRU_GATE_CON02, 10, ++ CLK_SET_RATE_NO_REPARENT | CLK_SET_RATE_PARENT), ++ COMPOSITE_FRAC_NOMUX(CGU_CLK_HDMIRX_CEC, CNAME("clk_hdmirx_cec"), ++ CNAME("xin_osc0_func"), ++ CRU_CLKSEL_CON12, ++ CRU_GATE_CON01, 15, ++ 0), ++ COMPOSITE_FRAC(CGU_CLK_RX_READ, CNAME("clk_rx_read"), ++ mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON02, 8, 1, ++ CRU_CLKSEL_CON14, ++ CRU_GATE_CON00, 11, ++ 0), ++ COMPOSITE_FRAC(CGU_SCLK_VOP, CNAME("sclk_vop"), mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON02, 9, 1, ++ CRU_CLKSEL_CON13, ++ CRU_GATE_CON00, 13, ++ CLK_SET_RATE_NO_REPARENT), ++ COMPOSITE(CGU_PCLK_LOGIC, CNAME("pclk_logic"), mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON00, 7, 1, ++ CRU_CLKSEL_CON00, 0, 5, ++ CRU_GATE_CON00, 0, ++ 0), ++ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB0, CNAME("clk_gpio_db0"), ++ CNAME("xin_osc0_func"), ++ CRU_CLKSEL_CON08, 0, 10, ++ CRU_GATE_CON01, 4, ++ 0), ++ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB1, CNAME("clk_gpio_db1"), ++ CNAME("xin_osc0_func"), ++ CRU_CLKSEL_CON09, 0, 10, ++ CRU_GATE_CON01, 5, ++ 0), ++ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB2, CNAME("clk_gpio_db2"), ++ CNAME("xin_osc0_func"), ++ CRU_CLKSEL_CON10, 0, 10, ++ CRU_GATE_CON01, 6, ++ 0), ++ COMPOSITE_NOMUX(CGU_CLK_GPIO_DB3, CNAME("clk_gpio_db3"), ++ CNAME("xin_osc0_func"), ++ CRU_CLKSEL_CON11, 0, 10, ++ CRU_GATE_CON01, 7, ++ 0), ++ COMPOSITE(CGU_CLK_I2S_8CH_SRC, CNAME("clk_i2s_8ch_src"), ++ mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON03, 13, 1, ++ CRU_CLKSEL_CON03, 8, 5, ++ CRU_GATE_CON03, 9, ++ 0), ++ COMPOSITE_FRAC_NOMUX(CGU_CLK_I2S_8CH_FRAC, CNAME("clk_i2s_8ch_frac"), ++ CNAME("clk_i2s_8ch_src"), ++ CRU_CLKSEL_CON04, ++ CRU_GATE_CON03, 10, ++ 0), ++ COMPOSITE_NODIV(CGU_MCLK_I2S_8CH, CNAME("mclk_i2s_8ch"), ++ mux_mclk_i2s_8ch_p, ++ CRU_CLKSEL_CON03, 14, 2, ++ CRU_GATE_CON03, 11, ++ CLK_SET_RATE_PARENT), ++ COMPOSITE_NODIV(CGU_I2S_MCLKOUT, CNAME("i2s_mclkout"), ++ mux_i2s_mclkout_p, ++ CRU_CLKSEL_CON03, 7, 1, ++ CRU_GATE_CON03, 12, ++ CLK_SET_RATE_PARENT), ++ COMPOSITE(CGU_BT1120DEC, 
CNAME("clk_bt1120dec"), mux_cpll_gpll_mux_p, ++ CRU_CLKSEL_CON02, 7, 1, ++ CRU_CLKSEL_CON02, 0, 5, ++ CRU_GATE_CON00, 12, ++ 0), ++ COMPOSITE(CGU_CLK_TESTOUT, CNAME("clk_testout"), mux_clk_testout_p, ++ CRU_CLKSEL_CON06, 0, 4, ++ CRU_CLKSEL_CON06, 8, 6, ++ CRU_GATE_CON04, 7, ++ 0), ++}; + -+static inline void tfm_info_to_alg_info(struct alg_info *dst, struct crypto_tfm *tfm) ++static void rk628_clk_add_lookup(struct rk628_cru *cru, struct clk *clk, ++ unsigned int id) +{ -+ snprintf(dst->cra_name, CRYPTODEV_MAX_ALG_NAME, -+ "%s", crypto_tfm_alg_name(tfm)); -+ snprintf(dst->cra_driver_name, CRYPTODEV_MAX_ALG_NAME, -+ "%s", crypto_tfm_alg_driver_name(tfm)); ++ if (cru->clk_data.clks && id) ++ cru->clk_data.clks[id] = clk; +} + -+#ifndef CRYPTO_ALG_KERN_DRIVER_ONLY -+static unsigned int is_known_accelerated(struct crypto_tfm *tfm) ++static void rk628_clk_register_muxes(struct rk628_cru *cru) +{ -+ const char *name = crypto_tfm_alg_driver_name(tfm); ++ struct clk *clk; ++ unsigned int i; + -+ if (name == NULL) -+ return 1; /* assume accelerated */ ++ for (i = 0; i < ARRAY_SIZE(rk628_clk_muxes); i++) { ++ const struct clk_mux_data *data = &rk628_clk_muxes[i]; + -+ /* look for known crypto engine names */ -+ if (strstr(name, "-talitos") || -+ !strncmp(name, "mv-", 3) || -+ !strncmp(name, "atmel-", 6) || -+ strstr(name, "geode") || -+ strstr(name, "hifn") || -+ strstr(name, "-ixp4xx") || -+ strstr(name, "-omap") || -+ strstr(name, "-picoxcell") || -+ strstr(name, "-s5p") || -+ strstr(name, "-ppc4xx") || -+ strstr(name, "-caam") || -+ strstr(name, "-n2")) -+ return 1; ++ clk = devm_clk_regmap_register_mux(cru->dev, data->name, ++ data->parent_names, ++ data->num_parents, ++ cru->regmap, data->reg, ++ data->shift, data->width, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } + -+ return 0; ++ rk628_clk_add_lookup(cru, clk, data->id); ++ } +} -+#endif + -+static int get_session_info(struct fcrypt *fcr, struct session_info_op *siop) ++static void rk628_clk_register_gates(struct rk628_cru *cru) +{ -+ struct csession *ses_ptr; -+ struct crypto_tfm *tfm; ++ struct clk *clk; ++ unsigned int i; + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, siop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", siop->ses); -+ return -EINVAL; -+ } ++ for (i = 0; i < ARRAY_SIZE(rk628_clk_gates); i++) { ++ const struct clk_gate_data *data = &rk628_clk_gates[i]; + -+ siop->flags = 0; ++ clk = devm_clk_regmap_register_gate(cru->dev, data->name, ++ data->parent_name, ++ cru->regmap, ++ data->reg, data->shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; ++ } + -+ if (ses_ptr->cdata.init) { -+ if (ses_ptr->cdata.aead == 0) -+ tfm = cryptodev_crypto_blkcipher_tfm(ses_ptr->cdata.async.s); -+ else -+ tfm = crypto_aead_tfm(ses_ptr->cdata.async.as); -+ tfm_info_to_alg_info(&siop->cipher_info, tfm); -+#ifdef CRYPTO_ALG_KERN_DRIVER_ONLY -+ if (tfm->__crt_alg->cra_flags & CRYPTO_ALG_KERN_DRIVER_ONLY) -+ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; -+#else -+ if (is_known_accelerated(tfm)) -+ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; -+#endif -+ } -+ if (ses_ptr->hdata.init) { -+ tfm = crypto_ahash_tfm(ses_ptr->hdata.async.s); -+ tfm_info_to_alg_info(&siop->hash_info, tfm); -+#ifdef CRYPTO_ALG_KERN_DRIVER_ONLY -+ if (tfm->__crt_alg->cra_flags & CRYPTO_ALG_KERN_DRIVER_ONLY) -+ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; 
-+#else -+ if (is_known_accelerated(tfm)) -+ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; -+#endif ++ rk628_clk_add_lookup(cru, clk, data->id); + } -+ -+ siop->alignmask = ses_ptr->alignmask; -+ -+ crypto_put_session(ses_ptr); -+ return 0; +} + -+static long -+cryptodev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg_) ++static void rk628_clk_register_composites(struct rk628_cru *cru) +{ -+ void __user *arg = (void __user *)arg_; -+ int __user *p = arg; -+ struct session_op sop; -+ struct kernel_crypt_op kcop; -+ struct kernel_crypt_auth_op kcaop; -+ struct crypt_priv *pcr = filp->private_data; -+ struct fcrypt *fcr; -+ struct session_info_op siop; -+#ifdef CIOCCPHASH -+ struct cphash_op cphop; -+#endif -+ uint32_t ses; -+ int ret, fd; -+ -+ if (unlikely(!pcr)) -+ BUG(); -+ -+ fcr = &pcr->fcrypt; ++ struct clk *clk; ++ unsigned int i; + -+ switch (cmd) { -+ case CIOCASYMFEAT: -+ return put_user(0, p); -+ case CRIOGET: -+ fd = get_unused_fd_flags(0); -+ if (unlikely(fd < 0)) -+ return fd; ++ for (i = 0; i < ARRAY_SIZE(rk628_clk_composites); i++) { ++ const struct clk_composite_data *data = ++ &rk628_clk_composites[i]; + -+ ret = put_user(fd, p); -+ if (unlikely(ret)) { -+ put_unused_fd(fd); -+ return ret; ++ clk = devm_clk_regmap_register_composite(cru->dev, data->name, ++ data->parent_names, ++ data->num_parents, ++ cru->regmap, ++ data->mux_reg, ++ data->mux_shift, ++ data->mux_width, ++ data->div_reg, ++ data->div_shift, ++ data->div_width, ++ data->div_flags, ++ data->gate_reg, ++ data->gate_shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; + } + -+ get_file(filp); -+ fd_install(fd, filp); ++ rk628_clk_add_lookup(cru, clk, data->id); ++ } ++} + -+ return ret; -+ case CIOCGSESSION: -+ if (unlikely(copy_from_user(&sop, arg, sizeof(sop)))) -+ return -EFAULT; ++static void rk628_clk_register_plls(struct rk628_cru *cru) ++{ ++ struct clk *clk; ++ unsigned int i; + -+ ret = crypto_create_session(fcr, &sop); -+ if (unlikely(ret)) -+ return ret; -+ ret = copy_to_user(arg, &sop, sizeof(sop)); -+ if (unlikely(ret)) { -+ crypto_finish_session(fcr, sop.ses); -+ return -EFAULT; -+ } -+ return ret; -+ case CIOCFSESSION: -+ ret = get_user(ses, (uint32_t __user *)arg); -+ if (unlikely(ret)) -+ return ret; -+ ret = crypto_finish_session(fcr, ses); -+ return ret; -+ case CIOCGSESSINFO: -+ if (unlikely(copy_from_user(&siop, arg, sizeof(siop)))) -+ return -EFAULT; ++ for (i = 0; i < ARRAY_SIZE(rk628_clk_plls); i++) { ++ const struct clk_pll_data *data = &rk628_clk_plls[i]; + -+ ret = get_session_info(fcr, &siop); -+ if (unlikely(ret)) -+ return ret; -+ return copy_to_user(arg, &siop, sizeof(siop)); -+#ifdef CIOCCPHASH -+ case CIOCCPHASH: -+ if (unlikely(copy_from_user(&cphop, arg, sizeof(cphop)))) -+ return -EFAULT; -+ return crypto_copy_hash_state(fcr, cphop.dst_ses, cphop.src_ses); -+#endif /* CIOCPHASH */ -+ case CIOCCRYPT: -+ if (unlikely(ret = kcop_from_user(&kcop, fcr, arg))) { -+ dwarning(1, "Error copying from user"); -+ return ret; ++ clk = devm_clk_regmap_register_pll(cru->dev, data->name, ++ data->parent_name, ++ cru->regmap, ++ data->reg, ++ data->pd_shift, ++ data->dsmpd_shift, ++ data->lock_shift, ++ data->flags); ++ if (IS_ERR(clk)) { ++ dev_err(cru->dev, "failed to register clock %s\n", ++ data->name); ++ continue; + } + -+ ret = crypto_run(fcr, &kcop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_run"); -+ return ret; -+ } ++ rk628_clk_add_lookup(cru, clk, data->id); ++ } ++} + -+ return 
kcop_to_user(&kcop, fcr, arg); -+ case CIOCAUTHCRYPT: -+ if (unlikely(ret = cryptodev_kcaop_from_user(&kcaop, fcr, arg))) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++struct rk628_rgu_data { ++ unsigned int id; ++ unsigned int reg; ++ unsigned int bit; ++}; + -+ ret = crypto_auth_run(fcr, &kcaop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_auth_run"); -+ return ret; -+ } -+ return cryptodev_kcaop_to_user(&kcaop, fcr, arg); -+#ifdef ENABLE_ASYNC -+ case CIOCASYNCCRYPT: -+ if (unlikely(ret = kcop_from_user(&kcop, fcr, arg))) -+ return ret; ++#define RSTGEN(_id, _reg, _bit) \ ++ { \ ++ .id = (_id), \ ++ .reg = (_reg), \ ++ .bit = (_bit), \ ++ } + -+ return crypto_async_run(pcr, &kcop); -+ case CIOCASYNCFETCH: -+ ret = crypto_async_fetch(pcr, &kcop); -+ if (unlikely(ret)) -+ return ret; ++static const struct rk628_rgu_data rk628_rgu_data[] = { ++ RSTGEN(RGU_LOGIC, CRU_SOFTRST_CON00, 0), ++ RSTGEN(RGU_CRU, CRU_SOFTRST_CON00, 1), ++ RSTGEN(RGU_REGFILE, CRU_SOFTRST_CON00, 2), ++ RSTGEN(RGU_I2C2APB, CRU_SOFTRST_CON00, 3), ++ RSTGEN(RGU_EFUSE, CRU_SOFTRST_CON00, 5), ++ RSTGEN(RGU_ADAPTER, CRU_SOFTRST_CON00, 7), ++ RSTGEN(RGU_CLK_RX, CRU_SOFTRST_CON00, 11), ++ RSTGEN(RGU_BT1120DEC, CRU_SOFTRST_CON00, 12), ++ RSTGEN(RGU_VOP, CRU_SOFTRST_CON00, 13), + -+ return kcop_to_user(&kcop, fcr, arg); -+#endif -+ default: -+ return rk_cryptodev_ioctl(fcr, cmd, arg_); -+ } -+} ++ RSTGEN(RGU_GPIO0, CRU_SOFTRST_CON01, 0), ++ RSTGEN(RGU_GPIO1, CRU_SOFTRST_CON01, 1), ++ RSTGEN(RGU_GPIO2, CRU_SOFTRST_CON01, 2), ++ RSTGEN(RGU_GPIO3, CRU_SOFTRST_CON01, 3), ++ RSTGEN(RGU_GPIO_DB0, CRU_SOFTRST_CON01, 4), ++ RSTGEN(RGU_GPIO_DB1, CRU_SOFTRST_CON01, 5), ++ RSTGEN(RGU_GPIO_DB2, CRU_SOFTRST_CON01, 6), ++ RSTGEN(RGU_GPIO_DB3, CRU_SOFTRST_CON01, 7), + -+/* compatibility code for 32bit userlands */ -+#ifdef CONFIG_COMPAT ++ RSTGEN(RGU_RXPHY, CRU_SOFTRST_CON02, 0), ++ RSTGEN(RGU_HDMIRX, CRU_SOFTRST_CON02, 2), ++ RSTGEN(RGU_TXPHY_CON, CRU_SOFTRST_CON02, 3), ++ RSTGEN(RGU_HDMITX, CRU_SOFTRST_CON02, 4), ++ RSTGEN(RGU_GVIHOST, CRU_SOFTRST_CON02, 5), ++ RSTGEN(RGU_DSI0, CRU_SOFTRST_CON02, 6), ++ RSTGEN(RGU_DSI1, CRU_SOFTRST_CON02, 7), ++ RSTGEN(RGU_CSI, CRU_SOFTRST_CON02, 8), ++ RSTGEN(RGU_TXDATA, CRU_SOFTRST_CON02, 9), ++ RSTGEN(RGU_DECODER, CRU_SOFTRST_CON02, 10), ++ RSTGEN(RGU_ENCODER, CRU_SOFTRST_CON02, 11), ++ RSTGEN(RGU_HDMIRX_PON, CRU_SOFTRST_CON02, 12), ++ RSTGEN(RGU_TXBYTEHS, CRU_SOFTRST_CON02, 13), ++ RSTGEN(RGU_TXESC, CRU_SOFTRST_CON02, 14), ++}; + -+static inline void -+compat_to_session_op(struct compat_session_op *compat, struct session_op *sop) ++static int rk628_rgu_update(struct rk628_cru *cru, unsigned long id, int assert) +{ -+ sop->cipher = compat->cipher; -+ sop->mac = compat->mac; -+ sop->keylen = compat->keylen; ++ const struct rk628_rgu_data *data = &rk628_rgu_data[id]; + -+ sop->key = compat_ptr(compat->key); -+ sop->mackeylen = compat->mackeylen; -+ sop->mackey = compat_ptr(compat->mackey); -+ sop->ses = compat->ses; ++ return regmap_write(cru->regmap, data->reg, ++ BIT(data->bit + 16) | (assert << data->bit)); +} + -+static inline void -+session_op_to_compat(struct session_op *sop, struct compat_session_op *compat) ++static int rk628_rgu_assert(struct reset_controller_dev *rcdev, ++ unsigned long id) +{ -+ compat->cipher = sop->cipher; -+ compat->mac = sop->mac; -+ compat->keylen = sop->keylen; ++ struct rk628_cru *cru = reset_to_cru(rcdev); + -+ compat->key = ptr_to_compat(sop->key); -+ compat->mackeylen = sop->mackeylen; -+ compat->mackey = ptr_to_compat(sop->mackey); -+ 
compat->ses = sop->ses; ++ return rk628_rgu_update(cru, id, 1); +} + -+static inline void -+compat_to_crypt_op(struct compat_crypt_op *compat, struct crypt_op *cop) ++static int rk628_rgu_deassert(struct reset_controller_dev *rcdev, ++ unsigned long id) +{ -+ cop->ses = compat->ses; -+ cop->op = compat->op; -+ cop->flags = compat->flags; -+ cop->len = compat->len; ++ struct rk628_cru *cru = reset_to_cru(rcdev); + -+ cop->src = compat_ptr(compat->src); -+ cop->dst = compat_ptr(compat->dst); -+ cop->mac = compat_ptr(compat->mac); -+ cop->iv = compat_ptr(compat->iv); ++ return rk628_rgu_update(cru, id, 0); +} + -+static inline void -+crypt_op_to_compat(struct crypt_op *cop, struct compat_crypt_op *compat) ++static struct reset_control_ops rk628_rgu_ops = { ++ .assert = rk628_rgu_assert, ++ .deassert = rk628_rgu_deassert, ++}; ++ ++static int rk628_reset_controller_register(struct rk628_cru *cru) +{ -+ compat->ses = cop->ses; -+ compat->op = cop->op; -+ compat->flags = cop->flags; -+ compat->len = cop->len; ++ struct device *dev = cru->dev; + -+ compat->src = ptr_to_compat(cop->src); -+ compat->dst = ptr_to_compat(cop->dst); -+ compat->mac = ptr_to_compat(cop->mac); -+ compat->iv = ptr_to_compat(cop->iv); ++ cru->rcdev.owner = THIS_MODULE; ++ cru->rcdev.nr_resets = ARRAY_SIZE(rk628_rgu_data); ++ cru->rcdev.of_node = dev->of_node; ++ cru->rcdev.ops = &rk628_rgu_ops; ++ ++ return devm_reset_controller_register(dev, &cru->rcdev); +} + -+static int compat_kcop_from_user(struct kernel_crypt_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ struct compat_crypt_op compat_cop; ++static const struct regmap_range rk628_cru_readable_ranges[] = { ++ regmap_reg_range(CRU_CPLL_CON0, CRU_CPLL_CON4), ++ regmap_reg_range(CRU_GPLL_CON0, CRU_GPLL_CON4), ++ regmap_reg_range(CRU_MODE_CON, CRU_MODE_CON), ++ regmap_reg_range(CRU_CLKSEL_CON00, CRU_CLKSEL_CON21), ++ regmap_reg_range(CRU_GATE_CON00, CRU_GATE_CON05), ++ regmap_reg_range(CRU_SOFTRST_CON00, CRU_SOFTRST_CON04), ++}; + -+ if (unlikely(copy_from_user(&compat_cop, arg, sizeof(compat_cop)))) -+ return -EFAULT; -+ compat_to_crypt_op(&compat_cop, &kcop->cop); ++static const struct regmap_access_table rk628_cru_readable_table = { ++ .yes_ranges = rk628_cru_readable_ranges, ++ .n_yes_ranges = ARRAY_SIZE(rk628_cru_readable_ranges), ++}; + -+ return fill_kcop_from_cop(kcop, fcr); -+} ++static const struct regmap_config rk628_cru_regmap_config = { ++ .name = "cru", ++ .reg_bits = 32, ++ .val_bits = 32, ++ .reg_stride = 4, ++ .max_register = CRU_MAX_REGISTER, ++ .reg_format_endian = REGMAP_ENDIAN_LITTLE, ++ .val_format_endian = REGMAP_ENDIAN_LITTLE, ++ .rd_table = &rk628_cru_readable_table, ++}; + -+static int compat_kcop_to_user(struct kernel_crypt_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static void rk628_cru_init(struct rk628_cru *cru) +{ -+ int ret; -+ struct compat_crypt_op compat_cop; ++ u32 val = 0; ++ u8 mcu_mode; + -+ ret = fill_cop_from_kcop(kcop, fcr); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in fill_cop_from_kcop"); -+ return ret; -+ } -+ crypt_op_to_compat(&kcop->cop, &compat_cop); ++ regmap_read(cru->parent->grf, GRF_SYSTEM_STATUS0, &val); ++ mcu_mode = (val & I2C_ONLY_FLAG) ? 
0 : 1; ++ if (mcu_mode) ++ return; + -+ if (unlikely(copy_to_user(arg, &compat_cop, sizeof(compat_cop)))) { -+ dwarning(1, "Error copying to user"); -+ return -EFAULT; -+ } -+ return 0; ++ /* clock switch and first set gpll almost 99MHz */ ++ regmap_write(cru->regmap, CRU_GPLL_CON0, 0xffff701d); ++ usleep_range(1000, 1100); ++ /* set clk_gpll_mux from gpll */ ++ regmap_write(cru->regmap, CRU_MODE_CON, 0xffff0004); ++ usleep_range(1000, 1100); ++ /* set pclk_logic from clk_gpll_mux and set pclk div 4 */ ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0080); ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0083); ++ /* set cpll almost 400MHz */ ++ regmap_write(cru->regmap, CRU_CPLL_CON0, 0xffff3063); ++ usleep_range(1000, 1100); ++ /* set clk_cpll_mux from clk_cpll */ ++ regmap_write(cru->regmap, CRU_MODE_CON, 0xffff0005); ++ /* set pclk use cpll, now div is 4 */ ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff0003); ++ /* set pclk use cpll, now div is 12 */ ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff000b); ++ /* gpll 983.04MHz */ ++ regmap_write(cru->regmap, CRU_GPLL_CON0, 0xffff1028); ++ usleep_range(1000, 1100); ++ /* set pclk use gpll, nuw div is 0xb */ ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff008b); ++ /* set cpll 1188MHz */ ++ regmap_write(cru->regmap, CRU_CPLL_CON0, 0xffff1063); ++ usleep_range(1000, 1100); ++ /* set pclk use cpll, and set pclk 99MHz */ ++ regmap_write(cru->regmap, CRU_CLKSEL_CON00, 0xff000b); +} + -+static long -+cryptodev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg_) ++static int rk628_cru_probe(struct platform_device *pdev) +{ -+ void __user *arg = (void __user *)arg_; -+ struct crypt_priv *pcr = file->private_data; -+ struct fcrypt *fcr; -+ struct session_op sop; -+ struct compat_session_op compat_sop; -+ struct kernel_crypt_op kcop; -+ struct kernel_crypt_auth_op kcaop; ++ struct rk628 *rk628 = dev_get_drvdata(pdev->dev.parent); ++ struct device *dev = &pdev->dev; ++ struct rk628_cru *cru; ++ struct clk **clk_table; ++ unsigned int i; + int ret; + -+ if (unlikely(!pcr)) -+ BUG(); -+ -+ fcr = &pcr->fcrypt; -+ -+ switch (cmd) { -+ case CIOCASYMFEAT: -+ case CRIOGET: -+ case CIOCFSESSION: -+ case CIOCGSESSINFO: -+ return cryptodev_ioctl(file, cmd, arg_); -+ -+ case COMPAT_CIOCGSESSION: -+ if (unlikely(copy_from_user(&compat_sop, arg, -+ sizeof(compat_sop)))) -+ return -EFAULT; -+ compat_to_session_op(&compat_sop, &sop); ++ cru = devm_kzalloc(dev, sizeof(*cru), GFP_KERNEL); ++ if (!cru) ++ return -ENOMEM; + -+ ret = crypto_create_session(fcr, &sop); -+ if (unlikely(ret)) -+ return ret; ++ cru->dev = dev; ++ cru->parent = rk628; ++ platform_set_drvdata(pdev, cru); + -+ session_op_to_compat(&sop, &compat_sop); -+ ret = copy_to_user(arg, &compat_sop, sizeof(compat_sop)); -+ if (unlikely(ret)) { -+ crypto_finish_session(fcr, sop.ses); -+ return -EFAULT; -+ } ++ cru->regmap = devm_regmap_init_i2c(rk628->client, ++ &rk628_cru_regmap_config); ++ if (IS_ERR(cru->regmap)) { ++ ret = PTR_ERR(cru->regmap); ++ dev_err(dev, "failed to allocate register map: %d\n", ret); + return ret; ++ } + -+ case COMPAT_CIOCCRYPT: -+ ret = compat_kcop_from_user(&kcop, fcr, arg); -+ if (unlikely(ret)) -+ return ret; ++ rk628_cru_init(cru); + -+ ret = crypto_run(fcr, &kcop); -+ if (unlikely(ret)) -+ return ret; ++ clk_table = devm_kcalloc(dev, CGU_NR_CLKS, sizeof(struct clk *), ++ GFP_KERNEL); ++ if (!clk_table) ++ return -ENOMEM; + -+ return compat_kcop_to_user(&kcop, fcr, arg); ++ for (i = 0; i < CGU_NR_CLKS; i++) ++ clk_table[i] = 
ERR_PTR(-ENOENT); + -+ case COMPAT_CIOCAUTHCRYPT: -+ ret = compat_kcaop_from_user(&kcaop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ cru->clk_data.clks = clk_table; ++ cru->clk_data.clk_num = CGU_NR_CLKS; + -+ ret = crypto_auth_run(fcr, &kcaop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_auth_run"); -+ return ret; -+ } -+ return compat_kcaop_to_user(&kcaop, fcr, arg); -+#ifdef ENABLE_ASYNC -+ case COMPAT_CIOCASYNCCRYPT: -+ if (unlikely(ret = compat_kcop_from_user(&kcop, fcr, arg))) -+ return ret; ++ rk628_clk_register_plls(cru); ++ rk628_clk_register_muxes(cru); ++ rk628_clk_register_gates(cru); ++ rk628_clk_register_composites(cru); ++ rk628_reset_controller_register(cru); + -+ return crypto_async_run(pcr, &kcop); -+ case COMPAT_CIOCASYNCFETCH: -+ ret = crypto_async_fetch(pcr, &kcop); -+ if (unlikely(ret)) -+ return ret; ++ clk_prepare_enable(clk_table[CGU_PCLK_LOGIC]); + -+ return compat_kcop_to_user(&kcop, fcr, arg); -+#endif -+ default: -+ return rk_compat_cryptodev_ioctl(fcr, cmd, arg_); -+ } ++ return of_clk_add_provider(dev->of_node, of_clk_src_onecell_get, ++ &cru->clk_data); +} + -+#endif /* CONFIG_COMPAT */ -+ -+static unsigned int cryptodev_poll(struct file *file, poll_table *wait) ++static int rk628_cru_remove(struct platform_device *pdev) +{ -+ struct crypt_priv *pcr = file->private_data; -+ unsigned int ret = 0; -+ -+ poll_wait(file, &pcr->user_waiter, wait); -+ -+ if (!list_empty_careful(&pcr->done.list)) -+ ret |= POLLIN | POLLRDNORM; -+ if (!list_empty_careful(&pcr->free.list) || pcr->itemcount < MAX_COP_RINGSIZE) -+ ret |= POLLOUT | POLLWRNORM; ++ of_clk_del_provider(pdev->dev.of_node); + -+ return ret; ++ return 0; +} + -+static const struct file_operations cryptodev_fops = { -+ .owner = THIS_MODULE, -+ .open = cryptodev_open, -+ .release = cryptodev_release, -+ .unlocked_ioctl = cryptodev_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = cryptodev_compat_ioctl, -+#endif /* CONFIG_COMPAT */ -+ .poll = cryptodev_poll, ++static const struct of_device_id rk628_cru_of_match[] = { ++ { .compatible = "rockchip,rk628-cru", }, ++ {}, +}; ++MODULE_DEVICE_TABLE(of, rk628_cru_of_match); + -+static struct miscdevice cryptodev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "crypto", -+ .fops = &cryptodev_fops, -+ .mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH, ++static struct platform_driver rk628_cru_driver = { ++ .driver = { ++ .name = "rk628-cru", ++ .of_match_table = of_match_ptr(rk628_cru_of_match), ++ }, ++ .probe = rk628_cru_probe, ++ .remove = rk628_cru_remove, +}; ++module_platform_driver(rk628_cru_driver); + -+static int __init -+cryptodev_register(void) -+{ -+ int rc; -+ -+ rc = misc_register(&cryptodev); -+ if (unlikely(rc)) { -+ pr_err(PFX "registration of /dev/crypto failed\n"); -+ return rc; -+ } -+ -+ return 0; -+} ++MODULE_AUTHOR("Wyon Bi "); ++MODULE_DESCRIPTION("Rockchip RK628 CRU driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig +index 8208a3d89..b7022db25 100644 +--- a/drivers/clocksource/Kconfig ++++ b/drivers/clocksource/Kconfig +@@ -92,7 +92,9 @@ config IXP4XX_TIMER + Enables support for the Intel XScale IXP4xx SoC timer. 
+ + config ROCKCHIP_TIMER +- bool "Rockchip timer driver" if COMPILE_TEST ++ tristate "Rockchip timer driver" ++ default ARCH_ROCKCHIP ++ depends on ARCH_ROCKCHIP || COMPILE_TEST + depends on ARM || ARM64 + select TIMER_OF + select CLKSRC_MMIO +diff --git a/drivers/clocksource/timer-rockchip.c b/drivers/clocksource/timer-rockchip.c +index 1f95d0aca..2f4e970d7 100644 +--- a/drivers/clocksource/timer-rockchip.c ++++ b/drivers/clocksource/timer-rockchip.c +@@ -8,11 +8,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + + #define TIMER_NAME "rk_timer" + +@@ -45,7 +47,9 @@ struct rk_clkevt { + }; + + static struct rk_clkevt *rk_clkevt; ++#ifndef MODULE + static struct rk_timer *rk_clksrc; ++#endif + + static inline struct rk_timer *rk_timer(struct clock_event_device *ce) + { +@@ -119,10 +123,12 @@ static irqreturn_t rk_timer_interrupt(int irq, void *dev_id) + return IRQ_HANDLED; + } + ++#ifndef MODULE + static u64 notrace rk_timer_sched_read(void) + { + return ~readl_relaxed(rk_clksrc->base + TIMER_CURRENT_VALUE0); + } ++#endif + + static int __init + rk_timer_probe(struct rk_timer *timer, struct device_node *np) +@@ -250,6 +256,7 @@ static int __init rk_clkevt_init(struct device_node *np) + return ret; + } + ++#ifndef MODULE + static int __init rk_clksrc_init(struct device_node *np) + { + int ret = -EINVAL; +@@ -287,14 +294,17 @@ static int __init rk_clksrc_init(struct device_node *np) + rk_clksrc = ERR_PTR(ret); + return ret; + } ++#endif + + static int __init rk_timer_init(struct device_node *np) + { + if (!rk_clkevt) + return rk_clkevt_init(np); + ++#ifndef MODULE + if (!rk_clksrc) + return rk_clksrc_init(np); ++#endif + + pr_err("Too many timer definitions for '%s'\n", TIMER_NAME); + return -EINVAL; +@@ -302,3 +312,26 @@ static int __init rk_timer_init(struct device_node *np) + + TIMER_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", rk_timer_init); + TIMER_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", rk_timer_init); + -+static void __exit -+cryptodev_deregister(void) ++#ifdef MODULE ++static int __init rk_timer_driver_probe(struct platform_device *pdev) +{ -+ misc_deregister(&cryptodev); ++ return rk_timer_init(pdev->dev.of_node); +} + -+/* ====== Module init/exit ====== */ -+static struct ctl_table verbosity_ctl_dir[] = { -+ { -+ .procname = "cryptodev_verbosity", -+ .data = &cryptodev_verbosity, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ {}, ++static const struct of_device_id rk_timer_match_table[] = { ++ { .compatible = "rockchip,rk3288-timer" }, ++ { .compatible = "rockchip,rk3399-timer" }, ++ { /* sentinel */ }, +}; + -+static struct ctl_table verbosity_ctl_root[] = { -+ { -+ .procname = "ioctl", -+ .mode = 0555, -+ .child = verbosity_ctl_dir, ++static struct platform_driver rk_timer_driver = { ++ .driver = { ++ .name = TIMER_NAME, ++ .of_match_table = rk_timer_match_table, + }, -+ {}, +}; -+static struct ctl_table_header *verbosity_sysctl_header; -+static int __init init_cryptodev(void) -+{ -+ int rc; -+ -+ cryptodev_wq = create_workqueue("cryptodev_queue"); -+ if (unlikely(!cryptodev_wq)) { -+ pr_err(PFX "failed to allocate the cryptodev workqueue\n"); -+ return -EFAULT; -+ } ++module_platform_driver_probe(rk_timer_driver, rk_timer_driver_probe); + -+ rc = cryptodev_register(); -+ if (unlikely(rc)) { -+ destroy_workqueue(cryptodev_wq); -+ return rc; -+ } ++MODULE_LICENSE("GPL"); ++#endif +diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig +index 
7e773c47a..a870deefa 100644 +--- a/drivers/cpufreq/Kconfig ++++ b/drivers/cpufreq/Kconfig +@@ -105,6 +105,17 @@ config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL + have a look at the help section of that governor. The fallback + governor will be 'performance'. + ++config CPU_FREQ_DEFAULT_GOV_INTERACTIVE ++ bool "interactive" ++ depends on NO_GKI ++ select CPU_FREQ_GOV_INTERACTIVE ++ select CPU_FREQ_GOV_PERFORMANCE ++ help ++ Use the CPUFreq governor 'interactive' as default. This allows ++ you to get a full dynamic cpu frequency capable system by simply ++ loading your cpufreq low-level hardware driver, using the ++ 'interactive' governor for latency-sensitive workloads. + -+ verbosity_sysctl_header = register_sysctl_table(verbosity_ctl_root); + endchoice + + config CPU_FREQ_GOV_PERFORMANCE +@@ -203,12 +214,33 @@ config CPU_FREQ_GOV_SCHEDUTIL + + If in doubt, say N. + ++config CPU_FREQ_GOV_INTERACTIVE ++ tristate "'interactive' cpufreq policy governor" ++ depends on NO_GKI ++ depends on CPU_FREQ ++ select CPU_FREQ_GOV_ATTR_SET ++ select IRQ_WORK ++ help ++ 'interactive' - This driver adds a dynamic cpufreq policy governor ++ designed for latency-sensitive workloads. + -+ pr_info(PFX "driver %s loaded.\n", VERSION); ++ This governor attempts to reduce the latency of clock ++ increases so that the system is more responsive to ++ interactive workloads. + -+ return 0; -+} ++ To compile this driver as a module, choose M here: the ++ module will be called cpufreq_interactive. + -+static void __exit exit_cryptodev(void) -+{ -+ flush_workqueue(cryptodev_wq); -+ destroy_workqueue(cryptodev_wq); ++ For details, take a look at linux/Documentation/cpu-freq. + -+ if (verbosity_sysctl_header) -+ unregister_sysctl_table(verbosity_sysctl_header); ++ If in doubt, say N. + -+ cryptodev_deregister(); -+ pr_info(PFX "driver unloaded.\n"); -+} + comment "CPU frequency scaling drivers" + + config CPUFREQ_DT + tristate "Generic DT based cpufreq driver" + depends on HAVE_CLK && OF +- select CPUFREQ_DT_PLATDEV ++ select CPUFREQ_DT_PLATDEV if !ARM_ROCKCHIP_CPUFREQ + select PM_OPP + help + This adds a generic DT based cpufreq driver for frequency management. +diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm +index c5cecbd89..52127fa44 100644 +--- a/drivers/cpufreq/Kconfig.arm ++++ b/drivers/cpufreq/Kconfig.arm +@@ -190,6 +190,16 @@ config ARM_RASPBERRYPI_CPUFREQ + + If in doubt, say N. + ++config ARM_ROCKCHIP_CPUFREQ ++ tristate "Rockchip CPUfreq driver" ++ depends on ARCH_ROCKCHIP && CPUFREQ_DT ++ select PM_OPP ++ help ++ This adds the CPUFreq driver support for Rockchip SoCs, ++ based on cpufreq-dt. + -+module_init(init_cryptodev); -+module_exit(exit_cryptodev); ++ If in doubt, say N. 
+ -diff --git a/drivers/crypto/rockchip/cryptodev_linux/main.c b/drivers/crypto/rockchip/cryptodev_linux/main.c + config ARM_S3C64XX_CPUFREQ + bool "Samsung S3C64XX" + depends on CPU_S3C6410 +diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile +index ef8510774..fab0394cf 100644 +--- a/drivers/cpufreq/Makefile ++++ b/drivers/cpufreq/Makefile +@@ -8,14 +8,15 @@ obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o + # CPUfreq governors + obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o + obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o +-obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o ++obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace_rk.o + obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o + obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o ++obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o + obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o + obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o + +-obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o +-obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o ++obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt-rk.o ++obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev-rk.o + + # Traces + CFLAGS_amd-pstate-trace.o := -I$(src) +@@ -71,6 +72,7 @@ obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o + obj-$(CONFIG_ARM_QCOM_CPUFREQ_HW) += qcom-cpufreq-hw.o + obj-$(CONFIG_ARM_QCOM_CPUFREQ_NVMEM) += qcom-cpufreq-nvmem.o + obj-$(CONFIG_ARM_RASPBERRYPI_CPUFREQ) += raspberrypi-cpufreq.o ++obj-$(CONFIG_ARM_ROCKCHIP_CPUFREQ) += rockchip-cpufreq.o + obj-$(CONFIG_ARM_S3C64XX_CPUFREQ) += s3c64xx-cpufreq.o + obj-$(CONFIG_ARM_S5PV210_CPUFREQ) += s5pv210-cpufreq.o + obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o +diff --git a/drivers/cpufreq/cpufreq-dt-platdev-rk.c b/drivers/cpufreq/cpufreq-dt-platdev-rk.c new file mode 100644 -index 000000000..23efae186 +index 000000000..52cdde09e --- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/main.c -@@ -0,0 +1,266 @@ -+/* -+ * Driver for /dev/crypto device (aka CryptoDev) -+ * -+ * Copyright (c) 2004 Michal Ludvig , SuSE Labs -+ * Copyright (c) 2009-2013 Nikos Mavrogiannopoulos -+ * -+ * This file is part of linux cryptodev. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ ++++ b/drivers/cpufreq/cpufreq-dt-platdev-rk.c +@@ -0,0 +1,243 @@ ++// SPDX-License-Identifier: GPL-2.0-only +/* -+ * Device /dev/crypto provides an interface for -+ * accessing kernel CryptoAPI algorithms (ciphers, -+ * hashes) from userspace programs. -+ * -+ * /dev/crypto interface was originally introduced in -+ * OpenBSD and this module attempts to keep the API. 
-+ * -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "cryptodev.h" -+#include "zc.h" -+#include "cryptlib.h" -+#include "version.h" -+ -+/* This file contains the traditional operations of encryption -+ * and hashing of /dev/crypto. ++ * Copyright (C) 2016 Linaro. ++ * Viresh Kumar + */ + -+static int -+hash_n_crypt(struct csession *ses_ptr, struct crypt_op *cop, -+ struct scatterlist *src_sg, struct scatterlist *dst_sg, -+ uint32_t len) -+{ -+ int ret; ++#include ++#include ++#include ++#include + -+ /* Always hash before encryption and after decryption. Maybe -+ * we should introduce a flag to switch... TBD later on. -+ */ -+ if (cop->op == COP_ENCRYPT) { -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ src_sg, len); -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); ++#include "cpufreq-dt.h" + -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ } else { -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); ++/* ++ * Machines for which the cpufreq device is *always* created, mostly used for ++ * platforms using "operating-points" (V1) property. ++ */ ++static const struct of_device_id allowlist[] __initconst = { ++ { .compatible = "allwinner,sun4i-a10", }, ++ { .compatible = "allwinner,sun5i-a10s", }, ++ { .compatible = "allwinner,sun5i-a13", }, ++ { .compatible = "allwinner,sun5i-r8", }, ++ { .compatible = "allwinner,sun6i-a31", }, ++ { .compatible = "allwinner,sun6i-a31s", }, ++ { .compatible = "allwinner,sun7i-a20", }, ++ { .compatible = "allwinner,sun8i-a23", }, ++ { .compatible = "allwinner,sun8i-a83t", }, ++ { .compatible = "allwinner,sun8i-h3", }, + -+ if (unlikely(ret)) -+ goto out_err; -+ } ++ { .compatible = "apm,xgene-shadowcat", }, + -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ dst_sg, len); -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ } -+ return 0; -+out_err: -+ derr(0, "CryptoAPI failure: %d", ret); -+ return ret; -+} ++ { .compatible = "arm,integrator-ap", }, ++ { .compatible = "arm,integrator-cp", }, + -+/* This is the main crypto function - feed it with plaintext -+ and get a ciphertext (or vice versa :-) */ -+static int -+__crypto_run_std(struct csession *ses_ptr, struct crypt_op *cop) -+{ -+ char *data; -+ char __user *src, *dst; -+ struct scatterlist sg; -+ size_t nbytes, bufsize; -+ int ret = 0; ++ { .compatible = "hisilicon,hi3660", }, + -+ nbytes = cop->len; -+ data = (char *)__get_free_page(GFP_KERNEL); ++ { .compatible = "fsl,imx27", }, ++ { .compatible = "fsl,imx51", }, ++ { .compatible = "fsl,imx53", }, + -+ if (unlikely(!data)) { -+ derr(1, "Error getting free page."); -+ return -ENOMEM; -+ } ++ { .compatible = "marvell,berlin", }, ++ { .compatible = "marvell,pxa250", }, ++ { .compatible = "marvell,pxa270", }, + -+ bufsize = PAGE_SIZE < nbytes ? 
PAGE_SIZE : nbytes; ++ { .compatible = "samsung,exynos3250", }, ++ { .compatible = "samsung,exynos4210", }, ++ { .compatible = "samsung,exynos5250", }, ++#ifndef CONFIG_BL_SWITCHER ++ { .compatible = "samsung,exynos5800", }, ++#endif + -+ src = cop->src; -+ dst = cop->dst; ++ { .compatible = "renesas,emev2", }, ++ { .compatible = "renesas,r7s72100", }, ++ { .compatible = "renesas,r8a73a4", }, ++ { .compatible = "renesas,r8a7740", }, ++ { .compatible = "renesas,r8a7742", }, ++ { .compatible = "renesas,r8a7743", }, ++ { .compatible = "renesas,r8a7744", }, ++ { .compatible = "renesas,r8a7745", }, ++ { .compatible = "renesas,r8a7778", }, ++ { .compatible = "renesas,r8a7779", }, ++ { .compatible = "renesas,r8a7790", }, ++ { .compatible = "renesas,r8a7791", }, ++ { .compatible = "renesas,r8a7792", }, ++ { .compatible = "renesas,r8a7793", }, ++ { .compatible = "renesas,r8a7794", }, ++ { .compatible = "renesas,sh73a0", }, + -+ while (nbytes > 0) { -+ size_t current_len = nbytes > bufsize ? bufsize : nbytes; ++ { .compatible = "st-ericsson,u8500", }, ++ { .compatible = "st-ericsson,u8540", }, ++ { .compatible = "st-ericsson,u9500", }, ++ { .compatible = "st-ericsson,u9540", }, + -+ if (unlikely(copy_from_user(data, src, current_len))) { -+ derr(1, "Error copying %zu bytes from user address %p.", current_len, src); -+ ret = -EFAULT; -+ break; -+ } ++ { .compatible = "starfive,jh7110", }, + -+ sg_init_one(&sg, data, current_len); ++ { .compatible = "ti,omap2", }, ++ { .compatible = "ti,omap4", }, ++ { .compatible = "ti,omap5", }, + -+ ret = hash_n_crypt(ses_ptr, cop, &sg, &sg, current_len); ++ { .compatible = "xlnx,zynq-7000", }, ++ { .compatible = "xlnx,zynqmp", }, + -+ if (unlikely(ret)) { -+ derr(1, "hash_n_crypt failed."); -+ break; -+ } ++ { } ++}; + -+ if (ses_ptr->cdata.init != 0) { -+ if (unlikely(copy_to_user(dst, data, current_len))) { -+ derr(1, "could not copy to user."); -+ ret = -EFAULT; -+ break; -+ } -+ } ++/* ++ * Machines for which the cpufreq device is *not* created, mostly used for ++ * platforms using "operating-points-v2" property. ++ */ ++static const struct of_device_id blocklist[] __initconst = { ++ { .compatible = "allwinner,sun50i-h6", }, + -+ dst += current_len; -+ nbytes -= current_len; -+ src += current_len; -+ } ++ { .compatible = "apple,arm-platform", }, + -+ free_page((unsigned long)data); -+ return ret; -+} ++ { .compatible = "arm,vexpress", }, + ++ { .compatible = "calxeda,highbank", }, ++ { .compatible = "calxeda,ecx-2000", }, + ++ { .compatible = "fsl,imx7ulp", }, ++ { .compatible = "fsl,imx7d", }, ++ { .compatible = "fsl,imx7s", }, ++ { .compatible = "fsl,imx8mq", }, ++ { .compatible = "fsl,imx8mm", }, ++ { .compatible = "fsl,imx8mn", }, ++ { .compatible = "fsl,imx8mp", }, + -+/* This is the main crypto function - zero-copy edition */ -+static int -+__crypto_run_zc(struct csession *ses_ptr, struct kernel_crypt_op *kcop) -+{ -+ struct scatterlist *src_sg, *dst_sg; -+ struct crypt_op *cop = &kcop->cop; -+ int ret = 0; ++ { .compatible = "marvell,armadaxp", }, + -+ ret = cryptodev_get_userbuf(ses_ptr, cop->src, cop->len, cop->dst, cop->len, -+ kcop->task, kcop->mm, &src_sg, &dst_sg); -+ if (unlikely(ret)) { -+ derr(1, "Error getting user pages. 
Falling back to non zero copy."); -+ return __crypto_run_std(ses_ptr, cop); -+ } ++ { .compatible = "mediatek,mt2701", }, ++ { .compatible = "mediatek,mt2712", }, ++ { .compatible = "mediatek,mt7622", }, ++ { .compatible = "mediatek,mt7623", }, ++ { .compatible = "mediatek,mt8167", }, ++ { .compatible = "mediatek,mt817x", }, ++ { .compatible = "mediatek,mt8173", }, ++ { .compatible = "mediatek,mt8176", }, ++ { .compatible = "mediatek,mt8183", }, ++ { .compatible = "mediatek,mt8186", }, ++ { .compatible = "mediatek,mt8365", }, ++ { .compatible = "mediatek,mt8516", }, + -+ ret = hash_n_crypt(ses_ptr, cop, src_sg, dst_sg, cop->len); ++ { .compatible = "nvidia,tegra20", }, ++ { .compatible = "nvidia,tegra30", }, ++ { .compatible = "nvidia,tegra124", }, ++ { .compatible = "nvidia,tegra210", }, ++ { .compatible = "nvidia,tegra234", }, + -+ cryptodev_release_user_pages(ses_ptr); -+ return ret; -+} ++ { .compatible = "qcom,apq8096", }, ++ { .compatible = "qcom,msm8996", }, ++ { .compatible = "qcom,msm8998", }, ++ { .compatible = "qcom,qcm2290", }, ++ { .compatible = "qcom,qcs404", }, ++ { .compatible = "qcom,qdu1000", }, ++ { .compatible = "qcom,sa8155p" }, ++ { .compatible = "qcom,sa8540p" }, ++ { .compatible = "qcom,sa8775p" }, ++ { .compatible = "qcom,sc7180", }, ++ { .compatible = "qcom,sc7280", }, ++ { .compatible = "qcom,sc8180x", }, ++ { .compatible = "qcom,sc8280xp", }, ++ { .compatible = "qcom,sdm845", }, ++ { .compatible = "qcom,sdx75", }, ++ { .compatible = "qcom,sm6115", }, ++ { .compatible = "qcom,sm6350", }, ++ { .compatible = "qcom,sm6375", }, ++ { .compatible = "qcom,sm7225", }, ++ { .compatible = "qcom,sm8150", }, ++ { .compatible = "qcom,sm8250", }, ++ { .compatible = "qcom,sm8350", }, ++ { .compatible = "qcom,sm8450", }, ++ { .compatible = "qcom,sm8550", }, + -+int crypto_run(struct fcrypt *fcr, struct kernel_crypt_op *kcop) -+{ -+ struct csession *ses_ptr; -+ struct crypt_op *cop = &kcop->cop; -+ int ret; ++ { .compatible = "rockchip,px30", }, ++ { .compatible = "rockchip,rk2928", }, ++ { .compatible = "rockchip,rk3036", }, ++ { .compatible = "rockchip,rk3066a", }, ++ { .compatible = "rockchip,rk3066b", }, ++ { .compatible = "rockchip,rk3126", }, ++ { .compatible = "rockchip,rk3128", }, ++ { .compatible = "rockchip,rk3188", }, ++ { .compatible = "rockchip,rk3228", }, ++ { .compatible = "rockchip,rk3229", }, ++ { .compatible = "rockchip,rk3288", }, ++ { .compatible = "rockchip,rk3288w", }, ++ { .compatible = "rockchip,rk3308", }, ++ { .compatible = "rockchip,rk3326", }, ++ { .compatible = "rockchip,rk3328", }, ++ { .compatible = "rockchip,rk3366", }, ++ { .compatible = "rockchip,rk3368", }, ++ { .compatible = "rockchip,rk3399", }, ++ { .compatible = "rockchip,rk3399pro", }, ++ { .compatible = "rockchip,rk3528", }, ++ { .compatible = "rockchip,rk3562", }, ++ { .compatible = "rockchip,rk3566", }, ++ { .compatible = "rockchip,rk3567", }, ++ { .compatible = "rockchip,rk3568", }, ++ { .compatible = "rockchip,rk3588", }, ++ { .compatible = "rockchip,rv1103", }, ++ { .compatible = "rockchip,rv1106", }, ++ { .compatible = "rockchip,rv1109", }, ++ { .compatible = "rockchip,rv1126", }, + -+ if (unlikely(cop->op != COP_ENCRYPT && cop->op != COP_DECRYPT)) { -+ ddebug(1, "invalid operation op=%u", cop->op); -+ return -EINVAL; -+ } ++ { .compatible = "st,stih407", }, ++ { .compatible = "st,stih410", }, ++ { .compatible = "st,stih418", }, + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session 
ID=0x%08X", cop->ses); -+ return -EINVAL; -+ } ++ { .compatible = "ti,am33xx", }, ++ { .compatible = "ti,am43", }, ++ { .compatible = "ti,dra7", }, ++ { .compatible = "ti,omap3", }, ++ { .compatible = "ti,am625", }, ++ { .compatible = "ti,am62a7", }, + -+ if (ses_ptr->hdata.init != 0 && (cop->flags == 0 || cop->flags & COP_FLAG_RESET)) { -+ ret = cryptodev_hash_reset(&ses_ptr->hdata); -+ if (unlikely(ret)) { -+ derr(1, "error in cryptodev_hash_reset()"); -+ goto out_unlock; -+ } -+ } ++ { .compatible = "qcom,ipq8064", }, ++ { .compatible = "qcom,apq8064", }, ++ { .compatible = "qcom,msm8974", }, ++ { .compatible = "qcom,msm8960", }, + -+ if (ses_ptr->cdata.init != 0) { -+ int blocksize = ses_ptr->cdata.blocksize; ++ { } ++}; + -+ if (unlikely(cop->len % blocksize)) { -+ derr(1, "data size (%u) isn't a multiple of block size (%u)", -+ cop->len, blocksize); -+ ret = -EINVAL; -+ goto out_unlock; -+ } ++static bool __init cpu0_node_has_opp_v2_prop(void) ++{ ++ struct device_node *np = of_cpu_device_node_get(0); ++ bool ret = false; + -+ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcop->iv, -+ min(ses_ptr->cdata.ivsize, kcop->ivlen)); -+ } ++ if (of_property_present(np, "operating-points-v2")) ++ ret = true; + -+ if (likely(cop->len)) { -+ if (!(cop->flags & COP_FLAG_NO_ZC)) { -+ if (unlikely(ses_ptr->alignmask && !IS_ALIGNED((unsigned long)cop->src, ses_ptr->alignmask + 1))) { -+ dwarning(2, "source address %p is not %d byte aligned - disabling zero copy", -+ cop->src, ses_ptr->alignmask + 1); -+ cop->flags |= COP_FLAG_NO_ZC; -+ } ++ of_node_put(np); ++ return ret; ++} + -+ if (unlikely(ses_ptr->alignmask && !IS_ALIGNED((unsigned long)cop->dst, ses_ptr->alignmask + 1))) { -+ dwarning(2, "destination address %p is not %d byte aligned - disabling zero copy", -+ cop->dst, ses_ptr->alignmask + 1); -+ cop->flags |= COP_FLAG_NO_ZC; -+ } -+ } ++static int __init cpufreq_dt_platdev_init(void) ++{ ++ struct device_node *np = of_find_node_by_path("/"); ++ const struct of_device_id *match; ++ const void *data = NULL; + -+ if (cop->flags & COP_FLAG_NO_ZC) -+ ret = __crypto_run_std(ses_ptr, &kcop->cop); -+ else -+ ret = __crypto_run_zc(ses_ptr, kcop); -+ if (unlikely(ret)) -+ goto out_unlock; -+ } ++ if (!np) ++ return -ENODEV; + -+ if (ses_ptr->cdata.init != 0) { -+ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcop->iv, -+ min(ses_ptr->cdata.ivsize, kcop->ivlen)); ++ match = of_match_node(allowlist, np); ++ if (match) { ++ data = match->data; ++ goto create_pdev; + } + -+ if (ses_ptr->hdata.init != 0 && -+ ((cop->flags & COP_FLAG_FINAL) || -+ (!(cop->flags & COP_FLAG_UPDATE) || cop->len == 0))) { ++ if (cpu0_node_has_opp_v2_prop() && !of_match_node(blocklist, np)) ++ goto create_pdev; + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, kcop->hash_output); -+ if (unlikely(ret)) { -+ derr(0, "CryptoAPI failure: %d", ret); -+ goto out_unlock; -+ } -+ kcop->digestsize = ses_ptr->hdata.digestsize; -+ } ++ of_node_put(np); ++ return -ENODEV; + -+out_unlock: -+ crypto_put_session(ses_ptr); -+ return ret; ++create_pdev: ++ of_node_put(np); ++ return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", ++ -1, data, ++ sizeof(struct cpufreq_dt_platform_data))); +} -diff --git a/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c ++core_initcall(cpufreq_dt_platdev_init); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/cpufreq/cpufreq-dt-rk.c b/drivers/cpufreq/cpufreq-dt-rk.c new file mode 100644 -index 000000000..be36478f8 +index 000000000..914488f8e --- 
/dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c -@@ -0,0 +1,1527 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/cpufreq/cpufreq-dt-rk.c +@@ -0,0 +1,382 @@ ++// SPDX-License-Identifier: GPL-2.0-only +/* -+ * Crypto acceleration support for Rockchip crypto -+ * -+ * Copyright (c) 2021, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan ++ * Copyright (C) 2012 Freescale Semiconductor, Inc. + * ++ * Copyright (C) 2014 Linaro. ++ * Viresh Kumar + */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#include "version.h" -+#include "cipherapi.h" -+#include "rk_cryptodev.h" ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + -+MODULE_IMPORT_NS(DMA_BUF); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#define MAX_CRYPTO_DEV 1 -+#define MAX_CRYPTO_NAME_LEN 64 ++#include "cpufreq-dt.h" ++#ifdef CONFIG_ARCH_ROCKCHIP ++#include "rockchip-cpufreq.h" ++#endif + -+struct dma_fd_map_node { -+ struct kernel_crypt_fd_map_op fd_map; -+ struct sg_table *sgtbl; -+ struct dma_buf *dmabuf; -+ struct dma_buf_attachment *dma_attach; -+ struct list_head list; -+}; ++struct private_data { ++ struct list_head node; + -+struct crypto_dev_info { -+ struct device *dev; -+ char name[MAX_CRYPTO_NAME_LEN]; -+ bool is_multi_thread; ++ cpumask_var_t cpus; ++ struct device *cpu_dev; ++ struct cpufreq_frequency_table *freq_table; ++ bool have_static_opps; ++ int opp_token; +}; + -+static struct crypto_dev_info g_dev_infos[MAX_CRYPTO_DEV]; -+ -+/* -+ * rk_cryptodev_register_dev - register crypto device into rk_cryptodev. -+ * @dev: [in] crypto device to register -+ * @name: [in] crypto device name to register -+ */ -+int rk_cryptodev_register_dev(struct device *dev, const char *name) -+{ -+ uint32_t i; ++static LIST_HEAD(priv_list); + -+ if (WARN_ON(!dev)) -+ return -EINVAL; ++static struct freq_attr *cpufreq_dt_attr[] = { ++ &cpufreq_freq_attr_scaling_available_freqs, ++ NULL, /* Extra space for boost-attr if required */ ++ NULL, ++}; + -+ if (WARN_ON(!name)) -+ return -EINVAL; ++static struct private_data *cpufreq_dt_find_data(int cpu) ++{ ++ struct private_data *priv; + -+ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { -+ if (!g_dev_infos[i].dev) { -+ memset(&g_dev_infos[i], 0x00, sizeof(g_dev_infos[i])); ++ list_for_each_entry(priv, &priv_list, node) { ++ if (cpumask_test_cpu(cpu, priv->cpus)) ++ return priv; ++ } + -+ g_dev_infos[i].dev = dev; -+ strncpy(g_dev_infos[i].name, name, sizeof(g_dev_infos[i].name)); ++ return NULL; ++} + -+ g_dev_infos[i].is_multi_thread = strstr(g_dev_infos[i].name, "multi"); -+ dev_info(dev, "register to cryptodev ok!\n"); -+ return 0; -+ } -+ } ++static int set_target(struct cpufreq_policy *policy, unsigned int index) ++{ ++ struct private_data *priv = policy->driver_data; ++ unsigned long freq = policy->freq_table[index].frequency; + -+ return -ENOMEM; ++#ifdef CONFIG_ARCH_ROCKCHIP ++ return rockchip_cpufreq_opp_set_rate(priv->cpu_dev, freq * 1000); ++#else ++ return dev_pm_opp_set_rate(priv->cpu_dev, freq * 1000); ++#endif +} -+EXPORT_SYMBOL_GPL(rk_cryptodev_register_dev); + +/* -+ * rk_cryptodev_unregister_dev - unregister crypto device from rk_cryptodev -+ * @dev: [in] crypto device to unregister ++ * An earlier version of opp-v1 bindings used to name the regulator ++ * "cpu0-supply", we still need to handle that for backwards compatibility. 
+ */ -+int rk_cryptodev_unregister_dev(struct device *dev) ++static const char *find_supply_name(struct device *dev) +{ -+ uint32_t i; ++ struct device_node *np; ++ struct property *pp; ++ int cpu = dev->id; ++ const char *name = NULL; + -+ if (WARN_ON(!dev)) -+ return -EINVAL; ++ np = of_node_get(dev->of_node); + -+ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { -+ if (g_dev_infos[i].dev == dev) { -+ memset(&g_dev_infos[i], 0x00, sizeof(g_dev_infos[i])); -+ return 0; ++ /* This must be valid for sure */ ++ if (WARN_ON(!np)) ++ return NULL; ++ ++ /* Try "cpu0" for older DTs */ ++ if (!cpu) { ++ pp = of_find_property(np, "cpu0-supply", NULL); ++ if (pp) { ++ name = "cpu0"; ++ goto node_put; + } + } + -+ return -EINVAL; -+} -+EXPORT_SYMBOL_GPL(rk_cryptodev_unregister_dev); -+ -+static struct device *rk_cryptodev_find_dev(const char *name) -+{ -+ uint32_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { -+ if (g_dev_infos[i].dev) -+ return g_dev_infos[i].dev; ++ pp = of_find_property(np, "cpu-supply", NULL); ++ if (pp) { ++ name = "cpu"; ++ goto node_put; + } + -+ return NULL; ++ dev_dbg(dev, "no regulator for cpu%d\n", cpu); ++node_put: ++ of_node_put(np); ++ return name; +} + -+/* this function has to be called from process context */ -+static int fill_kcop_fd_from_cop(struct kernel_crypt_fd_op *kcop, struct fcrypt *fcr) ++static int cpufreq_init(struct cpufreq_policy *policy) +{ -+ struct crypt_fd_op *cop = &kcop->cop; -+ struct csession *ses_ptr; -+ int rc; ++ struct private_data *priv; ++ struct device *cpu_dev; ++ struct clk *cpu_clk; ++ unsigned int transition_latency; ++ int ret; + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", cop->ses); -+ return -EINVAL; ++ priv = cpufreq_dt_find_data(policy->cpu); ++ if (!priv) { ++ pr_err("failed to find data for cpu%d\n", policy->cpu); ++ return -ENODEV; + } -+ kcop->ivlen = cop->iv ? 
ses_ptr->cdata.ivsize : 0; -+ kcop->digestsize = 0; /* will be updated during operation */ ++ cpu_dev = priv->cpu_dev; + -+ crypto_put_session(ses_ptr); ++ cpu_clk = clk_get(cpu_dev, NULL); ++ if (IS_ERR(cpu_clk)) { ++ ret = PTR_ERR(cpu_clk); ++ dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); ++ return ret; ++ } + -+ kcop->task = current; -+ kcop->mm = current->mm; ++ transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); ++ if (!transition_latency) ++ transition_latency = CPUFREQ_ETERNAL; + -+ if (cop->iv) { -+ rc = copy_from_user(kcop->iv, cop->iv, kcop->ivlen); -+ if (unlikely(rc)) { -+ derr(1, "error copying IV (%d bytes), returned %d for addr %p", -+ kcop->ivlen, rc, cop->iv); -+ return -EFAULT; -+ } ++ cpumask_copy(policy->cpus, priv->cpus); ++ policy->driver_data = priv; ++ policy->clk = cpu_clk; ++ policy->freq_table = priv->freq_table; ++ policy->suspend_freq = dev_pm_opp_get_suspend_opp_freq(cpu_dev) / 1000; ++ policy->cpuinfo.transition_latency = transition_latency; ++ policy->dvfs_possible_from_any_cpu = true; ++ ++ /* Support turbo/boost mode */ ++ if (policy_has_boost_freq(policy)) { ++ /* This gets disabled by core on driver unregister */ ++ ret = cpufreq_enable_boost_support(); ++ if (ret) ++ goto out_clk_put; ++ cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + } + + return 0; -+} + ++out_clk_put: ++ clk_put(cpu_clk); + -+/* this function has to be called from process context */ -+static int fill_cop_fd_from_kcop(struct kernel_crypt_fd_op *kcop, struct fcrypt *fcr) -+{ -+ int ret; ++ return ret; ++} + -+ if (kcop->digestsize) { -+ ret = copy_to_user(kcop->cop.mac, -+ kcop->hash_output, kcop->digestsize); -+ if (unlikely(ret)) -+ return -EFAULT; -+ } -+ if (kcop->ivlen && kcop->cop.flags & COP_FLAG_WRITE_IV) { -+ ret = copy_to_user(kcop->cop.iv, -+ kcop->iv, kcop->ivlen); -+ if (unlikely(ret)) -+ return -EFAULT; -+ } ++static int cpufreq_online(struct cpufreq_policy *policy) ++{ ++ /* We did light-weight tear down earlier, nothing to do here */ + return 0; +} + -+static int kcop_fd_from_user(struct kernel_crypt_fd_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static int cpufreq_offline(struct cpufreq_policy *policy) +{ -+ if (unlikely(copy_from_user(&kcop->cop, arg, sizeof(kcop->cop)))) -+ return -EFAULT; -+ -+ return fill_kcop_fd_from_cop(kcop, fcr); ++ /* ++ * Preserve policy->driver_data and don't free resources on light-weight ++ * tear down. 
++ */ ++ return 0; +} + -+static int kcop_fd_to_user(struct kernel_crypt_fd_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static int cpufreq_exit(struct cpufreq_policy *policy) +{ -+ int ret; -+ -+ ret = fill_cop_fd_from_kcop(kcop, fcr); -+ if (unlikely(ret)) { -+ derr(1, "Error in fill_cop_from_kcop"); -+ return ret; -+ } -+ -+ if (unlikely(copy_to_user(arg, &kcop->cop, sizeof(kcop->cop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; -+ } -+ ++ clk_put(policy->clk); + return 0; +} + -+static int -+hash_n_crypt_fd(struct csession *ses_ptr, struct crypt_fd_op *cop, -+ struct scatterlist *src_sg, struct scatterlist *dst_sg, -+ uint32_t len) ++static struct cpufreq_driver dt_cpufreq_driver = { ++ .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | ++ CPUFREQ_IS_COOLING_DEV, ++ .verify = cpufreq_generic_frequency_table_verify, ++ .target_index = set_target, ++ .get = cpufreq_generic_get, ++ .init = cpufreq_init, ++ .exit = cpufreq_exit, ++ .online = cpufreq_online, ++ .offline = cpufreq_offline, ++ .register_em = cpufreq_register_em_with_opp, ++ .name = "cpufreq-dt", ++ .attr = cpufreq_dt_attr, ++ .suspend = cpufreq_generic_suspend, ++}; ++ ++static int dt_cpufreq_early_init(struct device *dev, int cpu) +{ ++ struct private_data *priv; ++ struct device *cpu_dev; ++ bool fallback = false; ++ const char *reg_name[] = { NULL, NULL }; + int ret; + -+ /* Always hash before encryption and after decryption. Maybe -+ * we should introduce a flag to switch... TBD later on. -+ */ -+ if (cop->op == COP_ENCRYPT) { -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ src_sg, len); -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); ++ /* Check if this CPU is already covered by some other policy */ ++ if (cpufreq_dt_find_data(cpu)) ++ return 0; + -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ } else { -+ if (ses_ptr->cdata.init != 0) { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); ++ cpu_dev = get_cpu_device(cpu); ++ if (!cpu_dev) ++ return -EPROBE_DEFER; + -+ if (unlikely(ret)) -+ goto out_err; -+ } ++ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; + -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_update(&ses_ptr->hdata, -+ dst_sg, len); -+ if (unlikely(ret)) -+ goto out_err; -+ } -+ } -+ return 0; -+out_err: -+ derr(0, "CryptoAPI failure: %d", ret); -+ return ret; -+} ++ if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) ++ return -ENOMEM; + -+static int get_dmafd_sgtbl(int dma_fd, unsigned int dma_len, enum dma_data_direction dir, -+ struct sg_table **sg_tbl, struct dma_buf_attachment **dma_attach, -+ struct dma_buf **dmabuf) -+{ -+ struct device *crypto_dev = rk_cryptodev_find_dev(NULL); ++ cpumask_set_cpu(cpu, priv->cpus); ++ priv->cpu_dev = cpu_dev; + -+ if (!crypto_dev) -+ return -EINVAL; ++ /* ++ * OPP layer will be taking care of regulators now, but it needs to know ++ * the name of the regulator first. 
++ */ ++ reg_name[0] = find_supply_name(cpu_dev); ++ if (reg_name[0]) { ++ priv->opp_token = dev_pm_opp_set_regulators(cpu_dev, reg_name); ++ if (priv->opp_token < 0) { ++ ret = dev_err_probe(cpu_dev, priv->opp_token, ++ "failed to set regulators\n"); ++ goto free_cpumask; ++ } ++ } + -+ *sg_tbl = NULL; -+ *dmabuf = NULL; -+ *dma_attach = NULL; ++ /* Get OPP-sharing information from "operating-points-v2" bindings */ ++ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->cpus); ++ if (ret) { ++ if (ret != -ENOENT) ++ goto out; + -+ *dmabuf = dma_buf_get(dma_fd); -+ if (IS_ERR(*dmabuf)) { -+ derr(1, "dmabuf error! ret = %d", (int)PTR_ERR(*dmabuf)); -+ *dmabuf = NULL; -+ goto error; ++ /* ++ * operating-points-v2 not supported, fallback to all CPUs share ++ * OPP for backward compatibility if the platform hasn't set ++ * sharing CPUs. ++ */ ++ if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) ++ fallback = true; + } + -+ *dma_attach = dma_buf_attach(*dmabuf, crypto_dev); -+ if (IS_ERR(*dma_attach)) { -+ derr(1, "dma_attach error! ret = %d", (int)PTR_ERR(*dma_attach)); -+ *dma_attach = NULL; -+ goto error; ++ /* ++ * Initialize OPP tables for all priv->cpus. They will be shared by ++ * all CPUs which have marked their CPUs shared with OPP bindings. ++ * ++ * For platforms not using operating-points-v2 bindings, we do this ++ * before updating priv->cpus. Otherwise, we will end up creating ++ * duplicate OPPs for the CPUs. ++ * ++ * OPPs might be populated at runtime, don't fail for error here unless ++ * it is -EPROBE_DEFER. ++ */ ++ ret = dev_pm_opp_of_cpumask_add_table(priv->cpus); ++ if (!ret) { ++ priv->have_static_opps = true; ++ } else if (ret == -EPROBE_DEFER) { ++ goto out; + } + + /* -+ * DMA_TO_DEVICE : cache clean for input data -+ * DMA_FROM_DEVICE: cache invalidate for output data ++ * The OPP table must be initialized, statically or dynamically, by this ++ * point. + */ -+ *sg_tbl = dma_buf_map_attachment(*dma_attach, dir); -+ if (IS_ERR(*sg_tbl)) { -+ derr(1, "sg_tbl error! 
ret = %d", (int)PTR_ERR(*sg_tbl)); -+ *sg_tbl = NULL; -+ goto error; ++ ret = dev_pm_opp_get_opp_count(cpu_dev); ++ if (ret <= 0) { ++ dev_err(cpu_dev, "OPP table can't be empty\n"); ++ ret = -ENODEV; ++ goto out; + } + -+ /* cache invalidate for input data */ -+ if (dir == DMA_TO_DEVICE) -+ dma_sync_sg_for_cpu(crypto_dev, (*sg_tbl)->sgl, (*sg_tbl)->nents, DMA_FROM_DEVICE); ++ if (fallback) { ++ cpumask_setall(priv->cpus); ++ ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus); ++ if (ret) ++ dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", ++ __func__, ret); ++ } + -+ return 0; -+error: -+ if (*sg_tbl) -+ dma_buf_unmap_attachment(*dma_attach, *sg_tbl, dir); ++#ifdef CONFIG_ARCH_ROCKCHIP ++ rockchip_cpufreq_adjust_table(cpu_dev); ++#endif + -+ if (*dma_attach) -+ dma_buf_detach(*dmabuf, *dma_attach); ++ ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &priv->freq_table); ++ if (ret) { ++ dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); ++ goto out; ++ } + -+ if (*dmabuf) -+ dma_buf_put(*dmabuf); ++ list_add(&priv->node, &priv_list); ++ return 0; + -+ return -EINVAL; ++out: ++ if (priv->have_static_opps) ++ dev_pm_opp_of_cpumask_remove_table(priv->cpus); ++ dev_pm_opp_put_regulators(priv->opp_token); ++free_cpumask: ++ free_cpumask_var(priv->cpus); ++ return ret; +} + -+static int put_dmafd_sgtbl(int dma_fd, enum dma_data_direction dir, -+ struct sg_table *sg_tbl, struct dma_buf_attachment *dma_attach, -+ struct dma_buf *dmabuf) ++static void dt_cpufreq_release(void) +{ -+ struct device *crypto_dev = rk_cryptodev_find_dev(NULL); -+ -+ if (!crypto_dev) -+ return -EINVAL; -+ -+ if (!sg_tbl || !dma_attach || !dmabuf) -+ return -EINVAL; -+ -+ /* cache clean for output data */ -+ if (dir == DMA_FROM_DEVICE) -+ dma_sync_sg_for_device(crypto_dev, sg_tbl->sgl, sg_tbl->nents, DMA_TO_DEVICE); -+ -+ /* -+ * DMA_TO_DEVICE : do nothing for input data -+ * DMA_FROM_DEVICE: cache invalidate for output data -+ */ -+ dma_buf_unmap_attachment(dma_attach, sg_tbl, dir); -+ dma_buf_detach(dmabuf, dma_attach); -+ dma_buf_put(dmabuf); ++ struct private_data *priv, *tmp; + -+ return 0; ++ list_for_each_entry_safe(priv, tmp, &priv_list, node) { ++ dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table); ++ if (priv->have_static_opps) ++ dev_pm_opp_of_cpumask_remove_table(priv->cpus); ++ dev_pm_opp_put_regulators(priv->opp_token); ++ free_cpumask_var(priv->cpus); ++ list_del(&priv->node); ++ } +} + -+static struct dma_fd_map_node *dma_fd_find_node(struct fcrypt *fcr, int dma_fd) ++static int dt_cpufreq_probe(struct platform_device *pdev) +{ -+ struct dma_fd_map_node *map_node = NULL; -+ -+ mutex_lock(&fcr->sem); ++ struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev); ++ int ret, cpu; + -+ list_for_each_entry(map_node, &fcr->dma_map_list, list) { -+ if (unlikely(map_node->fd_map.mop.dma_fd == dma_fd)) { -+ mutex_unlock(&fcr->sem); -+ return map_node; -+ } ++ /* Request resources early so we can return in case of -EPROBE_DEFER */ ++ for_each_possible_cpu(cpu) { ++ ret = dt_cpufreq_early_init(&pdev->dev, cpu); ++ if (ret) ++ goto err; + } + -+ mutex_unlock(&fcr->sem); -+ -+ return NULL; -+} -+ -+/* This is the main crypto function - zero-copy edition */ -+static int __crypto_fd_run(struct fcrypt *fcr, struct csession *ses_ptr, -+ struct kernel_crypt_fd_op *kcop) -+{ -+ struct crypt_fd_op *cop = &kcop->cop; -+ struct dma_buf *dma_buf_in = NULL, *dma_buf_out = NULL; -+ struct sg_table sg_tmp; -+ struct sg_table *sg_tbl_in = NULL, *sg_tbl_out = NULL; -+ struct 
dma_buf_attachment *dma_attach_in = NULL, *dma_attach_out = NULL; -+ struct dma_fd_map_node *node_src = NULL, *node_dst = NULL; -+ int ret = 0; ++ if (data) { ++ if (data->have_governor_per_policy) ++ dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY; + -+ node_src = dma_fd_find_node(fcr, kcop->cop.src_fd); -+ if (node_src) { -+ sg_tbl_in = node_src->sgtbl; -+ } else { -+ ret = get_dmafd_sgtbl(kcop->cop.src_fd, kcop->cop.len, DMA_TO_DEVICE, -+ &sg_tbl_in, &dma_attach_in, &dma_buf_in); -+ if (unlikely(ret)) { -+ derr(1, "Error get_dmafd_sgtbl src."); -+ goto exit; ++ dt_cpufreq_driver.resume = data->resume; ++ if (data->suspend) ++ dt_cpufreq_driver.suspend = data->suspend; ++ if (data->get_intermediate) { ++ dt_cpufreq_driver.target_intermediate = data->target_intermediate; ++ dt_cpufreq_driver.get_intermediate = data->get_intermediate; + } + } + -+ /* only cipher has dst */ -+ if (ses_ptr->cdata.init) { -+ node_dst = dma_fd_find_node(fcr, kcop->cop.dst_fd); -+ if (node_dst) { -+ sg_tbl_out = node_dst->sgtbl; -+ } else { -+ ret = get_dmafd_sgtbl(kcop->cop.dst_fd, kcop->cop.len, DMA_FROM_DEVICE, -+ &sg_tbl_out, &dma_attach_out, &dma_buf_out); -+ if (unlikely(ret)) { -+ derr(1, "Error get_dmafd_sgtbl dst."); -+ goto exit; -+ } -+ } -+ } else { -+ memset(&sg_tmp, 0x00, sizeof(sg_tmp)); -+ sg_tbl_out = &sg_tmp; ++ ret = cpufreq_register_driver(&dt_cpufreq_driver); ++ if (ret) { ++ dev_err(&pdev->dev, "failed register driver: %d\n", ret); ++ goto err; + } + -+ ret = hash_n_crypt_fd(ses_ptr, cop, sg_tbl_in->sgl, sg_tbl_out->sgl, cop->len); -+ -+exit: -+ if (dma_buf_in) -+ put_dmafd_sgtbl(kcop->cop.src_fd, DMA_TO_DEVICE, -+ sg_tbl_in, dma_attach_in, dma_buf_in); -+ -+ if (dma_buf_out) -+ put_dmafd_sgtbl(kcop->cop.dst_fd, DMA_FROM_DEVICE, -+ sg_tbl_out, dma_attach_out, dma_buf_out); ++ return 0; ++err: ++ dt_cpufreq_release(); + return ret; +} + -+static int crypto_fd_run(struct fcrypt *fcr, struct kernel_crypt_fd_op *kcop) ++static void dt_cpufreq_remove(struct platform_device *pdev) +{ -+ struct csession *ses_ptr; -+ struct crypt_fd_op *cop = &kcop->cop; -+ int ret = -EINVAL; ++ cpufreq_unregister_driver(&dt_cpufreq_driver); ++ dt_cpufreq_release(); ++} + -+ if (unlikely(cop->op != COP_ENCRYPT && cop->op != COP_DECRYPT)) { -+ ddebug(1, "invalid operation op=%u", cop->op); -+ return -EINVAL; -+ } ++static struct platform_driver dt_cpufreq_platdrv = { ++ .driver = { ++ .name = "cpufreq-dt", ++ }, ++ .probe = dt_cpufreq_probe, ++ .remove_new = dt_cpufreq_remove, ++}; ++module_platform_driver(dt_cpufreq_platdrv); + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", cop->ses); -+ return -EINVAL; -+ } ++MODULE_ALIAS("platform:cpufreq-dt"); ++MODULE_AUTHOR("Viresh Kumar "); ++MODULE_AUTHOR("Shawn Guo "); ++MODULE_DESCRIPTION("Generic cpufreq driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c +new file mode 100644 +index 000000000..a128216fc +--- /dev/null ++++ b/drivers/cpufreq/cpufreq_interactive.c +@@ -0,0 +1,1643 @@ ++/* ++ * drivers/cpufreq/cpufreq_interactive.c ++ * ++ * Copyright (C) 2010-2016 Google, Inc. ++ * ++ * This software is licensed under the terms of the GNU General Public ++ * License version 2, as published by the Free Software Foundation, and ++ * may be copied, distributed, and modified under those terms. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * Author: Mike Chan (mike@android.com) ++ */ + -+ if (ses_ptr->hdata.init != 0 && (cop->flags == 0 || cop->flags & COP_FLAG_RESET)) { -+ ret = cryptodev_hash_reset(&ses_ptr->hdata); -+ if (unlikely(ret)) { -+ derr(1, "error in cryptodev_hash_reset()"); -+ goto out_unlock; -+ } -+ } ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + -+ if (ses_ptr->cdata.init != 0) { -+ int blocksize = ses_ptr->cdata.blocksize; ++#include ++#include ++#include ++#ifdef CONFIG_ARCH_ROCKCHIP ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ if (unlikely(cop->len % blocksize)) { -+ derr(1, "data size (%u) isn't a multiple of block size (%u)", -+ cop->len, blocksize); -+ ret = -EINVAL; -+ goto out_unlock; -+ } ++#define CREATE_TRACE_POINTS ++#include + -+ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcop->iv, -+ min(ses_ptr->cdata.ivsize, kcop->ivlen)); -+ } ++#define gov_attr_ro(_name) \ ++static struct governor_attr _name = \ ++__ATTR(_name, 0444, show_##_name, NULL) + -+ if (likely(cop->len)) { -+ ret = __crypto_fd_run(fcr, ses_ptr, kcop); -+ if (unlikely(ret)) -+ goto out_unlock; -+ } ++#define gov_attr_wo(_name) \ ++static struct governor_attr _name = \ ++__ATTR(_name, 0200, NULL, store_##_name) + -+ if (ses_ptr->cdata.init != 0) { -+ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcop->iv, -+ min(ses_ptr->cdata.ivsize, kcop->ivlen)); -+ } ++#define gov_attr_rw(_name) \ ++static struct governor_attr _name = \ ++__ATTR(_name, 0644, show_##_name, store_##_name) + -+ if (ses_ptr->hdata.init != 0 && -+ ((cop->flags & COP_FLAG_FINAL) || -+ (!(cop->flags & COP_FLAG_UPDATE) || cop->len == 0))) { ++/* Separate instance required for each 'interactive' directory in sysfs */ ++struct interactive_tunables { ++ struct gov_attr_set attr_set; + -+ ret = cryptodev_hash_final(&ses_ptr->hdata, kcop->hash_output); -+ if (unlikely(ret)) { -+ derr(0, "CryptoAPI failure: %d", ret); -+ goto out_unlock; -+ } -+ kcop->digestsize = ses_ptr->hdata.digestsize; -+ } ++ /* Hi speed to bump to from lo speed when load burst (default max) */ ++ unsigned int hispeed_freq; + -+out_unlock: -+ crypto_put_session(ses_ptr); ++ /* Go to hi speed when CPU load at or above this value. */ ++#define DEFAULT_GO_HISPEED_LOAD 99 ++ unsigned long go_hispeed_load; + -+ return ret; -+} ++ /* Target load. Lower values result in higher CPU speeds. */ ++ spinlock_t target_loads_lock; ++ unsigned int *target_loads; ++ int ntarget_loads; + -+static int kcop_map_fd_from_user(struct kernel_crypt_fd_map_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ if (unlikely(copy_from_user(&kcop->mop, arg, sizeof(kcop->mop)))) -+ return -EFAULT; ++ /* ++ * The minimum amount of time to spend at a frequency before we can ramp ++ * down. 
++ */ ++#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) ++ unsigned long min_sample_time; + -+ return 0; -+} ++ /* The sample rate of the timer used to increase frequency */ ++ unsigned long sampling_rate; + -+static int kcop_map_fd_to_user(struct kernel_crypt_fd_map_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ if (unlikely(copy_to_user(arg, &kcop->mop, sizeof(kcop->mop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; -+ } ++ /* ++ * Wait this long before raising speed above hispeed, by default a ++ * single timer interval. ++ */ ++ spinlock_t above_hispeed_delay_lock; ++ unsigned int *above_hispeed_delay; ++ int nabove_hispeed_delay; + -+ return 0; -+} ++ /* Non-zero means indefinite speed boost active */ ++ int boost; ++ /* Duration of a boot pulse in usecs */ ++ int boostpulse_duration; ++ /* End time of boost pulse in ktime converted to usecs */ ++ u64 boostpulse_endtime; ++#ifdef CONFIG_ARCH_ROCKCHIP ++ /* Frequency to which a touch boost takes the cpus to */ ++ unsigned long touchboost_freq; ++ /* Duration of a touchboost pulse in usecs */ ++ int touchboostpulse_duration_val; ++ /* End time of touchboost pulse in ktime converted to usecs */ ++ u64 touchboostpulse_endtime; ++#endif ++ bool boosted; + -+static int dma_fd_map_for_user(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) -+{ -+ struct device *crypto_dev = NULL; -+ struct dma_fd_map_node *map_node = NULL; ++ /* ++ * Max additional time to wait in idle, beyond sampling_rate, at speeds ++ * above minimum before wakeup to reduce speed, or -1 if unnecessary. ++ */ ++#define DEFAULT_TIMER_SLACK (4 * DEFAULT_SAMPLING_RATE) ++ unsigned long timer_slack_delay; ++ unsigned long timer_slack; ++ bool io_is_busy; ++}; + -+ /* check if dma_fd is already mapped */ -+ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); -+ if (map_node) { -+ kmop->mop.phys_addr = map_node->fd_map.mop.phys_addr; -+ return 0; -+ } ++/* Separate instance required for each 'struct cpufreq_policy' */ ++struct interactive_policy { ++ struct cpufreq_policy *policy; ++ struct interactive_tunables *tunables; ++ struct list_head tunables_hook; ++}; + -+ crypto_dev = rk_cryptodev_find_dev(NULL); -+ if (!crypto_dev) -+ return -EINVAL; ++/* Separate instance required for each CPU */ ++struct interactive_cpu { ++ struct update_util_data update_util; ++ struct interactive_policy *ipolicy; + -+ map_node = kzalloc(sizeof(*map_node), GFP_KERNEL); -+ if (!map_node) -+ return -ENOMEM; ++ struct irq_work irq_work; ++ u64 last_sample_time; ++ unsigned long next_sample_jiffies; ++ bool work_in_progress; + -+ map_node->dmabuf = dma_buf_get(kmop->mop.dma_fd); -+ if (IS_ERR(map_node->dmabuf)) { -+ derr(1, "dmabuf error! ret = %d", (int)PTR_ERR(map_node->dmabuf)); -+ map_node->dmabuf = NULL; -+ goto error; -+ } ++ struct rw_semaphore enable_sem; ++ struct timer_list slack_timer; + -+ map_node->dma_attach = dma_buf_attach(map_node->dmabuf, crypto_dev); -+ if (IS_ERR(map_node->dma_attach)) { -+ derr(1, "dma_attach error! ret = %d", (int)PTR_ERR(map_node->dma_attach)); -+ map_node->dma_attach = NULL; -+ goto error; -+ } ++ spinlock_t load_lock; /* protects the next 4 fields */ ++ u64 time_in_idle; ++ u64 time_in_idle_timestamp; ++ u64 cputime_speedadj; ++ u64 cputime_speedadj_timestamp; + -+ map_node->sgtbl = dma_buf_map_attachment(map_node->dma_attach, DMA_BIDIRECTIONAL); -+ if (IS_ERR(map_node->sgtbl)) { -+ derr(1, "sg_tbl error! 
ret = %d", (int)PTR_ERR(map_node->sgtbl)); -+ map_node->sgtbl = NULL; -+ goto error; -+ } ++ spinlock_t target_freq_lock; /*protects target freq */ ++ unsigned int target_freq; + -+ map_node->fd_map.mop.dma_fd = kmop->mop.dma_fd; -+ map_node->fd_map.mop.phys_addr = map_node->sgtbl->sgl->dma_address; ++ unsigned int floor_freq; ++ u64 pol_floor_val_time; /* policy floor_validate_time */ ++ u64 loc_floor_val_time; /* per-cpu floor_validate_time */ ++ u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ ++ u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ ++ int cpu; ++}; + -+ mutex_lock(&fcr->sem); -+ list_add(&map_node->list, &fcr->dma_map_list); -+ mutex_unlock(&fcr->sem); ++static DEFINE_PER_CPU(struct interactive_cpu, interactive_cpu); + -+ kmop->mop.phys_addr = map_node->fd_map.mop.phys_addr; ++/* Realtime thread handles frequency scaling */ ++static struct task_struct *speedchange_task; ++static cpumask_t speedchange_cpumask; ++static spinlock_t speedchange_cpumask_lock; + -+ return 0; -+error: -+ if (map_node->sgtbl) -+ dma_buf_unmap_attachment(map_node->dma_attach, map_node->sgtbl, DMA_BIDIRECTIONAL); ++/* Target load. Lower values result in higher CPU speeds. */ ++#define DEFAULT_TARGET_LOAD 90 ++static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; + -+ if (map_node->dma_attach) -+ dma_buf_detach(map_node->dmabuf, map_node->dma_attach); ++#define DEFAULT_SAMPLING_RATE (20 * USEC_PER_MSEC) ++#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_SAMPLING_RATE ++static unsigned int default_above_hispeed_delay[] = { ++ DEFAULT_ABOVE_HISPEED_DELAY ++}; + -+ if (map_node->dmabuf) -+ dma_buf_put(map_node->dmabuf); ++/* Iterate over interactive policies for tunables */ ++#define for_each_ipolicy(__ip) \ ++ list_for_each_entry(__ip, &tunables->attr_set.policy_list, tunables_hook) + -+ kfree(map_node); ++static struct interactive_tunables *global_tunables; ++static DEFINE_MUTEX(global_tunables_lock); ++#ifdef CONFIG_ARCH_ROCKCHIP ++static struct interactive_tunables backup_tunables[2]; ++#endif + -+ return -EINVAL; ++static inline void update_slack_delay(struct interactive_tunables *tunables) ++{ ++ tunables->timer_slack_delay = usecs_to_jiffies(tunables->timer_slack + ++ tunables->sampling_rate); +} + -+static int dma_fd_unmap_for_user(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) ++static bool timer_slack_required(struct interactive_cpu *icpu) +{ -+ struct dma_fd_map_node *tmp, *map_node; -+ bool is_found = false; -+ int ret = 0; -+ -+ mutex_lock(&fcr->sem); -+ list_for_each_entry_safe(map_node, tmp, &fcr->dma_map_list, list) { -+ if (map_node->fd_map.mop.dma_fd == kmop->mop.dma_fd && -+ map_node->fd_map.mop.phys_addr == kmop->mop.phys_addr) { -+ dma_buf_unmap_attachment(map_node->dma_attach, map_node->sgtbl, -+ DMA_BIDIRECTIONAL); -+ dma_buf_detach(map_node->dmabuf, map_node->dma_attach); -+ dma_buf_put(map_node->dmabuf); -+ list_del(&map_node->list); -+ kfree(map_node); -+ kmop->mop.phys_addr = 0; -+ is_found = true; -+ break; -+ } -+ } ++ struct interactive_policy *ipolicy = icpu->ipolicy; ++ struct interactive_tunables *tunables = ipolicy->tunables; + -+ if (unlikely(!is_found)) { -+ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); -+ ret = -ENOENT; -+ mutex_unlock(&fcr->sem); -+ goto exit; -+ } ++ if (tunables->timer_slack == 0) ++ return false; + -+ mutex_unlock(&fcr->sem); ++ if (icpu->target_freq > ipolicy->policy->min) ++ return true; + -+exit: -+ return ret; ++ return false; +} + -+static int dma_fd_begin_cpu_access(struct fcrypt *fcr, 
struct kernel_crypt_fd_map_op *kmop) ++static void gov_slack_timer_start(struct interactive_cpu *icpu, int cpu) +{ -+ struct dma_fd_map_node *map_node = NULL; -+ -+ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); -+ if (unlikely(!map_node)) { -+ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); -+ return -ENOENT; -+ } ++ struct interactive_tunables *tunables = icpu->ipolicy->tunables; + -+ return dma_buf_begin_cpu_access(map_node->dmabuf, DMA_BIDIRECTIONAL); ++ icpu->slack_timer.expires = jiffies + tunables->timer_slack_delay; ++ add_timer_on(&icpu->slack_timer, cpu); +} + -+static int dma_fd_end_cpu_access(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) ++static void gov_slack_timer_modify(struct interactive_cpu *icpu) +{ -+ struct dma_fd_map_node *map_node = NULL; -+ -+ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); -+ if (unlikely(!map_node)) { -+ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); -+ return -ENOENT; -+ } ++ struct interactive_tunables *tunables = icpu->ipolicy->tunables; + -+ return dma_buf_end_cpu_access(map_node->dmabuf, DMA_BIDIRECTIONAL); ++ mod_timer(&icpu->slack_timer, jiffies + tunables->timer_slack_delay); +} + -+static int kcop_rsa_from_user(struct kernel_crypt_rsa_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static void slack_timer_resched(struct interactive_cpu *icpu, int cpu, ++ bool modify) +{ -+ if (unlikely(copy_from_user(&kcop->rop, arg, sizeof(kcop->rop)))) -+ return -EFAULT; ++ struct interactive_tunables *tunables = icpu->ipolicy->tunables; ++ unsigned long flags; + -+ return 0; -+} ++ spin_lock_irqsave(&icpu->load_lock, flags); + -+static int kcop_rsa_to_user(struct kernel_crypt_rsa_op *kcop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ if (unlikely(copy_to_user(arg, &kcop->rop, sizeof(kcop->rop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; ++ icpu->time_in_idle = get_cpu_idle_time(cpu, ++ &icpu->time_in_idle_timestamp, ++ tunables->io_is_busy); ++ icpu->cputime_speedadj = 0; ++ icpu->cputime_speedadj_timestamp = icpu->time_in_idle_timestamp; ++ ++ if (timer_slack_required(icpu)) { ++ if (modify) ++ gov_slack_timer_modify(icpu); ++ else ++ gov_slack_timer_start(icpu, cpu); + } + -+ return 0; ++ spin_unlock_irqrestore(&icpu->load_lock, flags); +} + -+static int crypto_rsa_run(struct fcrypt *fcr, struct kernel_crypt_rsa_op *krop) ++static unsigned int ++freq_to_above_hispeed_delay(struct interactive_tunables *tunables, ++ unsigned int freq) +{ -+ int ret; -+ u8 *key = NULL, *in = NULL, *out = NULL; -+ u32 out_len_max; -+ struct crypt_rsa_op *rop = &krop->rop; -+ const char *driver = "rsa-rk"; -+ struct crypto_akcipher *tfm = NULL; -+ struct akcipher_request *req = NULL; -+ DECLARE_CRYPTO_WAIT(wait); -+ struct scatterlist src, dst; -+ bool is_priv_key = (rop->flags & COP_FLAG_RSA_PRIV) == COP_FLAG_RSA_PRIV; ++ unsigned long flags; ++ unsigned int ret; ++ int i; + -+ /* The key size cannot exceed RK_RSA_BER_KEY_MAX Byte */ -+ if (rop->key_len > RK_RSA_BER_KEY_MAX) -+ return -ENOKEY; ++ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + -+ if (rop->in_len > RK_RSA_KEY_MAX_BYTES || -+ rop->out_len > RK_RSA_KEY_MAX_BYTES) -+ return -EINVAL; ++ for (i = 0; i < tunables->nabove_hispeed_delay - 1 && ++ freq >= tunables->above_hispeed_delay[i + 1]; i += 2) ++ ; + -+ tfm = crypto_alloc_akcipher(driver, 0, 0); -+ if (IS_ERR(tfm)) { -+ ddebug(2, "alg: akcipher: Failed to load tfm for %s: %ld\n", -+ driver, PTR_ERR(tfm)); -+ return PTR_ERR(tfm); -+ } ++ ret = tunables->above_hispeed_delay[i]; ++ 
spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + -+ req = akcipher_request_alloc(tfm, GFP_KERNEL); -+ if (!req) { -+ ddebug(2, "akcipher_request_alloc failed\n"); -+ ret = -ENOMEM; -+ goto exit; -+ } ++ return ret; ++} + -+ key = kzalloc(rop->key_len, GFP_KERNEL); -+ if (!key) { -+ ret = -ENOMEM; -+ goto exit; -+ } ++static unsigned int freq_to_targetload(struct interactive_tunables *tunables, ++ unsigned int freq) ++{ ++ unsigned long flags; ++ unsigned int ret; ++ int i; + -+ if (unlikely(copy_from_user(key, u64_to_user_ptr(rop->key), rop->key_len))) { -+ ret = -EFAULT; -+ goto exit; -+ } ++ spin_lock_irqsave(&tunables->target_loads_lock, flags); + -+ in = kzalloc(rop->in_len, GFP_KERNEL); -+ if (!in) { -+ ret = -ENOMEM; -+ goto exit; -+ } ++ for (i = 0; i < tunables->ntarget_loads - 1 && ++ freq >= tunables->target_loads[i + 1]; i += 2) ++ ; + -+ if (unlikely(copy_from_user(in, u64_to_user_ptr(rop->in), rop->in_len))) { -+ ret = -EFAULT; -+ goto exit; -+ } ++ ret = tunables->target_loads[i]; ++ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); ++ return ret; ++} + -+ if (is_priv_key) -+ ret = crypto_akcipher_set_priv_key(tfm, key, rop->key_len); -+ else -+ ret = crypto_akcipher_set_pub_key(tfm, key, rop->key_len); -+ if (ret) { -+ derr(1, "crypto_akcipher_set_%s_key error[%d]", -+ is_priv_key ? "priv" : "pub", ret); -+ ret = -ENOKEY; -+ goto exit; -+ } ++/* ++ * If increasing frequencies never map to a lower target load then ++ * choose_freq() will find the minimum frequency that does not exceed its ++ * target load given the current load. ++ */ ++static unsigned int choose_freq(struct interactive_cpu *icpu, ++ unsigned int loadadjfreq) ++{ ++ struct cpufreq_policy *policy = icpu->ipolicy->policy; ++ struct cpufreq_frequency_table *freq_table = policy->freq_table; ++ unsigned int prevfreq, freqmin = 0, freqmax = UINT_MAX, tl; ++ unsigned int freq = policy->cur; ++ int index; + -+ out_len_max = crypto_akcipher_maxsize(tfm); -+ out = kzalloc(out_len_max, GFP_KERNEL); -+ if (!out) { -+ ret = -ENOMEM; -+ goto exit; -+ } ++ do { ++ prevfreq = freq; ++ tl = freq_to_targetload(icpu->ipolicy->tunables, freq); + -+ sg_init_one(&src, in, rop->in_len); -+ sg_init_one(&dst, out, out_len_max); ++ /* ++ * Find the lowest frequency where the computed load is less ++ * than or equal to the target load. 
++ */ + -+ crypto_init_wait(&wait); -+ akcipher_request_set_crypt(req, &src, &dst, rop->in_len, out_len_max); ++ index = cpufreq_frequency_table_target(policy, loadadjfreq / tl, ++ CPUFREQ_RELATION_L); + -+ switch (rop->op) { -+ case AOP_ENCRYPT: -+ ret = crypto_wait_req(crypto_akcipher_encrypt(req), &wait); -+ break; -+ case AOP_DECRYPT: -+ ret = crypto_wait_req(crypto_akcipher_decrypt(req), &wait); -+ break; -+ default: -+ derr(1, "unknown ops %x", rop->op); -+ ret = -EINVAL; -+ break; -+ } ++ freq = freq_table[index].frequency; + -+ if (ret) { -+ derr(1, "alg: akcipher: failed %d\n", ret); -+ goto exit; -+ } ++ if (freq > prevfreq) { ++ /* The previous frequency is too low */ ++ freqmin = prevfreq; + -+ if (unlikely(copy_to_user(u64_to_user_ptr(rop->out), out, req->dst_len))) { -+ derr(1, "Cannot copy to userspace"); -+ ret = -EFAULT; -+ goto exit; -+ } ++ if (freq < freqmax) ++ continue; + -+ rop->out_len = req->dst_len; -+exit: -+ kfree(out); -+ kfree(in); -+ kfree(key); -+ akcipher_request_free(req); -+ crypto_free_akcipher(tfm); ++ /* Find highest frequency that is less than freqmax */ ++ index = cpufreq_frequency_table_target(policy, ++ freqmax - 1, CPUFREQ_RELATION_H); + -+ return ret; -+} ++ freq = freq_table[index].frequency; + -+/* Typical AEAD (i.e. GCM) encryption/decryption. -+ * During decryption the tag is verified. -+ */ -+static int rk_auth_fd_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_fd_op *kcaop, -+ struct scatterlist *auth_sg, uint32_t auth_len, -+ struct scatterlist *src_sg, -+ struct scatterlist *dst_sg, uint32_t len) -+{ -+ int ret; -+ struct crypt_auth_fd_op *caop = &kcaop->caop; -+ int max_tag_len; ++ if (freq == freqmin) { ++ /* ++ * The first frequency below freqmax has already ++ * been found to be too low. freqmax is the ++ * lowest speed we found that is fast enough. ++ */ ++ freq = freqmax; ++ break; ++ } ++ } else if (freq < prevfreq) { ++ /* The previous frequency is high enough. */ ++ freqmax = prevfreq; + -+ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); -+ if (unlikely(caop->tag_len > max_tag_len)) { -+ derr(0, "Illegal tag length: %d", caop->tag_len); -+ return -EINVAL; -+ } ++ if (freq > freqmin) ++ continue; + -+ if (caop->tag_len) -+ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); -+ else -+ caop->tag_len = max_tag_len; ++ /* Find lowest frequency that is higher than freqmin */ ++ index = cpufreq_frequency_table_target(policy, ++ freqmin + 1, CPUFREQ_RELATION_L); + -+ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); ++ freq = freq_table[index].frequency; + -+ if (caop->op == COP_ENCRYPT) { -+ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_encrypt: %d", ret); -+ return ret; ++ /* ++ * If freqmax is the first frequency above ++ * freqmin then we have already found that ++ * this speed is fast enough. ++ */ ++ if (freq == freqmax) ++ break; + } -+ } else { -+ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, -+ src_sg, dst_sg, len); + -+ if (unlikely(ret)) { -+ derr(0, "cryptodev_cipher_decrypt: %d", ret); -+ return ret; -+ } -+ } ++ /* If same frequency chosen as previous then done. 
*/ ++ } while (freq != prevfreq); + -+ return 0; ++ return freq; +} + -+static void sg_init_table_set_page(struct scatterlist *sgl_dst, unsigned int nents_dst, -+ struct scatterlist *sgl_src, unsigned int len) ++static u64 update_load(struct interactive_cpu *icpu, int cpu) +{ -+ sg_init_table(sgl_dst, nents_dst); -+ sg_set_page(sgl_dst, sg_page(sgl_src), len, sgl_src->offset); ++ struct interactive_tunables *tunables = icpu->ipolicy->tunables; ++ u64 now_idle, now, active_time, delta_idle, delta_time; + -+ sg_dma_address(sgl_dst) = sg_dma_address(sgl_src); -+ sg_dma_len(sgl_dst) = len; -+} ++ now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); ++ delta_idle = (now_idle - icpu->time_in_idle); ++ delta_time = (now - icpu->time_in_idle_timestamp); + -+/* This is the main crypto function - zero-copy edition */ -+static int crypto_auth_fd_zc_rk(struct fcrypt *fcr, struct csession *ses_ptr, -+ struct kernel_crypt_auth_fd_op *kcaop) -+{ -+ struct crypt_auth_fd_op *caop = &kcaop->caop; -+ struct dma_buf *dma_buf_in = NULL, *dma_buf_out = NULL, *dma_buf_auth = NULL; -+ struct sg_table *sg_tbl_in = NULL, *sg_tbl_out = NULL, *sg_tbl_auth = NULL; -+ struct dma_buf_attachment *dma_attach_in = NULL, *dma_attach_out = NULL; -+ struct dma_buf_attachment *dma_attach_auth = NULL; -+ struct dma_fd_map_node *node_src = NULL, *node_dst = NULL, *node_auth = NULL; -+ struct scatterlist *dst_sg, *src_sg; -+ struct scatterlist auth_src[2], auth_dst[2], src[2], dst[2], tag[2]; -+ unsigned char *tag_buf = NULL; -+ int ret = 0; ++ if (delta_time <= delta_idle) ++ active_time = 0; ++ else ++ active_time = delta_time - delta_idle; + -+ node_src = dma_fd_find_node(fcr, caop->src_fd); -+ if (node_src) { -+ sg_tbl_in = node_src->sgtbl; -+ } else { -+ ret = get_dmafd_sgtbl(caop->src_fd, caop->len, DMA_TO_DEVICE, -+ &sg_tbl_in, &dma_attach_in, &dma_buf_in); -+ if (unlikely(ret)) { -+ derr(1, "Error get_dmafd_sgtbl src."); -+ goto exit; -+ } -+ } ++ icpu->cputime_speedadj += active_time * icpu->ipolicy->policy->cur; + -+ node_dst = dma_fd_find_node(fcr, caop->dst_fd); -+ if (node_dst) { -+ sg_tbl_out = node_dst->sgtbl; -+ } else { -+ ret = get_dmafd_sgtbl(caop->dst_fd, caop->len, DMA_FROM_DEVICE, -+ &sg_tbl_out, &dma_attach_out, &dma_buf_out); -+ if (unlikely(ret)) { -+ derr(1, "Error get_dmafd_sgtbl dst."); -+ goto exit; -+ } -+ } ++ icpu->time_in_idle = now_idle; ++ icpu->time_in_idle_timestamp = now; + -+ src_sg = sg_tbl_in->sgl; -+ dst_sg = sg_tbl_out->sgl; ++ return now; ++} + -+ if (caop->auth_len > 0) { -+ node_auth = dma_fd_find_node(fcr, caop->auth_fd); -+ if (node_auth) { -+ sg_tbl_auth = node_auth->sgtbl; -+ } else { -+ ret = get_dmafd_sgtbl(caop->auth_fd, caop->auth_len, DMA_TO_DEVICE, -+ &sg_tbl_auth, &dma_attach_auth, &dma_buf_auth); -+ if (unlikely(ret)) { -+ derr(1, "Error get_dmafd_sgtbl auth."); -+ goto exit; -+ } -+ } ++/* Re-evaluate load to see if a frequency change is required or not */ ++static void eval_target_freq(struct interactive_cpu *icpu) ++{ ++ struct interactive_tunables *tunables = icpu->ipolicy->tunables; ++ struct cpufreq_policy *policy = icpu->ipolicy->policy; ++ struct cpufreq_frequency_table *freq_table = policy->freq_table; ++ u64 cputime_speedadj, now, max_fvtime; ++ unsigned int new_freq, loadadjfreq, index, delta_time; ++ unsigned long flags; ++ int cpu_load; ++ int cpu = smp_processor_id(); + -+ sg_init_table_set_page(auth_src, ARRAY_SIZE(auth_src), -+ sg_tbl_auth->sgl, caop->auth_len); ++ spin_lock_irqsave(&icpu->load_lock, flags); ++ now = update_load(icpu, 
smp_processor_id()); ++ delta_time = (unsigned int)(now - icpu->cputime_speedadj_timestamp); ++ cputime_speedadj = icpu->cputime_speedadj; ++ spin_unlock_irqrestore(&icpu->load_lock, flags); + -+ sg_init_table_set_page(auth_dst, ARRAY_SIZE(auth_dst), -+ sg_tbl_auth->sgl, caop->auth_len); ++ if (!delta_time) ++ return; + -+ sg_init_table_set_page(src, ARRAY_SIZE(src), -+ sg_tbl_in->sgl, caop->len); ++ spin_lock_irqsave(&icpu->target_freq_lock, flags); ++ do_div(cputime_speedadj, delta_time); ++ loadadjfreq = (unsigned int)cputime_speedadj * 100; ++ cpu_load = loadadjfreq / policy->cur; ++ tunables->boosted = tunables->boost || ++ now < tunables->boostpulse_endtime; + -+ sg_init_table_set_page(dst, ARRAY_SIZE(dst), -+ sg_tbl_out->sgl, caop->len); ++ if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { ++ if (policy->cur < tunables->hispeed_freq) { ++ new_freq = tunables->hispeed_freq; ++ } else { ++ new_freq = choose_freq(icpu, loadadjfreq); + -+ sg_chain(auth_src, 2, src); -+ sg_chain(auth_dst, 2, dst); -+ src_sg = auth_src; -+ dst_sg = auth_dst; ++ if (new_freq < tunables->hispeed_freq) ++ new_freq = tunables->hispeed_freq; ++ } ++ } else { ++ new_freq = choose_freq(icpu, loadadjfreq); ++ if (new_freq > tunables->hispeed_freq && ++ policy->cur < tunables->hispeed_freq) ++ new_freq = tunables->hispeed_freq; + } + -+ /* get tag */ -+ if (caop->tag && caop->tag_len > 0) { -+ tag_buf = kcalloc(caop->tag_len, sizeof(*tag_buf), GFP_KERNEL); -+ if (unlikely(!tag_buf)) { -+ derr(1, "unable to kcalloc %d.", caop->tag_len); -+ ret = -EFAULT; -+ goto exit; -+ } ++#ifdef CONFIG_ARCH_ROCKCHIP ++ if (now < tunables->touchboostpulse_endtime && ++ new_freq < tunables->touchboost_freq) { ++ new_freq = tunables->touchboost_freq; ++ } ++#endif ++ if (policy->cur >= tunables->hispeed_freq && ++ new_freq > policy->cur && ++ now - icpu->pol_hispeed_val_time < freq_to_above_hispeed_delay(tunables, policy->cur)) { ++ trace_cpufreq_interactive_notyet(cpu, cpu_load, ++ icpu->target_freq, policy->cur, new_freq); ++ goto exit; ++ } + -+ ret = copy_from_user(tag_buf, u64_to_user_ptr((u64)caop->tag), caop->tag_len); -+ if (unlikely(ret)) { -+ derr(1, "unable to copy tag data from userspace."); -+ ret = -EFAULT; ++ icpu->loc_hispeed_val_time = now; ++ ++ index = cpufreq_frequency_table_target(policy, new_freq, ++ CPUFREQ_RELATION_L); ++ new_freq = freq_table[index].frequency; ++ ++ /* ++ * Do not scale below floor_freq unless we have been at or above the ++ * floor frequency for the minimum sample time since last validated. ++ */ ++ max_fvtime = max(icpu->pol_floor_val_time, icpu->loc_floor_val_time); ++ if (new_freq < icpu->floor_freq && icpu->target_freq >= policy->cur) { ++ if (now - max_fvtime < tunables->min_sample_time) { ++ trace_cpufreq_interactive_notyet(cpu, cpu_load, ++ icpu->target_freq, policy->cur, new_freq); + goto exit; + } ++ } + -+ sg_init_table(tag, 2); -+ sg_set_buf(tag, tag_buf, caop->tag_len); ++ /* ++ * Update the timestamp for checking whether speed has been held at ++ * or above the selected frequency for a minimum of min_sample_time, ++ * if not boosted to hispeed_freq. If boosted to hispeed_freq then we ++ * allow the speed to drop as soon as the boostpulse duration expires ++ * (or the indefinite boost is turned off). 
++ */ + -+ if (caop->op == COP_ENCRYPT) -+ sg_chain(dst, 2, tag); -+ else -+ sg_chain(src, 2, tag); ++ if (!tunables->boosted || new_freq > tunables->hispeed_freq) { ++ icpu->floor_freq = new_freq; ++ if (icpu->target_freq >= policy->cur || new_freq >= policy->cur) ++ icpu->loc_floor_val_time = now; + } + -+ if (caop->op == COP_ENCRYPT) -+ ret = rk_auth_fd_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, -+ src_sg, dst_sg, caop->len); -+ else -+ ret = rk_auth_fd_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, -+ src_sg, dst_sg, caop->len + caop->tag_len); -+ -+ if (!ret && caop->op == COP_ENCRYPT && tag_buf) { -+ ret = copy_to_user(u64_to_user_ptr((u64)kcaop->caop.tag), tag_buf, caop->tag_len); -+ if (unlikely(ret)) { -+ derr(1, "Error in copying to userspace"); -+ ret = -EFAULT; -+ goto exit; -+ } ++ if (icpu->target_freq == new_freq && ++ icpu->target_freq <= policy->cur) { ++ trace_cpufreq_interactive_already(cpu, cpu_load, ++ icpu->target_freq, policy->cur, new_freq); ++ goto exit; + } + -+exit: -+ kfree(tag_buf); ++ trace_cpufreq_interactive_target(cpu, cpu_load, icpu->target_freq, ++ policy->cur, new_freq); + -+ if (dma_buf_in) -+ put_dmafd_sgtbl(caop->src_fd, DMA_TO_DEVICE, -+ sg_tbl_in, dma_attach_in, dma_buf_in); ++ icpu->target_freq = new_freq; ++ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); + -+ if (dma_buf_out) -+ put_dmafd_sgtbl(caop->dst_fd, DMA_FROM_DEVICE, -+ sg_tbl_out, dma_attach_out, dma_buf_out); ++ spin_lock_irqsave(&speedchange_cpumask_lock, flags); ++ cpumask_set_cpu(cpu, &speedchange_cpumask); ++ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + -+ if (dma_buf_auth) -+ put_dmafd_sgtbl(caop->auth_fd, DMA_TO_DEVICE, -+ sg_tbl_auth, dma_attach_auth, dma_buf_auth); ++ wake_up_process(speedchange_task); ++ return; + -+ return ret; ++exit: ++ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); +} + -+static int __crypto_auth_fd_run_zc(struct fcrypt *fcr, struct csession *ses_ptr, -+ struct kernel_crypt_auth_fd_op *kcaop) ++static void cpufreq_interactive_update(struct interactive_cpu *icpu) +{ -+ struct crypt_auth_fd_op *caop = &kcaop->caop; -+ int ret; ++ eval_target_freq(icpu); ++ slack_timer_resched(icpu, smp_processor_id(), true); ++} + -+ if (caop->flags & COP_FLAG_AEAD_RK_TYPE) -+ ret = crypto_auth_fd_zc_rk(fcr, ses_ptr, kcaop); -+ else -+ ret = -EINVAL; /* other types, not implemented */ ++static void cpufreq_interactive_idle_end(void) ++{ ++ struct interactive_cpu *icpu = &per_cpu(interactive_cpu, ++ smp_processor_id()); ++ unsigned long sampling_rate; + -+ return ret; ++ if (!down_read_trylock(&icpu->enable_sem)) ++ return; ++ ++ if (icpu->ipolicy) { ++ /* ++ * We haven't sampled load for more than sampling_rate time, do ++ * it right now. 
++ */ ++ if (time_after_eq(jiffies, icpu->next_sample_jiffies)) { ++ sampling_rate = icpu->ipolicy->tunables->sampling_rate; ++ icpu->last_sample_time = local_clock(); ++ icpu->next_sample_jiffies = usecs_to_jiffies(sampling_rate) + jiffies; ++ cpufreq_interactive_update(icpu); ++ } ++ } ++ ++ up_read(&icpu->enable_sem); +} + -+static int crypto_auth_fd_run(struct fcrypt *fcr, struct kernel_crypt_auth_fd_op *kcaop) ++static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, ++ unsigned int *pmax_freq, ++ u64 *phvt, u64 *pfvt) +{ -+ struct csession *ses_ptr; -+ struct crypt_auth_fd_op *caop = &kcaop->caop; -+ int ret = -EINVAL; ++ struct interactive_cpu *icpu; ++ u64 hvt = ~0ULL, fvt = 0; ++ unsigned int max_freq = 0, i; + -+ if (unlikely(caop->op != COP_ENCRYPT && caop->op != COP_DECRYPT)) { -+ ddebug(1, "invalid operation op=%u", caop->op); -+ return -EINVAL; -+ } ++ for_each_cpu(i, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, i); + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", caop->ses); -+ return -EINVAL; ++ fvt = max(fvt, icpu->loc_floor_val_time); ++ if (icpu->target_freq > max_freq) { ++ max_freq = icpu->target_freq; ++ hvt = icpu->loc_hispeed_val_time; ++ } else if (icpu->target_freq == max_freq) { ++ hvt = min(hvt, icpu->loc_hispeed_val_time); ++ } + } + -+ if (unlikely(ses_ptr->cdata.init == 0)) { -+ derr(1, "cipher context not initialized"); -+ ret = -EINVAL; -+ goto out_unlock; ++ *pmax_freq = max_freq; ++ *phvt = hvt; ++ *pfvt = fvt; ++} ++ ++static void cpufreq_interactive_adjust_cpu(unsigned int cpu, ++ struct cpufreq_policy *policy) ++{ ++ struct interactive_cpu *icpu; ++ u64 hvt, fvt; ++ unsigned int max_freq; ++ int i; ++ ++ cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); ++ ++ for_each_cpu(i, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, i); ++ icpu->pol_floor_val_time = fvt; + } + -+ /* If we have a hash/mac handle reset its state */ -+ if (ses_ptr->hdata.init != 0) { -+ ret = cryptodev_hash_reset(&ses_ptr->hdata); -+ if (unlikely(ret)) { -+ derr(1, "error in cryptodev_hash_reset()"); -+ goto out_unlock; ++ if (max_freq != policy->cur) { ++ __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); ++ for_each_cpu(i, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, i); ++ icpu->pol_hispeed_val_time = hvt; + } + } + -+ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcaop->iv, -+ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); ++ trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur); ++} + -+ ret = __crypto_auth_fd_run_zc(fcr, ses_ptr, kcaop); -+ if (unlikely(ret)) { -+ derr(1, "error in __crypto_auth_fd_run_zc()"); -+ goto out_unlock; -+ } ++static int cpufreq_interactive_speedchange_task(void *data) ++{ ++ unsigned int cpu; ++ cpumask_t tmp_mask; ++ unsigned long flags; + -+ ret = 0; ++again: ++ set_current_state(TASK_INTERRUPTIBLE); ++ spin_lock_irqsave(&speedchange_cpumask_lock, flags); + -+ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcaop->iv, -+ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); ++ if (cpumask_empty(&speedchange_cpumask)) { ++ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); ++ schedule(); + -+out_unlock: -+ crypto_put_session(ses_ptr); -+ return ret; -+} ++ if (kthread_should_stop()) ++ return 0; + -+/* -+ * Return tag (digest) length for authenticated encryption -+ * If the cipher and digest are separate, hdata.init is set - just return -+ * digest length. 
Otherwise return digest length for aead ciphers -+ */ -+static int rk_cryptodev_get_tag_len(struct csession *ses_ptr) -+{ -+ if (ses_ptr->hdata.init) -+ return ses_ptr->hdata.digestsize; -+ else -+ return cryptodev_cipher_get_tag_size(&ses_ptr->cdata); -+} ++ spin_lock_irqsave(&speedchange_cpumask_lock, flags); ++ } + -+/* -+ * Calculate destination buffer length for authenticated encryption. The -+ * expectation is that user-space code allocates exactly the same space for -+ * destination buffer before calling cryptodev. The result is cipher-dependent. -+ */ -+static int rk_cryptodev_fd_get_dst_len(struct crypt_auth_fd_op *caop, struct csession *ses_ptr) -+{ -+ int dst_len = caop->len; ++ set_current_state(TASK_RUNNING); ++ tmp_mask = speedchange_cpumask; ++ cpumask_clear(&speedchange_cpumask); ++ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + -+ if (caop->op == COP_DECRYPT) -+ return dst_len; ++ for_each_cpu(cpu, &tmp_mask) { ++ struct interactive_cpu *icpu = &per_cpu(interactive_cpu, cpu); ++ struct cpufreq_policy *policy; + -+ dst_len += caop->tag_len; ++ policy = cpufreq_cpu_get(cpu); ++ if (!policy) ++ continue; + -+ /* for TLS always add some padding so the total length is rounded to -+ * cipher block size -+ */ -+ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE) { -+ int bs = ses_ptr->cdata.blocksize; ++ down_write(&policy->rwsem); + -+ dst_len += bs - (dst_len % bs); ++ if (likely(down_read_trylock(&icpu->enable_sem))) { ++ if (likely(icpu->ipolicy)) ++ cpufreq_interactive_adjust_cpu(cpu, policy); ++ up_read(&icpu->enable_sem); ++ } ++ ++ up_write(&policy->rwsem); ++ cpufreq_cpu_put(policy); + } + -+ return dst_len; ++ goto again; +} + -+static int fill_kcaop_fd_from_caop(struct kernel_crypt_auth_fd_op *kcaop, struct fcrypt *fcr) ++static void cpufreq_interactive_boost(struct interactive_tunables *tunables) +{ -+ struct crypt_auth_fd_op *caop = &kcaop->caop; -+ struct csession *ses_ptr; -+ int ret; ++ struct interactive_policy *ipolicy; ++ struct cpufreq_policy *policy; ++ struct interactive_cpu *icpu; ++ unsigned long flags[2]; ++ bool wakeup = false; ++ int i; + -+ /* this also enters ses_ptr->sem */ -+ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); -+ if (unlikely(!ses_ptr)) { -+ derr(1, "invalid session ID=0x%08X", caop->ses); -+ return -EINVAL; -+ } ++ tunables->boosted = true; + -+ if (caop->tag_len == 0) -+ caop->tag_len = rk_cryptodev_get_tag_len(ses_ptr); ++ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); + -+ kcaop->ivlen = caop->iv ? 
ses_ptr->cdata.ivsize : 0; -+ kcaop->dst_len = rk_cryptodev_fd_get_dst_len(caop, ses_ptr); -+ kcaop->task = current; -+ kcaop->mm = current->mm; ++ for_each_ipolicy(ipolicy) { ++ policy = ipolicy->policy; + -+ if (caop->iv) { -+ ret = copy_from_user(kcaop->iv, u64_to_user_ptr((u64)caop->iv), kcaop->ivlen); -+ if (unlikely(ret)) { -+ derr(1, "error copy_from_user IV (%d bytes) returned %d for address %llu", -+ kcaop->ivlen, ret, caop->iv); -+ ret = -EFAULT; -+ goto out_unlock; ++ for_each_cpu(i, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, i); ++ ++ if (!down_read_trylock(&icpu->enable_sem)) ++ continue; ++ ++ if (!icpu->ipolicy) { ++ up_read(&icpu->enable_sem); ++ continue; ++ } ++ ++ spin_lock_irqsave(&icpu->target_freq_lock, flags[1]); ++ if (icpu->target_freq < tunables->hispeed_freq) { ++ icpu->target_freq = tunables->hispeed_freq; ++ cpumask_set_cpu(i, &speedchange_cpumask); ++ icpu->pol_hispeed_val_time = ktime_to_us(ktime_get()); ++ wakeup = true; ++ } ++ spin_unlock_irqrestore(&icpu->target_freq_lock, flags[1]); ++ ++ up_read(&icpu->enable_sem); + } + } + -+ ret = 0; ++ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); + -+out_unlock: -+ crypto_put_session(ses_ptr); -+ return ret; ++ if (wakeup) ++ wake_up_process(speedchange_task); +} + -+static int fill_caop_fd_from_kcaop(struct kernel_crypt_auth_fd_op *kcaop, struct fcrypt *fcr) ++static int cpufreq_interactive_notifier(struct notifier_block *nb, ++ unsigned long val, void *data) +{ -+ int ret; ++ struct cpufreq_freqs *freq = data; ++ struct cpufreq_policy *policy = freq->policy; ++ struct interactive_cpu *icpu; ++ unsigned long flags; ++ int cpu; + -+ kcaop->caop.len = kcaop->dst_len; ++ if (val != CPUFREQ_POSTCHANGE) ++ return 0; + -+ if (kcaop->ivlen && kcaop->caop.flags & COP_FLAG_WRITE_IV) { -+ ret = copy_to_user(u64_to_user_ptr((u64)kcaop->caop.iv), kcaop->iv, kcaop->ivlen); -+ if (unlikely(ret)) { -+ derr(1, "Error in copying iv to userspace"); -+ return -EFAULT; ++ for_each_cpu(cpu, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, cpu); ++ ++ if (!down_read_trylock(&icpu->enable_sem)) ++ continue; ++ ++ if (!icpu->ipolicy) { ++ up_read(&icpu->enable_sem); ++ continue; + } ++ ++ spin_lock_irqsave(&icpu->load_lock, flags); ++ update_load(icpu, cpu); ++ spin_unlock_irqrestore(&icpu->load_lock, flags); ++ ++ up_read(&icpu->enable_sem); + } + + return 0; +} + -+static int kcaop_fd_from_user(struct kernel_crypt_auth_fd_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) ++static struct notifier_block cpufreq_notifier_block = { ++ .notifier_call = cpufreq_interactive_notifier, ++}; ++ ++static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) +{ -+ if (unlikely(copy_from_user(&kcaop->caop, arg, sizeof(kcaop->caop)))) { -+ derr(1, "Error in copying from userspace"); -+ return -EFAULT; -+ } ++ const char *cp = buf; ++ int ntokens = 1, i = 0; ++ unsigned int *tokenized_data; ++ int err = -EINVAL; + -+ return fill_kcaop_fd_from_caop(kcaop, fcr); -+} ++ while ((cp = strpbrk(cp + 1, " :"))) ++ ntokens++; + -+static int kcaop_fd_to_user(struct kernel_crypt_auth_fd_op *kcaop, -+ struct fcrypt *fcr, void __user *arg) -+{ -+ int ret; ++ if (!(ntokens & 0x1)) ++ goto err; + -+ ret = fill_caop_fd_from_kcaop(kcaop, fcr); -+ if (unlikely(ret)) { -+ derr(1, "Error in fill_caop_from_kcaop"); -+ return ret; ++ tokenized_data = kcalloc(ntokens, sizeof(*tokenized_data), GFP_KERNEL); ++ if (!tokenized_data) { ++ err = -ENOMEM; ++ goto err; + } + -+ if (unlikely(copy_to_user(arg, &kcaop->caop, 
sizeof(kcaop->caop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; ++ cp = buf; ++ while (i < ntokens) { ++ if (sscanf(cp, "%u", &tokenized_data[i++]) != 1) ++ goto err_kfree; ++ ++ cp = strpbrk(cp, " :"); ++ if (!cp) ++ break; ++ cp++; + } + -+ return 0; ++ if (i != ntokens) ++ goto err_kfree; ++ ++ *num_tokens = ntokens; ++ return tokenized_data; ++ ++err_kfree: ++ kfree(tokenized_data); ++err: ++ return ERR_PTR(err); +} + -+long -+rk_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_) ++/* Interactive governor sysfs interface */ ++static struct interactive_tunables *to_tunables(struct gov_attr_set *attr_set) +{ -+ struct kernel_crypt_fd_op kcop; -+ struct kernel_crypt_fd_map_op kmop; -+ struct kernel_crypt_rsa_op krop; -+ struct kernel_crypt_auth_fd_op kcaop; -+ void __user *arg = (void __user *)arg_; -+ int ret; ++ return container_of(attr_set, struct interactive_tunables, attr_set); ++} + -+ switch (cmd) { -+ case RIOCCRYPT_FD: -+ ret = kcop_fd_from_user(&kcop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++#define show_one(file_name, type) \ ++static ssize_t show_##file_name(struct gov_attr_set *attr_set, char *buf) \ ++{ \ ++ struct interactive_tunables *tunables = to_tunables(attr_set); \ ++ return sprintf(buf, type "\n", tunables->file_name); \ ++} + -+ ret = crypto_fd_run(fcr, &kcop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_run"); -+ return ret; -+ } ++static ssize_t show_target_loads(struct gov_attr_set *attr_set, char *buf) ++{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long flags; ++ ssize_t ret = 0; ++ int i; + -+ return kcop_fd_to_user(&kcop, fcr, arg); -+ case RIOCAUTHCRYPT_FD: -+ ret = kcaop_fd_from_user(&kcaop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ spin_lock_irqsave(&tunables->target_loads_lock, flags); + -+ ret = crypto_auth_fd_run(fcr, &kcaop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_run"); -+ return ret; -+ } ++ for (i = 0; i < tunables->ntarget_loads; i++) ++ ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], ++ i & 0x1 ? 
":" : " "); + -+ return kcaop_fd_to_user(&kcaop, fcr, arg); -+ case RIOCCRYPT_FD_MAP: -+ ret = kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ sprintf(buf + ret - 1, "\n"); ++ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + -+ ret = dma_fd_map_for_user(fcr, &kmop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in dma_fd_map_for_user"); -+ return ret; -+ } ++ return ret; ++} + -+ return kcop_map_fd_to_user(&kmop, fcr, arg); -+ case RIOCCRYPT_FD_UNMAP: -+ ret = kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++static ssize_t store_target_loads(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) ++{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned int *new_target_loads; ++ unsigned long flags; ++ int ntokens; + -+ ret = dma_fd_unmap_for_user(fcr, &kmop); -+ if (unlikely(ret)) -+ dwarning(1, "Error in dma_fd_unmap_for_user"); ++ new_target_loads = get_tokenized_data(buf, &ntokens); ++ if (IS_ERR(new_target_loads)) ++ return PTR_ERR(new_target_loads); + -+ return ret; -+ case RIOCCRYPT_CPU_ACCESS: -+ ret = kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ spin_lock_irqsave(&tunables->target_loads_lock, flags); ++ if (tunables->target_loads != default_target_loads) ++ kfree(tunables->target_loads); ++ tunables->target_loads = new_target_loads; ++ tunables->ntarget_loads = ntokens; ++ spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + -+ ret = dma_fd_begin_cpu_access(fcr, &kmop); -+ if (unlikely(ret)) -+ dwarning(1, "Error in dma_fd_begin_cpu_access"); ++ return count; ++} + -+ return ret; -+ case RIOCCRYPT_DEV_ACCESS: -+ ret = kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++static ssize_t show_above_hispeed_delay(struct gov_attr_set *attr_set, ++ char *buf) ++{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long flags; ++ ssize_t ret = 0; ++ int i; + -+ ret = dma_fd_end_cpu_access(fcr, &kmop); -+ if (unlikely(ret)) -+ dwarning(1, "Error in dma_fd_end_cpu_access"); ++ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + -+ return ret; -+ case RIOCCRYPT_RSA_CRYPT: -+ ret = kcop_rsa_from_user(&krop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ for (i = 0; i < tunables->nabove_hispeed_delay; i++) ++ ret += sprintf(buf + ret, "%u%s", ++ tunables->above_hispeed_delay[i], ++ i & 0x1 ? 
":" : " "); + -+ ret = crypto_rsa_run(fcr, &krop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in rsa_run"); -+ return ret; -+ } ++ sprintf(buf + ret - 1, "\n"); ++ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + -+ return kcop_rsa_to_user(&krop, fcr, arg); -+ default: -+ return -EINVAL; -+ } ++ return ret; +} + -+/* compatibility code for 32bit userlands */ -+#ifdef CONFIG_COMPAT -+ -+static inline void -+compat_to_crypt_fd_op(struct compat_crypt_fd_op *compat, struct crypt_fd_op *cop) ++static ssize_t store_above_hispeed_delay(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) +{ -+ cop->ses = compat->ses; -+ cop->op = compat->op; -+ cop->flags = compat->flags; -+ cop->len = compat->len; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned int *new_above_hispeed_delay = NULL; ++ unsigned long flags; ++ int ntokens; + -+ cop->src_fd = compat->src_fd; -+ cop->dst_fd = compat->dst_fd; -+ cop->mac = compat_ptr(compat->mac); -+ cop->iv = compat_ptr(compat->iv); -+} ++ new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); ++ if (IS_ERR(new_above_hispeed_delay)) ++ return PTR_ERR(new_above_hispeed_delay); + -+static inline void -+crypt_fd_op_to_compat(struct crypt_fd_op *cop, struct compat_crypt_fd_op *compat) -+{ -+ compat->ses = cop->ses; -+ compat->op = cop->op; -+ compat->flags = cop->flags; -+ compat->len = cop->len; ++ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); ++ if (tunables->above_hispeed_delay != default_above_hispeed_delay) ++ kfree(tunables->above_hispeed_delay); ++ tunables->above_hispeed_delay = new_above_hispeed_delay; ++ tunables->nabove_hispeed_delay = ntokens; ++ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + -+ compat->src_fd = cop->src_fd; -+ compat->dst_fd = cop->dst_fd; -+ compat->mac = ptr_to_compat(cop->mac); -+ compat->iv = ptr_to_compat(cop->iv); ++ return count; +} + -+static int compat_kcop_fd_from_user(struct kernel_crypt_fd_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static ssize_t store_hispeed_freq(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) +{ -+ struct compat_crypt_fd_op compat_cop; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long int val; ++ int ret; + -+ if (unlikely(copy_from_user(&compat_cop, arg, sizeof(compat_cop)))) -+ return -EFAULT; -+ compat_to_crypt_fd_op(&compat_cop, &kcop->cop); ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ return fill_kcop_fd_from_cop(kcop, fcr); ++ tunables->hispeed_freq = val; ++ ++ return count; +} + -+static int compat_kcop_fd_to_user(struct kernel_crypt_fd_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static ssize_t store_go_hispeed_load(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) +{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; + int ret; -+ struct compat_crypt_fd_op compat_cop; + -+ ret = fill_cop_fd_from_kcop(kcop, fcr); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in fill_cop_from_kcop"); ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) + return ret; -+ } -+ crypt_fd_op_to_compat(&kcop->cop, &compat_cop); -+ -+ if (unlikely(copy_to_user(arg, &compat_cop, sizeof(compat_cop)))) { -+ dwarning(1, "Error copying to user"); -+ return -EFAULT; -+ } -+ return 0; -+} + -+static inline void -+compat_to_crypt_fd_map_op(struct compat_crypt_fd_map_op *compat, struct crypt_fd_map_op *mop) -+{ -+ mop->dma_fd = compat->dma_fd; -+ mop->phys_addr = compat->phys_addr; -+} ++ 
tunables->go_hispeed_load = val; + -+static inline void -+crypt_fd_map_op_to_compat(struct crypt_fd_map_op *mop, struct compat_crypt_fd_map_op *compat) -+{ -+ compat->dma_fd = mop->dma_fd; -+ compat->phys_addr = mop->phys_addr; ++ return count; +} + -+static int compat_kcop_map_fd_from_user(struct kernel_crypt_fd_map_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static ssize_t store_min_sample_time(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) +{ -+ struct compat_crypt_fd_map_op compat_mop; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + -+ if (unlikely(copy_from_user(&compat_mop, arg, sizeof(compat_mop)))) -+ return -EFAULT; ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ compat_to_crypt_fd_map_op(&compat_mop, &kcop->mop); ++ tunables->min_sample_time = val; + -+ return 0; ++ return count; +} + -+static int compat_kcop_map_fd_to_user(struct kernel_crypt_fd_map_op *kcop, -+ struct fcrypt *fcr, void __user *arg) ++static ssize_t show_timer_rate(struct gov_attr_set *attr_set, char *buf) +{ -+ struct compat_crypt_fd_map_op compat_mop; -+ -+ crypt_fd_map_op_to_compat(&kcop->mop, &compat_mop); -+ if (unlikely(copy_to_user(arg, &compat_mop, sizeof(compat_mop)))) { -+ derr(1, "Cannot copy to userspace"); -+ return -EFAULT; -+ } ++ struct interactive_tunables *tunables = to_tunables(attr_set); + -+ return 0; ++ return sprintf(buf, "%lu\n", tunables->sampling_rate); +} + -+long -+rk_compat_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_) ++static ssize_t store_timer_rate(struct gov_attr_set *attr_set, const char *buf, ++ size_t count) +{ -+ struct kernel_crypt_fd_op kcop; -+ struct kernel_crypt_fd_map_op kmop; -+ void __user *arg = (void __user *)arg_; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val, val_round; + int ret; + -+ switch (cmd) { -+ case COMPAT_RIOCCRYPT_FD: -+ ret = compat_kcop_fd_from_user(&kcop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } -+ -+ ret = crypto_fd_run(fcr, &kcop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in crypto_run"); -+ return ret; -+ } ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ return compat_kcop_fd_to_user(&kcop, fcr, arg); -+ case COMPAT_RIOCCRYPT_FD_MAP: -+ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ val_round = jiffies_to_usecs(usecs_to_jiffies(val)); ++ if (val != val_round) ++ pr_warn("timer_rate not aligned to jiffy. 
Rounded up to %lu\n", ++ val_round); + -+ ret = dma_fd_map_for_user(fcr, &kmop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in dma_fd_map_for_user"); -+ return ret; -+ } ++ tunables->sampling_rate = val_round; + -+ return compat_kcop_map_fd_to_user(&kmop, fcr, arg); -+ case COMPAT_RIOCCRYPT_FD_UNMAP: -+ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } ++ return count; ++} + -+ ret = dma_fd_unmap_for_user(fcr, &kmop); -+ if (unlikely(ret)) -+ dwarning(1, "Error in dma_fd_unmap_for_user"); ++static ssize_t store_timer_slack(struct gov_attr_set *attr_set, const char *buf, ++ size_t count) ++{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + ++ ret = kstrtol(buf, 10, &val); ++ if (ret < 0) + return ret; -+ case COMPAT_RIOCCRYPT_CPU_ACCESS: -+ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } -+ -+ ret = dma_fd_begin_cpu_access(fcr, &kmop); -+ if (unlikely(ret)) { -+ dwarning(1, "Error in dma_fd_begin_cpu_access"); -+ return ret; -+ } -+ -+ return compat_kcop_map_fd_to_user(&kmop, fcr, arg); -+ case COMPAT_RIOCCRYPT_DEV_ACCESS: -+ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); -+ if (unlikely(ret)) { -+ dwarning(1, "Error copying from user"); -+ return ret; -+ } + -+ ret = dma_fd_end_cpu_access(fcr, &kmop); -+ if (unlikely(ret)) -+ dwarning(1, "Error in dma_fd_end_cpu_access"); ++ tunables->timer_slack = val; ++ update_slack_delay(tunables); + -+ return ret; -+ default: -+ return rk_cryptodev_ioctl(fcr, cmd, arg_); -+ } ++ return count; +} + -+#endif /* CONFIG_COMPAT */ -+ -+struct cipher_algo_name_map { -+ uint32_t id; -+ const char *name; -+ int is_stream; -+ int is_aead; -+}; ++static ssize_t store_boost(struct gov_attr_set *attr_set, const char *buf, ++ size_t count) ++{ ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + -+struct hash_algo_name_map { -+ uint32_t id; -+ const char *name; -+ int is_hmac; -+}; ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+static const struct cipher_algo_name_map c_algo_map_tbl[] = { -+ {CRYPTO_RK_DES_ECB, "ecb-des-rk", 0, 0}, -+ {CRYPTO_RK_DES_CBC, "cbc-des-rk", 0, 0}, -+ {CRYPTO_RK_DES_CFB, "cfb-des-rk", 0, 0}, -+ {CRYPTO_RK_DES_OFB, "ofb-des-rk", 0, 0}, -+ {CRYPTO_RK_3DES_ECB, "ecb-des3_ede-rk", 0, 0}, -+ {CRYPTO_RK_3DES_CBC, "cbc-des3_ede-rk", 0, 0}, -+ {CRYPTO_RK_3DES_CFB, "cfb-des3_ede-rk", 0, 0}, -+ {CRYPTO_RK_3DES_OFB, "ofb-des3_ede-rk", 0, 0}, -+ {CRYPTO_RK_SM4_ECB, "ecb-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_CBC, "cbc-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_CFB, "cfb-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_OFB, "ofb-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_CTS, "cts-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_CTR, "ctr-sm4-rk", 1, 0}, -+ {CRYPTO_RK_SM4_XTS, "xts-sm4-rk", 0, 0}, -+ {CRYPTO_RK_SM4_CCM, "ccm-sm4-rk", 1, 1}, -+ {CRYPTO_RK_SM4_GCM, "gcm-sm4-rk", 1, 1}, -+ {CRYPTO_RK_AES_ECB, "ecb-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_CBC, "cbc-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_CFB, "cfb-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_OFB, "ofb-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_CTS, "cts-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_CTR, "ctr-aes-rk", 1, 0}, -+ {CRYPTO_RK_AES_XTS, "xts-aes-rk", 0, 0}, -+ {CRYPTO_RK_AES_CCM, "ccm-aes-rk", 1, 1}, -+ {CRYPTO_RK_AES_GCM, "gcm-aes-rk", 1, 1}, -+}; ++ tunables->boost = val; + -+static const struct hash_algo_name_map h_algo_map_tbl[] = { ++ if (tunables->boost) { ++ 
trace_cpufreq_interactive_boost("on"); ++ if (!tunables->boosted) ++ cpufreq_interactive_boost(tunables); ++ } else { ++ tunables->boostpulse_endtime = ktime_to_us(ktime_get()); ++ trace_cpufreq_interactive_unboost("off"); ++ } + -+ {CRYPTO_RK_MD5, "md5-rk", 0}, -+ {CRYPTO_RK_SHA1, "sha1-rk", 0}, -+ {CRYPTO_RK_SHA224, "sha224-rk", 0}, -+ {CRYPTO_RK_SHA256, "sha256-rk", 0}, -+ {CRYPTO_RK_SHA384, "sha384-rk", 0}, -+ {CRYPTO_RK_SHA512, "sha512-rk", 0}, -+ {CRYPTO_RK_SHA512_224, "sha512_224-rk", 0}, -+ {CRYPTO_RK_SHA512_256, "sha512_256-rk", 0}, -+ {CRYPTO_RK_SM3, "sm3-rk", 0}, -+ {CRYPTO_RK_MD5_HMAC, "hmac-md5-rk", 1}, -+ {CRYPTO_RK_SHA1_HMAC, "hmac-sha1-rk", 1}, -+ {CRYPTO_RK_SHA256_HMAC, "hmac-sha256-rk", 1}, -+ {CRYPTO_RK_SHA512_HMAC, "hmac-sha512-rk", 1}, -+ {CRYPTO_RK_SM3_HMAC, "hmac-sm3-rk", 1}, -+ {CRYPTO_RK_SM4_CMAC, "cmac-sm4-rk", 1}, -+ {CRYPTO_RK_SM4_CBC_MAC, "cbcmac-sm4-rk", 1}, -+ {CRYPTO_RK_AES_CMAC, "cmac-aes-rk", 1}, -+ {CRYPTO_RK_AES_CBC_MAC, "cbcmac-aes-rk", 1}, -+}; ++ return count; ++} + -+const char *rk_get_cipher_name(uint32_t id, int *is_stream, int *is_aead) ++static ssize_t store_boostpulse(struct gov_attr_set *attr_set, const char *buf, ++ size_t count) +{ -+ uint32_t i; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + -+ *is_stream = 0; -+ *is_aead = 0; ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ for (i = 0; i < ARRAY_SIZE(c_algo_map_tbl); i++) { -+ if (id == c_algo_map_tbl[i].id) { -+ *is_stream = c_algo_map_tbl[i].is_stream; -+ *is_aead = c_algo_map_tbl[i].is_aead; -+ return c_algo_map_tbl[i].name; -+ } -+ } ++ tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + ++ tunables->boostpulse_duration; ++ trace_cpufreq_interactive_boost("pulse"); ++ if (!tunables->boosted) ++ cpufreq_interactive_boost(tunables); + -+ return NULL; ++ return count; +} + -+const char *rk_get_hash_name(uint32_t id, int *is_hmac) ++static ssize_t store_boostpulse_duration(struct gov_attr_set *attr_set, ++ const char *buf, size_t count) +{ -+ uint32_t i; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + -+ *is_hmac = 0; ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ for (i = 0; i < ARRAY_SIZE(h_algo_map_tbl); i++) { -+ if (id == h_algo_map_tbl[i].id) { -+ *is_hmac = h_algo_map_tbl[i].is_hmac; -+ return h_algo_map_tbl[i].name; -+ } -+ } ++ tunables->boostpulse_duration = val; + -+ return NULL; ++ return count; +} + -+bool rk_cryptodev_multi_thread(const char *name) ++static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, ++ size_t count) +{ -+ uint32_t i; ++ struct interactive_tunables *tunables = to_tunables(attr_set); ++ unsigned long val; ++ int ret; + -+ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { -+ if (g_dev_infos[i].dev) -+ return g_dev_infos[i].is_multi_thread; -+ } ++ ret = kstrtoul(buf, 0, &val); ++ if (ret < 0) ++ return ret; + -+ return false; ++ tunables->io_is_busy = val; ++ ++ return count; +} -diff --git a/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h -new file mode 100644 -index 000000000..dff499be3 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h -@@ -0,0 +1,109 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ + -+/* Copyright (c) 2021 Rockchip Electronics Co. Ltd. 
*/ ++show_one(hispeed_freq, "%u"); ++show_one(go_hispeed_load, "%lu"); ++show_one(min_sample_time, "%lu"); ++show_one(timer_slack, "%lu"); ++show_one(boost, "%u"); ++show_one(boostpulse_duration, "%u"); ++show_one(io_is_busy, "%u"); + -+#ifndef __RK_CRYPTODEV_H__ -+#define __RK_CRYPTODEV_H__ ++gov_attr_rw(target_loads); ++gov_attr_rw(above_hispeed_delay); ++gov_attr_rw(hispeed_freq); ++gov_attr_rw(go_hispeed_load); ++gov_attr_rw(min_sample_time); ++gov_attr_rw(timer_rate); ++gov_attr_rw(timer_slack); ++gov_attr_rw(boost); ++gov_attr_wo(boostpulse); ++gov_attr_rw(boostpulse_duration); ++gov_attr_rw(io_is_busy); + -+#include -+#include -+#include "cryptodev.h" ++static struct attribute *interactive_attrs[] = { ++ &target_loads.attr, ++ &above_hispeed_delay.attr, ++ &hispeed_freq.attr, ++ &go_hispeed_load.attr, ++ &min_sample_time.attr, ++ &timer_rate.attr, ++ &timer_slack.attr, ++ &boost.attr, ++ &boostpulse.attr, ++ &boostpulse_duration.attr, ++ &io_is_busy.attr, ++ NULL ++}; ++ATTRIBUTE_GROUPS(interactive); + -+/* compatibility stuff */ -+#ifdef CONFIG_COMPAT -+#include ++static struct kobj_type interactive_tunables_ktype = { ++ .default_groups = interactive_groups, ++ .sysfs_ops = &governor_sysfs_ops, ++}; + -+/* input of RIOCCRYPT_FD */ -+struct compat_crypt_fd_op { -+ uint32_t ses; /* session identifier */ -+ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ -+ uint16_t flags; /* see COP_FLAG_* */ -+ uint32_t len; /* length of source data */ -+ int src_fd; /* source data */ -+ int dst_fd; /* pointer to output data */ -+ compat_uptr_t mac;/* pointer to output data for hash/MAC operations */ -+ compat_uptr_t iv;/* initialization vector for encryption operations */ ++static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, ++ unsigned long val, void *data) ++{ ++ if (val == IDLE_END) ++ cpufreq_interactive_idle_end(); ++ ++ return 0; ++} ++ ++static struct notifier_block cpufreq_interactive_idle_nb = { ++ .notifier_call = cpufreq_interactive_idle_notifier, +}; + -+/* input of RIOCCRYPT_FD_MAP/RIOCCRYPT_FD_UNMAP */ -+struct compat_crypt_fd_map_op { -+ int dma_fd; /* session identifier */ -+ uint32_t phys_addr; /* physics addr */ ++/* Interactive Governor callbacks */ ++struct interactive_governor { ++ struct cpufreq_governor gov; ++ unsigned int usage_count; +}; + -+/* compat ioctls, defined for the above structs */ -+#define COMPAT_RIOCCRYPT_FD _IOWR('r', 104, struct compat_crypt_fd_op) -+#define COMPAT_RIOCCRYPT_FD_MAP _IOWR('r', 105, struct compat_crypt_fd_map_op) -+#define COMPAT_RIOCCRYPT_FD_UNMAP _IOW('r', 106, struct compat_crypt_fd_map_op) -+#define COMPAT_RIOCCRYPT_CPU_ACCESS _IOW('r', 107, struct compat_crypt_fd_map_op) -+#define COMPAT_RIOCCRYPT_DEV_ACCESS _IOW('r', 108, struct compat_crypt_fd_map_op) ++static struct interactive_governor interactive_gov; + ++#define CPU_FREQ_GOV_INTERACTIVE (&interactive_gov.gov) + -+#endif /* CONFIG_COMPAT */ ++static void irq_work(struct irq_work *irq_work) ++{ ++ struct interactive_cpu *icpu = container_of(irq_work, struct ++ interactive_cpu, irq_work); + -+/* kernel-internal extension to struct crypt_op */ -+struct kernel_crypt_fd_op { -+ struct crypt_fd_op cop; ++ cpufreq_interactive_update(icpu); ++ icpu->work_in_progress = false; ++} + -+ int ivlen; -+ __u8 iv[EALG_MAX_BLOCK_LEN]; ++static void update_util_handler(struct update_util_data *data, u64 time, ++ unsigned int flags) ++{ ++ struct interactive_cpu *icpu = container_of(data, ++ struct interactive_cpu, update_util); ++ struct interactive_policy *ipolicy = icpu->ipolicy; 
++ struct interactive_tunables *tunables = ipolicy->tunables; ++ u64 delta_ns; + -+ int digestsize; -+ uint8_t hash_output[AALG_MAX_RESULT_LEN]; ++ /* ++ * The irq-work may not be allowed to be queued up right now. ++ * Possible reasons: ++ * - Work has already been queued up or is in progress. ++ * - It is too early (too little time from the previous sample). ++ */ ++ if (icpu->work_in_progress) ++ return; + -+ struct task_struct *task; -+ struct mm_struct *mm; -+}; ++ delta_ns = time - icpu->last_sample_time; ++ if ((s64)delta_ns < tunables->sampling_rate * NSEC_PER_USEC) ++ return; + -+struct kernel_crypt_auth_fd_op { -+ struct crypt_auth_fd_op caop; ++ icpu->last_sample_time = time; ++ icpu->next_sample_jiffies = usecs_to_jiffies(tunables->sampling_rate) + ++ jiffies; + -+ int dst_len; /* based on src_len */ -+ __u8 iv[EALG_MAX_BLOCK_LEN]; -+ int ivlen; ++ icpu->work_in_progress = true; ++ irq_work_queue_on(&icpu->irq_work, icpu->cpu); ++} + -+ struct task_struct *task; -+ struct mm_struct *mm; -+}; ++static void gov_set_update_util(struct interactive_policy *ipolicy) ++{ ++ struct cpufreq_policy *policy = ipolicy->policy; ++ struct interactive_cpu *icpu; ++ int cpu; + -+/* kernel-internal extension to struct crypt_fd_map_op */ -+struct kernel_crypt_fd_map_op { -+ struct crypt_fd_map_op mop; -+}; ++ for_each_cpu(cpu, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, cpu); + -+/* kernel-internal extension to struct crypt_op */ -+struct kernel_crypt_rsa_op { -+ struct crypt_rsa_op rop; ++ icpu->last_sample_time = 0; ++ icpu->next_sample_jiffies = 0; ++ cpufreq_add_update_util_hook(cpu, &icpu->update_util, ++ update_util_handler); ++ } ++} + -+ struct task_struct *task; -+ struct mm_struct *mm; -+}; ++static inline void gov_clear_update_util(struct cpufreq_policy *policy) ++{ ++ int i; + -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) -+int rk_cryptodev_register_dev(struct device *dev, const char *name); -+int rk_cryptodev_unregister_dev(struct device *dev); -+#else -+static inline int rk_cryptodev_register_dev(struct device *dev, const char *name) ++ for_each_cpu(i, policy->cpus) ++ cpufreq_remove_update_util_hook(i); ++ ++ synchronize_rcu(); ++} ++ ++static void icpu_cancel_work(struct interactive_cpu *icpu) +{ -+ return 0; ++ irq_work_sync(&icpu->irq_work); ++ icpu->work_in_progress = false; ++ del_timer_sync(&icpu->slack_timer); +} + -+static inline int rk_cryptodev_unregister_dev(struct device *dev) ++static struct interactive_policy * ++interactive_policy_alloc(struct cpufreq_policy *policy) +{ -+ return 0; ++ struct interactive_policy *ipolicy; ++ ++ ipolicy = kzalloc(sizeof(*ipolicy), GFP_KERNEL); ++ if (!ipolicy) ++ return NULL; ++ ++ ipolicy->policy = policy; ++ ++ return ipolicy; +} -+#endif + -+long -+rk_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_); ++static void interactive_policy_free(struct interactive_policy *ipolicy) ++{ ++ kfree(ipolicy); ++} + -+long -+rk_compat_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_); ++static struct interactive_tunables * ++interactive_tunables_alloc(struct interactive_policy *ipolicy) ++{ ++ struct interactive_tunables *tunables; + -+const char *rk_get_cipher_name(uint32_t id, int *is_stream, int *is_aead); ++ tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); ++ if (!tunables) ++ return NULL; + -+const char *rk_get_hash_name(uint32_t id, int *is_hmac); ++ gov_attr_set_init(&tunables->attr_set, &ipolicy->tunables_hook); ++ if (!have_governor_per_policy()) ++ global_tunables = tunables; 
+ -+bool rk_cryptodev_multi_thread(const char *name); ++ ipolicy->tunables = tunables; + -+#endif -diff --git a/drivers/crypto/rockchip/cryptodev_linux/util.c b/drivers/crypto/rockchip/cryptodev_linux/util.c -new file mode 100644 -index 000000000..28c197eb5 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/util.c -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (c) 2011 Maxim Levitsky -+ * -+ * This file is part of linux cryptodev. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ return tunables; ++} + -+#include -+#include -+#include "util.h" ++static void interactive_tunables_free(struct interactive_tunables *tunables) ++{ ++ if (!have_governor_per_policy()) ++ global_tunables = NULL; + -+/* These were taken from Maxim Levitsky's patch to lkml. -+ */ -+struct scatterlist *cryptodev_sg_advance(struct scatterlist *sg, int consumed) ++ kfree(tunables); ++} ++ ++#ifdef CONFIG_ARCH_ROCKCHIP ++static void cpufreq_interactive_input_event(struct input_handle *handle, ++ unsigned int type, ++ unsigned int code, ++ int value) +{ -+ while (consumed >= sg->length) { -+ consumed -= sg->length; ++ u64 now, endtime; ++ int i; ++ int anyboost = 0; ++ unsigned long flags[2]; ++ struct interactive_cpu *pcpu; ++ struct interactive_tunables *tunables; + -+ sg = sg_next(sg); -+ if (!sg) -+ break; -+ } ++ if (type != EV_ABS && type != EV_KEY && type != EV_REL) ++ return; + -+ WARN_ON(!sg && consumed); ++ trace_cpufreq_interactive_boost("touch"); ++ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); + -+ if (!sg) -+ return NULL; ++ now = ktime_to_us(ktime_get()); ++ for_each_online_cpu(i) { ++ pcpu = &per_cpu(interactive_cpu, i); ++ if (!down_read_trylock(&pcpu->enable_sem)) ++ continue; + -+ sg->offset += consumed; -+ sg->length -= consumed; ++ if (!pcpu->ipolicy) { ++ up_read(&pcpu->enable_sem); ++ continue; ++ } + -+ if (sg->offset >= PAGE_SIZE) { -+ struct page *page = -+ nth_page(sg_page(sg), sg->offset / PAGE_SIZE); -+ sg_set_page(sg, page, sg->length, sg->offset % PAGE_SIZE); -+ } ++ tunables = pcpu->ipolicy->tunables; ++ if (!tunables) { ++ up_read(&pcpu->enable_sem); ++ continue; ++ } + -+ return sg; -+} ++ endtime = now + tunables->touchboostpulse_duration_val; ++ if (endtime < (tunables->touchboostpulse_endtime + ++ 10 * USEC_PER_MSEC)) { ++ up_read(&pcpu->enable_sem); ++ continue; ++ } ++ tunables->touchboostpulse_endtime = endtime; + -+/** -+ * cryptodev_sg_copy - copies sg entries from sg_from to sg_to, such -+ * as sg_to covers first 'len' bytes from sg_from. 
-+ */ -+int cryptodev_sg_copy(struct scatterlist *sg_from, struct scatterlist *sg_to, int len) -+{ -+ while (len > sg_from->length) { -+ len -= sg_from->length; ++ spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); ++ if (pcpu->target_freq < tunables->touchboost_freq) { ++ pcpu->target_freq = tunables->touchboost_freq; ++ cpumask_set_cpu(i, &speedchange_cpumask); ++ pcpu->loc_hispeed_val_time = ++ ktime_to_us(ktime_get()); ++ anyboost = 1; ++ } + -+ sg_set_page(sg_to, sg_page(sg_from), -+ sg_from->length, sg_from->offset); ++ pcpu->floor_freq = tunables->touchboost_freq; ++ pcpu->loc_floor_val_time = ktime_to_us(ktime_get()); + -+ sg_to = sg_next(sg_to); -+ sg_from = sg_next(sg_from); ++ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + -+ if (len && (!sg_from || !sg_to)) -+ return -ENOMEM; ++ up_read(&pcpu->enable_sem); + } + -+ if (len) -+ sg_set_page(sg_to, sg_page(sg_from), -+ len, sg_from->offset); -+ sg_mark_end(sg_to); -+ return 0; ++ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); ++ ++ if (anyboost) ++ wake_up_process(speedchange_task); +} + -diff --git a/drivers/crypto/rockchip/cryptodev_linux/util.h b/drivers/crypto/rockchip/cryptodev_linux/util.h -new file mode 100644 -index 000000000..f7bf13242 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/util.h -@@ -0,0 +1,8 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++static int cpufreq_interactive_input_connect(struct input_handler *handler, ++ struct input_dev *dev, ++ const struct input_device_id *id) ++{ ++ struct input_handle *handle; ++ int error; + -+#ifndef UTILS_H -+#define UTILS_H -+int cryptodev_sg_copy(struct scatterlist *sg_from, struct scatterlist *sg_to, int len); -+struct scatterlist *cryptodev_sg_advance(struct scatterlist *sg, int consumed); -+#endif ++ handle = kzalloc(sizeof(*handle), GFP_KERNEL); ++ if (!handle) ++ return -ENOMEM; + -diff --git a/drivers/crypto/rockchip/cryptodev_linux/version.h b/drivers/crypto/rockchip/cryptodev_linux/version.h -new file mode 100644 -index 000000000..be0490244 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/version.h -@@ -0,0 +1,9 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++ handle->dev = dev; ++ handle->handler = handler; ++ handle->name = "cpufreq"; + -+#ifndef VERSION_H -+#define VERSION_H ++ error = input_register_handle(handle); ++ if (error) ++ goto err2; + -+#define VERSION "1.12" ++ error = input_open_device(handle); ++ if (error) ++ goto err1; + -+#endif ++ return 0; ++err1: ++ input_unregister_handle(handle); ++err2: ++ kfree(handle); ++ return error; ++} + -diff --git a/drivers/crypto/rockchip/cryptodev_linux/zc.c b/drivers/crypto/rockchip/cryptodev_linux/zc.c -new file mode 100644 -index 000000000..7671c3131 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/zc.c -@@ -0,0 +1,235 @@ -+/* -+ * Driver for /dev/crypto device (aka CryptoDev) -+ * -+ * Copyright (c) 2009-2013 Nikos Mavrogiannopoulos -+ * Copyright (c) 2010 Phil Sutter -+ * Copyright (c) 2011, 2012 OpenSSL Software Foundation, Inc. -+ * -+ * This file is part of linux cryptodev. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -+ * 02110-1301, USA. -+ */ ++static void cpufreq_interactive_input_disconnect(struct input_handle *handle) ++{ ++ input_close_device(handle); ++ input_unregister_handle(handle); ++ kfree(handle); ++} + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "cryptodev.h" -+#include "zc.h" -+#include "version.h" ++static const struct input_device_id cpufreq_interactive_ids[] = { ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | ++ INPUT_DEVICE_ID_MATCH_ABSBIT, ++ .evbit = { BIT_MASK(EV_ABS) }, ++ .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = ++ BIT_MASK(ABS_MT_POSITION_X) | ++ BIT_MASK(ABS_MT_POSITION_Y) }, ++ }, ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_KEYBIT | ++ INPUT_DEVICE_ID_MATCH_ABSBIT, ++ .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, ++ .absbit = { [BIT_WORD(ABS_X)] = ++ BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) }, ++ }, ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, ++ .evbit = { BIT_MASK(EV_KEY) }, ++ }, ++ {/* A mouse like device, at least one button,two relative axes */ ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | ++ INPUT_DEVICE_ID_MATCH_KEYBIT | ++ INPUT_DEVICE_ID_MATCH_RELBIT, ++ .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) }, ++ .keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) }, ++ .relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) }, ++ }, ++ {/* A separate scrollwheel */ ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | ++ INPUT_DEVICE_ID_MATCH_RELBIT, ++ .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) }, ++ .relbit = { BIT_MASK(REL_WHEEL) }, ++ }, ++ { }, ++}; + -+/* Helper functions to assist zero copy. -+ * This needs to be redesigned and moved out of the session. --nmav -+ */ ++static struct input_handler cpufreq_interactive_input_handler = { ++ .event = cpufreq_interactive_input_event, ++ .connect = cpufreq_interactive_input_connect, ++ .disconnect = cpufreq_interactive_input_disconnect, ++ .name = "cpufreq_interactive", ++ .id_table = cpufreq_interactive_ids, ++}; + -+/* offset of buf in it's first page */ -+#define PAGEOFFSET(buf) ((unsigned long)buf & ~PAGE_MASK) ++static void rockchip_cpufreq_policy_init(struct interactive_policy *ipolicy) ++{ ++ struct interactive_tunables *tunables = ipolicy->tunables; ++ struct gov_attr_set attr_set; ++ int index; + -+/* fetch the pages addr resides in into pg and initialise sg with them */ -+int __cryptodev_get_userbuf(uint8_t __user *addr, uint32_t len, int write, -+ unsigned int pgcount, struct page **pg, struct scatterlist *sg, -+ struct task_struct *task, struct mm_struct *mm) ++ tunables->min_sample_time = 40 * USEC_PER_MSEC; ++ tunables->boostpulse_duration = 40 * USEC_PER_MSEC; ++ if (ipolicy->policy->cpu == 0) { ++ tunables->hispeed_freq = 1008000; ++ tunables->touchboostpulse_duration_val = 500 * USEC_PER_MSEC; ++ tunables->touchboost_freq = 1200000; ++ } else { ++ tunables->hispeed_freq = 816000; ++ } ++ ++ index = (ipolicy->policy->cpu == 0) ? 
0 : 1; ++ if (!backup_tunables[index].sampling_rate) { ++ backup_tunables[index] = *tunables; ++ } else { ++ attr_set = tunables->attr_set; ++ *tunables = backup_tunables[index]; ++ tunables->attr_set = attr_set; ++ } ++} ++#endif ++ ++int cpufreq_interactive_init(struct cpufreq_policy *policy) +{ -+ int ret, pglen, i = 0; -+ struct scatterlist *sgp; ++ struct interactive_policy *ipolicy; ++ struct interactive_tunables *tunables; ++ int ret; + -+ if (unlikely(!pgcount || !len || !addr)) { -+ sg_mark_end(sg); -+ return 0; ++ /* State should be equivalent to EXIT */ ++ if (policy->governor_data) ++ return -EBUSY; ++ ++ ipolicy = interactive_policy_alloc(policy); ++ if (!ipolicy) ++ return -ENOMEM; ++ ++ mutex_lock(&global_tunables_lock); ++ ++ if (global_tunables) { ++ if (WARN_ON(have_governor_per_policy())) { ++ ret = -EINVAL; ++ goto free_int_policy; ++ } ++ ++ policy->governor_data = ipolicy; ++ ipolicy->tunables = global_tunables; ++ ++ gov_attr_set_get(&global_tunables->attr_set, ++ &ipolicy->tunables_hook); ++ goto out; + } + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)) -+ down_read(&mm->mmap_sem); -+#else -+ mmap_read_lock(mm); ++ tunables = interactive_tunables_alloc(ipolicy); ++ if (!tunables) { ++ ret = -ENOMEM; ++ goto free_int_policy; ++ } ++ ++ tunables->hispeed_freq = policy->max; ++ tunables->above_hispeed_delay = default_above_hispeed_delay; ++ tunables->nabove_hispeed_delay = ++ ARRAY_SIZE(default_above_hispeed_delay); ++ tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; ++ tunables->target_loads = default_target_loads; ++ tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); ++ tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; ++ tunables->boostpulse_duration = DEFAULT_MIN_SAMPLE_TIME; ++ tunables->sampling_rate = DEFAULT_SAMPLING_RATE; ++ tunables->timer_slack = DEFAULT_TIMER_SLACK; ++ update_slack_delay(tunables); ++ ++ spin_lock_init(&tunables->target_loads_lock); ++ spin_lock_init(&tunables->above_hispeed_delay_lock); ++ ++ policy->governor_data = ipolicy; ++ ++#ifdef CONFIG_ARCH_ROCKCHIP ++ rockchip_cpufreq_policy_init(ipolicy); +#endif -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 168)) -+ ret = get_user_pages(task, mm, -+ (unsigned long)addr, pgcount, write, 0, pg, NULL); -+#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) -+ ret = get_user_pages(task, mm, -+ (unsigned long)addr, pgcount, write, pg, NULL); -+#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) -+ ret = get_user_pages_remote(task, mm, -+ (unsigned long)addr, pgcount, write, 0, pg, NULL); -+#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ ret = get_user_pages_remote(task, mm, -+ (unsigned long)addr, pgcount, write ? FOLL_WRITE : 0, -+ pg, NULL); -+#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)) -+ ret = get_user_pages_remote(task, mm, -+ (unsigned long)addr, pgcount, write ? FOLL_WRITE : 0, -+ pg, NULL, NULL); -+#else -+ ret = get_user_pages_remote(mm, -+ (unsigned long)addr, pgcount, write ? 
FOLL_WRITE : 0, -+ pg, NULL, NULL); ++ ret = kobject_init_and_add(&tunables->attr_set.kobj, ++ &interactive_tunables_ktype, ++ get_governor_parent_kobj(policy), "%s", ++ interactive_gov.gov.name); ++ if (ret) ++ goto fail; ++ ++ /* One time initialization for governor */ ++ if (!interactive_gov.usage_count++) { ++ idle_notifier_register(&cpufreq_interactive_idle_nb); ++ cpufreq_register_notifier(&cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++#ifdef CONFIG_ARCH_ROCKCHIP ++ ret = input_register_handler(&cpufreq_interactive_input_handler); +#endif -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)) -+ up_read(&mm->mmap_sem); -+#else -+ mmap_read_unlock(mm); ++ } ++ ++ out: ++ mutex_unlock(&global_tunables_lock); ++ return 0; ++ ++ fail: ++ policy->governor_data = NULL; ++ interactive_tunables_free(tunables); ++ ++ free_int_policy: ++ mutex_unlock(&global_tunables_lock); ++ ++ interactive_policy_free(ipolicy); ++ pr_err("governor initialization failed (%d)\n", ret); ++ ++ return ret; ++} ++ ++void cpufreq_interactive_exit(struct cpufreq_policy *policy) ++{ ++ struct interactive_policy *ipolicy = policy->governor_data; ++ struct interactive_tunables *tunables = ipolicy->tunables; ++ unsigned int count; ++ ++ mutex_lock(&global_tunables_lock); ++ ++ /* Last policy using the governor ? */ ++ if (!--interactive_gov.usage_count) { ++ cpufreq_unregister_notifier(&cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ idle_notifier_unregister(&cpufreq_interactive_idle_nb); ++#ifdef CONFIG_ARCH_ROCKCHIP ++ input_unregister_handler(&cpufreq_interactive_input_handler); +#endif -+ if (ret < 0 || ret != pgcount) -+ return -EINVAL; ++ } + -+ sg_init_table(sg, pgcount); ++ count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook); ++ policy->governor_data = NULL; ++ if (!count) { ++#ifdef CONFIG_ARCH_ROCKCHIP ++ if (policy->cpu == 0) ++ backup_tunables[0] = *tunables; ++ else ++ backup_tunables[1] = *tunables; ++#endif ++ interactive_tunables_free(tunables); ++ } + -+ pglen = min((ptrdiff_t)(PAGE_SIZE - PAGEOFFSET(addr)), (ptrdiff_t)len); -+ sg_set_page(sg, pg[i++], pglen, PAGEOFFSET(addr)); ++ mutex_unlock(&global_tunables_lock); + -+ len -= pglen; -+ for (sgp = sg_next(sg); len; sgp = sg_next(sgp)) { -+ pglen = min((uint32_t)PAGE_SIZE, len); -+ sg_set_page(sgp, pg[i++], pglen, 0); -+ len -= pglen; ++ interactive_policy_free(ipolicy); ++} ++ ++int cpufreq_interactive_start(struct cpufreq_policy *policy) ++{ ++ struct interactive_policy *ipolicy = policy->governor_data; ++ struct interactive_cpu *icpu; ++ unsigned int cpu; ++ ++ for_each_cpu(cpu, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, cpu); ++ ++ icpu->target_freq = policy->cur; ++ icpu->floor_freq = icpu->target_freq; ++ icpu->pol_floor_val_time = ktime_to_us(ktime_get()); ++ icpu->loc_floor_val_time = icpu->pol_floor_val_time; ++ icpu->pol_hispeed_val_time = icpu->pol_floor_val_time; ++ icpu->loc_hispeed_val_time = icpu->pol_floor_val_time; ++ icpu->cpu = cpu; ++ ++ down_write(&icpu->enable_sem); ++ icpu->ipolicy = ipolicy; ++ slack_timer_resched(icpu, cpu, false); ++ up_write(&icpu->enable_sem); + } -+ sg_mark_end(sg_last(sg, pgcount)); ++ ++ gov_set_update_util(ipolicy); + return 0; +} + -+int cryptodev_adjust_sg_array(struct csession *ses, int pagecount) ++void cpufreq_interactive_stop(struct cpufreq_policy *policy) +{ -+ struct scatterlist *sg; -+ struct page **pages; -+ int array_size; ++ struct interactive_policy *ipolicy = policy->governor_data; ++ struct interactive_cpu *icpu; ++ unsigned int cpu; + -+ 
for (array_size = ses->array_size; array_size < pagecount; -+ array_size *= 2) -+ ; -+ ddebug(1, "reallocating from %d to %d pages", -+ ses->array_size, array_size); -+ pages = krealloc(ses->pages, array_size * sizeof(struct page *), -+ GFP_KERNEL); -+ if (unlikely(!pages)) -+ return -ENOMEM; -+ ses->pages = pages; -+ sg = krealloc(ses->sg, array_size * sizeof(struct scatterlist), -+ GFP_KERNEL); -+ if (unlikely(!sg)) ++ gov_clear_update_util(ipolicy->policy); ++ ++ for_each_cpu(cpu, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, cpu); ++ ++ down_write(&icpu->enable_sem); ++ icpu_cancel_work(icpu); ++ icpu->ipolicy = NULL; ++ up_write(&icpu->enable_sem); ++ } ++} ++ ++void cpufreq_interactive_limits(struct cpufreq_policy *policy) ++{ ++ struct interactive_cpu *icpu; ++ unsigned int cpu; ++ unsigned long flags; ++ ++ cpufreq_policy_apply_limits(policy); ++ ++ for_each_cpu(cpu, policy->cpus) { ++ icpu = &per_cpu(interactive_cpu, cpu); ++ ++ spin_lock_irqsave(&icpu->target_freq_lock, flags); ++ ++ if (policy->max < icpu->target_freq) ++ icpu->target_freq = policy->max; ++ else if (policy->min > icpu->target_freq) ++ icpu->target_freq = policy->min; ++ ++ spin_unlock_irqrestore(&icpu->target_freq_lock, flags); ++ } ++} ++ ++static struct interactive_governor interactive_gov = { ++ .gov = { ++ .name = "interactive", ++ .owner = THIS_MODULE, ++ .init = cpufreq_interactive_init, ++ .exit = cpufreq_interactive_exit, ++ .start = cpufreq_interactive_start, ++ .stop = cpufreq_interactive_stop, ++ .limits = cpufreq_interactive_limits, ++ } ++}; ++ ++static void cpufreq_interactive_nop_timer(struct timer_list *t) ++{ ++ /* ++ * The purpose of slack-timer is to wake up the CPU from IDLE, in order ++ * to decrease its frequency if it is not set to minimum already. ++ * ++ * This is important for platforms where CPU with higher frequencies ++ * consume higher power even at IDLE. 
++ */ ++} ++ ++static int __init cpufreq_interactive_gov_init(void) ++{ ++ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; ++ struct interactive_cpu *icpu; ++ unsigned int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ icpu = &per_cpu(interactive_cpu, cpu); ++ ++ init_irq_work(&icpu->irq_work, irq_work); ++ spin_lock_init(&icpu->load_lock); ++ spin_lock_init(&icpu->target_freq_lock); ++ init_rwsem(&icpu->enable_sem); ++ ++ /* Initialize per-cpu slack-timer */ ++ timer_setup(&icpu->slack_timer, cpufreq_interactive_nop_timer, ++ TIMER_PINNED); ++ } ++ ++ spin_lock_init(&speedchange_cpumask_lock); ++ speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, ++ NULL, "cfinteractive"); ++ if (IS_ERR(speedchange_task)) ++ return PTR_ERR(speedchange_task); ++ ++ sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); ++ get_task_struct(speedchange_task); ++ ++ /* wake up so the thread does not look hung to the freezer */ ++ wake_up_process(speedchange_task); ++ ++ return cpufreq_register_governor(CPU_FREQ_GOV_INTERACTIVE); ++} ++ ++#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE ++struct cpufreq_governor *cpufreq_default_governor(void) ++{ ++ return CPU_FREQ_GOV_INTERACTIVE; ++} ++ ++fs_initcall(cpufreq_interactive_gov_init); ++#else ++module_init(cpufreq_interactive_gov_init); ++#endif ++ ++static void __exit cpufreq_interactive_gov_exit(void) ++{ ++ cpufreq_unregister_governor(CPU_FREQ_GOV_INTERACTIVE); ++ kthread_stop(speedchange_task); ++ put_task_struct(speedchange_task); ++} ++module_exit(cpufreq_interactive_gov_exit); ++ ++MODULE_AUTHOR("Mike Chan "); ++MODULE_DESCRIPTION("'cpufreq_interactive' - A dynamic cpufreq governor for Latency sensitive workloads"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/cpufreq/cpufreq_userspace_rk.c b/drivers/cpufreq/cpufreq_userspace_rk.c +new file mode 100644 +index 000000000..1f001d281 +--- /dev/null ++++ b/drivers/cpufreq/cpufreq_userspace_rk.c +@@ -0,0 +1,140 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++/* ++ * linux/drivers/cpufreq/cpufreq_userspace.c ++ * ++ * Copyright (C) 2001 Russell King ++ * (C) 2002 - 2004 Dominik Brodowski ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_PER_CPU(unsigned int, cpu_is_managed); ++static DEFINE_MUTEX(userspace_mutex); ++ ++/** ++ * cpufreq_set - set the CPU frequency ++ * @policy: pointer to policy struct where freq is being set ++ * @freq: target frequency in kHz ++ * ++ * Sets the CPU frequency to freq. 
++ */ ++static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) ++{ ++ int ret = -EINVAL; ++ unsigned int *setspeed = policy->governor_data; ++ ++ pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); ++ ++ mutex_lock(&userspace_mutex); ++ if (!per_cpu(cpu_is_managed, policy->cpu)) ++ goto err; ++ ++ *setspeed = freq; ++ ++ ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); ++ err: ++ mutex_unlock(&userspace_mutex); ++ return ret; ++} ++ ++static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) ++{ ++ return sprintf(buf, "%u\n", policy->cur); ++} ++ ++static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) ++{ ++ unsigned int *setspeed; ++ ++ setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL); ++ if (!setspeed) + return -ENOMEM; -+ ses->sg = sg; -+ ses->array_size = array_size; + ++ policy->governor_data = setspeed; + return 0; +} + -+void cryptodev_release_user_pages(struct csession *ses) ++static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy) +{ -+ unsigned int i; ++ mutex_lock(&userspace_mutex); ++ kfree(policy->governor_data); ++ policy->governor_data = NULL; ++ mutex_unlock(&userspace_mutex); ++} + -+ for (i = 0; i < ses->used_pages; i++) { -+ if (!PageReserved(ses->pages[i])) -+ SetPageDirty(ses->pages[i]); ++static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy) ++{ ++ unsigned int *setspeed = policy->governor_data; + -+ if (ses->readonly_pages == 0) -+ flush_dcache_page(ses->pages[i]); -+ else -+ ses->readonly_pages--; ++ BUG_ON(!policy->cur); ++ pr_debug("started managing cpu %u\n", policy->cpu); + -+ put_page(ses->pages[i]); -+ } -+ ses->used_pages = 0; ++ mutex_lock(&userspace_mutex); ++ per_cpu(cpu_is_managed, policy->cpu) = 1; ++ if (!*setspeed) ++ *setspeed = policy->cur; ++ mutex_unlock(&userspace_mutex); ++ return 0; +} + -+/* make src and dst available in scatterlists. -+ * dst might be the same as src. 
++static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy) ++{ ++ pr_debug("managing cpu %u stopped\n", policy->cpu); ++ ++ mutex_lock(&userspace_mutex); ++ per_cpu(cpu_is_managed, policy->cpu) = 0; ++ mutex_unlock(&userspace_mutex); ++} ++ ++static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) ++{ ++ unsigned int *setspeed = policy->governor_data; ++ ++ mutex_lock(&userspace_mutex); ++ ++ pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", ++ policy->cpu, policy->min, policy->max, policy->cur, *setspeed); ++ ++ if (policy->max < *setspeed) ++ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); ++ else if (policy->min > *setspeed) ++ __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); ++ else ++ __cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L); ++ ++ mutex_unlock(&userspace_mutex); ++} ++ ++static struct cpufreq_governor cpufreq_gov_userspace = { ++ .name = "userspace", ++ .init = cpufreq_userspace_policy_init, ++ .exit = cpufreq_userspace_policy_exit, ++ .start = cpufreq_userspace_policy_start, ++ .stop = cpufreq_userspace_policy_stop, ++ .limits = cpufreq_userspace_policy_limits, ++ .store_setspeed = cpufreq_set, ++ .show_setspeed = show_speed, ++ .owner = THIS_MODULE, ++}; ++ ++MODULE_AUTHOR("Dominik Brodowski , " ++ "Russell King "); ++MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); ++MODULE_LICENSE("GPL"); ++ ++#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE ++struct cpufreq_governor *cpufreq_default_governor(void) ++{ ++ return &cpufreq_gov_userspace; ++} ++#endif ++ ++cpufreq_governor_init(cpufreq_gov_userspace); ++cpufreq_governor_exit(cpufreq_gov_userspace); +diff --git a/drivers/cpufreq/rockchip-cpufreq.c b/drivers/cpufreq/rockchip-cpufreq.c +new file mode 100644 +index 000000000..dfed7d6b7 +--- /dev/null ++++ b/drivers/cpufreq/rockchip-cpufreq.c +@@ -0,0 +1,852 @@ ++/* ++ * Rockchip CPUFreq Driver ++ * ++ * Copyright (C) 2017 Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed "as is" WITHOUT ANY WARRANTY of any ++ * kind, whether express or implied; without even the implied warranty ++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
+ */ -+int cryptodev_get_userbuf(struct csession *ses, -+ void *__user src, unsigned int src_len, -+ void *__user dst, unsigned int dst_len, -+ struct task_struct *task, struct mm_struct *mm, -+ struct scatterlist **src_sg, -+ struct scatterlist **dst_sg) ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cpufreq-dt.h" ++#include "rockchip-cpufreq.h" ++ ++struct cluster_info { ++ struct list_head list_head; ++ struct monitor_dev_info *mdev_info; ++ struct rockchip_opp_info opp_info; ++ struct freq_qos_request dsu_qos_req; ++ cpumask_t cpus; ++ unsigned int idle_threshold_freq; ++ bool is_idle_disabled; ++ bool is_opp_shared_dsu; ++ unsigned long rate; ++ unsigned long volt, mem_volt; ++}; ++static LIST_HEAD(cluster_info_list); ++ ++static struct cluster_info *rockchip_cluster_info_lookup(int cpu); ++ ++static int px30_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) +{ -+ int src_pagecount, dst_pagecount; -+ int rc; ++ int ret = 0; ++ u8 value = 0; + -+ /* Empty input is a valid option to many algorithms & is tested by NIST/FIPS */ -+ /* Make sure NULL input has 0 length */ -+ if (!src && src_len) -+ src_len = 0; ++ if (!bin) ++ return 0; + -+ /* I don't know that null output is ever useful, but we can handle it gracefully */ -+ /* Make sure NULL output has 0 length */ -+ if (!dst && dst_len) -+ dst_len = 0; ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "performance") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, "performance", &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc performance value\n"); ++ return ret; ++ } ++ *bin = value; ++ } ++ if (*bin >= 0) ++ dev_info(dev, "bin=%d\n", *bin); + -+ src_pagecount = PAGECOUNT(src, src_len); -+ dst_pagecount = PAGECOUNT(dst, dst_len); ++ return ret; ++} + -+ ses->used_pages = (src == dst) ? max(src_pagecount, dst_pagecount) -+ : src_pagecount + dst_pagecount; ++static int rk3288_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) ++{ ++ int ret = 0; ++ u8 value = 0; ++ char *name; + -+ ses->readonly_pages = (src == dst) ? 0 : src_pagecount; ++ if (!bin) ++ goto next; ++ if (of_property_match_string(np, "nvmem-cell-names", "special") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, "special", &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc special value\n"); ++ goto out; ++ } ++ if (value == 0xc) ++ *bin = 0; ++ else ++ *bin = 1; ++ } + -+ if (ses->used_pages > ses->array_size) { -+ rc = cryptodev_adjust_sg_array(ses, ses->used_pages); -+ if (rc) -+ return rc; ++ if (soc_is_rk3288w()) ++ name = "performance-w"; ++ else ++ name = "performance"; ++ ++ if (of_property_match_string(np, "nvmem-cell-names", name) >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, name, &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc performance value\n"); ++ goto out; ++ } ++ if (value & 0x2) ++ *bin = 3; ++ else if (value & 0x01) ++ *bin = 2; + } ++ if (*bin >= 0) ++ dev_info(dev, "bin=%d\n", *bin); + -+ if (src == dst) { /* inplace operation */ -+ /* When we encrypt for authenc modes we need to write -+ * more data than the ones we read. 
*/ -+ if (src_len < dst_len) -+ src_len = dst_len; -+ rc = __cryptodev_get_userbuf(src, src_len, 1, ses->used_pages, -+ ses->pages, ses->sg, task, mm); -+ if (unlikely(rc)) { -+ derr(1, "failed to get user pages for data IO"); -+ return rc; ++next: ++ if (!process) ++ goto out; ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "process") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, "process", &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc process version\n"); ++ goto out; + } -+ (*src_sg) = (*dst_sg) = ses->sg; -+ return 0; ++ if (soc_is_rk3288() && (value == 0 || value == 1)) ++ *process = 0; + } ++ if (*process >= 0) ++ dev_info(dev, "process=%d\n", *process); + -+ *src_sg = NULL; /* default to no input */ -+ *dst_sg = NULL; /* default to ignore output */ ++out: ++ return ret; ++} + -+ if (likely(src)) { -+ rc = __cryptodev_get_userbuf(src, src_len, 0, ses->readonly_pages, -+ ses->pages, ses->sg, task, mm); -+ if (unlikely(rc)) { -+ derr(1, "failed to get user pages for data input"); -+ return rc; ++static int rk3399_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) ++{ ++ int ret = 0; ++ u8 value = 0; ++ ++ if (!bin) ++ return 0; ++ ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "specification_serial_number") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, ++ "specification_serial_number", ++ &value); ++ if (ret) { ++ dev_err(dev, ++ "Failed to get specification_serial_number\n"); ++ goto out; ++ } ++ ++ if (value == 0xb) { ++ *bin = 0; ++ } else if (value == 0x1) { ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "customer_demand") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, ++ "customer_demand", ++ &value); ++ if (ret) { ++ dev_err(dev, "Failed to get customer_demand\n"); ++ goto out; ++ } ++ if (value == 0x0) ++ *bin = 0; ++ else ++ *bin = 1; ++ } ++ } else if (value == 0x10) { ++ *bin = 1; + } -+ *src_sg = ses->sg; + } + -+ if (likely(dst)) { -+ const unsigned int writable_pages = -+ ses->used_pages - ses->readonly_pages; -+ struct page **dst_pages = ses->pages + ses->readonly_pages; -+ *dst_sg = ses->sg + ses->readonly_pages; ++out: ++ if (*bin >= 0) ++ dev_info(dev, "bin=%d\n", *bin); + -+ rc = __cryptodev_get_userbuf(dst, dst_len, 1, writable_pages, -+ dst_pages, *dst_sg, task, mm); -+ if (unlikely(rc)) { -+ derr(1, "failed to get user pages for data output"); -+ cryptodev_release_user_pages(ses); /* FIXME: use __release_userbuf(src, ...) 
*/ -+ return rc; ++ return ret; ++} ++ ++static int rk3588_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) ++{ ++ int ret = 0; ++ u8 value = 0; ++ ++ if (!bin) ++ return 0; ++ ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "specification_serial_number") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, ++ "specification_serial_number", ++ &value); ++ if (ret) { ++ dev_err(dev, ++ "Failed to get specification_serial_number\n"); ++ return ret; + } ++ /* RK3588M */ ++ if (value == 0xd) ++ *bin = 1; ++ /* RK3588J */ ++ else if (value == 0xa) ++ *bin = 2; ++ } ++ if (*bin < 0) ++ *bin = 0; ++ dev_info(dev, "bin=%d\n", *bin); ++ ++ return ret; ++} ++ ++static int rk3588_change_length(struct device *dev, struct device_node *np, ++ struct rockchip_opp_info *opp_info) ++{ ++ struct clk *clk; ++ unsigned long old_rate; ++ unsigned int low_len_sel; ++ u32 opp_flag = 0; ++ int ret = 0; ++ ++ if (opp_info->volt_sel < 0) ++ return 0; ++ ++ clk = clk_get(dev, NULL); ++ if (IS_ERR(clk)) { ++ dev_warn(dev, "failed to get cpu clk\n"); ++ return PTR_ERR(clk); ++ } ++ ++ /* RK3588 low speed grade should change to low length */ ++ if (of_property_read_u32(np, "rockchip,pvtm-low-len-sel", ++ &low_len_sel)) ++ goto out; ++ if (opp_info->volt_sel > low_len_sel) ++ goto out; ++ opp_flag = OPP_LENGTH_LOW; ++ ++ old_rate = clk_get_rate(clk); ++ ret = clk_set_rate(clk, old_rate | opp_flag); ++ if (ret) { ++ dev_err(dev, "failed to change length\n"); ++ goto out; + } ++ clk_set_rate(clk, old_rate); ++out: ++ clk_put(clk); ++ ++ return ret; ++} ++ ++static int rk3588_set_supported_hw(struct device *dev, struct device_node *np, ++ struct rockchip_opp_info *opp_info) ++{ ++ int bin = opp_info->bin; ++ ++ if (!of_property_read_bool(np, "rockchip,supported-hw")) ++ return 0; ++ ++ if (bin < 0) ++ bin = 0; ++ ++ /* SoC Version */ ++ opp_info->supported_hw[0] = BIT(bin); ++ /* Speed Grade */ ++ opp_info->supported_hw[1] = BIT(opp_info->volt_sel); ++ + return 0; +} -diff --git a/drivers/crypto/rockchip/cryptodev_linux/zc.h b/drivers/crypto/rockchip/cryptodev_linux/zc.h -new file mode 100644 -index 000000000..808af4855 ---- /dev/null -+++ b/drivers/crypto/rockchip/cryptodev_linux/zc.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ + -+#ifndef ZC_H -+# define ZC_H ++static int rk3588_set_soc_info(struct device *dev, struct device_node *np, ++ struct rockchip_opp_info *opp_info) ++{ ++ rk3588_change_length(dev, np, opp_info); ++ rk3588_set_supported_hw(dev, np, opp_info); + -+/* For zero copy */ -+int __cryptodev_get_userbuf(uint8_t __user *addr, uint32_t len, int write, -+ unsigned int pgcount, struct page **pg, struct scatterlist *sg, -+ struct task_struct *task, struct mm_struct *mm); -+void cryptodev_release_user_pages(struct csession *ses); ++ return 0; ++} + -+int cryptodev_get_userbuf(struct csession *ses, -+ void *__user src, unsigned int src_len, -+ void *__user dst, unsigned int dst_len, -+ struct task_struct *task, struct mm_struct *mm, -+ struct scatterlist **src_sg, -+ struct scatterlist **dst_sg); ++static int rk3588_cpu_set_read_margin(struct device *dev, ++ struct rockchip_opp_info *opp_info, ++ u32 rm) ++{ ++ if (!opp_info->volt_rm_tbl) ++ return 0; ++ if (rm == opp_info->current_rm || rm == UINT_MAX) ++ return 0; + -+/* buflen ? (last page - first page + 1) : 0 */ -+#define PAGECOUNT(buf, buflen) ((buflen) \ -+ ? 
((((unsigned long)(buf + buflen - 1)) >> PAGE_SHIFT) - \ -+ (((unsigned long)(buf )) >> PAGE_SHIFT) + 1) \ -+ : 0) ++ dev_dbg(dev, "set rm to %d\n", rm); ++ if (opp_info->grf) { ++ regmap_write(opp_info->grf, 0x20, 0x001c0000 | (rm << 2)); ++ regmap_write(opp_info->grf, 0x28, 0x003c0000 | (rm << 2)); ++ regmap_write(opp_info->grf, 0x2c, 0x003c0000 | (rm << 2)); ++ regmap_write(opp_info->grf, 0x30, 0x00200020); ++ udelay(1); ++ regmap_write(opp_info->grf, 0x30, 0x00200000); ++ } ++ if (opp_info->dsu_grf) { ++ regmap_write(opp_info->dsu_grf, 0x20, 0x001c0000 | (rm << 2)); ++ regmap_write(opp_info->dsu_grf, 0x28, 0x003c0000 | (rm << 2)); ++ regmap_write(opp_info->dsu_grf, 0x2c, 0x003c0000 | (rm << 2)); ++ regmap_write(opp_info->dsu_grf, 0x30, 0x001c0000 | (rm << 2)); ++ regmap_write(opp_info->dsu_grf, 0x38, 0x001c0000 | (rm << 2)); ++ regmap_write(opp_info->dsu_grf, 0x18, 0x40004000); ++ udelay(1); ++ regmap_write(opp_info->dsu_grf, 0x18, 0x40000000); ++ } + -+#define DEFAULT_PREALLOC_PAGES 32 ++ opp_info->current_rm = rm; + -+#endif -diff --git a/drivers/crypto/rockchip/procfs.c b/drivers/crypto/rockchip/procfs.c -new file mode 100644 -index 000000000..5689dc530 ---- /dev/null -+++ b/drivers/crypto/rockchip/procfs.c -@@ -0,0 +1,160 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* Copyright (c) Rockchip Electronics Co., Ltd. */ -+#include -+#include -+#include -+#include ++ return 0; ++} + -+#include "procfs.h" ++static int cpu_opp_config_regulators(struct device *dev, ++ struct dev_pm_opp *old_opp, ++ struct dev_pm_opp *new_opp, ++ struct regulator **regulators, ++ unsigned int count) ++{ ++ struct cluster_info *cluster; + -+#ifdef CONFIG_PROC_FS ++ cluster = rockchip_cluster_info_lookup(dev->id); ++ if (!cluster) ++ return -EINVAL; + -+static const char *alg_type2name[ALG_TYPE_MAX] = { -+ [ALG_TYPE_HASH] = "HASH", -+ [ALG_TYPE_HMAC] = "HMAC", -+ [ALG_TYPE_CIPHER] = "CIPHER", -+ [ALG_TYPE_ASYM] = "ASYM", -+ [ALG_TYPE_AEAD] = "AEAD", ++ return rockchip_opp_config_regulators(dev, old_opp, new_opp, regulators, ++ count, &cluster->opp_info); ++} ++ ++static int rv1126_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) ++{ ++ int ret = 0; ++ u8 value = 0; ++ ++ if (of_property_match_string(np, "nvmem-cell-names", "performance") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, "performance", &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc performance value\n"); ++ return ret; ++ } ++ if (value == 0x1) ++ *bin = 1; ++ else ++ *bin = 0; ++ } ++ if (*bin >= 0) ++ dev_info(dev, "bin=%d\n", *bin); ++ ++ return ret; ++} ++ ++static const struct rockchip_opp_data px30_cpu_opp_data = { ++ .get_soc_info = px30_get_soc_info, +}; + -+static void crypto_show_clock(struct seq_file *p, struct clk_bulk_data *clk_bulks, int clks_num) ++static const struct rockchip_opp_data rk3288_cpu_opp_data = { ++ .get_soc_info = rk3288_get_soc_info, ++}; ++ ++static const struct rockchip_opp_data rk3399_cpu_opp_data = { ++ .get_soc_info = rk3399_get_soc_info, ++}; ++ ++static const struct rockchip_opp_data rk3588_cpu_opp_data = { ++ .get_soc_info = rk3588_get_soc_info, ++ .set_soc_info = rk3588_set_soc_info, ++ .set_read_margin = rk3588_cpu_set_read_margin, ++ .config_regulators = cpu_opp_config_regulators, ++}; ++ ++static const struct rockchip_opp_data rv1126_cpu_opp_data = { ++ .get_soc_info = rv1126_get_soc_info, ++}; ++ ++static const struct of_device_id rockchip_cpufreq_of_match[] = { ++ { ++ .compatible = "rockchip,px30", ++ .data = (void *)&px30_cpu_opp_data, ++ }, ++ { ++ 
.compatible = "rockchip,rk3288", ++ .data = (void *)&rk3288_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rk3288w", ++ .data = (void *)&rk3288_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rk3326", ++ .data = (void *)&px30_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rk3399", ++ .data = (void *)&rk3399_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rk3588", ++ .data = (void *)&rk3588_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rv1109", ++ .data = (void *)&rv1126_cpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rv1126", ++ .data = (void *)&rv1126_cpu_opp_data, ++ }, ++ {}, ++}; ++ ++static struct cluster_info *rockchip_cluster_info_lookup(int cpu) +{ -+ int i; ++ struct cluster_info *cluster; + -+ seq_puts(p, "clock info:\n"); ++ list_for_each_entry(cluster, &cluster_info_list, list_head) { ++ if (cpumask_test_cpu(cpu, &cluster->cpus)) ++ return cluster; ++ } + -+ for (i = 0; i < clks_num; i++) -+ seq_printf(p, "\t%-10s %ld\n", clk_bulks[i].id, clk_get_rate(clk_bulks[i].clk)); ++ return NULL; ++} + -+ seq_puts(p, "\n"); ++static int rockchip_cpufreq_cluster_init(int cpu, struct cluster_info *cluster) ++{ ++ struct rockchip_opp_info *opp_info = &cluster->opp_info; ++ struct device_node *np; ++ struct device *dev; ++ char *reg_name; ++ int ret = 0; ++ u32 freq = 0; ++ ++ dev = get_cpu_device(cpu); ++ if (!dev) ++ return -ENODEV; ++ ++ np = of_parse_phandle(dev->of_node, "operating-points-v2", 0); ++ if (!np) { ++ dev_warn(dev, "OPP-v2 not supported\n"); ++ return -ENOENT; ++ } ++ ret = dev_pm_opp_of_get_sharing_cpus(dev, &cluster->cpus); ++ if (ret) { ++ dev_err(dev, "Failed to get sharing cpus\n"); ++ of_node_put(np); ++ return ret; ++ } ++ cluster->is_opp_shared_dsu = of_property_read_bool(np, "rockchip,opp-shared-dsu"); ++ if (!of_property_read_u32(np, "rockchip,idle-threshold-freq", &freq)) ++ cluster->idle_threshold_freq = freq; ++ of_node_put(np); ++ ++ if (of_find_property(dev->of_node, "cpu-supply", NULL)) ++ reg_name = "cpu"; ++ else if (of_find_property(dev->of_node, "cpu0-supply", NULL)) ++ reg_name = "cpu0"; ++ else ++ return -ENOENT; ++ rockchip_get_opp_data(rockchip_cpufreq_of_match, opp_info); ++ ret = rockchip_init_opp_info(dev, opp_info, NULL, reg_name); ++ if (ret) ++ dev_err(dev, "failed to init opp info\n"); ++ ++ return ret; +} + -+static void crypto_show_stat(struct seq_file *p, struct rk_crypto_stat *stat) ++int rockchip_cpufreq_adjust_table(struct device *dev) +{ -+ /* show statistic info */ -+ seq_puts(p, "Statistic info:\n"); -+ seq_printf(p, "\tbusy_cnt : %llu\n", stat->busy_cnt); -+ seq_printf(p, "\tequeue_cnt : %llu\n", stat->equeue_cnt); -+ seq_printf(p, "\tdequeue_cnt : %llu\n", stat->dequeue_cnt); -+ seq_printf(p, "\tdone_cnt : %llu\n", stat->done_cnt); -+ seq_printf(p, "\tcomplete_cnt : %llu\n", stat->complete_cnt); -+ seq_printf(p, "\tfake_cnt : %llu\n", stat->fake_cnt); -+ seq_printf(p, "\tirq_cnt : %llu\n", stat->irq_cnt); -+ seq_printf(p, "\ttimeout_cnt : %llu\n", stat->timeout_cnt); -+ seq_printf(p, "\terror_cnt : %llu\n", stat->error_cnt); -+ seq_printf(p, "\tlast_error : %d\n", stat->last_error); -+ seq_puts(p, "\n"); ++ struct cluster_info *cluster; ++ ++ cluster = rockchip_cluster_info_lookup(dev->id); ++ if (!cluster) ++ return -EINVAL; ++ ++ return rockchip_adjust_opp_table(dev, &cluster->opp_info); +} ++EXPORT_SYMBOL_GPL(rockchip_cpufreq_adjust_table); + -+static void crypto_show_queue_info(struct seq_file *p, struct rk_crypto_dev *rk_dev) ++int rockchip_cpufreq_opp_set_rate(struct device *dev, unsigned long 
target_freq) +{ -+ bool busy; -+ unsigned long flags; -+ u32 qlen, max_qlen; ++ struct cluster_info *cluster; ++ struct dev_pm_opp *opp; ++ struct rockchip_opp_info *opp_info; ++ struct dev_pm_opp_supply supplies[2] = {0}; ++ unsigned long freq; ++ int ret = 0; + -+ spin_lock_irqsave(&rk_dev->lock, flags); ++ cluster = rockchip_cluster_info_lookup(dev->id); ++ if (!cluster) ++ return -EINVAL; ++ opp_info = &cluster->opp_info; + -+ qlen = rk_dev->queue.qlen; -+ max_qlen = rk_dev->queue.max_qlen; -+ busy = rk_dev->busy; ++ rockchip_opp_dvfs_lock(opp_info); ++ ret = dev_pm_opp_set_rate(dev, target_freq); ++ if (!ret) { ++ cluster->rate = target_freq; ++ opp = dev_pm_opp_find_freq_ceil(dev, &freq); ++ if (!IS_ERR(opp)) { ++ dev_pm_opp_get_supplies(opp, supplies); ++ cluster->volt = supplies[0].u_volt; ++ if (opp_info->regulator_count > 1) ++ cluster->mem_volt = supplies[1].u_volt; ++ dev_pm_opp_put(opp); ++ } ++ } ++ rockchip_opp_dvfs_unlock(opp_info); + -+ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rockchip_cpufreq_opp_set_rate); + -+ seq_printf(p, "Crypto queue usage [%u/%u], ever_max = %llu, status: %s\n", -+ qlen, max_qlen, rk_dev->stat.ever_queue_max, busy ? "busy" : "idle"); ++static int rockchip_cpufreq_suspend(struct cpufreq_policy *policy) ++{ ++ int ret = 0; + -+ seq_puts(p, "\n"); ++ ret = cpufreq_generic_suspend(policy); ++ if (!ret) ++ rockchip_monitor_suspend_low_temp_adjust(policy->cpu); ++ ++ return ret; +} + -+static void crypto_show_valid_algo_single(struct seq_file *p, enum alg_type type, -+ struct rk_crypto_algt **algs, u32 algs_num) ++static int rockchip_cpufreq_add_monitor(struct cluster_info *cluster, ++ struct cpufreq_policy *policy) +{ -+ u32 i; -+ struct rk_crypto_algt *tmp_algs; ++ struct device *dev = cluster->opp_info.dev; ++ struct monitor_dev_profile *mdevp = NULL; ++ struct monitor_dev_info *mdev_info = NULL; + -+ seq_printf(p, "\t%s:\n", alg_type2name[type]); ++ mdevp = kzalloc(sizeof(*mdevp), GFP_KERNEL); ++ if (!mdevp) ++ return -ENOMEM; + -+ for (i = 0; i < algs_num; i++, algs++) { -+ tmp_algs = *algs; ++ mdevp->type = MONITOR_TYPE_CPU; ++ mdevp->low_temp_adjust = rockchip_monitor_cpu_low_temp_adjust; ++ mdevp->high_temp_adjust = rockchip_monitor_cpu_high_temp_adjust; ++ mdevp->check_rate_volt = rockchip_monitor_check_rate_volt; ++ mdevp->data = (void *)policy; ++ mdevp->opp_info = &cluster->opp_info; ++ cpumask_copy(&mdevp->allowed_cpus, policy->cpus); ++ mdev_info = rockchip_system_monitor_register(dev, mdevp); ++ if (IS_ERR(mdev_info)) { ++ kfree(mdevp); ++ dev_err(dev, "failed to register system monitor\n"); ++ return -EINVAL; ++ } ++ mdev_info->devp = mdevp; ++ cluster->mdev_info = mdev_info; + -+ if (!(tmp_algs->valid_flag) || tmp_algs->type != type) ++ return 0; ++} ++ ++static int rockchip_cpufreq_remove_monitor(struct cluster_info *cluster) ++{ ++ if (cluster->mdev_info) { ++ kfree(cluster->mdev_info->devp); ++ rockchip_system_monitor_unregister(cluster->mdev_info); ++ cluster->mdev_info = NULL; ++ } ++ ++ return 0; ++} ++ ++static int rockchip_cpufreq_remove_dsu_qos(struct cluster_info *cluster) ++{ ++ struct cluster_info *ci; ++ ++ if (!cluster->is_opp_shared_dsu) ++ return 0; ++ ++ list_for_each_entry(ci, &cluster_info_list, list_head) { ++ if (ci->is_opp_shared_dsu) + continue; ++ if (freq_qos_request_active(&ci->dsu_qos_req)) ++ freq_qos_remove_request(&ci->dsu_qos_req); ++ } + -+ seq_printf(p, "\t\t%s\n", tmp_algs->name); ++ return 0; ++} ++ ++static int rockchip_cpufreq_add_dsu_qos_req(struct 
cluster_info *cluster, ++ struct cpufreq_policy *policy) ++{ ++ struct device *dev = cluster->opp_info.dev; ++ struct cluster_info *ci; ++ int ret; ++ ++ if (!cluster->is_opp_shared_dsu) ++ return 0; ++ ++ list_for_each_entry(ci, &cluster_info_list, list_head) { ++ if (ci->is_opp_shared_dsu) ++ continue; ++ ret = freq_qos_add_request(&policy->constraints, ++ &ci->dsu_qos_req, ++ FREQ_QOS_MIN, ++ FREQ_QOS_MIN_DEFAULT_VALUE); ++ if (ret < 0) { ++ dev_err(dev, "failed to add dsu freq constraint\n"); ++ goto error; ++ } + } + -+ seq_puts(p, "\n"); ++ return 0; ++ ++error: ++ rockchip_cpufreq_remove_dsu_qos(cluster); ++ ++ return ret; +} + -+static void crypto_show_valid_algos(struct seq_file *p, struct rk_crypto_soc_data *soc_data) ++static int rockchip_cpufreq_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) +{ -+ u32 algs_num = 0; -+ struct rk_crypto_algt **algs; ++ struct cpufreq_policy *policy = data; ++ struct cluster_info *cluster; + -+ seq_puts(p, "Valid algorithms:\n"); ++ cluster = rockchip_cluster_info_lookup(policy->cpu); ++ if (!cluster) ++ return NOTIFY_BAD; + -+ algs = soc_data->hw_get_algts(&algs_num); -+ if (!algs || algs_num == 0) -+ return; ++ if (event == CPUFREQ_CREATE_POLICY) { ++ if (rockchip_cpufreq_add_monitor(cluster, policy)) ++ return NOTIFY_BAD; ++ if (rockchip_cpufreq_add_dsu_qos_req(cluster, policy)) ++ return NOTIFY_BAD; ++ } else if (event == CPUFREQ_REMOVE_POLICY) { ++ rockchip_cpufreq_remove_monitor(cluster); ++ rockchip_cpufreq_remove_dsu_qos(cluster); ++ } + -+ crypto_show_valid_algo_single(p, ALG_TYPE_CIPHER, algs, algs_num); -+ crypto_show_valid_algo_single(p, ALG_TYPE_AEAD, algs, algs_num); -+ crypto_show_valid_algo_single(p, ALG_TYPE_HASH, algs, algs_num); -+ crypto_show_valid_algo_single(p, ALG_TYPE_HMAC, algs, algs_num); -+ crypto_show_valid_algo_single(p, ALG_TYPE_ASYM, algs, algs_num); ++ return NOTIFY_OK; +} + -+static int crypto_show_all(struct seq_file *p, void *v) ++static struct notifier_block rockchip_cpufreq_notifier_block = { ++ .notifier_call = rockchip_cpufreq_notifier, ++}; ++ ++#ifdef MODULE ++static struct pm_qos_request idle_pm_qos; ++static int idle_disable_refcnt; ++static DEFINE_MUTEX(idle_disable_lock); ++ ++static int rockchip_cpufreq_idle_state_disable(struct cpumask *cpumask, ++ int index, bool disable) +{ -+ struct rk_crypto_dev *rk_dev = p->private; -+ struct rk_crypto_soc_data *soc_data = rk_dev->soc_data; -+ struct rk_crypto_stat *stat = &rk_dev->stat; ++ mutex_lock(&idle_disable_lock); + -+ seq_printf(p, "Rockchip Crypto Version: %s\n\n", -+ soc_data->crypto_ver); ++ if (disable) { ++ if (idle_disable_refcnt == 0) ++ cpu_latency_qos_update_request(&idle_pm_qos, 0); ++ idle_disable_refcnt++; ++ } else { ++ if (--idle_disable_refcnt == 0) ++ cpu_latency_qos_update_request(&idle_pm_qos, ++ PM_QOS_DEFAULT_VALUE); ++ } + -+ seq_printf(p, "use_soft_aes192 : %s\n\n", soc_data->use_soft_aes192 ? 
"true" : "false"); ++ mutex_unlock(&idle_disable_lock); + -+ crypto_show_clock(p, rk_dev->clk_bulks, rk_dev->clks_num); ++ return 0; ++} ++#else ++static int rockchip_cpufreq_idle_state_disable(struct cpumask *cpumask, ++ int index, bool disable) ++{ ++ unsigned int cpu; + -+ crypto_show_valid_algos(p, soc_data); ++ for_each_cpu(cpu, cpumask) { ++ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + -+ crypto_show_stat(p, stat); ++ if (!dev || !drv) ++ continue; ++ if (index >= drv->state_count) ++ continue; ++ cpuidle_driver_state_disabled(drv, index, disable); ++ } + -+ crypto_show_queue_info(p, rk_dev); ++ if (disable) { ++ preempt_disable(); ++ for_each_cpu(cpu, cpumask) { ++ if (cpu != smp_processor_id() && cpu_online(cpu)) ++ wake_up_if_idle(cpu); ++ } ++ preempt_enable(); ++ } + + return 0; +} ++#endif + -+static int crypto_open(struct inode *inode, struct file *file) ++#define cpu_to_dsu_freq(freq) ((freq) * 4 / 5) ++ ++static int rockchip_cpufreq_update_dsu_req(struct cluster_info *cluster, ++ unsigned int freq) +{ -+ struct rk_crypto_dev *data = pde_data(inode); ++ struct device *dev = cluster->opp_info.dev; ++ unsigned int dsu_freq = rounddown(cpu_to_dsu_freq(freq), 100000); + -+ return single_open(file, crypto_show_all, data); ++ if (cluster->is_opp_shared_dsu || ++ !freq_qos_request_active(&cluster->dsu_qos_req)) ++ return 0; ++ ++ dev_dbg(dev, "cpu to dsu: %u -> %u\n", freq, dsu_freq); ++ ++ return freq_qos_update_request(&cluster->dsu_qos_req, dsu_freq); +} + -+static const struct proc_ops ops = { -+ .proc_open = crypto_open, -+ .proc_read = seq_read, -+ .proc_lseek = seq_lseek, -+ .proc_release = single_release, ++static int rockchip_cpufreq_transition_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct cpufreq_freqs *freqs = data; ++ struct cpufreq_policy *policy = freqs->policy; ++ struct cluster_info *cluster; ++ ++ cluster = rockchip_cluster_info_lookup(policy->cpu); ++ if (!cluster) ++ return NOTIFY_BAD; ++ ++ if (event == CPUFREQ_PRECHANGE) { ++ if (cluster->idle_threshold_freq && ++ freqs->new >= cluster->idle_threshold_freq && ++ !cluster->is_idle_disabled) { ++ rockchip_cpufreq_idle_state_disable(policy->cpus, 1, ++ true); ++ cluster->is_idle_disabled = true; ++ } ++ } else if (event == CPUFREQ_POSTCHANGE) { ++ if (cluster->idle_threshold_freq && ++ freqs->new < cluster->idle_threshold_freq && ++ cluster->is_idle_disabled) { ++ rockchip_cpufreq_idle_state_disable(policy->cpus, 1, ++ false); ++ cluster->is_idle_disabled = false; ++ } ++ rockchip_cpufreq_update_dsu_req(cluster, freqs->new); ++ } ++ ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block rockchip_cpufreq_transition_notifier_block = { ++ .notifier_call = rockchip_cpufreq_transition_notifier, +}; + -+int rkcrypto_proc_init(struct rk_crypto_dev *rk_dev) ++static int rockchip_cpufreq_panic_notifier(struct notifier_block *nb, ++ unsigned long v, void *p) +{ -+ rk_dev->procfs = proc_create_data(rk_dev->name, 0, NULL, &ops, rk_dev); -+ if (!rk_dev->procfs) -+ return -EINVAL; ++ struct cluster_info *ci; ++ struct rockchip_opp_info *opp_info; ++ ++ list_for_each_entry(ci, &cluster_info_list, list_head) { ++ opp_info = &ci->opp_info; ++ ++ if (opp_info->regulator_count > 1) ++ dev_info(opp_info->dev, ++ "cur_freq: %lu Hz, volt_vdd: %lu uV, volt_mem: %lu uV\n", ++ ci->rate, ci->volt, ci->mem_volt); ++ else ++ dev_info(opp_info->dev, "cur_freq: %lu Hz, volt: %lu uV\n", ++ ci->rate, ci->volt); ++ } + + return 
0; +} + -+void rkcrypto_proc_cleanup(struct rk_crypto_dev *rk_dev) ++static struct notifier_block rockchip_cpufreq_panic_notifier_block = { ++ .notifier_call = rockchip_cpufreq_panic_notifier, ++}; ++ ++static int __init rockchip_cpufreq_driver_init(void) +{ -+ if (rk_dev->procfs) -+ remove_proc_entry(rk_dev->name, NULL); ++ struct cluster_info *cluster, *pos; ++ struct cpufreq_dt_platform_data pdata = {0}; ++ int cpu, ret; + -+ rk_dev->procfs = NULL; ++ for_each_possible_cpu(cpu) { ++ cluster = rockchip_cluster_info_lookup(cpu); ++ if (cluster) ++ continue; ++ ++ cluster = kzalloc(sizeof(*cluster), GFP_KERNEL); ++ if (!cluster) { ++ ret = -ENOMEM; ++ goto release_cluster_info; ++ } ++ ++ ret = rockchip_cpufreq_cluster_init(cpu, cluster); ++ if (ret) { ++ pr_err("Failed to initialize dvfs info cpu%d\n", cpu); ++ goto release_cluster_info; ++ } ++ list_add(&cluster->list_head, &cluster_info_list); ++ } ++ ++ pdata.have_governor_per_policy = true; ++ pdata.suspend = rockchip_cpufreq_suspend; ++ ++ ret = cpufreq_register_notifier(&rockchip_cpufreq_notifier_block, ++ CPUFREQ_POLICY_NOTIFIER); ++ if (ret) { ++ pr_err("failed to register cpufreq notifier\n"); ++ goto release_cluster_info; ++ } ++ ++ if (of_machine_is_compatible("rockchip,rk3588")) { ++ ret = cpufreq_register_notifier(&rockchip_cpufreq_transition_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ if (ret) { ++ cpufreq_unregister_notifier(&rockchip_cpufreq_notifier_block, ++ CPUFREQ_POLICY_NOTIFIER); ++ pr_err("failed to register cpufreq notifier\n"); ++ goto release_cluster_info; ++ } ++#ifdef MODULE ++ cpu_latency_qos_add_request(&idle_pm_qos, PM_QOS_DEFAULT_VALUE); ++#endif ++ } ++ ++ ret = atomic_notifier_chain_register(&panic_notifier_list, ++ &rockchip_cpufreq_panic_notifier_block); ++ if (ret) ++ pr_err("failed to register cpufreq panic notifier\n"); ++ ++ return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt", ++ -1, (void *)&pdata, ++ sizeof(struct cpufreq_dt_platform_data))); ++ ++release_cluster_info: ++ list_for_each_entry_safe(cluster, pos, &cluster_info_list, list_head) { ++ list_del(&cluster->list_head); ++ kfree(cluster); ++ } ++ return ret; +} ++module_init(rockchip_cpufreq_driver_init); + -+#endif /* CONFIG_PROC_FS */ -diff --git a/drivers/crypto/rockchip/procfs.h b/drivers/crypto/rockchip/procfs.h ++MODULE_AUTHOR("Finley Xiao "); ++MODULE_DESCRIPTION("Rockchip cpufreq driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/cpufreq/rockchip-cpufreq.h b/drivers/cpufreq/rockchip-cpufreq.h new file mode 100644 -index 000000000..e491c53b4 +index 000000000..2eb920606 --- /dev/null -+++ b/drivers/crypto/rockchip/procfs.h -@@ -0,0 +1,23 @@ ++++ b/drivers/cpufreq/rockchip-cpufreq.h +@@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (c) 2022 Rockchip Electronics Co., Ltd. 
*/ ++/* ++ * Copyright (c) 2019 Fuzhou Rockchip Electronics Co., Ltd ++ */ ++#ifndef __ROCKCHIP_CPUFREQ_H ++#define __ROCKCHIP_CPUFREQ_H + -+#ifndef _RKCRYPTO_PROCFS_H -+#define _RKCRYPTO_PROCFS_H ++#if IS_ENABLED(CONFIG_ARM_ROCKCHIP_CPUFREQ) ++int rockchip_cpufreq_adjust_table(struct device *dev); ++int rockchip_cpufreq_opp_set_rate(struct device *dev, unsigned long target_freq); ++#else ++static inline int rockchip_cpufreq_adjust_table(struct device *dev) ++{ ++ return -EOPNOTSUPP; ++} + -+#include "rk_crypto_core.h" ++static inline int rockchip_cpufreq_opp_set_rate(struct device *dev, ++ unsigned long target_freq) ++{ ++ return -EOPNOTSUPP; ++} ++#endif /* CONFIG_ARM_ROCKCHIP_CPUFREQ */ + -+#ifdef CONFIG_PROC_FS -+int rkcrypto_proc_init(struct rk_crypto_dev *dev); -+void rkcrypto_proc_cleanup(struct rk_crypto_dev *dev); -+#else -+static inline int rkcrypto_proc_init(struct rk_crypto_dev *dev) ++#endif +diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig +index c761952f0..5f1c9813b 100644 +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -638,26 +638,27 @@ config CRYPTO_DEV_ROCKCHIP + select CRYPTO_DES + select CRYPTO_AES + select CRYPTO_ENGINE ++ select CRYPTO_XTS ++ select CRYPTO_CFB ++ select CRYPTO_OFB ++ select CRYPTO_CTR ++ select CRYPTO_GCM + select CRYPTO_LIB_DES + select CRYPTO_MD5 + select CRYPTO_SHA1 ++ select CRYPTO_SM3 ++ select CRYPTO_SM4 + select CRYPTO_SHA256 ++ select CRTPTO_SHA512 + select CRYPTO_HASH + select CRYPTO_SKCIPHER ++ select CRYPTO_RSA + + help + This driver interfaces with the hardware crypto accelerator. + Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. + +-config CRYPTO_DEV_ROCKCHIP_DEBUG +- bool "Enable Rockchip crypto stats" +- depends on CRYPTO_DEV_ROCKCHIP +- depends on DEBUG_FS +- help +- Say y to enable Rockchip crypto debug stats. +- This will create /sys/kernel/debug/rk3288_crypto/stats for displaying +- the number of requests per algorithm and other internal stats. +- ++source "drivers/crypto/rockchip/Kconfig" + + config CRYPTO_DEV_ZYNQMP_AES + tristate "Support for Xilinx ZynqMP AES hw accelerator" +diff --git a/drivers/crypto/rockchip/Kconfig b/drivers/crypto/rockchip/Kconfig +new file mode 100644 +index 000000000..b7402e8a5 +--- /dev/null ++++ b/drivers/crypto/rockchip/Kconfig +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++if CRYPTO_DEV_ROCKCHIP ++ ++config CRYPTO_DEV_ROCKCHIP_V1 ++ bool "crypto v1 for RV1108 RK3288 RK3368 RK3399" ++ default y if CPU_RV1108 || CPU_RK3288 || CPU_RK3368 || CPU_RK3399 ++ ++config CRYPTO_DEV_ROCKCHIP_V2 ++ bool "crypto v2 for RV1109/RV1126 RK1808 RK3308 PX30/RK3326 RK356X RK3588" ++ default y if CPU_RV1126 || CPU_RK1808 || CPU_RK3308 || CPU_PX30 || CPU_RK3568 || CPU_RK3588 ++ ++config CRYPTO_DEV_ROCKCHIP_V3 ++ bool "crypto v3/v4 for RV1106/RK3528/RK3562" ++ default y if CPU_RV1106 || CPU_RK3528 || CPU_RK3562 ++ ++endif ++ ++config CRYPTO_DEV_ROCKCHIP_DEV ++ tristate "Export rockchip crypto device for user space" ++ depends on CRYPTO_DEV_ROCKCHIP ++ default n ++ help ++ This is a /dev/crypto device driver.The main idea is to ++ access existing ciphers in kernel space from userspace, ++ thus enabling the re-use of a hardware implementation of a cipher. 
+diff --git a/drivers/crypto/rockchip/Makefile b/drivers/crypto/rockchip/Makefile +index 785277aca..53e34aa47 100644 +--- a/drivers/crypto/rockchip/Makefile ++++ b/drivers/crypto/rockchip/Makefile +@@ -1,5 +1,30 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rk_crypto.o +-rk_crypto-objs := rk3288_crypto.o \ +- rk3288_crypto_skcipher.o \ +- rk3288_crypto_ahash.o ++rk_crypto-objs := rk_crypto_core.o \ ++ rk_crypto_utils.o \ ++ rk_crypto_ahash_utils.o \ ++ rk_crypto_skcipher_utils.o \ ++ procfs.o ++ ++rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) += \ ++ rk_crypto_v1.o \ ++ rk_crypto_v1_ahash.o \ ++ rk_crypto_v1_skcipher.o ++ ++rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) += \ ++ rk_crypto_v2.o \ ++ rk_crypto_v2_ahash.o \ ++ rk_crypto_v2_skcipher.o \ ++ rk_crypto_v2_akcipher.o \ ++ rk_crypto_v2_pka.o \ ++ rk_crypto_bignum.o ++ ++rk_crypto-$(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) += \ ++ rk_crypto_v3.o \ ++ rk_crypto_v3_ahash.o \ ++ rk_crypto_v3_skcipher.o \ ++ rk_crypto_v2_akcipher.o \ ++ rk_crypto_v2_pka.o \ ++ rk_crypto_bignum.o ++ ++obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) += cryptodev_linux/ +diff --git a/drivers/crypto/rockchip/cryptodev_linux/Makefile b/drivers/crypto/rockchip/cryptodev_linux/Makefile +new file mode 100644 +index 000000000..628262fb2 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/Makefile +@@ -0,0 +1,10 @@ ++# SPDX-License-Identifier: GPL-2.0+ ++obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) += cryptodev.o ++cryptodev-objs := ioctl.o \ ++ main.o \ ++ cryptlib.o \ ++ authenc.o \ ++ zc.o \ ++ util.o \ ++ rk_cryptodev.o ++ +diff --git a/drivers/crypto/rockchip/cryptodev_linux/authenc.c b/drivers/crypto/rockchip/cryptodev_linux/authenc.c +new file mode 100644 +index 000000000..afca7f76d +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/authenc.c +@@ -0,0 +1,1106 @@ ++/* ++ * Driver for /dev/crypto device (aka CryptoDev) ++ * ++ * Copyright (c) 2011, 2012 OpenSSL Software Foundation, Inc. ++ * ++ * Author: Nikos Mavrogiannopoulos ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++/* ++ * This file handles the AEAD part of /dev/crypto. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "cryptodev.h" ++#include "zc.h" ++#include "util.h" ++#include "cryptlib.h" ++#include "version.h" ++ ++ ++/* make caop->dst available in scatterlist. 
++ * (caop->src is assumed to be equal to caop->dst) ++ */ ++static int get_userbuf_tls(struct csession *ses, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist **dst_sg) +{ ++ int pagecount = 0; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int rc; ++ ++ if (caop->dst == NULL) ++ return -EINVAL; ++ ++ if (ses->alignmask) { ++ if (!IS_ALIGNED((unsigned long)caop->dst, ses->alignmask + 1)) ++ dwarning(2, "careful - source address %p is not %d byte aligned", ++ caop->dst, ses->alignmask + 1); ++ } ++ ++ if (kcaop->dst_len == 0) { ++ dwarning(1, "Destination length cannot be zero"); ++ return -EINVAL; ++ } ++ ++ pagecount = PAGECOUNT(caop->dst, kcaop->dst_len); ++ ++ ses->used_pages = pagecount; ++ ses->readonly_pages = 0; ++ ++ rc = cryptodev_adjust_sg_array(ses, pagecount); ++ if (rc) ++ return rc; ++ ++ rc = __cryptodev_get_userbuf(caop->dst, kcaop->dst_len, 1, pagecount, ++ ses->pages, ses->sg, kcaop->task, kcaop->mm); ++ if (unlikely(rc)) { ++ derr(1, "failed to get user pages for data input"); ++ return -EINVAL; ++ } ++ ++ (*dst_sg) = ses->sg; ++ + return 0; +} -+static inline void rkcrypto_proc_cleanup(struct rk_crypto_dev *dev) ++ ++ ++#define MAX_SRTP_AUTH_DATA_DIFF 256 ++ ++/* Makes caop->auth_src available as scatterlist. ++ * It also provides a pointer to caop->dst, which however, ++ * is assumed to be within the caop->auth_src buffer. If not ++ * (if their difference exceeds MAX_SRTP_AUTH_DATA_DIFF) it ++ * returns error. ++ */ ++static int get_userbuf_srtp(struct csession *ses, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist **auth_sg, struct scatterlist **dst_sg) +{ ++ int pagecount, diff; ++ int auth_pagecount = 0; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int rc; ++ ++ if (caop->dst == NULL && caop->auth_src == NULL) { ++ derr(1, "dst and auth_src cannot be both null"); ++ return -EINVAL; ++ } ++ ++ if (ses->alignmask) { ++ if (!IS_ALIGNED((unsigned long)caop->dst, ses->alignmask + 1)) ++ dwarning(2, "careful - source address %p is not %d byte aligned", ++ caop->dst, ses->alignmask + 1); ++ if (!IS_ALIGNED((unsigned long)caop->auth_src, ses->alignmask + 1)) ++ dwarning(2, "careful - source address %p is not %d byte aligned", ++ caop->auth_src, ses->alignmask + 1); ++ } ++ ++ if (unlikely(kcaop->dst_len == 0 || caop->auth_len == 0)) { ++ dwarning(1, "Destination length cannot be zero"); ++ return -EINVAL; ++ } ++ ++ /* Note that in SRTP auth data overlap with data to be encrypted (dst) ++ */ ++ ++ auth_pagecount = PAGECOUNT(caop->auth_src, caop->auth_len); ++ diff = (int)(caop->src - caop->auth_src); ++ if (diff > MAX_SRTP_AUTH_DATA_DIFF || diff < 0) { ++ dwarning(1, "auth_src must overlap with src (diff: %d).", diff); ++ return -EINVAL; ++ } ++ ++ pagecount = auth_pagecount; ++ ++ rc = cryptodev_adjust_sg_array(ses, pagecount*2); /* double pages to have pages for dst(=auth_src) */ ++ if (rc) { ++ derr(1, "cannot adjust sg array"); ++ return rc; ++ } ++ ++ rc = __cryptodev_get_userbuf(caop->auth_src, caop->auth_len, 1, auth_pagecount, ++ ses->pages, ses->sg, kcaop->task, kcaop->mm); ++ if (unlikely(rc)) { ++ derr(1, "failed to get user pages for data input"); ++ return -EINVAL; ++ } ++ ++ ses->used_pages = pagecount; ++ ses->readonly_pages = 0; ++ ++ (*auth_sg) = ses->sg; ++ ++ (*dst_sg) = ses->sg + auth_pagecount; ++ sg_init_table(*dst_sg, auth_pagecount); ++ cryptodev_sg_copy(ses->sg, (*dst_sg), caop->auth_len); ++ (*dst_sg) = cryptodev_sg_advance(*dst_sg, diff); ++ if (*dst_sg == NULL) { ++ cryptodev_release_user_pages(ses); ++ derr(1, "failed to 
get enough pages for auth data"); ++ return -EINVAL; ++ } + ++ return 0; +} -+#endif + -+#endif -diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c -deleted file mode 100644 -index 77d5705a5..000000000 ---- a/drivers/crypto/rockchip/rk3288_crypto.c -+++ /dev/null -@@ -1,444 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-only --/* -- * Crypto acceleration support for Rockchip RK3288 -- * -- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd -- * -- * Author: Zain Wang -- * -- * Some ideas are from marvell-cesa.c and s5p-sss.c driver. -- */ -- --#include "rk3288_crypto.h" --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --static struct rockchip_ip rocklist = { -- .dev_list = LIST_HEAD_INIT(rocklist.dev_list), -- .lock = __SPIN_LOCK_UNLOCKED(rocklist.lock), --}; -- --struct rk_crypto_info *get_rk_crypto(void) --{ -- struct rk_crypto_info *first; -- -- spin_lock(&rocklist.lock); -- first = list_first_entry_or_null(&rocklist.dev_list, -- struct rk_crypto_info, list); -- list_rotate_left(&rocklist.dev_list); -- spin_unlock(&rocklist.lock); -- return first; --} -- --static const struct rk_variant rk3288_variant = { -- .num_clks = 4, -- .rkclks = { -- { "sclk", 150000000}, -- } --}; -- --static const struct rk_variant rk3328_variant = { -- .num_clks = 3, --}; -- --static const struct rk_variant rk3399_variant = { -- .num_clks = 3, --}; -- --static int rk_crypto_get_clks(struct rk_crypto_info *dev) --{ -- int i, j, err; -- unsigned long cr; -- -- dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks); -- if (dev->num_clks < dev->variant->num_clks) { -- dev_err(dev->dev, "Missing clocks, got %d instead of %d\n", -- dev->num_clks, dev->variant->num_clks); -- return -EINVAL; -- } -- -- for (i = 0; i < dev->num_clks; i++) { -- cr = clk_get_rate(dev->clks[i].clk); -- for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) { -- if (dev->variant->rkclks[j].max == 0) -- continue; -- if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id)) -- continue; -- if (cr > dev->variant->rkclks[j].max) { -- err = clk_set_rate(dev->clks[i].clk, -- dev->variant->rkclks[j].max); -- if (err) -- dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n", -- dev->variant->rkclks[j].name, cr, -- dev->variant->rkclks[j].max); -- else -- dev_info(dev->dev, "Downclocking %s from %lu to %lu\n", -- dev->variant->rkclks[j].name, cr, -- dev->variant->rkclks[j].max); -- } -- } -- } -- return 0; --} -- --static int rk_crypto_enable_clk(struct rk_crypto_info *dev) --{ -- int err; -- -- err = clk_bulk_prepare_enable(dev->num_clks, dev->clks); -- if (err) -- dev_err(dev->dev, "Could not enable clock clks\n"); -- -- return err; --} -- --static void rk_crypto_disable_clk(struct rk_crypto_info *dev) --{ -- clk_bulk_disable_unprepare(dev->num_clks, dev->clks); --} -- --/* -- * Power management strategy: The device is suspended until a request -- * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s. 
-- */ --static int rk_crypto_pm_suspend(struct device *dev) --{ -- struct rk_crypto_info *rkdev = dev_get_drvdata(dev); -- -- rk_crypto_disable_clk(rkdev); -- reset_control_assert(rkdev->rst); -- -- return 0; --} -- --static int rk_crypto_pm_resume(struct device *dev) --{ -- struct rk_crypto_info *rkdev = dev_get_drvdata(dev); -- int ret; -- -- ret = rk_crypto_enable_clk(rkdev); -- if (ret) -- return ret; -- -- reset_control_deassert(rkdev->rst); -- return 0; -- --} -- --static const struct dev_pm_ops rk_crypto_pm_ops = { -- SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL) --}; -- --static int rk_crypto_pm_init(struct rk_crypto_info *rkdev) --{ -- int err; -- -- pm_runtime_use_autosuspend(rkdev->dev); -- pm_runtime_set_autosuspend_delay(rkdev->dev, 2000); -- -- err = pm_runtime_set_suspended(rkdev->dev); -- if (err) -- return err; -- pm_runtime_enable(rkdev->dev); -- return err; --} -- --static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev) --{ -- pm_runtime_disable(rkdev->dev); --} -- --static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) --{ -- struct rk_crypto_info *dev = platform_get_drvdata(dev_id); -- u32 interrupt_status; -- -- interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); -- CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); -- -- dev->status = 1; -- if (interrupt_status & 0x0a) { -- dev_warn(dev->dev, "DMA Error\n"); -- dev->status = 0; -- } -- complete(&dev->complete); -- -- return IRQ_HANDLED; --} -- --static struct rk_crypto_tmp *rk_cipher_algs[] = { -- &rk_ecb_aes_alg, -- &rk_cbc_aes_alg, -- &rk_ecb_des_alg, -- &rk_cbc_des_alg, -- &rk_ecb_des3_ede_alg, -- &rk_cbc_des3_ede_alg, -- &rk_ahash_sha1, -- &rk_ahash_sha256, -- &rk_ahash_md5, --}; -- --static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) --{ -- struct rk_crypto_info *dd; -- unsigned int i; -- -- spin_lock(&rocklist.lock); -- list_for_each_entry(dd, &rocklist.dev_list, list) { -- seq_printf(seq, "%s %s requests: %lu\n", -- dev_driver_string(dd->dev), dev_name(dd->dev), -- dd->nreq); -- } -- spin_unlock(&rocklist.lock); -- -- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { -- if (!rk_cipher_algs[i]->dev) -- continue; -- switch (rk_cipher_algs[i]->type) { -- case CRYPTO_ALG_TYPE_SKCIPHER: -- seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", -- rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name, -- rk_cipher_algs[i]->alg.skcipher.base.base.cra_name, -- rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); -- seq_printf(seq, "\tfallback due to length: %lu\n", -- rk_cipher_algs[i]->stat_fb_len); -- seq_printf(seq, "\tfallback due to alignment: %lu\n", -- rk_cipher_algs[i]->stat_fb_align); -- seq_printf(seq, "\tfallback due to SGs: %lu\n", -- rk_cipher_algs[i]->stat_fb_sgdiff); -- break; -- case CRYPTO_ALG_TYPE_AHASH: -- seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", -- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name, -- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name, -- rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); -- break; -- } -- } -- return 0; --} -- --DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs); -- --static void register_debugfs(struct rk_crypto_info *crypto_info) --{ -- struct dentry *dbgfs_dir __maybe_unused; -- struct dentry *dbgfs_stats __maybe_unused; -- -- /* Ignore error of debugfs */ -- dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); -- dbgfs_stats = debugfs_create_file("stats", 0444, dbgfs_dir, &rocklist, -- &rk_crypto_debugfs_fops); -- --#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG -- rocklist.dbgfs_dir = 
dbgfs_dir; -- rocklist.dbgfs_stats = dbgfs_stats; --#endif --} -- --static int rk_crypto_register(struct rk_crypto_info *crypto_info) --{ -- unsigned int i, k; -- int err = 0; -- -- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { -- rk_cipher_algs[i]->dev = crypto_info; -- switch (rk_cipher_algs[i]->type) { -- case CRYPTO_ALG_TYPE_SKCIPHER: -- dev_info(crypto_info->dev, "Register %s as %s\n", -- rk_cipher_algs[i]->alg.skcipher.base.base.cra_name, -- rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name); -- err = crypto_engine_register_skcipher(&rk_cipher_algs[i]->alg.skcipher); -- break; -- case CRYPTO_ALG_TYPE_AHASH: -- dev_info(crypto_info->dev, "Register %s as %s\n", -- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name, -- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name); -- err = crypto_engine_register_ahash(&rk_cipher_algs[i]->alg.hash); -- break; -- default: -- dev_err(crypto_info->dev, "unknown algorithm\n"); -- } -- if (err) -- goto err_cipher_algs; -- } -- return 0; -- --err_cipher_algs: -- for (k = 0; k < i; k++) { -- if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) -- crypto_engine_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher); -- else -- crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash); -- } -- return err; --} -- --static void rk_crypto_unregister(void) --{ -- unsigned int i; -- -- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { -- if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) -- crypto_engine_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher); -- else -- crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash); -- } --} -- --static const struct of_device_id crypto_of_id_table[] = { -- { .compatible = "rockchip,rk3288-crypto", -- .data = &rk3288_variant, -- }, -- { .compatible = "rockchip,rk3328-crypto", -- .data = &rk3328_variant, -- }, -- { .compatible = "rockchip,rk3399-crypto", -- .data = &rk3399_variant, -- }, -- {} --}; --MODULE_DEVICE_TABLE(of, crypto_of_id_table); -- --static int rk_crypto_probe(struct platform_device *pdev) --{ -- struct device *dev = &pdev->dev; -- struct rk_crypto_info *crypto_info, *first; -- int err = 0; -- -- crypto_info = devm_kzalloc(&pdev->dev, -- sizeof(*crypto_info), GFP_KERNEL); -- if (!crypto_info) { -- err = -ENOMEM; -- goto err_crypto; -- } -- -- crypto_info->dev = &pdev->dev; -- platform_set_drvdata(pdev, crypto_info); -- -- crypto_info->variant = of_device_get_match_data(&pdev->dev); -- if (!crypto_info->variant) { -- dev_err(&pdev->dev, "Missing variant\n"); -- return -EINVAL; -- } -- -- crypto_info->rst = devm_reset_control_array_get_exclusive(dev); -- if (IS_ERR(crypto_info->rst)) { -- err = PTR_ERR(crypto_info->rst); -- goto err_crypto; -- } -- -- reset_control_assert(crypto_info->rst); -- usleep_range(10, 20); -- reset_control_deassert(crypto_info->rst); -- -- crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); -- if (IS_ERR(crypto_info->reg)) { -- err = PTR_ERR(crypto_info->reg); -- goto err_crypto; -- } -- -- err = rk_crypto_get_clks(crypto_info); -- if (err) -- goto err_crypto; -- -- crypto_info->irq = platform_get_irq(pdev, 0); -- if (crypto_info->irq < 0) { -- err = crypto_info->irq; -- goto err_crypto; -- } -- -- err = devm_request_irq(&pdev->dev, crypto_info->irq, -- rk_crypto_irq_handle, IRQF_SHARED, -- "rk-crypto", pdev); -- -- if (err) { -- dev_err(&pdev->dev, "irq request failed.\n"); -- goto err_crypto; -- } -- -- crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); -- crypto_engine_start(crypto_info->engine); -- 
init_completion(&crypto_info->complete); -- -- err = rk_crypto_pm_init(crypto_info); -- if (err) -- goto err_pm; -- -- spin_lock(&rocklist.lock); -- first = list_first_entry_or_null(&rocklist.dev_list, -- struct rk_crypto_info, list); -- list_add_tail(&crypto_info->list, &rocklist.dev_list); -- spin_unlock(&rocklist.lock); -- -- if (!first) { -- err = rk_crypto_register(crypto_info); -- if (err) { -- dev_err(dev, "Fail to register crypto algorithms"); -- goto err_register_alg; -- } -- -- register_debugfs(crypto_info); -- } -- -- return 0; -- --err_register_alg: -- rk_crypto_pm_exit(crypto_info); --err_pm: -- crypto_engine_exit(crypto_info->engine); --err_crypto: -- dev_err(dev, "Crypto Accelerator not successfully registered\n"); -- return err; --} -- --static int rk_crypto_remove(struct platform_device *pdev) --{ -- struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); -- struct rk_crypto_info *first; -- -- spin_lock_bh(&rocklist.lock); -- list_del(&crypto_tmp->list); -- first = list_first_entry_or_null(&rocklist.dev_list, -- struct rk_crypto_info, list); -- spin_unlock_bh(&rocklist.lock); -- -- if (!first) { --#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG -- debugfs_remove_recursive(rocklist.dbgfs_dir); --#endif -- rk_crypto_unregister(); -- } -- rk_crypto_pm_exit(crypto_tmp); -- crypto_engine_exit(crypto_tmp->engine); -- return 0; --} -- --static struct platform_driver crypto_driver = { -- .probe = rk_crypto_probe, -- .remove = rk_crypto_remove, -- .driver = { -- .name = "rk3288-crypto", -- .pm = &rk_crypto_pm_ops, -- .of_match_table = crypto_of_id_table, -- }, --}; -- --module_platform_driver(crypto_driver); -- --MODULE_AUTHOR("Zain Wang "); --MODULE_DESCRIPTION("Support for Rockchip's cryptographic engine"); --MODULE_LICENSE("GPL"); -diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c -deleted file mode 100644 -index 29c953721..000000000 ---- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c -+++ /dev/null -@@ -1,470 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-only --/* -- * Crypto acceleration support for Rockchip RK3288 -- * -- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd -- * -- * Author: Zain Wang -- * -- * Some ideas are from marvell/cesa.c and s5p-sss.c driver. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include "rk3288_crypto.h" -- --/* -- * IC can not process zero message hash, -- * so we put the fixed hash out when met zero message. 
-- */ -- --static bool rk_ahash_need_fallback(struct ahash_request *req) --{ -- struct scatterlist *sg; -- -- sg = req->src; -- while (sg) { -- if (!IS_ALIGNED(sg->offset, sizeof(u32))) { -- return true; -- } -- if (sg->length % 4) { -- return true; -- } -- sg = sg_next(sg); -- } -- return false; --} -- --static int rk_ahash_digest_fb(struct ahash_request *areq) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); -- struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); -- struct ahash_alg *alg = crypto_ahash_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); -- -- algt->stat_fb++; -- -- ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); -- rctx->fallback_req.base.flags = areq->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- -- rctx->fallback_req.nbytes = areq->nbytes; -- rctx->fallback_req.src = areq->src; -- rctx->fallback_req.result = areq->result; -- -- return crypto_ahash_digest(&rctx->fallback_req); --} -- --static int zero_message_process(struct ahash_request *req) --{ -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- int rk_digest_size = crypto_ahash_digestsize(tfm); -- -- switch (rk_digest_size) { -- case SHA1_DIGEST_SIZE: -- memcpy(req->result, sha1_zero_message_hash, rk_digest_size); -- break; -- case SHA256_DIGEST_SIZE: -- memcpy(req->result, sha256_zero_message_hash, rk_digest_size); -- break; -- case MD5_DIGEST_SIZE: -- memcpy(req->result, md5_zero_message_hash, rk_digest_size); -- break; -- default: -- return -EINVAL; -- } -- -- return 0; --} -- --static void rk_ahash_reg_init(struct ahash_request *req, -- struct rk_crypto_info *dev) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- int reg_status; -- -- reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | -- RK_CRYPTO_HASH_FLUSH | _SBF(0xffff, 16); -- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, reg_status); -- -- reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL); -- reg_status &= (~RK_CRYPTO_HASH_FLUSH); -- reg_status |= _SBF(0xffff, 16); -- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, reg_status); -- -- memset_io(dev->reg + RK_CRYPTO_HASH_DOUT_0, 0, 32); -- -- CRYPTO_WRITE(dev, RK_CRYPTO_INTENA, RK_CRYPTO_HRDMA_ERR_ENA | -- RK_CRYPTO_HRDMA_DONE_ENA); -- -- CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT | -- RK_CRYPTO_HRDMA_DONE_INT); -- -- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, rctx->mode | -- RK_CRYPTO_HASH_SWAP_DO); -- -- CRYPTO_WRITE(dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO | -- RK_CRYPTO_BYTESWAP_BRFIFO | -- RK_CRYPTO_BYTESWAP_BTFIFO); -- -- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes); --} -- --static int rk_ahash_init(struct ahash_request *req) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- -- return crypto_ahash_init(&rctx->fallback_req); --} -- --static int rk_ahash_update(struct ahash_request *req) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- rctx->fallback_req.nbytes = req->nbytes; -- rctx->fallback_req.src = req->src; -- -- 
return crypto_ahash_update(&rctx->fallback_req); --} -- --static int rk_ahash_final(struct ahash_request *req) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- rctx->fallback_req.result = req->result; -- -- return crypto_ahash_final(&rctx->fallback_req); --} -- --static int rk_ahash_finup(struct ahash_request *req) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- -- rctx->fallback_req.nbytes = req->nbytes; -- rctx->fallback_req.src = req->src; -- rctx->fallback_req.result = req->result; -- -- return crypto_ahash_finup(&rctx->fallback_req); --} -- --static int rk_ahash_import(struct ahash_request *req, const void *in) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- -- return crypto_ahash_import(&rctx->fallback_req, in); --} -- --static int rk_ahash_export(struct ahash_request *req, void *out) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -- -- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -- rctx->fallback_req.base.flags = req->base.flags & -- CRYPTO_TFM_REQ_MAY_SLEEP; -- -- return crypto_ahash_export(&rctx->fallback_req, out); --} -- --static int rk_ahash_digest(struct ahash_request *req) --{ -- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -- struct rk_crypto_info *dev; -- struct crypto_engine *engine; -- -- if (rk_ahash_need_fallback(req)) -- return rk_ahash_digest_fb(req); -- -- if (!req->nbytes) -- return zero_message_process(req); -- -- dev = get_rk_crypto(); -- -- rctx->dev = dev; -- engine = dev->engine; -- -- return crypto_transfer_hash_request_to_engine(engine, req); --} -- --static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) --{ -- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg)); -- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4); -- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | -- (RK_CRYPTO_HASH_START << 16)); --} -- --static int rk_hash_prepare(struct crypto_engine *engine, void *breq) --{ -- struct ahash_request *areq = container_of(breq, struct ahash_request, base); -- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); -- struct rk_crypto_info *rkc = rctx->dev; -- int ret; -- -- ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); -- if (ret <= 0) -- return -EINVAL; -- -- rctx->nrsg = ret; -- -- return 0; --} -- --static void rk_hash_unprepare(struct crypto_engine *engine, void *breq) --{ -- struct ahash_request *areq = container_of(breq, struct ahash_request, base); -- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); -- struct rk_crypto_info *rkc = rctx->dev; -- -- dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); --} 
-- --static int rk_hash_run(struct crypto_engine *engine, void *breq) --{ -- struct ahash_request *areq = container_of(breq, struct ahash_request, base); -- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); -- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); -- struct ahash_alg *alg = crypto_ahash_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); -- struct scatterlist *sg = areq->src; -- struct rk_crypto_info *rkc = rctx->dev; -- int err; -- int i; -- u32 v; -- -- err = pm_runtime_resume_and_get(rkc->dev); -- if (err) -- return err; -- -- err = rk_hash_prepare(engine, breq); -- if (err) -- goto theend; -- -- rctx->mode = 0; -- -- algt->stat_req++; -- rkc->nreq++; -- -- switch (crypto_ahash_digestsize(tfm)) { -- case SHA1_DIGEST_SIZE: -- rctx->mode = RK_CRYPTO_HASH_SHA1; -- break; -- case SHA256_DIGEST_SIZE: -- rctx->mode = RK_CRYPTO_HASH_SHA256; -- break; -- case MD5_DIGEST_SIZE: -- rctx->mode = RK_CRYPTO_HASH_MD5; -- break; -- default: -- err = -EINVAL; -- goto theend; -- } -- -- rk_ahash_reg_init(areq, rkc); -- -- while (sg) { -- reinit_completion(&rkc->complete); -- rkc->status = 0; -- crypto_ahash_dma_start(rkc, sg); -- wait_for_completion_interruptible_timeout(&rkc->complete, -- msecs_to_jiffies(2000)); -- if (!rkc->status) { -- dev_err(rkc->dev, "DMA timeout\n"); -- err = -EFAULT; -- goto theend; -- } -- sg = sg_next(sg); -- } -- -- /* -- * it will take some time to process date after last dma -- * transmission. -- * -- * waiting time is relative with the last date len, -- * so cannot set a fixed time here. -- * 10us makes system not call here frequently wasting -- * efficiency, and make it response quickly when dma -- * complete. -- */ -- readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); -- -- for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { -- v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); -- put_unaligned_le32(v, areq->result + i * 4); -- } -- --theend: -- pm_runtime_put_autosuspend(rkc->dev); -- -- rk_hash_unprepare(engine, breq); -- -- local_bh_disable(); -- crypto_finalize_hash_request(engine, breq, err); -- local_bh_enable(); -- -- return 0; --} -- --static int rk_hash_init_tfm(struct crypto_ahash *tfm) --{ -- struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); -- const char *alg_name = crypto_ahash_alg_name(tfm); -- struct ahash_alg *alg = crypto_ahash_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); -- -- /* for fallback */ -- tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, -- CRYPTO_ALG_NEED_FALLBACK); -- if (IS_ERR(tctx->fallback_tfm)) { -- dev_err(algt->dev->dev, "Could not load fallback driver.\n"); -- return PTR_ERR(tctx->fallback_tfm); -- } -- -- crypto_ahash_set_reqsize(tfm, -- sizeof(struct rk_ahash_rctx) + -- crypto_ahash_reqsize(tctx->fallback_tfm)); -- -- return 0; --} -- --static void rk_hash_exit_tfm(struct crypto_ahash *tfm) --{ -- struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); -- -- crypto_free_ahash(tctx->fallback_tfm); --} -- --struct rk_crypto_tmp rk_ahash_sha1 = { -- .type = CRYPTO_ALG_TYPE_AHASH, -- .alg.hash.base = { -- .init = rk_ahash_init, -- .update = rk_ahash_update, -- .final = rk_ahash_final, -- .finup = rk_ahash_finup, -- .export = rk_ahash_export, -- .import = rk_ahash_import, -- .digest = rk_ahash_digest, -- .init_tfm = rk_hash_init_tfm, -- .exit_tfm = rk_hash_exit_tfm, -- .halg = { -- .digestsize = SHA1_DIGEST_SIZE, -- .statesize = sizeof(struct sha1_state), -- .base = { -- .cra_name = "sha1", -- 
.cra_driver_name = "rk-sha1", -- .cra_priority = 300, -- .cra_flags = CRYPTO_ALG_ASYNC | -- CRYPTO_ALG_NEED_FALLBACK, -- .cra_blocksize = SHA1_BLOCK_SIZE, -- .cra_ctxsize = sizeof(struct rk_ahash_ctx), -- .cra_alignmask = 3, -- .cra_module = THIS_MODULE, -- } -- } -- }, -- .alg.hash.op = { -- .do_one_request = rk_hash_run, -- }, --}; -- --struct rk_crypto_tmp rk_ahash_sha256 = { -- .type = CRYPTO_ALG_TYPE_AHASH, -- .alg.hash.base = { -- .init = rk_ahash_init, -- .update = rk_ahash_update, -- .final = rk_ahash_final, -- .finup = rk_ahash_finup, -- .export = rk_ahash_export, -- .import = rk_ahash_import, -- .digest = rk_ahash_digest, -- .init_tfm = rk_hash_init_tfm, -- .exit_tfm = rk_hash_exit_tfm, -- .halg = { -- .digestsize = SHA256_DIGEST_SIZE, -- .statesize = sizeof(struct sha256_state), -- .base = { -- .cra_name = "sha256", -- .cra_driver_name = "rk-sha256", -- .cra_priority = 300, -- .cra_flags = CRYPTO_ALG_ASYNC | -- CRYPTO_ALG_NEED_FALLBACK, -- .cra_blocksize = SHA256_BLOCK_SIZE, -- .cra_ctxsize = sizeof(struct rk_ahash_ctx), -- .cra_alignmask = 3, -- .cra_module = THIS_MODULE, -- } -- } -- }, -- .alg.hash.op = { -- .do_one_request = rk_hash_run, -- }, --}; -- --struct rk_crypto_tmp rk_ahash_md5 = { -- .type = CRYPTO_ALG_TYPE_AHASH, -- .alg.hash.base = { -- .init = rk_ahash_init, -- .update = rk_ahash_update, -- .final = rk_ahash_final, -- .finup = rk_ahash_finup, -- .export = rk_ahash_export, -- .import = rk_ahash_import, -- .digest = rk_ahash_digest, -- .init_tfm = rk_hash_init_tfm, -- .exit_tfm = rk_hash_exit_tfm, -- .halg = { -- .digestsize = MD5_DIGEST_SIZE, -- .statesize = sizeof(struct md5_state), -- .base = { -- .cra_name = "md5", -- .cra_driver_name = "rk-md5", -- .cra_priority = 300, -- .cra_flags = CRYPTO_ALG_ASYNC | -- CRYPTO_ALG_NEED_FALLBACK, -- .cra_blocksize = SHA1_BLOCK_SIZE, -- .cra_ctxsize = sizeof(struct rk_ahash_ctx), -- .cra_alignmask = 3, -- .cra_module = THIS_MODULE, -- } -- } -- }, -- .alg.hash.op = { -- .do_one_request = rk_hash_run, -- }, --}; -diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c -deleted file mode 100644 -index da95747d9..000000000 ---- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c -+++ /dev/null -@@ -1,613 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-only --/* -- * Crypto acceleration support for Rockchip RK3288 -- * -- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd -- * -- * Author: Zain Wang -- * -- * Some ideas are from marvell-cesa.c and s5p-sss.c driver. 
-- */ -- --#include --#include --#include --#include --#include --#include --#include --#include "rk3288_crypto.h" -- --#define RK_CRYPTO_DEC BIT(0) -- --static int rk_cipher_need_fallback(struct skcipher_request *req) --{ -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); -- struct scatterlist *sgs, *sgd; -- unsigned int stodo, dtodo, len; -- unsigned int bs = crypto_skcipher_blocksize(tfm); -- -- if (!req->cryptlen) -- return true; -- -- len = req->cryptlen; -- sgs = req->src; -- sgd = req->dst; -- while (sgs && sgd) { -- if (!IS_ALIGNED(sgs->offset, sizeof(u32))) { -- algt->stat_fb_align++; -- return true; -- } -- if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { -- algt->stat_fb_align++; -- return true; -- } -- stodo = min(len, sgs->length); -- if (stodo % bs) { -- algt->stat_fb_len++; -- return true; -- } -- dtodo = min(len, sgd->length); -- if (dtodo % bs) { -- algt->stat_fb_len++; -- return true; -- } -- if (stodo != dtodo) { -- algt->stat_fb_sgdiff++; -- return true; -- } -- len -= stodo; -- sgs = sg_next(sgs); -- sgd = sg_next(sgd); -- } -- return false; --} -- --static int rk_cipher_fallback(struct skcipher_request *areq) --{ -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); -- struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); -- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); -- int err; -- -- algt->stat_fb++; -- -- skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); -- skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, -- areq->base.complete, areq->base.data); -- skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, -- areq->cryptlen, areq->iv); -- if (rctx->mode & RK_CRYPTO_DEC) -- err = crypto_skcipher_decrypt(&rctx->fallback_req); -- else -- err = crypto_skcipher_encrypt(&rctx->fallback_req); -- return err; --} -- --static int rk_cipher_handle_req(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- struct rk_crypto_info *rkc; -- struct crypto_engine *engine; -- -- if (rk_cipher_need_fallback(req)) -- return rk_cipher_fallback(req); -- -- rkc = get_rk_crypto(); -- -- engine = rkc->engine; -- rctx->dev = rkc; -- -- return crypto_transfer_skcipher_request_to_engine(engine, req); --} -- --static int rk_aes_setkey(struct crypto_skcipher *cipher, -- const u8 *key, unsigned int keylen) --{ -- struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); -- struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm); -- -- if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 && -- keylen != AES_KEYSIZE_256) -- return -EINVAL; -- ctx->keylen = keylen; -- memcpy(ctx->key, key, keylen); -- -- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); --} -- --static int rk_des_setkey(struct crypto_skcipher *cipher, -- const u8 *key, unsigned int keylen) --{ -- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); -- int err; -- -- err = verify_skcipher_des_key(cipher, key); -- if (err) -- return err; -- -- ctx->keylen = keylen; -- memcpy(ctx->key, key, keylen); -- -- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); --} -- --static int rk_tdes_setkey(struct crypto_skcipher *cipher, -- const u8 *key, unsigned int keylen) --{ -- struct rk_cipher_ctx *ctx = 
crypto_skcipher_ctx(cipher); -- int err; -- -- err = verify_skcipher_des3_key(cipher, key); -- if (err) -- return err; -- -- ctx->keylen = keylen; -- memcpy(ctx->key, key, keylen); -- -- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); --} -- --static int rk_aes_ecb_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_AES_ECB_MODE; -- return rk_cipher_handle_req(req); --} -- --static int rk_aes_ecb_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static int rk_aes_cbc_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_AES_CBC_MODE; -- return rk_cipher_handle_req(req); --} -- --static int rk_aes_cbc_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des_ecb_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = 0; -- return rk_cipher_handle_req(req); --} -- --static int rk_des_ecb_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des_cbc_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des_cbc_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_SELECT; -- return rk_cipher_handle_req(req); --} -- --static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; -- return rk_cipher_handle_req(req); --} -- --static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) --{ -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- -- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | -- RK_CRYPTO_DEC; -- return rk_cipher_handle_req(req); --} -- --static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) --{ -- struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -- struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); -- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); -- u32 block, conf_reg = 0; -- -- block = crypto_tfm_alg_blocksize(tfm); -- -- if (block == DES_BLOCK_SIZE) { -- rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | -- RK_CRYPTO_TDES_BYTESWAP_KEY | -- RK_CRYPTO_TDES_BYTESWAP_IV; -- CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, 
rctx->mode); -- memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); -- conf_reg = RK_CRYPTO_DESSEL; -- } else { -- rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | -- RK_CRYPTO_AES_KEY_CHANGE | -- RK_CRYPTO_AES_BYTESWAP_KEY | -- RK_CRYPTO_AES_BYTESWAP_IV; -- if (ctx->keylen == AES_KEYSIZE_192) -- rctx->mode |= RK_CRYPTO_AES_192BIT_key; -- else if (ctx->keylen == AES_KEYSIZE_256) -- rctx->mode |= RK_CRYPTO_AES_256BIT_key; -- CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); -- memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); -- } -- conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | -- RK_CRYPTO_BYTESWAP_BRFIFO; -- CRYPTO_WRITE(dev, RK_CRYPTO_CONF, conf_reg); -- CRYPTO_WRITE(dev, RK_CRYPTO_INTENA, -- RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); --} -- --static void crypto_dma_start(struct rk_crypto_info *dev, -- struct scatterlist *sgs, -- struct scatterlist *sgd, unsigned int todo) --{ -- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs)); -- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo); -- CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd)); -- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | -- _SBF(RK_CRYPTO_BLOCK_START, 16)); --} -- --static int rk_cipher_run(struct crypto_engine *engine, void *async_req) --{ -- struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); -- struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); -- struct scatterlist *sgs, *sgd; -- int err = 0; -- int ivsize = crypto_skcipher_ivsize(tfm); -- int offset; -- u8 iv[AES_BLOCK_SIZE]; -- u8 biv[AES_BLOCK_SIZE]; -- u8 *ivtouse = areq->iv; -- unsigned int len = areq->cryptlen; -- unsigned int todo; -- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); -- struct rk_crypto_info *rkc = rctx->dev; -- -- err = pm_runtime_resume_and_get(rkc->dev); -- if (err) -- return err; -- -- algt->stat_req++; -- rkc->nreq++; -- -- ivsize = crypto_skcipher_ivsize(tfm); -- if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { -- if (rctx->mode & RK_CRYPTO_DEC) { -- offset = areq->cryptlen - ivsize; -- scatterwalk_map_and_copy(rctx->backup_iv, areq->src, -- offset, ivsize, 0); -- } -- } -- -- sgs = areq->src; -- sgd = areq->dst; -- -- while (sgs && sgd && len) { -- if (!sgs->length) { -- sgs = sg_next(sgs); -- sgd = sg_next(sgd); -- continue; -- } -- if (rctx->mode & RK_CRYPTO_DEC) { -- /* we backup last block of source to be used as IV at next step */ -- offset = sgs->length - ivsize; -- scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); -- } -- if (sgs == sgd) { -- err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); -- if (err <= 0) { -- err = -EINVAL; -- goto theend_iv; -- } -- } else { -- err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); -- if (err <= 0) { -- err = -EINVAL; -- goto theend_iv; -- } -- err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); -- if (err <= 0) { -- err = -EINVAL; -- goto theend_sgs; -- } -- } -- err = 0; -- rk_cipher_hw_init(rkc, areq); -- if (ivsize) { -- if (ivsize == DES_BLOCK_SIZE) -- memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); -- else -- memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); -- } -- reinit_completion(&rkc->complete); -- rkc->status = 0; -- -- todo = min(sg_dma_len(sgs), len); -- len -= todo; -- crypto_dma_start(rkc, sgs, sgd, todo / 4); -- wait_for_completion_interruptible_timeout(&rkc->complete, -- msecs_to_jiffies(2000)); 
-- if (!rkc->status) { -- dev_err(rkc->dev, "DMA timeout\n"); -- err = -EFAULT; -- goto theend; -- } -- if (sgs == sgd) { -- dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); -- } else { -- dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); -- dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); -- } -- if (rctx->mode & RK_CRYPTO_DEC) { -- memcpy(iv, biv, ivsize); -- ivtouse = iv; -- } else { -- offset = sgd->length - ivsize; -- scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0); -- ivtouse = iv; -- } -- sgs = sg_next(sgs); -- sgd = sg_next(sgd); -- } -- -- if (areq->iv && ivsize > 0) { -- offset = areq->cryptlen - ivsize; -- if (rctx->mode & RK_CRYPTO_DEC) { -- memcpy(areq->iv, rctx->backup_iv, ivsize); -- memzero_explicit(rctx->backup_iv, ivsize); -- } else { -- scatterwalk_map_and_copy(areq->iv, areq->dst, offset, -- ivsize, 0); -- } -- } -- --theend: -- pm_runtime_put_autosuspend(rkc->dev); -- -- local_bh_disable(); -- crypto_finalize_skcipher_request(engine, areq, err); -- local_bh_enable(); -- return 0; -- --theend_sgs: -- if (sgs == sgd) { -- dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); -- } else { -- dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); -- dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); -- } --theend_iv: -- return err; --} -- --static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) --{ -- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -- const char *name = crypto_tfm_alg_name(&tfm->base); -- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); -- -- ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); -- if (IS_ERR(ctx->fallback_tfm)) { -- dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", -- name, PTR_ERR(ctx->fallback_tfm)); -- return PTR_ERR(ctx->fallback_tfm); -- } -- -- tfm->reqsize = sizeof(struct rk_cipher_rctx) + -- crypto_skcipher_reqsize(ctx->fallback_tfm); -- -- return 0; --} -- --static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) --{ -- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -- -- memzero_explicit(ctx->key, ctx->keylen); -- crypto_free_skcipher(ctx->fallback_tfm); --} -- --struct rk_crypto_tmp rk_ecb_aes_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "ecb(aes)", -- .base.cra_driver_name = "ecb-aes-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = AES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x0f, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = AES_MIN_KEY_SIZE, -- .max_keysize = AES_MAX_KEY_SIZE, -- .setkey = rk_aes_setkey, -- .encrypt = rk_aes_ecb_encrypt, -- .decrypt = rk_aes_ecb_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -- --struct rk_crypto_tmp rk_cbc_aes_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "cbc(aes)", -- .base.cra_driver_name = "cbc-aes-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = AES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x0f, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = AES_MIN_KEY_SIZE, -- .max_keysize = AES_MAX_KEY_SIZE, -- .ivsize = 
AES_BLOCK_SIZE, -- .setkey = rk_aes_setkey, -- .encrypt = rk_aes_cbc_encrypt, -- .decrypt = rk_aes_cbc_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -- --struct rk_crypto_tmp rk_ecb_des_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "ecb(des)", -- .base.cra_driver_name = "ecb-des-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = DES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x07, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = DES_KEY_SIZE, -- .max_keysize = DES_KEY_SIZE, -- .setkey = rk_des_setkey, -- .encrypt = rk_des_ecb_encrypt, -- .decrypt = rk_des_ecb_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -- --struct rk_crypto_tmp rk_cbc_des_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "cbc(des)", -- .base.cra_driver_name = "cbc-des-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = DES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x07, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = DES_KEY_SIZE, -- .max_keysize = DES_KEY_SIZE, -- .ivsize = DES_BLOCK_SIZE, -- .setkey = rk_des_setkey, -- .encrypt = rk_des_cbc_encrypt, -- .decrypt = rk_des_cbc_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -- --struct rk_crypto_tmp rk_ecb_des3_ede_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "ecb(des3_ede)", -- .base.cra_driver_name = "ecb-des3-ede-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = DES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x07, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = DES3_EDE_KEY_SIZE, -- .max_keysize = DES3_EDE_KEY_SIZE, -- .setkey = rk_tdes_setkey, -- .encrypt = rk_des3_ede_ecb_encrypt, -- .decrypt = rk_des3_ede_ecb_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -- --struct rk_crypto_tmp rk_cbc_des3_ede_alg = { -- .type = CRYPTO_ALG_TYPE_SKCIPHER, -- .alg.skcipher.base = { -- .base.cra_name = "cbc(des3_ede)", -- .base.cra_driver_name = "cbc-des3-ede-rk", -- .base.cra_priority = 300, -- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- .base.cra_blocksize = DES_BLOCK_SIZE, -- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), -- .base.cra_alignmask = 0x07, -- .base.cra_module = THIS_MODULE, -- -- .init = rk_cipher_tfm_init, -- .exit = rk_cipher_tfm_exit, -- .min_keysize = DES3_EDE_KEY_SIZE, -- .max_keysize = DES3_EDE_KEY_SIZE, -- .ivsize = DES_BLOCK_SIZE, -- .setkey = rk_tdes_setkey, -- .encrypt = rk_des3_ede_cbc_encrypt, -- .decrypt = rk_des3_ede_cbc_decrypt, -- }, -- .alg.skcipher.op = { -- .do_one_request = rk_cipher_run, -- }, --}; -diff --git a/drivers/crypto/rockchip/rk_crypto_ahash_utils.c b/drivers/crypto/rockchip/rk_crypto_ahash_utils.c -new file mode 100644 -index 000000000..495c55485 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_ahash_utils.c -@@ -0,0 +1,450 @@ -+// SPDX-License-Identifier: 
GPL-2.0 +/* -+ * Rockchip crypto hash uitls -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * ++ * Return tag (digest) length for authenticated encryption ++ * If the cipher and digest are separate, hdata.init is set - just return ++ * digest length. Otherwise return digest length for aead ciphers + */ -+ -+#include "rk_crypto_core.h" -+#include "rk_crypto_ahash_utils.h" -+ -+static const char * const hash_algo2name[] = { -+ [HASH_ALGO_MD5] = "md5", -+ [HASH_ALGO_SHA1] = "sha1", -+ [HASH_ALGO_SHA224] = "sha224", -+ [HASH_ALGO_SHA256] = "sha256", -+ [HASH_ALGO_SHA384] = "sha384", -+ [HASH_ALGO_SHA512] = "sha512", -+ [HASH_ALGO_SM3] = "sm3", -+}; -+ -+static void rk_alg_ctx_clear(struct rk_alg_ctx *alg_ctx) ++static int cryptodev_get_tag_len(struct csession *ses_ptr) +{ -+ alg_ctx->total = 0; -+ alg_ctx->left_bytes = 0; -+ alg_ctx->count = 0; -+ alg_ctx->sg_src = 0; -+ alg_ctx->req_src = 0; -+ alg_ctx->src_nents = 0; ++ if (ses_ptr->hdata.init) ++ return ses_ptr->hdata.digestsize; ++ else ++ return cryptodev_cipher_get_tag_size(&ses_ptr->cdata); +} + -+static void rk_ahash_ctx_clear(struct rk_ahash_ctx *ctx) ++/* ++ * Calculate destination buffer length for authenticated encryption. The ++ * expectation is that user-space code allocates exactly the same space for ++ * destination buffer before calling cryptodev. The result is cipher-dependent. ++ */ ++static int cryptodev_get_dst_len(struct crypt_auth_op *caop, struct csession *ses_ptr) +{ -+ rk_alg_ctx_clear(&ctx->algs_ctx); -+ -+ memset(ctx->hash_tmp, 0x00, RK_DMA_ALIGNMENT); -+ memset(ctx->lastc, 0x00, sizeof(ctx->lastc)); ++ int dst_len = caop->len; ++ if (caop->op == COP_DECRYPT) ++ return dst_len; + -+ ctx->hash_tmp_len = 0; -+ ctx->calc_cnt = 0; -+ ctx->lastc_len = 0; -+} ++ if (caop->flags & COP_FLAG_AEAD_RK_TYPE) ++ return dst_len; + -+struct rk_ahash_ctx *rk_ahash_ctx_cast(struct rk_crypto_dev *rk_dev) -+{ -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ dst_len += caop->tag_len; + -+ return crypto_ahash_ctx(tfm); -+} ++ /* for TLS always add some padding so the total length is rounded to ++ * cipher block size */ ++ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE) { ++ int bs = ses_ptr->cdata.blocksize; ++ dst_len += bs - (dst_len % bs); ++ } + -+struct rk_alg_ctx *rk_ahash_alg_ctx(struct rk_crypto_dev *rk_dev) -+{ -+ return &(rk_ahash_ctx_cast(rk_dev))->algs_ctx; ++ return dst_len; +} + -+struct rk_crypto_algt *rk_ahash_get_algt(struct crypto_ahash *tfm) ++static int fill_kcaop_from_caop(struct kernel_crypt_auth_op *kcaop, struct fcrypt *fcr) +{ -+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); -+ -+ return container_of(alg, struct rk_crypto_algt, alg.hash); -+} ++ struct crypt_auth_op *caop = &kcaop->caop; ++ struct csession *ses_ptr; ++ int ret; + -+static int rk_ahash_set_data_start(struct rk_crypto_dev *rk_dev, uint32_t flag) -+{ -+ int err; -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", caop->ses); ++ return -EINVAL; ++ } + -+ CRYPTO_TRACE(); ++ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE || caop->flags & COP_FLAG_AEAD_SRTP_TYPE) { ++ if (caop->src != caop->dst) { ++ derr(1, "Non-inplace encryption and decryption is not efficient and not implemented"); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ } + -+ err = 
rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); -+ if (!err) -+ err = alg_ctx->ops.hw_dma_start(rk_dev, flag); ++ if (caop->tag_len == 0) ++ caop->tag_len = cryptodev_get_tag_len(ses_ptr); + -+ return err; -+} ++ kcaop->ivlen = caop->iv ? ses_ptr->cdata.ivsize : 0; ++ kcaop->dst_len = cryptodev_get_dst_len(caop, ses_ptr); ++ kcaop->task = current; ++ kcaop->mm = current->mm; + -+static u32 rk_calc_lastc_new_len(u32 nbytes, u32 old_len) -+{ -+ u32 total_len = nbytes + old_len; ++ if (caop->iv) { ++ ret = copy_from_user(kcaop->iv, caop->iv, kcaop->ivlen); ++ if (unlikely(ret)) { ++ derr(1, "error copying IV (%d bytes), copy_from_user returned %d for address %p", ++ kcaop->ivlen, ret, caop->iv); ++ ret = -EFAULT; ++ goto out_unlock; ++ } ++ } + -+ if (total_len <= RK_DMA_ALIGNMENT) -+ return nbytes; ++ ret = 0; + -+ if (total_len % RK_DMA_ALIGNMENT) -+ return total_len % RK_DMA_ALIGNMENT; ++out_unlock: ++ crypto_put_session(ses_ptr); ++ return ret; + -+ return RK_DMA_ALIGNMENT; +} + -+static int rk_ahash_fallback_digest(const char *alg_name, bool is_hmac, -+ const u8 *key, u32 key_len, -+ const u8 *msg, u32 msg_len, -+ u8 *digest) ++static int fill_caop_from_kcaop(struct kernel_crypt_auth_op *kcaop, struct fcrypt *fcr) +{ -+ struct crypto_ahash *ahash_tfm; -+ struct ahash_request *req; -+ struct crypto_wait wait; -+ struct scatterlist sg; + int ret; + -+ CRYPTO_TRACE("%s, is_hmac = %d, key_len = %u, msg_len = %u", -+ alg_name, is_hmac, key_len, msg_len); -+ -+ ahash_tfm = crypto_alloc_ahash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ahash_tfm)) -+ return PTR_ERR(ahash_tfm); ++ kcaop->caop.len = kcaop->dst_len; + -+ req = ahash_request_alloc(ahash_tfm, GFP_KERNEL); -+ if (!req) { -+ crypto_free_ahash(ahash_tfm); -+ return -ENOMEM; ++ if (kcaop->ivlen && kcaop->caop.flags & COP_FLAG_WRITE_IV) { ++ ret = copy_to_user(kcaop->caop.iv, ++ kcaop->iv, kcaop->ivlen); ++ if (unlikely(ret)) { ++ derr(1, "Error in copying to userspace"); ++ return -EFAULT; ++ } + } ++ return 0; ++} + -+ init_completion(&wait.completion); -+ -+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, -+ crypto_req_done, &wait); + -+ crypto_ahash_clear_flags(ahash_tfm, ~0); ++int cryptodev_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ if (unlikely(copy_from_user(&kcaop->caop, arg, sizeof(kcaop->caop)))) { ++ derr(1, "Error in copying from userspace"); ++ return -EFAULT; ++ } + -+ sg_init_one(&sg, msg, msg_len); -+ ahash_request_set_crypt(req, &sg, digest, msg_len); ++ return fill_kcaop_from_caop(kcaop, fcr); ++} + -+ if (is_hmac) -+ crypto_ahash_setkey(ahash_tfm, key, key_len); ++int cryptodev_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ int ret; + -+ ret = crypto_wait_req(crypto_ahash_digest(req), &wait); -+ if (ret) { -+ CRYPTO_MSG("digest failed, ret = %d", ret); -+ goto exit; ++ ret = fill_caop_from_kcaop(kcaop, fcr); ++ if (unlikely(ret)) { ++ derr(1, "fill_caop_from_kcaop"); ++ return ret; + } + -+exit: -+ ahash_request_free(req); -+ crypto_free_ahash(ahash_tfm); -+ -+ return ret; ++ if (unlikely(copy_to_user(arg, &kcaop->caop, sizeof(kcaop->caop)))) { ++ derr(1, "Error in copying to userspace"); ++ return -EFAULT; ++ } ++ return 0; +} + -+static int rk_ahash_get_zero_result(struct ahash_request *req) ++/* compatibility code for 32bit userlands */ ++#ifdef CONFIG_COMPAT ++ ++static inline void ++compat_to_crypt_auth_op(struct compat_crypt_auth_op *compat, struct crypt_auth_op *caop) +{ 
-+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ caop->ses = compat->ses; ++ caop->op = compat->op; ++ caop->flags = compat->flags; ++ caop->len = compat->len; ++ caop->auth_len = compat->auth_len; ++ caop->tag_len = compat->tag_len; ++ caop->iv_len = compat->iv_len; + -+ return rk_ahash_fallback_digest(crypto_ahash_alg_name(tfm), -+ algt->type == ALG_TYPE_HMAC, -+ ctx->authkey, ctx->authkey_len, -+ NULL, 0, req->result); ++ caop->auth_src = compat_ptr(compat->auth_src); ++ caop->src = compat_ptr(compat->src); ++ caop->dst = compat_ptr(compat->dst); ++ caop->tag = compat_ptr(compat->tag); ++ caop->iv = compat_ptr(compat->iv); +} + -+int rk_ahash_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) ++static inline void ++crypt_auth_op_to_compat(struct crypt_auth_op *caop, struct compat_crypt_auth_op *compat) +{ -+ unsigned int blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); -+ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ const char *alg_name; -+ int ret = 0; -+ -+ CRYPTO_MSG(); ++ compat->ses = caop->ses; ++ compat->op = caop->op; ++ compat->flags = caop->flags; ++ compat->len = caop->len; ++ compat->auth_len = caop->auth_len; ++ compat->tag_len = caop->tag_len; ++ compat->iv_len = caop->iv_len; + -+ if (algt->algo >= ARRAY_SIZE(hash_algo2name)) { -+ CRYPTO_MSG("hash algo %d invalid\n", algt->algo); -+ return -EINVAL; -+ } ++ compat->auth_src = ptr_to_compat(caop->auth_src); ++ compat->src = ptr_to_compat(caop->src); ++ compat->dst = ptr_to_compat(caop->dst); ++ compat->tag = ptr_to_compat(caop->tag); ++ compat->iv = ptr_to_compat(caop->iv); ++} + -+ memset(ctx->authkey, 0, sizeof(ctx->authkey)); ++int compat_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ int ret; ++ struct compat_crypt_auth_op compat_auth_cop; + -+ if (keylen <= blocksize) { -+ memcpy(ctx->authkey, key, keylen); -+ ctx->authkey_len = keylen; -+ goto exit; ++ ret = copy_from_user(&compat_auth_cop, arg, sizeof(compat_auth_cop)); ++ if (unlikely(ret)) { ++ derr(1, "Error in copying from userspace"); ++ return -EFAULT; + } + -+ alg_name = hash_algo2name[algt->algo]; -+ -+ CRYPTO_TRACE("calc key digest %s", alg_name); -+ -+ ret = rk_ahash_fallback_digest(alg_name, false, NULL, 0, key, keylen, -+ ctx->authkey); -+ if (ret) { -+ CRYPTO_MSG("rk_ahash_fallback_digest error ret = %d\n", ret); -+ goto exit; -+ } ++ compat_to_crypt_auth_op(&compat_auth_cop, &kcaop->caop); + -+ ctx->authkey_len = crypto_ahash_digestsize(tfm); -+exit: -+ return ret; ++ return fill_kcaop_from_caop(kcaop, fcr); +} + -+int rk_ahash_init(struct ahash_request *req) ++int compat_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ int ret; ++ struct compat_crypt_auth_op compat_auth_cop; + -+ CRYPTO_TRACE(); ++ ret = fill_caop_from_kcaop(kcaop, fcr); ++ if (unlikely(ret)) { ++ derr(1, "fill_caop_from_kcaop"); ++ return ret; ++ } + -+ memset(rctx, 0x00, sizeof(*rctx)); -+ rk_ahash_ctx_clear(ctx); ++ crypt_auth_op_to_compat(&kcaop->caop, &compat_auth_cop); + ++ if (unlikely(copy_to_user(arg, &compat_auth_cop, sizeof(compat_auth_cop)))) { ++ derr(1, "Error in copying to userspace"); ++ 
return -EFAULT; ++ } + return 0; +} + -+int rk_ahash_update(struct ahash_request *req) -+{ -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_crypto_dev *rk_dev = ctx->rk_dev; -+ -+ CRYPTO_TRACE("nbytes = %u", req->nbytes); -+ -+ memset(rctx, 0x00, sizeof(*rctx)); -+ -+ rctx->flag = RK_FLAG_UPDATE; ++#endif /* CONFIG_COMPAT */ + -+ return rk_dev->enqueue(rk_dev, &req->base); ++static void copy_tls_hash(struct scatterlist *dst_sg, int len, void *hash, int hash_len) ++{ ++ scatterwalk_map_and_copy(hash, dst_sg, len, hash_len, 1); +} + -+int rk_ahash_final(struct ahash_request *req) ++static void read_tls_hash(struct scatterlist *dst_sg, int len, void *hash, int hash_len) +{ -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_crypto_dev *rk_dev = ctx->rk_dev; -+ -+ CRYPTO_TRACE(); ++ scatterwalk_map_and_copy(hash, dst_sg, len - hash_len, hash_len, 0); ++} + -+ memset(rctx, 0x00, sizeof(*rctx)); ++#define TLS_MAX_PADDING_SIZE 256 ++static int pad_record(struct scatterlist *dst_sg, int len, int block_size) ++{ ++ uint8_t pad[TLS_MAX_PADDING_SIZE]; ++ int pad_size = block_size - (len % block_size); + -+ rctx->flag = RK_FLAG_FINAL; ++ memset(pad, pad_size - 1, pad_size); + -+ /* use fallback hash */ -+ if (ctx->calc_cnt == 0 && -+ ctx->hash_tmp_len == 0 && -+ ctx->lastc_len == 0) { -+ CRYPTO_TRACE("use fallback hash"); -+ return rk_ahash_get_zero_result(req); -+ } ++ scatterwalk_map_and_copy(pad, dst_sg, len, pad_size, 1); + -+ return rk_dev->enqueue(rk_dev, &req->base); ++ return pad_size; +} + -+int rk_ahash_finup(struct ahash_request *req) ++static int verify_tls_record_pad(struct scatterlist *dst_sg, int len, int block_size) +{ -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_crypto_dev *rk_dev = ctx->rk_dev; -+ -+ CRYPTO_TRACE("nbytes = %u", req->nbytes); -+ -+ memset(rctx, 0x00, sizeof(*rctx)); ++ uint8_t pad[TLS_MAX_PADDING_SIZE]; ++ uint8_t pad_size; ++ int i; + -+ rctx->flag = RK_FLAG_UPDATE | RK_FLAG_FINAL; ++ scatterwalk_map_and_copy(&pad_size, dst_sg, len - 1, 1, 0); + -+ /* use fallback hash */ -+ if (req->nbytes == 0 && -+ ctx->calc_cnt == 0 && -+ ctx->hash_tmp_len == 0 && -+ ctx->lastc_len == 0) { -+ CRYPTO_TRACE("use fallback hash"); -+ return rk_ahash_get_zero_result(req); ++ if (pad_size + 1 > len) { ++ derr(1, "Pad size: %d", pad_size); ++ return -EBADMSG; + } + -+ return rk_dev->enqueue(rk_dev, &req->base); -+} ++ scatterwalk_map_and_copy(pad, dst_sg, len - pad_size - 1, pad_size + 1, 0); + -+int rk_ahash_digest(struct ahash_request *req) -+{ -+ CRYPTO_TRACE("calc data %u bytes.", req->nbytes); ++ for (i = 0; i < pad_size; i++) ++ if (pad[i] != pad_size) { ++ derr(1, "Pad size: %u, pad: %d", pad_size, pad[i]); ++ return -EBADMSG; ++ } + -+ return rk_ahash_init(req) ?: rk_ahash_finup(req); ++ return pad_size + 1; +} + -+int rk_ahash_start(struct rk_crypto_dev *rk_dev) ++/* Authenticate and encrypt the TLS way (also perform padding). ++ * During decryption it verifies the pad and tag and returns -EBADMSG on error. 
++ */ ++static int ++tls_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist *auth_sg, uint32_t auth_len, ++ struct scatterlist *dst_sg, uint32_t len) +{ -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); -+ struct scatterlist *src_sg; -+ unsigned int nbytes; -+ int ret = 0; -+ -+ CRYPTO_TRACE("origin: old_len = %u, new_len = %u, nbytes = %u, flag = %d", -+ ctx->hash_tmp_len, ctx->lastc_len, req->nbytes, rctx->flag); -+ -+ /* update 0Byte do nothing */ -+ if (req->nbytes == 0 && !(rctx->flag & RK_FLAG_FINAL)) -+ goto no_calc; -+ -+ if (ctx->lastc_len) { -+ /* move lastc saved last time to the head of this calculation */ -+ memcpy(ctx->hash_tmp + ctx->hash_tmp_len, ctx->lastc, ctx->lastc_len); -+ ctx->hash_tmp_len = ctx->hash_tmp_len + ctx->lastc_len; -+ ctx->lastc_len = 0; -+ } -+ -+ CRYPTO_TRACE("hash_tmp_len = %u", ctx->hash_tmp_len); ++ int ret, fail = 0; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ uint8_t vhash[AALG_MAX_RESULT_LEN]; ++ uint8_t hash_output[AALG_MAX_RESULT_LEN]; + -+ /* final request no need to save lastc_new */ -+ if ((rctx->flag & RK_FLAG_UPDATE) && (rctx->flag & RK_FLAG_FINAL)) { -+ nbytes = req->nbytes + ctx->hash_tmp_len; ++ /* TLS authenticates the plaintext except for the padding. ++ */ ++ if (caop->op == COP_ENCRYPT) { ++ if (ses_ptr->hdata.init != 0) { ++ if (auth_len > 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ auth_sg, auth_len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } ++ } + -+ CRYPTO_TRACE("finup %u bytes", nbytes); -+ } else if (rctx->flag & RK_FLAG_UPDATE) { -+ ctx->lastc_len = rk_calc_lastc_new_len(req->nbytes, ctx->hash_tmp_len); ++ if (len > 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } ++ } + -+ CRYPTO_TRACE("nents = %u, ctx->lastc_len = %u, offset = %u", -+ sg_nents_for_len(req->src, req->nbytes), ctx->lastc_len, -+ req->nbytes - ctx->lastc_len); ++ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_final: %d", ret); ++ return ret; ++ } + -+ if (!sg_pcopy_to_buffer(req->src, sg_nents_for_len(req->src, req->nbytes), -+ ctx->lastc, ctx->lastc_len, req->nbytes - ctx->lastc_len)) { -+ ret = -EINVAL; -+ goto exit; ++ copy_tls_hash(dst_sg, len, hash_output, caop->tag_len); ++ len += caop->tag_len; + } + -+ nbytes = ctx->hash_tmp_len + req->nbytes - ctx->lastc_len; ++ if (ses_ptr->cdata.init != 0) { ++ if (ses_ptr->cdata.blocksize > 1) { ++ ret = pad_record(dst_sg, len, ses_ptr->cdata.blocksize); ++ len += ret; ++ } + -+ /* not enough data */ -+ if (nbytes < RK_DMA_ALIGNMENT) { -+ CRYPTO_TRACE("nbytes = %u, not enough data", nbytes); -+ memcpy(ctx->hash_tmp + ctx->hash_tmp_len, -+ ctx->lastc, ctx->lastc_len); -+ ctx->hash_tmp_len = ctx->hash_tmp_len + ctx->lastc_len; -+ ctx->lastc_len = 0; -+ goto no_calc; ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ dst_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_encrypt: %d", ret); ++ return ret; ++ } + } -+ -+ CRYPTO_TRACE("update nbytes = %u", nbytes); + } else { -+ /* final just calc lastc_old */ -+ nbytes = 
ctx->hash_tmp_len; -+ -+ CRYPTO_TRACE("final nbytes = %u", nbytes); -+ } ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ dst_sg, dst_sg, len); + -+ if (ctx->hash_tmp_len) { -+ /* Concatenate old data to the header */ -+ sg_init_table(ctx->hash_sg, ARRAY_SIZE(ctx->hash_sg)); -+ sg_set_buf(ctx->hash_sg, ctx->hash_tmp, ctx->hash_tmp_len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_decrypt: %d", ret); ++ return ret; ++ } + -+ if (rk_crypto_check_dmafd(req->src, sg_nents_for_len(req->src, req->nbytes))) { -+ CRYPTO_TRACE("is hash dmafd"); -+ if (!dma_map_sg(rk_dev->dev, &ctx->hash_sg[0], 1, DMA_TO_DEVICE)) { -+ dev_err(rk_dev->dev, "[%s:%d] dma_map_sg(hash_sg) error\n", -+ __func__, __LINE__); -+ ret = -ENOMEM; -+ goto exit; ++ if (ses_ptr->cdata.blocksize > 1) { ++ ret = verify_tls_record_pad(dst_sg, len, ses_ptr->cdata.blocksize); ++ if (unlikely(ret < 0)) { ++ derr(2, "verify_record_pad: %d", ret); ++ fail = 1; ++ } else { ++ len -= ret; ++ } + } -+ ctx->hash_tmp_mapped = true; + } + -+ sg_chain(ctx->hash_sg, ARRAY_SIZE(ctx->hash_sg), req->src); ++ if (ses_ptr->hdata.init != 0) { ++ if (unlikely(caop->tag_len > sizeof(vhash) || caop->tag_len > len)) { ++ derr(1, "Illegal tag len size"); ++ return -EINVAL; ++ } + -+ src_sg = &ctx->hash_sg[0]; -+ ctx->hash_tmp_len = 0; -+ } else { -+ src_sg = req->src; -+ } ++ read_tls_hash(dst_sg, len, vhash, caop->tag_len); ++ len -= caop->tag_len; + -+ alg_ctx->total = nbytes; -+ alg_ctx->left_bytes = nbytes; -+ alg_ctx->sg_src = src_sg; -+ alg_ctx->req_src = src_sg; -+ alg_ctx->src_nents = sg_nents_for_len(src_sg, nbytes); ++ if (auth_len > 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ auth_sg, auth_len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } ++ } + -+ CRYPTO_TRACE("adjust: old_len = %u, new_len = %u, nbytes = %u", -+ ctx->hash_tmp_len, ctx->lastc_len, nbytes); ++ if (len > 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } ++ } + -+ if (nbytes) { -+ if (ctx->calc_cnt == 0) -+ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->type); ++ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_final: %d", ret); ++ return ret; ++ } + -+ /* flush all 64byte key buffer for hmac */ -+ alg_ctx->ops.hw_write_key(ctx->rk_dev, ctx->authkey, sizeof(ctx->authkey)); -+ ret = rk_ahash_set_data_start(rk_dev, rctx->flag); ++ if (memcmp(vhash, hash_output, caop->tag_len) != 0 || fail != 0) { ++ derr(2, "MAC verification failed (tag_len: %d)", caop->tag_len); ++ return -EBADMSG; ++ } ++ } + } -+exit: -+ return ret; -+no_calc: -+ CRYPTO_TRACE("no calc"); -+ rk_alg_ctx_clear(alg_ctx); -+ ++ kcaop->dst_len = len; + return 0; +} + -+int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev) ++/* Authenticate and encrypt the SRTP way. During decryption ++ * it verifies the tag and returns -EBADMSG on error. 
++ */ ++static int ++srtp_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist *auth_sg, uint32_t auth_len, ++ struct scatterlist *dst_sg, uint32_t len) +{ -+ int err = 0; -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); -+ -+ CRYPTO_TRACE("left bytes = %u, flag = %d", alg_ctx->left_bytes, rctx->flag); -+ -+ err = rk_dev->unload_data(rk_dev); -+ if (err) -+ goto out_rx; -+ -+ ctx->calc_cnt += alg_ctx->count; ++ int ret, fail = 0; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ uint8_t vhash[AALG_MAX_RESULT_LEN]; ++ uint8_t hash_output[AALG_MAX_RESULT_LEN]; + -+ if (alg_ctx->left_bytes) { -+ if (alg_ctx->aligned) { -+ if (sg_is_last(alg_ctx->sg_src)) { -+ dev_warn(rk_dev->dev, "[%s:%d], Lack of data\n", -+ __func__, __LINE__); -+ err = -ENOMEM; -+ goto out_rx; ++ /* SRTP authenticates the encrypted data. ++ */ ++ if (caop->op == COP_ENCRYPT) { ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ dst_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_encrypt: %d", ret); ++ return ret; + } -+ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); + } -+ err = rk_ahash_set_data_start(rk_dev, rctx->flag); -+ } else { -+ /* -+ * it will take some time to process date after last dma -+ * transmission. -+ */ -+ struct crypto_ahash *tfm; + -+ if (ctx->hash_tmp_mapped) -+ dma_unmap_sg(rk_dev->dev, &ctx->hash_sg[0], 1, DMA_TO_DEVICE); ++ if (ses_ptr->hdata.init != 0) { ++ if (auth_len > 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ auth_sg, auth_len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } ++ } + -+ /* only final will get result */ -+ if (!(rctx->flag & RK_FLAG_FINAL)) -+ goto out_rx; ++ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_final: %d", ret); ++ return ret; ++ } + -+ if (!req->result) { -+ err = -EINVAL; -+ goto out_rx; ++ if (unlikely(copy_to_user(caop->tag, hash_output, caop->tag_len))) ++ return -EFAULT; + } + -+ tfm = crypto_ahash_reqtfm(req); ++ } else { ++ if (ses_ptr->hdata.init != 0) { ++ if (unlikely(caop->tag_len > sizeof(vhash) || caop->tag_len > len)) { ++ derr(1, "Illegal tag len size"); ++ return -EINVAL; ++ } + -+ err = alg_ctx->ops.hw_get_result(rk_dev, req->result, -+ crypto_ahash_digestsize(tfm)); -+ } ++ if (unlikely(copy_from_user(vhash, caop->tag, caop->tag_len))) ++ return -EFAULT; + -+out_rx: -+ return err; -+} -diff --git a/drivers/crypto/rockchip/rk_crypto_ahash_utils.h b/drivers/crypto/rockchip/rk_crypto_ahash_utils.h -new file mode 100644 -index 000000000..46afd98a0 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_ahash_utils.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ -+ -+#ifndef __RK_CRYPTO_AHASH_UTILS_H__ -+#define __RK_CRYPTO_AHASH_UTILS_H__ -+ -+#include -+ -+#include "rk_crypto_core.h" -+#include "rk_crypto_utils.h" ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ auth_sg, auth_len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_update: %d", ret); ++ return ret; ++ } + -+struct rk_alg_ctx *rk_ahash_alg_ctx(struct rk_crypto_dev *rk_dev); ++ ret = cryptodev_hash_final(&ses_ptr->hdata, hash_output); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_hash_final: %d", ret); ++ return ret; ++ } + -+struct rk_crypto_algt *rk_ahash_get_algt(struct crypto_ahash *tfm); ++ if (memcmp(vhash, hash_output, caop->tag_len) != 0 || fail != 0) { ++ derr(2, "MAC verification failed"); ++ return -EBADMSG; ++ } ++ } + -+struct rk_ahash_ctx *rk_ahash_ctx_cast(struct rk_crypto_dev *rk_dev); ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ dst_sg, dst_sg, len); + -+int rk_ahash_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_decrypt: %d", ret); ++ return ret; ++ } ++ } + -+int rk_ahash_init(struct ahash_request *req); ++ } ++ kcaop->dst_len = len; ++ return 0; ++} + -+int rk_ahash_update(struct ahash_request *req); ++static int rk_auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist *auth_sg, uint32_t auth_len, ++ struct scatterlist *src_sg, ++ struct scatterlist *dst_sg, uint32_t len) ++{ ++ int ret; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int max_tag_len; + -+int rk_ahash_final(struct ahash_request *req); ++ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); ++ if (unlikely(caop->tag_len > max_tag_len)) { ++ derr(0, "Illegal tag length: %d", caop->tag_len); ++ return -EINVAL; ++ } + -+int rk_ahash_finup(struct ahash_request *req); ++ if (caop->tag_len) ++ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); ++ else ++ caop->tag_len = max_tag_len; + -+int rk_ahash_digest(struct ahash_request *req); ++ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); + -+int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev); ++ if (caop->op == COP_ENCRYPT) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, src_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_encrypt: %d", ret); ++ return ret; ++ } ++ } else { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, src_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_decrypt: %d", ret); ++ return ret; ++ } ++ } + -+int rk_ahash_start(struct rk_crypto_dev *rk_dev); ++ return 0; ++} + -+#endif -diff --git a/drivers/crypto/rockchip/rk_crypto_bignum.c b/drivers/crypto/rockchip/rk_crypto_bignum.c -new file mode 100644 -index 000000000..690c2fdf5 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_bignum.c -@@ -0,0 +1,130 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bignum support for Rockchip crypto -+ * -+ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. -+ * -+ * Author: Lin Jinhan -+ * ++/* Typical AEAD (i.e. GCM) encryption/decryption. ++ * During decryption the tag is verified. 
+ */ -+#include -+ -+#include "rk_crypto_bignum.h" -+ -+#define DEFAULT_ENDIAN RK_BG_LITTILE_ENDIAN -+ -+#define BYTES2WORDS(bytes) (round_up((bytes), sizeof(u32)) / sizeof(u32)) -+#define WORDS2BYTES(words) ((words) * sizeof(u32)) -+#define RK_WORD_SIZE 32 -+ -+static void rk_reverse_memcpy(void *dst, const void *src, u32 size) ++static int ++auth_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop, ++ struct scatterlist *auth_sg, uint32_t auth_len, ++ struct scatterlist *src_sg, ++ struct scatterlist *dst_sg, uint32_t len) +{ -+ char *_dst = (char *)dst, *_src = (char *)src; -+ u32 i; ++ int ret; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int max_tag_len; + -+ if (!dst || !src || !size) -+ return; ++ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); ++ if (unlikely(caop->tag_len > max_tag_len)) { ++ derr(0, "Illegal tag length: %d", caop->tag_len); ++ return -EINVAL; ++ } + -+ for (i = 0; i < size; ++i) -+ _dst[size - i - 1] = _src[i]; -+} ++ if (caop->tag_len) ++ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); ++ else ++ caop->tag_len = max_tag_len; + -+struct rk_bignum *rk_bn_alloc(u32 max_size) -+{ -+ struct rk_bignum *bn; ++ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); + -+ bn = kzalloc(sizeof(*bn), GFP_KERNEL); -+ if (!bn) -+ return NULL; ++ if (caop->op == COP_ENCRYPT) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_encrypt: %d", ret); ++ return ret; ++ } ++ kcaop->dst_len = len + caop->tag_len; ++ caop->tag = caop->dst + len; ++ } else { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); + -+ bn->data = kzalloc(round_up(max_size, sizeof(u32)), GFP_KERNEL); -+ if (!bn->data) { -+ kfree(bn); -+ return NULL; ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_decrypt: %d", ret); ++ return ret; ++ } ++ kcaop->dst_len = len - caop->tag_len; ++ caop->tag = caop->dst + len - caop->tag_len; + } + -+ bn->n_words = BYTES2WORDS(max_size); -+ -+ return bn; ++ return 0; +} + -+void rk_bn_free(struct rk_bignum *bn) ++static int crypto_auth_zc_srtp(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) +{ -+ if (!bn) -+ return; ++ struct scatterlist *dst_sg, *auth_sg; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int ret; + -+ if (bn->data) { -+ memset(bn->data, 0x00, WORDS2BYTES(bn->n_words)); -+ kfree(bn->data); ++ if (unlikely(ses_ptr->cdata.init != 0 && ++ (ses_ptr->cdata.stream == 0 || ses_ptr->cdata.aead != 0))) { ++ derr(0, "Only stream modes are allowed in SRTP mode (but not AEAD)"); ++ return -EINVAL; + } + -+ kfree(bn); -+} -+ -+int rk_bn_set_data(struct rk_bignum *bn, const u8 *data, u32 size, enum bignum_endian endian) -+{ -+ if (!bn || !data) -+ return -EINVAL; ++ ret = get_userbuf_srtp(ses_ptr, kcaop, &auth_sg, &dst_sg); ++ if (unlikely(ret)) { ++ derr(1, "get_userbuf_srtp(): Error getting user pages."); ++ return ret; ++ } + -+ if (BYTES2WORDS(size) > bn->n_words) -+ return -EINVAL; ++ ret = srtp_auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, ++ dst_sg, caop->len); + -+ if (endian == DEFAULT_ENDIAN) -+ memcpy(bn->data, data, size); -+ else -+ rk_reverse_memcpy(bn->data, data, size); ++ cryptodev_release_user_pages(ses_ptr); + -+ return 0; ++ return ret; +} + -+int rk_bn_get_data(const struct rk_bignum *bn, u8 *data, u32 size, enum bignum_endian endian) ++static int crypto_auth_zc_tls(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) +{ -+ if (!bn || !data) -+ return -EINVAL; ++ 
struct crypt_auth_op *caop = &kcaop->caop; ++ struct scatterlist *dst_sg, *auth_sg; ++ unsigned char *auth_buf = NULL; ++ struct scatterlist tmp; ++ int ret; + -+ if (size < WORDS2BYTES(bn->n_words)) ++ if (unlikely(caop->auth_len > PAGE_SIZE)) { ++ derr(1, "auth data len is excessive."); + return -EINVAL; ++ } + -+ memset(data, 0x00, size); ++ auth_buf = (char *)__get_free_page(GFP_KERNEL); ++ if (unlikely(!auth_buf)) { ++ derr(1, "unable to get a free page."); ++ return -ENOMEM; ++ } + -+ if (endian == DEFAULT_ENDIAN) -+ memcpy(data + size - WORDS2BYTES(bn->n_words), bn->data, bn->n_words); -+ else -+ rk_reverse_memcpy(data + size - WORDS2BYTES(bn->n_words), -+ bn->data, WORDS2BYTES(bn->n_words)); ++ if (caop->auth_src && caop->auth_len > 0) { ++ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { ++ derr(1, "unable to copy auth data from userspace."); ++ ret = -EFAULT; ++ goto free_auth_buf; ++ } + -+ return 0; -+} ++ sg_init_one(&tmp, auth_buf, caop->auth_len); ++ auth_sg = &tmp; ++ } else { ++ auth_sg = NULL; ++ } + -+u32 rk_bn_get_size(const struct rk_bignum *bn) -+{ -+ if (!bn) -+ return 0; ++ ret = get_userbuf_tls(ses_ptr, kcaop, &dst_sg); ++ if (unlikely(ret)) { ++ derr(1, "get_userbuf_tls(): Error getting user pages."); ++ goto free_auth_buf; ++ } + -+ return WORDS2BYTES(bn->n_words); ++ ret = tls_auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, ++ dst_sg, caop->len); ++ cryptodev_release_user_pages(ses_ptr); ++ ++free_auth_buf: ++ free_page((unsigned long)auth_buf); ++ return ret; +} + -+/* -+ * @brief Returns the index of the highest 1 in |bn|. -+ * @param bn: the point of input data bignum. -+ * @return The index starts at 0 for the least significant bit. -+ * If src == zero, it will return -1 -+ */ -+int rk_bn_highest_bit(const struct rk_bignum *bn) ++static int crypto_auth_zc_aead(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) +{ -+ u32 w; -+ u32 b; -+ -+ if (!bn || !bn->data || !bn->n_words) -+ return -1; ++ struct scatterlist *dst_sg; ++ struct scatterlist *src_sg; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ unsigned char *auth_buf = NULL; ++ int ret; + -+ w = bn->data[bn->n_words - 1]; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)) ++ struct scatterlist tmp; ++ struct scatterlist *auth_sg; ++#else ++ struct scatterlist auth1[2]; ++ struct scatterlist auth2[2]; ++#endif + -+ for (b = 0; b < RK_WORD_SIZE; b++) { -+ w >>= 1; -+ if (w == 0) -+ break; ++ if (unlikely(ses_ptr->cdata.init == 0 || ++ (ses_ptr->cdata.stream == 0 && ses_ptr->cdata.aead == 0))) { ++ derr(0, "Only stream and AEAD ciphers are allowed for authenc"); ++ return -EINVAL; + } + -+ return (int)(bn->n_words - 1) * RK_WORD_SIZE + b; -+} -diff --git a/drivers/crypto/rockchip/rk_crypto_bignum.h b/drivers/crypto/rockchip/rk_crypto_bignum.h -new file mode 100644 -index 000000000..780aa8766 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_bignum.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (c) 2020 Rockchip Electronics Co., Ltd. */ -+ -+#ifndef __RK_CRYPTO_BIGNUM_H__ -+#define __RK_CRYPTO_BIGNUM_H__ -+ -+enum bignum_endian { -+ RK_BG_BIG_ENDIAN, -+ RK_BG_LITTILE_ENDIAN -+}; -+ -+/** -+ * struct rk_bignum - crypto bignum struct. 
-+ */ -+struct rk_bignum { -+ u32 n_words; -+ u32 *data; -+}; ++ if (unlikely(caop->auth_len > PAGE_SIZE)) { ++ derr(1, "auth data len is excessive."); ++ return -EINVAL; ++ } + -+struct rk_bignum *rk_bn_alloc(u32 max_size); -+void rk_bn_free(struct rk_bignum *bn); -+int rk_bn_set_data(struct rk_bignum *bn, const u8 *data, u32 size, enum bignum_endian endian); -+int rk_bn_get_data(const struct rk_bignum *bn, u8 *data, u32 size, enum bignum_endian endian); -+u32 rk_bn_get_size(const struct rk_bignum *bn); -+int rk_bn_highest_bit(const struct rk_bignum *src); ++ auth_buf = (char *)__get_free_page(GFP_KERNEL); ++ if (unlikely(!auth_buf)) { ++ derr(1, "unable to get a free page."); ++ return -ENOMEM; ++ } + -+#endif -diff --git a/drivers/crypto/rockchip/rk_crypto_core.c b/drivers/crypto/rockchip/rk_crypto_core.c -new file mode 100644 -index 000000000..2a9cf2da6 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_core.c -@@ -0,0 +1,945 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip crypto -+ * -+ * Copyright (c) 2018, Fuzhou Rockchip Electronics Co., Ltd -+ * -+ * Author: Zain Wang -+ * Mender: Lin Jinhan -+ * -+ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. -+ */ ++ ret = cryptodev_get_userbuf(ses_ptr, caop->src, caop->len, caop->dst, kcaop->dst_len, ++ kcaop->task, kcaop->mm, &src_sg, &dst_sg); ++ if (unlikely(ret)) { ++ derr(1, "get_userbuf(): Error getting user pages."); ++ goto free_auth_buf; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)) ++ if (caop->auth_src && caop->auth_len > 0) { ++ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { ++ derr(1, "unable to copy auth data from userspace."); ++ ret = -EFAULT; ++ goto free_pages; ++ } + -+#include "rk_crypto_core.h" -+#include "rk_crypto_utils.h" -+#include "rk_crypto_v1.h" -+#include "rk_crypto_v2.h" -+#include "rk_crypto_v3.h" -+#include "cryptodev_linux/rk_cryptodev.h" -+#include "procfs.h" ++ sg_init_one(&tmp, auth_buf, caop->auth_len); ++ auth_sg = &tmp; ++ } else { ++ auth_sg = NULL; ++ } + -+#define CRYPTO_NAME "rkcrypto" ++ ret = auth_n_crypt(ses_ptr, kcaop, auth_sg, caop->auth_len, ++ src_sg, dst_sg, caop->len); ++#else ++ if (caop->auth_src && caop->auth_len > 0) { ++ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { ++ derr(1, "unable to copy auth data from userspace."); ++ ret = -EFAULT; ++ goto free_pages; ++ } + -+static struct rk_alg_ctx *rk_alg_ctx_cast(struct crypto_async_request *async_req) -+{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(async_req->tfm); ++ sg_init_table(auth1, 2); ++ sg_set_buf(auth1, auth_buf, caop->auth_len); ++ sg_chain(auth1, 2, src_sg); + -+ return &ctx->algs_ctx; -+} ++ if (src_sg == dst_sg) { ++ src_sg = auth1; ++ dst_sg = auth1; ++ } else { ++ sg_init_table(auth2, 2); ++ sg_set_buf(auth2, auth_buf, caop->auth_len); ++ sg_chain(auth2, 2, dst_sg); ++ src_sg = auth1; ++ dst_sg = auth2; ++ } ++ } + -+static int rk_crypto_enable_clk(struct rk_crypto_dev *rk_dev) -+{ -+ int ret; ++ ret = auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, ++ src_sg, dst_sg, caop->len); ++#endif + -+ dev_dbg(rk_dev->dev, "clk_bulk_prepare_enable.\n"); ++free_pages: ++ cryptodev_release_user_pages(ses_ptr); + -+ ret = clk_bulk_prepare_enable(rk_dev->clks_num, -+ rk_dev->clk_bulks); -+ if (ret < 0) -+ dev_err(rk_dev->dev, "failed to enable clks %d\n", ret); ++free_auth_buf: ++ free_page((unsigned 
long)auth_buf); + + return ret; +} + -+static void rk_crypto_disable_clk(struct rk_crypto_dev *rk_dev) ++/* Chain two sglists together. It will keep the last nent of priv ++ * and invalidate the first nent of sgl ++ */ ++static struct scatterlist *sg_copy_chain(struct scatterlist *prv, ++ unsigned int prv_nents, ++ struct scatterlist *sgl) +{ -+ dev_dbg(rk_dev->dev, "clk_bulk_disable_unprepare.\n"); ++ struct scatterlist *sg_tmp = sg_last(prv, prv_nents); + -+ clk_bulk_disable_unprepare(rk_dev->clks_num, rk_dev->clk_bulks); ++ sg_set_page(sgl, sg_page(sg_tmp), sg_tmp->length, sg_tmp->offset); ++ ++ if (prv_nents > 1) { ++ sg_chain(prv, prv_nents, sgl); ++ return prv; ++ } else { ++ return sgl; ++ } +} + -+static int rk_load_data(struct rk_crypto_dev *rk_dev, -+ struct scatterlist *sg_src, -+ struct scatterlist *sg_dst) ++static int crypto_auth_zc_rk(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) +{ -+ int ret = -EINVAL; -+ unsigned int count; -+ u32 src_nents, dst_nents; -+ struct device *dev = rk_dev->dev; -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); -+ -+ alg_ctx->count = 0; ++ struct scatterlist *dst; ++ struct scatterlist *src; ++ struct scatterlist *dst_sg; ++ struct scatterlist *src_sg; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ unsigned char *auth_buf = NULL, *tag_buf = NULL; ++ struct scatterlist auth_src[2], auth_dst[2], tag[3]; ++ int ret; + -+ /* 0 data input just do nothing */ -+ if (alg_ctx->total == 0) -+ return 0; ++ if (unlikely(ses_ptr->cdata.init == 0 || ++ (ses_ptr->cdata.stream == 0 && ses_ptr->cdata.aead == 0))) { ++ derr(0, "Only stream and AEAD ciphers are allowed for authenc"); ++ return -EINVAL; ++ } + -+ src_nents = alg_ctx->src_nents; -+ dst_nents = alg_ctx->dst_nents; ++ if (unlikely(caop->auth_len > PAGE_SIZE)) { ++ derr(1, "auth data len is excessive."); ++ return -EINVAL; ++ } + -+ /* skip assoclen data */ -+ if (alg_ctx->assoclen && alg_ctx->left_bytes == alg_ctx->total) { -+ CRYPTO_TRACE("have assoclen..."); ++ ret = cryptodev_get_userbuf(ses_ptr, caop->src, caop->len, ++ caop->dst, kcaop->dst_len, ++ kcaop->task, kcaop->mm, &src_sg, &dst_sg); ++ if (unlikely(ret)) { ++ derr(1, "get_userbuf(): Error getting user pages."); ++ ret = -EFAULT; ++ goto exit; ++ } + -+ if (alg_ctx->assoclen > rk_dev->aad_max) { -+ ret = -ENOMEM; -+ goto error; -+ } ++ dst = dst_sg; ++ src = src_sg; + -+ if (!sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, -+ rk_dev->addr_aad, alg_ctx->assoclen, 0)) { -+ dev_err(dev, "[%s:%d] assoc pcopy err\n", -+ __func__, __LINE__); -+ ret = -EINVAL; -+ goto error; ++ /* chain tag */ ++ if (caop->tag && caop->tag_len > 0) { ++ tag_buf = kcalloc(caop->tag_len, sizeof(*tag_buf), GFP_KERNEL); ++ if (unlikely(!tag_buf)) { ++ derr(1, "unable to kcalloc %d.", caop->tag_len); ++ ret = -EFAULT; ++ goto free_pages; + } + -+ sg_init_one(&alg_ctx->sg_aad, rk_dev->addr_aad, alg_ctx->assoclen); -+ -+ if (!dma_map_sg(dev, &alg_ctx->sg_aad, 1, DMA_TO_DEVICE)) { -+ dev_err(dev, "[%s:%d] dma_map_sg(sg_aad) error\n", -+ __func__, __LINE__); -+ ret = -ENOMEM; -+ goto error; ++ if (unlikely(copy_from_user(tag_buf, caop->tag, caop->tag_len))) { ++ derr(1, "unable to copy tag data from userspace."); ++ ret = -EFAULT; ++ goto free_pages; + } + -+ alg_ctx->addr_aad_in = sg_dma_address(&alg_ctx->sg_aad); -+ -+ /* point sg_src and sg_dst skip assoc data */ -+ sg_src = scatterwalk_ffwd(rk_dev->src, alg_ctx->req_src, -+ alg_ctx->assoclen); -+ sg_dst = (alg_ctx->req_src == alg_ctx->req_dst) ? 
sg_src : -+ scatterwalk_ffwd(rk_dev->dst, alg_ctx->req_dst, -+ alg_ctx->assoclen); -+ -+ alg_ctx->sg_src = sg_src; -+ alg_ctx->sg_dst = sg_dst; -+ src_nents = sg_nents_for_len(sg_src, alg_ctx->total); -+ dst_nents = sg_nents_for_len(sg_dst, alg_ctx->total); ++ sg_init_table(tag, ARRAY_SIZE(tag)); ++ sg_set_buf(&tag[1], tag_buf, caop->tag_len); + -+ CRYPTO_TRACE("src_nents = %u, dst_nents = %u", src_nents, dst_nents); ++ /* Since the sg_chain() requires the last sg in the list is empty and ++ * used for link information, we can not directly link src/dst_sg to tags ++ */ ++ if (caop->op == COP_ENCRYPT) ++ dst = sg_copy_chain(dst_sg, sg_nents(dst_sg), tag); ++ else ++ src = sg_copy_chain(src_sg, sg_nents(src_sg), tag); + } + -+ if (alg_ctx->left_bytes == alg_ctx->total) { -+ alg_ctx->aligned = rk_crypto_check_align(sg_src, src_nents, sg_dst, dst_nents, -+ alg_ctx->align_size); -+ alg_ctx->is_dma = rk_crypto_check_dmafd(sg_src, src_nents) && -+ rk_crypto_check_dmafd(sg_dst, dst_nents); ++ /* chain auth */ ++ auth_buf = (char *)__get_free_page(GFP_KERNEL); ++ if (unlikely(!auth_buf)) { ++ derr(1, "unable to get a free page."); ++ ret = -EFAULT; ++ goto free_pages; + } + -+ CRYPTO_TRACE("aligned = %d, is_dma = %d, total = %u, left_bytes = %u, assoclen = %u\n", -+ alg_ctx->aligned, alg_ctx->is_dma, alg_ctx->total, -+ alg_ctx->left_bytes, alg_ctx->assoclen); -+ -+ if (alg_ctx->aligned) { -+ u32 nents; -+ -+ if (rk_dev->soc_data->use_lli_chain) { -+ count = rk_crypto_hw_desc_maxlen(sg_src, alg_ctx->left_bytes, &nents); -+ } else { -+ nents = 1; -+ count = min_t(unsigned int, alg_ctx->left_bytes, sg_src->length); ++ if (caop->auth_src && caop->auth_len > 0) { ++ if (unlikely(copy_from_user(auth_buf, caop->auth_src, caop->auth_len))) { ++ derr(1, "unable to copy auth data from userspace."); ++ ret = -EFAULT; ++ goto free_pages; + } + -+ alg_ctx->map_nents = nents; -+ alg_ctx->left_bytes -= count; ++ sg_init_table(auth_src, ARRAY_SIZE(auth_src)); ++ sg_set_buf(auth_src, auth_buf, caop->auth_len); ++ sg_init_table(auth_dst, ARRAY_SIZE(auth_dst)); ++ sg_set_buf(auth_dst, auth_buf, caop->auth_len); + -+ if (!alg_ctx->is_dma && !dma_map_sg(dev, sg_src, nents, DMA_TO_DEVICE)) { -+ dev_err(dev, "[%s:%d] dma_map_sg(src) error\n", -+ __func__, __LINE__); -+ ret = -EINVAL; -+ goto error; -+ } -+ alg_ctx->addr_in = sg_dma_address(sg_src); ++ sg_chain(auth_src, 2, src); ++ sg_chain(auth_dst, 2, dst); ++ src = auth_src; ++ dst = auth_dst; ++ } + -+ if (sg_dst) { -+ if (!alg_ctx->is_dma && !dma_map_sg(dev, sg_dst, nents, DMA_FROM_DEVICE)) { -+ dev_err(dev, -+ "[%s:%d] dma_map_sg(dst) error\n", -+ __func__, __LINE__); -+ dma_unmap_sg(dev, sg_src, 1, -+ DMA_TO_DEVICE); -+ ret = -EINVAL; -+ goto error; -+ } -+ alg_ctx->addr_out = sg_dma_address(sg_dst); ++ if (caop->op == COP_ENCRYPT) ++ ret = rk_auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, ++ src, dst, caop->len); ++ else ++ ret = rk_auth_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, ++ src, dst, caop->len + caop->tag_len); ++ ++ if (!ret && caop->op == COP_ENCRYPT) { ++ if (unlikely(copy_to_user(kcaop->caop.tag, tag_buf, caop->tag_len))) { ++ derr(1, "Error in copying to userspace"); ++ ret = -EFAULT; ++ goto free_pages; + } -+ } else { -+ alg_ctx->map_nents = 1; ++ } + -+ count = (alg_ctx->left_bytes > rk_dev->vir_max) ? 
-+ rk_dev->vir_max : alg_ctx->left_bytes; ++free_pages: ++ cryptodev_release_user_pages(ses_ptr); + -+ if (!sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, -+ rk_dev->addr_vir, count, -+ alg_ctx->assoclen + alg_ctx->total - alg_ctx->left_bytes)) { -+ dev_err(dev, "[%s:%d] pcopy err\n", -+ __func__, __LINE__); -+ ret = -EINVAL; -+ goto error; -+ } -+ alg_ctx->left_bytes -= count; -+ sg_init_one(&alg_ctx->sg_tmp, rk_dev->addr_vir, count); -+ if (!dma_map_sg(dev, &alg_ctx->sg_tmp, 1, DMA_TO_DEVICE)) { -+ dev_err(dev, "[%s:%d] dma_map_sg(sg_tmp) error\n", -+ __func__, __LINE__); -+ ret = -ENOMEM; -+ goto error; -+ } -+ alg_ctx->addr_in = sg_dma_address(&alg_ctx->sg_tmp); ++exit: ++ if (auth_buf) ++ free_page((unsigned long)auth_buf); + -+ if (sg_dst) { -+ if (!dma_map_sg(dev, &alg_ctx->sg_tmp, 1, -+ DMA_FROM_DEVICE)) { -+ dev_err(dev, -+ "[%s:%d] dma_map_sg(sg_tmp) error\n", -+ __func__, __LINE__); -+ dma_unmap_sg(dev, &alg_ctx->sg_tmp, 1, -+ DMA_TO_DEVICE); -+ ret = -ENOMEM; -+ goto error; -+ } -+ alg_ctx->addr_out = sg_dma_address(&alg_ctx->sg_tmp); -+ } -+ } ++ kfree(tag_buf); + -+ alg_ctx->count = count; -+ return 0; -+error: + return ret; +} + -+static int rk_unload_data(struct rk_crypto_dev *rk_dev) ++static int ++__crypto_auth_run_zc(struct csession *ses_ptr, struct kernel_crypt_auth_op *kcaop) +{ -+ int ret = 0; -+ struct scatterlist *sg_in, *sg_out; -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); -+ u32 nents; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int ret; + -+ CRYPTO_TRACE("aligned = %d, total = %u, left_bytes = %u\n", -+ alg_ctx->aligned, alg_ctx->total, alg_ctx->left_bytes); ++ if (caop->flags & COP_FLAG_AEAD_SRTP_TYPE) { ++ ret = crypto_auth_zc_srtp(ses_ptr, kcaop); ++ } else if (caop->flags & COP_FLAG_AEAD_TLS_TYPE && ++ ses_ptr->cdata.aead == 0) { ++ ret = crypto_auth_zc_tls(ses_ptr, kcaop); ++ } else if (caop->flags & COP_FLAG_AEAD_RK_TYPE && ++ ses_ptr->cdata.aead) { ++ ret = crypto_auth_zc_rk(ses_ptr, kcaop); ++ } else if (ses_ptr->cdata.aead) { ++ ret = crypto_auth_zc_aead(ses_ptr, kcaop); ++ } else { ++ ret = -EINVAL; ++ } + -+ /* 0 data input just do nothing */ -+ if (alg_ctx->total == 0 || alg_ctx->count == 0) -+ return 0; ++ return ret; ++} + -+ nents = alg_ctx->map_nents; + -+ sg_in = alg_ctx->aligned ? alg_ctx->sg_src : &alg_ctx->sg_tmp; ++int crypto_auth_run(struct fcrypt *fcr, struct kernel_crypt_auth_op *kcaop) ++{ ++ struct csession *ses_ptr; ++ struct crypt_auth_op *caop = &kcaop->caop; ++ int ret; + -+ /* only is dma buffer and aligned will skip unmap */ -+ if (!alg_ctx->is_dma || !alg_ctx->aligned) -+ dma_unmap_sg(rk_dev->dev, sg_in, nents, DMA_TO_DEVICE); ++ if (unlikely(caop->op != COP_ENCRYPT && caop->op != COP_DECRYPT)) { ++ ddebug(1, "invalid operation op=%u", caop->op); ++ return -EINVAL; ++ } + -+ if (alg_ctx->sg_dst) { -+ sg_out = alg_ctx->aligned ? 
alg_ctx->sg_dst : &alg_ctx->sg_tmp; ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", caop->ses); ++ return -EINVAL; ++ } + -+ /* only is dma buffer and aligned will skip unmap */ -+ if (!alg_ctx->is_dma || !alg_ctx->aligned) -+ dma_unmap_sg(rk_dev->dev, sg_out, nents, DMA_FROM_DEVICE); ++ if (unlikely(ses_ptr->cdata.init == 0)) { ++ derr(1, "cipher context not initialized"); ++ ret = -EINVAL; ++ goto out_unlock; + } + -+ if (!alg_ctx->aligned && alg_ctx->req_dst) { -+ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, alg_ctx->dst_nents, -+ rk_dev->addr_vir, alg_ctx->count, -+ alg_ctx->total - alg_ctx->left_bytes - -+ alg_ctx->count + alg_ctx->assoclen)) { -+ ret = -EINVAL; -+ goto exit; ++ /* If we have a hash/mac handle reset its state */ ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_reset(&ses_ptr->hdata); ++ if (unlikely(ret)) { ++ derr(1, "error in cryptodev_hash_reset()"); ++ goto out_unlock; + } + } + -+ if (alg_ctx->assoclen) { -+ dma_unmap_sg(rk_dev->dev, &alg_ctx->sg_aad, 1, DMA_TO_DEVICE); ++ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcaop->iv, ++ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); + -+ /* copy assoc data to dst */ -+ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, sg_nents(alg_ctx->req_dst), -+ rk_dev->addr_aad, alg_ctx->assoclen, 0)) { -+ ret = -EINVAL; -+ goto exit; -+ } ++ ret = __crypto_auth_run_zc(ses_ptr, kcaop); ++ if (unlikely(ret)) { ++ derr(1, "error in __crypto_auth_run_zc()"); ++ goto out_unlock; + } -+exit: -+ return ret; -+} + -+static void start_irq_timer(struct rk_crypto_dev *rk_dev) -+{ -+ mod_timer(&rk_dev->timer, jiffies + msecs_to_jiffies(3000)); ++ ret = 0; ++ ++ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcaop->iv, ++ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); ++ ++out_unlock: ++ crypto_put_session(ses_ptr); ++ return ret; +} +diff --git a/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h b/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h +new file mode 100644 +index 000000000..7073588e1 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/cipherapi.h +@@ -0,0 +1,58 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+/* use timer to avoid crypto irq timeout */ -+static void rk_crypto_irq_timer_handle(struct timer_list *t) -+{ -+ struct rk_crypto_dev *rk_dev = from_timer(rk_dev, t, timer); -+ unsigned long flags; ++#ifndef CIPHERAPI_H ++# define CIPHERAPI_H + -+ spin_lock_irqsave(&rk_dev->lock, flags); ++#include + -+ rk_dev->err = -ETIMEDOUT; -+ rk_dev->stat.timeout_cnt++; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++# include + -+ rk_unload_data(rk_dev); ++typedef struct crypto_ablkcipher cryptodev_crypto_blkcipher_t; ++typedef struct ablkcipher_request cryptodev_blkcipher_request_t; + -+ spin_unlock_irqrestore(&rk_dev->lock, flags); ++# define cryptodev_crypto_alloc_blkcipher crypto_alloc_ablkcipher ++# define cryptodev_crypto_blkcipher_blocksize crypto_ablkcipher_blocksize ++# define cryptodev_crypto_blkcipher_ivsize crypto_ablkcipher_ivsize ++# define cryptodev_crypto_blkcipher_alignmask crypto_ablkcipher_alignmask ++# define cryptodev_crypto_blkcipher_setkey crypto_ablkcipher_setkey + -+ tasklet_schedule(&rk_dev->done_task); ++static inline void cryptodev_crypto_free_blkcipher(cryptodev_crypto_blkcipher_t *c) { ++ if (c) ++ crypto_free_ablkcipher(c); +} + -+static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) -+{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ struct rk_alg_ctx 
*alg_ctx; -+ unsigned long flags; ++# define cryptodev_blkcipher_request_alloc ablkcipher_request_alloc ++# define cryptodev_blkcipher_request_set_callback ablkcipher_request_set_callback + -+ spin_lock_irqsave(&rk_dev->lock, flags); ++static inline void cryptodev_blkcipher_request_free(cryptodev_blkcipher_request_t *r) { ++ if (r) ++ ablkcipher_request_free(r); ++} + -+ /* reset timeout timer */ -+ start_irq_timer(rk_dev); ++# define cryptodev_blkcipher_request_set_crypt ablkcipher_request_set_crypt ++# define cryptodev_crypto_blkcipher_encrypt crypto_ablkcipher_encrypt ++# define cryptodev_crypto_blkcipher_decrypt crypto_ablkcipher_decrypt ++# define cryptodev_crypto_blkcipher_tfm crypto_ablkcipher_tfm ++#else ++#include + -+ alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); ++typedef struct crypto_skcipher cryptodev_crypto_blkcipher_t; ++typedef struct skcipher_request cryptodev_blkcipher_request_t; + -+ rk_dev->stat.irq_cnt++; ++# define cryptodev_crypto_alloc_blkcipher crypto_alloc_skcipher ++# define cryptodev_crypto_blkcipher_blocksize crypto_skcipher_blocksize ++# define cryptodev_crypto_blkcipher_ivsize crypto_skcipher_ivsize ++# define cryptodev_crypto_blkcipher_alignmask crypto_skcipher_alignmask ++# define cryptodev_crypto_blkcipher_setkey crypto_skcipher_setkey ++# define cryptodev_crypto_free_blkcipher crypto_free_skcipher ++# define cryptodev_blkcipher_request_alloc skcipher_request_alloc ++# define cryptodev_blkcipher_request_set_callback skcipher_request_set_callback ++# define cryptodev_blkcipher_request_free skcipher_request_free ++# define cryptodev_blkcipher_request_set_crypt skcipher_request_set_crypt ++# define cryptodev_crypto_blkcipher_encrypt crypto_skcipher_encrypt ++# define cryptodev_crypto_blkcipher_decrypt crypto_skcipher_decrypt ++# define cryptodev_crypto_blkcipher_tfm crypto_skcipher_tfm ++#endif + -+ if (alg_ctx->ops.irq_handle) -+ alg_ctx->ops.irq_handle(irq, dev_id); ++#endif +diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c +new file mode 100644 +index 000000000..8b2a28c73 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.c +@@ -0,0 +1,492 @@ ++/* ++ * Driver for /dev/crypto device (aka CryptoDev) ++ * ++ * Copyright (c) 2010,2011 Nikos Mavrogiannopoulos ++ * Portions Copyright (c) 2010 Michael Weiser ++ * Portions Copyright (c) 2010 Phil Sutter ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ /* already trigger timeout */ -+ if (rk_dev->err != -ETIMEDOUT) { -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+ tasklet_schedule(&rk_dev->done_task); -+ } else { -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "cryptodev.h" ++#include "cipherapi.h" + -+ return IRQ_HANDLED; -+} ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) ++extern const struct crypto_type crypto_givcipher_type; ++#endif + -+static int rk_start_op(struct rk_crypto_dev *rk_dev) ++static void cryptodev_complete(struct crypto_async_request *req, int err) +{ -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); -+ int ret; -+ -+ if (!alg_ctx || !alg_ctx->ops.start) -+ return -EINVAL; -+ -+ alg_ctx->aligned = false; -+ -+ enable_irq(rk_dev->irq); -+ start_irq_timer(rk_dev); -+ -+ ret = alg_ctx->ops.start(rk_dev); -+ if (ret) -+ return ret; ++ struct cryptodev_result *res = req->data; + -+ /* fake calculations are used to trigger the Done Task */ -+ if (alg_ctx->total == 0) { -+ CRYPTO_TRACE("fake done_task"); -+ rk_dev->stat.fake_cnt++; -+ tasklet_schedule(&rk_dev->done_task); -+ } ++ if (err == -EINPROGRESS) ++ return; + -+ return 0; ++ res->err = err; ++ complete(&res->completion); +} + -+static int rk_update_op(struct rk_crypto_dev *rk_dev) ++int cryptodev_get_cipher_keylen(unsigned int *keylen, struct session_op *sop, ++ int aead) +{ -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); ++ /* ++ * For blockciphers (AES-CBC) or non-composite aead ciphers (like AES-GCM), ++ * the key length is simply the cipher keylen obtained from userspace. If ++ * the cipher is composite aead, the keylen is the sum of cipher keylen, ++ * hmac keylen and a key header length. This key format is the one used in ++ * Linux kernel for composite aead ciphers (crypto/authenc.c) ++ */ ++ unsigned int klen = sop->keylen; + -+ if (!alg_ctx || !alg_ctx->ops.update) ++ if (unlikely(sop->keylen > CRYPTO_CIPHER_MAX_KEY_LEN)) + return -EINVAL; + -+ return alg_ctx->ops.update(rk_dev); ++ if (aead && sop->mackeylen) { ++ if (unlikely(sop->mackeylen > CRYPTO_HMAC_MAX_KEY_LEN)) ++ return -EINVAL; ++ klen += sop->mackeylen; ++ klen += RTA_SPACE(sizeof(struct crypto_authenc_key_param)); ++ } ++ ++ *keylen = klen; ++ return 0; +} + -+static void rk_complete_op(struct rk_crypto_dev *rk_dev, int err) ++int cryptodev_get_cipher_key(uint8_t *key, struct session_op *sop, int aead) +{ -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); ++ /* ++ * Get cipher key from user-space. For blockciphers just copy it from ++ * user-space. For composite aead ciphers combine it with the hmac key in ++ * the format used by Linux kernel in crypto/authenc.c: ++ * ++ * [[AUTHENC_KEY_HEADER + CIPHER_KEYLEN] [AUTHENTICATION KEY] [CIPHER KEY]] ++ */ ++ struct crypto_authenc_key_param *param; ++ struct rtattr *rta; ++ int ret = 0; + -+ disable_irq(rk_dev->irq); -+ del_timer(&rk_dev->timer); ++ if (aead && sop->mackeylen) { ++ /* ++ * Composite aead ciphers. 
The first four bytes are the header type and ++ * header length for aead keys ++ */ ++ rta = (void *)key; ++ rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; ++ rta->rta_len = RTA_LENGTH(sizeof(*param)); + -+ rk_dev->stat.complete_cnt++; ++ /* ++ * The next four bytes hold the length of the encryption key ++ */ ++ param = RTA_DATA(rta); ++ param->enckeylen = cpu_to_be32(sop->keylen); + -+ if (err) { -+ rk_dev->stat.error_cnt++; -+ rk_dev->stat.last_error = err; -+ dev_err(rk_dev->dev, "complete_op err = %d\n", err); ++ /* Advance key pointer eight bytes and copy the hmac key */ ++ key += RTA_SPACE(sizeof(*param)); ++ if (unlikely(copy_from_user(key, sop->mackey, sop->mackeylen))) { ++ ret = -EFAULT; ++ goto error; ++ } ++ /* Advance key pointer past the hmac key */ ++ key += sop->mackeylen; + } ++ /* now copy the blockcipher key */ ++ if (unlikely(copy_from_user(key, sop->key, sop->keylen))) ++ ret = -EFAULT; + -+ if (!alg_ctx || !alg_ctx->ops.complete) -+ return; -+ -+ alg_ctx->ops.complete(rk_dev->async_req, err); ++error: ++ return ret; ++} + -+ rk_dev->async_req = NULL; ++/* Was correct key length supplied? */ ++static int check_key_size(size_t keylen, const char *alg_name, ++ unsigned int min_keysize, unsigned int max_keysize) ++{ ++ if (max_keysize > 0 && unlikely((keylen < min_keysize) || ++ (keylen > max_keysize))) { ++ ddebug(1, "Wrong keylen '%zu' for algorithm '%s'. Use %u to %u.", ++ keylen, alg_name, min_keysize, max_keysize); ++ return -EINVAL; ++ } + -+ tasklet_schedule(&rk_dev->queue_task); ++ return 0; +} + -+static int rk_crypto_enqueue(struct rk_crypto_dev *rk_dev, -+ struct crypto_async_request *async_req) ++int cryptodev_cipher_init(struct cipher_data *out, const char *alg_name, ++ uint8_t *keyp, size_t keylen, int stream, int aead) +{ -+ unsigned long flags; + int ret; + -+ spin_lock_irqsave(&rk_dev->lock, flags); -+ ret = crypto_enqueue_request(&rk_dev->queue, async_req); -+ -+ if (rk_dev->queue.qlen > rk_dev->stat.ever_queue_max) -+ rk_dev->stat.ever_queue_max = rk_dev->queue.qlen; ++ if (aead == 0) { ++ unsigned int min_keysize, max_keysize; ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ struct crypto_tfm *tfm; ++#else ++ struct ablkcipher_alg *alg; ++#endif + -+ if (rk_dev->busy) { -+ rk_dev->stat.busy_cnt++; -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+ return ret; -+ } ++ out->async.s = cryptodev_crypto_alloc_blkcipher(alg_name, 0, 0); ++ if (unlikely(IS_ERR(out->async.s))) { ++ ddebug(1, "Failed to load cipher %s", alg_name); ++ return PTR_ERR(out->async.s); ++ } + -+ rk_dev->stat.equeue_cnt++; -+ rk_dev->busy = true; -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+ tasklet_schedule(&rk_dev->queue_task); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ tfm = crypto_skcipher_tfm(out->async.s); ++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(5, 4, 0)) ++ if ((tfm->__crt_alg->cra_type == &crypto_ablkcipher_type) ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)) ++ || (tfm->__crt_alg->cra_type == &crypto_givcipher_type) ++#endif ++ ) { ++ struct ablkcipher_alg *alg; + -+ return ret; -+} ++ alg = &tfm->__crt_alg->cra_ablkcipher; ++ min_keysize = alg->min_keysize; ++ max_keysize = alg->max_keysize; ++ } else ++#endif ++ { ++ struct skcipher_alg *alg; + -+static void rk_crypto_queue_task_cb(unsigned long data) -+{ -+ struct rk_crypto_dev *rk_dev = (struct rk_crypto_dev *)data; -+ struct crypto_async_request *async_req, *backlog; -+ unsigned long flags; ++ alg = crypto_skcipher_alg(out->async.s); ++ min_keysize = alg->min_keysize; ++ max_keysize = 
alg->max_keysize; ++ } ++#else ++ alg = crypto_ablkcipher_alg(out->async.s); ++ min_keysize = alg->min_keysize; ++ max_keysize = alg->max_keysize; ++#endif ++ ret = check_key_size(keylen, alg_name, min_keysize, ++ max_keysize); ++ if (ret) ++ goto error; + -+ spin_lock_irqsave(&rk_dev->lock, flags); -+ if (rk_dev->async_req) { -+ dev_err(rk_dev->dev, "%s: Unexpected crypto paths.\n", __func__); -+ goto exit; -+ } ++ out->blocksize = cryptodev_crypto_blkcipher_blocksize(out->async.s); ++ out->ivsize = cryptodev_crypto_blkcipher_ivsize(out->async.s); ++ out->alignmask = cryptodev_crypto_blkcipher_alignmask(out->async.s); + -+ rk_dev->err = 0; ++ ret = cryptodev_crypto_blkcipher_setkey(out->async.s, keyp, keylen); ++ } else { ++ out->async.as = crypto_alloc_aead(alg_name, 0, 0); ++ if (unlikely(IS_ERR(out->async.as))) { ++ ddebug(1, "Failed to load cipher %s", alg_name); ++ return PTR_ERR(out->async.as); ++ } + -+ backlog = crypto_get_backlog(&rk_dev->queue); -+ async_req = crypto_dequeue_request(&rk_dev->queue); ++ out->blocksize = crypto_aead_blocksize(out->async.as); ++ out->ivsize = crypto_aead_ivsize(out->async.as); ++ out->alignmask = crypto_aead_alignmask(out->async.as); + -+ if (!async_req) { -+ rk_dev->busy = false; -+ goto exit; ++ ret = crypto_aead_setkey(out->async.as, keyp, keylen); + } -+ rk_dev->stat.dequeue_cnt++; + -+ if (backlog) { -+ backlog->complete(backlog, -EINPROGRESS); -+ backlog = NULL; ++ if (unlikely(ret)) { ++ ddebug(1, "Setting key failed for %s-%zu.", alg_name, keylen*8); ++ ret = -EINVAL; ++ goto error; + } + -+ rk_dev->async_req = async_req; -+ rk_dev->err = rk_start_op(rk_dev); -+ if (rk_dev->err) -+ rk_complete_op(rk_dev, rk_dev->err); ++ out->stream = stream; ++ out->aead = aead; + -+exit: -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+} ++ init_completion(&out->async.result.completion); + -+static void rk_crypto_done_task_cb(unsigned long data) -+{ -+ struct rk_crypto_dev *rk_dev = (struct rk_crypto_dev *)data; -+ struct rk_alg_ctx *alg_ctx; -+ unsigned long flags; ++ if (aead == 0) { ++ out->async.request = cryptodev_blkcipher_request_alloc(out->async.s, GFP_KERNEL); ++ if (unlikely(!out->async.request)) { ++ derr(1, "error allocating async crypto request"); ++ ret = -ENOMEM; ++ goto error; ++ } + -+ spin_lock_irqsave(&rk_dev->lock, flags); ++ cryptodev_blkcipher_request_set_callback(out->async.request, ++ CRYPTO_TFM_REQ_MAY_BACKLOG, ++ cryptodev_complete, &out->async.result); ++ } else { ++ out->async.arequest = aead_request_alloc(out->async.as, GFP_KERNEL); ++ if (unlikely(!out->async.arequest)) { ++ derr(1, "error allocating async crypto request"); ++ ret = -ENOMEM; ++ goto error; ++ } + -+ if (!rk_dev->async_req) { -+ dev_err(rk_dev->dev, "done task receive invalid async_req\n"); -+ spin_unlock_irqrestore(&rk_dev->lock, flags); -+ return; ++ aead_request_set_callback(out->async.arequest, ++ CRYPTO_TFM_REQ_MAY_BACKLOG, ++ cryptodev_complete, &out->async.result); + } + -+ alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); -+ -+ rk_dev->stat.done_cnt++; -+ -+ if (rk_dev->err) -+ goto exit; -+ -+ if (alg_ctx->left_bytes == 0) { -+ CRYPTO_TRACE("done task cb last calc"); -+ /* unload data for last calculation */ -+ rk_dev->err = rk_update_op(rk_dev); -+ goto exit; ++ out->init = 1; ++ return 0; ++error: ++ if (aead == 0) { ++ cryptodev_blkcipher_request_free(out->async.request); ++ cryptodev_crypto_free_blkcipher(out->async.s); ++ } else { ++ if (out->async.arequest) ++ aead_request_free(out->async.arequest); ++ if (out->async.as) ++ 
crypto_free_aead(out->async.as); + } + -+ rk_dev->err = rk_update_op(rk_dev); -+ if (rk_dev->err) -+ goto exit; ++ return ret; ++} + -+ spin_unlock_irqrestore(&rk_dev->lock, flags); ++void cryptodev_cipher_deinit(struct cipher_data *cdata) ++{ ++ if (cdata->init) { ++ if (cdata->aead == 0) { ++ cryptodev_blkcipher_request_free(cdata->async.request); ++ cryptodev_crypto_free_blkcipher(cdata->async.s); ++ } else { ++ if (cdata->async.arequest) ++ aead_request_free(cdata->async.arequest); ++ if (cdata->async.as) ++ crypto_free_aead(cdata->async.as); ++ } + -+ return; -+exit: -+ rk_complete_op(rk_dev, rk_dev->err); -+ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ cdata->init = 0; ++ } +} + -+static struct rk_crypto_algt *rk_crypto_find_algs(struct rk_crypto_dev *rk_dev, -+ char *name) ++static inline int waitfor(struct cryptodev_result *cr, ssize_t ret) +{ -+ u32 i; -+ struct rk_crypto_algt **algs; -+ struct rk_crypto_algt *tmp_algs; -+ uint32_t total_algs_num = 0; -+ -+ algs = rk_dev->soc_data->hw_get_algts(&total_algs_num); -+ if (!algs || total_algs_num == 0) -+ return NULL; ++ switch (ret) { ++ case 0: ++ break; ++ case -EINPROGRESS: ++ case -EBUSY: ++ wait_for_completion(&cr->completion); ++ /* At this point we known for sure the request has finished, ++ * because wait_for_completion above was not interruptible. ++ * This is important because otherwise hardware or driver ++ * might try to access memory which will be freed or reused for ++ * another request. */ + -+ for (i = 0; i < total_algs_num; i++, algs++) { -+ tmp_algs = *algs; -+ tmp_algs->rk_dev = rk_dev; ++ if (unlikely(cr->err)) { ++ derr(0, "error from async request: %d", cr->err); ++ return cr->err; ++ } + -+ if (strcmp(tmp_algs->name, name) == 0) -+ return tmp_algs; ++ break; ++ default: ++ return ret; + } + -+ return NULL; ++ return 0; +} + -+static int rk_crypto_register(struct rk_crypto_dev *rk_dev) ++ssize_t cryptodev_cipher_encrypt(struct cipher_data *cdata, ++ const struct scatterlist *src, struct scatterlist *dst, ++ size_t len) +{ -+ unsigned int i, k; -+ char **algs_name; -+ struct rk_crypto_algt *tmp_algs; -+ struct rk_crypto_soc_data *soc_data; -+ int err = 0; -+ -+ soc_data = rk_dev->soc_data; ++ int ret; + -+ algs_name = soc_data->valid_algs_name; ++ reinit_completion(&cdata->async.result.completion); + -+ rk_dev->request_crypto(rk_dev, __func__); ++ if (cdata->aead == 0) { ++ cryptodev_blkcipher_request_set_crypt(cdata->async.request, ++ (struct scatterlist *)src, dst, ++ len, cdata->async.iv); ++ ret = cryptodev_crypto_blkcipher_encrypt(cdata->async.request); ++ } else { ++ aead_request_set_crypt(cdata->async.arequest, ++ (struct scatterlist *)src, dst, ++ len, cdata->async.iv); ++ ret = crypto_aead_encrypt(cdata->async.arequest); ++ } + -+ for (i = 0; i < soc_data->valid_algs_num; i++, algs_name++) { -+ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); -+ if (!tmp_algs) { -+ CRYPTO_TRACE("%s not matched!!!\n", *algs_name); -+ continue; -+ } ++ return waitfor(&cdata->async.result, ret); ++} + -+ if (soc_data->hw_is_algo_valid && !soc_data->hw_is_algo_valid(rk_dev, tmp_algs)) { -+ CRYPTO_TRACE("%s skipped!!!\n", *algs_name); -+ continue; -+ } ++ssize_t cryptodev_cipher_decrypt(struct cipher_data *cdata, ++ const struct scatterlist *src, struct scatterlist *dst, ++ size_t len) ++{ ++ int ret; + -+ CRYPTO_TRACE("%s matched!!!\n", *algs_name); ++ reinit_completion(&cdata->async.result.completion); ++ if (cdata->aead == 0) { ++ cryptodev_blkcipher_request_set_crypt(cdata->async.request, ++ (struct scatterlist 
*)src, dst, ++ len, cdata->async.iv); ++ ret = cryptodev_crypto_blkcipher_decrypt(cdata->async.request); ++ } else { ++ aead_request_set_crypt(cdata->async.arequest, ++ (struct scatterlist *)src, dst, ++ len, cdata->async.iv); ++ ret = crypto_aead_decrypt(cdata->async.arequest); ++ } + -+ tmp_algs->rk_dev = rk_dev; ++ return waitfor(&cdata->async.result, ret); ++} + -+ if (tmp_algs->type == ALG_TYPE_CIPHER) { -+ if (tmp_algs->mode == CIPHER_MODE_CTR || -+ tmp_algs->mode == CIPHER_MODE_CFB || -+ tmp_algs->mode == CIPHER_MODE_OFB) -+ tmp_algs->alg.crypto.base.cra_blocksize = 1; ++/* Hash functions */ + -+ if (tmp_algs->mode == CIPHER_MODE_ECB) -+ tmp_algs->alg.crypto.ivsize = 0; ++int cryptodev_hash_init(struct hash_data *hdata, const char *alg_name, ++ int hmac_mode, void *mackey, size_t mackeylen) ++{ ++ int ret; + -+ /* rv1126 is not support aes192 */ -+ if (soc_data->use_soft_aes192 && -+ tmp_algs->algo == CIPHER_ALGO_AES) -+ tmp_algs->use_soft_aes192 = true; ++ hdata->async.s = crypto_alloc_ahash(alg_name, 0, 0); ++ if (unlikely(IS_ERR(hdata->async.s))) { ++ ddebug(1, "Failed to load transform for %s", alg_name); ++ return PTR_ERR(hdata->async.s); ++ } + -+ err = crypto_register_skcipher(&tmp_algs->alg.crypto); -+ } else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) { -+ err = crypto_register_ahash(&tmp_algs->alg.hash); -+ } else if (tmp_algs->type == ALG_TYPE_ASYM) { -+ err = crypto_register_akcipher(&tmp_algs->alg.asym); -+ } else if (tmp_algs->type == ALG_TYPE_AEAD) { -+ if (soc_data->use_soft_aes192 && -+ tmp_algs->algo == CIPHER_ALGO_AES) -+ tmp_algs->use_soft_aes192 = true; -+ err = crypto_register_aead(&tmp_algs->alg.aead); -+ } else { -+ continue; ++ /* Copy the key from user and set to TFM. */ ++ if (hmac_mode != 0) { ++ ret = crypto_ahash_setkey(hdata->async.s, mackey, mackeylen); ++ if (unlikely(ret)) { ++ ddebug(1, "Setting hmac key failed for %s-%zu.", ++ alg_name, mackeylen*8); ++ ret = -EINVAL; ++ goto error; + } ++ } + -+ if (err) -+ goto err_cipher_algs; ++ hdata->digestsize = crypto_ahash_digestsize(hdata->async.s); ++ hdata->alignmask = crypto_ahash_alignmask(hdata->async.s); + -+ tmp_algs->valid_flag = true; ++ init_completion(&hdata->async.result.completion); + -+ CRYPTO_TRACE("%s register OK!!!\n", *algs_name); ++ hdata->async.request = ahash_request_alloc(hdata->async.s, GFP_KERNEL); ++ if (unlikely(!hdata->async.request)) { ++ derr(0, "error allocating async crypto request"); ++ ret = -ENOMEM; ++ goto error; + } + -+ rk_dev->release_crypto(rk_dev, __func__); -+ ++ ahash_request_set_callback(hdata->async.request, ++ CRYPTO_TFM_REQ_MAY_BACKLOG, ++ cryptodev_complete, &hdata->async.result); ++ hdata->init = 1; + return 0; + -+err_cipher_algs: -+ algs_name = soc_data->valid_algs_name; ++error: ++ crypto_free_ahash(hdata->async.s); ++ return ret; ++} + -+ for (k = 0; k < i; k++, algs_name++) { -+ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); -+ if (!tmp_algs) -+ continue; ++void cryptodev_hash_deinit(struct hash_data *hdata) ++{ ++ if (hdata->init) { ++ ahash_request_free(hdata->async.request); ++ crypto_free_ahash(hdata->async.s); ++ hdata->init = 0; ++ } ++} + -+ if (tmp_algs->type == ALG_TYPE_CIPHER) -+ crypto_unregister_skcipher(&tmp_algs->alg.crypto); -+ else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) -+ crypto_unregister_ahash(&tmp_algs->alg.hash); -+ else if (tmp_algs->type == ALG_TYPE_ASYM) -+ crypto_unregister_akcipher(&tmp_algs->alg.asym); -+ else if (tmp_algs->type == ALG_TYPE_AEAD) -+ 
crypto_unregister_aead(&tmp_algs->alg.aead); ++int cryptodev_hash_reset(struct hash_data *hdata) ++{ ++ int ret; ++ ++ ret = crypto_ahash_init(hdata->async.request); ++ if (unlikely(ret)) { ++ derr(0, "error in crypto_hash_init()"); ++ return ret; + } + -+ rk_dev->release_crypto(rk_dev, __func__); ++ return 0; + -+ return err; +} + -+static void rk_crypto_unregister(struct rk_crypto_dev *rk_dev) ++ssize_t cryptodev_hash_update(struct hash_data *hdata, ++ struct scatterlist *sg, size_t len) +{ -+ unsigned int i; -+ char **algs_name; -+ struct rk_crypto_algt *tmp_algs; -+ -+ algs_name = rk_dev->soc_data->valid_algs_name; -+ -+ rk_dev->request_crypto(rk_dev, __func__); ++ int ret; + -+ for (i = 0; i < rk_dev->soc_data->valid_algs_num; i++, algs_name++) { -+ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); -+ if (!tmp_algs) -+ continue; ++ reinit_completion(&hdata->async.result.completion); ++ ahash_request_set_crypt(hdata->async.request, sg, NULL, len); + -+ if (tmp_algs->type == ALG_TYPE_CIPHER) -+ crypto_unregister_skcipher(&tmp_algs->alg.crypto); -+ else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) -+ crypto_unregister_ahash(&tmp_algs->alg.hash); -+ else if (tmp_algs->type == ALG_TYPE_ASYM) -+ crypto_unregister_akcipher(&tmp_algs->alg.asym); -+ } ++ ret = crypto_ahash_update(hdata->async.request); + -+ rk_dev->release_crypto(rk_dev, __func__); ++ return waitfor(&hdata->async.result, ret); +} + -+static void rk_crypto_request(struct rk_crypto_dev *rk_dev, const char *name) ++int cryptodev_hash_final(struct hash_data *hdata, void *output) +{ -+ CRYPTO_TRACE("Crypto is requested by %s\n", name); ++ int ret; + -+ rk_crypto_enable_clk(rk_dev); -+} ++ reinit_completion(&hdata->async.result.completion); ++ ahash_request_set_crypt(hdata->async.request, NULL, output, 0); + -+static void rk_crypto_release(struct rk_crypto_dev *rk_dev, const char *name) -+{ -+ CRYPTO_TRACE("Crypto is released by %s\n", name); ++ ret = crypto_ahash_final(hdata->async.request); + -+ rk_crypto_disable_clk(rk_dev); ++ return waitfor(&hdata->async.result, ret); +} + -+static void rk_crypto_action(void *data) ++#ifdef CIOCCPHASH ++/* import the current hash state of src to dst */ ++int cryptodev_hash_copy(struct hash_data *dst, struct hash_data *src) +{ -+ struct rk_crypto_dev *rk_dev = data; ++ int ret, statesize; ++ void *statedata = NULL; ++ struct crypto_tfm *tfm; + -+ if (rk_dev->rst) -+ reset_control_assert(rk_dev->rst); -+} ++ if (unlikely(src == NULL || !src->init || ++ dst == NULL || !dst->init)) { ++ return -EINVAL; ++ } + -+static char *crypto_no_sm_algs_name[] = { -+ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", -+ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", -+ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", -+ "sha1", "sha224", "sha256", "sha384", "sha512", "md5", -+ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", -+ "rsa" -+}; ++ reinit_completion(&src->async.result.completion); + -+static char *crypto_rv1126_algs_name[] = { -+ "ecb(sm4)", "cbc(sm4)", "cfb(sm4)", "ofb(sm4)", "ctr(sm4)", "gcm(sm4)", -+ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", -+ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", -+ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", -+ "sha1", "sha256", "sha512", "md5", "sm3", -+ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", "hmac(sm3)", -+ "rsa" -+}; ++ statesize = crypto_ahash_statesize(src->async.s); ++ if (unlikely(statesize <= 0)) { ++ return 
-EINVAL; ++ } + -+static char *crypto_full_algs_name[] = { -+ "ecb(sm4)", "cbc(sm4)", "cfb(sm4)", "ofb(sm4)", "ctr(sm4)", "gcm(sm4)", -+ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", -+ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", -+ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", -+ "sha1", "sha224", "sha256", "sha384", "sha512", "md5", "sm3", -+ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", "hmac(sm3)", -+ "rsa" -+}; ++ statedata = kzalloc(statesize, GFP_KERNEL); ++ if (unlikely(statedata == NULL)) { ++ return -ENOMEM; ++ } + -+static const struct rk_crypto_soc_data px30_soc_data = -+ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_no_sm_algs_name, false); ++ ret = crypto_ahash_export(src->async.request, statedata); ++ if (unlikely(ret < 0)) { ++ if (unlikely(ret == -ENOSYS)) { ++ tfm = crypto_ahash_tfm(src->async.s); ++ derr(0, "cryptodev_hash_copy: crypto_ahash_export not implemented for " ++ "alg='%s', driver='%s'", crypto_tfm_alg_name(tfm), ++ crypto_tfm_alg_driver_name(tfm)); ++ } ++ goto out; ++ } + -+static const struct rk_crypto_soc_data rv1126_soc_data = -+ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_rv1126_algs_name, true); ++ ret = crypto_ahash_import(dst->async.request, statedata); ++ if (unlikely(ret == -ENOSYS)) { ++ tfm = crypto_ahash_tfm(dst->async.s); ++ derr(0, "cryptodev_hash_copy: crypto_ahash_import not implemented for " ++ "alg='%s', driver='%s'", crypto_tfm_alg_name(tfm), ++ crypto_tfm_alg_driver_name(tfm)); ++ } ++out: ++ kfree(statedata); ++ return ret; ++} ++#endif /* CIOCCPHASH */ +diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h +new file mode 100644 +index 000000000..b8867d91b +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/cryptlib.h +@@ -0,0 +1,111 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+static const struct rk_crypto_soc_data full_soc_data = -+ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_full_algs_name, false); ++#ifndef CRYPTLIB_H ++# define CRYPTLIB_H + -+static const struct rk_crypto_soc_data cryto_v3_soc_data = -+ RK_CRYPTO_V3_SOC_DATA_INIT(crypto_full_algs_name); ++#include + -+static char *rk3288_cipher_algs[] = { -+ "ecb(aes)", "cbc(aes)", -+ "ecb(des)", "cbc(des)", -+ "ecb(des3_ede)", "cbc(des3_ede)", -+ "sha1", "sha256", "md5", ++struct cryptodev_result { ++ struct completion completion; ++ int err; +}; + -+static const struct rk_crypto_soc_data rk3288_soc_data = -+ RK_CRYPTO_V1_SOC_DATA_INIT(rk3288_cipher_algs); ++#include "cipherapi.h" + -+static const struct of_device_id crypto_of_id_table[] = { ++struct cipher_data { ++ int init; /* 0 uninitialized */ ++ int blocksize; ++ int aead; ++ int stream; ++ int ivsize; ++ int alignmask; ++ struct { ++ /* block ciphers */ ++ cryptodev_crypto_blkcipher_t *s; ++ cryptodev_blkcipher_request_t *request; + -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) -+ /* crypto v4 in belows same with crypto-v3*/ -+ { -+ .compatible = "rockchip,crypto-v4", -+ .data = (void *)&cryto_v3_soc_data, -+ }, ++ /* AEAD ciphers */ ++ struct crypto_aead *as; ++ struct aead_request *arequest; + -+ /* crypto v3 in belows */ -+ { -+ .compatible = "rockchip,crypto-v3", -+ .data = (void *)&cryto_v3_soc_data, -+ }, -+#endif ++ struct cryptodev_result result; ++ uint8_t iv[EALG_MAX_BLOCK_LEN]; ++ } async; ++}; + -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) -+ /* crypto v2 in belows */ -+ { -+ .compatible = "rockchip,px30-crypto", -+ .data = (void *)&px30_soc_data, -+ }, -+ { -+ .compatible = "rockchip,rv1126-crypto", 
-+ .data = (void *)&rv1126_soc_data, -+ }, -+ { -+ .compatible = "rockchip,rk3568-crypto", -+ .data = (void *)&full_soc_data, -+ }, -+ { -+ .compatible = "rockchip,rk3588-crypto", -+ .data = (void *)&full_soc_data, -+ }, -+#endif ++int cryptodev_cipher_init(struct cipher_data *out, const char *alg_name, ++ uint8_t *key, size_t keylen, int stream, int aead); ++void cryptodev_cipher_deinit(struct cipher_data *cdata); ++int cryptodev_get_cipher_key(uint8_t *key, struct session_op *sop, int aead); ++int cryptodev_get_cipher_keylen(unsigned int *keylen, struct session_op *sop, ++ int aead); ++ssize_t cryptodev_cipher_decrypt(struct cipher_data *cdata, ++ const struct scatterlist *sg1, ++ struct scatterlist *sg2, size_t len); ++ssize_t cryptodev_cipher_encrypt(struct cipher_data *cdata, ++ const struct scatterlist *sg1, ++ struct scatterlist *sg2, size_t len); + -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) -+ /* crypto v1 in belows */ -+ { -+ .compatible = "rockchip,rk3288-crypto", -+ .data = (void *)&rk3288_soc_data, -+ }, ++/* AEAD */ ++static inline void cryptodev_cipher_auth(struct cipher_data *cdata, ++ struct scatterlist *sg1, size_t len) ++{ ++ /* for some reason we _have_ to call that even for zero length sgs */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) ++ aead_request_set_assoc(cdata->async.arequest, len ? sg1 : NULL, len); ++#else ++ aead_request_set_ad(cdata->async.arequest, len); +#endif ++} + -+ { /* sentinel */ } -+}; ++static inline void cryptodev_cipher_set_tag_size(struct cipher_data *cdata, int size) ++{ ++ if (likely(cdata->aead != 0)) ++ crypto_aead_setauthsize(cdata->async.as, size); ++} + -+MODULE_DEVICE_TABLE(of, crypto_of_id_table); ++static inline int cryptodev_cipher_get_tag_size(struct cipher_data *cdata) ++{ ++ if (likely(cdata->init && cdata->aead != 0)) ++ return crypto_aead_authsize(cdata->async.as); ++ else ++ return 0; ++} + -+static int rk_crypto_probe(struct platform_device *pdev) ++static inline void cryptodev_cipher_set_iv(struct cipher_data *cdata, ++ void *iv, size_t iv_size) +{ -+ struct resource *res; -+ struct device *dev = &pdev->dev; -+ struct device_node *np = pdev->dev.of_node; -+ struct rk_crypto_soc_data *soc_data; -+ const struct of_device_id *match; -+ struct rk_crypto_dev *rk_dev; -+ const char * const *rsts; -+ uint32_t rst_num = 0; -+ int err = 0; ++ memcpy(cdata->async.iv, iv, min(iv_size, sizeof(cdata->async.iv))); ++} + -+ rk_dev = devm_kzalloc(&pdev->dev, -+ sizeof(*rk_dev), GFP_KERNEL); -+ if (!rk_dev) { -+ err = -ENOMEM; -+ goto err_crypto; -+ } ++static inline void cryptodev_cipher_get_iv(struct cipher_data *cdata, ++ void *iv, size_t iv_size) ++{ ++ memcpy(iv, cdata->async.iv, min(iv_size, sizeof(cdata->async.iv))); ++} + -+ rk_dev->name = CRYPTO_NAME; ++/* Hash */ ++struct hash_data { ++ int init; /* 0 uninitialized */ ++ int digestsize; ++ int alignmask; ++ struct { ++ struct crypto_ahash *s; ++ struct cryptodev_result result; ++ struct ahash_request *request; ++ } async; ++}; + -+ match = of_match_node(crypto_of_id_table, np); -+ soc_data = (struct rk_crypto_soc_data *)match->data; -+ rk_dev->soc_data = soc_data; ++int cryptodev_hash_final(struct hash_data *hdata, void *output); ++ssize_t cryptodev_hash_update(struct hash_data *hdata, ++ struct scatterlist *sg, size_t len); ++int cryptodev_hash_reset(struct hash_data *hdata); ++void cryptodev_hash_deinit(struct hash_data *hdata); ++int cryptodev_hash_init(struct hash_data *hdata, const char *alg_name, ++ int hmac_mode, void *mackey, size_t mackeylen); ++#if 
(LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)) ++int cryptodev_hash_copy(struct hash_data *dst, struct hash_data *src); ++#endif + -+ rsts = soc_data->hw_get_rsts(&rst_num); -+ if (rsts && rsts[0]) { -+ rk_dev->rst = -+ devm_reset_control_get(dev, rsts[0]); -+ if (IS_ERR(rk_dev->rst)) { -+ err = PTR_ERR(rk_dev->rst); -+ goto err_crypto; -+ } -+ reset_control_assert(rk_dev->rst); -+ usleep_range(10, 20); -+ reset_control_deassert(rk_dev->rst); -+ } + -+ err = devm_add_action_or_reset(dev, rk_crypto_action, rk_dev); -+ if (err) -+ goto err_crypto; ++#endif +diff --git a/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h b/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h +new file mode 100644 +index 000000000..fd8619db6 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/cryptodev.h +@@ -0,0 +1,188 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+ spin_lock_init(&rk_dev->lock); ++/* cipher stuff */ ++#ifndef CRYPTODEV_H ++# define CRYPTODEV_H + -+ /* get crypto base */ -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ rk_dev->reg = devm_ioremap_resource(dev, res); -+ if (IS_ERR(rk_dev->reg)) { -+ err = PTR_ERR(rk_dev->reg); -+ goto err_crypto; -+ } ++#include + -+ /* get pka base, if pka reg not set, pka reg = crypto + pka offset */ -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); -+ rk_dev->pka_reg = devm_ioremap_resource(dev, res); -+ if (IS_ERR(rk_dev->pka_reg)) -+ rk_dev->pka_reg = rk_dev->reg + soc_data->default_pka_offset; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)) ++# define reinit_completion(x) INIT_COMPLETION(*(x)) ++#endif + -+ rk_dev->clks_num = devm_clk_bulk_get_all(dev, &rk_dev->clk_bulks); -+ if (rk_dev->clks_num < 0) { -+ err = rk_dev->clks_num; -+ dev_err(dev, "failed to get clks property\n"); -+ goto err_crypto; -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ rk_dev->irq = platform_get_irq(pdev, 0); -+ if (rk_dev->irq < 0) { -+ dev_warn(dev, -+ "control Interrupt is not available.\n"); -+ err = rk_dev->irq; -+ goto err_crypto; -+ } ++#define PFX "cryptodev: " ++#define dprintk(level, severity, format, a...) \ ++ do { \ ++ if (level <= cryptodev_verbosity) \ ++ printk(severity PFX "%s[%u] (%s:%u): " format "\n", \ ++ current->comm, current->pid, \ ++ __func__, __LINE__, \ ++ ##a); \ ++ } while (0) ++#define derr(level, format, a...) dprintk(level, KERN_ERR, format, ##a) ++#define dwarning(level, format, a...) dprintk(level, KERN_WARNING, format, ##a) ++#define dinfo(level, format, a...) dprintk(level, KERN_INFO, format, ##a) ++#define ddebug(level, format, a...) 
dprintk(level, KERN_DEBUG, format, ##a) + -+ err = devm_request_irq(dev, rk_dev->irq, -+ rk_crypto_irq_handle, IRQF_SHARED, -+ "rk-crypto", pdev); -+ if (err) { -+ dev_err(dev, "irq request failed.\n"); -+ goto err_crypto; -+ } + -+ disable_irq(rk_dev->irq); ++extern int cryptodev_verbosity; + -+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); -+ if (err) { -+ dev_err(dev, "crypto: No suitable DMA available.\n"); -+ goto err_crypto; -+ } ++struct fcrypt { ++ struct list_head list; ++ struct list_head dma_map_list; ++ struct mutex sem; ++}; + -+ rk_dev->dev = dev; ++/* compatibility stuff */ ++#ifdef CONFIG_COMPAT ++#include + -+ rk_dev->hw_info = -+ devm_kzalloc(dev, soc_data->hw_info_size, GFP_KERNEL); -+ if (!rk_dev->hw_info) { -+ err = -ENOMEM; -+ goto err_crypto; -+ } ++/* input of CIOCGSESSION */ ++struct compat_session_op { ++ /* Specify either cipher or mac ++ */ ++ uint32_t cipher; /* cryptodev_crypto_op_t */ ++ uint32_t mac; /* cryptodev_crypto_op_t */ + -+ err = soc_data->hw_init(dev, rk_dev->hw_info); -+ if (err) { -+ dev_err(dev, "hw_init failed.\n"); -+ goto err_crypto; -+ } ++ uint32_t keylen; ++ compat_uptr_t key; /* pointer to key data */ ++ uint32_t mackeylen; ++ compat_uptr_t mackey; /* pointer to mac key data */ + -+ rk_dev->addr_vir = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, -+ RK_BUFFER_ORDER); -+ if (!rk_dev->addr_vir) { -+ err = -ENOMEM; -+ dev_err(dev, "__get_free_page failed.\n"); -+ goto err_crypto; -+ } ++ uint32_t ses; /* session identifier */ ++}; + -+ rk_dev->vir_max = RK_BUFFER_SIZE; ++/* input of CIOCCRYPT */ ++struct compat_crypt_op { ++ uint32_t ses; /* session identifier */ ++ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ ++ uint16_t flags; /* see COP_FLAG_* */ ++ uint32_t len; /* length of source data */ ++ compat_uptr_t src; /* source data */ ++ compat_uptr_t dst; /* pointer to output data */ ++ compat_uptr_t mac;/* pointer to output data for hash/MAC operations */ ++ compat_uptr_t iv;/* initialization vector for encryption operations */ ++}; + -+ rk_dev->addr_aad = (void *)__get_free_page(GFP_KERNEL); -+ if (!rk_dev->addr_aad) { -+ err = -ENOMEM; -+ dev_err(dev, "__get_free_page failed.\n"); -+ goto err_crypto; -+ } ++/* input of COMPAT_CIOCAUTHCRYPT */ ++struct compat_crypt_auth_op { ++ uint32_t ses; /* session identifier */ ++ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ ++ uint16_t flags; /* see COP_FLAG_AEAD_* */ ++ uint32_t len; /* length of source data */ ++ uint32_t auth_len; /* length of auth data */ ++ compat_uptr_t auth_src; /* authenticated-only data */ + -+ rk_dev->aad_max = RK_BUFFER_SIZE; ++ /* The current implementation is more efficient if data are ++ * encrypted in-place (src== dst). ++ */ ++ compat_uptr_t src; /* data to be encrypted and authenticated */ ++ compat_uptr_t dst; /* pointer to output data. Must have ++ * space for tag. For TLS this should be at least ++ * len + tag_size + block_size for padding ++ */ + -+ platform_set_drvdata(pdev, rk_dev); ++ compat_uptr_t tag; /* where the tag will be copied to. TLS mode ++ * doesn't use that as tag is copied to dst. ++ * SRTP mode copies tag there. ++ */ ++ uint32_t tag_len; /* the length of the tag. Use zero for digest size or max tag. 
*/ + -+ tasklet_init(&rk_dev->queue_task, -+ rk_crypto_queue_task_cb, (unsigned long)rk_dev); -+ tasklet_init(&rk_dev->done_task, -+ rk_crypto_done_task_cb, (unsigned long)rk_dev); -+ crypto_init_queue(&rk_dev->queue, 50); ++ /* initialization vector for encryption operations */ ++ compat_uptr_t iv; ++ uint32_t iv_len; ++}; + -+ timer_setup(&rk_dev->timer, rk_crypto_irq_timer_handle, 0); ++/* compat ioctls, defined for the above structs */ ++#define COMPAT_CIOCGSESSION _IOWR('c', 102, struct compat_session_op) ++#define COMPAT_CIOCCRYPT _IOWR('c', 104, struct compat_crypt_op) ++#define COMPAT_CIOCASYNCCRYPT _IOW('c', 107, struct compat_crypt_op) ++#define COMPAT_CIOCASYNCFETCH _IOR('c', 108, struct compat_crypt_op) + -+ rk_dev->request_crypto = rk_crypto_request; -+ rk_dev->release_crypto = rk_crypto_release; -+ rk_dev->load_data = rk_load_data; -+ rk_dev->unload_data = rk_unload_data; -+ rk_dev->enqueue = rk_crypto_enqueue; -+ rk_dev->busy = false; ++#define COMPAT_CIOCAUTHCRYPT _IOWR('c', 109, struct compat_crypt_auth_op) + -+ err = rk_crypto_register(rk_dev); -+ if (err) { -+ dev_err(dev, "err in register alg"); -+ goto err_register_alg; -+ } ++#endif /* CONFIG_COMPAT */ + -+ rk_cryptodev_register_dev(rk_dev->dev, soc_data->crypto_ver); ++/* kernel-internal extension to struct crypt_op */ ++struct kernel_crypt_op { ++ struct crypt_op cop; + -+ rkcrypto_proc_init(rk_dev); ++ int ivlen; ++ __u8 iv[EALG_MAX_BLOCK_LEN]; + -+ dev_info(dev, "%s Accelerator successfully registered\n", soc_data->crypto_ver); -+ return 0; ++ int digestsize; ++ uint8_t hash_output[AALG_MAX_RESULT_LEN]; + -+err_register_alg: -+ tasklet_kill(&rk_dev->queue_task); -+ tasklet_kill(&rk_dev->done_task); -+err_crypto: -+ return err; -+} ++ struct task_struct *task; ++ struct mm_struct *mm; ++}; + -+static int rk_crypto_remove(struct platform_device *pdev) -+{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(pdev); ++struct kernel_crypt_auth_op { ++ struct crypt_auth_op caop; + -+ rkcrypto_proc_cleanup(rk_dev); ++ int dst_len; /* based on src_len + pad + tag */ ++ int ivlen; ++ __u8 iv[EALG_MAX_BLOCK_LEN]; + -+ rk_cryptodev_unregister_dev(rk_dev->dev); ++ struct task_struct *task; ++ struct mm_struct *mm; ++}; + -+ del_timer_sync(&rk_dev->timer); ++/* auth */ ++#ifdef CONFIG_COMPAT ++int compat_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg); ++int compat_kcaop_from_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg); ++#endif /* CONFIG_COMPAT */ ++int cryptodev_kcaop_from_user(struct kernel_crypt_auth_op *kcop, ++ struct fcrypt *fcr, void __user *arg); ++int cryptodev_kcaop_to_user(struct kernel_crypt_auth_op *kcaop, ++ struct fcrypt *fcr, void __user *arg); ++int crypto_auth_run(struct fcrypt *fcr, struct kernel_crypt_auth_op *kcaop); ++int crypto_run(struct fcrypt *fcr, struct kernel_crypt_op *kcop); + -+ rk_crypto_unregister(rk_dev); -+ tasklet_kill(&rk_dev->done_task); -+ tasklet_kill(&rk_dev->queue_task); ++#include "cryptlib.h" + -+ if (rk_dev->addr_vir) -+ free_pages((unsigned long)rk_dev->addr_vir, RK_BUFFER_ORDER); ++/* other internal structs */ ++struct csession { ++ struct list_head entry; ++ struct mutex sem; ++ struct cipher_data cdata; ++ struct hash_data hdata; ++ uint32_t sid; ++ uint32_t alignmask; + -+ if (rk_dev->addr_aad) -+ free_page((unsigned long)rk_dev->addr_aad); ++ unsigned int array_size; ++ unsigned int used_pages; /* the number of pages that are used */ ++ /* the number of pages marked as NOT-writable; they 
preceed writeables */ ++ unsigned int readonly_pages; ++ struct page **pages; ++ struct scatterlist *sg; ++}; + -+ rk_dev->soc_data->hw_deinit(&pdev->dev, rk_dev->hw_info); ++struct csession *crypto_get_session_by_sid(struct fcrypt *fcr, uint32_t sid); ++int ++crypto_get_sessions_by_sid(struct fcrypt *fcr, ++ uint32_t sid_1, struct csession **ses_ptr_1, ++ uint32_t sid_2, struct csession **ses_ptr_2); + -+ return 0; ++static inline void crypto_put_session(struct csession *ses_ptr) ++{ ++ mutex_unlock(&ses_ptr->sem); +} ++int cryptodev_adjust_sg_array(struct csession *ses, int pagecount); + -+static struct platform_driver crypto_driver = { -+ .probe = rk_crypto_probe, -+ .remove = rk_crypto_remove, -+ .driver = { -+ .name = "rk-crypto", -+ .of_match_table = crypto_of_id_table, -+ }, -+}; -+ -+module_platform_driver(crypto_driver); -+ -+MODULE_AUTHOR("Lin Jinhan "); -+MODULE_DESCRIPTION("Support for Rockchip's cryptographic engine"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/crypto/rockchip/rk_crypto_core.h b/drivers/crypto/rockchip/rk_crypto_core.h ++#endif /* CRYPTODEV_INT_H */ +diff --git a/drivers/crypto/rockchip/cryptodev_linux/ioctl.c b/drivers/crypto/rockchip/cryptodev_linux/ioctl.c new file mode 100644 -index 000000000..b2b059e91 +index 000000000..032b016e4 --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_core.h -@@ -0,0 +1,464 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. */ -+ -+#ifndef __RK_CRYPTO_CORE_H__ -+#define __RK_CRYPTO_CORE_H__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "rk_crypto_bignum.h" ++++ b/drivers/crypto/rockchip/cryptodev_linux/ioctl.c +@@ -0,0 +1,1329 @@ ++/* ++ * Driver for /dev/crypto device (aka CryptoDev) ++ * ++ * Copyright (c) 2004 Michal Ludvig , SuSE Labs ++ * Copyright (c) 2009,2010,2011 Nikos Mavrogiannopoulos ++ * Copyright (c) 2010 Phil Sutter ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + +/* -+ * Change to the lowest priority, and hardware encryption is -+ * invoked explicitly only at the User layer. ++ * Device /dev/crypto provides an interface for ++ * accessing kernel CryptoAPI algorithms (ciphers, ++ * hashes) from userspace programs. ++ * ++ * /dev/crypto interface was originally introduced in ++ * OpenBSD and this module attempts to keep the API. 
++ * + */ -+#define RK_CRYPTO_PRIORITY 0 + -+/* Increase the addr_vir buffer size from 1 to 8 pages */ -+#define RK_BUFFER_ORDER 3 -+#define RK_BUFFER_SIZE (PAGE_SIZE << RK_BUFFER_ORDER) -+ -+#define RK_DMA_ALIGNMENT 128 -+#define sha384_state sha512_state -+#define sha224_state sha256_state -+ -+#define RK_FLAG_FINAL BIT(0) -+#define RK_FLAG_UPDATE BIT(1) ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+struct rk_crypto_stat { -+ unsigned long long busy_cnt; -+ unsigned long long equeue_cnt; -+ unsigned long long dequeue_cnt; -+ unsigned long long complete_cnt; -+ unsigned long long done_cnt; -+ unsigned long long fake_cnt; -+ unsigned long long irq_cnt; -+ unsigned long long timeout_cnt; -+ unsigned long long error_cnt; -+ unsigned long long ever_queue_max; -+ int last_error; -+}; ++#include + -+struct rk_crypto_dev { -+ struct device *dev; -+ struct reset_control *rst; -+ void __iomem *reg; -+ void __iomem *pka_reg; -+ int irq; -+ struct crypto_queue queue; -+ struct tasklet_struct queue_task; -+ struct tasklet_struct done_task; -+ int err; -+ void *hw_info; -+ struct rk_crypto_soc_data *soc_data; -+ int clks_num; -+ struct clk_bulk_data *clk_bulks; -+ const char *name; -+ struct proc_dir_entry *procfs; -+ struct rk_crypto_stat stat; ++#include "cryptodev.h" ++#include "zc.h" ++#include "version.h" ++#include "cipherapi.h" + -+ /* device lock */ -+ spinlock_t lock; ++#include "rk_cryptodev.h" + -+ /* the public variable */ -+ struct crypto_async_request *async_req; -+ void *addr_vir; -+ u32 vir_max; -+ void *addr_aad; -+ int aad_max; -+ struct scatterlist src[2]; -+ struct scatterlist dst[2]; ++MODULE_AUTHOR("Nikos Mavrogiannopoulos "); ++MODULE_DESCRIPTION("CryptoDev driver"); ++MODULE_LICENSE("GPL"); + -+ struct timer_list timer; -+ bool busy; -+ void (*request_crypto)(struct rk_crypto_dev *rk_dev, const char *name); -+ void (*release_crypto)(struct rk_crypto_dev *rk_dev, const char *name); -+ int (*load_data)(struct rk_crypto_dev *rk_dev, -+ struct scatterlist *sg_src, -+ struct scatterlist *sg_dst); -+ int (*unload_data)(struct rk_crypto_dev *rk_dev); -+ int (*enqueue)(struct rk_crypto_dev *rk_dev, -+ struct crypto_async_request *async_req); -+}; ++/* ====== Compile-time config ====== */ + -+struct rk_crypto_soc_data { -+ const char *crypto_ver; -+ char **valid_algs_name; -+ int valid_algs_num; -+ unsigned int hw_info_size; -+ bool use_soft_aes192; -+ int default_pka_offset; -+ bool use_lli_chain; ++/* Default (pre-allocated) and maximum size of the job queue. ++ * These are free, pending and done items all together. 
*/ ++#define DEF_COP_RINGSIZE 16 ++#define MAX_COP_RINGSIZE 64 + -+ int (*hw_init)(struct device *dev, void *hw_info); -+ void (*hw_deinit)(struct device *dev, void *hw_info); -+ const char * const *(*hw_get_rsts)(uint32_t *num); -+ struct rk_crypto_algt **(*hw_get_algts)(uint32_t *num); -+ bool (*hw_is_algo_valid)(struct rk_crypto_dev *rk_dev, -+ struct rk_crypto_algt *aglt); -+}; ++/* ====== Module parameters ====== */ + -+struct rk_alg_ops { -+ int (*start)(struct rk_crypto_dev *rk_dev); -+ int (*update)(struct rk_crypto_dev *rk_dev); -+ void (*complete)(struct crypto_async_request *base, int err); -+ int (*irq_handle)(int irq, void *dev_id); ++int cryptodev_verbosity; ++module_param(cryptodev_verbosity, int, 0644); ++MODULE_PARM_DESC(cryptodev_verbosity, "0: normal, 1: verbose, 2: debug"); + -+ int (*hw_write_key)(struct rk_crypto_dev *rk_dev, const u8 *key, u32 key_len); -+ void (*hw_write_iv)(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len); -+ int (*hw_init)(struct rk_crypto_dev *rk_dev, u32 algo, u32 type); -+ int (*hw_dma_start)(struct rk_crypto_dev *rk_dev, uint32_t flag); -+ int (*hw_get_result)(struct rk_crypto_dev *rk_dev, uint8_t *data, uint32_t data_len); ++/* ====== CryptoAPI ====== */ ++struct todo_list_item { ++ struct list_head __hook; ++ struct kernel_crypt_op kcop; ++ int result; +}; + -+struct rk_alg_ctx { -+ struct rk_alg_ops ops; -+ struct scatterlist *sg_src; -+ struct scatterlist *sg_dst; -+ struct scatterlist sg_tmp; -+ struct scatterlist sg_aad; -+ struct scatterlist *req_src; -+ struct scatterlist *req_dst; -+ size_t src_nents; -+ size_t dst_nents; -+ size_t map_nents; -+ -+ int is_aead; -+ unsigned int total; -+ unsigned int assoclen; -+ unsigned int count; -+ unsigned int left_bytes; -+ -+ dma_addr_t addr_in; -+ dma_addr_t addr_out; -+ dma_addr_t addr_aad_in; -+ -+ bool aligned; -+ bool is_dma; -+ int align_size; -+ int chunk_size; ++struct locked_list { ++ struct list_head list; ++ struct mutex lock; +}; + -+/* the private variable of hash */ -+struct rk_ahash_ctx { -+ struct rk_alg_ctx algs_ctx; -+ struct rk_crypto_dev *rk_dev; -+ u8 authkey[SHA512_BLOCK_SIZE]; -+ u32 authkey_len; -+ struct scatterlist hash_sg[2]; -+ u8 *hash_tmp; -+ u32 hash_tmp_len; -+ bool hash_tmp_mapped; -+ u32 calc_cnt; -+ -+ u8 lastc[RK_DMA_ALIGNMENT]; -+ u32 lastc_len; -+ -+ void *priv; -+ -+ /* for fallback */ -+ struct crypto_ahash *fallback_tfm; ++struct crypt_priv { ++ struct fcrypt fcrypt; ++ struct locked_list free, todo, done; ++ int itemcount; ++ struct work_struct cryptask; ++ wait_queue_head_t user_waiter; +}; + -+/* the privete variable of hash for fallback */ -+struct rk_ahash_rctx { -+ struct ahash_request fallback_req; -+ u32 mode; -+ u32 flag; -+}; ++#define FILL_SG(sg, ptr, len) \ ++ do { \ ++ (sg)->page = virt_to_page(ptr); \ ++ (sg)->offset = offset_in_page(ptr); \ ++ (sg)->length = len; \ ++ (sg)->dma_address = 0; \ ++ } while (0) + -+/* the private variable of cipher */ -+struct rk_cipher_ctx { -+ struct rk_alg_ctx algs_ctx; -+ struct rk_crypto_dev *rk_dev; -+ unsigned char key[AES_MAX_KEY_SIZE * 2]; -+ unsigned int keylen; -+ u32 mode; -+ u8 iv[AES_BLOCK_SIZE]; -+ u32 iv_len; -+ u8 lastc[AES_BLOCK_SIZE]; -+ bool is_enc; -+ void *priv; ++/* cryptodev's own workqueue, keeps crypto tasks from disturbing the force */ ++static struct workqueue_struct *cryptodev_wq; ++static atomic_t cryptodev_sess = ATOMIC_INIT(1); + -+ /* for fallback */ -+ bool fallback_key_inited; -+ struct crypto_skcipher *fallback_tfm; -+ struct skcipher_request fallback_req; // keep at 
the end -+ struct crypto_aead *fallback_aead; -+}; ++/* Prepare session for future use. */ ++static int ++crypto_create_session(struct fcrypt *fcr, struct session_op *sop) ++{ ++ struct csession *ses_new = NULL, *ses_ptr; ++ int ret = 0; ++ const char *alg_name = NULL; ++ const char *hash_name = NULL; ++ int hmac_mode = 1, stream = 0, aead = 0; ++ /* ++ * With composite aead ciphers, only ckey is used and it can cover all the ++ * structure space; otherwise both keys may be used simultaneously but they ++ * are confined to their spaces ++ */ ++ struct { ++ uint8_t ckey[CRYPTO_CIPHER_MAX_KEY_LEN]; ++ uint8_t mkey[CRYPTO_HMAC_MAX_KEY_LEN]; ++ /* padding space for aead keys */ ++ uint8_t pad[RTA_SPACE(sizeof(struct crypto_authenc_key_param))]; ++ } keys; + -+struct rk_rsa_ctx { -+ struct rk_alg_ctx algs_ctx; -+ struct rk_bignum *n; -+ struct rk_bignum *e; -+ struct rk_bignum *d; ++ /* Does the request make sense? */ ++ if (unlikely(!sop->cipher && !sop->mac)) { ++ ddebug(1, "Both 'cipher' and 'mac' unset."); ++ return -EINVAL; ++ } + -+ struct rk_crypto_dev *rk_dev; -+}; ++ memset(&keys, 0x00, sizeof(keys)); + -+enum alg_type { -+ ALG_TYPE_HASH, -+ ALG_TYPE_HMAC, -+ ALG_TYPE_CIPHER, -+ ALG_TYPE_ASYM, -+ ALG_TYPE_AEAD, -+ ALG_TYPE_MAX, -+}; ++ switch (sop->cipher) { ++ case 0: ++ break; ++ case CRYPTO_DES_CBC: ++ alg_name = "cbc(des)"; ++ break; ++ case CRYPTO_3DES_CBC: ++ alg_name = "cbc(des3_ede)"; ++ break; ++ case CRYPTO_BLF_CBC: ++ alg_name = "cbc(blowfish)"; ++ break; ++ case CRYPTO_AES_CBC: ++ alg_name = "cbc(aes)"; ++ break; ++ case CRYPTO_AES_ECB: ++ alg_name = "ecb(aes)"; ++ break; ++ case CRYPTO_AES_XTS: ++ alg_name = "xts(aes)"; ++ break; ++ case CRYPTO_CAMELLIA_CBC: ++ alg_name = "cbc(camellia)"; ++ break; ++ case CRYPTO_AES_CTR: ++ alg_name = "ctr(aes)"; ++ stream = 1; ++ break; ++ case CRYPTO_AES_GCM: ++ alg_name = "gcm(aes)"; ++ stream = 1; ++ aead = 1; ++ break; ++ case CRYPTO_TLS11_AES_CBC_HMAC_SHA1: ++ alg_name = "tls11(hmac(sha1),cbc(aes))"; ++ stream = 0; ++ aead = 1; ++ break; ++ case CRYPTO_TLS12_AES_CBC_HMAC_SHA256: ++ alg_name = "tls12(hmac(sha256),cbc(aes))"; ++ stream = 0; ++ aead = 1; ++ break; ++ case CRYPTO_NULL: ++ alg_name = "ecb(cipher_null)"; ++ stream = 1; ++ break; ++ default: ++ alg_name = rk_get_cipher_name(sop->cipher, &stream, &aead); ++ if (!alg_name) { ++ ddebug(1, "bad cipher: %d", sop->cipher); ++ return -EINVAL; ++ } ++ break; ++ } + -+struct rk_crypto_algt { -+ struct rk_crypto_dev *rk_dev; -+ union { -+ struct skcipher_alg crypto; -+ struct ahash_alg hash; -+ struct akcipher_alg asym; -+ struct aead_alg aead; -+ } alg; -+ enum alg_type type; -+ u32 algo; -+ u32 mode; -+ char *name; -+ bool use_soft_aes192; -+ bool valid_flag; -+}; ++ switch (sop->mac) { ++ case 0: ++ break; ++ case CRYPTO_MD5_HMAC: ++ hash_name = "hmac(md5)"; ++ break; ++ case CRYPTO_RIPEMD160_HMAC: ++ hash_name = "hmac(rmd160)"; ++ break; ++ case CRYPTO_SHA1_HMAC: ++ hash_name = "hmac(sha1)"; ++ break; ++ case CRYPTO_SHA2_224_HMAC: ++ hash_name = "hmac(sha224)"; ++ break; + -+enum rk_hash_algo { -+ HASH_ALGO_MD5, -+ HASH_ALGO_SHA1, -+ HASH_ALGO_SHA224, -+ HASH_ALGO_SHA256, -+ HASH_ALGO_SHA384, -+ HASH_ALGO_SHA512, -+ HASH_ALGO_SM3, -+ HASH_ALGO_SHA512_224, -+ HASH_ALGO_SHA512_256, -+}; ++ case CRYPTO_SHA2_256_HMAC: ++ hash_name = "hmac(sha256)"; ++ break; ++ case CRYPTO_SHA2_384_HMAC: ++ hash_name = "hmac(sha384)"; ++ break; ++ case CRYPTO_SHA2_512_HMAC: ++ hash_name = "hmac(sha512)"; ++ break; + -+enum rk_cipher_algo { -+ CIPHER_ALGO_DES, -+ CIPHER_ALGO_DES3_EDE, -+ 
CIPHER_ALGO_AES, -+ CIPHER_ALGO_SM4, -+}; ++ /* non-hmac cases */ ++ case CRYPTO_MD5: ++ hash_name = "md5"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_RIPEMD160: ++ hash_name = "rmd160"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_SHA1: ++ hash_name = "sha1"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_SHA2_224: ++ hash_name = "sha224"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_SHA2_256: ++ hash_name = "sha256"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_SHA2_384: ++ hash_name = "sha384"; ++ hmac_mode = 0; ++ break; ++ case CRYPTO_SHA2_512: ++ hash_name = "sha512"; ++ hmac_mode = 0; ++ break; ++ default: ++ hash_name = rk_get_hash_name(sop->mac, &hmac_mode); ++ if (!hash_name) { ++ ddebug(1, "bad mac: %d", sop->mac); ++ return -EINVAL; ++ } ++ break; ++ } + -+enum rk_cipher_mode { -+ CIPHER_MODE_ECB, -+ CIPHER_MODE_CBC, -+ CIPHER_MODE_CFB, -+ CIPHER_MODE_OFB, -+ CIPHER_MODE_CTR, -+ CIPHER_MODE_XTS, -+ CIPHER_MODE_CTS, -+ CIPHER_MODE_CCM, -+ CIPHER_MODE_GCM, -+ CIPHER_MODE_CMAC, -+ CIPHER_MODE_CBCMAC, -+}; ++ /* Create a session and put it to the list. Zeroing the structure helps ++ * also with a single exit point in case of errors */ ++ ses_new = kzalloc(sizeof(*ses_new), GFP_KERNEL); ++ if (!ses_new) ++ return -ENOMEM; + -+#define DES_MIN_KEY_SIZE DES_KEY_SIZE -+#define DES_MAX_KEY_SIZE DES_KEY_SIZE -+#define DES3_EDE_MIN_KEY_SIZE DES3_EDE_KEY_SIZE -+#define DES3_EDE_MAX_KEY_SIZE DES3_EDE_KEY_SIZE -+#define SM4_MIN_KEY_SIZE SM4_KEY_SIZE -+#define SM4_MAX_KEY_SIZE SM4_KEY_SIZE ++ /* Set-up crypto transform. */ ++ if (alg_name) { ++ unsigned int keylen; ++ ret = cryptodev_get_cipher_keylen(&keylen, sop, aead); ++ if (unlikely(ret < 0)) { ++ ddebug(1, "Setting key failed for %s-%zu.", ++ alg_name, (size_t)sop->keylen*8); ++ goto session_error; ++ } + -+#define MD5_BLOCK_SIZE SHA1_BLOCK_SIZE ++ ret = cryptodev_get_cipher_key(keys.ckey, sop, aead); ++ if (unlikely(ret < 0)) ++ goto session_error; + -+#define RK_AEAD_ALGO_INIT(cipher_algo, cipher_mode, algo_name, driver_name) {\ -+ .name = #algo_name,\ -+ .type = ALG_TYPE_AEAD,\ -+ .algo = CIPHER_ALGO_##cipher_algo,\ -+ .mode = CIPHER_MODE_##cipher_mode,\ -+ .alg.aead = {\ -+ .base.cra_name = #algo_name,\ -+ .base.cra_driver_name = #driver_name,\ -+ .base.cra_priority = RK_CRYPTO_PRIORITY,\ -+ .base.cra_flags = CRYPTO_ALG_TYPE_AEAD |\ -+ CRYPTO_ALG_KERN_DRIVER_ONLY |\ -+ CRYPTO_ALG_ASYNC |\ -+ CRYPTO_ALG_NEED_FALLBACK,\ -+ .base.cra_blocksize = 1,\ -+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ -+ .base.cra_alignmask = 0x07,\ -+ .base.cra_module = THIS_MODULE,\ -+ .init = rk_aead_init_tfm,\ -+ .exit = rk_aead_exit_tfm,\ -+ .ivsize = GCM_AES_IV_SIZE,\ -+ .chunksize = cipher_algo##_BLOCK_SIZE,\ -+ .maxauthsize = AES_BLOCK_SIZE,\ -+ .setkey = rk_aead_setkey,\ -+ .setauthsize = rk_aead_gcm_setauthsize,\ -+ .encrypt = rk_aead_encrypt,\ -+ .decrypt = rk_aead_decrypt,\ -+ } \ -+} ++ ret = cryptodev_cipher_init(&ses_new->cdata, alg_name, keys.ckey, ++ keylen, stream, aead); ++ if (ret < 0) { ++ ddebug(1, "Failed to load cipher for %s", alg_name); ++ goto session_error; ++ } ++ } + -+#define RK_CIPHER_ALGO_INIT(cipher_algo, cipher_mode, algo_name, driver_name) {\ -+ .name = #algo_name,\ -+ .type = ALG_TYPE_CIPHER,\ -+ .algo = CIPHER_ALGO_##cipher_algo,\ -+ .mode = CIPHER_MODE_##cipher_mode,\ -+ .alg.crypto = {\ -+ .base.cra_name = #algo_name,\ -+ .base.cra_driver_name = #driver_name,\ -+ .base.cra_priority = RK_CRYPTO_PRIORITY,\ -+ .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ -+ CRYPTO_ALG_ASYNC |\ -+ CRYPTO_ALG_NEED_FALLBACK,\ -+ 
.base.cra_blocksize = cipher_algo##_BLOCK_SIZE,\ -+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ -+ .base.cra_alignmask = 0x07,\ -+ .base.cra_module = THIS_MODULE,\ -+ .init = rk_ablk_init_tfm,\ -+ .exit = rk_ablk_exit_tfm,\ -+ .min_keysize = cipher_algo##_MIN_KEY_SIZE,\ -+ .max_keysize = cipher_algo##_MAX_KEY_SIZE,\ -+ .ivsize = cipher_algo##_BLOCK_SIZE,\ -+ .chunksize = cipher_algo##_BLOCK_SIZE,\ -+ .setkey = rk_cipher_setkey,\ -+ .encrypt = rk_cipher_encrypt,\ -+ .decrypt = rk_cipher_decrypt,\ -+ } \ -+} ++ if (hash_name && aead == 0) { ++ if (unlikely(sop->mackeylen > CRYPTO_HMAC_MAX_KEY_LEN)) { ++ ddebug(1, "Setting key failed for %s-%zu.", ++ hash_name, (size_t)sop->mackeylen*8); ++ ret = -EINVAL; ++ goto session_error; ++ } + -+#define RK_CIPHER_ALGO_XTS_INIT(cipher_algo, algo_name, driver_name) {\ -+ .name = #algo_name,\ -+ .type = ALG_TYPE_CIPHER,\ -+ .algo = CIPHER_ALGO_##cipher_algo,\ -+ .mode = CIPHER_MODE_XTS,\ -+ .alg.crypto = {\ -+ .base.cra_name = #algo_name,\ -+ .base.cra_driver_name = #driver_name,\ -+ .base.cra_priority = RK_CRYPTO_PRIORITY,\ -+ .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ -+ CRYPTO_ALG_ASYNC |\ -+ CRYPTO_ALG_NEED_FALLBACK,\ -+ .base.cra_blocksize = cipher_algo##_BLOCK_SIZE,\ -+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ -+ .base.cra_alignmask = 0x07,\ -+ .base.cra_module = THIS_MODULE,\ -+ .init = rk_ablk_init_tfm,\ -+ .exit = rk_ablk_exit_tfm,\ -+ .min_keysize = cipher_algo##_MAX_KEY_SIZE,\ -+ .max_keysize = cipher_algo##_MAX_KEY_SIZE * 2,\ -+ .ivsize = cipher_algo##_BLOCK_SIZE,\ -+ .chunksize = cipher_algo##_BLOCK_SIZE,\ -+ .setkey = rk_cipher_setkey,\ -+ .encrypt = rk_cipher_encrypt,\ -+ .decrypt = rk_cipher_decrypt,\ -+ } \ -+} ++ if (sop->mackey && unlikely(copy_from_user(keys.mkey, sop->mackey, ++ sop->mackeylen))) { ++ ret = -EFAULT; ++ goto session_error; ++ } + -+#define RK_HASH_ALGO_INIT(hash_algo, algo_name) {\ -+ .name = #algo_name,\ -+ .type = ALG_TYPE_HASH,\ -+ .algo = HASH_ALGO_##hash_algo,\ -+ .alg.hash = {\ -+ .init = rk_ahash_init,\ -+ .update = rk_ahash_update,\ -+ .final = rk_ahash_final,\ -+ .finup = rk_ahash_finup,\ -+ .export = rk_ahash_export,\ -+ .import = rk_ahash_import,\ -+ .digest = rk_ahash_digest,\ -+ .halg = {\ -+ .digestsize = hash_algo##_DIGEST_SIZE,\ -+ .statesize = sizeof(struct algo_name##_state),\ -+ .base = {\ -+ .cra_name = #algo_name,\ -+ .cra_driver_name = #algo_name"-rk",\ -+ .cra_priority = RK_CRYPTO_PRIORITY,\ -+ .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ -+ CRYPTO_ALG_ASYNC |\ -+ CRYPTO_ALG_NEED_FALLBACK,\ -+ .cra_blocksize = hash_algo##_BLOCK_SIZE,\ -+ .cra_ctxsize = sizeof(struct rk_ahash_ctx),\ -+ .cra_alignmask = 0,\ -+ .cra_init = rk_cra_hash_init,\ -+ .cra_exit = rk_cra_hash_exit,\ -+ .cra_module = THIS_MODULE,\ -+ } \ -+ } \ -+ } \ -+} ++ ret = cryptodev_hash_init(&ses_new->hdata, hash_name, hmac_mode, ++ keys.mkey, sop->mackeylen); ++ if (ret != 0) { ++ ddebug(1, "Failed to load hash for %s", hash_name); ++ goto session_error; ++ } + -+#define RK_HMAC_ALGO_INIT(hash_algo, algo_name) {\ -+ .name = "hmac(" #algo_name ")",\ -+ .type = ALG_TYPE_HMAC,\ -+ .algo = HASH_ALGO_##hash_algo,\ -+ .alg.hash = {\ -+ .init = rk_ahash_init,\ -+ .update = rk_ahash_update,\ -+ .final = rk_ahash_final,\ -+ .finup = rk_ahash_finup,\ -+ .export = rk_ahash_export,\ -+ .import = rk_ahash_import,\ -+ .digest = rk_ahash_digest,\ -+ .setkey = rk_ahash_hmac_setkey,\ -+ .halg = {\ -+ .digestsize = hash_algo##_DIGEST_SIZE,\ -+ .statesize = sizeof(struct algo_name##_state),\ -+ .base = {\ -+ .cra_name = "hmac(" 
#algo_name ")",\ -+ .cra_driver_name = "hmac-" #algo_name "-rk",\ -+ .cra_priority = RK_CRYPTO_PRIORITY,\ -+ .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ -+ CRYPTO_ALG_ASYNC |\ -+ CRYPTO_ALG_NEED_FALLBACK,\ -+ .cra_blocksize = hash_algo##_BLOCK_SIZE,\ -+ .cra_ctxsize = sizeof(struct rk_ahash_ctx),\ -+ .cra_alignmask = 0,\ -+ .cra_init = rk_cra_hash_init,\ -+ .cra_exit = rk_cra_hash_exit,\ -+ .cra_module = THIS_MODULE,\ -+ } \ -+ } \ -+ } \ -+} ++ ret = cryptodev_hash_reset(&ses_new->hdata); ++ if (ret != 0) { ++ goto session_error; ++ } ++ } + -+#define IS_TYPE_HMAC(type) ((type) == ALG_TYPE_HMAC) ++ ses_new->alignmask = max(ses_new->cdata.alignmask, ++ ses_new->hdata.alignmask); ++ ddebug(2, "got alignmask %d", ses_new->alignmask); + -+#define CRYPTO_READ(dev, offset) \ -+ readl_relaxed(((dev)->reg + (offset))) -+#define CRYPTO_WRITE(dev, offset, val) \ -+ writel_relaxed((val), ((dev)->reg + (offset))) ++ ses_new->array_size = DEFAULT_PREALLOC_PAGES; ++ ddebug(2, "preallocating for %d user pages", ses_new->array_size); ++ ses_new->pages = kzalloc(ses_new->array_size * ++ sizeof(struct page *), GFP_KERNEL); ++ ses_new->sg = kzalloc(ses_new->array_size * ++ sizeof(struct scatterlist), GFP_KERNEL); ++ if (ses_new->sg == NULL || ses_new->pages == NULL) { ++ ddebug(0, "Memory error"); ++ ret = -ENOMEM; ++ goto session_error; ++ } + -+#ifdef DEBUG -+#define CRYPTO_TRACE(format, ...) pr_err("[%s, %05d]-trace: " format "\n", \ -+ __func__, __LINE__, ##__VA_ARGS__) -+#define CRYPTO_MSG(format, ...) pr_err("[%s, %05d]-msg:" format "\n", \ -+ __func__, __LINE__, ##__VA_ARGS__) -+#define CRYPTO_DUMPHEX(var_name, data, len) print_hex_dump(KERN_CONT, (var_name), \ -+ DUMP_PREFIX_OFFSET, \ -+ 16, 1, (data), (len), false) -+#else -+#define CRYPTO_TRACE(format, ...) -+#define CRYPTO_MSG(format, ...) -+#define CRYPTO_DUMPHEX(var_name, data, len) -+#endif ++ /* Non-multithreaded can only create one session */ ++ if (!rk_cryptodev_multi_thread(NULL) && ++ !atomic_dec_and_test(&cryptodev_sess)) { ++ atomic_inc(&cryptodev_sess); ++ ddebug(2, "Non-multithreaded can only create one session. sess = %d", ++ atomic_read(&cryptodev_sess)); ++ ret = -EBUSY; ++ goto session_error; ++ } + -+#endif ++ /* put the new session to the list */ ++ get_random_bytes(&ses_new->sid, sizeof(ses_new->sid)); ++ mutex_init(&ses_new->sem); + -diff --git a/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c -new file mode 100644 -index 000000000..7d8d0aafa ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c -@@ -0,0 +1,478 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Rockchip crypto skcipher uitls -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ */ ++ mutex_lock(&fcr->sem); ++restart: ++ list_for_each_entry(ses_ptr, &fcr->list, entry) { ++ /* Check for duplicate SID */ ++ if (unlikely(ses_new->sid == ses_ptr->sid)) { ++ get_random_bytes(&ses_new->sid, sizeof(ses_new->sid)); ++ /* Unless we have a broken RNG this ++ shouldn't loop forever... ;-) */ ++ goto restart; ++ } ++ } + -+#include "rk_crypto_skcipher_utils.h" ++ list_add(&ses_new->entry, &fcr->list); ++ mutex_unlock(&fcr->sem); + -+struct rk_crypto_algt *rk_cipher_get_algt(struct crypto_skcipher *tfm) -+{ -+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ /* Fill in some values for the user. 
*/ ++ sop->ses = ses_new->sid; ++ return 0; + -+ return container_of(alg, struct rk_crypto_algt, alg.crypto); ++ /* We count on ses_new to be initialized with zeroes ++ * Since hdata and cdata are embedded within ses_new, it follows that ++ * hdata->init and cdata->init are either zero or one as they have been ++ * initialized or not */ ++session_error: ++ cryptodev_hash_deinit(&ses_new->hdata); ++ cryptodev_cipher_deinit(&ses_new->cdata); ++ kfree(ses_new->sg); ++ kfree(ses_new->pages); ++ kfree(ses_new); ++ return ret; +} + -+struct rk_crypto_algt *rk_aead_get_algt(struct crypto_aead *tfm) ++/* Everything that needs to be done when removing a session. */ ++static inline void ++crypto_destroy_session(struct csession *ses_ptr) +{ -+ struct aead_alg *alg = crypto_aead_alg(tfm); ++ if (!mutex_trylock(&ses_ptr->sem)) { ++ ddebug(2, "Waiting for semaphore of sid=0x%08X", ses_ptr->sid); ++ mutex_lock(&ses_ptr->sem); ++ } ++ ddebug(2, "Removed session 0x%08X", ses_ptr->sid); ++ cryptodev_cipher_deinit(&ses_ptr->cdata); ++ cryptodev_hash_deinit(&ses_ptr->hdata); ++ ddebug(2, "freeing space for %d user pages", ses_ptr->array_size); ++ kfree(ses_ptr->pages); ++ kfree(ses_ptr->sg); ++ mutex_unlock(&ses_ptr->sem); ++ mutex_destroy(&ses_ptr->sem); ++ kfree(ses_ptr); + -+ return container_of(alg, struct rk_crypto_algt, alg.aead); ++ /* Non-multithreaded can only create one session */ ++ if (!rk_cryptodev_multi_thread(NULL)) { ++ atomic_inc(&cryptodev_sess); ++ ddebug(2, "Release cryptodev_sess = %d", atomic_read(&cryptodev_sess)); ++ } +} + -+struct rk_cipher_ctx *rk_cipher_ctx_cast(struct rk_crypto_dev *rk_dev) ++/* Look up a session by ID and remove. */ ++static int ++crypto_finish_session(struct fcrypt *fcr, uint32_t sid) +{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(rk_dev->async_req->tfm); ++ struct csession *tmp, *ses_ptr; ++ struct list_head *head; ++ int ret = 0; + -+ return ctx; -+} ++ mutex_lock(&fcr->sem); ++ head = &fcr->list; ++ list_for_each_entry_safe(ses_ptr, tmp, head, entry) { ++ if (ses_ptr->sid == sid) { ++ list_del(&ses_ptr->entry); ++ crypto_destroy_session(ses_ptr); ++ break; ++ } ++ } + -+struct rk_alg_ctx *rk_cipher_alg_ctx(struct rk_crypto_dev *rk_dev) -+{ -+ return &(rk_cipher_ctx_cast(rk_dev)->algs_ctx); -+} ++ if (unlikely(!ses_ptr)) { ++ derr(1, "Session with sid=0x%08X not found!", sid); ++ ret = -ENOENT; ++ } ++ mutex_unlock(&fcr->sem); + -+static bool is_no_multi_blocksize(uint32_t mode) -+{ -+ return (mode == CIPHER_MODE_CFB || -+ mode == CIPHER_MODE_OFB || -+ mode == CIPHER_MODE_CTR || -+ mode == CIPHER_MODE_XTS || -+ mode == CIPHER_MODE_GCM) ? 
true : false; ++ return ret; +} + -+int rk_cipher_fallback(struct skcipher_request *req, struct rk_cipher_ctx *ctx, bool encrypt) ++/* Remove all sessions when closing the file */ ++static int ++crypto_finish_all_sessions(struct fcrypt *fcr) +{ -+ int ret; ++ struct csession *tmp, *ses_ptr; ++ struct list_head *head; + -+ CRYPTO_MSG("use fallback tfm"); ++ mutex_lock(&fcr->sem); + -+ if (!ctx->fallback_tfm) { -+ ret = -ENODEV; -+ CRYPTO_MSG("fallback_tfm is empty!\n"); -+ goto exit; ++ head = &fcr->list; ++ list_for_each_entry_safe(ses_ptr, tmp, head, entry) { ++ list_del(&ses_ptr->entry); ++ crypto_destroy_session(ses_ptr); + } ++ mutex_unlock(&fcr->sem); + -+ if (!ctx->fallback_key_inited) { -+ ret = crypto_skcipher_setkey(ctx->fallback_tfm, -+ ctx->key, ctx->keylen); -+ if (ret) { -+ CRYPTO_MSG("fallback crypto_skcipher_setkey err = %d\n", -+ ret); -+ goto exit; -+ } -+ -+ ctx->fallback_key_inited = true; -+ } ++ return 0; ++} + -+ skcipher_request_set_tfm(&ctx->fallback_req, ctx->fallback_tfm); -+ skcipher_request_set_callback(&ctx->fallback_req, -+ req->base.flags, -+ req->base.complete, -+ req->base.data); ++/* Look up session by session ID. The returned session is locked. */ ++struct csession * ++crypto_get_session_by_sid(struct fcrypt *fcr, uint32_t sid) ++{ ++ struct csession *ses_ptr, *retval = NULL; + -+ skcipher_request_set_crypt(&ctx->fallback_req, req->src, -+ req->dst, req->cryptlen, req->iv); ++ if (unlikely(fcr == NULL)) ++ return NULL; + -+ ret = encrypt ? crypto_skcipher_encrypt(&ctx->fallback_req) : -+ crypto_skcipher_decrypt(&ctx->fallback_req); ++ mutex_lock(&fcr->sem); ++ list_for_each_entry(ses_ptr, &fcr->list, entry) { ++ if (ses_ptr->sid == sid) { ++ mutex_lock(&ses_ptr->sem); ++ retval = ses_ptr; ++ break; ++ } ++ } ++ mutex_unlock(&fcr->sem); + -+exit: -+ return ret; ++ return retval; +} + -+/* increment counter (128-bit int) by 1 */ -+static void rk_ctr128_inc(uint8_t *counter) ++static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ -+ u32 n = 16; -+ u8 c; ++ if (b < a) ++ swap(a, b); + -+ do { -+ --n; -+ c = counter[n]; -+ ++c; -+ counter[n] = c; -+ if (c) -+ return; -+ } while (n); ++ mutex_lock(a); ++ mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + -+static void rk_ctr128_calc(uint8_t *counter, uint32_t data_len) ++int ++crypto_get_sessions_by_sid(struct fcrypt *fcr, ++ uint32_t sid_1, struct csession **ses_ptr_1, ++ uint32_t sid_2, struct csession **ses_ptr_2) +{ -+ u32 i; -+ u32 chunksize = AES_BLOCK_SIZE; -+ -+ for (i = 0; i < DIV_ROUND_UP(data_len, chunksize); i++) -+ rk_ctr128_inc(counter); -+} ++ struct csession *ses_ptr; ++ int retval; + -+static uint32_t rk_get_new_iv(struct rk_cipher_ctx *ctx, u32 mode, bool is_enc, uint8_t *iv) -+{ -+ struct scatterlist *sg_dst; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ uint32_t ivsize = alg_ctx->chunk_size; ++ if (unlikely(fcr == NULL)) { ++ retval = -ENOENT; ++ goto out; ++ } + -+ if (!iv) -+ return 0; ++ if (sid_1 == sid_2) { ++ retval = -EDEADLK; ++ goto out; ++ } + -+ sg_dst = alg_ctx->aligned ? 
alg_ctx->sg_dst : &alg_ctx->sg_tmp; ++ mutex_lock(&fcr->sem); + -+ CRYPTO_TRACE("aligned = %u, count = %u, ivsize = %u, is_enc = %d\n", -+ alg_ctx->aligned, alg_ctx->count, ivsize, is_enc); ++ list_for_each_entry(ses_ptr, &fcr->list, entry) { ++ if (ses_ptr->sid == sid_1) ++ *ses_ptr_1 = ses_ptr; ++ else if (ses_ptr->sid == sid_2) ++ *ses_ptr_2 = ses_ptr; ++ } + -+ switch (mode) { -+ case CIPHER_MODE_CTR: -+ rk_ctr128_calc(iv, alg_ctx->count); -+ break; -+ case CIPHER_MODE_CBC: -+ case CIPHER_MODE_CFB: -+ if (is_enc) -+ sg_pcopy_to_buffer(sg_dst, alg_ctx->map_nents, -+ iv, ivsize, alg_ctx->count - ivsize); -+ else -+ memcpy(iv, ctx->lastc, ivsize); -+ break; -+ case CIPHER_MODE_OFB: -+ sg_pcopy_to_buffer(sg_dst, alg_ctx->map_nents, -+ iv, ivsize, alg_ctx->count - ivsize); -+ crypto_xor(iv, ctx->lastc, ivsize); -+ break; -+ default: -+ return 0; ++ if (*ses_ptr_1 && *ses_ptr_2) { ++ mutex_lock_double(&(*ses_ptr_1)->sem, &(*ses_ptr_2)->sem); ++ retval = 0; ++ } else { ++ retval = -ENOENT; + } + -+ return ivsize; ++ mutex_unlock(&fcr->sem); ++ ++out: ++ if (retval) { ++ *ses_ptr_1 = NULL; ++ *ses_ptr_2 = NULL; ++ } ++ return retval; +} + -+static void rk_iv_copyback(struct rk_crypto_dev *rk_dev) ++#ifdef CIOCCPHASH ++/* Copy the hash state from one session to another */ ++static int ++crypto_copy_hash_state(struct fcrypt *fcr, uint32_t dst_sid, uint32_t src_sid) +{ -+ uint32_t iv_size; -+ struct skcipher_request *req = skcipher_request_cast(rk_dev->async_req); -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); ++ struct csession *src_ses, *dst_ses; ++ int ret; + -+ iv_size = rk_get_new_iv(ctx, algt->mode, ctx->is_enc, ctx->iv); ++ ret = crypto_get_sessions_by_sid(fcr, src_sid, &src_ses, ++ dst_sid, &dst_ses); ++ if (unlikely(ret)) { ++ derr(1, "Failed to get sesssions with sid=0x%08X sid=%0x08X!", ++ src_sid, dst_sid); ++ return ret; ++ } + -+ if (iv_size && req->iv) -+ memcpy(req->iv, ctx->iv, iv_size); ++ ret = cryptodev_hash_copy(&dst_ses->hdata, &src_ses->hdata); ++ crypto_put_session(src_ses); ++ crypto_put_session(dst_ses); ++ return ret; +} ++#endif /* CIOCCPHASH */ + -+static void rk_update_iv(struct rk_crypto_dev *rk_dev) ++static void cryptask_routine(struct work_struct *work) +{ -+ uint32_t iv_size; -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); -+ struct rk_alg_ctx *algs_ctx = &ctx->algs_ctx; -+ struct skcipher_request *req = skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); ++ struct crypt_priv *pcr = container_of(work, struct crypt_priv, cryptask); ++ struct todo_list_item *item; ++ LIST_HEAD(tmp); + -+ iv_size = rk_get_new_iv(ctx, algt->mode, ctx->is_enc, ctx->iv); ++ /* fetch all pending jobs into the temporary list */ ++ mutex_lock(&pcr->todo.lock); ++ list_cut_position(&tmp, &pcr->todo.list, pcr->todo.list.prev); ++ mutex_unlock(&pcr->todo.lock); + -+ if (iv_size) -+ algs_ctx->ops.hw_write_iv(rk_dev, ctx->iv, iv_size); -+} ++ /* handle each job locklessly */ ++ list_for_each_entry(item, &tmp, __hook) { ++ item->result = crypto_run(&pcr->fcrypt, &item->kcop); ++ if (unlikely(item->result)) ++ derr(0, "crypto_run() failed: %d", item->result); ++ } + -+static int rk_set_data_start(struct rk_crypto_dev *rk_dev) -+{ -+ int err; -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ /* push all handled jobs to the 
done list at once */ ++ mutex_lock(&pcr->done.lock); ++ list_splice_tail(&tmp, &pcr->done.list); ++ mutex_unlock(&pcr->done.lock); + -+ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); -+ if (!err) { -+ u32 ivsize = alg_ctx->chunk_size; -+ struct scatterlist *src_sg; -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); ++ /* wake for POLLIN */ ++ wake_up_interruptible(&pcr->user_waiter); ++} + -+ memset(ctx->lastc, 0x00, sizeof(ctx->lastc)); ++/* ====== /dev/crypto ====== */ ++static atomic_t cryptodev_node = ATOMIC_INIT(1); + -+ src_sg = alg_ctx->aligned ? alg_ctx->sg_src : &alg_ctx->sg_tmp; ++static int ++cryptodev_open(struct inode *inode, struct file *filp) ++{ ++ struct todo_list_item *tmp, *tmp_next; ++ struct crypt_priv *pcr; ++ int i; + -+ ivsize = alg_ctx->count > ivsize ? ivsize : alg_ctx->count; ++ /* Non-multithreaded can only be opened once */ ++ if (!rk_cryptodev_multi_thread(NULL) && ++ !atomic_dec_and_test(&cryptodev_node)) { ++ atomic_inc(&cryptodev_node); ++ ddebug(2, "Non-multithreaded can only be opened once. node = %d", ++ atomic_read(&cryptodev_node)); ++ return -EBUSY; ++ } + -+ sg_pcopy_to_buffer(src_sg, alg_ctx->map_nents, -+ ctx->lastc, ivsize, alg_ctx->count - ivsize); ++ /* make sure sess == 1 after open */ ++ atomic_set(&cryptodev_sess, 1); + -+ alg_ctx->ops.hw_dma_start(rk_dev, true); -+ } ++ pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); ++ if (!pcr) ++ return -ENOMEM; ++ filp->private_data = pcr; + -+ return err; -+} ++ mutex_init(&pcr->fcrypt.sem); ++ mutex_init(&pcr->free.lock); ++ mutex_init(&pcr->todo.lock); ++ mutex_init(&pcr->done.lock); + -+int rk_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen) -+{ -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); -+ uint32_t key_factor; -+ int ret = -EINVAL; ++ INIT_LIST_HEAD(&pcr->fcrypt.list); ++ INIT_LIST_HEAD(&pcr->fcrypt.dma_map_list); ++ INIT_LIST_HEAD(&pcr->free.list); ++ INIT_LIST_HEAD(&pcr->todo.list); ++ INIT_LIST_HEAD(&pcr->done.list); + -+ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", -+ algt->algo, algt->mode, keylen); ++ INIT_WORK(&pcr->cryptask, cryptask_routine); + -+ /* The key length of XTS is twice the normal length */ -+ key_factor = algt->mode == CIPHER_MODE_XTS ? 
2 : 1; ++ init_waitqueue_head(&pcr->user_waiter); + -+ switch (algt->algo) { -+ case CIPHER_ALGO_DES: -+ ret = verify_skcipher_des_key(cipher, key); -+ if (ret) -+ goto exit; -+ break; -+ case CIPHER_ALGO_DES3_EDE: -+ ret = verify_skcipher_des3_key(cipher, key); -+ if (ret) -+ goto exit; -+ break; -+ case CIPHER_ALGO_AES: -+ if (keylen != (AES_KEYSIZE_128 * key_factor) && -+ keylen != (AES_KEYSIZE_192 * key_factor) && -+ keylen != (AES_KEYSIZE_256 * key_factor)) -+ goto exit; -+ break; -+ case CIPHER_ALGO_SM4: -+ if (keylen != (SM4_KEY_SIZE * key_factor)) -+ goto exit; -+ break; -+ default: -+ ret = -EINVAL; -+ goto exit; ++ for (i = 0; i < DEF_COP_RINGSIZE; i++) { ++ tmp = kzalloc(sizeof(struct todo_list_item), GFP_KERNEL); ++ if (!tmp) ++ goto err_ringalloc; ++ pcr->itemcount++; ++ ddebug(2, "allocated new item at %p", tmp); ++ list_add(&tmp->__hook, &pcr->free.list); + } + -+ memcpy(ctx->key, key, keylen); -+ ctx->keylen = keylen; -+ ctx->fallback_key_inited = false; ++ ddebug(2, "Cryptodev handle initialised, %d elements in queue", ++ DEF_COP_RINGSIZE); ++ return 0; + -+ ret = 0; -+exit: -+ return ret; ++/* In case of errors, free any memory allocated so far */ ++err_ringalloc: ++ list_for_each_entry_safe(tmp, tmp_next, &pcr->free.list, __hook) { ++ list_del(&tmp->__hook); ++ kfree(tmp); ++ } ++ mutex_destroy(&pcr->done.lock); ++ mutex_destroy(&pcr->todo.lock); ++ mutex_destroy(&pcr->free.lock); ++ mutex_destroy(&pcr->fcrypt.sem); ++ kfree(pcr); ++ filp->private_data = NULL; ++ return -ENOMEM; +} + -+int rk_ablk_rx(struct rk_crypto_dev *rk_dev) ++static int ++cryptodev_release(struct inode *inode, struct file *filp) +{ -+ int err = 0; -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); -+ -+ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); -+ -+ err = rk_dev->unload_data(rk_dev); -+ if (err) -+ goto out_rx; -+ -+ if (alg_ctx->left_bytes) { -+ rk_update_iv(rk_dev); -+ if (alg_ctx->aligned) { -+ if (sg_is_last(alg_ctx->sg_src)) { -+ dev_err(rk_dev->dev, "[%s:%d] Lack of data\n", -+ __func__, __LINE__); -+ err = -ENOMEM; -+ goto out_rx; -+ } -+ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); -+ alg_ctx->sg_dst = sg_next(alg_ctx->sg_dst); -+ } -+ err = rk_set_data_start(rk_dev); -+ } else { -+ if (alg_ctx->is_aead) { -+ u8 hard_tag[RK_MAX_TAG_SIZE]; -+ u8 user_tag[RK_MAX_TAG_SIZE]; -+ struct aead_request *req = -+ aead_request_cast(rk_dev->async_req); -+ struct crypto_aead *tfm = crypto_aead_reqtfm(req); ++ struct crypt_priv *pcr = filp->private_data; ++ struct todo_list_item *item, *item_safe; ++ int items_freed = 0; + -+ unsigned int authsize = crypto_aead_authsize(tfm); ++ if (!pcr) ++ return 0; + -+ CRYPTO_TRACE("cryptlen = %u, assoclen = %u, aead authsize = %u", -+ alg_ctx->total, alg_ctx->assoclen, authsize); ++ /* Non-multithreaded can only be opened once */ ++ if (!rk_cryptodev_multi_thread(NULL)) { ++ atomic_inc(&cryptodev_node); ++ ddebug(2, "Release cryptodev_node = %d", atomic_read(&cryptodev_node)); ++ } + -+ err = alg_ctx->ops.hw_get_result(rk_dev, hard_tag, authsize); -+ if (err) -+ goto out_rx; ++ cancel_work_sync(&pcr->cryptask); + -+ CRYPTO_DUMPHEX("hard_tag", hard_tag, authsize); -+ if (!ctx->is_enc) { -+ if (!sg_pcopy_to_buffer(alg_ctx->req_src, -+ sg_nents(alg_ctx->req_src), -+ user_tag, authsize, -+ alg_ctx->total + -+ alg_ctx->assoclen)) { -+ err = -EINVAL; -+ goto out_rx; -+ } ++ list_splice_tail(&pcr->todo.list, &pcr->free.list); ++ list_splice_tail(&pcr->done.list, &pcr->free.list); + -+ 
CRYPTO_DUMPHEX("user_tag", user_tag, authsize); -+ err = crypto_memneq(user_tag, hard_tag, authsize) ? -EBADMSG : 0; -+ } else { -+ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, -+ sg_nents(alg_ctx->req_dst), -+ hard_tag, authsize, -+ alg_ctx->total + -+ alg_ctx->assoclen)) { -+ err = -EINVAL; -+ goto out_rx; -+ } -+ } -+ } else { -+ rk_iv_copyback(rk_dev); -+ } ++ list_for_each_entry_safe(item, item_safe, &pcr->free.list, __hook) { ++ ddebug(2, "freeing item at %p", item); ++ list_del(&item->__hook); ++ kfree(item); ++ items_freed++; + } -+out_rx: -+ return err; -+} + -+int rk_ablk_start(struct rk_crypto_dev *rk_dev) -+{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); -+ int err = 0; ++ if (items_freed != pcr->itemcount) { ++ derr(0, "freed %d items, but %d should exist!", ++ items_freed, pcr->itemcount); ++ } + -+ alg_ctx->left_bytes = req->cryptlen; -+ alg_ctx->total = req->cryptlen; -+ alg_ctx->sg_src = req->src; -+ alg_ctx->req_src = req->src; -+ alg_ctx->src_nents = sg_nents_for_len(req->src, req->cryptlen); -+ alg_ctx->sg_dst = req->dst; -+ alg_ctx->req_dst = req->dst; -+ alg_ctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); ++ crypto_finish_all_sessions(&pcr->fcrypt); + -+ CRYPTO_TRACE("total = %u", alg_ctx->total); ++ mutex_destroy(&pcr->done.lock); ++ mutex_destroy(&pcr->todo.lock); ++ mutex_destroy(&pcr->free.lock); ++ mutex_destroy(&pcr->fcrypt.sem); + -+ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->mode); -+ err = rk_set_data_start(rk_dev); ++ kfree(pcr); ++ filp->private_data = NULL; + -+ return err; ++ ddebug(2, "Cryptodev handle deinitialised, %d elements freed", ++ items_freed); ++ return 0; +} + -+int rk_skcipher_handle_req(struct rk_crypto_dev *rk_dev, struct skcipher_request *req) ++#ifdef ENABLE_ASYNC ++/* enqueue a job for asynchronous completion ++ * ++ * returns: ++ * -EBUSY when there are no free queue slots left ++ * (and the number of slots has reached it MAX_COP_RINGSIZE) ++ * -EFAULT when there was a memory allocation error ++ * 0 on success */ ++static int crypto_async_run(struct crypt_priv *pcr, struct kernel_crypt_op *kcop) +{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); ++ struct todo_list_item *item = NULL; + -+ if (!IS_ALIGNED(req->cryptlen, ctx->algs_ctx.chunk_size) && -+ !is_no_multi_blocksize(algt->mode)) ++ if (unlikely(kcop->cop.flags & COP_FLAG_NO_ZC)) + return -EINVAL; -+ else -+ return rk_dev->enqueue(rk_dev, &req->base); -+} + -+int rk_aead_fallback(struct aead_request *req, struct rk_cipher_ctx *ctx, bool encrypt) -+{ -+ int ret; -+ struct aead_request *subreq = aead_request_ctx(req); ++ mutex_lock(&pcr->free.lock); ++ if (likely(!list_empty(&pcr->free.list))) { ++ item = list_first_entry(&pcr->free.list, ++ struct todo_list_item, __hook); ++ list_del(&item->__hook); ++ } else if (pcr->itemcount < MAX_COP_RINGSIZE) { ++ pcr->itemcount++; ++ } else { ++ mutex_unlock(&pcr->free.lock); ++ return -EBUSY; ++ } ++ mutex_unlock(&pcr->free.lock); + -+ if (!ctx->fallback_aead) { -+ CRYPTO_TRACE("fallback_tfm is empty"); -+ return -EINVAL; ++ if (unlikely(!item)) { ++ item = kzalloc(sizeof(struct todo_list_item), GFP_KERNEL); ++ if (unlikely(!item)) ++ return -EFAULT; ++ dinfo(1, "increased item 
count to %d", pcr->itemcount); + } + -+ CRYPTO_MSG("use fallback tfm"); ++ memcpy(&item->kcop, kcop, sizeof(struct kernel_crypt_op)); + -+ if (!ctx->fallback_key_inited) { -+ ret = crypto_aead_setkey(ctx->fallback_aead, ctx->key, ctx->keylen); -+ if (ret) { -+ CRYPTO_MSG("fallback crypto_skcipher_setkey err = %d\n", ret); -+ goto exit; -+ } ++ mutex_lock(&pcr->todo.lock); ++ list_add_tail(&item->__hook, &pcr->todo.list); ++ mutex_unlock(&pcr->todo.lock); + -+ ctx->fallback_key_inited = true; ++ queue_work(cryptodev_wq, &pcr->cryptask); ++ return 0; ++} ++ ++/* get the first completed job from the "done" queue ++ * ++ * returns: ++ * -EBUSY if no completed jobs are ready (yet) ++ * the return value of crypto_run() otherwise */ ++static int crypto_async_fetch(struct crypt_priv *pcr, ++ struct kernel_crypt_op *kcop) ++{ ++ struct todo_list_item *item; ++ int retval; ++ ++ mutex_lock(&pcr->done.lock); ++ if (list_empty(&pcr->done.list)) { ++ mutex_unlock(&pcr->done.lock); ++ return -EBUSY; + } ++ item = list_first_entry(&pcr->done.list, struct todo_list_item, __hook); ++ list_del(&item->__hook); ++ mutex_unlock(&pcr->done.lock); + -+ aead_request_set_tfm(subreq, ctx->fallback_aead); -+ aead_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); -+ aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); -+ aead_request_set_ad(subreq, req->assoclen); ++ memcpy(kcop, &item->kcop, sizeof(struct kernel_crypt_op)); ++ retval = item->result; + -+ ret = encrypt ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq); ++ mutex_lock(&pcr->free.lock); ++ list_add_tail(&item->__hook, &pcr->free.list); ++ mutex_unlock(&pcr->free.lock); + -+exit: -+ return ret; ++ /* wake for POLLOUT */ ++ wake_up_interruptible(&pcr->user_waiter); ++ ++ return retval; +} ++#endif + -+int rk_aead_setkey(struct crypto_aead *cipher, const u8 *key, unsigned int keylen) ++/* this function has to be called from process context */ ++static int fill_kcop_from_cop(struct kernel_crypt_op *kcop, struct fcrypt *fcr) +{ -+ struct crypto_tfm *tfm = crypto_aead_tfm(cipher); -+ struct rk_crypto_algt *algt = rk_aead_get_algt(cipher); -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm); -+ int ret = -EINVAL; ++ struct crypt_op *cop = &kcop->cop; ++ struct csession *ses_ptr; ++ int rc; + -+ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", algt->algo, algt->mode, keylen); ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", cop->ses); ++ return -EINVAL; ++ } ++ kcop->ivlen = cop->iv ? 
ses_ptr->cdata.ivsize : 0; ++ kcop->digestsize = 0; /* will be updated during operation */ + -+ switch (algt->algo) { -+ case CIPHER_ALGO_AES: -+ if (keylen != AES_KEYSIZE_128 && -+ keylen != AES_KEYSIZE_192 && -+ keylen != AES_KEYSIZE_256) -+ goto error; ++ crypto_put_session(ses_ptr); + -+ break; -+ case CIPHER_ALGO_SM4: -+ if (keylen != SM4_KEY_SIZE) -+ goto error; ++ kcop->task = current; ++ kcop->mm = current->mm; + -+ break; -+ default: -+ CRYPTO_TRACE(); -+ goto error; ++ if (cop->iv) { ++ rc = copy_from_user(kcop->iv, cop->iv, kcop->ivlen); ++ if (unlikely(rc)) { ++ derr(1, "error copying IV (%d bytes), copy_from_user returned %d for address %p", ++ kcop->ivlen, rc, cop->iv); ++ return -EFAULT; ++ } + } + -+ memcpy(ctx->key, key, keylen); -+ ctx->keylen = keylen; -+ ctx->fallback_key_inited = false; -+ + return 0; -+ -+error: -+ return ret; +} + -+int rk_aead_start(struct rk_crypto_dev *rk_dev) ++/* this function has to be called from process context */ ++static int fill_cop_from_kcop(struct kernel_crypt_op *kcop, struct fcrypt *fcr) +{ -+ struct aead_request *req = aead_request_cast(rk_dev->async_req); -+ struct crypto_aead *tfm = crypto_aead_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); -+ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); -+ unsigned int total = 0, authsize; -+ int err = 0; ++ int ret; + -+ total = req->cryptlen + req->assoclen; ++ if (kcop->digestsize) { ++ ret = copy_to_user(kcop->cop.mac, ++ kcop->hash_output, kcop->digestsize); ++ if (unlikely(ret)) ++ return -EFAULT; ++ } ++ if (kcop->ivlen && kcop->cop.flags & COP_FLAG_WRITE_IV) { ++ ret = copy_to_user(kcop->cop.iv, ++ kcop->iv, kcop->ivlen); ++ if (unlikely(ret)) ++ return -EFAULT; ++ } ++ return 0; ++} + -+ authsize = ctx->is_enc ? 
0 : crypto_aead_authsize(tfm); ++static int kcop_from_user(struct kernel_crypt_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ if (unlikely(copy_from_user(&kcop->cop, arg, sizeof(kcop->cop)))) ++ return -EFAULT; + -+ alg_ctx->total = req->cryptlen - authsize; -+ alg_ctx->assoclen = req->assoclen; -+ alg_ctx->sg_src = req->src; -+ alg_ctx->req_src = req->src; -+ alg_ctx->src_nents = sg_nents_for_len(req->src, total); -+ alg_ctx->sg_dst = req->dst; -+ alg_ctx->req_dst = req->dst; -+ alg_ctx->dst_nents = sg_nents_for_len(req->dst, total - authsize); -+ alg_ctx->left_bytes = alg_ctx->total; ++ return fill_kcop_from_cop(kcop, fcr); ++} + -+ CRYPTO_TRACE("src_nents = %zu, dst_nents = %zu", alg_ctx->src_nents, alg_ctx->dst_nents); -+ CRYPTO_TRACE("is_enc = %d, authsize = %u, cryptlen = %u, total = %u, assoclen = %u", -+ ctx->is_enc, authsize, req->cryptlen, alg_ctx->total, alg_ctx->assoclen); ++static int kcop_to_user(struct kernel_crypt_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ int ret; + -+ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->mode); -+ err = rk_set_data_start(rk_dev); ++ ret = fill_cop_from_kcop(kcop, fcr); ++ if (unlikely(ret)) { ++ derr(1, "Error in fill_cop_from_kcop"); ++ return ret; ++ } + -+ return err; ++ if (unlikely(copy_to_user(arg, &kcop->cop, sizeof(kcop->cop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; ++ } ++ return 0; +} + -+int rk_aead_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) ++static inline void tfm_info_to_alg_info(struct alg_info *dst, struct crypto_tfm *tfm) +{ -+ return crypto_gcm_check_authsize(authsize); ++ snprintf(dst->cra_name, CRYPTODEV_MAX_ALG_NAME, ++ "%s", crypto_tfm_alg_name(tfm)); ++ snprintf(dst->cra_driver_name, CRYPTODEV_MAX_ALG_NAME, ++ "%s", crypto_tfm_alg_driver_name(tfm)); +} + -+int rk_aead_handle_req(struct rk_crypto_dev *rk_dev, struct aead_request *req) ++#ifndef CRYPTO_ALG_KERN_DRIVER_ONLY ++static unsigned int is_known_accelerated(struct crypto_tfm *tfm) +{ -+ return rk_dev->enqueue(rk_dev, &req->base); ++ const char *name = crypto_tfm_alg_driver_name(tfm); ++ ++ if (name == NULL) ++ return 1; /* assume accelerated */ ++ ++ /* look for known crypto engine names */ ++ if (strstr(name, "-talitos") || ++ !strncmp(name, "mv-", 3) || ++ !strncmp(name, "atmel-", 6) || ++ strstr(name, "geode") || ++ strstr(name, "hifn") || ++ strstr(name, "-ixp4xx") || ++ strstr(name, "-omap") || ++ strstr(name, "-picoxcell") || ++ strstr(name, "-s5p") || ++ strstr(name, "-ppc4xx") || ++ strstr(name, "-caam") || ++ strstr(name, "-n2")) ++ return 1; ++ ++ return 0; +} -diff --git a/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h -new file mode 100644 -index 000000000..7d47f9719 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h -@@ -0,0 +1,46 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ ++#endif + -+/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ ++static int get_session_info(struct fcrypt *fcr, struct session_info_op *siop) ++{ ++ struct csession *ses_ptr; ++ struct crypto_tfm *tfm; + -+#ifndef __RK_CRYPTO_SKCIPHER_UTILS_H__ -+#define __RK_CRYPTO_SKCIPHER_UTILS_H__ ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, siop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", siop->ses); ++ return -EINVAL; ++ } + -+#include -+#include -+#include ++ siop->flags = 0; + -+#include "rk_crypto_core.h" -+#include "rk_crypto_utils.h" ++ if (ses_ptr->cdata.init) { ++ if (ses_ptr->cdata.aead == 0) ++ tfm = cryptodev_crypto_blkcipher_tfm(ses_ptr->cdata.async.s); ++ else ++ tfm = crypto_aead_tfm(ses_ptr->cdata.async.as); ++ tfm_info_to_alg_info(&siop->cipher_info, tfm); ++#ifdef CRYPTO_ALG_KERN_DRIVER_ONLY ++ if (tfm->__crt_alg->cra_flags & CRYPTO_ALG_KERN_DRIVER_ONLY) ++ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; ++#else ++ if (is_known_accelerated(tfm)) ++ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; ++#endif ++ } ++ if (ses_ptr->hdata.init) { ++ tfm = crypto_ahash_tfm(ses_ptr->hdata.async.s); ++ tfm_info_to_alg_info(&siop->hash_info, tfm); ++#ifdef CRYPTO_ALG_KERN_DRIVER_ONLY ++ if (tfm->__crt_alg->cra_flags & CRYPTO_ALG_KERN_DRIVER_ONLY) ++ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; ++#else ++ if (is_known_accelerated(tfm)) ++ siop->flags |= SIOP_FLAG_KERNEL_DRIVER_ONLY; ++#endif ++ } + -+#define RK_MAX_TAG_SIZE 32 ++ siop->alignmask = ses_ptr->alignmask; + -+struct rk_crypto_algt *rk_cipher_get_algt(struct crypto_skcipher *tfm); ++ crypto_put_session(ses_ptr); ++ return 0; ++} + -+struct rk_crypto_algt *rk_aead_get_algt(struct crypto_aead *tfm); ++static long ++cryptodev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg_) ++{ ++ void __user *arg = (void __user *)arg_; ++ int __user *p = arg; ++ struct session_op sop; ++ struct kernel_crypt_op kcop; ++ struct kernel_crypt_auth_op kcaop; ++ struct crypt_priv *pcr = filp->private_data; ++ struct fcrypt *fcr; ++ struct session_info_op siop; ++#ifdef CIOCCPHASH ++ struct cphash_op cphop; ++#endif ++ uint32_t ses; ++ int ret, fd; + -+struct rk_alg_ctx *rk_cipher_alg_ctx(struct rk_crypto_dev *rk_dev); ++ if (unlikely(!pcr)) ++ BUG(); + -+struct rk_cipher_ctx *rk_cipher_ctx_cast(struct rk_crypto_dev *rk_dev); ++ fcr = &pcr->fcrypt; + -+int rk_cipher_fallback(struct skcipher_request *req, struct rk_cipher_ctx *ctx, bool encrypt); ++ switch (cmd) { ++ case CIOCASYMFEAT: ++ return put_user(0, p); ++ case CRIOGET: ++ fd = get_unused_fd_flags(0); ++ if (unlikely(fd < 0)) ++ return fd; + -+int rk_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen); ++ ret = put_user(fd, p); ++ if (unlikely(ret)) { ++ put_unused_fd(fd); ++ return ret; ++ } + -+int rk_ablk_rx(struct rk_crypto_dev *rk_dev); ++ get_file(filp); ++ fd_install(fd, filp); + -+int rk_ablk_start(struct rk_crypto_dev *rk_dev); ++ return ret; ++ case CIOCGSESSION: ++ if (unlikely(copy_from_user(&sop, arg, sizeof(sop)))) ++ return -EFAULT; + -+int rk_skcipher_handle_req(struct rk_crypto_dev *rk_dev, struct skcipher_request *req); ++ ret = crypto_create_session(fcr, &sop); ++ if (unlikely(ret)) ++ return ret; ++ ret = copy_to_user(arg, &sop, sizeof(sop)); ++ if (unlikely(ret)) { ++ crypto_finish_session(fcr, sop.ses); ++ return -EFAULT; ++ } ++ return ret; ++ case CIOCFSESSION: ++ ret = get_user(ses, (uint32_t __user *)arg); ++ if (unlikely(ret)) ++ return ret; ++ ret = crypto_finish_session(fcr, ses); ++ return ret; ++ case CIOCGSESSINFO: ++ if 
(unlikely(copy_from_user(&siop, arg, sizeof(siop)))) ++ return -EFAULT; + -+int rk_aead_fallback(struct aead_request *req, struct rk_cipher_ctx *ctx, bool encrypt); ++ ret = get_session_info(fcr, &siop); ++ if (unlikely(ret)) ++ return ret; ++ return copy_to_user(arg, &siop, sizeof(siop)); ++#ifdef CIOCCPHASH ++ case CIOCCPHASH: ++ if (unlikely(copy_from_user(&cphop, arg, sizeof(cphop)))) ++ return -EFAULT; ++ return crypto_copy_hash_state(fcr, cphop.dst_ses, cphop.src_ses); ++#endif /* CIOCPHASH */ ++ case CIOCCRYPT: ++ if (unlikely(ret = kcop_from_user(&kcop, fcr, arg))) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+int rk_aead_setkey(struct crypto_aead *cipher, const u8 *key, unsigned int keylen); ++ ret = crypto_run(fcr, &kcop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in crypto_run"); ++ return ret; ++ } + -+int rk_aead_start(struct rk_crypto_dev *rk_dev); ++ return kcop_to_user(&kcop, fcr, arg); ++ case CIOCAUTHCRYPT: ++ if (unlikely(ret = cryptodev_kcaop_from_user(&kcaop, fcr, arg))) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+int rk_aead_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize); ++ ret = crypto_auth_run(fcr, &kcaop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in crypto_auth_run"); ++ return ret; ++ } ++ return cryptodev_kcaop_to_user(&kcaop, fcr, arg); ++#ifdef ENABLE_ASYNC ++ case CIOCASYNCCRYPT: ++ if (unlikely(ret = kcop_from_user(&kcop, fcr, arg))) ++ return ret; + -+int rk_aead_handle_req(struct rk_crypto_dev *rk_dev, struct aead_request *req); ++ return crypto_async_run(pcr, &kcop); ++ case CIOCASYNCFETCH: ++ ret = crypto_async_fetch(pcr, &kcop); ++ if (unlikely(ret)) ++ return ret; + ++ return kcop_to_user(&kcop, fcr, arg); +#endif ++ default: ++ return rk_cryptodev_ioctl(fcr, cmd, arg_); ++ } ++} + -diff --git a/drivers/crypto/rockchip/rk_crypto_utils.c b/drivers/crypto/rockchip/rk_crypto_utils.c -new file mode 100644 -index 000000000..5758e0eed ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_utils.c -@@ -0,0 +1,317 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Rockchip crypto uitls -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ */ ++/* compatibility code for 32bit userlands */ ++#ifdef CONFIG_COMPAT + -+#include -+#include ++static inline void ++compat_to_session_op(struct compat_session_op *compat, struct session_op *sop) ++{ ++ sop->cipher = compat->cipher; ++ sop->mac = compat->mac; ++ sop->keylen = compat->keylen; + -+#include "rk_crypto_core.h" -+#include "rk_crypto_utils.h" ++ sop->key = compat_ptr(compat->key); ++ sop->mackeylen = compat->mackeylen; ++ sop->mackey = compat_ptr(compat->mackey); ++ sop->ses = compat->ses; ++} + -+static inline void word2byte_be(u32 word, u8 *ch) ++static inline void ++session_op_to_compat(struct session_op *sop, struct compat_session_op *compat) +{ -+ ch[0] = (word >> 24) & 0xff; -+ ch[1] = (word >> 16) & 0xff; -+ ch[2] = (word >> 8) & 0xff; -+ ch[3] = (word >> 0) & 0xff; ++ compat->cipher = sop->cipher; ++ compat->mac = sop->mac; ++ compat->keylen = sop->keylen; ++ ++ compat->key = ptr_to_compat(sop->key); ++ compat->mackeylen = sop->mackeylen; ++ compat->mackey = ptr_to_compat(sop->mackey); ++ compat->ses = sop->ses; +} + -+static inline u32 byte2word_be(const u8 *ch) ++static inline void ++compat_to_crypt_op(struct compat_crypt_op *compat, struct crypt_op *cop) +{ -+ return (*ch << 24) + (*(ch + 1) << 16) + -+ (*(ch + 2) << 8) + *(ch + 3); ++ cop->ses = compat->ses; ++ cop->op = compat->op; 
++ cop->flags = compat->flags; ++ cop->len = compat->len; ++ ++ cop->src = compat_ptr(compat->src); ++ cop->dst = compat_ptr(compat->dst); ++ cop->mac = compat_ptr(compat->mac); ++ cop->iv = compat_ptr(compat->iv); +} + -+void rk_crypto_write_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, const u8 *data, u32 bytes) ++static inline void ++crypt_op_to_compat(struct crypt_op *cop, struct compat_crypt_op *compat) +{ -+ u32 i; -+ u8 tmp_buf[4]; -+ -+ for (i = 0; i < bytes / 4; i++, base_addr += 4) -+ CRYPTO_WRITE(rk_dev, base_addr, byte2word_be(data + i * 4)); ++ compat->ses = cop->ses; ++ compat->op = cop->op; ++ compat->flags = cop->flags; ++ compat->len = cop->len; + -+ if (bytes % 4) { -+ memset(tmp_buf, 0x00, sizeof(tmp_buf)); -+ memcpy((u8 *)tmp_buf, data + (bytes / 4) * 4, bytes % 4); -+ CRYPTO_WRITE(rk_dev, base_addr, byte2word_be(tmp_buf)); -+ } ++ compat->src = ptr_to_compat(cop->src); ++ compat->dst = ptr_to_compat(cop->dst); ++ compat->mac = ptr_to_compat(cop->mac); ++ compat->iv = ptr_to_compat(cop->iv); +} + -+void rk_crypto_clear_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u32 words) ++static int compat_kcop_from_user(struct kernel_crypt_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ u32 i; ++ struct compat_crypt_op compat_cop; + -+ for (i = 0; i < words; i++, base_addr += 4) -+ CRYPTO_WRITE(rk_dev, base_addr, 0); ++ if (unlikely(copy_from_user(&compat_cop, arg, sizeof(compat_cop)))) ++ return -EFAULT; ++ compat_to_crypt_op(&compat_cop, &kcop->cop); ++ ++ return fill_kcop_from_cop(kcop, fcr); +} + -+void rk_crypto_read_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u8 *data, u32 bytes) ++static int compat_kcop_to_user(struct kernel_crypt_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ u32 i; -+ -+ for (i = 0; i < bytes / 4; i++, base_addr += 4) -+ word2byte_be(CRYPTO_READ(rk_dev, base_addr), data + i * 4); ++ int ret; ++ struct compat_crypt_op compat_cop; + -+ if (bytes % 4) { -+ uint8_t tmp_buf[4]; ++ ret = fill_cop_from_kcop(kcop, fcr); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in fill_cop_from_kcop"); ++ return ret; ++ } ++ crypt_op_to_compat(&kcop->cop, &compat_cop); + -+ word2byte_be(CRYPTO_READ(rk_dev, base_addr), tmp_buf); -+ memcpy(data + i * 4, tmp_buf, bytes % 4); ++ if (unlikely(copy_to_user(arg, &compat_cop, sizeof(compat_cop)))) { ++ dwarning(1, "Error copying to user"); ++ return -EFAULT; + } ++ return 0; +} + -+static int check_scatter_align(struct scatterlist *sg_src, -+ struct scatterlist *sg_dst, -+ int align_mask) ++static long ++cryptodev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg_) +{ -+ int in, out, align; -+ -+ /* The last piece has no need for length alignment */ -+ in = IS_ALIGNED((u32)sg_src->offset, 4) && -+ (!sg_next(sg_src) || -+ IS_ALIGNED((u32)sg_src->length, align_mask)) && -+ (sg_phys(sg_src) < SZ_4G); -+ if (!sg_dst) -+ return in; ++ void __user *arg = (void __user *)arg_; ++ struct crypt_priv *pcr = file->private_data; ++ struct fcrypt *fcr; ++ struct session_op sop; ++ struct compat_session_op compat_sop; ++ struct kernel_crypt_op kcop; ++ struct kernel_crypt_auth_op kcaop; ++ int ret; + -+ /* The last piece has no need for length alignment */ -+ out = IS_ALIGNED((u32)sg_dst->offset, 4) && -+ (!sg_next(sg_dst) || -+ IS_ALIGNED((u32)sg_dst->length, align_mask)) && -+ (sg_phys(sg_dst) < SZ_4G); -+ align = in && out; ++ if (unlikely(!pcr)) ++ BUG(); + -+ return (align && (sg_src->length == sg_dst->length)); -+} ++ fcr = &pcr->fcrypt; + -+bool rk_crypto_check_align(struct scatterlist 
*src_sg, size_t src_nents, -+ struct scatterlist *dst_sg, size_t dst_nents, -+ int align_mask) -+{ -+ struct scatterlist *src_tmp = NULL; -+ struct scatterlist *dst_tmp = NULL; -+ unsigned int i; ++ switch (cmd) { ++ case CIOCASYMFEAT: ++ case CRIOGET: ++ case CIOCFSESSION: ++ case CIOCGSESSINFO: ++ return cryptodev_ioctl(file, cmd, arg_); + -+ if (dst_sg && src_nents != dst_nents) -+ return false; ++ case COMPAT_CIOCGSESSION: ++ if (unlikely(copy_from_user(&compat_sop, arg, ++ sizeof(compat_sop)))) ++ return -EFAULT; ++ compat_to_session_op(&compat_sop, &sop); + -+ src_tmp = src_sg; -+ dst_tmp = dst_sg; ++ ret = crypto_create_session(fcr, &sop); ++ if (unlikely(ret)) ++ return ret; + -+ for (i = 0; i < src_nents; i++) { -+ if (!src_tmp) -+ return false; ++ session_op_to_compat(&sop, &compat_sop); ++ ret = copy_to_user(arg, &compat_sop, sizeof(compat_sop)); ++ if (unlikely(ret)) { ++ crypto_finish_session(fcr, sop.ses); ++ return -EFAULT; ++ } ++ return ret; + -+ if (!check_scatter_align(src_tmp, dst_tmp, align_mask)) -+ return false; ++ case COMPAT_CIOCCRYPT: ++ ret = compat_kcop_from_user(&kcop, fcr, arg); ++ if (unlikely(ret)) ++ return ret; + -+ src_tmp = sg_next(src_tmp); ++ ret = crypto_run(fcr, &kcop); ++ if (unlikely(ret)) ++ return ret; + -+ if (dst_sg) -+ dst_tmp = sg_next(dst_tmp); -+ } ++ return compat_kcop_to_user(&kcop, fcr, arg); + -+ return true; -+} ++ case COMPAT_CIOCAUTHCRYPT: ++ ret = compat_kcaop_from_user(&kcaop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+bool rk_crypto_check_dmafd(struct scatterlist *sgl, size_t nents) -+{ -+ struct scatterlist *src_tmp = NULL; -+ unsigned int i; ++ ret = crypto_auth_run(fcr, &kcaop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in crypto_auth_run"); ++ return ret; ++ } ++ return compat_kcaop_to_user(&kcaop, fcr, arg); ++#ifdef ENABLE_ASYNC ++ case COMPAT_CIOCASYNCCRYPT: ++ if (unlikely(ret = compat_kcop_from_user(&kcop, fcr, arg))) ++ return ret; + -+ for_each_sg(sgl, src_tmp, nents, i) { -+ if (!src_tmp) -+ return false; ++ return crypto_async_run(pcr, &kcop); ++ case COMPAT_CIOCASYNCFETCH: ++ ret = crypto_async_fetch(pcr, &kcop); ++ if (unlikely(ret)) ++ return ret; + -+ if (src_tmp->length && !sg_dma_address(src_tmp)) -+ return false; ++ return compat_kcop_to_user(&kcop, fcr, arg); ++#endif ++ default: ++ return rk_compat_cryptodev_ioctl(fcr, cmd, arg_); + } -+ -+ return true; +} + -+void rk_crypto_dump_hw_desc(struct rk_hw_desc *hw_desc) -+{ -+ struct crypto_lli_desc *cur_lli = NULL; -+ u32 i; ++#endif /* CONFIG_COMPAT */ + -+ cur_lli = hw_desc->lli_head; ++static unsigned int cryptodev_poll(struct file *file, poll_table *wait) ++{ ++ struct crypt_priv *pcr = file->private_data; ++ unsigned int ret = 0; + -+ CRYPTO_TRACE("lli_head = %lx, lli_tail = %lx", -+ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_tail); ++ poll_wait(file, &pcr->user_waiter, wait); + -+ for (i = 0; i < hw_desc->total; i++, cur_lli++) { -+ CRYPTO_TRACE("cur_lli = %lx", (unsigned long)cur_lli); -+ CRYPTO_TRACE("src_addr = %08x", cur_lli->src_addr); -+ CRYPTO_TRACE("src_len = %08x", cur_lli->src_len); -+ CRYPTO_TRACE("dst_addr = %08x", cur_lli->dst_addr); -+ CRYPTO_TRACE("dst_len = %08x", cur_lli->dst_len); -+ CRYPTO_TRACE("user_def = %08x", cur_lli->user_define); -+ CRYPTO_TRACE("dma_ctl = %08x", cur_lli->dma_ctrl); -+ CRYPTO_TRACE("next = %08x\n", cur_lli->next_addr); ++ if (!list_empty_careful(&pcr->done.list)) ++ ret |= POLLIN | POLLRDNORM; ++ if 
(!list_empty_careful(&pcr->free.list) || pcr->itemcount < MAX_COP_RINGSIZE) ++ ret |= POLLOUT | POLLWRNORM; + -+ if (cur_lli == hw_desc->lli_tail) -+ break; -+ } ++ return ret; +} + -+u64 rk_crypto_hw_desc_maxlen(struct scatterlist *sg, u64 len, u32 *max_nents) -+{ -+ int nents; -+ u64 total; -+ -+ if (!len) -+ return 0; ++static const struct file_operations cryptodev_fops = { ++ .owner = THIS_MODULE, ++ .open = cryptodev_open, ++ .release = cryptodev_release, ++ .unlocked_ioctl = cryptodev_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = cryptodev_compat_ioctl, ++#endif /* CONFIG_COMPAT */ ++ .poll = cryptodev_poll, ++}; + -+ for (nents = 0, total = 0; sg; sg = sg_next(sg)) { -+ if (!sg) -+ goto exit; ++static struct miscdevice cryptodev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "crypto", ++ .fops = &cryptodev_fops, ++ .mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH, ++}; + -+ nents++; -+ total += sg->length; ++static int __init ++cryptodev_register(void) ++{ ++ int rc; + -+ if (nents >= RK_DEFAULT_LLI_CNT || total >= len) -+ goto exit; ++ rc = misc_register(&cryptodev); ++ if (unlikely(rc)) { ++ pr_err(PFX "registration of /dev/crypto failed\n"); ++ return rc; + } + -+exit: -+ *max_nents = nents; -+ return total > len ? len : total; ++ return 0; +} + -+int rk_crypto_hw_desc_alloc(struct device *dev, struct rk_hw_desc *hw_desc) ++static void __exit ++cryptodev_deregister(void) +{ -+ u32 lli_cnt = RK_DEFAULT_LLI_CNT; -+ u32 lli_len = lli_cnt * sizeof(struct crypto_lli_desc); -+ -+ if (!dev || !hw_desc) -+ return -EINVAL; ++ misc_deregister(&cryptodev); ++} + -+ memset(hw_desc, 0x00, sizeof(*hw_desc)); ++/* ====== Module init/exit ====== */ ++static struct ctl_table verbosity_ctl_dir[] = { ++ { ++ .procname = "cryptodev_verbosity", ++ .data = &cryptodev_verbosity, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ {}, ++}; + -+ hw_desc->lli_aad = dma_alloc_coherent(dev, sizeof(struct crypto_lli_desc), -+ &hw_desc->lli_aad_dma, GFP_KERNEL); -+ if (!hw_desc->lli_aad) -+ return -ENOMEM; ++static struct ctl_table verbosity_ctl_root[] = { ++ { ++ .procname = "ioctl", ++ .mode = 0555, ++ .child = verbosity_ctl_dir, ++ }, ++ {}, ++}; ++static struct ctl_table_header *verbosity_sysctl_header; ++static int __init init_cryptodev(void) ++{ ++ int rc; + -+ ///TODO: cma -+ hw_desc->lli_head = dma_alloc_coherent(dev, lli_len, &hw_desc->lli_head_dma, GFP_KERNEL); -+ if (!hw_desc->lli_head) { -+ dma_free_coherent(dev, sizeof(struct crypto_lli_desc), -+ hw_desc->lli_aad, hw_desc->lli_aad_dma); -+ return -ENOMEM; ++ cryptodev_wq = create_workqueue("cryptodev_queue"); ++ if (unlikely(!cryptodev_wq)) { ++ pr_err(PFX "failed to allocate the cryptodev workqueue\n"); ++ return -EFAULT; + } + -+ hw_desc->lli_tail = hw_desc->lli_head; -+ hw_desc->total = lli_cnt; -+ hw_desc->dev = dev; ++ rc = cryptodev_register(); ++ if (unlikely(rc)) { ++ destroy_workqueue(cryptodev_wq); ++ return rc; ++ } + -+ memset(hw_desc->lli_head, 0x00, lli_len); ++ verbosity_sysctl_header = register_sysctl_table(verbosity_ctl_root); + -+ CRYPTO_TRACE("dev = %lx, buffer_len = %u, lli_head = %lx, lli_head_dma = %lx", -+ (unsigned long)hw_desc->dev, lli_len, -+ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_head_dma); ++ pr_info(PFX "driver %s loaded.\n", VERSION); + + return 0; +} + -+void rk_crypto_hw_desc_free(struct rk_hw_desc *hw_desc) ++static void __exit exit_cryptodev(void) +{ -+ if (!hw_desc || !hw_desc->dev || !hw_desc->lli_head) -+ return; -+ -+ CRYPTO_TRACE("dev = %lx, 
buffer_len = %lu, lli_head = %lx, lli_head_dma = %lx", -+ (unsigned long)hw_desc->dev, -+ (unsigned long)hw_desc->total * sizeof(struct crypto_lli_desc), -+ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_head_dma); -+ -+ dma_free_coherent(hw_desc->dev, sizeof(struct crypto_lli_desc), -+ hw_desc->lli_aad, hw_desc->lli_aad_dma); ++ flush_workqueue(cryptodev_wq); ++ destroy_workqueue(cryptodev_wq); + -+ dma_free_coherent(hw_desc->dev, hw_desc->total * sizeof(struct crypto_lli_desc), -+ hw_desc->lli_head, hw_desc->lli_head_dma); ++ if (verbosity_sysctl_header) ++ unregister_sysctl_table(verbosity_sysctl_header); + -+ memset(hw_desc, 0x00, sizeof(*hw_desc)); ++ cryptodev_deregister(); ++ pr_info(PFX "driver unloaded.\n"); +} + -+int rk_crypto_hw_desc_init(struct rk_hw_desc *hw_desc, -+ struct scatterlist *src_sg, -+ struct scatterlist *dst_sg, -+ u64 len) -+{ -+ struct crypto_lli_desc *cur_lli = NULL; -+ struct scatterlist *tmp_src, *tmp_dst; -+ dma_addr_t tmp_next_dma; -+ u32 src_nents, dst_nents; -+ u32 i, data_cnt = 0; ++module_init(init_cryptodev); ++module_exit(exit_cryptodev); + -+ if (!hw_desc || !hw_desc->dev || !hw_desc->lli_head) -+ return -EINVAL; +diff --git a/drivers/crypto/rockchip/cryptodev_linux/main.c b/drivers/crypto/rockchip/cryptodev_linux/main.c +new file mode 100644 +index 000000000..23efae186 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/main.c +@@ -0,0 +1,266 @@ ++/* ++ * Driver for /dev/crypto device (aka CryptoDev) ++ * ++ * Copyright (c) 2004 Michal Ludvig , SuSE Labs ++ * Copyright (c) 2009-2013 Nikos Mavrogiannopoulos ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (!src_sg || len == 0) -+ return -EINVAL; ++/* ++ * Device /dev/crypto provides an interface for ++ * accessing kernel CryptoAPI algorithms (ciphers, ++ * hashes) from userspace programs. ++ * ++ * /dev/crypto interface was originally introduced in ++ * OpenBSD and this module attempts to keep the API. ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "cryptodev.h" ++#include "zc.h" ++#include "cryptlib.h" ++#include "version.h" + -+ src_nents = sg_nents_for_len(src_sg, len); -+ dst_nents = dst_sg ? sg_nents_for_len(dst_sg, len) : src_nents; ++/* This file contains the traditional operations of encryption ++ * and hashing of /dev/crypto. ++ */ + -+ if (src_nents != dst_nents) -+ return -EINVAL; ++static int ++hash_n_crypt(struct csession *ses_ptr, struct crypt_op *cop, ++ struct scatterlist *src_sg, struct scatterlist *dst_sg, ++ uint32_t len) ++{ ++ int ret; + -+ CRYPTO_TRACE("src_nents = %u, total = %u, len = %llu", src_nents, hw_desc->total, len); ++ /* Always hash before encryption and after decryption. 
Maybe ++ * we should introduce a flag to switch... TBD later on. ++ */ ++ if (cop->op == COP_ENCRYPT) { ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ src_sg, len); ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); + -+ if (src_nents > hw_desc->total) { -+ pr_err("crypto: nents overflow, %u > %u", src_nents, hw_desc->total); ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ } else { ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); ++ ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ dst_sg, len); ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ } ++ return 0; ++out_err: ++ derr(0, "CryptoAPI failure: %d", ret); ++ return ret; ++} ++ ++/* This is the main crypto function - feed it with plaintext ++ and get a ciphertext (or vice versa :-) */ ++static int ++__crypto_run_std(struct csession *ses_ptr, struct crypt_op *cop) ++{ ++ char *data; ++ char __user *src, *dst; ++ struct scatterlist sg; ++ size_t nbytes, bufsize; ++ int ret = 0; ++ ++ nbytes = cop->len; ++ data = (char *)__get_free_page(GFP_KERNEL); ++ ++ if (unlikely(!data)) { ++ derr(1, "Error getting free page."); + return -ENOMEM; + } + -+ memset(hw_desc->lli_head, 0x00, src_nents * sizeof(struct crypto_lli_desc)); ++ bufsize = PAGE_SIZE < nbytes ? PAGE_SIZE : nbytes; + -+ cur_lli = hw_desc->lli_head; -+ tmp_src = src_sg; -+ tmp_dst = dst_sg; -+ tmp_next_dma = hw_desc->lli_head_dma + sizeof(*cur_lli); ++ src = cop->src; ++ dst = cop->dst; + -+ if (dst_sg) { -+ for (i = 0; i < src_nents - 1; i++, cur_lli++, tmp_next_dma += sizeof(*cur_lli)) { -+ cur_lli->src_addr = sg_dma_address(tmp_src); -+ cur_lli->src_len = sg_dma_len(tmp_src); -+ cur_lli->dst_addr = sg_dma_address(tmp_dst); -+ cur_lli->dst_len = sg_dma_len(tmp_dst); -+ cur_lli->next_addr = tmp_next_dma; ++ while (nbytes > 0) { ++ size_t current_len = nbytes > bufsize ? 
bufsize : nbytes; + -+ data_cnt += sg_dma_len(tmp_src); -+ tmp_src = sg_next(tmp_src); -+ tmp_dst = sg_next(tmp_dst); ++ if (unlikely(copy_from_user(data, src, current_len))) { ++ derr(1, "Error copying %zu bytes from user address %p.", current_len, src); ++ ret = -EFAULT; ++ break; + } -+ } else { -+ for (i = 0; i < src_nents - 1; i++, cur_lli++, tmp_next_dma += sizeof(*cur_lli)) { -+ cur_lli->src_addr = sg_dma_address(tmp_src); -+ cur_lli->src_len = sg_dma_len(tmp_src); -+ cur_lli->next_addr = tmp_next_dma; + -+ data_cnt += sg_dma_len(tmp_src); -+ tmp_src = sg_next(tmp_src); ++ sg_init_one(&sg, data, current_len); ++ ++ ret = hash_n_crypt(ses_ptr, cop, &sg, &sg, current_len); ++ ++ if (unlikely(ret)) { ++ derr(1, "hash_n_crypt failed."); ++ break; + } -+ } + -+ /* for last lli */ -+ cur_lli->src_addr = sg_dma_address(tmp_src); -+ cur_lli->src_len = len - data_cnt; -+ cur_lli->next_addr = 0; ++ if (ses_ptr->cdata.init != 0) { ++ if (unlikely(copy_to_user(dst, data, current_len))) { ++ derr(1, "could not copy to user."); ++ ret = -EFAULT; ++ break; ++ } ++ } + -+ if (dst_sg) { -+ cur_lli->dst_addr = sg_dma_address(tmp_dst); -+ cur_lli->dst_len = len - data_cnt; ++ dst += current_len; ++ nbytes -= current_len; ++ src += current_len; + } + -+ hw_desc->lli_tail = cur_lli; -+ -+ return 0; ++ free_page((unsigned long)data); ++ return ret; +} + -diff --git a/drivers/crypto/rockchip/rk_crypto_utils.h b/drivers/crypto/rockchip/rk_crypto_utils.h -new file mode 100644 -index 000000000..26c931db8 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_utils.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ + -+/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. */ + -+#ifndef __RK_CRYPTO_UTILS_H__ -+#define __RK_CRYPTO_UTILS_H__ ++/* This is the main crypto function - zero-copy edition */ ++static int ++__crypto_run_zc(struct csession *ses_ptr, struct kernel_crypt_op *kcop) ++{ ++ struct scatterlist *src_sg, *dst_sg; ++ struct crypt_op *cop = &kcop->cop; ++ int ret = 0; + -+#include -+#include ++ ret = cryptodev_get_userbuf(ses_ptr, cop->src, cop->len, cop->dst, cop->len, ++ kcop->task, kcop->mm, &src_sg, &dst_sg); ++ if (unlikely(ret)) { ++ derr(1, "Error getting user pages. 
Falling back to non zero copy."); ++ return __crypto_run_std(ses_ptr, cop); ++ } + -+#include "rk_crypto_core.h" ++ ret = hash_n_crypt(ses_ptr, cop, src_sg, dst_sg, cop->len); + -+/* Default 256 x 4K = 1MByte */ -+#define RK_DEFAULT_LLI_CNT 256 ++ cryptodev_release_user_pages(ses_ptr); ++ return ret; ++} + -+struct crypto_lli_desc { -+ u32 src_addr; -+ u32 src_len; -+ u32 dst_addr; -+ u32 dst_len; -+ u32 user_define; -+ u32 reserve; -+ u32 dma_ctrl; -+ u32 next_addr; -+}; ++int crypto_run(struct fcrypt *fcr, struct kernel_crypt_op *kcop) ++{ ++ struct csession *ses_ptr; ++ struct crypt_op *cop = &kcop->cop; ++ int ret; + -+struct rk_hw_desc { -+ struct device *dev; -+ struct crypto_lli_desc *lli_aad; -+ struct crypto_lli_desc *lli_head; -+ struct crypto_lli_desc *lli_tail; -+ dma_addr_t lli_head_dma; -+ dma_addr_t lli_aad_dma; -+ u32 total; -+}; ++ if (unlikely(cop->op != COP_ENCRYPT && cop->op != COP_DECRYPT)) { ++ ddebug(1, "invalid operation op=%u", cop->op); ++ return -EINVAL; ++ } + -+void rk_crypto_write_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, const u8 *data, u32 bytes); ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", cop->ses); ++ return -EINVAL; ++ } + -+void rk_crypto_clear_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u32 words); ++ if (ses_ptr->hdata.init != 0 && (cop->flags == 0 || cop->flags & COP_FLAG_RESET)) { ++ ret = cryptodev_hash_reset(&ses_ptr->hdata); ++ if (unlikely(ret)) { ++ derr(1, "error in cryptodev_hash_reset()"); ++ goto out_unlock; ++ } ++ } + -+void rk_crypto_read_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u8 *data, u32 bytes); ++ if (ses_ptr->cdata.init != 0) { ++ int blocksize = ses_ptr->cdata.blocksize; + -+bool rk_crypto_check_align(struct scatterlist *src_sg, size_t src_nents, -+ struct scatterlist *dst_sg, size_t dst_nents, -+ int align_mask); ++ if (unlikely(cop->len % blocksize)) { ++ derr(1, "data size (%u) isn't a multiple of block size (%u)", ++ cop->len, blocksize); ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+bool rk_crypto_check_dmafd(struct scatterlist *sgl, size_t nents); ++ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcop->iv, ++ min(ses_ptr->cdata.ivsize, kcop->ivlen)); ++ } + -+u64 rk_crypto_hw_desc_maxlen(struct scatterlist *sg, u64 len, u32 *max_nents); ++ if (likely(cop->len)) { ++ if (!(cop->flags & COP_FLAG_NO_ZC)) { ++ if (unlikely(ses_ptr->alignmask && !IS_ALIGNED((unsigned long)cop->src, ses_ptr->alignmask + 1))) { ++ dwarning(2, "source address %p is not %d byte aligned - disabling zero copy", ++ cop->src, ses_ptr->alignmask + 1); ++ cop->flags |= COP_FLAG_NO_ZC; ++ } + -+int rk_crypto_hw_desc_alloc(struct device *dev, struct rk_hw_desc *hw_desc); ++ if (unlikely(ses_ptr->alignmask && !IS_ALIGNED((unsigned long)cop->dst, ses_ptr->alignmask + 1))) { ++ dwarning(2, "destination address %p is not %d byte aligned - disabling zero copy", ++ cop->dst, ses_ptr->alignmask + 1); ++ cop->flags |= COP_FLAG_NO_ZC; ++ } ++ } + -+int rk_crypto_hw_desc_init(struct rk_hw_desc *hw_desc, -+ struct scatterlist *src_sg, -+ struct scatterlist *dst_sg, -+ u64 len); ++ if (cop->flags & COP_FLAG_NO_ZC) ++ ret = __crypto_run_std(ses_ptr, &kcop->cop); ++ else ++ ret = __crypto_run_zc(ses_ptr, kcop); ++ if (unlikely(ret)) ++ goto out_unlock; ++ } + -+void rk_crypto_hw_desc_free(struct rk_hw_desc *hw_desc); ++ if (ses_ptr->cdata.init != 0) { ++ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcop->iv, ++ min(ses_ptr->cdata.ivsize, 
kcop->ivlen)); ++ } + -+void rk_crypto_dump_hw_desc(struct rk_hw_desc *hw_desc); ++ if (ses_ptr->hdata.init != 0 && ++ ((cop->flags & COP_FLAG_FINAL) || ++ (!(cop->flags & COP_FLAG_UPDATE) || cop->len == 0))) { + -+#endif ++ ret = cryptodev_hash_final(&ses_ptr->hdata, kcop->hash_output); ++ if (unlikely(ret)) { ++ derr(0, "CryptoAPI failure: %d", ret); ++ goto out_unlock; ++ } ++ kcop->digestsize = ses_ptr->hdata.digestsize; ++ } + -diff --git a/drivers/crypto/rockchip/rk_crypto_v1.c b/drivers/crypto/rockchip/rk_crypto_v1.c ++out_unlock: ++ crypto_put_session(ses_ptr); ++ return ret; ++} +diff --git a/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c new file mode 100644 -index 000000000..14347c490 +index 000000000..be36478f8 --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v1.c -@@ -0,0 +1,60 @@ ++++ b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.c +@@ -0,0 +1,1527 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Crypto acceleration support for Rockchip Crypto V1 ++ * Crypto acceleration support for Rockchip crypto + * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * Copyright (c) 2021, Rockchip Electronics Co., Ltd + * + * Author: Lin Jinhan + * + */ -+#include "rk_crypto_core.h" -+#include "rk_crypto_v1.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static const char * const crypto_v1_rsts[] = { -+ "crypto-rst", -+}; ++#include "version.h" ++#include "cipherapi.h" ++#include "rk_cryptodev.h" + -+static struct rk_crypto_algt *crypto_v1_algs[] = { -+ &rk_v1_ecb_aes_alg, /* ecb(aes) */ -+ &rk_v1_cbc_aes_alg, /* cbc(aes) */ ++MODULE_IMPORT_NS(DMA_BUF); + -+ &rk_v1_ecb_des_alg, /* ecb(des) */ -+ &rk_v1_cbc_des_alg, /* cbc(des) */ ++#define MAX_CRYPTO_DEV 1 ++#define MAX_CRYPTO_NAME_LEN 64 + -+ &rk_v1_ecb_des3_ede_alg, /* ecb(des3_ede) */ -+ &rk_v1_cbc_des3_ede_alg, /* cbc(des3_ede) */ ++struct dma_fd_map_node { ++ struct kernel_crypt_fd_map_op fd_map; ++ struct sg_table *sgtbl; ++ struct dma_buf *dmabuf; ++ struct dma_buf_attachment *dma_attach; ++ struct list_head list; ++}; + -+ &rk_v1_ahash_sha1, /* sha1 */ -+ &rk_v1_ahash_sha256, /* sha256 */ -+ &rk_v1_ahash_md5, /* md5 */ ++struct crypto_dev_info { ++ struct device *dev; ++ char name[MAX_CRYPTO_NAME_LEN]; ++ bool is_multi_thread; +}; + -+int rk_hw_crypto_v1_init(struct device *dev, void *hw_info) -+{ -+ return 0; -+} ++static struct crypto_dev_info g_dev_infos[MAX_CRYPTO_DEV]; + -+void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info) ++/* ++ * rk_cryptodev_register_dev - register crypto device into rk_cryptodev. 
++ * @dev: [in] crypto device to register ++ * @name: [in] crypto device name to register ++ */ ++int rk_cryptodev_register_dev(struct device *dev, const char *name) +{ ++ uint32_t i; + -+} ++ if (WARN_ON(!dev)) ++ return -EINVAL; + -+const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num) -+{ -+ *num = ARRAY_SIZE(crypto_v1_rsts); ++ if (WARN_ON(!name)) ++ return -EINVAL; + -+ return crypto_v1_rsts; -+} ++ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { ++ if (!g_dev_infos[i].dev) { ++ memset(&g_dev_infos[i], 0x00, sizeof(g_dev_infos[i])); + -+struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num) -+{ -+ *num = ARRAY_SIZE(crypto_v1_algs); ++ g_dev_infos[i].dev = dev; ++ strncpy(g_dev_infos[i].name, name, sizeof(g_dev_infos[i].name)); + -+ return crypto_v1_algs; ++ g_dev_infos[i].is_multi_thread = strstr(g_dev_infos[i].name, "multi"); ++ dev_info(dev, "register to cryptodev ok!\n"); ++ return 0; ++ } ++ } ++ ++ return -ENOMEM; +} ++EXPORT_SYMBOL_GPL(rk_cryptodev_register_dev); + -+bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) ++/* ++ * rk_cryptodev_unregister_dev - unregister crypto device from rk_cryptodev ++ * @dev: [in] crypto device to unregister ++ */ ++int rk_cryptodev_unregister_dev(struct device *dev) +{ -+ return true; -+} ++ uint32_t i; + -diff --git a/drivers/crypto/rockchip/rk_crypto_v1.h b/drivers/crypto/rockchip/rk_crypto_v1.h -new file mode 100644 -index 000000000..417ead575 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v1.h -@@ -0,0 +1,65 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ ++ if (WARN_ON(!dev)) ++ return -EINVAL; + -+/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. */ ++ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { ++ if (g_dev_infos[i].dev == dev) { ++ memset(&g_dev_infos[i], 0x00, sizeof(g_dev_infos[i])); ++ return 0; ++ } ++ } + -+#ifndef __RK_CRYPTO_V1_H__ -+#define __RK_CRYPTO_V1_H__ ++ return -EINVAL; ++} ++EXPORT_SYMBOL_GPL(rk_cryptodev_unregister_dev); + -+#include ++static struct device *rk_cryptodev_find_dev(const char *name) ++{ ++ uint32_t i; + -+struct rk_hw_crypto_v1_info { -+ int reserved; -+}; ++ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { ++ if (g_dev_infos[i].dev) ++ return g_dev_infos[i].dev; ++ } + -+#define RK_CRYPTO_V1_SOC_DATA_INIT(names) {\ -+ .crypto_ver = "CRYPTO V1.0.0.0",\ -+ .use_soft_aes192 = false,\ -+ .valid_algs_name = (names),\ -+ .valid_algs_num = ARRAY_SIZE(names),\ -+ .hw_init = rk_hw_crypto_v1_init,\ -+ .hw_deinit = rk_hw_crypto_v1_deinit,\ -+ .hw_get_rsts = rk_hw_crypto_v1_get_rsts,\ -+ .hw_get_algts = rk_hw_crypto_v1_get_algts,\ -+ .hw_is_algo_valid = rk_hw_crypto_v1_algo_valid,\ -+ .hw_info_size = sizeof(struct rk_hw_crypto_v1_info),\ -+ .default_pka_offset = 0,\ -+ .use_lli_chain = false,\ ++ return NULL; +} + -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) -+ -+extern struct rk_crypto_algt rk_v1_ecb_aes_alg; -+extern struct rk_crypto_algt rk_v1_cbc_aes_alg; -+ -+extern struct rk_crypto_algt rk_v1_ecb_des_alg; -+extern struct rk_crypto_algt rk_v1_cbc_des_alg; ++/* this function has to be called from process context */ ++static int fill_kcop_fd_from_cop(struct kernel_crypt_fd_op *kcop, struct fcrypt *fcr) ++{ ++ struct crypt_fd_op *cop = &kcop->cop; ++ struct csession *ses_ptr; ++ int rc; + -+extern struct rk_crypto_algt rk_v1_ecb_des3_ede_alg; -+extern struct rk_crypto_algt rk_v1_cbc_des3_ede_alg; ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session 
ID=0x%08X", cop->ses); ++ return -EINVAL; ++ } ++ kcop->ivlen = cop->iv ? ses_ptr->cdata.ivsize : 0; ++ kcop->digestsize = 0; /* will be updated during operation */ + -+extern struct rk_crypto_algt rk_v1_ahash_sha1; -+extern struct rk_crypto_algt rk_v1_ahash_sha256; -+extern struct rk_crypto_algt rk_v1_ahash_md5; ++ crypto_put_session(ses_ptr); + -+int rk_hw_crypto_v1_init(struct device *dev, void *hw_info); -+void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info); -+const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num); -+struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num); -+bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); ++ kcop->task = current; ++ kcop->mm = current->mm; + -+#else ++ if (cop->iv) { ++ rc = copy_from_user(kcop->iv, cop->iv, kcop->ivlen); ++ if (unlikely(rc)) { ++ derr(1, "error copying IV (%d bytes), returned %d for addr %p", ++ kcop->ivlen, rc, cop->iv); ++ return -EFAULT; ++ } ++ } + -+static inline int rk_hw_crypto_v1_init(struct device *dev, void *hw_info) { return -EINVAL; } -+static inline void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info) {} -+static inline const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num) { return NULL; } -+static inline struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num) { return NULL; } -+static inline bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, -+ struct rk_crypto_algt *aglt) -+{ -+ return false; ++ return 0; +} + -+#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) */ -+ -+#endif /* end of __RK_CRYPTO_V1_H__ */ + -diff --git a/drivers/crypto/rockchip/rk_crypto_v1_ahash.c b/drivers/crypto/rockchip/rk_crypto_v1_ahash.c -new file mode 100644 -index 000000000..633961adf ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v1_ahash.c -@@ -0,0 +1,382 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip RK3288 -+ * -+ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd -+ * -+ * Author: Zain Wang -+ * -+ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. -+ */ -+#include "rk_crypto_core.h" -+#include "rk_crypto_v1.h" -+#include "rk_crypto_v1_reg.h" ++/* this function has to be called from process context */ ++static int fill_cop_fd_from_kcop(struct kernel_crypt_fd_op *kcop, struct fcrypt *fcr) ++{ ++ int ret; + -+/* -+ * IC can not process zero message hash, -+ * so we put the fixed hash out when met zero message. 
-+ */ ++ if (kcop->digestsize) { ++ ret = copy_to_user(kcop->cop.mac, ++ kcop->hash_output, kcop->digestsize); ++ if (unlikely(ret)) ++ return -EFAULT; ++ } ++ if (kcop->ivlen && kcop->cop.flags & COP_FLAG_WRITE_IV) { ++ ret = copy_to_user(kcop->cop.iv, ++ kcop->iv, kcop->ivlen); ++ if (unlikely(ret)) ++ return -EFAULT; ++ } ++ return 0; ++} + -+static struct rk_alg_ctx *rk_alg_ctx_cast( -+ struct rk_crypto_dev *rk_dev) ++static int kcop_fd_from_user(struct kernel_crypt_fd_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct ahash_request *req = -+ ahash_request_cast(rk_dev->async_req); -+ -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ if (unlikely(copy_from_user(&kcop->cop, arg, sizeof(kcop->cop)))) ++ return -EFAULT; + -+ return &ctx->algs_ctx; ++ return fill_kcop_fd_from_cop(kcop, fcr); +} + -+static int rk_crypto_irq_handle(int irq, void *dev_id) ++static int kcop_fd_to_user(struct kernel_crypt_fd_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; ++ int ret; + -+ interrupt_status = CRYPTO_READ(rk_dev, RK_CRYPTO_INTSTS); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, interrupt_status); ++ ret = fill_cop_fd_from_kcop(kcop, fcr); ++ if (unlikely(ret)) { ++ derr(1, "Error in fill_cop_from_kcop"); ++ return ret; ++ } + -+ if (interrupt_status & 0x0a) { -+ dev_warn(rk_dev->dev, "DMA Error\n"); -+ rk_dev->err = -EFAULT; ++ if (unlikely(copy_to_user(arg, &kcop->cop, sizeof(kcop->cop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; + } + + return 0; +} + -+static int zero_message_process(struct ahash_request *req) ++static int ++hash_n_crypt_fd(struct csession *ses_ptr, struct crypt_fd_op *cop, ++ struct scatterlist *src_sg, struct scatterlist *dst_sg, ++ uint32_t len) +{ -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ int rk_digest_size = crypto_ahash_digestsize(tfm); ++ int ret; + -+ const u8 sha256_zero_msg_hash[SHA256_DIGEST_SIZE] = { -+ 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, -+ 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, -+ 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, -+ 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 -+ }; ++ /* Always hash before encryption and after decryption. Maybe ++ * we should introduce a flag to switch... TBD later on. 
++ */ ++ if (cop->op == COP_ENCRYPT) { ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ src_sg, len); ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); + -+ const u8 sha1_zero_msg_hash[SHA1_DIGEST_SIZE] = { -+ 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, -+ 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, -+ 0xaf, 0xd8, 0x07, 0x09 -+ }; ++ if (unlikely(ret)) ++ goto out_err; ++ } ++ } else { ++ if (ses_ptr->cdata.init != 0) { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); + -+ const u8 md5_zero_msg_hash[MD5_DIGEST_SIZE] = { -+ 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, -+ 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, -+ }; ++ if (unlikely(ret)) ++ goto out_err; ++ } + -+ switch (rk_digest_size) { -+ case SHA1_DIGEST_SIZE: -+ memcpy(req->result, sha1_zero_msg_hash, rk_digest_size); -+ break; -+ case SHA256_DIGEST_SIZE: -+ memcpy(req->result, sha256_zero_msg_hash, rk_digest_size); -+ break; -+ case MD5_DIGEST_SIZE: -+ memcpy(req->result, md5_zero_msg_hash, rk_digest_size); -+ break; -+ default: -+ return -EINVAL; ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_update(&ses_ptr->hdata, ++ dst_sg, len); ++ if (unlikely(ret)) ++ goto out_err; ++ } + } -+ + return 0; ++out_err: ++ derr(0, "CryptoAPI failure: %d", ret); ++ return ret; +} + -+static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) -+{ -+ if (base->complete) -+ base->complete(base, err); -+} -+ -+static void rk_ahash_reg_init(struct rk_crypto_dev *rk_dev) ++static int get_dmafd_sgtbl(int dma_fd, unsigned int dma_len, enum dma_data_direction dir, ++ struct sg_table **sg_tbl, struct dma_buf_attachment **dma_attach, ++ struct dma_buf **dmabuf) +{ -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ int reg_status = 0; -+ -+ reg_status = CRYPTO_READ(rk_dev, RK_CRYPTO_CTRL) | -+ RK_CRYPTO_HASH_FLUSH | _SBF(0xffff, 16); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, reg_status); ++ struct device *crypto_dev = rk_cryptodev_find_dev(NULL); + -+ reg_status = CRYPTO_READ(rk_dev, RK_CRYPTO_CTRL); -+ reg_status &= (~RK_CRYPTO_HASH_FLUSH); -+ reg_status |= _SBF(0xffff, 16); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, reg_status); ++ if (!crypto_dev) ++ return -EINVAL; + -+ memset_io(rk_dev->reg + RK_CRYPTO_HASH_DOUT_0, 0, 32); ++ *sg_tbl = NULL; ++ *dmabuf = NULL; ++ *dma_attach = NULL; + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTENA, RK_CRYPTO_HRDMA_ERR_ENA | -+ RK_CRYPTO_HRDMA_DONE_ENA); ++ *dmabuf = dma_buf_get(dma_fd); ++ if (IS_ERR(*dmabuf)) { ++ derr(1, "dmabuf error! ret = %d", (int)PTR_ERR(*dmabuf)); ++ *dmabuf = NULL; ++ goto error; ++ } + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT | -+ RK_CRYPTO_HRDMA_DONE_INT); ++ *dma_attach = dma_buf_attach(*dmabuf, crypto_dev); ++ if (IS_ERR(*dma_attach)) { ++ derr(1, "dma_attach error! ret = %d", (int)PTR_ERR(*dma_attach)); ++ *dma_attach = NULL; ++ goto error; ++ } + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HASH_CTRL, rctx->mode | -+ RK_CRYPTO_HASH_SWAP_DO); ++ /* ++ * DMA_TO_DEVICE : cache clean for input data ++ * DMA_FROM_DEVICE: cache invalidate for output data ++ */ ++ *sg_tbl = dma_buf_map_attachment(*dma_attach, dir); ++ if (IS_ERR(*sg_tbl)) { ++ derr(1, "sg_tbl error! 
ret = %d", (int)PTR_ERR(*sg_tbl)); ++ *sg_tbl = NULL; ++ goto error; ++ } + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO | -+ RK_CRYPTO_BYTESWAP_BRFIFO | -+ RK_CRYPTO_BYTESWAP_BTFIFO); ++ /* cache invalidate for input data */ ++ if (dir == DMA_TO_DEVICE) ++ dma_sync_sg_for_cpu(crypto_dev, (*sg_tbl)->sgl, (*sg_tbl)->nents, DMA_FROM_DEVICE); + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HASH_MSG_LEN, alg_ctx->total); -+} ++ return 0; ++error: ++ if (*sg_tbl) ++ dma_buf_unmap_attachment(*dma_attach, *sg_tbl, dir); + -+static int rk_ahash_init(struct ahash_request *req) -+{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ if (*dma_attach) ++ dma_buf_detach(*dmabuf, *dma_attach); + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; ++ if (*dmabuf) ++ dma_buf_put(*dmabuf); + -+ return crypto_ahash_init(&rctx->fallback_req); ++ return -EINVAL; +} + -+static int rk_ahash_update(struct ahash_request *req) ++static int put_dmafd_sgtbl(int dma_fd, enum dma_data_direction dir, ++ struct sg_table *sg_tbl, struct dma_buf_attachment *dma_attach, ++ struct dma_buf *dmabuf) +{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct device *crypto_dev = rk_cryptodev_find_dev(NULL); + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; -+ rctx->fallback_req.nbytes = req->nbytes; -+ rctx->fallback_req.src = req->src; ++ if (!crypto_dev) ++ return -EINVAL; + -+ return crypto_ahash_update(&rctx->fallback_req); -+} ++ if (!sg_tbl || !dma_attach || !dmabuf) ++ return -EINVAL; + -+static int rk_ahash_final(struct ahash_request *req) -+{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ /* cache clean for output data */ ++ if (dir == DMA_FROM_DEVICE) ++ dma_sync_sg_for_device(crypto_dev, sg_tbl->sgl, sg_tbl->nents, DMA_TO_DEVICE); + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; -+ rctx->fallback_req.result = req->result; ++ /* ++ * DMA_TO_DEVICE : do nothing for input data ++ * DMA_FROM_DEVICE: cache invalidate for output data ++ */ ++ dma_buf_unmap_attachment(dma_attach, sg_tbl, dir); ++ dma_buf_detach(dmabuf, dma_attach); ++ dma_buf_put(dmabuf); + -+ return crypto_ahash_final(&rctx->fallback_req); ++ return 0; +} + -+static int rk_ahash_finup(struct ahash_request *req) ++static struct dma_fd_map_node *dma_fd_find_node(struct fcrypt *fcr, int dma_fd) +{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct dma_fd_map_node *map_node = NULL; + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; ++ mutex_lock(&fcr->sem); + -+ rctx->fallback_req.nbytes = req->nbytes; -+ rctx->fallback_req.src = req->src; -+ rctx->fallback_req.result = req->result; ++ list_for_each_entry(map_node, &fcr->dma_map_list, list) { ++ if (unlikely(map_node->fd_map.mop.dma_fd == 
dma_fd)) { ++ mutex_unlock(&fcr->sem); ++ return map_node; ++ } ++ } + -+ return crypto_ahash_finup(&rctx->fallback_req); ++ mutex_unlock(&fcr->sem); ++ ++ return NULL; +} + -+static int rk_ahash_import(struct ahash_request *req, const void *in) ++/* This is the main crypto function - zero-copy edition */ ++static int __crypto_fd_run(struct fcrypt *fcr, struct csession *ses_ptr, ++ struct kernel_crypt_fd_op *kcop) +{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct crypt_fd_op *cop = &kcop->cop; ++ struct dma_buf *dma_buf_in = NULL, *dma_buf_out = NULL; ++ struct sg_table sg_tmp; ++ struct sg_table *sg_tbl_in = NULL, *sg_tbl_out = NULL; ++ struct dma_buf_attachment *dma_attach_in = NULL, *dma_attach_out = NULL; ++ struct dma_fd_map_node *node_src = NULL, *node_dst = NULL; ++ int ret = 0; + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; ++ node_src = dma_fd_find_node(fcr, kcop->cop.src_fd); ++ if (node_src) { ++ sg_tbl_in = node_src->sgtbl; ++ } else { ++ ret = get_dmafd_sgtbl(kcop->cop.src_fd, kcop->cop.len, DMA_TO_DEVICE, ++ &sg_tbl_in, &dma_attach_in, &dma_buf_in); ++ if (unlikely(ret)) { ++ derr(1, "Error get_dmafd_sgtbl src."); ++ goto exit; ++ } ++ } + -+ return crypto_ahash_import(&rctx->fallback_req, in); -+} ++ /* only cipher has dst */ ++ if (ses_ptr->cdata.init) { ++ node_dst = dma_fd_find_node(fcr, kcop->cop.dst_fd); ++ if (node_dst) { ++ sg_tbl_out = node_dst->sgtbl; ++ } else { ++ ret = get_dmafd_sgtbl(kcop->cop.dst_fd, kcop->cop.len, DMA_FROM_DEVICE, ++ &sg_tbl_out, &dma_attach_out, &dma_buf_out); ++ if (unlikely(ret)) { ++ derr(1, "Error get_dmafd_sgtbl dst."); ++ goto exit; ++ } ++ } ++ } else { ++ memset(&sg_tmp, 0x00, sizeof(sg_tmp)); ++ sg_tbl_out = &sg_tmp; ++ } + -+static int rk_ahash_export(struct ahash_request *req, void *out) -+{ -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ ret = hash_n_crypt_fd(ses_ptr, cop, sg_tbl_in->sgl, sg_tbl_out->sgl, cop->len); + -+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); -+ rctx->fallback_req.base.flags = req->base.flags & -+ CRYPTO_TFM_REQ_MAY_SLEEP; ++exit: ++ if (dma_buf_in) ++ put_dmafd_sgtbl(kcop->cop.src_fd, DMA_TO_DEVICE, ++ sg_tbl_in, dma_attach_in, dma_buf_in); + -+ return crypto_ahash_export(&rctx->fallback_req, out); ++ if (dma_buf_out) ++ put_dmafd_sgtbl(kcop->cop.dst_fd, DMA_FROM_DEVICE, ++ sg_tbl_out, dma_attach_out, dma_buf_out); ++ return ret; +} + -+static int rk_ahash_digest(struct ahash_request *req) ++static int crypto_fd_run(struct fcrypt *fcr, struct kernel_crypt_fd_op *kcop) +{ -+ struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); -+ struct rk_crypto_dev *rk_dev = tctx->rk_dev; ++ struct csession *ses_ptr; ++ struct crypt_fd_op *cop = &kcop->cop; ++ int ret = -EINVAL; + -+ if (!req->nbytes) -+ return zero_message_process(req); -+ else -+ return rk_dev->enqueue(rk_dev, &req->base); -+} ++ if (unlikely(cop->op != COP_ENCRYPT && cop->op != COP_DECRYPT)) { ++ ddebug(1, "invalid operation op=%u", cop->op); ++ return -EINVAL; ++ } + -+static void crypto_ahash_dma_start(struct rk_crypto_dev *rk_dev) -+{ -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, cop->ses); 
++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", cop->ses); ++ return -EINVAL; ++ } + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HRDMAS, alg_ctx->addr_in); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HRDMAL, (alg_ctx->count + 3) / 4); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | -+ (RK_CRYPTO_HASH_START << 16)); -+} ++ if (ses_ptr->hdata.init != 0 && (cop->flags == 0 || cop->flags & COP_FLAG_RESET)) { ++ ret = cryptodev_hash_reset(&ses_ptr->hdata); ++ if (unlikely(ret)) { ++ derr(1, "error in cryptodev_hash_reset()"); ++ goto out_unlock; ++ } ++ } + -+static int rk_ahash_set_data_start(struct rk_crypto_dev *rk_dev) -+{ -+ int err; -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ if (ses_ptr->cdata.init != 0) { ++ int blocksize = ses_ptr->cdata.blocksize; + -+ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, NULL); -+ if (!err) -+ crypto_ahash_dma_start(rk_dev); -+ return err; -+} ++ if (unlikely(cop->len % blocksize)) { ++ derr(1, "data size (%u) isn't a multiple of block size (%u)", ++ cop->len, blocksize); ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+static int rk_ahash_start(struct rk_crypto_dev *rk_dev) -+{ -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcop->iv, ++ min(ses_ptr->cdata.ivsize, kcop->ivlen)); ++ } + -+ alg_ctx->total = req->nbytes; -+ alg_ctx->left_bytes = req->nbytes; -+ alg_ctx->sg_src = req->src; -+ alg_ctx->req_src = req->src; -+ alg_ctx->src_nents = sg_nents_for_len(req->src, req->nbytes); ++ if (likely(cop->len)) { ++ ret = __crypto_fd_run(fcr, ses_ptr, kcop); ++ if (unlikely(ret)) ++ goto out_unlock; ++ } + -+ rctx->mode = 0; ++ if (ses_ptr->cdata.init != 0) { ++ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcop->iv, ++ min(ses_ptr->cdata.ivsize, kcop->ivlen)); ++ } + -+ switch (crypto_ahash_digestsize(tfm)) { -+ case SHA1_DIGEST_SIZE: -+ rctx->mode = RK_CRYPTO_HASH_SHA1; -+ break; -+ case SHA256_DIGEST_SIZE: -+ rctx->mode = RK_CRYPTO_HASH_SHA256; -+ break; -+ case MD5_DIGEST_SIZE: -+ rctx->mode = RK_CRYPTO_HASH_MD5; -+ break; -+ default: -+ return -EINVAL; ++ if (ses_ptr->hdata.init != 0 && ++ ((cop->flags & COP_FLAG_FINAL) || ++ (!(cop->flags & COP_FLAG_UPDATE) || cop->len == 0))) { ++ ++ ret = cryptodev_hash_final(&ses_ptr->hdata, kcop->hash_output); ++ if (unlikely(ret)) { ++ derr(0, "CryptoAPI failure: %d", ret); ++ goto out_unlock; ++ } ++ kcop->digestsize = ses_ptr->hdata.digestsize; + } + -+ rk_ahash_reg_init(rk_dev); -+ return rk_ahash_set_data_start(rk_dev); ++out_unlock: ++ crypto_put_session(ses_ptr); ++ ++ return ret; +} + -+static int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev) ++static int kcop_map_fd_from_user(struct kernel_crypt_fd_map_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ int err = 0; -+ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ struct crypto_ahash *tfm; -+ -+ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); -+ -+ err = rk_dev->unload_data(rk_dev); -+ if (err) -+ goto out_rx; ++ if (unlikely(copy_from_user(&kcop->mop, arg, sizeof(kcop->mop)))) ++ return -EFAULT; + -+ if (alg_ctx->left_bytes) { -+ if (alg_ctx->aligned) { -+ if (sg_is_last(alg_ctx->sg_src)) { -+ dev_warn(rk_dev->dev, "[%s:%d], Lack of data\n", -+ __func__, __LINE__); -+ err = -ENOMEM; -+ goto out_rx; -+ } -+ 
alg_ctx->sg_src = sg_next(alg_ctx->sg_src); -+ } -+ err = rk_ahash_set_data_start(rk_dev); -+ } else { -+ /* -+ * it will take some time to process date after last dma -+ * transmission. -+ * -+ * waiting time is relative with the last date len, -+ * so cannot set a fixed time here. -+ * 10us makes system not call here frequently wasting -+ * efficiency, and make it response quickly when dma -+ * complete. -+ */ -+ while (!CRYPTO_READ(rk_dev, RK_CRYPTO_HASH_STS)) -+ udelay(10); ++ return 0; ++} + -+ tfm = crypto_ahash_reqtfm(req); -+ memcpy_fromio(req->result, rk_dev->reg + RK_CRYPTO_HASH_DOUT_0, -+ crypto_ahash_digestsize(tfm)); ++static int kcop_map_fd_to_user(struct kernel_crypt_fd_map_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ if (unlikely(copy_to_user(arg, &kcop->mop, sizeof(kcop->mop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; + } + -+out_rx: -+ return err; ++ return 0; +} + -+static int rk_cra_hash_init(struct crypto_tfm *tfm) ++static int dma_fd_map_for_user(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) +{ -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); -+ struct rk_crypto_algt *algt; -+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); -+ const char *alg_name = crypto_tfm_alg_name(tfm); -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ struct rk_crypto_dev *rk_dev; ++ struct device *crypto_dev = NULL; ++ struct dma_fd_map_node *map_node = NULL; + -+ algt = container_of(alg, struct rk_crypto_algt, alg.hash); -+ rk_dev = algt->rk_dev; ++ /* check if dma_fd is already mapped */ ++ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); ++ if (map_node) { ++ kmop->mop.phys_addr = map_node->fd_map.mop.phys_addr; ++ return 0; ++ } + -+ memset(ctx, 0x00, sizeof(*ctx)); ++ crypto_dev = rk_cryptodev_find_dev(NULL); ++ if (!crypto_dev) ++ return -EINVAL; + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++ map_node = kzalloc(sizeof(*map_node), GFP_KERNEL); ++ if (!map_node) ++ return -ENOMEM; + -+ rk_dev->request_crypto(rk_dev, crypto_tfm_alg_name(tfm)); ++ map_node->dmabuf = dma_buf_get(kmop->mop.dma_fd); ++ if (IS_ERR(map_node->dmabuf)) { ++ derr(1, "dmabuf error! ret = %d", (int)PTR_ERR(map_node->dmabuf)); ++ map_node->dmabuf = NULL; ++ goto error; ++ } + -+ alg_ctx->align_size = 4; ++ map_node->dma_attach = dma_buf_attach(map_node->dmabuf, crypto_dev); ++ if (IS_ERR(map_node->dma_attach)) { ++ derr(1, "dma_attach error! ret = %d", (int)PTR_ERR(map_node->dma_attach)); ++ map_node->dma_attach = NULL; ++ goto error; ++ } + -+ alg_ctx->ops.start = rk_ahash_start; -+ alg_ctx->ops.update = rk_ahash_crypto_rx; -+ alg_ctx->ops.complete = rk_ahash_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ map_node->sgtbl = dma_buf_map_attachment(map_node->dma_attach, DMA_BIDIRECTIONAL); ++ if (IS_ERR(map_node->sgtbl)) { ++ derr(1, "sg_tbl error! 
ret = %d", (int)PTR_ERR(map_node->sgtbl)); ++ map_node->sgtbl = NULL; ++ goto error; ++ } + -+ ctx->rk_dev = rk_dev; ++ map_node->fd_map.mop.dma_fd = kmop->mop.dma_fd; ++ map_node->fd_map.mop.phys_addr = map_node->sgtbl->sgl->dma_address; + -+ /* for fallback */ -+ ctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, -+ CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ctx->fallback_tfm)) { -+ dev_err(rk_dev->dev, "Could not load fallback driver.\n"); -+ return PTR_ERR(ctx->fallback_tfm); -+ } -+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), -+ sizeof(struct rk_ahash_rctx) + -+ crypto_ahash_reqsize(ctx->fallback_tfm)); ++ mutex_lock(&fcr->sem); ++ list_add(&map_node->list, &fcr->dma_map_list); ++ mutex_unlock(&fcr->sem); + -+ algt->alg.hash.halg.statesize = crypto_ahash_statesize(ctx->fallback_tfm); ++ kmop->mop.phys_addr = map_node->fd_map.mop.phys_addr; + + return 0; -+} ++error: ++ if (map_node->sgtbl) ++ dma_buf_unmap_attachment(map_node->dma_attach, map_node->sgtbl, DMA_BIDIRECTIONAL); + -+static void rk_cra_hash_exit(struct crypto_tfm *tfm) -+{ -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); ++ if (map_node->dma_attach) ++ dma_buf_detach(map_node->dmabuf, map_node->dma_attach); + -+ if (ctx->fallback_tfm) -+ crypto_free_ahash(ctx->fallback_tfm); ++ if (map_node->dmabuf) ++ dma_buf_put(map_node->dmabuf); + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); -+} ++ kfree(map_node); + -+struct rk_crypto_algt rk_v1_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); -+struct rk_crypto_algt rk_v1_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); -+struct rk_crypto_algt rk_v1_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); ++ return -EINVAL; ++} + -diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk_crypto_v1_reg.h -similarity index 65% -rename from drivers/crypto/rockchip/rk3288_crypto.h -rename to drivers/crypto/rockchip/rk_crypto_v1_reg.h -index 3aa03cbfb..cf520527a 100644 ---- a/drivers/crypto/rockchip/rk3288_crypto.h -+++ b/drivers/crypto/rockchip/rk_crypto_v1_reg.h -@@ -1,20 +1,9 @@ - /* SPDX-License-Identifier: GPL-2.0 */ --#ifndef __RK3288_CRYPTO_H__ --#define __RK3288_CRYPTO_H__ - --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -+/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ ++static int dma_fd_unmap_for_user(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) ++{ ++ struct dma_fd_map_node *tmp, *map_node; ++ bool is_found = false; ++ int ret = 0; + -+#ifndef __RK_CRYPTO_V1_REG_H__ -+#define __RK_CRYPTO_V1_REG_H__ - - #define _SBF(v, f) ((v) << (f)) - -@@ -180,106 +169,4 @@ - #define RK_CRYPTO_HASH_DOUT_6 0x01a4 - #define RK_CRYPTO_HASH_DOUT_7 0x01a8 - --#define CRYPTO_READ(dev, offset) \ -- readl_relaxed(((dev)->reg + (offset))) --#define CRYPTO_WRITE(dev, offset, val) \ -- writel_relaxed((val), ((dev)->reg + (offset))) -- --#define RK_MAX_CLKS 4 -- --/* -- * struct rockchip_ip - struct for managing a list of RK crypto instance -- * @dev_list: Used for doing a list of rk_crypto_info -- * @lock: Control access to dev_list -- * @dbgfs_dir: Debugfs dentry for statistic directory -- * @dbgfs_stats: Debugfs dentry for statistic counters -- */ --struct rockchip_ip { -- struct list_head dev_list; -- spinlock_t lock; /* Control access to dev_list */ -- struct dentry *dbgfs_dir; -- struct dentry *dbgfs_stats; --}; -- --struct rk_clks { -- const char *name; -- unsigned long max; --}; -- --struct rk_variant { -- int num_clks; -- struct rk_clks rkclks[RK_MAX_CLKS]; --}; -- --struct rk_crypto_info { -- struct list_head list; -- struct device *dev; -- struct clk_bulk_data *clks; -- int num_clks; -- struct reset_control *rst; -- void __iomem *reg; -- int irq; -- const struct rk_variant *variant; -- unsigned long nreq; -- struct crypto_engine *engine; -- struct completion complete; -- int status; --}; -- --/* the private variable of hash */ --struct rk_ahash_ctx { -- /* for fallback */ -- struct crypto_ahash *fallback_tfm; --}; -- --/* the private variable of hash for fallback */ --struct rk_ahash_rctx { -- struct rk_crypto_info *dev; -- struct ahash_request fallback_req; -- u32 mode; -- int nrsg; --}; -- --/* the private variable of cipher */ --struct rk_cipher_ctx { -- unsigned int keylen; -- u8 key[AES_MAX_KEY_SIZE]; -- u8 iv[AES_BLOCK_SIZE]; -- struct crypto_skcipher *fallback_tfm; --}; -- --struct rk_cipher_rctx { -- struct rk_crypto_info *dev; -- u8 backup_iv[AES_BLOCK_SIZE]; -- u32 mode; -- struct skcipher_request fallback_req; // keep at the end --}; -- --struct rk_crypto_tmp { -- u32 type; -- struct rk_crypto_info *dev; -- union { -- struct skcipher_engine_alg skcipher; -- struct ahash_engine_alg hash; -- } alg; -- unsigned long stat_req; -- unsigned long stat_fb; -- unsigned long stat_fb_len; -- unsigned long stat_fb_sglen; -- unsigned long stat_fb_align; -- unsigned long stat_fb_sgdiff; --}; -- --extern struct rk_crypto_tmp rk_ecb_aes_alg; --extern struct rk_crypto_tmp rk_cbc_aes_alg; --extern struct rk_crypto_tmp rk_ecb_des_alg; --extern struct rk_crypto_tmp rk_cbc_des_alg; --extern struct rk_crypto_tmp rk_ecb_des3_ede_alg; --extern struct rk_crypto_tmp rk_cbc_des3_ede_alg; -- --extern struct rk_crypto_tmp rk_ahash_sha1; --extern struct rk_crypto_tmp rk_ahash_sha256; --extern struct rk_crypto_tmp rk_ahash_md5; -- --struct rk_crypto_info *get_rk_crypto(void); - #endif -diff --git a/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c b/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c -new file mode 100644 -index 000000000..dc7a57b80 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c -@@ -0,0 +1,424 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip RK3288 -+ * -+ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd -+ * -+ * Author: Zain Wang -+ * -+ * Some ideas are from marvell-cesa.c and 
s5p-sss.c driver. -+ */ -+#include "rk_crypto_core.h" -+#include "rk_crypto_v1.h" -+#include "rk_crypto_v1_reg.h" ++ mutex_lock(&fcr->sem); ++ list_for_each_entry_safe(map_node, tmp, &fcr->dma_map_list, list) { ++ if (map_node->fd_map.mop.dma_fd == kmop->mop.dma_fd && ++ map_node->fd_map.mop.phys_addr == kmop->mop.phys_addr) { ++ dma_buf_unmap_attachment(map_node->dma_attach, map_node->sgtbl, ++ DMA_BIDIRECTIONAL); ++ dma_buf_detach(map_node->dmabuf, map_node->dma_attach); ++ dma_buf_put(map_node->dmabuf); ++ list_del(&map_node->list); ++ kfree(map_node); ++ kmop->mop.phys_addr = 0; ++ is_found = true; ++ break; ++ } ++ } + -+#define RK_CRYPTO_DEC BIT(0) ++ if (unlikely(!is_found)) { ++ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); ++ ret = -ENOENT; ++ mutex_unlock(&fcr->sem); ++ goto exit; ++ } + -+static struct rk_alg_ctx *rk_alg_ctx_cast( -+ struct rk_crypto_dev *rk_dev) -+{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ mutex_unlock(&fcr->sem); + -+ return &ctx->algs_ctx; ++exit: ++ return ret; +} + -+static int rk_crypto_irq_handle(int irq, void *dev_id) ++static int dma_fd_begin_cpu_access(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) +{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; -+ -+ interrupt_status = CRYPTO_READ(rk_dev, RK_CRYPTO_INTSTS); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, interrupt_status); ++ struct dma_fd_map_node *map_node = NULL; + -+ if (interrupt_status & 0x0a) { -+ dev_warn(rk_dev->dev, "DMA Error\n"); -+ rk_dev->err = -EFAULT; ++ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); ++ if (unlikely(!map_node)) { ++ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); ++ return -ENOENT; + } + -+ return 0; ++ return dma_buf_begin_cpu_access(map_node->dmabuf, DMA_BIDIRECTIONAL); +} + -+static void rk_crypto_complete(struct crypto_async_request *base, int err) ++static int dma_fd_end_cpu_access(struct fcrypt *fcr, struct kernel_crypt_fd_map_op *kmop) +{ -+ if (base->complete) -+ base->complete(base, err); ++ struct dma_fd_map_node *map_node = NULL; ++ ++ map_node = dma_fd_find_node(fcr, kmop->mop.dma_fd); ++ if (unlikely(!map_node)) { ++ derr(1, "dmafd =0x%08X not found!", kmop->mop.dma_fd); ++ return -ENOENT; ++ } ++ ++ return dma_buf_end_cpu_access(map_node->dmabuf, DMA_BIDIRECTIONAL); +} + -+static int rk_handle_req(struct rk_crypto_dev *rk_dev, -+ struct skcipher_request *req) ++static int kcop_rsa_from_user(struct kernel_crypt_rsa_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); ++ if (unlikely(copy_from_user(&kcop->rop, arg, sizeof(kcop->rop)))) ++ return -EFAULT; + -+ if (!IS_ALIGNED(req->cryptlen, ctx->algs_ctx.align_size)) -+ return -EINVAL; -+ else -+ return rk_dev->enqueue(rk_dev, &req->base); ++ return 0; +} + -+static int rk_get_bc(u32 algo, u32 mode, u32 *bc_val) ++static int kcop_rsa_to_user(struct kernel_crypt_rsa_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ /* default DES ECB mode */ -+ *bc_val = 0; -+ -+ switch (algo) { -+ case CIPHER_ALGO_DES3_EDE: -+ *bc_val |= RK_CRYPTO_TDES_SELECT; -+ fallthrough; -+ case CIPHER_ALGO_DES: -+ if (mode == CIPHER_MODE_ECB) -+ *bc_val = 0; -+ else if (mode == CIPHER_MODE_CBC) -+ *bc_val = RK_CRYPTO_TDES_CHAINMODE_CBC; -+ else -+ goto error; -+ break; -+ case CIPHER_ALGO_AES: -+ if (mode == CIPHER_MODE_ECB) -+ *bc_val = 
RK_CRYPTO_AES_ECB_MODE; -+ else if (mode == CIPHER_MODE_CBC) -+ *bc_val = RK_CRYPTO_AES_CBC_MODE; -+ else -+ goto error; -+ break; -+ default: -+ goto error; ++ if (unlikely(copy_to_user(arg, &kcop->rop, sizeof(kcop->rop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; + } + + return 0; -+error: -+ return -EINVAL; +} + -+static int rk_cipher_setkey(struct crypto_skcipher *cipher, -+ const u8 *key, unsigned int keylen) ++static int crypto_rsa_run(struct fcrypt *fcr, struct kernel_crypt_rsa_op *krop) +{ -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); -+ struct skcipher_alg *alg = crypto_skcipher_alg(cipher); -+ struct rk_crypto_algt *algt; -+ int err; -+ -+ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); -+ -+ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", -+ algt->algo, algt->mode, keylen); ++ int ret; ++ u8 *key = NULL, *in = NULL, *out = NULL; ++ u32 out_len_max; ++ struct crypt_rsa_op *rop = &krop->rop; ++ const char *driver = "rsa-rk"; ++ struct crypto_akcipher *tfm = NULL; ++ struct akcipher_request *req = NULL; ++ DECLARE_CRYPTO_WAIT(wait); ++ struct scatterlist src, dst; ++ bool is_priv_key = (rop->flags & COP_FLAG_RSA_PRIV) == COP_FLAG_RSA_PRIV; + -+ switch (algt->algo) { -+ case CIPHER_ALGO_DES: -+ if (keylen != DES_KEY_SIZE) -+ goto error; ++ /* The key size cannot exceed RK_RSA_BER_KEY_MAX Byte */ ++ if (rop->key_len > RK_RSA_BER_KEY_MAX) ++ return -ENOKEY; + -+ err = verify_skcipher_des_key(cipher, key); -+ if (err) -+ goto error; ++ if (rop->in_len > RK_RSA_KEY_MAX_BYTES || ++ rop->out_len > RK_RSA_KEY_MAX_BYTES) ++ return -EINVAL; + -+ break; -+ case CIPHER_ALGO_DES3_EDE: -+ err = verify_skcipher_des3_key(cipher, key); -+ if (err) -+ goto error; -+ break; -+ case CIPHER_ALGO_AES: -+ if (keylen != AES_KEYSIZE_128 && -+ keylen != AES_KEYSIZE_192 && -+ keylen != AES_KEYSIZE_256) -+ goto error; -+ break; -+ default: -+ goto error; ++ tfm = crypto_alloc_akcipher(driver, 0, 0); ++ if (IS_ERR(tfm)) { ++ ddebug(2, "alg: akcipher: Failed to load tfm for %s: %ld\n", ++ driver, PTR_ERR(tfm)); ++ return PTR_ERR(tfm); + } + -+ memcpy(ctx->key, key, keylen); -+ ctx->keylen = keylen; -+ -+ return 0; ++ req = akcipher_request_alloc(tfm, GFP_KERNEL); ++ if (!req) { ++ ddebug(2, "akcipher_request_alloc failed\n"); ++ ret = -ENOMEM; ++ goto exit; ++ } + -+error: -+ return -EINVAL; -+} ++ key = kzalloc(rop->key_len, GFP_KERNEL); ++ if (!key) { ++ ret = -ENOMEM; ++ goto exit; ++ } + ++ if (unlikely(copy_from_user(key, u64_to_user_ptr(rop->key), rop->key_len))) { ++ ret = -EFAULT; ++ goto exit; ++ } + -+static int rk_cipher_encrypt(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -+ struct rk_crypto_dev *rk_dev = ctx->rk_dev; -+ struct rk_crypto_algt *algt; -+ int ret; ++ in = kzalloc(rop->in_len, GFP_KERNEL); ++ if (!in) { ++ ret = -ENOMEM; ++ goto exit; ++ } + -+ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); ++ if (unlikely(copy_from_user(in, u64_to_user_ptr(rop->in), rop->in_len))) { ++ ret = -EFAULT; ++ goto exit; ++ } + -+ ret = rk_get_bc(algt->algo, algt->mode, &ctx->mode); -+ if (ret) -+ return ret; ++ if (is_priv_key) ++ ret = crypto_akcipher_set_priv_key(tfm, key, rop->key_len); ++ else ++ ret = crypto_akcipher_set_pub_key(tfm, key, rop->key_len); ++ if (ret) { ++ derr(1, "crypto_akcipher_set_%s_key error[%d]", ++ is_priv_key ? 
"priv" : "pub", ret); ++ ret = -ENOKEY; ++ goto exit; ++ } + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ out_len_max = crypto_akcipher_maxsize(tfm); ++ out = kzalloc(out_len_max, GFP_KERNEL); ++ if (!out) { ++ ret = -ENOMEM; ++ goto exit; ++ } + -+ return rk_handle_req(rk_dev, req); -+} ++ sg_init_one(&src, in, rop->in_len); ++ sg_init_one(&dst, out, out_len_max); + -+static int rk_cipher_decrypt(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -+ struct rk_crypto_dev *rk_dev = ctx->rk_dev; -+ struct rk_crypto_algt *algt; -+ int ret; ++ crypto_init_wait(&wait); ++ akcipher_request_set_crypt(req, &src, &dst, rop->in_len, out_len_max); + -+ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); ++ switch (rop->op) { ++ case AOP_ENCRYPT: ++ ret = crypto_wait_req(crypto_akcipher_encrypt(req), &wait); ++ break; ++ case AOP_DECRYPT: ++ ret = crypto_wait_req(crypto_akcipher_decrypt(req), &wait); ++ break; ++ default: ++ derr(1, "unknown ops %x", rop->op); ++ ret = -EINVAL; ++ break; ++ } + -+ ret = rk_get_bc(algt->algo, algt->mode, &ctx->mode); -+ if (ret) -+ return ret; ++ if (ret) { ++ derr(1, "alg: akcipher: failed %d\n", ret); ++ goto exit; ++ } + -+ ctx->mode |= RK_CRYPTO_DEC; ++ if (unlikely(copy_to_user(u64_to_user_ptr(rop->out), out, req->dst_len))) { ++ derr(1, "Cannot copy to userspace"); ++ ret = -EFAULT; ++ goto exit; ++ } + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ rop->out_len = req->dst_len; ++exit: ++ kfree(out); ++ kfree(in); ++ kfree(key); ++ akcipher_request_free(req); ++ crypto_free_akcipher(tfm); + -+ return rk_handle_req(rk_dev, req); ++ return ret; +} + -+static void rk_ablk_hw_init(struct rk_crypto_dev *rk_dev) ++/* Typical AEAD (i.e. GCM) encryption/decryption. ++ * During decryption the tag is verified. 
++ */ ++static int rk_auth_fd_n_crypt(struct csession *ses_ptr, struct kernel_crypt_auth_fd_op *kcaop, ++ struct scatterlist *auth_sg, uint32_t auth_len, ++ struct scatterlist *src_sg, ++ struct scatterlist *dst_sg, uint32_t len) +{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); -+ u32 ivsize, block, conf_reg = 0; -+ -+ block = crypto_tfm_alg_blocksize(tfm); -+ ivsize = crypto_skcipher_ivsize(cipher); ++ int ret; ++ struct crypt_auth_fd_op *caop = &kcaop->caop; ++ int max_tag_len; + -+ if (block == DES_BLOCK_SIZE) { -+ memcpy_toio(ctx->rk_dev->reg + RK_CRYPTO_TDES_KEY1_0, -+ ctx->key, ctx->keylen); -+ ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | -+ RK_CRYPTO_TDES_BYTESWAP_KEY | -+ RK_CRYPTO_TDES_BYTESWAP_IV; -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_TDES_CTRL, ctx->mode); -+ memcpy_toio(rk_dev->reg + RK_CRYPTO_TDES_IV_0, -+ req->iv, ivsize); -+ conf_reg = RK_CRYPTO_DESSEL; -+ } else { -+ memcpy_toio(ctx->rk_dev->reg + RK_CRYPTO_AES_KEY_0, -+ ctx->key, ctx->keylen); -+ ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | -+ RK_CRYPTO_AES_KEY_CHANGE | -+ RK_CRYPTO_AES_BYTESWAP_KEY | -+ RK_CRYPTO_AES_BYTESWAP_IV; -+ if (ctx->keylen == AES_KEYSIZE_192) -+ ctx->mode |= RK_CRYPTO_AES_192BIT_key; -+ else if (ctx->keylen == AES_KEYSIZE_256) -+ ctx->mode |= RK_CRYPTO_AES_256BIT_key; -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_AES_CTRL, ctx->mode); -+ memcpy_toio(rk_dev->reg + RK_CRYPTO_AES_IV_0, -+ req->iv, ivsize); ++ max_tag_len = cryptodev_cipher_get_tag_size(&ses_ptr->cdata); ++ if (unlikely(caop->tag_len > max_tag_len)) { ++ derr(0, "Illegal tag length: %d", caop->tag_len); ++ return -EINVAL; + } -+ conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | -+ RK_CRYPTO_BYTESWAP_BRFIFO; -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CONF, conf_reg); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTENA, -+ RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); -+} + -+static void crypto_dma_start(struct rk_crypto_dev *rk_dev) -+{ -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ if (caop->tag_len) ++ cryptodev_cipher_set_tag_size(&ses_ptr->cdata, caop->tag_len); ++ else ++ caop->tag_len = max_tag_len; + -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BRDMAS, alg_ctx->addr_in); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BRDMAL, alg_ctx->count / 4); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BTDMAS, alg_ctx->addr_out); -+ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | -+ _SBF(RK_CRYPTO_BLOCK_START, 16)); -+} ++ cryptodev_cipher_auth(&ses_ptr->cdata, auth_sg, auth_len); + -+static int rk_set_data_start(struct rk_crypto_dev *rk_dev) -+{ -+ int err; -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ u32 ivsize = crypto_skcipher_ivsize(tfm); -+ u8 *src_last_blk = page_address(sg_page(alg_ctx->sg_src)) + -+ alg_ctx->sg_src->offset + alg_ctx->sg_src->length - ivsize; ++ if (caop->op == COP_ENCRYPT) { ++ ret = cryptodev_cipher_encrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_encrypt: %d", ret); ++ return ret; ++ } ++ } else { ++ ret = cryptodev_cipher_decrypt(&ses_ptr->cdata, ++ src_sg, dst_sg, len); + -+ /* Store the iv that need to be updated in chain mode. 
-+ * And update the IV buffer to contain the next IV for decryption mode. -+ */ -+ if (ctx->mode & RK_CRYPTO_DEC) { -+ memcpy(ctx->iv, src_last_blk, ivsize); -+ sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, -+ req->iv, ivsize, alg_ctx->total - ivsize); ++ if (unlikely(ret)) { ++ derr(0, "cryptodev_cipher_decrypt: %d", ret); ++ return ret; ++ } + } + -+ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); -+ if (!err) -+ crypto_dma_start(rk_dev); -+ return err; ++ return 0; +} + -+static int rk_ablk_start(struct rk_crypto_dev *rk_dev) ++static void sg_init_table_set_page(struct scatterlist *sgl_dst, unsigned int nents_dst, ++ struct scatterlist *sgl_src, unsigned int len) +{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ int err = 0; -+ -+ alg_ctx->left_bytes = req->cryptlen; -+ alg_ctx->total = req->cryptlen; -+ alg_ctx->sg_src = req->src; -+ alg_ctx->req_src = req->src; -+ alg_ctx->src_nents = sg_nents_for_len(req->src, req->cryptlen); -+ alg_ctx->sg_dst = req->dst; -+ alg_ctx->req_dst = req->dst; -+ alg_ctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); -+ -+ rk_ablk_hw_init(rk_dev); -+ err = rk_set_data_start(rk_dev); ++ sg_init_table(sgl_dst, nents_dst); ++ sg_set_page(sgl_dst, sg_page(sgl_src), len, sgl_src->offset); + -+ return err; ++ sg_dma_address(sgl_dst) = sg_dma_address(sgl_src); ++ sg_dma_len(sgl_dst) = len; +} + -+static void rk_iv_copyback(struct rk_crypto_dev *rk_dev) ++/* This is the main crypto function - zero-copy edition */ ++static int crypto_auth_fd_zc_rk(struct fcrypt *fcr, struct csession *ses_ptr, ++ struct kernel_crypt_auth_fd_op *kcaop) +{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ u32 ivsize = crypto_skcipher_ivsize(tfm); ++ struct crypt_auth_fd_op *caop = &kcaop->caop; ++ struct dma_buf *dma_buf_in = NULL, *dma_buf_out = NULL, *dma_buf_auth = NULL; ++ struct sg_table *sg_tbl_in = NULL, *sg_tbl_out = NULL, *sg_tbl_auth = NULL; ++ struct dma_buf_attachment *dma_attach_in = NULL, *dma_attach_out = NULL; ++ struct dma_buf_attachment *dma_attach_auth = NULL; ++ struct dma_fd_map_node *node_src = NULL, *node_dst = NULL, *node_auth = NULL; ++ struct scatterlist *dst_sg, *src_sg; ++ struct scatterlist auth_src[2], auth_dst[2], src[2], dst[2], tag[2]; ++ unsigned char *tag_buf = NULL; ++ int ret = 0; + -+ /* Update the IV buffer to contain the next IV for encryption mode. 
*/ -+ if (!(ctx->mode & RK_CRYPTO_DEC) && req->iv) { -+ if (alg_ctx->aligned) { -+ memcpy(req->iv, sg_virt(alg_ctx->sg_dst) + -+ alg_ctx->sg_dst->length - ivsize, ivsize); -+ } else { -+ memcpy(req->iv, rk_dev->addr_vir + -+ alg_ctx->count - ivsize, ivsize); ++ node_src = dma_fd_find_node(fcr, caop->src_fd); ++ if (node_src) { ++ sg_tbl_in = node_src->sgtbl; ++ } else { ++ ret = get_dmafd_sgtbl(caop->src_fd, caop->len, DMA_TO_DEVICE, ++ &sg_tbl_in, &dma_attach_in, &dma_buf_in); ++ if (unlikely(ret)) { ++ derr(1, "Error get_dmafd_sgtbl src."); ++ goto exit; + } + } -+} -+ -+static void rk_update_iv(struct rk_crypto_dev *rk_dev) -+{ -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ u32 ivsize = crypto_skcipher_ivsize(tfm); -+ u8 *new_iv = NULL; + -+ if (ctx->mode & RK_CRYPTO_DEC) { -+ new_iv = ctx->iv; ++ node_dst = dma_fd_find_node(fcr, caop->dst_fd); ++ if (node_dst) { ++ sg_tbl_out = node_dst->sgtbl; + } else { -+ new_iv = page_address(sg_page(alg_ctx->sg_dst)) + -+ alg_ctx->sg_dst->offset + -+ alg_ctx->sg_dst->length - ivsize; ++ ret = get_dmafd_sgtbl(caop->dst_fd, caop->len, DMA_FROM_DEVICE, ++ &sg_tbl_out, &dma_attach_out, &dma_buf_out); ++ if (unlikely(ret)) { ++ derr(1, "Error get_dmafd_sgtbl dst."); ++ goto exit; ++ } + } + -+ if (ivsize == DES_BLOCK_SIZE) -+ memcpy_toio(rk_dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); -+ else if (ivsize == AES_BLOCK_SIZE) -+ memcpy_toio(rk_dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); -+} -+ -+/* return: -+ * true some err was occurred -+ * fault no err, continue -+ */ -+static int rk_ablk_rx(struct rk_crypto_dev *rk_dev) -+{ -+ int err = 0; -+ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); -+ -+ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); -+ -+ err = rk_dev->unload_data(rk_dev); -+ if (err) -+ goto out_rx; ++ src_sg = sg_tbl_in->sgl; ++ dst_sg = sg_tbl_out->sgl; + -+ if (alg_ctx->left_bytes) { -+ rk_update_iv(rk_dev); -+ if (alg_ctx->aligned) { -+ if (sg_is_last(alg_ctx->sg_src)) { -+ dev_err(rk_dev->dev, "[%s:%d] Lack of data\n", -+ __func__, __LINE__); -+ err = -ENOMEM; -+ goto out_rx; ++ if (caop->auth_len > 0) { ++ node_auth = dma_fd_find_node(fcr, caop->auth_fd); ++ if (node_auth) { ++ sg_tbl_auth = node_auth->sgtbl; ++ } else { ++ ret = get_dmafd_sgtbl(caop->auth_fd, caop->auth_len, DMA_TO_DEVICE, ++ &sg_tbl_auth, &dma_attach_auth, &dma_buf_auth); ++ if (unlikely(ret)) { ++ derr(1, "Error get_dmafd_sgtbl auth."); ++ goto exit; + } -+ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); -+ alg_ctx->sg_dst = sg_next(alg_ctx->sg_dst); + } -+ err = rk_set_data_start(rk_dev); -+ } else { -+ rk_iv_copyback(rk_dev); -+ } -+out_rx: -+ return err; -+} -+ -+static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) -+{ -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ struct rk_crypto_algt *algt; -+ struct rk_crypto_dev *rk_dev; -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); -+ rk_dev = algt->rk_dev; ++ sg_init_table_set_page(auth_src, ARRAY_SIZE(auth_src), ++ sg_tbl_auth->sgl, caop->auth_len); + -+ memset(ctx, 0x00, sizeof(*ctx)); ++ sg_init_table_set_page(auth_dst, ARRAY_SIZE(auth_dst), ++ sg_tbl_auth->sgl, caop->auth_len); + -+ 
if (!rk_dev->request_crypto) -+ return -EFAULT; ++ sg_init_table_set_page(src, ARRAY_SIZE(src), ++ sg_tbl_in->sgl, caop->len); + -+ rk_dev->request_crypto(rk_dev, alg_name); ++ sg_init_table_set_page(dst, ARRAY_SIZE(dst), ++ sg_tbl_out->sgl, caop->len); + -+ alg_ctx->align_size = crypto_skcipher_alignmask(tfm) + 1; ++ sg_chain(auth_src, 2, src); ++ sg_chain(auth_dst, 2, dst); ++ src_sg = auth_src; ++ dst_sg = auth_dst; ++ } + -+ alg_ctx->ops.start = rk_ablk_start; -+ alg_ctx->ops.update = rk_ablk_rx; -+ alg_ctx->ops.complete = rk_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ /* get tag */ ++ if (caop->tag && caop->tag_len > 0) { ++ tag_buf = kcalloc(caop->tag_len, sizeof(*tag_buf), GFP_KERNEL); ++ if (unlikely(!tag_buf)) { ++ derr(1, "unable to kcalloc %d.", caop->tag_len); ++ ret = -EFAULT; ++ goto exit; ++ } + -+ ctx->rk_dev = rk_dev; ++ ret = copy_from_user(tag_buf, u64_to_user_ptr((u64)caop->tag), caop->tag_len); ++ if (unlikely(ret)) { ++ derr(1, "unable to copy tag data from userspace."); ++ ret = -EFAULT; ++ goto exit; ++ } + -+ return 0; -+} ++ sg_init_table(tag, 2); ++ sg_set_buf(tag, tag_buf, caop->tag_len); + -+static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) -+{ -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); ++ if (caop->op == COP_ENCRYPT) ++ sg_chain(dst, 2, tag); ++ else ++ sg_chain(src, 2, tag); ++ } + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); -+} ++ if (caop->op == COP_ENCRYPT) ++ ret = rk_auth_fd_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, ++ src_sg, dst_sg, caop->len); ++ else ++ ret = rk_auth_fd_n_crypt(ses_ptr, kcaop, NULL, caop->auth_len, ++ src_sg, dst_sg, caop->len + caop->tag_len); + -+struct rk_crypto_algt rk_v1_ecb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); ++ if (!ret && caop->op == COP_ENCRYPT && tag_buf) { ++ ret = copy_to_user(u64_to_user_ptr((u64)kcaop->caop.tag), tag_buf, caop->tag_len); ++ if (unlikely(ret)) { ++ derr(1, "Error in copying to userspace"); ++ ret = -EFAULT; ++ goto exit; ++ } ++ } + -+struct rk_crypto_algt rk_v1_cbc_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); ++exit: ++ kfree(tag_buf); + -+struct rk_crypto_algt rk_v1_ecb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); ++ if (dma_buf_in) ++ put_dmafd_sgtbl(caop->src_fd, DMA_TO_DEVICE, ++ sg_tbl_in, dma_attach_in, dma_buf_in); + -+struct rk_crypto_algt rk_v1_cbc_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); ++ if (dma_buf_out) ++ put_dmafd_sgtbl(caop->dst_fd, DMA_FROM_DEVICE, ++ sg_tbl_out, dma_attach_out, dma_buf_out); + -+struct rk_crypto_algt rk_v1_ecb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); ++ if (dma_buf_auth) ++ put_dmafd_sgtbl(caop->auth_fd, DMA_TO_DEVICE, ++ sg_tbl_auth, dma_attach_auth, dma_buf_auth); + -+struct rk_crypto_algt rk_v1_cbc_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); -diff --git a/drivers/crypto/rockchip/rk_crypto_v2.c b/drivers/crypto/rockchip/rk_crypto_v2.c -new file mode 100644 -index 000000000..e8f2a0a24 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2.c -@@ -0,0 +1,104 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip Crypto V2 -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ */ ++ return ret; ++} + -+#include "rk_crypto_core.h" -+#include "rk_crypto_v2.h" ++static int 
__crypto_auth_fd_run_zc(struct fcrypt *fcr, struct csession *ses_ptr, ++ struct kernel_crypt_auth_fd_op *kcaop) ++{ ++ struct crypt_auth_fd_op *caop = &kcaop->caop; ++ int ret; + -+static const char * const crypto_v2_rsts[] = { -+ "crypto-rst", -+}; ++ if (caop->flags & COP_FLAG_AEAD_RK_TYPE) ++ ret = crypto_auth_fd_zc_rk(fcr, ses_ptr, kcaop); ++ else ++ ret = -EINVAL; /* other types, not implemented */ + -+static struct rk_crypto_algt *crypto_v2_algs[] = { -+ &rk_v2_ecb_sm4_alg, /* ecb(sm4) */ -+ &rk_v2_cbc_sm4_alg, /* cbc(sm4) */ -+ &rk_v2_xts_sm4_alg, /* xts(sm4) */ -+ &rk_v2_cfb_sm4_alg, /* cfb(sm4) */ -+ &rk_v2_ofb_sm4_alg, /* ofb(sm4) */ -+ &rk_v2_ctr_sm4_alg, /* ctr(sm4) */ -+ &rk_v2_gcm_sm4_alg, /* gcm(sm4) */ ++ return ret; ++} + -+ &rk_v2_ecb_aes_alg, /* ecb(aes) */ -+ &rk_v2_cbc_aes_alg, /* cbc(aes) */ -+ &rk_v2_xts_aes_alg, /* xts(aes) */ -+ &rk_v2_cfb_aes_alg, /* cfb(aes) */ -+ &rk_v2_ofb_aes_alg, /* ofb(aes) */ -+ &rk_v2_ctr_aes_alg, /* ctr(aes) */ -+ &rk_v2_gcm_aes_alg, /* gcm(aes) */ ++static int crypto_auth_fd_run(struct fcrypt *fcr, struct kernel_crypt_auth_fd_op *kcaop) ++{ ++ struct csession *ses_ptr; ++ struct crypt_auth_fd_op *caop = &kcaop->caop; ++ int ret = -EINVAL; + -+ &rk_v2_ecb_des_alg, /* ecb(des) */ -+ &rk_v2_cbc_des_alg, /* cbc(des) */ -+ &rk_v2_cfb_des_alg, /* cfb(des) */ -+ &rk_v2_ofb_des_alg, /* ofb(des) */ ++ if (unlikely(caop->op != COP_ENCRYPT && caop->op != COP_DECRYPT)) { ++ ddebug(1, "invalid operation op=%u", caop->op); ++ return -EINVAL; ++ } + -+ &rk_v2_ecb_des3_ede_alg, /* ecb(des3_ede) */ -+ &rk_v2_cbc_des3_ede_alg, /* cbc(des3_ede) */ -+ &rk_v2_cfb_des3_ede_alg, /* cfb(des3_ede) */ -+ &rk_v2_ofb_des3_ede_alg, /* ofb(des3_ede) */ ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", caop->ses); ++ return -EINVAL; ++ } + -+ &rk_v2_ahash_sha1, /* sha1 */ -+ &rk_v2_ahash_sha224, /* sha224 */ -+ &rk_v2_ahash_sha256, /* sha256 */ -+ &rk_v2_ahash_sha384, /* sha384 */ -+ &rk_v2_ahash_sha512, /* sha512 */ -+ &rk_v2_ahash_md5, /* md5 */ -+ &rk_v2_ahash_sm3, /* sm3 */ ++ if (unlikely(ses_ptr->cdata.init == 0)) { ++ derr(1, "cipher context not initialized"); ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+ &rk_v2_hmac_sha1, /* hmac(sha1) */ -+ &rk_v2_hmac_sha256, /* hmac(sha256) */ -+ &rk_v2_hmac_sha512, /* hmac(sha512) */ -+ &rk_v2_hmac_md5, /* hmac(md5) */ -+ &rk_v2_hmac_sm3, /* hmac(sm3) */ ++ /* If we have a hash/mac handle reset its state */ ++ if (ses_ptr->hdata.init != 0) { ++ ret = cryptodev_hash_reset(&ses_ptr->hdata); ++ if (unlikely(ret)) { ++ derr(1, "error in cryptodev_hash_reset()"); ++ goto out_unlock; ++ } ++ } + -+ &rk_v2_asym_rsa, /* rsa */ -+}; ++ cryptodev_cipher_set_iv(&ses_ptr->cdata, kcaop->iv, ++ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); + -+int rk_hw_crypto_v2_init(struct device *dev, void *hw_info) -+{ -+ struct rk_hw_crypto_v2_info *info = -+ (struct rk_hw_crypto_v2_info *)hw_info; ++ ret = __crypto_auth_fd_run_zc(fcr, ses_ptr, kcaop); ++ if (unlikely(ret)) { ++ derr(1, "error in __crypto_auth_fd_run_zc()"); ++ goto out_unlock; ++ } + -+ if (!dev || !hw_info) -+ return -EINVAL; ++ ret = 0; + -+ memset(info, 0x00, sizeof(*info)); ++ cryptodev_cipher_get_iv(&ses_ptr->cdata, kcaop->iv, ++ min(ses_ptr->cdata.ivsize, kcaop->ivlen)); + -+ return rk_crypto_hw_desc_alloc(dev, &info->hw_desc); ++out_unlock: ++ crypto_put_session(ses_ptr); ++ return ret; +} + -+void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info) ++/* ++ 
* Return tag (digest) length for authenticated encryption ++ * If the cipher and digest are separate, hdata.init is set - just return ++ * digest length. Otherwise return digest length for aead ciphers ++ */ ++static int rk_cryptodev_get_tag_len(struct csession *ses_ptr) +{ -+ struct rk_hw_crypto_v2_info *info = -+ (struct rk_hw_crypto_v2_info *)hw_info; -+ -+ if (!dev || !hw_info) -+ return; -+ -+ rk_crypto_hw_desc_free(&info->hw_desc); ++ if (ses_ptr->hdata.init) ++ return ses_ptr->hdata.digestsize; ++ else ++ return cryptodev_cipher_get_tag_size(&ses_ptr->cdata); +} + -+const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num) ++/* ++ * Calculate destination buffer length for authenticated encryption. The ++ * expectation is that user-space code allocates exactly the same space for ++ * destination buffer before calling cryptodev. The result is cipher-dependent. ++ */ ++static int rk_cryptodev_fd_get_dst_len(struct crypt_auth_fd_op *caop, struct csession *ses_ptr) +{ -+ *num = ARRAY_SIZE(crypto_v2_rsts); ++ int dst_len = caop->len; + -+ return crypto_v2_rsts; -+} ++ if (caop->op == COP_DECRYPT) ++ return dst_len; + -+struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num) -+{ -+ *num = ARRAY_SIZE(crypto_v2_algs); ++ dst_len += caop->tag_len; + -+ return crypto_v2_algs; ++ /* for TLS always add some padding so the total length is rounded to ++ * cipher block size ++ */ ++ if (caop->flags & COP_FLAG_AEAD_TLS_TYPE) { ++ int bs = ses_ptr->cdata.blocksize; ++ ++ dst_len += bs - (dst_len % bs); ++ } ++ ++ return dst_len; +} + -+bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) ++static int fill_kcaop_fd_from_caop(struct kernel_crypt_auth_fd_op *kcaop, struct fcrypt *fcr) +{ -+ return true; -+} ++ struct crypt_auth_fd_op *caop = &kcaop->caop; ++ struct csession *ses_ptr; ++ int ret; + -diff --git a/drivers/crypto/rockchip/rk_crypto_v2.h b/drivers/crypto/rockchip/rk_crypto_v2.h -new file mode 100644 -index 000000000..9e82346d6 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2.h -@@ -0,0 +1,95 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ -+ -+#ifndef __RK_CRYPTO_V2_H__ -+#define __RK_CRYPTO_V2_H__ -+ -+#include -+ -+#include "rk_crypto_utils.h" -+ -+struct rk_hw_crypto_v2_info { -+ struct rk_hw_desc hw_desc; -+}; -+ -+#define RK_CRYPTO_V2_SOC_DATA_INIT(names, soft_aes_192) {\ -+ .crypto_ver = "CRYPTO V2.0.0.0",\ -+ .use_soft_aes192 = soft_aes_192,\ -+ .valid_algs_name = (names),\ -+ .valid_algs_num = ARRAY_SIZE(names),\ -+ .hw_init = rk_hw_crypto_v2_init,\ -+ .hw_deinit = rk_hw_crypto_v2_deinit,\ -+ .hw_get_rsts = rk_hw_crypto_v2_get_rsts,\ -+ .hw_get_algts = rk_hw_crypto_v2_get_algts,\ -+ .hw_is_algo_valid = rk_hw_crypto_v2_algo_valid,\ -+ .hw_info_size = sizeof(struct rk_hw_crypto_v2_info),\ -+ .default_pka_offset = 0x0480,\ -+ .use_lli_chain = true,\ -+} -+ -+#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) -+ -+extern struct rk_crypto_algt rk_v2_ecb_sm4_alg; -+extern struct rk_crypto_algt rk_v2_cbc_sm4_alg; -+extern struct rk_crypto_algt rk_v2_xts_sm4_alg; -+extern struct rk_crypto_algt rk_v2_cfb_sm4_alg; -+extern struct rk_crypto_algt rk_v2_ofb_sm4_alg; -+extern struct rk_crypto_algt rk_v2_ctr_sm4_alg; -+extern struct rk_crypto_algt rk_v2_gcm_sm4_alg; -+ -+extern struct rk_crypto_algt rk_v2_ecb_aes_alg; -+extern struct rk_crypto_algt rk_v2_cbc_aes_alg; -+extern struct rk_crypto_algt rk_v2_xts_aes_alg; -+extern struct rk_crypto_algt rk_v2_cfb_aes_alg; -+extern struct rk_crypto_algt rk_v2_ofb_aes_alg; -+extern struct rk_crypto_algt rk_v2_ctr_aes_alg; -+extern struct rk_crypto_algt rk_v2_gcm_aes_alg; -+ -+extern struct rk_crypto_algt rk_v2_ecb_des_alg; -+extern struct rk_crypto_algt rk_v2_cbc_des_alg; -+extern struct rk_crypto_algt rk_v2_cfb_des_alg; -+extern struct rk_crypto_algt rk_v2_ofb_des_alg; -+ -+extern struct rk_crypto_algt rk_v2_ecb_des3_ede_alg; -+extern struct rk_crypto_algt rk_v2_cbc_des3_ede_alg; -+extern struct rk_crypto_algt rk_v2_cfb_des3_ede_alg; -+extern struct rk_crypto_algt rk_v2_ofb_des3_ede_alg; -+ -+extern struct rk_crypto_algt rk_v2_ahash_sha1; -+extern struct rk_crypto_algt rk_v2_ahash_sha224; -+extern struct rk_crypto_algt rk_v2_ahash_sha256; -+extern struct rk_crypto_algt rk_v2_ahash_sha384; -+extern struct rk_crypto_algt rk_v2_ahash_sha512; -+extern struct rk_crypto_algt rk_v2_ahash_md5; -+extern struct rk_crypto_algt rk_v2_ahash_sm3; ++ /* this also enters ses_ptr->sem */ ++ ses_ptr = crypto_get_session_by_sid(fcr, caop->ses); ++ if (unlikely(!ses_ptr)) { ++ derr(1, "invalid session ID=0x%08X", caop->ses); ++ return -EINVAL; ++ } + -+extern struct rk_crypto_algt rk_v2_hmac_md5; -+extern struct rk_crypto_algt rk_v2_hmac_sha1; -+extern struct rk_crypto_algt rk_v2_hmac_sha256; -+extern struct rk_crypto_algt rk_v2_hmac_sha512; -+extern struct rk_crypto_algt rk_v2_hmac_sm3; ++ if (caop->tag_len == 0) ++ caop->tag_len = rk_cryptodev_get_tag_len(ses_ptr); + -+extern struct rk_crypto_algt rk_v2_asym_rsa; ++ kcaop->ivlen = caop->iv ? 
ses_ptr->cdata.ivsize : 0; ++ kcaop->dst_len = rk_cryptodev_fd_get_dst_len(caop, ses_ptr); ++ kcaop->task = current; ++ kcaop->mm = current->mm; + -+int rk_hw_crypto_v2_init(struct device *dev, void *hw_info); -+void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info); -+const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num); -+struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num); -+bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); ++ if (caop->iv) { ++ ret = copy_from_user(kcaop->iv, u64_to_user_ptr((u64)caop->iv), kcaop->ivlen); ++ if (unlikely(ret)) { ++ derr(1, "error copy_from_user IV (%d bytes) returned %d for address %llu", ++ kcaop->ivlen, ret, caop->iv); ++ ret = -EFAULT; ++ goto out_unlock; ++ } ++ } + -+#else ++ ret = 0; + -+static inline int rk_hw_crypto_v2_init(struct device *dev, void *hw_info) { return -EINVAL; } -+static inline void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info) {} -+static inline const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num) { return NULL; } -+static inline struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num) { return NULL; } -+static inline bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, -+ struct rk_crypto_algt *aglt) -+{ -+ return false; ++out_unlock: ++ crypto_put_session(ses_ptr); ++ return ret; +} + -+#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) */ -+ -+#endif /* end of __RK_CRYPTO_V2_H__ */ -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_ahash.c b/drivers/crypto/rockchip/rk_crypto_v2_ahash.c -new file mode 100644 -index 000000000..919603ff4 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_ahash.c -@@ -0,0 +1,379 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Hash acceleration support for Rockchip Crypto v2 -+ * -+ * Copyright (c) 2020, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. 
-+ */ -+ -+#include -+#include -+ -+#include "rk_crypto_core.h" -+#include "rk_crypto_v2.h" -+#include "rk_crypto_v2_reg.h" -+#include "rk_crypto_ahash_utils.h" -+#include "rk_crypto_utils.h" -+ -+#define RK_HASH_CTX_MAGIC 0x1A1A1A1A -+#define RK_POLL_PERIOD_US 100 -+#define RK_POLL_TIMEOUT_US 50000 -+ -+struct rk_ahash_expt_ctx { -+ struct rk_ahash_ctx ctx; -+ u8 lastc[RK_DMA_ALIGNMENT]; -+}; -+ -+static const u32 hash_algo2bc[] = { -+ [HASH_ALGO_MD5] = CRYPTO_MD5, -+ [HASH_ALGO_SHA1] = CRYPTO_SHA1, -+ [HASH_ALGO_SHA224] = CRYPTO_SHA224, -+ [HASH_ALGO_SHA256] = CRYPTO_SHA256, -+ [HASH_ALGO_SHA384] = CRYPTO_SHA384, -+ [HASH_ALGO_SHA512] = CRYPTO_SHA512, -+ [HASH_ALGO_SM3] = CRYPTO_SM3, -+}; -+ -+static void rk_hash_reset(struct rk_crypto_dev *rk_dev) ++static int fill_caop_fd_from_kcaop(struct kernel_crypt_auth_fd_op *kcaop, struct fcrypt *fcr) +{ + int ret; -+ u32 tmp = 0, tmp_mask = 0; -+ unsigned int pool_timeout_us = 1000; -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); + -+ tmp = CRYPTO_SW_CC_RESET; -+ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); -+ -+ /* This is usually done in 20 clock cycles */ -+ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, pool_timeout_us, -+ false, rk_dev, CRYPTO_RST_CTL); -+ if (ret) -+ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", -+ pool_timeout_us); ++ kcaop->caop.len = kcaop->dst_len; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0xffff0000); ++ if (kcaop->ivlen && kcaop->caop.flags & COP_FLAG_WRITE_IV) { ++ ret = copy_to_user(u64_to_user_ptr((u64)kcaop->caop.iv), kcaop->iv, kcaop->ivlen); ++ if (unlikely(ret)) { ++ derr(1, "Error in copying iv to userspace"); ++ return -EFAULT; ++ } ++ } + -+ /* clear dma int status */ -+ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); ++ return 0; +} + -+static int rk_crypto_irq_handle(int irq, void *dev_id) ++static int kcaop_fd_from_user(struct kernel_crypt_auth_fd_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; -+ struct rk_hw_crypto_v2_info *hw_info = -+ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ if (unlikely(copy_from_user(&kcaop->caop, arg, sizeof(kcaop->caop)))) { ++ derr(1, "Error in copying from userspace"); ++ return -EFAULT; ++ } + -+ /* disable crypto irq */ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); ++ return fill_kcaop_fd_from_caop(kcaop, fcr); ++} + -+ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); ++static int kcaop_fd_to_user(struct kernel_crypt_auth_fd_op *kcaop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ int ret; + -+ interrupt_status &= CRYPTO_LOCKSTEP_MASK; ++ ret = fill_caop_fd_from_kcaop(kcaop, fcr); ++ if (unlikely(ret)) { ++ derr(1, "Error in fill_caop_from_kcaop"); ++ return ret; ++ } + -+ if (interrupt_status != CRYPTO_SRC_ITEM_DONE_INT_ST) { -+ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); -+ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", -+ (u32)alg_ctx->addr_in); -+ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", -+ (u32)alg_ctx->addr_out); -+ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); -+ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", -+ (u32)hw_info->hw_desc.lli_head_dma); -+ dev_err(rk_dev->dev, "DMA Error status = %08x\n", -+ interrupt_status); -+ dev_err(rk_dev->dev, "DMA 
CRYPTO_DMA_LLI_ADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); -+ rk_dev->err = -EFAULT; ++ if (unlikely(copy_to_user(arg, &kcaop->caop, sizeof(kcaop->caop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; + } + + return 0; +} + -+static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) ++long ++rk_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_) +{ -+ struct ahash_request *req = ahash_request_cast(base); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(ctx->rk_dev); ++ struct kernel_crypt_fd_op kcop; ++ struct kernel_crypt_fd_map_op kmop; ++ struct kernel_crypt_rsa_op krop; ++ struct kernel_crypt_auth_fd_op kcaop; ++ void __user *arg = (void __user *)arg_; ++ int ret; + -+ struct rk_hw_crypto_v2_info *hw_info = ctx->rk_dev->hw_info; -+ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; ++ switch (cmd) { ++ case RIOCCRYPT_FD: ++ ret = kcop_fd_from_user(&kcop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ if (err) { -+ rk_hash_reset(ctx->rk_dev); -+ pr_err("aligned = %u, align_size = %u\n", -+ alg_ctx->aligned, alg_ctx->align_size); -+ pr_err("total = %u, left = %u, count = %u\n", -+ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); -+ pr_err("lli->src = %08x\n", lli_desc->src_addr); -+ pr_err("lli->src_len = %08x\n", lli_desc->src_len); -+ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); -+ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); -+ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); -+ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); -+ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); -+ } ++ ret = crypto_fd_run(fcr, &kcop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in crypto_run"); ++ return ret; ++ } + -+ if (base->complete) -+ base->complete(base, err); -+} ++ return kcop_fd_to_user(&kcop, fcr, arg); ++ case RIOCAUTHCRYPT_FD: ++ ret = kcaop_fd_from_user(&kcaop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+static inline void clear_hash_out_reg(struct rk_crypto_dev *rk_dev) -+{ -+ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_DOUT_0, 16); -+} ++ ret = crypto_auth_fd_run(fcr, &kcaop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in crypto_run"); ++ return ret; ++ } + -+static int write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) -+{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); ++ return kcaop_fd_to_user(&kcaop, fcr, arg); ++ case RIOCCRYPT_FD_MAP: ++ ret = kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ return 0; -+} ++ ret = dma_fd_map_for_user(fcr, &kmop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in dma_fd_map_for_user"); ++ return ret; ++ } + -+static int 
rk_hw_hash_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) -+{ -+ u32 reg_ctrl = 0; ++ return kcop_map_fd_to_user(&kmop, fcr, arg); ++ case RIOCCRYPT_FD_UNMAP: ++ ret = kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ if (algo >= ARRAY_SIZE(hash_algo2bc)) -+ goto exit; ++ ret = dma_fd_unmap_for_user(fcr, &kmop); ++ if (unlikely(ret)) ++ dwarning(1, "Error in dma_fd_unmap_for_user"); + -+ rk_hash_reset(rk_dev); ++ return ret; ++ case RIOCCRYPT_CPU_ACCESS: ++ ret = kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ clear_hash_out_reg(rk_dev); ++ ret = dma_fd_begin_cpu_access(fcr, &kmop); ++ if (unlikely(ret)) ++ dwarning(1, "Error in dma_fd_begin_cpu_access"); + -+ reg_ctrl = hash_algo2bc[algo] | CRYPTO_HW_PAD_ENABLE; ++ return ret; ++ case RIOCCRYPT_DEV_ACCESS: ++ ret = kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ if (IS_TYPE_HMAC(type)) { -+ CRYPTO_TRACE("this is hmac"); -+ reg_ctrl |= CRYPTO_HMAC_ENABLE; -+ } ++ ret = dma_fd_end_cpu_access(fcr, &kmop); ++ if (unlikely(ret)) ++ dwarning(1, "Error in dma_fd_end_cpu_access"); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, reg_ctrl | CRYPTO_WRITE_MASK_ALL); -+ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ return ret; ++ case RIOCCRYPT_RSA_CRYPT: ++ ret = kcop_rsa_from_user(&krop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ return 0; -+exit: -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ ret = crypto_rsa_run(fcr, &krop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in rsa_run"); ++ return ret; ++ } + -+ return -EINVAL; ++ return kcop_rsa_to_user(&krop, fcr, arg); ++ default: ++ return -EINVAL; ++ } +} + -+static void clean_hash_setting(struct rk_crypto_dev *rk_dev) -+{ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); -+} ++/* compatibility code for 32bit userlands */ ++#ifdef CONFIG_COMPAT + -+static int rk_ahash_import(struct ahash_request *req, const void *in) ++static inline void ++compat_to_crypt_fd_op(struct compat_crypt_fd_op *compat, struct crypt_fd_op *cop) +{ -+ struct rk_ahash_expt_ctx state; -+ -+ /* 'in' may not be aligned so memcpy to local variable */ -+ memcpy(&state, in, sizeof(state)); -+ -+ ///TODO: deal with import ++ cop->ses = compat->ses; ++ cop->op = compat->op; ++ cop->flags = compat->flags; ++ cop->len = compat->len; + -+ return 0; ++ cop->src_fd = compat->src_fd; ++ cop->dst_fd = compat->dst_fd; ++ cop->mac = compat_ptr(compat->mac); ++ cop->iv = compat_ptr(compat->iv); +} + -+static int rk_ahash_export(struct ahash_request *req, void *out) ++static inline void ++crypt_fd_op_to_compat(struct crypt_fd_op *cop, struct compat_crypt_fd_op *compat) +{ -+ struct rk_ahash_expt_ctx state; ++ compat->ses = cop->ses; ++ compat->op = cop->op; ++ compat->flags = cop->flags; ++ compat->len = cop->len; + -+ /* Don't let anything leak to 'out' */ -+ memset(&state, 0, sizeof(state)); ++ compat->src_fd = cop->src_fd; ++ compat->dst_fd = cop->dst_fd; ++ compat->mac = ptr_to_compat(cop->mac); ++ compat->iv = ptr_to_compat(cop->iv); ++} + -+ ///TODO: deal with import ++static int compat_kcop_fd_from_user(struct kernel_crypt_fd_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ struct compat_crypt_fd_op compat_cop; + -+ 
memcpy(out, &state, sizeof(state)); ++ if (unlikely(copy_from_user(&compat_cop, arg, sizeof(compat_cop)))) ++ return -EFAULT; ++ compat_to_crypt_fd_op(&compat_cop, &kcop->cop); + -+ return 0; ++ return fill_kcop_fd_from_cop(kcop, fcr); +} + -+static int rk_ahash_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) ++static int compat_kcop_fd_to_user(struct kernel_crypt_fd_op *kcop, ++ struct fcrypt *fcr, void __user *arg) +{ -+ struct rk_hw_crypto_v2_info *hw_info = -+ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); -+ struct crypto_lli_desc *lli_head, *lli_tail; -+ u32 dma_ctl = CRYPTO_DMA_RESTART; -+ bool is_final = flag & RK_FLAG_FINAL; + int ret; ++ struct compat_crypt_fd_op compat_cop; + -+ CRYPTO_TRACE("ctx->calc_cnt = %u, count %u Byte, is_final = %d", -+ ctx->calc_cnt, alg_ctx->count, is_final); -+ -+ if (alg_ctx->count % RK_DMA_ALIGNMENT && !is_final) { -+ dev_err(rk_dev->dev, "count = %u is not aligned with [%u]\n", -+ alg_ctx->count, RK_DMA_ALIGNMENT); -+ return -EINVAL; -+ } -+ -+ if (alg_ctx->count == 0) { -+ /* do nothing */ -+ CRYPTO_TRACE("empty calc"); -+ return 0; -+ } -+ -+ if (alg_ctx->aligned) -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ alg_ctx->sg_src, NULL, alg_ctx->count); -+ else -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ &alg_ctx->sg_tmp, NULL, alg_ctx->count); -+ if (ret) ++ ret = fill_cop_fd_from_kcop(kcop, fcr); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in fill_cop_from_kcop"); + return ret; -+ -+ lli_head = hw_info->hw_desc.lli_head; -+ lli_tail = hw_info->hw_desc.lli_tail; -+ -+ lli_tail->dma_ctrl = is_final ? LLI_DMA_CTRL_LAST : LLI_DMA_CTRL_PAUSE; -+ lli_tail->dma_ctrl |= LLI_DMA_CTRL_SRC_DONE; -+ lli_tail->next_addr = hw_info->hw_desc.lli_head_dma; -+ -+ if (ctx->calc_cnt == 0) { -+ dma_ctl = CRYPTO_DMA_START; -+ -+ lli_head->user_define |= LLI_USER_CIPHER_START; -+ lli_head->user_define |= LLI_USER_STRING_START; -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, -+ (CRYPTO_HASH_ENABLE << CRYPTO_WRITE_MASK_SHIFT) | -+ CRYPTO_HASH_ENABLE); + } ++ crypt_fd_op_to_compat(&kcop->cop, &compat_cop); + -+ if (is_final && alg_ctx->left_bytes == 0) -+ lli_tail->user_define |= LLI_USER_STRING_LAST; -+ -+ CRYPTO_TRACE("dma_ctrl = %08x, user_define = %08x, len = %u", -+ lli_head->dma_ctrl, lli_head->user_define, alg_ctx->count); -+ -+ rk_crypto_dump_hw_desc(&hw_info->hw_desc); -+ -+ dma_wmb(); -+ -+ /* enable crypto irq */ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, dma_ctl | dma_ctl << CRYPTO_WRITE_MASK_SHIFT); -+ ++ if (unlikely(copy_to_user(arg, &compat_cop, sizeof(compat_cop)))) { ++ dwarning(1, "Error copying to user"); ++ return -EFAULT; ++ } + return 0; +} + -+static int rk_ahash_get_result(struct rk_crypto_dev *rk_dev, -+ uint8_t *data, uint32_t data_len) ++static inline void ++compat_to_crypt_fd_map_op(struct compat_crypt_fd_map_op *compat, struct crypt_fd_map_op *mop) +{ -+ int ret = 0; -+ u32 reg_ctrl = 0; -+ -+ ret = read_poll_timeout_atomic(CRYPTO_READ, reg_ctrl, -+ reg_ctrl & CRYPTO_HASH_IS_VALID, -+ RK_POLL_PERIOD_US, -+ RK_POLL_TIMEOUT_US, false, -+ rk_dev, CRYPTO_HASH_VALID); -+ if (ret) -+ goto exit; -+ -+ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_DOUT_0, data, data_len); -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_VALID, CRYPTO_HASH_IS_VALID); -+ -+exit: -+ clean_hash_setting(rk_dev); -+ -+ return 
ret; ++ mop->dma_fd = compat->dma_fd; ++ mop->phys_addr = compat->phys_addr; +} + -+static int rk_cra_hash_init(struct crypto_tfm *tfm) ++static inline void ++crypt_fd_map_op_to_compat(struct crypt_fd_map_op *mop, struct compat_crypt_fd_map_op *compat) +{ -+ struct rk_crypto_algt *algt = -+ rk_ahash_get_algt(__crypto_ahash_cast(tfm)); -+ const char *alg_name = crypto_tfm_alg_name(tfm); -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ -+ CRYPTO_TRACE(); ++ compat->dma_fd = mop->dma_fd; ++ compat->phys_addr = mop->phys_addr; ++} + -+ memset(ctx, 0x00, sizeof(*ctx)); ++static int compat_kcop_map_fd_from_user(struct kernel_crypt_fd_map_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ struct compat_crypt_fd_map_op compat_mop; + -+ if (!rk_dev->request_crypto) ++ if (unlikely(copy_from_user(&compat_mop, arg, sizeof(compat_mop)))) + return -EFAULT; + -+ alg_ctx->align_size = RK_DMA_ALIGNMENT; ++ compat_to_crypt_fd_map_op(&compat_mop, &kcop->mop); + -+ alg_ctx->ops.start = rk_ahash_start; -+ alg_ctx->ops.update = rk_ahash_crypto_rx; -+ alg_ctx->ops.complete = rk_ahash_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ return 0; ++} + -+ alg_ctx->ops.hw_write_key = write_key_reg; -+ alg_ctx->ops.hw_init = rk_hw_hash_init; -+ alg_ctx->ops.hw_dma_start = rk_ahash_dma_start; -+ alg_ctx->ops.hw_get_result = rk_ahash_get_result; ++static int compat_kcop_map_fd_to_user(struct kernel_crypt_fd_map_op *kcop, ++ struct fcrypt *fcr, void __user *arg) ++{ ++ struct compat_crypt_fd_map_op compat_mop; + -+ ctx->rk_dev = rk_dev; -+ ctx->hash_tmp = (u8 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); -+ if (!ctx->hash_tmp) { -+ dev_err(rk_dev->dev, "Can't get zeroed page for hash tmp.\n"); -+ return -ENOMEM; ++ crypt_fd_map_op_to_compat(&kcop->mop, &compat_mop); ++ if (unlikely(copy_to_user(arg, &compat_mop, sizeof(compat_mop)))) { ++ derr(1, "Cannot copy to userspace"); ++ return -EFAULT; + } + -+ rk_dev->request_crypto(rk_dev, alg_name); -+ -+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx)); -+ -+ algt->alg.hash.halg.statesize = sizeof(struct rk_ahash_expt_ctx); -+ + return 0; +} + -+static void rk_cra_hash_exit(struct crypto_tfm *tfm) ++long ++rk_compat_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_) +{ -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); -+ -+ CRYPTO_TRACE(); -+ -+ if (ctx->hash_tmp) -+ free_page((unsigned long)ctx->hash_tmp); ++ struct kernel_crypt_fd_op kcop; ++ struct kernel_crypt_fd_map_op kmop; ++ void __user *arg = (void __user *)arg_; ++ int ret; + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); -+} ++ switch (cmd) { ++ case COMPAT_RIOCCRYPT_FD: ++ ret = compat_kcop_fd_from_user(&kcop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+struct rk_crypto_algt rk_v2_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); -+struct rk_crypto_algt rk_v2_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); -+struct rk_crypto_algt rk_v2_ahash_sha224 = RK_HASH_ALGO_INIT(SHA224, sha224); -+struct rk_crypto_algt rk_v2_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); -+struct rk_crypto_algt rk_v2_ahash_sha384 = RK_HASH_ALGO_INIT(SHA384, sha384); -+struct rk_crypto_algt rk_v2_ahash_sha512 = RK_HASH_ALGO_INIT(SHA512, sha512); -+struct rk_crypto_algt rk_v2_ahash_sm3 = RK_HASH_ALGO_INIT(SM3, sm3); ++ ret = crypto_fd_run(fcr, &kcop); ++ if (unlikely(ret)) { ++ dwarning(1, 
"Error in crypto_run"); ++ return ret; ++ } + -+struct rk_crypto_algt rk_v2_hmac_md5 = RK_HMAC_ALGO_INIT(MD5, md5); -+struct rk_crypto_algt rk_v2_hmac_sha1 = RK_HMAC_ALGO_INIT(SHA1, sha1); -+struct rk_crypto_algt rk_v2_hmac_sha256 = RK_HMAC_ALGO_INIT(SHA256, sha256); -+struct rk_crypto_algt rk_v2_hmac_sha512 = RK_HMAC_ALGO_INIT(SHA512, sha512); -+struct rk_crypto_algt rk_v2_hmac_sm3 = RK_HMAC_ALGO_INIT(SM3, sm3); ++ return compat_kcop_fd_to_user(&kcop, fcr, arg); ++ case COMPAT_RIOCCRYPT_FD_MAP: ++ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c b/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c -new file mode 100644 -index 000000000..1db5adde9 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c -@@ -0,0 +1,320 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * RSA acceleration support for Rockchip crypto v2 -+ * -+ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. -+ * -+ * Author: Lin Jinhan -+ * -+ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. -+ */ ++ ret = dma_fd_map_for_user(fcr, &kmop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in dma_fd_map_for_user"); ++ return ret; ++ } + -+#include ++ return compat_kcop_map_fd_to_user(&kmop, fcr, arg); ++ case COMPAT_RIOCCRYPT_FD_UNMAP: ++ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+#include "rk_crypto_core.h" -+#include "rk_crypto_v2.h" -+#include "rk_crypto_v2_reg.h" -+#include "rk_crypto_v2_pka.h" ++ ret = dma_fd_unmap_for_user(fcr, &kmop); ++ if (unlikely(ret)) ++ dwarning(1, "Error in dma_fd_unmap_for_user"); + -+#define BG_WORDS2BYTES(words) ((words) * sizeof(u32)) -+#define BG_BYTES2WORDS(bytes) (((bytes) + sizeof(u32) - 1) / sizeof(u32)) ++ return ret; ++ case COMPAT_RIOCCRYPT_CPU_ACCESS: ++ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+static DEFINE_MUTEX(akcipher_mutex); ++ ret = dma_fd_begin_cpu_access(fcr, &kmop); ++ if (unlikely(ret)) { ++ dwarning(1, "Error in dma_fd_begin_cpu_access"); ++ return ret; ++ } + -+static void rk_rsa_adjust_rsa_key(struct rsa_key *key) -+{ -+ if (key->n_sz && key->n && !key->n[0]) { -+ key->n++; -+ key->n_sz--; -+ } ++ return compat_kcop_map_fd_to_user(&kmop, fcr, arg); ++ case COMPAT_RIOCCRYPT_DEV_ACCESS: ++ ret = compat_kcop_map_fd_from_user(&kmop, fcr, arg); ++ if (unlikely(ret)) { ++ dwarning(1, "Error copying from user"); ++ return ret; ++ } + -+ if (key->e_sz && key->e && !key->e[0]) { -+ key->e++; -+ key->e_sz--; -+ } ++ ret = dma_fd_end_cpu_access(fcr, &kmop); ++ if (unlikely(ret)) ++ dwarning(1, "Error in dma_fd_end_cpu_access"); + -+ if (key->d_sz && key->d && !key->d[0]) { -+ key->d++; -+ key->d_sz--; ++ return ret; ++ default: ++ return rk_cryptodev_ioctl(fcr, cmd, arg_); + } +} + -+static void rk_rsa_clear_ctx(struct rk_rsa_ctx *ctx) -+{ -+ /* Free the old key if any */ -+ rk_bn_free(ctx->n); -+ ctx->n = NULL; -+ -+ rk_bn_free(ctx->e); -+ ctx->e = NULL; -+ -+ rk_bn_free(ctx->d); -+ ctx->d = NULL; -+} -+ -+static int rk_rsa_setkey(struct crypto_akcipher *tfm, const void *key, -+ unsigned int keylen, bool private) -+{ -+ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); -+ struct rsa_key rsa_key; -+ int ret = -ENOMEM; -+ -+ rk_rsa_clear_ctx(ctx); -+ -+ memset(&rsa_key, 0x00, sizeof(rsa_key)); -+ -+ if (private) 
-+ ret = rsa_parse_priv_key(&rsa_key, key, keylen); -+ else -+ ret = rsa_parse_pub_key(&rsa_key, key, keylen); -+ -+ if (ret < 0) -+ goto error; ++#endif /* CONFIG_COMPAT */ + -+ rk_rsa_adjust_rsa_key(&rsa_key); ++struct cipher_algo_name_map { ++ uint32_t id; ++ const char *name; ++ int is_stream; ++ int is_aead; ++}; + -+ ctx->n = rk_bn_alloc(rsa_key.n_sz); -+ if (!ctx->n) -+ goto error; ++struct hash_algo_name_map { ++ uint32_t id; ++ const char *name; ++ int is_hmac; ++}; + -+ ctx->e = rk_bn_alloc(rsa_key.e_sz); -+ if (!ctx->e) -+ goto error; ++static const struct cipher_algo_name_map c_algo_map_tbl[] = { ++ {CRYPTO_RK_DES_ECB, "ecb-des-rk", 0, 0}, ++ {CRYPTO_RK_DES_CBC, "cbc-des-rk", 0, 0}, ++ {CRYPTO_RK_DES_CFB, "cfb-des-rk", 0, 0}, ++ {CRYPTO_RK_DES_OFB, "ofb-des-rk", 0, 0}, ++ {CRYPTO_RK_3DES_ECB, "ecb-des3_ede-rk", 0, 0}, ++ {CRYPTO_RK_3DES_CBC, "cbc-des3_ede-rk", 0, 0}, ++ {CRYPTO_RK_3DES_CFB, "cfb-des3_ede-rk", 0, 0}, ++ {CRYPTO_RK_3DES_OFB, "ofb-des3_ede-rk", 0, 0}, ++ {CRYPTO_RK_SM4_ECB, "ecb-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_CBC, "cbc-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_CFB, "cfb-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_OFB, "ofb-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_CTS, "cts-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_CTR, "ctr-sm4-rk", 1, 0}, ++ {CRYPTO_RK_SM4_XTS, "xts-sm4-rk", 0, 0}, ++ {CRYPTO_RK_SM4_CCM, "ccm-sm4-rk", 1, 1}, ++ {CRYPTO_RK_SM4_GCM, "gcm-sm4-rk", 1, 1}, ++ {CRYPTO_RK_AES_ECB, "ecb-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_CBC, "cbc-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_CFB, "cfb-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_OFB, "ofb-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_CTS, "cts-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_CTR, "ctr-aes-rk", 1, 0}, ++ {CRYPTO_RK_AES_XTS, "xts-aes-rk", 0, 0}, ++ {CRYPTO_RK_AES_CCM, "ccm-aes-rk", 1, 1}, ++ {CRYPTO_RK_AES_GCM, "gcm-aes-rk", 1, 1}, ++}; + -+ rk_bn_set_data(ctx->n, rsa_key.n, rsa_key.n_sz, RK_BG_BIG_ENDIAN); -+ rk_bn_set_data(ctx->e, rsa_key.e, rsa_key.e_sz, RK_BG_BIG_ENDIAN); ++static const struct hash_algo_name_map h_algo_map_tbl[] = { + -+ CRYPTO_DUMPHEX("n = ", ctx->n->data, BG_WORDS2BYTES(ctx->n->n_words)); -+ CRYPTO_DUMPHEX("e = ", ctx->e->data, BG_WORDS2BYTES(ctx->e->n_words)); ++ {CRYPTO_RK_MD5, "md5-rk", 0}, ++ {CRYPTO_RK_SHA1, "sha1-rk", 0}, ++ {CRYPTO_RK_SHA224, "sha224-rk", 0}, ++ {CRYPTO_RK_SHA256, "sha256-rk", 0}, ++ {CRYPTO_RK_SHA384, "sha384-rk", 0}, ++ {CRYPTO_RK_SHA512, "sha512-rk", 0}, ++ {CRYPTO_RK_SHA512_224, "sha512_224-rk", 0}, ++ {CRYPTO_RK_SHA512_256, "sha512_256-rk", 0}, ++ {CRYPTO_RK_SM3, "sm3-rk", 0}, ++ {CRYPTO_RK_MD5_HMAC, "hmac-md5-rk", 1}, ++ {CRYPTO_RK_SHA1_HMAC, "hmac-sha1-rk", 1}, ++ {CRYPTO_RK_SHA256_HMAC, "hmac-sha256-rk", 1}, ++ {CRYPTO_RK_SHA512_HMAC, "hmac-sha512-rk", 1}, ++ {CRYPTO_RK_SM3_HMAC, "hmac-sm3-rk", 1}, ++ {CRYPTO_RK_SM4_CMAC, "cmac-sm4-rk", 1}, ++ {CRYPTO_RK_SM4_CBC_MAC, "cbcmac-sm4-rk", 1}, ++ {CRYPTO_RK_AES_CMAC, "cmac-aes-rk", 1}, ++ {CRYPTO_RK_AES_CBC_MAC, "cbcmac-aes-rk", 1}, ++}; + -+ if (private) { -+ ctx->d = rk_bn_alloc(rsa_key.d_sz); -+ if (!ctx->d) -+ goto error; ++const char *rk_get_cipher_name(uint32_t id, int *is_stream, int *is_aead) ++{ ++ uint32_t i; + -+ rk_bn_set_data(ctx->d, rsa_key.d, rsa_key.d_sz, RK_BG_BIG_ENDIAN); ++ *is_stream = 0; ++ *is_aead = 0; + -+ CRYPTO_DUMPHEX("d = ", ctx->d->data, BG_WORDS2BYTES(ctx->d->n_words)); ++ for (i = 0; i < ARRAY_SIZE(c_algo_map_tbl); i++) { ++ if (id == c_algo_map_tbl[i].id) { ++ *is_stream = c_algo_map_tbl[i].is_stream; ++ *is_aead = c_algo_map_tbl[i].is_aead; ++ return c_algo_map_tbl[i].name; ++ } + } + -+ return 0; -+error: -+ rk_rsa_clear_ctx(ctx); -+ 
return ret; -+} -+ -+static unsigned int rk_rsa_max_size(struct crypto_akcipher *tfm) -+{ -+ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); -+ -+ CRYPTO_TRACE(); -+ -+ return rk_bn_get_size(ctx->n); ++ return NULL; +} + -+static int rk_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, -+ unsigned int keylen) ++const char *rk_get_hash_name(uint32_t id, int *is_hmac) +{ -+ CRYPTO_TRACE(); ++ uint32_t i; + -+ return rk_rsa_setkey(tfm, key, keylen, false); -+} ++ *is_hmac = 0; + -+static int rk_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key, -+ unsigned int keylen) -+{ -+ CRYPTO_TRACE(); ++ for (i = 0; i < ARRAY_SIZE(h_algo_map_tbl); i++) { ++ if (id == h_algo_map_tbl[i].id) { ++ *is_hmac = h_algo_map_tbl[i].is_hmac; ++ return h_algo_map_tbl[i].name; ++ } ++ } + -+ return rk_rsa_setkey(tfm, key, keylen, true); ++ return NULL; +} + -+static int rk_rsa_calc(struct akcipher_request *req, bool encypt) ++bool rk_cryptodev_multi_thread(const char *name) +{ -+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); -+ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); -+ struct rk_bignum *in = NULL, *out = NULL; -+ u32 key_byte_size; -+ u8 *tmp_buf = NULL; -+ int ret = -ENOMEM; -+ -+ CRYPTO_TRACE(); -+ -+ if (unlikely(!ctx->n || !ctx->e)) -+ return -EINVAL; -+ -+ if (!encypt && !ctx->d) -+ return -EINVAL; -+ -+ key_byte_size = rk_bn_get_size(ctx->n); -+ -+ if (req->dst_len < key_byte_size) { -+ req->dst_len = key_byte_size; -+ return -EOVERFLOW; -+ } -+ -+ if (req->src_len > key_byte_size) -+ return -EINVAL; -+ -+ in = rk_bn_alloc(key_byte_size); -+ if (!in) -+ goto exit; -+ -+ out = rk_bn_alloc(key_byte_size); -+ if (!out) -+ goto exit; -+ -+ tmp_buf = kzalloc(key_byte_size, GFP_KERNEL); -+ if (!tmp_buf) -+ goto exit; ++ uint32_t i; + -+ if (!sg_copy_to_buffer(req->src, sg_nents(req->src), tmp_buf, req->src_len)) { -+ dev_err(ctx->rk_dev->dev, "[%s:%d] sg copy err\n", -+ __func__, __LINE__); -+ ret = -EINVAL; -+ goto exit; ++ for (i = 0; i < ARRAY_SIZE(g_dev_infos); i++) { ++ if (g_dev_infos[i].dev) ++ return g_dev_infos[i].is_multi_thread; + } + -+ ret = rk_bn_set_data(in, tmp_buf, req->src_len, RK_BG_BIG_ENDIAN); -+ if (ret) -+ goto exit; -+ -+ CRYPTO_DUMPHEX("in = ", in->data, BG_WORDS2BYTES(in->n_words)); -+ -+ mutex_lock(&akcipher_mutex); ++ return false; ++} +diff --git a/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h +new file mode 100644 +index 000000000..dff499be3 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/rk_cryptodev.h +@@ -0,0 +1,109 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ if (encypt) -+ ret = rk_pka_expt_mod(in, ctx->e, ctx->n, out); -+ else -+ ret = rk_pka_expt_mod(in, ctx->d, ctx->n, out); ++/* Copyright (c) 2021 Rockchip Electronics Co. Ltd. 
*/ + -+ mutex_unlock(&akcipher_mutex); ++#ifndef __RK_CRYPTODEV_H__ ++#define __RK_CRYPTODEV_H__ + -+ if (ret) -+ goto exit; ++#include ++#include ++#include "cryptodev.h" + -+ CRYPTO_DUMPHEX("out = ", out->data, BG_WORDS2BYTES(out->n_words)); ++/* compatibility stuff */ ++#ifdef CONFIG_COMPAT ++#include + -+ ret = rk_bn_get_data(out, tmp_buf, key_byte_size, RK_BG_BIG_ENDIAN); -+ if (ret) -+ goto exit; ++/* input of RIOCCRYPT_FD */ ++struct compat_crypt_fd_op { ++ uint32_t ses; /* session identifier */ ++ uint16_t op; /* COP_ENCRYPT or COP_DECRYPT */ ++ uint16_t flags; /* see COP_FLAG_* */ ++ uint32_t len; /* length of source data */ ++ int src_fd; /* source data */ ++ int dst_fd; /* pointer to output data */ ++ compat_uptr_t mac;/* pointer to output data for hash/MAC operations */ ++ compat_uptr_t iv;/* initialization vector for encryption operations */ ++}; + -+ CRYPTO_DUMPHEX("tmp_buf = ", tmp_buf, key_byte_size); ++/* input of RIOCCRYPT_FD_MAP/RIOCCRYPT_FD_UNMAP */ ++struct compat_crypt_fd_map_op { ++ int dma_fd; /* session identifier */ ++ uint32_t phys_addr; /* physics addr */ ++}; + -+ if (!sg_copy_from_buffer(req->dst, sg_nents(req->dst), tmp_buf, key_byte_size)) { -+ dev_err(ctx->rk_dev->dev, "[%s:%d] sg copy err\n", -+ __func__, __LINE__); -+ ret = -EINVAL; -+ goto exit; -+ } ++/* compat ioctls, defined for the above structs */ ++#define COMPAT_RIOCCRYPT_FD _IOWR('r', 104, struct compat_crypt_fd_op) ++#define COMPAT_RIOCCRYPT_FD_MAP _IOWR('r', 105, struct compat_crypt_fd_map_op) ++#define COMPAT_RIOCCRYPT_FD_UNMAP _IOW('r', 106, struct compat_crypt_fd_map_op) ++#define COMPAT_RIOCCRYPT_CPU_ACCESS _IOW('r', 107, struct compat_crypt_fd_map_op) ++#define COMPAT_RIOCCRYPT_DEV_ACCESS _IOW('r', 108, struct compat_crypt_fd_map_op) + -+ req->dst_len = key_byte_size; + -+ CRYPTO_TRACE("ret = %d", ret); -+exit: -+ kfree(tmp_buf); ++#endif /* CONFIG_COMPAT */ + -+ rk_bn_free(in); -+ rk_bn_free(out); ++/* kernel-internal extension to struct crypt_op */ ++struct kernel_crypt_fd_op { ++ struct crypt_fd_op cop; + -+ return ret; -+} ++ int ivlen; ++ __u8 iv[EALG_MAX_BLOCK_LEN]; + -+static int rk_rsa_enc(struct akcipher_request *req) -+{ -+ CRYPTO_TRACE(); ++ int digestsize; ++ uint8_t hash_output[AALG_MAX_RESULT_LEN]; + -+ return rk_rsa_calc(req, true); -+} ++ struct task_struct *task; ++ struct mm_struct *mm; ++}; + -+static int rk_rsa_dec(struct akcipher_request *req) -+{ -+ CRYPTO_TRACE(); ++struct kernel_crypt_auth_fd_op { ++ struct crypt_auth_fd_op caop; + -+ return rk_rsa_calc(req, false); -+} ++ int dst_len; /* based on src_len */ ++ __u8 iv[EALG_MAX_BLOCK_LEN]; ++ int ivlen; + -+static int rk_rsa_start(struct rk_crypto_dev *rk_dev) -+{ -+ CRYPTO_TRACE(); ++ struct task_struct *task; ++ struct mm_struct *mm; ++}; + -+ return -ENOSYS; -+} ++/* kernel-internal extension to struct crypt_fd_map_op */ ++struct kernel_crypt_fd_map_op { ++ struct crypt_fd_map_op mop; ++}; + -+static int rk_rsa_crypto_rx(struct rk_crypto_dev *rk_dev) -+{ -+ CRYPTO_TRACE(); ++/* kernel-internal extension to struct crypt_op */ ++struct kernel_crypt_rsa_op { ++ struct crypt_rsa_op rop; + -+ return -ENOSYS; -+} ++ struct task_struct *task; ++ struct mm_struct *mm; ++}; + -+static void rk_rsa_complete(struct crypto_async_request *base, int err) ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_DEV) ++int rk_cryptodev_register_dev(struct device *dev, const char *name); ++int rk_cryptodev_unregister_dev(struct device *dev); ++#else ++static inline int rk_cryptodev_register_dev(struct device *dev, const char *name) +{ -+ if 
(base->complete) -+ base->complete(base, err); ++ return 0; +} + -+static int rk_rsa_init_tfm(struct crypto_akcipher *tfm) ++static inline int rk_cryptodev_unregister_dev(struct device *dev) +{ -+ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); -+ struct akcipher_alg *alg = __crypto_akcipher_alg(tfm->base.__crt_alg); -+ struct rk_crypto_algt *algt; -+ struct rk_crypto_dev *rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ -+ CRYPTO_TRACE(); -+ -+ memset(ctx, 0x00, sizeof(*ctx)); -+ -+ algt = container_of(alg, struct rk_crypto_algt, alg.asym); -+ rk_dev = algt->rk_dev; -+ -+ if (!rk_dev->request_crypto) -+ return -EFAULT; -+ -+ rk_dev->request_crypto(rk_dev, "rsa"); -+ -+ alg_ctx->align_size = crypto_tfm_alg_alignmask(&tfm->base) + 1; -+ -+ alg_ctx->ops.start = rk_rsa_start; -+ alg_ctx->ops.update = rk_rsa_crypto_rx; -+ alg_ctx->ops.complete = rk_rsa_complete; -+ -+ ctx->rk_dev = rk_dev; -+ -+ rk_pka_set_crypto_base(ctx->rk_dev->pka_reg); -+ + return 0; +} ++#endif + -+static void rk_rsa_exit_tfm(struct crypto_akcipher *tfm) -+{ -+ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); ++long ++rk_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_); + -+ CRYPTO_TRACE(); ++long ++rk_compat_cryptodev_ioctl(struct fcrypt *fcr, unsigned int cmd, unsigned long arg_); + -+ rk_rsa_clear_ctx(ctx); ++const char *rk_get_cipher_name(uint32_t id, int *is_stream, int *is_aead); + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, "rsa"); -+} ++const char *rk_get_hash_name(uint32_t id, int *is_hmac); + -+struct rk_crypto_algt rk_v2_asym_rsa = { -+ .name = "rsa", -+ .type = ALG_TYPE_ASYM, -+ .alg.asym = { -+ .encrypt = rk_rsa_enc, -+ .decrypt = rk_rsa_dec, -+ .set_pub_key = rk_rsa_setpubkey, -+ .set_priv_key = rk_rsa_setprivkey, -+ .max_size = rk_rsa_max_size, -+ .init = rk_rsa_init_tfm, -+ .exit = rk_rsa_exit_tfm, -+ .base = { -+ .cra_name = "rsa", -+ .cra_driver_name = "rsa-rk", -+ .cra_priority = RK_CRYPTO_PRIORITY, -+ .cra_module = THIS_MODULE, -+ .cra_ctxsize = sizeof(struct rk_rsa_ctx), -+ }, -+ }, -+}; ++bool rk_cryptodev_multi_thread(const char *name); + -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_pka.c b/drivers/crypto/rockchip/rk_crypto_v2_pka.c ++#endif +diff --git a/drivers/crypto/rockchip/cryptodev_linux/util.c b/drivers/crypto/rockchip/cryptodev_linux/util.c new file mode 100644 -index 000000000..d2c0a265b +index 000000000..28c197eb5 --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_pka.c -@@ -0,0 +1,686 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/crypto/rockchip/cryptodev_linux/util.c +@@ -0,0 +1,80 @@ +/* -+ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. ++ * Copyright (c) 2011 Maxim Levitsky ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
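/*
 * Illustrative sketch, not taken from the patch: rk_get_cipher_name(),
 * declared above, maps a cryptodev algorithm ID such as CRYPTO_RK_AES_CBC to
 * the Rockchip driver name, which can then be handed to the regular kernel
 * crypto API. The wrapper below is hypothetical.
 */
static struct crypto_skcipher *example_alloc_rk_cipher(uint32_t id)
{
	int is_stream, is_aead;
	const char *drv_name = rk_get_cipher_name(id, &is_stream, &is_aead);

	/* AEAD modes (ccm/gcm) would need the aead API instead */
	if (!drv_name || is_aead)
		return ERR_PTR(-ENOENT);

	return crypto_alloc_skcipher(drv_name, 0, 0);
}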
+ */ + -+#include -+ -+#include "rk_crypto_core.h" -+#include "rk_crypto_v2.h" -+#include "rk_crypto_v2_reg.h" -+#include "rk_crypto_v2_pka.h" -+ -+#define PKA_WORDS2BITS(words) ((words) * 32) -+#define PKA_BITS2WORDS(bits) (((bits) + 31) / 32) -+ -+#define PKA_WORDS2BYTES(words) ((words) * 4) -+#define PKA_BYTES2BITS(bytes) ((bytes) * 8) -+ -+/* PKA length set */ -+enum { -+ PKA_EXACT_LEN_ID = 0, -+ PKA_CALC_LEN_ID, -+ PKA_USED_LEN_MAX, -+}; -+ -+/********************* Private MACRO Definition ******************************/ -+#define PKA_POLL_PERIOD_US 1000 -+#define PKA_POLL_TIMEOUT_US 50000 -+ -+/* for private key EXP_MOD operation */ -+#define PKA_MAX_POLL_PERIOD_US 20000 -+#define PKA_MAX_POLL_TIMEOUT_US 2000000 ++#include ++#include ++#include "util.h" + -+#define PKA_MAX_CALC_BITS 4096 -+#define PKA_MAX_CALC_WORDS PKA_BITS2WORDS(PKA_MAX_CALC_BITS) ++/* These were taken from Maxim Levitsky's patch to lkml. ++ */ ++struct scatterlist *cryptodev_sg_advance(struct scatterlist *sg, int consumed) ++{ ++ while (consumed >= sg->length) { ++ consumed -= sg->length; + -+/* PKA N_NP_T0_T1 register default (reset) value: N=0, NP=1, T0=30, T1=31 */ -+#define PKA_N 0UL -+#define PKA_NP 1UL -+#define PKA_T0 30UL /*tmp reg */ -+#define PKA_T1 31UL /*tmp reg */ -+#define PKA_TMP_REG_CNT 2 ++ sg = sg_next(sg); ++ if (!sg) ++ break; ++ } + -+#define PKA_N_NP_T0_T1_REG_DEFAULT \ -+ (PKA_N << CRYPTO_N_VIRTUAL_ADDR_SHIFT | \ -+ PKA_NP << CRYPTO_NP_VIRTUAL_ADDR_SHIFT | \ -+ PKA_T0 << CRYPTO_T0_VIRTUAL_ADDR_SHIFT | \ -+ PKA_T1 << CRYPTO_T1_VIRTUAL_ADDR_SHIFT) ++ WARN_ON(!sg && consumed); + -+#define RES_DISCARD 0x3F ++ if (!sg) ++ return NULL; + -+/* values for defining, that PKA entry is not in use */ -+#define PKA_ADDR_NOT_USED 0xFFC ++ sg->offset += consumed; ++ sg->length -= consumed; + -+/* Machine Opcodes definitions (according to HW CRS ) */ ++ if (sg->offset >= PAGE_SIZE) { ++ struct page *page = ++ nth_page(sg_page(sg), sg->offset / PAGE_SIZE); ++ sg_set_page(sg, page, sg->length, sg->offset % PAGE_SIZE); ++ } + -+enum pka_opcode { -+ PKA_OPCODE_ADD = 0x04, -+ PKA_OPCODE_SUB, -+ PKA_OPCODE_MOD_ADD, -+ PKA_OPCODE_MOD_SUB, -+ PKA_OPCODE_AND, -+ PKA_OPCODE_OR, -+ PKA_OPCODE_XOR, -+ PKA_OPCODE_SHR0 = 0x0C, -+ PKA_OPCODE_SHR1, -+ PKA_OPCODE_SHL0, -+ PKA_OPCODE_SHL1, -+ PKA_OPCODE_LMUL, -+ PKA_OPCODE_MOD_MUL, -+ PKA_OPCODE_MOD_MUL_NR, -+ PKA_OPCODE_MOD_EXP, -+ PKA_OPCODE_DIV, -+ PKA_OPCODE_MOD_INV, -+ PKA_OPCODE_MOD_DIV, -+ PKA_OPCODE_HMUL, -+ PKA_OPCODE_TERMINATE, -+}; ++ return sg; ++} + -+#define PKA_CLK_ENABLE() -+#define PKA_CLK_DISABLE() ++/** ++ * cryptodev_sg_copy - copies sg entries from sg_from to sg_to, such ++ * as sg_to covers first 'len' bytes from sg_from. 
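/*
 * Illustrative sketch, not taken from the patch: the two helpers around this
 * point can be combined to split a scatterlist at a byte boundary, e.g. to
 * peel a MAC off the front of a payload. EXAMPLE_MAC_LEN and the surrounding
 * function are hypothetical; mac_sg must be sized by the caller.
 */
#define EXAMPLE_MAC_LEN 16

static int example_split_sg(struct scatterlist *src_sg,
			    struct scatterlist *mac_sg,
			    struct scatterlist **rest_sg)
{
	int ret;

	/* mac_sg now covers the first EXAMPLE_MAC_LEN bytes of src_sg */
	ret = cryptodev_sg_copy(src_sg, mac_sg, EXAMPLE_MAC_LEN);
	if (ret)
		return ret;

	/* src_sg is adjusted in place; *rest_sg points at the remainder */
	*rest_sg = cryptodev_sg_advance(src_sg, EXAMPLE_MAC_LEN);
	return *rest_sg ? 0 : -EINVAL;
}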
++ */ ++int cryptodev_sg_copy(struct scatterlist *sg_from, struct scatterlist *sg_to, int len) ++{ ++ while (len > sg_from->length) { ++ len -= sg_from->length; + -+#define PKA_READ(offset) readl_relaxed((pka_base) + (offset)) -+#define PKA_WRITE(val, offset) writel_relaxed((val), (pka_base) + (offset)) ++ sg_set_page(sg_to, sg_page(sg_from), ++ sg_from->length, sg_from->offset); + -+#define PKA_BIGNUM_WORDS(x) (rk_bn_get_size(x) / sizeof(u32)) ++ sg_to = sg_next(sg_to); ++ sg_from = sg_next(sg_from); + -+#define PKA_RAM_FOR_PKA() PKA_WRITE((CRYPTO_RAM_PKA_RDY << CRYPTO_WRITE_MASK_SHIFT) | \ -+ CRYPTO_RAM_PKA_RDY, CRYPTO_RAM_CTL) ++ if (len && (!sg_from || !sg_to)) ++ return -ENOMEM; ++ } + -+#define PKA_RAM_FOR_CPU() do { \ -+ PKA_WRITE((CRYPTO_RAM_PKA_RDY << CRYPTO_WRITE_MASK_SHIFT), CRYPTO_RAM_CTL); \ -+ while ((PKA_READ(CRYPTO_RAM_ST) & 0x01) != CRYPTO_CLK_RAM_RDY) \ -+ cpu_relax(); \ -+} while (0) ++ if (len) ++ sg_set_page(sg_to, sg_page(sg_from), ++ len, sg_from->offset); ++ sg_mark_end(sg_to); ++ return 0; ++} + -+#define PKA_GET_SRAM_ADDR(addr) ((void *)(pka_base + CRYPTO_SRAM_BASE + (addr))) +diff --git a/drivers/crypto/rockchip/cryptodev_linux/util.h b/drivers/crypto/rockchip/cryptodev_linux/util.h +new file mode 100644 +index 000000000..f7bf13242 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/util.h +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+/************************************************************************* -+ * Macros for calling PKA operations (names according to operation issue * -+ *************************************************************************/ ++#ifndef UTILS_H ++#define UTILS_H ++int cryptodev_sg_copy(struct scatterlist *sg_from, struct scatterlist *sg_to, int len); ++struct scatterlist *cryptodev_sg_advance(struct scatterlist *sg, int consumed); ++#endif + -+/*--------------------------------------*/ -+/* 1. ADD - SUBTRACT operations */ -+/*--------------------------------------*/ -+/* Add: res = op_a + op_b */ -+#define RK_PKA_ADD(op_a, op_b, res) pka_exec_op(PKA_OPCODE_ADD, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 0, (op_b), 0, (res), 0) +diff --git a/drivers/crypto/rockchip/cryptodev_linux/version.h b/drivers/crypto/rockchip/cryptodev_linux/version.h +new file mode 100644 +index 000000000..be0490244 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/version.h +@@ -0,0 +1,9 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+/* Clr: res = op_a & 0 - clears the operand A. */ -+#define RK_PKA_CLR(op_a) pka_exec_op(PKA_OPCODE_AND, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 1, 0x00, 0, (op_a), 0) ++#ifndef VERSION_H ++#define VERSION_H + -+/* Copy: OpDest = OpSrc || 0 */ -+#define RK_PKA_COPY(op_dest, op_src) pka_exec_op(PKA_OPCODE_OR, PKA_CALC_LEN_ID, \ -+ 0, (op_src), 1, 0x00, 0, (op_dest), 0) ++#define VERSION "1.12" + -+/* Set0: res = op_a || 1 : set bit0 = 1, other bits are not changed */ -+#define RK_PKA_SET_0(op_a, res) pka_exec_op(PKA_OPCODE_OR, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 1, 0x01, 0, (res), 0) ++#endif + -+/*----------------------------------------------*/ -+/* 3. 
SHIFT operations */ -+/*----------------------------------------------*/ -+/* SHL0: res = op_a << (S+1) : -+ * shifts left operand A by S+1 bits, insert 0 to right most bits +diff --git a/drivers/crypto/rockchip/cryptodev_linux/zc.c b/drivers/crypto/rockchip/cryptodev_linux/zc.c +new file mode 100644 +index 000000000..7671c3131 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/zc.c +@@ -0,0 +1,235 @@ ++/* ++ * Driver for /dev/crypto device (aka CryptoDev) ++ * ++ * Copyright (c) 2009-2013 Nikos Mavrogiannopoulos ++ * Copyright (c) 2010 Phil Sutter ++ * Copyright (c) 2011, 2012 OpenSSL Software Foundation, Inc. ++ * ++ * This file is part of linux cryptodev. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2 ++ * of the License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA ++ * 02110-1301, USA. + */ -+#define RK_PKA_SHL0(op_a, S, res) pka_exec_op(PKA_OPCODE_SHL0, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 0, (S), 0, (res), 0) + -+/* SHL1: res = op_a << (S+1) : -+ * shifts left operand A by S+1 bits, insert 1 to right most bits ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "cryptodev.h" ++#include "zc.h" ++#include "version.h" ++ ++/* Helper functions to assist zero copy. ++ * This needs to be redesigned and moved out of the session. --nmav + */ -+#define RK_PKA_SHL1(op_a, S, res) pka_exec_op(PKA_OPCODE_SHL1, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 0, (S), 0, (res), 0) + -+/*--------------------------------------------------------------*/ -+/* 2. 
Multiplication and other operations */ -+/* Note: See notes to RK_PKAExecOperation */ -+/*--------------------------------------------------------------*/ ++/* offset of buf in it's first page */ ++#define PAGEOFFSET(buf) ((unsigned long)buf & ~PAGE_MASK) + -+/* ModExp: res = op_a ** op_b mod N - modular exponentiation */ -+#define RK_PKA_MOD_EXP(op_a, op_b, res) \ -+ pka_exec_op(PKA_OPCODE_MOD_EXP, PKA_EXACT_LEN_ID, 0, (op_a), \ -+ 0, (op_b), 0, (res), 0) ++/* fetch the pages addr resides in into pg and initialise sg with them */ ++int __cryptodev_get_userbuf(uint8_t __user *addr, uint32_t len, int write, ++ unsigned int pgcount, struct page **pg, struct scatterlist *sg, ++ struct task_struct *task, struct mm_struct *mm) ++{ ++ int ret, pglen, i = 0; ++ struct scatterlist *sgp; + -+/* Divide: res = op_a / op_b , op_a = op_a mod op_b - division, */ -+#define RK_PKA_DIV(op_a, op_b, res) pka_exec_op(PKA_OPCODE_DIV, PKA_CALC_LEN_ID, \ -+ 0, (op_a), 0, (op_b), 0, (res), 0) ++ if (unlikely(!pgcount || !len || !addr)) { ++ sg_mark_end(sg); ++ return 0; ++ } + -+/* Terminate - special operation, which allows HOST access */ -+/* to PKA data memory registers after end of PKA operations */ -+#define RK_PKA_TERMINATE() pka_exec_op(PKA_OPCODE_TERMINATE, 0, 0, 0, 0, 0, 0, 0, 0) ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)) ++ down_read(&mm->mmap_sem); ++#else ++ mmap_read_lock(mm); ++#endif ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 168)) ++ ret = get_user_pages(task, mm, ++ (unsigned long)addr, pgcount, write, 0, pg, NULL); ++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) ++ ret = get_user_pages(task, mm, ++ (unsigned long)addr, pgcount, write, pg, NULL); ++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) ++ ret = get_user_pages_remote(task, mm, ++ (unsigned long)addr, pgcount, write, 0, pg, NULL); ++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ ret = get_user_pages_remote(task, mm, ++ (unsigned long)addr, pgcount, write ? FOLL_WRITE : 0, ++ pg, NULL); ++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)) ++ ret = get_user_pages_remote(task, mm, ++ (unsigned long)addr, pgcount, write ? FOLL_WRITE : 0, ++ pg, NULL, NULL); ++#else ++ ret = get_user_pages_remote(mm, ++ (unsigned long)addr, pgcount, write ? 
FOLL_WRITE : 0, ++ pg, NULL, NULL); ++#endif ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)) ++ up_read(&mm->mmap_sem); ++#else ++ mmap_read_unlock(mm); ++#endif ++ if (ret < 0 || ret != pgcount) ++ return -EINVAL; + -+/********************* Private Variable Definition ***************************/ -+static void __iomem *pka_base; ++ sg_init_table(sg, pgcount); + -+static void pka_word_memcpy(u32 *dst, u32 *src, u32 size) -+{ -+ u32 i; ++ pglen = min((ptrdiff_t)(PAGE_SIZE - PAGEOFFSET(addr)), (ptrdiff_t)len); ++ sg_set_page(sg, pg[i++], pglen, PAGEOFFSET(addr)); + -+ for (i = 0; i < size; i++, dst++) -+ writel_relaxed(src[i], (void *)dst); ++ len -= pglen; ++ for (sgp = sg_next(sg); len; sgp = sg_next(sgp)) { ++ pglen = min((uint32_t)PAGE_SIZE, len); ++ sg_set_page(sgp, pg[i++], pglen, 0); ++ len -= pglen; ++ } ++ sg_mark_end(sg_last(sg, pgcount)); ++ return 0; +} + -+static void pka_word_memset(u32 *buff, u32 val, u32 size) ++int cryptodev_adjust_sg_array(struct csession *ses, int pagecount) +{ -+ u32 i; -+ -+ for (i = 0; i < size; i++, buff++) -+ writel_relaxed(val, (void *)buff); -+} ++ struct scatterlist *sg; ++ struct page **pages; ++ int array_size; + -+static int pka_wait_pipe_rdy(void) -+{ -+ u32 reg_val = 0; ++ for (array_size = ses->array_size; array_size < pagecount; ++ array_size *= 2) ++ ; ++ ddebug(1, "reallocating from %d to %d pages", ++ ses->array_size, array_size); ++ pages = krealloc(ses->pages, array_size * sizeof(struct page *), ++ GFP_KERNEL); ++ if (unlikely(!pages)) ++ return -ENOMEM; ++ ses->pages = pages; ++ sg = krealloc(ses->sg, array_size * sizeof(struct scatterlist), ++ GFP_KERNEL); ++ if (unlikely(!sg)) ++ return -ENOMEM; ++ ses->sg = sg; ++ ses->array_size = array_size; + -+ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_PIPE_RDY, reg_val, -+ reg_val, PKA_POLL_PERIOD_US, PKA_POLL_TIMEOUT_US); ++ return 0; +} + -+static int pka_wait_done(void) ++void cryptodev_release_user_pages(struct csession *ses) +{ -+ u32 reg_val = 0; ++ unsigned int i; + -+ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_DONE, reg_val, -+ reg_val, PKA_POLL_PERIOD_US, PKA_POLL_TIMEOUT_US); -+} ++ for (i = 0; i < ses->used_pages; i++) { ++ if (!PageReserved(ses->pages[i])) ++ SetPageDirty(ses->pages[i]); + -+static int pka_max_wait_done(void) -+{ -+ u32 reg_val = 0; ++ if (ses->readonly_pages == 0) ++ flush_dcache_page(ses->pages[i]); ++ else ++ ses->readonly_pages--; + -+ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_DONE, reg_val, -+ reg_val, PKA_MAX_POLL_PERIOD_US, PKA_MAX_POLL_TIMEOUT_US); ++ put_page(ses->pages[i]); ++ } ++ ses->used_pages = 0; +} + -+static u32 pka_check_status(u32 mask) ++/* make src and dst available in scatterlists. ++ * dst might be the same as src. 
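/*
 * Worked example for the page math in __cryptodev_get_userbuf() above, not
 * part of the patch and assuming 4 KiB pages: for addr = 0x10000ff0 and
 * len = 100, PAGEOFFSET(addr) = 0xff0 (4080), so the first scatterlist entry
 * covers min(4096 - 4080, 100) = 16 bytes; the remaining 84 bytes fit in one
 * more entry starting at offset 0 of the next page, giving pgcount = 2 pages
 * pinned by get_user_pages_remote().
 */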
++ */ ++int cryptodev_get_userbuf(struct csession *ses, ++ void *__user src, unsigned int src_len, ++ void *__user dst, unsigned int dst_len, ++ struct task_struct *task, struct mm_struct *mm, ++ struct scatterlist **src_sg, ++ struct scatterlist **dst_sg) +{ -+ u32 status; ++ int src_pagecount, dst_pagecount; ++ int rc; + -+ pka_wait_done(); -+ status = PKA_READ(CRYPTO_PKA_STATUS); -+ status = status & mask; ++ /* Empty input is a valid option to many algorithms & is tested by NIST/FIPS */ ++ /* Make sure NULL input has 0 length */ ++ if (!src && src_len) ++ src_len = 0; + -+ return !!status; -+} -+static void pka_set_len_words(u32 words, u32 index) -+{ -+ PKA_WRITE(PKA_WORDS2BITS(words), CRYPTO_PKA_L0 + index * sizeof(u32)); -+} ++ /* I don't know that null output is ever useful, but we can handle it gracefully */ ++ /* Make sure NULL output has 0 length */ ++ if (!dst && dst_len) ++ dst_len = 0; + -+static u32 pka_get_len_words(u32 index) -+{ -+ pka_wait_done(); -+ return PKA_BITS2WORDS(PKA_READ(CRYPTO_PKA_L0 + (index) * sizeof(u32))); -+} ++ src_pagecount = PAGECOUNT(src, src_len); ++ dst_pagecount = PAGECOUNT(dst, dst_len); + -+static void pka_set_map_addr(u32 addr, u32 index) -+{ -+ PKA_WRITE(addr, CRYPTO_MEMORY_MAP0 + sizeof(u32) * index); -+} ++ ses->used_pages = (src == dst) ? max(src_pagecount, dst_pagecount) ++ : src_pagecount + dst_pagecount; + -+static u32 pka_get_map_addr(u32 index) -+{ -+ pka_wait_done(); -+ return PKA_READ(CRYPTO_MEMORY_MAP0 + sizeof(u32) * (index)); -+} ++ ses->readonly_pages = (src == dst) ? 0 : src_pagecount; + -+static u32 pka_make_full_opcode(u32 opcode, u32 len_id, -+ u32 is_a_immed, u32 op_a, -+ u32 is_b_immed, u32 op_b, -+ u32 res_discard, u32 res, -+ u32 tag) -+{ -+ u32 full_opcode; ++ if (ses->used_pages > ses->array_size) { ++ rc = cryptodev_adjust_sg_array(ses, ses->used_pages); ++ if (rc) ++ return rc; ++ } + -+ full_opcode = ((opcode & 31) << CRYPTO_OPCODE_CODE_SHIFT | -+ (len_id & 7) << CRYPTO_OPCODE_LEN_SHIFT | -+ (is_a_immed & 1) << CRYPTO_OPCODE_A_IMMED_SHIFT | -+ (op_a & 31) << CRYPTO_OPCODE_A_SHIFT | -+ (is_b_immed & 1) << CRYPTO_OPCODE_B_IMMED_SHIFT | -+ (op_b & 31) << CRYPTO_OPCODE_B_SHIFT | -+ (res_discard & 1) << CRYPTO_OPCODE_R_DIS_SHIFT | -+ (res & 31) << CRYPTO_OPCODE_R_SHIFT | -+ (tag & 31) << CRYPTO_OPCODE_TAG_SHIFT); ++ if (src == dst) { /* inplace operation */ ++ /* When we encrypt for authenc modes we need to write ++ * more data than the ones we read. 
*/ ++ if (src_len < dst_len) ++ src_len = dst_len; ++ rc = __cryptodev_get_userbuf(src, src_len, 1, ses->used_pages, ++ ses->pages, ses->sg, task, mm); ++ if (unlikely(rc)) { ++ derr(1, "failed to get user pages for data IO"); ++ return rc; ++ } ++ (*src_sg) = (*dst_sg) = ses->sg; ++ return 0; ++ } + -+ return full_opcode; -+} ++ *src_sg = NULL; /* default to no input */ ++ *dst_sg = NULL; /* default to ignore output */ + -+static void pka_load_data(u32 addr, u32 *data, u32 size_words) -+{ -+ pka_wait_done(); ++ if (likely(src)) { ++ rc = __cryptodev_get_userbuf(src, src_len, 0, ses->readonly_pages, ++ ses->pages, ses->sg, task, mm); ++ if (unlikely(rc)) { ++ derr(1, "failed to get user pages for data input"); ++ return rc; ++ } ++ *src_sg = ses->sg; ++ } + -+ PKA_RAM_FOR_CPU(); -+ pka_word_memcpy(PKA_GET_SRAM_ADDR(addr), data, size_words); -+ PKA_RAM_FOR_PKA(); ++ if (likely(dst)) { ++ const unsigned int writable_pages = ++ ses->used_pages - ses->readonly_pages; ++ struct page **dst_pages = ses->pages + ses->readonly_pages; ++ *dst_sg = ses->sg + ses->readonly_pages; ++ ++ rc = __cryptodev_get_userbuf(dst, dst_len, 1, writable_pages, ++ dst_pages, *dst_sg, task, mm); ++ if (unlikely(rc)) { ++ derr(1, "failed to get user pages for data output"); ++ cryptodev_release_user_pages(ses); /* FIXME: use __release_userbuf(src, ...) */ ++ return rc; ++ } ++ } ++ return 0; +} +diff --git a/drivers/crypto/rockchip/cryptodev_linux/zc.h b/drivers/crypto/rockchip/cryptodev_linux/zc.h +new file mode 100644 +index 000000000..808af4855 +--- /dev/null ++++ b/drivers/crypto/rockchip/cryptodev_linux/zc.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ + -+static void pka_clr_mem(u32 addr, u32 size_words) -+{ -+ pka_wait_done(); ++#ifndef ZC_H ++# define ZC_H + -+ PKA_RAM_FOR_CPU(); -+ pka_word_memset(PKA_GET_SRAM_ADDR(addr), 0x00, size_words); -+ PKA_RAM_FOR_PKA(); -+} ++/* For zero copy */ ++int __cryptodev_get_userbuf(uint8_t __user *addr, uint32_t len, int write, ++ unsigned int pgcount, struct page **pg, struct scatterlist *sg, ++ struct task_struct *task, struct mm_struct *mm); ++void cryptodev_release_user_pages(struct csession *ses); + -+static void pka_read_data(u32 addr, u32 *data, u32 size_words) -+{ -+ pka_wait_done(); ++int cryptodev_get_userbuf(struct csession *ses, ++ void *__user src, unsigned int src_len, ++ void *__user dst, unsigned int dst_len, ++ struct task_struct *task, struct mm_struct *mm, ++ struct scatterlist **src_sg, ++ struct scatterlist **dst_sg); + -+ PKA_RAM_FOR_CPU(); -+ pka_word_memcpy(data, PKA_GET_SRAM_ADDR(addr), size_words); -+ PKA_RAM_FOR_PKA(); -+} ++/* buflen ? (last page - first page + 1) : 0 */ ++#define PAGECOUNT(buf, buflen) ((buflen) \ ++ ? ((((unsigned long)(buf + buflen - 1)) >> PAGE_SHIFT) - \ ++ (((unsigned long)(buf )) >> PAGE_SHIFT) + 1) \ ++ : 0) + -+static int pka_exec_op(enum pka_opcode opcode, u8 len_id, -+ u8 is_a_immed, u8 op_a, u8 is_b_immed, u8 op_b, -+ u8 res_discard, u8 res, u8 tag) -+{ -+ int ret = 0; -+ u32 full_opcode; ++#define DEFAULT_PREALLOC_PAGES 32 + -+ if (res == RES_DISCARD) { -+ res_discard = 1; -+ res = 0; -+ } ++#endif +diff --git a/drivers/crypto/rockchip/procfs.c b/drivers/crypto/rockchip/procfs.c +new file mode 100644 +index 000000000..5689dc530 +--- /dev/null ++++ b/drivers/crypto/rockchip/procfs.c +@@ -0,0 +1,160 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) Rockchip Electronics Co., Ltd. 
*/ ++#include ++#include ++#include ++#include + -+ full_opcode = pka_make_full_opcode(opcode, len_id, -+ is_a_immed, op_a, -+ is_b_immed, op_b, -+ res_discard, res, tag); ++#include "procfs.h" + -+ /* write full opcode into PKA CRYPTO_OPCODE register */ -+ PKA_WRITE(full_opcode, CRYPTO_OPCODE); ++#ifdef CONFIG_PROC_FS + -+ /*************************************************/ -+ /* finishing operations for different cases */ -+ /*************************************************/ -+ switch (opcode) { -+ case PKA_OPCODE_DIV: -+ /* for Div operation check, that op_b != 0*/ -+ if (pka_check_status(CRYPTO_PKA_DIV_BY_ZERO)) -+ goto end; -+ break; -+ case PKA_OPCODE_TERMINATE: -+ /* wait for PKA done bit */ -+ ret = pka_wait_done(); -+ break; -+ default: -+ /* wait for PKA pipe ready bit */ -+ ret = pka_wait_pipe_rdy(); -+ } -+end: -+ return ret; -+} ++static const char *alg_type2name[ALG_TYPE_MAX] = { ++ [ALG_TYPE_HASH] = "HASH", ++ [ALG_TYPE_HMAC] = "HMAC", ++ [ALG_TYPE_CIPHER] = "CIPHER", ++ [ALG_TYPE_ASYM] = "ASYM", ++ [ALG_TYPE_AEAD] = "AEAD", ++}; + -+static int pk_int_len_tbl(u32 exact_size_words, u32 calc_size_words) ++static void crypto_show_clock(struct seq_file *p, struct clk_bulk_data *clk_bulks, int clks_num) +{ -+ u32 i; -+ -+ /* clear all length reg */ -+ for (i = 0; i < CRYPTO_LEN_REG_NUM; i++) -+ pka_set_len_words(0, i); ++ int i; + -+ /* Case of default settings */ -+ /* write exact size into first table entry */ -+ pka_set_len_words(exact_size_words, PKA_EXACT_LEN_ID); ++ seq_puts(p, "clock info:\n"); + -+ /* write size with extra word into tab[1] = tab[0] + 32 */ -+ pka_set_len_words(calc_size_words, PKA_CALC_LEN_ID); ++ for (i = 0; i < clks_num; i++) ++ seq_printf(p, "\t%-10s %ld\n", clk_bulks[i].id, clk_get_rate(clk_bulks[i].clk)); + -+ return 0; ++ seq_puts(p, "\n"); +} + -+static int pka_int_map_tbl(u32 *regs_cnt, u32 max_size_words) ++static void crypto_show_stat(struct seq_file *p, struct rk_crypto_stat *stat) +{ -+ u32 i; -+ u32 cur_addr = 0; -+ u32 max_size_bytes, default_regs_cnt; -+ -+ max_size_bytes = PKA_WORDS2BYTES(max_size_words); -+ default_regs_cnt = -+ min_t(u32, CRYPTO_MAP_REG_NUM, CRYPTO_SRAM_SIZE / max_size_bytes); ++ /* show statistic info */ ++ seq_puts(p, "Statistic info:\n"); ++ seq_printf(p, "\tbusy_cnt : %llu\n", stat->busy_cnt); ++ seq_printf(p, "\tequeue_cnt : %llu\n", stat->equeue_cnt); ++ seq_printf(p, "\tdequeue_cnt : %llu\n", stat->dequeue_cnt); ++ seq_printf(p, "\tdone_cnt : %llu\n", stat->done_cnt); ++ seq_printf(p, "\tcomplete_cnt : %llu\n", stat->complete_cnt); ++ seq_printf(p, "\tfake_cnt : %llu\n", stat->fake_cnt); ++ seq_printf(p, "\tirq_cnt : %llu\n", stat->irq_cnt); ++ seq_printf(p, "\ttimeout_cnt : %llu\n", stat->timeout_cnt); ++ seq_printf(p, "\terror_cnt : %llu\n", stat->error_cnt); ++ seq_printf(p, "\tlast_error : %d\n", stat->last_error); ++ seq_puts(p, "\n"); ++} + -+ /* clear all address */ -+ for (i = 0; i < CRYPTO_MAP_REG_NUM; i++) -+ pka_set_map_addr(PKA_ADDR_NOT_USED, i); ++static void crypto_show_queue_info(struct seq_file *p, struct rk_crypto_dev *rk_dev) ++{ ++ bool busy; ++ unsigned long flags; ++ u32 qlen, max_qlen; + -+ /* set addresses of N,NP and user requested registers (excluding 2 temp registers T0,T1) */ -+ for (i = 0; i < default_regs_cnt - PKA_TMP_REG_CNT; i++, cur_addr += max_size_bytes) -+ pka_set_map_addr(cur_addr, i); ++ spin_lock_irqsave(&rk_dev->lock, flags); + -+ /* set addresses of 2 temp registers: T0=30, T1=31 */ -+ pka_set_map_addr(cur_addr, PKA_T0); -+ cur_addr += max_size_bytes; -+ 
pka_set_map_addr(cur_addr, PKA_T1); ++ qlen = rk_dev->queue.qlen; ++ max_qlen = rk_dev->queue.max_qlen; ++ busy = rk_dev->busy; + -+ /* output maximal count of allowed registers */ -+ *regs_cnt = default_regs_cnt; ++ spin_unlock_irqrestore(&rk_dev->lock, flags); + -+ /* set default virtual addresses of N,NP,T0,T1 registers into N_NP_T0_T1_Reg */ -+ PKA_WRITE((u32)PKA_N_NP_T0_T1_REG_DEFAULT, CRYPTO_N_NP_T0_T1_ADDR); ++ seq_printf(p, "Crypto queue usage [%u/%u], ever_max = %llu, status: %s\n", ++ qlen, max_qlen, rk_dev->stat.ever_queue_max, busy ? "busy" : "idle"); + -+ return 0; ++ seq_puts(p, "\n"); +} + -+static int pka_clear_regs_block(u8 first_reg, u8 regs_cnt) ++static void crypto_show_valid_algo_single(struct seq_file *p, enum alg_type type, ++ struct rk_crypto_algt **algs, u32 algs_num) +{ + u32 i; -+ u32 size_words; -+ int cnt_tmps = 0; -+ u32 user_reg_num = CRYPTO_MAP_REG_NUM - PKA_TMP_REG_CNT; -+ -+ /* calculate size_words of register in words */ -+ size_words = pka_get_len_words(PKA_CALC_LEN_ID); -+ -+ if (first_reg + regs_cnt > user_reg_num) { -+ cnt_tmps = min_t(u8, (regs_cnt + first_reg - user_reg_num), PKA_TMP_REG_CNT); -+ regs_cnt = user_reg_num; -+ } else { -+ cnt_tmps = PKA_TMP_REG_CNT; -+ } ++ struct rk_crypto_algt *tmp_algs; + -+ /* clear ordinary registers */ -+ for (i = first_reg; i < regs_cnt; i++) -+ RK_PKA_CLR(i); ++ seq_printf(p, "\t%s:\n", alg_type2name[type]); + -+ pka_wait_done(); ++ for (i = 0; i < algs_num; i++, algs++) { ++ tmp_algs = *algs; + -+ /* clear PKA temp registers (without PKA operations) */ -+ if (cnt_tmps > 0) { -+ pka_clr_mem(pka_get_map_addr(PKA_T0), size_words); -+ if (cnt_tmps > 1) -+ pka_clr_mem(pka_get_map_addr(PKA_T1), size_words); ++ if (!(tmp_algs->valid_flag) || tmp_algs->type != type) ++ continue; + ++ seq_printf(p, "\t\t%s\n", tmp_algs->name); + } + -+ return 0; ++ seq_puts(p, "\n"); +} + -+static int pka_init(u32 exact_size_words) ++static void crypto_show_valid_algos(struct seq_file *p, struct rk_crypto_soc_data *soc_data) +{ -+ int ret; -+ u32 regs_cnt = 0; -+ u32 calc_size_words = exact_size_words + 1; -+ -+ PKA_CLK_ENABLE(); -+ PKA_RAM_FOR_PKA(); -+ -+ if (exact_size_words > PKA_MAX_CALC_WORDS) -+ return -1; -+ -+ ret = pk_int_len_tbl(exact_size_words, calc_size_words); -+ if (ret) -+ goto exit; -+ -+ ret = pka_int_map_tbl(®s_cnt, calc_size_words); -+ if (ret) -+ goto exit; -+ -+ /* clean PKA data memory */ -+ pka_clear_regs_block(0, regs_cnt - PKA_TMP_REG_CNT); ++ u32 algs_num = 0; ++ struct rk_crypto_algt **algs; + -+ /* clean temp PKA registers 30,31 */ -+ pka_clr_mem(pka_get_map_addr(PKA_T0), calc_size_words); -+ pka_clr_mem(pka_get_map_addr(PKA_T1), calc_size_words); ++ seq_puts(p, "Valid algorithms:\n"); + -+exit: -+ return ret; -+} ++ algs = soc_data->hw_get_algts(&algs_num); ++ if (!algs || algs_num == 0) ++ return; + -+static void pka_finish(void) -+{ -+ RK_PKA_TERMINATE(); -+ PKA_CLK_DISABLE(); ++ crypto_show_valid_algo_single(p, ALG_TYPE_CIPHER, algs, algs_num); ++ crypto_show_valid_algo_single(p, ALG_TYPE_AEAD, algs, algs_num); ++ crypto_show_valid_algo_single(p, ALG_TYPE_HASH, algs, algs_num); ++ crypto_show_valid_algo_single(p, ALG_TYPE_HMAC, algs, algs_num); ++ crypto_show_valid_algo_single(p, ALG_TYPE_ASYM, algs, algs_num); +} + -+static void pka_copy_bn_into_reg(u8 dst_reg, struct rk_bignum *bn) ++static int crypto_show_all(struct seq_file *p, void *v) +{ -+ u32 cur_addr; -+ u32 size_words, bn_words; -+ -+ RK_PKA_TERMINATE(); -+ -+ bn_words = PKA_BIGNUM_WORDS(bn); -+ size_words = 
pka_get_len_words(PKA_CALC_LEN_ID); -+ cur_addr = pka_get_map_addr(dst_reg); ++ struct rk_crypto_dev *rk_dev = p->private; ++ struct rk_crypto_soc_data *soc_data = rk_dev->soc_data; ++ struct rk_crypto_stat *stat = &rk_dev->stat; + -+ pka_load_data(cur_addr, bn->data, bn_words); -+ cur_addr += PKA_WORDS2BYTES(bn_words); ++ seq_printf(p, "Rockchip Crypto Version: %s\n\n", ++ soc_data->crypto_ver); + -+ pka_clr_mem(cur_addr, size_words - bn_words); -+} ++ seq_printf(p, "use_soft_aes192 : %s\n\n", soc_data->use_soft_aes192 ? "true" : "false"); + -+static int pka_copy_bn_from_reg(struct rk_bignum *bn, u32 size_words, u8 src_reg, bool is_max_poll) -+{ -+ int ret; ++ crypto_show_clock(p, rk_dev->clk_bulks, rk_dev->clks_num); + -+ PKA_WRITE(0, CRYPTO_OPCODE); ++ crypto_show_valid_algos(p, soc_data); + -+ ret = is_max_poll ? pka_max_wait_done() : pka_wait_done(); -+ if (ret) -+ return ret; ++ crypto_show_stat(p, stat); + -+ pka_read_data(pka_get_map_addr(src_reg), bn->data, size_words); ++ crypto_show_queue_info(p, rk_dev); + + return 0; +} + -+/*********** pka_div_bignum function **********************/ -+/** -+ * @brief The function divides long number A*(2^S) by B: -+ * res = A*(2^S) / B, remainder A = A*(2^S) % B. -+ * where: A,B - are numbers of size, which is not grate than, -+ * maximal operands size, -+ * and B > 2^S; -+ * S - exponent of binary factor of A. -+ * ^ - exponentiation operator. -+ * -+ * The function algorithm: -+ * -+ * 1. Let nWords = S/32; nBits = S % 32; -+ * 2. Set res = 0, r_t1 = op_a; -+ * 3. for(i=0; i<=nWords; i++) do: -+ * 3.1. if(i < nWords ) -+ * s1 = 32; -+ * else -+ * s1 = nBits; -+ * 3.2. r_t1 = r_t1 << s1; -+ * 3.3. call PKA_div for calculating the quotient and remainder: -+ * r_t2 = floor(r_t1/op_b) //quotient; -+ * r_t1 = r_t1 % op_b //remainder (is in r_t1 register); -+ * 3.4. res = (res << s1) + r_t2; -+ * end do; -+ * 4. Exit. -+ * -+ * Assuming: -+ * - 5 PKA registers are used: op_a, op_b, res, r_t1, r_t2. -+ * - The registers sizes and mapping tables are set on -+ * default mode according to operands size. -+ * - The PKA clocks are initialized. -+ * NOTE ! Operand op_a shall be overwritten by remainder. -+ * -+ * @param[in] len_id - ID of operation size (modSize+32). -+ * @param[in] op_a - Operand A: virtual register pointer of A. -+ * @param[in] S - exponent of binary factor of A. -+ * @param[in] op_b - Operand B: virtual register pointer of B. -+ * @param[in] res - Virtual register pointer for result quotient. -+ * @param[in] r_t1 - Virtual pointer to remainder. -+ * @param[in] r_t2 - Virtual pointer of temp register. -+ * -+ * @return int - On success 0 is returned: -+ * -+ */ -+static int pka_div_bignum(u8 op_a, u32 s, u8 op_b, u8 res, u8 r_t1, u8 r_t2) ++static int crypto_open(struct inode *inode, struct file *file) +{ -+ u8 s1; -+ u32 i; -+ u32 n_bits, n_words; -+ -+ /* calculate shifting parameters (words and bits ) */ -+ n_words = ((u32)s + 31) / 32; -+ n_bits = (u32)s % 32; -+ -+ /* copy operand op_a (including extra word) into temp reg r_t1 */ -+ RK_PKA_COPY(r_t1, op_a); -+ -+ /* set res = 0 (including extra word) */ -+ RK_PKA_CLR(res); -+ -+ /*----------------------------------------------------*/ -+ /* Step 1. Shifting and dividing loop */ -+ /*----------------------------------------------------*/ -+ for (i = 0; i < n_words; i++) { -+ /* 3.1 set shift value s1 */ -+ s1 = i > 0 ? 32 : n_bits; -+ -+ /* 3.2. 
shift: r_t1 = r_t1 * 2**s1 (in code (s1-1), -+ * because PKA performs s+1 shifts) -+ */ -+ if (s1 > 0) -+ RK_PKA_SHL0(r_t1 /*op_a*/, (s1 - 1) /*s*/, r_t1 /*res*/); -+ -+ /* 3.3. perform PKA_OPCODE_MOD_DIV for calculating a quotient -+ * r_t2 = floor(r_t1 / N) -+ * and remainder r_t1 = r_t1 % op_b -+ */ -+ RK_PKA_DIV(r_t1 /*op_a*/, op_b /*B*/, r_t2 /*res*/); ++ struct rk_crypto_dev *data = pde_data(inode); + -+ /* 3.4. res = res * 2**s1 + res; */ -+ if (s1 > 0) -+ RK_PKA_SHL0(res /*op_a*/, (s1 - 1) /*s*/, res /*res*/); ++ return single_open(file, crypto_show_all, data); ++} + -+ RK_PKA_ADD(res /*op_a*/, r_t2 /*op_b*/, res /*res*/); -+ } ++static const struct proc_ops ops = { ++ .proc_open = crypto_open, ++ .proc_read = seq_read, ++ .proc_lseek = seq_lseek, ++ .proc_release = single_release, ++}; + -+ pka_wait_done(); ++int rkcrypto_proc_init(struct rk_crypto_dev *rk_dev) ++{ ++ rk_dev->procfs = proc_create_data(rk_dev->name, 0, NULL, &ops, rk_dev); ++ if (!rk_dev->procfs) ++ return -EINVAL; + + return 0; -+} /* END OF pka_div_bignum */ ++} + -+static u32 pka_calc_and_init_np(struct rk_bignum *bn, u8 r_t0, u8 r_t1, u8 r_t2) ++void rkcrypto_proc_cleanup(struct rk_crypto_dev *rk_dev) +{ -+ int ret; -+ u32 i; -+ u32 s; -+ u32 mod_size_bits; -+ u32 num_bits, num_words; -+ -+ /* Set s = 132 */ -+ s = 132; -+ -+ mod_size_bits = PKA_BYTES2BITS(rk_bn_get_size(bn)); ++ if (rk_dev->procfs) ++ remove_proc_entry(rk_dev->name, NULL); + -+ CRYPTO_TRACE("size_bits = %u", mod_size_bits); ++ rk_dev->procfs = NULL; ++} + -+ /* copy modulus N into r0 register */ -+ pka_copy_bn_into_reg(PKA_N, bn); ++#endif /* CONFIG_PROC_FS */ +diff --git a/drivers/crypto/rockchip/procfs.h b/drivers/crypto/rockchip/procfs.h +new file mode 100644 +index 000000000..e491c53b4 +--- /dev/null ++++ b/drivers/crypto/rockchip/procfs.h +@@ -0,0 +1,23 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2022 Rockchip Electronics Co., Ltd. */ + -+ /*--------------------------------------------------------------*/ -+ /* Step 1,2. Set registers: Set op_a = 2^(sizeN+32) */ -+ /* Registers using: 0 - N (is set in register 0, */ -+ /* 1 - NP, temp regs: r_t0 (A), r_t1, r_t2. 
*/ -+ /* len_id: 0 - exact size, 1 - exact+32 bit */ -+ /*--------------------------------------------------------------*/ ++#ifndef _RKCRYPTO_PROCFS_H ++#define _RKCRYPTO_PROCFS_H + -+ /* set register r_t0 = 0 */ -+ RK_PKA_CLR(r_t0); ++#include "rk_crypto_core.h" + -+ /* calculate bit position of said bit in the word */ -+ num_bits = mod_size_bits % 32; -+ num_words = mod_size_bits / 32; ++#ifdef CONFIG_PROC_FS ++int rkcrypto_proc_init(struct rk_crypto_dev *dev); ++void rkcrypto_proc_cleanup(struct rk_crypto_dev *dev); ++#else ++static inline int rkcrypto_proc_init(struct rk_crypto_dev *dev) ++{ ++ return 0; ++} ++static inline void rkcrypto_proc_cleanup(struct rk_crypto_dev *dev) ++{ + -+ CRYPTO_TRACE("num_bits = %u, num_words = %u, size_bits = %u", -+ num_bits, num_words, mod_size_bits); ++} ++#endif + -+ /* set 1 into register r_t0 */ -+ RK_PKA_SET_0(r_t0 /*op_a*/, r_t0 /*res*/); ++#endif +diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c +deleted file mode 100644 +index 77d5705a5..000000000 +--- a/drivers/crypto/rockchip/rk3288_crypto.c ++++ /dev/null +@@ -1,444 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Crypto acceleration support for Rockchip RK3288 +- * +- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd +- * +- * Author: Zain Wang +- * +- * Some ideas are from marvell-cesa.c and s5p-sss.c driver. +- */ +- +-#include "rk3288_crypto.h" +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static struct rockchip_ip rocklist = { +- .dev_list = LIST_HEAD_INIT(rocklist.dev_list), +- .lock = __SPIN_LOCK_UNLOCKED(rocklist.lock), +-}; +- +-struct rk_crypto_info *get_rk_crypto(void) +-{ +- struct rk_crypto_info *first; +- +- spin_lock(&rocklist.lock); +- first = list_first_entry_or_null(&rocklist.dev_list, +- struct rk_crypto_info, list); +- list_rotate_left(&rocklist.dev_list); +- spin_unlock(&rocklist.lock); +- return first; +-} +- +-static const struct rk_variant rk3288_variant = { +- .num_clks = 4, +- .rkclks = { +- { "sclk", 150000000}, +- } +-}; +- +-static const struct rk_variant rk3328_variant = { +- .num_clks = 3, +-}; +- +-static const struct rk_variant rk3399_variant = { +- .num_clks = 3, +-}; +- +-static int rk_crypto_get_clks(struct rk_crypto_info *dev) +-{ +- int i, j, err; +- unsigned long cr; +- +- dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks); +- if (dev->num_clks < dev->variant->num_clks) { +- dev_err(dev->dev, "Missing clocks, got %d instead of %d\n", +- dev->num_clks, dev->variant->num_clks); +- return -EINVAL; +- } +- +- for (i = 0; i < dev->num_clks; i++) { +- cr = clk_get_rate(dev->clks[i].clk); +- for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) { +- if (dev->variant->rkclks[j].max == 0) +- continue; +- if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id)) +- continue; +- if (cr > dev->variant->rkclks[j].max) { +- err = clk_set_rate(dev->clks[i].clk, +- dev->variant->rkclks[j].max); +- if (err) +- dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n", +- dev->variant->rkclks[j].name, cr, +- dev->variant->rkclks[j].max); +- else +- dev_info(dev->dev, "Downclocking %s from %lu to %lu\n", +- dev->variant->rkclks[j].name, cr, +- dev->variant->rkclks[j].max); +- } +- } +- } +- return 0; +-} +- +-static int rk_crypto_enable_clk(struct rk_crypto_info *dev) +-{ +- int err; +- +- err = clk_bulk_prepare_enable(dev->num_clks, dev->clks); +- if (err) +- 
dev_err(dev->dev, "Could not enable clock clks\n"); +- +- return err; +-} +- +-static void rk_crypto_disable_clk(struct rk_crypto_info *dev) +-{ +- clk_bulk_disable_unprepare(dev->num_clks, dev->clks); +-} +- +-/* +- * Power management strategy: The device is suspended until a request +- * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s. +- */ +-static int rk_crypto_pm_suspend(struct device *dev) +-{ +- struct rk_crypto_info *rkdev = dev_get_drvdata(dev); +- +- rk_crypto_disable_clk(rkdev); +- reset_control_assert(rkdev->rst); +- +- return 0; +-} +- +-static int rk_crypto_pm_resume(struct device *dev) +-{ +- struct rk_crypto_info *rkdev = dev_get_drvdata(dev); +- int ret; +- +- ret = rk_crypto_enable_clk(rkdev); +- if (ret) +- return ret; +- +- reset_control_deassert(rkdev->rst); +- return 0; +- +-} +- +-static const struct dev_pm_ops rk_crypto_pm_ops = { +- SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL) +-}; +- +-static int rk_crypto_pm_init(struct rk_crypto_info *rkdev) +-{ +- int err; +- +- pm_runtime_use_autosuspend(rkdev->dev); +- pm_runtime_set_autosuspend_delay(rkdev->dev, 2000); +- +- err = pm_runtime_set_suspended(rkdev->dev); +- if (err) +- return err; +- pm_runtime_enable(rkdev->dev); +- return err; +-} +- +-static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev) +-{ +- pm_runtime_disable(rkdev->dev); +-} +- +-static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) +-{ +- struct rk_crypto_info *dev = platform_get_drvdata(dev_id); +- u32 interrupt_status; +- +- interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); +- CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); +- +- dev->status = 1; +- if (interrupt_status & 0x0a) { +- dev_warn(dev->dev, "DMA Error\n"); +- dev->status = 0; +- } +- complete(&dev->complete); +- +- return IRQ_HANDLED; +-} +- +-static struct rk_crypto_tmp *rk_cipher_algs[] = { +- &rk_ecb_aes_alg, +- &rk_cbc_aes_alg, +- &rk_ecb_des_alg, +- &rk_cbc_des_alg, +- &rk_ecb_des3_ede_alg, +- &rk_cbc_des3_ede_alg, +- &rk_ahash_sha1, +- &rk_ahash_sha256, +- &rk_ahash_md5, +-}; +- +-static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) +-{ +- struct rk_crypto_info *dd; +- unsigned int i; +- +- spin_lock(&rocklist.lock); +- list_for_each_entry(dd, &rocklist.dev_list, list) { +- seq_printf(seq, "%s %s requests: %lu\n", +- dev_driver_string(dd->dev), dev_name(dd->dev), +- dd->nreq); +- } +- spin_unlock(&rocklist.lock); +- +- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { +- if (!rk_cipher_algs[i]->dev) +- continue; +- switch (rk_cipher_algs[i]->type) { +- case CRYPTO_ALG_TYPE_SKCIPHER: +- seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", +- rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name, +- rk_cipher_algs[i]->alg.skcipher.base.base.cra_name, +- rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); +- seq_printf(seq, "\tfallback due to length: %lu\n", +- rk_cipher_algs[i]->stat_fb_len); +- seq_printf(seq, "\tfallback due to alignment: %lu\n", +- rk_cipher_algs[i]->stat_fb_align); +- seq_printf(seq, "\tfallback due to SGs: %lu\n", +- rk_cipher_algs[i]->stat_fb_sgdiff); +- break; +- case CRYPTO_ALG_TYPE_AHASH: +- seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", +- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name, +- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name, +- rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); +- break; +- } +- } +- return 0; +-} +- +-DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs); +- +-static void register_debugfs(struct rk_crypto_info 
*crypto_info) +-{ +- struct dentry *dbgfs_dir __maybe_unused; +- struct dentry *dbgfs_stats __maybe_unused; +- +- /* Ignore error of debugfs */ +- dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); +- dbgfs_stats = debugfs_create_file("stats", 0444, dbgfs_dir, &rocklist, +- &rk_crypto_debugfs_fops); +- +-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG +- rocklist.dbgfs_dir = dbgfs_dir; +- rocklist.dbgfs_stats = dbgfs_stats; +-#endif +-} +- +-static int rk_crypto_register(struct rk_crypto_info *crypto_info) +-{ +- unsigned int i, k; +- int err = 0; +- +- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { +- rk_cipher_algs[i]->dev = crypto_info; +- switch (rk_cipher_algs[i]->type) { +- case CRYPTO_ALG_TYPE_SKCIPHER: +- dev_info(crypto_info->dev, "Register %s as %s\n", +- rk_cipher_algs[i]->alg.skcipher.base.base.cra_name, +- rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name); +- err = crypto_engine_register_skcipher(&rk_cipher_algs[i]->alg.skcipher); +- break; +- case CRYPTO_ALG_TYPE_AHASH: +- dev_info(crypto_info->dev, "Register %s as %s\n", +- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name, +- rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name); +- err = crypto_engine_register_ahash(&rk_cipher_algs[i]->alg.hash); +- break; +- default: +- dev_err(crypto_info->dev, "unknown algorithm\n"); +- } +- if (err) +- goto err_cipher_algs; +- } +- return 0; +- +-err_cipher_algs: +- for (k = 0; k < i; k++) { +- if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) +- crypto_engine_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher); +- else +- crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash); +- } +- return err; +-} +- +-static void rk_crypto_unregister(void) +-{ +- unsigned int i; +- +- for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { +- if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) +- crypto_engine_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher); +- else +- crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash); +- } +-} +- +-static const struct of_device_id crypto_of_id_table[] = { +- { .compatible = "rockchip,rk3288-crypto", +- .data = &rk3288_variant, +- }, +- { .compatible = "rockchip,rk3328-crypto", +- .data = &rk3328_variant, +- }, +- { .compatible = "rockchip,rk3399-crypto", +- .data = &rk3399_variant, +- }, +- {} +-}; +-MODULE_DEVICE_TABLE(of, crypto_of_id_table); +- +-static int rk_crypto_probe(struct platform_device *pdev) +-{ +- struct device *dev = &pdev->dev; +- struct rk_crypto_info *crypto_info, *first; +- int err = 0; +- +- crypto_info = devm_kzalloc(&pdev->dev, +- sizeof(*crypto_info), GFP_KERNEL); +- if (!crypto_info) { +- err = -ENOMEM; +- goto err_crypto; +- } +- +- crypto_info->dev = &pdev->dev; +- platform_set_drvdata(pdev, crypto_info); +- +- crypto_info->variant = of_device_get_match_data(&pdev->dev); +- if (!crypto_info->variant) { +- dev_err(&pdev->dev, "Missing variant\n"); +- return -EINVAL; +- } +- +- crypto_info->rst = devm_reset_control_array_get_exclusive(dev); +- if (IS_ERR(crypto_info->rst)) { +- err = PTR_ERR(crypto_info->rst); +- goto err_crypto; +- } +- +- reset_control_assert(crypto_info->rst); +- usleep_range(10, 20); +- reset_control_deassert(crypto_info->rst); +- +- crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); +- if (IS_ERR(crypto_info->reg)) { +- err = PTR_ERR(crypto_info->reg); +- goto err_crypto; +- } +- +- err = rk_crypto_get_clks(crypto_info); +- if (err) +- goto err_crypto; +- +- crypto_info->irq = platform_get_irq(pdev, 0); +- if (crypto_info->irq < 0) { +- err = crypto_info->irq; 
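	/*
	 * Annotation (not part of the surrounding driver code): a negative
	 * value from platform_get_irq() is a -errno code; it is kept in err
	 * here so the real failure reason is propagated to the caller when
	 * the probe bails out through err_crypto just below.
	 */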
+- goto err_crypto; +- } +- +- err = devm_request_irq(&pdev->dev, crypto_info->irq, +- rk_crypto_irq_handle, IRQF_SHARED, +- "rk-crypto", pdev); +- +- if (err) { +- dev_err(&pdev->dev, "irq request failed.\n"); +- goto err_crypto; +- } +- +- crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); +- crypto_engine_start(crypto_info->engine); +- init_completion(&crypto_info->complete); +- +- err = rk_crypto_pm_init(crypto_info); +- if (err) +- goto err_pm; +- +- spin_lock(&rocklist.lock); +- first = list_first_entry_or_null(&rocklist.dev_list, +- struct rk_crypto_info, list); +- list_add_tail(&crypto_info->list, &rocklist.dev_list); +- spin_unlock(&rocklist.lock); +- +- if (!first) { +- err = rk_crypto_register(crypto_info); +- if (err) { +- dev_err(dev, "Fail to register crypto algorithms"); +- goto err_register_alg; +- } +- +- register_debugfs(crypto_info); +- } +- +- return 0; +- +-err_register_alg: +- rk_crypto_pm_exit(crypto_info); +-err_pm: +- crypto_engine_exit(crypto_info->engine); +-err_crypto: +- dev_err(dev, "Crypto Accelerator not successfully registered\n"); +- return err; +-} +- +-static int rk_crypto_remove(struct platform_device *pdev) +-{ +- struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); +- struct rk_crypto_info *first; +- +- spin_lock_bh(&rocklist.lock); +- list_del(&crypto_tmp->list); +- first = list_first_entry_or_null(&rocklist.dev_list, +- struct rk_crypto_info, list); +- spin_unlock_bh(&rocklist.lock); +- +- if (!first) { +-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG +- debugfs_remove_recursive(rocklist.dbgfs_dir); +-#endif +- rk_crypto_unregister(); +- } +- rk_crypto_pm_exit(crypto_tmp); +- crypto_engine_exit(crypto_tmp->engine); +- return 0; +-} +- +-static struct platform_driver crypto_driver = { +- .probe = rk_crypto_probe, +- .remove = rk_crypto_remove, +- .driver = { +- .name = "rk3288-crypto", +- .pm = &rk_crypto_pm_ops, +- .of_match_table = crypto_of_id_table, +- }, +-}; +- +-module_platform_driver(crypto_driver); +- +-MODULE_AUTHOR("Zain Wang "); +-MODULE_DESCRIPTION("Support for Rockchip's cryptographic engine"); +-MODULE_LICENSE("GPL"); +diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c +deleted file mode 100644 +index 29c953721..000000000 +--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c ++++ /dev/null +@@ -1,470 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Crypto acceleration support for Rockchip RK3288 +- * +- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd +- * +- * Author: Zain Wang +- * +- * Some ideas are from marvell/cesa.c and s5p-sss.c driver. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include "rk3288_crypto.h" +- +-/* +- * IC can not process zero message hash, +- * so we put the fixed hash out when met zero message. 
+- */ +- +-static bool rk_ahash_need_fallback(struct ahash_request *req) +-{ +- struct scatterlist *sg; +- +- sg = req->src; +- while (sg) { +- if (!IS_ALIGNED(sg->offset, sizeof(u32))) { +- return true; +- } +- if (sg->length % 4) { +- return true; +- } +- sg = sg_next(sg); +- } +- return false; +-} +- +-static int rk_ahash_digest_fb(struct ahash_request *areq) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); +- struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); +- struct ahash_alg *alg = crypto_ahash_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); +- +- algt->stat_fb++; +- +- ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); +- rctx->fallback_req.base.flags = areq->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- +- rctx->fallback_req.nbytes = areq->nbytes; +- rctx->fallback_req.src = areq->src; +- rctx->fallback_req.result = areq->result; +- +- return crypto_ahash_digest(&rctx->fallback_req); +-} +- +-static int zero_message_process(struct ahash_request *req) +-{ +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- int rk_digest_size = crypto_ahash_digestsize(tfm); +- +- switch (rk_digest_size) { +- case SHA1_DIGEST_SIZE: +- memcpy(req->result, sha1_zero_message_hash, rk_digest_size); +- break; +- case SHA256_DIGEST_SIZE: +- memcpy(req->result, sha256_zero_message_hash, rk_digest_size); +- break; +- case MD5_DIGEST_SIZE: +- memcpy(req->result, md5_zero_message_hash, rk_digest_size); +- break; +- default: +- return -EINVAL; +- } +- +- return 0; +-} +- +-static void rk_ahash_reg_init(struct ahash_request *req, +- struct rk_crypto_info *dev) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- int reg_status; +- +- reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | +- RK_CRYPTO_HASH_FLUSH | _SBF(0xffff, 16); +- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, reg_status); +- +- reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL); +- reg_status &= (~RK_CRYPTO_HASH_FLUSH); +- reg_status |= _SBF(0xffff, 16); +- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, reg_status); +- +- memset_io(dev->reg + RK_CRYPTO_HASH_DOUT_0, 0, 32); +- +- CRYPTO_WRITE(dev, RK_CRYPTO_INTENA, RK_CRYPTO_HRDMA_ERR_ENA | +- RK_CRYPTO_HRDMA_DONE_ENA); +- +- CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT | +- RK_CRYPTO_HRDMA_DONE_INT); +- +- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, rctx->mode | +- RK_CRYPTO_HASH_SWAP_DO); +- +- CRYPTO_WRITE(dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO | +- RK_CRYPTO_BYTESWAP_BRFIFO | +- RK_CRYPTO_BYTESWAP_BTFIFO); +- +- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes); +-} +- +-static int rk_ahash_init(struct ahash_request *req) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- +- return crypto_ahash_init(&rctx->fallback_req); +-} +- +-static int rk_ahash_update(struct ahash_request *req) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- rctx->fallback_req.nbytes = req->nbytes; +- rctx->fallback_req.src = req->src; +- +- 
return crypto_ahash_update(&rctx->fallback_req); +-} +- +-static int rk_ahash_final(struct ahash_request *req) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- rctx->fallback_req.result = req->result; +- +- return crypto_ahash_final(&rctx->fallback_req); +-} +- +-static int rk_ahash_finup(struct ahash_request *req) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- +- rctx->fallback_req.nbytes = req->nbytes; +- rctx->fallback_req.src = req->src; +- rctx->fallback_req.result = req->result; +- +- return crypto_ahash_finup(&rctx->fallback_req); +-} +- +-static int rk_ahash_import(struct ahash_request *req, const void *in) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- +- return crypto_ahash_import(&rctx->fallback_req, in); +-} +- +-static int rk_ahash_export(struct ahash_request *req, void *out) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +- struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); +- +- ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); +- rctx->fallback_req.base.flags = req->base.flags & +- CRYPTO_TFM_REQ_MAY_SLEEP; +- +- return crypto_ahash_export(&rctx->fallback_req, out); +-} +- +-static int rk_ahash_digest(struct ahash_request *req) +-{ +- struct rk_ahash_rctx *rctx = ahash_request_ctx(req); +- struct rk_crypto_info *dev; +- struct crypto_engine *engine; +- +- if (rk_ahash_need_fallback(req)) +- return rk_ahash_digest_fb(req); +- +- if (!req->nbytes) +- return zero_message_process(req); +- +- dev = get_rk_crypto(); +- +- rctx->dev = dev; +- engine = dev->engine; +- +- return crypto_transfer_hash_request_to_engine(engine, req); +-} +- +-static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) +-{ +- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg)); +- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4); +- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | +- (RK_CRYPTO_HASH_START << 16)); +-} +- +-static int rk_hash_prepare(struct crypto_engine *engine, void *breq) +-{ +- struct ahash_request *areq = container_of(breq, struct ahash_request, base); +- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); +- struct rk_crypto_info *rkc = rctx->dev; +- int ret; +- +- ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); +- if (ret <= 0) +- return -EINVAL; +- +- rctx->nrsg = ret; +- +- return 0; +-} +- +-static void rk_hash_unprepare(struct crypto_engine *engine, void *breq) +-{ +- struct ahash_request *areq = container_of(breq, struct ahash_request, base); +- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); +- struct rk_crypto_info *rkc = rctx->dev; +- +- dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); +-} 
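/*
 * Annotation (descriptive comment, not part of the surrounding driver
 * code): rk_hash_prepare() and rk_hash_unprepare() above bracket one
 * request — the source scatterlist is DMA-mapped before the transfer and
 * unmapped again afterwards. rk_hash_run() below then walks the
 * scatterlist one entry at a time, waiting on the completion signalled by
 * the IRQ handler after each DMA chunk, and finally polls
 * RK_CRYPTO_HASH_STS before reading the digest out of the
 * RK_CRYPTO_HASH_DOUT registers.
 */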
+- +-static int rk_hash_run(struct crypto_engine *engine, void *breq) +-{ +- struct ahash_request *areq = container_of(breq, struct ahash_request, base); +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); +- struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); +- struct ahash_alg *alg = crypto_ahash_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); +- struct scatterlist *sg = areq->src; +- struct rk_crypto_info *rkc = rctx->dev; +- int err; +- int i; +- u32 v; +- +- err = pm_runtime_resume_and_get(rkc->dev); +- if (err) +- return err; +- +- err = rk_hash_prepare(engine, breq); +- if (err) +- goto theend; +- +- rctx->mode = 0; +- +- algt->stat_req++; +- rkc->nreq++; +- +- switch (crypto_ahash_digestsize(tfm)) { +- case SHA1_DIGEST_SIZE: +- rctx->mode = RK_CRYPTO_HASH_SHA1; +- break; +- case SHA256_DIGEST_SIZE: +- rctx->mode = RK_CRYPTO_HASH_SHA256; +- break; +- case MD5_DIGEST_SIZE: +- rctx->mode = RK_CRYPTO_HASH_MD5; +- break; +- default: +- err = -EINVAL; +- goto theend; +- } +- +- rk_ahash_reg_init(areq, rkc); +- +- while (sg) { +- reinit_completion(&rkc->complete); +- rkc->status = 0; +- crypto_ahash_dma_start(rkc, sg); +- wait_for_completion_interruptible_timeout(&rkc->complete, +- msecs_to_jiffies(2000)); +- if (!rkc->status) { +- dev_err(rkc->dev, "DMA timeout\n"); +- err = -EFAULT; +- goto theend; +- } +- sg = sg_next(sg); +- } +- +- /* +- * it will take some time to process date after last dma +- * transmission. +- * +- * waiting time is relative with the last date len, +- * so cannot set a fixed time here. +- * 10us makes system not call here frequently wasting +- * efficiency, and make it response quickly when dma +- * complete. +- */ +- readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); +- +- for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { +- v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); +- put_unaligned_le32(v, areq->result + i * 4); +- } +- +-theend: +- pm_runtime_put_autosuspend(rkc->dev); +- +- rk_hash_unprepare(engine, breq); +- +- local_bh_disable(); +- crypto_finalize_hash_request(engine, breq, err); +- local_bh_enable(); +- +- return 0; +-} +- +-static int rk_hash_init_tfm(struct crypto_ahash *tfm) +-{ +- struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); +- const char *alg_name = crypto_ahash_alg_name(tfm); +- struct ahash_alg *alg = crypto_ahash_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base); +- +- /* for fallback */ +- tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, +- CRYPTO_ALG_NEED_FALLBACK); +- if (IS_ERR(tctx->fallback_tfm)) { +- dev_err(algt->dev->dev, "Could not load fallback driver.\n"); +- return PTR_ERR(tctx->fallback_tfm); +- } +- +- crypto_ahash_set_reqsize(tfm, +- sizeof(struct rk_ahash_rctx) + +- crypto_ahash_reqsize(tctx->fallback_tfm)); +- +- return 0; +-} +- +-static void rk_hash_exit_tfm(struct crypto_ahash *tfm) +-{ +- struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); +- +- crypto_free_ahash(tctx->fallback_tfm); +-} +- +-struct rk_crypto_tmp rk_ahash_sha1 = { +- .type = CRYPTO_ALG_TYPE_AHASH, +- .alg.hash.base = { +- .init = rk_ahash_init, +- .update = rk_ahash_update, +- .final = rk_ahash_final, +- .finup = rk_ahash_finup, +- .export = rk_ahash_export, +- .import = rk_ahash_import, +- .digest = rk_ahash_digest, +- .init_tfm = rk_hash_init_tfm, +- .exit_tfm = rk_hash_exit_tfm, +- .halg = { +- .digestsize = SHA1_DIGEST_SIZE, +- .statesize = sizeof(struct sha1_state), +- .base = { +- .cra_name = "sha1", +- 
.cra_driver_name = "rk-sha1", +- .cra_priority = 300, +- .cra_flags = CRYPTO_ALG_ASYNC | +- CRYPTO_ALG_NEED_FALLBACK, +- .cra_blocksize = SHA1_BLOCK_SIZE, +- .cra_ctxsize = sizeof(struct rk_ahash_ctx), +- .cra_alignmask = 3, +- .cra_module = THIS_MODULE, +- } +- } +- }, +- .alg.hash.op = { +- .do_one_request = rk_hash_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_ahash_sha256 = { +- .type = CRYPTO_ALG_TYPE_AHASH, +- .alg.hash.base = { +- .init = rk_ahash_init, +- .update = rk_ahash_update, +- .final = rk_ahash_final, +- .finup = rk_ahash_finup, +- .export = rk_ahash_export, +- .import = rk_ahash_import, +- .digest = rk_ahash_digest, +- .init_tfm = rk_hash_init_tfm, +- .exit_tfm = rk_hash_exit_tfm, +- .halg = { +- .digestsize = SHA256_DIGEST_SIZE, +- .statesize = sizeof(struct sha256_state), +- .base = { +- .cra_name = "sha256", +- .cra_driver_name = "rk-sha256", +- .cra_priority = 300, +- .cra_flags = CRYPTO_ALG_ASYNC | +- CRYPTO_ALG_NEED_FALLBACK, +- .cra_blocksize = SHA256_BLOCK_SIZE, +- .cra_ctxsize = sizeof(struct rk_ahash_ctx), +- .cra_alignmask = 3, +- .cra_module = THIS_MODULE, +- } +- } +- }, +- .alg.hash.op = { +- .do_one_request = rk_hash_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_ahash_md5 = { +- .type = CRYPTO_ALG_TYPE_AHASH, +- .alg.hash.base = { +- .init = rk_ahash_init, +- .update = rk_ahash_update, +- .final = rk_ahash_final, +- .finup = rk_ahash_finup, +- .export = rk_ahash_export, +- .import = rk_ahash_import, +- .digest = rk_ahash_digest, +- .init_tfm = rk_hash_init_tfm, +- .exit_tfm = rk_hash_exit_tfm, +- .halg = { +- .digestsize = MD5_DIGEST_SIZE, +- .statesize = sizeof(struct md5_state), +- .base = { +- .cra_name = "md5", +- .cra_driver_name = "rk-md5", +- .cra_priority = 300, +- .cra_flags = CRYPTO_ALG_ASYNC | +- CRYPTO_ALG_NEED_FALLBACK, +- .cra_blocksize = SHA1_BLOCK_SIZE, +- .cra_ctxsize = sizeof(struct rk_ahash_ctx), +- .cra_alignmask = 3, +- .cra_module = THIS_MODULE, +- } +- } +- }, +- .alg.hash.op = { +- .do_one_request = rk_hash_run, +- }, +-}; +diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +deleted file mode 100644 +index da95747d9..000000000 +--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c ++++ /dev/null +@@ -1,613 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Crypto acceleration support for Rockchip RK3288 +- * +- * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd +- * +- * Author: Zain Wang +- * +- * Some ideas are from marvell-cesa.c and s5p-sss.c driver. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include "rk3288_crypto.h" +- +-#define RK_CRYPTO_DEC BIT(0) +- +-static int rk_cipher_need_fallback(struct skcipher_request *req) +-{ +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); +- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); +- struct scatterlist *sgs, *sgd; +- unsigned int stodo, dtodo, len; +- unsigned int bs = crypto_skcipher_blocksize(tfm); +- +- if (!req->cryptlen) +- return true; +- +- len = req->cryptlen; +- sgs = req->src; +- sgd = req->dst; +- while (sgs && sgd) { +- if (!IS_ALIGNED(sgs->offset, sizeof(u32))) { +- algt->stat_fb_align++; +- return true; +- } +- if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { +- algt->stat_fb_align++; +- return true; +- } +- stodo = min(len, sgs->length); +- if (stodo % bs) { +- algt->stat_fb_len++; +- return true; +- } +- dtodo = min(len, sgd->length); +- if (dtodo % bs) { +- algt->stat_fb_len++; +- return true; +- } +- if (stodo != dtodo) { +- algt->stat_fb_sgdiff++; +- return true; +- } +- len -= stodo; +- sgs = sg_next(sgs); +- sgd = sg_next(sgd); +- } +- return false; +-} +- +-static int rk_cipher_fallback(struct skcipher_request *areq) +-{ +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); +- struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); +- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); +- int err; +- +- algt->stat_fb++; +- +- skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); +- skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, +- areq->base.complete, areq->base.data); +- skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, +- areq->cryptlen, areq->iv); +- if (rctx->mode & RK_CRYPTO_DEC) +- err = crypto_skcipher_decrypt(&rctx->fallback_req); +- else +- err = crypto_skcipher_encrypt(&rctx->fallback_req); +- return err; +-} +- +-static int rk_cipher_handle_req(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- struct rk_crypto_info *rkc; +- struct crypto_engine *engine; +- +- if (rk_cipher_need_fallback(req)) +- return rk_cipher_fallback(req); +- +- rkc = get_rk_crypto(); +- +- engine = rkc->engine; +- rctx->dev = rkc; +- +- return crypto_transfer_skcipher_request_to_engine(engine, req); +-} +- +-static int rk_aes_setkey(struct crypto_skcipher *cipher, +- const u8 *key, unsigned int keylen) +-{ +- struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); +- struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm); +- +- if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 && +- keylen != AES_KEYSIZE_256) +- return -EINVAL; +- ctx->keylen = keylen; +- memcpy(ctx->key, key, keylen); +- +- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); +-} +- +-static int rk_des_setkey(struct crypto_skcipher *cipher, +- const u8 *key, unsigned int keylen) +-{ +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); +- int err; +- +- err = verify_skcipher_des_key(cipher, key); +- if (err) +- return err; +- +- ctx->keylen = keylen; +- memcpy(ctx->key, key, keylen); +- +- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); +-} +- +-static int rk_tdes_setkey(struct crypto_skcipher *cipher, +- const u8 *key, unsigned int keylen) +-{ +- struct rk_cipher_ctx *ctx = 
crypto_skcipher_ctx(cipher); +- int err; +- +- err = verify_skcipher_des3_key(cipher, key); +- if (err) +- return err; +- +- ctx->keylen = keylen; +- memcpy(ctx->key, key, keylen); +- +- return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); +-} +- +-static int rk_aes_ecb_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_AES_ECB_MODE; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_aes_ecb_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_aes_cbc_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_AES_CBC_MODE; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_aes_cbc_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des_ecb_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = 0; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des_ecb_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des_cbc_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des_cbc_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_SELECT; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; +- return rk_cipher_handle_req(req); +-} +- +-static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) +-{ +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- +- rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | +- RK_CRYPTO_DEC; +- return rk_cipher_handle_req(req); +-} +- +-static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) +-{ +- struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); +- struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); +- u32 block, conf_reg = 0; +- +- block = crypto_tfm_alg_blocksize(tfm); +- +- if (block == DES_BLOCK_SIZE) { +- rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | +- RK_CRYPTO_TDES_BYTESWAP_KEY | +- RK_CRYPTO_TDES_BYTESWAP_IV; +- CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, 
rctx->mode); +- memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); +- conf_reg = RK_CRYPTO_DESSEL; +- } else { +- rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | +- RK_CRYPTO_AES_KEY_CHANGE | +- RK_CRYPTO_AES_BYTESWAP_KEY | +- RK_CRYPTO_AES_BYTESWAP_IV; +- if (ctx->keylen == AES_KEYSIZE_192) +- rctx->mode |= RK_CRYPTO_AES_192BIT_key; +- else if (ctx->keylen == AES_KEYSIZE_256) +- rctx->mode |= RK_CRYPTO_AES_256BIT_key; +- CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); +- memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); +- } +- conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | +- RK_CRYPTO_BYTESWAP_BRFIFO; +- CRYPTO_WRITE(dev, RK_CRYPTO_CONF, conf_reg); +- CRYPTO_WRITE(dev, RK_CRYPTO_INTENA, +- RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); +-} +- +-static void crypto_dma_start(struct rk_crypto_info *dev, +- struct scatterlist *sgs, +- struct scatterlist *sgd, unsigned int todo) +-{ +- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs)); +- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo); +- CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd)); +- CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | +- _SBF(RK_CRYPTO_BLOCK_START, 16)); +-} +- +-static int rk_cipher_run(struct crypto_engine *engine, void *async_req) +-{ +- struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); +- struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); +- struct scatterlist *sgs, *sgd; +- int err = 0; +- int ivsize = crypto_skcipher_ivsize(tfm); +- int offset; +- u8 iv[AES_BLOCK_SIZE]; +- u8 biv[AES_BLOCK_SIZE]; +- u8 *ivtouse = areq->iv; +- unsigned int len = areq->cryptlen; +- unsigned int todo; +- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); +- struct rk_crypto_info *rkc = rctx->dev; +- +- err = pm_runtime_resume_and_get(rkc->dev); +- if (err) +- return err; +- +- algt->stat_req++; +- rkc->nreq++; +- +- ivsize = crypto_skcipher_ivsize(tfm); +- if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { +- if (rctx->mode & RK_CRYPTO_DEC) { +- offset = areq->cryptlen - ivsize; +- scatterwalk_map_and_copy(rctx->backup_iv, areq->src, +- offset, ivsize, 0); +- } +- } +- +- sgs = areq->src; +- sgd = areq->dst; +- +- while (sgs && sgd && len) { +- if (!sgs->length) { +- sgs = sg_next(sgs); +- sgd = sg_next(sgd); +- continue; +- } +- if (rctx->mode & RK_CRYPTO_DEC) { +- /* we backup last block of source to be used as IV at next step */ +- offset = sgs->length - ivsize; +- scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); +- } +- if (sgs == sgd) { +- err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); +- if (err <= 0) { +- err = -EINVAL; +- goto theend_iv; +- } +- } else { +- err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); +- if (err <= 0) { +- err = -EINVAL; +- goto theend_iv; +- } +- err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); +- if (err <= 0) { +- err = -EINVAL; +- goto theend_sgs; +- } +- } +- err = 0; +- rk_cipher_hw_init(rkc, areq); +- if (ivsize) { +- if (ivsize == DES_BLOCK_SIZE) +- memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); +- else +- memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); +- } +- reinit_completion(&rkc->complete); +- rkc->status = 0; +- +- todo = min(sg_dma_len(sgs), len); +- len -= todo; +- crypto_dma_start(rkc, sgs, sgd, todo / 4); +- wait_for_completion_interruptible_timeout(&rkc->complete, +- msecs_to_jiffies(2000)); 
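		/*
		 * Annotation (descriptive comment, not part of the surrounding
		 * driver code): rk_crypto_irq_handle() sets rkc->status to 1 and
		 * completes rkc->complete on a normal interrupt, but clears the
		 * status again when the interrupt flags report a DMA error, so a
		 * zero status after this wait covers both the 2 s timeout and a
		 * reported DMA error.
		 */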
+- if (!rkc->status) { +- dev_err(rkc->dev, "DMA timeout\n"); +- err = -EFAULT; +- goto theend; +- } +- if (sgs == sgd) { +- dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); +- } else { +- dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); +- dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); +- } +- if (rctx->mode & RK_CRYPTO_DEC) { +- memcpy(iv, biv, ivsize); +- ivtouse = iv; +- } else { +- offset = sgd->length - ivsize; +- scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0); +- ivtouse = iv; +- } +- sgs = sg_next(sgs); +- sgd = sg_next(sgd); +- } +- +- if (areq->iv && ivsize > 0) { +- offset = areq->cryptlen - ivsize; +- if (rctx->mode & RK_CRYPTO_DEC) { +- memcpy(areq->iv, rctx->backup_iv, ivsize); +- memzero_explicit(rctx->backup_iv, ivsize); +- } else { +- scatterwalk_map_and_copy(areq->iv, areq->dst, offset, +- ivsize, 0); +- } +- } +- +-theend: +- pm_runtime_put_autosuspend(rkc->dev); +- +- local_bh_disable(); +- crypto_finalize_skcipher_request(engine, areq, err); +- local_bh_enable(); +- return 0; +- +-theend_sgs: +- if (sgs == sgd) { +- dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); +- } else { +- dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); +- dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); +- } +-theend_iv: +- return err; +-} +- +-static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) +-{ +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- const char *name = crypto_tfm_alg_name(&tfm->base); +- struct skcipher_alg *alg = crypto_skcipher_alg(tfm); +- struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base); +- +- ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); +- if (IS_ERR(ctx->fallback_tfm)) { +- dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", +- name, PTR_ERR(ctx->fallback_tfm)); +- return PTR_ERR(ctx->fallback_tfm); +- } +- +- tfm->reqsize = sizeof(struct rk_cipher_rctx) + +- crypto_skcipher_reqsize(ctx->fallback_tfm); +- +- return 0; +-} +- +-static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) +-{ +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- +- memzero_explicit(ctx->key, ctx->keylen); +- crypto_free_skcipher(ctx->fallback_tfm); +-} +- +-struct rk_crypto_tmp rk_ecb_aes_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "ecb(aes)", +- .base.cra_driver_name = "ecb-aes-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = AES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x0f, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = AES_MIN_KEY_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE, +- .setkey = rk_aes_setkey, +- .encrypt = rk_aes_ecb_encrypt, +- .decrypt = rk_aes_ecb_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_cbc_aes_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "cbc(aes)", +- .base.cra_driver_name = "cbc-aes-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = AES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x0f, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = AES_MIN_KEY_SIZE, +- .max_keysize = AES_MAX_KEY_SIZE, +- .ivsize = 
AES_BLOCK_SIZE, +- .setkey = rk_aes_setkey, +- .encrypt = rk_aes_cbc_encrypt, +- .decrypt = rk_aes_cbc_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_ecb_des_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "ecb(des)", +- .base.cra_driver_name = "ecb-des-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = DES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x07, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = DES_KEY_SIZE, +- .max_keysize = DES_KEY_SIZE, +- .setkey = rk_des_setkey, +- .encrypt = rk_des_ecb_encrypt, +- .decrypt = rk_des_ecb_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_cbc_des_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "cbc(des)", +- .base.cra_driver_name = "cbc-des-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = DES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x07, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = DES_KEY_SIZE, +- .max_keysize = DES_KEY_SIZE, +- .ivsize = DES_BLOCK_SIZE, +- .setkey = rk_des_setkey, +- .encrypt = rk_des_cbc_encrypt, +- .decrypt = rk_des_cbc_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_ecb_des3_ede_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "ecb(des3_ede)", +- .base.cra_driver_name = "ecb-des3-ede-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = DES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x07, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = DES3_EDE_KEY_SIZE, +- .max_keysize = DES3_EDE_KEY_SIZE, +- .setkey = rk_tdes_setkey, +- .encrypt = rk_des3_ede_ecb_encrypt, +- .decrypt = rk_des3_ede_ecb_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +- +-struct rk_crypto_tmp rk_cbc_des3_ede_alg = { +- .type = CRYPTO_ALG_TYPE_SKCIPHER, +- .alg.skcipher.base = { +- .base.cra_name = "cbc(des3_ede)", +- .base.cra_driver_name = "cbc-des3-ede-rk", +- .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, +- .base.cra_blocksize = DES_BLOCK_SIZE, +- .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), +- .base.cra_alignmask = 0x07, +- .base.cra_module = THIS_MODULE, +- +- .init = rk_cipher_tfm_init, +- .exit = rk_cipher_tfm_exit, +- .min_keysize = DES3_EDE_KEY_SIZE, +- .max_keysize = DES3_EDE_KEY_SIZE, +- .ivsize = DES_BLOCK_SIZE, +- .setkey = rk_tdes_setkey, +- .encrypt = rk_des3_ede_cbc_encrypt, +- .decrypt = rk_des3_ede_cbc_decrypt, +- }, +- .alg.skcipher.op = { +- .do_one_request = rk_cipher_run, +- }, +-}; +diff --git a/drivers/crypto/rockchip/rk_crypto_ahash_utils.c b/drivers/crypto/rockchip/rk_crypto_ahash_utils.c +new file mode 100644 +index 000000000..495c55485 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_ahash_utils.c +@@ -0,0 +1,450 @@ ++// SPDX-License-Identifier: 
GPL-2.0 ++/* ++ * Rockchip crypto hash uitls ++ * ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ */ + -+ /* shift 1 to num_bits+31 position */ -+ if (num_bits > 0) -+ RK_PKA_SHL0(r_t0 /*op_a*/, num_bits - 1 /*s*/, r_t0 /*res*/); ++#include "rk_crypto_core.h" ++#include "rk_crypto_ahash_utils.h" + -+ /* shift to word position */ -+ for (i = 0; i < num_words; i++) -+ RK_PKA_SHL0(r_t0 /*op_a*/, 31 /*s*/, r_t0 /*res*/); ++static const char * const hash_algo2name[] = { ++ [HASH_ALGO_MD5] = "md5", ++ [HASH_ALGO_SHA1] = "sha1", ++ [HASH_ALGO_SHA224] = "sha224", ++ [HASH_ALGO_SHA256] = "sha256", ++ [HASH_ALGO_SHA384] = "sha384", ++ [HASH_ALGO_SHA512] = "sha512", ++ [HASH_ALGO_SM3] = "sm3", ++}; + -+ /*--------------------------------------------------------------*/ -+ /* Step 3. Dividing: PKA_NP = (r_t0 * 2**s) / N */ -+ /*--------------------------------------------------------------*/ -+ ret = pka_div_bignum(r_t0, s, PKA_N, PKA_NP, r_t1, r_t2); ++static void rk_alg_ctx_clear(struct rk_alg_ctx *alg_ctx) ++{ ++ alg_ctx->total = 0; ++ alg_ctx->left_bytes = 0; ++ alg_ctx->count = 0; ++ alg_ctx->sg_src = 0; ++ alg_ctx->req_src = 0; ++ alg_ctx->src_nents = 0; ++} + -+ return ret; -+} /* END OF pka_calc_and_init_np */ ++static void rk_ahash_ctx_clear(struct rk_ahash_ctx *ctx) ++{ ++ rk_alg_ctx_clear(&ctx->algs_ctx); + -+/********************* Public Function Definition ****************************/ ++ memset(ctx->hash_tmp, 0x00, RK_DMA_ALIGNMENT); ++ memset(ctx->lastc, 0x00, sizeof(ctx->lastc)); + -+void rk_pka_set_crypto_base(void __iomem *base) ++ ctx->hash_tmp_len = 0; ++ ctx->calc_cnt = 0; ++ ctx->lastc_len = 0; ++} ++ ++struct rk_ahash_ctx *rk_ahash_ctx_cast(struct rk_crypto_dev *rk_dev) +{ -+ pka_base = base; ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ ++ return crypto_ahash_ctx(tfm); +} + -+/** -+ * @brief calculate exp mod. out = in ^ e mod n -+ * @param in: the point of input data bignum. -+ * @param e: the point of exponent bignum. -+ * @param n: the point of modulus bignum. -+ * @param out: the point of outputs bignum. -+ * @param pTmp: the point of tmpdata bignum. 
-+ * @return 0 for success -+ */ -+int rk_pka_expt_mod(struct rk_bignum *in, -+ struct rk_bignum *e, -+ struct rk_bignum *n, -+ struct rk_bignum *out) ++struct rk_alg_ctx *rk_ahash_alg_ctx(struct rk_crypto_dev *rk_dev) +{ -+ int ret = -1; -+ u32 max_word_size; -+ bool is_max_poll; -+ u8 r_in = 2, r_e = 3, r_out = 4; -+ u8 r_t0 = 2, r_t1 = 3, r_t2 = 4; ++ return &(rk_ahash_ctx_cast(rk_dev))->algs_ctx; ++} + -+ if (!in || !e || !n || !out || PKA_BIGNUM_WORDS(n) == 0) -+ return -1; ++struct rk_crypto_algt *rk_ahash_get_algt(struct crypto_ahash *tfm) ++{ ++ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + -+ max_word_size = PKA_BIGNUM_WORDS(n); ++ return container_of(alg, struct rk_crypto_algt, alg.hash); ++} + -+ ret = pka_init(max_word_size); -+ if (ret) { -+ CRYPTO_TRACE("pka_init error\n"); -+ goto exit; ++static int rk_ahash_set_data_start(struct rk_crypto_dev *rk_dev, uint32_t flag) ++{ ++ int err; ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ ++ CRYPTO_TRACE(); ++ ++ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); ++ if (!err) ++ err = alg_ctx->ops.hw_dma_start(rk_dev, flag); ++ ++ return err; ++} ++ ++static u32 rk_calc_lastc_new_len(u32 nbytes, u32 old_len) ++{ ++ u32 total_len = nbytes + old_len; ++ ++ if (total_len <= RK_DMA_ALIGNMENT) ++ return nbytes; ++ ++ if (total_len % RK_DMA_ALIGNMENT) ++ return total_len % RK_DMA_ALIGNMENT; ++ ++ return RK_DMA_ALIGNMENT; ++} ++ ++static int rk_ahash_fallback_digest(const char *alg_name, bool is_hmac, ++ const u8 *key, u32 key_len, ++ const u8 *msg, u32 msg_len, ++ u8 *digest) ++{ ++ struct crypto_ahash *ahash_tfm; ++ struct ahash_request *req; ++ struct crypto_wait wait; ++ struct scatterlist sg; ++ int ret; ++ ++ CRYPTO_TRACE("%s, is_hmac = %d, key_len = %u, msg_len = %u", ++ alg_name, is_hmac, key_len, msg_len); ++ ++ ahash_tfm = crypto_alloc_ahash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ahash_tfm)) ++ return PTR_ERR(ahash_tfm); ++ ++ req = ahash_request_alloc(ahash_tfm, GFP_KERNEL); ++ if (!req) { ++ crypto_free_ahash(ahash_tfm); ++ return -ENOMEM; + } + -+ /* calculate NP by initialization PKA for modular operations */ -+ ret = pka_calc_and_init_np(n, r_t0, r_t1, r_t2); ++ init_completion(&wait.completion); ++ ++ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, ++ crypto_req_done, &wait); ++ ++ crypto_ahash_clear_flags(ahash_tfm, ~0); ++ ++ sg_init_one(&sg, msg, msg_len); ++ ahash_request_set_crypt(req, &sg, digest, msg_len); ++ ++ if (is_hmac) ++ crypto_ahash_setkey(ahash_tfm, key, key_len); ++ ++ ret = crypto_wait_req(crypto_ahash_digest(req), &wait); + if (ret) { -+ CRYPTO_TRACE("pka_calc_and_init_np error\n"); ++ CRYPTO_MSG("digest failed, ret = %d", ret); + goto exit; + } + -+ pka_clear_regs_block(r_in, 3); ++exit: ++ ahash_request_free(req); ++ crypto_free_ahash(ahash_tfm); + -+ pka_copy_bn_into_reg(r_in, in); -+ pka_copy_bn_into_reg(r_e, e); -+ pka_copy_bn_into_reg(PKA_N, n); ++ return ret; ++} + -+ ret = RK_PKA_MOD_EXP(r_in, r_e, r_out); -+ if (ret) { -+ CRYPTO_TRACE("RK_PKA_MOD_EXP error\n"); ++static int rk_ahash_get_zero_result(struct ahash_request *req) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ ++ return rk_ahash_fallback_digest(crypto_ahash_alg_name(tfm), ++ algt->type == ALG_TYPE_HMAC, ++ ctx->authkey, ctx->authkey_len, ++ NULL, 0, req->result); ++} ++ ++int rk_ahash_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, 
unsigned int keylen) ++{ ++ unsigned int blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); ++ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ const char *alg_name; ++ int ret = 0; ++ ++ CRYPTO_MSG(); ++ ++ if (algt->algo >= ARRAY_SIZE(hash_algo2name)) { ++ CRYPTO_MSG("hash algo %d invalid\n", algt->algo); ++ return -EINVAL; ++ } ++ ++ memset(ctx->authkey, 0, sizeof(ctx->authkey)); ++ ++ if (keylen <= blocksize) { ++ memcpy(ctx->authkey, key, keylen); ++ ctx->authkey_len = keylen; + goto exit; + } + -+ /* e is usually 0x10001 in public key EXP_MOD operation */ -+ is_max_poll = rk_bn_highest_bit(e) * 2 > rk_bn_highest_bit(n) ? true : false; ++ alg_name = hash_algo2name[algt->algo]; + -+ ret = pka_copy_bn_from_reg(out, max_word_size, r_out, is_max_poll); ++ CRYPTO_TRACE("calc key digest %s", alg_name); + -+exit: -+ pka_clear_regs_block(0, 5); -+ pka_clear_regs_block(30, 2); -+ pka_finish(); ++ ret = rk_ahash_fallback_digest(alg_name, false, NULL, 0, key, keylen, ++ ctx->authkey); ++ if (ret) { ++ CRYPTO_MSG("rk_ahash_fallback_digest error ret = %d\n", ret); ++ goto exit; ++ } + ++ ctx->authkey_len = crypto_ahash_digestsize(tfm); ++exit: + return ret; +} -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_pka.h b/drivers/crypto/rockchip/rk_crypto_v2_pka.h -new file mode 100644 -index 000000000..3c0b236f9 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_pka.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ + -+/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. */ ++int rk_ahash_init(struct ahash_request *req) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+#ifndef __RK_CRYPTO_V2_PKA_H__ -+#define __RK_CRYPTO_V2_PKA_H__ ++ CRYPTO_TRACE(); + -+#include "rk_crypto_bignum.h" ++ memset(rctx, 0x00, sizeof(*rctx)); ++ rk_ahash_ctx_clear(ctx); + -+void rk_pka_set_crypto_base(void __iomem *base); ++ return 0; ++} + -+int rk_pka_expt_mod(struct rk_bignum *in, -+ struct rk_bignum *e, -+ struct rk_bignum *n, -+ struct rk_bignum *out); ++int rk_ahash_update(struct ahash_request *req) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_crypto_dev *rk_dev = ctx->rk_dev; + -+#endif -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_reg.h b/drivers/crypto/rockchip/rk_crypto_v2_reg.h -new file mode 100644 -index 000000000..a938ce3f6 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_reg.h -@@ -0,0 +1,378 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ ++ CRYPTO_TRACE("nbytes = %u", req->nbytes); + -+/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ ++ memset(rctx, 0x00, sizeof(*rctx)); + -+#ifndef __RK_CRYPTO_V2_REG_H__ -+#define __RK_CRYPTO_V2_REG_H__ ++ rctx->flag = RK_FLAG_UPDATE; + -+#define _SBF(s, v) ((v) << (s)) ++ return rk_dev->enqueue(rk_dev, &req->base); ++} + -+#define CRYPTO_WRITE_MASK_SHIFT (16) -+#define CRYPTO_WRITE_MASK_ALL ((0xffffu << CRYPTO_WRITE_MASK_SHIFT)) ++int rk_ahash_final(struct ahash_request *req) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_crypto_dev *rk_dev = ctx->rk_dev; + -+#define WRITE_MASK (16) ++ CRYPTO_TRACE(); + -+/* Crypto control registers*/ -+#define CRYPTO_CLK_CTL 0x0000 -+#define CRYPTO_AUTO_CLKGATE_EN BIT(0) ++ memset(rctx, 0x00, sizeof(*rctx)); + -+#define CRYPTO_RST_CTL 0x0004 -+#define CRYPTO_SW_PKA_RESET BIT(2) -+#define CRYPTO_SW_RNG_RESET BIT(1) -+#define CRYPTO_SW_CC_RESET BIT(0) ++ rctx->flag = RK_FLAG_FINAL; + -+/* Crypto DMA control registers*/ -+#define CRYPTO_DMA_INT_EN 0x0008 -+#define CRYPTO_ZERO_ERR_INT_EN BIT(6) -+#define CRYPTO_LIST_ERR_INT_EN BIT(5) -+#define CRYPTO_SRC_ERR_INT_EN BIT(4) -+#define CRYPTO_DST_ERR_INT_EN BIT(3) -+#define CRYPTO_SRC_ITEM_INT_EN BIT(2) -+#define CRYPTO_DST_ITEM_DONE_INT_EN BIT(1) -+#define CRYPTO_LIST_DONE_INT_EN BIT(0) ++ /* use fallback hash */ ++ if (ctx->calc_cnt == 0 && ++ ctx->hash_tmp_len == 0 && ++ ctx->lastc_len == 0) { ++ CRYPTO_TRACE("use fallback hash"); ++ return rk_ahash_get_zero_result(req); ++ } + -+#define CRYPTO_DMA_INT_ST 0x000C -+#define CRYPTO_LOCKSTEP_INT_ST BIT(7) -+#define CRYPTO_ZERO_LEN_INT_ST BIT(6) -+#define CRYPTO_LIST_ERR_INT_ST BIT(5) -+#define CRYPTO_SRC_ERR_INT_ST BIT(4) -+#define CRYPTO_DST_ERR_INT_ST BIT(3) -+#define CRYPTO_SRC_ITEM_DONE_INT_ST BIT(2) -+#define CRYPTO_DST_ITEM_DONE_INT_ST BIT(1) -+#define CRYPTO_LIST_DONE_INT_ST BIT(0) ++ return rk_dev->enqueue(rk_dev, &req->base); ++} + -+#define CRYPTO_LOCKSTEP_MASK (~((u32)CRYPTO_LOCKSTEP_INT_ST)) -+#define CRYPTO_DMA_CTL 0x0010 -+#define CRYPTO_DMA_RESTART BIT(1) -+#define CRYPTO_DMA_START BIT(0) ++int rk_ahash_finup(struct ahash_request *req) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_crypto_dev *rk_dev = ctx->rk_dev; + -+/* DMA LIST Start Address Register */ -+#define CRYPTO_DMA_LLI_ADDR 0x0014 ++ CRYPTO_TRACE("nbytes = %u", req->nbytes); + -+#define CRYPTO_DMA_ST 0x0018 -+#define CRYPTO_DMA_BUSY BIT(0) ++ memset(rctx, 0x00, sizeof(*rctx)); + -+#define CRYPTO_DMA_STATE 0x001C -+#define CRYPTO_LLI_IDLE_STATE _SBF(4, 0x00) -+#define CRYPTO_LLI_FETCH_STATE _SBF(4, 0x01) -+#define CRYPTO_LLI_WORK_STATE _SBF(4, 0x02) -+#define CRYPTO_SRC_IDLE_STATE _SBF(2, 0x00) -+#define CRYPTO_SRC_LOAD_STATE _SBF(2, 0x01) -+#define CRYPTO_SRC_WORK_STATE _SBF(2, 0x02) -+#define CRYPTO_DST_IDLE_STATE _SBF(0, 0x00) -+#define CRYPTO_DST_LOAD_STATE _SBF(0, 0x01) -+#define CRYPTO_DST_WORK_STATE _SBF(0, 0x02) ++ rctx->flag = RK_FLAG_UPDATE | RK_FLAG_FINAL; + -+/* DMA LLI Read Address Register */ -+#define CRYPTO_DMA_LLI_RADDR 0x0020 ++ /* use fallback hash */ ++ if (req->nbytes == 0 && ++ ctx->calc_cnt == 0 && ++ ctx->hash_tmp_len == 0 && ++ ctx->lastc_len == 0) { ++ CRYPTO_TRACE("use fallback hash"); ++ return rk_ahash_get_zero_result(req); ++ } + -+/* DMA Source Data Read Address Register */ -+#define CRYPTO_DMA_SRC_RADDR 0x0024 ++ return rk_dev->enqueue(rk_dev, &req->base); ++} + -+/* DMA Destination 
Data Read Address Register */ -+#define CRYPTO_DMA_DST_RADDR 0x0028 ++int rk_ahash_digest(struct ahash_request *req) ++{ ++ CRYPTO_TRACE("calc data %u bytes.", req->nbytes); + -+#define CRYPTO_DMA_ITEM_ID 0x002C ++ return rk_ahash_init(req) ?: rk_ahash_finup(req); ++} + -+#define CRYPTO_FIFO_CTL 0x0040 -+#define CRYPTO_DOUT_BYTESWAP BIT(1) -+#define CRYPTO_DOIN_BYTESWAP BIT(0) ++int rk_ahash_start(struct rk_crypto_dev *rk_dev) ++{ ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_ahash_get_algt(tfm); ++ struct scatterlist *src_sg; ++ unsigned int nbytes; ++ int ret = 0; + -+/* Block Cipher Control Register */ -+#define CRYPTO_BC_CTL 0x0044 -+#define CRYPTO_BC_AES _SBF(8, 0x00) -+#define CRYPTO_BC_SM4 _SBF(8, 0x01) -+#define CRYPTO_BC_DES _SBF(8, 0x02) -+#define CRYPTO_BC_TDES _SBF(8, 0x03) -+#define CRYPTO_BC_ECB _SBF(4, 0x00) -+#define CRYPTO_BC_CBC _SBF(4, 0x01) -+#define CRYPTO_BC_CTS _SBF(4, 0x02) -+#define CRYPTO_BC_CTR _SBF(4, 0x03) -+#define CRYPTO_BC_CFB _SBF(4, 0x04) -+#define CRYPTO_BC_OFB _SBF(4, 0x05) -+#define CRYPTO_BC_XTS _SBF(4, 0x06) -+#define CRYPTO_BC_CCM _SBF(4, 0x07) -+#define CRYPTO_BC_GCM _SBF(4, 0x08) -+#define CRYPTO_BC_CMAC _SBF(4, 0x09) -+#define CRYPTO_BC_CBC_MAC _SBF(4, 0x0A) -+#define CRYPTO_BC_128_bit_key _SBF(2, 0x00) -+#define CRYPTO_BC_192_bit_key _SBF(2, 0x01) -+#define CRYPTO_BC_256_bit_key _SBF(2, 0x02) -+#define CRYPTO_BC_DECRYPT BIT(1) -+#define CRYPTO_BC_ENABLE BIT(0) ++ CRYPTO_TRACE("origin: old_len = %u, new_len = %u, nbytes = %u, flag = %d", ++ ctx->hash_tmp_len, ctx->lastc_len, req->nbytes, rctx->flag); + -+/* Hash Control Register */ -+#define CRYPTO_HASH_CTL 0x0048 -+#define CRYPTO_SHA1 _SBF(4, 0x00) -+#define CRYPTO_MD5 _SBF(4, 0x01) -+#define CRYPTO_SHA256 _SBF(4, 0x02) -+#define CRYPTO_SHA224 _SBF(4, 0x03) -+#define CRYPTO_SM3 _SBF(4, 0x06) -+#define CRYPTO_SHA512 _SBF(4, 0x08) -+#define CRYPTO_SHA384 _SBF(4, 0x09) -+#define CRYPTO_SHA512_224 _SBF(4, 0x0A) -+#define CRYPTO_SHA512_256 _SBF(4, 0x0B) -+#define CRYPTO_HMAC_ENABLE BIT(3) -+#define CRYPTO_HW_PAD_ENABLE BIT(2) -+#define CRYPTO_HASH_SRC_SEL BIT(1) -+#define CRYPTO_HASH_ENABLE BIT(0) ++ /* update 0Byte do nothing */ ++ if (req->nbytes == 0 && !(rctx->flag & RK_FLAG_FINAL)) ++ goto no_calc; + -+/* Cipher Status Register */ -+#define CRYPTO_CIPHER_ST 0x004C -+#define CRYPTO_OTP_KEY_VALID BIT(2) -+#define CRYPTO_HASH_BUSY BIT(1) -+#define CRYPTO_BLOCK_CIPHER_BUSY BIT(0) ++ if (ctx->lastc_len) { ++ /* move lastc saved last time to the head of this calculation */ ++ memcpy(ctx->hash_tmp + ctx->hash_tmp_len, ctx->lastc, ctx->lastc_len); ++ ctx->hash_tmp_len = ctx->hash_tmp_len + ctx->lastc_len; ++ ctx->lastc_len = 0; ++ } + -+#define CRYPTO_CIPHER_STATE 0x0050 -+#define CRYPTO_HASH_IDLE_STATE _SBF(10, 0x01) -+#define CRYPTO_HASH_IPAD_STATE _SBF(10, 0x02) -+#define CRYPTO_HASH_TEXT_STATE _SBF(10, 0x04) -+#define CRYPTO_HASH_OPAD_STATE _SBF(10, 0x08) -+#define CRYPTO_HASH_OPAD_EXT_STATE _SBF(10, 0x10) -+#define CRYPTO_GCM_IDLE_STATE _SBF(8, 0x00) -+#define CRYPTO_GCM_PRE_STATE _SBF(8, 0x01) -+#define CRYPTO_GCM_NA_STATE _SBF(8, 0x02) -+#define CRYPTO_GCM_PC_STATE _SBF(8, 0x03) -+#define CRYPTO_CCM_IDLE_STATE _SBF(6, 0x00) -+#define CRYPTO_CCM_PRE_STATE _SBF(6, 0x01) -+#define CRYPTO_CCM_NA_STATE _SBF(6, 0x02) 
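/*
 * A minimal standalone sketch of the tail-buffering rule used in the hash
 * update path above: a short tail ("lastc") is held back so that, apart from
 * the final request, the hardware only ever hashes whole RK_DMA_ALIGNMENT-sized
 * chunks. This assumes RK_DMA_ALIGNMENT is 64, matching the 64-byte alignment
 * used elsewhere in this patch; the function and values below are illustrative
 * only, not driver code.
 */
#include <stdio.h>

#define RK_DMA_ALIGNMENT 64

/* How many trailing bytes of this update to carry into the next request. */
static unsigned int calc_lastc_new_len(unsigned int nbytes, unsigned int old_len)
{
	unsigned int total_len = nbytes + old_len;

	if (total_len <= RK_DMA_ALIGNMENT)
		return nbytes;                        /* everything stays buffered */

	if (total_len % RK_DMA_ALIGNMENT)
		return total_len % RK_DMA_ALIGNMENT;  /* keep the unaligned remainder */

	return RK_DMA_ALIGNMENT;                      /* aligned total: keep one full chunk for final */
}

int main(void)
{
	/* buffered + new -> bytes held back (the hashed amount stays a multiple of 64) */
	printf("%u\n", calc_lastc_new_len(10, 0));   /* 10: total 10, all buffered */
	printf("%u\n", calc_lastc_new_len(100, 30)); /*  2: total 130, hash 128    */
	printf("%u\n", calc_lastc_new_len(118, 10)); /* 64: total 128, hash 64     */
	return 0;
}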
-+#define CRYPTO_CCM_PC_STATE _SBF(6, 0x03) -+#define CRYPTO_PARALLEL_IDLE_STATE _SBF(4, 0x00) -+#define CRYPTO_PARALLEL_PRE_STATE _SBF(4, 0x01) -+#define CRYPTO_PARALLEL_BULK_STATE _SBF(4, 0x02) -+#define CRYPTO_MAC_IDLE_STATE _SBF(2, 0x00) -+#define CRYPTO_MAC_PRE_STATE _SBF(2, 0x01) -+#define CRYPTO_MAC_BULK_STATE _SBF(2, 0x02) -+#define CRYPTO_SERIAL_IDLE_STATE _SBF(0, 0x00) -+#define CRYPTO_SERIAL_PRE_STATE _SBF(0, 0x01) -+#define CRYPTO_SERIAL_BULK_STATE _SBF(0, 0x02) ++ CRYPTO_TRACE("hash_tmp_len = %u", ctx->hash_tmp_len); + -+#define CRYPTO_CH0_IV_0 0x0100 -+#define CRYPTO_CH0_IV_1 0x0104 -+#define CRYPTO_CH0_IV_2 0x0108 -+#define CRYPTO_CH0_IV_3 0x010c -+#define CRYPTO_CH1_IV_0 0x0110 -+#define CRYPTO_CH1_IV_1 0x0114 -+#define CRYPTO_CH1_IV_2 0x0118 -+#define CRYPTO_CH1_IV_3 0x011c -+#define CRYPTO_CH2_IV_0 0x0120 -+#define CRYPTO_CH2_IV_1 0x0124 -+#define CRYPTO_CH2_IV_2 0x0128 -+#define CRYPTO_CH2_IV_3 0x012c -+#define CRYPTO_CH3_IV_0 0x0130 -+#define CRYPTO_CH3_IV_1 0x0134 -+#define CRYPTO_CH3_IV_2 0x0138 -+#define CRYPTO_CH3_IV_3 0x013c -+#define CRYPTO_CH4_IV_0 0x0140 -+#define CRYPTO_CH4_IV_1 0x0144 -+#define CRYPTO_CH4_IV_2 0x0148 -+#define CRYPTO_CH4_IV_3 0x014c -+#define CRYPTO_CH5_IV_0 0x0150 -+#define CRYPTO_CH5_IV_1 0x0154 -+#define CRYPTO_CH5_IV_2 0x0158 -+#define CRYPTO_CH5_IV_3 0x015c -+#define CRYPTO_CH6_IV_0 0x0160 -+#define CRYPTO_CH6_IV_1 0x0164 -+#define CRYPTO_CH6_IV_2 0x0168 -+#define CRYPTO_CH6_IV_3 0x016c -+#define CRYPTO_CH7_IV_0 0x0170 -+#define CRYPTO_CH7_IV_1 0x0174 -+#define CRYPTO_CH7_IV_2 0x0178 -+#define CRYPTO_CH7_IV_3 0x017c ++ /* final request no need to save lastc_new */ ++ if ((rctx->flag & RK_FLAG_UPDATE) && (rctx->flag & RK_FLAG_FINAL)) { ++ nbytes = req->nbytes + ctx->hash_tmp_len; + -+#define CRYPTO_CH0_KEY_0 0x0180 -+#define CRYPTO_CH0_KEY_1 0x0184 -+#define CRYPTO_CH0_KEY_2 0x0188 -+#define CRYPTO_CH0_KEY_3 0x018c -+#define CRYPTO_CH1_KEY_0 0x0190 -+#define CRYPTO_CH1_KEY_1 0x0194 -+#define CRYPTO_CH1_KEY_2 0x0198 -+#define CRYPTO_CH1_KEY_3 0x019c -+#define CRYPTO_CH2_KEY_0 0x01a0 -+#define CRYPTO_CH2_KEY_1 0x01a4 -+#define CRYPTO_CH2_KEY_2 0x01a8 -+#define CRYPTO_CH2_KEY_3 0x01ac -+#define CRYPTO_CH3_KEY_0 0x01b0 -+#define CRYPTO_CH3_KEY_1 0x01b4 -+#define CRYPTO_CH3_KEY_2 0x01b8 -+#define CRYPTO_CH3_KEY_3 0x01bc -+#define CRYPTO_CH4_KEY_0 0x01c0 -+#define CRYPTO_CH4_KEY_1 0x01c4 -+#define CRYPTO_CH4_KEY_2 0x01c8 -+#define CRYPTO_CH4_KEY_3 0x01cc -+#define CRYPTO_CH5_KEY_0 0x01d0 -+#define CRYPTO_CH5_KEY_1 0x01d4 -+#define CRYPTO_CH5_KEY_2 0x01d8 -+#define CRYPTO_CH5_KEY_3 0x01dc -+#define CRYPTO_CH6_KEY_0 0x01e0 -+#define CRYPTO_CH6_KEY_1 0x01e4 -+#define CRYPTO_CH6_KEY_2 0x01e8 -+#define CRYPTO_CH6_KEY_3 0x01ec -+#define CRYPTO_CH7_KEY_0 0x01f0 -+#define CRYPTO_CH7_KEY_1 0x01f4 -+#define CRYPTO_CH7_KEY_2 0x01f8 -+#define CRYPTO_CH7_KEY_3 0x01fc -+#define CRYPTO_KEY_CHANNEL_NUM 8 ++ CRYPTO_TRACE("finup %u bytes", nbytes); ++ } else if (rctx->flag & RK_FLAG_UPDATE) { ++ ctx->lastc_len = rk_calc_lastc_new_len(req->nbytes, ctx->hash_tmp_len); + -+#define CRYPTO_CH0_PC_LEN_0 0x0280 -+#define CRYPTO_CH0_PC_LEN_1 0x0284 -+#define CRYPTO_CH1_PC_LEN_0 0x0288 -+#define CRYPTO_CH1_PC_LEN_1 0x028c -+#define CRYPTO_CH2_PC_LEN_0 0x0290 -+#define CRYPTO_CH2_PC_LEN_1 0x0294 -+#define CRYPTO_CH3_PC_LEN_0 0x0298 -+#define CRYPTO_CH3_PC_LEN_1 0x029c -+#define CRYPTO_CH4_PC_LEN_0 0x02a0 -+#define CRYPTO_CH4_PC_LEN_1 0x02a4 -+#define CRYPTO_CH5_PC_LEN_0 0x02a8 -+#define CRYPTO_CH5_PC_LEN_1 0x02ac -+#define CRYPTO_CH6_PC_LEN_0 0x02b0 -+#define 
CRYPTO_CH6_PC_LEN_1 0x02b4 -+#define CRYPTO_CH7_PC_LEN_0 0x02b8 -+#define CRYPTO_CH7_PC_LEN_1 0x02bc ++ CRYPTO_TRACE("nents = %u, ctx->lastc_len = %u, offset = %u", ++ sg_nents_for_len(req->src, req->nbytes), ctx->lastc_len, ++ req->nbytes - ctx->lastc_len); + -+#define CRYPTO_CH0_AAD_LEN_0 0x02c0 -+#define CRYPTO_CH0_AAD_LEN_1 0x02c4 -+#define CRYPTO_CH1_AAD_LEN_0 0x02c8 -+#define CRYPTO_CH1_AAD_LEN_1 0x02cc -+#define CRYPTO_CH2_AAD_LEN_0 0x02d0 -+#define CRYPTO_CH2_AAD_LEN_1 0x02d4 -+#define CRYPTO_CH3_AAD_LEN_0 0x02d8 -+#define CRYPTO_CH3_AAD_LEN_1 0x02dc -+#define CRYPTO_CH4_AAD_LEN_0 0x02e0 -+#define CRYPTO_CH4_AAD_LEN_1 0x02e4 -+#define CRYPTO_CH5_AAD_LEN_0 0x02e8 -+#define CRYPTO_CH5_AAD_LEN_1 0x02ec -+#define CRYPTO_CH6_AAD_LEN_0 0x02f0 -+#define CRYPTO_CH6_AAD_LEN_1 0x02f4 -+#define CRYPTO_CH7_AAD_LEN_0 0x02f8 -+#define CRYPTO_CH7_AAD_LEN_1 0x02fc ++ if (!sg_pcopy_to_buffer(req->src, sg_nents_for_len(req->src, req->nbytes), ++ ctx->lastc, ctx->lastc_len, req->nbytes - ctx->lastc_len)) { ++ ret = -EINVAL; ++ goto exit; ++ } + -+#define CRYPTO_CH0_IV_LEN_0 0x0300 -+#define CRYPTO_CH1_IV_LEN_0 0x0304 -+#define CRYPTO_CH2_IV_LEN_0 0x0308 -+#define CRYPTO_CH3_IV_LEN_0 0x030c -+#define CRYPTO_CH4_IV_LEN_0 0x0310 -+#define CRYPTO_CH5_IV_LEN_0 0x0314 -+#define CRYPTO_CH6_IV_LEN_0 0x0318 -+#define CRYPTO_CH7_IV_LEN_0 0x031c ++ nbytes = ctx->hash_tmp_len + req->nbytes - ctx->lastc_len; + -+#define CRYPTO_CH0_TAG_0 0x0320 -+#define CRYPTO_CH0_TAG_1 0x0324 -+#define CRYPTO_CH0_TAG_2 0x0328 -+#define CRYPTO_CH0_TAG_3 0x032c ++ /* not enough data */ ++ if (nbytes < RK_DMA_ALIGNMENT) { ++ CRYPTO_TRACE("nbytes = %u, not enough data", nbytes); ++ memcpy(ctx->hash_tmp + ctx->hash_tmp_len, ++ ctx->lastc, ctx->lastc_len); ++ ctx->hash_tmp_len = ctx->hash_tmp_len + ctx->lastc_len; ++ ctx->lastc_len = 0; ++ goto no_calc; ++ } + -+#define CRYPTO_HASH_DOUT_0 0x03a0 -+#define CRYPTO_HASH_DOUT_1 0x03a4 -+#define CRYPTO_HASH_DOUT_2 0x03a8 -+#define CRYPTO_HASH_DOUT_3 0x03ac -+#define CRYPTO_HASH_DOUT_4 0x03b0 -+#define CRYPTO_HASH_DOUT_5 0x03b4 -+#define CRYPTO_HASH_DOUT_6 0x03b8 -+#define CRYPTO_HASH_DOUT_7 0x03bc -+#define CRYPTO_HASH_DOUT_8 0x03c0 -+#define CRYPTO_HASH_DOUT_9 0x03c4 -+#define CRYPTO_HASH_DOUT_10 0x03c8 -+#define CRYPTO_HASH_DOUT_11 0x03cc -+#define CRYPTO_HASH_DOUT_12 0x03d0 -+#define CRYPTO_HASH_DOUT_13 0x03d4 -+#define CRYPTO_HASH_DOUT_14 0x03d8 -+#define CRYPTO_HASH_DOUT_15 0x03dc ++ CRYPTO_TRACE("update nbytes = %u", nbytes); ++ } else { ++ /* final just calc lastc_old */ ++ nbytes = ctx->hash_tmp_len; + -+#define CRYPTO_TAG_VALID 0x03e0 -+#define CRYPTO_CH0_TAG_VALID BIT(0) ++ CRYPTO_TRACE("final nbytes = %u", nbytes); ++ } + -+#define CRYPTO_HASH_VALID 0x03e4 -+#define CRYPTO_HASH_IS_VALID BIT(0) ++ if (ctx->hash_tmp_len) { ++ /* Concatenate old data to the header */ ++ sg_init_table(ctx->hash_sg, ARRAY_SIZE(ctx->hash_sg)); ++ sg_set_buf(ctx->hash_sg, ctx->hash_tmp, ctx->hash_tmp_len); + -+#define LLI_DMA_CTRL_LAST BIT(0) -+#define LLI_DMA_CTRL_PAUSE BIT(1) -+#define LLI_DMA_CTRL_LIST_DONE BIT(8) -+#define LLI_DMA_CTRL_DST_DONE BIT(9) -+#define LLI_DMA_CTRL_SRC_DONE BIT(10) ++ if (rk_crypto_check_dmafd(req->src, sg_nents_for_len(req->src, req->nbytes))) { ++ CRYPTO_TRACE("is hash dmafd"); ++ if (!dma_map_sg(rk_dev->dev, &ctx->hash_sg[0], 1, DMA_TO_DEVICE)) { ++ dev_err(rk_dev->dev, "[%s:%d] dma_map_sg(hash_sg) error\n", ++ __func__, __LINE__); ++ ret = -ENOMEM; ++ goto exit; ++ } ++ ctx->hash_tmp_mapped = true; ++ } + -+#define LLI_USER_CIPHER_START BIT(0) -+#define 
LLI_USER_STRING_START BIT(1) -+#define LLI_USER_STRING_LAST BIT(2) -+#define LLI_USER_STRING_AAD BIT(3) -+#define LLI_USER_PRIVACY_KEY BIT(7) -+#define LLI_USER_ROOT_KEY BIT(8) ++ sg_chain(ctx->hash_sg, ARRAY_SIZE(ctx->hash_sg), req->src); + -+#define CRYPTO_PKA_BASE_OFFSET 0x0480 ++ src_sg = &ctx->hash_sg[0]; ++ ctx->hash_tmp_len = 0; ++ } else { ++ src_sg = req->src; ++ } + -+#define CRYPTO_RAM_CTL (0x0480 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_RAM_PKA_RDY BIT(0) ++ alg_ctx->total = nbytes; ++ alg_ctx->left_bytes = nbytes; ++ alg_ctx->sg_src = src_sg; ++ alg_ctx->req_src = src_sg; ++ alg_ctx->src_nents = sg_nents_for_len(src_sg, nbytes); + -+#define CRYPTO_RAM_ST (0x0484 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_CLK_RAM_RDY BIT(0) -+#define CRYPTO_CLK_RAM_RDY_MASK BIT(0) ++ CRYPTO_TRACE("adjust: old_len = %u, new_len = %u, nbytes = %u", ++ ctx->hash_tmp_len, ctx->lastc_len, nbytes); + -+#define CRYPTO_DEBUG_CTL (0x04a0 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_DEBUG_MODE BIT(0) ++ if (nbytes) { ++ if (ctx->calc_cnt == 0) ++ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->type); + -+#define CRYPTO_DEBUG_ST (0x04a4 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_DEBUG_CLK_EN BIT(0) ++ /* flush all 64byte key buffer for hmac */ ++ alg_ctx->ops.hw_write_key(ctx->rk_dev, ctx->authkey, sizeof(ctx->authkey)); ++ ret = rk_ahash_set_data_start(rk_dev, rctx->flag); ++ } ++exit: ++ return ret; ++no_calc: ++ CRYPTO_TRACE("no calc"); ++ rk_alg_ctx_clear(alg_ctx); + -+#define CRYPTO_DEBUG_MONITOR (0x04a8 - CRYPTO_PKA_BASE_OFFSET) ++ return 0; ++} + -+/* MAP0 ~ MAP31 */ -+#define CRYPTO_MEMORY_MAP0 (0x00800 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_MAP_REG_NUM 32 ++int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev) ++{ ++ int err = 0; ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); + -+#define CRYPTO_OPCODE (0x00880 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_OPCODE_TAG_SHIFT 0 -+#define CRYPTO_OPCODE_R_SHIFT 6 -+#define CRYPTO_OPCODE_R_DIS_SHIFT 11 -+#define CRYPTO_OPCODE_B_SHIFT 12 -+#define CRYPTO_OPCODE_B_IMMED_SHIFT 17 -+#define CRYPTO_OPCODE_A_SHIFT 18 -+#define CRYPTO_OPCODE_A_IMMED_SHIFT 23 -+#define CRYPTO_OPCODE_LEN_SHIFT 24 -+#define CRYPTO_OPCODE_CODE_SHIFT 27 ++ CRYPTO_TRACE("left bytes = %u, flag = %d", alg_ctx->left_bytes, rctx->flag); + -+#define CRYPTO_N_NP_T0_T1_ADDR (0x00884 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_N_VIRTUAL_ADDR_SHIFT 0 -+#define CRYPTO_N_VIRTUAL_ADDR_MASK 0x0000001f -+#define CRYPTO_NP_VIRTUAL_ADDR_SHIFT 5 -+#define CRYPTO_NP_VIRTUAL_ADDR_MASK 0x000003e0 -+#define CRYPTO_T0_VIRTUAL_ADDR_SHIFT 10 -+#define CRYPTO_T0_VIRTUAL_ADDR_MASK 0x00007c00 -+#define CRYPTO_T1_VIRTUAL_ADDR_SHIFT 15 -+#define CRYPTO_T1_VIRTUAL_ADDR_MASK 0x000f8000 ++ err = rk_dev->unload_data(rk_dev); ++ if (err) ++ goto out_rx; + -+#define CRYPTO_PKA_STATUS (0x00888 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_PIPE_IS_RDY BIT(0) -+#define CRYPTO_PKA_BUSY BIT(1) -+#define CRYPTO_PKA_ALU_OUT_ZERO BIT(2) -+#define CRYPTO_PKA_ALU_MODOVRFLW BIT(3) -+#define CRYPTO_PKA_DIV_BY_ZERO BIT(4) -+#define CRYPTO_PKA_ALU_CARRY BIT(5) -+#define CRYPTO_PKA_ALU_SIGN_OUT BIT(6) -+#define CRYPTO_PKA_MODINV_OF_ZERO BIT(7) -+#define CRYPTO_PKA_CPU_BUSY BIT(8) -+#define CRYPTO_PKA_OPCODE_STATUS_SHIFT 9 -+#define CRYPTO_PKA_OPCODE_STATUS_MASK 0x00003e00 -+#define CRYPTO_PKA_TAG_STATUS_SHIFT 14 -+#define 
CRYPTO_PKA_TAG_STATUS_MASK 0x0003c000 ++ ctx->calc_cnt += alg_ctx->count; + -+#define CRYPTO_PKA_SW_RESET (0x0088C - CRYPTO_PKA_BASE_OFFSET) ++ if (alg_ctx->left_bytes) { ++ if (alg_ctx->aligned) { ++ if (sg_is_last(alg_ctx->sg_src)) { ++ dev_warn(rk_dev->dev, "[%s:%d], Lack of data\n", ++ __func__, __LINE__); ++ err = -ENOMEM; ++ goto out_rx; ++ } ++ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); ++ } ++ err = rk_ahash_set_data_start(rk_dev, rctx->flag); ++ } else { ++ /* ++ * it will take some time to process date after last dma ++ * transmission. ++ */ ++ struct crypto_ahash *tfm; + -+/* PKA_L0 ~ PKA_L7 */ -+#define CRYPTO_PKA_L0 (0x00890 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_LEN_REG_NUM 8 ++ if (ctx->hash_tmp_mapped) ++ dma_unmap_sg(rk_dev->dev, &ctx->hash_sg[0], 1, DMA_TO_DEVICE); + -+#define CRYPTO_PKA_PIPE_RDY (0x008B0 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_DONE (0x008B4 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_MON_SELECT (0x008B8 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_DEBUG_REG_EN (0x008BC - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_DEBUG_CNT_ADDR (0x008C0 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_DEBUG_EXT_ADDR (0x008C4 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_DEBUG_HALT (0x008C8 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_MON_READ (0x008D0 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_INT_ENA (0x008D4 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_PKA_INT_ST (0x008D8 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_SRAM_BASE (0x01000 - CRYPTO_PKA_BASE_OFFSET) -+#define CRYPTO_SRAM_SIZE 0x01000 ++ /* only final will get result */ ++ if (!(rctx->flag & RK_FLAG_FINAL)) ++ goto out_rx; + -+#endif ++ if (!req->result) { ++ err = -EINVAL; ++ goto out_rx; ++ } + -diff --git a/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c b/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c ++ tfm = crypto_ahash_reqtfm(req); ++ ++ err = alg_ctx->ops.hw_get_result(rk_dev, req->result, ++ crypto_ahash_digestsize(tfm)); ++ } ++ ++out_rx: ++ return err; ++} +diff --git a/drivers/crypto/rockchip/rk_crypto_ahash_utils.h b/drivers/crypto/rockchip/rk_crypto_ahash_utils.h new file mode 100644 -index 000000000..2bfff0d28 +index 000000000..46afd98a0 --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c -@@ -0,0 +1,685 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip Crypto V2 -+ * -+ * Copyright (c) 2018, Fuzhou Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. -+ */ ++++ b/drivers/crypto/rockchip/rk_crypto_ahash_utils.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+#include -+#include -+#include -+#include ++/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ ++ ++#ifndef __RK_CRYPTO_AHASH_UTILS_H__ ++#define __RK_CRYPTO_AHASH_UTILS_H__ ++ ++#include + +#include "rk_crypto_core.h" +#include "rk_crypto_utils.h" -+#include "rk_crypto_skcipher_utils.h" -+#include "rk_crypto_v2.h" -+#include "rk_crypto_v2_reg.h" + -+#define RK_POLL_PERIOD_US 100 -+#define RK_POLL_TIMEOUT_US 50000 ++struct rk_alg_ctx *rk_ahash_alg_ctx(struct rk_crypto_dev *rk_dev); + -+static const u32 cipher_algo2bc[] = { -+ [CIPHER_ALGO_DES] = CRYPTO_BC_DES, -+ [CIPHER_ALGO_DES3_EDE] = CRYPTO_BC_TDES, -+ [CIPHER_ALGO_AES] = CRYPTO_BC_AES, -+ [CIPHER_ALGO_SM4] = CRYPTO_BC_SM4, -+}; ++struct rk_crypto_algt *rk_ahash_get_algt(struct crypto_ahash *tfm); + -+static const u32 cipher_mode2bc[] = { -+ [CIPHER_MODE_ECB] = CRYPTO_BC_ECB, -+ [CIPHER_MODE_CBC] = CRYPTO_BC_CBC, -+ [CIPHER_MODE_CFB] = CRYPTO_BC_CFB, -+ [CIPHER_MODE_OFB] = CRYPTO_BC_OFB, -+ [CIPHER_MODE_CTR] = CRYPTO_BC_CTR, -+ [CIPHER_MODE_XTS] = CRYPTO_BC_XTS, -+ [CIPHER_MODE_GCM] = CRYPTO_BC_GCM, -+}; ++struct rk_ahash_ctx *rk_ahash_ctx_cast(struct rk_crypto_dev *rk_dev); + -+static int rk_crypto_irq_handle(int irq, void *dev_id) -+{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; -+ struct rk_hw_crypto_v2_info *hw_info = -+ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++int rk_ahash_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); + -+ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); ++int rk_ahash_init(struct ahash_request *req); + -+ interrupt_status &= CRYPTO_LOCKSTEP_MASK; ++int rk_ahash_update(struct ahash_request *req); + -+ if (interrupt_status != CRYPTO_DST_ITEM_DONE_INT_ST) { -+ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); -+ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", -+ (u32)alg_ctx->addr_in); -+ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", -+ (u32)alg_ctx->addr_out); -+ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); -+ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", -+ (u32)hw_info->hw_desc.lli_head_dma); -+ dev_err(rk_dev->dev, "DMA Error status = %08x\n", -+ interrupt_status); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); -+ rk_dev->err = -EFAULT; -+ } ++int rk_ahash_final(struct ahash_request *req); + -+ return 0; -+} ++int rk_ahash_finup(struct ahash_request *req); + -+static inline void set_pc_len_reg(struct rk_crypto_dev *rk_dev, u64 pc_len) -+{ -+ u32 chn_base = CRYPTO_CH0_PC_LEN_0; ++int rk_ahash_digest(struct ahash_request *req); + -+ CRYPTO_TRACE("PC length = %lu\n", (unsigned long)pc_len); ++int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev); + -+ CRYPTO_WRITE(rk_dev, chn_base, pc_len & 0xffffffff); -+ CRYPTO_WRITE(rk_dev, chn_base + 4, pc_len >> 32); -+} ++int rk_ahash_start(struct rk_crypto_dev *rk_dev); + -+static inline void set_aad_len_reg(struct 
rk_crypto_dev *rk_dev, u64 aad_len) ++#endif +diff --git a/drivers/crypto/rockchip/rk_crypto_bignum.c b/drivers/crypto/rockchip/rk_crypto_bignum.c +new file mode 100644 +index 000000000..690c2fdf5 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_bignum.c +@@ -0,0 +1,130 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * bignum support for Rockchip crypto ++ * ++ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. ++ * ++ * Author: Lin Jinhan ++ * ++ */ ++#include ++ ++#include "rk_crypto_bignum.h" ++ ++#define DEFAULT_ENDIAN RK_BG_LITTILE_ENDIAN ++ ++#define BYTES2WORDS(bytes) (round_up((bytes), sizeof(u32)) / sizeof(u32)) ++#define WORDS2BYTES(words) ((words) * sizeof(u32)) ++#define RK_WORD_SIZE 32 ++ ++static void rk_reverse_memcpy(void *dst, const void *src, u32 size) +{ -+ u32 chn_base = CRYPTO_CH0_AAD_LEN_0; ++ char *_dst = (char *)dst, *_src = (char *)src; ++ u32 i; + -+ CRYPTO_TRACE("AAD length = %lu\n", (unsigned long)aad_len); ++ if (!dst || !src || !size) ++ return; + -+ CRYPTO_WRITE(rk_dev, chn_base, aad_len & 0xffffffff); -+ CRYPTO_WRITE(rk_dev, chn_base + 4, aad_len >> 32); ++ for (i = 0; i < size; ++i) ++ _dst[size - i - 1] = _src[i]; +} + -+static void set_iv_reg(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len) ++struct rk_bignum *rk_bn_alloc(u32 max_size) +{ -+ if (!iv || iv_len == 0) -+ return; ++ struct rk_bignum *bn; + -+ CRYPTO_DUMPHEX("set iv", iv, iv_len); ++ bn = kzalloc(sizeof(*bn), GFP_KERNEL); ++ if (!bn) ++ return NULL; + -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_IV_0, iv, iv_len); ++ bn->data = kzalloc(round_up(max_size, sizeof(u32)), GFP_KERNEL); ++ if (!bn->data) { ++ kfree(bn); ++ return NULL; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_CH0_IV_LEN_0, iv_len); -+} ++ bn->n_words = BYTES2WORDS(max_size); + -+static void write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) -+{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); ++ return bn; +} + -+static void write_tkey_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) ++void rk_bn_free(struct rk_bignum *bn) +{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH4_KEY_0, key, key_len); ++ if (!bn) ++ return; ++ ++ if (bn->data) { ++ memset(bn->data, 0x00, WORDS2BYTES(bn->n_words)); ++ kfree(bn->data); ++ } ++ ++ kfree(bn); +} + -+static int get_tag_reg(struct rk_crypto_dev *rk_dev, u8 *tag, u32 tag_len) ++int rk_bn_set_data(struct rk_bignum *bn, const u8 *data, u32 size, enum bignum_endian endian) +{ -+ int ret; -+ u32 reg_ctrl = 0; -+ -+ CRYPTO_TRACE("tag_len = %u", tag_len); ++ if (!bn || !data) ++ return -EINVAL; + -+ if (tag_len > RK_MAX_TAG_SIZE) ++ if (BYTES2WORDS(size) > bn->n_words) + return -EINVAL; + -+ ret = read_poll_timeout_atomic(CRYPTO_READ, -+ reg_ctrl, -+ reg_ctrl & CRYPTO_CH0_TAG_VALID, -+ 0, -+ RK_POLL_TIMEOUT_US, -+ false, -+ rk_dev, CRYPTO_TAG_VALID); -+ if (ret) -+ goto exit; ++ if (endian == DEFAULT_ENDIAN) ++ memcpy(bn->data, data, size); ++ else ++ rk_reverse_memcpy(bn->data, data, size); + -+ rk_crypto_read_regs(rk_dev, CRYPTO_CH0_TAG_0, tag, tag_len); -+exit: -+ return ret; ++ return 0; +} + -+static bool is_force_fallback(struct rk_crypto_algt *algt, uint32_t key_len) ++int rk_bn_get_data(const struct rk_bignum *bn, u8 *data, u32 size, enum bignum_endian endian) +{ -+ if (algt->algo != CIPHER_ALGO_AES) -+ return false; ++ if (!bn || !data) ++ return -EINVAL; + -+ /* crypto v2 not support xts with AES-192 */ -+ if (algt->mode == CIPHER_MODE_XTS && key_len == AES_KEYSIZE_192 * 2) -+ return true; ++ if (size < WORDS2BYTES(bn->n_words)) ++ 
return -EINVAL; + -+ if (algt->use_soft_aes192 && key_len == AES_KEYSIZE_192) -+ return true; ++ memset(data, 0x00, size); + -+ return false; ++ if (endian == DEFAULT_ENDIAN) ++ memcpy(data + size - WORDS2BYTES(bn->n_words), bn->data, bn->n_words); ++ else ++ rk_reverse_memcpy(data + size - WORDS2BYTES(bn->n_words), ++ bn->data, WORDS2BYTES(bn->n_words)); ++ ++ return 0; +} + -+static bool is_calc_need_round_up(struct skcipher_request *req) ++u32 rk_bn_get_size(const struct rk_bignum *bn) +{ -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); ++ if (!bn) ++ return 0; + -+ return (algt->mode == CIPHER_MODE_CFB || -+ algt->mode == CIPHER_MODE_OFB || -+ algt->mode == CIPHER_MODE_CTR) ? true : false; ++ return WORDS2BYTES(bn->n_words); +} + -+static void rk_cipher_reset(struct rk_crypto_dev *rk_dev) ++/* ++ * @brief Returns the index of the highest 1 in |bn|. ++ * @param bn: the point of input data bignum. ++ * @return The index starts at 0 for the least significant bit. ++ * If src == zero, it will return -1 ++ */ ++int rk_bn_highest_bit(const struct rk_bignum *bn) +{ -+ int ret; -+ u32 tmp = 0, tmp_mask = 0; -+ unsigned int pool_timeout_us = 1000; -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); -+ -+ tmp = CRYPTO_SW_CC_RESET; -+ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); -+ -+ /* This is usually done in 20 clock cycles */ -+ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, -+ pool_timeout_us, false, rk_dev, CRYPTO_RST_CTL); -+ if (ret) -+ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", -+ pool_timeout_us); -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0xffff0000); ++ u32 w; ++ u32 b; + -+ /* clear dma int status */ -+ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); -+} ++ if (!bn || !bn->data || !bn->n_words) ++ return -1; + -+static void rk_crypto_complete(struct crypto_async_request *base, int err) -+{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm); -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ struct rk_hw_crypto_v2_info *hw_info = ctx->rk_dev->hw_info; -+ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; ++ w = bn->data[bn->n_words - 1]; + -+ CRYPTO_WRITE(ctx->rk_dev, CRYPTO_BC_CTL, 0xffff0000); -+ if (err) { -+ rk_cipher_reset(ctx->rk_dev); -+ pr_err("aligned = %u, align_size = %u\n", -+ alg_ctx->aligned, alg_ctx->align_size); -+ pr_err("total = %u, left = %u, count = %u\n", -+ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); -+ pr_err("lli->src = %08x\n", lli_desc->src_addr); -+ pr_err("lli->src_len = %08x\n", lli_desc->src_len); -+ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); -+ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); -+ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); -+ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); -+ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); ++ for (b = 0; b < RK_WORD_SIZE; b++) { ++ w >>= 1; ++ if (w == 0) ++ break; + } + -+ if (base->complete) -+ base->complete(base, err); ++ return (int)(bn->n_words - 1) * RK_WORD_SIZE + b; +} +diff --git a/drivers/crypto/rockchip/rk_crypto_bignum.h b/drivers/crypto/rockchip/rk_crypto_bignum.h +new file mode 100644 +index 000000000..780aa8766 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_bignum.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2020 Rockchip Electronics Co., Ltd. 
*/ + -+static int rk_cipher_crypt(struct skcipher_request *req, bool encrypt) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); -+ -+ CRYPTO_TRACE("%s total = %u", -+ encrypt ? "encrypt" : "decrypt", req->cryptlen); ++#ifndef __RK_CRYPTO_BIGNUM_H__ ++#define __RK_CRYPTO_BIGNUM_H__ + -+ if (!req->cryptlen) { -+ if (algt->mode == CIPHER_MODE_ECB || -+ algt->mode == CIPHER_MODE_CBC || -+ algt->mode == CIPHER_MODE_CTR || -+ algt->mode == CIPHER_MODE_CFB || -+ algt->mode == CIPHER_MODE_OFB) -+ return 0; -+ else -+ return -EINVAL; -+ } ++enum bignum_endian { ++ RK_BG_BIG_ENDIAN, ++ RK_BG_LITTILE_ENDIAN ++}; + -+ /* XTS data should >= chunksize */ -+ if (algt->mode == CIPHER_MODE_XTS) { -+ if (req->cryptlen < crypto_skcipher_chunksize(tfm)) -+ return -EINVAL; ++/** ++ * struct rk_bignum - crypto bignum struct. ++ */ ++struct rk_bignum { ++ u32 n_words; ++ u32 *data; ++}; + -+ /* force use unalign branch */ -+ ctx->algs_ctx.align_size = ctx->rk_dev->vir_max; ++struct rk_bignum *rk_bn_alloc(u32 max_size); ++void rk_bn_free(struct rk_bignum *bn); ++int rk_bn_set_data(struct rk_bignum *bn, const u8 *data, u32 size, enum bignum_endian endian); ++int rk_bn_get_data(const struct rk_bignum *bn, u8 *data, u32 size, enum bignum_endian endian); ++u32 rk_bn_get_size(const struct rk_bignum *bn); ++int rk_bn_highest_bit(const struct rk_bignum *src); + -+ /* XTS can't pause when use hardware crypto */ -+ if (req->cryptlen > ctx->rk_dev->vir_max) -+ return rk_cipher_fallback(req, ctx, encrypt); -+ } ++#endif +diff --git a/drivers/crypto/rockchip/rk_crypto_core.c b/drivers/crypto/rockchip/rk_crypto_core.c +new file mode 100644 +index 000000000..2a9cf2da6 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_core.c +@@ -0,0 +1,945 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip crypto ++ * ++ * Copyright (c) 2018, Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * Author: Zain Wang ++ * Mender: Lin Jinhan ++ * ++ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. ++ */ + -+ if (is_force_fallback(algt, ctx->keylen)) -+ return rk_cipher_fallback(req, ctx, encrypt); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ ctx->mode = cipher_algo2bc[algt->algo] | -+ cipher_mode2bc[algt->mode]; -+ if (!encrypt) -+ ctx->mode |= CRYPTO_BC_DECRYPT; ++#include "rk_crypto_core.h" ++#include "rk_crypto_utils.h" ++#include "rk_crypto_v1.h" ++#include "rk_crypto_v2.h" ++#include "rk_crypto_v3.h" ++#include "cryptodev_linux/rk_cryptodev.h" ++#include "procfs.h" + -+ if (algt->algo == CIPHER_ALGO_AES) { -+ uint32_t key_factor; ++#define CRYPTO_NAME "rkcrypto" + -+ /* The key length of XTS is twice the normal length */ -+ key_factor = algt->mode == CIPHER_MODE_XTS ? 
2 : 1; ++static struct rk_alg_ctx *rk_alg_ctx_cast(struct crypto_async_request *async_req) ++{ ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(async_req->tfm); + -+ if (ctx->keylen == AES_KEYSIZE_128 * key_factor) -+ ctx->mode |= CRYPTO_BC_128_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_192 * key_factor) -+ ctx->mode |= CRYPTO_BC_192_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_256 * key_factor) -+ ctx->mode |= CRYPTO_BC_256_bit_key; -+ } ++ return &ctx->algs_ctx; ++} + -+ ctx->iv_len = crypto_skcipher_ivsize(tfm); ++static int rk_crypto_enable_clk(struct rk_crypto_dev *rk_dev) ++{ ++ int ret; + -+ memset(ctx->iv, 0x00, sizeof(ctx->iv)); -+ memcpy(ctx->iv, req->iv, ctx->iv_len); ++ dev_dbg(rk_dev->dev, "clk_bulk_prepare_enable.\n"); + -+ ctx->is_enc = encrypt; ++ ret = clk_bulk_prepare_enable(rk_dev->clks_num, ++ rk_dev->clk_bulks); ++ if (ret < 0) ++ dev_err(rk_dev->dev, "failed to enable clks %d\n", ret); + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); -+ return rk_skcipher_handle_req(ctx->rk_dev, req); ++ return ret; +} + -+static int rk_cipher_encrypt(struct skcipher_request *req) ++static void rk_crypto_disable_clk(struct rk_crypto_dev *rk_dev) +{ -+ return rk_cipher_crypt(req, true); -+} ++ dev_dbg(rk_dev->dev, "clk_bulk_disable_unprepare.\n"); + -+static int rk_cipher_decrypt(struct skcipher_request *req) -+{ -+ return rk_cipher_crypt(req, false); ++ clk_bulk_disable_unprepare(rk_dev->clks_num, rk_dev->clk_bulks); +} + -+static int rk_ablk_hw_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode) ++static int rk_load_data(struct rk_crypto_dev *rk_dev, ++ struct scatterlist *sg_src, ++ struct scatterlist *sg_dst) +{ -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); -+ -+ rk_cipher_reset(rk_dev); -+ -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0x00010000); ++ int ret = -EINVAL; ++ unsigned int count; ++ u32 src_nents, dst_nents; ++ struct device *dev = rk_dev->dev; ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); + -+ if (mode == CIPHER_MODE_XTS) { -+ uint32_t tmp_len = ctx->keylen / 2; ++ alg_ctx->count = 0; + -+ write_key_reg(ctx->rk_dev, ctx->key, tmp_len); -+ write_tkey_reg(ctx->rk_dev, ctx->key + tmp_len, tmp_len); -+ } else { -+ write_key_reg(ctx->rk_dev, ctx->key, ctx->keylen); -+ } ++ /* 0 data input just do nothing */ ++ if (alg_ctx->total == 0) ++ return 0; + -+ if (mode != CIPHER_MODE_ECB) -+ set_iv_reg(rk_dev, ctx->iv, ctx->iv_len); ++ src_nents = alg_ctx->src_nents; ++ dst_nents = alg_ctx->dst_nents; + -+ ctx->mode |= CRYPTO_BC_ENABLE; ++ /* skip assoclen data */ ++ if (alg_ctx->assoclen && alg_ctx->left_bytes == alg_ctx->total) { ++ CRYPTO_TRACE("have assoclen..."); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ if (alg_ctx->assoclen > rk_dev->aad_max) { ++ ret = -ENOMEM; ++ goto error; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); ++ if (!sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, ++ rk_dev->addr_aad, alg_ctx->assoclen, 0)) { ++ dev_err(dev, "[%s:%d] assoc pcopy err\n", ++ __func__, __LINE__); ++ ret = -EINVAL; ++ goto error; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, ctx->mode | CRYPTO_WRITE_MASK_ALL); ++ sg_init_one(&alg_ctx->sg_aad, rk_dev->addr_aad, alg_ctx->assoclen); + -+ return 0; -+} ++ if (!dma_map_sg(dev, &alg_ctx->sg_aad, 1, DMA_TO_DEVICE)) { ++ dev_err(dev, "[%s:%d] dma_map_sg(sg_aad) error\n", ++ __func__, __LINE__); ++ ret = -ENOMEM; ++ goto error; ++ } + -+static int crypto_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) -+{ -+ struct rk_hw_crypto_v2_info *hw_info = -+ (struct 
rk_hw_crypto_v2_info *)rk_dev->hw_info; -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); -+ struct crypto_lli_desc *lli_head, *lli_tail, *lli_aad; -+ u32 calc_len = alg_ctx->count; -+ u32 start_flag = CRYPTO_DMA_START; -+ int ret; ++ alg_ctx->addr_aad_in = sg_dma_address(&alg_ctx->sg_aad); + -+ if (alg_ctx->aligned) -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ alg_ctx->sg_src, alg_ctx->sg_dst, alg_ctx->count); -+ else -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ &alg_ctx->sg_tmp, &alg_ctx->sg_tmp, alg_ctx->count); -+ if (ret) -+ return ret; ++ /* point sg_src and sg_dst skip assoc data */ ++ sg_src = scatterwalk_ffwd(rk_dev->src, alg_ctx->req_src, ++ alg_ctx->assoclen); ++ sg_dst = (alg_ctx->req_src == alg_ctx->req_dst) ? sg_src : ++ scatterwalk_ffwd(rk_dev->dst, alg_ctx->req_dst, ++ alg_ctx->assoclen); + -+ lli_head = hw_info->hw_desc.lli_head; -+ lli_tail = hw_info->hw_desc.lli_tail; -+ lli_aad = hw_info->hw_desc.lli_aad; ++ alg_ctx->sg_src = sg_src; ++ alg_ctx->sg_dst = sg_dst; ++ src_nents = sg_nents_for_len(sg_src, alg_ctx->total); ++ dst_nents = sg_nents_for_len(sg_dst, alg_ctx->total); + -+ /* -+ * the data length is not aligned will use addr_vir to calculate, -+ * so crypto v2 could round up data length to chunk_size -+ */ -+ if (!alg_ctx->is_aead && is_calc_need_round_up(req)) -+ calc_len = round_up(calc_len, alg_ctx->chunk_size); ++ CRYPTO_TRACE("src_nents = %u, dst_nents = %u", src_nents, dst_nents); ++ } + -+ CRYPTO_TRACE("calc_len = %u, cryptlen = %u, assoclen= %u, is_aead = %d", -+ calc_len, alg_ctx->total, alg_ctx->assoclen, alg_ctx->is_aead); ++ if (alg_ctx->left_bytes == alg_ctx->total) { ++ alg_ctx->aligned = rk_crypto_check_align(sg_src, src_nents, sg_dst, dst_nents, ++ alg_ctx->align_size); ++ alg_ctx->is_dma = rk_crypto_check_dmafd(sg_src, src_nents) && ++ rk_crypto_check_dmafd(sg_dst, dst_nents); ++ } + -+ lli_head->user_define = LLI_USER_STRING_START | LLI_USER_CIPHER_START; ++ CRYPTO_TRACE("aligned = %d, is_dma = %d, total = %u, left_bytes = %u, assoclen = %u\n", ++ alg_ctx->aligned, alg_ctx->is_dma, alg_ctx->total, ++ alg_ctx->left_bytes, alg_ctx->assoclen); + -+ lli_tail->dma_ctrl = LLI_DMA_CTRL_DST_DONE | LLI_DMA_CTRL_LAST; -+ lli_tail->user_define |= LLI_USER_STRING_LAST; -+ lli_tail->src_len += (calc_len - alg_ctx->count); -+ lli_tail->dst_len += (calc_len - alg_ctx->count); ++ if (alg_ctx->aligned) { ++ u32 nents; + -+ if (alg_ctx->is_aead) { -+ lli_aad->src_addr = alg_ctx->addr_aad_in; -+ lli_aad->src_len = alg_ctx->assoclen; -+ lli_aad->user_define = LLI_USER_CIPHER_START | -+ LLI_USER_STRING_START | -+ LLI_USER_STRING_LAST | -+ LLI_USER_STRING_AAD; -+ lli_aad->next_addr = hw_info->hw_desc.lli_head_dma; ++ if (rk_dev->soc_data->use_lli_chain) { ++ count = rk_crypto_hw_desc_maxlen(sg_src, alg_ctx->left_bytes, &nents); ++ } else { ++ nents = 1; ++ count = min_t(unsigned int, alg_ctx->left_bytes, sg_src->length); ++ } + -+ /* clear cipher start */ -+ lli_head->user_define &= (~((u32)LLI_USER_CIPHER_START)); ++ alg_ctx->map_nents = nents; ++ alg_ctx->left_bytes -= count; + -+ set_pc_len_reg(rk_dev, alg_ctx->total); -+ set_aad_len_reg(rk_dev, alg_ctx->assoclen); -+ } ++ if (!alg_ctx->is_dma && !dma_map_sg(dev, sg_src, nents, DMA_TO_DEVICE)) { ++ dev_err(dev, "[%s:%d] dma_map_sg(src) error\n", ++ __func__, __LINE__); ++ ret = -EINVAL; ++ goto error; ++ } ++ alg_ctx->addr_in = sg_dma_address(sg_src); + -+ rk_crypto_dump_hw_desc(&hw_info->hw_desc); ++ if 
(sg_dst) { ++ if (!alg_ctx->is_dma && !dma_map_sg(dev, sg_dst, nents, DMA_FROM_DEVICE)) { ++ dev_err(dev, ++ "[%s:%d] dma_map_sg(dst) error\n", ++ __func__, __LINE__); ++ dma_unmap_sg(dev, sg_src, 1, ++ DMA_TO_DEVICE); ++ ret = -EINVAL; ++ goto error; ++ } ++ alg_ctx->addr_out = sg_dma_address(sg_dst); ++ } ++ } else { ++ alg_ctx->map_nents = 1; + -+ dma_wmb(); ++ count = (alg_ctx->left_bytes > rk_dev->vir_max) ? ++ rk_dev->vir_max : alg_ctx->left_bytes; + -+ if (alg_ctx->is_aead) -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_aad_dma); -+ else -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); ++ if (!sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, ++ rk_dev->addr_vir, count, ++ alg_ctx->assoclen + alg_ctx->total - alg_ctx->left_bytes)) { ++ dev_err(dev, "[%s:%d] pcopy err\n", ++ __func__, __LINE__); ++ ret = -EINVAL; ++ goto error; ++ } ++ alg_ctx->left_bytes -= count; ++ sg_init_one(&alg_ctx->sg_tmp, rk_dev->addr_vir, count); ++ if (!dma_map_sg(dev, &alg_ctx->sg_tmp, 1, DMA_TO_DEVICE)) { ++ dev_err(dev, "[%s:%d] dma_map_sg(sg_tmp) error\n", ++ __func__, __LINE__); ++ ret = -ENOMEM; ++ goto error; ++ } ++ alg_ctx->addr_in = sg_dma_address(&alg_ctx->sg_tmp); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, start_flag | (start_flag << WRITE_MASK)); ++ if (sg_dst) { ++ if (!dma_map_sg(dev, &alg_ctx->sg_tmp, 1, ++ DMA_FROM_DEVICE)) { ++ dev_err(dev, ++ "[%s:%d] dma_map_sg(sg_tmp) error\n", ++ __func__, __LINE__); ++ dma_unmap_sg(dev, &alg_ctx->sg_tmp, 1, ++ DMA_TO_DEVICE); ++ ret = -ENOMEM; ++ goto error; ++ } ++ alg_ctx->addr_out = sg_dma_address(&alg_ctx->sg_tmp); ++ } ++ } + ++ alg_ctx->count = count; + return 0; ++error: ++ return ret; +} + -+static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) ++static int rk_unload_data(struct rk_crypto_dev *rk_dev) +{ -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ int ret = 0; ++ struct scatterlist *sg_in, *sg_out; ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); ++ u32 nents; + -+ CRYPTO_TRACE(); ++ CRYPTO_TRACE("aligned = %d, total = %u, left_bytes = %u\n", ++ alg_ctx->aligned, alg_ctx->total, alg_ctx->left_bytes); + -+ memset(ctx, 0x00, sizeof(*ctx)); ++ /* 0 data input just do nothing */ ++ if (alg_ctx->total == 0 || alg_ctx->count == 0) ++ return 0; + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++ nents = alg_ctx->map_nents; + -+ rk_dev->request_crypto(rk_dev, alg_name); ++ sg_in = alg_ctx->aligned ? alg_ctx->sg_src : &alg_ctx->sg_tmp; + -+ /* always not aligned for crypto v2 cipher */ -+ alg_ctx->align_size = 64; -+ alg_ctx->chunk_size = crypto_skcipher_chunksize(tfm); ++ /* only is dma buffer and aligned will skip unmap */ ++ if (!alg_ctx->is_dma || !alg_ctx->aligned) ++ dma_unmap_sg(rk_dev->dev, sg_in, nents, DMA_TO_DEVICE); + -+ alg_ctx->ops.start = rk_ablk_start; -+ alg_ctx->ops.update = rk_ablk_rx; -+ alg_ctx->ops.complete = rk_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ if (alg_ctx->sg_dst) { ++ sg_out = alg_ctx->aligned ? 
alg_ctx->sg_dst : &alg_ctx->sg_tmp; + -+ alg_ctx->ops.hw_init = rk_ablk_hw_init; -+ alg_ctx->ops.hw_dma_start = crypto_dma_start; -+ alg_ctx->ops.hw_write_iv = set_iv_reg; ++ /* only is dma buffer and aligned will skip unmap */ ++ if (!alg_ctx->is_dma || !alg_ctx->aligned) ++ dma_unmap_sg(rk_dev->dev, sg_out, nents, DMA_FROM_DEVICE); ++ } + -+ ctx->rk_dev = rk_dev; ++ if (!alg_ctx->aligned && alg_ctx->req_dst) { ++ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, alg_ctx->dst_nents, ++ rk_dev->addr_vir, alg_ctx->count, ++ alg_ctx->total - alg_ctx->left_bytes - ++ alg_ctx->count + alg_ctx->assoclen)) { ++ ret = -EINVAL; ++ goto exit; ++ } ++ } + -+ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { -+ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); -+ ctx->fallback_tfm = crypto_alloc_skcipher(alg_name, 0, -+ CRYPTO_ALG_ASYNC | -+ CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ctx->fallback_tfm)) { -+ CRYPTO_MSG("Could not load fallback driver %s : %ld.\n", -+ alg_name, PTR_ERR(ctx->fallback_tfm)); -+ ctx->fallback_tfm = NULL; ++ if (alg_ctx->assoclen) { ++ dma_unmap_sg(rk_dev->dev, &alg_ctx->sg_aad, 1, DMA_TO_DEVICE); ++ ++ /* copy assoc data to dst */ ++ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, sg_nents(alg_ctx->req_dst), ++ rk_dev->addr_aad, alg_ctx->assoclen, 0)) { ++ ret = -EINVAL; ++ goto exit; + } + } ++exit: ++ return ret; ++} + -+ return 0; ++static void start_irq_timer(struct rk_crypto_dev *rk_dev) ++{ ++ mod_timer(&rk_dev->timer, jiffies + msecs_to_jiffies(3000)); +} + -+static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) ++/* use timer to avoid crypto irq timeout */ ++static void rk_crypto_irq_timer_handle(struct timer_list *t) +{ -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); ++ struct rk_crypto_dev *rk_dev = from_timer(rk_dev, t, timer); ++ unsigned long flags; + -+ CRYPTO_TRACE(); ++ spin_lock_irqsave(&rk_dev->lock, flags); + -+ if (ctx->fallback_tfm) { -+ CRYPTO_MSG("free fallback tfm"); -+ crypto_free_skcipher(ctx->fallback_tfm); -+ } ++ rk_dev->err = -ETIMEDOUT; ++ rk_dev->stat.timeout_cnt++; + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); -+} ++ rk_unload_data(rk_dev); + -+static int rk_aead_init_tfm(struct crypto_aead *tfm) -+{ -+ struct aead_alg *alg = crypto_aead_alg(tfm); -+ struct rk_crypto_algt *algt = -+ container_of(alg, struct rk_crypto_algt, alg.aead); -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); -+ const char *alg_name = crypto_tfm_alg_name(&tfm->base); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ spin_unlock_irqrestore(&rk_dev->lock, flags); + -+ CRYPTO_TRACE(); ++ tasklet_schedule(&rk_dev->done_task); ++} + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) ++{ ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ struct rk_alg_ctx *alg_ctx; ++ unsigned long flags; + -+ rk_dev->request_crypto(rk_dev, alg_name); ++ spin_lock_irqsave(&rk_dev->lock, flags); + -+ alg_ctx->align_size = 64; -+ alg_ctx->chunk_size = crypto_aead_chunksize(tfm); ++ /* reset timeout timer */ ++ start_irq_timer(rk_dev); + -+ alg_ctx->ops.start = rk_aead_start; -+ alg_ctx->ops.update = rk_ablk_rx; -+ alg_ctx->ops.complete = rk_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); + -+ alg_ctx->ops.hw_init = rk_ablk_hw_init; -+ alg_ctx->ops.hw_dma_start = crypto_dma_start; 
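/*
 * A minimal standalone sketch of the ops-table pattern wired up in the
 * surrounding code: the common queue/tasklet engine drives a request only
 * through the rk_alg_ctx callbacks (start, update, complete, ...), so the
 * cipher and hash paths plug in their own handlers. The types and names
 * below are hypothetical and only illustrate the callback flow.
 */
#include <stdio.h>

struct fake_dev;

struct fake_ops {
	int  (*start)(struct fake_dev *dev);              /* program hw, start first DMA  */
	int  (*update)(struct fake_dev *dev);             /* consume one finished chunk   */
	void (*complete)(struct fake_dev *dev, int err);  /* report result to the caller  */
};

struct fake_dev {
	struct fake_ops ops;
	int chunks_left;
};

static int fake_start(struct fake_dev *dev)
{
	printf("start: %d chunks queued\n", dev->chunks_left);
	return 0;
}

static int fake_update(struct fake_dev *dev)
{
	dev->chunks_left--;
	printf("update: %d chunks left\n", dev->chunks_left);
	return 0;
}

static void fake_complete(struct fake_dev *dev, int err)
{
	printf("complete: err = %d\n", err);
}

int main(void)
{
	struct fake_dev dev = {
		.ops = { fake_start, fake_update, fake_complete },
		.chunks_left = 3,
	};
	int err = dev.ops.start(&dev);

	/* the real driver re-enters update from its done tasklet after each IRQ */
	while (!err && dev.chunks_left > 0)
		err = dev.ops.update(&dev);

	dev.ops.complete(&dev, err);
	return 0;
}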
-+ alg_ctx->ops.hw_write_iv = set_iv_reg; -+ alg_ctx->ops.hw_get_result = get_tag_reg; ++ rk_dev->stat.irq_cnt++; + -+ ctx->rk_dev = rk_dev; -+ alg_ctx->is_aead = 1; ++ if (alg_ctx->ops.irq_handle) ++ alg_ctx->ops.irq_handle(irq, dev_id); + -+ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { -+ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); -+ ctx->fallback_aead = -+ crypto_alloc_aead(alg_name, 0, -+ CRYPTO_ALG_ASYNC | -+ CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ctx->fallback_aead)) { -+ dev_err(rk_dev->dev, -+ "Load fallback driver %s err: %ld.\n", -+ alg_name, PTR_ERR(ctx->fallback_aead)); -+ ctx->fallback_aead = NULL; -+ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request)); -+ } else { -+ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) + -+ crypto_aead_reqsize(ctx->fallback_aead)); -+ } ++ /* already trigger timeout */ ++ if (rk_dev->err != -ETIMEDOUT) { ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ tasklet_schedule(&rk_dev->done_task); ++ } else { ++ spin_unlock_irqrestore(&rk_dev->lock, flags); + } + -+ return 0; ++ return IRQ_HANDLED; +} + -+static void rk_aead_exit_tfm(struct crypto_aead *tfm) ++static int rk_start_op(struct rk_crypto_dev *rk_dev) +{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); ++ int ret; + -+ CRYPTO_TRACE(); ++ if (!alg_ctx || !alg_ctx->ops.start) ++ return -EINVAL; + -+ if (ctx->fallback_aead) { -+ CRYPTO_MSG("free fallback tfm"); -+ crypto_free_aead(ctx->fallback_aead); ++ alg_ctx->aligned = false; ++ ++ enable_irq(rk_dev->irq); ++ start_irq_timer(rk_dev); ++ ++ ret = alg_ctx->ops.start(rk_dev); ++ if (ret) ++ return ret; ++ ++ /* fake calculations are used to trigger the Done Task */ ++ if (alg_ctx->total == 0) { ++ CRYPTO_TRACE("fake done_task"); ++ rk_dev->stat.fake_cnt++; ++ tasklet_schedule(&rk_dev->done_task); + } + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(&tfm->base)); ++ return 0; +} + -+static int rk_aead_crypt(struct aead_request *req, bool encrypt) ++static int rk_update_op(struct rk_crypto_dev *rk_dev) +{ -+ struct crypto_aead *tfm = crypto_aead_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); -+ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); -+ struct scatterlist *sg_src, *sg_dst; -+ struct scatterlist src[2], dst[2]; -+ u64 data_len; -+ bool aligned; -+ int ret = -EINVAL; -+ -+ CRYPTO_TRACE("%s cryptlen = %u, assoclen = %u", -+ encrypt ? "encrypt" : "decrypt", -+ req->cryptlen, req->assoclen); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); + -+ data_len = encrypt ? req->cryptlen : (req->cryptlen - crypto_aead_authsize(tfm)); ++ if (!alg_ctx || !alg_ctx->ops.update) ++ return -EINVAL; + -+ if (req->assoclen == 0 || -+ req->cryptlen == 0 || -+ data_len == 0 || -+ is_force_fallback(algt, ctx->keylen)) -+ return rk_aead_fallback(req, ctx, encrypt); ++ return alg_ctx->ops.update(rk_dev); ++} + -+ /* point sg_src and sg_dst skip assoc data */ -+ sg_src = scatterwalk_ffwd(src, req->src, req->assoclen); -+ sg_dst = (req->src == req->dst) ? 
sg_src : scatterwalk_ffwd(dst, req->dst, req->assoclen); ++static void rk_complete_op(struct rk_crypto_dev *rk_dev, int err) ++{ ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); + -+ aligned = rk_crypto_check_align(sg_src, sg_nents_for_len(sg_src, data_len), -+ sg_dst, sg_nents_for_len(sg_dst, data_len), -+ 64); ++ disable_irq(rk_dev->irq); ++ del_timer(&rk_dev->timer); + -+ if (sg_nents_for_len(sg_src, data_len) > RK_DEFAULT_LLI_CNT || -+ sg_nents_for_len(sg_dst, data_len) > RK_DEFAULT_LLI_CNT) -+ return rk_aead_fallback(req, ctx, encrypt); ++ rk_dev->stat.complete_cnt++; + -+ if (!aligned) { -+ if (req->assoclen > ctx->rk_dev->aad_max || -+ data_len > ctx->rk_dev->vir_max) -+ return rk_aead_fallback(req, ctx, encrypt); ++ if (err) { ++ rk_dev->stat.error_cnt++; ++ rk_dev->stat.last_error = err; ++ dev_err(rk_dev->dev, "complete_op err = %d\n", err); + } + -+ ctx->mode = cipher_algo2bc[algt->algo] | -+ cipher_mode2bc[algt->mode]; -+ if (!encrypt) -+ ctx->mode |= CRYPTO_BC_DECRYPT; ++ if (!alg_ctx || !alg_ctx->ops.complete) ++ return; + -+ if (algt->algo == CIPHER_ALGO_AES) { -+ if (ctx->keylen == AES_KEYSIZE_128) -+ ctx->mode |= CRYPTO_BC_128_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_192) -+ ctx->mode |= CRYPTO_BC_192_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_256) -+ ctx->mode |= CRYPTO_BC_256_bit_key; -+ } ++ alg_ctx->ops.complete(rk_dev->async_req, err); + -+ ctx->iv_len = crypto_aead_ivsize(tfm); ++ rk_dev->async_req = NULL; + -+ memset(ctx->iv, 0x00, sizeof(ctx->iv)); -+ memcpy(ctx->iv, req->iv, ctx->iv_len); ++ tasklet_schedule(&rk_dev->queue_task); ++} + -+ ctx->is_enc = encrypt; ++static int rk_crypto_enqueue(struct rk_crypto_dev *rk_dev, ++ struct crypto_async_request *async_req) ++{ ++ unsigned long flags; ++ int ret; + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); -+ ret = rk_aead_handle_req(ctx->rk_dev, req); ++ spin_lock_irqsave(&rk_dev->lock, flags); ++ ret = crypto_enqueue_request(&rk_dev->queue, async_req); ++ ++ if (rk_dev->queue.qlen > rk_dev->stat.ever_queue_max) ++ rk_dev->stat.ever_queue_max = rk_dev->queue.qlen; ++ ++ if (rk_dev->busy) { ++ rk_dev->stat.busy_cnt++; ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ return ret; ++ } ++ ++ rk_dev->stat.equeue_cnt++; ++ rk_dev->busy = true; ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ tasklet_schedule(&rk_dev->queue_task); + + return ret; +} + -+static int rk_aead_encrypt(struct aead_request *req) ++static void rk_crypto_queue_task_cb(unsigned long data) +{ -+ return rk_aead_crypt(req, true); -+} ++ struct rk_crypto_dev *rk_dev = (struct rk_crypto_dev *)data; ++ struct crypto_async_request *async_req, *backlog; ++ unsigned long flags; + -+static int rk_aead_decrypt(struct aead_request *req) -+{ -+ return rk_aead_crypt(req, false); -+} ++ spin_lock_irqsave(&rk_dev->lock, flags); ++ if (rk_dev->async_req) { ++ dev_err(rk_dev->dev, "%s: Unexpected crypto paths.\n", __func__); ++ goto exit; ++ } + -+struct rk_crypto_algt rk_v2_ecb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, ECB, ecb(sm4), ecb-sm4-rk); ++ rk_dev->err = 0; + -+struct rk_crypto_algt rk_v2_cbc_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, CBC, cbc(sm4), cbc-sm4-rk); ++ backlog = crypto_get_backlog(&rk_dev->queue); ++ async_req = crypto_dequeue_request(&rk_dev->queue); + -+struct rk_crypto_algt rk_v2_xts_sm4_alg = -+ RK_CIPHER_ALGO_XTS_INIT(SM4, xts(sm4), xts-sm4-rk); ++ if (!async_req) { ++ rk_dev->busy = false; ++ goto exit; ++ } ++ rk_dev->stat.dequeue_cnt++; + -+struct rk_crypto_algt rk_v2_cfb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, 
CFB, cfb(sm4), cfb-sm4-rk); ++ if (backlog) { ++ backlog->complete(backlog, -EINPROGRESS); ++ backlog = NULL; ++ } + -+struct rk_crypto_algt rk_v2_ofb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, OFB, ofb(sm4), ofb-sm4-rk); ++ rk_dev->async_req = async_req; ++ rk_dev->err = rk_start_op(rk_dev); ++ if (rk_dev->err) ++ rk_complete_op(rk_dev, rk_dev->err); + -+struct rk_crypto_algt rk_v2_ctr_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, CTR, ctr(sm4), ctr-sm4-rk); ++exit: ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++} + -+struct rk_crypto_algt rk_v2_gcm_sm4_alg = -+ RK_AEAD_ALGO_INIT(SM4, GCM, gcm(sm4), gcm-sm4-rk); ++static void rk_crypto_done_task_cb(unsigned long data) ++{ ++ struct rk_crypto_dev *rk_dev = (struct rk_crypto_dev *)data; ++ struct rk_alg_ctx *alg_ctx; ++ unsigned long flags; + -+struct rk_crypto_algt rk_v2_ecb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); ++ spin_lock_irqsave(&rk_dev->lock, flags); + -+struct rk_crypto_algt rk_v2_cbc_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); ++ if (!rk_dev->async_req) { ++ dev_err(rk_dev->dev, "done task receive invalid async_req\n"); ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++ return; ++ } + -+struct rk_crypto_algt rk_v2_xts_aes_alg = -+ RK_CIPHER_ALGO_XTS_INIT(AES, xts(aes), xts-aes-rk); ++ alg_ctx = rk_alg_ctx_cast(rk_dev->async_req); + -+struct rk_crypto_algt rk_v2_cfb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CFB, cfb(aes), cfb-aes-rk); ++ rk_dev->stat.done_cnt++; + -+struct rk_crypto_algt rk_v2_ofb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, OFB, ofb(aes), ofb-aes-rk); ++ if (rk_dev->err) ++ goto exit; + -+struct rk_crypto_algt rk_v2_ctr_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CTR, ctr(aes), ctr-aes-rk); ++ if (alg_ctx->left_bytes == 0) { ++ CRYPTO_TRACE("done task cb last calc"); ++ /* unload data for last calculation */ ++ rk_dev->err = rk_update_op(rk_dev); ++ goto exit; ++ } + -+struct rk_crypto_algt rk_v2_gcm_aes_alg = -+ RK_AEAD_ALGO_INIT(AES, GCM, gcm(aes), gcm-aes-rk); ++ rk_dev->err = rk_update_op(rk_dev); ++ if (rk_dev->err) ++ goto exit; + -+struct rk_crypto_algt rk_v2_ecb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); ++ spin_unlock_irqrestore(&rk_dev->lock, flags); + -+struct rk_crypto_algt rk_v2_cbc_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); ++ return; ++exit: ++ rk_complete_op(rk_dev, rk_dev->err); ++ spin_unlock_irqrestore(&rk_dev->lock, flags); ++} + -+struct rk_crypto_algt rk_v2_cfb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, CFB, cfb(des), cfb-des-rk); ++static struct rk_crypto_algt *rk_crypto_find_algs(struct rk_crypto_dev *rk_dev, ++ char *name) ++{ ++ u32 i; ++ struct rk_crypto_algt **algs; ++ struct rk_crypto_algt *tmp_algs; ++ uint32_t total_algs_num = 0; + -+struct rk_crypto_algt rk_v2_ofb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, OFB, ofb(des), ofb-des-rk); ++ algs = rk_dev->soc_data->hw_get_algts(&total_algs_num); ++ if (!algs || total_algs_num == 0) ++ return NULL; + -+struct rk_crypto_algt rk_v2_ecb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); ++ for (i = 0; i < total_algs_num; i++, algs++) { ++ tmp_algs = *algs; ++ tmp_algs->rk_dev = rk_dev; + -+struct rk_crypto_algt rk_v2_cbc_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); ++ if (strcmp(tmp_algs->name, name) == 0) ++ return tmp_algs; ++ } + -+struct rk_crypto_algt rk_v2_cfb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, CFB, cfb(des3_ede), cfb-des3_ede-rk); ++ return NULL; ++} + -+struct rk_crypto_algt 
rk_v2_ofb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, OFB, ofb(des3_ede), ofb-des3_ede-rk); ++static int rk_crypto_register(struct rk_crypto_dev *rk_dev) ++{ ++ unsigned int i, k; ++ char **algs_name; ++ struct rk_crypto_algt *tmp_algs; ++ struct rk_crypto_soc_data *soc_data; ++ int err = 0; + -diff --git a/drivers/crypto/rockchip/rk_crypto_v3.c b/drivers/crypto/rockchip/rk_crypto_v3.c -new file mode 100644 -index 000000000..7cd728599 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v3.c -@@ -0,0 +1,217 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Crypto acceleration support for Rockchip Crypto V3 -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ */ ++ soc_data = rk_dev->soc_data; + -+#include "rk_crypto_core.h" -+#include "rk_crypto_v3.h" -+#include "rk_crypto_v3_reg.h" -+#include "rk_crypto_utils.h" ++ algs_name = soc_data->valid_algs_name; + -+static const u32 cipher_mode2bit_mask[] = { -+ [CIPHER_MODE_ECB] = CRYPTO_ECB_FLAG, -+ [CIPHER_MODE_CBC] = CRYPTO_CBC_FLAG, -+ [CIPHER_MODE_CFB] = CRYPTO_CFB_FLAG, -+ [CIPHER_MODE_OFB] = CRYPTO_OFB_FLAG, -+ [CIPHER_MODE_CTR] = CRYPTO_CTR_FLAG, -+ [CIPHER_MODE_XTS] = CRYPTO_XTS_FLAG, -+ [CIPHER_MODE_CTS] = CRYPTO_CTS_FLAG, -+ [CIPHER_MODE_CCM] = CRYPTO_CCM_FLAG, -+ [CIPHER_MODE_GCM] = CRYPTO_GCM_FLAG, -+ [CIPHER_MODE_CMAC] = CRYPTO_CMAC_FLAG, -+ [CIPHER_MODE_CBCMAC] = CRYPTO_CBCMAC_FLAG, -+}; ++ rk_dev->request_crypto(rk_dev, __func__); + -+static const u32 hash_algo2bit_mask[] = { -+ [HASH_ALGO_SHA1] = CRYPTO_HASH_SHA1_FLAG, -+ [HASH_ALGO_SHA224] = CRYPTO_HASH_SHA224_FLAG, -+ [HASH_ALGO_SHA256] = CRYPTO_HASH_SHA256_FLAG, -+ [HASH_ALGO_SHA384] = CRYPTO_HASH_SHA384_FLAG, -+ [HASH_ALGO_SHA512] = CRYPTO_HASH_SHA512_FLAG, -+ [HASH_ALGO_SHA512_224] = CRYPTO_HASH_SHA512_224_FLAG, -+ [HASH_ALGO_SHA512_256] = CRYPTO_HASH_SHA512_256_FLAG, -+ [HASH_ALGO_MD5] = CRYPTO_HASH_MD5_FLAG, -+ [HASH_ALGO_SM3] = CRYPTO_HASH_SM3_FLAG, -+}; ++ for (i = 0; i < soc_data->valid_algs_num; i++, algs_name++) { ++ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); ++ if (!tmp_algs) { ++ CRYPTO_TRACE("%s not matched!!!\n", *algs_name); ++ continue; ++ } + -+static const u32 hmac_algo2bit_mask[] = { -+ [HASH_ALGO_SHA1] = CRYPTO_HMAC_SHA1_FLAG, -+ [HASH_ALGO_SHA256] = CRYPTO_HMAC_SHA256_FLAG, -+ [HASH_ALGO_SHA512] = CRYPTO_HMAC_SHA512_FLAG, -+ [HASH_ALGO_MD5] = CRYPTO_HMAC_MD5_FLAG, -+ [HASH_ALGO_SM3] = CRYPTO_HMAC_SM3_FLAG, -+}; ++ if (soc_data->hw_is_algo_valid && !soc_data->hw_is_algo_valid(rk_dev, tmp_algs)) { ++ CRYPTO_TRACE("%s skipped!!!\n", *algs_name); ++ continue; ++ } + -+static const char * const crypto_v3_rsts[] = { -+ "crypto-rst", -+}; ++ CRYPTO_TRACE("%s matched!!!\n", *algs_name); + -+static struct rk_crypto_algt *crypto_v3_algs[] = { -+ &rk_v3_ecb_sm4_alg, /* ecb(sm4) */ -+ &rk_v3_cbc_sm4_alg, /* cbc(sm4) */ -+ &rk_v3_xts_sm4_alg, /* xts(sm4) */ -+ &rk_v3_cfb_sm4_alg, /* cfb(sm4) */ -+ &rk_v3_ofb_sm4_alg, /* ofb(sm4) */ -+ &rk_v3_ctr_sm4_alg, /* ctr(sm4) */ -+ &rk_v3_gcm_sm4_alg, /* ctr(sm4) */ ++ tmp_algs->rk_dev = rk_dev; + -+ &rk_v3_ecb_aes_alg, /* ecb(aes) */ -+ &rk_v3_cbc_aes_alg, /* cbc(aes) */ -+ &rk_v3_xts_aes_alg, /* xts(aes) */ -+ &rk_v3_cfb_aes_alg, /* cfb(aes) */ -+ &rk_v3_ofb_aes_alg, /* ofb(aes) */ -+ &rk_v3_ctr_aes_alg, /* ctr(aes) */ -+ &rk_v3_gcm_aes_alg, /* gcm(aes) */ ++ if (tmp_algs->type == ALG_TYPE_CIPHER) { ++ if (tmp_algs->mode == CIPHER_MODE_CTR || ++ tmp_algs->mode == CIPHER_MODE_CFB || ++ tmp_algs->mode == CIPHER_MODE_OFB) ++ tmp_algs->alg.crypto.base.cra_blocksize = 1; + 
-+ &rk_v3_ecb_des_alg, /* ecb(des) */ -+ &rk_v3_cbc_des_alg, /* cbc(des) */ -+ &rk_v3_cfb_des_alg, /* cfb(des) */ -+ &rk_v3_ofb_des_alg, /* ofb(des) */ ++ if (tmp_algs->mode == CIPHER_MODE_ECB) ++ tmp_algs->alg.crypto.ivsize = 0; + -+ &rk_v3_ecb_des3_ede_alg, /* ecb(des3_ede) */ -+ &rk_v3_cbc_des3_ede_alg, /* cbc(des3_ede) */ -+ &rk_v3_cfb_des3_ede_alg, /* cfb(des3_ede) */ -+ &rk_v3_ofb_des3_ede_alg, /* ofb(des3_ede) */ ++ /* rv1126 is not support aes192 */ ++ if (soc_data->use_soft_aes192 && ++ tmp_algs->algo == CIPHER_ALGO_AES) ++ tmp_algs->use_soft_aes192 = true; + -+ &rk_v3_ahash_sha1, /* sha1 */ -+ &rk_v3_ahash_sha224, /* sha224 */ -+ &rk_v3_ahash_sha256, /* sha256 */ -+ &rk_v3_ahash_sha384, /* sha384 */ -+ &rk_v3_ahash_sha512, /* sha512 */ -+ &rk_v3_ahash_md5, /* md5 */ -+ &rk_v3_ahash_sm3, /* sm3 */ ++ err = crypto_register_skcipher(&tmp_algs->alg.crypto); ++ } else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) { ++ err = crypto_register_ahash(&tmp_algs->alg.hash); ++ } else if (tmp_algs->type == ALG_TYPE_ASYM) { ++ err = crypto_register_akcipher(&tmp_algs->alg.asym); ++ } else if (tmp_algs->type == ALG_TYPE_AEAD) { ++ if (soc_data->use_soft_aes192 && ++ tmp_algs->algo == CIPHER_ALGO_AES) ++ tmp_algs->use_soft_aes192 = true; ++ err = crypto_register_aead(&tmp_algs->alg.aead); ++ } else { ++ continue; ++ } + -+ &rk_v3_hmac_sha1, /* hmac(sha1) */ -+ &rk_v3_hmac_sha256, /* hmac(sha256) */ -+ &rk_v3_hmac_sha512, /* hmac(sha512) */ -+ &rk_v3_hmac_md5, /* hmac(md5) */ -+ &rk_v3_hmac_sm3, /* hmac(sm3) */ ++ if (err) ++ goto err_cipher_algs; + -+ /* Shared v2 version implementation */ -+ &rk_v2_asym_rsa, /* rsa */ -+}; ++ tmp_algs->valid_flag = true; + -+static bool rk_is_cipher_support(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode, u32 key_len) -+{ -+ u32 version = 0; -+ u32 mask = 0; -+ bool key_len_valid = true; ++ CRYPTO_TRACE("%s register OK!!!\n", *algs_name); ++ } + -+ switch (algo) { -+ case CIPHER_ALGO_DES: -+ case CIPHER_ALGO_DES3_EDE: -+ version = CRYPTO_READ(rk_dev, CRYPTO_DES_VERSION); ++ rk_dev->release_crypto(rk_dev, __func__); + -+ if (key_len == 8) -+ key_len_valid = true; -+ else if (key_len == 16 || key_len == 24) -+ key_len_valid = version & CRYPTO_TDES_FLAG; -+ else -+ key_len_valid = false; -+ break; -+ case CIPHER_ALGO_AES: -+ version = CRYPTO_READ(rk_dev, CRYPTO_AES_VERSION); ++ return 0; + -+ if (key_len == 16) -+ key_len_valid = version & CRYPTO_AES128_FLAG; -+ else if (key_len == 24) -+ key_len_valid = version & CRYPTO_AES192_FLAG; -+ else if (key_len == 32) -+ key_len_valid = version & CRYPTO_AES256_FLAG; -+ else -+ key_len_valid = false; -+ break; -+ case CIPHER_ALGO_SM4: -+ version = CRYPTO_READ(rk_dev, CRYPTO_SM4_VERSION); ++err_cipher_algs: ++ algs_name = soc_data->valid_algs_name; + -+ key_len_valid = (key_len == SM4_KEY_SIZE) ? 
true : false; -+ break; -+ default: -+ return false; -+ } ++ for (k = 0; k < i; k++, algs_name++) { ++ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); ++ if (!tmp_algs) ++ continue; + -+ mask = cipher_mode2bit_mask[mode]; ++ if (tmp_algs->type == ALG_TYPE_CIPHER) ++ crypto_unregister_skcipher(&tmp_algs->alg.crypto); ++ else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) ++ crypto_unregister_ahash(&tmp_algs->alg.hash); ++ else if (tmp_algs->type == ALG_TYPE_ASYM) ++ crypto_unregister_akcipher(&tmp_algs->alg.asym); ++ else if (tmp_algs->type == ALG_TYPE_AEAD) ++ crypto_unregister_aead(&tmp_algs->alg.aead); ++ } + -+ if (key_len == 0) -+ key_len_valid = true; ++ rk_dev->release_crypto(rk_dev, __func__); + -+ return (version & mask) && key_len_valid; ++ return err; +} + -+static bool rk_is_hash_support(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) ++static void rk_crypto_unregister(struct rk_crypto_dev *rk_dev) +{ -+ u32 version = 0; -+ u32 mask = 0; -+ -+ if (type == ALG_TYPE_HMAC) { -+ version = CRYPTO_READ(rk_dev, CRYPTO_HMAC_VERSION); -+ mask = hmac_algo2bit_mask[algo]; -+ } else if (type == ALG_TYPE_HASH) { -+ version = CRYPTO_READ(rk_dev, CRYPTO_HASH_VERSION); -+ mask = hash_algo2bit_mask[algo]; -+ } else { -+ return false; -+ } ++ unsigned int i; ++ char **algs_name; ++ struct rk_crypto_algt *tmp_algs; + -+ return version & mask; -+} ++ algs_name = rk_dev->soc_data->valid_algs_name; + -+int rk_hw_crypto_v3_init(struct device *dev, void *hw_info) -+{ -+ struct rk_hw_crypto_v3_info *info = -+ (struct rk_hw_crypto_v3_info *)hw_info; ++ rk_dev->request_crypto(rk_dev, __func__); + -+ if (!dev || !hw_info) -+ return -EINVAL; ++ for (i = 0; i < rk_dev->soc_data->valid_algs_num; i++, algs_name++) { ++ tmp_algs = rk_crypto_find_algs(rk_dev, *algs_name); ++ if (!tmp_algs) ++ continue; + -+ memset(info, 0x00, sizeof(*info)); ++ if (tmp_algs->type == ALG_TYPE_CIPHER) ++ crypto_unregister_skcipher(&tmp_algs->alg.crypto); ++ else if (tmp_algs->type == ALG_TYPE_HASH || tmp_algs->type == ALG_TYPE_HMAC) ++ crypto_unregister_ahash(&tmp_algs->alg.hash); ++ else if (tmp_algs->type == ALG_TYPE_ASYM) ++ crypto_unregister_akcipher(&tmp_algs->alg.asym); ++ } + -+ return rk_crypto_hw_desc_alloc(dev, &info->hw_desc); ++ rk_dev->release_crypto(rk_dev, __func__); +} + -+void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info) ++static void rk_crypto_request(struct rk_crypto_dev *rk_dev, const char *name) +{ -+ struct rk_hw_crypto_v3_info *info = -+ (struct rk_hw_crypto_v3_info *)hw_info; -+ -+ if (!dev || !hw_info) -+ return; ++ CRYPTO_TRACE("Crypto is requested by %s\n", name); + -+ rk_crypto_hw_desc_free(&info->hw_desc); ++ rk_crypto_enable_clk(rk_dev); +} + -+const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num) ++static void rk_crypto_release(struct rk_crypto_dev *rk_dev, const char *name) +{ -+ *num = ARRAY_SIZE(crypto_v3_rsts); ++ CRYPTO_TRACE("Crypto is released by %s\n", name); + -+ return crypto_v3_rsts; ++ rk_crypto_disable_clk(rk_dev); +} + -+struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num) ++static void rk_crypto_action(void *data) +{ -+ *num = ARRAY_SIZE(crypto_v3_algs); ++ struct rk_crypto_dev *rk_dev = data; + -+ return crypto_v3_algs; ++ if (rk_dev->rst) ++ reset_control_assert(rk_dev->rst); +} + -+bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) -+{ -+ if (aglt->type == ALG_TYPE_CIPHER || aglt->type == ALG_TYPE_AEAD) { -+ CRYPTO_TRACE("CIPHER"); -+ return rk_is_cipher_support(rk_dev, aglt->algo, 
aglt->mode, 0); -+ } else if (aglt->type == ALG_TYPE_HASH || aglt->type == ALG_TYPE_HMAC) { -+ CRYPTO_TRACE("HASH/HMAC"); -+ return rk_is_hash_support(rk_dev, aglt->algo, aglt->type); -+ } else if (aglt->type == ALG_TYPE_ASYM) { -+ CRYPTO_TRACE("RSA"); -+ return true; -+ } else { -+ return false; -+ } -+} ++static char *crypto_no_sm_algs_name[] = { ++ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", ++ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", ++ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", ++ "sha1", "sha224", "sha256", "sha384", "sha512", "md5", ++ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", ++ "rsa" ++}; + -diff --git a/drivers/crypto/rockchip/rk_crypto_v3.h b/drivers/crypto/rockchip/rk_crypto_v3.h -new file mode 100644 -index 000000000..a4b181416 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v3.h -@@ -0,0 +1,96 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ ++static char *crypto_rv1126_algs_name[] = { ++ "ecb(sm4)", "cbc(sm4)", "cfb(sm4)", "ofb(sm4)", "ctr(sm4)", "gcm(sm4)", ++ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", ++ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", ++ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", ++ "sha1", "sha256", "sha512", "md5", "sm3", ++ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", "hmac(sm3)", ++ "rsa" ++}; + -+/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. */ ++static char *crypto_full_algs_name[] = { ++ "ecb(sm4)", "cbc(sm4)", "cfb(sm4)", "ofb(sm4)", "ctr(sm4)", "gcm(sm4)", ++ "ecb(aes)", "cbc(aes)", "cfb(aes)", "ofb(aes)", "ctr(aes)", "gcm(aes)", ++ "ecb(des)", "cbc(des)", "cfb(des)", "ofb(des)", ++ "ecb(des3_ede)", "cbc(des3_ede)", "cfb(des3_ede)", "ofb(des3_ede)", ++ "sha1", "sha224", "sha256", "sha384", "sha512", "md5", "sm3", ++ "hmac(sha1)", "hmac(sha256)", "hmac(sha512)", "hmac(md5)", "hmac(sm3)", ++ "rsa" ++}; + -+#ifndef __RK_CRYPTO_V3_H__ -+#define __RK_CRYPTO_V3_H__ ++static const struct rk_crypto_soc_data px30_soc_data = ++ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_no_sm_algs_name, false); + -+#include ++static const struct rk_crypto_soc_data rv1126_soc_data = ++ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_rv1126_algs_name, true); + -+#include "rk_crypto_utils.h" ++static const struct rk_crypto_soc_data full_soc_data = ++ RK_CRYPTO_V2_SOC_DATA_INIT(crypto_full_algs_name, false); + -+struct rk_hw_crypto_v3_info { -+ struct rk_hw_desc hw_desc; ++static const struct rk_crypto_soc_data cryto_v3_soc_data = ++ RK_CRYPTO_V3_SOC_DATA_INIT(crypto_full_algs_name); ++ ++static char *rk3288_cipher_algs[] = { ++ "ecb(aes)", "cbc(aes)", ++ "ecb(des)", "cbc(des)", ++ "ecb(des3_ede)", "cbc(des3_ede)", ++ "sha1", "sha256", "md5", +}; + -+#define RK_CRYPTO_V3_SOC_DATA_INIT(names) {\ -+ .crypto_ver = "CRYPTO V3.0.0.0 multi",\ -+ .use_soft_aes192 = false,\ -+ .valid_algs_name = (names),\ -+ .valid_algs_num = ARRAY_SIZE(names),\ -+ .hw_init = rk_hw_crypto_v3_init,\ -+ .hw_deinit = rk_hw_crypto_v3_deinit,\ -+ .hw_get_rsts = rk_hw_crypto_v3_get_rsts,\ -+ .hw_get_algts = rk_hw_crypto_v3_get_algts,\ -+ .hw_is_algo_valid = rk_hw_crypto_v3_algo_valid,\ -+ .hw_info_size = sizeof(struct rk_hw_crypto_v3_info),\ -+ .default_pka_offset = 0x0480,\ -+ .use_lli_chain = true,\ -+} ++static const struct rk_crypto_soc_data rk3288_soc_data = ++ RK_CRYPTO_V1_SOC_DATA_INIT(rk3288_cipher_algs); ++ ++static const struct of_device_id crypto_of_id_table[] = { + +#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) ++ /* crypto v4 in belows same with crypto-v3*/ ++ { ++ 
.compatible = "rockchip,crypto-v4", ++ .data = (void *)&cryto_v3_soc_data, ++ }, + -+extern struct rk_crypto_algt rk_v3_ecb_sm4_alg; -+extern struct rk_crypto_algt rk_v3_cbc_sm4_alg; -+extern struct rk_crypto_algt rk_v3_xts_sm4_alg; -+extern struct rk_crypto_algt rk_v3_cfb_sm4_alg; -+extern struct rk_crypto_algt rk_v3_ofb_sm4_alg; -+extern struct rk_crypto_algt rk_v3_ctr_sm4_alg; -+extern struct rk_crypto_algt rk_v3_gcm_sm4_alg; ++ /* crypto v3 in belows */ ++ { ++ .compatible = "rockchip,crypto-v3", ++ .data = (void *)&cryto_v3_soc_data, ++ }, ++#endif + -+extern struct rk_crypto_algt rk_v3_ecb_aes_alg; -+extern struct rk_crypto_algt rk_v3_cbc_aes_alg; -+extern struct rk_crypto_algt rk_v3_xts_aes_alg; -+extern struct rk_crypto_algt rk_v3_cfb_aes_alg; -+extern struct rk_crypto_algt rk_v3_ofb_aes_alg; -+extern struct rk_crypto_algt rk_v3_ctr_aes_alg; -+extern struct rk_crypto_algt rk_v3_gcm_aes_alg; ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) ++ /* crypto v2 in belows */ ++ { ++ .compatible = "rockchip,px30-crypto", ++ .data = (void *)&px30_soc_data, ++ }, ++ { ++ .compatible = "rockchip,rv1126-crypto", ++ .data = (void *)&rv1126_soc_data, ++ }, ++ { ++ .compatible = "rockchip,rk3568-crypto", ++ .data = (void *)&full_soc_data, ++ }, ++ { ++ .compatible = "rockchip,rk3588-crypto", ++ .data = (void *)&full_soc_data, ++ }, ++#endif + -+extern struct rk_crypto_algt rk_v3_ecb_des_alg; -+extern struct rk_crypto_algt rk_v3_cbc_des_alg; -+extern struct rk_crypto_algt rk_v3_cfb_des_alg; -+extern struct rk_crypto_algt rk_v3_ofb_des_alg; ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) ++ /* crypto v1 in belows */ ++ { ++ .compatible = "rockchip,rk3288-crypto", ++ .data = (void *)&rk3288_soc_data, ++ }, ++#endif + -+extern struct rk_crypto_algt rk_v3_ecb_des3_ede_alg; -+extern struct rk_crypto_algt rk_v3_cbc_des3_ede_alg; -+extern struct rk_crypto_algt rk_v3_cfb_des3_ede_alg; -+extern struct rk_crypto_algt rk_v3_ofb_des3_ede_alg; ++ { /* sentinel */ } ++}; + -+extern struct rk_crypto_algt rk_v3_ahash_sha1; -+extern struct rk_crypto_algt rk_v3_ahash_sha224; -+extern struct rk_crypto_algt rk_v3_ahash_sha256; -+extern struct rk_crypto_algt rk_v3_ahash_sha384; -+extern struct rk_crypto_algt rk_v3_ahash_sha512; -+extern struct rk_crypto_algt rk_v3_ahash_md5; -+extern struct rk_crypto_algt rk_v3_ahash_sm3; ++MODULE_DEVICE_TABLE(of, crypto_of_id_table); + -+extern struct rk_crypto_algt rk_v3_hmac_md5; -+extern struct rk_crypto_algt rk_v3_hmac_sha1; -+extern struct rk_crypto_algt rk_v3_hmac_sha256; -+extern struct rk_crypto_algt rk_v3_hmac_sha512; -+extern struct rk_crypto_algt rk_v3_hmac_sm3; ++static int rk_crypto_probe(struct platform_device *pdev) ++{ ++ struct resource *res; ++ struct device *dev = &pdev->dev; ++ struct device_node *np = pdev->dev.of_node; ++ struct rk_crypto_soc_data *soc_data; ++ const struct of_device_id *match; ++ struct rk_crypto_dev *rk_dev; ++ const char * const *rsts; ++ uint32_t rst_num = 0; ++ int err = 0; + -+/* Shared v2 version implementation */ -+extern struct rk_crypto_algt rk_v2_asym_rsa; ++ rk_dev = devm_kzalloc(&pdev->dev, ++ sizeof(*rk_dev), GFP_KERNEL); ++ if (!rk_dev) { ++ err = -ENOMEM; ++ goto err_crypto; ++ } + -+int rk_hw_crypto_v3_init(struct device *dev, void *hw_info); -+void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info); -+const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num); -+struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num); -+bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); ++ 
rk_dev->name = CRYPTO_NAME; + -+#else ++ match = of_match_node(crypto_of_id_table, np); ++ soc_data = (struct rk_crypto_soc_data *)match->data; ++ rk_dev->soc_data = soc_data; + -+static inline int rk_hw_crypto_v3_init(struct device *dev, void *hw_info) { return -EINVAL; } -+static inline void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info) {} -+static inline const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num) { return NULL; } -+static inline struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num) { return NULL; } -+static inline bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, -+ struct rk_crypto_algt *aglt) -+{ -+ return false; -+} ++ rsts = soc_data->hw_get_rsts(&rst_num); ++ if (rsts && rsts[0]) { ++ rk_dev->rst = ++ devm_reset_control_get(dev, rsts[0]); ++ if (IS_ERR(rk_dev->rst)) { ++ err = PTR_ERR(rk_dev->rst); ++ goto err_crypto; ++ } ++ reset_control_assert(rk_dev->rst); ++ usleep_range(10, 20); ++ reset_control_deassert(rk_dev->rst); ++ } + -+#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) */ ++ err = devm_add_action_or_reset(dev, rk_crypto_action, rk_dev); ++ if (err) ++ goto err_crypto; + -+#endif /* end of __RK_CRYPTO_V3_H__ */ -diff --git a/drivers/crypto/rockchip/rk_crypto_v3_ahash.c b/drivers/crypto/rockchip/rk_crypto_v3_ahash.c -new file mode 100644 -index 000000000..0c91b45b2 ---- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v3_ahash.c -@@ -0,0 +1,468 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Hash acceleration support for Rockchip Crypto v3 -+ * -+ * Copyright (c) 2022, Rockchip Electronics Co., Ltd -+ * -+ * Author: Lin Jinhan -+ * -+ */ ++ spin_lock_init(&rk_dev->lock); + -+#include -+#include ++ /* get crypto base */ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ rk_dev->reg = devm_ioremap_resource(dev, res); ++ if (IS_ERR(rk_dev->reg)) { ++ err = PTR_ERR(rk_dev->reg); ++ goto err_crypto; ++ } + -+#include "rk_crypto_core.h" -+#include "rk_crypto_v3.h" -+#include "rk_crypto_v3_reg.h" -+#include "rk_crypto_ahash_utils.h" -+#include "rk_crypto_utils.h" ++ /* get pka base, if pka reg not set, pka reg = crypto + pka offset */ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); ++ rk_dev->pka_reg = devm_ioremap_resource(dev, res); ++ if (IS_ERR(rk_dev->pka_reg)) ++ rk_dev->pka_reg = rk_dev->reg + soc_data->default_pka_offset; + -+#define RK_HASH_CTX_MAGIC 0x1A1A1A1A -+#define RK_POLL_PERIOD_US 100 -+#define RK_POLL_TIMEOUT_US 50000 ++ rk_dev->clks_num = devm_clk_bulk_get_all(dev, &rk_dev->clk_bulks); ++ if (rk_dev->clks_num < 0) { ++ err = rk_dev->clks_num; ++ dev_err(dev, "failed to get clks property\n"); ++ goto err_crypto; ++ } + -+struct rk_ahash_expt_ctx { -+ struct rk_ahash_ctx ctx; -+ u8 lastc[RK_DMA_ALIGNMENT]; -+}; ++ rk_dev->irq = platform_get_irq(pdev, 0); ++ if (rk_dev->irq < 0) { ++ dev_warn(dev, ++ "control Interrupt is not available.\n"); ++ err = rk_dev->irq; ++ goto err_crypto; ++ } + -+struct rk_hash_mid_data { -+ u32 valid_flag; -+ u32 hash_ctl; -+ u32 data[CRYPTO_HASH_MID_WORD_SIZE]; -+}; ++ err = devm_request_irq(dev, rk_dev->irq, ++ rk_crypto_irq_handle, IRQF_SHARED, ++ "rk-crypto", pdev); ++ if (err) { ++ dev_err(dev, "irq request failed.\n"); ++ goto err_crypto; ++ } + -+static const u32 hash_algo2bc[] = { -+ [HASH_ALGO_MD5] = CRYPTO_MD5, -+ [HASH_ALGO_SHA1] = CRYPTO_SHA1, -+ [HASH_ALGO_SHA224] = CRYPTO_SHA224, -+ [HASH_ALGO_SHA256] = CRYPTO_SHA256, -+ [HASH_ALGO_SHA384] = CRYPTO_SHA384, -+ [HASH_ALGO_SHA512] = CRYPTO_SHA512, -+ [HASH_ALGO_SM3] = CRYPTO_SM3, -+}; ++ 
disable_irq(rk_dev->irq); + -+static void rk_hash_reset(struct rk_crypto_dev *rk_dev) -+{ -+ int ret; -+ u32 tmp = 0, tmp_mask = 0; -+ unsigned int pool_timeout_us = 1000; ++ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); ++ if (err) { ++ dev_err(dev, "crypto: No suitable DMA available.\n"); ++ goto err_crypto; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); ++ rk_dev->dev = dev; + -+ tmp = CRYPTO_SW_CC_RESET; -+ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; ++ rk_dev->hw_info = ++ devm_kzalloc(dev, soc_data->hw_info_size, GFP_KERNEL); ++ if (!rk_dev->hw_info) { ++ err = -ENOMEM; ++ goto err_crypto; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); ++ err = soc_data->hw_init(dev, rk_dev->hw_info); ++ if (err) { ++ dev_err(dev, "hw_init failed.\n"); ++ goto err_crypto; ++ } + -+ /* This is usually done in 20 clock cycles */ -+ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, pool_timeout_us, -+ false, rk_dev, CRYPTO_RST_CTL); -+ if (ret) -+ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", -+ pool_timeout_us); ++ rk_dev->addr_vir = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, ++ RK_BUFFER_ORDER); ++ if (!rk_dev->addr_vir) { ++ err = -ENOMEM; ++ dev_err(dev, "__get_free_page failed.\n"); ++ goto err_crypto; ++ } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0xffff0000); ++ rk_dev->vir_max = RK_BUFFER_SIZE; + -+ /* clear dma int status */ -+ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); -+} ++ rk_dev->addr_aad = (void *)__get_free_page(GFP_KERNEL); ++ if (!rk_dev->addr_aad) { ++ err = -ENOMEM; ++ dev_err(dev, "__get_free_page failed.\n"); ++ goto err_crypto; ++ } + -+static int rk_hash_mid_data_store(struct rk_crypto_dev *rk_dev, struct rk_hash_mid_data *mid_data) -+{ -+ int ret; -+ uint32_t reg_ctrl; ++ rk_dev->aad_max = RK_BUFFER_SIZE; + -+ CRYPTO_TRACE(); ++ platform_set_drvdata(pdev, rk_dev); + -+ ret = read_poll_timeout_atomic(CRYPTO_READ, -+ reg_ctrl, -+ reg_ctrl & CRYPTO_HASH_MID_IS_VALID, -+ 0, -+ RK_POLL_TIMEOUT_US, -+ false, rk_dev, CRYPTO_MID_VALID); ++ tasklet_init(&rk_dev->queue_task, ++ rk_crypto_queue_task_cb, (unsigned long)rk_dev); ++ tasklet_init(&rk_dev->done_task, ++ rk_crypto_done_task_cb, (unsigned long)rk_dev); ++ crypto_init_queue(&rk_dev->queue, 50); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID_SWITCH, -+ CRYPTO_MID_VALID_ENABLE << CRYPTO_WRITE_MASK_SHIFT); -+ if (ret) { -+ CRYPTO_TRACE("CRYPTO_MID_VALID timeout."); -+ goto exit; -+ } ++ timer_setup(&rk_dev->timer, rk_crypto_irq_timer_handle, 0); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID, -+ CRYPTO_HASH_MID_IS_VALID | -+ CRYPTO_HASH_MID_IS_VALID << CRYPTO_WRITE_MASK_SHIFT); ++ rk_dev->request_crypto = rk_crypto_request; ++ rk_dev->release_crypto = rk_crypto_release; ++ rk_dev->load_data = rk_load_data; ++ rk_dev->unload_data = rk_unload_data; ++ rk_dev->enqueue = rk_crypto_enqueue; ++ rk_dev->busy = false; + -+ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, -+ (u8 *)mid_data->data, sizeof(mid_data->data)); ++ err = rk_crypto_register(rk_dev); ++ if (err) { ++ dev_err(dev, "err in register alg"); ++ goto err_register_alg; ++ } + -+ mid_data->hash_ctl = CRYPTO_READ(rk_dev, CRYPTO_HASH_CTL); -+ mid_data->valid_flag = 1; ++ rk_cryptodev_register_dev(rk_dev->dev, soc_data->crypto_ver); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ rkcrypto_proc_init(rk_dev); + -+exit: -+ return ret; ++ dev_info(dev, "%s Accelerator successfully registered\n", soc_data->crypto_ver); ++ return 0; ++ 
++err_register_alg: ++ tasklet_kill(&rk_dev->queue_task); ++ tasklet_kill(&rk_dev->done_task); ++err_crypto: ++ return err; +} + -+static int rk_hash_mid_data_restore(struct rk_crypto_dev *rk_dev, struct rk_hash_mid_data *mid_data) ++static int rk_crypto_remove(struct platform_device *pdev) +{ -+ CRYPTO_TRACE(); ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(pdev); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID_SWITCH, -+ CRYPTO_MID_VALID_ENABLE | CRYPTO_MID_VALID_ENABLE << CRYPTO_WRITE_MASK_SHIFT); ++ rkcrypto_proc_cleanup(rk_dev); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID, -+ CRYPTO_HASH_MID_IS_VALID | -+ CRYPTO_HASH_MID_IS_VALID << CRYPTO_WRITE_MASK_SHIFT); ++ rk_cryptodev_unregister_dev(rk_dev->dev); + -+ if (!mid_data->valid_flag) { -+ CRYPTO_TRACE("clear mid data"); -+ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, ARRAY_SIZE(mid_data->data)); -+ return 0; -+ } ++ del_timer_sync(&rk_dev->timer); + -+ rk_crypto_write_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, -+ (u8 *)mid_data->data, sizeof(mid_data->data)); ++ rk_crypto_unregister(rk_dev); ++ tasklet_kill(&rk_dev->done_task); ++ tasklet_kill(&rk_dev->queue_task); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, mid_data->hash_ctl | CRYPTO_WRITE_MASK_ALL); ++ if (rk_dev->addr_vir) ++ free_pages((unsigned long)rk_dev->addr_vir, RK_BUFFER_ORDER); ++ ++ if (rk_dev->addr_aad) ++ free_page((unsigned long)rk_dev->addr_aad); ++ ++ rk_dev->soc_data->hw_deinit(&pdev->dev, rk_dev->hw_info); + + return 0; +} + -+static int rk_crypto_irq_handle(int irq, void *dev_id) -+{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; -+ struct rk_hw_crypto_v3_info *hw_info = -+ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++static struct platform_driver crypto_driver = { ++ .probe = rk_crypto_probe, ++ .remove = rk_crypto_remove, ++ .driver = { ++ .name = "rk-crypto", ++ .of_match_table = crypto_of_id_table, ++ }, ++}; + -+ /* disable crypto irq */ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); ++module_platform_driver(crypto_driver); + -+ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); ++MODULE_AUTHOR("Lin Jinhan "); ++MODULE_DESCRIPTION("Support for Rockchip's cryptographic engine"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/crypto/rockchip/rk_crypto_core.h b/drivers/crypto/rockchip/rk_crypto_core.h +new file mode 100644 +index 000000000..b2b059e91 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_core.h +@@ -0,0 +1,464 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ interrupt_status &= CRYPTO_LOCKSTEP_MASK; ++/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ + -+ if (interrupt_status != CRYPTO_SRC_ITEM_DONE_INT_ST) { -+ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); -+ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", -+ (u32)alg_ctx->addr_in); -+ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", -+ (u32)alg_ctx->addr_out); -+ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); -+ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", -+ (u32)hw_info->hw_desc.lli_head_dma); -+ dev_err(rk_dev->dev, "DMA Error status = %08x\n", -+ interrupt_status); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); -+ rk_dev->err = -EFAULT; -+ } ++#ifndef __RK_CRYPTO_CORE_H__ ++#define __RK_CRYPTO_CORE_H__ + -+ return 0; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rk_crypto_bignum.h" ++ ++/* ++ * Change to the lowest priority, and hardware encryption is ++ * invoked explicitly only at the User layer. ++ */ ++#define RK_CRYPTO_PRIORITY 0 ++ ++/* Increase the addr_vir buffer size from 1 to 8 pages */ ++#define RK_BUFFER_ORDER 3 ++#define RK_BUFFER_SIZE (PAGE_SIZE << RK_BUFFER_ORDER) ++ ++#define RK_DMA_ALIGNMENT 128 ++#define sha384_state sha512_state ++#define sha224_state sha256_state ++ ++#define RK_FLAG_FINAL BIT(0) ++#define RK_FLAG_UPDATE BIT(1) ++ ++struct rk_crypto_stat { ++ unsigned long long busy_cnt; ++ unsigned long long equeue_cnt; ++ unsigned long long dequeue_cnt; ++ unsigned long long complete_cnt; ++ unsigned long long done_cnt; ++ unsigned long long fake_cnt; ++ unsigned long long irq_cnt; ++ unsigned long long timeout_cnt; ++ unsigned long long error_cnt; ++ unsigned long long ever_queue_max; ++ int last_error; ++}; ++ ++struct rk_crypto_dev { ++ struct device *dev; ++ struct reset_control *rst; ++ void __iomem *reg; ++ void __iomem *pka_reg; ++ int irq; ++ struct crypto_queue queue; ++ struct tasklet_struct queue_task; ++ struct tasklet_struct done_task; ++ int err; ++ void *hw_info; ++ struct rk_crypto_soc_data *soc_data; ++ int clks_num; ++ struct clk_bulk_data *clk_bulks; ++ const char *name; ++ struct proc_dir_entry *procfs; ++ struct rk_crypto_stat stat; ++ ++ /* device lock */ ++ spinlock_t lock; ++ ++ /* the public variable */ ++ struct crypto_async_request *async_req; ++ void *addr_vir; ++ u32 vir_max; ++ void *addr_aad; ++ int aad_max; ++ struct scatterlist src[2]; ++ struct scatterlist dst[2]; ++ ++ struct timer_list timer; ++ bool busy; ++ void (*request_crypto)(struct rk_crypto_dev *rk_dev, const char *name); ++ void (*release_crypto)(struct rk_crypto_dev *rk_dev, const char *name); ++ int (*load_data)(struct rk_crypto_dev *rk_dev, ++ struct scatterlist *sg_src, ++ struct scatterlist *sg_dst); ++ int (*unload_data)(struct rk_crypto_dev *rk_dev); ++ int (*enqueue)(struct rk_crypto_dev *rk_dev, 
++ struct crypto_async_request *async_req); ++}; ++ ++struct rk_crypto_soc_data { ++ const char *crypto_ver; ++ char **valid_algs_name; ++ int valid_algs_num; ++ unsigned int hw_info_size; ++ bool use_soft_aes192; ++ int default_pka_offset; ++ bool use_lli_chain; ++ ++ int (*hw_init)(struct device *dev, void *hw_info); ++ void (*hw_deinit)(struct device *dev, void *hw_info); ++ const char * const *(*hw_get_rsts)(uint32_t *num); ++ struct rk_crypto_algt **(*hw_get_algts)(uint32_t *num); ++ bool (*hw_is_algo_valid)(struct rk_crypto_dev *rk_dev, ++ struct rk_crypto_algt *aglt); ++}; ++ ++struct rk_alg_ops { ++ int (*start)(struct rk_crypto_dev *rk_dev); ++ int (*update)(struct rk_crypto_dev *rk_dev); ++ void (*complete)(struct crypto_async_request *base, int err); ++ int (*irq_handle)(int irq, void *dev_id); ++ ++ int (*hw_write_key)(struct rk_crypto_dev *rk_dev, const u8 *key, u32 key_len); ++ void (*hw_write_iv)(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len); ++ int (*hw_init)(struct rk_crypto_dev *rk_dev, u32 algo, u32 type); ++ int (*hw_dma_start)(struct rk_crypto_dev *rk_dev, uint32_t flag); ++ int (*hw_get_result)(struct rk_crypto_dev *rk_dev, uint8_t *data, uint32_t data_len); ++}; ++ ++struct rk_alg_ctx { ++ struct rk_alg_ops ops; ++ struct scatterlist *sg_src; ++ struct scatterlist *sg_dst; ++ struct scatterlist sg_tmp; ++ struct scatterlist sg_aad; ++ struct scatterlist *req_src; ++ struct scatterlist *req_dst; ++ size_t src_nents; ++ size_t dst_nents; ++ size_t map_nents; ++ ++ int is_aead; ++ unsigned int total; ++ unsigned int assoclen; ++ unsigned int count; ++ unsigned int left_bytes; ++ ++ dma_addr_t addr_in; ++ dma_addr_t addr_out; ++ dma_addr_t addr_aad_in; ++ ++ bool aligned; ++ bool is_dma; ++ int align_size; ++ int chunk_size; ++}; ++ ++/* the private variable of hash */ ++struct rk_ahash_ctx { ++ struct rk_alg_ctx algs_ctx; ++ struct rk_crypto_dev *rk_dev; ++ u8 authkey[SHA512_BLOCK_SIZE]; ++ u32 authkey_len; ++ struct scatterlist hash_sg[2]; ++ u8 *hash_tmp; ++ u32 hash_tmp_len; ++ bool hash_tmp_mapped; ++ u32 calc_cnt; ++ ++ u8 lastc[RK_DMA_ALIGNMENT]; ++ u32 lastc_len; ++ ++ void *priv; ++ ++ /* for fallback */ ++ struct crypto_ahash *fallback_tfm; ++}; ++ ++/* the privete variable of hash for fallback */ ++struct rk_ahash_rctx { ++ struct ahash_request fallback_req; ++ u32 mode; ++ u32 flag; ++}; ++ ++/* the private variable of cipher */ ++struct rk_cipher_ctx { ++ struct rk_alg_ctx algs_ctx; ++ struct rk_crypto_dev *rk_dev; ++ unsigned char key[AES_MAX_KEY_SIZE * 2]; ++ unsigned int keylen; ++ u32 mode; ++ u8 iv[AES_BLOCK_SIZE]; ++ u32 iv_len; ++ u8 lastc[AES_BLOCK_SIZE]; ++ bool is_enc; ++ void *priv; ++ ++ /* for fallback */ ++ bool fallback_key_inited; ++ struct crypto_skcipher *fallback_tfm; ++ struct skcipher_request fallback_req; // keep at the end ++ struct crypto_aead *fallback_aead; ++}; ++ ++struct rk_rsa_ctx { ++ struct rk_alg_ctx algs_ctx; ++ struct rk_bignum *n; ++ struct rk_bignum *e; ++ struct rk_bignum *d; ++ ++ struct rk_crypto_dev *rk_dev; ++}; ++ ++enum alg_type { ++ ALG_TYPE_HASH, ++ ALG_TYPE_HMAC, ++ ALG_TYPE_CIPHER, ++ ALG_TYPE_ASYM, ++ ALG_TYPE_AEAD, ++ ALG_TYPE_MAX, ++}; ++ ++struct rk_crypto_algt { ++ struct rk_crypto_dev *rk_dev; ++ union { ++ struct skcipher_alg crypto; ++ struct ahash_alg hash; ++ struct akcipher_alg asym; ++ struct aead_alg aead; ++ } alg; ++ enum alg_type type; ++ u32 algo; ++ u32 mode; ++ char *name; ++ bool use_soft_aes192; ++ bool valid_flag; ++}; ++ ++enum rk_hash_algo { ++ HASH_ALGO_MD5, ++ HASH_ALGO_SHA1, 
++ HASH_ALGO_SHA224, ++ HASH_ALGO_SHA256, ++ HASH_ALGO_SHA384, ++ HASH_ALGO_SHA512, ++ HASH_ALGO_SM3, ++ HASH_ALGO_SHA512_224, ++ HASH_ALGO_SHA512_256, ++}; ++ ++enum rk_cipher_algo { ++ CIPHER_ALGO_DES, ++ CIPHER_ALGO_DES3_EDE, ++ CIPHER_ALGO_AES, ++ CIPHER_ALGO_SM4, ++}; ++ ++enum rk_cipher_mode { ++ CIPHER_MODE_ECB, ++ CIPHER_MODE_CBC, ++ CIPHER_MODE_CFB, ++ CIPHER_MODE_OFB, ++ CIPHER_MODE_CTR, ++ CIPHER_MODE_XTS, ++ CIPHER_MODE_CTS, ++ CIPHER_MODE_CCM, ++ CIPHER_MODE_GCM, ++ CIPHER_MODE_CMAC, ++ CIPHER_MODE_CBCMAC, ++}; ++ ++#define DES_MIN_KEY_SIZE DES_KEY_SIZE ++#define DES_MAX_KEY_SIZE DES_KEY_SIZE ++#define DES3_EDE_MIN_KEY_SIZE DES3_EDE_KEY_SIZE ++#define DES3_EDE_MAX_KEY_SIZE DES3_EDE_KEY_SIZE ++#define SM4_MIN_KEY_SIZE SM4_KEY_SIZE ++#define SM4_MAX_KEY_SIZE SM4_KEY_SIZE ++ ++#define MD5_BLOCK_SIZE SHA1_BLOCK_SIZE ++ ++#define RK_AEAD_ALGO_INIT(cipher_algo, cipher_mode, algo_name, driver_name) {\ ++ .name = #algo_name,\ ++ .type = ALG_TYPE_AEAD,\ ++ .algo = CIPHER_ALGO_##cipher_algo,\ ++ .mode = CIPHER_MODE_##cipher_mode,\ ++ .alg.aead = {\ ++ .base.cra_name = #algo_name,\ ++ .base.cra_driver_name = #driver_name,\ ++ .base.cra_priority = RK_CRYPTO_PRIORITY,\ ++ .base.cra_flags = CRYPTO_ALG_TYPE_AEAD |\ ++ CRYPTO_ALG_KERN_DRIVER_ONLY |\ ++ CRYPTO_ALG_ASYNC |\ ++ CRYPTO_ALG_NEED_FALLBACK,\ ++ .base.cra_blocksize = 1,\ ++ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ ++ .base.cra_alignmask = 0x07,\ ++ .base.cra_module = THIS_MODULE,\ ++ .init = rk_aead_init_tfm,\ ++ .exit = rk_aead_exit_tfm,\ ++ .ivsize = GCM_AES_IV_SIZE,\ ++ .chunksize = cipher_algo##_BLOCK_SIZE,\ ++ .maxauthsize = AES_BLOCK_SIZE,\ ++ .setkey = rk_aead_setkey,\ ++ .setauthsize = rk_aead_gcm_setauthsize,\ ++ .encrypt = rk_aead_encrypt,\ ++ .decrypt = rk_aead_decrypt,\ ++ } \ +} + -+static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) -+{ -+ struct ahash_request *req = ahash_request_cast(base); -+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); -+ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(ctx->rk_dev); ++#define RK_CIPHER_ALGO_INIT(cipher_algo, cipher_mode, algo_name, driver_name) {\ ++ .name = #algo_name,\ ++ .type = ALG_TYPE_CIPHER,\ ++ .algo = CIPHER_ALGO_##cipher_algo,\ ++ .mode = CIPHER_MODE_##cipher_mode,\ ++ .alg.crypto = {\ ++ .base.cra_name = #algo_name,\ ++ .base.cra_driver_name = #driver_name,\ ++ .base.cra_priority = RK_CRYPTO_PRIORITY,\ ++ .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ ++ CRYPTO_ALG_ASYNC |\ ++ CRYPTO_ALG_NEED_FALLBACK,\ ++ .base.cra_blocksize = cipher_algo##_BLOCK_SIZE,\ ++ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ ++ .base.cra_alignmask = 0x07,\ ++ .base.cra_module = THIS_MODULE,\ ++ .init = rk_ablk_init_tfm,\ ++ .exit = rk_ablk_exit_tfm,\ ++ .min_keysize = cipher_algo##_MIN_KEY_SIZE,\ ++ .max_keysize = cipher_algo##_MAX_KEY_SIZE,\ ++ .ivsize = cipher_algo##_BLOCK_SIZE,\ ++ .chunksize = cipher_algo##_BLOCK_SIZE,\ ++ .setkey = rk_cipher_setkey,\ ++ .encrypt = rk_cipher_encrypt,\ ++ .decrypt = rk_cipher_decrypt,\ ++ } \ ++} + -+ struct rk_hw_crypto_v3_info *hw_info = ctx->rk_dev->hw_info; -+ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; ++#define RK_CIPHER_ALGO_XTS_INIT(cipher_algo, algo_name, driver_name) {\ ++ .name = #algo_name,\ ++ .type = ALG_TYPE_CIPHER,\ ++ .algo = CIPHER_ALGO_##cipher_algo,\ ++ .mode = CIPHER_MODE_XTS,\ ++ .alg.crypto = {\ ++ .base.cra_name = #algo_name,\ ++ .base.cra_driver_name = #driver_name,\ ++ .base.cra_priority = RK_CRYPTO_PRIORITY,\ 
++ .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ ++ CRYPTO_ALG_ASYNC |\ ++ CRYPTO_ALG_NEED_FALLBACK,\ ++ .base.cra_blocksize = cipher_algo##_BLOCK_SIZE,\ ++ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),\ ++ .base.cra_alignmask = 0x07,\ ++ .base.cra_module = THIS_MODULE,\ ++ .init = rk_ablk_init_tfm,\ ++ .exit = rk_ablk_exit_tfm,\ ++ .min_keysize = cipher_algo##_MAX_KEY_SIZE,\ ++ .max_keysize = cipher_algo##_MAX_KEY_SIZE * 2,\ ++ .ivsize = cipher_algo##_BLOCK_SIZE,\ ++ .chunksize = cipher_algo##_BLOCK_SIZE,\ ++ .setkey = rk_cipher_setkey,\ ++ .encrypt = rk_cipher_encrypt,\ ++ .decrypt = rk_cipher_decrypt,\ ++ } \ ++} + -+ if (err) { -+ rk_hash_reset(ctx->rk_dev); -+ pr_err("aligned = %u, align_size = %u\n", -+ alg_ctx->aligned, alg_ctx->align_size); -+ pr_err("total = %u, left = %u, count = %u\n", -+ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); -+ pr_err("lli->src = %08x\n", lli_desc->src_addr); -+ pr_err("lli->src_len = %08x\n", lli_desc->src_len); -+ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); -+ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); -+ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); -+ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); -+ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); -+ } ++#define RK_HASH_ALGO_INIT(hash_algo, algo_name) {\ ++ .name = #algo_name,\ ++ .type = ALG_TYPE_HASH,\ ++ .algo = HASH_ALGO_##hash_algo,\ ++ .alg.hash = {\ ++ .init = rk_ahash_init,\ ++ .update = rk_ahash_update,\ ++ .final = rk_ahash_final,\ ++ .finup = rk_ahash_finup,\ ++ .export = rk_ahash_export,\ ++ .import = rk_ahash_import,\ ++ .digest = rk_ahash_digest,\ ++ .halg = {\ ++ .digestsize = hash_algo##_DIGEST_SIZE,\ ++ .statesize = sizeof(struct algo_name##_state),\ ++ .base = {\ ++ .cra_name = #algo_name,\ ++ .cra_driver_name = #algo_name"-rk",\ ++ .cra_priority = RK_CRYPTO_PRIORITY,\ ++ .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ ++ CRYPTO_ALG_ASYNC |\ ++ CRYPTO_ALG_NEED_FALLBACK,\ ++ .cra_blocksize = hash_algo##_BLOCK_SIZE,\ ++ .cra_ctxsize = sizeof(struct rk_ahash_ctx),\ ++ .cra_alignmask = 0,\ ++ .cra_init = rk_cra_hash_init,\ ++ .cra_exit = rk_cra_hash_exit,\ ++ .cra_module = THIS_MODULE,\ ++ } \ ++ } \ ++ } \ ++} + -+ if (alg_ctx->total) -+ rk_hash_mid_data_store(ctx->rk_dev, (struct rk_hash_mid_data *)ctx->priv); ++#define RK_HMAC_ALGO_INIT(hash_algo, algo_name) {\ ++ .name = "hmac(" #algo_name ")",\ ++ .type = ALG_TYPE_HMAC,\ ++ .algo = HASH_ALGO_##hash_algo,\ ++ .alg.hash = {\ ++ .init = rk_ahash_init,\ ++ .update = rk_ahash_update,\ ++ .final = rk_ahash_final,\ ++ .finup = rk_ahash_finup,\ ++ .export = rk_ahash_export,\ ++ .import = rk_ahash_import,\ ++ .digest = rk_ahash_digest,\ ++ .setkey = rk_ahash_hmac_setkey,\ ++ .halg = {\ ++ .digestsize = hash_algo##_DIGEST_SIZE,\ ++ .statesize = sizeof(struct algo_name##_state),\ ++ .base = {\ ++ .cra_name = "hmac(" #algo_name ")",\ ++ .cra_driver_name = "hmac-" #algo_name "-rk",\ ++ .cra_priority = RK_CRYPTO_PRIORITY,\ ++ .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |\ ++ CRYPTO_ALG_ASYNC |\ ++ CRYPTO_ALG_NEED_FALLBACK,\ ++ .cra_blocksize = hash_algo##_BLOCK_SIZE,\ ++ .cra_ctxsize = sizeof(struct rk_ahash_ctx),\ ++ .cra_alignmask = 0,\ ++ .cra_init = rk_cra_hash_init,\ ++ .cra_exit = rk_cra_hash_exit,\ ++ .cra_module = THIS_MODULE,\ ++ } \ ++ } \ ++ } \ ++} + -+ if (base->complete) -+ base->complete(base, err); ++#define IS_TYPE_HMAC(type) ((type) == ALG_TYPE_HMAC) ++ ++#define CRYPTO_READ(dev, offset) \ ++ readl_relaxed(((dev)->reg + (offset))) ++#define CRYPTO_WRITE(dev, offset, val) \ ++ 
writel_relaxed((val), ((dev)->reg + (offset))) ++ ++#ifdef DEBUG ++#define CRYPTO_TRACE(format, ...) pr_err("[%s, %05d]-trace: " format "\n", \ ++ __func__, __LINE__, ##__VA_ARGS__) ++#define CRYPTO_MSG(format, ...) pr_err("[%s, %05d]-msg:" format "\n", \ ++ __func__, __LINE__, ##__VA_ARGS__) ++#define CRYPTO_DUMPHEX(var_name, data, len) print_hex_dump(KERN_CONT, (var_name), \ ++ DUMP_PREFIX_OFFSET, \ ++ 16, 1, (data), (len), false) ++#else ++#define CRYPTO_TRACE(format, ...) ++#define CRYPTO_MSG(format, ...) ++#define CRYPTO_DUMPHEX(var_name, data, len) ++#endif ++ ++#endif ++ +diff --git a/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c +new file mode 100644 +index 000000000..7d8d0aafa +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.c +@@ -0,0 +1,478 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Rockchip crypto skcipher uitls ++ * ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ */ ++ ++#include "rk_crypto_skcipher_utils.h" ++ ++struct rk_crypto_algt *rk_cipher_get_algt(struct crypto_skcipher *tfm) ++{ ++ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ ++ return container_of(alg, struct rk_crypto_algt, alg.crypto); +} + -+static inline void clear_hash_out_reg(struct rk_crypto_dev *rk_dev) ++struct rk_crypto_algt *rk_aead_get_algt(struct crypto_aead *tfm) +{ -+ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_DOUT_0, 16); ++ struct aead_alg *alg = crypto_aead_alg(tfm); ++ ++ return container_of(alg, struct rk_crypto_algt, alg.aead); +} + -+static int write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) ++struct rk_cipher_ctx *rk_cipher_ctx_cast(struct rk_crypto_dev *rk_dev) +{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(rk_dev->async_req->tfm); + -+ return 0; ++ return ctx; +} + -+static int rk_hw_hash_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) ++struct rk_alg_ctx *rk_cipher_alg_ctx(struct rk_crypto_dev *rk_dev) +{ -+ u32 reg_ctrl = 0; -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); -+ struct rk_hash_mid_data *mid_data = (struct rk_hash_mid_data *)ctx->priv; ++ return &(rk_cipher_ctx_cast(rk_dev)->algs_ctx); ++} + -+ if (algo >= ARRAY_SIZE(hash_algo2bc)) -+ goto exit; ++static bool is_no_multi_blocksize(uint32_t mode) ++{ ++ return (mode == CIPHER_MODE_CFB || ++ mode == CIPHER_MODE_OFB || ++ mode == CIPHER_MODE_CTR || ++ mode == CIPHER_MODE_XTS || ++ mode == CIPHER_MODE_GCM) ? 
true : false; ++} + -+ rk_hash_reset(rk_dev); ++int rk_cipher_fallback(struct skcipher_request *req, struct rk_cipher_ctx *ctx, bool encrypt) ++{ ++ int ret; + -+ clear_hash_out_reg(rk_dev); ++ CRYPTO_MSG("use fallback tfm"); + -+ reg_ctrl = hash_algo2bc[algo] | CRYPTO_HW_PAD_ENABLE; ++ if (!ctx->fallback_tfm) { ++ ret = -ENODEV; ++ CRYPTO_MSG("fallback_tfm is empty!\n"); ++ goto exit; ++ } + -+ if (IS_TYPE_HMAC(type)) { -+ CRYPTO_TRACE("this is hmac"); -+ reg_ctrl |= CRYPTO_HMAC_ENABLE; ++ if (!ctx->fallback_key_inited) { ++ ret = crypto_skcipher_setkey(ctx->fallback_tfm, ++ ctx->key, ctx->keylen); ++ if (ret) { ++ CRYPTO_MSG("fallback crypto_skcipher_setkey err = %d\n", ++ ret); ++ goto exit; ++ } ++ ++ ctx->fallback_key_inited = true; + } + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, reg_ctrl | CRYPTO_WRITE_MASK_ALL); -+ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ skcipher_request_set_tfm(&ctx->fallback_req, ctx->fallback_tfm); ++ skcipher_request_set_callback(&ctx->fallback_req, ++ req->base.flags, ++ req->base.complete, ++ req->base.data); + -+ memset(mid_data, 0x00, sizeof(*mid_data)); ++ skcipher_request_set_crypt(&ctx->fallback_req, req->src, ++ req->dst, req->cryptlen, req->iv); ++ ++ ret = encrypt ? crypto_skcipher_encrypt(&ctx->fallback_req) : ++ crypto_skcipher_decrypt(&ctx->fallback_req); + -+ return 0; +exit: -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ return ret; ++} + -+ return -EINVAL; ++/* increment counter (128-bit int) by 1 */ ++static void rk_ctr128_inc(uint8_t *counter) ++{ ++ u32 n = 16; ++ u8 c; ++ ++ do { ++ --n; ++ c = counter[n]; ++ ++c; ++ counter[n] = c; ++ if (c) ++ return; ++ } while (n); +} + -+static void clean_hash_setting(struct rk_crypto_dev *rk_dev) ++static void rk_ctr128_calc(uint8_t *counter, uint32_t data_len) +{ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ u32 i; ++ u32 chunksize = AES_BLOCK_SIZE; ++ ++ for (i = 0; i < DIV_ROUND_UP(data_len, chunksize); i++) ++ rk_ctr128_inc(counter); +} + -+static int rk_ahash_import(struct ahash_request *req, const void *in) ++static uint32_t rk_get_new_iv(struct rk_cipher_ctx *ctx, u32 mode, bool is_enc, uint8_t *iv) +{ -+ struct rk_ahash_expt_ctx state; ++ struct scatterlist *sg_dst; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ uint32_t ivsize = alg_ctx->chunk_size; + -+ /* 'in' may not be aligned so memcpy to local variable */ -+ memcpy(&state, in, sizeof(state)); ++ if (!iv) ++ return 0; + -+ ///TODO: deal with import ++ sg_dst = alg_ctx->aligned ? 
alg_ctx->sg_dst : &alg_ctx->sg_tmp; + -+ return 0; ++ CRYPTO_TRACE("aligned = %u, count = %u, ivsize = %u, is_enc = %d\n", ++ alg_ctx->aligned, alg_ctx->count, ivsize, is_enc); ++ ++ switch (mode) { ++ case CIPHER_MODE_CTR: ++ rk_ctr128_calc(iv, alg_ctx->count); ++ break; ++ case CIPHER_MODE_CBC: ++ case CIPHER_MODE_CFB: ++ if (is_enc) ++ sg_pcopy_to_buffer(sg_dst, alg_ctx->map_nents, ++ iv, ivsize, alg_ctx->count - ivsize); ++ else ++ memcpy(iv, ctx->lastc, ivsize); ++ break; ++ case CIPHER_MODE_OFB: ++ sg_pcopy_to_buffer(sg_dst, alg_ctx->map_nents, ++ iv, ivsize, alg_ctx->count - ivsize); ++ crypto_xor(iv, ctx->lastc, ivsize); ++ break; ++ default: ++ return 0; ++ } ++ ++ return ivsize; +} + -+static int rk_ahash_export(struct ahash_request *req, void *out) ++static void rk_iv_copyback(struct rk_crypto_dev *rk_dev) +{ -+ struct rk_ahash_expt_ctx state; ++ uint32_t iv_size; ++ struct skcipher_request *req = skcipher_request_cast(rk_dev->async_req); ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); + -+ /* Don't let anything leak to 'out' */ -+ memset(&state, 0, sizeof(state)); ++ iv_size = rk_get_new_iv(ctx, algt->mode, ctx->is_enc, ctx->iv); + -+ ///TODO: deal with import ++ if (iv_size && req->iv) ++ memcpy(req->iv, ctx->iv, iv_size); ++} + -+ memcpy(out, &state, sizeof(state)); ++static void rk_update_iv(struct rk_crypto_dev *rk_dev) ++{ ++ uint32_t iv_size; ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); ++ struct rk_alg_ctx *algs_ctx = &ctx->algs_ctx; ++ struct skcipher_request *req = skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); + -+ return 0; ++ iv_size = rk_get_new_iv(ctx, algt->mode, ctx->is_enc, ctx->iv); ++ ++ if (iv_size) ++ algs_ctx->ops.hw_write_iv(rk_dev, ctx->iv, iv_size); +} + -+static int rk_ahash_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) ++static int rk_set_data_start(struct rk_crypto_dev *rk_dev) +{ -+ struct rk_hw_crypto_v3_info *hw_info = -+ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); -+ struct crypto_lli_desc *lli_head, *lli_tail; -+ u32 dma_ctl = CRYPTO_DMA_RESTART; -+ bool is_final = flag & RK_FLAG_FINAL; -+ int ret; ++ int err; ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); + -+ CRYPTO_TRACE("ctx->calc_cnt = %u, count %u Byte, is_final = %d", -+ ctx->calc_cnt, alg_ctx->count, is_final); ++ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); ++ if (!err) { ++ u32 ivsize = alg_ctx->chunk_size; ++ struct scatterlist *src_sg; ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); + -+ if (alg_ctx->count % RK_DMA_ALIGNMENT && !is_final) { -+ dev_err(rk_dev->dev, "count = %u is not aligned with [%u]\n", -+ alg_ctx->count, RK_DMA_ALIGNMENT); -+ return -EINVAL; -+ } ++ memset(ctx->lastc, 0x00, sizeof(ctx->lastc)); + -+ if (alg_ctx->count == 0) { -+ /* do nothing */ -+ CRYPTO_TRACE("empty calc"); -+ return 0; -+ } ++ src_sg = alg_ctx->aligned ? alg_ctx->sg_src : &alg_ctx->sg_tmp; + -+ if (alg_ctx->total == alg_ctx->left_bytes + alg_ctx->count) -+ rk_hash_mid_data_restore(rk_dev, (struct rk_hash_mid_data *)ctx->priv); ++ ivsize = alg_ctx->count > ivsize ? 
ivsize : alg_ctx->count; + -+ if (alg_ctx->aligned) -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ alg_ctx->sg_src, NULL, alg_ctx->count); -+ else -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ &alg_ctx->sg_tmp, NULL, alg_ctx->count); -+ if (ret) -+ return ret; ++ sg_pcopy_to_buffer(src_sg, alg_ctx->map_nents, ++ ctx->lastc, ivsize, alg_ctx->count - ivsize); + -+ lli_head = hw_info->hw_desc.lli_head; -+ lli_tail = hw_info->hw_desc.lli_tail; ++ alg_ctx->ops.hw_dma_start(rk_dev, true); ++ } + -+ lli_tail->dma_ctrl = is_final ? LLI_DMA_CTRL_LAST : LLI_DMA_CTRL_PAUSE; -+ lli_tail->dma_ctrl |= LLI_DMA_CTRL_SRC_DONE; ++ return err; ++} + -+ if (ctx->calc_cnt == 0) { -+ dma_ctl = CRYPTO_DMA_START; ++int rk_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen) ++{ ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); ++ uint32_t key_factor; ++ int ret = -EINVAL; + -+ lli_head->user_define |= LLI_USER_CIPHER_START; -+ lli_head->user_define |= LLI_USER_STRING_START; ++ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", ++ algt->algo, algt->mode, keylen); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, -+ (CRYPTO_HASH_ENABLE << CRYPTO_WRITE_MASK_SHIFT) | -+ CRYPTO_HASH_ENABLE); ++ /* The key length of XTS is twice the normal length */ ++ key_factor = algt->mode == CIPHER_MODE_XTS ? 2 : 1; ++ ++ switch (algt->algo) { ++ case CIPHER_ALGO_DES: ++ ret = verify_skcipher_des_key(cipher, key); ++ if (ret) ++ goto exit; ++ break; ++ case CIPHER_ALGO_DES3_EDE: ++ ret = verify_skcipher_des3_key(cipher, key); ++ if (ret) ++ goto exit; ++ break; ++ case CIPHER_ALGO_AES: ++ if (keylen != (AES_KEYSIZE_128 * key_factor) && ++ keylen != (AES_KEYSIZE_192 * key_factor) && ++ keylen != (AES_KEYSIZE_256 * key_factor)) ++ goto exit; ++ break; ++ case CIPHER_ALGO_SM4: ++ if (keylen != (SM4_KEY_SIZE * key_factor)) ++ goto exit; ++ break; ++ default: ++ ret = -EINVAL; ++ goto exit; + } + -+ if (is_final && alg_ctx->left_bytes == 0) -+ lli_tail->user_define |= LLI_USER_STRING_LAST; ++ memcpy(ctx->key, key, keylen); ++ ctx->keylen = keylen; ++ ctx->fallback_key_inited = false; + -+ CRYPTO_TRACE("dma_ctrl = %08x, user_define = %08x, len = %u", -+ lli_head->dma_ctrl, lli_head->user_define, alg_ctx->count); ++ ret = 0; ++exit: ++ return ret; ++} + -+ rk_crypto_dump_hw_desc(&hw_info->hw_desc); ++int rk_ablk_rx(struct rk_crypto_dev *rk_dev) ++{ ++ int err = 0; ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); + -+ dma_wmb(); ++ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); + -+ /* enable crypto irq */ -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); ++ err = rk_dev->unload_data(rk_dev); ++ if (err) ++ goto out_rx; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, dma_ctl | dma_ctl << CRYPTO_WRITE_MASK_SHIFT); ++ if (alg_ctx->left_bytes) { ++ rk_update_iv(rk_dev); ++ if (alg_ctx->aligned) { ++ if (sg_is_last(alg_ctx->sg_src)) { ++ dev_err(rk_dev->dev, "[%s:%d] Lack of data\n", ++ __func__, __LINE__); ++ err = -ENOMEM; ++ goto out_rx; ++ } ++ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); ++ alg_ctx->sg_dst = sg_next(alg_ctx->sg_dst); ++ } ++ err = rk_set_data_start(rk_dev); ++ } else { ++ if (alg_ctx->is_aead) { ++ u8 hard_tag[RK_MAX_TAG_SIZE]; ++ u8 user_tag[RK_MAX_TAG_SIZE]; ++ struct aead_request *req = ++ aead_request_cast(rk_dev->async_req); ++ struct crypto_aead *tfm = 
crypto_aead_reqtfm(req); + -+ return 0; ++ unsigned int authsize = crypto_aead_authsize(tfm); ++ ++ CRYPTO_TRACE("cryptlen = %u, assoclen = %u, aead authsize = %u", ++ alg_ctx->total, alg_ctx->assoclen, authsize); ++ ++ err = alg_ctx->ops.hw_get_result(rk_dev, hard_tag, authsize); ++ if (err) ++ goto out_rx; ++ ++ CRYPTO_DUMPHEX("hard_tag", hard_tag, authsize); ++ if (!ctx->is_enc) { ++ if (!sg_pcopy_to_buffer(alg_ctx->req_src, ++ sg_nents(alg_ctx->req_src), ++ user_tag, authsize, ++ alg_ctx->total + ++ alg_ctx->assoclen)) { ++ err = -EINVAL; ++ goto out_rx; ++ } ++ ++ CRYPTO_DUMPHEX("user_tag", user_tag, authsize); ++ err = crypto_memneq(user_tag, hard_tag, authsize) ? -EBADMSG : 0; ++ } else { ++ if (!sg_pcopy_from_buffer(alg_ctx->req_dst, ++ sg_nents(alg_ctx->req_dst), ++ hard_tag, authsize, ++ alg_ctx->total + ++ alg_ctx->assoclen)) { ++ err = -EINVAL; ++ goto out_rx; ++ } ++ } ++ } else { ++ rk_iv_copyback(rk_dev); ++ } ++ } ++out_rx: ++ return err; +} + -+static int rk_ahash_get_result(struct rk_crypto_dev *rk_dev, -+ uint8_t *data, uint32_t data_len) ++int rk_ablk_start(struct rk_crypto_dev *rk_dev) +{ -+ int ret = 0; -+ u32 reg_ctrl = 0; -+ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ int err = 0; + -+ memset(ctx->priv, 0x00, sizeof(struct rk_hash_mid_data)); ++ alg_ctx->left_bytes = req->cryptlen; ++ alg_ctx->total = req->cryptlen; ++ alg_ctx->sg_src = req->src; ++ alg_ctx->req_src = req->src; ++ alg_ctx->src_nents = sg_nents_for_len(req->src, req->cryptlen); ++ alg_ctx->sg_dst = req->dst; ++ alg_ctx->req_dst = req->dst; ++ alg_ctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); + -+ ret = read_poll_timeout_atomic(CRYPTO_READ, reg_ctrl, -+ reg_ctrl & CRYPTO_HASH_IS_VALID, -+ RK_POLL_PERIOD_US, -+ RK_POLL_TIMEOUT_US, false, -+ rk_dev, CRYPTO_HASH_VALID); -+ if (ret) -+ goto exit; ++ CRYPTO_TRACE("total = %u", alg_ctx->total); + -+ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_DOUT_0, data, data_len); ++ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->mode); ++ err = rk_set_data_start(rk_dev); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_VALID, CRYPTO_HASH_IS_VALID); ++ return err; ++} + -+exit: -+ clean_hash_setting(rk_dev); ++int rk_skcipher_handle_req(struct rk_crypto_dev *rk_dev, struct skcipher_request *req) ++{ ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); + -+ return ret; ++ if (!IS_ALIGNED(req->cryptlen, ctx->algs_ctx.chunk_size) && ++ !is_no_multi_blocksize(algt->mode)) ++ return -EINVAL; ++ else ++ return rk_dev->enqueue(rk_dev, &req->base); +} + -+static int rk_cra_hash_init(struct crypto_tfm *tfm) ++int rk_aead_fallback(struct aead_request *req, struct rk_cipher_ctx *ctx, bool encrypt) +{ -+ struct rk_crypto_algt *algt = -+ rk_ahash_get_algt(__crypto_ahash_cast(tfm)); -+ const char *alg_name = crypto_tfm_alg_name(tfm); -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ int ret; ++ struct aead_request *subreq = aead_request_ctx(req); + -+ CRYPTO_TRACE(); ++ if (!ctx->fallback_aead) { ++ CRYPTO_TRACE("fallback_tfm is empty"); ++ return -EINVAL; ++ } + -+ memset(ctx, 0x00, 
sizeof(*ctx)); ++ CRYPTO_MSG("use fallback tfm"); + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++ if (!ctx->fallback_key_inited) { ++ ret = crypto_aead_setkey(ctx->fallback_aead, ctx->key, ctx->keylen); ++ if (ret) { ++ CRYPTO_MSG("fallback crypto_skcipher_setkey err = %d\n", ret); ++ goto exit; ++ } + -+ alg_ctx->align_size = RK_DMA_ALIGNMENT; ++ ctx->fallback_key_inited = true; ++ } + -+ alg_ctx->ops.start = rk_ahash_start; -+ alg_ctx->ops.update = rk_ahash_crypto_rx; -+ alg_ctx->ops.complete = rk_ahash_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ aead_request_set_tfm(subreq, ctx->fallback_aead); ++ aead_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); ++ aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); ++ aead_request_set_ad(subreq, req->assoclen); + -+ alg_ctx->ops.hw_write_key = write_key_reg; -+ alg_ctx->ops.hw_init = rk_hw_hash_init; -+ alg_ctx->ops.hw_dma_start = rk_ahash_dma_start; -+ alg_ctx->ops.hw_get_result = rk_ahash_get_result; ++ ret = encrypt ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq); + -+ ctx->rk_dev = rk_dev; -+ ctx->hash_tmp = (u8 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); -+ if (!ctx->hash_tmp) { -+ dev_err(rk_dev->dev, "Can't get zeroed page for hash tmp.\n"); -+ return -ENOMEM; -+ } ++exit: ++ return ret; ++} + -+ ctx->priv = kmalloc(sizeof(struct rk_hash_mid_data), GFP_KERNEL); -+ if (!ctx->priv) { -+ free_page((unsigned long)ctx->hash_tmp); -+ return -ENOMEM; -+ } ++int rk_aead_setkey(struct crypto_aead *cipher, const u8 *key, unsigned int keylen) ++{ ++ struct crypto_tfm *tfm = crypto_aead_tfm(cipher); ++ struct rk_crypto_algt *algt = rk_aead_get_algt(cipher); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm); ++ int ret = -EINVAL; + -+ memset(ctx->priv, 0x00, sizeof(struct rk_hash_mid_data)); ++ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", algt->algo, algt->mode, keylen); + -+ rk_dev->request_crypto(rk_dev, alg_name); ++ switch (algt->algo) { ++ case CIPHER_ALGO_AES: ++ if (keylen != AES_KEYSIZE_128 && ++ keylen != AES_KEYSIZE_192 && ++ keylen != AES_KEYSIZE_256) ++ goto error; + -+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx)); ++ break; ++ case CIPHER_ALGO_SM4: ++ if (keylen != SM4_KEY_SIZE) ++ goto error; + -+ algt->alg.hash.halg.statesize = sizeof(struct rk_ahash_expt_ctx); ++ break; ++ default: ++ CRYPTO_TRACE(); ++ goto error; ++ } ++ ++ memcpy(ctx->key, key, keylen); ++ ctx->keylen = keylen; ++ ctx->fallback_key_inited = false; + + return 0; ++ ++error: ++ return ret; +} + -+static void rk_cra_hash_exit(struct crypto_tfm *tfm) ++int rk_aead_start(struct rk_crypto_dev *rk_dev) +{ -+ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct aead_request *req = aead_request_cast(rk_dev->async_req); ++ struct crypto_aead *tfm = crypto_aead_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); ++ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ unsigned int total = 0, authsize; ++ int err = 0; + -+ CRYPTO_TRACE(); ++ total = req->cryptlen + req->assoclen; + -+ if (ctx->hash_tmp) -+ free_page((unsigned long)ctx->hash_tmp); ++ authsize = ctx->is_enc ? 
0 : crypto_aead_authsize(tfm); + -+ kfree(ctx->priv); ++ alg_ctx->total = req->cryptlen - authsize; ++ alg_ctx->assoclen = req->assoclen; ++ alg_ctx->sg_src = req->src; ++ alg_ctx->req_src = req->src; ++ alg_ctx->src_nents = sg_nents_for_len(req->src, total); ++ alg_ctx->sg_dst = req->dst; ++ alg_ctx->req_dst = req->dst; ++ alg_ctx->dst_nents = sg_nents_for_len(req->dst, total - authsize); ++ alg_ctx->left_bytes = alg_ctx->total; + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); -+} ++ CRYPTO_TRACE("src_nents = %zu, dst_nents = %zu", alg_ctx->src_nents, alg_ctx->dst_nents); ++ CRYPTO_TRACE("is_enc = %d, authsize = %u, cryptlen = %u, total = %u, assoclen = %u", ++ ctx->is_enc, authsize, req->cryptlen, alg_ctx->total, alg_ctx->assoclen); + -+struct rk_crypto_algt rk_v3_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); -+struct rk_crypto_algt rk_v3_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); -+struct rk_crypto_algt rk_v3_ahash_sha224 = RK_HASH_ALGO_INIT(SHA224, sha224); -+struct rk_crypto_algt rk_v3_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); -+struct rk_crypto_algt rk_v3_ahash_sha384 = RK_HASH_ALGO_INIT(SHA384, sha384); -+struct rk_crypto_algt rk_v3_ahash_sha512 = RK_HASH_ALGO_INIT(SHA512, sha512); -+struct rk_crypto_algt rk_v3_ahash_sm3 = RK_HASH_ALGO_INIT(SM3, sm3); ++ alg_ctx->ops.hw_init(rk_dev, algt->algo, algt->mode); ++ err = rk_set_data_start(rk_dev); + -+struct rk_crypto_algt rk_v3_hmac_md5 = RK_HMAC_ALGO_INIT(MD5, md5); -+struct rk_crypto_algt rk_v3_hmac_sha1 = RK_HMAC_ALGO_INIT(SHA1, sha1); -+struct rk_crypto_algt rk_v3_hmac_sha256 = RK_HMAC_ALGO_INIT(SHA256, sha256); -+struct rk_crypto_algt rk_v3_hmac_sha512 = RK_HMAC_ALGO_INIT(SHA512, sha512); -+struct rk_crypto_algt rk_v3_hmac_sm3 = RK_HMAC_ALGO_INIT(SM3, sm3); ++ return err; ++} + -diff --git a/drivers/crypto/rockchip/rk_crypto_v3_reg.h b/drivers/crypto/rockchip/rk_crypto_v3_reg.h ++int rk_aead_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) ++{ ++ return crypto_gcm_check_authsize(authsize); ++} ++ ++int rk_aead_handle_req(struct rk_crypto_dev *rk_dev, struct aead_request *req) ++{ ++ return rk_dev->enqueue(rk_dev, &req->base); ++} +diff --git a/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h new file mode 100644 -index 000000000..1c4c45317 +index 000000000..7d47f9719 --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v3_reg.h -@@ -0,0 +1,80 @@ ++++ b/drivers/crypto/rockchip/rk_crypto_skcipher_utils.h +@@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ + -+#ifndef __RK_CRYPTO_V3_REG_H__ -+#define __RK_CRYPTO_V3_REG_H__ ++#ifndef __RK_CRYPTO_SKCIPHER_UTILS_H__ ++#define __RK_CRYPTO_SKCIPHER_UTILS_H__ + -+#include "rk_crypto_v2_reg.h" ++#include ++#include ++#include + -+#define CRYPTO_UNEQUAL_ERR_INT_EN BIT(9) -+#define CRYPTO_ZERO_LEN_INT_EN BIT(6) ++#include "rk_crypto_core.h" ++#include "rk_crypto_utils.h" + -+/* DMA Destination Data Read Address Register */ -+#define CRYPTO_DMA_DST_WADDR 0x0028 ++#define RK_MAX_TAG_SIZE 32 + -+#define CRYPTO_BC_MID_IV_0 0x0060 -+#define CRYPTO_BC_MID_WORD_SIZE 6 ++struct rk_crypto_algt *rk_cipher_get_algt(struct crypto_skcipher *tfm); + -+#define CRYPTO_MID_VALID 0x03e8 -+#define CRYPTO_BC_MID_IS_VALID BIT(0) -+#define CRYPTO_HASH_MID_IS_VALID BIT(1) ++struct rk_crypto_algt *rk_aead_get_algt(struct crypto_aead *tfm); + -+#define CRYPTO_KEY_SEL 0x0610 ++struct rk_alg_ctx *rk_cipher_alg_ctx(struct rk_crypto_dev *rk_dev); + -+#define CRYPTO_MID_VALID_SWITCH 0x0630 -+#define CRYPTO_MID_VALID_ENABLE BIT(0) ++struct rk_cipher_ctx *rk_cipher_ctx_cast(struct rk_crypto_dev *rk_dev); + -+#define CRYPTO_AES_VERSION 0x0680 -+#define CRYPTO_DES_VERSION 0x0684 -+#define CRYPTO_SM4_VERSION 0x0688 ++int rk_cipher_fallback(struct skcipher_request *req, struct rk_cipher_ctx *ctx, bool encrypt); + -+#define CRYPTO_ECB_FLAG BIT(0) -+#define CRYPTO_CBC_FLAG BIT(1) -+#define CRYPTO_CTS_FLAG BIT(2) -+#define CRYPTO_CTR_FLAG BIT(3) -+#define CRYPTO_CFB_FLAG BIT(4) -+#define CRYPTO_OFB_FLAG BIT(5) -+#define CRYPTO_XTS_FLAG BIT(6) -+#define CRYPTO_CCM_FLAG BIT(7) -+#define CRYPTO_GCM_FLAG BIT(8) -+#define CRYPTO_CMAC_FLAG BIT(9) -+#define CRYPTO_CBCMAC_FLAG BIT(10) ++int rk_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen); + -+#define CRYPTO_AES128_FLAG BIT(16) -+#define CRYPTO_AES192_FLAG BIT(17) -+#define CRYPTO_AES256_FLAG BIT(18) ++int rk_ablk_rx(struct rk_crypto_dev *rk_dev); + -+#define CRYPTO_TDES_FLAG BIT(16) ++int rk_ablk_start(struct rk_crypto_dev *rk_dev); + -+#define CRYPTO_LOCKSEP_FLAG BIT(20) -+#define CRYPTO_SECURE_FLAG BIT(21) -+#define CRYPTO_MULTI_CHN_FLAG BIT(22) ++int rk_skcipher_handle_req(struct rk_crypto_dev *rk_dev, struct skcipher_request *req); + -+#define CRYPTO_HASH_VERSION 0x068C -+#define CRYPTO_HASH_SHA1_FLAG BIT(0) -+#define CRYPTO_HASH_SHA224_FLAG BIT(1) -+#define CRYPTO_HASH_SHA256_FLAG BIT(2) -+#define CRYPTO_HASH_SHA384_FLAG BIT(3) -+#define CRYPTO_HASH_SHA512_FLAG BIT(4) -+#define CRYPTO_HASH_SHA512_224_FLAG BIT(5) -+#define CRYPTO_HASH_SHA512_256_FLAG BIT(6) -+#define CRYPTO_HASH_MD5_FLAG BIT(7) -+#define CRYPTO_HASH_SM3_FLAG BIT(8) ++int rk_aead_fallback(struct aead_request *req, struct rk_cipher_ctx *ctx, bool encrypt); + -+#define CRYPTO_HMAC_VERSION 0x0690 -+#define CRYPTO_HMAC_SHA1_FLAG BIT(0) -+#define CRYPTO_HMAC_SHA256_FLAG BIT(1) -+#define CRYPTO_HMAC_SHA512_FLAG BIT(2) -+#define CRYPTO_HMAC_MD5_FLAG BIT(3) -+#define CRYPTO_HMAC_SM3_FLAG BIT(4) ++int rk_aead_setkey(struct crypto_aead *cipher, const u8 *key, unsigned int keylen); + -+#define CRYPTO_RNG_VERSION 0x0694 -+#define CRYPTO_PKA_VERSION 0x0698 -+#define CRYPTO_CRYPTO_VERSION 0x06F0 ++int rk_aead_start(struct rk_crypto_dev *rk_dev); + -+#define CRYPTO_HASH_MID_DATA_0 0x0700 -+#define CRYPTO_HASH_MID_WORD_SIZE 26 ++int rk_aead_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize); ++ ++int rk_aead_handle_req(struct rk_crypto_dev *rk_dev, struct aead_request *req); + +#endif + -diff --git a/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c 
b/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c +diff --git a/drivers/crypto/rockchip/rk_crypto_utils.c b/drivers/crypto/rockchip/rk_crypto_utils.c new file mode 100644 -index 000000000..4220e6cbe +index 000000000..5758e0eed --- /dev/null -+++ b/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c -@@ -0,0 +1,684 @@ ++++ b/drivers/crypto/rockchip/rk_crypto_utils.c +@@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Crypto acceleration support for Rockchip Crypto V2 ++ * Rockchip crypto uitls + * -+ * Copyright (c) 2022, Fuzhou Rockchip Electronics Co., Ltd ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd + * + * Author: Lin Jinhan + * + */ + +#include -+#include -+#include -+#include ++#include + +#include "rk_crypto_core.h" +#include "rk_crypto_utils.h" -+#include "rk_crypto_skcipher_utils.h" -+#include "rk_crypto_v3.h" -+#include "rk_crypto_v3_reg.h" -+ -+#define RK_POLL_PERIOD_US 100 -+#define RK_POLL_TIMEOUT_US 50000 -+ -+static const u32 cipher_algo2bc[] = { -+ [CIPHER_ALGO_DES] = CRYPTO_BC_DES, -+ [CIPHER_ALGO_DES3_EDE] = CRYPTO_BC_TDES, -+ [CIPHER_ALGO_AES] = CRYPTO_BC_AES, -+ [CIPHER_ALGO_SM4] = CRYPTO_BC_SM4, -+}; + -+static const u32 cipher_mode2bc[] = { -+ [CIPHER_MODE_ECB] = CRYPTO_BC_ECB, -+ [CIPHER_MODE_CBC] = CRYPTO_BC_CBC, -+ [CIPHER_MODE_CFB] = CRYPTO_BC_CFB, -+ [CIPHER_MODE_OFB] = CRYPTO_BC_OFB, -+ [CIPHER_MODE_CTR] = CRYPTO_BC_CTR, -+ [CIPHER_MODE_XTS] = CRYPTO_BC_XTS, -+ [CIPHER_MODE_GCM] = CRYPTO_BC_GCM, -+}; ++static inline void word2byte_be(u32 word, u8 *ch) ++{ ++ ch[0] = (word >> 24) & 0xff; ++ ch[1] = (word >> 16) & 0xff; ++ ch[2] = (word >> 8) & 0xff; ++ ch[3] = (word >> 0) & 0xff; ++} + -+static int rk_crypto_irq_handle(int irq, void *dev_id) ++static inline u32 byte2word_be(const u8 *ch) +{ -+ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); -+ u32 interrupt_status; -+ struct rk_hw_crypto_v3_info *hw_info = -+ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ return (*ch << 24) + (*(ch + 1) << 16) + ++ (*(ch + 2) << 8) + *(ch + 3); ++} + -+ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); ++void rk_crypto_write_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, const u8 *data, u32 bytes) ++{ ++ u32 i; ++ u8 tmp_buf[4]; + -+ interrupt_status &= CRYPTO_LOCKSTEP_MASK; ++ for (i = 0; i < bytes / 4; i++, base_addr += 4) ++ CRYPTO_WRITE(rk_dev, base_addr, byte2word_be(data + i * 4)); + -+ if (interrupt_status != CRYPTO_DST_ITEM_DONE_INT_ST) { -+ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); -+ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", -+ (u32)alg_ctx->addr_in); -+ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", -+ (u32)alg_ctx->addr_out); -+ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); -+ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", -+ (u32)hw_info->hw_desc.lli_head_dma); -+ dev_err(rk_dev->dev, "DMA Error status = %08x\n", -+ interrupt_status); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, 
CRYPTO_DMA_SRC_RADDR)); -+ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", -+ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); -+ rk_dev->err = -EFAULT; ++ if (bytes % 4) { ++ memset(tmp_buf, 0x00, sizeof(tmp_buf)); ++ memcpy((u8 *)tmp_buf, data + (bytes / 4) * 4, bytes % 4); ++ CRYPTO_WRITE(rk_dev, base_addr, byte2word_be(tmp_buf)); + } -+ -+ return 0; +} + -+static inline void set_pc_len_reg(struct rk_crypto_dev *rk_dev, u64 pc_len) ++void rk_crypto_clear_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u32 words) +{ -+ u32 chn_base = CRYPTO_CH0_PC_LEN_0; -+ -+ CRYPTO_TRACE("PC length = %lu\n", (unsigned long)pc_len); ++ u32 i; + -+ CRYPTO_WRITE(rk_dev, chn_base, pc_len & 0xffffffff); -+ CRYPTO_WRITE(rk_dev, chn_base + 4, pc_len >> 32); ++ for (i = 0; i < words; i++, base_addr += 4) ++ CRYPTO_WRITE(rk_dev, base_addr, 0); +} + -+static inline void set_aad_len_reg(struct rk_crypto_dev *rk_dev, u64 aad_len) ++void rk_crypto_read_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u8 *data, u32 bytes) +{ -+ u32 chn_base = CRYPTO_CH0_AAD_LEN_0; ++ u32 i; + -+ CRYPTO_TRACE("AAD length = %lu\n", (unsigned long)aad_len); ++ for (i = 0; i < bytes / 4; i++, base_addr += 4) ++ word2byte_be(CRYPTO_READ(rk_dev, base_addr), data + i * 4); + -+ CRYPTO_WRITE(rk_dev, chn_base, aad_len & 0xffffffff); -+ CRYPTO_WRITE(rk_dev, chn_base + 4, aad_len >> 32); ++ if (bytes % 4) { ++ uint8_t tmp_buf[4]; ++ ++ word2byte_be(CRYPTO_READ(rk_dev, base_addr), tmp_buf); ++ memcpy(data + i * 4, tmp_buf, bytes % 4); ++ } +} + -+static void set_iv_reg(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len) ++static int check_scatter_align(struct scatterlist *sg_src, ++ struct scatterlist *sg_dst, ++ int align_mask) +{ -+ if (!iv || iv_len == 0) -+ return; ++ int in, out, align; + -+ CRYPTO_DUMPHEX("set iv", iv, iv_len); ++ /* The last piece has no need for length alignment */ ++ in = IS_ALIGNED((u32)sg_src->offset, 4) && ++ (!sg_next(sg_src) || ++ IS_ALIGNED((u32)sg_src->length, align_mask)) && ++ (sg_phys(sg_src) < SZ_4G); ++ if (!sg_dst) ++ return in; + -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_IV_0, iv, iv_len); ++ /* The last piece has no need for length alignment */ ++ out = IS_ALIGNED((u32)sg_dst->offset, 4) && ++ (!sg_next(sg_dst) || ++ IS_ALIGNED((u32)sg_dst->length, align_mask)) && ++ (sg_phys(sg_dst) < SZ_4G); ++ align = in && out; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_CH0_IV_LEN_0, iv_len); ++ return (align && (sg_src->length == sg_dst->length)); +} + -+static void write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) ++bool rk_crypto_check_align(struct scatterlist *src_sg, size_t src_nents, ++ struct scatterlist *dst_sg, size_t dst_nents, ++ int align_mask) +{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); -+} ++ struct scatterlist *src_tmp = NULL; ++ struct scatterlist *dst_tmp = NULL; ++ unsigned int i; + -+static void write_tkey_reg(struct rk_crypto_dev *rk_dev, const u8 *key, -+ u32 key_len) -+{ -+ rk_crypto_write_regs(rk_dev, CRYPTO_CH4_KEY_0, key, key_len); -+} ++ if (dst_sg && src_nents != dst_nents) ++ return false; + -+static int get_tag_reg(struct rk_crypto_dev *rk_dev, u8 *tag, u32 tag_len) -+{ -+ int ret; -+ u32 reg_ctrl = 0; ++ src_tmp = src_sg; ++ dst_tmp = dst_sg; + -+ CRYPTO_TRACE("tag_len = %u", tag_len); ++ for (i = 0; i < src_nents; i++) { ++ if (!src_tmp) ++ return false; + -+ if (tag_len > RK_MAX_TAG_SIZE) -+ return -EINVAL; ++ if (!check_scatter_align(src_tmp, dst_tmp, align_mask)) ++ return false; + -+ ret = 
read_poll_timeout_atomic(CRYPTO_READ, -+ reg_ctrl, -+ reg_ctrl & CRYPTO_CH0_TAG_VALID, -+ 0, -+ RK_POLL_TIMEOUT_US, -+ false, -+ rk_dev, CRYPTO_TAG_VALID); -+ if (ret) -+ goto exit; ++ src_tmp = sg_next(src_tmp); + -+ rk_crypto_read_regs(rk_dev, CRYPTO_CH0_TAG_0, tag, tag_len); -+exit: -+ return ret; ++ if (dst_sg) ++ dst_tmp = sg_next(dst_tmp); ++ } ++ ++ return true; +} + -+static bool is_force_fallback(struct rk_crypto_algt *algt, uint32_t key_len) ++bool rk_crypto_check_dmafd(struct scatterlist *sgl, size_t nents) +{ -+ if (algt->algo != CIPHER_ALGO_AES) -+ return false; ++ struct scatterlist *src_tmp = NULL; ++ unsigned int i; + -+ /* crypto v2 not support xts with AES-192 */ -+ if (algt->mode == CIPHER_MODE_XTS && key_len == AES_KEYSIZE_192 * 2) -+ return true; ++ for_each_sg(sgl, src_tmp, nents, i) { ++ if (!src_tmp) ++ return false; + -+ if (algt->use_soft_aes192 && key_len == AES_KEYSIZE_192) -+ return true; ++ if (src_tmp->length && !sg_dma_address(src_tmp)) ++ return false; ++ } + -+ return false; ++ return true; +} + -+static bool is_calc_need_round_up(struct skcipher_request *req) ++void rk_crypto_dump_hw_desc(struct rk_hw_desc *hw_desc) +{ -+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); -+ -+ return (algt->mode == CIPHER_MODE_CFB || -+ algt->mode == CIPHER_MODE_OFB || -+ algt->mode == CIPHER_MODE_CTR) ? true : false; -+} ++ struct crypto_lli_desc *cur_lli = NULL; ++ u32 i; + -+static void rk_cipher_reset(struct rk_crypto_dev *rk_dev) -+{ -+ int ret; -+ u32 tmp = 0, tmp_mask = 0; -+ unsigned int pool_timeout_us = 1000; ++ cur_lli = hw_desc->lli_head; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); ++ CRYPTO_TRACE("lli_head = %lx, lli_tail = %lx", ++ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_tail); + -+ tmp = CRYPTO_SW_CC_RESET; -+ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; ++ for (i = 0; i < hw_desc->total; i++, cur_lli++) { ++ CRYPTO_TRACE("cur_lli = %lx", (unsigned long)cur_lli); ++ CRYPTO_TRACE("src_addr = %08x", cur_lli->src_addr); ++ CRYPTO_TRACE("src_len = %08x", cur_lli->src_len); ++ CRYPTO_TRACE("dst_addr = %08x", cur_lli->dst_addr); ++ CRYPTO_TRACE("dst_len = %08x", cur_lli->dst_len); ++ CRYPTO_TRACE("user_def = %08x", cur_lli->user_define); ++ CRYPTO_TRACE("dma_ctl = %08x", cur_lli->dma_ctrl); ++ CRYPTO_TRACE("next = %08x\n", cur_lli->next_addr); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); ++ if (cur_lli == hw_desc->lli_tail) ++ break; ++ } ++} + -+ /* This is usually done in 20 clock cycles */ -+ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, -+ pool_timeout_us, false, rk_dev, CRYPTO_RST_CTL); -+ if (ret) -+ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", -+ pool_timeout_us); ++u64 rk_crypto_hw_desc_maxlen(struct scatterlist *sg, u64 len, u32 *max_nents) ++{ ++ int nents; ++ u64 total; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0xffff0000); ++ if (!len) ++ return 0; + -+ /* clear dma int status */ -+ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); -+} ++ for (nents = 0, total = 0; sg; sg = sg_next(sg)) { ++ if (!sg) ++ goto exit; + -+static void rk_crypto_complete(struct crypto_async_request *base, int err) -+{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm); -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ struct rk_hw_crypto_v3_info *hw_info = ctx->rk_dev->hw_info; -+ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; ++ nents++; ++ total += sg->length; + -+ 
CRYPTO_WRITE(ctx->rk_dev, CRYPTO_BC_CTL, 0xffff0000); -+ if (err) { -+ rk_cipher_reset(ctx->rk_dev); -+ pr_err("aligned = %u, align_size = %u\n", -+ alg_ctx->aligned, alg_ctx->align_size); -+ pr_err("total = %u, left = %u, count = %u\n", -+ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); -+ pr_err("lli->src = %08x\n", lli_desc->src_addr); -+ pr_err("lli->src_len = %08x\n", lli_desc->src_len); -+ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); -+ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); -+ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); -+ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); -+ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); ++ if (nents >= RK_DEFAULT_LLI_CNT || total >= len) ++ goto exit; + } + -+ if (base->complete) -+ base->complete(base, err); ++exit: ++ *max_nents = nents; ++ return total > len ? len : total; +} + -+static int rk_cipher_crypt(struct skcipher_request *req, bool encrypt) ++int rk_crypto_hw_desc_alloc(struct device *dev, struct rk_hw_desc *hw_desc) +{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); -+ -+ CRYPTO_TRACE("%s total = %u", -+ encrypt ? "encrypt" : "decrypt", req->cryptlen); ++ u32 lli_cnt = RK_DEFAULT_LLI_CNT; ++ u32 lli_len = lli_cnt * sizeof(struct crypto_lli_desc); + -+ if (!req->cryptlen) { -+ if (algt->mode == CIPHER_MODE_ECB || -+ algt->mode == CIPHER_MODE_CBC || -+ algt->mode == CIPHER_MODE_CTR || -+ algt->mode == CIPHER_MODE_CFB || -+ algt->mode == CIPHER_MODE_OFB) -+ return 0; -+ else -+ return -EINVAL; -+ } ++ if (!dev || !hw_desc) ++ return -EINVAL; + -+ /* XTS data should >= chunksize */ -+ if (algt->mode == CIPHER_MODE_XTS) { -+ if (req->cryptlen < crypto_skcipher_chunksize(tfm)) -+ return -EINVAL; ++ memset(hw_desc, 0x00, sizeof(*hw_desc)); + -+ /* force use unalign branch */ -+ ctx->algs_ctx.align_size = ctx->rk_dev->vir_max; ++ hw_desc->lli_aad = dma_alloc_coherent(dev, sizeof(struct crypto_lli_desc), ++ &hw_desc->lli_aad_dma, GFP_KERNEL); ++ if (!hw_desc->lli_aad) ++ return -ENOMEM; + -+ /* XTS can't pause when use hardware crypto */ -+ if (req->cryptlen > ctx->rk_dev->vir_max) -+ return rk_cipher_fallback(req, ctx, encrypt); ++ ///TODO: cma ++ hw_desc->lli_head = dma_alloc_coherent(dev, lli_len, &hw_desc->lli_head_dma, GFP_KERNEL); ++ if (!hw_desc->lli_head) { ++ dma_free_coherent(dev, sizeof(struct crypto_lli_desc), ++ hw_desc->lli_aad, hw_desc->lli_aad_dma); ++ return -ENOMEM; + } + -+ if (is_force_fallback(algt, ctx->keylen)) -+ return rk_cipher_fallback(req, ctx, encrypt); ++ hw_desc->lli_tail = hw_desc->lli_head; ++ hw_desc->total = lli_cnt; ++ hw_desc->dev = dev; + -+ ctx->mode = cipher_algo2bc[algt->algo] | -+ cipher_mode2bc[algt->mode]; -+ if (!encrypt) -+ ctx->mode |= CRYPTO_BC_DECRYPT; ++ memset(hw_desc->lli_head, 0x00, lli_len); + -+ if (algt->algo == CIPHER_ALGO_AES) { -+ uint32_t key_factor; ++ CRYPTO_TRACE("dev = %lx, buffer_len = %u, lli_head = %lx, lli_head_dma = %lx", ++ (unsigned long)hw_desc->dev, lli_len, ++ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_head_dma); + -+ /* The key length of XTS is twice the normal length */ -+ key_factor = algt->mode == CIPHER_MODE_XTS ? 
2 : 1; ++ return 0; ++} + -+ if (ctx->keylen == AES_KEYSIZE_128 * key_factor) -+ ctx->mode |= CRYPTO_BC_128_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_192 * key_factor) -+ ctx->mode |= CRYPTO_BC_192_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_256 * key_factor) -+ ctx->mode |= CRYPTO_BC_256_bit_key; -+ } ++void rk_crypto_hw_desc_free(struct rk_hw_desc *hw_desc) ++{ ++ if (!hw_desc || !hw_desc->dev || !hw_desc->lli_head) ++ return; + -+ ctx->iv_len = crypto_skcipher_ivsize(tfm); ++ CRYPTO_TRACE("dev = %lx, buffer_len = %lu, lli_head = %lx, lli_head_dma = %lx", ++ (unsigned long)hw_desc->dev, ++ (unsigned long)hw_desc->total * sizeof(struct crypto_lli_desc), ++ (unsigned long)hw_desc->lli_head, (unsigned long)hw_desc->lli_head_dma); + -+ memset(ctx->iv, 0x00, sizeof(ctx->iv)); -+ memcpy(ctx->iv, req->iv, ctx->iv_len); ++ dma_free_coherent(hw_desc->dev, sizeof(struct crypto_lli_desc), ++ hw_desc->lli_aad, hw_desc->lli_aad_dma); + -+ ctx->is_enc = encrypt; ++ dma_free_coherent(hw_desc->dev, hw_desc->total * sizeof(struct crypto_lli_desc), ++ hw_desc->lli_head, hw_desc->lli_head_dma); + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); -+ return rk_skcipher_handle_req(ctx->rk_dev, req); ++ memset(hw_desc, 0x00, sizeof(*hw_desc)); +} + -+static int rk_cipher_encrypt(struct skcipher_request *req) ++int rk_crypto_hw_desc_init(struct rk_hw_desc *hw_desc, ++ struct scatterlist *src_sg, ++ struct scatterlist *dst_sg, ++ u64 len) +{ -+ return rk_cipher_crypt(req, true); -+} ++ struct crypto_lli_desc *cur_lli = NULL; ++ struct scatterlist *tmp_src, *tmp_dst; ++ dma_addr_t tmp_next_dma; ++ u32 src_nents, dst_nents; ++ u32 i, data_cnt = 0; + -+static int rk_cipher_decrypt(struct skcipher_request *req) -+{ -+ return rk_cipher_crypt(req, false); -+} ++ if (!hw_desc || !hw_desc->dev || !hw_desc->lli_head) ++ return -EINVAL; + -+static int rk_ablk_hw_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode) -+{ -+ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); ++ if (!src_sg || len == 0) ++ return -EINVAL; + -+ rk_cipher_reset(rk_dev); ++ src_nents = sg_nents_for_len(src_sg, len); ++ dst_nents = dst_sg ? 
sg_nents_for_len(dst_sg, len) : src_nents; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0x00010000); ++ if (src_nents != dst_nents) ++ return -EINVAL; + -+ if (mode == CIPHER_MODE_XTS) { -+ uint32_t tmp_len = ctx->keylen / 2; ++ CRYPTO_TRACE("src_nents = %u, total = %u, len = %llu", src_nents, hw_desc->total, len); + -+ write_key_reg(ctx->rk_dev, ctx->key, tmp_len); -+ write_tkey_reg(ctx->rk_dev, ctx->key + tmp_len, tmp_len); -+ } else { -+ write_key_reg(ctx->rk_dev, ctx->key, ctx->keylen); ++ if (src_nents > hw_desc->total) { ++ pr_err("crypto: nents overflow, %u > %u", src_nents, hw_desc->total); ++ return -ENOMEM; + } + -+ if (mode != CIPHER_MODE_ECB) -+ set_iv_reg(rk_dev, ctx->iv, ctx->iv_len); ++ memset(hw_desc->lli_head, 0x00, src_nents * sizeof(struct crypto_lli_desc)); + -+ ctx->mode |= CRYPTO_BC_ENABLE; ++ cur_lli = hw_desc->lli_head; ++ tmp_src = src_sg; ++ tmp_dst = dst_sg; ++ tmp_next_dma = hw_desc->lli_head_dma + sizeof(*cur_lli); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ if (dst_sg) { ++ for (i = 0; i < src_nents - 1; i++, cur_lli++, tmp_next_dma += sizeof(*cur_lli)) { ++ cur_lli->src_addr = sg_dma_address(tmp_src); ++ cur_lli->src_len = sg_dma_len(tmp_src); ++ cur_lli->dst_addr = sg_dma_address(tmp_dst); ++ cur_lli->dst_len = sg_dma_len(tmp_dst); ++ cur_lli->next_addr = tmp_next_dma; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); ++ data_cnt += sg_dma_len(tmp_src); ++ tmp_src = sg_next(tmp_src); ++ tmp_dst = sg_next(tmp_dst); ++ } ++ } else { ++ for (i = 0; i < src_nents - 1; i++, cur_lli++, tmp_next_dma += sizeof(*cur_lli)) { ++ cur_lli->src_addr = sg_dma_address(tmp_src); ++ cur_lli->src_len = sg_dma_len(tmp_src); ++ cur_lli->next_addr = tmp_next_dma; + -+ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, ctx->mode | CRYPTO_WRITE_MASK_ALL); ++ data_cnt += sg_dma_len(tmp_src); ++ tmp_src = sg_next(tmp_src); ++ } ++ } ++ ++ /* for last lli */ ++ cur_lli->src_addr = sg_dma_address(tmp_src); ++ cur_lli->src_len = len - data_cnt; ++ cur_lli->next_addr = 0; ++ ++ if (dst_sg) { ++ cur_lli->dst_addr = sg_dma_address(tmp_dst); ++ cur_lli->dst_len = len - data_cnt; ++ } ++ ++ hw_desc->lli_tail = cur_lli; + + return 0; +} + -+static int crypto_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) -+{ -+ struct rk_hw_crypto_v3_info *hw_info = -+ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; -+ struct skcipher_request *req = -+ skcipher_request_cast(rk_dev->async_req); -+ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); -+ struct crypto_lli_desc *lli_head, *lli_tail, *lli_aad; -+ u32 calc_len = alg_ctx->count; -+ u32 start_flag = CRYPTO_DMA_START; -+ int ret; +diff --git a/drivers/crypto/rockchip/rk_crypto_utils.h b/drivers/crypto/rockchip/rk_crypto_utils.h +new file mode 100644 +index 000000000..26c931db8 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_utils.h +@@ -0,0 +1,63 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ if (alg_ctx->aligned) -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ alg_ctx->sg_src, alg_ctx->sg_dst, alg_ctx->count); -+ else -+ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, -+ &alg_ctx->sg_tmp, &alg_ctx->sg_tmp, alg_ctx->count); -+ if (ret) -+ return ret; ++/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ + -+ lli_head = hw_info->hw_desc.lli_head; -+ lli_tail = hw_info->hw_desc.lli_tail; -+ lli_aad = hw_info->hw_desc.lli_aad; ++#ifndef __RK_CRYPTO_UTILS_H__ ++#define __RK_CRYPTO_UTILS_H__ + -+ /* -+ * the data length is not aligned will use addr_vir to calculate, -+ * so crypto v2 could round up data length to chunk_size -+ */ -+ if (!alg_ctx->is_aead && is_calc_need_round_up(req)) -+ calc_len = round_up(calc_len, alg_ctx->chunk_size); ++#include ++#include + -+ CRYPTO_TRACE("calc_len = %u, cryptlen = %u, assoclen= %u, is_aead = %d", -+ calc_len, alg_ctx->total, alg_ctx->assoclen, alg_ctx->is_aead); ++#include "rk_crypto_core.h" + -+ lli_head->user_define = LLI_USER_STRING_START | LLI_USER_CIPHER_START; ++/* Default 256 x 4K = 1MByte */ ++#define RK_DEFAULT_LLI_CNT 256 + -+ lli_tail->dma_ctrl = LLI_DMA_CTRL_DST_DONE | LLI_DMA_CTRL_LAST; -+ lli_tail->user_define |= LLI_USER_STRING_LAST; -+ lli_tail->src_len += (calc_len - alg_ctx->count); -+ lli_tail->dst_len += (calc_len - alg_ctx->count); ++struct crypto_lli_desc { ++ u32 src_addr; ++ u32 src_len; ++ u32 dst_addr; ++ u32 dst_len; ++ u32 user_define; ++ u32 reserve; ++ u32 dma_ctrl; ++ u32 next_addr; ++}; + -+ if (alg_ctx->is_aead) { -+ lli_aad->src_addr = alg_ctx->addr_aad_in; -+ lli_aad->src_len = alg_ctx->assoclen; -+ lli_aad->user_define = LLI_USER_CIPHER_START | -+ LLI_USER_STRING_START | -+ LLI_USER_STRING_LAST | -+ LLI_USER_STRING_AAD; -+ lli_aad->next_addr = hw_info->hw_desc.lli_head_dma; ++struct rk_hw_desc { ++ struct device *dev; ++ struct crypto_lli_desc *lli_aad; ++ struct crypto_lli_desc *lli_head; ++ struct crypto_lli_desc *lli_tail; ++ dma_addr_t lli_head_dma; ++ dma_addr_t lli_aad_dma; ++ u32 total; ++}; + -+ /* clear cipher start */ -+ lli_head->user_define &= (~((u32)LLI_USER_CIPHER_START)); ++void rk_crypto_write_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, const u8 *data, u32 bytes); + -+ set_pc_len_reg(rk_dev, alg_ctx->total); -+ set_aad_len_reg(rk_dev, alg_ctx->assoclen); -+ } ++void rk_crypto_clear_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u32 words); + -+ rk_crypto_dump_hw_desc(&hw_info->hw_desc); ++void rk_crypto_read_regs(struct rk_crypto_dev *rk_dev, u32 base_addr, u8 *data, u32 bytes); + -+ dma_wmb(); ++bool rk_crypto_check_align(struct scatterlist *src_sg, size_t src_nents, ++ struct scatterlist *dst_sg, size_t dst_nents, ++ int align_mask); + -+ if (alg_ctx->is_aead) -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_aad_dma); -+ else -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); ++bool rk_crypto_check_dmafd(struct scatterlist *sgl, size_t nents); + -+ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, start_flag | (start_flag << WRITE_MASK)); ++u64 rk_crypto_hw_desc_maxlen(struct scatterlist *sg, u64 len, u32 *max_nents); + -+ return 0; -+} ++int rk_crypto_hw_desc_alloc(struct device *dev, struct rk_hw_desc *hw_desc); + -+static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) -+{ -+ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++int rk_crypto_hw_desc_init(struct rk_hw_desc *hw_desc, ++ struct scatterlist *src_sg, ++ struct scatterlist *dst_sg, ++ u64 len); + -+ CRYPTO_TRACE(); ++void rk_crypto_hw_desc_free(struct rk_hw_desc *hw_desc); + -+ memset(ctx, 0x00, sizeof(*ctx)); ++void rk_crypto_dump_hw_desc(struct rk_hw_desc 
*hw_desc); + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++#endif + -+ rk_dev->request_crypto(rk_dev, alg_name); +diff --git a/drivers/crypto/rockchip/rk_crypto_v1.c b/drivers/crypto/rockchip/rk_crypto_v1.c +new file mode 100644 +index 000000000..14347c490 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v1.c +@@ -0,0 +1,60 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip Crypto V1 ++ * ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ */ ++#include "rk_crypto_core.h" ++#include "rk_crypto_v1.h" + -+ /* always not aligned for crypto v2 cipher */ -+ alg_ctx->align_size = 64; -+ alg_ctx->chunk_size = crypto_skcipher_chunksize(tfm); ++static const char * const crypto_v1_rsts[] = { ++ "crypto-rst", ++}; + -+ alg_ctx->ops.start = rk_ablk_start; -+ alg_ctx->ops.update = rk_ablk_rx; -+ alg_ctx->ops.complete = rk_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++static struct rk_crypto_algt *crypto_v1_algs[] = { ++ &rk_v1_ecb_aes_alg, /* ecb(aes) */ ++ &rk_v1_cbc_aes_alg, /* cbc(aes) */ + -+ alg_ctx->ops.hw_init = rk_ablk_hw_init; -+ alg_ctx->ops.hw_dma_start = crypto_dma_start; -+ alg_ctx->ops.hw_write_iv = set_iv_reg; ++ &rk_v1_ecb_des_alg, /* ecb(des) */ ++ &rk_v1_cbc_des_alg, /* cbc(des) */ + -+ ctx->rk_dev = rk_dev; ++ &rk_v1_ecb_des3_ede_alg, /* ecb(des3_ede) */ ++ &rk_v1_cbc_des3_ede_alg, /* cbc(des3_ede) */ + -+ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { -+ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); -+ ctx->fallback_tfm = crypto_alloc_skcipher(alg_name, 0, -+ CRYPTO_ALG_ASYNC | -+ CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ctx->fallback_tfm)) { -+ CRYPTO_MSG("Could not load fallback driver %s : %ld.\n", -+ alg_name, PTR_ERR(ctx->fallback_tfm)); -+ ctx->fallback_tfm = NULL; -+ } -+ } ++ &rk_v1_ahash_sha1, /* sha1 */ ++ &rk_v1_ahash_sha256, /* sha256 */ ++ &rk_v1_ahash_md5, /* md5 */ ++}; + ++int rk_hw_crypto_v1_init(struct device *dev, void *hw_info) ++{ + return 0; +} + -+static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) ++void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info) +{ -+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); -+ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ CRYPTO_TRACE(); ++} + -+ if (ctx->fallback_tfm) { -+ CRYPTO_MSG("free fallback tfm"); -+ crypto_free_skcipher(ctx->fallback_tfm); -+ } ++const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num) ++{ ++ *num = ARRAY_SIZE(crypto_v1_rsts); + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); ++ return crypto_v1_rsts; +} + -+static int rk_aead_init_tfm(struct crypto_aead *tfm) ++struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num) +{ -+ struct aead_alg *alg = crypto_aead_alg(tfm); -+ struct rk_crypto_algt *algt = -+ container_of(alg, struct rk_crypto_algt, alg.aead); -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); -+ const char *alg_name = crypto_tfm_alg_name(&tfm->base); -+ struct rk_crypto_dev *rk_dev = algt->rk_dev; -+ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; -+ -+ CRYPTO_TRACE(); ++ *num = ARRAY_SIZE(crypto_v1_algs); + -+ if (!rk_dev->request_crypto) -+ return -EFAULT; ++ return crypto_v1_algs; ++} + -+ rk_dev->request_crypto(rk_dev, alg_name); ++bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) ++{ ++ return true; ++} + -+ alg_ctx->align_size = 64; -+ alg_ctx->chunk_size = crypto_aead_chunksize(tfm); +diff --git 
a/drivers/crypto/rockchip/rk_crypto_v1.h b/drivers/crypto/rockchip/rk_crypto_v1.h +new file mode 100644 +index 000000000..417ead575 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v1.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ alg_ctx->ops.start = rk_aead_start; -+ alg_ctx->ops.update = rk_ablk_rx; -+ alg_ctx->ops.complete = rk_crypto_complete; -+ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. */ + -+ alg_ctx->ops.hw_init = rk_ablk_hw_init; -+ alg_ctx->ops.hw_dma_start = crypto_dma_start; -+ alg_ctx->ops.hw_write_iv = set_iv_reg; -+ alg_ctx->ops.hw_get_result = get_tag_reg; ++#ifndef __RK_CRYPTO_V1_H__ ++#define __RK_CRYPTO_V1_H__ + -+ ctx->rk_dev = rk_dev; -+ alg_ctx->is_aead = 1; ++#include + -+ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { -+ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); -+ ctx->fallback_aead = -+ crypto_alloc_aead(alg_name, 0, -+ CRYPTO_ALG_ASYNC | -+ CRYPTO_ALG_NEED_FALLBACK); -+ if (IS_ERR(ctx->fallback_aead)) { -+ dev_err(rk_dev->dev, -+ "Load fallback driver %s err: %ld.\n", -+ alg_name, PTR_ERR(ctx->fallback_aead)); -+ ctx->fallback_aead = NULL; -+ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request)); -+ } else { -+ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) + -+ crypto_aead_reqsize(ctx->fallback_aead)); -+ } -+ } ++struct rk_hw_crypto_v1_info { ++ int reserved; ++}; + -+ return 0; ++#define RK_CRYPTO_V1_SOC_DATA_INIT(names) {\ ++ .crypto_ver = "CRYPTO V1.0.0.0",\ ++ .use_soft_aes192 = false,\ ++ .valid_algs_name = (names),\ ++ .valid_algs_num = ARRAY_SIZE(names),\ ++ .hw_init = rk_hw_crypto_v1_init,\ ++ .hw_deinit = rk_hw_crypto_v1_deinit,\ ++ .hw_get_rsts = rk_hw_crypto_v1_get_rsts,\ ++ .hw_get_algts = rk_hw_crypto_v1_get_algts,\ ++ .hw_is_algo_valid = rk_hw_crypto_v1_algo_valid,\ ++ .hw_info_size = sizeof(struct rk_hw_crypto_v1_info),\ ++ .default_pka_offset = 0,\ ++ .use_lli_chain = false,\ +} + -+static void rk_aead_exit_tfm(struct crypto_aead *tfm) -+{ -+ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); -+ -+ CRYPTO_TRACE(); ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) + -+ if (ctx->fallback_aead) { -+ CRYPTO_MSG("free fallback tfm"); -+ crypto_free_aead(ctx->fallback_aead); -+ } ++extern struct rk_crypto_algt rk_v1_ecb_aes_alg; ++extern struct rk_crypto_algt rk_v1_cbc_aes_alg; + -+ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(&tfm->base)); -+} ++extern struct rk_crypto_algt rk_v1_ecb_des_alg; ++extern struct rk_crypto_algt rk_v1_cbc_des_alg; + -+static int rk_aead_crypt(struct aead_request *req, bool encrypt) -+{ -+ struct crypto_aead *tfm = crypto_aead_reqtfm(req); -+ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); -+ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); -+ struct scatterlist *sg_src, *sg_dst; -+ struct scatterlist src[2], dst[2]; -+ u64 data_len; -+ bool aligned; -+ int ret = -EINVAL; ++extern struct rk_crypto_algt rk_v1_ecb_des3_ede_alg; ++extern struct rk_crypto_algt rk_v1_cbc_des3_ede_alg; + -+ CRYPTO_TRACE("%s cryptlen = %u, assoclen = %u", -+ encrypt ? "encrypt" : "decrypt", -+ req->cryptlen, req->assoclen); ++extern struct rk_crypto_algt rk_v1_ahash_sha1; ++extern struct rk_crypto_algt rk_v1_ahash_sha256; ++extern struct rk_crypto_algt rk_v1_ahash_md5; + -+ data_len = encrypt ? 
req->cryptlen : (req->cryptlen - crypto_aead_authsize(tfm)); ++int rk_hw_crypto_v1_init(struct device *dev, void *hw_info); ++void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info); ++const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num); ++struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num); ++bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); + -+ if (req->assoclen == 0 || -+ req->cryptlen == 0 || -+ data_len == 0 || -+ is_force_fallback(algt, ctx->keylen)) -+ return rk_aead_fallback(req, ctx, encrypt); ++#else + -+ /* point sg_src and sg_dst skip assoc data */ -+ sg_src = scatterwalk_ffwd(src, req->src, req->assoclen); -+ sg_dst = (req->src == req->dst) ? sg_src : scatterwalk_ffwd(dst, req->dst, req->assoclen); ++static inline int rk_hw_crypto_v1_init(struct device *dev, void *hw_info) { return -EINVAL; } ++static inline void rk_hw_crypto_v1_deinit(struct device *dev, void *hw_info) {} ++static inline const char * const *rk_hw_crypto_v1_get_rsts(uint32_t *num) { return NULL; } ++static inline struct rk_crypto_algt **rk_hw_crypto_v1_get_algts(uint32_t *num) { return NULL; } ++static inline bool rk_hw_crypto_v1_algo_valid(struct rk_crypto_dev *rk_dev, ++ struct rk_crypto_algt *aglt) ++{ ++ return false; ++} + -+ aligned = rk_crypto_check_align(sg_src, sg_nents_for_len(sg_src, data_len), -+ sg_dst, sg_nents_for_len(sg_dst, data_len), -+ 64); ++#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V1) */ + -+ if (sg_nents_for_len(sg_src, data_len) > RK_DEFAULT_LLI_CNT || -+ sg_nents_for_len(sg_dst, data_len) > RK_DEFAULT_LLI_CNT) -+ return rk_aead_fallback(req, ctx, encrypt); ++#endif /* end of __RK_CRYPTO_V1_H__ */ + -+ if (!aligned) { -+ if (req->assoclen > ctx->rk_dev->aad_max || -+ data_len > ctx->rk_dev->vir_max) -+ return rk_aead_fallback(req, ctx, encrypt); -+ } +diff --git a/drivers/crypto/rockchip/rk_crypto_v1_ahash.c b/drivers/crypto/rockchip/rk_crypto_v1_ahash.c +new file mode 100644 +index 000000000..633961adf +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v1_ahash.c +@@ -0,0 +1,382 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip RK3288 ++ * ++ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * Author: Zain Wang ++ * ++ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. ++ */ ++#include "rk_crypto_core.h" ++#include "rk_crypto_v1.h" ++#include "rk_crypto_v1_reg.h" + -+ ctx->mode = cipher_algo2bc[algt->algo] | -+ cipher_mode2bc[algt->mode]; -+ if (!encrypt) -+ ctx->mode |= CRYPTO_BC_DECRYPT; ++/* ++ * IC can not process zero message hash, ++ * so we put the fixed hash out when met zero message. 
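++ * (zero_message_process() below just copies the well-known digest of an
++ * empty message for MD5, SHA-1 or SHA-256 into req->result.)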
++ */ + -+ if (algt->algo == CIPHER_ALGO_AES) { -+ if (ctx->keylen == AES_KEYSIZE_128) -+ ctx->mode |= CRYPTO_BC_128_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_192) -+ ctx->mode |= CRYPTO_BC_192_bit_key; -+ else if (ctx->keylen == AES_KEYSIZE_256) -+ ctx->mode |= CRYPTO_BC_256_bit_key; -+ } ++static struct rk_alg_ctx *rk_alg_ctx_cast( ++ struct rk_crypto_dev *rk_dev) ++{ ++ struct ahash_request *req = ++ ahash_request_cast(rk_dev->async_req); + -+ ctx->iv_len = crypto_aead_ivsize(tfm); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+ memset(ctx->iv, 0x00, sizeof(ctx->iv)); -+ memcpy(ctx->iv, req->iv, ctx->iv_len); ++ return &ctx->algs_ctx; ++} + -+ ctx->is_enc = encrypt; ++static int rk_crypto_irq_handle(int irq, void *dev_id) ++{ ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 interrupt_status; + -+ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); -+ ret = rk_aead_handle_req(ctx->rk_dev, req); ++ interrupt_status = CRYPTO_READ(rk_dev, RK_CRYPTO_INTSTS); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, interrupt_status); + -+ return ret; -+} ++ if (interrupt_status & 0x0a) { ++ dev_warn(rk_dev->dev, "DMA Error\n"); ++ rk_dev->err = -EFAULT; ++ } + -+static int rk_aead_encrypt(struct aead_request *req) -+{ -+ return rk_aead_crypt(req, true); ++ return 0; +} + -+static int rk_aead_decrypt(struct aead_request *req) ++static int zero_message_process(struct ahash_request *req) +{ -+ return rk_aead_crypt(req, false); -+} -+ -+struct rk_crypto_algt rk_v3_ecb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, ECB, ecb(sm4), ecb-sm4-rk); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ int rk_digest_size = crypto_ahash_digestsize(tfm); + -+struct rk_crypto_algt rk_v3_cbc_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, CBC, cbc(sm4), cbc-sm4-rk); ++ const u8 sha256_zero_msg_hash[SHA256_DIGEST_SIZE] = { ++ 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, ++ 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, ++ 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, ++ 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 ++ }; + -+struct rk_crypto_algt rk_v3_xts_sm4_alg = -+ RK_CIPHER_ALGO_XTS_INIT(SM4, xts(sm4), xts-sm4-rk); ++ const u8 sha1_zero_msg_hash[SHA1_DIGEST_SIZE] = { ++ 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, ++ 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, ++ 0xaf, 0xd8, 0x07, 0x09 ++ }; + -+struct rk_crypto_algt rk_v3_cfb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, CFB, cfb(sm4), cfb-sm4-rk); ++ const u8 md5_zero_msg_hash[MD5_DIGEST_SIZE] = { ++ 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, ++ 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, ++ }; + -+struct rk_crypto_algt rk_v3_ofb_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, OFB, ofb(sm4), ofb-sm4-rk); -+ -+struct rk_crypto_algt rk_v3_ctr_sm4_alg = -+ RK_CIPHER_ALGO_INIT(SM4, CTR, ctr(sm4), ctr-sm4-rk); -+ -+struct rk_crypto_algt rk_v3_gcm_sm4_alg = -+ RK_AEAD_ALGO_INIT(SM4, GCM, gcm(sm4), gcm-sm4-rk); -+ -+struct rk_crypto_algt rk_v3_ecb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); -+ -+struct rk_crypto_algt rk_v3_cbc_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); ++ switch (rk_digest_size) { ++ case SHA1_DIGEST_SIZE: ++ memcpy(req->result, sha1_zero_msg_hash, rk_digest_size); ++ break; ++ case SHA256_DIGEST_SIZE: ++ memcpy(req->result, sha256_zero_msg_hash, rk_digest_size); ++ break; ++ case MD5_DIGEST_SIZE: ++ memcpy(req->result, md5_zero_msg_hash, rk_digest_size); ++ break; ++ default: ++ return -EINVAL; ++ } + -+struct rk_crypto_algt 
rk_v3_xts_aes_alg = -+ RK_CIPHER_ALGO_XTS_INIT(AES, xts(aes), xts-aes-rk); ++ return 0; ++} + -+struct rk_crypto_algt rk_v3_cfb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CFB, cfb(aes), cfb-aes-rk); ++static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) ++{ ++ if (base->complete) ++ base->complete(base, err); ++} + -+struct rk_crypto_algt rk_v3_ofb_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, OFB, ofb(aes), ofb-aes-rk); ++static void rk_ahash_reg_init(struct rk_crypto_dev *rk_dev) ++{ ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ int reg_status = 0; + -+struct rk_crypto_algt rk_v3_ctr_aes_alg = -+ RK_CIPHER_ALGO_INIT(AES, CTR, ctr(aes), ctr-aes-rk); ++ reg_status = CRYPTO_READ(rk_dev, RK_CRYPTO_CTRL) | ++ RK_CRYPTO_HASH_FLUSH | _SBF(0xffff, 16); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, reg_status); + -+struct rk_crypto_algt rk_v3_gcm_aes_alg = -+ RK_AEAD_ALGO_INIT(AES, GCM, gcm(aes), gcm-aes-rk); ++ reg_status = CRYPTO_READ(rk_dev, RK_CRYPTO_CTRL); ++ reg_status &= (~RK_CRYPTO_HASH_FLUSH); ++ reg_status |= _SBF(0xffff, 16); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, reg_status); + -+struct rk_crypto_algt rk_v3_ecb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); ++ memset_io(rk_dev->reg + RK_CRYPTO_HASH_DOUT_0, 0, 32); + -+struct rk_crypto_algt rk_v3_cbc_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTENA, RK_CRYPTO_HRDMA_ERR_ENA | ++ RK_CRYPTO_HRDMA_DONE_ENA); + -+struct rk_crypto_algt rk_v3_cfb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, CFB, cfb(des), cfb-des-rk); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT | ++ RK_CRYPTO_HRDMA_DONE_INT); + -+struct rk_crypto_algt rk_v3_ofb_des_alg = -+ RK_CIPHER_ALGO_INIT(DES, OFB, ofb(des), ofb-des-rk); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HASH_CTRL, rctx->mode | ++ RK_CRYPTO_HASH_SWAP_DO); + -+struct rk_crypto_algt rk_v3_ecb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO | ++ RK_CRYPTO_BYTESWAP_BRFIFO | ++ RK_CRYPTO_BYTESWAP_BTFIFO); + -+struct rk_crypto_algt rk_v3_cbc_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HASH_MSG_LEN, alg_ctx->total); ++} + -+struct rk_crypto_algt rk_v3_cfb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, CFB, cfb(des3_ede), cfb-des3_ede-rk); ++static int rk_ahash_init(struct ahash_request *req) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+struct rk_crypto_algt rk_v3_ofb_des3_ede_alg = -+ RK_CIPHER_ALGO_INIT(DES3_EDE, OFB, ofb(des3_ede), ofb-des3_ede-rk); ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; + -diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig -index 3c4862a75..eeb1f12bc 100644 ---- a/drivers/devfreq/Kconfig -+++ b/drivers/devfreq/Kconfig -@@ -129,15 +129,20 @@ config ARM_MEDIATEK_CCI_DEVFREQ - buck voltages and update a proper CCI frequency. Use the notification - to get the regulator status. 
- --config ARM_RK3399_DMC_DEVFREQ -- tristate "ARM RK3399 DMC DEVFREQ Driver" -+config ARM_ROCKCHIP_BUS_DEVFREQ -+ tristate "ARM ROCKCHIP BUS DEVFREQ Driver" -+ depends on ARCH_ROCKCHIP -+ help -+ This adds the DEVFREQ driver for the ROCKCHIP BUS. ++ return crypto_ahash_init(&rctx->fallback_req); ++} + -+config ARM_ROCKCHIP_DMC_DEVFREQ -+ tristate "ARM ROCKCHIP DMC DEVFREQ Driver" - depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ - (COMPILE_TEST && HAVE_ARM_SMCCC) - select DEVFREQ_EVENT_ROCKCHIP_DFI -- select DEVFREQ_GOV_SIMPLE_ONDEMAND - select PM_DEVFREQ_EVENT - help -- This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller). -+ This adds the DEVFREQ driver for the ROCKCHIP DMC(Dynamic Memory Controller). - It sets the frequency for the memory controller and reads the usage counts - from hardware. - -diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile -index bf40d0492..92018ab6c 100644 ---- a/drivers/devfreq/Makefile -+++ b/drivers/devfreq/Makefile -@@ -12,7 +12,8 @@ obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o - obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o - obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o - obj-$(CONFIG_ARM_MEDIATEK_CCI_DEVFREQ) += mtk-cci-devfreq.o --obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o -+obj-$(CONFIG_ARM_ROCKCHIP_BUS_DEVFREQ) += rockchip_bus.o -+obj-$(CONFIG_ARM_ROCKCHIP_DMC_DEVFREQ) += rockchip_dmc.o rockchip_dmc_common.o - obj-$(CONFIG_ARM_SUN8I_A33_MBUS_DEVFREQ) += sun8i-a33-mbus.o - obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o - -diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c -index 49c542ecc..4385e1763 100644 ---- a/drivers/devfreq/devfreq.c -+++ b/drivers/devfreq/devfreq.c -@@ -1807,6 +1807,40 @@ static ssize_t trans_stat_store(struct device *dev, - } - static DEVICE_ATTR_RW(trans_stat); - -+static ssize_t load_show(struct device *dev, struct device_attribute *attr, -+ char *buf) ++static int rk_ahash_update(struct ahash_request *req) +{ -+ int err; -+ struct devfreq *devfreq = to_devfreq(dev); -+ struct devfreq_dev_status stat = devfreq->last_status; -+ unsigned long freq; -+ ssize_t len; -+ -+ err = devfreq_update_stats(devfreq); -+ if (err) -+ return err; ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+ if (stat.total_time < stat.busy_time) { -+ err = devfreq_update_stats(devfreq); -+ if (err) -+ return err; -+ }; ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; ++ rctx->fallback_req.nbytes = req->nbytes; ++ rctx->fallback_req.src = req->src; + -+ if (!stat.total_time) -+ return 0; ++ return crypto_ahash_update(&rctx->fallback_req); ++} + -+ len = sprintf(buf, "%lu", stat.busy_time * 100 / stat.total_time); ++static int rk_ahash_final(struct ahash_request *req) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+ if (devfreq->profile->get_cur_freq && -+ !devfreq->profile->get_cur_freq(devfreq->dev.parent, &freq)) -+ len += sprintf(buf + len, "@%luHz\n", freq); -+ else -+ len += sprintf(buf + len, "@%luHz\n", devfreq->previous_freq); ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; ++ rctx->fallback_req.result = req->result; + -+ return 
len; ++ return crypto_ahash_final(&rctx->fallback_req); +} -+static DEVICE_ATTR_RO(load); + - static struct attribute *devfreq_attrs[] = { - &dev_attr_name.attr, - &dev_attr_governor.attr, -@@ -1817,6 +1851,7 @@ static struct attribute *devfreq_attrs[] = { - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, - &dev_attr_trans_stat.attr, -+ &dev_attr_load.attr, - NULL, - }; - ATTRIBUTE_GROUPS(devfreq); -diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig -index 878825372..4526c69c6 100644 ---- a/drivers/devfreq/event/Kconfig -+++ b/drivers/devfreq/event/Kconfig -@@ -39,4 +39,11 @@ config DEVFREQ_EVENT_ROCKCHIP_DFI - This add the devfreq-event driver for Rockchip SoC. It provides DFI - (DDR Monitor Module) driver to count ddr load. - -+config DEVFREQ_EVENT_ROCKCHIP_NOCP -+ tristate "ROCKCHIP NoC (Network On Chip) Probe DEVFREQ event Driver" -+ depends on ARCH_ROCKCHIP -+ help -+ This add the devfreq-event driver for Rockchip SoC. It provides NoC -+ (Network on Chip) Probe counters to monitor traffic statistics. ++static int rk_ahash_finup(struct ahash_request *req) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + - endif # PM_DEVFREQ_EVENT -diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile -index 3c847e5d5..03d67f06c 100644 ---- a/drivers/devfreq/event/Makefile -+++ b/drivers/devfreq/event/Makefile -@@ -4,3 +4,4 @@ - obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o - obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o - obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI) += rockchip-dfi.o -+obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_NOCP) += rockchip-nocp.o -diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c -index 74893c06a..6db7c4945 100644 ---- a/drivers/devfreq/event/rockchip-dfi.c -+++ b/drivers/devfreq/event/rockchip-dfi.c -@@ -20,26 +20,81 @@ - - #include - --#define RK3399_DMC_NUM_CH 2 -- -+#define PX30_PMUGRF_OS_REG2 0x208 -+#define PX30_PMUGRF_OS_REG3 0x20c ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; + -+#define RK3588_PMUGRF_OS_REG(n) (0x200 + (n) * 4) ++ rctx->fallback_req.nbytes = req->nbytes; ++ rctx->fallback_req.src = req->src; ++ rctx->fallback_req.result = req->result; + -+#define RK3128_GRF_SOC_CON0 0x140 -+#define RK3128_GRF_OS_REG1 0x1cc -+#define RK3128_GRF_DFI_WRNUM 0x220 -+#define RK3128_GRF_DFI_RDNUM 0x224 -+#define RK3128_GRF_DFI_TIMERVAL 0x22c -+#define RK3128_DDR_MONITOR_EN ((1 << (16 + 6)) + (1 << 6)) -+#define RK3128_DDR_MONITOR_DISB ((1 << (16 + 6)) + (0 << 6)) ++ return crypto_ahash_finup(&rctx->fallback_req); ++} + -+#define RK3288_PMU_SYS_REG2 0x9c -+#define RK3288_GRF_SOC_CON4 0x254 -+#define RK3288_GRF_SOC_STATUS(n) (0x280 + (n) * 4) -+#define RK3288_DFI_EN (0x30003 << 14) -+#define RK3288_DFI_DIS (0x30000 << 14) -+#define RK3288_LPDDR_SEL (0x10001 << 13) -+#define RK3288_DDR3_SEL (0x10000 << 13) ++static int rk_ahash_import(struct ahash_request *req, const void *in) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+#define RK3328_GRF_OS_REG2 0x5d0 ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; + -+#define RK3368_GRF_DDRC0_CON0 
0x600 -+#define RK3368_GRF_SOC_STATUS5 0x494 -+#define RK3368_GRF_SOC_STATUS6 0x498 -+#define RK3368_GRF_SOC_STATUS8 0x4a0 -+#define RK3368_GRF_SOC_STATUS9 0x4a4 -+#define RK3368_GRF_SOC_STATUS10 0x4a8 -+#define RK3368_DFI_EN (0x30003 << 5) -+#define RK3368_DFI_DIS (0x30000 << 5) ++ return crypto_ahash_import(&rctx->fallback_req, in); ++} + -+#define MAX_DMC_NUM_CH 4 -+#define READ_DRAMTYPE_INFO(n) (((n) >> 13) & 0x7) -+#define READ_CH_INFO(n) (((n) >> 28) & 0x3) -+#define READ_DRAMTYPE_INFO_V3(n, m) ((((n) >> 13) & 0x7) | ((((m) >> 12) & 0x3) << 3)) -+#define READ_SYSREG_VERSION(m) (((m) >> 28) & 0xf) -+#define READ_LP5_BANK_MODE(m) (((m) >> 1) & 0x3) -+#define READ_LP5_CKR(m) (((m) >> 0) & 0x1) - /* DDRMON_CTRL */ --#define DDRMON_CTRL 0x04 --#define CLR_DDRMON_CTRL (0x1f0000 << 0) --#define LPDDR4_EN (0x10001 << 4) --#define HARDWARE_EN (0x10001 << 3) --#define LPDDR3_EN (0x10001 << 2) --#define SOFTWARE_EN (0x10001 << 1) --#define SOFTWARE_DIS (0x10000 << 1) --#define TIME_CNT_EN (0x10001 << 0) -+#define DDRMON_CTRL 0x04 -+#define CLR_DDRMON_CTRL (0xffff0000 << 0) -+#define LPDDR5_BANK_MODE(m) ((0x30000 | ((m) & 0x3)) << 7) -+#define LPDDR5_EN (0x10001 << 6) -+#define DDR4_EN (0x10001 << 5) -+#define LPDDR4_EN (0x10001 << 4) -+#define HARDWARE_EN (0x10001 << 3) -+#define LPDDR2_3_EN (0x10001 << 2) -+#define SOFTWARE_EN (0x10001 << 1) -+#define SOFTWARE_DIS (0x10000 << 1) -+#define TIME_CNT_EN (0x10001 << 0) - - #define DDRMON_CH0_COUNT_NUM 0x28 - #define DDRMON_CH0_DFI_ACCESS_NUM 0x2c - #define DDRMON_CH1_COUNT_NUM 0x3c - #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 - -+/* pmu grf */ -+#define PMUGRF_OS_REG2 0x308 ++static int rk_ahash_export(struct ahash_request *req, void *out) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); + -+enum { -+ DDR4 = 0, -+ DDR3 = 3, -+ LPDDR2 = 5, -+ LPDDR3 = 6, -+ LPDDR4 = 7, -+ LPDDR4X = 8, -+ LPDDR5 = 9, -+ DDR5 = 10, -+ UNUSED = 0xFF -+}; ++ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); ++ rctx->fallback_req.base.flags = req->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; + - struct dmc_usage { -- u32 access; -- u32 total; -+ u64 access; -+ u64 total; - }; - - /* -@@ -50,44 +105,307 @@ struct dmc_usage { - struct rockchip_dfi { - struct devfreq_event_dev *edev; - struct devfreq_event_desc *desc; -- struct dmc_usage ch_usage[RK3399_DMC_NUM_CH]; -+ struct dmc_usage ch_usage[MAX_DMC_NUM_CH]; - struct device *dev; - void __iomem *regs; - struct regmap *regmap_pmu; -+ struct regmap *regmap_grf; -+ struct regmap *regmap_pmugrf; - struct clk *clk; -+ u32 dram_type; -+ u32 mon_idx; -+ u32 count_rate; -+ u32 dram_dynamic_info_reg; -+ /* 0: BG mode, 1: 16 Bank mode, 2: 8 bank mode */ -+ u32 lp5_bank_mode; -+ /* 0: clk:dqs = 1:2, 1: 1:4 */ -+ u32 lp5_ckr; -+ /* -+ * available mask, 1: available, 0: not available -+ * each bit represent a channel -+ */ -+ u32 ch_msk; -+}; ++ return crypto_ahash_export(&rctx->fallback_req, out); ++} + -+static void rk3128_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++static int rk_ahash_digest(struct ahash_request *req) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); ++ struct rk_crypto_dev *rk_dev = tctx->rk_dev; + -+ regmap_write(info->regmap_grf, -+ RK3128_GRF_SOC_CON0, -+ RK3128_DDR_MONITOR_EN); ++ if (!req->nbytes) ++ return zero_message_process(req); ++ else ++ return rk_dev->enqueue(rk_dev, &req->base); 
+} + -+static void rk3128_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) ++static void crypto_ahash_dma_start(struct rk_crypto_dev *rk_dev) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); + -+ regmap_write(info->regmap_grf, -+ RK3128_GRF_SOC_CON0, -+ RK3128_DDR_MONITOR_DISB); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HRDMAS, alg_ctx->addr_in); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_HRDMAL, (alg_ctx->count + 3) / 4); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | ++ (RK_CRYPTO_HASH_START << 16)); +} + -+static int rk3128_dfi_disable(struct devfreq_event_dev *edev) ++static int rk_ahash_set_data_start(struct rk_crypto_dev *rk_dev) +{ -+ rk3128_dfi_stop_hardware_counter(edev); ++ int err; ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); + -+ return 0; ++ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, NULL); ++ if (!err) ++ crypto_ahash_dma_start(rk_dev); ++ return err; +} + -+static int rk3128_dfi_enable(struct devfreq_event_dev *edev) ++static int rk_ahash_start(struct rk_crypto_dev *rk_dev) +{ -+ rk3128_dfi_start_hardware_counter(edev); ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); + -+ return 0; -+} ++ alg_ctx->total = req->nbytes; ++ alg_ctx->left_bytes = req->nbytes; ++ alg_ctx->sg_src = req->src; ++ alg_ctx->req_src = req->src; ++ alg_ctx->src_nents = sg_nents_for_len(req->src, req->nbytes); + -+static int rk3128_dfi_set_event(struct devfreq_event_dev *edev) -+{ -+ return 0; -+} ++ rctx->mode = 0; + -+static int rk3128_dfi_get_event(struct devfreq_event_dev *edev, -+ struct devfreq_event_data *edata) -+{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); -+ unsigned long flags; -+ u32 dfi_wr, dfi_rd, dfi_timer; ++ switch (crypto_ahash_digestsize(tfm)) { ++ case SHA1_DIGEST_SIZE: ++ rctx->mode = RK_CRYPTO_HASH_SHA1; ++ break; ++ case SHA256_DIGEST_SIZE: ++ rctx->mode = RK_CRYPTO_HASH_SHA256; ++ break; ++ case MD5_DIGEST_SIZE: ++ rctx->mode = RK_CRYPTO_HASH_MD5; ++ break; ++ default: ++ return -EINVAL; ++ } + -+ local_irq_save(flags); ++ rk_ahash_reg_init(rk_dev); ++ return rk_ahash_set_data_start(rk_dev); ++} + -+ rk3128_dfi_stop_hardware_counter(edev); ++static int rk_ahash_crypto_rx(struct rk_crypto_dev *rk_dev) ++{ ++ int err = 0; ++ struct ahash_request *req = ahash_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ struct crypto_ahash *tfm; + -+ regmap_read(info->regmap_grf, RK3128_GRF_DFI_WRNUM, &dfi_wr); -+ regmap_read(info->regmap_grf, RK3128_GRF_DFI_RDNUM, &dfi_rd); -+ regmap_read(info->regmap_grf, RK3128_GRF_DFI_TIMERVAL, &dfi_timer); ++ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); + -+ edata->load_count = (dfi_wr + dfi_rd) * 4; -+ edata->total_count = dfi_timer; ++ err = rk_dev->unload_data(rk_dev); ++ if (err) ++ goto out_rx; + -+ rk3128_dfi_start_hardware_counter(edev); ++ if (alg_ctx->left_bytes) { ++ if (alg_ctx->aligned) { ++ if (sg_is_last(alg_ctx->sg_src)) { ++ dev_warn(rk_dev->dev, "[%s:%d], Lack of data\n", ++ __func__, __LINE__); ++ err = -ENOMEM; ++ goto out_rx; ++ } ++ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); ++ } ++ err = rk_ahash_set_data_start(rk_dev); ++ } else { ++ /* ++ * it will take some time to process date after last dma ++ * transmission. 
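++ * (the loop below simply polls RK_CRYPTO_HASH_STS with udelay(10) until
++ * the engine reports that the digest is ready.)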
++ * ++ * waiting time is relative with the last date len, ++ * so cannot set a fixed time here. ++ * 10us makes system not call here frequently wasting ++ * efficiency, and make it response quickly when dma ++ * complete. ++ */ ++ while (!CRYPTO_READ(rk_dev, RK_CRYPTO_HASH_STS)) ++ udelay(10); + -+ local_irq_restore(flags); ++ tfm = crypto_ahash_reqtfm(req); ++ memcpy_fromio(req->result, rk_dev->reg + RK_CRYPTO_HASH_DOUT_0, ++ crypto_ahash_digestsize(tfm)); ++ } + -+ return 0; ++out_rx: ++ return err; +} + -+static const struct devfreq_event_ops rk3128_dfi_ops = { -+ .disable = rk3128_dfi_disable, -+ .enable = rk3128_dfi_enable, -+ .get_event = rk3128_dfi_get_event, -+ .set_event = rk3128_dfi_set_event, -+}; -+ -+static void rk3288_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++static int rk_cra_hash_init(struct crypto_tfm *tfm) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct rk_crypto_algt *algt; ++ struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); ++ const char *alg_name = crypto_tfm_alg_name(tfm); ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ struct rk_crypto_dev *rk_dev; + -+ regmap_write(info->regmap_grf, RK3288_GRF_SOC_CON4, RK3288_DFI_EN); -+} ++ algt = container_of(alg, struct rk_crypto_algt, alg.hash); ++ rk_dev = algt->rk_dev; + -+static void rk3288_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) -+{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ regmap_write(info->regmap_grf, RK3288_GRF_SOC_CON4, RK3288_DFI_DIS); -+} ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+static int rk3288_dfi_disable(struct devfreq_event_dev *edev) -+{ -+ rk3288_dfi_stop_hardware_counter(edev); ++ rk_dev->request_crypto(rk_dev, crypto_tfm_alg_name(tfm)); + -+ return 0; -+} ++ alg_ctx->align_size = 4; + -+static int rk3288_dfi_enable(struct devfreq_event_dev *edev) -+{ -+ rk3288_dfi_start_hardware_counter(edev); ++ alg_ctx->ops.start = rk_ahash_start; ++ alg_ctx->ops.update = rk_ahash_crypto_rx; ++ alg_ctx->ops.complete = rk_ahash_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+ return 0; -+} ++ ctx->rk_dev = rk_dev; ++ ++ /* for fallback */ ++ ctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, ++ CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_tfm)) { ++ dev_err(rk_dev->dev, "Could not load fallback driver.\n"); ++ return PTR_ERR(ctx->fallback_tfm); ++ } ++ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), ++ sizeof(struct rk_ahash_rctx) + ++ crypto_ahash_reqsize(ctx->fallback_tfm)); ++ ++ algt->alg.hash.halg.statesize = crypto_ahash_statesize(ctx->fallback_tfm); + -+static int rk3288_dfi_set_event(struct devfreq_event_dev *edev) -+{ + return 0; +} + -+static int rk3288_dfi_get_busier_ch(struct devfreq_event_dev *edev) ++static void rk_cra_hash_exit(struct crypto_tfm *tfm) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); -+ u32 tmp, max = 0; -+ u32 i, busier_ch = 0; -+ u32 rd_count, wr_count, total_count; -+ -+ rk3288_dfi_stop_hardware_counter(edev); ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); + -+ /* Find out which channel is busier */ -+ for (i = 0; i < MAX_DMC_NUM_CH; i++) { -+ if (!(info->ch_msk & BIT(i))) -+ continue; -+ regmap_read(info->regmap_grf, -+ RK3288_GRF_SOC_STATUS(11 + i * 4), &wr_count); -+ regmap_read(info->regmap_grf, -+ RK3288_GRF_SOC_STATUS(12 + i * 4), &rd_count); -+ regmap_read(info->regmap_grf, -+ RK3288_GRF_SOC_STATUS(14 + i * 4), 
&total_count); -+ info->ch_usage[i].access = (wr_count + rd_count) * 4; -+ info->ch_usage[i].total = total_count; -+ tmp = info->ch_usage[i].access; -+ if (tmp > max) { -+ busier_ch = i; -+ max = tmp; -+ } -+ } -+ rk3288_dfi_start_hardware_counter(edev); ++ if (ctx->fallback_tfm) ++ crypto_free_ahash(ctx->fallback_tfm); + -+ return busier_ch; ++ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); +} + -+static int rk3288_dfi_get_event(struct devfreq_event_dev *edev, -+ struct devfreq_event_data *edata) -+{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); -+ int busier_ch; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ busier_ch = rk3288_dfi_get_busier_ch(edev); -+ local_irq_restore(flags); ++struct rk_crypto_algt rk_v1_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); ++struct rk_crypto_algt rk_v1_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); ++struct rk_crypto_algt rk_v1_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); + -+ edata->load_count = info->ch_usage[busier_ch].access; -+ edata->total_count = info->ch_usage[busier_ch].total; +diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk_crypto_v1_reg.h +similarity index 65% +rename from drivers/crypto/rockchip/rk3288_crypto.h +rename to drivers/crypto/rockchip/rk_crypto_v1_reg.h +index 3aa03cbfb..cf520527a 100644 +--- a/drivers/crypto/rockchip/rk3288_crypto.h ++++ b/drivers/crypto/rockchip/rk_crypto_v1_reg.h +@@ -1,20 +1,9 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef __RK3288_CRYPTO_H__ +-#define __RK3288_CRYPTO_H__ + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include ++/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. */ + -+ return 0; -+} ++#ifndef __RK_CRYPTO_V1_REG_H__ ++#define __RK_CRYPTO_V1_REG_H__ + + #define _SBF(v, f) ((v) << (f)) + +@@ -180,106 +169,4 @@ + #define RK_CRYPTO_HASH_DOUT_6 0x01a4 + #define RK_CRYPTO_HASH_DOUT_7 0x01a8 + +-#define CRYPTO_READ(dev, offset) \ +- readl_relaxed(((dev)->reg + (offset))) +-#define CRYPTO_WRITE(dev, offset, val) \ +- writel_relaxed((val), ((dev)->reg + (offset))) +- +-#define RK_MAX_CLKS 4 +- +-/* +- * struct rockchip_ip - struct for managing a list of RK crypto instance +- * @dev_list: Used for doing a list of rk_crypto_info +- * @lock: Control access to dev_list +- * @dbgfs_dir: Debugfs dentry for statistic directory +- * @dbgfs_stats: Debugfs dentry for statistic counters +- */ +-struct rockchip_ip { +- struct list_head dev_list; +- spinlock_t lock; /* Control access to dev_list */ +- struct dentry *dbgfs_dir; +- struct dentry *dbgfs_stats; +-}; +- +-struct rk_clks { +- const char *name; +- unsigned long max; +-}; +- +-struct rk_variant { +- int num_clks; +- struct rk_clks rkclks[RK_MAX_CLKS]; +-}; +- +-struct rk_crypto_info { +- struct list_head list; +- struct device *dev; +- struct clk_bulk_data *clks; +- int num_clks; +- struct reset_control *rst; +- void __iomem *reg; +- int irq; +- const struct rk_variant *variant; +- unsigned long nreq; +- struct crypto_engine *engine; +- struct completion complete; +- int status; +-}; +- +-/* the private variable of hash */ +-struct rk_ahash_ctx { +- /* for fallback */ +- struct crypto_ahash *fallback_tfm; +-}; +- +-/* the private variable of hash for fallback */ +-struct rk_ahash_rctx { +- struct rk_crypto_info *dev; +- struct ahash_request fallback_req; +- u32 mode; +- int nrsg; +-}; +- +-/* the private variable of cipher */ +-struct rk_cipher_ctx { +- unsigned int keylen; +- u8 
key[AES_MAX_KEY_SIZE]; +- u8 iv[AES_BLOCK_SIZE]; +- struct crypto_skcipher *fallback_tfm; +-}; +- +-struct rk_cipher_rctx { +- struct rk_crypto_info *dev; +- u8 backup_iv[AES_BLOCK_SIZE]; +- u32 mode; +- struct skcipher_request fallback_req; // keep at the end +-}; +- +-struct rk_crypto_tmp { +- u32 type; +- struct rk_crypto_info *dev; +- union { +- struct skcipher_engine_alg skcipher; +- struct ahash_engine_alg hash; +- } alg; +- unsigned long stat_req; +- unsigned long stat_fb; +- unsigned long stat_fb_len; +- unsigned long stat_fb_sglen; +- unsigned long stat_fb_align; +- unsigned long stat_fb_sgdiff; +-}; +- +-extern struct rk_crypto_tmp rk_ecb_aes_alg; +-extern struct rk_crypto_tmp rk_cbc_aes_alg; +-extern struct rk_crypto_tmp rk_ecb_des_alg; +-extern struct rk_crypto_tmp rk_cbc_des_alg; +-extern struct rk_crypto_tmp rk_ecb_des3_ede_alg; +-extern struct rk_crypto_tmp rk_cbc_des3_ede_alg; +- +-extern struct rk_crypto_tmp rk_ahash_sha1; +-extern struct rk_crypto_tmp rk_ahash_sha256; +-extern struct rk_crypto_tmp rk_ahash_md5; +- +-struct rk_crypto_info *get_rk_crypto(void); + #endif +diff --git a/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c b/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c +new file mode 100644 +index 000000000..dc7a57b80 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v1_skcipher.c +@@ -0,0 +1,424 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip RK3288 ++ * ++ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * Author: Zain Wang ++ * ++ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. ++ */ ++#include "rk_crypto_core.h" ++#include "rk_crypto_v1.h" ++#include "rk_crypto_v1_reg.h" + -+static const struct devfreq_event_ops rk3288_dfi_ops = { -+ .disable = rk3288_dfi_disable, -+ .enable = rk3288_dfi_enable, -+ .get_event = rk3288_dfi_get_event, -+ .set_event = rk3288_dfi_set_event, -+}; ++#define RK_CRYPTO_DEC BIT(0) + -+static void rk3368_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++static struct rk_alg_ctx *rk_alg_ctx_cast( ++ struct rk_crypto_dev *rk_dev) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + -+ regmap_write(info->regmap_grf, RK3368_GRF_DDRC0_CON0, RK3368_DFI_EN); ++ return &ctx->algs_ctx; +} + -+static void rk3368_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) ++static int rk_crypto_irq_handle(int irq, void *dev_id) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 interrupt_status; + -+ regmap_write(info->regmap_grf, RK3368_GRF_DDRC0_CON0, RK3368_DFI_DIS); -+} ++ interrupt_status = CRYPTO_READ(rk_dev, RK_CRYPTO_INTSTS); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTSTS, interrupt_status); + -+static int rk3368_dfi_disable(struct devfreq_event_dev *edev) -+{ -+ rk3368_dfi_stop_hardware_counter(edev); ++ if (interrupt_status & 0x0a) { ++ dev_warn(rk_dev->dev, "DMA Error\n"); ++ rk_dev->err = -EFAULT; ++ } + + return 0; +} + -+static int rk3368_dfi_enable(struct devfreq_event_dev *edev) ++static void rk_crypto_complete(struct crypto_async_request *base, int err) +{ -+ rk3368_dfi_start_hardware_counter(edev); ++ if (base->complete) ++ base->complete(base, err); ++} + -+ return 0; ++static int rk_handle_req(struct rk_crypto_dev *rk_dev, ++ struct skcipher_request 
*req) ++{ ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); ++ ++ if (!IS_ALIGNED(req->cryptlen, ctx->algs_ctx.align_size)) ++ return -EINVAL; ++ else ++ return rk_dev->enqueue(rk_dev, &req->base); +} + -+static int rk3368_dfi_set_event(struct devfreq_event_dev *edev) ++static int rk_get_bc(u32 algo, u32 mode, u32 *bc_val) +{ ++ /* default DES ECB mode */ ++ *bc_val = 0; ++ ++ switch (algo) { ++ case CIPHER_ALGO_DES3_EDE: ++ *bc_val |= RK_CRYPTO_TDES_SELECT; ++ fallthrough; ++ case CIPHER_ALGO_DES: ++ if (mode == CIPHER_MODE_ECB) ++ *bc_val = 0; ++ else if (mode == CIPHER_MODE_CBC) ++ *bc_val = RK_CRYPTO_TDES_CHAINMODE_CBC; ++ else ++ goto error; ++ break; ++ case CIPHER_ALGO_AES: ++ if (mode == CIPHER_MODE_ECB) ++ *bc_val = RK_CRYPTO_AES_ECB_MODE; ++ else if (mode == CIPHER_MODE_CBC) ++ *bc_val = RK_CRYPTO_AES_CBC_MODE; ++ else ++ goto error; ++ break; ++ default: ++ goto error; ++ } ++ + return 0; ++error: ++ return -EINVAL; +} + -+static int rk3368_dfi_get_event(struct devfreq_event_dev *edev, -+ struct devfreq_event_data *edata) ++static int rk_cipher_setkey(struct crypto_skcipher *cipher, ++ const u8 *key, unsigned int keylen) +{ -+ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); -+ unsigned long flags; -+ u32 dfi0_wr, dfi0_rd, dfi1_wr, dfi1_rd, dfi_timer; ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); ++ struct skcipher_alg *alg = crypto_skcipher_alg(cipher); ++ struct rk_crypto_algt *algt; ++ int err; + -+ local_irq_save(flags); ++ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); + -+ rk3368_dfi_stop_hardware_counter(edev); ++ CRYPTO_MSG("algo = %x, mode = %x, key_len = %d\n", ++ algt->algo, algt->mode, keylen); + -+ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS5, &dfi0_wr); -+ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS6, &dfi0_rd); -+ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS9, &dfi1_wr); -+ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS10, &dfi1_rd); -+ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS8, &dfi_timer); ++ switch (algt->algo) { ++ case CIPHER_ALGO_DES: ++ if (keylen != DES_KEY_SIZE) ++ goto error; + -+ edata->load_count = (dfi0_wr + dfi0_rd + dfi1_wr + dfi1_rd) * 2; -+ edata->total_count = dfi_timer; ++ err = verify_skcipher_des_key(cipher, key); ++ if (err) ++ goto error; + -+ rk3368_dfi_start_hardware_counter(edev); ++ break; ++ case CIPHER_ALGO_DES3_EDE: ++ err = verify_skcipher_des3_key(cipher, key); ++ if (err) ++ goto error; ++ break; ++ case CIPHER_ALGO_AES: ++ if (keylen != AES_KEYSIZE_128 && ++ keylen != AES_KEYSIZE_192 && ++ keylen != AES_KEYSIZE_256) ++ goto error; ++ break; ++ default: ++ goto error; ++ } + -+ local_irq_restore(flags); ++ memcpy(ctx->key, key, keylen); ++ ctx->keylen = keylen; + + return 0; -+} + -+static const struct devfreq_event_ops rk3368_dfi_ops = { -+ .disable = rk3368_dfi_disable, -+ .enable = rk3368_dfi_enable, -+ .get_event = rk3368_dfi_get_event, -+ .set_event = rk3368_dfi_set_event, - }; - - static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) - { - struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); - void __iomem *dfi_regs = info->regs; -- u32 val; -- u32 ddr_type; -+ u32 mon_idx = 0, val_6 = 0; -+ u32 i; - -- /* get ddr type */ -- regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); -- ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & -- RK3399_PMUGRF_DDRTYPE_MASK; -+ if (info->mon_idx) -+ mon_idx = info->mon_idx; - -- /* clear DDRMON_CTRL setting */ -- writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + 
DDRMON_CTRL); -+ if (info->dram_dynamic_info_reg) -+ regmap_read(info->regmap_pmugrf, info->dram_dynamic_info_reg, &val_6); - -- /* set ddr type to dfi */ -- if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) -- writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); -- else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) -- writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); -+ if (info->dram_type == LPDDR5) { -+ info->lp5_bank_mode = READ_LP5_BANK_MODE(val_6); -+ info->lp5_ckr = READ_LP5_CKR(val_6); -+ } - -- /* enable count, use software mode */ -- writel_relaxed(SOFTWARE_EN, dfi_regs + DDRMON_CTRL); -+ for (i = 0; i < MAX_DMC_NUM_CH; i++) { -+ if (!(info->ch_msk & BIT(i))) -+ continue; -+ /* clear DDRMON_CTRL setting */ -+ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + i * mon_idx + DDRMON_CTRL); ++error: ++ return -EINVAL; ++} + -+ /* set ddr type to dfi */ -+ if (info->dram_type == LPDDR3 || info->dram_type == LPDDR2) -+ writel_relaxed(LPDDR2_3_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); -+ else if (info->dram_type == LPDDR4 || info->dram_type == LPDDR4X) -+ writel_relaxed(LPDDR4_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); -+ else if (info->dram_type == DDR4) -+ writel_relaxed(DDR4_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); -+ else if (info->dram_type == LPDDR5) -+ writel_relaxed(LPDDR5_EN | LPDDR5_BANK_MODE(info->lp5_bank_mode), -+ dfi_regs + i * mon_idx + DDRMON_CTRL); + -+ /* enable count, use software mode */ -+ writel_relaxed(SOFTWARE_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); -+ } - } - - static void rockchip_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) - { - struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); - void __iomem *dfi_regs = info->regs; -+ u32 mon_idx = 0, i; - -- writel_relaxed(SOFTWARE_DIS, dfi_regs + DDRMON_CTRL); -+ if (info->mon_idx) -+ mon_idx = info->mon_idx; ++static int rk_cipher_encrypt(struct skcipher_request *req) ++{ ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ struct rk_crypto_dev *rk_dev = ctx->rk_dev; ++ struct rk_crypto_algt *algt; ++ int ret; + -+ for (i = 0; i < MAX_DMC_NUM_CH; i++) { -+ if (!(info->ch_msk & BIT(i))) -+ continue; -+ writel_relaxed(SOFTWARE_DIS, dfi_regs + i * mon_idx + DDRMON_CTRL); -+ } - } - - static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev) -@@ -96,16 +414,35 @@ static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev) - u32 tmp, max = 0; - u32 i, busier_ch = 0; - void __iomem *dfi_regs = info->regs; -+ u32 mon_idx = 0x20, count_rate = 1; - - rockchip_dfi_stop_hardware_counter(edev); - -+ if (info->mon_idx) -+ mon_idx = info->mon_idx; -+ if (info->count_rate) -+ count_rate = info->count_rate; ++ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); + - /* Find out which channel is busier */ -- for (i = 0; i < RK3399_DMC_NUM_CH; i++) { -- info->ch_usage[i].access = readl_relaxed(dfi_regs + -- DDRMON_CH0_DFI_ACCESS_NUM + i * 20) * 4; -+ for (i = 0; i < MAX_DMC_NUM_CH; i++) { -+ if (!(info->ch_msk & BIT(i))) -+ continue; ++ ret = rk_get_bc(algt->algo, algt->mode, &ctx->mode); ++ if (ret) ++ return ret; + -+ /* rk3588 counter is dfi clk rate */ - info->ch_usage[i].total = readl_relaxed(dfi_regs + -- DDRMON_CH0_COUNT_NUM + i * 20); -- tmp = info->ch_usage[i].access; -+ DDRMON_CH0_COUNT_NUM + i * mon_idx) * count_rate; ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); + -+ /* LPDDR5 LPDDR4 and LPDDR4X BL = 16,other DDR type BL = 8 */ -+ tmp = readl_relaxed(dfi_regs 
+ -+ DDRMON_CH0_DFI_ACCESS_NUM + i * mon_idx); -+ if (info->dram_type == LPDDR4 || info->dram_type == LPDDR4X) -+ tmp *= 8; -+ else if (info->dram_type == LPDDR5) -+ tmp *= 16 / (4 << info->lp5_ckr); -+ else -+ tmp *= 4; -+ info->ch_usage[i].access = tmp; ++ return rk_handle_req(rk_dev, req); ++} + - if (tmp > max) { - busier_ch = i; - max = tmp; -@@ -121,7 +458,8 @@ static int rockchip_dfi_disable(struct devfreq_event_dev *edev) - struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); - - rockchip_dfi_stop_hardware_counter(edev); -- clk_disable_unprepare(info->clk); -+ if (info->clk) -+ clk_disable_unprepare(info->clk); - - return 0; - } -@@ -131,10 +469,13 @@ static int rockchip_dfi_enable(struct devfreq_event_dev *edev) - struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); - int ret; - -- ret = clk_prepare_enable(info->clk); -- if (ret) { -- dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ret); -- return ret; -+ if (info->clk) { -+ ret = clk_prepare_enable(info->clk); -+ if (ret) { -+ dev_err(&edev->dev, "failed to enable dfi clk: %d\n", -+ ret); -+ return ret; -+ } - } - - rockchip_dfi_start_hardware_counter(edev); -@@ -151,8 +492,11 @@ static int rockchip_dfi_get_event(struct devfreq_event_dev *edev, - { - struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); - int busier_ch; -+ unsigned long flags; - -+ local_irq_save(flags); - busier_ch = rockchip_dfi_get_busier_ch(edev); -+ local_irq_restore(flags); - - edata->load_count = info->ch_usage[busier_ch].access; - edata->total_count = info->ch_usage[busier_ch].total; -@@ -167,22 +511,159 @@ static const struct devfreq_event_ops rockchip_dfi_ops = { - .set_event = rockchip_dfi_set_event, - }; - --static const struct of_device_id rockchip_dfi_id_match[] = { -- { .compatible = "rockchip,rk3399-dfi" }, -- { }, --}; --MODULE_DEVICE_TABLE(of, rockchip_dfi_id_match); -+static __maybe_unused __init int rk3588_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc) ++static int rk_cipher_decrypt(struct skcipher_request *req) +{ -+ struct device_node *np = pdev->dev.of_node; -+ struct resource *res; -+ u32 val_2, val_3, val_4; - --static int rockchip_dfi_probe(struct platform_device *pdev) -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ data->regs = devm_ioremap_resource(&pdev->dev, res); -+ if (IS_ERR(data->regs)) -+ return PTR_ERR(data->regs); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ struct rk_crypto_dev *rk_dev = ctx->rk_dev; ++ struct rk_crypto_algt *algt; ++ int ret; + -+ data->regmap_pmugrf = syscon_regmap_lookup_by_phandle(np, "rockchip,pmu_grf"); -+ if (IS_ERR(data->regmap_pmugrf)) -+ return PTR_ERR(data->regmap_pmugrf); ++ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); + -+ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(2), &val_2); -+ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(3), &val_3); -+ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(4), &val_4); -+ if (READ_SYSREG_VERSION(val_3) >= 0x3) -+ data->dram_type = READ_DRAMTYPE_INFO_V3(val_2, val_3); -+ else -+ data->dram_type = READ_DRAMTYPE_INFO(val_2); ++ ret = rk_get_bc(algt->algo, algt->mode, &ctx->mode); ++ if (ret) ++ return ret; + -+ data->mon_idx = 0x4000; -+ if (data->dram_type == LPDDR5) -+ data->count_rate = 1; -+ else -+ data->count_rate = 2; -+ data->dram_dynamic_info_reg = RK3588_PMUGRF_OS_REG(6); -+ data->ch_msk 
= READ_CH_INFO(val_2) | READ_CH_INFO(val_4) << 2; -+ data->clk = NULL; ++ ctx->mode |= RK_CRYPTO_DEC; + -+ desc->ops = &rockchip_dfi_ops; ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); + -+ return 0; ++ return rk_handle_req(rk_dev, req); +} + -+static __maybe_unused __init int px30_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc) - { -- struct device *dev = &pdev->dev; -- struct rockchip_dfi *data; -- struct devfreq_event_desc *desc; - struct device_node *np = pdev->dev.of_node, *node; -+ struct resource *res; -+ u32 val_2, val_3; - -- data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL); -- if (!data) -- return -ENOMEM; -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ data->regs = devm_ioremap_resource(&pdev->dev, res); -+ if (IS_ERR(data->regs)) -+ return PTR_ERR(data->regs); ++static void rk_ablk_hw_init(struct rk_crypto_dev *rk_dev) ++{ ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); ++ u32 ivsize, block, conf_reg = 0; + -+ node = of_parse_phandle(np, "rockchip,pmugrf", 0); -+ if (node) { -+ data->regmap_pmugrf = syscon_node_to_regmap(node); -+ if (IS_ERR(data->regmap_pmugrf)) -+ return PTR_ERR(data->regmap_pmugrf); -+ } ++ block = crypto_tfm_alg_blocksize(tfm); ++ ivsize = crypto_skcipher_ivsize(cipher); + -+ regmap_read(data->regmap_pmugrf, PX30_PMUGRF_OS_REG2, &val_2); -+ regmap_read(data->regmap_pmugrf, PX30_PMUGRF_OS_REG3, &val_3); -+ if (READ_SYSREG_VERSION(val_3) >= 0x3) -+ data->dram_type = READ_DRAMTYPE_INFO_V3(val_2, val_3); -+ else -+ data->dram_type = READ_DRAMTYPE_INFO(val_2); -+ data->ch_msk = 1; -+ data->clk = NULL; ++ if (block == DES_BLOCK_SIZE) { ++ memcpy_toio(ctx->rk_dev->reg + RK_CRYPTO_TDES_KEY1_0, ++ ctx->key, ctx->keylen); ++ ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | ++ RK_CRYPTO_TDES_BYTESWAP_KEY | ++ RK_CRYPTO_TDES_BYTESWAP_IV; ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_TDES_CTRL, ctx->mode); ++ memcpy_toio(rk_dev->reg + RK_CRYPTO_TDES_IV_0, ++ req->iv, ivsize); ++ conf_reg = RK_CRYPTO_DESSEL; ++ } else { ++ memcpy_toio(ctx->rk_dev->reg + RK_CRYPTO_AES_KEY_0, ++ ctx->key, ctx->keylen); ++ ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | ++ RK_CRYPTO_AES_KEY_CHANGE | ++ RK_CRYPTO_AES_BYTESWAP_KEY | ++ RK_CRYPTO_AES_BYTESWAP_IV; ++ if (ctx->keylen == AES_KEYSIZE_192) ++ ctx->mode |= RK_CRYPTO_AES_192BIT_key; ++ else if (ctx->keylen == AES_KEYSIZE_256) ++ ctx->mode |= RK_CRYPTO_AES_256BIT_key; ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_AES_CTRL, ctx->mode); ++ memcpy_toio(rk_dev->reg + RK_CRYPTO_AES_IV_0, ++ req->iv, ivsize); ++ } ++ conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | ++ RK_CRYPTO_BYTESWAP_BRFIFO; ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CONF, conf_reg); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_INTENA, ++ RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); ++} + -+ desc->ops = &rockchip_dfi_ops; ++static void crypto_dma_start(struct rk_crypto_dev *rk_dev) ++{ ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); + -+ return 0; ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BRDMAS, alg_ctx->addr_in); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BRDMAL, alg_ctx->count / 4); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_BTDMAS, alg_ctx->addr_out); ++ CRYPTO_WRITE(rk_dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | ++ _SBF(RK_CRYPTO_BLOCK_START, 16)); +} + -+static __maybe_unused __init int rk3128_dfi_init(struct platform_device *pdev, -+ struct 
rockchip_dfi *data, -+ struct devfreq_event_desc *desc) ++static int rk_set_data_start(struct rk_crypto_dev *rk_dev) +{ -+ struct device_node *np = pdev->dev.of_node, *node; ++ int err; ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ u32 ivsize = crypto_skcipher_ivsize(tfm); ++ u8 *src_last_blk = page_address(sg_page(alg_ctx->sg_src)) + ++ alg_ctx->sg_src->offset + alg_ctx->sg_src->length - ivsize; + -+ node = of_parse_phandle(np, "rockchip,grf", 0); -+ if (node) { -+ data->regmap_grf = syscon_node_to_regmap(node); -+ if (IS_ERR(data->regmap_grf)) -+ return PTR_ERR(data->regmap_grf); ++ /* Store the iv that need to be updated in chain mode. ++ * And update the IV buffer to contain the next IV for decryption mode. ++ */ ++ if (ctx->mode & RK_CRYPTO_DEC) { ++ memcpy(ctx->iv, src_last_blk, ivsize); ++ sg_pcopy_to_buffer(alg_ctx->req_src, alg_ctx->src_nents, ++ req->iv, ivsize, alg_ctx->total - ivsize); + } + -+ desc->ops = &rk3128_dfi_ops; ++ err = rk_dev->load_data(rk_dev, alg_ctx->sg_src, alg_ctx->sg_dst); ++ if (!err) ++ crypto_dma_start(rk_dev); ++ return err; ++} + -+ return 0; ++static int rk_ablk_start(struct rk_crypto_dev *rk_dev) ++{ ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ int err = 0; ++ ++ alg_ctx->left_bytes = req->cryptlen; ++ alg_ctx->total = req->cryptlen; ++ alg_ctx->sg_src = req->src; ++ alg_ctx->req_src = req->src; ++ alg_ctx->src_nents = sg_nents_for_len(req->src, req->cryptlen); ++ alg_ctx->sg_dst = req->dst; ++ alg_ctx->req_dst = req->dst; ++ alg_ctx->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); ++ ++ rk_ablk_hw_init(rk_dev); ++ err = rk_set_data_start(rk_dev); ++ ++ return err; +} + -+static __maybe_unused __init int rk3288_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc) ++static void rk_iv_copyback(struct rk_crypto_dev *rk_dev) +{ -+ struct device_node *np = pdev->dev.of_node, *node; -+ u32 val; ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ u32 ivsize = crypto_skcipher_ivsize(tfm); + -+ node = of_parse_phandle(np, "rockchip,pmu", 0); -+ if (node) { -+ data->regmap_pmu = syscon_node_to_regmap(node); -+ if (IS_ERR(data->regmap_pmu)) -+ return PTR_ERR(data->regmap_pmu); ++ /* Update the IV buffer to contain the next IV for encryption mode. 
*/ ++ if (!(ctx->mode & RK_CRYPTO_DEC) && req->iv) { ++ if (alg_ctx->aligned) { ++ memcpy(req->iv, sg_virt(alg_ctx->sg_dst) + ++ alg_ctx->sg_dst->length - ivsize, ivsize); ++ } else { ++ memcpy(req->iv, rk_dev->addr_vir + ++ alg_ctx->count - ivsize, ivsize); ++ } + } ++} + -+ node = of_parse_phandle(np, "rockchip,grf", 0); -+ if (node) { -+ data->regmap_grf = syscon_node_to_regmap(node); -+ if (IS_ERR(data->regmap_grf)) -+ return PTR_ERR(data->regmap_grf); ++static void rk_update_iv(struct rk_crypto_dev *rk_dev) ++{ ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); ++ u32 ivsize = crypto_skcipher_ivsize(tfm); ++ u8 *new_iv = NULL; ++ ++ if (ctx->mode & RK_CRYPTO_DEC) { ++ new_iv = ctx->iv; ++ } else { ++ new_iv = page_address(sg_page(alg_ctx->sg_dst)) + ++ alg_ctx->sg_dst->offset + ++ alg_ctx->sg_dst->length - ivsize; + } + -+ regmap_read(data->regmap_pmu, RK3288_PMU_SYS_REG2, &val); -+ data->dram_type = READ_DRAMTYPE_INFO(val); -+ data->ch_msk = READ_CH_INFO(val); ++ if (ivsize == DES_BLOCK_SIZE) ++ memcpy_toio(rk_dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); ++ else if (ivsize == AES_BLOCK_SIZE) ++ memcpy_toio(rk_dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); ++} + -+ if (data->dram_type == DDR3) -+ regmap_write(data->regmap_grf, RK3288_GRF_SOC_CON4, -+ RK3288_DDR3_SEL); -+ else -+ regmap_write(data->regmap_grf, RK3288_GRF_SOC_CON4, -+ RK3288_LPDDR_SEL); ++/* return: ++ * true some err was occurred ++ * fault no err, continue ++ */ ++static int rk_ablk_rx(struct rk_crypto_dev *rk_dev) ++{ ++ int err = 0; ++ struct rk_alg_ctx *alg_ctx = rk_alg_ctx_cast(rk_dev); + -+ desc->ops = &rk3288_dfi_ops; ++ CRYPTO_TRACE("left_bytes = %u\n", alg_ctx->left_bytes); + -+ return 0; ++ err = rk_dev->unload_data(rk_dev); ++ if (err) ++ goto out_rx; ++ ++ if (alg_ctx->left_bytes) { ++ rk_update_iv(rk_dev); ++ if (alg_ctx->aligned) { ++ if (sg_is_last(alg_ctx->sg_src)) { ++ dev_err(rk_dev->dev, "[%s:%d] Lack of data\n", ++ __func__, __LINE__); ++ err = -ENOMEM; ++ goto out_rx; ++ } ++ alg_ctx->sg_src = sg_next(alg_ctx->sg_src); ++ alg_ctx->sg_dst = sg_next(alg_ctx->sg_dst); ++ } ++ err = rk_set_data_start(rk_dev); ++ } else { ++ rk_iv_copyback(rk_dev); ++ } ++out_rx: ++ return err; +} + -+static __maybe_unused __init int rk3368_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc) ++static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) +{ -+ struct device *dev = &pdev->dev; ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ struct rk_crypto_algt *algt; ++ struct rk_crypto_dev *rk_dev; ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ if (!dev->parent || !dev->parent->of_node) -+ return -EINVAL; ++ algt = container_of(alg, struct rk_crypto_algt, alg.crypto); ++ rk_dev = algt->rk_dev; + -+ data->regmap_grf = syscon_node_to_regmap(dev->parent->of_node); -+ if (IS_ERR(data->regmap_grf)) -+ return PTR_ERR(data->regmap_grf); ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ desc->ops = &rk3368_dfi_ops; ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+ return 0; -+} ++ rk_dev->request_crypto(rk_dev, alg_name); + -+static __maybe_unused __init int rockchip_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi 
*data, -+ struct devfreq_event_desc *desc) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = pdev->dev.of_node, *node; -+ u32 val; - - data->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(data->regs)) -@@ -193,32 +674,132 @@ static int rockchip_dfi_probe(struct platform_device *pdev) - return dev_err_probe(dev, PTR_ERR(data->clk), - "Cannot get the clk pclk_ddr_mon\n"); - -+ /* try to find the optional reference to the pmu syscon */ - node = of_parse_phandle(np, "rockchip,pmu", 0); -- if (!node) -- return dev_err_probe(&pdev->dev, -ENODEV, "Can't find pmu_grf registers\n"); -+ if (node) { -+ data->regmap_pmu = syscon_node_to_regmap(node); -+ of_node_put(node); -+ if (IS_ERR(data->regmap_pmu)) -+ return PTR_ERR(data->regmap_pmu); -+ } ++ alg_ctx->align_size = crypto_skcipher_alignmask(tfm) + 1; + -+ regmap_read(data->regmap_pmu, PMUGRF_OS_REG2, &val); -+ data->dram_type = READ_DRAMTYPE_INFO(val); -+ data->ch_msk = READ_CH_INFO(val); ++ alg_ctx->ops.start = rk_ablk_start; ++ alg_ctx->ops.update = rk_ablk_rx; ++ alg_ctx->ops.complete = rk_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+ desc->ops = &rockchip_dfi_ops; ++ ctx->rk_dev = rk_dev; + + return 0; +} + -+static __maybe_unused __init int rk3328_dfi_init(struct platform_device *pdev, -+ struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc) ++static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) +{ -+ struct device_node *np = pdev->dev.of_node, *node; -+ struct resource *res; -+ u32 val; ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ data->regs = devm_ioremap_resource(&pdev->dev, res); -+ if (IS_ERR(data->regs)) -+ return PTR_ERR(data->regs); ++ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); ++} + -+ node = of_parse_phandle(np, "rockchip,grf", 0); -+ if (node) { -+ data->regmap_grf = syscon_node_to_regmap(node); -+ if (IS_ERR(data->regmap_grf)) -+ return PTR_ERR(data->regmap_grf); -+ } ++struct rk_crypto_algt rk_v1_ecb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); + -+ regmap_read(data->regmap_grf, RK3328_GRF_OS_REG2, &val); -+ data->dram_type = READ_DRAMTYPE_INFO(val); -+ data->ch_msk = 1; -+ data->clk = NULL; - -- data->regmap_pmu = syscon_node_to_regmap(node); -- of_node_put(node); -- if (IS_ERR(data->regmap_pmu)) -- return PTR_ERR(data->regmap_pmu); -+ desc->ops = &rockchip_dfi_ops; ++struct rk_crypto_algt rk_v1_cbc_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); + -+ return 0; -+} - -- data->dev = dev; -+static const struct of_device_id rockchip_dfi_id_match[] = { -+#ifdef CONFIG_CPU_PX30 -+ { .compatible = "rockchip,px30-dfi", .data = px30_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK1808 -+ { .compatible = "rockchip,rk1808-dfi", .data = px30_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK312X -+ { .compatible = "rockchip,rk3128-dfi", .data = rk3128_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3288 -+ { .compatible = "rockchip,rk3288-dfi", .data = rk3288_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3328 -+ { .compatible = "rockchip,rk3328-dfi", .data = rk3328_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3368 -+ { .compatible = "rockchip,rk3368-dfi", .data = rk3368_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3399 -+ { .compatible = "rockchip,rk3399-dfi", .data = rockchip_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3562 -+ { .compatible = "rockchip,rk3562-dfi", .data = px30_dfi_init }, -+#endif 
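The per-SoC init callbacks referenced by this match table (px30_dfi_init, rk3128_dfi_init, rk3288_dfi_init, rk3328_dfi_init, rk3588_dfi_init and the rk3399 rockchip_dfi_init) all recover the DRAM configuration recorded in the pmu_grf OS_REG words, using the READ_DRAMTYPE_INFO()/READ_CH_INFO() helpers defined near the top of this hunk. A stand-alone sketch of that decoding is shown below; the OS_REG2 value is synthetic and the macros are re-declared only so the example compiles on its own.

#include <assert.h>
#include <stdint.h>

/* Same bit layout as the driver macros above:
 * DRAM type in bits [15:13], populated-channel mask in bits [29:28].
 */
#define READ_DRAMTYPE_INFO(n)	(((n) >> 13) & 0x7)
#define READ_CH_INFO(n)		(((n) >> 28) & 0x3)

enum { DDR4 = 0, DDR3 = 3, LPDDR2 = 5, LPDDR3 = 6, LPDDR4 = 7 };

int main(void)
{
	/* synthetic OS_REG2 image: LPDDR4 with both channels present */
	uint32_t os_reg2 = ((uint32_t)LPDDR4 << 13) | (0x3u << 28);

	assert(READ_DRAMTYPE_INFO(os_reg2) == LPDDR4);
	assert(READ_CH_INFO(os_reg2) == 0x3);
	return 0;
}

On newer parts the type field is wider: when the OS_REG version read back is 3 or more, READ_DRAMTYPE_INFO_V3() splices two extra bits from a second register on top of the three bits decoded here, which is how values such as LPDDR4X and LPDDR5 are represented.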
-+#ifdef CONFIG_CPU_RK3568 -+ { .compatible = "rockchip,rk3568-dfi", .data = px30_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RK3588 -+ { .compatible = "rockchip,rk3588-dfi", .data = rk3588_dfi_init }, -+#endif -+#ifdef CONFIG_CPU_RV1126 -+ { .compatible = "rockchip,rv1126-dfi", .data = px30_dfi_init }, -+#endif -+ { }, -+}; ++struct rk_crypto_algt rk_v1_ecb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); + -+static int rockchip_dfi_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct rockchip_dfi *data; -+ struct devfreq_event_desc *desc; -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; -+ int (*init)(struct platform_device *pdev, struct rockchip_dfi *data, -+ struct devfreq_event_desc *desc); ++struct rk_crypto_algt rk_v1_cbc_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); + -+ data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; - - desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); - if (!desc) - return -ENOMEM; - -- desc->ops = &rockchip_dfi_ops; -+ match = of_match_node(rockchip_dfi_id_match, pdev->dev.of_node); -+ if (match) { -+ init = match->data; -+ if (init) { -+ if (init(pdev, data, desc)) -+ return -EINVAL; -+ } else { -+ return 0; -+ } -+ } else { -+ return 0; -+ } ++struct rk_crypto_algt rk_v1_ecb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); + - desc->driver_data = data; - desc->name = np->name; -- data->desc = desc; - -- data->edev = devm_devfreq_event_add_edev(&pdev->dev, desc); -+ data->edev = devm_devfreq_event_add_edev(dev, desc); - if (IS_ERR(data->edev)) { -- dev_err(&pdev->dev, -- "failed to add devfreq-event device\n"); -+ dev_err(dev, "failed to add devfreq-event device\n"); - return PTR_ERR(data->edev); - } -+ data->desc = desc; -+ data->dev = &pdev->dev; - - platform_set_drvdata(pdev, data); - -diff --git a/drivers/devfreq/event/rockchip-nocp.c b/drivers/devfreq/event/rockchip-nocp.c ++struct rk_crypto_algt rk_v1_cbc_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); +diff --git a/drivers/crypto/rockchip/rk_crypto_v2.c b/drivers/crypto/rockchip/rk_crypto_v2.c new file mode 100644 -index 000000000..954a27d3f +index 000000000..e8f2a0a24 --- /dev/null -+++ b/drivers/devfreq/event/rockchip-nocp.c -@@ -0,0 +1,210 @@ ++++ b/drivers/crypto/rockchip/rk_crypto_v2.c +@@ -0,0 +1,104 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd ++ * Crypto acceleration support for Rockchip Crypto V2 + * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2, as published by the Free Software Foundation. ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan + * -+ * This program is distributed in the hope it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
+ */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include "rk_crypto_core.h" ++#include "rk_crypto_v2.h" + -+#define EVENT_BYTE 0x08 -+#define EVENT_CHAIN 0x10 ++static const char * const crypto_v2_rsts[] = { ++ "crypto-rst", ++}; + -+#define START_EN BIT(3) -+#define GLOBAL_EN BIT(0) -+#define START_GO BIT(0) ++static struct rk_crypto_algt *crypto_v2_algs[] = { ++ &rk_v2_ecb_sm4_alg, /* ecb(sm4) */ ++ &rk_v2_cbc_sm4_alg, /* cbc(sm4) */ ++ &rk_v2_xts_sm4_alg, /* xts(sm4) */ ++ &rk_v2_cfb_sm4_alg, /* cfb(sm4) */ ++ &rk_v2_ofb_sm4_alg, /* ofb(sm4) */ ++ &rk_v2_ctr_sm4_alg, /* ctr(sm4) */ ++ &rk_v2_gcm_sm4_alg, /* gcm(sm4) */ + -+#define PROBE_MAINCTL 0x0008 -+#define PROBE_CFGCTL 0x000c -+#define PROBE_STATPERIOD 0x0024 -+#define PROBE_STATGO 0x0028 ++ &rk_v2_ecb_aes_alg, /* ecb(aes) */ ++ &rk_v2_cbc_aes_alg, /* cbc(aes) */ ++ &rk_v2_xts_aes_alg, /* xts(aes) */ ++ &rk_v2_cfb_aes_alg, /* cfb(aes) */ ++ &rk_v2_ofb_aes_alg, /* ofb(aes) */ ++ &rk_v2_ctr_aes_alg, /* ctr(aes) */ ++ &rk_v2_gcm_aes_alg, /* gcm(aes) */ + -+struct nocp_info { -+ u32 counter0_src; -+ u32 counter0_val; -+ u32 counter1_src; -+ u32 counter1_val; -+}; ++ &rk_v2_ecb_des_alg, /* ecb(des) */ ++ &rk_v2_cbc_des_alg, /* cbc(des) */ ++ &rk_v2_cfb_des_alg, /* cfb(des) */ ++ &rk_v2_ofb_des_alg, /* ofb(des) */ + -+struct rockchip_nocp { -+ void __iomem *reg_base; -+ struct device *dev; -+ struct devfreq_event_dev *edev; -+ struct devfreq_event_desc *desc; -+ const struct nocp_info *info; -+ ktime_t time; ++ &rk_v2_ecb_des3_ede_alg, /* ecb(des3_ede) */ ++ &rk_v2_cbc_des3_ede_alg, /* cbc(des3_ede) */ ++ &rk_v2_cfb_des3_ede_alg, /* cfb(des3_ede) */ ++ &rk_v2_ofb_des3_ede_alg, /* ofb(des3_ede) */ ++ ++ &rk_v2_ahash_sha1, /* sha1 */ ++ &rk_v2_ahash_sha224, /* sha224 */ ++ &rk_v2_ahash_sha256, /* sha256 */ ++ &rk_v2_ahash_sha384, /* sha384 */ ++ &rk_v2_ahash_sha512, /* sha512 */ ++ &rk_v2_ahash_md5, /* md5 */ ++ &rk_v2_ahash_sm3, /* sm3 */ ++ ++ &rk_v2_hmac_sha1, /* hmac(sha1) */ ++ &rk_v2_hmac_sha256, /* hmac(sha256) */ ++ &rk_v2_hmac_sha512, /* hmac(sha512) */ ++ &rk_v2_hmac_md5, /* hmac(md5) */ ++ &rk_v2_hmac_sm3, /* hmac(sm3) */ ++ ++ &rk_v2_asym_rsa, /* rsa */ +}; + -+static int rockchip_nocp_enable(struct devfreq_event_dev *edev) ++int rk_hw_crypto_v2_init(struct device *dev, void *hw_info) +{ -+ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); -+ const struct nocp_info *info = nocp->info; -+ void __iomem *reg_base = nocp->reg_base; ++ struct rk_hw_crypto_v2_info *info = ++ (struct rk_hw_crypto_v2_info *)hw_info; + -+ writel_relaxed(GLOBAL_EN, reg_base + PROBE_CFGCTL); -+ writel_relaxed(START_EN, reg_base + PROBE_MAINCTL); -+ writel_relaxed(0, reg_base + PROBE_STATPERIOD); -+ writel_relaxed(EVENT_BYTE, reg_base + info->counter0_src); -+ writel_relaxed(EVENT_CHAIN, reg_base + info->counter1_src); -+ writel_relaxed(START_GO, reg_base + PROBE_STATGO); ++ if (!dev || !hw_info) ++ return -EINVAL; + -+ nocp->time = ktime_get(); ++ memset(info, 0x00, sizeof(*info)); + -+ return 0; ++ return rk_crypto_hw_desc_alloc(dev, &info->hw_desc); +} + -+static int rockchip_nocp_disable(struct devfreq_event_dev *edev) ++void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info) +{ -+ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); -+ const struct nocp_info *info = nocp->info; -+ void __iomem *reg_base = nocp->reg_base; ++ struct rk_hw_crypto_v2_info *info = ++ (struct rk_hw_crypto_v2_info *)hw_info; + -+ writel_relaxed(0, reg_base + PROBE_STATGO); -+ writel_relaxed(0, 
reg_base + PROBE_MAINCTL); -+ writel_relaxed(0, reg_base + PROBE_CFGCTL); -+ writel_relaxed(0, reg_base + info->counter0_src); -+ writel_relaxed(0, reg_base + info->counter1_src); ++ if (!dev || !hw_info) ++ return; + -+ return 0; ++ rk_crypto_hw_desc_free(&info->hw_desc); +} + -+static int rockchip_nocp_get_event(struct devfreq_event_dev *edev, -+ struct devfreq_event_data *edata) ++const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num) +{ -+ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); -+ const struct nocp_info *info = nocp->info; -+ void __iomem *reg_base = nocp->reg_base; -+ u32 counter = 0, counter0 = 0, counter1 = 0; -+ int time_ms = 0; -+ -+ time_ms = ktime_to_ms(ktime_sub(ktime_get(), nocp->time)); ++ *num = ARRAY_SIZE(crypto_v2_rsts); + -+ counter0 = readl_relaxed(reg_base + info->counter0_val); -+ counter1 = readl_relaxed(reg_base + info->counter1_val); -+ counter = (counter0 & 0xffff) | ((counter1 & 0xffff) << 16); -+ counter = counter / 1000000; -+ if (time_ms > 0) -+ edata->load_count = (counter * 1000) / time_ms; ++ return crypto_v2_rsts; ++} + -+ writel_relaxed(START_GO, reg_base + PROBE_STATGO); -+ nocp->time = ktime_get(); ++struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num) ++{ ++ *num = ARRAY_SIZE(crypto_v2_algs); + -+ return 0; ++ return crypto_v2_algs; +} + -+static int rockchip_nocp_set_event(struct devfreq_event_dev *edev) ++bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) +{ -+ return 0; ++ return true; +} + -+static const struct devfreq_event_ops rockchip_nocp_ops = { -+ .disable = rockchip_nocp_disable, -+ .enable = rockchip_nocp_enable, -+ .get_event = rockchip_nocp_get_event, -+ .set_event = rockchip_nocp_set_event, -+}; +diff --git a/drivers/crypto/rockchip/rk_crypto_v2.h b/drivers/crypto/rockchip/rk_crypto_v2.h +new file mode 100644 +index 000000000..9e82346d6 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v2.h +@@ -0,0 +1,95 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+static const struct nocp_info rk3288_nocp = { -+ .counter0_src = 0x138, -+ .counter0_val = 0x13c, -+ .counter1_src = 0x14c, -+ .counter1_val = 0x150, -+}; ++/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ + -+static const struct nocp_info rk3568_nocp = { -+ .counter0_src = 0x204, -+ .counter0_val = 0x20c, -+ .counter1_src = 0x214, -+ .counter1_val = 0x21c, -+}; ++#ifndef __RK_CRYPTO_V2_H__ ++#define __RK_CRYPTO_V2_H__ + -+static const struct of_device_id rockchip_nocp_id_match[] = { -+ { -+ .compatible = "rockchip,rk3288-nocp", -+ .data = (void *)&rk3288_nocp, -+ }, -+ { -+ .compatible = "rockchip,rk3368-nocp", -+ .data = (void *)&rk3288_nocp, -+ }, -+ { -+ .compatible = "rockchip,rk3399-nocp", -+ .data = (void *)&rk3288_nocp, -+ }, -+ { -+ .compatible = "rockchip,rk3568-nocp", -+ .data = (void *)&rk3568_nocp, -+ }, -+ { }, ++#include ++ ++#include "rk_crypto_utils.h" ++ ++struct rk_hw_crypto_v2_info { ++ struct rk_hw_desc hw_desc; +}; + -+static int rockchip_nocp_probe(struct platform_device *pdev) -+{ -+ struct resource *res; -+ struct rockchip_nocp *nocp; -+ struct devfreq_event_desc *desc; -+ struct device_node *np = pdev->dev.of_node; -+ const struct of_device_id *match; ++#define RK_CRYPTO_V2_SOC_DATA_INIT(names, soft_aes_192) {\ ++ .crypto_ver = "CRYPTO V2.0.0.0",\ ++ .use_soft_aes192 = soft_aes_192,\ ++ .valid_algs_name = (names),\ ++ .valid_algs_num = ARRAY_SIZE(names),\ ++ .hw_init = rk_hw_crypto_v2_init,\ ++ .hw_deinit = rk_hw_crypto_v2_deinit,\ ++ .hw_get_rsts = rk_hw_crypto_v2_get_rsts,\ ++ .hw_get_algts = rk_hw_crypto_v2_get_algts,\ ++ .hw_is_algo_valid = rk_hw_crypto_v2_algo_valid,\ ++ .hw_info_size = sizeof(struct rk_hw_crypto_v2_info),\ ++ .default_pka_offset = 0x0480,\ ++ .use_lli_chain = true,\ ++} + -+ match = of_match_device(rockchip_nocp_id_match, &pdev->dev); -+ if (!match || !match->data) { -+ dev_err(&pdev->dev, "missing nocp data\n"); -+ return -ENODEV; -+ } ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) + -+ nocp = devm_kzalloc(&pdev->dev, sizeof(*nocp), GFP_KERNEL); -+ if (!nocp) -+ return -ENOMEM; ++extern struct rk_crypto_algt rk_v2_ecb_sm4_alg; ++extern struct rk_crypto_algt rk_v2_cbc_sm4_alg; ++extern struct rk_crypto_algt rk_v2_xts_sm4_alg; ++extern struct rk_crypto_algt rk_v2_cfb_sm4_alg; ++extern struct rk_crypto_algt rk_v2_ofb_sm4_alg; ++extern struct rk_crypto_algt rk_v2_ctr_sm4_alg; ++extern struct rk_crypto_algt rk_v2_gcm_sm4_alg; + -+ nocp->info = match->data; ++extern struct rk_crypto_algt rk_v2_ecb_aes_alg; ++extern struct rk_crypto_algt rk_v2_cbc_aes_alg; ++extern struct rk_crypto_algt rk_v2_xts_aes_alg; ++extern struct rk_crypto_algt rk_v2_cfb_aes_alg; ++extern struct rk_crypto_algt rk_v2_ofb_aes_alg; ++extern struct rk_crypto_algt rk_v2_ctr_aes_alg; ++extern struct rk_crypto_algt rk_v2_gcm_aes_alg; + -+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ nocp->reg_base = devm_ioremap_resource(&pdev->dev, res); -+ if (IS_ERR(nocp->reg_base)) -+ return PTR_ERR(nocp->reg_base); ++extern struct rk_crypto_algt rk_v2_ecb_des_alg; ++extern struct rk_crypto_algt rk_v2_cbc_des_alg; ++extern struct rk_crypto_algt rk_v2_cfb_des_alg; ++extern struct rk_crypto_algt rk_v2_ofb_des_alg; + -+ desc = devm_kzalloc(&pdev->dev, sizeof(*desc), GFP_KERNEL); -+ if (!desc) -+ return -ENOMEM; ++extern struct rk_crypto_algt rk_v2_ecb_des3_ede_alg; ++extern struct rk_crypto_algt rk_v2_cbc_des3_ede_alg; ++extern struct rk_crypto_algt rk_v2_cfb_des3_ede_alg; ++extern struct rk_crypto_algt rk_v2_ofb_des3_ede_alg; + -+ desc->ops = &rockchip_nocp_ops; -+ desc->driver_data = nocp; -+ desc->name = np->name; -+ nocp->desc = desc; -+ nocp->dev = &pdev->dev; -+ nocp->edev = devm_devfreq_event_add_edev(&pdev->dev, desc); -+ if (IS_ERR(nocp->edev)) { -+ dev_err(&pdev->dev, 
"failed to add devfreq-event device\n"); -+ return PTR_ERR(nocp->edev); -+ } ++extern struct rk_crypto_algt rk_v2_ahash_sha1; ++extern struct rk_crypto_algt rk_v2_ahash_sha224; ++extern struct rk_crypto_algt rk_v2_ahash_sha256; ++extern struct rk_crypto_algt rk_v2_ahash_sha384; ++extern struct rk_crypto_algt rk_v2_ahash_sha512; ++extern struct rk_crypto_algt rk_v2_ahash_md5; ++extern struct rk_crypto_algt rk_v2_ahash_sm3; + -+ platform_set_drvdata(pdev, nocp); ++extern struct rk_crypto_algt rk_v2_hmac_md5; ++extern struct rk_crypto_algt rk_v2_hmac_sha1; ++extern struct rk_crypto_algt rk_v2_hmac_sha256; ++extern struct rk_crypto_algt rk_v2_hmac_sha512; ++extern struct rk_crypto_algt rk_v2_hmac_sm3; + -+ return 0; ++extern struct rk_crypto_algt rk_v2_asym_rsa; ++ ++int rk_hw_crypto_v2_init(struct device *dev, void *hw_info); ++void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info); ++const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num); ++struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num); ++bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); ++ ++#else ++ ++static inline int rk_hw_crypto_v2_init(struct device *dev, void *hw_info) { return -EINVAL; } ++static inline void rk_hw_crypto_v2_deinit(struct device *dev, void *hw_info) {} ++static inline const char * const *rk_hw_crypto_v2_get_rsts(uint32_t *num) { return NULL; } ++static inline struct rk_crypto_algt **rk_hw_crypto_v2_get_algts(uint32_t *num) { return NULL; } ++static inline bool rk_hw_crypto_v2_algo_valid(struct rk_crypto_dev *rk_dev, ++ struct rk_crypto_algt *aglt) ++{ ++ return false; +} + -+static struct platform_driver rockchip_nocp_driver = { -+ .probe = rockchip_nocp_probe, -+ .driver = { -+ .name = "rockchip-nocp", -+ .of_match_table = rockchip_nocp_id_match, -+ }, -+}; -+module_platform_driver(rockchip_nocp_driver); ++#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V2) */ + -+MODULE_DESCRIPTION("Rockchip NoC (Network on Chip) Probe driver"); -+MODULE_AUTHOR("Finley Xiao "); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c -index daff40702..fd2c5ffed 100644 ---- a/drivers/devfreq/rk3399_dmc.c -+++ b/drivers/devfreq/rk3399_dmc.c -@@ -22,6 +22,7 @@ - #include - - #include -+#include - #include - #include - -@@ -381,17 +382,16 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) - } - - regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); -- ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & -- RK3399_PMUGRF_DDRTYPE_MASK; -+ ddr_type = FIELD_GET(RK3399_PMUGRF_OS_REG2_DDRTYPE, val); - - switch (ddr_type) { -- case RK3399_PMUGRF_DDRTYPE_DDR3: -+ case ROCKCHIP_DDRTYPE_DDR3: - data->odt_dis_freq = data->ddr3_odt_dis_freq; - break; -- case RK3399_PMUGRF_DDRTYPE_LPDDR3: -+ case ROCKCHIP_DDRTYPE_LPDDR3: - data->odt_dis_freq = data->lpddr3_odt_dis_freq; - break; -- case RK3399_PMUGRF_DDRTYPE_LPDDR4: -+ case ROCKCHIP_DDRTYPE_LPDDR4: - data->odt_dis_freq = data->lpddr4_odt_dis_freq; - break; - default: -diff --git a/drivers/devfreq/rockchip_bus.c b/drivers/devfreq/rockchip_bus.c ++#endif /* end of __RK_CRYPTO_V2_H__ */ +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_ahash.c b/drivers/crypto/rockchip/rk_crypto_v2_ahash.c new file mode 100644 -index 000000000..0f5487eae +index 000000000..919603ff4 --- /dev/null -+++ b/drivers/devfreq/rockchip_bus.c -@@ -0,0 +1,548 @@ ++++ b/drivers/crypto/rockchip/rk_crypto_v2_ahash.c +@@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright 
(c) 2018, Fuzhou Rockchip Electronics Co., Ltd. -+ * Author: Tony Xie ++ * Hash acceleration support for Rockchip Crypto v2 ++ * ++ * Copyright (c) 2020, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include +#include -+#include -+#include ++#include + -+#define CLUSTER0 0 -+#define CLUSTER1 1 -+#define MAX_CLUSTERS 2 ++#include "rk_crypto_core.h" ++#include "rk_crypto_v2.h" ++#include "rk_crypto_v2_reg.h" ++#include "rk_crypto_ahash_utils.h" ++#include "rk_crypto_utils.h" + -+#define to_rockchip_bus_clk_nb(nb) \ -+ container_of(nb, struct rockchip_bus, clk_nb) -+#define to_rockchip_bus_cpufreq_nb(nb) \ -+ container_of(nb, struct rockchip_bus, cpufreq_nb) ++#define RK_HASH_CTX_MAGIC 0x1A1A1A1A ++#define RK_POLL_PERIOD_US 100 ++#define RK_POLL_TIMEOUT_US 50000 + -+struct busfreq_table { -+ unsigned long freq; -+ unsigned long volt; ++struct rk_ahash_expt_ctx { ++ struct rk_ahash_ctx ctx; ++ u8 lastc[RK_DMA_ALIGNMENT]; +}; + -+struct rockchip_bus { -+ struct device *dev; -+ struct regulator *regulator; -+ struct clk *clk; -+ struct notifier_block clk_nb; -+ struct notifier_block cpufreq_nb; -+ struct busfreq_table *freq_table; -+ struct rockchip_opp_info opp_info; ++static const u32 hash_algo2bc[] = { ++ [HASH_ALGO_MD5] = CRYPTO_MD5, ++ [HASH_ALGO_SHA1] = CRYPTO_SHA1, ++ [HASH_ALGO_SHA224] = CRYPTO_SHA224, ++ [HASH_ALGO_SHA256] = CRYPTO_SHA256, ++ [HASH_ALGO_SHA384] = CRYPTO_SHA384, ++ [HASH_ALGO_SHA512] = CRYPTO_SHA512, ++ [HASH_ALGO_SM3] = CRYPTO_SM3, ++}; + -+ unsigned int max_state; ++static void rk_hash_reset(struct rk_crypto_dev *rk_dev) ++{ ++ int ret; ++ u32 tmp = 0, tmp_mask = 0; ++ unsigned int pool_timeout_us = 1000; + -+ unsigned long cur_volt; -+ unsigned long cur_rate; ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); + -+ /* -+ * Busfreq-policy-cpufreq: -+ * If the cpu frequency of two clusters are both less than or equal to -+ * cpu_high_freq, change bus rate to low_rate, otherwise change it to -+ * high_rate. 
-+ */ -+ unsigned long high_rate; -+ unsigned long low_rate; -+ unsigned int cpu_high_freq; -+ unsigned int cpu_freq[MAX_CLUSTERS]; -+}; ++ tmp = CRYPTO_SW_CC_RESET; ++ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; + -+static int rockchip_sip_bus_smc_config(u32 bus_id, u32 cfg, u32 enable_msk) -+{ -+ struct arm_smccc_res res; ++ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); + -+ res = sip_smc_bus_config(bus_id, cfg, enable_msk); ++ /* This is usually done in 20 clock cycles */ ++ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, pool_timeout_us, ++ false, rk_dev, CRYPTO_RST_CTL); ++ if (ret) ++ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", ++ pool_timeout_us); + -+ return res.a0; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0xffff0000); ++ ++ /* clear dma int status */ ++ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); +} + -+static int rockchip_bus_smc_config(struct rockchip_bus *bus) ++static int rk_crypto_irq_handle(int irq, void *dev_id) +{ -+ struct device *dev = bus->dev; -+ struct device_node *np = dev->of_node; -+ struct device_node *child; -+ unsigned int enable_msk, bus_id, cfg; -+ char *prp_name = "rockchip,soc-bus-table"; -+ u32 *table = NULL; -+ int ret = 0, config_cnt, i; ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 interrupt_status; ++ struct rk_hw_crypto_v2_info *hw_info = ++ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); + -+ for_each_available_child_of_node(np, child) { -+ ret = of_property_read_u32_index(child, "bus-id", 0, -+ &bus_id); -+ if (ret) -+ continue; ++ /* disable crypto irq */ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); + -+ ret = of_property_read_u32_index(child, "cfg-val", 0, -+ &cfg); -+ if (ret) { -+ dev_info(dev, "get cfg-val error\n"); -+ continue; -+ } ++ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); + -+ if (!cfg) { -+ dev_info(dev, "cfg-val invalid\n"); -+ continue; -+ } ++ interrupt_status &= CRYPTO_LOCKSTEP_MASK; + -+ ret = of_property_read_u32_index(child, "enable-msk", 0, -+ &enable_msk); -+ if (ret) { -+ dev_info(dev, "get enable_msk error\n"); -+ continue; -+ } -+ -+ ret = rockchip_sip_bus_smc_config(bus_id, cfg, -+ enable_msk); -+ if (ret) { -+ dev_info(dev, "bus smc config error: %x!\n", ret); -+ break; -+ } -+ } -+ -+ config_cnt = of_property_count_u32_elems(np, prp_name); -+ if (config_cnt <= 0) { -+ return 0; -+ } else if (config_cnt % 3) { -+ dev_err(dev, "Invalid count of %s\n", prp_name); -+ return -EINVAL; ++ if (interrupt_status != CRYPTO_SRC_ITEM_DONE_INT_ST) { ++ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); ++ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", ++ (u32)alg_ctx->addr_in); ++ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", ++ (u32)alg_ctx->addr_out); ++ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); ++ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", ++ (u32)hw_info->hw_desc.lli_head_dma); ++ dev_err(rk_dev->dev, "DMA Error status = %08x\n", ++ interrupt_status); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, 
CRYPTO_DMA_LLI_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); ++ rk_dev->err = -EFAULT; + } + -+ table = kmalloc_array(config_cnt, sizeof(u32), GFP_KERNEL); -+ if (!table) -+ return -ENOMEM; -+ -+ ret = of_property_read_u32_array(np, prp_name, table, config_cnt); -+ if (ret) { -+ dev_err(dev, "get %s error\n", prp_name); -+ goto free_table; -+ } ++ return 0; ++} + -+ /* table[3n]: bus_id -+ * table[3n + 1]: config -+ * table[3n + 2]: enable_mask -+ */ -+ for (i = 0; i < config_cnt; i += 3) { -+ bus_id = table[i]; -+ cfg = table[i + 1]; -+ enable_msk = table[i + 2]; ++static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) ++{ ++ struct ahash_request *req = ahash_request_cast(base); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(ctx->rk_dev); + -+ if (!cfg) { -+ dev_info(dev, "cfg-val invalid in %s-%d\n", prp_name, bus_id); -+ continue; -+ } ++ struct rk_hw_crypto_v2_info *hw_info = ctx->rk_dev->hw_info; ++ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; + -+ ret = rockchip_sip_bus_smc_config(bus_id, cfg, enable_msk); -+ if (ret) { -+ dev_err(dev, "bus smc config error: %x!\n", ret); -+ goto free_table; -+ } ++ if (err) { ++ rk_hash_reset(ctx->rk_dev); ++ pr_err("aligned = %u, align_size = %u\n", ++ alg_ctx->aligned, alg_ctx->align_size); ++ pr_err("total = %u, left = %u, count = %u\n", ++ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); ++ pr_err("lli->src = %08x\n", lli_desc->src_addr); ++ pr_err("lli->src_len = %08x\n", lli_desc->src_len); ++ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); ++ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); ++ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); ++ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); ++ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); + } + -+free_table: -+ kfree(table); -+ -+ return ret; ++ if (base->complete) ++ base->complete(base, err); +} + -+static int rockchip_bus_set_freq_table(struct rockchip_bus *bus) ++static inline void clear_hash_out_reg(struct rk_crypto_dev *rk_dev) +{ -+ struct device *dev = bus->dev; -+ struct dev_pm_opp *opp; -+ unsigned long freq; -+ int i, count; -+ -+ count = dev_pm_opp_get_opp_count(dev); -+ if (count <= 0) -+ return -EINVAL; -+ -+ bus->max_state = count; -+ bus->freq_table = devm_kcalloc(dev, -+ bus->max_state, -+ sizeof(*bus->freq_table), -+ GFP_KERNEL); -+ if (!bus->freq_table) { -+ bus->max_state = 0; -+ return -ENOMEM; -+ } ++ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_DOUT_0, 16); ++} + -+ for (i = 0, freq = 0; i < bus->max_state; i++, freq++) { -+ opp = dev_pm_opp_find_freq_ceil(dev, &freq); -+ if (IS_ERR(opp)) { -+ devm_kfree(dev, bus->freq_table); -+ bus->max_state = 0; -+ return PTR_ERR(opp); -+ } -+ bus->freq_table[i].volt = dev_pm_opp_get_voltage(opp); -+ bus->freq_table[i].freq = freq; -+ dev_pm_opp_put(opp); -+ } ++static int write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); + + return 0; +} + -+static int rockchip_bus_power_control_init(struct rockchip_bus *bus) ++static int rk_hw_hash_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) +{ -+ struct device *dev = bus->dev; -+ int ret = 0; ++ u32 reg_ctrl = 0; + -+ bus->clk 
= devm_clk_get(dev, "bus"); -+ if (IS_ERR(bus->clk)) { -+ dev_err(dev, "failed to get bus clock\n"); -+ return PTR_ERR(bus->clk); -+ } ++ if (algo >= ARRAY_SIZE(hash_algo2bc)) ++ goto exit; + -+ bus->regulator = devm_regulator_get(dev, "bus"); -+ if (IS_ERR(bus->regulator)) { -+ dev_err(dev, "failed to get bus regulator\n"); -+ return PTR_ERR(bus->regulator); -+ } ++ rk_hash_reset(rk_dev); + -+ ret = rockchip_init_opp_table(dev, &bus->opp_info, NULL, "pvtm"); -+ if (ret < 0) { -+ dev_err(dev, "failed to get OPP table\n"); -+ return ret; -+ } ++ clear_hash_out_reg(rk_dev); + -+ ret = rockchip_bus_set_freq_table(bus); -+ if (ret < 0) { -+ dev_err(dev, "failed to set bus freq table\n"); -+ return ret; ++ reg_ctrl = hash_algo2bc[algo] | CRYPTO_HW_PAD_ENABLE; ++ ++ if (IS_TYPE_HMAC(type)) { ++ CRYPTO_TRACE("this is hmac"); ++ reg_ctrl |= CRYPTO_HMAC_ENABLE; + } + ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, reg_ctrl | CRYPTO_WRITE_MASK_ALL); ++ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ + return 0; ++exit: ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ ++ return -EINVAL; +} + -+static int rockchip_bus_clkfreq_target(struct device *dev, unsigned long freq) ++static void clean_hash_setting(struct rk_crypto_dev *rk_dev) +{ -+ struct rockchip_bus *bus = dev_get_drvdata(dev); -+ unsigned long target_volt = bus->freq_table[bus->max_state - 1].volt; -+ int i; ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++} + -+ for (i = 0; i < bus->max_state; i++) { -+ if (freq <= bus->freq_table[i].freq) { -+ target_volt = bus->freq_table[i].volt; -+ break; -+ } -+ } ++static int rk_ahash_import(struct ahash_request *req, const void *in) ++{ ++ struct rk_ahash_expt_ctx state; + -+ if (bus->cur_volt != target_volt) { -+ dev_dbg(bus->dev, "target_volt: %lu\n", target_volt); -+ if (regulator_set_voltage(bus->regulator, target_volt, -+ INT_MAX)) { -+ dev_err(dev, "failed to set voltage %lu uV\n", -+ target_volt); -+ return -EINVAL; -+ } -+ bus->cur_volt = target_volt; -+ } ++ /* 'in' may not be aligned so memcpy to local variable */ ++ memcpy(&state, in, sizeof(state)); ++ ++ ///TODO: deal with import + + return 0; +} + -+static int rockchip_bus_clk_notifier(struct notifier_block *nb, -+ unsigned long event, void *data) ++static int rk_ahash_export(struct ahash_request *req, void *out) +{ -+ struct clk_notifier_data *ndata = data; -+ struct rockchip_bus *bus = to_rockchip_bus_clk_nb(nb); -+ int ret = 0; ++ struct rk_ahash_expt_ctx state; + -+ dev_dbg(bus->dev, "event %lu, old_rate %lu, new_rate: %lu\n", -+ event, ndata->old_rate, ndata->new_rate); ++ /* Don't let anything leak to 'out' */ ++ memset(&state, 0, sizeof(state)); + -+ switch (event) { -+ case PRE_RATE_CHANGE: -+ if (ndata->new_rate > ndata->old_rate) -+ ret = rockchip_bus_clkfreq_target(bus->dev, -+ ndata->new_rate); -+ break; -+ case POST_RATE_CHANGE: -+ if (ndata->new_rate < ndata->old_rate) -+ ret = rockchip_bus_clkfreq_target(bus->dev, -+ ndata->new_rate); -+ break; -+ case ABORT_RATE_CHANGE: -+ if (ndata->new_rate > ndata->old_rate) -+ ret = rockchip_bus_clkfreq_target(bus->dev, -+ ndata->old_rate); -+ break; -+ default: -+ break; -+ } ++ ///TODO: deal with import + -+ return notifier_from_errno(ret); ++ memcpy(out, &state, sizeof(state)); ++ ++ return 0; +} + -+static int rockchip_bus_clkfreq(struct rockchip_bus *bus) ++static int rk_ahash_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) +{ -+ struct device *dev = bus->dev; -+ unsigned long 
init_rate; -+ int ret = 0; ++ struct rk_hw_crypto_v2_info *hw_info = ++ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); ++ struct crypto_lli_desc *lli_head, *lli_tail; ++ u32 dma_ctl = CRYPTO_DMA_RESTART; ++ bool is_final = flag & RK_FLAG_FINAL; ++ int ret; + -+ ret = rockchip_bus_power_control_init(bus); -+ if (ret) { -+ dev_err(dev, "failed to init power control\n"); -+ return ret; ++ CRYPTO_TRACE("ctx->calc_cnt = %u, count %u Byte, is_final = %d", ++ ctx->calc_cnt, alg_ctx->count, is_final); ++ ++ if (alg_ctx->count % RK_DMA_ALIGNMENT && !is_final) { ++ dev_err(rk_dev->dev, "count = %u is not aligned with [%u]\n", ++ alg_ctx->count, RK_DMA_ALIGNMENT); ++ return -EINVAL; + } + -+ init_rate = clk_get_rate(bus->clk); -+ ret = rockchip_bus_clkfreq_target(dev, init_rate); ++ if (alg_ctx->count == 0) { ++ /* do nothing */ ++ CRYPTO_TRACE("empty calc"); ++ return 0; ++ } ++ ++ if (alg_ctx->aligned) ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ alg_ctx->sg_src, NULL, alg_ctx->count); ++ else ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ &alg_ctx->sg_tmp, NULL, alg_ctx->count); + if (ret) + return ret; + -+ bus->clk_nb.notifier_call = rockchip_bus_clk_notifier; -+ ret = clk_notifier_register(bus->clk, &bus->clk_nb); -+ if (ret) { -+ dev_err(dev, "failed to register clock notifier\n"); -+ return ret; -+ } ++ lli_head = hw_info->hw_desc.lli_head; ++ lli_tail = hw_info->hw_desc.lli_tail; + -+ return 0; -+} ++ lli_tail->dma_ctrl = is_final ? LLI_DMA_CTRL_LAST : LLI_DMA_CTRL_PAUSE; ++ lli_tail->dma_ctrl |= LLI_DMA_CTRL_SRC_DONE; ++ lli_tail->next_addr = hw_info->hw_desc.lli_head_dma; + -+static int rockchip_bus_cpufreq_target(struct device *dev, unsigned long freq, -+ u32 flags) -+{ -+ struct rockchip_bus *bus = dev_get_drvdata(dev); -+ struct dev_pm_opp *opp; -+ unsigned long target_volt, target_rate = freq; -+ int ret = 0; ++ if (ctx->calc_cnt == 0) { ++ dma_ctl = CRYPTO_DMA_START; + -+ if (!bus->regulator) { -+ dev_dbg(dev, "%luHz -> %luHz\n", bus->cur_rate, target_rate); -+ ret = clk_set_rate(bus->clk, target_rate); -+ if (ret) -+ dev_err(bus->dev, "failed to set bus rate %lu\n", -+ target_rate); -+ else -+ bus->cur_rate = target_rate; -+ return ret; -+ } ++ lli_head->user_define |= LLI_USER_CIPHER_START; ++ lli_head->user_define |= LLI_USER_STRING_START; + -+ opp = devfreq_recommended_opp(dev, &target_rate, flags); -+ if (IS_ERR(opp)) { -+ dev_err(dev, "failed to recommended opp %lu\n", target_rate); -+ return PTR_ERR(opp); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, ++ (CRYPTO_HASH_ENABLE << CRYPTO_WRITE_MASK_SHIFT) | ++ CRYPTO_HASH_ENABLE); + } -+ target_volt = dev_pm_opp_get_voltage(opp); -+ dev_pm_opp_put(opp); + -+ if (bus->cur_rate == target_rate) { -+ if (bus->cur_volt == target_volt) -+ return 0; -+ ret = regulator_set_voltage(bus->regulator, target_volt, -+ INT_MAX); -+ if (ret) { -+ dev_err(dev, "failed to set voltage %lu\n", -+ target_volt); -+ return ret; -+ } -+ bus->cur_volt = target_volt; -+ return 0; -+ } else if (!bus->cur_volt) { -+ bus->cur_volt = regulator_get_voltage(bus->regulator); -+ } ++ if (is_final && alg_ctx->left_bytes == 0) ++ lli_tail->user_define |= LLI_USER_STRING_LAST; + -+ if (bus->cur_rate < target_rate) { -+ ret = regulator_set_voltage(bus->regulator, target_volt, -+ INT_MAX); -+ if (ret) { -+ dev_err(dev, "failed to set voltage %lu\n", -+ target_volt); -+ 
return ret; -+ } -+ } ++ CRYPTO_TRACE("dma_ctrl = %08x, user_define = %08x, len = %u", ++ lli_head->dma_ctrl, lli_head->user_define, alg_ctx->count); + -+ ret = clk_set_rate(bus->clk, target_rate); -+ if (ret) { -+ dev_err(dev, "failed to set bus rate %lu\n", target_rate); -+ return ret; -+ } ++ rk_crypto_dump_hw_desc(&hw_info->hw_desc); + -+ if (bus->cur_rate > target_rate) { -+ ret = regulator_set_voltage(bus->regulator, target_volt, -+ INT_MAX); -+ if (ret) { -+ dev_err(dev, "failed to set voltage %lu\n", -+ target_volt); -+ return ret; -+ } -+ } ++ dma_wmb(); + -+ dev_dbg(dev, "%luHz %luuV -> %luHz %luuV\n", bus->cur_rate, -+ bus->cur_volt, target_rate, target_volt); -+ bus->cur_rate = target_rate; -+ bus->cur_volt = target_volt; ++ /* enable crypto irq */ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); + -+ return ret; ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, dma_ctl | dma_ctl << CRYPTO_WRITE_MASK_SHIFT); ++ ++ return 0; +} + -+static int rockchip_bus_cpufreq_notifier(struct notifier_block *nb, -+ unsigned long event, void *data) ++static int rk_ahash_get_result(struct rk_crypto_dev *rk_dev, ++ uint8_t *data, uint32_t data_len) +{ -+ struct rockchip_bus *bus = to_rockchip_bus_cpufreq_nb(nb); -+ struct cpufreq_freqs *freqs = data; -+ int id = topology_physical_package_id(freqs->policy->cpu); ++ int ret = 0; ++ u32 reg_ctrl = 0; + -+ if (id < 0 || id >= MAX_CLUSTERS) -+ return NOTIFY_DONE; ++ ret = read_poll_timeout_atomic(CRYPTO_READ, reg_ctrl, ++ reg_ctrl & CRYPTO_HASH_IS_VALID, ++ RK_POLL_PERIOD_US, ++ RK_POLL_TIMEOUT_US, false, ++ rk_dev, CRYPTO_HASH_VALID); ++ if (ret) ++ goto exit; + -+ bus->cpu_freq[id] = freqs->new; ++ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_DOUT_0, data, data_len); + -+ if (!bus->cpu_freq[CLUSTER0] || !bus->cpu_freq[CLUSTER1]) -+ return NOTIFY_DONE; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_VALID, CRYPTO_HASH_IS_VALID); + -+ switch (event) { -+ case CPUFREQ_PRECHANGE: -+ if ((bus->cpu_freq[CLUSTER0] > bus->cpu_high_freq || -+ bus->cpu_freq[CLUSTER1] > bus->cpu_high_freq) && -+ bus->cur_rate != bus->high_rate) { -+ dev_dbg(bus->dev, "cpu%d freq=%d %d, up cci rate to %lu\n", -+ freqs->policy->cpu, -+ bus->cpu_freq[CLUSTER0], -+ bus->cpu_freq[CLUSTER1], -+ bus->high_rate); -+ rockchip_bus_cpufreq_target(bus->dev, bus->high_rate, -+ 0); -+ } -+ break; -+ case CPUFREQ_POSTCHANGE: -+ if (bus->cpu_freq[CLUSTER0] <= bus->cpu_high_freq && -+ bus->cpu_freq[CLUSTER1] <= bus->cpu_high_freq && -+ bus->cur_rate != bus->low_rate) { -+ dev_dbg(bus->dev, "cpu%d freq=%d %d, down cci rate to %lu\n", -+ freqs->policy->cpu, -+ bus->cpu_freq[CLUSTER0], -+ bus->cpu_freq[CLUSTER1], -+ bus->low_rate); -+ rockchip_bus_cpufreq_target(bus->dev, bus->low_rate, -+ 0); -+ } -+ break; -+ } ++exit: ++ clean_hash_setting(rk_dev); + -+ return NOTIFY_OK; ++ return ret; +} + -+static int rockchip_bus_cpufreq(struct rockchip_bus *bus) ++static int rk_cra_hash_init(struct crypto_tfm *tfm) +{ -+ struct device *dev = bus->dev; -+ struct device_node *np = dev->of_node; -+ unsigned int freq; -+ int ret = 0; ++ struct rk_crypto_algt *algt = ++ rk_ahash_get_algt(__crypto_ahash_cast(tfm)); ++ const char *alg_name = crypto_tfm_alg_name(tfm); ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ if (of_parse_phandle(dev->of_node, "operating-points-v2", 0)) { -+ ret = rockchip_bus_power_control_init(bus); -+ if (ret) { -+ dev_err(dev, "failed to init power control\n"); -+ return ret; -+ } -+ } else { -+ bus->clk = 
devm_clk_get(dev, "bus"); -+ if (IS_ERR(bus->clk)) { -+ dev_err(dev, "failed to get bus clock\n"); -+ return PTR_ERR(bus->clk); -+ } -+ bus->regulator = NULL; -+ } ++ CRYPTO_TRACE(); + -+ ret = of_property_read_u32(np, "cpu-high-freq", &bus->cpu_high_freq); -+ if (ret) { -+ dev_err(dev, "failed to get cpu-high-freq\n"); -+ return ret; -+ } -+ ret = of_property_read_u32(np, "cci-high-freq", &freq); -+ if (ret) { -+ dev_err(dev, "failed to get cci-high-freq\n"); -+ return ret; -+ } -+ bus->high_rate = freq * 1000; -+ ret = of_property_read_u32(np, "cci-low-freq", &freq); -+ if (ret) { -+ dev_err(dev, "failed to get cci-low-freq\n"); -+ return ret; -+ } -+ bus->low_rate = freq * 1000; ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ bus->cpufreq_nb.notifier_call = rockchip_bus_cpufreq_notifier; -+ ret = cpufreq_register_notifier(&bus->cpufreq_nb, -+ CPUFREQ_TRANSITION_NOTIFIER); -+ if (ret) { -+ dev_err(dev, "failed to register cpufreq notifier\n"); -+ return ret; ++ if (!rk_dev->request_crypto) ++ return -EFAULT; ++ ++ alg_ctx->align_size = RK_DMA_ALIGNMENT; ++ ++ alg_ctx->ops.start = rk_ahash_start; ++ alg_ctx->ops.update = rk_ahash_crypto_rx; ++ alg_ctx->ops.complete = rk_ahash_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ ++ alg_ctx->ops.hw_write_key = write_key_reg; ++ alg_ctx->ops.hw_init = rk_hw_hash_init; ++ alg_ctx->ops.hw_dma_start = rk_ahash_dma_start; ++ alg_ctx->ops.hw_get_result = rk_ahash_get_result; ++ ++ ctx->rk_dev = rk_dev; ++ ctx->hash_tmp = (u8 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); ++ if (!ctx->hash_tmp) { ++ dev_err(rk_dev->dev, "Can't get zeroed page for hash tmp.\n"); ++ return -ENOMEM; + } + -+ return 0; -+} ++ rk_dev->request_crypto(rk_dev, alg_name); + -+static const struct of_device_id rockchip_busfreq_of_match[] = { -+ { .compatible = "rockchip,px30-bus", }, -+ { .compatible = "rockchip,rk1808-bus", }, -+ { .compatible = "rockchip,rk3288-bus", }, -+ { .compatible = "rockchip,rk3368-bus", }, -+ { .compatible = "rockchip,rk3399-bus", }, -+ { .compatible = "rockchip,rk3528-bus", }, -+ { .compatible = "rockchip,rk3562-bus", }, -+ { .compatible = "rockchip,rk3568-bus", }, -+ { .compatible = "rockchip,rk3588-bus", }, -+ { .compatible = "rockchip,rv1126-bus", }, -+ { }, -+}; ++ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx)); + -+MODULE_DEVICE_TABLE(of, rockchip_busfreq_of_match); ++ algt->alg.hash.halg.statesize = sizeof(struct rk_ahash_expt_ctx); + -+static int rockchip_busfreq_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = dev->of_node; -+ struct rockchip_bus *bus; -+ const char *policy_name; -+ int ret = 0; ++ return 0; ++} + -+ bus = devm_kzalloc(dev, sizeof(*bus), GFP_KERNEL); -+ if (!bus) -+ return -ENOMEM; -+ bus->dev = dev; -+ platform_set_drvdata(pdev, bus); ++static void rk_cra_hash_exit(struct crypto_tfm *tfm) ++{ ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); + -+ ret = of_property_read_string(np, "rockchip,busfreq-policy", -+ &policy_name); -+ if (ret) { -+ dev_info(dev, "failed to get busfreq policy\n"); -+ return ret; -+ } ++ CRYPTO_TRACE(); + -+ if (!strcmp(policy_name, "smc")) -+ ret = rockchip_bus_smc_config(bus); -+ else if (!strcmp(policy_name, "clkfreq")) -+ ret = rockchip_bus_clkfreq(bus); -+ else if (!strcmp(policy_name, "cpufreq")) -+ ret = rockchip_bus_cpufreq(bus); ++ if (ctx->hash_tmp) ++ free_page((unsigned long)ctx->hash_tmp); + -+ return ret; ++ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); +} + -+static 
struct platform_driver rockchip_busfreq_driver = { -+ .probe = rockchip_busfreq_probe, -+ .driver = { -+ .name = "rockchip,bus", -+ .of_match_table = rockchip_busfreq_of_match, -+ }, -+}; ++struct rk_crypto_algt rk_v2_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); ++struct rk_crypto_algt rk_v2_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); ++struct rk_crypto_algt rk_v2_ahash_sha224 = RK_HASH_ALGO_INIT(SHA224, sha224); ++struct rk_crypto_algt rk_v2_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); ++struct rk_crypto_algt rk_v2_ahash_sha384 = RK_HASH_ALGO_INIT(SHA384, sha384); ++struct rk_crypto_algt rk_v2_ahash_sha512 = RK_HASH_ALGO_INIT(SHA512, sha512); ++struct rk_crypto_algt rk_v2_ahash_sm3 = RK_HASH_ALGO_INIT(SM3, sm3); + -+module_platform_driver(rockchip_busfreq_driver); ++struct rk_crypto_algt rk_v2_hmac_md5 = RK_HMAC_ALGO_INIT(MD5, md5); ++struct rk_crypto_algt rk_v2_hmac_sha1 = RK_HMAC_ALGO_INIT(SHA1, sha1); ++struct rk_crypto_algt rk_v2_hmac_sha256 = RK_HMAC_ALGO_INIT(SHA256, sha256); ++struct rk_crypto_algt rk_v2_hmac_sha512 = RK_HMAC_ALGO_INIT(SHA512, sha512); ++struct rk_crypto_algt rk_v2_hmac_sm3 = RK_HMAC_ALGO_INIT(SM3, sm3); + -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Tony Xie "); -+MODULE_DESCRIPTION("rockchip busfreq driver with devfreq framework"); -diff --git a/drivers/devfreq/rockchip_dmc.c b/drivers/devfreq/rockchip_dmc.c +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c b/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c new file mode 100644 -index 000000000..4c373c6ab +index 000000000..1db5adde9 --- /dev/null -+++ b/drivers/devfreq/rockchip_dmc.c -@@ -0,0 +1,3411 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++++ b/drivers/crypto/rockchip/rk_crypto_v2_akcipher.c +@@ -0,0 +1,320 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Rockchip Generic dmc support. ++ * RSA acceleration support for Rockchip crypto v2 + * -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. -+ * Author: Finley Xiao ++ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. ++ * ++ * Author: Lin Jinhan ++ * ++ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. 
+ */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include +#include -+#include -+#include -+#include + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include "rk_crypto_core.h" ++#include "rk_crypto_v2.h" ++#include "rk_crypto_v2_reg.h" ++#include "rk_crypto_v2_pka.h" + -+#include "governor.h" -+#include "rockchip_dmc_timing.h" -+#include "../clk/rockchip-oh/clk.h" -+#include "../gpu/drm/rockchip-oh/rockchip_drm_drv.h" -+#include "../opp/opp.h" ++#define BG_WORDS2BYTES(words) ((words) * sizeof(u32)) ++#define BG_BYTES2WORDS(bytes) (((bytes) + sizeof(u32) - 1) / sizeof(u32)) + -+#define system_status_to_dmcfreq(nb) container_of(nb, struct rockchip_dmcfreq, \ -+ status_nb) -+#define reboot_to_dmcfreq(nb) container_of(nb, struct rockchip_dmcfreq, \ -+ reboot_nb) -+#define boost_to_dmcfreq(work) container_of(work, struct rockchip_dmcfreq, \ -+ boost_work) -+#define input_hd_to_dmcfreq(hd) container_of(hd, struct rockchip_dmcfreq, \ -+ input_handler) ++static DEFINE_MUTEX(akcipher_mutex); + -+#define VIDEO_1080P_SIZE (1920 * 1080) -+#define DTS_PAR_OFFSET (4096) ++static void rk_rsa_adjust_rsa_key(struct rsa_key *key) ++{ ++ if (key->n_sz && key->n && !key->n[0]) { ++ key->n++; ++ key->n_sz--; ++ } + -+#define FALLBACK_STATIC_TEMPERATURE 55000 ++ if (key->e_sz && key->e && !key->e[0]) { ++ key->e++; ++ key->e_sz--; ++ } + -+struct dmc_freq_table { -+ unsigned long freq; -+ struct dev_pm_opp_supply supplies[2]; -+}; ++ if (key->d_sz && key->d && !key->d[0]) { ++ key->d++; ++ key->d_sz--; ++ } ++} + -+struct share_params { -+ u32 hz; -+ u32 lcdc_type; -+ u32 vop; -+ u32 vop_dclk_mode; -+ u32 sr_idle_en; -+ u32 addr_mcu_el3; -+ /* -+ * 1: need to wait flag1 -+ * 0: never wait flag1 -+ */ -+ u32 wait_flag1; -+ /* -+ * 1: need to wait flag1 -+ * 0: never wait flag1 -+ */ -+ u32 wait_flag0; -+ u32 complt_hwirq; -+ u32 update_drv_odt_cfg; -+ u32 update_deskew_cfg; ++static void rk_rsa_clear_ctx(struct rk_rsa_ctx *ctx) ++{ ++ /* Free the old key if any */ ++ rk_bn_free(ctx->n); ++ ctx->n = NULL; + -+ u32 freq_count; -+ u32 freq_info_mhz[6]; -+ u32 wait_mode; -+ u32 vop_scan_line_time_ns; -+ /* if need, add parameter after */ -+}; ++ rk_bn_free(ctx->e); ++ ctx->e = NULL; + -+static struct share_params *ddr_psci_param; ++ rk_bn_free(ctx->d); ++ ctx->d = NULL; ++} + -+struct rockchip_dmcfreq_ondemand_data { -+ unsigned int upthreshold; -+ unsigned int downdifferential; -+}; ++static int rk_rsa_setkey(struct crypto_akcipher *tfm, const void *key, ++ unsigned int keylen, bool private) ++{ ++ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); ++ struct rsa_key rsa_key; ++ int ret = -ENOMEM; + -+struct rockchip_dmcfreq { -+ struct device *dev; -+ struct dmcfreq_common_info info; -+ struct rockchip_dmcfreq_ondemand_data ondemand_data; -+ struct clk *dmc_clk; -+ struct devfreq_event_dev **edev; -+ struct mutex lock; /* serializes access to video_info_list */ -+ struct dram_timing *timing; -+ struct notifier_block status_nb; -+ struct notifier_block panic_nb; -+ struct list_head video_info_list; -+ struct freq_map_table *cpu_bw_tbl; -+ struct work_struct boost_work; -+ struct input_handler input_handler; -+ struct monitor_dev_info *mdev_info; -+ struct share_params *set_rate_params; -+ struct rockchip_opp_info opp_info; ++ rk_rsa_clear_ctx(ctx); + -+ unsigned long *nocp_bw; 
-+ unsigned long rate; -+ unsigned long volt, mem_volt; -+ unsigned long sleep_volt, sleep_mem_volt; -+ unsigned long auto_min_rate; -+ unsigned long status_rate; -+ unsigned long normal_rate; -+ unsigned long video_1080p_rate; -+ unsigned long video_4k_rate; -+ unsigned long video_4k_10b_rate; -+ unsigned long video_4k_60p_rate; -+ unsigned long video_svep_rate; -+ unsigned long performance_rate; -+ unsigned long hdmi_rate; -+ unsigned long hdmirx_rate; -+ unsigned long idle_rate; -+ unsigned long suspend_rate; -+ unsigned long deep_suspend_rate; -+ unsigned long reboot_rate; -+ unsigned long boost_rate; -+ unsigned long fixed_rate; -+ unsigned long low_power_rate; ++ memset(&rsa_key, 0x00, sizeof(rsa_key)); + -+ unsigned long freq_count; -+ unsigned long freq_info_rate[6]; -+ unsigned long rate_low; -+ unsigned long rate_mid_low; -+ unsigned long rate_mid_high; -+ unsigned long rate_high; ++ if (private) ++ ret = rsa_parse_priv_key(&rsa_key, key, keylen); ++ else ++ ret = rsa_parse_pub_key(&rsa_key, key, keylen); + -+ unsigned int min_cpu_freq; -+ unsigned int system_status_en; -+ unsigned int refresh; -+ int edev_count; -+ int dfi_id; -+ int nocp_cpu_id; ++ if (ret < 0) ++ goto error; + -+ bool is_fixed; -+ bool is_set_rate_direct; ++ rk_rsa_adjust_rsa_key(&rsa_key); + -+ unsigned int touchboostpulse_duration_val; -+ u64 touchboostpulse_endtime; ++ ctx->n = rk_bn_alloc(rsa_key.n_sz); ++ if (!ctx->n) ++ goto error; + -+ int (*set_auto_self_refresh)(u32 en); -+}; ++ ctx->e = rk_bn_alloc(rsa_key.e_sz); ++ if (!ctx->e) ++ goto error; + -+static struct pm_qos_request pm_qos; ++ rk_bn_set_data(ctx->n, rsa_key.n, rsa_key.n_sz, RK_BG_BIG_ENDIAN); ++ rk_bn_set_data(ctx->e, rsa_key.e, rsa_key.e_sz, RK_BG_BIG_ENDIAN); + -+static int rockchip_dmcfreq_check_rate_volt(struct monitor_dev_info *info); ++ CRYPTO_DUMPHEX("n = ", ctx->n->data, BG_WORDS2BYTES(ctx->n->n_words)); ++ CRYPTO_DUMPHEX("e = ", ctx->e->data, BG_WORDS2BYTES(ctx->e->n_words)); + -+static struct monitor_dev_profile dmc_mdevp = { -+ .type = MONITOR_TYPE_DEV, -+ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, -+ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, -+ .check_rate_volt = rockchip_dmcfreq_check_rate_volt, -+}; ++ if (private) { ++ ctx->d = rk_bn_alloc(rsa_key.d_sz); ++ if (!ctx->d) ++ goto error; + -+static inline unsigned long is_dualview(unsigned long status) ++ rk_bn_set_data(ctx->d, rsa_key.d, rsa_key.d_sz, RK_BG_BIG_ENDIAN); ++ ++ CRYPTO_DUMPHEX("d = ", ctx->d->data, BG_WORDS2BYTES(ctx->d->n_words)); ++ } ++ ++ return 0; ++error: ++ rk_rsa_clear_ctx(ctx); ++ return ret; ++} ++ ++static unsigned int rk_rsa_max_size(struct crypto_akcipher *tfm) +{ -+ return (status & SYS_STATUS_LCDC0) && (status & SYS_STATUS_LCDC1); ++ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); ++ ++ CRYPTO_TRACE(); ++ ++ return rk_bn_get_size(ctx->n); +} + -+static inline unsigned long is_isp(unsigned long status) ++static int rk_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, ++ unsigned int keylen) +{ -+ return (status & SYS_STATUS_ISP) || -+ (status & SYS_STATUS_CIF0) || -+ (status & SYS_STATUS_CIF1); ++ CRYPTO_TRACE(); ++ ++ return rk_rsa_setkey(tfm, key, keylen, false); +} + -+/* -+ * function: packaging de-skew setting to px30_ddr_dts_config_timing, -+ * px30_ddr_dts_config_timing will pass to trust firmware, and -+ * used direct to set register. 
-+ * input: de_skew -+ * output: tim -+ */ -+static void px30_de_skew_set_2_reg(struct rk3328_ddr_de_skew_setting *de_skew, -+ struct px30_ddr_dts_config_timing *tim) ++static int rk_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key, ++ unsigned int keylen) +{ -+ u32 n; -+ u32 offset; -+ u32 shift; ++ CRYPTO_TRACE(); + -+ memset_io(tim->ca_skew, 0, sizeof(tim->ca_skew)); -+ memset_io(tim->cs0_skew, 0, sizeof(tim->cs0_skew)); -+ memset_io(tim->cs1_skew, 0, sizeof(tim->cs1_skew)); ++ return rk_rsa_setkey(tfm, key, keylen, true); ++} + -+ /* CA de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->ca_de_skew); n++) { -+ offset = n / 2; -+ shift = n % 2; -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 4 : 0; -+ tim->ca_skew[offset] &= ~(0xf << shift); -+ tim->ca_skew[offset] |= (de_skew->ca_de_skew[n] << shift); ++static int rk_rsa_calc(struct akcipher_request *req, bool encypt) ++{ ++ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); ++ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); ++ struct rk_bignum *in = NULL, *out = NULL; ++ u32 key_byte_size; ++ u8 *tmp_buf = NULL; ++ int ret = -ENOMEM; ++ ++ CRYPTO_TRACE(); ++ ++ if (unlikely(!ctx->n || !ctx->e)) ++ return -EINVAL; ++ ++ if (!encypt && !ctx->d) ++ return -EINVAL; ++ ++ key_byte_size = rk_bn_get_size(ctx->n); ++ ++ if (req->dst_len < key_byte_size) { ++ req->dst_len = key_byte_size; ++ return -EOVERFLOW; + } + -+ /* CS0 data de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->cs0_de_skew); n++) { -+ offset = ((n / 21) * 11) + ((n % 21) / 2); -+ shift = ((n % 21) % 2); -+ if ((n % 21) == 20) -+ shift = 0; -+ else -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 4 : 0; -+ tim->cs0_skew[offset] &= ~(0xf << shift); -+ tim->cs0_skew[offset] |= (de_skew->cs0_de_skew[n] << shift); ++ if (req->src_len > key_byte_size) ++ return -EINVAL; ++ ++ in = rk_bn_alloc(key_byte_size); ++ if (!in) ++ goto exit; ++ ++ out = rk_bn_alloc(key_byte_size); ++ if (!out) ++ goto exit; ++ ++ tmp_buf = kzalloc(key_byte_size, GFP_KERNEL); ++ if (!tmp_buf) ++ goto exit; ++ ++ if (!sg_copy_to_buffer(req->src, sg_nents(req->src), tmp_buf, req->src_len)) { ++ dev_err(ctx->rk_dev->dev, "[%s:%d] sg copy err\n", ++ __func__, __LINE__); ++ ret = -EINVAL; ++ goto exit; + } + -+ /* CS1 data de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->cs1_de_skew); n++) { -+ offset = ((n / 21) * 11) + ((n % 21) / 2); -+ shift = ((n % 21) % 2); -+ if ((n % 21) == 20) -+ shift = 0; -+ else -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 
4 : 0; -+ tim->cs1_skew[offset] &= ~(0xf << shift); -+ tim->cs1_skew[offset] |= (de_skew->cs1_de_skew[n] << shift); ++ ret = rk_bn_set_data(in, tmp_buf, req->src_len, RK_BG_BIG_ENDIAN); ++ if (ret) ++ goto exit; ++ ++ CRYPTO_DUMPHEX("in = ", in->data, BG_WORDS2BYTES(in->n_words)); ++ ++ mutex_lock(&akcipher_mutex); ++ ++ if (encypt) ++ ret = rk_pka_expt_mod(in, ctx->e, ctx->n, out); ++ else ++ ret = rk_pka_expt_mod(in, ctx->d, ctx->n, out); ++ ++ mutex_unlock(&akcipher_mutex); ++ ++ if (ret) ++ goto exit; ++ ++ CRYPTO_DUMPHEX("out = ", out->data, BG_WORDS2BYTES(out->n_words)); ++ ++ ret = rk_bn_get_data(out, tmp_buf, key_byte_size, RK_BG_BIG_ENDIAN); ++ if (ret) ++ goto exit; ++ ++ CRYPTO_DUMPHEX("tmp_buf = ", tmp_buf, key_byte_size); ++ ++ if (!sg_copy_from_buffer(req->dst, sg_nents(req->dst), tmp_buf, key_byte_size)) { ++ dev_err(ctx->rk_dev->dev, "[%s:%d] sg copy err\n", ++ __func__, __LINE__); ++ ret = -EINVAL; ++ goto exit; + } ++ ++ req->dst_len = key_byte_size; ++ ++ CRYPTO_TRACE("ret = %d", ret); ++exit: ++ kfree(tmp_buf); ++ ++ rk_bn_free(in); ++ rk_bn_free(out); ++ ++ return ret; +} + -+/* -+ * function: packaging de-skew setting to rk3328_ddr_dts_config_timing, -+ * rk3328_ddr_dts_config_timing will pass to trust firmware, and -+ * used direct to set register. -+ * input: de_skew -+ * output: tim -+ */ -+static void -+rk3328_de_skew_setting_2_register(struct rk3328_ddr_de_skew_setting *de_skew, -+ struct rk3328_ddr_dts_config_timing *tim) ++static int rk_rsa_enc(struct akcipher_request *req) +{ -+ u32 n; -+ u32 offset; -+ u32 shift; ++ CRYPTO_TRACE(); + -+ memset_io(tim->ca_skew, 0, sizeof(tim->ca_skew)); -+ memset_io(tim->cs0_skew, 0, sizeof(tim->cs0_skew)); -+ memset_io(tim->cs1_skew, 0, sizeof(tim->cs1_skew)); ++ return rk_rsa_calc(req, true); ++} + -+ /* CA de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->ca_de_skew); n++) { -+ offset = n / 2; -+ shift = n % 2; -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 4 : 0; -+ tim->ca_skew[offset] &= ~(0xf << shift); -+ tim->ca_skew[offset] |= (de_skew->ca_de_skew[n] << shift); -+ } ++static int rk_rsa_dec(struct akcipher_request *req) ++{ ++ CRYPTO_TRACE(); + -+ /* CS0 data de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->cs0_de_skew); n++) { -+ offset = ((n / 21) * 11) + ((n % 21) / 2); -+ shift = ((n % 21) % 2); -+ if ((n % 21) == 20) -+ shift = 0; -+ else -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 4 : 0; -+ tim->cs0_skew[offset] &= ~(0xf << shift); -+ tim->cs0_skew[offset] |= (de_skew->cs0_de_skew[n] << shift); -+ } ++ return rk_rsa_calc(req, false); ++} + -+ /* CS1 data de-skew */ -+ for (n = 0; n < ARRAY_SIZE(de_skew->cs1_de_skew); n++) { -+ offset = ((n / 21) * 11) + ((n % 21) / 2); -+ shift = ((n % 21) % 2); -+ if ((n % 21) == 20) -+ shift = 0; -+ else -+ /* 0 => 4; 1 => 0 */ -+ shift = (shift == 0) ? 
4 : 0; -+ tim->cs1_skew[offset] &= ~(0xf << shift); -+ tim->cs1_skew[offset] |= (de_skew->cs1_de_skew[n] << shift); -+ } ++static int rk_rsa_start(struct rk_crypto_dev *rk_dev) ++{ ++ CRYPTO_TRACE(); ++ ++ return -ENOSYS; +} + -+static int rk_drm_get_lcdc_type(void) ++static int rk_rsa_crypto_rx(struct rk_crypto_dev *rk_dev) +{ -+ u32 lcdc_type = rockchip_drm_get_sub_dev_type(); ++ CRYPTO_TRACE(); + -+ switch (lcdc_type) { -+ case DRM_MODE_CONNECTOR_DPI: -+ case DRM_MODE_CONNECTOR_LVDS: -+ lcdc_type = SCREEN_LVDS; -+ break; -+ case DRM_MODE_CONNECTOR_DisplayPort: -+ lcdc_type = SCREEN_DP; -+ break; -+ case DRM_MODE_CONNECTOR_HDMIA: -+ case DRM_MODE_CONNECTOR_HDMIB: -+ lcdc_type = SCREEN_HDMI; -+ break; -+ case DRM_MODE_CONNECTOR_TV: -+ lcdc_type = SCREEN_TVOUT; -+ break; -+ case DRM_MODE_CONNECTOR_eDP: -+ lcdc_type = SCREEN_EDP; -+ break; -+ case DRM_MODE_CONNECTOR_DSI: -+ lcdc_type = SCREEN_MIPI; -+ break; -+ default: -+ lcdc_type = SCREEN_NULL; -+ break; -+ } ++ return -ENOSYS; ++} + -+ return lcdc_type; ++static void rk_rsa_complete(struct crypto_async_request *base, int err) ++{ ++ if (base->complete) ++ base->complete(base, err); +} + -+static int rockchip_ddr_set_rate(unsigned long target_rate) ++static int rk_rsa_init_tfm(struct crypto_akcipher *tfm) +{ -+ struct arm_smccc_res res; ++ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); ++ struct akcipher_alg *alg = __crypto_akcipher_alg(tfm->base.__crt_alg); ++ struct rk_crypto_algt *algt; ++ struct rk_crypto_dev *rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ ddr_psci_param->hz = target_rate; -+ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); -+ ddr_psci_param->vop_scan_line_time_ns = rockchip_drm_get_scan_line_time_ns(); -+ ddr_psci_param->wait_flag1 = 1; -+ ddr_psci_param->wait_flag0 = 1; ++ CRYPTO_TRACE(); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE); ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ if ((int)res.a1 == SIP_RET_SET_RATE_TIMEOUT) -+ rockchip_dmcfreq_wait_complete(); ++ algt = container_of(alg, struct rk_crypto_algt, alg.asym); ++ rk_dev = algt->rk_dev; + -+ return res.a0; ++ if (!rk_dev->request_crypto) ++ return -EFAULT; ++ ++ rk_dev->request_crypto(rk_dev, "rsa"); ++ ++ alg_ctx->align_size = crypto_tfm_alg_alignmask(&tfm->base) + 1; ++ ++ alg_ctx->ops.start = rk_rsa_start; ++ alg_ctx->ops.update = rk_rsa_crypto_rx; ++ alg_ctx->ops.complete = rk_rsa_complete; ++ ++ ctx->rk_dev = rk_dev; ++ ++ rk_pka_set_crypto_base(ctx->rk_dev->pka_reg); ++ ++ return 0; +} + -+static int rockchip_dmcfreq_set_volt(struct device *dev, struct regulator *reg, -+ struct dev_pm_opp_supply *supply, -+ char *reg_name) ++static void rk_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ -+ int ret; ++ struct rk_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + -+ dev_dbg(dev, "%s: %s voltages (mV): %lu %lu %lu\n", __func__, reg_name, -+ supply->u_volt_min, supply->u_volt, supply->u_volt_max); -+ ret = regulator_set_voltage_triplet(reg, supply->u_volt_min, -+ supply->u_volt, INT_MAX); -+ if (ret) -+ dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", -+ __func__, supply->u_volt_min, supply->u_volt, -+ supply->u_volt_max, ret); ++ CRYPTO_TRACE(); + -+ return ret; ++ rk_rsa_clear_ctx(ctx); ++ ++ ctx->rk_dev->release_crypto(ctx->rk_dev, "rsa"); +} + -+static int rockchip_dmcfreq_opp_set_rate(struct device *dev, -+ struct rockchip_dmcfreq *dmcfreq, -+ unsigned long *freq) -+{ -+ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; -+ unsigned int reg_count = opp_info->regulator_count; -+ struct 
regulator *vdd_reg = NULL; -+ struct regulator *mem_reg = NULL; -+ struct clk *clk = opp_info->clk; -+ struct dev_pm_opp *opp; -+ struct dev_pm_opp_supply supplies[2]; -+ unsigned long old_freq = dmcfreq->rate; -+ unsigned long new_freq; -+ int old_volt = dmcfreq->volt; -+ int old_volt_mem = dmcfreq->mem_volt; -+ struct cpufreq_policy *policy; -+ bool is_cpufreq_changed = false; -+ unsigned int cpu_cur, cpufreq_cur; -+ int ret = 0; ++struct rk_crypto_algt rk_v2_asym_rsa = { ++ .name = "rsa", ++ .type = ALG_TYPE_ASYM, ++ .alg.asym = { ++ .encrypt = rk_rsa_enc, ++ .decrypt = rk_rsa_dec, ++ .set_pub_key = rk_rsa_setpubkey, ++ .set_priv_key = rk_rsa_setprivkey, ++ .max_size = rk_rsa_max_size, ++ .init = rk_rsa_init_tfm, ++ .exit = rk_rsa_exit_tfm, ++ .base = { ++ .cra_name = "rsa", ++ .cra_driver_name = "rsa-rk", ++ .cra_priority = RK_CRYPTO_PRIORITY, ++ .cra_module = THIS_MODULE, ++ .cra_ctxsize = sizeof(struct rk_rsa_ctx), ++ }, ++ }, ++}; + -+ vdd_reg = opp_info->regulators[0]; -+ if (reg_count > 1) -+ mem_reg = opp_info->regulators[1]; +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_pka.c b/drivers/crypto/rockchip/rk_crypto_v2_pka.c +new file mode 100644 +index 000000000..d2c0a265b +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v2_pka.c +@@ -0,0 +1,686 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2020 Rockchip Electronics Co., Ltd. ++ */ + -+ opp = dev_pm_opp_find_freq_ceil(dev, freq); -+ if (IS_ERR(opp)) { -+ opp = dev_pm_opp_find_freq_floor(dev, freq); -+ if (IS_ERR(opp)) -+ return PTR_ERR(opp); -+ } -+ new_freq = *freq; -+ ret = dev_pm_opp_get_supplies(opp, supplies); -+ if (ret) -+ return ret; -+ dev_pm_opp_put(opp); ++#include + -+ /* -+ * We need to prevent cpu hotplug from happening while a dmc freq rate -+ * change is happening. -+ * -+ * Do this before taking the policy rwsem to avoid deadlocks between the -+ * mutex that is locked/unlocked in cpu_hotplug_disable/enable. And it -+ * can also avoid deadlocks between the mutex that is locked/unlocked -+ * in cpus_read_lock/unlock (such as store_scaling_max_freq()). -+ */ -+ cpus_read_lock(); ++#include "rk_crypto_core.h" ++#include "rk_crypto_v2.h" ++#include "rk_crypto_v2_reg.h" ++#include "rk_crypto_v2_pka.h" + -+ if (dmcfreq->min_cpu_freq) { -+ /* -+ * Go to specified cpufreq and block other cpufreq changes since -+ * set_rate needs to complete during vblank. -+ */ -+ cpu_cur = raw_smp_processor_id(); -+ policy = cpufreq_cpu_get(cpu_cur); -+ if (!policy) { -+ dev_err(dev, "cpu%d policy NULL\n", cpu_cur); -+ ret = -EINVAL; -+ goto cpus_unlock; -+ } -+ down_write(&policy->rwsem); -+ cpufreq_cur = cpufreq_quick_get(cpu_cur); ++#define PKA_WORDS2BITS(words) ((words) * 32) ++#define PKA_BITS2WORDS(bits) (((bits) + 31) / 32) + -+ /* If we're thermally throttled; don't change; */ -+ if (cpufreq_cur < dmcfreq->min_cpu_freq) { -+ if (policy->max >= dmcfreq->min_cpu_freq) { -+ __cpufreq_driver_target(policy, -+ dmcfreq->min_cpu_freq, -+ CPUFREQ_RELATION_L); -+ is_cpufreq_changed = true; -+ } else { -+ dev_dbg(dev, -+ "CPU may too slow for DMC (%d MHz)\n", -+ policy->max); -+ } -+ } -+ } ++#define PKA_WORDS2BYTES(words) ((words) * 4) ++#define PKA_BYTES2BITS(bytes) ((bytes) * 8) + -+ /* Scaling up? 
Scale voltage before frequency */ -+ if (new_freq >= old_freq) { -+ if (reg_count > 1) { -+ ret = rockchip_dmcfreq_set_volt(dev, mem_reg, -+ &supplies[1], "mem"); -+ if (ret) -+ goto restore_voltage; -+ } -+ ret = rockchip_dmcfreq_set_volt(dev, vdd_reg, &supplies[0], "vdd"); -+ if (ret) -+ goto restore_voltage; -+ if (new_freq == old_freq) -+ goto out; -+ } ++/* PKA length set */ ++enum { ++ PKA_EXACT_LEN_ID = 0, ++ PKA_CALC_LEN_ID, ++ PKA_USED_LEN_MAX, ++}; + -+ /* -+ * Writer in rwsem may block readers even during its waiting in queue, -+ * and this may lead to a deadlock when the code path takes read sem -+ * twice (e.g. one in vop_lock() and another in rockchip_pmu_lock()). -+ * As a (suboptimal) workaround, let writer to spin until it gets the -+ * lock. -+ */ -+ while (!rockchip_dmcfreq_write_trylock()) -+ cond_resched(); -+ dev_dbg(dev, "%lu Hz --> %lu Hz\n", old_freq, new_freq); ++/********************* Private MACRO Definition ******************************/ ++#define PKA_POLL_PERIOD_US 1000 ++#define PKA_POLL_TIMEOUT_US 50000 + -+ if (dmcfreq->set_rate_params) { -+ dmcfreq->set_rate_params->lcdc_type = rk_drm_get_lcdc_type(); -+ dmcfreq->set_rate_params->wait_flag1 = 1; -+ dmcfreq->set_rate_params->wait_flag0 = 1; -+ } ++/* for private key EXP_MOD operation */ ++#define PKA_MAX_POLL_PERIOD_US 20000 ++#define PKA_MAX_POLL_TIMEOUT_US 2000000 + -+ if (dmcfreq->is_set_rate_direct) -+ ret = rockchip_ddr_set_rate(new_freq); -+ else -+ ret = clk_set_rate(clk, new_freq); ++#define PKA_MAX_CALC_BITS 4096 ++#define PKA_MAX_CALC_WORDS PKA_BITS2WORDS(PKA_MAX_CALC_BITS) + -+ rockchip_dmcfreq_write_unlock(); -+ if (ret) { -+ dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, -+ ret); -+ goto restore_voltage; -+ } ++/* PKA N_NP_T0_T1 register default (reset) value: N=0, NP=1, T0=30, T1=31 */ ++#define PKA_N 0UL ++#define PKA_NP 1UL ++#define PKA_T0 30UL /*tmp reg */ ++#define PKA_T1 31UL /*tmp reg */ ++#define PKA_TMP_REG_CNT 2 + -+ /* -+ * Check the dpll rate, -+ * There only two result we will get, -+ * 1. Ddr frequency scaling fail, we still get the old rate. -+ * 2. Ddr frequency scaling successful, we get the rate we set. -+ */ -+ dmcfreq->rate = clk_get_rate(clk); ++#define PKA_N_NP_T0_T1_REG_DEFAULT \ ++ (PKA_N << CRYPTO_N_VIRTUAL_ADDR_SHIFT | \ ++ PKA_NP << CRYPTO_NP_VIRTUAL_ADDR_SHIFT | \ ++ PKA_T0 << CRYPTO_T0_VIRTUAL_ADDR_SHIFT | \ ++ PKA_T1 << CRYPTO_T1_VIRTUAL_ADDR_SHIFT) + -+ /* If get the incorrect rate, set voltage to old value. */ -+ if (dmcfreq->rate != new_freq) { -+ dev_err(dev, "Get wrong frequency, Request %lu, Current %lu\n", -+ new_freq, dmcfreq->rate); -+ ret = -EINVAL; -+ goto restore_voltage; -+ } ++#define RES_DISCARD 0x3F + -+ /* Scaling down? 
Scale voltage after frequency */ -+ if (new_freq < old_freq) { -+ ret = rockchip_dmcfreq_set_volt(dev, vdd_reg, &supplies[0], -+ "vdd"); -+ if (ret) -+ goto restore_freq; -+ if (reg_count > 1) { -+ ret = rockchip_dmcfreq_set_volt(dev, mem_reg, -+ &supplies[1], "mem"); -+ if (ret) -+ goto restore_freq; -+ } -+ } -+ dmcfreq->volt = supplies[0].u_volt; -+ if (reg_count > 1) -+ dmcfreq->mem_volt = supplies[1].u_volt; ++/* values for defining, that PKA entry is not in use */ ++#define PKA_ADDR_NOT_USED 0xFFC + -+ goto out; ++/* Machine Opcodes definitions (according to HW CRS ) */ + -+restore_freq: -+ if (dmcfreq->is_set_rate_direct) -+ ret = rockchip_ddr_set_rate(new_freq); -+ else -+ ret = clk_set_rate(clk, new_freq); -+ if (ret) -+ dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", -+ __func__, old_freq); -+restore_voltage: -+ if (reg_count > 1 && old_volt_mem) -+ regulator_set_voltage(mem_reg, old_volt_mem, INT_MAX); -+ if (old_volt) -+ regulator_set_voltage(vdd_reg, old_volt, INT_MAX); -+out: -+ if (dmcfreq->min_cpu_freq) { -+ if (is_cpufreq_changed) -+ __cpufreq_driver_target(policy, cpufreq_cur, -+ CPUFREQ_RELATION_L); -+ up_write(&policy->rwsem); -+ cpufreq_cpu_put(policy); -+ } -+cpus_unlock: -+ cpus_read_unlock(); ++enum pka_opcode { ++ PKA_OPCODE_ADD = 0x04, ++ PKA_OPCODE_SUB, ++ PKA_OPCODE_MOD_ADD, ++ PKA_OPCODE_MOD_SUB, ++ PKA_OPCODE_AND, ++ PKA_OPCODE_OR, ++ PKA_OPCODE_XOR, ++ PKA_OPCODE_SHR0 = 0x0C, ++ PKA_OPCODE_SHR1, ++ PKA_OPCODE_SHL0, ++ PKA_OPCODE_SHL1, ++ PKA_OPCODE_LMUL, ++ PKA_OPCODE_MOD_MUL, ++ PKA_OPCODE_MOD_MUL_NR, ++ PKA_OPCODE_MOD_EXP, ++ PKA_OPCODE_DIV, ++ PKA_OPCODE_MOD_INV, ++ PKA_OPCODE_MOD_DIV, ++ PKA_OPCODE_HMUL, ++ PKA_OPCODE_TERMINATE, ++}; + -+ return ret; -+} ++#define PKA_CLK_ENABLE() ++#define PKA_CLK_DISABLE() + -+static int rockchip_dmcfreq_check_rate_volt(struct monitor_dev_info *info) -+{ -+ struct device *dev = info->dev; -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; -+ unsigned long cur_freq = dmcfreq->rate; ++#define PKA_READ(offset) readl_relaxed((pka_base) + (offset)) ++#define PKA_WRITE(val, offset) writel_relaxed((val), (pka_base) + (offset)) + -+ rockchip_opp_dvfs_lock(opp_info); -+ rockchip_dmcfreq_opp_set_rate(dev, dmcfreq, &cur_freq); -+ opp_info->is_rate_volt_checked = true; -+ rockchip_opp_dvfs_unlock(opp_info); ++#define PKA_BIGNUM_WORDS(x) (rk_bn_get_size(x) / sizeof(u32)) + -+ return 0; -+} ++#define PKA_RAM_FOR_PKA() PKA_WRITE((CRYPTO_RAM_PKA_RDY << CRYPTO_WRITE_MASK_SHIFT) | \ ++ CRYPTO_RAM_PKA_RDY, CRYPTO_RAM_CTL) + -+static int rockchip_dmcfreq_target(struct device *dev, unsigned long *freq, -+ u32 flags) ++#define PKA_RAM_FOR_CPU() do { \ ++ PKA_WRITE((CRYPTO_RAM_PKA_RDY << CRYPTO_WRITE_MASK_SHIFT), CRYPTO_RAM_CTL); \ ++ while ((PKA_READ(CRYPTO_RAM_ST) & 0x01) != CRYPTO_CLK_RAM_RDY) \ ++ cpu_relax(); \ ++} while (0) ++ ++#define PKA_GET_SRAM_ADDR(addr) ((void *)(pka_base + CRYPTO_SRAM_BASE + (addr))) ++ ++/************************************************************************* ++ * Macros for calling PKA operations (names according to operation issue * ++ *************************************************************************/ ++ ++/*--------------------------------------*/ ++/* 1. 
ADD - SUBTRACT operations */ ++/*--------------------------------------*/ ++/* Add: res = op_a + op_b */ ++#define RK_PKA_ADD(op_a, op_b, res) pka_exec_op(PKA_OPCODE_ADD, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 0, (op_b), 0, (res), 0) ++ ++/* Clr: res = op_a & 0 - clears the operand A. */ ++#define RK_PKA_CLR(op_a) pka_exec_op(PKA_OPCODE_AND, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 1, 0x00, 0, (op_a), 0) ++ ++/* Copy: OpDest = OpSrc || 0 */ ++#define RK_PKA_COPY(op_dest, op_src) pka_exec_op(PKA_OPCODE_OR, PKA_CALC_LEN_ID, \ ++ 0, (op_src), 1, 0x00, 0, (op_dest), 0) ++ ++/* Set0: res = op_a || 1 : set bit0 = 1, other bits are not changed */ ++#define RK_PKA_SET_0(op_a, res) pka_exec_op(PKA_OPCODE_OR, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 1, 0x01, 0, (res), 0) ++ ++/*----------------------------------------------*/ ++/* 3. SHIFT operations */ ++/*----------------------------------------------*/ ++/* SHL0: res = op_a << (S+1) : ++ * shifts left operand A by S+1 bits, insert 0 to right most bits ++ */ ++#define RK_PKA_SHL0(op_a, S, res) pka_exec_op(PKA_OPCODE_SHL0, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 0, (S), 0, (res), 0) ++ ++/* SHL1: res = op_a << (S+1) : ++ * shifts left operand A by S+1 bits, insert 1 to right most bits ++ */ ++#define RK_PKA_SHL1(op_a, S, res) pka_exec_op(PKA_OPCODE_SHL1, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 0, (S), 0, (res), 0) ++ ++/*--------------------------------------------------------------*/ ++/* 2. Multiplication and other operations */ ++/* Note: See notes to RK_PKAExecOperation */ ++/*--------------------------------------------------------------*/ ++ ++/* ModExp: res = op_a ** op_b mod N - modular exponentiation */ ++#define RK_PKA_MOD_EXP(op_a, op_b, res) \ ++ pka_exec_op(PKA_OPCODE_MOD_EXP, PKA_EXACT_LEN_ID, 0, (op_a), \ ++ 0, (op_b), 0, (res), 0) ++ ++/* Divide: res = op_a / op_b , op_a = op_a mod op_b - division, */ ++#define RK_PKA_DIV(op_a, op_b, res) pka_exec_op(PKA_OPCODE_DIV, PKA_CALC_LEN_ID, \ ++ 0, (op_a), 0, (op_b), 0, (res), 0) ++ ++/* Terminate - special operation, which allows HOST access */ ++/* to PKA data memory registers after end of PKA operations */ ++#define RK_PKA_TERMINATE() pka_exec_op(PKA_OPCODE_TERMINATE, 0, 0, 0, 0, 0, 0, 0, 0) ++ ++/********************* Private Variable Definition ***************************/ ++static void __iomem *pka_base; ++ ++static void pka_word_memcpy(u32 *dst, u32 *src, u32 size) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; -+ struct devfreq *devfreq; -+ int ret = 0; ++ u32 i; + -+ if (!opp_info->is_rate_volt_checked) -+ return -EINVAL; ++ for (i = 0; i < size; i++, dst++) ++ writel_relaxed(src[i], (void *)dst); ++} + -+ rockchip_opp_dvfs_lock(opp_info); -+ if (dmcfreq->rate != *freq) { -+ ret = rockchip_dmcfreq_opp_set_rate(dev, dmcfreq, freq); -+ if (!ret) { -+ if (dmcfreq->info.devfreq) { -+ devfreq = dmcfreq->info.devfreq; -+ devfreq->last_status.current_frequency = *freq; -+ } -+ } -+ } -+ rockchip_opp_dvfs_unlock(opp_info); ++static void pka_word_memset(u32 *buff, u32 val, u32 size) ++{ ++ u32 i; + -+ return ret; ++ for (i = 0; i < size; i++, buff++) ++ writel_relaxed(val, (void *)buff); +} + -+static int rockchip_dmcfreq_get_dev_status(struct device *dev, -+ struct devfreq_dev_status *stat) ++static int pka_wait_pipe_rdy(void) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; -+ struct devfreq_event_data edata; -+ int i, ret = 0; ++ u32 reg_val = 0; + -+ if 
(!dmcfreq->info.auto_freq_en) -+ return -EINVAL; ++ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_PIPE_RDY, reg_val, ++ reg_val, PKA_POLL_PERIOD_US, PKA_POLL_TIMEOUT_US); ++} + -+ /* -+ * RK3588 platform may crash if the CPU and MCU access the DFI/DMC -+ * registers at same time. -+ */ -+ rockchip_opp_dvfs_lock(opp_info); -+ for (i = 0; i < dmcfreq->edev_count; i++) { -+ ret = devfreq_event_get_event(dmcfreq->edev[i], &edata); -+ if (ret < 0) { -+ dev_err(dev, "failed to get event %s\n", -+ dmcfreq->edev[i]->desc->name); -+ goto out; -+ } -+ if (i == dmcfreq->dfi_id) { -+ stat->busy_time = edata.load_count; -+ stat->total_time = edata.total_count; -+ } else { -+ dmcfreq->nocp_bw[i] = edata.load_count; -+ } -+ } ++static int pka_wait_done(void) ++{ ++ u32 reg_val = 0; + -+out: -+ rockchip_opp_dvfs_unlock(opp_info); ++ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_DONE, reg_val, ++ reg_val, PKA_POLL_PERIOD_US, PKA_POLL_TIMEOUT_US); ++} + -+ return ret; ++static int pka_max_wait_done(void) ++{ ++ u32 reg_val = 0; ++ ++ return readx_poll_timeout(PKA_READ, CRYPTO_PKA_DONE, reg_val, ++ reg_val, PKA_MAX_POLL_PERIOD_US, PKA_MAX_POLL_TIMEOUT_US); +} + -+static int rockchip_dmcfreq_get_cur_freq(struct device *dev, -+ unsigned long *freq) ++static u32 pka_check_status(u32 mask) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ u32 status; + -+ *freq = dmcfreq->rate; ++ pka_wait_done(); ++ status = PKA_READ(CRYPTO_PKA_STATUS); ++ status = status & mask; + -+ return 0; ++ return !!status; ++} ++static void pka_set_len_words(u32 words, u32 index) ++{ ++ PKA_WRITE(PKA_WORDS2BITS(words), CRYPTO_PKA_L0 + index * sizeof(u32)); +} + -+static struct devfreq_dev_profile rockchip_devfreq_dmc_profile = { -+ .polling_ms = 50, -+ .target = rockchip_dmcfreq_target, -+ .get_dev_status = rockchip_dmcfreq_get_dev_status, -+ .get_cur_freq = rockchip_dmcfreq_get_cur_freq, -+ .is_cooling_device = true, -+}; ++static u32 pka_get_len_words(u32 index) ++{ ++ pka_wait_done(); ++ return PKA_BITS2WORDS(PKA_READ(CRYPTO_PKA_L0 + (index) * sizeof(u32))); ++} + -+static inline void reset_last_status(struct devfreq *devfreq) ++static void pka_set_map_addr(u32 addr, u32 index) +{ -+ devfreq->last_status.total_time = 1; -+ devfreq->last_status.busy_time = 1; ++ PKA_WRITE(addr, CRYPTO_MEMORY_MAP0 + sizeof(u32) * index); +} + -+static void of_get_px30_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static u32 pka_get_map_addr(u32 index) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct px30_ddr_dts_config_timing *dts_timing; -+ struct rk3328_ddr_de_skew_setting *de_skew; -+ int ret = 0; -+ u32 i; ++ pka_wait_done(); ++ return PKA_READ(CRYPTO_MEMORY_MAP0 + sizeof(u32) * (index)); ++} + -+ dts_timing = -+ (struct px30_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); ++static u32 pka_make_full_opcode(u32 opcode, u32 len_id, ++ u32 is_a_immed, u32 op_a, ++ u32 is_b_immed, u32 op_b, ++ u32 res_discard, u32 res, ++ u32 tag) ++{ ++ u32 full_opcode; + -+ np_tim = of_parse_phandle(np, "ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } -+ de_skew = kmalloc(sizeof(*de_skew), GFP_KERNEL); -+ if (!de_skew) { -+ ret = -ENOMEM; -+ goto end; -+ } -+ p = (u32 *)dts_timing; -+ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], -+ p + i); -+ } -+ p = (u32 *)de_skew->ca_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_ca_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_ca_timing[i], -+ p + i); -+ } 
-+ p = (u32 *)de_skew->cs0_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs0_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_cs0_timing[i], -+ p + i); -+ } -+ p = (u32 *)de_skew->cs1_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs1_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_cs1_timing[i], -+ p + i); -+ } -+ if (!ret) -+ px30_de_skew_set_2_reg(de_skew, dts_timing); -+ kfree(de_skew); -+end: -+ if (!ret) { -+ dts_timing->available = 1; -+ } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); -+ } ++ full_opcode = ((opcode & 31) << CRYPTO_OPCODE_CODE_SHIFT | ++ (len_id & 7) << CRYPTO_OPCODE_LEN_SHIFT | ++ (is_a_immed & 1) << CRYPTO_OPCODE_A_IMMED_SHIFT | ++ (op_a & 31) << CRYPTO_OPCODE_A_SHIFT | ++ (is_b_immed & 1) << CRYPTO_OPCODE_B_IMMED_SHIFT | ++ (op_b & 31) << CRYPTO_OPCODE_B_SHIFT | ++ (res_discard & 1) << CRYPTO_OPCODE_R_DIS_SHIFT | ++ (res & 31) << CRYPTO_OPCODE_R_SHIFT | ++ (tag & 31) << CRYPTO_OPCODE_TAG_SHIFT); + -+ of_node_put(np_tim); ++ return full_opcode; +} + -+static void of_get_rk1808_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static void pka_load_data(u32 addr, u32 *data, u32 size_words) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct rk1808_ddr_dts_config_timing *dts_timing; -+ int ret = 0; -+ u32 i; ++ pka_wait_done(); + -+ dts_timing = -+ (struct rk1808_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); ++ PKA_RAM_FOR_CPU(); ++ pka_word_memcpy(PKA_GET_SRAM_ADDR(addr), data, size_words); ++ PKA_RAM_FOR_PKA(); ++} + -+ np_tim = of_parse_phandle(np, "ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } ++static void pka_clr_mem(u32 addr, u32 size_words) ++{ ++ pka_wait_done(); + -+ p = (u32 *)dts_timing; -+ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->ca_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk1808_dts_ca_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk1808_dts_ca_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs0_a_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs0_a_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk1808_dts_cs0_a_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs0_b_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs0_b_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk1808_dts_cs0_b_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs1_a_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs1_a_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk1808_dts_cs1_a_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs1_b_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs1_b_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk1808_dts_cs1_b_timing[i], -+ p + i); -+ } ++ PKA_RAM_FOR_CPU(); ++ pka_word_memset(PKA_GET_SRAM_ADDR(addr), 0x00, size_words); ++ PKA_RAM_FOR_PKA(); ++} + -+end: -+ if (!ret) { -+ dts_timing->available = 1; -+ } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); -+ } ++static void pka_read_data(u32 addr, u32 *data, u32 size_words) ++{ ++ pka_wait_done(); + -+ of_node_put(np_tim); ++ PKA_RAM_FOR_CPU(); ++ pka_word_memcpy(data, PKA_GET_SRAM_ADDR(addr), size_words); ++ PKA_RAM_FOR_PKA(); +} + -+static void of_get_rk3128_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static int pka_exec_op(enum pka_opcode opcode, u8 len_id, ++ u8 is_a_immed, u8 op_a, u8 is_b_immed, u8 op_b, ++ u8 
res_discard, u8 res, u8 tag) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct rk3128_ddr_dts_config_timing *dts_timing; -+ struct share_params *init_timing; + int ret = 0; -+ u32 i; ++ u32 full_opcode; + -+ init_timing = (struct share_params *)timing; ++ if (res == RES_DISCARD) { ++ res_discard = 1; ++ res = 0; ++ } + -+ if (of_property_read_u32(np, "vop-dclk-mode", -+ &init_timing->vop_dclk_mode)) -+ init_timing->vop_dclk_mode = 0; ++ full_opcode = pka_make_full_opcode(opcode, len_id, ++ is_a_immed, op_a, ++ is_b_immed, op_b, ++ res_discard, res, tag); + -+ p = timing + DTS_PAR_OFFSET / 4; -+ np_tim = of_parse_phandle(np, "rockchip,ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } -+ for (i = 0; i < ARRAY_SIZE(rk3128_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3128_dts_timing[i], -+ p + i); ++ /* write full opcode into PKA CRYPTO_OPCODE register */ ++ PKA_WRITE(full_opcode, CRYPTO_OPCODE); ++ ++ /*************************************************/ ++ /* finishing operations for different cases */ ++ /*************************************************/ ++ switch (opcode) { ++ case PKA_OPCODE_DIV: ++ /* for Div operation check, that op_b != 0*/ ++ if (pka_check_status(CRYPTO_PKA_DIV_BY_ZERO)) ++ goto end; ++ break; ++ case PKA_OPCODE_TERMINATE: ++ /* wait for PKA done bit */ ++ ret = pka_wait_done(); ++ break; ++ default: ++ /* wait for PKA pipe ready bit */ ++ ret = pka_wait_pipe_rdy(); + } +end: -+ dts_timing = -+ (struct rk3128_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); -+ if (!ret) { -+ dts_timing->available = 1; -+ } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); -+ } -+ -+ of_node_put(np_tim); ++ return ret; +} + -+static uint32_t of_get_rk3228_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static int pk_int_len_tbl(u32 exact_size_words, u32 calc_size_words) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ int ret = 0; + u32 i; + -+ p = timing + DTS_PAR_OFFSET / 4; -+ np_tim = of_parse_phandle(np, "rockchip,dram_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } -+ for (i = 0; i < ARRAY_SIZE(rk3228_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3228_dts_timing[i], -+ p + i); -+ } -+end: -+ if (ret) -+ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ /* clear all length reg */ ++ for (i = 0; i < CRYPTO_LEN_REG_NUM; i++) ++ pka_set_len_words(0, i); + -+ of_node_put(np_tim); -+ return ret; ++ /* Case of default settings */ ++ /* write exact size into first table entry */ ++ pka_set_len_words(exact_size_words, PKA_EXACT_LEN_ID); ++ ++ /* write size with extra word into tab[1] = tab[0] + 32 */ ++ pka_set_len_words(calc_size_words, PKA_CALC_LEN_ID); ++ ++ return 0; +} + -+static void of_get_rk3288_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static int pka_int_map_tbl(u32 *regs_cnt, u32 max_size_words) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct rk3288_ddr_dts_config_timing *dts_timing; -+ struct share_params *init_timing; -+ int ret = 0; + u32 i; ++ u32 cur_addr = 0; ++ u32 max_size_bytes, default_regs_cnt; + -+ init_timing = (struct share_params *)timing; ++ max_size_bytes = PKA_WORDS2BYTES(max_size_words); ++ default_regs_cnt = ++ min_t(u32, CRYPTO_MAP_REG_NUM, CRYPTO_SRAM_SIZE / max_size_bytes); + -+ if (of_property_read_u32(np, "vop-dclk-mode", -+ &init_timing->vop_dclk_mode)) -+ init_timing->vop_dclk_mode = 0; ++ /* clear all address */ ++ for (i = 0; i < CRYPTO_MAP_REG_NUM; i++) ++ 
pka_set_map_addr(PKA_ADDR_NOT_USED, i); + -+ p = timing + DTS_PAR_OFFSET / 4; -+ np_tim = of_parse_phandle(np, "rockchip,ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } -+ for (i = 0; i < ARRAY_SIZE(rk3288_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3288_dts_timing[i], -+ p + i); -+ } -+end: -+ dts_timing = -+ (struct rk3288_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); -+ if (!ret) { -+ dts_timing->available = 1; -+ } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); -+ } ++ /* set addresses of N,NP and user requested registers (excluding 2 temp registers T0,T1) */ ++ for (i = 0; i < default_regs_cnt - PKA_TMP_REG_CNT; i++, cur_addr += max_size_bytes) ++ pka_set_map_addr(cur_addr, i); + -+ of_node_put(np_tim); ++ /* set addresses of 2 temp registers: T0=30, T1=31 */ ++ pka_set_map_addr(cur_addr, PKA_T0); ++ cur_addr += max_size_bytes; ++ pka_set_map_addr(cur_addr, PKA_T1); ++ ++ /* output maximal count of allowed registers */ ++ *regs_cnt = default_regs_cnt; ++ ++ /* set default virtual addresses of N,NP,T0,T1 registers into N_NP_T0_T1_Reg */ ++ PKA_WRITE((u32)PKA_N_NP_T0_T1_REG_DEFAULT, CRYPTO_N_NP_T0_T1_ADDR); ++ ++ return 0; +} + -+static void of_get_rk3328_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static int pka_clear_regs_block(u8 first_reg, u8 regs_cnt) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct rk3328_ddr_dts_config_timing *dts_timing; -+ struct rk3328_ddr_de_skew_setting *de_skew; -+ int ret = 0; + u32 i; ++ u32 size_words; ++ int cnt_tmps = 0; ++ u32 user_reg_num = CRYPTO_MAP_REG_NUM - PKA_TMP_REG_CNT; + -+ dts_timing = -+ (struct rk3328_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); ++ /* calculate size_words of register in words */ ++ size_words = pka_get_len_words(PKA_CALC_LEN_ID); + -+ np_tim = of_parse_phandle(np, "ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } -+ de_skew = kmalloc(sizeof(*de_skew), GFP_KERNEL); -+ if (!de_skew) { -+ ret = -ENOMEM; -+ goto end; -+ } -+ p = (u32 *)dts_timing; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_timing[i], -+ p + i); -+ } -+ p = (u32 *)de_skew->ca_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_ca_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_ca_timing[i], -+ p + i); -+ } -+ p = (u32 *)de_skew->cs0_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs0_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_cs0_timing[i], -+ p + i); -+ } -+ p = (u32 *)de_skew->cs1_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs1_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rk3328_dts_cs1_timing[i], -+ p + i); -+ } -+ if (!ret) -+ rk3328_de_skew_setting_2_register(de_skew, dts_timing); -+ kfree(de_skew); -+end: -+ if (!ret) { -+ dts_timing->available = 1; ++ if (first_reg + regs_cnt > user_reg_num) { ++ cnt_tmps = min_t(u8, (regs_cnt + first_reg - user_reg_num), PKA_TMP_REG_CNT); ++ regs_cnt = user_reg_num; + } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ cnt_tmps = PKA_TMP_REG_CNT; + } + -+ of_node_put(np_tim); ++ /* clear ordinary registers */ ++ for (i = first_reg; i < regs_cnt; i++) ++ RK_PKA_CLR(i); ++ ++ pka_wait_done(); ++ ++ /* clear PKA temp registers (without PKA operations) */ ++ if (cnt_tmps > 0) { ++ pka_clr_mem(pka_get_map_addr(PKA_T0), size_words); ++ if (cnt_tmps > 1) ++ pka_clr_mem(pka_get_map_addr(PKA_T1), size_words); ++ ++ 
} ++ ++ return 0; +} + -+static void of_get_rv1126_timings(struct device *dev, -+ struct device_node *np, uint32_t *timing) ++static int pka_init(u32 exact_size_words) +{ -+ struct device_node *np_tim; -+ u32 *p; -+ struct rk1808_ddr_dts_config_timing *dts_timing; -+ int ret = 0; -+ u32 i; ++ int ret; ++ u32 regs_cnt = 0; ++ u32 calc_size_words = exact_size_words + 1; + -+ dts_timing = -+ (struct rk1808_ddr_dts_config_timing *)(timing + -+ DTS_PAR_OFFSET / 4); ++ PKA_CLK_ENABLE(); ++ PKA_RAM_FOR_PKA(); + -+ np_tim = of_parse_phandle(np, "ddr_timing", 0); -+ if (!np_tim) { -+ ret = -EINVAL; -+ goto end; -+ } ++ if (exact_size_words > PKA_MAX_CALC_WORDS) ++ return -1; + -+ p = (u32 *)dts_timing; -+ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { -+ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->ca_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rv1126_dts_ca_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rv1126_dts_ca_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs0_a_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs0_a_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rv1126_dts_cs0_a_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs0_b_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs0_b_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rv1126_dts_cs0_b_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs1_a_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs1_a_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rv1126_dts_cs1_a_timing[i], -+ p + i); -+ } -+ p = (u32 *)dts_timing->cs1_b_de_skew; -+ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs1_b_timing); i++) { -+ ret |= of_property_read_u32(np_tim, rv1126_dts_cs1_b_timing[i], -+ p + i); -+ } ++ ret = pk_int_len_tbl(exact_size_words, calc_size_words); ++ if (ret) ++ goto exit; + -+end: -+ if (!ret) { -+ dts_timing->available = 1; -+ } else { -+ dts_timing->available = 0; -+ dev_err(dev, "of_get_ddr_timings: fail\n"); -+ } ++ ret = pka_int_map_tbl(®s_cnt, calc_size_words); ++ if (ret) ++ goto exit; + -+ of_node_put(np_tim); ++ /* clean PKA data memory */ ++ pka_clear_regs_block(0, regs_cnt - PKA_TMP_REG_CNT); ++ ++ /* clean temp PKA registers 30,31 */ ++ pka_clr_mem(pka_get_map_addr(PKA_T0), calc_size_words); ++ pka_clr_mem(pka_get_map_addr(PKA_T1), calc_size_words); ++ ++exit: ++ return ret; +} + -+static struct rk3399_dram_timing *of_get_rk3399_timings(struct device *dev, -+ struct device_node *np) ++static void pka_finish(void) +{ -+ struct rk3399_dram_timing *timing = NULL; -+ struct device_node *np_tim; -+ int ret; ++ RK_PKA_TERMINATE(); ++ PKA_CLK_DISABLE(); ++} + -+ np_tim = of_parse_phandle(np, "ddr_timing", 0); -+ if (np_tim) { -+ timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); -+ if (!timing) -+ goto err; ++static void pka_copy_bn_into_reg(u8 dst_reg, struct rk_bignum *bn) ++{ ++ u32 cur_addr; ++ u32 size_words, bn_words; + -+ ret = of_property_read_u32(np_tim, "ddr3_speed_bin", -+ &timing->ddr3_speed_bin); -+ ret |= of_property_read_u32(np_tim, "pd_idle", -+ &timing->pd_idle); -+ ret |= of_property_read_u32(np_tim, "sr_idle", -+ &timing->sr_idle); -+ ret |= of_property_read_u32(np_tim, "sr_mc_gate_idle", -+ &timing->sr_mc_gate_idle); -+ ret |= of_property_read_u32(np_tim, "srpd_lite_idle", -+ &timing->srpd_lite_idle); -+ ret |= of_property_read_u32(np_tim, "standby_idle", -+ &timing->standby_idle); -+ ret |= of_property_read_u32(np_tim, "auto_lp_dis_freq", -+ &timing->auto_lp_dis_freq); -+ ret |= of_property_read_u32(np_tim, 
"ddr3_dll_dis_freq", -+ &timing->ddr3_dll_dis_freq); -+ ret |= of_property_read_u32(np_tim, "phy_dll_dis_freq", -+ &timing->phy_dll_dis_freq); -+ ret |= of_property_read_u32(np_tim, "ddr3_odt_dis_freq", -+ &timing->ddr3_odt_dis_freq); -+ ret |= of_property_read_u32(np_tim, "ddr3_drv", -+ &timing->ddr3_drv); -+ ret |= of_property_read_u32(np_tim, "ddr3_odt", -+ &timing->ddr3_odt); -+ ret |= of_property_read_u32(np_tim, "phy_ddr3_ca_drv", -+ &timing->phy_ddr3_ca_drv); -+ ret |= of_property_read_u32(np_tim, "phy_ddr3_dq_drv", -+ &timing->phy_ddr3_dq_drv); -+ ret |= of_property_read_u32(np_tim, "phy_ddr3_odt", -+ &timing->phy_ddr3_odt); -+ ret |= of_property_read_u32(np_tim, "lpddr3_odt_dis_freq", -+ &timing->lpddr3_odt_dis_freq); -+ ret |= of_property_read_u32(np_tim, "lpddr3_drv", -+ &timing->lpddr3_drv); -+ ret |= of_property_read_u32(np_tim, "lpddr3_odt", -+ &timing->lpddr3_odt); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr3_ca_drv", -+ &timing->phy_lpddr3_ca_drv); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr3_dq_drv", -+ &timing->phy_lpddr3_dq_drv); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr3_odt", -+ &timing->phy_lpddr3_odt); -+ ret |= of_property_read_u32(np_tim, "lpddr4_odt_dis_freq", -+ &timing->lpddr4_odt_dis_freq); -+ ret |= of_property_read_u32(np_tim, "lpddr4_drv", -+ &timing->lpddr4_drv); -+ ret |= of_property_read_u32(np_tim, "lpddr4_dq_odt", -+ &timing->lpddr4_dq_odt); -+ ret |= of_property_read_u32(np_tim, "lpddr4_ca_odt", -+ &timing->lpddr4_ca_odt); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr4_ca_drv", -+ &timing->phy_lpddr4_ca_drv); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr4_ck_cs_drv", -+ &timing->phy_lpddr4_ck_cs_drv); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr4_dq_drv", -+ &timing->phy_lpddr4_dq_drv); -+ ret |= of_property_read_u32(np_tim, "phy_lpddr4_odt", -+ &timing->phy_lpddr4_odt); -+ if (ret) { -+ devm_kfree(dev, timing); -+ goto err; -+ } -+ of_node_put(np_tim); -+ return timing; -+ } -+ -+err: -+ if (timing) { -+ devm_kfree(dev, timing); -+ timing = NULL; -+ } -+ of_node_put(np_tim); -+ return timing; -+} ++ RK_PKA_TERMINATE(); + -+static int rockchip_ddr_set_auto_self_refresh(uint32_t en) -+{ -+ struct arm_smccc_res res; ++ bn_words = PKA_BIGNUM_WORDS(bn); ++ size_words = pka_get_len_words(PKA_CALC_LEN_ID); ++ cur_addr = pka_get_map_addr(dst_reg); + -+ ddr_psci_param->sr_idle_en = en; -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_AT_SR); ++ pka_load_data(cur_addr, bn->data, bn_words); ++ cur_addr += PKA_WORDS2BYTES(bn_words); + -+ return res.a0; ++ pka_clr_mem(cur_addr, size_words - bn_words); +} + -+struct dmcfreq_wait_ctrl_t { -+ wait_queue_head_t wait_wq; -+ int complt_irq; -+ int wait_flag; -+ int wait_en; -+ int wait_time_out_ms; -+ int dcf_en; -+ struct regmap *regmap_dcf; -+}; ++static int pka_copy_bn_from_reg(struct rk_bignum *bn, u32 size_words, u8 src_reg, bool is_max_poll) ++{ ++ int ret; + -+static struct dmcfreq_wait_ctrl_t wait_ctrl; ++ PKA_WRITE(0, CRYPTO_OPCODE); + -+static irqreturn_t wait_complete_irq(int irqno, void *dev_id) -+{ -+ struct dmcfreq_wait_ctrl_t *ctrl = dev_id; ++ ret = is_max_poll ? 
pka_max_wait_done() : pka_wait_done(); ++ if (ret) ++ return ret; + -+ ctrl->wait_flag = 0; -+ wake_up(&ctrl->wait_wq); -+ return IRQ_HANDLED; ++ pka_read_data(pka_get_map_addr(src_reg), bn->data, size_words); ++ ++ return 0; +} + -+static irqreturn_t wait_dcf_complete_irq(int irqno, void *dev_id) ++/*********** pka_div_bignum function **********************/ ++/** ++ * @brief The function divides long number A*(2^S) by B: ++ * res = A*(2^S) / B, remainder A = A*(2^S) % B. ++ * where: A,B - are numbers of size, which is not grate than, ++ * maximal operands size, ++ * and B > 2^S; ++ * S - exponent of binary factor of A. ++ * ^ - exponentiation operator. ++ * ++ * The function algorithm: ++ * ++ * 1. Let nWords = S/32; nBits = S % 32; ++ * 2. Set res = 0, r_t1 = op_a; ++ * 3. for(i=0; i<=nWords; i++) do: ++ * 3.1. if(i < nWords ) ++ * s1 = 32; ++ * else ++ * s1 = nBits; ++ * 3.2. r_t1 = r_t1 << s1; ++ * 3.3. call PKA_div for calculating the quotient and remainder: ++ * r_t2 = floor(r_t1/op_b) //quotient; ++ * r_t1 = r_t1 % op_b //remainder (is in r_t1 register); ++ * 3.4. res = (res << s1) + r_t2; ++ * end do; ++ * 4. Exit. ++ * ++ * Assuming: ++ * - 5 PKA registers are used: op_a, op_b, res, r_t1, r_t2. ++ * - The registers sizes and mapping tables are set on ++ * default mode according to operands size. ++ * - The PKA clocks are initialized. ++ * NOTE ! Operand op_a shall be overwritten by remainder. ++ * ++ * @param[in] len_id - ID of operation size (modSize+32). ++ * @param[in] op_a - Operand A: virtual register pointer of A. ++ * @param[in] S - exponent of binary factor of A. ++ * @param[in] op_b - Operand B: virtual register pointer of B. ++ * @param[in] res - Virtual register pointer for result quotient. ++ * @param[in] r_t1 - Virtual pointer to remainder. ++ * @param[in] r_t2 - Virtual pointer of temp register. ++ * ++ * @return int - On success 0 is returned: ++ * ++ */ ++static int pka_div_bignum(u8 op_a, u32 s, u8 op_b, u8 res, u8 r_t1, u8 r_t2) +{ -+ struct arm_smccc_res res; -+ struct dmcfreq_wait_ctrl_t *ctrl = dev_id; ++ u8 s1; ++ u32 i; ++ u32 n_bits, n_words; + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_POST_SET_RATE); -+ if (res.a0) -+ pr_err("%s: dram post set rate error:%lx\n", __func__, res.a0); ++ /* calculate shifting parameters (words and bits ) */ ++ n_words = ((u32)s + 31) / 32; ++ n_bits = (u32)s % 32; + -+ ctrl->wait_flag = 0; -+ wake_up(&ctrl->wait_wq); -+ return IRQ_HANDLED; -+} ++ /* copy operand op_a (including extra word) into temp reg r_t1 */ ++ RK_PKA_COPY(r_t1, op_a); + -+int rockchip_dmcfreq_wait_complete(void) -+{ -+ struct arm_smccc_res res; ++ /* set res = 0 (including extra word) */ ++ RK_PKA_CLR(res); + -+ if (!wait_ctrl.wait_en) { -+ pr_err("%s: Do not support time out!\n", __func__); -+ return 0; -+ } -+ wait_ctrl.wait_flag = -1; ++ /*----------------------------------------------------*/ ++ /* Step 1. Shifting and dividing loop */ ++ /*----------------------------------------------------*/ ++ for (i = 0; i < n_words; i++) { ++ /* 3.1 set shift value s1 */ ++ s1 = i > 0 ? 32 : n_bits; + -+ enable_irq(wait_ctrl.complt_irq); -+ /* -+ * CPUs only enter WFI when idle to make sure that -+ * FIQn can quick response. -+ */ -+ cpu_latency_qos_update_request(&pm_qos, 0); ++ /* 3.2. 
shift: r_t1 = r_t1 * 2**s1 (in code (s1-1), ++ * because PKA performs s+1 shifts) ++ */ ++ if (s1 > 0) ++ RK_PKA_SHL0(r_t1 /*op_a*/, (s1 - 1) /*s*/, r_t1 /*res*/); + -+ if (wait_ctrl.dcf_en == 1) { -+ /* start dcf */ -+ regmap_update_bits(wait_ctrl.regmap_dcf, 0x0, 0x1, 0x1); -+ } else if (wait_ctrl.dcf_en == 2) { -+ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_MCU_START); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_mcu_start error:%lx\n", res.a0); -+ return -ENOMEM; -+ } -+ } ++ /* 3.3. perform PKA_OPCODE_MOD_DIV for calculating a quotient ++ * r_t2 = floor(r_t1 / N) ++ * and remainder r_t1 = r_t1 % op_b ++ */ ++ RK_PKA_DIV(r_t1 /*op_a*/, op_b /*B*/, r_t2 /*res*/); + -+ wait_event_timeout(wait_ctrl.wait_wq, (wait_ctrl.wait_flag == 0), -+ msecs_to_jiffies(wait_ctrl.wait_time_out_ms)); ++ /* 3.4. res = res * 2**s1 + res; */ ++ if (s1 > 0) ++ RK_PKA_SHL0(res /*op_a*/, (s1 - 1) /*s*/, res /*res*/); + -+ /* -+ * If waiting for wait_ctrl.complt_irq times out, clear the IRQ and stop the MCU by -+ * sip_smc_dram(DRAM_POST_SET_RATE). -+ */ -+ if (wait_ctrl.dcf_en == 2 && wait_ctrl.wait_flag != 0) { -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_POST_SET_RATE); -+ if (res.a0) -+ pr_err("%s: dram post set rate error:%lx\n", __func__, res.a0); ++ RK_PKA_ADD(res /*op_a*/, r_t2 /*op_b*/, res /*res*/); + } + -+ cpu_latency_qos_update_request(&pm_qos, PM_QOS_DEFAULT_VALUE); -+ disable_irq(wait_ctrl.complt_irq); ++ pka_wait_done(); + + return 0; -+} ++} /* END OF pka_div_bignum */ + -+static __maybe_unused int rockchip_get_freq_info(struct rockchip_dmcfreq *dmcfreq) ++static u32 pka_calc_and_init_np(struct rk_bignum *bn, u8 r_t0, u8 r_t1, u8 r_t2) +{ -+ struct arm_smccc_res res; -+ struct dev_pm_opp *opp; -+ struct dmc_freq_table *freq_table; -+ unsigned long rate; -+ int i, j, count, ret = 0; ++ int ret; ++ u32 i; ++ u32 s; ++ u32 mod_size_bits; ++ u32 num_bits, num_words; + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_FREQ_INFO); -+ if (res.a0) { -+ dev_err(dmcfreq->dev, "rockchip_sip_config_dram_get_freq_info error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ /* Set s = 132 */ ++ s = 132; + -+ if (ddr_psci_param->freq_count == 0 || ddr_psci_param->freq_count > 6) { -+ dev_err(dmcfreq->dev, "it is no available frequencies!\n"); -+ return -EPERM; -+ } ++ mod_size_bits = PKA_BYTES2BITS(rk_bn_get_size(bn)); + -+ for (i = 0; i < ddr_psci_param->freq_count; i++) -+ dmcfreq->freq_info_rate[i] = ddr_psci_param->freq_info_mhz[i] * 1000000; -+ dmcfreq->freq_count = ddr_psci_param->freq_count; ++ CRYPTO_TRACE("size_bits = %u", mod_size_bits); + -+ /* update dmc_opp_table */ -+ count = dev_pm_opp_get_opp_count(dmcfreq->dev); -+ if (count <= 0) { -+ ret = count ? count : -ENODATA; -+ return ret; -+ } ++ /* copy modulus N into r0 register */ ++ pka_copy_bn_into_reg(PKA_N, bn); + -+ freq_table = kzalloc(sizeof(*freq_table) * count, GFP_KERNEL); -+ for (i = 0, rate = 0; i < count; i++, rate++) { -+ /* find next rate */ -+ opp = dev_pm_opp_find_freq_ceil(dmcfreq->dev, &rate); -+ if (IS_ERR(opp)) { -+ ret = PTR_ERR(opp); -+ dev_err(dmcfreq->dev, "failed to find OPP for freq %lu.\n", rate); -+ goto out; -+ } -+ freq_table[i].freq = rate; -+ freq_table[i].supplies[0].u_volt = dev_pm_opp_get_voltage(opp); -+ dev_pm_opp_put(opp); ++ /*--------------------------------------------------------------*/ ++ /* Step 1,2. Set registers: Set op_a = 2^(sizeN+32) */ ++ /* Registers using: 0 - N (is set in register 0, */ ++ /* 1 - NP, temp regs: r_t0 (A), r_t1, r_t2. 
*/ ++ /* len_id: 0 - exact size, 1 - exact+32 bit */ ++ /*--------------------------------------------------------------*/ + -+ for (j = 0; j < dmcfreq->freq_count; j++) { -+ if (rate == dmcfreq->freq_info_rate[j]) -+ break; -+ } -+ if (j == dmcfreq->freq_count) -+ dev_pm_opp_disable(dmcfreq->dev, rate); -+ } ++ /* set register r_t0 = 0 */ ++ RK_PKA_CLR(r_t0); + -+ for (i = 0; i < dmcfreq->freq_count; i++) { -+ for (j = 0; j < count; j++) { -+ if (dmcfreq->freq_info_rate[i] == freq_table[j].freq) { -+ break; -+ } else if (dmcfreq->freq_info_rate[i] < freq_table[j].freq) { -+ dev_pm_opp_add(dmcfreq->dev, dmcfreq->freq_info_rate[i], -+ freq_table[j].supplies[0].u_volt); -+ break; -+ } -+ } -+ if (j == count) { -+ dev_err(dmcfreq->dev, "failed to match dmc_opp_table for %ld\n", -+ dmcfreq->freq_info_rate[i]); -+ if (i == 0) -+ ret = -EPERM; -+ else -+ dmcfreq->freq_count = i; -+ goto out; -+ } -+ } ++ /* calculate bit position of said bit in the word */ ++ num_bits = mod_size_bits % 32; ++ num_words = mod_size_bits / 32; ++ ++ CRYPTO_TRACE("num_bits = %u, num_words = %u, size_bits = %u", ++ num_bits, num_words, mod_size_bits); ++ ++ /* set 1 into register r_t0 */ ++ RK_PKA_SET_0(r_t0 /*op_a*/, r_t0 /*res*/); ++ ++ /* shift 1 to num_bits+31 position */ ++ if (num_bits > 0) ++ RK_PKA_SHL0(r_t0 /*op_a*/, num_bits - 1 /*s*/, r_t0 /*res*/); ++ ++ /* shift to word position */ ++ for (i = 0; i < num_words; i++) ++ RK_PKA_SHL0(r_t0 /*op_a*/, 31 /*s*/, r_t0 /*res*/); ++ ++ /*--------------------------------------------------------------*/ ++ /* Step 3. Dividing: PKA_NP = (r_t0 * 2**s) / N */ ++ /*--------------------------------------------------------------*/ ++ ret = pka_div_bignum(r_t0, s, PKA_N, PKA_NP, r_t1, r_t2); + -+out: -+ kfree(freq_table); + return ret; ++} /* END OF pka_calc_and_init_np */ ++ ++/********************* Public Function Definition ****************************/ ++ ++void rk_pka_set_crypto_base(void __iomem *base) ++{ ++ pka_base = base; +} + -+static __maybe_unused int -+rockchip_dmcfreq_adjust_opp_table(struct rockchip_dmcfreq *dmcfreq) ++/** ++ * @brief calculate exp mod. out = in ^ e mod n ++ * @param in: the point of input data bignum. ++ * @param e: the point of exponent bignum. ++ * @param n: the point of modulus bignum. ++ * @param out: the point of outputs bignum. ++ * @param pTmp: the point of tmpdata bignum. 
++ * @return 0 for success ++ */ ++int rk_pka_expt_mod(struct rk_bignum *in, ++ struct rk_bignum *e, ++ struct rk_bignum *n, ++ struct rk_bignum *out) +{ -+ struct device *dev = dmcfreq->dev; -+ struct arm_smccc_res res; -+ struct dev_pm_opp *opp; -+ struct opp_table *opp_table; -+ struct dmc_freq_table *freq_table; -+ int i, j, count = 0, ret = 0; ++ int ret = -1; ++ u32 max_word_size; ++ bool is_max_poll; ++ u8 r_in = 2, r_e = 3, r_out = 4; ++ u8 r_t0 = 2, r_t1 = 3, r_t2 = 4; + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_FREQ_INFO); -+ if (res.a0) { -+ dev_err(dev, "rockchip_sip_config_dram_get_freq_info error:%lx\n", -+ res.a0); -+ return -ENOMEM; ++ if (!in || !e || !n || !out || PKA_BIGNUM_WORDS(n) == 0) ++ return -1; ++ ++ max_word_size = PKA_BIGNUM_WORDS(n); ++ ++ ret = pka_init(max_word_size); ++ if (ret) { ++ CRYPTO_TRACE("pka_init error\n"); ++ goto exit; + } + -+ if (ddr_psci_param->freq_count == 0 || ddr_psci_param->freq_count > 6) { -+ dev_err(dev, "there is no available frequencies!\n"); -+ return -EPERM; ++ /* calculate NP by initialization PKA for modular operations */ ++ ret = pka_calc_and_init_np(n, r_t0, r_t1, r_t2); ++ if (ret) { ++ CRYPTO_TRACE("pka_calc_and_init_np error\n"); ++ goto exit; + } + -+ for (i = 0; i < ddr_psci_param->freq_count; i++) -+ dmcfreq->freq_info_rate[i] = ddr_psci_param->freq_info_mhz[i] * 1000000; -+ dmcfreq->freq_count = ddr_psci_param->freq_count; ++ pka_clear_regs_block(r_in, 3); + -+ count = dev_pm_opp_get_opp_count(dev); -+ if (count <= 0) { -+ dev_err(dev, "there is no available opp\n"); -+ ret = count ? count : -ENODATA; -+ return ret; -+ } ++ pka_copy_bn_into_reg(r_in, in); ++ pka_copy_bn_into_reg(r_e, e); ++ pka_copy_bn_into_reg(PKA_N, n); + -+ freq_table = kzalloc(sizeof(*freq_table) * count, GFP_KERNEL); -+ opp_table = dev_pm_opp_get_opp_table(dev); -+ if (!opp_table) { -+ ret = -ENOMEM; -+ goto out; ++ ret = RK_PKA_MOD_EXP(r_in, r_e, r_out); ++ if (ret) { ++ CRYPTO_TRACE("RK_PKA_MOD_EXP error\n"); ++ goto exit; + } + -+ mutex_lock(&opp_table->lock); -+ i = 0; -+ list_for_each_entry(opp, &opp_table->opp_list, node) { -+ if (!opp->available) -+ continue; ++ /* e is usually 0x10001 in public key EXP_MOD operation */ ++ is_max_poll = rk_bn_highest_bit(e) * 2 > rk_bn_highest_bit(n) ? true : false; + -+ freq_table[i].freq = opp->rates[0]; -+ freq_table[i].supplies[0] = opp->supplies[0]; -+ if (dmcfreq->opp_info.regulator_count > 1) -+ freq_table[i].supplies[1] = opp->supplies[1]; ++ ret = pka_copy_bn_from_reg(out, max_word_size, r_out, is_max_poll); + -+ i++; -+ } ++exit: ++ pka_clear_regs_block(0, 5); ++ pka_clear_regs_block(30, 2); ++ pka_finish(); + -+ i = 0; -+ list_for_each_entry(opp, &opp_table->opp_list, node) { -+ if (!opp->available) -+ continue; ++ return ret; ++} +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_pka.h b/drivers/crypto/rockchip/rk_crypto_v2_pka.h +new file mode 100644 +index 000000000..3c0b236f9 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v2_pka.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ if (i >= dmcfreq->freq_count) { -+ opp->available = false; -+ continue; -+ } ++/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. 
*/ + -+ for (j = 0; j < count; j++) { -+ if (dmcfreq->freq_info_rate[i] <= freq_table[j].freq) { -+ opp->rates[0] = dmcfreq->freq_info_rate[i]; -+ opp->supplies[0] = freq_table[j].supplies[0]; -+ if (dmcfreq->opp_info.regulator_count > 1) -+ opp->supplies[1] = freq_table[j].supplies[1]; ++#ifndef __RK_CRYPTO_V2_PKA_H__ ++#define __RK_CRYPTO_V2_PKA_H__ + -+ break; -+ } -+ } -+ if (j == count) { -+ dev_err(dmcfreq->dev, "failed to match dmc_opp_table for %ld\n", -+ dmcfreq->freq_info_rate[i]); -+ if (i == 0) { -+ ret = -EPERM; -+ goto out; -+ } else { -+ opp->available = false; -+ dmcfreq->freq_count = i; -+ } -+ } -+ i++; -+ } ++#include "rk_crypto_bignum.h" + -+ mutex_unlock(&opp_table->lock); -+ dev_pm_opp_put_opp_table(opp_table); ++void rk_pka_set_crypto_base(void __iomem *base); + -+out: -+ kfree(freq_table); -+ return ret; -+} ++int rk_pka_expt_mod(struct rk_bignum *in, ++ struct rk_bignum *e, ++ struct rk_bignum *n, ++ struct rk_bignum *out); + -+static __maybe_unused int px30_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct arm_smccc_res res; -+ u32 size; -+ int ret; -+ int complt_irq; -+ u32 complt_hwirq; -+ struct irq_data *complt_irq_data; ++#endif +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_reg.h b/drivers/crypto/rockchip/rk_crypto_v2_reg.h +new file mode 100644 +index 000000000..a938ce3f6 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v2_reg.h +@@ -0,0 +1,378 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ res = sip_smc_dram(0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); -+ if (res.a0 || res.a1 < 0x103) { -+ dev_err(&pdev->dev, -+ "trusted firmware need to update or is invalid!\n"); -+ return -ENXIO; -+ } ++/* Copyright (c) 2018 Rockchip Electronics Co. Ltd. 
*/ + -+ dev_notice(&pdev->dev, "read tf version 0x%lx!\n", res.a1); ++#ifndef __RK_CRYPTO_V2_REG_H__ ++#define __RK_CRYPTO_V2_REG_H__ + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB * N is dts parameters -+ */ -+ size = sizeof(struct px30_ddr_dts_config_timing); -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, -+ SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_px30_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); ++#define _SBF(s, v) ((v) << (s)) + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++#define CRYPTO_WRITE_MASK_SHIFT (16) ++#define CRYPTO_WRITE_MASK_ALL ((0xffffu << CRYPTO_WRITE_MASK_SHIFT)) + -+ complt_irq = platform_get_irq_byname(pdev, "complete_irq"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complete_irq: %d\n", -+ complt_irq); -+ return complt_irq; -+ } -+ wait_ctrl.complt_irq = complt_irq; ++#define WRITE_MASK (16) + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complete_irq\n"); -+ return ret; -+ } -+ disable_irq(complt_irq); ++/* Crypto control registers*/ ++#define CRYPTO_CLK_CTL 0x0000 ++#define CRYPTO_AUTO_CLKGATE_EN BIT(0) + -+ complt_irq_data = irq_get_irq_data(complt_irq); -+ complt_hwirq = irqd_to_hwirq(complt_irq_data); -+ ddr_psci_param->complt_hwirq = complt_hwirq; ++#define CRYPTO_RST_CTL 0x0004 ++#define CRYPTO_SW_PKA_RESET BIT(2) ++#define CRYPTO_SW_RNG_RESET BIT(1) ++#define CRYPTO_SW_CC_RESET BIT(0) + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); -+ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++/* Crypto DMA control registers*/ ++#define CRYPTO_DMA_INT_EN 0x0008 ++#define CRYPTO_ZERO_ERR_INT_EN BIT(6) ++#define CRYPTO_LIST_ERR_INT_EN BIT(5) ++#define CRYPTO_SRC_ERR_INT_EN BIT(4) ++#define CRYPTO_DST_ERR_INT_EN BIT(3) ++#define CRYPTO_SRC_ITEM_INT_EN BIT(2) ++#define CRYPTO_DST_ITEM_DONE_INT_EN BIT(1) ++#define CRYPTO_LIST_DONE_INT_EN BIT(0) + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++#define CRYPTO_DMA_INT_ST 0x000C ++#define CRYPTO_LOCKSTEP_INT_ST BIT(7) ++#define CRYPTO_ZERO_LEN_INT_ST BIT(6) ++#define CRYPTO_LIST_ERR_INT_ST BIT(5) ++#define CRYPTO_SRC_ERR_INT_ST BIT(4) ++#define CRYPTO_DST_ERR_INT_ST BIT(3) ++#define CRYPTO_SRC_ITEM_DONE_INT_ST BIT(2) ++#define CRYPTO_DST_ITEM_DONE_INT_ST BIT(1) ++#define CRYPTO_LIST_DONE_INT_ST BIT(0) + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++#define CRYPTO_LOCKSTEP_MASK (~((u32)CRYPTO_LOCKSTEP_INT_ST)) ++#define CRYPTO_DMA_CTL 0x0010 ++#define CRYPTO_DMA_RESTART BIT(1) ++#define CRYPTO_DMA_START BIT(0) + -+ return 0; -+} ++/* DMA LIST Start Address Register */ ++#define CRYPTO_DMA_LLI_ADDR 0x0014 + -+static __maybe_unused int rk1808_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct arm_smccc_res res; -+ u32 size; -+ int ret; -+ int complt_irq; -+ struct device_node *node; ++#define CRYPTO_DMA_ST 0x0018 ++#define CRYPTO_DMA_BUSY BIT(0) + -+ res = sip_smc_dram(0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ 
dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); -+ if (res.a0 || res.a1 < 0x101) { -+ dev_err(&pdev->dev, -+ "trusted firmware need to update or is invalid!\n"); -+ return -ENXIO; -+ } ++#define CRYPTO_DMA_STATE 0x001C ++#define CRYPTO_LLI_IDLE_STATE _SBF(4, 0x00) ++#define CRYPTO_LLI_FETCH_STATE _SBF(4, 0x01) ++#define CRYPTO_LLI_WORK_STATE _SBF(4, 0x02) ++#define CRYPTO_SRC_IDLE_STATE _SBF(2, 0x00) ++#define CRYPTO_SRC_LOAD_STATE _SBF(2, 0x01) ++#define CRYPTO_SRC_WORK_STATE _SBF(2, 0x02) ++#define CRYPTO_DST_IDLE_STATE _SBF(0, 0x00) ++#define CRYPTO_DST_LOAD_STATE _SBF(0, 0x01) ++#define CRYPTO_DST_WORK_STATE _SBF(0, 0x02) + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB * N is dts parameters -+ */ -+ size = sizeof(struct rk1808_ddr_dts_config_timing); -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, -+ SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_rk1808_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); ++/* DMA LLI Read Address Register */ ++#define CRYPTO_DMA_LLI_RADDR 0x0020 + -+ /* enable start dcf in kernel after dcf ready */ -+ node = of_parse_phandle(pdev->dev.of_node, "dcf_reg", 0); -+ wait_ctrl.regmap_dcf = syscon_node_to_regmap(node); -+ if (IS_ERR(wait_ctrl.regmap_dcf)) -+ return PTR_ERR(wait_ctrl.regmap_dcf); -+ wait_ctrl.dcf_en = 1; ++/* DMA Source Data Read Address Register */ ++#define CRYPTO_DMA_SRC_RADDR 0x0024 + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++/* DMA Destination Data Read Address Register */ ++#define CRYPTO_DMA_DST_RADDR 0x0028 + -+ complt_irq = platform_get_irq_byname(pdev, "complete_irq"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complete_irq: %d\n", -+ complt_irq); -+ return complt_irq; -+ } -+ wait_ctrl.complt_irq = complt_irq; ++#define CRYPTO_DMA_ITEM_ID 0x002C + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complete_irq\n"); -+ return ret; -+ } -+ disable_irq(complt_irq); ++#define CRYPTO_FIFO_CTL 0x0040 ++#define CRYPTO_DOUT_BYTESWAP BIT(1) ++#define CRYPTO_DOIN_BYTESWAP BIT(0) + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); -+ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++/* Block Cipher Control Register */ ++#define CRYPTO_BC_CTL 0x0044 ++#define CRYPTO_BC_AES _SBF(8, 0x00) ++#define CRYPTO_BC_SM4 _SBF(8, 0x01) ++#define CRYPTO_BC_DES _SBF(8, 0x02) ++#define CRYPTO_BC_TDES _SBF(8, 0x03) ++#define CRYPTO_BC_ECB _SBF(4, 0x00) ++#define CRYPTO_BC_CBC _SBF(4, 0x01) ++#define CRYPTO_BC_CTS _SBF(4, 0x02) ++#define CRYPTO_BC_CTR _SBF(4, 0x03) ++#define CRYPTO_BC_CFB _SBF(4, 0x04) ++#define CRYPTO_BC_OFB _SBF(4, 0x05) ++#define CRYPTO_BC_XTS _SBF(4, 0x06) ++#define CRYPTO_BC_CCM _SBF(4, 0x07) ++#define CRYPTO_BC_GCM _SBF(4, 0x08) ++#define CRYPTO_BC_CMAC _SBF(4, 0x09) ++#define CRYPTO_BC_CBC_MAC _SBF(4, 0x0A) ++#define CRYPTO_BC_128_bit_key _SBF(2, 0x00) ++#define CRYPTO_BC_192_bit_key _SBF(2, 0x01) ++#define CRYPTO_BC_256_bit_key _SBF(2, 0x02) ++#define CRYPTO_BC_DECRYPT BIT(1) ++#define CRYPTO_BC_ENABLE BIT(0) + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, 
"rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++/* Hash Control Register */ ++#define CRYPTO_HASH_CTL 0x0048 ++#define CRYPTO_SHA1 _SBF(4, 0x00) ++#define CRYPTO_MD5 _SBF(4, 0x01) ++#define CRYPTO_SHA256 _SBF(4, 0x02) ++#define CRYPTO_SHA224 _SBF(4, 0x03) ++#define CRYPTO_SM3 _SBF(4, 0x06) ++#define CRYPTO_SHA512 _SBF(4, 0x08) ++#define CRYPTO_SHA384 _SBF(4, 0x09) ++#define CRYPTO_SHA512_224 _SBF(4, 0x0A) ++#define CRYPTO_SHA512_256 _SBF(4, 0x0B) ++#define CRYPTO_HMAC_ENABLE BIT(3) ++#define CRYPTO_HW_PAD_ENABLE BIT(2) ++#define CRYPTO_HASH_SRC_SEL BIT(1) ++#define CRYPTO_HASH_ENABLE BIT(0) + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++/* Cipher Status Register */ ++#define CRYPTO_CIPHER_ST 0x004C ++#define CRYPTO_OTP_KEY_VALID BIT(2) ++#define CRYPTO_HASH_BUSY BIT(1) ++#define CRYPTO_BLOCK_CIPHER_BUSY BIT(0) + -+ return 0; -+} ++#define CRYPTO_CIPHER_STATE 0x0050 ++#define CRYPTO_HASH_IDLE_STATE _SBF(10, 0x01) ++#define CRYPTO_HASH_IPAD_STATE _SBF(10, 0x02) ++#define CRYPTO_HASH_TEXT_STATE _SBF(10, 0x04) ++#define CRYPTO_HASH_OPAD_STATE _SBF(10, 0x08) ++#define CRYPTO_HASH_OPAD_EXT_STATE _SBF(10, 0x10) ++#define CRYPTO_GCM_IDLE_STATE _SBF(8, 0x00) ++#define CRYPTO_GCM_PRE_STATE _SBF(8, 0x01) ++#define CRYPTO_GCM_NA_STATE _SBF(8, 0x02) ++#define CRYPTO_GCM_PC_STATE _SBF(8, 0x03) ++#define CRYPTO_CCM_IDLE_STATE _SBF(6, 0x00) ++#define CRYPTO_CCM_PRE_STATE _SBF(6, 0x01) ++#define CRYPTO_CCM_NA_STATE _SBF(6, 0x02) ++#define CRYPTO_CCM_PC_STATE _SBF(6, 0x03) ++#define CRYPTO_PARALLEL_IDLE_STATE _SBF(4, 0x00) ++#define CRYPTO_PARALLEL_PRE_STATE _SBF(4, 0x01) ++#define CRYPTO_PARALLEL_BULK_STATE _SBF(4, 0x02) ++#define CRYPTO_MAC_IDLE_STATE _SBF(2, 0x00) ++#define CRYPTO_MAC_PRE_STATE _SBF(2, 0x01) ++#define CRYPTO_MAC_BULK_STATE _SBF(2, 0x02) ++#define CRYPTO_SERIAL_IDLE_STATE _SBF(0, 0x00) ++#define CRYPTO_SERIAL_PRE_STATE _SBF(0, 0x01) ++#define CRYPTO_SERIAL_BULK_STATE _SBF(0, 0x02) + -+static __maybe_unused int rk3128_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct arm_smccc_res res; ++#define CRYPTO_CH0_IV_0 0x0100 ++#define CRYPTO_CH0_IV_1 0x0104 ++#define CRYPTO_CH0_IV_2 0x0108 ++#define CRYPTO_CH0_IV_3 0x010c ++#define CRYPTO_CH1_IV_0 0x0110 ++#define CRYPTO_CH1_IV_1 0x0114 ++#define CRYPTO_CH1_IV_2 0x0118 ++#define CRYPTO_CH1_IV_3 0x011c ++#define CRYPTO_CH2_IV_0 0x0120 ++#define CRYPTO_CH2_IV_1 0x0124 ++#define CRYPTO_CH2_IV_2 0x0128 ++#define CRYPTO_CH2_IV_3 0x012c ++#define CRYPTO_CH3_IV_0 0x0130 ++#define CRYPTO_CH3_IV_1 0x0134 ++#define CRYPTO_CH3_IV_2 0x0138 ++#define CRYPTO_CH3_IV_3 0x013c ++#define CRYPTO_CH4_IV_0 0x0140 ++#define CRYPTO_CH4_IV_1 0x0144 ++#define CRYPTO_CH4_IV_2 0x0148 ++#define CRYPTO_CH4_IV_3 0x014c ++#define CRYPTO_CH5_IV_0 0x0150 ++#define CRYPTO_CH5_IV_1 0x0154 ++#define CRYPTO_CH5_IV_2 0x0158 ++#define CRYPTO_CH5_IV_3 0x015c ++#define CRYPTO_CH6_IV_0 0x0160 ++#define CRYPTO_CH6_IV_1 0x0164 ++#define CRYPTO_CH6_IV_2 0x0168 ++#define CRYPTO_CH6_IV_3 0x016c ++#define CRYPTO_CH7_IV_0 0x0170 ++#define CRYPTO_CH7_IV_1 0x0174 ++#define CRYPTO_CH7_IV_2 0x0178 ++#define CRYPTO_CH7_IV_3 0x017c + -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( -+ struct rk3128_ddr_dts_config_timing), -+ 4096) + 1, SHARE_PAGE_TYPE_DDR); -+ if (res.a0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_rk3128_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); 
++#define CRYPTO_CH0_KEY_0 0x0180 ++#define CRYPTO_CH0_KEY_1 0x0184 ++#define CRYPTO_CH0_KEY_2 0x0188 ++#define CRYPTO_CH0_KEY_3 0x018c ++#define CRYPTO_CH1_KEY_0 0x0190 ++#define CRYPTO_CH1_KEY_1 0x0194 ++#define CRYPTO_CH1_KEY_2 0x0198 ++#define CRYPTO_CH1_KEY_3 0x019c ++#define CRYPTO_CH2_KEY_0 0x01a0 ++#define CRYPTO_CH2_KEY_1 0x01a4 ++#define CRYPTO_CH2_KEY_2 0x01a8 ++#define CRYPTO_CH2_KEY_3 0x01ac ++#define CRYPTO_CH3_KEY_0 0x01b0 ++#define CRYPTO_CH3_KEY_1 0x01b4 ++#define CRYPTO_CH3_KEY_2 0x01b8 ++#define CRYPTO_CH3_KEY_3 0x01bc ++#define CRYPTO_CH4_KEY_0 0x01c0 ++#define CRYPTO_CH4_KEY_1 0x01c4 ++#define CRYPTO_CH4_KEY_2 0x01c8 ++#define CRYPTO_CH4_KEY_3 0x01cc ++#define CRYPTO_CH5_KEY_0 0x01d0 ++#define CRYPTO_CH5_KEY_1 0x01d4 ++#define CRYPTO_CH5_KEY_2 0x01d8 ++#define CRYPTO_CH5_KEY_3 0x01dc ++#define CRYPTO_CH6_KEY_0 0x01e0 ++#define CRYPTO_CH6_KEY_1 0x01e4 ++#define CRYPTO_CH6_KEY_2 0x01e8 ++#define CRYPTO_CH6_KEY_3 0x01ec ++#define CRYPTO_CH7_KEY_0 0x01f0 ++#define CRYPTO_CH7_KEY_1 0x01f4 ++#define CRYPTO_CH7_KEY_2 0x01f8 ++#define CRYPTO_CH7_KEY_3 0x01fc ++#define CRYPTO_KEY_CHANNEL_NUM 8 + -+ ddr_psci_param->hz = 0; -+ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); ++#define CRYPTO_CH0_PC_LEN_0 0x0280 ++#define CRYPTO_CH0_PC_LEN_1 0x0284 ++#define CRYPTO_CH1_PC_LEN_0 0x0288 ++#define CRYPTO_CH1_PC_LEN_1 0x028c ++#define CRYPTO_CH2_PC_LEN_0 0x0290 ++#define CRYPTO_CH2_PC_LEN_1 0x0294 ++#define CRYPTO_CH3_PC_LEN_0 0x0298 ++#define CRYPTO_CH3_PC_LEN_1 0x029c ++#define CRYPTO_CH4_PC_LEN_0 0x02a0 ++#define CRYPTO_CH4_PC_LEN_1 0x02a4 ++#define CRYPTO_CH5_PC_LEN_0 0x02a8 ++#define CRYPTO_CH5_PC_LEN_1 0x02ac ++#define CRYPTO_CH6_PC_LEN_0 0x02b0 ++#define CRYPTO_CH6_PC_LEN_1 0x02b4 ++#define CRYPTO_CH7_PC_LEN_0 0x02b8 ++#define CRYPTO_CH7_PC_LEN_1 0x02bc + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++#define CRYPTO_CH0_AAD_LEN_0 0x02c0 ++#define CRYPTO_CH0_AAD_LEN_1 0x02c4 ++#define CRYPTO_CH1_AAD_LEN_0 0x02c8 ++#define CRYPTO_CH1_AAD_LEN_1 0x02cc ++#define CRYPTO_CH2_AAD_LEN_0 0x02d0 ++#define CRYPTO_CH2_AAD_LEN_1 0x02d4 ++#define CRYPTO_CH3_AAD_LEN_0 0x02d8 ++#define CRYPTO_CH3_AAD_LEN_1 0x02dc ++#define CRYPTO_CH4_AAD_LEN_0 0x02e0 ++#define CRYPTO_CH4_AAD_LEN_1 0x02e4 ++#define CRYPTO_CH5_AAD_LEN_0 0x02e8 ++#define CRYPTO_CH5_AAD_LEN_1 0x02ec ++#define CRYPTO_CH6_AAD_LEN_0 0x02f0 ++#define CRYPTO_CH6_AAD_LEN_1 0x02f4 ++#define CRYPTO_CH7_AAD_LEN_0 0x02f8 ++#define CRYPTO_CH7_AAD_LEN_1 0x02fc + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++#define CRYPTO_CH0_IV_LEN_0 0x0300 ++#define CRYPTO_CH1_IV_LEN_0 0x0304 ++#define CRYPTO_CH2_IV_LEN_0 0x0308 ++#define CRYPTO_CH3_IV_LEN_0 0x030c ++#define CRYPTO_CH4_IV_LEN_0 0x0310 ++#define CRYPTO_CH5_IV_LEN_0 0x0314 ++#define CRYPTO_CH6_IV_LEN_0 0x0318 ++#define CRYPTO_CH7_IV_LEN_0 0x031c + -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++#define CRYPTO_CH0_TAG_0 0x0320 ++#define CRYPTO_CH0_TAG_1 0x0324 ++#define CRYPTO_CH0_TAG_2 0x0328 ++#define CRYPTO_CH0_TAG_3 0x032c + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++#define CRYPTO_HASH_DOUT_0 0x03a0 ++#define CRYPTO_HASH_DOUT_1 0x03a4 ++#define CRYPTO_HASH_DOUT_2 0x03a8 ++#define CRYPTO_HASH_DOUT_3 0x03ac ++#define CRYPTO_HASH_DOUT_4 0x03b0 ++#define CRYPTO_HASH_DOUT_5 0x03b4 ++#define CRYPTO_HASH_DOUT_6 0x03b8 ++#define CRYPTO_HASH_DOUT_7 0x03bc ++#define CRYPTO_HASH_DOUT_8 0x03c0 
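
The eight per-channel IV and key banks listed above sit at a fixed 0x10-byte stride with four 32-bit words per channel, and each CHn_PC_LEN_0/_1 and CHn_AAD_LEN_0/_1 pair holds one 64-bit byte count split into a low and a high word, which is how set_pc_len_reg() and set_aad_len_reg() in rk_crypto_v2_skcipher.c further down program channel 0. A standalone sketch of that address arithmetic and length split; the ex_* names and the assert harness are illustrative only, not part of the patch:

#include <assert.h>
#include <stdint.h>

/* Offsets copied from the register list above. */
#define EX_CRYPTO_CH0_IV_0   0x0100
#define EX_CRYPTO_CH0_KEY_0  0x0180
#define EX_CRYPTO_CHN_STRIDE 0x10        /* CRYPTO_CH1_IV_0 - CRYPTO_CH0_IV_0 */

/* Offset of 32-bit word 'word' (0..3) of channel 'chn' (0..7) in a bank. */
static uint32_t ex_chn_word(uint32_t bank_base, unsigned int chn, unsigned int word)
{
	return bank_base + chn * EX_CRYPTO_CHN_STRIDE + 4 * word;
}

/* Split a 64-bit length the way set_pc_len_reg()/set_aad_len_reg() do. */
static void ex_split_len64(uint64_t len, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)(len & 0xffffffffu);
	*hi = (uint32_t)(len >> 32);
}

int main(void)
{
	uint32_t lo, hi;

	assert(ex_chn_word(EX_CRYPTO_CH0_IV_0, 7, 3) == 0x017c);   /* CRYPTO_CH7_IV_3  */
	assert(ex_chn_word(EX_CRYPTO_CH0_KEY_0, 4, 0) == 0x01c0);  /* CRYPTO_CH4_KEY_0 */

	ex_split_len64(0x100000004ULL, &lo, &hi);
	assert(lo == 0x00000004 && hi == 0x00000001);
	return 0;
}
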
++#define CRYPTO_HASH_DOUT_9 0x03c4 ++#define CRYPTO_HASH_DOUT_10 0x03c8 ++#define CRYPTO_HASH_DOUT_11 0x03cc ++#define CRYPTO_HASH_DOUT_12 0x03d0 ++#define CRYPTO_HASH_DOUT_13 0x03d4 ++#define CRYPTO_HASH_DOUT_14 0x03d8 ++#define CRYPTO_HASH_DOUT_15 0x03dc + -+ return 0; -+} ++#define CRYPTO_TAG_VALID 0x03e0 ++#define CRYPTO_CH0_TAG_VALID BIT(0) + -+static __maybe_unused int rk3228_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct arm_smccc_res res; ++#define CRYPTO_HASH_VALID 0x03e4 ++#define CRYPTO_HASH_IS_VALID BIT(0) + -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( -+ struct rk3228_ddr_dts_config_timing), -+ 4096) + 1, SHARE_PAGE_TYPE_DDR); -+ if (res.a0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } ++#define LLI_DMA_CTRL_LAST BIT(0) ++#define LLI_DMA_CTRL_PAUSE BIT(1) ++#define LLI_DMA_CTRL_LIST_DONE BIT(8) ++#define LLI_DMA_CTRL_DST_DONE BIT(9) ++#define LLI_DMA_CTRL_SRC_DONE BIT(10) + -+ ddr_psci_param = (struct share_params *)res.a1; -+ if (of_get_rk3228_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param)) -+ return -ENOMEM; ++#define LLI_USER_CIPHER_START BIT(0) ++#define LLI_USER_STRING_START BIT(1) ++#define LLI_USER_STRING_LAST BIT(2) ++#define LLI_USER_STRING_AAD BIT(3) ++#define LLI_USER_PRIVACY_KEY BIT(7) ++#define LLI_USER_ROOT_KEY BIT(8) + -+ ddr_psci_param->hz = 0; ++#define CRYPTO_PKA_BASE_OFFSET 0x0480 + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++#define CRYPTO_RAM_CTL (0x0480 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_RAM_PKA_RDY BIT(0) + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++#define CRYPTO_RAM_ST (0x0484 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_CLK_RAM_RDY BIT(0) ++#define CRYPTO_CLK_RAM_RDY_MASK BIT(0) + -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++#define CRYPTO_DEBUG_CTL (0x04a0 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_DEBUG_MODE BIT(0) + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++#define CRYPTO_DEBUG_ST (0x04a4 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_DEBUG_CLK_EN BIT(0) + -+ return 0; -+} ++#define CRYPTO_DEBUG_MONITOR (0x04a8 - CRYPTO_PKA_BASE_OFFSET) + -+static __maybe_unused int rk3288_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct device *dev = &pdev->dev; -+ struct clk *pclk_phy, *pclk_upctl, *dmc_clk; -+ struct arm_smccc_res res; -+ int ret; ++/* MAP0 ~ MAP31 */ ++#define CRYPTO_MEMORY_MAP0 (0x00800 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_MAP_REG_NUM 32 + -+ dmc_clk = devm_clk_get(dev, "dmc_clk"); -+ if (IS_ERR(dmc_clk)) { -+ dev_err(dev, "Cannot get the clk dmc_clk\n"); -+ return PTR_ERR(dmc_clk); -+ } -+ ret = clk_prepare_enable(dmc_clk); -+ if (ret < 0) { -+ dev_err(dev, "failed to prepare/enable dmc_clk\n"); -+ return ret; -+ } ++#define CRYPTO_OPCODE (0x00880 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_OPCODE_TAG_SHIFT 0 ++#define CRYPTO_OPCODE_R_SHIFT 6 ++#define CRYPTO_OPCODE_R_DIS_SHIFT 11 ++#define CRYPTO_OPCODE_B_SHIFT 12 ++#define CRYPTO_OPCODE_B_IMMED_SHIFT 17 ++#define CRYPTO_OPCODE_A_SHIFT 18 ++#define CRYPTO_OPCODE_A_IMMED_SHIFT 23 ++#define CRYPTO_OPCODE_LEN_SHIFT 24 ++#define CRYPTO_OPCODE_CODE_SHIFT 27 + -+ pclk_phy = devm_clk_get(dev, "pclk_phy0"); -+ if (IS_ERR(pclk_phy)) { -+ dev_err(dev, "Cannot get the clk pclk_phy0\n"); -+ return PTR_ERR(pclk_phy); -+ } -+ ret = 
clk_prepare_enable(pclk_phy); -+ if (ret < 0) { -+ dev_err(dev, "failed to prepare/enable pclk_phy0\n"); -+ return ret; -+ } -+ pclk_upctl = devm_clk_get(dev, "pclk_upctl0"); -+ if (IS_ERR(pclk_upctl)) { -+ dev_err(dev, "Cannot get the clk pclk_upctl0\n"); -+ return PTR_ERR(pclk_upctl); -+ } -+ ret = clk_prepare_enable(pclk_upctl); -+ if (ret < 0) { -+ dev_err(dev, "failed to prepare/enable pclk_upctl1\n"); -+ return ret; -+ } ++#define CRYPTO_N_NP_T0_T1_ADDR (0x00884 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_N_VIRTUAL_ADDR_SHIFT 0 ++#define CRYPTO_N_VIRTUAL_ADDR_MASK 0x0000001f ++#define CRYPTO_NP_VIRTUAL_ADDR_SHIFT 5 ++#define CRYPTO_NP_VIRTUAL_ADDR_MASK 0x000003e0 ++#define CRYPTO_T0_VIRTUAL_ADDR_SHIFT 10 ++#define CRYPTO_T0_VIRTUAL_ADDR_MASK 0x00007c00 ++#define CRYPTO_T1_VIRTUAL_ADDR_SHIFT 15 ++#define CRYPTO_T1_VIRTUAL_ADDR_MASK 0x000f8000 + -+ pclk_phy = devm_clk_get(dev, "pclk_phy1"); -+ if (IS_ERR(pclk_phy)) { -+ dev_err(dev, "Cannot get the clk pclk_phy1\n"); -+ return PTR_ERR(pclk_phy); -+ } -+ ret = clk_prepare_enable(pclk_phy); -+ if (ret < 0) { -+ dev_err(dev, "failed to prepare/enable pclk_phy1\n"); -+ return ret; -+ } -+ pclk_upctl = devm_clk_get(dev, "pclk_upctl1"); -+ if (IS_ERR(pclk_upctl)) { -+ dev_err(dev, "Cannot get the clk pclk_upctl1\n"); -+ return PTR_ERR(pclk_upctl); -+ } -+ ret = clk_prepare_enable(pclk_upctl); -+ if (ret < 0) { -+ dev_err(dev, "failed to prepare/enable pclk_upctl1\n"); -+ return ret; -+ } ++#define CRYPTO_PKA_STATUS (0x00888 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_PIPE_IS_RDY BIT(0) ++#define CRYPTO_PKA_BUSY BIT(1) ++#define CRYPTO_PKA_ALU_OUT_ZERO BIT(2) ++#define CRYPTO_PKA_ALU_MODOVRFLW BIT(3) ++#define CRYPTO_PKA_DIV_BY_ZERO BIT(4) ++#define CRYPTO_PKA_ALU_CARRY BIT(5) ++#define CRYPTO_PKA_ALU_SIGN_OUT BIT(6) ++#define CRYPTO_PKA_MODINV_OF_ZERO BIT(7) ++#define CRYPTO_PKA_CPU_BUSY BIT(8) ++#define CRYPTO_PKA_OPCODE_STATUS_SHIFT 9 ++#define CRYPTO_PKA_OPCODE_STATUS_MASK 0x00003e00 ++#define CRYPTO_PKA_TAG_STATUS_SHIFT 14 ++#define CRYPTO_PKA_TAG_STATUS_MASK 0x0003c000 + -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( -+ struct rk3288_ddr_dts_config_timing), -+ 4096) + 1, SHARE_PAGE_TYPE_DDR); -+ if (res.a0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } ++#define CRYPTO_PKA_SW_RESET (0x0088C - CRYPTO_PKA_BASE_OFFSET) + -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_rk3288_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); ++/* PKA_L0 ~ PKA_L7 */ ++#define CRYPTO_PKA_L0 (0x00890 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_LEN_REG_NUM 8 + -+ ddr_psci_param->hz = 0; -+ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); ++#define CRYPTO_PKA_PIPE_RDY (0x008B0 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_DONE (0x008B4 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_MON_SELECT (0x008B8 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_DEBUG_REG_EN (0x008BC - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_DEBUG_CNT_ADDR (0x008C0 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_DEBUG_EXT_ADDR (0x008C4 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_DEBUG_HALT (0x008C8 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_MON_READ (0x008D0 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_INT_ENA (0x008D4 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_PKA_INT_ST (0x008D8 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_SRAM_BASE (0x01000 - CRYPTO_PKA_BASE_OFFSET) ++#define CRYPTO_SRAM_SIZE 0x01000 + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); 
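
One detail worth spelling out before the header closes: CRYPTO_N_NP_T0_T1_ADDR packs four 5-bit virtual register indices, and the PKA code earlier in this patch maps the modulus N to register 0, the precomputed NP value to register 1 and the temporaries T0/T1 to registers 30 and 31 (see pka_int_map_tbl() and its comments). The exact value of PKA_N_NP_T0_T1_REG_DEFAULT is not visible in this hunk, so the packing below is only a sketch built from the shifts and masks defined above; the ex_* names are illustrative:

#include <assert.h>
#include <stdint.h>

/* Field shifts copied from the definitions above. */
#define EX_N_SHIFT   0   /* CRYPTO_N_VIRTUAL_ADDR_SHIFT  */
#define EX_NP_SHIFT  5   /* CRYPTO_NP_VIRTUAL_ADDR_SHIFT */
#define EX_T0_SHIFT  10  /* CRYPTO_T0_VIRTUAL_ADDR_SHIFT */
#define EX_T1_SHIFT  15  /* CRYPTO_T1_VIRTUAL_ADDR_SHIFT */

static uint32_t ex_pack_n_np_t0_t1(uint32_t n, uint32_t np, uint32_t t0, uint32_t t1)
{
	return (n << EX_N_SHIFT) | (np << EX_NP_SHIFT) |
	       (t0 << EX_T0_SHIFT) | (t1 << EX_T1_SHIFT);
}

int main(void)
{
	/* N = reg 0, NP = reg 1, T0 = reg 30, T1 = reg 31. */
	uint32_t v = ex_pack_n_np_t0_t1(0, 1, 30, 31);

	assert(((v & 0x0000001f) >> 0)  == 0);   /* CRYPTO_N_VIRTUAL_ADDR_MASK  */
	assert(((v & 0x000003e0) >> 5)  == 1);   /* CRYPTO_NP_VIRTUAL_ADDR_MASK */
	assert(((v & 0x00007c00) >> 10) == 30);  /* CRYPTO_T0_VIRTUAL_ADDR_MASK */
	assert(((v & 0x000f8000) >> 15) == 31);  /* CRYPTO_T1_VIRTUAL_ADDR_MASK */
	return 0;
}
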
++#endif + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); +diff --git a/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c b/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c +new file mode 100644 +index 000000000..2bfff0d28 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v2_skcipher.c +@@ -0,0 +1,685 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip Crypto V2 ++ * ++ * Copyright (c) 2018, Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. ++ */ + -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++#include ++#include ++#include ++#include + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++#include "rk_crypto_core.h" ++#include "rk_crypto_utils.h" ++#include "rk_crypto_skcipher_utils.h" ++#include "rk_crypto_v2.h" ++#include "rk_crypto_v2_reg.h" + -+ return 0; -+} ++#define RK_POLL_PERIOD_US 100 ++#define RK_POLL_TIMEOUT_US 50000 + -+static __maybe_unused int rk3328_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static const u32 cipher_algo2bc[] = { ++ [CIPHER_ALGO_DES] = CRYPTO_BC_DES, ++ [CIPHER_ALGO_DES3_EDE] = CRYPTO_BC_TDES, ++ [CIPHER_ALGO_AES] = CRYPTO_BC_AES, ++ [CIPHER_ALGO_SM4] = CRYPTO_BC_SM4, ++}; ++ ++static const u32 cipher_mode2bc[] = { ++ [CIPHER_MODE_ECB] = CRYPTO_BC_ECB, ++ [CIPHER_MODE_CBC] = CRYPTO_BC_CBC, ++ [CIPHER_MODE_CFB] = CRYPTO_BC_CFB, ++ [CIPHER_MODE_OFB] = CRYPTO_BC_OFB, ++ [CIPHER_MODE_CTR] = CRYPTO_BC_CTR, ++ [CIPHER_MODE_XTS] = CRYPTO_BC_XTS, ++ [CIPHER_MODE_GCM] = CRYPTO_BC_GCM, ++}; ++ ++static int rk_crypto_irq_handle(int irq, void *dev_id) +{ -+ struct arm_smccc_res res; -+ u32 size; ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 interrupt_status; ++ struct rk_hw_crypto_v2_info *hw_info = ++ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); + -+ res = sip_smc_dram(0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); -+ if (res.a0 || (res.a1 < 0x101)) { -+ dev_err(&pdev->dev, -+ "trusted firmware need to update or is invalid!\n"); -+ return -ENXIO; -+ } ++ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); + -+ dev_notice(&pdev->dev, "read tf version 0x%lx!\n", res.a1); ++ interrupt_status &= CRYPTO_LOCKSTEP_MASK; + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB * N is dts parameters -+ */ -+ size = sizeof(struct rk3328_ddr_dts_config_timing); -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, -+ SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; ++ if (interrupt_status != CRYPTO_DST_ITEM_DONE_INT_ST) { ++ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); ++ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", ++ (u32)alg_ctx->addr_in); ++ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", ++ (u32)alg_ctx->addr_out); ++ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); ++ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", ++ (u32)hw_info->hw_desc.lli_head_dma); ++ dev_err(rk_dev->dev, "DMA Error status = %08x\n", ++ interrupt_status); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); ++ 
dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); ++ rk_dev->err = -EFAULT; + } -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_rk3328_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++ return 0; ++} + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++static inline void set_pc_len_reg(struct rk_crypto_dev *rk_dev, u64 pc_len) ++{ ++ u32 chn_base = CRYPTO_CH0_PC_LEN_0; + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++ CRYPTO_TRACE("PC length = %lu\n", (unsigned long)pc_len); + -+ return 0; ++ CRYPTO_WRITE(rk_dev, chn_base, pc_len & 0xffffffff); ++ CRYPTO_WRITE(rk_dev, chn_base + 4, pc_len >> 32); +} + -+static int rk3399_set_msch_readlatency(unsigned int readlatency) ++static inline void set_aad_len_reg(struct rk_crypto_dev *rk_dev, u64 aad_len) +{ -+ struct arm_smccc_res res; ++ u32 chn_base = CRYPTO_CH0_AAD_LEN_0; + -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, readlatency, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_MSCH_RL, -+ 0, 0, 0, 0, &res); ++ CRYPTO_TRACE("AAD length = %lu\n", (unsigned long)aad_len); + -+ return res.a0; ++ CRYPTO_WRITE(rk_dev, chn_base, aad_len & 0xffffffff); ++ CRYPTO_WRITE(rk_dev, chn_base + 4, aad_len >> 32); +} + -+static __maybe_unused int rk3399_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static void set_iv_reg(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len) +{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = pdev->dev.of_node; -+ struct arm_smccc_res res; -+ struct rk3399_dram_timing *dram_timing; -+ int index, size; -+ u32 *timing; ++ if (!iv || iv_len == 0) ++ return; + -+ /* -+ * Get dram timing and pass it to arm trust firmware, -+ * the dram drvier in arm trust firmware will get these -+ * timing and to do dram initial. 
-+ */ -+ dram_timing = of_get_rk3399_timings(dev, np); -+ if (dram_timing) { -+ timing = (u32 *)dram_timing; -+ size = sizeof(struct rk3399_dram_timing) / 4; -+ for (index = 0; index < size; index++) { -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index, -+ ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM, -+ 0, 0, 0, 0, &res); -+ if (res.a0) { -+ dev_err(dev, "Failed to set dram param: %ld\n", -+ res.a0); -+ return -EINVAL; -+ } -+ } -+ } ++ CRYPTO_DUMPHEX("set iv", iv, iv_len); + -+ dmcfreq->set_rate_params = -+ devm_kzalloc(dev, sizeof(struct share_params), GFP_KERNEL); -+ if (!dmcfreq->set_rate_params) -+ return -ENOMEM; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_IV_0, iv, iv_len); + -+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT, -+ 0, 0, 0, 0, &res); ++ CRYPTO_WRITE(rk_dev, CRYPTO_CH0_IV_LEN_0, iv_len); ++} + -+ dmcfreq->info.set_msch_readlatency = rk3399_set_msch_readlatency; ++static void write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); ++} + -+ return 0; ++static void write_tkey_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH4_KEY_0, key, key_len); +} + -+static __maybe_unused int rk3528_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static int get_tag_reg(struct rk_crypto_dev *rk_dev, u8 *tag, u32 tag_len) +{ -+ struct arm_smccc_res res; + int ret; -+ int complt_irq; -+ u32 complt_hwirq; -+ struct irq_data *complt_irq_data; -+ -+ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); -+ if (res.a0 || res.a1 < 0x100) { -+ dev_err(&pdev->dev, "trusted firmware need update to V1.00 and above.\n"); -+ return -ENXIO; -+ } ++ u32 reg_ctrl = 0; + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB is dts parameters -+ * request share memory size 4KB * 2 -+ */ -+ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ /* Clear ddr_psci_param, size is 4KB * 2 */ -+ memset_io(ddr_psci_param, 0x0, 4096 * 2); ++ CRYPTO_TRACE("tag_len = %u", tag_len); + -+ wait_ctrl.dcf_en = 0; ++ if (tag_len > RK_MAX_TAG_SIZE) ++ return -EINVAL; + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++ ret = read_poll_timeout_atomic(CRYPTO_READ, ++ reg_ctrl, ++ reg_ctrl & CRYPTO_CH0_TAG_VALID, ++ 0, ++ RK_POLL_TIMEOUT_US, ++ false, ++ rk_dev, CRYPTO_TAG_VALID); ++ if (ret) ++ goto exit; + -+ complt_irq = platform_get_irq_byname(pdev, "complete"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", complt_irq); -+ return complt_irq; -+ } -+ wait_ctrl.complt_irq = complt_irq; ++ rk_crypto_read_regs(rk_dev, CRYPTO_CH0_TAG_0, tag, tag_len); ++exit: ++ return ret; ++} + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complt_irq\n"); -+ return ret; -+ } -+ disable_irq(complt_irq); ++static bool is_force_fallback(struct rk_crypto_algt *algt, uint32_t key_len) ++{ ++ if (algt->algo != CIPHER_ALGO_AES) ++ return false; + -+ complt_irq_data = irq_get_irq_data(complt_irq); -+ complt_hwirq = 
irqd_to_hwirq(complt_irq_data); -+ ddr_psci_param->complt_hwirq = complt_hwirq; ++ /* crypto v2 not support xts with AES-192 */ ++ if (algt->mode == CIPHER_MODE_XTS && key_len == AES_KEYSIZE_192 * 2) ++ return true; + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", res.a0); -+ return -ENOMEM; -+ } ++ if (algt->use_soft_aes192 && key_len == AES_KEYSIZE_192) ++ return true; + -+ ret = rockchip_get_freq_info(dmcfreq); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot get frequency info\n"); -+ return ret; -+ } -+ dmcfreq->is_set_rate_direct = true; ++ return false; ++} + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++static bool is_calc_need_round_up(struct skcipher_request *req) ++{ ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); + -+ return 0; ++ return (algt->mode == CIPHER_MODE_CFB || ++ algt->mode == CIPHER_MODE_OFB || ++ algt->mode == CIPHER_MODE_CTR) ? true : false; +} + -+static __maybe_unused int rk3568_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static void rk_cipher_reset(struct rk_crypto_dev *rk_dev) +{ -+ struct arm_smccc_res res; + int ret; -+ int complt_irq; ++ u32 tmp = 0, tmp_mask = 0; ++ unsigned int pool_timeout_us = 1000; + -+ res = sip_smc_dram(0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); -+ if (res.a0 || res.a1 < 0x101) { -+ dev_err(&pdev->dev, "trusted firmware need update to V1.01 and above.\n"); -+ return -ENXIO; -+ } ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB is dts parameters -+ * request share memory size 4KB * 2 -+ */ -+ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ /* Clear ddr_psci_param, size is 4KB * 2 */ -+ memset_io(ddr_psci_param, 0x0, 4096 * 2); ++ tmp = CRYPTO_SW_CC_RESET; ++ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; + -+ /* start mcu with sip_smc_dram */ -+ wait_ctrl.dcf_en = 2; ++ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++ /* This is usually done in 20 clock cycles */ ++ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, ++ pool_timeout_us, false, rk_dev, CRYPTO_RST_CTL); ++ if (ret) ++ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", ++ pool_timeout_us); + -+ complt_irq = platform_get_irq_byname(pdev, "complete"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", -+ complt_irq); -+ return complt_irq; -+ } -+ wait_ctrl.complt_irq = complt_irq; ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0xffff0000); + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complt_irq\n"); -+ return ret; -+ } -+ disable_irq(complt_irq); ++ /* clear dma int status */ ++ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); ++} + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); 
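
In the skcipher glue that follows, is_force_fallback() is the gatekeeper for the one AES combination this engine cannot run: XTS carries a double-length key, so a 48-byte key means AES-192-XTS and the request is routed to the software fallback (plain AES-192 is likewise punted when use_soft_aes192 is set). A standalone restatement of that check; ex_aes_needs_fallback() and its parameters are illustrative stand-ins, not the driver's API:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define EX_AES_KEYSIZE_192 24  /* bytes, matching AES_KEYSIZE_192 in <crypto/aes.h> */

static bool ex_aes_needs_fallback(bool is_xts, bool use_soft_aes192, uint32_t keylen)
{
	if (is_xts && keylen == 2 * EX_AES_KEYSIZE_192)
		return true;   /* crypto v2 does not support XTS with AES-192 */
	if (use_soft_aes192 && keylen == EX_AES_KEYSIZE_192)
		return true;   /* AES-192 optionally handed to the soft fallback */
	return false;
}

int main(void)
{
	assert(ex_aes_needs_fallback(true, false, 48));    /* xts(aes), 2 x 192-bit key */
	assert(!ex_aes_needs_fallback(true, false, 64));   /* xts(aes), 2 x 256-bit key */
	assert(!ex_aes_needs_fallback(false, false, 24));  /* e.g. cbc(aes) 192-bit, hw path */
	return 0;
}
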
-+ return -ENOMEM; -+ } ++static void rk_crypto_complete(struct crypto_async_request *base, int err) ++{ ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm); ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ struct rk_hw_crypto_v2_info *hw_info = ctx->rk_dev->hw_info; ++ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; + -+ ret = rockchip_get_freq_info(dmcfreq); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot get frequency info\n"); -+ return ret; ++ CRYPTO_WRITE(ctx->rk_dev, CRYPTO_BC_CTL, 0xffff0000); ++ if (err) { ++ rk_cipher_reset(ctx->rk_dev); ++ pr_err("aligned = %u, align_size = %u\n", ++ alg_ctx->aligned, alg_ctx->align_size); ++ pr_err("total = %u, left = %u, count = %u\n", ++ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); ++ pr_err("lli->src = %08x\n", lli_desc->src_addr); ++ pr_err("lli->src_len = %08x\n", lli_desc->src_len); ++ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); ++ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); ++ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); ++ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); ++ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); + } -+ dmcfreq->is_set_rate_direct = true; + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; -+ -+ return 0; ++ if (base->complete) ++ base->complete(base, err); +} + -+static __maybe_unused int rk3588_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static int rk_cipher_crypt(struct skcipher_request *req, bool encrypt) +{ -+ struct arm_smccc_res res; -+ struct dev_pm_opp *opp; -+ unsigned long opp_rate; -+ int ret; -+ int complt_irq; ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); + -+ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); -+ if (res.a0) { -+ dev_err(&pdev->dev, "trusted firmware unsupported, please update.\n"); -+ return -ENXIO; -+ } ++ CRYPTO_TRACE("%s total = %u", ++ encrypt ? 
"encrypt" : "decrypt", req->cryptlen); + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB is dts parameters -+ * request share memory size 4KB * 2 -+ */ -+ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; ++ if (!req->cryptlen) { ++ if (algt->mode == CIPHER_MODE_ECB || ++ algt->mode == CIPHER_MODE_CBC || ++ algt->mode == CIPHER_MODE_CTR || ++ algt->mode == CIPHER_MODE_CFB || ++ algt->mode == CIPHER_MODE_OFB) ++ return 0; ++ else ++ return -EINVAL; + } -+ ddr_psci_param = (struct share_params *)res.a1; -+ /* Clear ddr_psci_param, size is 4KB * 2 */ -+ memset_io(ddr_psci_param, 0x0, 4096 * 2); + -+ /* start mcu with sip_smc_dram */ -+ wait_ctrl.dcf_en = 2; ++ /* XTS data should >= chunksize */ ++ if (algt->mode == CIPHER_MODE_XTS) { ++ if (req->cryptlen < crypto_skcipher_chunksize(tfm)) ++ return -EINVAL; + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++ /* force use unalign branch */ ++ ctx->algs_ctx.align_size = ctx->rk_dev->vir_max; + -+ complt_irq = platform_get_irq_byname(pdev, "complete"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", complt_irq); -+ return complt_irq; ++ /* XTS can't pause when use hardware crypto */ ++ if (req->cryptlen > ctx->rk_dev->vir_max) ++ return rk_cipher_fallback(req, ctx, encrypt); + } -+ wait_ctrl.complt_irq = complt_irq; + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complt_irq\n"); -+ return ret; -+ } -+ disable_irq(complt_irq); ++ if (is_force_fallback(algt, ctx->keylen)) ++ return rk_cipher_fallback(req, ctx, encrypt); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", res.a0); -+ return -ENOMEM; -+ } ++ ctx->mode = cipher_algo2bc[algt->algo] | ++ cipher_mode2bc[algt->mode]; ++ if (!encrypt) ++ ctx->mode |= CRYPTO_BC_DECRYPT; + -+ ret = rockchip_dmcfreq_adjust_opp_table(dmcfreq); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot get frequency info\n"); -+ return ret; -+ } -+ dmcfreq->is_set_rate_direct = true; ++ if (algt->algo == CIPHER_ALGO_AES) { ++ uint32_t key_factor; + -+ /* Config the dmcfreq->sleep_volt for deepsleep */ -+ opp_rate = dmcfreq->freq_info_rate[dmcfreq->freq_count - 1]; -+ opp = devfreq_recommended_opp(&pdev->dev, &opp_rate, 0); -+ if (IS_ERR(opp)) { -+ dev_err(&pdev->dev, "Failed to find opp for %lu Hz\n", opp_rate); -+ return PTR_ERR(opp); ++ /* The key length of XTS is twice the normal length */ ++ key_factor = algt->mode == CIPHER_MODE_XTS ? 
2 : 1; ++ ++ if (ctx->keylen == AES_KEYSIZE_128 * key_factor) ++ ctx->mode |= CRYPTO_BC_128_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_192 * key_factor) ++ ctx->mode |= CRYPTO_BC_192_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_256 * key_factor) ++ ctx->mode |= CRYPTO_BC_256_bit_key; + } -+ dmcfreq->sleep_volt = opp->supplies[0].u_volt; -+ if (dmcfreq->opp_info.regulator_count > 1) -+ dmcfreq->sleep_mem_volt = opp->supplies[1].u_volt; -+ dev_pm_opp_put(opp); + -+ if (of_property_read_u32(pdev->dev.of_node, "wait-mode", &ddr_psci_param->wait_mode)) -+ ddr_psci_param->wait_mode = 0; ++ ctx->iv_len = crypto_skcipher_ivsize(tfm); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_STALL_TIME); -+ if (res.a0) -+ dev_err(dmcfreq->dev, "Current ATF unsupported get_stall_time\n"); -+ else -+ dmcfreq->info.stall_time_ns = (unsigned int)res.a1; ++ memset(ctx->iv, 0x00, sizeof(ctx->iv)); ++ memcpy(ctx->iv, req->iv, ctx->iv_len); + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++ ctx->is_enc = encrypt; + -+ return 0; ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ return rk_skcipher_handle_req(ctx->rk_dev, req); +} + -+static __maybe_unused int rv1126_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static int rk_cipher_encrypt(struct skcipher_request *req) +{ -+ struct arm_smccc_res res; -+ u32 size; -+ int ret; -+ int complt_irq; -+ struct device_node *node; ++ return rk_cipher_crypt(req, true); ++} + -+ res = sip_smc_dram(0, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); -+ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); -+ if (res.a0 || res.a1 < 0x100) { -+ dev_err(&pdev->dev, -+ "trusted firmware need to update or is invalid!\n"); -+ return -ENXIO; -+ } ++static int rk_cipher_decrypt(struct skcipher_request *req) ++{ ++ return rk_cipher_crypt(req, false); ++} + -+ /* -+ * first 4KB is used for interface parameters -+ * after 4KB * N is dts parameters -+ */ -+ size = sizeof(struct rk1808_ddr_dts_config_timing); -+ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, -+ SHARE_PAGE_TYPE_DDR); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init\n"); -+ return -ENOMEM; -+ } -+ ddr_psci_param = (struct share_params *)res.a1; -+ of_get_rv1126_timings(&pdev->dev, pdev->dev.of_node, -+ (uint32_t *)ddr_psci_param); ++static int rk_ablk_hw_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode) ++{ ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); + -+ /* enable start dcf in kernel after dcf ready */ -+ node = of_parse_phandle(pdev->dev.of_node, "dcf", 0); -+ wait_ctrl.regmap_dcf = syscon_node_to_regmap(node); -+ if (IS_ERR(wait_ctrl.regmap_dcf)) -+ return PTR_ERR(wait_ctrl.regmap_dcf); -+ wait_ctrl.dcf_en = 1; ++ rk_cipher_reset(rk_dev); + -+ init_waitqueue_head(&wait_ctrl.wait_wq); -+ wait_ctrl.wait_en = 1; -+ wait_ctrl.wait_time_out_ms = 17 * 5; ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0x00010000); + -+ complt_irq = platform_get_irq_byname(pdev, "complete"); -+ if (complt_irq < 0) { -+ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", -+ complt_irq); -+ return complt_irq; -+ } -+ wait_ctrl.complt_irq = complt_irq; ++ if (mode == CIPHER_MODE_XTS) { ++ uint32_t tmp_len = ctx->keylen / 2; + -+ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, -+ 0, dev_name(&pdev->dev), &wait_ctrl); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cannot request complt_irq\n"); -+ return ret; ++ write_key_reg(ctx->rk_dev, ctx->key, tmp_len); ++ write_tkey_reg(ctx->rk_dev, 
ctx->key + tmp_len, tmp_len); ++ } else { ++ write_key_reg(ctx->rk_dev, ctx->key, ctx->keylen); + } -+ disable_irq(complt_irq); + -+ if (of_property_read_u32(pdev->dev.of_node, "update_drv_odt_cfg", -+ &ddr_psci_param->update_drv_odt_cfg)) -+ ddr_psci_param->update_drv_odt_cfg = 0; ++ if (mode != CIPHER_MODE_ECB) ++ set_iv_reg(rk_dev, ctx->iv, ctx->iv_len); + -+ if (of_property_read_u32(pdev->dev.of_node, "update_deskew_cfg", -+ &ddr_psci_param->update_deskew_cfg)) -+ ddr_psci_param->update_deskew_cfg = 0; ++ ctx->mode |= CRYPTO_BC_ENABLE; + -+ dmcfreq->set_rate_params = ddr_psci_param; -+ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); -+ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_INIT); -+ if (res.a0) { -+ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); + -+ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, ctx->mode | CRYPTO_WRITE_MASK_ALL); + + return 0; +} + -+static const struct of_device_id rockchip_dmcfreq_of_match[] = { -+#if IS_ENABLED(CONFIG_CPU_PX30) -+ { .compatible = "rockchip,px30-dmc", .data = px30_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK1808) -+ { .compatible = "rockchip,rk1808-dmc", .data = rk1808_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK312X) -+ { .compatible = "rockchip,rk3128-dmc", .data = rk3128_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK322X) -+ { .compatible = "rockchip,rk3228-dmc", .data = rk3228_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3288) -+ { .compatible = "rockchip,rk3288-dmc", .data = rk3288_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3308) -+ { .compatible = "rockchip,rk3308-dmc", .data = NULL }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3328) -+ { .compatible = "rockchip,rk3328-dmc", .data = rk3328_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3399) -+ { .compatible = "rockchip,rk3399-dmc", .data = rk3399_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3528) -+ { .compatible = "rockchip,rk3528-dmc", .data = rk3528_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3562) -+ { .compatible = "rockchip,rk3562-dmc", .data = rk3568_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3568) -+ { .compatible = "rockchip,rk3568-dmc", .data = rk3568_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RK3588) -+ { .compatible = "rockchip,rk3588-dmc", .data = rk3588_dmc_init }, -+#endif -+#if IS_ENABLED(CONFIG_CPU_RV1126) -+ { .compatible = "rockchip,rv1126-dmc", .data = rv1126_dmc_init }, -+#endif -+ { }, -+}; -+MODULE_DEVICE_TABLE(of, rockchip_dmcfreq_of_match); -+ -+static int rockchip_get_freq_map_talbe(struct device_node *np, char *porp_name, -+ struct freq_map_table **table) ++static int crypto_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) +{ -+ struct freq_map_table *tbl; -+ const struct property *prop; -+ unsigned int temp_freq = 0; -+ int count, i; ++ struct rk_hw_crypto_v2_info *hw_info = ++ (struct rk_hw_crypto_v2_info *)rk_dev->hw_info; ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ struct crypto_lli_desc *lli_head, *lli_tail, *lli_aad; ++ u32 calc_len = alg_ctx->count; ++ u32 start_flag = CRYPTO_DMA_START; ++ int ret; + -+ prop = of_find_property(np, porp_name, NULL); -+ if (!prop) -+ 
return -EINVAL; ++ if (alg_ctx->aligned) ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ alg_ctx->sg_src, alg_ctx->sg_dst, alg_ctx->count); ++ else ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ &alg_ctx->sg_tmp, &alg_ctx->sg_tmp, alg_ctx->count); ++ if (ret) ++ return ret; + -+ if (!prop->value) -+ return -ENODATA; ++ lli_head = hw_info->hw_desc.lli_head; ++ lli_tail = hw_info->hw_desc.lli_tail; ++ lli_aad = hw_info->hw_desc.lli_aad; + -+ count = of_property_count_u32_elems(np, porp_name); -+ if (count < 0) -+ return -EINVAL; ++ /* ++ * the data length is not aligned will use addr_vir to calculate, ++ * so crypto v2 could round up data length to chunk_size ++ */ ++ if (!alg_ctx->is_aead && is_calc_need_round_up(req)) ++ calc_len = round_up(calc_len, alg_ctx->chunk_size); + -+ if (count % 3) -+ return -EINVAL; ++ CRYPTO_TRACE("calc_len = %u, cryptlen = %u, assoclen= %u, is_aead = %d", ++ calc_len, alg_ctx->total, alg_ctx->assoclen, alg_ctx->is_aead); + -+ tbl = kzalloc(sizeof(*tbl) * (count / 3 + 1), GFP_KERNEL); -+ if (!tbl) -+ return -ENOMEM; ++ lli_head->user_define = LLI_USER_STRING_START | LLI_USER_CIPHER_START; + -+ for (i = 0; i < count / 3; i++) { -+ of_property_read_u32_index(np, porp_name, 3 * i, &tbl[i].min); -+ of_property_read_u32_index(np, porp_name, 3 * i + 1, -+ &tbl[i].max); -+ of_property_read_u32_index(np, porp_name, 3 * i + 2, -+ &temp_freq); -+ tbl[i].freq = temp_freq * 1000; ++ lli_tail->dma_ctrl = LLI_DMA_CTRL_DST_DONE | LLI_DMA_CTRL_LAST; ++ lli_tail->user_define |= LLI_USER_STRING_LAST; ++ lli_tail->src_len += (calc_len - alg_ctx->count); ++ lli_tail->dst_len += (calc_len - alg_ctx->count); ++ ++ if (alg_ctx->is_aead) { ++ lli_aad->src_addr = alg_ctx->addr_aad_in; ++ lli_aad->src_len = alg_ctx->assoclen; ++ lli_aad->user_define = LLI_USER_CIPHER_START | ++ LLI_USER_STRING_START | ++ LLI_USER_STRING_LAST | ++ LLI_USER_STRING_AAD; ++ lli_aad->next_addr = hw_info->hw_desc.lli_head_dma; ++ ++ /* clear cipher start */ ++ lli_head->user_define &= (~((u32)LLI_USER_CIPHER_START)); ++ ++ set_pc_len_reg(rk_dev, alg_ctx->total); ++ set_aad_len_reg(rk_dev, alg_ctx->assoclen); + } + -+ tbl[i].min = 0; -+ tbl[i].max = 0; -+ tbl[i].freq = DMCFREQ_TABLE_END; ++ rk_crypto_dump_hw_desc(&hw_info->hw_desc); + -+ *table = tbl; ++ dma_wmb(); ++ ++ if (alg_ctx->is_aead) ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_aad_dma); ++ else ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, start_flag | (start_flag << WRITE_MASK)); + + return 0; +} + -+static int rockchip_get_rl_map_talbe(struct device_node *np, char *porp_name, -+ struct rl_map_table **table) ++static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) +{ -+ struct rl_map_table *tbl; -+ const struct property *prop; -+ int count, i; ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ prop = of_find_property(np, porp_name, NULL); -+ if (!prop) -+ return -EINVAL; ++ CRYPTO_TRACE(); + -+ if (!prop->value) -+ return -ENODATA; ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ count = of_property_count_u32_elems(np, porp_name); -+ if (count < 0) -+ return -EINVAL; ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+ if (count % 2) -+ return -EINVAL; ++ rk_dev->request_crypto(rk_dev, alg_name); + -+ tbl = 
kzalloc(sizeof(*tbl) * (count / 2 + 1), GFP_KERNEL); -+ if (!tbl) -+ return -ENOMEM; ++ /* always not aligned for crypto v2 cipher */ ++ alg_ctx->align_size = 64; ++ alg_ctx->chunk_size = crypto_skcipher_chunksize(tfm); + -+ for (i = 0; i < count / 2; i++) { -+ of_property_read_u32_index(np, porp_name, 2 * i, &tbl[i].pn); -+ of_property_read_u32_index(np, porp_name, 2 * i + 1, -+ &tbl[i].rl); -+ } ++ alg_ctx->ops.start = rk_ablk_start; ++ alg_ctx->ops.update = rk_ablk_rx; ++ alg_ctx->ops.complete = rk_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+ tbl[i].pn = 0; -+ tbl[i].rl = DMCFREQ_TABLE_END; ++ alg_ctx->ops.hw_init = rk_ablk_hw_init; ++ alg_ctx->ops.hw_dma_start = crypto_dma_start; ++ alg_ctx->ops.hw_write_iv = set_iv_reg; + -+ *table = tbl; ++ ctx->rk_dev = rk_dev; ++ ++ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { ++ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); ++ ctx->fallback_tfm = crypto_alloc_skcipher(alg_name, 0, ++ CRYPTO_ALG_ASYNC | ++ CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_tfm)) { ++ CRYPTO_MSG("Could not load fallback driver %s : %ld.\n", ++ alg_name, PTR_ERR(ctx->fallback_tfm)); ++ ctx->fallback_tfm = NULL; ++ } ++ } + + return 0; +} + -+static int rockchip_get_system_status_rate(struct device_node *np, -+ char *porp_name, -+ struct rockchip_dmcfreq *dmcfreq) ++static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) +{ -+ const struct property *prop; -+ unsigned int status = 0, freq = 0; -+ unsigned long temp_rate = 0; -+ int count, i; ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ prop = of_find_property(np, porp_name, NULL); -+ if (!prop) -+ return -ENODEV; ++ CRYPTO_TRACE(); + -+ if (!prop->value) -+ return -ENODATA; ++ if (ctx->fallback_tfm) { ++ CRYPTO_MSG("free fallback tfm"); ++ crypto_free_skcipher(ctx->fallback_tfm); ++ } + -+ count = of_property_count_u32_elems(np, porp_name); -+ if (count < 0) -+ return -EINVAL; ++ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); ++} + -+ if (count % 2) -+ return -EINVAL; ++static int rk_aead_init_tfm(struct crypto_aead *tfm) ++{ ++ struct aead_alg *alg = crypto_aead_alg(tfm); ++ struct rk_crypto_algt *algt = ++ container_of(alg, struct rk_crypto_algt, alg.aead); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); ++ const char *alg_name = crypto_tfm_alg_name(&tfm->base); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ for (i = 0; i < count / 2; i++) { -+ of_property_read_u32_index(np, porp_name, 2 * i, -+ &status); -+ of_property_read_u32_index(np, porp_name, 2 * i + 1, -+ &freq); -+ switch (status) { -+ case SYS_STATUS_NORMAL: -+ dmcfreq->normal_rate = freq * 1000; -+ break; -+ case SYS_STATUS_SUSPEND: -+ dmcfreq->suspend_rate = freq * 1000; -+ break; -+ case SYS_STATUS_DEEP_SUSPEND: -+ dmcfreq->deep_suspend_rate = freq * 1000; -+ break; -+ case SYS_STATUS_VIDEO_1080P: -+ dmcfreq->video_1080p_rate = freq * 1000; -+ break; -+ case SYS_STATUS_VIDEO_4K: -+ dmcfreq->video_4k_rate = freq * 1000; -+ break; -+ case SYS_STATUS_VIDEO_4K_10B: -+ dmcfreq->video_4k_10b_rate = freq * 1000; -+ break; -+ case SYS_STATUS_VIDEO_SVEP: -+ dmcfreq->video_svep_rate = freq * 1000; -+ break; -+ case SYS_STATUS_PERFORMANCE: -+ dmcfreq->performance_rate = freq * 1000; -+ break; -+ case SYS_STATUS_HDMI: -+ dmcfreq->hdmi_rate = freq * 1000; -+ break; -+ case SYS_STATUS_HDMIRX: -+ dmcfreq->hdmirx_rate = freq * 1000; -+ break; -+ case 
SYS_STATUS_IDLE: -+ dmcfreq->idle_rate = freq * 1000; -+ break; -+ case SYS_STATUS_REBOOT: -+ dmcfreq->reboot_rate = freq * 1000; -+ break; -+ case SYS_STATUS_BOOST: -+ dmcfreq->boost_rate = freq * 1000; -+ break; -+ case SYS_STATUS_ISP: -+ case SYS_STATUS_CIF0: -+ case SYS_STATUS_CIF1: -+ case SYS_STATUS_DUALVIEW: -+ temp_rate = freq * 1000; -+ if (dmcfreq->fixed_rate < temp_rate) -+ dmcfreq->fixed_rate = temp_rate; -+ break; -+ case SYS_STATUS_LOW_POWER: -+ dmcfreq->low_power_rate = freq * 1000; -+ break; -+ default: -+ break; ++ CRYPTO_TRACE(); ++ ++ if (!rk_dev->request_crypto) ++ return -EFAULT; ++ ++ rk_dev->request_crypto(rk_dev, alg_name); ++ ++ alg_ctx->align_size = 64; ++ alg_ctx->chunk_size = crypto_aead_chunksize(tfm); ++ ++ alg_ctx->ops.start = rk_aead_start; ++ alg_ctx->ops.update = rk_ablk_rx; ++ alg_ctx->ops.complete = rk_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; ++ ++ alg_ctx->ops.hw_init = rk_ablk_hw_init; ++ alg_ctx->ops.hw_dma_start = crypto_dma_start; ++ alg_ctx->ops.hw_write_iv = set_iv_reg; ++ alg_ctx->ops.hw_get_result = get_tag_reg; ++ ++ ctx->rk_dev = rk_dev; ++ alg_ctx->is_aead = 1; ++ ++ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { ++ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); ++ ctx->fallback_aead = ++ crypto_alloc_aead(alg_name, 0, ++ CRYPTO_ALG_ASYNC | ++ CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_aead)) { ++ dev_err(rk_dev->dev, ++ "Load fallback driver %s err: %ld.\n", ++ alg_name, PTR_ERR(ctx->fallback_aead)); ++ ctx->fallback_aead = NULL; ++ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request)); ++ } else { ++ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) + ++ crypto_aead_reqsize(ctx->fallback_aead)); + } + } + + return 0; +} + -+static unsigned long rockchip_freq_level_2_rate(struct rockchip_dmcfreq *dmcfreq, -+ unsigned int level) ++static void rk_aead_exit_tfm(struct crypto_aead *tfm) +{ -+ unsigned long rate = 0; ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); + -+ switch (level) { -+ case DMC_FREQ_LEVEL_LOW: -+ rate = dmcfreq->rate_low; -+ break; -+ case DMC_FREQ_LEVEL_MID_LOW: -+ rate = dmcfreq->rate_mid_low; -+ break; -+ case DMC_FREQ_LEVEL_MID_HIGH: -+ rate = dmcfreq->rate_mid_high; -+ break; -+ case DMC_FREQ_LEVEL_HIGH: -+ rate = dmcfreq->rate_high; -+ break; -+ default: -+ break; ++ CRYPTO_TRACE(); ++ ++ if (ctx->fallback_aead) { ++ CRYPTO_MSG("free fallback tfm"); ++ crypto_free_aead(ctx->fallback_aead); + } + -+ return rate; ++ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(&tfm->base)); +} + -+static int rockchip_get_system_status_level(struct device_node *np, -+ char *porp_name, -+ struct rockchip_dmcfreq *dmcfreq) ++static int rk_aead_crypt(struct aead_request *req, bool encrypt) +{ -+ const struct property *prop; -+ unsigned int status = 0, level = 0; -+ unsigned long temp_rate = 0; -+ int count, i; -+ -+ prop = of_find_property(np, porp_name, NULL); -+ if (!prop) -+ return -ENODEV; ++ struct crypto_aead *tfm = crypto_aead_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); ++ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); ++ struct scatterlist *sg_src, *sg_dst; ++ struct scatterlist src[2], dst[2]; ++ u64 data_len; ++ bool aligned; ++ int ret = -EINVAL; + -+ if (!prop->value) -+ return -ENODATA; ++ CRYPTO_TRACE("%s cryptlen = %u, assoclen = %u", ++ encrypt ? 
"encrypt" : "decrypt", ++ req->cryptlen, req->assoclen); + -+ count = of_property_count_u32_elems(np, porp_name); -+ if (count < 0) -+ return -EINVAL; ++ data_len = encrypt ? req->cryptlen : (req->cryptlen - crypto_aead_authsize(tfm)); + -+ if (count % 2) -+ return -EINVAL; ++ if (req->assoclen == 0 || ++ req->cryptlen == 0 || ++ data_len == 0 || ++ is_force_fallback(algt, ctx->keylen)) ++ return rk_aead_fallback(req, ctx, encrypt); + -+ if (dmcfreq->freq_count == 1) { -+ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_high = dmcfreq->freq_info_rate[0]; -+ } else if (dmcfreq->freq_count == 2) { -+ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[1]; -+ dmcfreq->rate_high = dmcfreq->freq_info_rate[1]; -+ } else if (dmcfreq->freq_count == 3) { -+ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; -+ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[1]; -+ dmcfreq->rate_high = dmcfreq->freq_info_rate[2]; -+ } else if (dmcfreq->freq_count == 4) { -+ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; -+ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[2]; -+ dmcfreq->rate_high = dmcfreq->freq_info_rate[3]; -+ } else if (dmcfreq->freq_count == 5 || dmcfreq->freq_count == 6) { -+ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; -+ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; -+ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[dmcfreq->freq_count - 2]; -+ dmcfreq->rate_high = dmcfreq->freq_info_rate[dmcfreq->freq_count - 1]; -+ } else { -+ return -EINVAL; ++ /* point sg_src and sg_dst skip assoc data */ ++ sg_src = scatterwalk_ffwd(src, req->src, req->assoclen); ++ sg_dst = (req->src == req->dst) ? 
sg_src : scatterwalk_ffwd(dst, req->dst, req->assoclen); ++ ++ aligned = rk_crypto_check_align(sg_src, sg_nents_for_len(sg_src, data_len), ++ sg_dst, sg_nents_for_len(sg_dst, data_len), ++ 64); ++ ++ if (sg_nents_for_len(sg_src, data_len) > RK_DEFAULT_LLI_CNT || ++ sg_nents_for_len(sg_dst, data_len) > RK_DEFAULT_LLI_CNT) ++ return rk_aead_fallback(req, ctx, encrypt); ++ ++ if (!aligned) { ++ if (req->assoclen > ctx->rk_dev->aad_max || ++ data_len > ctx->rk_dev->vir_max) ++ return rk_aead_fallback(req, ctx, encrypt); + } + -+ dmcfreq->auto_min_rate = dmcfreq->rate_low; ++ ctx->mode = cipher_algo2bc[algt->algo] | ++ cipher_mode2bc[algt->mode]; ++ if (!encrypt) ++ ctx->mode |= CRYPTO_BC_DECRYPT; + -+ for (i = 0; i < count / 2; i++) { -+ of_property_read_u32_index(np, porp_name, 2 * i, -+ &status); -+ of_property_read_u32_index(np, porp_name, 2 * i + 1, -+ &level); -+ switch (status) { -+ case SYS_STATUS_NORMAL: -+ dmcfreq->normal_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "normal_rate = %ld\n", dmcfreq->normal_rate); -+ break; -+ case SYS_STATUS_SUSPEND: -+ dmcfreq->suspend_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "suspend_rate = %ld\n", dmcfreq->suspend_rate); -+ break; -+ case SYS_STATUS_DEEP_SUSPEND: -+ dmcfreq->deep_suspend_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "deep_suspend_rate = %ld\n", -+ dmcfreq->deep_suspend_rate); -+ break; -+ case SYS_STATUS_VIDEO_1080P: -+ dmcfreq->video_1080p_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "video_1080p_rate = %ld\n", -+ dmcfreq->video_1080p_rate); -+ break; -+ case SYS_STATUS_VIDEO_4K: -+ dmcfreq->video_4k_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "video_4k_rate = %ld\n", dmcfreq->video_4k_rate); -+ break; -+ case SYS_STATUS_VIDEO_4K_10B: -+ dmcfreq->video_4k_10b_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "video_4k_10b_rate = %ld\n", -+ dmcfreq->video_4k_10b_rate); -+ break; -+ case SYS_STATUS_VIDEO_4K_60P: -+ dmcfreq->video_4k_60p_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "video_4k_60p_rate = %ld\n", -+ dmcfreq->video_4k_60p_rate); -+ break; -+ case SYS_STATUS_VIDEO_SVEP: -+ dmcfreq->video_svep_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "video_svep_rate = %ld\n", -+ dmcfreq->video_svep_rate); -+ break; -+ case SYS_STATUS_PERFORMANCE: -+ dmcfreq->performance_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "performance_rate = %ld\n", -+ dmcfreq->performance_rate); -+ break; -+ case SYS_STATUS_HDMI: -+ dmcfreq->hdmi_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "hdmi_rate = %ld\n", dmcfreq->hdmi_rate); -+ break; -+ case SYS_STATUS_HDMIRX: -+ dmcfreq->hdmirx_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "hdmirx_rate = %ld\n", dmcfreq->hdmirx_rate); -+ break; -+ case SYS_STATUS_IDLE: -+ dmcfreq->idle_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "idle_rate = %ld\n", dmcfreq->idle_rate); -+ break; -+ case SYS_STATUS_REBOOT: -+ dmcfreq->reboot_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "reboot_rate = %ld\n", dmcfreq->reboot_rate); -+ break; -+ case SYS_STATUS_BOOST: -+ dmcfreq->boost_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "boost_rate = %ld\n", dmcfreq->boost_rate); -+ break; -+ case 
SYS_STATUS_ISP: -+ case SYS_STATUS_CIF0: -+ case SYS_STATUS_CIF1: -+ case SYS_STATUS_DUALVIEW: -+ temp_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ if (dmcfreq->fixed_rate < temp_rate) { -+ dmcfreq->fixed_rate = temp_rate; -+ dev_info(dmcfreq->dev, -+ "fixed_rate(isp|cif0|cif1|dualview) = %ld\n", -+ dmcfreq->fixed_rate); -+ } -+ break; -+ case SYS_STATUS_LOW_POWER: -+ dmcfreq->low_power_rate = rockchip_freq_level_2_rate(dmcfreq, level); -+ dev_info(dmcfreq->dev, "low_power_rate = %ld\n", dmcfreq->low_power_rate); -+ break; -+ default: -+ break; -+ } ++ if (algt->algo == CIPHER_ALGO_AES) { ++ if (ctx->keylen == AES_KEYSIZE_128) ++ ctx->mode |= CRYPTO_BC_128_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_192) ++ ctx->mode |= CRYPTO_BC_192_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_256) ++ ctx->mode |= CRYPTO_BC_256_bit_key; + } + -+ return 0; ++ ctx->iv_len = crypto_aead_ivsize(tfm); ++ ++ memset(ctx->iv, 0x00, sizeof(ctx->iv)); ++ memcpy(ctx->iv, req->iv, ctx->iv_len); ++ ++ ctx->is_enc = encrypt; ++ ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ ret = rk_aead_handle_req(ctx->rk_dev, req); ++ ++ return ret; +} + -+static void rockchip_dmcfreq_update_target(struct rockchip_dmcfreq *dmcfreq) ++static int rk_aead_encrypt(struct aead_request *req) +{ -+ struct devfreq *devfreq = dmcfreq->info.devfreq; -+ -+ mutex_lock(&devfreq->lock); -+ update_devfreq(devfreq); -+ mutex_unlock(&devfreq->lock); ++ return rk_aead_crypt(req, true); +} + -+static int rockchip_dmcfreq_system_status_notifier(struct notifier_block *nb, -+ unsigned long status, -+ void *ptr) ++static int rk_aead_decrypt(struct aead_request *req) +{ -+ struct rockchip_dmcfreq *dmcfreq = system_status_to_dmcfreq(nb); -+ unsigned long target_rate = 0; -+ unsigned int refresh = false; -+ bool is_fixed = false; ++ return rk_aead_crypt(req, false); ++} + -+ if (dmcfreq->fixed_rate && (is_dualview(status) || is_isp(status))) { -+ if (dmcfreq->is_fixed) -+ return NOTIFY_OK; -+ is_fixed = true; -+ target_rate = dmcfreq->fixed_rate; -+ goto next; -+ } ++struct rk_crypto_algt rk_v2_ecb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, ECB, ecb(sm4), ecb-sm4-rk); + -+ if (dmcfreq->reboot_rate && (status & SYS_STATUS_REBOOT)) { -+ if (dmcfreq->info.auto_freq_en) -+ devfreq_monitor_stop(dmcfreq->info.devfreq); -+ target_rate = dmcfreq->reboot_rate; -+ goto next; -+ } ++struct rk_crypto_algt rk_v2_cbc_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CBC, cbc(sm4), cbc-sm4-rk); + -+ if (dmcfreq->suspend_rate && (status & SYS_STATUS_SUSPEND)) { -+ target_rate = dmcfreq->suspend_rate; -+ refresh = true; -+ goto next; -+ } ++struct rk_crypto_algt rk_v2_xts_sm4_alg = ++ RK_CIPHER_ALGO_XTS_INIT(SM4, xts(sm4), xts-sm4-rk); + -+ if (dmcfreq->low_power_rate && (status & SYS_STATUS_LOW_POWER)) { -+ target_rate = dmcfreq->low_power_rate; -+ goto next; -+ } ++struct rk_crypto_algt rk_v2_cfb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CFB, cfb(sm4), cfb-sm4-rk); + -+ if (dmcfreq->performance_rate && (status & SYS_STATUS_PERFORMANCE)) { -+ if (dmcfreq->performance_rate > target_rate) -+ target_rate = dmcfreq->performance_rate; -+ } ++struct rk_crypto_algt rk_v2_ofb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, OFB, ofb(sm4), ofb-sm4-rk); + -+ if (dmcfreq->hdmi_rate && (status & SYS_STATUS_HDMI)) { -+ if (dmcfreq->hdmi_rate > target_rate) -+ target_rate = dmcfreq->hdmi_rate; -+ } ++struct rk_crypto_algt rk_v2_ctr_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CTR, ctr(sm4), ctr-sm4-rk); + -+ if (dmcfreq->hdmirx_rate && (status & SYS_STATUS_HDMIRX)) { -+ if (dmcfreq->hdmirx_rate > target_rate) 
-+ target_rate = dmcfreq->hdmirx_rate; -+ } ++struct rk_crypto_algt rk_v2_gcm_sm4_alg = ++ RK_AEAD_ALGO_INIT(SM4, GCM, gcm(sm4), gcm-sm4-rk); + -+ if (dmcfreq->video_4k_rate && (status & SYS_STATUS_VIDEO_4K)) { -+ if (dmcfreq->video_4k_rate > target_rate) -+ target_rate = dmcfreq->video_4k_rate; -+ } ++struct rk_crypto_algt rk_v2_ecb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); + -+ if (dmcfreq->video_4k_10b_rate && (status & SYS_STATUS_VIDEO_4K_10B)) { -+ if (dmcfreq->video_4k_10b_rate > target_rate) -+ target_rate = dmcfreq->video_4k_10b_rate; -+ } ++struct rk_crypto_algt rk_v2_cbc_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); + -+ if (dmcfreq->video_4k_60p_rate && (status & SYS_STATUS_VIDEO_4K_60P)) { -+ if (dmcfreq->video_4k_60p_rate > target_rate) -+ target_rate = dmcfreq->video_4k_60p_rate; -+ } ++struct rk_crypto_algt rk_v2_xts_aes_alg = ++ RK_CIPHER_ALGO_XTS_INIT(AES, xts(aes), xts-aes-rk); + -+ if (dmcfreq->video_1080p_rate && (status & SYS_STATUS_VIDEO_1080P)) { -+ if (dmcfreq->video_1080p_rate > target_rate) -+ target_rate = dmcfreq->video_1080p_rate; -+ } ++struct rk_crypto_algt rk_v2_cfb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CFB, cfb(aes), cfb-aes-rk); + -+ if (dmcfreq->video_svep_rate && (status & SYS_STATUS_VIDEO_SVEP)) { -+ if (dmcfreq->video_svep_rate > target_rate) -+ target_rate = dmcfreq->video_svep_rate; -+ } ++struct rk_crypto_algt rk_v2_ofb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, OFB, ofb(aes), ofb-aes-rk); + -+next: ++struct rk_crypto_algt rk_v2_ctr_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CTR, ctr(aes), ctr-aes-rk); + -+ dev_dbg(dmcfreq->dev, "status=0x%x\n", (unsigned int)status); -+ dmcfreq->is_fixed = is_fixed; -+ dmcfreq->status_rate = target_rate; -+ if (dmcfreq->refresh != refresh) { -+ if (dmcfreq->set_auto_self_refresh) -+ dmcfreq->set_auto_self_refresh(refresh); -+ dmcfreq->refresh = refresh; -+ } -+ rockchip_dmcfreq_update_target(dmcfreq); ++struct rk_crypto_algt rk_v2_gcm_aes_alg = ++ RK_AEAD_ALGO_INIT(AES, GCM, gcm(aes), gcm-aes-rk); + -+ return NOTIFY_OK; -+} ++struct rk_crypto_algt rk_v2_ecb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); + -+static int rockchip_dmcfreq_panic_notifier(struct notifier_block *nb, -+ unsigned long v, void *p) -+{ -+ struct rockchip_dmcfreq *dmcfreq = -+ container_of(nb, struct rockchip_dmcfreq, panic_nb); -+ struct device *dev = dmcfreq->dev; ++struct rk_crypto_algt rk_v2_cbc_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); + -+ if (dmcfreq->opp_info.regulator_count == 1) -+ dev_info(dev, "cur_freq: %lu Hz, volt: %lu uV\n", -+ dmcfreq->rate, dmcfreq->volt); -+ else -+ dev_info(dev, "cur_freq: %lu Hz, volt_vdd: %lu uV, volt_mem: %lu uV\n", -+ dmcfreq->rate, dmcfreq->volt, dmcfreq->mem_volt); ++struct rk_crypto_algt rk_v2_cfb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, CFB, cfb(des), cfb-des-rk); + -+ return 0; -+} ++struct rk_crypto_algt rk_v2_ofb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, OFB, ofb(des), ofb-des-rk); + -+static ssize_t rockchip_dmcfreq_status_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ unsigned int status = rockchip_get_system_status(); ++struct rk_crypto_algt rk_v2_ecb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); + -+ return sprintf(buf, "0x%x\n", status); -+} ++struct rk_crypto_algt rk_v2_cbc_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); + -+static ssize_t rockchip_dmcfreq_status_store(struct device *dev, -+ struct 
device_attribute *attr, -+ const char *buf, -+ size_t count) -+{ -+ if (!count) -+ return -EINVAL; ++struct rk_crypto_algt rk_v2_cfb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, CFB, cfb(des3_ede), cfb-des3_ede-rk); + -+ rockchip_update_system_status(buf); ++struct rk_crypto_algt rk_v2_ofb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, OFB, ofb(des3_ede), ofb-des3_ede-rk); + -+ return count; -+} +diff --git a/drivers/crypto/rockchip/rk_crypto_v3.c b/drivers/crypto/rockchip/rk_crypto_v3.c +new file mode 100644 +index 000000000..7cd728599 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v3.c +@@ -0,0 +1,217 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Crypto acceleration support for Rockchip Crypto V3 ++ * ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ */ + -+static DEVICE_ATTR(system_status, 0644, rockchip_dmcfreq_status_show, -+ rockchip_dmcfreq_status_store); ++#include "rk_crypto_core.h" ++#include "rk_crypto_v3.h" ++#include "rk_crypto_v3_reg.h" ++#include "rk_crypto_utils.h" + -+static ssize_t upthreshold_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); -+ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; ++static const u32 cipher_mode2bit_mask[] = { ++ [CIPHER_MODE_ECB] = CRYPTO_ECB_FLAG, ++ [CIPHER_MODE_CBC] = CRYPTO_CBC_FLAG, ++ [CIPHER_MODE_CFB] = CRYPTO_CFB_FLAG, ++ [CIPHER_MODE_OFB] = CRYPTO_OFB_FLAG, ++ [CIPHER_MODE_CTR] = CRYPTO_CTR_FLAG, ++ [CIPHER_MODE_XTS] = CRYPTO_XTS_FLAG, ++ [CIPHER_MODE_CTS] = CRYPTO_CTS_FLAG, ++ [CIPHER_MODE_CCM] = CRYPTO_CCM_FLAG, ++ [CIPHER_MODE_GCM] = CRYPTO_GCM_FLAG, ++ [CIPHER_MODE_CMAC] = CRYPTO_CMAC_FLAG, ++ [CIPHER_MODE_CBCMAC] = CRYPTO_CBCMAC_FLAG, ++}; + -+ return sprintf(buf, "%d\n", data->upthreshold); -+} ++static const u32 hash_algo2bit_mask[] = { ++ [HASH_ALGO_SHA1] = CRYPTO_HASH_SHA1_FLAG, ++ [HASH_ALGO_SHA224] = CRYPTO_HASH_SHA224_FLAG, ++ [HASH_ALGO_SHA256] = CRYPTO_HASH_SHA256_FLAG, ++ [HASH_ALGO_SHA384] = CRYPTO_HASH_SHA384_FLAG, ++ [HASH_ALGO_SHA512] = CRYPTO_HASH_SHA512_FLAG, ++ [HASH_ALGO_SHA512_224] = CRYPTO_HASH_SHA512_224_FLAG, ++ [HASH_ALGO_SHA512_256] = CRYPTO_HASH_SHA512_256_FLAG, ++ [HASH_ALGO_MD5] = CRYPTO_HASH_MD5_FLAG, ++ [HASH_ALGO_SM3] = CRYPTO_HASH_SM3_FLAG, ++}; + -+static ssize_t upthreshold_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, -+ size_t count) ++static const u32 hmac_algo2bit_mask[] = { ++ [HASH_ALGO_SHA1] = CRYPTO_HMAC_SHA1_FLAG, ++ [HASH_ALGO_SHA256] = CRYPTO_HMAC_SHA256_FLAG, ++ [HASH_ALGO_SHA512] = CRYPTO_HMAC_SHA512_FLAG, ++ [HASH_ALGO_MD5] = CRYPTO_HMAC_MD5_FLAG, ++ [HASH_ALGO_SM3] = CRYPTO_HMAC_SM3_FLAG, ++}; ++ ++static const char * const crypto_v3_rsts[] = { ++ "crypto-rst", ++}; ++ ++static struct rk_crypto_algt *crypto_v3_algs[] = { ++ &rk_v3_ecb_sm4_alg, /* ecb(sm4) */ ++ &rk_v3_cbc_sm4_alg, /* cbc(sm4) */ ++ &rk_v3_xts_sm4_alg, /* xts(sm4) */ ++ &rk_v3_cfb_sm4_alg, /* cfb(sm4) */ ++ &rk_v3_ofb_sm4_alg, /* ofb(sm4) */ ++ &rk_v3_ctr_sm4_alg, /* ctr(sm4) */ ++ &rk_v3_gcm_sm4_alg, /* ctr(sm4) */ ++ ++ &rk_v3_ecb_aes_alg, /* ecb(aes) */ ++ &rk_v3_cbc_aes_alg, /* cbc(aes) */ ++ &rk_v3_xts_aes_alg, /* xts(aes) */ ++ &rk_v3_cfb_aes_alg, /* cfb(aes) */ ++ &rk_v3_ofb_aes_alg, /* ofb(aes) */ ++ &rk_v3_ctr_aes_alg, /* ctr(aes) */ ++ &rk_v3_gcm_aes_alg, /* gcm(aes) */ ++ ++ &rk_v3_ecb_des_alg, /* ecb(des) */ ++ &rk_v3_cbc_des_alg, /* cbc(des) */ ++ &rk_v3_cfb_des_alg, /* cfb(des) */ ++ 
&rk_v3_ofb_des_alg, /* ofb(des) */ ++ ++ &rk_v3_ecb_des3_ede_alg, /* ecb(des3_ede) */ ++ &rk_v3_cbc_des3_ede_alg, /* cbc(des3_ede) */ ++ &rk_v3_cfb_des3_ede_alg, /* cfb(des3_ede) */ ++ &rk_v3_ofb_des3_ede_alg, /* ofb(des3_ede) */ ++ ++ &rk_v3_ahash_sha1, /* sha1 */ ++ &rk_v3_ahash_sha224, /* sha224 */ ++ &rk_v3_ahash_sha256, /* sha256 */ ++ &rk_v3_ahash_sha384, /* sha384 */ ++ &rk_v3_ahash_sha512, /* sha512 */ ++ &rk_v3_ahash_md5, /* md5 */ ++ &rk_v3_ahash_sm3, /* sm3 */ ++ ++ &rk_v3_hmac_sha1, /* hmac(sha1) */ ++ &rk_v3_hmac_sha256, /* hmac(sha256) */ ++ &rk_v3_hmac_sha512, /* hmac(sha512) */ ++ &rk_v3_hmac_md5, /* hmac(md5) */ ++ &rk_v3_hmac_sm3, /* hmac(sm3) */ ++ ++ /* Shared v2 version implementation */ ++ &rk_v2_asym_rsa, /* rsa */ ++}; ++ ++static bool rk_is_cipher_support(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode, u32 key_len) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); -+ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; -+ unsigned int value; ++ u32 version = 0; ++ u32 mask = 0; ++ bool key_len_valid = true; + -+ if (kstrtouint(buf, 10, &value)) -+ return -EINVAL; ++ switch (algo) { ++ case CIPHER_ALGO_DES: ++ case CIPHER_ALGO_DES3_EDE: ++ version = CRYPTO_READ(rk_dev, CRYPTO_DES_VERSION); + -+ data->upthreshold = value; ++ if (key_len == 8) ++ key_len_valid = true; ++ else if (key_len == 16 || key_len == 24) ++ key_len_valid = version & CRYPTO_TDES_FLAG; ++ else ++ key_len_valid = false; ++ break; ++ case CIPHER_ALGO_AES: ++ version = CRYPTO_READ(rk_dev, CRYPTO_AES_VERSION); + -+ return count; -+} ++ if (key_len == 16) ++ key_len_valid = version & CRYPTO_AES128_FLAG; ++ else if (key_len == 24) ++ key_len_valid = version & CRYPTO_AES192_FLAG; ++ else if (key_len == 32) ++ key_len_valid = version & CRYPTO_AES256_FLAG; ++ else ++ key_len_valid = false; ++ break; ++ case CIPHER_ALGO_SM4: ++ version = CRYPTO_READ(rk_dev, CRYPTO_SM4_VERSION); + -+static DEVICE_ATTR_RW(upthreshold); ++ key_len_valid = (key_len == SM4_KEY_SIZE) ? 
true : false; ++ break; ++ default: ++ return false; ++ } + -+static ssize_t downdifferential_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++ mask = cipher_mode2bit_mask[mode]; ++ ++ if (key_len == 0) ++ key_len_valid = true; ++ ++ return (version & mask) && key_len_valid; ++} ++ ++static bool rk_is_hash_support(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); -+ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; ++ u32 version = 0; ++ u32 mask = 0; + -+ return sprintf(buf, "%d\n", data->downdifferential); ++ if (type == ALG_TYPE_HMAC) { ++ version = CRYPTO_READ(rk_dev, CRYPTO_HMAC_VERSION); ++ mask = hmac_algo2bit_mask[algo]; ++ } else if (type == ALG_TYPE_HASH) { ++ version = CRYPTO_READ(rk_dev, CRYPTO_HASH_VERSION); ++ mask = hash_algo2bit_mask[algo]; ++ } else { ++ return false; ++ } ++ ++ return version & mask; +} + -+static ssize_t downdifferential_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, -+ size_t count) ++int rk_hw_crypto_v3_init(struct device *dev, void *hw_info) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); -+ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; -+ unsigned int value; ++ struct rk_hw_crypto_v3_info *info = ++ (struct rk_hw_crypto_v3_info *)hw_info; + -+ if (kstrtouint(buf, 10, &value)) ++ if (!dev || !hw_info) + return -EINVAL; + -+ data->downdifferential = value; ++ memset(info, 0x00, sizeof(*info)); + -+ return count; ++ return rk_crypto_hw_desc_alloc(dev, &info->hw_desc); +} + -+static DEVICE_ATTR_RW(downdifferential); -+ -+static unsigned long get_nocp_req_rate(struct rockchip_dmcfreq *dmcfreq) ++void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info) +{ -+ unsigned long target = 0, cpu_bw = 0; -+ int i; ++ struct rk_hw_crypto_v3_info *info = ++ (struct rk_hw_crypto_v3_info *)hw_info; + -+ if (!dmcfreq->cpu_bw_tbl || dmcfreq->nocp_cpu_id < 0) -+ goto out; ++ if (!dev || !hw_info) ++ return; + -+ cpu_bw = dmcfreq->nocp_bw[dmcfreq->nocp_cpu_id]; ++ rk_crypto_hw_desc_free(&info->hw_desc); ++} + -+ for (i = 0; dmcfreq->cpu_bw_tbl[i].freq != CPUFREQ_TABLE_END; i++) { -+ if (cpu_bw >= dmcfreq->cpu_bw_tbl[i].min) -+ target = dmcfreq->cpu_bw_tbl[i].freq; -+ } ++const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num) ++{ ++ *num = ARRAY_SIZE(crypto_v3_rsts); + -+out: -+ return target; ++ return crypto_v3_rsts; +} + -+static int devfreq_dmc_ondemand_func(struct devfreq *df, -+ unsigned long *freq) ++struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num) +{ -+ int err; -+ struct devfreq_dev_status *stat; -+ unsigned long long a, b; -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(df->dev.parent); -+ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; -+ unsigned int upthreshold = data->upthreshold; -+ unsigned int downdifferential = data->downdifferential; -+ unsigned long target_freq = 0, nocp_req_rate = 0; -+ u64 now; ++ *num = ARRAY_SIZE(crypto_v3_algs); + -+ if (dmcfreq->info.auto_freq_en && !dmcfreq->is_fixed) { -+ if (dmcfreq->status_rate) -+ target_freq = dmcfreq->status_rate; -+ else if (dmcfreq->auto_min_rate) -+ target_freq = dmcfreq->auto_min_rate; -+ nocp_req_rate = get_nocp_req_rate(dmcfreq); -+ target_freq = max3(target_freq, nocp_req_rate, -+ dmcfreq->info.vop_req_rate); -+ now = ktime_to_us(ktime_get()); -+ if (now < dmcfreq->touchboostpulse_endtime) -+ target_freq = max(target_freq, dmcfreq->boost_rate); ++ 
return crypto_v3_algs; ++} ++ ++bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt) ++{ ++ if (aglt->type == ALG_TYPE_CIPHER || aglt->type == ALG_TYPE_AEAD) { ++ CRYPTO_TRACE("CIPHER"); ++ return rk_is_cipher_support(rk_dev, aglt->algo, aglt->mode, 0); ++ } else if (aglt->type == ALG_TYPE_HASH || aglt->type == ALG_TYPE_HMAC) { ++ CRYPTO_TRACE("HASH/HMAC"); ++ return rk_is_hash_support(rk_dev, aglt->algo, aglt->type); ++ } else if (aglt->type == ALG_TYPE_ASYM) { ++ CRYPTO_TRACE("RSA"); ++ return true; + } else { -+ if (dmcfreq->status_rate) -+ target_freq = dmcfreq->status_rate; -+ else if (dmcfreq->normal_rate) -+ target_freq = dmcfreq->normal_rate; -+ if (target_freq) -+ *freq = target_freq; -+ if (dmcfreq->info.auto_freq_en && !devfreq_update_stats(df)) -+ return 0; -+ goto reset_last_status; ++ return false; + } ++} + -+ if (!upthreshold || !downdifferential) -+ goto reset_last_status; +diff --git a/drivers/crypto/rockchip/rk_crypto_v3.h b/drivers/crypto/rockchip/rk_crypto_v3.h +new file mode 100644 +index 000000000..a4b181416 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v3.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+ if (upthreshold > 100 || -+ upthreshold < downdifferential) -+ goto reset_last_status; ++/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. */ + -+ err = devfreq_update_stats(df); -+ if (err) -+ goto reset_last_status; ++#ifndef __RK_CRYPTO_V3_H__ ++#define __RK_CRYPTO_V3_H__ + -+ stat = &df->last_status; ++#include + -+ /* Assume MAX if it is going to be divided by zero */ -+ if (stat->total_time == 0) { -+ *freq = DEVFREQ_MAX_FREQ; -+ return 0; -+ } ++#include "rk_crypto_utils.h" + -+ /* Prevent overflow */ -+ if (stat->busy_time >= (1 << 24) || stat->total_time >= (1 << 24)) { -+ stat->busy_time >>= 7; -+ stat->total_time >>= 7; -+ } ++struct rk_hw_crypto_v3_info { ++ struct rk_hw_desc hw_desc; ++}; + -+ /* Set MAX if it's busy enough */ -+ if (stat->busy_time * 100 > -+ stat->total_time * upthreshold) { -+ *freq = DEVFREQ_MAX_FREQ; -+ return 0; -+ } ++#define RK_CRYPTO_V3_SOC_DATA_INIT(names) {\ ++ .crypto_ver = "CRYPTO V3.0.0.0 multi",\ ++ .use_soft_aes192 = false,\ ++ .valid_algs_name = (names),\ ++ .valid_algs_num = ARRAY_SIZE(names),\ ++ .hw_init = rk_hw_crypto_v3_init,\ ++ .hw_deinit = rk_hw_crypto_v3_deinit,\ ++ .hw_get_rsts = rk_hw_crypto_v3_get_rsts,\ ++ .hw_get_algts = rk_hw_crypto_v3_get_algts,\ ++ .hw_is_algo_valid = rk_hw_crypto_v3_algo_valid,\ ++ .hw_info_size = sizeof(struct rk_hw_crypto_v3_info),\ ++ .default_pka_offset = 0x0480,\ ++ .use_lli_chain = true,\ ++} + -+ /* Set MAX if we do not know the initial frequency */ -+ if (stat->current_frequency == 0) { -+ *freq = DEVFREQ_MAX_FREQ; -+ return 0; -+ } ++#if IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) + -+ /* Keep the current frequency */ -+ if (stat->busy_time * 100 > -+ stat->total_time * (upthreshold - downdifferential)) { -+ *freq = max(target_freq, stat->current_frequency); -+ return 0; -+ } ++extern struct rk_crypto_algt rk_v3_ecb_sm4_alg; ++extern struct rk_crypto_algt rk_v3_cbc_sm4_alg; ++extern struct rk_crypto_algt rk_v3_xts_sm4_alg; ++extern struct rk_crypto_algt rk_v3_cfb_sm4_alg; ++extern struct rk_crypto_algt rk_v3_ofb_sm4_alg; ++extern struct rk_crypto_algt rk_v3_ctr_sm4_alg; ++extern struct rk_crypto_algt rk_v3_gcm_sm4_alg; + -+ /* Set the desired frequency based on the load */ -+ a = stat->busy_time; -+ a *= stat->current_frequency; -+ b = div_u64(a, stat->total_time); -+ b *= 100; -+ b = div_u64(b, (upthreshold - 
downdifferential / 2)); -+ *freq = max_t(unsigned long, target_freq, b); ++extern struct rk_crypto_algt rk_v3_ecb_aes_alg; ++extern struct rk_crypto_algt rk_v3_cbc_aes_alg; ++extern struct rk_crypto_algt rk_v3_xts_aes_alg; ++extern struct rk_crypto_algt rk_v3_cfb_aes_alg; ++extern struct rk_crypto_algt rk_v3_ofb_aes_alg; ++extern struct rk_crypto_algt rk_v3_ctr_aes_alg; ++extern struct rk_crypto_algt rk_v3_gcm_aes_alg; + -+ return 0; ++extern struct rk_crypto_algt rk_v3_ecb_des_alg; ++extern struct rk_crypto_algt rk_v3_cbc_des_alg; ++extern struct rk_crypto_algt rk_v3_cfb_des_alg; ++extern struct rk_crypto_algt rk_v3_ofb_des_alg; + -+reset_last_status: -+ reset_last_status(df); ++extern struct rk_crypto_algt rk_v3_ecb_des3_ede_alg; ++extern struct rk_crypto_algt rk_v3_cbc_des3_ede_alg; ++extern struct rk_crypto_algt rk_v3_cfb_des3_ede_alg; ++extern struct rk_crypto_algt rk_v3_ofb_des3_ede_alg; + -+ return 0; -+} ++extern struct rk_crypto_algt rk_v3_ahash_sha1; ++extern struct rk_crypto_algt rk_v3_ahash_sha224; ++extern struct rk_crypto_algt rk_v3_ahash_sha256; ++extern struct rk_crypto_algt rk_v3_ahash_sha384; ++extern struct rk_crypto_algt rk_v3_ahash_sha512; ++extern struct rk_crypto_algt rk_v3_ahash_md5; ++extern struct rk_crypto_algt rk_v3_ahash_sm3; + -+static int devfreq_dmc_ondemand_handler(struct devfreq *devfreq, -+ unsigned int event, void *data) -+{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(devfreq->dev.parent); ++extern struct rk_crypto_algt rk_v3_hmac_md5; ++extern struct rk_crypto_algt rk_v3_hmac_sha1; ++extern struct rk_crypto_algt rk_v3_hmac_sha256; ++extern struct rk_crypto_algt rk_v3_hmac_sha512; ++extern struct rk_crypto_algt rk_v3_hmac_sm3; + -+ if (!dmcfreq->info.auto_freq_en) -+ return 0; ++/* Shared v2 version implementation */ ++extern struct rk_crypto_algt rk_v2_asym_rsa; + -+ switch (event) { -+ case DEVFREQ_GOV_START: -+ devfreq_monitor_start(devfreq); -+ break; ++int rk_hw_crypto_v3_init(struct device *dev, void *hw_info); ++void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info); ++const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num); ++struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num); ++bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, struct rk_crypto_algt *aglt); + -+ case DEVFREQ_GOV_STOP: -+ devfreq_monitor_stop(devfreq); -+ break; ++#else + -+ case DEVFREQ_GOV_UPDATE_INTERVAL: -+ devfreq_update_interval(devfreq, (unsigned int *)data); -+ break; ++static inline int rk_hw_crypto_v3_init(struct device *dev, void *hw_info) { return -EINVAL; } ++static inline void rk_hw_crypto_v3_deinit(struct device *dev, void *hw_info) {} ++static inline const char * const *rk_hw_crypto_v3_get_rsts(uint32_t *num) { return NULL; } ++static inline struct rk_crypto_algt **rk_hw_crypto_v3_get_algts(uint32_t *num) { return NULL; } ++static inline bool rk_hw_crypto_v3_algo_valid(struct rk_crypto_dev *rk_dev, ++ struct rk_crypto_algt *aglt) ++{ ++ return false; ++} + -+ case DEVFREQ_GOV_SUSPEND: -+ devfreq_monitor_suspend(devfreq); -+ break; ++#endif /* end of IS_ENABLED(CONFIG_CRYPTO_DEV_ROCKCHIP_V3) */ + -+ case DEVFREQ_GOV_RESUME: -+ devfreq_monitor_resume(devfreq); -+ break; ++#endif /* end of __RK_CRYPTO_V3_H__ */ +diff --git a/drivers/crypto/rockchip/rk_crypto_v3_ahash.c b/drivers/crypto/rockchip/rk_crypto_v3_ahash.c +new file mode 100644 +index 000000000..0c91b45b2 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v3_ahash.c +@@ -0,0 +1,468 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Hash acceleration support 
for Rockchip Crypto v3 ++ * ++ * Copyright (c) 2022, Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * ++ */ + -+ default: -+ break; -+ } ++#include ++#include + -+ return 0; -+} ++#include "rk_crypto_core.h" ++#include "rk_crypto_v3.h" ++#include "rk_crypto_v3_reg.h" ++#include "rk_crypto_ahash_utils.h" ++#include "rk_crypto_utils.h" + -+static struct devfreq_governor devfreq_dmc_ondemand = { -+ .name = "dmc_ondemand", -+ .get_target_freq = devfreq_dmc_ondemand_func, -+ .event_handler = devfreq_dmc_ondemand_handler, ++#define RK_HASH_CTX_MAGIC 0x1A1A1A1A ++#define RK_POLL_PERIOD_US 100 ++#define RK_POLL_TIMEOUT_US 50000 ++ ++struct rk_ahash_expt_ctx { ++ struct rk_ahash_ctx ctx; ++ u8 lastc[RK_DMA_ALIGNMENT]; +}; + -+static int rockchip_dmcfreq_enable_event(struct rockchip_dmcfreq *dmcfreq) -+{ -+ int i, ret; ++struct rk_hash_mid_data { ++ u32 valid_flag; ++ u32 hash_ctl; ++ u32 data[CRYPTO_HASH_MID_WORD_SIZE]; ++}; + -+ if (!dmcfreq->info.auto_freq_en) -+ return 0; ++static const u32 hash_algo2bc[] = { ++ [HASH_ALGO_MD5] = CRYPTO_MD5, ++ [HASH_ALGO_SHA1] = CRYPTO_SHA1, ++ [HASH_ALGO_SHA224] = CRYPTO_SHA224, ++ [HASH_ALGO_SHA256] = CRYPTO_SHA256, ++ [HASH_ALGO_SHA384] = CRYPTO_SHA384, ++ [HASH_ALGO_SHA512] = CRYPTO_SHA512, ++ [HASH_ALGO_SM3] = CRYPTO_SM3, ++}; + -+ for (i = 0; i < dmcfreq->edev_count; i++) { -+ ret = devfreq_event_enable_edev(dmcfreq->edev[i]); -+ if (ret < 0) { -+ dev_err(dmcfreq->dev, -+ "failed to enable devfreq-event\n"); -+ return ret; -+ } -+ } ++static void rk_hash_reset(struct rk_crypto_dev *rk_dev) ++{ ++ int ret; ++ u32 tmp = 0, tmp_mask = 0; ++ unsigned int pool_timeout_us = 1000; + -+ return 0; -+} ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); + -+static int rockchip_dmcfreq_disable_event(struct rockchip_dmcfreq *dmcfreq) -+{ -+ int i, ret; ++ tmp = CRYPTO_SW_CC_RESET; ++ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; + -+ if (!dmcfreq->info.auto_freq_en) -+ return 0; ++ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); + -+ for (i = 0; i < dmcfreq->edev_count; i++) { -+ ret = devfreq_event_disable_edev(dmcfreq->edev[i]); -+ if (ret < 0) { -+ dev_err(dmcfreq->dev, -+ "failed to disable devfreq-event\n"); -+ return ret; -+ } -+ } ++ /* This is usually done in 20 clock cycles */ ++ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, pool_timeout_us, ++ false, rk_dev, CRYPTO_RST_CTL); ++ if (ret) ++ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", ++ pool_timeout_us); + -+ return 0; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0xffff0000); ++ ++ /* clear dma int status */ ++ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); +} + -+static int rockchip_get_edev_id(struct rockchip_dmcfreq *dmcfreq, -+ const char *name) ++static int rk_hash_mid_data_store(struct rk_crypto_dev *rk_dev, struct rk_hash_mid_data *mid_data) +{ -+ struct devfreq_event_dev *edev; -+ int i; ++ int ret; ++ uint32_t reg_ctrl; + -+ for (i = 0; i < dmcfreq->edev_count; i++) { -+ edev = dmcfreq->edev[i]; -+ if (!strcmp(edev->desc->name, name)) -+ return i; ++ CRYPTO_TRACE(); ++ ++ ret = read_poll_timeout_atomic(CRYPTO_READ, ++ reg_ctrl, ++ reg_ctrl & CRYPTO_HASH_MID_IS_VALID, ++ 0, ++ RK_POLL_TIMEOUT_US, ++ false, rk_dev, CRYPTO_MID_VALID); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID_SWITCH, ++ CRYPTO_MID_VALID_ENABLE << CRYPTO_WRITE_MASK_SHIFT); ++ if (ret) { ++ CRYPTO_TRACE("CRYPTO_MID_VALID timeout."); ++ goto exit; + } + -+ return -EINVAL; -+} ++ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID, ++ CRYPTO_HASH_MID_IS_VALID | ++ 
CRYPTO_HASH_MID_IS_VALID << CRYPTO_WRITE_MASK_SHIFT); + -+static int rockchip_dmcfreq_get_event(struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct device *dev = dmcfreq->dev; -+ struct device_node *events_np, *np = dev->of_node; -+ int i, j, count, available_count = 0; ++ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, ++ (u8 *)mid_data->data, sizeof(mid_data->data)); + -+ count = devfreq_event_get_edev_count(dev, "devfreq-events"); -+ if (count < 0) { -+ dev_dbg(dev, "failed to get count of devfreq-event dev\n"); -+ return 0; -+ } -+ for (i = 0; i < count; i++) { -+ events_np = of_parse_phandle(np, "devfreq-events", i); -+ if (!events_np) -+ continue; -+ if (of_device_is_available(events_np)) -+ available_count++; -+ of_node_put(events_np); -+ } -+ if (!available_count) { -+ dev_dbg(dev, "failed to get available devfreq-event\n"); -+ return 0; -+ } -+ dmcfreq->edev_count = available_count; -+ dmcfreq->edev = devm_kzalloc(dev, -+ sizeof(*dmcfreq->edev) * available_count, -+ GFP_KERNEL); -+ if (!dmcfreq->edev) -+ return -ENOMEM; ++ mid_data->hash_ctl = CRYPTO_READ(rk_dev, CRYPTO_HASH_CTL); ++ mid_data->valid_flag = 1; + -+ for (i = 0, j = 0; i < count; i++) { -+ events_np = of_parse_phandle(np, "devfreq-events", i); -+ if (!events_np) -+ continue; -+ if (of_device_is_available(events_np)) { -+ of_node_put(events_np); -+ if (j >= available_count) { -+ dev_err(dev, "invalid event conut\n"); -+ return -EINVAL; -+ } -+ dmcfreq->edev[j] = -+ devfreq_event_get_edev_by_phandle(dev, "devfreq-events", i); -+ if (IS_ERR(dmcfreq->edev[j])) -+ return -EPROBE_DEFER; -+ j++; -+ } else { -+ of_node_put(events_np); -+ } -+ } -+ dmcfreq->info.auto_freq_en = true; -+ dmcfreq->dfi_id = rockchip_get_edev_id(dmcfreq, "dfi"); -+ dmcfreq->nocp_cpu_id = rockchip_get_edev_id(dmcfreq, "nocp-cpu"); -+ dmcfreq->nocp_bw = -+ devm_kzalloc(dev, sizeof(*dmcfreq->nocp_bw) * available_count, -+ GFP_KERNEL); -+ if (!dmcfreq->nocp_bw) -+ return -ENOMEM; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); + -+ return 0; ++exit: ++ return ret; +} + -+static int rockchip_dmcfreq_dmc_init(struct platform_device *pdev, -+ struct rockchip_dmcfreq *dmcfreq) ++static int rk_hash_mid_data_restore(struct rk_crypto_dev *rk_dev, struct rk_hash_mid_data *mid_data) +{ -+ const struct of_device_id *match; -+ int (*init)(struct platform_device *pdev, -+ struct rockchip_dmcfreq *data); -+ int ret; ++ CRYPTO_TRACE(); + -+ match = of_match_node(rockchip_dmcfreq_of_match, pdev->dev.of_node); -+ if (match) { -+ init = match->data; -+ if (init) { -+ ret = init(pdev, dmcfreq); -+ if (ret) -+ return ret; -+ } ++ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID_SWITCH, ++ CRYPTO_MID_VALID_ENABLE | CRYPTO_MID_VALID_ENABLE << CRYPTO_WRITE_MASK_SHIFT); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_MID_VALID, ++ CRYPTO_HASH_MID_IS_VALID | ++ CRYPTO_HASH_MID_IS_VALID << CRYPTO_WRITE_MASK_SHIFT); ++ ++ if (!mid_data->valid_flag) { ++ CRYPTO_TRACE("clear mid data"); ++ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, ARRAY_SIZE(mid_data->data)); ++ return 0; + } + ++ rk_crypto_write_regs(rk_dev, CRYPTO_HASH_MID_DATA_0, ++ (u8 *)mid_data->data, sizeof(mid_data->data)); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, mid_data->hash_ctl | CRYPTO_WRITE_MASK_ALL); ++ + return 0; +} + -+static void rockchip_dmcfreq_parse_dt(struct rockchip_dmcfreq *dmcfreq) ++static int rk_crypto_irq_handle(int irq, void *dev_id) +{ -+ struct device *dev = dmcfreq->dev; -+ struct device_node *np = dev->of_node; ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 
interrupt_status; ++ struct rk_hw_crypto_v3_info *hw_info = ++ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); + -+ if (!rockchip_get_system_status_rate(np, "system-status-freq", dmcfreq)) -+ dmcfreq->system_status_en = true; -+ else if (!rockchip_get_system_status_level(np, "system-status-level", dmcfreq)) -+ dmcfreq->system_status_en = true; ++ /* disable crypto irq */ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); + -+ of_property_read_u32(np, "min-cpu-freq", &dmcfreq->min_cpu_freq); ++ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); + -+ of_property_read_u32(np, "upthreshold", -+ &dmcfreq->ondemand_data.upthreshold); -+ of_property_read_u32(np, "downdifferential", -+ &dmcfreq->ondemand_data.downdifferential); -+ if (dmcfreq->info.auto_freq_en) -+ of_property_read_u32(np, "auto-freq-en", -+ &dmcfreq->info.auto_freq_en); -+ if (!dmcfreq->auto_min_rate) { -+ of_property_read_u32(np, "auto-min-freq", -+ (u32 *)&dmcfreq->auto_min_rate); -+ dmcfreq->auto_min_rate *= 1000; -+ } ++ interrupt_status &= CRYPTO_LOCKSTEP_MASK; + -+ if (rockchip_get_freq_map_talbe(np, "cpu-bw-dmc-freq", -+ &dmcfreq->cpu_bw_tbl)) -+ dev_dbg(dev, "failed to get cpu bandwidth to dmc rate\n"); -+ if (rockchip_get_freq_map_talbe(np, "vop-frame-bw-dmc-freq", -+ &dmcfreq->info.vop_frame_bw_tbl)) -+ dev_dbg(dev, "failed to get vop frame bandwidth to dmc rate\n"); -+ if (rockchip_get_freq_map_talbe(np, "vop-bw-dmc-freq", -+ &dmcfreq->info.vop_bw_tbl)) -+ dev_err(dev, "failed to get vop bandwidth to dmc rate\n"); -+ if (rockchip_get_rl_map_talbe(np, "vop-pn-msch-readlatency", -+ &dmcfreq->info.vop_pn_rl_tbl)) -+ dev_err(dev, "failed to get vop pn to msch rl\n"); -+ if (dmcfreq->video_4k_rate) -+ dmcfreq->info.vop_4k_rate = dmcfreq->video_4k_rate; -+ else if (dmcfreq->video_4k_10b_rate) -+ dmcfreq->info.vop_4k_rate = dmcfreq->video_4k_10b_rate; ++ if (interrupt_status != CRYPTO_SRC_ITEM_DONE_INT_ST) { ++ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); ++ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", ++ (u32)alg_ctx->addr_in); ++ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", ++ (u32)alg_ctx->addr_out); ++ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); ++ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", ++ (u32)hw_info->hw_desc.lli_head_dma); ++ dev_err(rk_dev->dev, "DMA Error status = %08x\n", ++ interrupt_status); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); ++ rk_dev->err = -EFAULT; ++ } + -+ of_property_read_u32(np, "touchboost_duration", -+ (u32 *)&dmcfreq->touchboostpulse_duration_val); -+ if (dmcfreq->touchboostpulse_duration_val) -+ dmcfreq->touchboostpulse_duration_val *= USEC_PER_MSEC; -+ else -+ dmcfreq->touchboostpulse_duration_val = 500 * USEC_PER_MSEC; ++ return 0; +} + -+static int rockchip_dmcfreq_add_devfreq(struct rockchip_dmcfreq *dmcfreq) 
++static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) +{ -+ struct devfreq_dev_profile *devp = &rockchip_devfreq_dmc_profile; -+ struct device *dev = dmcfreq->dev; -+ struct dev_pm_opp *opp; -+ struct devfreq *devfreq; -+ unsigned long opp_rate; ++ struct ahash_request *req = ahash_request_cast(base); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(ctx->rk_dev); + -+ dmcfreq->rate = clk_get_rate(dmcfreq->opp_info.clk); -+ opp_rate = dmcfreq->rate; -+ opp = devfreq_recommended_opp(dev, &opp_rate, 0); -+ if (IS_ERR(opp)) { -+ dev_err(dev, "Failed to find opp for %lu Hz\n", opp_rate); -+ return PTR_ERR(opp); -+ } -+ dev_pm_opp_put(opp); ++ struct rk_hw_crypto_v3_info *hw_info = ctx->rk_dev->hw_info; ++ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; + -+ devp->initial_freq = dmcfreq->rate; -+ devfreq = devm_devfreq_add_device(dev, devp, "dmc_ondemand", -+ &dmcfreq->ondemand_data); -+ if (IS_ERR(devfreq)) { -+ dev_err(dev, "failed to add devfreq\n"); -+ return PTR_ERR(devfreq); ++ if (err) { ++ rk_hash_reset(ctx->rk_dev); ++ pr_err("aligned = %u, align_size = %u\n", ++ alg_ctx->aligned, alg_ctx->align_size); ++ pr_err("total = %u, left = %u, count = %u\n", ++ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); ++ pr_err("lli->src = %08x\n", lli_desc->src_addr); ++ pr_err("lli->src_len = %08x\n", lli_desc->src_len); ++ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); ++ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); ++ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); ++ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); ++ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); + } + -+ devm_devfreq_register_opp_notifier(dev, devfreq); ++ if (alg_ctx->total) ++ rk_hash_mid_data_store(ctx->rk_dev, (struct rk_hash_mid_data *)ctx->priv); + -+ devfreq->last_status.current_frequency = opp_rate; -+ devfreq->suspend_freq = dmcfreq->deep_suspend_rate; ++ if (base->complete) ++ base->complete(base, err); ++} + -+ reset_last_status(devfreq); ++static inline void clear_hash_out_reg(struct rk_crypto_dev *rk_dev) ++{ ++ rk_crypto_clear_regs(rk_dev, CRYPTO_HASH_DOUT_0, 16); ++} + -+ dmcfreq->info.devfreq = devfreq; ++static int write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); + + return 0; +} + -+static void rockchip_dmcfreq_register_notifier(struct rockchip_dmcfreq *dmcfreq) ++static int rk_hw_hash_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 type) +{ -+ int ret; ++ u32 reg_ctrl = 0; ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); ++ struct rk_hash_mid_data *mid_data = (struct rk_hash_mid_data *)ctx->priv; + -+ if (dmcfreq->system_status_en || dmcfreq->info.auto_freq_en) { -+ if (vop_register_dmc()) -+ dev_err(dmcfreq->dev, "fail to register notify to vop.\n"); ++ if (algo >= ARRAY_SIZE(hash_algo2bc)) ++ goto exit; + -+ dmcfreq->status_nb.notifier_call = -+ rockchip_dmcfreq_system_status_notifier; -+ ret = rockchip_register_system_status_notifier(&dmcfreq->status_nb); -+ if (ret) -+ dev_err(dmcfreq->dev, "failed to register system_status nb\n"); -+ } ++ rk_hash_reset(rk_dev); + -+ dmcfreq->panic_nb.notifier_call = rockchip_dmcfreq_panic_notifier; -+ ret = atomic_notifier_chain_register(&panic_notifier_list, -+ &dmcfreq->panic_nb); -+ if (ret) -+ dev_err(dmcfreq->dev, "failed to register panic nb\n"); ++ clear_hash_out_reg(rk_dev); + -+ 
dmc_mdevp.data = dmcfreq->info.devfreq; -+ dmc_mdevp.opp_info = &dmcfreq->opp_info; -+ dmcfreq->mdev_info = rockchip_system_monitor_register(dmcfreq->dev, -+ &dmc_mdevp); -+ if (IS_ERR(dmcfreq->mdev_info)) { -+ dev_dbg(dmcfreq->dev, "without without system monitor\n"); -+ dmcfreq->mdev_info = NULL; ++ reg_ctrl = hash_algo2bc[algo] | CRYPTO_HW_PAD_ENABLE; ++ ++ if (IS_TYPE_HMAC(type)) { ++ CRYPTO_TRACE("this is hmac"); ++ reg_ctrl |= CRYPTO_HMAC_ENABLE; + } -+ dmcfreq->opp_info.is_rate_volt_checked = true; -+} + -+static void rockchip_dmcfreq_add_interface(struct rockchip_dmcfreq *dmcfreq) -+{ -+ struct devfreq *devfreq = dmcfreq->info.devfreq; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, reg_ctrl | CRYPTO_WRITE_MASK_ALL); ++ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); + -+ if (sysfs_create_file(&devfreq->dev.kobj, &dev_attr_upthreshold.attr)) -+ dev_err(dmcfreq->dev, -+ "failed to register upthreshold sysfs file\n"); -+ if (sysfs_create_file(&devfreq->dev.kobj, -+ &dev_attr_downdifferential.attr)) -+ dev_err(dmcfreq->dev, -+ "failed to register downdifferential sysfs file\n"); ++ memset(mid_data, 0x00, sizeof(*mid_data)); + -+ if (!rockchip_add_system_status_interface(&devfreq->dev)) -+ return; -+ if (sysfs_create_file(&devfreq->dev.kobj, -+ &dev_attr_system_status.attr)) -+ dev_err(dmcfreq->dev, -+ "failed to register system_status sysfs file\n"); ++ return 0; ++exit: ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); ++ ++ return -EINVAL; +} + -+static void rockchip_dmcfreq_boost_work(struct work_struct *work) ++static void clean_hash_setting(struct rk_crypto_dev *rk_dev) +{ -+ struct rockchip_dmcfreq *dmcfreq = boost_to_dmcfreq(work); -+ -+ rockchip_dmcfreq_update_target(dmcfreq); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0); ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, 0 | CRYPTO_WRITE_MASK_ALL); +} + -+static void rockchip_dmcfreq_input_event(struct input_handle *handle, -+ unsigned int type, -+ unsigned int code, -+ int value) ++static int rk_ahash_import(struct ahash_request *req, const void *in) +{ -+ struct rockchip_dmcfreq *dmcfreq = handle->private; -+ u64 now, endtime; ++ struct rk_ahash_expt_ctx state; + -+ if (type != EV_ABS && type != EV_KEY) -+ return; ++ /* 'in' may not be aligned so memcpy to local variable */ ++ memcpy(&state, in, sizeof(state)); + -+ now = ktime_to_us(ktime_get()); -+ endtime = now + dmcfreq->touchboostpulse_duration_val; -+ if (endtime < (dmcfreq->touchboostpulse_endtime + 10 * USEC_PER_MSEC)) -+ return; -+ dmcfreq->touchboostpulse_endtime = endtime; ++ ///TODO: deal with import + -+ queue_work(system_freezable_wq, &dmcfreq->boost_work); ++ return 0; +} + -+static int rockchip_dmcfreq_input_connect(struct input_handler *handler, -+ struct input_dev *dev, -+ const struct input_device_id *id) ++static int rk_ahash_export(struct ahash_request *req, void *out) +{ -+ int error; -+ struct input_handle *handle; -+ struct rockchip_dmcfreq *dmcfreq = input_hd_to_dmcfreq(handler); -+ -+ handle = kzalloc(sizeof(*handle), GFP_KERNEL); -+ if (!handle) -+ return -ENOMEM; ++ struct rk_ahash_expt_ctx state; + -+ handle->dev = dev; -+ handle->handler = handler; -+ handle->name = "dmcfreq"; -+ handle->private = dmcfreq; ++ /* Don't let anything leak to 'out' */ ++ memset(&state, 0, sizeof(state)); + -+ error = input_register_handle(handle); -+ if (error) -+ goto err2; ++ ///TODO: deal with import + -+ error = input_open_device(handle); -+ if (error) -+ goto err1; ++ memcpy(out, &state, sizeof(state)); + + return 0; -+err1: -+ 
input_unregister_handle(handle); -+err2: -+ kfree(handle); -+ return error; +} + -+static void rockchip_dmcfreq_input_disconnect(struct input_handle *handle) ++static int rk_ahash_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) +{ -+ input_close_device(handle); -+ input_unregister_handle(handle); -+ kfree(handle); -+} -+ -+static const struct input_device_id rockchip_dmcfreq_input_ids[] = { -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | -+ INPUT_DEVICE_ID_MATCH_ABSBIT, -+ .evbit = { BIT_MASK(EV_ABS) }, -+ .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = -+ BIT_MASK(ABS_MT_POSITION_X) | -+ BIT_MASK(ABS_MT_POSITION_Y) }, -+ }, -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_KEYBIT | -+ INPUT_DEVICE_ID_MATCH_ABSBIT, -+ .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, -+ .absbit = { [BIT_WORD(ABS_X)] = -+ BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) }, -+ }, -+ { -+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, -+ .evbit = { BIT_MASK(EV_KEY) }, -+ }, -+ { }, -+}; ++ struct rk_hw_crypto_v3_info *hw_info = ++ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_ahash_alg_ctx(rk_dev); ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); ++ struct crypto_lli_desc *lli_head, *lli_tail; ++ u32 dma_ctl = CRYPTO_DMA_RESTART; ++ bool is_final = flag & RK_FLAG_FINAL; ++ int ret; + -+static void rockchip_dmcfreq_boost_init(struct rockchip_dmcfreq *dmcfreq) -+{ -+ if (!dmcfreq->boost_rate) -+ return; -+ INIT_WORK(&dmcfreq->boost_work, rockchip_dmcfreq_boost_work); -+ dmcfreq->input_handler.event = rockchip_dmcfreq_input_event; -+ dmcfreq->input_handler.connect = rockchip_dmcfreq_input_connect; -+ dmcfreq->input_handler.disconnect = rockchip_dmcfreq_input_disconnect; -+ dmcfreq->input_handler.name = "dmcfreq"; -+ dmcfreq->input_handler.id_table = rockchip_dmcfreq_input_ids; -+ if (input_register_handler(&dmcfreq->input_handler)) -+ dev_err(dmcfreq->dev, "failed to register input handler\n"); -+} ++ CRYPTO_TRACE("ctx->calc_cnt = %u, count %u Byte, is_final = %d", ++ ctx->calc_cnt, alg_ctx->count, is_final); + -+static int rockchip_dmcfreq_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct rockchip_dmcfreq *data; -+ int ret; ++ if (alg_ctx->count % RK_DMA_ALIGNMENT && !is_final) { ++ dev_err(rk_dev->dev, "count = %u is not aligned with [%u]\n", ++ alg_ctx->count, RK_DMA_ALIGNMENT); ++ return -EINVAL; ++ } + -+ data = devm_kzalloc(dev, sizeof(struct rockchip_dmcfreq), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; ++ if (alg_ctx->count == 0) { ++ /* do nothing */ ++ CRYPTO_TRACE("empty calc"); ++ return 0; ++ } + -+ data->dev = dev; -+ data->dev->init_name = "dmc"; -+ data->info.dev = dev; -+ mutex_init(&data->lock); -+ INIT_LIST_HEAD(&data->video_info_list); ++ if (alg_ctx->total == alg_ctx->left_bytes + alg_ctx->count) ++ rk_hash_mid_data_restore(rk_dev, (struct rk_hash_mid_data *)ctx->priv); + -+ ret = rockchip_dmcfreq_get_event(data); ++ if (alg_ctx->aligned) ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ alg_ctx->sg_src, NULL, alg_ctx->count); ++ else ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ &alg_ctx->sg_tmp, NULL, alg_ctx->count); + if (ret) + return ret; + -+ ret = rockchip_init_opp_table(dev, &data->opp_info, "dmc_clk", "center"); -+ if (ret) -+ return ret; ++ lli_head = hw_info->hw_desc.lli_head; ++ lli_tail = hw_info->hw_desc.lli_tail; + -+ ret = rockchip_dmcfreq_dmc_init(pdev, data); -+ if (ret) -+ return ret; ++ lli_tail->dma_ctrl = is_final ? 
LLI_DMA_CTRL_LAST : LLI_DMA_CTRL_PAUSE; ++ lli_tail->dma_ctrl |= LLI_DMA_CTRL_SRC_DONE; + -+ rockchip_dmcfreq_parse_dt(data); ++ if (ctx->calc_cnt == 0) { ++ dma_ctl = CRYPTO_DMA_START; + -+ platform_set_drvdata(pdev, data); ++ lli_head->user_define |= LLI_USER_CIPHER_START; ++ lli_head->user_define |= LLI_USER_STRING_START; + -+ if (!data->system_status_en && !data->info.auto_freq_en) { -+ dev_info(dev, "don't add devfreq feature\n"); -+ rockchip_dmcfreq_register_notifier(data); -+ return 0; ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_CTL, ++ (CRYPTO_HASH_ENABLE << CRYPTO_WRITE_MASK_SHIFT) | ++ CRYPTO_HASH_ENABLE); + } + -+ cpu_latency_qos_add_request(&pm_qos, PM_QOS_DEFAULT_VALUE); ++ if (is_final && alg_ctx->left_bytes == 0) ++ lli_tail->user_define |= LLI_USER_STRING_LAST; + -+ ret = devfreq_add_governor(&devfreq_dmc_ondemand); -+ if (ret) -+ return ret; -+ ret = rockchip_dmcfreq_enable_event(data); -+ if (ret) -+ return ret; -+ ret = rockchip_dmcfreq_add_devfreq(data); -+ if (ret) { -+ rockchip_dmcfreq_disable_event(data); -+ return ret; -+ } ++ CRYPTO_TRACE("dma_ctrl = %08x, user_define = %08x, len = %u", ++ lli_head->dma_ctrl, lli_head->user_define, alg_ctx->count); + -+ rockchip_dmcfreq_register_notifier(data); -+ rockchip_dmcfreq_add_interface(data); -+ rockchip_dmcfreq_boost_init(data); -+ rockchip_dmcfreq_vop_bandwidth_init(&data->info); ++ rk_crypto_dump_hw_desc(&hw_info->hw_desc); + -+ rockchip_set_system_status(SYS_STATUS_NORMAL); ++ dma_wmb(); ++ ++ /* enable crypto irq */ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, dma_ctl | dma_ctl << CRYPTO_WRITE_MASK_SHIFT); + + return 0; +} + -+static __maybe_unused int rockchip_dmcfreq_suspend(struct device *dev) ++static int rk_ahash_get_result(struct rk_crypto_dev *rk_dev, ++ uint8_t *data, uint32_t data_len) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info; -+ struct regulator *vdd_reg = NULL; -+ struct regulator *mem_reg = NULL; + int ret = 0; ++ u32 reg_ctrl = 0; ++ struct rk_ahash_ctx *ctx = rk_ahash_ctx_cast(rk_dev); + -+ if (!dmcfreq) -+ return 0; ++ memset(ctx->priv, 0x00, sizeof(struct rk_hash_mid_data)); + -+ ret = rockchip_dmcfreq_disable_event(dmcfreq); ++ ret = read_poll_timeout_atomic(CRYPTO_READ, reg_ctrl, ++ reg_ctrl & CRYPTO_HASH_IS_VALID, ++ RK_POLL_PERIOD_US, ++ RK_POLL_TIMEOUT_US, false, ++ rk_dev, CRYPTO_HASH_VALID); + if (ret) -+ return ret; ++ goto exit; + -+ if (dmcfreq->info.devfreq) { -+ ret = devfreq_suspend_device(dmcfreq->info.devfreq); -+ if (ret < 0) { -+ dev_err(dev, "failed to suspend the devfreq devices\n"); -+ return ret; -+ } -+ } ++ rk_crypto_read_regs(rk_dev, CRYPTO_HASH_DOUT_0, data, data_len); + -+ opp_info = &dmcfreq->opp_info; -+ if (!opp_info->regulators) -+ return 0; -+ vdd_reg = opp_info->regulators[0]; -+ if (opp_info->regulator_count > 1) -+ mem_reg = opp_info->regulators[1]; ++ CRYPTO_WRITE(rk_dev, CRYPTO_HASH_VALID, CRYPTO_HASH_IS_VALID); + -+ /* set voltage to sleep_volt if need */ -+ if (vdd_reg && dmcfreq->sleep_volt && -+ dmcfreq->sleep_volt != dmcfreq->volt) { -+ ret = regulator_set_voltage(vdd_reg, dmcfreq->sleep_volt, INT_MAX); -+ if (ret) { -+ dev_err(dev, "Cannot set vdd voltage %lu uV\n", -+ dmcfreq->sleep_volt); -+ return ret; -+ } -+ } -+ if (mem_reg && dmcfreq->sleep_mem_volt && -+ dmcfreq->sleep_mem_volt != dmcfreq->mem_volt) { -+ ret = regulator_set_voltage(mem_reg, dmcfreq->sleep_mem_volt, INT_MAX); -+ 
if (ret) { -+ dev_err(dev, "Cannot set mem voltage %lu uV\n", -+ dmcfreq->sleep_mem_volt); -+ return ret; -+ } -+ } ++exit: ++ clean_hash_setting(rk_dev); + -+ return 0; ++ return ret; +} + -+static __maybe_unused int rockchip_dmcfreq_resume(struct device *dev) ++static int rk_cra_hash_init(struct crypto_tfm *tfm) +{ -+ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info; -+ struct regulator *vdd_reg = NULL; -+ struct regulator *mem_reg = NULL; -+ int ret = 0; ++ struct rk_crypto_algt *algt = ++ rk_ahash_get_algt(__crypto_ahash_cast(tfm)); ++ const char *alg_name = crypto_tfm_alg_name(tfm); ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ if (!dmcfreq) -+ return 0; ++ CRYPTO_TRACE(); + -+ opp_info = &dmcfreq->opp_info; -+ if (opp_info->regulators) { -+ vdd_reg = opp_info->regulators[0]; -+ if (opp_info->regulator_count > 1) -+ mem_reg = opp_info->regulators[1]; ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ /* restore voltage if it is sleep_volt */ -+ if (mem_reg && dmcfreq->sleep_volt && -+ dmcfreq->sleep_volt != dmcfreq->volt) { -+ ret = regulator_set_voltage(vdd_reg, dmcfreq->volt, -+ INT_MAX); -+ if (ret) { -+ dev_err(dev, "Cannot set vdd voltage %lu uV\n", -+ dmcfreq->volt); -+ return ret; -+ } -+ } -+ if (vdd_reg && dmcfreq->sleep_mem_volt && -+ dmcfreq->sleep_mem_volt != dmcfreq->mem_volt) { -+ ret = regulator_set_voltage(mem_reg, dmcfreq->mem_volt, -+ INT_MAX); -+ if (ret) { -+ dev_err(dev, "Cannot set mem voltage %lu uV\n", -+ dmcfreq->mem_volt); -+ return ret; -+ } -+ } -+ } ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+ ret = rockchip_dmcfreq_enable_event(dmcfreq); -+ if (ret) -+ return ret; ++ alg_ctx->align_size = RK_DMA_ALIGNMENT; + -+ if (dmcfreq->info.devfreq) { -+ ret = devfreq_resume_device(dmcfreq->info.devfreq); -+ if (ret < 0) { -+ dev_err(dev, "failed to resume the devfreq devices\n"); -+ return ret; -+ } -+ } ++ alg_ctx->ops.start = rk_ahash_start; ++ alg_ctx->ops.update = rk_ahash_crypto_rx; ++ alg_ctx->ops.complete = rk_ahash_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+ return ret; -+} ++ alg_ctx->ops.hw_write_key = write_key_reg; ++ alg_ctx->ops.hw_init = rk_hw_hash_init; ++ alg_ctx->ops.hw_dma_start = rk_ahash_dma_start; ++ alg_ctx->ops.hw_get_result = rk_ahash_get_result; + -+static SIMPLE_DEV_PM_OPS(rockchip_dmcfreq_pm, rockchip_dmcfreq_suspend, -+ rockchip_dmcfreq_resume); -+static struct platform_driver rockchip_dmcfreq_driver = { -+ .probe = rockchip_dmcfreq_probe, -+ .driver = { -+ .name = "rockchip-dmc", -+ .pm = &rockchip_dmcfreq_pm, -+ .of_match_table = rockchip_dmcfreq_of_match, -+ }, -+}; -+module_platform_driver(rockchip_dmcfreq_driver); ++ ctx->rk_dev = rk_dev; ++ ctx->hash_tmp = (u8 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); ++ if (!ctx->hash_tmp) { ++ dev_err(rk_dev->dev, "Can't get zeroed page for hash tmp.\n"); ++ return -ENOMEM; ++ } + -+MODULE_AUTHOR("Finley Xiao "); -+MODULE_DESCRIPTION("rockchip dmcfreq driver with devfreq framework"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/devfreq/rockchip_dmc_common.c b/drivers/devfreq/rockchip_dmc_common.c -new file mode 100644 -index 000000000..7765e7174 ---- /dev/null -+++ b/drivers/devfreq/rockchip_dmc_common.c -@@ -0,0 +1,180 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Rockchip dmc common functions. -+ * -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. 
-+ * Author: Finley Xiao -+ */ ++ ctx->priv = kmalloc(sizeof(struct rk_hash_mid_data), GFP_KERNEL); ++ if (!ctx->priv) { ++ free_page((unsigned long)ctx->hash_tmp); ++ return -ENOMEM; ++ } + -+#include -+#include ++ memset(ctx->priv, 0x00, sizeof(struct rk_hash_mid_data)); + -+#define msch_rl_to_dmcfreq(work) container_of(to_delayed_work(work), \ -+ struct rockchip_dmcfreq, \ -+ msch_rl_work) -+#define MSCH_RL_DELAY_TIME 50 /* ms */ ++ rk_dev->request_crypto(rk_dev, alg_name); + -+static struct dmcfreq_common_info *common_info; -+static DECLARE_RWSEM(rockchip_dmcfreq_sem); ++ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx)); + -+void rockchip_dmcfreq_lock(void) -+{ -+ down_read(&rockchip_dmcfreq_sem); -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_lock); ++ algt->alg.hash.halg.statesize = sizeof(struct rk_ahash_expt_ctx); + -+void rockchip_dmcfreq_lock_nested(void) -+{ -+ down_read_nested(&rockchip_dmcfreq_sem, SINGLE_DEPTH_NESTING); ++ return 0; +} -+EXPORT_SYMBOL(rockchip_dmcfreq_lock_nested); + -+void rockchip_dmcfreq_unlock(void) ++static void rk_cra_hash_exit(struct crypto_tfm *tfm) +{ -+ up_read(&rockchip_dmcfreq_sem); -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_unlock); ++ struct rk_ahash_ctx *ctx = crypto_tfm_ctx(tfm); + -+int rockchip_dmcfreq_write_trylock(void) -+{ -+ return down_write_trylock(&rockchip_dmcfreq_sem); -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_write_trylock); ++ CRYPTO_TRACE(); + -+void rockchip_dmcfreq_write_unlock(void) -+{ -+ up_write(&rockchip_dmcfreq_sem); -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_write_unlock); ++ if (ctx->hash_tmp) ++ free_page((unsigned long)ctx->hash_tmp); + -+static void set_msch_rl(unsigned int readlatency) ++ kfree(ctx->priv); + -+{ -+ rockchip_dmcfreq_lock(); -+ dev_dbg(common_info->dev, "rl 0x%x -> 0x%x\n", -+ common_info->read_latency, readlatency); -+ if (!common_info->set_msch_readlatency(readlatency)) -+ common_info->read_latency = readlatency; -+ else -+ dev_err(common_info->dev, "failed to set msch rl\n"); -+ rockchip_dmcfreq_unlock(); ++ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(tfm)); +} + -+static void set_msch_rl_work(struct work_struct *work) -+{ -+ set_msch_rl(0); -+ common_info->is_msch_rl_work_started = false; -+} ++struct rk_crypto_algt rk_v3_ahash_md5 = RK_HASH_ALGO_INIT(MD5, md5); ++struct rk_crypto_algt rk_v3_ahash_sha1 = RK_HASH_ALGO_INIT(SHA1, sha1); ++struct rk_crypto_algt rk_v3_ahash_sha224 = RK_HASH_ALGO_INIT(SHA224, sha224); ++struct rk_crypto_algt rk_v3_ahash_sha256 = RK_HASH_ALGO_INIT(SHA256, sha256); ++struct rk_crypto_algt rk_v3_ahash_sha384 = RK_HASH_ALGO_INIT(SHA384, sha384); ++struct rk_crypto_algt rk_v3_ahash_sha512 = RK_HASH_ALGO_INIT(SHA512, sha512); ++struct rk_crypto_algt rk_v3_ahash_sm3 = RK_HASH_ALGO_INIT(SM3, sm3); + -+int rockchip_dmcfreq_vop_bandwidth_init(struct dmcfreq_common_info *info) -+{ -+ if (info->set_msch_readlatency) -+ INIT_DELAYED_WORK(&info->msch_rl_work, set_msch_rl_work); -+ common_info = info; ++struct rk_crypto_algt rk_v3_hmac_md5 = RK_HMAC_ALGO_INIT(MD5, md5); ++struct rk_crypto_algt rk_v3_hmac_sha1 = RK_HMAC_ALGO_INIT(SHA1, sha1); ++struct rk_crypto_algt rk_v3_hmac_sha256 = RK_HMAC_ALGO_INIT(SHA256, sha256); ++struct rk_crypto_algt rk_v3_hmac_sha512 = RK_HMAC_ALGO_INIT(SHA512, sha512); ++struct rk_crypto_algt rk_v3_hmac_sm3 = RK_HMAC_ALGO_INIT(SM3, sm3); + -+ return 0; -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_init); +diff --git a/drivers/crypto/rockchip/rk_crypto_v3_reg.h b/drivers/crypto/rockchip/rk_crypto_v3_reg.h +new file mode 100644 
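/*
 * A simplified, self-contained model (not code from this patch) of the hash
 * "mid data" pause/resume scheme implemented by rk_hash_mid_data_store() and
 * rk_hash_mid_data_restore() above, backed by the CRYPTO_HASH_MID_DATA_0 and
 * CRYPTO_MID_VALID registers defined in the header added below. The register
 * block is modelled as a plain array; mid_state_store()/_restore() are
 * hypothetical names, and only the 26-word state size is taken from the patch
 * (CRYPTO_HASH_MID_WORD_SIZE).
 */
#include <stdint.h>

#define HASH_MID_WORDS 26 /* mirrors CRYPTO_HASH_MID_WORD_SIZE */

struct hash_mid_state {
	uint32_t data[HASH_MID_WORDS]; /* intermediate digest/length words */
	uint32_t hash_ctl;             /* snapshot of the HASH_CTL setting */
	uint32_t valid;                /* non-zero once a snapshot exists */
};

/*
 * Pause: snapshot the mid-data words and the current HASH_CTL value (the
 * driver above first polls CRYPTO_MID_VALID before reading, then disables
 * the hash block).
 */
static void mid_state_store(const volatile uint32_t *mid_regs,
			    uint32_t hash_ctl, struct hash_mid_state *st)
{
	int i;

	for (i = 0; i < HASH_MID_WORDS; i++)
		st->data[i] = mid_regs[i];
	st->hash_ctl = hash_ctl;
	st->valid = 1;
}

/*
 * Resume: with no snapshot, clear the mid-data words so a fresh hash starts
 * from the algorithm's initial state; otherwise write the snapshot back and
 * hand the saved HASH_CTL value to the caller to re-arm the engine.
 */
static void mid_state_restore(volatile uint32_t *mid_regs, uint32_t *hash_ctl,
			      const struct hash_mid_state *st)
{
	int i;

	if (!st->valid) {
		for (i = 0; i < HASH_MID_WORDS; i++)
			mid_regs[i] = 0;
		return;
	}

	for (i = 0; i < HASH_MID_WORDS; i++)
		mid_regs[i] = st->data[i];
	*hash_ctl = st->hash_ctl;
}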
+index 000000000..1c4c45317 +--- /dev/null ++++ b/drivers/crypto/rockchip/rk_crypto_v3_reg.h +@@ -0,0 +1,80 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+void rockchip_dmcfreq_vop_bandwidth_update(struct dmcfreq_vop_info *vop_info) -+{ -+ unsigned long vop_last_rate, target = 0; -+ unsigned int readlatency = 0; -+ int i; ++/* Copyright (c) 2022 Rockchip Electronics Co. Ltd. */ + -+ if (!common_info) -+ return; ++#ifndef __RK_CRYPTO_V3_REG_H__ ++#define __RK_CRYPTO_V3_REG_H__ + -+ dev_dbg(common_info->dev, "line bw=%u, frame bw=%u, pn=%u, pn_4k=%u\n", -+ vop_info->line_bw_mbyte, vop_info->frame_bw_mbyte, -+ vop_info->plane_num, vop_info->plane_num_4k); ++#include "rk_crypto_v2_reg.h" + -+ if (!common_info->vop_pn_rl_tbl || !common_info->set_msch_readlatency) -+ goto vop_bw_tbl; -+ for (i = 0; common_info->vop_pn_rl_tbl[i].rl != DMCFREQ_TABLE_END; i++) { -+ if (vop_info->plane_num >= common_info->vop_pn_rl_tbl[i].pn) -+ readlatency = common_info->vop_pn_rl_tbl[i].rl; -+ } -+ dev_dbg(common_info->dev, "pn=%u\n", vop_info->plane_num); -+ if (readlatency) { -+ cancel_delayed_work_sync(&common_info->msch_rl_work); -+ common_info->is_msch_rl_work_started = false; -+ if (common_info->read_latency != readlatency) -+ set_msch_rl(readlatency); -+ } else if (common_info->read_latency && -+ !common_info->is_msch_rl_work_started) { -+ common_info->is_msch_rl_work_started = true; -+ schedule_delayed_work(&common_info->msch_rl_work, -+ msecs_to_jiffies(MSCH_RL_DELAY_TIME)); -+ } ++#define CRYPTO_UNEQUAL_ERR_INT_EN BIT(9) ++#define CRYPTO_ZERO_LEN_INT_EN BIT(6) + -+vop_bw_tbl: -+ if (!common_info->auto_freq_en || !common_info->vop_bw_tbl) -+ goto vop_frame_bw_tbl; ++/* DMA Destination Data Read Address Register */ ++#define CRYPTO_DMA_DST_WADDR 0x0028 + -+ for (i = 0; common_info->vop_bw_tbl[i].freq != DMCFREQ_TABLE_END; i++) { -+ if (vop_info->line_bw_mbyte >= common_info->vop_bw_tbl[i].min) -+ target = common_info->vop_bw_tbl[i].freq; -+ } ++#define CRYPTO_BC_MID_IV_0 0x0060 ++#define CRYPTO_BC_MID_WORD_SIZE 6 + -+vop_frame_bw_tbl: -+ if (!common_info->auto_freq_en || !common_info->vop_frame_bw_tbl) -+ goto next; -+ for (i = 0; common_info->vop_frame_bw_tbl[i].freq != DMCFREQ_TABLE_END; -+ i++) { -+ if (vop_info->frame_bw_mbyte >= common_info->vop_frame_bw_tbl[i].min) { -+ if (target < common_info->vop_frame_bw_tbl[i].freq) -+ target = common_info->vop_frame_bw_tbl[i].freq; -+ } -+ } ++#define CRYPTO_MID_VALID 0x03e8 ++#define CRYPTO_BC_MID_IS_VALID BIT(0) ++#define CRYPTO_HASH_MID_IS_VALID BIT(1) + -+next: -+ if (vop_info->plane_num_4k && target < common_info->vop_4k_rate) -+ target = common_info->vop_4k_rate; ++#define CRYPTO_KEY_SEL 0x0610 + -+ vop_last_rate = common_info->vop_req_rate; -+ common_info->vop_req_rate = target; ++#define CRYPTO_MID_VALID_SWITCH 0x0630 ++#define CRYPTO_MID_VALID_ENABLE BIT(0) + -+ if (target > vop_last_rate) { -+ mutex_lock(&common_info->devfreq->lock); -+ update_devfreq(common_info->devfreq); -+ mutex_unlock(&common_info->devfreq->lock); -+ } -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_update); ++#define CRYPTO_AES_VERSION 0x0680 ++#define CRYPTO_DES_VERSION 0x0684 ++#define CRYPTO_SM4_VERSION 0x0688 + -+int rockchip_dmcfreq_vop_bandwidth_request(struct dmcfreq_vop_info *vop_info) -+{ -+ unsigned long target = 0; -+ int i; ++#define CRYPTO_ECB_FLAG BIT(0) ++#define CRYPTO_CBC_FLAG BIT(1) ++#define CRYPTO_CTS_FLAG BIT(2) ++#define CRYPTO_CTR_FLAG BIT(3) ++#define CRYPTO_CFB_FLAG BIT(4) ++#define CRYPTO_OFB_FLAG BIT(5) ++#define CRYPTO_XTS_FLAG BIT(6) ++#define 
CRYPTO_CCM_FLAG BIT(7) ++#define CRYPTO_GCM_FLAG BIT(8) ++#define CRYPTO_CMAC_FLAG BIT(9) ++#define CRYPTO_CBCMAC_FLAG BIT(10) + -+ if (!common_info || !common_info->auto_freq_en || -+ !common_info->vop_bw_tbl) -+ return 0; ++#define CRYPTO_AES128_FLAG BIT(16) ++#define CRYPTO_AES192_FLAG BIT(17) ++#define CRYPTO_AES256_FLAG BIT(18) + -+ for (i = 0; common_info->vop_bw_tbl[i].freq != DMCFREQ_TABLE_END; i++) { -+ if (vop_info->line_bw_mbyte <= common_info->vop_bw_tbl[i].max) { -+ target = common_info->vop_bw_tbl[i].freq; -+ break; -+ } -+ } ++#define CRYPTO_TDES_FLAG BIT(16) + -+ if (!target) -+ return -EINVAL; ++#define CRYPTO_LOCKSEP_FLAG BIT(20) ++#define CRYPTO_SECURE_FLAG BIT(21) ++#define CRYPTO_MULTI_CHN_FLAG BIT(22) + -+ return 0; -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_request); ++#define CRYPTO_HASH_VERSION 0x068C ++#define CRYPTO_HASH_SHA1_FLAG BIT(0) ++#define CRYPTO_HASH_SHA224_FLAG BIT(1) ++#define CRYPTO_HASH_SHA256_FLAG BIT(2) ++#define CRYPTO_HASH_SHA384_FLAG BIT(3) ++#define CRYPTO_HASH_SHA512_FLAG BIT(4) ++#define CRYPTO_HASH_SHA512_224_FLAG BIT(5) ++#define CRYPTO_HASH_SHA512_256_FLAG BIT(6) ++#define CRYPTO_HASH_MD5_FLAG BIT(7) ++#define CRYPTO_HASH_SM3_FLAG BIT(8) + -+unsigned int rockchip_dmcfreq_get_stall_time_ns(void) -+{ -+ if (!common_info) -+ return 0; ++#define CRYPTO_HMAC_VERSION 0x0690 ++#define CRYPTO_HMAC_SHA1_FLAG BIT(0) ++#define CRYPTO_HMAC_SHA256_FLAG BIT(1) ++#define CRYPTO_HMAC_SHA512_FLAG BIT(2) ++#define CRYPTO_HMAC_MD5_FLAG BIT(3) ++#define CRYPTO_HMAC_SM3_FLAG BIT(4) + -+ return common_info->stall_time_ns; -+} -+EXPORT_SYMBOL(rockchip_dmcfreq_get_stall_time_ns); ++#define CRYPTO_RNG_VERSION 0x0694 ++#define CRYPTO_PKA_VERSION 0x0698 ++#define CRYPTO_CRYPTO_VERSION 0x06F0 + -+MODULE_AUTHOR("Finley Xiao "); -+MODULE_DESCRIPTION("rockchip dmcfreq driver with devfreq framework"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/devfreq/rockchip_dmc_dbg.c b/drivers/devfreq/rockchip_dmc_dbg.c ++#define CRYPTO_HASH_MID_DATA_0 0x0700 ++#define CRYPTO_HASH_MID_WORD_SIZE 26 ++ ++#endif ++ +diff --git a/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c b/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c new file mode 100644 -index 000000000..80b25e904 +index 000000000..4220e6cbe --- /dev/null -+++ b/drivers/devfreq/rockchip_dmc_dbg.c -@@ -0,0 +1,1061 @@ ++++ b/drivers/crypto/rockchip/rk_crypto_v3_skcipher.c +@@ -0,0 +1,684 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2020, Rockchip Electronics Co., Ltd. 
++ * Crypto acceleration support for Rockchip Crypto V2 ++ * ++ * Copyright (c) 2022, Fuzhou Rockchip Electronics Co., Ltd ++ * ++ * Author: Lin Jinhan ++ * + */ ++ ++#include ++#include +#include -+#include -+#include +#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#include ++#include "rk_crypto_core.h" ++#include "rk_crypto_utils.h" ++#include "rk_crypto_skcipher_utils.h" ++#include "rk_crypto_v3.h" ++#include "rk_crypto_v3_reg.h" + -+#include "rockchip_dmc_timing.h" ++#define RK_POLL_PERIOD_US 100 ++#define RK_POLL_TIMEOUT_US 50000 + -+/* -+ * DMCDBG share memory request 4KB for delivery parameter -+ */ -+#define DMCDBG_PAGE_NUMS (1) -+#define DMCDBG_SHARE_MEM_SIZE ((DMCDBG_PAGE_NUMS) * 4096) ++static const u32 cipher_algo2bc[] = { ++ [CIPHER_ALGO_DES] = CRYPTO_BC_DES, ++ [CIPHER_ALGO_DES3_EDE] = CRYPTO_BC_TDES, ++ [CIPHER_ALGO_AES] = CRYPTO_BC_AES, ++ [CIPHER_ALGO_SM4] = CRYPTO_BC_SM4, ++}; + -+#define PROC_DMCDBG_DIR_NAME "dmcdbg" -+#define PROC_DMCDBG_DRAM_INFO "dmcinfo" -+#define PROC_DMCDBG_POWERSAVE "powersave" -+#define PROC_DMCDBG_DRVODT "drvodt" -+#define PROC_DMCDBG_DESKEW "deskew" -+#define PROC_DMCDBG_REGS_INFO "regsinfo" ++static const u32 cipher_mode2bc[] = { ++ [CIPHER_MODE_ECB] = CRYPTO_BC_ECB, ++ [CIPHER_MODE_CBC] = CRYPTO_BC_CBC, ++ [CIPHER_MODE_CFB] = CRYPTO_BC_CFB, ++ [CIPHER_MODE_OFB] = CRYPTO_BC_OFB, ++ [CIPHER_MODE_CTR] = CRYPTO_BC_CTR, ++ [CIPHER_MODE_XTS] = CRYPTO_BC_XTS, ++ [CIPHER_MODE_GCM] = CRYPTO_BC_GCM, ++}; + -+#define DDRDBG_FUNC_GET_VERSION (0x01) -+#define DDRDBG_FUNC_GET_SUPPORTED (0x02) -+#define DDRDBG_FUNC_GET_DRAM_INFO (0x03) -+#define DDRDBG_FUNC_GET_DESKEW_INFO (0x04) -+#define DDRDBG_FUNC_UPDATE_DESKEW (0x05) -+#define DDRDBG_FUNC_DATA_TRAINING (0x06) -+#define DDRDBG_FUNC_UPDATE_DESKEW_TR (0x07) -+#define DDRDBG_FUNC_GET_POWERSAVE_INFO (0x08) -+#define DDRDBG_FUNC_UPDATE_POWERSAVE (0x09) -+#define DDRDBG_FUNC_GET_DRVODT_INFO (0x0a) -+#define DDRDBG_FUNC_UPDATE_DRVODT (0x0b) -+#define DDRDBG_FUNC_GET_REGISTERS_INFO (0x0c) ++static int rk_crypto_irq_handle(int irq, void *dev_id) ++{ ++ struct rk_crypto_dev *rk_dev = platform_get_drvdata(dev_id); ++ u32 interrupt_status; ++ struct rk_hw_crypto_v3_info *hw_info = ++ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); + -+#define DRV_ODT_UNKNOWN (0xffff) -+#define DRV_ODT_UNSUSPEND_FIX (0x0) -+#define DRV_ODT_SUSPEND_FIX (0x1) ++ interrupt_status = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, interrupt_status); + -+#define REGS_NAME_LEN_MAX (20) -+#define SKEW_GROUP_NUM_MAX (6) -+#define SKEW_TIMING_NUM_MAX (50) ++ interrupt_status &= CRYPTO_LOCKSTEP_MASK; + -+struct rockchip_dmcdbg { -+ struct device *dev; -+}; ++ if (interrupt_status != CRYPTO_DST_ITEM_DONE_INT_ST) { ++ dev_err(rk_dev->dev, "DMA desc = %p\n", hw_info->hw_desc.lli_head); ++ dev_err(rk_dev->dev, "DMA addr_in = %08x\n", ++ (u32)alg_ctx->addr_in); ++ dev_err(rk_dev->dev, "DMA addr_out = %08x\n", ++ (u32)alg_ctx->addr_out); ++ dev_err(rk_dev->dev, "DMA count = %08x\n", alg_ctx->count); ++ dev_err(rk_dev->dev, "DMA desc_dma = %08x\n", ++ (u32)hw_info->hw_desc.lli_head_dma); ++ dev_err(rk_dev->dev, "DMA Error status = %08x\n", ++ interrupt_status); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_ADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_ADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_ST status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_ST)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_STATE status = 
%08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_STATE)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_LLI_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_LLI_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_SRC_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_SRC_RADDR)); ++ dev_err(rk_dev->dev, "DMA CRYPTO_DMA_DST_RADDR status = %08x\n", ++ CRYPTO_READ(rk_dev, CRYPTO_DMA_DST_RADDR)); ++ rk_dev->err = -EFAULT; ++ } + -+struct proc_dir_entry *proc_dmcdbg_dir; ++ return 0; ++} + -+struct dram_cap_info { -+ unsigned int rank; -+ unsigned int col; -+ unsigned int bank; -+ unsigned int buswidth; -+ unsigned int die_buswidth; -+ unsigned int row_3_4; -+ unsigned int cs0_row; -+ unsigned int cs1_row; -+ unsigned int cs0_high16bit_row; -+ unsigned int cs1_high16bit_row; -+ unsigned int bankgroup; -+ unsigned int size; -+}; ++static inline void set_pc_len_reg(struct rk_crypto_dev *rk_dev, u64 pc_len) ++{ ++ u32 chn_base = CRYPTO_CH0_PC_LEN_0; + -+struct dram_info { -+ unsigned int version; -+ char dramtype[10]; -+ unsigned int dramfreq; -+ unsigned int channel_num; -+ struct dram_cap_info ch[2]; -+}; ++ CRYPTO_TRACE("PC length = %lu\n", (unsigned long)pc_len); + -+static const char * const power_save_msg[] = { -+ "auto power down enable", -+ "auto power down idle cycle", -+ "auto self refresh enable", -+ "auto self refresh idle cycle", -+ "self refresh with clock gate idle cycle", -+ "self refresh and power down lite idle cycle", -+ "standby idle cycle", -+}; ++ CRYPTO_WRITE(rk_dev, chn_base, pc_len & 0xffffffff); ++ CRYPTO_WRITE(rk_dev, chn_base + 4, pc_len >> 32); ++} + -+struct power_save_info { -+ unsigned int pd_en; -+ unsigned int pd_idle; -+ unsigned int sr_en; -+ unsigned int sr_idle; -+ unsigned int sr_mc_gate_idle; -+ unsigned int srpd_lite_idle; -+ unsigned int standby_idle; -+}; ++static inline void set_aad_len_reg(struct rk_crypto_dev *rk_dev, u64 aad_len) ++{ ++ u32 chn_base = CRYPTO_CH0_AAD_LEN_0; + -+static const char * const drv_odt_msg[] = { -+ "dram side drv pull-up", -+ "dram side drv pull-down", -+ "dram side dq odt pull-up", -+ "dram side dq odt pull-down", -+ "dram side ca odt pull-up", -+ "dram side ca odt pull-down", -+ "soc side ca drv pull-up", -+ "soc side ca drv pull-down", -+ "soc side ck drv pull-up", -+ "soc side ck drv pull-down", -+ "soc side cs drv pull-up", -+ "soc side cs drv pull-down", -+ "soc side dq drv pull-up", -+ "soc side dq drv pull-down", -+ "soc side odt pull-up", -+ "soc side odt pull-down", -+ "phy vref inner", -+ "phy vref out", -+}; ++ CRYPTO_TRACE("AAD length = %lu\n", (unsigned long)aad_len); + -+struct drv_odt { -+ unsigned int value; -+ unsigned int ohm; -+ unsigned int flag; -+}; ++ CRYPTO_WRITE(rk_dev, chn_base, aad_len & 0xffffffff); ++ CRYPTO_WRITE(rk_dev, chn_base + 4, aad_len >> 32); ++} + -+struct drv_odt_vref { -+ unsigned int value; -+ unsigned int percen; -+ unsigned int flag; -+}; ++static void set_iv_reg(struct rk_crypto_dev *rk_dev, const u8 *iv, u32 iv_len) ++{ ++ if (!iv || iv_len == 0) ++ return; + -+struct drv_odt_info { -+ struct drv_odt dram_drv_up; -+ struct drv_odt dram_drv_down; -+ struct drv_odt dram_dq_odt_up; -+ struct drv_odt dram_dq_odt_down; -+ struct drv_odt dram_ca_odt_up; -+ struct drv_odt dram_ca_odt_down; -+ struct drv_odt phy_ca_drv_up; -+ struct drv_odt phy_ca_drv_down; -+ struct drv_odt phy_ck_drv_up; -+ struct drv_odt phy_ck_drv_down; -+ struct drv_odt phy_cs_drv_up; -+ struct drv_odt phy_cs_drv_down; -+ struct drv_odt phy_dq_drv_up; -+ struct drv_odt phy_dq_drv_down; -+ struct drv_odt phy_odt_up; 
-+ struct drv_odt phy_odt_down; -+ struct drv_odt_vref phy_vref_inner; -+ struct drv_odt_vref phy_vref_out; -+}; ++ CRYPTO_DUMPHEX("set iv", iv, iv_len); + -+struct dmc_registers { -+ char regs_name[REGS_NAME_LEN_MAX]; -+ unsigned int regs_addr; -+}; ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_IV_0, iv, iv_len); + -+struct registers_info { -+ unsigned int regs_num; -+ struct dmc_registers regs[]; -+}; ++ CRYPTO_WRITE(rk_dev, CRYPTO_CH0_IV_LEN_0, iv_len); ++} + -+struct skew_group { -+ unsigned int skew_num; -+ unsigned int *p_skew_info; -+ char *p_skew_timing[SKEW_TIMING_NUM_MAX]; -+ char *note; -+}; ++static void write_key_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH0_KEY_0, key, key_len); ++} + -+struct rockchip_dmcdbg_data { -+ unsigned int inited_flag; -+ void __iomem *share_memory; -+ unsigned int skew_group_num; -+ struct skew_group skew_group[SKEW_GROUP_NUM_MAX]; -+}; ++static void write_tkey_reg(struct rk_crypto_dev *rk_dev, const u8 *key, ++ u32 key_len) ++{ ++ rk_crypto_write_regs(rk_dev, CRYPTO_CH4_KEY_0, key, key_len); ++} + -+static struct rockchip_dmcdbg_data dmcdbg_data; ++static int get_tag_reg(struct rk_crypto_dev *rk_dev, u8 *tag, u32 tag_len) ++{ ++ int ret; ++ u32 reg_ctrl = 0; + -+struct skew_info_rv1126 { -+ unsigned int ca_skew[32]; -+ unsigned int cs0_a_skew[44]; -+ unsigned int cs0_b_skew[44]; -+ unsigned int cs1_a_skew[44]; -+ unsigned int cs1_b_skew[44]; -+}; -+ -+static int dmcinfo_proc_show(struct seq_file *m, void *v) -+{ -+ struct arm_smccc_res res; -+ struct dram_info *p_dram_info; -+ struct file *fp = NULL; -+ char cur_freq[20] = {0}; -+ char governor[20] = {0}; -+ loff_t pos; -+ u32 i; -+ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRAM_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } -+ -+ if (!dmcdbg_data.inited_flag) { -+ seq_puts(m, "dmcdbg_data no int\n"); -+ return -EPERM; -+ } -+ p_dram_info = (struct dram_info *)dmcdbg_data.share_memory; -+ -+ /* dram type information */ -+ seq_printf(m, -+ "DramType: %s\n" -+ , -+ p_dram_info->dramtype -+ ); -+ -+ /* dram capacity information */ -+ seq_printf(m, -+ "\n" -+ "DramCapacity:\n" -+ ); ++ CRYPTO_TRACE("tag_len = %u", tag_len); + -+ for (i = 0; i < p_dram_info->channel_num; i++) { -+ if (p_dram_info->channel_num == 2) -+ seq_printf(m, -+ "Channel [%d]:\n" -+ , -+ i -+ ); ++ if (tag_len > RK_MAX_TAG_SIZE) ++ return -EINVAL; + -+ seq_printf(m, -+ "CS Count: %d\n" -+ "Bus Width: %d bit\n" -+ "Column: %d\n" -+ "Bank: %d\n" -+ "CS0_Row: %d\n" -+ "CS1_Row: %d\n" -+ "DieBusWidth: %d bit\n" -+ "TotalSize: %d MB\n" -+ , -+ p_dram_info->ch[i].rank, -+ p_dram_info->ch[i].buswidth, -+ p_dram_info->ch[i].col, -+ p_dram_info->ch[i].bank, -+ p_dram_info->ch[i].cs0_row, -+ p_dram_info->ch[i].cs1_row, -+ p_dram_info->ch[i].die_buswidth, -+ p_dram_info->ch[i].size -+ ); -+ } ++ ret = read_poll_timeout_atomic(CRYPTO_READ, ++ reg_ctrl, ++ reg_ctrl & CRYPTO_CH0_TAG_VALID, ++ 0, ++ RK_POLL_TIMEOUT_US, ++ false, ++ rk_dev, CRYPTO_TAG_VALID); ++ if (ret) ++ goto exit; + -+ /* check devfreq/dmc device */ -+ fp = filp_open("/sys/class/devfreq/dmc/cur_freq", O_RDONLY, 0); -+ if (IS_ERR(fp)) { -+ seq_printf(m, -+ "\n" -+ "devfreq/dmc: Disable\n" -+ "DramFreq: %d\n" -+ , -+ p_dram_info->dramfreq -+ ); -+ } else { -+ pos = 0; -+ kernel_read(fp, cur_freq, sizeof(cur_freq), &pos); -+ filp_close(fp, NULL); ++ rk_crypto_read_regs(rk_dev, 
CRYPTO_CH0_TAG_0, tag, tag_len); ++exit: ++ return ret; ++} + -+ fp = filp_open("/sys/class/devfreq/dmc/governor", O_RDONLY, 0); -+ if (IS_ERR(fp)) { -+ fp = NULL; -+ } else { -+ pos = 0; -+ kernel_read(fp, governor, sizeof(governor), &pos); -+ filp_close(fp, NULL); -+ } ++static bool is_force_fallback(struct rk_crypto_algt *algt, uint32_t key_len) ++{ ++ if (algt->algo != CIPHER_ALGO_AES) ++ return false; + -+ seq_printf(m, -+ "\n" -+ "devfreq/dmc: Enable\n" -+ "governor: %s\n" -+ "cur_freq: %s\n" -+ , -+ governor, -+ cur_freq -+ ); -+ seq_printf(m, -+ "NOTE:\n" -+ "more information about dmc can get from /sys/class/devfreq/dmc.\n" -+ ); -+ } ++ /* crypto v2 not support xts with AES-192 */ ++ if (algt->mode == CIPHER_MODE_XTS && key_len == AES_KEYSIZE_192 * 2) ++ return true; + -+ return 0; -+} ++ if (algt->use_soft_aes192 && key_len == AES_KEYSIZE_192) ++ return true; + -+static int dmcinfo_proc_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, dmcinfo_proc_show, NULL); ++ return false; +} + -+static const struct file_operations dmcinfo_proc_fops = { -+ .open = dmcinfo_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+static int proc_dmcinfo_init(void) ++static bool is_calc_need_round_up(struct skcipher_request *req) +{ -+ /* create dmcinfo file */ -+ proc_create(PROC_DMCDBG_DRAM_INFO, 0644, proc_dmcdbg_dir, -+ &dmcinfo_proc_fops); ++ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(cipher); + -+ return 0; ++ return (algt->mode == CIPHER_MODE_CFB || ++ algt->mode == CIPHER_MODE_OFB || ++ algt->mode == CIPHER_MODE_CTR) ? true : false; +} + -+static int powersave_proc_show(struct seq_file *m, void *v) ++static void rk_cipher_reset(struct rk_crypto_dev *rk_dev) +{ -+ struct arm_smccc_res res; -+ struct power_save_info *p_power; -+ unsigned int *p_uint; -+ unsigned int i = 0; ++ int ret; ++ u32 tmp = 0, tmp_mask = 0; ++ unsigned int pool_timeout_us = 1000; + -+ /* get low power information */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, -+ DDRDBG_FUNC_GET_POWERSAVE_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x00); + -+ if (!dmcdbg_data.inited_flag) { -+ seq_puts(m, "dmcdbg_data no int\n"); -+ return -EPERM; -+ } -+ p_power = (struct power_save_info *)dmcdbg_data.share_memory; ++ tmp = CRYPTO_SW_CC_RESET; ++ tmp_mask = tmp << CRYPTO_WRITE_MASK_SHIFT; + -+ seq_printf(m, -+ "low power information:\n" -+ "\n" -+ "[number]name: value\n" -+ ); ++ CRYPTO_WRITE(rk_dev, CRYPTO_RST_CTL, tmp | tmp_mask); + -+ p_uint = (unsigned int *)p_power; -+ for (i = 0; i < ARRAY_SIZE(power_save_msg); i++) -+ seq_printf(m, -+ "[%d]%s: %d\n" -+ , -+ i, power_save_msg[i], *(p_uint + i) -+ ); ++ /* This is usually done in 20 clock cycles */ ++ ret = read_poll_timeout_atomic(CRYPTO_READ, tmp, !tmp, 0, ++ pool_timeout_us, false, rk_dev, CRYPTO_RST_CTL); ++ if (ret) ++ dev_err(rk_dev->dev, "cipher reset pool timeout %ums.", ++ pool_timeout_us); + -+ seq_printf(m, -+ "\n" -+ "power save setting:\n" -+ "echo number=value > /proc/dmcdbg/powersave\n" -+ "eg: set auto power down enable to 1\n" -+ " echo 0=1 > /proc/dmcdbg/powersave\n" -+ "\n" -+ "Support for setting multiple parameters at the same time.\n" -+ "echo number=value,number=value,... 
> /proc/dmcdbg/powersave\n" -+ "eg:\n" -+ " echo 0=1,1=32 > /proc/dmcdbg/powersave\n" -+ ); ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0xffff0000); + -+ return 0; ++ /* clear dma int status */ ++ tmp = CRYPTO_READ(rk_dev, CRYPTO_DMA_INT_ST); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_ST, tmp); +} + -+static int powersave_proc_open(struct inode *inode, struct file *file) ++static void rk_crypto_complete(struct crypto_async_request *base, int err) +{ -+ return single_open(file, powersave_proc_show, NULL); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm); ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; ++ struct rk_hw_crypto_v3_info *hw_info = ctx->rk_dev->hw_info; ++ struct crypto_lli_desc *lli_desc = hw_info->hw_desc.lli_head; ++ ++ CRYPTO_WRITE(ctx->rk_dev, CRYPTO_BC_CTL, 0xffff0000); ++ if (err) { ++ rk_cipher_reset(ctx->rk_dev); ++ pr_err("aligned = %u, align_size = %u\n", ++ alg_ctx->aligned, alg_ctx->align_size); ++ pr_err("total = %u, left = %u, count = %u\n", ++ alg_ctx->total, alg_ctx->left_bytes, alg_ctx->count); ++ pr_err("lli->src = %08x\n", lli_desc->src_addr); ++ pr_err("lli->src_len = %08x\n", lli_desc->src_len); ++ pr_err("lli->dst = %08x\n", lli_desc->dst_addr); ++ pr_err("lli->dst_len = %08x\n", lli_desc->dst_len); ++ pr_err("lli->dma_ctl = %08x\n", lli_desc->dma_ctrl); ++ pr_err("lli->usr_def = %08x\n", lli_desc->user_define); ++ pr_err("lli->next = %08x\n\n\n", lli_desc->next_addr); ++ } ++ ++ if (base->complete) ++ base->complete(base, err); +} + -+static ssize_t powersave_proc_write(struct file *file, -+ const char __user *buffer, -+ size_t count, loff_t *ppos) ++static int rk_cipher_crypt(struct skcipher_request *req, bool encrypt) +{ -+ struct arm_smccc_res res; -+ struct power_save_info *p_power; -+ unsigned int *p_uint; -+ char *buf, *cookie_pot, *p_char; -+ int ret = 0; -+ u32 loop, i, offset, value; -+ long long_val; ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); + -+ /* get buffer data */ -+ buf = vzalloc(count); -+ cookie_pot = buf; -+ if (!cookie_pot) -+ return -ENOMEM; ++ CRYPTO_TRACE("%s total = %u", ++ encrypt ? 
"encrypt" : "decrypt", req->cryptlen); + -+ if (copy_from_user(cookie_pot, buffer, count)) { -+ ret = -EFAULT; -+ goto err; ++ if (!req->cryptlen) { ++ if (algt->mode == CIPHER_MODE_ECB || ++ algt->mode == CIPHER_MODE_CBC || ++ algt->mode == CIPHER_MODE_CTR || ++ algt->mode == CIPHER_MODE_CFB || ++ algt->mode == CIPHER_MODE_OFB) ++ return 0; ++ else ++ return -EINVAL; + } + -+ /* get power save setting information */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, -+ DDRDBG_FUNC_GET_POWERSAVE_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; -+ } ++ /* XTS data should >= chunksize */ ++ if (algt->mode == CIPHER_MODE_XTS) { ++ if (req->cryptlen < crypto_skcipher_chunksize(tfm)) ++ return -EINVAL; + -+ if (!dmcdbg_data.inited_flag) { -+ pr_err("dmcdbg_data no int\n"); -+ ret = -EPERM; -+ goto err; -+ } -+ p_power = (struct power_save_info *)dmcdbg_data.share_memory; ++ /* force use unalign branch */ ++ ctx->algs_ctx.align_size = ctx->rk_dev->vir_max; + -+ loop = 0; -+ for (i = 0; i < count; i++) { -+ if (*(cookie_pot + i) == '=') -+ loop++; ++ /* XTS can't pause when use hardware crypto */ ++ if (req->cryptlen > ctx->rk_dev->vir_max) ++ return rk_cipher_fallback(req, ctx, encrypt); + } + -+ p_uint = (unsigned int *)p_power; -+ for (i = 0; i < loop; i++) { -+ p_char = strsep(&cookie_pot, "="); -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ offset = long_val; ++ if (is_force_fallback(algt, ctx->keylen)) ++ return rk_cipher_fallback(req, ctx, encrypt); + -+ if (i == (loop - 1)) -+ p_char = strsep(&cookie_pot, "\0"); -+ else -+ p_char = strsep(&cookie_pot, ","); ++ ctx->mode = cipher_algo2bc[algt->algo] | ++ cipher_mode2bc[algt->mode]; ++ if (!encrypt) ++ ctx->mode |= CRYPTO_BC_DECRYPT; + -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ value = long_val; ++ if (algt->algo == CIPHER_ALGO_AES) { ++ uint32_t key_factor; + -+ if (offset >= ARRAY_SIZE(power_save_msg)) { -+ ret = -EINVAL; -+ goto err; -+ } -+ offset = array_index_nospec(offset, ARRAY_SIZE(power_save_msg)); ++ /* The key length of XTS is twice the normal length */ ++ key_factor = algt->mode == CIPHER_MODE_XTS ? 
2 : 1; + -+ *(p_uint + offset) = value; ++ if (ctx->keylen == AES_KEYSIZE_128 * key_factor) ++ ctx->mode |= CRYPTO_BC_128_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_192 * key_factor) ++ ctx->mode |= CRYPTO_BC_192_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_256 * key_factor) ++ ctx->mode |= CRYPTO_BC_256_bit_key; + } + -+ /* update power save setting */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_POWERSAVE, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; -+ } ++ ctx->iv_len = crypto_skcipher_ivsize(tfm); + -+ ret = count; -+err: -+ vfree(buf); -+ return ret; -+} ++ memset(ctx->iv, 0x00, sizeof(ctx->iv)); ++ memcpy(ctx->iv, req->iv, ctx->iv_len); + -+static const struct file_operations powersave_proc_fops = { -+ .open = powersave_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+ .write = powersave_proc_write, -+}; ++ ctx->is_enc = encrypt; + -+static int proc_powersave_init(void) ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ return rk_skcipher_handle_req(ctx->rk_dev, req); ++} ++ ++static int rk_cipher_encrypt(struct skcipher_request *req) +{ -+ /* create dmcinfo file */ -+ proc_create(PROC_DMCDBG_POWERSAVE, 0644, proc_dmcdbg_dir, -+ &powersave_proc_fops); ++ return rk_cipher_crypt(req, true); ++} + -+ return 0; ++static int rk_cipher_decrypt(struct skcipher_request *req) ++{ ++ return rk_cipher_crypt(req, false); +} + -+static int drvodt_proc_show(struct seq_file *m, void *v) ++static int rk_ablk_hw_init(struct rk_crypto_dev *rk_dev, u32 algo, u32 mode) +{ -+ struct arm_smccc_res res; -+ struct drv_odt_info *p_drvodt; -+ unsigned int *p_uint; -+ unsigned int i; ++ struct rk_cipher_ctx *ctx = rk_cipher_ctx_cast(rk_dev); + -+ /* get drive strength and odt information */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRVODT_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ rk_cipher_reset(rk_dev); + -+ if (!dmcdbg_data.inited_flag) { -+ seq_puts(m, "dmcdbg_data no int\n"); -+ return -EPERM; -+ } -+ p_drvodt = (struct drv_odt_info *)dmcdbg_data.share_memory; ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, 0x00010000); + -+ seq_printf(m, -+ "drv and odt information:\n" -+ "\n" -+ "[number]name: value (ohm)\n" -+ ); ++ if (mode == CIPHER_MODE_XTS) { ++ uint32_t tmp_len = ctx->keylen / 2; + -+ p_uint = (unsigned int *)p_drvodt; -+ for (i = 0; i < ARRAY_SIZE(drv_odt_msg); i++) { -+ if (*(p_uint + (i * 3)) == DRV_ODT_UNKNOWN) -+ seq_printf(m, -+ "[%2d]%s: NULL (unknown) %c\n" -+ , -+ i, drv_odt_msg[i], -+ (*(p_uint + (i * 3) + 2) == -+ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' -+ ); -+ else if (*(p_uint + (i * 3) + 1) == DRV_ODT_UNKNOWN) -+ seq_printf(m, -+ "[%2d]%s: %d (unknown) %c\n" -+ , -+ i, drv_odt_msg[i], *(p_uint + (i * 3)), -+ (*(p_uint + (i * 3) + 2) == -+ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' -+ ); -+ else if (i < (ARRAY_SIZE(drv_odt_msg) - 2)) -+ seq_printf(m, -+ "[%2d]%s: %d (%d ohm) %c\n" -+ , -+ i, drv_odt_msg[i], *(p_uint + (i * 3)), -+ *(p_uint + (i * 3) + 1), -+ (*(p_uint + (i * 3) + 2) == -+ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' -+ ); -+ else -+ seq_printf(m, -+ "[%2d]%s: %d (%d %%) %c\n" -+ , -+ i, drv_odt_msg[i], *(p_uint + (i * 3)), -+ *(p_uint + (i * 3) + 1), -+ (*(p_uint + (i * 3) + 2) == -+ DRV_ODT_SUSPEND_FIX) ? 
'\0' : '*' -+ ); ++ write_key_reg(ctx->rk_dev, ctx->key, tmp_len); ++ write_tkey_reg(ctx->rk_dev, ctx->key + tmp_len, tmp_len); ++ } else { ++ write_key_reg(ctx->rk_dev, ctx->key, ctx->keylen); + } + -+ seq_printf(m, -+ "\n" -+ "drvodt setting:\n" -+ "echo number=value > /proc/dmcdbg/drvodt\n" -+ "eg: set soc side ca drv up to 20\n" -+ " echo 6=20 > /proc/dmcdbg/drvodt\n" -+ "\n" -+ "Support for setting multiple parameters at the same time.\n" -+ "echo number=value,number=value,... > /proc/dmcdbg/drvodt\n" -+ "eg: set soc side ca drv up and down to 20\n" -+ " echo 6=20,7=20 > /proc/dmcdbg/drvodt\n" -+ "Note: Please update both up and down at the same time.\n" -+ " (*) mean unsupported setting value\n" -+ ); ++ if (mode != CIPHER_MODE_ECB) ++ set_iv_reg(rk_dev, ctx->iv, ctx->iv_len); + -+ return 0; -+} ++ ctx->mode |= CRYPTO_BC_ENABLE; + -+static int drvodt_proc_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, drvodt_proc_show, NULL); ++ CRYPTO_WRITE(rk_dev, CRYPTO_FIFO_CTL, 0x00030003); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_INT_EN, 0x7f); ++ ++ CRYPTO_WRITE(rk_dev, CRYPTO_BC_CTL, ctx->mode | CRYPTO_WRITE_MASK_ALL); ++ ++ return 0; +} + -+static ssize_t drvodt_proc_write(struct file *file, -+ const char __user *buffer, -+ size_t count, loff_t *ppos) ++static int crypto_dma_start(struct rk_crypto_dev *rk_dev, uint32_t flag) +{ -+ struct arm_smccc_res res; -+ struct drv_odt_info *p_drvodt; -+ unsigned int *p_uint; -+ char *buf, *cookie_pot, *p_char; -+ int ret = 0; -+ u32 loop, i, offset, value; -+ long long_val; -+ -+ /* get buffer data */ -+ buf = vzalloc(count); -+ cookie_pot = buf; -+ if (!cookie_pot) -+ return -ENOMEM; ++ struct rk_hw_crypto_v3_info *hw_info = ++ (struct rk_hw_crypto_v3_info *)rk_dev->hw_info; ++ struct skcipher_request *req = ++ skcipher_request_cast(rk_dev->async_req); ++ struct rk_alg_ctx *alg_ctx = rk_cipher_alg_ctx(rk_dev); ++ struct crypto_lli_desc *lli_head, *lli_tail, *lli_aad; ++ u32 calc_len = alg_ctx->count; ++ u32 start_flag = CRYPTO_DMA_START; ++ int ret; + -+ if (copy_from_user(cookie_pot, buffer, count)) { -+ ret = -EFAULT; -+ goto err; -+ } ++ if (alg_ctx->aligned) ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ alg_ctx->sg_src, alg_ctx->sg_dst, alg_ctx->count); ++ else ++ ret = rk_crypto_hw_desc_init(&hw_info->hw_desc, ++ &alg_ctx->sg_tmp, &alg_ctx->sg_tmp, alg_ctx->count); ++ if (ret) ++ return ret; + -+ /* get drv and odt setting */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRVODT_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; -+ } ++ lli_head = hw_info->hw_desc.lli_head; ++ lli_tail = hw_info->hw_desc.lli_tail; ++ lli_aad = hw_info->hw_desc.lli_aad; + -+ if (!dmcdbg_data.inited_flag) { -+ pr_err("dmcdbg_data no int\n"); -+ ret = -EPERM; -+ goto err; -+ } -+ p_drvodt = (struct drv_odt_info *)dmcdbg_data.share_memory; ++ /* ++ * the data length is not aligned will use addr_vir to calculate, ++ * so crypto v2 could round up data length to chunk_size ++ */ ++ if (!alg_ctx->is_aead && is_calc_need_round_up(req)) ++ calc_len = round_up(calc_len, alg_ctx->chunk_size); + -+ loop = 0; -+ for (i = 0; i < count; i++) { -+ if (*(cookie_pot + i) == '=') -+ loop++; -+ } ++ CRYPTO_TRACE("calc_len = %u, cryptlen = %u, assoclen= %u, is_aead = %d", ++ calc_len, alg_ctx->total, alg_ctx->assoclen, alg_ctx->is_aead); + -+ p_uint = (unsigned int *)p_drvodt; -+ for (i = 0; i < loop; i++) { -+ p_char = 
strsep(&cookie_pot, "="); -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ offset = long_val; ++ lli_head->user_define = LLI_USER_STRING_START | LLI_USER_CIPHER_START; + -+ if (i == (loop - 1)) -+ p_char = strsep(&cookie_pot, "\0"); -+ else -+ p_char = strsep(&cookie_pot, ","); ++ lli_tail->dma_ctrl = LLI_DMA_CTRL_DST_DONE | LLI_DMA_CTRL_LAST; ++ lli_tail->user_define |= LLI_USER_STRING_LAST; ++ lli_tail->src_len += (calc_len - alg_ctx->count); ++ lli_tail->dst_len += (calc_len - alg_ctx->count); + -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ value = long_val; ++ if (alg_ctx->is_aead) { ++ lli_aad->src_addr = alg_ctx->addr_aad_in; ++ lli_aad->src_len = alg_ctx->assoclen; ++ lli_aad->user_define = LLI_USER_CIPHER_START | ++ LLI_USER_STRING_START | ++ LLI_USER_STRING_LAST | ++ LLI_USER_STRING_AAD; ++ lli_aad->next_addr = hw_info->hw_desc.lli_head_dma; + -+ if (offset >= ARRAY_SIZE(drv_odt_msg)) { -+ ret = -EINVAL; -+ goto err; -+ } -+ offset *= 3; -+ offset = array_index_nospec(offset, ARRAY_SIZE(drv_odt_msg) * 3); ++ /* clear cipher start */ ++ lli_head->user_define &= (~((u32)LLI_USER_CIPHER_START)); + -+ *(p_uint + offset) = value; ++ set_pc_len_reg(rk_dev, alg_ctx->total); ++ set_aad_len_reg(rk_dev, alg_ctx->assoclen); + } + -+ /* update power save setting */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_DRVODT, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; -+ } ++ rk_crypto_dump_hw_desc(&hw_info->hw_desc); + -+ ret = count; -+err: -+ vfree(buf); -+ return ret; -+} ++ dma_wmb(); + -+static const struct file_operations drvodt_proc_fops = { -+ .open = drvodt_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+ .write = drvodt_proc_write, -+}; ++ if (alg_ctx->is_aead) ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_aad_dma); ++ else ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_LLI_ADDR, hw_info->hw_desc.lli_head_dma); + -+static int proc_drvodt_init(void) -+{ -+ /* create dmcinfo file */ -+ proc_create(PROC_DMCDBG_DRVODT, 0644, proc_dmcdbg_dir, -+ &drvodt_proc_fops); ++ CRYPTO_WRITE(rk_dev, CRYPTO_DMA_CTL, start_flag | (start_flag << WRITE_MASK)); + + return 0; +} + -+static int skew_proc_show(struct seq_file *m, void *v) ++static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) +{ -+ struct arm_smccc_res res; -+ unsigned int *p_uint; -+ u32 group, i; ++ struct rk_crypto_algt *algt = rk_cipher_get_algt(tfm); ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ /* get deskew information */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DESKEW_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ CRYPTO_TRACE(); + -+ if (!dmcdbg_data.inited_flag) { -+ seq_puts(m, "dmcdbg_data no int\n"); -+ return -EPERM; -+ } ++ memset(ctx, 0x00, sizeof(*ctx)); + -+ seq_printf(m, -+ "de-skew information:\n" -+ "\n" -+ "[group_number]name: value\n" -+ ); ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+ for (group = 0; group < dmcdbg_data.skew_group_num; group++) { -+ if (dmcdbg_data.skew_group[group].note != NULL) -+ seq_printf(m, -+ "%s\n" -+ , -+ dmcdbg_data.skew_group[group].note -+ ); -+ p_uint = 
(unsigned int *)dmcdbg_data.skew_group[group].p_skew_info; -+ for (i = 0; i < dmcdbg_data.skew_group[group].skew_num; i++) -+ seq_printf(m, -+ "[%c%d_%d]%s: %d\n" -+ , -+ (i < 10) ? ' ' : '\0', group, i, -+ dmcdbg_data.skew_group[group].p_skew_timing[i], -+ *(p_uint + i) -+ ); -+ } ++ rk_dev->request_crypto(rk_dev, alg_name); + -+ seq_printf(m, -+ "\n" -+ "de-skew setting:\n" -+ "echo group_number=value > /proc/dmcdbg/deskew\n" -+ "eg: set a1_ddr3a14_de-skew to 8\n" -+ " echo 0_1=8 > /proc/dmcdbg/deskew\n" -+ "\n" -+ "Support for setting multiple parameters simultaneously.\n" -+ "echo group_number=value,group_number=value,... > /proc/dmcdbg/deskew\n" -+ "eg:\n" -+ " echo 0_1=8,1_2=8 > /proc/dmcdbg/deskew\n" -+ ); ++ /* always not aligned for crypto v2 cipher */ ++ alg_ctx->align_size = 64; ++ alg_ctx->chunk_size = crypto_skcipher_chunksize(tfm); + -+ return 0; -+} ++ alg_ctx->ops.start = rk_ablk_start; ++ alg_ctx->ops.update = rk_ablk_rx; ++ alg_ctx->ops.complete = rk_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+static int skew_proc_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, skew_proc_show, NULL); ++ alg_ctx->ops.hw_init = rk_ablk_hw_init; ++ alg_ctx->ops.hw_dma_start = crypto_dma_start; ++ alg_ctx->ops.hw_write_iv = set_iv_reg; ++ ++ ctx->rk_dev = rk_dev; ++ ++ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { ++ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); ++ ctx->fallback_tfm = crypto_alloc_skcipher(alg_name, 0, ++ CRYPTO_ALG_ASYNC | ++ CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_tfm)) { ++ CRYPTO_MSG("Could not load fallback driver %s : %ld.\n", ++ alg_name, PTR_ERR(ctx->fallback_tfm)); ++ ctx->fallback_tfm = NULL; ++ } ++ } ++ ++ return 0; +} + -+static ssize_t skew_proc_write(struct file *file, -+ const char __user *buffer, -+ size_t count, loff_t *ppos) ++static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) +{ -+ struct arm_smccc_res res; -+ unsigned int *p_uint; -+ char *buf, *cookie_pot, *p_char; -+ int ret = 0; -+ u32 loop, i, offset_max, group, offset, value; -+ long long_val; ++ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ const char *alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); + -+ /* get buffer data */ -+ buf = vzalloc(count); -+ cookie_pot = buf; -+ if (!cookie_pot) -+ return -ENOMEM; ++ CRYPTO_TRACE(); + -+ if (copy_from_user(cookie_pot, buffer, count)) { -+ ret = -EFAULT; -+ goto err; ++ if (ctx->fallback_tfm) { ++ CRYPTO_MSG("free fallback tfm"); ++ crypto_free_skcipher(ctx->fallback_tfm); + } + -+ /* get skew setting */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DESKEW_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; -+ } ++ ctx->rk_dev->release_crypto(ctx->rk_dev, alg_name); ++} + -+ if (!dmcdbg_data.inited_flag) { -+ pr_err("dmcdbg_data no int\n"); -+ ret = -EPERM; -+ goto err; -+ } ++static int rk_aead_init_tfm(struct crypto_aead *tfm) ++{ ++ struct aead_alg *alg = crypto_aead_alg(tfm); ++ struct rk_crypto_algt *algt = ++ container_of(alg, struct rk_crypto_algt, alg.aead); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); ++ const char *alg_name = crypto_tfm_alg_name(&tfm->base); ++ struct rk_crypto_dev *rk_dev = algt->rk_dev; ++ struct rk_alg_ctx *alg_ctx = &ctx->algs_ctx; + -+ loop = 0; -+ for (i = 0; i < count; i++) { -+ if (*(cookie_pot + i) == '=') -+ loop++; -+ } ++ CRYPTO_TRACE(); + -+ for (i = 0; i < 
loop; i++) { -+ p_char = strsep(&cookie_pot, "_"); -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ group = long_val; ++ if (!rk_dev->request_crypto) ++ return -EFAULT; + -+ p_char = strsep(&cookie_pot, "="); -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ offset = long_val; ++ rk_dev->request_crypto(rk_dev, alg_name); + -+ if (i == (loop - 1)) -+ p_char = strsep(&cookie_pot, "\0"); -+ else -+ p_char = strsep(&cookie_pot, ","); ++ alg_ctx->align_size = 64; ++ alg_ctx->chunk_size = crypto_aead_chunksize(tfm); + -+ ret = kstrtol(p_char, 10, &long_val); -+ if (ret) -+ goto err; -+ value = long_val; ++ alg_ctx->ops.start = rk_aead_start; ++ alg_ctx->ops.update = rk_ablk_rx; ++ alg_ctx->ops.complete = rk_crypto_complete; ++ alg_ctx->ops.irq_handle = rk_crypto_irq_handle; + -+ if (group >= dmcdbg_data.skew_group_num) { -+ ret = -EINVAL; -+ goto err; -+ } -+ group = array_index_nospec(group, dmcdbg_data.skew_group_num); ++ alg_ctx->ops.hw_init = rk_ablk_hw_init; ++ alg_ctx->ops.hw_dma_start = crypto_dma_start; ++ alg_ctx->ops.hw_write_iv = set_iv_reg; ++ alg_ctx->ops.hw_get_result = get_tag_reg; + -+ p_uint = (unsigned int *)dmcdbg_data.skew_group[group].p_skew_info; -+ offset_max = dmcdbg_data.skew_group[group].skew_num; ++ ctx->rk_dev = rk_dev; ++ alg_ctx->is_aead = 1; + -+ if (offset >= offset_max) { -+ ret = -EINVAL; -+ goto err; ++ if (algt->alg.crypto.base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { ++ CRYPTO_MSG("alloc fallback tfm, name = %s", alg_name); ++ ctx->fallback_aead = ++ crypto_alloc_aead(alg_name, 0, ++ CRYPTO_ALG_ASYNC | ++ CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_aead)) { ++ dev_err(rk_dev->dev, ++ "Load fallback driver %s err: %ld.\n", ++ alg_name, PTR_ERR(ctx->fallback_aead)); ++ ctx->fallback_aead = NULL; ++ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request)); ++ } else { ++ crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) + ++ crypto_aead_reqsize(ctx->fallback_aead)); + } -+ offset = array_index_nospec(offset, offset_max); -+ -+ *(p_uint + offset) = value; -+ } -+ -+ /* update power save setting */ -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_DESKEW, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); -+ ret = -ENOMEM; -+ goto err; + } + -+ ret = count; -+err: -+ vfree(buf); -+ return ret; ++ return 0; +} + -+static const struct file_operations skew_proc_fops = { -+ .open = skew_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+ .write = skew_proc_write, -+}; -+ -+static int proc_skew_init(void) ++static void rk_aead_exit_tfm(struct crypto_aead *tfm) +{ -+ /* create dmcinfo file */ -+ proc_create(PROC_DMCDBG_DESKEW, 0644, proc_dmcdbg_dir, -+ &skew_proc_fops); ++ struct rk_cipher_ctx *ctx = crypto_tfm_ctx(&tfm->base); + -+ return 0; ++ CRYPTO_TRACE(); ++ ++ if (ctx->fallback_aead) { ++ CRYPTO_MSG("free fallback tfm"); ++ crypto_free_aead(ctx->fallback_aead); ++ } ++ ++ ctx->rk_dev->release_crypto(ctx->rk_dev, crypto_tfm_alg_name(&tfm->base)); +} + -+static int regsinfo_proc_show(struct seq_file *m, void *v) ++static int rk_aead_crypt(struct aead_request *req, bool encrypt) +{ -+ struct arm_smccc_res res; -+ struct registers_info *p_regsinfo; -+ u32 i; ++ struct crypto_aead *tfm = crypto_aead_reqtfm(req); ++ struct rk_cipher_ctx *ctx = crypto_aead_ctx(tfm); ++ struct rk_crypto_algt *algt = rk_aead_get_algt(tfm); ++ struct scatterlist *sg_src, *sg_dst; ++ struct scatterlist src[2], dst[2]; 
++ u64 data_len; ++ bool aligned; ++ int ret = -EINVAL; + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, -+ DDRDBG_FUNC_GET_REGISTERS_INFO, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ if (res.a0) { -+ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", -+ res.a0); -+ return -ENOMEM; -+ } ++ CRYPTO_TRACE("%s cryptlen = %u, assoclen = %u", ++ encrypt ? "encrypt" : "decrypt", ++ req->cryptlen, req->assoclen); + -+ if (!dmcdbg_data.inited_flag) { -+ seq_puts(m, "dmcdbg_data no int\n"); -+ return -EPERM; ++ data_len = encrypt ? req->cryptlen : (req->cryptlen - crypto_aead_authsize(tfm)); ++ ++ if (req->assoclen == 0 || ++ req->cryptlen == 0 || ++ data_len == 0 || ++ is_force_fallback(algt, ctx->keylen)) ++ return rk_aead_fallback(req, ctx, encrypt); ++ ++ /* point sg_src and sg_dst skip assoc data */ ++ sg_src = scatterwalk_ffwd(src, req->src, req->assoclen); ++ sg_dst = (req->src == req->dst) ? sg_src : scatterwalk_ffwd(dst, req->dst, req->assoclen); ++ ++ aligned = rk_crypto_check_align(sg_src, sg_nents_for_len(sg_src, data_len), ++ sg_dst, sg_nents_for_len(sg_dst, data_len), ++ 64); ++ ++ if (sg_nents_for_len(sg_src, data_len) > RK_DEFAULT_LLI_CNT || ++ sg_nents_for_len(sg_dst, data_len) > RK_DEFAULT_LLI_CNT) ++ return rk_aead_fallback(req, ctx, encrypt); ++ ++ if (!aligned) { ++ if (req->assoclen > ctx->rk_dev->aad_max || ++ data_len > ctx->rk_dev->vir_max) ++ return rk_aead_fallback(req, ctx, encrypt); + } -+ p_regsinfo = (struct registers_info *)dmcdbg_data.share_memory; + -+ seq_printf(m, -+ "registers base address information:\n" -+ "\n" -+ ); ++ ctx->mode = cipher_algo2bc[algt->algo] | ++ cipher_mode2bc[algt->mode]; ++ if (!encrypt) ++ ctx->mode |= CRYPTO_BC_DECRYPT; + -+ for (i = 0; i < p_regsinfo->regs_num; i++) { -+ seq_printf(m, -+ "%s=0x%x\n" -+ , -+ p_regsinfo->regs[i].regs_name, -+ p_regsinfo->regs[i].regs_addr -+ ); ++ if (algt->algo == CIPHER_ALGO_AES) { ++ if (ctx->keylen == AES_KEYSIZE_128) ++ ctx->mode |= CRYPTO_BC_128_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_192) ++ ctx->mode |= CRYPTO_BC_192_bit_key; ++ else if (ctx->keylen == AES_KEYSIZE_256) ++ ctx->mode |= CRYPTO_BC_256_bit_key; + } + -+ return 0; -+} ++ ctx->iv_len = crypto_aead_ivsize(tfm); + -+static int regsinfo_proc_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, regsinfo_proc_show, NULL); -+} ++ memset(ctx->iv, 0x00, sizeof(ctx->iv)); ++ memcpy(ctx->iv, req->iv, ctx->iv_len); + -+static const struct file_operations regsinfo_proc_fops = { -+ .open = regsinfo_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ ctx->is_enc = encrypt; + -+static int proc_regsinfo_init(void) -+{ -+ /* create dmcinfo file */ -+ proc_create(PROC_DMCDBG_REGS_INFO, 0644, proc_dmcdbg_dir, -+ ®sinfo_proc_fops); ++ CRYPTO_MSG("ctx->mode = %x\n", ctx->mode); ++ ret = rk_aead_handle_req(ctx->rk_dev, req); + -+ return 0; ++ return ret; +} + -+static void rv1126_get_skew_parameter(void) ++static int rk_aead_encrypt(struct aead_request *req) +{ -+ struct skew_info_rv1126 *p_skew; -+ u32 i; ++ return rk_aead_crypt(req, true); ++} + -+ /* get skew parameters */ -+ p_skew = (struct skew_info_rv1126 *)dmcdbg_data.share_memory; -+ dmcdbg_data.skew_group_num = 5; ++static int rk_aead_decrypt(struct aead_request *req) ++{ ++ return rk_aead_crypt(req, false); ++} + -+ /* ca_skew parameters */ -+ dmcdbg_data.skew_group[0].p_skew_info = (unsigned int *)p_skew->ca_skew; -+ dmcdbg_data.skew_group[0].skew_num = ARRAY_SIZE(rv1126_dts_ca_timing); -+ for (i = 0; i < 
dmcdbg_data.skew_group[0].skew_num; i++) -+ dmcdbg_data.skew_group[0].p_skew_timing[i] = -+ (char *)rv1126_dts_ca_timing[i]; -+ dmcdbg_data.skew_group[0].note = -+ "(ca_skew: ddr4(pad_name)_ddr3_lpddr3_lpddr4_de-skew)"; ++struct rk_crypto_algt rk_v3_ecb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, ECB, ecb(sm4), ecb-sm4-rk); + -+ /* cs0_a_skew parameters */ -+ dmcdbg_data.skew_group[1].p_skew_info = (unsigned int *)p_skew->cs0_a_skew; -+ dmcdbg_data.skew_group[1].skew_num = ARRAY_SIZE(rv1126_dts_cs0_a_timing); -+ for (i = 0; i < dmcdbg_data.skew_group[1].skew_num; i++) -+ dmcdbg_data.skew_group[1].p_skew_timing[i] = -+ (char *)rv1126_dts_cs0_a_timing[i]; -+ dmcdbg_data.skew_group[1].note = "(cs0_a_skew)"; ++struct rk_crypto_algt rk_v3_cbc_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CBC, cbc(sm4), cbc-sm4-rk); + -+ /* cs0_b_skew parameters */ -+ dmcdbg_data.skew_group[2].p_skew_info = (unsigned int *)p_skew->cs0_b_skew; -+ dmcdbg_data.skew_group[2].skew_num = ARRAY_SIZE(rv1126_dts_cs0_b_timing); -+ for (i = 0; i < dmcdbg_data.skew_group[2].skew_num; i++) -+ dmcdbg_data.skew_group[2].p_skew_timing[i] = -+ (char *)rv1126_dts_cs0_b_timing[i]; -+ dmcdbg_data.skew_group[2].note = "(cs0_b_skew)"; ++struct rk_crypto_algt rk_v3_xts_sm4_alg = ++ RK_CIPHER_ALGO_XTS_INIT(SM4, xts(sm4), xts-sm4-rk); + -+ /* cs1_a_skew parameters */ -+ dmcdbg_data.skew_group[3].p_skew_info = (unsigned int *)p_skew->cs1_a_skew; -+ dmcdbg_data.skew_group[3].skew_num = ARRAY_SIZE(rv1126_dts_cs1_a_timing); -+ for (i = 0; i < dmcdbg_data.skew_group[3].skew_num; i++) -+ dmcdbg_data.skew_group[3].p_skew_timing[i] = -+ (char *)rv1126_dts_cs1_a_timing[i]; -+ dmcdbg_data.skew_group[3].note = "(cs1_a_skew)"; ++struct rk_crypto_algt rk_v3_cfb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CFB, cfb(sm4), cfb-sm4-rk); + -+ /* cs1_b_skew parameters */ -+ dmcdbg_data.skew_group[4].p_skew_info = (unsigned int *)p_skew->cs1_b_skew; -+ dmcdbg_data.skew_group[4].skew_num = ARRAY_SIZE(rv1126_dts_cs1_b_timing); -+ for (i = 0; i < dmcdbg_data.skew_group[3].skew_num; i++) -+ dmcdbg_data.skew_group[4].p_skew_timing[i] = -+ (char *)rv1126_dts_cs1_b_timing[i]; -+ dmcdbg_data.skew_group[4].note = "(cs1_b_skew)"; -+} ++struct rk_crypto_algt rk_v3_ofb_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, OFB, ofb(sm4), ofb-sm4-rk); + -+static __maybe_unused int rv1126_dmcdbg_init(struct platform_device *pdev, -+ struct rockchip_dmcdbg *dmcdbg) -+{ -+ struct arm_smccc_res res; ++struct rk_crypto_algt rk_v3_ctr_sm4_alg = ++ RK_CIPHER_ALGO_INIT(SM4, CTR, ctr(sm4), ctr-sm4-rk); + -+ /* check ddr_debug_func version */ -+ res = sip_smc_dram(0, DDRDBG_FUNC_GET_VERSION, -+ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); -+ dev_notice(&pdev->dev, "current ATF ddr_debug_func version 0x%lx.\n", -+ res.a1); -+ /* -+ * [15:8] major version, [7:0] minor version -+ * major version must match both kernel dmcdbg and ATF ddr_debug_func. 
-+ */ -+ if (res.a0 || res.a1 < 0x101 || ((res.a1 & 0xff00) != 0x100)) { -+ dev_err(&pdev->dev, -+ "version invalid,need update,the major version unmatch!\n"); -+ return -ENXIO; -+ } ++struct rk_crypto_algt rk_v3_gcm_sm4_alg = ++ RK_AEAD_ALGO_INIT(SM4, GCM, gcm(sm4), gcm-sm4-rk); + -+ /* request share memory for pass parameter */ -+ res = sip_smc_request_share_mem(DMCDBG_PAGE_NUMS, -+ SHARE_PAGE_TYPE_DDRDBG); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "request share mem error\n"); -+ return -ENOMEM; -+ } ++struct rk_crypto_algt rk_v3_ecb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, ECB, ecb(aes), ecb-aes-rk); + -+ dmcdbg_data.share_memory = (void __iomem *)res.a1; -+ dmcdbg_data.inited_flag = 1; ++struct rk_crypto_algt rk_v3_cbc_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CBC, cbc(aes), cbc-aes-rk); + -+ rv1126_get_skew_parameter(); ++struct rk_crypto_algt rk_v3_xts_aes_alg = ++ RK_CIPHER_ALGO_XTS_INIT(AES, xts(aes), xts-aes-rk); + -+ /* create parent dir in /proc */ -+ proc_dmcdbg_dir = proc_mkdir(PROC_DMCDBG_DIR_NAME, NULL); -+ if (!proc_dmcdbg_dir) { -+ dev_err(&pdev->dev, "create proc dir error!"); -+ return -ENOENT; -+ } ++struct rk_crypto_algt rk_v3_cfb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CFB, cfb(aes), cfb-aes-rk); + -+ proc_dmcinfo_init(); -+ proc_powersave_init(); -+ proc_drvodt_init(); -+ proc_skew_init(); -+ proc_regsinfo_init(); -+ return 0; -+} ++struct rk_crypto_algt rk_v3_ofb_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, OFB, ofb(aes), ofb-aes-rk); + -+static const struct of_device_id rockchip_dmcdbg_of_match[] = { -+ { .compatible = "rockchip,rv1126-dmcdbg", .data = rv1126_dmcdbg_init}, -+ { }, -+}; -+MODULE_DEVICE_TABLE(of, rockchip_dmcdbg_of_match); ++struct rk_crypto_algt rk_v3_ctr_aes_alg = ++ RK_CIPHER_ALGO_INIT(AES, CTR, ctr(aes), ctr-aes-rk); + -+static int rockchip_dmcdbg_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct rockchip_dmcdbg *data; -+ const struct of_device_id *match; -+ int (*init)(struct platform_device *pdev, -+ struct rockchip_dmcdbg *data); -+ int ret = 0; ++struct rk_crypto_algt rk_v3_gcm_aes_alg = ++ RK_AEAD_ALGO_INIT(AES, GCM, gcm(aes), gcm-aes-rk); + -+ data = devm_kzalloc(dev, sizeof(struct rockchip_dmcdbg), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; ++struct rk_crypto_algt rk_v3_ecb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, ECB, ecb(des), ecb-des-rk); + -+ data->dev = dev; ++struct rk_crypto_algt rk_v3_cbc_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, CBC, cbc(des), cbc-des-rk); + -+ /* match soc chip init */ -+ match = of_match_node(rockchip_dmcdbg_of_match, pdev->dev.of_node); -+ if (match) { -+ init = match->data; -+ if (init) { -+ if (init(pdev, data)) -+ return -EINVAL; -+ } -+ } ++struct rk_crypto_algt rk_v3_cfb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, CFB, cfb(des), cfb-des-rk); + -+ return ret; -+} ++struct rk_crypto_algt rk_v3_ofb_des_alg = ++ RK_CIPHER_ALGO_INIT(DES, OFB, ofb(des), ofb-des-rk); + -+static struct platform_driver rockchip_dmcdbg_driver = { -+ .probe = rockchip_dmcdbg_probe, -+ .driver = { -+ .name = "rockchip,dmcdbg", -+ .of_match_table = rockchip_dmcdbg_of_match, -+ }, -+}; -+module_platform_driver(rockchip_dmcdbg_driver); ++struct rk_crypto_algt rk_v3_ecb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, ECB, ecb(des3_ede), ecb-des3_ede-rk); + -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("YouMin Chen "); -+MODULE_DESCRIPTION("rockchip dmc debug driver with devfreq framework"); -diff --git a/drivers/devfreq/rockchip_dmc_timing.h b/drivers/devfreq/rockchip_dmc_timing.h -new file mode 100644 -index 
000000000..2f7b7774c ---- /dev/null -+++ b/drivers/devfreq/rockchip_dmc_timing.h -@@ -0,0 +1,1231 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2020, Rockchip Electronics Co., Ltd. -+ */ ++struct rk_crypto_algt rk_v3_cbc_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, CBC, cbc(des3_ede), cbc-des3_ede-rk); + -+#ifndef __ROCKCHIP_DMC_TIMING_H__ -+#define __ROCKCHIP_DMC_TIMING_H__ ++struct rk_crypto_algt rk_v3_cfb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, CFB, cfb(des3_ede), cfb-des3_ede-rk); + -+/* hope this define can adapt all future platfor */ -+static const char * const px30_dts_timing[] = { -+ "ddr2_speed_bin", -+ "ddr3_speed_bin", -+ "ddr4_speed_bin", -+ "pd_idle", -+ "sr_idle", -+ "sr_mc_gate_idle", -+ "srpd_lite_idle", -+ "standby_idle", ++struct rk_crypto_algt rk_v3_ofb_des3_ede_alg = ++ RK_CIPHER_ALGO_INIT(DES3_EDE, OFB, ofb(des3_ede), ofb-des3_ede-rk); + -+ "auto_pd_dis_freq", -+ "auto_sr_dis_freq", -+ "ddr2_dll_dis_freq", -+ "ddr3_dll_dis_freq", -+ "ddr4_dll_dis_freq", -+ "phy_dll_dis_freq", +diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig +index 3c4862a75..eeb1f12bc 100644 +--- a/drivers/devfreq/Kconfig ++++ b/drivers/devfreq/Kconfig +@@ -129,15 +129,20 @@ config ARM_MEDIATEK_CCI_DEVFREQ + buck voltages and update a proper CCI frequency. Use the notification + to get the regulator status. + +-config ARM_RK3399_DMC_DEVFREQ +- tristate "ARM RK3399 DMC DEVFREQ Driver" ++config ARM_ROCKCHIP_BUS_DEVFREQ ++ tristate "ARM ROCKCHIP BUS DEVFREQ Driver" ++ depends on ARCH_ROCKCHIP ++ help ++ This adds the DEVFREQ driver for the ROCKCHIP BUS. + -+ "ddr2_odt_dis_freq", -+ "phy_ddr2_odt_dis_freq", -+ "ddr2_drv", -+ "ddr2_odt", -+ "phy_ddr2_ca_drv", -+ "phy_ddr2_ck_drv", -+ "phy_ddr2_dq_drv", -+ "phy_ddr2_odt", ++config ARM_ROCKCHIP_DMC_DEVFREQ ++ tristate "ARM ROCKCHIP DMC DEVFREQ Driver" + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) + select DEVFREQ_EVENT_ROCKCHIP_DFI +- select DEVFREQ_GOV_SIMPLE_ONDEMAND + select PM_DEVFREQ_EVENT + help +- This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller). ++ This adds the DEVFREQ driver for the ROCKCHIP DMC(Dynamic Memory Controller). + It sets the frequency for the memory controller and reads the usage counts + from hardware. 
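
Because ARM_ROCKCHIP_DMC_DEVFREQ above selects DEVFREQ_EVENT_ROCKCHIP_DFI and PM_DEVFREQ_EVENT, the memory-controller driver is expected to pull its usage counts through the devfreq-event API rather than read the DDR monitor registers itself. The sketch below shows that consumer side for illustration only; the "devfreq-events" phandle name and the helper function are assumptions made here, not code carried by this patch.

/*
 * Illustrative sketch, not part of kernel.patch: fetch a load estimate
 * from a devfreq-event provider such as rockchip-dfi.
 */
#include <linux/device.h>
#include <linux/devfreq-event.h>
#include <linux/err.h>

static int dmc_read_dfi_load(struct device *dev, unsigned long *percent)
{
        struct devfreq_event_dev *edev;
        struct devfreq_event_data edata;
        int ret;

        /* "devfreq-events" is the customary consumer phandle name (assumption) */
        edev = devfreq_event_get_edev_by_phandle(dev, "devfreq-events", 0);
        if (IS_ERR(edev))
                return PTR_ERR(edev);

        ret = devfreq_event_enable_edev(edev);
        if (ret)
                return ret;

        ret = devfreq_event_get_event(edev, &edata);
        if (ret)
                return ret;

        /* the provider fills load_count and total_count; their ratio is the load */
        *percent = edata.total_count ?
                   edata.load_count * 100 / edata.total_count : 0;
        return 0;
}
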
+ +diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile +index bf40d0492..92018ab6c 100644 +--- a/drivers/devfreq/Makefile ++++ b/drivers/devfreq/Makefile +@@ -12,7 +12,8 @@ obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o + obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o + obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o + obj-$(CONFIG_ARM_MEDIATEK_CCI_DEVFREQ) += mtk-cci-devfreq.o +-obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o ++obj-$(CONFIG_ARM_ROCKCHIP_BUS_DEVFREQ) += rockchip_bus.o ++obj-$(CONFIG_ARM_ROCKCHIP_DMC_DEVFREQ) += rockchip_dmc.o rockchip_dmc_common.o + obj-$(CONFIG_ARM_SUN8I_A33_MBUS_DEVFREQ) += sun8i-a33-mbus.o + obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o + +diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c +index 49c542ecc..4385e1763 100644 +--- a/drivers/devfreq/devfreq.c ++++ b/drivers/devfreq/devfreq.c +@@ -1807,6 +1807,40 @@ static ssize_t trans_stat_store(struct device *dev, + } + static DEVICE_ATTR_RW(trans_stat); + ++static ssize_t load_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ int err; ++ struct devfreq *devfreq = to_devfreq(dev); ++ struct devfreq_dev_status stat = devfreq->last_status; ++ unsigned long freq; ++ ssize_t len; + -+ "ddr3_odt_dis_freq", -+ "phy_ddr3_odt_dis_freq", -+ "ddr3_drv", -+ "ddr3_odt", -+ "phy_ddr3_ca_drv", -+ "phy_ddr3_ck_drv", -+ "phy_ddr3_dq_drv", -+ "phy_ddr3_odt", ++ err = devfreq_update_stats(devfreq); ++ if (err) ++ return err; + -+ "phy_lpddr2_odt_dis_freq", -+ "lpddr2_drv", -+ "phy_lpddr2_ca_drv", -+ "phy_lpddr2_ck_drv", -+ "phy_lpddr2_dq_drv", -+ "phy_lpddr2_odt", ++ if (stat.total_time < stat.busy_time) { ++ err = devfreq_update_stats(devfreq); ++ if (err) ++ return err; ++ }; + -+ "lpddr3_odt_dis_freq", -+ "phy_lpddr3_odt_dis_freq", -+ "lpddr3_drv", -+ "lpddr3_odt", -+ "phy_lpddr3_ca_drv", -+ "phy_lpddr3_ck_drv", -+ "phy_lpddr3_dq_drv", -+ "phy_lpddr3_odt", ++ if (!stat.total_time) ++ return 0; + -+ "lpddr4_odt_dis_freq", -+ "phy_lpddr4_odt_dis_freq", -+ "lpddr4_drv", -+ "lpddr4_dq_odt", -+ "lpddr4_ca_odt", -+ "phy_lpddr4_ca_drv", -+ "phy_lpddr4_ck_cs_drv", -+ "phy_lpddr4_dq_drv", -+ "phy_lpddr4_odt", ++ len = sprintf(buf, "%lu", stat.busy_time * 100 / stat.total_time); + -+ "ddr4_odt_dis_freq", -+ "phy_ddr4_odt_dis_freq", -+ "ddr4_drv", -+ "ddr4_odt", -+ "phy_ddr4_ca_drv", -+ "phy_ddr4_ck_drv", -+ "phy_ddr4_dq_drv", -+ "phy_ddr4_odt", -+}; ++ if (devfreq->profile->get_cur_freq && ++ !devfreq->profile->get_cur_freq(devfreq->dev.parent, &freq)) ++ len += sprintf(buf + len, "@%luHz\n", freq); ++ else ++ len += sprintf(buf + len, "@%luHz\n", devfreq->previous_freq); + -+struct px30_ddr_dts_config_timing { -+ unsigned int ddr2_speed_bin; -+ unsigned int ddr3_speed_bin; -+ unsigned int ddr4_speed_bin; -+ unsigned int pd_idle; -+ unsigned int sr_idle; -+ unsigned int sr_mc_gate_idle; -+ unsigned int srpd_lite_idle; -+ unsigned int standby_idle; ++ return len; ++} ++static DEVICE_ATTR_RO(load); + -+ unsigned int auto_pd_dis_freq; -+ unsigned int auto_sr_dis_freq; -+ /* for ddr2 only */ -+ unsigned int ddr2_dll_dis_freq; -+ /* for ddr3 only */ -+ unsigned int ddr3_dll_dis_freq; -+ /* for ddr4 only */ -+ unsigned int ddr4_dll_dis_freq; -+ unsigned int phy_dll_dis_freq; + static struct attribute *devfreq_attrs[] = { + &dev_attr_name.attr, + &dev_attr_governor.attr, +@@ -1817,6 +1851,7 @@ static struct attribute *devfreq_attrs[] = { + &dev_attr_min_freq.attr, + &dev_attr_max_freq.attr, + &dev_attr_trans_stat.attr, ++ &dev_attr_load.attr, + NULL, + }; + 
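
The load attribute registered above appears under /sys/class/devfreq/<device>/load and prints the busy percentage over the last measurement window followed by the frequency it was sampled at, for example 47@456000000Hz; while total_time is still zero the read returns an empty string. A minimal userspace reader is sketched below; the device name "dmc" is an assumption and depends on the platform.

/*
 * Illustrative only, not part of the patch: read and parse the devfreq
 * "load" node.  Replace "dmc" with the device name found under
 * /sys/class/devfreq/ on the target.
 */
#include <stdio.h>

int main(void)
{
        unsigned long percent, freq_hz;
        FILE *f = fopen("/sys/class/devfreq/dmc/load", "r");

        if (!f) {
                perror("open load");
                return 1;
        }
        /* load_show() formats the value as "<busy*100/total>@<freq>Hz" */
        if (fscanf(f, "%lu@%luHz", &percent, &freq_hz) == 2)
                printf("load: %lu%% at %lu Hz\n", percent, freq_hz);
        fclose(f);
        return 0;
}
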
ATTRIBUTE_GROUPS(devfreq); +diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig +index 878825372..4526c69c6 100644 +--- a/drivers/devfreq/event/Kconfig ++++ b/drivers/devfreq/event/Kconfig +@@ -39,4 +39,11 @@ config DEVFREQ_EVENT_ROCKCHIP_DFI + This add the devfreq-event driver for Rockchip SoC. It provides DFI + (DDR Monitor Module) driver to count ddr load. + ++config DEVFREQ_EVENT_ROCKCHIP_NOCP ++ tristate "ROCKCHIP NoC (Network On Chip) Probe DEVFREQ event Driver" ++ depends on ARCH_ROCKCHIP ++ help ++ This add the devfreq-event driver for Rockchip SoC. It provides NoC ++ (Network on Chip) Probe counters to monitor traffic statistics. + -+ unsigned int ddr2_odt_dis_freq; -+ unsigned int phy_ddr2_odt_dis_freq; -+ unsigned int ddr2_drv; -+ unsigned int ddr2_odt; -+ unsigned int phy_ddr2_ca_drv; -+ unsigned int phy_ddr2_ck_drv; -+ unsigned int phy_ddr2_dq_drv; -+ unsigned int phy_ddr2_odt; + endif # PM_DEVFREQ_EVENT +diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile +index 3c847e5d5..03d67f06c 100644 +--- a/drivers/devfreq/event/Makefile ++++ b/drivers/devfreq/event/Makefile +@@ -4,3 +4,4 @@ + obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o + obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o + obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI) += rockchip-dfi.o ++obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_NOCP) += rockchip-nocp.o +diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c +index 74893c06a..6db7c4945 100644 +--- a/drivers/devfreq/event/rockchip-dfi.c ++++ b/drivers/devfreq/event/rockchip-dfi.c +@@ -20,26 +20,81 @@ + + #include + +-#define RK3399_DMC_NUM_CH 2 +- ++#define PX30_PMUGRF_OS_REG2 0x208 ++#define PX30_PMUGRF_OS_REG3 0x20c + -+ unsigned int ddr3_odt_dis_freq; -+ unsigned int phy_ddr3_odt_dis_freq; -+ unsigned int ddr3_drv; -+ unsigned int ddr3_odt; -+ unsigned int phy_ddr3_ca_drv; -+ unsigned int phy_ddr3_ck_drv; -+ unsigned int phy_ddr3_dq_drv; -+ unsigned int phy_ddr3_odt; ++#define RK3588_PMUGRF_OS_REG(n) (0x200 + (n) * 4) + -+ unsigned int phy_lpddr2_odt_dis_freq; -+ unsigned int lpddr2_drv; -+ unsigned int phy_lpddr2_ca_drv; -+ unsigned int phy_lpddr2_ck_drv; -+ unsigned int phy_lpddr2_dq_drv; -+ unsigned int phy_lpddr2_odt; ++#define RK3128_GRF_SOC_CON0 0x140 ++#define RK3128_GRF_OS_REG1 0x1cc ++#define RK3128_GRF_DFI_WRNUM 0x220 ++#define RK3128_GRF_DFI_RDNUM 0x224 ++#define RK3128_GRF_DFI_TIMERVAL 0x22c ++#define RK3128_DDR_MONITOR_EN ((1 << (16 + 6)) + (1 << 6)) ++#define RK3128_DDR_MONITOR_DISB ((1 << (16 + 6)) + (0 << 6)) + -+ unsigned int lpddr3_odt_dis_freq; -+ unsigned int phy_lpddr3_odt_dis_freq; -+ unsigned int lpddr3_drv; -+ unsigned int lpddr3_odt; -+ unsigned int phy_lpddr3_ca_drv; -+ unsigned int phy_lpddr3_ck_drv; -+ unsigned int phy_lpddr3_dq_drv; -+ unsigned int phy_lpddr3_odt; ++#define RK3288_PMU_SYS_REG2 0x9c ++#define RK3288_GRF_SOC_CON4 0x254 ++#define RK3288_GRF_SOC_STATUS(n) (0x280 + (n) * 4) ++#define RK3288_DFI_EN (0x30003 << 14) ++#define RK3288_DFI_DIS (0x30000 << 14) ++#define RK3288_LPDDR_SEL (0x10001 << 13) ++#define RK3288_DDR3_SEL (0x10000 << 13) + -+ unsigned int lpddr4_odt_dis_freq; -+ unsigned int phy_lpddr4_odt_dis_freq; -+ unsigned int lpddr4_drv; -+ unsigned int lpddr4_dq_odt; -+ unsigned int lpddr4_ca_odt; -+ unsigned int phy_lpddr4_ca_drv; -+ unsigned int phy_lpddr4_ck_cs_drv; -+ unsigned int phy_lpddr4_dq_drv; -+ unsigned int phy_lpddr4_odt; ++#define RK3328_GRF_OS_REG2 0x5d0 + -+ unsigned int ddr4_odt_dis_freq; -+ unsigned int 
phy_ddr4_odt_dis_freq; -+ unsigned int ddr4_drv; -+ unsigned int ddr4_odt; -+ unsigned int phy_ddr4_ca_drv; -+ unsigned int phy_ddr4_ck_drv; -+ unsigned int phy_ddr4_dq_drv; -+ unsigned int phy_ddr4_odt; ++#define RK3368_GRF_DDRC0_CON0 0x600 ++#define RK3368_GRF_SOC_STATUS5 0x494 ++#define RK3368_GRF_SOC_STATUS6 0x498 ++#define RK3368_GRF_SOC_STATUS8 0x4a0 ++#define RK3368_GRF_SOC_STATUS9 0x4a4 ++#define RK3368_GRF_SOC_STATUS10 0x4a8 ++#define RK3368_DFI_EN (0x30003 << 5) ++#define RK3368_DFI_DIS (0x30000 << 5) + -+ unsigned int ca_skew[15]; -+ unsigned int cs0_skew[44]; -+ unsigned int cs1_skew[44]; ++#define MAX_DMC_NUM_CH 4 ++#define READ_DRAMTYPE_INFO(n) (((n) >> 13) & 0x7) ++#define READ_CH_INFO(n) (((n) >> 28) & 0x3) ++#define READ_DRAMTYPE_INFO_V3(n, m) ((((n) >> 13) & 0x7) | ((((m) >> 12) & 0x3) << 3)) ++#define READ_SYSREG_VERSION(m) (((m) >> 28) & 0xf) ++#define READ_LP5_BANK_MODE(m) (((m) >> 1) & 0x3) ++#define READ_LP5_CKR(m) (((m) >> 0) & 0x1) + /* DDRMON_CTRL */ +-#define DDRMON_CTRL 0x04 +-#define CLR_DDRMON_CTRL (0x1f0000 << 0) +-#define LPDDR4_EN (0x10001 << 4) +-#define HARDWARE_EN (0x10001 << 3) +-#define LPDDR3_EN (0x10001 << 2) +-#define SOFTWARE_EN (0x10001 << 1) +-#define SOFTWARE_DIS (0x10000 << 1) +-#define TIME_CNT_EN (0x10001 << 0) ++#define DDRMON_CTRL 0x04 ++#define CLR_DDRMON_CTRL (0xffff0000 << 0) ++#define LPDDR5_BANK_MODE(m) ((0x30000 | ((m) & 0x3)) << 7) ++#define LPDDR5_EN (0x10001 << 6) ++#define DDR4_EN (0x10001 << 5) ++#define LPDDR4_EN (0x10001 << 4) ++#define HARDWARE_EN (0x10001 << 3) ++#define LPDDR2_3_EN (0x10001 << 2) ++#define SOFTWARE_EN (0x10001 << 1) ++#define SOFTWARE_DIS (0x10000 << 1) ++#define TIME_CNT_EN (0x10001 << 0) + + #define DDRMON_CH0_COUNT_NUM 0x28 + #define DDRMON_CH0_DFI_ACCESS_NUM 0x2c + #define DDRMON_CH1_COUNT_NUM 0x3c + #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 + ++/* pmu grf */ ++#define PMUGRF_OS_REG2 0x308 + -+ unsigned int available; ++enum { ++ DDR4 = 0, ++ DDR3 = 3, ++ LPDDR2 = 5, ++ LPDDR3 = 6, ++ LPDDR4 = 7, ++ LPDDR4X = 8, ++ LPDDR5 = 9, ++ DDR5 = 10, ++ UNUSED = 0xFF +}; + -+static const char * const rk1808_dts_ca_timing[] = { -+ "a0_ddr3a9_de-skew", -+ "a1_ddr3a14_de-skew", -+ "a2_ddr3a13_de-skew", -+ "a3_ddr3a11_de-skew", -+ "a4_ddr3a2_de-skew", -+ "a5_ddr3a4_de-skew", -+ "a6_ddr3a3_de-skew", -+ "a7_ddr3a6_de-skew", -+ "a8_ddr3a5_de-skew", -+ "a9_ddr3a1_de-skew", -+ "a10_ddr3a0_de-skew", -+ "a11_ddr3a7_de-skew", -+ "a12_ddr3casb_de-skew", -+ "a13_ddr3a8_de-skew", -+ "a14_ddr3odt0_de-skew", -+ "a15_ddr3ba1_de-skew", -+ "a16_ddr3rasb_de-skew", -+ "a17_ddr3null_de-skew", -+ "ba0_ddr3ba2_de-skew", -+ "ba1_ddr3a12_de-skew", -+ "bg0_ddr3ba0_de-skew", -+ "bg1_ddr3web_de-skew", -+ "cke_ddr3cke_de-skew", -+ "ck_ddr3ck_de-skew", -+ "ckb_ddr3ckb_de-skew", -+ "csb0_ddr3a10_de-skew", -+ "odt0_ddr3a15_de-skew", -+ "resetn_ddr3resetn_de-skew", -+ "actn_ddr3csb0_de-skew", -+ "csb1_ddr3csb1_de-skew", -+ "odt1_ddr3odt1_de-skew", + struct dmc_usage { +- u32 access; +- u32 total; ++ u64 access; ++ u64 total; + }; + + /* +@@ -50,44 +105,307 @@ struct dmc_usage { + struct rockchip_dfi { + struct devfreq_event_dev *edev; + struct devfreq_event_desc *desc; +- struct dmc_usage ch_usage[RK3399_DMC_NUM_CH]; ++ struct dmc_usage ch_usage[MAX_DMC_NUM_CH]; + struct device *dev; + void __iomem *regs; + struct regmap *regmap_pmu; ++ struct regmap *regmap_grf; ++ struct regmap *regmap_pmugrf; + struct clk *clk; ++ u32 dram_type; ++ u32 mon_idx; ++ u32 count_rate; ++ u32 dram_dynamic_info_reg; ++ /* 0: BG mode, 1: 16 Bank mode, 2: 8 bank mode */ ++ 
u32 lp5_bank_mode; ++ /* 0: clk:dqs = 1:2, 1: 1:4 */ ++ u32 lp5_ckr; ++ /* ++ * available mask, 1: available, 0: not available ++ * each bit represent a channel ++ */ ++ u32 ch_msk; +}; + -+static const char * const rk1808_dts_cs0_a_timing[] = { -+ "cs0_dm0_rx_de-skew", -+ "cs0_dm0_tx_de-skew", -+ "cs0_dq0_rx_de-skew", -+ "cs0_dq0_tx_de-skew", -+ "cs0_dq1_rx_de-skew", -+ "cs0_dq1_tx_de-skew", -+ "cs0_dq2_rx_de-skew", -+ "cs0_dq2_tx_de-skew", -+ "cs0_dq3_rx_de-skew", -+ "cs0_dq3_tx_de-skew", -+ "cs0_dq4_rx_de-skew", -+ "cs0_dq4_tx_de-skew", -+ "cs0_dq5_rx_de-skew", -+ "cs0_dq5_tx_de-skew", -+ "cs0_dq6_rx_de-skew", -+ "cs0_dq6_tx_de-skew", -+ "cs0_dq7_rx_de-skew", -+ "cs0_dq7_tx_de-skew", -+ "cs0_dqs0p_rx_de-skew", -+ "cs0_dqs0p_tx_de-skew", -+ "cs0_dqs0n_tx_de-skew", -+ "cs0_dm1_rx_de-skew", -+ "cs0_dm1_tx_de-skew", -+ "cs0_dq8_rx_de-skew", -+ "cs0_dq8_tx_de-skew", -+ "cs0_dq9_rx_de-skew", -+ "cs0_dq9_tx_de-skew", -+ "cs0_dq10_rx_de-skew", -+ "cs0_dq10_tx_de-skew", -+ "cs0_dq11_rx_de-skew", -+ "cs0_dq11_tx_de-skew", -+ "cs0_dq12_rx_de-skew", -+ "cs0_dq12_tx_de-skew", -+ "cs0_dq13_rx_de-skew", -+ "cs0_dq13_tx_de-skew", -+ "cs0_dq14_rx_de-skew", -+ "cs0_dq14_tx_de-skew", -+ "cs0_dq15_rx_de-skew", -+ "cs0_dq15_tx_de-skew", -+ "cs0_dqs1p_rx_de-skew", -+ "cs0_dqs1p_tx_de-skew", -+ "cs0_dqs1n_tx_de-skew", -+ "cs0_dqs0n_rx_de-skew", -+ "cs0_dqs1n_rx_de-skew", -+}; ++static void rk3128_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+static const char * const rk1808_dts_cs0_b_timing[] = { -+ "cs0_dm2_rx_de-skew", -+ "cs0_dm2_tx_de-skew", -+ "cs0_dq16_rx_de-skew", -+ "cs0_dq16_tx_de-skew", -+ "cs0_dq17_rx_de-skew", -+ "cs0_dq17_tx_de-skew", -+ "cs0_dq18_rx_de-skew", -+ "cs0_dq18_tx_de-skew", -+ "cs0_dq19_rx_de-skew", -+ "cs0_dq19_tx_de-skew", -+ "cs0_dq20_rx_de-skew", -+ "cs0_dq20_tx_de-skew", -+ "cs0_dq21_rx_de-skew", -+ "cs0_dq21_tx_de-skew", -+ "cs0_dq22_rx_de-skew", -+ "cs0_dq22_tx_de-skew", -+ "cs0_dq23_rx_de-skew", -+ "cs0_dq23_tx_de-skew", -+ "cs0_dqs2p_rx_de-skew", -+ "cs0_dqs2p_tx_de-skew", -+ "cs0_dqs2n_tx_de-skew", -+ "cs0_dm3_rx_de-skew", -+ "cs0_dm3_tx_de-skew", -+ "cs0_dq24_rx_de-skew", -+ "cs0_dq24_tx_de-skew", -+ "cs0_dq25_rx_de-skew", -+ "cs0_dq25_tx_de-skew", -+ "cs0_dq26_rx_de-skew", -+ "cs0_dq26_tx_de-skew", -+ "cs0_dq27_rx_de-skew", -+ "cs0_dq27_tx_de-skew", -+ "cs0_dq28_rx_de-skew", -+ "cs0_dq28_tx_de-skew", -+ "cs0_dq29_rx_de-skew", -+ "cs0_dq29_tx_de-skew", -+ "cs0_dq30_rx_de-skew", -+ "cs0_dq30_tx_de-skew", -+ "cs0_dq31_rx_de-skew", -+ "cs0_dq31_tx_de-skew", -+ "cs0_dqs3p_rx_de-skew", -+ "cs0_dqs3p_tx_de-skew", -+ "cs0_dqs3n_tx_de-skew", -+ "cs0_dqs2n_rx_de-skew", -+ "cs0_dqs3n_rx_de-skew", -+}; ++ regmap_write(info->regmap_grf, ++ RK3128_GRF_SOC_CON0, ++ RK3128_DDR_MONITOR_EN); ++} + -+static const char * const rk1808_dts_cs1_a_timing[] = { -+ "cs1_dm0_rx_de-skew", -+ "cs1_dm0_tx_de-skew", -+ "cs1_dq0_rx_de-skew", -+ "cs1_dq0_tx_de-skew", -+ "cs1_dq1_rx_de-skew", -+ "cs1_dq1_tx_de-skew", -+ "cs1_dq2_rx_de-skew", -+ "cs1_dq2_tx_de-skew", -+ "cs1_dq3_rx_de-skew", -+ "cs1_dq3_tx_de-skew", -+ "cs1_dq4_rx_de-skew", -+ "cs1_dq4_tx_de-skew", -+ "cs1_dq5_rx_de-skew", -+ "cs1_dq5_tx_de-skew", -+ "cs1_dq6_rx_de-skew", -+ "cs1_dq6_tx_de-skew", -+ "cs1_dq7_rx_de-skew", -+ "cs1_dq7_tx_de-skew", -+ "cs1_dqs0p_rx_de-skew", -+ "cs1_dqs0p_tx_de-skew", -+ "cs1_dqs0n_tx_de-skew", -+ "cs1_dm1_rx_de-skew", -+ "cs1_dm1_tx_de-skew", -+ "cs1_dq8_rx_de-skew", -+ "cs1_dq8_tx_de-skew", -+ "cs1_dq9_rx_de-skew", -+ 
"cs1_dq9_tx_de-skew", -+ "cs1_dq10_rx_de-skew", -+ "cs1_dq10_tx_de-skew", -+ "cs1_dq11_rx_de-skew", -+ "cs1_dq11_tx_de-skew", -+ "cs1_dq12_rx_de-skew", -+ "cs1_dq12_tx_de-skew", -+ "cs1_dq13_rx_de-skew", -+ "cs1_dq13_tx_de-skew", -+ "cs1_dq14_rx_de-skew", -+ "cs1_dq14_tx_de-skew", -+ "cs1_dq15_rx_de-skew", -+ "cs1_dq15_tx_de-skew", -+ "cs1_dqs1p_rx_de-skew", -+ "cs1_dqs1p_tx_de-skew", -+ "cs1_dqs1n_tx_de-skew", -+ "cs1_dqs0n_rx_de-skew", -+ "cs1_dqs1n_rx_de-skew", -+}; ++static void rk3128_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+static const char * const rk1808_dts_cs1_b_timing[] = { -+ "cs1_dm2_rx_de-skew", -+ "cs1_dm2_tx_de-skew", -+ "cs1_dq16_rx_de-skew", -+ "cs1_dq16_tx_de-skew", -+ "cs1_dq17_rx_de-skew", -+ "cs1_dq17_tx_de-skew", -+ "cs1_dq18_rx_de-skew", -+ "cs1_dq18_tx_de-skew", -+ "cs1_dq19_rx_de-skew", -+ "cs1_dq19_tx_de-skew", -+ "cs1_dq20_rx_de-skew", -+ "cs1_dq20_tx_de-skew", -+ "cs1_dq21_rx_de-skew", -+ "cs1_dq21_tx_de-skew", -+ "cs1_dq22_rx_de-skew", -+ "cs1_dq22_tx_de-skew", -+ "cs1_dq23_rx_de-skew", -+ "cs1_dq23_tx_de-skew", -+ "cs1_dqs2p_rx_de-skew", -+ "cs1_dqs2p_tx_de-skew", -+ "cs1_dqs2n_tx_de-skew", -+ "cs1_dm3_rx_de-skew", -+ "cs1_dm3_tx_de-skew", -+ "cs1_dq24_rx_de-skew", -+ "cs1_dq24_tx_de-skew", -+ "cs1_dq25_rx_de-skew", -+ "cs1_dq25_tx_de-skew", -+ "cs1_dq26_rx_de-skew", -+ "cs1_dq26_tx_de-skew", -+ "cs1_dq27_rx_de-skew", -+ "cs1_dq27_tx_de-skew", -+ "cs1_dq28_rx_de-skew", -+ "cs1_dq28_tx_de-skew", -+ "cs1_dq29_rx_de-skew", -+ "cs1_dq29_tx_de-skew", -+ "cs1_dq30_rx_de-skew", -+ "cs1_dq30_tx_de-skew", -+ "cs1_dq31_rx_de-skew", -+ "cs1_dq31_tx_de-skew", -+ "cs1_dqs3p_rx_de-skew", -+ "cs1_dqs3p_tx_de-skew", -+ "cs1_dqs3n_tx_de-skew", -+ "cs1_dqs2n_rx_de-skew", -+ "cs1_dqs3n_rx_de-skew", -+}; ++ regmap_write(info->regmap_grf, ++ RK3128_GRF_SOC_CON0, ++ RK3128_DDR_MONITOR_DISB); ++} + -+struct rk1808_ddr_dts_config_timing { -+ unsigned int ddr2_speed_bin; -+ unsigned int ddr3_speed_bin; -+ unsigned int ddr4_speed_bin; -+ unsigned int pd_idle; -+ unsigned int sr_idle; -+ unsigned int sr_mc_gate_idle; -+ unsigned int srpd_lite_idle; -+ unsigned int standby_idle; ++static int rk3128_dfi_disable(struct devfreq_event_dev *edev) ++{ ++ rk3128_dfi_stop_hardware_counter(edev); + -+ unsigned int auto_pd_dis_freq; -+ unsigned int auto_sr_dis_freq; -+ /* for ddr2 only */ -+ unsigned int ddr2_dll_dis_freq; -+ /* for ddr3 only */ -+ unsigned int ddr3_dll_dis_freq; -+ /* for ddr4 only */ -+ unsigned int ddr4_dll_dis_freq; -+ unsigned int phy_dll_dis_freq; ++ return 0; ++} + -+ unsigned int ddr2_odt_dis_freq; -+ unsigned int phy_ddr2_odt_dis_freq; -+ unsigned int ddr2_drv; -+ unsigned int ddr2_odt; -+ unsigned int phy_ddr2_ca_drv; -+ unsigned int phy_ddr2_ck_drv; -+ unsigned int phy_ddr2_dq_drv; -+ unsigned int phy_ddr2_odt; ++static int rk3128_dfi_enable(struct devfreq_event_dev *edev) ++{ ++ rk3128_dfi_start_hardware_counter(edev); + -+ unsigned int ddr3_odt_dis_freq; -+ unsigned int phy_ddr3_odt_dis_freq; -+ unsigned int ddr3_drv; -+ unsigned int ddr3_odt; -+ unsigned int phy_ddr3_ca_drv; -+ unsigned int phy_ddr3_ck_drv; -+ unsigned int phy_ddr3_dq_drv; -+ unsigned int phy_ddr3_odt; ++ return 0; ++} + -+ unsigned int phy_lpddr2_odt_dis_freq; -+ unsigned int lpddr2_drv; -+ unsigned int phy_lpddr2_ca_drv; -+ unsigned int phy_lpddr2_ck_drv; -+ unsigned int phy_lpddr2_dq_drv; -+ unsigned int phy_lpddr2_odt; ++static int rk3128_dfi_set_event(struct devfreq_event_dev *edev) ++{ ++ return 0; ++} + 
-+ unsigned int lpddr3_odt_dis_freq; -+ unsigned int phy_lpddr3_odt_dis_freq; -+ unsigned int lpddr3_drv; -+ unsigned int lpddr3_odt; -+ unsigned int phy_lpddr3_ca_drv; -+ unsigned int phy_lpddr3_ck_drv; -+ unsigned int phy_lpddr3_dq_drv; -+ unsigned int phy_lpddr3_odt; ++static int rk3128_dfi_get_event(struct devfreq_event_dev *edev, ++ struct devfreq_event_data *edata) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ unsigned long flags; ++ u32 dfi_wr, dfi_rd, dfi_timer; + -+ unsigned int lpddr4_odt_dis_freq; -+ unsigned int phy_lpddr4_odt_dis_freq; -+ unsigned int lpddr4_drv; -+ unsigned int lpddr4_dq_odt; -+ unsigned int lpddr4_ca_odt; -+ unsigned int phy_lpddr4_ca_drv; -+ unsigned int phy_lpddr4_ck_cs_drv; -+ unsigned int phy_lpddr4_dq_drv; -+ unsigned int phy_lpddr4_odt; ++ local_irq_save(flags); + -+ unsigned int ddr4_odt_dis_freq; -+ unsigned int phy_ddr4_odt_dis_freq; -+ unsigned int ddr4_drv; -+ unsigned int ddr4_odt; -+ unsigned int phy_ddr4_ca_drv; -+ unsigned int phy_ddr4_ck_drv; -+ unsigned int phy_ddr4_dq_drv; -+ unsigned int phy_ddr4_odt; ++ rk3128_dfi_stop_hardware_counter(edev); + -+ unsigned int ca_de_skew[31]; -+ unsigned int cs0_a_de_skew[44]; -+ unsigned int cs0_b_de_skew[44]; -+ unsigned int cs1_a_de_skew[44]; -+ unsigned int cs1_b_de_skew[44]; ++ regmap_read(info->regmap_grf, RK3128_GRF_DFI_WRNUM, &dfi_wr); ++ regmap_read(info->regmap_grf, RK3128_GRF_DFI_RDNUM, &dfi_rd); ++ regmap_read(info->regmap_grf, RK3128_GRF_DFI_TIMERVAL, &dfi_timer); + -+ unsigned int available; -+}; ++ edata->load_count = (dfi_wr + dfi_rd) * 4; ++ edata->total_count = dfi_timer; + -+static const char * const rk3128_dts_timing[] = { -+ "ddr3_speed_bin", -+ "pd_idle", -+ "sr_idle", -+ "auto_pd_dis_freq", -+ "auto_sr_dis_freq", -+ "ddr3_dll_dis_freq", -+ "lpddr2_dll_dis_freq", -+ "phy_dll_dis_freq", -+ "ddr3_odt_dis_freq", -+ "phy_ddr3_odt_disb_freq", -+ "ddr3_drv", -+ "ddr3_odt", -+ "phy_ddr3_clk_drv", -+ "phy_ddr3_cmd_drv", -+ "phy_ddr3_dqs_drv", -+ "phy_ddr3_odt", -+ "lpddr2_drv", -+ "phy_lpddr2_clk_drv", -+ "phy_lpddr2_cmd_drv", -+ "phy_lpddr2_dqs_drv", -+ "ddr_2t", -+}; ++ rk3128_dfi_start_hardware_counter(edev); + -+struct rk3128_ddr_dts_config_timing { -+ u32 ddr3_speed_bin; -+ u32 pd_idle; -+ u32 sr_idle; -+ u32 auto_pd_dis_freq; -+ u32 auto_sr_dis_freq; -+ u32 ddr3_dll_dis_freq; -+ u32 lpddr2_dll_dis_freq; -+ u32 phy_dll_dis_freq; -+ u32 ddr3_odt_dis_freq; -+ u32 phy_ddr3_odt_disb_freq; -+ u32 ddr3_drv; -+ u32 ddr3_odt; -+ u32 phy_ddr3_clk_drv; -+ u32 phy_ddr3_cmd_drv; -+ u32 phy_ddr3_dqs_drv; -+ u32 phy_ddr3_odt; -+ u32 lpddr2_drv; -+ u32 phy_lpddr2_clk_drv; -+ u32 phy_lpddr2_cmd_drv; -+ u32 phy_lpddr2_dqs_drv; -+ u32 ddr_2t; -+ u32 available; -+}; ++ local_irq_restore(flags); + -+static const char * const rk3228_dts_timing[] = { -+ "dram_spd_bin", -+ "sr_idle", -+ "pd_idle", -+ "dram_dll_disb_freq", -+ "phy_dll_disb_freq", -+ "dram_odt_disb_freq", -+ "phy_odt_disb_freq", -+ "ddr3_drv", -+ "ddr3_odt", -+ "lpddr3_drv", -+ "lpddr3_odt", -+ "lpddr2_drv", -+ "phy_ddr3_clk_drv", -+ "phy_ddr3_cmd_drv", -+ "phy_ddr3_dqs_drv", -+ "phy_ddr3_odt", -+ "phy_lp23_clk_drv", -+ "phy_lp23_cmd_drv", -+ "phy_lp23_dqs_drv", -+ "phy_lp3_odt" -+}; ++ return 0; ++} + -+struct rk3228_ddr_dts_config_timing { -+ u32 dram_spd_bin; -+ u32 sr_idle; -+ u32 pd_idle; -+ u32 dram_dll_dis_freq; -+ u32 phy_dll_dis_freq; -+ u32 dram_odt_dis_freq; -+ u32 phy_odt_dis_freq; -+ u32 ddr3_drv; -+ u32 ddr3_odt; -+ u32 lpddr3_drv; -+ u32 lpddr3_odt; -+ u32 lpddr2_drv; -+ u32 phy_ddr3_clk_drv; -+ u32 
phy_ddr3_cmd_drv; -+ u32 phy_ddr3_dqs_drv; -+ u32 phy_ddr3_odt; -+ u32 phy_lp23_clk_drv; -+ u32 phy_lp23_cmd_drv; -+ u32 phy_lp23_dqs_drv; -+ u32 phy_lp3_odt; ++static const struct devfreq_event_ops rk3128_dfi_ops = { ++ .disable = rk3128_dfi_disable, ++ .enable = rk3128_dfi_enable, ++ .get_event = rk3128_dfi_get_event, ++ .set_event = rk3128_dfi_set_event, +}; + -+static const char * const rk3288_dts_timing[] = { -+ "ddr3_speed_bin", -+ "pd_idle", -+ "sr_idle", ++static void rk3288_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+ "auto_pd_dis_freq", -+ "auto_sr_dis_freq", -+ /* for ddr3 only */ -+ "ddr3_dll_dis_freq", -+ "phy_dll_dis_freq", ++ regmap_write(info->regmap_grf, RK3288_GRF_SOC_CON4, RK3288_DFI_EN); ++} + -+ "ddr3_odt_dis_freq", -+ "phy_ddr3_odt_dis_freq", -+ "ddr3_drv", -+ "ddr3_odt", -+ "phy_ddr3_drv", -+ "phy_ddr3_odt", ++static void rk3288_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+ "lpddr2_drv", -+ "phy_lpddr2_drv", ++ regmap_write(info->regmap_grf, RK3288_GRF_SOC_CON4, RK3288_DFI_DIS); ++} + -+ "lpddr3_odt_dis_freq", -+ "phy_lpddr3_odt_dis_freq", -+ "lpddr3_drv", -+ "lpddr3_odt", -+ "phy_lpddr3_drv", -+ "phy_lpddr3_odt" -+}; ++static int rk3288_dfi_disable(struct devfreq_event_dev *edev) ++{ ++ rk3288_dfi_stop_hardware_counter(edev); + -+struct rk3288_ddr_dts_config_timing { -+ unsigned int ddr3_speed_bin; -+ unsigned int pd_idle; -+ unsigned int sr_idle; ++ return 0; ++} + -+ unsigned int auto_pd_dis_freq; -+ unsigned int auto_sr_dis_freq; -+ /* for ddr3 only */ -+ unsigned int ddr3_dll_dis_freq; -+ unsigned int phy_dll_dis_freq; ++static int rk3288_dfi_enable(struct devfreq_event_dev *edev) ++{ ++ rk3288_dfi_start_hardware_counter(edev); + -+ unsigned int ddr3_odt_dis_freq; -+ unsigned int phy_ddr3_odt_dis_freq; -+ unsigned int ddr3_drv; -+ unsigned int ddr3_odt; -+ unsigned int phy_ddr3_drv; -+ unsigned int phy_ddr3_odt; ++ return 0; ++} + -+ unsigned int lpddr2_drv; -+ unsigned int phy_lpddr2_drv; ++static int rk3288_dfi_set_event(struct devfreq_event_dev *edev) ++{ ++ return 0; ++} + -+ unsigned int lpddr3_odt_dis_freq; -+ unsigned int phy_lpddr3_odt_dis_freq; -+ unsigned int lpddr3_drv; -+ unsigned int lpddr3_odt; -+ unsigned int phy_lpddr3_drv; -+ unsigned int phy_lpddr3_odt; ++static int rk3288_dfi_get_busier_ch(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ u32 tmp, max = 0; ++ u32 i, busier_ch = 0; ++ u32 rd_count, wr_count, total_count; + -+ unsigned int available; -+}; ++ rk3288_dfi_stop_hardware_counter(edev); + -+/* hope this define can adapt all future platfor */ -+static const char * const rk3328_dts_timing[] = { -+ "ddr3_speed_bin", -+ "ddr4_speed_bin", -+ "pd_idle", -+ "sr_idle", -+ "sr_mc_gate_idle", -+ "srpd_lite_idle", -+ "standby_idle", ++ /* Find out which channel is busier */ ++ for (i = 0; i < MAX_DMC_NUM_CH; i++) { ++ if (!(info->ch_msk & BIT(i))) ++ continue; ++ regmap_read(info->regmap_grf, ++ RK3288_GRF_SOC_STATUS(11 + i * 4), &wr_count); ++ regmap_read(info->regmap_grf, ++ RK3288_GRF_SOC_STATUS(12 + i * 4), &rd_count); ++ regmap_read(info->regmap_grf, ++ RK3288_GRF_SOC_STATUS(14 + i * 4), &total_count); ++ info->ch_usage[i].access = (wr_count + rd_count) * 4; ++ info->ch_usage[i].total = total_count; ++ tmp = info->ch_usage[i].access; ++ if (tmp > max) { ++ busier_ch = i; ++ max = tmp; ++ } ++ } ++ 
rk3288_dfi_start_hardware_counter(edev); + -+ "auto_pd_dis_freq", -+ "auto_sr_dis_freq", -+ "ddr3_dll_dis_freq", -+ "ddr4_dll_dis_freq", -+ "phy_dll_dis_freq", ++ return busier_ch; ++} + -+ "ddr3_odt_dis_freq", -+ "phy_ddr3_odt_dis_freq", -+ "ddr3_drv", -+ "ddr3_odt", -+ "phy_ddr3_ca_drv", -+ "phy_ddr3_ck_drv", -+ "phy_ddr3_dq_drv", -+ "phy_ddr3_odt", ++static int rk3288_dfi_get_event(struct devfreq_event_dev *edev, ++ struct devfreq_event_data *edata) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ int busier_ch; ++ unsigned long flags; + -+ "lpddr3_odt_dis_freq", -+ "phy_lpddr3_odt_dis_freq", -+ "lpddr3_drv", -+ "lpddr3_odt", -+ "phy_lpddr3_ca_drv", -+ "phy_lpddr3_ck_drv", -+ "phy_lpddr3_dq_drv", -+ "phy_lpddr3_odt", ++ local_irq_save(flags); ++ busier_ch = rk3288_dfi_get_busier_ch(edev); ++ local_irq_restore(flags); + -+ "lpddr4_odt_dis_freq", -+ "phy_lpddr4_odt_dis_freq", -+ "lpddr4_drv", -+ "lpddr4_dq_odt", -+ "lpddr4_ca_odt", -+ "phy_lpddr4_ca_drv", -+ "phy_lpddr4_ck_cs_drv", -+ "phy_lpddr4_dq_drv", -+ "phy_lpddr4_odt", ++ edata->load_count = info->ch_usage[busier_ch].access; ++ edata->total_count = info->ch_usage[busier_ch].total; + -+ "ddr4_odt_dis_freq", -+ "phy_ddr4_odt_dis_freq", -+ "ddr4_drv", -+ "ddr4_odt", -+ "phy_ddr4_ca_drv", -+ "phy_ddr4_ck_drv", -+ "phy_ddr4_dq_drv", -+ "phy_ddr4_odt", -+}; ++ return 0; ++} + -+static const char * const rk3328_dts_ca_timing[] = { -+ "ddr3a1_ddr4a9_de-skew", -+ "ddr3a0_ddr4a10_de-skew", -+ "ddr3a3_ddr4a6_de-skew", -+ "ddr3a2_ddr4a4_de-skew", -+ "ddr3a5_ddr4a8_de-skew", -+ "ddr3a4_ddr4a5_de-skew", -+ "ddr3a7_ddr4a11_de-skew", -+ "ddr3a6_ddr4a7_de-skew", -+ "ddr3a9_ddr4a0_de-skew", -+ "ddr3a8_ddr4a13_de-skew", -+ "ddr3a11_ddr4a3_de-skew", -+ "ddr3a10_ddr4cs0_de-skew", -+ "ddr3a13_ddr4a2_de-skew", -+ "ddr3a12_ddr4ba1_de-skew", -+ "ddr3a15_ddr4odt0_de-skew", -+ "ddr3a14_ddr4a1_de-skew", -+ "ddr3ba1_ddr4a15_de-skew", -+ "ddr3ba0_ddr4bg0_de-skew", -+ "ddr3ras_ddr4cke_de-skew", -+ "ddr3ba2_ddr4ba0_de-skew", -+ "ddr3we_ddr4bg1_de-skew", -+ "ddr3cas_ddr4a12_de-skew", -+ "ddr3ckn_ddr4ckn_de-skew", -+ "ddr3ckp_ddr4ckp_de-skew", -+ "ddr3cke_ddr4a16_de-skew", -+ "ddr3odt0_ddr4a14_de-skew", -+ "ddr3cs0_ddr4act_de-skew", -+ "ddr3reset_ddr4reset_de-skew", -+ "ddr3cs1_ddr4cs1_de-skew", -+ "ddr3odt1_ddr4odt1_de-skew", ++static const struct devfreq_event_ops rk3288_dfi_ops = { ++ .disable = rk3288_dfi_disable, ++ .enable = rk3288_dfi_enable, ++ .get_event = rk3288_dfi_get_event, ++ .set_event = rk3288_dfi_set_event, +}; + -+static const char * const rk3328_dts_cs0_timing[] = { -+ "cs0_dm0_rx_de-skew", -+ "cs0_dm0_tx_de-skew", -+ "cs0_dq0_rx_de-skew", -+ "cs0_dq0_tx_de-skew", -+ "cs0_dq1_rx_de-skew", -+ "cs0_dq1_tx_de-skew", -+ "cs0_dq2_rx_de-skew", -+ "cs0_dq2_tx_de-skew", -+ "cs0_dq3_rx_de-skew", -+ "cs0_dq3_tx_de-skew", -+ "cs0_dq4_rx_de-skew", -+ "cs0_dq4_tx_de-skew", -+ "cs0_dq5_rx_de-skew", -+ "cs0_dq5_tx_de-skew", -+ "cs0_dq6_rx_de-skew", -+ "cs0_dq6_tx_de-skew", -+ "cs0_dq7_rx_de-skew", -+ "cs0_dq7_tx_de-skew", -+ "cs0_dqs0_rx_de-skew", -+ "cs0_dqs0p_tx_de-skew", -+ "cs0_dqs0n_tx_de-skew", -+ -+ "cs0_dm1_rx_de-skew", -+ "cs0_dm1_tx_de-skew", -+ "cs0_dq8_rx_de-skew", -+ "cs0_dq8_tx_de-skew", -+ "cs0_dq9_rx_de-skew", -+ "cs0_dq9_tx_de-skew", -+ "cs0_dq10_rx_de-skew", -+ "cs0_dq10_tx_de-skew", -+ "cs0_dq11_rx_de-skew", -+ "cs0_dq11_tx_de-skew", -+ "cs0_dq12_rx_de-skew", -+ "cs0_dq12_tx_de-skew", -+ "cs0_dq13_rx_de-skew", -+ "cs0_dq13_tx_de-skew", -+ "cs0_dq14_rx_de-skew", -+ "cs0_dq14_tx_de-skew", -+ "cs0_dq15_rx_de-skew", -+ 
"cs0_dq15_tx_de-skew", -+ "cs0_dqs1_rx_de-skew", -+ "cs0_dqs1p_tx_de-skew", -+ "cs0_dqs1n_tx_de-skew", ++static void rk3368_dfi_start_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+ "cs0_dm2_rx_de-skew", -+ "cs0_dm2_tx_de-skew", -+ "cs0_dq16_rx_de-skew", -+ "cs0_dq16_tx_de-skew", -+ "cs0_dq17_rx_de-skew", -+ "cs0_dq17_tx_de-skew", -+ "cs0_dq18_rx_de-skew", -+ "cs0_dq18_tx_de-skew", -+ "cs0_dq19_rx_de-skew", -+ "cs0_dq19_tx_de-skew", -+ "cs0_dq20_rx_de-skew", -+ "cs0_dq20_tx_de-skew", -+ "cs0_dq21_rx_de-skew", -+ "cs0_dq21_tx_de-skew", -+ "cs0_dq22_rx_de-skew", -+ "cs0_dq22_tx_de-skew", -+ "cs0_dq23_rx_de-skew", -+ "cs0_dq23_tx_de-skew", -+ "cs0_dqs2_rx_de-skew", -+ "cs0_dqs2p_tx_de-skew", -+ "cs0_dqs2n_tx_de-skew", ++ regmap_write(info->regmap_grf, RK3368_GRF_DDRC0_CON0, RK3368_DFI_EN); ++} + -+ "cs0_dm3_rx_de-skew", -+ "cs0_dm3_tx_de-skew", -+ "cs0_dq24_rx_de-skew", -+ "cs0_dq24_tx_de-skew", -+ "cs0_dq25_rx_de-skew", -+ "cs0_dq25_tx_de-skew", -+ "cs0_dq26_rx_de-skew", -+ "cs0_dq26_tx_de-skew", -+ "cs0_dq27_rx_de-skew", -+ "cs0_dq27_tx_de-skew", -+ "cs0_dq28_rx_de-skew", -+ "cs0_dq28_tx_de-skew", -+ "cs0_dq29_rx_de-skew", -+ "cs0_dq29_tx_de-skew", -+ "cs0_dq30_rx_de-skew", -+ "cs0_dq30_tx_de-skew", -+ "cs0_dq31_rx_de-skew", -+ "cs0_dq31_tx_de-skew", -+ "cs0_dqs3_rx_de-skew", -+ "cs0_dqs3p_tx_de-skew", -+ "cs0_dqs3n_tx_de-skew", -+}; ++static void rk3368_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + -+static const char * const rk3328_dts_cs1_timing[] = { -+ "cs1_dm0_rx_de-skew", -+ "cs1_dm0_tx_de-skew", -+ "cs1_dq0_rx_de-skew", -+ "cs1_dq0_tx_de-skew", -+ "cs1_dq1_rx_de-skew", -+ "cs1_dq1_tx_de-skew", -+ "cs1_dq2_rx_de-skew", -+ "cs1_dq2_tx_de-skew", -+ "cs1_dq3_rx_de-skew", -+ "cs1_dq3_tx_de-skew", -+ "cs1_dq4_rx_de-skew", -+ "cs1_dq4_tx_de-skew", -+ "cs1_dq5_rx_de-skew", -+ "cs1_dq5_tx_de-skew", -+ "cs1_dq6_rx_de-skew", -+ "cs1_dq6_tx_de-skew", -+ "cs1_dq7_rx_de-skew", -+ "cs1_dq7_tx_de-skew", -+ "cs1_dqs0_rx_de-skew", -+ "cs1_dqs0p_tx_de-skew", -+ "cs1_dqs0n_tx_de-skew", ++ regmap_write(info->regmap_grf, RK3368_GRF_DDRC0_CON0, RK3368_DFI_DIS); ++} + -+ "cs1_dm1_rx_de-skew", -+ "cs1_dm1_tx_de-skew", -+ "cs1_dq8_rx_de-skew", -+ "cs1_dq8_tx_de-skew", -+ "cs1_dq9_rx_de-skew", -+ "cs1_dq9_tx_de-skew", -+ "cs1_dq10_rx_de-skew", -+ "cs1_dq10_tx_de-skew", -+ "cs1_dq11_rx_de-skew", -+ "cs1_dq11_tx_de-skew", -+ "cs1_dq12_rx_de-skew", -+ "cs1_dq12_tx_de-skew", -+ "cs1_dq13_rx_de-skew", -+ "cs1_dq13_tx_de-skew", -+ "cs1_dq14_rx_de-skew", -+ "cs1_dq14_tx_de-skew", -+ "cs1_dq15_rx_de-skew", -+ "cs1_dq15_tx_de-skew", -+ "cs1_dqs1_rx_de-skew", -+ "cs1_dqs1p_tx_de-skew", -+ "cs1_dqs1n_tx_de-skew", ++static int rk3368_dfi_disable(struct devfreq_event_dev *edev) ++{ ++ rk3368_dfi_stop_hardware_counter(edev); + -+ "cs1_dm2_rx_de-skew", -+ "cs1_dm2_tx_de-skew", -+ "cs1_dq16_rx_de-skew", -+ "cs1_dq16_tx_de-skew", -+ "cs1_dq17_rx_de-skew", -+ "cs1_dq17_tx_de-skew", -+ "cs1_dq18_rx_de-skew", -+ "cs1_dq18_tx_de-skew", -+ "cs1_dq19_rx_de-skew", -+ "cs1_dq19_tx_de-skew", -+ "cs1_dq20_rx_de-skew", -+ "cs1_dq20_tx_de-skew", -+ "cs1_dq21_rx_de-skew", -+ "cs1_dq21_tx_de-skew", -+ "cs1_dq22_rx_de-skew", -+ "cs1_dq22_tx_de-skew", -+ "cs1_dq23_rx_de-skew", -+ "cs1_dq23_tx_de-skew", -+ "cs1_dqs2_rx_de-skew", -+ "cs1_dqs2p_tx_de-skew", -+ "cs1_dqs2n_tx_de-skew", ++ return 0; ++} + -+ "cs1_dm3_rx_de-skew", -+ "cs1_dm3_tx_de-skew", -+ "cs1_dq24_rx_de-skew", -+ 
"cs1_dq24_tx_de-skew", -+ "cs1_dq25_rx_de-skew", -+ "cs1_dq25_tx_de-skew", -+ "cs1_dq26_rx_de-skew", -+ "cs1_dq26_tx_de-skew", -+ "cs1_dq27_rx_de-skew", -+ "cs1_dq27_tx_de-skew", -+ "cs1_dq28_rx_de-skew", -+ "cs1_dq28_tx_de-skew", -+ "cs1_dq29_rx_de-skew", -+ "cs1_dq29_tx_de-skew", -+ "cs1_dq30_rx_de-skew", -+ "cs1_dq30_tx_de-skew", -+ "cs1_dq31_rx_de-skew", -+ "cs1_dq31_tx_de-skew", -+ "cs1_dqs3_rx_de-skew", -+ "cs1_dqs3p_tx_de-skew", -+ "cs1_dqs3n_tx_de-skew", -+}; ++static int rk3368_dfi_enable(struct devfreq_event_dev *edev) ++{ ++ rk3368_dfi_start_hardware_counter(edev); + -+struct rk3328_ddr_dts_config_timing { -+ unsigned int ddr3_speed_bin; -+ unsigned int ddr4_speed_bin; -+ unsigned int pd_idle; -+ unsigned int sr_idle; -+ unsigned int sr_mc_gate_idle; -+ unsigned int srpd_lite_idle; -+ unsigned int standby_idle; ++ return 0; ++} + -+ unsigned int auto_pd_dis_freq; -+ unsigned int auto_sr_dis_freq; -+ /* for ddr3 only */ -+ unsigned int ddr3_dll_dis_freq; -+ /* for ddr4 only */ -+ unsigned int ddr4_dll_dis_freq; -+ unsigned int phy_dll_dis_freq; ++static int rk3368_dfi_set_event(struct devfreq_event_dev *edev) ++{ ++ return 0; ++} + -+ unsigned int ddr3_odt_dis_freq; -+ unsigned int phy_ddr3_odt_dis_freq; -+ unsigned int ddr3_drv; -+ unsigned int ddr3_odt; -+ unsigned int phy_ddr3_ca_drv; -+ unsigned int phy_ddr3_ck_drv; -+ unsigned int phy_ddr3_dq_drv; -+ unsigned int phy_ddr3_odt; ++static int rk3368_dfi_get_event(struct devfreq_event_dev *edev, ++ struct devfreq_event_data *edata) ++{ ++ struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); ++ unsigned long flags; ++ u32 dfi0_wr, dfi0_rd, dfi1_wr, dfi1_rd, dfi_timer; + -+ unsigned int lpddr3_odt_dis_freq; -+ unsigned int phy_lpddr3_odt_dis_freq; -+ unsigned int lpddr3_drv; -+ unsigned int lpddr3_odt; -+ unsigned int phy_lpddr3_ca_drv; -+ unsigned int phy_lpddr3_ck_drv; -+ unsigned int phy_lpddr3_dq_drv; -+ unsigned int phy_lpddr3_odt; ++ local_irq_save(flags); + -+ unsigned int lpddr4_odt_dis_freq; -+ unsigned int phy_lpddr4_odt_dis_freq; -+ unsigned int lpddr4_drv; -+ unsigned int lpddr4_dq_odt; -+ unsigned int lpddr4_ca_odt; -+ unsigned int phy_lpddr4_ca_drv; -+ unsigned int phy_lpddr4_ck_cs_drv; -+ unsigned int phy_lpddr4_dq_drv; -+ unsigned int phy_lpddr4_odt; ++ rk3368_dfi_stop_hardware_counter(edev); + -+ unsigned int ddr4_odt_dis_freq; -+ unsigned int phy_ddr4_odt_dis_freq; -+ unsigned int ddr4_drv; -+ unsigned int ddr4_odt; -+ unsigned int phy_ddr4_ca_drv; -+ unsigned int phy_ddr4_ck_drv; -+ unsigned int phy_ddr4_dq_drv; -+ unsigned int phy_ddr4_odt; ++ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS5, &dfi0_wr); ++ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS6, &dfi0_rd); ++ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS9, &dfi1_wr); ++ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS10, &dfi1_rd); ++ regmap_read(info->regmap_grf, RK3368_GRF_SOC_STATUS8, &dfi_timer); + -+ unsigned int ca_skew[15]; -+ unsigned int cs0_skew[44]; -+ unsigned int cs1_skew[44]; ++ edata->load_count = (dfi0_wr + dfi0_rd + dfi1_wr + dfi1_rd) * 2; ++ edata->total_count = dfi_timer; + -+ unsigned int available; -+}; ++ rk3368_dfi_start_hardware_counter(edev); + -+struct rk3328_ddr_de_skew_setting { -+ unsigned int ca_de_skew[30]; -+ unsigned int cs0_de_skew[84]; -+ unsigned int cs1_de_skew[84]; -+}; ++ local_irq_restore(flags); + -+struct rk3368_dram_timing { -+ u32 dram_spd_bin; -+ u32 sr_idle; -+ u32 pd_idle; -+ u32 dram_dll_dis_freq; -+ u32 phy_dll_dis_freq; -+ u32 dram_odt_dis_freq; -+ u32 phy_odt_dis_freq; -+ u32 
ddr3_drv; -+ u32 ddr3_odt; -+ u32 lpddr3_drv; -+ u32 lpddr3_odt; -+ u32 lpddr2_drv; -+ u32 phy_clk_drv; -+ u32 phy_cmd_drv; -+ u32 phy_dqs_drv; -+ u32 phy_odt; -+ u32 ddr_2t; -+}; ++ return 0; ++} + -+struct rk3399_dram_timing { -+ unsigned int ddr3_speed_bin; -+ unsigned int pd_idle; -+ unsigned int sr_idle; -+ unsigned int sr_mc_gate_idle; -+ unsigned int srpd_lite_idle; -+ unsigned int standby_idle; -+ unsigned int auto_lp_dis_freq; -+ unsigned int ddr3_dll_dis_freq; -+ unsigned int phy_dll_dis_freq; -+ unsigned int ddr3_odt_dis_freq; -+ unsigned int ddr3_drv; -+ unsigned int ddr3_odt; -+ unsigned int phy_ddr3_ca_drv; -+ unsigned int phy_ddr3_dq_drv; -+ unsigned int phy_ddr3_odt; -+ unsigned int lpddr3_odt_dis_freq; -+ unsigned int lpddr3_drv; -+ unsigned int lpddr3_odt; -+ unsigned int phy_lpddr3_ca_drv; -+ unsigned int phy_lpddr3_dq_drv; -+ unsigned int phy_lpddr3_odt; -+ unsigned int lpddr4_odt_dis_freq; -+ unsigned int lpddr4_drv; -+ unsigned int lpddr4_dq_odt; -+ unsigned int lpddr4_ca_odt; -+ unsigned int phy_lpddr4_ca_drv; -+ unsigned int phy_lpddr4_ck_cs_drv; -+ unsigned int phy_lpddr4_dq_drv; -+ unsigned int phy_lpddr4_odt; -+}; ++static const struct devfreq_event_ops rk3368_dfi_ops = { ++ .disable = rk3368_dfi_disable, ++ .enable = rk3368_dfi_enable, ++ .get_event = rk3368_dfi_get_event, ++ .set_event = rk3368_dfi_set_event, + }; + + static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) + { + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + void __iomem *dfi_regs = info->regs; +- u32 val; +- u32 ddr_type; ++ u32 mon_idx = 0, val_6 = 0; ++ u32 i; + +- /* get ddr type */ +- regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); +- ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & +- RK3399_PMUGRF_DDRTYPE_MASK; ++ if (info->mon_idx) ++ mon_idx = info->mon_idx; + +- /* clear DDRMON_CTRL setting */ +- writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); ++ if (info->dram_dynamic_info_reg) ++ regmap_read(info->regmap_pmugrf, info->dram_dynamic_info_reg, &val_6); + +- /* set ddr type to dfi */ +- if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) +- writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); +- else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) +- writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); ++ if (info->dram_type == LPDDR5) { ++ info->lp5_bank_mode = READ_LP5_BANK_MODE(val_6); ++ info->lp5_ckr = READ_LP5_CKR(val_6); ++ } + +- /* enable count, use software mode */ +- writel_relaxed(SOFTWARE_EN, dfi_regs + DDRMON_CTRL); ++ for (i = 0; i < MAX_DMC_NUM_CH; i++) { ++ if (!(info->ch_msk & BIT(i))) ++ continue; ++ /* clear DDRMON_CTRL setting */ ++ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + i * mon_idx + DDRMON_CTRL); + -+/* name rule: ddr4(pad_name)_ddr3_lpddr3_lpddr4_de-skew */ -+static const char * const rv1126_dts_ca_timing[] = { -+ "a0_a3_a3_cke1-a_de-skew", -+ "a1_ba1_null_cke0-b_de-skew", -+ "a2_a9_a9_a4-a_de-skew", -+ "a3_a15_null_a5-b_de-skew", -+ "a4_a6_a6_ck-a_de-skew", -+ "a5_a12_null_odt0-b_de-skew", -+ "a6_ba2_null_a0-a_de-skew", -+ "a7_a4_a4_odt0-a_de-skew", -+ "a8_a1_a1_cke0-a_de-skew", -+ "a9_a5_a5_a5-a_de-skew", -+ "a10_a8_a8_clkb-a_de-skew", -+ "a11_a7_a7_ca2-a_de-skew", -+ "a12_rasn_null_ca1-a_de-skew", -+ "a13_a13_null_ca3-a_de-skew", -+ "a14_a14_null_csb1-b_de-skew", -+ "a15_a10_null_ca0-b_de-skew", -+ "a16_a11_null_csb0-b_de-skew", -+ "a17_null_null_null_de-skew", -+ "ba0_csb1_csb1_csb0-a_de-skew", -+ "ba1_wen_null_cke1-b_de-skew", -+ "bg0_odt1_odt1_csb1-a_de-skew", -+ "bg1_a2_a2_odt1-a_de-skew", -+ 
"cke0_casb_null_ca1-b_de-skew", -+ "ck_ck_ck_ck-b_de-skew", -+ "ckb_ckb_ckb_ckb-b_de-skew", -+ "csb0_odt0_odt0_ca2-b_de-skew", -+ "odt0_csb0_csb0_ca4-b_de-skew", -+ "resetn_resetn_null-resetn_de-skew", -+ "actn_cke_cke_ca3-b_de-skew", -+ "cke1_null_null_null_de-skew", -+ "csb1_ba0_null_null_de-skew", -+ "odt1_a0_a0_odt1-b_de-skew", -+}; ++ /* set ddr type to dfi */ ++ if (info->dram_type == LPDDR3 || info->dram_type == LPDDR2) ++ writel_relaxed(LPDDR2_3_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); ++ else if (info->dram_type == LPDDR4 || info->dram_type == LPDDR4X) ++ writel_relaxed(LPDDR4_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); ++ else if (info->dram_type == DDR4) ++ writel_relaxed(DDR4_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); ++ else if (info->dram_type == LPDDR5) ++ writel_relaxed(LPDDR5_EN | LPDDR5_BANK_MODE(info->lp5_bank_mode), ++ dfi_regs + i * mon_idx + DDRMON_CTRL); + -+static const char * const rv1126_dts_cs0_a_timing[] = { -+ "cs0_dm0_rx_de-skew", -+ "cs0_dq0_rx_de-skew", -+ "cs0_dq1_rx_de-skew", -+ "cs0_dq2_rx_de-skew", -+ "cs0_dq3_rx_de-skew", -+ "cs0_dq4_rx_de-skew", -+ "cs0_dq5_rx_de-skew", -+ "cs0_dq6_rx_de-skew", -+ "cs0_dq7_rx_de-skew", -+ "cs0_dqs0p_rx_de-skew", -+ "cs0_dqs0n_rx_de-skew", -+ "cs0_dm1_rx_de-skew", -+ "cs0_dq8_rx_de-skew", -+ "cs0_dq9_rx_de-skew", -+ "cs0_dq10_rx_de-skew", -+ "cs0_dq11_rx_de-skew", -+ "cs0_dq12_rx_de-skew", -+ "cs0_dq13_rx_de-skew", -+ "cs0_dq14_rx_de-skew", -+ "cs0_dq15_rx_de-skew", -+ "cs0_dqs1p_rx_de-skew", -+ "cs0_dqs1n_rx_de-skew", -+ "cs0_dm0_tx_de-skew", -+ "cs0_dq0_tx_de-skew", -+ "cs0_dq1_tx_de-skew", -+ "cs0_dq2_tx_de-skew", -+ "cs0_dq3_tx_de-skew", -+ "cs0_dq4_tx_de-skew", -+ "cs0_dq5_tx_de-skew", -+ "cs0_dq6_tx_de-skew", -+ "cs0_dq7_tx_de-skew", -+ "cs0_dqs0p_tx_de-skew", -+ "cs0_dqs0n_tx_de-skew", -+ "cs0_dm1_tx_de-skew", -+ "cs0_dq8_tx_de-skew", -+ "cs0_dq9_tx_de-skew", -+ "cs0_dq10_tx_de-skew", -+ "cs0_dq11_tx_de-skew", -+ "cs0_dq12_tx_de-skew", -+ "cs0_dq13_tx_de-skew", -+ "cs0_dq14_tx_de-skew", -+ "cs0_dq15_tx_de-skew", -+ "cs0_dqs1p_tx_de-skew", -+ "cs0_dqs1n_tx_de-skew", -+}; ++ /* enable count, use software mode */ ++ writel_relaxed(SOFTWARE_EN, dfi_regs + i * mon_idx + DDRMON_CTRL); ++ } + } + + static void rockchip_dfi_stop_hardware_counter(struct devfreq_event_dev *edev) + { + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + void __iomem *dfi_regs = info->regs; ++ u32 mon_idx = 0, i; + +- writel_relaxed(SOFTWARE_DIS, dfi_regs + DDRMON_CTRL); ++ if (info->mon_idx) ++ mon_idx = info->mon_idx; + -+static const char * const rv1126_dts_cs0_b_timing[] = { -+ "cs0_dm2_rx_de-skew", -+ "cs0_dq16_rx_de-skew", -+ "cs0_dq17_rx_de-skew", -+ "cs0_dq18_rx_de-skew", -+ "cs0_dq19_rx_de-skew", -+ "cs0_dq20_rx_de-skew", -+ "cs0_dq21_rx_de-skew", -+ "cs0_dq22_rx_de-skew", -+ "cs0_dq23_rx_de-skew", -+ "cs0_dqs2p_rx_de-skew", -+ "cs0_dqs2n_rx_de-skew", -+ "cs0_dm3_rx_de-skew", -+ "cs0_dq24_rx_de-skew", -+ "cs0_dq25_rx_de-skew", -+ "cs0_dq26_rx_de-skew", -+ "cs0_dq27_rx_de-skew", -+ "cs0_dq28_rx_de-skew", -+ "cs0_dq29_rx_de-skew", -+ "cs0_dq30_rx_de-skew", -+ "cs0_dq31_rx_de-skew", -+ "cs0_dqs3p_rx_de-skew", -+ "cs0_dqs3n_rx_de-skew", -+ "cs0_dm2_tx_de-skew", -+ "cs0_dq16_tx_de-skew", -+ "cs0_dq17_tx_de-skew", -+ "cs0_dq18_tx_de-skew", -+ "cs0_dq19_tx_de-skew", -+ "cs0_dq20_tx_de-skew", -+ "cs0_dq21_tx_de-skew", -+ "cs0_dq22_tx_de-skew", -+ "cs0_dq23_tx_de-skew", -+ "cs0_dqs2p_tx_de-skew", -+ "cs0_dqs2n_tx_de-skew", -+ "cs0_dm3_tx_de-skew", -+ "cs0_dq24_tx_de-skew", -+ "cs0_dq25_tx_de-skew", -+ "cs0_dq26_tx_de-skew", -+ 
"cs0_dq27_tx_de-skew", -+ "cs0_dq28_tx_de-skew", -+ "cs0_dq29_tx_de-skew", -+ "cs0_dq30_tx_de-skew", -+ "cs0_dq31_tx_de-skew", -+ "cs0_dqs3p_tx_de-skew", -+ "cs0_dqs3n_tx_de-skew", -+}; ++ for (i = 0; i < MAX_DMC_NUM_CH; i++) { ++ if (!(info->ch_msk & BIT(i))) ++ continue; ++ writel_relaxed(SOFTWARE_DIS, dfi_regs + i * mon_idx + DDRMON_CTRL); ++ } + } + + static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev) +@@ -96,16 +414,35 @@ static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev) + u32 tmp, max = 0; + u32 i, busier_ch = 0; + void __iomem *dfi_regs = info->regs; ++ u32 mon_idx = 0x20, count_rate = 1; + + rockchip_dfi_stop_hardware_counter(edev); + ++ if (info->mon_idx) ++ mon_idx = info->mon_idx; ++ if (info->count_rate) ++ count_rate = info->count_rate; + -+static const char * const rv1126_dts_cs1_a_timing[] = { -+ "cs1_dm0_rx_de-skew", -+ "cs1_dq0_rx_de-skew", -+ "cs1_dq1_rx_de-skew", -+ "cs1_dq2_rx_de-skew", -+ "cs1_dq3_rx_de-skew", -+ "cs1_dq4_rx_de-skew", -+ "cs1_dq5_rx_de-skew", -+ "cs1_dq6_rx_de-skew", -+ "cs1_dq7_rx_de-skew", -+ "cs1_dqs0p_rx_de-skew", -+ "cs1_dqs0n_rx_de-skew", -+ "cs1_dm1_rx_de-skew", -+ "cs1_dq8_rx_de-skew", -+ "cs1_dq9_rx_de-skew", -+ "cs1_dq10_rx_de-skew", -+ "cs1_dq11_rx_de-skew", -+ "cs1_dq12_rx_de-skew", -+ "cs1_dq13_rx_de-skew", -+ "cs1_dq14_rx_de-skew", -+ "cs1_dq15_rx_de-skew", -+ "cs1_dqs1p_rx_de-skew", -+ "cs1_dqs1n_rx_de-skew", -+ "cs1_dm0_tx_de-skew", -+ "cs1_dq0_tx_de-skew", -+ "cs1_dq1_tx_de-skew", -+ "cs1_dq2_tx_de-skew", -+ "cs1_dq3_tx_de-skew", -+ "cs1_dq4_tx_de-skew", -+ "cs1_dq5_tx_de-skew", -+ "cs1_dq6_tx_de-skew", -+ "cs1_dq7_tx_de-skew", -+ "cs1_dqs0p_tx_de-skew", -+ "cs1_dqs0n_tx_de-skew", -+ "cs1_dm1_tx_de-skew", -+ "cs1_dq8_tx_de-skew", -+ "cs1_dq9_tx_de-skew", -+ "cs1_dq10_tx_de-skew", -+ "cs1_dq11_tx_de-skew", -+ "cs1_dq12_tx_de-skew", -+ "cs1_dq13_tx_de-skew", -+ "cs1_dq14_tx_de-skew", -+ "cs1_dq15_tx_de-skew", -+ "cs1_dqs1p_tx_de-skew", -+ "cs1_dqs1n_tx_de-skew", -+}; + /* Find out which channel is busier */ +- for (i = 0; i < RK3399_DMC_NUM_CH; i++) { +- info->ch_usage[i].access = readl_relaxed(dfi_regs + +- DDRMON_CH0_DFI_ACCESS_NUM + i * 20) * 4; ++ for (i = 0; i < MAX_DMC_NUM_CH; i++) { ++ if (!(info->ch_msk & BIT(i))) ++ continue; + -+static const char * const rv1126_dts_cs1_b_timing[] = { -+ "cs1_dm2_rx_de-skew", -+ "cs1_dq16_rx_de-skew", -+ "cs1_dq17_rx_de-skew", -+ "cs1_dq18_rx_de-skew", -+ "cs1_dq19_rx_de-skew", -+ "cs1_dq20_rx_de-skew", -+ "cs1_dq21_rx_de-skew", -+ "cs1_dq22_rx_de-skew", -+ "cs1_dq23_rx_de-skew", -+ "cs1_dqs2p_rx_de-skew", -+ "cs1_dqs2n_rx_de-skew", -+ "cs1_dm3_rx_de-skew", -+ "cs1_dq24_rx_de-skew", -+ "cs1_dq25_rx_de-skew", -+ "cs1_dq26_rx_de-skew", -+ "cs1_dq27_rx_de-skew", -+ "cs1_dq28_rx_de-skew", -+ "cs1_dq29_rx_de-skew", -+ "cs1_dq30_rx_de-skew", -+ "cs1_dq31_rx_de-skew", -+ "cs1_dqs3p_rx_de-skew", -+ "cs1_dqs3n_rx_de-skew", -+ "cs1_dm2_tx_de-skew", -+ "cs1_dq16_tx_de-skew", -+ "cs1_dq17_tx_de-skew", -+ "cs1_dq18_tx_de-skew", -+ "cs1_dq19_tx_de-skew", -+ "cs1_dq20_tx_de-skew", -+ "cs1_dq21_tx_de-skew", -+ "cs1_dq22_tx_de-skew", -+ "cs1_dq23_tx_de-skew", -+ "cs1_dqs2p_tx_de-skew", -+ "cs1_dqs2n_tx_de-skew", -+ "cs1_dm3_tx_de-skew", -+ "cs1_dq24_tx_de-skew", -+ "cs1_dq25_tx_de-skew", -+ "cs1_dq26_tx_de-skew", -+ "cs1_dq27_tx_de-skew", -+ "cs1_dq28_tx_de-skew", -+ "cs1_dq29_tx_de-skew", -+ "cs1_dq30_tx_de-skew", -+ "cs1_dq31_tx_de-skew", -+ "cs1_dqs3p_tx_de-skew", -+ "cs1_dqs3n_tx_de-skew", -+}; ++ /* rk3588 counter is dfi clk rate */ + info->ch_usage[i].total = 
readl_relaxed(dfi_regs + +- DDRMON_CH0_COUNT_NUM + i * 20); +- tmp = info->ch_usage[i].access; ++ DDRMON_CH0_COUNT_NUM + i * mon_idx) * count_rate; + -+#endif /* __ROCKCHIP_DMC_TIMING_H__ */ ++ /* LPDDR5 LPDDR4 and LPDDR4X BL = 16,other DDR type BL = 8 */ ++ tmp = readl_relaxed(dfi_regs + ++ DDRMON_CH0_DFI_ACCESS_NUM + i * mon_idx); ++ if (info->dram_type == LPDDR4 || info->dram_type == LPDDR4X) ++ tmp *= 8; ++ else if (info->dram_type == LPDDR5) ++ tmp *= 16 / (4 << info->lp5_ckr); ++ else ++ tmp *= 4; ++ info->ch_usage[i].access = tmp; + -diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig -index c86a4756a..82c7df8e4 100644 ---- a/drivers/dma-buf/Kconfig -+++ b/drivers/dma-buf/Kconfig -@@ -1,6 +1,39 @@ - # SPDX-License-Identifier: GPL-2.0-only - menu "DMABUF options" + if (tmp > max) { + busier_ch = i; + max = tmp; +@@ -121,7 +458,8 @@ static int rockchip_dfi_disable(struct devfreq_event_dev *edev) + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); -+config DMABUF_CACHE -+ bool "DMABUF cache attachment" -+ default ARCH_ROCKCHIP -+ depends on NO_GKI -+ help -+ This option support to store attachments in a list and destroy them by -+ set to a callback list in the dtor of dma-buf. + rockchip_dfi_stop_hardware_counter(edev); +- clk_disable_unprepare(info->clk); ++ if (info->clk) ++ clk_disable_unprepare(info->clk); + + return 0; + } +@@ -131,10 +469,13 @@ static int rockchip_dfi_enable(struct devfreq_event_dev *edev) + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + int ret; + +- ret = clk_prepare_enable(info->clk); +- if (ret) { +- dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ret); +- return ret; ++ if (info->clk) { ++ ret = clk_prepare_enable(info->clk); ++ if (ret) { ++ dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ++ ret); ++ return ret; ++ } + } + + rockchip_dfi_start_hardware_counter(edev); +@@ -151,8 +492,11 @@ static int rockchip_dfi_get_event(struct devfreq_event_dev *edev, + { + struct rockchip_dfi *info = devfreq_event_get_drvdata(edev); + int busier_ch; ++ unsigned long flags; + ++ local_irq_save(flags); + busier_ch = rockchip_dfi_get_busier_ch(edev); ++ local_irq_restore(flags); + + edata->load_count = info->ch_usage[busier_ch].access; + edata->total_count = info->ch_usage[busier_ch].total; +@@ -167,22 +511,159 @@ static const struct devfreq_event_ops rockchip_dfi_ops = { + .set_event = rockchip_dfi_set_event, + }; + +-static const struct of_device_id rockchip_dfi_id_match[] = { +- { .compatible = "rockchip,rk3399-dfi" }, +- { }, +-}; +-MODULE_DEVICE_TABLE(of, rockchip_dfi_id_match); ++static __maybe_unused __init int rk3588_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ struct resource *res; ++ u32 val_2, val_3, val_4; + +-static int rockchip_dfi_probe(struct platform_device *pdev) ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ data->regs = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(data->regs)) ++ return PTR_ERR(data->regs); + -+config RK_DMABUF_DEBUG -+ bool "Rockchip DMABUF debug option" -+ depends on NO_GKI -+ select RK_DMABUF_PROCFS -+ help -+ This option support to debug all the dmabuf on db_list, allows to set -+ a name for dmabuf. 
If not sure, say N ++ data->regmap_pmugrf = syscon_regmap_lookup_by_phandle(np, "rockchip,pmu_grf"); ++ if (IS_ERR(data->regmap_pmugrf)) ++ return PTR_ERR(data->regmap_pmugrf); + -+config RK_DMABUF_DEBUG_ADVANCED -+ bool "Rockchip DMABUF debug advanced option" -+ depends on RK_DMABUF_DEBUG -+ help -+ This option support to debug all the dmabuf on db_list, allows to attach -+ and map a dmabuf who has no attachment. If not sure, say N ++ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(2), &val_2); ++ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(3), &val_3); ++ regmap_read(data->regmap_pmugrf, RK3588_PMUGRF_OS_REG(4), &val_4); ++ if (READ_SYSREG_VERSION(val_3) >= 0x3) ++ data->dram_type = READ_DRAMTYPE_INFO_V3(val_2, val_3); ++ else ++ data->dram_type = READ_DRAMTYPE_INFO(val_2); + -+config DMABUF_PARTIAL -+ bool "Support for partial cache maintenance" -+ help -+ In order to improve performance, allow dma-buf clients to -+ apply cache maintenance to only a subset of a dma-buf. ++ data->mon_idx = 0x4000; ++ if (data->dram_type == LPDDR5) ++ data->count_rate = 1; ++ else ++ data->count_rate = 2; ++ data->dram_dynamic_info_reg = RK3588_PMUGRF_OS_REG(6); ++ data->ch_msk = READ_CH_INFO(val_2) | READ_CH_INFO(val_4) << 2; ++ data->clk = NULL; + -+ Kernel clients will be able to use the dma_buf_begin_cpu_access_partial -+ and dma_buf_end_cpu_access_partial functions to only apply cache -+ maintenance to a range within the dma-buf. ++ desc->ops = &rockchip_dfi_ops; + - config SYNC_FILE - bool "Explicit Synchronization Framework" - default n -@@ -30,6 +63,13 @@ config SW_SYNC - WARNING: improper use of this can result in deadlocking kernel - drivers from userspace. Intended for test and debug only. - -+config SW_SYNC_DEBUG -+ bool "SW Sync Debug" -+ depends on DEBUG_FS && SW_SYNC && NO_GKI -+ default SW_SYNC -+ help -+ To get current fence point and timeline status. ++ return 0; ++} + - config UDMABUF - bool "userspace dmabuf misc driver" - default n -@@ -103,5 +143,6 @@ menuconfig DMABUF_SYSFS_STATS - in quite some performance problems. 
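For reference, the per-channel bookkeeping above is consumed through the standard devfreq-event API as a simple ratio. A minimal sketch (not part of the patch) of how a caller could turn the data filled in by rockchip_dfi_get_event() into a DDR load percentage; "edev" is assumed to be the event device registered by this driver and ddr_load_percent() is a hypothetical helper name:

    #include <linux/devfreq-event.h>
    #include <linux/math64.h>

    static int ddr_load_percent(struct devfreq_event_dev *edev)
    {
            struct devfreq_event_data edata;
            int ret;

            ret = devfreq_event_get_event(edev, &edata);
            if (ret)
                    return ret;

            /*
             * load_count is the DFI access count, already scaled by burst
             * length (x8 for LPDDR4/4X, x16/(4 << ckr) for LPDDR5, x4
             * otherwise); total_count is the DDRMON cycle count scaled by
             * count_rate.  Their ratio therefore approximates the busier
             * channel's utilisation.
             */
            if (!edata.total_count)
                    return 0;

            return (int)div64_u64(100ULL * edata.load_count,
                                  edata.total_count);
    }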
- - source "drivers/dma-buf/heaps/Kconfig" -+source "drivers/dma-buf/rk_heaps/Kconfig" - - endmenu -diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile -index cdb3bb049..525a73a16 100644 ---- a/drivers/dma-buf/Makefile -+++ b/drivers/dma-buf/Makefile -@@ -1,12 +1,15 @@ - # SPDX-License-Identifier: GPL-2.0-only --obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ -+obj-y := dma-buf-rk.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ - dma-fence-unwrap.o dma-resv.o --obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o -+obj-$(CONFIG_DMABUF_HEAPS) += dma-heap-rk.o - obj-$(CONFIG_DMABUF_HEAPS) += heaps/ -+obj-$(CONFIG_DMABUF_CACHE) += dma-buf-cache.o - obj-$(CONFIG_SYNC_FILE) += sync_file.o --obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o -+obj-$(CONFIG_SW_SYNC) += sw_sync.o -+obj-$(CONFIG_SW_SYNC_DEBUG) += sync_debug.o - obj-$(CONFIG_UDMABUF) += udmabuf.o - obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o -+obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP) += rk_heaps/ ++static __maybe_unused __init int px30_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) + { +- struct device *dev = &pdev->dev; +- struct rockchip_dfi *data; +- struct devfreq_event_desc *desc; + struct device_node *np = pdev->dev.of_node, *node; ++ struct resource *res; ++ u32 val_2, val_3; - dmabuf_selftests-y := \ - selftest.o \ -diff --git a/drivers/dma-buf/dma-buf-cache.c b/drivers/dma-buf/dma-buf-cache.c -new file mode 100644 -index 000000000..5ec8896d3 ---- /dev/null -+++ b/drivers/dma-buf/dma-buf-cache.c -@@ -0,0 +1,198 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. -+ */ +- data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL); +- if (!data) +- return -ENOMEM; ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ data->regs = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(data->regs)) ++ return PTR_ERR(data->regs); + -+#include -+#include -+#undef CONFIG_DMABUF_CACHE -+#include ++ node = of_parse_phandle(np, "rockchip,pmugrf", 0); ++ if (node) { ++ data->regmap_pmugrf = syscon_node_to_regmap(node); ++ if (IS_ERR(data->regmap_pmugrf)) ++ return PTR_ERR(data->regmap_pmugrf); ++ } + -+/* NOTE: dma-buf-cache APIs are not irq safe, please DO NOT run in irq context !! 
*/ ++ regmap_read(data->regmap_pmugrf, PX30_PMUGRF_OS_REG2, &val_2); ++ regmap_read(data->regmap_pmugrf, PX30_PMUGRF_OS_REG3, &val_3); ++ if (READ_SYSREG_VERSION(val_3) >= 0x3) ++ data->dram_type = READ_DRAMTYPE_INFO_V3(val_2, val_3); ++ else ++ data->dram_type = READ_DRAMTYPE_INFO(val_2); ++ data->ch_msk = 1; ++ data->clk = NULL; + -+struct dma_buf_cache_list { -+ struct list_head head; -+}; ++ desc->ops = &rockchip_dfi_ops; + -+struct dma_buf_cache { -+ struct list_head list; -+ struct dma_buf_attachment *attach; -+ enum dma_data_direction direction; -+ struct sg_table *sg_table; -+}; ++ return 0; ++} + -+static int dma_buf_cache_destructor(struct dma_buf *dmabuf, void *dtor_data) ++static __maybe_unused __init int rk3128_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) +{ -+ struct dma_buf_cache_list *data; -+ struct dma_buf_cache *cache, *tmp; -+ -+ mutex_lock(&dmabuf->cache_lock); -+ -+ data = dmabuf->dtor_data; -+ -+ list_for_each_entry_safe(cache, tmp, &data->head, list) { -+ if (!IS_ERR_OR_NULL(cache->sg_table)) -+ dma_buf_unmap_attachment(cache->attach, -+ cache->sg_table, -+ cache->direction); ++ struct device_node *np = pdev->dev.of_node, *node; + -+ dma_buf_detach(dmabuf, cache->attach); -+ list_del(&cache->list); -+ kfree(cache); ++ node = of_parse_phandle(np, "rockchip,grf", 0); ++ if (node) { ++ data->regmap_grf = syscon_node_to_regmap(node); ++ if (IS_ERR(data->regmap_grf)) ++ return PTR_ERR(data->regmap_grf); + } + -+ mutex_unlock(&dmabuf->cache_lock); ++ desc->ops = &rk3128_dfi_ops; + -+ kfree(data); + return 0; +} + -+static struct dma_buf_cache * -+dma_buf_cache_get_cache(struct dma_buf_attachment *attach) ++static __maybe_unused __init int rk3288_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) +{ -+ struct dma_buf_cache_list *data; -+ struct dma_buf_cache *cache; -+ struct dma_buf *dmabuf = attach->dmabuf; -+ -+ if (dmabuf->dtor != dma_buf_cache_destructor) -+ return NULL; -+ -+ data = dmabuf->dtor_data; ++ struct device_node *np = pdev->dev.of_node, *node; ++ u32 val; + -+ list_for_each_entry(cache, &data->head, list) { -+ if (cache->attach == attach) -+ return cache; ++ node = of_parse_phandle(np, "rockchip,pmu", 0); ++ if (node) { ++ data->regmap_pmu = syscon_node_to_regmap(node); ++ if (IS_ERR(data->regmap_pmu)) ++ return PTR_ERR(data->regmap_pmu); + } + -+ return NULL; -+} ++ node = of_parse_phandle(np, "rockchip,grf", 0); ++ if (node) { ++ data->regmap_grf = syscon_node_to_regmap(node); ++ if (IS_ERR(data->regmap_grf)) ++ return PTR_ERR(data->regmap_grf); ++ } + -+void dma_buf_cache_detach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attach) -+{ -+ struct dma_buf_cache *cache; ++ regmap_read(data->regmap_pmu, RK3288_PMU_SYS_REG2, &val); ++ data->dram_type = READ_DRAMTYPE_INFO(val); ++ data->ch_msk = READ_CH_INFO(val); + -+ mutex_lock(&dmabuf->cache_lock); ++ if (data->dram_type == DDR3) ++ regmap_write(data->regmap_grf, RK3288_GRF_SOC_CON4, ++ RK3288_DDR3_SEL); ++ else ++ regmap_write(data->regmap_grf, RK3288_GRF_SOC_CON4, ++ RK3288_LPDDR_SEL); + -+ cache = dma_buf_cache_get_cache(attach); -+ if (!cache) -+ dma_buf_detach(dmabuf, attach); ++ desc->ops = &rk3288_dfi_ops; + -+ mutex_unlock(&dmabuf->cache_lock); ++ return 0; +} -+EXPORT_SYMBOL(dma_buf_cache_detach); + -+struct dma_buf_attachment *dma_buf_cache_attach(struct dma_buf *dmabuf, -+ struct device *dev) ++static __maybe_unused __init int rk3368_dfi_init(struct platform_device *pdev, ++ 
struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) +{ -+ struct dma_buf_attachment *attach; -+ struct dma_buf_cache_list *data; -+ struct dma_buf_cache *cache; -+ -+ mutex_lock(&dmabuf->cache_lock); ++ struct device *dev = &pdev->dev; + -+ if (!dmabuf->dtor) { -+ data = kzalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) { -+ attach = ERR_PTR(-ENOMEM); -+ goto err_data; -+ } -+ INIT_LIST_HEAD(&data->head); -+ dma_buf_set_destructor(dmabuf, dma_buf_cache_destructor, data); -+ } ++ if (!dev->parent || !dev->parent->of_node) ++ return -EINVAL; + -+ if (dmabuf->dtor && dmabuf->dtor != dma_buf_cache_destructor) { -+ attach = dma_buf_attach(dmabuf, dev); -+ goto attach_done; -+ } ++ data->regmap_grf = syscon_node_to_regmap(dev->parent->of_node); ++ if (IS_ERR(data->regmap_grf)) ++ return PTR_ERR(data->regmap_grf); + -+ data = dmabuf->dtor_data; ++ desc->ops = &rk3368_dfi_ops; + -+ list_for_each_entry(cache, &data->head, list) { -+ if (cache->attach->dev == dev) { -+ /* Already attached */ -+ attach = cache->attach; -+ goto attach_done; -+ } -+ } ++ return 0; ++} + -+ cache = kzalloc(sizeof(*cache), GFP_KERNEL); -+ if (!cache) { -+ attach = ERR_PTR(-ENOMEM); -+ goto err_cache; ++static __maybe_unused __init int rockchip_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) ++{ ++ struct device *dev = &pdev->dev; ++ struct device_node *np = pdev->dev.of_node, *node; ++ u32 val; + + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) +@@ -193,32 +674,132 @@ static int rockchip_dfi_probe(struct platform_device *pdev) + return dev_err_probe(dev, PTR_ERR(data->clk), + "Cannot get the clk pclk_ddr_mon\n"); + ++ /* try to find the optional reference to the pmu syscon */ + node = of_parse_phandle(np, "rockchip,pmu", 0); +- if (!node) +- return dev_err_probe(&pdev->dev, -ENODEV, "Can't find pmu_grf registers\n"); ++ if (node) { ++ data->regmap_pmu = syscon_node_to_regmap(node); ++ of_node_put(node); ++ if (IS_ERR(data->regmap_pmu)) ++ return PTR_ERR(data->regmap_pmu); + } -+ /* Cache attachment */ -+ attach = dma_buf_attach(dmabuf, dev); -+ if (IS_ERR_OR_NULL(attach)) -+ goto err_attach; + -+ cache->attach = attach; -+ list_add(&cache->list, &data->head); ++ regmap_read(data->regmap_pmu, PMUGRF_OS_REG2, &val); ++ data->dram_type = READ_DRAMTYPE_INFO(val); ++ data->ch_msk = READ_CH_INFO(val); + -+attach_done: -+ mutex_unlock(&dmabuf->cache_lock); -+ return attach; ++ desc->ops = &rockchip_dfi_ops; + -+err_attach: -+ kfree(cache); -+err_cache: -+ kfree(data); -+ dma_buf_set_destructor(dmabuf, NULL, NULL); -+err_data: -+ mutex_unlock(&dmabuf->cache_lock); -+ return attach; ++ return 0; +} -+EXPORT_SYMBOL(dma_buf_cache_attach); + -+void dma_buf_cache_unmap_attachment(struct dma_buf_attachment *attach, -+ struct sg_table *sg_table, -+ enum dma_data_direction direction) ++static __maybe_unused __init int rk3328_dfi_init(struct platform_device *pdev, ++ struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc) +{ -+ struct dma_buf *dmabuf = attach->dmabuf; -+ struct dma_buf_cache *cache; ++ struct device_node *np = pdev->dev.of_node, *node; ++ struct resource *res; ++ u32 val; + -+ mutex_lock(&dmabuf->cache_lock); ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ data->regs = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(data->regs)) ++ return PTR_ERR(data->regs); + -+ cache = dma_buf_cache_get_cache(attach); -+ if (!cache) -+ dma_buf_unmap_attachment(attach, sg_table, direction); ++ node = 
of_parse_phandle(np, "rockchip,grf", 0); ++ if (node) { ++ data->regmap_grf = syscon_node_to_regmap(node); ++ if (IS_ERR(data->regmap_grf)) ++ return PTR_ERR(data->regmap_grf); ++ } + -+ mutex_unlock(&dmabuf->cache_lock); ++ regmap_read(data->regmap_grf, RK3328_GRF_OS_REG2, &val); ++ data->dram_type = READ_DRAMTYPE_INFO(val); ++ data->ch_msk = 1; ++ data->clk = NULL; + +- data->regmap_pmu = syscon_node_to_regmap(node); +- of_node_put(node); +- if (IS_ERR(data->regmap_pmu)) +- return PTR_ERR(data->regmap_pmu); ++ desc->ops = &rockchip_dfi_ops; ++ ++ return 0; +} -+EXPORT_SYMBOL(dma_buf_cache_unmap_attachment); + +- data->dev = dev; ++static const struct of_device_id rockchip_dfi_id_match[] = { ++#ifdef CONFIG_CPU_PX30 ++ { .compatible = "rockchip,px30-dfi", .data = px30_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK1808 ++ { .compatible = "rockchip,rk1808-dfi", .data = px30_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK312X ++ { .compatible = "rockchip,rk3128-dfi", .data = rk3128_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3288 ++ { .compatible = "rockchip,rk3288-dfi", .data = rk3288_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3328 ++ { .compatible = "rockchip,rk3328-dfi", .data = rk3328_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3368 ++ { .compatible = "rockchip,rk3368-dfi", .data = rk3368_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3399 ++ { .compatible = "rockchip,rk3399-dfi", .data = rockchip_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3562 ++ { .compatible = "rockchip,rk3562-dfi", .data = px30_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3568 ++ { .compatible = "rockchip,rk3568-dfi", .data = px30_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RK3588 ++ { .compatible = "rockchip,rk3588-dfi", .data = rk3588_dfi_init }, ++#endif ++#ifdef CONFIG_CPU_RV1126 ++ { .compatible = "rockchip,rv1126-dfi", .data = px30_dfi_init }, ++#endif ++ { }, ++}; + -+struct sg_table *dma_buf_cache_map_attachment(struct dma_buf_attachment *attach, -+ enum dma_data_direction direction) ++static int rockchip_dfi_probe(struct platform_device *pdev) +{ -+ struct dma_buf *dmabuf = attach->dmabuf; -+ struct dma_buf_cache *cache; -+ struct sg_table *sg_table; -+ -+ mutex_lock(&dmabuf->cache_lock); ++ struct device *dev = &pdev->dev; ++ struct rockchip_dfi *data; ++ struct devfreq_event_desc *desc; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; ++ int (*init)(struct platform_device *pdev, struct rockchip_dfi *data, ++ struct devfreq_event_desc *desc); + -+ cache = dma_buf_cache_get_cache(attach); -+ if (!cache) { -+ sg_table = dma_buf_map_attachment(attach, direction); -+ goto map_done; -+ } -+ if (cache->sg_table) { -+ /* Already mapped */ -+ if (cache->direction == direction) { -+ sg_table = cache->sg_table; -+ goto map_done; ++ data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + +- desc->ops = &rockchip_dfi_ops; ++ match = of_match_node(rockchip_dfi_id_match, pdev->dev.of_node); ++ if (match) { ++ init = match->data; ++ if (init) { ++ if (init(pdev, data, desc)) ++ return -EINVAL; ++ } else { ++ return 0; + } -+ /* Different directions */ -+ dma_buf_unmap_attachment(attach, cache->sg_table, -+ cache->direction); ++ } else { ++ return 0; + } + -+ /* Cache map */ -+ sg_table = dma_buf_map_attachment(attach, direction); -+ cache->sg_table = sg_table; -+ cache->direction = direction; -+ -+map_done: -+ mutex_unlock(&dmabuf->cache_lock); -+ return sg_table; -+} 
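The cached helpers are intended as importer-side drop-ins for dma_buf_attach()/dma_buf_map_attachment(). A short sketch (not part of the patch) of the expected calling pattern; the function and variable names here are hypothetical, and the header exporting the dma_buf_cache_* prototypes is assumed to be the one this patch adds alongside <linux/dma-buf.h>:

    #include <linux/dma-buf.h>
    #include <linux/err.h>

    static struct sg_table *rkdemo_map_buffer(struct device *dev,
                                              struct dma_buf *dmabuf)
    {
            struct dma_buf_attachment *attach;
            struct sg_table *sgt;

            /* A repeat call from the same device returns the cached attachment. */
            attach = dma_buf_cache_attach(dmabuf, dev);
            if (IS_ERR_OR_NULL(attach))
                    return ERR_PTR(-ENOMEM);

            /* A repeat map in the same direction reuses the cached sg_table. */
            sgt = dma_buf_cache_map_attachment(attach, DMA_BIDIRECTIONAL);
            if (IS_ERR_OR_NULL(sgt))
                    return ERR_PTR(-ENOMEM);

            /*
             * No explicit unmap/detach on teardown: dma_buf_cache_destructor()
             * unmaps and detaches every cached attachment when the dma-buf is
             * finally released.  The caller only keeps its dma_buf reference
             * for as long as the mapping is in use.
             */
            return sgt;
    }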
-+EXPORT_SYMBOL(dma_buf_cache_map_attachment); -diff --git a/drivers/dma-buf/dma-buf-rk.c b/drivers/dma-buf/dma-buf-rk.c + desc->driver_data = data; + desc->name = np->name; +- data->desc = desc; + +- data->edev = devm_devfreq_event_add_edev(&pdev->dev, desc); ++ data->edev = devm_devfreq_event_add_edev(dev, desc); + if (IS_ERR(data->edev)) { +- dev_err(&pdev->dev, +- "failed to add devfreq-event device\n"); ++ dev_err(dev, "failed to add devfreq-event device\n"); + return PTR_ERR(data->edev); + } ++ data->desc = desc; ++ data->dev = &pdev->dev; + + platform_set_drvdata(pdev, data); + +diff --git a/drivers/devfreq/event/rockchip-nocp.c b/drivers/devfreq/event/rockchip-nocp.c new file mode 100644 -index 000000000..c2f9f3edd +index 000000000..954a27d3f --- /dev/null -+++ b/drivers/dma-buf/dma-buf-rk.c -@@ -0,0 +1,1937 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++++ b/drivers/devfreq/event/rockchip-nocp.c +@@ -0,0 +1,210 @@ +/* -+ * Framework for buffer objects that can be shared across devices/subsystems. ++ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd + * -+ * Copyright(C) 2011 Linaro Limited. All rights reserved. -+ * Author: Sumit Semwal ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. + * -+ * Many thanks to linaro-mm-sig list, and specially -+ * Arnd Bergmann , Rob Clark and -+ * Daniel Vetter for their support in creation and -+ * refining of this idea. ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include + -+#include -+#include ++#define EVENT_BYTE 0x08 ++#define EVENT_CHAIN 0x10 + -+#include "dma-buf-sysfs-stats.h" -+#include "dma-buf-process-info.h" ++#define START_EN BIT(3) ++#define GLOBAL_EN BIT(0) ++#define START_GO BIT(0) + -+static inline int is_dma_buf_file(struct file *); ++#define PROBE_MAINCTL 0x0008 ++#define PROBE_CFGCTL 0x000c ++#define PROBE_STATPERIOD 0x0024 ++#define PROBE_STATGO 0x0028 + -+struct dma_buf_list { -+ struct list_head head; -+ struct mutex lock; ++struct nocp_info { ++ u32 counter0_src; ++ u32 counter0_val; ++ u32 counter1_src; ++ u32 counter1_val; +}; + -+static struct dma_buf_list db_list; ++struct rockchip_nocp { ++ void __iomem *reg_base; ++ struct device *dev; ++ struct devfreq_event_dev *edev; ++ struct devfreq_event_desc *desc; ++ const struct nocp_info *info; ++ ktime_t time; ++}; + -+/** -+ * dma_buf_get_each - Helps in traversing the db_list and calls the -+ * callback function which can extract required info out of each -+ * dmabuf. -+ * The db_list needs to be locked to prevent the db_list from being -+ * dynamically updated during the traversal process. -+ * -+ * @callback: [in] Handle for each dmabuf buffer in db_list. -+ * @private: [in] User-defined, used to pass in when callback is -+ * called. -+ * -+ * Returns 0 on success, otherwise returns a non-zero value for -+ * mutex_lock_interruptible or callback. 
-+ */ -+int dma_buf_get_each(int (*callback)(const struct dma_buf *dmabuf, -+ void *private), void *private) ++static int rockchip_nocp_enable(struct devfreq_event_dev *edev) +{ -+ struct dma_buf *buf; -+ int ret = mutex_lock_interruptible(&db_list.lock); -+ -+ if (ret) -+ return ret; ++ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); ++ const struct nocp_info *info = nocp->info; ++ void __iomem *reg_base = nocp->reg_base; + -+ list_for_each_entry(buf, &db_list.head, list_node) { -+ ret = callback(buf, private); -+ if (ret) -+ break; -+ } -+ mutex_unlock(&db_list.lock); -+ return ret; -+} -+EXPORT_SYMBOL_NS_GPL(dma_buf_get_each, MINIDUMP); ++ writel_relaxed(GLOBAL_EN, reg_base + PROBE_CFGCTL); ++ writel_relaxed(START_EN, reg_base + PROBE_MAINCTL); ++ writel_relaxed(0, reg_base + PROBE_STATPERIOD); ++ writel_relaxed(EVENT_BYTE, reg_base + info->counter0_src); ++ writel_relaxed(EVENT_CHAIN, reg_base + info->counter1_src); ++ writel_relaxed(START_GO, reg_base + PROBE_STATGO); + -+#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) -+static size_t db_total_size; -+static size_t db_peak_size; ++ nocp->time = ktime_get(); + -+void dma_buf_reset_peak_size(void) -+{ -+ mutex_lock(&db_list.lock); -+ db_peak_size = 0; -+ mutex_unlock(&db_list.lock); ++ return 0; +} -+EXPORT_SYMBOL_GPL(dma_buf_reset_peak_size); + -+size_t dma_buf_get_peak_size(void) ++static int rockchip_nocp_disable(struct devfreq_event_dev *edev) +{ -+ size_t sz; ++ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); ++ const struct nocp_info *info = nocp->info; ++ void __iomem *reg_base = nocp->reg_base; + -+ mutex_lock(&db_list.lock); -+ sz = db_peak_size; -+ mutex_unlock(&db_list.lock); ++ writel_relaxed(0, reg_base + PROBE_STATGO); ++ writel_relaxed(0, reg_base + PROBE_MAINCTL); ++ writel_relaxed(0, reg_base + PROBE_CFGCTL); ++ writel_relaxed(0, reg_base + info->counter0_src); ++ writel_relaxed(0, reg_base + info->counter1_src); + -+ return sz; ++ return 0; +} -+EXPORT_SYMBOL_GPL(dma_buf_get_peak_size); + -+size_t dma_buf_get_total_size(void) ++static int rockchip_nocp_get_event(struct devfreq_event_dev *edev, ++ struct devfreq_event_data *edata) +{ -+ size_t sz; -+ -+ mutex_lock(&db_list.lock); -+ sz = db_total_size; -+ mutex_unlock(&db_list.lock); ++ struct rockchip_nocp *nocp = devfreq_event_get_drvdata(edev); ++ const struct nocp_info *info = nocp->info; ++ void __iomem *reg_base = nocp->reg_base; ++ u32 counter = 0, counter0 = 0, counter1 = 0; ++ int time_ms = 0; + -+ return sz; -+} -+EXPORT_SYMBOL_GPL(dma_buf_get_total_size); -+#endif ++ time_ms = ktime_to_ms(ktime_sub(ktime_get(), nocp->time)); + -+static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) -+{ -+ struct dma_buf *dmabuf; -+ char name[DMA_BUF_NAME_LEN]; -+ size_t ret = 0; ++ counter0 = readl_relaxed(reg_base + info->counter0_val); ++ counter1 = readl_relaxed(reg_base + info->counter1_val); ++ counter = (counter0 & 0xffff) | ((counter1 & 0xffff) << 16); ++ counter = counter / 1000000; ++ if (time_ms > 0) ++ edata->load_count = (counter * 1000) / time_ms; + -+ dmabuf = dentry->d_fsdata; -+ spin_lock(&dmabuf->name_lock); -+ if (dmabuf->name) -+ ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN); -+ spin_unlock(&dmabuf->name_lock); ++ writel_relaxed(START_GO, reg_base + PROBE_STATGO); ++ nocp->time = ktime_get(); + -+ return dynamic_dname(buffer, buflen, "/%s:%s", -+ dentry->d_name.name, ret > 0 ? 
name : ""); ++ return 0; +} + -+static void dma_buf_release(struct dentry *dentry) ++static int rockchip_nocp_set_event(struct devfreq_event_dev *edev) +{ -+ struct dma_buf *dmabuf; -+#ifdef CONFIG_DMABUF_CACHE -+ int dtor_ret = 0; -+#endif -+ -+ dmabuf = dentry->d_fsdata; -+ if (unlikely(!dmabuf)) -+ return; -+ -+ BUG_ON(dmabuf->vmapping_counter); -+ -+ /* -+ * If you hit this BUG() it could mean: -+ * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else -+ * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback -+ */ -+ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); ++ return 0; ++} + -+ dma_buf_stats_teardown(dmabuf); -+#ifdef CONFIG_DMABUF_CACHE -+ if (dmabuf->dtor) -+ dtor_ret = dmabuf->dtor(dmabuf, dmabuf->dtor_data); ++static const struct devfreq_event_ops rockchip_nocp_ops = { ++ .disable = rockchip_nocp_disable, ++ .enable = rockchip_nocp_enable, ++ .get_event = rockchip_nocp_get_event, ++ .set_event = rockchip_nocp_set_event, ++}; + -+ if (!dtor_ret) -+#endif -+ dmabuf->ops->release(dmabuf); ++static const struct nocp_info rk3288_nocp = { ++ .counter0_src = 0x138, ++ .counter0_val = 0x13c, ++ .counter1_src = 0x14c, ++ .counter1_val = 0x150, ++}; + -+ if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) -+ dma_resv_fini(dmabuf->resv); ++static const struct nocp_info rk3568_nocp = { ++ .counter0_src = 0x204, ++ .counter0_val = 0x20c, ++ .counter1_src = 0x214, ++ .counter1_val = 0x21c, ++}; + -+ WARN_ON(!list_empty(&dmabuf->attachments)); -+ module_put(dmabuf->owner); -+ kfree(dmabuf->name); -+ kfree(dmabuf); -+} ++static const struct of_device_id rockchip_nocp_id_match[] = { ++ { ++ .compatible = "rockchip,rk3288-nocp", ++ .data = (void *)&rk3288_nocp, ++ }, ++ { ++ .compatible = "rockchip,rk3368-nocp", ++ .data = (void *)&rk3288_nocp, ++ }, ++ { ++ .compatible = "rockchip,rk3399-nocp", ++ .data = (void *)&rk3288_nocp, ++ }, ++ { ++ .compatible = "rockchip,rk3568-nocp", ++ .data = (void *)&rk3568_nocp, ++ }, ++ { }, ++}; + -+static int dma_buf_file_release(struct inode *inode, struct file *file) ++static int rockchip_nocp_probe(struct platform_device *pdev) +{ -+ struct dma_buf *dmabuf; -+ -+ if (!is_dma_buf_file(file)) -+ return -EINVAL; ++ struct resource *res; ++ struct rockchip_nocp *nocp; ++ struct devfreq_event_desc *desc; ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *match; + -+ dmabuf = file->private_data; -+ if (dmabuf) { -+ mutex_lock(&db_list.lock); -+#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) -+ db_total_size -= dmabuf->size; -+#endif -+ list_del(&dmabuf->list_node); -+ mutex_unlock(&db_list.lock); ++ match = of_match_device(rockchip_nocp_id_match, &pdev->dev); ++ if (!match || !match->data) { ++ dev_err(&pdev->dev, "missing nocp data\n"); ++ return -ENODEV; + } + -+ return 0; -+} -+ -+static const struct dentry_operations dma_buf_dentry_ops = { -+ .d_dname = dmabuffs_dname, -+ .d_release = dma_buf_release, -+}; ++ nocp = devm_kzalloc(&pdev->dev, sizeof(*nocp), GFP_KERNEL); ++ if (!nocp) ++ return -ENOMEM; + -+static struct vfsmount *dma_buf_mnt; ++ nocp->info = match->data; + -+static int dma_buf_fs_init_context(struct fs_context *fc) -+{ -+ struct pseudo_fs_context *ctx; ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ nocp->reg_base = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(nocp->reg_base)) ++ return PTR_ERR(nocp->reg_base); + -+ ctx = init_pseudo(fc, DMA_BUF_MAGIC); -+ if (!ctx) ++ desc = devm_kzalloc(&pdev->dev, sizeof(*desc), GFP_KERNEL); ++ if (!desc) + 
return -ENOMEM; -+ ctx->dops = &dma_buf_dentry_ops; -+ return 0; -+} -+ -+static struct file_system_type dma_buf_fs_type = { -+ .name = "dmabuf", -+ .init_fs_context = dma_buf_fs_init_context, -+ .kill_sb = kill_anon_super, -+}; + -+static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma) -+{ -+ struct dma_buf *dmabuf; ++ desc->ops = &rockchip_nocp_ops; ++ desc->driver_data = nocp; ++ desc->name = np->name; ++ nocp->desc = desc; ++ nocp->dev = &pdev->dev; ++ nocp->edev = devm_devfreq_event_add_edev(&pdev->dev, desc); ++ if (IS_ERR(nocp->edev)) { ++ dev_err(&pdev->dev, "failed to add devfreq-event device\n"); ++ return PTR_ERR(nocp->edev); ++ } + -+ if (!is_dma_buf_file(file)) -+ return -EINVAL; ++ platform_set_drvdata(pdev, nocp); + -+ dmabuf = file->private_data; ++ return 0; ++} + -+ /* check if buffer supports mmap */ -+ if (!dmabuf->ops->mmap) -+ return -EINVAL; ++static struct platform_driver rockchip_nocp_driver = { ++ .probe = rockchip_nocp_probe, ++ .driver = { ++ .name = "rockchip-nocp", ++ .of_match_table = rockchip_nocp_id_match, ++ }, ++}; ++module_platform_driver(rockchip_nocp_driver); + -+ /* check for overflowing the buffer's size */ -+ if (vma->vm_pgoff + vma_pages(vma) > -+ dmabuf->size >> PAGE_SHIFT) -+ return -EINVAL; ++MODULE_DESCRIPTION("Rockchip NoC (Network on Chip) Probe driver"); ++MODULE_AUTHOR("Finley Xiao "); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c +index daff40702..fd2c5ffed 100644 +--- a/drivers/devfreq/rk3399_dmc.c ++++ b/drivers/devfreq/rk3399_dmc.c +@@ -22,6 +22,7 @@ + #include + + #include ++#include + #include + #include + +@@ -381,17 +382,16 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) + } + + regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); +- ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & +- RK3399_PMUGRF_DDRTYPE_MASK; ++ ddr_type = FIELD_GET(RK3399_PMUGRF_OS_REG2_DDRTYPE, val); + + switch (ddr_type) { +- case RK3399_PMUGRF_DDRTYPE_DDR3: ++ case ROCKCHIP_DDRTYPE_DDR3: + data->odt_dis_freq = data->ddr3_odt_dis_freq; + break; +- case RK3399_PMUGRF_DDRTYPE_LPDDR3: ++ case ROCKCHIP_DDRTYPE_LPDDR3: + data->odt_dis_freq = data->lpddr3_odt_dis_freq; + break; +- case RK3399_PMUGRF_DDRTYPE_LPDDR4: ++ case ROCKCHIP_DDRTYPE_LPDDR4: + data->odt_dis_freq = data->lpddr4_odt_dis_freq; + break; + default: +diff --git a/drivers/devfreq/rockchip_bus.c b/drivers/devfreq/rockchip_bus.c +new file mode 100644 +index 000000000..0f5487eae +--- /dev/null ++++ b/drivers/devfreq/rockchip_bus.c +@@ -0,0 +1,548 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2018, Fuzhou Rockchip Electronics Co., Ltd. 
++ * Author: Tony Xie ++ */ + -+ return dmabuf->ops->mmap(dmabuf, vma); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) -+{ -+ struct dma_buf *dmabuf; -+ loff_t base; ++#define CLUSTER0 0 ++#define CLUSTER1 1 ++#define MAX_CLUSTERS 2 + -+ if (!is_dma_buf_file(file)) -+ return -EBADF; ++#define to_rockchip_bus_clk_nb(nb) \ ++ container_of(nb, struct rockchip_bus, clk_nb) ++#define to_rockchip_bus_cpufreq_nb(nb) \ ++ container_of(nb, struct rockchip_bus, cpufreq_nb) + -+ dmabuf = file->private_data; ++struct busfreq_table { ++ unsigned long freq; ++ unsigned long volt; ++}; + -+ /* only support discovering the end of the buffer, -+ but also allow SEEK_SET to maintain the idiomatic -+ SEEK_END(0), SEEK_CUR(0) pattern */ -+ if (whence == SEEK_END) -+ base = dmabuf->size; -+ else if (whence == SEEK_SET) -+ base = 0; -+ else -+ return -EINVAL; ++struct rockchip_bus { ++ struct device *dev; ++ struct regulator *regulator; ++ struct clk *clk; ++ struct notifier_block clk_nb; ++ struct notifier_block cpufreq_nb; ++ struct busfreq_table *freq_table; ++ struct rockchip_opp_info opp_info; + -+ if (offset != 0) -+ return -EINVAL; ++ unsigned int max_state; + -+ return base + offset; -+} ++ unsigned long cur_volt; ++ unsigned long cur_rate; + -+/** -+ * DOC: implicit fence polling -+ * -+ * To support cross-device and cross-driver synchronization of buffer access -+ * implicit fences (represented internally in the kernel with &struct dma_fence) -+ * can be attached to a &dma_buf. The glue for that and a few related things are -+ * provided in the &dma_resv structure. -+ * -+ * Userspace can query the state of these implicitly tracked fences using poll() -+ * and related system calls: -+ * -+ * - Checking for EPOLLIN, i.e. read access, can be use to query the state of the -+ * most recent write or exclusive fence. -+ * -+ * - Checking for EPOLLOUT, i.e. write access, can be used to query the state of -+ * all attached fences, shared and exclusive ones. -+ * -+ * Note that this only signals the completion of the respective fences, i.e. the -+ * DMA transfers are complete. Cache flushing and any other necessary -+ * preparations before CPU access can begin still need to happen. -+ * -+ * As an alternative to poll(), the set of fences on DMA buffer can be -+ * exported as a &sync_file using &dma_buf_sync_file_export. -+ */ ++ /* ++ * Busfreq-policy-cpufreq: ++ * If the cpu frequency of two clusters are both less than or equal to ++ * cpu_high_freq, change bus rate to low_rate, otherwise change it to ++ * high_rate. 
++ */ ++ unsigned long high_rate; ++ unsigned long low_rate; ++ unsigned int cpu_high_freq; ++ unsigned int cpu_freq[MAX_CLUSTERS]; ++}; + -+static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) ++static int rockchip_sip_bus_smc_config(u32 bus_id, u32 cfg, u32 enable_msk) +{ -+ struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb; -+ struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll); -+ unsigned long flags; ++ struct arm_smccc_res res; + -+ spin_lock_irqsave(&dcb->poll->lock, flags); -+ wake_up_locked_poll(dcb->poll, dcb->active); -+ dcb->active = 0; -+ spin_unlock_irqrestore(&dcb->poll->lock, flags); -+ dma_fence_put(fence); -+ /* Paired with get_file in dma_buf_poll */ -+ fput(dmabuf->file); ++ res = sip_smc_bus_config(bus_id, cfg, enable_msk); ++ ++ return res.a0; +} + -+static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, -+ struct dma_buf_poll_cb_t *dcb) ++static int rockchip_bus_smc_config(struct rockchip_bus *bus) +{ -+ struct dma_resv_iter cursor; -+ struct dma_fence *fence; -+ int r; -+ -+ dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), -+ fence) { -+ dma_fence_get(fence); -+ r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); -+ if (!r) -+ return true; -+ dma_fence_put(fence); -+ } ++ struct device *dev = bus->dev; ++ struct device_node *np = dev->of_node; ++ struct device_node *child; ++ unsigned int enable_msk, bus_id, cfg; ++ char *prp_name = "rockchip,soc-bus-table"; ++ u32 *table = NULL; ++ int ret = 0, config_cnt, i; + -+ return false; -+} ++ for_each_available_child_of_node(np, child) { ++ ret = of_property_read_u32_index(child, "bus-id", 0, ++ &bus_id); ++ if (ret) ++ continue; + -+static __poll_t dma_buf_poll(struct file *file, poll_table *poll) -+{ -+ struct dma_buf *dmabuf; -+ struct dma_resv *resv; -+ __poll_t events; ++ ret = of_property_read_u32_index(child, "cfg-val", 0, ++ &cfg); ++ if (ret) { ++ dev_info(dev, "get cfg-val error\n"); ++ continue; ++ } + -+ dmabuf = file->private_data; -+ if (!dmabuf || !dmabuf->resv) -+ return EPOLLERR; ++ if (!cfg) { ++ dev_info(dev, "cfg-val invalid\n"); ++ continue; ++ } + -+ resv = dmabuf->resv; ++ ret = of_property_read_u32_index(child, "enable-msk", 0, ++ &enable_msk); ++ if (ret) { ++ dev_info(dev, "get enable_msk error\n"); ++ continue; ++ } + -+ poll_wait(file, &dmabuf->poll, poll); ++ ret = rockchip_sip_bus_smc_config(bus_id, cfg, ++ enable_msk); ++ if (ret) { ++ dev_info(dev, "bus smc config error: %x!\n", ret); ++ break; ++ } ++ } + -+ events = poll_requested_events(poll) & (EPOLLIN | EPOLLOUT); -+ if (!events) ++ config_cnt = of_property_count_u32_elems(np, prp_name); ++ if (config_cnt <= 0) { + return 0; ++ } else if (config_cnt % 3) { ++ dev_err(dev, "Invalid count of %s\n", prp_name); ++ return -EINVAL; ++ } + -+ dma_resv_lock(resv, NULL); ++ table = kmalloc_array(config_cnt, sizeof(u32), GFP_KERNEL); ++ if (!table) ++ return -ENOMEM; + -+ if (events & EPOLLOUT) { -+ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out; ++ ret = of_property_read_u32_array(np, prp_name, table, config_cnt); ++ if (ret) { ++ dev_err(dev, "get %s error\n", prp_name); ++ goto free_table; ++ } + -+ /* Check that callback isn't busy */ -+ spin_lock_irq(&dmabuf->poll.lock); -+ if (dcb->active) -+ events &= ~EPOLLOUT; -+ else -+ dcb->active = EPOLLOUT; -+ spin_unlock_irq(&dmabuf->poll.lock); ++ /* table[3n]: bus_id ++ * table[3n + 1]: config ++ * table[3n + 2]: enable_mask ++ */ ++ for (i = 0; i < config_cnt; i += 3) { ++ bus_id = table[i]; ++ cfg = 
table[i + 1]; ++ enable_msk = table[i + 2]; + -+ if (events & EPOLLOUT) { -+ /* Paired with fput in dma_buf_poll_cb */ -+ get_file(dmabuf->file); ++ if (!cfg) { ++ dev_info(dev, "cfg-val invalid in %s-%d\n", prp_name, bus_id); ++ continue; ++ } + -+ if (!dma_buf_poll_add_cb(resv, true, dcb)) -+ /* No callback queued, wake up any other waiters */ -+ dma_buf_poll_cb(NULL, &dcb->cb); -+ else -+ events &= ~EPOLLOUT; ++ ret = rockchip_sip_bus_smc_config(bus_id, cfg, enable_msk); ++ if (ret) { ++ dev_err(dev, "bus smc config error: %x!\n", ret); ++ goto free_table; + } + } + -+ if (events & EPOLLIN) { -+ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in; ++free_table: ++ kfree(table); + -+ /* Check that callback isn't busy */ -+ spin_lock_irq(&dmabuf->poll.lock); -+ if (dcb->active) -+ events &= ~EPOLLIN; -+ else -+ dcb->active = EPOLLIN; -+ spin_unlock_irq(&dmabuf->poll.lock); ++ return ret; ++} + -+ if (events & EPOLLIN) { -+ /* Paired with fput in dma_buf_poll_cb */ -+ get_file(dmabuf->file); ++static int rockchip_bus_set_freq_table(struct rockchip_bus *bus) ++{ ++ struct device *dev = bus->dev; ++ struct dev_pm_opp *opp; ++ unsigned long freq; ++ int i, count; + -+ if (!dma_buf_poll_add_cb(resv, false, dcb)) -+ /* No callback queued, wake up any other waiters */ -+ dma_buf_poll_cb(NULL, &dcb->cb); -+ else -+ events &= ~EPOLLIN; ++ count = dev_pm_opp_get_opp_count(dev); ++ if (count <= 0) ++ return -EINVAL; ++ ++ bus->max_state = count; ++ bus->freq_table = devm_kcalloc(dev, ++ bus->max_state, ++ sizeof(*bus->freq_table), ++ GFP_KERNEL); ++ if (!bus->freq_table) { ++ bus->max_state = 0; ++ return -ENOMEM; ++ } ++ ++ for (i = 0, freq = 0; i < bus->max_state; i++, freq++) { ++ opp = dev_pm_opp_find_freq_ceil(dev, &freq); ++ if (IS_ERR(opp)) { ++ devm_kfree(dev, bus->freq_table); ++ bus->max_state = 0; ++ return PTR_ERR(opp); + } ++ bus->freq_table[i].volt = dev_pm_opp_get_voltage(opp); ++ bus->freq_table[i].freq = freq; ++ dev_pm_opp_put(opp); + } + -+ dma_resv_unlock(resv); -+ return events; ++ return 0; +} + -+static long _dma_buf_set_name(struct dma_buf *dmabuf, const char *name) ++static int rockchip_bus_power_control_init(struct rockchip_bus *bus) +{ -+ spin_lock(&dmabuf->name_lock); -+ kfree(dmabuf->name); -+ dmabuf->name = name; -+ spin_unlock(&dmabuf->name_lock); ++ struct device *dev = bus->dev; ++ int ret = 0; + -+ return 0; -+} ++ bus->clk = devm_clk_get(dev, "bus"); ++ if (IS_ERR(bus->clk)) { ++ dev_err(dev, "failed to get bus clock\n"); ++ return PTR_ERR(bus->clk); ++ } + -+/** -+ * dma_buf_set_name - Set a name to a specific dma_buf to track the usage. -+ * It could support changing the name of the dma-buf if the same -+ * piece of memory is used for multiple purpose between different devices. -+ * -+ * @dmabuf: [in] dmabuf buffer that will be renamed. -+ * @buf: [in] A piece of userspace memory that contains the name of -+ * the dma-buf. -+ * -+ * Returns 0 on success. If the dma-buf buffer is already attached to -+ * devices, return -EBUSY. 
-+ * -+ */ -+long dma_buf_set_name(struct dma_buf *dmabuf, const char *name) -+{ -+ long ret = 0; -+ char *buf = kstrndup(name, DMA_BUF_NAME_LEN, GFP_KERNEL); ++ bus->regulator = devm_regulator_get(dev, "bus"); ++ if (IS_ERR(bus->regulator)) { ++ dev_err(dev, "failed to get bus regulator\n"); ++ return PTR_ERR(bus->regulator); ++ } + -+ if (!buf) -+ return -ENOMEM; ++ ret = rockchip_init_opp_table(dev, &bus->opp_info, NULL, "pvtm"); ++ if (ret < 0) { ++ dev_err(dev, "failed to get OPP table\n"); ++ return ret; ++ } + -+ ret = _dma_buf_set_name(dmabuf, buf); -+ if (ret) -+ kfree(buf); ++ ret = rockchip_bus_set_freq_table(bus); ++ if (ret < 0) { ++ dev_err(dev, "failed to set bus freq table\n"); ++ return ret; ++ } + -+ return ret; ++ return 0; +} -+EXPORT_SYMBOL_GPL(dma_buf_set_name); + -+static long dma_buf_set_name_user(struct dma_buf *dmabuf, const char __user *buf) ++static int rockchip_bus_clkfreq_target(struct device *dev, unsigned long freq) +{ -+ char *name = strndup_user(buf, DMA_BUF_NAME_LEN); -+ long ret; ++ struct rockchip_bus *bus = dev_get_drvdata(dev); ++ unsigned long target_volt = bus->freq_table[bus->max_state - 1].volt; ++ int i; + -+ if (IS_ERR(name)) -+ return PTR_ERR(name); ++ for (i = 0; i < bus->max_state; i++) { ++ if (freq <= bus->freq_table[i].freq) { ++ target_volt = bus->freq_table[i].volt; ++ break; ++ } ++ } + -+ ret = _dma_buf_set_name(dmabuf, name); -+ if (ret) -+ kfree(name); ++ if (bus->cur_volt != target_volt) { ++ dev_dbg(bus->dev, "target_volt: %lu\n", target_volt); ++ if (regulator_set_voltage(bus->regulator, target_volt, ++ INT_MAX)) { ++ dev_err(dev, "failed to set voltage %lu uV\n", ++ target_volt); ++ return -EINVAL; ++ } ++ bus->cur_volt = target_volt; ++ } + -+ return ret; ++ return 0; +} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+static long dma_buf_export_sync_file(struct dma_buf *dmabuf, -+ void __user *user_data) ++static int rockchip_bus_clk_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) +{ -+ struct dma_buf_export_sync_file arg; -+ enum dma_resv_usage usage; -+ struct dma_fence *fence = NULL; -+ struct sync_file *sync_file; -+ int fd, ret; ++ struct clk_notifier_data *ndata = data; ++ struct rockchip_bus *bus = to_rockchip_bus_clk_nb(nb); ++ int ret = 0; + -+ if (copy_from_user(&arg, user_data, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags & ~DMA_BUF_SYNC_RW) -+ return -EINVAL; -+ -+ if ((arg.flags & DMA_BUF_SYNC_RW) == 0) -+ return -EINVAL; -+ -+ fd = get_unused_fd_flags(O_CLOEXEC); -+ if (fd < 0) -+ return fd; -+ -+ usage = dma_resv_usage_rw(arg.flags & DMA_BUF_SYNC_WRITE); -+ ret = dma_resv_get_singleton(dmabuf->resv, usage, &fence); -+ if (ret) -+ goto err_put_fd; ++ dev_dbg(bus->dev, "event %lu, old_rate %lu, new_rate: %lu\n", ++ event, ndata->old_rate, ndata->new_rate); + -+ if (!fence) -+ fence = dma_fence_get_stub(); ++ switch (event) { ++ case PRE_RATE_CHANGE: ++ if (ndata->new_rate > ndata->old_rate) ++ ret = rockchip_bus_clkfreq_target(bus->dev, ++ ndata->new_rate); ++ break; ++ case POST_RATE_CHANGE: ++ if (ndata->new_rate < ndata->old_rate) ++ ret = rockchip_bus_clkfreq_target(bus->dev, ++ ndata->new_rate); ++ break; ++ case ABORT_RATE_CHANGE: ++ if (ndata->new_rate > ndata->old_rate) ++ ret = rockchip_bus_clkfreq_target(bus->dev, ++ ndata->old_rate); ++ break; ++ default: ++ break; ++ } + -+ sync_file = sync_file_create(fence); ++ return notifier_from_errno(ret); ++} + -+ dma_fence_put(fence); ++static int rockchip_bus_clkfreq(struct rockchip_bus *bus) ++{ ++ struct device *dev = bus->dev; ++ 
unsigned long init_rate; ++ int ret = 0; + -+ if (!sync_file) { -+ ret = -ENOMEM; -+ goto err_put_fd; ++ ret = rockchip_bus_power_control_init(bus); ++ if (ret) { ++ dev_err(dev, "failed to init power control\n"); ++ return ret; + } + -+ arg.fd = fd; -+ if (copy_to_user(user_data, &arg, sizeof(arg))) { -+ ret = -EFAULT; -+ goto err_put_file; -+ } ++ init_rate = clk_get_rate(bus->clk); ++ ret = rockchip_bus_clkfreq_target(dev, init_rate); ++ if (ret) ++ return ret; + -+ fd_install(fd, sync_file->file); ++ bus->clk_nb.notifier_call = rockchip_bus_clk_notifier; ++ ret = clk_notifier_register(bus->clk, &bus->clk_nb); ++ if (ret) { ++ dev_err(dev, "failed to register clock notifier\n"); ++ return ret; ++ } + + return 0; -+ -+err_put_file: -+ fput(sync_file->file); -+err_put_fd: -+ put_unused_fd(fd); -+ return ret; +} + -+static long dma_buf_import_sync_file(struct dma_buf *dmabuf, -+ const void __user *user_data) ++static int rockchip_bus_cpufreq_target(struct device *dev, unsigned long freq, ++ u32 flags) +{ -+ struct dma_buf_import_sync_file arg; -+ struct dma_fence *fence, *f; -+ enum dma_resv_usage usage; -+ struct dma_fence_unwrap iter; -+ unsigned int num_fences; ++ struct rockchip_bus *bus = dev_get_drvdata(dev); ++ struct dev_pm_opp *opp; ++ unsigned long target_volt, target_rate = freq; + int ret = 0; + -+ if (copy_from_user(&arg, user_data, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags & ~DMA_BUF_SYNC_RW) -+ return -EINVAL; -+ -+ if ((arg.flags & DMA_BUF_SYNC_RW) == 0) -+ return -EINVAL; ++ if (!bus->regulator) { ++ dev_dbg(dev, "%luHz -> %luHz\n", bus->cur_rate, target_rate); ++ ret = clk_set_rate(bus->clk, target_rate); ++ if (ret) ++ dev_err(bus->dev, "failed to set bus rate %lu\n", ++ target_rate); ++ else ++ bus->cur_rate = target_rate; ++ return ret; ++ } + -+ fence = sync_file_get_fence(arg.fd); -+ if (!fence) -+ return -EINVAL; ++ opp = devfreq_recommended_opp(dev, &target_rate, flags); ++ if (IS_ERR(opp)) { ++ dev_err(dev, "failed to recommended opp %lu\n", target_rate); ++ return PTR_ERR(opp); ++ } ++ target_volt = dev_pm_opp_get_voltage(opp); ++ dev_pm_opp_put(opp); + -+ usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? 
DMA_RESV_USAGE_WRITE : -+ DMA_RESV_USAGE_READ; ++ if (bus->cur_rate == target_rate) { ++ if (bus->cur_volt == target_volt) ++ return 0; ++ ret = regulator_set_voltage(bus->regulator, target_volt, ++ INT_MAX); ++ if (ret) { ++ dev_err(dev, "failed to set voltage %lu\n", ++ target_volt); ++ return ret; ++ } ++ bus->cur_volt = target_volt; ++ return 0; ++ } else if (!bus->cur_volt) { ++ bus->cur_volt = regulator_get_voltage(bus->regulator); ++ } + -+ num_fences = 0; -+ dma_fence_unwrap_for_each(f, &iter, fence) -+ ++num_fences; ++ if (bus->cur_rate < target_rate) { ++ ret = regulator_set_voltage(bus->regulator, target_volt, ++ INT_MAX); ++ if (ret) { ++ dev_err(dev, "failed to set voltage %lu\n", ++ target_volt); ++ return ret; ++ } ++ } + -+ if (num_fences > 0) { -+ dma_resv_lock(dmabuf->resv, NULL); ++ ret = clk_set_rate(bus->clk, target_rate); ++ if (ret) { ++ dev_err(dev, "failed to set bus rate %lu\n", target_rate); ++ return ret; ++ } + -+ ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); -+ if (!ret) { -+ dma_fence_unwrap_for_each(f, &iter, fence) -+ dma_resv_add_fence(dmabuf->resv, f, usage); ++ if (bus->cur_rate > target_rate) { ++ ret = regulator_set_voltage(bus->regulator, target_volt, ++ INT_MAX); ++ if (ret) { ++ dev_err(dev, "failed to set voltage %lu\n", ++ target_volt); ++ return ret; + } -+ -+ dma_resv_unlock(dmabuf->resv); + } + -+ dma_fence_put(fence); ++ dev_dbg(dev, "%luHz %luuV -> %luHz %luuV\n", bus->cur_rate, ++ bus->cur_volt, target_rate, target_volt); ++ bus->cur_rate = target_rate; ++ bus->cur_volt = target_volt; + + return ret; +} -+#endif + -+static long dma_buf_ioctl(struct file *file, -+ unsigned int cmd, unsigned long arg) ++static int rockchip_bus_cpufreq_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) +{ -+ struct dma_buf *dmabuf; -+ struct dma_buf_sync sync; -+ enum dma_data_direction direction; -+ int ret; ++ struct rockchip_bus *bus = to_rockchip_bus_cpufreq_nb(nb); ++ struct cpufreq_freqs *freqs = data; ++ int id = topology_physical_package_id(freqs->policy->cpu); + -+ dmabuf = file->private_data; ++ if (id < 0 || id >= MAX_CLUSTERS) ++ return NOTIFY_DONE; + -+ switch (cmd) { -+ case DMA_BUF_IOCTL_SYNC: -+ if (copy_from_user(&sync, (void __user *) arg, sizeof(sync))) -+ return -EFAULT; ++ bus->cpu_freq[id] = freqs->new; + -+ if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK) -+ return -EINVAL; ++ if (!bus->cpu_freq[CLUSTER0] || !bus->cpu_freq[CLUSTER1]) ++ return NOTIFY_DONE; + -+ switch (sync.flags & DMA_BUF_SYNC_RW) { -+ case DMA_BUF_SYNC_READ: -+ direction = DMA_FROM_DEVICE; -+ break; -+ case DMA_BUF_SYNC_WRITE: -+ direction = DMA_TO_DEVICE; -+ break; -+ case DMA_BUF_SYNC_RW: -+ direction = DMA_BIDIRECTIONAL; -+ break; -+ default: -+ return -EINVAL; ++ switch (event) { ++ case CPUFREQ_PRECHANGE: ++ if ((bus->cpu_freq[CLUSTER0] > bus->cpu_high_freq || ++ bus->cpu_freq[CLUSTER1] > bus->cpu_high_freq) && ++ bus->cur_rate != bus->high_rate) { ++ dev_dbg(bus->dev, "cpu%d freq=%d %d, up cci rate to %lu\n", ++ freqs->policy->cpu, ++ bus->cpu_freq[CLUSTER0], ++ bus->cpu_freq[CLUSTER1], ++ bus->high_rate); ++ rockchip_bus_cpufreq_target(bus->dev, bus->high_rate, ++ 0); + } ++ break; ++ case CPUFREQ_POSTCHANGE: ++ if (bus->cpu_freq[CLUSTER0] <= bus->cpu_high_freq && ++ bus->cpu_freq[CLUSTER1] <= bus->cpu_high_freq && ++ bus->cur_rate != bus->low_rate) { ++ dev_dbg(bus->dev, "cpu%d freq=%d %d, down cci rate to %lu\n", ++ freqs->policy->cpu, ++ bus->cpu_freq[CLUSTER0], ++ bus->cpu_freq[CLUSTER1], ++ bus->low_rate); ++ 
rockchip_bus_cpufreq_target(bus->dev, bus->low_rate, ++ 0); ++ } ++ break; ++ } + -+ if (sync.flags & DMA_BUF_SYNC_END) -+ ret = dma_buf_end_cpu_access(dmabuf, direction); -+ else -+ ret = dma_buf_begin_cpu_access(dmabuf, direction); -+ -+ return ret; ++ return NOTIFY_OK; ++} + -+ case DMA_BUF_SET_NAME_A: -+ case DMA_BUF_SET_NAME_B: -+ return dma_buf_set_name_user(dmabuf, (const char __user *)arg); ++static int rockchip_bus_cpufreq(struct rockchip_bus *bus) ++{ ++ struct device *dev = bus->dev; ++ struct device_node *np = dev->of_node; ++ unsigned int freq; ++ int ret = 0; + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case DMA_BUF_IOCTL_EXPORT_SYNC_FILE: -+ return dma_buf_export_sync_file(dmabuf, (void __user *)arg); -+ case DMA_BUF_IOCTL_IMPORT_SYNC_FILE: -+ return dma_buf_import_sync_file(dmabuf, (const void __user *)arg); -+#endif ++ if (of_parse_phandle(dev->of_node, "operating-points-v2", 0)) { ++ ret = rockchip_bus_power_control_init(bus); ++ if (ret) { ++ dev_err(dev, "failed to init power control\n"); ++ return ret; ++ } ++ } else { ++ bus->clk = devm_clk_get(dev, "bus"); ++ if (IS_ERR(bus->clk)) { ++ dev_err(dev, "failed to get bus clock\n"); ++ return PTR_ERR(bus->clk); ++ } ++ bus->regulator = NULL; ++ } + -+#ifdef CONFIG_DMABUF_PARTIAL -+ case DMA_BUF_IOCTL_SYNC_PARTIAL: { -+ struct dma_buf_sync_partial sync_p; ++ ret = of_property_read_u32(np, "cpu-high-freq", &bus->cpu_high_freq); ++ if (ret) { ++ dev_err(dev, "failed to get cpu-high-freq\n"); ++ return ret; ++ } ++ ret = of_property_read_u32(np, "cci-high-freq", &freq); ++ if (ret) { ++ dev_err(dev, "failed to get cci-high-freq\n"); ++ return ret; ++ } ++ bus->high_rate = freq * 1000; ++ ret = of_property_read_u32(np, "cci-low-freq", &freq); ++ if (ret) { ++ dev_err(dev, "failed to get cci-low-freq\n"); ++ return ret; ++ } ++ bus->low_rate = freq * 1000; + -+ if (copy_from_user(&sync_p, (void __user *) arg, sizeof(sync_p))) -+ return -EFAULT; ++ bus->cpufreq_nb.notifier_call = rockchip_bus_cpufreq_notifier; ++ ret = cpufreq_register_notifier(&bus->cpufreq_nb, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ if (ret) { ++ dev_err(dev, "failed to register cpufreq notifier\n"); ++ return ret; ++ } + -+ if (sync_p.len == 0) -+ return 0; ++ return 0; ++} + -+ if (sync_p.len > dmabuf->size || sync_p.offset > dmabuf->size - sync_p.len) -+ return -EINVAL; ++static const struct of_device_id rockchip_busfreq_of_match[] = { ++ { .compatible = "rockchip,px30-bus", }, ++ { .compatible = "rockchip,rk1808-bus", }, ++ { .compatible = "rockchip,rk3288-bus", }, ++ { .compatible = "rockchip,rk3368-bus", }, ++ { .compatible = "rockchip,rk3399-bus", }, ++ { .compatible = "rockchip,rk3528-bus", }, ++ { .compatible = "rockchip,rk3562-bus", }, ++ { .compatible = "rockchip,rk3568-bus", }, ++ { .compatible = "rockchip,rk3588-bus", }, ++ { .compatible = "rockchip,rv1126-bus", }, ++ { }, ++}; + -+ if (sync_p.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK) -+ return -EINVAL; ++MODULE_DEVICE_TABLE(of, rockchip_busfreq_of_match); + -+ switch (sync_p.flags & DMA_BUF_SYNC_RW) { -+ case DMA_BUF_SYNC_READ: -+ direction = DMA_FROM_DEVICE; -+ break; -+ case DMA_BUF_SYNC_WRITE: -+ direction = DMA_TO_DEVICE; -+ break; -+ case DMA_BUF_SYNC_RW: -+ direction = DMA_BIDIRECTIONAL; -+ break; -+ default: -+ return -EINVAL; -+ } ++static int rockchip_busfreq_probe(struct platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct device_node *np = dev->of_node; ++ struct rockchip_bus *bus; ++ const char *policy_name; ++ int ret = 0; + -+ if (sync_p.flags & DMA_BUF_SYNC_END) -+ ret = 
dma_buf_end_cpu_access_partial(dmabuf, direction, -+ sync_p.offset, -+ sync_p.len); -+ else -+ ret = dma_buf_begin_cpu_access_partial(dmabuf, direction, -+ sync_p.offset, -+ sync_p.len); ++ bus = devm_kzalloc(dev, sizeof(*bus), GFP_KERNEL); ++ if (!bus) ++ return -ENOMEM; ++ bus->dev = dev; ++ platform_set_drvdata(pdev, bus); + ++ ret = of_property_read_string(np, "rockchip,busfreq-policy", ++ &policy_name); ++ if (ret) { ++ dev_info(dev, "failed to get busfreq policy\n"); + return ret; + } -+#endif /* CONFIG_DMABUF_PARTIAL */ -+ -+ default: -+ return -ENOTTY; -+ } -+} + -+static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) -+{ -+ struct dma_buf *dmabuf = file->private_data; ++ if (!strcmp(policy_name, "smc")) ++ ret = rockchip_bus_smc_config(bus); ++ else if (!strcmp(policy_name, "clkfreq")) ++ ret = rockchip_bus_clkfreq(bus); ++ else if (!strcmp(policy_name, "cpufreq")) ++ ret = rockchip_bus_cpufreq(bus); + -+ seq_printf(m, "size:\t%zu\n", dmabuf->size); -+ /* Don't count the temporary reference taken inside procfs seq_show */ -+ seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1); -+ seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name); -+ spin_lock(&dmabuf->name_lock); -+ if (dmabuf->name) -+ seq_printf(m, "name:\t%s\n", dmabuf->name); -+ spin_unlock(&dmabuf->name_lock); ++ return ret; +} + -+static const struct file_operations dma_buf_fops = { -+ .release = dma_buf_file_release, -+ .mmap = dma_buf_mmap_internal, -+ .llseek = dma_buf_llseek, -+ .poll = dma_buf_poll, -+ .unlocked_ioctl = dma_buf_ioctl, -+ .compat_ioctl = compat_ptr_ioctl, -+ .show_fdinfo = dma_buf_show_fdinfo, ++static struct platform_driver rockchip_busfreq_driver = { ++ .probe = rockchip_busfreq_probe, ++ .driver = { ++ .name = "rockchip,bus", ++ .of_match_table = rockchip_busfreq_of_match, ++ }, +}; + ++module_platform_driver(rockchip_busfreq_driver); ++ ++MODULE_LICENSE("GPL v2"); ++MODULE_AUTHOR("Tony Xie "); ++MODULE_DESCRIPTION("rockchip busfreq driver with devfreq framework"); +diff --git a/drivers/devfreq/rockchip_dmc.c b/drivers/devfreq/rockchip_dmc.c +new file mode 100644 +index 000000000..4c373c6ab +--- /dev/null ++++ b/drivers/devfreq/rockchip_dmc.c +@@ -0,0 +1,3411 @@ ++// SPDX-License-Identifier: GPL-2.0-only +/* -+ * is_dma_buf_file - Check if struct file* is associated with dma_buf ++ * Rockchip Generic dmc support. ++ * ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ * Author: Finley Xiao + */ -+static inline int is_dma_buf_file(struct file *file) -+{ -+ return file->f_op == &dma_buf_fops; -+} -+ -+static struct file *dma_buf_getfile(size_t size, int flags) -+{ -+ static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); -+ struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); -+ struct file *file; + -+ if (IS_ERR(inode)) -+ return ERR_CAST(inode); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ inode->i_size = size; -+ inode_set_bytes(inode, size); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* -+ * The ->i_ino acquired from get_next_ino() is not unique thus -+ * not suitable for using it as dentry name by dmabuf stats. -+ * Override ->i_ino with the unique and dmabuffs specific -+ * value. 
-+ */ -+ inode->i_ino = atomic64_add_return(1, &dmabuf_inode); -+ flags &= O_ACCMODE | O_NONBLOCK; -+ file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", -+ flags, &dma_buf_fops); -+ if (IS_ERR(file)) -+ goto err_alloc_file; ++#include "governor.h" ++#include "rockchip_dmc_timing.h" ++#include "../clk/rockchip-oh/clk.h" ++#include "../gpu/drm/rockchip-oh/rockchip_drm_drv.h" ++#include "../opp/opp.h" + -+ return file; ++#define system_status_to_dmcfreq(nb) container_of(nb, struct rockchip_dmcfreq, \ ++ status_nb) ++#define reboot_to_dmcfreq(nb) container_of(nb, struct rockchip_dmcfreq, \ ++ reboot_nb) ++#define boost_to_dmcfreq(work) container_of(work, struct rockchip_dmcfreq, \ ++ boost_work) ++#define input_hd_to_dmcfreq(hd) container_of(hd, struct rockchip_dmcfreq, \ ++ input_handler) + -+err_alloc_file: -+ iput(inode); -+ return file; -+} ++#define VIDEO_1080P_SIZE (1920 * 1080) ++#define DTS_PAR_OFFSET (4096) + -+static void dma_buf_set_default_name(struct dma_buf *dmabuf) -+{ -+ char task_comm[TASK_COMM_LEN]; -+ char *name; ++#define FALLBACK_STATIC_TEMPERATURE 55000 + -+ get_task_comm(task_comm, current->group_leader); -+ name = kasprintf(GFP_KERNEL, "%d-%s", current->tgid, task_comm); -+ dma_buf_set_name(dmabuf, name); -+ kfree(name); -+} ++struct dmc_freq_table { ++ unsigned long freq; ++ struct dev_pm_opp_supply supplies[2]; ++}; + -+/** -+ * DOC: dma buf device access -+ * -+ * For device DMA access to a shared DMA buffer the usual sequence of operations -+ * is fairly simple: -+ * -+ * 1. The exporter defines his exporter instance using -+ * DEFINE_DMA_BUF_EXPORT_INFO() and calls dma_buf_export() to wrap a private -+ * buffer object into a &dma_buf. It then exports that &dma_buf to userspace -+ * as a file descriptor by calling dma_buf_fd(). -+ * -+ * 2. Userspace passes this file-descriptors to all drivers it wants this buffer -+ * to share with: First the file descriptor is converted to a &dma_buf using -+ * dma_buf_get(). Then the buffer is attached to the device using -+ * dma_buf_attach(). -+ * -+ * Up to this stage the exporter is still free to migrate or reallocate the -+ * backing storage. -+ * -+ * 3. Once the buffer is attached to all devices userspace can initiate DMA -+ * access to the shared buffer. In the kernel this is done by calling -+ * dma_buf_map_attachment() and dma_buf_unmap_attachment(). -+ * -+ * 4. Once a driver is done with a shared buffer it needs to call -+ * dma_buf_detach() (after cleaning up any mappings) and then release the -+ * reference acquired with dma_buf_get() by calling dma_buf_put(). -+ * -+ * For the detailed semantics exporters are expected to implement see -+ * &dma_buf_ops. -+ */ ++struct share_params { ++ u32 hz; ++ u32 lcdc_type; ++ u32 vop; ++ u32 vop_dclk_mode; ++ u32 sr_idle_en; ++ u32 addr_mcu_el3; ++ /* ++ * 1: need to wait flag1 ++ * 0: never wait flag1 ++ */ ++ u32 wait_flag1; ++ /* ++ * 1: need to wait flag1 ++ * 0: never wait flag1 ++ */ ++ u32 wait_flag0; ++ u32 complt_hwirq; ++ u32 update_drv_odt_cfg; ++ u32 update_deskew_cfg; + -+/** -+ * dma_buf_export - Creates a new dma_buf, and associates an anon file -+ * with this buffer, so it can be exported. -+ * Also connect the allocator specific data and ops to the buffer. -+ * Additionally, provide a name string for exporter; useful in debugging. -+ * -+ * @exp_info: [in] holds all the export related information provided -+ * by the exporter. see &struct dma_buf_export_info -+ * for further details. 
-+ * -+ * Returns, on success, a newly created struct dma_buf object, which wraps the -+ * supplied private data and operations for struct dma_buf_ops. On either -+ * missing ops, or error in allocating struct dma_buf, will return negative -+ * error. -+ * -+ * For most cases the easiest way to create @exp_info is through the -+ * %DEFINE_DMA_BUF_EXPORT_INFO macro. -+ */ -+struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) -+{ -+ struct dma_buf *dmabuf; -+ struct dma_resv *resv = exp_info->resv; -+ struct file *file; -+ size_t alloc_size = sizeof(struct dma_buf); -+ int ret; ++ u32 freq_count; ++ u32 freq_info_mhz[6]; ++ u32 wait_mode; ++ u32 vop_scan_line_time_ns; ++ /* if need, add parameter after */ ++}; + -+ if (WARN_ON(!exp_info->priv || !exp_info->ops -+ || !exp_info->ops->map_dma_buf -+ || !exp_info->ops->unmap_dma_buf -+ || !exp_info->ops->release)) -+ return ERR_PTR(-EINVAL); ++static struct share_params *ddr_psci_param; + -+ if (WARN_ON(exp_info->ops->cache_sgt_mapping && -+ (exp_info->ops->pin || exp_info->ops->unpin))) -+ return ERR_PTR(-EINVAL); ++struct rockchip_dmcfreq_ondemand_data { ++ unsigned int upthreshold; ++ unsigned int downdifferential; ++}; + -+ if (WARN_ON(!exp_info->ops->pin != !exp_info->ops->unpin)) -+ return ERR_PTR(-EINVAL); ++struct rockchip_dmcfreq { ++ struct device *dev; ++ struct dmcfreq_common_info info; ++ struct rockchip_dmcfreq_ondemand_data ondemand_data; ++ struct clk *dmc_clk; ++ struct devfreq_event_dev **edev; ++ struct mutex lock; /* serializes access to video_info_list */ ++ struct dram_timing *timing; ++ struct notifier_block status_nb; ++ struct notifier_block panic_nb; ++ struct list_head video_info_list; ++ struct freq_map_table *cpu_bw_tbl; ++ struct work_struct boost_work; ++ struct input_handler input_handler; ++ struct monitor_dev_info *mdev_info; ++ struct share_params *set_rate_params; ++ struct rockchip_opp_info opp_info; + -+ if (!try_module_get(exp_info->owner)) -+ return ERR_PTR(-ENOENT); ++ unsigned long *nocp_bw; ++ unsigned long rate; ++ unsigned long volt, mem_volt; ++ unsigned long sleep_volt, sleep_mem_volt; ++ unsigned long auto_min_rate; ++ unsigned long status_rate; ++ unsigned long normal_rate; ++ unsigned long video_1080p_rate; ++ unsigned long video_4k_rate; ++ unsigned long video_4k_10b_rate; ++ unsigned long video_4k_60p_rate; ++ unsigned long video_svep_rate; ++ unsigned long performance_rate; ++ unsigned long hdmi_rate; ++ unsigned long hdmirx_rate; ++ unsigned long idle_rate; ++ unsigned long suspend_rate; ++ unsigned long deep_suspend_rate; ++ unsigned long reboot_rate; ++ unsigned long boost_rate; ++ unsigned long fixed_rate; ++ unsigned long low_power_rate; + -+ file = dma_buf_getfile(exp_info->size, exp_info->flags); -+ if (IS_ERR(file)) { -+ ret = PTR_ERR(file); -+ goto err_module; -+ } ++ unsigned long freq_count; ++ unsigned long freq_info_rate[6]; ++ unsigned long rate_low; ++ unsigned long rate_mid_low; ++ unsigned long rate_mid_high; ++ unsigned long rate_high; + -+ if (!exp_info->resv) -+ alloc_size += sizeof(struct dma_resv); -+ else -+ /* prevent &dma_buf[1] == dma_buf->resv */ -+ alloc_size += 1; -+ dmabuf = kzalloc(alloc_size, GFP_KERNEL); -+ if (!dmabuf) { -+ ret = -ENOMEM; -+ goto err_file; -+ } ++ unsigned int min_cpu_freq; ++ unsigned int system_status_en; ++ unsigned int refresh; ++ int edev_count; ++ int dfi_id; ++ int nocp_cpu_id; + -+ dmabuf->priv = exp_info->priv; -+ dmabuf->ops = exp_info->ops; -+ dmabuf->size = exp_info->size; -+ dmabuf->exp_name = 
exp_info->exp_name; -+ dmabuf->owner = exp_info->owner; -+ spin_lock_init(&dmabuf->name_lock); -+#ifdef CONFIG_DMABUF_CACHE -+ mutex_init(&dmabuf->cache_lock); -+#endif -+ init_waitqueue_head(&dmabuf->poll); -+ dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; -+ dmabuf->cb_in.active = dmabuf->cb_out.active = 0; -+ mutex_init(&dmabuf->lock); -+ INIT_LIST_HEAD(&dmabuf->attachments); ++ bool is_fixed; ++ bool is_set_rate_direct; + -+ if (!resv) { -+ dmabuf->resv = (struct dma_resv *)&dmabuf[1]; -+ dma_resv_init(dmabuf->resv); -+ } else { -+ dmabuf->resv = resv; -+ } ++ unsigned int touchboostpulse_duration_val; ++ u64 touchboostpulse_endtime; + -+ ret = dma_buf_stats_setup(dmabuf, file); -+ if (ret) -+ goto err_dmabuf; ++ int (*set_auto_self_refresh)(u32 en); ++}; + -+ file->private_data = dmabuf; -+ file->f_path.dentry->d_fsdata = dmabuf; -+ dmabuf->file = file; ++static struct pm_qos_request pm_qos; + -+ mutex_lock(&db_list.lock); -+ list_add(&dmabuf->list_node, &db_list.head); -+#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) -+ db_total_size += dmabuf->size; -+ db_peak_size = max(db_total_size, db_peak_size); -+#endif -+ mutex_unlock(&db_list.lock); ++static int rockchip_dmcfreq_check_rate_volt(struct monitor_dev_info *info); + -+ if (IS_ENABLED(CONFIG_RK_DMABUF_DEBUG)) -+ dma_buf_set_default_name(dmabuf); ++static struct monitor_dev_profile dmc_mdevp = { ++ .type = MONITOR_TYPE_DEV, ++ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, ++ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, ++ .check_rate_volt = rockchip_dmcfreq_check_rate_volt, ++}; + -+ init_dma_buf_task_info(dmabuf); -+ return dmabuf; ++static inline unsigned long is_dualview(unsigned long status) ++{ ++ return (status & SYS_STATUS_LCDC0) && (status & SYS_STATUS_LCDC1); ++} + -+err_dmabuf: -+ if (!resv) -+ dma_resv_fini(dmabuf->resv); -+ kfree(dmabuf); -+err_file: -+ fput(file); -+err_module: -+ module_put(exp_info->owner); -+ return ERR_PTR(ret); ++static inline unsigned long is_isp(unsigned long status) ++{ ++ return (status & SYS_STATUS_ISP) || ++ (status & SYS_STATUS_CIF0) || ++ (status & SYS_STATUS_CIF1); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_export, DMA_BUF); + -+/** -+ * dma_buf_fd - returns a file descriptor for the given struct dma_buf -+ * @dmabuf: [in] pointer to dma_buf for which fd is required. -+ * @flags: [in] flags to give to fd -+ * -+ * On success, returns an associated 'fd'. Else, returns error. ++/* ++ * function: packaging de-skew setting to px30_ddr_dts_config_timing, ++ * px30_ddr_dts_config_timing will pass to trust firmware, and ++ * used direct to set register. ++ * input: de_skew ++ * output: tim + */ -+int dma_buf_fd(struct dma_buf *dmabuf, int flags) ++static void px30_de_skew_set_2_reg(struct rk3328_ddr_de_skew_setting *de_skew, ++ struct px30_ddr_dts_config_timing *tim) +{ -+ int fd; ++ u32 n; ++ u32 offset; ++ u32 shift; + -+ if (!dmabuf || !dmabuf->file) -+ return -EINVAL; ++ memset_io(tim->ca_skew, 0, sizeof(tim->ca_skew)); ++ memset_io(tim->cs0_skew, 0, sizeof(tim->cs0_skew)); ++ memset_io(tim->cs1_skew, 0, sizeof(tim->cs1_skew)); + -+ fd = get_unused_fd_flags(flags); -+ if (fd < 0) -+ return fd; ++ /* CA de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->ca_de_skew); n++) { ++ offset = n / 2; ++ shift = n % 2; ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 
4 : 0; ++ tim->ca_skew[offset] &= ~(0xf << shift); ++ tim->ca_skew[offset] |= (de_skew->ca_de_skew[n] << shift); ++ } + -+ fd_install(fd, dmabuf->file); ++ /* CS0 data de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->cs0_de_skew); n++) { ++ offset = ((n / 21) * 11) + ((n % 21) / 2); ++ shift = ((n % 21) % 2); ++ if ((n % 21) == 20) ++ shift = 0; ++ else ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 4 : 0; ++ tim->cs0_skew[offset] &= ~(0xf << shift); ++ tim->cs0_skew[offset] |= (de_skew->cs0_de_skew[n] << shift); ++ } + -+ return fd; ++ /* CS1 data de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->cs1_de_skew); n++) { ++ offset = ((n / 21) * 11) + ((n % 21) / 2); ++ shift = ((n % 21) % 2); ++ if ((n % 21) == 20) ++ shift = 0; ++ else ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 4 : 0; ++ tim->cs1_skew[offset] &= ~(0xf << shift); ++ tim->cs1_skew[offset] |= (de_skew->cs1_de_skew[n] << shift); ++ } +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_fd, DMA_BUF); + -+/** -+ * dma_buf_get - returns the struct dma_buf related to an fd -+ * @fd: [in] fd associated with the struct dma_buf to be returned -+ * -+ * On success, returns the struct dma_buf associated with an fd; uses -+ * file's refcounting done by fget to increase refcount. returns ERR_PTR -+ * otherwise. ++/* ++ * function: packaging de-skew setting to rk3328_ddr_dts_config_timing, ++ * rk3328_ddr_dts_config_timing will pass to trust firmware, and ++ * used direct to set register. ++ * input: de_skew ++ * output: tim + */ -+struct dma_buf *dma_buf_get(int fd) ++static void ++rk3328_de_skew_setting_2_register(struct rk3328_ddr_de_skew_setting *de_skew, ++ struct rk3328_ddr_dts_config_timing *tim) +{ -+ struct file *file; ++ u32 n; ++ u32 offset; ++ u32 shift; + -+ file = fget(fd); ++ memset_io(tim->ca_skew, 0, sizeof(tim->ca_skew)); ++ memset_io(tim->cs0_skew, 0, sizeof(tim->cs0_skew)); ++ memset_io(tim->cs1_skew, 0, sizeof(tim->cs1_skew)); + -+ if (!file) -+ return ERR_PTR(-EBADF); ++ /* CA de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->ca_de_skew); n++) { ++ offset = n / 2; ++ shift = n % 2; ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 4 : 0; ++ tim->ca_skew[offset] &= ~(0xf << shift); ++ tim->ca_skew[offset] |= (de_skew->ca_de_skew[n] << shift); ++ } + -+ if (!is_dma_buf_file(file)) { -+ fput(file); -+ return ERR_PTR(-EINVAL); ++ /* CS0 data de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->cs0_de_skew); n++) { ++ offset = ((n / 21) * 11) + ((n % 21) / 2); ++ shift = ((n % 21) % 2); ++ if ((n % 21) == 20) ++ shift = 0; ++ else ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 4 : 0; ++ tim->cs0_skew[offset] &= ~(0xf << shift); ++ tim->cs0_skew[offset] |= (de_skew->cs0_de_skew[n] << shift); + } + -+ return file->private_data; ++ /* CS1 data de-skew */ ++ for (n = 0; n < ARRAY_SIZE(de_skew->cs1_de_skew); n++) { ++ offset = ((n / 21) * 11) + ((n % 21) / 2); ++ shift = ((n % 21) % 2); ++ if ((n % 21) == 20) ++ shift = 0; ++ else ++ /* 0 => 4; 1 => 0 */ ++ shift = (shift == 0) ? 4 : 0; ++ tim->cs1_skew[offset] &= ~(0xf << shift); ++ tim->cs1_skew[offset] |= (de_skew->cs1_de_skew[n] << shift); ++ } +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_get, DMA_BUF); + -+/** -+ * dma_buf_put - decreases refcount of the buffer -+ * @dmabuf: [in] buffer to reduce refcount of -+ * -+ * Uses file's refcounting done implicitly by fput(). -+ * -+ * If, as a result of this call, the refcount becomes 0, the 'release' file -+ * operation related to this fd is called. 
It calls &dma_buf_ops.release vfunc -+ * in turn, and frees the memory allocated for dmabuf when exported. -+ */ -+void dma_buf_put(struct dma_buf *dmabuf) ++static int rk_drm_get_lcdc_type(void) +{ -+ if (WARN_ON(!dmabuf || !dmabuf->file)) -+ return; ++ u32 lcdc_type = rockchip_drm_get_sub_dev_type(); + -+ fput(dmabuf->file); ++ switch (lcdc_type) { ++ case DRM_MODE_CONNECTOR_DPI: ++ case DRM_MODE_CONNECTOR_LVDS: ++ lcdc_type = SCREEN_LVDS; ++ break; ++ case DRM_MODE_CONNECTOR_DisplayPort: ++ lcdc_type = SCREEN_DP; ++ break; ++ case DRM_MODE_CONNECTOR_HDMIA: ++ case DRM_MODE_CONNECTOR_HDMIB: ++ lcdc_type = SCREEN_HDMI; ++ break; ++ case DRM_MODE_CONNECTOR_TV: ++ lcdc_type = SCREEN_TVOUT; ++ break; ++ case DRM_MODE_CONNECTOR_eDP: ++ lcdc_type = SCREEN_EDP; ++ break; ++ case DRM_MODE_CONNECTOR_DSI: ++ lcdc_type = SCREEN_MIPI; ++ break; ++ default: ++ lcdc_type = SCREEN_NULL; ++ break; ++ } ++ ++ return lcdc_type; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_put, DMA_BUF); + -+static void mangle_sg_table(struct sg_table *sg_table) ++static int rockchip_ddr_set_rate(unsigned long target_rate) +{ -+#ifdef CONFIG_DMABUF_DEBUG -+ int i; -+ struct scatterlist *sg; -+ -+ /* To catch abuse of the underlying struct page by importers mix -+ * up the bits, but take care to preserve the low SG_ bits to -+ * not corrupt the sgt. The mixing is undone in __unmap_dma_buf -+ * before passing the sgt back to the exporter. */ -+ for_each_sgtable_sg(sg_table, sg, i) -+ sg->page_link ^= ~0xffUL; -+#endif ++ struct arm_smccc_res res; + -+} -+static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, -+ enum dma_data_direction direction) -+{ -+ struct sg_table *sg_table; -+ signed long ret; ++ ddr_psci_param->hz = target_rate; ++ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); ++ ddr_psci_param->vop_scan_line_time_ns = rockchip_drm_get_scan_line_time_ns(); ++ ddr_psci_param->wait_flag1 = 1; ++ ddr_psci_param->wait_flag0 = 1; + -+ sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); -+ if (IS_ERR_OR_NULL(sg_table)) -+ return sg_table; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE); + -+ if (!dma_buf_attachment_is_dynamic(attach)) { -+ ret = dma_resv_wait_timeout(attach->dmabuf->resv, -+ DMA_RESV_USAGE_KERNEL, true, -+ MAX_SCHEDULE_TIMEOUT); -+ if (ret < 0) { -+ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, -+ direction); -+ return ERR_PTR(ret); -+ } -+ } ++ if ((int)res.a1 == SIP_RET_SET_RATE_TIMEOUT) ++ rockchip_dmcfreq_wait_complete(); + -+ mangle_sg_table(sg_table); -+ return sg_table; ++ return res.a0; +} + -+/** -+ * DOC: locking convention -+ * -+ * In order to avoid deadlock situations between dma-buf exports and importers, -+ * all dma-buf API users must follow the common dma-buf locking convention. -+ * -+ * Convention for importers -+ * -+ * 1. Importers must hold the dma-buf reservation lock when calling these -+ * functions: -+ * -+ * - dma_buf_pin() -+ * - dma_buf_unpin() -+ * - dma_buf_map_attachment() -+ * - dma_buf_unmap_attachment() -+ * - dma_buf_vmap() -+ * - dma_buf_vunmap() -+ * -+ * 2. 
Importers must not hold the dma-buf reservation lock when calling these -+ * functions: -+ * -+ * - dma_buf_attach() -+ * - dma_buf_dynamic_attach() -+ * - dma_buf_detach() -+ * - dma_buf_export() -+ * - dma_buf_fd() -+ * - dma_buf_get() -+ * - dma_buf_put() -+ * - dma_buf_mmap() -+ * - dma_buf_begin_cpu_access() -+ * - dma_buf_end_cpu_access() -+ * - dma_buf_map_attachment_unlocked() -+ * - dma_buf_unmap_attachment_unlocked() -+ * - dma_buf_vmap_unlocked() -+ * - dma_buf_vunmap_unlocked() -+ * -+ * Convention for exporters -+ * -+ * 1. These &dma_buf_ops callbacks are invoked with unlocked dma-buf -+ * reservation and exporter can take the lock: -+ * -+ * - &dma_buf_ops.attach() -+ * - &dma_buf_ops.detach() -+ * - &dma_buf_ops.release() -+ * - &dma_buf_ops.begin_cpu_access() -+ * - &dma_buf_ops.end_cpu_access() -+ * - &dma_buf_ops.mmap() -+ * -+ * 2. These &dma_buf_ops callbacks are invoked with locked dma-buf -+ * reservation and exporter can't take the lock: -+ * -+ * - &dma_buf_ops.pin() -+ * - &dma_buf_ops.unpin() -+ * - &dma_buf_ops.map_dma_buf() -+ * - &dma_buf_ops.unmap_dma_buf() -+ * - &dma_buf_ops.vmap() -+ * - &dma_buf_ops.vunmap() -+ * -+ * 3. Exporters must hold the dma-buf reservation lock when calling these -+ * functions: -+ * -+ * - dma_buf_move_notify() -+ */ -+ -+/** -+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list -+ * @dmabuf: [in] buffer to attach device to. -+ * @dev: [in] device to be attached. -+ * @importer_ops: [in] importer operations for the attachment -+ * @importer_priv: [in] importer private pointer for the attachment -+ * -+ * Returns struct dma_buf_attachment pointer for this attachment. Attachments -+ * must be cleaned up by calling dma_buf_detach(). -+ * -+ * Optionally this calls &dma_buf_ops.attach to allow device-specific attach -+ * functionality. -+ * -+ * Returns: -+ * -+ * A pointer to newly created &dma_buf_attachment on success, or a negative -+ * error code wrapped into a pointer on failure. -+ * -+ * Note that this can fail if the backing storage of @dmabuf is in a place not -+ * accessible to @dev, and cannot be moved to a more suitable place. This is -+ * indicated with the error code -EBUSY. 
-+ */ -+struct dma_buf_attachment * -+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, -+ const struct dma_buf_attach_ops *importer_ops, -+ void *importer_priv) ++static int rockchip_dmcfreq_set_volt(struct device *dev, struct regulator *reg, ++ struct dev_pm_opp_supply *supply, ++ char *reg_name) +{ -+ struct dma_buf_attachment *attach; + int ret; + -+ if (WARN_ON(!dmabuf || !dev)) -+ return ERR_PTR(-EINVAL); ++ dev_dbg(dev, "%s: %s voltages (mV): %lu %lu %lu\n", __func__, reg_name, ++ supply->u_volt_min, supply->u_volt, supply->u_volt_max); ++ ret = regulator_set_voltage_triplet(reg, supply->u_volt_min, ++ supply->u_volt, INT_MAX); ++ if (ret) ++ dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", ++ __func__, supply->u_volt_min, supply->u_volt, ++ supply->u_volt_max, ret); + -+ if (WARN_ON(importer_ops && !importer_ops->move_notify)) -+ return ERR_PTR(-EINVAL); ++ return ret; ++} + -+ attach = kzalloc(sizeof(*attach), GFP_KERNEL); -+ if (!attach) -+ return ERR_PTR(-ENOMEM); ++static int rockchip_dmcfreq_opp_set_rate(struct device *dev, ++ struct rockchip_dmcfreq *dmcfreq, ++ unsigned long *freq) ++{ ++ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; ++ unsigned int reg_count = opp_info->regulator_count; ++ struct regulator *vdd_reg = NULL; ++ struct regulator *mem_reg = NULL; ++ struct clk *clk = opp_info->clk; ++ struct dev_pm_opp *opp; ++ struct dev_pm_opp_supply supplies[2]; ++ unsigned long old_freq = dmcfreq->rate; ++ unsigned long new_freq; ++ int old_volt = dmcfreq->volt; ++ int old_volt_mem = dmcfreq->mem_volt; ++ struct cpufreq_policy *policy; ++ bool is_cpufreq_changed = false; ++ unsigned int cpu_cur, cpufreq_cur; ++ int ret = 0; + -+ attach->dev = dev; -+ attach->dmabuf = dmabuf; -+ if (importer_ops) -+ attach->peer2peer = importer_ops->allow_peer2peer; -+ attach->importer_ops = importer_ops; -+ attach->importer_priv = importer_priv; ++ vdd_reg = opp_info->regulators[0]; ++ if (reg_count > 1) ++ mem_reg = opp_info->regulators[1]; + -+ if (dmabuf->ops->attach) { -+ ret = dmabuf->ops->attach(dmabuf, attach); -+ if (ret) -+ goto err_attach; ++ opp = dev_pm_opp_find_freq_ceil(dev, freq); ++ if (IS_ERR(opp)) { ++ opp = dev_pm_opp_find_freq_floor(dev, freq); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); + } -+ dma_resv_lock(dmabuf->resv, NULL); -+ list_add(&attach->node, &dmabuf->attachments); -+ dma_resv_unlock(dmabuf->resv); ++ new_freq = *freq; ++ ret = dev_pm_opp_get_supplies(opp, supplies); ++ if (ret) ++ return ret; ++ dev_pm_opp_put(opp); + -+ /* When either the importer or the exporter can't handle dynamic -+ * mappings we cache the mapping here to avoid issues with the -+ * reservation object lock. ++ /* ++ * We need to prevent cpu hotplug from happening while a dmc freq rate ++ * change is happening. ++ * ++ * Do this before taking the policy rwsem to avoid deadlocks between the ++ * mutex that is locked/unlocked in cpu_hotplug_disable/enable. And it ++ * can also avoid deadlocks between the mutex that is locked/unlocked ++ * in cpus_read_lock/unlock (such as store_scaling_max_freq()). + */ -+ if (dma_buf_attachment_is_dynamic(attach) != -+ dma_buf_is_dynamic(dmabuf)) { -+ struct sg_table *sgt; ++ cpus_read_lock(); + -+ if (dma_buf_is_dynamic(attach->dmabuf)) { -+ dma_resv_lock(attach->dmabuf->resv, NULL); -+ ret = dmabuf->ops->pin(attach); -+ if (ret) -+ goto err_unlock; ++ if (dmcfreq->min_cpu_freq) { ++ /* ++ * Go to specified cpufreq and block other cpufreq changes since ++ * set_rate needs to complete during vblank. 
++ */ ++ cpu_cur = raw_smp_processor_id(); ++ policy = cpufreq_cpu_get(cpu_cur); ++ if (!policy) { ++ dev_err(dev, "cpu%d policy NULL\n", cpu_cur); ++ ret = -EINVAL; ++ goto cpus_unlock; + } ++ down_write(&policy->rwsem); ++ cpufreq_cur = cpufreq_quick_get(cpu_cur); + -+ sgt = __map_dma_buf(attach, DMA_BIDIRECTIONAL); -+ if (!sgt) -+ sgt = ERR_PTR(-ENOMEM); -+ if (IS_ERR(sgt)) { -+ ret = PTR_ERR(sgt); -+ goto err_unpin; ++ /* If we're thermally throttled; don't change; */ ++ if (cpufreq_cur < dmcfreq->min_cpu_freq) { ++ if (policy->max >= dmcfreq->min_cpu_freq) { ++ __cpufreq_driver_target(policy, ++ dmcfreq->min_cpu_freq, ++ CPUFREQ_RELATION_L); ++ is_cpufreq_changed = true; ++ } else { ++ dev_dbg(dev, ++ "CPU may too slow for DMC (%d MHz)\n", ++ policy->max); ++ } + } -+ if (dma_buf_is_dynamic(attach->dmabuf)) -+ dma_resv_unlock(attach->dmabuf->resv); -+ attach->sgt = sgt; -+ attach->dir = DMA_BIDIRECTIONAL; + } + -+ return attach; -+ -+err_attach: -+ kfree(attach); -+ return ERR_PTR(ret); -+ -+err_unpin: -+ if (dma_buf_is_dynamic(attach->dmabuf)) -+ dmabuf->ops->unpin(attach); -+ -+err_unlock: -+ if (dma_buf_is_dynamic(attach->dmabuf)) -+ dma_resv_unlock(attach->dmabuf->resv); -+ -+ dma_buf_detach(dmabuf, attach); -+ return ERR_PTR(ret); -+} -+EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF); ++ /* Scaling up? Scale voltage before frequency */ ++ if (new_freq >= old_freq) { ++ if (reg_count > 1) { ++ ret = rockchip_dmcfreq_set_volt(dev, mem_reg, ++ &supplies[1], "mem"); ++ if (ret) ++ goto restore_voltage; ++ } ++ ret = rockchip_dmcfreq_set_volt(dev, vdd_reg, &supplies[0], "vdd"); ++ if (ret) ++ goto restore_voltage; ++ if (new_freq == old_freq) ++ goto out; ++ } + -+/** -+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach -+ * @dmabuf: [in] buffer to attach device to. -+ * @dev: [in] device to be attached. -+ * -+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static -+ * mapping. -+ */ -+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, -+ struct device *dev) -+{ -+ return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL); -+} -+EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF); ++ /* ++ * Writer in rwsem may block readers even during its waiting in queue, ++ * and this may lead to a deadlock when the code path takes read sem ++ * twice (e.g. one in vop_lock() and another in rockchip_pmu_lock()). ++ * As a (suboptimal) workaround, let writer to spin until it gets the ++ * lock. ++ */ ++ while (!rockchip_dmcfreq_write_trylock()) ++ cond_resched(); ++ dev_dbg(dev, "%lu Hz --> %lu Hz\n", old_freq, new_freq); + -+static void __unmap_dma_buf(struct dma_buf_attachment *attach, -+ struct sg_table *sg_table, -+ enum dma_data_direction direction) -+{ -+ /* uses XOR, hence this unmangles */ -+ mangle_sg_table(sg_table); ++ if (dmcfreq->set_rate_params) { ++ dmcfreq->set_rate_params->lcdc_type = rk_drm_get_lcdc_type(); ++ dmcfreq->set_rate_params->wait_flag1 = 1; ++ dmcfreq->set_rate_params->wait_flag0 = 1; ++ } + -+ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); -+} ++ if (dmcfreq->is_set_rate_direct) ++ ret = rockchip_ddr_set_rate(new_freq); ++ else ++ ret = clk_set_rate(clk, new_freq); + -+/** -+ * dma_buf_detach - Remove the given attachment from dmabuf's attachments list -+ * @dmabuf: [in] buffer to detach from. -+ * @attach: [in] attachment to be detached; is free'd after this call. -+ * -+ * Clean up a device attachment obtained by calling dma_buf_attach(). 
-+ * -+ * Optionally this calls &dma_buf_ops.detach for device-specific detach. -+ */ -+void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) -+{ -+ if (WARN_ON(!dmabuf || !attach)) -+ return; ++ rockchip_dmcfreq_write_unlock(); ++ if (ret) { ++ dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, ++ ret); ++ goto restore_voltage; ++ } + -+ if (attach->sgt) { -+ if (dma_buf_is_dynamic(attach->dmabuf)) -+ dma_resv_lock(attach->dmabuf->resv, NULL); ++ /* ++ * Check the dpll rate, ++ * There only two result we will get, ++ * 1. Ddr frequency scaling fail, we still get the old rate. ++ * 2. Ddr frequency scaling successful, we get the rate we set. ++ */ ++ dmcfreq->rate = clk_get_rate(clk); + -+ __unmap_dma_buf(attach, attach->sgt, attach->dir); ++ /* If get the incorrect rate, set voltage to old value. */ ++ if (dmcfreq->rate != new_freq) { ++ dev_err(dev, "Get wrong frequency, Request %lu, Current %lu\n", ++ new_freq, dmcfreq->rate); ++ ret = -EINVAL; ++ goto restore_voltage; ++ } + -+ if (dma_buf_is_dynamic(attach->dmabuf)) { -+ dmabuf->ops->unpin(attach); -+ dma_resv_unlock(attach->dmabuf->resv); ++ /* Scaling down? Scale voltage after frequency */ ++ if (new_freq < old_freq) { ++ ret = rockchip_dmcfreq_set_volt(dev, vdd_reg, &supplies[0], ++ "vdd"); ++ if (ret) ++ goto restore_freq; ++ if (reg_count > 1) { ++ ret = rockchip_dmcfreq_set_volt(dev, mem_reg, ++ &supplies[1], "mem"); ++ if (ret) ++ goto restore_freq; + } + } ++ dmcfreq->volt = supplies[0].u_volt; ++ if (reg_count > 1) ++ dmcfreq->mem_volt = supplies[1].u_volt; + -+ dma_resv_lock(dmabuf->resv, NULL); -+ list_del(&attach->node); -+ dma_resv_unlock(dmabuf->resv); -+ if (dmabuf->ops->detach) -+ dmabuf->ops->detach(dmabuf, attach); -+ -+ kfree(attach); -+} -+EXPORT_SYMBOL_NS_GPL(dma_buf_detach, DMA_BUF); -+ -+/** -+ * dma_buf_pin - Lock down the DMA-buf -+ * @attach: [in] attachment which should be pinned -+ * -+ * Only dynamic importers (who set up @attach with dma_buf_dynamic_attach()) may -+ * call this, and only for limited use cases like scanout and not for temporary -+ * pin operations. It is not permitted to allow userspace to pin arbitrary -+ * amounts of buffers through this interface. -+ * -+ * Buffers must be unpinned by calling dma_buf_unpin(). -+ * -+ * Returns: -+ * 0 on success, negative error code on failure. 
-+ */ -+int dma_buf_pin(struct dma_buf_attachment *attach) -+{ -+ struct dma_buf *dmabuf = attach->dmabuf; -+ int ret = 0; -+ -+ WARN_ON(!dma_buf_attachment_is_dynamic(attach)); -+ -+ dma_resv_assert_held(dmabuf->resv); ++ goto out; + -+ if (dmabuf->ops->pin) -+ ret = dmabuf->ops->pin(attach); ++restore_freq: ++ if (dmcfreq->is_set_rate_direct) ++ ret = rockchip_ddr_set_rate(new_freq); ++ else ++ ret = clk_set_rate(clk, new_freq); ++ if (ret) ++ dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", ++ __func__, old_freq); ++restore_voltage: ++ if (reg_count > 1 && old_volt_mem) ++ regulator_set_voltage(mem_reg, old_volt_mem, INT_MAX); ++ if (old_volt) ++ regulator_set_voltage(vdd_reg, old_volt, INT_MAX); ++out: ++ if (dmcfreq->min_cpu_freq) { ++ if (is_cpufreq_changed) ++ __cpufreq_driver_target(policy, cpufreq_cur, ++ CPUFREQ_RELATION_L); ++ up_write(&policy->rwsem); ++ cpufreq_cpu_put(policy); ++ } ++cpus_unlock: ++ cpus_read_unlock(); + + return ret; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_pin, DMA_BUF); + -+/** -+ * dma_buf_unpin - Unpin a DMA-buf -+ * @attach: [in] attachment which should be unpinned -+ * -+ * This unpins a buffer pinned by dma_buf_pin() and allows the exporter to move -+ * any mapping of @attach again and inform the importer through -+ * &dma_buf_attach_ops.move_notify. -+ */ -+void dma_buf_unpin(struct dma_buf_attachment *attach) ++static int rockchip_dmcfreq_check_rate_volt(struct monitor_dev_info *info) +{ -+ struct dma_buf *dmabuf = attach->dmabuf; -+ -+ WARN_ON(!dma_buf_attachment_is_dynamic(attach)); ++ struct device *dev = info->dev; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; ++ unsigned long cur_freq = dmcfreq->rate; + -+ dma_resv_assert_held(dmabuf->resv); ++ rockchip_opp_dvfs_lock(opp_info); ++ rockchip_dmcfreq_opp_set_rate(dev, dmcfreq, &cur_freq); ++ opp_info->is_rate_volt_checked = true; ++ rockchip_opp_dvfs_unlock(opp_info); + -+ if (dmabuf->ops->unpin) -+ dmabuf->ops->unpin(attach); ++ return 0; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, DMA_BUF); + -+/** -+ * dma_buf_map_attachment - Returns the scatterlist table of the attachment; -+ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the -+ * dma_buf_ops. -+ * @attach: [in] attachment whose scatterlist is to be returned -+ * @direction: [in] direction of DMA transfer -+ * -+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR -+ * on error. May return -EINTR if it is interrupted by a signal. -+ * -+ * On success, the DMA addresses and lengths in the returned scatterlist are -+ * PAGE_SIZE aligned. -+ * -+ * A mapping must be unmapped by using dma_buf_unmap_attachment(). Note that -+ * the underlying backing storage is pinned for as long as a mapping exists, -+ * therefore users/importers should not hold onto a mapping for undue amounts of -+ * time. -+ * -+ * Important: Dynamic importers must wait for the exclusive fence of the struct -+ * dma_resv attached to the DMA-BUF first. 
-+ */ -+struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, -+ enum dma_data_direction direction) ++static int rockchip_dmcfreq_target(struct device *dev, unsigned long *freq, ++ u32 flags) +{ -+ struct sg_table *sg_table; -+ int r; -+ -+ might_sleep(); -+ -+ if (WARN_ON(!attach || !attach->dmabuf)) -+ return ERR_PTR(-EINVAL); -+ -+ if (dma_buf_attachment_is_dynamic(attach)) -+ dma_resv_assert_held(attach->dmabuf->resv); -+ -+ if (attach->sgt) { -+ /* -+ * Two mappings with different directions for the same -+ * attachment are not allowed. -+ */ -+ if (attach->dir != direction && -+ attach->dir != DMA_BIDIRECTIONAL) -+ return ERR_PTR(-EBUSY); -+ -+ return attach->sgt; -+ } -+ -+ if (dma_buf_is_dynamic(attach->dmabuf)) { -+ dma_resv_assert_held(attach->dmabuf->resv); -+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { -+ r = attach->dmabuf->ops->pin(attach); -+ if (r) -+ return ERR_PTR(r); -+ } -+ } -+ -+ sg_table = __map_dma_buf(attach, direction); -+ if (!sg_table) -+ sg_table = ERR_PTR(-ENOMEM); -+ -+ if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) && -+ !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) -+ attach->dmabuf->ops->unpin(attach); -+ -+ if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) { -+ attach->sgt = sg_table; -+ attach->dir = direction; -+ } ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; ++ struct devfreq *devfreq; ++ int ret = 0; + -+#ifdef CONFIG_DMA_API_DEBUG -+ if (!IS_ERR(sg_table)) { -+ struct scatterlist *sg; -+ u64 addr; -+ int len; -+ int i; ++ if (!opp_info->is_rate_volt_checked) ++ return -EINVAL; + -+ for_each_sgtable_dma_sg(sg_table, sg, i) { -+ addr = sg_dma_address(sg); -+ len = sg_dma_len(sg); -+ if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(len)) { -+ pr_debug("%s: addr %llx or len %x is not page aligned!\n", -+ __func__, addr, len); ++ rockchip_opp_dvfs_lock(opp_info); ++ if (dmcfreq->rate != *freq) { ++ ret = rockchip_dmcfreq_opp_set_rate(dev, dmcfreq, freq); ++ if (!ret) { ++ if (dmcfreq->info.devfreq) { ++ devfreq = dmcfreq->info.devfreq; ++ devfreq->last_status.current_frequency = *freq; + } + } + } -+#endif /* CONFIG_DMA_API_DEBUG */ -+ return sg_table; ++ rockchip_opp_dvfs_unlock(opp_info); ++ ++ return ret; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, DMA_BUF); + -+/** -+ * dma_buf_map_attachment_unlocked - Returns the scatterlist table of the attachment; -+ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the -+ * dma_buf_ops. -+ * @attach: [in] attachment whose scatterlist is to be returned -+ * @direction: [in] direction of DMA transfer -+ * -+ * Unlocked variant of dma_buf_map_attachment(). -+ */ -+struct sg_table * -+dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, -+ enum dma_data_direction direction) ++static int rockchip_dmcfreq_get_dev_status(struct device *dev, ++ struct devfreq_dev_status *stat) +{ -+ struct sg_table *sg_table; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info = &dmcfreq->opp_info; ++ struct devfreq_event_data edata; ++ int i, ret = 0; + -+ might_sleep(); ++ if (!dmcfreq->info.auto_freq_en) ++ return -EINVAL; + -+ if (WARN_ON(!attach || !attach->dmabuf)) -+ return ERR_PTR(-EINVAL); ++ /* ++ * RK3588 platform may crash if the CPU and MCU access the DFI/DMC ++ * registers at same time. 
++ */ ++ rockchip_opp_dvfs_lock(opp_info); ++ for (i = 0; i < dmcfreq->edev_count; i++) { ++ ret = devfreq_event_get_event(dmcfreq->edev[i], &edata); ++ if (ret < 0) { ++ dev_err(dev, "failed to get event %s\n", ++ dmcfreq->edev[i]->desc->name); ++ goto out; ++ } ++ if (i == dmcfreq->dfi_id) { ++ stat->busy_time = edata.load_count; ++ stat->total_time = edata.total_count; ++ } else { ++ dmcfreq->nocp_bw[i] = edata.load_count; ++ } ++ } + -+ dma_resv_lock(attach->dmabuf->resv, NULL); -+ sg_table = dma_buf_map_attachment(attach, direction); -+ dma_resv_unlock(attach->dmabuf->resv); ++out: ++ rockchip_opp_dvfs_unlock(opp_info); + -+ return sg_table; ++ return ret; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment_unlocked, DMA_BUF); + -+/** -+ * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might -+ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of -+ * dma_buf_ops. -+ * @attach: [in] attachment to unmap buffer from -+ * @sg_table: [in] scatterlist info of the buffer to unmap -+ * @direction: [in] direction of DMA transfer -+ * -+ * This unmaps a DMA mapping for @attached obtained by dma_buf_map_attachment(). -+ */ -+void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, -+ struct sg_table *sg_table, -+ enum dma_data_direction direction) ++static int rockchip_dmcfreq_get_cur_freq(struct device *dev, ++ unsigned long *freq) +{ -+ might_sleep(); -+ -+ if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) -+ return; -+ -+ if (dma_buf_attachment_is_dynamic(attach)) -+ dma_resv_assert_held(attach->dmabuf->resv); ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); + -+ if (attach->sgt == sg_table) -+ return; ++ *freq = dmcfreq->rate; + -+ if (dma_buf_is_dynamic(attach->dmabuf)) -+ dma_resv_assert_held(attach->dmabuf->resv); ++ return 0; ++} + -+ __unmap_dma_buf(attach, sg_table, direction); ++static struct devfreq_dev_profile rockchip_devfreq_dmc_profile = { ++ .polling_ms = 50, ++ .target = rockchip_dmcfreq_target, ++ .get_dev_status = rockchip_dmcfreq_get_dev_status, ++ .get_cur_freq = rockchip_dmcfreq_get_cur_freq, ++ .is_cooling_device = true, ++}; + -+ if (dma_buf_is_dynamic(attach->dmabuf) && -+ !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) -+ dma_buf_unpin(attach); ++static inline void reset_last_status(struct devfreq *devfreq) ++{ ++ devfreq->last_status.total_time = 1; ++ devfreq->last_status.busy_time = 1; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, DMA_BUF); + -+/** -+ * dma_buf_unmap_attachment_unlocked - unmaps and decreases usecount of the buffer;might -+ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of -+ * dma_buf_ops. -+ * @attach: [in] attachment to unmap buffer from -+ * @sg_table: [in] scatterlist info of the buffer to unmap -+ * @direction: [in] direction of DMA transfer -+ * -+ * Unlocked variant of dma_buf_unmap_attachment(). 
-+ */ -+void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, -+ struct sg_table *sg_table, -+ enum dma_data_direction direction) ++static void of_get_px30_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ -+ might_sleep(); ++ struct device_node *np_tim; ++ u32 *p; ++ struct px30_ddr_dts_config_timing *dts_timing; ++ struct rk3328_ddr_de_skew_setting *de_skew; ++ int ret = 0; ++ u32 i; + -+ if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) -+ return; ++ dts_timing = ++ (struct px30_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); + -+ dma_resv_lock(attach->dmabuf->resv, NULL); -+ dma_buf_unmap_attachment(attach, sg_table, direction); -+ dma_resv_unlock(attach->dmabuf->resv); ++ np_tim = of_parse_phandle(np, "ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } ++ de_skew = kmalloc(sizeof(*de_skew), GFP_KERNEL); ++ if (!de_skew) { ++ ret = -ENOMEM; ++ goto end; ++ } ++ p = (u32 *)dts_timing; ++ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->ca_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_ca_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_ca_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->cs0_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs0_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_cs0_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->cs1_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs1_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_cs1_timing[i], ++ p + i); ++ } ++ if (!ret) ++ px30_de_skew_set_2_reg(de_skew, dts_timing); ++ kfree(de_skew); ++end: ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ } ++ ++ of_node_put(np_tim); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, DMA_BUF); + -+/** -+ * dma_buf_move_notify - notify attachments that DMA-buf is moving -+ * -+ * @dmabuf: [in] buffer which is moving -+ * -+ * Informs all attachments that they need to destroy and recreate all their -+ * mappings. -+ */ -+void dma_buf_move_notify(struct dma_buf *dmabuf) ++static void of_get_rk1808_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ -+ struct dma_buf_attachment *attach; -+ -+ dma_resv_assert_held(dmabuf->resv); ++ struct device_node *np_tim; ++ u32 *p; ++ struct rk1808_ddr_dts_config_timing *dts_timing; ++ int ret = 0; ++ u32 i; + -+ list_for_each_entry(attach, &dmabuf->attachments, node) -+ if (attach->importer_ops) -+ attach->importer_ops->move_notify(attach); -+} -+EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF); ++ dts_timing = ++ (struct rk1808_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); + -+/** -+ * DOC: cpu access -+ * -+ * There are multiple reasons for supporting CPU access to a dma buffer object: -+ * -+ * - Fallback operations in the kernel, for example when a device is connected -+ * over USB and the kernel needs to shuffle the data around first before -+ * sending it away. Cache coherency is handled by bracketing any transactions -+ * with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access() -+ * access. -+ * -+ * Since for most kernel internal dma-buf accesses need the entire buffer, a -+ * vmap interface is introduced. Note that on very old 32-bit architectures -+ * vmalloc space might be limited and result in vmap calls failing. 
-+ * -+ * Interfaces:: -+ * -+ * void \*dma_buf_vmap(struct dma_buf \*dmabuf, struct iosys_map \*map) -+ * void dma_buf_vunmap(struct dma_buf \*dmabuf, struct iosys_map \*map) -+ * -+ * The vmap call can fail if there is no vmap support in the exporter, or if -+ * it runs out of vmalloc space. Note that the dma-buf layer keeps a reference -+ * count for all vmap access and calls down into the exporter's vmap function -+ * only when no vmapping exists, and only unmaps it once. Protection against -+ * concurrent vmap/vunmap calls is provided by taking the &dma_buf.lock mutex. -+ * -+ * - For full compatibility on the importer side with existing userspace -+ * interfaces, which might already support mmap'ing buffers. This is needed in -+ * many processing pipelines (e.g. feeding a software rendered image into a -+ * hardware pipeline, thumbnail creation, snapshots, ...). Also, Android's ION -+ * framework already supported this and for DMA buffer file descriptors to -+ * replace ION buffers mmap support was needed. -+ * -+ * There is no special interfaces, userspace simply calls mmap on the dma-buf -+ * fd. But like for CPU access there's a need to bracket the actual access, -+ * which is handled by the ioctl (DMA_BUF_IOCTL_SYNC). Note that -+ * DMA_BUF_IOCTL_SYNC can fail with -EAGAIN or -EINTR, in which case it must -+ * be restarted. -+ * -+ * Some systems might need some sort of cache coherency management e.g. when -+ * CPU and GPU domains are being accessed through dma-buf at the same time. -+ * To circumvent this problem there are begin/end coherency markers, that -+ * forward directly to existing dma-buf device drivers vfunc hooks. Userspace -+ * can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The -+ * sequence would be used like following: -+ * -+ * - mmap dma-buf fd -+ * - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write -+ * to mmap area 3. SYNC_END ioctl. This can be repeated as often as you -+ * want (with the new data being consumed by say the GPU or the scanout -+ * device) -+ * - munmap once you don't need the buffer any more -+ * -+ * For correctness and optimal performance, it is always required to use -+ * SYNC_START and SYNC_END before and after, respectively, when accessing the -+ * mapped address. Userspace cannot rely on coherent access, even when there -+ * are systems where it just works without calling these ioctls. -+ * -+ * - And as a CPU fallback in userspace processing pipelines. -+ * -+ * Similar to the motivation for kernel cpu access it is again important that -+ * the userspace code of a given importing subsystem can use the same -+ * interfaces with a imported dma-buf buffer object as with a native buffer -+ * object. This is especially important for drm where the userspace part of -+ * contemporary OpenGL, X, and other drivers is huge, and reworking them to -+ * use a different way to mmap a buffer rather invasive. -+ * -+ * The assumption in the current dma-buf interfaces is that redirecting the -+ * initial mmap is all that's needed. A survey of some of the existing -+ * subsystems shows that no driver seems to do any nefarious thing like -+ * syncing up with outstanding asynchronous processing on the device or -+ * allocating special resources at fault time. So hopefully this is good -+ * enough, since adding interfaces to intercept pagefaults and allow pte -+ * shootdowns would increase the complexity quite a bit. 
-+ * -+ * Interface:: -+ * -+ * int dma_buf_mmap(struct dma_buf \*, struct vm_area_struct \*, -+ * unsigned long); -+ * -+ * If the importing subsystem simply provides a special-purpose mmap call to -+ * set up a mapping in userspace, calling do_mmap with &dma_buf.file will -+ * equally achieve that for a dma-buf object. -+ */ ++ np_tim = of_parse_phandle(np, "ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } + -+static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) -+{ -+ bool write = (direction == DMA_BIDIRECTIONAL || -+ direction == DMA_TO_DEVICE); -+ struct dma_resv *resv = dmabuf->resv; -+ long ret; ++ p = (u32 *)dts_timing; ++ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->ca_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk1808_dts_ca_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk1808_dts_ca_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs0_a_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs0_a_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk1808_dts_cs0_a_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs0_b_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs0_b_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk1808_dts_cs0_b_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs1_a_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs1_a_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk1808_dts_cs1_a_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs1_b_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk1808_dts_cs1_b_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk1808_dts_cs1_b_timing[i], ++ p + i); ++ } + -+ /* Wait on any implicit rendering fences */ -+ ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), -+ true, MAX_SCHEDULE_TIMEOUT); -+ if (ret < 0) -+ return ret; ++end: ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ } + -+ return 0; ++ of_node_put(np_tim); +} + -+/** -+ * dma_buf_begin_cpu_access - Must be called before accessing a dma_buf from the -+ * cpu in the kernel context. Calls begin_cpu_access to allow exporter-specific -+ * preparations. Coherency is only guaranteed in the specified range for the -+ * specified access direction. -+ * @dmabuf: [in] buffer to prepare cpu access for. -+ * @direction: [in] direction of access. -+ * -+ * After the cpu access is complete the caller should call -+ * dma_buf_end_cpu_access(). Only when cpu access is bracketed by both calls is -+ * it guaranteed to be coherent with other DMA access. -+ * -+ * This function will also wait for any DMA transactions tracked through -+ * implicit synchronization in &dma_buf.resv. For DMA transactions with explicit -+ * synchronization this function will only ensure cache coherency, callers must -+ * ensure synchronization with such DMA transactions on their own. -+ * -+ * Can return negative error values, returns 0 on success. 
-+ */ -+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static void of_get_rk3128_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ ++ struct device_node *np_tim; ++ u32 *p; ++ struct rk3128_ddr_dts_config_timing *dts_timing; ++ struct share_params *init_timing; + int ret = 0; ++ u32 i; + -+ if (WARN_ON(!dmabuf)) -+ return -EINVAL; -+ -+ might_lock(&dmabuf->resv->lock.base); ++ init_timing = (struct share_params *)timing; + -+ if (dmabuf->ops->begin_cpu_access) -+ ret = dmabuf->ops->begin_cpu_access(dmabuf, direction); ++ if (of_property_read_u32(np, "vop-dclk-mode", ++ &init_timing->vop_dclk_mode)) ++ init_timing->vop_dclk_mode = 0; + -+ /* Ensure that all fences are waited upon - but we first allow -+ * the native handler the chance to do so more efficiently if it -+ * chooses. A double invocation here will be reasonably cheap no-op. -+ */ -+ if (ret == 0) -+ ret = __dma_buf_begin_cpu_access(dmabuf, direction); ++ p = timing + DTS_PAR_OFFSET / 4; ++ np_tim = of_parse_phandle(np, "rockchip,ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } ++ for (i = 0; i < ARRAY_SIZE(rk3128_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3128_dts_timing[i], ++ p + i); ++ } ++end: ++ dts_timing = ++ (struct rk3128_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ } + -+ return ret; ++ of_node_put(np_tim); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_begin_cpu_access, DMA_BUF); + -+/** -+ * dma_buf_end_cpu_access - Must be called after accessing a dma_buf from the -+ * cpu in the kernel context. Calls end_cpu_access to allow exporter-specific -+ * actions. Coherency is only guaranteed in the specified range for the -+ * specified access direction. -+ * @dmabuf: [in] buffer to complete cpu access for. -+ * @direction: [in] direction of access. -+ * -+ * This terminates CPU access started with dma_buf_begin_cpu_access(). -+ * -+ * Can return negative error values, returns 0 on success. -+ */ -+int dma_buf_end_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static uint32_t of_get_rk3228_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ ++ struct device_node *np_tim; ++ u32 *p; + int ret = 0; ++ u32 i; + -+ WARN_ON(!dmabuf); -+ -+ might_lock(&dmabuf->resv->lock.base); -+ -+ if (dmabuf->ops->end_cpu_access) -+ ret = dmabuf->ops->end_cpu_access(dmabuf, direction); ++ p = timing + DTS_PAR_OFFSET / 4; ++ np_tim = of_parse_phandle(np, "rockchip,dram_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } ++ for (i = 0; i < ARRAY_SIZE(rk3228_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3228_dts_timing[i], ++ p + i); ++ } ++end: ++ if (ret) ++ dev_err(dev, "of_get_ddr_timings: fail\n"); + ++ of_node_put(np_tim); + return ret; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_end_cpu_access, DMA_BUF); -+ + -+/** -+ * dma_buf_mmap - Setup up a userspace mmap with the given vma -+ * @dmabuf: [in] buffer that should back the vma -+ * @vma: [in] vma for the mmap -+ * @pgoff: [in] offset in pages where this mmap should start within the -+ * dma-buf buffer. -+ * -+ * This function adjusts the passed in vma so that it points at the file of the -+ * dma_buf operation. It also adjusts the starting pgoff and does bounds -+ * checking on the size of the vma. Then it calls the exporters mmap function to -+ * set up the mapping. 
-+ * -+ * Can return negative error values, returns 0 on success. -+ */ -+int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, -+ unsigned long pgoff) ++static void of_get_rk3288_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ -+ if (WARN_ON(!dmabuf || !vma)) -+ return -EINVAL; -+ -+ /* check if buffer supports mmap */ -+ if (!dmabuf->ops->mmap) -+ return -EINVAL; ++ struct device_node *np_tim; ++ u32 *p; ++ struct rk3288_ddr_dts_config_timing *dts_timing; ++ struct share_params *init_timing; ++ int ret = 0; ++ u32 i; + -+ /* check for offset overflow */ -+ if (pgoff + vma_pages(vma) < pgoff) -+ return -EOVERFLOW; ++ init_timing = (struct share_params *)timing; + -+ /* check for overflowing the buffer's size */ -+ if (pgoff + vma_pages(vma) > -+ dmabuf->size >> PAGE_SHIFT) -+ return -EINVAL; ++ if (of_property_read_u32(np, "vop-dclk-mode", ++ &init_timing->vop_dclk_mode)) ++ init_timing->vop_dclk_mode = 0; + -+ /* readjust the vma */ -+ vma_set_file(vma, dmabuf->file); -+ vma->vm_pgoff = pgoff; ++ p = timing + DTS_PAR_OFFSET / 4; ++ np_tim = of_parse_phandle(np, "rockchip,ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } ++ for (i = 0; i < ARRAY_SIZE(rk3288_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3288_dts_timing[i], ++ p + i); ++ } ++end: ++ dts_timing = ++ (struct rk3288_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ } + -+ return dmabuf->ops->mmap(dmabuf, vma); ++ of_node_put(np_tim); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF); + -+/** -+ * dma_buf_vmap - Create virtual mapping for the buffer object into kernel -+ * address space. Same restrictions as for vmap and friends apply. -+ * @dmabuf: [in] buffer to vmap -+ * @map: [out] returns the vmap pointer -+ * -+ * This call may fail due to lack of virtual mapping address space. -+ * These calls are optional in drivers. The intended use for them -+ * is for mapping objects linear in kernel space for high use objects. -+ * -+ * To ensure coherency users must call dma_buf_begin_cpu_access() and -+ * dma_buf_end_cpu_access() around any cpu access performed through this -+ * mapping. -+ * -+ * Returns 0 on success, or a negative errno code otherwise. 
-+ */ -+int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map) ++static void of_get_rk3328_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ -+ struct iosys_map ptr; ++ struct device_node *np_tim; ++ u32 *p; ++ struct rk3328_ddr_dts_config_timing *dts_timing; ++ struct rk3328_ddr_de_skew_setting *de_skew; + int ret = 0; ++ u32 i; + -+ iosys_map_clear(map); -+ -+ if (WARN_ON(!dmabuf)) -+ return -EINVAL; -+ -+ if (!dmabuf->ops->vmap) -+ return -EINVAL; ++ dts_timing = ++ (struct rk3328_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); + -+ mutex_lock(&dmabuf->lock); -+ if (dmabuf->vmapping_counter) { -+ dmabuf->vmapping_counter++; -+ BUG_ON(iosys_map_is_null(&dmabuf->vmap_ptr)); -+ *map = dmabuf->vmap_ptr; -+ goto out_unlock; ++ np_tim = of_parse_phandle(np, "ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } ++ de_skew = kmalloc(sizeof(*de_skew), GFP_KERNEL); ++ if (!de_skew) { ++ ret = -ENOMEM; ++ goto end; ++ } ++ p = (u32 *)dts_timing; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->ca_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_ca_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_ca_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->cs0_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs0_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_cs0_timing[i], ++ p + i); ++ } ++ p = (u32 *)de_skew->cs1_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rk3328_dts_cs1_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rk3328_dts_cs1_timing[i], ++ p + i); ++ } ++ if (!ret) ++ rk3328_de_skew_setting_2_register(de_skew, dts_timing); ++ kfree(de_skew); ++end: ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); + } + -+ BUG_ON(iosys_map_is_set(&dmabuf->vmap_ptr)); -+ -+ ret = dmabuf->ops->vmap(dmabuf, &ptr); -+ if (WARN_ON_ONCE(ret)) -+ goto out_unlock; -+ -+ dmabuf->vmap_ptr = ptr; -+ dmabuf->vmapping_counter = 1; -+ -+ *map = dmabuf->vmap_ptr; -+ -+out_unlock: -+ mutex_unlock(&dmabuf->lock); -+ return ret; ++ of_node_put(np_tim); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, DMA_BUF); + -+/** -+ * dma_buf_vmap_unlocked - Create virtual mapping for the buffer object into kernel -+ * address space. Same restrictions as for vmap and friends apply. -+ * @dmabuf: [in] buffer to vmap -+ * @map: [out] returns the vmap pointer -+ * -+ * Unlocked version of dma_buf_vmap() -+ * -+ * Returns 0 on success, or a negative errno code otherwise. 
-+ */ -+int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) ++static void of_get_rv1126_timings(struct device *dev, ++ struct device_node *np, uint32_t *timing) +{ -+ int ret; ++ struct device_node *np_tim; ++ u32 *p; ++ struct rk1808_ddr_dts_config_timing *dts_timing; ++ int ret = 0; ++ u32 i; + -+ iosys_map_clear(map); ++ dts_timing = ++ (struct rk1808_ddr_dts_config_timing *)(timing + ++ DTS_PAR_OFFSET / 4); + -+ if (WARN_ON(!dmabuf)) -+ return -EINVAL; ++ np_tim = of_parse_phandle(np, "ddr_timing", 0); ++ if (!np_tim) { ++ ret = -EINVAL; ++ goto end; ++ } + -+ dma_resv_lock(dmabuf->resv, NULL); -+ ret = dma_buf_vmap(dmabuf, map); -+ dma_resv_unlock(dmabuf->resv); ++ p = (u32 *)dts_timing; ++ for (i = 0; i < ARRAY_SIZE(px30_dts_timing); i++) { ++ ret |= of_property_read_u32(np_tim, px30_dts_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->ca_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rv1126_dts_ca_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rv1126_dts_ca_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs0_a_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs0_a_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rv1126_dts_cs0_a_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs0_b_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs0_b_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rv1126_dts_cs0_b_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs1_a_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs1_a_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rv1126_dts_cs1_a_timing[i], ++ p + i); ++ } ++ p = (u32 *)dts_timing->cs1_b_de_skew; ++ for (i = 0; i < ARRAY_SIZE(rv1126_dts_cs1_b_timing); i++) { ++ ret |= of_property_read_u32(np_tim, rv1126_dts_cs1_b_timing[i], ++ p + i); ++ } + -+ return ret; ++end: ++ if (!ret) { ++ dts_timing->available = 1; ++ } else { ++ dts_timing->available = 0; ++ dev_err(dev, "of_get_ddr_timings: fail\n"); ++ } ++ ++ of_node_put(np_tim); +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_vmap_unlocked, DMA_BUF); + -+/** -+ * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap. 
-+ * @dmabuf: [in] buffer to vunmap -+ * @map: [in] vmap pointer to vunmap -+ */ -+void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) ++static struct rk3399_dram_timing *of_get_rk3399_timings(struct device *dev, ++ struct device_node *np) +{ -+ if (WARN_ON(!dmabuf)) -+ return; ++ struct rk3399_dram_timing *timing = NULL; ++ struct device_node *np_tim; ++ int ret; + -+ BUG_ON(iosys_map_is_null(&dmabuf->vmap_ptr)); -+ BUG_ON(dmabuf->vmapping_counter == 0); -+ BUG_ON(!iosys_map_is_equal(&dmabuf->vmap_ptr, map)); ++ np_tim = of_parse_phandle(np, "ddr_timing", 0); ++ if (np_tim) { ++ timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); ++ if (!timing) ++ goto err; + -+ mutex_lock(&dmabuf->lock); -+ if (--dmabuf->vmapping_counter == 0) { -+ if (dmabuf->ops->vunmap) -+ dmabuf->ops->vunmap(dmabuf, map); -+ iosys_map_clear(&dmabuf->vmap_ptr); ++ ret = of_property_read_u32(np_tim, "ddr3_speed_bin", ++ &timing->ddr3_speed_bin); ++ ret |= of_property_read_u32(np_tim, "pd_idle", ++ &timing->pd_idle); ++ ret |= of_property_read_u32(np_tim, "sr_idle", ++ &timing->sr_idle); ++ ret |= of_property_read_u32(np_tim, "sr_mc_gate_idle", ++ &timing->sr_mc_gate_idle); ++ ret |= of_property_read_u32(np_tim, "srpd_lite_idle", ++ &timing->srpd_lite_idle); ++ ret |= of_property_read_u32(np_tim, "standby_idle", ++ &timing->standby_idle); ++ ret |= of_property_read_u32(np_tim, "auto_lp_dis_freq", ++ &timing->auto_lp_dis_freq); ++ ret |= of_property_read_u32(np_tim, "ddr3_dll_dis_freq", ++ &timing->ddr3_dll_dis_freq); ++ ret |= of_property_read_u32(np_tim, "phy_dll_dis_freq", ++ &timing->phy_dll_dis_freq); ++ ret |= of_property_read_u32(np_tim, "ddr3_odt_dis_freq", ++ &timing->ddr3_odt_dis_freq); ++ ret |= of_property_read_u32(np_tim, "ddr3_drv", ++ &timing->ddr3_drv); ++ ret |= of_property_read_u32(np_tim, "ddr3_odt", ++ &timing->ddr3_odt); ++ ret |= of_property_read_u32(np_tim, "phy_ddr3_ca_drv", ++ &timing->phy_ddr3_ca_drv); ++ ret |= of_property_read_u32(np_tim, "phy_ddr3_dq_drv", ++ &timing->phy_ddr3_dq_drv); ++ ret |= of_property_read_u32(np_tim, "phy_ddr3_odt", ++ &timing->phy_ddr3_odt); ++ ret |= of_property_read_u32(np_tim, "lpddr3_odt_dis_freq", ++ &timing->lpddr3_odt_dis_freq); ++ ret |= of_property_read_u32(np_tim, "lpddr3_drv", ++ &timing->lpddr3_drv); ++ ret |= of_property_read_u32(np_tim, "lpddr3_odt", ++ &timing->lpddr3_odt); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr3_ca_drv", ++ &timing->phy_lpddr3_ca_drv); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr3_dq_drv", ++ &timing->phy_lpddr3_dq_drv); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr3_odt", ++ &timing->phy_lpddr3_odt); ++ ret |= of_property_read_u32(np_tim, "lpddr4_odt_dis_freq", ++ &timing->lpddr4_odt_dis_freq); ++ ret |= of_property_read_u32(np_tim, "lpddr4_drv", ++ &timing->lpddr4_drv); ++ ret |= of_property_read_u32(np_tim, "lpddr4_dq_odt", ++ &timing->lpddr4_dq_odt); ++ ret |= of_property_read_u32(np_tim, "lpddr4_ca_odt", ++ &timing->lpddr4_ca_odt); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr4_ca_drv", ++ &timing->phy_lpddr4_ca_drv); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr4_ck_cs_drv", ++ &timing->phy_lpddr4_ck_cs_drv); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr4_dq_drv", ++ &timing->phy_lpddr4_dq_drv); ++ ret |= of_property_read_u32(np_tim, "phy_lpddr4_odt", ++ &timing->phy_lpddr4_odt); ++ if (ret) { ++ devm_kfree(dev, timing); ++ goto err; ++ } ++ of_node_put(np_tim); ++ return timing; + } -+ mutex_unlock(&dmabuf->lock); ++ ++err: ++ if (timing) { ++ devm_kfree(dev, timing); ++ 
timing = NULL; ++ } ++ of_node_put(np_tim); ++ return timing; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, DMA_BUF); + -+/** -+ * dma_buf_vunmap_unlocked - Unmap a vmap obtained by dma_buf_vmap. -+ * @dmabuf: [in] buffer to vunmap -+ * @map: [in] vmap pointer to vunmap -+ */ -+void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) ++static int rockchip_ddr_set_auto_self_refresh(uint32_t en) +{ -+ if (WARN_ON(!dmabuf)) -+ return; ++ struct arm_smccc_res res; + -+ dma_resv_lock(dmabuf->resv, NULL); -+ dma_buf_vunmap(dmabuf, map); -+ dma_resv_unlock(dmabuf->resv); ++ ddr_psci_param->sr_idle_en = en; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_AT_SR); ++ ++ return res.a0; +} -+EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF); + -+#ifdef CONFIG_DEBUG_FS -+static int dma_buf_debug_show(struct seq_file *s, void *unused) -+{ -+ struct dma_buf *buf_obj; -+ struct dma_buf_attachment *attach_obj; -+ int count = 0, attach_count; -+ size_t size = 0; -+ int ret; ++struct dmcfreq_wait_ctrl_t { ++ wait_queue_head_t wait_wq; ++ int complt_irq; ++ int wait_flag; ++ int wait_en; ++ int wait_time_out_ms; ++ int dcf_en; ++ struct regmap *regmap_dcf; ++}; + -+ ret = mutex_lock_interruptible(&db_list.lock); ++static struct dmcfreq_wait_ctrl_t wait_ctrl; + -+ if (ret) -+ return ret; ++static irqreturn_t wait_complete_irq(int irqno, void *dev_id) ++{ ++ struct dmcfreq_wait_ctrl_t *ctrl = dev_id; + -+ seq_puts(s, "\nDma-buf Objects:\n"); -+ seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", -+ "size", "flags", "mode", "count", "ino"); ++ ctrl->wait_flag = 0; ++ wake_up(&ctrl->wait_wq); ++ return IRQ_HANDLED; ++} + -+ list_for_each_entry(buf_obj, &db_list.head, list_node) { ++static irqreturn_t wait_dcf_complete_irq(int irqno, void *dev_id) ++{ ++ struct arm_smccc_res res; ++ struct dmcfreq_wait_ctrl_t *ctrl = dev_id; + -+ ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); -+ if (ret) -+ goto error_unlock; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_POST_SET_RATE); ++ if (res.a0) ++ pr_err("%s: dram post set rate error:%lx\n", __func__, res.a0); + ++ ctrl->wait_flag = 0; ++ wake_up(&ctrl->wait_wq); ++ return IRQ_HANDLED; ++} + -+ spin_lock(&buf_obj->name_lock); -+ seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n", -+ buf_obj->size, -+ buf_obj->file->f_flags, buf_obj->file->f_mode, -+ file_count(buf_obj->file), -+ buf_obj->exp_name, -+ file_inode(buf_obj->file)->i_ino, -+ buf_obj->name ?: ""); -+ spin_unlock(&buf_obj->name_lock); ++int rockchip_dmcfreq_wait_complete(void) ++{ ++ struct arm_smccc_res res; + -+ dma_resv_describe(buf_obj->resv, s); ++ if (!wait_ctrl.wait_en) { ++ pr_err("%s: Do not support time out!\n", __func__); ++ return 0; ++ } ++ wait_ctrl.wait_flag = -1; + -+ seq_puts(s, "\tAttached Devices:\n"); -+ attach_count = 0; ++ enable_irq(wait_ctrl.complt_irq); ++ /* ++ * CPUs only enter WFI when idle to make sure that ++ * FIQn can quick response. 
++ */ ++ cpu_latency_qos_update_request(&pm_qos, 0); + -+ list_for_each_entry(attach_obj, &buf_obj->attachments, node) { -+ seq_printf(s, "\t%s\n", dev_name(attach_obj->dev)); -+ attach_count++; ++ if (wait_ctrl.dcf_en == 1) { ++ /* start dcf */ ++ regmap_update_bits(wait_ctrl.regmap_dcf, 0x0, 0x1, 0x1); ++ } else if (wait_ctrl.dcf_en == 2) { ++ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_MCU_START); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_mcu_start error:%lx\n", res.a0); ++ return -ENOMEM; + } -+ dma_resv_unlock(buf_obj->resv); ++ } + -+ seq_printf(s, "Total %d devices attached\n\n", -+ attach_count); ++ wait_event_timeout(wait_ctrl.wait_wq, (wait_ctrl.wait_flag == 0), ++ msecs_to_jiffies(wait_ctrl.wait_time_out_ms)); + -+ count++; -+ size += buf_obj->size; ++ /* ++ * If waiting for wait_ctrl.complt_irq times out, clear the IRQ and stop the MCU by ++ * sip_smc_dram(DRAM_POST_SET_RATE). ++ */ ++ if (wait_ctrl.dcf_en == 2 && wait_ctrl.wait_flag != 0) { ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_POST_SET_RATE); ++ if (res.a0) ++ pr_err("%s: dram post set rate error:%lx\n", __func__, res.a0); + } + -+ seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size); ++ cpu_latency_qos_update_request(&pm_qos, PM_QOS_DEFAULT_VALUE); ++ disable_irq(wait_ctrl.complt_irq); + -+ mutex_unlock(&db_list.lock); + return 0; -+ -+error_unlock: -+ mutex_unlock(&db_list.lock); -+ return ret; +} + -+DEFINE_SHOW_ATTRIBUTE(dma_buf_debug); ++static __maybe_unused int rockchip_get_freq_info(struct rockchip_dmcfreq *dmcfreq) ++{ ++ struct arm_smccc_res res; ++ struct dev_pm_opp *opp; ++ struct dmc_freq_table *freq_table; ++ unsigned long rate; ++ int i, j, count, ret = 0; + -+static struct dentry *dma_buf_debugfs_dir; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_FREQ_INFO); ++ if (res.a0) { ++ dev_err(dmcfreq->dev, "rockchip_sip_config_dram_get_freq_info error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+static int dma_buf_init_debugfs(void) -+{ -+ struct dentry *d; -+ int err = 0; ++ if (ddr_psci_param->freq_count == 0 || ddr_psci_param->freq_count > 6) { ++ dev_err(dmcfreq->dev, "it is no available frequencies!\n"); ++ return -EPERM; ++ } + -+ d = debugfs_create_dir("dma_buf", NULL); -+ if (IS_ERR(d)) -+ return PTR_ERR(d); ++ for (i = 0; i < ddr_psci_param->freq_count; i++) ++ dmcfreq->freq_info_rate[i] = ddr_psci_param->freq_info_mhz[i] * 1000000; ++ dmcfreq->freq_count = ddr_psci_param->freq_count; + -+ dma_buf_debugfs_dir = d; ++ /* update dmc_opp_table */ ++ count = dev_pm_opp_get_opp_count(dmcfreq->dev); ++ if (count <= 0) { ++ ret = count ? 
count : -ENODATA; ++ return ret; ++ } + -+ d = debugfs_create_file("bufinfo", S_IRUGO, dma_buf_debugfs_dir, -+ NULL, &dma_buf_debug_fops); -+ if (IS_ERR(d)) { -+ pr_debug("dma_buf: debugfs: failed to create node bufinfo\n"); -+ debugfs_remove_recursive(dma_buf_debugfs_dir); -+ dma_buf_debugfs_dir = NULL; -+ err = PTR_ERR(d); ++ freq_table = kzalloc(sizeof(*freq_table) * count, GFP_KERNEL); ++ for (i = 0, rate = 0; i < count; i++, rate++) { ++ /* find next rate */ ++ opp = dev_pm_opp_find_freq_ceil(dmcfreq->dev, &rate); ++ if (IS_ERR(opp)) { ++ ret = PTR_ERR(opp); ++ dev_err(dmcfreq->dev, "failed to find OPP for freq %lu.\n", rate); ++ goto out; ++ } ++ freq_table[i].freq = rate; ++ freq_table[i].supplies[0].u_volt = dev_pm_opp_get_voltage(opp); ++ dev_pm_opp_put(opp); ++ ++ for (j = 0; j < dmcfreq->freq_count; j++) { ++ if (rate == dmcfreq->freq_info_rate[j]) ++ break; ++ } ++ if (j == dmcfreq->freq_count) ++ dev_pm_opp_disable(dmcfreq->dev, rate); + } + -+ dma_buf_process_info_init_debugfs(dma_buf_debugfs_dir); -+ return err; -+} ++ for (i = 0; i < dmcfreq->freq_count; i++) { ++ for (j = 0; j < count; j++) { ++ if (dmcfreq->freq_info_rate[i] == freq_table[j].freq) { ++ break; ++ } else if (dmcfreq->freq_info_rate[i] < freq_table[j].freq) { ++ dev_pm_opp_add(dmcfreq->dev, dmcfreq->freq_info_rate[i], ++ freq_table[j].supplies[0].u_volt); ++ break; ++ } ++ } ++ if (j == count) { ++ dev_err(dmcfreq->dev, "failed to match dmc_opp_table for %ld\n", ++ dmcfreq->freq_info_rate[i]); ++ if (i == 0) ++ ret = -EPERM; ++ else ++ dmcfreq->freq_count = i; ++ goto out; ++ } ++ } + -+static void dma_buf_uninit_debugfs(void) -+{ -+ debugfs_remove_recursive(dma_buf_debugfs_dir); -+} -+#else -+static inline int dma_buf_init_debugfs(void) -+{ -+ return 0; -+} -+static inline void dma_buf_uninit_debugfs(void) -+{ ++out: ++ kfree(freq_table); ++ return ret; +} -+#endif + -+#ifdef CONFIG_DMABUF_PROCESS_INFO -+struct dma_buf *get_dma_buf_from_file(struct file *f) ++static __maybe_unused int ++rockchip_dmcfreq_adjust_opp_table(struct rockchip_dmcfreq *dmcfreq) +{ -+ if (IS_ERR_OR_NULL(f)) -+ return NULL; ++ struct device *dev = dmcfreq->dev; ++ struct arm_smccc_res res; ++ struct dev_pm_opp *opp; ++ struct opp_table *opp_table; ++ struct dmc_freq_table *freq_table; ++ int i, j, count = 0, ret = 0; + -+ if (!is_dma_buf_file(f)) -+ return NULL; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_FREQ_INFO); ++ if (res.a0) { ++ dev_err(dev, "rockchip_sip_config_dram_get_freq_info error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+ return f->private_data; -+} -+#endif /* CONFIG_DMABUF_PROCESS_INFO */ ++ if (ddr_psci_param->freq_count == 0 || ddr_psci_param->freq_count > 6) { ++ dev_err(dev, "there is no available frequencies!\n"); ++ return -EPERM; ++ } + -+static int __init dma_buf_init(void) -+{ -+ int ret; ++ for (i = 0; i < ddr_psci_param->freq_count; i++) ++ dmcfreq->freq_info_rate[i] = ddr_psci_param->freq_info_mhz[i] * 1000000; ++ dmcfreq->freq_count = ddr_psci_param->freq_count; + -+ ret = dma_buf_init_sysfs_statistics(); -+ if (ret) ++ count = dev_pm_opp_get_opp_count(dev); ++ if (count <= 0) { ++ dev_err(dev, "there is no available opp\n"); ++ ret = count ? 
count : -ENODATA; + return ret; ++ } + -+ dma_buf_mnt = kern_mount(&dma_buf_fs_type); -+ if (IS_ERR(dma_buf_mnt)) -+ return PTR_ERR(dma_buf_mnt); ++ freq_table = kzalloc(sizeof(*freq_table) * count, GFP_KERNEL); ++ opp_table = dev_pm_opp_get_opp_table(dev); ++ if (!opp_table) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ mutex_init(&db_list.lock); -+ INIT_LIST_HEAD(&db_list.head); -+ dma_buf_init_debugfs(); -+ dma_buf_process_info_init_procfs(); -+ return 0; -+} -+subsys_initcall(dma_buf_init); ++ mutex_lock(&opp_table->lock); ++ i = 0; ++ list_for_each_entry(opp, &opp_table->opp_list, node) { ++ if (!opp->available) ++ continue; + -+static void __exit dma_buf_deinit(void) -+{ -+ dma_buf_uninit_debugfs(); -+ kern_unmount(dma_buf_mnt); -+ dma_buf_uninit_sysfs_statistics(); -+ dma_buf_process_info_uninit_procfs(); -+} -+__exitcall(dma_buf_deinit); -diff --git a/drivers/dma-buf/dma-heap-rk.c b/drivers/dma-buf/dma-heap-rk.c -new file mode 100644 -index 000000000..a777e9185 ---- /dev/null -+++ b/drivers/dma-buf/dma-heap-rk.c -@@ -0,0 +1,340 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Framework for userspace DMA-BUF allocations -+ * -+ * Copyright (C) 2011 Google, Inc. -+ * Copyright (C) 2019 Linaro Ltd. -+ */ ++ freq_table[i].freq = opp->rates[0]; ++ freq_table[i].supplies[0] = opp->supplies[0]; ++ if (dmcfreq->opp_info.regulator_count > 1) ++ freq_table[i].supplies[1] = opp->supplies[1]; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ i++; ++ } + -+#define DEVNAME "dma_heap" ++ i = 0; ++ list_for_each_entry(opp, &opp_table->opp_list, node) { ++ if (!opp->available) ++ continue; + -+#define NUM_HEAP_MINORS 128 ++ if (i >= dmcfreq->freq_count) { ++ opp->available = false; ++ continue; ++ } + -+/** -+ * struct dma_heap - represents a dmabuf heap in the system -+ * @name: used for debugging/device-node name -+ * @ops: ops struct for this heap -+ * @heap_devt heap device node -+ * @list list head connecting to list of heaps -+ * @heap_cdev heap char device -+ * -+ * Represents a heap of memory from which buffers can be made. 
-+ */ -+struct dma_heap { -+ const char *name; -+ const struct dma_heap_ops *ops; -+ void *priv; -+ dev_t heap_devt; -+ struct list_head list; -+ struct cdev heap_cdev; -+}; ++ for (j = 0; j < count; j++) { ++ if (dmcfreq->freq_info_rate[i] <= freq_table[j].freq) { ++ opp->rates[0] = dmcfreq->freq_info_rate[i]; ++ opp->supplies[0] = freq_table[j].supplies[0]; ++ if (dmcfreq->opp_info.regulator_count > 1) ++ opp->supplies[1] = freq_table[j].supplies[1]; + -+static LIST_HEAD(heap_list); -+static DEFINE_MUTEX(heap_list_lock); -+static dev_t dma_heap_devt; -+static struct class *dma_heap_class; -+static DEFINE_XARRAY_ALLOC(dma_heap_minors); ++ break; ++ } ++ } ++ if (j == count) { ++ dev_err(dmcfreq->dev, "failed to match dmc_opp_table for %ld\n", ++ dmcfreq->freq_info_rate[i]); ++ if (i == 0) { ++ ret = -EPERM; ++ goto out; ++ } else { ++ opp->available = false; ++ dmcfreq->freq_count = i; ++ } ++ } ++ i++; ++ } + -+static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, -+ unsigned int fd_flags, -+ unsigned int heap_flags) ++ mutex_unlock(&opp_table->lock); ++ dev_pm_opp_put_opp_table(opp_table); ++ ++out: ++ kfree(freq_table); ++ return ret; ++} ++ ++static __maybe_unused int px30_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ struct dma_buf *dmabuf; -+ int fd; ++ struct arm_smccc_res res; ++ u32 size; ++ int ret; ++ int complt_irq; ++ u32 complt_hwirq; ++ struct irq_data *complt_irq_data; ++ ++ res = sip_smc_dram(0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); ++ if (res.a0 || res.a1 < 0x103) { ++ dev_err(&pdev->dev, ++ "trusted firmware need to update or is invalid!\n"); ++ return -ENXIO; ++ } ++ ++ dev_notice(&pdev->dev, "read tf version 0x%lx!\n", res.a1); + + /* -+ * Allocations from all heaps have to begin -+ * and end on page boundaries. 
++ * first 4KB is used for interface parameters ++ * after 4KB * N is dts parameters + */ -+ len = PAGE_ALIGN(len); -+ if (!len) -+ return -EINVAL; ++ size = sizeof(struct px30_ddr_dts_config_timing); ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, ++ SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ of_get_px30_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+ dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags); -+ if (IS_ERR(dmabuf)) -+ return PTR_ERR(dmabuf); ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; + -+ fd = dma_buf_fd(dmabuf, fd_flags); -+ if (fd < 0) { -+ dma_buf_put(dmabuf); -+ /* just return, as put will call release and that will free */ ++ complt_irq = platform_get_irq_byname(pdev, "complete_irq"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complete_irq: %d\n", ++ complt_irq); ++ return complt_irq; + } -+ return fd; -+} ++ wait_ctrl.complt_irq = complt_irq; + -+static int dma_heap_open(struct inode *inode, struct file *file) -+{ -+ struct dma_heap *heap; ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complete_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); + -+ heap = xa_load(&dma_heap_minors, iminor(inode)); -+ if (!heap) { -+ pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); -+ return -ENODEV; ++ complt_irq_data = irq_get_irq_data(complt_irq); ++ complt_hwirq = irqd_to_hwirq(complt_irq_data); ++ ddr_psci_param->complt_hwirq = complt_hwirq; ++ ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; + } + -+ /* instance data as context */ -+ file->private_data = heap; -+ nonseekable_open(inode, file); ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + + return 0; +} + -+static long dma_heap_ioctl_allocate(struct file *file, void *data) ++static __maybe_unused int rk1808_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ struct dma_heap_allocation_data *heap_allocation = data; -+ struct dma_heap *heap = file->private_data; -+ int fd; ++ struct arm_smccc_res res; ++ u32 size; ++ int ret; ++ int complt_irq; ++ struct device_node *node; + -+ if (heap_allocation->fd) -+ return -EINVAL; ++ res = sip_smc_dram(0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); ++ if (res.a0 || res.a1 < 0x101) { ++ dev_err(&pdev->dev, ++ "trusted firmware need to update or is invalid!\n"); ++ return -ENXIO; ++ } + -+ if (heap_allocation->fd_flags & ~DMA_HEAP_VALID_FD_FLAGS) -+ return -EINVAL; ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB * N is dts parameters ++ */ ++ size = sizeof(struct rk1808_ddr_dts_config_timing); ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, ++ SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ 
of_get_rk1808_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+ fd = dma_heap_buffer_alloc(heap, heap_allocation->len, -+ heap_allocation->fd_flags, -+ heap_allocation->heap_flags); -+ if (fd < 0) -+ return fd; ++ /* enable start dcf in kernel after dcf ready */ ++ node = of_parse_phandle(pdev->dev.of_node, "dcf_reg", 0); ++ wait_ctrl.regmap_dcf = syscon_node_to_regmap(node); ++ if (IS_ERR(wait_ctrl.regmap_dcf)) ++ return PTR_ERR(wait_ctrl.regmap_dcf); ++ wait_ctrl.dcf_en = 1; + -+ heap_allocation->fd = fd; ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; ++ ++ complt_irq = platform_get_irq_byname(pdev, "complete_irq"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complete_irq: %d\n", ++ complt_irq); ++ return complt_irq; ++ } ++ wait_ctrl.complt_irq = complt_irq; ++ ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complete_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); ++ ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } ++ ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + + return 0; +} + -+static int dma_heap_ioctl_get_phys(struct file *file, void *data) ++static __maybe_unused int rk3128_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+#if IS_ENABLED(CONFIG_NO_GKI) -+ struct dma_heap *heap = file->private_data; -+ struct dma_heap_phys_data *phys = data; ++ struct arm_smccc_res res; + -+ if (heap->ops->get_phys) -+ return heap->ops->get_phys(heap, phys); -+#endif ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( ++ struct rk3128_ddr_dts_config_timing), ++ 4096) + 1, SHARE_PAGE_TYPE_DDR); ++ if (res.a0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ of_get_rk3128_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+ return -EINVAL; -+} ++ ddr_psci_param->hz = 0; ++ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); + -+static unsigned int dma_heap_ioctl_cmds[] = { -+ DMA_HEAP_IOCTL_ALLOC, -+ DMA_HEAP_IOCTL_GET_PHYS, -+}; ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); + -+static long dma_heap_ioctl(struct file *file, unsigned int ucmd, -+ unsigned long arg) -+{ -+ char stack_kdata[128]; -+ char *kdata = stack_kdata; -+ unsigned int kcmd; -+ unsigned int in_size, out_size, drv_size, ksize; -+ int nr = _IOC_NR(ucmd); -+ int ret = 0; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); + -+ if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds)) -+ return -EINVAL; ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+ nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds)); -+ /* Get the kernel ioctl cmd that matches */ -+ kcmd = dma_heap_ioctl_cmds[nr]; ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + -+ /* Figure out the delta between user cmd size and kernel cmd size */ -+ drv_size = 
_IOC_SIZE(kcmd); -+ out_size = _IOC_SIZE(ucmd); -+ in_size = out_size; -+ if ((ucmd & kcmd & IOC_IN) == 0) -+ in_size = 0; -+ if ((ucmd & kcmd & IOC_OUT) == 0) -+ out_size = 0; -+ ksize = max(max(in_size, out_size), drv_size); ++ return 0; ++} + -+ /* If necessary, allocate buffer for ioctl argument */ -+ if (ksize > sizeof(stack_kdata)) { -+ kdata = kmalloc(ksize, GFP_KERNEL); -+ if (!kdata) -+ return -ENOMEM; -+ } ++static __maybe_unused int rk3228_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) ++{ ++ struct arm_smccc_res res; + -+ if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { -+ ret = -EFAULT; -+ goto err; ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( ++ struct rk3228_ddr_dts_config_timing), ++ 4096) + 1, SHARE_PAGE_TYPE_DDR); ++ if (res.a0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; + } + -+ /* zero out any difference between the kernel/user structure size */ -+ if (ksize > in_size) -+ memset(kdata + in_size, 0, ksize - in_size); ++ ddr_psci_param = (struct share_params *)res.a1; ++ if (of_get_rk3228_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param)) ++ return -ENOMEM; + -+ switch (kcmd) { -+ case DMA_HEAP_IOCTL_ALLOC: -+ ret = dma_heap_ioctl_allocate(file, kdata); -+ break; -+ case DMA_HEAP_IOCTL_GET_PHYS: -+ ret = dma_heap_ioctl_get_phys(file, kdata); -+ break; -+ default: -+ ret = -ENOTTY; -+ goto err; -+ } ++ ddr_psci_param->hz = 0; + -+ if (copy_to_user((void __user *)arg, kdata, out_size) != 0) -+ ret = -EFAULT; -+err: -+ if (kdata != stack_kdata) -+ kfree(kdata); -+ return ret; -+} ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); + -+static const struct file_operations dma_heap_fops = { -+ .owner = THIS_MODULE, -+ .open = dma_heap_open, -+ .unlocked_ioctl = dma_heap_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = dma_heap_ioctl, -+#endif -+}; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); + -+/** -+ * dma_heap_get_drvdata() - get per-subdriver data for the heap -+ * @heap: DMA-Heap to retrieve private data for -+ * -+ * Returns: -+ * The per-subdriver data for the heap. -+ */ -+void *dma_heap_get_drvdata(struct dma_heap *heap) -+{ -+ return heap->priv; -+} ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+/** -+ * dma_heap_get_name() - get heap name -+ * @heap: DMA-Heap to retrieve private data for -+ * -+ * Returns: -+ * The char* for the heap name. 
-+ */ -+const char *dma_heap_get_name(struct dma_heap *heap) -+{ -+ return heap->name; ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++ ++ return 0; +} + -+struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) ++static __maybe_unused int rk3288_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ struct dma_heap *heap, *h, *err_ret; -+ struct device *dev_ret; -+ unsigned int minor; ++ struct device *dev = &pdev->dev; ++ struct clk *pclk_phy, *pclk_upctl, *dmc_clk; ++ struct arm_smccc_res res; + int ret; + -+ if (!exp_info->name || !strcmp(exp_info->name, "")) { -+ pr_err("dma_heap: Cannot add heap without a name\n"); -+ return ERR_PTR(-EINVAL); ++ dmc_clk = devm_clk_get(dev, "dmc_clk"); ++ if (IS_ERR(dmc_clk)) { ++ dev_err(dev, "Cannot get the clk dmc_clk\n"); ++ return PTR_ERR(dmc_clk); + } -+ -+ if (!exp_info->ops || !exp_info->ops->allocate) { -+ pr_err("dma_heap: Cannot add heap with invalid ops struct\n"); -+ return ERR_PTR(-EINVAL); ++ ret = clk_prepare_enable(dmc_clk); ++ if (ret < 0) { ++ dev_err(dev, "failed to prepare/enable dmc_clk\n"); ++ return ret; + } + -+ heap = kzalloc(sizeof(*heap), GFP_KERNEL); -+ if (!heap) -+ return ERR_PTR(-ENOMEM); -+ -+ heap->name = exp_info->name; -+ heap->ops = exp_info->ops; -+ heap->priv = exp_info->priv; -+ -+ /* Find unused minor number */ -+ ret = xa_alloc(&dma_heap_minors, &minor, heap, -+ XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); ++ pclk_phy = devm_clk_get(dev, "pclk_phy0"); ++ if (IS_ERR(pclk_phy)) { ++ dev_err(dev, "Cannot get the clk pclk_phy0\n"); ++ return PTR_ERR(pclk_phy); ++ } ++ ret = clk_prepare_enable(pclk_phy); + if (ret < 0) { -+ pr_err("dma_heap: Unable to get minor number for heap\n"); -+ err_ret = ERR_PTR(ret); -+ goto err0; ++ dev_err(dev, "failed to prepare/enable pclk_phy0\n"); ++ return ret; + } -+ -+ /* Create device */ -+ heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), minor); -+ -+ cdev_init(&heap->heap_cdev, &dma_heap_fops); -+ ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); ++ pclk_upctl = devm_clk_get(dev, "pclk_upctl0"); ++ if (IS_ERR(pclk_upctl)) { ++ dev_err(dev, "Cannot get the clk pclk_upctl0\n"); ++ return PTR_ERR(pclk_upctl); ++ } ++ ret = clk_prepare_enable(pclk_upctl); + if (ret < 0) { -+ pr_err("dma_heap: Unable to add char device\n"); -+ err_ret = ERR_PTR(ret); -+ goto err1; ++ dev_err(dev, "failed to prepare/enable pclk_upctl1\n"); ++ return ret; + } + -+ dev_ret = device_create(dma_heap_class, -+ NULL, -+ heap->heap_devt, -+ NULL, -+ heap->name); -+ if (IS_ERR(dev_ret)) { -+ pr_err("dma_heap: Unable to create device\n"); -+ err_ret = ERR_CAST(dev_ret); -+ goto err2; ++ pclk_phy = devm_clk_get(dev, "pclk_phy1"); ++ if (IS_ERR(pclk_phy)) { ++ dev_err(dev, "Cannot get the clk pclk_phy1\n"); ++ return PTR_ERR(pclk_phy); + } -+ -+ mutex_lock(&heap_list_lock); -+ /* check the name is unique */ -+ list_for_each_entry(h, &heap_list, list) { -+ if (!strcmp(h->name, exp_info->name)) { -+ mutex_unlock(&heap_list_lock); -+ pr_err("dma_heap: Already registered heap named %s\n", -+ exp_info->name); -+ err_ret = ERR_PTR(-EINVAL); -+ goto err3; -+ } ++ ret = clk_prepare_enable(pclk_phy); ++ if (ret < 0) { ++ dev_err(dev, "failed to prepare/enable pclk_phy1\n"); ++ return ret; ++ } ++ pclk_upctl = devm_clk_get(dev, "pclk_upctl1"); ++ if (IS_ERR(pclk_upctl)) { ++ dev_err(dev, "Cannot get the clk pclk_upctl1\n"); ++ return PTR_ERR(pclk_upctl); ++ } ++ ret = clk_prepare_enable(pclk_upctl); ++ if (ret < 0) { ++ dev_err(dev, "failed to 
prepare/enable pclk_upctl1\n"); ++ return ret; + } + -+ /* Add heap to the list */ -+ list_add(&heap->list, &heap_list); -+ mutex_unlock(&heap_list_lock); -+ -+ return heap; ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(sizeof( ++ struct rk3288_ddr_dts_config_timing), ++ 4096) + 1, SHARE_PAGE_TYPE_DDR); ++ if (res.a0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } + -+err3: -+ device_destroy(dma_heap_class, heap->heap_devt); -+err2: -+ cdev_del(&heap->heap_cdev); -+err1: -+ xa_erase(&dma_heap_minors, minor); -+err0: -+ kfree(heap); -+ return err_ret; -+} ++ ddr_psci_param = (struct share_params *)res.a1; ++ of_get_rk3288_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+static char *dma_heap_devnode(const struct device *dev, umode_t *mode) -+{ -+ return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); -+} ++ ddr_psci_param->hz = 0; ++ ddr_psci_param->lcdc_type = rk_drm_get_lcdc_type(); + -+static int dma_heap_init(void) -+{ -+ int ret; ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); + -+ ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME); -+ if (ret) -+ return ret; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); + -+ dma_heap_class = class_create(DEVNAME); -+ if (IS_ERR(dma_heap_class)) { -+ unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS); -+ return PTR_ERR(dma_heap_class); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; + } -+ dma_heap_class->devnode = dma_heap_devnode; ++ ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + + return 0; +} -+subsys_initcall(dma_heap_init); -diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig -index a5eef06c4..c6b35f28a 100644 ---- a/drivers/dma-buf/heaps/Kconfig -+++ b/drivers/dma-buf/heaps/Kconfig -@@ -1,3 +1,13 @@ -+menuconfig DMABUF_HEAPS_DEFERRED_FREE -+ bool "DMA-BUF heaps deferred-free library" -+ help -+ Choose this option to enable the DMA-BUF heaps deferred-free library. + -+menuconfig DMABUF_HEAPS_PAGE_POOL -+ bool "DMA-BUF heaps page-pool library" -+ help -+ Choose this option to enable the DMA-BUF heaps page-pool library. ++static __maybe_unused int rk3328_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) ++{ ++ struct arm_smccc_res res; ++ u32 size; + - config DMABUF_HEAPS_SYSTEM - bool "DMA-BUF System Heap" - depends on DMABUF_HEAPS -@@ -12,3 +22,10 @@ config DMABUF_HEAPS_CMA - Choose this option to enable dma-buf CMA heap. This heap is backed - by the Contiguous Memory Allocator (CMA). If your system has these - regions, you should say Y here. ++ res = sip_smc_dram(0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx!\n", res.a1); ++ if (res.a0 || (res.a1 < 0x101)) { ++ dev_err(&pdev->dev, ++ "trusted firmware need to update or is invalid!\n"); ++ return -ENXIO; ++ } + -+config DMABUF_HEAPS_SRAM -+ tristate "Export on-chip SRAM pools using DMA-Heaps" -+ depends on DMABUF_HEAPS && SRAM -+ help -+ This driver allows the export of on-chip SRAM marked as exportable -+ to userspace using the DMA-Heaps interface. 
-diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile -index 974467791..f373aa65d 100644 ---- a/drivers/dma-buf/heaps/Makefile -+++ b/drivers/dma-buf/heaps/Makefile -@@ -1,3 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0 -+obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o -+obj-$(CONFIG_DMABUF_HEAPS_PAGE_POOL) += page_pool.o - obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o - obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o -+obj-$(CONFIG_DMABUF_HEAPS_SRAM) += sram_heap.o -diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c -new file mode 100644 -index 000000000..d207eac58 ---- /dev/null -+++ b/drivers/dma-buf/heaps/deferred-free-helper.c -@@ -0,0 +1,138 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Deferred dmabuf freeing helper -+ * -+ * Copyright (C) 2020 Linaro, Ltd. -+ * -+ * Based on the ION page pool code -+ * Copyright (C) 2011 Google, Inc. -+ */ ++ dev_notice(&pdev->dev, "read tf version 0x%lx!\n", res.a1); + -+#include -+#include -+#include -+#include -+#include ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB * N is dts parameters ++ */ ++ size = sizeof(struct rk3328_ddr_dts_config_timing); ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, ++ SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ of_get_rk3328_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+#include "deferred-free-helper.h" ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); + -+static LIST_HEAD(free_list); -+static size_t list_nr_pages; -+wait_queue_head_t freelist_waitqueue; -+struct task_struct *freelist_task; -+static DEFINE_SPINLOCK(free_list_lock); ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+void deferred_free(struct deferred_freelist_item *item, -+ void (*free)(struct deferred_freelist_item*, -+ enum df_reason), -+ size_t nr_pages) ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; ++ ++ return 0; ++} ++ ++static int rk3399_set_msch_readlatency(unsigned int readlatency) +{ -+ unsigned long flags; ++ struct arm_smccc_res res; + -+ INIT_LIST_HEAD(&item->list); -+ item->nr_pages = nr_pages; -+ item->free = free; ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, readlatency, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_MSCH_RL, ++ 0, 0, 0, 0, &res); + -+ spin_lock_irqsave(&free_list_lock, flags); -+ list_add(&item->list, &free_list); -+ list_nr_pages += nr_pages; -+ spin_unlock_irqrestore(&free_list_lock, flags); -+ wake_up(&freelist_waitqueue); ++ return res.a0; +} -+EXPORT_SYMBOL_GPL(deferred_free); + -+static size_t free_one_item(enum df_reason reason) ++static __maybe_unused int rk3399_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ unsigned long flags; -+ size_t nr_pages; -+ struct deferred_freelist_item *item; ++ struct device *dev = &pdev->dev; ++ struct device_node *np = pdev->dev.of_node; ++ struct arm_smccc_res res; ++ struct rk3399_dram_timing *dram_timing; ++ int index, size; ++ u32 *timing; + -+ spin_lock_irqsave(&free_list_lock, flags); -+ if (list_empty(&free_list)) { -+ spin_unlock_irqrestore(&free_list_lock, flags); -+ return 0; ++ /* ++ * Get dram timing and pass it to arm 
trust firmware, ++ * the dram drvier in arm trust firmware will get these ++ * timing and to do dram initial. ++ */ ++ dram_timing = of_get_rk3399_timings(dev, np); ++ if (dram_timing) { ++ timing = (u32 *)dram_timing; ++ size = sizeof(struct rk3399_dram_timing) / 4; ++ for (index = 0; index < size; index++) { ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index, ++ ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM, ++ 0, 0, 0, 0, &res); ++ if (res.a0) { ++ dev_err(dev, "Failed to set dram param: %ld\n", ++ res.a0); ++ return -EINVAL; ++ } ++ } + } -+ item = list_first_entry(&free_list, struct deferred_freelist_item, list); -+ list_del(&item->list); -+ nr_pages = item->nr_pages; -+ list_nr_pages -= nr_pages; -+ spin_unlock_irqrestore(&free_list_lock, flags); + -+ item->free(item, reason); -+ return nr_pages; -+} ++ dmcfreq->set_rate_params = ++ devm_kzalloc(dev, sizeof(struct share_params), GFP_KERNEL); ++ if (!dmcfreq->set_rate_params) ++ return -ENOMEM; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); + -+static unsigned long get_freelist_nr_pages(void) -+{ -+ unsigned long nr_pages; -+ unsigned long flags; ++ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT, ++ 0, 0, 0, 0, &res); + -+ spin_lock_irqsave(&free_list_lock, flags); -+ nr_pages = list_nr_pages; -+ spin_unlock_irqrestore(&free_list_lock, flags); -+ return nr_pages; -+} ++ dmcfreq->info.set_msch_readlatency = rk3399_set_msch_readlatency; + -+static unsigned long freelist_shrink_count(struct shrinker *shrinker, -+ struct shrink_control *sc) -+{ -+ return get_freelist_nr_pages(); ++ return 0; +} + -+static unsigned long freelist_shrink_scan(struct shrinker *shrinker, -+ struct shrink_control *sc) ++static __maybe_unused int rk3528_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ unsigned long total_freed = 0; ++ struct arm_smccc_res res; ++ int ret; ++ int complt_irq; ++ u32 complt_hwirq; ++ struct irq_data *complt_irq_data; + -+ if (sc->nr_to_scan == 0) -+ return 0; ++ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); ++ if (res.a0 || res.a1 < 0x100) { ++ dev_err(&pdev->dev, "trusted firmware need update to V1.00 and above.\n"); ++ return -ENXIO; ++ } + -+ while (total_freed < sc->nr_to_scan) { -+ size_t pages_freed = free_one_item(DF_UNDER_PRESSURE); ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB is dts parameters ++ * request share memory size 4KB * 2 ++ */ ++ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ /* Clear ddr_psci_param, size is 4KB * 2 */ ++ memset_io(ddr_psci_param, 0x0, 4096 * 2); + -+ if (!pages_freed) -+ break; ++ wait_ctrl.dcf_en = 0; + -+ total_freed += pages_freed; ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; ++ ++ complt_irq = platform_get_irq_byname(pdev, "complete"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", complt_irq); ++ return complt_irq; + } ++ wait_ctrl.complt_irq = complt_irq; + -+ return total_freed; -+} ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complt_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); + -+static struct shrinker freelist_shrinker = 
{ -+ .count_objects = freelist_shrink_count, -+ .scan_objects = freelist_shrink_scan, -+ .seeks = DEFAULT_SEEKS, -+ .batch = 0, -+}; ++ complt_irq_data = irq_get_irq_data(complt_irq); ++ complt_hwirq = irqd_to_hwirq(complt_irq_data); ++ ddr_psci_param->complt_hwirq = complt_hwirq; + -+static int deferred_free_thread(void *data) -+{ -+ while (true) { -+ wait_event_freezable(freelist_waitqueue, -+ get_freelist_nr_pages() > 0); ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", res.a0); ++ return -ENOMEM; ++ } + -+ free_one_item(DF_NORMAL); ++ ret = rockchip_get_freq_info(dmcfreq); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot get frequency info\n"); ++ return ret; + } ++ dmcfreq->is_set_rate_direct = true; ++ ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + + return 0; +} + -+static int deferred_freelist_init(void) ++static __maybe_unused int rk3568_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ list_nr_pages = 0; ++ struct arm_smccc_res res; ++ int ret; ++ int complt_irq; + -+ init_waitqueue_head(&freelist_waitqueue); -+ freelist_task = kthread_run(deferred_free_thread, NULL, -+ "%s", "dmabuf-deferred-free-worker"); -+ if (IS_ERR(freelist_task)) { -+ pr_err("Creating thread for deferred free failed\n"); -+ return -1; ++ res = sip_smc_dram(0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); ++ if (res.a0 || res.a1 < 0x101) { ++ dev_err(&pdev->dev, "trusted firmware need update to V1.01 and above.\n"); ++ return -ENXIO; + } -+ sched_set_normal(freelist_task, 19); + -+ return register_shrinker(&freelist_shrinker, "freelist-shrinker"); -+} -+module_init(deferred_freelist_init); -+MODULE_LICENSE("GPL v2"); ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB is dts parameters ++ * request share memory size 4KB * 2 ++ */ ++ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ /* Clear ddr_psci_param, size is 4KB * 2 */ ++ memset_io(ddr_psci_param, 0x0, 4096 * 2); + -diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h -new file mode 100644 -index 000000000..11940328c ---- /dev/null -+++ b/drivers/dma-buf/heaps/deferred-free-helper.h -@@ -0,0 +1,55 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ ++ /* start mcu with sip_smc_dram */ ++ wait_ctrl.dcf_en = 2; + -+#ifndef DEFERRED_FREE_HELPER_H -+#define DEFERRED_FREE_HELPER_H ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; + -+/** -+ * df_reason - enum for reason why item was freed -+ * -+ * This provides a reason for why the free function was called -+ * on the item. This is useful when deferred_free is used in -+ * combination with a pagepool, so under pressure the page can -+ * be immediately freed. -+ * -+ * DF_NORMAL: Normal deferred free -+ * -+ * DF_UNDER_PRESSURE: Free was called because the system -+ * is under memory pressure. Usually -+ * from a shrinker. Avoid allocating -+ * memory in the free call, as it may -+ * fail. 
-+ */ -+enum df_reason { -+ DF_NORMAL, -+ DF_UNDER_PRESSURE, -+}; ++ complt_irq = platform_get_irq_byname(pdev, "complete"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", ++ complt_irq); ++ return complt_irq; ++ } ++ wait_ctrl.complt_irq = complt_irq; + -+/** -+ * deferred_freelist_item - item structure for deferred freelist -+ * -+ * This is to be added to the structure for whatever you want to -+ * defer freeing on. -+ * -+ * @nr_pages: number of pages used by item to be freed -+ * @free: function pointer to be called when freeing the item -+ * @list: list entry for the deferred list -+ */ -+struct deferred_freelist_item { -+ size_t nr_pages; -+ void (*free)(struct deferred_freelist_item *i, -+ enum df_reason reason); -+ struct list_head list; -+}; ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complt_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); + -+/** -+ * deferred_free - call to add item to the deferred free list -+ * -+ * @item: Pointer to deferred_freelist_item field of a structure -+ * @free: Function pointer to the free call -+ * @nr_pages: number of pages to be freed -+ */ -+void deferred_free(struct deferred_freelist_item *item, -+ void (*free)(struct deferred_freelist_item *i, -+ enum df_reason reason), -+ size_t nr_pages); -+#endif -diff --git a/drivers/dma-buf/heaps/page_pool.c b/drivers/dma-buf/heaps/page_pool.c -new file mode 100644 -index 000000000..f3e359860 ---- /dev/null -+++ b/drivers/dma-buf/heaps/page_pool.c -@@ -0,0 +1,247 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * DMA BUF page pool system -+ * -+ * Copyright (C) 2020 Linaro Ltd. -+ * -+ * Based on the ION page pool code -+ * Copyright (C) 2011 Google, Inc. 
-+ */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include "page_pool.h" ++ ret = rockchip_get_freq_info(dmcfreq); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot get frequency info\n"); ++ return ret; ++ } ++ dmcfreq->is_set_rate_direct = true; + -+static LIST_HEAD(pool_list); -+static DEFINE_MUTEX(pool_list_lock); ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + -+static inline -+struct page *dmabuf_page_pool_alloc_pages(struct dmabuf_page_pool *pool) -+{ -+ if (fatal_signal_pending(current)) -+ return NULL; -+ return alloc_pages(pool->gfp_mask, pool->order); ++ return 0; +} + -+static inline void dmabuf_page_pool_free_pages(struct dmabuf_page_pool *pool, -+ struct page *page) ++static __maybe_unused int rk3588_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ __free_pages(page, pool->order); -+} ++ struct arm_smccc_res res; ++ struct dev_pm_opp *opp; ++ unsigned long opp_rate; ++ int ret; ++ int complt_irq; + -+static void dmabuf_page_pool_add(struct dmabuf_page_pool *pool, struct page *page) -+{ -+ int index; ++ res = sip_smc_dram(0, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); ++ if (res.a0) { ++ dev_err(&pdev->dev, "trusted firmware unsupported, please update.\n"); ++ return -ENXIO; ++ } + -+ if (PageHighMem(page)) -+ index = POOL_HIGHPAGE; -+ else -+ index = POOL_LOWPAGE; ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB is dts parameters ++ * request share memory size 4KB * 2 ++ */ ++ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ /* Clear ddr_psci_param, size is 4KB * 2 */ ++ memset_io(ddr_psci_param, 0x0, 4096 * 2); + -+ mutex_lock(&pool->mutex); -+ list_add_tail(&page->lru, &pool->items[index]); -+ pool->count[index]++; -+ mutex_unlock(&pool->mutex); -+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, -+ 1 << pool->order); -+} ++ /* start mcu with sip_smc_dram */ ++ wait_ctrl.dcf_en = 2; + -+static struct page *dmabuf_page_pool_remove(struct dmabuf_page_pool *pool, int index) -+{ -+ struct page *page; ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; + -+ mutex_lock(&pool->mutex); -+ page = list_first_entry_or_null(&pool->items[index], struct page, lru); -+ if (page) { -+ pool->count[index]--; -+ list_del(&page->lru); -+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, -+ -(1 << pool->order)); ++ complt_irq = platform_get_irq_byname(pdev, "complete"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", complt_irq); ++ return complt_irq; + } -+ mutex_unlock(&pool->mutex); ++ wait_ctrl.complt_irq = complt_irq; + -+ return page; -+} ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complt_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); + -+static struct page *dmabuf_page_pool_fetch(struct dmabuf_page_pool *pool) -+{ -+ struct page *page = NULL; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if 
(res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", res.a0); ++ return -ENOMEM; ++ } + -+ page = dmabuf_page_pool_remove(pool, POOL_HIGHPAGE); -+ if (!page) -+ page = dmabuf_page_pool_remove(pool, POOL_LOWPAGE); ++ ret = rockchip_dmcfreq_adjust_opp_table(dmcfreq); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot get frequency info\n"); ++ return ret; ++ } ++ dmcfreq->is_set_rate_direct = true; + -+ return page; -+} ++ /* Config the dmcfreq->sleep_volt for deepsleep */ ++ opp_rate = dmcfreq->freq_info_rate[dmcfreq->freq_count - 1]; ++ opp = devfreq_recommended_opp(&pdev->dev, &opp_rate, 0); ++ if (IS_ERR(opp)) { ++ dev_err(&pdev->dev, "Failed to find opp for %lu Hz\n", opp_rate); ++ return PTR_ERR(opp); ++ } ++ dmcfreq->sleep_volt = opp->supplies[0].u_volt; ++ if (dmcfreq->opp_info.regulator_count > 1) ++ dmcfreq->sleep_mem_volt = opp->supplies[1].u_volt; ++ dev_pm_opp_put(opp); + -+struct page *dmabuf_page_pool_alloc(struct dmabuf_page_pool *pool) -+{ -+ struct page *page = NULL; ++ if (of_property_read_u32(pdev->dev.of_node, "wait-mode", &ddr_psci_param->wait_mode)) ++ ddr_psci_param->wait_mode = 0; + -+ if (WARN_ON(!pool)) -+ return NULL; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ROCKCHIP_SIP_CONFIG_DRAM_GET_STALL_TIME); ++ if (res.a0) ++ dev_err(dmcfreq->dev, "Current ATF unsupported get_stall_time\n"); ++ else ++ dmcfreq->info.stall_time_ns = (unsigned int)res.a1; + -+ page = dmabuf_page_pool_fetch(pool); ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + -+ if (!page) -+ page = dmabuf_page_pool_alloc_pages(pool); -+ return page; ++ return 0; +} -+EXPORT_SYMBOL_GPL(dmabuf_page_pool_alloc); + -+void dmabuf_page_pool_free(struct dmabuf_page_pool *pool, struct page *page) ++static __maybe_unused int rv1126_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ if (WARN_ON(pool->order != compound_order(page))) -+ return; ++ struct arm_smccc_res res; ++ u32 size; ++ int ret; ++ int complt_irq; ++ struct device_node *node; + -+ dmabuf_page_pool_add(pool, page); -+} -+EXPORT_SYMBOL_GPL(dmabuf_page_pool_free); ++ res = sip_smc_dram(0, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_GET_VERSION); ++ dev_notice(&pdev->dev, "current ATF version 0x%lx\n", res.a1); ++ if (res.a0 || res.a1 < 0x100) { ++ dev_err(&pdev->dev, ++ "trusted firmware need to update or is invalid!\n"); ++ return -ENXIO; ++ } + -+static int dmabuf_page_pool_total(struct dmabuf_page_pool *pool, bool high) -+{ -+ int count = pool->count[POOL_LOWPAGE]; ++ /* ++ * first 4KB is used for interface parameters ++ * after 4KB * N is dts parameters ++ */ ++ size = sizeof(struct rk1808_ddr_dts_config_timing); ++ res = sip_smc_request_share_mem(DIV_ROUND_UP(size, 4096) + 1, ++ SHARE_PAGE_TYPE_DDR); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init\n"); ++ return -ENOMEM; ++ } ++ ddr_psci_param = (struct share_params *)res.a1; ++ of_get_rv1126_timings(&pdev->dev, pdev->dev.of_node, ++ (uint32_t *)ddr_psci_param); + -+ if (high) -+ count += pool->count[POOL_HIGHPAGE]; ++ /* enable start dcf in kernel after dcf ready */ ++ node = of_parse_phandle(pdev->dev.of_node, "dcf", 0); ++ wait_ctrl.regmap_dcf = syscon_node_to_regmap(node); ++ if (IS_ERR(wait_ctrl.regmap_dcf)) ++ return PTR_ERR(wait_ctrl.regmap_dcf); ++ wait_ctrl.dcf_en = 1; + -+ return count << pool->order; -+} ++ init_waitqueue_head(&wait_ctrl.wait_wq); ++ wait_ctrl.wait_en = 1; ++ wait_ctrl.wait_time_out_ms = 17 * 5; + -+struct dmabuf_page_pool *dmabuf_page_pool_create(gfp_t gfp_mask, unsigned int 
order) -+{ -+ struct dmabuf_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL); -+ int i; ++ complt_irq = platform_get_irq_byname(pdev, "complete"); ++ if (complt_irq < 0) { ++ dev_err(&pdev->dev, "no IRQ for complt_irq: %d\n", ++ complt_irq); ++ return complt_irq; ++ } ++ wait_ctrl.complt_irq = complt_irq; + -+ if (!pool) -+ return NULL; ++ ret = devm_request_irq(&pdev->dev, complt_irq, wait_dcf_complete_irq, ++ 0, dev_name(&pdev->dev), &wait_ctrl); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "cannot request complt_irq\n"); ++ return ret; ++ } ++ disable_irq(complt_irq); + -+ for (i = 0; i < POOL_TYPE_SIZE; i++) { -+ pool->count[i] = 0; -+ INIT_LIST_HEAD(&pool->items[i]); ++ if (of_property_read_u32(pdev->dev.of_node, "update_drv_odt_cfg", ++ &ddr_psci_param->update_drv_odt_cfg)) ++ ddr_psci_param->update_drv_odt_cfg = 0; ++ ++ if (of_property_read_u32(pdev->dev.of_node, "update_deskew_cfg", ++ &ddr_psci_param->update_deskew_cfg)) ++ ddr_psci_param->update_deskew_cfg = 0; ++ ++ dmcfreq->set_rate_params = ddr_psci_param; ++ rockchip_set_ddrclk_params(dmcfreq->set_rate_params); ++ rockchip_set_ddrclk_dmcfreq_wait_complete(rockchip_dmcfreq_wait_complete); ++ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_INIT); ++ if (res.a0) { ++ dev_err(&pdev->dev, "rockchip_sip_config_dram_init error:%lx\n", ++ res.a0); ++ return -ENOMEM; + } -+ pool->gfp_mask = gfp_mask | __GFP_COMP; -+ pool->order = order; -+ mutex_init(&pool->mutex); + -+ mutex_lock(&pool_list_lock); -+ list_add(&pool->list, &pool_list); -+ mutex_unlock(&pool_list_lock); ++ dmcfreq->set_auto_self_refresh = rockchip_ddr_set_auto_self_refresh; + -+ return pool; ++ return 0; +} -+EXPORT_SYMBOL_GPL(dmabuf_page_pool_create); + -+void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool) ++static const struct of_device_id rockchip_dmcfreq_of_match[] = { ++#if IS_ENABLED(CONFIG_CPU_PX30) ++ { .compatible = "rockchip,px30-dmc", .data = px30_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK1808) ++ { .compatible = "rockchip,rk1808-dmc", .data = rk1808_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK312X) ++ { .compatible = "rockchip,rk3128-dmc", .data = rk3128_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK322X) ++ { .compatible = "rockchip,rk3228-dmc", .data = rk3228_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3288) ++ { .compatible = "rockchip,rk3288-dmc", .data = rk3288_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3308) ++ { .compatible = "rockchip,rk3308-dmc", .data = NULL }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3328) ++ { .compatible = "rockchip,rk3328-dmc", .data = rk3328_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3399) ++ { .compatible = "rockchip,rk3399-dmc", .data = rk3399_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3528) ++ { .compatible = "rockchip,rk3528-dmc", .data = rk3528_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3562) ++ { .compatible = "rockchip,rk3562-dmc", .data = rk3568_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3568) ++ { .compatible = "rockchip,rk3568-dmc", .data = rk3568_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RK3588) ++ { .compatible = "rockchip,rk3588-dmc", .data = rk3588_dmc_init }, ++#endif ++#if IS_ENABLED(CONFIG_CPU_RV1126) ++ { .compatible = "rockchip,rv1126-dmc", .data = rv1126_dmc_init }, ++#endif ++ { }, ++}; ++MODULE_DEVICE_TABLE(of, rockchip_dmcfreq_of_match); ++ ++static int rockchip_get_freq_map_talbe(struct device_node *np, char *porp_name, ++ struct freq_map_table **table) +{ -+ struct page *page; -+ int i; ++ 
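/*
 * Reads a flat u32 property as <min max freq-kHz> triplets, for example a
 * hypothetical "cpu-bw-dmc-freq = <0 500 328000>, <500 2000 780000>;". The
 * third cell is converted from kHz to Hz (tbl[i].freq = temp_freq * 1000) and
 * one extra entry with freq = DMCFREQ_TABLE_END is appended as a terminator,
 * so consumers such as get_nocp_req_rate() can walk the table without
 * carrying a separate element count.
 */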
struct freq_map_table *tbl; ++ const struct property *prop; ++ unsigned int temp_freq = 0; ++ int count, i; + -+ /* Remove us from the pool list */ -+ mutex_lock(&pool_list_lock); -+ list_del(&pool->list); -+ mutex_unlock(&pool_list_lock); ++ prop = of_find_property(np, porp_name, NULL); ++ if (!prop) ++ return -EINVAL; + -+ /* Free any remaining pages in the pool */ -+ for (i = 0; i < POOL_TYPE_SIZE; i++) { -+ while ((page = dmabuf_page_pool_remove(pool, i))) -+ dmabuf_page_pool_free_pages(pool, page); ++ if (!prop->value) ++ return -ENODATA; ++ ++ count = of_property_count_u32_elems(np, porp_name); ++ if (count < 0) ++ return -EINVAL; ++ ++ if (count % 3) ++ return -EINVAL; ++ ++ tbl = kzalloc(sizeof(*tbl) * (count / 3 + 1), GFP_KERNEL); ++ if (!tbl) ++ return -ENOMEM; ++ ++ for (i = 0; i < count / 3; i++) { ++ of_property_read_u32_index(np, porp_name, 3 * i, &tbl[i].min); ++ of_property_read_u32_index(np, porp_name, 3 * i + 1, ++ &tbl[i].max); ++ of_property_read_u32_index(np, porp_name, 3 * i + 2, ++ &temp_freq); ++ tbl[i].freq = temp_freq * 1000; + } + -+ kfree(pool); ++ tbl[i].min = 0; ++ tbl[i].max = 0; ++ tbl[i].freq = DMCFREQ_TABLE_END; ++ ++ *table = tbl; ++ ++ return 0; +} -+EXPORT_SYMBOL_GPL(dmabuf_page_pool_destroy); + -+static int dmabuf_page_pool_do_shrink(struct dmabuf_page_pool *pool, gfp_t gfp_mask, -+ int nr_to_scan) ++static int rockchip_get_rl_map_talbe(struct device_node *np, char *porp_name, ++ struct rl_map_table **table) +{ -+ int freed = 0; -+ bool high; ++ struct rl_map_table *tbl; ++ const struct property *prop; ++ int count, i; + -+ if (current_is_kswapd()) -+ high = true; -+ else -+ high = !!(gfp_mask & __GFP_HIGHMEM); ++ prop = of_find_property(np, porp_name, NULL); ++ if (!prop) ++ return -EINVAL; + -+ if (nr_to_scan == 0) -+ return dmabuf_page_pool_total(pool, high); ++ if (!prop->value) ++ return -ENODATA; + -+ while (freed < nr_to_scan) { -+ struct page *page; ++ count = of_property_count_u32_elems(np, porp_name); ++ if (count < 0) ++ return -EINVAL; + -+ /* Try to free low pages first */ -+ page = dmabuf_page_pool_remove(pool, POOL_LOWPAGE); -+ if (!page) -+ page = dmabuf_page_pool_remove(pool, POOL_HIGHPAGE); ++ if (count % 2) ++ return -EINVAL; + -+ if (!page) -+ break; ++ tbl = kzalloc(sizeof(*tbl) * (count / 2 + 1), GFP_KERNEL); ++ if (!tbl) ++ return -ENOMEM; + -+ dmabuf_page_pool_free_pages(pool, page); -+ freed += (1 << pool->order); ++ for (i = 0; i < count / 2; i++) { ++ of_property_read_u32_index(np, porp_name, 2 * i, &tbl[i].pn); ++ of_property_read_u32_index(np, porp_name, 2 * i + 1, ++ &tbl[i].rl); + } + -+ return freed; ++ tbl[i].pn = 0; ++ tbl[i].rl = DMCFREQ_TABLE_END; ++ ++ *table = tbl; ++ ++ return 0; +} + -+static int dmabuf_page_pool_shrink(gfp_t gfp_mask, int nr_to_scan) ++static int rockchip_get_system_status_rate(struct device_node *np, ++ char *porp_name, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ struct dmabuf_page_pool *pool; -+ int nr_total = 0; -+ int nr_freed; -+ int only_scan = 0; ++ const struct property *prop; ++ unsigned int status = 0, freq = 0; ++ unsigned long temp_rate = 0; ++ int count, i; + -+ if (!nr_to_scan) -+ only_scan = 1; ++ prop = of_find_property(np, porp_name, NULL); ++ if (!prop) ++ return -ENODEV; + -+ mutex_lock(&pool_list_lock); -+ list_for_each_entry(pool, &pool_list, list) { -+ if (only_scan) { -+ nr_total += dmabuf_page_pool_do_shrink(pool, -+ gfp_mask, -+ nr_to_scan); -+ } else { -+ nr_freed = dmabuf_page_pool_do_shrink(pool, -+ gfp_mask, -+ nr_to_scan); -+ nr_to_scan -= nr_freed; -+ nr_total += 
nr_freed; -+ if (nr_to_scan <= 0) -+ break; ++ if (!prop->value) ++ return -ENODATA; ++ ++ count = of_property_count_u32_elems(np, porp_name); ++ if (count < 0) ++ return -EINVAL; ++ ++ if (count % 2) ++ return -EINVAL; ++ ++ for (i = 0; i < count / 2; i++) { ++ of_property_read_u32_index(np, porp_name, 2 * i, ++ &status); ++ of_property_read_u32_index(np, porp_name, 2 * i + 1, ++ &freq); ++ switch (status) { ++ case SYS_STATUS_NORMAL: ++ dmcfreq->normal_rate = freq * 1000; ++ break; ++ case SYS_STATUS_SUSPEND: ++ dmcfreq->suspend_rate = freq * 1000; ++ break; ++ case SYS_STATUS_DEEP_SUSPEND: ++ dmcfreq->deep_suspend_rate = freq * 1000; ++ break; ++ case SYS_STATUS_VIDEO_1080P: ++ dmcfreq->video_1080p_rate = freq * 1000; ++ break; ++ case SYS_STATUS_VIDEO_4K: ++ dmcfreq->video_4k_rate = freq * 1000; ++ break; ++ case SYS_STATUS_VIDEO_4K_10B: ++ dmcfreq->video_4k_10b_rate = freq * 1000; ++ break; ++ case SYS_STATUS_VIDEO_SVEP: ++ dmcfreq->video_svep_rate = freq * 1000; ++ break; ++ case SYS_STATUS_PERFORMANCE: ++ dmcfreq->performance_rate = freq * 1000; ++ break; ++ case SYS_STATUS_HDMI: ++ dmcfreq->hdmi_rate = freq * 1000; ++ break; ++ case SYS_STATUS_HDMIRX: ++ dmcfreq->hdmirx_rate = freq * 1000; ++ break; ++ case SYS_STATUS_IDLE: ++ dmcfreq->idle_rate = freq * 1000; ++ break; ++ case SYS_STATUS_REBOOT: ++ dmcfreq->reboot_rate = freq * 1000; ++ break; ++ case SYS_STATUS_BOOST: ++ dmcfreq->boost_rate = freq * 1000; ++ break; ++ case SYS_STATUS_ISP: ++ case SYS_STATUS_CIF0: ++ case SYS_STATUS_CIF1: ++ case SYS_STATUS_DUALVIEW: ++ temp_rate = freq * 1000; ++ if (dmcfreq->fixed_rate < temp_rate) ++ dmcfreq->fixed_rate = temp_rate; ++ break; ++ case SYS_STATUS_LOW_POWER: ++ dmcfreq->low_power_rate = freq * 1000; ++ break; ++ default: ++ break; + } + } -+ mutex_unlock(&pool_list_lock); + -+ return nr_total; ++ return 0; +} + -+static unsigned long dmabuf_page_pool_shrink_count(struct shrinker *shrinker, -+ struct shrink_control *sc) ++static unsigned long rockchip_freq_level_2_rate(struct rockchip_dmcfreq *dmcfreq, ++ unsigned int level) +{ -+ return dmabuf_page_pool_shrink(sc->gfp_mask, 0); ++ unsigned long rate = 0; ++ ++ switch (level) { ++ case DMC_FREQ_LEVEL_LOW: ++ rate = dmcfreq->rate_low; ++ break; ++ case DMC_FREQ_LEVEL_MID_LOW: ++ rate = dmcfreq->rate_mid_low; ++ break; ++ case DMC_FREQ_LEVEL_MID_HIGH: ++ rate = dmcfreq->rate_mid_high; ++ break; ++ case DMC_FREQ_LEVEL_HIGH: ++ rate = dmcfreq->rate_high; ++ break; ++ default: ++ break; ++ } ++ ++ return rate; +} + -+static unsigned long dmabuf_page_pool_shrink_scan(struct shrinker *shrinker, -+ struct shrink_control *sc) ++static int rockchip_get_system_status_level(struct device_node *np, ++ char *porp_name, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ if (sc->nr_to_scan == 0) -+ return 0; -+ return dmabuf_page_pool_shrink(sc->gfp_mask, sc->nr_to_scan); -+} ++ const struct property *prop; ++ unsigned int status = 0, level = 0; ++ unsigned long temp_rate = 0; ++ int count, i; + -+struct shrinker pool_shrinker = { -+ .count_objects = dmabuf_page_pool_shrink_count, -+ .scan_objects = dmabuf_page_pool_shrink_scan, -+ .seeks = DEFAULT_SEEKS, -+ .batch = 0, -+}; ++ prop = of_find_property(np, porp_name, NULL); ++ if (!prop) ++ return -ENODEV; + -+static int dmabuf_page_pool_init_shrinker(void) -+{ -+ return register_shrinker(&pool_shrinker, "pool-shrinker"); -+} -+module_init(dmabuf_page_pool_init_shrinker); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/dma-buf/heaps/page_pool.h b/drivers/dma-buf/heaps/page_pool.h -new file mode 100644 
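The switch above turns each <status freq-kHz> pair of the "system-status-freq" property into its own target rate, folding the ISP/CIF/DUALVIEW states into a single fixed_rate maximum. A minimal user-space sketch of that folding, with invented status bit values and frequencies (the real SYS_STATUS_* masks are defined elsewhere in the Rockchip tree):

#include <stdio.h>

/* Placeholder status bits; the real SYS_STATUS_* masks come from Rockchip headers. */
#define SYS_STATUS_VIDEO_4K	(1u << 6)
#define SYS_STATUS_ISP		(1u << 9)
#define SYS_STATUS_CIF0		(1u << 10)

int main(void)
{
	/* "system-status-freq"-style pairs: <status freq-kHz>, numbers invented for the example. */
	static const unsigned int pairs[][2] = {
		{ SYS_STATUS_VIDEO_4K,  780000 },
		{ SYS_STATUS_ISP,       924000 },
		{ SYS_STATUS_CIF0,     1056000 },
	};
	unsigned long video_4k_rate = 0, fixed_rate = 0;
	unsigned int i;

	for (i = 0; i < sizeof(pairs) / sizeof(pairs[0]); i++) {
		unsigned long rate = (unsigned long)pairs[i][1] * 1000;	/* kHz -> Hz, as in the parser */

		switch (pairs[i][0]) {
		case SYS_STATUS_VIDEO_4K:
			video_4k_rate = rate;
			break;
		case SYS_STATUS_ISP:
		case SYS_STATUS_CIF0:
			/* ISP/CIF/DUALVIEW share one fixed rate: keep the maximum of the group. */
			if (fixed_rate < rate)
				fixed_rate = rate;
			break;
		default:
			break;
		}
	}

	/* Prints: video_4k_rate=780000000 fixed_rate=1056000000 */
	printf("video_4k_rate=%lu fixed_rate=%lu\n", video_4k_rate, fixed_rate);
	return 0;
}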
-index 000000000..6b083b04f ---- /dev/null -+++ b/drivers/dma-buf/heaps/page_pool.h -@@ -0,0 +1,55 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * DMA BUF PagePool implementation -+ * Based on earlier ION code by Google -+ * -+ * Copyright (C) 2011 Google, Inc. -+ * Copyright (C) 2020 Linaro Ltd. -+ */ -+ -+#ifndef _DMABUF_PAGE_POOL_H -+#define _DMABUF_PAGE_POOL_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* page types we track in the pool */ -+enum { -+ POOL_LOWPAGE, /* Clean lowmem pages */ -+ POOL_HIGHPAGE, /* Clean highmem pages */ -+ -+ POOL_TYPE_SIZE, -+}; -+ -+/** -+ * struct dmabuf_page_pool - pagepool struct -+ * @count[]: array of number of pages of that type in the pool -+ * @items[]: array of list of pages of the specific type -+ * @mutex: lock protecting this struct and especially the count -+ * item list -+ * @gfp_mask: gfp_mask to use from alloc -+ * @order: order of pages in the pool -+ * @list: list node for list of pools -+ * -+ * Allows you to keep a pool of pre allocated pages to use -+ */ -+struct dmabuf_page_pool { -+ int count[POOL_TYPE_SIZE]; -+ struct list_head items[POOL_TYPE_SIZE]; -+ struct mutex mutex; -+ gfp_t gfp_mask; -+ unsigned int order; -+ struct list_head list; -+}; -+ -+struct dmabuf_page_pool *dmabuf_page_pool_create(gfp_t gfp_mask, -+ unsigned int order); -+void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool); -+struct page *dmabuf_page_pool_alloc(struct dmabuf_page_pool *pool); -+void dmabuf_page_pool_free(struct dmabuf_page_pool *pool, struct page *page); -+ -+#endif /* _DMABUF_PAGE_POOL_H */ -diff --git a/drivers/dma-buf/heaps/rk_cma_heap.c b/drivers/dma-buf/heaps/rk_cma_heap.c -new file mode 100644 -index 000000000..c93674c2a ---- /dev/null -+++ b/drivers/dma-buf/heaps/rk_cma_heap.c -@@ -0,0 +1,616 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * DMABUF CMA heap exporter -+ * -+ * Copyright (C) 2012, 2019, 2020 Linaro Ltd. -+ * Author: for ST-Ericsson. -+ * -+ * Also utilizing parts of Andrew Davis' SRAM heap: -+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ -+ * Andrew F. Davis -+ * -+ * Copyright (C) 2021, 2022 Rockchip Electronics Co. Ltd. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+struct cma_heap { -+ struct dma_heap *heap; -+ struct cma *cma; -+}; -+ -+struct cma_heap_buffer { -+ struct cma_heap *heap; -+ struct list_head attachments; -+ struct mutex lock; -+ unsigned long len; -+ struct page *cma_pages; -+ struct page **pages; -+ pgoff_t pagecount; -+ int vmap_cnt; -+ void *vaddr; -+ -+ bool uncached; -+}; -+ -+struct dma_heap_attachment { -+ struct device *dev; -+ struct sg_table table; -+ struct list_head list; -+ bool mapped; -+ -+ bool uncached; -+}; ++ if (!prop->value) ++ return -ENODATA; + -+static int cma_heap_attach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) -+{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ int ret; ++ count = of_property_count_u32_elems(np, porp_name); ++ if (count < 0) ++ return -EINVAL; + -+ a = kzalloc(sizeof(*a), GFP_KERNEL); -+ if (!a) -+ return -ENOMEM; ++ if (count % 2) ++ return -EINVAL; + -+ ret = sg_alloc_table_from_pages(&a->table, buffer->pages, -+ buffer->pagecount, 0, -+ buffer->pagecount << PAGE_SHIFT, -+ GFP_KERNEL); -+ if (ret) { -+ kfree(a); -+ return ret; ++ if (dmcfreq->freq_count == 1) { ++ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_high = dmcfreq->freq_info_rate[0]; ++ } else if (dmcfreq->freq_count == 2) { ++ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[1]; ++ dmcfreq->rate_high = dmcfreq->freq_info_rate[1]; ++ } else if (dmcfreq->freq_count == 3) { ++ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; ++ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[1]; ++ dmcfreq->rate_high = dmcfreq->freq_info_rate[2]; ++ } else if (dmcfreq->freq_count == 4) { ++ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; ++ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[2]; ++ dmcfreq->rate_high = dmcfreq->freq_info_rate[3]; ++ } else if (dmcfreq->freq_count == 5 || dmcfreq->freq_count == 6) { ++ dmcfreq->rate_low = dmcfreq->freq_info_rate[0]; ++ dmcfreq->rate_mid_low = dmcfreq->freq_info_rate[1]; ++ dmcfreq->rate_mid_high = dmcfreq->freq_info_rate[dmcfreq->freq_count - 2]; ++ dmcfreq->rate_high = dmcfreq->freq_info_rate[dmcfreq->freq_count - 1]; ++ } else { ++ return -EINVAL; + } + -+ a->dev = attachment->dev; -+ INIT_LIST_HEAD(&a->list); -+ a->mapped = false; -+ -+ a->uncached = buffer->uncached; -+ attachment->priv = a; ++ dmcfreq->auto_min_rate = dmcfreq->rate_low; + -+ mutex_lock(&buffer->lock); -+ list_add(&a->list, &buffer->attachments); -+ mutex_unlock(&buffer->lock); ++ for (i = 0; i < count / 2; i++) { ++ of_property_read_u32_index(np, porp_name, 2 * i, ++ &status); ++ of_property_read_u32_index(np, porp_name, 2 * i + 1, ++ &level); ++ switch (status) { ++ case SYS_STATUS_NORMAL: ++ dmcfreq->normal_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "normal_rate = %ld\n", dmcfreq->normal_rate); ++ break; ++ case SYS_STATUS_SUSPEND: ++ dmcfreq->suspend_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "suspend_rate = %ld\n", dmcfreq->suspend_rate); ++ break; ++ case SYS_STATUS_DEEP_SUSPEND: ++ 
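/*
 * The freq_count mapping above collapses the probed freq_info_rate[] list
 * into four levels. With a hypothetical list of {324, 528, 780, 1056} MHz
 * (freq_count == 4) that gives rate_low = 324 MHz, rate_mid_low = 528 MHz,
 * rate_mid_high = 780 MHz and rate_high = 1056 MHz, so a "system-status-level"
 * pair such as <SYS_STATUS_VIDEO_4K DMC_FREQ_LEVEL_HIGH> resolves to 1056 MHz
 * through rockchip_freq_level_2_rate().
 */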
dmcfreq->deep_suspend_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "deep_suspend_rate = %ld\n", ++ dmcfreq->deep_suspend_rate); ++ break; ++ case SYS_STATUS_VIDEO_1080P: ++ dmcfreq->video_1080p_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "video_1080p_rate = %ld\n", ++ dmcfreq->video_1080p_rate); ++ break; ++ case SYS_STATUS_VIDEO_4K: ++ dmcfreq->video_4k_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "video_4k_rate = %ld\n", dmcfreq->video_4k_rate); ++ break; ++ case SYS_STATUS_VIDEO_4K_10B: ++ dmcfreq->video_4k_10b_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "video_4k_10b_rate = %ld\n", ++ dmcfreq->video_4k_10b_rate); ++ break; ++ case SYS_STATUS_VIDEO_4K_60P: ++ dmcfreq->video_4k_60p_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "video_4k_60p_rate = %ld\n", ++ dmcfreq->video_4k_60p_rate); ++ break; ++ case SYS_STATUS_VIDEO_SVEP: ++ dmcfreq->video_svep_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "video_svep_rate = %ld\n", ++ dmcfreq->video_svep_rate); ++ break; ++ case SYS_STATUS_PERFORMANCE: ++ dmcfreq->performance_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "performance_rate = %ld\n", ++ dmcfreq->performance_rate); ++ break; ++ case SYS_STATUS_HDMI: ++ dmcfreq->hdmi_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "hdmi_rate = %ld\n", dmcfreq->hdmi_rate); ++ break; ++ case SYS_STATUS_HDMIRX: ++ dmcfreq->hdmirx_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "hdmirx_rate = %ld\n", dmcfreq->hdmirx_rate); ++ break; ++ case SYS_STATUS_IDLE: ++ dmcfreq->idle_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "idle_rate = %ld\n", dmcfreq->idle_rate); ++ break; ++ case SYS_STATUS_REBOOT: ++ dmcfreq->reboot_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "reboot_rate = %ld\n", dmcfreq->reboot_rate); ++ break; ++ case SYS_STATUS_BOOST: ++ dmcfreq->boost_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "boost_rate = %ld\n", dmcfreq->boost_rate); ++ break; ++ case SYS_STATUS_ISP: ++ case SYS_STATUS_CIF0: ++ case SYS_STATUS_CIF1: ++ case SYS_STATUS_DUALVIEW: ++ temp_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ if (dmcfreq->fixed_rate < temp_rate) { ++ dmcfreq->fixed_rate = temp_rate; ++ dev_info(dmcfreq->dev, ++ "fixed_rate(isp|cif0|cif1|dualview) = %ld\n", ++ dmcfreq->fixed_rate); ++ } ++ break; ++ case SYS_STATUS_LOW_POWER: ++ dmcfreq->low_power_rate = rockchip_freq_level_2_rate(dmcfreq, level); ++ dev_info(dmcfreq->dev, "low_power_rate = %ld\n", dmcfreq->low_power_rate); ++ break; ++ default: ++ break; ++ } ++ } + + return 0; +} + -+static void cma_heap_detach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) ++static void rockchip_dmcfreq_update_target(struct rockchip_dmcfreq *dmcfreq) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a = attachment->priv; -+ -+ mutex_lock(&buffer->lock); -+ list_del(&a->list); -+ mutex_unlock(&buffer->lock); ++ struct devfreq *devfreq = dmcfreq->info.devfreq; + -+ sg_free_table(&a->table); -+ kfree(a); ++ mutex_lock(&devfreq->lock); ++ update_devfreq(devfreq); ++ mutex_unlock(&devfreq->lock); +} + -+static struct sg_table *cma_heap_map_dma_buf(struct dma_buf_attachment *attachment, -+ enum dma_data_direction direction) ++static int 
rockchip_dmcfreq_system_status_notifier(struct notifier_block *nb, ++ unsigned long status, ++ void *ptr) +{ -+ struct dma_heap_attachment *a = attachment->priv; -+ struct sg_table *table = &a->table; -+ int attrs = attachment->dma_map_attrs; -+ int ret; -+ -+ if (a->uncached) -+ attrs |= DMA_ATTR_SKIP_CPU_SYNC; ++ struct rockchip_dmcfreq *dmcfreq = system_status_to_dmcfreq(nb); ++ unsigned long target_rate = 0; ++ unsigned int refresh = false; ++ bool is_fixed = false; + -+ ret = dma_map_sgtable(attachment->dev, table, direction, attrs); -+ if (ret) -+ return ERR_PTR(-ENOMEM); -+ a->mapped = true; -+ return table; -+} ++ if (dmcfreq->fixed_rate && (is_dualview(status) || is_isp(status))) { ++ if (dmcfreq->is_fixed) ++ return NOTIFY_OK; ++ is_fixed = true; ++ target_rate = dmcfreq->fixed_rate; ++ goto next; ++ } + -+static void cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, -+ struct sg_table *table, -+ enum dma_data_direction direction) -+{ -+ struct dma_heap_attachment *a = attachment->priv; -+ int attrs = attachment->dma_map_attrs; ++ if (dmcfreq->reboot_rate && (status & SYS_STATUS_REBOOT)) { ++ if (dmcfreq->info.auto_freq_en) ++ devfreq_monitor_stop(dmcfreq->info.devfreq); ++ target_rate = dmcfreq->reboot_rate; ++ goto next; ++ } + -+ a->mapped = false; ++ if (dmcfreq->suspend_rate && (status & SYS_STATUS_SUSPEND)) { ++ target_rate = dmcfreq->suspend_rate; ++ refresh = true; ++ goto next; ++ } + -+ if (a->uncached) -+ attrs |= DMA_ATTR_SKIP_CPU_SYNC; ++ if (dmcfreq->low_power_rate && (status & SYS_STATUS_LOW_POWER)) { ++ target_rate = dmcfreq->low_power_rate; ++ goto next; ++ } + -+ dma_unmap_sgtable(attachment->dev, table, direction, attrs); -+} ++ if (dmcfreq->performance_rate && (status & SYS_STATUS_PERFORMANCE)) { ++ if (dmcfreq->performance_rate > target_rate) ++ target_rate = dmcfreq->performance_rate; ++ } + -+static int __maybe_unused -+cma_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) -+{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ phys_addr_t phys = page_to_phys(buffer->cma_pages); ++ if (dmcfreq->hdmi_rate && (status & SYS_STATUS_HDMI)) { ++ if (dmcfreq->hdmi_rate > target_rate) ++ target_rate = dmcfreq->hdmi_rate; ++ } + -+ if (buffer->vmap_cnt) -+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ if (dmcfreq->hdmirx_rate && (status & SYS_STATUS_HDMIRX)) { ++ if (dmcfreq->hdmirx_rate > target_rate) ++ target_rate = dmcfreq->hdmirx_rate; ++ } + -+ if (buffer->uncached) -+ return 0; ++ if (dmcfreq->video_4k_rate && (status & SYS_STATUS_VIDEO_4K)) { ++ if (dmcfreq->video_4k_rate > target_rate) ++ target_rate = dmcfreq->video_4k_rate; ++ } + -+ mutex_lock(&buffer->lock); -+ dma_sync_single_for_cpu(dma_heap_get_dev(buffer->heap->heap), -+ phys + offset, -+ len, -+ direction); -+ mutex_unlock(&buffer->lock); ++ if (dmcfreq->video_4k_10b_rate && (status & SYS_STATUS_VIDEO_4K_10B)) { ++ if (dmcfreq->video_4k_10b_rate > target_rate) ++ target_rate = dmcfreq->video_4k_10b_rate; ++ } + -+ return 0; -+} ++ if (dmcfreq->video_4k_60p_rate && (status & SYS_STATUS_VIDEO_4K_60P)) { ++ if (dmcfreq->video_4k_60p_rate > target_rate) ++ target_rate = dmcfreq->video_4k_60p_rate; ++ } + -+static int __maybe_unused -+cma_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) -+{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ phys_addr_t phys = 
page_to_phys(buffer->cma_pages); ++ if (dmcfreq->video_1080p_rate && (status & SYS_STATUS_VIDEO_1080P)) { ++ if (dmcfreq->video_1080p_rate > target_rate) ++ target_rate = dmcfreq->video_1080p_rate; ++ } + -+ if (buffer->vmap_cnt) -+ flush_kernel_vmap_range(buffer->vaddr, buffer->len); ++ if (dmcfreq->video_svep_rate && (status & SYS_STATUS_VIDEO_SVEP)) { ++ if (dmcfreq->video_svep_rate > target_rate) ++ target_rate = dmcfreq->video_svep_rate; ++ } + -+ if (buffer->uncached) -+ return 0; ++next: + -+ mutex_lock(&buffer->lock); -+ dma_sync_single_for_device(dma_heap_get_dev(buffer->heap->heap), -+ phys + offset, -+ len, -+ direction); -+ mutex_unlock(&buffer->lock); ++ dev_dbg(dmcfreq->dev, "status=0x%x\n", (unsigned int)status); ++ dmcfreq->is_fixed = is_fixed; ++ dmcfreq->status_rate = target_rate; ++ if (dmcfreq->refresh != refresh) { ++ if (dmcfreq->set_auto_self_refresh) ++ dmcfreq->set_auto_self_refresh(refresh); ++ dmcfreq->refresh = refresh; ++ } ++ rockchip_dmcfreq_update_target(dmcfreq); + -+ return 0; ++ return NOTIFY_OK; +} + -+static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static int rockchip_dmcfreq_panic_notifier(struct notifier_block *nb, ++ unsigned long v, void *p) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ -+ if (buffer->vmap_cnt) -+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ struct rockchip_dmcfreq *dmcfreq = ++ container_of(nb, struct rockchip_dmcfreq, panic_nb); ++ struct device *dev = dmcfreq->dev; + -+ mutex_lock(&buffer->lock); -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_cpu(a->dev, &a->table, direction); -+ } -+ mutex_unlock(&buffer->lock); ++ if (dmcfreq->opp_info.regulator_count == 1) ++ dev_info(dev, "cur_freq: %lu Hz, volt: %lu uV\n", ++ dmcfreq->rate, dmcfreq->volt); ++ else ++ dev_info(dev, "cur_freq: %lu Hz, volt_vdd: %lu uV, volt_mem: %lu uV\n", ++ dmcfreq->rate, dmcfreq->volt, dmcfreq->mem_volt); + + return 0; +} + -+static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static ssize_t rockchip_dmcfreq_status_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ -+ if (buffer->vmap_cnt) -+ flush_kernel_vmap_range(buffer->vaddr, buffer->len); -+ -+ mutex_lock(&buffer->lock); -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_device(a->dev, &a->table, direction); -+ } -+ mutex_unlock(&buffer->lock); ++ unsigned int status = rockchip_get_system_status(); + -+ return 0; ++ return sprintf(buf, "0x%x\n", status); +} + -+static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) ++static ssize_t rockchip_dmcfreq_status_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) +{ -+ struct vm_area_struct *vma = vmf->vma; -+ struct cma_heap_buffer *buffer = vma->vm_private_data; -+ -+ if (vmf->pgoff > buffer->pagecount) -+ return VM_FAULT_SIGBUS; ++ if (!count) ++ return -EINVAL; + -+ vmf->page = buffer->pages[vmf->pgoff]; -+ get_page(vmf->page); ++ rockchip_update_system_status(buf); + -+ return 0; ++ return count; +} + -+static const struct vm_operations_struct dma_heap_vm_ops = { -+ .fault = cma_heap_vm_fault, -+}; ++static DEVICE_ATTR(system_status, 0644, rockchip_dmcfreq_status_show, ++ 
rockchip_dmcfreq_status_store); + -+static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) ++static ssize_t upthreshold_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ -+ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) -+ return -EINVAL; -+ -+ if (buffer->uncached) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ vma->vm_ops = &dma_heap_vm_ops; -+ vma->vm_private_data = buffer; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); ++ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; + -+ return 0; ++ return sprintf(buf, "%d\n", data->upthreshold); +} + -+static void *cma_heap_do_vmap(struct cma_heap_buffer *buffer) ++static ssize_t upthreshold_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) +{ -+ void *vaddr; -+ pgprot_t pgprot = PAGE_KERNEL; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); ++ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; ++ unsigned int value; + -+ if (buffer->uncached) -+ pgprot = pgprot_writecombine(PAGE_KERNEL); ++ if (kstrtouint(buf, 10, &value)) ++ return -EINVAL; + -+ vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, pgprot); -+ if (!vaddr) -+ return ERR_PTR(-ENOMEM); ++ data->upthreshold = value; + -+ return vaddr; ++ return count; +} + -+static void *cma_heap_vmap(struct dma_buf *dmabuf) ++static DEVICE_ATTR_RW(upthreshold); ++ ++static ssize_t downdifferential_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ void *vaddr; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); ++ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; + -+ mutex_lock(&buffer->lock); -+ if (buffer->vmap_cnt) { -+ buffer->vmap_cnt++; -+ vaddr = buffer->vaddr; -+ goto out; -+ } ++ return sprintf(buf, "%d\n", data->downdifferential); ++} + -+ vaddr = cma_heap_do_vmap(buffer); -+ if (IS_ERR(vaddr)) -+ goto out; ++static ssize_t downdifferential_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev->parent); ++ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; ++ unsigned int value; + -+ buffer->vaddr = vaddr; -+ buffer->vmap_cnt++; -+out: -+ mutex_unlock(&buffer->lock); ++ if (kstrtouint(buf, 10, &value)) ++ return -EINVAL; + -+ return vaddr; ++ data->downdifferential = value; ++ ++ return count; +} + -+static void cma_heap_vunmap(struct dma_buf *dmabuf, void *vaddr) ++static DEVICE_ATTR_RW(downdifferential); ++ ++static unsigned long get_nocp_req_rate(struct rockchip_dmcfreq *dmcfreq) +{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; ++ unsigned long target = 0, cpu_bw = 0; ++ int i; + -+ mutex_lock(&buffer->lock); -+ if (!--buffer->vmap_cnt) { -+ vunmap(buffer->vaddr); -+ buffer->vaddr = NULL; -+ } -+ mutex_unlock(&buffer->lock); -+} ++ if (!dmcfreq->cpu_bw_tbl || dmcfreq->nocp_cpu_id < 0) ++ goto out; + -+static void cma_heap_dma_buf_release(struct dma_buf *dmabuf) -+{ -+ struct cma_heap_buffer *buffer = dmabuf->priv; -+ struct cma_heap *cma_heap = buffer->heap; ++ cpu_bw = dmcfreq->nocp_bw[dmcfreq->nocp_cpu_id]; + -+ if (buffer->vmap_cnt > 0) { -+ WARN(1, "%s: buffer still mapped in the kernel\n", __func__); -+ vunmap(buffer->vaddr); ++ for (i = 0; dmcfreq->cpu_bw_tbl[i].freq != 
CPUFREQ_TABLE_END; i++) { ++ if (cpu_bw >= dmcfreq->cpu_bw_tbl[i].min) ++ target = dmcfreq->cpu_bw_tbl[i].freq; + } + -+ /* free page list */ -+ kfree(buffer->pages); -+ /* release memory */ -+ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); -+ kfree(buffer); ++out: ++ return target; +} + -+static const struct dma_buf_ops cma_heap_buf_ops = { -+ .attach = cma_heap_attach, -+ .detach = cma_heap_detach, -+ .map_dma_buf = cma_heap_map_dma_buf, -+ .unmap_dma_buf = cma_heap_unmap_dma_buf, -+ .begin_cpu_access = cma_heap_dma_buf_begin_cpu_access, -+ .end_cpu_access = cma_heap_dma_buf_end_cpu_access, -+#ifdef CONFIG_DMABUF_PARTIAL -+ .begin_cpu_access_partial = cma_heap_dma_buf_begin_cpu_access_partial, -+ .end_cpu_access_partial = cma_heap_dma_buf_end_cpu_access_partial, -+#endif -+ .mmap = cma_heap_mmap, -+ .vmap = cma_heap_vmap, -+ .vunmap = cma_heap_vunmap, -+ .release = cma_heap_dma_buf_release, -+}; -+ -+static struct dma_buf *cma_heap_do_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags, bool uncached) ++static int devfreq_dmc_ondemand_func(struct devfreq *df, ++ unsigned long *freq) +{ -+ struct cma_heap *cma_heap = dma_heap_get_drvdata(heap); -+ struct cma_heap_buffer *buffer; -+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); -+ size_t size = PAGE_ALIGN(len); -+ pgoff_t pagecount = size >> PAGE_SHIFT; -+ unsigned long align = get_order(size); -+ struct page *cma_pages; -+ struct dma_buf *dmabuf; -+ int ret = -ENOMEM; -+ pgoff_t pg; -+ dma_addr_t dma; -+ -+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); -+ if (!buffer) -+ return ERR_PTR(-ENOMEM); -+ -+ buffer->uncached = uncached; ++ int err; ++ struct devfreq_dev_status *stat; ++ unsigned long long a, b; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(df->dev.parent); ++ struct rockchip_dmcfreq_ondemand_data *data = &dmcfreq->ondemand_data; ++ unsigned int upthreshold = data->upthreshold; ++ unsigned int downdifferential = data->downdifferential; ++ unsigned long target_freq = 0, nocp_req_rate = 0; ++ u64 now; + -+ INIT_LIST_HEAD(&buffer->attachments); -+ mutex_init(&buffer->lock); -+ buffer->len = size; ++ if (dmcfreq->info.auto_freq_en && !dmcfreq->is_fixed) { ++ if (dmcfreq->status_rate) ++ target_freq = dmcfreq->status_rate; ++ else if (dmcfreq->auto_min_rate) ++ target_freq = dmcfreq->auto_min_rate; ++ nocp_req_rate = get_nocp_req_rate(dmcfreq); ++ target_freq = max3(target_freq, nocp_req_rate, ++ dmcfreq->info.vop_req_rate); ++ now = ktime_to_us(ktime_get()); ++ if (now < dmcfreq->touchboostpulse_endtime) ++ target_freq = max(target_freq, dmcfreq->boost_rate); ++ } else { ++ if (dmcfreq->status_rate) ++ target_freq = dmcfreq->status_rate; ++ else if (dmcfreq->normal_rate) ++ target_freq = dmcfreq->normal_rate; ++ if (target_freq) ++ *freq = target_freq; ++ if (dmcfreq->info.auto_freq_en && !devfreq_update_stats(df)) ++ return 0; ++ goto reset_last_status; ++ } + -+ if (align > CONFIG_CMA_ALIGNMENT) -+ align = CONFIG_CMA_ALIGNMENT; ++ if (!upthreshold || !downdifferential) ++ goto reset_last_status; + -+ cma_pages = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); -+ if (!cma_pages) -+ goto free_buffer; ++ if (upthreshold > 100 || ++ upthreshold < downdifferential) ++ goto reset_last_status; + -+ /* Clear the cma pages */ -+ if (PageHighMem(cma_pages)) { -+ unsigned long nr_clear_pages = pagecount; -+ struct page *page = cma_pages; ++ err = devfreq_update_stats(df); ++ if (err) ++ goto reset_last_status; + -+ while (nr_clear_pages > 0) { -+ void *vaddr 
= kmap_atomic(page); ++ stat = &df->last_status; + -+ memset(vaddr, 0, PAGE_SIZE); -+ kunmap_atomic(vaddr); -+ /* -+ * Avoid wasting time zeroing memory if the process -+ * has been killed by by SIGKILL -+ */ -+ if (fatal_signal_pending(current)) -+ goto free_cma; -+ page++; -+ nr_clear_pages--; -+ } -+ } else { -+ memset(page_address(cma_pages), 0, size); ++ /* Assume MAX if it is going to be divided by zero */ ++ if (stat->total_time == 0) { ++ *freq = DEVFREQ_MAX_FREQ; ++ return 0; + } + -+ buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages), GFP_KERNEL); -+ if (!buffer->pages) { -+ ret = -ENOMEM; -+ goto free_cma; ++ /* Prevent overflow */ ++ if (stat->busy_time >= (1 << 24) || stat->total_time >= (1 << 24)) { ++ stat->busy_time >>= 7; ++ stat->total_time >>= 7; + } + -+ for (pg = 0; pg < pagecount; pg++) -+ buffer->pages[pg] = &cma_pages[pg]; -+ -+ buffer->cma_pages = cma_pages; -+ buffer->heap = cma_heap; -+ buffer->pagecount = pagecount; ++ /* Set MAX if it's busy enough */ ++ if (stat->busy_time * 100 > ++ stat->total_time * upthreshold) { ++ *freq = DEVFREQ_MAX_FREQ; ++ return 0; ++ } + -+ /* create the dmabuf */ -+ exp_info.exp_name = dma_heap_get_name(heap); -+ exp_info.ops = &cma_heap_buf_ops; -+ exp_info.size = buffer->len; -+ exp_info.flags = fd_flags; -+ exp_info.priv = buffer; -+ dmabuf = dma_buf_export(&exp_info); -+ if (IS_ERR(dmabuf)) { -+ ret = PTR_ERR(dmabuf); -+ goto free_pages; ++ /* Set MAX if we do not know the initial frequency */ ++ if (stat->current_frequency == 0) { ++ *freq = DEVFREQ_MAX_FREQ; ++ return 0; + } + -+ if (buffer->uncached) { -+ dma = dma_map_page(dma_heap_get_dev(heap), buffer->cma_pages, 0, -+ buffer->pagecount * PAGE_SIZE, DMA_FROM_DEVICE); -+ dma_unmap_page(dma_heap_get_dev(heap), dma, -+ buffer->pagecount * PAGE_SIZE, DMA_FROM_DEVICE); ++ /* Keep the current frequency */ ++ if (stat->busy_time * 100 > ++ stat->total_time * (upthreshold - downdifferential)) { ++ *freq = max(target_freq, stat->current_frequency); ++ return 0; + } + -+ return dmabuf; ++ /* Set the desired frequency based on the load */ ++ a = stat->busy_time; ++ a *= stat->current_frequency; ++ b = div_u64(a, stat->total_time); ++ b *= 100; ++ b = div_u64(b, (upthreshold - downdifferential / 2)); ++ *freq = max_t(unsigned long, target_freq, b); + -+free_pages: -+ kfree(buffer->pages); -+free_cma: -+ cma_release(cma_heap->cma, cma_pages, pagecount); -+free_buffer: -+ kfree(buffer); ++ return 0; + -+ return ERR_PTR(ret); -+} ++reset_last_status: ++ reset_last_status(df); + -+static struct dma_buf *cma_heap_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) -+{ -+ return cma_heap_do_allocate(heap, len, fd_flags, heap_flags, false); ++ return 0; +} + -+#if IS_ENABLED(CONFIG_NO_GKI) -+static int cma_heap_get_phys(struct dma_heap *heap, -+ struct dma_heap_phys_data *phys) ++static int devfreq_dmc_ondemand_handler(struct devfreq *devfreq, ++ unsigned int event, void *data) +{ -+ struct cma_heap *cma_heap = dma_heap_get_drvdata(heap); -+ struct cma_heap_buffer *buffer; -+ struct dma_buf *dmabuf; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(devfreq->dev.parent); + -+ phys->paddr = (__u64)-1; ++ if (!dmcfreq->info.auto_freq_en) ++ return 0; + -+ if (IS_ERR_OR_NULL(phys)) -+ return -EINVAL; ++ switch (event) { ++ case DEVFREQ_GOV_START: ++ devfreq_monitor_start(devfreq); ++ break; + -+ dmabuf = dma_buf_get(phys->fd); -+ if (IS_ERR_OR_NULL(dmabuf)) -+ return -EBADFD; ++ case DEVFREQ_GOV_STOP: ++ 
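/* Mirrors DEVFREQ_GOV_START above: the devfreq load monitor only runs while
 * this governor owns the device, and only when auto_freq_en was enabled. */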
devfreq_monitor_stop(devfreq); ++ break; + -+ buffer = dmabuf->priv; -+ if (IS_ERR_OR_NULL(buffer)) -+ goto err; ++ case DEVFREQ_GOV_UPDATE_INTERVAL: ++ devfreq_update_interval(devfreq, (unsigned int *)data); ++ break; + -+ if (buffer->heap != cma_heap) -+ goto err; ++ case DEVFREQ_GOV_SUSPEND: ++ devfreq_monitor_suspend(devfreq); ++ break; + -+ phys->paddr = page_to_phys(buffer->cma_pages); ++ case DEVFREQ_GOV_RESUME: ++ devfreq_monitor_resume(devfreq); ++ break; + -+err: -+ dma_buf_put(dmabuf); ++ default: ++ break; ++ } + -+ return (phys->paddr == (__u64)-1) ? -EINVAL : 0; ++ return 0; +} -+#endif + -+static const struct dma_heap_ops cma_heap_ops = { -+ .allocate = cma_heap_allocate, -+#if IS_ENABLED(CONFIG_NO_GKI) -+ .get_phys = cma_heap_get_phys, -+#endif ++static struct devfreq_governor devfreq_dmc_ondemand = { ++ .name = "dmc_ondemand", ++ .get_target_freq = devfreq_dmc_ondemand_func, ++ .event_handler = devfreq_dmc_ondemand_handler, +}; + -+static struct dma_buf *cma_uncached_heap_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) -+{ -+ return cma_heap_do_allocate(heap, len, fd_flags, heap_flags, true); -+} -+ -+static struct dma_buf *cma_uncached_heap_not_initialized(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) ++static int rockchip_dmcfreq_enable_event(struct rockchip_dmcfreq *dmcfreq) +{ -+ pr_info("heap %s not initialized\n", dma_heap_get_name(heap)); -+ return ERR_PTR(-EBUSY); -+} ++ int i, ret; + -+static struct dma_heap_ops cma_uncached_heap_ops = { -+ .allocate = cma_uncached_heap_not_initialized, -+}; ++ if (!dmcfreq->info.auto_freq_en) ++ return 0; + -+static int set_heap_dev_dma(struct device *heap_dev) -+{ -+ int err = 0; ++ for (i = 0; i < dmcfreq->edev_count; i++) { ++ ret = devfreq_event_enable_edev(dmcfreq->edev[i]); ++ if (ret < 0) { ++ dev_err(dmcfreq->dev, ++ "failed to enable devfreq-event\n"); ++ return ret; ++ } ++ } + -+ if (!heap_dev) -+ return -EINVAL; ++ return 0; ++} + -+ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); ++static int rockchip_dmcfreq_disable_event(struct rockchip_dmcfreq *dmcfreq) ++{ ++ int i, ret; + -+ if (!heap_dev->dma_parms) { -+ heap_dev->dma_parms = devm_kzalloc(heap_dev, -+ sizeof(*heap_dev->dma_parms), -+ GFP_KERNEL); -+ if (!heap_dev->dma_parms) -+ return -ENOMEM; ++ if (!dmcfreq->info.auto_freq_en) ++ return 0; + -+ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); -+ if (err) { -+ devm_kfree(heap_dev, heap_dev->dma_parms); -+ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); -+ return err; ++ for (i = 0; i < dmcfreq->edev_count; i++) { ++ ret = devfreq_event_disable_edev(dmcfreq->edev[i]); ++ if (ret < 0) { ++ dev_err(dmcfreq->dev, ++ "failed to disable devfreq-event\n"); ++ return ret; + } + } + + return 0; +} + -+static int __add_cma_heap(struct cma *cma, void *data) ++static int rockchip_get_edev_id(struct rockchip_dmcfreq *dmcfreq, ++ const char *name) +{ -+ struct cma_heap *cma_heap, *cma_uncached_heap; -+ struct dma_heap_export_info exp_info; -+ int ret; -+ -+ cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); -+ if (!cma_heap) -+ return -ENOMEM; -+ cma_heap->cma = cma; -+ -+ exp_info.name = "cma"; -+ exp_info.ops = &cma_heap_ops; -+ exp_info.priv = cma_heap; -+ -+ cma_heap->heap = dma_heap_add(&exp_info); -+ if (IS_ERR(cma_heap->heap)) { -+ ret = PTR_ERR(cma_heap->heap); -+ goto free_cma_heap; -+ } ++ struct devfreq_event_dev *edev; ++ int i; + -+ 
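devfreq_dmc_ondemand_func() applies its scaling formula only when the measured DDR load falls below (upthreshold - downdifferential) percent; above upthreshold it asks for DEVFREQ_MAX_FREQ, and in between it keeps at least the current rate. A standalone sketch of the scale-down arithmetic with invented counter values, using plain 64-bit division in place of div_u64:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Invented sample: 10 % busy at 1056 MHz, upthreshold = 40, downdifferential = 20. */
	uint64_t busy_time = 100, total_time = 1000;
	uint64_t current_frequency = 1056000000ULL;
	unsigned int upthreshold = 40, downdifferential = 20;
	uint64_t a, b;

	/* Same steps as the governor: scale the current rate by the load, then divide
	 * by (upthreshold - downdifferential / 2) percent to leave some headroom. */
	a = busy_time * current_frequency;
	b = a / total_time;				/* ~105.6 MHz: current rate scaled by the 10 % load */
	b *= 100;
	b /= upthreshold - downdifferential / 2;	/* divide by 30 % -> ~352 MHz requested */

	/* Prints: next target = 352000000 Hz; devfreq then maps this onto an available OPP. */
	printf("next target = %llu Hz\n", (unsigned long long)b);
	return 0;
}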
cma_uncached_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); -+ if (!cma_uncached_heap) { -+ ret = -ENOMEM; -+ goto put_cma_heap; ++ for (i = 0; i < dmcfreq->edev_count; i++) { ++ edev = dmcfreq->edev[i]; ++ if (!strcmp(edev->desc->name, name)) ++ return i; + } + -+ cma_uncached_heap->cma = cma; ++ return -EINVAL; ++} + -+ exp_info.name = "cma-uncached"; -+ exp_info.ops = &cma_uncached_heap_ops; -+ exp_info.priv = cma_uncached_heap; ++static int rockchip_dmcfreq_get_event(struct rockchip_dmcfreq *dmcfreq) ++{ ++ struct device *dev = dmcfreq->dev; ++ struct device_node *events_np, *np = dev->of_node; ++ int i, j, count, available_count = 0; + -+ cma_uncached_heap->heap = dma_heap_add(&exp_info); -+ if (IS_ERR(cma_uncached_heap->heap)) { -+ ret = PTR_ERR(cma_uncached_heap->heap); -+ goto free_uncached_cma_heap; ++ count = devfreq_event_get_edev_count(dev, "devfreq-events"); ++ if (count < 0) { ++ dev_dbg(dev, "failed to get count of devfreq-event dev\n"); ++ return 0; + } ++ for (i = 0; i < count; i++) { ++ events_np = of_parse_phandle(np, "devfreq-events", i); ++ if (!events_np) ++ continue; ++ if (of_device_is_available(events_np)) ++ available_count++; ++ of_node_put(events_np); ++ } ++ if (!available_count) { ++ dev_dbg(dev, "failed to get available devfreq-event\n"); ++ return 0; ++ } ++ dmcfreq->edev_count = available_count; ++ dmcfreq->edev = devm_kzalloc(dev, ++ sizeof(*dmcfreq->edev) * available_count, ++ GFP_KERNEL); ++ if (!dmcfreq->edev) ++ return -ENOMEM; + -+ ret = set_heap_dev_dma(dma_heap_get_dev(cma_uncached_heap->heap)); -+ if (ret) -+ goto put_uncached_cma_heap; -+ -+ mb(); /* make sure we only set allocate after dma_mask is set */ -+ cma_uncached_heap_ops.allocate = cma_uncached_heap_allocate; ++ for (i = 0, j = 0; i < count; i++) { ++ events_np = of_parse_phandle(np, "devfreq-events", i); ++ if (!events_np) ++ continue; ++ if (of_device_is_available(events_np)) { ++ of_node_put(events_np); ++ if (j >= available_count) { ++ dev_err(dev, "invalid event conut\n"); ++ return -EINVAL; ++ } ++ dmcfreq->edev[j] = ++ devfreq_event_get_edev_by_phandle(dev, "devfreq-events", i); ++ if (IS_ERR(dmcfreq->edev[j])) ++ return -EPROBE_DEFER; ++ j++; ++ } else { ++ of_node_put(events_np); ++ } ++ } ++ dmcfreq->info.auto_freq_en = true; ++ dmcfreq->dfi_id = rockchip_get_edev_id(dmcfreq, "dfi"); ++ dmcfreq->nocp_cpu_id = rockchip_get_edev_id(dmcfreq, "nocp-cpu"); ++ dmcfreq->nocp_bw = ++ devm_kzalloc(dev, sizeof(*dmcfreq->nocp_bw) * available_count, ++ GFP_KERNEL); ++ if (!dmcfreq->nocp_bw) ++ return -ENOMEM; + + return 0; -+ -+put_uncached_cma_heap: -+ dma_heap_put(cma_uncached_heap->heap); -+free_uncached_cma_heap: -+ kfree(cma_uncached_heap); -+put_cma_heap: -+ dma_heap_put(cma_heap->heap); -+free_cma_heap: -+ kfree(cma_heap); -+ -+ return ret; +} + -+static int add_default_cma_heap(void) ++static int rockchip_dmcfreq_dmc_init(struct platform_device *pdev, ++ struct rockchip_dmcfreq *dmcfreq) +{ -+ struct cma *default_cma = dev_get_cma_area(NULL); -+ int ret = 0; ++ const struct of_device_id *match; ++ int (*init)(struct platform_device *pdev, ++ struct rockchip_dmcfreq *data); ++ int ret; + -+ if (default_cma) -+ ret = __add_cma_heap(default_cma, NULL); ++ match = of_match_node(rockchip_dmcfreq_of_match, pdev->dev.of_node); ++ if (match) { ++ init = match->data; ++ if (init) { ++ ret = init(pdev, dmcfreq); ++ if (ret) ++ return ret; ++ } ++ } + -+ return ret; ++ return 0; +} -+module_init(add_default_cma_heap); -+MODULE_DESCRIPTION("DMA-BUF CMA Heap"); -+MODULE_LICENSE("GPL"); -diff 
--git a/drivers/dma-buf/heaps/rk_system_heap.c b/drivers/dma-buf/heaps/rk_system_heap.c -new file mode 100644 -index 000000000..52dcfc2bb ---- /dev/null -+++ b/drivers/dma-buf/heaps/rk_system_heap.c -@@ -0,0 +1,841 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * DMABUF System heap exporter for Rockchip -+ * -+ * Copyright (C) 2011 Google, Inc. -+ * Copyright (C) 2019, 2020 Linaro Ltd. -+ * Copyright (c) 2021, 2022 Rockchip Electronics Co. Ltd. -+ * -+ * Portions based off of Andrew Davis' SRAM heap: -+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ -+ * Andrew F. Davis -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "page_pool.h" -+#include "deferred-free-helper.h" + -+static struct dma_heap *sys_heap; -+static struct dma_heap *sys_dma32_heap; -+static struct dma_heap *sys_uncached_heap; -+static struct dma_heap *sys_uncached_dma32_heap; ++static void rockchip_dmcfreq_parse_dt(struct rockchip_dmcfreq *dmcfreq) ++{ ++ struct device *dev = dmcfreq->dev; ++ struct device_node *np = dev->of_node; + -+/* Default setting */ -+static u32 bank_bit_first = 12; -+static u32 bank_bit_mask = 0x7; ++ if (!rockchip_get_system_status_rate(np, "system-status-freq", dmcfreq)) ++ dmcfreq->system_status_en = true; ++ else if (!rockchip_get_system_status_level(np, "system-status-level", dmcfreq)) ++ dmcfreq->system_status_en = true; + -+struct system_heap_buffer { -+ struct dma_heap *heap; -+ struct list_head attachments; -+ struct mutex lock; -+ unsigned long len; -+ struct sg_table sg_table; -+ int vmap_cnt; -+ void *vaddr; -+ struct deferred_freelist_item deferred_free; -+ struct dmabuf_page_pool **pools; -+ bool uncached; -+}; ++ of_property_read_u32(np, "min-cpu-freq", &dmcfreq->min_cpu_freq); + -+struct dma_heap_attachment { -+ struct device *dev; -+ struct sg_table *table; -+ struct list_head list; -+ bool mapped; ++ of_property_read_u32(np, "upthreshold", ++ &dmcfreq->ondemand_data.upthreshold); ++ of_property_read_u32(np, "downdifferential", ++ &dmcfreq->ondemand_data.downdifferential); ++ if (dmcfreq->info.auto_freq_en) ++ of_property_read_u32(np, "auto-freq-en", ++ &dmcfreq->info.auto_freq_en); ++ if (!dmcfreq->auto_min_rate) { ++ of_property_read_u32(np, "auto-min-freq", ++ (u32 *)&dmcfreq->auto_min_rate); ++ dmcfreq->auto_min_rate *= 1000; ++ } + -+ bool uncached; -+}; ++ if (rockchip_get_freq_map_talbe(np, "cpu-bw-dmc-freq", ++ &dmcfreq->cpu_bw_tbl)) ++ dev_dbg(dev, "failed to get cpu bandwidth to dmc rate\n"); ++ if (rockchip_get_freq_map_talbe(np, "vop-frame-bw-dmc-freq", ++ &dmcfreq->info.vop_frame_bw_tbl)) ++ dev_dbg(dev, "failed to get vop frame bandwidth to dmc rate\n"); ++ if (rockchip_get_freq_map_talbe(np, "vop-bw-dmc-freq", ++ &dmcfreq->info.vop_bw_tbl)) ++ dev_err(dev, "failed to get vop bandwidth to dmc rate\n"); ++ if (rockchip_get_rl_map_talbe(np, "vop-pn-msch-readlatency", ++ &dmcfreq->info.vop_pn_rl_tbl)) ++ dev_err(dev, "failed to get vop pn to msch rl\n"); ++ if (dmcfreq->video_4k_rate) ++ dmcfreq->info.vop_4k_rate = dmcfreq->video_4k_rate; ++ else if (dmcfreq->video_4k_10b_rate) ++ dmcfreq->info.vop_4k_rate = dmcfreq->video_4k_10b_rate; + -+#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) -+#define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ -+ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ -+ | __GFP_COMP) -+static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; -+/* -+ * The selection of the orders used for 
allocation (1MB, 64K, 4K) is designed -+ * to match with the sizes often found in IOMMUs. Using order 4 pages instead -+ * of order 0 pages can significantly improve the performance of many IOMMUs -+ * by reducing TLB pressure and time spent updating page tables. -+ */ -+static unsigned int orders[] = {8, 4, 0}; -+#define NUM_ORDERS ARRAY_SIZE(orders) -+struct dmabuf_page_pool *pools[NUM_ORDERS]; -+struct dmabuf_page_pool *dma32_pools[NUM_ORDERS]; ++ of_property_read_u32(np, "touchboost_duration", ++ (u32 *)&dmcfreq->touchboostpulse_duration_val); ++ if (dmcfreq->touchboostpulse_duration_val) ++ dmcfreq->touchboostpulse_duration_val *= USEC_PER_MSEC; ++ else ++ dmcfreq->touchboostpulse_duration_val = 500 * USEC_PER_MSEC; ++} + -+static struct sg_table *dup_sg_table(struct sg_table *table) ++static int rockchip_dmcfreq_add_devfreq(struct rockchip_dmcfreq *dmcfreq) +{ -+ struct sg_table *new_table; -+ int ret, i; -+ struct scatterlist *sg, *new_sg; -+ -+ new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); -+ if (!new_table) -+ return ERR_PTR(-ENOMEM); ++ struct devfreq_dev_profile *devp = &rockchip_devfreq_dmc_profile; ++ struct device *dev = dmcfreq->dev; ++ struct dev_pm_opp *opp; ++ struct devfreq *devfreq; ++ unsigned long opp_rate; + -+ ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL); -+ if (ret) { -+ kfree(new_table); -+ return ERR_PTR(-ENOMEM); ++ dmcfreq->rate = clk_get_rate(dmcfreq->opp_info.clk); ++ opp_rate = dmcfreq->rate; ++ opp = devfreq_recommended_opp(dev, &opp_rate, 0); ++ if (IS_ERR(opp)) { ++ dev_err(dev, "Failed to find opp for %lu Hz\n", opp_rate); ++ return PTR_ERR(opp); + } ++ dev_pm_opp_put(opp); + -+ new_sg = new_table->sgl; -+ for_each_sgtable_sg(table, sg, i) { -+ sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset); -+ new_sg = sg_next(new_sg); ++ devp->initial_freq = dmcfreq->rate; ++ devfreq = devm_devfreq_add_device(dev, devp, "dmc_ondemand", ++ &dmcfreq->ondemand_data); ++ if (IS_ERR(devfreq)) { ++ dev_err(dev, "failed to add devfreq\n"); ++ return PTR_ERR(devfreq); + } + -+ return new_table; -+} -+ -+static int system_heap_attach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) -+{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ struct sg_table *table; -+ -+ a = kzalloc(sizeof(*a), GFP_KERNEL); -+ if (!a) -+ return -ENOMEM; ++ devm_devfreq_register_opp_notifier(dev, devfreq); + -+ table = dup_sg_table(&buffer->sg_table); -+ if (IS_ERR(table)) { -+ kfree(a); -+ return -ENOMEM; -+ } ++ devfreq->last_status.current_frequency = opp_rate; ++ devfreq->suspend_freq = dmcfreq->deep_suspend_rate; + -+ a->table = table; -+ a->dev = attachment->dev; -+ INIT_LIST_HEAD(&a->list); -+ a->mapped = false; -+ a->uncached = buffer->uncached; -+ attachment->priv = a; ++ reset_last_status(devfreq); + -+ mutex_lock(&buffer->lock); -+ list_add(&a->list, &buffer->attachments); -+ mutex_unlock(&buffer->lock); ++ dmcfreq->info.devfreq = devfreq; + + return 0; +} + -+static void system_heap_detach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) ++static void rockchip_dmcfreq_register_notifier(struct rockchip_dmcfreq *dmcfreq) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a = attachment->priv; ++ int ret; + -+ mutex_lock(&buffer->lock); -+ list_del(&a->list); -+ mutex_unlock(&buffer->lock); ++ if (dmcfreq->system_status_en || dmcfreq->info.auto_freq_en) { ++ if (vop_register_dmc()) ++ dev_err(dmcfreq->dev, "fail to register notify to vop.\n"); + -+ 
sg_free_table(a->table); -+ kfree(a->table); -+ kfree(a); ++ dmcfreq->status_nb.notifier_call = ++ rockchip_dmcfreq_system_status_notifier; ++ ret = rockchip_register_system_status_notifier(&dmcfreq->status_nb); ++ if (ret) ++ dev_err(dmcfreq->dev, "failed to register system_status nb\n"); ++ } ++ ++ dmcfreq->panic_nb.notifier_call = rockchip_dmcfreq_panic_notifier; ++ ret = atomic_notifier_chain_register(&panic_notifier_list, ++ &dmcfreq->panic_nb); ++ if (ret) ++ dev_err(dmcfreq->dev, "failed to register panic nb\n"); ++ ++ dmc_mdevp.data = dmcfreq->info.devfreq; ++ dmc_mdevp.opp_info = &dmcfreq->opp_info; ++ dmcfreq->mdev_info = rockchip_system_monitor_register(dmcfreq->dev, ++ &dmc_mdevp); ++ if (IS_ERR(dmcfreq->mdev_info)) { ++ dev_dbg(dmcfreq->dev, "without without system monitor\n"); ++ dmcfreq->mdev_info = NULL; ++ } ++ dmcfreq->opp_info.is_rate_volt_checked = true; +} + -+static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attachment, -+ enum dma_data_direction direction) ++static void rockchip_dmcfreq_add_interface(struct rockchip_dmcfreq *dmcfreq) +{ -+ struct dma_heap_attachment *a = attachment->priv; -+ struct sg_table *table = a->table; -+ int attr = attachment->dma_map_attrs; -+ int ret; -+ -+ if (a->uncached) -+ attr |= DMA_ATTR_SKIP_CPU_SYNC; ++ struct devfreq *devfreq = dmcfreq->info.devfreq; + -+ ret = dma_map_sgtable(attachment->dev, table, direction, attr); -+ if (ret) -+ return ERR_PTR(ret); ++ if (sysfs_create_file(&devfreq->dev.kobj, &dev_attr_upthreshold.attr)) ++ dev_err(dmcfreq->dev, ++ "failed to register upthreshold sysfs file\n"); ++ if (sysfs_create_file(&devfreq->dev.kobj, ++ &dev_attr_downdifferential.attr)) ++ dev_err(dmcfreq->dev, ++ "failed to register downdifferential sysfs file\n"); + -+ a->mapped = true; -+ return table; ++ if (!rockchip_add_system_status_interface(&devfreq->dev)) ++ return; ++ if (sysfs_create_file(&devfreq->dev.kobj, ++ &dev_attr_system_status.attr)) ++ dev_err(dmcfreq->dev, ++ "failed to register system_status sysfs file\n"); +} + -+static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, -+ struct sg_table *table, -+ enum dma_data_direction direction) ++static void rockchip_dmcfreq_boost_work(struct work_struct *work) +{ -+ struct dma_heap_attachment *a = attachment->priv; -+ int attr = attachment->dma_map_attrs; ++ struct rockchip_dmcfreq *dmcfreq = boost_to_dmcfreq(work); + -+ if (a->uncached) -+ attr |= DMA_ATTR_SKIP_CPU_SYNC; -+ a->mapped = false; -+ dma_unmap_sgtable(attachment->dev, table, direction, attr); ++ rockchip_dmcfreq_update_target(dmcfreq); +} + -+static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static void rockchip_dmcfreq_input_event(struct input_handle *handle, ++ unsigned int type, ++ unsigned int code, ++ int value) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ -+ mutex_lock(&buffer->lock); ++ struct rockchip_dmcfreq *dmcfreq = handle->private; ++ u64 now, endtime; + -+ if (buffer->vmap_cnt) -+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ if (type != EV_ABS && type != EV_KEY) ++ return; + -+ if (!buffer->uncached) { -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_cpu(a->dev, a->table, direction); -+ } -+ } -+ mutex_unlock(&buffer->lock); ++ now = ktime_to_us(ktime_get()); ++ endtime = now + dmcfreq->touchboostpulse_duration_val; ++ if (endtime < (dmcfreq->touchboostpulse_endtime + 
10 * USEC_PER_MSEC)) ++ return; ++ dmcfreq->touchboostpulse_endtime = endtime; + -+ return 0; ++ queue_work(system_freezable_wq, &dmcfreq->boost_work); +} + -+static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction direction) ++static int rockchip_dmcfreq_input_connect(struct input_handler *handler, ++ struct input_dev *dev, ++ const struct input_device_id *id) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; ++ int error; ++ struct input_handle *handle; ++ struct rockchip_dmcfreq *dmcfreq = input_hd_to_dmcfreq(handler); + -+ mutex_lock(&buffer->lock); ++ handle = kzalloc(sizeof(*handle), GFP_KERNEL); ++ if (!handle) ++ return -ENOMEM; + -+ if (buffer->vmap_cnt) -+ flush_kernel_vmap_range(buffer->vaddr, buffer->len); ++ handle->dev = dev; ++ handle->handler = handler; ++ handle->name = "dmcfreq"; ++ handle->private = dmcfreq; + -+ if (!buffer->uncached) { -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_device(a->dev, a->table, direction); -+ } -+ } -+ mutex_unlock(&buffer->lock); ++ error = input_register_handle(handle); ++ if (error) ++ goto err2; ++ ++ error = input_open_device(handle); ++ if (error) ++ goto err1; + + return 0; ++err1: ++ input_unregister_handle(handle); ++err2: ++ kfree(handle); ++ return error; +} + -+static int system_heap_sgl_sync_range(struct device *dev, -+ struct sg_table *sgt, -+ unsigned int offset, -+ unsigned int length, -+ enum dma_data_direction dir, -+ bool for_cpu) ++static void rockchip_dmcfreq_input_disconnect(struct input_handle *handle) +{ -+ struct scatterlist *sg; -+ unsigned int len = 0; -+ dma_addr_t sg_dma_addr; -+ int i; ++ input_close_device(handle); ++ input_unregister_handle(handle); ++ kfree(handle); ++} + -+ for_each_sgtable_sg(sgt, sg, i) { -+ unsigned int sg_offset, sg_left, size = 0; ++static const struct input_device_id rockchip_dmcfreq_input_ids[] = { ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT | ++ INPUT_DEVICE_ID_MATCH_ABSBIT, ++ .evbit = { BIT_MASK(EV_ABS) }, ++ .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = ++ BIT_MASK(ABS_MT_POSITION_X) | ++ BIT_MASK(ABS_MT_POSITION_Y) }, ++ }, ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_KEYBIT | ++ INPUT_DEVICE_ID_MATCH_ABSBIT, ++ .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, ++ .absbit = { [BIT_WORD(ABS_X)] = ++ BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) }, ++ }, ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, ++ .evbit = { BIT_MASK(EV_KEY) }, ++ }, ++ { }, ++}; + -+ sg_dma_addr = sg_phys(sg); ++static void rockchip_dmcfreq_boost_init(struct rockchip_dmcfreq *dmcfreq) ++{ ++ if (!dmcfreq->boost_rate) ++ return; ++ INIT_WORK(&dmcfreq->boost_work, rockchip_dmcfreq_boost_work); ++ dmcfreq->input_handler.event = rockchip_dmcfreq_input_event; ++ dmcfreq->input_handler.connect = rockchip_dmcfreq_input_connect; ++ dmcfreq->input_handler.disconnect = rockchip_dmcfreq_input_disconnect; ++ dmcfreq->input_handler.name = "dmcfreq"; ++ dmcfreq->input_handler.id_table = rockchip_dmcfreq_input_ids; ++ if (input_register_handler(&dmcfreq->input_handler)) ++ dev_err(dmcfreq->dev, "failed to register input handler\n"); ++} + -+ len += sg->length; -+ if (len <= offset) -+ continue; ++static int rockchip_dmcfreq_probe(struct platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct rockchip_dmcfreq *data; ++ int ret; + -+ sg_left = len - offset; -+ sg_offset = sg->length - sg_left; ++ data = devm_kzalloc(dev, sizeof(struct rockchip_dmcfreq), GFP_KERNEL); ++ if 
(!data) ++ return -ENOMEM; + -+ size = (length < sg_left) ? length : sg_left; -+ if (for_cpu) -+ dma_sync_single_range_for_cpu(dev, sg_dma_addr, -+ sg_offset, size, dir); -+ else -+ dma_sync_single_range_for_device(dev, sg_dma_addr, -+ sg_offset, size, dir); ++ data->dev = dev; ++ data->dev->init_name = "dmc"; ++ data->info.dev = dev; ++ mutex_init(&data->lock); ++ INIT_LIST_HEAD(&data->video_info_list); + -+ offset += size; -+ length -= size; ++ ret = rockchip_dmcfreq_get_event(data); ++ if (ret) ++ return ret; + -+ if (length == 0) -+ break; -+ } ++ ret = rockchip_init_opp_table(dev, &data->opp_info, "dmc_clk", "center"); ++ if (ret) ++ return ret; + -+ return 0; -+} ++ ret = rockchip_dmcfreq_dmc_init(pdev, data); ++ if (ret) ++ return ret; + -+static int __maybe_unused -+system_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) -+{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap *heap = buffer->heap; -+ struct sg_table *table = &buffer->sg_table; -+ int ret; ++ rockchip_dmcfreq_parse_dt(data); + -+ if (direction == DMA_TO_DEVICE) ++ platform_set_drvdata(pdev, data); ++ ++ if (!data->system_status_en && !data->info.auto_freq_en) { ++ dev_info(dev, "don't add devfreq feature\n"); ++ rockchip_dmcfreq_register_notifier(data); + return 0; ++ } + -+ mutex_lock(&buffer->lock); -+ if (buffer->vmap_cnt) -+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ cpu_latency_qos_add_request(&pm_qos, PM_QOS_DEFAULT_VALUE); + -+ if (buffer->uncached) { -+ mutex_unlock(&buffer->lock); -+ return 0; ++ ret = devfreq_add_governor(&devfreq_dmc_ondemand); ++ if (ret) ++ return ret; ++ ret = rockchip_dmcfreq_enable_event(data); ++ if (ret) ++ return ret; ++ ret = rockchip_dmcfreq_add_devfreq(data); ++ if (ret) { ++ rockchip_dmcfreq_disable_event(data); ++ return ret; + } + -+ ret = system_heap_sgl_sync_range(dma_heap_get_dev(heap), table, -+ offset, len, direction, true); -+ mutex_unlock(&buffer->lock); ++ rockchip_dmcfreq_register_notifier(data); ++ rockchip_dmcfreq_add_interface(data); ++ rockchip_dmcfreq_boost_init(data); ++ rockchip_dmcfreq_vop_bandwidth_init(&data->info); + -+ return ret; ++ rockchip_set_system_status(SYS_STATUS_NORMAL); ++ ++ return 0; +} + -+static int __maybe_unused -+system_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) ++static __maybe_unused int rockchip_dmcfreq_suspend(struct device *dev) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap *heap = buffer->heap; -+ struct sg_table *table = &buffer->sg_table; -+ int ret; -+ -+ mutex_lock(&buffer->lock); -+ if (buffer->vmap_cnt) -+ flush_kernel_vmap_range(buffer->vaddr, buffer->len); ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info; ++ struct regulator *vdd_reg = NULL; ++ struct regulator *mem_reg = NULL; ++ int ret = 0; + -+ if (buffer->uncached) { -+ mutex_unlock(&buffer->lock); ++ if (!dmcfreq) + return 0; -+ } -+ -+ ret = system_heap_sgl_sync_range(dma_heap_get_dev(heap), table, -+ offset, len, direction, false); -+ mutex_unlock(&buffer->lock); + -+ return ret; -+} -+ -+static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -+{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ struct sg_table *table = &buffer->sg_table; -+ unsigned long addr = vma->vm_start; -+ struct sg_page_iter piter; -+ int ret; ++ ret = 
rockchip_dmcfreq_disable_event(dmcfreq); ++ if (ret) ++ return ret; + -+ if (buffer->uncached) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ if (dmcfreq->info.devfreq) { ++ ret = devfreq_suspend_device(dmcfreq->info.devfreq); ++ if (ret < 0) { ++ dev_err(dev, "failed to suspend the devfreq devices\n"); ++ return ret; ++ } ++ } + -+ for_each_sgtable_page(table, &piter, vma->vm_pgoff) { -+ struct page *page = sg_page_iter_page(&piter); ++ opp_info = &dmcfreq->opp_info; ++ if (!opp_info->regulators) ++ return 0; ++ vdd_reg = opp_info->regulators[0]; ++ if (opp_info->regulator_count > 1) ++ mem_reg = opp_info->regulators[1]; + -+ ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, -+ vma->vm_page_prot); -+ if (ret) ++ /* set voltage to sleep_volt if need */ ++ if (vdd_reg && dmcfreq->sleep_volt && ++ dmcfreq->sleep_volt != dmcfreq->volt) { ++ ret = regulator_set_voltage(vdd_reg, dmcfreq->sleep_volt, INT_MAX); ++ if (ret) { ++ dev_err(dev, "Cannot set vdd voltage %lu uV\n", ++ dmcfreq->sleep_volt); + return ret; -+ addr += PAGE_SIZE; -+ if (addr >= vma->vm_end) -+ return 0; ++ } ++ } ++ if (mem_reg && dmcfreq->sleep_mem_volt && ++ dmcfreq->sleep_mem_volt != dmcfreq->mem_volt) { ++ ret = regulator_set_voltage(mem_reg, dmcfreq->sleep_mem_volt, INT_MAX); ++ if (ret) { ++ dev_err(dev, "Cannot set mem voltage %lu uV\n", ++ dmcfreq->sleep_mem_volt); ++ return ret; ++ } + } ++ + return 0; +} + -+static void *system_heap_do_vmap(struct system_heap_buffer *buffer) ++static __maybe_unused int rockchip_dmcfreq_resume(struct device *dev) +{ -+ struct sg_table *table = &buffer->sg_table; -+ int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; -+ struct page **pages = vmalloc(sizeof(struct page *) * npages); -+ struct page **tmp = pages; -+ struct sg_page_iter piter; -+ pgprot_t pgprot = PAGE_KERNEL; -+ void *vaddr; ++ struct rockchip_dmcfreq *dmcfreq = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info; ++ struct regulator *vdd_reg = NULL; ++ struct regulator *mem_reg = NULL; ++ int ret = 0; + -+ if (!pages) -+ return ERR_PTR(-ENOMEM); ++ if (!dmcfreq) ++ return 0; + -+ if (buffer->uncached) -+ pgprot = pgprot_writecombine(PAGE_KERNEL); ++ opp_info = &dmcfreq->opp_info; ++ if (opp_info->regulators) { ++ vdd_reg = opp_info->regulators[0]; ++ if (opp_info->regulator_count > 1) ++ mem_reg = opp_info->regulators[1]; + -+ for_each_sgtable_page(table, &piter, 0) { -+ WARN_ON(tmp - pages >= npages); -+ *tmp++ = sg_page_iter_page(&piter); ++ /* restore voltage if it is sleep_volt */ ++ if (mem_reg && dmcfreq->sleep_volt && ++ dmcfreq->sleep_volt != dmcfreq->volt) { ++ ret = regulator_set_voltage(vdd_reg, dmcfreq->volt, ++ INT_MAX); ++ if (ret) { ++ dev_err(dev, "Cannot set vdd voltage %lu uV\n", ++ dmcfreq->volt); ++ return ret; ++ } ++ } ++ if (vdd_reg && dmcfreq->sleep_mem_volt && ++ dmcfreq->sleep_mem_volt != dmcfreq->mem_volt) { ++ ret = regulator_set_voltage(mem_reg, dmcfreq->mem_volt, ++ INT_MAX); ++ if (ret) { ++ dev_err(dev, "Cannot set mem voltage %lu uV\n", ++ dmcfreq->mem_volt); ++ return ret; ++ } ++ } + } + -+ vaddr = vmap(pages, npages, VM_MAP, pgprot); -+ vfree(pages); ++ ret = rockchip_dmcfreq_enable_event(dmcfreq); ++ if (ret) ++ return ret; + -+ if (!vaddr) -+ return ERR_PTR(-ENOMEM); ++ if (dmcfreq->info.devfreq) { ++ ret = devfreq_resume_device(dmcfreq->info.devfreq); ++ if (ret < 0) { ++ dev_err(dev, "failed to resume the devfreq devices\n"); ++ return ret; ++ } ++ } + -+ return vaddr; ++ return ret; +} + -+static void *system_heap_vmap(struct dma_buf 
*dmabuf) -+{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ void *vaddr; ++static SIMPLE_DEV_PM_OPS(rockchip_dmcfreq_pm, rockchip_dmcfreq_suspend, ++ rockchip_dmcfreq_resume); ++static struct platform_driver rockchip_dmcfreq_driver = { ++ .probe = rockchip_dmcfreq_probe, ++ .driver = { ++ .name = "rockchip-dmc", ++ .pm = &rockchip_dmcfreq_pm, ++ .of_match_table = rockchip_dmcfreq_of_match, ++ }, ++}; ++module_platform_driver(rockchip_dmcfreq_driver); + -+ mutex_lock(&buffer->lock); -+ if (buffer->vmap_cnt) { -+ buffer->vmap_cnt++; -+ vaddr = buffer->vaddr; -+ goto out; -+ } ++MODULE_AUTHOR("Finley Xiao "); ++MODULE_DESCRIPTION("rockchip dmcfreq driver with devfreq framework"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/devfreq/rockchip_dmc_common.c b/drivers/devfreq/rockchip_dmc_common.c +new file mode 100644 +index 000000000..7765e7174 +--- /dev/null ++++ b/drivers/devfreq/rockchip_dmc_common.c +@@ -0,0 +1,180 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Rockchip dmc common functions. ++ * ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ * Author: Finley Xiao ++ */ + -+ vaddr = system_heap_do_vmap(buffer); -+ if (IS_ERR(vaddr)) -+ goto out; ++#include ++#include + -+ buffer->vaddr = vaddr; -+ buffer->vmap_cnt++; -+out: -+ mutex_unlock(&buffer->lock); ++#define msch_rl_to_dmcfreq(work) container_of(to_delayed_work(work), \ ++ struct rockchip_dmcfreq, \ ++ msch_rl_work) ++#define MSCH_RL_DELAY_TIME 50 /* ms */ + -+ return vaddr; -+} ++static struct dmcfreq_common_info *common_info; ++static DECLARE_RWSEM(rockchip_dmcfreq_sem); + -+static void system_heap_vunmap(struct dma_buf *dmabuf, void *vaddr) ++void rockchip_dmcfreq_lock(void) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ -+ mutex_lock(&buffer->lock); -+ if (!--buffer->vmap_cnt) { -+ vunmap(buffer->vaddr); -+ buffer->vaddr = NULL; -+ } -+ mutex_unlock(&buffer->lock); ++ down_read(&rockchip_dmcfreq_sem); +} ++EXPORT_SYMBOL(rockchip_dmcfreq_lock); + -+static int system_heap_zero_buffer(struct system_heap_buffer *buffer) ++void rockchip_dmcfreq_lock_nested(void) +{ -+ struct sg_table *sgt = &buffer->sg_table; -+ struct sg_page_iter piter; -+ struct page *p; -+ void *vaddr; -+ int ret = 0; -+ -+ for_each_sgtable_page(sgt, &piter, 0) { -+ p = sg_page_iter_page(&piter); -+ vaddr = kmap_atomic(p); -+ memset(vaddr, 0, PAGE_SIZE); -+ kunmap_atomic(vaddr); -+ } -+ -+ return ret; ++ down_read_nested(&rockchip_dmcfreq_sem, SINGLE_DEPTH_NESTING); +} ++EXPORT_SYMBOL(rockchip_dmcfreq_lock_nested); + -+static void system_heap_buf_free(struct deferred_freelist_item *item, -+ enum df_reason reason) ++void rockchip_dmcfreq_unlock(void) +{ -+ struct system_heap_buffer *buffer; -+ struct sg_table *table; -+ struct scatterlist *sg; -+ int i, j; -+ -+ buffer = container_of(item, struct system_heap_buffer, deferred_free); -+ /* Zero the buffer pages before adding back to the pool */ -+ if (reason == DF_NORMAL) -+ if (system_heap_zero_buffer(buffer)) -+ reason = DF_UNDER_PRESSURE; // On failure, just free -+ -+ table = &buffer->sg_table; -+ for_each_sgtable_sg(table, sg, i) { -+ struct page *page = sg_page(sg); -+ -+ if (reason == DF_UNDER_PRESSURE) { -+ __free_pages(page, compound_order(page)); -+ } else { -+ for (j = 0; j < NUM_ORDERS; j++) { -+ if (compound_order(page) == orders[j]) -+ break; -+ } -+ dmabuf_page_pool_free(buffer->pools[j], page); -+ } -+ } -+ sg_free_table(table); -+ kfree(buffer); ++ up_read(&rockchip_dmcfreq_sem); +} ++EXPORT_SYMBOL(rockchip_dmcfreq_unlock); + -+static void 
system_heap_dma_buf_release(struct dma_buf *dmabuf) ++int rockchip_dmcfreq_write_trylock(void) +{ -+ struct system_heap_buffer *buffer = dmabuf->priv; -+ int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; ++ return down_write_trylock(&rockchip_dmcfreq_sem); ++} ++EXPORT_SYMBOL(rockchip_dmcfreq_write_trylock); + -+ deferred_free(&buffer->deferred_free, system_heap_buf_free, npages); ++void rockchip_dmcfreq_write_unlock(void) ++{ ++ up_write(&rockchip_dmcfreq_sem); +} ++EXPORT_SYMBOL(rockchip_dmcfreq_write_unlock); + -+static const struct dma_buf_ops system_heap_buf_ops = { -+ .attach = system_heap_attach, -+ .detach = system_heap_detach, -+ .map_dma_buf = system_heap_map_dma_buf, -+ .unmap_dma_buf = system_heap_unmap_dma_buf, -+ .begin_cpu_access = system_heap_dma_buf_begin_cpu_access, -+ .end_cpu_access = system_heap_dma_buf_end_cpu_access, -+#ifdef CONFIG_DMABUF_PARTIAL -+ .begin_cpu_access_partial = system_heap_dma_buf_begin_cpu_access_partial, -+ .end_cpu_access_partial = system_heap_dma_buf_end_cpu_access_partial, -+#endif -+ .mmap = system_heap_mmap, -+ .vmap = system_heap_vmap, -+ .vunmap = system_heap_vunmap, -+ .release = system_heap_dma_buf_release, -+}; ++static void set_msch_rl(unsigned int readlatency) + -+static struct page *system_heap_alloc_largest_available(struct dma_heap *heap, -+ struct dmabuf_page_pool **pool, -+ unsigned long size, -+ unsigned int max_order) +{ -+ struct page *page; -+ int i; -+ -+ for (i = 0; i < NUM_ORDERS; i++) { -+ if (size < (PAGE_SIZE << orders[i])) -+ continue; -+ if (max_order < orders[i]) -+ continue; -+ page = dmabuf_page_pool_alloc(pool[i]); -+ if (!page) -+ continue; -+ return page; -+ } -+ return NULL; ++ rockchip_dmcfreq_lock(); ++ dev_dbg(common_info->dev, "rl 0x%x -> 0x%x\n", ++ common_info->read_latency, readlatency); ++ if (!common_info->set_msch_readlatency(readlatency)) ++ common_info->read_latency = readlatency; ++ else ++ dev_err(common_info->dev, "failed to set msch rl\n"); ++ rockchip_dmcfreq_unlock(); +} + -+static struct dma_buf *system_heap_do_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags, -+ bool uncached) ++static void set_msch_rl_work(struct work_struct *work) +{ -+ struct system_heap_buffer *buffer; -+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); -+ unsigned long size_remaining = len; -+ unsigned int max_order = orders[0]; -+ struct dma_buf *dmabuf; -+ struct sg_table *table; -+ struct scatterlist *sg; -+ struct list_head pages; -+ struct page *page, *tmp_page; -+ int i, ret = -ENOMEM; -+ struct list_head lists[8]; -+ unsigned int block_index[8] = {0}; -+ unsigned int block_1M = 0; -+ unsigned int block_64K = 0; -+ unsigned int maximum; -+ int j; -+ -+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); -+ if (!buffer) -+ return ERR_PTR(-ENOMEM); -+ -+ INIT_LIST_HEAD(&buffer->attachments); -+ mutex_init(&buffer->lock); -+ buffer->heap = heap; -+ buffer->len = len; -+ buffer->uncached = uncached; -+ buffer->pools = strstr(dma_heap_get_name(heap), "dma32") ? 
dma32_pools : pools; ++ set_msch_rl(0); ++ common_info->is_msch_rl_work_started = false; ++} + -+ INIT_LIST_HEAD(&pages); -+ for (i = 0; i < 8; i++) -+ INIT_LIST_HEAD(&lists[i]); -+ i = 0; -+ while (size_remaining > 0) { -+ /* -+ * Avoid trying to allocate memory if the process -+ * has been killed by SIGKILL -+ */ -+ if (fatal_signal_pending(current)) -+ goto free_buffer; ++int rockchip_dmcfreq_vop_bandwidth_init(struct dmcfreq_common_info *info) ++{ ++ if (info->set_msch_readlatency) ++ INIT_DELAYED_WORK(&info->msch_rl_work, set_msch_rl_work); ++ common_info = info; + -+ page = system_heap_alloc_largest_available(heap, buffer->pools, -+ size_remaining, -+ max_order); -+ if (!page) -+ goto free_buffer; ++ return 0; ++} ++EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_init); + -+ size_remaining -= page_size(page); -+ max_order = compound_order(page); -+ if (max_order) { -+ if (max_order == 8) -+ block_1M++; -+ if (max_order == 4) -+ block_64K++; -+ list_add_tail(&page->lru, &pages); -+ } else { -+ dma_addr_t phys = page_to_phys(page); -+ unsigned int bit_index = ((phys >> bank_bit_first) & bank_bit_mask) & 0x7; ++void rockchip_dmcfreq_vop_bandwidth_update(struct dmcfreq_vop_info *vop_info) ++{ ++ unsigned long vop_last_rate, target = 0; ++ unsigned int readlatency = 0; ++ int i; + -+ list_add_tail(&page->lru, &lists[bit_index]); -+ block_index[bit_index]++; -+ } -+ i++; -+ } ++ if (!common_info) ++ return; + -+ table = &buffer->sg_table; -+ if (sg_alloc_table(table, i, GFP_KERNEL)) -+ goto free_buffer; ++ dev_dbg(common_info->dev, "line bw=%u, frame bw=%u, pn=%u, pn_4k=%u\n", ++ vop_info->line_bw_mbyte, vop_info->frame_bw_mbyte, ++ vop_info->plane_num, vop_info->plane_num_4k); + -+ maximum = block_index[0]; -+ for (i = 1; i < 8; i++) -+ maximum = max(maximum, block_index[i]); -+ sg = table->sgl; -+ list_for_each_entry_safe(page, tmp_page, &pages, lru) { -+ sg_set_page(sg, page, page_size(page), 0); -+ sg = sg_next(sg); -+ list_del(&page->lru); ++ if (!common_info->vop_pn_rl_tbl || !common_info->set_msch_readlatency) ++ goto vop_bw_tbl; ++ for (i = 0; common_info->vop_pn_rl_tbl[i].rl != DMCFREQ_TABLE_END; i++) { ++ if (vop_info->plane_num >= common_info->vop_pn_rl_tbl[i].pn) ++ readlatency = common_info->vop_pn_rl_tbl[i].rl; + } -+ for (i = 0; i < maximum; i++) { -+ for (j = 0; j < 8; j++) { -+ if (!list_empty(&lists[j])) { -+ page = list_first_entry(&lists[j], struct page, lru); -+ sg_set_page(sg, page, PAGE_SIZE, 0); -+ sg = sg_next(sg); -+ list_del(&page->lru); -+ } -+ } ++ dev_dbg(common_info->dev, "pn=%u\n", vop_info->plane_num); ++ if (readlatency) { ++ cancel_delayed_work_sync(&common_info->msch_rl_work); ++ common_info->is_msch_rl_work_started = false; ++ if (common_info->read_latency != readlatency) ++ set_msch_rl(readlatency); ++ } else if (common_info->read_latency && ++ !common_info->is_msch_rl_work_started) { ++ common_info->is_msch_rl_work_started = true; ++ schedule_delayed_work(&common_info->msch_rl_work, ++ msecs_to_jiffies(MSCH_RL_DELAY_TIME)); + } + -+ /* create the dmabuf */ -+ exp_info.exp_name = dma_heap_get_name(heap); -+ exp_info.ops = &system_heap_buf_ops; -+ exp_info.size = buffer->len; -+ exp_info.flags = fd_flags; -+ exp_info.priv = buffer; -+ dmabuf = dma_buf_export(&exp_info); -+ if (IS_ERR(dmabuf)) { -+ ret = PTR_ERR(dmabuf); -+ goto free_pages; ++vop_bw_tbl: ++ if (!common_info->auto_freq_en || !common_info->vop_bw_tbl) ++ goto vop_frame_bw_tbl; ++ ++ for (i = 0; common_info->vop_bw_tbl[i].freq != DMCFREQ_TABLE_END; i++) { ++ if (vop_info->line_bw_mbyte >= 
common_info->vop_bw_tbl[i].min) ++ target = common_info->vop_bw_tbl[i].freq; + } + -+ /* -+ * For uncached buffers, we need to initially flush cpu cache, since -+ * the __GFP_ZERO on the allocation means the zeroing was done by the -+ * cpu and thus it is likely cached. Map (and implicitly flush) and -+ * unmap it now so we don't get corruption later on. -+ */ -+ if (buffer->uncached) { -+ dma_map_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); -+ dma_unmap_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); ++vop_frame_bw_tbl: ++ if (!common_info->auto_freq_en || !common_info->vop_frame_bw_tbl) ++ goto next; ++ for (i = 0; common_info->vop_frame_bw_tbl[i].freq != DMCFREQ_TABLE_END; ++ i++) { ++ if (vop_info->frame_bw_mbyte >= common_info->vop_frame_bw_tbl[i].min) { ++ if (target < common_info->vop_frame_bw_tbl[i].freq) ++ target = common_info->vop_frame_bw_tbl[i].freq; ++ } + } + -+ return dmabuf; ++next: ++ if (vop_info->plane_num_4k && target < common_info->vop_4k_rate) ++ target = common_info->vop_4k_rate; + -+free_pages: -+ for_each_sgtable_sg(table, sg, i) { -+ struct page *p = sg_page(sg); ++ vop_last_rate = common_info->vop_req_rate; ++ common_info->vop_req_rate = target; + -+ __free_pages(p, compound_order(p)); -+ } -+ sg_free_table(table); -+free_buffer: -+ list_for_each_entry_safe(page, tmp_page, &pages, lru) -+ __free_pages(page, compound_order(page)); -+ for (i = 0; i < 8; i++) { -+ list_for_each_entry_safe(page, tmp_page, &lists[i], lru) -+ __free_pages(page, compound_order(page)); ++ if (target > vop_last_rate) { ++ mutex_lock(&common_info->devfreq->lock); ++ update_devfreq(common_info->devfreq); ++ mutex_unlock(&common_info->devfreq->lock); + } -+ kfree(buffer); -+ -+ return ERR_PTR(ret); -+} -+ -+static struct dma_buf *system_heap_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) -+{ -+ return system_heap_do_allocate(heap, len, fd_flags, heap_flags, false); +} ++EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_update); + -+static long system_get_pool_size(struct dma_heap *heap) ++int rockchip_dmcfreq_vop_bandwidth_request(struct dmcfreq_vop_info *vop_info) +{ ++ unsigned long target = 0; + int i; -+ long num_pages = 0; -+ struct dmabuf_page_pool **pool; + -+ pool = strstr(dma_heap_get_name(heap), "dma32") ? 
dma32_pools : pools; -+ for (i = 0; i < NUM_ORDERS; i++, pool++) { -+ num_pages += ((*pool)->count[POOL_LOWPAGE] + -+ (*pool)->count[POOL_HIGHPAGE]) << (*pool)->order; ++ if (!common_info || !common_info->auto_freq_en || ++ !common_info->vop_bw_tbl) ++ return 0; ++ ++ for (i = 0; common_info->vop_bw_tbl[i].freq != DMCFREQ_TABLE_END; i++) { ++ if (vop_info->line_bw_mbyte <= common_info->vop_bw_tbl[i].max) { ++ target = common_info->vop_bw_tbl[i].freq; ++ break; ++ } + } + -+ return num_pages << PAGE_SHIFT; -+} ++ if (!target) ++ return -EINVAL; + -+static const struct dma_heap_ops system_heap_ops = { -+ .allocate = system_heap_allocate, -+ .get_pool_size = system_get_pool_size, -+}; ++ return 0; ++} ++EXPORT_SYMBOL(rockchip_dmcfreq_vop_bandwidth_request); + -+static struct dma_buf *system_uncached_heap_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) ++unsigned int rockchip_dmcfreq_get_stall_time_ns(void) +{ -+ return system_heap_do_allocate(heap, len, fd_flags, heap_flags, true); -+} ++ if (!common_info) ++ return 0; + -+/* Dummy function to be used until we can call coerce_mask_and_coherent */ -+static struct dma_buf *system_uncached_heap_not_initialized(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) -+{ -+ return ERR_PTR(-EBUSY); -+} -+ -+static struct dma_heap_ops system_uncached_heap_ops = { -+ /* After system_heap_create is complete, we will swap this */ -+ .allocate = system_uncached_heap_not_initialized, -+}; -+ -+static int set_heap_dev_dma(struct device *heap_dev) -+{ -+ int err = 0; -+ -+ if (!heap_dev) -+ return -EINVAL; -+ -+ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); -+ -+ if (!heap_dev->dma_parms) { -+ heap_dev->dma_parms = devm_kzalloc(heap_dev, -+ sizeof(*heap_dev->dma_parms), -+ GFP_KERNEL); -+ if (!heap_dev->dma_parms) -+ return -ENOMEM; -+ -+ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); -+ if (err) { -+ devm_kfree(heap_dev, heap_dev->dma_parms); -+ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); -+ return err; -+ } -+ } -+ -+ return 0; ++ return common_info->stall_time_ns; +} ++EXPORT_SYMBOL(rockchip_dmcfreq_get_stall_time_ns); + -+static int system_heap_create(void) -+{ -+ struct dma_heap_export_info exp_info; -+ int i, err = 0; -+ struct dram_addrmap_info *ddr_map_info; -+ -+ /* -+ * Since swiotlb has memory size limitation, this will calculate -+ * the maximum size locally. -+ * -+ * Once swiotlb_max_segment() return not '0', means that the totalram size -+ * is larger than 4GiB and swiotlb is not force mode, in this case, system -+ * heap should limit largest allocation. -+ * -+ * FIX: fix the orders[] as a workaround. 
-+ */ -+ if (swiotlb_max_segment()) { -+ unsigned int max_size = (1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; -+ int max_order = MAX_ORDER; -+ int i; -+ -+ max_size = max_t(unsigned int, max_size, PAGE_SIZE) >> PAGE_SHIFT; -+ max_order = min(max_order, ilog2(max_size)); -+ for (i = 0; i < NUM_ORDERS; i++) { -+ if (max_order < orders[i]) -+ orders[i] = max_order; -+ pr_info("system_heap: orders[%d] = %u\n", i, orders[i]); -+ } -+ } -+ -+ for (i = 0; i < NUM_ORDERS; i++) { -+ pools[i] = dmabuf_page_pool_create(order_flags[i], orders[i]); -+ -+ if (!pools[i]) { -+ int j; -+ -+ pr_err("%s: page pool creation failed!\n", __func__); -+ for (j = 0; j < i; j++) -+ dmabuf_page_pool_destroy(pools[j]); -+ return -ENOMEM; -+ } -+ } -+ -+ for (i = 0; i < NUM_ORDERS; i++) { -+ dma32_pools[i] = dmabuf_page_pool_create(order_flags[i] | GFP_DMA32, orders[i]); -+ -+ if (!dma32_pools[i]) { -+ int j; -+ -+ pr_err("%s: page dma32 pool creation failed!\n", __func__); -+ for (j = 0; j < i; j++) -+ dmabuf_page_pool_destroy(dma32_pools[j]); -+ goto err_dma32_pool; -+ } -+ } -+ -+ exp_info.name = "system"; -+ exp_info.ops = &system_heap_ops; -+ exp_info.priv = NULL; -+ -+ sys_heap = dma_heap_add(&exp_info); -+ if (IS_ERR(sys_heap)) -+ return PTR_ERR(sys_heap); -+ -+ exp_info.name = "system-dma32"; -+ exp_info.ops = &system_heap_ops; -+ exp_info.priv = NULL; -+ -+ sys_dma32_heap = dma_heap_add(&exp_info); -+ if (IS_ERR(sys_dma32_heap)) -+ return PTR_ERR(sys_dma32_heap); -+ -+ exp_info.name = "system-uncached"; -+ exp_info.ops = &system_uncached_heap_ops; -+ exp_info.priv = NULL; -+ -+ sys_uncached_heap = dma_heap_add(&exp_info); -+ if (IS_ERR(sys_uncached_heap)) -+ return PTR_ERR(sys_uncached_heap); -+ -+ err = set_heap_dev_dma(dma_heap_get_dev(sys_uncached_heap)); -+ if (err) -+ return err; -+ -+ exp_info.name = "system-uncached-dma32"; -+ exp_info.ops = &system_uncached_heap_ops; -+ exp_info.priv = NULL; -+ -+ sys_uncached_dma32_heap = dma_heap_add(&exp_info); -+ if (IS_ERR(sys_uncached_dma32_heap)) -+ return PTR_ERR(sys_uncached_dma32_heap); -+ -+ err = set_heap_dev_dma(dma_heap_get_dev(sys_uncached_dma32_heap)); -+ if (err) -+ return err; -+ dma_coerce_mask_and_coherent(dma_heap_get_dev(sys_uncached_dma32_heap), DMA_BIT_MASK(32)); -+ -+ mb(); /* make sure we only set allocate after dma_mask is set */ -+ system_uncached_heap_ops.allocate = system_uncached_heap_allocate; -+ -+ ddr_map_info = sip_smc_get_dram_map(); -+ if (ddr_map_info) { -+ bank_bit_first = ddr_map_info->bank_bit_first; -+ bank_bit_mask = ddr_map_info->bank_bit_mask; -+ } -+ -+ return 0; -+err_dma32_pool: -+ for (i = 0; i < NUM_ORDERS; i++) -+ dmabuf_page_pool_destroy(pools[i]); -+ -+ return -ENOMEM; -+} -+module_init(system_heap_create); ++MODULE_AUTHOR("Finley Xiao "); ++MODULE_DESCRIPTION("rockchip dmcfreq driver with devfreq framework"); +MODULE_LICENSE("GPL v2"); -diff --git a/drivers/dma-buf/heaps/sram_heap.c b/drivers/dma-buf/heaps/sram_heap.c +diff --git a/drivers/devfreq/rockchip_dmc_dbg.c b/drivers/devfreq/rockchip_dmc_dbg.c new file mode 100644 -index 000000000..d9a9b70a7 +index 000000000..80b25e904 --- /dev/null -+++ b/drivers/dma-buf/heaps/sram_heap.c -@@ -0,0 +1,437 @@ ++++ b/drivers/devfreq/rockchip_dmc_dbg.c +@@ -0,0 +1,1061 @@ +// SPDX-License-Identifier: GPL-2.0 +/* -+ * SRAM DMA-Heap exporter && support alloc page and dmabuf on kernel -+ * -+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ -+ * -+ * Author: Andrew F. Davis -+ * -+ * Copyright (C) 2022 Rockchip Electronics Co., Ltd. 
-+ * -+ * Author: Huang Lee ++ * Copyright (c) 2020, Rockchip Electronics Co., Ltd. + */ -+#define pr_fmt(fmt) "sram_heap: " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include +#include -+ -+#include -+#include ++#include ++#include +#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#define RK3588_SRAM_BASE 0xff001000 -+ -+struct sram_dma_heap { -+ struct dma_heap *heap; -+ struct gen_pool *pool; -+}; -+ -+struct sram_dma_heap_buffer { -+ struct gen_pool *pool; -+ struct list_head attachments; -+ struct mutex attachments_lock; -+ unsigned long len; -+ void *vaddr; -+ phys_addr_t paddr; -+}; -+ -+struct dma_heap_attachment { -+ struct device *dev; -+ struct sg_table *table; -+ struct list_head list; -+}; -+ -+static int dma_heap_attach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a; -+ struct sg_table *table; ++#include + -+ a = kzalloc(sizeof(*a), GFP_KERNEL); -+ if (!a) -+ return -ENOMEM; ++#include "rockchip_dmc_timing.h" + -+ table = kmalloc(sizeof(*table), GFP_KERNEL); -+ if (!table) -+ goto table_alloc_failed; ++/* ++ * DMCDBG share memory request 4KB for delivery parameter ++ */ ++#define DMCDBG_PAGE_NUMS (1) ++#define DMCDBG_SHARE_MEM_SIZE ((DMCDBG_PAGE_NUMS) * 4096) + -+ if (sg_alloc_table(table, 1, GFP_KERNEL)) -+ goto sg_alloc_failed; ++#define PROC_DMCDBG_DIR_NAME "dmcdbg" ++#define PROC_DMCDBG_DRAM_INFO "dmcinfo" ++#define PROC_DMCDBG_POWERSAVE "powersave" ++#define PROC_DMCDBG_DRVODT "drvodt" ++#define PROC_DMCDBG_DESKEW "deskew" ++#define PROC_DMCDBG_REGS_INFO "regsinfo" + -+ /* -+ * The referenced pfn and page are for setting the sram address to the -+ * sgtable, and cannot be used for other purposes, and cannot be accessed -+ * directly or indirectly. -+ * -+ * And not sure if there is a problem with the 32-bit system. -+ * -+ * page cannot support kmap func. 
-+ */ -+ sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(buffer->paddr)), buffer->len, 0); ++#define DDRDBG_FUNC_GET_VERSION (0x01) ++#define DDRDBG_FUNC_GET_SUPPORTED (0x02) ++#define DDRDBG_FUNC_GET_DRAM_INFO (0x03) ++#define DDRDBG_FUNC_GET_DESKEW_INFO (0x04) ++#define DDRDBG_FUNC_UPDATE_DESKEW (0x05) ++#define DDRDBG_FUNC_DATA_TRAINING (0x06) ++#define DDRDBG_FUNC_UPDATE_DESKEW_TR (0x07) ++#define DDRDBG_FUNC_GET_POWERSAVE_INFO (0x08) ++#define DDRDBG_FUNC_UPDATE_POWERSAVE (0x09) ++#define DDRDBG_FUNC_GET_DRVODT_INFO (0x0a) ++#define DDRDBG_FUNC_UPDATE_DRVODT (0x0b) ++#define DDRDBG_FUNC_GET_REGISTERS_INFO (0x0c) + -+ a->table = table; -+ a->dev = attachment->dev; -+ INIT_LIST_HEAD(&a->list); ++#define DRV_ODT_UNKNOWN (0xffff) ++#define DRV_ODT_UNSUSPEND_FIX (0x0) ++#define DRV_ODT_SUSPEND_FIX (0x1) + -+ attachment->priv = a; ++#define REGS_NAME_LEN_MAX (20) ++#define SKEW_GROUP_NUM_MAX (6) ++#define SKEW_TIMING_NUM_MAX (50) + -+ mutex_lock(&buffer->attachments_lock); -+ list_add(&a->list, &buffer->attachments); -+ mutex_unlock(&buffer->attachments_lock); ++struct rockchip_dmcdbg { ++ struct device *dev; ++}; + -+ return 0; ++struct proc_dir_entry *proc_dmcdbg_dir; + -+sg_alloc_failed: -+ kfree(table); -+table_alloc_failed: -+ kfree(a); -+ return -ENOMEM; -+} ++struct dram_cap_info { ++ unsigned int rank; ++ unsigned int col; ++ unsigned int bank; ++ unsigned int buswidth; ++ unsigned int die_buswidth; ++ unsigned int row_3_4; ++ unsigned int cs0_row; ++ unsigned int cs1_row; ++ unsigned int cs0_high16bit_row; ++ unsigned int cs1_high16bit_row; ++ unsigned int bankgroup; ++ unsigned int size; ++}; + -+static void dma_heap_detatch(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; -+ struct dma_heap_attachment *a = attachment->priv; ++struct dram_info { ++ unsigned int version; ++ char dramtype[10]; ++ unsigned int dramfreq; ++ unsigned int channel_num; ++ struct dram_cap_info ch[2]; ++}; + -+ mutex_lock(&buffer->attachments_lock); -+ list_del(&a->list); -+ mutex_unlock(&buffer->attachments_lock); ++static const char * const power_save_msg[] = { ++ "auto power down enable", ++ "auto power down idle cycle", ++ "auto self refresh enable", ++ "auto self refresh idle cycle", ++ "self refresh with clock gate idle cycle", ++ "self refresh and power down lite idle cycle", ++ "standby idle cycle", ++}; + -+ sg_free_table(a->table); -+ kfree(a->table); -+ kfree(a); -+} ++struct power_save_info { ++ unsigned int pd_en; ++ unsigned int pd_idle; ++ unsigned int sr_en; ++ unsigned int sr_idle; ++ unsigned int sr_mc_gate_idle; ++ unsigned int srpd_lite_idle; ++ unsigned int standby_idle; ++}; + -+static struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment *attachment, -+ enum dma_data_direction direction) -+{ -+ struct dma_heap_attachment *a = attachment->priv; -+ struct sg_table *table = a->table; -+ int ret = 0; ++static const char * const drv_odt_msg[] = { ++ "dram side drv pull-up", ++ "dram side drv pull-down", ++ "dram side dq odt pull-up", ++ "dram side dq odt pull-down", ++ "dram side ca odt pull-up", ++ "dram side ca odt pull-down", ++ "soc side ca drv pull-up", ++ "soc side ca drv pull-down", ++ "soc side ck drv pull-up", ++ "soc side ck drv pull-down", ++ "soc side cs drv pull-up", ++ "soc side cs drv pull-down", ++ "soc side dq drv pull-up", ++ "soc side dq drv pull-down", ++ "soc side odt pull-up", ++ "soc side odt pull-down", ++ "phy vref inner", ++ "phy vref out", ++}; + -+ ret = 
dma_map_sgtable(attachment->dev, table, direction, DMA_ATTR_SKIP_CPU_SYNC); -+ if (ret) -+ return ERR_PTR(-ENOMEM); ++struct drv_odt { ++ unsigned int value; ++ unsigned int ohm; ++ unsigned int flag; ++}; + -+ return table; -+} ++struct drv_odt_vref { ++ unsigned int value; ++ unsigned int percen; ++ unsigned int flag; ++}; + -+static void dma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, -+ struct sg_table *table, -+ enum dma_data_direction direction) -+{ -+ dma_unmap_sgtable(attachment->dev, table, direction, DMA_ATTR_SKIP_CPU_SYNC); -+} ++struct drv_odt_info { ++ struct drv_odt dram_drv_up; ++ struct drv_odt dram_drv_down; ++ struct drv_odt dram_dq_odt_up; ++ struct drv_odt dram_dq_odt_down; ++ struct drv_odt dram_ca_odt_up; ++ struct drv_odt dram_ca_odt_down; ++ struct drv_odt phy_ca_drv_up; ++ struct drv_odt phy_ca_drv_down; ++ struct drv_odt phy_ck_drv_up; ++ struct drv_odt phy_ck_drv_down; ++ struct drv_odt phy_cs_drv_up; ++ struct drv_odt phy_cs_drv_down; ++ struct drv_odt phy_dq_drv_up; ++ struct drv_odt phy_dq_drv_down; ++ struct drv_odt phy_odt_up; ++ struct drv_odt phy_odt_down; ++ struct drv_odt_vref phy_vref_inner; ++ struct drv_odt_vref phy_vref_out; ++}; + -+static void dma_heap_dma_buf_release(struct dma_buf *dmabuf) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++struct dmc_registers { ++ char regs_name[REGS_NAME_LEN_MAX]; ++ unsigned int regs_addr; ++}; + -+ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); -+ kfree(buffer); -+} ++struct registers_info { ++ unsigned int regs_num; ++ struct dmc_registers regs[]; ++}; + -+static int dma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; -+ int ret; ++struct skew_group { ++ unsigned int skew_num; ++ unsigned int *p_skew_info; ++ char *p_skew_timing[SKEW_TIMING_NUM_MAX]; ++ char *note; ++}; + -+ /* SRAM mappings are not cached */ -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++struct rockchip_dmcdbg_data { ++ unsigned int inited_flag; ++ void __iomem *share_memory; ++ unsigned int skew_group_num; ++ struct skew_group skew_group[SKEW_GROUP_NUM_MAX]; ++}; + -+ ret = vm_iomap_memory(vma, buffer->paddr, buffer->len); -+ if (ret) -+ pr_err("Could not map buffer to userspace\n"); ++static struct rockchip_dmcdbg_data dmcdbg_data; + -+ return ret; -+} ++struct skew_info_rv1126 { ++ unsigned int ca_skew[32]; ++ unsigned int cs0_a_skew[44]; ++ unsigned int cs0_b_skew[44]; ++ unsigned int cs1_a_skew[44]; ++ unsigned int cs1_b_skew[44]; ++}; + -+static void *dma_heap_vmap(struct dma_buf *dmabuf) ++static int dmcinfo_proc_show(struct seq_file *m, void *v) +{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ struct arm_smccc_res res; ++ struct dram_info *p_dram_info; ++ struct file *fp = NULL; ++ char cur_freq[20] = {0}; ++ char governor[20] = {0}; ++ loff_t pos; ++ u32 i; + -+ return buffer->vaddr; -+} ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRAM_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+static const struct dma_buf_ops sram_dma_heap_buf_ops = { -+ .attach = dma_heap_attach, -+ .detach = dma_heap_detatch, -+ .map_dma_buf = dma_heap_map_dma_buf, -+ .unmap_dma_buf = dma_heap_unmap_dma_buf, -+ .release = dma_heap_dma_buf_release, -+ .mmap = dma_heap_mmap, -+ .vmap = dma_heap_vmap, -+}; ++ if (!dmcdbg_data.inited_flag) { ++ seq_puts(m, "dmcdbg_data no int\n"); ++ 
return -EPERM; ++ } ++ p_dram_info = (struct dram_info *)dmcdbg_data.share_memory; + -+static struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags) -+{ -+ struct sram_dma_heap *sram_dma_heap = dma_heap_get_drvdata(heap); -+ struct sram_dma_heap_buffer *buffer; ++ /* dram type information */ ++ seq_printf(m, ++ "DramType: %s\n" ++ , ++ p_dram_info->dramtype ++ ); + -+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); -+ struct dma_buf *dmabuf; -+ int ret = -ENOMEM; ++ /* dram capacity information */ ++ seq_printf(m, ++ "\n" ++ "DramCapacity:\n" ++ ); + -+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); -+ if (!buffer) -+ return ERR_PTR(-ENOMEM); -+ buffer->pool = sram_dma_heap->pool; -+ INIT_LIST_HEAD(&buffer->attachments); -+ mutex_init(&buffer->attachments_lock); -+ buffer->len = len; ++ for (i = 0; i < p_dram_info->channel_num; i++) { ++ if (p_dram_info->channel_num == 2) ++ seq_printf(m, ++ "Channel [%d]:\n" ++ , ++ i ++ ); + -+ buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, buffer->len); -+ if (!buffer->vaddr) { -+ ret = -ENOMEM; -+ goto free_buffer; ++ seq_printf(m, ++ "CS Count: %d\n" ++ "Bus Width: %d bit\n" ++ "Column: %d\n" ++ "Bank: %d\n" ++ "CS0_Row: %d\n" ++ "CS1_Row: %d\n" ++ "DieBusWidth: %d bit\n" ++ "TotalSize: %d MB\n" ++ , ++ p_dram_info->ch[i].rank, ++ p_dram_info->ch[i].buswidth, ++ p_dram_info->ch[i].col, ++ p_dram_info->ch[i].bank, ++ p_dram_info->ch[i].cs0_row, ++ p_dram_info->ch[i].cs1_row, ++ p_dram_info->ch[i].die_buswidth, ++ p_dram_info->ch[i].size ++ ); + } + -+ buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned long)buffer->vaddr); -+ if (buffer->paddr == -1) { -+ ret = -ENOMEM; -+ goto free_pool; -+ } ++ /* check devfreq/dmc device */ ++ fp = filp_open("/sys/class/devfreq/dmc/cur_freq", O_RDONLY, 0); ++ if (IS_ERR(fp)) { ++ seq_printf(m, ++ "\n" ++ "devfreq/dmc: Disable\n" ++ "DramFreq: %d\n" ++ , ++ p_dram_info->dramfreq ++ ); ++ } else { ++ pos = 0; ++ kernel_read(fp, cur_freq, sizeof(cur_freq), &pos); ++ filp_close(fp, NULL); + -+ /* create the dmabuf */ -+ exp_info.ops = &sram_dma_heap_buf_ops; -+ exp_info.size = buffer->len; -+ exp_info.flags = fd_flags; -+ exp_info.priv = buffer; -+ dmabuf = dma_buf_export(&exp_info); -+ if (IS_ERR(dmabuf)) { -+ ret = PTR_ERR(dmabuf); -+ goto free_pool; -+ } ++ fp = filp_open("/sys/class/devfreq/dmc/governor", O_RDONLY, 0); ++ if (IS_ERR(fp)) { ++ fp = NULL; ++ } else { ++ pos = 0; ++ kernel_read(fp, governor, sizeof(governor), &pos); ++ filp_close(fp, NULL); ++ } + -+ return dmabuf; ++ seq_printf(m, ++ "\n" ++ "devfreq/dmc: Enable\n" ++ "governor: %s\n" ++ "cur_freq: %s\n" ++ , ++ governor, ++ cur_freq ++ ); ++ seq_printf(m, ++ "NOTE:\n" ++ "more information about dmc can get from /sys/class/devfreq/dmc.\n" ++ ); ++ } + -+free_pool: -+ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); -+free_buffer: -+ kfree(buffer); ++ return 0; ++} + -+ return ERR_PTR(ret); ++static int dmcinfo_proc_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, dmcinfo_proc_show, NULL); +} + -+static struct dma_heap_ops sram_dma_heap_ops = { -+ .allocate = sram_dma_heap_allocate, ++static const struct file_operations dmcinfo_proc_fops = { ++ .open = dmcinfo_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, +}; + -+static struct sram_dma_heap *sram_dma_heap_global; -+ -+static int sram_dma_heap_export(const char *name, -+ struct gen_pool *sram_gp) ++static int 
proc_dmcinfo_init(void) +{ -+ struct sram_dma_heap *sram_dma_heap; -+ struct dma_heap_export_info exp_info; ++ /* create dmcinfo file */ ++ proc_create(PROC_DMCDBG_DRAM_INFO, 0644, proc_dmcdbg_dir, ++ &dmcinfo_proc_fops); + -+ pr_info("Exporting SRAM pool '%s'\n", name); ++ return 0; ++} + -+ sram_dma_heap = kzalloc(sizeof(*sram_dma_heap), GFP_KERNEL); -+ if (!sram_dma_heap) ++static int powersave_proc_show(struct seq_file *m, void *v) ++{ ++ struct arm_smccc_res res; ++ struct power_save_info *p_power; ++ unsigned int *p_uint; ++ unsigned int i = 0; ++ ++ /* get low power information */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, ++ DDRDBG_FUNC_GET_POWERSAVE_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", ++ res.a0); + return -ENOMEM; -+ sram_dma_heap->pool = sram_gp; ++ } + -+ exp_info.name = "sram_dma_heap"; -+ exp_info.ops = &sram_dma_heap_ops; -+ exp_info.priv = sram_dma_heap; ++ if (!dmcdbg_data.inited_flag) { ++ seq_puts(m, "dmcdbg_data no int\n"); ++ return -EPERM; ++ } ++ p_power = (struct power_save_info *)dmcdbg_data.share_memory; + -+ sram_dma_heap_global = sram_dma_heap; ++ seq_printf(m, ++ "low power information:\n" ++ "\n" ++ "[number]name: value\n" ++ ); + -+ sram_dma_heap->heap = dma_heap_add(&exp_info); -+ if (IS_ERR(sram_dma_heap->heap)) { -+ int ret = PTR_ERR(sram_dma_heap->heap); ++ p_uint = (unsigned int *)p_power; ++ for (i = 0; i < ARRAY_SIZE(power_save_msg); i++) ++ seq_printf(m, ++ "[%d]%s: %d\n" ++ , ++ i, power_save_msg[i], *(p_uint + i) ++ ); + -+ kfree(sram_dma_heap); -+ return ret; -+ } ++ seq_printf(m, ++ "\n" ++ "power save setting:\n" ++ "echo number=value > /proc/dmcdbg/powersave\n" ++ "eg: set auto power down enable to 1\n" ++ " echo 0=1 > /proc/dmcdbg/powersave\n" ++ "\n" ++ "Support for setting multiple parameters at the same time.\n" ++ "echo number=value,number=value,... 
> /proc/dmcdbg/powersave\n" ++ "eg:\n" ++ " echo 0=1,1=32 > /proc/dmcdbg/powersave\n" ++ ); + + return 0; +} + -+struct dma_buf *sram_heap_alloc_dma_buf(size_t size) ++static int powersave_proc_open(struct inode *inode, struct file *file) +{ -+ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; -+ struct sram_dma_heap_buffer *buffer; -+ -+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); -+ struct dma_buf *dmabuf; -+ int ret = -ENOMEM; ++ return single_open(file, powersave_proc_show, NULL); ++} + -+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); -+ if (!buffer) -+ return ERR_PTR(-ENOMEM); ++static ssize_t powersave_proc_write(struct file *file, ++ const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct arm_smccc_res res; ++ struct power_save_info *p_power; ++ unsigned int *p_uint; ++ char *buf, *cookie_pot, *p_char; ++ int ret = 0; ++ u32 loop, i, offset, value; ++ long long_val; + -+ buffer->pool = sram_dma_heap->pool; -+ INIT_LIST_HEAD(&buffer->attachments); -+ mutex_init(&buffer->attachments_lock); -+ buffer->len = size; ++ /* get buffer data */ ++ buf = vzalloc(count); ++ cookie_pot = buf; ++ if (!cookie_pot) ++ return -ENOMEM; + -+ buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, buffer->len); -+ if (!buffer->vaddr) { -+ ret = -ENOMEM; -+ goto free_buffer; ++ if (copy_from_user(cookie_pot, buffer, count)) { ++ ret = -EFAULT; ++ goto err; + } + -+ buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned long)buffer->vaddr); -+ if (buffer->paddr == -1) { ++ /* get power save setting information */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, ++ DDRDBG_FUNC_GET_POWERSAVE_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); + ret = -ENOMEM; -+ goto free_pool; ++ goto err; + } + -+ /* create the dmabuf */ -+ exp_info.ops = &sram_dma_heap_buf_ops; -+ exp_info.size = buffer->len; -+ exp_info.priv = buffer; -+ dmabuf = dma_buf_export(&exp_info); -+ if (IS_ERR(dmabuf)) { -+ ret = PTR_ERR(dmabuf); -+ goto free_pool; ++ if (!dmcdbg_data.inited_flag) { ++ pr_err("dmcdbg_data no int\n"); ++ ret = -EPERM; ++ goto err; + } ++ p_power = (struct power_save_info *)dmcdbg_data.share_memory; + -+ return dmabuf; -+ -+free_pool: -+ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); -+free_buffer: -+ kfree(buffer); ++ loop = 0; ++ for (i = 0; i < count; i++) { ++ if (*(cookie_pot + i) == '=') ++ loop++; ++ } + -+ return ERR_PTR(ret); -+} -+EXPORT_SYMBOL_GPL(sram_heap_alloc_dma_buf); ++ p_uint = (unsigned int *)p_power; ++ for (i = 0; i < loop; i++) { ++ p_char = strsep(&cookie_pot, "="); ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ offset = long_val; + -+struct page *sram_heap_alloc_pages(size_t size) -+{ -+ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; ++ if (i == (loop - 1)) ++ p_char = strsep(&cookie_pot, "\0"); ++ else ++ p_char = strsep(&cookie_pot, ","); + -+ void *vaddr; -+ phys_addr_t paddr; -+ struct page *p; ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ value = long_val; + -+ int ret = -ENOMEM; ++ if (offset >= ARRAY_SIZE(power_save_msg)) { ++ ret = -EINVAL; ++ goto err; ++ } ++ offset = array_index_nospec(offset, ARRAY_SIZE(power_save_msg)); + -+ vaddr = (void *)gen_pool_alloc(sram_dma_heap->pool, size); -+ if (!vaddr) { -+ ret = -ENOMEM; -+ pr_err("no memory"); -+ goto failed; ++ *(p_uint + offset) = value; + } + -+ paddr = gen_pool_virt_to_phys(sram_dma_heap->pool, (unsigned long)vaddr); -+ if (paddr == -1) { ++ /* 
update power save setting */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_POWERSAVE, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); + ret = -ENOMEM; -+ pr_err("gen_pool_virt_to_phys failed"); -+ goto free_pool; ++ goto err; + } + -+ p = pfn_to_page(PFN_DOWN(paddr)); -+ -+ return p; -+ -+free_pool: -+ gen_pool_free(sram_dma_heap->pool, (unsigned long)vaddr, size); -+failed: -+ -+ return ERR_PTR(ret); ++ ret = count; ++err: ++ vfree(buf); ++ return ret; +} -+EXPORT_SYMBOL_GPL(sram_heap_alloc_pages); + -+static u64 gen_pool_phys_to_virt(struct gen_pool *pool, phys_addr_t paddr) -+{ -+ struct gen_pool_chunk *chunk; -+ u64 vaddr = 0; ++static const struct file_operations powersave_proc_fops = { ++ .open = powersave_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++ .write = powersave_proc_write, ++}; + -+ rcu_read_lock(); -+ list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { -+ /* TODO: only suit for simple chunk now */ -+ vaddr = chunk->start_addr + (paddr - chunk->phys_addr); -+ } -+ rcu_read_unlock(); ++static int proc_powersave_init(void) ++{ ++ /* create dmcinfo file */ ++ proc_create(PROC_DMCDBG_POWERSAVE, 0644, proc_dmcdbg_dir, ++ &powersave_proc_fops); + -+ return vaddr; ++ return 0; +} + -+void sram_heap_free_pages(struct page *p) ++static int drvodt_proc_show(struct seq_file *m, void *v) +{ -+ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; -+ void *vaddr; ++ struct arm_smccc_res res; ++ struct drv_odt_info *p_drvodt; ++ unsigned int *p_uint; ++ unsigned int i; + -+ vaddr = (void *)gen_pool_phys_to_virt(sram_dma_heap->pool, page_to_phys(p)); ++ /* get drive strength and odt information */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRVODT_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", ++ res.a0); ++ return -ENOMEM; ++ } + -+ gen_pool_free(sram_dma_heap->pool, (unsigned long)vaddr, PAGE_SIZE); -+} -+EXPORT_SYMBOL_GPL(sram_heap_free_pages); ++ if (!dmcdbg_data.inited_flag) { ++ seq_puts(m, "dmcdbg_data no int\n"); ++ return -EPERM; ++ } ++ p_drvodt = (struct drv_odt_info *)dmcdbg_data.share_memory; + -+void sram_heap_free_dma_buf(struct dma_buf *dmabuf) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ seq_printf(m, ++ "drv and odt information:\n" ++ "\n" ++ "[number]name: value (ohm)\n" ++ ); + -+ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); -+ kfree(buffer); -+} -+EXPORT_SYMBOL_GPL(sram_heap_free_dma_buf); ++ p_uint = (unsigned int *)p_drvodt; ++ for (i = 0; i < ARRAY_SIZE(drv_odt_msg); i++) { ++ if (*(p_uint + (i * 3)) == DRV_ODT_UNKNOWN) ++ seq_printf(m, ++ "[%2d]%s: NULL (unknown) %c\n" ++ , ++ i, drv_odt_msg[i], ++ (*(p_uint + (i * 3) + 2) == ++ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' ++ ); ++ else if (*(p_uint + (i * 3) + 1) == DRV_ODT_UNKNOWN) ++ seq_printf(m, ++ "[%2d]%s: %d (unknown) %c\n" ++ , ++ i, drv_odt_msg[i], *(p_uint + (i * 3)), ++ (*(p_uint + (i * 3) + 2) == ++ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' ++ ); ++ else if (i < (ARRAY_SIZE(drv_odt_msg) - 2)) ++ seq_printf(m, ++ "[%2d]%s: %d (%d ohm) %c\n" ++ , ++ i, drv_odt_msg[i], *(p_uint + (i * 3)), ++ *(p_uint + (i * 3) + 1), ++ (*(p_uint + (i * 3) + 2) == ++ DRV_ODT_SUSPEND_FIX) ? 
'\0' : '*' ++ ); ++ else ++ seq_printf(m, ++ "[%2d]%s: %d (%d %%) %c\n" ++ , ++ i, drv_odt_msg[i], *(p_uint + (i * 3)), ++ *(p_uint + (i * 3) + 1), ++ (*(p_uint + (i * 3) + 2) == ++ DRV_ODT_SUSPEND_FIX) ? '\0' : '*' ++ ); ++ } + -+void *sram_heap_get_vaddr(struct dma_buf *dmabuf) -+{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ seq_printf(m, ++ "\n" ++ "drvodt setting:\n" ++ "echo number=value > /proc/dmcdbg/drvodt\n" ++ "eg: set soc side ca drv up to 20\n" ++ " echo 6=20 > /proc/dmcdbg/drvodt\n" ++ "\n" ++ "Support for setting multiple parameters at the same time.\n" ++ "echo number=value,number=value,... > /proc/dmcdbg/drvodt\n" ++ "eg: set soc side ca drv up and down to 20\n" ++ " echo 6=20,7=20 > /proc/dmcdbg/drvodt\n" ++ "Note: Please update both up and down at the same time.\n" ++ " (*) mean unsupported setting value\n" ++ ); + -+ return buffer->vaddr; ++ return 0; +} -+EXPORT_SYMBOL_GPL(sram_heap_get_vaddr); + -+phys_addr_t sram_heap_get_paddr(struct dma_buf *dmabuf) ++static int drvodt_proc_open(struct inode *inode, struct file *file) +{ -+ struct sram_dma_heap_buffer *buffer = dmabuf->priv; -+ -+ return buffer->paddr; ++ return single_open(file, drvodt_proc_show, NULL); +} -+EXPORT_SYMBOL_GPL(sram_heap_get_paddr); + -+static int rk_add_default_sram_heap(void) ++static ssize_t drvodt_proc_write(struct file *file, ++ const char __user *buffer, ++ size_t count, loff_t *ppos) +{ -+ struct device_node *np = NULL; -+ struct gen_pool *sram_gp = NULL; ++ struct arm_smccc_res res; ++ struct drv_odt_info *p_drvodt; ++ unsigned int *p_uint; ++ char *buf, *cookie_pot, *p_char; + int ret = 0; ++ u32 loop, i, offset, value; ++ long long_val; + -+ np = of_find_compatible_node(NULL, NULL, "rockchip,sram-heap"); -+ if (!np) { -+ pr_info("failed to get device node of sram-heap\n"); -+ return -ENODEV; -+ } ++ /* get buffer data */ ++ buf = vzalloc(count); ++ cookie_pot = buf; ++ if (!cookie_pot) ++ return -ENOMEM; + -+ if (!of_device_is_available(np)) { -+ of_node_put(np); -+ return ret; ++ if (copy_from_user(cookie_pot, buffer, count)) { ++ ret = -EFAULT; ++ goto err; + } + -+ sram_gp = of_gen_pool_get(np, "rockchip,sram", 0); -+ /* release node */ -+ of_node_put(np); -+ if (sram_gp == NULL) { -+ pr_err("sram gen pool is NULL"); -+ return -ENOMEM; ++ /* get drv and odt setting */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DRVODT_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); ++ ret = -ENOMEM; ++ goto err; + } + -+ ret = sram_dma_heap_export("sram-heap", sram_gp); -+ -+ return ret; -+} -+module_init(rk_add_default_sram_heap); -+MODULE_DESCRIPTION("Rockchip DMA-BUF SRAM Heap"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/dma-buf/rk_heaps/Kconfig b/drivers/dma-buf/rk_heaps/Kconfig -new file mode 100644 -index 000000000..6ca3fbe76 ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/Kconfig -@@ -0,0 +1,48 @@ -+# SPDX-License-Identifier: GPL-2.0-only -+menuconfig DMABUF_HEAPS_ROCKCHIP -+ bool "DMA-BUF Userland Memory Heaps for RockChip" -+ select DMA_SHARED_BUFFER -+ help -+ Choose this option to enable the RockChip DMA-BUF userland memory heaps. -+ This options creates per heap chardevs in /dev/rk_dma_heap/ which -+ allows userspace to allocate dma-bufs that can be shared -+ between drivers. -+ -+config DMABUF_HEAPS_ROCKCHIP_CMA_HEAP -+ tristate "DMA-BUF RockChip CMA Heap" -+ depends on DMABUF_HEAPS_ROCKCHIP -+ help -+ Choose this option to enable dma-buf RockChip CMA heap. 
This heap is backed -+ by the Contiguous Memory Allocator (CMA). If your system has these -+ regions, you should say Y here. ++ if (!dmcdbg_data.inited_flag) { ++ pr_err("dmcdbg_data no int\n"); ++ ret = -EPERM; ++ goto err; ++ } ++ p_drvodt = (struct drv_odt_info *)dmcdbg_data.share_memory; + -+config DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT -+ int "Maximum PAGE_SIZE order of alignment for RockChip CMA Heap" -+ range 0 12 -+ depends on DMABUF_HEAPS_ROCKCHIP_CMA_HEAP -+ default 8 -+ help -+ DMA mapping framework by default aligns all buffers to the smallest -+ PAGE_SIZE order which is greater than or equal to the requested buffer -+ size. This works well for buffers up to a few hundreds kilobytes, but -+ for larger buffers it just a memory waste. With this parameter you can -+ specify the maximum PAGE_SIZE order for contiguous buffers. Larger -+ buffers will be aligned only to this specified order. The order is -+ expressed as a power of two multiplied by the PAGE_SIZE. ++ loop = 0; ++ for (i = 0; i < count; i++) { ++ if (*(cookie_pot + i) == '=') ++ loop++; ++ } + -+ For example, if your system defaults to 4KiB pages, the order value -+ of 8 means that the buffers will be aligned up to 1MiB only. ++ p_uint = (unsigned int *)p_drvodt; ++ for (i = 0; i < loop; i++) { ++ p_char = strsep(&cookie_pot, "="); ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ offset = long_val; + -+ If unsure, leave the default value "8". ++ if (i == (loop - 1)) ++ p_char = strsep(&cookie_pot, "\0"); ++ else ++ p_char = strsep(&cookie_pot, ","); + -+config DMABUF_RK_HEAPS_DEBUG -+ bool "DMA-BUF RockChip Heap Debug" -+ depends on DMABUF_HEAPS_ROCKCHIP -+ help -+ Choose this option to enable dma-buf RockChip heap debug. ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ value = long_val; + -+config DMABUF_RK_HEAPS_DEBUG_PRINT -+ bool "DMA-BUF RockChip Heap Debug print log enable" -+ depends on DMABUF_HEAPS_ROCKCHIP -+ help -+ Choose this option to enable dma-buf RockChip heap debug. -diff --git a/drivers/dma-buf/rk_heaps/Makefile b/drivers/dma-buf/rk_heaps/Makefile -new file mode 100644 -index 000000000..30d44bb7d ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/Makefile -@@ -0,0 +1,6 @@ -+# SPDX-License-Identifier: GPL-2.0 ++ if (offset >= ARRAY_SIZE(drv_odt_msg)) { ++ ret = -EINVAL; ++ goto err; ++ } ++ offset *= 3; ++ offset = array_index_nospec(offset, ARRAY_SIZE(drv_odt_msg) * 3); + -+rk-cma-heap-objs := rk-dma-cma.o rk-cma-heap.o ++ *(p_uint + offset) = value; ++ } + -+obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP) += rk-dma-heap.o -+obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_HEAP) += rk-cma-heap.o -diff --git a/drivers/dma-buf/rk_heaps/rk-cma-heap.c b/drivers/dma-buf/rk_heaps/rk-cma-heap.c -new file mode 100644 -index 000000000..5b063f87b ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/rk-cma-heap.c -@@ -0,0 +1,687 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * DMABUF CMA heap exporter -+ * -+ * Copyright (C) 2012, 2019, 2020 Linaro Ltd. -+ * Author: for ST-Ericsson. -+ * -+ * Also utilizing parts of Andrew Davis' SRAM heap: -+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ -+ * Andrew F. Davis -+ * -+ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. 
-+ * Author: Simon Xue -+ */ ++ /* update power save setting */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_DRVODT, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); ++ ret = -ENOMEM; ++ goto err; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../../../mm/cma.h" -+#include "rk-dma-heap.h" ++ ret = count; ++err: ++ vfree(buf); ++ return ret; ++} + -+struct rk_cma_heap { -+ struct rk_dma_heap *heap; -+ struct cma *cma; ++static const struct file_operations drvodt_proc_fops = { ++ .open = drvodt_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++ .write = drvodt_proc_write, +}; + -+struct rk_cma_heap_buffer { -+ struct rk_cma_heap *heap; -+ struct list_head attachments; -+ struct mutex lock; -+ unsigned long len; -+ struct page *cma_pages; -+ struct page **pages; -+ pgoff_t pagecount; -+ int vmap_cnt; -+ void *vaddr; -+ phys_addr_t phys; -+ bool attached; -+}; ++static int proc_drvodt_init(void) ++{ ++ /* create dmcinfo file */ ++ proc_create(PROC_DMCDBG_DRVODT, 0644, proc_dmcdbg_dir, ++ &drvodt_proc_fops); + -+struct rk_cma_heap_attachment { -+ struct device *dev; -+ struct sg_table table; -+ struct list_head list; -+ bool mapped; -+}; ++ return 0; ++} + -+static int rk_cma_heap_attach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) ++static int skew_proc_show(struct seq_file *m, void *v) +{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap_attachment *a; -+ struct sg_table *table; -+ size_t size = buffer->pagecount << PAGE_SHIFT; -+ int ret; ++ struct arm_smccc_res res; ++ unsigned int *p_uint; ++ u32 group, i; + -+ a = kzalloc(sizeof(*a), GFP_KERNEL); -+ if (!a) ++ /* get deskew information */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DESKEW_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", ++ res.a0); + return -ENOMEM; -+ -+ table = &a->table; -+ -+ ret = sg_alloc_table(table, 1, GFP_KERNEL); -+ if (ret) { -+ kfree(a); -+ return ret; + } -+ sg_set_page(table->sgl, buffer->cma_pages, PAGE_ALIGN(size), 0); + -+ a->dev = attachment->dev; -+ INIT_LIST_HEAD(&a->list); -+ a->mapped = false; ++ if (!dmcdbg_data.inited_flag) { ++ seq_puts(m, "dmcdbg_data no int\n"); ++ return -EPERM; ++ } + -+ attachment->priv = a; ++ seq_printf(m, ++ "de-skew information:\n" ++ "\n" ++ "[group_number]name: value\n" ++ ); + -+ buffer->attached = true; ++ for (group = 0; group < dmcdbg_data.skew_group_num; group++) { ++ if (dmcdbg_data.skew_group[group].note != NULL) ++ seq_printf(m, ++ "%s\n" ++ , ++ dmcdbg_data.skew_group[group].note ++ ); ++ p_uint = (unsigned int *)dmcdbg_data.skew_group[group].p_skew_info; ++ for (i = 0; i < dmcdbg_data.skew_group[group].skew_num; i++) ++ seq_printf(m, ++ "[%c%d_%d]%s: %d\n" ++ , ++ (i < 10) ? ' ' : '\0', group, i, ++ dmcdbg_data.skew_group[group].p_skew_timing[i], ++ *(p_uint + i) ++ ); ++ } + -+ mutex_lock(&buffer->lock); -+ list_add(&a->list, &buffer->attachments); -+ mutex_unlock(&buffer->lock); ++ seq_printf(m, ++ "\n" ++ "de-skew setting:\n" ++ "echo group_number=value > /proc/dmcdbg/deskew\n" ++ "eg: set a1_ddr3a14_de-skew to 8\n" ++ " echo 0_1=8 > /proc/dmcdbg/deskew\n" ++ "\n" ++ "Support for setting multiple parameters simultaneously.\n" ++ "echo group_number=value,group_number=value,... 
> /proc/dmcdbg/deskew\n" ++ "eg:\n" ++ " echo 0_1=8,1_2=8 > /proc/dmcdbg/deskew\n" ++ ); + + return 0; +} + -+static void rk_cma_heap_detach(struct dma_buf *dmabuf, -+ struct dma_buf_attachment *attachment) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap_attachment *a = attachment->priv; -+ -+ mutex_lock(&buffer->lock); -+ list_del(&a->list); -+ mutex_unlock(&buffer->lock); -+ -+ buffer->attached = false; -+ -+ sg_free_table(&a->table); -+ kfree(a); -+} -+ -+static struct sg_table *rk_cma_heap_map_dma_buf(struct dma_buf_attachment *attachment, -+ enum dma_data_direction direction) ++static int skew_proc_open(struct inode *inode, struct file *file) +{ -+ struct rk_cma_heap_attachment *a = attachment->priv; -+ struct sg_table *table = &a->table; -+ int ret; -+ -+ ret = dma_map_sgtable(attachment->dev, table, direction, 0); -+ if (ret) -+ return ERR_PTR(-ENOMEM); -+ a->mapped = true; -+ return table; ++ return single_open(file, skew_proc_show, NULL); +} + -+static void rk_cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, -+ struct sg_table *table, -+ enum dma_data_direction direction) ++static ssize_t skew_proc_write(struct file *file, ++ const char __user *buffer, ++ size_t count, loff_t *ppos) +{ -+ struct rk_cma_heap_attachment *a = attachment->priv; ++ struct arm_smccc_res res; ++ unsigned int *p_uint; ++ char *buf, *cookie_pot, *p_char; ++ int ret = 0; ++ u32 loop, i, offset_max, group, offset, value; ++ long long_val; + -+ a->mapped = false; -+ dma_unmap_sgtable(attachment->dev, table, direction, 0); -+} ++ /* get buffer data */ ++ buf = vzalloc(count); ++ cookie_pot = buf; ++ if (!cookie_pot) ++ return -ENOMEM; + -+static int -+rk_cma_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap_attachment *a; ++ if (copy_from_user(cookie_pot, buffer, count)) { ++ ret = -EFAULT; ++ goto err; ++ } + -+ if (buffer->vmap_cnt) -+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ /* get skew setting */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_GET_DESKEW_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); ++ ret = -ENOMEM; ++ goto err; ++ } + -+ mutex_lock(&buffer->lock); -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_cpu(a->dev, &a->table, direction); ++ if (!dmcdbg_data.inited_flag) { ++ pr_err("dmcdbg_data no int\n"); ++ ret = -EPERM; ++ goto err; + } + -+ /* For userspace that not attach yet */ -+ if (buffer->phys && !buffer->attached) -+ dma_sync_single_for_cpu(rk_dma_heap_get_dev(buffer->heap->heap), -+ buffer->phys + offset, -+ len, -+ direction); -+ mutex_unlock(&buffer->lock); ++ loop = 0; ++ for (i = 0; i < count; i++) { ++ if (*(cookie_pot + i) == '=') ++ loop++; ++ } + -+ return 0; -+} ++ for (i = 0; i < loop; i++) { ++ p_char = strsep(&cookie_pot, "_"); ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ group = long_val; + -+static int -+rk_cma_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, -+ enum dma_data_direction direction, -+ unsigned int offset, -+ unsigned int len) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap_attachment *a; ++ p_char = strsep(&cookie_pot, "="); ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ offset = 
long_val; + -+ if (buffer->vmap_cnt) -+ flush_kernel_vmap_range(buffer->vaddr, buffer->len); ++ if (i == (loop - 1)) ++ p_char = strsep(&cookie_pot, "\0"); ++ else ++ p_char = strsep(&cookie_pot, ","); + -+ mutex_lock(&buffer->lock); -+ list_for_each_entry(a, &buffer->attachments, list) { -+ if (!a->mapped) -+ continue; -+ dma_sync_sgtable_for_device(a->dev, &a->table, direction); -+ } ++ ret = kstrtol(p_char, 10, &long_val); ++ if (ret) ++ goto err; ++ value = long_val; + -+ /* For userspace that not attach yet */ -+ if (buffer->phys && !buffer->attached) -+ dma_sync_single_for_device(rk_dma_heap_get_dev(buffer->heap->heap), -+ buffer->phys + offset, -+ len, -+ direction); -+ mutex_unlock(&buffer->lock); ++ if (group >= dmcdbg_data.skew_group_num) { ++ ret = -EINVAL; ++ goto err; ++ } ++ group = array_index_nospec(group, dmcdbg_data.skew_group_num); + -+ return 0; -+} ++ p_uint = (unsigned int *)dmcdbg_data.skew_group[group].p_skew_info; ++ offset_max = dmcdbg_data.skew_group[group].skew_num; + -+static int rk_cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction dir) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ unsigned int len = buffer->pagecount * PAGE_SIZE; ++ if (offset >= offset_max) { ++ ret = -EINVAL; ++ goto err; ++ } ++ offset = array_index_nospec(offset, offset_max); + -+ return rk_cma_heap_dma_buf_begin_cpu_access_partial(dmabuf, dir, 0, len); -+} ++ *(p_uint + offset) = value; ++ } + -+static int rk_cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, -+ enum dma_data_direction dir) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ unsigned int len = buffer->pagecount * PAGE_SIZE; ++ /* update power save setting */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, DDRDBG_FUNC_UPDATE_DESKEW, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_debug error:%lx\n", res.a0); ++ ret = -ENOMEM; ++ goto err; ++ } + -+ return rk_cma_heap_dma_buf_end_cpu_access_partial(dmabuf, dir, 0, len); ++ ret = count; ++err: ++ vfree(buf); ++ return ret; +} + -+static int rk_cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ size_t size = vma->vm_end - vma->vm_start; -+ int ret; -+ -+ ret = remap_pfn_range(vma, vma->vm_start, __phys_to_pfn(buffer->phys), -+ size, vma->vm_page_prot); -+ if (ret) -+ return -EAGAIN; -+ -+ return 0; -+} ++static const struct file_operations skew_proc_fops = { ++ .open = skew_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++ .write = skew_proc_write, ++}; + -+static void *rk_cma_heap_do_vmap(struct rk_cma_heap_buffer *buffer) ++static int proc_skew_init(void) +{ -+ void *vaddr; -+ pgprot_t pgprot = PAGE_KERNEL; -+ -+ vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, pgprot); -+ if (!vaddr) -+ return ERR_PTR(-ENOMEM); ++ /* create dmcinfo file */ ++ proc_create(PROC_DMCDBG_DESKEW, 0644, proc_dmcdbg_dir, ++ &skew_proc_fops); + -+ return vaddr; ++ return 0; +} + -+static int rk_cma_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map) ++static int regsinfo_proc_show(struct seq_file *m, void *v) +{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ void *vaddr; -+ int ret = 0; ++ struct arm_smccc_res res; ++ struct registers_info *p_regsinfo; ++ u32 i; + -+ mutex_lock(&buffer->lock); -+ if (buffer->vmap_cnt) { -+ buffer->vmap_cnt++; -+ iosys_map_set_vaddr(map, buffer->vaddr); -+ goto out; ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRDBG, ++ 
DDRDBG_FUNC_GET_REGISTERS_INFO, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ if (res.a0) { ++ seq_printf(m, "rockchip_sip_config_dram_debug error:%lx\n", ++ res.a0); ++ return -ENOMEM; + } + -+ vaddr = rk_cma_heap_do_vmap(buffer); -+ if (IS_ERR(vaddr)) { -+ ret = PTR_ERR(vaddr); -+ goto out; ++ if (!dmcdbg_data.inited_flag) { ++ seq_puts(m, "dmcdbg_data no int\n"); ++ return -EPERM; + } ++ p_regsinfo = (struct registers_info *)dmcdbg_data.share_memory; + -+ buffer->vaddr = vaddr; -+ buffer->vmap_cnt++; -+ iosys_map_set_vaddr(map, buffer->vaddr); -+out: -+ mutex_unlock(&buffer->lock); ++ seq_printf(m, ++ "registers base address information:\n" ++ "\n" ++ ); + -+ return ret; ++ for (i = 0; i < p_regsinfo->regs_num; i++) { ++ seq_printf(m, ++ "%s=0x%x\n" ++ , ++ p_regsinfo->regs[i].regs_name, ++ p_regsinfo->regs[i].regs_addr ++ ); ++ } ++ ++ return 0; +} + -+static void rk_cma_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) ++static int regsinfo_proc_open(struct inode *inode, struct file *file) +{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ -+ mutex_lock(&buffer->lock); -+ if (!--buffer->vmap_cnt) { -+ vunmap(buffer->vaddr); -+ buffer->vaddr = NULL; -+ } -+ mutex_unlock(&buffer->lock); -+ iosys_map_clear(map); ++ return single_open(file, regsinfo_proc_show, NULL); +} + -+static void rk_cma_heap_remove_dmabuf_list(struct dma_buf *dmabuf) ++static const struct file_operations regsinfo_proc_fops = { ++ .open = regsinfo_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int proc_regsinfo_init(void) +{ -+ struct rk_dma_heap_dmabuf *buf; -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap *cma_heap = buffer->heap; -+ struct rk_dma_heap *heap = cma_heap->heap; ++ /* create dmcinfo file */ ++ proc_create(PROC_DMCDBG_REGS_INFO, 0644, proc_dmcdbg_dir, ++ ®sinfo_proc_fops); + -+ mutex_lock(&heap->dmabuf_lock); -+ list_for_each_entry(buf, &heap->dmabuf_list, node) { -+ if (buf->dmabuf == dmabuf) { -+ dma_heap_print("<%s> free dmabuf@[%pa-%pa] to heap-<%s>\n", -+ dmabuf->name, -+ dmabuf->file->f_inode->i_ino, -+ &buf->start, &buf->end, -+ rk_dma_heap_get_name(heap)); -+ list_del(&buf->node); -+ kfree(buf); -+ break; -+ } -+ } -+ mutex_unlock(&heap->dmabuf_lock); ++ return 0; +} + -+static int rk_cma_heap_add_dmabuf_list(struct dma_buf *dmabuf, const char *name) ++static void rv1126_get_skew_parameter(void) +{ -+ struct rk_dma_heap_dmabuf *buf; -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap *cma_heap = buffer->heap; -+ struct rk_dma_heap *heap = cma_heap->heap; -+ -+ buf = kzalloc(sizeof(*buf), GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; ++ struct skew_info_rv1126 *p_skew; ++ u32 i; + -+ INIT_LIST_HEAD(&buf->node); -+ buf->dmabuf = dmabuf; -+ buf->start = buffer->phys; -+ buf->end = buf->start + buffer->len - 1; -+ mutex_lock(&heap->dmabuf_lock); -+ list_add_tail(&buf->node, &heap->dmabuf_list); -+ mutex_unlock(&heap->dmabuf_lock); ++ /* get skew parameters */ ++ p_skew = (struct skew_info_rv1126 *)dmcdbg_data.share_memory; ++ dmcdbg_data.skew_group_num = 5; + -+ dma_heap_print("<%s> alloc dmabuf@[%pa-%pa] from heap-<%s>\n", -+ dmabuf->name, dmabuf->file->f_inode->i_ino, -+ &buf->start, &buf->end, rk_dma_heap_get_name(heap)); ++ /* ca_skew parameters */ ++ dmcdbg_data.skew_group[0].p_skew_info = (unsigned int *)p_skew->ca_skew; ++ dmcdbg_data.skew_group[0].skew_num = ARRAY_SIZE(rv1126_dts_ca_timing); ++ for (i = 0; i < dmcdbg_data.skew_group[0].skew_num; i++) ++ 
dmcdbg_data.skew_group[0].p_skew_timing[i] = ++ (char *)rv1126_dts_ca_timing[i]; ++ dmcdbg_data.skew_group[0].note = ++ "(ca_skew: ddr4(pad_name)_ddr3_lpddr3_lpddr4_de-skew)"; + -+ return 0; -+} ++ /* cs0_a_skew parameters */ ++ dmcdbg_data.skew_group[1].p_skew_info = (unsigned int *)p_skew->cs0_a_skew; ++ dmcdbg_data.skew_group[1].skew_num = ARRAY_SIZE(rv1126_dts_cs0_a_timing); ++ for (i = 0; i < dmcdbg_data.skew_group[1].skew_num; i++) ++ dmcdbg_data.skew_group[1].p_skew_timing[i] = ++ (char *)rv1126_dts_cs0_a_timing[i]; ++ dmcdbg_data.skew_group[1].note = "(cs0_a_skew)"; + -+static int rk_cma_heap_remove_contig_list(struct rk_dma_heap *heap, -+ struct page *page, const char *name) -+{ -+ struct rk_dma_heap_contig_buf *buf; ++ /* cs0_b_skew parameters */ ++ dmcdbg_data.skew_group[2].p_skew_info = (unsigned int *)p_skew->cs0_b_skew; ++ dmcdbg_data.skew_group[2].skew_num = ARRAY_SIZE(rv1126_dts_cs0_b_timing); ++ for (i = 0; i < dmcdbg_data.skew_group[2].skew_num; i++) ++ dmcdbg_data.skew_group[2].p_skew_timing[i] = ++ (char *)rv1126_dts_cs0_b_timing[i]; ++ dmcdbg_data.skew_group[2].note = "(cs0_b_skew)"; + -+ mutex_lock(&heap->contig_lock); -+ list_for_each_entry(buf, &heap->contig_list, node) { -+ if (buf->start == page_to_phys(page)) { -+ dma_heap_print("<%s> free contig-buf@[%pa-%pa] to heap-<%s>\n", -+ buf->orig_alloc, &buf->start, &buf->end, -+ rk_dma_heap_get_name(heap)); -+ list_del(&buf->node); -+ kfree(buf->orig_alloc); -+ kfree(buf); -+ break; -+ } -+ } -+ mutex_unlock(&heap->contig_lock); ++ /* cs1_a_skew parameters */ ++ dmcdbg_data.skew_group[3].p_skew_info = (unsigned int *)p_skew->cs1_a_skew; ++ dmcdbg_data.skew_group[3].skew_num = ARRAY_SIZE(rv1126_dts_cs1_a_timing); ++ for (i = 0; i < dmcdbg_data.skew_group[3].skew_num; i++) ++ dmcdbg_data.skew_group[3].p_skew_timing[i] = ++ (char *)rv1126_dts_cs1_a_timing[i]; ++ dmcdbg_data.skew_group[3].note = "(cs1_a_skew)"; + -+ return 0; ++ /* cs1_b_skew parameters */ ++ dmcdbg_data.skew_group[4].p_skew_info = (unsigned int *)p_skew->cs1_b_skew; ++ dmcdbg_data.skew_group[4].skew_num = ARRAY_SIZE(rv1126_dts_cs1_b_timing); ++ for (i = 0; i < dmcdbg_data.skew_group[3].skew_num; i++) ++ dmcdbg_data.skew_group[4].p_skew_timing[i] = ++ (char *)rv1126_dts_cs1_b_timing[i]; ++ dmcdbg_data.skew_group[4].note = "(cs1_b_skew)"; +} + -+static int rk_cma_heap_add_contig_list(struct rk_dma_heap *heap, -+ struct page *page, unsigned long size, -+ const char *name) ++static __maybe_unused int rv1126_dmcdbg_init(struct platform_device *pdev, ++ struct rockchip_dmcdbg *dmcdbg) +{ -+ struct rk_dma_heap_contig_buf *buf; -+ const char *name_tmp; -+ -+ buf = kzalloc(sizeof(*buf), GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; ++ struct arm_smccc_res res; + -+ INIT_LIST_HEAD(&buf->node); -+ if (!name) -+ name_tmp = current->comm; -+ else -+ name_tmp = name; ++ /* check ddr_debug_func version */ ++ res = sip_smc_dram(0, DDRDBG_FUNC_GET_VERSION, ++ ROCKCHIP_SIP_CONFIG_DRAM_DEBUG); ++ dev_notice(&pdev->dev, "current ATF ddr_debug_func version 0x%lx.\n", ++ res.a1); ++ /* ++ * [15:8] major version, [7:0] minor version ++ * major version must match both kernel dmcdbg and ATF ddr_debug_func. 
++ */ ++ if (res.a0 || res.a1 < 0x101 || ((res.a1 & 0xff00) != 0x100)) { ++ dev_err(&pdev->dev, ++ "version invalid,need update,the major version unmatch!\n"); ++ return -ENXIO; ++ } + -+ buf->orig_alloc = kstrndup(name_tmp, RK_DMA_HEAP_NAME_LEN, GFP_KERNEL); -+ if (!buf->orig_alloc) { -+ kfree(buf); ++ /* request share memory for pass parameter */ ++ res = sip_smc_request_share_mem(DMCDBG_PAGE_NUMS, ++ SHARE_PAGE_TYPE_DDRDBG); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "request share mem error\n"); + return -ENOMEM; + } + -+ buf->start = page_to_phys(page); -+ buf->end = buf->start + size - 1; -+ mutex_lock(&heap->contig_lock); -+ list_add_tail(&buf->node, &heap->contig_list); -+ mutex_unlock(&heap->contig_lock); -+ -+ dma_heap_print("<%s> alloc contig-buf@[%pa-%pa] from heap-<%s>\n", -+ buf->orig_alloc, &buf->start, &buf->end, -+ rk_dma_heap_get_name(heap)); -+ -+ return 0; -+} ++ dmcdbg_data.share_memory = (void __iomem *)res.a1; ++ dmcdbg_data.inited_flag = 1; + -+static void rk_cma_heap_dma_buf_release(struct dma_buf *dmabuf) -+{ -+ struct rk_cma_heap_buffer *buffer = dmabuf->priv; -+ struct rk_cma_heap *cma_heap = buffer->heap; -+ struct rk_dma_heap *heap = cma_heap->heap; ++ rv1126_get_skew_parameter(); + -+ if (buffer->vmap_cnt > 0) { -+ WARN(1, "%s: buffer still mapped in the kernel\n", __func__); -+ vunmap(buffer->vaddr); ++ /* create parent dir in /proc */ ++ proc_dmcdbg_dir = proc_mkdir(PROC_DMCDBG_DIR_NAME, NULL); ++ if (!proc_dmcdbg_dir) { ++ dev_err(&pdev->dev, "create proc dir error!"); ++ return -ENOENT; + } + -+ rk_cma_heap_remove_dmabuf_list(dmabuf); -+ -+ /* free page list */ -+ kfree(buffer->pages); -+ /* release memory */ -+ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); -+ rk_dma_heap_total_dec(heap, buffer->len); -+ -+ kfree(buffer); ++ proc_dmcinfo_init(); ++ proc_powersave_init(); ++ proc_drvodt_init(); ++ proc_skew_init(); ++ proc_regsinfo_init(); ++ return 0; +} + -+static const struct dma_buf_ops rk_cma_heap_buf_ops = { -+ .cache_sgt_mapping = true, -+ .attach = rk_cma_heap_attach, -+ .detach = rk_cma_heap_detach, -+ .map_dma_buf = rk_cma_heap_map_dma_buf, -+ .unmap_dma_buf = rk_cma_heap_unmap_dma_buf, -+ .begin_cpu_access = rk_cma_heap_dma_buf_begin_cpu_access, -+ .end_cpu_access = rk_cma_heap_dma_buf_end_cpu_access, -+#ifdef CONFIG_DMABUF_PARTIAL -+ .begin_cpu_access_partial = rk_cma_heap_dma_buf_begin_cpu_access_partial, -+ .end_cpu_access_partial = rk_cma_heap_dma_buf_end_cpu_access_partial, -+#endif -+ .mmap = rk_cma_heap_mmap, -+ .vmap = rk_cma_heap_vmap, -+ .vunmap = rk_cma_heap_vunmap, -+ .release = rk_cma_heap_dma_buf_release, ++static const struct of_device_id rockchip_dmcdbg_of_match[] = { ++ { .compatible = "rockchip,rv1126-dmcdbg", .data = rv1126_dmcdbg_init}, ++ { }, +}; ++MODULE_DEVICE_TABLE(of, rockchip_dmcdbg_of_match); + -+static struct dma_buf *rk_cma_heap_allocate(struct rk_dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags, -+ const char *name) ++static int rockchip_dmcdbg_probe(struct platform_device *pdev) +{ -+ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); -+ struct rk_cma_heap_buffer *buffer; -+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); -+ size_t size = PAGE_ALIGN(len); -+ pgoff_t pagecount = size >> PAGE_SHIFT; -+ unsigned long align = get_order(size); -+ struct page *cma_pages; -+ struct dma_buf *dmabuf; -+ pgoff_t pg; -+ int ret = -ENOMEM; -+ -+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); -+ if (!buffer) -+ return ERR_PTR(-ENOMEM); -+ -+ 
INIT_LIST_HEAD(&buffer->attachments); -+ mutex_init(&buffer->lock); -+ buffer->len = size; -+ -+ if (align > CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT) -+ align = CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT; -+ -+ cma_pages = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); -+ if (!cma_pages) -+ goto free_buffer; ++ struct device *dev = &pdev->dev; ++ struct rockchip_dmcdbg *data; ++ const struct of_device_id *match; ++ int (*init)(struct platform_device *pdev, ++ struct rockchip_dmcdbg *data); ++ int ret = 0; + -+ /* Clear the cma pages */ -+ if (PageHighMem(cma_pages)) { -+ unsigned long nr_clear_pages = pagecount; -+ struct page *page = cma_pages; ++ data = devm_kzalloc(dev, sizeof(struct rockchip_dmcdbg), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; + -+ while (nr_clear_pages > 0) { -+ void *vaddr = kmap_atomic(page); ++ data->dev = dev; + -+ memset(vaddr, 0, PAGE_SIZE); -+ kunmap_atomic(vaddr); -+ /* -+ * Avoid wasting time zeroing memory if the process -+ * has been killed by SIGKILL -+ */ -+ if (fatal_signal_pending(current)) -+ goto free_cma; -+ page++; -+ nr_clear_pages--; ++ /* match soc chip init */ ++ match = of_match_node(rockchip_dmcdbg_of_match, pdev->dev.of_node); ++ if (match) { ++ init = match->data; ++ if (init) { ++ if (init(pdev, data)) ++ return -EINVAL; + } -+ } else { -+ memset(page_address(cma_pages), 0, size); -+ } -+ -+ buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages), -+ GFP_KERNEL); -+ if (!buffer->pages) { -+ ret = -ENOMEM; -+ goto free_cma; + } + -+ for (pg = 0; pg < pagecount; pg++) -+ buffer->pages[pg] = &cma_pages[pg]; -+ -+ buffer->cma_pages = cma_pages; -+ buffer->heap = cma_heap; -+ buffer->pagecount = pagecount; -+ -+ /* create the dmabuf */ -+ exp_info.exp_name = rk_dma_heap_get_name(heap); -+ exp_info.ops = &rk_cma_heap_buf_ops; -+ exp_info.size = buffer->len; -+ exp_info.flags = fd_flags; -+ exp_info.priv = buffer; -+ dmabuf = dma_buf_export(&exp_info); -+ if (IS_ERR(dmabuf)) { -+ ret = PTR_ERR(dmabuf); -+ goto free_pages; -+ } ++ return ret; ++} + -+ buffer->phys = page_to_phys(cma_pages); -+ dma_sync_single_for_cpu(rk_dma_heap_get_dev(heap), buffer->phys, -+ buffer->pagecount * PAGE_SIZE, -+ DMA_FROM_DEVICE); ++static struct platform_driver rockchip_dmcdbg_driver = { ++ .probe = rockchip_dmcdbg_probe, ++ .driver = { ++ .name = "rockchip,dmcdbg", ++ .of_match_table = rockchip_dmcdbg_of_match, ++ }, ++}; ++module_platform_driver(rockchip_dmcdbg_driver); + -+ ret = rk_cma_heap_add_dmabuf_list(dmabuf, name); -+ if (ret) -+ goto fail_dma_buf; ++MODULE_LICENSE("GPL v2"); ++MODULE_AUTHOR("YouMin Chen "); ++MODULE_DESCRIPTION("rockchip dmc debug driver with devfreq framework"); +diff --git a/drivers/devfreq/rockchip_dmc_timing.h b/drivers/devfreq/rockchip_dmc_timing.h +new file mode 100644 +index 000000000..2f7b7774c +--- /dev/null ++++ b/drivers/devfreq/rockchip_dmc_timing.h +@@ -0,0 +1,1231 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (c) 2020, Rockchip Electronics Co., Ltd. 
++ */ + -+ rk_dma_heap_total_inc(heap, buffer->len); ++#ifndef __ROCKCHIP_DMC_TIMING_H__ ++#define __ROCKCHIP_DMC_TIMING_H__ + -+ return dmabuf; ++/* hope this define can adapt all future platfor */ ++static const char * const px30_dts_timing[] = { ++ "ddr2_speed_bin", ++ "ddr3_speed_bin", ++ "ddr4_speed_bin", ++ "pd_idle", ++ "sr_idle", ++ "sr_mc_gate_idle", ++ "srpd_lite_idle", ++ "standby_idle", + -+fail_dma_buf: -+ dma_buf_put(dmabuf); -+free_pages: -+ kfree(buffer->pages); -+free_cma: -+ cma_release(cma_heap->cma, cma_pages, pagecount); -+free_buffer: -+ kfree(buffer); ++ "auto_pd_dis_freq", ++ "auto_sr_dis_freq", ++ "ddr2_dll_dis_freq", ++ "ddr3_dll_dis_freq", ++ "ddr4_dll_dis_freq", ++ "phy_dll_dis_freq", + -+ return ERR_PTR(ret); -+} ++ "ddr2_odt_dis_freq", ++ "phy_ddr2_odt_dis_freq", ++ "ddr2_drv", ++ "ddr2_odt", ++ "phy_ddr2_ca_drv", ++ "phy_ddr2_ck_drv", ++ "phy_ddr2_dq_drv", ++ "phy_ddr2_odt", + -+static struct page *rk_cma_heap_allocate_pages(struct rk_dma_heap *heap, -+ size_t len, const char *name) -+{ -+ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); -+ size_t size = PAGE_ALIGN(len); -+ pgoff_t pagecount = size >> PAGE_SHIFT; -+ unsigned long align = get_order(size); -+ struct page *page; -+ int ret; ++ "ddr3_odt_dis_freq", ++ "phy_ddr3_odt_dis_freq", ++ "ddr3_drv", ++ "ddr3_odt", ++ "phy_ddr3_ca_drv", ++ "phy_ddr3_ck_drv", ++ "phy_ddr3_dq_drv", ++ "phy_ddr3_odt", + -+ if (align > CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT) -+ align = CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT; ++ "phy_lpddr2_odt_dis_freq", ++ "lpddr2_drv", ++ "phy_lpddr2_ca_drv", ++ "phy_lpddr2_ck_drv", ++ "phy_lpddr2_dq_drv", ++ "phy_lpddr2_odt", + -+ page = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); -+ if (!page) -+ return ERR_PTR(-ENOMEM); ++ "lpddr3_odt_dis_freq", ++ "phy_lpddr3_odt_dis_freq", ++ "lpddr3_drv", ++ "lpddr3_odt", ++ "phy_lpddr3_ca_drv", ++ "phy_lpddr3_ck_drv", ++ "phy_lpddr3_dq_drv", ++ "phy_lpddr3_odt", + -+ ret = rk_cma_heap_add_contig_list(heap, page, size, name); -+ if (ret) { -+ cma_release(cma_heap->cma, page, pagecount); -+ return ERR_PTR(-EINVAL); -+ } ++ "lpddr4_odt_dis_freq", ++ "phy_lpddr4_odt_dis_freq", ++ "lpddr4_drv", ++ "lpddr4_dq_odt", ++ "lpddr4_ca_odt", ++ "phy_lpddr4_ca_drv", ++ "phy_lpddr4_ck_cs_drv", ++ "phy_lpddr4_dq_drv", ++ "phy_lpddr4_odt", + -+ rk_dma_heap_total_inc(heap, size); ++ "ddr4_odt_dis_freq", ++ "phy_ddr4_odt_dis_freq", ++ "ddr4_drv", ++ "ddr4_odt", ++ "phy_ddr4_ca_drv", ++ "phy_ddr4_ck_drv", ++ "phy_ddr4_dq_drv", ++ "phy_ddr4_odt", ++}; + -+ return page; -+} ++struct px30_ddr_dts_config_timing { ++ unsigned int ddr2_speed_bin; ++ unsigned int ddr3_speed_bin; ++ unsigned int ddr4_speed_bin; ++ unsigned int pd_idle; ++ unsigned int sr_idle; ++ unsigned int sr_mc_gate_idle; ++ unsigned int srpd_lite_idle; ++ unsigned int standby_idle; + -+static void rk_cma_heap_free_pages(struct rk_dma_heap *heap, -+ struct page *page, size_t len, -+ const char *name) -+{ -+ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); -+ pgoff_t pagecount = len >> PAGE_SHIFT; ++ unsigned int auto_pd_dis_freq; ++ unsigned int auto_sr_dis_freq; ++ /* for ddr2 only */ ++ unsigned int ddr2_dll_dis_freq; ++ /* for ddr3 only */ ++ unsigned int ddr3_dll_dis_freq; ++ /* for ddr4 only */ ++ unsigned int ddr4_dll_dis_freq; ++ unsigned int phy_dll_dis_freq; + -+ rk_cma_heap_remove_contig_list(heap, page, name); ++ unsigned int ddr2_odt_dis_freq; ++ unsigned int phy_ddr2_odt_dis_freq; ++ unsigned int ddr2_drv; ++ unsigned int ddr2_odt; ++ unsigned int 
phy_ddr2_ca_drv; ++ unsigned int phy_ddr2_ck_drv; ++ unsigned int phy_ddr2_dq_drv; ++ unsigned int phy_ddr2_odt; + -+ cma_release(cma_heap->cma, page, pagecount); ++ unsigned int ddr3_odt_dis_freq; ++ unsigned int phy_ddr3_odt_dis_freq; ++ unsigned int ddr3_drv; ++ unsigned int ddr3_odt; ++ unsigned int phy_ddr3_ca_drv; ++ unsigned int phy_ddr3_ck_drv; ++ unsigned int phy_ddr3_dq_drv; ++ unsigned int phy_ddr3_odt; + -+ rk_dma_heap_total_dec(heap, len); -+} ++ unsigned int phy_lpddr2_odt_dis_freq; ++ unsigned int lpddr2_drv; ++ unsigned int phy_lpddr2_ca_drv; ++ unsigned int phy_lpddr2_ck_drv; ++ unsigned int phy_lpddr2_dq_drv; ++ unsigned int phy_lpddr2_odt; + -+static const struct rk_dma_heap_ops rk_cma_heap_ops = { -+ .allocate = rk_cma_heap_allocate, -+ .alloc_contig_pages = rk_cma_heap_allocate_pages, -+ .free_contig_pages = rk_cma_heap_free_pages, -+}; ++ unsigned int lpddr3_odt_dis_freq; ++ unsigned int phy_lpddr3_odt_dis_freq; ++ unsigned int lpddr3_drv; ++ unsigned int lpddr3_odt; ++ unsigned int phy_lpddr3_ca_drv; ++ unsigned int phy_lpddr3_ck_drv; ++ unsigned int phy_lpddr3_dq_drv; ++ unsigned int phy_lpddr3_odt; + -+static int cma_procfs_show(struct seq_file *s, void *private); ++ unsigned int lpddr4_odt_dis_freq; ++ unsigned int phy_lpddr4_odt_dis_freq; ++ unsigned int lpddr4_drv; ++ unsigned int lpddr4_dq_odt; ++ unsigned int lpddr4_ca_odt; ++ unsigned int phy_lpddr4_ca_drv; ++ unsigned int phy_lpddr4_ck_cs_drv; ++ unsigned int phy_lpddr4_dq_drv; ++ unsigned int phy_lpddr4_odt; + -+static int __rk_add_cma_heap(struct cma *cma, void *data) -+{ -+ struct rk_cma_heap *cma_heap; -+ struct rk_dma_heap_export_info exp_info; ++ unsigned int ddr4_odt_dis_freq; ++ unsigned int phy_ddr4_odt_dis_freq; ++ unsigned int ddr4_drv; ++ unsigned int ddr4_odt; ++ unsigned int phy_ddr4_ca_drv; ++ unsigned int phy_ddr4_ck_drv; ++ unsigned int phy_ddr4_dq_drv; ++ unsigned int phy_ddr4_odt; + -+ cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); -+ if (!cma_heap) -+ return -ENOMEM; -+ cma_heap->cma = cma; ++ unsigned int ca_skew[15]; ++ unsigned int cs0_skew[44]; ++ unsigned int cs1_skew[44]; + -+ exp_info.name = cma_get_name(cma); -+ exp_info.ops = &rk_cma_heap_ops; -+ exp_info.priv = cma_heap; -+ exp_info.support_cma = true; ++ unsigned int available; ++}; + -+ cma_heap->heap = rk_dma_heap_add(&exp_info); -+ if (IS_ERR(cma_heap->heap)) { -+ int ret = PTR_ERR(cma_heap->heap); -+ -+ kfree(cma_heap); -+ return ret; -+ } ++static const char * const rk1808_dts_ca_timing[] = { ++ "a0_ddr3a9_de-skew", ++ "a1_ddr3a14_de-skew", ++ "a2_ddr3a13_de-skew", ++ "a3_ddr3a11_de-skew", ++ "a4_ddr3a2_de-skew", ++ "a5_ddr3a4_de-skew", ++ "a6_ddr3a3_de-skew", ++ "a7_ddr3a6_de-skew", ++ "a8_ddr3a5_de-skew", ++ "a9_ddr3a1_de-skew", ++ "a10_ddr3a0_de-skew", ++ "a11_ddr3a7_de-skew", ++ "a12_ddr3casb_de-skew", ++ "a13_ddr3a8_de-skew", ++ "a14_ddr3odt0_de-skew", ++ "a15_ddr3ba1_de-skew", ++ "a16_ddr3rasb_de-skew", ++ "a17_ddr3null_de-skew", ++ "ba0_ddr3ba2_de-skew", ++ "ba1_ddr3a12_de-skew", ++ "bg0_ddr3ba0_de-skew", ++ "bg1_ddr3web_de-skew", ++ "cke_ddr3cke_de-skew", ++ "ck_ddr3ck_de-skew", ++ "ckb_ddr3ckb_de-skew", ++ "csb0_ddr3a10_de-skew", ++ "odt0_ddr3a15_de-skew", ++ "resetn_ddr3resetn_de-skew", ++ "actn_ddr3csb0_de-skew", ++ "csb1_ddr3csb1_de-skew", ++ "odt1_ddr3odt1_de-skew", ++}; + -+ if (cma_heap->heap->procfs) -+ proc_create_single_data("alloc_bitmap", 0, cma_heap->heap->procfs, -+ cma_procfs_show, cma); ++static const char * const rk1808_dts_cs0_a_timing[] = { ++ "cs0_dm0_rx_de-skew", ++ "cs0_dm0_tx_de-skew", ++ 
"cs0_dq0_rx_de-skew", ++ "cs0_dq0_tx_de-skew", ++ "cs0_dq1_rx_de-skew", ++ "cs0_dq1_tx_de-skew", ++ "cs0_dq2_rx_de-skew", ++ "cs0_dq2_tx_de-skew", ++ "cs0_dq3_rx_de-skew", ++ "cs0_dq3_tx_de-skew", ++ "cs0_dq4_rx_de-skew", ++ "cs0_dq4_tx_de-skew", ++ "cs0_dq5_rx_de-skew", ++ "cs0_dq5_tx_de-skew", ++ "cs0_dq6_rx_de-skew", ++ "cs0_dq6_tx_de-skew", ++ "cs0_dq7_rx_de-skew", ++ "cs0_dq7_tx_de-skew", ++ "cs0_dqs0p_rx_de-skew", ++ "cs0_dqs0p_tx_de-skew", ++ "cs0_dqs0n_tx_de-skew", ++ "cs0_dm1_rx_de-skew", ++ "cs0_dm1_tx_de-skew", ++ "cs0_dq8_rx_de-skew", ++ "cs0_dq8_tx_de-skew", ++ "cs0_dq9_rx_de-skew", ++ "cs0_dq9_tx_de-skew", ++ "cs0_dq10_rx_de-skew", ++ "cs0_dq10_tx_de-skew", ++ "cs0_dq11_rx_de-skew", ++ "cs0_dq11_tx_de-skew", ++ "cs0_dq12_rx_de-skew", ++ "cs0_dq12_tx_de-skew", ++ "cs0_dq13_rx_de-skew", ++ "cs0_dq13_tx_de-skew", ++ "cs0_dq14_rx_de-skew", ++ "cs0_dq14_tx_de-skew", ++ "cs0_dq15_rx_de-skew", ++ "cs0_dq15_tx_de-skew", ++ "cs0_dqs1p_rx_de-skew", ++ "cs0_dqs1p_tx_de-skew", ++ "cs0_dqs1n_tx_de-skew", ++ "cs0_dqs0n_rx_de-skew", ++ "cs0_dqs1n_rx_de-skew", ++}; + -+ return 0; -+} ++static const char * const rk1808_dts_cs0_b_timing[] = { ++ "cs0_dm2_rx_de-skew", ++ "cs0_dm2_tx_de-skew", ++ "cs0_dq16_rx_de-skew", ++ "cs0_dq16_tx_de-skew", ++ "cs0_dq17_rx_de-skew", ++ "cs0_dq17_tx_de-skew", ++ "cs0_dq18_rx_de-skew", ++ "cs0_dq18_tx_de-skew", ++ "cs0_dq19_rx_de-skew", ++ "cs0_dq19_tx_de-skew", ++ "cs0_dq20_rx_de-skew", ++ "cs0_dq20_tx_de-skew", ++ "cs0_dq21_rx_de-skew", ++ "cs0_dq21_tx_de-skew", ++ "cs0_dq22_rx_de-skew", ++ "cs0_dq22_tx_de-skew", ++ "cs0_dq23_rx_de-skew", ++ "cs0_dq23_tx_de-skew", ++ "cs0_dqs2p_rx_de-skew", ++ "cs0_dqs2p_tx_de-skew", ++ "cs0_dqs2n_tx_de-skew", ++ "cs0_dm3_rx_de-skew", ++ "cs0_dm3_tx_de-skew", ++ "cs0_dq24_rx_de-skew", ++ "cs0_dq24_tx_de-skew", ++ "cs0_dq25_rx_de-skew", ++ "cs0_dq25_tx_de-skew", ++ "cs0_dq26_rx_de-skew", ++ "cs0_dq26_tx_de-skew", ++ "cs0_dq27_rx_de-skew", ++ "cs0_dq27_tx_de-skew", ++ "cs0_dq28_rx_de-skew", ++ "cs0_dq28_tx_de-skew", ++ "cs0_dq29_rx_de-skew", ++ "cs0_dq29_tx_de-skew", ++ "cs0_dq30_rx_de-skew", ++ "cs0_dq30_tx_de-skew", ++ "cs0_dq31_rx_de-skew", ++ "cs0_dq31_tx_de-skew", ++ "cs0_dqs3p_rx_de-skew", ++ "cs0_dqs3p_tx_de-skew", ++ "cs0_dqs3n_tx_de-skew", ++ "cs0_dqs2n_rx_de-skew", ++ "cs0_dqs3n_rx_de-skew", ++}; + -+static int __init rk_add_default_cma_heap(void) -+{ -+ struct cma *cma = rk_dma_heap_get_cma(); ++static const char * const rk1808_dts_cs1_a_timing[] = { ++ "cs1_dm0_rx_de-skew", ++ "cs1_dm0_tx_de-skew", ++ "cs1_dq0_rx_de-skew", ++ "cs1_dq0_tx_de-skew", ++ "cs1_dq1_rx_de-skew", ++ "cs1_dq1_tx_de-skew", ++ "cs1_dq2_rx_de-skew", ++ "cs1_dq2_tx_de-skew", ++ "cs1_dq3_rx_de-skew", ++ "cs1_dq3_tx_de-skew", ++ "cs1_dq4_rx_de-skew", ++ "cs1_dq4_tx_de-skew", ++ "cs1_dq5_rx_de-skew", ++ "cs1_dq5_tx_de-skew", ++ "cs1_dq6_rx_de-skew", ++ "cs1_dq6_tx_de-skew", ++ "cs1_dq7_rx_de-skew", ++ "cs1_dq7_tx_de-skew", ++ "cs1_dqs0p_rx_de-skew", ++ "cs1_dqs0p_tx_de-skew", ++ "cs1_dqs0n_tx_de-skew", ++ "cs1_dm1_rx_de-skew", ++ "cs1_dm1_tx_de-skew", ++ "cs1_dq8_rx_de-skew", ++ "cs1_dq8_tx_de-skew", ++ "cs1_dq9_rx_de-skew", ++ "cs1_dq9_tx_de-skew", ++ "cs1_dq10_rx_de-skew", ++ "cs1_dq10_tx_de-skew", ++ "cs1_dq11_rx_de-skew", ++ "cs1_dq11_tx_de-skew", ++ "cs1_dq12_rx_de-skew", ++ "cs1_dq12_tx_de-skew", ++ "cs1_dq13_rx_de-skew", ++ "cs1_dq13_tx_de-skew", ++ "cs1_dq14_rx_de-skew", ++ "cs1_dq14_tx_de-skew", ++ "cs1_dq15_rx_de-skew", ++ "cs1_dq15_tx_de-skew", ++ "cs1_dqs1p_rx_de-skew", ++ "cs1_dqs1p_tx_de-skew", ++ "cs1_dqs1n_tx_de-skew", ++ 
"cs1_dqs0n_rx_de-skew", ++ "cs1_dqs1n_rx_de-skew", ++}; + -+ if (WARN_ON(!cma)) -+ return -EINVAL; ++static const char * const rk1808_dts_cs1_b_timing[] = { ++ "cs1_dm2_rx_de-skew", ++ "cs1_dm2_tx_de-skew", ++ "cs1_dq16_rx_de-skew", ++ "cs1_dq16_tx_de-skew", ++ "cs1_dq17_rx_de-skew", ++ "cs1_dq17_tx_de-skew", ++ "cs1_dq18_rx_de-skew", ++ "cs1_dq18_tx_de-skew", ++ "cs1_dq19_rx_de-skew", ++ "cs1_dq19_tx_de-skew", ++ "cs1_dq20_rx_de-skew", ++ "cs1_dq20_tx_de-skew", ++ "cs1_dq21_rx_de-skew", ++ "cs1_dq21_tx_de-skew", ++ "cs1_dq22_rx_de-skew", ++ "cs1_dq22_tx_de-skew", ++ "cs1_dq23_rx_de-skew", ++ "cs1_dq23_tx_de-skew", ++ "cs1_dqs2p_rx_de-skew", ++ "cs1_dqs2p_tx_de-skew", ++ "cs1_dqs2n_tx_de-skew", ++ "cs1_dm3_rx_de-skew", ++ "cs1_dm3_tx_de-skew", ++ "cs1_dq24_rx_de-skew", ++ "cs1_dq24_tx_de-skew", ++ "cs1_dq25_rx_de-skew", ++ "cs1_dq25_tx_de-skew", ++ "cs1_dq26_rx_de-skew", ++ "cs1_dq26_tx_de-skew", ++ "cs1_dq27_rx_de-skew", ++ "cs1_dq27_tx_de-skew", ++ "cs1_dq28_rx_de-skew", ++ "cs1_dq28_tx_de-skew", ++ "cs1_dq29_rx_de-skew", ++ "cs1_dq29_tx_de-skew", ++ "cs1_dq30_rx_de-skew", ++ "cs1_dq30_tx_de-skew", ++ "cs1_dq31_rx_de-skew", ++ "cs1_dq31_tx_de-skew", ++ "cs1_dqs3p_rx_de-skew", ++ "cs1_dqs3p_tx_de-skew", ++ "cs1_dqs3n_tx_de-skew", ++ "cs1_dqs2n_rx_de-skew", ++ "cs1_dqs3n_rx_de-skew", ++}; + -+ return __rk_add_cma_heap(cma, NULL); -+} ++struct rk1808_ddr_dts_config_timing { ++ unsigned int ddr2_speed_bin; ++ unsigned int ddr3_speed_bin; ++ unsigned int ddr4_speed_bin; ++ unsigned int pd_idle; ++ unsigned int sr_idle; ++ unsigned int sr_mc_gate_idle; ++ unsigned int srpd_lite_idle; ++ unsigned int standby_idle; + -+#if defined(CONFIG_VIDEO_ROCKCHIP_THUNDER_BOOT_ISP) && !defined(CONFIG_INITCALL_ASYNC) -+subsys_initcall(rk_add_default_cma_heap); -+#else -+module_init(rk_add_default_cma_heap); -+#endif ++ unsigned int auto_pd_dis_freq; ++ unsigned int auto_sr_dis_freq; ++ /* for ddr2 only */ ++ unsigned int ddr2_dll_dis_freq; ++ /* for ddr3 only */ ++ unsigned int ddr3_dll_dis_freq; ++ /* for ddr4 only */ ++ unsigned int ddr4_dll_dis_freq; ++ unsigned int phy_dll_dis_freq; + -+static void cma_procfs_format_array(char *buf, size_t bufsize, u32 *array, int array_size) -+{ -+ int i = 0; ++ unsigned int ddr2_odt_dis_freq; ++ unsigned int phy_ddr2_odt_dis_freq; ++ unsigned int ddr2_drv; ++ unsigned int ddr2_odt; ++ unsigned int phy_ddr2_ca_drv; ++ unsigned int phy_ddr2_ck_drv; ++ unsigned int phy_ddr2_dq_drv; ++ unsigned int phy_ddr2_odt; + -+ while (--array_size >= 0) { -+ size_t len; -+ char term = (array_size && (++i % 8)) ? 
' ' : '\n'; ++ unsigned int ddr3_odt_dis_freq; ++ unsigned int phy_ddr3_odt_dis_freq; ++ unsigned int ddr3_drv; ++ unsigned int ddr3_odt; ++ unsigned int phy_ddr3_ca_drv; ++ unsigned int phy_ddr3_ck_drv; ++ unsigned int phy_ddr3_dq_drv; ++ unsigned int phy_ddr3_odt; + -+ len = snprintf(buf, bufsize, "%08X%c", *array++, term); -+ buf += len; -+ bufsize -= len; -+ } -+} ++ unsigned int phy_lpddr2_odt_dis_freq; ++ unsigned int lpddr2_drv; ++ unsigned int phy_lpddr2_ca_drv; ++ unsigned int phy_lpddr2_ck_drv; ++ unsigned int phy_lpddr2_dq_drv; ++ unsigned int phy_lpddr2_odt; + -+static void cma_procfs_show_bitmap(struct seq_file *s, struct cma *cma) -+{ -+ int elements = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); -+ int size = elements * 9; -+ u32 *array = (u32 *)cma->bitmap; -+ char *buf; ++ unsigned int lpddr3_odt_dis_freq; ++ unsigned int phy_lpddr3_odt_dis_freq; ++ unsigned int lpddr3_drv; ++ unsigned int lpddr3_odt; ++ unsigned int phy_lpddr3_ca_drv; ++ unsigned int phy_lpddr3_ck_drv; ++ unsigned int phy_lpddr3_dq_drv; ++ unsigned int phy_lpddr3_odt; + -+ buf = kmalloc(size + 1, GFP_KERNEL); -+ if (!buf) -+ return; ++ unsigned int lpddr4_odt_dis_freq; ++ unsigned int phy_lpddr4_odt_dis_freq; ++ unsigned int lpddr4_drv; ++ unsigned int lpddr4_dq_odt; ++ unsigned int lpddr4_ca_odt; ++ unsigned int phy_lpddr4_ca_drv; ++ unsigned int phy_lpddr4_ck_cs_drv; ++ unsigned int phy_lpddr4_dq_drv; ++ unsigned int phy_lpddr4_odt; + -+ buf[size] = 0; ++ unsigned int ddr4_odt_dis_freq; ++ unsigned int phy_ddr4_odt_dis_freq; ++ unsigned int ddr4_drv; ++ unsigned int ddr4_odt; ++ unsigned int phy_ddr4_ca_drv; ++ unsigned int phy_ddr4_ck_drv; ++ unsigned int phy_ddr4_dq_drv; ++ unsigned int phy_ddr4_odt; + -+ cma_procfs_format_array(buf, size + 1, array, elements); -+ seq_printf(s, "%s", buf); -+ kfree(buf); -+} ++ unsigned int ca_de_skew[31]; ++ unsigned int cs0_a_de_skew[44]; ++ unsigned int cs0_b_de_skew[44]; ++ unsigned int cs1_a_de_skew[44]; ++ unsigned int cs1_b_de_skew[44]; + -+static u64 cma_procfs_used_get(struct cma *cma) -+{ -+ unsigned long flags; -+ unsigned long used; ++ unsigned int available; ++}; + -+ spin_lock_irqsave(&cma->lock, flags); -+ used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma)); -+ spin_unlock_irqrestore(&cma->lock, flags); ++static const char * const rk3128_dts_timing[] = { ++ "ddr3_speed_bin", ++ "pd_idle", ++ "sr_idle", ++ "auto_pd_dis_freq", ++ "auto_sr_dis_freq", ++ "ddr3_dll_dis_freq", ++ "lpddr2_dll_dis_freq", ++ "phy_dll_dis_freq", ++ "ddr3_odt_dis_freq", ++ "phy_ddr3_odt_disb_freq", ++ "ddr3_drv", ++ "ddr3_odt", ++ "phy_ddr3_clk_drv", ++ "phy_ddr3_cmd_drv", ++ "phy_ddr3_dqs_drv", ++ "phy_ddr3_odt", ++ "lpddr2_drv", ++ "phy_lpddr2_clk_drv", ++ "phy_lpddr2_cmd_drv", ++ "phy_lpddr2_dqs_drv", ++ "ddr_2t", ++}; + -+ return (u64)used << cma->order_per_bit; -+} ++struct rk3128_ddr_dts_config_timing { ++ u32 ddr3_speed_bin; ++ u32 pd_idle; ++ u32 sr_idle; ++ u32 auto_pd_dis_freq; ++ u32 auto_sr_dis_freq; ++ u32 ddr3_dll_dis_freq; ++ u32 lpddr2_dll_dis_freq; ++ u32 phy_dll_dis_freq; ++ u32 ddr3_odt_dis_freq; ++ u32 phy_ddr3_odt_disb_freq; ++ u32 ddr3_drv; ++ u32 ddr3_odt; ++ u32 phy_ddr3_clk_drv; ++ u32 phy_ddr3_cmd_drv; ++ u32 phy_ddr3_dqs_drv; ++ u32 phy_ddr3_odt; ++ u32 lpddr2_drv; ++ u32 phy_lpddr2_clk_drv; ++ u32 phy_lpddr2_cmd_drv; ++ u32 phy_lpddr2_dqs_drv; ++ u32 ddr_2t; ++ u32 available; ++}; + -+static int cma_procfs_show(struct seq_file *s, void *private) -+{ -+ struct cma *cma = s->private; -+ u64 used = cma_procfs_used_get(cma); 
++static const char * const rk3228_dts_timing[] = { ++ "dram_spd_bin", ++ "sr_idle", ++ "pd_idle", ++ "dram_dll_disb_freq", ++ "phy_dll_disb_freq", ++ "dram_odt_disb_freq", ++ "phy_odt_disb_freq", ++ "ddr3_drv", ++ "ddr3_odt", ++ "lpddr3_drv", ++ "lpddr3_odt", ++ "lpddr2_drv", ++ "phy_ddr3_clk_drv", ++ "phy_ddr3_cmd_drv", ++ "phy_ddr3_dqs_drv", ++ "phy_ddr3_odt", ++ "phy_lp23_clk_drv", ++ "phy_lp23_cmd_drv", ++ "phy_lp23_dqs_drv", ++ "phy_lp3_odt" ++}; + -+ seq_printf(s, "Total: %lu KiB\n", cma->count << (PAGE_SHIFT - 10)); -+ seq_printf(s, " Used: %llu KiB\n\n", used << (PAGE_SHIFT - 10)); ++struct rk3228_ddr_dts_config_timing { ++ u32 dram_spd_bin; ++ u32 sr_idle; ++ u32 pd_idle; ++ u32 dram_dll_dis_freq; ++ u32 phy_dll_dis_freq; ++ u32 dram_odt_dis_freq; ++ u32 phy_odt_dis_freq; ++ u32 ddr3_drv; ++ u32 ddr3_odt; ++ u32 lpddr3_drv; ++ u32 lpddr3_odt; ++ u32 lpddr2_drv; ++ u32 phy_ddr3_clk_drv; ++ u32 phy_ddr3_cmd_drv; ++ u32 phy_ddr3_dqs_drv; ++ u32 phy_ddr3_odt; ++ u32 phy_lp23_clk_drv; ++ u32 phy_lp23_cmd_drv; ++ u32 phy_lp23_dqs_drv; ++ u32 phy_lp3_odt; ++}; + -+ cma_procfs_show_bitmap(s, cma); ++static const char * const rk3288_dts_timing[] = { ++ "ddr3_speed_bin", ++ "pd_idle", ++ "sr_idle", + -+ return 0; -+} ++ "auto_pd_dis_freq", ++ "auto_sr_dis_freq", ++ /* for ddr3 only */ ++ "ddr3_dll_dis_freq", ++ "phy_dll_dis_freq", + -+MODULE_DESCRIPTION("RockChip DMA-BUF CMA Heap"); -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/dma-buf/rk_heaps/rk-dma-cma.c b/drivers/dma-buf/rk_heaps/rk-dma-cma.c -new file mode 100644 -index 000000000..b6521f7dc ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/rk-dma-cma.c -@@ -0,0 +1,77 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Early setup for Rockchip DMA CMA -+ * -+ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. 
-+ * Author: Simon Xue -+ */ ++ "ddr3_odt_dis_freq", ++ "phy_ddr3_odt_dis_freq", ++ "ddr3_drv", ++ "ddr3_odt", ++ "phy_ddr3_drv", ++ "phy_ddr3_odt", + -+#include -+#include ++ "lpddr2_drv", ++ "phy_lpddr2_drv", + -+#include "rk-dma-heap.h" ++ "lpddr3_odt_dis_freq", ++ "phy_lpddr3_odt_dis_freq", ++ "lpddr3_drv", ++ "lpddr3_odt", ++ "phy_lpddr3_drv", ++ "phy_lpddr3_odt" ++}; + -+#define RK_DMA_HEAP_CMA_DEFAULT_SIZE SZ_32M ++struct rk3288_ddr_dts_config_timing { ++ unsigned int ddr3_speed_bin; ++ unsigned int pd_idle; ++ unsigned int sr_idle; + -+static unsigned long rk_dma_heap_size __initdata; -+static unsigned long rk_dma_heap_base __initdata; ++ unsigned int auto_pd_dis_freq; ++ unsigned int auto_sr_dis_freq; ++ /* for ddr3 only */ ++ unsigned int ddr3_dll_dis_freq; ++ unsigned int phy_dll_dis_freq; + -+static struct cma *rk_dma_heap_cma; ++ unsigned int ddr3_odt_dis_freq; ++ unsigned int phy_ddr3_odt_dis_freq; ++ unsigned int ddr3_drv; ++ unsigned int ddr3_odt; ++ unsigned int phy_ddr3_drv; ++ unsigned int phy_ddr3_odt; + -+static int __init early_dma_heap_cma(char *p) -+{ -+ if (!p) { -+ pr_err("Config string not provided\n"); -+ return -EINVAL; -+ } ++ unsigned int lpddr2_drv; ++ unsigned int phy_lpddr2_drv; + -+ rk_dma_heap_size = memparse(p, &p); -+ if (*p != '@') -+ return 0; ++ unsigned int lpddr3_odt_dis_freq; ++ unsigned int phy_lpddr3_odt_dis_freq; ++ unsigned int lpddr3_drv; ++ unsigned int lpddr3_odt; ++ unsigned int phy_lpddr3_drv; ++ unsigned int phy_lpddr3_odt; + -+ rk_dma_heap_base = memparse(p + 1, &p); ++ unsigned int available; ++}; + -+ return 0; -+} -+early_param("rk_dma_heap_cma", early_dma_heap_cma); ++/* hope this define can adapt all future platfor */ ++static const char * const rk3328_dts_timing[] = { ++ "ddr3_speed_bin", ++ "ddr4_speed_bin", ++ "pd_idle", ++ "sr_idle", ++ "sr_mc_gate_idle", ++ "srpd_lite_idle", ++ "standby_idle", + -+#ifndef CONFIG_DMA_CMA -+void __weak -+dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) -+{ -+} -+#endif ++ "auto_pd_dis_freq", ++ "auto_sr_dis_freq", ++ "ddr3_dll_dis_freq", ++ "ddr4_dll_dis_freq", ++ "phy_dll_dis_freq", + -+int __init rk_dma_heap_cma_setup(void) -+{ -+ unsigned long size; -+ int ret; -+ bool fix = false; ++ "ddr3_odt_dis_freq", ++ "phy_ddr3_odt_dis_freq", ++ "ddr3_drv", ++ "ddr3_odt", ++ "phy_ddr3_ca_drv", ++ "phy_ddr3_ck_drv", ++ "phy_ddr3_dq_drv", ++ "phy_ddr3_odt", + -+ if (rk_dma_heap_size) -+ size = rk_dma_heap_size; -+ else -+ size = RK_DMA_HEAP_CMA_DEFAULT_SIZE; ++ "lpddr3_odt_dis_freq", ++ "phy_lpddr3_odt_dis_freq", ++ "lpddr3_drv", ++ "lpddr3_odt", ++ "phy_lpddr3_ca_drv", ++ "phy_lpddr3_ck_drv", ++ "phy_lpddr3_dq_drv", ++ "phy_lpddr3_odt", + -+ if (rk_dma_heap_base) -+ fix = true; ++ "lpddr4_odt_dis_freq", ++ "phy_lpddr4_odt_dis_freq", ++ "lpddr4_drv", ++ "lpddr4_dq_odt", ++ "lpddr4_ca_odt", ++ "phy_lpddr4_ca_drv", ++ "phy_lpddr4_ck_cs_drv", ++ "phy_lpddr4_dq_drv", ++ "phy_lpddr4_odt", + -+ ret = cma_declare_contiguous(rk_dma_heap_base, PAGE_ALIGN(size), 0x0, -+ PAGE_SIZE, 0, fix, "rk-dma-heap-cma", -+ &rk_dma_heap_cma); -+ if (ret) -+ return ret; ++ "ddr4_odt_dis_freq", ++ "phy_ddr4_odt_dis_freq", ++ "ddr4_drv", ++ "ddr4_odt", ++ "phy_ddr4_ca_drv", ++ "phy_ddr4_ck_drv", ++ "phy_ddr4_dq_drv", ++ "phy_ddr4_odt", ++}; + -+#if !IS_ENABLED(CONFIG_CMA_INACTIVE) -+ /* Architecture specific contiguous memory fixup. 
*/ -+ dma_contiguous_early_fixup(cma_get_base(rk_dma_heap_cma), -+ cma_get_size(rk_dma_heap_cma)); -+#endif ++static const char * const rk3328_dts_ca_timing[] = { ++ "ddr3a1_ddr4a9_de-skew", ++ "ddr3a0_ddr4a10_de-skew", ++ "ddr3a3_ddr4a6_de-skew", ++ "ddr3a2_ddr4a4_de-skew", ++ "ddr3a5_ddr4a8_de-skew", ++ "ddr3a4_ddr4a5_de-skew", ++ "ddr3a7_ddr4a11_de-skew", ++ "ddr3a6_ddr4a7_de-skew", ++ "ddr3a9_ddr4a0_de-skew", ++ "ddr3a8_ddr4a13_de-skew", ++ "ddr3a11_ddr4a3_de-skew", ++ "ddr3a10_ddr4cs0_de-skew", ++ "ddr3a13_ddr4a2_de-skew", ++ "ddr3a12_ddr4ba1_de-skew", ++ "ddr3a15_ddr4odt0_de-skew", ++ "ddr3a14_ddr4a1_de-skew", ++ "ddr3ba1_ddr4a15_de-skew", ++ "ddr3ba0_ddr4bg0_de-skew", ++ "ddr3ras_ddr4cke_de-skew", ++ "ddr3ba2_ddr4ba0_de-skew", ++ "ddr3we_ddr4bg1_de-skew", ++ "ddr3cas_ddr4a12_de-skew", ++ "ddr3ckn_ddr4ckn_de-skew", ++ "ddr3ckp_ddr4ckp_de-skew", ++ "ddr3cke_ddr4a16_de-skew", ++ "ddr3odt0_ddr4a14_de-skew", ++ "ddr3cs0_ddr4act_de-skew", ++ "ddr3reset_ddr4reset_de-skew", ++ "ddr3cs1_ddr4cs1_de-skew", ++ "ddr3odt1_ddr4odt1_de-skew", ++}; + -+ return 0; -+} ++static const char * const rk3328_dts_cs0_timing[] = { ++ "cs0_dm0_rx_de-skew", ++ "cs0_dm0_tx_de-skew", ++ "cs0_dq0_rx_de-skew", ++ "cs0_dq0_tx_de-skew", ++ "cs0_dq1_rx_de-skew", ++ "cs0_dq1_tx_de-skew", ++ "cs0_dq2_rx_de-skew", ++ "cs0_dq2_tx_de-skew", ++ "cs0_dq3_rx_de-skew", ++ "cs0_dq3_tx_de-skew", ++ "cs0_dq4_rx_de-skew", ++ "cs0_dq4_tx_de-skew", ++ "cs0_dq5_rx_de-skew", ++ "cs0_dq5_tx_de-skew", ++ "cs0_dq6_rx_de-skew", ++ "cs0_dq6_tx_de-skew", ++ "cs0_dq7_rx_de-skew", ++ "cs0_dq7_tx_de-skew", ++ "cs0_dqs0_rx_de-skew", ++ "cs0_dqs0p_tx_de-skew", ++ "cs0_dqs0n_tx_de-skew", + -+struct cma *rk_dma_heap_get_cma(void) -+{ -+ return rk_dma_heap_cma; -+} -diff --git a/drivers/dma-buf/rk_heaps/rk-dma-heap.c b/drivers/dma-buf/rk_heaps/rk-dma-heap.c -new file mode 100644 -index 000000000..d0e76edf3 ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/rk-dma-heap.c -@@ -0,0 +1,731 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Framework for userspace DMA-BUF allocations -+ * -+ * Copyright (C) 2011 Google, Inc. -+ * Copyright (C) 2019 Linaro Ltd. -+ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. 
-+ * Author: Simon Xue -+ */ ++ "cs0_dm1_rx_de-skew", ++ "cs0_dm1_tx_de-skew", ++ "cs0_dq8_rx_de-skew", ++ "cs0_dq8_tx_de-skew", ++ "cs0_dq9_rx_de-skew", ++ "cs0_dq9_tx_de-skew", ++ "cs0_dq10_rx_de-skew", ++ "cs0_dq10_tx_de-skew", ++ "cs0_dq11_rx_de-skew", ++ "cs0_dq11_tx_de-skew", ++ "cs0_dq12_rx_de-skew", ++ "cs0_dq12_tx_de-skew", ++ "cs0_dq13_rx_de-skew", ++ "cs0_dq13_tx_de-skew", ++ "cs0_dq14_rx_de-skew", ++ "cs0_dq14_tx_de-skew", ++ "cs0_dq15_rx_de-skew", ++ "cs0_dq15_tx_de-skew", ++ "cs0_dqs1_rx_de-skew", ++ "cs0_dqs1p_tx_de-skew", ++ "cs0_dqs1n_tx_de-skew", + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ "cs0_dm2_rx_de-skew", ++ "cs0_dm2_tx_de-skew", ++ "cs0_dq16_rx_de-skew", ++ "cs0_dq16_tx_de-skew", ++ "cs0_dq17_rx_de-skew", ++ "cs0_dq17_tx_de-skew", ++ "cs0_dq18_rx_de-skew", ++ "cs0_dq18_tx_de-skew", ++ "cs0_dq19_rx_de-skew", ++ "cs0_dq19_tx_de-skew", ++ "cs0_dq20_rx_de-skew", ++ "cs0_dq20_tx_de-skew", ++ "cs0_dq21_rx_de-skew", ++ "cs0_dq21_tx_de-skew", ++ "cs0_dq22_rx_de-skew", ++ "cs0_dq22_tx_de-skew", ++ "cs0_dq23_rx_de-skew", ++ "cs0_dq23_tx_de-skew", ++ "cs0_dqs2_rx_de-skew", ++ "cs0_dqs2p_tx_de-skew", ++ "cs0_dqs2n_tx_de-skew", + -+#include "rk-dma-heap.h" ++ "cs0_dm3_rx_de-skew", ++ "cs0_dm3_tx_de-skew", ++ "cs0_dq24_rx_de-skew", ++ "cs0_dq24_tx_de-skew", ++ "cs0_dq25_rx_de-skew", ++ "cs0_dq25_tx_de-skew", ++ "cs0_dq26_rx_de-skew", ++ "cs0_dq26_tx_de-skew", ++ "cs0_dq27_rx_de-skew", ++ "cs0_dq27_tx_de-skew", ++ "cs0_dq28_rx_de-skew", ++ "cs0_dq28_tx_de-skew", ++ "cs0_dq29_rx_de-skew", ++ "cs0_dq29_tx_de-skew", ++ "cs0_dq30_rx_de-skew", ++ "cs0_dq30_tx_de-skew", ++ "cs0_dq31_rx_de-skew", ++ "cs0_dq31_tx_de-skew", ++ "cs0_dqs3_rx_de-skew", ++ "cs0_dqs3p_tx_de-skew", ++ "cs0_dqs3n_tx_de-skew", ++}; + -+#define DEVNAME "rk_dma_heap" ++static const char * const rk3328_dts_cs1_timing[] = { ++ "cs1_dm0_rx_de-skew", ++ "cs1_dm0_tx_de-skew", ++ "cs1_dq0_rx_de-skew", ++ "cs1_dq0_tx_de-skew", ++ "cs1_dq1_rx_de-skew", ++ "cs1_dq1_tx_de-skew", ++ "cs1_dq2_rx_de-skew", ++ "cs1_dq2_tx_de-skew", ++ "cs1_dq3_rx_de-skew", ++ "cs1_dq3_tx_de-skew", ++ "cs1_dq4_rx_de-skew", ++ "cs1_dq4_tx_de-skew", ++ "cs1_dq5_rx_de-skew", ++ "cs1_dq5_tx_de-skew", ++ "cs1_dq6_rx_de-skew", ++ "cs1_dq6_tx_de-skew", ++ "cs1_dq7_rx_de-skew", ++ "cs1_dq7_tx_de-skew", ++ "cs1_dqs0_rx_de-skew", ++ "cs1_dqs0p_tx_de-skew", ++ "cs1_dqs0n_tx_de-skew", + -+#define NUM_HEAP_MINORS 128 ++ "cs1_dm1_rx_de-skew", ++ "cs1_dm1_tx_de-skew", ++ "cs1_dq8_rx_de-skew", ++ "cs1_dq8_tx_de-skew", ++ "cs1_dq9_rx_de-skew", ++ "cs1_dq9_tx_de-skew", ++ "cs1_dq10_rx_de-skew", ++ "cs1_dq10_tx_de-skew", ++ "cs1_dq11_rx_de-skew", ++ "cs1_dq11_tx_de-skew", ++ "cs1_dq12_rx_de-skew", ++ "cs1_dq12_tx_de-skew", ++ "cs1_dq13_rx_de-skew", ++ "cs1_dq13_tx_de-skew", ++ "cs1_dq14_rx_de-skew", ++ "cs1_dq14_tx_de-skew", ++ "cs1_dq15_rx_de-skew", ++ "cs1_dq15_tx_de-skew", ++ "cs1_dqs1_rx_de-skew", ++ "cs1_dqs1p_tx_de-skew", ++ "cs1_dqs1n_tx_de-skew", + -+static LIST_HEAD(rk_heap_list); -+static DEFINE_MUTEX(rk_heap_list_lock); -+static dev_t rk_dma_heap_devt; -+static struct class *rk_dma_heap_class; -+static DEFINE_XARRAY_ALLOC(rk_dma_heap_minors); -+struct proc_dir_entry *proc_rk_dma_heap_dir; ++ "cs1_dm2_rx_de-skew", ++ "cs1_dm2_tx_de-skew", ++ "cs1_dq16_rx_de-skew", ++ "cs1_dq16_tx_de-skew", ++ "cs1_dq17_rx_de-skew", ++ "cs1_dq17_tx_de-skew", ++ "cs1_dq18_rx_de-skew", ++ "cs1_dq18_tx_de-skew", ++ "cs1_dq19_rx_de-skew", ++ 
"cs1_dq19_tx_de-skew", ++ "cs1_dq20_rx_de-skew", ++ "cs1_dq20_tx_de-skew", ++ "cs1_dq21_rx_de-skew", ++ "cs1_dq21_tx_de-skew", ++ "cs1_dq22_rx_de-skew", ++ "cs1_dq22_tx_de-skew", ++ "cs1_dq23_rx_de-skew", ++ "cs1_dq23_tx_de-skew", ++ "cs1_dqs2_rx_de-skew", ++ "cs1_dqs2p_tx_de-skew", ++ "cs1_dqs2n_tx_de-skew", + -+#define K(size) ((unsigned long)((size) >> 10)) ++ "cs1_dm3_rx_de-skew", ++ "cs1_dm3_tx_de-skew", ++ "cs1_dq24_rx_de-skew", ++ "cs1_dq24_tx_de-skew", ++ "cs1_dq25_rx_de-skew", ++ "cs1_dq25_tx_de-skew", ++ "cs1_dq26_rx_de-skew", ++ "cs1_dq26_tx_de-skew", ++ "cs1_dq27_rx_de-skew", ++ "cs1_dq27_tx_de-skew", ++ "cs1_dq28_rx_de-skew", ++ "cs1_dq28_tx_de-skew", ++ "cs1_dq29_rx_de-skew", ++ "cs1_dq29_tx_de-skew", ++ "cs1_dq30_rx_de-skew", ++ "cs1_dq30_tx_de-skew", ++ "cs1_dq31_rx_de-skew", ++ "cs1_dq31_tx_de-skew", ++ "cs1_dqs3_rx_de-skew", ++ "cs1_dqs3p_tx_de-skew", ++ "cs1_dqs3n_tx_de-skew", ++}; + -+static int rk_vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) -+{ -+ struct rk_vmap_pfn_data *data = private; ++struct rk3328_ddr_dts_config_timing { ++ unsigned int ddr3_speed_bin; ++ unsigned int ddr4_speed_bin; ++ unsigned int pd_idle; ++ unsigned int sr_idle; ++ unsigned int sr_mc_gate_idle; ++ unsigned int srpd_lite_idle; ++ unsigned int standby_idle; + -+ *pte = pte_mkspecial(pfn_pte(data->pfn++, data->prot)); -+ return 0; -+} ++ unsigned int auto_pd_dis_freq; ++ unsigned int auto_sr_dis_freq; ++ /* for ddr3 only */ ++ unsigned int ddr3_dll_dis_freq; ++ /* for ddr4 only */ ++ unsigned int ddr4_dll_dis_freq; ++ unsigned int phy_dll_dis_freq; + -+void *rk_vmap_contig_pfn(unsigned long pfn, unsigned int count, pgprot_t prot) -+{ -+ struct rk_vmap_pfn_data data = { .pfn = pfn, .prot = pgprot_nx(prot) }; -+ struct vm_struct *area; ++ unsigned int ddr3_odt_dis_freq; ++ unsigned int phy_ddr3_odt_dis_freq; ++ unsigned int ddr3_drv; ++ unsigned int ddr3_odt; ++ unsigned int phy_ddr3_ca_drv; ++ unsigned int phy_ddr3_ck_drv; ++ unsigned int phy_ddr3_dq_drv; ++ unsigned int phy_ddr3_odt; + -+ area = get_vm_area_caller(count * PAGE_SIZE, VM_MAP, -+ __builtin_return_address(0)); -+ if (!area) -+ return NULL; -+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr, -+ count * PAGE_SIZE, rk_vmap_pfn_apply, &data)) { -+ free_vm_area(area); -+ return NULL; -+ } -+ return area->addr; -+} ++ unsigned int lpddr3_odt_dis_freq; ++ unsigned int phy_lpddr3_odt_dis_freq; ++ unsigned int lpddr3_drv; ++ unsigned int lpddr3_odt; ++ unsigned int phy_lpddr3_ca_drv; ++ unsigned int phy_lpddr3_ck_drv; ++ unsigned int phy_lpddr3_dq_drv; ++ unsigned int phy_lpddr3_odt; + -+int rk_dma_heap_set_dev(struct device *heap_dev) -+{ -+ int err = 0; ++ unsigned int lpddr4_odt_dis_freq; ++ unsigned int phy_lpddr4_odt_dis_freq; ++ unsigned int lpddr4_drv; ++ unsigned int lpddr4_dq_odt; ++ unsigned int lpddr4_ca_odt; ++ unsigned int phy_lpddr4_ca_drv; ++ unsigned int phy_lpddr4_ck_cs_drv; ++ unsigned int phy_lpddr4_dq_drv; ++ unsigned int phy_lpddr4_odt; + -+ if (!heap_dev) -+ return -EINVAL; ++ unsigned int ddr4_odt_dis_freq; ++ unsigned int phy_ddr4_odt_dis_freq; ++ unsigned int ddr4_drv; ++ unsigned int ddr4_odt; ++ unsigned int phy_ddr4_ca_drv; ++ unsigned int phy_ddr4_ck_drv; ++ unsigned int phy_ddr4_dq_drv; ++ unsigned int phy_ddr4_odt; + -+ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); ++ unsigned int ca_skew[15]; ++ unsigned int cs0_skew[44]; ++ unsigned int cs1_skew[44]; + -+ if (!heap_dev->dma_parms) { -+ heap_dev->dma_parms = devm_kzalloc(heap_dev, -+ sizeof(*heap_dev->dma_parms), -+ 
GFP_KERNEL); -+ if (!heap_dev->dma_parms) -+ return -ENOMEM; ++ unsigned int available; ++}; + -+ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); -+ if (err) { -+ devm_kfree(heap_dev, heap_dev->dma_parms); -+ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); -+ return err; -+ } -+ } ++struct rk3328_ddr_de_skew_setting { ++ unsigned int ca_de_skew[30]; ++ unsigned int cs0_de_skew[84]; ++ unsigned int cs1_de_skew[84]; ++}; + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_set_dev); ++struct rk3368_dram_timing { ++ u32 dram_spd_bin; ++ u32 sr_idle; ++ u32 pd_idle; ++ u32 dram_dll_dis_freq; ++ u32 phy_dll_dis_freq; ++ u32 dram_odt_dis_freq; ++ u32 phy_odt_dis_freq; ++ u32 ddr3_drv; ++ u32 ddr3_odt; ++ u32 lpddr3_drv; ++ u32 lpddr3_odt; ++ u32 lpddr2_drv; ++ u32 phy_clk_drv; ++ u32 phy_cmd_drv; ++ u32 phy_dqs_drv; ++ u32 phy_odt; ++ u32 ddr_2t; ++}; + -+struct rk_dma_heap *rk_dma_heap_find(const char *name) -+{ -+ struct rk_dma_heap *h; ++struct rk3399_dram_timing { ++ unsigned int ddr3_speed_bin; ++ unsigned int pd_idle; ++ unsigned int sr_idle; ++ unsigned int sr_mc_gate_idle; ++ unsigned int srpd_lite_idle; ++ unsigned int standby_idle; ++ unsigned int auto_lp_dis_freq; ++ unsigned int ddr3_dll_dis_freq; ++ unsigned int phy_dll_dis_freq; ++ unsigned int ddr3_odt_dis_freq; ++ unsigned int ddr3_drv; ++ unsigned int ddr3_odt; ++ unsigned int phy_ddr3_ca_drv; ++ unsigned int phy_ddr3_dq_drv; ++ unsigned int phy_ddr3_odt; ++ unsigned int lpddr3_odt_dis_freq; ++ unsigned int lpddr3_drv; ++ unsigned int lpddr3_odt; ++ unsigned int phy_lpddr3_ca_drv; ++ unsigned int phy_lpddr3_dq_drv; ++ unsigned int phy_lpddr3_odt; ++ unsigned int lpddr4_odt_dis_freq; ++ unsigned int lpddr4_drv; ++ unsigned int lpddr4_dq_odt; ++ unsigned int lpddr4_ca_odt; ++ unsigned int phy_lpddr4_ca_drv; ++ unsigned int phy_lpddr4_ck_cs_drv; ++ unsigned int phy_lpddr4_dq_drv; ++ unsigned int phy_lpddr4_odt; ++}; + -+ mutex_lock(&rk_heap_list_lock); -+ list_for_each_entry(h, &rk_heap_list, list) { -+ if (!strcmp(h->name, name)) { -+ kref_get(&h->refcount); -+ mutex_unlock(&rk_heap_list_lock); -+ return h; -+ } -+ } -+ mutex_unlock(&rk_heap_list_lock); -+ return NULL; -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_find); ++/* name rule: ddr4(pad_name)_ddr3_lpddr3_lpddr4_de-skew */ ++static const char * const rv1126_dts_ca_timing[] = { ++ "a0_a3_a3_cke1-a_de-skew", ++ "a1_ba1_null_cke0-b_de-skew", ++ "a2_a9_a9_a4-a_de-skew", ++ "a3_a15_null_a5-b_de-skew", ++ "a4_a6_a6_ck-a_de-skew", ++ "a5_a12_null_odt0-b_de-skew", ++ "a6_ba2_null_a0-a_de-skew", ++ "a7_a4_a4_odt0-a_de-skew", ++ "a8_a1_a1_cke0-a_de-skew", ++ "a9_a5_a5_a5-a_de-skew", ++ "a10_a8_a8_clkb-a_de-skew", ++ "a11_a7_a7_ca2-a_de-skew", ++ "a12_rasn_null_ca1-a_de-skew", ++ "a13_a13_null_ca3-a_de-skew", ++ "a14_a14_null_csb1-b_de-skew", ++ "a15_a10_null_ca0-b_de-skew", ++ "a16_a11_null_csb0-b_de-skew", ++ "a17_null_null_null_de-skew", ++ "ba0_csb1_csb1_csb0-a_de-skew", ++ "ba1_wen_null_cke1-b_de-skew", ++ "bg0_odt1_odt1_csb1-a_de-skew", ++ "bg1_a2_a2_odt1-a_de-skew", ++ "cke0_casb_null_ca1-b_de-skew", ++ "ck_ck_ck_ck-b_de-skew", ++ "ckb_ckb_ckb_ckb-b_de-skew", ++ "csb0_odt0_odt0_ca2-b_de-skew", ++ "odt0_csb0_csb0_ca4-b_de-skew", ++ "resetn_resetn_null-resetn_de-skew", ++ "actn_cke_cke_ca3-b_de-skew", ++ "cke1_null_null_null_de-skew", ++ "csb1_ba0_null_null_de-skew", ++ "odt1_a0_a0_odt1-b_de-skew", ++}; + -+void rk_dma_heap_buffer_free(struct dma_buf *dmabuf) -+{ -+ dma_buf_put(dmabuf); -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_buffer_free); ++static 
const char * const rv1126_dts_cs0_a_timing[] = { ++ "cs0_dm0_rx_de-skew", ++ "cs0_dq0_rx_de-skew", ++ "cs0_dq1_rx_de-skew", ++ "cs0_dq2_rx_de-skew", ++ "cs0_dq3_rx_de-skew", ++ "cs0_dq4_rx_de-skew", ++ "cs0_dq5_rx_de-skew", ++ "cs0_dq6_rx_de-skew", ++ "cs0_dq7_rx_de-skew", ++ "cs0_dqs0p_rx_de-skew", ++ "cs0_dqs0n_rx_de-skew", ++ "cs0_dm1_rx_de-skew", ++ "cs0_dq8_rx_de-skew", ++ "cs0_dq9_rx_de-skew", ++ "cs0_dq10_rx_de-skew", ++ "cs0_dq11_rx_de-skew", ++ "cs0_dq12_rx_de-skew", ++ "cs0_dq13_rx_de-skew", ++ "cs0_dq14_rx_de-skew", ++ "cs0_dq15_rx_de-skew", ++ "cs0_dqs1p_rx_de-skew", ++ "cs0_dqs1n_rx_de-skew", ++ "cs0_dm0_tx_de-skew", ++ "cs0_dq0_tx_de-skew", ++ "cs0_dq1_tx_de-skew", ++ "cs0_dq2_tx_de-skew", ++ "cs0_dq3_tx_de-skew", ++ "cs0_dq4_tx_de-skew", ++ "cs0_dq5_tx_de-skew", ++ "cs0_dq6_tx_de-skew", ++ "cs0_dq7_tx_de-skew", ++ "cs0_dqs0p_tx_de-skew", ++ "cs0_dqs0n_tx_de-skew", ++ "cs0_dm1_tx_de-skew", ++ "cs0_dq8_tx_de-skew", ++ "cs0_dq9_tx_de-skew", ++ "cs0_dq10_tx_de-skew", ++ "cs0_dq11_tx_de-skew", ++ "cs0_dq12_tx_de-skew", ++ "cs0_dq13_tx_de-skew", ++ "cs0_dq14_tx_de-skew", ++ "cs0_dq15_tx_de-skew", ++ "cs0_dqs1p_tx_de-skew", ++ "cs0_dqs1n_tx_de-skew", ++}; + -+struct dma_buf *rk_dma_heap_buffer_alloc(struct rk_dma_heap *heap, size_t len, -+ unsigned int fd_flags, -+ unsigned int heap_flags, -+ const char *name) -+{ -+ struct dma_buf *dmabuf; ++static const char * const rv1126_dts_cs0_b_timing[] = { ++ "cs0_dm2_rx_de-skew", ++ "cs0_dq16_rx_de-skew", ++ "cs0_dq17_rx_de-skew", ++ "cs0_dq18_rx_de-skew", ++ "cs0_dq19_rx_de-skew", ++ "cs0_dq20_rx_de-skew", ++ "cs0_dq21_rx_de-skew", ++ "cs0_dq22_rx_de-skew", ++ "cs0_dq23_rx_de-skew", ++ "cs0_dqs2p_rx_de-skew", ++ "cs0_dqs2n_rx_de-skew", ++ "cs0_dm3_rx_de-skew", ++ "cs0_dq24_rx_de-skew", ++ "cs0_dq25_rx_de-skew", ++ "cs0_dq26_rx_de-skew", ++ "cs0_dq27_rx_de-skew", ++ "cs0_dq28_rx_de-skew", ++ "cs0_dq29_rx_de-skew", ++ "cs0_dq30_rx_de-skew", ++ "cs0_dq31_rx_de-skew", ++ "cs0_dqs3p_rx_de-skew", ++ "cs0_dqs3n_rx_de-skew", ++ "cs0_dm2_tx_de-skew", ++ "cs0_dq16_tx_de-skew", ++ "cs0_dq17_tx_de-skew", ++ "cs0_dq18_tx_de-skew", ++ "cs0_dq19_tx_de-skew", ++ "cs0_dq20_tx_de-skew", ++ "cs0_dq21_tx_de-skew", ++ "cs0_dq22_tx_de-skew", ++ "cs0_dq23_tx_de-skew", ++ "cs0_dqs2p_tx_de-skew", ++ "cs0_dqs2n_tx_de-skew", ++ "cs0_dm3_tx_de-skew", ++ "cs0_dq24_tx_de-skew", ++ "cs0_dq25_tx_de-skew", ++ "cs0_dq26_tx_de-skew", ++ "cs0_dq27_tx_de-skew", ++ "cs0_dq28_tx_de-skew", ++ "cs0_dq29_tx_de-skew", ++ "cs0_dq30_tx_de-skew", ++ "cs0_dq31_tx_de-skew", ++ "cs0_dqs3p_tx_de-skew", ++ "cs0_dqs3n_tx_de-skew", ++}; + -+ if (fd_flags & ~RK_DMA_HEAP_VALID_FD_FLAGS) -+ return ERR_PTR(-EINVAL); ++static const char * const rv1126_dts_cs1_a_timing[] = { ++ "cs1_dm0_rx_de-skew", ++ "cs1_dq0_rx_de-skew", ++ "cs1_dq1_rx_de-skew", ++ "cs1_dq2_rx_de-skew", ++ "cs1_dq3_rx_de-skew", ++ "cs1_dq4_rx_de-skew", ++ "cs1_dq5_rx_de-skew", ++ "cs1_dq6_rx_de-skew", ++ "cs1_dq7_rx_de-skew", ++ "cs1_dqs0p_rx_de-skew", ++ "cs1_dqs0n_rx_de-skew", ++ "cs1_dm1_rx_de-skew", ++ "cs1_dq8_rx_de-skew", ++ "cs1_dq9_rx_de-skew", ++ "cs1_dq10_rx_de-skew", ++ "cs1_dq11_rx_de-skew", ++ "cs1_dq12_rx_de-skew", ++ "cs1_dq13_rx_de-skew", ++ "cs1_dq14_rx_de-skew", ++ "cs1_dq15_rx_de-skew", ++ "cs1_dqs1p_rx_de-skew", ++ "cs1_dqs1n_rx_de-skew", ++ "cs1_dm0_tx_de-skew", ++ "cs1_dq0_tx_de-skew", ++ "cs1_dq1_tx_de-skew", ++ "cs1_dq2_tx_de-skew", ++ "cs1_dq3_tx_de-skew", ++ "cs1_dq4_tx_de-skew", ++ "cs1_dq5_tx_de-skew", ++ "cs1_dq6_tx_de-skew", ++ "cs1_dq7_tx_de-skew", ++ "cs1_dqs0p_tx_de-skew", ++ 
"cs1_dqs0n_tx_de-skew", ++ "cs1_dm1_tx_de-skew", ++ "cs1_dq8_tx_de-skew", ++ "cs1_dq9_tx_de-skew", ++ "cs1_dq10_tx_de-skew", ++ "cs1_dq11_tx_de-skew", ++ "cs1_dq12_tx_de-skew", ++ "cs1_dq13_tx_de-skew", ++ "cs1_dq14_tx_de-skew", ++ "cs1_dq15_tx_de-skew", ++ "cs1_dqs1p_tx_de-skew", ++ "cs1_dqs1n_tx_de-skew", ++}; + -+ /* -+ * Allocations from all heaps have to begin -+ * and end on page boundaries. -+ */ -+ len = PAGE_ALIGN(len); -+ if (!len) -+ return ERR_PTR(-EINVAL); ++static const char * const rv1126_dts_cs1_b_timing[] = { ++ "cs1_dm2_rx_de-skew", ++ "cs1_dq16_rx_de-skew", ++ "cs1_dq17_rx_de-skew", ++ "cs1_dq18_rx_de-skew", ++ "cs1_dq19_rx_de-skew", ++ "cs1_dq20_rx_de-skew", ++ "cs1_dq21_rx_de-skew", ++ "cs1_dq22_rx_de-skew", ++ "cs1_dq23_rx_de-skew", ++ "cs1_dqs2p_rx_de-skew", ++ "cs1_dqs2n_rx_de-skew", ++ "cs1_dm3_rx_de-skew", ++ "cs1_dq24_rx_de-skew", ++ "cs1_dq25_rx_de-skew", ++ "cs1_dq26_rx_de-skew", ++ "cs1_dq27_rx_de-skew", ++ "cs1_dq28_rx_de-skew", ++ "cs1_dq29_rx_de-skew", ++ "cs1_dq30_rx_de-skew", ++ "cs1_dq31_rx_de-skew", ++ "cs1_dqs3p_rx_de-skew", ++ "cs1_dqs3n_rx_de-skew", ++ "cs1_dm2_tx_de-skew", ++ "cs1_dq16_tx_de-skew", ++ "cs1_dq17_tx_de-skew", ++ "cs1_dq18_tx_de-skew", ++ "cs1_dq19_tx_de-skew", ++ "cs1_dq20_tx_de-skew", ++ "cs1_dq21_tx_de-skew", ++ "cs1_dq22_tx_de-skew", ++ "cs1_dq23_tx_de-skew", ++ "cs1_dqs2p_tx_de-skew", ++ "cs1_dqs2n_tx_de-skew", ++ "cs1_dm3_tx_de-skew", ++ "cs1_dq24_tx_de-skew", ++ "cs1_dq25_tx_de-skew", ++ "cs1_dq26_tx_de-skew", ++ "cs1_dq27_tx_de-skew", ++ "cs1_dq28_tx_de-skew", ++ "cs1_dq29_tx_de-skew", ++ "cs1_dq30_tx_de-skew", ++ "cs1_dq31_tx_de-skew", ++ "cs1_dqs3p_tx_de-skew", ++ "cs1_dqs3n_tx_de-skew", ++}; + -+ dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags, name); ++#endif /* __ROCKCHIP_DMC_TIMING_H__ */ + -+ if (IS_ENABLED(CONFIG_DMABUF_RK_HEAPS_DEBUG) && !IS_ERR(dmabuf)) -+ dma_buf_set_name(dmabuf, name); +diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig +index c86a4756a..82c7df8e4 100644 +--- a/drivers/dma-buf/Kconfig ++++ b/drivers/dma-buf/Kconfig +@@ -1,6 +1,39 @@ + # SPDX-License-Identifier: GPL-2.0-only + menu "DMABUF options" + ++config DMABUF_CACHE ++ bool "DMABUF cache attachment" ++ default ARCH_ROCKCHIP ++ depends on NO_GKI ++ help ++ This option support to store attachments in a list and destroy them by ++ set to a callback list in the dtor of dma-buf. + -+ return dmabuf; -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_buffer_alloc); ++config RK_DMABUF_DEBUG ++ bool "Rockchip DMABUF debug option" ++ depends on NO_GKI ++ select RK_DMABUF_PROCFS ++ help ++ This option support to debug all the dmabuf on db_list, allows to set ++ a name for dmabuf. If not sure, say N + -+int rk_dma_heap_bufferfd_alloc(struct rk_dma_heap *heap, size_t len, -+ unsigned int fd_flags, -+ unsigned int heap_flags, -+ const char *name) -+{ -+ struct dma_buf *dmabuf; -+ int fd; ++config RK_DMABUF_DEBUG_ADVANCED ++ bool "Rockchip DMABUF debug advanced option" ++ depends on RK_DMABUF_DEBUG ++ help ++ This option support to debug all the dmabuf on db_list, allows to attach ++ and map a dmabuf who has no attachment. If not sure, say N + -+ dmabuf = rk_dma_heap_buffer_alloc(heap, len, fd_flags, heap_flags, -+ name); ++config DMABUF_PARTIAL ++ bool "Support for partial cache maintenance" ++ help ++ In order to improve performance, allow dma-buf clients to ++ apply cache maintenance to only a subset of a dma-buf. 
+ -+ if (IS_ERR(dmabuf)) -+ return PTR_ERR(dmabuf); ++ Kernel clients will be able to use the dma_buf_begin_cpu_access_partial ++ and dma_buf_end_cpu_access_partial functions to only apply cache ++ maintenance to a range within the dma-buf. + -+ fd = dma_buf_fd(dmabuf, fd_flags); -+ if (fd < 0) { -+ dma_buf_put(dmabuf); -+ /* just return, as put will call release and that will free */ -+ } + config SYNC_FILE + bool "Explicit Synchronization Framework" + default n +@@ -30,6 +63,13 @@ config SW_SYNC + WARNING: improper use of this can result in deadlocking kernel + drivers from userspace. Intended for test and debug only. + ++config SW_SYNC_DEBUG ++ bool "SW Sync Debug" ++ depends on DEBUG_FS && SW_SYNC && NO_GKI ++ default SW_SYNC ++ help ++ To get current fence point and timeline status. + -+ return fd; + config UDMABUF + bool "userspace dmabuf misc driver" + default n +@@ -103,5 +143,6 @@ menuconfig DMABUF_SYSFS_STATS + in quite some performance problems. + + source "drivers/dma-buf/heaps/Kconfig" ++source "drivers/dma-buf/rk_heaps/Kconfig" + + endmenu +diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile +index cdb3bb049..525a73a16 100644 +--- a/drivers/dma-buf/Makefile ++++ b/drivers/dma-buf/Makefile +@@ -1,12 +1,15 @@ + # SPDX-License-Identifier: GPL-2.0-only +-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ ++obj-y := dma-buf-rk.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ + dma-fence-unwrap.o dma-resv.o +-obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o ++obj-$(CONFIG_DMABUF_HEAPS) += dma-heap-rk.o + obj-$(CONFIG_DMABUF_HEAPS) += heaps/ ++obj-$(CONFIG_DMABUF_CACHE) += dma-buf-cache.o + obj-$(CONFIG_SYNC_FILE) += sync_file.o +-obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o ++obj-$(CONFIG_SW_SYNC) += sw_sync.o ++obj-$(CONFIG_SW_SYNC_DEBUG) += sync_debug.o + obj-$(CONFIG_UDMABUF) += udmabuf.o + obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o ++obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP) += rk_heaps/ + + dmabuf_selftests-y := \ + selftest.o \ +diff --git a/drivers/dma-buf/dma-buf-cache.c b/drivers/dma-buf/dma-buf-cache.c +new file mode 100644 +index 000000000..5ec8896d3 +--- /dev/null ++++ b/drivers/dma-buf/dma-buf-cache.c +@@ -0,0 +1,198 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ */ + -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_bufferfd_alloc); ++#include ++#include ++#undef CONFIG_DMABUF_CACHE ++#include + -+struct page *rk_dma_heap_alloc_contig_pages(struct rk_dma_heap *heap, -+ size_t len, const char *name) -+{ -+ if (!heap->support_cma) { -+ WARN_ON(!heap->support_cma); -+ return ERR_PTR(-EINVAL); -+ } ++/* NOTE: dma-buf-cache APIs are not irq safe, please DO NOT run in irq context !! 
*/ + -+ len = PAGE_ALIGN(len); -+ if (!len) -+ return ERR_PTR(-EINVAL); ++struct dma_buf_cache_list { ++ struct list_head head; ++}; + -+ return heap->ops->alloc_contig_pages(heap, len, name); -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_alloc_contig_pages); ++struct dma_buf_cache { ++ struct list_head list; ++ struct dma_buf_attachment *attach; ++ enum dma_data_direction direction; ++ struct sg_table *sg_table; ++}; + -+void rk_dma_heap_free_contig_pages(struct rk_dma_heap *heap, -+ struct page *pages, size_t len, -+ const char *name) ++static int dma_buf_cache_destructor(struct dma_buf *dmabuf, void *dtor_data) +{ -+ if (!heap->support_cma) { -+ WARN_ON(!heap->support_cma); -+ return; -+ } -+ -+ return heap->ops->free_contig_pages(heap, pages, len, name); -+} -+EXPORT_SYMBOL_GPL(rk_dma_heap_free_contig_pages); ++ struct dma_buf_cache_list *data; ++ struct dma_buf_cache *cache, *tmp; + -+void rk_dma_heap_total_inc(struct rk_dma_heap *heap, size_t len) -+{ -+ mutex_lock(&rk_heap_list_lock); -+ heap->total_size += len; -+ mutex_unlock(&rk_heap_list_lock); -+} ++ mutex_lock(&dmabuf->cache_lock); + -+void rk_dma_heap_total_dec(struct rk_dma_heap *heap, size_t len) -+{ -+ mutex_lock(&rk_heap_list_lock); -+ if (WARN_ON(heap->total_size < len)) -+ heap->total_size = 0; -+ else -+ heap->total_size -= len; -+ mutex_unlock(&rk_heap_list_lock); -+} ++ data = dmabuf->dtor_data; + -+static int rk_dma_heap_open(struct inode *inode, struct file *file) -+{ -+ struct rk_dma_heap *heap; ++ list_for_each_entry_safe(cache, tmp, &data->head, list) { ++ if (!IS_ERR_OR_NULL(cache->sg_table)) ++ dma_buf_unmap_attachment(cache->attach, ++ cache->sg_table, ++ cache->direction); + -+ heap = xa_load(&rk_dma_heap_minors, iminor(inode)); -+ if (!heap) { -+ pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); -+ return -ENODEV; ++ dma_buf_detach(dmabuf, cache->attach); ++ list_del(&cache->list); ++ kfree(cache); + } + -+ /* instance data as context */ -+ file->private_data = heap; -+ nonseekable_open(inode, file); ++ mutex_unlock(&dmabuf->cache_lock); + ++ kfree(data); + return 0; +} + -+static long rk_dma_heap_ioctl_allocate(struct file *file, void *data) ++static struct dma_buf_cache * ++dma_buf_cache_get_cache(struct dma_buf_attachment *attach) +{ -+ struct rk_dma_heap_allocation_data *heap_allocation = data; -+ struct rk_dma_heap *heap = file->private_data; -+ int fd; ++ struct dma_buf_cache_list *data; ++ struct dma_buf_cache *cache; ++ struct dma_buf *dmabuf = attach->dmabuf; + -+ if (heap_allocation->fd) -+ return -EINVAL; ++ if (dmabuf->dtor != dma_buf_cache_destructor) ++ return NULL; + -+ fd = rk_dma_heap_bufferfd_alloc(heap, heap_allocation->len, -+ heap_allocation->fd_flags, -+ heap_allocation->heap_flags, NULL); -+ if (fd < 0) -+ return fd; ++ data = dmabuf->dtor_data; + -+ heap_allocation->fd = fd; ++ list_for_each_entry(cache, &data->head, list) { ++ if (cache->attach == attach) ++ return cache; ++ } + -+ return 0; ++ return NULL; +} + -+static unsigned int rk_dma_heap_ioctl_cmds[] = { -+ RK_DMA_HEAP_IOCTL_ALLOC, -+}; -+ -+static long rk_dma_heap_ioctl(struct file *file, unsigned int ucmd, -+ unsigned long arg) ++void dma_buf_cache_detach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attach) +{ -+ char stack_kdata[128]; -+ char *kdata = stack_kdata; -+ unsigned int kcmd; -+ unsigned int in_size, out_size, drv_size, ksize; -+ int nr = _IOC_NR(ucmd); -+ int ret = 0; -+ -+ if (nr >= ARRAY_SIZE(rk_dma_heap_ioctl_cmds)) -+ return -EINVAL; ++ struct dma_buf_cache *cache; + -+ /* Get the kernel ioctl cmd that 
matches */ -+ kcmd = rk_dma_heap_ioctl_cmds[nr]; ++ mutex_lock(&dmabuf->cache_lock); + -+ /* Figure out the delta between user cmd size and kernel cmd size */ -+ drv_size = _IOC_SIZE(kcmd); -+ out_size = _IOC_SIZE(ucmd); -+ in_size = out_size; -+ if ((ucmd & kcmd & IOC_IN) == 0) -+ in_size = 0; -+ if ((ucmd & kcmd & IOC_OUT) == 0) -+ out_size = 0; -+ ksize = max(max(in_size, out_size), drv_size); ++ cache = dma_buf_cache_get_cache(attach); ++ if (!cache) ++ dma_buf_detach(dmabuf, attach); + -+ /* If necessary, allocate buffer for ioctl argument */ -+ if (ksize > sizeof(stack_kdata)) { -+ kdata = kmalloc(ksize, GFP_KERNEL); -+ if (!kdata) -+ return -ENOMEM; -+ } ++ mutex_unlock(&dmabuf->cache_lock); ++} ++EXPORT_SYMBOL(dma_buf_cache_detach); + -+ if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { -+ ret = -EFAULT; -+ goto err; -+ } ++struct dma_buf_attachment *dma_buf_cache_attach(struct dma_buf *dmabuf, ++ struct device *dev) ++{ ++ struct dma_buf_attachment *attach; ++ struct dma_buf_cache_list *data; ++ struct dma_buf_cache *cache; + -+ /* zero out any difference between the kernel/user structure size */ -+ if (ksize > in_size) -+ memset(kdata + in_size, 0, ksize - in_size); ++ mutex_lock(&dmabuf->cache_lock); + -+ switch (kcmd) { -+ case RK_DMA_HEAP_IOCTL_ALLOC: -+ ret = rk_dma_heap_ioctl_allocate(file, kdata); -+ break; -+ default: -+ ret = -ENOTTY; -+ goto err; ++ if (!dmabuf->dtor) { ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) { ++ attach = ERR_PTR(-ENOMEM); ++ goto err_data; ++ } ++ INIT_LIST_HEAD(&data->head); ++ dma_buf_set_destructor(dmabuf, dma_buf_cache_destructor, data); + } + -+ if (copy_to_user((void __user *)arg, kdata, out_size) != 0) -+ ret = -EFAULT; -+err: -+ if (kdata != stack_kdata) -+ kfree(kdata); -+ return ret; -+} ++ if (dmabuf->dtor && dmabuf->dtor != dma_buf_cache_destructor) { ++ attach = dma_buf_attach(dmabuf, dev); ++ goto attach_done; ++ } + -+static const struct file_operations rk_dma_heap_fops = { -+ .owner = THIS_MODULE, -+ .open = rk_dma_heap_open, -+ .unlocked_ioctl = rk_dma_heap_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = rk_dma_heap_ioctl, -+#endif -+}; ++ data = dmabuf->dtor_data; + -+/** -+ * rk_dma_heap_get_drvdata() - get per-subdriver data for the heap -+ * @heap: DMA-Heap to retrieve private data for -+ * -+ * Returns: -+ * The per-subdriver data for the heap. 
-+ */ -+void *rk_dma_heap_get_drvdata(struct rk_dma_heap *heap) -+{ -+ return heap->priv; -+} ++ list_for_each_entry(cache, &data->head, list) { ++ if (cache->attach->dev == dev) { ++ /* Already attached */ ++ attach = cache->attach; ++ goto attach_done; ++ } ++ } + -+static void rk_dma_heap_release(struct kref *ref) -+{ -+ struct rk_dma_heap *heap = container_of(ref, struct rk_dma_heap, refcount); -+ int minor = MINOR(heap->heap_devt); ++ cache = kzalloc(sizeof(*cache), GFP_KERNEL); ++ if (!cache) { ++ attach = ERR_PTR(-ENOMEM); ++ goto err_cache; ++ } ++ /* Cache attachment */ ++ attach = dma_buf_attach(dmabuf, dev); ++ if (IS_ERR_OR_NULL(attach)) ++ goto err_attach; + -+ /* Note, we already holding the rk_heap_list_lock here */ -+ list_del(&heap->list); ++ cache->attach = attach; ++ list_add(&cache->list, &data->head); + -+ device_destroy(rk_dma_heap_class, heap->heap_devt); -+ cdev_del(&heap->heap_cdev); -+ xa_erase(&rk_dma_heap_minors, minor); ++attach_done: ++ mutex_unlock(&dmabuf->cache_lock); ++ return attach; + -+ kfree(heap); ++err_attach: ++ kfree(cache); ++err_cache: ++ kfree(data); ++ dma_buf_set_destructor(dmabuf, NULL, NULL); ++err_data: ++ mutex_unlock(&dmabuf->cache_lock); ++ return attach; +} ++EXPORT_SYMBOL(dma_buf_cache_attach); + -+void rk_dma_heap_put(struct rk_dma_heap *h) ++void dma_buf_cache_unmap_attachment(struct dma_buf_attachment *attach, ++ struct sg_table *sg_table, ++ enum dma_data_direction direction) +{ -+ /* -+ * Take the rk_heap_list_lock now to avoid racing with code -+ * scanning the list and then taking a kref. -+ */ -+ mutex_lock(&rk_heap_list_lock); -+ kref_put(&h->refcount, rk_dma_heap_release); -+ mutex_unlock(&rk_heap_list_lock); -+} ++ struct dma_buf *dmabuf = attach->dmabuf; ++ struct dma_buf_cache *cache; + -+/** -+ * rk_dma_heap_get_dev() - get device struct for the heap -+ * @heap: DMA-Heap to retrieve device struct from -+ * -+ * Returns: -+ * The device struct for the heap. -+ */ -+struct device *rk_dma_heap_get_dev(struct rk_dma_heap *heap) -+{ -+ return heap->heap_dev; -+} ++ mutex_lock(&dmabuf->cache_lock); + -+/** -+ * rk_dma_heap_get_name() - get heap name -+ * @heap: DMA-Heap to retrieve private data for -+ * -+ * Returns: -+ * The char* for the heap name. 
-+ */ -+const char *rk_dma_heap_get_name(struct rk_dma_heap *heap) -+{ -+ return heap->name; ++ cache = dma_buf_cache_get_cache(attach); ++ if (!cache) ++ dma_buf_unmap_attachment(attach, sg_table, direction); ++ ++ mutex_unlock(&dmabuf->cache_lock); +} ++EXPORT_SYMBOL(dma_buf_cache_unmap_attachment); + -+struct rk_dma_heap *rk_dma_heap_add(const struct rk_dma_heap_export_info *exp_info) ++struct sg_table *dma_buf_cache_map_attachment(struct dma_buf_attachment *attach, ++ enum dma_data_direction direction) +{ -+ struct rk_dma_heap *heap, *err_ret; -+ unsigned int minor; -+ int ret; ++ struct dma_buf *dmabuf = attach->dmabuf; ++ struct dma_buf_cache *cache; ++ struct sg_table *sg_table; + -+ if (!exp_info->name || !strcmp(exp_info->name, "")) { -+ pr_err("rk_dma_heap: Cannot add heap without a name\n"); -+ return ERR_PTR(-EINVAL); -+ } ++ mutex_lock(&dmabuf->cache_lock); + -+ if (!exp_info->ops || !exp_info->ops->allocate) { -+ pr_err("rk_dma_heap: Cannot add heap with invalid ops struct\n"); -+ return ERR_PTR(-EINVAL); ++ cache = dma_buf_cache_get_cache(attach); ++ if (!cache) { ++ sg_table = dma_buf_map_attachment(attach, direction); ++ goto map_done; + } -+ -+ /* check the name is unique */ -+ heap = rk_dma_heap_find(exp_info->name); -+ if (heap) { -+ pr_err("rk_dma_heap: Already registered heap named %s\n", -+ exp_info->name); -+ rk_dma_heap_put(heap); -+ return ERR_PTR(-EINVAL); ++ if (cache->sg_table) { ++ /* Already mapped */ ++ if (cache->direction == direction) { ++ sg_table = cache->sg_table; ++ goto map_done; ++ } ++ /* Different directions */ ++ dma_buf_unmap_attachment(attach, cache->sg_table, ++ cache->direction); + } + -+ heap = kzalloc(sizeof(*heap), GFP_KERNEL); -+ if (!heap) -+ return ERR_PTR(-ENOMEM); ++ /* Cache map */ ++ sg_table = dma_buf_map_attachment(attach, direction); ++ cache->sg_table = sg_table; ++ cache->direction = direction; + -+ kref_init(&heap->refcount); -+ heap->name = exp_info->name; -+ heap->ops = exp_info->ops; -+ heap->priv = exp_info->priv; -+ heap->support_cma = exp_info->support_cma; -+ INIT_LIST_HEAD(&heap->dmabuf_list); -+ INIT_LIST_HEAD(&heap->contig_list); -+ mutex_init(&heap->dmabuf_lock); -+ mutex_init(&heap->contig_lock); ++map_done: ++ mutex_unlock(&dmabuf->cache_lock); ++ return sg_table; ++} ++EXPORT_SYMBOL(dma_buf_cache_map_attachment); +diff --git a/drivers/dma-buf/dma-buf-rk.c b/drivers/dma-buf/dma-buf-rk.c +new file mode 100644 +index 000000000..c2f9f3edd +--- /dev/null ++++ b/drivers/dma-buf/dma-buf-rk.c +@@ -0,0 +1,1937 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Framework for buffer objects that can be shared across devices/subsystems. ++ * ++ * Copyright(C) 2011 Linaro Limited. All rights reserved. ++ * Author: Sumit Semwal ++ * ++ * Many thanks to linaro-mm-sig list, and specially ++ * Arnd Bergmann , Rob Clark and ++ * Daniel Vetter for their support in creation and ++ * refining of this idea. 
++ */ + -+ /* Find unused minor number */ -+ ret = xa_alloc(&rk_dma_heap_minors, &minor, heap, -+ XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); -+ if (ret < 0) { -+ pr_err("rk_dma_heap: Unable to get minor number for heap\n"); -+ err_ret = ERR_PTR(ret); -+ goto err0; -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* Create device */ -+ heap->heap_devt = MKDEV(MAJOR(rk_dma_heap_devt), minor); ++#include ++#include + -+ cdev_init(&heap->heap_cdev, &rk_dma_heap_fops); -+ ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); -+ if (ret < 0) { -+ pr_err("dma_heap: Unable to add char device\n"); -+ err_ret = ERR_PTR(ret); -+ goto err1; -+ } ++#include "dma-buf-sysfs-stats.h" ++#include "dma-buf-process-info.h" + -+ heap->heap_dev = device_create(rk_dma_heap_class, -+ NULL, -+ heap->heap_devt, -+ NULL, -+ heap->name); -+ if (IS_ERR(heap->heap_dev)) { -+ pr_err("rk_dma_heap: Unable to create device\n"); -+ err_ret = ERR_CAST(heap->heap_dev); -+ goto err2; -+ } ++static inline int is_dma_buf_file(struct file *); + -+ heap->procfs = proc_rk_dma_heap_dir; ++struct dma_buf_list { ++ struct list_head head; ++ struct mutex lock; ++}; + -+ /* Make sure it doesn't disappear on us */ -+ heap->heap_dev = get_device(heap->heap_dev); ++static struct dma_buf_list db_list; + -+ /* Add heap to the list */ -+ mutex_lock(&rk_heap_list_lock); -+ list_add(&heap->list, &rk_heap_list); -+ mutex_unlock(&rk_heap_list_lock); ++/** ++ * dma_buf_get_each - Helps in traversing the db_list and calls the ++ * callback function which can extract required info out of each ++ * dmabuf. ++ * The db_list needs to be locked to prevent the db_list from being ++ * dynamically updated during the traversal process. ++ * ++ * @callback: [in] Handle for each dmabuf buffer in db_list. ++ * @private: [in] User-defined, used to pass in when callback is ++ * called. ++ * ++ * Returns 0 on success, otherwise returns a non-zero value for ++ * mutex_lock_interruptible or callback. 
++ */ ++int dma_buf_get_each(int (*callback)(const struct dma_buf *dmabuf, ++ void *private), void *private) ++{ ++ struct dma_buf *buf; ++ int ret = mutex_lock_interruptible(&db_list.lock); + -+ return heap; ++ if (ret) ++ return ret; + -+err2: -+ cdev_del(&heap->heap_cdev); -+err1: -+ xa_erase(&rk_dma_heap_minors, minor); -+err0: -+ kfree(heap); -+ return err_ret; ++ list_for_each_entry(buf, &db_list.head, list_node) { ++ ret = callback(buf, private); ++ if (ret) ++ break; ++ } ++ mutex_unlock(&db_list.lock); ++ return ret; +} ++EXPORT_SYMBOL_NS_GPL(dma_buf_get_each, MINIDUMP); + -+static char *rk_dma_heap_devnode(struct device *dev, umode_t *mode) ++#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) ++static size_t db_total_size; ++static size_t db_peak_size; ++ ++void dma_buf_reset_peak_size(void) +{ -+ return kasprintf(GFP_KERNEL, "rk_dma_heap/%s", dev_name(dev)); ++ mutex_lock(&db_list.lock); ++ db_peak_size = 0; ++ mutex_unlock(&db_list.lock); +} ++EXPORT_SYMBOL_GPL(dma_buf_reset_peak_size); + -+static int rk_dma_heap_dump_dmabuf(const struct dma_buf *dmabuf, void *data) ++size_t dma_buf_get_peak_size(void) +{ -+ struct rk_dma_heap *heap = (struct rk_dma_heap *)data; -+ struct rk_dma_heap_dmabuf *buf; -+ struct dma_buf_attachment *a; -+ phys_addr_t size; -+ int attach_count; -+ int ret; ++ size_t sz; + -+ if (!strcmp(dmabuf->exp_name, heap->name)) { -+ seq_printf(heap->s, "dma-heap:<%s> -dmabuf", heap->name); -+ mutex_lock(&heap->dmabuf_lock); -+ list_for_each_entry(buf, &heap->dmabuf_list, node) { -+ if (buf->dmabuf->file->f_inode->i_ino == -+ dmabuf->file->f_inode->i_ino) { -+ seq_printf(heap->s, -+ "\ti_ino = %ld\n", -+ dmabuf->file->f_inode->i_ino); -+ size = buf->end - buf->start + 1; -+ seq_printf(heap->s, -+ "\tAlloc by (%-20s)\t[%pa-%pa]\t%pa (%lu KiB)\n", -+ dmabuf->name, &buf->start, -+ &buf->end, &size, K(size)); -+ seq_puts(heap->s, "\t\tAttached Devices:\n"); -+ attach_count = 0; -+ ret = dma_resv_lock_interruptible(dmabuf->resv, -+ NULL); -+ if (ret) -+ goto error_unlock; -+ list_for_each_entry(a, &dmabuf->attachments, -+ node) { -+ seq_printf(heap->s, "\t\t%s\n", -+ dev_name(a->dev)); -+ attach_count++; -+ } -+ dma_resv_unlock(dmabuf->resv); -+ seq_printf(heap->s, -+ "Total %d devices attached\n\n", -+ attach_count); -+ } -+ } -+ mutex_unlock(&heap->dmabuf_lock); -+ } ++ mutex_lock(&db_list.lock); ++ sz = db_peak_size; ++ mutex_unlock(&db_list.lock); + -+ return 0; -+error_unlock: -+ mutex_unlock(&heap->dmabuf_lock); -+ return ret; ++ return sz; +} ++EXPORT_SYMBOL_GPL(dma_buf_get_peak_size); + -+static int rk_dma_heap_dump_contig(void *data) ++size_t dma_buf_get_total_size(void) +{ -+ struct rk_dma_heap *heap = (struct rk_dma_heap *)data; -+ struct rk_dma_heap_contig_buf *buf; -+ phys_addr_t size; ++ size_t sz; + -+ mutex_lock(&heap->contig_lock); -+ list_for_each_entry(buf, &heap->contig_list, node) { -+ size = buf->end - buf->start + 1; -+ seq_printf(heap->s, "dma-heap:<%s> -non dmabuf\n", heap->name); -+ seq_printf(heap->s, "\tAlloc by (%-20s)\t[%pa-%pa]\t%pa (%lu KiB)\n", -+ buf->orig_alloc, &buf->start, &buf->end, &size, K(size)); -+ } -+ mutex_unlock(&heap->contig_lock); ++ mutex_lock(&db_list.lock); ++ sz = db_total_size; ++ mutex_unlock(&db_list.lock); + -+ return 0; ++ return sz; +} ++EXPORT_SYMBOL_GPL(dma_buf_get_total_size); ++#endif + -+static ssize_t rk_total_pools_kb_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) ++static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) +{ -+ struct rk_dma_heap *heap; -+ u64 
total_pool_size = 0; ++ struct dma_buf *dmabuf; ++ char name[DMA_BUF_NAME_LEN]; ++ size_t ret = 0; + -+ mutex_lock(&rk_heap_list_lock); -+ list_for_each_entry(heap, &rk_heap_list, list) -+ if (heap->ops->get_pool_size) -+ total_pool_size += heap->ops->get_pool_size(heap); -+ mutex_unlock(&rk_heap_list_lock); ++ dmabuf = dentry->d_fsdata; ++ spin_lock(&dmabuf->name_lock); ++ if (dmabuf->name) ++ ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN); ++ spin_unlock(&dmabuf->name_lock); + -+ return sysfs_emit(buf, "%llu\n", total_pool_size / 1024); ++ return dynamic_dname(buffer, buflen, "/%s:%s", ++ dentry->d_name.name, ret > 0 ? name : ""); +} + -+static struct kobj_attribute rk_total_pools_kb_attr = -+ __ATTR_RO(rk_total_pools_kb); ++static void dma_buf_release(struct dentry *dentry) ++{ ++ struct dma_buf *dmabuf; ++#ifdef CONFIG_DMABUF_CACHE ++ int dtor_ret = 0; ++#endif + -+static struct attribute *rk_dma_heap_sysfs_attrs[] = { -+ &rk_total_pools_kb_attr.attr, -+ NULL, -+}; ++ dmabuf = dentry->d_fsdata; ++ if (unlikely(!dmabuf)) ++ return; + -+ATTRIBUTE_GROUPS(rk_dma_heap_sysfs); ++ BUG_ON(dmabuf->vmapping_counter); + -+static struct kobject *rk_dma_heap_kobject; ++ /* ++ * If you hit this BUG() it could mean: ++ * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else ++ * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback ++ */ ++ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); + -+static int rk_dma_heap_sysfs_setup(void) -+{ -+ int ret; ++ dma_buf_stats_teardown(dmabuf); ++#ifdef CONFIG_DMABUF_CACHE ++ if (dmabuf->dtor) ++ dtor_ret = dmabuf->dtor(dmabuf, dmabuf->dtor_data); + -+ rk_dma_heap_kobject = kobject_create_and_add("rk_dma_heap", -+ kernel_kobj); -+ if (!rk_dma_heap_kobject) -+ return -ENOMEM; ++ if (!dtor_ret) ++#endif ++ dmabuf->ops->release(dmabuf); + -+ ret = sysfs_create_groups(rk_dma_heap_kobject, -+ rk_dma_heap_sysfs_groups); -+ if (ret) { -+ kobject_put(rk_dma_heap_kobject); -+ return ret; -+ } ++ if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) ++ dma_resv_fini(dmabuf->resv); + -+ return 0; ++ WARN_ON(!list_empty(&dmabuf->attachments)); ++ module_put(dmabuf->owner); ++ kfree(dmabuf->name); ++ kfree(dmabuf); +} + -+static void rk_dma_heap_sysfs_teardown(void) ++static int dma_buf_file_release(struct inode *inode, struct file *file) +{ -+ kobject_put(rk_dma_heap_kobject); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+ -+static struct dentry *rk_dma_heap_debugfs_dir; ++ struct dma_buf *dmabuf; + -+static int rk_dma_heap_debug_show(struct seq_file *s, void *unused) -+{ -+ struct rk_dma_heap *heap; -+ unsigned long total = 0; ++ if (!is_dma_buf_file(file)) ++ return -EINVAL; + -+ mutex_lock(&rk_heap_list_lock); -+ list_for_each_entry(heap, &rk_heap_list, list) { -+ heap->s = s; -+ dma_buf_get_each(rk_dma_heap_dump_dmabuf, heap); -+ rk_dma_heap_dump_contig(heap); -+ total += heap->total_size; ++ dmabuf = file->private_data; ++ if (dmabuf) { ++ mutex_lock(&db_list.lock); ++#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) ++ db_total_size -= dmabuf->size; ++#endif ++ list_del(&dmabuf->list_node); ++ mutex_unlock(&db_list.lock); + } -+ seq_printf(s, "\nTotal : 0x%lx (%lu KiB)\n", total, K(total)); -+ mutex_unlock(&rk_heap_list_lock); + + return 0; +} -+DEFINE_SHOW_ATTRIBUTE(rk_dma_heap_debug); + -+static int rk_dma_heap_init_debugfs(void) -+{ -+ struct dentry *d; -+ int err = 0; ++static const struct dentry_operations dma_buf_dentry_ops = { ++ .d_dname = dmabuffs_dname, ++ .d_release = dma_buf_release, ++}; + -+ d = 
debugfs_create_dir("rk_dma_heap", NULL); -+ if (IS_ERR(d)) -+ return PTR_ERR(d); ++static struct vfsmount *dma_buf_mnt; + -+ rk_dma_heap_debugfs_dir = d; ++static int dma_buf_fs_init_context(struct fs_context *fc) ++{ ++ struct pseudo_fs_context *ctx; + -+ d = debugfs_create_file("dma_heap_info", 0444, -+ rk_dma_heap_debugfs_dir, NULL, -+ &rk_dma_heap_debug_fops); -+ if (IS_ERR(d)) { -+ dma_heap_print("rk_dma_heap : debugfs: failed to create node bufinfo\n"); -+ debugfs_remove_recursive(rk_dma_heap_debugfs_dir); -+ rk_dma_heap_debugfs_dir = NULL; -+ err = PTR_ERR(d); -+ } -+ -+ return err; -+} -+#else -+static inline int rk_dma_heap_init_debugfs(void) -+{ ++ ctx = init_pseudo(fc, DMA_BUF_MAGIC); ++ if (!ctx) ++ return -ENOMEM; ++ ctx->dops = &dma_buf_dentry_ops; + return 0; +} -+#endif -+ -+static int rk_dma_heap_proc_show(struct seq_file *s, void *unused) -+{ -+ struct rk_dma_heap *heap; -+ unsigned long total = 0; -+ -+ mutex_lock(&rk_heap_list_lock); -+ list_for_each_entry(heap, &rk_heap_list, list) { -+ heap->s = s; -+ dma_buf_get_each(rk_dma_heap_dump_dmabuf, heap); -+ rk_dma_heap_dump_contig(heap); -+ total += heap->total_size; -+ } -+ seq_printf(s, "\nTotal : 0x%lx (%lu KiB)\n", total, K(total)); -+ mutex_unlock(&rk_heap_list_lock); + -+ return 0; -+} ++static struct file_system_type dma_buf_fs_type = { ++ .name = "dmabuf", ++ .init_fs_context = dma_buf_fs_init_context, ++ .kill_sb = kill_anon_super, ++}; + -+static int rk_dma_heap_info_proc_open(struct inode *inode, -+ struct file *file) ++static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma) +{ -+ return single_open(file, rk_dma_heap_proc_show, NULL); -+} ++ struct dma_buf *dmabuf; + -+static const struct proc_ops rk_dma_heap_info_proc_fops = { -+ .proc_open = rk_dma_heap_info_proc_open, -+ .proc_read = seq_read, -+ .proc_lseek = seq_lseek, -+ .proc_release = single_release, -+}; ++ if (!is_dma_buf_file(file)) ++ return -EINVAL; + -+static int rk_dma_heap_init_proc(void) -+{ -+ proc_rk_dma_heap_dir = proc_mkdir("rk_dma_heap", NULL); -+ if (!proc_rk_dma_heap_dir) { -+ pr_err("create rk_dma_heap proc dir error\n"); -+ return -ENOENT; -+ } ++ dmabuf = file->private_data; + -+ proc_create("dma_heap_info", 0644, proc_rk_dma_heap_dir, -+ &rk_dma_heap_info_proc_fops); ++ /* check if buffer supports mmap */ ++ if (!dmabuf->ops->mmap) ++ return -EINVAL; + -+ return 0; ++ /* check for overflowing the buffer's size */ ++ if (vma->vm_pgoff + vma_pages(vma) > ++ dmabuf->size >> PAGE_SHIFT) ++ return -EINVAL; ++ ++ return dmabuf->ops->mmap(dmabuf, vma); +} + -+static int rk_dma_heap_init(void) ++static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) +{ -+ int ret; -+ -+ ret = rk_dma_heap_sysfs_setup(); -+ if (ret) -+ return ret; ++ struct dma_buf *dmabuf; ++ loff_t base; + -+ ret = alloc_chrdev_region(&rk_dma_heap_devt, 0, NUM_HEAP_MINORS, -+ DEVNAME); -+ if (ret) -+ goto err_chrdev; ++ if (!is_dma_buf_file(file)) ++ return -EBADF; + -+ rk_dma_heap_class = class_create(THIS_MODULE, DEVNAME); -+ if (IS_ERR(rk_dma_heap_class)) { -+ ret = PTR_ERR(rk_dma_heap_class); -+ goto err_class; -+ } -+ rk_dma_heap_class->devnode = rk_dma_heap_devnode; ++ dmabuf = file->private_data; + -+ rk_dma_heap_init_debugfs(); -+ rk_dma_heap_init_proc(); ++ /* only support discovering the end of the buffer, ++ but also allow SEEK_SET to maintain the idiomatic ++ SEEK_END(0), SEEK_CUR(0) pattern */ ++ if (whence == SEEK_END) ++ base = dmabuf->size; ++ else if (whence == SEEK_SET) ++ base = 0; ++ else ++ return -EINVAL; + -+ 
return 0; ++ if (offset != 0) ++ return -EINVAL; + -+err_class: -+ unregister_chrdev_region(rk_dma_heap_devt, NUM_HEAP_MINORS); -+err_chrdev: -+ rk_dma_heap_sysfs_teardown(); -+ return ret; ++ return base + offset; +} -+subsys_initcall(rk_dma_heap_init); -diff --git a/drivers/dma-buf/rk_heaps/rk-dma-heap.h b/drivers/dma-buf/rk_heaps/rk-dma-heap.h -new file mode 100644 -index 000000000..3bc750b02 ---- /dev/null -+++ b/drivers/dma-buf/rk_heaps/rk-dma-heap.h -@@ -0,0 +1,178 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * DMABUF Heaps Allocation Infrastructure -+ * -+ * Copyright (C) 2011 Google, Inc. -+ * Copyright (C) 2019 Linaro Ltd. -+ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. -+ * Author: Simon Xue -+ */ -+ -+#ifndef _RK_DMA_HEAPS_H -+#define _RK_DMA_HEAPS_H -+ -+#include -+#include -+#include -+#include -+ -+#if defined(CONFIG_DMABUF_RK_HEAPS_DEBUG_PRINT) -+#define dma_heap_print(fmt, ...) \ -+ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -+#else -+#define dma_heap_print(fmt, ...) \ -+ no_printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -+#endif -+ -+#define RK_DMA_HEAP_NAME_LEN 16 -+ -+struct rk_vmap_pfn_data { -+ unsigned long pfn; /* first pfn of contiguous */ -+ pgprot_t prot; -+}; + +/** -+ * struct rk_dma_heap_ops - ops to operate on a given heap -+ * @allocate: allocate dmabuf and return struct dma_buf ptr -+ * @get_pool_size: if heap maintains memory pools, get pool size in bytes ++ * DOC: implicit fence polling + * -+ * allocate returns dmabuf on success, ERR_PTR(-errno) on error. -+ */ -+struct rk_dma_heap_ops { -+ struct dma_buf *(*allocate)(struct rk_dma_heap *heap, -+ unsigned long len, -+ unsigned long fd_flags, -+ unsigned long heap_flags, -+ const char *name); -+ struct page *(*alloc_contig_pages)(struct rk_dma_heap *heap, -+ size_t len, const char *name); -+ void (*free_contig_pages)(struct rk_dma_heap *heap, -+ struct page *pages, size_t len, -+ const char *name); -+ long (*get_pool_size)(struct rk_dma_heap *heap); -+}; -+ -+/** -+ * struct rk_dma_heap_export_info - information needed to export a new dmabuf heap -+ * @name: used for debugging/device-node name -+ * @ops: ops struct for this heap -+ * @priv: heap exporter private data ++ * To support cross-device and cross-driver synchronization of buffer access ++ * implicit fences (represented internally in the kernel with &struct dma_fence) ++ * can be attached to a &dma_buf. The glue for that and a few related things are ++ * provided in the &dma_resv structure. + * -+ * Information needed to export a new dmabuf heap. -+ */ -+struct rk_dma_heap_export_info { -+ const char *name; -+ const struct rk_dma_heap_ops *ops; -+ void *priv; -+ bool support_cma; -+}; -+ -+/** -+ * struct rk_dma_heap - represents a dmabuf heap in the system -+ * @name: used for debugging/device-node name -+ * @ops: ops struct for this heap -+ * @heap_devt heap device node -+ * @list list head connecting to list of heaps -+ * @heap_cdev heap char device -+ * @heap_dev heap device struct ++ * Userspace can query the state of these implicitly tracked fences using poll() ++ * and related system calls: + * -+ * Represents a heap of memory from which buffers can be made. 
-+ */ -+struct rk_dma_heap { -+ const char *name; -+ const struct rk_dma_heap_ops *ops; -+ void *priv; -+ dev_t heap_devt; -+ struct list_head list; -+ struct list_head dmabuf_list; /* dmabuf attach to this node */ -+ struct mutex dmabuf_lock; -+ struct list_head contig_list; /* contig buffer attach to this node */ -+ struct mutex contig_lock; -+ struct cdev heap_cdev; -+ struct kref refcount; -+ struct device *heap_dev; -+ bool support_cma; -+ struct seq_file *s; -+ struct proc_dir_entry *procfs; -+ unsigned long total_size; -+}; -+ -+struct rk_dma_heap_dmabuf { -+ struct list_head node; -+ struct dma_buf *dmabuf; -+ const char *orig_alloc; -+ phys_addr_t start; -+ phys_addr_t end; -+}; -+ -+struct rk_dma_heap_contig_buf { -+ struct list_head node; -+ const char *orig_alloc; -+ phys_addr_t start; -+ phys_addr_t end; -+}; -+ -+/** -+ * rk_dma_heap_get_drvdata() - get per-heap driver data -+ * @heap: DMA-Heap to retrieve private data for ++ * - Checking for EPOLLIN, i.e. read access, can be use to query the state of the ++ * most recent write or exclusive fence. + * -+ * Returns: -+ * The per-heap data for the heap. -+ */ -+void *rk_dma_heap_get_drvdata(struct rk_dma_heap *heap); -+ -+/** -+ * rk_dma_heap_get_dev() - get device struct for the heap -+ * @heap: DMA-Heap to retrieve device struct from ++ * - Checking for EPOLLOUT, i.e. write access, can be used to query the state of ++ * all attached fences, shared and exclusive ones. + * -+ * Returns: -+ * The device struct for the heap. -+ */ -+struct device *rk_dma_heap_get_dev(struct rk_dma_heap *heap); -+ -+/** -+ * rk_dma_heap_get_name() - get heap name -+ * @heap: DMA-Heap to retrieve private data for ++ * Note that this only signals the completion of the respective fences, i.e. the ++ * DMA transfers are complete. Cache flushing and any other necessary ++ * preparations before CPU access can begin still need to happen. + * -+ * Returns: -+ * The char* for the heap name. -+ */ -+const char *rk_dma_heap_get_name(struct rk_dma_heap *heap); -+ -+/** -+ * rk_dma_heap_add - adds a heap to dmabuf heaps -+ * @exp_info: information needed to register this heap ++ * As an alternative to poll(), the set of fences on DMA buffer can be ++ * exported as a &sync_file using &dma_buf_sync_file_export. 
+ */ -+struct rk_dma_heap *rk_dma_heap_add(const struct rk_dma_heap_export_info *exp_info); + -+/** -+ * rk_dma_heap_put - drops a reference to a dmabuf heaps, potentially freeing it -+ * @heap: heap pointer -+ */ -+void rk_dma_heap_put(struct rk_dma_heap *heap); ++static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) ++{ ++ struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb; ++ struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll); ++ unsigned long flags; + -+/** -+ * rk_vmap_contig_pfn - Map contiguous pfn to vm area -+ * @pfn: indicate the first pfn of contig -+ * @count: count of pfns -+ * @prot: for mapping -+ */ -+void *rk_vmap_contig_pfn(unsigned long pfn, unsigned int count, -+ pgprot_t prot); -+/** -+ * rk_dma_heap_total_inc - Increase total buffer size -+ * @heap: dma_heap to increase -+ * @len: length to increase -+ */ -+void rk_dma_heap_total_inc(struct rk_dma_heap *heap, size_t len); -+/** -+ * rk_dma_heap_total_dec - Decrease total buffer size -+ * @heap: dma_heap to decrease -+ * @len: length to decrease -+ */ -+void rk_dma_heap_total_dec(struct rk_dma_heap *heap, size_t len); -+/** -+ * rk_dma_heap_get_cma - get cma structure -+ */ -+struct cma *rk_dma_heap_get_cma(void); -+#endif /* _DMA_HEAPS_H */ -diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c -index f0a35277f..dcbc3edda 100644 ---- a/drivers/dma-buf/sw_sync.c -+++ b/drivers/dma-buf/sw_sync.c -@@ -7,6 +7,8 @@ - - #include - #include -+#include -+#include - #include - #include - #include -@@ -410,3 +412,13 @@ const struct file_operations sw_sync_debugfs_fops = { - .unlocked_ioctl = sw_sync_ioctl, - .compat_ioctl = compat_ptr_ioctl, - }; ++ spin_lock_irqsave(&dcb->poll->lock, flags); ++ wake_up_locked_poll(dcb->poll, dcb->active); ++ dcb->active = 0; ++ spin_unlock_irqrestore(&dcb->poll->lock, flags); ++ dma_fence_put(fence); ++ /* Paired with get_file in dma_buf_poll */ ++ fput(dmabuf->file); ++} + -+static struct miscdevice sw_sync_dev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "sw_sync", -+ .fops = &sw_sync_debugfs_fops, -+}; ++static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, ++ struct dma_buf_poll_cb_t *dcb) ++{ ++ struct dma_resv_iter cursor; ++ struct dma_fence *fence; ++ int r; + -+module_misc_device(sw_sync_dev); ++ dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), ++ fence) { ++ dma_fence_get(fence); ++ r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); ++ if (!r) ++ return true; ++ dma_fence_put(fence); ++ } + -+MODULE_LICENSE("GPL v2"); -diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig -index 110e99b86..012a4acd0 100644 ---- a/drivers/edac/Kconfig -+++ b/drivers/edac/Kconfig -@@ -561,4 +561,11 @@ config EDAC_NPCM - error detection (in-line ECC in which a section 1/8th of the memory - device used to store data is used for ECC storage). - -+config EDAC_ROCKCHIP -+ tristate "Rockchip DDR ECC" -+ depends on ARCH_ROCKCHIP && HAVE_ARM_SMCCC -+ help -+ Support for error detection and correction on the -+ rockchip family of SOCs. 
++ return false; ++} + - endif # EDAC -diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile -index 446364264..6c6f0caca 100644 ---- a/drivers/edac/Makefile -+++ b/drivers/edac/Makefile -@@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o - obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o - obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o - obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o -+obj-$(CONFIG_EDAC_ROCKCHIP) += rockchip_edac.o -diff --git a/drivers/edac/rockchip_edac.c b/drivers/edac/rockchip_edac.c -new file mode 100644 -index 000000000..4b1317bed ---- /dev/null -+++ b/drivers/edac/rockchip_edac.c -@@ -0,0 +1,358 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (c) 2023 Rockchip Electronics Co., Ltd. -+ */ ++static __poll_t dma_buf_poll(struct file *file, poll_table *poll) ++{ ++ struct dma_buf *dmabuf; ++ struct dma_resv *resv; ++ __poll_t events; + -+#include -+#include -+#include -+#include -+#include ++ dmabuf = file->private_data; ++ if (!dmabuf || !dmabuf->resv) ++ return EPOLLERR; + -+#include "edac_module.h" ++ resv = dmabuf->resv; + -+#define MAX_CS (4) ++ poll_wait(file, &dmabuf->poll, poll); + -+#define MAX_CH (1) ++ events = poll_requested_events(poll) & (EPOLLIN | EPOLLOUT); ++ if (!events) ++ return 0; + -+#define RK_EDAC_MOD "1" ++ dma_resv_lock(resv, NULL); + -+/* ECCCADDR0 */ -+#define ECC_CORR_RANK_SHIFT (24) -+#define ECC_CORR_RANK_MASK (0x3) -+#define ECC_CORR_ROW_MASK (0x3ffff) -+/* ECCCADDR1 */ -+#define ECC_CORR_CID_SHIFT (28) -+#define ECC_CORR_CID_MASK (0x3) -+#define ECC_CORR_BG_SHIFT (24) -+#define ECC_CORR_BG_MASK (0x3) -+#define ECC_CORR_BANK_SHIFT (16) -+#define ECC_CORR_BANK_MASK (0x7) -+#define ECC_CORR_COL_MASK (0xfff) -+/* ECCUADDR0 */ -+#define ECC_UNCORR_RANK_SHIFT (24) -+#define ECC_UNCORR_RANK_MASK (0x3) -+#define ECC_UNCORR_ROW_MASK (0x3ffff) -+/* ECCUADDR1 */ -+#define ECC_UNCORR_CID_SHIFT (28) -+#define ECC_UNCORR_CID_MASK (0x3) -+#define ECC_UNCORR_BG_SHIFT (24) -+#define ECC_UNCORR_BG_MASK (0x3) -+#define ECC_UNCORR_BANK_SHIFT (16) -+#define ECC_UNCORR_BANK_MASK (0x7) -+#define ECC_UNCORR_COL_MASK (0xfff) ++ if (events & EPOLLOUT) { ++ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out; + -+/** -+ * struct ddr_ecc_error_info - DDR ECC error log information -+ * @err_cnt: error count -+ * @rank: Rank number -+ * @row: Row number -+ * @chip_id: Chip id number -+ * @bank_group: Bank Group number -+ * @bank: Bank number -+ * @col: Column number -+ * @bitpos: Bit position -+ */ -+struct ddr_ecc_error_info { -+ u32 err_cnt; -+ u32 rank; -+ u32 row; -+ u32 chip_id; -+ u32 bank_group; -+ u32 bank; -+ u32 col; -+ u32 bitpos; -+}; ++ /* Check that callback isn't busy */ ++ spin_lock_irq(&dmabuf->poll.lock); ++ if (dcb->active) ++ events &= ~EPOLLOUT; ++ else ++ dcb->active = EPOLLOUT; ++ spin_unlock_irq(&dmabuf->poll.lock); + -+/** -+ * struct ddr_ecc_status - DDR ECC status information to report -+ * @ceinfo: Correctable error log information -+ * @ueinfo: Uncorrectable error log information -+ */ -+struct ddr_ecc_status { -+ struct ddr_ecc_error_info ceinfo; -+ struct ddr_ecc_error_info ueinfo; -+}; ++ if (events & EPOLLOUT) { ++ /* Paired with fput in dma_buf_poll_cb */ ++ get_file(dmabuf->file); + -+/** -+ * struct rk_edac_priv - RK DDR memory controller private instance data -+ * @name: EDAC name -+ * @stat: DDR ECC status information -+ * @ce_cnt: Correctable Error count -+ * @ue_cnt: Uncorrectable Error count -+ * @irq_ce: Corrected interrupt number -+ * @irq_ue: Uncorrected interrupt number -+ */ -+struct rk_edac_priv { -+ char 
*name; -+ struct ddr_ecc_status stat; -+ u32 ce_cnt; -+ u32 ue_cnt; -+ int irq_ce; -+ int irq_ue; -+}; ++ if (!dma_buf_poll_add_cb(resv, true, dcb)) ++ /* No callback queued, wake up any other waiters */ ++ dma_buf_poll_cb(NULL, &dcb->cb); ++ else ++ events &= ~EPOLLOUT; ++ } ++ } + -+static struct ddr_ecc_status *ddr_edac_info; ++ if (events & EPOLLIN) { ++ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in; + -+static inline void opstate_init_int(void) -+{ -+ switch (edac_op_state) { -+ case EDAC_OPSTATE_POLL: -+ case EDAC_OPSTATE_INT: -+ break; -+ default: -+ edac_op_state = EDAC_OPSTATE_INT; -+ break; -+ } -+} ++ /* Check that callback isn't busy */ ++ spin_lock_irq(&dmabuf->poll.lock); ++ if (dcb->active) ++ events &= ~EPOLLIN; ++ else ++ dcb->active = EPOLLIN; ++ spin_unlock_irq(&dmabuf->poll.lock); + -+static void rockchip_edac_handle_ce_error(struct mem_ctl_info *mci, -+ struct ddr_ecc_status *p) -+{ -+ struct ddr_ecc_error_info *pinf; ++ if (events & EPOLLIN) { ++ /* Paired with fput in dma_buf_poll_cb */ ++ get_file(dmabuf->file); + -+ if (p->ceinfo.err_cnt) { -+ pinf = &p->ceinfo; -+ edac_mc_printk(mci, KERN_ERR, -+ "DDR ECC CE error: CS%d, Row 0x%x, Bg 0x%x, Bk 0x%x, Col 0x%x bit 0x%x\n", -+ pinf->rank, pinf->row, pinf->bank_group, -+ pinf->bank, pinf->col, -+ pinf->bitpos); -+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, -+ p->ceinfo.err_cnt, 0, 0, 0, 0, 0, -1, -+ mci->ctl_name, ""); ++ if (!dma_buf_poll_add_cb(resv, false, dcb)) ++ /* No callback queued, wake up any other waiters */ ++ dma_buf_poll_cb(NULL, &dcb->cb); ++ else ++ events &= ~EPOLLIN; ++ } + } ++ ++ dma_resv_unlock(resv); ++ return events; +} + -+static void rockchip_edac_handle_ue_error(struct mem_ctl_info *mci, -+ struct ddr_ecc_status *p) ++static long _dma_buf_set_name(struct dma_buf *dmabuf, const char *name) +{ -+ struct ddr_ecc_error_info *pinf; ++ spin_lock(&dmabuf->name_lock); ++ kfree(dmabuf->name); ++ dmabuf->name = name; ++ spin_unlock(&dmabuf->name_lock); + -+ if (p->ueinfo.err_cnt) { -+ pinf = &p->ueinfo; -+ edac_mc_printk(mci, KERN_ERR, -+ "DDR ECC UE error: CS%d, Row 0x%x, Bg 0x%x, Bk 0x%x, Col 0x%x\n", -+ pinf->rank, pinf->row, -+ pinf->bank_group, pinf->bank, pinf->col); -+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, -+ p->ueinfo.err_cnt, 0, 0, 0, 0, 0, -1, -+ mci->ctl_name, ""); -+ } ++ return 0; +} + -+static int rockchip_edac_get_error_info(struct mem_ctl_info *mci) ++/** ++ * dma_buf_set_name - Set a name to a specific dma_buf to track the usage. ++ * It could support changing the name of the dma-buf if the same ++ * piece of memory is used for multiple purpose between different devices. ++ * ++ * @dmabuf: [in] dmabuf buffer that will be renamed. ++ * @buf: [in] A piece of userspace memory that contains the name of ++ * the dma-buf. ++ * ++ * Returns 0 on success. If the dma-buf buffer is already attached to ++ * devices, return -EBUSY. 
++ * ++ */ ++long dma_buf_set_name(struct dma_buf *dmabuf, const char *name) +{ -+ struct arm_smccc_res res; ++ long ret = 0; ++ char *buf = kstrndup(name, DMA_BUF_NAME_LEN, GFP_KERNEL); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRECC, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_ECC); -+ if ((res.a0) || (res.a1)) { -+ edac_mc_printk(mci, KERN_ERR, "ROCKCHIP_SIP_CONFIG_DRAM_ECC not support: 0x%lx\n", -+ res.a0); -+ return -ENXIO; -+ } ++ if (!buf) ++ return -ENOMEM; + -+ return 0; ++ ret = _dma_buf_set_name(dmabuf, buf); ++ if (ret) ++ kfree(buf); ++ ++ return ret; +} ++EXPORT_SYMBOL_GPL(dma_buf_set_name); + -+static void rockchip_edac_check(struct mem_ctl_info *mci) ++static long dma_buf_set_name_user(struct dma_buf *dmabuf, const char __user *buf) +{ -+ struct rk_edac_priv *priv = mci->pvt_info; -+ int ret; ++ char *name = strndup_user(buf, DMA_BUF_NAME_LEN); ++ long ret; + -+ ret = rockchip_edac_get_error_info(mci); ++ if (IS_ERR(name)) ++ return PTR_ERR(name); ++ ++ ret = _dma_buf_set_name(dmabuf, name); + if (ret) -+ return; ++ kfree(name); + -+ priv->ce_cnt += ddr_edac_info->ceinfo.err_cnt; -+ priv->ue_cnt += ddr_edac_info->ceinfo.err_cnt; -+ rockchip_edac_handle_ce_error(mci, ddr_edac_info); -+ rockchip_edac_handle_ue_error(mci, ddr_edac_info); ++ return ret; +} + -+static irqreturn_t rockchip_edac_mc_ce_isr(int irq, void *dev_id) ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++static long dma_buf_export_sync_file(struct dma_buf *dmabuf, ++ void __user *user_data) +{ -+ struct mem_ctl_info *mci = dev_id; -+ struct rk_edac_priv *priv = mci->pvt_info; -+ int ret; ++ struct dma_buf_export_sync_file arg; ++ enum dma_resv_usage usage; ++ struct dma_fence *fence = NULL; ++ struct sync_file *sync_file; ++ int fd, ret; + -+ ret = rockchip_edac_get_error_info(mci); ++ if (copy_from_user(&arg, user_data, sizeof(arg))) ++ return -EFAULT; ++ ++ if (arg.flags & ~DMA_BUF_SYNC_RW) ++ return -EINVAL; ++ ++ if ((arg.flags & DMA_BUF_SYNC_RW) == 0) ++ return -EINVAL; ++ ++ fd = get_unused_fd_flags(O_CLOEXEC); ++ if (fd < 0) ++ return fd; ++ ++ usage = dma_resv_usage_rw(arg.flags & DMA_BUF_SYNC_WRITE); ++ ret = dma_resv_get_singleton(dmabuf->resv, usage, &fence); + if (ret) -+ return IRQ_NONE; ++ goto err_put_fd; + -+ priv->ce_cnt += ddr_edac_info->ceinfo.err_cnt; ++ if (!fence) ++ fence = dma_fence_get_stub(); + -+ rockchip_edac_handle_ce_error(mci, ddr_edac_info); ++ sync_file = sync_file_create(fence); + -+ return IRQ_HANDLED; -+} ++ dma_fence_put(fence); + -+static irqreturn_t rockchip_edac_mc_ue_isr(int irq, void *dev_id) -+{ -+ struct mem_ctl_info *mci = dev_id; -+ struct rk_edac_priv *priv = mci->pvt_info; -+ int ret; ++ if (!sync_file) { ++ ret = -ENOMEM; ++ goto err_put_fd; ++ } + -+ ret = rockchip_edac_get_error_info(mci); -+ if (ret) -+ return IRQ_NONE; ++ arg.fd = fd; ++ if (copy_to_user(user_data, &arg, sizeof(arg))) { ++ ret = -EFAULT; ++ goto err_put_file; ++ } + -+ priv->ue_cnt += ddr_edac_info->ueinfo.err_cnt; ++ fd_install(fd, sync_file->file); + -+ rockchip_edac_handle_ue_error(mci, ddr_edac_info); ++ return 0; + -+ return IRQ_HANDLED; ++err_put_file: ++ fput(sync_file->file); ++err_put_fd: ++ put_unused_fd(fd); ++ return ret; +} + -+static int rockchip_edac_mc_init(struct mem_ctl_info *mci, -+ struct platform_device *pdev) ++static long dma_buf_import_sync_file(struct dma_buf *dmabuf, ++ const void __user *user_data) +{ -+ struct rk_edac_priv *priv = mci->pvt_info; -+ struct arm_smccc_res res; -+ int ret; ++ struct dma_buf_import_sync_file arg; ++ struct dma_fence *fence, *f; ++ enum dma_resv_usage usage; ++ 
struct dma_fence_unwrap iter; ++ unsigned int num_fences; ++ int ret = 0; + -+ mci->pdev = &pdev->dev; -+ dev_set_drvdata(mci->pdev, mci); -+ mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; -+ mci->edac_ctl_cap = EDAC_FLAG_SECDED; -+ mci->scrub_cap = SCRUB_NONE; -+ mci->scrub_mode = SCRUB_NONE; ++ if (copy_from_user(&arg, user_data, sizeof(arg))) ++ return -EFAULT; + -+ mci->edac_cap = EDAC_FLAG_SECDED; -+ mci->ctl_name = priv->name; -+ mci->dev_name = priv->name; -+ mci->mod_name = RK_EDAC_MOD; ++ if (arg.flags & ~DMA_BUF_SYNC_RW) ++ return -EINVAL; + -+ if (edac_op_state == EDAC_OPSTATE_POLL) -+ mci->edac_check = rockchip_edac_check; -+ mci->ctl_page_to_phys = NULL; ++ if ((arg.flags & DMA_BUF_SYNC_RW) == 0) ++ return -EINVAL; + -+ res = sip_smc_request_share_mem(1, SHARE_PAGE_TYPE_DDRECC); -+ if (res.a0 != 0) { -+ dev_err(&pdev->dev, "no ATF memory for init, ret 0x%lx\n", res.a0); -+ return -ENOMEM; ++ fence = sync_file_get_fence(arg.fd); ++ if (!fence) ++ return -EINVAL; ++ ++ usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? DMA_RESV_USAGE_WRITE : ++ DMA_RESV_USAGE_READ; ++ ++ num_fences = 0; ++ dma_fence_unwrap_for_each(f, &iter, fence) ++ ++num_fences; ++ ++ if (num_fences > 0) { ++ dma_resv_lock(dmabuf->resv, NULL); ++ ++ ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); ++ if (!ret) { ++ dma_fence_unwrap_for_each(f, &iter, fence) ++ dma_resv_add_fence(dmabuf->resv, f, usage); ++ } ++ ++ dma_resv_unlock(dmabuf->resv); + } -+ ddr_edac_info = (struct ddr_ecc_status *)res.a1; -+ memset(ddr_edac_info, 0, sizeof(struct ddr_ecc_status)); + -+ ret = rockchip_edac_get_error_info(mci); -+ if (ret) -+ return ret; ++ dma_fence_put(fence); + -+ return 0; ++ return ret; +} ++#endif + -+static int rockchip_edac_probe(struct platform_device *pdev) ++static long dma_buf_ioctl(struct file *file, ++ unsigned int cmd, unsigned long arg) +{ -+ struct mem_ctl_info *mci; -+ struct edac_mc_layer layers[2]; -+ struct rk_edac_priv *priv; ++ struct dma_buf *dmabuf; ++ struct dma_buf_sync sync; ++ enum dma_data_direction direction; + int ret; + -+ opstate_init_int(); -+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; -+ layers[0].size = MAX_CS; -+ layers[0].is_virt_csrow = true; -+ layers[1].type = EDAC_MC_LAYER_CHANNEL; -+ layers[1].size = MAX_CH; -+ layers[1].is_virt_csrow = false; -+ -+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, -+ sizeof(struct rk_edac_priv)); -+ if (!mci) { -+ edac_printk(KERN_ERR, EDAC_MC, -+ "Failed memory allocation for mc instance\n"); -+ return -ENOMEM; -+ } ++ dmabuf = file->private_data; + -+ priv = mci->pvt_info; -+ priv->name = "rk_edac_ecc"; -+ ret = rockchip_edac_mc_init(mci, pdev); -+ if (ret) { -+ edac_printk(KERN_ERR, EDAC_MC, -+ "Failed to initialize instance\n"); -+ goto free_edac_mc; -+ } ++ switch (cmd) { ++ case DMA_BUF_IOCTL_SYNC: ++ if (copy_from_user(&sync, (void __user *) arg, sizeof(sync))) ++ return -EFAULT; + -+ ret = edac_mc_add_mc(mci); -+ if (ret) { -+ edac_printk(KERN_ERR, EDAC_MC, -+ "Failed edac_mc_add_mc()\n"); -+ goto free_edac_mc; -+ } ++ if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK) ++ return -EINVAL; + -+ if (edac_op_state == EDAC_OPSTATE_INT) { -+ /* register interrupts */ -+ priv->irq_ce = platform_get_irq_byname(pdev, "ce"); -+ ret = devm_request_irq(&pdev->dev, priv->irq_ce, -+ rockchip_edac_mc_ce_isr, -+ 0, -+ "[EDAC] MC err", mci); -+ if (ret < 0) { -+ edac_printk(KERN_ERR, EDAC_MC, -+ "%s: Unable to request ce irq %d for RK EDAC\n", -+ __func__, priv->irq_ce); -+ goto del_mc; ++ switch (sync.flags & DMA_BUF_SYNC_RW) { ++ case 
DMA_BUF_SYNC_READ: ++ direction = DMA_FROM_DEVICE; ++ break; ++ case DMA_BUF_SYNC_WRITE: ++ direction = DMA_TO_DEVICE; ++ break; ++ case DMA_BUF_SYNC_RW: ++ direction = DMA_BIDIRECTIONAL; ++ break; ++ default: ++ return -EINVAL; + } + -+ edac_printk(KERN_INFO, EDAC_MC, -+ "acquired ce irq %d for MC\n", -+ priv->irq_ce); -+ -+ priv->irq_ue = platform_get_irq_byname(pdev, "ue"); -+ ret = devm_request_irq(&pdev->dev, priv->irq_ue, -+ rockchip_edac_mc_ue_isr, -+ 0, -+ "[EDAC] MC err", mci); -+ if (ret < 0) { -+ edac_printk(KERN_ERR, EDAC_MC, -+ "%s: Unable to request ue irq %d for RK EDAC\n", -+ __func__, priv->irq_ue); -+ goto del_mc; -+ } ++ if (sync.flags & DMA_BUF_SYNC_END) ++ ret = dma_buf_end_cpu_access(dmabuf, direction); ++ else ++ ret = dma_buf_begin_cpu_access(dmabuf, direction); + -+ edac_printk(KERN_INFO, EDAC_MC, -+ "acquired ue irq %d for MC\n", -+ priv->irq_ue); -+ } ++ return ret; + -+ return 0; ++ case DMA_BUF_SET_NAME_A: ++ case DMA_BUF_SET_NAME_B: ++ return dma_buf_set_name_user(dmabuf, (const char __user *)arg); + -+del_mc: -+ edac_mc_del_mc(&pdev->dev); -+free_edac_mc: -+ edac_mc_free(mci); ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case DMA_BUF_IOCTL_EXPORT_SYNC_FILE: ++ return dma_buf_export_sync_file(dmabuf, (void __user *)arg); ++ case DMA_BUF_IOCTL_IMPORT_SYNC_FILE: ++ return dma_buf_import_sync_file(dmabuf, (const void __user *)arg); ++#endif + -+ return -ENODEV; -+} ++#ifdef CONFIG_DMABUF_PARTIAL ++ case DMA_BUF_IOCTL_SYNC_PARTIAL: { ++ struct dma_buf_sync_partial sync_p; + -+static int rockchip_edac_remove(struct platform_device *pdev) -+{ -+ struct mem_ctl_info *mci = dev_get_drvdata(&pdev->dev); ++ if (copy_from_user(&sync_p, (void __user *) arg, sizeof(sync_p))) ++ return -EFAULT; + -+ edac_mc_del_mc(&pdev->dev); -+ edac_mc_free(mci); ++ if (sync_p.len == 0) ++ return 0; + -+ return 0; -+} ++ if (sync_p.len > dmabuf->size || sync_p.offset > dmabuf->size - sync_p.len) ++ return -EINVAL; + -+static const struct of_device_id rk_ddr_mc_err_of_match[] = { -+ { .compatible = "rockchip,rk3568-edac", }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(of, rk_ddr_mc_err_of_match); ++ if (sync_p.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK) ++ return -EINVAL; + -+static struct platform_driver rockchip_edac_driver = { -+ .probe = rockchip_edac_probe, -+ .remove = rockchip_edac_remove, -+ .driver = { -+ .name = "rk_edac", -+ .of_match_table = rk_ddr_mc_err_of_match, -+ }, -+}; -+module_platform_driver(rockchip_edac_driver); ++ switch (sync_p.flags & DMA_BUF_SYNC_RW) { ++ case DMA_BUF_SYNC_READ: ++ direction = DMA_FROM_DEVICE; ++ break; ++ case DMA_BUF_SYNC_WRITE: ++ direction = DMA_TO_DEVICE; ++ break; ++ case DMA_BUF_SYNC_RW: ++ direction = DMA_BIDIRECTIONAL; ++ break; ++ default: ++ return -EINVAL; ++ } + -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("He Zhihuan \n"); -+MODULE_DESCRIPTION("ROCKCHIP EDAC kernel module"); -diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c -index e7f55c021..fc0db7790 100644 ---- a/drivers/extcon/extcon.c -+++ b/drivers/extcon/extcon.c -@@ -52,6 +52,11 @@ static const struct __extcon_info { - .id = EXTCON_USB_HOST, - .name = "USB-HOST", - }, -+ [EXTCON_USB_VBUS_EN] = { -+ .type = EXTCON_TYPE_USB, -+ .id = EXTCON_USB_VBUS_EN, -+ .name = "USB_VBUS_EN", -+ }, - - /* Charging external connector */ - [EXTCON_CHG_USB_SDP] = { -diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig -index f0e9f2506..386c85da3 100644 ---- a/drivers/firmware/Kconfig -+++ b/drivers/firmware/Kconfig -@@ -226,6 +226,13 @@ config QCOM_SCM_DOWNLOAD_MODE_DEFAULT - - Say Y here to 
enable "download mode" by default. - -+config ROCKCHIP_SIP -+ tristate "Rockchip SIP interface" -+ depends on HAVE_ARM_SMCCC && ARCH_ROCKCHIP -+ help -+ Say Y here if you want to enable SIP callbacks for Rockchip platforms -+ This option enables support for communicating with the ATF. ++ if (sync_p.flags & DMA_BUF_SYNC_END) ++ ret = dma_buf_end_cpu_access_partial(dmabuf, direction, ++ sync_p.offset, ++ sync_p.len); ++ else ++ ret = dma_buf_begin_cpu_access_partial(dmabuf, direction, ++ sync_p.offset, ++ sync_p.len); + - config SYSFB - bool - select BOOT_VESA_SUPPORT -diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile -index 28fcddcd6..f91d48e6e 100644 ---- a/drivers/firmware/Makefile -+++ b/drivers/firmware/Makefile -@@ -17,6 +17,7 @@ obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o - obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o - obj-$(CONFIG_MTK_ADSP_IPC) += mtk-adsp-ipc.o - obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o -+obj-$(CONFIG_ROCKCHIP_SIP) += rockchip_sip.o - obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o - obj-$(CONFIG_QCOM_SCM) += qcom-scm.o - qcom-scm-objs += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o -diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c -index 3e8051fe8..b536a66cf 100644 ---- a/drivers/firmware/arm_sdei.c -+++ b/drivers/firmware/arm_sdei.c -@@ -1114,3 +1114,83 @@ void sdei_handler_abort(void) - __this_cpu_write(sdei_active_normal_event, NULL); - } - } ++ return ret; ++ } ++#endif /* CONFIG_DMABUF_PARTIAL */ + -+#ifdef CONFIG_FIQ_DEBUGGER_TRUST_ZONE -+int sdei_event_enable_nolock(u32 event_num) -+{ -+ return sdei_api_event_enable(event_num); ++ default: ++ return -ENOTTY; ++ } +} + -+int sdei_event_disable_nolock(u32 event_num) ++static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) +{ -+ return sdei_api_event_disable(event_num); ++ struct dma_buf *dmabuf = file->private_data; ++ ++ seq_printf(m, "size:\t%zu\n", dmabuf->size); ++ /* Don't count the temporary reference taken inside procfs seq_show */ ++ seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1); ++ seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name); ++ spin_lock(&dmabuf->name_lock); ++ if (dmabuf->name) ++ seq_printf(m, "name:\t%s\n", dmabuf->name); ++ spin_unlock(&dmabuf->name_lock); +} + -+int sdei_event_routing_set_nolock(u32 event_num, unsigned long flags, -+ unsigned long affinity) ++static const struct file_operations dma_buf_fops = { ++ .release = dma_buf_file_release, ++ .mmap = dma_buf_mmap_internal, ++ .llseek = dma_buf_llseek, ++ .poll = dma_buf_poll, ++ .unlocked_ioctl = dma_buf_ioctl, ++ .compat_ioctl = compat_ptr_ioctl, ++ .show_fdinfo = dma_buf_show_fdinfo, ++}; ++ ++/* ++ * is_dma_buf_file - Check if struct file* is associated with dma_buf ++ */ ++static inline int is_dma_buf_file(struct file *file) +{ -+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ROUTING_SET, event_num, -+ (unsigned long)flags, (unsigned long)affinity, -+ 0, 0, 0); ++ return file->f_op == &dma_buf_fops; +} + -+int sdei_event_routing_set(u32 event_num, unsigned long flags, -+ unsigned long affinity) ++static struct file *dma_buf_getfile(size_t size, int flags) +{ -+ int err = -EINVAL; -+ struct sdei_event *event; ++ static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); ++ struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); ++ struct file *file; + -+ mutex_lock(&sdei_events_lock); -+ event = sdei_event_find(event_num); -+ if (!event) { -+ mutex_unlock(&sdei_events_lock); -+ return -ENOENT; -+ } ++ if (IS_ERR(inode)) ++ return ERR_CAST(inode); + -+ err = 
sdei_event_routing_set_nolock(event_num, flags, affinity); -+ mutex_unlock(&sdei_events_lock); ++ inode->i_size = size; ++ inode_set_bytes(inode, size); + -+ return err; ++ /* ++ * The ->i_ino acquired from get_next_ino() is not unique thus ++ * not suitable for using it as dentry name by dmabuf stats. ++ * Override ->i_ino with the unique and dmabuffs specific ++ * value. ++ */ ++ inode->i_ino = atomic64_add_return(1, &dmabuf_inode); ++ flags &= O_ACCMODE | O_NONBLOCK; ++ file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", ++ flags, &dma_buf_fops); ++ if (IS_ERR(file)) ++ goto err_alloc_file; ++ ++ return file; ++ ++err_alloc_file: ++ iput(inode); ++ return file; +} + -+static int sdei_api_interrupt_bind(u32 intr_num, u64 *result) ++static void dma_buf_set_default_name(struct dma_buf *dmabuf) +{ -+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_BIND, intr_num, 0, 0, 0, -+ 0, result); ++ char task_comm[TASK_COMM_LEN]; ++ char *name; ++ ++ get_task_comm(task_comm, current->group_leader); ++ name = kasprintf(GFP_KERNEL, "%d-%s", current->tgid, task_comm); ++ dma_buf_set_name(dmabuf, name); ++ kfree(name); +} + -+int sdei_interrupt_bind(u32 intr_num, u32 *event_num) ++/** ++ * DOC: dma buf device access ++ * ++ * For device DMA access to a shared DMA buffer the usual sequence of operations ++ * is fairly simple: ++ * ++ * 1. The exporter defines his exporter instance using ++ * DEFINE_DMA_BUF_EXPORT_INFO() and calls dma_buf_export() to wrap a private ++ * buffer object into a &dma_buf. It then exports that &dma_buf to userspace ++ * as a file descriptor by calling dma_buf_fd(). ++ * ++ * 2. Userspace passes this file-descriptors to all drivers it wants this buffer ++ * to share with: First the file descriptor is converted to a &dma_buf using ++ * dma_buf_get(). Then the buffer is attached to the device using ++ * dma_buf_attach(). ++ * ++ * Up to this stage the exporter is still free to migrate or reallocate the ++ * backing storage. ++ * ++ * 3. Once the buffer is attached to all devices userspace can initiate DMA ++ * access to the shared buffer. In the kernel this is done by calling ++ * dma_buf_map_attachment() and dma_buf_unmap_attachment(). ++ * ++ * 4. Once a driver is done with a shared buffer it needs to call ++ * dma_buf_detach() (after cleaning up any mappings) and then release the ++ * reference acquired with dma_buf_get() by calling dma_buf_put(). ++ * ++ * For the detailed semantics exporters are expected to implement see ++ * &dma_buf_ops. ++ */ ++ ++/** ++ * dma_buf_export - Creates a new dma_buf, and associates an anon file ++ * with this buffer, so it can be exported. ++ * Also connect the allocator specific data and ops to the buffer. ++ * Additionally, provide a name string for exporter; useful in debugging. ++ * ++ * @exp_info: [in] holds all the export related information provided ++ * by the exporter. see &struct dma_buf_export_info ++ * for further details. ++ * ++ * Returns, on success, a newly created struct dma_buf object, which wraps the ++ * supplied private data and operations for struct dma_buf_ops. On either ++ * missing ops, or error in allocating struct dma_buf, will return negative ++ * error. ++ * ++ * For most cases the easiest way to create @exp_info is through the ++ * %DEFINE_DMA_BUF_EXPORT_INFO macro. 
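For orientation, a minimal exporter built on this interface might look roughly like the sketch below; struct my_buffer and my_dmabuf_ops are hypothetical driver-side names, not part of this patch, and the ops table is assumed to provide map_dma_buf/unmap_dma_buf/release as required above.

static int my_export_fd(struct my_buffer *buf)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct dma_buf *dmabuf;

	exp_info.ops   = &my_dmabuf_ops;	/* must implement map/unmap/release */
	exp_info.size  = buf->size;
	exp_info.flags = O_RDWR | O_CLOEXEC;
	exp_info.priv  = buf;

	dmabuf = dma_buf_export(&exp_info);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	/* hand the buffer to userspace as a file descriptor */
	return dma_buf_fd(dmabuf, O_CLOEXEC);
}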
++ */ ++struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) +{ -+ int err; -+ u64 result; ++ struct dma_buf *dmabuf; ++ struct dma_resv *resv = exp_info->resv; ++ struct file *file; ++ size_t alloc_size = sizeof(struct dma_buf); ++ int ret; + -+ err = sdei_api_interrupt_bind(intr_num, &result); -+ if (!err) -+ *event_num = (u32)result; ++ if (WARN_ON(!exp_info->priv || !exp_info->ops ++ || !exp_info->ops->map_dma_buf ++ || !exp_info->ops->unmap_dma_buf ++ || !exp_info->ops->release)) ++ return ERR_PTR(-EINVAL); + -+ return err; -+} ++ if (WARN_ON(exp_info->ops->cache_sgt_mapping && ++ (exp_info->ops->pin || exp_info->ops->unpin))) ++ return ERR_PTR(-EINVAL); + -+static int sdei_api_interrupt_release(u32 event_num) -+{ -+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE, event_num, 0, 0, 0, -+ 0, NULL); -+} ++ if (WARN_ON(!exp_info->ops->pin != !exp_info->ops->unpin)) ++ return ERR_PTR(-EINVAL); + -+int sdei_interrupt_release(u32 event_num) -+{ -+ struct sdei_event *event; ++ if (!try_module_get(exp_info->owner)) ++ return ERR_PTR(-ENOENT); + -+ mutex_lock(&sdei_events_lock); -+ event = sdei_event_find(event_num); -+ mutex_unlock(&sdei_events_lock); ++ file = dma_buf_getfile(exp_info->size, exp_info->flags); ++ if (IS_ERR(file)) { ++ ret = PTR_ERR(file); ++ goto err_module; ++ } + -+ if (event) { -+ pr_err("%s: need unregister event:%d before release\n", -+ __func__, event_num); -+ return SDEI_DENIED; ++ if (!exp_info->resv) ++ alloc_size += sizeof(struct dma_resv); ++ else ++ /* prevent &dma_buf[1] == dma_buf->resv */ ++ alloc_size += 1; ++ dmabuf = kzalloc(alloc_size, GFP_KERNEL); ++ if (!dmabuf) { ++ ret = -ENOMEM; ++ goto err_file; + } + -+ return sdei_api_interrupt_release(event_num); -+} ++ dmabuf->priv = exp_info->priv; ++ dmabuf->ops = exp_info->ops; ++ dmabuf->size = exp_info->size; ++ dmabuf->exp_name = exp_info->exp_name; ++ dmabuf->owner = exp_info->owner; ++ spin_lock_init(&dmabuf->name_lock); ++#ifdef CONFIG_DMABUF_CACHE ++ mutex_init(&dmabuf->cache_lock); +#endif -diff --git a/drivers/firmware/rockchip_sip.c b/drivers/firmware/rockchip_sip.c -new file mode 100644 -index 000000000..e483899d7 ---- /dev/null -+++ b/drivers/firmware/rockchip_sip.c -@@ -0,0 +1,715 @@ -+/* -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * Copyright (C) 2016, Fuzhou Rockchip Electronics Co., Ltd -+ */ ++ init_waitqueue_head(&dmabuf->poll); ++ dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; ++ dmabuf->cb_in.active = dmabuf->cb_out.active = 0; ++ mutex_init(&dmabuf->lock); ++ INIT_LIST_HEAD(&dmabuf->attachments); + -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_ARM -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include ++ if (!resv) { ++ dmabuf->resv = (struct dma_resv *)&dmabuf[1]; ++ dma_resv_init(dmabuf->resv); ++ } else { ++ dmabuf->resv = resv; ++ } + -+#ifdef CONFIG_64BIT -+#define PSCI_FN_NATIVE(version, name) PSCI_##version##_FN64_##name -+#else -+#define PSCI_FN_NATIVE(version, name) PSCI_##version##_FN_##name ++ ret = dma_buf_stats_setup(dmabuf, file); ++ if (ret) ++ goto err_dmabuf; ++ ++ file->private_data = dmabuf; ++ file->f_path.dentry->d_fsdata = dmabuf; ++ dmabuf->file = file; ++ ++ mutex_lock(&db_list.lock); ++ list_add(&dmabuf->list_node, &db_list.head); ++#if IS_ENABLED(CONFIG_RK_DMABUF_DEBUG) ++ db_total_size += dmabuf->size; ++ db_peak_size = max(db_total_size, db_peak_size); +#endif ++ mutex_unlock(&db_list.lock); + -+#define SIZE_PAGE(n) ((n) << 12) ++ if (IS_ENABLED(CONFIG_RK_DMABUF_DEBUG)) ++ dma_buf_set_default_name(dmabuf); + -+static struct arm_smccc_res __invoke_sip_fn_smc(unsigned long function_id, -+ unsigned long arg0, -+ unsigned long arg1, -+ unsigned long arg2) -+{ -+ struct arm_smccc_res res; ++ init_dma_buf_task_info(dmabuf); ++ return dmabuf; + -+ arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res); -+ return res; ++err_dmabuf: ++ if (!resv) ++ dma_resv_fini(dmabuf->resv); ++ kfree(dmabuf); ++err_file: ++ fput(file); ++err_module: ++ module_put(exp_info->owner); ++ return ERR_PTR(ret); +} ++EXPORT_SYMBOL_NS_GPL(dma_buf_export, DMA_BUF); + -+struct arm_smccc_res sip_smc_dram(u32 arg0, u32 arg1, u32 arg2) ++/** ++ * dma_buf_fd - returns a file descriptor for the given struct dma_buf ++ * @dmabuf: [in] pointer to dma_buf for which fd is required. ++ * @flags: [in] flags to give to fd ++ * ++ * On success, returns an associated 'fd'. Else, returns error. ++ */ ++int dma_buf_fd(struct dma_buf *dmabuf, int flags) +{ -+ return __invoke_sip_fn_smc(SIP_DRAM_CONFIG, arg0, arg1, arg2); -+} -+EXPORT_SYMBOL_GPL(sip_smc_dram); ++ int fd; + -+struct arm_smccc_res sip_smc_get_atf_version(void) -+{ -+ return __invoke_sip_fn_smc(SIP_ATF_VERSION, 0, 0, 0); -+} -+EXPORT_SYMBOL_GPL(sip_smc_get_atf_version); ++ if (!dmabuf || !dmabuf->file) ++ return -EINVAL; + -+struct arm_smccc_res sip_smc_get_sip_version(void) -+{ -+ return __invoke_sip_fn_smc(SIP_SIP_VERSION, 0, 0, 0); -+} -+EXPORT_SYMBOL_GPL(sip_smc_get_sip_version); ++ fd = get_unused_fd_flags(flags); ++ if (fd < 0) ++ return fd; + -+int sip_smc_set_suspend_mode(u32 ctrl, u32 config1, u32 config2) -+{ -+ struct arm_smccc_res res; ++ fd_install(fd, dmabuf->file); + -+ res = __invoke_sip_fn_smc(SIP_SUSPEND_MODE, ctrl, config1, config2); -+ return res.a0; ++ return fd; +} -+EXPORT_SYMBOL_GPL(sip_smc_set_suspend_mode); ++EXPORT_SYMBOL_NS_GPL(dma_buf_fd, DMA_BUF); + -+struct arm_smccc_res sip_smc_get_suspend_info(u32 info) ++/** ++ * dma_buf_get - returns the struct dma_buf related to an fd ++ * @fd: [in] fd associated with the struct dma_buf to be returned ++ * ++ * On success, returns the struct dma_buf associated with an fd; uses ++ * file's refcounting done by fget to increase refcount. returns ERR_PTR ++ * otherwise. 
++ */ ++struct dma_buf *dma_buf_get(int fd) +{ -+ struct arm_smccc_res res; ++ struct file *file; + -+ res = __invoke_sip_fn_smc(SIP_SUSPEND_MODE, info, 0, 0); -+ return res; -+} -+EXPORT_SYMBOL_GPL(sip_smc_get_suspend_info); ++ file = fget(fd); + -+int sip_smc_virtual_poweroff(void) -+{ -+ struct arm_smccc_res res; ++ if (!file) ++ return ERR_PTR(-EBADF); + -+ res = __invoke_sip_fn_smc(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), 0, 0, 0); -+ return res.a0; ++ if (!is_dma_buf_file(file)) { ++ fput(file); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ return file->private_data; +} -+EXPORT_SYMBOL_GPL(sip_smc_virtual_poweroff); ++EXPORT_SYMBOL_NS_GPL(dma_buf_get, DMA_BUF); + -+int sip_smc_remotectl_config(u32 func, u32 data) ++/** ++ * dma_buf_put - decreases refcount of the buffer ++ * @dmabuf: [in] buffer to reduce refcount of ++ * ++ * Uses file's refcounting done implicitly by fput(). ++ * ++ * If, as a result of this call, the refcount becomes 0, the 'release' file ++ * operation related to this fd is called. It calls &dma_buf_ops.release vfunc ++ * in turn, and frees the memory allocated for dmabuf when exported. ++ */ ++void dma_buf_put(struct dma_buf *dmabuf) +{ -+ struct arm_smccc_res res; -+ -+ res = __invoke_sip_fn_smc(SIP_REMOTECTL_CFG, func, data, 0); ++ if (WARN_ON(!dmabuf || !dmabuf->file)) ++ return; + -+ return res.a0; ++ fput(dmabuf->file); +} -+EXPORT_SYMBOL_GPL(sip_smc_remotectl_config); ++EXPORT_SYMBOL_NS_GPL(dma_buf_put, DMA_BUF); + -+u32 sip_smc_secure_reg_read(u32 addr_phy) ++static void mangle_sg_table(struct sg_table *sg_table) +{ -+ struct arm_smccc_res res; ++#ifdef CONFIG_DMABUF_DEBUG ++ int i; ++ struct scatterlist *sg; + -+ res = __invoke_sip_fn_smc(SIP_ACCESS_REG, 0, addr_phy, SECURE_REG_RD); -+ if (res.a0) -+ pr_err("%s error: %d, addr phy: 0x%x\n", -+ __func__, (int)res.a0, addr_phy); ++ /* To catch abuse of the underlying struct page by importers mix ++ * up the bits, but take care to preserve the low SG_ bits to ++ * not corrupt the sgt. The mixing is undone in __unmap_dma_buf ++ * before passing the sgt back to the exporter. */ ++ for_each_sgtable_sg(sg_table, sg, i) ++ sg->page_link ^= ~0xffUL; ++#endif + -+ return res.a1; +} -+EXPORT_SYMBOL_GPL(sip_smc_secure_reg_read); -+ -+int sip_smc_secure_reg_write(u32 addr_phy, u32 val) ++static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; ++ struct sg_table *sg_table; ++ signed long ret; + -+ res = __invoke_sip_fn_smc(SIP_ACCESS_REG, val, addr_phy, SECURE_REG_WR); -+ if (res.a0) -+ pr_err("%s error: %d, addr phy: 0x%x\n", -+ __func__, (int)res.a0, addr_phy); ++ sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); ++ if (IS_ERR_OR_NULL(sg_table)) ++ return sg_table; + -+ return res.a0; ++ if (!dma_buf_attachment_is_dynamic(attach)) { ++ ret = dma_resv_wait_timeout(attach->dmabuf->resv, ++ DMA_RESV_USAGE_KERNEL, true, ++ MAX_SCHEDULE_TIMEOUT); ++ if (ret < 0) { ++ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, ++ direction); ++ return ERR_PTR(ret); ++ } ++ } ++ ++ mangle_sg_table(sg_table); ++ return sg_table; +} -+EXPORT_SYMBOL_GPL(sip_smc_secure_reg_write); + -+static void *sip_map(phys_addr_t start, size_t size) ++/** ++ * DOC: locking convention ++ * ++ * In order to avoid deadlock situations between dma-buf exports and importers, ++ * all dma-buf API users must follow the common dma-buf locking convention. ++ * ++ * Convention for importers ++ * ++ * 1. 
Importers must hold the dma-buf reservation lock when calling these ++ * functions: ++ * ++ * - dma_buf_pin() ++ * - dma_buf_unpin() ++ * - dma_buf_map_attachment() ++ * - dma_buf_unmap_attachment() ++ * - dma_buf_vmap() ++ * - dma_buf_vunmap() ++ * ++ * 2. Importers must not hold the dma-buf reservation lock when calling these ++ * functions: ++ * ++ * - dma_buf_attach() ++ * - dma_buf_dynamic_attach() ++ * - dma_buf_detach() ++ * - dma_buf_export() ++ * - dma_buf_fd() ++ * - dma_buf_get() ++ * - dma_buf_put() ++ * - dma_buf_mmap() ++ * - dma_buf_begin_cpu_access() ++ * - dma_buf_end_cpu_access() ++ * - dma_buf_map_attachment_unlocked() ++ * - dma_buf_unmap_attachment_unlocked() ++ * - dma_buf_vmap_unlocked() ++ * - dma_buf_vunmap_unlocked() ++ * ++ * Convention for exporters ++ * ++ * 1. These &dma_buf_ops callbacks are invoked with unlocked dma-buf ++ * reservation and exporter can take the lock: ++ * ++ * - &dma_buf_ops.attach() ++ * - &dma_buf_ops.detach() ++ * - &dma_buf_ops.release() ++ * - &dma_buf_ops.begin_cpu_access() ++ * - &dma_buf_ops.end_cpu_access() ++ * - &dma_buf_ops.mmap() ++ * ++ * 2. These &dma_buf_ops callbacks are invoked with locked dma-buf ++ * reservation and exporter can't take the lock: ++ * ++ * - &dma_buf_ops.pin() ++ * - &dma_buf_ops.unpin() ++ * - &dma_buf_ops.map_dma_buf() ++ * - &dma_buf_ops.unmap_dma_buf() ++ * - &dma_buf_ops.vmap() ++ * - &dma_buf_ops.vunmap() ++ * ++ * 3. Exporters must hold the dma-buf reservation lock when calling these ++ * functions: ++ * ++ * - dma_buf_move_notify() ++ */ ++ ++/** ++ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list ++ * @dmabuf: [in] buffer to attach device to. ++ * @dev: [in] device to be attached. ++ * @importer_ops: [in] importer operations for the attachment ++ * @importer_priv: [in] importer private pointer for the attachment ++ * ++ * Returns struct dma_buf_attachment pointer for this attachment. Attachments ++ * must be cleaned up by calling dma_buf_detach(). ++ * ++ * Optionally this calls &dma_buf_ops.attach to allow device-specific attach ++ * functionality. ++ * ++ * Returns: ++ * ++ * A pointer to newly created &dma_buf_attachment on success, or a negative ++ * error code wrapped into a pointer on failure. ++ * ++ * Note that this can fail if the backing storage of @dmabuf is in a place not ++ * accessible to @dev, and cannot be moved to a more suitable place. This is ++ * indicated with the error code -EBUSY. 
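As an illustration of the importer-side convention above, a simple (static) importer could acquire and map a buffer along these lines; this is a sketch only, with error handling trimmed to the essentials.

static struct sg_table *my_import_and_map(struct device *dev, int fd,
					  struct dma_buf_attachment **out_attach)
{
	struct dma_buf *dmabuf = dma_buf_get(fd);	/* takes a file reference */
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach)) {
		dma_buf_put(dmabuf);
		return ERR_CAST(attach);
	}

	/* the importer does not hold the reservation lock here, so use _unlocked */
	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		dma_buf_put(dmabuf);
		return sgt;
	}

	*out_attach = attach;
	return sgt;
}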
++ */ ++struct dma_buf_attachment * ++dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, ++ const struct dma_buf_attach_ops *importer_ops, ++ void *importer_priv) +{ -+ struct page **pages; -+ phys_addr_t page_start; -+ unsigned int page_count; -+ pgprot_t prot; -+ unsigned int i; -+ void *vaddr; ++ struct dma_buf_attachment *attach; ++ int ret; + -+ if (!pfn_valid(__phys_to_pfn(start))) -+ return ioremap(start, size); ++ if (WARN_ON(!dmabuf || !dev)) ++ return ERR_PTR(-EINVAL); + -+ page_start = start - offset_in_page(start); -+ page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); ++ if (WARN_ON(importer_ops && !importer_ops->move_notify)) ++ return ERR_PTR(-EINVAL); + -+ prot = pgprot_noncached(PAGE_KERNEL); ++ attach = kzalloc(sizeof(*attach), GFP_KERNEL); ++ if (!attach) ++ return ERR_PTR(-ENOMEM); + -+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); -+ if (!pages) { -+ pr_err("%s: Failed to allocate array for %u pages\n", -+ __func__, page_count); -+ return NULL; ++ attach->dev = dev; ++ attach->dmabuf = dmabuf; ++ if (importer_ops) ++ attach->peer2peer = importer_ops->allow_peer2peer; ++ attach->importer_ops = importer_ops; ++ attach->importer_priv = importer_priv; ++ ++ if (dmabuf->ops->attach) { ++ ret = dmabuf->ops->attach(dmabuf, attach); ++ if (ret) ++ goto err_attach; + } ++ dma_resv_lock(dmabuf->resv, NULL); ++ list_add(&attach->node, &dmabuf->attachments); ++ dma_resv_unlock(dmabuf->resv); + -+ for (i = 0; i < page_count; i++) -+ pages[i] = phys_to_page(page_start + i * PAGE_SIZE); ++ /* When either the importer or the exporter can't handle dynamic ++ * mappings we cache the mapping here to avoid issues with the ++ * reservation object lock. ++ */ ++ if (dma_buf_attachment_is_dynamic(attach) != ++ dma_buf_is_dynamic(dmabuf)) { ++ struct sg_table *sgt; + -+ vaddr = vmap(pages, page_count, VM_MAP, prot); -+ kfree(pages); ++ if (dma_buf_is_dynamic(attach->dmabuf)) { ++ dma_resv_lock(attach->dmabuf->resv, NULL); ++ ret = dmabuf->ops->pin(attach); ++ if (ret) ++ goto err_unlock; ++ } + -+ /* -+ * Since vmap() uses page granularity, we must add the offset -+ * into the page here, to get the byte granularity address -+ * into the mapping to represent the actual "start" location. 
-+ */ -+ return vaddr + offset_in_page(start); -+} ++ sgt = __map_dma_buf(attach, DMA_BIDIRECTIONAL); ++ if (!sgt) ++ sgt = ERR_PTR(-ENOMEM); ++ if (IS_ERR(sgt)) { ++ ret = PTR_ERR(sgt); ++ goto err_unpin; ++ } ++ if (dma_buf_is_dynamic(attach->dmabuf)) ++ dma_resv_unlock(attach->dmabuf->resv); ++ attach->sgt = sgt; ++ attach->dir = DMA_BIDIRECTIONAL; ++ } + -+struct arm_smccc_res sip_smc_request_share_mem(u32 page_num, -+ share_page_type_t page_type) -+{ -+ struct arm_smccc_res res; -+ unsigned long share_mem_phy; ++ return attach; + -+ res = __invoke_sip_fn_smc(SIP_SHARE_MEM, page_num, page_type, 0); -+ if (IS_SIP_ERROR(res.a0)) -+ goto error; ++err_attach: ++ kfree(attach); ++ return ERR_PTR(ret); + -+ share_mem_phy = res.a1; -+ res.a1 = (unsigned long)sip_map(share_mem_phy, SIZE_PAGE(page_num)); ++err_unpin: ++ if (dma_buf_is_dynamic(attach->dmabuf)) ++ dmabuf->ops->unpin(attach); + -+error: -+ return res; -+} -+EXPORT_SYMBOL_GPL(sip_smc_request_share_mem); ++err_unlock: ++ if (dma_buf_is_dynamic(attach->dmabuf)) ++ dma_resv_unlock(attach->dmabuf->resv); + -+struct arm_smccc_res sip_smc_mcu_el3fiq(u32 arg0, u32 arg1, u32 arg2) -+{ -+ return __invoke_sip_fn_smc(SIP_MCU_EL3FIQ_CFG, arg0, arg1, arg2); ++ dma_buf_detach(dmabuf, attach); ++ return ERR_PTR(ret); +} -+EXPORT_SYMBOL_GPL(sip_smc_mcu_el3fiq); ++EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF); + -+struct arm_smccc_res sip_smc_vpu_reset(u32 arg0, u32 arg1, u32 arg2) ++/** ++ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach ++ * @dmabuf: [in] buffer to attach device to. ++ * @dev: [in] device to be attached. ++ * ++ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static ++ * mapping. ++ */ ++struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, ++ struct device *dev) +{ -+ struct arm_smccc_res res; -+ -+ res = __invoke_sip_fn_smc(PSCI_SIP_VPU_RESET, arg0, arg1, arg2); -+ return res; ++ return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL); +} -+EXPORT_SYMBOL_GPL(sip_smc_vpu_reset); ++EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF); + -+struct arm_smccc_res sip_smc_bus_config(u32 arg0, u32 arg1, u32 arg2) ++static void __unmap_dma_buf(struct dma_buf_attachment *attach, ++ struct sg_table *sg_table, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; ++ /* uses XOR, hence this unmangles */ ++ mangle_sg_table(sg_table); + -+ res = __invoke_sip_fn_smc(SIP_BUS_CFG, arg0, arg1, arg2); -+ return res; ++ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); +} -+EXPORT_SYMBOL_GPL(sip_smc_bus_config); + -+struct dram_addrmap_info *sip_smc_get_dram_map(void) ++/** ++ * dma_buf_detach - Remove the given attachment from dmabuf's attachments list ++ * @dmabuf: [in] buffer to detach from. ++ * @attach: [in] attachment to be detached; is free'd after this call. ++ * ++ * Clean up a device attachment obtained by calling dma_buf_attach(). ++ * ++ * Optionally this calls &dma_buf_ops.detach for device-specific detach. 
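A matching teardown sketch undoes the hypothetical import helper shown earlier in reverse order: unmap, then detach, then drop the reference taken by dma_buf_get().

static void my_unimport(struct dma_buf *dmabuf,
			struct dma_buf_attachment *attach,
			struct sg_table *sgt)
{
	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);	/* frees the attachment */
	dma_buf_put(dmabuf);		/* drops the file reference */
}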
++ */ ++void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) +{ -+ struct arm_smccc_res res; -+ static struct dram_addrmap_info *map; -+ -+ if (map) -+ return map; ++ if (WARN_ON(!dmabuf || !attach)) ++ return; + -+ /* Request share memory size 4KB */ -+ res = sip_smc_request_share_mem(1, SHARE_PAGE_TYPE_DDR_ADDRMAP); -+ if (res.a0 != 0) { -+ pr_err("no ATF memory for init\n"); -+ return NULL; -+ } ++ if (attach->sgt) { ++ if (dma_buf_is_dynamic(attach->dmabuf)) ++ dma_resv_lock(attach->dmabuf->resv, NULL); + -+ map = (struct dram_addrmap_info *)res.a1; ++ __unmap_dma_buf(attach, attach->sgt, attach->dir); + -+ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR_ADDRMAP, 0, -+ ROCKCHIP_SIP_CONFIG_DRAM_ADDRMAP_GET); -+ if (res.a0) { -+ pr_err("rockchip_sip_config_dram_init error:%lx\n", res.a0); -+ map = NULL; -+ return NULL; ++ if (dma_buf_is_dynamic(attach->dmabuf)) { ++ dmabuf->ops->unpin(attach); ++ dma_resv_unlock(attach->dmabuf->resv); ++ } + } + -+ return map; ++ dma_resv_lock(dmabuf->resv, NULL); ++ list_del(&attach->node); ++ dma_resv_unlock(dmabuf->resv); ++ if (dmabuf->ops->detach) ++ dmabuf->ops->detach(dmabuf, attach); ++ ++ kfree(attach); +} -+EXPORT_SYMBOL_GPL(sip_smc_get_dram_map); ++EXPORT_SYMBOL_NS_GPL(dma_buf_detach, DMA_BUF); + -+struct arm_smccc_res sip_smc_lastlog_request(void) ++/** ++ * dma_buf_pin - Lock down the DMA-buf ++ * @attach: [in] attachment which should be pinned ++ * ++ * Only dynamic importers (who set up @attach with dma_buf_dynamic_attach()) may ++ * call this, and only for limited use cases like scanout and not for temporary ++ * pin operations. It is not permitted to allow userspace to pin arbitrary ++ * amounts of buffers through this interface. ++ * ++ * Buffers must be unpinned by calling dma_buf_unpin(). ++ * ++ * Returns: ++ * 0 on success, negative error code on failure. ++ */ ++int dma_buf_pin(struct dma_buf_attachment *attach) +{ -+ struct arm_smccc_res res; -+ void __iomem *addr1, *addr2; ++ struct dma_buf *dmabuf = attach->dmabuf; ++ int ret = 0; + -+ res = __invoke_sip_fn_smc(SIP_LAST_LOG, local_clock(), 0, 0); -+ if (IS_SIP_ERROR(res.a0)) -+ return res; ++ WARN_ON(!dma_buf_attachment_is_dynamic(attach)); + -+ addr1 = sip_map(res.a1, res.a3); -+ if (!addr1) { -+ pr_err("%s: share memory buffer0 ioremap failed\n", __func__); -+ res.a0 = SIP_RET_INVALID_ADDRESS; -+ return res; -+ } -+ addr2 = sip_map(res.a2, res.a3); -+ if (!addr2) { -+ pr_err("%s: share memory buffer1 ioremap failed\n", __func__); -+ res.a0 = SIP_RET_INVALID_ADDRESS; -+ return res; -+ } ++ dma_resv_assert_held(dmabuf->resv); + -+ res.a1 = (unsigned long)addr1; -+ res.a2 = (unsigned long)addr2; ++ if (dmabuf->ops->pin) ++ ret = dmabuf->ops->pin(attach); + -+ return res; ++ return ret; +} -+EXPORT_SYMBOL_GPL(sip_smc_lastlog_request); ++EXPORT_SYMBOL_NS_GPL(dma_buf_pin, DMA_BUF); + -+int sip_smc_amp_config(u32 sub_func_id, u32 arg1, u32 arg2, u32 arg3) ++/** ++ * dma_buf_unpin - Unpin a DMA-buf ++ * @attach: [in] attachment which should be unpinned ++ * ++ * This unpins a buffer pinned by dma_buf_pin() and allows the exporter to move ++ * any mapping of @attach again and inform the importer through ++ * &dma_buf_attach_ops.move_notify. 
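To make the locking requirement concrete, a dynamic importer pinning a buffer for scanout might wrap the call as in this sketch; it assumes attach was created with dma_buf_dynamic_attach().

static int my_pin_for_scanout(struct dma_buf_attachment *attach)
{
	struct dma_buf *dmabuf = attach->dmabuf;
	int ret;

	dma_resv_lock(dmabuf->resv, NULL);	/* pin/unpin require the reservation lock */
	ret = dma_buf_pin(attach);
	dma_resv_unlock(dmabuf->resv);
	return ret;
}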
++ */ ++void dma_buf_unpin(struct dma_buf_attachment *attach) +{ -+ struct arm_smccc_res res; ++ struct dma_buf *dmabuf = attach->dmabuf; + -+ arm_smccc_smc(RK_SIP_AMP_CFG, sub_func_id, arg1, arg2, arg3, -+ 0, 0, 0, &res); -+ return res.a0; -+} -+EXPORT_SYMBOL_GPL(sip_smc_amp_config); ++ WARN_ON(!dma_buf_attachment_is_dynamic(attach)); + -+struct arm_smccc_res sip_smc_get_amp_info(u32 sub_func_id, u32 arg1) -+{ -+ struct arm_smccc_res res; ++ dma_resv_assert_held(dmabuf->resv); + -+ arm_smccc_smc(RK_SIP_AMP_CFG, sub_func_id, arg1, 0, 0, 0, 0, 0, &res); -+ return res; ++ if (dmabuf->ops->unpin) ++ dmabuf->ops->unpin(attach); +} -+EXPORT_SYMBOL_GPL(sip_smc_get_amp_info); ++EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, DMA_BUF); + -+struct arm_smccc_res sip_smc_get_pvtpll_info(u32 sub_func_id, u32 arg1) ++/** ++ * dma_buf_map_attachment - Returns the scatterlist table of the attachment; ++ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the ++ * dma_buf_ops. ++ * @attach: [in] attachment whose scatterlist is to be returned ++ * @direction: [in] direction of DMA transfer ++ * ++ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR ++ * on error. May return -EINTR if it is interrupted by a signal. ++ * ++ * On success, the DMA addresses and lengths in the returned scatterlist are ++ * PAGE_SIZE aligned. ++ * ++ * A mapping must be unmapped by using dma_buf_unmap_attachment(). Note that ++ * the underlying backing storage is pinned for as long as a mapping exists, ++ * therefore users/importers should not hold onto a mapping for undue amounts of ++ * time. ++ * ++ * Important: Dynamic importers must wait for the exclusive fence of the struct ++ * dma_resv attached to the DMA-BUF first. ++ */ ++struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; ++ struct sg_table *sg_table; ++ int r; + -+ /* -+ * res.a0: error code(0: success, !0: error). -+ * res.a1: low temp config flag(0: support, !0: don't support). -+ */ -+ arm_smccc_smc(SIP_PVTPLL_CFG, sub_func_id, arg1, 0, 0, 0, 0, 0, &res); -+ return res; -+} -+EXPORT_SYMBOL_GPL(sip_smc_get_pvtpll_info); ++ might_sleep(); + -+struct arm_smccc_res sip_smc_pvtpll_config(u32 sub_func_id, u32 arg1, u32 arg2, -+ u32 arg3, u32 arg4, u32 arg5, u32 arg6) -+{ -+ struct arm_smccc_res res; ++ if (WARN_ON(!attach || !attach->dmabuf)) ++ return ERR_PTR(-EINVAL); + -+ /* -+ * res.a0: error code(0: success, !0: error). -+ */ -+ arm_smccc_smc(SIP_PVTPLL_CFG, sub_func_id, arg1, arg2, arg3, arg4, arg5, -+ arg6, &res); -+ return res; -+} -+EXPORT_SYMBOL_GPL(sip_smc_pvtpll_config); ++ if (dma_buf_attachment_is_dynamic(attach)) ++ dma_resv_assert_held(attach->dmabuf->resv); + -+void __iomem *sip_hdcp_request_share_memory(int id) -+{ -+ static void __iomem *base; -+ struct arm_smccc_res res; ++ if (attach->sgt) { ++ /* ++ * Two mappings with different directions for the same ++ * attachment are not allowed. 
++ */ ++ if (attach->dir != direction && ++ attach->dir != DMA_BIDIRECTIONAL) ++ return ERR_PTR(-EBUSY); + -+ if (id < 0 || id >= MAX_DEVICE) { -+ pr_err("%s: invalid device id\n", __func__); -+ return NULL; ++ return attach->sgt; + } + -+ if (!base) { -+ /* request page share memory */ -+ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_HDCP); -+ if (IS_SIP_ERROR(res.a0)) -+ return NULL; -+ base = (void __iomem *)res.a1; ++ if (dma_buf_is_dynamic(attach->dmabuf)) { ++ dma_resv_assert_held(attach->dmabuf->resv); ++ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { ++ r = attach->dmabuf->ops->pin(attach); ++ if (r) ++ return ERR_PTR(r); ++ } + } + -+ return base + id * 1024; -+} -+EXPORT_SYMBOL_GPL(sip_hdcp_request_share_memory); -+ -+struct arm_smccc_res sip_hdcp_config(u32 arg0, u32 arg1, u32 arg2) -+{ -+ struct arm_smccc_res res; ++ sg_table = __map_dma_buf(attach, direction); ++ if (!sg_table) ++ sg_table = ERR_PTR(-ENOMEM); + -+ res = __invoke_sip_fn_smc(SIP_HDCP_CONFIG, arg0, arg1, arg2); -+ return res; -+} -+EXPORT_SYMBOL_GPL(sip_hdcp_config); ++ if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) && ++ !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) ++ attach->dmabuf->ops->unpin(attach); + -+/************************** fiq debugger **************************************/ -+/* -+ * AArch32 is not allowed to call SMC64(ATF framework does not support), so we -+ * don't change SIP_UARTDBG_FN to SIP_UARTDBG_CFG64 even when cpu is AArch32 -+ * mode. Let ATF support SIP_UARTDBG_CFG, and we just initialize SIP_UARTDBG_FN -+ * depends on compile option(CONFIG_ARM or CONFIG_ARM64). -+ */ -+#ifdef CONFIG_ARM64 -+#define SIP_UARTDBG_FN SIP_UARTDBG_CFG64 -+#else -+#define SIP_UARTDBG_FN SIP_UARTDBG_CFG -+static int firmware_64_32bit; -+#endif ++ if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) { ++ attach->sgt = sg_table; ++ attach->dir = direction; ++ } + -+static int fiq_sip_enabled; -+static int fiq_target_cpu; -+static phys_addr_t ft_fiq_mem_phy; -+static void __iomem *ft_fiq_mem_base; -+static sip_fiq_debugger_uart_irq_tf_cb_t sip_fiq_debugger_uart_irq_tf; -+static struct pt_regs fiq_pt_regs; ++#ifdef CONFIG_DMA_API_DEBUG ++ if (!IS_ERR(sg_table)) { ++ struct scatterlist *sg; ++ u64 addr; ++ int len; ++ int i; + -+int sip_fiq_debugger_is_enabled(void) -+{ -+ return fiq_sip_enabled; ++ for_each_sgtable_dma_sg(sg_table, sg, i) { ++ addr = sg_dma_address(sg); ++ len = sg_dma_len(sg); ++ if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(len)) { ++ pr_debug("%s: addr %llx or len %x is not page aligned!\n", ++ __func__, addr, len); ++ } ++ } ++ } ++#endif /* CONFIG_DMA_API_DEBUG */ ++ return sg_table; +} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_is_enabled); ++EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, DMA_BUF); + -+static void sip_fiq_debugger_get_pt_regs(void *reg_base, -+ unsigned long sp_el1) ++/** ++ * dma_buf_map_attachment_unlocked - Returns the scatterlist table of the attachment; ++ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the ++ * dma_buf_ops. ++ * @attach: [in] attachment whose scatterlist is to be returned ++ * @direction: [in] direction of DMA transfer ++ * ++ * Unlocked variant of dma_buf_map_attachment(). 
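Once an attachment is mapped, the importer typically walks the returned sg_table and programs the DMA address/length pairs into its hardware. A sketch follows; struct my_dev and my_dev_queue_segment() are made-up placeholders for the importing driver.

static void my_program_segments(struct my_dev *mdev, struct sg_table *sgt)
{
	struct scatterlist *sg;
	int i;

	/* only the DMA addresses and lengths are meaningful after mapping */
	for_each_sgtable_dma_sg(sgt, sg, i)
		my_dev_queue_segment(mdev, sg_dma_address(sg), sg_dma_len(sg));
}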
++ */ ++struct sg_table * ++dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, ++ enum dma_data_direction direction) +{ -+ __maybe_unused struct sm_nsec_ctx *nsec_ctx = reg_base; -+ __maybe_unused struct gp_regs_ctx *gp_regs = reg_base; ++ struct sg_table *sg_table; + -+#ifdef CONFIG_ARM64 -+ /* -+ * 64-bit ATF + 64-bit kernel -+ */ -+ /* copy cpu context: x0 ~ spsr_el3 */ -+ memcpy(&fiq_pt_regs, reg_base, 8 * 31); ++ might_sleep(); + -+ /* copy pstate: spsr_el3 */ -+ memcpy(&fiq_pt_regs.pstate, reg_base + 0x110, 8); -+ fiq_pt_regs.sp = sp_el1; ++ if (WARN_ON(!attach || !attach->dmabuf)) ++ return ERR_PTR(-EINVAL); + -+ /* copy pc: elr_el3 */ -+ memcpy(&fiq_pt_regs.pc, reg_base + 0x118, 8); -+#else -+ if (firmware_64_32bit == FIRMWARE_ATF_64BIT) { -+ /* -+ * 64-bit ATF + 32-bit kernel -+ */ -+ fiq_pt_regs.ARM_r0 = gp_regs->x0; -+ fiq_pt_regs.ARM_r1 = gp_regs->x1; -+ fiq_pt_regs.ARM_r2 = gp_regs->x2; -+ fiq_pt_regs.ARM_r3 = gp_regs->x3; -+ fiq_pt_regs.ARM_r4 = gp_regs->x4; -+ fiq_pt_regs.ARM_r5 = gp_regs->x5; -+ fiq_pt_regs.ARM_r6 = gp_regs->x6; -+ fiq_pt_regs.ARM_r7 = gp_regs->x7; -+ fiq_pt_regs.ARM_r8 = gp_regs->x8; -+ fiq_pt_regs.ARM_r9 = gp_regs->x9; -+ fiq_pt_regs.ARM_r10 = gp_regs->x10; -+ fiq_pt_regs.ARM_fp = gp_regs->x11; -+ fiq_pt_regs.ARM_ip = gp_regs->x12; -+ fiq_pt_regs.ARM_sp = gp_regs->x19; /* aarch32 svc_r13 */ -+ fiq_pt_regs.ARM_lr = gp_regs->x18; /* aarch32 svc_r14 */ -+ fiq_pt_regs.ARM_cpsr = gp_regs->spsr_el3; -+ fiq_pt_regs.ARM_pc = gp_regs->elr_el3; -+ } else { -+ /* -+ * 32-bit tee firmware + 32-bit kernel -+ */ -+ fiq_pt_regs.ARM_r0 = nsec_ctx->r0; -+ fiq_pt_regs.ARM_r1 = nsec_ctx->r1; -+ fiq_pt_regs.ARM_r2 = nsec_ctx->r2; -+ fiq_pt_regs.ARM_r3 = nsec_ctx->r3; -+ fiq_pt_regs.ARM_r4 = nsec_ctx->r4; -+ fiq_pt_regs.ARM_r5 = nsec_ctx->r5; -+ fiq_pt_regs.ARM_r6 = nsec_ctx->r6; -+ fiq_pt_regs.ARM_r7 = nsec_ctx->r7; -+ fiq_pt_regs.ARM_r8 = nsec_ctx->r8; -+ fiq_pt_regs.ARM_r9 = nsec_ctx->r9; -+ fiq_pt_regs.ARM_r10 = nsec_ctx->r10; -+ fiq_pt_regs.ARM_fp = nsec_ctx->r11; -+ fiq_pt_regs.ARM_ip = nsec_ctx->r12; -+ fiq_pt_regs.ARM_sp = nsec_ctx->svc_sp; -+ fiq_pt_regs.ARM_lr = nsec_ctx->svc_lr; -+ fiq_pt_regs.ARM_cpsr = nsec_ctx->mon_spsr; ++ dma_resv_lock(attach->dmabuf->resv, NULL); ++ sg_table = dma_buf_map_attachment(attach, direction); ++ dma_resv_unlock(attach->dmabuf->resv); + -+ /* -+ * 'nsec_ctx->mon_lr' is not the fiq break point's PC, because it will -+ * be override as 'psci_fiq_debugger_uart_irq_tf_cb' for optee-os to -+ * jump to fiq_debugger handler. -+ * -+ * As 'nsec_ctx->und_lr' is not used for kernel, so optee-os uses it to -+ * deliver fiq break point's PC. -+ * -+ */ -+ fiq_pt_regs.ARM_pc = nsec_ctx->und_lr; -+ } -+#endif ++ return sg_table; +} ++EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment_unlocked, DMA_BUF); + -+static void sip_fiq_debugger_uart_irq_tf_cb(unsigned long sp_el1, -+ unsigned long offset, -+ unsigned long cpu) ++/** ++ * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might ++ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of ++ * dma_buf_ops. ++ * @attach: [in] attachment to unmap buffer from ++ * @sg_table: [in] scatterlist info of the buffer to unmap ++ * @direction: [in] direction of DMA transfer ++ * ++ * This unmaps a DMA mapping for @attached obtained by dma_buf_map_attachment(). 
++ */ ++void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, ++ struct sg_table *sg_table, ++ enum dma_data_direction direction) +{ -+ char *cpu_context; -+ -+ /* calling fiq handler */ -+ if (ft_fiq_mem_base) { -+ cpu_context = (char *)ft_fiq_mem_base + offset; -+ sip_fiq_debugger_get_pt_regs(cpu_context, sp_el1); -+ sip_fiq_debugger_uart_irq_tf(&fiq_pt_regs, cpu); -+ } -+ -+ /* fiq handler done, return to EL3(then EL3 return to EL1 entry) */ -+ __invoke_sip_fn_smc(SIP_UARTDBG_FN, 0, 0, UARTDBG_CFG_OSHDL_TO_OS); -+} ++ might_sleep(); + -+int sip_fiq_debugger_uart_irq_tf_init(u32 irq_id, sip_fiq_debugger_uart_irq_tf_cb_t callback_fn) -+{ -+ struct arm_smccc_res res; ++ if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) ++ return; + -+ fiq_target_cpu = 0; ++ if (dma_buf_attachment_is_dynamic(attach)) ++ dma_resv_assert_held(attach->dmabuf->resv); + -+ /* init fiq debugger callback */ -+ sip_fiq_debugger_uart_irq_tf = callback_fn; -+ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, irq_id, -+ (unsigned long)sip_fiq_debugger_uart_irq_tf_cb, -+ UARTDBG_CFG_INIT); -+ if (IS_SIP_ERROR(res.a0)) { -+ pr_err("%s error: %d\n", __func__, (int)res.a0); -+ return res.a0; -+ } ++ if (attach->sgt == sg_table) ++ return; + -+ /* share memory ioremap */ -+ if (!ft_fiq_mem_base) { -+ ft_fiq_mem_phy = res.a1; -+ ft_fiq_mem_base = sip_map(ft_fiq_mem_phy, -+ FIQ_UARTDBG_SHARE_MEM_SIZE); -+ if (!ft_fiq_mem_base) { -+ pr_err("%s: share memory ioremap failed\n", __func__); -+ return -ENOMEM; -+ } -+ } ++ if (dma_buf_is_dynamic(attach->dmabuf)) ++ dma_resv_assert_held(attach->dmabuf->resv); + -+ fiq_sip_enabled = 1; ++ __unmap_dma_buf(attach, sg_table, direction); + -+ return SIP_RET_SUCCESS; ++ if (dma_buf_is_dynamic(attach->dmabuf) && ++ !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) ++ dma_buf_unpin(attach); +} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_uart_irq_tf_init); ++EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, DMA_BUF); + -+static ulong cpu_logical_map_mpidr(u32 cpu) -+{ -+#ifdef MODULE -+ /* Empirically, local "cpu_logical_map()" for rockchip platforms */ -+ ulong mpidr = read_cpuid_mpidr(); ++/** ++ * dma_buf_unmap_attachment_unlocked - unmaps and decreases usecount of the buffer;might ++ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of ++ * dma_buf_ops. ++ * @attach: [in] attachment to unmap buffer from ++ * @sg_table: [in] scatterlist info of the buffer to unmap ++ * @direction: [in] direction of DMA transfer ++ * ++ * Unlocked variant of dma_buf_unmap_attachment(). ++ */ ++void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, ++ struct sg_table *sg_table, ++ enum dma_data_direction direction) ++{ ++ might_sleep(); + -+ if (mpidr & MPIDR_MT_BITMASK) { -+ /* 0x100, 0x200, 0x300, 0x400 ... 
*/ -+ mpidr = (cpu & 0xff) << 8; -+ } else { -+ if (cpu < 4) -+ /* 0x00, 0x01, 0x02, 0x03 */ -+ mpidr = cpu; -+ else if (cpu < 8) -+ /* 0x100, 0x101, 0x102, 0x103 */ -+ mpidr = 0x100 | (cpu - 4); -+ else -+ pr_err("Unsupported map cpu: %d\n", cpu); -+ } ++ if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) ++ return; + -+ return mpidr; -+#else -+ return cpu_logical_map(cpu); -+#endif ++ dma_resv_lock(attach->dmabuf->resv, NULL); ++ dma_buf_unmap_attachment(attach, sg_table, direction); ++ dma_resv_unlock(attach->dmabuf->resv); +} ++EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, DMA_BUF); + -+ulong sip_cpu_logical_map_mpidr(u32 cpu) ++/** ++ * dma_buf_move_notify - notify attachments that DMA-buf is moving ++ * ++ * @dmabuf: [in] buffer which is moving ++ * ++ * Informs all attachments that they need to destroy and recreate all their ++ * mappings. ++ */ ++void dma_buf_move_notify(struct dma_buf *dmabuf) +{ -+ return cpu_logical_map_mpidr(cpu); -+} -+EXPORT_SYMBOL_GPL(sip_cpu_logical_map_mpidr); ++ struct dma_buf_attachment *attach; + -+int sip_fiq_debugger_switch_cpu(u32 cpu) -+{ -+ struct arm_smccc_res res; ++ dma_resv_assert_held(dmabuf->resv); + -+ fiq_target_cpu = cpu; -+ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, cpu_logical_map_mpidr(cpu), -+ 0, UARTDBG_CFG_OSHDL_CPUSW); -+ return res.a0; ++ list_for_each_entry(attach, &dmabuf->attachments, node) ++ if (attach->importer_ops) ++ attach->importer_ops->move_notify(attach); +} ++EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF); + -+int sip_fiq_debugger_sdei_switch_cpu(u32 cur_cpu, u32 target_cpu, u32 flag) ++/** ++ * DOC: cpu access ++ * ++ * There are multiple reasons for supporting CPU access to a dma buffer object: ++ * ++ * - Fallback operations in the kernel, for example when a device is connected ++ * over USB and the kernel needs to shuffle the data around first before ++ * sending it away. Cache coherency is handled by bracketing any transactions ++ * with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access() ++ * access. ++ * ++ * Since for most kernel internal dma-buf accesses need the entire buffer, a ++ * vmap interface is introduced. Note that on very old 32-bit architectures ++ * vmalloc space might be limited and result in vmap calls failing. ++ * ++ * Interfaces:: ++ * ++ * void \*dma_buf_vmap(struct dma_buf \*dmabuf, struct iosys_map \*map) ++ * void dma_buf_vunmap(struct dma_buf \*dmabuf, struct iosys_map \*map) ++ * ++ * The vmap call can fail if there is no vmap support in the exporter, or if ++ * it runs out of vmalloc space. Note that the dma-buf layer keeps a reference ++ * count for all vmap access and calls down into the exporter's vmap function ++ * only when no vmapping exists, and only unmaps it once. Protection against ++ * concurrent vmap/vunmap calls is provided by taking the &dma_buf.lock mutex. ++ * ++ * - For full compatibility on the importer side with existing userspace ++ * interfaces, which might already support mmap'ing buffers. This is needed in ++ * many processing pipelines (e.g. feeding a software rendered image into a ++ * hardware pipeline, thumbnail creation, snapshots, ...). Also, Android's ION ++ * framework already supported this and for DMA buffer file descriptors to ++ * replace ION buffers mmap support was needed. ++ * ++ * There is no special interfaces, userspace simply calls mmap on the dma-buf ++ * fd. But like for CPU access there's a need to bracket the actual access, ++ * which is handled by the ioctl (DMA_BUF_IOCTL_SYNC). 
Note that ++ * DMA_BUF_IOCTL_SYNC can fail with -EAGAIN or -EINTR, in which case it must ++ * be restarted. ++ * ++ * Some systems might need some sort of cache coherency management e.g. when ++ * CPU and GPU domains are being accessed through dma-buf at the same time. ++ * To circumvent this problem there are begin/end coherency markers, that ++ * forward directly to existing dma-buf device drivers vfunc hooks. Userspace ++ * can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The ++ * sequence would be used like following: ++ * ++ * - mmap dma-buf fd ++ * - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write ++ * to mmap area 3. SYNC_END ioctl. This can be repeated as often as you ++ * want (with the new data being consumed by say the GPU or the scanout ++ * device) ++ * - munmap once you don't need the buffer any more ++ * ++ * For correctness and optimal performance, it is always required to use ++ * SYNC_START and SYNC_END before and after, respectively, when accessing the ++ * mapped address. Userspace cannot rely on coherent access, even when there ++ * are systems where it just works without calling these ioctls. ++ * ++ * - And as a CPU fallback in userspace processing pipelines. ++ * ++ * Similar to the motivation for kernel cpu access it is again important that ++ * the userspace code of a given importing subsystem can use the same ++ * interfaces with a imported dma-buf buffer object as with a native buffer ++ * object. This is especially important for drm where the userspace part of ++ * contemporary OpenGL, X, and other drivers is huge, and reworking them to ++ * use a different way to mmap a buffer rather invasive. ++ * ++ * The assumption in the current dma-buf interfaces is that redirecting the ++ * initial mmap is all that's needed. A survey of some of the existing ++ * subsystems shows that no driver seems to do any nefarious thing like ++ * syncing up with outstanding asynchronous processing on the device or ++ * allocating special resources at fault time. So hopefully this is good ++ * enough, since adding interfaces to intercept pagefaults and allow pte ++ * shootdowns would increase the complexity quite a bit. ++ * ++ * Interface:: ++ * ++ * int dma_buf_mmap(struct dma_buf \*, struct vm_area_struct \*, ++ * unsigned long); ++ * ++ * If the importing subsystem simply provides a special-purpose mmap call to ++ * set up a mapping in userspace, calling do_mmap with &dma_buf.file will ++ * equally achieve that for a dma-buf object. ++ */ ++ ++static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; ++ bool write = (direction == DMA_BIDIRECTIONAL || ++ direction == DMA_TO_DEVICE); ++ struct dma_resv *resv = dmabuf->resv; ++ long ret; + -+ res = __invoke_sip_fn_smc(SIP_SDEI_FIQ_DBG_SWITCH_CPU, -+ cur_cpu, target_cpu, flag); -+ return res.a0; ++ /* Wait on any implicit rendering fences */ ++ ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), ++ true, MAX_SCHEDULE_TIMEOUT); ++ if (ret < 0) ++ return ret; ++ ++ return 0; +} + -+int sip_fiq_debugger_sdei_get_event_id(u32 *fiq, u32 *sw_cpu, u32 *flag) ++/** ++ * dma_buf_begin_cpu_access - Must be called before accessing a dma_buf from the ++ * cpu in the kernel context. Calls begin_cpu_access to allow exporter-specific ++ * preparations. Coherency is only guaranteed in the specified range for the ++ * specified access direction. ++ * @dmabuf: [in] buffer to prepare cpu access for. 
++ * @direction: [in] direction of access. ++ * ++ * After the cpu access is complete the caller should call ++ * dma_buf_end_cpu_access(). Only when cpu access is bracketed by both calls is ++ * it guaranteed to be coherent with other DMA access. ++ * ++ * This function will also wait for any DMA transactions tracked through ++ * implicit synchronization in &dma_buf.resv. For DMA transactions with explicit ++ * synchronization this function will only ensure cache coherency, callers must ++ * ensure synchronization with such DMA transactions on their own. ++ * ++ * Can return negative error values, returns 0 on success. ++ */ ++int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; -+ -+ res = __invoke_sip_fn_smc(SIP_SDEI_FIQ_DBG_GET_EVENT_ID, -+ 0, 0, 0); -+ *fiq = res.a1; -+ *sw_cpu = res.a2; -+ if (flag) -+ *flag = res.a3; ++ int ret = 0; + -+ return res.a0; -+} ++ if (WARN_ON(!dmabuf)) ++ return -EINVAL; + -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_switch_cpu); ++ might_lock(&dmabuf->resv->lock.base); + -+void sip_fiq_debugger_enable_debug(bool enable) -+{ -+ unsigned long val; ++ if (dmabuf->ops->begin_cpu_access) ++ ret = dmabuf->ops->begin_cpu_access(dmabuf, direction); + -+ val = enable ? UARTDBG_CFG_OSHDL_DEBUG_ENABLE : -+ UARTDBG_CFG_OSHDL_DEBUG_DISABLE; ++ /* Ensure that all fences are waited upon - but we first allow ++ * the native handler the chance to do so more efficiently if it ++ * chooses. A double invocation here will be reasonably cheap no-op. ++ */ ++ if (ret == 0) ++ ret = __dma_buf_begin_cpu_access(dmabuf, direction); + -+ __invoke_sip_fn_smc(SIP_UARTDBG_FN, 0, 0, val); ++ return ret; +} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_enable_debug); ++EXPORT_SYMBOL_NS_GPL(dma_buf_begin_cpu_access, DMA_BUF); + -+int sip_fiq_debugger_set_print_port(u32 port_phyaddr, u32 baudrate) ++/** ++ * dma_buf_end_cpu_access - Must be called after accessing a dma_buf from the ++ * cpu in the kernel context. Calls end_cpu_access to allow exporter-specific ++ * actions. Coherency is only guaranteed in the specified range for the ++ * specified access direction. ++ * @dmabuf: [in] buffer to complete cpu access for. ++ * @direction: [in] direction of access. ++ * ++ * This terminates CPU access started with dma_buf_begin_cpu_access(). ++ * ++ * Can return negative error values, returns 0 on success. 
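Putting the bracketing together, a kernel-side CPU fallback that fills a buffer before device access could look like this sketch; it assumes the exporter implements vmap.

static int my_cpu_fill(struct dma_buf *dmabuf, const void *src, size_t len)
{
	struct iosys_map map;
	int ret;

	ret = dma_buf_begin_cpu_access(dmabuf, DMA_TO_DEVICE);
	if (ret)
		return ret;

	ret = dma_buf_vmap_unlocked(dmabuf, &map);
	if (!ret) {
		iosys_map_memcpy_to(&map, 0, src, len);
		dma_buf_vunmap_unlocked(dmabuf, &map);
	}

	dma_buf_end_cpu_access(dmabuf, DMA_TO_DEVICE);
	return ret;
}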
++ */ ++int dma_buf_end_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) +{ -+ struct arm_smccc_res res; ++ int ret = 0; + -+ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, port_phyaddr, baudrate, -+ UARTDBG_CFG_PRINT_PORT); -+ return res.a0; -+} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_set_print_port); ++ WARN_ON(!dmabuf); + -+int sip_fiq_debugger_request_share_memory(void) -+{ -+ struct arm_smccc_res res; ++ might_lock(&dmabuf->resv->lock.base); + -+ /* request page share memory */ -+ res = sip_smc_request_share_mem(FIQ_UARTDBG_PAGE_NUMS, -+ SHARE_PAGE_TYPE_UARTDBG); -+ if (IS_SIP_ERROR(res.a0)) -+ return res.a0; ++ if (dmabuf->ops->end_cpu_access) ++ ret = dmabuf->ops->end_cpu_access(dmabuf, direction); + -+ return SIP_RET_SUCCESS; ++ return ret; +} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_request_share_memory); ++EXPORT_SYMBOL_NS_GPL(dma_buf_end_cpu_access, DMA_BUF); + -+int sip_fiq_debugger_get_target_cpu(void) -+{ -+ return fiq_target_cpu; -+} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_get_target_cpu); + -+void sip_fiq_debugger_enable_fiq(bool enable, uint32_t tgt_cpu) ++/** ++ * dma_buf_mmap - Setup up a userspace mmap with the given vma ++ * @dmabuf: [in] buffer that should back the vma ++ * @vma: [in] vma for the mmap ++ * @pgoff: [in] offset in pages where this mmap should start within the ++ * dma-buf buffer. ++ * ++ * This function adjusts the passed in vma so that it points at the file of the ++ * dma_buf operation. It also adjusts the starting pgoff and does bounds ++ * checking on the size of the vma. Then it calls the exporters mmap function to ++ * set up the mapping. ++ * ++ * Can return negative error values, returns 0 on success. ++ */ ++int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, ++ unsigned long pgoff) +{ -+ u32 en; -+ -+ fiq_target_cpu = tgt_cpu; -+ en = enable ? UARTDBG_CFG_FIQ_ENABEL : UARTDBG_CFG_FIQ_DISABEL; -+ __invoke_sip_fn_smc(SIP_UARTDBG_FN, tgt_cpu, 0, en); -+} -+EXPORT_SYMBOL_GPL(sip_fiq_debugger_enable_fiq); ++ if (WARN_ON(!dmabuf || !vma)) ++ return -EINVAL; + -+int sip_fiq_control(u32 sub_func, u32 irq, unsigned long data) -+{ -+ struct arm_smccc_res res; ++ /* check if buffer supports mmap */ ++ if (!dmabuf->ops->mmap) ++ return -EINVAL; + -+ res = __invoke_sip_fn_smc(RK_SIP_FIQ_CTRL, -+ sub_func, irq, data); -+ return res.a0; -+} -+EXPORT_SYMBOL_GPL(sip_fiq_control); ++ /* check for offset overflow */ ++ if (pgoff + vma_pages(vma) < pgoff) ++ return -EOVERFLOW; + -+int sip_wdt_config(u32 sub_func, u32 arg1, u32 arg2, u32 arg3) -+{ -+ struct arm_smccc_res res; ++ /* check for overflowing the buffer's size */ ++ if (pgoff + vma_pages(vma) > ++ dmabuf->size >> PAGE_SHIFT) ++ return -EINVAL; + -+ arm_smccc_smc(SIP_WDT_CFG, sub_func, arg1, arg2, arg3, -+ 0, 0, 0, &res); ++ /* readjust the vma */ ++ vma_set_file(vma, dmabuf->file); ++ vma->vm_pgoff = pgoff; + -+ return res.a0; ++ return dmabuf->ops->mmap(dmabuf, vma); +} -+EXPORT_SYMBOL_GPL(sip_wdt_config); ++EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF); + -+int sip_hdmirx_config(u32 sub_func, u32 arg1, u32 arg2, u32 arg3) ++/** ++ * dma_buf_vmap - Create virtual mapping for the buffer object into kernel ++ * address space. Same restrictions as for vmap and friends apply. ++ * @dmabuf: [in] buffer to vmap ++ * @map: [out] returns the vmap pointer ++ * ++ * This call may fail due to lack of virtual mapping address space. ++ * These calls are optional in drivers. The intended use for them ++ * is for mapping objects linear in kernel space for high use objects. 
++ * ++ * To ensure coherency users must call dma_buf_begin_cpu_access() and ++ * dma_buf_end_cpu_access() around any cpu access performed through this ++ * mapping. ++ * ++ * Returns 0 on success, or a negative errno code otherwise. ++ */ ++int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map) +{ -+ struct arm_smccc_res res; ++ struct iosys_map ptr; ++ int ret = 0; + -+ arm_smccc_smc(SIP_HDMIRX_CFG, sub_func, arg1, arg2, arg3, -+ 0, 0, 0, &res); ++ iosys_map_clear(map); + -+ return res.a0; -+} -+EXPORT_SYMBOL_GPL(sip_hdmirx_config); ++ if (WARN_ON(!dmabuf)) ++ return -EINVAL; + -+int sip_hdcpkey_init(u32 hdcp_id) -+{ -+ struct arm_smccc_res res; ++ if (!dmabuf->ops->vmap) ++ return -EINVAL; + -+ res = __invoke_sip_fn_smc(TRUSTED_OS_HDCPKEY_INIT, hdcp_id, 0, 0); ++ mutex_lock(&dmabuf->lock); ++ if (dmabuf->vmapping_counter) { ++ dmabuf->vmapping_counter++; ++ BUG_ON(iosys_map_is_null(&dmabuf->vmap_ptr)); ++ *map = dmabuf->vmap_ptr; ++ goto out_unlock; ++ } + -+ return res.a0; -+} -+EXPORT_SYMBOL_GPL(sip_hdcpkey_init); ++ BUG_ON(iosys_map_is_set(&dmabuf->vmap_ptr)); + -+int sip_smc_mcu_config(unsigned long mcu_id, -+ unsigned long func, -+ unsigned long arg2) -+{ -+ struct arm_smccc_res res; ++ ret = dmabuf->ops->vmap(dmabuf, &ptr); ++ if (WARN_ON_ONCE(ret)) ++ goto out_unlock; + -+ res = __invoke_sip_fn_smc(SIP_MCU_CFG, mcu_id, func, arg2); -+ return res.a0; ++ dmabuf->vmap_ptr = ptr; ++ dmabuf->vmapping_counter = 1; ++ ++ *map = dmabuf->vmap_ptr; ++ ++out_unlock: ++ mutex_unlock(&dmabuf->lock); ++ return ret; +} -+EXPORT_SYMBOL_GPL(sip_smc_mcu_config); -+/******************************************************************************/ -+#ifdef CONFIG_ARM -+static __init int sip_firmware_init(void) ++EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, DMA_BUF); ++ ++/** ++ * dma_buf_vmap_unlocked - Create virtual mapping for the buffer object into kernel ++ * address space. Same restrictions as for vmap and friends apply. ++ * @dmabuf: [in] buffer to vmap ++ * @map: [out] returns the vmap pointer ++ * ++ * Unlocked version of dma_buf_vmap() ++ * ++ * Returns 0 on success, or a negative errno code otherwise. ++ */ ++int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) +{ -+ struct arm_smccc_res res; ++ int ret; + -+ if (!psci_smp_available()) -+ return 0; ++ iosys_map_clear(map); + -+ /* -+ * OP-TEE works on kernel 3.10 and 4.4 and we have different sip -+ * implement. We should tell OP-TEE the current rockchip sip version. -+ */ -+ res = __invoke_sip_fn_smc(SIP_SIP_VERSION, SIP_IMPLEMENT_V2, -+ SECURE_REG_WR, 0); -+ if (IS_SIP_ERROR(res.a0)) -+ pr_err("%s: set rockchip sip version v2 failed\n", __func__); ++ if (WARN_ON(!dmabuf)) ++ return -EINVAL; + -+ /* -+ * Currently, we support: -+ * -+ * 1. 64-bit ATF + 64-bit kernel; -+ * 2. 64-bit ATF + 32-bit kernel; -+ * 3. 32-bit TEE + 32-bit kernel; -+ * -+ * We need to detect which case of above and record in firmware_64_32bit -+ * We get info from cpuid and compare with all supported ARMv7 cpu. 
-+ */ -+ switch (read_cpuid_part()) { -+ case ARM_CPU_PART_CORTEX_A7: -+ case ARM_CPU_PART_CORTEX_A8: -+ case ARM_CPU_PART_CORTEX_A9: -+ case ARM_CPU_PART_CORTEX_A12: -+ case ARM_CPU_PART_CORTEX_A15: -+ case ARM_CPU_PART_CORTEX_A17: -+ firmware_64_32bit = FIRMWARE_TEE_32BIT; -+ break; -+ default: -+ firmware_64_32bit = FIRMWARE_ATF_64BIT; -+ break; -+ } ++ dma_resv_lock(dmabuf->resv, NULL); ++ ret = dma_buf_vmap(dmabuf, map); ++ dma_resv_unlock(dmabuf->resv); + -+ return 0; ++ return ret; +} -+arch_initcall(sip_firmware_init); -+#endif ++EXPORT_SYMBOL_NS_GPL(dma_buf_vmap_unlocked, DMA_BUF); + -+MODULE_DESCRIPTION("Rockchip SIP Call"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig -index ebd4e113d..c32ec4488 100644 ---- a/drivers/gpio/Kconfig -+++ b/drivers/gpio/Kconfig -@@ -1026,6 +1026,21 @@ config GPIO_ADNP - enough to represent all pins, but the driver will assume a - register layout for 64 pins (8 registers). - -+config GPIO_AW9110 -+ tristate "AW9110 I2C GPIO expanders" -+ select GPIOLIB_IRQCHIP -+ select IRQ_DOMAIN -+ help -+ Say yes here to provide access to I2C -+ GPIO expanders used for additional digital outputs or inputs. -+ Your board setup code will need to declare the expanders in -+ use, and assign numbers to the GPIOs they expose. Those GPIOs -+ can then be used from drivers and other kernel code, just like -+ other GPIOs, but only accessible from task contexts. ++/** ++ * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap. ++ * @dmabuf: [in] buffer to vunmap ++ * @map: [in] vmap pointer to vunmap ++ */ ++void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) ++{ ++ if (WARN_ON(!dmabuf)) ++ return; + -+ This driver provides an in-kernel interface to those GPIOs using -+ platform-neutral GPIO calls. ++ BUG_ON(iosys_map_is_null(&dmabuf->vmap_ptr)); ++ BUG_ON(dmabuf->vmapping_counter == 0); ++ BUG_ON(!iosys_map_is_equal(&dmabuf->vmap_ptr, map)); + - config GPIO_FXL6408 - tristate "FXL6408 I2C GPIO expander" - select GPIO_REGMAP -@@ -1086,6 +1101,14 @@ config GPIO_MAX732X_IRQ - Say yes here to enable the max732x to be used as an interrupt - controller. It requires the driver to be built in the kernel. - -+config GPIO_NCA9539 -+ tristate "NCA9539 I2C GPIO expander" -+ depends on I2C || COMPILE_TEST -+ select REGMAP_I2C -+ help -+ Say yes here to support the NCA9539 series of I2C Expanders. -+ GPIO expanders used for additional digital outputs or inputs. 
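For consumers, lines on such an I2C expander are requested and driven like any other GPIO through the gpiod consumer API. A sketch follows; the "status-led" con_id is made up, and the _cansleep accessor reflects that the pin sits behind an I2C bus and is only usable from task context.

static int my_blink_status_led(struct device *dev)
{
	struct gpio_desc *led;

	led = devm_gpiod_get(dev, "status-led", GPIOD_OUT_LOW);
	if (IS_ERR(led))
		return PTR_ERR(led);

	/* expander access goes over I2C, so it may sleep */
	gpiod_set_value_cansleep(led, 1);
	return 0;
}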
++ mutex_lock(&dmabuf->lock); ++ if (--dmabuf->vmapping_counter == 0) { ++ if (dmabuf->ops->vunmap) ++ dmabuf->ops->vunmap(dmabuf, map); ++ iosys_map_clear(&dmabuf->vmap_ptr); ++ } ++ mutex_unlock(&dmabuf->lock); ++} ++EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, DMA_BUF); + - config GPIO_PCA953X - tristate "PCA95[357]x, PCA9698, TCA64xx, and MAX7310 I/O ports" - select REGMAP_I2C -diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile -index eb73b5d63..d0f3e4332 100644 ---- a/drivers/gpio/Makefile -+++ b/drivers/gpio/Makefile -@@ -36,6 +36,7 @@ obj-$(CONFIG_GPIO_ARIZONA) += gpio-arizona.o - obj-$(CONFIG_GPIO_ASPEED) += gpio-aspeed.o - obj-$(CONFIG_GPIO_ASPEED_SGPIO) += gpio-aspeed-sgpio.o - obj-$(CONFIG_GPIO_ATH79) += gpio-ath79.o -+obj-$(CONFIG_GPIO_AW9110) += gpio-aw9110.o - obj-$(CONFIG_GPIO_BCM_KONA) += gpio-bcm-kona.o - obj-$(CONFIG_GPIO_BCM_XGS_IPROC) += gpio-xgs-iproc.o - obj-$(CONFIG_GPIO_BD71815) += gpio-bd71815.o -@@ -116,6 +117,7 @@ obj-$(CONFIG_GPIO_MT7621) += gpio-mt7621.o - obj-$(CONFIG_GPIO_MVEBU) += gpio-mvebu.o - obj-$(CONFIG_GPIO_MXC) += gpio-mxc.o - obj-$(CONFIG_GPIO_MXS) += gpio-mxs.o -+obj-$(CONFIG_GPIO_NCA9539) += gpio-nca9539.o - obj-$(CONFIG_GPIO_OCTEON) += gpio-octeon.o - obj-$(CONFIG_GPIO_OMAP) += gpio-omap.o - obj-$(CONFIG_GPIO_PALMAS) += gpio-palmas.o -@@ -136,7 +138,7 @@ obj-$(CONFIG_GPIO_RDA) += gpio-rda.o - obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o - obj-$(CONFIG_GPIO_REALTEK_OTTO) += gpio-realtek-otto.o - obj-$(CONFIG_GPIO_REG) += gpio-reg.o --obj-$(CONFIG_GPIO_ROCKCHIP) += gpio-rockchip.o -+obj-$(CONFIG_GPIO_ROCKCHIP) += gpio-rockchip-oh.o - obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o - obj-$(CONFIG_GPIO_SAMA5D2_PIOBU) += gpio-sama5d2-piobu.o - obj-$(CONFIG_GPIO_SCH311X) += gpio-sch311x.o -diff --git a/drivers/gpio/gpio-aw9110.c b/drivers/gpio/gpio-aw9110.c -new file mode 100644 -index 000000000..ab97726fc ---- /dev/null -+++ b/drivers/gpio/gpio-aw9110.c -@@ -0,0 +1,500 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Driver for aw9110 I2C GPIO expanders -+ * -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++/** ++ * dma_buf_vunmap_unlocked - Unmap a vmap obtained by dma_buf_vmap. 
++ * @dmabuf: [in] buffer to vunmap ++ * @map: [in] vmap pointer to vunmap + */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define REG_INPUT_P0 0x00 -+#define REG_INPUT_P1 0x01 -+#define REG_OUTPUT_P0 0x02 -+#define REG_OUTPUT_P1 0x03 -+#define REG_CONFIG_P0 0x04 -+#define REG_CONFIG_P1 0x05 -+#define REG_INT_P0 0x06 -+#define REG_INT_P1 0x07 -+#define REG_ID 0x10 -+#define REG_CTRL 0x11 -+#define REG_WORK_MODE_P0 0x12 -+#define REG_WORK_MODE_P1 0x13 -+#define REG_EN_BREATH 0x14 -+#define REG_FADE_TIME 0x15 -+#define REG_FULL_TIME 0x16 -+#define REG_DLY0_BREATH 0x17 -+#define REG_DLY1_BREATH 0x18 -+#define REG_DLY2_BREATH 0x19 -+#define REG_DLY3_BREATH 0x1a -+#define REG_DLY4_BREATH 0x1b -+#define REG_DLY5_BREATH 0x1c -+#define REG_DIM00 0x20 -+#define REG_DIM01 0x21 -+#define REG_DIM02 0x22 -+#define REG_DIM03 0x23 -+#define REG_DIM04 0x24 -+#define REG_DIM05 0x25 -+#define REG_DIM06 0x26 -+#define REG_DIM07 0x27 -+#define REG_DIM08 0x28 -+#define REG_DIM09 0x29 -+#define REG_SWRST 0x7F -+#define REG_81H 0x81 -+ ++void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) ++{ ++ if (WARN_ON(!dmabuf)) ++ return; + -+static const struct i2c_device_id aw9110_id[] = { -+ { "aw9110", 10 }, -+ { } -+}; -+MODULE_DEVICE_TABLE(i2c, aw9110_id); ++ dma_resv_lock(dmabuf->resv, NULL); ++ dma_buf_vunmap(dmabuf, map); ++ dma_resv_unlock(dmabuf->resv); ++} ++EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF); + -+#ifdef CONFIG_OF -+static const struct of_device_id aw9110_of_table[] = { -+ { .compatible = "awinic,aw9110" }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, aw9110_of_table); -+#endif ++#ifdef CONFIG_DEBUG_FS ++static int dma_buf_debug_show(struct seq_file *s, void *unused) ++{ ++ struct dma_buf *buf_obj; ++ struct dma_buf_attachment *attach_obj; ++ int count = 0, attach_count; ++ size_t size = 0; ++ int ret; + ++ ret = mutex_lock_interruptible(&db_list.lock); + -+struct aw9110 { -+ struct gpio_chip chip; -+ struct irq_chip irqchip; -+ struct i2c_client *client; -+ struct mutex lock; /* protect 'out' */ -+ unsigned int out; /* software latch */ -+ unsigned int direct; /* gpio direct */ -+ unsigned int status; /* current status */ -+ unsigned int irq_enabled; /* enabled irqs */ ++ if (ret) ++ return ret; + -+ struct device *dev; -+ int shdn_en; /* shutdown ctrl */ ++ seq_puts(s, "\nDma-buf Objects:\n"); ++ seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", ++ "size", "flags", "mode", "count", "ino"); + -+ int (*write)(struct i2c_client *client, u8 reg, u8 data); -+ int (*read)(struct i2c_client *client, u8 reg); -+}; ++ list_for_each_entry(buf_obj, &db_list.head, list_node) { + ++ ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); ++ if (ret) ++ goto error_unlock; + -+static int aw9110_i2c_write_le8(struct i2c_client *client, u8 reg, u8 data) -+{ -+ return i2c_smbus_write_byte_data(client, reg, data); -+} + -+static int aw9110_i2c_read_le8(struct i2c_client *client, u8 reg) -+{ -+ return (int)i2c_smbus_read_byte_data(client, reg); -+} ++ spin_lock(&buf_obj->name_lock); ++ seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n", ++ buf_obj->size, ++ buf_obj->file->f_flags, buf_obj->file->f_mode, ++ file_count(buf_obj->file), ++ buf_obj->exp_name, ++ file_inode(buf_obj->file)->i_ino, ++ buf_obj->name ?: ""); ++ spin_unlock(&buf_obj->name_lock); + -+static int aw9110_get(struct gpio_chip *chip, unsigned int offset) -+{ -+ struct aw9110 *gpio = gpiochip_get_data(chip); -+ 
int value = 0; ++ dma_resv_describe(buf_obj->resv, s); + -+ mutex_lock(&gpio->lock); ++ seq_puts(s, "\tAttached Devices:\n"); ++ attach_count = 0; + -+ if (offset < 4) { -+ value = gpio->read(gpio->client, REG_INPUT_P1); -+ mutex_unlock(&gpio->lock); ++ list_for_each_entry(attach_obj, &buf_obj->attachments, node) { ++ seq_printf(s, "\t%s\n", dev_name(attach_obj->dev)); ++ attach_count++; ++ } ++ dma_resv_unlock(buf_obj->resv); + -+ value = (value < 0) ? value : !!(value & (1 << offset)); -+ } else { -+ value = gpio->read(gpio->client, REG_INPUT_P0); -+ mutex_unlock(&gpio->lock); ++ seq_printf(s, "Total %d devices attached\n\n", ++ attach_count); + -+ value = (value < 0) ? value : !!((value<<4) & (1 << offset)); ++ count++; ++ size += buf_obj->size; + } + -+ return value; -+} -+ -+static int aw9110_get_direction(struct gpio_chip *chip, unsigned int offset) -+{ -+ struct aw9110 *gpio = gpiochip_get_data(chip); -+ unsigned int reg_val; ++ seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size); + -+ reg_val = gpio->direct; ++ mutex_unlock(&db_list.lock); ++ return 0; + -+ dev_dbg(gpio->dev, "direct get: %04X, pin:%d\n", reg_val, offset); ++error_unlock: ++ mutex_unlock(&db_list.lock); ++ return ret; ++} + -+ if (reg_val & (1<lock); ++ struct dentry *d; ++ int err = 0; + -+ /* set direct */ -+ gpio->direct |= (1<write(gpio->client, REG_CONFIG_P1, gpio->direct&0x0F); -+ else -+ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct >> 4)&0x3F); ++ dma_buf_debugfs_dir = d; + -+ mutex_unlock(&gpio->lock); ++ d = debugfs_create_file("bufinfo", S_IRUGO, dma_buf_debugfs_dir, ++ NULL, &dma_buf_debug_fops); ++ if (IS_ERR(d)) { ++ pr_debug("dma_buf: debugfs: failed to create node bufinfo\n"); ++ debugfs_remove_recursive(dma_buf_debugfs_dir); ++ dma_buf_debugfs_dir = NULL; ++ err = PTR_ERR(d); ++ } + -+ dev_dbg(gpio->dev, "direct in: %04X, pin:%d\n", gpio->direct, offset); ++ dma_buf_process_info_init_debugfs(dma_buf_debugfs_dir); ++ return err; ++} + ++static void dma_buf_uninit_debugfs(void) ++{ ++ debugfs_remove_recursive(dma_buf_debugfs_dir); ++} ++#else ++static inline int dma_buf_init_debugfs(void) ++{ + return 0; +} ++static inline void dma_buf_uninit_debugfs(void) ++{ ++} ++#endif + -+static int aw9110_direction_output(struct gpio_chip *chip, unsigned int offset, int value) ++#ifdef CONFIG_DMABUF_PROCESS_INFO ++struct dma_buf *get_dma_buf_from_file(struct file *f) +{ -+ struct aw9110 *gpio = gpiochip_get_data(chip); ++ if (IS_ERR_OR_NULL(f)) ++ return NULL; + -+ /* set level */ -+ chip->set(chip, offset, value); ++ if (!is_dma_buf_file(f)) ++ return NULL; + -+ mutex_lock(&gpio->lock); ++ return f->private_data; ++} ++#endif /* CONFIG_DMABUF_PROCESS_INFO */ + -+ /* set direct */ -+ gpio->direct &= ~(1<write(gpio->client, REG_CONFIG_P1, gpio->direct&0x0F); -+ else -+ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct >> 4)&0x3F); ++ ret = dma_buf_init_sysfs_statistics(); ++ if (ret) ++ return ret; + -+ mutex_unlock(&gpio->lock); ++ dma_buf_mnt = kern_mount(&dma_buf_fs_type); ++ if (IS_ERR(dma_buf_mnt)) ++ return PTR_ERR(dma_buf_mnt); + -+ dev_dbg(gpio->dev, "direct out: %04X, pin:%d\n", gpio->direct, offset); ++ mutex_init(&db_list.lock); ++ INIT_LIST_HEAD(&db_list.head); ++ dma_buf_init_debugfs(); ++ dma_buf_process_info_init_procfs(); + return 0; +} ++subsys_initcall(dma_buf_init); + -+static void aw9110_set(struct gpio_chip *chip, unsigned int offset, int value) ++static void __exit dma_buf_deinit(void) +{ -+ struct aw9110 *gpio = gpiochip_get_data(chip); -+ unsigned int bit = 1 << 
offset; -+ -+ mutex_lock(&gpio->lock); -+ -+ if (value) -+ gpio->out |= bit; -+ else -+ gpio->out &= ~bit; -+ -+ if (offset < 4) -+ gpio->write(gpio->client, REG_OUTPUT_P1, gpio->out >> 0); -+ else -+ gpio->write(gpio->client, REG_OUTPUT_P0, gpio->out >> 4); -+ -+ mutex_unlock(&gpio->lock); ++ dma_buf_uninit_debugfs(); ++ kern_unmount(dma_buf_mnt); ++ dma_buf_uninit_sysfs_statistics(); ++ dma_buf_process_info_uninit_procfs(); +} ++__exitcall(dma_buf_deinit); +diff --git a/drivers/dma-buf/dma-heap-rk.c b/drivers/dma-buf/dma-heap-rk.c +new file mode 100644 +index 000000000..a777e9185 +--- /dev/null ++++ b/drivers/dma-buf/dma-heap-rk.c +@@ -0,0 +1,340 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Framework for userspace DMA-BUF allocations ++ * ++ * Copyright (C) 2011 Google, Inc. ++ * Copyright (C) 2019 Linaro Ltd. ++ */ + -+/*-------------------------------------------------------------------------*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static irqreturn_t aw9110_irq(int irq, void *data) -+{ -+ struct aw9110 *gpio = data; -+ unsigned long change, i, status = 0; ++#define DEVNAME "dma_heap" + -+ int value = 0; -+ int nirq; ++#define NUM_HEAP_MINORS 128 + -+ value = gpio->read(gpio->client, REG_INPUT_P1); -+ status |= (value < 0) ? 0 : value; ++/** ++ * struct dma_heap - represents a dmabuf heap in the system ++ * @name: used for debugging/device-node name ++ * @ops: ops struct for this heap ++ * @heap_devt heap device node ++ * @list list head connecting to list of heaps ++ * @heap_cdev heap char device ++ * ++ * Represents a heap of memory from which buffers can be made. ++ */ ++struct dma_heap { ++ const char *name; ++ const struct dma_heap_ops *ops; ++ void *priv; ++ dev_t heap_devt; ++ struct list_head list; ++ struct cdev heap_cdev; ++}; + -+ value = gpio->read(gpio->client, REG_INPUT_P0); -+ status |= (value < 0) ? 0 : (value<<4); ++static LIST_HEAD(heap_list); ++static DEFINE_MUTEX(heap_list_lock); ++static dev_t dma_heap_devt; ++static struct class *dma_heap_class; ++static DEFINE_XARRAY_ALLOC(dma_heap_minors); + ++static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, ++ unsigned int fd_flags, ++ unsigned int heap_flags) ++{ ++ struct dma_buf *dmabuf; ++ int fd; + + /* -+ * call the interrupt handler iff gpio is used as -+ * interrupt source, just to avoid bad irqs ++ * Allocations from all heaps have to begin ++ * and end on page boundaries. 
+ */ -+ mutex_lock(&gpio->lock); -+ change = (gpio->status ^ status) & gpio->irq_enabled; -+ gpio->status = status; -+ mutex_unlock(&gpio->lock); ++ len = PAGE_ALIGN(len); ++ if (!len) ++ return -EINVAL; + -+ for_each_set_bit(i, &change, gpio->chip.ngpio) { -+ nirq = irq_find_mapping(gpio->chip.irq.domain, i); -+ if (nirq) { -+ dev_dbg(gpio->dev, "status:%04lx,change:%04lx,index:%ld,nirq:%d\n", -+ status, change, i, nirq); -+ handle_nested_irq(nirq); -+ } -+ } ++ dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags); ++ if (IS_ERR(dmabuf)) ++ return PTR_ERR(dmabuf); + -+ return IRQ_HANDLED; ++ fd = dma_buf_fd(dmabuf, fd_flags); ++ if (fd < 0) { ++ dma_buf_put(dmabuf); ++ /* just return, as put will call release and that will free */ ++ } ++ return fd; +} + -+/* -+ * NOP functions -+ */ -+static void aw9110_noop(struct irq_data *data) { } -+ -+static int aw9110_irq_set_wake(struct irq_data *data, unsigned int on) ++static int dma_heap_open(struct inode *inode, struct file *file) +{ -+ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); ++ struct dma_heap *heap; + -+ return irq_set_irq_wake(gpio->client->irq, on); -+} ++ heap = xa_load(&dma_heap_minors, iminor(inode)); ++ if (!heap) { ++ pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); ++ return -ENODEV; ++ } + -+static void aw9110_irq_enable(struct irq_data *data) -+{ -+ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); ++ /* instance data as context */ ++ file->private_data = heap; ++ nonseekable_open(inode, file); + -+ gpio->irq_enabled |= (1 << data->hwirq); ++ return 0; +} + -+static void aw9110_irq_disable(struct irq_data *data) ++static long dma_heap_ioctl_allocate(struct file *file, void *data) +{ -+ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); ++ struct dma_heap_allocation_data *heap_allocation = data; ++ struct dma_heap *heap = file->private_data; ++ int fd; + -+ gpio->irq_enabled &= ~(1 << data->hwirq); -+} ++ if (heap_allocation->fd) ++ return -EINVAL; + -+static void aw9110_irq_bus_lock(struct irq_data *data) -+{ -+ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); ++ if (heap_allocation->fd_flags & ~DMA_HEAP_VALID_FD_FLAGS) ++ return -EINVAL; + -+ mutex_lock(&gpio->lock); -+} ++ fd = dma_heap_buffer_alloc(heap, heap_allocation->len, ++ heap_allocation->fd_flags, ++ heap_allocation->heap_flags); ++ if (fd < 0) ++ return fd; + -+static void aw9110_irq_bus_sync_unlock(struct irq_data *data) -+{ -+ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); ++ heap_allocation->fd = fd; + -+ mutex_unlock(&gpio->lock); ++ return 0; +} + -+static void aw9110_state_init(struct aw9110 *gpio) ++static int dma_heap_ioctl_get_phys(struct file *file, void *data) +{ -+ /* out4-9 push-pull */ -+ gpio->write(gpio->client, REG_CTRL, (1<<4)); -+ -+ /* work mode : gpio */ -+ gpio->write(gpio->client, REG_WORK_MODE_P1, 0x0F); -+ gpio->write(gpio->client, REG_WORK_MODE_P0, 0x3F); -+ -+ /* default direct */ -+ gpio->direct = 0x03FF; /* 0: output, 1:input */ -+ gpio->write(gpio->client, REG_CONFIG_P1, gpio->direct & 0x0F); -+ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct>>4) & 0x3F); ++#if IS_ENABLED(CONFIG_NO_GKI) ++ struct dma_heap *heap = file->private_data; ++ struct dma_heap_phys_data *phys = data; + -+ /* interrupt enable */ -+ gpio->irq_enabled = 0x03FF; /* 0: disable 1:enable, chip: 0:enable, 1: disable */ -+ gpio->write(gpio->client, REG_INT_P1, ((~gpio->irq_enabled) >> 0)&0x0F); -+ gpio->write(gpio->client, REG_INT_P0, ((~gpio->irq_enabled) >> 4)&0x3F); ++ if (heap->ops->get_phys) ++ return 
heap->ops->get_phys(heap, phys); ++#endif + -+ /* clear interrupt */ -+ gpio->read(gpio->client, REG_INPUT_P1); -+ gpio->read(gpio->client, REG_INPUT_P1); ++ return -EINVAL; +} + -+static int aw9110_parse_dt(struct aw9110 *chip, struct i2c_client *client) ++static unsigned int dma_heap_ioctl_cmds[] = { ++ DMA_HEAP_IOCTL_ALLOC, ++ DMA_HEAP_IOCTL_GET_PHYS, ++}; ++ ++static long dma_heap_ioctl(struct file *file, unsigned int ucmd, ++ unsigned long arg) +{ -+ struct device_node *np = client->dev.of_node; ++ char stack_kdata[128]; ++ char *kdata = stack_kdata; ++ unsigned int kcmd; ++ unsigned int in_size, out_size, drv_size, ksize; ++ int nr = _IOC_NR(ucmd); + int ret = 0; + -+ /* shdn_en */ -+ ret = of_get_named_gpio(np, "shdn_en", 0); -+ if (ret < 0) { -+ dev_err(chip->dev, "of get shdn_en failed\n"); -+ chip->shdn_en = -1; -+ } else { -+ chip->shdn_en = ret; ++ if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds)) ++ return -EINVAL; + -+ ret = devm_gpio_request_one(chip->dev, chip->shdn_en, -+ GPIOF_OUT_INIT_LOW, "AW9110_SHDN_EN"); -+ if (ret) { -+ dev_err(chip->dev, -+ "devm_gpio_request_one shdn_en failed\n"); -+ return ret; -+ } ++ nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds)); ++ /* Get the kernel ioctl cmd that matches */ ++ kcmd = dma_heap_ioctl_cmds[nr]; + -+ /* enable chip */ -+ gpio_set_value(chip->shdn_en, 1); -+ } ++ /* Figure out the delta between user cmd size and kernel cmd size */ ++ drv_size = _IOC_SIZE(kcmd); ++ out_size = _IOC_SIZE(ucmd); ++ in_size = out_size; ++ if ((ucmd & kcmd & IOC_IN) == 0) ++ in_size = 0; ++ if ((ucmd & kcmd & IOC_OUT) == 0) ++ out_size = 0; ++ ksize = max(max(in_size, out_size), drv_size); + -+ return 0; -+} ++ /* If necessary, allocate buffer for ioctl argument */ ++ if (ksize > sizeof(stack_kdata)) { ++ kdata = kmalloc(ksize, GFP_KERNEL); ++ if (!kdata) ++ return -ENOMEM; ++ } + -+static int aw9110_check_dev_id(struct i2c_client *client) -+{ -+ int ret; ++ if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { ++ ret = -EFAULT; ++ goto err; ++ } + -+ ret = aw9110_i2c_read_le8(client, REG_ID); ++ /* zero out any difference between the kernel/user structure size */ ++ if (ksize > in_size) ++ memset(kdata + in_size, 0, ksize - in_size); + -+ if (ret < 0) { -+ dev_err(&client->dev, "fail to read dev id(%d)\n", ret); -+ return ret; ++ switch (kcmd) { ++ case DMA_HEAP_IOCTL_ALLOC: ++ ret = dma_heap_ioctl_allocate(file, kdata); ++ break; ++ case DMA_HEAP_IOCTL_GET_PHYS: ++ ret = dma_heap_ioctl_get_phys(file, kdata); ++ break; ++ default: ++ ret = -ENOTTY; ++ goto err; + } + -+ dev_info(&client->dev, "dev id : 0x%02x\n", ret); -+ -+ return 0; ++ if (copy_to_user((void __user *)arg, kdata, out_size) != 0) ++ ret = -EFAULT; ++err: ++ if (kdata != stack_kdata) ++ kfree(kdata); ++ return ret; +} + -+/*-------------------------------------------------------------------------*/ ++static const struct file_operations dma_heap_fops = { ++ .owner = THIS_MODULE, ++ .open = dma_heap_open, ++ .unlocked_ioctl = dma_heap_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = dma_heap_ioctl, ++#endif ++}; + -+static int aw9110_probe(struct i2c_client *client, -+ const struct i2c_device_id *id) ++/** ++ * dma_heap_get_drvdata() - get per-subdriver data for the heap ++ * @heap: DMA-Heap to retrieve private data for ++ * ++ * Returns: ++ * The per-subdriver data for the heap. 
++ */ ++void *dma_heap_get_drvdata(struct dma_heap *heap) +{ -+ struct aw9110 *gpio; -+ int status; -+ -+ dev_info(&client->dev, "===aw9110 probe===\n"); -+ -+ /* Allocate, initialize, and register this gpio_chip. */ -+ gpio = devm_kzalloc(&client->dev, sizeof(*gpio), GFP_KERNEL); -+ if (!gpio) -+ return -ENOMEM; -+ -+ gpio->dev = &client->dev; -+ -+ aw9110_parse_dt(gpio, client); -+ -+ mutex_init(&gpio->lock); -+ -+ gpio->chip.base = -1; -+ gpio->chip.can_sleep = true; -+ gpio->chip.parent = &client->dev; -+ gpio->chip.owner = THIS_MODULE; -+ gpio->chip.get = aw9110_get; -+ gpio->chip.set = aw9110_set; -+ gpio->chip.get_direction = aw9110_get_direction; -+ gpio->chip.direction_input = aw9110_direction_input; -+ gpio->chip.direction_output = aw9110_direction_output; -+ gpio->chip.ngpio = id->driver_data; ++ return heap->priv; ++} + -+ gpio->write = aw9110_i2c_write_le8; -+ gpio->read = aw9110_i2c_read_le8; ++/** ++ * dma_heap_get_name() - get heap name ++ * @heap: DMA-Heap to retrieve private data for ++ * ++ * Returns: ++ * The char* for the heap name. ++ */ ++const char *dma_heap_get_name(struct dma_heap *heap) ++{ ++ return heap->name; ++} + -+ gpio->chip.label = client->name; ++struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) ++{ ++ struct dma_heap *heap, *h, *err_ret; ++ struct device *dev_ret; ++ unsigned int minor; ++ int ret; + -+ gpio->client = client; -+ i2c_set_clientdata(client, gpio); ++ if (!exp_info->name || !strcmp(exp_info->name, "")) { ++ pr_err("dma_heap: Cannot add heap without a name\n"); ++ return ERR_PTR(-EINVAL); ++ } + -+ status = aw9110_check_dev_id(client); -+ if (status < 0) { -+ dev_err(&client->dev, "check device id fail(%d)\n", status); -+ goto fail; ++ if (!exp_info->ops || !exp_info->ops->allocate) { ++ pr_err("dma_heap: Cannot add heap with invalid ops struct\n"); ++ return ERR_PTR(-EINVAL); + } + -+ aw9110_state_init(gpio); ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (!heap) ++ return ERR_PTR(-ENOMEM); + -+ /* Enable irqchip if we have an interrupt */ -+ if (client->irq) { -+ struct gpio_irq_chip *girq; ++ heap->name = exp_info->name; ++ heap->ops = exp_info->ops; ++ heap->priv = exp_info->priv; + -+ gpio->irqchip.name = "aw9110"; -+ gpio->irqchip.irq_enable = aw9110_irq_enable; -+ gpio->irqchip.irq_disable = aw9110_irq_disable; -+ gpio->irqchip.irq_ack = aw9110_noop; -+ gpio->irqchip.irq_mask = aw9110_noop; -+ gpio->irqchip.irq_unmask = aw9110_noop; -+ gpio->irqchip.irq_set_wake = aw9110_irq_set_wake; -+ gpio->irqchip.irq_bus_lock = aw9110_irq_bus_lock; -+ gpio->irqchip.irq_bus_sync_unlock = aw9110_irq_bus_sync_unlock; ++ /* Find unused minor number */ ++ ret = xa_alloc(&dma_heap_minors, &minor, heap, ++ XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); ++ if (ret < 0) { ++ pr_err("dma_heap: Unable to get minor number for heap\n"); ++ err_ret = ERR_PTR(ret); ++ goto err0; ++ } + -+ status = devm_request_threaded_irq(&client->dev, client->irq, -+ NULL, aw9110_irq, IRQF_ONESHOT | -+ IRQF_TRIGGER_FALLING | IRQF_SHARED, -+ dev_name(&client->dev), gpio); -+ if (status) -+ goto fail; ++ /* Create device */ ++ heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), minor); + -+ girq = &gpio->chip.irq; -+ girq->chip = &gpio->irqchip; -+ /* This will let us handle the parent IRQ in the driver */ -+ girq->parent_handler = NULL; -+ girq->num_parents = 0; -+ girq->parents = NULL; -+ girq->default_type = IRQ_TYPE_NONE; -+ girq->handler = handle_level_irq; -+ girq->threaded = true; ++ cdev_init(&heap->heap_cdev, &dma_heap_fops); ++ ret = 
cdev_add(&heap->heap_cdev, heap->heap_devt, 1); ++ if (ret < 0) { ++ pr_err("dma_heap: Unable to add char device\n"); ++ err_ret = ERR_PTR(ret); ++ goto err1; + } + -+ status = devm_gpiochip_add_data(&client->dev, &gpio->chip, gpio); -+ if (status < 0) -+ goto fail; ++ dev_ret = device_create(dma_heap_class, ++ NULL, ++ heap->heap_devt, ++ NULL, ++ heap->name); ++ if (IS_ERR(dev_ret)) { ++ pr_err("dma_heap: Unable to create device\n"); ++ err_ret = ERR_CAST(dev_ret); ++ goto err2; ++ } + -+ dev_info(&client->dev, "probed\n"); ++ mutex_lock(&heap_list_lock); ++ /* check the name is unique */ ++ list_for_each_entry(h, &heap_list, list) { ++ if (!strcmp(h->name, exp_info->name)) { ++ mutex_unlock(&heap_list_lock); ++ pr_err("dma_heap: Already registered heap named %s\n", ++ exp_info->name); ++ err_ret = ERR_PTR(-EINVAL); ++ goto err3; ++ } ++ } + -+ return 0; ++ /* Add heap to the list */ ++ list_add(&heap->list, &heap_list); ++ mutex_unlock(&heap_list_lock); + -+fail: -+ dev_err(&client->dev, "probe error %d for '%s'\n", status, -+ client->name); ++ return heap; + -+ return status; ++err3: ++ device_destroy(dma_heap_class, heap->heap_devt); ++err2: ++ cdev_del(&heap->heap_cdev); ++err1: ++ xa_erase(&dma_heap_minors, minor); ++err0: ++ kfree(heap); ++ return err_ret; +} + -+static int aw9110_pm_resume(struct device *dev) ++static char *dma_heap_devnode(const struct device *dev, umode_t *mode) +{ -+ struct aw9110 *gpio = dev->driver_data; -+ -+ /* out4-9 push-pull */ -+ gpio->write(gpio->client, REG_CTRL, (1<<4)); -+ -+ /* work mode : gpio */ -+ gpio->write(gpio->client, REG_WORK_MODE_P1, 0x0F); -+ gpio->write(gpio->client, REG_WORK_MODE_P0, 0x3F); ++ return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); ++} + -+ /* direct */ -+ //gpio->direct = 0x03FF; /* 0: output, 1:input */ -+ gpio->write(gpio->client, REG_CONFIG_P1, gpio->direct & 0x0F); -+ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct>>4) & 0x3F); ++static int dma_heap_init(void) ++{ ++ int ret; + -+ /* out */ -+ gpio->write(gpio->client, REG_OUTPUT_P1, gpio->out >> 0); -+ gpio->write(gpio->client, REG_OUTPUT_P0, gpio->out >> 4); ++ ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME); ++ if (ret) ++ return ret; + -+ /* interrupt enable */ -+ //gpio->irq_enabled = 0x03FF; /* 0: disable 1:enable, chip: 0:enable, 1: disable */ -+ gpio->write(gpio->client, REG_INT_P1, ((~gpio->irq_enabled) >> 0)&0x0F); -+ gpio->write(gpio->client, REG_INT_P0, ((~gpio->irq_enabled) >> 4)&0x3F); ++ dma_heap_class = class_create(DEVNAME); ++ if (IS_ERR(dma_heap_class)) { ++ unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS); ++ return PTR_ERR(dma_heap_class); ++ } ++ dma_heap_class->devnode = dma_heap_devnode; + + return 0; +} ++subsys_initcall(dma_heap_init); +diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig +index a5eef06c4..c6b35f28a 100644 +--- a/drivers/dma-buf/heaps/Kconfig ++++ b/drivers/dma-buf/heaps/Kconfig +@@ -1,3 +1,13 @@ ++menuconfig DMABUF_HEAPS_DEFERRED_FREE ++ bool "DMA-BUF heaps deferred-free library" ++ help ++ Choose this option to enable the DMA-BUF heaps deferred-free library. 
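/*
 * Editor's note -- illustrative aside, not part of the patch hunks above or
 * below. The two Kconfig entries just added select the deferred-free and
 * page-pool helper libraries that this patch introduces later as
 * drivers/dma-buf/heaps/deferred-free-helper.c and page_pool.c. The sketch
 * below shows how a heap driver typically combines the two: frees are queued
 * with deferred_free(), and the callback recycles pages into a
 * dmabuf_page_pool except under memory pressure, where they are released
 * immediately. The names example_buffer/example_release/example_queue_free
 * are hypothetical.
 */
#include <linux/container_of.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include "deferred-free-helper.h"
#include "page_pool.h"

struct example_buffer {
        struct page *page;              /* from dmabuf_page_pool_alloc(pool) */
        struct dmabuf_page_pool *pool;
        struct deferred_freelist_item deferred_free;
};

/* Runs later from the deferred-free worker (DF_NORMAL) or its shrinker
 * (DF_UNDER_PRESSURE); only recycle into the pool in the normal case. */
static void example_release(struct deferred_freelist_item *item,
                            enum df_reason reason)
{
        struct example_buffer *buffer =
                container_of(item, struct example_buffer, deferred_free);

        if (reason == DF_UNDER_PRESSURE)
                __free_pages(buffer->page, buffer->pool->order);
        else
                dmabuf_page_pool_free(buffer->pool, buffer->page);

        kfree(buffer);
}

/* Called from the heap's dma_buf release op instead of freeing inline. */
static void example_queue_free(struct example_buffer *buffer)
{
        deferred_free(&buffer->deferred_free, example_release,
                      1 << buffer->pool->order);
}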
+ -+static const struct dev_pm_ops aw9110_pm_ops = { -+ .resume = aw9110_pm_resume, -+}; -+ -+static struct i2c_driver aw9110_driver = { -+ .driver = { -+ .name = "aw9110", -+ .pm = &aw9110_pm_ops, -+ .of_match_table = of_match_ptr(aw9110_of_table), -+ }, -+ .probe = aw9110_probe, -+ .id_table = aw9110_id, -+}; -+ -+static int __init aw9110_init(void) -+{ -+ return i2c_add_driver(&aw9110_driver); -+} -+/* register after i2c postcore initcall and before -+ * subsys initcalls that may rely on these GPIOs -+ */ -+subsys_initcall(aw9110_init); -+ -+static void __exit aw9110_exit(void) -+{ -+ i2c_del_driver(&aw9110_driver); -+} -+module_exit(aw9110_exit); ++menuconfig DMABUF_HEAPS_PAGE_POOL ++ bool "DMA-BUF heaps page-pool library" ++ help ++ Choose this option to enable the DMA-BUF heaps page-pool library. + -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Jake Wu "); -+MODULE_DESCRIPTION("AW9110 i2c expander gpio driver"); + config DMABUF_HEAPS_SYSTEM + bool "DMA-BUF System Heap" + depends on DMABUF_HEAPS +@@ -12,3 +22,10 @@ config DMABUF_HEAPS_CMA + Choose this option to enable dma-buf CMA heap. This heap is backed + by the Contiguous Memory Allocator (CMA). If your system has these + regions, you should say Y here. + -diff --git a/drivers/gpio/gpio-nca9539.c b/drivers/gpio/gpio-nca9539.c ++config DMABUF_HEAPS_SRAM ++ tristate "Export on-chip SRAM pools using DMA-Heaps" ++ depends on DMABUF_HEAPS && SRAM ++ help ++ This driver allows the export of on-chip SRAM marked as exportable ++ to userspace using the DMA-Heaps interface. +diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile +index 974467791..f373aa65d 100644 +--- a/drivers/dma-buf/heaps/Makefile ++++ b/drivers/dma-buf/heaps/Makefile +@@ -1,3 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0 ++obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o ++obj-$(CONFIG_DMABUF_HEAPS_PAGE_POOL) += page_pool.o + obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o + obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o ++obj-$(CONFIG_DMABUF_HEAPS_SRAM) += sram_heap.o +diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c new file mode 100644 -index 000000000..50fff6d90 +index 000000000..d207eac58 --- /dev/null -+++ b/drivers/gpio/gpio-nca9539.c -@@ -0,0 +1,332 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++++ b/drivers/dma-buf/heaps/deferred-free-helper.c +@@ -0,0 +1,138 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * NCA9539 I2C Port Expander I/O ++ * Deferred dmabuf freeing helper + * -+ * Copyright (C) 2023 Cody Xie ++ * Copyright (C) 2020 Linaro, Ltd. + * ++ * Based on the ION page pool code ++ * Copyright (C) 2011 Google, Inc. 
+ */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include ++#include + -+#define NCA9539_REG_INPUT_PORT_BASE 0x00 -+#define NCA9539_REG_INPUT_PORT0 (NCA9539_REG_INPUT_PORT_BASE + 0x0) -+#define NCA9539_REG_INPUT_PORT1 (NCA9539_REG_INPUT_PORT_BASE + 0x1) -+#define NCA9539_REG_OUTPUT_PORT_BASE 0x02 -+#define NCA9539_REG_OUTPUT_PORT0 (NCA9539_REG_OUTPUT_PORT_BASE + 0x0) -+#define NCA9539_REG_OUTPUT_PORT1 (NCA9539_REG_OUTPUT_PORT_BASE + 0x1) -+#define NCA9539_REG_POLARITY_BASE 0x04 -+#define NCA9539_REG_POLARITY_PORT0 (NCA9539_REG_POLARITY_BASE + 0x0) -+#define NCA9539_REG_POLARITY_PORT1 (NCA9539_REG_POLARITY_BASE + 0x1) -+#define NCA9539_REG_CONFIG_BASE 0x06 -+#define NCA9539_REG_CONFIG_PORT0 (NCA9539_REG_CONFIG_BASE + 0x0) -+#define NCA9539_REG_CONFIG_PORT1 (NCA9539_REG_CONFIG_BASE + 0x1) ++#include "deferred-free-helper.h" + -+struct nca9539_chip { -+ struct gpio_chip gpio_chip; -+ struct regmap *regmap; -+ struct regulator *regulator; -+ unsigned int ngpio; -+}; ++static LIST_HEAD(free_list); ++static size_t list_nr_pages; ++wait_queue_head_t freelist_waitqueue; ++struct task_struct *freelist_task; ++static DEFINE_SPINLOCK(free_list_lock); + -+static int nca9539_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) ++void deferred_free(struct deferred_freelist_item *item, ++ void (*free)(struct deferred_freelist_item*, ++ enum df_reason), ++ size_t nr_pages) +{ -+ struct nca9539_chip *priv = gpiochip_get_data(gc); -+ unsigned int port = offset / 8; -+ unsigned int pin = offset % 8; -+ unsigned int value; -+ int ret; -+ -+ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); -+ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); -+ if (ret < 0) { -+ dev_err(gc->parent, "%s offset(%d) read config failed", -+ __func__, offset); -+ return ret; -+ } ++ unsigned long flags; + -+ if (value & BIT(pin)) -+ return GPIO_LINE_DIRECTION_IN; ++ INIT_LIST_HEAD(&item->list); ++ item->nr_pages = nr_pages; ++ item->free = free; + -+ return GPIO_LINE_DIRECTION_OUT; ++ spin_lock_irqsave(&free_list_lock, flags); ++ list_add(&item->list, &free_list); ++ list_nr_pages += nr_pages; ++ spin_unlock_irqrestore(&free_list_lock, flags); ++ wake_up(&freelist_waitqueue); +} ++EXPORT_SYMBOL_GPL(deferred_free); + -+static int nca9539_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) ++static size_t free_one_item(enum df_reason reason) +{ -+ struct nca9539_chip *priv = gpiochip_get_data(gc); -+ unsigned int port = offset / 8; -+ unsigned int pin = offset % 8; -+ int ret; ++ unsigned long flags; ++ size_t nr_pages; ++ struct deferred_freelist_item *item; + -+ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); -+ ret = regmap_update_bits(priv->regmap, NCA9539_REG_CONFIG_BASE + port, -+ BIT(pin), BIT(pin)); -+ if (ret < 0) { -+ dev_err(gc->parent, "%s offset(%d) read config failed", -+ __func__, offset); ++ spin_lock_irqsave(&free_list_lock, flags); ++ if (list_empty(&free_list)) { ++ spin_unlock_irqrestore(&free_list_lock, flags); ++ return 0; + } ++ item = list_first_entry(&free_list, struct deferred_freelist_item, list); ++ list_del(&item->list); ++ nr_pages = item->nr_pages; ++ list_nr_pages -= nr_pages; ++ spin_unlock_irqrestore(&free_list_lock, flags); + -+ return ret; ++ item->free(item, reason); ++ return nr_pages; +} + -+static int nca9539_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, -+ int val) ++static unsigned long 
get_freelist_nr_pages(void) +{ -+ struct nca9539_chip *priv = gpiochip_get_data(gc); -+ unsigned int port = offset / 8; -+ unsigned int pin = offset % 8; -+ int ret; -+ -+ dev_dbg(gc->parent, "%s offset(%d) val(%d)", __func__, offset, val); -+ ret = regmap_update_bits(priv->regmap, NCA9539_REG_CONFIG_BASE + port, -+ BIT(pin), 0); -+ if (ret < 0) { -+ dev_err(gc->parent, -+ "%s offset(%d) val(%d) update config failed", __func__, -+ offset, val); -+ return ret; -+ } -+ -+ ret = regmap_update_bits(priv->regmap, -+ NCA9539_REG_OUTPUT_PORT_BASE + port, BIT(pin), -+ val ? BIT(pin) : 0); -+ if (ret < 0) { -+ dev_err(gc->parent, -+ "%s offset(%d) val(%d) update output failed", __func__, -+ offset, val); -+ return ret; -+ } ++ unsigned long nr_pages; ++ unsigned long flags; + -+ return ret; ++ spin_lock_irqsave(&free_list_lock, flags); ++ nr_pages = list_nr_pages; ++ spin_unlock_irqrestore(&free_list_lock, flags); ++ return nr_pages; +} + -+static int nca9539_gpio_get(struct gpio_chip *gc, unsigned int offset) ++static unsigned long freelist_shrink_count(struct shrinker *shrinker, ++ struct shrink_control *sc) +{ -+ struct nca9539_chip *priv = gpiochip_get_data(gc); -+ unsigned int port = offset / 8; -+ unsigned int pin = offset % 8; -+ unsigned int reg; -+ unsigned int value; -+ int ret; -+ -+ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); -+ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); -+ if (ret < 0) { -+ dev_err(gc->parent, "%s offset(%d) check config failed", -+ __func__, offset); -+ return ret; -+ } -+ if (!(BIT(pin) & value)) -+ reg = NCA9539_REG_OUTPUT_PORT_BASE + port; -+ else -+ reg = NCA9539_REG_INPUT_PORT_BASE + port; -+ ret = regmap_read(priv->regmap, reg, &value); -+ if (ret < 0) { -+ dev_err(gc->parent, "%s offset(%d) read value failed", __func__, -+ offset); -+ return -EIO; -+ } -+ -+ return !!(BIT(pin) & value); ++ return get_freelist_nr_pages(); +} + -+static void nca9539_gpio_set(struct gpio_chip *gc, unsigned int offset, int val) ++static unsigned long freelist_shrink_scan(struct shrinker *shrinker, ++ struct shrink_control *sc) +{ -+ struct nca9539_chip *priv = gpiochip_get_data(gc); -+ unsigned int port = offset / 8; -+ unsigned int pin = offset % 8; -+ unsigned int value; -+ int ret; ++ unsigned long total_freed = 0; + -+ dev_dbg(gc->parent, "%s offset(%d) val(%d)", __func__, offset, val); -+ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); -+ if (ret < 0 || !!(BIT(pin) & value)) { -+ dev_err(gc->parent, "%s offset(%d) val(%d) check config failed", -+ __func__, offset, val); -+ } ++ if (sc->nr_to_scan == 0) ++ return 0; + -+ ret = regmap_update_bits(priv->regmap, -+ NCA9539_REG_OUTPUT_PORT_BASE + port, BIT(pin), -+ val ? 
BIT(pin) : 0); -+ if (ret < 0) { -+ dev_err(gc->parent, "%s offset(%d) val(%d) read input failed", -+ __func__, offset, val); -+ } -+} ++ while (total_freed < sc->nr_to_scan) { ++ size_t pages_freed = free_one_item(DF_UNDER_PRESSURE); + -+static bool nca9539_is_writeable_reg(struct device *dev, unsigned int reg) -+{ -+ switch (reg) { -+ case NCA9539_REG_OUTPUT_PORT0: -+ case NCA9539_REG_OUTPUT_PORT1: -+ case NCA9539_REG_POLARITY_PORT0: -+ case NCA9539_REG_POLARITY_PORT1: -+ case NCA9539_REG_CONFIG_PORT0: -+ case NCA9539_REG_CONFIG_PORT1: -+ return true; -+ } -+ return false; -+} ++ if (!pages_freed) ++ break; + -+static bool nca9539_is_readable_reg(struct device *dev, unsigned int reg) -+{ -+ switch (reg) { -+ case NCA9539_REG_INPUT_PORT0: -+ case NCA9539_REG_INPUT_PORT1: -+ case NCA9539_REG_OUTPUT_PORT0: -+ case NCA9539_REG_OUTPUT_PORT1: -+ case NCA9539_REG_POLARITY_PORT0: -+ case NCA9539_REG_POLARITY_PORT1: -+ case NCA9539_REG_CONFIG_PORT0: -+ case NCA9539_REG_CONFIG_PORT1: -+ return true; ++ total_freed += pages_freed; + } -+ return false; -+} + -+static bool nca9539_is_volatile_reg(struct device *dev, unsigned int reg) -+{ -+ return true; ++ return total_freed; +} + -+static const struct reg_default nca9539_regmap_default[] = { -+ { NCA9539_REG_INPUT_PORT0, 0xFF }, -+ { NCA9539_REG_INPUT_PORT1, 0xFF }, -+ { NCA9539_REG_OUTPUT_PORT0, 0xFF }, -+ { NCA9539_REG_OUTPUT_PORT1, 0xFF }, -+ { NCA9539_REG_POLARITY_PORT0, 0x00 }, -+ { NCA9539_REG_POLARITY_PORT1, 0x00 }, -+ { NCA9539_REG_CONFIG_PORT0, 0xFF }, -+ { NCA9539_REG_CONFIG_PORT1, 0xFF }, -+}; -+ -+static const struct regmap_config nca9539_regmap_config = { -+ .reg_bits = 8, -+ .val_bits = 8, -+ .max_register = 7, -+ .writeable_reg = nca9539_is_writeable_reg, -+ .readable_reg = nca9539_is_readable_reg, -+ .volatile_reg = nca9539_is_volatile_reg, -+ .reg_defaults = nca9539_regmap_default, -+ .num_reg_defaults = ARRAY_SIZE(nca9539_regmap_default), -+ .cache_type = REGCACHE_FLAT, -+}; -+ -+static const struct gpio_chip template_chip = { -+ .label = "nca9539-gpio", -+ .owner = THIS_MODULE, -+ .get_direction = nca9539_gpio_get_direction, -+ .direction_input = nca9539_gpio_direction_input, -+ .direction_output = nca9539_gpio_direction_output, -+ .get = nca9539_gpio_get, -+ .set = nca9539_gpio_set, -+ .base = -1, -+ .can_sleep = true, ++static struct shrinker freelist_shrinker = { ++ .count_objects = freelist_shrink_count, ++ .scan_objects = freelist_shrink_scan, ++ .seeks = DEFAULT_SEEKS, ++ .batch = 0, +}; + -+static int nca9539_probe(struct i2c_client *client) ++static int deferred_free_thread(void *data) +{ -+ struct nca9539_chip *chip; -+ struct regulator *reg; -+ int ret; -+ -+ chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); -+ if (!chip) -+ return -ENOMEM; -+ -+ chip->gpio_chip = template_chip; -+ chip->gpio_chip.label = "nca9539-gpio"; -+ chip->gpio_chip.parent = &client->dev; -+ chip->ngpio = (uintptr_t)of_device_get_match_data(&client->dev); -+ chip->gpio_chip.ngpio = chip->ngpio; -+ -+ reg = devm_regulator_get(&client->dev, "vdd"); -+ if (IS_ERR(reg)) -+ return dev_err_probe(&client->dev, PTR_ERR(reg), -+ "reg get err\n"); -+ -+ ret = regulator_enable(reg); -+ if (ret) { -+ dev_err(&client->dev, "reg en err: %d\n", ret); -+ return ret; -+ } -+ chip->regulator = reg; -+ -+ chip->regmap = devm_regmap_init_i2c(client, &nca9539_regmap_config); -+ if (IS_ERR(chip->regmap)) { -+ ret = PTR_ERR(chip->regmap); -+ dev_err(&client->dev, "Failed to allocate register map: %d\n", -+ ret); -+ goto err_exit; -+ } -+ 
regcache_mark_dirty(chip->regmap); -+ ret = regcache_sync(chip->regmap); -+ if (ret) { -+ dev_err(&client->dev, "Failed to sync register map: %d\n", ret); -+ goto err_exit; -+ } -+ -+ // TODO(Cody): irq_chip setup ++ while (true) { ++ wait_event_freezable(freelist_waitqueue, ++ get_freelist_nr_pages() > 0); + -+ ret = devm_gpiochip_add_data(&client->dev, &chip->gpio_chip, chip); -+ if (ret < 0) { -+ dev_err(&client->dev, "Unable to register gpiochip\n"); -+ goto err_exit; ++ free_one_item(DF_NORMAL); + } + -+ i2c_set_clientdata(client, chip); -+ + return 0; -+ -+err_exit: -+ regulator_disable(chip->regulator); -+ return ret; +} + -+static int nca9539_remove(struct i2c_client *client) ++static int deferred_freelist_init(void) +{ -+ struct nca9539_chip *chip = i2c_get_clientdata(client); ++ list_nr_pages = 0; + -+ regulator_disable(chip->regulator); ++ init_waitqueue_head(&freelist_waitqueue); ++ freelist_task = kthread_run(deferred_free_thread, NULL, ++ "%s", "dmabuf-deferred-free-worker"); ++ if (IS_ERR(freelist_task)) { ++ pr_err("Creating thread for deferred free failed\n"); ++ return -1; ++ } ++ sched_set_normal(freelist_task, 19); + -+ return 0; ++ return register_shrinker(&freelist_shrinker, "freelist-shrinker"); +} ++module_init(deferred_freelist_init); ++MODULE_LICENSE("GPL v2"); + -+static const struct of_device_id nca9539_gpio_of_match_table[] = { -+ { -+ .compatible = "novo,nca9539-gpio", -+ .data = (void *)16, -+ }, -+ { /* sentinel */ }, -+}; -+MODULE_DEVICE_TABLE(of, nca9539_gpio_of_match_table); +diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h +new file mode 100644 +index 000000000..11940328c +--- /dev/null ++++ b/drivers/dma-buf/heaps/deferred-free-helper.h +@@ -0,0 +1,55 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + -+static const struct i2c_device_id nca9539_gpio_id_table[] = { -+ { "nca9539-gpio" }, -+ { /* sentinel */ }, ++#ifndef DEFERRED_FREE_HELPER_H ++#define DEFERRED_FREE_HELPER_H ++ ++/** ++ * df_reason - enum for reason why item was freed ++ * ++ * This provides a reason for why the free function was called ++ * on the item. This is useful when deferred_free is used in ++ * combination with a pagepool, so under pressure the page can ++ * be immediately freed. ++ * ++ * DF_NORMAL: Normal deferred free ++ * ++ * DF_UNDER_PRESSURE: Free was called because the system ++ * is under memory pressure. Usually ++ * from a shrinker. Avoid allocating ++ * memory in the free call, as it may ++ * fail. ++ */ ++enum df_reason { ++ DF_NORMAL, ++ DF_UNDER_PRESSURE, +}; -+MODULE_DEVICE_TABLE(i2c, nca9539_gpio_id_table); + -+static struct i2c_driver nca9539_driver = { -+ .driver = { -+ .name = "nca9539-gpio", -+ .of_match_table = nca9539_gpio_of_match_table, -+ }, -+ .probe_new = nca9539_probe, -+ .remove = nca9539_remove, -+ .id_table = nca9539_gpio_id_table, ++/** ++ * deferred_freelist_item - item structure for deferred freelist ++ * ++ * This is to be added to the structure for whatever you want to ++ * defer freeing on. 
++ * ++ * @nr_pages: number of pages used by item to be freed ++ * @free: function pointer to be called when freeing the item ++ * @list: list entry for the deferred list ++ */ ++struct deferred_freelist_item { ++ size_t nr_pages; ++ void (*free)(struct deferred_freelist_item *i, ++ enum df_reason reason); ++ struct list_head list; +}; -+module_i2c_driver(nca9539_driver); + -+MODULE_AUTHOR("Cody Xie "); -+MODULE_DESCRIPTION("GPIO expander driver for Novosense nca9539"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpio/gpio-rockchip-oh.c b/drivers/gpio/gpio-rockchip-oh.c ++/** ++ * deferred_free - call to add item to the deferred free list ++ * ++ * @item: Pointer to deferred_freelist_item field of a structure ++ * @free: Function pointer to the free call ++ * @nr_pages: number of pages to be freed ++ */ ++void deferred_free(struct deferred_freelist_item *item, ++ void (*free)(struct deferred_freelist_item *i, ++ enum df_reason reason), ++ size_t nr_pages); ++#endif +diff --git a/drivers/dma-buf/heaps/page_pool.c b/drivers/dma-buf/heaps/page_pool.c new file mode 100644 -index 000000000..f251313c2 +index 000000000..f3e359860 --- /dev/null -+++ b/drivers/gpio/gpio-rockchip-oh.c -@@ -0,0 +1,880 @@ -+// SPDX-License-Identifier: GPL-2.0-only ++++ b/drivers/dma-buf/heaps/page_pool.c +@@ -0,0 +1,247 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * Copyright (c) 2013 MundoReader S.L. -+ * Author: Heiko Stuebner ++ * DMA BUF page pool system + * -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ * Copyright (C) 2020 Linaro Ltd. ++ * ++ * Based on the ION page pool code ++ * Copyright (C) 2011 Google, Inc. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "../pinctrl/core.h" -+#include "../pinctrl/pinctrl-rockchip.h" -+ -+#define GPIO_TYPE_V1 (0) /* GPIO Version ID reserved */ -+#define GPIO_TYPE_V2 (0x01000C2B) /* GPIO Version ID 0x01000C2B */ -+#define GPIO_TYPE_V2_1 (0x0101157C) /* GPIO Version ID 0x0101157C */ -+ -+#define GPIO_MAX_PINS (32) -+ -+static const struct rockchip_gpio_regs gpio_regs_v1 = { -+ .port_dr = 0x00, -+ .port_ddr = 0x04, -+ .int_en = 0x30, -+ .int_mask = 0x34, -+ .int_type = 0x38, -+ .int_polarity = 0x3c, -+ .int_status = 0x40, -+ .int_rawstatus = 0x44, -+ .debounce = 0x48, -+ .port_eoi = 0x4c, -+ .ext_port = 0x50, -+}; ++#include ++#include ++#include ++#include ++#include ++#include "page_pool.h" + -+static const struct rockchip_gpio_regs gpio_regs_v2 = { -+ .port_dr = 0x00, -+ .port_ddr = 0x08, -+ .int_en = 0x10, -+ .int_mask = 0x18, -+ .int_type = 0x20, -+ .int_polarity = 0x28, -+ .int_bothedge = 0x30, -+ .int_status = 0x50, -+ .int_rawstatus = 0x58, -+ .debounce = 0x38, -+ .dbclk_div_en = 0x40, -+ .dbclk_div_con = 0x48, -+ .port_eoi = 0x60, -+ .ext_port = 0x70, -+ .version_id = 0x78, -+}; ++static LIST_HEAD(pool_list); ++static DEFINE_MUTEX(pool_list_lock); + -+static inline void gpio_writel_v2(u32 val, void __iomem *reg) ++static inline ++struct page *dmabuf_page_pool_alloc_pages(struct dmabuf_page_pool *pool) +{ -+ writel((val & 0xffff) | 0xffff0000, reg); -+ writel((val >> 16) | 0xffff0000, reg + 0x4); ++ if (fatal_signal_pending(current)) ++ return NULL; ++ return alloc_pages(pool->gfp_mask, pool->order); +} + -+static inline u32 gpio_readl_v2(void __iomem *reg) ++static inline void dmabuf_page_pool_free_pages(struct dmabuf_page_pool *pool, ++ struct page *page) +{ -+ return 
readl(reg + 0x4) << 16 | readl(reg); ++ __free_pages(page, pool->order); +} + -+static inline void rockchip_gpio_writel(struct rockchip_pin_bank *bank, -+ u32 value, unsigned int offset) ++static void dmabuf_page_pool_add(struct dmabuf_page_pool *pool, struct page *page) +{ -+ void __iomem *reg = bank->reg_base + offset; ++ int index; + -+ if (bank->gpio_type == GPIO_TYPE_V2) -+ gpio_writel_v2(value, reg); ++ if (PageHighMem(page)) ++ index = POOL_HIGHPAGE; + else -+ writel(value, reg); ++ index = POOL_LOWPAGE; ++ ++ mutex_lock(&pool->mutex); ++ list_add_tail(&page->lru, &pool->items[index]); ++ pool->count[index]++; ++ mutex_unlock(&pool->mutex); ++ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, ++ 1 << pool->order); +} + -+static inline u32 rockchip_gpio_readl(struct rockchip_pin_bank *bank, -+ unsigned int offset) ++static struct page *dmabuf_page_pool_remove(struct dmabuf_page_pool *pool, int index) +{ -+ void __iomem *reg = bank->reg_base + offset; -+ u32 value; ++ struct page *page; + -+ if (bank->gpio_type == GPIO_TYPE_V2) -+ value = gpio_readl_v2(reg); -+ else -+ value = readl(reg); ++ mutex_lock(&pool->mutex); ++ page = list_first_entry_or_null(&pool->items[index], struct page, lru); ++ if (page) { ++ pool->count[index]--; ++ list_del(&page->lru); ++ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, ++ -(1 << pool->order)); ++ } ++ mutex_unlock(&pool->mutex); + -+ return value; ++ return page; +} + -+static inline void rockchip_gpio_writel_bit(struct rockchip_pin_bank *bank, -+ u32 bit, u32 value, -+ unsigned int offset) ++static struct page *dmabuf_page_pool_fetch(struct dmabuf_page_pool *pool) +{ -+ void __iomem *reg = bank->reg_base + offset; -+ u32 data; ++ struct page *page = NULL; + -+ if (bank->gpio_type == GPIO_TYPE_V2) { -+ if (value) -+ data = BIT(bit % 16) | BIT(bit % 16 + 16); -+ else -+ data = BIT(bit % 16 + 16); -+ writel(data, bit >= 16 ? reg + 0x4 : reg); -+ } else { -+ data = readl(reg); -+ data &= ~BIT(bit); -+ if (value) -+ data |= BIT(bit); -+ writel(data, reg); -+ } ++ page = dmabuf_page_pool_remove(pool, POOL_HIGHPAGE); ++ if (!page) ++ page = dmabuf_page_pool_remove(pool, POOL_LOWPAGE); ++ ++ return page; +} + -+static inline u32 rockchip_gpio_readl_bit(struct rockchip_pin_bank *bank, -+ u32 bit, unsigned int offset) ++struct page *dmabuf_page_pool_alloc(struct dmabuf_page_pool *pool) +{ -+ void __iomem *reg = bank->reg_base + offset; -+ u32 data; ++ struct page *page = NULL; + -+ if (bank->gpio_type == GPIO_TYPE_V2) { -+ data = readl(bit >= 16 ? 
reg + 0x4 : reg); -+ data >>= bit % 16; -+ } else { -+ data = readl(reg); -+ data >>= bit; -+ } ++ if (WARN_ON(!pool)) ++ return NULL; + -+ return data & (0x1); ++ page = dmabuf_page_pool_fetch(pool); ++ ++ if (!page) ++ page = dmabuf_page_pool_alloc_pages(pool); ++ return page; +} ++EXPORT_SYMBOL_GPL(dmabuf_page_pool_alloc); + -+static int rockchip_gpio_get_direction(struct gpio_chip *chip, -+ unsigned int offset) ++void dmabuf_page_pool_free(struct dmabuf_page_pool *pool, struct page *page) +{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(chip); -+ u32 data; -+ -+ data = rockchip_gpio_readl_bit(bank, offset, bank->gpio_regs->port_ddr); -+ if (data) -+ return GPIO_LINE_DIRECTION_OUT; ++ if (WARN_ON(pool->order != compound_order(page))) ++ return; + -+ return GPIO_LINE_DIRECTION_IN; ++ dmabuf_page_pool_add(pool, page); +} ++EXPORT_SYMBOL_GPL(dmabuf_page_pool_free); + -+static int rockchip_gpio_set_direction(struct gpio_chip *chip, -+ unsigned int offset, bool input) ++static int dmabuf_page_pool_total(struct dmabuf_page_pool *pool, bool high) +{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(chip); -+ unsigned long flags; -+ u32 data = input ? 0 : 1; -+ -+ if (input) -+ pinctrl_gpio_direction_input(bank->pin_base + offset); -+ else -+ pinctrl_gpio_direction_output(bank->pin_base + offset); ++ int count = pool->count[POOL_LOWPAGE]; + -+ raw_spin_lock_irqsave(&bank->slock, flags); -+ rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr); -+ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ if (high) ++ count += pool->count[POOL_HIGHPAGE]; + -+ return 0; ++ return count << pool->order; +} + -+static void rockchip_gpio_set(struct gpio_chip *gc, unsigned int offset, -+ int value) ++struct dmabuf_page_pool *dmabuf_page_pool_create(gfp_t gfp_mask, unsigned int order) +{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); -+ unsigned long flags; ++ struct dmabuf_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL); ++ int i; + -+ raw_spin_lock_irqsave(&bank->slock, flags); -+ rockchip_gpio_writel_bit(bank, offset, value, bank->gpio_regs->port_dr); -+ raw_spin_unlock_irqrestore(&bank->slock, flags); -+} ++ if (!pool) ++ return NULL; + -+static int rockchip_gpio_get(struct gpio_chip *gc, unsigned int offset) -+{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); -+ u32 data; ++ for (i = 0; i < POOL_TYPE_SIZE; i++) { ++ pool->count[i] = 0; ++ INIT_LIST_HEAD(&pool->items[i]); ++ } ++ pool->gfp_mask = gfp_mask | __GFP_COMP; ++ pool->order = order; ++ mutex_init(&pool->mutex); + -+ data = readl(bank->reg_base + bank->gpio_regs->ext_port); -+ data >>= offset; -+ data &= 1; ++ mutex_lock(&pool_list_lock); ++ list_add(&pool->list, &pool_list); ++ mutex_unlock(&pool_list_lock); + -+ return data; ++ return pool; +} ++EXPORT_SYMBOL_GPL(dmabuf_page_pool_create); + -+static int rockchip_gpio_set_debounce(struct gpio_chip *gc, -+ unsigned int offset, -+ unsigned int debounce) ++void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool) +{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); -+ const struct rockchip_gpio_regs *reg = bank->gpio_regs; -+ unsigned long flags, div_reg, freq, max_debounce; -+ bool div_debounce_support; -+ unsigned int cur_div_reg; -+ u64 div; ++ struct page *page; ++ int i; + -+ if (bank->gpio_type == GPIO_TYPE_V2 && !IS_ERR(bank->db_clk)) { -+ div_debounce_support = true; -+ freq = clk_get_rate(bank->db_clk); -+ if (!freq) -+ return -EINVAL; -+ max_debounce = (GENMASK(23, 0) + 1) * 2 * 1000000 / freq; -+ if ((unsigned 
long)debounce > max_debounce) -+ return -EINVAL; ++ /* Remove us from the pool list */ ++ mutex_lock(&pool_list_lock); ++ list_del(&pool->list); ++ mutex_unlock(&pool_list_lock); + -+ div = debounce * freq; -+ div_reg = DIV_ROUND_CLOSEST_ULL(div, 2 * USEC_PER_SEC) - 1; -+ } else { -+ div_debounce_support = false; ++ /* Free any remaining pages in the pool */ ++ for (i = 0; i < POOL_TYPE_SIZE; i++) { ++ while ((page = dmabuf_page_pool_remove(pool, i))) ++ dmabuf_page_pool_free_pages(pool, page); + } + -+ raw_spin_lock_irqsave(&bank->slock, flags); ++ kfree(pool); ++} ++EXPORT_SYMBOL_GPL(dmabuf_page_pool_destroy); + -+ /* Only the v1 needs to configure div_en and div_con for dbclk */ -+ if (debounce) { -+ if (div_debounce_support) { -+ /* Configure the max debounce from consumers */ -+ cur_div_reg = readl(bank->reg_base + -+ reg->dbclk_div_con); -+ if (cur_div_reg < div_reg) -+ writel(div_reg, bank->reg_base + -+ reg->dbclk_div_con); -+ rockchip_gpio_writel_bit(bank, offset, 1, -+ reg->dbclk_div_en); -+ } ++static int dmabuf_page_pool_do_shrink(struct dmabuf_page_pool *pool, gfp_t gfp_mask, ++ int nr_to_scan) ++{ ++ int freed = 0; ++ bool high; + -+ rockchip_gpio_writel_bit(bank, offset, 1, reg->debounce); -+ } else { -+ if (div_debounce_support) -+ rockchip_gpio_writel_bit(bank, offset, 0, -+ reg->dbclk_div_en); ++ if (current_is_kswapd()) ++ high = true; ++ else ++ high = !!(gfp_mask & __GFP_HIGHMEM); + -+ rockchip_gpio_writel_bit(bank, offset, 0, reg->debounce); -+ } ++ if (nr_to_scan == 0) ++ return dmabuf_page_pool_total(pool, high); + -+ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ while (freed < nr_to_scan) { ++ struct page *page; + -+ /* Enable or disable dbclk at last */ -+ if (div_debounce_support) { -+ if (debounce) -+ clk_prepare_enable(bank->db_clk); -+ else -+ clk_disable_unprepare(bank->db_clk); ++ /* Try to free low pages first */ ++ page = dmabuf_page_pool_remove(pool, POOL_LOWPAGE); ++ if (!page) ++ page = dmabuf_page_pool_remove(pool, POOL_HIGHPAGE); ++ ++ if (!page) ++ break; ++ ++ dmabuf_page_pool_free_pages(pool, page); ++ freed += (1 << pool->order); + } + -+ return 0; ++ return freed; +} + -+static int rockchip_gpio_direction_input(struct gpio_chip *gc, -+ unsigned int offset) ++static int dmabuf_page_pool_shrink(gfp_t gfp_mask, int nr_to_scan) +{ -+ return rockchip_gpio_set_direction(gc, offset, true); -+} ++ struct dmabuf_page_pool *pool; ++ int nr_total = 0; ++ int nr_freed; ++ int only_scan = 0; + -+static int rockchip_gpio_direction_output(struct gpio_chip *gc, -+ unsigned int offset, int value) -+{ -+ rockchip_gpio_set(gc, offset, value); ++ if (!nr_to_scan) ++ only_scan = 1; + -+ return rockchip_gpio_set_direction(gc, offset, false); ++ mutex_lock(&pool_list_lock); ++ list_for_each_entry(pool, &pool_list, list) { ++ if (only_scan) { ++ nr_total += dmabuf_page_pool_do_shrink(pool, ++ gfp_mask, ++ nr_to_scan); ++ } else { ++ nr_freed = dmabuf_page_pool_do_shrink(pool, ++ gfp_mask, ++ nr_to_scan); ++ nr_to_scan -= nr_freed; ++ nr_total += nr_freed; ++ if (nr_to_scan <= 0) ++ break; ++ } ++ } ++ mutex_unlock(&pool_list_lock); ++ ++ return nr_total; +} + -+/* -+ * gpiolib set_config callback function. The setting of the pin -+ * mux function as 'gpio output' will be handled by the pinctrl subsystem -+ * interface. 
-+ */ -+static int rockchip_gpio_set_config(struct gpio_chip *gc, unsigned int offset, -+ unsigned long config) ++static unsigned long dmabuf_page_pool_shrink_count(struct shrinker *shrinker, ++ struct shrink_control *sc) +{ -+ enum pin_config_param param = pinconf_to_config_param(config); -+ unsigned int debounce = pinconf_to_config_argument(config); -+ -+ switch (param) { -+ case PIN_CONFIG_INPUT_DEBOUNCE: -+ rockchip_gpio_set_debounce(gc, offset, debounce); -+ /* -+ * Rockchip's gpio could only support up to one period -+ * of the debounce clock(pclk), which is far away from -+ * satisftying the requirement, as pclk is usually near -+ * 100MHz shared by all peripherals. So the fact is it -+ * has crippled debounce capability could only be useful -+ * to prevent any spurious glitches from waking up the system -+ * if the gpio is conguired as wakeup interrupt source. Let's -+ * still return -ENOTSUPP as before, to make sure the caller -+ * of gpiod_set_debounce won't change its behaviour. -+ */ -+ return -ENOTSUPP; -+ default: -+ return -ENOTSUPP; -+ } ++ return dmabuf_page_pool_shrink(sc->gfp_mask, 0); +} + -+/* -+ * gpiolib gpio_to_irq callback function. Creates a mapping between a GPIO pin -+ * and a virtual IRQ, if not already present. -+ */ -+static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) ++static unsigned long dmabuf_page_pool_shrink_scan(struct shrinker *shrinker, ++ struct shrink_control *sc) +{ -+ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); -+ unsigned int virq; -+ -+ if (!bank->domain) -+ return -ENXIO; -+ -+ virq = irq_create_mapping(bank->domain, offset); -+ -+ return (virq) ? : -ENXIO; ++ if (sc->nr_to_scan == 0) ++ return 0; ++ return dmabuf_page_pool_shrink(sc->gfp_mask, sc->nr_to_scan); +} + -+static const struct gpio_chip rockchip_gpiolib_chip = { -+ .request = gpiochip_generic_request, -+ .free = gpiochip_generic_free, -+ .set = rockchip_gpio_set, -+ .get = rockchip_gpio_get, -+ .get_direction = rockchip_gpio_get_direction, -+ .direction_input = rockchip_gpio_direction_input, -+ .direction_output = rockchip_gpio_direction_output, -+ .set_config = rockchip_gpio_set_config, -+ .to_irq = rockchip_gpio_to_irq, -+ .owner = THIS_MODULE, ++struct shrinker pool_shrinker = { ++ .count_objects = dmabuf_page_pool_shrink_count, ++ .scan_objects = dmabuf_page_pool_shrink_scan, ++ .seeks = DEFAULT_SEEKS, ++ .batch = 0, +}; + -+static void rockchip_irq_demux(struct irq_desc *desc) ++static int dmabuf_page_pool_init_shrinker(void) +{ -+ struct irq_chip *chip = irq_desc_get_chip(desc); -+ struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc); -+ unsigned long pending; -+ unsigned int irq; -+ -+ dev_dbg(bank->dev, "got irq for bank %s\n", bank->name); ++ return register_shrinker(&pool_shrinker, "pool-shrinker"); ++} ++module_init(dmabuf_page_pool_init_shrinker); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/dma-buf/heaps/page_pool.h b/drivers/dma-buf/heaps/page_pool.h +new file mode 100644 +index 000000000..6b083b04f +--- /dev/null ++++ b/drivers/dma-buf/heaps/page_pool.h +@@ -0,0 +1,55 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * DMA BUF PagePool implementation ++ * Based on earlier ION code by Google ++ * ++ * Copyright (C) 2011 Google, Inc. ++ * Copyright (C) 2020 Linaro Ltd. 
++ */ + -+ chained_irq_enter(chip, desc); ++#ifndef _DMABUF_PAGE_POOL_H ++#define _DMABUF_PAGE_POOL_H + -+ pending = readl_relaxed(bank->reg_base + bank->gpio_regs->int_status); -+ for_each_set_bit(irq, &pending, 32) { -+ dev_dbg(bank->dev, "handling irq %d\n", irq); ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* -+ * Triggering IRQ on both rising and falling edge -+ * needs manual intervention. -+ */ -+ if (bank->toggle_edge_mode & BIT(irq)) { -+ u32 data, data_old, polarity; -+ unsigned long flags; ++/* page types we track in the pool */ ++enum { ++ POOL_LOWPAGE, /* Clean lowmem pages */ ++ POOL_HIGHPAGE, /* Clean highmem pages */ + -+ data = readl_relaxed(bank->reg_base + -+ bank->gpio_regs->ext_port); -+ do { -+ raw_spin_lock_irqsave(&bank->slock, flags); ++ POOL_TYPE_SIZE, ++}; + -+ polarity = readl_relaxed(bank->reg_base + -+ bank->gpio_regs->int_polarity); -+ if (data & BIT(irq)) -+ polarity &= ~BIT(irq); -+ else -+ polarity |= BIT(irq); -+ writel(polarity, -+ bank->reg_base + -+ bank->gpio_regs->int_polarity); ++/** ++ * struct dmabuf_page_pool - pagepool struct ++ * @count[]: array of number of pages of that type in the pool ++ * @items[]: array of list of pages of the specific type ++ * @mutex: lock protecting this struct and especially the count ++ * item list ++ * @gfp_mask: gfp_mask to use from alloc ++ * @order: order of pages in the pool ++ * @list: list node for list of pools ++ * ++ * Allows you to keep a pool of pre allocated pages to use ++ */ ++struct dmabuf_page_pool { ++ int count[POOL_TYPE_SIZE]; ++ struct list_head items[POOL_TYPE_SIZE]; ++ struct mutex mutex; ++ gfp_t gfp_mask; ++ unsigned int order; ++ struct list_head list; ++}; + -+ raw_spin_unlock_irqrestore(&bank->slock, flags); ++struct dmabuf_page_pool *dmabuf_page_pool_create(gfp_t gfp_mask, ++ unsigned int order); ++void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool); ++struct page *dmabuf_page_pool_alloc(struct dmabuf_page_pool *pool); ++void dmabuf_page_pool_free(struct dmabuf_page_pool *pool, struct page *page); + -+ data_old = data; -+ data = readl_relaxed(bank->reg_base + -+ bank->gpio_regs->ext_port); -+ } while ((data & BIT(irq)) != (data_old & BIT(irq))); -+ } ++#endif /* _DMABUF_PAGE_POOL_H */ +diff --git a/drivers/dma-buf/heaps/rk_cma_heap.c b/drivers/dma-buf/heaps/rk_cma_heap.c +new file mode 100644 +index 000000000..c93674c2a +--- /dev/null ++++ b/drivers/dma-buf/heaps/rk_cma_heap.c +@@ -0,0 +1,616 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * DMABUF CMA heap exporter ++ * ++ * Copyright (C) 2012, 2019, 2020 Linaro Ltd. ++ * Author: for ST-Ericsson. ++ * ++ * Also utilizing parts of Andrew Davis' SRAM heap: ++ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ ++ * Andrew F. Davis ++ * ++ * Copyright (C) 2021, 2022 Rockchip Electronics Co. Ltd. 
++ */ + -+ generic_handle_domain_irq(bank->domain, irq); -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ chained_irq_exit(chip, desc); -+} ++struct cma_heap { ++ struct dma_heap *heap; ++ struct cma *cma; ++}; + -+static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) -+{ -+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); -+ struct rockchip_pin_bank *bank = gc->private; -+ u32 mask = BIT(d->hwirq); -+ u32 polarity; -+ u32 level; -+ u32 data; -+ unsigned long flags; -+ int ret = 0; ++struct cma_heap_buffer { ++ struct cma_heap *heap; ++ struct list_head attachments; ++ struct mutex lock; ++ unsigned long len; ++ struct page *cma_pages; ++ struct page **pages; ++ pgoff_t pagecount; ++ int vmap_cnt; ++ void *vaddr; + -+ raw_spin_lock_irqsave(&bank->slock, flags); ++ bool uncached; ++}; + -+ rockchip_gpio_writel_bit(bank, d->hwirq, 0, -+ bank->gpio_regs->port_ddr); ++struct dma_heap_attachment { ++ struct device *dev; ++ struct sg_table table; ++ struct list_head list; ++ bool mapped; + -+ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ bool uncached; ++}; + -+ if (type & IRQ_TYPE_EDGE_BOTH) -+ irq_set_handler_locked(d, handle_edge_irq); -+ else -+ irq_set_handler_locked(d, handle_level_irq); ++static int cma_heap_attach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++{ ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; ++ int ret; + -+ raw_spin_lock_irqsave(&bank->slock, flags); ++ a = kzalloc(sizeof(*a), GFP_KERNEL); ++ if (!a) ++ return -ENOMEM; + -+ level = rockchip_gpio_readl(bank, bank->gpio_regs->int_type); -+ polarity = rockchip_gpio_readl(bank, bank->gpio_regs->int_polarity); ++ ret = sg_alloc_table_from_pages(&a->table, buffer->pages, ++ buffer->pagecount, 0, ++ buffer->pagecount << PAGE_SHIFT, ++ GFP_KERNEL); ++ if (ret) { ++ kfree(a); ++ return ret; ++ } + -+ if (type == IRQ_TYPE_EDGE_BOTH) { -+ if (bank->gpio_type == GPIO_TYPE_V2) { -+ rockchip_gpio_writel_bit(bank, d->hwirq, 1, -+ bank->gpio_regs->int_bothedge); -+ goto out; -+ } else { -+ bank->toggle_edge_mode |= mask; -+ level &= ~mask; ++ a->dev = attachment->dev; ++ INIT_LIST_HEAD(&a->list); ++ a->mapped = false; + -+ /* -+ * Determine gpio state. If 1 next interrupt should be -+ * low otherwise high. 
-+ */ -+ data = readl(bank->reg_base + bank->gpio_regs->ext_port); -+ if (data & mask) -+ polarity &= ~mask; -+ else -+ polarity |= mask; -+ } -+ } else { -+ if (bank->gpio_type == GPIO_TYPE_V2) { -+ rockchip_gpio_writel_bit(bank, d->hwirq, 0, -+ bank->gpio_regs->int_bothedge); -+ } else { -+ bank->toggle_edge_mode &= ~mask; -+ } -+ switch (type) { -+ case IRQ_TYPE_EDGE_RISING: -+ level |= mask; -+ polarity |= mask; -+ break; -+ case IRQ_TYPE_EDGE_FALLING: -+ level |= mask; -+ polarity &= ~mask; -+ break; -+ case IRQ_TYPE_LEVEL_HIGH: -+ level &= ~mask; -+ polarity |= mask; -+ break; -+ case IRQ_TYPE_LEVEL_LOW: -+ level &= ~mask; -+ polarity &= ~mask; -+ break; -+ default: -+ ret = -EINVAL; -+ goto out; -+ } -+ } ++ a->uncached = buffer->uncached; ++ attachment->priv = a; + -+ rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type); -+ rockchip_gpio_writel(bank, polarity, bank->gpio_regs->int_polarity); -+out: -+ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ mutex_lock(&buffer->lock); ++ list_add(&a->list, &buffer->attachments); ++ mutex_unlock(&buffer->lock); + -+ return ret; ++ return 0; +} + -+static int rockchip_irq_reqres(struct irq_data *d) ++static void cma_heap_detach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) +{ -+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); -+ struct rockchip_pin_bank *bank = gc->private; ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a = attachment->priv; + -+ rockchip_gpio_direction_input(&bank->gpio_chip, d->hwirq); ++ mutex_lock(&buffer->lock); ++ list_del(&a->list); ++ mutex_unlock(&buffer->lock); + -+ return gpiochip_reqres_irq(&bank->gpio_chip, d->hwirq); ++ sg_free_table(&a->table); ++ kfree(a); +} + -+static void rockchip_irq_relres(struct irq_data *d) ++static struct sg_table *cma_heap_map_dma_buf(struct dma_buf_attachment *attachment, ++ enum dma_data_direction direction) +{ -+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); -+ struct rockchip_pin_bank *bank = gc->private; ++ struct dma_heap_attachment *a = attachment->priv; ++ struct sg_table *table = &a->table; ++ int attrs = attachment->dma_map_attrs; ++ int ret; + -+ gpiochip_relres_irq(&bank->gpio_chip, d->hwirq); ++ if (a->uncached) ++ attrs |= DMA_ATTR_SKIP_CPU_SYNC; ++ ++ ret = dma_map_sgtable(attachment->dev, table, direction, attrs); ++ if (ret) ++ return ERR_PTR(-ENOMEM); ++ a->mapped = true; ++ return table; +} + -+static void rockchip_irq_suspend(struct irq_data *d) ++static void cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, ++ struct sg_table *table, ++ enum dma_data_direction direction) +{ -+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); -+ struct rockchip_pin_bank *bank = gc->private; ++ struct dma_heap_attachment *a = attachment->priv; ++ int attrs = attachment->dma_map_attrs; + -+ bank->saved_masks = irq_reg_readl(gc, bank->gpio_regs->int_mask); -+ irq_reg_writel(gc, ~gc->wake_active, bank->gpio_regs->int_mask); -+} ++ a->mapped = false; + -+static void rockchip_irq_resume(struct irq_data *d) -+{ -+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); -+ struct rockchip_pin_bank *bank = gc->private; ++ if (a->uncached) ++ attrs |= DMA_ATTR_SKIP_CPU_SYNC; + -+ irq_reg_writel(gc, bank->saved_masks, bank->gpio_regs->int_mask); ++ dma_unmap_sgtable(attachment->dev, table, direction, attrs); +} + -+static void rockchip_irq_enable(struct irq_data *d) ++static int __maybe_unused ++cma_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, ++ enum 
dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) +{ -+ irq_gc_mask_clr_bit(d); -+} ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ phys_addr_t phys = page_to_phys(buffer->cma_pages); + -+static void rockchip_irq_disable(struct irq_data *d) -+{ -+ irq_gc_mask_set_bit(d); -+} ++ if (buffer->vmap_cnt) ++ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); + -+static int rockchip_interrupts_register(struct rockchip_pin_bank *bank) -+{ -+ unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; -+ struct irq_chip_generic *gc; -+ int ret; ++ if (buffer->uncached) ++ return 0; + -+ bank->domain = irq_domain_create_linear(dev_fwnode(bank->dev), 32, -+ &irq_generic_chip_ops, NULL); -+ if (!bank->domain) { -+ dev_warn(bank->dev, "could not init irq domain for bank %s\n", -+ bank->name); -+ return -EINVAL; -+ } ++ mutex_lock(&buffer->lock); ++ dma_sync_single_for_cpu(dma_heap_get_dev(buffer->heap->heap), ++ phys + offset, ++ len, ++ direction); ++ mutex_unlock(&buffer->lock); + -+ ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1, -+ "rockchip_gpio_irq", -+ handle_level_irq, -+ clr, 0, 0); -+ if (ret) { -+ dev_err(bank->dev, "could not alloc generic chips for bank %s\n", -+ bank->name); -+ irq_domain_remove(bank->domain); -+ return -EINVAL; -+ } ++ return 0; ++} + -+ gc = irq_get_domain_generic_chip(bank->domain, 0); -+ if (bank->gpio_type == GPIO_TYPE_V2) { -+ gc->reg_writel = gpio_writel_v2; -+ gc->reg_readl = gpio_readl_v2; -+ } ++static int __maybe_unused ++cma_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, ++ enum dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) ++{ ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ phys_addr_t phys = page_to_phys(buffer->cma_pages); + -+ gc->reg_base = bank->reg_base; -+ gc->private = bank; -+ gc->chip_types[0].regs.mask = bank->gpio_regs->int_mask; -+ gc->chip_types[0].regs.ack = bank->gpio_regs->port_eoi; -+ gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit; -+ gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; -+ gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; -+ gc->chip_types[0].chip.irq_enable = rockchip_irq_enable; -+ gc->chip_types[0].chip.irq_disable = rockchip_irq_disable; -+ gc->chip_types[0].chip.irq_set_wake = irq_gc_set_wake; -+ gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend; -+ gc->chip_types[0].chip.irq_resume = rockchip_irq_resume; -+ gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type; -+ gc->chip_types[0].chip.irq_request_resources = rockchip_irq_reqres; -+ gc->chip_types[0].chip.irq_release_resources = rockchip_irq_relres; -+ gc->wake_enabled = IRQ_MSK(bank->nr_pins); ++ if (buffer->vmap_cnt) ++ flush_kernel_vmap_range(buffer->vaddr, buffer->len); + -+ /* -+ * Linux assumes that all interrupts start out disabled/masked. -+ * Our driver only uses the concept of masked and always keeps -+ * things enabled, so for us that's all masked and all enabled. 
-+ */ -+ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->int_mask); -+ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->port_eoi); -+ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->int_en); -+ gc->mask_cache = 0xffffffff; ++ if (buffer->uncached) ++ return 0; + -+ irq_set_chained_handler_and_data(bank->irq, -+ rockchip_irq_demux, bank); ++ mutex_lock(&buffer->lock); ++ dma_sync_single_for_device(dma_heap_get_dev(buffer->heap->heap), ++ phys + offset, ++ len, ++ direction); ++ mutex_unlock(&buffer->lock); + + return 0; +} + -+static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) ++static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) +{ -+ struct gpio_chip *gc; -+ int ret; -+ -+ bank->gpio_chip = rockchip_gpiolib_chip; ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; + -+ gc = &bank->gpio_chip; -+ gc->base = bank->pin_base; -+ gc->ngpio = bank->nr_pins; -+ gc->label = bank->name; -+ gc->parent = bank->dev; ++ if (buffer->vmap_cnt) ++ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); + -+ if (!gc->base) -+ gc->base = GPIO_MAX_PINS * bank->bank_num; -+ if (!gc->ngpio) -+ gc->ngpio = GPIO_MAX_PINS; -+ if (!gc->label) { -+ gc->label = kasprintf(GFP_KERNEL, "gpio%d", bank->bank_num); -+ if (!gc->label) -+ return -ENOMEM; ++ mutex_lock(&buffer->lock); ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_cpu(a->dev, &a->table, direction); + } ++ mutex_unlock(&buffer->lock); + -+ ret = gpiochip_add_data(gc, bank); -+ if (ret) { -+ dev_err(bank->dev, "failed to add gpiochip %s, %d\n", -+ gc->label, ret); -+ return ret; -+ } ++ return 0; ++} + -+ ret = rockchip_interrupts_register(bank); -+ if (ret) { -+ dev_err(bank->dev, "failed to register interrupt, %d\n", ret); -+ goto fail; -+ } ++static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) ++{ ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; + -+ return 0; ++ if (buffer->vmap_cnt) ++ flush_kernel_vmap_range(buffer->vaddr, buffer->len); + -+fail: -+ gpiochip_remove(&bank->gpio_chip); ++ mutex_lock(&buffer->lock); ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_device(a->dev, &a->table, direction); ++ } ++ mutex_unlock(&buffer->lock); + -+ return ret; ++ return 0; +} + -+static void rockchip_gpio_get_ver(struct rockchip_pin_bank *bank) ++static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) +{ -+ int id = readl(bank->reg_base + gpio_regs_v2.version_id); ++ struct vm_area_struct *vma = vmf->vma; ++ struct cma_heap_buffer *buffer = vma->vm_private_data; + -+ /* If not gpio v2, that is default to v1. 
*/ -+ if (id == GPIO_TYPE_V2 || id == GPIO_TYPE_V2_1) { -+ bank->gpio_regs = &gpio_regs_v2; -+ bank->gpio_type = GPIO_TYPE_V2; -+ } else { -+ bank->gpio_regs = &gpio_regs_v1; -+ bank->gpio_type = GPIO_TYPE_V1; -+ } ++ if (vmf->pgoff > buffer->pagecount) ++ return VM_FAULT_SIGBUS; ++ ++ vmf->page = buffer->pages[vmf->pgoff]; ++ get_page(vmf->page); ++ ++ return 0; +} + -+static struct rockchip_pin_bank * -+rockchip_gpio_find_bank(struct pinctrl_dev *pctldev, int id) ++static const struct vm_operations_struct dma_heap_vm_ops = { ++ .fault = cma_heap_vm_fault, ++}; ++ ++static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ -+ struct rockchip_pinctrl *info; -+ struct rockchip_pin_bank *bank; -+ int i, found = 0; ++ struct cma_heap_buffer *buffer = dmabuf->priv; + -+ info = pinctrl_dev_get_drvdata(pctldev); -+ bank = info->ctrl->pin_banks; -+ for (i = 0; i < info->ctrl->nr_banks; i++, bank++) { -+ if (bank->bank_num == id) { -+ found = 1; -+ break; -+ } -+ } ++ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) ++ return -EINVAL; + -+ return found ? bank : NULL; ++ if (buffer->uncached) ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ ++ vma->vm_ops = &dma_heap_vm_ops; ++ vma->vm_private_data = buffer; ++ ++ return 0; +} + -+static int rockchip_gpio_of_get_bank_id(struct device *dev) ++static void *cma_heap_do_vmap(struct cma_heap_buffer *buffer) +{ -+ static int gpio; -+ int bank_id = -1; ++ void *vaddr; ++ pgprot_t pgprot = PAGE_KERNEL; + -+ if (IS_ENABLED(CONFIG_OF) && dev->of_node) { -+ bank_id = of_alias_get_id(dev->of_node, "gpio"); -+ if (bank_id < 0) -+ bank_id = gpio++; -+ } ++ if (buffer->uncached) ++ pgprot = pgprot_writecombine(PAGE_KERNEL); + -+ return bank_id; ++ vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, pgprot); ++ if (!vaddr) ++ return ERR_PTR(-ENOMEM); ++ ++ return vaddr; +} + -+#ifdef CONFIG_ACPI -+static int rockchip_gpio_acpi_get_bank_id(struct device *dev) ++static void *cma_heap_vmap(struct dma_buf *dmabuf) +{ -+ struct acpi_device *adev; -+ unsigned long bank_id = -1; -+ const char *uid; -+ int ret; -+ -+ adev = ACPI_COMPANION(dev); -+ if (!adev) -+ return -ENXIO; ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ void *vaddr; + -+ uid = acpi_device_uid(adev); -+ if (!uid || !(*uid)) { -+ dev_err(dev, "Cannot retrieve UID\n"); -+ return -ENODEV; ++ mutex_lock(&buffer->lock); ++ if (buffer->vmap_cnt) { ++ buffer->vmap_cnt++; ++ vaddr = buffer->vaddr; ++ goto out; + } + -+ ret = kstrtoul(uid, 0, &bank_id); ++ vaddr = cma_heap_do_vmap(buffer); ++ if (IS_ERR(vaddr)) ++ goto out; + -+ return !ret ? 
bank_id : -ERANGE; ++ buffer->vaddr = vaddr; ++ buffer->vmap_cnt++; ++out: ++ mutex_unlock(&buffer->lock); ++ ++ return vaddr; +} -+#else -+static int rockchip_gpio_acpi_get_bank_id(struct device *dev) ++ ++static void cma_heap_vunmap(struct dma_buf *dmabuf, void *vaddr) +{ -+ return -ENOENT; ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ ++ mutex_lock(&buffer->lock); ++ if (!--buffer->vmap_cnt) { ++ vunmap(buffer->vaddr); ++ buffer->vaddr = NULL; ++ } ++ mutex_unlock(&buffer->lock); +} -+#endif /* CONFIG_ACPI */ + -+static int rockchip_gpio_probe(struct platform_device *pdev) ++static void cma_heap_dma_buf_release(struct dma_buf *dmabuf) +{ -+ struct device *dev = &pdev->dev; -+ struct pinctrl_dev *pctldev = NULL; -+ struct rockchip_pin_bank *bank = NULL; -+ int bank_id = 0; -+ int ret; ++ struct cma_heap_buffer *buffer = dmabuf->priv; ++ struct cma_heap *cma_heap = buffer->heap; + -+ bank_id = rockchip_gpio_acpi_get_bank_id(dev); -+ if (bank_id < 0) { -+ bank_id = rockchip_gpio_of_get_bank_id(dev); -+ if (bank_id < 0) -+ return bank_id; ++ if (buffer->vmap_cnt > 0) { ++ WARN(1, "%s: buffer still mapped in the kernel\n", __func__); ++ vunmap(buffer->vaddr); + } + -+ if (!ACPI_COMPANION(dev)) { -+ struct device_node *pctlnp = of_get_parent(dev->of_node); ++ /* free page list */ ++ kfree(buffer->pages); ++ /* release memory */ ++ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); ++ kfree(buffer); ++} + -+ pctldev = of_pinctrl_get(pctlnp); -+ if (!pctldev) -+ return -EPROBE_DEFER; ++static const struct dma_buf_ops cma_heap_buf_ops = { ++ .attach = cma_heap_attach, ++ .detach = cma_heap_detach, ++ .map_dma_buf = cma_heap_map_dma_buf, ++ .unmap_dma_buf = cma_heap_unmap_dma_buf, ++ .begin_cpu_access = cma_heap_dma_buf_begin_cpu_access, ++ .end_cpu_access = cma_heap_dma_buf_end_cpu_access, ++#ifdef CONFIG_DMABUF_PARTIAL ++ .begin_cpu_access_partial = cma_heap_dma_buf_begin_cpu_access_partial, ++ .end_cpu_access_partial = cma_heap_dma_buf_end_cpu_access_partial, ++#endif ++ .mmap = cma_heap_mmap, ++ .vmap = cma_heap_vmap, ++ .vunmap = cma_heap_vunmap, ++ .release = cma_heap_dma_buf_release, ++}; + -+ bank = rockchip_gpio_find_bank(pctldev, bank_id); -+ if (!bank) -+ return -ENODEV; -+ } ++static struct dma_buf *cma_heap_do_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags, bool uncached) ++{ ++ struct cma_heap *cma_heap = dma_heap_get_drvdata(heap); ++ struct cma_heap_buffer *buffer; ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ size_t size = PAGE_ALIGN(len); ++ pgoff_t pagecount = size >> PAGE_SHIFT; ++ unsigned long align = get_order(size); ++ struct page *cma_pages; ++ struct dma_buf *dmabuf; ++ int ret = -ENOMEM; ++ pgoff_t pg; ++ dma_addr_t dma; + -+ if (!bank) { -+ bank = devm_kzalloc(dev, sizeof(*bank), GFP_KERNEL); -+ if (!bank) -+ return -ENOMEM; -+ } ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) ++ return ERR_PTR(-ENOMEM); + -+ bank->bank_num = bank_id; -+ bank->dev = dev; ++ buffer->uncached = uncached; + -+ bank->reg_base = devm_platform_ioremap_resource(pdev, 0); -+ if (IS_ERR(bank->reg_base)) -+ return PTR_ERR(bank->reg_base); ++ INIT_LIST_HEAD(&buffer->attachments); ++ mutex_init(&buffer->lock); ++ buffer->len = size; + -+ bank->irq = platform_get_irq(pdev, 0); -+ if (bank->irq < 0) -+ return bank->irq; ++ if (align > CONFIG_CMA_ALIGNMENT) ++ align = CONFIG_CMA_ALIGNMENT; + -+ raw_spin_lock_init(&bank->slock); ++ cma_pages = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); ++ 
if (!cma_pages)
++ goto free_buffer;
+
++ /* Clear the cma pages */
++ if (PageHighMem(cma_pages)) {
++ unsigned long nr_clear_pages = pagecount;
++ struct page *page = cma_pages;
+
++ while (nr_clear_pages > 0) {
++ void *vaddr = kmap_atomic(page);
++
++ memset(vaddr, 0, PAGE_SIZE);
++ kunmap_atomic(vaddr);
++ /*
++ * Avoid wasting time zeroing memory if the process
++ * has been killed by SIGKILL
++ */
++ if (fatal_signal_pending(current))
++ goto free_cma;
++ page++;
++ nr_clear_pages--;
++ }
++ } else {
++ memset(page_address(cma_pages), 0, size);
++ }
+
++ buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages), GFP_KERNEL);
++ if (!buffer->pages) {
++ ret = -ENOMEM;
++ goto free_cma;
++ }
+
++ for (pg = 0; pg < pagecount; pg++)
++ buffer->pages[pg] = &cma_pages[pg];
+
++ buffer->cma_pages = cma_pages;
++ buffer->heap = cma_heap;
++ buffer->pagecount = pagecount;
+
++ /* create the dmabuf */
++ exp_info.exp_name = dma_heap_get_name(heap);
++ exp_info.ops = &cma_heap_buf_ops;
++ exp_info.size = buffer->len;
++ exp_info.flags = fd_flags;
++ exp_info.priv = buffer;
++ dmabuf = dma_buf_export(&exp_info);
++ if (IS_ERR(dmabuf)) {
++ ret = PTR_ERR(dmabuf);
++ goto free_pages;
++ }
+
++ if (buffer->uncached) {
++ dma = dma_map_page(dma_heap_get_dev(heap), buffer->cma_pages, 0,
++ buffer->pagecount * PAGE_SIZE, DMA_FROM_DEVICE);
++ dma_unmap_page(dma_heap_get_dev(heap), dma,
++ buffer->pagecount * PAGE_SIZE, DMA_FROM_DEVICE);
++ }
+
++ return dmabuf;
+
++free_pages:
++ kfree(buffer->pages);
++free_cma:
++ cma_release(cma_heap->cma, cma_pages, pagecount);
++free_buffer:
++ kfree(buffer);
+
++ return ERR_PTR(ret);
++}
+
++static struct dma_buf 
*cma_heap_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) ++{ ++ return cma_heap_do_allocate(heap, len, fd_flags, heap_flags, false); ++} + -+ mutex_unlock(&bank->deferred_lock); ++#if IS_ENABLED(CONFIG_NO_GKI) ++static int cma_heap_get_phys(struct dma_heap *heap, ++ struct dma_heap_phys_data *phys) ++{ ++ struct cma_heap *cma_heap = dma_heap_get_drvdata(heap); ++ struct cma_heap_buffer *buffer; ++ struct dma_buf *dmabuf; + -+ platform_set_drvdata(pdev, bank); -+ dev_info(dev, "probed %pfw\n", dev_fwnode(dev)); ++ phys->paddr = (__u64)-1; + -+ return 0; -+err_unlock: -+ mutex_unlock(&bank->deferred_lock); -+ clk_disable_unprepare(bank->clk); -+ clk_disable_unprepare(bank->db_clk); ++ if (IS_ERR_OR_NULL(phys)) ++ return -EINVAL; + -+ return ret; -+} ++ dmabuf = dma_buf_get(phys->fd); ++ if (IS_ERR_OR_NULL(dmabuf)) ++ return -EBADFD; + -+static int rockchip_gpio_remove(struct platform_device *pdev) -+{ -+ struct rockchip_pin_bank *bank = platform_get_drvdata(pdev); ++ buffer = dmabuf->priv; ++ if (IS_ERR_OR_NULL(buffer)) ++ goto err; + -+ clk_disable_unprepare(bank->clk); -+ clk_disable_unprepare(bank->db_clk); -+ gpiochip_remove(&bank->gpio_chip); ++ if (buffer->heap != cma_heap) ++ goto err; + -+ return 0; -+} ++ phys->paddr = page_to_phys(buffer->cma_pages); + -+static const struct of_device_id rockchip_gpio_match[] = { -+ { .compatible = "rockchip,gpio-bank", }, -+ { .compatible = "rockchip,rk3188-gpio-bank0" }, -+ { }, -+}; ++err: ++ dma_buf_put(dmabuf); + -+static struct platform_driver rockchip_gpio_driver = { -+ .probe = rockchip_gpio_probe, -+ .remove = rockchip_gpio_remove, -+ .driver = { -+ .name = "rockchip-gpio", -+ .of_match_table = rockchip_gpio_match, -+ }, ++ return (phys->paddr == (__u64)-1) ? -EINVAL : 0; ++} ++#endif ++ ++static const struct dma_heap_ops cma_heap_ops = { ++ .allocate = cma_heap_allocate, ++#if IS_ENABLED(CONFIG_NO_GKI) ++ .get_phys = cma_heap_get_phys, ++#endif +}; + -+static int __init rockchip_gpio_init(void) ++static struct dma_buf *cma_uncached_heap_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) +{ -+ return platform_driver_register(&rockchip_gpio_driver); ++ return cma_heap_do_allocate(heap, len, fd_flags, heap_flags, true); +} -+postcore_initcall(rockchip_gpio_init); + -+static void __exit rockchip_gpio_exit(void) ++static struct dma_buf *cma_uncached_heap_not_initialized(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) +{ -+ platform_driver_unregister(&rockchip_gpio_driver); ++ pr_info("heap %s not initialized\n", dma_heap_get_name(heap)); ++ return ERR_PTR(-EBUSY); +} -+module_exit(rockchip_gpio_exit); + -+MODULE_DESCRIPTION("Rockchip gpio driver"); -+MODULE_ALIAS("platform:rockchip-gpio"); -+MODULE_LICENSE("GPL v2"); -+MODULE_DEVICE_TABLE(of, rockchip_gpio_match); -diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c -index cec9e8f29..3828c0a0f 100644 ---- a/drivers/gpio/gpiolib-of.c -+++ b/drivers/gpio/gpiolib-of.c -@@ -25,21 +25,6 @@ - #include "gpiolib.h" - #include "gpiolib-of.h" - --/* -- * This is Linux-specific flags. By default controllers' and Linux' mapping -- * match, but GPIO controllers are free to translate their own flags to -- * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. 
-- */ --enum of_gpio_flags { -- OF_GPIO_ACTIVE_LOW = 0x1, -- OF_GPIO_SINGLE_ENDED = 0x2, -- OF_GPIO_OPEN_DRAIN = 0x4, -- OF_GPIO_TRANSITORY = 0x8, -- OF_GPIO_PULL_UP = 0x10, -- OF_GPIO_PULL_DOWN = 0x20, -- OF_GPIO_PULL_DISABLE = 0x40, --}; -- - /** - * of_gpio_named_count() - Count GPIOs for a device - * @np: device node to count GPIOs for -@@ -439,6 +424,20 @@ int of_get_named_gpio(const struct device_node *np, const char *propname, - } - EXPORT_SYMBOL_GPL(of_get_named_gpio); - -+int of_get_named_gpio_flags(struct device_node *np, const char *list_name, -+ int index, enum of_gpio_flags *flags) ++static struct dma_heap_ops cma_uncached_heap_ops = { ++ .allocate = cma_uncached_heap_not_initialized, ++}; ++ ++static int set_heap_dev_dma(struct device *heap_dev) +{ -+ struct gpio_desc *desc; ++ int err = 0; + -+ desc = of_get_named_gpiod_flags(np, list_name, index, flags); ++ if (!heap_dev) ++ return -EINVAL; + -+ if (IS_ERR(desc)) -+ return PTR_ERR(desc); -+ else -+ return desc_to_gpio(desc); -+} -+EXPORT_SYMBOL_GPL(of_get_named_gpio_flags); ++ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); + - /* Converts gpio_lookup_flags into bitmask of GPIO_* values */ - static unsigned long of_convert_gpio_flags(enum of_gpio_flags flags) - { -@@ -1129,3 +1128,4 @@ void of_gpiochip_remove(struct gpio_chip *chip) - { - of_node_put(dev_of_node(&chip->gpiodev->dev)); - } ++ if (!heap_dev->dma_parms) { ++ heap_dev->dma_parms = devm_kzalloc(heap_dev, ++ sizeof(*heap_dev->dma_parms), ++ GFP_KERNEL); ++ if (!heap_dev->dma_parms) ++ return -ENOMEM; + -diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile -index 8997f0096..b37172366 100644 ---- a/drivers/gpu/Makefile -+++ b/drivers/gpu/Makefile -@@ -3,5 +3,6 @@ - # taken to initialize them in the correct order. Link order is the only way - # to ensure this currently. - obj-y += host1x/ drm/ vga/ -+obj-y += arm/ - obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ - obj-$(CONFIG_TRACE_GPU_MEM) += trace/ -diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild -new file mode 100755 -index 000000000..f747fc889 ---- /dev/null -+++ b/drivers/gpu/arm/Kbuild -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# -+# (C) COPYRIGHT 2012, 2020 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# ++ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); ++ if (err) { ++ devm_kfree(heap_dev, heap_dev->dma_parms); ++ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); ++ return err; ++ } ++ } + -+obj-$(CONFIG_MALI_MIDGARD) += midgard/ ++ return 0; ++} + -+obj-$(CONFIG_MALI400) += mali400/ ++static int __add_cma_heap(struct cma *cma, void *data) ++{ ++ struct cma_heap *cma_heap, *cma_uncached_heap; ++ struct dma_heap_export_info exp_info; ++ int ret; + -+obj-$(CONFIG_MALI_BIFROST) += bifrost/ -diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig -new file mode 100644 -index 000000000..398a8e50a ---- /dev/null -+++ b/drivers/gpu/arm/Kconfig -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# -+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+# -+source "drivers/gpu/arm/mali400/mali/Kconfig" ++ cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); ++ if (!cma_heap) ++ return -ENOMEM; ++ cma_heap->cma = cma; + -+source "drivers/gpu/arm/midgard/Kconfig" ++ exp_info.name = "cma"; ++ exp_info.ops = &cma_heap_ops; ++ exp_info.priv = cma_heap; + -+source "drivers/gpu/arm/bifrost/Kconfig" -diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild -new file mode 100755 -index 000000000..9cadda188 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/Kbuild -@@ -0,0 +1,243 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ cma_heap->heap = dma_heap_add(&exp_info); ++ if (IS_ERR(cma_heap->heap)) { ++ ret = PTR_ERR(cma_heap->heap); ++ goto free_cma_heap; ++ } + -+# make $(src) as absolute path if it is not already, by prefixing $(srctree) -+# This is to prevent any build issue due to wrong path. 
-+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) ++ cma_uncached_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); ++ if (!cma_uncached_heap) { ++ ret = -ENOMEM; ++ goto put_cma_heap; ++ } + -+# -+# Prevent misuse when Kernel configurations are not present by default -+# in out-of-tree builds -+# -+ifneq ($(CONFIG_ANDROID),n) -+ifeq ($(CONFIG_GPU_TRACEPOINTS),n) -+ $(error CONFIG_GPU_TRACEPOINTS must be set in Kernel configuration) -+endif -+endif ++ cma_uncached_heap->cma = cma; + -+ifeq ($(CONFIG_DMA_SHARED_BUFFER),n) -+ $(error CONFIG_DMA_SHARED_BUFFER must be set in Kernel configuration) -+endif ++ exp_info.name = "cma-uncached"; ++ exp_info.ops = &cma_uncached_heap_ops; ++ exp_info.priv = cma_uncached_heap; + -+ifeq ($(CONFIG_PM_DEVFREQ),n) -+ $(error CONFIG_PM_DEVFREQ must be set in Kernel configuration) -+endif ++ cma_uncached_heap->heap = dma_heap_add(&exp_info); ++ if (IS_ERR(cma_uncached_heap->heap)) { ++ ret = PTR_ERR(cma_uncached_heap->heap); ++ goto free_uncached_cma_heap; ++ } + -+ifeq ($(CONFIG_DEVFREQ_THERMAL),n) -+ $(error CONFIG_DEVFREQ_THERMAL must be set in Kernel configuration) -+endif ++ ret = set_heap_dev_dma(dma_heap_get_dev(cma_uncached_heap->heap)); ++ if (ret) ++ goto put_uncached_cma_heap; + -+ifeq ($(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND),n) -+ $(error CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND must be set in Kernel configuration) -+endif ++ mb(); /* make sure we only set allocate after dma_mask is set */ ++ cma_uncached_heap_ops.allocate = cma_uncached_heap_allocate; + -+ifeq ($(CONFIG_FW_LOADER), n) -+ $(error CONFIG_FW_LOADER must be set in Kernel configuration) -+endif ++ return 0; + -+ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y) -+ ifneq ($(CONFIG_DEBUG_FS), y) -+ $(error CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS depends on CONFIG_DEBUG_FS to be set in Kernel configuration) -+ endif -+endif ++put_uncached_cma_heap: ++ dma_heap_put(cma_uncached_heap->heap); ++free_uncached_cma_heap: ++ kfree(cma_uncached_heap); ++put_cma_heap: ++ dma_heap_put(cma_heap->heap); ++free_cma_heap: ++ kfree(cma_heap); + -+ifeq ($(CONFIG_MALI_BIFROST_FENCE_DEBUG), y) -+ ifneq ($(CONFIG_SYNC_FILE), y) -+ $(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration) -+ endif -+endif ++ return ret; ++} + -+# -+# Configurations -+# ++static int add_default_cma_heap(void) ++{ ++ struct cma *default_cma = dev_get_cma_area(NULL); ++ int ret = 0; + -+# Driver version string which is returned to userspace via an ioctl -+MALI_RELEASE_NAME ?= '"g18p0-01eac0"' -+# Set up defaults if not defined by build system -+ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) -+ MALI_UNIT_TEST = 1 -+ MALI_CUSTOMER_RELEASE ?= 0 -+else -+ MALI_UNIT_TEST ?= 0 -+ MALI_CUSTOMER_RELEASE ?= 1 -+endif -+MALI_COVERAGE ?= 0 ++ if (default_cma) ++ ret = __add_cma_heap(default_cma, NULL); + -+# Kconfig passes in the name with quotes for in-tree builds - remove them. -+MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) ++ return ret; ++} ++module_init(add_default_cma_heap); ++MODULE_DESCRIPTION("DMA-BUF CMA Heap"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/dma-buf/heaps/rk_system_heap.c b/drivers/dma-buf/heaps/rk_system_heap.c +new file mode 100644 +index 000000000..52dcfc2bb +--- /dev/null ++++ b/drivers/dma-buf/heaps/rk_system_heap.c +@@ -0,0 +1,841 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * DMABUF System heap exporter for Rockchip ++ * ++ * Copyright (C) 2011 Google, Inc. ++ * Copyright (C) 2019, 2020 Linaro Ltd. 
++ * Copyright (c) 2021, 2022 Rockchip Electronics Co. Ltd. ++ * ++ * Portions based off of Andrew Davis' SRAM heap: ++ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ ++ * Andrew F. Davis ++ */ + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ MALI_JIT_PRESSURE_LIMIT_BASE = 0 -+ MALI_USE_CSF = 1 -+else -+ MALI_JIT_PRESSURE_LIMIT_BASE ?= 1 -+ MALI_USE_CSF ?= 0 -+endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + ++#include "page_pool.h" ++#include "deferred-free-helper.h" + -+ifneq ($(CONFIG_MALI_KUTF), n) -+ MALI_KERNEL_TEST_API ?= 1 -+else -+ MALI_KERNEL_TEST_API ?= 0 -+endif ++static struct dma_heap *sys_heap; ++static struct dma_heap *sys_dma32_heap; ++static struct dma_heap *sys_uncached_heap; ++static struct dma_heap *sys_uncached_dma32_heap; + -+# Experimental features (corresponding -D definition should be appended to -+# ccflags-y below, e.g. for MALI_EXPERIMENTAL_FEATURE, -+# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) -+# -+# Experimental features must default to disabled, e.g.: -+# MALI_EXPERIMENTAL_FEATURE ?= 0 -+MALI_INCREMENTAL_RENDERING_JM ?= 0 ++/* Default setting */ ++static u32 bank_bit_first = 12; ++static u32 bank_bit_mask = 0x7; + -+# -+# ccflags -+# -+ccflags-y = \ -+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ -+ -DMALI_USE_CSF=$(MALI_USE_CSF) \ -+ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -+ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -+ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ -+ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ -+ -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \ -+ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) ++struct system_heap_buffer { ++ struct dma_heap *heap; ++ struct list_head attachments; ++ struct mutex lock; ++ unsigned long len; ++ struct sg_table sg_table; ++ int vmap_cnt; ++ void *vaddr; ++ struct deferred_freelist_item deferred_free; ++ struct dmabuf_page_pool **pools; ++ bool uncached; ++}; + ++struct dma_heap_attachment { ++ struct device *dev; ++ struct sg_table *table; ++ struct list_head list; ++ bool mapped; + -+ifeq ($(KBUILD_EXTMOD),) -+# in-tree -+ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) -+else -+# out-of-tree -+ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) -+endif ++ bool uncached; ++}; + -+ccflags-y += \ -+ -I$(srctree)/include/linux \ -+ -I$(srctree)/drivers/staging/android \ -+ -I$(src) \ -+ -I$(src)/platform/$(MALI_PLATFORM_DIR) \ -+ -I$(src)/../../../base \ -+ -I$(src)/../../../../include ++#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) ++#define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ ++ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ ++ | __GFP_COMP) ++static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; ++/* ++ * The selection of the orders used for allocation (1MB, 64K, 4K) is designed ++ * to match with the sizes often found in IOMMUs. Using order 4 pages instead ++ * of order 0 pages can significantly improve the performance of many IOMMUs ++ * by reducing TLB pressure and time spent updating page tables. 
++ */ ++static unsigned int orders[] = {8, 4, 0}; ++#define NUM_ORDERS ARRAY_SIZE(orders) ++struct dmabuf_page_pool *pools[NUM_ORDERS]; ++struct dmabuf_page_pool *dma32_pools[NUM_ORDERS]; + -+subdir-ccflags-y += $(ccflags-y) ++static struct sg_table *dup_sg_table(struct sg_table *table) ++{ ++ struct sg_table *new_table; ++ int ret, i; ++ struct scatterlist *sg, *new_sg; + -+# -+# Kernel Modules -+# -+obj-$(CONFIG_MALI_BIFROST) += bifrost_kbase.o -+obj-$(CONFIG_MALI_KUTF) += tests/ ++ new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); ++ if (!new_table) ++ return ERR_PTR(-ENOMEM); + -+bifrost_kbase-y := \ -+ mali_kbase_cache_policy.o \ -+ mali_kbase_ccswe.o \ -+ mali_kbase_mem.o \ -+ mali_kbase_mem_migrate.o \ -+ mali_kbase_mem_pool_group.o \ -+ mali_kbase_native_mgm.o \ -+ mali_kbase_ctx_sched.o \ -+ mali_kbase_gpuprops.o \ -+ mali_kbase_pm.o \ -+ mali_kbase_config.o \ -+ mali_kbase_kinstr_prfcnt.o \ -+ mali_kbase_vinstr.o \ -+ mali_kbase_softjobs.o \ -+ mali_kbase_hw.o \ -+ mali_kbase_debug.o \ -+ mali_kbase_gpu_memory_debugfs.o \ -+ mali_kbase_mem_linux.o \ -+ mali_kbase_core_linux.o \ -+ mali_kbase_mem_profile_debugfs.o \ -+ mali_kbase_disjoint_events.o \ -+ mali_kbase_debug_mem_view.o \ -+ mali_kbase_debug_mem_zones.o \ -+ mali_kbase_debug_mem_allocs.o \ -+ mali_kbase_smc.o \ -+ mali_kbase_mem_pool.o \ -+ mali_kbase_mem_pool_debugfs.o \ -+ mali_kbase_debugfs_helper.o \ -+ mali_kbase_strings.o \ -+ mali_kbase_as_fault_debugfs.o \ -+ mali_kbase_regs_history_debugfs.o \ -+ mali_kbase_dvfs_debugfs.o \ -+ mali_power_gpu_frequency_trace.o \ -+ mali_kbase_trace_gpu_mem.o \ -+ mali_kbase_pbha.o ++ ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL); ++ if (ret) { ++ kfree(new_table); ++ return ERR_PTR(-ENOMEM); ++ } + -+bifrost_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o ++ new_sg = new_table->sgl; ++ for_each_sgtable_sg(table, sg, i) { ++ sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset); ++ new_sg = sg_next(new_sg); ++ } + -+bifrost_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o ++ return new_table; ++} + -+bifrost_kbase-$(CONFIG_SYNC_FILE) += \ -+ mali_kbase_fence_ops.o \ -+ mali_kbase_sync_file.o \ -+ mali_kbase_sync_common.o ++static int system_heap_attach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; ++ struct sg_table *table; + -+ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += \ -+ mali_kbase_jm.o \ -+ mali_kbase_dummy_job_wa.o \ -+ mali_kbase_debug_job_fault.o \ -+ mali_kbase_event.o \ -+ mali_kbase_jd.o \ -+ mali_kbase_jd_debugfs.o \ -+ mali_kbase_js.o \ -+ mali_kbase_js_ctx_attr.o \ -+ mali_kbase_kinstr_jm.o ++ a = kzalloc(sizeof(*a), GFP_KERNEL); ++ if (!a) ++ return -ENOMEM; + -+ bifrost_kbase-$(CONFIG_SYNC_FILE) += \ -+ mali_kbase_fence_ops.o \ -+ mali_kbase_fence.o -+endif ++ table = dup_sg_table(&buffer->sg_table); ++ if (IS_ERR(table)) { ++ kfree(a); ++ return -ENOMEM; ++ } + ++ a->table = table; ++ a->dev = attachment->dev; ++ INIT_LIST_HEAD(&a->list); ++ a->mapped = false; ++ a->uncached = buffer->uncached; ++ attachment->priv = a; + -+INCLUDE_SUBDIR = \ -+ $(src)/context/Kbuild \ -+ $(src)/debug/Kbuild \ -+ $(src)/device/Kbuild \ -+ $(src)/backend/gpu/Kbuild \ -+ $(src)/mmu/Kbuild \ -+ $(src)/tl/Kbuild \ -+ $(src)/hwcnt/Kbuild \ -+ $(src)/gpu/Kbuild \ -+ $(src)/thirdparty/Kbuild \ -+ $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild ++ mutex_lock(&buffer->lock); ++ list_add(&a->list, &buffer->attachments); ++ 
mutex_unlock(&buffer->lock); + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ INCLUDE_SUBDIR += $(src)/csf/Kbuild -+endif ++ return 0; ++} + -+ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) -+ INCLUDE_SUBDIR += $(src)/arbiter/Kbuild -+endif ++static void system_heap_detach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a = attachment->priv; + -+ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y) -+ ifeq ($(CONFIG_DEVFREQ_THERMAL),y) -+ INCLUDE_SUBDIR += $(src)/ipa/Kbuild -+ endif -+endif ++ mutex_lock(&buffer->lock); ++ list_del(&a->list); ++ mutex_unlock(&buffer->lock); + -+ifeq ($(KBUILD_EXTMOD),) -+# in-tree -+ -include $(INCLUDE_SUBDIR) -+else -+# out-of-tree -+ include $(INCLUDE_SUBDIR) -+endif -diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig -new file mode 100644 -index 000000000..ca3da57cf ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/Kconfig -@@ -0,0 +1,389 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ sg_free_table(a->table); ++ kfree(a->table); ++ kfree(a); ++} + -+menuconfig MALI_BIFROST -+ tristate "Mali Bifrost series support" -+ select GPU_TRACEPOINTS if ANDROID -+ select DMA_SHARED_BUFFER -+ select FW_LOADER -+ default n -+ help -+ Enable this option to build support for a ARM Mali Bifrost GPU. ++static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attachment, ++ enum dma_data_direction direction) ++{ ++ struct dma_heap_attachment *a = attachment->priv; ++ struct sg_table *table = a->table; ++ int attr = attachment->dma_map_attrs; ++ int ret; + -+ To compile this driver as a module, choose M here: -+ this will generate a single module, called mali_kbase. ++ if (a->uncached) ++ attr |= DMA_ATTR_SKIP_CPU_SYNC; + -+if MALI_BIFROST ++ ret = dma_map_sgtable(attachment->dev, table, direction, attr); ++ if (ret) ++ return ERR_PTR(ret); + -+config MALI_PLATFORM_NAME -+ depends on MALI_BIFROST -+ string "Platform name" -+ default "devicetree" -+ help -+ Enter the name of the desired platform configuration directory to -+ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must -+ exist. ++ a->mapped = true; ++ return table; ++} + -+choice -+ prompt "Mali HW backend" -+ depends on MALI_BIFROST -+ default MALI_REAL_HW ++static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, ++ struct sg_table *table, ++ enum dma_data_direction direction) ++{ ++ struct dma_heap_attachment *a = attachment->priv; ++ int attr = attachment->dma_map_attrs; + -+config MALI_REAL_HW -+ bool "Enable build of Mali kernel driver for real HW" -+ depends on MALI_BIFROST -+ help -+ This is the default HW backend. 
++ if (a->uncached) ++ attr |= DMA_ATTR_SKIP_CPU_SYNC; ++ a->mapped = false; ++ dma_unmap_sgtable(attachment->dev, table, direction, attr); ++} + -+config MALI_BIFROST_NO_MALI -+ bool "Enable build of Mali kernel driver for No Mali" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ help -+ This can be used to test the driver in a simulated environment -+ whereby the hardware is not physically present. If the hardware is physically -+ present it will not be used. This can be used to test the majority of the -+ driver without needing actual hardware or for software benchmarking. -+ All calls to the simulated hardware will complete immediately as if the hardware -+ completed the task. ++static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; + ++ mutex_lock(&buffer->lock); + -+endchoice ++ if (buffer->vmap_cnt) ++ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); + -+menu "Platform specific options" -+source "drivers/gpu/arm/bifrost/platform/Kconfig" -+endmenu ++ if (!buffer->uncached) { ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_cpu(a->dev, a->table, direction); ++ } ++ } ++ mutex_unlock(&buffer->lock); + -+config MALI_CSF_SUPPORT -+ bool "Enable Mali CSF based GPU support" -+ default n -+ help -+ Enables support for CSF based GPUs. ++ return 0; ++} + -+config MALI_BIFROST_DEVFREQ -+ bool "Enable devfreq support for Mali" -+ depends on MALI_BIFROST && PM_DEVFREQ -+ select DEVFREQ_GOV_SIMPLE_ONDEMAND -+ default y -+ help -+ Support devfreq for Mali. ++static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; + -+ Using the devfreq framework and, by default, the simple on-demand -+ governor, the frequency of Mali will be dynamically selected from the -+ available OPPs. ++ mutex_lock(&buffer->lock); + -+config MALI_BIFROST_DVFS -+ bool "Enable legacy DVFS" -+ depends on MALI_BIFROST && !MALI_BIFROST_DEVFREQ -+ default n -+ help -+ Choose this option to enable legacy DVFS in the Mali Midgard DDK. ++ if (buffer->vmap_cnt) ++ flush_kernel_vmap_range(buffer->vaddr, buffer->len); + -+config MALI_BIFROST_GATOR_SUPPORT -+ bool "Enable Streamline tracing support" -+ depends on MALI_BIFROST -+ default y -+ help -+ Enables kbase tracing used by the Arm Streamline Performance Analyzer. -+ The tracepoints are used to derive GPU activity charts in Streamline. ++ if (!buffer->uncached) { ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_device(a->dev, a->table, direction); ++ } ++ } ++ mutex_unlock(&buffer->lock); + -+config MALI_BIFROST_ENABLE_TRACE -+ bool "Enable kbase tracing" -+ depends on MALI_BIFROST -+ default y if MALI_BIFROST_DEBUG -+ default n -+ help -+ Enables tracing in kbase. Trace log available through -+ the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled ++ return 0; ++} + -+config MALI_ARBITER_SUPPORT -+ bool "Enable arbiter support for Mali" -+ depends on MALI_BIFROST && !MALI_CSF_SUPPORT -+ default n -+ help -+ Enable support for the arbiter interface in the driver. 
-+ This allows an external arbiter to manage driver access -+ to GPU hardware in a virtualized environment ++static int system_heap_sgl_sync_range(struct device *dev, ++ struct sg_table *sgt, ++ unsigned int offset, ++ unsigned int length, ++ enum dma_data_direction dir, ++ bool for_cpu) ++{ ++ struct scatterlist *sg; ++ unsigned int len = 0; ++ dma_addr_t sg_dma_addr; ++ int i; + -+ If unsure, say N. ++ for_each_sgtable_sg(sgt, sg, i) { ++ unsigned int sg_offset, sg_left, size = 0; + -+config MALI_DMA_BUF_MAP_ON_DEMAND -+ bool "Enable map imported dma-bufs on demand" -+ depends on MALI_BIFROST -+ default n -+ help -+ This option will cause kbase to set up the GPU mapping of imported -+ dma-buf when needed to run atoms. This is the legacy behavior. ++ sg_dma_addr = sg_phys(sg); + -+ This is intended for testing and the option will get removed in the -+ future. ++ len += sg->length; ++ if (len <= offset) ++ continue; + -+config MALI_DMA_BUF_LEGACY_COMPAT -+ bool "Enable legacy compatibility cache flush on dma-buf map" -+ depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND -+ default n -+ help -+ This option enables compatibility with legacy dma-buf mapping -+ behavior, then the dma-buf is mapped on import, by adding cache -+ maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, -+ including a cache flush. ++ sg_left = len - offset; ++ sg_offset = sg->length - sg_left; + -+ This option might work-around issues related to missing cache -+ flushes in other drivers. This only has an effect for clients using -+ UK 11.18 or older. For later UK versions it is not possible. ++ size = (length < sg_left) ? length : sg_left; ++ if (for_cpu) ++ dma_sync_single_range_for_cpu(dev, sg_dma_addr, ++ sg_offset, size, dir); ++ else ++ dma_sync_single_range_for_device(dev, sg_dma_addr, ++ sg_offset, size, dir); + -+config MALI_CORESIGHT -+ depends on MALI_BIFROST && MALI_CSF_SUPPORT && !MALI_BIFROST_NO_MALI -+ bool "Enable Kbase CoreSight tracing support" -+ default n ++ offset += size; ++ length -= size; + -+menuconfig MALI_BIFROST_EXPERT -+ depends on MALI_BIFROST -+ bool "Enable Expert Settings" -+ default n -+ help -+ Enabling this option and modifying the default settings may produce -+ a driver with performance or other limitations. ++ if (length == 0) ++ break; ++ } + -+if MALI_BIFROST_EXPERT ++ return 0; ++} + -+config LARGE_PAGE_ALLOC_OVERRIDE -+ bool "Override default setting of 2MB pages" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ An override config for LARGE_PAGE_ALLOC config. -+ When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be -+ enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be -+ enabled when GPU HW satisfies requirements. ++static int __maybe_unused ++system_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, ++ enum dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap *heap = buffer->heap; ++ struct sg_table *table = &buffer->sg_table; ++ int ret; + -+ If in doubt, say N ++ if (direction == DMA_TO_DEVICE) ++ return 0; + -+config LARGE_PAGE_ALLOC -+ bool "Attempt to allocate 2MB pages" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ Rather than allocating all GPU memory page-by-page, attempt to -+ allocate 2MB pages from the kernel. This reduces TLB pressure and -+ helps to prevent memory fragmentation. 
++ mutex_lock(&buffer->lock); ++ if (buffer->vmap_cnt) ++ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); + -+ Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config -+ is enabled and enabling this on a GPU HW that does not satisfy -+ requirements can cause serious problem. ++ if (buffer->uncached) { ++ mutex_unlock(&buffer->lock); ++ return 0; ++ } + -+ If in doubt, say N ++ ret = system_heap_sgl_sync_range(dma_heap_get_dev(heap), table, ++ offset, len, direction, true); ++ mutex_unlock(&buffer->lock); + -+config MALI_MEMORY_FULLY_BACKED -+ bool "Enable memory fully physically-backed" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ This option enables full physical backing of all virtual -+ memory allocations in the kernel. Notice that this build -+ option only affects allocations of grow-on-GPU-page-fault -+ memory. ++ return ret; ++} + -+config MALI_CORESTACK -+ bool "Enable support of GPU core stack power control" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ Enabling this feature on supported GPUs will let the driver powering -+ on/off the GPU core stack independently without involving the Power -+ Domain Controller. This should only be enabled on platforms which -+ integration of the PDC to the Mali GPU is known to be problematic. -+ This feature is currently only supported on t-Six and t-HEx GPUs. ++static int __maybe_unused ++system_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, ++ enum dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap *heap = buffer->heap; ++ struct sg_table *table = &buffer->sg_table; ++ int ret; + -+ If unsure, say N. ++ mutex_lock(&buffer->lock); ++ if (buffer->vmap_cnt) ++ flush_kernel_vmap_range(buffer->vaddr, buffer->len); + -+comment "Platform options" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ if (buffer->uncached) { ++ mutex_unlock(&buffer->lock); ++ return 0; ++ } + -+config MALI_BIFROST_ERROR_INJECT -+ bool "Enable No Mali error injection" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI -+ default n -+ help -+ Enables insertion of errors to test module failure and recovery mechanisms. ++ ret = system_heap_sgl_sync_range(dma_heap_get_dev(heap), table, ++ offset, len, direction, false); ++ mutex_unlock(&buffer->lock); + -+comment "Debug options" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ return ret; ++} + -+config MALI_BIFROST_DEBUG -+ bool "Enable debug build" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ Select this option for increased checking and reporting of errors. ++static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ struct sg_table *table = &buffer->sg_table; ++ unsigned long addr = vma->vm_start; ++ struct sg_page_iter piter; ++ int ret; + -+config MALI_BIFROST_FENCE_DEBUG -+ bool "Enable debug sync fence usage" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && SYNC_FILE -+ default y if MALI_BIFROST_DEBUG -+ help -+ Select this option to enable additional checking and reporting on the -+ use of sync fences in the Mali driver. 
++ if (buffer->uncached) ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + -+ This will add a 3s timeout to all sync fence waits in the Mali -+ driver, so that when work for Mali has been waiting on a sync fence -+ for a long time a debug message will be printed, detailing what fence -+ is causing the block, and which dependent Mali atoms are blocked as a -+ result of this. ++ for_each_sgtable_page(table, &piter, vma->vm_pgoff) { ++ struct page *page = sg_page_iter_page(&piter); + -+ The timeout can be changed at runtime through the js_soft_timeout -+ device attribute, where the timeout is specified in milliseconds. ++ ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, ++ vma->vm_page_prot); ++ if (ret) ++ return ret; ++ addr += PAGE_SIZE; ++ if (addr >= vma->vm_end) ++ return 0; ++ } ++ return 0; ++} + -+config MALI_BIFROST_SYSTEM_TRACE -+ bool "Enable system event tracing support" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default y if MALI_BIFROST_DEBUG -+ default n -+ help -+ Choose this option to enable system trace events for each -+ kbase event. This is typically used for debugging but has -+ minimal overhead when not in use. Enable only if you know what -+ you are doing. ++static void *system_heap_do_vmap(struct system_heap_buffer *buffer) ++{ ++ struct sg_table *table = &buffer->sg_table; ++ int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; ++ struct page **pages = vmalloc(sizeof(struct page *) * npages); ++ struct page **tmp = pages; ++ struct sg_page_iter piter; ++ pgprot_t pgprot = PAGE_KERNEL; ++ void *vaddr; + -+comment "Instrumentation options" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ if (!pages) ++ return ERR_PTR(-ENOMEM); + -+choice -+ prompt "Select Performance counters set" -+ default MALI_PRFCNT_SET_PRIMARY -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ if (buffer->uncached) ++ pgprot = pgprot_writecombine(PAGE_KERNEL); + -+config MALI_PRFCNT_SET_PRIMARY -+ bool "Primary" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ help -+ Select this option to use primary set of performance counters. ++ for_each_sgtable_page(table, &piter, 0) { ++ WARN_ON(tmp - pages >= npages); ++ *tmp++ = sg_page_iter_page(&piter); ++ } + -+config MALI_BIFROST_PRFCNT_SET_SECONDARY -+ bool "Secondary" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ help -+ Select this option to use secondary set of performance counters. Kernel -+ features that depend on an access to the primary set of counters may -+ become unavailable. Enabling this option will prevent power management -+ from working optimally and may cause instrumentation tools to return -+ bogus results. ++ vaddr = vmap(pages, npages, VM_MAP, pgprot); ++ vfree(pages); + -+ If unsure, use MALI_PRFCNT_SET_PRIMARY. ++ if (!vaddr) ++ return ERR_PTR(-ENOMEM); + -+config MALI_PRFCNT_SET_TERTIARY -+ bool "Tertiary" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ help -+ Select this option to use tertiary set of performance counters. Kernel -+ features that depend on an access to the primary set of counters may -+ become unavailable. Enabling this option will prevent power management -+ from working optimally and may cause instrumentation tools to return -+ bogus results. ++ return vaddr; ++} + -+ If unsure, use MALI_PRFCNT_SET_PRIMARY. 
++static void *system_heap_vmap(struct dma_buf *dmabuf) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ void *vaddr; + -+endchoice ++ mutex_lock(&buffer->lock); ++ if (buffer->vmap_cnt) { ++ buffer->vmap_cnt++; ++ vaddr = buffer->vaddr; ++ goto out; ++ } + -+config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+ bool "Enable runtime selection of performance counters set via debugfs" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS -+ default n -+ help -+ Select this option to make the secondary set of performance counters -+ available at runtime via debugfs. Kernel features that depend on an -+ access to the primary set of counters may become unavailable. ++ vaddr = system_heap_do_vmap(buffer); ++ if (IS_ERR(vaddr)) ++ goto out; + -+ If no runtime debugfs option is set, the build time counter set -+ choice will be used. ++ buffer->vaddr = vaddr; ++ buffer->vmap_cnt++; ++out: ++ mutex_unlock(&buffer->lock); + -+ This feature is unsupported and unstable, and may break at any time. -+ Enabling this option will prevent power management from working -+ optimally and may cause instrumentation tools to return bogus results. ++ return vaddr; ++} + -+ No validation is done on the debugfs input. Invalid input could cause -+ performance counter errors. Valid inputs are the values accepted by -+ the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the -+ architecture specification. ++static void system_heap_vunmap(struct dma_buf *dmabuf, void *vaddr) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; + -+ If unsure, say N. ++ mutex_lock(&buffer->lock); ++ if (!--buffer->vmap_cnt) { ++ vunmap(buffer->vaddr); ++ buffer->vaddr = NULL; ++ } ++ mutex_unlock(&buffer->lock); ++} + -+config MALI_JOB_DUMP -+ bool "Enable system level support needed for job dumping" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ Choose this option to enable system level support needed for -+ job dumping. This is typically used for instrumentation but has -+ minimal overhead when not in use. Enable only if you know what -+ you are doing. ++static int system_heap_zero_buffer(struct system_heap_buffer *buffer) ++{ ++ struct sg_table *sgt = &buffer->sg_table; ++ struct sg_page_iter piter; ++ struct page *p; ++ void *vaddr; ++ int ret = 0; + -+comment "Workarounds" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ for_each_sgtable_page(sgt, &piter, 0) { ++ p = sg_page_iter_page(&piter); ++ vaddr = kmap_atomic(p); ++ memset(vaddr, 0, PAGE_SIZE); ++ kunmap_atomic(vaddr); ++ } + -+config MALI_PWRSOFT_765 -+ bool "Enable workaround for PWRSOFT-765" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged -+ in kernel v4.10, however if backported into the kernel then this -+ option must be manually selected. ++ return ret; ++} + -+ If using kernel >= v4.10 then say N, otherwise if devfreq cooling -+ changes have been backported say Y to avoid compilation errors. ++static void system_heap_buf_free(struct deferred_freelist_item *item, ++ enum df_reason reason) ++{ ++ struct system_heap_buffer *buffer; ++ struct sg_table *table; ++ struct scatterlist *sg; ++ int i, j; + -+config MALI_HW_ERRATA_1485982_NOT_AFFECTED -+ bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT -+ default n -+ help -+ This option disables the default workaround for GPU2017-1336. The -+ workaround keeps the L2 cache powered up except for powerdown and reset. 
++ buffer = container_of(item, struct system_heap_buffer, deferred_free); ++ /* Zero the buffer pages before adding back to the pool */ ++ if (reason == DF_NORMAL) ++ if (system_heap_zero_buffer(buffer)) ++ reason = DF_UNDER_PRESSURE; // On failure, just free + -+ The workaround introduces a limitation that will prevent the running of -+ protected mode content on fully coherent platforms, as the switch to IO -+ coherency mode requires the L2 to be turned off. ++ table = &buffer->sg_table; ++ for_each_sgtable_sg(table, sg, i) { ++ struct page *page = sg_page(sg); + -+config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE -+ bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" -+ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED -+ default n -+ help -+ This option uses an alternative workaround for GPU2017-1336. Lowering -+ the GPU clock to a, platform specific, known good frequency before -+ powering down the L2 cache. The clock can be specified in the device -+ tree using the property, opp-mali-errata-1485982. Otherwise the -+ slowest clock will be selected. ++ if (reason == DF_UNDER_PRESSURE) { ++ __free_pages(page, compound_order(page)); ++ } else { ++ for (j = 0; j < NUM_ORDERS; j++) { ++ if (compound_order(page) == orders[j]) ++ break; ++ } ++ dmabuf_page_pool_free(buffer->pools[j], page); ++ } ++ } ++ sg_free_table(table); ++ kfree(buffer); ++} + -+endif ++static void system_heap_dma_buf_release(struct dma_buf *dmabuf) ++{ ++ struct system_heap_buffer *buffer = dmabuf->priv; ++ int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; + -+config MALI_ARBITRATION -+ tristate "Enable Virtualization reference code" -+ depends on MALI_BIFROST -+ default n -+ help -+ Enables the build of several reference modules used in the reference -+ virtualization setup for Mali -+ If unsure, say N. ++ deferred_free(&buffer->deferred_free, system_heap_buf_free, npages); ++} + ++static const struct dma_buf_ops system_heap_buf_ops = { ++ .attach = system_heap_attach, ++ .detach = system_heap_detach, ++ .map_dma_buf = system_heap_map_dma_buf, ++ .unmap_dma_buf = system_heap_unmap_dma_buf, ++ .begin_cpu_access = system_heap_dma_buf_begin_cpu_access, ++ .end_cpu_access = system_heap_dma_buf_end_cpu_access, ++#ifdef CONFIG_DMABUF_PARTIAL ++ .begin_cpu_access_partial = system_heap_dma_buf_begin_cpu_access_partial, ++ .end_cpu_access_partial = system_heap_dma_buf_end_cpu_access_partial, ++#endif ++ .mmap = system_heap_mmap, ++ .vmap = system_heap_vmap, ++ .vunmap = system_heap_vunmap, ++ .release = system_heap_dma_buf_release, ++}; + -+# source "drivers/gpu/arm/bifrost/tests/Kconfig" ++static struct page *system_heap_alloc_largest_available(struct dma_heap *heap, ++ struct dmabuf_page_pool **pool, ++ unsigned long size, ++ unsigned int max_order) ++{ ++ struct page *page; ++ int i; + -+endif -diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile -new file mode 100644 -index 000000000..39df298ff ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/Makefile -@@ -0,0 +1,279 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. 
-+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ for (i = 0; i < NUM_ORDERS; i++) { ++ if (size < (PAGE_SIZE << orders[i])) ++ continue; ++ if (max_order < orders[i]) ++ continue; ++ page = dmabuf_page_pool_alloc(pool[i]); ++ if (!page) ++ continue; ++ return page; ++ } ++ return NULL; ++} + -+KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build -+KDIR ?= $(KERNEL_SRC) ++static struct dma_buf *system_heap_do_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags, ++ bool uncached) ++{ ++ struct system_heap_buffer *buffer; ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ unsigned long size_remaining = len; ++ unsigned int max_order = orders[0]; ++ struct dma_buf *dmabuf; ++ struct sg_table *table; ++ struct scatterlist *sg; ++ struct list_head pages; ++ struct page *page, *tmp_page; ++ int i, ret = -ENOMEM; ++ struct list_head lists[8]; ++ unsigned int block_index[8] = {0}; ++ unsigned int block_1M = 0; ++ unsigned int block_64K = 0; ++ unsigned int maximum; ++ int j; + -+ifeq ($(KDIR),) -+ $(error Must specify KDIR to point to the kernel to target)) -+endif ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) ++ return ERR_PTR(-ENOMEM); + -+# -+# Default configuration values -+# -+# Dependency resolution is done through statements as Kconfig -+# is not supported for out-of-tree builds. -+# ++ INIT_LIST_HEAD(&buffer->attachments); ++ mutex_init(&buffer->lock); ++ buffer->heap = heap; ++ buffer->len = len; ++ buffer->uncached = uncached; ++ buffer->pools = strstr(dma_heap_get_name(heap), "dma32") ? 
dma32_pools : pools; + -+CONFIG_MALI_BIFROST ?= m -+ifeq ($(CONFIG_MALI_BIFROST),m) -+ CONFIG_MALI_PLATFORM_NAME ?= "devicetree" -+ CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y -+ CONFIG_MALI_ARBITRATION ?= n -+ CONFIG_MALI_PARTITION_MANAGER ?= n ++ INIT_LIST_HEAD(&pages); ++ for (i = 0; i < 8; i++) ++ INIT_LIST_HEAD(&lists[i]); ++ i = 0; ++ while (size_remaining > 0) { ++ /* ++ * Avoid trying to allocate memory if the process ++ * has been killed by SIGKILL ++ */ ++ if (fatal_signal_pending(current)) ++ goto free_buffer; + -+ ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) -+ # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI -+ CONFIG_MALI_REAL_HW ?= y -+ CONFIG_MALI_CORESIGHT = n -+ endif ++ page = system_heap_alloc_largest_available(heap, buffer->pools, ++ size_remaining, ++ max_order); ++ if (!page) ++ goto free_buffer; + -+ ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) -+ # Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y -+ CONFIG_MALI_BIFROST_DEVFREQ ?= n -+ else -+ CONFIG_MALI_BIFROST_DEVFREQ ?= y -+ endif ++ size_remaining -= page_size(page); ++ max_order = compound_order(page); ++ if (max_order) { ++ if (max_order == 8) ++ block_1M++; ++ if (max_order == 4) ++ block_64K++; ++ list_add_tail(&page->lru, &pages); ++ } else { ++ dma_addr_t phys = page_to_phys(page); ++ unsigned int bit_index = ((phys >> bank_bit_first) & bank_bit_mask) & 0x7; + -+ ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) -+ # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y -+ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n -+ endif ++ list_add_tail(&page->lru, &lists[bit_index]); ++ block_index[bit_index]++; ++ } ++ i++; ++ } + -+ ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) -+ CONFIG_MALI_CORESIGHT ?= n -+ endif ++ table = &buffer->sg_table; ++ if (sg_alloc_table(table, i, GFP_KERNEL)) ++ goto free_buffer; + -+ # -+ # Expert/Debug/Test released configurations -+ # -+ ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) -+ ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) -+ CONFIG_MALI_REAL_HW = n ++ maximum = block_index[0]; ++ for (i = 1; i < 8; i++) ++ maximum = max(maximum, block_index[i]); ++ sg = table->sgl; ++ list_for_each_entry_safe(page, tmp_page, &pages, lru) { ++ sg_set_page(sg, page, page_size(page), 0); ++ sg = sg_next(sg); ++ list_del(&page->lru); ++ } ++ for (i = 0; i < maximum; i++) { ++ for (j = 0; j < 8; j++) { ++ if (!list_empty(&lists[j])) { ++ page = list_first_entry(&lists[j], struct page, lru); ++ sg_set_page(sg, page, PAGE_SIZE, 0); ++ sg = sg_next(sg); ++ list_del(&page->lru); ++ } ++ } ++ } + -+ else -+ # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n -+ CONFIG_MALI_REAL_HW = y -+ CONFIG_MALI_BIFROST_ERROR_INJECT = n -+ endif ++ /* create the dmabuf */ ++ exp_info.exp_name = dma_heap_get_name(heap); ++ exp_info.ops = &system_heap_buf_ops; ++ exp_info.size = buffer->len; ++ exp_info.flags = fd_flags; ++ exp_info.priv = buffer; ++ dmabuf = dma_buf_export(&exp_info); ++ if (IS_ERR(dmabuf)) { ++ ret = PTR_ERR(dmabuf); ++ goto free_pages; ++ } + ++ /* ++ * For uncached buffers, we need to initially flush cpu cache, since ++ * the __GFP_ZERO on the allocation means the zeroing was done by the ++ * cpu and thus it is likely cached. Map (and implicitly flush) and ++ * unmap it now so we don't get corruption later on. 
++ */ ++ if (buffer->uncached) { ++ dma_map_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); ++ dma_unmap_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); ++ } + -+ ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) -+ # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y -+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n -+ endif ++ return dmabuf; + -+ ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) -+ CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y -+ CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y ++free_pages: ++ for_each_sgtable_sg(table, sg, i) { ++ struct page *p = sg_page(sg); + -+ ifeq ($(CONFIG_SYNC_FILE), y) -+ CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y -+ else -+ CONFIG_MALI_BIFROST_FENCE_DEBUG = n -+ endif -+ else -+ # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n -+ CONFIG_MALI_BIFROST_ENABLE_TRACE = n -+ CONFIG_MALI_BIFROST_SYSTEM_TRACE = n -+ CONFIG_MALI_BIFROST_FENCE_DEBUG = n -+ endif -+ else -+ # Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n -+ CONFIG_MALI_CORESTACK = n -+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n -+ CONFIG_LARGE_PAGE_ALLOC = n -+ CONFIG_MALI_PWRSOFT_765 = n -+ CONFIG_MALI_MEMORY_FULLY_BACKED = n -+ CONFIG_MALI_JOB_DUMP = n -+ CONFIG_MALI_BIFROST_NO_MALI = n -+ CONFIG_MALI_REAL_HW = y -+ CONFIG_MALI_BIFROST_ERROR_INJECT = n -+ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n -+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n -+ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n -+ CONFIG_MALI_BIFROST_DEBUG = n -+ CONFIG_MALI_BIFROST_ENABLE_TRACE = n -+ CONFIG_MALI_BIFROST_SYSTEM_TRACE = n -+ CONFIG_MALI_BIFROST_FENCE_DEBUG = n -+ endif ++ __free_pages(p, compound_order(p)); ++ } ++ sg_free_table(table); ++free_buffer: ++ list_for_each_entry_safe(page, tmp_page, &pages, lru) ++ __free_pages(page, compound_order(page)); ++ for (i = 0; i < 8; i++) { ++ list_for_each_entry_safe(page, tmp_page, &lists[i], lru) ++ __free_pages(page, compound_order(page)); ++ } ++ kfree(buffer); + -+ ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) -+ CONFIG_MALI_KUTF ?= y -+ ifeq ($(CONFIG_MALI_KUTF), y) -+ CONFIG_MALI_KUTF_IRQ_TEST ?= y -+ CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y -+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y -+ else -+ # Prevent misuse when CONFIG_MALI_KUTF=n -+ CONFIG_MALI_KUTF_IRQ_TEST = n -+ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n -+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n -+ endif -+ else -+ # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n -+ CONFIG_MALI_KUTF = n -+ CONFIG_MALI_KUTF_IRQ_TEST = n -+ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n -+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n -+ endif -+else -+ # Prevent misuse when CONFIG_MALI_BIFROST=n -+ CONFIG_MALI_ARBITRATION = n -+ CONFIG_MALI_KUTF = n -+ CONFIG_MALI_KUTF_IRQ_TEST = n -+ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n -+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n -+endif ++ return ERR_PTR(ret); ++} + -+# All Mali CONFIG should be listed here -+CONFIGS := \ -+ CONFIG_MALI_BIFROST \ -+ CONFIG_MALI_CSF_SUPPORT \ -+ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ -+ CONFIG_MALI_ARBITER_SUPPORT \ -+ CONFIG_MALI_ARBITRATION \ -+ CONFIG_MALI_PARTITION_MANAGER \ -+ CONFIG_MALI_REAL_HW \ -+ CONFIG_MALI_BIFROST_DEVFREQ \ -+ CONFIG_MALI_BIFROST_DVFS \ -+ CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ -+ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ -+ CONFIG_MALI_BIFROST_EXPERT \ -+ CONFIG_MALI_CORESTACK \ -+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ -+ CONFIG_LARGE_PAGE_ALLOC \ -+ CONFIG_MALI_PWRSOFT_765 \ -+ CONFIG_MALI_MEMORY_FULLY_BACKED \ -+ CONFIG_MALI_JOB_DUMP \ -+ CONFIG_MALI_BIFROST_NO_MALI \ -+ CONFIG_MALI_BIFROST_ERROR_INJECT \ -+ 
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ -+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ -+ CONFIG_MALI_PRFCNT_SET_PRIMARY \ -+ CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \ -+ CONFIG_MALI_PRFCNT_SET_TERTIARY \ -+ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ -+ CONFIG_MALI_BIFROST_DEBUG \ -+ CONFIG_MALI_BIFROST_ENABLE_TRACE \ -+ CONFIG_MALI_BIFROST_SYSTEM_TRACE \ -+ CONFIG_MALI_BIFROST_FENCE_DEBUG \ -+ CONFIG_MALI_KUTF \ -+ CONFIG_MALI_KUTF_IRQ_TEST \ -+ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ -+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ -+ CONFIG_MALI_XEN \ -+ CONFIG_MALI_CORESIGHT ++static struct dma_buf *system_heap_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) ++{ ++ return system_heap_do_allocate(heap, len, fd_flags, heap_flags, false); ++} + ++static long system_get_pool_size(struct dma_heap *heap) ++{ ++ int i; ++ long num_pages = 0; ++ struct dmabuf_page_pool **pool; + -+THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) -+-include $(THIS_DIR)/../arbitration/Makefile ++ pool = strstr(dma_heap_get_name(heap), "dma32") ? dma32_pools : pools; ++ for (i = 0; i < NUM_ORDERS; i++, pool++) { ++ num_pages += ((*pool)->count[POOL_LOWPAGE] + ++ (*pool)->count[POOL_HIGHPAGE]) << (*pool)->order; ++ } + -+# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build -+# -+# Generate the list of CONFIGs and values. -+# $(value config) is the name of the CONFIG option. -+# $(value $(value config)) is its value (y, m). -+# When the CONFIG is not set to y or m, it defaults to n. -+MAKE_ARGS := $(foreach config,$(CONFIGS), \ -+ $(if $(filter y m,$(value $(value config))), \ -+ $(value config)=$(value $(value config)), \ -+ $(value config)=n)) ++ return num_pages << PAGE_SHIFT; ++} + -+MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) ++static const struct dma_heap_ops system_heap_ops = { ++ .allocate = system_heap_allocate, ++ .get_pool_size = system_get_pool_size, ++}; + -+# -+# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build -+# -+# Generate the list of CONFIGs defines with values from CONFIGS. -+# $(value config) is the name of the CONFIG option. -+# When set to y or m, the CONFIG gets defined to 1. 
-+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ -+ $(if $(filter y m,$(value $(value config))), \ -+ -D$(value config)=1)) ++static struct dma_buf *system_uncached_heap_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) ++{ ++ return system_heap_do_allocate(heap, len, fd_flags, heap_flags, true); ++} + -+EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) ++/* Dummy function to be used until we can call coerce_mask_and_coherent */ ++static struct dma_buf *system_uncached_heap_not_initialized(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) ++{ ++ return ERR_PTR(-EBUSY); ++} + -+# -+# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions -+# ++static struct dma_heap_ops system_uncached_heap_ops = { ++ /* After system_heap_create is complete, we will swap this */ ++ .allocate = system_uncached_heap_not_initialized, ++}; + -+KBUILD_CFLAGS += -Wall -Werror ++static int set_heap_dev_dma(struct device *heap_dev) ++{ ++ int err = 0; + -+# The following were added to align with W=1 in scripts/Makefile.extrawarn -+# from the Linux source tree (v5.18.14) -+KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter -+KBUILD_CFLAGS += -Wmissing-declarations -+KBUILD_CFLAGS += -Wmissing-format-attribute -+KBUILD_CFLAGS += -Wmissing-prototypes -+KBUILD_CFLAGS += -Wold-style-definition -+# The -Wmissing-include-dirs cannot be enabled as the path to some of the -+# included directories change depending on whether it is an in-tree or -+# out-of-tree build. -+KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -+KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -+KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -+KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) -+# The following turn off the warnings enabled by -Wextra -+KBUILD_CFLAGS += -Wno-sign-compare -+KBUILD_CFLAGS += -Wno-shift-negative-value -+# This flag is needed to avoid build errors on older kernels -+KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) ++ if (!heap_dev) ++ return -EINVAL; + -+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 ++ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); + -+# The following were added to align with W=2 in scripts/Makefile.extrawarn -+# from the Linux source tree (v5.18.14) -+KBUILD_CFLAGS += -Wdisabled-optimization -+# The -Wshadow flag cannot be enabled unless upstream kernels are -+# patched to fix redefinitions of certain built-in functions and -+# global variables. 
-+KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -+KBUILD_CFLAGS += -Wmissing-field-initializers -+# -Wtype-limits must be disabled due to build failures on kernel 5.x -+KBUILD_CFLAGS += -Wno-type-limit -+KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) -+KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) ++ if (!heap_dev->dma_parms) { ++ heap_dev->dma_parms = devm_kzalloc(heap_dev, ++ sizeof(*heap_dev->dma_parms), ++ GFP_KERNEL); ++ if (!heap_dev->dma_parms) ++ return -ENOMEM; + -+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 ++ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); ++ if (err) { ++ devm_kfree(heap_dev, heap_dev->dma_parms); ++ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); ++ return err; ++ } ++ } + -+# This warning is disabled to avoid build failures in some kernel versions -+KBUILD_CFLAGS += -Wno-ignored-qualifiers ++ return 0; ++} + -+ifeq ($(CONFIG_GCOV_KERNEL),y) -+ KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) -+ KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) -+ EXTRA_CFLAGS += -DGCOV_PROFILE=1 -+endif ++static int system_heap_create(void) ++{ ++ struct dma_heap_export_info exp_info; ++ int i, err = 0; ++ struct dram_addrmap_info *ddr_map_info; + -+ifeq ($(CONFIG_MALI_KCOV),y) -+ KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) -+ EXTRA_CFLAGS += -DKCOV=1 -+ EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 -+endif ++ /* ++ * Since swiotlb has memory size limitation, this will calculate ++ * the maximum size locally. ++ * ++ * Once swiotlb_max_segment() return not '0', means that the totalram size ++ * is larger than 4GiB and swiotlb is not force mode, in this case, system ++ * heap should limit largest allocation. ++ * ++ * FIX: fix the orders[] as a workaround. ++ */ ++ if (swiotlb_max_segment()) { ++ unsigned int max_size = (1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; ++ int max_order = MAX_ORDER; ++ int i; + -+all: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++ max_size = max_t(unsigned int, max_size, PAGE_SIZE) >> PAGE_SHIFT; ++ max_order = min(max_order, ilog2(max_size)); ++ for (i = 0; i < NUM_ORDERS; i++) { ++ if (max_order < orders[i]) ++ orders[i] = max_order; ++ pr_info("system_heap: orders[%d] = %u\n", i, orders[i]); ++ } ++ } + -+modules_install: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install ++ for (i = 0; i < NUM_ORDERS; i++) { ++ pools[i] = dmabuf_page_pool_create(order_flags[i], orders[i]); + -+clean: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean -diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild -new file mode 100755 -index 000000000..2e6b11144 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild -@@ -0,0 +1,23 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ if (!pools[i]) { ++ int j; + -+bifrost_kbase-y += \ -+ arbiter/mali_kbase_arbif.o \ -+ arbiter/mali_kbase_arbiter_pm.o -diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c ++ pr_err("%s: page pool creation failed!\n", __func__); ++ for (j = 0; j < i; j++) ++ dmabuf_page_pool_destroy(pools[j]); ++ return -ENOMEM; ++ } ++ } ++ ++ for (i = 0; i < NUM_ORDERS; i++) { ++ dma32_pools[i] = dmabuf_page_pool_create(order_flags[i] | GFP_DMA32, orders[i]); ++ ++ if (!dma32_pools[i]) { ++ int j; ++ ++ pr_err("%s: page dma32 pool creation failed!\n", __func__); ++ for (j = 0; j < i; j++) ++ dmabuf_page_pool_destroy(dma32_pools[j]); ++ goto err_dma32_pool; ++ } ++ } ++ ++ exp_info.name = "system"; ++ exp_info.ops = &system_heap_ops; ++ exp_info.priv = NULL; ++ ++ sys_heap = dma_heap_add(&exp_info); ++ if (IS_ERR(sys_heap)) ++ return PTR_ERR(sys_heap); ++ ++ exp_info.name = "system-dma32"; ++ exp_info.ops = &system_heap_ops; ++ exp_info.priv = NULL; ++ ++ sys_dma32_heap = dma_heap_add(&exp_info); ++ if (IS_ERR(sys_dma32_heap)) ++ return PTR_ERR(sys_dma32_heap); ++ ++ exp_info.name = "system-uncached"; ++ exp_info.ops = &system_uncached_heap_ops; ++ exp_info.priv = NULL; ++ ++ sys_uncached_heap = dma_heap_add(&exp_info); ++ if (IS_ERR(sys_uncached_heap)) ++ return PTR_ERR(sys_uncached_heap); ++ ++ err = set_heap_dev_dma(dma_heap_get_dev(sys_uncached_heap)); ++ if (err) ++ return err; ++ ++ exp_info.name = "system-uncached-dma32"; ++ exp_info.ops = &system_uncached_heap_ops; ++ exp_info.priv = NULL; ++ ++ sys_uncached_dma32_heap = dma_heap_add(&exp_info); ++ if (IS_ERR(sys_uncached_dma32_heap)) ++ return PTR_ERR(sys_uncached_dma32_heap); ++ ++ err = set_heap_dev_dma(dma_heap_get_dev(sys_uncached_dma32_heap)); ++ if (err) ++ return err; ++ dma_coerce_mask_and_coherent(dma_heap_get_dev(sys_uncached_dma32_heap), DMA_BIT_MASK(32)); ++ ++ mb(); /* make sure we only set allocate after dma_mask is set */ ++ system_uncached_heap_ops.allocate = system_uncached_heap_allocate; ++ ++ ddr_map_info = sip_smc_get_dram_map(); ++ if (ddr_map_info) { ++ bank_bit_first = ddr_map_info->bank_bit_first; ++ bank_bit_mask = ddr_map_info->bank_bit_mask; ++ } ++ ++ return 0; ++err_dma32_pool: ++ for (i = 0; i < NUM_ORDERS; i++) ++ dmabuf_page_pool_destroy(pools[i]); ++ ++ return -ENOMEM; ++} ++module_init(system_heap_create); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/dma-buf/heaps/sram_heap.c b/drivers/dma-buf/heaps/sram_heap.c new file mode 100644 -index 000000000..b5d3cd685 +index 000000000..d9a9b70a7 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c -@@ -0,0 +1,357 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/dma-buf/heaps/sram_heap.c +@@ -0,0 +1,437 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* ++ * SRAM DMA-Heap exporter && support alloc page and dmabuf on kernel + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
++ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Author: Andrew F. Davis + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Copyright (C) 2022 Rockchip Electronics Co., Ltd. + * ++ * Author: Huang Lee + */ ++#define pr_fmt(fmt) "sram_heap: " fmt + -+/** -+ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines -+ */ -+ -+#include -+#include "mali_kbase_arbif.h" -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include ++#include ++#include ++ ++#include +#include -+#include "linux/mali_arbiter_interface.h" ++#include + -+/* Arbiter interface version against which was implemented this module */ -+#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 -+#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ -+ MALI_ARBITER_INTERFACE_VERSION -+#error "Unsupported Mali Arbiter interface version." -+#endif ++#define RK3588_SRAM_BASE 0xff001000 + -+static void on_max_config(struct device *dev, uint32_t max_l2_slices, -+ uint32_t max_core_mask) ++struct sram_dma_heap { ++ struct dma_heap *heap; ++ struct gen_pool *pool; ++}; ++ ++struct sram_dma_heap_buffer { ++ struct gen_pool *pool; ++ struct list_head attachments; ++ struct mutex attachments_lock; ++ unsigned long len; ++ void *vaddr; ++ phys_addr_t paddr; ++}; ++ ++struct dma_heap_attachment { ++ struct device *dev; ++ struct sg_table *table; ++ struct list_head list; ++}; ++ ++static int dma_heap_attach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) +{ -+ struct kbase_device *kbdev; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a; ++ struct sg_table *table; + -+ if (!dev) { -+ pr_err("%s(): dev is NULL", __func__); -+ return; -+ } ++ a = kzalloc(sizeof(*a), GFP_KERNEL); ++ if (!a) ++ return -ENOMEM; + -+ kbdev = dev_get_drvdata(dev); -+ if (!kbdev) { -+ dev_err(dev, "%s(): kbdev is NULL", __func__); -+ return; -+ } ++ table = kmalloc(sizeof(*table), GFP_KERNEL); ++ if (!table) ++ goto table_alloc_failed; + -+ if (!max_l2_slices || !max_core_mask) { -+ dev_dbg(dev, -+ "%s(): max_config ignored as one of the fields is zero", -+ __func__); -+ return; -+ } ++ if (sg_alloc_table(table, 1, GFP_KERNEL)) ++ goto sg_alloc_failed; + -+ /* set the max config info in the kbase device */ -+ kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); ++ /* ++ * The referenced pfn and page are for setting the sram address to the ++ * sgtable, and cannot be used for other purposes, and cannot be accessed ++ * directly or indirectly. ++ * ++ * And not sure if there is a problem with the 32-bit system. ++ * ++ * page cannot support kmap func. 
++ */ ++ sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(buffer->paddr)), buffer->len, 0); ++ ++ a->table = table; ++ a->dev = attachment->dev; ++ INIT_LIST_HEAD(&a->list); ++ ++ attachment->priv = a; ++ ++ mutex_lock(&buffer->attachments_lock); ++ list_add(&a->list, &buffer->attachments); ++ mutex_unlock(&buffer->attachments_lock); ++ ++ return 0; ++ ++sg_alloc_failed: ++ kfree(table); ++table_alloc_failed: ++ kfree(a); ++ return -ENOMEM; +} + -+/** -+ * on_update_freq() - Updates GPU clock frequency -+ * @dev: arbiter interface device handle -+ * @freq: GPU clock frequency value reported from arbiter -+ * -+ * call back function to update GPU clock frequency with -+ * new value from arbiter -+ */ -+static void on_update_freq(struct device *dev, uint32_t freq) ++static void dma_heap_detatch(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) +{ -+ struct kbase_device *kbdev; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ struct dma_heap_attachment *a = attachment->priv; + -+ if (!dev) { -+ pr_err("%s(): dev is NULL", __func__); -+ return; -+ } ++ mutex_lock(&buffer->attachments_lock); ++ list_del(&a->list); ++ mutex_unlock(&buffer->attachments_lock); + -+ kbdev = dev_get_drvdata(dev); -+ if (!kbdev) { -+ dev_err(dev, "%s(): kbdev is NULL", __func__); -+ return; -+ } ++ sg_free_table(a->table); ++ kfree(a->table); ++ kfree(a); ++} + -+ kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); ++static struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment *attachment, ++ enum dma_data_direction direction) ++{ ++ struct dma_heap_attachment *a = attachment->priv; ++ struct sg_table *table = a->table; ++ int ret = 0; ++ ++ ret = dma_map_sgtable(attachment->dev, table, direction, DMA_ATTR_SKIP_CPU_SYNC); ++ if (ret) ++ return ERR_PTR(-ENOMEM); ++ ++ return table; +} + -+/** -+ * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop -+ * @dev: arbiter interface device handle -+ * -+ * call back function to signal a GPU STOP event from arbiter interface -+ */ -+static void on_gpu_stop(struct device *dev) ++static void dma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, ++ struct sg_table *table, ++ enum dma_data_direction direction) +{ -+ struct kbase_device *kbdev; ++ dma_unmap_sgtable(attachment->dev, table, direction, DMA_ATTR_SKIP_CPU_SYNC); ++} + -+ if (!dev) { -+ pr_err("%s(): dev is NULL", __func__); -+ return; -+ } ++static void dma_heap_dma_buf_release(struct dma_buf *dmabuf) ++{ ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; + -+ kbdev = dev_get_drvdata(dev); -+ if (!kbdev) { -+ dev_err(dev, "%s(): kbdev is NULL", __func__); -+ return; -+ } ++ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); ++ kfree(buffer); ++} + -+ KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); ++static int dma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) ++{ ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; ++ int ret; ++ ++ /* SRAM mappings are not cached */ ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ ++ ret = vm_iomap_memory(vma, buffer->paddr, buffer->len); ++ if (ret) ++ pr_err("Could not map buffer to userspace\n"); ++ ++ return ret; +} + -+/** -+ * on_gpu_granted() - sends KBASE_VM_GPU_GRANTED_EVT event on GPU granted -+ * @dev: arbiter interface device handle -+ * -+ * call back function to signal a GPU GRANT event from arbiter interface -+ */ -+static void on_gpu_granted(struct device *dev) ++static void 
*dma_heap_vmap(struct dma_buf *dmabuf) +{ -+ struct kbase_device *kbdev; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; + -+ if (!dev) { -+ pr_err("%s(): dev is NULL", __func__); -+ return; ++ return buffer->vaddr; ++} ++ ++static const struct dma_buf_ops sram_dma_heap_buf_ops = { ++ .attach = dma_heap_attach, ++ .detach = dma_heap_detatch, ++ .map_dma_buf = dma_heap_map_dma_buf, ++ .unmap_dma_buf = dma_heap_unmap_dma_buf, ++ .release = dma_heap_dma_buf_release, ++ .mmap = dma_heap_mmap, ++ .vmap = dma_heap_vmap, ++}; ++ ++static struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags) ++{ ++ struct sram_dma_heap *sram_dma_heap = dma_heap_get_drvdata(heap); ++ struct sram_dma_heap_buffer *buffer; ++ ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ struct dma_buf *dmabuf; ++ int ret = -ENOMEM; ++ ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) ++ return ERR_PTR(-ENOMEM); ++ buffer->pool = sram_dma_heap->pool; ++ INIT_LIST_HEAD(&buffer->attachments); ++ mutex_init(&buffer->attachments_lock); ++ buffer->len = len; ++ ++ buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, buffer->len); ++ if (!buffer->vaddr) { ++ ret = -ENOMEM; ++ goto free_buffer; + } + -+ kbdev = dev_get_drvdata(dev); -+ if (!kbdev) { -+ dev_err(dev, "%s(): kbdev is NULL", __func__); -+ return; ++ buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned long)buffer->vaddr); ++ if (buffer->paddr == -1) { ++ ret = -ENOMEM; ++ goto free_pool; + } + -+ KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); ++ /* create the dmabuf */ ++ exp_info.ops = &sram_dma_heap_buf_ops; ++ exp_info.size = buffer->len; ++ exp_info.flags = fd_flags; ++ exp_info.priv = buffer; ++ dmabuf = dma_buf_export(&exp_info); ++ if (IS_ERR(dmabuf)) { ++ ret = PTR_ERR(dmabuf); ++ goto free_pool; ++ } ++ ++ return dmabuf; ++ ++free_pool: ++ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); ++free_buffer: ++ kfree(buffer); ++ ++ return ERR_PTR(ret); +} + -+/** -+ * on_gpu_lost() - sends KBASE_VM_GPU_LOST_EVT event on GPU granted -+ * @dev: arbiter interface device handle -+ * -+ * call back function to signal a GPU LOST event from arbiter interface -+ */ -+static void on_gpu_lost(struct device *dev) ++static struct dma_heap_ops sram_dma_heap_ops = { ++ .allocate = sram_dma_heap_allocate, ++}; ++ ++static struct sram_dma_heap *sram_dma_heap_global; ++ ++static int sram_dma_heap_export(const char *name, ++ struct gen_pool *sram_gp) +{ -+ struct kbase_device *kbdev; ++ struct sram_dma_heap *sram_dma_heap; ++ struct dma_heap_export_info exp_info; + -+ if (!dev) { -+ pr_err("%s(): dev is NULL", __func__); -+ return; -+ } ++ pr_info("Exporting SRAM pool '%s'\n", name); + -+ kbdev = dev_get_drvdata(dev); -+ if (!kbdev) { -+ dev_err(dev, "%s(): kbdev is NULL", __func__); -+ return; ++ sram_dma_heap = kzalloc(sizeof(*sram_dma_heap), GFP_KERNEL); ++ if (!sram_dma_heap) ++ return -ENOMEM; ++ sram_dma_heap->pool = sram_gp; ++ ++ exp_info.name = "sram_dma_heap"; ++ exp_info.ops = &sram_dma_heap_ops; ++ exp_info.priv = sram_dma_heap; ++ ++ sram_dma_heap_global = sram_dma_heap; ++ ++ sram_dma_heap->heap = dma_heap_add(&exp_info); ++ if (IS_ERR(sram_dma_heap->heap)) { ++ int ret = PTR_ERR(sram_dma_heap->heap); ++ ++ kfree(sram_dma_heap); ++ return ret; + } + -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); ++ return 0; +} + -+/** -+ * kbase_arbif_init() - Kbase Arbiter 
interface initialisation. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Initialise Kbase Arbiter interface and assign callback functions. -+ * -+ * Return: -+ * * 0 - the interface was initialized or was not specified -+ * * in the device tree. -+ * * -EFAULT - the interface was specified but failed to initialize. -+ * * -EPROBE_DEFER - module dependencies are not yet available. -+ */ -+int kbase_arbif_init(struct kbase_device *kbdev) ++struct dma_buf *sram_heap_alloc_dma_buf(size_t size) +{ -+#if IS_ENABLED(CONFIG_OF) -+ struct arbiter_if_arb_vm_ops ops; -+ struct arbiter_if_dev *arb_if; -+ struct device_node *arbiter_if_node; -+ struct platform_device *pdev; -+ int err; ++ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; ++ struct sram_dma_heap_buffer *buffer; + -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ struct dma_buf *dmabuf; ++ int ret = -ENOMEM; + -+ arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, -+ "arbiter_if", 0); -+ if (!arbiter_if_node) { -+ dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); -+ /* no arbiter interface defined in device tree */ -+ kbdev->arb.arb_dev = NULL; -+ kbdev->arb.arb_if = NULL; -+ return 0; -+ } ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) ++ return ERR_PTR(-ENOMEM); + -+ pdev = of_find_device_by_node(arbiter_if_node); -+ if (!pdev) { -+ dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); -+ return -EPROBE_DEFER; -+ } ++ buffer->pool = sram_dma_heap->pool; ++ INIT_LIST_HEAD(&buffer->attachments); ++ mutex_init(&buffer->attachments_lock); ++ buffer->len = size; + -+ if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { -+ dev_err(kbdev->dev, "arbiter_if driver not available\n"); -+ put_device(&pdev->dev); -+ return -EPROBE_DEFER; ++ buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, buffer->len); ++ if (!buffer->vaddr) { ++ ret = -ENOMEM; ++ goto free_buffer; + } -+ kbdev->arb.arb_dev = &pdev->dev; -+ arb_if = platform_get_drvdata(pdev); -+ if (!arb_if) { -+ dev_err(kbdev->dev, "arbiter_if driver not ready\n"); -+ module_put(pdev->dev.driver->owner); -+ put_device(&pdev->dev); -+ return -EPROBE_DEFER; ++ ++ buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned long)buffer->vaddr); ++ if (buffer->paddr == -1) { ++ ret = -ENOMEM; ++ goto free_pool; + } + -+ kbdev->arb.arb_if = arb_if; -+ ops.arb_vm_gpu_stop = on_gpu_stop; -+ ops.arb_vm_gpu_granted = on_gpu_granted; -+ ops.arb_vm_gpu_lost = on_gpu_lost; -+ ops.arb_vm_max_config = on_max_config; -+ ops.arb_vm_update_freq = on_update_freq; ++ /* create the dmabuf */ ++ exp_info.ops = &sram_dma_heap_buf_ops; ++ exp_info.size = buffer->len; ++ exp_info.priv = buffer; ++ dmabuf = dma_buf_export(&exp_info); ++ if (IS_ERR(dmabuf)) { ++ ret = PTR_ERR(dmabuf); ++ goto free_pool; ++ } + -+ kbdev->arb.arb_freq.arb_freq = 0; -+ kbdev->arb.arb_freq.freq_updated = false; -+ mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); ++ return dmabuf; + -+ /* register kbase arbiter_if callbacks */ -+ if (arb_if->vm_ops.vm_arb_register_dev) { -+ err = arb_if->vm_ops.vm_arb_register_dev(arb_if, -+ kbdev->dev, &ops); -+ if (err) { -+ dev_err(&pdev->dev, "Failed to register with arbiter\n"); -+ module_put(pdev->dev.driver->owner); -+ put_device(&pdev->dev); -+ if (err != -EPROBE_DEFER) -+ err = -EFAULT; -+ return err; -+ } -+ } ++free_pool: ++ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); ++free_buffer: ++ kfree(buffer); + -+#else /* CONFIG_OF */ -+ 
dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); -+ kbdev->arb.arb_dev = NULL; -+ kbdev->arb.arb_if = NULL; -+#endif -+ return 0; ++ return ERR_PTR(ret); +} ++EXPORT_SYMBOL_GPL(sram_heap_alloc_dma_buf); + -+/** -+ * kbase_arbif_destroy() - De-init Kbase arbiter interface -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * De-initialise Kbase arbiter interface -+ */ -+void kbase_arbif_destroy(struct kbase_device *kbdev) ++struct page *sram_heap_alloc_pages(size_t size) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; + -+ if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); ++ void *vaddr; ++ phys_addr_t paddr; ++ struct page *p; ++ ++ int ret = -ENOMEM; ++ ++ vaddr = (void *)gen_pool_alloc(sram_dma_heap->pool, size); ++ if (!vaddr) { ++ ret = -ENOMEM; ++ pr_err("no memory"); ++ goto failed; + } -+ kbdev->arb.arb_if = NULL; -+ if (kbdev->arb.arb_dev) { -+ module_put(kbdev->arb.arb_dev->driver->owner); -+ put_device(kbdev->arb.arb_dev); ++ ++ paddr = gen_pool_virt_to_phys(sram_dma_heap->pool, (unsigned long)vaddr); ++ if (paddr == -1) { ++ ret = -ENOMEM; ++ pr_err("gen_pool_virt_to_phys failed"); ++ goto free_pool; + } -+ kbdev->arb.arb_dev = NULL; ++ ++ p = pfn_to_page(PFN_DOWN(paddr)); ++ ++ return p; ++ ++free_pool: ++ gen_pool_free(sram_dma_heap->pool, (unsigned long)vaddr, size); ++failed: ++ ++ return ERR_PTR(ret); +} ++EXPORT_SYMBOL_GPL(sram_heap_alloc_pages); + -+/** -+ * kbase_arbif_get_max_config() - Request max config info -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * call back function from arb interface to arbiter requesting max config info -+ */ -+void kbase_arbif_get_max_config(struct kbase_device *kbdev) ++static u64 gen_pool_phys_to_virt(struct gen_pool *pool, phys_addr_t paddr) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct gen_pool_chunk *chunk; ++ u64 vaddr = 0; + -+ if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ arb_if->vm_ops.vm_arb_get_max_config(arb_if); ++ rcu_read_lock(); ++ list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { ++ /* TODO: only suit for simple chunk now */ ++ vaddr = chunk->start_addr + (paddr - chunk->phys_addr); + } ++ rcu_read_unlock(); ++ ++ return vaddr; +} + -+/** -+ * kbase_arbif_gpu_request() - Request GPU from -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * call back function from arb interface to arbiter requesting GPU for VM -+ */ -+void kbase_arbif_gpu_request(struct kbase_device *kbdev) ++void sram_heap_free_pages(struct page *p) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct sram_dma_heap *sram_dma_heap = sram_dma_heap_global; ++ void *vaddr; + -+ if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); -+ arb_if->vm_ops.vm_arb_gpu_request(arb_if); -+ } ++ vaddr = (void *)gen_pool_phys_to_virt(sram_dma_heap->pool, page_to_phys(p)); ++ ++ gen_pool_free(sram_dma_heap->pool, (unsigned long)vaddr, PAGE_SIZE); +} ++EXPORT_SYMBOL_GPL(sram_heap_free_pages); + -+/** -+ * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @gpu_required: GPU request 
flag -+ * -+ */ -+void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) ++void sram_heap_free_dma_buf(struct dma_buf *dmabuf) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; + -+ if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); -+ if (gpu_required) -+ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); -+ arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); -+ } ++ gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); ++ kfree(buffer); +} ++EXPORT_SYMBOL_GPL(sram_heap_free_dma_buf); + -+/** -+ * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Informs the arbiter VM is active -+ */ -+void kbase_arbif_gpu_active(struct kbase_device *kbdev) ++void *sram_heap_get_vaddr(struct dma_buf *dmabuf) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; + -+ if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ arb_if->vm_ops.vm_arb_gpu_active(arb_if); -+ } ++ return buffer->vaddr; +} ++EXPORT_SYMBOL_GPL(sram_heap_get_vaddr); + -+/** -+ * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Informs the arbiter VM is idle -+ */ -+void kbase_arbif_gpu_idle(struct kbase_device *kbdev) ++phys_addr_t sram_heap_get_paddr(struct dma_buf *dmabuf) +{ -+ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; ++ struct sram_dma_heap_buffer *buffer = dmabuf->priv; + -+ if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { -+ dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); -+ arb_if->vm_ops.vm_arb_gpu_idle(arb_if); -+ } ++ return buffer->paddr; +} -diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h -new file mode 100644 -index 000000000..701ffd42f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h -@@ -0,0 +1,121 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++EXPORT_SYMBOL_GPL(sram_heap_get_paddr); + -+/** -+ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines -+ */ ++static int rk_add_default_sram_heap(void) ++{ ++ struct device_node *np = NULL; ++ struct gen_pool *sram_gp = NULL; ++ int ret = 0; + -+#ifndef _MALI_KBASE_ARBIF_H_ -+#define _MALI_KBASE_ARBIF_H_ ++ np = of_find_compatible_node(NULL, NULL, "rockchip,sram-heap"); ++ if (!np) { ++ pr_info("failed to get device node of sram-heap\n"); ++ return -ENODEV; ++ } + -+/** -+ * enum kbase_arbif_evt - Internal Arbiter event. -+ * -+ * @KBASE_VM_GPU_INITIALIZED_EVT: KBase has finished initializing -+ * and can be stopped -+ * @KBASE_VM_GPU_STOP_EVT: Stop message received from Arbiter -+ * @KBASE_VM_GPU_GRANTED_EVT: Grant message received from Arbiter -+ * @KBASE_VM_GPU_LOST_EVT: Lost message received from Arbiter -+ * @KBASE_VM_GPU_IDLE_EVENT: KBase has transitioned into an inactive state. -+ * @KBASE_VM_REF_EVENT: KBase has transitioned into an active state. -+ * @KBASE_VM_OS_SUSPEND_EVENT: KBase is suspending -+ * @KBASE_VM_OS_RESUME_EVENT: Kbase is resuming -+ */ -+enum kbase_arbif_evt { -+ KBASE_VM_GPU_INITIALIZED_EVT = 1, -+ KBASE_VM_GPU_STOP_EVT, -+ KBASE_VM_GPU_GRANTED_EVT, -+ KBASE_VM_GPU_LOST_EVT, -+ KBASE_VM_GPU_IDLE_EVENT, -+ KBASE_VM_REF_EVENT, -+ KBASE_VM_OS_SUSPEND_EVENT, -+ KBASE_VM_OS_RESUME_EVENT, -+}; ++ if (!of_device_is_available(np)) { ++ of_node_put(np); ++ return ret; ++ } + -+/** -+ * kbase_arbif_init() - Initialize the arbiter interface functionality. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Initialize the arbiter interface and also determines -+ * if Arbiter functionality is required. -+ * -+ * Return: -+ * * 0 - the interface was initialized or was not specified -+ * * in the device tree. -+ * * -EFAULT - the interface was specified but failed to initialize. -+ * * -EPROBE_DEFER - module dependencies are not yet available. -+ */ -+int kbase_arbif_init(struct kbase_device *kbdev); ++ sram_gp = of_gen_pool_get(np, "rockchip,sram", 0); ++ /* release node */ ++ of_node_put(np); ++ if (sram_gp == NULL) { ++ pr_err("sram gen pool is NULL"); ++ return -ENOMEM; ++ } + -+/** -+ * kbase_arbif_destroy() - Cleanups the arbiter interface functionality. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Cleans up the arbiter interface functionality and resets the reference count -+ * of the arbif module used -+ */ -+void kbase_arbif_destroy(struct kbase_device *kbdev); ++ ret = sram_dma_heap_export("sram-heap", sram_gp); + -+/** -+ * kbase_arbif_get_max_config() - Request max config info -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * call back function from arb interface to arbiter requesting max config info -+ */ -+void kbase_arbif_get_max_config(struct kbase_device *kbdev); ++ return ret; ++} ++module_init(rk_add_default_sram_heap); ++MODULE_DESCRIPTION("Rockchip DMA-BUF SRAM Heap"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/dma-buf/rk_heaps/Kconfig b/drivers/dma-buf/rk_heaps/Kconfig +new file mode 100644 +index 000000000..6ca3fbe76 +--- /dev/null ++++ b/drivers/dma-buf/rk_heaps/Kconfig +@@ -0,0 +1,48 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++menuconfig DMABUF_HEAPS_ROCKCHIP ++ bool "DMA-BUF Userland Memory Heaps for RockChip" ++ select DMA_SHARED_BUFFER ++ help ++ Choose this option to enable the RockChip DMA-BUF userland memory heaps. 
++ This option creates per-heap chardevs in /dev/rk_dma_heap/ which
++ allow userspace to allocate dma-bufs that can be shared
++ between drivers.
+
++config DMABUF_HEAPS_ROCKCHIP_CMA_HEAP
++ tristate "DMA-BUF RockChip CMA Heap"
++ depends on DMABUF_HEAPS_ROCKCHIP
++ help
++ Choose this option to enable the dma-buf RockChip CMA heap. This heap is
++ backed by the Contiguous Memory Allocator (CMA). If your system has such
++ regions, you should say Y here.
+
++config DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT
++ int "Maximum PAGE_SIZE order of alignment for RockChip CMA Heap"
++ range 0 12
++ depends on DMABUF_HEAPS_ROCKCHIP_CMA_HEAP
++ default 8
++ help
++ The DMA mapping framework by default aligns all buffers to the smallest
++ PAGE_SIZE order which is greater than or equal to the requested buffer
++ size. This works well for buffers up to a few hundred kilobytes, but
++ for larger buffers it simply wastes memory. With this parameter you can
++ specify the maximum PAGE_SIZE order for contiguous buffers. Larger
++ buffers will be aligned only to this specified order. The order is
++ expressed as a power of two multiplied by the PAGE_SIZE.
+
++ For example, if your system defaults to 4KiB pages, the order value
++ of 8 means that the buffers will be aligned up to 1MiB only.
+
++ If unsure, leave the default value "8".
+
++config DMABUF_RK_HEAPS_DEBUG
++ bool "DMA-BUF RockChip Heap Debug"
++ depends on DMABUF_HEAPS_ROCKCHIP
++ help
++ Choose this option to enable dma-buf RockChip heap debug.
+
++config DMABUF_RK_HEAPS_DEBUG_PRINT
++ bool "DMA-BUF RockChip Heap Debug print log enable"
++ depends on DMABUF_HEAPS_ROCKCHIP
++ help
++ Choose this option to enable dma-buf RockChip heap debug log printing.
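A usage note on the heaps configured above: heaps registered through dma_heap_add() (the system, system-uncached and sram heaps earlier in this patch) show up as chardevs under /dev/dma_heap/ and are allocated from with the generic UAPI in <linux/dma-heap.h>, while the rk_dma_heap framework selected here exposes its nodes under /dev/rk_dma_heap/. A minimal allocation sketch against the standard ioctl (heap path illustrative; the RockChip-specific chardevs may differ in detail):

#include <fcntl.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dma-heap.h>

/* Illustrative only: returns a dma-buf fd allocated from the named heap. */
static int heap_alloc(const char *heap_path, size_t len)
{
        struct dma_heap_allocation_data data = {
                .len = len,
                .fd_flags = O_RDWR | O_CLOEXEC,
        };
        int heap_fd, ret;

        heap_fd = open(heap_path, O_RDWR | O_CLOEXEC);
        if (heap_fd < 0)
                return -1;

        ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
        close(heap_fd);

        return ret < 0 ? -1 : (int)data.fd;
}

/* e.g. heap_alloc("/dev/dma_heap/system-uncached", 1 << 20); */

The returned fd is an ordinary dma-buf file descriptor: it can be mmap()ed, passed to a driver, or synced with DMA_BUF_IOCTL_SYNC as sketched earlier.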
+diff --git a/drivers/dma-buf/rk_heaps/Makefile b/drivers/dma-buf/rk_heaps/Makefile new file mode 100644 -index 000000000..1c4901b3b +index 000000000..30d44bb7d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h -@@ -0,0 +1,76 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/dma-buf/rk_heaps/Makefile +@@ -0,0 +1,6 @@ ++# SPDX-License-Identifier: GPL-2.0 ++ ++rk-cma-heap-objs := rk-dma-cma.o rk-cma-heap.o ++ ++obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP) += rk-dma-heap.o ++obj-$(CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_HEAP) += rk-cma-heap.o +diff --git a/drivers/dma-buf/rk_heaps/rk-cma-heap.c b/drivers/dma-buf/rk_heaps/rk-cma-heap.c +new file mode 100644 +index 000000000..5b063f87b +--- /dev/null ++++ b/drivers/dma-buf/rk_heaps/rk-cma-heap.c +@@ -0,0 +1,687 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* ++ * DMABUF CMA heap exporter + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Copyright (C) 2012, 2019, 2020 Linaro Ltd. ++ * Author: for ST-Ericsson. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Also utilizing parts of Andrew Davis' SRAM heap: ++ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ ++ * Andrew F. Davis + * ++ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Simon Xue + */ + -+/** -+ * DOC: Mali structures define to support arbitration feature -+ */ -+ -+#ifndef _MALI_KBASE_ARBITER_DEFS_H_ -+#define _MALI_KBASE_ARBITER_DEFS_H_ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../../../mm/cma.h" ++#include "rk-dma-heap.h" + -+#include "mali_kbase_arbiter_pm.h" ++struct rk_cma_heap { ++ struct rk_dma_heap *heap; ++ struct cma *cma; ++}; + -+/** -+ * struct kbase_arbiter_vm_state - Struct representing the state and containing the -+ * data of pm work -+ * @kbdev: Pointer to kbase device structure (must be a valid pointer) -+ * @vm_state_lock: The lock protecting the VM state when arbiter is used. 
-+ * This lock must also be held whenever the VM state is being -+ * transitioned -+ * @vm_state_wait: Wait queue set when GPU is granted -+ * @vm_state: Current state of VM -+ * @vm_arb_wq: Work queue for resuming or stopping work on the GPU for use -+ * with the Arbiter -+ * @vm_suspend_work: Work item for vm_arb_wq to stop current work on GPU -+ * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU -+ * @vm_arb_starting: Work queue resume in progress -+ * @vm_arb_stopping: Work queue suspend in progress -+ * @interrupts_installed: Flag set when interrupts are installed -+ * @vm_request_timer: Timer to monitor GPU request -+ */ -+struct kbase_arbiter_vm_state { -+ struct kbase_device *kbdev; -+ struct mutex vm_state_lock; -+ wait_queue_head_t vm_state_wait; -+ enum kbase_vm_state vm_state; -+ struct workqueue_struct *vm_arb_wq; -+ struct work_struct vm_suspend_work; -+ struct work_struct vm_resume_work; -+ bool vm_arb_starting; -+ bool vm_arb_stopping; -+ bool interrupts_installed; -+ struct hrtimer vm_request_timer; ++struct rk_cma_heap_buffer { ++ struct rk_cma_heap *heap; ++ struct list_head attachments; ++ struct mutex lock; ++ unsigned long len; ++ struct page *cma_pages; ++ struct page **pages; ++ pgoff_t pagecount; ++ int vmap_cnt; ++ void *vaddr; ++ phys_addr_t phys; ++ bool attached; +}; + -+/** -+ * struct kbase_arbiter_device - Representing an instance of arbiter device, -+ * allocated from the probe method of Mali driver -+ * @arb_if: Pointer to the arbiter interface device -+ * @arb_dev: Pointer to the arbiter device -+ * @arb_freq: GPU clock frequency retrieved from arbiter. -+ */ -+struct kbase_arbiter_device { -+ struct arbiter_if_dev *arb_if; -+ struct device *arb_dev; -+ struct kbase_arbiter_freq arb_freq; ++struct rk_cma_heap_attachment { ++ struct device *dev; ++ struct sg_table table; ++ struct list_head list; ++ bool mapped; +}; + -+#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c -new file mode 100644 -index 000000000..667552c56 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c -@@ -0,0 +1,1138 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++static int rk_cma_heap_attach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++{ ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap_attachment *a; ++ struct sg_table *table; ++ size_t size = buffer->pagecount << PAGE_SHIFT; ++ int ret; + -+/** -+ * DOC: Mali arbiter power manager state machine and APIs -+ */ ++ a = kzalloc(sizeof(*a), GFP_KERNEL); ++ if (!a) ++ return -ENOMEM; + -+#include -+#include -+#include -+#include -+#include -+#include ++ table = &a->table; + -+/* A dmesg warning will occur if the GPU is not granted -+ * after the following time (in milliseconds) has ellapsed. -+ */ -+#define GPU_REQUEST_TIMEOUT 1000 -+#define KHZ_TO_HZ 1000 ++ ret = sg_alloc_table(table, 1, GFP_KERNEL); ++ if (ret) { ++ kfree(a); ++ return ret; ++ } ++ sg_set_page(table->sgl, buffer->cma_pages, PAGE_ALIGN(size), 0); + -+#define MAX_L2_SLICES_MASK 0xFF ++ a->dev = attachment->dev; ++ INIT_LIST_HEAD(&a->list); ++ a->mapped = false; + -+/* Maximum time in ms, before deferring probe incase -+ * GPU_GRANTED message is not received -+ */ -+static int gpu_req_timeout = 1; -+module_param(gpu_req_timeout, int, 0644); -+MODULE_PARM_DESC(gpu_req_timeout, -+ "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); ++ attachment->priv = a; + -+static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); -+static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( -+ struct kbase_device *kbdev); ++ buffer->attached = true; + -+/** -+ * kbase_arbiter_pm_vm_state_str() - Helper function to get string -+ * for kbase VM state.(debug) -+ * @state: kbase VM state -+ * -+ * Return: string representation of Kbase_vm_state -+ */ -+static inline const char *kbase_arbiter_pm_vm_state_str( -+ enum kbase_vm_state state) -+{ -+ switch (state) { -+ case KBASE_VM_STATE_INITIALIZING: -+ return "KBASE_VM_STATE_INITIALIZING"; -+ case KBASE_VM_STATE_INITIALIZING_WITH_GPU: -+ return "KBASE_VM_STATE_INITIALIZING_WITH_GPU"; -+ case KBASE_VM_STATE_SUSPENDED: -+ return "KBASE_VM_STATE_SUSPENDED"; -+ case KBASE_VM_STATE_STOPPED: -+ return "KBASE_VM_STATE_STOPPED"; -+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: -+ return "KBASE_VM_STATE_STOPPED_GPU_REQUESTED"; -+ case KBASE_VM_STATE_STARTING: -+ return "KBASE_VM_STATE_STARTING"; -+ case KBASE_VM_STATE_IDLE: -+ return "KBASE_VM_STATE_IDLE"; -+ case KBASE_VM_STATE_ACTIVE: -+ return "KBASE_VM_STATE_ACTIVE"; -+ case KBASE_VM_STATE_STOPPING_IDLE: -+ return "KBASE_VM_STATE_STOPPING_IDLE"; -+ case KBASE_VM_STATE_STOPPING_ACTIVE: -+ return "KBASE_VM_STATE_STOPPING_ACTIVE"; -+ case KBASE_VM_STATE_SUSPEND_PENDING: -+ return "KBASE_VM_STATE_SUSPEND_PENDING"; -+ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: -+ return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; -+ default: -+ KBASE_DEBUG_ASSERT(false); -+ return "[UnknownState]"; -+ } ++ mutex_lock(&buffer->lock); ++ list_add(&a->list, &buffer->attachments); ++ mutex_unlock(&buffer->lock); ++ ++ return 0; +} + -+/** -+ * kbase_arbiter_pm_vm_event_str() - Helper function to get string -+ * for kbase VM event.(debug) -+ * @evt: kbase VM state -+ * -+ * Return: String representation of Kbase_arbif_event -+ */ -+static inline const char *kbase_arbiter_pm_vm_event_str( -+ enum kbase_arbif_evt evt) ++static void rk_cma_heap_detach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) +{ -+ switch (evt) { -+ case KBASE_VM_GPU_INITIALIZED_EVT: -+ return "KBASE_VM_GPU_INITIALIZED_EVT"; -+ case KBASE_VM_GPU_STOP_EVT: -+ return 
"KBASE_VM_GPU_STOP_EVT"; -+ case KBASE_VM_GPU_GRANTED_EVT: -+ return "KBASE_VM_GPU_GRANTED_EVT"; -+ case KBASE_VM_GPU_LOST_EVT: -+ return "KBASE_VM_GPU_LOST_EVT"; -+ case KBASE_VM_OS_SUSPEND_EVENT: -+ return "KBASE_VM_OS_SUSPEND_EVENT"; -+ case KBASE_VM_OS_RESUME_EVENT: -+ return "KBASE_VM_OS_RESUME_EVENT"; -+ case KBASE_VM_GPU_IDLE_EVENT: -+ return "KBASE_VM_GPU_IDLE_EVENT"; -+ case KBASE_VM_REF_EVENT: -+ return "KBASE_VM_REF_EVENT"; -+ default: -+ KBASE_DEBUG_ASSERT(false); -+ return "[UnknownEvent]"; -+ } ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap_attachment *a = attachment->priv; ++ ++ mutex_lock(&buffer->lock); ++ list_del(&a->list); ++ mutex_unlock(&buffer->lock); ++ ++ buffer->attached = false; ++ ++ sg_free_table(&a->table); ++ kfree(a); +} + -+/** -+ * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @new_state: kbase VM new state -+ * -+ * This function sets the new state for the VM -+ */ -+static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, -+ enum kbase_vm_state new_state) ++static struct sg_table *rk_cma_heap_map_dma_buf(struct dma_buf_attachment *attachment, ++ enum dma_data_direction direction) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ -+ dev_dbg(kbdev->dev, "VM set_state %s -> %s", -+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), -+ kbase_arbiter_pm_vm_state_str(new_state)); ++ struct rk_cma_heap_attachment *a = attachment->priv; ++ struct sg_table *table = &a->table; ++ int ret; + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ arb_vm_state->vm_state = new_state; -+ if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && -+ new_state != KBASE_VM_STATE_INITIALIZING) -+ KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state); -+ wake_up(&arb_vm_state->vm_state_wait); ++ ret = dma_map_sgtable(attachment->dev, table, direction, 0); ++ if (ret) ++ return ERR_PTR(-ENOMEM); ++ a->mapped = true; ++ return table; +} + -+/** -+ * kbase_arbiter_pm_suspend_wq() - suspend work queue of the driver. -+ * @data: work queue -+ * -+ * Suspends work queue of the driver, when VM is in SUSPEND_PENDING or -+ * STOPPING_IDLE or STOPPING_ACTIVE state -+ */ -+static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) ++static void rk_cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, ++ struct sg_table *table, ++ enum dma_data_direction direction) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, -+ struct kbase_arbiter_vm_state, -+ vm_suspend_work); -+ struct kbase_device *kbdev = arb_vm_state->kbdev; ++ struct rk_cma_heap_attachment *a = attachment->priv; + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, ">%s\n", __func__); -+ if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || -+ arb_vm_state->vm_state == -+ KBASE_VM_STATE_STOPPING_ACTIVE || -+ arb_vm_state->vm_state == -+ KBASE_VM_STATE_SUSPEND_PENDING) { -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); -+ kbase_pm_driver_suspend(kbdev); -+ dev_dbg(kbdev->dev, "vm_state_lock); -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, "<%s\n", __func__); ++ a->mapped = false; ++ dma_unmap_sgtable(attachment->dev, table, direction, 0); +} + -+/** -+ * kbase_arbiter_pm_resume_wq() -Kbase resume work queue. 
-+ * @data: work item -+ * -+ * Resume work queue of the driver when VM is in STARTING state, -+ * else if its in STOPPING_ACTIVE will request a stop event. -+ */ -+static void kbase_arbiter_pm_resume_wq(struct work_struct *data) ++static int ++rk_cma_heap_dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf, ++ enum dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, -+ struct kbase_arbiter_vm_state, -+ vm_resume_work); -+ struct kbase_device *kbdev = arb_vm_state->kbdev; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap_attachment *a; + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, ">%s\n", __func__); -+ arb_vm_state->vm_arb_starting = true; -+ if (arb_vm_state->vm_state == KBASE_VM_STATE_STARTING) { -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, ">kbase_pm_driver_resume\n"); -+ kbase_pm_driver_resume(kbdev, true); -+ dev_dbg(kbdev->dev, "vm_state_lock); -+ } else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE) { -+ kbase_arbiter_pm_vm_stopped(kbdev); ++ if (buffer->vmap_cnt) ++ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); ++ ++ mutex_lock(&buffer->lock); ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_cpu(a->dev, &a->table, direction); + } -+ arb_vm_state->vm_arb_starting = false; -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev); -+ dev_dbg(kbdev->dev, "<%s\n", __func__); ++ ++ /* For userspace that not attach yet */ ++ if (buffer->phys && !buffer->attached) ++ dma_sync_single_for_cpu(rk_dma_heap_get_dev(buffer->heap->heap), ++ buffer->phys + offset, ++ len, ++ direction); ++ mutex_unlock(&buffer->lock); ++ ++ return 0; +} + -+/** -+ * request_timer_callback() - Issue warning on request timer expiration -+ * @timer: Request hr timer data -+ * -+ * Called when the Arbiter takes too long to grant the GPU after a -+ * request has been made. Issues a warning in dmesg. 
-+ * -+ * Return: Always returns HRTIMER_NORESTART -+ */ -+static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) ++static int ++rk_cma_heap_dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf, ++ enum dma_data_direction direction, ++ unsigned int offset, ++ unsigned int len) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, -+ struct kbase_arbiter_vm_state, vm_request_timer); ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap_attachment *a; + -+ KBASE_DEBUG_ASSERT(arb_vm_state); -+ KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); ++ if (buffer->vmap_cnt) ++ flush_kernel_vmap_range(buffer->vaddr, buffer->len); + -+ dev_warn(arb_vm_state->kbdev->dev, -+ "Still waiting for GPU to be granted from Arbiter after %d ms\n", -+ GPU_REQUEST_TIMEOUT); -+ return HRTIMER_NORESTART; ++ mutex_lock(&buffer->lock); ++ list_for_each_entry(a, &buffer->attachments, list) { ++ if (!a->mapped) ++ continue; ++ dma_sync_sgtable_for_device(a->dev, &a->table, direction); ++ } ++ ++ /* For userspace that not attach yet */ ++ if (buffer->phys && !buffer->attached) ++ dma_sync_single_for_device(rk_dma_heap_get_dev(buffer->heap->heap), ++ buffer->phys + offset, ++ len, ++ direction); ++ mutex_unlock(&buffer->lock); ++ ++ return 0; +} + -+/** -+ * start_request_timer() - Start a timer after requesting GPU -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Start a timer to track when kbase is waiting for the GPU from the -+ * Arbiter. If the timer expires before GPU is granted, a warning in -+ * dmesg will be issued. -+ */ -+static void start_request_timer(struct kbase_device *kbdev) ++static int rk_cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction dir) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ unsigned int len = buffer->pagecount * PAGE_SIZE; + -+ hrtimer_start(&arb_vm_state->vm_request_timer, -+ HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), -+ HRTIMER_MODE_REL); ++ return rk_cma_heap_dma_buf_begin_cpu_access_partial(dmabuf, dir, 0, len); +} + -+/** -+ * cancel_request_timer() - Stop the request timer -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Stops the request timer once GPU has been granted. Safe to call -+ * even if timer is no longer running. -+ */ -+static void cancel_request_timer(struct kbase_device *kbdev) ++static int rk_cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction dir) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ unsigned int len = buffer->pagecount * PAGE_SIZE; + -+ hrtimer_cancel(&arb_vm_state->vm_request_timer); ++ return rk_cma_heap_dma_buf_end_cpu_access_partial(dmabuf, dir, 0, len); +} + -+/** -+ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM -+ * Paravirtualized use. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Initialize the arbiter and other required resources during the runtime -+ * and request the GPU for the VM for the first time. 
-+ * -+ * Return: 0 if success, or a Linux error code -+ */ -+int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) ++static int rk_cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ -+ int err; -+ struct kbase_arbiter_vm_state *arb_vm_state = NULL; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ size_t size = vma->vm_end - vma->vm_start; ++ int ret; + -+ arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), -+ GFP_KERNEL); -+ if (arb_vm_state == NULL) -+ return -ENOMEM; ++ ret = remap_pfn_range(vma, vma->vm_start, __phys_to_pfn(buffer->phys), ++ size, vma->vm_page_prot); ++ if (ret) ++ return -EAGAIN; + -+ arb_vm_state->kbdev = kbdev; -+ arb_vm_state->vm_state = KBASE_VM_STATE_INITIALIZING; ++ return 0; ++} + -+ mutex_init(&arb_vm_state->vm_state_lock); -+ init_waitqueue_head(&arb_vm_state->vm_state_wait); -+ arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", -+ WQ_HIGHPRI); -+ if (!arb_vm_state->vm_arb_wq) { -+ dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); -+ kfree(arb_vm_state); -+ return -ENOMEM; -+ } -+ INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); -+ INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); -+ arb_vm_state->vm_arb_starting = false; -+ atomic_set(&kbdev->pm.gpu_users_waiting, 0); -+ hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ arb_vm_state->vm_request_timer.function = -+ request_timer_callback; -+ kbdev->pm.arb_vm_state = arb_vm_state; -+ -+ err = kbase_arbif_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Failed to initialise arbif module\n"); -+ goto arbif_init_fail; -+ } ++static void *rk_cma_heap_do_vmap(struct rk_cma_heap_buffer *buffer) ++{ ++ void *vaddr; ++ pgprot_t pgprot = PAGE_KERNEL; + -+ if (kbdev->arb.arb_if) { -+ kbase_arbif_gpu_request(kbdev); -+ dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); ++ vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, pgprot); ++ if (!vaddr) ++ return ERR_PTR(-ENOMEM); + -+ err = wait_event_timeout(arb_vm_state->vm_state_wait, -+ arb_vm_state->vm_state == -+ KBASE_VM_STATE_INITIALIZING_WITH_GPU, -+ msecs_to_jiffies(gpu_req_timeout)); ++ return vaddr; ++} + -+ if (!err) { -+ dev_dbg(kbdev->dev, -+ "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", -+ gpu_req_timeout); ++static int rk_cma_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map) ++{ ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ void *vaddr; ++ int ret = 0; + -+ err = -ENODEV; -+ goto arbif_timeout; -+ } ++ mutex_lock(&buffer->lock); ++ if (buffer->vmap_cnt) { ++ buffer->vmap_cnt++; ++ iosys_map_set_vaddr(map, buffer->vaddr); ++ goto out; ++ } + -+ dev_dbg(kbdev->dev, -+ "Waiting for initial GPU assignment - done\n"); ++ vaddr = rk_cma_heap_do_vmap(buffer); ++ if (IS_ERR(vaddr)) { ++ ret = PTR_ERR(vaddr); ++ goto out; + } -+ return 0; + -+arbif_timeout: -+ kbase_arbiter_pm_early_term(kbdev); -+ return err; ++ buffer->vaddr = vaddr; ++ buffer->vmap_cnt++; ++ iosys_map_set_vaddr(map, buffer->vaddr); ++out: ++ mutex_unlock(&buffer->lock); + -+arbif_init_fail: -+ destroy_workqueue(arb_vm_state->vm_arb_wq); -+ kfree(arb_vm_state); -+ kbdev->pm.arb_vm_state = NULL; -+ return err; ++ return ret; +} + -+/** -+ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Clean up all the resources -+ */ -+void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) 
++static void rk_cma_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; + -+ cancel_request_timer(kbdev); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { -+ kbase_pm_set_gpu_lost(kbdev, false); -+ kbase_arbif_gpu_stopped(kbdev, false); ++ mutex_lock(&buffer->lock); ++ if (!--buffer->vmap_cnt) { ++ vunmap(buffer->vaddr); ++ buffer->vaddr = NULL; + } -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ destroy_workqueue(arb_vm_state->vm_arb_wq); -+ kbase_arbif_destroy(kbdev); -+ arb_vm_state->vm_arb_wq = NULL; -+ kfree(kbdev->pm.arb_vm_state); -+ kbdev->pm.arb_vm_state = NULL; ++ mutex_unlock(&buffer->lock); ++ iosys_map_clear(map); +} + -+/** -+ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Releases interrupts and set the interrupt flag to false -+ */ -+void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) ++static void rk_cma_heap_remove_dmabuf_list(struct dma_buf *dmabuf) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_dma_heap_dmabuf *buf; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap *cma_heap = buffer->heap; ++ struct rk_dma_heap *heap = cma_heap->heap; + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ if (arb_vm_state->interrupts_installed == true) { -+ arb_vm_state->interrupts_installed = false; -+ kbase_release_interrupts(kbdev); ++ mutex_lock(&heap->dmabuf_lock); ++ list_for_each_entry(buf, &heap->dmabuf_list, node) { ++ if (buf->dmabuf == dmabuf) { ++ dma_heap_print("<%s> free dmabuf@[%pa-%pa] to heap-<%s>\n", ++ dmabuf->name, ++ dmabuf->file->f_inode->i_ino, ++ &buf->start, &buf->end, ++ rk_dma_heap_get_name(heap)); ++ list_del(&buf->node); ++ kfree(buf); ++ break; ++ } + } -+ mutex_unlock(&arb_vm_state->vm_state_lock); ++ mutex_unlock(&heap->dmabuf_lock); +} + -+/** -+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Install interrupts and set the interrupt_install flag to true. 
-+ * -+ * Return: 0 if success, or a Linux error code -+ */ -+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) ++static int rk_cma_heap_add_dmabuf_list(struct dma_buf *dmabuf, const char *name) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ int err; ++ struct rk_dma_heap_dmabuf *buf; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap *cma_heap = buffer->heap; ++ struct rk_dma_heap *heap = cma_heap->heap; + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ arb_vm_state->interrupts_installed = true; -+ err = kbase_install_interrupts(kbdev); -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ return err; ++ buf = kzalloc(sizeof(*buf), GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&buf->node); ++ buf->dmabuf = dmabuf; ++ buf->start = buffer->phys; ++ buf->end = buf->start + buffer->len - 1; ++ mutex_lock(&heap->dmabuf_lock); ++ list_add_tail(&buf->node, &heap->dmabuf_list); ++ mutex_unlock(&heap->dmabuf_lock); ++ ++ dma_heap_print("<%s> alloc dmabuf@[%pa-%pa] from heap-<%s>\n", ++ dmabuf->name, dmabuf->file->f_inode->i_ino, ++ &buf->start, &buf->end, rk_dma_heap_get_name(heap)); ++ ++ return 0; +} + -+/** -+ * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Handles a stop state for the VM -+ */ -+void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) ++static int rk_cma_heap_remove_contig_list(struct rk_dma_heap *heap, ++ struct page *page, const char *name) +{ -+ bool request_gpu = false; -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_dma_heap_contig_buf *buf; + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ mutex_lock(&heap->contig_lock); ++ list_for_each_entry(buf, &heap->contig_list, node) { ++ if (buf->start == page_to_phys(page)) { ++ dma_heap_print("<%s> free contig-buf@[%pa-%pa] to heap-<%s>\n", ++ buf->orig_alloc, &buf->start, &buf->end, ++ rk_dma_heap_get_name(heap)); ++ list_del(&buf->node); ++ kfree(buf->orig_alloc); ++ kfree(buf); ++ break; ++ } ++ } ++ mutex_unlock(&heap->contig_lock); + -+ if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 && -+ arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_ACTIVE); ++ return 0; ++} + -+ dev_dbg(kbdev->dev, "%s %s\n", __func__, -+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++static int rk_cma_heap_add_contig_list(struct rk_dma_heap *heap, ++ struct page *page, unsigned long size, ++ const char *name) ++{ ++ struct rk_dma_heap_contig_buf *buf; ++ const char *name_tmp; + -+ if (arb_vm_state->interrupts_installed) { -+ arb_vm_state->interrupts_installed = false; -+ kbase_release_interrupts(kbdev); -+ } ++ buf = kzalloc(sizeof(*buf), GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; + -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_STOPPING_ACTIVE: -+ request_gpu = true; -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); -+ break; -+ case KBASE_VM_STATE_STOPPING_IDLE: -+ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); -+ break; -+ case KBASE_VM_STATE_SUSPEND_PENDING: -+ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); -+ break; -+ default: -+ dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", -+ arb_vm_state->vm_state); -+ break; ++ INIT_LIST_HEAD(&buf->node); ++ if (!name) ++ name_tmp = current->comm; ++ else ++ name_tmp = name; ++ ++ 
buf->orig_alloc = kstrndup(name_tmp, RK_DMA_HEAP_NAME_LEN, GFP_KERNEL); ++ if (!buf->orig_alloc) { ++ kfree(buf); ++ return -ENOMEM; + } + -+ kbase_pm_set_gpu_lost(kbdev, false); -+ kbase_arbif_gpu_stopped(kbdev, request_gpu); -+ if (request_gpu) -+ start_request_timer(kbdev); ++ buf->start = page_to_phys(page); ++ buf->end = buf->start + size - 1; ++ mutex_lock(&heap->contig_lock); ++ list_add_tail(&buf->node, &heap->contig_list); ++ mutex_unlock(&heap->contig_lock); ++ ++ dma_heap_print("<%s> alloc contig-buf@[%pa-%pa] from heap-<%s>\n", ++ buf->orig_alloc, &buf->start, &buf->end, ++ rk_dma_heap_get_name(heap)); ++ ++ return 0; +} + -+void kbase_arbiter_set_max_config(struct kbase_device *kbdev, -+ uint32_t max_l2_slices, -+ uint32_t max_core_mask) ++static void rk_cma_heap_dma_buf_release(struct dma_buf *dmabuf) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state; -+ struct max_config_props max_config; ++ struct rk_cma_heap_buffer *buffer = dmabuf->priv; ++ struct rk_cma_heap *cma_heap = buffer->heap; ++ struct rk_dma_heap *heap = cma_heap->heap; + -+ if (!kbdev) -+ return; ++ if (buffer->vmap_cnt > 0) { ++ WARN(1, "%s: buffer still mapped in the kernel\n", __func__); ++ vunmap(buffer->vaddr); ++ } + -+ /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ -+ max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK; -+ max_config.core_mask = max_core_mask; -+ arb_vm_state = kbdev->pm.arb_vm_state; ++ rk_cma_heap_remove_dmabuf_list(dmabuf); + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ /* Just set the max_props in kbase during initialization. */ -+ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) -+ kbase_gpuprops_set_max_config(kbdev, &max_config); -+ else -+ dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", -+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++ /* free page list */ ++ kfree(buffer->pages); ++ /* release memory */ ++ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); ++ rk_dma_heap_total_dec(heap, buffer->len); + -+ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kfree(buffer); +} + -+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) ++static const struct dma_buf_ops rk_cma_heap_buf_ops = { ++ .cache_sgt_mapping = true, ++ .attach = rk_cma_heap_attach, ++ .detach = rk_cma_heap_detach, ++ .map_dma_buf = rk_cma_heap_map_dma_buf, ++ .unmap_dma_buf = rk_cma_heap_unmap_dma_buf, ++ .begin_cpu_access = rk_cma_heap_dma_buf_begin_cpu_access, ++ .end_cpu_access = rk_cma_heap_dma_buf_end_cpu_access, ++#ifdef CONFIG_DMABUF_PARTIAL ++ .begin_cpu_access_partial = rk_cma_heap_dma_buf_begin_cpu_access_partial, ++ .end_cpu_access_partial = rk_cma_heap_dma_buf_end_cpu_access_partial, ++#endif ++ .mmap = rk_cma_heap_mmap, ++ .vmap = rk_cma_heap_vmap, ++ .vunmap = rk_cma_heap_vunmap, ++ .release = rk_cma_heap_dma_buf_release, ++}; ++ ++static struct dma_buf *rk_cma_heap_allocate(struct rk_dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags, ++ const char *name) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state; -+ int result = -EINVAL; ++ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); ++ struct rk_cma_heap_buffer *buffer; ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ size_t size = PAGE_ALIGN(len); ++ pgoff_t pagecount = size >> PAGE_SHIFT; ++ unsigned long align = get_order(size); ++ struct page *cma_pages; ++ struct dma_buf *dmabuf; ++ pgoff_t pg; ++ int ret = -ENOMEM; + -+ if (!kbdev) -+ return result; ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) 
++ return ERR_PTR(-ENOMEM); + -+ /* First check the GPU_LOST state */ -+ kbase_pm_lock(kbdev); -+ if (kbase_pm_is_gpu_lost(kbdev)) { -+ kbase_pm_unlock(kbdev); -+ return 0; -+ } -+ kbase_pm_unlock(kbdev); ++ INIT_LIST_HEAD(&buffer->attachments); ++ mutex_init(&buffer->lock); ++ buffer->len = size; + -+ /* Then the arbitration state machine */ -+ arb_vm_state = kbdev->pm.arb_vm_state; ++ if (align > CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT) ++ align = CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT; + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_INITIALIZING: -+ case KBASE_VM_STATE_SUSPENDED: -+ case KBASE_VM_STATE_STOPPED: -+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: -+ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: -+ result = 0; -+ break; -+ default: -+ result = 1; -+ break; -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); ++ cma_pages = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); ++ if (!cma_pages) ++ goto free_buffer; + -+ return result; -+} ++ /* Clear the cma pages */ ++ if (PageHighMem(cma_pages)) { ++ unsigned long nr_clear_pages = pagecount; ++ struct page *page = cma_pages; + -+/** -+ * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Handles the start state of the VM -+ */ -+static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ bool freq_updated = false; ++ while (nr_clear_pages > 0) { ++ void *vaddr = kmap_atomic(page); + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ mutex_lock(&kbdev->arb.arb_freq.arb_freq_lock); -+ if (kbdev->arb.arb_freq.freq_updated) { -+ kbdev->arb.arb_freq.freq_updated = false; -+ freq_updated = true; ++ memset(vaddr, 0, PAGE_SIZE); ++ kunmap_atomic(vaddr); ++ /* ++ * Avoid wasting time zeroing memory if the process ++ * has been killed by SIGKILL ++ */ ++ if (fatal_signal_pending(current)) ++ goto free_cma; ++ page++; ++ nr_clear_pages--; ++ } ++ } else { ++ memset(page_address(cma_pages), 0, size); + } -+ mutex_unlock(&kbdev->arb.arb_freq.arb_freq_lock); + -+ cancel_request_timer(kbdev); -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_INITIALIZING: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_INITIALIZING_WITH_GPU); -+ break; -+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: -+ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); -+ arb_vm_state->interrupts_installed = true; -+ kbase_install_interrupts(kbdev); -+ /* -+ * GPU GRANTED received while in stop can be a result of a -+ * repartitioning. -+ */ -+ kbase_gpuprops_req_curr_config_update(kbdev); -+ /* curr_config will be updated while resuming the PM. 
*/ -+ queue_work(arb_vm_state->vm_arb_wq, -+ &arb_vm_state->vm_resume_work); -+ break; -+ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: -+ kbase_pm_set_gpu_lost(kbdev, false); -+ kbase_arbif_gpu_stopped(kbdev, false); -+ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); -+ break; -+ default: -+ /* -+ * GPU_GRANTED can be received when there is a frequency update -+ * Only show a warning if received in an unexpected state -+ * without a frequency update -+ */ -+ if (!freq_updated) -+ dev_warn(kbdev->dev, -+ "GPU_GRANTED when not expected - state %s\n", -+ kbase_arbiter_pm_vm_state_str( -+ arb_vm_state->vm_state)); -+ break; ++ buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages), ++ GFP_KERNEL); ++ if (!buffer->pages) { ++ ret = -ENOMEM; ++ goto free_cma; + } -+} + -+/** -+ * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Handles the start state of the VM -+ */ -+static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ for (pg = 0; pg < pagecount; pg++) ++ buffer->pages[pg] = &cma_pages[pg]; + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) { -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ } ++ buffer->cma_pages = cma_pages; ++ buffer->heap = cma_heap; ++ buffer->pagecount = pagecount; + -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_IDLE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_IDLE); -+ queue_work(arb_vm_state->vm_arb_wq, -+ &arb_vm_state->vm_suspend_work); -+ break; -+ case KBASE_VM_STATE_ACTIVE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_ACTIVE); -+ queue_work(arb_vm_state->vm_arb_wq, -+ &arb_vm_state->vm_suspend_work); -+ break; -+ case KBASE_VM_STATE_STARTING: -+ dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_ACTIVE); -+ if (arb_vm_state->vm_arb_starting) -+ queue_work(arb_vm_state->vm_arb_wq, -+ &arb_vm_state->vm_suspend_work); -+ break; -+ case KBASE_VM_STATE_SUSPEND_PENDING: -+ /* Suspend finishes with a stop so nothing else to do */ -+ break; -+ default: -+ dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n", -+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); -+ break; ++ /* create the dmabuf */ ++ exp_info.exp_name = rk_dma_heap_get_name(heap); ++ exp_info.ops = &rk_cma_heap_buf_ops; ++ exp_info.size = buffer->len; ++ exp_info.flags = fd_flags; ++ exp_info.priv = buffer; ++ dmabuf = dma_buf_export(&exp_info); ++ if (IS_ERR(dmabuf)) { ++ ret = PTR_ERR(dmabuf); ++ goto free_pages; + } -+} + -+/** -+ * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * On GPU lost event signals GPU_LOST to the aribiter -+ */ -+static void kbase_gpu_lost(struct kbase_device *kbdev) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ bool handle_gpu_lost = false; ++ buffer->phys = page_to_phys(cma_pages); ++ dma_sync_single_for_cpu(rk_dma_heap_get_dev(heap), buffer->phys, ++ buffer->pagecount * PAGE_SIZE, ++ DMA_FROM_DEVICE); + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ ret = 
rk_cma_heap_add_dmabuf_list(dmabuf, name); ++ if (ret) ++ goto fail_dma_buf; + -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_STARTING: -+ case KBASE_VM_STATE_ACTIVE: -+ case KBASE_VM_STATE_IDLE: -+ dev_warn(kbdev->dev, "GPU lost in state %s", -+ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); -+ kbase_arbiter_pm_vm_gpu_stop(kbdev); -+ handle_gpu_lost = true; -+ break; -+ case KBASE_VM_STATE_STOPPING_IDLE: -+ case KBASE_VM_STATE_STOPPING_ACTIVE: -+ case KBASE_VM_STATE_SUSPEND_PENDING: -+ dev_dbg(kbdev->dev, "GPU lost while stopping"); -+ handle_gpu_lost = true; -+ break; -+ case KBASE_VM_STATE_SUSPENDED: -+ case KBASE_VM_STATE_STOPPED: -+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: -+ dev_dbg(kbdev->dev, "GPU lost while already stopped"); -+ break; -+ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: -+ dev_dbg(kbdev->dev, "GPU lost while waiting to suspend"); -+ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); -+ break; -+ default: -+ break; -+ } -+ if (handle_gpu_lost) { -+ /* Releasing the VM state lock here is safe because -+ * we are guaranteed to be in either STOPPING_IDLE, -+ * STOPPING_ACTIVE or SUSPEND_PENDING at this point. -+ * The only transitions that are valid from here are to -+ * STOPPED, STOPPED_GPU_REQUESTED or SUSPENDED which can -+ * only happen at the completion of the GPU lost handling. -+ */ -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ kbase_pm_handle_gpu_lost(kbdev); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ } -+} ++ rk_dma_heap_total_inc(heap, buffer->len); + -+/** -+ * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready -+ * to be moved to suspended state. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Return: True if its ready to be suspended else False. -+ */ -+static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( -+ struct kbase_device *kbdev) -+{ -+ switch (kbdev->pm.arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_SUSPENDED: -+ case KBASE_VM_STATE_STOPPED: -+ case KBASE_VM_STATE_IDLE: -+ case KBASE_VM_STATE_ACTIVE: -+ return true; -+ default: -+ return false; -+ } ++ return dmabuf; ++ ++fail_dma_buf: ++ dma_buf_put(dmabuf); ++free_pages: ++ kfree(buffer->pages); ++free_cma: ++ cma_release(cma_heap->cma, cma_pages, pagecount); ++free_buffer: ++ kfree(buffer); ++ ++ return ERR_PTR(ret); +} + -+/** -+ * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state -+ * until it receives the grant message from arbiter -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Prepares OS to be in suspend state until it receives GRANT message -+ * from Arbiter asynchronously. 
-+ */ -+static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) ++static struct page *rk_cma_heap_allocate_pages(struct rk_dma_heap *heap, ++ size_t len, const char *name) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ enum kbase_vm_state prev_state; ++ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); ++ size_t size = PAGE_ALIGN(len); ++ pgoff_t pagecount = size >> PAGE_SHIFT; ++ unsigned long align = get_order(size); ++ struct page *page; ++ int ret; + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ if (kbdev->arb.arb_if) { -+ if (kbdev->pm.arb_vm_state->vm_state == -+ KBASE_VM_STATE_SUSPENDED) -+ return; -+ } -+ /* Block suspend OS function until we are in a stable state -+ * with vm_state_lock -+ */ -+ while (!kbase_arbiter_pm_vm_os_suspend_ready_state(kbdev)) { -+ prev_state = arb_vm_state->vm_state; -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_STOPPING_ACTIVE: -+ case KBASE_VM_STATE_STOPPING_IDLE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_SUSPEND_PENDING); -+ break; -+ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); -+ break; -+ case KBASE_VM_STATE_STARTING: -+ if (!arb_vm_state->vm_arb_starting) { -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_SUSPEND_PENDING); -+ kbase_arbiter_pm_vm_stopped(kbdev); -+ } -+ break; -+ default: -+ break; -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ wait_event(arb_vm_state->vm_state_wait, -+ arb_vm_state->vm_state != prev_state); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ } ++ if (align > CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT) ++ align = CONFIG_DMABUF_HEAPS_ROCKCHIP_CMA_ALIGNMENT; + -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_STOPPED: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_SUSPENDED); -+ break; -+ case KBASE_VM_STATE_IDLE: -+ case KBASE_VM_STATE_ACTIVE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_SUSPEND_PENDING); -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ /* Ensure resume has completed fully before starting suspend */ -+ flush_work(&arb_vm_state->vm_resume_work); -+ kbase_pm_driver_suspend(kbdev); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ break; -+ case KBASE_VM_STATE_SUSPENDED: -+ break; -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, "Unexpected state to suspend"); -+ break; ++ page = cma_alloc(cma_heap->cma, pagecount, align, GFP_KERNEL); ++ if (!page) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rk_cma_heap_add_contig_list(heap, page, size, name); ++ if (ret) { ++ cma_release(cma_heap->cma, page, pagecount); ++ return ERR_PTR(-EINVAL); + } ++ ++ rk_dma_heap_total_inc(heap, size); ++ ++ return page; +} + -+/** -+ * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives -+ * a grant message from arbiter -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Resume OS function once it receives GRANT message -+ * from Arbiter asynchronously. 
-+ */ -+static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) ++static void rk_cma_heap_free_pages(struct rk_dma_heap *heap, ++ struct page *page, size_t len, ++ const char *name) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct rk_cma_heap *cma_heap = rk_dma_heap_get_drvdata(heap); ++ pgoff_t pagecount = len >> PAGE_SHIFT; + -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == -+ KBASE_VM_STATE_SUSPENDED, -+ "Unexpected state to resume"); ++ rk_cma_heap_remove_contig_list(heap, page, name); + -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); -+ kbase_arbif_gpu_request(kbdev); -+ start_request_timer(kbdev); ++ cma_release(cma_heap->cma, page, pagecount); + -+ /* Release lock and block resume OS function until we have -+ * asynchronously received the GRANT message from the Arbiter and -+ * fully resumed -+ */ -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); -+ flush_work(&arb_vm_state->vm_resume_work); -+ mutex_lock(&arb_vm_state->vm_state_lock); ++ rk_dma_heap_total_dec(heap, len); +} + -+/** -+ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @evt: VM event -+ * -+ * The state machine function. Receives events and transitions states -+ * according the event received and the current state -+ */ -+void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, -+ enum kbase_arbif_evt evt) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++static const struct rk_dma_heap_ops rk_cma_heap_ops = { ++ .allocate = rk_cma_heap_allocate, ++ .alloc_contig_pages = rk_cma_heap_allocate_pages, ++ .free_contig_pages = rk_cma_heap_free_pages, ++}; + -+ if (!kbdev->arb.arb_if) -+ return; ++static int cma_procfs_show(struct seq_file *s, void *private); + -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ dev_dbg(kbdev->dev, "%s %s\n", __func__, -+ kbase_arbiter_pm_vm_event_str(evt)); -+ if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && -+ arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) -+ KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt); -+ switch (evt) { -+ case KBASE_VM_GPU_GRANTED_EVT: -+ kbase_arbiter_pm_vm_gpu_start(kbdev); -+ break; -+ case KBASE_VM_GPU_STOP_EVT: -+ kbase_arbiter_pm_vm_gpu_stop(kbdev); -+ break; -+ case KBASE_VM_GPU_LOST_EVT: -+ dev_dbg(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); -+ kbase_gpu_lost(kbdev); -+ break; -+ case KBASE_VM_OS_SUSPEND_EVENT: -+ kbase_arbiter_pm_vm_os_prepare_suspend(kbdev); -+ break; -+ case KBASE_VM_OS_RESUME_EVENT: -+ kbase_arbiter_pm_vm_os_resume(kbdev); -+ break; -+ case KBASE_VM_GPU_IDLE_EVENT: -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_ACTIVE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_IDLE); -+ kbase_arbif_gpu_idle(kbdev); -+ break; -+ default: -+ break; -+ } -+ break; ++static int __rk_add_cma_heap(struct cma *cma, void *data) ++{ ++ struct rk_cma_heap *cma_heap; ++ struct rk_dma_heap_export_info exp_info; + -+ case KBASE_VM_REF_EVENT: -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_STARTING: -+ case KBASE_VM_STATE_IDLE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_ACTIVE); -+ kbase_arbif_gpu_active(kbdev); -+ break; -+ case KBASE_VM_STATE_STOPPING_IDLE: -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_ACTIVE); -+ break; -+ default: -+ break; -+ 
} -+ break; ++ cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL); ++ if (!cma_heap) ++ return -ENOMEM; ++ cma_heap->cma = cma; + -+ case KBASE_VM_GPU_INITIALIZED_EVT: -+ switch (arb_vm_state->vm_state) { -+ case KBASE_VM_STATE_INITIALIZING_WITH_GPU: -+ lockdep_assert_held(&kbdev->pm.lock); -+ if (kbdev->pm.active_count > 0) { -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_ACTIVE); -+ kbase_arbif_gpu_active(kbdev); -+ } else { -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_IDLE); -+ kbase_arbif_gpu_idle(kbdev); -+ } -+ break; -+ default: -+ break; -+ } -+ break; ++ exp_info.name = cma_get_name(cma); ++ exp_info.ops = &rk_cma_heap_ops; ++ exp_info.priv = cma_heap; ++ exp_info.support_cma = true; + -+ default: -+ dev_alert(kbdev->dev, "Got Unknown Event!"); -+ break; -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+} ++ cma_heap->heap = rk_dma_heap_add(&exp_info); ++ if (IS_ERR(cma_heap->heap)) { ++ int ret = PTR_ERR(cma_heap->heap); + -+KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); ++ kfree(cma_heap); ++ return ret; ++ } + -+/** -+ * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * VM waits for a GPU assignment. -+ */ -+static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ if (cma_heap->heap->procfs) ++ proc_create_single_data("alloc_bitmap", 0, cma_heap->heap->procfs, ++ cma_procfs_show, cma); + -+ dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); -+ wait_event(arb_vm_state->vm_state_wait, -+ arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || -+ arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); -+ dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); ++ return 0; +} + -+/** -+ * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Checks if the virtual machine holds VM state lock. -+ * -+ * Return: true if GPU is assigned, else false. -+ */ -+static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( -+ struct kbase_device *kbdev) ++static int __init rk_add_default_cma_heap(void) +{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ -+ lockdep_assert_held(&arb_vm_state->vm_state_lock); -+ return (arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || -+ arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); -+} ++ struct cma *cma = rk_dma_heap_get_cma(); + -+/** -+ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for -+ * arbitration mode -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @suspend_handler: The handler code for how to handle a suspend -+ * that might occur -+ * -+ * This function handles a suspend event from the driver, -+ * communicating with the arbiter and waiting synchronously for the GPU -+ * to be granted again depending on the VM state. -+ * -+ * Return: 0 on success else 1 suspend handler isn not possible. 
-+ */ -+int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, -+ enum kbase_pm_suspend_handler suspend_handler) -+{ -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; -+ int res = 0; ++ if (WARN_ON(!cma)) ++ return -EINVAL; + -+ if (kbdev->arb.arb_if) { -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { -+ /* Update VM state since we have GPU work to do */ -+ if (arb_vm_state->vm_state == -+ KBASE_VM_STATE_STOPPING_IDLE) -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPING_ACTIVE); -+ else if (arb_vm_state->vm_state == -+ KBASE_VM_STATE_STOPPED) { -+ kbase_arbiter_pm_vm_set_state(kbdev, -+ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); -+ kbase_arbif_gpu_request(kbdev); -+ start_request_timer(kbdev); -+ } else if (arb_vm_state->vm_state == -+ KBASE_VM_STATE_INITIALIZING_WITH_GPU) -+ break; ++ return __rk_add_cma_heap(cma, NULL); ++} + -+ if (suspend_handler != -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { ++#if defined(CONFIG_VIDEO_ROCKCHIP_THUNDER_BOOT_ISP) && !defined(CONFIG_INITCALL_ASYNC) ++subsys_initcall(rk_add_default_cma_heap); ++#else ++module_init(rk_add_default_cma_heap); ++#endif + -+ /* In case of GPU lost, even if -+ * active_count > 0, we no longer have GPU -+ * access -+ */ -+ if (kbase_pm_is_gpu_lost(kbdev)) -+ res = 1; ++static void cma_procfs_format_array(char *buf, size_t bufsize, u32 *array, int array_size) ++{ ++ int i = 0; + -+ switch (suspend_handler) { -+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: -+ res = 1; -+ break; -+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: -+ if (kbdev->pm.active_count == 0) -+ res = 1; -+ break; -+ case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: -+ break; -+ default: -+ WARN(1, "Unknown suspend_handler\n"); -+ res = 1; -+ break; -+ } -+ break; -+ } ++ while (--array_size >= 0) { ++ size_t len; ++ char term = (array_size && (++i % 8)) ? ' ' : '\n'; + -+ /* Need to synchronously wait for GPU assignment */ -+ atomic_inc(&kbdev->pm.gpu_users_waiting); -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ kbase_pm_unlock(kbdev); -+ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); -+ kbase_pm_lock(kbdev); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ atomic_dec(&kbdev->pm.gpu_users_waiting); -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); ++ len = snprintf(buf, bufsize, "%08X%c", *array++, term); ++ buf += len; ++ bufsize -= len; + } -+ return res; +} + -+/** -+ * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received -+ * from arbiter. 
-+ * @arb_freq: Pointer to struchture holding GPU clock frequenecy data -+ * @freq: New frequency value in KHz -+ */ -+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, -+ uint32_t freq) ++static void cma_procfs_show_bitmap(struct seq_file *s, struct cma *cma) +{ -+ struct kbase_gpu_clk_notifier_data ndata; ++ int elements = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); ++ int size = elements * 9; ++ u32 *array = (u32 *)cma->bitmap; ++ char *buf; + -+ mutex_lock(&arb_freq->arb_freq_lock); -+ if (arb_freq->arb_freq != freq) { -+ ndata.new_rate = (unsigned long)freq * KHZ_TO_HZ; -+ ndata.old_rate = (unsigned long)arb_freq->arb_freq * KHZ_TO_HZ; -+ ndata.gpu_clk_handle = arb_freq; -+ arb_freq->arb_freq = freq; -+ arb_freq->freq_updated = true; -+ if (arb_freq->nb) -+ arb_freq->nb->notifier_call(arb_freq->nb, -+ POST_RATE_CHANGE, &ndata); -+ } ++ buf = kmalloc(size + 1, GFP_KERNEL); ++ if (!buf) ++ return; + -+ mutex_unlock(&arb_freq->arb_freq_lock); -+} ++ buf[size] = 0; + -+/** -+ * get_arb_gpu_clk() - Enumerate a GPU clock on the given index -+ * @kbdev: kbase_device pointer -+ * @index: GPU clock index -+ * -+ * Return: Pointer to structure holding GPU clock frequency data reported from -+ * arbiter, only index 0 is valid. -+ */ -+static void *get_arb_gpu_clk(struct kbase_device *kbdev, -+ unsigned int index) -+{ -+ if (index == 0) -+ return &kbdev->arb.arb_freq; -+ return NULL; ++ cma_procfs_format_array(buf, size + 1, array, elements); ++ seq_printf(s, "%s", buf); ++ kfree(buf); +} + -+/** -+ * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value -+ * @kbdev: kbase_device pointer -+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock -+ * -+ * Return: The GPU clock frequency value saved when gpu is granted from arbiter -+ */ -+static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, -+ void *gpu_clk_handle) ++static u64 cma_procfs_used_get(struct cma *cma) +{ -+ uint32_t freq; -+ struct kbase_arbiter_freq *arb_dev_freq = -+ (struct kbase_arbiter_freq *) gpu_clk_handle; ++ unsigned long flags; ++ unsigned long used; + -+ mutex_lock(&arb_dev_freq->arb_freq_lock); -+ /* Convert from KHz to Hz */ -+ freq = arb_dev_freq->arb_freq * KHZ_TO_HZ; -+ mutex_unlock(&arb_dev_freq->arb_freq_lock); -+ return freq; ++ spin_lock_irqsave(&cma->lock, flags); ++ used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma)); ++ spin_unlock_irqrestore(&cma->lock, flags); ++ ++ return (u64)used << cma->order_per_bit; +} + -+/** -+ * arb_gpu_clk_notifier_register() - Register a clock rate change notifier. -+ * @kbdev: kbase_device pointer -+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock -+ * @nb: notifier block containing the callback function pointer -+ * -+ * This function registers a callback function that is invoked whenever the -+ * frequency of the clock corresponding to @gpu_clk_handle changes. -+ * -+ * Return: 0 on success, negative error code otherwise. 
-+ */ -+static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) ++static int cma_procfs_show(struct seq_file *s, void *private) +{ -+ int ret = 0; -+ struct kbase_arbiter_freq *arb_dev_freq = -+ (struct kbase_arbiter_freq *)gpu_clk_handle; ++ struct cma *cma = s->private; ++ u64 used = cma_procfs_used_get(cma); + -+ if (!arb_dev_freq->nb) -+ arb_dev_freq->nb = nb; -+ else -+ ret = -EBUSY; ++ seq_printf(s, "Total: %lu KiB\n", cma->count << (PAGE_SHIFT - 10)); ++ seq_printf(s, " Used: %llu KiB\n\n", used << (PAGE_SHIFT - 10)); + -+ return ret; -+} ++ cma_procfs_show_bitmap(s, cma); + -+/** -+ * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier -+ * @kbdev: kbase_device pointer -+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock -+ * @nb: notifier block containing the callback function pointer -+ * -+ * This function pointer is used to unregister a callback function that -+ * was previously registered to get notified of a frequency change of the -+ * clock corresponding to @gpu_clk_handle. -+ */ -+static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) -+{ -+ struct kbase_arbiter_freq *arb_dev_freq = -+ (struct kbase_arbiter_freq *)gpu_clk_handle; -+ if (arb_dev_freq->nb == nb) { -+ arb_dev_freq->nb = NULL; -+ } else { -+ dev_err(kbdev->dev, "%s - notifier did not match\n", -+ __func__); -+ } ++ return 0; +} + -+struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { -+ .get_gpu_clk_rate = get_arb_gpu_clk_rate, -+ .enumerate_gpu_clk = get_arb_gpu_clk, -+ .gpu_clk_notifier_register = arb_gpu_clk_notifier_register, -+ .gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister -+}; -diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h ++MODULE_DESCRIPTION("RockChip DMA-BUF CMA Heap"); ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/dma-buf/rk_heaps/rk-dma-cma.c b/drivers/dma-buf/rk_heaps/rk-dma-cma.c new file mode 100644 -index 000000000..f863f8860 +index 000000000..b6521f7dc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h -@@ -0,0 +1,196 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/dma-buf/rk_heaps/rk-dma-cma.c +@@ -0,0 +1,77 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* ++ * Early setup for Rockchip DMA CMA + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/** -+ * DOC: Mali arbiter power manager state machine and APIs -+ */ -+ -+#ifndef _MALI_KBASE_ARBITER_PM_H_ -+#define _MALI_KBASE_ARBITER_PM_H_ -+ -+#include "mali_kbase_arbif.h" -+ -+/** -+ * enum kbase_vm_state - Current PM Arbitration state. 
-+ * -+ * @KBASE_VM_STATE_INITIALIZING: Special state before arbiter is initialized. -+ * @KBASE_VM_STATE_INITIALIZING_WITH_GPU: Initialization after GPU -+ * has been granted. -+ * @KBASE_VM_STATE_SUSPENDED: KBase is suspended by OS and GPU is not assigned. -+ * @KBASE_VM_STATE_STOPPED: GPU is not assigned to KBase and is not required. -+ * @KBASE_VM_STATE_STOPPED_GPU_REQUESTED: GPU is not assigned to KBase -+ * but a request has been made. -+ * @KBASE_VM_STATE_STARTING: GPU is assigned and KBase is getting ready to run. -+ * @KBASE_VM_STATE_IDLE: GPU is assigned but KBase has no work to do -+ * @KBASE_VM_STATE_ACTIVE: GPU is assigned and KBase is busy using it -+ * @KBASE_VM_STATE_SUSPEND_PENDING: OS is going into suspend mode. -+ * @KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: OS is going into suspend mode but GPU -+ * has already been requested. -+ * In this situation we must wait for -+ * the Arbiter to send a GRANTED message -+ * and respond immediately with -+ * a STOPPED message before entering -+ * the suspend mode. -+ * @KBASE_VM_STATE_STOPPING_IDLE: Arbiter has sent a stopped message and there -+ * is currently no work to do on the GPU. -+ * @KBASE_VM_STATE_STOPPING_ACTIVE: Arbiter has sent a stopped message when -+ * KBase has work to do. -+ */ -+enum kbase_vm_state { -+ KBASE_VM_STATE_INITIALIZING, -+ KBASE_VM_STATE_INITIALIZING_WITH_GPU, -+ KBASE_VM_STATE_SUSPENDED, -+ KBASE_VM_STATE_STOPPED, -+ KBASE_VM_STATE_STOPPED_GPU_REQUESTED, -+ KBASE_VM_STATE_STARTING, -+ KBASE_VM_STATE_IDLE, -+ KBASE_VM_STATE_ACTIVE, -+ KBASE_VM_STATE_SUSPEND_PENDING, -+ KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT, -+ KBASE_VM_STATE_STOPPING_IDLE, -+ KBASE_VM_STATE_STOPPING_ACTIVE -+}; -+ -+/** -+ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM Paravirtualized use -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Initialize the arbiter and other required resources during the runtime -+ * and request the GPU for the VM for the first time. -+ * -+ * Return: 0 if successful, otherwise a standard Linux error code -+ */ -+int kbase_arbiter_pm_early_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Clean up all the resources -+ */ -+void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Releases interrupts and set the interrupt flag to false -+ */ -+void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); -+ -+/** -+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Install interrupts and set the interrupt_install flag to true. -+ * -+ * Return: 0 if success, or a Linux error code ++ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Simon Xue + */ -+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); + -+/** -+ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @event: The event to dispatch -+ * -+ * The state machine function. 
Receives events and transitions states -+ * according the event received and the current state -+ */ -+void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, -+ enum kbase_arbif_evt event); ++#include ++#include + -+/** -+ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for -+ * arbitration mode -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @suspend_handler: The handler code for how to handle a suspend -+ * that might occur -+ * -+ * This function handles a suspend event from the driver, -+ * communicating with the arbiter and waiting synchronously for the GPU -+ * to be granted again depending on the VM state. -+ * -+ * Return: 0 if success, 1 if failure due to system suspending/suspended -+ */ -+int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, -+ enum kbase_pm_suspend_handler suspend_handler); ++#include "rk-dma-heap.h" + ++#define RK_DMA_HEAP_CMA_DEFAULT_SIZE SZ_32M + -+/** -+ * kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function handles a stop event for the VM. -+ * It will update the VM state and forward the stop event to the driver. -+ */ -+void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); ++static unsigned long rk_dma_heap_size __initdata; ++static unsigned long rk_dma_heap_base __initdata; + -+/** -+ * kbase_arbiter_set_max_config() - Set the max config data in kbase device. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer). -+ * @max_l2_slices: The maximum number of L2 slices. -+ * @max_core_mask: The largest core mask. -+ * -+ * This function handles a stop event for the VM. -+ * It will update the VM state and forward the stop event to the driver. 
-+ */ -+void kbase_arbiter_set_max_config(struct kbase_device *kbdev, -+ uint32_t max_l2_slices, -+ uint32_t max_core_mask); ++static struct cma *rk_dma_heap_cma; + -+/** -+ * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Return: 0 if the VM does not have access, 1 if it does, and a negative number -+ * if an error occurred -+ */ -+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); ++static int __init early_dma_heap_cma(char *p) ++{ ++ if (!p) { ++ pr_err("Config string not provided\n"); ++ return -EINVAL; ++ } + -+extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; ++ rk_dma_heap_size = memparse(p, &p); ++ if (*p != '@') ++ return 0; + -+/** -+ * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved -+ * from arbiter -+ * @arb_freq: GPU clock frequency value -+ * @arb_freq_lock: Mutex protecting access to arbfreq value -+ * @nb: Notifier block to receive rate change callbacks -+ * @freq_updated: Flag to indicate whether a frequency changed has just been -+ * communicated to avoid "GPU_GRANTED when not expected" warning -+ */ -+struct kbase_arbiter_freq { -+ uint32_t arb_freq; -+ struct mutex arb_freq_lock; -+ struct notifier_block *nb; -+ bool freq_updated; -+}; ++ rk_dma_heap_base = memparse(p + 1, &p); + -+/** -+ * kbase_arbiter_pm_update_gpu_freq() - Update GPU frequency -+ * @arb_freq: Pointer to GPU clock frequency data -+ * @freq: The new frequency -+ * -+ * Updates the GPU frequency and triggers any notifications -+ */ -+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, -+ uint32_t freq); ++ return 0; ++} ++early_param("rk_dma_heap_cma", early_dma_heap_cma); + -+#endif /*_MALI_KBASE_ARBITER_PM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild -new file mode 100755 -index 000000000..efebc8a54 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild -@@ -0,0 +1,58 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# ++#ifndef CONFIG_DMA_CMA ++void __weak ++dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) ++{ ++} ++#endif + -+bifrost_kbase-y += \ -+ backend/gpu/mali_kbase_cache_policy_backend.o \ -+ backend/gpu/mali_kbase_gpuprops_backend.o \ -+ backend/gpu/mali_kbase_irq_linux.o \ -+ backend/gpu/mali_kbase_js_backend.o \ -+ backend/gpu/mali_kbase_pm_backend.o \ -+ backend/gpu/mali_kbase_pm_driver.o \ -+ backend/gpu/mali_kbase_pm_metrics.o \ -+ backend/gpu/mali_kbase_pm_ca.o \ -+ backend/gpu/mali_kbase_pm_always_on.o \ -+ backend/gpu/mali_kbase_pm_coarse_demand.o \ -+ backend/gpu/mali_kbase_pm_policy.o \ -+ backend/gpu/mali_kbase_time.o \ -+ backend/gpu/mali_kbase_l2_mmu_config.o \ -+ backend/gpu/mali_kbase_clk_rate_trace_mgr.o ++int __init rk_dma_heap_cma_setup(void) ++{ ++ unsigned long size; ++ int ret; ++ bool fix = false; + -+ifeq ($(MALI_USE_CSF),0) -+ bifrost_kbase-y += \ -+ backend/gpu/mali_kbase_instr_backend.o \ -+ backend/gpu/mali_kbase_jm_as.o \ -+ backend/gpu/mali_kbase_debug_job_fault_backend.o \ -+ backend/gpu/mali_kbase_jm_hw.o \ -+ backend/gpu/mali_kbase_jm_rb.o -+endif ++ if (rk_dma_heap_size) ++ size = rk_dma_heap_size; ++ else ++ size = RK_DMA_HEAP_CMA_DEFAULT_SIZE; + ++ if (rk_dma_heap_base) ++ fix = true; + -+bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ -+ backend/gpu/mali_kbase_devfreq.o ++ ret = cma_declare_contiguous(rk_dma_heap_base, PAGE_ALIGN(size), 0x0, ++ PAGE_SIZE, 0, fix, "rk-dma-heap-cma", ++ &rk_dma_heap_cma); ++ if (ret) ++ return ret; + -+ifneq ($(CONFIG_MALI_REAL_HW),y) -+ bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o -+endif ++#if !IS_ENABLED(CONFIG_CMA_INACTIVE) ++ /* Architecture specific contiguous memory fixup. */ ++ dma_contiguous_early_fixup(cma_get_base(rk_dma_heap_cma), ++ cma_get_size(rk_dma_heap_cma)); ++#endif + -+# NO_MALI Dummy model interface -+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -+# HW error simulation -+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o ++ return 0; ++} + -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h ++struct cma *rk_dma_heap_get_cma(void) ++{ ++ return rk_dma_heap_cma; ++} +diff --git a/drivers/dma-buf/rk_heaps/rk-dma-heap.c b/drivers/dma-buf/rk_heaps/rk-dma-heap.c new file mode 100644 -index 000000000..6924fdb8a +index 000000000..d0e76edf3 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h -@@ -0,0 +1,30 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/dma-buf/rk_heaps/rk-dma-heap.c +@@ -0,0 +1,731 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* ++ * Framework for userspace DMA-BUF allocations + * -+ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * Copyright (C) 2011 Google, Inc. ++ * Copyright (C) 2019 Linaro Ltd. ++ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Simon Xue + */ + -+/* -+ * Backend specific configuration -+ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#ifndef _KBASE_BACKEND_CONFIG_H_ -+#define _KBASE_BACKEND_CONFIG_H_ ++#include "rk-dma-heap.h" + -+#endif /* _KBASE_BACKEND_CONFIG_H_ */ ++#define DEVNAME "rk_dma_heap" + -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c -new file mode 100644 -index 000000000..7c0abbaf8 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c -@@ -0,0 +1,92 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++#define NUM_HEAP_MINORS 128 + -+#include "backend/gpu/mali_kbase_cache_policy_backend.h" -+#include ++static LIST_HEAD(rk_heap_list); ++static DEFINE_MUTEX(rk_heap_list_lock); ++static dev_t rk_dma_heap_devt; ++static struct class *rk_dma_heap_class; ++static DEFINE_XARRAY_ALLOC(rk_dma_heap_minors); ++struct proc_dir_entry *proc_rk_dma_heap_dir; + -+/** -+ * kbasep_amba_register_present() - Check AMBA_<> register is present -+ * in the GPU. -+ * @kbdev: Device pointer -+ * -+ * Note: Only for arch version 12.x.1 onwards. -+ * -+ * Return: true if AMBA_FEATURES/ENABLE registers are present. 
-+ */ -+static bool kbasep_amba_register_present(struct kbase_device *kbdev) -+{ -+ return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= -+ GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); -+} ++#define K(size) ((unsigned long)((size) >> 10)) + -+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, -+ u32 mode) ++static int rk_vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) +{ -+ kbdev->current_gpu_coherency_mode = mode; -+ -+ if (kbasep_amba_register_present(kbdev)) { -+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); ++ struct rk_vmap_pfn_data *data = private; + -+ val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); -+ kbase_reg_write(kbdev, AMBA_ENABLE, val); -+ } else -+ kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); ++ *pte = pte_mkspecial(pfn_pte(data->pfn++, data->prot)); ++ return 0; +} + -+u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) ++void *rk_vmap_contig_pfn(unsigned long pfn, unsigned int count, pgprot_t prot) +{ -+ u32 coherency_features; -+ -+ if (kbasep_amba_register_present(kbdev)) -+ coherency_features = -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); -+ else -+ coherency_features = kbase_reg_read( -+ kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); ++ struct rk_vmap_pfn_data data = { .pfn = pfn, .prot = pgprot_nx(prot) }; ++ struct vm_struct *area; + -+ return coherency_features; ++ area = get_vm_area_caller(count * PAGE_SIZE, VM_MAP, ++ __builtin_return_address(0)); ++ if (!area) ++ return NULL; ++ if (apply_to_page_range(&init_mm, (unsigned long)area->addr, ++ count * PAGE_SIZE, rk_vmap_pfn_apply, &data)) { ++ free_vm_area(area); ++ return NULL; ++ } ++ return area->addr; +} + -+void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, -+ bool enable) ++int rk_dma_heap_set_dev(struct device *heap_dev) +{ -+ if (kbasep_amba_register_present(kbdev)) { -+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); ++ int err = 0; + -+ val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); -+ kbase_reg_write(kbdev, AMBA_ENABLE, val); ++ if (!heap_dev) ++ return -EINVAL; + -+ } else { -+ WARN(1, "memory_cache_support not supported"); ++ dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64)); ++ ++ if (!heap_dev->dma_parms) { ++ heap_dev->dma_parms = devm_kzalloc(heap_dev, ++ sizeof(*heap_dev->dma_parms), ++ GFP_KERNEL); ++ if (!heap_dev->dma_parms) ++ return -ENOMEM; ++ ++ err = dma_set_max_seg_size(heap_dev, (unsigned int)DMA_BIT_MASK(64)); ++ if (err) { ++ devm_kfree(heap_dev, heap_dev->dma_parms); ++ dev_err(heap_dev, "Failed to set DMA segment size, err:%d\n", err); ++ return err; ++ } + } ++ ++ return 0; +} ++EXPORT_SYMBOL_GPL(rk_dma_heap_set_dev); + -+void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) ++struct rk_dma_heap *rk_dma_heap_find(const char *name) +{ -+ if (kbasep_amba_register_present(kbdev)) { -+ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); ++ struct rk_dma_heap *h; + -+ val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); -+ kbase_reg_write(kbdev, AMBA_ENABLE, val); -+ } else { -+ WARN(1, "invalidate_hint not supported"); ++ mutex_lock(&rk_heap_list_lock); ++ list_for_each_entry(h, &rk_heap_list, list) { ++ if (!strcmp(h->name, name)) { ++ kref_get(&h->refcount); ++ mutex_unlock(&rk_heap_list_lock); ++ return h; ++ } + } ++ mutex_unlock(&rk_heap_list_lock); ++ return NULL; +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h -new file mode 100644 -index 
000000000..758e3be08 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h -@@ -0,0 +1,65 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ -+#define _KBASE_CACHE_POLICY_BACKEND_H_ -+ -+#include "mali_kbase.h" -+#include ++EXPORT_SYMBOL_GPL(rk_dma_heap_find); + -+/** -+ * kbase_cache_set_coherency_mode() - Sets the system coherency mode -+ * in the GPU. -+ * @kbdev: Device pointer -+ * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE -+ */ -+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, -+ u32 mode); ++void rk_dma_heap_buffer_free(struct dma_buf *dmabuf) ++{ ++ dma_buf_put(dmabuf); ++} ++EXPORT_SYMBOL_GPL(rk_dma_heap_buffer_free); + -+/** -+ * kbase_cache_get_coherency_features() - Get the coherency features -+ * in the GPU. -+ * @kbdev: Device pointer -+ * -+ * Return: Register value to be returned -+ */ -+u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); ++struct dma_buf *rk_dma_heap_buffer_alloc(struct rk_dma_heap *heap, size_t len, ++ unsigned int fd_flags, ++ unsigned int heap_flags, ++ const char *name) ++{ ++ struct dma_buf *dmabuf; + -+/** -+ * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support -+ * in the GPU. -+ * @kbdev: Device pointer -+ * @enable: true for enable. -+ * -+ * Note: Only for arch version 12.x.1 onwards. -+ */ -+void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, -+ bool enable); -+/** -+ * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint -+ * in the GPU. -+ * @kbdev: Device pointer -+ * @enable: true for enable. -+ * -+ * Note: Only for arch version 12.x.1 onwards. -+ */ -+void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); -+#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c -new file mode 100644 -index 000000000..ddd03ca23 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c -@@ -0,0 +1,326 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (fd_flags & ~RK_DMA_HEAP_VALID_FD_FLAGS) ++ return ERR_PTR(-EINVAL); + -+/* -+ * Implementation of the GPU clock rate trace manager. -+ */ ++ /* ++ * Allocations from all heaps have to begin ++ * and end on page boundaries. ++ */ ++ len = PAGE_ALIGN(len); ++ if (!len) ++ return ERR_PTR(-EINVAL); + -+#include -+#include -+#include -+#include -+#include -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags, name); + -+#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY -+#include -+#else -+#include "mali_power_gpu_frequency_trace.h" -+#endif ++ if (IS_ENABLED(CONFIG_DMABUF_RK_HEAPS_DEBUG) && !IS_ERR(dmabuf)) ++ dma_buf_set_name(dmabuf, name); + -+#ifndef CLK_RATE_TRACE_OPS -+#define CLK_RATE_TRACE_OPS (NULL) -+#endif ++ return dmabuf; ++} ++EXPORT_SYMBOL_GPL(rk_dma_heap_buffer_alloc); + -+/** -+ * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. -+ * @kbdev: Pointer to kbase device, used to check if arbitration is enabled -+ * when compiled with arbiter support. -+ * Return: Pointer to clk trace ops if supported or NULL. -+ */ -+static struct kbase_clk_rate_trace_op_conf * -+get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) ++int rk_dma_heap_bufferfd_alloc(struct rk_dma_heap *heap, size_t len, ++ unsigned int fd_flags, ++ unsigned int heap_flags, ++ const char *name) +{ -+ /* base case */ -+ struct kbase_clk_rate_trace_op_conf *callbacks = -+ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; -+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) -+ const void *arbiter_if_node; ++ struct dma_buf *dmabuf; ++ int fd; + -+ if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) -+ return callbacks; ++ dmabuf = rk_dma_heap_buffer_alloc(heap, len, fd_flags, heap_flags, ++ name); + -+ arbiter_if_node = -+ of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); -+ /* Arbitration enabled, override the callback pointer.*/ -+ if (arbiter_if_node) -+ callbacks = &arb_clk_rate_trace_ops; -+ else -+ dev_dbg(kbdev->dev, -+ "Arbitration supported but disabled by platform. 
Leaving clk rate callbacks as default.\n"); ++ if (IS_ERR(dmabuf)) ++ return PTR_ERR(dmabuf); + -+#endif ++ fd = dma_buf_fd(dmabuf, fd_flags); ++ if (fd < 0) { ++ dma_buf_put(dmabuf); ++ /* just return, as put will call release and that will free */ ++ } ++ ++ return fd; + -+ return callbacks; +} ++EXPORT_SYMBOL_GPL(rk_dma_heap_bufferfd_alloc); + -+static int gpu_clk_rate_change_notifier(struct notifier_block *nb, -+ unsigned long event, void *data) ++struct page *rk_dma_heap_alloc_contig_pages(struct rk_dma_heap *heap, ++ size_t len, const char *name) +{ -+ struct kbase_gpu_clk_notifier_data *ndata = data; -+ struct kbase_clk_data *clk_data = -+ container_of(nb, struct kbase_clk_data, clk_rate_change_nb); -+ struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm; -+ unsigned long flags; ++ if (!heap->support_cma) { ++ WARN_ON(!heap->support_cma); ++ return ERR_PTR(-EINVAL); ++ } + -+ if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle)) -+ return NOTIFY_BAD; ++ len = PAGE_ALIGN(len); ++ if (!len) ++ return ERR_PTR(-EINVAL); + -+ spin_lock_irqsave(&clk_rtm->lock, flags); -+ if (event == POST_RATE_CHANGE) { -+ if (!clk_rtm->gpu_idle && -+ (clk_data->clock_val != ndata->new_rate)) { -+ kbase_clk_rate_trace_manager_notify_all( -+ clk_rtm, clk_data->index, ndata->new_rate); -+ } ++ return heap->ops->alloc_contig_pages(heap, len, name); ++} ++EXPORT_SYMBOL_GPL(rk_dma_heap_alloc_contig_pages); + -+ clk_data->clock_val = ndata->new_rate; ++void rk_dma_heap_free_contig_pages(struct rk_dma_heap *heap, ++ struct page *pages, size_t len, ++ const char *name) ++{ ++ if (!heap->support_cma) { ++ WARN_ON(!heap->support_cma); ++ return; + } -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); + -+ return NOTIFY_DONE; ++ return heap->ops->free_contig_pages(heap, pages, len, name); +} ++EXPORT_SYMBOL_GPL(rk_dma_heap_free_contig_pages); + -+static int gpu_clk_data_init(struct kbase_device *kbdev, -+ void *gpu_clk_handle, unsigned int index) ++void rk_dma_heap_total_inc(struct rk_dma_heap *heap, size_t len) +{ -+ struct kbase_clk_rate_trace_op_conf *callbacks; -+ struct kbase_clk_data *clk_data; -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ int ret = 0; ++ mutex_lock(&rk_heap_list_lock); ++ heap->total_size += len; ++ mutex_unlock(&rk_heap_list_lock); ++} + -+ callbacks = get_clk_rate_trace_callbacks(kbdev); ++void rk_dma_heap_total_dec(struct rk_dma_heap *heap, size_t len) ++{ ++ mutex_lock(&rk_heap_list_lock); ++ if (WARN_ON(heap->total_size < len)) ++ heap->total_size = 0; ++ else ++ heap->total_size -= len; ++ mutex_unlock(&rk_heap_list_lock); ++} + -+ if (WARN_ON(!callbacks) || -+ WARN_ON(!gpu_clk_handle) || -+ WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) -+ return -EINVAL; ++static int rk_dma_heap_open(struct inode *inode, struct file *file) ++{ ++ struct rk_dma_heap *heap; + -+ clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); -+ if (!clk_data) { -+ dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); -+ return -ENOMEM; ++ heap = xa_load(&rk_dma_heap_minors, iminor(inode)); ++ if (!heap) { ++ pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); ++ return -ENODEV; + } + -+ clk_data->index = (u8)index; -+ clk_data->gpu_clk_handle = gpu_clk_handle; -+ /* Store the initial value of clock */ -+ clk_data->clock_val = -+ callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); ++ /* instance data as context */ ++ file->private_data = heap; ++ nonseekable_open(inode, file); + -+ { -+ /* At the initialization time, GPU is 
powered off. */ -+ unsigned long flags; ++ return 0; ++} + -+ spin_lock_irqsave(&clk_rtm->lock, flags); -+ kbase_clk_rate_trace_manager_notify_all( -+ clk_rtm, clk_data->index, 0); -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); -+ } ++static long rk_dma_heap_ioctl_allocate(struct file *file, void *data) ++{ ++ struct rk_dma_heap_allocation_data *heap_allocation = data; ++ struct rk_dma_heap *heap = file->private_data; ++ int fd; + -+ clk_data->clk_rtm = clk_rtm; -+ clk_rtm->clks[index] = clk_data; ++ if (heap_allocation->fd) ++ return -EINVAL; + -+ clk_data->clk_rate_change_nb.notifier_call = -+ gpu_clk_rate_change_notifier; ++ fd = rk_dma_heap_bufferfd_alloc(heap, heap_allocation->len, ++ heap_allocation->fd_flags, ++ heap_allocation->heap_flags, NULL); ++ if (fd < 0) ++ return fd; + -+ if (callbacks->gpu_clk_notifier_register) -+ ret = callbacks->gpu_clk_notifier_register(kbdev, -+ gpu_clk_handle, &clk_data->clk_rate_change_nb); -+ if (ret) { -+ dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); -+ kfree(clk_data); -+ } ++ heap_allocation->fd = fd; + -+ return ret; ++ return 0; +} + -+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) ++static unsigned int rk_dma_heap_ioctl_cmds[] = { ++ RK_DMA_HEAP_IOCTL_ALLOC, ++}; ++ ++static long rk_dma_heap_ioctl(struct file *file, unsigned int ucmd, ++ unsigned long arg) +{ -+ struct kbase_clk_rate_trace_op_conf *callbacks; -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ unsigned int i; ++ char stack_kdata[128]; ++ char *kdata = stack_kdata; ++ unsigned int kcmd; ++ unsigned int in_size, out_size, drv_size, ksize; ++ int nr = _IOC_NR(ucmd); + int ret = 0; + -+ callbacks = get_clk_rate_trace_callbacks(kbdev); -+ -+ spin_lock_init(&clk_rtm->lock); -+ INIT_LIST_HEAD(&clk_rtm->listeners); -+ -+ /* Return early if no callbacks provided for clock rate tracing */ -+ if (!callbacks) { -+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); -+ return 0; -+ } -+ -+ clk_rtm->gpu_idle = true; ++ if (nr >= ARRAY_SIZE(rk_dma_heap_ioctl_cmds)) ++ return -EINVAL; + -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ void *gpu_clk_handle = -+ callbacks->enumerate_gpu_clk(kbdev, i); ++ /* Get the kernel ioctl cmd that matches */ ++ kcmd = rk_dma_heap_ioctl_cmds[nr]; + -+ if (!gpu_clk_handle) -+ break; ++ /* Figure out the delta between user cmd size and kernel cmd size */ ++ drv_size = _IOC_SIZE(kcmd); ++ out_size = _IOC_SIZE(ucmd); ++ in_size = out_size; ++ if ((ucmd & kcmd & IOC_IN) == 0) ++ in_size = 0; ++ if ((ucmd & kcmd & IOC_OUT) == 0) ++ out_size = 0; ++ ksize = max(max(in_size, out_size), drv_size); + -+ ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i); -+ if (ret) -+ goto error; ++ /* If necessary, allocate buffer for ioctl argument */ ++ if (ksize > sizeof(stack_kdata)) { ++ kdata = kmalloc(ksize, GFP_KERNEL); ++ if (!kdata) ++ return -ENOMEM; + } + -+ /* Activate clock rate trace manager if at least one GPU clock was -+ * enumerated. 
-+ */ -+ if (i) { -+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks); -+ } else { -+ dev_info(kbdev->dev, "No clock(s) available for rate tracing"); -+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++ if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { ++ ret = -EFAULT; ++ goto err; + } + -+ return 0; ++ /* zero out any difference between the kernel/user structure size */ ++ if (ksize > in_size) ++ memset(kdata + in_size, 0, ksize - in_size); + -+error: -+ while (i--) { -+ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( -+ kbdev, clk_rtm->clks[i]->gpu_clk_handle, -+ &clk_rtm->clks[i]->clk_rate_change_nb); -+ kfree(clk_rtm->clks[i]); ++ switch (kcmd) { ++ case RK_DMA_HEAP_IOCTL_ALLOC: ++ ret = rk_dma_heap_ioctl_allocate(file, kdata); ++ break; ++ default: ++ ret = -ENOTTY; ++ goto err; + } + ++ if (copy_to_user((void __user *)arg, kdata, out_size) != 0) ++ ret = -EFAULT; ++err: ++ if (kdata != stack_kdata) ++ kfree(kdata); + return ret; +} + -+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) -+{ -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ unsigned int i; -+ -+ WARN_ON(!list_empty(&clk_rtm->listeners)); -+ -+ if (!clk_rtm->clk_rate_trace_ops) -+ return; -+ -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ if (!clk_rtm->clks[i]) -+ break; -+ -+ if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) -+ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister -+ (kbdev, clk_rtm->clks[i]->gpu_clk_handle, -+ &clk_rtm->clks[i]->clk_rate_change_nb); -+ kfree(clk_rtm->clks[i]); -+ } ++static const struct file_operations rk_dma_heap_fops = { ++ .owner = THIS_MODULE, ++ .open = rk_dma_heap_open, ++ .unlocked_ioctl = rk_dma_heap_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = rk_dma_heap_ioctl, ++#endif ++}; + -+ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++/** ++ * rk_dma_heap_get_drvdata() - get per-subdriver data for the heap ++ * @heap: DMA-Heap to retrieve private data for ++ * ++ * Returns: ++ * The per-subdriver data for the heap. 
++ */ ++void *rk_dma_heap_get_drvdata(struct rk_dma_heap *heap) ++{ ++ return heap->priv; +} + -+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) ++static void rk_dma_heap_release(struct kref *ref) +{ -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ unsigned int i; -+ unsigned long flags; -+ -+ if (!clk_rtm->clk_rate_trace_ops) -+ return; -+ -+ spin_lock_irqsave(&clk_rtm->lock, flags); -+ -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; -+ -+ if (!clk_data) -+ break; ++ struct rk_dma_heap *heap = container_of(ref, struct rk_dma_heap, refcount); ++ int minor = MINOR(heap->heap_devt); + -+ if (unlikely(!clk_data->clock_val)) -+ continue; ++ /* Note, we already holding the rk_heap_list_lock here */ ++ list_del(&heap->list); + -+ kbase_clk_rate_trace_manager_notify_all( -+ clk_rtm, clk_data->index, clk_data->clock_val); -+ } ++ device_destroy(rk_dma_heap_class, heap->heap_devt); ++ cdev_del(&heap->heap_cdev); ++ xa_erase(&rk_dma_heap_minors, minor); + -+ clk_rtm->gpu_idle = false; -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++ kfree(heap); +} + -+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) ++void rk_dma_heap_put(struct rk_dma_heap *h) +{ -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ unsigned int i; -+ unsigned long flags; -+ -+ if (!clk_rtm->clk_rate_trace_ops) -+ return; -+ -+ spin_lock_irqsave(&clk_rtm->lock, flags); ++ /* ++ * Take the rk_heap_list_lock now to avoid racing with code ++ * scanning the list and then taking a kref. ++ */ ++ mutex_lock(&rk_heap_list_lock); ++ kref_put(&h->refcount, rk_dma_heap_release); ++ mutex_unlock(&rk_heap_list_lock); ++} + -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; ++/** ++ * rk_dma_heap_get_dev() - get device struct for the heap ++ * @heap: DMA-Heap to retrieve device struct from ++ * ++ * Returns: ++ * The device struct for the heap. ++ */ ++struct device *rk_dma_heap_get_dev(struct rk_dma_heap *heap) ++{ ++ return heap->heap_dev; ++} + -+ if (!clk_data) -+ break; ++/** ++ * rk_dma_heap_get_name() - get heap name ++ * @heap: DMA-Heap to retrieve private data for ++ * ++ * Returns: ++ * The char* for the heap name. 
++ */ ++const char *rk_dma_heap_get_name(struct rk_dma_heap *heap) ++{ ++ return heap->name; ++} + -+ if (unlikely(!clk_data->clock_val)) -+ continue; ++struct rk_dma_heap *rk_dma_heap_add(const struct rk_dma_heap_export_info *exp_info) ++{ ++ struct rk_dma_heap *heap, *err_ret; ++ unsigned int minor; ++ int ret; + -+ kbase_clk_rate_trace_manager_notify_all( -+ clk_rtm, clk_data->index, 0); ++ if (!exp_info->name || !strcmp(exp_info->name, "")) { ++ pr_err("rk_dma_heap: Cannot add heap without a name\n"); ++ return ERR_PTR(-EINVAL); + } + -+ clk_rtm->gpu_idle = true; -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++ if (!exp_info->ops || !exp_info->ops->allocate) { ++ pr_err("rk_dma_heap: Cannot add heap with invalid ops struct\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ /* check the name is unique */ ++ heap = rk_dma_heap_find(exp_info->name); ++ if (heap) { ++ pr_err("rk_dma_heap: Already registered heap named %s\n", ++ exp_info->name); ++ rk_dma_heap_put(heap); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (!heap) ++ return ERR_PTR(-ENOMEM); ++ ++ kref_init(&heap->refcount); ++ heap->name = exp_info->name; ++ heap->ops = exp_info->ops; ++ heap->priv = exp_info->priv; ++ heap->support_cma = exp_info->support_cma; ++ INIT_LIST_HEAD(&heap->dmabuf_list); ++ INIT_LIST_HEAD(&heap->contig_list); ++ mutex_init(&heap->dmabuf_lock); ++ mutex_init(&heap->contig_lock); ++ ++ /* Find unused minor number */ ++ ret = xa_alloc(&rk_dma_heap_minors, &minor, heap, ++ XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); ++ if (ret < 0) { ++ pr_err("rk_dma_heap: Unable to get minor number for heap\n"); ++ err_ret = ERR_PTR(ret); ++ goto err0; ++ } ++ ++ /* Create device */ ++ heap->heap_devt = MKDEV(MAJOR(rk_dma_heap_devt), minor); ++ ++ cdev_init(&heap->heap_cdev, &rk_dma_heap_fops); ++ ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); ++ if (ret < 0) { ++ pr_err("dma_heap: Unable to add char device\n"); ++ err_ret = ERR_PTR(ret); ++ goto err1; ++ } ++ ++ heap->heap_dev = device_create(rk_dma_heap_class, ++ NULL, ++ heap->heap_devt, ++ NULL, ++ heap->name); ++ if (IS_ERR(heap->heap_dev)) { ++ pr_err("rk_dma_heap: Unable to create device\n"); ++ err_ret = ERR_CAST(heap->heap_dev); ++ goto err2; ++ } ++ ++ heap->procfs = proc_rk_dma_heap_dir; ++ ++ /* Make sure it doesn't disappear on us */ ++ heap->heap_dev = get_device(heap->heap_dev); ++ ++ /* Add heap to the list */ ++ mutex_lock(&rk_heap_list_lock); ++ list_add(&heap->list, &rk_heap_list); ++ mutex_unlock(&rk_heap_list_lock); ++ ++ return heap; ++ ++err2: ++ cdev_del(&heap->heap_cdev); ++err1: ++ xa_erase(&rk_dma_heap_minors, minor); ++err0: ++ kfree(heap); ++ return err_ret; +} + -+void kbase_clk_rate_trace_manager_notify_all( -+ struct kbase_clk_rate_trace_manager *clk_rtm, -+ u32 clk_index, -+ unsigned long new_rate) ++static char *rk_dma_heap_devnode(struct device *dev, umode_t *mode) +{ -+ struct kbase_clk_rate_listener *pos; -+ struct kbase_device *kbdev; ++ return kasprintf(GFP_KERNEL, "rk_dma_heap/%s", dev_name(dev)); ++} + -+ lockdep_assert_held(&clk_rtm->lock); ++static int rk_dma_heap_dump_dmabuf(const struct dma_buf *dmabuf, void *data) ++{ ++ struct rk_dma_heap *heap = (struct rk_dma_heap *)data; ++ struct rk_dma_heap_dmabuf *buf; ++ struct dma_buf_attachment *a; ++ phys_addr_t size; ++ int attach_count; ++ int ret; + -+ kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); ++ if (!strcmp(dmabuf->exp_name, heap->name)) { ++ seq_printf(heap->s, "dma-heap:<%s> -dmabuf", heap->name); ++ 
mutex_lock(&heap->dmabuf_lock); ++ list_for_each_entry(buf, &heap->dmabuf_list, node) { ++ if (buf->dmabuf->file->f_inode->i_ino == ++ dmabuf->file->f_inode->i_ino) { ++ seq_printf(heap->s, ++ "\ti_ino = %ld\n", ++ dmabuf->file->f_inode->i_ino); ++ size = buf->end - buf->start + 1; ++ seq_printf(heap->s, ++ "\tAlloc by (%-20s)\t[%pa-%pa]\t%pa (%lu KiB)\n", ++ dmabuf->name, &buf->start, ++ &buf->end, &size, K(size)); ++ seq_puts(heap->s, "\t\tAttached Devices:\n"); ++ attach_count = 0; ++ ret = dma_resv_lock_interruptible(dmabuf->resv, ++ NULL); ++ if (ret) ++ goto error_unlock; ++ list_for_each_entry(a, &dmabuf->attachments, ++ node) { ++ seq_printf(heap->s, "\t\t%s\n", ++ dev_name(a->dev)); ++ attach_count++; ++ } ++ dma_resv_unlock(dmabuf->resv); ++ seq_printf(heap->s, ++ "Total %d devices attached\n\n", ++ attach_count); ++ } ++ } ++ mutex_unlock(&heap->dmabuf_lock); ++ } + -+ dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", -+ __func__, clk_index, new_rate, current->pid); ++ return 0; ++error_unlock: ++ mutex_unlock(&heap->dmabuf_lock); ++ return ret; ++} + -+ /* Raise standard `power/gpu_frequency` ftrace event */ -+ { -+ unsigned long new_rate_khz = new_rate; ++static int rk_dma_heap_dump_contig(void *data) ++{ ++ struct rk_dma_heap *heap = (struct rk_dma_heap *)data; ++ struct rk_dma_heap_contig_buf *buf; ++ phys_addr_t size; + -+#if BITS_PER_LONG == 64 -+ do_div(new_rate_khz, 1000); -+#elif BITS_PER_LONG == 32 -+ new_rate_khz /= 1000; ++ mutex_lock(&heap->contig_lock); ++ list_for_each_entry(buf, &heap->contig_list, node) { ++ size = buf->end - buf->start + 1; ++ seq_printf(heap->s, "dma-heap:<%s> -non dmabuf\n", heap->name); ++ seq_printf(heap->s, "\tAlloc by (%-20s)\t[%pa-%pa]\t%pa (%lu KiB)\n", ++ buf->orig_alloc, &buf->start, &buf->end, &size, K(size)); ++ } ++ mutex_unlock(&heap->contig_lock); ++ ++ return 0; ++} ++ ++static ssize_t rk_total_pools_kb_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ struct rk_dma_heap *heap; ++ u64 total_pool_size = 0; ++ ++ mutex_lock(&rk_heap_list_lock); ++ list_for_each_entry(heap, &rk_heap_list, list) ++ if (heap->ops->get_pool_size) ++ total_pool_size += heap->ops->get_pool_size(heap); ++ mutex_unlock(&rk_heap_list_lock); ++ ++ return sysfs_emit(buf, "%llu\n", total_pool_size / 1024); ++} ++ ++static struct kobj_attribute rk_total_pools_kb_attr = ++ __ATTR_RO(rk_total_pools_kb); ++ ++static struct attribute *rk_dma_heap_sysfs_attrs[] = { ++ &rk_total_pools_kb_attr.attr, ++ NULL, ++}; ++ ++ATTRIBUTE_GROUPS(rk_dma_heap_sysfs); ++ ++static struct kobject *rk_dma_heap_kobject; ++ ++static int rk_dma_heap_sysfs_setup(void) ++{ ++ int ret; ++ ++ rk_dma_heap_kobject = kobject_create_and_add("rk_dma_heap", ++ kernel_kobj); ++ if (!rk_dma_heap_kobject) ++ return -ENOMEM; ++ ++ ret = sysfs_create_groups(rk_dma_heap_kobject, ++ rk_dma_heap_sysfs_groups); ++ if (ret) { ++ kobject_put(rk_dma_heap_kobject); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void rk_dma_heap_sysfs_teardown(void) ++{ ++ kobject_put(rk_dma_heap_kobject); ++} ++ ++#ifdef CONFIG_DEBUG_FS ++ ++static struct dentry *rk_dma_heap_debugfs_dir; ++ ++static int rk_dma_heap_debug_show(struct seq_file *s, void *unused) ++{ ++ struct rk_dma_heap *heap; ++ unsigned long total = 0; ++ ++ mutex_lock(&rk_heap_list_lock); ++ list_for_each_entry(heap, &rk_heap_list, list) { ++ heap->s = s; ++ dma_buf_get_each(rk_dma_heap_dump_dmabuf, heap); ++ rk_dma_heap_dump_contig(heap); ++ total += heap->total_size; ++ } ++ seq_printf(s, "\nTotal : 0x%lx 
(%lu KiB)\n", total, K(total)); ++ mutex_unlock(&rk_heap_list_lock); ++ ++ return 0; ++} ++DEFINE_SHOW_ATTRIBUTE(rk_dma_heap_debug); ++ ++static int rk_dma_heap_init_debugfs(void) ++{ ++ struct dentry *d; ++ int err = 0; ++ ++ d = debugfs_create_dir("rk_dma_heap", NULL); ++ if (IS_ERR(d)) ++ return PTR_ERR(d); ++ ++ rk_dma_heap_debugfs_dir = d; ++ ++ d = debugfs_create_file("dma_heap_info", 0444, ++ rk_dma_heap_debugfs_dir, NULL, ++ &rk_dma_heap_debug_fops); ++ if (IS_ERR(d)) { ++ dma_heap_print("rk_dma_heap : debugfs: failed to create node bufinfo\n"); ++ debugfs_remove_recursive(rk_dma_heap_debugfs_dir); ++ rk_dma_heap_debugfs_dir = NULL; ++ err = PTR_ERR(d); ++ } ++ ++ return err; ++} +#else -+#error "unsigned long division is not supported for this architecture" ++static inline int rk_dma_heap_init_debugfs(void) ++{ ++ return 0; ++} +#endif + -+ trace_gpu_frequency(new_rate_khz, clk_index); ++static int rk_dma_heap_proc_show(struct seq_file *s, void *unused) ++{ ++ struct rk_dma_heap *heap; ++ unsigned long total = 0; ++ ++ mutex_lock(&rk_heap_list_lock); ++ list_for_each_entry(heap, &rk_heap_list, list) { ++ heap->s = s; ++ dma_buf_get_each(rk_dma_heap_dump_dmabuf, heap); ++ rk_dma_heap_dump_contig(heap); ++ total += heap->total_size; + } ++ seq_printf(s, "\nTotal : 0x%lx (%lu KiB)\n", total, K(total)); ++ mutex_unlock(&rk_heap_list_lock); + -+ /* Notify the listeners. */ -+ list_for_each_entry(pos, &clk_rtm->listeners, node) { -+ pos->notify(pos, clk_index, new_rate); ++ return 0; ++} ++ ++static int rk_dma_heap_info_proc_open(struct inode *inode, ++ struct file *file) ++{ ++ return single_open(file, rk_dma_heap_proc_show, NULL); ++} ++ ++static const struct proc_ops rk_dma_heap_info_proc_fops = { ++ .proc_open = rk_dma_heap_info_proc_open, ++ .proc_read = seq_read, ++ .proc_lseek = seq_lseek, ++ .proc_release = single_release, ++}; ++ ++static int rk_dma_heap_init_proc(void) ++{ ++ proc_rk_dma_heap_dir = proc_mkdir("rk_dma_heap", NULL); ++ if (!proc_rk_dma_heap_dir) { ++ pr_err("create rk_dma_heap proc dir error\n"); ++ return -ENOENT; + } ++ ++ proc_create("dma_heap_info", 0644, proc_rk_dma_heap_dir, ++ &rk_dma_heap_info_proc_fops); ++ ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all); -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h ++ ++static int rk_dma_heap_init(void) ++{ ++ int ret; ++ ++ ret = rk_dma_heap_sysfs_setup(); ++ if (ret) ++ return ret; ++ ++ ret = alloc_chrdev_region(&rk_dma_heap_devt, 0, NUM_HEAP_MINORS, ++ DEVNAME); ++ if (ret) ++ goto err_chrdev; ++ ++ rk_dma_heap_class = class_create(THIS_MODULE, DEVNAME); ++ if (IS_ERR(rk_dma_heap_class)) { ++ ret = PTR_ERR(rk_dma_heap_class); ++ goto err_class; ++ } ++ rk_dma_heap_class->devnode = rk_dma_heap_devnode; ++ ++ rk_dma_heap_init_debugfs(); ++ rk_dma_heap_init_proc(); ++ ++ return 0; ++ ++err_class: ++ unregister_chrdev_region(rk_dma_heap_devt, NUM_HEAP_MINORS); ++err_chrdev: ++ rk_dma_heap_sysfs_teardown(); ++ return ret; ++} ++subsys_initcall(rk_dma_heap_init); +diff --git a/drivers/dma-buf/rk_heaps/rk-dma-heap.h b/drivers/dma-buf/rk_heaps/rk-dma-heap.h new file mode 100644 -index 000000000..35b3b8d06 +index 000000000..3bc750b02 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h -@@ -0,0 +1,154 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/dma-buf/rk_heaps/rk-dma-heap.h +@@ -0,0 +1,178 @@ ++/* 
SPDX-License-Identifier: GPL-2.0 */ +/* ++ * DMABUF Heaps Allocation Infrastructure + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * Copyright (C) 2011 Google, Inc. ++ * Copyright (C) 2019 Linaro Ltd. ++ * Copyright (C) 2022 Rockchip Electronics Co. Ltd. ++ * Author: Simon Xue + */ + -+#ifndef _KBASE_CLK_RATE_TRACE_MGR_ -+#define _KBASE_CLK_RATE_TRACE_MGR_ ++#ifndef _RK_DMA_HEAPS_H ++#define _RK_DMA_HEAPS_H + -+/* The index of top clock domain in kbase_clk_rate_trace_manager:clks. */ -+#define KBASE_CLOCK_DOMAIN_TOP (0) ++#include ++#include ++#include ++#include + -+/* The index of shader-cores clock domain in -+ * kbase_clk_rate_trace_manager:clks. -+ */ -+#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1) ++#if defined(CONFIG_DMABUF_RK_HEAPS_DEBUG_PRINT) ++#define dma_heap_print(fmt, ...) \ ++ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define dma_heap_print(fmt, ...) \ ++ no_printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++#define RK_DMA_HEAP_NAME_LEN 16 ++ ++struct rk_vmap_pfn_data { ++ unsigned long pfn; /* first pfn of contiguous */ ++ pgprot_t prot; ++}; + +/** -+ * struct kbase_clk_data - Data stored per enumerated GPU clock. ++ * struct rk_dma_heap_ops - ops to operate on a given heap ++ * @allocate: allocate dmabuf and return struct dma_buf ptr ++ * @get_pool_size: if heap maintains memory pools, get pool size in bytes + * -+ * @clk_rtm: Pointer to clock rate trace manager object. -+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock. -+ * @plat_private: Private data for the platform to store into -+ * @clk_rate_change_nb: notifier block containing the pointer to callback -+ * function that is invoked whenever the rate of -+ * enumerated GPU clock changes. -+ * @clock_val: Current rate of the enumerated GPU clock. -+ * @index: Index at which the GPU clock was enumerated. ++ * allocate returns dmabuf on success, ERR_PTR(-errno) on error. + */ -+struct kbase_clk_data { -+ struct kbase_clk_rate_trace_manager *clk_rtm; -+ void *gpu_clk_handle; -+ void *plat_private; -+ struct notifier_block clk_rate_change_nb; -+ unsigned long clock_val; -+ u8 index; ++struct rk_dma_heap_ops { ++ struct dma_buf *(*allocate)(struct rk_dma_heap *heap, ++ unsigned long len, ++ unsigned long fd_flags, ++ unsigned long heap_flags, ++ const char *name); ++ struct page *(*alloc_contig_pages)(struct rk_dma_heap *heap, ++ size_t len, const char *name); ++ void (*free_contig_pages)(struct rk_dma_heap *heap, ++ struct page *pages, size_t len, ++ const char *name); ++ long (*get_pool_size)(struct rk_dma_heap *heap); +}; + +/** -+ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager. 
-+ * -+ * @kbdev: Device pointer ++ * struct rk_dma_heap_export_info - information needed to export a new dmabuf heap ++ * @name: used for debugging/device-node name ++ * @ops: ops struct for this heap ++ * @priv: heap exporter private data + * -+ * Return: 0 if success, or an error code on failure. ++ * Information needed to export a new dmabuf heap. + */ -+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); ++struct rk_dma_heap_export_info { ++ const char *name; ++ const struct rk_dma_heap_ops *ops; ++ void *priv; ++ bool support_cma; ++}; + +/** -+ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. ++ * struct rk_dma_heap - represents a dmabuf heap in the system ++ * @name: used for debugging/device-node name ++ * @ops: ops struct for this heap ++ * @heap_devt heap device node ++ * @list list head connecting to list of heaps ++ * @heap_cdev heap char device ++ * @heap_dev heap device struct + * -+ * @kbdev: Device pointer ++ * Represents a heap of memory from which buffers can be made. + */ -+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev); ++struct rk_dma_heap { ++ const char *name; ++ const struct rk_dma_heap_ops *ops; ++ void *priv; ++ dev_t heap_devt; ++ struct list_head list; ++ struct list_head dmabuf_list; /* dmabuf attach to this node */ ++ struct mutex dmabuf_lock; ++ struct list_head contig_list; /* contig buffer attach to this node */ ++ struct mutex contig_lock; ++ struct cdev heap_cdev; ++ struct kref refcount; ++ struct device *heap_dev; ++ bool support_cma; ++ struct seq_file *s; ++ struct proc_dir_entry *procfs; ++ unsigned long total_size; ++}; ++ ++struct rk_dma_heap_dmabuf { ++ struct list_head node; ++ struct dma_buf *dmabuf; ++ const char *orig_alloc; ++ phys_addr_t start; ++ phys_addr_t end; ++}; ++ ++struct rk_dma_heap_contig_buf { ++ struct list_head node; ++ const char *orig_alloc; ++ phys_addr_t start; ++ phys_addr_t end; ++}; + +/** -+ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace -+ * manager of GPU becoming active. ++ * rk_dma_heap_get_drvdata() - get per-heap driver data ++ * @heap: DMA-Heap to retrieve private data for + * -+ * @kbdev: Device pointer ++ * Returns: ++ * The per-heap data for the heap. + */ -+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev); ++void *rk_dma_heap_get_drvdata(struct rk_dma_heap *heap); + +/** -+ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace -+ * manager of GPU becoming idle. -+ * @kbdev: Device pointer ++ * rk_dma_heap_get_dev() - get device struct for the heap ++ * @heap: DMA-Heap to retrieve device struct from ++ * ++ * Returns: ++ * The device struct for the heap. + */ -+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); ++struct device *rk_dma_heap_get_dev(struct rk_dma_heap *heap); + +/** -+ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener. -+ * -+ * @clk_rtm: Clock rate manager instance. -+ * @listener: Listener handle ++ * rk_dma_heap_get_name() - get heap name ++ * @heap: DMA-Heap to retrieve private data for + * -+ * kbase_clk_rate_trace_manager:lock must be held by the caller. ++ * Returns: ++ * The char* for the heap name. 
+ */ -+static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( -+ struct kbase_clk_rate_trace_manager *clk_rtm, -+ struct kbase_clk_rate_listener *listener) -+{ -+ lockdep_assert_held(&clk_rtm->lock); -+ list_add(&listener->node, &clk_rtm->listeners); -+} ++const char *rk_dma_heap_get_name(struct rk_dma_heap *heap); + +/** -+ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener. -+ * -+ * @clk_rtm: Clock rate manager instance. -+ * @listener: Listener handle ++ * rk_dma_heap_add - adds a heap to dmabuf heaps ++ * @exp_info: information needed to register this heap + */ -+static inline void kbase_clk_rate_trace_manager_subscribe( -+ struct kbase_clk_rate_trace_manager *clk_rtm, -+ struct kbase_clk_rate_listener *listener) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&clk_rtm->lock, flags); -+ kbase_clk_rate_trace_manager_subscribe_no_lock( -+ clk_rtm, listener); -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); -+} ++struct rk_dma_heap *rk_dma_heap_add(const struct rk_dma_heap_export_info *exp_info); + +/** -+ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener. -+ * -+ * @clk_rtm: Clock rate manager instance. -+ * @listener: Listener handle ++ * rk_dma_heap_put - drops a reference to a dmabuf heaps, potentially freeing it ++ * @heap: heap pointer + */ -+static inline void kbase_clk_rate_trace_manager_unsubscribe( -+ struct kbase_clk_rate_trace_manager *clk_rtm, -+ struct kbase_clk_rate_listener *listener) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&clk_rtm->lock, flags); -+ list_del(&listener->node); -+ spin_unlock_irqrestore(&clk_rtm->lock, flags); -+} ++void rk_dma_heap_put(struct rk_dma_heap *heap); + +/** -+ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock \ -+ * rate listeners. -+ * -+ * @clk_rtm: Clock rate manager instance. -+ * @clock_index: Clock index. -+ * @new_rate: New clock frequency(Hz) -+ * -+ * kbase_clk_rate_trace_manager:lock must be locked. -+ * This function is exported to be used by clock rate trace test -+ * portal. 
++ * rk_vmap_contig_pfn - Map contiguous pfn to vm area ++ * @pfn: indicate the first pfn of contig ++ * @count: count of pfns ++ * @prot: for mapping + */ -+void kbase_clk_rate_trace_manager_notify_all( -+ struct kbase_clk_rate_trace_manager *clk_rtm, -+ u32 clock_index, -+ unsigned long new_rate); ++void *rk_vmap_contig_pfn(unsigned long pfn, unsigned int count, ++ pgprot_t prot); ++/** ++ * rk_dma_heap_total_inc - Increase total buffer size ++ * @heap: dma_heap to increase ++ * @len: length to increase ++ */ ++void rk_dma_heap_total_inc(struct rk_dma_heap *heap, size_t len); ++/** ++ * rk_dma_heap_total_dec - Decrease total buffer size ++ * @heap: dma_heap to decrease ++ * @len: length to decrease ++ */ ++void rk_dma_heap_total_dec(struct rk_dma_heap *heap, size_t len); ++/** ++ * rk_dma_heap_get_cma - get cma structure ++ */ ++struct cma *rk_dma_heap_get_cma(void); ++#endif /* _DMA_HEAPS_H */ +diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c +index f0a35277f..dcbc3edda 100644 +--- a/drivers/dma-buf/sw_sync.c ++++ b/drivers/dma-buf/sw_sync.c +@@ -7,6 +7,8 @@ + + #include + #include ++#include ++#include + #include + #include + #include +@@ -410,3 +412,13 @@ const struct file_operations sw_sync_debugfs_fops = { + .unlocked_ioctl = sw_sync_ioctl, + .compat_ioctl = compat_ptr_ioctl, + }; + -+#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ ++static struct miscdevice sw_sync_dev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "sw_sync", ++ .fops = &sw_sync_debugfs_fops, ++}; + -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c ++module_misc_device(sw_sync_dev); ++ ++MODULE_LICENSE("GPL v2"); +diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig +index 110e99b86..012a4acd0 100644 +--- a/drivers/edac/Kconfig ++++ b/drivers/edac/Kconfig +@@ -561,4 +561,11 @@ config EDAC_NPCM + error detection (in-line ECC in which a section 1/8th of the memory + device used to store data is used for ECC storage). + ++config EDAC_ROCKCHIP ++ tristate "Rockchip DDR ECC" ++ depends on ARCH_ROCKCHIP && HAVE_ARM_SMCCC ++ help ++ Support for error detection and correction on the ++ rockchip family of SOCs. ++ + endif # EDAC +diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile +index 446364264..6c6f0caca 100644 +--- a/drivers/edac/Makefile ++++ b/drivers/edac/Makefile +@@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o + obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o + obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o + obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o ++obj-$(CONFIG_EDAC_ROCKCHIP) += rockchip_edac.o +diff --git a/drivers/edac/rockchip_edac.c b/drivers/edac/rockchip_edac.c new file mode 100644 -index 000000000..e121b417f +index 000000000..4b1317bed --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c -@@ -0,0 +1,163 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/edac/rockchip_edac.c +@@ -0,0 +1,358 @@ ++// SPDX-License-Identifier: GPL-2.0 +/* -+ * -+ * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * Copyright (c) 2023 Rockchip Electronics Co., Ltd. + */ + -+#include -+#include -+#include "mali_kbase_debug_job_fault.h" ++#include ++#include ++#include ++#include ++#include + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "edac_module.h" + -+/*GPU_CONTROL_REG(r)*/ -+static int gpu_control_reg_snapshot[] = { -+ GPU_ID, -+ SHADER_READY_LO, -+ SHADER_READY_HI, -+ TILER_READY_LO, -+ TILER_READY_HI, -+ L2_READY_LO, -+ L2_READY_HI -+}; ++#define MAX_CS (4) + -+/* JOB_CONTROL_REG(r) */ -+static int job_control_reg_snapshot[] = { -+ JOB_IRQ_MASK, -+ JOB_IRQ_STATUS -+}; ++#define MAX_CH (1) + -+/* JOB_SLOT_REG(n,r) */ -+static int job_slot_reg_snapshot[] = { -+ JS_HEAD_LO, -+ JS_HEAD_HI, -+ JS_TAIL_LO, -+ JS_TAIL_HI, -+ JS_AFFINITY_LO, -+ JS_AFFINITY_HI, -+ JS_CONFIG, -+ JS_STATUS, -+ JS_HEAD_NEXT_LO, -+ JS_HEAD_NEXT_HI, -+ JS_AFFINITY_NEXT_LO, -+ JS_AFFINITY_NEXT_HI, -+ JS_CONFIG_NEXT ++#define RK_EDAC_MOD "1" ++ ++/* ECCCADDR0 */ ++#define ECC_CORR_RANK_SHIFT (24) ++#define ECC_CORR_RANK_MASK (0x3) ++#define ECC_CORR_ROW_MASK (0x3ffff) ++/* ECCCADDR1 */ ++#define ECC_CORR_CID_SHIFT (28) ++#define ECC_CORR_CID_MASK (0x3) ++#define ECC_CORR_BG_SHIFT (24) ++#define ECC_CORR_BG_MASK (0x3) ++#define ECC_CORR_BANK_SHIFT (16) ++#define ECC_CORR_BANK_MASK (0x7) ++#define ECC_CORR_COL_MASK (0xfff) ++/* ECCUADDR0 */ ++#define ECC_UNCORR_RANK_SHIFT (24) ++#define ECC_UNCORR_RANK_MASK (0x3) ++#define ECC_UNCORR_ROW_MASK (0x3ffff) ++/* ECCUADDR1 */ ++#define ECC_UNCORR_CID_SHIFT (28) ++#define ECC_UNCORR_CID_MASK (0x3) ++#define ECC_UNCORR_BG_SHIFT (24) ++#define ECC_UNCORR_BG_MASK (0x3) ++#define ECC_UNCORR_BANK_SHIFT (16) ++#define ECC_UNCORR_BANK_MASK (0x7) ++#define ECC_UNCORR_COL_MASK (0xfff) ++ ++/** ++ * struct ddr_ecc_error_info - DDR ECC error log information ++ * @err_cnt: error count ++ * @rank: Rank number ++ * @row: Row number ++ * @chip_id: Chip id number ++ * @bank_group: Bank Group number ++ * @bank: Bank number ++ * @col: Column number ++ * @bitpos: Bit position ++ */ ++struct ddr_ecc_error_info { ++ u32 err_cnt; ++ u32 rank; ++ u32 row; ++ u32 chip_id; ++ u32 bank_group; ++ u32 bank; ++ u32 col; ++ u32 bitpos; +}; + -+/*MMU_REG(r)*/ -+static int mmu_reg_snapshot[] = { -+ MMU_IRQ_MASK, -+ MMU_IRQ_STATUS ++/** ++ * struct ddr_ecc_status - DDR ECC status information to report ++ * @ceinfo: Correctable error log information ++ * @ueinfo: Uncorrectable error log information ++ */ ++struct ddr_ecc_status { ++ struct ddr_ecc_error_info ceinfo; ++ struct ddr_ecc_error_info ueinfo; +}; + -+/* MMU_AS_REG(n,r) */ -+static int as_reg_snapshot[] = { -+ AS_TRANSTAB_LO, -+ AS_TRANSTAB_HI, -+ AS_TRANSCFG_LO, -+ AS_TRANSCFG_HI, -+ AS_MEMATTR_LO, -+ AS_MEMATTR_HI, -+ AS_FAULTSTATUS, -+ AS_FAULTADDRESS_LO, -+ AS_FAULTADDRESS_HI, -+ AS_STATUS ++/** ++ * struct rk_edac_priv - RK DDR memory controller private instance data ++ * @name: EDAC name ++ * @stat: DDR ECC status information ++ * @ce_cnt: Correctable Error count ++ * @ue_cnt: Uncorrectable Error count ++ * @irq_ce: Corrected interrupt number ++ * @irq_ue: Uncorrected interrupt number ++ */ ++struct 
rk_edac_priv { ++ char *name; ++ struct ddr_ecc_status stat; ++ u32 ce_cnt; ++ u32 ue_cnt; ++ int irq_ce; ++ int irq_ue; +}; + -+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, -+ int reg_range) -+{ -+ int i, j; -+ int offset = 0; -+ int slot_number; -+ int as_number; ++static struct ddr_ecc_status *ddr_edac_info; + -+ if (kctx->reg_dump == NULL) -+ return false; ++static inline void opstate_init_int(void) ++{ ++ switch (edac_op_state) { ++ case EDAC_OPSTATE_POLL: ++ case EDAC_OPSTATE_INT: ++ break; ++ default: ++ edac_op_state = EDAC_OPSTATE_INT; ++ break; ++ } ++} + -+ slot_number = kctx->kbdev->gpu_props.num_job_slots; -+ as_number = kctx->kbdev->gpu_props.num_address_spaces; ++static void rockchip_edac_handle_ce_error(struct mem_ctl_info *mci, ++ struct ddr_ecc_status *p) ++{ ++ struct ddr_ecc_error_info *pinf; + -+ /* get the GPU control registers*/ -+ for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); -+ offset += 2; ++ if (p->ceinfo.err_cnt) { ++ pinf = &p->ceinfo; ++ edac_mc_printk(mci, KERN_ERR, ++ "DDR ECC CE error: CS%d, Row 0x%x, Bg 0x%x, Bk 0x%x, Col 0x%x bit 0x%x\n", ++ pinf->rank, pinf->row, pinf->bank_group, ++ pinf->bank, pinf->col, ++ pinf->bitpos); ++ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, ++ p->ceinfo.err_cnt, 0, 0, 0, 0, 0, -1, ++ mci->ctl_name, ""); + } ++} + -+ /* get the Job control registers*/ -+ for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ JOB_CONTROL_REG(job_control_reg_snapshot[i]); -+ offset += 2; -+ } ++static void rockchip_edac_handle_ue_error(struct mem_ctl_info *mci, ++ struct ddr_ecc_status *p) ++{ ++ struct ddr_ecc_error_info *pinf; + -+ /* get the Job Slot registers*/ -+ for (j = 0; j < slot_number; j++) { -+ for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); -+ offset += 2; -+ } ++ if (p->ueinfo.err_cnt) { ++ pinf = &p->ueinfo; ++ edac_mc_printk(mci, KERN_ERR, ++ "DDR ECC UE error: CS%d, Row 0x%x, Bg 0x%x, Bk 0x%x, Col 0x%x\n", ++ pinf->rank, pinf->row, ++ pinf->bank_group, pinf->bank, pinf->col); ++ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, ++ p->ueinfo.err_cnt, 0, 0, 0, 0, 0, -1, ++ mci->ctl_name, ""); + } ++} + -+ /* get the MMU registers*/ -+ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); -+ offset += 2; -+ } ++static int rockchip_edac_get_error_info(struct mem_ctl_info *mci) ++{ ++ struct arm_smccc_res res; + -+ /* get the Address space registers*/ -+ for (j = 0; j < as_number; j++) { -+ for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ MMU_AS_REG(j, as_reg_snapshot[i]); -+ offset += 2; -+ } ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDRECC, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_ECC); ++ if ((res.a0) || (res.a1)) { ++ edac_mc_printk(mci, KERN_ERR, "ROCKCHIP_SIP_CONFIG_DRAM_ECC not support: 0x%lx\n", ++ res.a0); ++ return -ENXIO; + } + -+ WARN_ON(offset >= (reg_range*2/4)); ++ return 0; ++} + -+ /* set the termination flag*/ -+ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; -+ kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; ++static void rockchip_edac_check(struct mem_ctl_info *mci) ++{ ++ struct rk_edac_priv *priv = mci->pvt_info; ++ int ret; + -+ dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", -+ offset); ++ ret = rockchip_edac_get_error_info(mci); ++ if (ret) ++ return; + -+ 
return true; ++ priv->ce_cnt += ddr_edac_info->ceinfo.err_cnt; ++ priv->ue_cnt += ddr_edac_info->ceinfo.err_cnt; ++ rockchip_edac_handle_ce_error(mci, ddr_edac_info); ++ rockchip_edac_handle_ue_error(mci, ddr_edac_info); +} + -+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) ++static irqreturn_t rockchip_edac_mc_ce_isr(int irq, void *dev_id) +{ -+ int offset = 0; -+ -+ if (kctx->reg_dump == NULL) -+ return false; ++ struct mem_ctl_info *mci = dev_id; ++ struct rk_edac_priv *priv = mci->pvt_info; ++ int ret; + -+ while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { -+ kctx->reg_dump[offset+1] = -+ kbase_reg_read(kctx->kbdev, -+ kctx->reg_dump[offset]); -+ offset += 2; -+ } -+ return true; -+} ++ ret = rockchip_edac_get_error_info(mci); ++ if (ret) ++ return IRQ_NONE; + ++ priv->ce_cnt += ddr_edac_info->ceinfo.err_cnt; + -+#endif -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c -new file mode 100644 -index 000000000..e960f4602 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c -@@ -0,0 +1,741 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ * SPDX-License-Identifier: GPL-2.0 -+ * -+ */ ++ rockchip_edac_handle_ce_error(mci, ddr_edac_info); + -+#include -+#include -+#include -+#include ++ return IRQ_HANDLED; ++} + -+#include -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+#include -+#endif ++static irqreturn_t rockchip_edac_mc_ue_isr(int irq, void *dev_id) ++{ ++ struct mem_ctl_info *mci = dev_id; ++ struct rk_edac_priv *priv = mci->pvt_info; ++ int ret; + -+#include -+#include -+#include -+#include "mali_kbase_devfreq.h" ++ ret = rockchip_edac_get_error_info(mci); ++ if (ret) ++ return IRQ_NONE; + -+#include -+#include -+#include ++ priv->ue_cnt += ddr_edac_info->ueinfo.err_cnt; + -+static struct devfreq_simple_ondemand_data ondemand_data; ++ rockchip_edac_handle_ue_error(mci, ddr_edac_info); + -+static struct monitor_dev_profile mali_mdevp = { -+ .type = MONITOR_TYPE_DEV, -+ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, -+ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, -+ .check_rate_volt = rockchip_monitor_check_rate_volt, -+}; ++ return IRQ_HANDLED; ++} + -+/** -+ * get_voltage() - Get the voltage value corresponding to the nominal frequency -+ * used by devfreq. -+ * @kbdev: Device pointer -+ * @freq: Nominal frequency in Hz passed by devfreq. -+ * -+ * This function will be called only when the opp table which is compatible with -+ * "operating-points-v2-mali", is not present in the devicetree for GPU device. -+ * -+ * Return: Voltage value in micro volts, 0 in case of error. 
-+ */ -+static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) ++static int rockchip_edac_mc_init(struct mem_ctl_info *mci, ++ struct platform_device *pdev) +{ -+ struct dev_pm_opp *opp; -+ unsigned long voltage = 0; ++ struct rk_edac_priv *priv = mci->pvt_info; ++ struct arm_smccc_res res; ++ int ret; + -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_lock(); -+#endif ++ mci->pdev = &pdev->dev; ++ dev_set_drvdata(mci->pdev, mci); ++ mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; ++ mci->edac_ctl_cap = EDAC_FLAG_SECDED; ++ mci->scrub_cap = SCRUB_NONE; ++ mci->scrub_mode = SCRUB_NONE; + -+ opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); ++ mci->edac_cap = EDAC_FLAG_SECDED; ++ mci->ctl_name = priv->name; ++ mci->dev_name = priv->name; ++ mci->mod_name = RK_EDAC_MOD; + -+ if (IS_ERR_OR_NULL(opp)) -+ dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); -+ else { -+ voltage = dev_pm_opp_get_voltage(opp); -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+ dev_pm_opp_put(opp); -+#endif ++ if (edac_op_state == EDAC_OPSTATE_POLL) ++ mci->edac_check = rockchip_edac_check; ++ mci->ctl_page_to_phys = NULL; ++ ++ res = sip_smc_request_share_mem(1, SHARE_PAGE_TYPE_DDRECC); ++ if (res.a0 != 0) { ++ dev_err(&pdev->dev, "no ATF memory for init, ret 0x%lx\n", res.a0); ++ return -ENOMEM; + } ++ ddr_edac_info = (struct ddr_ecc_status *)res.a1; ++ memset(ddr_edac_info, 0, sizeof(struct ddr_ecc_status)); + -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_unlock(); -+#endif ++ ret = rockchip_edac_get_error_info(mci); ++ if (ret) ++ return ret; + -+ /* Return the voltage in micro volts */ -+ return voltage; ++ return 0; +} + -+void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, -+ u64 *core_mask, unsigned long *freqs, unsigned long *volts) ++static int rockchip_edac_probe(struct platform_device *pdev) +{ -+ unsigned int i; ++ struct mem_ctl_info *mci; ++ struct edac_mc_layer layers[2]; ++ struct rk_edac_priv *priv; ++ int ret; + -+ for (i = 0; i < kbdev->num_opps; i++) { -+ if (kbdev->devfreq_table[i].opp_freq == freq) { -+ unsigned int j; ++ opstate_init_int(); ++ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; ++ layers[0].size = MAX_CS; ++ layers[0].is_virt_csrow = true; ++ layers[1].type = EDAC_MC_LAYER_CHANNEL; ++ layers[1].size = MAX_CH; ++ layers[1].is_virt_csrow = false; + -+ *core_mask = kbdev->devfreq_table[i].core_mask; -+ for (j = 0; j < kbdev->nr_clocks; j++) { -+ freqs[j] = -+ kbdev->devfreq_table[i].real_freqs[j]; -+ volts[j] = -+ kbdev->devfreq_table[i].opp_volts[j]; -+ } ++ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, ++ sizeof(struct rk_edac_priv)); ++ if (!mci) { ++ edac_printk(KERN_ERR, EDAC_MC, ++ "Failed memory allocation for mc instance\n"); ++ return -ENOMEM; ++ } + -+ break; -+ } ++ priv = mci->pvt_info; ++ priv->name = "rk_edac_ecc"; ++ ret = rockchip_edac_mc_init(mci, pdev); ++ if (ret) { ++ edac_printk(KERN_ERR, EDAC_MC, ++ "Failed to initialize instance\n"); ++ goto free_edac_mc; + } + -+ /* If failed to find OPP, return all cores enabled -+ * and nominal frequency and the corresponding voltage. 
-+ */ -+ if (i == kbdev->num_opps) { -+ unsigned long voltage = get_voltage(kbdev, freq); ++ ret = edac_mc_add_mc(mci); ++ if (ret) { ++ edac_printk(KERN_ERR, EDAC_MC, ++ "Failed edac_mc_add_mc()\n"); ++ goto free_edac_mc; ++ } + -+ *core_mask = kbdev->gpu_props.props.raw_props.shader_present; ++ if (edac_op_state == EDAC_OPSTATE_INT) { ++ /* register interrupts */ ++ priv->irq_ce = platform_get_irq_byname(pdev, "ce"); ++ ret = devm_request_irq(&pdev->dev, priv->irq_ce, ++ rockchip_edac_mc_ce_isr, ++ 0, ++ "[EDAC] MC err", mci); ++ if (ret < 0) { ++ edac_printk(KERN_ERR, EDAC_MC, ++ "%s: Unable to request ce irq %d for RK EDAC\n", ++ __func__, priv->irq_ce); ++ goto del_mc; ++ } + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ freqs[i] = freq; -+ volts[i] = voltage; ++ edac_printk(KERN_INFO, EDAC_MC, ++ "acquired ce irq %d for MC\n", ++ priv->irq_ce); ++ ++ priv->irq_ue = platform_get_irq_byname(pdev, "ue"); ++ ret = devm_request_irq(&pdev->dev, priv->irq_ue, ++ rockchip_edac_mc_ue_isr, ++ 0, ++ "[EDAC] MC err", mci); ++ if (ret < 0) { ++ edac_printk(KERN_ERR, EDAC_MC, ++ "%s: Unable to request ue irq %d for RK EDAC\n", ++ __func__, priv->irq_ue); ++ goto del_mc; + } ++ ++ edac_printk(KERN_INFO, EDAC_MC, ++ "acquired ue irq %d for MC\n", ++ priv->irq_ue); + } ++ ++ return 0; ++ ++del_mc: ++ edac_mc_del_mc(&pdev->dev); ++free_edac_mc: ++ edac_mc_free(mci); ++ ++ return -ENODEV; +} + -+static int kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) ++static int rockchip_edac_remove(struct platform_device *pdev) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rockchip_opp_info *opp_info = &kbdev->opp_info; -+ struct dev_pm_opp *opp; -+ int ret = 0; ++ struct mem_ctl_info *mci = dev_get_drvdata(&pdev->dev); + -+ if (!opp_info->is_rate_volt_checked) -+ return -EINVAL; ++ edac_mc_del_mc(&pdev->dev); ++ edac_mc_free(mci); + -+ opp = devfreq_recommended_opp(dev, freq, flags); -+ if (IS_ERR(opp)) -+ return PTR_ERR(opp); -+ dev_pm_opp_put(opp); ++ return 0; ++} + -+ if (*freq == kbdev->current_nominal_freq) -+ return 0; ++static const struct of_device_id rk_ddr_mc_err_of_match[] = { ++ { .compatible = "rockchip,rk3568-edac", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, rk_ddr_mc_err_of_match); + -+ rockchip_opp_dvfs_lock(opp_info); -+ if (pm_runtime_active(dev)) -+ opp_info->is_runtime_active = true; -+ else -+ opp_info->is_runtime_active = false; -+ ret = dev_pm_opp_set_rate(dev, *freq); -+ if (!ret) { -+ kbdev->current_nominal_freq = *freq; -+ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)*freq); -+ } -+ rockchip_opp_dvfs_unlock(opp_info); ++static struct platform_driver rockchip_edac_driver = { ++ .probe = rockchip_edac_probe, ++ .remove = rockchip_edac_remove, ++ .driver = { ++ .name = "rk_edac", ++ .of_match_table = rk_ddr_mc_err_of_match, ++ }, ++}; ++module_platform_driver(rockchip_edac_driver); + -+ return ret; -+} ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("He Zhihuan \n"); ++MODULE_DESCRIPTION("ROCKCHIP EDAC kernel module"); +diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c +index e7f55c021..fc0db7790 100644 +--- a/drivers/extcon/extcon.c ++++ b/drivers/extcon/extcon.c +@@ -52,6 +52,11 @@ static const struct __extcon_info { + .id = EXTCON_USB_HOST, + .name = "USB-HOST", + }, ++ [EXTCON_USB_VBUS_EN] = { ++ .type = EXTCON_TYPE_USB, ++ .id = EXTCON_USB_VBUS_EN, ++ .name = "USB_VBUS_EN", ++ }, + + /* Charging external connector */ + [EXTCON_CHG_USB_SDP] = { +diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig +index 
8ecffdce9..4f34b223d 100644 +--- a/drivers/firmware/Kconfig ++++ b/drivers/firmware/Kconfig +@@ -225,6 +225,13 @@ config QCOM_SCM_DOWNLOAD_MODE_DEFAULT + + Say Y here to enable "download mode" by default. + ++config ROCKCHIP_SIP ++ tristate "Rockchip SIP interface" ++ depends on HAVE_ARM_SMCCC && ARCH_ROCKCHIP ++ help ++ Say Y here if you want to enable SIP callbacks for Rockchip platforms ++ This option enables support for communicating with the ATF. + -+void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq) -+{ -+ unsigned long target_freq = freq; + config SYSFB + bool + select BOOT_VESA_SUPPORT +diff --git a/drivers/firmware/Kconfig.orig b/drivers/firmware/Kconfig.orig +new file mode 100644 +index 000000000..8ecffdce9 +--- /dev/null ++++ b/drivers/firmware/Kconfig.orig +@@ -0,0 +1,318 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++# ++# For a description of the syntax of this configuration file, ++# see Documentation/kbuild/kconfig-language.rst. ++# + -+ kbase_devfreq_target(kbdev->dev, &target_freq, 0); -+} ++menu "Firmware Drivers" + -+static int -+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); ++source "drivers/firmware/arm_scmi/Kconfig" + -+ *freq = kbdev->current_nominal_freq; ++config ARM_SCPI_PROTOCOL ++ tristate "ARM System Control and Power Interface (SCPI) Message Protocol" ++ depends on ARM || ARM64 || COMPILE_TEST ++ depends on MAILBOX ++ help ++ System Control and Power Interface (SCPI) Message Protocol is ++ defined for the purpose of communication between the Application ++ Cores(AP) and the System Control Processor(SCP). The MHU peripheral ++ provides a mechanism for inter-processor communication between SCP ++ and AP. ++ ++ SCP controls most of the power management on the Application ++ Processors. It offers control and management of: the core/cluster ++ power states, various power domain DVFS including the core/cluster, ++ certain system clocks configuration, thermal sensors and many ++ others. ++ ++ This protocol library provides interface for all the client drivers ++ making use of the features offered by the SCP. ++ ++config ARM_SCPI_POWER_DOMAIN ++ tristate "SCPI power domain driver" ++ depends on ARM_SCPI_PROTOCOL || (COMPILE_TEST && OF) ++ default y ++ select PM_GENERIC_DOMAINS if PM ++ help ++ This enables support for the SCPI power domains which can be ++ enabled or disabled via the SCP firmware + -+ return 0; -+} ++config ARM_SDE_INTERFACE ++ bool "ARM Software Delegated Exception Interface (SDEI)" ++ depends on ARM64 ++ help ++ The Software Delegated Exception Interface (SDEI) is an ARM ++ standard for registering callbacks from the platform firmware ++ into the OS. This is typically used to implement RAS notifications. + -+static int -+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct kbasep_pm_metrics diff; ++config EDD ++ tristate "BIOS Enhanced Disk Drive calls determine boot disk" ++ depends on X86 ++ help ++ Say Y or M here if you want to enable BIOS Enhanced Disk Drive ++ Services real mode BIOS calls to determine which disk ++ BIOS tries boot from. This information is then exported via sysfs. + -+ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff); ++ This option is experimental and is known to fail to boot on some ++ obscure configurations. Most disk controller BIOS vendors do ++ not yet implement this feature. 
+ -+ stat->busy_time = diff.time_busy; -+ stat->total_time = diff.time_busy + diff.time_idle; -+ stat->current_frequency = kbdev->current_nominal_freq; -+ stat->private_data = NULL; ++config EDD_OFF ++ bool "Sets default behavior for EDD detection to off" ++ depends on EDD ++ default n ++ help ++ Say Y if you want EDD disabled by default, even though it is compiled into the ++ kernel. Say N if you want EDD enabled by default. EDD can be dynamically set ++ using the kernel parameter 'edd={on|skipmbr|off}'. ++ ++config FIRMWARE_MEMMAP ++ bool "Add firmware-provided memory map to sysfs" if EXPERT ++ default X86 ++ help ++ Add the firmware-provided (unmodified) memory map to /sys/firmware/memmap. ++ That memory map is used for example by kexec to set up parameter area ++ for the next kernel, but can also be used for debugging purposes. ++ ++ See also Documentation/ABI/testing/sysfs-firmware-memmap. ++ ++config EFI_PCDP ++ bool "Console device selection via EFI PCDP or HCDP table" ++ depends on ACPI && EFI && IA64 ++ default y if IA64 ++ help ++ If your firmware supplies the PCDP table, and you want to ++ automatically use the primary console device it describes ++ as the Linux console, say Y here. + -+#if MALI_USE_CSF && defined CONFIG_DEVFREQ_THERMAL -+ if (!kbdev->devfreq_profile.is_cooling_device) -+ kbase_ipa_reset_data(kbdev); -+#endif ++ If your firmware supplies the HCDP table, and you want to ++ use the first serial port it describes as the Linux console, ++ say Y here. If your EFI ConOut path contains only a UART ++ device, it will become the console automatically. Otherwise, ++ you must specify the "console=hcdp" kernel boot argument. + -+ return 0; -+} ++ Neither the PCDP nor the HCDP affects naming of serial devices, ++ so a serial console may be /dev/ttyS0, /dev/ttyS1, etc, depending ++ on how the driver discovers devices. + -+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, -+ struct devfreq_dev_profile *dp) -+{ -+ int count; -+ int i = 0; -+ unsigned long freq; -+ struct dev_pm_opp *opp; ++ You must also enable the appropriate drivers (serial, VGA, etc.) + -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_lock(); -+#endif -+ count = dev_pm_opp_get_opp_count(kbdev->dev); -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_unlock(); -+#endif -+ if (count < 0) -+ return count; ++ See DIG64_HCDPv20_042804.pdf available from ++ + -+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), -+ GFP_KERNEL); -+ if (!dp->freq_table) -+ return -ENOMEM; ++config DMIID ++ bool "Export DMI identification via sysfs to userspace" ++ depends on DMI ++ default y ++ help ++ Say Y here if you want to query SMBIOS/DMI system identification ++ information from userspace through /sys/class/dmi/id/ or if you want ++ DMI-based module auto-loading. + -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_lock(); -+#endif -+ for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { -+ opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); -+ if (IS_ERR(opp)) -+ break; -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+ dev_pm_opp_put(opp); -+#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */ ++config DMI_SYSFS ++ tristate "DMI table support in sysfs" ++ depends on SYSFS && DMI ++ default n ++ help ++ Say Y or M here to enable the exporting of the raw DMI table ++ data via sysfs. This is useful for consuming the data without ++ requiring any access to /dev/mem at all. 
Tables are found ++ under /sys/firmware/dmi when this option is enabled and ++ loaded. + -+ dp->freq_table[i] = freq; -+ } -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ rcu_read_unlock(); -+#endif ++config DMI_SCAN_MACHINE_NON_EFI_FALLBACK ++ bool + -+ if (count != i) -+ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", -+ count, i); ++config ISCSI_IBFT_FIND ++ bool "iSCSI Boot Firmware Table Attributes" ++ depends on X86 && ISCSI_IBFT ++ default n ++ help ++ This option enables the kernel to find the region of memory ++ in which the ISCSI Boot Firmware Table (iBFT) resides. This ++ is necessary for iSCSI Boot Firmware Table Attributes module to work ++ properly. ++ ++config ISCSI_IBFT ++ tristate "iSCSI Boot Firmware Table Attributes module" ++ select ISCSI_BOOT_SYSFS ++ select ISCSI_IBFT_FIND if X86 ++ depends on ACPI && SCSI && SCSI_LOWLEVEL ++ default n ++ help ++ This option enables support for detection and exposing of iSCSI ++ Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to ++ detect iSCSI boot parameters dynamically during system boot, say Y. ++ Otherwise, say N. ++ ++config RASPBERRYPI_FIRMWARE ++ tristate "Raspberry Pi Firmware Driver" ++ depends on BCM2835_MBOX ++ help ++ This option enables support for communicating with the firmware on the ++ Raspberry Pi. + -+ dp->max_state = i; ++config FW_CFG_SYSFS ++ tristate "QEMU fw_cfg device support in sysfs" ++ depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86) ++ depends on HAS_IOPORT_MAP ++ default n ++ help ++ Say Y or M here to enable the exporting of the QEMU firmware ++ configuration (fw_cfg) file entries via sysfs. Entries are ++ found under /sys/firmware/fw_cfg when this option is enabled ++ and loaded. ++ ++config FW_CFG_SYSFS_CMDLINE ++ bool "QEMU fw_cfg device parameter parsing" ++ depends on FW_CFG_SYSFS ++ help ++ Allow the qemu_fw_cfg device to be initialized via the kernel ++ command line or using a module parameter. ++ WARNING: Using incorrect parameters (base address in particular) ++ may crash your system. ++ ++config INTEL_STRATIX10_SERVICE ++ tristate "Intel Stratix10 Service Layer" ++ depends on ARCH_INTEL_SOCFPGA && ARM64 && HAVE_ARM_SMCCC ++ default n ++ help ++ Intel Stratix10 service layer runs at privileged exception level, ++ interfaces with the service providers (FPGA manager is one of them) ++ and manages secure monitor call to communicate with secure monitor ++ software at secure monitor exception level. + ++ Say Y here if you want Stratix10 service layer support. + -+ /* Have the lowest clock as suspend clock. -+ * It may be overridden by 'opp-mali-errata-1485982'. -+ */ -+ if (kbdev->pm.backend.gpu_clock_slow_down_wa) { -+ freq = 0; -+ opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq); -+ if (IS_ERR(opp)) { -+ dev_err(kbdev->dev, "failed to find slowest clock"); -+ return 0; -+ } -+ dev_pm_opp_put(opp); -+ dev_info(kbdev->dev, "suspend clock %lu from slowest", freq); -+ kbdev->pm.backend.gpu_clock_suspend_freq = freq; -+ } ++config INTEL_STRATIX10_RSU ++ tristate "Intel Stratix10 Remote System Update" ++ depends on INTEL_STRATIX10_SERVICE ++ help ++ The Intel Remote System Update (RSU) driver exposes interfaces ++ access through the Intel Service Layer to user space via sysfs ++ device attribute nodes. The RSU interfaces report/control some of ++ the optional RSU features of the Stratix 10 SoC FPGA. 
+ -+ return 0; -+} ++ The RSU provides a way for customers to update the boot ++ configuration of a Stratix 10 SoC device with significantly reduced ++ risk of corrupting the bitstream storage and bricking the system. + -+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) ++ Enable RSU support if you are using an Intel SoC FPGA with the RSU ++ feature enabled and you want Linux user space control. ++ ++ Say Y here if you want Intel RSU support. ++ ++config MTK_ADSP_IPC ++ tristate "MTK ADSP IPC Protocol driver" ++ depends on MTK_ADSP_MBOX ++ help ++ Say yes here to add support for the MediaTek ADSP IPC ++ between host AP (Linux) and the firmware running on ADSP. ++ ADSP exists on some mtk processors. ++ Client might use shared memory to exchange information with ADSP. ++ ++config QCOM_SCM ++ tristate ++ ++config QCOM_SCM_DOWNLOAD_MODE_DEFAULT ++ bool "Qualcomm download mode enabled by default" ++ depends on QCOM_SCM ++ help ++ A device with "download mode" enabled will upon an unexpected ++ warm-restart enter a special debug mode that allows the user to ++ "download" memory content over USB for offline postmortem analysis. ++ The feature can be enabled/disabled on the kernel command line. ++ ++ Say Y here to enable "download mode" by default. ++ ++config SYSFB ++ bool ++ select BOOT_VESA_SUPPORT ++ select SCREEN_INFO ++ ++config SYSFB_SIMPLEFB ++ bool "Mark VGA/VBE/EFI FB as generic system framebuffer" ++ depends on X86 || EFI ++ select SYSFB ++ help ++ Firmwares often provide initial graphics framebuffers so the BIOS, ++ bootloader or kernel can show basic video-output during boot for ++ user-guidance and debugging. Historically, x86 used the VESA BIOS ++ Extensions and EFI-framebuffers for this, which are mostly limited ++ to x86 BIOS or EFI systems. ++ This option, if enabled, marks VGA/VBE/EFI framebuffers as generic ++ framebuffers so the new generic system-framebuffer drivers can be ++ used instead. If the framebuffer is not compatible with the generic ++ modes, it is advertised as fallback platform framebuffer so legacy ++ drivers like efifb, vesafb and uvesafb can pick it up. ++ If this option is not selected, all system framebuffers are always ++ marked as fallback platform framebuffers as usual. ++ ++ Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will ++ not be able to pick up generic system framebuffers if this option ++ is selected. You are highly encouraged to enable simplefb as ++ replacement if you select this option. simplefb can correctly deal ++ with generic system framebuffers. But you should still keep vesafb ++ and others enabled as fallback if a system framebuffer is ++ incompatible with simplefb. ++ ++ If unsure, say Y. ++ ++config TI_SCI_PROTOCOL ++ tristate "TI System Control Interface (TISCI) Message Protocol" ++ depends on TI_MESSAGE_MANAGER ++ help ++ TI System Control Interface (TISCI) Message Protocol is used to manage ++ compute systems such as ARM, DSP etc with the system controller in ++ complex System on Chip(SoC) such as those found on certain keystone ++ generation SoC from TI. ++ ++ System controller provides various facilities including power ++ management function support. ++ ++ This protocol library is used by client drivers to use the features ++ provided by the system controller. 
++ ++config TRUSTED_FOUNDATIONS ++ bool "Trusted Foundations secure monitor support" ++ depends on ARM && CPU_V7 ++ help ++ Some devices (including most early Tegra-based consumer devices on ++ the market) are booted with the Trusted Foundations secure monitor ++ active, requiring some core operations to be performed by the secure ++ monitor instead of the kernel. ++ ++ This option allows the kernel to invoke the secure monitor whenever ++ required on devices using Trusted Foundations. See the functions and ++ comments in linux/firmware/trusted_foundations.h or the device tree ++ bindings for "tlm,trusted-foundations" for details on how to use it. ++ ++ Choose N if you don't know what this is about. ++ ++config TURRIS_MOX_RWTM ++ tristate "Turris Mox rWTM secure firmware driver" ++ depends on ARCH_MVEBU || COMPILE_TEST ++ depends on HAS_DMA && OF ++ depends on MAILBOX ++ select HW_RANDOM ++ select ARMADA_37XX_RWTM_MBOX ++ help ++ This driver communicates with the firmware on the Cortex-M3 secure ++ processor of the Turris Mox router. Enable if you are building for ++ Turris Mox, and you will be able to read the device serial number and ++ other manufacturing data and also utilize the Entropy Bit Generator ++ for hardware random number generation. ++ ++source "drivers/firmware/arm_ffa/Kconfig" ++source "drivers/firmware/broadcom/Kconfig" ++source "drivers/firmware/cirrus/Kconfig" ++source "drivers/firmware/google/Kconfig" ++source "drivers/firmware/efi/Kconfig" ++source "drivers/firmware/imx/Kconfig" ++source "drivers/firmware/meson/Kconfig" ++source "drivers/firmware/psci/Kconfig" ++source "drivers/firmware/smccc/Kconfig" ++source "drivers/firmware/tegra/Kconfig" ++source "drivers/firmware/xilinx/Kconfig" ++ ++endmenu +diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile +index 28fcddcd6..f91d48e6e 100644 +--- a/drivers/firmware/Makefile ++++ b/drivers/firmware/Makefile +@@ -17,6 +17,7 @@ obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o + obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o + obj-$(CONFIG_MTK_ADSP_IPC) += mtk-adsp-ipc.o + obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o ++obj-$(CONFIG_ROCKCHIP_SIP) += rockchip_sip.o + obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o + obj-$(CONFIG_QCOM_SCM) += qcom-scm.o + qcom-scm-objs += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o +diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c +index 71e2a9a89..0f4f27613 100644 +--- a/drivers/firmware/arm_sdei.c ++++ b/drivers/firmware/arm_sdei.c +@@ -1119,3 +1119,83 @@ void sdei_handler_abort(void) + __this_cpu_write(sdei_active_normal_event, NULL); + } + } ++ ++#ifdef CONFIG_FIQ_DEBUGGER_TRUST_ZONE ++int sdei_event_enable_nolock(u32 event_num) +{ -+ struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; ++ return sdei_api_event_enable(event_num); ++} + -+ kfree(dp->freq_table); -+ dp->freq_table = NULL; ++int sdei_event_disable_nolock(u32 event_num) ++{ ++ return sdei_api_event_disable(event_num); +} + -+static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) ++int sdei_event_routing_set_nolock(u32 event_num, unsigned long flags, ++ unsigned long affinity) +{ -+ kfree(kbdev->devfreq_table); -+ kbdev->devfreq_table = NULL; ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ROUTING_SET, event_num, ++ (unsigned long)flags, (unsigned long)affinity, ++ 0, 0, 0); +} + -+static void kbase_devfreq_exit(struct device *dev) ++int sdei_event_routing_set(u32 event_num, unsigned long flags, ++ unsigned long affinity) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ 
int err = -EINVAL; ++ struct sdei_event *event; + -+ if (kbdev) -+ kbase_devfreq_term_freq_table(kbdev); ++ mutex_lock(&sdei_events_lock); ++ event = sdei_event_find(event_num); ++ if (!event) { ++ mutex_unlock(&sdei_events_lock); ++ return -ENOENT; ++ } ++ ++ err = sdei_event_routing_set_nolock(event_num, flags, affinity); ++ mutex_unlock(&sdei_events_lock); ++ ++ return err; +} + -+static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, -+ struct device_node *node) ++static int sdei_api_interrupt_bind(u32 intr_num, u64 *result) +{ -+ u64 freq = 0; -+ int err = 0; ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_BIND, intr_num, 0, 0, 0, ++ 0, result); ++} + -+ /* Check if this node is the opp entry having 'opp-mali-errata-1485982' -+ * to get the suspend clock, otherwise skip it. -+ */ -+ if (!of_property_read_bool(node, "opp-mali-errata-1485982")) -+ return; ++int sdei_interrupt_bind(u32 intr_num, u32 *event_num) ++{ ++ int err; ++ u64 result; + -+ /* In kbase DevFreq, the clock will be read from 'opp-hz' -+ * and translated into the actual clock by opp_translate. -+ * -+ * In customer DVFS, the clock will be read from 'opp-hz-real' -+ * for clk driver. If 'opp-hz-real' does not exist, -+ * read from 'opp-hz'. -+ */ -+ if (IS_ENABLED(CONFIG_MALI_BIFROST_DEVFREQ)) -+ err = of_property_read_u64(node, "opp-hz", &freq); -+ else { -+ if (of_property_read_u64(node, "opp-hz-real", &freq)) -+ err = of_property_read_u64(node, "opp-hz", &freq); -+ } ++ err = sdei_api_interrupt_bind(intr_num, &result); ++ if (!err) ++ *event_num = (u32)result; + -+ if (WARN_ON(err || !freq)) -+ return; ++ return err; ++} + -+ kbdev->pm.backend.gpu_clock_suspend_freq = freq; -+ dev_info(kbdev->dev, -+ "suspend clock %llu by opp-mali-errata-1485982", freq); ++static int sdei_api_interrupt_release(u32 event_num) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE, event_num, 0, 0, 0, ++ 0, NULL); +} + -+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) ++int sdei_interrupt_release(u32 event_num) +{ -+#ifndef CONFIG_OF -+ /* OPP table initialization requires at least the capability to get -+ * regulators and clocks from the device tree, as well as parsing -+ * arrays of unsigned integer values. -+ * -+ * The whole initialization process shall simply be skipped if the -+ * minimum capability is not available. 
-+ */ -+ return 0; -+#else -+ struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, -+ "operating-points-v2", 0); -+ struct device_node *node; -+ int i = 0; -+ int count; -+ u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; ++ struct sdei_event *event; + -+ if (!opp_node) -+ return 0; -+ if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) -+ return 0; ++ mutex_lock(&sdei_events_lock); ++ event = sdei_event_find(event_num); ++ mutex_unlock(&sdei_events_lock); + -+ count = dev_pm_opp_get_opp_count(kbdev->dev); -+ kbdev->devfreq_table = kmalloc_array(count, -+ sizeof(struct kbase_devfreq_opp), GFP_KERNEL); -+ if (!kbdev->devfreq_table) -+ return -ENOMEM; ++ if (event) { ++ pr_err("%s: need unregister event:%d before release\n", ++ __func__, event_num); ++ return SDEI_DENIED; ++ } + -+ for_each_available_child_of_node(opp_node, node) { -+ const void *core_count_p; -+ u64 core_mask, opp_freq, -+ real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ int err; -+#if IS_ENABLED(CONFIG_REGULATOR) -+ u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ return sdei_api_interrupt_release(event_num); ++} +#endif +diff --git a/drivers/firmware/arm_sdei.c.orig b/drivers/firmware/arm_sdei.c.orig +new file mode 100644 +index 000000000..71e2a9a89 +--- /dev/null ++++ b/drivers/firmware/arm_sdei.c.orig +@@ -0,0 +1,1121 @@ ++// SPDX-License-Identifier: GPL-2.0 ++// Copyright (C) 2017 Arm Ltd. ++#define pr_fmt(fmt) "sdei: " fmt + -+ /* Read suspend clock from opp table */ -+ if (kbdev->pm.backend.gpu_clock_slow_down_wa) -+ kbasep_devfreq_read_suspend_clock(kbdev, node); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ err = of_property_read_u64(node, "opp-hz", &opp_freq); -+ if (err) { -+ dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", -+ err); -+ continue; -+ } ++/* ++ * The call to use to reach the firmware. 
++ */ ++static asmlinkage void (*sdei_firmware_call)(unsigned long function_id, ++ unsigned long arg0, unsigned long arg1, ++ unsigned long arg2, unsigned long arg3, ++ unsigned long arg4, struct arm_smccc_res *res); + ++/* entry point from firmware to arch asm code */ ++static unsigned long sdei_entry_point; + -+#if BASE_MAX_NR_CLOCKS_REGULATORS > 1 -+ err = of_property_read_u64_array(node, "opp-hz-real", -+ real_freqs, kbdev->nr_clocks); -+#else -+ WARN_ON(kbdev->nr_clocks != 1); -+ err = of_property_read_u64(node, "opp-hz-real", real_freqs); -+#endif -+ if (err < 0) { -+ dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", -+ err); -+ continue; -+ } -+#if IS_ENABLED(CONFIG_REGULATOR) -+ err = of_property_read_u32_array(node, -+ "opp-microvolt", opp_volts, kbdev->nr_regulators); -+ if (err < 0) { -+ dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", -+ err); -+ continue; -+ } -+#endif ++static int sdei_hp_state; + -+ if (of_property_read_u64(node, "opp-core-mask", &core_mask)) -+ core_mask = shader_present; -+ if (core_mask != shader_present && corestack_driver_control) { ++struct sdei_event { ++ /* These three are protected by the sdei_list_lock */ ++ struct list_head list; ++ bool reregister; ++ bool reenable; + -+ dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", -+ opp_freq); -+ continue; -+ } ++ u32 event_num; ++ u8 type; ++ u8 priority; + -+ core_count_p = of_get_property(node, "opp-core-count", NULL); -+ if (core_count_p) { -+ u64 remaining_core_mask = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ int core_count = be32_to_cpup(core_count_p); ++ /* This pointer is handed to firmware as the event argument. */ ++ union { ++ /* Shared events */ ++ struct sdei_registered_event *registered; + -+ core_mask = 0; ++ /* CPU private events */ ++ struct sdei_registered_event __percpu *private_registered; ++ }; ++}; + -+ for (; core_count > 0; core_count--) { -+ int core = ffs(remaining_core_mask); ++/* Take the mutex for any API call or modification. Take the mutex first. 
*/ ++static DEFINE_MUTEX(sdei_events_lock); + -+ if (!core) { -+ dev_err(kbdev->dev, "OPP has more cores than GPU\n"); -+ return -ENODEV; -+ } ++/* and then hold this when modifying the list */ ++static DEFINE_SPINLOCK(sdei_list_lock); ++static LIST_HEAD(sdei_list); + -+ core_mask |= (1ull << (core-1)); -+ remaining_core_mask &= ~(1ull << (core-1)); -+ } -+ } ++/* Private events are registered/enabled via IPI passing one of these */ ++struct sdei_crosscall_args { ++ struct sdei_event *event; ++ atomic_t errors; ++ int first_error; ++}; + -+ if (!core_mask) { -+ dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); -+ return -ENODEV; -+ } ++#define CROSSCALL_INIT(arg, event) \ ++ do { \ ++ arg.event = event; \ ++ arg.first_error = 0; \ ++ atomic_set(&arg.errors, 0); \ ++ } while (0) + -+ kbdev->devfreq_table[i].opp_freq = opp_freq; -+ kbdev->devfreq_table[i].core_mask = core_mask; -+ if (kbdev->nr_clocks > 0) { -+ int j; ++static inline int sdei_do_local_call(smp_call_func_t fn, ++ struct sdei_event *event) ++{ ++ struct sdei_crosscall_args arg; + -+ for (j = 0; j < kbdev->nr_clocks; j++) -+ kbdev->devfreq_table[i].real_freqs[j] = -+ real_freqs[j]; -+ } -+#if IS_ENABLED(CONFIG_REGULATOR) -+ if (kbdev->nr_regulators > 0) { -+ int j; ++ CROSSCALL_INIT(arg, event); ++ fn(&arg); + -+ for (j = 0; j < kbdev->nr_regulators; j++) -+ kbdev->devfreq_table[i].opp_volts[j] = -+ opp_volts[j]; -+ } -+#endif ++ return arg.first_error; ++} + -+ dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", -+ i, opp_freq, core_mask); ++static inline int sdei_do_cross_call(smp_call_func_t fn, ++ struct sdei_event *event) ++{ ++ struct sdei_crosscall_args arg; + -+ i++; -+ } ++ CROSSCALL_INIT(arg, event); ++ on_each_cpu(fn, &arg, true); + -+ kbdev->num_opps = i; ++ return arg.first_error; ++} ++ ++static inline void ++sdei_cross_call_return(struct sdei_crosscall_args *arg, int err) ++{ ++ if (err && (atomic_inc_return(&arg->errors) == 1)) ++ arg->first_error = err; ++} ++ ++static int sdei_to_linux_errno(unsigned long sdei_err) ++{ ++ switch (sdei_err) { ++ case SDEI_NOT_SUPPORTED: ++ return -EOPNOTSUPP; ++ case SDEI_INVALID_PARAMETERS: ++ return -EINVAL; ++ case SDEI_DENIED: ++ return -EPERM; ++ case SDEI_PENDING: ++ return -EINPROGRESS; ++ case SDEI_OUT_OF_RESOURCE: ++ return -ENOMEM; ++ } + + return 0; -+#endif /* CONFIG_OF */ +} + -+static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) ++static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, ++ unsigned long arg1, unsigned long arg2, ++ unsigned long arg3, unsigned long arg4, ++ u64 *result) +{ -+ const char *p; ++ int err; ++ struct arm_smccc_res res; + -+ switch (type) { -+ case DEVFREQ_WORK_NONE: -+ p = "devfreq_none"; -+ break; -+ case DEVFREQ_WORK_SUSPEND: -+ p = "devfreq_suspend"; -+ break; -+ case DEVFREQ_WORK_RESUME: -+ p = "devfreq_resume"; -+ break; -+ default: -+ p = "Unknown devfreq_type"; ++ if (sdei_firmware_call) { ++ sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4, ++ &res); ++ err = sdei_to_linux_errno(res.a0); ++ } else { ++ /* ++ * !sdei_firmware_call means we failed to probe or called ++ * sdei_mark_interface_broken(). -EIO is not an error returned ++ * by sdei_to_linux_errno() and is used to suppress messages ++ * from this driver. 
++ */ ++ err = -EIO; ++ res.a0 = SDEI_NOT_SUPPORTED; + } -+ return p; ++ ++ if (result) ++ *result = res.a0; ++ ++ return err; +} ++NOKPROBE_SYMBOL(invoke_sdei_fn); + -+static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) ++static struct sdei_event *sdei_event_find(u32 event_num) +{ -+ struct kbase_devfreq_queue_info *info = container_of(work, -+ struct kbase_devfreq_queue_info, work); -+ struct kbase_device *kbdev = container_of(info, struct kbase_device, -+ devfreq_queue); -+ unsigned long flags; -+ enum kbase_devfreq_work_type type, acted_type; ++ struct sdei_event *e, *found = NULL; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ type = kbdev->devfreq_queue.req_type; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&sdei_events_lock); + -+ acted_type = kbdev->devfreq_queue.acted_type; -+ dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", -+ kbase_devfreq_req_type_name(type), -+ kbase_devfreq_req_type_name(acted_type)); -+ switch (type) { -+ case DEVFREQ_WORK_SUSPEND: -+ case DEVFREQ_WORK_RESUME: -+ if (type != acted_type) { -+ if (type == DEVFREQ_WORK_RESUME) -+ devfreq_resume_device(kbdev->devfreq); -+ else -+ devfreq_suspend_device(kbdev->devfreq); -+ dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n", -+ kbase_devfreq_req_type_name(acted_type), -+ kbase_devfreq_req_type_name(type)); -+ kbdev->devfreq_queue.acted_type = type; ++ spin_lock(&sdei_list_lock); ++ list_for_each_entry(e, &sdei_list, list) { ++ if (e->event_num == event_num) { ++ found = e; ++ break; + } -+ break; -+ default: -+ WARN_ON(1); + } ++ spin_unlock(&sdei_list_lock); ++ ++ return found; +} + -+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, -+ enum kbase_devfreq_work_type work_type) ++int sdei_api_event_context(u32 query, u64 *result) +{ -+ unsigned long flags; ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_CONTEXT, query, 0, 0, 0, 0, ++ result); ++} ++NOKPROBE_SYMBOL(sdei_api_event_context); + -+ WARN_ON(work_type == DEVFREQ_WORK_NONE); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* Skip enqueuing a work if workqueue has already been terminated. 
*/ -+ if (likely(kbdev->devfreq_queue.workq)) { -+ kbdev->devfreq_queue.req_type = work_type; -+ queue_work(kbdev->devfreq_queue.workq, -+ &kbdev->devfreq_queue.work); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", -+ kbase_devfreq_req_type_name(work_type)); ++static int sdei_api_event_get_info(u32 event, u32 info, u64 *result) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0, ++ 0, 0, result); +} + -+static int kbase_devfreq_work_init(struct kbase_device *kbdev) ++static struct sdei_event *sdei_event_create(u32 event_num, ++ sdei_event_callback *cb, ++ void *cb_arg) +{ -+ kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; -+ kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; ++ int err; ++ u64 result; ++ struct sdei_event *event; ++ struct sdei_registered_event *reg; + -+ kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0); -+ if (!kbdev->devfreq_queue.workq) -+ return -ENOMEM; ++ lockdep_assert_held(&sdei_events_lock); + -+ INIT_WORK(&kbdev->devfreq_queue.work, -+ kbase_devfreq_suspend_resume_worker); -+ return 0; ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ INIT_LIST_HEAD(&event->list); ++ event->event_num = event_num; ++ ++ err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY, ++ &result); ++ if (err) ++ goto fail; ++ event->priority = result; ++ ++ err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE, ++ &result); ++ if (err) ++ goto fail; ++ event->type = result; ++ ++ if (event->type == SDEI_EVENT_TYPE_SHARED) { ++ reg = kzalloc(sizeof(*reg), GFP_KERNEL); ++ if (!reg) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ reg->event_num = event->event_num; ++ reg->priority = event->priority; ++ ++ reg->callback = cb; ++ reg->callback_arg = cb_arg; ++ event->registered = reg; ++ } else { ++ int cpu; ++ struct sdei_registered_event __percpu *regs; ++ ++ regs = alloc_percpu(struct sdei_registered_event); ++ if (!regs) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ for_each_possible_cpu(cpu) { ++ reg = per_cpu_ptr(regs, cpu); ++ ++ reg->event_num = event->event_num; ++ reg->priority = event->priority; ++ reg->callback = cb; ++ reg->callback_arg = cb_arg; ++ } ++ ++ event->private_registered = regs; ++ } ++ ++ spin_lock(&sdei_list_lock); ++ list_add(&event->list, &sdei_list); ++ spin_unlock(&sdei_list_lock); ++ ++ return event; ++ ++fail: ++ kfree(event); ++ return ERR_PTR(err); +} + -+static void kbase_devfreq_work_term(struct kbase_device *kbdev) ++static void sdei_event_destroy_llocked(struct sdei_event *event) +{ -+ unsigned long flags; -+ struct workqueue_struct *workq; ++ lockdep_assert_held(&sdei_events_lock); ++ lockdep_assert_held(&sdei_list_lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ workq = kbdev->devfreq_queue.workq; -+ kbdev->devfreq_queue.workq = NULL; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ list_del(&event->list); + -+ destroy_workqueue(workq); ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ kfree(event->registered); ++ else ++ free_percpu(event->private_registered); ++ ++ kfree(event); +} + -+int kbase_devfreq_init(struct kbase_device *kbdev) ++static void sdei_event_destroy(struct sdei_event *event) ++{ ++ spin_lock(&sdei_list_lock); ++ sdei_event_destroy_llocked(event); ++ spin_unlock(&sdei_list_lock); ++} ++ ++static int sdei_api_get_version(u64 *version) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_VERSION, 0, 0, 0, 0, 0, 
version); ++} ++ ++int sdei_mask_local_cpu(void) +{ -+ struct device_node *np = kbdev->dev->of_node; -+ struct devfreq_dev_profile *dp; + int err; -+ struct dev_pm_opp *opp; -+ unsigned int dyn_power_coeff = 0; -+ unsigned int i; -+ bool free_devfreq_freq_table = true; + -+ if (kbdev->nr_clocks == 0) { -+ dev_err(kbdev->dev, "Clock not available for devfreq\n"); -+ return -ENODEV; ++ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_MASK, 0, 0, 0, 0, 0, NULL); ++ if (err && err != -EIO) { ++ pr_warn_once("failed to mask CPU[%u]: %d\n", ++ smp_processor_id(), err); ++ return err; + } + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ if (kbdev->clocks[i]) -+ kbdev->current_freqs[i] = -+ clk_get_rate(kbdev->clocks[i]); -+ else -+ kbdev->current_freqs[i] = 0; -+ } -+ kbdev->current_nominal_freq = kbdev->current_freqs[0]; ++ return 0; ++} + -+ opp = devfreq_recommended_opp(kbdev->dev, &kbdev->current_nominal_freq, 0); -+ if (IS_ERR(opp)) -+ return PTR_ERR(opp); -+ dev_pm_opp_put(opp); ++static void _ipi_mask_cpu(void *ignored) ++{ ++ WARN_ON_ONCE(preemptible()); ++ sdei_mask_local_cpu(); ++} + -+ dp = &kbdev->devfreq_profile; ++int sdei_unmask_local_cpu(void) ++{ ++ int err; + -+ dp->initial_freq = kbdev->current_nominal_freq; -+ dp->polling_ms = 100; -+ dp->target = kbase_devfreq_target; -+ dp->get_dev_status = kbase_devfreq_status; -+ dp->get_cur_freq = kbase_devfreq_cur_freq; -+ dp->exit = kbase_devfreq_exit; ++ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_UNMASK, 0, 0, 0, 0, 0, NULL); ++ if (err && err != -EIO) { ++ pr_warn_once("failed to unmask CPU[%u]: %d\n", ++ smp_processor_id(), err); ++ return err; ++ } + -+ if (kbase_devfreq_init_freq_table(kbdev, dp)) -+ return -EFAULT; ++ return 0; ++} + -+ if (dp->max_state > 0) { -+ /* Record the maximum frequency possible */ -+ kbdev->gpu_props.props.core_props.gpu_freq_khz_max = -+ dp->freq_table[0] / 1000; -+ }; ++static void _ipi_unmask_cpu(void *ignored) ++{ ++ WARN_ON_ONCE(preemptible()); ++ sdei_unmask_local_cpu(); ++} + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ of_property_read_u32(kbdev->dev->of_node, "dynamic-power-coefficient", -+ &dyn_power_coeff); -+ if (dyn_power_coeff) -+ dp->is_cooling_device = true; -+#endif ++static void _ipi_private_reset(void *ignored) ++{ ++ int err; + -+ err = kbase_devfreq_init_core_mask_table(kbdev); -+ if (err) -+ goto init_core_mask_table_failed; ++ WARN_ON_ONCE(preemptible()); + -+ of_property_read_u32(np, "upthreshold", -+ &ondemand_data.upthreshold); -+ of_property_read_u32(np, "downdifferential", -+ &ondemand_data.downdifferential); -+ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, -+ "simple_ondemand", NULL); -+ if (IS_ERR(kbdev->devfreq)) { -+ err = PTR_ERR(kbdev->devfreq); -+ kbdev->devfreq = NULL; -+ dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); -+ goto devfreq_add_dev_failed; -+ } ++ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PRIVATE_RESET, 0, 0, 0, 0, 0, ++ NULL); ++ if (err && err != -EIO) ++ pr_warn_once("failed to reset CPU[%u]: %d\n", ++ smp_processor_id(), err); ++} + -+ /* Explicit free of freq table isn't needed after devfreq_add_device() */ -+ free_devfreq_freq_table = false; ++static int sdei_api_shared_reset(void) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_SHARED_RESET, 0, 0, 0, 0, 0, ++ NULL); ++} + -+ /* Initialize devfreq suspend/resume workqueue */ -+ err = kbase_devfreq_work_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Fail to init devfreq workqueue"); -+ goto devfreq_work_init_failed; -+ } ++static void sdei_mark_interface_broken(void) ++{ ++ pr_err("disabling SDEI 
firmware interface\n"); ++ on_each_cpu(&_ipi_mask_cpu, NULL, true); ++ sdei_firmware_call = NULL; ++} + -+ /* devfreq_add_device only copies a few of kbdev->dev's fields, so -+ * set drvdata explicitly so IPA models can access kbdev. -+ */ -+ dev_set_drvdata(&kbdev->devfreq->dev, kbdev); ++static int sdei_platform_reset(void) ++{ ++ int err; + -+ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); ++ on_each_cpu(&_ipi_private_reset, NULL, true); ++ err = sdei_api_shared_reset(); + if (err) { -+ dev_err(kbdev->dev, -+ "Failed to register OPP notifier (%d)", err); -+ goto opp_notifier_failed; ++ pr_err("Failed to reset platform: %d\n", err); ++ sdei_mark_interface_broken(); + } + -+ mali_mdevp.data = kbdev->devfreq; -+ mali_mdevp.opp_info = &kbdev->opp_info; -+ kbdev->mdev_info = rockchip_system_monitor_register(kbdev->dev, -+ &mali_mdevp); -+ if (IS_ERR(kbdev->mdev_info)) { -+ dev_dbg(kbdev->dev, "without system monitor\n"); -+ kbdev->mdev_info = NULL; -+ } -+ kbdev->opp_info.is_rate_volt_checked = true; -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ if (!dp->is_cooling_device) { -+ err = kbase_ipa_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "IPA initialization failed\n"); -+ goto ipa_init_failed; -+ } ++ return err; ++} + -+ kbdev->devfreq_cooling = devfreq_cooling_em_register( -+ kbdev->devfreq, -+ &kbase_ipa_power_model_ops); -+ if (IS_ERR(kbdev->devfreq_cooling)) { -+ err = PTR_ERR(kbdev->devfreq_cooling); -+ dev_err(kbdev->dev, -+ "Failed to register cooling device (%d)\n", -+ err); -+ goto cooling_reg_failed; -+ } -+ } -+#endif ++static int sdei_api_event_enable(u32 event_num) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ENABLE, event_num, 0, 0, 0, ++ 0, NULL); ++} + -+ return 0; ++/* Called directly by the hotplug callbacks */ ++static void _local_event_enable(void *data) ++{ ++ int err; ++ struct sdei_crosscall_args *arg = data; + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+cooling_reg_failed: -+ kbase_ipa_term(kbdev); -+ipa_init_failed: -+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); -+#endif /* CONFIG_DEVFREQ_THERMAL */ ++ err = sdei_api_event_enable(arg->event->event_num); + -+opp_notifier_failed: -+ kbase_devfreq_work_term(kbdev); ++ sdei_cross_call_return(arg, err); ++} + -+devfreq_work_init_failed: -+ if (devfreq_remove_device(kbdev->devfreq)) -+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err); ++int sdei_event_enable(u32 event_num) ++{ ++ int err = -EINVAL; ++ struct sdei_event *event; + -+ kbdev->devfreq = NULL; ++ mutex_lock(&sdei_events_lock); ++ event = sdei_event_find(event_num); ++ if (!event) { ++ mutex_unlock(&sdei_events_lock); ++ return -ENOENT; ++ } + -+devfreq_add_dev_failed: -+ kbase_devfreq_term_core_mask_table(kbdev); + -+init_core_mask_table_failed: -+ if (free_devfreq_freq_table) -+ kbase_devfreq_term_freq_table(kbdev); ++ cpus_read_lock(); ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ err = sdei_api_event_enable(event->event_num); ++ else ++ err = sdei_do_cross_call(_local_event_enable, event); ++ ++ if (!err) { ++ spin_lock(&sdei_list_lock); ++ event->reenable = true; ++ spin_unlock(&sdei_list_lock); ++ } ++ cpus_read_unlock(); ++ mutex_unlock(&sdei_events_lock); + + return err; +} + -+void kbase_devfreq_term(struct kbase_device *kbdev) ++static int sdei_api_event_disable(u32 event_num) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_DISABLE, event_num, 0, 0, ++ 0, 0, NULL); ++} ++ ++static void _ipi_event_disable(void *data) +{ + int err; ++ struct sdei_crosscall_args *arg = data; + -+ 
dev_dbg(kbdev->dev, "Term Mali devfreq\n"); ++ err = sdei_api_event_disable(arg->event->event_num); + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ if (kbdev->devfreq_cooling) -+ devfreq_cooling_unregister(kbdev->devfreq_cooling); -+#endif ++ sdei_cross_call_return(arg, err); ++} + -+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++int sdei_event_disable(u32 event_num) ++{ ++ int err = -EINVAL; ++ struct sdei_event *event; + -+ kbase_devfreq_work_term(kbdev); ++ mutex_lock(&sdei_events_lock); ++ event = sdei_event_find(event_num); ++ if (!event) { ++ mutex_unlock(&sdei_events_lock); ++ return -ENOENT; ++ } + -+ err = devfreq_remove_device(kbdev->devfreq); -+ if (err) -+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); ++ spin_lock(&sdei_list_lock); ++ event->reenable = false; ++ spin_unlock(&sdei_list_lock); ++ ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ err = sdei_api_event_disable(event->event_num); + else -+ kbdev->devfreq = NULL; ++ err = sdei_do_cross_call(_ipi_event_disable, event); ++ mutex_unlock(&sdei_events_lock); + -+ kbase_devfreq_term_core_mask_table(kbdev); ++ return err; ++} + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ if (!kbdev->model_data) -+ kbase_ipa_term(kbdev); -+ kfree(kbdev->model_data); -+#endif ++static int sdei_api_event_unregister(u32 event_num) ++{ ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_UNREGISTER, event_num, 0, ++ 0, 0, 0, NULL); +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h -new file mode 100644 -index 000000000..ac88b025a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h -@@ -0,0 +1,62 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _BASE_DEVFREQ_H_ -+#define _BASE_DEVFREQ_H_ -+ -+int kbase_devfreq_init(struct kbase_device *kbdev); -+ -+void kbase_devfreq_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off. -+ * @kbdev: Device pointer -+ * @freq: GPU frequency in HZ to be set when -+ * MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled -+ */ -+void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); -+ -+/** -+ * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq. -+ * @kbdev: Device pointer -+ * @work_type: The type of the devfreq work item, i.e. 
suspend or resume -+ */ -+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, -+ enum kbase_devfreq_work_type work_type); + -+/** -+ * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree -+ * into real frequency & voltage pair, along with -+ * core mask -+ * @kbdev: Device pointer -+ * @freq: Nominal frequency -+ * @core_mask: Pointer to u64 to store core mask to -+ * @freqs: Pointer to array of frequencies -+ * @volts: Pointer to array of voltages -+ * -+ * This function will only perform translation if an operating-points-v2-mali -+ * table is present in devicetree. If one is not present then it will return an -+ * untranslated frequency (and corresponding voltage) and all cores enabled. -+ * The voltages returned are in micro Volts (uV). -+ */ -+void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, -+ u64 *core_mask, unsigned long *freqs, unsigned long *volts); -+#endif /* _BASE_DEVFREQ_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c -new file mode 100644 -index 000000000..10e92ec94 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c -@@ -0,0 +1,200 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
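Not part of the patch: a minimal sketch of how a backend caller might use kbase_devfreq_opp_translate() as declared above. The helper name, the two-entry arrays (one slot per assumed GPU clock/regulator) and the debug print are illustrative assumptions.

/* Illustration only: translate a nominal OPP frequency into the real
 * frequency/voltage pair and core mask from the operating-points-v2-mali table.
 */
static void example_show_opp(struct kbase_device *kbdev, unsigned long nominal_freq)
{
        u64 core_mask = 0;
        unsigned long freqs[2] = { 0 };  /* assumption: at most two GPU clocks */
        unsigned long volts[2] = { 0 };

        kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts);

        dev_dbg(kbdev->dev, "opp %lu Hz -> %lu Hz @ %lu uV, core mask 0x%llx\n",
                nominal_freq, freqs[0], volts[0], (unsigned long long)core_mask);
}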
-+ * -+ */ ++/* Called directly by the hotplug callbacks */ ++static void _local_event_unregister(void *data) ++{ ++ int err; ++ struct sdei_crosscall_args *arg = data; + -+/* -+ * Base kernel property query backend APIs -+ */ ++ err = sdei_api_event_unregister(arg->event->event_num); + -+#include -+#include -+#include -+#include -+#include ++ sdei_cross_call_return(arg, err); ++} + -+int kbase_backend_gpuprops_get(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump) ++int sdei_event_unregister(u32 event_num) +{ -+ int i; -+ struct kbase_gpuprops_regdump registers = { 0 }; -+ -+ /* Fill regdump with the content of the relevant registers */ -+ registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); ++ int err; ++ struct sdei_event *event; + -+ registers.l2_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_FEATURES)); ++ WARN_ON(in_nmi()); + -+ registers.tiler_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_FEATURES)); -+ registers.mem_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(MEM_FEATURES)); -+ registers.mmu_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(MMU_FEATURES)); -+ registers.as_present = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(AS_PRESENT)); -+#if !MALI_USE_CSF -+ registers.js_present = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(JS_PRESENT)); -+#else /* !MALI_USE_CSF */ -+ registers.js_present = 0; -+#endif /* !MALI_USE_CSF */ ++ mutex_lock(&sdei_events_lock); ++ event = sdei_event_find(event_num); ++ if (!event) { ++ pr_warn("Event %u not registered\n", event_num); ++ err = -ENOENT; ++ goto unlock; ++ } + -+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) -+#if !MALI_USE_CSF -+ registers.js_features[i] = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(JS_FEATURES_REG(i))); -+#else /* !MALI_USE_CSF */ -+ registers.js_features[i] = 0; -+#endif /* !MALI_USE_CSF */ ++ spin_lock(&sdei_list_lock); ++ event->reregister = false; ++ event->reenable = false; ++ spin_unlock(&sdei_list_lock); + -+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ registers.texture_features[i] = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ err = sdei_api_event_unregister(event->event_num); ++ else ++ err = sdei_do_cross_call(_local_event_unregister, event); + -+ registers.thread_max_threads = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_THREADS)); -+ registers.thread_max_workgroup_size = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); -+ registers.thread_max_barrier_size = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); -+ registers.thread_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_FEATURES)); -+ registers.thread_tls_alloc = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_TLS_ALLOC)); ++ if (err) ++ goto unlock; + -+ registers.shader_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_LO)); -+ registers.shader_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_HI)); ++ sdei_event_destroy(event); ++unlock: ++ mutex_unlock(&sdei_events_lock); + -+ registers.tiler_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_PRESENT_LO)); -+ registers.tiler_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_PRESENT_HI)); ++ return err; ++} + -+ registers.l2_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_LO)); -+ registers.l2_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_HI)); ++/* ++ * unregister events, but don't destroy them as they are 
re-registered by ++ * sdei_reregister_shared(). ++ */ ++static int sdei_unregister_shared(void) ++{ ++ int err = 0; ++ struct sdei_event *event; + -+ registers.stack_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(STACK_PRESENT_LO)); -+ registers.stack_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(STACK_PRESENT_HI)); ++ mutex_lock(&sdei_events_lock); ++ spin_lock(&sdei_list_lock); ++ list_for_each_entry(event, &sdei_list, list) { ++ if (event->type != SDEI_EVENT_TYPE_SHARED) ++ continue; + -+ if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { -+ registers.gpu_features_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FEATURES_LO)); -+ registers.gpu_features_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FEATURES_HI)); -+ } else { -+ registers.gpu_features_lo = 0; -+ registers.gpu_features_hi = 0; ++ err = sdei_api_event_unregister(event->event_num); ++ if (err) ++ break; + } ++ spin_unlock(&sdei_list_lock); ++ mutex_unlock(&sdei_events_lock); + -+ if (!kbase_is_gpu_removed(kbdev)) { -+ *regdump = registers; -+ return 0; -+ } else -+ return -EIO; ++ return err; +} + -+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, -+ struct kbase_current_config_regdump *curr_config_regdump) ++static int sdei_api_event_register(u32 event_num, unsigned long entry_point, ++ void *arg, u64 flags, u64 affinity) +{ -+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) -+ return -EINVAL; -+ -+ curr_config_regdump->mem_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(MEM_FEATURES)); -+ -+ curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_LO)); -+ curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_HI)); -+ -+ curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_LO)); -+ curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_HI)); ++ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_REGISTER, event_num, ++ (unsigned long)entry_point, (unsigned long)arg, ++ flags, affinity, NULL); ++} + -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; ++/* Called directly by the hotplug callbacks */ ++static void _local_event_register(void *data) ++{ ++ int err; ++ struct sdei_registered_event *reg; ++ struct sdei_crosscall_args *arg = data; + -+ return 0; ++ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id()); ++ err = sdei_api_event_register(arg->event->event_num, sdei_entry_point, ++ reg, 0, 0); + ++ sdei_cross_call_return(arg, err); +} + -+int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump) ++int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) +{ -+ u32 coherency_features; -+ int error = 0; -+ -+ /* Ensure we can access the GPU registers */ -+ kbase_pm_register_access_enable(kbdev); ++ int err; ++ struct sdei_event *event; + -+ coherency_features = kbase_cache_get_coherency_features(kbdev); ++ WARN_ON(in_nmi()); + -+ if (kbase_is_gpu_removed(kbdev)) -+ error = -EIO; ++ mutex_lock(&sdei_events_lock); ++ if (sdei_event_find(event_num)) { ++ pr_warn("Event %u already registered\n", event_num); ++ err = -EBUSY; ++ goto unlock; ++ } + -+ regdump->coherency_features = coherency_features; ++ event = sdei_event_create(event_num, cb, arg); ++ if (IS_ERR(event)) { ++ err = PTR_ERR(event); ++ pr_warn("Failed to create event %u: %d\n", event_num, err); ++ goto unlock; ++ } + -+ if (kbase_hw_has_feature(kbdev, 
BASE_HW_FEATURE_CORE_FEATURES)) -+ regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); -+ else -+ regdump->core_features = 0; ++ cpus_read_lock(); ++ if (event->type == SDEI_EVENT_TYPE_SHARED) { ++ err = sdei_api_event_register(event->event_num, ++ sdei_entry_point, ++ event->registered, ++ SDEI_EVENT_REGISTER_RM_ANY, 0); ++ } else { ++ err = sdei_do_cross_call(_local_event_register, event); ++ if (err) ++ sdei_do_cross_call(_local_event_unregister, event); ++ } + -+ kbase_pm_register_access_disable(kbdev); ++ if (err) { ++ sdei_event_destroy(event); ++ pr_warn("Failed to register event %u: %d\n", event_num, err); ++ goto cpu_unlock; ++ } + -+ return error; ++ spin_lock(&sdei_list_lock); ++ event->reregister = true; ++ spin_unlock(&sdei_list_lock); ++cpu_unlock: ++ cpus_read_unlock(); ++unlock: ++ mutex_unlock(&sdei_events_lock); ++ return err; +} + -+int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump) ++static int sdei_reregister_shared(void) +{ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { -+ u32 l2_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_FEATURES)); -+ u32 l2_config = -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); -+ u32 asn_hash[ASN_HASH_COUNT] = { -+ 0, -+ }; -+ int i; ++ int err = 0; ++ struct sdei_event *event; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { -+ for (i = 0; i < ASN_HASH_COUNT; i++) -+ asn_hash[i] = kbase_reg_read( -+ kbdev, GPU_CONTROL_REG(ASN_HASH(i))); -+ } ++ mutex_lock(&sdei_events_lock); ++ spin_lock(&sdei_list_lock); ++ list_for_each_entry(event, &sdei_list, list) { ++ if (event->type != SDEI_EVENT_TYPE_SHARED) ++ continue; + -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; ++ if (event->reregister) { ++ err = sdei_api_event_register(event->event_num, ++ sdei_entry_point, event->registered, ++ SDEI_EVENT_REGISTER_RM_ANY, 0); ++ if (err) { ++ pr_err("Failed to re-register event %u\n", ++ event->event_num); ++ sdei_event_destroy_llocked(event); ++ break; ++ } ++ } + -+ regdump->l2_features = l2_features; -+ regdump->l2_config = l2_config; -+ for (i = 0; i < ASN_HASH_COUNT; i++) -+ regdump->l2_asn_hash[i] = asn_hash[i]; ++ if (event->reenable) { ++ err = sdei_api_event_enable(event->event_num); ++ if (err) { ++ pr_err("Failed to re-enable event %u\n", ++ event->event_num); ++ break; ++ } ++ } + } ++ spin_unlock(&sdei_list_lock); ++ mutex_unlock(&sdei_events_lock); + -+ return 0; ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c -new file mode 100644 -index 000000000..53578ded5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c -@@ -0,0 +1,481 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
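Not part of the patch: a hedged sketch of a kernel client of the SDEI API added here, pairing sdei_event_register() with sdei_event_enable(). The callback body is a placeholder and the event number is assumed to come from firmware.

/* Illustration only: bind and enable one SDEI event. The callback runs
 * in the constrained SDEI handler context, so it does minimal work.
 */
static int example_sdei_cb(u32 event_num, struct pt_regs *regs, void *arg)
{
        return 0;
}

static int example_sdei_bind(u32 event_num)
{
        int err;

        err = sdei_event_register(event_num, example_sdei_cb, NULL);
        if (err)
                return err;

        err = sdei_event_enable(event_num);
        if (err)
                sdei_event_unregister(event_num);

        return err;
}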
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/* -+ * GPU backend instrumentation APIs. -+ */ -+ -+#include -+#include -+#include -+#include -+#include + -+static int wait_prfcnt_ready(struct kbase_device *kbdev) ++static int sdei_cpuhp_down(unsigned int cpu) +{ -+ u32 loops; ++ struct sdei_event *event; ++ int err; + -+ for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { -+ const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & -+ GPU_STATUS_PRFCNT_ACTIVE; -+ if (!prfcnt_active) -+ return 0; ++ /* un-register private events */ ++ spin_lock(&sdei_list_lock); ++ list_for_each_entry(event, &sdei_list, list) { ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ continue; ++ ++ err = sdei_do_local_call(_local_event_unregister, event); ++ if (err) { ++ pr_err("Failed to unregister event %u: %d\n", ++ event->event_num, err); ++ } + } ++ spin_unlock(&sdei_list_lock); + -+ dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); -+ return -EBUSY; ++ return sdei_mask_local_cpu(); +} + -+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_instr_hwcnt_enable *enable) ++static int sdei_cpuhp_up(unsigned int cpu) +{ -+ unsigned long flags; -+ int err = -EINVAL; -+ u32 irq_mask; -+ u32 prfcnt_config; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* alignment failure */ -+ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) -+ return err; ++ struct sdei_event *event; ++ int err; + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ /* re-register/enable private events */ ++ spin_lock(&sdei_list_lock); ++ list_for_each_entry(event, &sdei_list, list) { ++ if (event->type == SDEI_EVENT_TYPE_SHARED) ++ continue; + -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { -+ /* Instrumentation is already enabled */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ return err; -+ } ++ if (event->reregister) { ++ err = sdei_do_local_call(_local_event_register, event); ++ if (err) { ++ pr_err("Failed to re-register event %u: %d\n", ++ event->event_num, err); ++ } ++ } + -+ if (kbase_is_gpu_removed(kbdev)) { -+ /* GPU has been removed by Arbiter */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ return err; ++ if (event->reenable) { ++ err = sdei_do_local_call(_local_event_enable, event); ++ if (err) { ++ pr_err("Failed to re-enable event %u: %d\n", ++ event->event_num, err); ++ } ++ } + } ++ spin_unlock(&sdei_list_lock); + -+ /* Enable interrupt */ -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | -+ PRFCNT_SAMPLE_COMPLETED); -+ -+ /* In use, this context is the owner */ -+ kbdev->hwcnt.kctx = kctx; -+ /* Remember the dump address so we can reprogram it later */ -+ kbdev->hwcnt.addr = enable->dump_buffer; -+ kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; -+ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ -+ /* Configure */ -+ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+ prfcnt_config |= kbdev->hwcnt.backend.override_counter_set -+ << PRFCNT_CONFIG_SETSELECT_SHIFT; -+#else -+ prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; -+#endif -+ -+ /* Wait until prfcnt config register can be written */ -+ err = wait_prfcnt_ready(kbdev); -+ 
if (err) -+ return err; -+ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), -+ prfcnt_config | PRFCNT_CONFIG_MODE_OFF); -+ -+ /* Wait until prfcnt is disabled before writing configuration registers */ -+ err = wait_prfcnt_ready(kbdev); -+ if (err) -+ return err; -+ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), -+ enable->dump_buffer & 0xFFFFFFFF); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), -+ enable->dump_buffer >> 32); ++ return sdei_unmask_local_cpu(); ++} + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), -+ enable->fe_bm); ++/* When entering idle, mask/unmask events for this cpu */ ++static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action, ++ void *data) ++{ ++ int rv; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), -+ enable->shader_bm); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), -+ enable->mmu_l2_bm); ++ WARN_ON_ONCE(preemptible()); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), -+ enable->tiler_bm); ++ switch (action) { ++ case CPU_PM_ENTER: ++ rv = sdei_mask_local_cpu(); ++ break; ++ case CPU_PM_EXIT: ++ case CPU_PM_ENTER_FAILED: ++ rv = sdei_unmask_local_cpu(); ++ break; ++ default: ++ return NOTIFY_DONE; ++ } + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), -+ prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); ++ if (rv) ++ return notifier_from_errno(rv); + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ return NOTIFY_OK; ++} + -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); ++static struct notifier_block sdei_pm_nb = { ++ .notifier_call = sdei_pm_notifier, ++}; + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++static int sdei_device_suspend(struct device *dev) ++{ ++ on_each_cpu(_ipi_mask_cpu, NULL, true); + -+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); + return 0; +} + -+static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) ++static int sdei_device_resume(struct device *dev) +{ -+ u32 irq_mask; ++ on_each_cpu(_ipi_unmask_cpu, NULL, true); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->hwcnt.lock); -+ -+ if (kbase_is_gpu_removed(kbdev)) -+ /* GPU has been removed by Arbiter */ -+ return; ++ return 0; ++} + -+ /* Disable interrupt */ -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++/* ++ * We need all events to be reregistered when we resume from hibernate. ++ * ++ * The sequence is freeze->thaw. Reboot. freeze->restore. We unregister ++ * events during freeze, then re-register and re-enable them during thaw ++ * and restore. ++ */ ++static int sdei_device_freeze(struct device *dev) ++{ ++ int err; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); ++ /* unregister private events */ ++ cpuhp_remove_state(sdei_hp_state); + -+ /* Wait until prfcnt config register can be written, then disable the counters. -+ * Return value is ignored as we are disabling anyway. 
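Not part of the patch: a sketch of how the enable descriptor consumed by kbase_instr_hwcnt_enable_internal() above might be filled in. The helper name and the all-ones block masks are assumptions; the caller is expected to hold kbdev->hwaccess_lock, as the function asserts.

/* Illustration only: enable HW counter dumping into a 2048-byte aligned
 * GPU buffer (the alignment is checked by the function above).
 */
static int example_enable_counters(struct kbase_device *kbdev,
                                   struct kbase_context *kctx,
                                   u64 gpu_dump_va, u64 dump_bytes)
{
        struct kbase_instr_hwcnt_enable enable = {
                .dump_buffer = gpu_dump_va,
                .dump_buffer_bytes = dump_bytes,
                .fe_bm = ~0u,      /* front-end counter blocks */
                .shader_bm = ~0u,  /* shader core counter blocks */
                .tiler_bm = ~0u,
                .mmu_l2_bm = ~0u,
        };

        /* Caller holds kbdev->hwaccess_lock, as asserted above. */
        return kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
}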
-+ */ -+ wait_prfcnt_ready(kbdev); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); ++ err = sdei_unregister_shared(); ++ if (err) ++ return err; + -+ kbdev->hwcnt.kctx = NULL; -+ kbdev->hwcnt.addr = 0ULL; -+ kbdev->hwcnt.addr_bytes = 0ULL; ++ return 0; +} + -+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) ++static int sdei_device_thaw(struct device *dev) +{ -+ unsigned long flags, pm_flags; -+ struct kbase_device *kbdev = kctx->kbdev; ++ int err; + -+ while (1) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ /* re-register shared events */ ++ err = sdei_reregister_shared(); ++ if (err) { ++ pr_warn("Failed to re-register shared events...\n"); ++ sdei_mark_interface_broken(); ++ return err; ++ } + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { -+ /* Instrumentation is in unrecoverable error state, -+ * there is nothing for us to do. -+ */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); -+ /* Already disabled, return no error. */ -+ return 0; -+ } ++ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI", ++ &sdei_cpuhp_up, &sdei_cpuhp_down); ++ if (err < 0) { ++ pr_warn("Failed to re-register CPU hotplug notifier...\n"); ++ return err; ++ } + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { -+ /* Instrumentation is not enabled */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); -+ return -EINVAL; -+ } ++ sdei_hp_state = err; ++ return 0; ++} + -+ if (kbdev->hwcnt.kctx != kctx) { -+ /* Instrumentation has been setup for another context */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); -+ return -EINVAL; -+ } ++static int sdei_device_restore(struct device *dev) ++{ ++ int err; + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) -+ break; ++ err = sdei_platform_reset(); ++ if (err) ++ return err; + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ return sdei_device_thaw(dev); ++} + -+ /* Ongoing dump/setup - wait for its completion */ -+ wait_event(kbdev->hwcnt.backend.wait, -+ kbdev->hwcnt.backend.triggered != 0); -+ } ++static const struct dev_pm_ops sdei_pm_ops = { ++ .suspend = sdei_device_suspend, ++ .resume = sdei_device_resume, ++ .freeze = sdei_device_freeze, ++ .thaw = sdei_device_thaw, ++ .restore = sdei_device_restore, ++}; + -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; -+ kbdev->hwcnt.backend.triggered = 0; ++/* ++ * Mask all CPUs and unregister all events on panic, reboot or kexec. ++ */ ++static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action, ++ void *data) ++{ ++ /* ++ * We are going to reset the interface, after this there is no point ++ * doing work when we take CPUs offline. 
++ */ ++ cpuhp_remove_state(sdei_hp_state); + -+ kbasep_instr_hwc_disable_hw_prfcnt(kbdev); ++ sdei_platform_reset(); + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ return NOTIFY_OK; ++} + -+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", -+ kctx); ++static struct notifier_block sdei_reboot_nb = { ++ .notifier_call = sdei_reboot_notifier, ++}; + -+ return 0; ++static void sdei_smccc_smc(unsigned long function_id, ++ unsigned long arg0, unsigned long arg1, ++ unsigned long arg2, unsigned long arg3, ++ unsigned long arg4, struct arm_smccc_res *res) ++{ ++ arm_smccc_smc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); +} ++NOKPROBE_SYMBOL(sdei_smccc_smc); + -+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) ++static void sdei_smccc_hvc(unsigned long function_id, ++ unsigned long arg0, unsigned long arg1, ++ unsigned long arg2, unsigned long arg3, ++ unsigned long arg4, struct arm_smccc_res *res) +{ -+ unsigned long flags; -+ int err = -EINVAL; -+ struct kbase_device *kbdev = kctx->kbdev; ++ arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); ++} ++NOKPROBE_SYMBOL(sdei_smccc_hvc); + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, ++ sdei_event_callback *critical_cb) ++{ ++ int err; ++ u64 result; ++ u32 event_num; ++ sdei_event_callback *cb; + -+ if (kbdev->hwcnt.kctx != kctx) { -+ /* The instrumentation has been setup for another context */ -+ goto unlock; -+ } ++ if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) ++ return -EOPNOTSUPP; + -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { -+ /* HW counters are disabled or another dump is ongoing, or we're -+ * resetting, or we are in unrecoverable error state. ++ event_num = ghes->generic->notify.vector; ++ if (event_num == 0) { ++ /* ++ * Event 0 is reserved by the specification for ++ * SDEI_EVENT_SIGNAL. 
+ */ -+ goto unlock; -+ } -+ -+ if (kbase_is_gpu_removed(kbdev)) { -+ /* GPU has been removed by Arbiter */ -+ goto unlock; ++ return -EINVAL; + } + -+ kbdev->hwcnt.backend.triggered = 0; -+ -+ /* Mark that we're dumping - the PF handler can signal that we faulted -+ */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; -+ -+ /* Wait until prfcnt is ready to request dump */ -+ err = wait_prfcnt_ready(kbdev); ++ err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY, ++ &result); + if (err) -+ goto unlock; -+ -+ /* Reconfigure the dump address */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), -+ kbdev->hwcnt.addr & 0xFFFFFFFF); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), -+ kbdev->hwcnt.addr >> 32); -+ -+ /* Start dumping */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, -+ kbdev->hwcnt.addr); ++ return err; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_PRFCNT_SAMPLE); ++ if (result == SDEI_EVENT_PRIORITY_CRITICAL) ++ cb = critical_cb; ++ else ++ cb = normal_cb; + -+ dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); ++ err = sdei_event_register(event_num, cb, ghes); ++ if (!err) ++ err = sdei_event_enable(event_num); + -+ unlock: -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + -+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, -+ bool * const success) ++int sdei_unregister_ghes(struct ghes *ghes) +{ -+ unsigned long flags; -+ bool complete = false; -+ struct kbase_device *kbdev = kctx->kbdev; ++ int i; ++ int err; ++ u32 event_num = ghes->generic->notify.vector; + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ might_sleep(); + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { -+ *success = true; -+ complete = true; -+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ *success = false; -+ complete = true; -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ } ++ if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) ++ return -EOPNOTSUPP; + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ /* ++ * The event may be running on another CPU. Disable it ++ * to stop new events, then try to unregister a few times. ++ */ ++ err = sdei_event_disable(event_num); ++ if (err) ++ return err; + -+ return complete; ++ for (i = 0; i < 3; i++) { ++ err = sdei_event_unregister(event_num); ++ if (err != -EINPROGRESS) ++ break; ++ ++ schedule(); ++ } ++ ++ return err; +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + -+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) ++static int sdei_get_conduit(struct platform_device *pdev) +{ -+ unsigned long flags; ++ const char *method; ++ struct device_node *np = pdev->dev.of_node; + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ sdei_firmware_call = NULL; ++ if (np) { ++ if (of_property_read_string(np, "method", &method)) { ++ pr_warn("missing \"method\" property\n"); ++ return SMCCC_CONDUIT_NONE; ++ } + -+ /* If the state is in unrecoverable error, we already wake_up the waiter -+ * and don't need to do any action when sample is done. 
-+ */ ++ if (!strcmp("hvc", method)) { ++ sdei_firmware_call = &sdei_smccc_hvc; ++ return SMCCC_CONDUIT_HVC; ++ } else if (!strcmp("smc", method)) { ++ sdei_firmware_call = &sdei_smccc_smc; ++ return SMCCC_CONDUIT_SMC; ++ } + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); -+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { -+ /* All finished and idle */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); ++ pr_warn("invalid \"method\" property: %s\n", method); ++ } else if (!acpi_disabled) { ++ if (acpi_psci_use_hvc()) { ++ sdei_firmware_call = &sdei_smccc_hvc; ++ return SMCCC_CONDUIT_HVC; ++ } else { ++ sdei_firmware_call = &sdei_smccc_smc; ++ return SMCCC_CONDUIT_SMC; ++ } + } + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return SMCCC_CONDUIT_NONE; +} + -+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) ++static int sdei_probe(struct platform_device *pdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned long flags; + int err; ++ u64 ver = 0; ++ int conduit; + -+ /* Wait for dump & cache clean to complete */ -+ wait_event(kbdev->hwcnt.backend.wait, -+ kbdev->hwcnt.backend.triggered != 0); -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ conduit = sdei_get_conduit(pdev); ++ if (!sdei_firmware_call) ++ return 0; + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ err = -EINVAL; -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { -+ err = -EIO; -+ } else { -+ /* Dump done */ -+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_IDLE); -+ err = 0; ++ err = sdei_api_get_version(&ver); ++ if (err) { ++ pr_err("Failed to get SDEI version: %d\n", err); ++ sdei_mark_interface_broken(); ++ return err; + } + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ pr_info("SDEIv%d.%d (0x%x) detected in firmware.\n", ++ (int)SDEI_VERSION_MAJOR(ver), (int)SDEI_VERSION_MINOR(ver), ++ (int)SDEI_VERSION_VENDOR(ver)); + -+ return err; -+} ++ if (SDEI_VERSION_MAJOR(ver) != 1) { ++ pr_warn("Conflicting SDEI version detected.\n"); ++ sdei_mark_interface_broken(); ++ return -EINVAL; ++ } + -+int kbase_instr_hwcnt_clear(struct kbase_context *kctx) -+{ -+ unsigned long flags; -+ int err = -EINVAL; -+ struct kbase_device *kbdev = kctx->kbdev; ++ err = sdei_platform_reset(); ++ if (err) ++ return err; + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ sdei_entry_point = sdei_arch_get_entry_point(conduit); ++ if (!sdei_entry_point) { ++ /* Not supported due to hardware or boot configuration */ ++ sdei_mark_interface_broken(); ++ return 0; ++ } + -+ /* Check it's the context previously set up and we're not in IDLE -+ * state. 
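Not part of the patch: a minimal sketch of the manual dump sequence built from the two calls above, assuming counters were already enabled for kctx.

/* Illustration only: trigger a counter sample and block until it lands. */
static int example_dump_counters(struct kbase_context *kctx)
{
        int err;

        err = kbase_instr_hwcnt_request_dump(kctx);
        if (err)
                return err;

        return kbase_instr_hwcnt_wait_for_dump(kctx);
}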
-+ */ -+ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != -+ KBASE_INSTR_STATE_IDLE) -+ goto unlock; ++ err = cpu_pm_register_notifier(&sdei_pm_nb); ++ if (err) { ++ pr_warn("Failed to register CPU PM notifier...\n"); ++ goto error; ++ } + -+ if (kbase_is_gpu_removed(kbdev)) { -+ /* GPU has been removed by Arbiter */ -+ goto unlock; ++ err = register_reboot_notifier(&sdei_reboot_nb); ++ if (err) { ++ pr_warn("Failed to register reboot notifier...\n"); ++ goto remove_cpupm; + } + -+ /* Wait until prfcnt is ready to clear */ -+ err = wait_prfcnt_ready(kbdev); -+ if (err) -+ goto unlock; ++ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI", ++ &sdei_cpuhp_up, &sdei_cpuhp_down); ++ if (err < 0) { ++ pr_warn("Failed to register CPU hotplug notifier...\n"); ++ goto remove_reboot; ++ } + -+ /* Clear the counters */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_PRFCNT_CLEAR); ++ sdei_hp_state = err; + -+unlock: -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return 0; ++ ++remove_reboot: ++ unregister_reboot_notifier(&sdei_reboot_nb); ++ ++remove_cpupm: ++ cpu_pm_unregister_notifier(&sdei_pm_nb); ++ ++error: ++ sdei_mark_interface_broken(); + return err; +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + -+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++static const struct of_device_id sdei_of_match[] = { ++ { .compatible = "arm,sdei-1.0" }, ++ {} ++}; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++static struct platform_driver sdei_driver = { ++ .driver = { ++ .name = "sdei", ++ .pm = &sdei_pm_ops, ++ .of_match_table = sdei_of_match, ++ }, ++ .probe = sdei_probe, ++}; + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++static bool __init sdei_present_acpi(void) ++{ ++ acpi_status status; ++ struct acpi_table_header *sdei_table_header; + -+ /* If we already in unrecoverable error state, early return. */ -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ return; -+ } ++ if (acpi_disabled) ++ return false; + -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR; ++ status = acpi_get_table(ACPI_SIG_SDEI, 0, &sdei_table_header); ++ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { ++ const char *msg = acpi_format_exception(status); + -+ /* Need to disable HW if it's not disabled yet. */ -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) -+ kbasep_instr_hwc_disable_hw_prfcnt(kbdev); ++ pr_info("Failed to get ACPI:SDEI table, %s\n", msg); ++ } ++ if (ACPI_FAILURE(status)) ++ return false; + -+ /* Wake up any waiters. 
*/ -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); ++ acpi_put_table(sdei_table_header); + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return true; +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error); + -+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev) ++void __init acpi_sdei_init(void) +{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ struct platform_device *pdev; ++ int ret; + -+ /* A reset is the only way to exit the unrecoverable error state */ -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; ++ if (!sdei_present_acpi()) ++ return; + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ pdev = platform_device_register_simple(sdei_driver.driver.name, ++ 0, NULL, 0); ++ if (IS_ERR(pdev)) { ++ ret = PTR_ERR(pdev); ++ platform_driver_unregister(&sdei_driver); ++ pr_info("Failed to register ACPI:SDEI platform device %d\n", ++ ret); ++ } +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset); + -+int kbase_instr_backend_init(struct kbase_device *kbdev) ++static int __init sdei_init(void) +{ -+ spin_lock_init(&kbdev->hwcnt.lock); -+ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; -+ -+ init_waitqueue_head(&kbdev->hwcnt.backend.wait); -+ -+ kbdev->hwcnt.backend.triggered = 0; -+ -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+/* Use the build time option for the override default. */ -+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) -+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; -+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) -+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; -+#else -+ /* Default to primary */ -+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; -+#endif -+#endif -+ return 0; ++ return platform_driver_register(&sdei_driver); +} ++arch_initcall(sdei_init); + -+void kbase_instr_backend_term(struct kbase_device *kbdev) ++int sdei_event_handler(struct pt_regs *regs, ++ struct sdei_registered_event *arg) +{ -+ CSTD_UNUSED(kbdev); ++ int err; ++ u32 event_num = arg->event_num; ++ ++ err = arg->callback(event_num, regs, arg->callback_arg); ++ if (err) ++ pr_err_ratelimited("event %u on CPU %u failed with error: %d\n", ++ event_num, smp_processor_id(), err); ++ ++ return err; +} ++NOKPROBE_SYMBOL(sdei_event_handler); + -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) ++void sdei_handler_abort(void) +{ -+ /* No validation is done on the debugfs input. Invalid input could cause -+ * performance counter errors. This is acceptable since this is a debug -+ * only feature and users should know what they are doing. -+ * -+ * Valid inputs are the values accepted bythe SET_SELECT bits of the -+ * PRFCNT_CONFIG register as defined in the architecture specification. ++ /* ++ * If the crash happened in an SDEI event handler then we need to ++ * finish the handler with the firmware so that we can have working ++ * interrupts in the crash kernel. 
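Not part of the patch: for reference, a devicetree fragment of the shape sdei_get_conduit() above parses. The compatible string and the "method" values ("smc" or "hvc") come from the driver code; the node placement is an assumption.

/*
 * Illustrative firmware node (assumed layout):
 *
 *      sdei {
 *              compatible = "arm,sdei-1.0";
 *              method = "smc";
 *      };
 */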
+ */ -+ debugfs_create_u8("hwcnt_set_select", 0644, -+ kbdev->mali_debugfs_directory, -+ (u8 *)&kbdev->hwcnt.backend.override_counter_set); ++ if (__this_cpu_read(sdei_active_critical_event)) { ++ pr_warn("still in SDEI critical event context, attempting to finish handler.\n"); ++ __sdei_handler_abort(); ++ __this_cpu_write(sdei_active_critical_event, NULL); ++ } ++ if (__this_cpu_read(sdei_active_normal_event)) { ++ pr_warn("still in SDEI normal event context, attempting to finish handler.\n"); ++ __sdei_handler_abort(); ++ __this_cpu_write(sdei_active_normal_event, NULL); ++ } +} -+#endif -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h +diff --git a/drivers/firmware/rockchip_sip.c b/drivers/firmware/rockchip_sip.c new file mode 100644 -index 000000000..bd2eb8a12 +index 000000000..e483899d7 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/firmware/rockchip_sip.c +@@ -0,0 +1,715 @@ +/* -+ * -+ * (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/* -+ * Backend-specific instrumentation definitions -+ */ -+ -+#ifndef _KBASE_INSTR_DEFS_H_ -+#define _KBASE_INSTR_DEFS_H_ -+ -+#include -+ -+/* -+ * Instrumentation State Machine States ++ * Copyright (C) 2016, Fuzhou Rockchip Electronics Co., Ltd + */ -+enum kbase_instr_state { -+ /* State where instrumentation is not active */ -+ KBASE_INSTR_STATE_DISABLED = 0, -+ /* State machine is active and ready for a command. */ -+ KBASE_INSTR_STATE_IDLE, -+ /* Hardware is currently dumping a frame. */ -+ KBASE_INSTR_STATE_DUMPING, -+ /* An error has occurred during DUMPING (page fault). */ -+ KBASE_INSTR_STATE_FAULT, -+ /* An unrecoverable error has occurred, a reset is the only way to exit -+ * from unrecoverable error state. 
-+ */ -+ KBASE_INSTR_STATE_UNRECOVERABLE_ERROR, -+}; + -+/* Structure used for instrumentation and HW counters dumping */ -+struct kbase_instr_backend { -+ wait_queue_head_t wait; -+ int triggered; -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+ enum kbase_hwcnt_physical_set override_counter_set; ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_ARM ++#include +#endif ++#include ++#include ++#include ++#include ++#include ++#include + -+ enum kbase_instr_state state; -+}; -+ -+#endif /* _KBASE_INSTR_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h -new file mode 100644 -index 000000000..332cc6944 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/* -+ * Backend-specific HW access instrumentation APIs -+ */ -+ -+#ifndef _KBASE_INSTR_INTERNAL_H_ -+#define _KBASE_INSTR_INTERNAL_H_ -+ -+/** -+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning -+ * @data: a &struct work_struct -+ */ -+void kbasep_cache_clean_worker(struct work_struct *data); -+ -+/** -+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received -+ * @kbdev: Kbase device -+ */ -+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); -+ -+#endif /* _KBASE_INSTR_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h -new file mode 100644 -index 000000000..66cda8c0b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h -@@ -0,0 +1,47 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ -+ -+/* -+ * Backend specific IRQ APIs -+ */ -+ -+#ifndef _KBASE_IRQ_INTERNAL_H_ -+#define _KBASE_IRQ_INTERNAL_H_ -+ -+int kbase_install_interrupts(struct kbase_device *kbdev); -+ -+void kbase_release_interrupts(struct kbase_device *kbdev); -+ -+/** -+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed -+ * execution -+ * @kbdev: The kbase device -+ */ -+void kbase_synchronize_irqs(struct kbase_device *kbdev); -+ -+int kbasep_common_test_interrupt_handlers( -+ struct kbase_device * const kbdev); -+ -+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); -+int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, int irq_type); ++#ifdef CONFIG_64BIT ++#define PSCI_FN_NATIVE(version, name) PSCI_##version##_FN64_##name ++#else ++#define PSCI_FN_NATIVE(version, name) PSCI_##version##_FN_##name ++#endif + -+#endif /* _KBASE_IRQ_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c -new file mode 100644 -index 000000000..eb63b2c56 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c -@@ -0,0 +1,503 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
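Not part of the patch: a hedged sketch of how debug/test code might use kbase_set_custom_irq_handler() declared above; passing NULL restores the default handler, and JOB_IRQ_TAG is the job-IRQ index defined in the .c file that follows.

/* Illustration only: swap in a custom JOB IRQ handler, then restore it. */
static int example_run_with_test_handler(struct kbase_device *kbdev,
                                         irq_handler_t test_handler)
{
        int err;

        err = kbase_set_custom_irq_handler(kbdev, test_handler, JOB_IRQ_TAG);
        if (err)
                return err;

        /* ... exercise the interrupt under test here ... */

        return kbase_set_custom_irq_handler(kbdev, NULL, JOB_IRQ_TAG);
}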
-+ * -+ */ ++#define SIZE_PAGE(n) ((n) << 12) + -+#include -+#include -+#include ++static struct arm_smccc_res __invoke_sip_fn_smc(unsigned long function_id, ++ unsigned long arg0, ++ unsigned long arg1, ++ unsigned long arg2) ++{ ++ struct arm_smccc_res res; + -+#include ++ arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res); ++ return res; ++} + -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++struct arm_smccc_res sip_smc_dram(u32 arg0, u32 arg1, u32 arg2) ++{ ++ return __invoke_sip_fn_smc(SIP_DRAM_CONFIG, arg0, arg1, arg2); ++} ++EXPORT_SYMBOL_GPL(sip_smc_dram); + -+/* GPU IRQ Tags */ -+#define JOB_IRQ_TAG 0 -+#define MMU_IRQ_TAG 1 -+#define GPU_IRQ_TAG 2 ++struct arm_smccc_res sip_smc_get_atf_version(void) ++{ ++ return __invoke_sip_fn_smc(SIP_ATF_VERSION, 0, 0, 0); ++} ++EXPORT_SYMBOL_GPL(sip_smc_get_atf_version); + -+static void *kbase_tag(void *ptr, u32 tag) ++struct arm_smccc_res sip_smc_get_sip_version(void) +{ -+ return (void *)(((uintptr_t) ptr) | tag); ++ return __invoke_sip_fn_smc(SIP_SIP_VERSION, 0, 0, 0); +} ++EXPORT_SYMBOL_GPL(sip_smc_get_sip_version); + -+static void *kbase_untag(void *ptr) ++int sip_smc_set_suspend_mode(u32 ctrl, u32 config1, u32 config2) +{ -+ return (void *)(((uintptr_t) ptr) & ~3); ++ struct arm_smccc_res res; ++ ++ res = __invoke_sip_fn_smc(SIP_SUSPEND_MODE, ctrl, config1, config2); ++ return res.a0; +} ++EXPORT_SYMBOL_GPL(sip_smc_set_suspend_mode); + -+static irqreturn_t kbase_job_irq_handler(int irq, void *data) ++struct arm_smccc_res sip_smc_get_suspend_info(u32 info) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ struct arm_smccc_res res; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ res = __invoke_sip_fn_smc(SIP_SUSPEND_MODE, info, 0, 0); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_get_suspend_info); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; -+ } ++int sip_smc_virtual_poweroff(void) ++{ ++ struct arm_smccc_res res; + -+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); ++ res = __invoke_sip_fn_smc(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), 0, 0, 0); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_smc_virtual_poweroff); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++int sip_smc_remotectl_config(u32 func, u32 data) ++{ ++ struct arm_smccc_res res; + -+ if (!val) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; -+ } ++ res = __invoke_sip_fn_smc(SIP_REMOTECTL_CFG, func, data, 0); + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_smc_remotectl_config); + -+#if MALI_USE_CSF -+ /* call the csf interrupt handler */ -+ kbase_csf_interrupt(kbdev, val); -+#else -+ kbase_job_done(kbdev, val); -+#endif ++u32 sip_smc_secure_reg_read(u32 addr_phy) ++{ ++ struct arm_smccc_res res; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ res = __invoke_sip_fn_smc(SIP_ACCESS_REG, 0, addr_phy, SECURE_REG_RD); ++ if (res.a0) ++ pr_err("%s error: %d, addr phy: 0x%x\n", ++ __func__, (int)res.a0, addr_phy); + -+ return IRQ_HANDLED; ++ return res.a1; +} ++EXPORT_SYMBOL_GPL(sip_smc_secure_reg_read); + -+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) ++int 
sip_smc_secure_reg_write(u32 addr_phy, u32 val) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ struct arm_smccc_res res; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ res = __invoke_sip_fn_smc(SIP_ACCESS_REG, val, addr_phy, SECURE_REG_WR); ++ if (res.a0) ++ pr_err("%s error: %d, addr phy: 0x%x\n", ++ __func__, (int)res.a0, addr_phy); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; -+ } ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_smc_secure_reg_write); + -+ atomic_inc(&kbdev->faults_pending); ++static void *sip_map(phys_addr_t start, size_t size) ++{ ++ struct page **pages; ++ phys_addr_t page_start; ++ unsigned int page_count; ++ pgprot_t prot; ++ unsigned int i; ++ void *vaddr; + -+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); ++ if (!pfn_valid(__phys_to_pfn(start))) ++ return ioremap(start, size); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ page_start = start - offset_in_page(start); ++ page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); + -+ if (!val) { -+ atomic_dec(&kbdev->faults_pending); -+ return IRQ_NONE; -+ } ++ prot = pgprot_noncached(PAGE_KERNEL); + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); ++ if (!pages) { ++ pr_err("%s: Failed to allocate array for %u pages\n", ++ __func__, page_count); ++ return NULL; ++ } + -+ kbase_mmu_interrupt(kbdev, val); ++ for (i = 0; i < page_count; i++) ++ pages[i] = phys_to_page(page_start + i * PAGE_SIZE); + -+ atomic_dec(&kbdev->faults_pending); ++ vaddr = vmap(pages, page_count, VM_MAP, prot); ++ kfree(pages); + -+ return IRQ_HANDLED; ++ /* ++ * Since vmap() uses page granularity, we must add the offset ++ * into the page here, to get the byte granularity address ++ * into the mapping to represent the actual "start" location. 
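Not part of the patch: a minimal sketch of a read-modify-write through the secure-register SIP calls above; the physical address and bit position are placeholders.

/* Illustration only: set one bit in a secure-only register via ATF. */
static int example_set_secure_bit(u32 addr_phy, unsigned int bit)
{
        u32 val = sip_smc_secure_reg_read(addr_phy);

        return sip_smc_secure_reg_write(addr_phy, val | BIT(bit));
}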
++ */ ++ return vaddr + offset_in_page(start); +} + -+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) ++struct arm_smccc_res sip_smc_request_share_mem(u32 page_num, ++ share_page_type_t page_type) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; -+ } -+ -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); -+ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ if (!val) -+ return IRQ_NONE; ++ struct arm_smccc_res res; ++ unsigned long share_mem_phy; + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ res = __invoke_sip_fn_smc(SIP_SHARE_MEM, page_num, page_type, 0); ++ if (IS_SIP_ERROR(res.a0)) ++ goto error; + -+ kbase_gpu_interrupt(kbdev, val); ++ share_mem_phy = res.a1; ++ res.a1 = (unsigned long)sip_map(share_mem_phy, SIZE_PAGE(page_num)); + -+ return IRQ_HANDLED; ++error: ++ return res; +} ++EXPORT_SYMBOL_GPL(sip_smc_request_share_mem); + -+static irq_handler_t kbase_handler_table[] = { -+ [JOB_IRQ_TAG] = kbase_job_irq_handler, -+ [MMU_IRQ_TAG] = kbase_mmu_irq_handler, -+ [GPU_IRQ_TAG] = kbase_gpu_irq_handler, -+}; -+ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define JOB_IRQ_HANDLER JOB_IRQ_TAG -+#define GPU_IRQ_HANDLER GPU_IRQ_TAG -+ -+/** -+ * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() -+ * @irq: IRQ number -+ * @data: Data associated with this IRQ (i.e. kbdev) -+ * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) -+ * -+ * Handle the GPU device interrupt source requests reflected in the -+ * given source bit-pattern. The test code caller is responsible for -+ * undertaking the required device power maintenace. 
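Not part of the patch: a hedged sketch of requesting one page of ATF share memory, mirroring what sip_smc_get_dram_map() above does; the page type is one of the share_page_type_t values already used in this file.

/* Illustration only: obtain a mapped share-memory page from ATF. */
static void __iomem *example_request_share_page(void)
{
        struct arm_smccc_res res;

        res = sip_smc_request_share_mem(1, SHARE_PAGE_TYPE_DDR_ADDRMAP);
        if (IS_SIP_ERROR(res.a0))
                return NULL;

        /* res.a1 has already been remapped by sip_map() on success. */
        return (void __iomem *)res.a1;
}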
-+ * -+ * Return: IRQ_HANDLED if the requests are from the GPU device, -+ * IRQ_NONE otherwise -+ */ -+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) ++struct arm_smccc_res sip_smc_mcu_el3fiq(u32 arg0, u32 arg1, u32 arg2) +{ -+ struct kbase_device *kbdev = kbase_untag(data); ++ return __invoke_sip_fn_smc(SIP_MCU_EL3FIQ_CFG, arg0, arg1, arg2); ++} ++EXPORT_SYMBOL_GPL(sip_smc_mcu_el3fiq); + -+ if (!val) -+ return IRQ_NONE; ++struct arm_smccc_res sip_smc_vpu_reset(u32 arg0, u32 arg1, u32 arg2) ++{ ++ struct arm_smccc_res res; + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ res = __invoke_sip_fn_smc(PSCI_SIP_VPU_RESET, arg0, arg1, arg2); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_vpu_reset); + -+ kbase_gpu_interrupt(kbdev, val); ++struct arm_smccc_res sip_smc_bus_config(u32 arg0, u32 arg1, u32 arg2) ++{ ++ struct arm_smccc_res res; + -+ return IRQ_HANDLED; ++ res = __invoke_sip_fn_smc(SIP_BUS_CFG, arg0, arg1, arg2); ++ return res; +} ++EXPORT_SYMBOL_GPL(sip_smc_bus_config); + -+KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); -+ -+/** -+ * kbase_set_custom_irq_handler - Set a custom IRQ handler -+ * @kbdev: Device for which the handler is to be registered -+ * @custom_handler: Handler to be registered -+ * @irq_type: Interrupt type -+ * -+ * Registers given interrupt handler for requested interrupt type -+ * In the case where irq handler is not specified, the default handler shall be -+ * registered -+ * -+ * Return: 0 case success, error code otherwise -+ */ -+int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, -+ int irq_type) ++struct dram_addrmap_info *sip_smc_get_dram_map(void) +{ -+ int result = 0; -+ irq_handler_t requested_irq_handler = NULL; ++ struct arm_smccc_res res; ++ static struct dram_addrmap_info *map; + -+ KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && -+ (irq_type <= GPU_IRQ_HANDLER)); ++ if (map) ++ return map; + -+ /* Release previous handler */ -+ if (kbdev->irqs[irq_type].irq) -+ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); ++ /* Request share memory size 4KB */ ++ res = sip_smc_request_share_mem(1, SHARE_PAGE_TYPE_DDR_ADDRMAP); ++ if (res.a0 != 0) { ++ pr_err("no ATF memory for init\n"); ++ return NULL; ++ } + -+ requested_irq_handler = (custom_handler != NULL) ? 
-+ custom_handler : -+ kbase_handler_table[irq_type]; ++ map = (struct dram_addrmap_info *)res.a1; + -+ if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, -+ kbdev->irqs[irq_type].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), -+ kbase_tag(kbdev, irq_type)) != 0) { -+ result = -EINVAL; -+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", -+ kbdev->irqs[irq_type].irq, irq_type); -+#if IS_ENABLED(CONFIG_SPARSE_IRQ) -+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -+#endif /* CONFIG_SPARSE_IRQ */ ++ res = sip_smc_dram(SHARE_PAGE_TYPE_DDR_ADDRMAP, 0, ++ ROCKCHIP_SIP_CONFIG_DRAM_ADDRMAP_GET); ++ if (res.a0) { ++ pr_err("rockchip_sip_config_dram_init error:%lx\n", res.a0); ++ map = NULL; ++ return NULL; + } + -+ return result; ++ return map; +} ++EXPORT_SYMBOL_GPL(sip_smc_get_dram_map); + -+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); -+ -+/* test correct interrupt assigment and reception by cpu */ -+struct kbasep_irq_test { -+ struct hrtimer timer; -+ wait_queue_head_t wait; -+ int triggered; -+ u32 timeout; -+}; -+ -+static struct kbasep_irq_test kbasep_irq_test_data; -+ -+#define IRQ_TEST_TIMEOUT 500 -+ -+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) ++struct arm_smccc_res sip_smc_lastlog_request(void) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ struct arm_smccc_res res; ++ void __iomem *addr1, *addr2; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ res = __invoke_sip_fn_smc(SIP_LAST_LOG, local_clock(), 0, 0); ++ if (IS_SIP_ERROR(res.a0)) ++ return res; + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; ++ addr1 = sip_map(res.a1, res.a3); ++ if (!addr1) { ++ pr_err("%s: share memory buffer0 ioremap failed\n", __func__); ++ res.a0 = SIP_RET_INVALID_ADDRESS; ++ return res; ++ } ++ addr2 = sip_map(res.a2, res.a3); ++ if (!addr2) { ++ pr_err("%s: share memory buffer1 ioremap failed\n", __func__); ++ res.a0 = SIP_RET_INVALID_ADDRESS; ++ return res; + } + -+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ if (!val) -+ return IRQ_NONE; -+ -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ res.a1 = (unsigned long)addr1; ++ res.a2 = (unsigned long)addr2; + -+ kbasep_irq_test_data.triggered = 1; -+ wake_up(&kbasep_irq_test_data.wait); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_lastlog_request); + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); ++int sip_smc_amp_config(u32 sub_func_id, u32 arg1, u32 arg2, u32 arg3) ++{ ++ struct arm_smccc_res res; + -+ return IRQ_HANDLED; ++ arm_smccc_smc(RK_SIP_AMP_CFG, sub_func_id, arg1, arg2, arg3, ++ 0, 0, 0, &res); ++ return res.a0; +} ++EXPORT_SYMBOL_GPL(sip_smc_amp_config); + -+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) ++struct arm_smccc_res sip_smc_get_amp_info(u32 sub_func_id, u32 arg1) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ struct arm_smccc_res res; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ arm_smccc_smc(RK_SIP_AMP_CFG, sub_func_id, arg1, 0, 0, 0, 0, 0, &res); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_get_amp_info); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return IRQ_NONE; -+ } ++struct arm_smccc_res sip_smc_get_pvtpll_info(u32 sub_func_id, u32 arg1) ++{ ++ struct arm_smccc_res res; + -+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); ++ /* ++ * res.a0: error code(0: success, !0: error). ++ * res.a1: low temp config flag(0: support, !0: don't support). ++ */ ++ arm_smccc_smc(SIP_PVTPLL_CFG, sub_func_id, arg1, 0, 0, 0, 0, 0, &res); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_get_pvtpll_info); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++struct arm_smccc_res sip_smc_pvtpll_config(u32 sub_func_id, u32 arg1, u32 arg2, ++ u32 arg3, u32 arg4, u32 arg5, u32 arg6) ++{ ++ struct arm_smccc_res res; + -+ if (!val) -+ return IRQ_NONE; ++ /* ++ * res.a0: error code(0: success, !0: error). ++ */ ++ arm_smccc_smc(SIP_PVTPLL_CFG, sub_func_id, arg1, arg2, arg3, arg4, arg5, ++ arg6, &res); ++ return res; ++} ++EXPORT_SYMBOL_GPL(sip_smc_pvtpll_config); + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++void __iomem *sip_hdcp_request_share_memory(int id) ++{ ++ static void __iomem *base; ++ struct arm_smccc_res res; + -+ kbasep_irq_test_data.triggered = 1; -+ wake_up(&kbasep_irq_test_data.wait); ++ if (id < 0 || id >= MAX_DEVICE) { ++ pr_err("%s: invalid device id\n", __func__); ++ return NULL; ++ } + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); ++ if (!base) { ++ /* request page share memory */ ++ res = sip_smc_request_share_mem(2, SHARE_PAGE_TYPE_HDCP); ++ if (IS_SIP_ERROR(res.a0)) ++ return NULL; ++ base = (void __iomem *)res.a1; ++ } + -+ return IRQ_HANDLED; ++ return base + id * 1024; +} ++EXPORT_SYMBOL_GPL(sip_hdcp_request_share_memory); + -+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) ++struct arm_smccc_res sip_hdcp_config(u32 arg0, u32 arg1, u32 arg2) +{ -+ struct kbasep_irq_test *test_data = container_of(timer, -+ struct kbasep_irq_test, timer); ++ struct arm_smccc_res res; + -+ test_data->timeout = 1; -+ test_data->triggered = 1; -+ wake_up(&test_data->wait); -+ return HRTIMER_NORESTART; ++ res = __invoke_sip_fn_smc(SIP_HDCP_CONFIG, arg0, arg1, arg2); ++ return res; +} ++EXPORT_SYMBOL_GPL(sip_hdcp_config); + -+static int kbasep_common_test_interrupt( -+ struct kbase_device * const kbdev, u32 tag) -+{ -+ int err = 0; -+ irq_handler_t test_handler; -+ -+ u32 old_mask_val; -+ u16 mask_offset; -+ u16 rawstat_offset; -+ -+ switch (tag) { -+ case JOB_IRQ_TAG: -+ test_handler = kbase_job_irq_test_handler; -+ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); -+ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); -+ break; -+ case MMU_IRQ_TAG: -+ test_handler = kbase_mmu_irq_test_handler; -+ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); -+ mask_offset = MMU_REG(MMU_IRQ_MASK); -+ break; -+ case GPU_IRQ_TAG: -+ /* already tested by pm_driver - bail out */ -+ default: -+ return 0; -+ } -+ -+ /* store old mask */ -+ old_mask_val = kbase_reg_read(kbdev, mask_offset); -+ /* mask interrupts */ -+ kbase_reg_write(kbdev, mask_offset, 0x0); ++/************************** fiq debugger **************************************/ ++/* ++ * AArch32 is not allowed to call SMC64(ATF framework does not support), so we ++ * don't change SIP_UARTDBG_FN to SIP_UARTDBG_CFG64 even when cpu is AArch32 ++ * mode. Let ATF support SIP_UARTDBG_CFG, and we just initialize SIP_UARTDBG_FN ++ * depends on compile option(CONFIG_ARM or CONFIG_ARM64). 
++ */ ++#ifdef CONFIG_ARM64 ++#define SIP_UARTDBG_FN SIP_UARTDBG_CFG64 ++#else ++#define SIP_UARTDBG_FN SIP_UARTDBG_CFG ++static int firmware_64_32bit; ++#endif + -+ if (kbdev->irqs[tag].irq) { -+ /* release original handler and install test handler */ -+ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { -+ err = -EINVAL; -+ } else { -+ kbasep_irq_test_data.timeout = 0; -+ hrtimer_init(&kbasep_irq_test_data.timer, -+ CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ kbasep_irq_test_data.timer.function = -+ kbasep_test_interrupt_timeout; ++static int fiq_sip_enabled; ++static int fiq_target_cpu; ++static phys_addr_t ft_fiq_mem_phy; ++static void __iomem *ft_fiq_mem_base; ++static sip_fiq_debugger_uart_irq_tf_cb_t sip_fiq_debugger_uart_irq_tf; ++static struct pt_regs fiq_pt_regs; + -+ /* trigger interrupt */ -+ kbase_reg_write(kbdev, mask_offset, 0x1); -+ kbase_reg_write(kbdev, rawstat_offset, 0x1); ++int sip_fiq_debugger_is_enabled(void) ++{ ++ return fiq_sip_enabled; ++} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_is_enabled); + -+ hrtimer_start(&kbasep_irq_test_data.timer, -+ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), -+ HRTIMER_MODE_REL); ++static void sip_fiq_debugger_get_pt_regs(void *reg_base, ++ unsigned long sp_el1) ++{ ++ __maybe_unused struct sm_nsec_ctx *nsec_ctx = reg_base; ++ __maybe_unused struct gp_regs_ctx *gp_regs = reg_base; + -+ wait_event(kbasep_irq_test_data.wait, -+ kbasep_irq_test_data.triggered != 0); ++#ifdef CONFIG_ARM64 ++ /* ++ * 64-bit ATF + 64-bit kernel ++ */ ++ /* copy cpu context: x0 ~ spsr_el3 */ ++ memcpy(&fiq_pt_regs, reg_base, 8 * 31); + -+ if (kbasep_irq_test_data.timeout != 0) { -+ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", -+ kbdev->irqs[tag].irq, tag); -+ err = -EINVAL; -+ } else { -+ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", -+ kbdev->irqs[tag].irq, tag); -+ } ++ /* copy pstate: spsr_el3 */ ++ memcpy(&fiq_pt_regs.pstate, reg_base + 0x110, 8); ++ fiq_pt_regs.sp = sp_el1; + -+ hrtimer_cancel(&kbasep_irq_test_data.timer); -+ kbasep_irq_test_data.triggered = 0; ++ /* copy pc: elr_el3 */ ++ memcpy(&fiq_pt_regs.pc, reg_base + 0x118, 8); ++#else ++ if (firmware_64_32bit == FIRMWARE_ATF_64BIT) { ++ /* ++ * 64-bit ATF + 32-bit kernel ++ */ ++ fiq_pt_regs.ARM_r0 = gp_regs->x0; ++ fiq_pt_regs.ARM_r1 = gp_regs->x1; ++ fiq_pt_regs.ARM_r2 = gp_regs->x2; ++ fiq_pt_regs.ARM_r3 = gp_regs->x3; ++ fiq_pt_regs.ARM_r4 = gp_regs->x4; ++ fiq_pt_regs.ARM_r5 = gp_regs->x5; ++ fiq_pt_regs.ARM_r6 = gp_regs->x6; ++ fiq_pt_regs.ARM_r7 = gp_regs->x7; ++ fiq_pt_regs.ARM_r8 = gp_regs->x8; ++ fiq_pt_regs.ARM_r9 = gp_regs->x9; ++ fiq_pt_regs.ARM_r10 = gp_regs->x10; ++ fiq_pt_regs.ARM_fp = gp_regs->x11; ++ fiq_pt_regs.ARM_ip = gp_regs->x12; ++ fiq_pt_regs.ARM_sp = gp_regs->x19; /* aarch32 svc_r13 */ ++ fiq_pt_regs.ARM_lr = gp_regs->x18; /* aarch32 svc_r14 */ ++ fiq_pt_regs.ARM_cpsr = gp_regs->spsr_el3; ++ fiq_pt_regs.ARM_pc = gp_regs->elr_el3; ++ } else { ++ /* ++ * 32-bit tee firmware + 32-bit kernel ++ */ ++ fiq_pt_regs.ARM_r0 = nsec_ctx->r0; ++ fiq_pt_regs.ARM_r1 = nsec_ctx->r1; ++ fiq_pt_regs.ARM_r2 = nsec_ctx->r2; ++ fiq_pt_regs.ARM_r3 = nsec_ctx->r3; ++ fiq_pt_regs.ARM_r4 = nsec_ctx->r4; ++ fiq_pt_regs.ARM_r5 = nsec_ctx->r5; ++ fiq_pt_regs.ARM_r6 = nsec_ctx->r6; ++ fiq_pt_regs.ARM_r7 = nsec_ctx->r7; ++ fiq_pt_regs.ARM_r8 = nsec_ctx->r8; ++ fiq_pt_regs.ARM_r9 = nsec_ctx->r9; ++ fiq_pt_regs.ARM_r10 = nsec_ctx->r10; ++ fiq_pt_regs.ARM_fp = nsec_ctx->r11; ++ fiq_pt_regs.ARM_ip = nsec_ctx->r12; ++ fiq_pt_regs.ARM_sp = nsec_ctx->svc_sp; ++ 
fiq_pt_regs.ARM_lr = nsec_ctx->svc_lr; ++ fiq_pt_regs.ARM_cpsr = nsec_ctx->mon_spsr; + -+ /* mask interrupts */ -+ kbase_reg_write(kbdev, mask_offset, 0x0); ++ /* ++ * 'nsec_ctx->mon_lr' is not the fiq break point's PC, because it will ++ * be override as 'psci_fiq_debugger_uart_irq_tf_cb' for optee-os to ++ * jump to fiq_debugger handler. ++ * ++ * As 'nsec_ctx->und_lr' is not used for kernel, so optee-os uses it to ++ * deliver fiq break point's PC. ++ * ++ */ ++ fiq_pt_regs.ARM_pc = nsec_ctx->und_lr; ++ } ++#endif ++} + -+ /* release test handler */ -+ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); -+ } ++static void sip_fiq_debugger_uart_irq_tf_cb(unsigned long sp_el1, ++ unsigned long offset, ++ unsigned long cpu) ++{ ++ char *cpu_context; + -+ /* restore original interrupt */ -+ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], -+ kbdev->irqs[tag].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { -+ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", -+ kbdev->irqs[tag].irq, tag); -+ err = -EINVAL; -+ } ++ /* calling fiq handler */ ++ if (ft_fiq_mem_base) { ++ cpu_context = (char *)ft_fiq_mem_base + offset; ++ sip_fiq_debugger_get_pt_regs(cpu_context, sp_el1); ++ sip_fiq_debugger_uart_irq_tf(&fiq_pt_regs, cpu); + } -+ /* restore old mask */ -+ kbase_reg_write(kbdev, mask_offset, old_mask_val); + -+ return err; ++ /* fiq handler done, return to EL3(then EL3 return to EL1 entry) */ ++ __invoke_sip_fn_smc(SIP_UARTDBG_FN, 0, 0, UARTDBG_CFG_OSHDL_TO_OS); +} + -+int kbasep_common_test_interrupt_handlers( -+ struct kbase_device * const kbdev) ++int sip_fiq_debugger_uart_irq_tf_init(u32 irq_id, sip_fiq_debugger_uart_irq_tf_cb_t callback_fn) +{ -+ int err; -+ -+ init_waitqueue_head(&kbasep_irq_test_data.wait); -+ kbasep_irq_test_data.triggered = 0; ++ struct arm_smccc_res res; + -+ /* A suspend won't happen during startup/insmod */ -+ kbase_pm_context_active(kbdev); ++ fiq_target_cpu = 0; + -+ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); -+ if (err) { -+ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); -+ goto out; ++ /* init fiq debugger callback */ ++ sip_fiq_debugger_uart_irq_tf = callback_fn; ++ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, irq_id, ++ (unsigned long)sip_fiq_debugger_uart_irq_tf_cb, ++ UARTDBG_CFG_INIT); ++ if (IS_SIP_ERROR(res.a0)) { ++ pr_err("%s error: %d\n", __func__, (int)res.a0); ++ return res.a0; + } + -+ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); -+ if (err) { -+ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); -+ goto out; ++ /* share memory ioremap */ ++ if (!ft_fiq_mem_base) { ++ ft_fiq_mem_phy = res.a1; ++ ft_fiq_mem_base = sip_map(ft_fiq_mem_phy, ++ FIQ_UARTDBG_SHARE_MEM_SIZE); ++ if (!ft_fiq_mem_base) { ++ pr_err("%s: share memory ioremap failed\n", __func__); ++ return -ENOMEM; ++ } + } + -+ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); -+ -+ out: -+ kbase_pm_context_idle(kbdev); ++ fiq_sip_enabled = 1; + -+ return err; ++ return SIP_RET_SUCCESS; +} -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_uart_irq_tf_init); + -+int kbase_install_interrupts(struct kbase_device *kbdev) ++static ulong cpu_logical_map_mpidr(u32 cpu) +{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ int err; -+ u32 i; ++#ifdef MODULE ++ /* Empirically, local "cpu_logical_map()" for rockchip platforms */ ++ ulong mpidr = read_cpuid_mpidr(); + -+ for (i = 0; i < nr; i++) { -+ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], -+ kbdev->irqs[i].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), -+ kbase_tag(kbdev, i)); -+ if (err) { -+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", -+ kbdev->irqs[i].irq, i); -+#if IS_ENABLED(CONFIG_SPARSE_IRQ) -+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -+#endif /* CONFIG_SPARSE_IRQ */ -+ goto release; -+ } ++ if (mpidr & MPIDR_MT_BITMASK) { ++ /* 0x100, 0x200, 0x300, 0x400 ... */ ++ mpidr = (cpu & 0xff) << 8; ++ } else { ++ if (cpu < 4) ++ /* 0x00, 0x01, 0x02, 0x03 */ ++ mpidr = cpu; ++ else if (cpu < 8) ++ /* 0x100, 0x101, 0x102, 0x103 */ ++ mpidr = 0x100 | (cpu - 4); ++ else ++ pr_err("Unsupported map cpu: %d\n", cpu); + } + -+ return 0; -+ -+ release: -+ while (i-- > 0) -+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); ++ return mpidr; ++#else ++ return cpu_logical_map(cpu); ++#endif ++} + -+ return err; ++ulong sip_cpu_logical_map_mpidr(u32 cpu) ++{ ++ return cpu_logical_map_mpidr(cpu); +} ++EXPORT_SYMBOL_GPL(sip_cpu_logical_map_mpidr); + -+void kbase_release_interrupts(struct kbase_device *kbdev) ++int sip_fiq_debugger_switch_cpu(u32 cpu) +{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ u32 i; ++ struct arm_smccc_res res; + -+ for (i = 0; i < nr; i++) { -+ if (kbdev->irqs[i].irq) -+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); -+ } ++ fiq_target_cpu = cpu; ++ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, cpu_logical_map_mpidr(cpu), ++ 0, UARTDBG_CFG_OSHDL_CPUSW); ++ return res.a0; +} + -+void kbase_synchronize_irqs(struct kbase_device *kbdev) ++int sip_fiq_debugger_sdei_switch_cpu(u32 cur_cpu, u32 target_cpu, u32 flag) +{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ u32 i; ++ struct arm_smccc_res res; + -+ for (i = 0; i < nr; i++) { -+ if (kbdev->irqs[i].irq) -+ synchronize_irq(kbdev->irqs[i].irq); -+ } ++ res = __invoke_sip_fn_smc(SIP_SDEI_FIQ_DBG_SWITCH_CPU, ++ cur_cpu, target_cpu, flag); ++ return res.a0; +} + -+KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); ++int sip_fiq_debugger_sdei_get_event_id(u32 *fiq, u32 *sw_cpu, u32 *flag) ++{ ++ struct arm_smccc_res res; + -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c -new file mode 100644 -index 000000000..258dc6dac ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c -@@ -0,0 +1,241 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM 
Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ res = __invoke_sip_fn_smc(SIP_SDEI_FIQ_DBG_GET_EVENT_ID, ++ 0, 0, 0); ++ *fiq = res.a1; ++ *sw_cpu = res.a2; ++ if (flag) ++ *flag = res.a3; + -+/* -+ * Register backend context / address space management -+ */ ++ return res.a0; ++} + -+#include -+#include -+#include ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_switch_cpu); + -+/** -+ * assign_and_activate_kctx_addr_space - Assign an AS to a context -+ * @kbdev: Kbase device -+ * @kctx: Kbase context -+ * @current_as: Address Space to assign -+ * -+ * Assign an Address Space (AS) to a context, and add the context to the Policy. -+ * -+ * This includes -+ * setting up the global runpool_irq structure and the context on the AS, -+ * Activating the MMU on the AS, -+ * Allowing jobs to be submitted on the AS. -+ * -+ * Context: -+ * kbasep_js_kctx_info.jsctx_mutex held, -+ * kbasep_js_device_data.runpool_mutex held, -+ * AS transaction mutex held, -+ * Runpool IRQ lock held -+ */ -+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_as *current_as) ++void sip_fiq_debugger_enable_debug(bool enable) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ -+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+#if !MALI_USE_CSF -+ /* Attribute handling */ -+ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); -+#endif ++ unsigned long val; + -+ /* Allow it to run jobs */ -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ val = enable ? 
UARTDBG_CFG_OSHDL_DEBUG_ENABLE : ++ UARTDBG_CFG_OSHDL_DEBUG_DISABLE; + -+ kbase_js_runpool_inc_context_count(kbdev, kctx); ++ __invoke_sip_fn_smc(SIP_UARTDBG_FN, 0, 0, val); +} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_enable_debug); + -+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js) ++int sip_fiq_debugger_set_print_port(u32 port_phyaddr, u32 baudrate) +{ -+ int i; -+ -+ if (kbdev->hwaccess.active_kctx[js] == kctx) { -+ /* Context is already active */ -+ return true; -+ } -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ if (kbdev->as_to_kctx[i] == kctx) { -+ /* Context already has ASID - mark as active */ -+ return true; -+ } -+ } ++ struct arm_smccc_res res; + -+ /* Context does not have address space assigned */ -+ return false; ++ res = __invoke_sip_fn_smc(SIP_UARTDBG_FN, port_phyaddr, baudrate, ++ UARTDBG_CFG_PRINT_PORT); ++ return res.a0; +} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_set_print_port); + -+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++int sip_fiq_debugger_request_share_memory(void) +{ -+ int as_nr = kctx->as_nr; -+ -+ if (as_nr == KBASEP_AS_NR_INVALID) { -+ WARN(1, "Attempting to release context without ASID\n"); -+ return; -+ } -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (atomic_read(&kctx->refcount) != 1) { -+ WARN(1, "Attempting to release active ASID\n"); -+ return; -+ } ++ struct arm_smccc_res res; + -+ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); ++ /* request page share memory */ ++ res = sip_smc_request_share_mem(FIQ_UARTDBG_PAGE_NUMS, ++ SHARE_PAGE_TYPE_UARTDBG); ++ if (IS_SIP_ERROR(res.a0)) ++ return res.a0; + -+ kbase_ctx_sched_release_ctx(kctx); -+ kbase_js_runpool_dec_context_count(kbdev, kctx); ++ return SIP_RET_SUCCESS; +} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_request_share_memory); + -+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++int sip_fiq_debugger_get_target_cpu(void) +{ ++ return fiq_target_cpu; +} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_get_target_cpu); + -+int kbase_backend_find_and_release_free_address_space( -+ struct kbase_device *kbdev, struct kbase_context *kctx) ++void sip_fiq_debugger_enable_fiq(bool enable, uint32_t tgt_cpu) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long flags; -+ int i; -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ struct kbasep_js_kctx_info *as_js_kctx_info; -+ struct kbase_context *as_kctx; -+ -+ as_kctx = kbdev->as_to_kctx[i]; -+ as_js_kctx_info = &as_kctx->jctx.sched_info; -+ -+ /* Don't release privileged or active contexts, or contexts with -+ * jobs running. -+ * Note that a context will have at least 1 reference (which -+ * was previously taken by kbasep_js_schedule_ctx()) until -+ * descheduled. 
-+ */ -+ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && -+ atomic_read(&as_kctx->refcount) == 1) { -+ if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { -+ WARN(1, "Failed to retain active context\n"); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ return KBASEP_AS_NR_INVALID; -+ } -+ -+ kbasep_js_clear_submit_allowed(js_devdata, as_kctx); ++ u32 en; + -+ /* Drop and retake locks to take the jsctx_mutex on the -+ * context we're about to release without violating lock -+ * ordering -+ */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ fiq_target_cpu = tgt_cpu; ++ en = enable ? UARTDBG_CFG_FIQ_ENABEL : UARTDBG_CFG_FIQ_DISABEL; ++ __invoke_sip_fn_smc(SIP_UARTDBG_FN, tgt_cpu, 0, en); ++} ++EXPORT_SYMBOL_GPL(sip_fiq_debugger_enable_fiq); + ++int sip_fiq_control(u32 sub_func, u32 irq, unsigned long data) ++{ ++ struct arm_smccc_res res; + -+ /* Release context from address space */ -+ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++ res = __invoke_sip_fn_smc(RK_SIP_FIQ_CTRL, ++ sub_func, irq, data); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_fiq_control); + -+ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); ++int sip_wdt_config(u32 sub_func, u32 arg1, u32 arg2, u32 arg3) ++{ ++ struct arm_smccc_res res; + -+ if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, -+ as_kctx, -+ true); ++ arm_smccc_smc(SIP_WDT_CFG, sub_func, arg1, arg2, arg3, ++ 0, 0, 0, &res); + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_wdt_config); + -+ return i; -+ } ++int sip_hdmirx_config(u32 sub_func, u32 arg1, u32 arg2, u32 arg3) ++{ ++ struct arm_smccc_res res; + -+ /* Context was retained while locks were dropped, -+ * continue looking for free AS -+ */ -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ arm_smccc_smc(SIP_HDMIRX_CFG, sub_func, arg1, arg2, arg3, ++ 0, 0, 0, &res); + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ } -+ } ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_hdmirx_config); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++int sip_hdcpkey_init(u32 hdcp_id) ++{ ++ struct arm_smccc_res res; + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ res = __invoke_sip_fn_smc(TRUSTED_OS_HDCPKEY_INIT, hdcp_id, 0, 0); + -+ return KBASEP_AS_NR_INVALID; ++ return res.a0; +} ++EXPORT_SYMBOL_GPL(sip_hdcpkey_init); + -+bool kbase_backend_use_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int as_nr) ++int sip_smc_mcu_config(unsigned long mcu_id, ++ unsigned long func, ++ unsigned long arg2) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_as *new_address_space = NULL; -+ int js; -+ -+ js_devdata = &kbdev->js_data; ++ struct arm_smccc_res res; + -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ if (kbdev->hwaccess.active_kctx[js] == kctx) { -+ WARN(1, "Context is already scheduled in\n"); -+ return false; -+ } -+ } ++ res = __invoke_sip_fn_smc(SIP_MCU_CFG, mcu_id, func, arg2); ++ return res.a0; ++} ++EXPORT_SYMBOL_GPL(sip_smc_mcu_config); 
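For reference, a minimal caller-side sketch (not part of the patch) of how a client module might consume the SIP helpers exported above. The header path soc/rockchip/rockchip_sip.h and the demo module itself are assumptions; the calling convention follows the code above, where res.a0 carries the SIP status and res.a1.. carry the payload.

	/* Hypothetical consumer of the exported SIP helpers (illustration only). */
	#include <linux/arm-smccc.h>
	#include <linux/module.h>
	#include <soc/rockchip/rockchip_sip.h>	/* assumed location of the prototypes */

	static int __init sip_demo_init(void)
	{
		struct dram_addrmap_info *map;
		struct arm_smccc_res res;

		/* DDR address-map query: NULL means ATF provided no share memory */
		map = sip_smc_get_dram_map();
		if (!map)
			pr_warn("sip demo: DDR address map not available\n");

		/* last-log buffers: on success, a1/a2 already hold remapped addresses */
		res = sip_smc_lastlog_request();
		if (IS_SIP_ERROR(res.a0))
			pr_warn("sip demo: lastlog not supported (%ld)\n", (long)res.a0);

		return 0;
	}
	module_init(sip_demo_init);

	MODULE_LICENSE("GPL");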
++/******************************************************************************/ ++#ifdef CONFIG_ARM ++static __init int sip_firmware_init(void) ++{ ++ struct arm_smccc_res res; + -+ new_address_space = &kbdev->as[as_nr]; ++ if (!psci_smp_available()) ++ return 0; + -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); ++ /* ++ * OP-TEE works on kernel 3.10 and 4.4 and we have different sip ++ * implement. We should tell OP-TEE the current rockchip sip version. ++ */ ++ res = __invoke_sip_fn_smc(SIP_SIP_VERSION, SIP_IMPLEMENT_V2, ++ SECURE_REG_WR, 0); ++ if (IS_SIP_ERROR(res.a0)) ++ pr_err("%s: set rockchip sip version v2 failed\n", __func__); + -+ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { -+ /* We need to retain it to keep the corresponding address space -+ */ -+ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ /* ++ * Currently, we support: ++ * ++ * 1. 64-bit ATF + 64-bit kernel; ++ * 2. 64-bit ATF + 32-bit kernel; ++ * 3. 32-bit TEE + 32-bit kernel; ++ * ++ * We need to detect which case of above and record in firmware_64_32bit ++ * We get info from cpuid and compare with all supported ARMv7 cpu. ++ */ ++ switch (read_cpuid_part()) { ++ case ARM_CPU_PART_CORTEX_A7: ++ case ARM_CPU_PART_CORTEX_A8: ++ case ARM_CPU_PART_CORTEX_A9: ++ case ARM_CPU_PART_CORTEX_A12: ++ case ARM_CPU_PART_CORTEX_A15: ++ case ARM_CPU_PART_CORTEX_A17: ++ firmware_64_32bit = FIRMWARE_TEE_32BIT; ++ break; ++ default: ++ firmware_64_32bit = FIRMWARE_ATF_64BIT; ++ break; + } + -+ return true; ++ return 0; +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h ++arch_initcall(sip_firmware_init); ++#endif ++ ++MODULE_DESCRIPTION("Rockchip SIP Call"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig +index ebd4e113d..c32ec4488 100644 +--- a/drivers/gpio/Kconfig ++++ b/drivers/gpio/Kconfig +@@ -1026,6 +1026,21 @@ config GPIO_ADNP + enough to represent all pins, but the driver will assume a + register layout for 64 pins (8 registers). + ++config GPIO_AW9110 ++ tristate "AW9110 I2C GPIO expanders" ++ select GPIOLIB_IRQCHIP ++ select IRQ_DOMAIN ++ help ++ Say yes here to provide access to I2C ++ GPIO expanders used for additional digital outputs or inputs. ++ Your board setup code will need to declare the expanders in ++ use, and assign numbers to the GPIOs they expose. Those GPIOs ++ can then be used from drivers and other kernel code, just like ++ other GPIOs, but only accessible from task contexts. ++ ++ This driver provides an in-kernel interface to those GPIOs using ++ platform-neutral GPIO calls. ++ + config GPIO_FXL6408 + tristate "FXL6408 I2C GPIO expander" + select GPIO_REGMAP +@@ -1086,6 +1101,14 @@ config GPIO_MAX732X_IRQ + Say yes here to enable the max732x to be used as an interrupt + controller. It requires the driver to be built in the kernel. + ++config GPIO_NCA9539 ++ tristate "NCA9539 I2C GPIO expander" ++ depends on I2C || COMPILE_TEST ++ select REGMAP_I2C ++ help ++ Say yes here to support the NCA9539 series of I2C Expanders. ++ GPIO expanders used for additional digital outputs or inputs. 
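As the GPIO_AW9110 help above notes, lines behind an I2C expander are sleeping GPIOs, so consumers must stay in task context and use the *_cansleep accessors. A minimal, hypothetical consumer sketch follows; the "led" con_id and the probe function are assumptions, not part of the patch.

	/* Hypothetical consumer of one expander line (illustration only). */
	#include <linux/err.h>
	#include <linux/gpio/consumer.h>
	#include <linux/platform_device.h>

	static int demo_probe(struct platform_device *pdev)
	{
		struct gpio_desc *led;

		/* "led" is an assumed con_id, mapped to an expander pin via DT */
		led = devm_gpiod_get(&pdev->dev, "led", GPIOD_OUT_LOW);
		if (IS_ERR(led))
			return PTR_ERR(led);

		/* sleeps: the write goes out over I2C, so no atomic context here */
		gpiod_set_value_cansleep(led, 1);

		return 0;
	}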
++ + config GPIO_PCA953X + tristate "PCA95[357]x, PCA9698, TCA64xx, and MAX7310 I/O ports" + select REGMAP_I2C +diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile +index eb73b5d63..d0f3e4332 100644 +--- a/drivers/gpio/Makefile ++++ b/drivers/gpio/Makefile +@@ -36,6 +36,7 @@ obj-$(CONFIG_GPIO_ARIZONA) += gpio-arizona.o + obj-$(CONFIG_GPIO_ASPEED) += gpio-aspeed.o + obj-$(CONFIG_GPIO_ASPEED_SGPIO) += gpio-aspeed-sgpio.o + obj-$(CONFIG_GPIO_ATH79) += gpio-ath79.o ++obj-$(CONFIG_GPIO_AW9110) += gpio-aw9110.o + obj-$(CONFIG_GPIO_BCM_KONA) += gpio-bcm-kona.o + obj-$(CONFIG_GPIO_BCM_XGS_IPROC) += gpio-xgs-iproc.o + obj-$(CONFIG_GPIO_BD71815) += gpio-bd71815.o +@@ -116,6 +117,7 @@ obj-$(CONFIG_GPIO_MT7621) += gpio-mt7621.o + obj-$(CONFIG_GPIO_MVEBU) += gpio-mvebu.o + obj-$(CONFIG_GPIO_MXC) += gpio-mxc.o + obj-$(CONFIG_GPIO_MXS) += gpio-mxs.o ++obj-$(CONFIG_GPIO_NCA9539) += gpio-nca9539.o + obj-$(CONFIG_GPIO_OCTEON) += gpio-octeon.o + obj-$(CONFIG_GPIO_OMAP) += gpio-omap.o + obj-$(CONFIG_GPIO_PALMAS) += gpio-palmas.o +@@ -136,7 +138,7 @@ obj-$(CONFIG_GPIO_RDA) += gpio-rda.o + obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o + obj-$(CONFIG_GPIO_REALTEK_OTTO) += gpio-realtek-otto.o + obj-$(CONFIG_GPIO_REG) += gpio-reg.o +-obj-$(CONFIG_GPIO_ROCKCHIP) += gpio-rockchip.o ++obj-$(CONFIG_GPIO_ROCKCHIP) += gpio-rockchip-oh.o + obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o + obj-$(CONFIG_GPIO_SAMA5D2_PIOBU) += gpio-sama5d2-piobu.o + obj-$(CONFIG_GPIO_SCH311X) += gpio-sch311x.o +diff --git a/drivers/gpio/gpio-aw9110.c b/drivers/gpio/gpio-aw9110.c new file mode 100644 -index 000000000..136aa526d +index 000000000..ab97726fc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h -@@ -0,0 +1,135 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpio/gpio-aw9110.c +@@ -0,0 +1,500 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later +/* ++ * Driver for aw9110 I2C GPIO expanders + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/* -+ * Register-based HW access backend specific definitions ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. 
+ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ -+#define _KBASE_HWACCESS_GPU_DEFS_H_ ++#define REG_INPUT_P0 0x00 ++#define REG_INPUT_P1 0x01 ++#define REG_OUTPUT_P0 0x02 ++#define REG_OUTPUT_P1 0x03 ++#define REG_CONFIG_P0 0x04 ++#define REG_CONFIG_P1 0x05 ++#define REG_INT_P0 0x06 ++#define REG_INT_P1 0x07 ++#define REG_ID 0x10 ++#define REG_CTRL 0x11 ++#define REG_WORK_MODE_P0 0x12 ++#define REG_WORK_MODE_P1 0x13 ++#define REG_EN_BREATH 0x14 ++#define REG_FADE_TIME 0x15 ++#define REG_FULL_TIME 0x16 ++#define REG_DLY0_BREATH 0x17 ++#define REG_DLY1_BREATH 0x18 ++#define REG_DLY2_BREATH 0x19 ++#define REG_DLY3_BREATH 0x1a ++#define REG_DLY4_BREATH 0x1b ++#define REG_DLY5_BREATH 0x1c ++#define REG_DIM00 0x20 ++#define REG_DIM01 0x21 ++#define REG_DIM02 0x22 ++#define REG_DIM03 0x23 ++#define REG_DIM04 0x24 ++#define REG_DIM05 0x25 ++#define REG_DIM06 0x26 ++#define REG_DIM07 0x27 ++#define REG_DIM08 0x28 ++#define REG_DIM09 0x29 ++#define REG_SWRST 0x7F ++#define REG_81H 0x81 + -+/* SLOT_RB_SIZE must be < 256 */ -+#define SLOT_RB_SIZE 2 -+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + -+/** -+ * struct rb_entry - Ringbuffer entry -+ * @katom: Atom associated with this entry -+ */ -+struct rb_entry { -+ struct kbase_jd_atom *katom; ++static const struct i2c_device_id aw9110_id[] = { ++ { "aw9110", 10 }, ++ { } +}; ++MODULE_DEVICE_TABLE(i2c, aw9110_id); + -+/* SLOT_RB_TAG_PURGED assumes a value that is different from -+ * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of -+ * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x). -+ */ -+#define SLOT_RB_TAG_PURGED ((u64)(1 << 1)) -+#define SLOT_RB_NULL_TAG_VAL ((u64)0) ++#ifdef CONFIG_OF ++static const struct of_device_id aw9110_of_table[] = { ++ { .compatible = "awinic,aw9110" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, aw9110_of_table); ++#endif + -+/** -+ * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a -+ * u64 for serving as tagged value. -+ * @kctx: Pointer to kbase context. -+ */ -+#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) -+/** -+ * struct slot_rb - Slot ringbuffer -+ * @entries: Ringbuffer entries -+ * @last_kctx_tagged: The last context that submitted a job to the slot's -+ * HEAD_NEXT register. The value is a tagged variant so -+ * must not be dereferenced. It is used in operation to -+ * track when shader core L1 caches might contain a -+ * previous context's data, and so must only be set to -+ * SLOT_RB_NULL_TAG_VAL after reset/powerdown of the -+ * cores. In slot job submission, if there is a kctx -+ * change, and the relevant katom is configured with -+ * BASE_JD_REQ_SKIP_CACHE_START, a L1 read only cache -+ * maintenace operation is enforced. 
-+ * @read_idx: Current read index of buffer -+ * @write_idx: Current write index of buffer -+ * @job_chain_flag: Flag used to implement jobchain disambiguation -+ */ -+struct slot_rb { -+ struct rb_entry entries[SLOT_RB_SIZE]; + -+ u64 last_kctx_tagged; ++struct aw9110 { ++ struct gpio_chip chip; ++ struct irq_chip irqchip; ++ struct i2c_client *client; ++ struct mutex lock; /* protect 'out' */ ++ unsigned int out; /* software latch */ ++ unsigned int direct; /* gpio direct */ ++ unsigned int status; /* current status */ ++ unsigned int irq_enabled; /* enabled irqs */ + -+ u8 read_idx; -+ u8 write_idx; ++ struct device *dev; ++ int shdn_en; /* shutdown ctrl */ + -+ u8 job_chain_flag; ++ int (*write)(struct i2c_client *client, u8 reg, u8 data); ++ int (*read)(struct i2c_client *client, u8 reg); +}; + -+/** -+ * struct kbase_backend_data - GPU backend specific data for HW access layer -+ * @slot_rb: Slot ringbuffers -+ * @scheduling_timer: The timer tick used for rescheduling jobs -+ * @timer_running: Is the timer running? The runpool_mutex must be -+ * held whilst modifying this. -+ * @suspend_timer: Is the timer suspended? Set when a suspend -+ * occurs and cleared on resume. The runpool_mutex -+ * must be held whilst modifying this. -+ * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) -+ * @reset_workq: Work queue for performing the reset -+ * @reset_work: Work item for performing the reset -+ * @reset_wait: Wait event signalled when the reset is complete -+ * @reset_timer: Timeout for soft-stops before the reset -+ * @timeouts_updated: Have timeout values just been updated? -+ * -+ * The hwaccess_lock (a spinlock) must be held when accessing this structure -+ */ -+struct kbase_backend_data { -+#if !MALI_USE_CSF -+ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; -+ struct hrtimer scheduling_timer; + -+ bool timer_running; -+#endif -+ bool suspend_timer; ++static int aw9110_i2c_write_le8(struct i2c_client *client, u8 reg, u8 data) ++{ ++ return i2c_smbus_write_byte_data(client, reg, data); ++} + -+ atomic_t reset_gpu; ++static int aw9110_i2c_read_le8(struct i2c_client *client, u8 reg) ++{ ++ return (int)i2c_smbus_read_byte_data(client, reg); ++} + -+/* The GPU reset isn't pending */ -+#define KBASE_RESET_GPU_NOT_PENDING 0 -+/* kbase_prepare_to_reset_gpu has been called */ -+#define KBASE_RESET_GPU_PREPARED 1 -+/* kbase_reset_gpu has been called - the reset will now definitely happen -+ * within the timeout period -+ */ -+#define KBASE_RESET_GPU_COMMITTED 2 -+/* The GPU reset process is currently occuring (timeout has expired or -+ * kbasep_try_reset_gpu_early was called) -+ */ -+#define KBASE_RESET_GPU_HAPPENING 3 -+/* Reset the GPU silently, used when resetting the GPU as part of normal -+ * behavior (e.g. when exiting protected mode). 
-+ */ -+#define KBASE_RESET_GPU_SILENT 4 -+ struct workqueue_struct *reset_workq; -+ struct work_struct reset_work; -+ wait_queue_head_t reset_wait; -+ struct hrtimer reset_timer; ++static int aw9110_get(struct gpio_chip *chip, unsigned int offset) ++{ ++ struct aw9110 *gpio = gpiochip_get_data(chip); ++ int value = 0; + -+ bool timeouts_updated; -+}; ++ mutex_lock(&gpio->lock); + -+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c -new file mode 100644 -index 000000000..c7257117e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c -@@ -0,0 +1,1513 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (offset < 4) { ++ value = gpio->read(gpio->client, REG_INPUT_P1); ++ mutex_unlock(&gpio->lock); + -+/* -+ * Base kernel job manager APIs -+ */ ++ value = (value < 0) ? value : !!(value & (1 << offset)); ++ } else { ++ value = gpio->read(gpio->client, REG_INPUT_P0); ++ mutex_unlock(&gpio->lock); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ value = (value < 0) ? value : !!((value<<4) & (1 << offset)); ++ } + -+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); -+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, -+ const u64 affinity, const u64 limited_core_mask); ++ return value; ++} + -+static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, -+ unsigned int js, const u64 limited_core_mask) ++static int aw9110_get_direction(struct gpio_chip *chip, unsigned int offset) +{ -+ u64 affinity; -+ bool skip_affinity_check = false; ++ struct aw9110 *gpio = gpiochip_get_data(chip); ++ unsigned int reg_val; + -+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == -+ BASE_JD_REQ_T) { -+ /* Tiler-only atom, affinity value can be programed as 0 */ -+ affinity = 0; -+ skip_affinity_check = true; -+ } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | -+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { -+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; -+ struct mali_base_gpu_coherent_group_info *coherency_info = -+ &kbdev->gpu_props.props.coherency_info; ++ reg_val = gpio->direct; + -+ affinity = kbdev->pm.backend.shaders_avail & -+ kbdev->pm.debug_core_mask[js]; ++ dev_dbg(gpio->dev, "direct get: %04X, pin:%d\n", reg_val, offset); + -+ /* JS2 on a dual core group system targets core group 1. All -+ * other cases target core group 0. 
-+ */ -+ if (js == 2 && num_core_groups > 1) -+ affinity &= coherency_info->group[1].core_mask; -+ else if (num_core_groups > 1) -+ affinity &= coherency_info->group[0].core_mask; -+ else -+ affinity &= kbdev->gpu_props.curr_config.shader_present; -+ } else { -+ /* Use all cores */ -+ affinity = kbdev->pm.backend.shaders_avail & -+ kbdev->pm.debug_core_mask[js]; -+ } ++ if (reg_val & (1<pm.backend.shaders_avail)); -+#endif ++ mutex_lock(&gpio->lock); + -+ affinity = kbdev->pm.backend.shaders_avail; ++ /* set direct */ ++ gpio->direct |= (1<write(gpio->client, REG_CONFIG_P1, gpio->direct&0x0F); ++ else ++ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct >> 4)&0x3F); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ /* affinity should never be 0 */ -+ WARN_ON(!affinity); -+#endif -+ } -+ } ++ mutex_unlock(&gpio->lock); + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), -+ affinity & 0xFFFFFFFF); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), -+ affinity >> 32); ++ dev_dbg(gpio->dev, "direct in: %04X, pin:%d\n", gpio->direct, offset); + -+ return affinity; ++ return 0; +} + -+/** -+ * select_job_chain() - Select which job chain to submit to the GPU -+ * @katom: Pointer to the atom about to be submitted to the GPU -+ * -+ * Selects one of the fragment job chains attached to the special atom at the -+ * end of a renderpass, or returns the address of the single job chain attached -+ * to any other type of atom. -+ * -+ * Which job chain is selected depends upon whether the tiling phase of the -+ * renderpass completed normally or was soft-stopped because it used too -+ * much memory. It also depends upon whether one of the fragment job chains -+ * has already been run as part of the same renderpass. -+ * -+ * Return: GPU virtual address of the selected job chain -+ */ -+static u64 select_job_chain(struct kbase_jd_atom *katom) ++static int aw9110_direction_output(struct gpio_chip *chip, unsigned int offset, int value) +{ -+ struct kbase_context *const kctx = katom->kctx; -+ u64 jc = katom->jc; -+ struct kbase_jd_renderpass *rp; ++ struct aw9110 *gpio = gpiochip_get_data(chip); + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ /* set level */ ++ chip->set(chip, offset, value); + -+ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) -+ return jc; ++ mutex_lock(&gpio->lock); + -+ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ /* set direct */ ++ gpio->direct &= ~(1<jctx.renderpasses[katom->renderpass_id]; -+ /* We can read a subset of renderpass state without holding -+ * higher-level locks (but not end_katom, for example). -+ * If the end-of-renderpass atom is running with as-yet indeterminate -+ * OOM state then assume that the start atom was not soft-stopped. -+ */ -+ switch (rp->state) { -+ case KBASE_JD_RP_OOM: -+ /* Tiling ran out of memory. -+ * Start of incremental rendering, used once. -+ */ -+ jc = katom->jc_fragment.norm_read_forced_write; -+ break; -+ case KBASE_JD_RP_START: -+ case KBASE_JD_RP_PEND_OOM: -+ /* Tiling completed successfully first time. -+ * Single-iteration rendering, used once. -+ */ -+ jc = katom->jc_fragment.norm_read_norm_write; -+ break; -+ case KBASE_JD_RP_RETRY_OOM: -+ /* Tiling ran out of memory again. -+ * Continuation of incremental rendering, used as -+ * many times as required. 
-+ */ -+ jc = katom->jc_fragment.forced_read_forced_write; -+ break; -+ case KBASE_JD_RP_RETRY: -+ case KBASE_JD_RP_RETRY_PEND_OOM: -+ /* Tiling completed successfully this time. -+ * End of incremental rendering, used once. -+ */ -+ jc = katom->jc_fragment.forced_read_norm_write; -+ break; -+ default: -+ WARN_ON(1); -+ break; -+ } ++ if (offset < 4) ++ gpio->write(gpio->client, REG_CONFIG_P1, gpio->direct&0x0F); ++ else ++ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct >> 4)&0x3F); + -+ dev_dbg(kctx->kbdev->dev, -+ "Selected job chain 0x%llx for end atom %pK in state %d\n", -+ jc, (void *)katom, (int)rp->state); ++ mutex_unlock(&gpio->lock); + -+ katom->jc = jc; -+ return jc; ++ dev_dbg(gpio->dev, "direct out: %04X, pin:%d\n", gpio->direct, offset); ++ return 0; +} + -+static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, -+ struct kbase_context *kctx) ++static void aw9110_set(struct gpio_chip *chip, unsigned int offset, int value) +{ -+ const ktime_t wait_loop_start = ktime_get_raw(); -+ const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms; -+ s64 diff = 0; ++ struct aw9110 *gpio = gpiochip_get_data(chip); ++ unsigned int bit = 1 << offset; + -+ /* wait for the JS_COMMAND_NEXT register to reach the given status value */ -+ do { -+ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) -+ return true; ++ mutex_lock(&gpio->lock); + -+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); -+ } while (diff < max_timeout); ++ if (value) ++ gpio->out |= bit; ++ else ++ gpio->out &= ~bit; + -+ dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js, -+ kctx->tgid, kctx->id); ++ if (offset < 4) ++ gpio->write(gpio->client, REG_OUTPUT_P1, gpio->out >> 0); ++ else ++ gpio->write(gpio->client, REG_OUTPUT_P0, gpio->out >> 4); + -+ return false; ++ mutex_unlock(&gpio->lock); +} + -+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) ++/*-------------------------------------------------------------------------*/ ++ ++static irqreturn_t aw9110_irq(int irq, void *data) +{ -+ struct kbase_context *kctx; -+ u32 cfg; -+ u64 const jc_head = select_job_chain(katom); -+ u64 affinity; -+ struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ struct aw9110 *gpio = data; ++ unsigned long change, i, status = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ int value = 0; ++ int nirq; + -+ kctx = katom->kctx; ++ value = gpio->read(gpio->client, REG_INPUT_P1); ++ status |= (value < 0) ? 0 : value; + -+ /* Command register must be available */ -+ if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) -+ return -EPERM; ++ value = gpio->read(gpio->client, REG_INPUT_P0); ++ status |= (value < 0) ? 
0 : (value<<4); + -+ dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", -+ jc_head, (void *)katom); + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), -+ jc_head & 0xFFFFFFFF); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), -+ jc_head >> 32); ++ /* ++ * call the interrupt handler iff gpio is used as ++ * interrupt source, just to avoid bad irqs ++ */ ++ mutex_lock(&gpio->lock); ++ change = (gpio->status ^ status) & gpio->irq_enabled; ++ gpio->status = status; ++ mutex_unlock(&gpio->lock); + -+ affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, -+ kctx->limited_core_mask); ++ for_each_set_bit(i, &change, gpio->chip.ngpio) { ++ nirq = irq_find_mapping(gpio->chip.irq.domain, i); ++ if (nirq) { ++ dev_dbg(gpio->dev, "status:%04lx,change:%04lx,index:%ld,nirq:%d\n", ++ status, change, i, nirq); ++ handle_nested_irq(nirq); ++ } ++ } + -+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on -+ * start -+ */ -+ cfg = kctx->as_nr; ++ return IRQ_HANDLED; ++} + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && -+ !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) -+ cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; ++/* ++ * NOP functions ++ */ ++static void aw9110_noop(struct irq_data *data) { } + -+ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) { -+ /* Force a cache maintenance operation if the newly submitted -+ * katom to the slot is from a different kctx. For a JM GPU -+ * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a -+ * FLUSH_CLEAN_INVALIDATE. -+ */ -+ u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged; ++static int aw9110_irq_set_wake(struct irq_data *data, unsigned int on) ++{ ++ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); + -+ if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) { -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER)) -+ cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER; -+ else -+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; -+ } else -+ cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; -+ } else -+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; ++ return irq_set_irq_wake(gpio->client->irq, on); ++} + -+ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && -+ !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) -+ cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; -+ else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) -+ cfg |= JS_CONFIG_END_FLUSH_CLEAN; -+ else -+ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; ++static void aw9110_irq_enable(struct irq_data *data) ++{ ++ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); + -+ cfg |= JS_CONFIG_THREAD_PRI(8); ++ gpio->irq_enabled |= (1 << data->hwirq); ++} + -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || -+ (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) -+ cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; ++static void aw9110_irq_disable(struct irq_data *data) ++{ ++ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); + -+ if (!ptr_slot_rb->job_chain_flag) { -+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG; -+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; -+ ptr_slot_rb->job_chain_flag = true; -+ } else { -+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; -+ ptr_slot_rb->job_chain_flag = false; -+ } ++ gpio->irq_enabled &= ~(1 << data->hwirq); ++} + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); ++static void aw9110_irq_bus_lock(struct irq_data *data) ++{ ++ struct aw9110 *gpio 
= irq_data_get_irq_chip_data(data); + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), -+ katom->flush_id); ++ mutex_lock(&gpio->lock); ++} + -+ /* Write an approximate start timestamp. -+ * It's approximate because there might be a job in the HEAD register. -+ */ -+ katom->start_timestamp = ktime_get_raw(); ++static void aw9110_irq_bus_sync_unlock(struct irq_data *data) ++{ ++ struct aw9110 *gpio = irq_data_get_irq_chip_data(data); + -+ /* GO ! */ -+ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", -+ katom, kctx, js, jc_head); ++ mutex_unlock(&gpio->lock); ++} + -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, -+ (u32)affinity); ++static void aw9110_state_init(struct aw9110 *gpio) ++{ ++ /* out4-9 push-pull */ ++ gpio->write(gpio->client, REG_CTRL, (1<<4)); + -+ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, -+ js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); ++ /* work mode : gpio */ ++ gpio->write(gpio->client, REG_WORK_MODE_P1, 0x0F); ++ gpio->write(gpio->client, REG_WORK_MODE_P0, 0x3F); + -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, -+ affinity, cfg); -+ KBASE_TLSTREAM_TL_RET_CTX_LPU( -+ kbdev, -+ kctx, -+ &kbdev->gpu_props.props.raw_props.js_features[ -+ katom->slot_nr]); -+ KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); -+ KBASE_TLSTREAM_TL_RET_ATOM_LPU( -+ kbdev, -+ katom, -+ &kbdev->gpu_props.props.raw_props.js_features[js], -+ "ctx_nr,atom_nr"); -+ kbase_kinstr_jm_atom_hw_submit(katom); ++ /* default direct */ ++ gpio->direct = 0x03FF; /* 0: output, 1:input */ ++ gpio->write(gpio->client, REG_CONFIG_P1, gpio->direct & 0x0F); ++ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct>>4) & 0x3F); + -+ /* Update the slot's last katom submission kctx */ -+ ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx); ++ /* interrupt enable */ ++ gpio->irq_enabled = 0x03FF; /* 0: disable 1:enable, chip: 0:enable, 1: disable */ ++ gpio->write(gpio->client, REG_INT_P1, ((~gpio->irq_enabled) >> 0)&0x0F); ++ gpio->write(gpio->client, REG_INT_P0, ((~gpio->irq_enabled) >> 4)&0x3F); + -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { -+ /* If this is the only job on the slot, trace it as starting */ -+ char js_string[16]; ++ /* clear interrupt */ ++ gpio->read(gpio->client, REG_INPUT_P1); ++ gpio->read(gpio->client, REG_INPUT_P1); ++} + -+ trace_gpu_sched_switch( -+ kbasep_make_job_slot_string(js, js_string, -+ sizeof(js_string)), -+ ktime_to_ns(katom->start_timestamp), -+ (u32)katom->kctx->id, 0, katom->work_id); -+ } -+#endif ++static int aw9110_parse_dt(struct aw9110 *chip, struct i2c_client *client) ++{ ++ struct device_node *np = client->dev.of_node; ++ int ret = 0; + -+ trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, -+ kbase_jd_atom_id(kctx, katom), js); ++ /* shdn_en */ ++ ret = of_get_named_gpio(np, "shdn_en", 0); ++ if (ret < 0) { ++ dev_err(chip->dev, "of get shdn_en failed\n"); ++ chip->shdn_en = -1; ++ } else { ++ chip->shdn_en = ret; + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), -+ JS_COMMAND_START); ++ ret = devm_gpio_request_one(chip->dev, chip->shdn_en, ++ GPIOF_OUT_INIT_LOW, "AW9110_SHDN_EN"); ++ if (ret) { ++ dev_err(chip->dev, ++ "devm_gpio_request_one shdn_en failed\n"); ++ return ret; ++ } ++ ++ /* enable chip */ ++ gpio_set_value(chip->shdn_en, 1); ++ } + + return 0; +} + -+/** -+ * kbasep_job_slot_update_head_start_timestamp - 
Update timestamp -+ * @kbdev: kbase device -+ * @js: job slot -+ * @end_timestamp: timestamp -+ * -+ * Update the start_timestamp of the job currently in the HEAD, based on the -+ * fact that we got an IRQ for the previous set of completed jobs. -+ * -+ * The estimate also takes into account the time the job was submitted, to -+ * work out the best estimate (which might still result in an over-estimate to -+ * the calculated time spent) -+ */ -+static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, -+ ktime_t end_timestamp) ++static int aw9110_check_dev_id(struct i2c_client *client) +{ -+ ktime_t timestamp_diff; -+ struct kbase_jd_atom *katom; ++ int ret; + -+ /* Checking the HEAD position for the job slot */ -+ katom = kbase_gpu_inspect(kbdev, js, 0); -+ if (katom != NULL) { -+ timestamp_diff = ktime_sub(end_timestamp, -+ katom->start_timestamp); -+ if (ktime_to_ns(timestamp_diff) >= 0) { -+ /* Only update the timestamp if it's a better estimate -+ * than what's currently stored. This is because our -+ * estimate that accounts for the throttle time may be -+ * too much of an overestimate -+ */ -+ katom->start_timestamp = end_timestamp; -+ } ++ ret = aw9110_i2c_read_le8(client, REG_ID); ++ ++ if (ret < 0) { ++ dev_err(&client->dev, "fail to read dev id(%d)\n", ret); ++ return ret; + } -+} + -+/** -+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline -+ * tracepoint -+ * @kbdev: kbase device -+ * @js: job slot -+ * -+ * Make a tracepoint call to the instrumentation module informing that -+ * softstop happened on given lpu (job slot). -+ */ -+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) -+{ -+ KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( -+ kbdev, -+ &kbdev->gpu_props.props.raw_props.js_features[js]); ++ dev_info(&client->dev, "dev id : 0x%02x\n", ret); ++ ++ return 0; +} + -+void kbase_job_done(struct kbase_device *kbdev, u32 done) ++/*-------------------------------------------------------------------------*/ ++ ++static int aw9110_probe(struct i2c_client *client, ++ const struct i2c_device_id *id) +{ -+ u32 count = 0; -+ ktime_t end_timestamp; ++ struct aw9110 *gpio; ++ int status; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_info(&client->dev, "===aw9110 probe===\n"); + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); ++ /* Allocate, initialize, and register this gpio_chip. */ ++ gpio = devm_kzalloc(&client->dev, sizeof(*gpio), GFP_KERNEL); ++ if (!gpio) ++ return -ENOMEM; + -+ end_timestamp = ktime_get_raw(); ++ gpio->dev = &client->dev; + -+ while (done) { -+ unsigned int i; -+ u32 failed = done >> 16; ++ aw9110_parse_dt(gpio, client); + -+ /* treat failed slots as finished slots */ -+ u32 finished = (done & 0xFFFF) | failed; ++ mutex_init(&gpio->lock); + -+ /* Note: This is inherently unfair, as we always check for lower -+ * numbered interrupts before the higher numbered ones. 
-+ */ -+ i = ffs(finished) - 1; ++ gpio->chip.base = -1; ++ gpio->chip.can_sleep = true; ++ gpio->chip.parent = &client->dev; ++ gpio->chip.owner = THIS_MODULE; ++ gpio->chip.get = aw9110_get; ++ gpio->chip.set = aw9110_set; ++ gpio->chip.get_direction = aw9110_get_direction; ++ gpio->chip.direction_input = aw9110_direction_input; ++ gpio->chip.direction_output = aw9110_direction_output; ++ gpio->chip.ngpio = id->driver_data; + -+ do { -+ int nr_done; -+ u32 active; -+ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ -+ u64 job_tail = 0; ++ gpio->write = aw9110_i2c_write_le8; ++ gpio->read = aw9110_i2c_read_le8; + -+ if (failed & (1u << i)) { -+ /* read out the job slot status code if the job -+ * slot reported failure -+ */ -+ completion_code = kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_STATUS)); ++ gpio->chip.label = client->name; + -+ if (completion_code == BASE_JD_EVENT_STOPPED) { -+ u64 job_head; ++ gpio->client = client; ++ i2c_set_clientdata(client, gpio); + -+ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( -+ kbdev, NULL, -+ i, 0, TL_JS_EVENT_SOFT_STOP); ++ status = aw9110_check_dev_id(client); ++ if (status < 0) { ++ dev_err(&client->dev, "check device id fail(%d)\n", status); ++ goto fail; ++ } + -+ kbasep_trace_tl_event_lpu_softstop( -+ kbdev, i); ++ aw9110_state_init(gpio); + -+ /* Soft-stopped job - read the value of -+ * JS_TAIL so that the job chain can -+ * be resumed -+ */ -+ job_tail = (u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_TAIL_LO)) | -+ ((u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_TAIL_HI)) -+ << 32); -+ job_head = (u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_HEAD_LO)) | -+ ((u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_HEAD_HI)) -+ << 32); -+ /* For a soft-stopped job chain js_tail should -+ * same as the js_head, but if not then the -+ * job chain was incorrectly marked as -+ * soft-stopped. In such case we should not -+ * be resuming the job chain from js_tail and -+ * report the completion_code as UNKNOWN. -+ */ -+ if (job_tail != job_head) -+ completion_code = BASE_JD_EVENT_UNKNOWN; ++ /* Enable irqchip if we have an interrupt */ ++ if (client->irq) { ++ struct gpio_irq_chip *girq; + -+ } else if (completion_code == -+ BASE_JD_EVENT_NOT_STARTED) { -+ /* PRLAM-10673 can cause a TERMINATED -+ * job to come back as NOT_STARTED, -+ * but the error interrupt helps us -+ * detect it -+ */ -+ completion_code = -+ BASE_JD_EVENT_TERMINATED; -+ } ++ gpio->irqchip.name = "aw9110"; ++ gpio->irqchip.irq_enable = aw9110_irq_enable; ++ gpio->irqchip.irq_disable = aw9110_irq_disable; ++ gpio->irqchip.irq_ack = aw9110_noop; ++ gpio->irqchip.irq_mask = aw9110_noop; ++ gpio->irqchip.irq_unmask = aw9110_noop; ++ gpio->irqchip.irq_set_wake = aw9110_irq_set_wake; ++ gpio->irqchip.irq_bus_lock = aw9110_irq_bus_lock; ++ gpio->irqchip.irq_bus_sync_unlock = aw9110_irq_bus_sync_unlock; + -+ kbase_gpu_irq_evict(kbdev, i, completion_code); ++ status = devm_request_threaded_irq(&client->dev, client->irq, ++ NULL, aw9110_irq, IRQF_ONESHOT | ++ IRQF_TRIGGER_FALLING | IRQF_SHARED, ++ dev_name(&client->dev), gpio); ++ if (status) ++ goto fail; + -+ /* Some jobs that encounter a BUS FAULT may -+ * result in corrupted state causing future -+ * jobs to hang. Reset GPU before allowing -+ * any other jobs on the slot to continue. 
-+ */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { -+ if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { -+ if (kbase_prepare_to_reset_gpu_locked( -+ kbdev, -+ RESET_FLAGS_NONE)) -+ kbase_reset_gpu_locked(kbdev); -+ } -+ } -+ } ++ girq = &gpio->chip.irq; ++ girq->chip = &gpio->irqchip; ++ /* This will let us handle the parent IRQ in the driver */ ++ girq->parent_handler = NULL; ++ girq->num_parents = 0; ++ girq->parents = NULL; ++ girq->default_type = IRQ_TYPE_NONE; ++ girq->handler = handle_level_irq; ++ girq->threaded = true; ++ } + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), -+ done & ((1 << i) | (1 << (i + 16)))); -+ active = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); ++ status = devm_gpiochip_add_data(&client->dev, &gpio->chip, gpio); ++ if (status < 0) ++ goto fail; + -+ if (((active >> i) & 1) == 0 && -+ (((done >> (i + 16)) & 1) == 0)) { -+ /* There is a potential race we must work -+ * around: -+ * -+ * 1. A job slot has a job in both current and -+ * next registers -+ * 2. The job in current completes -+ * successfully, the IRQ handler reads -+ * RAWSTAT and calls this function with the -+ * relevant bit set in "done" -+ * 3. The job in the next registers becomes the -+ * current job on the GPU -+ * 4. Sometime before the JOB_IRQ_CLEAR line -+ * above the job on the GPU _fails_ -+ * 5. The IRQ_CLEAR clears the done bit but not -+ * the failed bit. This atomically sets -+ * JOB_IRQ_JS_STATE. However since both jobs -+ * have now completed the relevant bits for -+ * the slot are set to 0. -+ * -+ * If we now did nothing then we'd incorrectly -+ * assume that _both_ jobs had completed -+ * successfully (since we haven't yet observed -+ * the fail bit being set in RAWSTAT). -+ * -+ * So at this point if there are no active jobs -+ * left we check to see if RAWSTAT has a failure -+ * bit set for the job slot. If it does we know -+ * that there has been a new failure that we -+ * didn't previously know about, so we make sure -+ * that we record this in active (but we wait -+ * for the next loop to deal with it). -+ * -+ * If we were handling a job failure (i.e. done -+ * has the relevant high bit set) then we know -+ * that the value read back from -+ * JOB_IRQ_JS_STATE is the correct number of -+ * remaining jobs because the failed job will -+ * have prevented any futher jobs from starting -+ * execution. 
-+ */ -+ u32 rawstat = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); ++ dev_info(&client->dev, "probed\n"); + -+ if ((rawstat >> (i + 16)) & 1) { -+ /* There is a failed job that we've -+ * missed - add it back to active -+ */ -+ active |= (1u << i); -+ } -+ } ++ return 0; + -+ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", -+ completion_code); ++fail: ++ dev_err(&client->dev, "probe error %d for '%s'\n", status, ++ client->name); + -+ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); -+ nr_done -= (active >> i) & 1; -+ nr_done -= (active >> (i + 16)) & 1; ++ return status; ++} + -+ if (nr_done <= 0) { -+ dev_warn(kbdev->dev, "Spurious interrupt on slot %d", -+ i); ++static int aw9110_pm_resume(struct device *dev) ++{ ++ struct aw9110 *gpio = dev->driver_data; + -+ goto spurious; -+ } ++ /* out4-9 push-pull */ ++ gpio->write(gpio->client, REG_CTRL, (1<<4)); + -+ count += nr_done; ++ /* work mode : gpio */ ++ gpio->write(gpio->client, REG_WORK_MODE_P1, 0x0F); ++ gpio->write(gpio->client, REG_WORK_MODE_P0, 0x3F); + -+ while (nr_done) { -+ if (nr_done == 1) { -+ kbase_gpu_complete_hw(kbdev, i, -+ completion_code, -+ job_tail, -+ &end_timestamp); -+ kbase_jm_try_kick_all(kbdev); -+ } else { -+ /* More than one job has completed. -+ * Since this is not the last job being -+ * reported this time it must have -+ * passed. This is because the hardware -+ * will not allow further jobs in a job -+ * slot to complete until the failed job -+ * is cleared from the IRQ status. -+ */ -+ kbase_gpu_complete_hw(kbdev, i, -+ BASE_JD_EVENT_DONE, -+ 0, -+ &end_timestamp); -+ } -+ nr_done--; -+ } -+ spurious: -+ done = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); ++ /* direct */ ++ //gpio->direct = 0x03FF; /* 0: output, 1:input */ ++ gpio->write(gpio->client, REG_CONFIG_P1, gpio->direct & 0x0F); ++ gpio->write(gpio->client, REG_CONFIG_P0, (gpio->direct>>4) & 0x3F); + -+ failed = done >> 16; -+ finished = (done & 0xFFFF) | failed; -+ if (done) -+ end_timestamp = ktime_get_raw(); -+ } while (finished & (1 << i)); ++ /* out */ ++ gpio->write(gpio->client, REG_OUTPUT_P1, gpio->out >> 0); ++ gpio->write(gpio->client, REG_OUTPUT_P0, gpio->out >> 4); + -+ kbasep_job_slot_update_head_start_timestamp(kbdev, i, -+ end_timestamp); -+ } ++ /* interrupt enable */ ++ //gpio->irq_enabled = 0x03FF; /* 0: disable 1:enable, chip: 0:enable, 1: disable */ ++ gpio->write(gpio->client, REG_INT_P1, ((~gpio->irq_enabled) >> 0)&0x0F); ++ gpio->write(gpio->client, REG_INT_P0, ((~gpio->irq_enabled) >> 4)&0x3F); + -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_COMMITTED) { -+ /* If we're trying to reset the GPU then we might be able to do -+ * it early (without waiting for a timeout) because some jobs -+ * have completed -+ */ -+ kbasep_try_reset_gpu_early_locked(kbdev); -+ } -+ KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); ++ return 0; +} + -+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, -+ u32 action, base_jd_core_req core_reqs, -+ struct kbase_jd_atom *target_katom) -+{ -+#if KBASE_KTRACE_ENABLE -+ u32 status_reg_before; -+ u64 job_in_head_before; -+ u32 status_reg_after; ++static const struct dev_pm_ops aw9110_pm_ops = { ++ .resume = aw9110_pm_resume, ++}; + -+ WARN_ON(action & (~JS_COMMAND_MASK)); ++static struct i2c_driver aw9110_driver = { ++ .driver = { ++ .name = "aw9110", ++ .pm = &aw9110_pm_ops, ++ .of_match_table = of_match_ptr(aw9110_of_table), ++ }, ++ .probe = aw9110_probe, ++ .id_table = 
aw9110_id, ++}; + -+ /* Check the head pointer */ -+ job_in_head_before = ((u64) kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, JS_HEAD_LO))) -+ | (((u64) kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, JS_HEAD_HI))) -+ << 32); -+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); -+#endif ++static int __init aw9110_init(void) ++{ ++ return i2c_add_driver(&aw9110_driver); ++} ++/* register after i2c postcore initcall and before ++ * subsys initcalls that may rely on these GPIOs ++ */ ++subsys_initcall(aw9110_init); + -+ if (action == JS_COMMAND_SOFT_STOP) { -+ if (kbase_jd_katom_is_protected(target_katom)) { -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ dev_dbg(kbdev->dev, -+ "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x", -+ (unsigned int)core_reqs); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ return; -+ } ++static void __exit aw9110_exit(void) ++{ ++ i2c_del_driver(&aw9110_driver); ++} ++module_exit(aw9110_exit); + -+ /* We are about to issue a soft stop, so mark the atom as having -+ * been soft stopped -+ */ -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jake Wu "); ++MODULE_DESCRIPTION("AW9110 i2c expander gpio driver"); + -+ /* Mark the point where we issue the soft-stop command */ -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); +diff --git a/drivers/gpio/gpio-nca9539.c b/drivers/gpio/gpio-nca9539.c +new file mode 100644 +index 000000000..50fff6d90 +--- /dev/null ++++ b/drivers/gpio/gpio-nca9539.c +@@ -0,0 +1,332 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * NCA9539 I2C Port Expander I/O ++ * ++ * Copyright (C) 2023 Cody Xie ++ * ++ */ + -+ action = (target_katom->atom_flags & -+ KBASE_KATOM_FLAGS_JOBCHAIN) ? -+ JS_COMMAND_SOFT_STOP_1 : -+ JS_COMMAND_SOFT_STOP_0; -+ } else if (action == JS_COMMAND_HARD_STOP) { -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ action = (target_katom->atom_flags & -+ KBASE_KATOM_FLAGS_JOBCHAIN) ? 
-+ JS_COMMAND_HARD_STOP_1 : -+ JS_COMMAND_HARD_STOP_0; -+ } ++#define NCA9539_REG_INPUT_PORT_BASE 0x00 ++#define NCA9539_REG_INPUT_PORT0 (NCA9539_REG_INPUT_PORT_BASE + 0x0) ++#define NCA9539_REG_INPUT_PORT1 (NCA9539_REG_INPUT_PORT_BASE + 0x1) ++#define NCA9539_REG_OUTPUT_PORT_BASE 0x02 ++#define NCA9539_REG_OUTPUT_PORT0 (NCA9539_REG_OUTPUT_PORT_BASE + 0x0) ++#define NCA9539_REG_OUTPUT_PORT1 (NCA9539_REG_OUTPUT_PORT_BASE + 0x1) ++#define NCA9539_REG_POLARITY_BASE 0x04 ++#define NCA9539_REG_POLARITY_PORT0 (NCA9539_REG_POLARITY_BASE + 0x0) ++#define NCA9539_REG_POLARITY_PORT1 (NCA9539_REG_POLARITY_BASE + 0x1) ++#define NCA9539_REG_CONFIG_BASE 0x06 ++#define NCA9539_REG_CONFIG_PORT0 (NCA9539_REG_CONFIG_BASE + 0x0) ++#define NCA9539_REG_CONFIG_PORT1 (NCA9539_REG_CONFIG_BASE + 0x1) + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); ++struct nca9539_chip { ++ struct gpio_chip gpio_chip; ++ struct regmap *regmap; ++ struct regulator *regulator; ++ unsigned int ngpio; ++}; + -+#if KBASE_KTRACE_ENABLE -+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); -+ if (status_reg_after == BASE_JD_EVENT_ACTIVE) { -+ struct kbase_jd_atom *head; -+ struct kbase_context *head_kctx; ++static int nca9539_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) ++{ ++ struct nca9539_chip *priv = gpiochip_get_data(gc); ++ unsigned int port = offset / 8; ++ unsigned int pin = offset % 8; ++ unsigned int value; ++ int ret; + -+ head = kbase_gpu_inspect(kbdev, js, 0); -+ if (unlikely(!head)) { -+ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); -+ return; -+ } -+ head_kctx = head->kctx; ++ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); ++ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); ++ if (ret < 0) { ++ dev_err(gc->parent, "%s offset(%d) read config failed", ++ __func__, offset); ++ return ret; ++ } + -+ if (status_reg_before == BASE_JD_EVENT_ACTIVE) -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); -+ else -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); ++ if (value & BIT(pin)) ++ return GPIO_LINE_DIRECTION_IN; + -+ switch (action) { -+ case JS_COMMAND_SOFT_STOP: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_0: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_1: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP_0: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP_1: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); -+ break; -+ default: -+ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, -+ (void *)target_katom, (void *)target_katom->kctx); -+ break; -+ } -+ } else { -+ if (status_reg_before == BASE_JD_EVENT_ACTIVE) -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); -+ else -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); ++ return GPIO_LINE_DIRECTION_OUT; ++} + -+ switch (action) { -+ case JS_COMMAND_SOFT_STOP: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_0: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, 
JM_SOFTSTOP_0, NULL, NULL, 0, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_1: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); -+ break; -+ case JS_COMMAND_HARD_STOP: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); -+ break; -+ case JS_COMMAND_HARD_STOP_0: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); -+ break; -+ case JS_COMMAND_HARD_STOP_1: -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); -+ break; -+ default: -+ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, -+ (void *)target_katom, (void *)target_katom->kctx); -+ break; -+ } ++static int nca9539_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) ++{ ++ struct nca9539_chip *priv = gpiochip_get_data(gc); ++ unsigned int port = offset / 8; ++ unsigned int pin = offset % 8; ++ int ret; ++ ++ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); ++ ret = regmap_update_bits(priv->regmap, NCA9539_REG_CONFIG_BASE + port, ++ BIT(pin), BIT(pin)); ++ if (ret < 0) { ++ dev_err(gc->parent, "%s offset(%d) read config failed", ++ __func__, offset); + } -+#endif ++ ++ return ret; +} + -+void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) ++static int nca9539_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, ++ int val) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned int i; ++ struct nca9539_chip *priv = gpiochip_get_data(gc); ++ unsigned int port = offset / 8; ++ unsigned int pin = offset % 8; ++ int ret; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(gc->parent, "%s offset(%d) val(%d)", __func__, offset, val); ++ ret = regmap_update_bits(priv->regmap, NCA9539_REG_CONFIG_BASE + port, ++ BIT(pin), 0); ++ if (ret < 0) { ++ dev_err(gc->parent, ++ "%s offset(%d) val(%d) update config failed", __func__, ++ offset, val); ++ return ret; ++ } + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ kbase_job_slot_hardstop(kctx, i, NULL); ++ ret = regmap_update_bits(priv->regmap, ++ NCA9539_REG_OUTPUT_PORT_BASE + port, BIT(pin), ++ val ? 
BIT(pin) : 0); ++ if (ret < 0) { ++ dev_err(gc->parent, ++ "%s offset(%d) val(%d) update output failed", __func__, ++ offset, val); ++ return ret; ++ } ++ ++ return ret; +} + -+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, -+ struct kbase_jd_atom *target_katom) ++static int nca9539_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ -+ struct kbase_device *kbdev; -+ unsigned int target_js = target_katom->slot_nr; -+ int i; -+ bool stop_sent = false; ++ struct nca9539_chip *priv = gpiochip_get_data(gc); ++ unsigned int port = offset / 8; ++ unsigned int pin = offset % 8; ++ unsigned int reg; ++ unsigned int value; ++ int ret; + -+ kbdev = kctx->kbdev; ++ dev_dbg(gc->parent, "%s offset(%d)", __func__, offset); ++ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); ++ if (ret < 0) { ++ dev_err(gc->parent, "%s offset(%d) check config failed", ++ __func__, offset); ++ return ret; ++ } ++ if (!(BIT(pin) & value)) ++ reg = NCA9539_REG_OUTPUT_PORT_BASE + port; ++ else ++ reg = NCA9539_REG_INPUT_PORT_BASE + port; ++ ret = regmap_read(priv->regmap, reg, &value); ++ if (ret < 0) { ++ dev_err(gc->parent, "%s offset(%d) read value failed", __func__, ++ offset); ++ return -EIO; ++ } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ return !!(BIT(pin) & value); ++} + -+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, target_js); i++) { -+ struct kbase_jd_atom *slot_katom; ++static void nca9539_gpio_set(struct gpio_chip *gc, unsigned int offset, int val) ++{ ++ struct nca9539_chip *priv = gpiochip_get_data(gc); ++ unsigned int port = offset / 8; ++ unsigned int pin = offset % 8; ++ unsigned int value; ++ int ret; + -+ slot_katom = kbase_gpu_inspect(kbdev, target_js, i); -+ if (!slot_katom) -+ continue; ++ dev_dbg(gc->parent, "%s offset(%d) val(%d)", __func__, offset, val); ++ ret = regmap_read(priv->regmap, NCA9539_REG_CONFIG_BASE + port, &value); ++ if (ret < 0 || !!(BIT(pin) & value)) { ++ dev_err(gc->parent, "%s offset(%d) val(%d) check config failed", ++ __func__, offset, val); ++ } + -+ if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom, -+ KBASE_ATOM_ORDERING_FLAG_SEQNR)) { -+ if (!stop_sent) -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( -+ kbdev, -+ target_katom); ++ ret = regmap_update_bits(priv->regmap, ++ NCA9539_REG_OUTPUT_PORT_BASE + port, BIT(pin), ++ val ? 
BIT(pin) : 0); ++ if (ret < 0) { ++ dev_err(gc->parent, "%s offset(%d) val(%d) read input failed", ++ __func__, offset, val); ++ } ++} + -+ kbase_job_slot_softstop(kbdev, target_js, slot_katom); -+ stop_sent = true; -+ } ++static bool nca9539_is_writeable_reg(struct device *dev, unsigned int reg) ++{ ++ switch (reg) { ++ case NCA9539_REG_OUTPUT_PORT0: ++ case NCA9539_REG_OUTPUT_PORT1: ++ case NCA9539_REG_POLARITY_PORT0: ++ case NCA9539_REG_POLARITY_PORT1: ++ case NCA9539_REG_CONFIG_PORT0: ++ case NCA9539_REG_CONFIG_PORT1: ++ return true; + } ++ return false; +} + -+static int softstop_start_rp_nolock( -+ struct kbase_context *kctx, struct kbase_va_region *reg) ++static bool nca9539_is_readable_reg(struct device *dev, unsigned int reg) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_jd_atom *katom; -+ struct kbase_jd_renderpass *rp; ++ switch (reg) { ++ case NCA9539_REG_INPUT_PORT0: ++ case NCA9539_REG_INPUT_PORT1: ++ case NCA9539_REG_OUTPUT_PORT0: ++ case NCA9539_REG_OUTPUT_PORT1: ++ case NCA9539_REG_POLARITY_PORT0: ++ case NCA9539_REG_POLARITY_PORT1: ++ case NCA9539_REG_CONFIG_PORT0: ++ case NCA9539_REG_CONFIG_PORT1: ++ return true; ++ } ++ return false; ++} + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++static bool nca9539_is_volatile_reg(struct device *dev, unsigned int reg) ++{ ++ return true; ++} + -+ katom = kbase_gpu_inspect(kbdev, 1, 0); ++static const struct reg_default nca9539_regmap_default[] = { ++ { NCA9539_REG_INPUT_PORT0, 0xFF }, ++ { NCA9539_REG_INPUT_PORT1, 0xFF }, ++ { NCA9539_REG_OUTPUT_PORT0, 0xFF }, ++ { NCA9539_REG_OUTPUT_PORT1, 0xFF }, ++ { NCA9539_REG_POLARITY_PORT0, 0x00 }, ++ { NCA9539_REG_POLARITY_PORT1, 0x00 }, ++ { NCA9539_REG_CONFIG_PORT0, 0xFF }, ++ { NCA9539_REG_CONFIG_PORT1, 0xFF }, ++}; + -+ if (!katom) { -+ dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); -+ return -ESRCH; -+ } ++static const struct regmap_config nca9539_regmap_config = { ++ .reg_bits = 8, ++ .val_bits = 8, ++ .max_register = 7, ++ .writeable_reg = nca9539_is_writeable_reg, ++ .readable_reg = nca9539_is_readable_reg, ++ .volatile_reg = nca9539_is_volatile_reg, ++ .reg_defaults = nca9539_regmap_default, ++ .num_reg_defaults = ARRAY_SIZE(nca9539_regmap_default), ++ .cache_type = REGCACHE_FLAT, ++}; + -+ if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { -+ dev_dbg(kctx->kbdev->dev, -+ "Atom %pK on job slot is not start RP\n", (void *)katom); -+ return -EPERM; -+ } ++static const struct gpio_chip template_chip = { ++ .label = "nca9539-gpio", ++ .owner = THIS_MODULE, ++ .get_direction = nca9539_gpio_get_direction, ++ .direction_input = nca9539_gpio_direction_input, ++ .direction_output = nca9539_gpio_direction_output, ++ .get = nca9539_gpio_get, ++ .set = nca9539_gpio_set, ++ .base = -1, ++ .can_sleep = true, ++}; + -+ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++static int nca9539_probe(struct i2c_client *client) ++{ ++ struct nca9539_chip *chip; ++ struct regulator *reg; ++ int ret; + -+ rp = &kctx->jctx.renderpasses[katom->renderpass_id]; -+ if (WARN_ON(rp->state != KBASE_JD_RP_START && -+ rp->state != KBASE_JD_RP_RETRY)) -+ return -EINVAL; ++ chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); ++ if (!chip) ++ return -ENOMEM; + -+ dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", -+ (int)rp->state, (void *)reg); ++ chip->gpio_chip = template_chip; ++ chip->gpio_chip.label = "nca9539-gpio"; ++ chip->gpio_chip.parent = 
&client->dev; ++ chip->ngpio = (uintptr_t)of_device_get_match_data(&client->dev); ++ chip->gpio_chip.ngpio = chip->ngpio; + -+ if (WARN_ON(katom != rp->start_katom)) -+ return -EINVAL; ++ reg = devm_regulator_get(&client->dev, "vdd"); ++ if (IS_ERR(reg)) ++ return dev_err_probe(&client->dev, PTR_ERR(reg), ++ "reg get err\n"); + -+ dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", -+ (void *)reg, (void *)&rp->oom_reg_list); -+ list_move_tail(®->link, &rp->oom_reg_list); -+ dev_dbg(kctx->kbdev->dev, "Added region to list\n"); ++ ret = regulator_enable(reg); ++ if (ret) { ++ dev_err(&client->dev, "reg en err: %d\n", ret); ++ return ret; ++ } ++ chip->regulator = reg; + -+ rp->state = (rp->state == KBASE_JD_RP_START ? -+ KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); ++ chip->regmap = devm_regmap_init_i2c(client, &nca9539_regmap_config); ++ if (IS_ERR(chip->regmap)) { ++ ret = PTR_ERR(chip->regmap); ++ dev_err(&client->dev, "Failed to allocate register map: %d\n", ++ ret); ++ goto err_exit; ++ } ++ regcache_mark_dirty(chip->regmap); ++ ret = regcache_sync(chip->regmap); ++ if (ret) { ++ dev_err(&client->dev, "Failed to sync register map: %d\n", ret); ++ goto err_exit; ++ } + -+ kbase_job_slot_softstop(kbdev, 1, katom); ++ // TODO(Cody): irq_chip setup ++ ++ ret = devm_gpiochip_add_data(&client->dev, &chip->gpio_chip, chip); ++ if (ret < 0) { ++ dev_err(&client->dev, "Unable to register gpiochip\n"); ++ goto err_exit; ++ } ++ ++ i2c_set_clientdata(client, chip); + + return 0; ++ ++err_exit: ++ regulator_disable(chip->regulator); ++ return ret; +} + -+int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, -+ struct kbase_va_region *const reg) ++static int nca9539_remove(struct i2c_client *client) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ int err; -+ unsigned long flags; ++ struct nca9539_chip *chip = i2c_get_clientdata(client); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ err = softstop_start_rp_nolock(kctx, reg); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ regulator_disable(chip->regulator); + -+ return err; ++ return 0; +} + -+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); ++static const struct of_device_id nca9539_gpio_of_match_table[] = { ++ { ++ .compatible = "novo,nca9539-gpio", ++ .data = (void *)16, ++ }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, nca9539_gpio_of_match_table); + -+ timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, -+ kctx->jctx.job_nr == 0, timeout); ++static const struct i2c_device_id nca9539_gpio_id_table[] = { ++ { "nca9539-gpio" }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(i2c, nca9539_gpio_id_table); + -+ if (timeout != 0) -+ timeout = wait_event_timeout( -+ kctx->jctx.sched_info.ctx.is_scheduled_wait, -+ !kbase_ctx_flag(kctx, KCTX_SCHEDULED), -+ timeout); ++static struct i2c_driver nca9539_driver = { ++ .driver = { ++ .name = "nca9539-gpio", ++ .of_match_table = nca9539_gpio_of_match_table, ++ }, ++ .probe_new = nca9539_probe, ++ .remove = nca9539_remove, ++ .id_table = nca9539_gpio_id_table, ++}; ++module_i2c_driver(nca9539_driver); + -+ /* Neither wait timed out; all done! 
*/ -+ if (timeout != 0) -+ goto exit; ++MODULE_AUTHOR("Cody Xie "); ++MODULE_DESCRIPTION("GPIO expander driver for Novosense nca9539"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/gpio/gpio-rockchip-oh.c b/drivers/gpio/gpio-rockchip-oh.c +new file mode 100644 +index 000000000..f251313c2 +--- /dev/null ++++ b/drivers/gpio/gpio-rockchip-oh.c +@@ -0,0 +1,880 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (c) 2013 MundoReader S.L. ++ * Author: Heiko Stuebner ++ * ++ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. ++ */ + -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) { -+ dev_err(kbdev->dev, -+ "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", -+ ZAP_TIMEOUT); -+ kbase_reset_gpu(kbdev); -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* Wait for the reset to complete */ -+ kbase_reset_gpu_wait(kbdev); -+exit: -+ dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); ++#include "../pinctrl/core.h" ++#include "../pinctrl/pinctrl-rockchip.h" + -+ /* Ensure that the signallers of the waitqs have finished */ -+ mutex_lock(&kctx->jctx.lock); -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kctx->jctx.lock); -+} ++#define GPIO_TYPE_V1 (0) /* GPIO Version ID reserved */ ++#define GPIO_TYPE_V2 (0x01000C2B) /* GPIO Version ID 0x01000C2B */ ++#define GPIO_TYPE_V2_1 (0x0101157C) /* GPIO Version ID 0x0101157C */ + -+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) -+{ -+ u32 flush_id = 0; ++#define GPIO_MAX_PINS (32) + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { -+ mutex_lock(&kbdev->pm.lock); -+ if (kbdev->pm.backend.gpu_powered) -+ flush_id = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(LATEST_FLUSH)); -+ mutex_unlock(&kbdev->pm.lock); -+ } ++static const struct rockchip_gpio_regs gpio_regs_v1 = { ++ .port_dr = 0x00, ++ .port_ddr = 0x04, ++ .int_en = 0x30, ++ .int_mask = 0x34, ++ .int_type = 0x38, ++ .int_polarity = 0x3c, ++ .int_status = 0x40, ++ .int_rawstatus = 0x44, ++ .debounce = 0x48, ++ .port_eoi = 0x4c, ++ .ext_port = 0x50, ++}; + -+ return flush_id; -+} ++static const struct rockchip_gpio_regs gpio_regs_v2 = { ++ .port_dr = 0x00, ++ .port_ddr = 0x08, ++ .int_en = 0x10, ++ .int_mask = 0x18, ++ .int_type = 0x20, ++ .int_polarity = 0x28, ++ .int_bothedge = 0x30, ++ .int_status = 0x50, ++ .int_rawstatus = 0x58, ++ .debounce = 0x38, ++ .dbclk_div_en = 0x40, ++ .dbclk_div_con = 0x48, ++ .port_eoi = 0x60, ++ .ext_port = 0x70, ++ .version_id = 0x78, ++}; + -+int kbase_job_slot_init(struct kbase_device *kbdev) ++static inline void gpio_writel_v2(u32 val, void __iomem *reg) +{ -+ CSTD_UNUSED(kbdev); -+ return 0; ++ writel((val & 0xffff) | 0xffff0000, reg); ++ writel((val >> 16) | 0xffff0000, reg + 0x4); +} -+KBASE_EXPORT_TEST_API(kbase_job_slot_init); + -+void kbase_job_slot_halt(struct kbase_device *kbdev) ++static inline u32 gpio_readl_v2(void __iomem *reg) +{ -+ CSTD_UNUSED(kbdev); ++ return readl(reg + 0x4) << 16 | readl(reg); +} + -+void kbase_job_slot_term(struct kbase_device *kbdev) ++static inline void rockchip_gpio_writel(struct rockchip_pin_bank *bank, ++ u32 value, unsigned int offset) +{ -+ CSTD_UNUSED(kbdev); -+} -+KBASE_EXPORT_TEST_API(kbase_job_slot_term); ++ void __iomem 
*reg = bank->reg_base + offset; + ++ if (bank->gpio_type == GPIO_TYPE_V2) ++ gpio_writel_v2(value, reg); ++ else ++ writel(value, reg); ++} + -+/** -+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags -+ * @kbdev: The kbase device -+ * @js: The job slot to soft-stop -+ * @target_katom: The job that should be soft-stopped (or NULL for any job) -+ * @sw_flags: Flags to pass in about the soft-stop -+ * -+ * Context: -+ * The job slot lock must be held when calling this function. -+ * The job slot must not already be in the process of being soft-stopped. -+ * -+ * Soft-stop the specified job slot, with extra information about the stop -+ * -+ * Where possible any job in the next register is evicted before the soft-stop. -+ */ -+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, -+ struct kbase_jd_atom *target_katom, u32 sw_flags) ++static inline u32 rockchip_gpio_readl(struct rockchip_pin_bank *bank, ++ unsigned int offset) +{ -+ dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", -+ target_katom, sw_flags, js); ++ void __iomem *reg = bank->reg_base + offset; ++ u32 value; + -+ if (sw_flags & JS_COMMAND_MASK) { -+ WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, -+ target_katom ? (void *)target_katom->kctx : NULL, sw_flags); -+ sw_flags &= ~((u32)JS_COMMAND_MASK); -+ } -+ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, -+ JS_COMMAND_SOFT_STOP | sw_flags); -+} ++ if (bank->gpio_type == GPIO_TYPE_V2) ++ value = gpio_readl_v2(reg); ++ else ++ value = readl(reg); + -+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom) -+{ -+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); ++ return value; +} + -+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, -+ struct kbase_jd_atom *target_katom) ++static inline void rockchip_gpio_writel_bit(struct rockchip_pin_bank *bank, ++ u32 bit, u32 value, ++ unsigned int offset) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool stopped; ++ void __iomem *reg = bank->reg_base + offset; ++ u32 data; + -+ stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, -+ target_katom, -+ JS_COMMAND_HARD_STOP); -+ CSTD_UNUSED(stopped); ++ if (bank->gpio_type == GPIO_TYPE_V2) { ++ if (value) ++ data = BIT(bit % 16) | BIT(bit % 16 + 16); ++ else ++ data = BIT(bit % 16 + 16); ++ writel(data, bit >= 16 ? reg + 0x4 : reg); ++ } else { ++ data = readl(reg); ++ data &= ~BIT(bit); ++ if (value) ++ data |= BIT(bit); ++ writel(data, reg); ++ } +} + -+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, -+ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) ++static inline u32 rockchip_gpio_readl_bit(struct rockchip_pin_bank *bank, ++ u32 bit, unsigned int offset) +{ -+ u32 hw_action = action & JS_COMMAND_MASK; -+ -+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't -+ * causing disjoint. -+ */ -+ if (hw_action == JS_COMMAND_SOFT_STOP && -+ (kbase_jd_katom_is_protected(target_katom) || -+ (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) -+ return; ++ void __iomem *reg = bank->reg_base + offset; ++ u32 data; + -+ /* Nothing to do if already logged disjoint state on this atom */ -+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) -+ return; ++ if (bank->gpio_type == GPIO_TYPE_V2) { ++ data = readl(bit >= 16 ? 
reg + 0x4 : reg); ++ data >>= bit % 16; ++ } else { ++ data = readl(reg); ++ data >>= bit; ++ } + -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; -+ kbase_disjoint_state_up(kbdev); ++ return data & (0x1); +} + -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom) ++static int rockchip_gpio_get_direction(struct gpio_chip *chip, ++ unsigned int offset) +{ -+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { -+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; -+ kbase_disjoint_state_down(kbdev); -+ } -+} ++ struct rockchip_pin_bank *bank = gpiochip_get_data(chip); ++ u32 data; + -+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) -+{ -+ WARN(true, "%s Not implemented for JM GPUs", __func__); -+ return -EINVAL; -+} ++ data = rockchip_gpio_readl_bit(bank, offset, bank->gpio_regs->port_ddr); ++ if (data) ++ return GPIO_LINE_DIRECTION_OUT; + -+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) -+{ -+ WARN(true, "%s Not implemented for JM GPUs", __func__); -+ return -EINVAL; ++ return GPIO_LINE_DIRECTION_IN; +} + -+void kbase_reset_gpu_allow(struct kbase_device *kbdev) ++static int rockchip_gpio_set_direction(struct gpio_chip *chip, ++ unsigned int offset, bool input) +{ -+ WARN(true, "%s Not implemented for JM GPUs", __func__); -+} ++ struct rockchip_pin_bank *bank = gpiochip_get_data(chip); ++ unsigned long flags; ++ u32 data = input ? 0 : 1; + -+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) -+{ -+ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ if (input) ++ pinctrl_gpio_direction_input(bank->pin_base + offset); ++ else ++ pinctrl_gpio_direction_output(bank->pin_base + offset); ++ ++ raw_spin_lock_irqsave(&bank->slock, flags); ++ rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr); ++ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ ++ return 0; +} + -+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) ++static void rockchip_gpio_set(struct gpio_chip *gc, unsigned int offset, ++ int value) +{ -+ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&bank->slock, flags); ++ rockchip_gpio_writel_bit(bank, offset, value, bank->gpio_regs->port_dr); ++ raw_spin_unlock_irqrestore(&bank->slock, flags); +} + -+static void kbase_debug_dump_registers(struct kbase_device *kbdev) ++static int rockchip_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ -+ int i; ++ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); ++ u32 data; + -+ kbase_io_history_dump(kbdev); ++ data = readl(bank->reg_base + bank->gpio_regs->ext_port); ++ data >>= offset; ++ data &= 1; + -+ dev_err(kbdev->dev, "Register state:"); -+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); -+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); -+ for (i = 0; i < 3; i++) { -+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", -+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), -+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); -+ } -+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", -+ kbase_reg_read(kbdev, 
MMU_REG(MMU_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); -+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), -+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); -+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); -+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); -+ dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); ++ return data; +} + -+static void kbasep_reset_timeout_worker(struct work_struct *data) ++static int rockchip_gpio_set_debounce(struct gpio_chip *gc, ++ unsigned int offset, ++ unsigned int debounce) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ ktime_t end_timestamp = ktime_get_raw(); -+ struct kbasep_js_device_data *js_devdata; -+ bool silent = false; -+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; ++ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); ++ const struct rockchip_gpio_regs *reg = bank->gpio_regs; ++ unsigned long flags, div_reg, freq, max_debounce; ++ bool div_debounce_support; ++ unsigned int cur_div_reg; ++ u64 div; + -+ kbdev = container_of(data, struct kbase_device, -+ hwaccess.backend.reset_work); ++ if (bank->gpio_type == GPIO_TYPE_V2 && !IS_ERR(bank->db_clk)) { ++ div_debounce_support = true; ++ freq = clk_get_rate(bank->db_clk); ++ if (!freq) ++ return -EINVAL; ++ max_debounce = (GENMASK(23, 0) + 1) * 2 * 1000000 / freq; ++ if ((unsigned long)debounce > max_debounce) ++ return -EINVAL; + -+ js_devdata = &kbdev->js_data; ++ div = debounce * freq; ++ div_reg = DIV_ROUND_CLOSEST_ULL(div, 2 * USEC_PER_SEC) - 1; ++ } else { ++ div_debounce_support = false; ++ } + -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_SILENT) -+ silent = true; ++ raw_spin_lock_irqsave(&bank->slock, flags); + -+ if (kbase_is_quick_reset_enabled(kbdev)) -+ silent = true; ++ /* Only the v1 needs to configure div_en and div_con for dbclk */ ++ if (debounce) { ++ if (div_debounce_support) { ++ /* Configure the max debounce from consumers */ ++ cur_div_reg = readl(bank->reg_base + ++ reg->dbclk_div_con); ++ if (cur_div_reg < div_reg) ++ writel(div_reg, bank->reg_base + ++ reg->dbclk_div_con); ++ rockchip_gpio_writel_bit(bank, offset, 1, ++ reg->dbclk_div_en); ++ } + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); ++ rockchip_gpio_writel_bit(bank, offset, 1, reg->debounce); ++ } else { ++ if (div_debounce_support) ++ rockchip_gpio_writel_bit(bank, offset, 0, ++ reg->dbclk_div_en); + -+ /* Disable GPU hardware counters. -+ * This call will block until counters are disabled. -+ */ -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ rockchip_gpio_writel_bit(bank, offset, 0, reg->debounce); ++ } + -+ /* Make sure the timer has completed - this cannot be done from -+ * interrupt context, so this cannot be done within -+ * kbasep_try_reset_gpu_early. 
-+ */ -+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); ++ raw_spin_unlock_irqrestore(&bank->slock, flags); + -+ if (kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ /* This would re-activate the GPU. Since it's already idle, -+ * there's no need to reset it -+ */ -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING); -+ kbase_disjoint_state_down(kbdev); -+ wake_up(&kbdev->hwaccess.backend.reset_wait); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return; ++ /* Enable or disable dbclk at last */ ++ if (div_debounce_support) { ++ if (debounce) ++ clk_prepare_enable(bank->db_clk); ++ else ++ clk_disable_unprepare(bank->db_clk); + } + -+ WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ spin_lock(&kbdev->mmu_mask_change); -+ kbase_pm_reset_start_locked(kbdev); ++ return 0; ++} + -+ /* We're about to flush out the IRQs and their bottom half's */ -+ kbdev->irq_reset_flush = true; ++static int rockchip_gpio_direction_input(struct gpio_chip *gc, ++ unsigned int offset) ++{ ++ return rockchip_gpio_set_direction(gc, offset, true); ++} + -+ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the -+ * spinlock; this also clears any outstanding interrupts -+ */ -+ kbase_pm_disable_interrupts_nolock(kbdev); ++static int rockchip_gpio_direction_output(struct gpio_chip *gc, ++ unsigned int offset, int value) ++{ ++ rockchip_gpio_set(gc, offset, value); + -+ spin_unlock(&kbdev->mmu_mask_change); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return rockchip_gpio_set_direction(gc, offset, false); ++} + -+ /* Ensure that any IRQ handlers have finished -+ * Must be done without any locks IRQ handlers will take -+ */ -+ kbase_synchronize_irqs(kbdev); -+ -+ /* Flush out any in-flight work items */ -+ kbase_flush_mmu_wqs(kbdev); -+ -+ /* The flush has completed so reset the active indicator */ -+ kbdev->irq_reset_flush = false; ++/* ++ * gpiolib set_config callback function. The setting of the pin ++ * mux function as 'gpio output' will be handled by the pinctrl subsystem ++ * interface. ++ */ ++static int rockchip_gpio_set_config(struct gpio_chip *gc, unsigned int offset, ++ unsigned long config) ++{ ++ enum pin_config_param param = pinconf_to_config_param(config); ++ unsigned int debounce = pinconf_to_config_argument(config); + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { -+ /* Ensure that L2 is not transitioning when we send the reset -+ * command ++ switch (param) { ++ case PIN_CONFIG_INPUT_DEBOUNCE: ++ rockchip_gpio_set_debounce(gc, offset, debounce); ++ /* ++ * Rockchip's gpio could only support up to one period ++ * of the debounce clock(pclk), which is far away from ++ * satisftying the requirement, as pclk is usually near ++ * 100MHz shared by all peripherals. So the fact is it ++ * has crippled debounce capability could only be useful ++ * to prevent any spurious glitches from waking up the system ++ * if the gpio is conguired as wakeup interrupt source. Let's ++ * still return -ENOTSUPP as before, to make sure the caller ++ * of gpiod_set_debounce won't change its behaviour. 
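++		 * Illustrative note, not part of the original patch: a
++		 * consumer normally reaches this callback through gpiolib,
++		 * e.g.
++		 *
++		 *   gpiod_set_debounce(desc, 5000);   (5 ms, in microseconds)
++		 *
++		 * gpiolib packs the request as PIN_CONFIG_INPUT_DEBOUNCE and
++		 * routes it to .set_config(); the hardware debounce is still
++		 * programmed above, while the -ENOTSUPP return lets callers
++		 * fall back to software debouncing.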
+ */ -+ while (--max_loops && kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_L2)) -+ ; -+ -+ WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); ++ return -ENOTSUPP; ++ default: ++ return -ENOTSUPP; + } ++} + -+ mutex_lock(&kbdev->pm.lock); -+ /* We hold the pm lock, so there ought to be a current policy */ -+ if (unlikely(!kbdev->pm.backend.pm_current_policy)) -+ dev_warn(kbdev->dev, "No power policy set!"); ++/* ++ * gpiolib gpio_to_irq callback function. Creates a mapping between a GPIO pin ++ * and a virtual IRQ, if not already present. ++ */ ++static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) ++{ ++ struct rockchip_pin_bank *bank = gpiochip_get_data(gc); ++ unsigned int virq; + -+ /* All slot have been soft-stopped and we've waited -+ * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we -+ * assume that anything that is still left on the GPU is stuck there and -+ * we'll kill it when we reset the GPU -+ */ ++ if (!bank->domain) ++ return -ENXIO; + -+ if (!silent) -+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", -+ RESET_TIMEOUT); ++ virq = irq_create_mapping(bank->domain, offset); + -+ /* Output the state of some interesting registers to help in the -+ * debugging of GPU resets -+ */ -+ if (!silent) -+ kbase_debug_dump_registers(kbdev); ++ return (virq) ? : -ENXIO; ++} + -+ /* Complete any jobs that were still on the GPU */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->protected_mode = false; -+ if (!kbdev->pm.backend.protected_entry_transition_override) -+ kbase_backend_reset(kbdev, &end_timestamp); -+ kbase_pm_metrics_update(kbdev, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static const struct gpio_chip rockchip_gpiolib_chip = { ++ .request = gpiochip_generic_request, ++ .free = gpiochip_generic_free, ++ .set = rockchip_gpio_set, ++ .get = rockchip_gpio_get, ++ .get_direction = rockchip_gpio_get_direction, ++ .direction_input = rockchip_gpio_direction_input, ++ .direction_output = rockchip_gpio_direction_output, ++ .set_config = rockchip_gpio_set_config, ++ .to_irq = rockchip_gpio_to_irq, ++ .owner = THIS_MODULE, ++}; + -+ /* Tell hardware counters a reset is about to occur. -+ * If the instr backend is in an unrecoverable error state (e.g. due to -+ * HW being unresponsive), this will transition the backend out of -+ * it, on the assumption a reset will fix whatever problem there was. -+ */ -+ kbase_instr_hwcnt_on_before_reset(kbdev); ++static void rockchip_irq_demux(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc); ++ unsigned long pending; ++ unsigned int irq; + -+ /* Reset the GPU */ -+ kbase_pm_init_hw(kbdev, 0); ++ dev_dbg(bank->dev, "got irq for bank %s\n", bank->name); + -+ mutex_unlock(&kbdev->pm.lock); ++ chained_irq_enter(chip, desc); + -+ mutex_lock(&js_devdata->runpool_mutex); ++ pending = readl_relaxed(bank->reg_base + bank->gpio_regs->int_status); ++ for_each_set_bit(irq, &pending, 32) { ++ dev_dbg(bank->dev, "handling irq %d\n", irq); + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_restore_all_as(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* ++ * Triggering IRQ on both rising and falling edge ++ * needs manual intervention. 
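++		 * Illustrative note, not part of the original patch: the v1
++		 * controller has no hardware both-edge trigger, so the branch
++		 * below emulates it by re-arming int_polarity to the opposite
++		 * of the level currently read from ext_port, and it keeps
++		 * re-reading ext_port until the level is stable so that an
++		 * edge racing with the polarity flip is not lost.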
++ */ ++ if (bank->toggle_edge_mode & BIT(irq)) { ++ u32 data, data_old, polarity; ++ unsigned long flags; + -+ kbase_pm_enable_interrupts(kbdev); ++ data = readl_relaxed(bank->reg_base + ++ bank->gpio_regs->ext_port); ++ do { ++ raw_spin_lock_irqsave(&bank->slock, flags); + -+ kbase_disjoint_state_down(kbdev); ++ polarity = readl_relaxed(bank->reg_base + ++ bank->gpio_regs->int_polarity); ++ if (data & BIT(irq)) ++ polarity &= ~BIT(irq); ++ else ++ polarity |= BIT(irq); ++ writel(polarity, ++ bank->reg_base + ++ bank->gpio_regs->int_polarity); + -+ mutex_unlock(&js_devdata->runpool_mutex); ++ raw_spin_unlock_irqrestore(&bank->slock, flags); + -+ mutex_lock(&kbdev->pm.lock); ++ data_old = data; ++ data = readl_relaxed(bank->reg_base + ++ bank->gpio_regs->ext_port); ++ } while ((data & BIT(irq)) != (data_old & BIT(irq))); ++ } + -+ kbase_pm_reset_complete(kbdev); ++ generic_handle_domain_irq(bank->domain, irq); ++ } + -+ /* Find out what cores are required now */ -+ kbase_pm_update_cores_state(kbdev); ++ chained_irq_exit(chip, desc); ++} + -+ /* Synchronously request and wait for those cores, because if -+ * instrumentation is enabled it would need them immediately. -+ */ -+ kbase_pm_wait_for_desired_state(kbdev); ++static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) ++{ ++ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ struct rockchip_pin_bank *bank = gc->private; ++ u32 mask = BIT(d->hwirq); ++ u32 polarity; ++ u32 level; ++ u32 data; ++ unsigned long flags; ++ int ret = 0; + -+ mutex_unlock(&kbdev->pm.lock); ++ raw_spin_lock_irqsave(&bank->slock, flags); + -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING); ++ rockchip_gpio_writel_bit(bank, d->hwirq, 0, ++ bank->gpio_regs->port_ddr); + -+ wake_up(&kbdev->hwaccess.backend.reset_wait); -+ if (!silent) -+ dev_err(kbdev->dev, "Reset complete"); ++ raw_spin_unlock_irqrestore(&bank->slock, flags); + -+ /* Try submitting some jobs to restart processing */ -+ KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); -+ kbase_js_sched_all(kbdev); ++ if (type & IRQ_TYPE_EDGE_BOTH) ++ irq_set_handler_locked(d, handle_edge_irq); ++ else ++ irq_set_handler_locked(d, handle_level_irq); + -+ /* Process any pending slot updates */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_backend_slot_update(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ raw_spin_lock_irqsave(&bank->slock, flags); + -+ kbase_pm_context_idle(kbdev); ++ level = rockchip_gpio_readl(bank, bank->gpio_regs->int_type); ++ polarity = rockchip_gpio_readl(bank, bank->gpio_regs->int_polarity); + -+ /* Re-enable GPU hardware counters */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (type == IRQ_TYPE_EDGE_BOTH) { ++ if (bank->gpio_type == GPIO_TYPE_V2) { ++ rockchip_gpio_writel_bit(bank, d->hwirq, 1, ++ bank->gpio_regs->int_bothedge); ++ goto out; ++ } else { ++ bank->toggle_edge_mode |= mask; ++ level &= ~mask; + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); ++ /* ++ * Determine gpio state. If 1 next interrupt should be ++ * low otherwise high. 
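++			 * Illustrative note, not part of the original
++			 * patch: this is the software half of the
++			 * EDGE_BOTH emulation for the v1 block - a pin
++			 * that currently reads high is armed for a
++			 * falling edge first, and rockchip_irq_demux()
++			 * re-arms the opposite polarity after each
++			 * interrupt.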
++ */ ++ data = readl(bank->reg_base + bank->gpio_regs->ext_port); ++ if (data & mask) ++ polarity &= ~mask; ++ else ++ polarity |= mask; ++ } ++ } else { ++ if (bank->gpio_type == GPIO_TYPE_V2) { ++ rockchip_gpio_writel_bit(bank, d->hwirq, 0, ++ bank->gpio_regs->int_bothedge); ++ } else { ++ bank->toggle_edge_mode &= ~mask; ++ } ++ switch (type) { ++ case IRQ_TYPE_EDGE_RISING: ++ level |= mask; ++ polarity |= mask; ++ break; ++ case IRQ_TYPE_EDGE_FALLING: ++ level |= mask; ++ polarity &= ~mask; ++ break; ++ case IRQ_TYPE_LEVEL_HIGH: ++ level &= ~mask; ++ polarity |= mask; ++ break; ++ case IRQ_TYPE_LEVEL_LOW: ++ level &= ~mask; ++ polarity &= ~mask; ++ break; ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++ rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type); ++ rockchip_gpio_writel(bank, polarity, bank->gpio_regs->int_polarity); ++out: ++ raw_spin_unlock_irqrestore(&bank->slock, flags); ++ ++ return ret; +} + -+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) ++static int rockchip_irq_reqres(struct irq_data *d) +{ -+ struct kbase_device *kbdev = container_of(timer, struct kbase_device, -+ hwaccess.backend.reset_timer); ++ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ struct rockchip_pin_bank *bank = gc->private; + -+ /* Reset still pending? */ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == -+ KBASE_RESET_GPU_COMMITTED) -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); ++ rockchip_gpio_direction_input(&bank->gpio_chip, d->hwirq); + -+ return HRTIMER_NORESTART; ++ return gpiochip_reqres_irq(&bank->gpio_chip, d->hwirq); +} + -+/* -+ * If all jobs are evicted from the GPU then we can reset the GPU -+ * immediately instead of waiting for the timeout to elapse -+ */ -+ -+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) ++static void rockchip_irq_relres(struct irq_data *d) +{ -+ unsigned int i; -+ int pending_jobs = 0; ++ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ struct rockchip_pin_bank *bank = gc->private; + -+ /* Count the number of jobs */ -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); ++ gpiochip_relres_irq(&bank->gpio_chip, d->hwirq); ++} + -+ if (pending_jobs > 0) { -+ /* There are still jobs on the GPU - wait */ -+ return; -+ } ++static void rockchip_irq_suspend(struct irq_data *d) ++{ ++ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ struct rockchip_pin_bank *bank = gc->private; + -+ /* To prevent getting incorrect registers when dumping failed job, -+ * skip early reset. -+ */ -+ if (atomic_read(&kbdev->job_fault_debug) > 0) -+ return; ++ bank->saved_masks = irq_reg_readl(gc, bank->gpio_regs->int_mask); ++ irq_reg_writel(gc, ~gc->wake_active, bank->gpio_regs->int_mask); ++} + -+ /* Check that the reset has been committed to (i.e. 
kbase_reset_gpu has -+ * been called), and that no other thread beat this thread to starting -+ * the reset -+ */ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != -+ KBASE_RESET_GPU_COMMITTED) { -+ /* Reset has already occurred */ -+ return; -+ } ++static void rockchip_irq_resume(struct irq_data *d) ++{ ++ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ struct rockchip_pin_bank *bank = gc->private; + -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); ++ irq_reg_writel(gc, bank->saved_masks, bank->gpio_regs->int_mask); +} + -+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) ++static void rockchip_irq_enable(struct irq_data *d) +{ -+ unsigned long flags; ++ irq_gc_mask_clr_bit(d); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbasep_try_reset_gpu_early_locked(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static void rockchip_irq_disable(struct irq_data *d) ++{ ++ irq_gc_mask_set_bit(d); +} + -+/** -+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU -+ * @kbdev: kbase device -+ * @flags: Bitfield indicating impact of reset (see flag defines) -+ * -+ * This function soft-stops all the slots to ensure that as many jobs as -+ * possible are saved. -+ * -+ * Return: boolean which should be interpreted as follows: -+ * true - Prepared for reset, kbase_reset_gpu_locked should be called. -+ * false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. -+ */ -+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, -+ unsigned int flags) ++static int rockchip_interrupts_register(struct rockchip_pin_bank *bank) +{ -+ int i; ++ unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; ++ struct irq_chip_generic *gc; ++ int ret; + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_pm_is_gpu_lost(kbdev)) { -+ /* GPU access has been removed, reset will be done by -+ * Arbiter instead -+ */ -+ return false; ++ bank->domain = irq_domain_create_linear(dev_fwnode(bank->dev), 32, ++ &irq_generic_chip_ops, NULL); ++ if (!bank->domain) { ++ dev_warn(bank->dev, "could not init irq domain for bank %s\n", ++ bank->name); ++ return -EINVAL; + } -+#endif + -+ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) -+ kbase_instr_hwcnt_on_unrecoverable_error(kbdev); ++ ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1, ++ "rockchip_gpio_irq", ++ handle_level_irq, ++ clr, 0, 0); ++ if (ret) { ++ dev_err(bank->dev, "could not alloc generic chips for bank %s\n", ++ bank->name); ++ irq_domain_remove(bank->domain); ++ return -EINVAL; ++ } + -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING, -+ KBASE_RESET_GPU_PREPARED) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* Some other thread is already resetting the GPU */ -+ return false; ++ gc = irq_get_domain_generic_chip(bank->domain, 0); ++ if (bank->gpio_type == GPIO_TYPE_V2) { ++ gc->reg_writel = gpio_writel_v2; ++ gc->reg_readl = gpio_readl_v2; + } + -+ kbase_disjoint_state_up(kbdev); ++ gc->reg_base = bank->reg_base; ++ gc->private = bank; ++ gc->chip_types[0].regs.mask = bank->gpio_regs->int_mask; ++ gc->chip_types[0].regs.ack = bank->gpio_regs->port_eoi; ++ gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit; ++ gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; ++ gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; ++ gc->chip_types[0].chip.irq_enable = rockchip_irq_enable; ++ 
gc->chip_types[0].chip.irq_disable = rockchip_irq_disable; ++ gc->chip_types[0].chip.irq_set_wake = irq_gc_set_wake; ++ gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend; ++ gc->chip_types[0].chip.irq_resume = rockchip_irq_resume; ++ gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type; ++ gc->chip_types[0].chip.irq_request_resources = rockchip_irq_reqres; ++ gc->chip_types[0].chip.irq_release_resources = rockchip_irq_relres; ++ gc->wake_enabled = IRQ_MSK(bank->nr_pins); + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ kbase_job_slot_softstop(kbdev, i, NULL); ++ /* ++ * Linux assumes that all interrupts start out disabled/masked. ++ * Our driver only uses the concept of masked and always keeps ++ * things enabled, so for us that's all masked and all enabled. ++ */ ++ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->int_mask); ++ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->port_eoi); ++ rockchip_gpio_writel(bank, 0xffffffff, bank->gpio_regs->int_en); ++ gc->mask_cache = 0xffffffff; + -+ return true; ++ irq_set_chained_handler_and_data(bank->irq, ++ rockchip_irq_demux, bank); ++ ++ return 0; +} + -+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) ++static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) +{ -+ unsigned long lock_flags; -+ bool ret; ++ struct gpio_chip *gc; ++ int ret; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); -+ ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); ++ bank->gpio_chip = rockchip_gpiolib_chip; + -+ return ret; -+} -+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); ++ gc = &bank->gpio_chip; ++ gc->base = bank->pin_base; ++ gc->ngpio = bank->nr_pins; ++ gc->label = bank->name; ++ gc->parent = bank->dev; + -+/* -+ * This function should be called after kbase_prepare_to_reset_gpu if it -+ * returns true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu. -+ * -+ * After this function is called (or not called if kbase_prepare_to_reset_gpu -+ * returned false), the caller should wait for -+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset -+ * has completed. 
-+ */ -+void kbase_reset_gpu(struct kbase_device *kbdev) -+{ -+ /* Note this is an assert/atomic_set because it is a software issue for -+ * a race to be occurring here -+ */ -+ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) -+ return; -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED); ++ if (!gc->base) ++ gc->base = GPIO_MAX_PINS * bank->bank_num; ++ if (!gc->ngpio) ++ gc->ngpio = GPIO_MAX_PINS; ++ if (!gc->label) { ++ gc->label = kasprintf(GFP_KERNEL, "gpio%d", bank->bank_num); ++ if (!gc->label) ++ return -ENOMEM; ++ } + -+ if (!kbase_is_quick_reset_enabled(kbdev)) -+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", -+ kbdev->reset_timeout_ms); ++ ret = gpiochip_add_data(gc, bank); ++ if (ret) { ++ dev_err(bank->dev, "failed to add gpiochip %s, %d\n", ++ gc->label, ret); ++ return ret; ++ } + -+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, -+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), -+ HRTIMER_MODE_REL); ++ ret = rockchip_interrupts_register(bank); ++ if (ret) { ++ dev_err(bank->dev, "failed to register interrupt, %d\n", ret); ++ goto fail; ++ } + -+ /* Try resetting early */ -+ kbasep_try_reset_gpu_early(kbdev); ++ return 0; ++ ++fail: ++ gpiochip_remove(&bank->gpio_chip); ++ ++ return ret; +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+void kbase_reset_gpu_locked(struct kbase_device *kbdev) ++static void rockchip_gpio_get_ver(struct rockchip_pin_bank *bank) +{ -+ /* Note this is an assert/atomic_set because it is a software issue for -+ * a race to be occurring here -+ */ -+ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) -+ return; -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED); -+ -+ if (!kbase_is_quick_reset_enabled(kbdev)) -+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", -+ kbdev->reset_timeout_ms); -+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, -+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), -+ HRTIMER_MODE_REL); ++ int id = readl(bank->reg_base + gpio_regs_v2.version_id); + -+ /* Try resetting early */ -+ kbasep_try_reset_gpu_early_locked(kbdev); ++ /* If not gpio v2, that is default to v1. */ ++ if (id == GPIO_TYPE_V2 || id == GPIO_TYPE_V2_1) { ++ bank->gpio_regs = &gpio_regs_v2; ++ bank->gpio_type = GPIO_TYPE_V2; ++ } else { ++ bank->gpio_regs = &gpio_regs_v1; ++ bank->gpio_type = GPIO_TYPE_V1; ++ } +} + -+int kbase_reset_gpu_silent(struct kbase_device *kbdev) ++static struct rockchip_pin_bank * ++rockchip_gpio_find_bank(struct pinctrl_dev *pctldev, int id) +{ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING, -+ KBASE_RESET_GPU_SILENT) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* Some other thread is already resetting the GPU */ -+ return -EAGAIN; ++ struct rockchip_pinctrl *info; ++ struct rockchip_pin_bank *bank; ++ int i, found = 0; ++ ++ info = pinctrl_dev_get_drvdata(pctldev); ++ bank = info->ctrl->pin_banks; ++ for (i = 0; i < info->ctrl->nr_banks; i++, bank++) { ++ if (bank->bank_num == id) { ++ found = 1; ++ break; ++ } + } + -+ kbase_disjoint_state_up(kbdev); ++ return found ? 
bank : NULL; ++} + -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); ++static int rockchip_gpio_of_get_bank_id(struct device *dev) ++{ ++ static int gpio; ++ int bank_id = -1; + -+ return 0; ++ if (IS_ENABLED(CONFIG_OF) && dev->of_node) { ++ bank_id = of_alias_get_id(dev->of_node, "gpio"); ++ if (bank_id < 0) ++ bank_id = gpio++; ++ } ++ ++ return bank_id; +} + -+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) ++#ifdef CONFIG_ACPI ++static int rockchip_gpio_acpi_get_bank_id(struct device *dev) +{ -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_NOT_PENDING) -+ return false; ++ struct acpi_device *adev; ++ unsigned long bank_id = -1; ++ const char *uid; ++ int ret; + -+ return true; -+} ++ adev = ACPI_COMPANION(dev); ++ if (!adev) ++ return -ENXIO; + -+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) ++ uid = acpi_device_uid(adev); ++ if (!uid || !(*uid)) { ++ dev_err(dev, "Cannot retrieve UID\n"); ++ return -ENODEV; ++ } ++ ++ ret = kstrtoul(uid, 0, &bank_id); ++ ++ return !ret ? bank_id : -ERANGE; ++} ++#else ++static int rockchip_gpio_acpi_get_bank_id(struct device *dev) +{ -+ return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING; ++ return -ENOENT; +} ++#endif /* CONFIG_ACPI */ + -+int kbase_reset_gpu_wait(struct kbase_device *kbdev) ++static int rockchip_gpio_probe(struct platform_device *pdev) +{ -+ wait_event(kbdev->hwaccess.backend.reset_wait, -+ atomic_read(&kbdev->hwaccess.backend.reset_gpu) -+ == KBASE_RESET_GPU_NOT_PENDING); ++ struct device *dev = &pdev->dev; ++ struct pinctrl_dev *pctldev = NULL; ++ struct rockchip_pin_bank *bank = NULL; ++ int bank_id = 0; ++ int ret; + -+ return 0; -+} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); ++ bank_id = rockchip_gpio_acpi_get_bank_id(dev); ++ if (bank_id < 0) { ++ bank_id = rockchip_gpio_of_get_bank_id(dev); ++ if (bank_id < 0) ++ return bank_id; ++ } + -+int kbase_reset_gpu_init(struct kbase_device *kbdev) -+{ -+ kbdev->hwaccess.backend.reset_workq = alloc_workqueue( -+ "Mali reset workqueue", 0, 1); -+ if (kbdev->hwaccess.backend.reset_workq == NULL) -+ return -ENOMEM; ++ if (!ACPI_COMPANION(dev)) { ++ struct device_node *pctlnp = of_get_parent(dev->of_node); + -+ INIT_WORK(&kbdev->hwaccess.backend.reset_work, -+ kbasep_reset_timeout_worker); ++ pctldev = of_pinctrl_get(pctlnp); ++ if (!pctldev) ++ return -EPROBE_DEFER; + -+ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ kbdev->hwaccess.backend.reset_timer.function = -+ kbasep_reset_timer_callback; ++ bank = rockchip_gpio_find_bank(pctldev, bank_id); ++ if (!bank) ++ return -ENODEV; ++ } + -+ return 0; -+} ++ if (!bank) { ++ bank = devm_kzalloc(dev, sizeof(*bank), GFP_KERNEL); ++ if (!bank) ++ return -ENOMEM; ++ } + -+void kbase_reset_gpu_term(struct kbase_device *kbdev) -+{ -+ destroy_workqueue(kbdev->hwaccess.backend.reset_workq); -+} ++ bank->bank_num = bank_id; ++ bank->dev = dev; + -+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, -+ const u64 affinity, const u64 limited_core_mask) -+{ -+ const u64 result = affinity & limited_core_mask; ++ bank->reg_base = devm_platform_ioremap_resource(pdev, 0); ++ if (IS_ERR(bank->reg_base)) ++ return PTR_ERR(bank->reg_base); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ dev_dbg(kbdev->dev, -+ "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", -+ (unsigned long)affinity, -+ (unsigned long)result, -+ 
(unsigned long)limited_core_mask); -+#else -+ CSTD_UNUSED(kbdev); -+#endif ++ bank->irq = platform_get_irq(pdev, 0); ++ if (bank->irq < 0) ++ return bank->irq; + -+ return result; -+} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h -new file mode 100644 -index 000000000..bfd55a6e2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h -@@ -0,0 +1,148 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ raw_spin_lock_init(&bank->slock); + -+/* -+ * Job Manager backend-specific low-level APIs. -+ */ ++ if (!ACPI_COMPANION(dev)) { ++ bank->clk = devm_clk_get(dev, "bus"); ++ if (IS_ERR(bank->clk)) { ++ bank->clk = of_clk_get(dev->of_node, 0); ++ if (IS_ERR(bank->clk)) { ++ dev_err(dev, "fail to get apb clock\n"); ++ return PTR_ERR(bank->clk); ++ } ++ } + -+#ifndef _KBASE_JM_HWACCESS_H_ -+#define _KBASE_JM_HWACCESS_H_ ++ bank->db_clk = devm_clk_get(dev, "db"); ++ if (IS_ERR(bank->db_clk)) { ++ bank->db_clk = of_clk_get(dev->of_node, 1); ++ if (IS_ERR(bank->db_clk)) ++ bank->db_clk = NULL; ++ } ++ } + -+#include -+#include -+#include ++ clk_prepare_enable(bank->clk); ++ clk_prepare_enable(bank->db_clk); + -+#include -+#include ++ rockchip_gpio_get_ver(bank); + -+/** -+ * kbase_job_done_slot() - Complete the head job on a particular job-slot -+ * @kbdev: Device pointer -+ * @s: Job slot -+ * @completion_code: Completion code of job reported by GPU -+ * @job_tail: Job tail address reported by GPU -+ * @end_timestamp: Timestamp of job completion -+ */ -+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, -+ u64 job_tail, ktime_t *end_timestamp); ++ /* ++ * Prevent clashes with a deferred output setting ++ * being added right at this moment. 
++ */ ++ mutex_lock(&bank->deferred_lock); + -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) ++ ret = rockchip_gpiolib_register(bank); ++ if (ret) { ++ dev_err(bank->dev, "Failed to register gpio %d\n", ret); ++ goto err_unlock; ++ } ++ ++ if (!device_property_read_bool(bank->dev, "gpio-ranges") && pctldev) { ++ struct gpio_chip *gc = &bank->gpio_chip; ++ ++ ret = gpiochip_add_pin_range(gc, dev_name(pctldev->dev), 0, ++ gc->base, gc->ngpio); ++ if (ret) { ++ dev_err(bank->dev, "Failed to add pin range\n"); ++ goto err_unlock; ++ } ++ } ++ ++ while (!list_empty(&bank->deferred_pins)) { ++ struct rockchip_pin_deferred *cfg; ++ ++ cfg = list_first_entry(&bank->deferred_pins, ++ struct rockchip_pin_deferred, head); ++ if (!cfg) ++ break; ++ ++ list_del(&cfg->head); ++ ++ switch (cfg->param) { ++ case PIN_CONFIG_OUTPUT: ++ ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg); ++ if (ret) ++ dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin, ++ cfg->arg); ++ break; ++ case PIN_CONFIG_INPUT_ENABLE: ++ ret = rockchip_gpio_direction_input(&bank->gpio_chip, cfg->pin); ++ if (ret) ++ dev_warn(dev, "setting input pin %u failed\n", cfg->pin); ++ break; ++ default: ++ dev_warn(dev, "unknown deferred config param %d\n", cfg->param); ++ break; ++ } ++ kfree(cfg); ++ } ++ ++ mutex_unlock(&bank->deferred_lock); ++ ++ platform_set_drvdata(pdev, bank); ++ dev_info(dev, "probed %pfw\n", dev_fwnode(dev)); ++ ++ return 0; ++err_unlock: ++ mutex_unlock(&bank->deferred_lock); ++ clk_disable_unprepare(bank->clk); ++ clk_disable_unprepare(bank->db_clk); ++ ++ return ret; ++} ++ ++static int rockchip_gpio_remove(struct platform_device *pdev) +{ -+ snprintf(js_string, js_size, "job_slot_%u", js); -+ return js_string; ++ struct rockchip_pin_bank *bank = platform_get_drvdata(pdev); ++ ++ clk_disable_unprepare(bank->clk); ++ clk_disable_unprepare(bank->db_clk); ++ gpiochip_remove(&bank->gpio_chip); ++ ++ return 0; +} -+#endif + -+/** -+ * kbase_job_hw_submit() - Submit a job to the GPU -+ * @kbdev: Device pointer -+ * @katom: Atom to submit -+ * @js: Job slot to submit on -+ * -+ * The caller must check kbasep_jm_is_submit_slots_free() != false before -+ * calling this. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold the hwaccess_lock -+ * -+ * Return: 0 if the job was successfully submitted to hardware, an error otherwise. 
-+ */ -+int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); ++static const struct of_device_id rockchip_gpio_match[] = { ++ { .compatible = "rockchip,gpio-bank", }, ++ { .compatible = "rockchip,rk3188-gpio-bank0" }, ++ { }, ++}; + -+#if !MALI_USE_CSF -+/** -+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop -+ * on the specified atom -+ * @kbdev: Device pointer -+ * @js: Job slot to stop on -+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or -+ * JSn_COMMAND_SOFT_STOP -+ * @core_reqs: Core requirements of atom to stop -+ * @target_katom: Atom to stop -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold the hwaccess_lock -+ */ -+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, -+ u32 action, base_jd_core_req core_reqs, -+ struct kbase_jd_atom *target_katom); -+#endif /* !MALI_USE_CSF */ ++static struct platform_driver rockchip_gpio_driver = { ++ .probe = rockchip_gpio_probe, ++ .remove = rockchip_gpio_remove, ++ .driver = { ++ .name = "rockchip-gpio", ++ .of_match_table = rockchip_gpio_match, ++ }, ++}; + -+/** -+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job -+ * slot belonging to a given context. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer. May be NULL -+ * @katom: Specific atom to stop. May be NULL -+ * @js: Job slot to hard stop -+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or -+ * JSn_COMMAND_SOFT_STOP -+ * -+ * If no context is provided then all jobs on the slot will be soft or hard -+ * stopped. -+ * -+ * If a katom is provided then only that specific atom will be stopped. In this -+ * case the kctx parameter is ignored. -+ * -+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and -+ * returned to the job scheduler. -+ * -+ * Return: true if an atom was stopped, false otherwise -+ */ -+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js, struct kbase_jd_atom *katom, u32 action); ++static int __init rockchip_gpio_init(void) ++{ ++ return platform_driver_register(&rockchip_gpio_driver); ++} ++postcore_initcall(rockchip_gpio_init); + -+/** -+ * kbase_job_slot_init - Initialise job slot framework -+ * @kbdev: Device pointer -+ * -+ * Called on driver initialisation -+ * -+ * Return: 0 on success -+ */ -+int kbase_job_slot_init(struct kbase_device *kbdev); ++static void __exit rockchip_gpio_exit(void) ++{ ++ platform_driver_unregister(&rockchip_gpio_driver); ++} ++module_exit(rockchip_gpio_exit); + -+/** -+ * kbase_job_slot_halt - Halt the job slot framework -+ * @kbdev: Device pointer -+ * -+ * Should prevent any further job slot processing -+ */ -+void kbase_job_slot_halt(struct kbase_device *kbdev); ++MODULE_DESCRIPTION("Rockchip gpio driver"); ++MODULE_ALIAS("platform:rockchip-gpio"); ++MODULE_LICENSE("GPL v2"); ++MODULE_DEVICE_TABLE(of, rockchip_gpio_match); +diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c +index c1e83b292..068becf45 100644 +--- a/drivers/gpio/gpiolib-of.c ++++ b/drivers/gpio/gpiolib-of.c +@@ -25,21 +25,6 @@ + #include "gpiolib.h" + #include "gpiolib-of.h" + +-/* +- * This is Linux-specific flags. By default controllers' and Linux' mapping +- * match, but GPIO controllers are free to translate their own flags to +- * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. 
+- */ +-enum of_gpio_flags { +- OF_GPIO_ACTIVE_LOW = 0x1, +- OF_GPIO_SINGLE_ENDED = 0x2, +- OF_GPIO_OPEN_DRAIN = 0x4, +- OF_GPIO_TRANSITORY = 0x8, +- OF_GPIO_PULL_UP = 0x10, +- OF_GPIO_PULL_DOWN = 0x20, +- OF_GPIO_PULL_DISABLE = 0x40, +-}; +- + /** + * of_gpio_named_count() - Count GPIOs for a device + * @np: device node to count GPIOs for +@@ -448,6 +433,20 @@ int of_get_named_gpio(const struct device_node *np, const char *propname, + } + EXPORT_SYMBOL_GPL(of_get_named_gpio); + ++int of_get_named_gpio_flags(struct device_node *np, const char *list_name, ++ int index, enum of_gpio_flags *flags) ++{ ++ struct gpio_desc *desc; + -+/** -+ * kbase_job_slot_term - Terminate job slot framework -+ * @kbdev: Device pointer -+ * -+ * Called on driver termination -+ */ -+void kbase_job_slot_term(struct kbase_device *kbdev); ++ desc = of_get_named_gpiod_flags(np, list_name, index, flags); + -+/** -+ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush -+ * @kbdev: Device pointer -+ * -+ * Caller must not be in IRQ context -+ */ -+void kbase_gpu_cache_clean(struct kbase_device *kbdev); ++ if (IS_ERR(desc)) ++ return PTR_ERR(desc); ++ else ++ return desc_to_gpio(desc); ++} ++EXPORT_SYMBOL_GPL(of_get_named_gpio_flags); + -+#endif /* _KBASE_JM_HWACCESS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c + /* Converts gpio_lookup_flags into bitmask of GPIO_* values */ + static unsigned long of_convert_gpio_flags(enum of_gpio_flags flags) + { +@@ -1138,3 +1137,4 @@ void of_gpiochip_remove(struct gpio_chip *chip) + { + of_node_put(dev_of_node(&chip->gpiodev->dev)); + } ++ +diff --git a/drivers/gpio/gpiolib-of.c.orig b/drivers/gpio/gpiolib-of.c.orig new file mode 100644 -index 000000000..7db2b353b +index 000000000..c1e83b292 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c -@@ -0,0 +1,1873 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpio/gpiolib-of.c.orig +@@ -0,0 +1,1140 @@ ++// SPDX-License-Identifier: GPL-2.0+ +/* ++ * OF helpers for the GPIO API + * -+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Copyright (c) 2007-2008 MontaVista Software, Inc. + * ++ * Author: Anton Vorontsov + */ + -+/* -+ * Register-based HW access backend specific APIs -+ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include + -+/** -+ * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty. 
-+ * -+ * @rb: ring buffer -+ * -+ * Note: HW access lock must be held ++#include "gpiolib.h" ++#include "gpiolib-of.h" ++ ++/* ++ * This is Linux-specific flags. By default controllers' and Linux' mapping ++ * match, but GPIO controllers are free to translate their own flags to ++ * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. + */ -+#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) ++enum of_gpio_flags { ++ OF_GPIO_ACTIVE_LOW = 0x1, ++ OF_GPIO_SINGLE_ENDED = 0x2, ++ OF_GPIO_OPEN_DRAIN = 0x4, ++ OF_GPIO_TRANSITORY = 0x8, ++ OF_GPIO_PULL_UP = 0x10, ++ OF_GPIO_PULL_DOWN = 0x20, ++ OF_GPIO_PULL_DISABLE = 0x40, ++}; + +/** -+ * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. ++ * of_gpio_named_count() - Count GPIOs for a device ++ * @np: device node to count GPIOs for ++ * @propname: property name containing gpio specifier(s) + * -+ * @rb: ring buffer ++ * The function returns the count of GPIOs specified for a node. ++ * Note that the empty GPIO specifiers count too. Returns either ++ * Number of gpios defined in property, ++ * -EINVAL for an incorrectly formed gpios property, or ++ * -ENOENT for a missing gpios property + * -+ * Note: HW access lock must be held -+ */ -+#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) -+ -+static void kbase_gpu_release_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp); -+ -+/** -+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer -+ * @kbdev: Device pointer -+ * @katom: Atom to enqueue ++ * Example: ++ * gpios = <0 ++ * &gpio1 1 2 ++ * 0 ++ * &gpio2 3 4>; + * -+ * Context: Caller must hold the HW access lock ++ * The above example defines four GPIOs, two of which are not specified. ++ * This function will return '4' + */ -+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++static int of_gpio_named_count(const struct device_node *np, ++ const char *propname) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; -+ -+ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; -+ rb->write_idx++; -+ -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; ++ return of_count_phandle_with_args(np, propname, "#gpio-cells"); +} + +/** -+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once -+ * it has been completed -+ * @kbdev: Device pointer -+ * @js: Job slot to remove atom from -+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in -+ * which case current time will be used. -+ * -+ * Context: Caller must hold the HW access lock ++ * of_gpio_spi_cs_get_count() - special GPIO counting for SPI ++ * @dev: Consuming device ++ * @con_id: Function within the GPIO consumer + * -+ * Return: Atom removed from ringbuffer ++ * Some elder GPIO controllers need special quirks. Currently we handle ++ * the Freescale and PPC GPIO controller with bindings that doesn't use the ++ * established "cs-gpios" for chip selects but instead rely on ++ * "gpios" for the chip select lines. If we detect this, we redirect ++ * the counting of "cs-gpios" to count "gpios" transparent to the ++ * driver. 
+ */ -+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, -+ ktime_t *end_timestamp) ++static int of_gpio_spi_cs_get_count(struct device *dev, const char *con_id) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; -+ struct kbase_jd_atom *katom; -+ -+ if (SLOT_RB_EMPTY(rb)) { -+ WARN(1, "GPU ringbuffer unexpectedly empty\n"); -+ return NULL; -+ } -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; -+ -+ kbase_gpu_release_atom(kbdev, katom, end_timestamp); -+ -+ rb->read_idx++; -+ -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; ++ struct device_node *np = dev->of_node; + -+ return katom; ++ if (!IS_ENABLED(CONFIG_SPI_MASTER)) ++ return 0; ++ if (!con_id || strcmp(con_id, "cs")) ++ return 0; ++ if (!of_device_is_compatible(np, "fsl,spi") && ++ !of_device_is_compatible(np, "aeroflexgaisler,spictrl") && ++ !of_device_is_compatible(np, "ibm,ppc4xx-spi")) ++ return 0; ++ return of_gpio_named_count(np, "gpios"); +} + -+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) ++int of_gpio_get_count(struct device *dev, const char *con_id) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ int ret; ++ char propname[32]; ++ unsigned int i; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ret = of_gpio_spi_cs_get_count(dev, con_id); ++ if (ret > 0) ++ return ret; + -+ if ((SLOT_RB_ENTRIES(rb) - 1) < idx) -+ return NULL; /* idx out of range */ ++ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { ++ if (con_id) ++ snprintf(propname, sizeof(propname), "%s-%s", ++ con_id, gpio_suffixes[i]); ++ else ++ snprintf(propname, sizeof(propname), "%s", ++ gpio_suffixes[i]); + -+ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; ++ ret = of_gpio_named_count(dev->of_node, propname); ++ if (ret > 0) ++ break; ++ } ++ return ret ? ret : -ENOENT; +} + -+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) ++static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ struct of_phandle_args *gpiospec = data; + -+ if (SLOT_RB_EMPTY(rb)) -+ return NULL; ++ return device_match_of_node(&chip->gpiodev->dev, gpiospec->np) && ++ chip->of_xlate && ++ chip->of_xlate(chip, gpiospec, NULL) >= 0; ++} + -+ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; ++static struct gpio_chip *of_find_gpiochip_by_xlate( ++ struct of_phandle_args *gpiospec) ++{ ++ return gpiochip_find(gpiospec, of_gpiochip_match_node_and_xlate); +} + -+bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) ++static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip, ++ struct of_phandle_args *gpiospec, ++ enum of_gpio_flags *flags) +{ -+ unsigned int js; ++ int ret; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (chip->of_gpio_n_cells != gpiospec->args_count) ++ return ERR_PTR(-EINVAL); + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int i; ++ ret = chip->of_xlate(chip, gpiospec, flags); ++ if (ret < 0) ++ return ERR_PTR(ret); + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ return gpiochip_get_desc(chip, ret); ++} + -+ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) -+ return true; ++/* ++ * Overrides stated polarity of a gpio line and warns when there is a ++ * discrepancy. 
++ */ ++static void of_gpio_quirk_polarity(const struct device_node *np, ++ bool active_high, ++ enum of_gpio_flags *flags) ++{ ++ if (active_high) { ++ if (*flags & OF_GPIO_ACTIVE_LOW) { ++ pr_warn("%s GPIO handle specifies active low - ignored\n", ++ of_node_full_name(np)); ++ *flags &= ~OF_GPIO_ACTIVE_LOW; + } ++ } else { ++ if (!(*flags & OF_GPIO_ACTIVE_LOW)) ++ pr_info("%s enforce active low on GPIO handle\n", ++ of_node_full_name(np)); ++ *flags |= OF_GPIO_ACTIVE_LOW; + } -+ return false; +} + -+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) ++/* ++ * This quirk does static polarity overrides in cases where existing ++ * DTS specified incorrect polarity. ++ */ ++static void of_gpio_try_fixup_polarity(const struct device_node *np, ++ const char *propname, ++ enum of_gpio_flags *flags) +{ -+ int nr = 0; -+ int i; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ static const struct { ++ const char *compatible; ++ const char *propname; ++ bool active_high; ++ } gpios[] = { ++#if !IS_ENABLED(CONFIG_LCD_HX8357) ++ /* ++ * Himax LCD controllers used incorrectly named ++ * "gpios-reset" property and also specified wrong ++ * polarity. ++ */ ++ { "himax,hx8357", "gpios-reset", false }, ++ { "himax,hx8369", "gpios-reset", false }, ++#endif ++#if IS_ENABLED(CONFIG_PCI_LANTIQ) ++ /* ++ * According to the PCI specification, the RST# pin is an ++ * active-low signal. However, most of the device trees that ++ * have been widely used for a long time incorrectly describe ++ * reset GPIO as active-high, and were also using wrong name ++ * for the property. ++ */ ++ { "lantiq,pci-xway", "gpio-reset", false }, ++#endif ++#if IS_ENABLED(CONFIG_REGULATOR_S5M8767) ++ /* ++ * According to S5M8767, the DVS and DS pin are ++ * active-high signals. However, exynos5250-spring.dts use ++ * active-low setting. ++ */ ++ { "samsung,s5m8767-pmic", "s5m8767,pmic-buck-dvs-gpios", true }, ++ { "samsung,s5m8767-pmic", "s5m8767,pmic-buck-ds-gpios", true }, ++#endif ++#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005) ++ /* ++ * DTS for Nokia N900 incorrectly specified "active high" ++ * polarity for the reset line, while the chip actually ++ * treats it as "active low". 
++ */ ++ { "ti,tsc2005", "reset-gpios", false }, ++#endif ++ }; ++ unsigned int i; + -+ if (katom && (katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED)) -+ nr++; ++ for (i = 0; i < ARRAY_SIZE(gpios); i++) { ++ if (of_device_is_compatible(np, gpios[i].compatible) && ++ !strcmp(propname, gpios[i].propname)) { ++ of_gpio_quirk_polarity(np, gpios[i].active_high, flags); ++ break; ++ } + } -+ -+ return nr; +} + -+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) ++static void of_gpio_set_polarity_by_property(const struct device_node *np, ++ const char *propname, ++ enum of_gpio_flags *flags) +{ -+ int nr = 0; -+ int i; ++ const struct device_node *np_compat = np; ++ const struct device_node *np_propname = np; ++ static const struct { ++ const char *compatible; ++ const char *gpio_propname; ++ const char *polarity_propname; ++ } gpios[] = { ++#if IS_ENABLED(CONFIG_FEC) ++ /* Freescale Fast Ethernet Controller */ ++ { "fsl,imx25-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx27-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx28-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx6q-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,mvf600-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx6sx-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx6ul-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx8mq-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++ { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, ++#endif ++#if IS_ENABLED(CONFIG_MMC_ATMELMCI) ++ { "atmel,hsmci", "cd-gpios", "cd-inverted" }, ++#endif ++#if IS_ENABLED(CONFIG_PCI_IMX6) ++ { "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx6qp-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx7d-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx8mq-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx8mm-pcie", "reset-gpio", "reset-gpio-active-high" }, ++ { "fsl,imx8mp-pcie", "reset-gpio", "reset-gpio-active-high" }, ++#endif + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* ++ * The regulator GPIO handles are specified such that the ++ * presence or absence of "enable-active-high" solely controls ++ * the polarity of the GPIO line. Any phandle flags must ++ * be actively ignored. 
++ */ ++#if IS_ENABLED(CONFIG_REGULATOR_FIXED_VOLTAGE) ++ { "regulator-fixed", "gpios", "enable-active-high" }, ++ { "regulator-fixed", "gpio", "enable-active-high" }, ++ { "reg-fixed-voltage", "gpios", "enable-active-high" }, ++ { "reg-fixed-voltage", "gpio", "enable-active-high" }, ++#endif ++#if IS_ENABLED(CONFIG_REGULATOR_GPIO) ++ { "regulator-gpio", "enable-gpio", "enable-active-high" }, ++ { "regulator-gpio", "enable-gpios", "enable-active-high" }, ++#endif ++ }; ++ unsigned int i; ++ bool active_high; + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ if (kbase_gpu_inspect(kbdev, js, i)) -+ nr++; ++#if IS_ENABLED(CONFIG_MMC_ATMELMCI) ++ /* ++ * The Atmel HSMCI has compatible property in the parent node and ++ * gpio property in a child node ++ */ ++ if (of_device_is_compatible(np->parent, "atmel,hsmci")) { ++ np_compat = np->parent; ++ np_propname = np; + } ++#endif + -+ return nr; ++ for (i = 0; i < ARRAY_SIZE(gpios); i++) { ++ if (of_device_is_compatible(np_compat, gpios[i].compatible) && ++ !strcmp(propname, gpios[i].gpio_propname)) { ++ active_high = of_property_read_bool(np_propname, ++ gpios[i].polarity_propname); ++ of_gpio_quirk_polarity(np, active_high, flags); ++ break; ++ } ++ } +} + -+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, -+ enum kbase_atom_gpu_rb_state min_rb_state) ++static void of_gpio_flags_quirks(const struct device_node *np, ++ const char *propname, ++ enum of_gpio_flags *flags, ++ int index) +{ -+ int nr = 0; -+ int i; ++ of_gpio_try_fixup_polarity(np, propname, flags); ++ of_gpio_set_polarity_by_property(np, propname, flags); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* ++ * Legacy open drain handling for fixed voltage regulators. ++ */ ++ if (IS_ENABLED(CONFIG_REGULATOR) && ++ of_device_is_compatible(np, "reg-fixed-voltage") && ++ of_property_read_bool(np, "gpio-open-drain")) { ++ *flags |= (OF_GPIO_SINGLE_ENDED | OF_GPIO_OPEN_DRAIN); ++ pr_info("%s uses legacy open drain flag - update the DTS if you can\n", ++ of_node_full_name(np)); ++ } + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ /* ++ * Legacy handling of SPI active high chip select. If we have a ++ * property named "cs-gpios" we need to inspect the child node ++ * to determine if the flags should have inverted semantics. ++ */ ++ if (IS_ENABLED(CONFIG_SPI_MASTER) && !strcmp(propname, "cs-gpios") && ++ of_property_read_bool(np, "cs-gpios")) { ++ struct device_node *child; ++ u32 cs; ++ int ret; + -+ if (katom && (katom->gpu_rb_state >= min_rb_state)) -+ nr++; ++ for_each_child_of_node(np, child) { ++ ret = of_property_read_u32(child, "reg", &cs); ++ if (ret) ++ continue; ++ if (cs == index) { ++ /* ++ * SPI children have active low chip selects ++ * by default. This can be specified negatively ++ * by just omitting "spi-cs-high" in the ++ * device node, or actively by tagging on ++ * GPIO_ACTIVE_LOW as flag in the device ++ * tree. If the line is simultaneously ++ * tagged as active low in the device tree ++ * and has the "spi-cs-high" set, we get a ++ * conflict and the "spi-cs-high" flag will ++ * take precedence. 
++ */ ++ bool active_high = of_property_read_bool(child, ++ "spi-cs-high"); ++ of_gpio_quirk_polarity(child, active_high, ++ flags); ++ of_node_put(child); ++ break; ++ } ++ } + } + -+ return nr; ++ /* Legacy handling of stmmac's active-low PHY reset line */ ++ if (IS_ENABLED(CONFIG_STMMAC_ETH) && ++ !strcmp(propname, "snps,reset-gpio") && ++ of_property_read_bool(np, "snps,reset-active-low")) ++ *flags |= OF_GPIO_ACTIVE_LOW; +} + +/** -+ * check_secure_atom - Check if the given atom is in the given secure state and -+ * has a ringbuffer state of at least -+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION -+ * @katom: Atom pointer -+ * @secure: Desired secure state ++ * of_get_named_gpiod_flags() - Get a GPIO descriptor and flags for GPIO API ++ * @np: device node to get GPIO from ++ * @propname: property name containing gpio specifier(s) ++ * @index: index of the GPIO ++ * @flags: a flags pointer to fill in + * -+ * Return: true if atom is in the given state, false otherwise ++ * Returns GPIO descriptor to use with Linux GPIO API, or one of the errno ++ * value on the error condition. If @flags is not NULL the function also fills ++ * in flags for the GPIO. + */ -+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) ++static struct gpio_desc *of_get_named_gpiod_flags(const struct device_node *np, ++ const char *propname, int index, enum of_gpio_flags *flags) +{ -+ if (katom->gpu_rb_state >= -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && -+ ((kbase_jd_katom_is_protected(katom) && secure) || -+ (!kbase_jd_katom_is_protected(katom) && !secure))) -+ return true; ++ struct of_phandle_args gpiospec; ++ struct gpio_chip *chip; ++ struct gpio_desc *desc; ++ int ret; + -+ return false; ++ ret = of_parse_phandle_with_args_map(np, propname, "gpio", index, ++ &gpiospec); ++ if (ret) { ++ pr_debug("%s: can't parse '%s' property of node '%pOF[%d]'\n", ++ __func__, propname, np, index); ++ return ERR_PTR(ret); ++ } ++ ++ chip = of_find_gpiochip_by_xlate(&gpiospec); ++ if (!chip) { ++ desc = ERR_PTR(-EPROBE_DEFER); ++ goto out; ++ } ++ ++ desc = of_xlate_and_get_gpiod_flags(chip, &gpiospec, flags); ++ if (IS_ERR(desc)) ++ goto out; ++ ++ if (flags) ++ of_gpio_flags_quirks(np, propname, flags, index); ++ ++ pr_debug("%s: parsed '%s' property of node '%pOF[%d]' - status (%d)\n", ++ __func__, propname, np, index, ++ PTR_ERR_OR_ZERO(desc)); ++ ++out: ++ of_node_put(gpiospec.np); ++ ++ return desc; +} + +/** -+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given -+ * secure state in the ringbuffers of at least -+ * state -+ * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE -+ * @kbdev: Device pointer -+ * @secure: Desired secure state ++ * of_get_named_gpio() - Get a GPIO number to use with GPIO API ++ * @np: device node to get GPIO from ++ * @propname: Name of property containing gpio specifier(s) ++ * @index: index of the GPIO + * -+ * Return: true if any atoms are in the given state, false otherwise ++ * Returns GPIO number to use with Linux generic GPIO API, or one of the errno ++ * value on the error condition. 
+ */ -+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, -+ bool secure) ++int of_get_named_gpio(const struct device_node *np, const char *propname, ++ int index) +{ -+ unsigned int js; -+ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int i; -+ -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, i); ++ struct gpio_desc *desc; + -+ if (katom) { -+ if (check_secure_atom(katom, secure)) -+ return true; -+ } -+ } -+ } ++ desc = of_get_named_gpiod_flags(np, propname, index, NULL); + -+ return false; ++ if (IS_ERR(desc)) ++ return PTR_ERR(desc); ++ else ++ return desc_to_gpio(desc); +} ++EXPORT_SYMBOL_GPL(of_get_named_gpio); + -+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) ++/* Converts gpio_lookup_flags into bitmask of GPIO_* values */ ++static unsigned long of_convert_gpio_flags(enum of_gpio_flags flags) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; + -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* The GPU is being reset - so prevent submission */ -+ return 0; ++ if (flags & OF_GPIO_ACTIVE_LOW) ++ lflags |= GPIO_ACTIVE_LOW; ++ ++ if (flags & OF_GPIO_SINGLE_ENDED) { ++ if (flags & OF_GPIO_OPEN_DRAIN) ++ lflags |= GPIO_OPEN_DRAIN; ++ else ++ lflags |= GPIO_OPEN_SOURCE; + } + -+ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); -+} ++ if (flags & OF_GPIO_TRANSITORY) ++ lflags |= GPIO_TRANSITORY; + ++ if (flags & OF_GPIO_PULL_UP) ++ lflags |= GPIO_PULL_UP; + -+static void kbase_gpu_release_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp) -+{ -+ struct kbase_context *kctx = katom->kctx; ++ if (flags & OF_GPIO_PULL_DOWN) ++ lflags |= GPIO_PULL_DOWN; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (flags & OF_GPIO_PULL_DISABLE) ++ lflags |= GPIO_PULL_DISABLE; + -+ switch (katom->gpu_rb_state) { -+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: -+ /* Should be impossible */ -+ WARN(1, "Attempting to release atom not in ringbuffer\n"); -+ break; ++ return lflags; ++} + -+ case KBASE_ATOM_GPU_RB_SUBMITTED: -+ kbase_kinstr_jm_atom_hw_release(katom); -+ /* Inform power management at start/finish of atom so it can -+ * update its GPU utilisation metrics. Mark atom as not -+ * submitted beforehand. ++static struct gpio_desc *of_find_gpio_rename(struct device_node *np, ++ const char *con_id, ++ unsigned int idx, ++ enum of_gpio_flags *of_flags) ++{ ++ static const struct of_rename_gpio { ++ const char *con_id; ++ const char *legacy_id; /* NULL - same as con_id */ ++ /* ++ * Compatible string can be set to NULL in case where ++ * matching to a particular compatible is not practical, ++ * but it should only be done for gpio names that have ++ * vendor prefix to reduce risk of false positives. ++ * Addition of such entries is strongly discouraged. 
+ */ -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; -+ kbase_pm_metrics_update(kbdev, end_timestamp); -+ -+ /* Inform platform at start/finish of atom */ -+ kbasep_platform_event_atom_complete(katom); ++ const char *compatible; ++ } gpios[] = { ++#if !IS_ENABLED(CONFIG_LCD_HX8357) ++ /* Himax LCD controllers used "gpios-reset" */ ++ { "reset", "gpios-reset", "himax,hx8357" }, ++ { "reset", "gpios-reset", "himax,hx8369" }, ++#endif ++#if IS_ENABLED(CONFIG_MFD_ARIZONA) ++ { "wlf,reset", NULL, NULL }, ++#endif ++#if IS_ENABLED(CONFIG_RTC_DRV_MOXART) ++ { "rtc-data", "gpio-rtc-data", "moxa,moxart-rtc" }, ++ { "rtc-sclk", "gpio-rtc-sclk", "moxa,moxart-rtc" }, ++ { "rtc-reset", "gpio-rtc-reset", "moxa,moxart-rtc" }, ++#endif ++#if IS_ENABLED(CONFIG_NFC_MRVL_I2C) ++ { "reset", "reset-n-io", "marvell,nfc-i2c" }, ++#endif ++#if IS_ENABLED(CONFIG_NFC_MRVL_SPI) ++ { "reset", "reset-n-io", "marvell,nfc-spi" }, ++#endif ++#if IS_ENABLED(CONFIG_NFC_MRVL_UART) ++ { "reset", "reset-n-io", "marvell,nfc-uart" }, ++ { "reset", "reset-n-io", "mrvl,nfc-uart" }, ++#endif ++#if IS_ENABLED(CONFIG_PCI_LANTIQ) ++ /* MIPS Lantiq PCI */ ++ { "reset", "gpio-reset", "lantiq,pci-xway" }, ++#endif + -+ if (katom->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ /* ++ * Some regulator bindings happened before we managed to ++ * establish that GPIO properties should be named ++ * "foo-gpios" so we have this special kludge for them. ++ */ ++#if IS_ENABLED(CONFIG_REGULATOR_ARIZONA_LDO1) ++ { "wlf,ldoena", NULL, NULL }, /* Arizona */ ++#endif ++#if IS_ENABLED(CONFIG_REGULATOR_WM8994) ++ { "wlf,ldo1ena", NULL, NULL }, /* WM8994 */ ++ { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ ++#endif + -+ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); -+ KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); -+ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); ++#if IS_ENABLED(CONFIG_SND_SOC_CS42L56) ++ { "reset", "cirrus,gpio-nreset", "cirrus,cs42l56" }, ++#endif ++#if IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448) ++ { "i2s1-in-sel-gpio1", NULL, "mediatek,mt2701-cs42448-machine" }, ++ { "i2s1-in-sel-gpio2", NULL, "mediatek,mt2701-cs42448-machine" }, ++#endif ++#if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X) ++ { "reset", "gpio-reset", "ti,tlv320aic3x" }, ++ { "reset", "gpio-reset", "ti,tlv320aic33" }, ++ { "reset", "gpio-reset", "ti,tlv320aic3007" }, ++ { "reset", "gpio-reset", "ti,tlv320aic3104" }, ++ { "reset", "gpio-reset", "ti,tlv320aic3106" }, ++#endif ++#if IS_ENABLED(CONFIG_SPI_GPIO) ++ /* ++ * The SPI GPIO bindings happened before we managed to ++ * establish that GPIO properties should be named ++ * "foo-gpios" so we have this special kludge for them. ++ */ ++ { "miso", "gpio-miso", "spi-gpio" }, ++ { "mosi", "gpio-mosi", "spi-gpio" }, ++ { "sck", "gpio-sck", "spi-gpio" }, ++#endif + -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_READY: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: -+ break; ++ /* ++ * The old Freescale bindings use simply "gpios" as name ++ * for the chip select lines rather than "cs-gpios" like ++ * all other SPI hardware. Allow this specifically for ++ * Freescale and PPC devices. 
++ */ ++#if IS_ENABLED(CONFIG_SPI_FSL_SPI) ++ { "cs", "gpios", "fsl,spi" }, ++ { "cs", "gpios", "aeroflexgaisler,spictrl" }, ++#endif ++#if IS_ENABLED(CONFIG_SPI_PPC4xx) ++ { "cs", "gpios", "ibm,ppc4xx-spi" }, ++#endif + -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: -+ if (kbase_jd_katom_is_protected(katom) && -+ (katom->protected_state.enter != -+ KBASE_ATOM_ENTER_PROTECTED_CHECK) && -+ (katom->protected_state.enter != -+ KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { -+ kbase_pm_protected_override_disable(kbdev); -+ kbase_pm_update_cores_state_nolock(kbdev); -+ } -+ if (kbase_jd_katom_is_protected(katom) && -+ (katom->protected_state.enter == -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) -+ kbase_pm_protected_entry_override_disable(kbdev); -+ if (!kbase_jd_katom_is_protected(katom) && -+ (katom->protected_state.exit != -+ KBASE_ATOM_EXIT_PROTECTED_CHECK) && -+ (katom->protected_state.exit != -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { -+ kbase_pm_protected_override_disable(kbdev); -+ kbase_pm_update_cores_state_nolock(kbdev); -+ } ++#if IS_ENABLED(CONFIG_TYPEC_FUSB302) ++ /* ++ * Fairchild FUSB302 host is using undocumented "fcs,int_n" ++ * property without the compulsory "-gpios" suffix. ++ */ ++ { "fcs,int_n", NULL, "fcs,fusb302" }, ++#endif ++ }; ++ struct gpio_desc *desc; ++ const char *legacy_id; ++ unsigned int i; + -+ if (katom->protected_state.enter != -+ KBASE_ATOM_ENTER_PROTECTED_CHECK || -+ katom->protected_state.exit != -+ KBASE_ATOM_EXIT_PROTECTED_CHECK) -+ kbdev->protected_mode_transition = false; ++ if (!con_id) ++ return ERR_PTR(-ENOENT); + -+ /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means -+ * one of two events prevented it from progressing to the next state and -+ * ultimately reach protected mode: -+ * - hwcnts were enabled, and the atom had to schedule a worker to -+ * disable them. -+ * - the hwcnts were already disabled, but some other error occurred. -+ * In the first case, if the worker has not yet completed -+ * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable -+ * them and signal to the worker they have already been enabled -+ */ -+ if (kbase_jd_katom_is_protected(katom) && -+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { -+ kbdev->protected_mode_hwcnt_desired = true; -+ if (kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbdev->protected_mode_hwcnt_disabled = false; -+ } -+ } ++ for (i = 0; i < ARRAY_SIZE(gpios); i++) { ++ if (strcmp(con_id, gpios[i].con_id)) ++ continue; + -+ /* If the atom has suspended hwcnt but has not yet entered -+ * protected mode, then resume hwcnt now. If the GPU is now in -+ * protected mode then hwcnt will be resumed by GPU reset so -+ * don't resume it here. 
-+ */ -+ if (kbase_jd_katom_is_protected(katom) && -+ ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || -+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || -+ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { -+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled); -+ kbdev->protected_mode_hwcnt_desired = true; -+ if (kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_enable( -+ kbdev->hwcnt_gpu_ctx); -+ kbdev->protected_mode_hwcnt_disabled = false; -+ } -+ } ++ if (gpios[i].compatible && ++ !of_device_is_compatible(np, gpios[i].compatible)) ++ continue; + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { -+ if (katom->atom_flags & -+ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { -+ kbase_pm_protected_l2_override(kbdev, false); -+ katom->atom_flags &= -+ ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; -+ } ++ legacy_id = gpios[i].legacy_id ?: gpios[i].con_id; ++ desc = of_get_named_gpiod_flags(np, legacy_id, idx, of_flags); ++ if (!gpiod_not_found(desc)) { ++ pr_info("%s uses legacy gpio name '%s' instead of '%s-gpios'\n", ++ of_node_full_name(np), legacy_id, con_id); ++ return desc; + } -+ -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: -+ break; + } + -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ return ERR_PTR(-ENOENT); +} + -+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np, ++ const char *con_id, ++ unsigned int idx, ++ enum of_gpio_flags *of_flags) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct gpio_desc *desc; ++ const char *legacy_id; + -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, -+ katom->kctx, katom, katom->jc, -+ katom->slot_nr, katom->event_code); -+ kbase_gpu_release_atom(kbdev, katom, NULL); -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; ++ if (!IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448)) ++ return ERR_PTR(-ENOENT); ++ ++ if (!of_device_is_compatible(np, "mediatek,mt2701-cs42448-machine")) ++ return ERR_PTR(-ENOENT); ++ ++ if (!con_id || strcmp(con_id, "i2s1-in-sel")) ++ return ERR_PTR(-ENOENT); ++ ++ if (idx == 0) ++ legacy_id = "i2s1-in-sel-gpio1"; ++ else if (idx == 1) ++ legacy_id = "i2s1-in-sel-gpio2"; ++ else ++ return ERR_PTR(-ENOENT); ++ ++ desc = of_get_named_gpiod_flags(np, legacy_id, 0, of_flags); ++ if (!gpiod_not_found(desc)) ++ pr_info("%s is using legacy gpio name '%s' instead of '%s-gpios'\n", ++ of_node_full_name(np), legacy_id, con_id); ++ ++ return desc; +} + -+/** -+ * other_slots_busy - Determine if any job slots other than @js are currently -+ * running atoms -+ * @kbdev: Device pointer -+ * @js: Job slot -+ * -+ * Return: true if any slots other than @js are busy, false otherwise -+ */ -+static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) ++typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np, ++ const char *con_id, ++ unsigned int idx, ++ enum of_gpio_flags *of_flags); ++static const of_find_gpio_quirk of_find_gpio_quirks[] = { ++ of_find_gpio_rename, ++ of_find_mt2701_gpio, ++ NULL 
++}; ++ ++struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, ++ unsigned int idx, unsigned long *flags) +{ -+ unsigned int slot; ++ char prop_name[32]; /* 32 is max size of property name */ ++ enum of_gpio_flags of_flags; ++ const of_find_gpio_quirk *q; ++ struct gpio_desc *desc; ++ unsigned int i; + -+ for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { -+ if (slot == js) -+ continue; ++ /* Try GPIO property "foo-gpios" and "foo-gpio" */ ++ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { ++ if (con_id) ++ snprintf(prop_name, sizeof(prop_name), "%s-%s", con_id, ++ gpio_suffixes[i]); ++ else ++ snprintf(prop_name, sizeof(prop_name), "%s", ++ gpio_suffixes[i]); + -+ if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, -+ KBASE_ATOM_GPU_RB_SUBMITTED)) -+ return true; ++ desc = of_get_named_gpiod_flags(np, prop_name, idx, &of_flags); ++ ++ if (!gpiod_not_found(desc)) ++ break; + } + -+ return false; -+} ++ /* Properly named GPIO was not found, try workarounds */ ++ for (q = of_find_gpio_quirks; gpiod_not_found(desc) && *q; q++) ++ desc = (*q)(np, con_id, idx, &of_flags); + -+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) -+{ -+ return kbdev->protected_mode; -+} ++ if (IS_ERR(desc)) ++ return desc; + -+static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ *flags = of_convert_gpio_flags(of_flags); + -+ /* -+ * When entering into protected mode, we must ensure that the -+ * GPU is not operating in coherent mode as well. This is to -+ * ensure that no protected memory can be leaked. -+ */ -+ if (kbdev->system_coherency == COHERENCY_ACE) -+ kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); ++ return desc; +} + -+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) ++/** ++ * of_parse_own_gpio() - Get a GPIO hog descriptor, names and flags for GPIO API ++ * @np: device node to get GPIO from ++ * @chip: GPIO chip whose hog is parsed ++ * @idx: Index of the GPIO to parse ++ * @name: GPIO line name ++ * @lflags: bitmask of gpio_lookup_flags GPIO_* values - returned from ++ * of_find_gpio() or of_parse_own_gpio() ++ * @dflags: gpiod_flags - optional GPIO initialization flags ++ * ++ * Returns GPIO descriptor to use with Linux GPIO API, or one of the errno ++ * value on the error condition. 
++ */ ++static struct gpio_desc *of_parse_own_gpio(struct device_node *np, ++ struct gpio_chip *chip, ++ unsigned int idx, const char **name, ++ unsigned long *lflags, ++ enum gpiod_flags *dflags) +{ -+ int err = -EINVAL; ++ struct device_node *chip_np; ++ enum of_gpio_flags xlate_flags; ++ struct of_phandle_args gpiospec; ++ struct gpio_desc *desc; ++ unsigned int i; ++ u32 tmp; ++ int ret; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ chip_np = dev_of_node(&chip->gpiodev->dev); ++ if (!chip_np) ++ return ERR_PTR(-EINVAL); + -+ WARN_ONCE(!kbdev->protected_ops, -+ "Cannot enter protected mode: protected callbacks not specified.\n"); ++ xlate_flags = 0; ++ *lflags = GPIO_LOOKUP_FLAGS_DEFAULT; ++ *dflags = GPIOD_ASIS; + -+ if (kbdev->protected_ops) { -+ /* Switch GPU to protected mode */ -+ err = kbdev->protected_ops->protected_mode_enable( -+ kbdev->protected_dev); ++ ret = of_property_read_u32(chip_np, "#gpio-cells", &tmp); ++ if (ret) ++ return ERR_PTR(ret); + -+ if (err) { -+ dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", -+ err); -+ } else { -+ kbdev->protected_mode = true; -+ kbase_ipa_protection_mode_switch_event(kbdev); -+ } ++ gpiospec.np = chip_np; ++ gpiospec.args_count = tmp; ++ ++ for (i = 0; i < tmp; i++) { ++ ret = of_property_read_u32_index(np, "gpios", idx * tmp + i, ++ &gpiospec.args[i]); ++ if (ret) ++ return ERR_PTR(ret); + } + -+ return err; -+} ++ desc = of_xlate_and_get_gpiod_flags(chip, &gpiospec, &xlate_flags); ++ if (IS_ERR(desc)) ++ return desc; + -+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ *lflags = of_convert_gpio_flags(xlate_flags); + -+ WARN_ONCE(!kbdev->protected_ops, -+ "Cannot exit protected mode: protected callbacks not specified.\n"); ++ if (of_property_read_bool(np, "input")) ++ *dflags |= GPIOD_IN; ++ else if (of_property_read_bool(np, "output-low")) ++ *dflags |= GPIOD_OUT_LOW; ++ else if (of_property_read_bool(np, "output-high")) ++ *dflags |= GPIOD_OUT_HIGH; ++ else { ++ pr_warn("GPIO line %d (%pOFn): no hogging state specified, bailing out\n", ++ desc_to_gpio(desc), np); ++ return ERR_PTR(-EINVAL); ++ } + -+ if (!kbdev->protected_ops) -+ return -EINVAL; ++ if (name && of_property_read_string(np, "line-name", name)) ++ *name = np->name; + -+ /* The protected mode disable callback will be called as part of reset -+ */ -+ return kbase_reset_gpu_silent(kbdev); ++ return desc; +} + -+static int kbase_jm_protected_entry(struct kbase_device *kbdev, -+ struct kbase_jd_atom **katom, int idx, int js) ++/** ++ * of_gpiochip_add_hog - Add all hogs in a hog device node ++ * @chip: gpio chip to act on ++ * @hog: device node describing the hogs ++ * ++ * Returns error if it fails otherwise 0 on success. ++ */ ++static int of_gpiochip_add_hog(struct gpio_chip *chip, struct device_node *hog) +{ -+ int err = 0; ++ enum gpiod_flags dflags; ++ struct gpio_desc *desc; ++ unsigned long lflags; ++ const char *name; ++ unsigned int i; ++ int ret; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ for (i = 0;; i++) { ++ desc = of_parse_own_gpio(hog, chip, i, &name, &lflags, &dflags); ++ if (IS_ERR(desc)) ++ break; + -+ err = kbase_gpu_protected_mode_enter(kbdev); ++ ret = gpiod_hog(desc, name, lflags, dflags); ++ if (ret < 0) ++ return ret; + -+ /* -+ * Regardless of result before this call, we are no longer -+ * transitioning the GPU. 
-+ */ ++#ifdef CONFIG_OF_DYNAMIC ++ desc->hog = hog; ++#endif ++ } + -+ kbdev->protected_mode_transition = false; -+ kbase_pm_protected_override_disable(kbdev); -+ kbase_pm_update_cores_state_nolock(kbdev); ++ return 0; ++} + -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); -+ if (err) { -+ /* -+ * Failed to switch into protected mode. -+ * -+ * At this point we expect: -+ * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && -+ * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED -+ * ==> -+ * kbdev->protected_mode_hwcnt_disabled = false -+ */ -+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); -+ /* -+ * Only return if head atom or previous atom -+ * already removed - as atoms must be returned -+ * in order. -+ */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++/** ++ * of_gpiochip_scan_gpios - Scan gpio-controller for gpio definitions ++ * @chip: gpio chip to act on ++ * ++ * This is only used by of_gpiochip_add to request/set GPIO initial ++ * configuration. ++ * It returns error if it fails otherwise 0 on success. ++ */ ++static int of_gpiochip_scan_gpios(struct gpio_chip *chip) ++{ ++ struct device_node *np; ++ int ret; ++ ++ for_each_available_child_of_node(dev_of_node(&chip->gpiodev->dev), np) { ++ if (!of_property_read_bool(np, "gpio-hog")) ++ continue; ++ ++ ret = of_gpiochip_add_hog(chip, np); ++ if (ret < 0) { ++ of_node_put(np); ++ return ret; + } + -+ return -EINVAL; ++ of_node_set_flag(np, OF_POPULATED); + } + -+ /* -+ * Protected mode sanity checks. -+ */ -+ WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), -+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", -+ kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_READY; ++ return 0; ++} + -+ return err; ++#ifdef CONFIG_OF_DYNAMIC ++/** ++ * of_gpiochip_remove_hog - Remove all hogs in a hog device node ++ * @chip: gpio chip to act on ++ * @hog: device node describing the hogs ++ */ ++static void of_gpiochip_remove_hog(struct gpio_chip *chip, ++ struct device_node *hog) ++{ ++ struct gpio_desc *desc; ++ ++ for_each_gpio_desc_with_flag(chip, desc, FLAG_IS_HOGGED) ++ if (desc->hog == hog) ++ gpiochip_free_own_desc(desc); +} + -+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, -+ struct kbase_jd_atom **katom, int idx, int js) ++static int of_gpiochip_match_node(struct gpio_chip *chip, void *data) +{ -+ int err = 0; ++ return device_match_of_node(&chip->gpiodev->dev, data); ++} + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++static struct gpio_chip *of_find_gpiochip_by_node(struct device_node *np) ++{ ++ return gpiochip_find(np, of_gpiochip_match_node); ++} + -+ switch (katom[idx]->protected_state.enter) { -+ case KBASE_ATOM_ENTER_PROTECTED_CHECK: -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); -+ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV -+ * should ensure that we are not already transitiong, and that -+ * there are no atoms currently on the GPU. -+ */ -+ WARN_ON(kbdev->protected_mode_transition); -+ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); -+ /* If hwcnt is disabled, it means we didn't clean up correctly -+ * during last exit from protected mode. 
-+ */ -+ WARN_ON(kbdev->protected_mode_hwcnt_disabled); ++static int of_gpio_notify(struct notifier_block *nb, unsigned long action, ++ void *arg) ++{ ++ struct of_reconfig_data *rd = arg; ++ struct gpio_chip *chip; ++ int ret; + -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_HWCNT; ++ /* ++ * This only supports adding and removing complete gpio-hog nodes. ++ * Modifying an existing gpio-hog node is not supported (except for ++ * changing its "status" property, which is treated the same as ++ * addition/removal). ++ */ ++ switch (of_reconfig_get_state_change(action, arg)) { ++ case OF_RECONFIG_CHANGE_ADD: ++ if (!of_property_read_bool(rd->dn, "gpio-hog")) ++ return NOTIFY_OK; /* not for us */ + -+ kbdev->protected_mode_transition = true; ++ if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) ++ return NOTIFY_OK; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_ENTER_PROTECTED_HWCNT: -+ /* See if we can get away with disabling hwcnt atomically */ -+ kbdev->protected_mode_hwcnt_desired = false; -+ if (!kbdev->protected_mode_hwcnt_disabled) { -+ if (kbase_hwcnt_context_disable_atomic( -+ kbdev->hwcnt_gpu_ctx)) -+ kbdev->protected_mode_hwcnt_disabled = true; -+ } ++ chip = of_find_gpiochip_by_node(rd->dn->parent); ++ if (chip == NULL) ++ return NOTIFY_OK; /* not for us */ + -+ /* We couldn't disable atomically, so kick off a worker */ -+ if (!kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_queue_work( -+ kbdev->hwcnt_gpu_ctx, -+ &kbdev->protected_mode_hwcnt_disable_work); -+ return -EAGAIN; ++ ret = of_gpiochip_add_hog(chip, rd->dn); ++ if (ret < 0) { ++ pr_err("%s: failed to add hogs for %pOF\n", __func__, ++ rd->dn); ++ of_node_clear_flag(rd->dn, OF_POPULATED); ++ return notifier_from_errno(ret); + } ++ break; + -+ /* Once reaching this point GPU must be switched to protected -+ * mode or hwcnt re-enabled. -+ */ ++ case OF_RECONFIG_CHANGE_REMOVE: ++ if (!of_node_check_flag(rd->dn, OF_POPULATED)) ++ return NOTIFY_OK; /* already depopulated */ + -+ if (kbase_pm_protected_entry_override_enable(kbdev)) -+ return -EAGAIN; ++ chip = of_find_gpiochip_by_node(rd->dn->parent); ++ if (chip == NULL) ++ return NOTIFY_OK; /* not for us */ + -+ /* -+ * Not in correct mode, begin protected mode switch. -+ * Entering protected mode requires us to power down the L2, -+ * and drop out of fully coherent mode. -+ */ -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; ++ of_gpiochip_remove_hog(chip, rd->dn); ++ of_node_clear_flag(rd->dn, OF_POPULATED); ++ break; ++ } + -+ kbase_pm_protected_override_enable(kbdev); -+ /* -+ * Only if the GPU reset hasn't been initiated, there is a need -+ * to invoke the state machine to explicitly power down the -+ * shader cores and L2. -+ */ -+ if (!kbdev->pm.backend.protected_entry_transition_override) -+ kbase_pm_update_cores_state_nolock(kbdev); ++ return NOTIFY_OK; ++} + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: -+ /* Avoid unnecessary waiting on non-ACE platforms. */ -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ if (kbdev->pm.backend.l2_always_on) { -+ /* -+ * If the GPU reset hasn't completed, then L2 -+ * could still be powered up. 
-+ */ -+ if (kbase_reset_gpu_is_active(kbdev)) -+ return -EAGAIN; -+ } ++struct notifier_block gpio_of_notifier = { ++ .notifier_call = of_gpio_notify, ++}; ++#endif /* CONFIG_OF_DYNAMIC */ + -+ if (kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_L2) || -+ kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_L2) || -+ kbase_is_gpu_removed(kbdev)) { -+ /* -+ * The L2 is still powered, wait for all -+ * the users to finish with it before doing -+ * the actual reset. -+ */ -+ return -EAGAIN; -+ } -+ } ++/** ++ * of_gpio_simple_xlate - translate gpiospec to the GPIO number and flags ++ * @gc: pointer to the gpio_chip structure ++ * @gpiospec: GPIO specifier as found in the device tree ++ * @flags: a flags pointer to fill in ++ * ++ * This is simple translation function, suitable for the most 1:1 mapped ++ * GPIO chips. This function performs only one sanity check: whether GPIO ++ * is less than ngpios (that is specified in the gpio_chip). ++ */ ++static int of_gpio_simple_xlate(struct gpio_chip *gc, ++ const struct of_phandle_args *gpiospec, ++ u32 *flags) ++{ ++ /* ++ * We're discouraging gpio_cells < 2, since that way you'll have to ++ * write your own xlate function (that will have to retrieve the GPIO ++ * number and the flags from a single gpio cell -- this is possible, ++ * but not recommended). ++ */ ++ if (gc->of_gpio_n_cells < 2) { ++ WARN_ON(1); ++ return -EINVAL; ++ } + -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; ++ if (WARN_ON(gpiospec->args_count < gc->of_gpio_n_cells)) ++ return -EINVAL; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: -+ /* -+ * When entering into protected mode, we must ensure that the -+ * GPU is not operating in coherent mode as well. This is to -+ * ensure that no protected memory can be leaked. -+ */ -+ kbase_gpu_disable_coherent(kbdev); ++ if (gpiospec->args[0] >= gc->ngpio) ++ return -EINVAL; + -+ kbase_pm_protected_entry_override_disable(kbdev); ++ if (flags) ++ *flags = gpiospec->args[1]; + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { -+ /* -+ * Power on L2 caches; this will also result in the -+ * correct value written to coherency enable register. -+ */ -+ kbase_pm_protected_l2_override(kbdev, true); ++ return gpiospec->args[0]; ++} + -+ /* -+ * Set the flag on the atom that additional -+ * L2 references are taken. -+ */ -+ katom[idx]->atom_flags |= -+ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; -+ } ++#if IS_ENABLED(CONFIG_OF_GPIO_MM_GPIOCHIP) ++#include ++/** ++ * of_mm_gpiochip_add_data - Add memory mapped GPIO chip (bank) ++ * @np: device node of the GPIO chip ++ * @mm_gc: pointer to the of_mm_gpio_chip allocated structure ++ * @data: driver data to store in the struct gpio_chip ++ * ++ * To use this function you should allocate and fill mm_gc with: ++ * ++ * 1) In the gpio_chip structure: ++ * - all the callbacks ++ * - of_gpio_n_cells ++ * - of_xlate callback (optional) ++ * ++ * 3) In the of_mm_gpio_chip structure: ++ * - save_regs callback (optional) ++ * ++ * If succeeded, this function will map bank's memory and will ++ * do all necessary work for you. Then you'll able to use .regs ++ * to manage GPIOs from the callbacks. 
++ */ ++int of_mm_gpiochip_add_data(struct device_node *np, ++ struct of_mm_gpio_chip *mm_gc, ++ void *data) ++{ ++ int ret = -ENOMEM; ++ struct gpio_chip *gc = &mm_gc->gc; + -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_FINISHED; ++ gc->label = kasprintf(GFP_KERNEL, "%pOF", np); ++ if (!gc->label) ++ goto err0; + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) -+ return -EAGAIN; ++ mm_gc->regs = of_iomap(np, 0); ++ if (!mm_gc->regs) ++ goto err1; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_ENTER_PROTECTED_FINISHED: -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { -+ /* -+ * Check that L2 caches are powered and, if so, -+ * enter protected mode. -+ */ -+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { -+ /* -+ * Remove additional L2 reference and reset -+ * the atom flag which denotes it. -+ */ -+ if (katom[idx]->atom_flags & -+ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { -+ kbase_pm_protected_l2_override(kbdev, -+ false); -+ katom[idx]->atom_flags &= -+ ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; -+ } ++ gc->base = -1; + -+ err = kbase_jm_protected_entry(kbdev, katom, idx, js); ++ if (mm_gc->save_regs) ++ mm_gc->save_regs(mm_gc); + -+ if (err) -+ return err; -+ } else { -+ /* -+ * still waiting for L2 caches to power up -+ */ -+ return -EAGAIN; -+ } -+ } else { -+ err = kbase_jm_protected_entry(kbdev, katom, idx, js); ++ fwnode_handle_put(mm_gc->gc.fwnode); ++ mm_gc->gc.fwnode = fwnode_handle_get(of_fwnode_handle(np)); + -+ if (err) -+ return err; -+ } -+ } ++ ret = gpiochip_add_data(gc, data); ++ if (ret) ++ goto err2; + + return 0; ++err2: ++ of_node_put(np); ++ iounmap(mm_gc->regs); ++err1: ++ kfree(gc->label); ++err0: ++ pr_err("%pOF: GPIO chip registration failed with status %d\n", np, ret); ++ return ret; +} ++EXPORT_SYMBOL_GPL(of_mm_gpiochip_add_data); + -+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, -+ struct kbase_jd_atom **katom, int idx, int js) ++/** ++ * of_mm_gpiochip_remove - Remove memory mapped GPIO chip (bank) ++ * @mm_gc: pointer to the of_mm_gpio_chip allocated structure ++ */ ++void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc) +{ -+ int err = 0; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ switch (katom[idx]->protected_state.exit) { -+ case KBASE_ATOM_EXIT_PROTECTED_CHECK: -+ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); -+ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV -+ * should ensure that we are not already transitiong, and that -+ * there are no atoms currently on the GPU. -+ */ -+ WARN_ON(kbdev->protected_mode_transition); -+ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); -+ -+ /* -+ * Exiting protected mode requires a reset, but first the L2 -+ * needs to be powered down to ensure it's not active when the -+ * reset is issued. -+ */ -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; ++ struct gpio_chip *gc = &mm_gc->gc; + -+ kbdev->protected_mode_transition = true; -+ kbase_pm_protected_override_enable(kbdev); -+ kbase_pm_update_cores_state_nolock(kbdev); ++ gpiochip_remove(gc); ++ iounmap(mm_gc->regs); ++ kfree(gc->label); ++} ++EXPORT_SYMBOL_GPL(of_mm_gpiochip_remove); ++#endif + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: -+ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { -+ /* -+ * The L2 is still powered, wait for all the users to -+ * finish with it before doing the actual reset. 
-+ */ -+ return -EAGAIN; -+ } -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_RESET; ++#ifdef CONFIG_PINCTRL ++static int of_gpiochip_add_pin_range(struct gpio_chip *chip) ++{ ++ struct of_phandle_args pinspec; ++ struct pinctrl_dev *pctldev; ++ struct device_node *np; ++ int index = 0, ret; ++ const char *name; ++ static const char group_names_propname[] = "gpio-ranges-group-names"; ++ struct property *group_names; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_EXIT_PROTECTED_RESET: -+ /* L2 cache has been turned off (which is needed prior to the reset of GPU -+ * to exit the protected mode), so the override flag can be safely cleared. -+ * Even if L2 cache is powered up again before the actual reset, it should -+ * not be an issue (there are no jobs running on the GPU). -+ */ -+ kbase_pm_protected_override_disable(kbdev); ++ np = dev_of_node(&chip->gpiodev->dev); ++ if (!np) ++ return 0; + -+ /* Issue the reset to the GPU */ -+ err = kbase_gpu_protected_mode_reset(kbdev); ++ group_names = of_find_property(np, group_names_propname, NULL); + -+ if (err == -EAGAIN) -+ return -EAGAIN; ++ for (;; index++) { ++ ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++ index, &pinspec); ++ if (ret) ++ break; + -+ if (err) { -+ kbdev->protected_mode_transition = false; ++ pctldev = of_pinctrl_get(pinspec.np); ++ of_node_put(pinspec.np); ++ if (!pctldev) ++ return -EPROBE_DEFER; + -+ /* Failed to exit protected mode, fail atom */ -+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); -+ /* Only return if head atom or previous atom -+ * already removed - as atoms must be returned in order -+ */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ if (pinspec.args[2]) { ++ if (group_names) { ++ of_property_read_string_index(np, ++ group_names_propname, ++ index, &name); ++ if (strlen(name)) { ++ pr_err("%pOF: Group name of numeric GPIO ranges must be the empty string.\n", ++ np); ++ break; ++ } ++ } ++ /* npins != 0: linear range */ ++ ret = gpiochip_add_pin_range(chip, ++ pinctrl_dev_get_devname(pctldev), ++ pinspec.args[0], ++ pinspec.args[1], ++ pinspec.args[2]); ++ if (ret) ++ return ret; ++ } else { ++ /* npins == 0: special range */ ++ if (pinspec.args[1]) { ++ pr_err("%pOF: Illegal gpio-range format.\n", ++ np); ++ break; + } + -+ /* If we're exiting from protected mode, hwcnt must have -+ * been disabled during entry. -+ */ -+ WARN_ON(!kbdev->protected_mode_hwcnt_disabled); -+ kbdev->protected_mode_hwcnt_desired = true; -+ if (kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_enable( -+ kbdev->hwcnt_gpu_ctx); -+ kbdev->protected_mode_hwcnt_disabled = false; ++ if (!group_names) { ++ pr_err("%pOF: GPIO group range requested but no %s property.\n", ++ np, group_names_propname); ++ break; + } + -+ return -EINVAL; -+ } ++ ret = of_property_read_string_index(np, ++ group_names_propname, ++ index, &name); ++ if (ret) ++ break; + -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; ++ if (!strlen(name)) { ++ pr_err("%pOF: Group name of GPIO group range cannot be the empty string.\n", ++ np); ++ break; ++ } + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: -+ /* A GPU reset is issued when exiting protected mode. 
Once the -+ * reset is done all atoms' state will also be reset. For this -+ * reason, if the atom is still in this state we can safely -+ * say that the reset has not completed i.e., we have not -+ * finished exiting protected mode yet. -+ */ -+ return -EAGAIN; ++ ret = gpiochip_add_pingroup_range(chip, pctldev, ++ pinspec.args[0], name); ++ if (ret) ++ return ret; ++ } + } + + return 0; +} + -+void kbase_backend_slot_update(struct kbase_device *kbdev) -+{ -+ unsigned int js; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_reset_gpu_is_active(kbdev) || -+ kbase_is_gpu_removed(kbdev)) +#else -+ if (kbase_reset_gpu_is_active(kbdev)) ++static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { return 0; } +#endif -+ return; -+ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ struct kbase_jd_atom *katom[2]; -+ int idx; -+ -+ katom[0] = kbase_gpu_inspect(kbdev, js, 0); -+ katom[1] = kbase_gpu_inspect(kbdev, js, 1); -+ WARN_ON(katom[1] && !katom[0]); + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ bool cores_ready; -+ int ret; ++int of_gpiochip_add(struct gpio_chip *chip) ++{ ++ struct device_node *np; ++ int ret; + -+ if (!katom[idx]) -+ continue; ++ np = dev_of_node(&chip->gpiodev->dev); ++ if (!np) ++ return 0; + -+ switch (katom[idx]->gpu_rb_state) { -+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: -+ /* Should be impossible */ -+ WARN(1, "Attempting to update atom not in ringbuffer\n"); -+ break; ++ if (!chip->of_xlate) { ++ chip->of_gpio_n_cells = 2; ++ chip->of_xlate = of_gpio_simple_xlate; ++ } + -+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: -+ if (kbase_js_atom_blocked_on_x_dep(katom[idx])) -+ break; ++ if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS) ++ return -EINVAL; + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; ++ ret = of_gpiochip_add_pin_range(chip); ++ if (ret) ++ return ret; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: -+ if (kbase_gpu_check_secure_atoms(kbdev, -+ !kbase_jd_katom_is_protected( -+ katom[idx]))) -+ break; ++ of_node_get(np); + -+ if ((idx == 1) && (kbase_jd_katom_is_protected( -+ katom[0]) != -+ kbase_jd_katom_is_protected( -+ katom[1]))) -+ break; ++ ret = of_gpiochip_scan_gpios(chip); ++ if (ret) ++ of_node_put(np); + -+ if (kbdev->protected_mode_transition) -+ break; ++ return ret; ++} + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; ++void of_gpiochip_remove(struct gpio_chip *chip) ++{ ++ of_node_put(dev_of_node(&chip->gpiodev->dev)); ++} +diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile +index 8997f0096..b37172366 100644 +--- a/drivers/gpu/Makefile ++++ b/drivers/gpu/Makefile +@@ -3,5 +3,6 @@ + # taken to initialize them in the correct order. Link order is the only way + # to ensure this currently. + obj-y += host1x/ drm/ vga/ ++obj-y += arm/ + obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ + obj-$(CONFIG_TRACE_GPU_MEM) += trace/ +diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild +new file mode 100755 +index 000000000..f747fc889 +--- /dev/null ++++ b/drivers/gpu/arm/Kbuild +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# (C) COPYRIGHT 2012, 2020 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: ++obj-$(CONFIG_MALI_MIDGARD) += midgard/ + -+ /* -+ * Exiting protected mode must be done before -+ * the references on the cores are taken as -+ * a power down the L2 is required which -+ * can't happen after the references for this -+ * atom are taken. -+ */ ++obj-$(CONFIG_MALI400) += mali400/ + -+ if (!kbase_gpu_in_protected_mode(kbdev) && -+ kbase_jd_katom_is_protected(katom[idx])) { -+ /* Atom needs to transition into protected mode. */ -+ ret = kbase_jm_enter_protected_mode(kbdev, -+ katom, idx, js); -+ if (ret) -+ break; -+ } else if (kbase_gpu_in_protected_mode(kbdev) && -+ !kbase_jd_katom_is_protected(katom[idx])) { -+ /* Atom needs to transition out of protected mode. */ -+ ret = kbase_jm_exit_protected_mode(kbdev, -+ katom, idx, js); -+ if (ret) -+ break; -+ } -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_CHECK; ++obj-$(CONFIG_MALI_BIFROST) += bifrost/ +diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig +new file mode 100644 +index 000000000..398a8e50a +--- /dev/null ++++ b/drivers/gpu/arm/Kconfig +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++# ++source "drivers/gpu/arm/mali400/mali/Kconfig" + -+ /* Atom needs no protected mode transition. */ ++source "drivers/gpu/arm/midgard/Kconfig" + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; ++source "drivers/gpu/arm/bifrost/Kconfig" +diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild +new file mode 100755 +index 000000000..9cadda188 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/Kbuild +@@ -0,0 +1,243 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: -+ if (katom[idx]->will_fail_event_code) { -+ kbase_gpu_mark_atom_for_return(kbdev, -+ katom[idx]); -+ /* Set EVENT_DONE so this atom will be -+ * completed, not unpulled. -+ */ -+ katom[idx]->event_code = -+ BASE_JD_EVENT_DONE; -+ /* Only return if head atom or previous -+ * atom already removed - as atoms must -+ * be returned in order. -+ */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); -+ } -+ break; -+ } ++# make $(src) as absolute path if it is not already, by prefixing $(srctree) ++# This is to prevent any build issue due to wrong path. ++src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + -+ cores_ready = kbase_pm_cores_requested(kbdev, -+ true); ++# ++# Prevent misuse when Kernel configurations are not present by default ++# in out-of-tree builds ++# ++ifneq ($(CONFIG_ANDROID),n) ++ifeq ($(CONFIG_GPU_TRACEPOINTS),n) ++ $(error CONFIG_GPU_TRACEPOINTS must be set in Kernel configuration) ++endif ++endif + -+ if (!cores_ready) -+ break; ++ifeq ($(CONFIG_DMA_SHARED_BUFFER),n) ++ $(error CONFIG_DMA_SHARED_BUFFER must be set in Kernel configuration) ++endif + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_READY; ++ifeq ($(CONFIG_PM_DEVFREQ),n) ++ $(error CONFIG_PM_DEVFREQ must be set in Kernel configuration) ++endif + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_READY: ++ifeq ($(CONFIG_DEVFREQ_THERMAL),n) ++ $(error CONFIG_DEVFREQ_THERMAL must be set in Kernel configuration) ++endif + -+ if (idx == 1) { -+ /* Only submit if head atom or previous -+ * atom already submitted -+ */ -+ if ((katom[0]->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED && -+ katom[0]->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) -+ break; ++ifeq ($(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND),n) ++ $(error CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND must be set in Kernel configuration) ++endif + -+ /* If intra-slot serialization in use -+ * then don't submit atom to NEXT slot -+ */ -+ if (kbdev->serialize_jobs & -+ KBASE_SERIALIZE_INTRA_SLOT) -+ break; -+ } ++ifeq ($(CONFIG_FW_LOADER), n) ++ $(error CONFIG_FW_LOADER must be set in Kernel configuration) ++endif + -+ /* If inter-slot serialization in use then don't -+ * submit atom if any other slots are in use -+ */ -+ if ((kbdev->serialize_jobs & -+ KBASE_SERIALIZE_INTER_SLOT) && -+ other_slots_busy(kbdev, js)) -+ break; ++ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y) ++ ifneq ($(CONFIG_DEBUG_FS), y) ++ $(error CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS depends on CONFIG_DEBUG_FS to be set in Kernel configuration) ++ endif ++endif + -+ /* Check if this job needs the cycle 
counter -+ * enabled before submission -+ */ -+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); ++ifeq ($(CONFIG_MALI_BIFROST_FENCE_DEBUG), y) ++ ifneq ($(CONFIG_SYNC_FILE), y) ++ $(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration) ++ endif ++endif + -+ if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { -+ katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; ++# ++# Configurations ++# + -+ /* Inform power management at start/finish of -+ * atom so it can update its GPU utilisation -+ * metrics. -+ */ -+ kbase_pm_metrics_update(kbdev, -+ &katom[idx]->start_timestamp); ++# Driver version string which is returned to userspace via an ioctl ++MALI_RELEASE_NAME ?= '"g18p0-01eac0"' ++# Set up defaults if not defined by build system ++ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) ++ MALI_UNIT_TEST = 1 ++ MALI_CUSTOMER_RELEASE ?= 0 ++else ++ MALI_UNIT_TEST ?= 0 ++ MALI_CUSTOMER_RELEASE ?= 1 ++endif ++MALI_COVERAGE ?= 0 + -+ /* Inform platform at start/finish of atom */ -+ kbasep_platform_event_atom_submit(katom[idx]); -+ } else { -+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++# Kconfig passes in the name with quotes for in-tree builds - remove them. ++MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) + -+ break; -+ } ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ MALI_JIT_PRESSURE_LIMIT_BASE = 0 ++ MALI_USE_CSF = 1 ++else ++ MALI_JIT_PRESSURE_LIMIT_BASE ?= 1 ++ MALI_USE_CSF ?= 0 ++endif + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ fallthrough; -+ case KBASE_ATOM_GPU_RB_SUBMITTED: -+ break; + -+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: -+ /* Only return if head atom or previous atom -+ * already removed - as atoms must be returned -+ * in order -+ */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, -+ katom[idx]); -+ } -+ break; -+ } -+ } -+ } -+} ++ifneq ($(CONFIG_MALI_KUTF), n) ++ MALI_KERNEL_TEST_API ?= 1 ++else ++ MALI_KERNEL_TEST_API ?= 0 ++endif + ++# Experimental features (corresponding -D definition should be appended to ++# ccflags-y below, e.g. 
for MALI_EXPERIMENTAL_FEATURE, ++# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) ++# ++# Experimental features must default to disabled, e.g.: ++# MALI_EXPERIMENTAL_FEATURE ?= 0 ++MALI_INCREMENTAL_RENDERING_JM ?= 0 + -+void kbase_backend_run_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); ++# ++# ccflags ++# ++ccflags-y = \ ++ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ ++ -DMALI_USE_CSF=$(MALI_USE_CSF) \ ++ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ ++ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ ++ -DMALI_COVERAGE=$(MALI_COVERAGE) \ ++ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ ++ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ ++ -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \ ++ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) + -+ kbase_gpu_enqueue_atom(kbdev, katom); -+ kbase_backend_slot_update(kbdev); -+} + -+/** -+ * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer -+ * might depend on another from the same kctx -+ * @katom_a: dependee atom -+ * @katom_b: atom to query -+ * -+ * This can be used on atoms that belong to different slot ringbuffers -+ * -+ * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. -+ */ -+static inline bool -+kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, -+ const struct kbase_jd_atom *katom_b) -+{ -+ if (katom_a->kctx != katom_b->kctx) -+ return false; -+ return (katom_b->pre_dep || -+ (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | -+ KBASE_KATOM_FLAG_FAIL_BLOCKER))); -+} ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) ++else ++# out-of-tree ++ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) ++endif + -+/** -+ * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is -+ * related to a failed JSn_HEAD atom -+ * @kbdev: kbase device -+ * @js: job slot to check -+ * @completion_code: completion code of the failed atom -+ * -+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but -+ * unlike other failure codes we _can_ re-run them. -+ * -+ * This forms step 1 in a 2-step process of removing any related atoms from a -+ * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have -+ * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). -+ * -+ * This step only removes the atoms from the HW, and marks them as -+ * (potentially) ready to run again. -+ * -+ * Step 2 is on marking the JSn_HEAD atom as complete -+ * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS -+ * as appropriate, or re-submit them. -+ * -+ * Hence, this function must evict at a minimum the atoms related to the atom -+ * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable -+ * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as -+ * the next kbase_backend_slot_update() will resubmit any remaining. -+ * -+ * Return: true if an atom was evicted, false otherwise. 
-+ */ -+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) -+{ -+ struct kbase_jd_atom *katom; -+ struct kbase_jd_atom *next_katom; ++ccflags-y += \ ++ -I$(srctree)/include/linux \ ++ -I$(srctree)/drivers/staging/android \ ++ -I$(src) \ ++ -I$(src)/platform/$(MALI_PLATFORM_DIR) \ ++ -I$(src)/../../../base \ ++ -I$(src)/../../../../include + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++subdir-ccflags-y += $(ccflags-y) + -+ katom = kbase_gpu_inspect(kbdev, js, 0); -+ if (!katom) { -+ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); -+ return false; -+ } -+ next_katom = kbase_gpu_inspect(kbdev, js, 1); ++# ++# Kernel Modules ++# ++obj-$(CONFIG_MALI_BIFROST) += bifrost_kbase.o ++obj-$(CONFIG_MALI_KUTF) += tests/ + -+ if (next_katom && -+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && -+ (kbase_rb_atom_might_depend(katom, next_katom) || -+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && -+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || -+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), -+ JS_COMMAND_NOP); -+ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; ++bifrost_kbase-y := \ ++ mali_kbase_cache_policy.o \ ++ mali_kbase_ccswe.o \ ++ mali_kbase_mem.o \ ++ mali_kbase_mem_migrate.o \ ++ mali_kbase_mem_pool_group.o \ ++ mali_kbase_native_mgm.o \ ++ mali_kbase_ctx_sched.o \ ++ mali_kbase_gpuprops.o \ ++ mali_kbase_pm.o \ ++ mali_kbase_config.o \ ++ mali_kbase_kinstr_prfcnt.o \ ++ mali_kbase_vinstr.o \ ++ mali_kbase_softjobs.o \ ++ mali_kbase_hw.o \ ++ mali_kbase_debug.o \ ++ mali_kbase_gpu_memory_debugfs.o \ ++ mali_kbase_mem_linux.o \ ++ mali_kbase_core_linux.o \ ++ mali_kbase_mem_profile_debugfs.o \ ++ mali_kbase_disjoint_events.o \ ++ mali_kbase_debug_mem_view.o \ ++ mali_kbase_debug_mem_zones.o \ ++ mali_kbase_debug_mem_allocs.o \ ++ mali_kbase_smc.o \ ++ mali_kbase_mem_pool.o \ ++ mali_kbase_mem_pool_debugfs.o \ ++ mali_kbase_debugfs_helper.o \ ++ mali_kbase_strings.o \ ++ mali_kbase_as_fault_debugfs.o \ ++ mali_kbase_regs_history_debugfs.o \ ++ mali_kbase_dvfs_debugfs.o \ ++ mali_power_gpu_frequency_trace.o \ ++ mali_kbase_trace_gpu_mem.o \ ++ mali_kbase_pbha.o + -+ if (completion_code == BASE_JD_EVENT_STOPPED) { -+ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [next_katom->slot_nr]); -+ KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as -+ [next_katom->kctx->as_nr]); -+ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [next_katom->slot_nr]); -+ } ++bifrost_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o + -+ if (next_katom->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++bifrost_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o + -+ /* On evicting the next_katom, the last submission kctx on the -+ * given job slot then reverts back to the one that owns katom. -+ * The aim is to enable the next submission that can determine -+ * if the read only shader core L1 cache should be invalidated. 
-+ */ -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = -+ SLOT_RB_TAG_KCTX(katom->kctx); ++bifrost_kbase-$(CONFIG_SYNC_FILE) += \ ++ mali_kbase_fence_ops.o \ ++ mali_kbase_sync_file.o \ ++ mali_kbase_sync_common.o + -+ return true; -+ } ++ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += \ ++ mali_kbase_jm.o \ ++ mali_kbase_dummy_job_wa.o \ ++ mali_kbase_debug_job_fault.o \ ++ mali_kbase_event.o \ ++ mali_kbase_jd.o \ ++ mali_kbase_jd_debugfs.o \ ++ mali_kbase_js.o \ ++ mali_kbase_js_ctx_attr.o \ ++ mali_kbase_kinstr_jm.o + -+ return false; -+} ++ bifrost_kbase-$(CONFIG_SYNC_FILE) += \ ++ mali_kbase_fence_ops.o \ ++ mali_kbase_fence.o ++endif + -+/** -+ * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD -+ * @kbdev: kbase device -+ * @js: job slot to check -+ * @completion_code: completion code of the completed atom -+ * @job_tail: value read from JSn_TAIL, for STOPPED atoms -+ * @end_timestamp: pointer to approximate ktime value when the katom completed -+ * -+ * Among other operations, this also executes step 2 of a 2-step process of -+ * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), -+ * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index -+ * 0). The first step is done in kbase_gpu_irq_evict(). -+ * -+ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but -+ * unlike other failure codes we _can_ re-run them. -+ * -+ * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue -+ * and return to the JS some (usually all) of the atoms evicted from the HW -+ * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an -+ * atom, that atom must not have been running or must already be evicted, as -+ * otherwise we would be in the incorrect state of having an atom both running -+ * on the HW and returned to the JS. -+ */ + -+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, -+ u64 job_tail, ktime_t *end_timestamp) -+{ -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); -+ struct kbase_context *kctx = NULL; ++INCLUDE_SUBDIR = \ ++ $(src)/context/Kbuild \ ++ $(src)/debug/Kbuild \ ++ $(src)/device/Kbuild \ ++ $(src)/backend/gpu/Kbuild \ ++ $(src)/mmu/Kbuild \ ++ $(src)/tl/Kbuild \ ++ $(src)/hwcnt/Kbuild \ ++ $(src)/gpu/Kbuild \ ++ $(src)/thirdparty/Kbuild \ ++ $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild + -+ if (unlikely(!katom)) { -+ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); -+ return; -+ } ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ INCLUDE_SUBDIR += $(src)/csf/Kbuild ++endif + -+ kctx = katom->kctx; ++ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) ++ INCLUDE_SUBDIR += $(src)/arbiter/Kbuild ++endif + -+ dev_dbg(kbdev->dev, -+ "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", -+ (void *)katom, completion_code, job_tail, js); ++ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y) ++ ifeq ($(CONFIG_DEVFREQ_THERMAL),y) ++ INCLUDE_SUBDIR += $(src)/ipa/Kbuild ++ endif ++endif + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++ -include $(INCLUDE_SUBDIR) ++else ++# out-of-tree ++ include $(INCLUDE_SUBDIR) ++endif +diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig +new file mode 100644 +index 000000000..ca3da57cf +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/Kconfig +@@ -0,0 +1,389 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ /* -+ * When a hard-stop is followed close after a soft-stop, the completion -+ * code may be set to STOPPED, even though the job is terminated -+ */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { -+ if (completion_code == BASE_JD_EVENT_STOPPED && -+ (katom->atom_flags & -+ KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { -+ completion_code = BASE_JD_EVENT_TERMINATED; -+ } -+ } ++menuconfig MALI_BIFROST ++ tristate "Mali Bifrost series support" ++ select GPU_TRACEPOINTS if ANDROID ++ select DMA_SHARED_BUFFER ++ select FW_LOADER ++ default n ++ help ++ Enable this option to build support for a ARM Mali Bifrost GPU. + -+ if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && -+ completion_code != BASE_JD_EVENT_DONE && -+ !(completion_code & BASE_JD_SW_EVENT)) { -+ /* When a job chain fails, on a T60x or when -+ * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not -+ * flushed. To prevent future evictions causing possible memory -+ * corruption we need to flush the cache manually before any -+ * affected memory gets reused. -+ */ -+ katom->need_cache_flush_cores_retained = true; -+ } ++ To compile this driver as a module, choose M here: ++ this will generate a single module, called mali_kbase. + -+ katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); ++if MALI_BIFROST + -+ if (completion_code == BASE_JD_EVENT_STOPPED) { -+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, -+ 0); ++config MALI_PLATFORM_NAME ++ depends on MALI_BIFROST ++ string "Platform name" ++ default "devicetree" ++ help ++ Enter the name of the desired platform configuration directory to ++ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must ++ exist. + -+ /* -+ * Dequeue next atom from ringbuffers on same slot if required. -+ * This atom will already have been removed from the NEXT -+ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that -+ * the atoms on this slot are returned in the correct order. -+ */ -+ if (next_katom && -+ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { -+ WARN_ON(next_katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED); -+ kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); -+ kbase_jm_return_atom_to_js(kbdev, next_katom); -+ } -+ } else if (completion_code != BASE_JD_EVENT_DONE) { -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ unsigned int i; ++choice ++ prompt "Mali HW backend" ++ depends on MALI_BIFROST ++ default MALI_REAL_HW + -+ if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { -+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", -+ js, completion_code, -+ kbase_gpu_exception_name( -+ completion_code)); ++config MALI_REAL_HW ++ bool "Enable build of Mali kernel driver for real HW" ++ depends on MALI_BIFROST ++ help ++ This is the default HW backend. 
+ -+ } ++config MALI_BIFROST_NO_MALI ++ bool "Enable build of Mali kernel driver for No Mali" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ help ++ This can be used to test the driver in a simulated environment ++ whereby the hardware is not physically present. If the hardware is physically ++ present it will not be used. This can be used to test the majority of the ++ driver without needing actual hardware or for software benchmarking. ++ All calls to the simulated hardware will complete immediately as if the hardware ++ completed the task. + -+#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 -+ KBASE_KTRACE_DUMP(kbdev); -+#endif -+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + -+ /* -+ * Remove all atoms on the same context from ringbuffers. This -+ * will not remove atoms that are already on the GPU, as these -+ * are guaranteed not to have fail dependencies on the failed -+ * atom. -+ */ -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { -+ struct kbase_jd_atom *katom_idx0 = -+ kbase_gpu_inspect(kbdev, i, 0); -+ struct kbase_jd_atom *katom_idx1 = -+ kbase_gpu_inspect(kbdev, i, 1); ++endchoice + -+ if (katom_idx0 && -+ kbase_rb_atom_might_depend(katom, katom_idx0) && -+ katom_idx0->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Dequeue katom_idx0 from ringbuffer */ -+ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); ++menu "Platform specific options" ++source "drivers/gpu/arm/bifrost/platform/Kconfig" ++endmenu + -+ if (katom_idx1 && kbase_rb_atom_might_depend( -+ katom, katom_idx1) && -+ katom_idx0->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Dequeue katom_idx1 from ringbuffer */ -+ kbase_gpu_dequeue_atom(kbdev, i, -+ end_timestamp); ++config MALI_CSF_SUPPORT ++ bool "Enable Mali CSF based GPU support" ++ default n ++ help ++ Enables support for CSF based GPUs. + -+ katom_idx1->event_code = -+ BASE_JD_EVENT_STOPPED; -+ kbase_jm_return_atom_to_js(kbdev, -+ katom_idx1); -+ } -+ katom_idx0->event_code = BASE_JD_EVENT_STOPPED; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx0); ++config MALI_BIFROST_DEVFREQ ++ bool "Enable devfreq support for Mali" ++ depends on MALI_BIFROST && PM_DEVFREQ ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ default y ++ help ++ Support devfreq for Mali. + -+ } else if (katom_idx1 && kbase_rb_atom_might_depend( -+ katom, katom_idx1) && -+ katom_idx1->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Can not dequeue this atom yet - will be -+ * dequeued when atom at idx0 completes -+ */ -+ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; -+ kbase_gpu_mark_atom_for_return(kbdev, -+ katom_idx1); -+ } -+ } -+ } ++ Using the devfreq framework and, by default, the simple on-demand ++ governor, the frequency of Mali will be dynamically selected from the ++ available OPPs. + -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); ++config MALI_BIFROST_DVFS ++ bool "Enable legacy DVFS" ++ depends on MALI_BIFROST && !MALI_BIFROST_DEVFREQ ++ default n ++ help ++ Choose this option to enable legacy DVFS in the Mali Midgard DDK. + -+ if (job_tail != 0 && job_tail != katom->jc) { -+ /* Some of the job has been executed */ -+ dev_dbg(kbdev->dev, -+ "Update job chain address of atom %pK to resume from 0x%llx\n", -+ (void *)katom, job_tail); ++config MALI_BIFROST_GATOR_SUPPORT ++ bool "Enable Streamline tracing support" ++ depends on MALI_BIFROST ++ default y ++ help ++ Enables kbase tracing used by the Arm Streamline Performance Analyzer. 
++ The tracepoints are used to derive GPU activity charts in Streamline. + -+ katom->jc = job_tail; -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, -+ katom, job_tail, js); -+ } ++config MALI_BIFROST_ENABLE_TRACE ++ bool "Enable kbase tracing" ++ depends on MALI_BIFROST ++ default y if MALI_BIFROST_DEBUG ++ default n ++ help ++ Enables tracing in kbase. Trace log available through ++ the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + -+ /* Only update the event code for jobs that weren't cancelled */ -+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) -+ katom->event_code = (enum base_jd_event_code)completion_code; ++config MALI_ARBITER_SUPPORT ++ bool "Enable arbiter support for Mali" ++ depends on MALI_BIFROST && !MALI_CSF_SUPPORT ++ default n ++ help ++ Enable support for the arbiter interface in the driver. ++ This allows an external arbiter to manage driver access ++ to GPU hardware in a virtualized environment + -+ /* Complete the job, and start new ones -+ * -+ * Also defer remaining work onto the workqueue: -+ * - Re-queue Soft-stopped jobs -+ * - For any other jobs, queue the job back into the dependency system -+ * - Schedule out the parent context if necessary, and schedule a new -+ * one in. -+ */ -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ { -+ /* The atom in the HEAD */ -+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, -+ 0); ++ If unsure, say N. + -+ if (next_katom && next_katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ char js_string[16]; ++config MALI_DMA_BUF_MAP_ON_DEMAND ++ bool "Enable map imported dma-bufs on demand" ++ depends on MALI_BIFROST ++ default n ++ help ++ This option will cause kbase to set up the GPU mapping of imported ++ dma-buf when needed to run atoms. This is the legacy behavior. + -+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, -+ js_string, -+ sizeof(js_string)), -+ ktime_to_ns(*end_timestamp), -+ (u32)next_katom->kctx->id, 0, -+ next_katom->work_id); -+ } else { -+ char js_string[16]; ++ This is intended for testing and the option will get removed in the ++ future. + -+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, -+ sizeof(js_string)), -+ ktime_to_ns(ktime_get_raw()), 0, 0, 0); -+ } -+ } -+#endif ++config MALI_DMA_BUF_LEGACY_COMPAT ++ bool "Enable legacy compatibility cache flush on dma-buf map" ++ depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND ++ default n ++ help ++ This option enables compatibility with legacy dma-buf mapping ++ behavior, then the dma-buf is mapped on import, by adding cache ++ maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, ++ including a cache flush. + -+ if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) -+ kbase_reset_gpu_silent(kbdev); ++ This option might work-around issues related to missing cache ++ flushes in other drivers. This only has an effect for clients using ++ UK 11.18 or older. For later UK versions it is not possible. 
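To illustrate how the options introduced above combine in practice, a board configuration that builds the driver as a module with devfreq-based frequency scaling might contain a fragment like the following sketch (values are only an example; the symbols are the ones defined in this Kconfig):

# Hypothetical defconfig fragment for a Bifrost GPU platform (illustrative only)
CONFIG_MALI_BIFROST=m
CONFIG_MALI_PLATFORM_NAME="devicetree"
CONFIG_MALI_BIFROST_DEVFREQ=y
CONFIG_MALI_BIFROST_GATOR_SUPPORT=y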
+ -+ if (completion_code == BASE_JD_EVENT_STOPPED) -+ katom = kbase_jm_return_atom_to_js(kbdev, katom); -+ else -+ katom = kbase_jm_complete(kbdev, katom, end_timestamp); ++config MALI_CORESIGHT ++ depends on MALI_BIFROST && MALI_CSF_SUPPORT && !MALI_BIFROST_NO_MALI ++ bool "Enable Kbase CoreSight tracing support" ++ default n + -+ if (katom) { -+ dev_dbg(kbdev->dev, -+ "Cross-slot dependency %pK has become runnable.\n", -+ (void *)katom); ++menuconfig MALI_BIFROST_EXPERT ++ depends on MALI_BIFROST ++ bool "Enable Expert Settings" ++ default n ++ help ++ Enabling this option and modifying the default settings may produce ++ a driver with performance or other limitations. + -+ /* Check if there are lower priority jobs to soft stop */ -+ kbase_job_slot_ctx_priority_check_locked(kctx, katom); ++if MALI_BIFROST_EXPERT + -+ kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); -+ } ++config LARGE_PAGE_ALLOC_OVERRIDE ++ bool "Override default setting of 2MB pages" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ An override config for LARGE_PAGE_ALLOC config. ++ When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be ++ enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be ++ enabled when GPU HW satisfies requirements. + -+ /* For partial shader core off L2 cache flush */ -+ kbase_pm_update_state(kbdev); ++ If in doubt, say N + -+ /* Job completion may have unblocked other atoms. Try to update all job -+ * slots -+ */ -+ kbase_backend_slot_update(kbdev); -+} ++config LARGE_PAGE_ALLOC ++ bool "Attempt to allocate 2MB pages" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ Rather than allocating all GPU memory page-by-page, attempt to ++ allocate 2MB pages from the kernel. This reduces TLB pressure and ++ helps to prevent memory fragmentation. + -+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) -+{ -+ unsigned int js; ++ Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config ++ is enabled and enabling this on a GPU HW that does not satisfy ++ requirements can cause serious problem. + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ If in doubt, say N + -+ /* Reset should always take the GPU out of protected mode */ -+ WARN_ON(kbase_gpu_in_protected_mode(kbdev)); ++config MALI_MEMORY_FULLY_BACKED ++ bool "Enable memory fully physically-backed" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ This option enables full physical backing of all virtual ++ memory allocations in the kernel. Notice that this build ++ option only affects allocations of grow-on-GPU-page-fault ++ memory. + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int atom_idx = 0; -+ int idx; ++config MALI_CORESTACK ++ bool "Enable support of GPU core stack power control" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ Enabling this feature on supported GPUs will let the driver powering ++ on/off the GPU core stack independently without involving the Power ++ Domain Controller. This should only be enabled on platforms which ++ integration of the PDC to the Mali GPU is known to be problematic. ++ This feature is currently only supported on t-Six and t-HEx GPUs. + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, atom_idx); -+ bool keep_in_jm_rb = false; ++ If unsure, say N. 
+ -+ if (!katom) -+ break; -+ if (katom->protected_state.exit == -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { -+ /* protected mode sanity checks */ -+ WARN(kbase_jd_katom_is_protected(katom) != -+ kbase_gpu_in_protected_mode(kbdev), -+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", -+ kbase_jd_katom_is_protected(katom), -+ kbase_gpu_in_protected_mode(kbdev)); -+ WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && -+ kbase_jd_katom_is_protected(katom), -+ "Protected atom on JS%u not supported", js); -+ } -+ if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && -+ !kbase_ctx_flag(katom->kctx, KCTX_DYING)) -+ keep_in_jm_rb = true; ++comment "Platform options" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT + -+ kbase_gpu_release_atom(kbdev, katom, NULL); ++config MALI_BIFROST_ERROR_INJECT ++ bool "Enable No Mali error injection" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI ++ default n ++ help ++ Enables insertion of errors to test module failure and recovery mechanisms. + -+ /* -+ * If the atom wasn't on HW when the reset was issued -+ * then leave it in the RB and next time we're kicked -+ * it will be processed again from the starting state. -+ */ -+ if (keep_in_jm_rb) { -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; -+ /* As the atom was not removed, increment the -+ * index so that we read the correct atom in the -+ * next iteration. -+ */ -+ atom_idx++; -+ continue; -+ } ++comment "Debug options" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT + -+ /* -+ * The atom was on the HW when the reset was issued -+ * all we can do is fail the atom. -+ */ -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ kbase_jm_complete(kbdev, katom, end_timestamp); -+ } ++config MALI_BIFROST_DEBUG ++ bool "Enable debug build" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ Select this option for increased checking and reporting of errors. + -+ /* Clear the slot's last katom submission kctx on reset */ -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; -+ } ++config MALI_BIFROST_FENCE_DEBUG ++ bool "Enable debug sync fence usage" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && SYNC_FILE ++ default y if MALI_BIFROST_DEBUG ++ help ++ Select this option to enable additional checking and reporting on the ++ use of sync fences in the Mali driver. + -+ /* Re-enable GPU hardware counters if we're resetting from protected -+ * mode. -+ */ -+ kbdev->protected_mode_hwcnt_desired = true; -+ if (kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbdev->protected_mode_hwcnt_disabled = false; ++ This will add a 3s timeout to all sync fence waits in the Mali ++ driver, so that when work for Mali has been waiting on a sync fence ++ for a long time a debug message will be printed, detailing what fence ++ is causing the block, and which dependent Mali atoms are blocked as a ++ result of this. + -+ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev); -+ } ++ The timeout can be changed at runtime through the js_soft_timeout ++ device attribute, where the timeout is specified in milliseconds. 
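For a debug-oriented build, the expert and debug options described above could be combined as in this sketch (it assumes CONFIG_SYNC_FILE is already enabled, since MALI_BIFROST_FENCE_DEBUG depends on it):

# Hypothetical debug configuration sketch (illustrative only)
CONFIG_MALI_BIFROST_EXPERT=y
CONFIG_MALI_BIFROST_DEBUG=y
CONFIG_MALI_BIFROST_FENCE_DEBUG=y
CONFIG_MALI_BIFROST_ENABLE_TRACE=y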
+ -+ kbdev->protected_mode_transition = false; -+ kbase_pm_protected_override_disable(kbdev); -+} ++config MALI_BIFROST_SYSTEM_TRACE ++ bool "Enable system event tracing support" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default y if MALI_BIFROST_DEBUG ++ default n ++ help ++ Choose this option to enable system trace events for each ++ kbase event. This is typically used for debugging but has ++ minimal overhead when not in use. Enable only if you know what ++ you are doing. + -+/** -+ * should_stop_next_atom - given a soft/hard stop action, determine if the next -+ * atom on a slot should be stopped -+ * @kbdev: kbase devices -+ * @head_katom: atom currently in the JSn_HEAD -+ * @next_katom: atom currently in the JSn_HEAD_NEXT -+ * @action: JS_COMMAND_<...> action for soft/hard-stop -+ * -+ * This is used in cases where @head_katom is the target of the soft/hard-stop. -+ * It only makes sense to call this when @head_katom and @next_katom are from -+ * the same slot. -+ * -+ * Return: true if @next_katom should also be stopped with the given action, -+ * false otherwise -+ */ -+static bool should_stop_next_atom(struct kbase_device *kbdev, -+ const struct kbase_jd_atom *head_katom, -+ const struct kbase_jd_atom *next_katom, -+ u32 action) -+{ -+ bool ret = false; -+ u32 hw_action = action & JS_COMMAND_MASK; ++comment "Instrumentation options" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT + -+ switch (hw_action) { -+ case JS_COMMAND_SOFT_STOP: -+ ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, -+ 0u); -+ break; -+ case JS_COMMAND_HARD_STOP: -+ /* Unlike soft-stop, a hard-stop targeting a particular atom -+ * should not cause atoms from unrelated contexts to be -+ * removed -+ */ -+ ret = (head_katom->kctx == next_katom->kctx); -+ break; -+ default: -+ /* Other stop actions are possible, but the driver should not -+ * be generating them at this point in the call chain -+ */ -+ WARN(1, "Unexpected stop action: 0x%.8x", hw_action); -+ break; -+ } -+ return ret; -+} ++choice ++ prompt "Select Performance counters set" ++ default MALI_PRFCNT_SET_PRIMARY ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT + -+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, -+ struct kbase_jd_atom *katom, u32 action) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ u32 hw_action = action & JS_COMMAND_MASK; ++config MALI_PRFCNT_SET_PRIMARY ++ bool "Primary" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ help ++ Select this option to use primary set of performance counters. + -+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); -+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, -+ katom->core_req, katom); -+ kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); -+} ++config MALI_BIFROST_PRFCNT_SET_SECONDARY ++ bool "Secondary" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ help ++ Select this option to use secondary set of performance counters. Kernel ++ features that depend on an access to the primary set of counters may ++ become unavailable. Enabling this option will prevent power management ++ from working optimally and may cause instrumentation tools to return ++ bogus results. + -+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ u32 action, -+ bool disjoint) -+{ -+ struct kbase_context *kctx = katom->kctx; ++ If unsure, use MALI_PRFCNT_SET_PRIMARY. 
+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++config MALI_PRFCNT_SET_TERTIARY ++ bool "Tertiary" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ help ++ Select this option to use tertiary set of performance counters. Kernel ++ features that depend on an access to the primary set of counters may ++ become unavailable. Enabling this option will prevent power management ++ from working optimally and may cause instrumentation tools to return ++ bogus results. + -+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_gpu_mark_atom_for_return(kbdev, katom); -+ kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, -+ katom->sched_priority); ++ If unsure, use MALI_PRFCNT_SET_PRIMARY. + -+ if (disjoint) -+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, -+ katom); -+} ++endchoice + -+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) -+{ -+ if (katom->x_post_dep) { -+ struct kbase_jd_atom *dep_atom = katom->x_post_dep; ++config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ bool "Enable runtime selection of performance counters set via debugfs" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS ++ default n ++ help ++ Select this option to make the secondary set of performance counters ++ available at runtime via debugfs. Kernel features that depend on an ++ access to the primary set of counters may become unavailable. + -+ if (dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && -+ dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_RETURN_TO_JS) -+ return dep_atom->slot_nr; -+ } -+ return -1; -+} ++ If no runtime debugfs option is set, the build time counter set ++ choice will be used. + -+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js, struct kbase_jd_atom *katom, u32 action) -+{ -+ struct kbase_jd_atom *katom_idx0; -+ struct kbase_context *kctx_idx0 = NULL; -+ struct kbase_jd_atom *katom_idx1; -+ struct kbase_context *kctx_idx1 = NULL; ++ This feature is unsupported and unstable, and may break at any time. ++ Enabling this option will prevent power management from working ++ optimally and may cause instrumentation tools to return bogus results. + -+ bool katom_idx0_valid, katom_idx1_valid; ++ No validation is done on the debugfs input. Invalid input could cause ++ performance counter errors. Valid inputs are the values accepted by ++ the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the ++ architecture specification. + -+ bool ret = false; ++ If unsure, say N. + -+ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; -+ int prio_idx0 = 0, prio_idx1 = 0; ++config MALI_JOB_DUMP ++ bool "Enable system level support needed for job dumping" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ Choose this option to enable system level support needed for ++ job dumping. This is typically used for instrumentation but has ++ minimal overhead when not in use. Enable only if you know what ++ you are doing. + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++comment "Workarounds" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT + -+ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); -+ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); ++config MALI_PWRSOFT_765 ++ bool "Enable workaround for PWRSOFT-765" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged ++ in kernel v4.10, however if backported into the kernel then this ++ option must be manually selected. 
+ -+ if (katom_idx0) { -+ kctx_idx0 = katom_idx0->kctx; -+ prio_idx0 = katom_idx0->sched_priority; -+ } -+ if (katom_idx1) { -+ kctx_idx1 = katom_idx1->kctx; -+ prio_idx1 = katom_idx1->sched_priority; -+ } ++ If using kernel >= v4.10 then say N, otherwise if devfreq cooling ++ changes have been backported say Y to avoid compilation errors. + -+ if (katom) { -+ katom_idx0_valid = (katom_idx0 == katom); -+ if (katom_idx1) -+ katom_idx1_valid = (katom_idx1 == katom); -+ else -+ katom_idx1_valid = false; -+ } else { -+ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); -+ katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); -+ } -+ /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided -+ * to stop, but we're stopping the JSn_HEAD atom, see if they are -+ * related/ordered in some way that would require the same stop action -+ */ -+ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) -+ katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, -+ katom_idx1, action); ++config MALI_HW_ERRATA_1485982_NOT_AFFECTED ++ bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT ++ default n ++ help ++ This option disables the default workaround for GPU2017-1336. The ++ workaround keeps the L2 cache powered up except for powerdown and reset. + -+ if (katom_idx0_valid) -+ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); -+ if (katom_idx1_valid) -+ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); ++ The workaround introduces a limitation that will prevent the running of ++ protected mode content on fully coherent platforms, as the switch to IO ++ coherency mode requires the L2 to be turned off. + -+ if (katom_idx0_valid) { -+ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Simple case - just dequeue and return */ -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ if (katom_idx1_valid) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ katom_idx1->event_code = -+ BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx1); -+ kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, -+ prio_idx1); -+ } ++config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE ++ bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" ++ depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED ++ default n ++ help ++ This option uses an alternative workaround for GPU2017-1336. Lowering ++ the GPU clock to a, platform specific, known good frequency before ++ powering down the L2 cache. The clock can be specified in the device ++ tree using the property, opp-mali-errata-1485982. Otherwise the ++ slowest clock will be selected. 
+ -+ katom_idx0->event_code = -+ BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx0); -+ kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, -+ prio_idx0); -+ } else { -+ /* katom_idx0 is on GPU */ -+ if (katom_idx1_valid && katom_idx1->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* katom_idx0 and katom_idx1 are on GPU */ ++endif + -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT)) == 0) { -+ /* idx0 has already completed - stop -+ * idx1 if needed -+ */ -+ if (katom_idx1_valid) { -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } else { -+ /* idx1 is in NEXT registers - attempt -+ * to remove -+ */ -+ kbase_reg_write(kbdev, -+ JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), -+ JS_COMMAND_NOP); ++config MALI_ARBITRATION ++ tristate "Enable Virtualization reference code" ++ depends on MALI_BIFROST ++ default n ++ help ++ Enables the build of several reference modules used in the reference ++ virtualization setup for Mali ++ If unsure, say N. + -+ if (kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_LO)) -+ != 0 || -+ kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_HI)) -+ != 0) { -+ /* idx1 removed successfully, -+ * will be handled in IRQ -+ */ -+ kbase_gpu_remove_atom(kbdev, -+ katom_idx1, -+ action, true); -+ /* Revert the last_context. */ -+ kbdev->hwaccess.backend.slot_rb[js] -+ .last_kctx_tagged = -+ SLOT_RB_TAG_KCTX(katom_idx0->kctx); + -+ stop_x_dep_idx1 = -+ should_stop_x_dep_slot(katom_idx1); ++# source "drivers/gpu/arm/bifrost/tests/Kconfig" + -+ /* stop idx0 if still on GPU */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx0, -+ action); -+ ret = true; -+ } else if (katom_idx1_valid) { -+ /* idx0 has already completed, -+ * stop idx1 if needed -+ */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } -+ } else if (katom_idx1_valid) { -+ /* idx1 not on GPU but must be dequeued*/ ++endif +diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile +new file mode 100644 +index 000000000..39df298ff +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/Makefile +@@ -0,0 +1,279 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ /* idx1 will be handled in IRQ */ -+ kbase_gpu_remove_atom(kbdev, katom_idx1, action, -+ false); -+ /* stop idx0 */ -+ /* This will be repeated for anything removed -+ * from the next registers, since their normal -+ * flow was also interrupted, and this function -+ * might not enter disjoint state e.g. 
if we -+ * don't actually do a hard stop on the head -+ * atom -+ */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx0, -+ action); -+ ret = true; -+ } else { -+ /* no atom in idx1 */ -+ /* just stop idx0 */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx0, -+ action); -+ ret = true; -+ } -+ } -+ } else if (katom_idx1_valid) { -+ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Mark for return */ -+ /* idx1 will be returned once idx0 completes */ -+ kbase_gpu_remove_atom(kbdev, katom_idx1, action, -+ false); -+ } else { -+ /* idx1 is on GPU */ -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT)) == 0) { -+ /* idx0 has already completed - stop idx1 */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx1, -+ action); -+ ret = true; -+ } else { -+ /* idx1 is in NEXT registers - attempt to -+ * remove -+ */ -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), -+ JS_COMMAND_NOP); ++KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build ++KDIR ?= $(KERNEL_SRC) + -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_LO)) != 0 || -+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_HI)) != 0) { -+ /* idx1 removed successfully, will be -+ * handled in IRQ once idx0 completes -+ */ -+ kbase_gpu_remove_atom(kbdev, katom_idx1, -+ action, -+ false); -+ /* Revert the last_context, or mark as purged */ -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = -+ kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : -+ SLOT_RB_TAG_PURGED; -+ } else { -+ /* idx0 has already completed - stop -+ * idx1 -+ */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } -+ } -+ } ++ifeq ($(KDIR),) ++ $(error Must specify KDIR to point to the kernel to target)) ++endif + ++# ++# Default configuration values ++# ++# Dependency resolution is done through statements as Kconfig ++# is not supported for out-of-tree builds. 
++# + -+ if (stop_x_dep_idx0 != -1) -+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, -+ NULL, action); ++CONFIG_MALI_BIFROST ?= m ++ifeq ($(CONFIG_MALI_BIFROST),m) ++ CONFIG_MALI_PLATFORM_NAME ?= "devicetree" ++ CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y ++ CONFIG_MALI_ARBITRATION ?= n ++ CONFIG_MALI_PARTITION_MANAGER ?= n + -+ if (stop_x_dep_idx1 != -1) -+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, -+ NULL, action); ++ ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) ++ # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI ++ CONFIG_MALI_REAL_HW ?= y ++ CONFIG_MALI_CORESIGHT = n ++ endif + -+ return ret; -+} ++ ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) ++ # Prevent misuse when CONFIG_MALI_BIFROST_DVFS=y ++ CONFIG_MALI_BIFROST_DEVFREQ ?= n ++ else ++ CONFIG_MALI_BIFROST_DEVFREQ ?= y ++ endif + -+void kbase_backend_cache_clean(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ if (katom->need_cache_flush_cores_retained) { -+ kbase_gpu_start_cache_clean(kbdev, -+ GPU_COMMAND_CACHE_CLN_INV_FULL); -+ kbase_gpu_wait_cache_clean(kbdev); ++ ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) ++ # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y ++ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n ++ endif + -+ katom->need_cache_flush_cores_retained = false; -+ } -+} ++ ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) ++ CONFIG_MALI_CORESIGHT ?= n ++ endif + -+void kbase_backend_complete_wq(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ /* -+ * If cache flush required due to HW workaround then perform the flush -+ * now -+ */ -+ kbase_backend_cache_clean(kbdev, katom); -+} ++ # ++ # Expert/Debug/Test released configurations ++ # ++ ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) ++ ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) ++ CONFIG_MALI_REAL_HW = n + -+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, -+ base_jd_core_req core_req) -+{ -+ if (!kbdev->pm.active_count) { -+ kbase_pm_lock(kbdev); -+ kbase_pm_update_active(kbdev); -+ kbase_pm_unlock(kbdev); -+ } -+} ++ else ++ # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n ++ CONFIG_MALI_REAL_HW = y ++ CONFIG_MALI_BIFROST_ERROR_INJECT = n ++ endif + -+void kbase_gpu_dump_slots(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ unsigned int js; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) ++ # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y ++ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n ++ endif + -+ dev_info(kbdev->dev, "%s:\n", __func__); ++ ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) ++ CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y ++ CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int idx; ++ ifeq ($(CONFIG_SYNC_FILE), y) ++ CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y ++ else ++ CONFIG_MALI_BIFROST_FENCE_DEBUG = n ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n ++ CONFIG_MALI_BIFROST_ENABLE_TRACE = n ++ CONFIG_MALI_BIFROST_SYSTEM_TRACE = n ++ CONFIG_MALI_BIFROST_FENCE_DEBUG = n ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n ++ CONFIG_MALI_CORESTACK = n ++ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n ++ CONFIG_LARGE_PAGE_ALLOC = n ++ CONFIG_MALI_PWRSOFT_765 = n ++ CONFIG_MALI_MEMORY_FULLY_BACKED = n ++ CONFIG_MALI_JOB_DUMP = n ++ CONFIG_MALI_BIFROST_NO_MALI = n ++ CONFIG_MALI_REAL_HW = y ++ CONFIG_MALI_BIFROST_ERROR_INJECT = n ++ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n ++ 
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n ++ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n ++ CONFIG_MALI_BIFROST_DEBUG = n ++ CONFIG_MALI_BIFROST_ENABLE_TRACE = n ++ CONFIG_MALI_BIFROST_SYSTEM_TRACE = n ++ CONFIG_MALI_BIFROST_FENCE_DEBUG = n ++ endif + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, -+ idx); ++ ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) ++ CONFIG_MALI_KUTF ?= y ++ ifeq ($(CONFIG_MALI_KUTF), y) ++ CONFIG_MALI_KUTF_IRQ_TEST ?= y ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y ++ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y ++ else ++ # Prevent misuse when CONFIG_MALI_KUTF=n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n ++ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n ++ CONFIG_MALI_KUTF = n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n ++ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n ++ endif ++else ++ # Prevent misuse when CONFIG_MALI_BIFROST=n ++ CONFIG_MALI_ARBITRATION = n ++ CONFIG_MALI_KUTF = n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n ++ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n ++endif + -+ if (katom) -+ dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", -+ js, idx, katom, katom->gpu_rb_state); -+ else -+ dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); -+ } -+ } ++# All Mali CONFIG should be listed here ++CONFIGS := \ ++ CONFIG_MALI_BIFROST \ ++ CONFIG_MALI_CSF_SUPPORT \ ++ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ ++ CONFIG_MALI_ARBITER_SUPPORT \ ++ CONFIG_MALI_ARBITRATION \ ++ CONFIG_MALI_PARTITION_MANAGER \ ++ CONFIG_MALI_REAL_HW \ ++ CONFIG_MALI_BIFROST_DEVFREQ \ ++ CONFIG_MALI_BIFROST_DVFS \ ++ CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ ++ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ ++ CONFIG_MALI_BIFROST_EXPERT \ ++ CONFIG_MALI_CORESTACK \ ++ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ ++ CONFIG_LARGE_PAGE_ALLOC \ ++ CONFIG_MALI_PWRSOFT_765 \ ++ CONFIG_MALI_MEMORY_FULLY_BACKED \ ++ CONFIG_MALI_JOB_DUMP \ ++ CONFIG_MALI_BIFROST_NO_MALI \ ++ CONFIG_MALI_BIFROST_ERROR_INJECT \ ++ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ ++ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ ++ CONFIG_MALI_PRFCNT_SET_PRIMARY \ ++ CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY \ ++ CONFIG_MALI_PRFCNT_SET_TERTIARY \ ++ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ ++ CONFIG_MALI_BIFROST_DEBUG \ ++ CONFIG_MALI_BIFROST_ENABLE_TRACE \ ++ CONFIG_MALI_BIFROST_SYSTEM_TRACE \ ++ CONFIG_MALI_BIFROST_FENCE_DEBUG \ ++ CONFIG_MALI_KUTF \ ++ CONFIG_MALI_KUTF_IRQ_TEST \ ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ ++ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ ++ CONFIG_MALI_XEN \ ++ CONFIG_MALI_CORESIGHT + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} + -+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ unsigned int js; -+ bool tracked = false; ++THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) ++-include $(THIS_DIR)/../arbitration/Makefile + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs and values. ++# $(value config) is the name of the CONFIG option. ++# $(value $(value config)) is its value (y, m). ++# When the CONFIG is not set to y or m, it defaults to n. 
++MAKE_ARGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ $(value config)=$(value $(value config)), \ ++ $(value config)=n)) + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; ++MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) + -+ if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { -+ /* Marking the slot kctx tracking field is purged */ -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; -+ tracked = true; -+ } -+ } ++# ++# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs defines with values from CONFIGS. ++# $(value config) is the name of the CONFIG option. ++# When set to y or m, the CONFIG gets defined to 1. ++EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ -D$(value config)=1)) + -+ if (tracked) { -+ /* The context had run some jobs before the purge, other slots -+ * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as -+ * purged as well. -+ */ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == -+ SLOT_RB_NULL_TAG_VAL) -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = -+ SLOT_RB_TAG_PURGED; -+ } -+ } -+} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h ++EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) ++ ++# ++# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions ++# ++ ++KBUILD_CFLAGS += -Wall -Werror ++ ++# The following were added to align with W=1 in scripts/Makefile.extrawarn ++# from the Linux source tree (v5.18.14) ++KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter ++KBUILD_CFLAGS += -Wmissing-declarations ++KBUILD_CFLAGS += -Wmissing-format-attribute ++KBUILD_CFLAGS += -Wmissing-prototypes ++KBUILD_CFLAGS += -Wold-style-definition ++# The -Wmissing-include-dirs cannot be enabled as the path to some of the ++# included directories change depending on whether it is an in-tree or ++# out-of-tree build. ++KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) ++KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) ++KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) ++KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) ++# The following turn off the warnings enabled by -Wextra ++KBUILD_CFLAGS += -Wno-sign-compare ++KBUILD_CFLAGS += -Wno-shift-negative-value ++# This flag is needed to avoid build errors on older kernels ++KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) ++ ++KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 ++ ++# The following were added to align with W=2 in scripts/Makefile.extrawarn ++# from the Linux source tree (v5.18.14) ++KBUILD_CFLAGS += -Wdisabled-optimization ++# The -Wshadow flag cannot be enabled unless upstream kernels are ++# patched to fix redefinitions of certain built-in functions and ++# global variables. 
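The MAKE_ARGS and EXTRA_CFLAGS lists generated above stand in for Kconfig in an out-of-tree build: every option that resolves to y or m is passed to kbuild on the command line and is also defined to 1 for the preprocessor, while options resolved to n are simply not defined. A minimal sketch of how a translation unit can then keep using the usual in-kernel config idioms follows; the helper name is hypothetical and not taken from the driver.

#include <linux/kconfig.h>	/* IS_ENABLED() */
#include <linux/printk.h>

/* Illustrative only: reports whether the DVFS option was resolved to y/m. */
static void __maybe_unused example_report_dvfs(void)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_DVFS)
	/* compiled when EXTRA_CFLAGS supplied -DCONFIG_MALI_BIFROST_DVFS=1 */
	pr_info("mali: built with DVFS support\n");
#else
	/* no define was emitted, so IS_ENABLED() evaluates to 0 */
	pr_info("mali: built without DVFS support\n");
#endif
}

When the option is disabled, no -D define is emitted by the EXTRA_CFLAGS rule above, so IS_ENABLED() evaluates to 0 and the alternate branch is compiled.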
++KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) ++KBUILD_CFLAGS += -Wmissing-field-initializers ++# -Wtype-limits must be disabled due to build failures on kernel 5.x ++KBUILD_CFLAGS += -Wno-type-limit ++KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) ++KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) ++ ++KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 ++ ++# This warning is disabled to avoid build failures in some kernel versions ++KBUILD_CFLAGS += -Wno-ignored-qualifiers ++ ++ifeq ($(CONFIG_GCOV_KERNEL),y) ++ KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) ++ KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) ++ EXTRA_CFLAGS += -DGCOV_PROFILE=1 ++endif ++ ++ifeq ($(CONFIG_MALI_KCOV),y) ++ KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) ++ EXTRA_CFLAGS += -DKCOV=1 ++ EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 ++endif ++ ++all: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install ++ ++clean: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean +diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild +new file mode 100755 +index 000000000..2e6b11144 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++bifrost_kbase-y += \ ++ arbiter/mali_kbase_arbif.o \ ++ arbiter/mali_kbase_arbiter_pm.o +diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c new file mode 100644 -index 000000000..32be0bf44 +index 000000000..b5d3cd685 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h -@@ -0,0 +1,77 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c +@@ -0,0 +1,357 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -123921,455 +124089,352 @@ index 000000000..32be0bf44 + * + */ + -+/* -+ * Register-based HW access backend specific APIs ++/** ++ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines + */ + -+#ifndef _KBASE_HWACCESS_GPU_H_ -+#define _KBASE_HWACCESS_GPU_H_ ++#include ++#include "mali_kbase_arbif.h" ++#include ++#include ++#include ++#include "linux/mali_arbiter_interface.h" + -+#include ++/* Arbiter interface version against which was implemented this module */ ++#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 ++#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ ++ MALI_ARBITER_INTERFACE_VERSION ++#error "Unsupported Mali Arbiter interface version." ++#endif ++ ++static void on_max_config(struct device *dev, uint32_t max_l2_slices, ++ uint32_t max_core_mask) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ if (!max_l2_slices || !max_core_mask) { ++ dev_dbg(dev, ++ "%s(): max_config ignored as one of the fields is zero", ++ __func__); ++ return; ++ } ++ ++ /* set the max config info in the kbase device */ ++ kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); ++} + +/** -+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot -+ * -+ * @kbdev: Device pointer -+ * @js: Job slot to evict from -+ * @completion_code: Event code from job that was run. -+ * -+ * Evict the atom in the NEXT slot for the specified job slot. This function is -+ * called from the job complete IRQ handler when the previous job has failed. 
++ * on_update_freq() - Updates GPU clock frequency ++ * @dev: arbiter interface device handle ++ * @freq: GPU clock frequency value reported from arbiter + * -+ * Return: true if job evicted from NEXT registers, false otherwise ++ * call back function to update GPU clock frequency with ++ * new value from arbiter + */ -+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); ++static void on_update_freq(struct device *dev, uint32_t freq) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); ++} + +/** -+ * kbase_gpu_complete_hw - Complete an atom on job slot js ++ * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop ++ * @dev: arbiter interface device handle + * -+ * @kbdev: Device pointer -+ * @js: Job slot that has completed -+ * @completion_code: Event code from job that has completed -+ * @job_tail: The tail address from the hardware if the job has partially -+ * completed -+ * @end_timestamp: Time of completion ++ * call back function to signal a GPU STOP event from arbiter interface + */ -+void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, -+ u64 job_tail, ktime_t *end_timestamp); ++static void on_gpu_stop(struct device *dev) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); ++} + +/** -+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer ++ * on_gpu_granted() - sends KBASE_VM_GPU_GRANTED_EVT event on GPU granted ++ * @dev: arbiter interface device handle + * -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * @idx: Index into ringbuffer. 0 is the job currently running on -+ * the slot, 1 is the job waiting, all other values are invalid. 
-+ * Return: The atom at that position in the ringbuffer -+ * or NULL if no atom present ++ * call back function to signal a GPU GRANT event from arbiter interface + */ -+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); ++static void on_gpu_granted(struct device *dev) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); ++} + +/** -+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers ++ * on_gpu_lost() - sends KBASE_VM_GPU_LOST_EVT event on GPU granted ++ * @dev: arbiter interface device handle + * -+ * @kbdev: Device pointer ++ * call back function to signal a GPU LOST event from arbiter interface + */ -+void kbase_gpu_dump_slots(struct kbase_device *kbdev); ++static void on_gpu_lost(struct device *dev) ++{ ++ struct kbase_device *kbdev; + -+#endif /* _KBASE_HWACCESS_GPU_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c -new file mode 100644 -index 000000000..cbc88f91a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c -@@ -0,0 +1,377 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } + -+/* -+ * Register-based HW access backend specific job scheduler APIs -+ */ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } + -+#include -+#include -+#include -+#include -+#include ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); ++} + -+#if !MALI_USE_CSF -+/* -+ * Hold the runpool_mutex for this ++/** ++ * kbase_arbif_init() - Kbase Arbiter interface initialisation. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Initialise Kbase Arbiter interface and assign callback functions. ++ * ++ * Return: ++ * * 0 - the interface was initialized or was not specified ++ * * in the device tree. ++ * * -EFAULT - the interface was specified but failed to initialize. ++ * * -EPROBE_DEFER - module dependencies are not yet available. 
+ */ -+static inline bool timer_callback_should_run(struct kbase_device *kbdev) ++int kbase_arbif_init(struct kbase_device *kbdev) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ int nr_running_ctxs; ++#if IS_ENABLED(CONFIG_OF) ++ struct arbiter_if_arb_vm_ops ops; ++ struct arbiter_if_dev *arb_if; ++ struct device_node *arbiter_if_node; ++ struct platform_device *pdev; ++ int err; + -+ lockdep_assert_held(&kbdev->js_data.runpool_mutex); ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ /* Timer must stop if we are suspending */ -+ if (backend->suspend_timer) -+ return false; ++ arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, ++ "arbiter_if", 0); ++ if (!arbiter_if_node) { ++ dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); ++ /* no arbiter interface defined in device tree */ ++ kbdev->arb.arb_dev = NULL; ++ kbdev->arb.arb_if = NULL; ++ return 0; ++ } + -+ /* nr_contexts_pullable is updated with the runpool_mutex. However, the -+ * locking in the caller gives us a barrier that ensures -+ * nr_contexts_pullable is up-to-date for reading -+ */ -+ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); ++ pdev = of_find_device_by_node(arbiter_if_node); ++ if (!pdev) { ++ dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); ++ return -EPROBE_DEFER; ++ } + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (kbdev->js_data.softstop_always) { -+ /* Debug support for allowing soft-stop on a single context */ -+ return true; ++ if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { ++ dev_err(kbdev->dev, "arbiter_if driver not available\n"); ++ put_device(&pdev->dev); ++ return -EPROBE_DEFER; ++ } ++ kbdev->arb.arb_dev = &pdev->dev; ++ arb_if = platform_get_drvdata(pdev); ++ if (!arb_if) { ++ dev_err(kbdev->dev, "arbiter_if driver not ready\n"); ++ module_put(pdev->dev.driver->owner); ++ put_device(&pdev->dev); ++ return -EPROBE_DEFER; + } -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { -+ /* Timeouts would have to be 4x longer (due to micro- -+ * architectural design) to support OpenCL conformance tests, so -+ * only run the timer when there's: -+ * - 2 or more CL contexts -+ * - 1 or more GLES contexts -+ * -+ * NOTE: We will treat a context that has both Compute and Non- -+ * Compute jobs will be treated as an OpenCL context (hence, we -+ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
-+ */ -+ { -+ int nr_compute_ctxs = -+ kbasep_js_ctx_attr_count_on_runpool(kbdev, -+ KBASEP_JS_CTX_ATTR_COMPUTE); -+ int nr_noncompute_ctxs = nr_running_ctxs - -+ nr_compute_ctxs; ++ kbdev->arb.arb_if = arb_if; ++ ops.arb_vm_gpu_stop = on_gpu_stop; ++ ops.arb_vm_gpu_granted = on_gpu_granted; ++ ops.arb_vm_gpu_lost = on_gpu_lost; ++ ops.arb_vm_max_config = on_max_config; ++ ops.arb_vm_update_freq = on_update_freq; + -+ return (bool) (nr_compute_ctxs >= 2 || -+ nr_noncompute_ctxs > 0); ++ kbdev->arb.arb_freq.arb_freq = 0; ++ kbdev->arb.arb_freq.freq_updated = false; ++ mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); ++ ++ /* register kbase arbiter_if callbacks */ ++ if (arb_if->vm_ops.vm_arb_register_dev) { ++ err = arb_if->vm_ops.vm_arb_register_dev(arb_if, ++ kbdev->dev, &ops); ++ if (err) { ++ dev_err(&pdev->dev, "Failed to register with arbiter\n"); ++ module_put(pdev->dev.driver->owner); ++ put_device(&pdev->dev); ++ if (err != -EPROBE_DEFER) ++ err = -EFAULT; ++ return err; + } -+ } else { -+ /* Run the timer callback whenever you have at least 1 context -+ */ -+ return (bool) (nr_running_ctxs > 0); + } ++ ++#else /* CONFIG_OF */ ++ dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); ++ kbdev->arb.arb_dev = NULL; ++ kbdev->arb.arb_if = NULL; ++#endif ++ return 0; +} + -+static enum hrtimer_restart timer_callback(struct hrtimer *timer) ++/** ++ * kbase_arbif_destroy() - De-init Kbase arbiter interface ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * De-initialise Kbase arbiter interface ++ */ ++void kbase_arbif_destroy(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_backend_data *backend; -+ unsigned int s; -+ bool reset_needed = false; ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ KBASE_DEBUG_ASSERT(timer != NULL); ++ if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); ++ } ++ kbdev->arb.arb_if = NULL; ++ if (kbdev->arb.arb_dev) { ++ module_put(kbdev->arb.arb_dev->driver->owner); ++ put_device(kbdev->arb.arb_dev); ++ } ++ kbdev->arb.arb_dev = NULL; ++} + -+ backend = container_of(timer, struct kbase_backend_data, -+ scheduling_timer); -+ kbdev = container_of(backend, struct kbase_device, hwaccess.backend); -+ js_devdata = &kbdev->js_data; ++/** ++ * kbase_arbif_get_max_config() - Request max config info ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * call back function from arb interface to arbiter requesting max config info ++ */ ++void kbase_arbif_get_max_config(struct kbase_device *kbdev) ++{ ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ /* Loop through the slots */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { -+ struct kbase_jd_atom *atom = NULL; -+ -+ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { -+ atom = kbase_gpu_inspect(kbdev, s, 0); -+ KBASE_DEBUG_ASSERT(atom != NULL); -+ } -+ -+ if (atom != NULL) { -+ /* The current version of the model doesn't support -+ * Soft-Stop -+ */ -+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { -+ u32 ticks = atom->ticks++; -+ -+#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) -+ u32 soft_stop_ticks, hard_stop_ticks, -+ gpu_reset_ticks; -+ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ soft_stop_ticks = -+ 
js_devdata->soft_stop_ticks_cl; -+ hard_stop_ticks = -+ js_devdata->hard_stop_ticks_cl; -+ gpu_reset_ticks = -+ js_devdata->gpu_reset_ticks_cl; -+ } else { -+ soft_stop_ticks = -+ js_devdata->soft_stop_ticks; -+ if (kbase_is_quick_reset_enabled(kbdev)) { -+ hard_stop_ticks = 2; -+ gpu_reset_ticks = 3; -+ } else { -+ hard_stop_ticks = -+ js_devdata->hard_stop_ticks_ss; -+ gpu_reset_ticks = -+ js_devdata->gpu_reset_ticks_ss; -+ } -+ } -+ -+ /* If timeouts have been changed then ensure -+ * that atom tick count is not greater than the -+ * new soft_stop timeout. This ensures that -+ * atoms do not miss any of the timeouts due to -+ * races between this worker and the thread -+ * changing the timeouts. -+ */ -+ if (backend->timeouts_updated && -+ ticks > soft_stop_ticks) -+ ticks = atom->ticks = soft_stop_ticks; -+ -+ /* Job is Soft-Stoppable */ -+ if (ticks == soft_stop_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->soft_stop_ticks ticks. -+ * Soft stop the slot so we can run -+ * other jobs. -+ */ -+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS -+ int disjoint_threshold = -+ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; -+ u32 softstop_flags = 0u; -+ -+ dev_dbg(kbdev->dev, "Soft-stop"); -+ /* nr_user_contexts_running is updated -+ * with the runpool_mutex, but we can't -+ * take that here. -+ * -+ * However, if it's about to be -+ * increased then the new context can't -+ * run any jobs until they take the -+ * hwaccess_lock, so it's OK to observe -+ * the older value. -+ * -+ * Similarly, if it's about to be -+ * decreased, the last job from another -+ * context has already finished, so -+ * it's not too bad that we observe the -+ * older value and register a disjoint -+ * event when we try soft-stopping -+ */ -+ if (js_devdata->nr_user_contexts_running -+ >= disjoint_threshold) -+ softstop_flags |= -+ JS_COMMAND_SW_CAUSES_DISJOINT; -+ -+ kbase_job_slot_softstop_swflags(kbdev, -+ s, atom, softstop_flags); -+#endif -+ } else if (ticks == hard_stop_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->hard_stop_ticks_ss ticks. -+ * It should have been soft-stopped by -+ * now. Hard stop the slot. -+ */ -+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ int ms = -+ js_devdata->scheduling_period_ns -+ / 1000000u; -+ if (!kbase_is_quick_reset_enabled(kbdev)) -+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", -+ (unsigned long)ticks, -+ (unsigned long)ms); -+ kbase_job_slot_hardstop(atom->kctx, s, -+ atom); -+#endif -+ } else if (ticks == gpu_reset_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->gpu_reset_ticks_ss ticks. -+ * It should have left the GPU by now. -+ * Signal that the GPU needs to be -+ * reset. -+ */ -+ reset_needed = true; -+ } -+#else /* !CONFIG_MALI_JOB_DUMP */ -+ /* NOTE: During CONFIG_MALI_JOB_DUMP, we use -+ * the alternate timeouts, which makes the hard- -+ * stop and GPU reset timeout much longer. We -+ * also ensure that we don't soft-stop at all. -+ */ -+ if (ticks == js_devdata->soft_stop_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->soft_stop_ticks. We do -+ * not soft-stop during -+ * CONFIG_MALI_JOB_DUMP, however. -+ */ -+ dev_dbg(kbdev->dev, "Soft-stop"); -+ } else if (ticks == -+ js_devdata->hard_stop_ticks_dumping) { -+ /* Job has been scheduled for at least -+ * js_devdata->hard_stop_ticks_dumping -+ * ticks. Hard stop the slot. 
-+ */ -+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ int ms = -+ js_devdata->scheduling_period_ns -+ / 1000000u; -+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", -+ (unsigned long)ticks, -+ (unsigned long)ms); -+ kbase_job_slot_hardstop(atom->kctx, s, -+ atom); -+#endif -+ } else if (ticks == -+ js_devdata->gpu_reset_ticks_dumping) { -+ /* Job has been scheduled for at least -+ * js_devdata->gpu_reset_ticks_dumping -+ * ticks. It should have left the GPU by -+ * now. Signal that the GPU needs to be -+ * reset. -+ */ -+ reset_needed = true; -+ } -+#endif /* !CONFIG_MALI_JOB_DUMP */ -+ } -+ } -+ } -+ if (reset_needed) { -+ if (kbase_is_quick_reset_enabled(kbdev)) -+ dev_err(kbdev->dev, "quick reset"); -+ else { -+ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); -+ } -+ -+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu_locked(kbdev); ++ if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ arb_if->vm_ops.vm_arb_get_max_config(arb_if); + } -+ /* the timer is re-issued if there is contexts in the run-pool */ -+ -+ if (backend->timer_running) -+ hrtimer_start(&backend->scheduling_timer, -+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), -+ HRTIMER_MODE_REL); -+ -+ backend->timeouts_updated = false; -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return HRTIMER_NORESTART; +} -+#endif /* !MALI_USE_CSF */ + -+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) ++/** ++ * kbase_arbif_gpu_request() - Request GPU from ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * call back function from arb interface to arbiter requesting GPU for VM ++ */ ++void kbase_arbif_gpu_request(struct kbase_device *kbdev) +{ -+#if !MALI_USE_CSF -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ unsigned long flags; -+ -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ -+ if (!timer_callback_should_run(kbdev)) { -+ /* Take spinlock to force synchronisation with timer */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->timer_running = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ /* From now on, return value of timer_callback_should_run() -+ * will also cause the timer to not requeue itself. 
Its return -+ * value cannot change, because it depends on variables updated -+ * with the runpool_mutex held, which the caller of this must -+ * also hold -+ */ -+ hrtimer_cancel(&backend->scheduling_timer); -+ } -+ -+ if (timer_callback_should_run(kbdev) && !backend->timer_running) { -+ /* Take spinlock to force synchronisation with timer */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->timer_running = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ hrtimer_start(&backend->scheduling_timer, -+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), -+ HRTIMER_MODE_REL); ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); ++ if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); ++ arb_if->vm_ops.vm_arb_gpu_request(arb_if); + } -+#else /* !MALI_USE_CSF */ -+ CSTD_UNUSED(kbdev); -+#endif /* !MALI_USE_CSF */ -+} -+ -+int kbase_backend_timer_init(struct kbase_device *kbdev) -+{ -+#if !MALI_USE_CSF -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ -+ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ backend->scheduling_timer.function = timer_callback; -+ backend->timer_running = false; -+#else /* !MALI_USE_CSF */ -+ CSTD_UNUSED(kbdev); -+#endif /* !MALI_USE_CSF */ -+ -+ return 0; -+} -+ -+void kbase_backend_timer_term(struct kbase_device *kbdev) -+{ -+#if !MALI_USE_CSF -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ -+ hrtimer_cancel(&backend->scheduling_timer); -+#else /* !MALI_USE_CSF */ -+ CSTD_UNUSED(kbdev); -+#endif /* !MALI_USE_CSF */ +} + -+void kbase_backend_timer_suspend(struct kbase_device *kbdev) ++/** ++ * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @gpu_required: GPU request flag ++ * ++ */ ++void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ -+ backend->suspend_timer = true; ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ kbase_backend_ctx_count_changed(kbdev); ++ if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); ++ if (gpu_required) ++ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); ++ arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); ++ } +} + -+void kbase_backend_timer_resume(struct kbase_device *kbdev) ++/** ++ * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Informs the arbiter VM is active ++ */ ++void kbase_arbif_gpu_active(struct kbase_device *kbdev) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ -+ backend->suspend_timer = false; ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ kbase_backend_ctx_count_changed(kbdev); ++ if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ arb_if->vm_ops.vm_arb_gpu_active(arb_if); ++ } +} + -+void kbase_backend_timeouts_changed(struct kbase_device *kbdev) ++/** ++ * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Informs the arbiter VM is idle ++ */ 
++void kbase_arbif_gpu_idle(struct kbase_device *kbdev) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + -+ backend->timeouts_updated = true; ++ if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { ++ dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); ++ arb_if->vm_ops.vm_arb_gpu_idle(arb_if); ++ } +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h +diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h new file mode 100644 -index 000000000..4f7c371a1 +index 000000000..701ffd42f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h -@@ -0,0 +1,72 @@ ++++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h +@@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -124387,204 +124452,116 @@ index 000000000..4f7c371a1 + * + */ + -+/* -+ * Register-based HW access backend specific job scheduler APIs ++/** ++ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines + */ + -+#ifndef _KBASE_JS_BACKEND_H_ -+#define _KBASE_JS_BACKEND_H_ ++#ifndef _MALI_KBASE_ARBIF_H_ ++#define _MALI_KBASE_ARBIF_H_ + +/** -+ * kbase_backend_timer_init() - Initialise the JS scheduling timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called at driver initialisation ++ * enum kbase_arbif_evt - Internal Arbiter event. + * -+ * Return: 0 on success ++ * @KBASE_VM_GPU_INITIALIZED_EVT: KBase has finished initializing ++ * and can be stopped ++ * @KBASE_VM_GPU_STOP_EVT: Stop message received from Arbiter ++ * @KBASE_VM_GPU_GRANTED_EVT: Grant message received from Arbiter ++ * @KBASE_VM_GPU_LOST_EVT: Lost message received from Arbiter ++ * @KBASE_VM_GPU_IDLE_EVENT: KBase has transitioned into an inactive state. ++ * @KBASE_VM_REF_EVENT: KBase has transitioned into an active state. ++ * @KBASE_VM_OS_SUSPEND_EVENT: KBase is suspending ++ * @KBASE_VM_OS_RESUME_EVENT: Kbase is resuming + */ -+int kbase_backend_timer_init(struct kbase_device *kbdev); ++enum kbase_arbif_evt { ++ KBASE_VM_GPU_INITIALIZED_EVT = 1, ++ KBASE_VM_GPU_STOP_EVT, ++ KBASE_VM_GPU_GRANTED_EVT, ++ KBASE_VM_GPU_LOST_EVT, ++ KBASE_VM_GPU_IDLE_EVENT, ++ KBASE_VM_REF_EVENT, ++ KBASE_VM_OS_SUSPEND_EVENT, ++ KBASE_VM_OS_RESUME_EVENT, ++}; + +/** -+ * kbase_backend_timer_term() - Terminate the JS scheduling timer -+ * @kbdev: Device pointer ++ * kbase_arbif_init() - Initialize the arbiter interface functionality. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This function should be called at driver termination ++ * Initialize the arbiter interface and also determines ++ * if Arbiter functionality is required. ++ * ++ * Return: ++ * * 0 - the interface was initialized or was not specified ++ * * in the device tree. ++ * * -EFAULT - the interface was specified but failed to initialize. ++ * * -EPROBE_DEFER - module dependencies are not yet available. 
+ */ -+void kbase_backend_timer_term(struct kbase_device *kbdev); ++int kbase_arbif_init(struct kbase_device *kbdev); + +/** -+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling -+ * timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called on suspend, after the active count has reached -+ * zero. This is required as the timer may have been started on job submission -+ * to the job scheduler, but before jobs are submitted to the GPU. ++ * kbase_arbif_destroy() - Cleanups the arbiter interface functionality. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Caller must hold runpool_mutex. ++ * Cleans up the arbiter interface functionality and resets the reference count ++ * of the arbif module used + */ -+void kbase_backend_timer_suspend(struct kbase_device *kbdev); ++void kbase_arbif_destroy(struct kbase_device *kbdev); + +/** -+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS -+ * scheduling timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called on resume. Note that is not guaranteed to -+ * re-start the timer, only evalute whether it should be re-started. ++ * kbase_arbif_get_max_config() - Request max config info ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Caller must hold runpool_mutex. ++ * call back function from arb interface to arbiter requesting max config info + */ -+void kbase_backend_timer_resume(struct kbase_device *kbdev); ++void kbase_arbif_get_max_config(struct kbase_device *kbdev); + -+#endif /* _KBASE_JS_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c -new file mode 100644 -index 000000000..9ce50758c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c -@@ -0,0 +1,131 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbase_arbif_gpu_request() - Send GPU request message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * ++ * Sends a message to Arbiter to request GPU access. 
+ */ -+ -+#include -+#include -+#include -+#include -+#include "mali_kbase_l2_mmu_config.h" ++void kbase_arbif_gpu_request(struct kbase_device *kbdev); + +/** -+ * struct l2_mmu_config_limit_region - L2 MMU limit field ++ * kbase_arbif_gpu_stopped() - Send GPU stopped message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @gpu_required: true if GPU access is still required ++ * (Arbiter will automatically send another grant message) + * -+ * @value: The default value to load into the L2_MMU_CONFIG register -+ * @mask: The shifted mask of the field in the L2_MMU_CONFIG register -+ * @shift: The shift of where the field starts in the L2_MMU_CONFIG register -+ * This should be the same value as the smaller of the two mask -+ * values ++ * Sends a message to Arbiter to notify that the GPU has stopped. ++ * @note Once this call has been made, KBase must not attempt to access the GPU ++ * until the #KBASE_VM_GPU_GRANTED_EVT event has been received. + */ -+struct l2_mmu_config_limit_region { -+ u32 value, mask, shift; -+}; ++void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required); + +/** -+ * struct l2_mmu_config_limit - L2 MMU read and write limit ++ * kbase_arbif_gpu_active() - Send a GPU active message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @product_model: The GPU for which this entry applies -+ * @read: Values for the read limit field -+ * @write: Values for the write limit field ++ * Sends a message to Arbiter to report that KBase has gone active. + */ -+struct l2_mmu_config_limit { -+ u32 product_model; -+ struct l2_mmu_config_limit_region read; -+ struct l2_mmu_config_limit_region write; -+}; ++void kbase_arbif_gpu_active(struct kbase_device *kbdev); + -+/* -+ * Zero represents no limit -+ * -+ * For LBEX TBEX TBAX TTRX and TNAX: -+ * The value represents the number of outstanding reads (6 bits) or writes (5 bits) ++/** ++ * kbase_arbif_gpu_idle() - Send a GPU idle message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h ++ * Sends a message to Arbiter to report that KBase has gone idle. 
+ */ -+static const struct l2_mmu_config_limit limits[] = { -+ /* GPU, read, write */ -+ {GPU_ID2_PRODUCT_LBEX, -+ {0, GENMASK(10, 5), 5}, -+ {0, GENMASK(16, 12), 12} }, -+ {GPU_ID2_PRODUCT_TBEX, -+ {0, GENMASK(10, 5), 5}, -+ {0, GENMASK(16, 12), 12} }, -+ {GPU_ID2_PRODUCT_TBAX, -+ {0, GENMASK(10, 5), 5}, -+ {0, GENMASK(16, 12), 12} }, -+ {GPU_ID2_PRODUCT_TTRX, -+ {0, GENMASK(12, 7), 7}, -+ {0, GENMASK(17, 13), 13} }, -+ {GPU_ID2_PRODUCT_TNAX, -+ {0, GENMASK(12, 7), 7}, -+ {0, GENMASK(17, 13), 13} }, -+ {GPU_ID2_PRODUCT_TGOX, -+ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, -+ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, -+ {GPU_ID2_PRODUCT_TNOX, -+ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, -+ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, -+}; -+ -+int kbase_set_mmu_quirks(struct kbase_device *kbdev) -+{ -+ /* All older GPUs had 2 bits for both fields, this is a default */ -+ struct l2_mmu_config_limit limit = { -+ 0, /* Any GPU not in the limits array defined above */ -+ {KBASE_AID_32, GENMASK(25, 24), 24}, -+ {KBASE_AID_32, GENMASK(27, 26), 26} -+ }; -+ u32 product_model, gpu_id; -+ u32 mmu_config; -+ int i; -+ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; -+ -+ /* Limit the GPU bus bandwidth if the platform needs this. */ -+ for (i = 0; i < ARRAY_SIZE(limits); i++) { -+ if (product_model == limits[i].product_model) { -+ limit = limits[i]; -+ break; -+ } -+ } -+ -+ mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); -+ -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; -+ -+ mmu_config &= ~(limit.read.mask | limit.write.mask); -+ /* Can't use FIELD_PREP() macro here as the mask isn't constant */ -+ mmu_config |= (limit.read.value << limit.read.shift) | -+ (limit.write.value << limit.write.shift); -+ -+ kbdev->hw_quirks_mmu = mmu_config; -+ -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ /* Allow memory configuration disparity to be ignored, -+ * we optimize the use of shared memory and thus we -+ * expect some disparity in the memory configuration. -+ */ -+ kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; -+ } ++void kbase_arbif_gpu_idle(struct kbase_device *kbdev); + -+ return 0; -+} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h ++#endif /* _MALI_KBASE_ARBIF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h new file mode 100644 -index 000000000..07014ad36 +index 000000000..1c4901b3b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h -@@ -0,0 +1,36 @@ ++++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h +@@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -124602,31 +124579,71 @@ index 000000000..07014ad36 + * + */ + -+#ifndef _KBASE_L2_MMU_CONFIG_H_ -+#define _KBASE_L2_MMU_CONFIG_H_ +/** -+ * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Use this function to initialise the hw_quirks_mmu field, for instance to set -+ * the MAX_READS and MAX_WRITES to sane defaults for each GPU. -+ * -+ * Return: Zero for succeess or a Linux error code ++ * DOC: Mali structures define to support arbitration feature + */ -+int kbase_set_mmu_quirks(struct kbase_device *kbdev); + -+#endif /* _KBASE_L2_MMU_CONFIG_H */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c ++#ifndef _MALI_KBASE_ARBITER_DEFS_H_ ++#define _MALI_KBASE_ARBITER_DEFS_H_ ++ ++#include "mali_kbase_arbiter_pm.h" ++ ++/** ++ * struct kbase_arbiter_vm_state - Struct representing the state and containing the ++ * data of pm work ++ * @kbdev: Pointer to kbase device structure (must be a valid pointer) ++ * @vm_state_lock: The lock protecting the VM state when arbiter is used. ++ * This lock must also be held whenever the VM state is being ++ * transitioned ++ * @vm_state_wait: Wait queue set when GPU is granted ++ * @vm_state: Current state of VM ++ * @vm_arb_wq: Work queue for resuming or stopping work on the GPU for use ++ * with the Arbiter ++ * @vm_suspend_work: Work item for vm_arb_wq to stop current work on GPU ++ * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU ++ * @vm_arb_starting: Work queue resume in progress ++ * @vm_arb_stopping: Work queue suspend in progress ++ * @interrupts_installed: Flag set when interrupts are installed ++ * @vm_request_timer: Timer to monitor GPU request ++ */ ++struct kbase_arbiter_vm_state { ++ struct kbase_device *kbdev; ++ struct mutex vm_state_lock; ++ wait_queue_head_t vm_state_wait; ++ enum kbase_vm_state vm_state; ++ struct workqueue_struct *vm_arb_wq; ++ struct work_struct vm_suspend_work; ++ struct work_struct vm_resume_work; ++ bool vm_arb_starting; ++ bool vm_arb_stopping; ++ bool interrupts_installed; ++ struct hrtimer vm_request_timer; ++}; ++ ++/** ++ * struct kbase_arbiter_device - Representing an instance of arbiter device, ++ * allocated from the probe method of Mali driver ++ * @arb_if: Pointer to the arbiter interface device ++ * @arb_dev: Pointer to the arbiter device ++ * @arb_freq: GPU clock frequency retrieved from arbiter. ++ */ ++struct kbase_arbiter_device { ++ struct arbiter_if_dev *arb_if; ++ struct device *arb_dev; ++ struct kbase_arbiter_freq arb_freq; ++}; ++ ++#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c new file mode 100644 -index 000000000..6db703176 +index 000000000..667552c56 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c -@@ -0,0 +1,2229 @@ ++++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c +@@ -0,0 +1,1138 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -124644,2224 +124661,1936 @@ index 000000000..6db703176 + * + */ + -+/* NOTES: -+ * - A default GPU can be compiled in during the build, by defining -+ * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that -+ * insmod'ing mali_kbase.ko with no arguments after a build with "scons -+ * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be -+ * overridden by passing the 'no_mali_gpu' argument to insmod. -+ * -+ * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is -+ * activated. ++/** ++ * DOC: Mali arbiter power manager state machine and APIs + */ + -+/* Implementation of failure injection system: -+ * -+ * Error conditions are generated by gpu_generate_error(). -+ * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either -+ * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or -+ * checks if there is (in error_track_list) an error configuration to be set for -+ * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). -+ * Each error condition will trigger a specific "state" for a certain set of -+ * registers as per Midgard Architecture Specifications doc. -+ * -+ * According to Midgard Architecture Specifications doc the following registers -+ * are always affected by error conditions: -+ * -+ * JOB Exception: -+ * JOB_IRQ_RAWSTAT -+ * JOB STATUS AREA -+ * -+ * MMU Exception: -+ * MMU_IRQ_RAWSTAT -+ * AS_FAULTSTATUS -+ * AS_FAULTADDRESS -+ * -+ * GPU Exception: -+ * GPU_IRQ_RAWSTAT -+ * GPU_FAULTSTATUS -+ * GPU_FAULTADDRESS -+ * -+ * For further clarification on the model behaviour upon specific error -+ * conditions the user may refer to the Midgard Architecture Specification -+ * document -+ */ +#include -+#include -+#include -+#include -+#include -+ -+#if MALI_USE_CSF -+#include ++#include ++#include ++#include ++#include ++#include + -+/* Index of the last value register for each type of core, with the 1st value -+ * register being at index 0. ++/* A dmesg warning will occur if the GPU is not granted ++ * after the following time (in milliseconds) has ellapsed. + */ -+#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1) -+ -+/* Array for storing the value of SELECT register for each type of core */ -+static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; -+static bool ipa_control_timer_enabled; -+#endif ++#define GPU_REQUEST_TIMEOUT 1000 ++#define KHZ_TO_HZ 1000 + -+#define LO_MASK(M) ((M) & 0xFFFFFFFF) -+#if !MALI_USE_CSF -+#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) -+#endif ++#define MAX_L2_SLICES_MASK 0xFF + -+/* Construct a value for the THREAD_FEATURES register, *except* the two most -+ * significant bits, which are set to IMPLEMENTATION_MODEL in -+ * midgard_model_read_reg(). 
++/* Maximum time in ms, before deferring probe incase ++ * GPU_GRANTED message is not received + */ -+#if MALI_USE_CSF -+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ -+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) -+#else -+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ -+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) -+#endif ++static int gpu_req_timeout = 1; ++module_param(gpu_req_timeout, int, 0644); ++MODULE_PARM_DESC(gpu_req_timeout, ++ "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); + -+struct error_status_t hw_error_status; ++static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); ++static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( ++ struct kbase_device *kbdev); + +/** -+ * struct control_reg_values_t - control register values specific to the GPU being 'emulated' -+ * @name: GPU name -+ * @gpu_id: GPU ID to report -+ * @as_present: Bitmap of address spaces present -+ * @thread_max_threads: Maximum number of threads per core -+ * @thread_max_workgroup_size: Maximum number of threads per workgroup -+ * @thread_max_barrier_size: Maximum number of threads per barrier -+ * @thread_features: Thread features, NOT INCLUDING the 2 -+ * most-significant bits, which are always set to -+ * IMPLEMENTATION_MODEL. -+ * @core_features: Core features -+ * @tiler_features: Tiler features -+ * @mmu_features: MMU features -+ * @gpu_features_lo: GPU features (low) -+ * @gpu_features_hi: GPU features (high) -+ * @shader_present: Available shader bitmap -+ * @stack_present: Core stack present bitmap ++ * kbase_arbiter_pm_vm_state_str() - Helper function to get string ++ * for kbase VM state.(debug) ++ * @state: kbase VM state + * ++ * Return: string representation of Kbase_vm_state + */ -+struct control_reg_values_t { -+ const char *name; -+ u32 gpu_id; -+ u32 as_present; -+ u32 thread_max_threads; -+ u32 thread_max_workgroup_size; -+ u32 thread_max_barrier_size; -+ u32 thread_features; -+ u32 core_features; -+ u32 tiler_features; -+ u32 mmu_features; -+ u32 gpu_features_lo; -+ u32 gpu_features_hi; -+ u32 shader_present; -+ u32 stack_present; -+}; -+ -+struct job_slot { -+ int job_active; -+ int job_queued; -+ int job_complete_irq_asserted; -+ int job_irq_mask; -+ int job_disabled; -+}; -+ -+struct dummy_model_t { -+ int reset_completed; -+ int reset_completed_mask; -+#if !MALI_USE_CSF -+ int prfcnt_sample_completed; -+#endif /* !MALI_USE_CSF */ -+ int power_changed_mask; /* 2bits: _ALL,_SINGLE */ -+ int power_changed; /* 1bit */ -+ bool clean_caches_completed; -+ bool clean_caches_completed_irq_enabled; -+#if MALI_USE_CSF -+ bool flush_pa_range_completed; -+ bool flush_pa_range_completed_irq_enabled; -+#endif -+ int power_on; /* 6bits: SHADER[4],TILER,L2 */ -+ u32 stack_power_on_lo; -+ u32 coherency_enable; -+ unsigned int job_irq_js_state; -+ struct job_slot slots[NUM_SLOTS]; -+ const struct control_reg_values_t *control_reg_values; -+ u32 l2_config; -+ void *data; -+}; ++static inline const char *kbase_arbiter_pm_vm_state_str( ++ enum kbase_vm_state state) ++{ ++ switch (state) { ++ case KBASE_VM_STATE_INITIALIZING: ++ return "KBASE_VM_STATE_INITIALIZING"; ++ case KBASE_VM_STATE_INITIALIZING_WITH_GPU: ++ return "KBASE_VM_STATE_INITIALIZING_WITH_GPU"; ++ case KBASE_VM_STATE_SUSPENDED: ++ return "KBASE_VM_STATE_SUSPENDED"; ++ case KBASE_VM_STATE_STOPPED: ++ return "KBASE_VM_STATE_STOPPED"; ++ case 
KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ return "KBASE_VM_STATE_STOPPED_GPU_REQUESTED"; ++ case KBASE_VM_STATE_STARTING: ++ return "KBASE_VM_STATE_STARTING"; ++ case KBASE_VM_STATE_IDLE: ++ return "KBASE_VM_STATE_IDLE"; ++ case KBASE_VM_STATE_ACTIVE: ++ return "KBASE_VM_STATE_ACTIVE"; ++ case KBASE_VM_STATE_STOPPING_IDLE: ++ return "KBASE_VM_STATE_STOPPING_IDLE"; ++ case KBASE_VM_STATE_STOPPING_ACTIVE: ++ return "KBASE_VM_STATE_STOPPING_ACTIVE"; ++ case KBASE_VM_STATE_SUSPEND_PENDING: ++ return "KBASE_VM_STATE_SUSPEND_PENDING"; ++ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; ++ default: ++ KBASE_DEBUG_ASSERT(false); ++ return "[UnknownState]"; ++ } ++} + -+/* Array associating GPU names with control register values. The first -+ * one is used in the case of no match. ++/** ++ * kbase_arbiter_pm_vm_event_str() - Helper function to get string ++ * for kbase VM event.(debug) ++ * @evt: kbase VM state ++ * ++ * Return: String representation of Kbase_arbif_event + */ -+static const struct control_reg_values_t all_control_reg_values[] = { -+ { -+ .name = "tMIx", -+ .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tHEx", -+ .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tSIx", -+ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x300, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x209, -+ .mmu_features = 0x2821, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tDVx", -+ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x300, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x209, -+ .mmu_features = 0x2821, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tNOx", -+ .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { 
-+ .name = "tGOx_r0p0", -+ .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tGOx_r1p0", -+ .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), -+ .core_features = 0x2, -+ .tiler_features = 0x209, -+ .mmu_features = 0x2823, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tTRx", -+ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tNAx", -+ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tBEx", -+ .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tBAx", -+ .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tODx", -+ .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tGRx", -+ .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), -+ 
.as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .core_features = 0x0, /* core_1e16fma2tex */ -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tVAx", -+ .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x180, -+ .thread_max_workgroup_size = 0x180, -+ .thread_max_barrier_size = 0x180, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), -+ .core_features = 0x0, /* core_1e16fma2tex */ -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, -+ }, -+ { -+ .name = "tTUx", -+ .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x800, -+ .thread_max_workgroup_size = 0x400, -+ .thread_max_barrier_size = 0x400, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), -+ .core_features = 0x0, /* core_1e32fma2tex */ -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0xf, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, -+ .stack_present = 0xF, -+ }, -+ { -+ .name = "tTIx", -+ .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0), -+ .as_present = 0xFF, -+ .thread_max_threads = 0x800, -+ .thread_max_workgroup_size = 0x400, -+ .thread_max_barrier_size = 0x400, -+ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), -+ .core_features = 0x1, /* core_1e64fma4tex */ -+ .tiler_features = 0x809, -+ .mmu_features = 0x2830, -+ .gpu_features_lo = 0xf, -+ .gpu_features_hi = 0, -+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, -+ .stack_present = 0xF, -+ }, -+}; ++static inline const char *kbase_arbiter_pm_vm_event_str( ++ enum kbase_arbif_evt evt) ++{ ++ switch (evt) { ++ case KBASE_VM_GPU_INITIALIZED_EVT: ++ return "KBASE_VM_GPU_INITIALIZED_EVT"; ++ case KBASE_VM_GPU_STOP_EVT: ++ return "KBASE_VM_GPU_STOP_EVT"; ++ case KBASE_VM_GPU_GRANTED_EVT: ++ return "KBASE_VM_GPU_GRANTED_EVT"; ++ case KBASE_VM_GPU_LOST_EVT: ++ return "KBASE_VM_GPU_LOST_EVT"; ++ case KBASE_VM_OS_SUSPEND_EVENT: ++ return "KBASE_VM_OS_SUSPEND_EVENT"; ++ case KBASE_VM_OS_RESUME_EVENT: ++ return "KBASE_VM_OS_RESUME_EVENT"; ++ case KBASE_VM_GPU_IDLE_EVENT: ++ return "KBASE_VM_GPU_IDLE_EVENT"; ++ case KBASE_VM_REF_EVENT: ++ return "KBASE_VM_REF_EVENT"; ++ default: ++ KBASE_DEBUG_ASSERT(false); ++ return "[UnknownEvent]"; ++ } ++} + -+static struct { -+ spinlock_t access_lock; -+#if !MALI_USE_CSF -+ unsigned long prfcnt_base; -+#endif /* !MALI_USE_CSF */ -+ u32 *prfcnt_base_cpu; ++/** ++ * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_state: kbase VM new state ++ * ++ * This function sets the new state for the VM ++ */ ++static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, ++ enum kbase_vm_state new_state) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ u32 time; ++ dev_dbg(kbdev->dev, "VM set_state %s -> %s", ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), ++ kbase_arbiter_pm_vm_state_str(new_state)); + -+ struct 
gpu_model_prfcnt_en prfcnt_en; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ arb_vm_state->vm_state = new_state; ++ if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && ++ new_state != KBASE_VM_STATE_INITIALIZING) ++ KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state); ++ wake_up(&arb_vm_state->vm_state_wait); ++} + -+ u64 l2_present; -+ u64 shader_present; ++/** ++ * kbase_arbiter_pm_suspend_wq() - suspend work queue of the driver. ++ * @data: work queue ++ * ++ * Suspends work queue of the driver, when VM is in SUSPEND_PENDING or ++ * STOPPING_IDLE or STOPPING_ACTIVE state ++ */ ++static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, ++ struct kbase_arbiter_vm_state, ++ vm_suspend_work); ++ struct kbase_device *kbdev = arb_vm_state->kbdev; + -+#if !MALI_USE_CSF -+ u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -+#else -+ u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -+#endif /* !MALI_USE_CSF */ -+ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -+ u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * -+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -+ u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * -+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; -+} performance_counters; ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, ">%s\n", __func__); ++ if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || ++ arb_vm_state->vm_state == ++ KBASE_VM_STATE_STOPPING_ACTIVE || ++ arb_vm_state->vm_state == ++ KBASE_VM_STATE_SUSPEND_PENDING) { ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); ++ kbase_pm_driver_suspend(kbdev); ++ dev_dbg(kbdev->dev, "vm_state_lock); ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, "<%s\n", __func__); ++} + -+static u32 get_implementation_register(u32 reg, -+ const struct control_reg_values_t *const control_reg_values) ++/** ++ * kbase_arbiter_pm_resume_wq() -Kbase resume work queue. ++ * @data: work item ++ * ++ * Resume work queue of the driver when VM is in STARTING state, ++ * else if its in STOPPING_ACTIVE will request a stop event. 
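++ *
++ * Typical sequence (as implied by the state machine in this file):
++ * kbase_arbiter_pm_vm_gpu_start() queues this work item on vm_arb_wq once
++ * the Arbiter grants the GPU, and the handler then calls
++ * kbase_pm_driver_resume() with vm_arb_starting set for the duration of
++ * the resume.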
++ */ ++static void kbase_arbiter_pm_resume_wq(struct work_struct *data) +{ -+ switch (reg) { -+ case GPU_CONTROL_REG(SHADER_PRESENT_LO): -+ return LO_MASK(control_reg_values->shader_present); -+ case GPU_CONTROL_REG(TILER_PRESENT_LO): -+ return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); -+ case GPU_CONTROL_REG(L2_PRESENT_LO): -+ return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); -+ case GPU_CONTROL_REG(STACK_PRESENT_LO): -+ return LO_MASK(control_reg_values->stack_present); ++ struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, ++ struct kbase_arbiter_vm_state, ++ vm_resume_work); ++ struct kbase_device *kbdev = arb_vm_state->kbdev; + -+ case GPU_CONTROL_REG(SHADER_PRESENT_HI): -+ case GPU_CONTROL_REG(TILER_PRESENT_HI): -+ case GPU_CONTROL_REG(L2_PRESENT_HI): -+ case GPU_CONTROL_REG(STACK_PRESENT_HI): -+ /* *** FALLTHROUGH *** */ -+ default: -+ return 0; ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, ">%s\n", __func__); ++ arb_vm_state->vm_arb_starting = true; ++ if (arb_vm_state->vm_state == KBASE_VM_STATE_STARTING) { ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, ">kbase_pm_driver_resume\n"); ++ kbase_pm_driver_resume(kbdev, true); ++ dev_dbg(kbdev->dev, "vm_state_lock); ++ } else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE) { ++ kbase_arbiter_pm_vm_stopped(kbdev); + } ++ arb_vm_state->vm_arb_starting = false; ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev); ++ dev_dbg(kbdev->dev, "<%s\n", __func__); +} + -+void gpu_device_set_data(void *model, void *data) ++/** ++ * request_timer_callback() - Issue warning on request timer expiration ++ * @timer: Request hr timer data ++ * ++ * Called when the Arbiter takes too long to grant the GPU after a ++ * request has been made. Issues a warning in dmesg. ++ * ++ * Return: Always returns HRTIMER_NORESTART ++ */ ++static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) +{ -+ struct dummy_model_t *dummy = (struct dummy_model_t *)model; ++ struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, ++ struct kbase_arbiter_vm_state, vm_request_timer); + -+ dummy->data = data; ++ KBASE_DEBUG_ASSERT(arb_vm_state); ++ KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); ++ ++ dev_warn(arb_vm_state->kbdev->dev, ++ "Still waiting for GPU to be granted from Arbiter after %d ms\n", ++ GPU_REQUEST_TIMEOUT); ++ return HRTIMER_NORESTART; +} + -+void *gpu_device_get_data(void *model) ++/** ++ * start_request_timer() - Start a timer after requesting GPU ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Start a timer to track when kbase is waiting for the GPU from the ++ * Arbiter. If the timer expires before GPU is granted, a warning in ++ * dmesg will be issued. ++ */ ++static void start_request_timer(struct kbase_device *kbdev) +{ -+ struct dummy_model_t *dummy = (struct dummy_model_t *)model; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ return dummy->data; ++ hrtimer_start(&arb_vm_state->vm_request_timer, ++ HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), ++ HRTIMER_MODE_REL); +} + -+#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -+ -+/* SCons should pass in a default GPU, but other ways of building (e.g. -+ * in-tree) won't, so define one here in case. 
++/** ++ * cancel_request_timer() - Stop the request timer ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Stops the request timer once GPU has been granted. Safe to call ++ * even if timer is no longer running. + */ -+#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -+#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -+#endif ++static void cancel_request_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; -+module_param(no_mali_gpu, charp, 0000); -+MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); ++ hrtimer_cancel(&arb_vm_state->vm_request_timer); ++} + -+#if MALI_USE_CSF -+static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, -+ u32 cnt_idx, bool is_low_word) ++/** ++ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM ++ * Paravirtualized use. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Initialize the arbiter and other required resources during the runtime ++ * and request the GPU for the VM for the first time. ++ * ++ * Return: 0 if success, or a Linux error code ++ */ ++int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) +{ -+ u64 *counters_data; -+ u32 core_count = 0; -+ u32 event_index; -+ u64 value = 0; -+ u32 core; -+ unsigned long flags; ++ int err; ++ struct kbase_arbiter_vm_state *arb_vm_state = NULL; + -+ if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) -+ return 0; ++ arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), ++ GFP_KERNEL); ++ if (arb_vm_state == NULL) ++ return -ENOMEM; + -+ if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) -+ return 0; ++ arb_vm_state->kbdev = kbdev; ++ arb_vm_state->vm_state = KBASE_VM_STATE_INITIALIZING; + -+ event_index = -+ (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; ++ mutex_init(&arb_vm_state->vm_state_lock); ++ init_waitqueue_head(&arb_vm_state->vm_state_wait); ++ arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", ++ WQ_HIGHPRI); ++ if (!arb_vm_state->vm_arb_wq) { ++ dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); ++ kfree(arb_vm_state); ++ return -ENOMEM; ++ } ++ INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); ++ INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); ++ arb_vm_state->vm_arb_starting = false; ++ atomic_set(&kbdev->pm.gpu_users_waiting, 0); ++ hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ arb_vm_state->vm_request_timer.function = ++ request_timer_callback; ++ kbdev->pm.arb_vm_state = arb_vm_state; + -+ /* Currently only primary counter blocks are supported */ -+ if (WARN_ON(event_index >= -+ (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) -+ return 0; ++ err = kbase_arbif_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Failed to initialise arbif module\n"); ++ goto arbif_init_fail; ++ } + -+ /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, -+ * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for -+ * IPA counters. If selected, the value returned for them will be zero. 
-+ */ -+ if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) -+ return 0; ++ if (kbdev->arb.arb_if) { ++ kbase_arbif_gpu_request(kbdev); ++ dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); + -+ event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; ++ err = wait_event_timeout(arb_vm_state->vm_state_wait, ++ arb_vm_state->vm_state == ++ KBASE_VM_STATE_INITIALIZING_WITH_GPU, ++ msecs_to_jiffies(gpu_req_timeout)); + -+ spin_lock_irqsave(&performance_counters.access_lock, flags); ++ if (!err) { ++ dev_dbg(kbdev->dev, ++ "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", ++ gpu_req_timeout); + -+ switch (core_type) { -+ case KBASE_IPA_CORE_TYPE_CSHW: -+ core_count = 1; -+ counters_data = performance_counters.cshw_counters; -+ break; -+ case KBASE_IPA_CORE_TYPE_MEMSYS: -+ core_count = hweight64(performance_counters.l2_present); -+ counters_data = performance_counters.l2_counters; -+ break; -+ case KBASE_IPA_CORE_TYPE_TILER: -+ core_count = 1; -+ counters_data = performance_counters.tiler_counters; -+ break; -+ case KBASE_IPA_CORE_TYPE_SHADER: -+ core_count = hweight64(performance_counters.shader_present); -+ counters_data = performance_counters.shader_counters; -+ break; -+ default: -+ WARN(1, "Invalid core_type %d\n", core_type); -+ break; -+ } ++ err = -ENODEV; ++ goto arbif_timeout; ++ } + -+ for (core = 0; core < core_count; core++) { -+ value += counters_data[event_index]; -+ event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; ++ dev_dbg(kbdev->dev, ++ "Waiting for initial GPU assignment - done\n"); + } ++ return 0; + -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); ++arbif_timeout: ++ kbase_arbiter_pm_early_term(kbdev); ++ return err; + -+ if (is_low_word) -+ return (value & U32_MAX); -+ else -+ return (value >> 32); ++arbif_init_fail: ++ destroy_workqueue(arb_vm_state->vm_arb_wq); ++ kfree(arb_vm_state); ++ kbdev->pm.arb_vm_state = NULL; ++ return err; +} -+#endif /* MALI_USE_CSF */ + +/** -+ * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values ++ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Sets all performance counter values to zero. The performance counter access -+ * lock must be held when calling this function. 
++ * Clean up all the resources + */ -+static void gpu_model_clear_prfcnt_values_nolock(void) ++void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&performance_counters.access_lock); -+#if !MALI_USE_CSF -+ memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); -+#else -+ memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); -+#endif /* !MALI_USE_CSF */ -+ memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); -+ memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); -+ memset(performance_counters.shader_counters, 0, -+ sizeof(performance_counters.shader_counters)); ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ cancel_request_timer(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { ++ kbase_pm_set_gpu_lost(kbdev, false); ++ kbase_arbif_gpu_stopped(kbdev, false); ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ destroy_workqueue(arb_vm_state->vm_arb_wq); ++ kbase_arbif_destroy(kbdev); ++ arb_vm_state->vm_arb_wq = NULL; ++ kfree(kbdev->pm.arb_vm_state); ++ kbdev->pm.arb_vm_state = NULL; +} + -+#if MALI_USE_CSF -+void gpu_model_clear_prfcnt_values(void) ++/** ++ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Releases interrupts and set the interrupt flag to false ++ */ ++void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) +{ -+ unsigned long flags; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ spin_lock_irqsave(&performance_counters.access_lock, flags); -+ gpu_model_clear_prfcnt_values_nolock(); -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ if (arb_vm_state->interrupts_installed == true) { ++ arb_vm_state->interrupts_installed = false; ++ kbase_release_interrupts(kbdev); ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); +} -+KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); -+#endif /* MALI_USE_CSF */ + +/** -+ * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer ++ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @values: Array of values to be written out -+ * @out_index: Index into performance counter buffer -+ * @block_count: Number of blocks to dump -+ * @prfcnt_enable_mask: Counter enable mask -+ * @blocks_present: Available blocks bit mask ++ * Install interrupts and set the interrupt_install flag to true. + * -+ * The performance counter access lock must be held before calling this -+ * function. 
++ * Return: 0 if success, or a Linux error code + */ -+static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, -+ u32 prfcnt_enable_mask, u64 blocks_present) ++int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) +{ -+ u32 block_idx, counter; -+ u32 counter_value = 0; -+ u32 *prfcnt_base; -+ u32 index = 0; -+ -+ lockdep_assert_held(&performance_counters.access_lock); -+ -+ prfcnt_base = performance_counters.prfcnt_base_cpu; -+ -+ for (block_idx = 0; block_idx < block_count; block_idx++) { -+ /* only dump values if core is present */ -+ if (!(blocks_present & (1 << block_idx))) { -+#if MALI_USE_CSF -+ /* if CSF dump zeroed out block */ -+ memset(&prfcnt_base[*out_index], 0, -+ KBASE_DUMMY_MODEL_BLOCK_SIZE); -+ *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; -+#endif /* MALI_USE_CSF */ -+ continue; -+ } -+ -+ /* write the header */ -+ prfcnt_base[*out_index] = performance_counters.time++; -+ prfcnt_base[*out_index+2] = prfcnt_enable_mask; -+ *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ int err; + -+ /* write the counters */ -+ for (counter = 0; -+ counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; -+ counter++) { -+ /* HW counter values retrieved through -+ * PRFCNT_SAMPLE request are of 32 bits only. -+ */ -+ counter_value = (u32)values[index++]; -+ if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( -+ prfcnt_enable_mask, (counter + -+ KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { -+ prfcnt_base[*out_index + counter] = -+ counter_value; -+ } -+ } -+ *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; -+ } ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ arb_vm_state->interrupts_installed = true; ++ err = kbase_install_interrupts(kbdev); ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ return err; +} + -+static void gpu_model_dump_nolock(void) ++/** ++ * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles a stop state for the VM ++ */ ++void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) +{ -+ u32 index = 0; ++ bool request_gpu = false; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ lockdep_assert_held(&performance_counters.access_lock); ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); + -+#if !MALI_USE_CSF -+ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, -+ performance_counters.prfcnt_en.fe, 0x1); -+#else -+ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, -+ performance_counters.prfcnt_en.fe, 0x1); -+#endif /* !MALI_USE_CSF */ -+ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, -+ &index, 1, -+ performance_counters.prfcnt_en.tiler, -+ DUMMY_IMPLEMENTATION_TILER_PRESENT); -+ gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, -+ KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, -+ performance_counters.prfcnt_en.l2, -+ performance_counters.l2_present); -+ gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, -+ &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, -+ performance_counters.prfcnt_en.shader, -+ performance_counters.shader_present); ++ if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 && ++ arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_ACTIVE); + -+ /* Counter values are cleared after each dump */ -+ gpu_model_clear_prfcnt_values_nolock(); ++ 
dev_dbg(kbdev->dev, "%s %s\n", __func__, ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + -+ /* simulate a 'long' time between samples */ -+ performance_counters.time += 10; -+} ++ if (arb_vm_state->interrupts_installed) { ++ arb_vm_state->interrupts_installed = false; ++ kbase_release_interrupts(kbdev); ++ } + -+#if !MALI_USE_CSF -+static void midgard_model_dump_prfcnt(void) -+{ -+ unsigned long flags; ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_STOPPING_ACTIVE: ++ request_gpu = true; ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); ++ break; ++ case KBASE_VM_STATE_STOPPING_IDLE: ++ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); ++ break; ++ case KBASE_VM_STATE_SUSPEND_PENDING: ++ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); ++ break; ++ default: ++ dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", ++ arb_vm_state->vm_state); ++ break; ++ } + -+ spin_lock_irqsave(&performance_counters.access_lock, flags); -+ gpu_model_dump_nolock(); -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); ++ kbase_pm_set_gpu_lost(kbdev, false); ++ kbase_arbif_gpu_stopped(kbdev, request_gpu); ++ if (request_gpu) ++ start_request_timer(kbdev); +} -+#else -+void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) ++ ++void kbase_arbiter_set_max_config(struct kbase_device *kbdev, ++ uint32_t max_l2_slices, ++ uint32_t max_core_mask) +{ -+ unsigned long flags; ++ struct kbase_arbiter_vm_state *arb_vm_state; ++ struct max_config_props max_config; + -+ if (WARN_ON(!sample_buf)) ++ if (!kbdev) + return; + -+ spin_lock_irqsave(&performance_counters.access_lock, flags); -+ performance_counters.prfcnt_base_cpu = sample_buf; -+ performance_counters.prfcnt_en = enable_maps; -+ gpu_model_dump_nolock(); -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); -+} ++ /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ ++ max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK; ++ max_config.core_mask = max_core_mask; ++ arb_vm_state = kbdev->pm.arb_vm_state; + -+void gpu_model_glb_request_job_irq(void *model) -+{ -+ unsigned long flags; ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ /* Just set the max_props in kbase during initialization. 
*/ ++ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) ++ kbase_gpuprops_set_max_config(kbdev, &max_config); ++ else ++ dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + -+ spin_lock_irqsave(&hw_error_status.access_lock, flags); -+ hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; -+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); -+ gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); ++ mutex_unlock(&arb_vm_state->vm_state_lock); +} -+#endif /* !MALI_USE_CSF */ + -+static void init_register_statuses(struct dummy_model_t *dummy) ++int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) +{ -+ int i; ++ struct kbase_arbiter_vm_state *arb_vm_state; ++ int result = -EINVAL; + -+ hw_error_status.errors_mask = 0; -+ hw_error_status.gpu_error_irq = 0; -+ hw_error_status.gpu_fault_status = 0; -+ hw_error_status.job_irq_rawstat = 0; -+ hw_error_status.job_irq_status = 0; -+ hw_error_status.mmu_irq_rawstat = 0; -+ hw_error_status.mmu_irq_mask = 0; ++ if (!kbdev) ++ return result; + -+ for (i = 0; i < NUM_SLOTS; i++) { -+ hw_error_status.js_status[i] = 0; -+ hw_error_status.job_irq_rawstat |= -+ (dummy->slots[i].job_complete_irq_asserted) << i; -+ hw_error_status.job_irq_status |= -+ (dummy->slots[i].job_complete_irq_asserted) << i; ++ /* First check the GPU_LOST state */ ++ kbase_pm_lock(kbdev); ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ kbase_pm_unlock(kbdev); ++ return 0; + } -+ for (i = 0; i < NUM_MMU_AS; i++) { -+ hw_error_status.as_command[i] = 0; -+ hw_error_status.as_faultstatus[i] = 0; -+ hw_error_status.mmu_irq_mask |= 1 << i; ++ kbase_pm_unlock(kbdev); ++ ++ /* Then the arbitration state machine */ ++ arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_INITIALIZING: ++ case KBASE_VM_STATE_SUSPENDED: ++ case KBASE_VM_STATE_STOPPED: ++ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ result = 0; ++ break; ++ default: ++ result = 1; ++ break; + } ++ mutex_unlock(&arb_vm_state->vm_state_lock); + -+ performance_counters.time = 0; ++ return result; +} + -+static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) ++/** ++ * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles the start state of the VM ++ */ ++static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&hw_error_status.access_lock); -+ -+ if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { -+ if (job_slot == hw_error_status.current_job_slot) { -+#if !MALI_USE_CSF -+ if (hw_error_status.js_status[job_slot] == 0) { -+ /* status reg is clean; it can be written */ -+ -+ switch (hw_error_status.errors_mask & -+ IS_A_JOB_ERROR) { -+ case KBASE_JOB_INTERRUPTED: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INTERRUPTED; -+ break; -+ -+ case KBASE_JOB_STOPPED: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_STOPPED; -+ break; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ bool freq_updated = false; + -+ case KBASE_JOB_TERMINATED: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_TERMINATED; -+ break; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ mutex_lock(&kbdev->arb.arb_freq.arb_freq_lock); ++ if (kbdev->arb.arb_freq.freq_updated) { ++ kbdev->arb.arb_freq.freq_updated = 
false; ++ freq_updated = true; ++ } ++ mutex_unlock(&kbdev->arb.arb_freq.arb_freq_lock); + -+ case KBASE_JOB_CONFIG_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_CONFIG_FAULT; -+ break; ++ cancel_request_timer(kbdev); ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_INITIALIZING: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_INITIALIZING_WITH_GPU); ++ break; ++ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); ++ arb_vm_state->interrupts_installed = true; ++ kbase_install_interrupts(kbdev); ++ /* ++ * GPU GRANTED received while in stop can be a result of a ++ * repartitioning. ++ */ ++ kbase_gpuprops_req_curr_config_update(kbdev); ++ /* curr_config will be updated while resuming the PM. */ ++ queue_work(arb_vm_state->vm_arb_wq, ++ &arb_vm_state->vm_resume_work); ++ break; ++ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ kbase_pm_set_gpu_lost(kbdev, false); ++ kbase_arbif_gpu_stopped(kbdev, false); ++ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); ++ break; ++ default: ++ /* ++ * GPU_GRANTED can be received when there is a frequency update ++ * Only show a warning if received in an unexpected state ++ * without a frequency update ++ */ ++ if (!freq_updated) ++ dev_warn(kbdev->dev, ++ "GPU_GRANTED when not expected - state %s\n", ++ kbase_arbiter_pm_vm_state_str( ++ arb_vm_state->vm_state)); ++ break; ++ } ++} + -+ case KBASE_JOB_POWER_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_POWER_FAULT; -+ break; ++/** ++ * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles the start state of the VM ++ */ ++static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ case KBASE_JOB_READ_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_READ_FAULT; -+ break; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) { ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ } + -+ case KBASE_JOB_WRITE_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_WRITE_FAULT; -+ break; ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_IDLE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_IDLE); ++ queue_work(arb_vm_state->vm_arb_wq, ++ &arb_vm_state->vm_suspend_work); ++ break; ++ case KBASE_VM_STATE_ACTIVE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_ACTIVE); ++ queue_work(arb_vm_state->vm_arb_wq, ++ &arb_vm_state->vm_suspend_work); ++ break; ++ case KBASE_VM_STATE_STARTING: ++ dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_ACTIVE); ++ if (arb_vm_state->vm_arb_starting) ++ queue_work(arb_vm_state->vm_arb_wq, ++ &arb_vm_state->vm_suspend_work); ++ break; ++ case KBASE_VM_STATE_SUSPEND_PENDING: ++ /* Suspend finishes with a stop so nothing else to do */ ++ break; ++ default: ++ dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n", ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++ break; ++ } ++} + -+ case KBASE_JOB_AFFINITY_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_AFFINITY_FAULT; -+ break; ++/** ++ * kbase_gpu_lost() - Kbase signals GPU is lost 
on a lost event signal ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * On GPU lost event signals GPU_LOST to the aribiter ++ */ ++static void kbase_gpu_lost(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ bool handle_gpu_lost = false; + -+ case KBASE_JOB_BUS_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_BUS_FAULT; -+ break; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); + -+ case KBASE_INSTR_INVALID_PC: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_INVALID_PC; -+ break; ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_STARTING: ++ case KBASE_VM_STATE_ACTIVE: ++ case KBASE_VM_STATE_IDLE: ++ dev_warn(kbdev->dev, "GPU lost in state %s", ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++ kbase_arbiter_pm_vm_gpu_stop(kbdev); ++ handle_gpu_lost = true; ++ break; ++ case KBASE_VM_STATE_STOPPING_IDLE: ++ case KBASE_VM_STATE_STOPPING_ACTIVE: ++ case KBASE_VM_STATE_SUSPEND_PENDING: ++ dev_dbg(kbdev->dev, "GPU lost while stopping"); ++ handle_gpu_lost = true; ++ break; ++ case KBASE_VM_STATE_SUSPENDED: ++ case KBASE_VM_STATE_STOPPED: ++ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ dev_dbg(kbdev->dev, "GPU lost while already stopped"); ++ break; ++ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ dev_dbg(kbdev->dev, "GPU lost while waiting to suspend"); ++ kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); ++ break; ++ default: ++ break; ++ } ++ if (handle_gpu_lost) { ++ /* Releasing the VM state lock here is safe because ++ * we are guaranteed to be in either STOPPING_IDLE, ++ * STOPPING_ACTIVE or SUSPEND_PENDING at this point. ++ * The only transitions that are valid from here are to ++ * STOPPED, STOPPED_GPU_REQUESTED or SUSPENDED which can ++ * only happen at the completion of the GPU lost handling. ++ */ ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kbase_pm_handle_gpu_lost(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ } ++} + -+ case KBASE_INSTR_INVALID_ENC: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_INVALID_ENC; -+ break; ++/** ++ * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready ++ * to be moved to suspended state. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: True if its ready to be suspended else False. ++ */ ++static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( ++ struct kbase_device *kbdev) ++{ ++ switch (kbdev->pm.arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_SUSPENDED: ++ case KBASE_VM_STATE_STOPPED: ++ case KBASE_VM_STATE_IDLE: ++ case KBASE_VM_STATE_ACTIVE: ++ return true; ++ default: ++ return false; ++ } ++} + -+ case KBASE_INSTR_TYPE_MISMATCH: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_TYPE_MISMATCH; -+ break; ++/** ++ * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state ++ * until it receives the grant message from arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Prepares OS to be in suspend state until it receives GRANT message ++ * from Arbiter asynchronously. 
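++ *
++ * Reached via kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT).
++ * The function loops, holding vm_state_lock, until the VM reaches a state
++ * from which suspend is possible (see
++ * kbase_arbiter_pm_vm_os_suspend_ready_state()), dropping the lock while
++ * waiting for each state transition.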
++ */ ++static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ enum kbase_vm_state prev_state; + -+ case KBASE_INSTR_OPERAND_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_OPERAND_FAULT; -+ break; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ if (kbdev->arb.arb_if) { ++ if (kbdev->pm.arb_vm_state->vm_state == ++ KBASE_VM_STATE_SUSPENDED) ++ return; ++ } ++ /* Block suspend OS function until we are in a stable state ++ * with vm_state_lock ++ */ ++ while (!kbase_arbiter_pm_vm_os_suspend_ready_state(kbdev)) { ++ prev_state = arb_vm_state->vm_state; ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_STOPPING_ACTIVE: ++ case KBASE_VM_STATE_STOPPING_IDLE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_SUSPEND_PENDING); ++ break; ++ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); ++ break; ++ case KBASE_VM_STATE_STARTING: ++ if (!arb_vm_state->vm_arb_starting) { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_SUSPEND_PENDING); ++ kbase_arbiter_pm_vm_stopped(kbdev); ++ } ++ break; ++ default: ++ break; ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ wait_event(arb_vm_state->vm_state_wait, ++ arb_vm_state->vm_state != prev_state); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ } + -+ case KBASE_INSTR_TLS_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_TLS_FAULT; -+ break; ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_STOPPED: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_SUSPENDED); ++ break; ++ case KBASE_VM_STATE_IDLE: ++ case KBASE_VM_STATE_ACTIVE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_SUSPEND_PENDING); ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ /* Ensure resume has completed fully before starting suspend */ ++ flush_work(&arb_vm_state->vm_resume_work); ++ kbase_pm_driver_suspend(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ break; ++ case KBASE_VM_STATE_SUSPENDED: ++ break; ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, "Unexpected state to suspend"); ++ break; ++ } ++} + -+ case KBASE_INSTR_BARRIER_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_BARRIER_FAULT; -+ break; ++/** ++ * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives ++ * a grant message from arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Resume OS function once it receives GRANT message ++ * from Arbiter asynchronously. 
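++ *
++ * Reached via kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT).
++ * The VM is moved back to KBASE_VM_STATE_STOPPED_GPU_REQUESTED, the GPU is
++ * requested again from the Arbiter (with the request timer running), and
++ * the call blocks until the GPU is granted and vm_resume_work completes.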
++ */ ++static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ case KBASE_INSTR_ALIGN_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_INSTR_ALIGN_FAULT; -+ break; ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == ++ KBASE_VM_STATE_SUSPENDED, ++ "Unexpected state to resume"); + -+ case KBASE_DATA_INVALID_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_DATA_INVALID_FAULT; -+ break; ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); ++ kbase_arbif_gpu_request(kbdev); ++ start_request_timer(kbdev); + -+ case KBASE_TILE_RANGE_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_TILE_RANGE_FAULT; -+ break; ++ /* Release lock and block resume OS function until we have ++ * asynchronously received the GRANT message from the Arbiter and ++ * fully resumed ++ */ ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); ++ flush_work(&arb_vm_state->vm_resume_work); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++} + -+ case KBASE_ADDR_RANGE_FAULT: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_ADDRESS_RANGE_FAULT; -+ break; ++/** ++ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @evt: VM event ++ * ++ * The state machine function. Receives events and transitions states ++ * according the event received and the current state ++ */ ++void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, ++ enum kbase_arbif_evt evt) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ case KBASE_OUT_OF_MEMORY: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_OUT_OF_MEMORY; -+ break; ++ if (!kbdev->arb.arb_if) ++ return; + -+ case KBASE_UNKNOWN: -+ hw_error_status.js_status[job_slot] = -+ JS_STATUS_UNKNOWN; -+ break; -+ -+ default: -+ model_error_log(KBASE_CORE, -+ "\nAtom Chain 0x%llx: Invalid Error Mask!", -+ hw_error_status.current_jc); -+ break; -+ } -+ } -+#endif /* !MALI_USE_CSF */ -+ -+ /* we set JOB_FAIL_ */ -+ hw_error_status.job_irq_rawstat |= -+ (dummy->slots[job_slot].job_complete_irq_asserted) << -+ (job_slot + 16); -+ hw_error_status.job_irq_status |= -+ (((dummy->slots[job_slot].job_complete_irq_asserted) << -+ (job_slot)) & -+ (dummy->slots[job_slot].job_irq_mask << -+ job_slot)) << 16; -+ } else { -+ hw_error_status.job_irq_rawstat |= -+ (dummy->slots[job_slot].job_complete_irq_asserted) << -+ job_slot; -+ hw_error_status.job_irq_status |= -+ ((dummy->slots[job_slot].job_complete_irq_asserted) << -+ (job_slot)) & -+ (dummy->slots[job_slot].job_irq_mask << -+ job_slot); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, "%s %s\n", __func__, ++ kbase_arbiter_pm_vm_event_str(evt)); ++ if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && ++ arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) ++ KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt); ++ switch (evt) { ++ case KBASE_VM_GPU_GRANTED_EVT: ++ kbase_arbiter_pm_vm_gpu_start(kbdev); ++ break; ++ case KBASE_VM_GPU_STOP_EVT: ++ kbase_arbiter_pm_vm_gpu_stop(kbdev); ++ break; ++ case KBASE_VM_GPU_LOST_EVT: ++ dev_dbg(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); ++ kbase_gpu_lost(kbdev); ++ break; ++ case KBASE_VM_OS_SUSPEND_EVENT: ++ kbase_arbiter_pm_vm_os_prepare_suspend(kbdev); ++ break; ++ case KBASE_VM_OS_RESUME_EVENT: 
++ kbase_arbiter_pm_vm_os_resume(kbdev); ++ break; ++ case KBASE_VM_GPU_IDLE_EVENT: ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_ACTIVE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_IDLE); ++ kbase_arbif_gpu_idle(kbdev); ++ break; ++ default: ++ break; + } -+ } else { -+ hw_error_status.job_irq_rawstat |= -+ (dummy->slots[job_slot].job_complete_irq_asserted) << -+ job_slot; -+ hw_error_status.job_irq_status |= -+ ((dummy->slots[job_slot].job_complete_irq_asserted) << -+ (job_slot)) & -+ (dummy->slots[job_slot].job_irq_mask << job_slot); -+ } /* end of job register statuses */ -+ -+ if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { -+ int i; -+ -+ for (i = 0; i < NUM_MMU_AS; i++) { -+ if (i == hw_error_status.faulty_mmu_as) { -+ if (hw_error_status.as_faultstatus[i] == 0) { -+ u32 status = -+ hw_error_status.as_faultstatus[i]; -+ /* status reg is clean; it can be -+ * written -+ */ -+ switch (hw_error_status.errors_mask & -+ IS_A_MMU_ERROR) { -+ case KBASE_TRANSLATION_FAULT: -+ /* 0xCm means TRANSLATION FAULT -+ * (m is mmu_table_level) -+ */ -+ status = -+ ((1 << 7) | (1 << 6) | -+ hw_error_status.mmu_table_level -+ ); -+ break; -+ -+ case KBASE_PERMISSION_FAULT: -+ /*0xC8 means PERMISSION FAULT */ -+ status = ((1 << 7) | (1 << 6) | -+ (1 << 3)); -+ break; -+ -+ case KBASE_TRANSTAB_BUS_FAULT: -+ /* 0xDm means TRANSITION TABLE -+ * BUS FAULT (m is -+ * mmu_table_level) -+ */ -+ status = ((1 << 7) | (1 << 6) | -+ (1 << 4) | -+ hw_error_status.mmu_table_level -+ ); -+ break; -+ -+ case KBASE_ACCESS_FLAG: -+ /* 0xD8 means ACCESS FLAG */ -+ status = ((1 << 7) | (1 << 6) | -+ (1 << 4) | (1 << 3)); -+ break; -+ -+ default: -+ model_error_log(KBASE_CORE, -+ "\nAtom Chain 0x%llx: Invalid Error Mask!", -+ hw_error_status.current_jc); -+ break; -+ } -+ hw_error_status.as_faultstatus[i] = -+ status; -+ } ++ break; + -+ if (hw_error_status.errors_mask & -+ KBASE_TRANSTAB_BUS_FAULT) -+ hw_error_status.mmu_irq_rawstat |= -+ 1 << (16 + i); /* bus error */ -+ else -+ hw_error_status.mmu_irq_rawstat |= -+ 1 << i; /* page fault */ -+ } ++ case KBASE_VM_REF_EVENT: ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_STARTING: ++ case KBASE_VM_STATE_IDLE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_ACTIVE); ++ kbase_arbif_gpu_active(kbdev); ++ break; ++ case KBASE_VM_STATE_STOPPING_IDLE: ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_ACTIVE); ++ break; ++ default: ++ break; + } -+ } /*end of mmu register statuses */ -+ if (hw_error_status.errors_mask & IS_A_GPU_ERROR) { -+ if (hw_error_status.gpu_fault_status) { -+ /* not the first GPU error reported */ -+ hw_error_status.gpu_error_irq |= (1 << 7); -+ } else { -+ hw_error_status.gpu_error_irq |= 1; -+ switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) { -+ case KBASE_DELAYED_BUS_FAULT: -+ hw_error_status.gpu_fault_status = (1 << 7); -+ break; -+ -+ case KBASE_SHAREABILITY_FAULT: -+ hw_error_status.gpu_fault_status = (1 << 7) | -+ (1 << 3); -+ break; ++ break; + -+ default: -+ model_error_log(KBASE_CORE, -+ "\nAtom Chain 0x%llx: Invalid Error Mask!", -+ hw_error_status.current_jc); -+ break; ++ case KBASE_VM_GPU_INITIALIZED_EVT: ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_INITIALIZING_WITH_GPU: ++ lockdep_assert_held(&kbdev->pm.lock); ++ if (kbdev->pm.active_count > 0) { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_ACTIVE); ++ kbase_arbif_gpu_active(kbdev); ++ } else { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_IDLE); ++ 
kbase_arbif_gpu_idle(kbdev); + } ++ break; ++ default: ++ break; + } ++ break; ++ ++ default: ++ dev_alert(kbdev->dev, "Got Unknown Event!"); ++ break; + } -+ hw_error_status.errors_mask = 0; /*clear error mask */ ++ mutex_unlock(&arb_vm_state->vm_state_lock); +} + -+#if !MALI_USE_CSF -+static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) ++KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); ++ ++/** ++ * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * VM waits for a GPU assignment. ++ */ ++static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) +{ -+ int i; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ lockdep_assert_held(&hw_error_status.access_lock); -+ pr_debug("%s", "Updating the JS_ACTIVE register"); ++ dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); ++ wait_event(arb_vm_state->vm_state_wait, ++ arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || ++ arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); ++ dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); ++} + -+ for (i = 0; i < NUM_SLOTS; i++) { -+ int slot_active = dummy->slots[i].job_active; -+ int next_busy = dummy->slots[i].job_queued; ++/** ++ * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Checks if the virtual machine holds VM state lock. ++ * ++ * Return: true if GPU is assigned, else false. ++ */ ++static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( ++ struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { -+ /* clear the bits we're updating */ -+ dummy->job_irq_js_state &= ~((1 << (16 + i)) | -+ (1 << i)); -+ if (hw_error_status.js_status[i]) { -+ dummy->job_irq_js_state |= next_busy << -+ (i + 16); -+ if (mask & (1 << (i + 16))) { -+ /* clear job slot status */ -+ hw_error_status.js_status[i] = 0; -+ /* continue execution of jobchain */ -+ dummy->slots[i].job_active = -+ dummy->slots[i].job_queued; -+ } -+ } else { -+ /* set bits if needed */ -+ dummy->job_irq_js_state |= ((slot_active << i) | -+ (next_busy << (i + 16))); -+ } -+ } -+ } -+ pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state); ++ lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ return (arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || ++ arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); +} -+#endif /* !MALI_USE_CSF */ + +/** -+ * find_control_reg_values() - Look up constant control register values. -+ * @gpu: GPU name ++ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for ++ * arbitration mode ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @suspend_handler: The handler code for how to handle a suspend ++ * that might occur + * -+ * Look up the GPU name to find the correct set of control register values for -+ * that GPU. If not found, warn and use the first values in the array. ++ * This function handles a suspend event from the driver, ++ * communicating with the arbiter and waiting synchronously for the GPU ++ * to be granted again depending on the VM state. + * -+ * Return: Pointer to control register values for that GPU. ++ * Return: 0 on success else 1 suspend handler isn not possible. 
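++ *
++ * With KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE the call waits synchronously
++ * for the GPU to be assigned, dropping the PM lock and vm_state_lock and
++ * accounting for itself in gpu_users_waiting while it waits; the other
++ * handler codes cause an early return instead.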
+ */ -+static const struct control_reg_values_t *find_control_reg_values(const char *gpu) ++int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, ++ enum kbase_pm_suspend_handler suspend_handler) +{ -+ size_t i; -+ const struct control_reg_values_t *ret = NULL; ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ int res = 0; + -+ /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1 -+ * revisions respectively. As none of them are named "tGOx" the name comparison -+ * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0" -+ * or "r1p0" and is derived from the DDK's build configuration. In cases -+ * where it is unavailable, it defaults to tGOx r1p0. -+ */ -+ if (!strcmp(gpu, "tGOx")) { -+#ifdef CONFIG_GPU_HWVER -+ if (!strcmp(CONFIG_GPU_HWVER, "r0p0")) -+ gpu = "tGOx_r0p0"; -+ else if (!strcmp(CONFIG_GPU_HWVER, "r1p0")) -+#endif /* CONFIG_GPU_HWVER defined */ -+ gpu = "tGOx_r1p0"; -+ } ++ if (kbdev->arb.arb_if) { ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { ++ /* Update VM state since we have GPU work to do */ ++ if (arb_vm_state->vm_state == ++ KBASE_VM_STATE_STOPPING_IDLE) ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPING_ACTIVE); ++ else if (arb_vm_state->vm_state == ++ KBASE_VM_STATE_STOPPED) { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_STOPPED_GPU_REQUESTED); ++ kbase_arbif_gpu_request(kbdev); ++ start_request_timer(kbdev); ++ } else if (arb_vm_state->vm_state == ++ KBASE_VM_STATE_INITIALIZING_WITH_GPU) ++ break; + -+ for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { -+ const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; ++ if (suspend_handler != ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { + -+ if (!strcmp(fcrv->name, gpu)) { -+ ret = fcrv; -+ pr_debug("Found control register values for %s\n", gpu); -+ break; ++ /* In case of GPU lost, even if ++ * active_count > 0, we no longer have GPU ++ * access ++ */ ++ if (kbase_pm_is_gpu_lost(kbdev)) ++ res = 1; ++ ++ switch (suspend_handler) { ++ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: ++ res = 1; ++ break; ++ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: ++ if (kbdev->pm.active_count == 0) ++ res = 1; ++ break; ++ case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: ++ break; ++ default: ++ WARN(1, "Unknown suspend_handler\n"); ++ res = 1; ++ break; ++ } ++ break; ++ } ++ ++ /* Need to synchronously wait for GPU assignment */ ++ atomic_inc(&kbdev->pm.gpu_users_waiting); ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kbase_pm_unlock(kbdev); ++ kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); ++ kbase_pm_lock(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ atomic_dec(&kbdev->pm.gpu_users_waiting); + } ++ mutex_unlock(&arb_vm_state->vm_state_lock); + } ++ return res; ++} + -+ if (!ret) { -+ ret = &all_control_reg_values[0]; -+ pr_warn("Couldn't find control register values for GPU %s; using default %s\n", -+ gpu, ret->name); ++/** ++ * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received ++ * from arbiter. 
++ * @arb_freq: Pointer to struchture holding GPU clock frequenecy data ++ * @freq: New frequency value in KHz ++ */ ++void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, ++ uint32_t freq) ++{ ++ struct kbase_gpu_clk_notifier_data ndata; ++ ++ mutex_lock(&arb_freq->arb_freq_lock); ++ if (arb_freq->arb_freq != freq) { ++ ndata.new_rate = (unsigned long)freq * KHZ_TO_HZ; ++ ndata.old_rate = (unsigned long)arb_freq->arb_freq * KHZ_TO_HZ; ++ ndata.gpu_clk_handle = arb_freq; ++ arb_freq->arb_freq = freq; ++ arb_freq->freq_updated = true; ++ if (arb_freq->nb) ++ arb_freq->nb->notifier_call(arb_freq->nb, ++ POST_RATE_CHANGE, &ndata); + } + -+ return ret; ++ mutex_unlock(&arb_freq->arb_freq_lock); +} + -+void *midgard_model_create(struct kbase_device *kbdev) ++/** ++ * get_arb_gpu_clk() - Enumerate a GPU clock on the given index ++ * @kbdev: kbase_device pointer ++ * @index: GPU clock index ++ * ++ * Return: Pointer to structure holding GPU clock frequency data reported from ++ * arbiter, only index 0 is valid. ++ */ ++static void *get_arb_gpu_clk(struct kbase_device *kbdev, ++ unsigned int index) +{ -+ struct dummy_model_t *dummy = NULL; -+ -+ spin_lock_init(&hw_error_status.access_lock); -+ spin_lock_init(&performance_counters.access_lock); ++ if (index == 0) ++ return &kbdev->arb.arb_freq; ++ return NULL; ++} + -+ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ++/** ++ * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value ++ * @kbdev: kbase_device pointer ++ * @gpu_clk_handle: Handle unique to the enumerated GPU clock ++ * ++ * Return: The GPU clock frequency value saved when gpu is granted from arbiter ++ */ ++static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, ++ void *gpu_clk_handle) ++{ ++ uint32_t freq; ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *) gpu_clk_handle; + -+ if (dummy) { -+ dummy->job_irq_js_state = 0; -+ init_register_statuses(dummy); -+ dummy->control_reg_values = find_control_reg_values(no_mali_gpu); -+ performance_counters.l2_present = get_implementation_register( -+ GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); -+ performance_counters.shader_present = get_implementation_register( -+ GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); ++ mutex_lock(&arb_dev_freq->arb_freq_lock); ++ /* Convert from KHz to Hz */ ++ freq = arb_dev_freq->arb_freq * KHZ_TO_HZ; ++ mutex_unlock(&arb_dev_freq->arb_freq_lock); ++ return freq; ++} + -+ gpu_device_set_data(dummy, kbdev); ++/** ++ * arb_gpu_clk_notifier_register() - Register a clock rate change notifier. ++ * @kbdev: kbase_device pointer ++ * @gpu_clk_handle: Handle unique to the enumerated GPU clock ++ * @nb: notifier block containing the callback function pointer ++ * ++ * This function registers a callback function that is invoked whenever the ++ * frequency of the clock corresponding to @gpu_clk_handle changes. ++ * ++ * Return: 0 on success, negative error code otherwise. 
++ */ ++static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) ++{ ++ int ret = 0; ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *)gpu_clk_handle; + -+ dev_info(kbdev->dev, "Using Dummy Model"); -+ } ++ if (!arb_dev_freq->nb) ++ arb_dev_freq->nb = nb; ++ else ++ ret = -EBUSY; + -+ return dummy; ++ return ret; +} + -+void midgard_model_destroy(void *h) ++/** ++ * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier ++ * @kbdev: kbase_device pointer ++ * @gpu_clk_handle: Handle unique to the enumerated GPU clock ++ * @nb: notifier block containing the callback function pointer ++ * ++ * This function pointer is used to unregister a callback function that ++ * was previously registered to get notified of a frequency change of the ++ * clock corresponding to @gpu_clk_handle. ++ */ ++static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) +{ -+ kfree((void *)h); ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *)gpu_clk_handle; ++ if (arb_dev_freq->nb == nb) { ++ arb_dev_freq->nb = NULL; ++ } else { ++ dev_err(kbdev->dev, "%s - notifier did not match\n", ++ __func__); ++ } +} + -+static void midgard_model_get_outputs(void *h) -+{ -+ struct dummy_model_t *dummy = (struct dummy_model_t *)h; ++struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { ++ .get_gpu_clk_rate = get_arb_gpu_clk_rate, ++ .enumerate_gpu_clk = get_arb_gpu_clk, ++ .gpu_clk_notifier_register = arb_gpu_clk_notifier_register, ++ .gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister ++}; +diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h +new file mode 100644 +index 000000000..f863f8860 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ lockdep_assert_held(&hw_error_status.access_lock); ++/** ++ * DOC: Mali arbiter power manager state machine and APIs ++ */ + -+ if (hw_error_status.job_irq_status) -+ gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); ++#ifndef _MALI_KBASE_ARBITER_PM_H_ ++#define _MALI_KBASE_ARBITER_PM_H_ + -+ if ((dummy->power_changed && dummy->power_changed_mask) || -+ (dummy->reset_completed & dummy->reset_completed_mask) || -+ hw_error_status.gpu_error_irq || -+#if !MALI_USE_CSF -+ dummy->prfcnt_sample_completed || -+#else -+ (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || -+#endif -+ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) -+ gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); ++#include "mali_kbase_arbif.h" + -+ if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) -+ gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); -+} ++/** ++ * enum kbase_vm_state - Current PM Arbitration state. ++ * ++ * @KBASE_VM_STATE_INITIALIZING: Special state before arbiter is initialized. ++ * @KBASE_VM_STATE_INITIALIZING_WITH_GPU: Initialization after GPU ++ * has been granted. ++ * @KBASE_VM_STATE_SUSPENDED: KBase is suspended by OS and GPU is not assigned. ++ * @KBASE_VM_STATE_STOPPED: GPU is not assigned to KBase and is not required. ++ * @KBASE_VM_STATE_STOPPED_GPU_REQUESTED: GPU is not assigned to KBase ++ * but a request has been made. ++ * @KBASE_VM_STATE_STARTING: GPU is assigned and KBase is getting ready to run. ++ * @KBASE_VM_STATE_IDLE: GPU is assigned but KBase has no work to do ++ * @KBASE_VM_STATE_ACTIVE: GPU is assigned and KBase is busy using it ++ * @KBASE_VM_STATE_SUSPEND_PENDING: OS is going into suspend mode. ++ * @KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: OS is going into suspend mode but GPU ++ * has already been requested. ++ * In this situation we must wait for ++ * the Arbiter to send a GRANTED message ++ * and respond immediately with ++ * a STOPPED message before entering ++ * the suspend mode. ++ * @KBASE_VM_STATE_STOPPING_IDLE: Arbiter has sent a stopped message and there ++ * is currently no work to do on the GPU. ++ * @KBASE_VM_STATE_STOPPING_ACTIVE: Arbiter has sent a stopped message when ++ * KBase has work to do. ++ */ ++enum kbase_vm_state { ++ KBASE_VM_STATE_INITIALIZING, ++ KBASE_VM_STATE_INITIALIZING_WITH_GPU, ++ KBASE_VM_STATE_SUSPENDED, ++ KBASE_VM_STATE_STOPPED, ++ KBASE_VM_STATE_STOPPED_GPU_REQUESTED, ++ KBASE_VM_STATE_STARTING, ++ KBASE_VM_STATE_IDLE, ++ KBASE_VM_STATE_ACTIVE, ++ KBASE_VM_STATE_SUSPEND_PENDING, ++ KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT, ++ KBASE_VM_STATE_STOPPING_IDLE, ++ KBASE_VM_STATE_STOPPING_ACTIVE ++}; + -+static void midgard_model_update(void *h) -+{ -+ struct dummy_model_t *dummy = (struct dummy_model_t *)h; -+ int i; ++/** ++ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM Paravirtualized use ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Initialize the arbiter and other required resources during the runtime ++ * and request the GPU for the VM for the first time. ++ * ++ * Return: 0 if successful, otherwise a standard Linux error code ++ */ ++int kbase_arbiter_pm_early_init(struct kbase_device *kbdev); + -+ lockdep_assert_held(&hw_error_status.access_lock); ++/** ++ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources. 
++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Clean up all the resources ++ */ ++void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); + -+ for (i = 0; i < NUM_SLOTS; i++) { -+ if (!dummy->slots[i].job_active) -+ continue; ++/** ++ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Releases interrupts and set the interrupt flag to false ++ */ ++void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); + -+ if (dummy->slots[i].job_disabled) { -+ update_register_statuses(dummy, i); -+ continue; -+ } ++/** ++ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Install interrupts and set the interrupt_install flag to true. ++ * ++ * Return: 0 if success, or a Linux error code ++ */ ++int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); + -+ /* If there are any pending interrupts that have not -+ * been cleared we cannot run the job in the next register -+ * as we will overwrite the register status of the job in -+ * the head registers - which has not yet been read -+ */ -+ if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || -+ (hw_error_status.job_irq_rawstat & (1 << i))) { -+ continue; -+ } ++/** ++ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @event: The event to dispatch ++ * ++ * The state machine function. Receives events and transitions states ++ * according the event received and the current state ++ */ ++void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, ++ enum kbase_arbif_evt event); + -+ /*this job is done assert IRQ lines */ -+ signal_int(dummy, i); -+#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT -+ midgard_set_error(i); -+#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ -+ update_register_statuses(dummy, i); -+ /*if this job slot returned failures we cannot use it */ -+ if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) { -+ dummy->slots[i].job_active = 0; -+ continue; -+ } -+ /*process next job */ -+ dummy->slots[i].job_active = dummy->slots[i].job_queued; -+ dummy->slots[i].job_queued = 0; -+ if (dummy->slots[i].job_active) { -+ if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) -+ model_error_log(KBASE_CORE, -+ "\natom %lld running a job on a dirty slot", -+ hw_error_status.current_jc); -+ } -+ } -+} ++/** ++ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for ++ * arbitration mode ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @suspend_handler: The handler code for how to handle a suspend ++ * that might occur ++ * ++ * This function handles a suspend event from the driver, ++ * communicating with the arbiter and waiting synchronously for the GPU ++ * to be granted again depending on the VM state. 
++ * ++ * Return: 0 if success, 1 if failure due to system suspending/suspended ++ */ ++int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, ++ enum kbase_pm_suspend_handler suspend_handler); + -+static void invalidate_active_jobs(struct dummy_model_t *dummy) -+{ -+ int i; + -+ lockdep_assert_held(&hw_error_status.access_lock); ++/** ++ * kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function handles a stop event for the VM. ++ * It will update the VM state and forward the stop event to the driver. ++ */ ++void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); + -+ for (i = 0; i < NUM_SLOTS; i++) { -+ if (dummy->slots[i].job_active) { -+ hw_error_status.job_irq_rawstat |= (1 << (16 + i)); ++/** ++ * kbase_arbiter_set_max_config() - Set the max config data in kbase device. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @max_l2_slices: The maximum number of L2 slices. ++ * @max_core_mask: The largest core mask. ++ * ++ * This function handles a stop event for the VM. ++ * It will update the VM state and forward the stop event to the driver. ++ */ ++void kbase_arbiter_set_max_config(struct kbase_device *kbdev, ++ uint32_t max_l2_slices, ++ uint32_t max_core_mask); + -+ hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ -+ } -+ } -+} ++/** ++ * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 if the VM does not have access, 1 if it does, and a negative number ++ * if an error occurred ++ */ ++int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); + -+void midgard_model_write_reg(void *h, u32 addr, u32 value) -+{ -+ unsigned long flags; -+ struct dummy_model_t *dummy = (struct dummy_model_t *)h; ++extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; + -+ spin_lock_irqsave(&hw_error_status.access_lock, flags); ++/** ++ * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved ++ * from arbiter ++ * @arb_freq: GPU clock frequency value ++ * @arb_freq_lock: Mutex protecting access to arbfreq value ++ * @nb: Notifier block to receive rate change callbacks ++ * @freq_updated: Flag to indicate whether a frequency changed has just been ++ * communicated to avoid "GPU_GRANTED when not expected" warning ++ */ ++struct kbase_arbiter_freq { ++ uint32_t arb_freq; ++ struct mutex arb_freq_lock; ++ struct notifier_block *nb; ++ bool freq_updated; ++}; + -+#if !MALI_USE_CSF -+ if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && -+ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { -+ unsigned int slot_idx = (addr >> 7) & 0xf; ++/** ++ * kbase_arbiter_pm_update_gpu_freq() - Update GPU frequency ++ * @arb_freq: Pointer to GPU clock frequency data ++ * @freq: The new frequency ++ * ++ * Updates the GPU frequency and triggers any notifications ++ */ ++void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, ++ uint32_t freq); + -+ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); -+ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { -+ hw_error_status.current_jc &= -+ ~((u64) (0xFFFFFFFF)); -+ hw_error_status.current_jc |= (u64) value; -+ } -+ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { -+ hw_error_status.current_jc &= (u64) 0xFFFFFFFF; -+ hw_error_status.current_jc |= -+ ((u64) value) << 32; -+ } -+ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && -+ 
value == 1) { -+ pr_debug("%s", "start detected"); -+ KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || -+ !dummy->slots[slot_idx].job_queued); -+ if ((dummy->slots[slot_idx].job_active) || -+ (hw_error_status.job_irq_rawstat & -+ (1 << (slot_idx + 16)))) { -+ pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" -+ ); -+ dummy->slots[slot_idx].job_queued = 1; -+ } else { -+ dummy->slots[slot_idx].job_active = 1; -+ } -+ } ++#endif /*_MALI_KBASE_ARBITER_PM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +new file mode 100755 +index 000000000..efebc8a54 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +@@ -0,0 +1,58 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == -+ 0) -+ dummy->slots[slot_idx].job_queued = 0; ++bifrost_kbase-y += \ ++ backend/gpu/mali_kbase_cache_policy_backend.o \ ++ backend/gpu/mali_kbase_gpuprops_backend.o \ ++ backend/gpu/mali_kbase_irq_linux.o \ ++ backend/gpu/mali_kbase_js_backend.o \ ++ backend/gpu/mali_kbase_pm_backend.o \ ++ backend/gpu/mali_kbase_pm_driver.o \ ++ backend/gpu/mali_kbase_pm_metrics.o \ ++ backend/gpu/mali_kbase_pm_ca.o \ ++ backend/gpu/mali_kbase_pm_always_on.o \ ++ backend/gpu/mali_kbase_pm_coarse_demand.o \ ++ backend/gpu/mali_kbase_pm_policy.o \ ++ backend/gpu/mali_kbase_time.o \ ++ backend/gpu/mali_kbase_l2_mmu_config.o \ ++ backend/gpu/mali_kbase_clk_rate_trace_mgr.o + -+ if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && -+ (value == JS_COMMAND_SOFT_STOP || -+ value == JS_COMMAND_HARD_STOP)) { -+ /*dummy->slots[slot_idx].job_active = 0; */ -+ hw_error_status.current_job_slot = slot_idx; -+ if (value == JS_COMMAND_SOFT_STOP) { -+ hw_error_status.errors_mask = KBASE_JOB_STOPPED; -+ } else { /*value == 3 */ ++ifeq ($(MALI_USE_CSF),0) ++ bifrost_kbase-y += \ ++ backend/gpu/mali_kbase_instr_backend.o \ ++ backend/gpu/mali_kbase_jm_as.o \ ++ backend/gpu/mali_kbase_debug_job_fault_backend.o \ ++ backend/gpu/mali_kbase_jm_hw.o \ ++ backend/gpu/mali_kbase_jm_rb.o ++endif + -+ if (dummy->slots[slot_idx].job_disabled != 0) { -+ pr_debug("enabling slot after HARD_STOP" -+ ); -+ dummy->slots[slot_idx].job_disabled = 0; -+ } -+ hw_error_status.errors_mask = -+ KBASE_JOB_TERMINATED; -+ } -+ } -+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { -+ int i; + -+ for (i = 0; i < NUM_SLOTS; i++) { -+ if (value & ((1 << i) | (1 << (i + 16)))) -+ dummy->slots[i].job_complete_irq_asserted = 0; -+ /* hw_error_status.js_status[i] is cleared in -+ * update_job_irq_js_state -+ */ -+ } -+ pr_debug("%s", "job irq cleared"); -+ update_job_irq_js_state(dummy, value); -+ /*remove error condition for JOB */ -+ 
hw_error_status.job_irq_rawstat &= ~(value); -+ hw_error_status.job_irq_status &= ~(value); -+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { -+ int i; ++bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ ++ backend/gpu/mali_kbase_devfreq.o + -+ for (i = 0; i < NUM_SLOTS; i++) -+ dummy->slots[i].job_irq_mask = (value >> i) & 0x01; -+ pr_debug("job irq mask to value %x", value); -+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { -+#else /* !MALI_USE_CSF */ -+ if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { -+ pr_debug("%s", "job irq cleared"); ++ifneq ($(CONFIG_MALI_REAL_HW),y) ++ bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o ++endif + -+ hw_error_status.job_irq_rawstat &= ~(value); -+ hw_error_status.job_irq_status &= ~(value); -+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { -+ /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ -+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { -+#endif /* !MALI_USE_CSF */ -+ pr_debug("GPU_IRQ_MASK set to 0x%x", value); -+ dummy->reset_completed_mask = (value >> 8) & 0x01; -+ dummy->power_changed_mask = (value >> 9) & 0x03; -+ dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; -+#if MALI_USE_CSF -+ dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u; -+#endif -+ } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { -+ dummy->coherency_enable = value; -+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { -+ if (value & (1 << 8)) { -+ pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); -+ dummy->reset_completed = 0; -+ } -+ if (value & (3 << 9)) -+ dummy->power_changed = 0; ++# NO_MALI Dummy model interface ++bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o ++# HW error simulation ++bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o + -+ if (value & (1 << 17)) -+ dummy->clean_caches_completed = false; +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h +new file mode 100644 +index 000000000..6924fdb8a +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h +@@ -0,0 +1,30 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+#if MALI_USE_CSF -+ if (value & (1u << 20)) -+ dummy->flush_pa_range_completed = false; -+#endif /* MALI_USE_CSF */ ++/* ++ * Backend specific configuration ++ */ + -+#if !MALI_USE_CSF -+ if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */ -+ dummy->prfcnt_sample_completed = 0; -+#endif /* !MALI_USE_CSF */ ++#ifndef _KBASE_BACKEND_CONFIG_H_ ++#define _KBASE_BACKEND_CONFIG_H_ + -+ /*update error status */ -+ hw_error_status.gpu_error_irq &= ~(value); -+ } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { -+ switch (value) { -+ case GPU_COMMAND_SOFT_RESET: -+ case GPU_COMMAND_HARD_RESET: -+ pr_debug("gpu reset (%d) requested", value); -+ /* no more fault status */ -+ hw_error_status.gpu_fault_status = 0; -+ /* completed reset instantly */ -+ dummy->reset_completed = 1; -+ break; -+#if MALI_USE_CSF -+ case GPU_COMMAND_CACHE_CLN_INV_L2: -+ case GPU_COMMAND_CACHE_CLN_INV_L2_LSC: -+ case GPU_COMMAND_CACHE_CLN_INV_FULL: -+#else -+ case GPU_COMMAND_CLEAN_CACHES: -+ case GPU_COMMAND_CLEAN_INV_CACHES: -+#endif -+ pr_debug("clean caches requested"); -+ dummy->clean_caches_completed = true; -+ break; -+#if MALI_USE_CSF -+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2: -+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC: -+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL: -+ pr_debug("pa range flush requested"); -+ dummy->flush_pa_range_completed = true; -+ break; -+#endif /* MALI_USE_CSF */ -+#if !MALI_USE_CSF -+ case GPU_COMMAND_PRFCNT_SAMPLE: -+ midgard_model_dump_prfcnt(); -+ dummy->prfcnt_sample_completed = 1; -+#endif /* !MALI_USE_CSF */ -+ default: -+ break; -+ } -+#if MALI_USE_CSF -+ } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) && -+ addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) { -+ /* Writes ignored */ -+#endif -+ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { -+ dummy->l2_config = value; -+ } -+#if MALI_USE_CSF -+ else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && -+ addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + -+ (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { -+ if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) -+ hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; -+ } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && -+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { -+ /* Do nothing */ -+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && -+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { -+ /* Do nothing */ -+ } else if (addr == IPA_CONTROL_REG(COMMAND)) { -+ pr_debug("Received IPA_CONTROL command"); -+ } else if (addr == IPA_CONTROL_REG(TIMER)) { -+ ipa_control_timer_enabled = value ? 
true : false; -+ } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && -+ (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { -+ enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( -+ (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); -+ bool is_low_word = -+ !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); ++#endif /* _KBASE_BACKEND_CONFIG_H_ */ + -+ if (is_low_word) { -+ ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; -+ ipa_ctl_select_config[core_type] |= value; -+ } else { -+ ipa_ctl_select_config[core_type] &= U32_MAX; -+ ipa_ctl_select_config[core_type] |= ((u64)value << 32); -+ } -+ } -+#endif -+ else if (addr == MMU_REG(MMU_IRQ_MASK)) { -+ hw_error_status.mmu_irq_mask = value; -+ } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { -+ hw_error_status.mmu_irq_rawstat &= (~value); -+ } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { -+ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) -+ >> 6; +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c +new file mode 100644 +index 000000000..7c0abbaf8 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c +@@ -0,0 +1,92 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ switch (addr & 0x3F) { -+ case AS_COMMAND: -+ switch (value) { -+ case AS_COMMAND_NOP: -+ hw_error_status.as_command[mem_addr_space] = -+ value; -+ break; ++#include "backend/gpu/mali_kbase_cache_policy_backend.h" ++#include + -+ case AS_COMMAND_UPDATE: -+ hw_error_status.as_command[mem_addr_space] = -+ value; -+ if ((hw_error_status.as_faultstatus[ -+ mem_addr_space]) -+ && ((hw_error_status.as_transtab[ -+ mem_addr_space] & 0x3) != 0)) { -+ model_error_log(KBASE_CORE, -+ "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n" -+ ); -+ } else if ((hw_error_status.as_faultstatus[ -+ mem_addr_space]) -+ && ((hw_error_status.as_transtab[ -+ mem_addr_space] & 0x3) == 0)) { ++/** ++ * kbasep_amba_register_present() - Check AMBA_<> register is present ++ * in the GPU. ++ * @kbdev: Device pointer ++ * ++ * Note: Only for arch version 12.x.1 onwards. ++ * ++ * Return: true if AMBA_FEATURES/ENABLE registers are present. 
++ */ ++static bool kbasep_amba_register_present(struct kbase_device *kbdev) ++{ ++ return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= ++ GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); ++} + -+ /*invalidate all active jobs */ -+ invalidate_active_jobs(dummy); -+ /* error handled */ -+ hw_error_status.as_faultstatus[ -+ mem_addr_space] = 0; -+ } -+ break; ++void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, ++ u32 mode) ++{ ++ kbdev->current_gpu_coherency_mode = mode; + -+ case AS_COMMAND_LOCK: -+ case AS_COMMAND_UNLOCK: -+ hw_error_status.as_command[mem_addr_space] = -+ value; -+ break; ++ if (kbasep_amba_register_present(kbdev)) { ++ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + -+ case AS_COMMAND_FLUSH_PT: -+ case AS_COMMAND_FLUSH_MEM: -+ if (hw_error_status.as_command[mem_addr_space] -+ != AS_COMMAND_LOCK) -+ model_error_log(KBASE_CORE, -+ "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" -+ ); -+ else /* error handled if any */ -+ hw_error_status.as_faultstatus[ -+ mem_addr_space] = 0; -+ hw_error_status.as_command[mem_addr_space] = -+ value; -+ break; ++ val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); ++ kbase_reg_write(kbdev, AMBA_ENABLE, val); ++ } else ++ kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); ++} + -+ default: -+ model_error_log(KBASE_CORE, -+ "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", -+ value); -+ break; -+ } -+ break; ++u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) ++{ ++ u32 coherency_features; + -+ case AS_TRANSTAB_LO: -+ hw_error_status.as_transtab[mem_addr_space] &= -+ ~((u64) (0xffffffff)); -+ hw_error_status.as_transtab[mem_addr_space] |= -+ (u64) value; -+ break; ++ if (kbasep_amba_register_present(kbdev)) ++ coherency_features = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); ++ else ++ coherency_features = kbase_reg_read( ++ kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); + -+ case AS_TRANSTAB_HI: -+ hw_error_status.as_transtab[mem_addr_space] &= -+ (u64) 0xffffffff; -+ hw_error_status.as_transtab[mem_addr_space] |= -+ ((u64) value) << 32; -+ break; ++ return coherency_features; ++} + -+ case AS_LOCKADDR_LO: -+ case AS_LOCKADDR_HI: -+ case AS_MEMATTR_LO: -+ case AS_MEMATTR_HI: -+ case AS_TRANSCFG_LO: -+ case AS_TRANSCFG_HI: -+ /* Writes ignored */ -+ break; ++void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, ++ bool enable) ++{ ++ if (kbasep_amba_register_present(kbdev)) { ++ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + -+ default: -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", -+ mem_addr_space, addr, value); -+ break; -+ } -+ } else { -+ switch (addr) { -+#if !MALI_USE_CSF -+ case PRFCNT_BASE_LO: -+ performance_counters.prfcnt_base = -+ HI_MASK(performance_counters.prfcnt_base) | value; -+ performance_counters.prfcnt_base_cpu = -+ (u32 *)(uintptr_t)performance_counters.prfcnt_base; -+ break; -+ case PRFCNT_BASE_HI: -+ performance_counters.prfcnt_base = -+ LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); -+ performance_counters.prfcnt_base_cpu = -+ (u32 *)(uintptr_t)performance_counters.prfcnt_base; -+ break; -+ case PRFCNT_JM_EN: -+ performance_counters.prfcnt_en.fe = value; -+ break; -+ case PRFCNT_SHADER_EN: -+ performance_counters.prfcnt_en.shader = value; -+ break; -+ case PRFCNT_TILER_EN: -+ performance_counters.prfcnt_en.tiler = value; -+ break; -+ case PRFCNT_MMU_L2_EN: -+ performance_counters.prfcnt_en.l2 = value; -+ break; -+#endif /* !MALI_USE_CSF */ -+ case 
TILER_PWRON_LO: -+ dummy->power_on |= (value & 1) << 1; -+ /* Also ensure L2 is powered on */ -+ dummy->power_on |= value & 1; -+ dummy->power_changed = 1; -+ break; -+ case SHADER_PWRON_LO: -+ dummy->power_on |= -+ (value & dummy->control_reg_values->shader_present) << 2; -+ dummy->power_changed = 1; -+ break; -+ case L2_PWRON_LO: -+ dummy->power_on |= value & 1; -+ dummy->power_changed = 1; -+ break; -+ case STACK_PWRON_LO: -+ dummy->stack_power_on_lo |= value; -+ dummy->power_changed = 1; -+ break; -+ case TILER_PWROFF_LO: -+ dummy->power_on &= ~((value & 1) << 1); -+ dummy->power_changed = 1; -+ break; -+ case SHADER_PWROFF_LO: -+ dummy->power_on &= -+ ~((value & dummy->control_reg_values->shader_present) << 2); -+ dummy->power_changed = 1; -+ break; -+ case L2_PWROFF_LO: -+ dummy->power_on &= ~(value & 1); -+ /* Also ensure tiler is powered off */ -+ dummy->power_on &= ~((value & 1) << 1); -+ dummy->power_changed = 1; -+ break; -+ case STACK_PWROFF_LO: -+ dummy->stack_power_on_lo &= ~value; -+ dummy->power_changed = 1; -+ break; ++ val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); ++ kbase_reg_write(kbdev, AMBA_ENABLE, val); + -+ case TILER_PWROFF_HI: -+ case SHADER_PWROFF_HI: -+ case L2_PWROFF_HI: -+ case PWR_KEY: -+ case PWR_OVERRIDE0: -+#if !MALI_USE_CSF -+ case JM_CONFIG: -+ case PRFCNT_CONFIG: -+#else /* !MALI_USE_CSF */ -+ case CSF_CONFIG: -+#endif /* !MALI_USE_CSF */ -+ case SHADER_CONFIG: -+ case TILER_CONFIG: -+ case L2_MMU_CONFIG: -+ /* Writes ignored */ -+ break; -+ default: -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", -+ addr, value); -+ break; -+ } ++ } else { ++ WARN(1, "memory_cache_support not supported"); + } -+ -+ midgard_model_update(dummy); -+ midgard_model_get_outputs(dummy); -+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); +} + -+void midgard_model_read_reg(void *h, u32 addr, u32 *const value) ++void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) +{ -+ unsigned long flags; -+ struct dummy_model_t *dummy = (struct dummy_model_t *)h; ++ if (kbasep_amba_register_present(kbdev)) { ++ u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + -+ spin_lock_irqsave(&hw_error_status.access_lock, flags); ++ val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); ++ kbase_reg_write(kbdev, AMBA_ENABLE, val); ++ } else { ++ WARN(1, "invalidate_hint not supported"); ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h +new file mode 100644 +index 000000000..758e3be08 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ *value = 0; /* 0 by default */ -+#if !MALI_USE_CSF -+ if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { -+ pr_debug("%s", "JS_ACTIVE being read"); ++#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ ++#define _KBASE_CACHE_POLICY_BACKEND_H_ + -+ *value = dummy->job_irq_js_state; -+ } else if (addr == GPU_CONTROL_REG(GPU_ID)) { -+#else /* !MALI_USE_CSF */ -+ if (addr == GPU_CONTROL_REG(GPU_ID)) { -+#endif /* !MALI_USE_CSF */ ++#include "mali_kbase.h" ++#include + -+ *value = dummy->control_reg_values->gpu_id; -+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { -+ *value = hw_error_status.job_irq_rawstat; -+ pr_debug("%s", "JS_IRQ_RAWSTAT being read"); -+ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) { -+ *value = hw_error_status.job_irq_status; -+ pr_debug("JS_IRQ_STATUS being read %x", *value); -+ } -+#if !MALI_USE_CSF -+ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { -+ int i; ++/** ++ * kbase_cache_set_coherency_mode() - Sets the system coherency mode ++ * in the GPU. ++ * @kbdev: Device pointer ++ * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE ++ */ ++void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, ++ u32 mode); + -+ *value = 0; -+ for (i = 0; i < NUM_SLOTS; i++) -+ *value |= dummy->slots[i].job_irq_mask << i; -+ pr_debug("JS_IRQ_MASK being read %x", *value); -+ } -+#else /* !MALI_USE_CSF */ -+ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) -+ ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ -+#endif /* !MALI_USE_CSF */ -+ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { -+ *value = (dummy->reset_completed_mask << 8) | -+ ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | -+#if MALI_USE_CSF -+ ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) | -+#endif -+ (dummy->power_changed_mask << 9) | (1 << 7) | 1; -+ pr_debug("GPU_IRQ_MASK read %x", *value); -+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { -+ *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | -+ (dummy->reset_completed << 8) | -+#if !MALI_USE_CSF -+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -+#endif /* !MALI_USE_CSF */ -+ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | -+#if MALI_USE_CSF -+ ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) | -+#endif -+ hw_error_status.gpu_error_irq; -+ pr_debug("GPU_IRQ_RAWSTAT read %x", *value); -+ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { -+ *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | -+ ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | -+ ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | -+#if !MALI_USE_CSF -+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | -+#endif /* !MALI_USE_CSF */ -+ (((dummy->clean_caches_completed && -+ dummy->clean_caches_completed_irq_enabled) ? -+ 1u : -+ 0u) -+ << 17) | -+#if MALI_USE_CSF -+ (((dummy->flush_pa_range_completed && -+ dummy->flush_pa_range_completed_irq_enabled) ? 
-+ 1u : -+ 0u) -+ << 20) | -+#endif -+ hw_error_status.gpu_error_irq; -+ pr_debug("GPU_IRQ_STAT read %x", *value); -+ } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { -+ *value = 0; -+#if !MALI_USE_CSF -+ } else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) { -+ *value = 0; -+#endif -+ } else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) { -+ *value = hw_error_status.gpu_fault_status; -+ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { -+ *value = dummy->l2_config; -+ } -+#if MALI_USE_CSF -+ else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && -+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { -+ *value = 0; -+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && -+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { -+ *value = 0; -+ } -+#endif -+ else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && -+ (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { -+ switch (addr) { -+ case GPU_CONTROL_REG(SHADER_PRESENT_LO): -+ case GPU_CONTROL_REG(SHADER_PRESENT_HI): -+ case GPU_CONTROL_REG(TILER_PRESENT_LO): -+ case GPU_CONTROL_REG(TILER_PRESENT_HI): -+ case GPU_CONTROL_REG(L2_PRESENT_LO): -+ case GPU_CONTROL_REG(L2_PRESENT_HI): -+ case GPU_CONTROL_REG(STACK_PRESENT_LO): -+ case GPU_CONTROL_REG(STACK_PRESENT_HI): -+ *value = get_implementation_register(addr, dummy->control_reg_values); -+ break; -+ case GPU_CONTROL_REG(SHADER_READY_LO): -+ *value = (dummy->power_on >> 0x02) & -+ get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), -+ dummy->control_reg_values); -+ break; -+ case GPU_CONTROL_REG(TILER_READY_LO): -+ *value = (dummy->power_on >> 0x01) & -+ get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), -+ dummy->control_reg_values); -+ break; -+ case GPU_CONTROL_REG(L2_READY_LO): -+ *value = dummy->power_on & -+ get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), -+ dummy->control_reg_values); -+ break; -+ case GPU_CONTROL_REG(STACK_READY_LO): -+ *value = dummy->stack_power_on_lo & -+ get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), -+ dummy->control_reg_values); -+ break; ++/** ++ * kbase_cache_get_coherency_features() - Get the coherency features ++ * in the GPU. ++ * @kbdev: Device pointer ++ * ++ * Return: Register value to be returned ++ */ ++u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); + -+ case GPU_CONTROL_REG(SHADER_READY_HI): -+ case GPU_CONTROL_REG(TILER_READY_HI): -+ case GPU_CONTROL_REG(L2_READY_HI): -+ case GPU_CONTROL_REG(STACK_READY_HI): -+ *value = 0; -+ break; ++/** ++ * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support ++ * in the GPU. ++ * @kbdev: Device pointer ++ * @enable: true for enable. ++ * ++ * Note: Only for arch version 12.x.1 onwards. ++ */ ++void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, ++ bool enable); ++/** ++ * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint ++ * in the GPU. ++ * @kbdev: Device pointer ++ * @enable: true for enable. ++ * ++ * Note: Only for arch version 12.x.1 onwards. ++ */ ++void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); ++#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +new file mode 100644 +index 000000000..ddd03ca23 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +@@ -0,0 +1,326 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. 
All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): -+ case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): -+ case GPU_CONTROL_REG(TILER_PWRTRANS_LO): -+ case GPU_CONTROL_REG(TILER_PWRTRANS_HI): -+ case GPU_CONTROL_REG(L2_PWRTRANS_LO): -+ case GPU_CONTROL_REG(L2_PWRTRANS_HI): -+ case GPU_CONTROL_REG(STACK_PWRTRANS_LO): -+ case GPU_CONTROL_REG(STACK_PWRTRANS_HI): -+ *value = 0; -+ break; ++/* ++ * Implementation of the GPU clock rate trace manager. ++ */ + -+ case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): -+ case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): -+ case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): -+ case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): -+ case GPU_CONTROL_REG(L2_PWRACTIVE_LO): -+ case GPU_CONTROL_REG(L2_PWRACTIVE_HI): -+ *value = 0; -+ break; ++#include ++#include ++#include ++#include ++#include ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + -+#if !MALI_USE_CSF -+ case GPU_CONTROL_REG(JM_CONFIG): -+#else /* !MALI_USE_CSF */ -+ case GPU_CONTROL_REG(CSF_CONFIG): -+#endif /* !MALI_USE_CSF */ ++#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY ++#include ++#else ++#include "mali_power_gpu_frequency_trace.h" ++#endif + -+ case GPU_CONTROL_REG(SHADER_CONFIG): -+ case GPU_CONTROL_REG(TILER_CONFIG): -+ case GPU_CONTROL_REG(L2_MMU_CONFIG): -+ *value = 0; -+ break; ++#ifndef CLK_RATE_TRACE_OPS ++#define CLK_RATE_TRACE_OPS (NULL) ++#endif + -+ case GPU_CONTROL_REG(COHERENCY_FEATURES): -+ *value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */ -+ break; -+ case GPU_CONTROL_REG(COHERENCY_ENABLE): -+ *value = dummy->coherency_enable; -+ break; ++/** ++ * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. ++ * @kbdev: Pointer to kbase device, used to check if arbitration is enabled ++ * when compiled with arbiter support. ++ * Return: Pointer to clk trace ops if supported or NULL. 
++ */ ++static struct kbase_clk_rate_trace_op_conf * ++get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) ++{ ++ /* base case */ ++ struct kbase_clk_rate_trace_op_conf *callbacks = ++ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) ++ const void *arbiter_if_node; + -+ case GPU_CONTROL_REG(THREAD_TLS_ALLOC): -+ *value = 0; -+ break; ++ if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) ++ return callbacks; + -+ default: -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: Reading unknown control reg 0x%x\n", -+ addr); -+ break; -+ } -+#if !MALI_USE_CSF -+ } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && -+ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { -+ int slot_idx = (addr >> 7) & 0xf; -+ int sub_reg = addr & 0x7F; ++ arbiter_if_node = ++ of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); ++ /* Arbitration enabled, override the callback pointer.*/ ++ if (arbiter_if_node) ++ callbacks = &arb_clk_rate_trace_ops; ++ else ++ dev_dbg(kbdev->dev, ++ "Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n"); + -+ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); -+ switch (sub_reg) { -+ case JS_HEAD_NEXT_LO: -+ *value = (u32) ((hw_error_status.current_jc) & -+ 0xFFFFFFFF); -+ break; -+ case JS_HEAD_NEXT_HI: -+ *value = (u32) (hw_error_status.current_jc >> 32); -+ break; -+ case JS_STATUS: -+ if (hw_error_status.js_status[slot_idx]) -+ *value = hw_error_status.js_status[slot_idx]; -+ else /* 0x08 means active, 0x00 idle */ -+ *value = (dummy->slots[slot_idx].job_active) -+ << 3; -+ break; -+ case JS_COMMAND_NEXT: -+ *value = dummy->slots[slot_idx].job_queued; -+ break; ++#endif + -+ /* The dummy model does not implement these registers -+ * avoid printing error messages -+ */ -+ case JS_HEAD_HI: -+ case JS_HEAD_LO: -+ case JS_TAIL_HI: -+ case JS_TAIL_LO: -+ case JS_FLUSH_ID_NEXT: -+ break; ++ return callbacks; ++} + -+ default: -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: unknown job slot reg 0x%02X being read\n", -+ sub_reg); -+ break; ++static int gpu_clk_rate_change_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct kbase_gpu_clk_notifier_data *ndata = data; ++ struct kbase_clk_data *clk_data = ++ container_of(nb, struct kbase_clk_data, clk_rate_change_nb); ++ struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm; ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle)) ++ return NOTIFY_BAD; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ if (event == POST_RATE_CHANGE) { ++ if (!clk_rtm->gpu_idle && ++ (clk_data->clock_val != ndata->new_rate)) { ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, ndata->new_rate); + } -+#endif /* !MALI_USE_CSF */ -+ } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { -+ *value = dummy->control_reg_values->as_present; -+#if !MALI_USE_CSF -+ } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { -+ *value = 0x7; -+#endif /* !MALI_USE_CSF */ -+ } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && -+ addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { -+ switch (addr) { -+ case GPU_CONTROL_REG(TEXTURE_FEATURES_0): -+ *value = 0xfffff; -+ break; + -+ case GPU_CONTROL_REG(TEXTURE_FEATURES_1): -+ *value = 0xffff; -+ break; ++ clk_data->clock_val = ndata->new_rate; ++ } ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); + -+ case GPU_CONTROL_REG(TEXTURE_FEATURES_2): -+ *value = 0x9f81ffff; -+ break; ++ return 
NOTIFY_DONE; ++} + -+ case GPU_CONTROL_REG(TEXTURE_FEATURES_3): -+ *value = 0; -+ break; -+ } -+#if !MALI_USE_CSF -+ } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && -+ addr <= GPU_CONTROL_REG(JS15_FEATURES)) { -+ switch (addr) { -+ case GPU_CONTROL_REG(JS0_FEATURES): -+ *value = 0x20e; -+ break; ++static int gpu_clk_data_init(struct kbase_device *kbdev, ++ void *gpu_clk_handle, unsigned int index) ++{ ++ struct kbase_clk_rate_trace_op_conf *callbacks; ++ struct kbase_clk_data *clk_data; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ int ret = 0; + -+ case GPU_CONTROL_REG(JS1_FEATURES): -+ *value = 0x1fe; -+ break; ++ callbacks = get_clk_rate_trace_callbacks(kbdev); + -+ case GPU_CONTROL_REG(JS2_FEATURES): -+ *value = 0x7e; -+ break; ++ if (WARN_ON(!callbacks) || ++ WARN_ON(!gpu_clk_handle) || ++ WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) ++ return -EINVAL; + -+ default: -+ *value = 0; -+ break; -+ } -+#endif /* !MALI_USE_CSF */ -+ } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) -+ && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { -+ switch (addr) { -+ case GPU_CONTROL_REG(L2_FEATURES): -+ *value = 0x6100206; -+ break; ++ clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); ++ if (!clk_data) { ++ dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); ++ return -ENOMEM; ++ } + -+ case GPU_CONTROL_REG(CORE_FEATURES): -+ *value = dummy->control_reg_values->core_features; -+ break; ++ clk_data->index = (u8)index; ++ clk_data->gpu_clk_handle = gpu_clk_handle; ++ /* Store the initial value of clock */ ++ clk_data->clock_val = ++ callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); + -+ case GPU_CONTROL_REG(TILER_FEATURES): -+ *value = dummy->control_reg_values->tiler_features; -+ break; ++ { ++ /* At the initialization time, GPU is powered off. 
*/ ++ unsigned long flags; + -+ case GPU_CONTROL_REG(MEM_FEATURES): -+ /* Bit 0: Core group is coherent */ -+ *value = 0x01; -+ /* Bits 11:8: L2 slice count - 1 */ -+ *value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8; -+ break; ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, 0); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++ } + -+ case GPU_CONTROL_REG(MMU_FEATURES): -+ *value = dummy->control_reg_values->mmu_features; -+ break; -+ } -+ } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) -+ && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { -+ switch (addr) { -+ case GPU_CONTROL_REG(THREAD_FEATURES): -+ *value = dummy->control_reg_values->thread_features -+ | (IMPLEMENTATION_MODEL << 30); -+ break; -+ case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): -+ *value = dummy->control_reg_values->thread_max_barrier_size; -+ break; -+ case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE): -+ *value = dummy->control_reg_values->thread_max_workgroup_size; -+ break; -+ case GPU_CONTROL_REG(THREAD_MAX_THREADS): -+ *value = dummy->control_reg_values->thread_max_threads; -+ break; -+ } -+ } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) -+ && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { -+ *value = 0; -+ } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) -+ && addr <= MMU_AS_REG(15, AS_STATUS)) { -+ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) -+ >> 6; ++ clk_data->clk_rtm = clk_rtm; ++ clk_rtm->clks[index] = clk_data; + -+ switch (addr & 0x3F) { -+ case AS_TRANSTAB_LO: -+ *value = (u32) -+ (hw_error_status.as_transtab[mem_addr_space] & -+ 0xffffffff); -+ break; ++ clk_data->clk_rate_change_nb.notifier_call = ++ gpu_clk_rate_change_notifier; + -+ case AS_TRANSTAB_HI: -+ *value = (u32) -+ (hw_error_status.as_transtab[mem_addr_space] >> -+ 32); -+ break; ++ if (callbacks->gpu_clk_notifier_register) ++ ret = callbacks->gpu_clk_notifier_register(kbdev, ++ gpu_clk_handle, &clk_data->clk_rate_change_nb); ++ if (ret) { ++ dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); ++ kfree(clk_data); ++ } + -+ case AS_STATUS: -+ *value = 0; -+ break; ++ return ret; ++} + -+ case AS_FAULTSTATUS: -+ if (mem_addr_space == hw_error_status.faulty_mmu_as) -+ *value = hw_error_status.as_faultstatus[ -+ hw_error_status.faulty_mmu_as]; -+ else -+ *value = 0; -+ break; ++int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) ++{ ++ struct kbase_clk_rate_trace_op_conf *callbacks; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ int ret = 0; + -+ case AS_LOCKADDR_LO: -+ case AS_LOCKADDR_HI: -+ case AS_MEMATTR_LO: -+ case AS_MEMATTR_HI: -+ case AS_TRANSCFG_LO: -+ case AS_TRANSCFG_HI: -+ /* Read ignored */ -+ *value = 0; -+ break; ++ callbacks = get_clk_rate_trace_callbacks(kbdev); + -+ default: -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: Reading unsupported MMU #%d register 0x%x. 
Returning 0\n", -+ mem_addr_space, addr); -+ *value = 0; -+ break; -+ } -+ } else if (addr == MMU_REG(MMU_IRQ_MASK)) { -+ *value = hw_error_status.mmu_irq_mask; -+ } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { -+ *value = hw_error_status.mmu_irq_rawstat; -+ } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { -+ *value = hw_error_status.mmu_irq_mask & -+ hw_error_status.mmu_irq_rawstat; ++ spin_lock_init(&clk_rtm->lock); ++ INIT_LIST_HEAD(&clk_rtm->listeners); ++ ++ /* Return early if no callbacks provided for clock rate tracing */ ++ if (!callbacks) { ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++ return 0; + } -+#if MALI_USE_CSF -+ else if (addr == IPA_CONTROL_REG(STATUS)) { -+ *value = (ipa_control_timer_enabled << 31); -+ } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && -+ (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( -+ IPA_CTL_MAX_VAL_CNT_IDX)))) { -+ u32 counter_index = -+ (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; -+ bool is_low_word = -+ !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); + -+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, -+ counter_index, is_low_word); -+ } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && -+ (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( -+ IPA_CTL_MAX_VAL_CNT_IDX)))) { -+ u32 counter_index = -+ (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; -+ bool is_low_word = -+ !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); ++ clk_rtm->gpu_idle = true; + -+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, -+ counter_index, is_low_word); -+ } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && -+ (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( -+ IPA_CTL_MAX_VAL_CNT_IDX)))) { -+ u32 counter_index = -+ (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; -+ bool is_low_word = -+ !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ void *gpu_clk_handle = ++ callbacks->enumerate_gpu_clk(kbdev, i); + -+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, -+ counter_index, is_low_word); -+ } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && -+ (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( -+ IPA_CTL_MAX_VAL_CNT_IDX)))) { -+ u32 counter_index = -+ (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; -+ bool is_low_word = -+ !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); ++ if (!gpu_clk_handle) ++ break; + -+ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, -+ counter_index, is_low_word); ++ ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i); ++ if (ret) ++ goto error; + } -+#endif -+ else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { -+ *value = dummy->control_reg_values->gpu_features_lo; -+ } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { -+ *value = dummy->control_reg_values->gpu_features_hi; ++ ++ /* Activate clock rate trace manager if at least one GPU clock was ++ * enumerated. ++ */ ++ if (i) { ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks); + } else { -+ model_error_log(KBASE_CORE, -+ "Dummy model register access: Reading unsupported register 0x%x. 
Returning 0\n", -+ addr); -+ *value = 0; ++ dev_info(kbdev->dev, "No clock(s) available for rate tracing"); ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); + } + -+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); -+ CSTD_UNUSED(dummy); ++ return 0; ++ ++error: ++ while (i--) { ++ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( ++ kbdev, clk_rtm->clks[i]->gpu_clk_handle, ++ &clk_rtm->clks[i]->clk_rate_change_nb); ++ kfree(clk_rtm->clks[i]); ++ } ++ ++ return ret; +} + -+static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, -+ u32 usr_data_size, u32 core_count) ++void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) +{ -+ u32 sample_size; -+ u32 *usr_data = NULL; -+ -+ lockdep_assert_held(&performance_counters.access_lock); ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; + -+ sample_size = -+ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); ++ WARN_ON(!list_empty(&clk_rtm->listeners)); + -+ if ((usr_data_size >= usr_data_offset) && -+ (sample_size <= usr_data_size - usr_data_offset)) -+ usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; + -+ if (!usr_data) -+ model_error_log(KBASE_CORE, "Unable to set counter sample 1"); -+ else { -+ u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE; -+ u32 i; ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (!clk_rtm->clks[i]) ++ break; + -+ for (i = 0; i < loop_cnt; i++) { -+ counters[i] = usr_data[i]; -+ } ++ if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) ++ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister ++ (kbdev, clk_rtm->clks[i]->gpu_clk_handle, ++ &clk_rtm->clks[i]->clk_rate_change_nb); ++ kfree(clk_rtm->clks[i]); + } + -+ return usr_data_offset + sample_size; ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); +} + -+static u32 set_kernel_sample_core_type(u64 *counters, -+ u64 *usr_data_start, u32 usr_data_offset, -+ u32 usr_data_size, u32 core_count) ++void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) +{ -+ u32 sample_size; -+ u64 *usr_data = NULL; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ unsigned long flags; + -+ lockdep_assert_held(&performance_counters.access_lock); ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; + -+ sample_size = -+ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); ++ spin_lock_irqsave(&clk_rtm->lock, flags); + -+ if ((usr_data_size >= usr_data_offset) && -+ (sample_size <= usr_data_size - usr_data_offset)) -+ usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; + -+ if (!usr_data) -+ model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1"); -+ else -+ memcpy(counters, usr_data, sample_size); ++ if (!clk_data) ++ break; + -+ return usr_data_offset + sample_size; ++ if (unlikely(!clk_data->clock_val)) ++ continue; ++ ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, clk_data->clock_val); ++ } ++ ++ clk_rtm->gpu_idle = false; ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + -+/* Counter values injected through ioctl are of 32 bits */ -+int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) ++void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) +{ ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; 
++ unsigned int i; + unsigned long flags; -+ u32 *user_data; -+ u32 offset = 0; + -+ if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) -+ return -EINVAL; ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; + -+ /* copy_from_user might sleep so can't be called from inside a spinlock -+ * allocate a temporary buffer for user data and copy to that before taking -+ * the lock -+ */ -+ user_data = kmalloc(size, GFP_KERNEL); -+ if (!user_data) -+ return -ENOMEM; ++ spin_lock_irqsave(&clk_rtm->lock, flags); + -+ if (copy_from_user(user_data, data, size)) { -+ model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); -+ kfree(user_data); -+ return -EINVAL; -+ } ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; + -+ spin_lock_irqsave(&performance_counters.access_lock, flags); -+#if !MALI_USE_CSF -+ offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, -+ size, 1); -+#else -+ offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, -+ size, 1); -+#endif /* !MALI_USE_CSF */ -+ offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, -+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); -+ offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, -+ size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); -+ offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, -+ size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); ++ if (!clk_data) ++ break; + -+ kfree(user_data); -+ return 0; ++ if (unlikely(!clk_data->clock_val)) ++ continue; ++ ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, 0); ++ } ++ ++ clk_rtm->gpu_idle = true; ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + -+/* Counter values injected through kutf are of 64 bits */ -+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) ++void kbase_clk_rate_trace_manager_notify_all( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ u32 clk_index, ++ unsigned long new_rate) +{ -+ unsigned long flags; -+ u32 offset = 0; -+ -+ spin_lock_irqsave(&performance_counters.access_lock, flags); -+#if !MALI_USE_CSF -+ offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, -+ 1); -+#else -+ offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, -+ 1); -+#endif /* !MALI_USE_CSF */ -+ offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, -+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); -+ offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, -+ hweight64(performance_counters.l2_present)); -+ offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, -+ size, hweight64(performance_counters.shader_present)); -+ spin_unlock_irqrestore(&performance_counters.access_lock, flags); -+} -+KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); ++ struct kbase_clk_rate_listener *pos; ++ struct kbase_device *kbdev; + -+void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, -+ u64 *l2_present, u64 *shader_present) -+{ -+ if (shader_present) -+ *shader_present = performance_counters.shader_present; -+ if (l2_present) -+ *l2_present = performance_counters.l2_present; -+} 
-+KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); ++ lockdep_assert_held(&clk_rtm->lock); + -+void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, -+ u64 l2_present, u64 shader_present) -+{ -+ if (WARN_ON(!l2_present || !shader_present -+ || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS -+ || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) -+ return; ++ kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); + -+ performance_counters.l2_present = l2_present; -+ performance_counters.shader_present = shader_present; ++ dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", ++ __func__, clk_index, new_rate, current->pid); + -+ /* Update the GPU properties used by vinstr to calculate the counter -+ * dump buffer size. -+ */ -+ kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); -+ kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; -+ kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); -+ kbdev->gpu_props.curr_config.shader_present = shader_present; -+} -+KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); ++ /* Raise standard `power/gpu_frequency` ftrace event */ ++ { ++ unsigned long new_rate_khz = new_rate; + -+int gpu_model_control(void *model, -+ struct kbase_model_control_params *params) -+{ -+ struct dummy_model_t *dummy = (struct dummy_model_t *)model; -+ int i; -+ unsigned long flags; ++#if BITS_PER_LONG == 64 ++ do_div(new_rate_khz, 1000); ++#elif BITS_PER_LONG == 32 ++ new_rate_khz /= 1000; ++#else ++#error "unsigned long division is not supported for this architecture" ++#endif + -+ if (params->command == KBASE_MC_DISABLE_JOBS) { -+ for (i = 0; i < NUM_SLOTS; i++) -+ dummy->slots[i].job_disabled = params->value; -+ } else { -+ return -EINVAL; ++ trace_gpu_frequency(new_rate_khz, clk_index); + } + -+ spin_lock_irqsave(&hw_error_status.access_lock, flags); -+ midgard_model_update(dummy); -+ midgard_model_get_outputs(dummy); -+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); -+ -+ return 0; -+} -+ -+/** -+ * kbase_is_gpu_removed - Has the GPU been removed. -+ * @kbdev: Kbase device pointer -+ * -+ * This function would return true if the GPU has been removed. -+ * It is stubbed here -+ * Return: Always false -+ */ -+bool kbase_is_gpu_removed(struct kbase_device *kbdev) -+{ -+ return false; ++ /* Notify the listeners. */ ++ list_for_each_entry(pos, &clk_rtm->listeners, node) { ++ pos->notify(pos, clk_index, new_rate); ++ } +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h ++KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all); +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h new file mode 100644 -index 000000000..84842291c +index 000000000..35b3b8d06 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h -@@ -0,0 +1,224 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +@@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -126879,219 +126608,149 @@ index 000000000..84842291c + * + */ + -+/* -+ * Dummy Model interface -+ * -+ * Support for NO_MALI dummy Model interface. -+ * -+ * +-----------------------------------+ -+ * | Kbase read/write/IRQ | -+ * +-----------------------------------+ -+ * | Model Linux Framework | -+ * +-----------------------------------+ -+ * | Model Dummy interface definitions | -+ * +-----------------+-----------------+ -+ * | Fake R/W | Fake IRQ | -+ * +-----------------+-----------------+ -+ */ -+ -+#ifndef _KBASE_MODEL_DUMMY_H_ -+#define _KBASE_MODEL_DUMMY_H_ -+ -+#include -+#include -+ -+#define model_error_log(module, ...) pr_err(__VA_ARGS__) ++#ifndef _KBASE_CLK_RATE_TRACE_MGR_ ++#define _KBASE_CLK_RATE_TRACE_MGR_ + -+#define NUM_SLOTS 4 /*number of job slots */ ++/* The index of top clock domain in kbase_clk_rate_trace_manager:clks. */ ++#define KBASE_CLOCK_DOMAIN_TOP (0) + -+/*Errors Mask Codes*/ -+/* each bit of errors_mask is associated to a specific error: -+ * NON FAULT STATUS CODES: only the following are implemented since the others -+ * represent normal working statuses ++/* The index of shader-cores clock domain in ++ * kbase_clk_rate_trace_manager:clks. + */ -+#define KBASE_JOB_INTERRUPTED (1<<0) -+#define KBASE_JOB_STOPPED (1<<1) -+#define KBASE_JOB_TERMINATED (1<<2) -+ -+/* JOB EXCEPTIONS: */ -+#define KBASE_JOB_CONFIG_FAULT (1<<3) -+#define KBASE_JOB_POWER_FAULT (1<<4) -+#define KBASE_JOB_READ_FAULT (1<<5) -+#define KBASE_JOB_WRITE_FAULT (1<<6) -+#define KBASE_JOB_AFFINITY_FAULT (1<<7) -+#define KBASE_JOB_BUS_FAULT (1<<8) -+#define KBASE_INSTR_INVALID_PC (1<<9) -+#define KBASE_INSTR_INVALID_ENC (1<<10) -+#define KBASE_INSTR_TYPE_MISMATCH (1<<11) -+#define KBASE_INSTR_OPERAND_FAULT (1<<12) -+#define KBASE_INSTR_TLS_FAULT (1<<13) -+#define KBASE_INSTR_BARRIER_FAULT (1<<14) -+#define KBASE_INSTR_ALIGN_FAULT (1<<15) -+#define KBASE_DATA_INVALID_FAULT (1<<16) -+#define KBASE_TILE_RANGE_FAULT (1<<17) -+#define KBASE_ADDR_RANGE_FAULT (1<<18) -+#define KBASE_OUT_OF_MEMORY (1<<19) -+#define KBASE_UNKNOWN (1<<20) -+ -+/* GPU EXCEPTIONS:*/ -+#define KBASE_DELAYED_BUS_FAULT (1<<21) -+#define KBASE_SHAREABILITY_FAULT (1<<22) -+ -+/* MMU EXCEPTIONS:*/ -+#define KBASE_TRANSLATION_FAULT (1<<23) -+#define KBASE_PERMISSION_FAULT (1<<24) -+#define KBASE_TRANSTAB_BUS_FAULT (1<<25) -+#define KBASE_ACCESS_FLAG (1<<26) -+ -+/* generic useful bitmasks */ -+#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED) -+#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT) -+#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT) -+ -+/* number of possible MMU address spaces */ -+#define NUM_MMU_AS 16 /* total number of MMU address spaces as in -+ * MMU_IRQ_RAWSTAT register -+ */ -+ -+/* Forward declaration */ -+struct kbase_device; ++#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1) + -+/* -+ * the function below is used to trigger the simulation of a faulty -+ * HW condition for a specific job chain atom ++/** ++ * struct kbase_clk_data - Data stored per enumerated GPU clock. ++ * ++ * @clk_rtm: Pointer to clock rate trace manager object. ++ * @gpu_clk_handle: Handle unique to the enumerated GPU clock. 
++ * @plat_private: Private data for the platform to store into ++ * @clk_rate_change_nb: notifier block containing the pointer to callback ++ * function that is invoked whenever the rate of ++ * enumerated GPU clock changes. ++ * @clock_val: Current rate of the enumerated GPU clock. ++ * @index: Index at which the GPU clock was enumerated. + */ -+ -+struct kbase_error_params { -+ u64 jc; -+ u32 errors_mask; -+ u32 mmu_table_level; -+ u16 faulty_mmu_as; -+ u16 padding[3]; -+}; -+ -+enum kbase_model_control_command { -+ /* Disable/Enable job completion in the dummy model */ -+ KBASE_MC_DISABLE_JOBS -+}; -+ -+/* struct to control dummy model behavior */ -+struct kbase_model_control_params { -+ s32 command; -+ s32 value; -+}; -+ -+/* struct to track faulty atoms */ -+struct kbase_error_atom { -+ struct kbase_error_params params; -+ struct kbase_error_atom *next; ++struct kbase_clk_data { ++ struct kbase_clk_rate_trace_manager *clk_rtm; ++ void *gpu_clk_handle; ++ void *plat_private; ++ struct notifier_block clk_rate_change_nb; ++ unsigned long clock_val; ++ u8 index; +}; + -+/*struct to track the system error state*/ -+struct error_status_t { -+ spinlock_t access_lock; -+ -+ u32 errors_mask; -+ u32 mmu_table_level; -+ int faulty_mmu_as; -+ -+ u64 current_jc; -+ int current_job_slot; -+ -+ u32 job_irq_rawstat; -+ u32 job_irq_status; -+ u32 js_status[NUM_SLOTS]; -+ -+ u32 mmu_irq_mask; -+ u32 mmu_irq_rawstat; -+ -+ u32 gpu_error_irq; -+ u32 gpu_fault_status; ++/** ++ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if success, or an error code on failure. ++ */ ++int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); + -+ u32 as_faultstatus[NUM_MMU_AS]; -+ u32 as_command[NUM_MMU_AS]; -+ u64 as_transtab[NUM_MMU_AS]; -+}; ++/** ++ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. ++ * ++ * @kbdev: Device pointer ++ */ ++void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev); + +/** -+ * struct gpu_model_prfcnt_en - Performance counter enable masks -+ * @fe: Enable mask for front-end block -+ * @tiler: Enable mask for tiler block -+ * @l2: Enable mask for L2/Memory system blocks -+ * @shader: Enable mask for shader core blocks ++ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace ++ * manager of GPU becoming active. ++ * ++ * @kbdev: Device pointer + */ -+struct gpu_model_prfcnt_en { -+ u32 fe; -+ u32 tiler; -+ u32 l2; -+ u32 shader; -+}; ++void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev); + -+void midgard_set_error(int job_slot); -+int job_atom_inject_error(struct kbase_error_params *params); -+int gpu_model_control(void *h, -+ struct kbase_model_control_params *params); ++/** ++ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace ++ * manager of GPU becoming idle. ++ * @kbdev: Device pointer ++ */ ++void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); + +/** -+ * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values -+ * @data: Userspace pointer to array of counter values -+ * @size: Size of counter value array ++ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener. + * -+ * Counter values set by this function will be used for one sample dump only -+ * after which counters will be cleared back to zero. ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle + * -+ * Return: 0 on success, else error code. 
++ * kbase_clk_rate_trace_manager:lock must be held by the caller. + */ -+int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); ++static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ lockdep_assert_held(&clk_rtm->lock); ++ list_add(&listener->node, &clk_rtm->listeners); ++} + +/** -+ * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values -+ * @data: Pointer to array of counter values -+ * @size: Size of counter value array ++ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener. + * -+ * Counter values set by this function will be used for one sample dump only -+ * after which counters will be cleared back to zero. ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle + */ -+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); -+ -+void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, -+ u64 *l2_present, u64 *shader_present); -+void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, -+ u64 l2_present, u64 shader_present); ++static inline void kbase_clk_rate_trace_manager_subscribe( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ unsigned long flags; + -+/* Clear the counter values array maintained by the dummy model */ -+void gpu_model_clear_prfcnt_values(void); ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ clk_rtm, listener); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} + -+#if MALI_USE_CSF +/** -+ * gpu_model_prfcnt_dump_request() - Request performance counter sample dump. -+ * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array -+ * in which to store dumped performance counter values. -+ * @enable_maps: Physical enable maps for performance counter blocks. ++ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener. ++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle + */ -+void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps); ++static inline void kbase_clk_rate_trace_manager_unsubscribe( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ list_del(&listener->node); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} + +/** -+ * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request -+ * flag set. -+ * @model: Model pointer returned by midgard_model_create(). ++ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock \ ++ * rate listeners. ++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @clock_index: Clock index. ++ * @new_rate: New clock frequency(Hz) ++ * ++ * kbase_clk_rate_trace_manager:lock must be locked. ++ * This function is exported to be used by clock rate trace test ++ * portal. 
+ */ -+void gpu_model_glb_request_job_irq(void *model); -+#endif /* MALI_USE_CSF */ ++void kbase_clk_rate_trace_manager_notify_all( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ u32 clock_index, ++ unsigned long new_rate); + -+extern struct error_status_t hw_error_status; ++#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ + -+#endif -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c new file mode 100644 -index 000000000..f310cc74c +index 000000000..e121b417f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c -@@ -0,0 +1,183 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c +@@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -127110,182 +126769,162 @@ index 000000000..f310cc74c + */ + +#include -+#include -+#include "backend/gpu/mali_kbase_model_linux.h" ++#include ++#include "mali_kbase_debug_job_fault.h" + -+static struct kbase_error_atom *error_track_list; ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM ++/*GPU_CONTROL_REG(r)*/ ++static int gpu_control_reg_snapshot[] = { ++ GPU_ID, ++ SHADER_READY_LO, ++ SHADER_READY_HI, ++ TILER_READY_LO, ++ TILER_READY_HI, ++ L2_READY_LO, ++ L2_READY_HI ++}; + -+/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ -+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -+#define prandom_u32 get_random_u32 -+#endif ++/* JOB_CONTROL_REG(r) */ ++static int job_control_reg_snapshot[] = { ++ JOB_IRQ_MASK, ++ JOB_IRQ_STATUS ++}; + -+/*following error probability are set quite high in order to stress the driver*/ -+static unsigned int error_probability = 50; /* to be set between 0 and 100 */ -+/* probability to have multiple error give that there is an error */ -+static unsigned int multiple_error_probability = 50; ++/* JOB_SLOT_REG(n,r) */ ++static int job_slot_reg_snapshot[] = { ++ JS_HEAD_LO, ++ JS_HEAD_HI, ++ JS_TAIL_LO, ++ JS_TAIL_HI, ++ JS_AFFINITY_LO, ++ JS_AFFINITY_HI, ++ JS_CONFIG, ++ JS_STATUS, ++ JS_HEAD_NEXT_LO, ++ JS_HEAD_NEXT_HI, ++ JS_AFFINITY_NEXT_LO, ++ JS_AFFINITY_NEXT_HI, ++ JS_CONFIG_NEXT ++}; + -+/* all the error conditions supported by the model */ -+#define TOTAL_FAULTS 27 -+/* maximum number of levels in the MMU translation table tree */ -+#define MAX_MMU_TABLE_LEVEL 4 -+/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ -+#define MAX_CONCURRENT_FAULTS 3 ++/*MMU_REG(r)*/ ++static int mmu_reg_snapshot[] = { ++ MMU_IRQ_MASK, ++ MMU_IRQ_STATUS ++}; + -+/** -+ * gpu_generate_error - Generate GPU error -+ */ -+static void gpu_generate_error(void) ++/* MMU_AS_REG(n,r) */ ++static int as_reg_snapshot[] = { ++ AS_TRANSTAB_LO, ++ AS_TRANSTAB_HI, ++ AS_TRANSCFG_LO, ++ AS_TRANSCFG_HI, ++ AS_MEMATTR_LO, ++ AS_MEMATTR_HI, ++ AS_FAULTSTATUS, ++ AS_FAULTADDRESS_LO, ++ AS_FAULTADDRESS_HI, ++ AS_STATUS ++}; ++ ++bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, ++ int reg_range) +{ -+ unsigned 
int errors_num = 0; ++ int i, j; ++ int offset = 0; ++ int slot_number; ++ int as_number; + -+ /*is there at least one error? */ -+ if ((prandom_u32() % 100) < error_probability) { -+ /* pick up a faulty mmu address space */ -+ hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; -+ /* pick up an mmu table level */ -+ hw_error_status.mmu_table_level = -+ 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); -+ hw_error_status.errors_mask = -+ (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); ++ if (kctx->reg_dump == NULL) ++ return false; + -+ /*is there also one or more errors? */ -+ if ((prandom_u32() % 100) < multiple_error_probability) { -+ errors_num = 1 + (prandom_u32() % -+ (MAX_CONCURRENT_FAULTS - 1)); -+ while (errors_num-- > 0) { -+ u32 temp_mask; ++ slot_number = kctx->kbdev->gpu_props.num_job_slots; ++ as_number = kctx->kbdev->gpu_props.num_address_spaces; + -+ temp_mask = (u32)( -+ 1 << (prandom_u32() % TOTAL_FAULTS)); -+ /* below we check that no bit of the same error -+ * type is set again in the error mask -+ */ -+ if ((temp_mask & IS_A_JOB_ERROR) && -+ (hw_error_status.errors_mask & -+ IS_A_JOB_ERROR)) { -+ errors_num++; -+ continue; -+ } -+ if ((temp_mask & IS_A_MMU_ERROR) && -+ (hw_error_status.errors_mask & -+ IS_A_MMU_ERROR)) { -+ errors_num++; -+ continue; -+ } -+ if ((temp_mask & IS_A_GPU_ERROR) && -+ (hw_error_status.errors_mask & -+ IS_A_GPU_ERROR)) { -+ errors_num++; -+ continue; -+ } -+ /* this error mask is already set */ -+ if ((hw_error_status.errors_mask | temp_mask) == -+ hw_error_status.errors_mask) { -+ errors_num++; -+ continue; -+ } -+ hw_error_status.errors_mask |= temp_mask; -+ } -+ } ++ /* get the GPU control registers*/ ++ for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); ++ offset += 2; + } -+} -+#endif + -+int job_atom_inject_error(struct kbase_error_params *params) -+{ -+ struct kbase_error_atom *new_elem; ++ /* get the Job control registers*/ ++ for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ JOB_CONTROL_REG(job_control_reg_snapshot[i]); ++ offset += 2; ++ } + -+ KBASE_DEBUG_ASSERT(params); ++ /* get the Job Slot registers*/ ++ for (j = 0; j < slot_number; j++) { ++ for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); ++ offset += 2; ++ } ++ } + -+ new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL); ++ /* get the MMU registers*/ ++ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); ++ offset += 2; ++ } + -+ if (!new_elem) { -+ model_error_log(KBASE_CORE, -+ "\njob_atom_inject_error: kzalloc failed for new_elem\n" -+ ); -+ return -ENOMEM; ++ /* get the Address space registers*/ ++ for (j = 0; j < as_number; j++) { ++ for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ MMU_AS_REG(j, as_reg_snapshot[i]); ++ offset += 2; ++ } + } -+ new_elem->params.jc = params->jc; -+ new_elem->params.errors_mask = params->errors_mask; -+ new_elem->params.mmu_table_level = params->mmu_table_level; -+ new_elem->params.faulty_mmu_as = params->faulty_mmu_as; + -+ /*circular list below */ -+ if (error_track_list == NULL) { /*no elements */ -+ error_track_list = new_elem; -+ new_elem->next = error_track_list; -+ } else { -+ struct kbase_error_atom *walker = error_track_list; ++ WARN_ON(offset >= (reg_range*2/4)); + -+ while (walker->next != error_track_list) -+ walker = walker->next; ++ /* set 
the termination flag*/ ++ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; ++ kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + -+ new_elem->next = error_track_list; -+ walker->next = new_elem; -+ } -+ return 0; ++ dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", ++ offset); ++ ++ return true; +} + -+void midgard_set_error(int job_slot) ++bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ -+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM -+ gpu_generate_error(); -+#else -+ struct kbase_error_atom *walker, *auxiliar; -+ -+ if (error_track_list != NULL) { -+ walker = error_track_list->next; -+ auxiliar = error_track_list; -+ do { -+ if (walker->params.jc == hw_error_status.current_jc) { -+ /* found a faulty atom matching with the -+ * current one -+ */ -+ hw_error_status.errors_mask = -+ walker->params.errors_mask; -+ hw_error_status.mmu_table_level = -+ walker->params.mmu_table_level; -+ hw_error_status.faulty_mmu_as = -+ walker->params.faulty_mmu_as; -+ hw_error_status.current_job_slot = job_slot; ++ int offset = 0; + -+ if (walker->next == walker) { -+ /* only one element */ -+ kfree(error_track_list); -+ error_track_list = NULL; -+ } else { -+ auxiliar->next = walker->next; -+ if (walker == error_track_list) -+ error_track_list = walker->next; ++ if (kctx->reg_dump == NULL) ++ return false; + -+ kfree(walker); -+ } -+ break; -+ } -+ auxiliar = walker; -+ walker = walker->next; -+ } while (auxiliar->next != error_track_list); ++ while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { ++ kctx->reg_dump[offset+1] = ++ kbase_reg_read(kctx->kbdev, ++ kctx->reg_dump[offset]); ++ offset += 2; + } -+#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ ++ return true; +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c ++ ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c new file mode 100644 -index 000000000..e90e4df2f +index 000000000..e960f4602 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c -@@ -0,0 +1,244 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +@@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -127296,241 +126935,738 @@ index 000000000..e90e4df2f + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ */ -+ -+/* -+ * Model Linux Framework interfaces. 
++ * SPDX-License-Identifier: GPL-2.0 ++ * + */ + +#include -+#include ++#include ++#include ++#include + -+#include "backend/gpu/mali_kbase_model_linux.h" -+#include "device/mali_kbase_device.h" -+#include "mali_kbase_irq_internal.h" ++#include ++#include ++#include ++#include ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++#include ++#endif + -+#include ++#include ++#include ++#include ++#include "mali_kbase_devfreq.h" + -+struct model_irq_data { -+ struct kbase_device *kbdev; -+ struct work_struct work; ++#include ++#include ++#include ++ ++static struct devfreq_simple_ondemand_data ondemand_data; ++ ++static struct monitor_dev_profile mali_mdevp = { ++ .type = MONITOR_TYPE_DEV, ++ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, ++ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, ++ .check_rate_volt = rockchip_monitor_check_rate_volt, +}; + -+static void serve_job_irq(struct work_struct *work) ++/** ++ * get_voltage() - Get the voltage value corresponding to the nominal frequency ++ * used by devfreq. ++ * @kbdev: Device pointer ++ * @freq: Nominal frequency in Hz passed by devfreq. ++ * ++ * This function will be called only when the opp table which is compatible with ++ * "operating-points-v2-mali", is not present in the devicetree for GPU device. ++ * ++ * Return: Voltage value in micro volts, 0 in case of error. ++ */ ++static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) +{ -+ struct model_irq_data *data = container_of(work, struct model_irq_data, -+ work); -+ struct kbase_device *kbdev = data->kbdev; ++ struct dev_pm_opp *opp; ++ unsigned long voltage = 0; + -+ /* Make sure no worker is already serving this IRQ */ -+ while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) { -+ u32 val; ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_lock(); ++#endif + -+ while ((val = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_STATUS)))) { -+ unsigned long flags; ++ opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); + -+ /* Handle the IRQ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+#if MALI_USE_CSF -+ kbase_csf_interrupt(kbdev, val); -+#else -+ kbase_job_done(kbdev, val); ++ if (IS_ERR_OR_NULL(opp)) ++ dev_err(kbdev->dev, "Failed to get opp (%d)\n", PTR_ERR_OR_ZERO(opp)); ++ else { ++ voltage = dev_pm_opp_get_voltage(opp); ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++ dev_pm_opp_put(opp); +#endif -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } + } + -+ kmem_cache_free(kbdev->irq_slab, data); ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_unlock(); ++#endif ++ ++ /* Return the voltage in micro volts */ ++ return voltage; +} + -+static void serve_gpu_irq(struct work_struct *work) ++void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, ++ u64 *core_mask, unsigned long *freqs, unsigned long *volts) +{ -+ struct model_irq_data *data = container_of(work, struct model_irq_data, -+ work); -+ struct kbase_device *kbdev = data->kbdev; ++ unsigned int i; + -+ /* Make sure no worker is already serving this IRQ */ -+ while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) { -+ u32 val; ++ for (i = 0; i < kbdev->num_opps; i++) { ++ if (kbdev->devfreq_table[i].opp_freq == freq) { ++ unsigned int j; + -+ while ((val = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_STATUS)))) { -+ /* Handle the IRQ */ -+ kbase_gpu_interrupt(kbdev, val); ++ *core_mask = kbdev->devfreq_table[i].core_mask; ++ for (j = 0; j < kbdev->nr_clocks; j++) { ++ freqs[j] = ++ 
kbdev->devfreq_table[i].real_freqs[j]; ++ volts[j] = ++ kbdev->devfreq_table[i].opp_volts[j]; ++ } ++ ++ break; + } + } + -+ kmem_cache_free(kbdev->irq_slab, data); -+} -+ -+static void serve_mmu_irq(struct work_struct *work) -+{ -+ struct model_irq_data *data = container_of(work, struct model_irq_data, -+ work); -+ struct kbase_device *kbdev = data->kbdev; ++ /* If failed to find OPP, return all cores enabled ++ * and nominal frequency and the corresponding voltage. ++ */ ++ if (i == kbdev->num_opps) { ++ unsigned long voltage = get_voltage(kbdev, freq); + -+ /* Make sure no worker is already serving this IRQ */ -+ if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { -+ u32 val; ++ *core_mask = kbdev->gpu_props.props.raw_props.shader_present; + -+ while ((val = kbase_reg_read(kbdev, -+ MMU_REG(MMU_IRQ_STATUS)))) { -+ /* Handle the IRQ */ -+ kbase_mmu_interrupt(kbdev, val); ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ freqs[i] = freq; ++ volts[i] = voltage; + } + } -+ -+ kmem_cache_free(kbdev->irq_slab, data); +} + -+void gpu_device_raise_irq(void *model, u32 irq) ++static int kbase_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +{ -+ struct model_irq_data *data; -+ struct kbase_device *kbdev = gpu_device_get_data(model); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rockchip_opp_info *opp_info = &kbdev->opp_info; ++ struct dev_pm_opp *opp; ++ int ret = 0; + -+ KBASE_DEBUG_ASSERT(kbdev); ++ if (!opp_info->is_rate_volt_checked) ++ return -EINVAL; + -+ data = kmem_cache_alloc(kbdev->irq_slab, GFP_ATOMIC); -+ if (data == NULL) -+ return; ++ opp = devfreq_recommended_opp(dev, freq, flags); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ dev_pm_opp_put(opp); + -+ data->kbdev = kbdev; ++ if (*freq == kbdev->current_nominal_freq) ++ return 0; + -+ switch (irq) { -+ case MODEL_LINUX_JOB_IRQ: -+ INIT_WORK(&data->work, serve_job_irq); -+ atomic_set(&kbdev->serving_job_irq, 1); -+ break; -+ case MODEL_LINUX_GPU_IRQ: -+ INIT_WORK(&data->work, serve_gpu_irq); -+ atomic_set(&kbdev->serving_gpu_irq, 1); -+ break; -+ case MODEL_LINUX_MMU_IRQ: -+ INIT_WORK(&data->work, serve_mmu_irq); -+ atomic_set(&kbdev->serving_mmu_irq, 1); -+ break; -+ default: -+ dev_warn(kbdev->dev, "Unknown IRQ"); -+ kmem_cache_free(kbdev->irq_slab, data); -+ data = NULL; -+ break; ++ rockchip_opp_dvfs_lock(opp_info); ++ if (pm_runtime_active(dev)) ++ opp_info->is_runtime_active = true; ++ else ++ opp_info->is_runtime_active = false; ++ ret = dev_pm_opp_set_rate(dev, *freq); ++ if (!ret) { ++ kbdev->current_nominal_freq = *freq; ++ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)*freq); + } ++ rockchip_opp_dvfs_unlock(opp_info); + -+ if (data != NULL) -+ queue_work(kbdev->irq_workq, &data->work); ++ return ret; +} + -+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) ++void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq) +{ -+ unsigned long flags; ++ unsigned long target_freq = freq; + -+ spin_lock_irqsave(&kbdev->reg_op_lock, flags); -+ midgard_model_write_reg(kbdev->model, offset, value); -+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); ++ kbase_devfreq_target(kbdev->dev, &target_freq, 0); +} + -+KBASE_EXPORT_TEST_API(kbase_reg_write); ++static int ++kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) ++{ ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) ++ *freq = kbdev->current_nominal_freq; ++ ++ return 0; ++} ++ ++static int ++kbase_devfreq_status(struct 
device *dev, struct devfreq_dev_status *stat) +{ -+ unsigned long flags; -+ u32 val; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct kbasep_pm_metrics diff; + -+ spin_lock_irqsave(&kbdev->reg_op_lock, flags); -+ midgard_model_read_reg(kbdev->model, offset, &val); -+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); ++ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff); + -+ return val; ++ stat->busy_time = diff.time_busy; ++ stat->total_time = diff.time_busy + diff.time_idle; ++ stat->current_frequency = kbdev->current_nominal_freq; ++ stat->private_data = NULL; ++ ++#if MALI_USE_CSF && defined CONFIG_DEVFREQ_THERMAL ++ if (!kbdev->devfreq_profile.is_cooling_device) ++ kbase_ipa_reset_data(kbdev); ++#endif ++ ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_reg_read); + -+int kbase_install_interrupts(struct kbase_device *kbdev) ++static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, ++ struct devfreq_dev_profile *dp) +{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ int count; ++ int i = 0; ++ unsigned long freq; ++ struct dev_pm_opp *opp; + -+ atomic_set(&kbdev->serving_job_irq, 0); -+ atomic_set(&kbdev->serving_gpu_irq, 0); -+ atomic_set(&kbdev->serving_mmu_irq, 0); ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_lock(); ++#endif ++ count = dev_pm_opp_get_opp_count(kbdev->dev); ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_unlock(); ++#endif ++ if (count < 0) ++ return count; + -+ kbdev->irq_workq = alloc_ordered_workqueue("dummy irq queue", 0); -+ if (kbdev->irq_workq == NULL) ++ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), ++ GFP_KERNEL); ++ if (!dp->freq_table) + return -ENOMEM; + -+ kbdev->irq_slab = kmem_cache_create("dummy_irq_slab", -+ sizeof(struct model_irq_data), 0, 0, NULL); -+ if (kbdev->irq_slab == NULL) { -+ destroy_workqueue(kbdev->irq_workq); -+ return -ENOMEM; ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_lock(); ++#endif ++ for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { ++ opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); ++ if (IS_ERR(opp)) ++ break; ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++ dev_pm_opp_put(opp); ++#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */ ++ ++ dp->freq_table[i] = freq; ++ } ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_unlock(); ++#endif ++ ++ if (count != i) ++ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", ++ count, i); ++ ++ dp->max_state = i; ++ ++ ++ /* Have the lowest clock as suspend clock. ++ * It may be overridden by 'opp-mali-errata-1485982'. 
++ */ ++ if (kbdev->pm.backend.gpu_clock_slow_down_wa) { ++ freq = 0; ++ opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq); ++ if (IS_ERR(opp)) { ++ dev_err(kbdev->dev, "failed to find slowest clock"); ++ return 0; ++ } ++ dev_pm_opp_put(opp); ++ dev_info(kbdev->dev, "suspend clock %lu from slowest", freq); ++ kbdev->pm.backend.gpu_clock_suspend_freq = freq; + } + + return 0; +} + -+void kbase_release_interrupts(struct kbase_device *kbdev) ++static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev); -+ destroy_workqueue(kbdev->irq_workq); -+ kmem_cache_destroy(kbdev->irq_slab); ++ struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; ++ ++ kfree(dp->freq_table); ++ dp->freq_table = NULL; +} + -+void kbase_synchronize_irqs(struct kbase_device *kbdev) ++static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev); -+ flush_workqueue(kbdev->irq_workq); ++ kfree(kbdev->devfreq_table); ++ kbdev->devfreq_table = NULL; +} + -+KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); ++static void kbase_devfreq_exit(struct device *dev) ++{ ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, -+ int irq_type) ++ if (kbdev) ++ kbase_devfreq_term_freq_table(kbdev); ++} ++ ++static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, ++ struct device_node *node) ++{ ++ u64 freq = 0; ++ int err = 0; ++ ++ /* Check if this node is the opp entry having 'opp-mali-errata-1485982' ++ * to get the suspend clock, otherwise skip it. ++ */ ++ if (!of_property_read_bool(node, "opp-mali-errata-1485982")) ++ return; ++ ++ /* In kbase DevFreq, the clock will be read from 'opp-hz' ++ * and translated into the actual clock by opp_translate. ++ * ++ * In customer DVFS, the clock will be read from 'opp-hz-real' ++ * for clk driver. If 'opp-hz-real' does not exist, ++ * read from 'opp-hz'. ++ */ ++ if (IS_ENABLED(CONFIG_MALI_BIFROST_DEVFREQ)) ++ err = of_property_read_u64(node, "opp-hz", &freq); ++ else { ++ if (of_property_read_u64(node, "opp-hz-real", &freq)) ++ err = of_property_read_u64(node, "opp-hz", &freq); ++ } ++ ++ if (WARN_ON(err || !freq)) ++ return; ++ ++ kbdev->pm.backend.gpu_clock_suspend_freq = freq; ++ dev_info(kbdev->dev, ++ "suspend clock %llu by opp-mali-errata-1485982", freq); ++} ++ ++static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) +{ ++#ifndef CONFIG_OF ++ /* OPP table initialization requires at least the capability to get ++ * regulators and clocks from the device tree, as well as parsing ++ * arrays of unsigned integer values. ++ * ++ * The whole initialization process shall simply be skipped if the ++ * minimum capability is not available. 
++ */ ++ return 0; ++#else ++ struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, ++ "operating-points-v2", 0); ++ struct device_node *node; ++ int i = 0; ++ int count; ++ u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; ++ ++ if (!opp_node) ++ return 0; ++ if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) ++ return 0; ++ ++ count = dev_pm_opp_get_opp_count(kbdev->dev); ++ kbdev->devfreq_table = kmalloc_array(count, ++ sizeof(struct kbase_devfreq_opp), GFP_KERNEL); ++ if (!kbdev->devfreq_table) ++ return -ENOMEM; ++ ++ for_each_available_child_of_node(opp_node, node) { ++ const void *core_count_p; ++ u64 core_mask, opp_freq, ++ real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ int err; ++#if IS_ENABLED(CONFIG_REGULATOR) ++ u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++#endif ++ ++ /* Read suspend clock from opp table */ ++ if (kbdev->pm.backend.gpu_clock_slow_down_wa) ++ kbasep_devfreq_read_suspend_clock(kbdev, node); ++ ++ err = of_property_read_u64(node, "opp-hz", &opp_freq); ++ if (err) { ++ dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", ++ err); ++ continue; ++ } ++ ++ ++#if BASE_MAX_NR_CLOCKS_REGULATORS > 1 ++ err = of_property_read_u64_array(node, "opp-hz-real", ++ real_freqs, kbdev->nr_clocks); ++#else ++ WARN_ON(kbdev->nr_clocks != 1); ++ err = of_property_read_u64(node, "opp-hz-real", real_freqs); ++#endif ++ if (err < 0) { ++ dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", ++ err); ++ continue; ++ } ++#if IS_ENABLED(CONFIG_REGULATOR) ++ err = of_property_read_u32_array(node, ++ "opp-microvolt", opp_volts, kbdev->nr_regulators); ++ if (err < 0) { ++ dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", ++ err); ++ continue; ++ } ++#endif ++ ++ if (of_property_read_u64(node, "opp-core-mask", &core_mask)) ++ core_mask = shader_present; ++ if (core_mask != shader_present && corestack_driver_control) { ++ ++ dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", ++ opp_freq); ++ continue; ++ } ++ ++ core_count_p = of_get_property(node, "opp-core-count", NULL); ++ if (core_count_p) { ++ u64 remaining_core_mask = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ int core_count = be32_to_cpup(core_count_p); ++ ++ core_mask = 0; ++ ++ for (; core_count > 0; core_count--) { ++ int core = ffs(remaining_core_mask); ++ ++ if (!core) { ++ dev_err(kbdev->dev, "OPP has more cores than GPU\n"); ++ return -ENODEV; ++ } ++ ++ core_mask |= (1ull << (core-1)); ++ remaining_core_mask &= ~(1ull << (core-1)); ++ } ++ } ++ ++ if (!core_mask) { ++ dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); ++ return -ENODEV; ++ } ++ ++ kbdev->devfreq_table[i].opp_freq = opp_freq; ++ kbdev->devfreq_table[i].core_mask = core_mask; ++ if (kbdev->nr_clocks > 0) { ++ int j; ++ ++ for (j = 0; j < kbdev->nr_clocks; j++) ++ kbdev->devfreq_table[i].real_freqs[j] = ++ real_freqs[j]; ++ } ++#if IS_ENABLED(CONFIG_REGULATOR) ++ if (kbdev->nr_regulators > 0) { ++ int j; ++ ++ for (j = 0; j < kbdev->nr_regulators; j++) ++ kbdev->devfreq_table[i].opp_volts[j] = ++ opp_volts[j]; ++ } ++#endif ++ ++ dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", ++ i, opp_freq, core_mask); ++ ++ i++; ++ } ++ ++ kbdev->num_opps = i; ++ + return 0; ++#endif /* CONFIG_OF */ +} + -+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); ++static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) ++{ ++ const char *p; + -+irqreturn_t 
kbase_gpu_irq_test_handler(int irq, void *data, u32 val) ++ switch (type) { ++ case DEVFREQ_WORK_NONE: ++ p = "devfreq_none"; ++ break; ++ case DEVFREQ_WORK_SUSPEND: ++ p = "devfreq_suspend"; ++ break; ++ case DEVFREQ_WORK_RESUME: ++ p = "devfreq_resume"; ++ break; ++ default: ++ p = "Unknown devfreq_type"; ++ } ++ return p; ++} ++ ++static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) +{ -+ if (!val) -+ return IRQ_NONE; ++ struct kbase_devfreq_queue_info *info = container_of(work, ++ struct kbase_devfreq_queue_info, work); ++ struct kbase_device *kbdev = container_of(info, struct kbase_device, ++ devfreq_queue); ++ unsigned long flags; ++ enum kbase_devfreq_work_type type, acted_type; + -+ return IRQ_HANDLED; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ type = kbdev->devfreq_queue.req_type; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ acted_type = kbdev->devfreq_queue.acted_type; ++ dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", ++ kbase_devfreq_req_type_name(type), ++ kbase_devfreq_req_type_name(acted_type)); ++ switch (type) { ++ case DEVFREQ_WORK_SUSPEND: ++ case DEVFREQ_WORK_RESUME: ++ if (type != acted_type) { ++ if (type == DEVFREQ_WORK_RESUME) ++ devfreq_resume_device(kbdev->devfreq); ++ else ++ devfreq_suspend_device(kbdev->devfreq); ++ dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n", ++ kbase_devfreq_req_type_name(acted_type), ++ kbase_devfreq_req_type_name(type)); ++ kbdev->devfreq_queue.acted_type = type; ++ } ++ break; ++ default: ++ WARN_ON(1); ++ } +} + -+KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); ++void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, ++ enum kbase_devfreq_work_type work_type) ++{ ++ unsigned long flags; + -+int kbase_gpu_device_create(struct kbase_device *kbdev) ++ WARN_ON(work_type == DEVFREQ_WORK_NONE); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* Skip enqueuing a work if workqueue has already been terminated. 
*/ ++ if (likely(kbdev->devfreq_queue.workq)) { ++ kbdev->devfreq_queue.req_type = work_type; ++ queue_work(kbdev->devfreq_queue.workq, ++ &kbdev->devfreq_queue.work); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", ++ kbase_devfreq_req_type_name(work_type)); ++} ++ ++static int kbase_devfreq_work_init(struct kbase_device *kbdev) +{ -+ kbdev->model = midgard_model_create(kbdev); -+ if (kbdev->model == NULL) ++ kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; ++ kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; ++ ++ kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0); ++ if (!kbdev->devfreq_queue.workq) + return -ENOMEM; + -+ spin_lock_init(&kbdev->reg_op_lock); ++ INIT_WORK(&kbdev->devfreq_queue.work, ++ kbase_devfreq_suspend_resume_worker); ++ return 0; ++} ++ ++static void kbase_devfreq_work_term(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ struct workqueue_struct *workq; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ workq = kbdev->devfreq_queue.workq; ++ kbdev->devfreq_queue.workq = NULL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ destroy_workqueue(workq); ++} ++ ++int kbase_devfreq_init(struct kbase_device *kbdev) ++{ ++ struct device_node *np = kbdev->dev->of_node; ++ struct devfreq_dev_profile *dp; ++ int err; ++ struct dev_pm_opp *opp; ++ unsigned int dyn_power_coeff = 0; ++ unsigned int i; ++ bool free_devfreq_freq_table = true; ++ ++ if (kbdev->nr_clocks == 0) { ++ dev_err(kbdev->dev, "Clock not available for devfreq\n"); ++ return -ENODEV; ++ } ++ ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ if (kbdev->clocks[i]) ++ kbdev->current_freqs[i] = ++ clk_get_rate(kbdev->clocks[i]); ++ else ++ kbdev->current_freqs[i] = 0; ++ } ++ kbdev->current_nominal_freq = kbdev->current_freqs[0]; ++ ++ opp = devfreq_recommended_opp(kbdev->dev, &kbdev->current_nominal_freq, 0); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ dev_pm_opp_put(opp); ++ ++ dp = &kbdev->devfreq_profile; ++ ++ dp->initial_freq = kbdev->current_nominal_freq; ++ dp->polling_ms = 100; ++ dp->target = kbase_devfreq_target; ++ dp->get_dev_status = kbase_devfreq_status; ++ dp->get_cur_freq = kbase_devfreq_cur_freq; ++ dp->exit = kbase_devfreq_exit; ++ ++ if (kbase_devfreq_init_freq_table(kbdev, dp)) ++ return -EFAULT; ++ ++ if (dp->max_state > 0) { ++ /* Record the maximum frequency possible */ ++ kbdev->gpu_props.props.core_props.gpu_freq_khz_max = ++ dp->freq_table[0] / 1000; ++ }; ++ ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ of_property_read_u32(kbdev->dev->of_node, "dynamic-power-coefficient", ++ &dyn_power_coeff); ++ if (dyn_power_coeff) ++ dp->is_cooling_device = true; ++#endif ++ ++ err = kbase_devfreq_init_core_mask_table(kbdev); ++ if (err) ++ goto init_core_mask_table_failed; ++ ++ of_property_read_u32(np, "upthreshold", ++ &ondemand_data.upthreshold); ++ of_property_read_u32(np, "downdifferential", ++ &ondemand_data.downdifferential); ++ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, ++ "simple_ondemand", NULL); ++ if (IS_ERR(kbdev->devfreq)) { ++ err = PTR_ERR(kbdev->devfreq); ++ kbdev->devfreq = NULL; ++ dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); ++ goto devfreq_add_dev_failed; ++ } ++ ++ /* Explicit free of freq table isn't needed after devfreq_add_device() */ ++ free_devfreq_freq_table = false; ++ ++ /* Initialize devfreq suspend/resume workqueue */ ++ err = kbase_devfreq_work_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Fail to init devfreq 
workqueue"); ++ goto devfreq_work_init_failed; ++ } ++ ++ /* devfreq_add_device only copies a few of kbdev->dev's fields, so ++ * set drvdata explicitly so IPA models can access kbdev. ++ */ ++ dev_set_drvdata(&kbdev->devfreq->dev, kbdev); ++ ++ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to register OPP notifier (%d)", err); ++ goto opp_notifier_failed; ++ } ++ ++ mali_mdevp.data = kbdev->devfreq; ++ mali_mdevp.opp_info = &kbdev->opp_info; ++ kbdev->mdev_info = rockchip_system_monitor_register(kbdev->dev, ++ &mali_mdevp); ++ if (IS_ERR(kbdev->mdev_info)) { ++ dev_dbg(kbdev->dev, "without system monitor\n"); ++ kbdev->mdev_info = NULL; ++ } ++ kbdev->opp_info.is_rate_volt_checked = true; ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ if (!dp->is_cooling_device) { ++ err = kbase_ipa_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "IPA initialization failed\n"); ++ goto ipa_init_failed; ++ } ++ ++ kbdev->devfreq_cooling = devfreq_cooling_em_register( ++ kbdev->devfreq, ++ &kbase_ipa_power_model_ops); ++ if (IS_ERR(kbdev->devfreq_cooling)) { ++ err = PTR_ERR(kbdev->devfreq_cooling); ++ dev_err(kbdev->dev, ++ "Failed to register cooling device (%d)\n", ++ err); ++ goto cooling_reg_failed; ++ } ++ } ++#endif + + return 0; ++ ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++cooling_reg_failed: ++ kbase_ipa_term(kbdev); ++ipa_init_failed: ++ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++ ++opp_notifier_failed: ++ kbase_devfreq_work_term(kbdev); ++ ++devfreq_work_init_failed: ++ if (devfreq_remove_device(kbdev->devfreq)) ++ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)", err); ++ ++ kbdev->devfreq = NULL; ++ ++devfreq_add_dev_failed: ++ kbase_devfreq_term_core_mask_table(kbdev); ++ ++init_core_mask_table_failed: ++ if (free_devfreq_freq_table) ++ kbase_devfreq_term_freq_table(kbdev); ++ ++ return err; +} + -+/** -+ * kbase_gpu_device_destroy - Destroy GPU device -+ * -+ * @kbdev: kbase device -+ */ -+void kbase_gpu_device_destroy(struct kbase_device *kbdev) ++void kbase_devfreq_term(struct kbase_device *kbdev) +{ -+ midgard_model_destroy(kbdev->model); ++ int err; ++ ++ dev_dbg(kbdev->dev, "Term Mali devfreq\n"); ++ ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ if (kbdev->devfreq_cooling) ++ devfreq_cooling_unregister(kbdev->devfreq_cooling); ++#endif ++ ++ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++ ++ kbase_devfreq_work_term(kbdev); ++ ++ err = devfreq_remove_device(kbdev->devfreq); ++ if (err) ++ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); ++ else ++ kbdev->devfreq = NULL; ++ ++ kbase_devfreq_term_core_mask_table(kbdev); ++ ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ if (!kbdev->model_data) ++ kbase_ipa_term(kbdev); ++ kfree(kbdev->model_data); ++#endif +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h new file mode 100644 -index 000000000..8f09afe3d +index 000000000..ac88b025a --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h -@@ -0,0 +1,151 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h +@@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -127548,146 +127684,57 @@ index 000000000..8f09afe3d + * + */ + -+/* -+ * Model Linux Framework interfaces. -+ * -+ * This framework is used to provide generic Kbase Models interfaces. -+ * Note: Backends cannot be used together; the selection is done at build time. -+ * -+ * - Without Model Linux Framework: -+ * +-----------------------------+ -+ * | Kbase read/write/IRQ | -+ * +-----------------------------+ -+ * | HW interface definitions | -+ * +-----------------------------+ -+ * -+ * - With Model Linux Framework: -+ * +-----------------------------+ -+ * | Kbase read/write/IRQ | -+ * +-----------------------------+ -+ * | Model Linux Framework | -+ * +-----------------------------+ -+ * | Model interface definitions | -+ * +-----------------------------+ -+ */ -+ -+#ifndef _KBASE_MODEL_LINUX_H_ -+#define _KBASE_MODEL_LINUX_H_ -+ -+/* -+ * Include Model definitions -+ */ -+ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+#include -+#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -+ -+#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -+/** -+ * kbase_gpu_device_create() - Generic create function. -+ * -+ * @kbdev: Kbase device. -+ * -+ * Specific model hook is implemented by midgard_model_create() -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+int kbase_gpu_device_create(struct kbase_device *kbdev); -+ -+/** -+ * kbase_gpu_device_destroy() - Generic create function. -+ * -+ * @kbdev: Kbase device. -+ * -+ * Specific model hook is implemented by midgard_model_destroy() -+ */ -+void kbase_gpu_device_destroy(struct kbase_device *kbdev); -+ -+/** -+ * midgard_model_create() - Private create function. -+ * -+ * @kbdev: Kbase device. -+ * -+ * This hook is specific to the model built in Kbase. -+ * -+ * Return: Model handle. -+ */ -+void *midgard_model_create(struct kbase_device *kbdev); -+ -+/** -+ * midgard_model_destroy() - Private destroy function. -+ * -+ * @h: Model handle. -+ * -+ * This hook is specific to the model built in Kbase. -+ */ -+void midgard_model_destroy(void *h); ++#ifndef _BASE_DEVFREQ_H_ ++#define _BASE_DEVFREQ_H_ + -+/** -+ * midgard_model_write_reg() - Private model write function. -+ * -+ * @h: Model handle. -+ * @addr: Address at which to write. -+ * @value: value to write. -+ * -+ * This hook is specific to the model built in Kbase. -+ */ -+void midgard_model_write_reg(void *h, u32 addr, u32 value); ++int kbase_devfreq_init(struct kbase_device *kbdev); + -+/** -+ * midgard_model_read_reg() - Private model read function. -+ * -+ * @h: Model handle. -+ * @addr: Address from which to read. -+ * @value: Pointer where to store the read value. -+ * -+ * This hook is specific to the model built in Kbase. -+ */ -+void midgard_model_read_reg(void *h, u32 addr, u32 *const value); ++void kbase_devfreq_term(struct kbase_device *kbdev); + +/** -+ * gpu_device_raise_irq() - Private IRQ raise function. -+ * -+ * @model: Model handle. -+ * @irq: IRQ type to raise. -+ * -+ * This hook is global to the model Linux framework. ++ * kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off. 
++ * @kbdev: Device pointer ++ * @freq: GPU frequency in HZ to be set when ++ * MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled + */ -+void gpu_device_raise_irq(void *model, u32 irq); ++void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); + +/** -+ * gpu_device_set_data() - Private model set data function. -+ * -+ * @model: Model handle. -+ * @data: Data carried by model. -+ * -+ * This hook is global to the model Linux framework. ++ * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq. ++ * @kbdev: Device pointer ++ * @work_type: The type of the devfreq work item, i.e. suspend or resume + */ -+void gpu_device_set_data(void *model, void *data); ++void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, ++ enum kbase_devfreq_work_type work_type); + +/** -+ * gpu_device_get_data() - Private model get data function. -+ * -+ * @model: Model handle. -+ * -+ * This hook is global to the model Linux framework. ++ * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree ++ * into real frequency & voltage pair, along with ++ * core mask ++ * @kbdev: Device pointer ++ * @freq: Nominal frequency ++ * @core_mask: Pointer to u64 to store core mask to ++ * @freqs: Pointer to array of frequencies ++ * @volts: Pointer to array of voltages + * -+ * Return: Pointer to the data carried by model. ++ * This function will only perform translation if an operating-points-v2-mali ++ * table is present in devicetree. If one is not present then it will return an ++ * untranslated frequency (and corresponding voltage) and all cores enabled. ++ * The voltages returned are in micro Volts (uV). + */ -+void *gpu_device_get_data(void *model); -+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+ -+#endif /* _KBASE_MODEL_LINUX_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c ++void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, ++ u64 *core_mask, unsigned long *freqs, unsigned long *volts); ++#endif /* _BASE_DEVFREQ_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c new file mode 100644 -index 000000000..bbf629065 +index 000000000..10e92ec94 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c -@@ -0,0 +1,73 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c +@@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -127706,151 +127753,194 @@ index 000000000..bbf629065 + */ + +/* -+ * "Always on" power management policy ++ * Base kernel property query backend APIs + */ + +#include -+#include ++#include ++#include ++#include ++#include + -+static bool always_on_shaders_needed(struct kbase_device *kbdev) ++int kbase_backend_gpuprops_get(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump) +{ -+ return true; -+} ++ int i; ++ struct kbase_gpuprops_regdump registers = { 0 }; + -+static bool always_on_get_core_active(struct kbase_device *kbdev) -+{ -+ return true; ++ /* Fill regdump with the content of the relevant registers */ ++ registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); ++ ++ registers.l2_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_FEATURES)); ++ ++ registers.tiler_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_FEATURES)); ++ registers.mem_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MEM_FEATURES)); ++ registers.mmu_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MMU_FEATURES)); ++ registers.as_present = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(AS_PRESENT)); ++#if !MALI_USE_CSF ++ registers.js_present = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(JS_PRESENT)); ++#else /* !MALI_USE_CSF */ ++ registers.js_present = 0; ++#endif /* !MALI_USE_CSF */ ++ ++ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) ++#if !MALI_USE_CSF ++ registers.js_features[i] = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(JS_FEATURES_REG(i))); ++#else /* !MALI_USE_CSF */ ++ registers.js_features[i] = 0; ++#endif /* !MALI_USE_CSF */ ++ ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ registers.texture_features[i] = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); ++ ++ registers.thread_max_threads = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_THREADS)); ++ registers.thread_max_workgroup_size = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); ++ registers.thread_max_barrier_size = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); ++ registers.thread_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_FEATURES)); ++ registers.thread_tls_alloc = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_TLS_ALLOC)); ++ ++ registers.shader_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_LO)); ++ registers.shader_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_HI)); ++ ++ registers.tiler_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_PRESENT_LO)); ++ registers.tiler_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_PRESENT_HI)); ++ ++ registers.l2_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_LO)); ++ registers.l2_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_HI)); ++ ++ registers.stack_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(STACK_PRESENT_LO)); ++ registers.stack_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(STACK_PRESENT_HI)); ++ ++ if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { ++ registers.gpu_features_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FEATURES_LO)); ++ registers.gpu_features_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FEATURES_HI)); ++ } else { ++ registers.gpu_features_lo = 0; ++ registers.gpu_features_hi = 0; ++ } ++ ++ if (!kbase_is_gpu_removed(kbdev)) { ++ *regdump = registers; 
++ return 0; ++ } else ++ return -EIO; +} + -+static void always_on_init(struct kbase_device *kbdev) ++int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, ++ struct kbase_current_config_regdump *curr_config_regdump) +{ -+ CSTD_UNUSED(kbdev); ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) ++ return -EINVAL; ++ ++ curr_config_regdump->mem_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MEM_FEATURES)); ++ ++ curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_LO)); ++ curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_HI)); ++ ++ curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_LO)); ++ curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_HI)); ++ ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; ++ ++ return 0; ++ +} + -+/** -+ * always_on_term - Term callback function for always-on power policy -+ * -+ * @kbdev: kbase device -+ */ -+static void always_on_term(struct kbase_device *kbdev) ++int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump) +{ -+ CSTD_UNUSED(kbdev); -+} ++ u32 coherency_features; ++ int error = 0; + -+/* -+ * The struct kbase_pm_policy structure for the demand power policy. -+ * -+ * This is the static structure that defines the demand power policy's callback -+ * and name. -+ */ -+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { -+ "always_on", /* name */ -+ always_on_init, /* init */ -+ always_on_term, /* term */ -+ always_on_shaders_needed, /* shaders_needed */ -+ always_on_get_core_active, /* get_core_active */ -+ NULL, /* handle_event */ -+ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ -+#if MALI_USE_CSF -+ ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ -+#endif -+}; ++ /* Ensure we can access the GPU registers */ ++ kbase_pm_register_access_enable(kbdev); + -+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h -new file mode 100644 -index 000000000..98d35dabe ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h -@@ -0,0 +1,78 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ coherency_features = kbase_cache_get_coherency_features(kbdev); + -+/* -+ * "Always on" power management policy -+ */ ++ if (kbase_is_gpu_removed(kbdev)) ++ error = -EIO; + -+#ifndef MALI_KBASE_PM_ALWAYS_ON_H -+#define MALI_KBASE_PM_ALWAYS_ON_H ++ regdump->coherency_features = coherency_features; + -+/** -+ * DOC: -+ * The "Always on" power management policy has the following -+ * characteristics: -+ * -+ * - When KBase indicates that the GPU will be powered up, but we don't yet -+ * know which Job Chains are to be run: -+ * Shader Cores are powered up, regardless of whether or not they will be -+ * needed later. -+ * -+ * - When KBase indicates that Shader Cores are needed to submit the currently -+ * queued Job Chains: -+ * Shader Cores are kept powered, regardless of whether or not they will be -+ * needed -+ * -+ * - When KBase indicates that the GPU need not be powered: -+ * The Shader Cores are kept powered, regardless of whether or not they will -+ * be needed. The GPU itself is also kept powered, even though it is not -+ * needed. -+ * -+ * This policy is automatically overridden during system suspend: the desired -+ * core state is ignored, and the cores are forced off regardless of what the -+ * policy requests. After resuming from suspend, new changes to the desired -+ * core state made by the policy are honored. -+ * -+ * Note: -+ * -+ * - KBase indicates the GPU will be powered up when it has a User Process that -+ * has just started to submit Job Chains. -+ * -+ * - KBase indicates the GPU need not be powered when all the Job Chains from -+ * User Processes have finished, and it is waiting for a User Process to -+ * submit some more Job Chains. -+ */ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES)) ++ regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); ++ else ++ regdump->core_features = 0; + -+/** -+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data -+ * @dummy: unused dummy variable -+ * -+ * This contains data that is private to the particular power policy that is -+ * active. 
-+ */ -+struct kbasep_pm_policy_always_on { -+ int dummy; -+}; ++ kbase_pm_register_access_disable(kbdev); + -+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; ++ return error; ++} + -+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ ++int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump) ++{ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { ++ u32 l2_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_FEATURES)); ++ u32 l2_config = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); ++ u32 asn_hash[ASN_HASH_COUNT] = { ++ 0, ++ }; ++ int i; + -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { ++ for (i = 0; i < ASN_HASH_COUNT; i++) ++ asn_hash[i] = kbase_reg_read( ++ kbdev, GPU_CONTROL_REG(ASN_HASH(i))); ++ } ++ ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; ++ ++ regdump->l2_features = l2_features; ++ regdump->l2_config = l2_config; ++ for (i = 0; i < ASN_HASH_COUNT; i++) ++ regdump->l2_asn_hash[i] = asn_hash[i]; ++ } ++ ++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 -index 000000000..5c71fdf15 +index 000000000..53578ded5 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c -@@ -0,0 +1,1238 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +@@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -127869,1232 +127959,1150 @@ index 000000000..5c71fdf15 + */ + +/* -+ * GPU backend implementation of base kernel power management APIs ++ * GPU backend instrumentation APIs. 
+ */ + +#include +#include -+#include -+ -+#include -+#if !MALI_USE_CSF -+#include -+#include -+#include -+#else -+#include -+#include -+#endif /* !MALI_USE_CSF */ -+#include -+#include -+#include -+#include -+#include -+ -+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); -+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); -+static void kbase_pm_gpu_clock_control_worker(struct work_struct *data); ++#include ++#include ++#include + -+int kbase_pm_runtime_init(struct kbase_device *kbdev) ++static int wait_prfcnt_ready(struct kbase_device *kbdev) +{ -+ struct kbase_pm_callback_conf *callbacks; -+ -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; -+ if (callbacks) { -+ kbdev->pm.backend.callback_power_on = -+ callbacks->power_on_callback; -+ kbdev->pm.backend.callback_power_off = -+ callbacks->power_off_callback; -+ kbdev->pm.backend.callback_power_suspend = -+ callbacks->power_suspend_callback; -+ kbdev->pm.backend.callback_power_resume = -+ callbacks->power_resume_callback; -+ kbdev->pm.callback_power_runtime_init = -+ callbacks->power_runtime_init_callback; -+ kbdev->pm.callback_power_runtime_term = -+ callbacks->power_runtime_term_callback; -+ kbdev->pm.backend.callback_power_runtime_on = -+ callbacks->power_runtime_on_callback; -+ kbdev->pm.backend.callback_power_runtime_off = -+ callbacks->power_runtime_off_callback; -+ kbdev->pm.backend.callback_power_runtime_idle = -+ callbacks->power_runtime_idle_callback; -+ kbdev->pm.backend.callback_soft_reset = -+ callbacks->soft_reset_callback; -+ kbdev->pm.backend.callback_power_runtime_gpu_idle = -+ callbacks->power_runtime_gpu_idle_callback; -+ kbdev->pm.backend.callback_power_runtime_gpu_active = -+ callbacks->power_runtime_gpu_active_callback; ++ u32 loops; + -+ if (callbacks->power_runtime_init_callback) -+ return callbacks->power_runtime_init_callback(kbdev); -+ else ++ for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) { ++ const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_PRFCNT_ACTIVE; ++ if (!prfcnt_active) + return 0; + } + -+ kbdev->pm.backend.callback_power_on = NULL; -+ kbdev->pm.backend.callback_power_off = NULL; -+ kbdev->pm.backend.callback_power_suspend = NULL; -+ kbdev->pm.backend.callback_power_resume = NULL; -+ kbdev->pm.callback_power_runtime_init = NULL; -+ kbdev->pm.callback_power_runtime_term = NULL; -+ kbdev->pm.backend.callback_power_runtime_on = NULL; -+ kbdev->pm.backend.callback_power_runtime_off = NULL; -+ kbdev->pm.backend.callback_power_runtime_idle = NULL; -+ kbdev->pm.backend.callback_soft_reset = NULL; -+ kbdev->pm.backend.callback_power_runtime_gpu_idle = NULL; -+ kbdev->pm.backend.callback_power_runtime_gpu_active = NULL; -+ -+ return 0; ++ dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n"); ++ return -EBUSY; +} + -+void kbase_pm_runtime_term(struct kbase_device *kbdev) ++int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_instr_hwcnt_enable *enable) +{ -+ if (kbdev->pm.callback_power_runtime_term) -+ kbdev->pm.callback_power_runtime_term(kbdev); -+} ++ unsigned long flags; ++ int err = -EINVAL; ++ u32 irq_mask; ++ u32 prfcnt_config; + -+void kbase_pm_register_access_enable(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_callback_conf *callbacks; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ /* alignment failure */ ++ if ((enable->dump_buffer == 
0ULL) || (enable->dump_buffer & (2048 - 1))) ++ return err; + -+ if (callbacks) -+ callbacks->power_on_callback(kbdev); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) -+ dev_err(kbdev->dev, "Attempting to power on while GPU lost\n"); -+#endif ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { ++ /* Instrumentation is already enabled */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; ++ } + -+ kbdev->pm.backend.gpu_powered = true; -+} ++ if (kbase_is_gpu_removed(kbdev)) { ++ /* GPU has been removed by Arbiter */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; ++ } + -+void kbase_pm_register_access_disable(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_callback_conf *callbacks; ++ /* Enable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | ++ PRFCNT_SAMPLE_COMPLETED); + -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ /* In use, this context is the owner */ ++ kbdev->hwcnt.kctx = kctx; ++ /* Remember the dump address so we can reprogram it later */ ++ kbdev->hwcnt.addr = enable->dump_buffer; ++ kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; + -+ kbdev->pm.backend.gpu_powered = false; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ if (callbacks) -+ callbacks->power_off_callback(kbdev); -+} ++ /* Configure */ ++ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ prfcnt_config |= kbdev->hwcnt.backend.override_counter_set ++ << PRFCNT_CONFIG_SETSELECT_SHIFT; ++#else ++ prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; ++#endif + -+int kbase_hwaccess_pm_init(struct kbase_device *kbdev) -+{ -+ int ret = 0; ++ /* Wait until prfcnt config register can be written */ ++ err = wait_prfcnt_ready(kbdev); ++ if (err) ++ return err; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), ++ prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + -+ mutex_init(&kbdev->pm.lock); ++ /* Wait until prfcnt is disabled before writing configuration registers */ ++ err = wait_prfcnt_ready(kbdev); ++ if (err) ++ return err; + -+ kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", -+ WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!kbdev->pm.backend.gpu_poweroff_wait_wq) -+ return -ENOMEM; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), ++ enable->dump_buffer & 0xFFFFFFFF); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), ++ enable->dump_buffer >> 32); + -+ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, -+ kbase_pm_gpu_poweroff_wait_wq); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), ++ enable->fe_bm); + -+ kbdev->pm.backend.ca_cores_enabled = ~0ull; -+ kbdev->pm.backend.gpu_powered = false; -+ kbdev->pm.backend.gpu_ready = false; -+ kbdev->pm.suspending = false; -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ kbase_pm_set_gpu_lost(kbdev, false); -+#endif -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ kbdev->pm.backend.driver_ready_for_irqs = false; -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), ++ enable->shader_bm); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), ++ enable->mmu_l2_bm); + -+#if !MALI_USE_CSF -+ /* Initialise the metrics subsystem 
*/ -+ ret = kbasep_pm_metrics_init(kbdev); -+ if (ret) -+ return ret; -+#else -+ mutex_init(&kbdev->pm.backend.policy_change_lock); -+ kbdev->pm.backend.policy_change_clamp_state_to_off = false; -+ /* Due to dependency on kbase_ipa_control, the metrics subsystem can't -+ * be initialized here. -+ */ -+ CSTD_UNUSED(ret); -+#endif ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), ++ enable->tiler_bm); + -+ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); -+ kbdev->pm.backend.reset_done = false; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), ++ prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); + -+ init_waitqueue_head(&kbdev->pm.zero_active_count_wait); -+ init_waitqueue_head(&kbdev->pm.resume_wait); -+ kbdev->pm.active_count = 0; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); + -+ init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ if (kbase_pm_ca_init(kbdev) != 0) -+ goto workq_fail; ++ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); ++ return 0; ++} + -+ kbase_pm_policy_init(kbdev); ++static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev) ++{ ++ u32 irq_mask; + -+ if (kbase_pm_state_machine_init(kbdev) != 0) -+ goto pm_state_machine_fail; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->hwcnt.lock); + -+ kbdev->pm.backend.hwcnt_desired = false; -+ kbdev->pm.backend.hwcnt_disabled = true; -+ INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, -+ kbase_pm_hwcnt_disable_worker); -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ if (kbase_is_gpu_removed(kbdev)) ++ /* GPU has been removed by Arbiter */ ++ return; + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ kbdev->pm.backend.gpu_sleep_supported = -+ kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) && -+ !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) && -+ kbdev->pm.backend.callback_power_runtime_gpu_active && -+ kbdev->pm.backend.callback_power_runtime_gpu_idle; -+#endif ++ /* Disable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + -+ if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { -+ kbdev->pm.backend.l2_always_on = false; -+ kbdev->pm.backend.gpu_clock_slow_down_wa = false; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + -+ return 0; -+ } ++ /* Wait until prfcnt config register can be written, then disable the counters. ++ * Return value is ignored as we are disabling anyway. 
++ */ ++ wait_prfcnt_ready(kbdev); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); + -+ /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ -+ if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { -+ kbdev->pm.backend.gpu_clock_slow_down_wa = false; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) -+ kbdev->pm.backend.l2_always_on = true; -+ else -+ kbdev->pm.backend.l2_always_on = false; ++ kbdev->hwcnt.kctx = NULL; ++ kbdev->hwcnt.addr = 0ULL; ++ kbdev->hwcnt.addr_bytes = 0ULL; ++} + -+ return 0; -+ } ++int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) ++{ ++ unsigned long flags, pm_flags; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ -+ kbdev->pm.backend.l2_always_on = false; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { -+ kbdev->pm.backend.gpu_clock_slow_down_wa = true; -+ kbdev->pm.backend.gpu_clock_suspend_freq = 0; -+ kbdev->pm.backend.gpu_clock_slow_down_desired = true; -+ kbdev->pm.backend.gpu_clock_slowed_down = false; -+ INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, -+ kbase_pm_gpu_clock_control_worker); -+ } else -+ kbdev->pm.backend.gpu_clock_slow_down_wa = false; ++ while (1) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ return 0; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { ++ /* Instrumentation is in unrecoverable error state, ++ * there is nothing for us to do. ++ */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ /* Already disabled, return no error. */ ++ return 0; ++ } + -+pm_state_machine_fail: -+ kbase_pm_policy_term(kbdev); -+ kbase_pm_ca_term(kbdev); -+workq_fail: -+#if !MALI_USE_CSF -+ kbasep_pm_metrics_term(kbdev); -+#endif -+ return -EINVAL; -+} ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { ++ /* Instrumentation is not enabled */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ return -EINVAL; ++ } + -+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) -+{ -+ lockdep_assert_held(&kbdev->pm.lock); ++ if (kbdev->hwcnt.kctx != kctx) { ++ /* Instrumentation has been setup for another context */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ return -EINVAL; ++ } + -+ /* Turn clocks and interrupts on - no-op if we haven't done a previous -+ * kbase_pm_clock_off() -+ */ -+ kbase_pm_clock_on(kbdev, is_resume); ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) ++ break; + -+ if (!is_resume) { -+ unsigned long flags; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + -+ /* Force update of L2 state - if we have abandoned a power off -+ * then this may be required to power the L2 back on. 
-+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Ongoing dump/setup - wait for its completion */ ++ wait_event(kbdev->hwcnt.backend.wait, ++ kbdev->hwcnt.backend.triggered != 0); + } + -+ /* Update core status as required by the policy */ -+ kbase_pm_update_cores_state(kbdev); ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; ++ kbdev->hwcnt.backend.triggered = 0; + -+ /* NOTE: We don't wait to reach the desired state, since running atoms -+ * will wait for that state to be reached anyway -+ */ ++ kbasep_instr_hwc_disable_hw_prfcnt(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ ++ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", ++ kctx); ++ ++ return 0; +} + -+static void pm_handle_power_off(struct kbase_device *kbdev) ++int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+#if MALI_USE_CSF -+ enum kbase_mcu_state mcu_state; -+#endif + unsigned long flags; ++ int err = -EINVAL; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ lockdep_assert_held(&kbdev->pm.lock); -+ -+ if (backend->poweron_required) -+ return; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ if (kbdev->pm.backend.gpu_wakeup_override) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return; -+ } -+#endif -+ WARN_ON(backend->shaders_state != -+ KBASE_SHADERS_OFF_CORESTACK_OFF || -+ backend->l2_state != KBASE_L2_OFF); -+#if MALI_USE_CSF -+ mcu_state = backend->mcu_state; -+ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state)); -+#endif -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ if (backend->callback_power_runtime_gpu_idle) { -+ WARN_ON(backend->gpu_idled); -+ backend->callback_power_runtime_gpu_idle(kbdev); -+ backend->gpu_idled = true; -+ return; ++ if (kbdev->hwcnt.kctx != kctx) { ++ /* The instrumentation has been setup for another context */ ++ goto unlock; + } -+#endif -+ -+ /* Disable interrupts and turn the clock off */ -+ if (!kbase_pm_clock_off(kbdev)) { -+ /* -+ * Page/bus faults are pending, must drop locks to -+ * process. Interrupts are disabled so no more faults -+ * should be generated at this point. -+ */ -+ kbase_pm_unlock(kbdev); -+ kbase_flush_mmu_wqs(kbdev); -+ kbase_pm_lock(kbdev); -+ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* poweron_required may have changed while pm lock -+ * was released. -+ */ -+ if (kbase_pm_is_gpu_lost(kbdev)) -+ backend->poweron_required = false; -+#endif + -+ /* Turn off clock now that fault have been handled. We -+ * dropped locks so poweron_required may have changed - -+ * power back on if this is the case (effectively only -+ * re-enabling of the interrupts would be done in this -+ * case, as the clocks to GPU were not withdrawn yet). ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { ++ /* HW counters are disabled or another dump is ongoing, or we're ++ * resetting, or we are in unrecoverable error state. 
+ */ -+ if (backend->poweron_required) -+ kbase_pm_clock_on(kbdev, false); -+ else -+ WARN_ON(!kbase_pm_clock_off(kbdev)); ++ goto unlock; + } -+} + -+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) -+{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ pm.backend.gpu_poweroff_wait_work); -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; -+ unsigned long flags; ++ if (kbase_is_gpu_removed(kbdev)) { ++ /* GPU has been removed by Arbiter */ ++ goto unlock; ++ } + -+ KBASE_KTRACE_ADD(kbdev, PM_POWEROFF_WAIT_WQ, NULL, 0); ++ kbdev->hwcnt.backend.triggered = 0; + -+#if !MALI_USE_CSF -+ /* Wait for power transitions to complete. We do this with no locks held -+ * so that we don't deadlock with any pending workqueues. ++ /* Mark that we're dumping - the PF handler can signal that we faulted + */ -+ kbase_pm_wait_for_desired_state(kbdev); -+#endif ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + -+ kbase_pm_lock(kbdev); ++ /* Wait until prfcnt is ready to request dump */ ++ err = wait_prfcnt_ready(kbdev); ++ if (err) ++ goto unlock; + -+ pm_handle_power_off(kbdev); ++ /* Reconfigure the dump address */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), ++ kbdev->hwcnt.addr & 0xFFFFFFFF); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), ++ kbdev->hwcnt.addr >> 32); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->poweroff_wait_in_progress = false; -+ if (backend->poweron_required) { -+ backend->poweron_required = false; -+ kbdev->pm.backend.l2_desired = true; -+#if MALI_USE_CSF -+ kbdev->pm.backend.mcu_desired = true; -+#endif -+ kbase_pm_update_state(kbdev); -+ kbase_pm_update_cores_state_nolock(kbdev); -+#if !MALI_USE_CSF -+ kbase_backend_slot_update(kbdev); -+#endif /* !MALI_USE_CSF */ -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Start dumping */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, ++ kbdev->hwcnt.addr); + -+ kbase_pm_unlock(kbdev); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_PRFCNT_SAMPLE); + -+ wake_up(&kbdev->pm.backend.poweroff_wait); ++ dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); ++ ++ unlock: ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; +} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + -+static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) ++bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, ++ bool * const success) +{ -+#if defined(CONFIG_MALI_BIFROST_DVFS) -+ struct clk *clk = kbdev->clocks[0]; -+#endif ++ unsigned long flags; ++ bool complete = false; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ if (!kbdev->pm.backend.gpu_clock_slow_down_wa) -+ return; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ /* No suspend clock is specified */ -+ if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq)) -+ return; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { ++ *success = true; ++ complete = true; ++ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ *success = false; ++ complete = true; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ } + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ /* Suspend devfreq */ -+ devfreq_suspend_device(kbdev->devfreq); ++ return complete; ++} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + -+ /* Keep the current freq to restore it upon resume 
*/ -+ kbdev->previous_frequency = kbdev->current_nominal_freq; ++void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ /* Slow down GPU clock to the suspend clock*/ -+ kbase_devfreq_force_freq(kbdev, -+ kbdev->pm.backend.gpu_clock_suspend_freq); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ /* If the state is in unrecoverable error, we already wake_up the waiter ++ * and don't need to do any action when sample is done. ++ */ + -+ if (WARN_ON_ONCE(!clk)) -+ return; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { ++ /* All finished and idle */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ } + -+ /* Stop the metrics gathering framework */ -+ kbase_pm_metrics_stop(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++} + -+ /* Keep the current freq to restore it upon resume */ -+ kbdev->previous_frequency = clk_get_rate(clk); ++int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned long flags; ++ int err; + -+ /* Slow down GPU clock to the suspend clock*/ -+ if (WARN_ON_ONCE(clk_set_rate(clk, -+ kbdev->pm.backend.gpu_clock_suspend_freq))) -+ dev_err(kbdev->dev, "Failed to set suspend freq\n"); ++ /* Wait for dump & cache clean to complete */ ++ wait_event(kbdev->hwcnt.backend.wait, ++ kbdev->hwcnt.backend.triggered != 0); + -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -+} ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) -+{ -+#if defined(CONFIG_MALI_BIFROST_DVFS) -+ struct clk *clk = kbdev->clocks[0]; -+#endif ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ err = -EINVAL; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { ++ err = -EIO; ++ } else { ++ /* Dump done */ ++ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_IDLE); ++ err = 0; ++ } + -+ if (!kbdev->pm.backend.gpu_clock_slow_down_wa) -+ return; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) ++ return err; ++} + -+ /* Restore GPU clock to the previous one */ -+ kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency); ++int kbase_instr_hwcnt_clear(struct kbase_context *kctx) ++{ ++ unsigned long flags; ++ int err = -EINVAL; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ /* Resume devfreq */ -+ devfreq_resume_device(kbdev->devfreq); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ /* Check it's the context previously set up and we're not in IDLE ++ * state. 
++ */ ++ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != ++ KBASE_INSTR_STATE_IDLE) ++ goto unlock; + -+ if (WARN_ON_ONCE(!clk)) -+ return; ++ if (kbase_is_gpu_removed(kbdev)) { ++ /* GPU has been removed by Arbiter */ ++ goto unlock; ++ } + -+ /* Restore GPU clock */ -+ if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) -+ dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", -+ kbdev->previous_frequency); ++ /* Wait until prfcnt is ready to clear */ ++ err = wait_prfcnt_ready(kbdev); ++ if (err) ++ goto unlock; + -+ /* Restart the metrics gathering framework */ -+ kbase_pm_metrics_start(kbdev); ++ /* Clear the counters */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_PRFCNT_CLEAR); + -+#endif /* CONFIG_MALI_BIFROST_DVFS */ ++unlock: ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; +} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + -+static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) ++void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ pm.backend.gpu_clock_control_work); -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; -+ bool slow_down = false, normalize = false; + -+ /* Determine if GPU clock control is required */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (!backend->gpu_clock_slowed_down && -+ backend->gpu_clock_slow_down_desired) { -+ slow_down = true; -+ backend->gpu_clock_slowed_down = true; -+ } else if (backend->gpu_clock_slowed_down && -+ !backend->gpu_clock_slow_down_desired) { -+ normalize = true; -+ backend->gpu_clock_slowed_down = false; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ ++ /* If we already in unrecoverable error state, early return. */ ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) { ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return; + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* Control GPU clock according to the request of L2 state machine. -+ * The GPU clock needs to be lowered for safe L2 power down -+ * and restored to previous speed at L2 power up. -+ */ -+ if (slow_down) -+ kbase_pm_l2_clock_slow(kbdev); -+ else if (normalize) -+ kbase_pm_l2_clock_normalize(kbdev); ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR; + -+ /* Tell L2 state machine to transit to next state */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Need to disable HW if it's not disabled yet. */ ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) ++ kbasep_instr_hwc_disable_hw_prfcnt(kbdev); ++ ++ /* Wake up any waiters. 
*/ ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error); + -+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) ++void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ pm.backend.hwcnt_disable_work); -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + -+ bool do_disable; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* A reset is the only way to exit the unrecoverable error state */ ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + -+ if (!do_disable) -+ return; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset); + -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++int kbase_instr_backend_init(struct kbase_device *kbdev) ++{ ++ spin_lock_init(&kbdev->hwcnt.lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + -+ if (do_disable) { -+ /* PM state did not change while we were doing the disable, -+ * so commit the work we just performed and continue the state -+ * machine. -+ */ -+ backend->hwcnt_disabled = true; -+ kbase_pm_update_state(kbdev); -+#if !MALI_USE_CSF -+ kbase_backend_slot_update(kbdev); -+#endif /* !MALI_USE_CSF */ -+ } else { -+ /* PM state was updated while we were doing the disable, -+ * so we need to undo the disable we just performed. -+ */ -+#if MALI_USE_CSF -+ unsigned long lock_flags; ++ init_waitqueue_head(&kbdev->hwcnt.backend.wait); + -+ kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); ++ kbdev->hwcnt.backend.triggered = 0; ++ ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++/* Use the build time option for the override default. */ ++#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; +#endif -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+#if MALI_USE_CSF -+ kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); +#endif -+ } ++ return 0; ++} + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++void kbase_instr_backend_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); +} + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+/** -+ * kbase_pm_do_poweroff_sync - Do the synchronous power down of GPU ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) ++{ ++ /* No validation is done on the debugfs input. Invalid input could cause ++ * performance counter errors. This is acceptable since this is a debug ++ * only feature and users should know what they are doing. 
++ * ++ * Valid inputs are the values accepted bythe SET_SELECT bits of the ++ * PRFCNT_CONFIG register as defined in the architecture specification. ++ */ ++ debugfs_create_u8("hwcnt_set_select", 0644, ++ kbdev->mali_debugfs_directory, ++ (u8 *)&kbdev->hwcnt.backend.override_counter_set); ++} ++#endif +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h +new file mode 100644 +index 000000000..bd2eb8a12 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h +@@ -0,0 +1,60 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved. + * -+ * This function is called at the time of system suspend or device unload -+ * to power down the GPU synchronously. This is needed as the power down of GPU -+ * would usually happen from the runtime suspend callback function (if gpu_active -+ * and gpu_idle callbacks are used) and runtime suspend operation is disabled -+ * when system suspend takes place. -+ * The function first waits for the @gpu_poweroff_wait_work to complete, which -+ * could have been enqueued after the last PM reference was released. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, negative value otherwise. + */ -+static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ unsigned long flags; -+ int ret = 0; -+ -+ WARN_ON(kbdev->pm.active_count); -+ -+ kbase_pm_wait_for_poweroff_work_complete(kbdev); -+ -+ kbase_pm_lock(kbdev); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(backend->poweroff_wait_in_progress); -+ WARN_ON(backend->gpu_sleep_mode_active); -+ if (backend->gpu_powered) { + -+ backend->mcu_desired = false; -+ backend->l2_desired = false; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/* ++ * Backend-specific instrumentation definitions ++ */ + -+ ret = kbase_pm_wait_for_desired_state(kbdev); -+ if (ret) { -+ dev_warn( -+ kbdev->dev, -+ "Wait for pm state change failed on synchronous power off"); -+ ret = -EBUSY; -+ goto out; -+ } ++#ifndef _KBASE_INSTR_DEFS_H_ ++#define _KBASE_INSTR_DEFS_H_ + -+ /* Due to the power policy, GPU could have been kept active -+ * throughout and so need to invoke the idle callback before -+ * the power down. 
-+ */ -+ if (backend->callback_power_runtime_gpu_idle && -+ !backend->gpu_idled) { -+ backend->callback_power_runtime_gpu_idle(kbdev); -+ backend->gpu_idled = true; -+ } ++#include + -+ if (!kbase_pm_clock_off(kbdev)) { -+ dev_warn( -+ kbdev->dev, -+ "Failed to turn off GPU clocks on synchronous power off, MMU faults pending"); -+ ret = -EBUSY; -+ } -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++/* ++ * Instrumentation State Machine States ++ */ ++enum kbase_instr_state { ++ /* State where instrumentation is not active */ ++ KBASE_INSTR_STATE_DISABLED = 0, ++ /* State machine is active and ready for a command. */ ++ KBASE_INSTR_STATE_IDLE, ++ /* Hardware is currently dumping a frame. */ ++ KBASE_INSTR_STATE_DUMPING, ++ /* An error has occurred during DUMPING (page fault). */ ++ KBASE_INSTR_STATE_FAULT, ++ /* An unrecoverable error has occurred, a reset is the only way to exit ++ * from unrecoverable error state. ++ */ ++ KBASE_INSTR_STATE_UNRECOVERABLE_ERROR, ++}; + -+out: -+ kbase_pm_unlock(kbdev); -+ return ret; -+} ++/* Structure used for instrumentation and HW counters dumping */ ++struct kbase_instr_backend { ++ wait_queue_head_t wait; ++ int triggered; ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ enum kbase_hwcnt_physical_set override_counter_set; +#endif + -+void kbase_pm_do_poweroff(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ enum kbase_instr_state state; ++}; + -+ lockdep_assert_held(&kbdev->pm.lock); ++#endif /* _KBASE_INSTR_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h +new file mode 100644 +index 000000000..332cc6944 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h +@@ -0,0 +1,41 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/* ++ * Backend-specific HW access instrumentation APIs ++ */ + -+ if (!kbdev->pm.backend.gpu_powered) -+ goto unlock_hwaccess; ++#ifndef _KBASE_INSTR_INTERNAL_H_ ++#define _KBASE_INSTR_INTERNAL_H_ + -+ if (kbdev->pm.backend.poweroff_wait_in_progress) -+ goto unlock_hwaccess; ++/** ++ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning ++ * @data: a &struct work_struct ++ */ ++void kbasep_cache_clean_worker(struct work_struct *data); + -+#if MALI_USE_CSF -+ kbdev->pm.backend.mcu_desired = false; -+#else -+ /* Force all cores off */ -+ kbdev->pm.backend.shaders_desired = false; -+#endif -+ kbdev->pm.backend.l2_desired = false; ++/** ++ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received ++ * @kbdev: Kbase device ++ */ ++void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); + -+ kbdev->pm.backend.poweroff_wait_in_progress = true; -+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; ++#endif /* _KBASE_INSTR_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h +new file mode 100644 +index 000000000..66cda8c0b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* l2_desired being false should cause the state machine to -+ * start powering off the L2. When it actually is powered off, -+ * the interrupt handler will call kbase_pm_l2_update_state() -+ * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. -+ * Callers of this function will need to wait on poweroff_wait. 
-+ */ -+ kbase_pm_update_state(kbdev); ++/* ++ * Backend specific IRQ APIs ++ */ + -+unlock_hwaccess: -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++#ifndef _KBASE_IRQ_INTERNAL_H_ ++#define _KBASE_IRQ_INTERNAL_H_ + -+static bool is_poweroff_in_progress(struct kbase_device *kbdev) -+{ -+ bool ret; -+ unsigned long flags; ++int kbase_install_interrupts(struct kbase_device *kbdev); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++void kbase_release_interrupts(struct kbase_device *kbdev); + -+ return ret; -+} ++/** ++ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed ++ * execution ++ * @kbdev: The kbase device ++ */ ++void kbase_synchronize_irqs(struct kbase_device *kbdev); + -+void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -+{ -+ wait_event_killable(kbdev->pm.backend.poweroff_wait, -+ is_poweroff_in_progress(kbdev)); -+} -+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); ++int kbasep_common_test_interrupt_handlers( ++ struct kbase_device * const kbdev); + -+/** -+ * is_gpu_powered_down - Check whether GPU is powered down ++irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); ++int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, int irq_type); ++ ++#endif /* _KBASE_IRQ_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +new file mode 100644 +index 000000000..eb63b2c56 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +@@ -0,0 +1,503 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @kbdev: kbase device ++ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * -+ * Return: true if GPU is powered down, false otherwise + */ -+static bool is_gpu_powered_down(struct kbase_device *kbdev) -+{ -+ bool ret; -+ unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = !kbdev->pm.backend.gpu_powered; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#include ++#include ++#include + -+ return ret; -+} ++#include + -+void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev) ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++ ++/* GPU IRQ Tags */ ++#define JOB_IRQ_TAG 0 ++#define MMU_IRQ_TAG 1 ++#define GPU_IRQ_TAG 2 ++ ++static void *kbase_tag(void *ptr, u32 tag) +{ -+ wait_event_killable(kbdev->pm.backend.poweroff_wait, -+ is_gpu_powered_down(kbdev)); ++ return (void *)(((uintptr_t) ptr) | tag); +} -+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down); + -+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, -+ unsigned int flags) ++static void *kbase_untag(void *ptr) +{ -+ unsigned long irq_flags; -+ int ret; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ kbase_pm_lock(kbdev); ++ return (void *)(((uintptr_t) ptr) & ~3); ++} + -+ /* A suspend won't happen during startup/insmod */ -+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); ++static irqreturn_t kbase_job_irq_handler(int irq, void *data) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ /* Power up the GPU, don't enable IRQs as we are not ready to receive -+ * them -+ */ -+ ret = kbase_pm_init_hw(kbdev, flags); -+ if (ret) { -+ kbase_pm_unlock(kbdev); -+ return ret; -+ } -+#if MALI_USE_CSF -+ kbdev->pm.debug_core_mask = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ /* Set the initial value for 'shaders_avail'. It would be later -+ * modified only from the MCU state machine, when the shader core -+ * allocation enable mask request has completed. So its value would -+ * indicate the mask of cores that are currently being used by FW for -+ * the allocation of endpoints requested by CSGs. -+ */ -+ kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+#else -+ kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = -+ kbdev->pm.debug_core_mask[1] = -+ kbdev->pm.debug_core_mask[2] = -+ kbdev->gpu_props.props.raw_props.shader_present; -+#endif ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Pretend the GPU is active to prevent a power policy turning the GPU -+ * cores off -+ */ -+ kbdev->pm.active_count = 1; -+#if MALI_USE_CSF && KBASE_PM_RUNTIME -+ if (kbdev->pm.backend.callback_power_runtime_gpu_active) { -+ /* Take the RPM reference count to match with the internal -+ * PM reference count -+ */ -+ kbdev->pm.backend.callback_power_runtime_gpu_active(kbdev); -+ WARN_ON(kbdev->pm.backend.gpu_idled); ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; + } -+#endif + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); -+ /* Ensure cycle counter is off */ -+ kbdev->pm.backend.gpu_cycle_counter_requests = 0; -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); ++ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + -+ /* We are ready to receive IRQ's now as power policy is set up, so -+ * enable them now. 
-+ */ +#ifdef CONFIG_MALI_BIFROST_DEBUG -+ kbdev->pm.backend.driver_ready_for_irqs = true; -+#endif -+ kbase_pm_enable_interrupts(kbdev); ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ WARN_ON(!kbdev->pm.backend.gpu_powered); -+ /* GPU has been powered up (by kbase_pm_init_hw) and interrupts have -+ * been enabled, so GPU is ready for use and PM state machine can be -+ * exercised from this point onwards. -+ */ -+ kbdev->pm.backend.gpu_ready = true; ++ if (!val) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; ++ } ++ ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ /* Turn on the GPU and any cores needed by the policy */ +#if MALI_USE_CSF -+ /* Turn on the L2 caches, needed for firmware boot */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ kbdev->pm.backend.l2_desired = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ /* call the csf interrupt handler */ ++ kbase_csf_interrupt(kbdev, val); ++#else ++ kbase_job_done(kbdev, val); +#endif -+ kbase_pm_do_poweron(kbdev, false); -+ kbase_pm_unlock(kbdev); + -+ return 0; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return IRQ_HANDLED; +} + -+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) ++static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ WARN_ON(kbase_pm_do_poweroff_sync(kbdev)); -+#else -+ mutex_lock(&kbdev->pm.lock); -+ kbase_pm_do_poweroff(kbdev); -+ mutex_unlock(&kbdev->pm.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ kbase_pm_wait_for_poweroff_work_complete(kbdev); -+#endif -+} ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; ++ } + -+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); ++ atomic_inc(&kbdev->faults_pending); + -+void kbase_hwaccess_pm_term(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); ++ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + -+ cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (kbdev->pm.backend.hwcnt_disabled) { -+ unsigned long flags; -+#if MALI_USE_CSF -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+#else -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#endif ++ if (!val) { ++ atomic_dec(&kbdev->faults_pending); ++ return IRQ_NONE; + } + -+ /* Free any resources the policy allocated */ -+ kbase_pm_state_machine_term(kbdev); -+ kbase_pm_policy_term(kbdev); -+ kbase_pm_ca_term(kbdev); ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + 
-+#if !MALI_USE_CSF -+ /* Shut down the metrics subsystem */ -+ kbasep_pm_metrics_term(kbdev); -+#else -+ if (WARN_ON(mutex_is_locked(&kbdev->pm.backend.policy_change_lock))) { -+ mutex_lock(&kbdev->pm.backend.policy_change_lock); -+ mutex_unlock(&kbdev->pm.backend.policy_change_lock); -+ } -+ mutex_destroy(&kbdev->pm.backend.policy_change_lock); -+#endif ++ kbase_mmu_interrupt(kbdev, val); + -+ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); ++ atomic_dec(&kbdev->faults_pending); ++ ++ return IRQ_HANDLED; +} + -+void kbase_pm_power_changed(struct kbase_device *kbdev) ++static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); + -+#if !MALI_USE_CSF -+ kbase_backend_slot_update(kbdev); -+#endif /* !MALI_USE_CSF */ ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; ++ } + ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} + -+#if MALI_USE_CSF -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->pm.lock); ++ if (!val) ++ return IRQ_NONE; + -+ kbdev->pm.debug_core_mask = new_core_mask; -+ kbase_pm_update_dynamic_cores_onoff(kbdev); ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ ++ kbase_gpu_interrupt(kbdev, val); ++ ++ return IRQ_HANDLED; +} -+KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); -+#else -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, -+ u64 new_core_mask_js0, u64 new_core_mask_js1, -+ u64 new_core_mask_js2) ++ ++static irq_handler_t kbase_handler_table[] = { ++ [JOB_IRQ_TAG] = kbase_job_irq_handler, ++ [MMU_IRQ_TAG] = kbase_mmu_irq_handler, ++ [GPU_IRQ_TAG] = kbase_gpu_irq_handler, ++}; ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define JOB_IRQ_HANDLER JOB_IRQ_TAG ++#define GPU_IRQ_HANDLER GPU_IRQ_TAG ++ ++/** ++ * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() ++ * @irq: IRQ number ++ * @data: Data associated with this IRQ (i.e. kbdev) ++ * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) ++ * ++ * Handle the GPU device interrupt source requests reflected in the ++ * given source bit-pattern. The test code caller is responsible for ++ * undertaking the required device power maintenace. 
++ * ++ * Return: IRQ_HANDLED if the requests are from the GPU device, ++ * IRQ_NONE otherwise ++ */ ++irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->pm.lock); ++ struct kbase_device *kbdev = kbase_untag(data); + -+ if (kbase_dummy_job_wa_enabled(kbdev)) { -+ dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); -+ new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; -+ } ++ if (!val) ++ return IRQ_NONE; + -+ kbdev->pm.debug_core_mask[0] = new_core_mask_js0; -+ kbdev->pm.debug_core_mask[1] = new_core_mask_js1; -+ kbdev->pm.debug_core_mask[2] = new_core_mask_js2; -+ kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | -+ new_core_mask_js2; ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ kbase_pm_update_dynamic_cores_onoff(kbdev); -+} -+#endif /* MALI_USE_CSF */ ++ kbase_gpu_interrupt(kbdev, val); + -+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) -+{ -+ kbase_pm_update_active(kbdev); ++ return IRQ_HANDLED; +} + -+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) -+{ -+ kbase_pm_update_active(kbdev); -+} ++KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + -+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) ++/** ++ * kbase_set_custom_irq_handler - Set a custom IRQ handler ++ * @kbdev: Device for which the handler is to be registered ++ * @custom_handler: Handler to be registered ++ * @irq_type: Interrupt type ++ * ++ * Registers given interrupt handler for requested interrupt type ++ * In the case where irq handler is not specified, the default handler shall be ++ * registered ++ * ++ * Return: 0 case success, error code otherwise ++ */ ++int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, ++ int irq_type) +{ -+ int ret = 0; ++ int result = 0; ++ irq_handler_t requested_irq_handler = NULL; + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ ret = kbase_pm_do_poweroff_sync(kbdev); -+ if (ret) -+ return ret; -+#else -+ /* Force power off the GPU and all cores (regardless of policy), only -+ * after the PM active count reaches zero (otherwise, we risk turning it -+ * off prematurely) -+ */ -+ kbase_pm_lock(kbdev); ++ KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && ++ (irq_type <= GPU_IRQ_HANDLER)); + -+ kbase_pm_do_poweroff(kbdev); ++ /* Release previous handler */ ++ if (kbdev->irqs[irq_type].irq) ++ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + -+#if !MALI_USE_CSF -+ kbase_backend_timer_suspend(kbdev); -+#endif /* !MALI_USE_CSF */ ++ requested_irq_handler = (custom_handler != NULL) ? 
++ custom_handler : ++ kbase_handler_table[irq_type]; + -+ kbase_pm_unlock(kbdev); ++ if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, ++ kbdev->irqs[irq_type].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), ++ kbase_tag(kbdev, irq_type)) != 0) { ++ result = -EINVAL; ++ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", ++ kbdev->irqs[irq_type].irq, irq_type); ++#if IS_ENABLED(CONFIG_SPARSE_IRQ) ++ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); ++#endif /* CONFIG_SPARSE_IRQ */ ++ } + -+ kbase_pm_wait_for_poweroff_work_complete(kbdev); -+#endif ++ return result; ++} + -+ WARN_ON(kbdev->pm.backend.gpu_powered); -+ WARN_ON(atomic_read(&kbdev->faults_pending)); ++KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + -+ if (kbdev->pm.backend.callback_power_suspend) -+ kbdev->pm.backend.callback_power_suspend(kbdev); ++/* test correct interrupt assigment and reception by cpu */ ++struct kbasep_irq_test { ++ struct hrtimer timer; ++ wait_queue_head_t wait; ++ int triggered; ++ u32 timeout; ++}; + -+ return ret; -+} ++static struct kbasep_irq_test kbasep_irq_test_data; + -+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) ++#define IRQ_TEST_TIMEOUT 500 ++ ++static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ -+ kbase_pm_lock(kbdev); ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ kbdev->pm.suspending = false; -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_pm_is_gpu_lost(kbdev)) { -+ dev_dbg(kbdev->dev, "%s: GPU lost in progress\n", __func__); -+ kbase_pm_unlock(kbdev); -+ return; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; + } -+#endif -+ kbase_pm_do_poweron(kbdev, true); + -+#if !MALI_USE_CSF -+ kbase_backend_timer_resume(kbdev); -+#endif /* !MALI_USE_CSF */ ++ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + -+ wake_up_all(&kbdev->pm.resume_wait); -+ kbase_pm_unlock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (!val) ++ return IRQ_NONE; ++ ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ ++ kbasep_irq_test_data.triggered = 1; ++ wake_up(&kbasep_irq_test_data.wait); ++ ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); ++ ++ return IRQ_HANDLED; +} + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) ++static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; -+ ktime_t end_timestamp = ktime_get_raw(); -+ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ if (!kbdev->arb.arb_if) -+ return; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ mutex_lock(&kbdev->pm.lock); -+ mutex_lock(&arb_vm_state->vm_state_lock); -+ if (kbdev->pm.backend.gpu_powered && -+ !kbase_pm_is_gpu_lost(kbdev)) { -+ kbase_pm_set_gpu_lost(kbdev, true); ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return IRQ_NONE; ++ } + -+ /* GPU is no longer mapped to VM. 
So no interrupts will -+ * be received and Mali registers have been replaced by -+ * dummy RAM -+ */ -+ WARN(!kbase_is_gpu_removed(kbdev), -+ "GPU is still available after GPU lost event\n"); ++ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + -+ /* Full GPU reset will have been done by hypervisor, so -+ * cancel -+ */ -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING); -+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); -+ kbase_synchronize_irqs(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* Clear all jobs running on the GPU */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->protected_mode = false; -+ kbase_backend_reset(kbdev, &end_timestamp); -+ kbase_pm_metrics_update(kbdev, NULL); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!val) ++ return IRQ_NONE; + -+ /* Cancel any pending HWC dumps */ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || -+ kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); -+ } -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ } -+ mutex_unlock(&arb_vm_state->vm_state_lock); -+ mutex_unlock(&kbdev->pm.lock); -+} ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ kbasep_irq_test_data.triggered = 1; ++ wake_up(&kbasep_irq_test_data.wait); + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + -+ lockdep_assert_held(&kbdev->pm.lock); ++ return IRQ_HANDLED; ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* Set the override flag to force the power up of L2 cache */ -+ kbdev->pm.backend.gpu_wakeup_override = true; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) ++{ ++ struct kbasep_irq_test *test_data = container_of(timer, ++ struct kbasep_irq_test, timer); + -+ return kbase_pm_wait_for_desired_state(kbdev); ++ test_data->timeout = 1; ++ test_data->triggered = 1; ++ wake_up(&test_data->wait); ++ return HRTIMER_NORESTART; +} + -+static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) ++static int kbasep_common_test_interrupt( ++ struct kbase_device * const kbdev, u32 tag) +{ -+ unsigned long flags; -+ int ret; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ lockdep_assert_held(&kbdev->pm.lock); ++ int err = 0; ++ irq_handler_t test_handler; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ /* In case of no active CSG on slot, powering up L2 could be skipped and -+ * proceed directly to suspend GPU. -+ * ToDo: firmware has to be reloaded after wake-up as no halt command -+ * has been sent when GPU was put to sleep mode. -+ */ -+ if (!kbase_csf_scheduler_get_nr_active_csgs(kbdev)) -+ dev_info( -+ kbdev->dev, -+ "No active CSGs. 
Can skip the power up of L2 and go for suspension directly"); -+#endif ++ u32 old_mask_val; ++ u16 mask_offset; ++ u16 rawstat_offset; + -+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); -+ if (ret) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ dev_warn( -+ kbdev->dev, -+ "Waiting for MCU to wake up failed on runtime suspend"); -+ kbdev->pm.backend.gpu_wakeup_override = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return ret; ++ switch (tag) { ++ case JOB_IRQ_TAG: ++ test_handler = kbase_job_irq_test_handler; ++ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); ++ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); ++ break; ++ case MMU_IRQ_TAG: ++ test_handler = kbase_mmu_irq_test_handler; ++ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); ++ mask_offset = MMU_REG(MMU_IRQ_MASK); ++ break; ++ case GPU_IRQ_TAG: ++ /* already tested by pm_driver - bail out */ ++ default: ++ return 0; + } + -+ /* Check if a Doorbell mirror interrupt occurred meanwhile */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbdev->pm.backend.gpu_sleep_mode_active && -+ kbdev->pm.backend.exit_gpu_sleep_mode) { -+ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up"); -+ kbdev->pm.backend.gpu_wakeup_override = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return -EBUSY; -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ /* Need to release the kbdev->pm.lock to avoid lock ordering issue -+ * with kctx->reg.lock, which is taken if the sync wait condition is -+ * evaluated after the CSG suspend operation. -+ */ -+ kbase_pm_unlock(kbdev); -+ ret = kbase_csf_scheduler_handle_runtime_suspend(kbdev); -+ kbase_pm_lock(kbdev); ++ /* store old mask */ ++ old_mask_val = kbase_reg_read(kbdev, mask_offset); ++ /* mask interrupts */ ++ kbase_reg_write(kbdev, mask_offset, 0x0); + -+ /* Power down L2 cache */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.gpu_wakeup_override = false; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (kbdev->irqs[tag].irq) { ++ /* release original handler and install test handler */ ++ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { ++ err = -EINVAL; ++ } else { ++ kbasep_irq_test_data.timeout = 0; ++ hrtimer_init(&kbasep_irq_test_data.timer, ++ CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ kbasep_irq_test_data.timer.function = ++ kbasep_test_interrupt_timeout; + -+ /* After re-acquiring the kbdev->pm.lock, check if the device -+ * became active (or active then idle) meanwhile. 
-+ */ -+ if (kbdev->pm.active_count || -+ kbdev->pm.backend.poweroff_wait_in_progress) { -+ dev_dbg(kbdev->dev, -+ "Device became active on runtime suspend after suspending Scheduler"); -+ ret = -EBUSY; -+ } ++ /* trigger interrupt */ ++ kbase_reg_write(kbdev, mask_offset, 0x1); ++ kbase_reg_write(kbdev, rawstat_offset, 0x1); + -+ if (ret) -+ return ret; ++ hrtimer_start(&kbasep_irq_test_data.timer, ++ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), ++ HRTIMER_MODE_REL); + -+ ret = kbase_pm_wait_for_desired_state(kbdev); -+ if (ret) -+ dev_warn(kbdev->dev, "Wait for power down failed on runtime suspend"); ++ wait_event(kbasep_irq_test_data.wait, ++ kbasep_irq_test_data.triggered != 0); + -+ return ret; ++ if (kbasep_irq_test_data.timeout != 0) { ++ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", ++ kbdev->irqs[tag].irq, tag); ++ err = -EINVAL; ++ } else { ++ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", ++ kbdev->irqs[tag].irq, tag); ++ } ++ ++ hrtimer_cancel(&kbasep_irq_test_data.timer); ++ kbasep_irq_test_data.triggered = 0; ++ ++ /* mask interrupts */ ++ kbase_reg_write(kbdev, mask_offset, 0x0); ++ ++ /* release test handler */ ++ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); ++ } ++ ++ /* restore original interrupt */ ++ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], ++ kbdev->irqs[tag].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { ++ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", ++ kbdev->irqs[tag].irq, tag); ++ err = -EINVAL; ++ } ++ } ++ /* restore old mask */ ++ kbase_reg_write(kbdev, mask_offset, old_mask_val); ++ ++ return err; +} + -+int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) ++int kbasep_common_test_interrupt_handlers( ++ struct kbase_device * const kbdev) +{ -+ enum kbase_mcu_state mcu_state; -+ bool exit_early = false; -+ unsigned long flags; -+ int ret = 0; ++ int err; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* This check is needed for the case where Kbase had invoked the -+ * @power_off_callback directly. -+ */ -+ if (!kbdev->pm.backend.gpu_powered) { -+ dev_dbg(kbdev->dev, "GPU already powered down on runtime suspend"); -+ exit_early = true; -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ init_waitqueue_head(&kbasep_irq_test_data.wait); ++ kbasep_irq_test_data.triggered = 0; + -+ if (exit_early) -+ goto out; ++ /* A suspend won't happen during startup/insmod */ ++ kbase_pm_context_active(kbdev); + -+ ret = kbase_reset_gpu_try_prevent(kbdev); -+ if (ret == -ENOMEM) { -+ dev_dbg(kbdev->dev, "Quit runtime suspend as GPU is in bad state"); -+ /* Finish the runtime suspend, no point in trying again as GPU is -+ * in irrecoverable bad state. -+ */ ++ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); ++ if (err) { ++ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; -+ } else if (ret) { -+ dev_dbg(kbdev->dev, "Quit runtime suspend for failing to prevent gpu reset"); -+ ret = -EBUSY; ++ } ++ ++ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); ++ if (err) { ++ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_pm_lock(kbdev); ++ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + -+ /* -+ * This is to handle the case where GPU device becomes active and idle -+ * very quickly whilst the runtime suspend callback is executing. 
-+ * This is useful for the following scenario :- -+ * - GPU goes idle and pm_callback_runtime_gpu_idle() is called. -+ * - Auto-suspend timer expires and kbase_device_runtime_suspend() -+ * is called. -+ * - GPU becomes active and pm_callback_runtime_gpu_active() calls -+ * pm_runtime_get(). -+ * - Shortly after that GPU becomes idle again. -+ * - kbase_pm_handle_runtime_suspend() gets called. -+ * - pm_callback_runtime_gpu_idle() is called. -+ * -+ * We do not want to power down the GPU immediately after it goes idle. -+ * So if we notice that GPU had become active when the runtime suspend -+ * had already kicked in, we abort the runtime suspend. -+ * By aborting the runtime suspend, we defer the power down of GPU. -+ * -+ * This check also helps prevent warnings regarding L2 and MCU states -+ * inside the pm_handle_power_off() function. The warning stems from -+ * the fact that pm.lock is released before invoking Scheduler function -+ * to suspend the CSGs. -+ */ -+ if (kbdev->pm.active_count || -+ kbdev->pm.backend.poweroff_wait_in_progress) { -+ dev_dbg(kbdev->dev, "Device became active on runtime suspend"); -+ ret = -EBUSY; -+ goto unlock; -+ } ++ out: ++ kbase_pm_context_idle(kbdev); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbdev->pm.backend.gpu_sleep_mode_active && -+ kbdev->pm.backend.exit_gpu_sleep_mode) { -+ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up"); -+ ret = -EBUSY; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ goto unlock; -+ } ++ return err; ++} ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ mcu_state = kbdev->pm.backend.mcu_state; -+ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state)); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++int kbase_install_interrupts(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ int err; ++ u32 i; + -+ if (mcu_state == KBASE_MCU_IN_SLEEP) { -+ ret = pm_handle_mcu_sleep_on_runtime_suspend(kbdev); -+ if (ret) -+ goto unlock; ++ for (i = 0; i < nr; i++) { ++ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], ++ kbdev->irqs[i].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), ++ kbase_tag(kbdev, i)); ++ if (err) { ++ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", ++ kbdev->irqs[i].irq, i); ++#if IS_ENABLED(CONFIG_SPARSE_IRQ) ++ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); ++#endif /* CONFIG_SPARSE_IRQ */ ++ goto release; ++ } + } + -+ /* Disable interrupts and turn off the GPU clocks */ -+ if (!kbase_pm_clock_off(kbdev)) { -+ dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); ++ return 0; + -+ WARN_ON(!kbdev->poweroff_pending); -+ /* Previous call to kbase_pm_clock_off() would have disabled -+ * the interrupts and also synchronized with the interrupt -+ * handlers, so more fault work items can't be enqueued. -+ * -+ * Can't wait for the completion of MMU fault work items as -+ * there is a possibility of a deadlock since the fault work -+ * items would do the group termination which requires the -+ * Scheduler lock. 
-+ */ -+ ret = -EBUSY; -+ goto unlock; -+ } ++ release: ++ while (i-- > 0) ++ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + -+ wake_up(&kbdev->pm.backend.poweroff_wait); -+ WARN_ON(kbdev->pm.backend.gpu_powered); -+ dev_dbg(kbdev->dev, "GPU power down complete"); ++ return err; ++} + -+unlock: -+ kbase_pm_unlock(kbdev); -+ kbase_csf_scheduler_unlock(kbdev); -+ kbase_reset_gpu_allow(kbdev); -+out: -+ if (ret) { -+ ret = -EBUSY; -+ pm_runtime_mark_last_busy(kbdev->dev); ++void kbase_release_interrupts(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ u32 i; ++ ++ for (i = 0; i < nr; i++) { ++ if (kbdev->irqs[i].irq) ++ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } ++} + -+ return ret; ++void kbase_synchronize_irqs(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ u32 i; ++ ++ for (i = 0; i < nr; i++) { ++ if (kbdev->irqs[i].irq) ++ synchronize_irq(kbdev->irqs[i].irq); ++ } +} -+#endif -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c ++ ++KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); ++ ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c new file mode 100644 -index 000000000..e2b0a9192 +index 000000000..258dc6dac --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c -@@ -0,0 +1,150 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +@@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -129113,144 +129121,235 @@ index 000000000..e2b0a9192 + */ + +/* -+ * Base kernel core availability APIs ++ * Register backend context / address space management + */ + +#include -+#include -+#include -+#include -+#include ++#include ++#include + -+int kbase_pm_ca_init(struct kbase_device *kbdev) ++/** ++ * assign_and_activate_kctx_addr_space - Assign an AS to a context ++ * @kbdev: Kbase device ++ * @kctx: Kbase context ++ * @current_as: Address Space to assign ++ * ++ * Assign an Address Space (AS) to a context, and add the context to the Policy. ++ * ++ * This includes ++ * setting up the global runpool_irq structure and the context on the AS, ++ * Activating the MMU on the AS, ++ * Allowing jobs to be submitted on the AS. 
++ * ++ * Context: ++ * kbasep_js_kctx_info.jsctx_mutex held, ++ * kbasep_js_device_data.runpool_mutex held, ++ * AS transaction mutex held, ++ * Runpool IRQ lock held ++ */ ++static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_as *current_as) +{ -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ if (kbdev->current_core_mask) -+ pm_backend->ca_cores_enabled = kbdev->current_core_mask; -+ else -+ pm_backend->ca_cores_enabled = -+ kbdev->gpu_props.props.raw_props.shader_present; ++ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++#if !MALI_USE_CSF ++ /* Attribute handling */ ++ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); +#endif + -+ return 0; ++ /* Allow it to run jobs */ ++ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ ++ kbase_js_runpool_inc_context_count(kbdev, kctx); +} + -+void kbase_pm_ca_term(struct kbase_device *kbdev) ++bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js) ++{ ++ int i; ++ ++ if (kbdev->hwaccess.active_kctx[js] == kctx) { ++ /* Context is already active */ ++ return true; ++ } ++ ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ if (kbdev->as_to_kctx[i] == kctx) { ++ /* Context already has ASID - mark as active */ ++ return true; ++ } ++ } ++ ++ /* Context does not have address space assigned */ ++ return false; ++} ++ ++void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ int as_nr = kctx->as_nr; ++ ++ if (as_nr == KBASEP_AS_NR_INVALID) { ++ WARN(1, "Attempting to release context without ASID\n"); ++ return; ++ } ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (atomic_read(&kctx->refcount) != 1) { ++ WARN(1, "Attempting to release active ASID\n"); ++ return; ++ } ++ ++ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); ++ ++ kbase_ctx_sched_release_ctx(kctx); ++ kbase_js_runpool_dec_context_count(kbdev, kctx); ++} ++ ++void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ +} + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) ++int kbase_backend_find_and_release_free_address_space( ++ struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; -+#if MALI_USE_CSF -+ u64 old_core_mask = 0; -+#endif ++ int i; ++ ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; ++ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+#if MALI_USE_CSF -+ if (!(core_mask & kbdev->pm.debug_core_mask)) { -+ dev_err(kbdev->dev, -+ "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", -+ core_mask, kbdev->pm.debug_core_mask); -+ goto unlock; -+ } ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ struct kbasep_js_kctx_info *as_js_kctx_info; ++ struct kbase_context *as_kctx; + -+ old_core_mask = pm_backend->ca_cores_enabled; -+#else -+ if (!(core_mask & kbdev->pm.debug_core_mask_all)) { -+ dev_err(kbdev->dev, "OPP core mask 0x%llX 
does not intersect with debug mask 0x%llX\n", -+ core_mask, kbdev->pm.debug_core_mask_all); -+ goto unlock; -+ } ++ as_kctx = kbdev->as_to_kctx[i]; ++ as_js_kctx_info = &as_kctx->jctx.sched_info; + -+ if (kbase_dummy_job_wa_enabled(kbdev)) { -+ dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); -+ goto unlock; -+ } -+#endif /* MALI_USE_CSF */ -+ pm_backend->ca_cores_enabled = core_mask; ++ /* Don't release privileged or active contexts, or contexts with ++ * jobs running. ++ * Note that a context will have at least 1 reference (which ++ * was previously taken by kbasep_js_schedule_ctx()) until ++ * descheduled. ++ */ ++ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && ++ atomic_read(&as_kctx->refcount) == 1) { ++ if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { ++ WARN(1, "Failed to retain active context\n"); + -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+#if MALI_USE_CSF -+ /* Check if old_core_mask contained the undesired cores and wait -+ * for those cores to get powered down -+ */ -+ if ((core_mask & old_core_mask) != old_core_mask) { -+ if (kbase_pm_wait_for_cores_down_scale(kbdev)) { -+ dev_warn(kbdev->dev, -+ "Wait for update of core_mask from %llx to %llx failed", -+ old_core_mask, core_mask); ++ return KBASEP_AS_NR_INVALID; ++ } ++ ++ kbasep_js_clear_submit_allowed(js_devdata, as_kctx); ++ ++ /* Drop and retake locks to take the jsctx_mutex on the ++ * context we're about to release without violating lock ++ * ordering ++ */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ ++ ++ /* Release context from address space */ ++ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ ++ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); ++ ++ if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, ++ as_kctx, ++ true); ++ ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ ++ return i; ++ } ++ ++ /* Context was retained while locks were dropped, ++ * continue looking for free AS ++ */ ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + } -+#endif -+ -+ dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", -+ pm_backend->ca_cores_enabled); + -+ return; -+unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ ++ return KBASEP_AS_NR_INVALID; +} -+KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); -+#endif + -+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) ++bool kbase_backend_use_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int as_nr) +{ -+#if MALI_USE_CSF -+ u64 debug_core_mask = kbdev->pm.debug_core_mask; -+#else -+ u64 debug_core_mask = kbdev->pm.debug_core_mask_all; -+#endif ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_as *new_address_space = NULL; ++ int js; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ js_devdata = &kbdev->js_data; + -+#ifdef 
CONFIG_MALI_BIFROST_DEVFREQ -+ /* -+ * Although in the init we let the pm_backend->ca_cores_enabled to be -+ * the max config (it uses the base_gpu_props), at this function we need -+ * to limit it to be a subgroup of the curr config, otherwise the -+ * shaders state machine on the PM does not evolve. -+ */ -+ return kbdev->gpu_props.curr_config.shader_present & -+ kbdev->pm.backend.ca_cores_enabled & -+ debug_core_mask; -+#else -+ return kbdev->gpu_props.curr_config.shader_present & -+ debug_core_mask; -+#endif -+} ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ if (kbdev->hwaccess.active_kctx[js] == kctx) { ++ WARN(1, "Context is already scheduled in\n"); ++ return false; ++ } ++ } + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); ++ new_address_space = &kbdev->as[as_nr]; + -+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) -+{ ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); -+#elif MALI_USE_CSF -+ return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); -+#else -+ return kbdev->pm.backend.pm_shaders_core_mask; -+#endif ++ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); ++ ++ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { ++ /* We need to retain it to keep the corresponding address space ++ */ ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ } ++ ++ return true; +} -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 -index 000000000..90dcaf5b6 +index 000000000..136aa526d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h -@@ -0,0 +1,88 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h +@@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -129269,147 +129368,129 @@ index 000000000..90dcaf5b6 + */ + +/* -+ * Base kernel core availability APIs ++ * Register-based HW access backend specific definitions + */ + -+#ifndef _KBASE_PM_CA_H_ -+#define _KBASE_PM_CA_H_ ++#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ ++#define _KBASE_HWACCESS_GPU_DEFS_H_ + -+/** -+ * kbase_pm_ca_init - Initialize core availability framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Must be called before calling any other core availability function -+ * -+ * Return: 0 if the core availability framework was successfully initialized, -+ * -errno otherwise -+ */ -+int kbase_pm_ca_init(struct kbase_device *kbdev); ++/* SLOT_RB_SIZE must be < 256 */ ++#define SLOT_RB_SIZE 2 ++#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** -+ * kbase_pm_ca_term - Terminate core availability framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * struct rb_entry - Ringbuffer entry ++ * @katom: Atom associated with this entry + */ -+void kbase_pm_ca_term(struct kbase_device *kbdev); ++struct rb_entry { ++ struct kbase_jd_atom *katom; ++}; + -+/** -+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Returns a mask of the currently available shader cores. -+ * Calls into the core availability policy -+ * -+ * Return: The bit mask of available cores ++/* SLOT_RB_TAG_PURGED assumes a value that is different from ++ * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of ++ * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x). + */ -+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); ++#define SLOT_RB_TAG_PURGED ((u64)(1 << 1)) ++#define SLOT_RB_NULL_TAG_VAL ((u64)0) + +/** -+ * kbase_pm_ca_update_core_status - Update core status -+ * -+ * @kbdev: The kbase device structure for the device (must be -+ * a valid pointer) -+ * @cores_ready: The bit mask of cores ready for job submission -+ * @cores_transitioning: The bit mask of cores that are transitioning power -+ * state -+ * -+ * Update core availability policy with current core power status -+ * -+ * Calls into the core availability policy ++ * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a ++ * u64 for serving as tagged value. ++ * @kctx: Pointer to kbase context. + */ -+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, -+ u64 cores_transitioning); -+ ++#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx)) +/** -+ * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Returns a mask of the PM state synchronised shader cores for arranging -+ * HW performance counter dumps -+ * -+ * Return: The bit mask of PM state synchronised cores ++ * struct slot_rb - Slot ringbuffer ++ * @entries: Ringbuffer entries ++ * @last_kctx_tagged: The last context that submitted a job to the slot's ++ * HEAD_NEXT register. The value is a tagged variant so ++ * must not be dereferenced. It is used in operation to ++ * track when shader core L1 caches might contain a ++ * previous context's data, and so must only be set to ++ * SLOT_RB_NULL_TAG_VAL after reset/powerdown of the ++ * cores. 
In slot job submission, if there is a kctx ++ * change, and the relevant katom is configured with ++ * BASE_JD_REQ_SKIP_CACHE_START, a L1 read only cache ++ * maintenace operation is enforced. ++ * @read_idx: Current read index of buffer ++ * @write_idx: Current write index of buffer ++ * @job_chain_flag: Flag used to implement jobchain disambiguation + */ -+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev); ++struct slot_rb { ++ struct rb_entry entries[SLOT_RB_SIZE]; + -+#endif /* _KBASE_PM_CA_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h -new file mode 100644 -index 000000000..d1e4b5327 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ u64 last_kctx_tagged; + -+/* -+ * A core availability policy for use with devfreq, where core masks are -+ * associated with OPPs. -+ */ ++ u8 read_idx; ++ u8 write_idx; + -+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H -+#define MALI_KBASE_PM_CA_DEVFREQ_H ++ u8 job_chain_flag; ++}; + +/** -+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy -+ * -+ * @cores_desired: Cores that the policy wants to be available -+ * @cores_enabled: Cores that the policy is currently returning as available -+ * @cores_used: Cores currently powered or transitioning ++ * struct kbase_backend_data - GPU backend specific data for HW access layer ++ * @slot_rb: Slot ringbuffers ++ * @scheduling_timer: The timer tick used for rescheduling jobs ++ * @timer_running: Is the timer running? The runpool_mutex must be ++ * held whilst modifying this. ++ * @suspend_timer: Is the timer suspended? Set when a suspend ++ * occurs and cleared on resume. The runpool_mutex ++ * must be held whilst modifying this. ++ * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) ++ * @reset_workq: Work queue for performing the reset ++ * @reset_work: Work item for performing the reset ++ * @reset_wait: Wait event signalled when the reset is complete ++ * @reset_timer: Timeout for soft-stops before the reset ++ * @timeouts_updated: Have timeout values just been updated? + * -+ * This contains data that is private to the devfreq core availability -+ * policy. 
++ * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ -+struct kbasep_pm_ca_policy_devfreq { -+ u64 cores_desired; -+ u64 cores_enabled; -+ u64 cores_used; -+}; ++struct kbase_backend_data { ++#if !MALI_USE_CSF ++ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; ++ struct hrtimer scheduling_timer; + -+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; ++ bool timer_running; ++#endif ++ bool suspend_timer; + -+/** -+ * kbase_devfreq_set_core_mask - Set core mask for policy to use -+ * @kbdev: Device pointer -+ * @core_mask: New core mask -+ * -+ * The new core mask will have immediate effect if the GPU is powered, or will -+ * take effect when it is next powered on. ++ atomic_t reset_gpu; ++ ++/* The GPU reset isn't pending */ ++#define KBASE_RESET_GPU_NOT_PENDING 0 ++/* kbase_prepare_to_reset_gpu has been called */ ++#define KBASE_RESET_GPU_PREPARED 1 ++/* kbase_reset_gpu has been called - the reset will now definitely happen ++ * within the timeout period + */ -+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); ++#define KBASE_RESET_GPU_COMMITTED 2 ++/* The GPU reset process is currently occuring (timeout has expired or ++ * kbasep_try_reset_gpu_early was called) ++ */ ++#define KBASE_RESET_GPU_HAPPENING 3 ++/* Reset the GPU silently, used when resetting the GPU as part of normal ++ * behavior (e.g. when exiting protected mode). ++ */ ++#define KBASE_RESET_GPU_SILENT 4 ++ struct workqueue_struct *reset_workq; ++ struct work_struct reset_work; ++ wait_queue_head_t reset_wait; ++ struct hrtimer reset_timer; + -+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ ++ bool timeouts_updated; ++}; + -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c ++#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 -index 000000000..f40b75385 +index 000000000..c7257117e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c -@@ -0,0 +1,67 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +@@ -0,0 +1,1513 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -129428,4209 +129509,3540 @@ index 000000000..f40b75385 + */ + +/* -+ * "Coarse Demand" power management policy ++ * Base kernel job manager APIs + */ + +#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) -+{ -+ return kbase_pm_is_active(kbdev); -+} ++static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); ++static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, ++ const u64 affinity, const u64 limited_core_mask); + -+static bool coarse_demand_get_core_active(struct kbase_device *kbdev) ++static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, ++ unsigned int js, const u64 limited_core_mask) +{ -+ return kbase_pm_is_active(kbdev); -+} ++ u64 affinity; ++ bool skip_affinity_check = false; + -+static void coarse_demand_init(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == ++ BASE_JD_REQ_T) { ++ /* Tiler-only atom, affinity value can be programed as 0 */ ++ affinity = 0; ++ skip_affinity_check = true; ++ } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | ++ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { ++ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; ++ struct mali_base_gpu_coherent_group_info *coherency_info = ++ &kbdev->gpu_props.props.coherency_info; + -+static void coarse_demand_term(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++ affinity = kbdev->pm.backend.shaders_avail & ++ kbdev->pm.debug_core_mask[js]; + -+/* The struct kbase_pm_policy structure for the demand power policy. -+ * -+ * This is the static structure that defines the demand power policy's callback -+ * and name. -+ */ -+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { -+ "coarse_demand", /* name */ -+ coarse_demand_init, /* init */ -+ coarse_demand_term, /* term */ -+ coarse_demand_shaders_needed, /* shaders_needed */ -+ coarse_demand_get_core_active, /* get_core_active */ -+ NULL, /* handle_event */ -+ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ -+#if MALI_USE_CSF -+ COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ -+#endif -+}; ++ /* JS2 on a dual core group system targets core group 1. All ++ * other cases target core group 0. ++ */ ++ if (js == 2 && num_core_groups > 1) ++ affinity &= coherency_info->group[1].core_mask; ++ else if (num_core_groups > 1) ++ affinity &= coherency_info->group[0].core_mask; ++ else ++ affinity &= kbdev->gpu_props.curr_config.shader_present; ++ } else { ++ /* Use all cores */ ++ affinity = kbdev->pm.backend.shaders_avail & ++ kbdev->pm.debug_core_mask[js]; ++ } + -+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h -new file mode 100644 -index 000000000..a947e8f55 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { ++ /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */ ++ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); ++ } + -+/* -+ * "Coarse Demand" power management policy -+ */ ++ if (unlikely(!affinity && !skip_affinity_check)) { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ u64 shaders_ready = ++ kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + -+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H -+#define MALI_KBASE_PM_COARSE_DEMAND_H ++ WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); ++#endif + -+/** -+ * DOC: -+ * The "Coarse" demand power management policy has the following -+ * characteristics: -+ * - When KBase indicates that the GPU will be powered up, but we don't yet -+ * know which Job Chains are to be run: -+ * - Shader Cores are powered up, regardless of whether or not they will be -+ * needed later. -+ * - When KBase indicates that Shader Cores are needed to submit the currently -+ * queued Job Chains: -+ * - Shader Cores are kept powered, regardless of whether or not they will -+ * be needed -+ * - When KBase indicates that the GPU need not be powered: -+ * - The Shader Cores are powered off, and the GPU itself is powered off too. -+ * -+ * @note: -+ * - KBase indicates the GPU will be powered up when it has a User Process that -+ * has just started to submit Job Chains. -+ * - KBase indicates the GPU need not be powered when all the Job Chains from -+ * User Processes have finished, and it is waiting for a User Process to -+ * submit some more Job Chains. -+ */ ++ affinity = kbdev->pm.backend.shaders_avail; + -+/** -+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand -+ * policy -+ * @dummy: Dummy member - no state needed -+ * This contains data that is private to the coarse demand power policy. -+ */ -+struct kbasep_pm_policy_coarse_demand { -+ int dummy; -+}; ++ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { ++ /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */ ++ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + -+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ /* affinity should never be 0 */ ++ WARN_ON(!affinity); ++#endif ++ } ++ } + -+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h -new file mode 100644 -index 000000000..75d99a30e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h -@@ -0,0 +1,649 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. 
All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), ++ affinity & 0xFFFFFFFF); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), ++ affinity >> 32); ++ ++ return affinity; ++} ++ ++/** ++ * select_job_chain() - Select which job chain to submit to the GPU ++ * @katom: Pointer to the atom about to be submitted to the GPU + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Selects one of the fragment job chains attached to the special atom at the ++ * end of a renderpass, or returns the address of the single job chain attached ++ * to any other type of atom. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Which job chain is selected depends upon whether the tiling phase of the ++ * renderpass completed normally or was soft-stopped because it used too ++ * much memory. It also depends upon whether one of the fragment job chains ++ * has already been run as part of the same renderpass. + * ++ * Return: GPU virtual address of the selected job chain + */ ++static u64 select_job_chain(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ u64 jc = katom->jc; ++ struct kbase_jd_renderpass *rp; + -+/* -+ * Backend-specific Power Manager definitions -+ */ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+#ifndef _KBASE_PM_HWACCESS_DEFS_H_ -+#define _KBASE_PM_HWACCESS_DEFS_H_ ++ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) ++ return jc; + -+#include "mali_kbase_pm_always_on.h" -+#include "mali_kbase_pm_coarse_demand.h" ++ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) -+#define KBASE_PM_RUNTIME 1 -+#endif ++ rp = &kctx->jctx.renderpasses[katom->renderpass_id]; ++ /* We can read a subset of renderpass state without holding ++ * higher-level locks (but not end_katom, for example). ++ * If the end-of-renderpass atom is running with as-yet indeterminate ++ * OOM state then assume that the start atom was not soft-stopped. ++ */ ++ switch (rp->state) { ++ case KBASE_JD_RP_OOM: ++ /* Tiling ran out of memory. ++ * Start of incremental rendering, used once. ++ */ ++ jc = katom->jc_fragment.norm_read_forced_write; ++ break; ++ case KBASE_JD_RP_START: ++ case KBASE_JD_RP_PEND_OOM: ++ /* Tiling completed successfully first time. ++ * Single-iteration rendering, used once. ++ */ ++ jc = katom->jc_fragment.norm_read_norm_write; ++ break; ++ case KBASE_JD_RP_RETRY_OOM: ++ /* Tiling ran out of memory again. ++ * Continuation of incremental rendering, used as ++ * many times as required. ++ */ ++ jc = katom->jc_fragment.forced_read_forced_write; ++ break; ++ case KBASE_JD_RP_RETRY: ++ case KBASE_JD_RP_RETRY_PEND_OOM: ++ /* Tiling completed successfully this time. ++ * End of incremental rendering, used once. 
++ */ ++ jc = katom->jc_fragment.forced_read_norm_write; ++ break; ++ default: ++ WARN_ON(1); ++ break; ++ } + -+/* Forward definition - see mali_kbase.h */ -+struct kbase_device; -+struct kbase_jd_atom; ++ dev_dbg(kctx->kbdev->dev, ++ "Selected job chain 0x%llx for end atom %pK in state %d\n", ++ jc, (void *)katom, (int)rp->state); + -+/** -+ * enum kbase_pm_core_type - The types of core in a GPU. -+ * -+ * @KBASE_PM_CORE_L2: The L2 cache -+ * @KBASE_PM_CORE_SHADER: Shader cores -+ * @KBASE_PM_CORE_TILER: Tiler cores -+ * @KBASE_PM_CORE_STACK: Core stacks -+ * -+ * These enumerated values are used in calls to -+ * - kbase_pm_get_present_cores() -+ * - kbase_pm_get_active_cores() -+ * - kbase_pm_get_trans_cores() -+ * - kbase_pm_get_ready_cores(). -+ * -+ * They specify which type of core should be acted on. These values are set in -+ * a manner that allows core_type_to_reg() function to be simpler and more -+ * efficient. -+ */ -+enum kbase_pm_core_type { -+ KBASE_PM_CORE_L2 = L2_PRESENT_LO, -+ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, -+ KBASE_PM_CORE_TILER = TILER_PRESENT_LO, -+ KBASE_PM_CORE_STACK = STACK_PRESENT_LO -+}; ++ katom->jc = jc; ++ return jc; ++} + -+/* -+ * enum kbase_l2_core_state - The states used for the L2 cache & tiler power -+ * state machine. -+ */ -+enum kbase_l2_core_state { -+#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, -+#include "mali_kbase_pm_l2_states.h" -+#undef KBASEP_L2_STATE -+}; ++static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js, ++ struct kbase_context *kctx) ++{ ++ const ktime_t wait_loop_start = ktime_get_raw(); ++ const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms; ++ s64 diff = 0; + -+#if MALI_USE_CSF -+/* -+ * enum kbase_mcu_state - The states used for the MCU state machine. -+ */ -+enum kbase_mcu_state { -+#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, -+#include "mali_kbase_pm_mcu_states.h" -+#undef KBASEP_MCU_STATE -+}; -+#endif ++ /* wait for the JS_COMMAND_NEXT register to reach the given status value */ ++ do { ++ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT))) ++ return true; + -+/* -+ * enum kbase_shader_core_state - The states used for the shaders' state machine. -+ */ -+enum kbase_shader_core_state { -+#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, -+#include "mali_kbase_pm_shader_states.h" -+#undef KBASEP_SHADER_STATE -+}; ++ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); ++ } while (diff < max_timeout); + -+/** -+ * struct kbasep_pm_metrics - Metrics data collected for use by the power -+ * management framework. -+ * -+ * @time_busy: the amount of time the GPU was busy executing jobs since the -+ * @time_period_start timestamp, in units of 256ns. This also includes -+ * time_in_protm, the time spent in protected mode, since it's assumed -+ * the GPU was busy 100% during this period. -+ * @time_idle: the amount of time the GPU was not executing jobs since the -+ * time_period_start timestamp, measured in units of 256ns. -+ * @time_in_protm: The amount of time the GPU has spent in protected mode since -+ * the time_period_start timestamp, measured in units of 256ns. -+ * @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that -+ * if two CL jobs were active for 256ns, this value would be updated -+ * with 2 (2x256ns). -+ * @busy_gl: the amount of time the GPU was busy executing GL jobs. Note that -+ * if two GL jobs were active for 256ns, this value would be updated -+ * with 2 (2x256ns). 
-+ */ -+struct kbasep_pm_metrics { -+ u32 time_busy; -+ u32 time_idle; -+#if MALI_USE_CSF -+ u32 time_in_protm; -+#else -+ u32 busy_cl[2]; -+ u32 busy_gl; -+#endif -+}; ++ dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js, ++ kctx->tgid, kctx->id); + -+/** -+ * struct kbasep_pm_metrics_state - State required to collect the metrics in -+ * struct kbasep_pm_metrics -+ * @time_period_start: time at which busy/idle measurements started -+ * @ipa_control_client: Handle returned on registering DVFS as a -+ * kbase_ipa_control client -+ * @skip_gpu_active_sanity_check: Decide whether to skip GPU_ACTIVE sanity -+ * check in DVFS utilisation calculation -+ * @gpu_active: true when the GPU is executing jobs. false when -+ * not. Updated when the job scheduler informs us a job in submitted -+ * or removed from a GPU slot. -+ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. -+ * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. -+ * @lock: spinlock protecting the kbasep_pm_metrics_state structure -+ * @platform_data: pointer to data controlled by platform specific code -+ * @kbdev: pointer to kbase device for which metrics are collected -+ * @values: The current values of the power management metrics. The -+ * kbase_pm_get_dvfs_metrics() function is used to compare these -+ * current values with the saved values from a previous invocation. -+ * @initialized: tracks whether metrics_state has been initialized or not. -+ * @timer: timer to regularly make DVFS decisions based on the power -+ * management metrics. -+ * @timer_state: atomic indicating current @timer state, on, off, or stopped. -+ * @dvfs_last: values of the PM metrics from the last DVFS tick -+ * @dvfs_diff: different between the current and previous PM metrics. -+ */ -+struct kbasep_pm_metrics_state { -+ ktime_t time_period_start; -+#if MALI_USE_CSF -+ void *ipa_control_client; -+ bool skip_gpu_active_sanity_check; -+#else -+ bool gpu_active; -+ u32 active_cl_ctx[2]; -+ u32 active_gl_ctx[3]; -+#endif -+ spinlock_t lock; ++ return false; ++} + -+ void *platform_data; -+ struct kbase_device *kbdev; ++int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) ++{ ++ struct kbase_context *kctx; ++ u32 cfg; ++ u64 const jc_head = select_job_chain(katom); ++ u64 affinity; ++ struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js]; + -+ struct kbasep_pm_metrics values; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ bool initialized; -+ struct hrtimer timer; -+ atomic_t timer_state; -+ struct kbasep_pm_metrics dvfs_last; -+ struct kbasep_pm_metrics dvfs_diff; -+#endif -+}; ++ kctx = katom->kctx; + -+/** -+ * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer -+ * @wq: Work queue to wait for the timer to stopped -+ * @work: Work item which cancels the timer -+ * @timer: Timer for powering off the shader cores -+ * @configured_interval: Period of GPU poweroff timer -+ * @default_ticks: User-configured number of ticks to wait after the shader -+ * power down request is received before turning off the cores -+ * @configured_ticks: Power-policy configured number of ticks to wait after the -+ * shader power down request is received before turning off -+ * the cores. For simple power policies, this is equivalent -+ * to @default_ticks. 
-+ * @remaining_ticks: Number of remaining timer ticks until shaders are powered off -+ * @cancel_queued: True if the cancellation work item has been queued. This is -+ * required to ensure that it is not queued twice, e.g. after -+ * a reset, which could cause the timer to be incorrectly -+ * cancelled later by a delayed workitem. -+ * @needed: Whether the timer should restart itself -+ */ -+struct kbasep_pm_tick_timer_state { -+ struct workqueue_struct *wq; -+ struct work_struct work; -+ struct hrtimer timer; ++ /* Command register must be available */ ++ if (!kbasep_jm_wait_js_free(kbdev, js, kctx)) ++ return -EPERM; + -+ ktime_t configured_interval; -+ unsigned int default_ticks; -+ unsigned int configured_ticks; -+ unsigned int remaining_ticks; ++ dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", ++ jc_head, (void *)katom); + -+ bool cancel_queued; -+ bool needed; -+}; ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), ++ jc_head & 0xFFFFFFFF); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), ++ jc_head >> 32); + -+union kbase_pm_policy_data { -+ struct kbasep_pm_policy_always_on always_on; -+ struct kbasep_pm_policy_coarse_demand coarse_demand; -+}; ++ affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, ++ kctx->limited_core_mask); + -+/** -+ * struct kbase_pm_backend_data - Data stored per device for power management. -+ * -+ * @pm_current_policy: The policy that is currently actively controlling the -+ * power state. -+ * @pm_policy_data: Private data for current PM policy. This is automatically -+ * zeroed when a policy change occurs. -+ * @reset_done: Flag when a reset is complete -+ * @reset_done_wait: Wait queue to wait for changes to @reset_done -+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter -+ * users -+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests -+ * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired -+ * state according to the L2 and shader power state -+ * machines -+ * @gpu_powered: Set to true when the GPU is powered and register -+ * accesses are possible, false otherwise. Access to this -+ * variable should be protected by: both the hwaccess_lock -+ * spinlock and the pm.lock mutex for writes; or at least -+ * one of either lock for reads. -+ * @gpu_ready: Indicates whether the GPU is in a state in which it is -+ * safe to perform PM changes. When false, the PM state -+ * machine needs to wait before making changes to the GPU -+ * power policy, DevFreq or core_mask, so as to avoid these -+ * changing while implicit GPU resets are ongoing. -+ * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It -+ * holds the cores enabled in a hardware counters dump, -+ * and may differ from @shaders_avail when under different -+ * states and transitions. -+ * @cg1_disabled: Set if the policy wants to keep the second core group -+ * powered off -+ * @driver_ready_for_irqs: Debug state indicating whether sufficient -+ * initialization of the driver has occurred to handle -+ * IRQs -+ * @metrics: Structure to hold metrics for the GPU -+ * @shader_tick_timer: Structure to hold the shader poweroff tick timer state -+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. -+ * hwaccess_lock must be held when accessing -+ * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state -+ * machine should invoke the poweroff -+ * worker after the L2 has turned off. 
-+ * @poweron_required: true if a GPU power on is required. Should only be set -+ * when poweroff_wait_in_progress is true, and therefore the -+ * GPU can not immediately be powered on. pm.lock must be -+ * held when accessing -+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off -+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq -+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete -+ * @callback_power_on: Callback when the GPU needs to be turned on. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_off: Callback when the GPU may be turned off. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to -+ * be turned off. See &struct kbase_pm_callback_conf -+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to -+ * be turned on. See &struct kbase_pm_callback_conf -+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_runtime_idle: Optional callback invoked by runtime PM core -+ * when the GPU may be idle. See -+ * &struct kbase_pm_callback_conf -+ * @callback_soft_reset: Optional callback to software reset the GPU. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_runtime_gpu_idle: Callback invoked by Kbase when GPU has -+ * become idle. -+ * See &struct kbase_pm_callback_conf. -+ * @callback_power_runtime_gpu_active: Callback when GPU has become active and -+ * @callback_power_runtime_gpu_idle was -+ * called previously. -+ * See &struct kbase_pm_callback_conf. -+ * @ca_cores_enabled: Cores that are currently available -+ * @mcu_state: The current state of the micro-control unit, only applicable -+ * to GPUs that have such a component -+ * @l2_state: The current state of the L2 cache state machine. See -+ * &enum kbase_l2_core_state -+ * @l2_desired: True if the L2 cache should be powered on by the L2 cache state -+ * machine -+ * @l2_always_on: If true, disable powering down of l2 cache. -+ * @shaders_state: The current state of the shader state machine. -+ * @shaders_avail: This is updated by the state machine when it is in a state -+ * where it can write to the SHADER_PWRON or PWROFF registers -+ * to have the same set of available cores as specified by -+ * @shaders_desired_mask. So would precisely indicate the cores -+ * that are currently available. This is internal to shader -+ * state machine of JM GPUs and should *not* be modified -+ * elsewhere. -+ * @shaders_desired_mask: This is updated by the state machine when it is in -+ * a state where it can handle changes to the core -+ * availability (either by DVFS or sysfs). This is -+ * internal to the shader state machine and should -+ * *not* be modified elsewhere. -+ * @shaders_desired: True if the PM active count or power policy requires the -+ * shader cores to be on. This is used as an input to the -+ * shader power state machine. The current state of the -+ * cores may be different, but there should be transitions in -+ * progress that will eventually achieve this state (assuming -+ * that the policy doesn't change its mind in the mean time). 
-+ * @mcu_desired: True if the micro-control unit should be powered on -+ * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy -+ * change transition, needs the mcu/L2 to be brought back to the -+ * off state and remain in that state until the flag is cleared. -+ * @csf_pm_sched_flags: CSF Dynamic PM control flags in accordance to the -+ * current active PM policy. This field is updated whenever a -+ * new policy is activated. -+ * @policy_change_lock: Used to serialize the policy change calls. In CSF case, -+ * the change of policy may involve the scheduler to -+ * suspend running CSGs and then reconfigure the MCU. -+ * @core_idle_wq: Workqueue for executing the @core_idle_work. -+ * @core_idle_work: Work item used to wait for undesired cores to become inactive. -+ * The work item is enqueued when Host controls the power for -+ * shader cores and down scaling of cores is performed. -+ * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be -+ * supported by the kernel driver or not. If this -+ * flag is not set, then HW state is directly saved -+ * when GPU idle notification is received. -+ * @gpu_sleep_mode_active: Flag to indicate that the GPU needs to be in sleep -+ * mode. It is set when the GPU idle notification is -+ * received and is cleared when HW state has been -+ * saved in the runtime suspend callback function or -+ * when the GPU power down is aborted if GPU became -+ * active whilst it was in sleep mode. The flag is -+ * guarded with hwaccess_lock spinlock. -+ * @exit_gpu_sleep_mode: Flag to indicate the GPU can now exit the sleep -+ * mode due to the submission of work from Userspace. -+ * The flag is guarded with hwaccess_lock spinlock. -+ * The @gpu_sleep_mode_active flag is not immediately -+ * reset when this flag is set, this is to ensure that -+ * MCU doesn't gets disabled undesirably without the -+ * suspend of CSGs. That could happen when -+ * scheduler_pm_active() and scheduler_pm_idle() gets -+ * called before the Scheduler gets reactivated. -+ * @gpu_idled: Flag to ensure that the gpu_idle & gpu_active callbacks are -+ * always called in pair. The flag is guarded with pm.lock mutex. -+ * @gpu_wakeup_override: Flag to force the power up of L2 cache & reactivation -+ * of MCU. This is set during the runtime suspend -+ * callback function, when GPU needs to exit the sleep -+ * mode for the saving the HW state before power down. -+ * @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt -+ * is enabled or not. -+ * @in_reset: True if a GPU is resetting and normal power manager operation is -+ * suspended -+ * @partial_shaderoff: True if we want to partial power off shader cores, -+ * it indicates a partial shader core off case, -+ * do some special operation for such case like flush -+ * L2 cache because of GPU2017-861 -+ * @protected_entry_transition_override : True if GPU reset is being used -+ * before entering the protected mode and so -+ * the reset handling behaviour is being -+ * overridden. -+ * @protected_transition_override : True if a protected mode transition is in -+ * progress and is overriding power manager -+ * behaviour. -+ * @protected_l2_override : Non-zero if the L2 cache is required during a -+ * protected mode transition. Has no effect if not -+ * transitioning. -+ * @hwcnt_desired: True if we want GPU hardware counters to be enabled. -+ * @hwcnt_disabled: True if GPU hardware counters are not enabled. 
-+ * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if -+ * atomic disable is not possible. -+ * @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table -+ * for safe L2 power cycle. -+ * If no opp-mali-errata-1485982 specified, -+ * the slowest clock will be taken. -+ * @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle. -+ * @gpu_clock_slow_down_desired: True if we want lower GPU clock -+ * for safe L2 power cycle. False if want GPU clock -+ * to back to normalized one. This is updated only -+ * in L2 state machine, kbase_pm_l2_update_state. -+ * @gpu_clock_slowed_down: During L2 power cycle, -+ * True if gpu clock is set at lower frequency -+ * for safe L2 power down, False if gpu clock gets -+ * restored to previous speed. This is updated only in -+ * work function, kbase_pm_gpu_clock_control_worker. -+ * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle -+ * using gpu_clock_control -+ * -+ * This structure contains data for the power management framework. There is one -+ * instance of this structure per device in the system. -+ * -+ * Note: -+ * During an IRQ, @pm_current_policy can be NULL when the policy is being -+ * changed with kbase_pm_set_policy(). The change is protected under -+ * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context -+ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will -+ * re-issue the policy functions that would have been done under IRQ. -+ */ -+struct kbase_pm_backend_data { -+ const struct kbase_pm_policy *pm_current_policy; -+ union kbase_pm_policy_data pm_policy_data; -+ bool reset_done; -+ wait_queue_head_t reset_done_wait; -+ int gpu_cycle_counter_requests; -+ spinlock_t gpu_cycle_counter_requests_lock; ++ /* start MMU, medium priority, cache clean/flush on end, clean/flush on ++ * start ++ */ ++ cfg = kctx->as_nr; + -+ wait_queue_head_t gpu_in_desired_state_wait; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && ++ !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) ++ cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + -+ bool gpu_powered; -+ bool gpu_ready; ++ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) { ++ /* Force a cache maintenance operation if the newly submitted ++ * katom to the slot is from a different kctx. For a JM GPU ++ * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a ++ * FLUSH_CLEAN_INVALIDATE. 
++ */ ++ u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged; + -+ u64 pm_shaders_core_mask; ++ if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) { ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER)) ++ cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER; ++ else ++ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; ++ } else ++ cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; ++ } else ++ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + -+ bool cg1_disabled; ++ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && ++ !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) ++ cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; ++ else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) ++ cfg |= JS_CONFIG_END_FLUSH_CLEAN; ++ else ++ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ bool driver_ready_for_irqs; -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ cfg |= JS_CONFIG_THREAD_PRI(8); + -+ struct kbasep_pm_metrics_state metrics; ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || ++ (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) ++ cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + -+ struct kbasep_pm_tick_timer_state shader_tick_timer; ++ if (!ptr_slot_rb->job_chain_flag) { ++ cfg |= JS_CONFIG_JOB_CHAIN_FLAG; ++ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; ++ ptr_slot_rb->job_chain_flag = true; ++ } else { ++ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; ++ ptr_slot_rb->job_chain_flag = false; ++ } + -+ bool poweroff_wait_in_progress; -+ bool invoke_poweroff_wait_wq_when_l2_off; -+ bool poweron_required; ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); + -+ struct workqueue_struct *gpu_poweroff_wait_wq; -+ struct work_struct gpu_poweroff_wait_work; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), ++ katom->flush_id); + -+ wait_queue_head_t poweroff_wait; ++ /* Write an approximate start timestamp. ++ * It's approximate because there might be a job in the HEAD register. ++ */ ++ katom->start_timestamp = ktime_get_raw(); + -+ int (*callback_power_on)(struct kbase_device *kbdev); -+ void (*callback_power_off)(struct kbase_device *kbdev); -+ void (*callback_power_suspend)(struct kbase_device *kbdev); -+ void (*callback_power_resume)(struct kbase_device *kbdev); -+ int (*callback_power_runtime_on)(struct kbase_device *kbdev); -+ void (*callback_power_runtime_off)(struct kbase_device *kbdev); -+ int (*callback_power_runtime_idle)(struct kbase_device *kbdev); -+ int (*callback_soft_reset)(struct kbase_device *kbdev); -+ void (*callback_power_runtime_gpu_idle)(struct kbase_device *kbdev); -+ void (*callback_power_runtime_gpu_active)(struct kbase_device *kbdev); ++ /* GO ! 
*/ ++ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", ++ katom, kctx, js, jc_head); + -+ u64 ca_cores_enabled; ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, ++ (u32)affinity); + -+#if MALI_USE_CSF -+ enum kbase_mcu_state mcu_state; -+#endif -+ enum kbase_l2_core_state l2_state; -+ enum kbase_shader_core_state shaders_state; -+ u64 shaders_avail; -+ u64 shaders_desired_mask; -+#if MALI_USE_CSF -+ bool mcu_desired; -+ bool policy_change_clamp_state_to_off; -+ unsigned int csf_pm_sched_flags; -+ struct mutex policy_change_lock; -+ struct workqueue_struct *core_idle_wq; -+ struct work_struct core_idle_work; ++ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, ++ js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + -+#ifdef KBASE_PM_RUNTIME -+ bool gpu_sleep_supported; -+ bool gpu_sleep_mode_active; -+ bool exit_gpu_sleep_mode; -+ bool gpu_idled; -+ bool gpu_wakeup_override; -+ bool db_mirror_interrupt_enabled; -+#endif -+#endif -+ bool l2_desired; -+ bool l2_always_on; -+ bool shaders_desired; ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, ++ affinity, cfg); ++ KBASE_TLSTREAM_TL_RET_CTX_LPU( ++ kbdev, ++ kctx, ++ &kbdev->gpu_props.props.raw_props.js_features[ ++ katom->slot_nr]); ++ KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); ++ KBASE_TLSTREAM_TL_RET_ATOM_LPU( ++ kbdev, ++ katom, ++ &kbdev->gpu_props.props.raw_props.js_features[js], ++ "ctx_nr,atom_nr"); ++ kbase_kinstr_jm_atom_hw_submit(katom); + -+ bool in_reset; ++ /* Update the slot's last katom submission kctx */ ++ ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx); + -+#if !MALI_USE_CSF -+ bool partial_shaderoff; ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { ++ /* If this is the only job on the slot, trace it as starting */ ++ char js_string[16]; + -+ bool protected_entry_transition_override; -+ bool protected_transition_override; -+ int protected_l2_override; ++ trace_gpu_sched_switch( ++ kbasep_make_job_slot_string(js, js_string, ++ sizeof(js_string)), ++ ktime_to_ns(katom->start_timestamp), ++ (u32)katom->kctx->id, 0, katom->work_id); ++ } +#endif + -+ bool hwcnt_desired; -+ bool hwcnt_disabled; -+ struct work_struct hwcnt_disable_work; -+ -+ u64 gpu_clock_suspend_freq; -+ bool gpu_clock_slow_down_wa; -+ bool gpu_clock_slow_down_desired; -+ bool gpu_clock_slowed_down; -+ struct work_struct gpu_clock_control_work; -+}; -+ -+#if MALI_USE_CSF -+/* CSF PM flag, signaling that the MCU shader Core should be kept on */ -+#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) -+/* CSF PM flag, signaling no scheduler suspension on idle groups */ -+#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) -+/* CSF PM flag, signaling no scheduler suspension on no runnable groups */ -+#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) ++ trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, ++ kbase_jd_atom_id(kctx, katom), js); + -+/* The following flags corresponds to existing defined PM policies */ -+#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ -+ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ -+ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) -+#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) -+#if !MALI_CUSTOMER_RELEASE -+#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE) -+#endif -+#endif ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), ++ JS_COMMAND_START); + -+/* List of policy IDs */ -+enum kbase_pm_policy_id { -+ KBASE_PM_POLICY_ID_COARSE_DEMAND, -+#if !MALI_CUSTOMER_RELEASE -+ 
KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, -+#endif -+ KBASE_PM_POLICY_ID_ALWAYS_ON -+}; ++ return 0; ++} + +/** -+ * enum kbase_pm_policy_event - PM Policy event ID ++ * kbasep_job_slot_update_head_start_timestamp - Update timestamp ++ * @kbdev: kbase device ++ * @js: job slot ++ * @end_timestamp: timestamp ++ * ++ * Update the start_timestamp of the job currently in the HEAD, based on the ++ * fact that we got an IRQ for the previous set of completed jobs. ++ * ++ * The estimate also takes into account the time the job was submitted, to ++ * work out the best estimate (which might still result in an over-estimate to ++ * the calculated time spent) + */ -+enum kbase_pm_policy_event { -+ /** -+ * @KBASE_PM_POLICY_EVENT_IDLE: Indicates that the GPU power state -+ * model has determined that the GPU has gone idle. -+ */ -+ KBASE_PM_POLICY_EVENT_IDLE, -+ /** -+ * @KBASE_PM_POLICY_EVENT_POWER_ON: Indicates that the GPU state model -+ * is preparing to power on the GPU. -+ */ -+ KBASE_PM_POLICY_EVENT_POWER_ON, -+ /** -+ * @KBASE_PM_POLICY_EVENT_TIMER_HIT: Indicates that the GPU became -+ * active while the Shader Tick Timer was holding the GPU in a powered -+ * on state. -+ */ -+ KBASE_PM_POLICY_EVENT_TIMER_HIT, -+ /** -+ * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not -+ * become active before the Shader Tick Timer timeout occurred. -+ */ -+ KBASE_PM_POLICY_EVENT_TIMER_MISS, -+}; ++static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, ++ ktime_t end_timestamp) ++{ ++ ktime_t timestamp_diff; ++ struct kbase_jd_atom *katom; ++ ++ /* Checking the HEAD position for the job slot */ ++ katom = kbase_gpu_inspect(kbdev, js, 0); ++ if (katom != NULL) { ++ timestamp_diff = ktime_sub(end_timestamp, ++ katom->start_timestamp); ++ if (ktime_to_ns(timestamp_diff) >= 0) { ++ /* Only update the timestamp if it's a better estimate ++ * than what's currently stored. This is because our ++ * estimate that accounts for the throttle time may be ++ * too much of an overestimate ++ */ ++ katom->start_timestamp = end_timestamp; ++ } ++ } ++} + +/** -+ * struct kbase_pm_policy - Power policy structure. ++ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline ++ * tracepoint ++ * @kbdev: kbase device ++ * @js: job slot + * -+ * @name: The name of this policy -+ * @init: Function called when the policy is selected -+ * @term: Function called when the policy is unselected -+ * @shaders_needed: Function called to find out if shader cores are needed -+ * @get_core_active: Function called to get the current overall GPU power -+ * state -+ * @handle_event: Function called when a PM policy event occurs. Should be -+ * set to NULL if the power policy doesn't require any -+ * event notifications. -+ * @id: Field indicating an ID for this policy. This is not -+ * necessarily the same as its index in the list returned -+ * by kbase_pm_list_policies(). -+ * It is used purely for debugging. -+ * @pm_sched_flags: Policy associated with CSF PM scheduling operational flags. -+ * Pre-defined required flags exist for each of the -+ * ARM released policies, such as 'always_on', 'coarse_demand' -+ * and etc. -+ * Each power policy exposes a (static) instance of this structure which -+ * contains function pointers to the policy's methods. ++ * Make a tracepoint call to the instrumentation module informing that ++ * softstop happened on given lpu (job slot). 
+ */ -+struct kbase_pm_policy { -+ char *name; -+ -+ /* -+ * Function called when the policy is selected -+ * -+ * This should initialize the kbdev->pm.pm_policy_data structure. It -+ * should not attempt to make any changes to hardware state. -+ * -+ * It is undefined what state the cores are in when the function is -+ * called. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ */ -+ void (*init)(struct kbase_device *kbdev); ++static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) ++{ ++ KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( ++ kbdev, ++ &kbdev->gpu_props.props.raw_props.js_features[js]); ++} + -+ /* -+ * Function called when the policy is unselected. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ */ -+ void (*term)(struct kbase_device *kbdev); ++void kbase_job_done(struct kbase_device *kbdev, u32 done) ++{ ++ u32 count = 0; ++ ktime_t end_timestamp; + -+ /* -+ * Function called to find out if shader cores are needed -+ * -+ * This needs to at least satisfy kbdev->pm.backend.shaders_desired, -+ * and so must never return false when shaders_desired is true. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * -+ * Return: true if shader cores are needed, false otherwise -+ */ -+ bool (*shaders_needed)(struct kbase_device *kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* -+ * Function called to get the current overall GPU power state -+ * -+ * This function must meet or exceed the requirements for power -+ * indicated by kbase_pm_is_active(). -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * -+ * Return: true if the GPU should be powered, false otherwise -+ */ -+ bool (*get_core_active)(struct kbase_device *kbdev); ++ KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); + -+ /* -+ * Function called when a power event occurs -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @event: The id of the power event that has occurred -+ */ -+ void (*handle_event)(struct kbase_device *kbdev, -+ enum kbase_pm_policy_event event); ++ end_timestamp = ktime_get_raw(); + -+ enum kbase_pm_policy_id id; ++ while (done) { ++ unsigned int i; ++ u32 failed = done >> 16; + -+#if MALI_USE_CSF -+ /* Policy associated with CSF PM scheduling operational flags. -+ * There are pre-defined required flags exist for each of the -+ * ARM released policies, such as 'always_on', 'coarse_demand' -+ * and etc. -+ */ -+ unsigned int pm_sched_flags; -+#endif -+}; ++ /* treat failed slots as finished slots */ ++ u32 finished = (done & 0xFFFF) | failed; + -+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c -new file mode 100644 -index 000000000..5be8acd75 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c -@@ -0,0 +1,3417 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Note: This is inherently unfair, as we always check for lower ++ * numbered interrupts before the higher numbered ones. ++ */ ++ i = ffs(finished) - 1; + -+/* -+ * Base kernel Power Management hardware control -+ */ ++ do { ++ int nr_done; ++ u32 active; ++ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ ++ u64 job_tail = 0; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ if (failed & (1u << i)) { ++ /* read out the job slot status code if the job ++ * slot reported failure ++ */ ++ completion_code = kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_STATUS)); + -+#if MALI_USE_CSF -+#include -+#else -+#include -+#endif /* !MALI_USE_CSF */ ++ if (completion_code == BASE_JD_EVENT_STOPPED) { ++ u64 job_head; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+#if MALI_USE_CSF -+#include -+#endif ++ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( ++ kbdev, NULL, ++ i, 0, TL_JS_EVENT_SOFT_STOP); + -+#if MALI_USE_CSF -+#include -+#endif ++ kbasep_trace_tl_event_lpu_softstop( ++ kbdev, i); + -+#include ++ /* Soft-stopped job - read the value of ++ * JS_TAIL so that the job chain can ++ * be resumed ++ */ ++ job_tail = (u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_TAIL_LO)) | ++ ((u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_TAIL_HI)) ++ << 32); ++ job_head = (u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_HEAD_LO)) | ++ ((u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_HEAD_HI)) ++ << 32); ++ /* For a soft-stopped job chain js_tail should ++ * same as the js_head, but if not then the ++ * job chain was incorrectly marked as ++ * soft-stopped. In such case we should not ++ * be resuming the job chain from js_tail and ++ * report the completion_code as UNKNOWN. ++ */ ++ if (job_tail != job_head) ++ completion_code = BASE_JD_EVENT_UNKNOWN; + -+#ifdef CONFIG_MALI_CORESTACK -+bool corestack_driver_control = true; -+#else -+bool corestack_driver_control; /* Default value of 0/false */ -+#endif -+module_param(corestack_driver_control, bool, 0444); -+MODULE_PARM_DESC(corestack_driver_control, -+ "Let the driver power on/off the GPU core stack independently " -+ "without involving the Power Domain Controller. This should " -+ "only be enabled on platforms for which integration of the PDC " -+ "to the Mali GPU is known to be problematic."); -+KBASE_EXPORT_TEST_API(corestack_driver_control); ++ } else if (completion_code == ++ BASE_JD_EVENT_NOT_STARTED) { ++ /* PRLAM-10673 can cause a TERMINATED ++ * job to come back as NOT_STARTED, ++ * but the error interrupt helps us ++ * detect it ++ */ ++ completion_code = ++ BASE_JD_EVENT_TERMINATED; ++ } + -+/** -+ * enum kbasep_pm_action - Actions that can be performed on a core. 
-+ * -+ * @ACTION_PRESENT: The cores that are present -+ * @ACTION_READY: The cores that are ready -+ * @ACTION_PWRON: Power on the cores specified -+ * @ACTION_PWROFF: Power off the cores specified -+ * @ACTION_PWRTRANS: The cores that are transitioning -+ * @ACTION_PWRACTIVE: The cores that are active -+ * -+ * This enumeration is private to the file. Its values are set to allow -+ * core_type_to_reg() function, which decodes this enumeration, to be simpler -+ * and more efficient. -+ */ -+enum kbasep_pm_action { -+ ACTION_PRESENT = 0, -+ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), -+ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), -+ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), -+ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), -+ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) -+}; ++ kbase_gpu_irq_evict(kbdev, i, completion_code); + -+static u64 kbase_pm_get_state( -+ struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action); ++ /* Some jobs that encounter a BUS FAULT may ++ * result in corrupted state causing future ++ * jobs to hang. Reset GPU before allowing ++ * any other jobs on the slot to continue. ++ */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { ++ if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { ++ if (kbase_prepare_to_reset_gpu_locked( ++ kbdev, ++ RESET_FLAGS_NONE)) ++ kbase_reset_gpu_locked(kbdev); ++ } ++ } ++ } + -+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), ++ done & ((1 << i) | (1 << (i + 16)))); ++ active = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); + -+#if MALI_USE_CSF -+bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (((active >> i) & 1) == 0 && ++ (((done >> (i + 16)) & 1) == 0)) { ++ /* There is a potential race we must work ++ * around: ++ * ++ * 1. A job slot has a job in both current and ++ * next registers ++ * 2. The job in current completes ++ * successfully, the IRQ handler reads ++ * RAWSTAT and calls this function with the ++ * relevant bit set in "done" ++ * 3. The job in the next registers becomes the ++ * current job on the GPU ++ * 4. Sometime before the JOB_IRQ_CLEAR line ++ * above the job on the GPU _fails_ ++ * 5. The IRQ_CLEAR clears the done bit but not ++ * the failed bit. This atomically sets ++ * JOB_IRQ_JS_STATE. However since both jobs ++ * have now completed the relevant bits for ++ * the slot are set to 0. ++ * ++ * If we now did nothing then we'd incorrectly ++ * assume that _both_ jobs had completed ++ * successfully (since we haven't yet observed ++ * the fail bit being set in RAWSTAT). ++ * ++ * So at this point if there are no active jobs ++ * left we check to see if RAWSTAT has a failure ++ * bit set for the job slot. If it does we know ++ * that there has been a new failure that we ++ * didn't previously know about, so we make sure ++ * that we record this in active (but we wait ++ * for the next loop to deal with it). ++ * ++ * If we were handling a job failure (i.e. done ++ * has the relevant high bit set) then we know ++ * that the value read back from ++ * JOB_IRQ_JS_STATE is the correct number of ++ * remaining jobs because the failed job will ++ * have prevented any futher jobs from starting ++ * execution. 
++ */ ++ u32 rawstat = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + -+ if (unlikely(!kbdev->csf.firmware_inited)) -+ return false; ++ if ((rawstat >> (i + 16)) & 1) { ++ /* There is a failed job that we've ++ * missed - add it back to active ++ */ ++ active |= (1u << i); ++ } ++ } + -+ if (kbdev->csf.scheduler.pm_active_count && -+ kbdev->pm.backend.mcu_desired) -+ return true; ++ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", ++ completion_code); + -+#ifdef KBASE_PM_RUNTIME -+ if (kbdev->pm.backend.gpu_wakeup_override) -+ return true; -+#endif ++ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); ++ nr_done -= (active >> i) & 1; ++ nr_done -= (active >> (i + 16)) & 1; + -+ /* MCU is supposed to be ON, only when scheduler.pm_active_count is -+ * non zero. But for always_on policy, the MCU needs to be kept on, -+ * unless policy changing transition needs it off. -+ */ ++ if (nr_done <= 0) { ++ dev_warn(kbdev->dev, "Spurious interrupt on slot %d", ++ i); + -+ return (kbdev->pm.backend.mcu_desired && -+ kbase_pm_no_mcu_core_pwroff(kbdev) && -+ !kbdev->pm.backend.policy_change_clamp_state_to_off); -+} -+#endif ++ goto spurious; ++ } + -+bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) -+{ -+#if !MALI_USE_CSF -+ if (kbdev->pm.backend.protected_entry_transition_override) -+ return false; ++ count += nr_done; + -+ if (kbdev->pm.backend.protected_transition_override && -+ kbdev->pm.backend.protected_l2_override) -+ return true; ++ while (nr_done) { ++ if (nr_done == 1) { ++ kbase_gpu_complete_hw(kbdev, i, ++ completion_code, ++ job_tail, ++ &end_timestamp); ++ kbase_jm_try_kick_all(kbdev); ++ } else { ++ /* More than one job has completed. ++ * Since this is not the last job being ++ * reported this time it must have ++ * passed. This is because the hardware ++ * will not allow further jobs in a job ++ * slot to complete until the failed job ++ * is cleared from the IRQ status. 
++ */ ++ kbase_gpu_complete_hw(kbdev, i, ++ BASE_JD_EVENT_DONE, ++ 0, ++ &end_timestamp); ++ } ++ nr_done--; ++ } ++ spurious: ++ done = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + -+ if (kbdev->pm.backend.protected_transition_override && -+ !kbdev->pm.backend.shaders_desired) -+ return false; -+#else -+ if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) -+ return false; ++ failed = done >> 16; ++ finished = (done & 0xFFFF) | failed; ++ if (done) ++ end_timestamp = ktime_get_raw(); ++ } while (finished & (1 << i)); + -+ /* Power up the L2 cache only when MCU is desired */ -+ if (likely(kbdev->csf.firmware_inited)) -+ return kbase_pm_is_mcu_desired(kbdev); -+#endif ++ kbasep_job_slot_update_head_start_timestamp(kbdev, i, ++ end_timestamp); ++ } + -+ return kbdev->pm.backend.l2_desired; ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_COMMITTED) { ++ /* If we're trying to reset the GPU then we might be able to do ++ * it early (without waiting for a timeout) because some jobs ++ * have completed ++ */ ++ kbasep_try_reset_gpu_early_locked(kbdev); ++ } ++ KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} + -+#if !MALI_USE_CSF -+void kbase_pm_protected_override_enable(struct kbase_device *kbdev) ++void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, ++ u32 action, base_jd_core_req core_reqs, ++ struct kbase_jd_atom *target_katom) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#if KBASE_KTRACE_ENABLE ++ u32 status_reg_before; ++ u64 job_in_head_before; ++ u32 status_reg_after; + -+ kbdev->pm.backend.protected_transition_override = true; -+} -+void kbase_pm_protected_override_disable(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON(action & (~JS_COMMAND_MASK)); + -+ kbdev->pm.backend.protected_transition_override = false; -+} ++ /* Check the head pointer */ ++ job_in_head_before = ((u64) kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, JS_HEAD_LO))) ++ | (((u64) kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, JS_HEAD_HI))) ++ << 32); ++ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); ++#endif + -+int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (action == JS_COMMAND_SOFT_STOP) { ++ if (kbase_jd_katom_is_protected(target_katom)) { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ dev_dbg(kbdev->dev, ++ "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x", ++ (unsigned int)core_reqs); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ return; ++ } + -+ WARN_ON(!kbdev->protected_mode_transition); ++ /* We are about to issue a soft stop, so mark the atom as having ++ * been soft stopped ++ */ ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; + -+ if (kbdev->pm.backend.l2_always_on && -+ (kbdev->system_coherency == COHERENCY_ACE)) { -+ WARN_ON(kbdev->pm.backend.protected_entry_transition_override); ++ /* Mark the point where we issue the soft-stop command */ ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); + -+ /* -+ * If there is already a GPU reset pending then wait for it to -+ * complete before initiating a special reset for protected -+ * mode entry. -+ */ -+ if (kbase_reset_gpu_silent(kbdev)) -+ return -EAGAIN; ++ action = (target_katom->atom_flags & ++ KBASE_KATOM_FLAGS_JOBCHAIN) ? 
++ JS_COMMAND_SOFT_STOP_1 : ++ JS_COMMAND_SOFT_STOP_0; ++ } else if (action == JS_COMMAND_HARD_STOP) { ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + -+ kbdev->pm.backend.protected_entry_transition_override = true; ++ action = (target_katom->atom_flags & ++ KBASE_KATOM_FLAGS_JOBCHAIN) ? ++ JS_COMMAND_HARD_STOP_1 : ++ JS_COMMAND_HARD_STOP_0; + } + -+ return 0; -+} ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); + -+void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#if KBASE_KTRACE_ENABLE ++ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); ++ if (status_reg_after == BASE_JD_EVENT_ACTIVE) { ++ struct kbase_jd_atom *head; ++ struct kbase_context *head_kctx; + -+ WARN_ON(!kbdev->protected_mode_transition); ++ head = kbase_gpu_inspect(kbdev, js, 0); ++ if (unlikely(!head)) { ++ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); ++ return; ++ } ++ head_kctx = head->kctx; + -+ if (kbdev->pm.backend.l2_always_on && -+ (kbdev->system_coherency == COHERENCY_ACE)) { -+ WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); ++ if (status_reg_before == BASE_JD_EVENT_ACTIVE) ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); ++ else ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + -+ kbdev->pm.backend.protected_entry_transition_override = false; ++ switch (action) { ++ case JS_COMMAND_SOFT_STOP: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_0: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_1: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP_0: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP_1: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); ++ break; ++ default: ++ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, ++ (void *)target_katom, (void *)target_katom->kctx); ++ break; ++ } ++ } else { ++ if (status_reg_before == BASE_JD_EVENT_ACTIVE) ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); ++ else ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); ++ ++ switch (action) { ++ case JS_COMMAND_SOFT_STOP: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_0: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_1: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); ++ break; ++ case JS_COMMAND_HARD_STOP: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); ++ break; ++ case JS_COMMAND_HARD_STOP_0: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); ++ break; ++ case JS_COMMAND_HARD_STOP_1: ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); ++ break; ++ default: ++ WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action, ++ (void *)target_katom, (void *)target_katom->kctx); ++ break; ++ } + } ++#endif +} + -+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) ++void 
kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned int i; + -+ if (override) { -+ kbdev->pm.backend.protected_l2_override++; -+ WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); -+ } else { -+ kbdev->pm.backend.protected_l2_override--; -+ WARN_ON(kbdev->pm.backend.protected_l2_override < 0); -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_pm_update_state(kbdev); ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ kbase_job_slot_hardstop(kctx, i, NULL); +} -+#endif + -+/** -+ * core_type_to_reg - Decode a core type and action to a register. -+ * -+ * @core_type: The type of core -+ * @action: The type of action -+ * -+ * Given a core type (defined by kbase_pm_core_type) and an action (defined -+ * by kbasep_pm_action) this function will return the register offset that -+ * will perform the action on the core type. The register returned is the _LO -+ * register and an offset must be applied to use the _HI register. -+ * -+ * Return: The register offset of the _LO register that performs an action of -+ * type @action on a core of type @core_type. -+ */ -+static u32 core_type_to_reg(enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action) ++void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, ++ struct kbase_jd_atom *target_katom) +{ -+ if (corestack_driver_control) { -+ if (core_type == KBASE_PM_CORE_STACK) { -+ switch (action) { -+ case ACTION_PRESENT: -+ return STACK_PRESENT_LO; -+ case ACTION_READY: -+ return STACK_READY_LO; -+ case ACTION_PWRON: -+ return STACK_PWRON_LO; -+ case ACTION_PWROFF: -+ return STACK_PWROFF_LO; -+ case ACTION_PWRTRANS: -+ return STACK_PWRTRANS_LO; -+ default: -+ WARN(1, "Invalid action for core type\n"); -+ } -+ } -+ } ++ struct kbase_device *kbdev; ++ unsigned int target_js = target_katom->slot_nr; ++ int i; ++ bool stop_sent = false; + -+ return (u32)core_type + (u32)action; -+} ++ kbdev = kctx->kbdev; + -+#if IS_ENABLED(CONFIG_ARM64) -+static void mali_cci_flush_l2(struct kbase_device *kbdev) -+{ -+ const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; -+ u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; -+ u32 raw; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* -+ * Note that we don't take the cache flush mutex here since -+ * we expect to be the last user of the L2, all other L2 users -+ * would have dropped their references, to initiate L2 power -+ * down, L2 power down being the only valid place for this -+ * to be called from. -+ */ ++ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, target_js); i++) { ++ struct kbase_jd_atom *slot_katom; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CACHE_CLN_INV_L2); ++ slot_katom = kbase_gpu_inspect(kbdev, target_js, i); ++ if (!slot_katom) ++ continue; + -+ raw = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); ++ if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom, ++ KBASE_ATOM_ORDERING_FLAG_SEQNR)) { ++ if (!stop_sent) ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( ++ kbdev, ++ target_katom); + -+ /* Wait for cache flush to complete before continuing, exit on -+ * gpu resets or loop expiry. 
-+ */ -+ while (((raw & mask) == 0) && --loops) { -+ raw = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); ++ kbase_job_slot_softstop(kbdev, target_js, slot_katom); ++ stop_sent = true; ++ } + } +} -+#endif + -+/** -+ * kbase_pm_invoke - Invokes an action on a core set -+ * -+ * @kbdev: The kbase device structure of the device -+ * @core_type: The type of core that the action should be performed on -+ * @cores: A bit mask of cores to perform the action on (low 32 bits) -+ * @action: The action to perform on the cores -+ * -+ * This function performs the action given by @action on a set of cores of a -+ * type given by @core_type. It is a static function used by -+ * kbase_pm_transition_core_type() -+ */ -+static void kbase_pm_invoke(struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ u64 cores, -+ enum kbasep_pm_action action) ++static int softstop_start_rp_nolock( ++ struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ u32 reg; -+ u32 lo = cores & 0xFFFFFFFF; -+ u32 hi = (cores >> 32) & 0xFFFFFFFF; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_jd_atom *katom; ++ struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ reg = core_type_to_reg(core_type, action); -+ -+ KBASE_DEBUG_ASSERT(reg); -+ -+ if (cores) { -+ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); ++ katom = kbase_gpu_inspect(kbdev, 1, 0); + -+ if (action == ACTION_PWRON) -+ state |= cores; -+ else if (action == ACTION_PWROFF) -+ state &= ~cores; -+ KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); ++ if (!katom) { ++ dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); ++ return -ESRCH; + } + -+ /* Tracing */ -+ if (cores) { -+ if (action == ACTION_PWRON) -+ switch (core_type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_KTRACE_ADD(kbdev, PM_PWRON, NULL, cores); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_KTRACE_ADD(kbdev, PM_PWRON_TILER, NULL, cores); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_KTRACE_ADD(kbdev, PM_PWRON_L2, NULL, cores); -+ break; -+ default: -+ break; -+ } -+ else if (action == ACTION_PWROFF) -+ switch (core_type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_KTRACE_ADD(kbdev, PM_PWROFF, NULL, cores); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_KTRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, cores); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_KTRACE_ADD(kbdev, PM_PWROFF_L2, NULL, cores); -+ /* disable snoops before L2 is turned off */ -+ kbase_pm_cache_snoop_disable(kbdev); -+ break; -+ default: -+ break; -+ } ++ if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { ++ dev_dbg(kctx->kbdev->dev, ++ "Atom %pK on job slot is not start RP\n", (void *)katom); ++ return -EPERM; + } + -+ if (kbase_dummy_job_wa_enabled(kbdev) && -+ action == ACTION_PWRON && -+ core_type == KBASE_PM_CORE_SHADER && -+ !(kbdev->dummy_job_wa.flags & -+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { -+ kbase_dummy_job_wa_execute(kbdev, cores); -+ } else { -+ if (lo != 0) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); -+ if (hi != 0) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); -+ } ++ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); ++ ++ rp = &kctx->jctx.renderpasses[katom->renderpass_id]; ++ if (WARN_ON(rp->state != KBASE_JD_RP_START && ++ rp->state != KBASE_JD_RP_RETRY)) ++ return -EINVAL; ++ ++ dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", ++ (int)rp->state, (void *)reg); ++ ++ if 
(WARN_ON(katom != rp->start_katom)) ++ return -EINVAL; ++ ++ dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", ++ (void *)reg, (void *)&rp->oom_reg_list); ++ list_move_tail(®->link, &rp->oom_reg_list); ++ dev_dbg(kctx->kbdev->dev, "Added region to list\n"); ++ ++ rp->state = (rp->state == KBASE_JD_RP_START ? ++ KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); ++ ++ kbase_job_slot_softstop(kbdev, 1, katom); ++ ++ return 0; +} + -+/** -+ * kbase_pm_get_state - Get information about a core set -+ * -+ * @kbdev: The kbase device structure of the device -+ * @core_type: The type of core that the should be queried -+ * @action: The property of the cores to query -+ * -+ * This function gets information (chosen by @action) about a set of cores of -+ * a type given by @core_type. It is a static function used by -+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and -+ * kbase_pm_get_ready_cores(). -+ * -+ * Return: A bit mask specifying the state of the cores -+ */ -+static u64 kbase_pm_get_state(struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action) ++int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg) +{ -+ u32 reg; -+ u32 lo, hi; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int err; ++ unsigned long flags; + -+ reg = core_type_to_reg(core_type, action); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ err = softstop_start_rp_nolock(kctx, reg); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ KBASE_DEBUG_ASSERT(reg); ++ return err; ++} + -+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); -+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); ++void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); + -+ return (((u64) hi) << 32) | ((u64) lo); ++ timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, ++ kctx->jctx.job_nr == 0, timeout); ++ ++ if (timeout != 0) ++ timeout = wait_event_timeout( ++ kctx->jctx.sched_info.ctx.is_scheduled_wait, ++ !kbase_ctx_flag(kctx, KCTX_SCHEDULED), ++ timeout); ++ ++ /* Neither wait timed out; all done! */ ++ if (timeout != 0) ++ goto exit; ++ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) { ++ dev_err(kbdev->dev, ++ "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", ++ ZAP_TIMEOUT); ++ kbase_reset_gpu(kbdev); ++ } ++ ++ /* Wait for the reset to complete */ ++ kbase_reset_gpu_wait(kbdev); ++exit: ++ dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); ++ ++ /* Ensure that the signallers of the waitqs have finished */ ++ mutex_lock(&kctx->jctx.lock); ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kctx->jctx.lock); +} + -+/** -+ * kbase_pm_get_present_cores - Get the cores that are present -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of the cores that are present -+ */ -+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) ++u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ u32 flush_id = 0; + -+ switch (type) { -+ case KBASE_PM_CORE_L2: -+ return kbdev->gpu_props.curr_config.l2_present; -+ case KBASE_PM_CORE_SHADER: -+ return kbdev->gpu_props.curr_config.shader_present; -+ case KBASE_PM_CORE_TILER: -+ return kbdev->gpu_props.props.raw_props.tiler_present; -+ case KBASE_PM_CORE_STACK: -+ return kbdev->gpu_props.props.raw_props.stack_present; -+ default: -+ break; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { ++ mutex_lock(&kbdev->pm.lock); ++ if (kbdev->pm.backend.gpu_powered) ++ flush_id = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(LATEST_FLUSH)); ++ mutex_unlock(&kbdev->pm.lock); + } -+ KBASE_DEBUG_ASSERT(0); + -+ return 0; ++ return flush_id; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); -+ -+/** -+ * kbase_pm_get_active_cores - Get the cores that are "active" -+ * (busy processing work) -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of cores that are active -+ */ -+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) ++int kbase_job_slot_init(struct kbase_device *kbdev) +{ -+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); ++ CSTD_UNUSED(kbdev); ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbase_job_slot_init); + -+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); ++void kbase_job_slot_halt(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+/** -+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between -+ * power states -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of cores that are transitioning -+ */ -+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) ++void kbase_job_slot_term(struct kbase_device *kbdev) +{ -+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); ++ CSTD_UNUSED(kbdev); +} ++KBASE_EXPORT_TEST_API(kbase_job_slot_term); + -+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); + +/** -+ * kbase_pm_get_ready_cores - Get the cores that are powered on ++ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags ++ * @kbdev: The kbase device ++ * @js: The job slot to soft-stop ++ * @target_katom: The job that should be soft-stopped (or NULL for any job) ++ * @sw_flags: Flags to pass in about the soft-stop + * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query ++ * Context: ++ * The job slot lock must be held when calling this function. ++ * The job slot must not already be in the process of being soft-stopped. 
+ * -+ * Return: Bitmask of cores that are ready (powered on) ++ * Soft-stop the specified job slot, with extra information about the stop ++ * ++ * Where possible any job in the next register is evicted before the soft-stop. + */ -+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) ++void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, ++ struct kbase_jd_atom *target_katom, u32 sw_flags) +{ -+ u64 result; -+ -+ result = kbase_pm_get_state(kbdev, type, ACTION_READY); ++ dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", ++ target_katom, sw_flags, js); + -+ switch (type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, result); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, result); -+ break; -+ default: -+ break; ++ if (sw_flags & JS_COMMAND_MASK) { ++ WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom, ++ target_katom ? (void *)target_katom->kctx : NULL, sw_flags); ++ sw_flags &= ~((u32)JS_COMMAND_MASK); + } -+ -+ return result; ++ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, ++ JS_COMMAND_SOFT_STOP | sw_flags); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); -+ -+static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) ++void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); ++} + -+ /* See if we can get away with disabling hwcnt -+ * atomically, otherwise kick off a worker. -+ */ -+ if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { -+ backend->hwcnt_disabled = true; ++void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, ++ struct kbase_jd_atom *target_katom) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool stopped; + -+ } else { -+ kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, -+ &backend->hwcnt_disable_work); -+ } ++ stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, ++ target_katom, ++ JS_COMMAND_HARD_STOP); ++ CSTD_UNUSED(stopped); +} + -+static void kbase_pm_l2_config_override(struct kbase_device *kbdev) ++void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, ++ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) +{ -+ u32 val; ++ u32 hw_action = action & JS_COMMAND_MASK; + -+ /* -+ * Skip if it is not supported ++ /* For soft-stop, don't enter if soft-stop not allowed, or isn't ++ * causing disjoint. 
+ */ -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) ++ if (hw_action == JS_COMMAND_SOFT_STOP && ++ (kbase_jd_katom_is_protected(target_katom) || ++ (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) + return; + -+#if MALI_USE_CSF -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), -+ L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); -+ } -+#endif /* MALI_USE_CSF */ ++ /* Nothing to do if already logged disjoint state on this atom */ ++ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) ++ return; + -+ /* -+ * Skip if size and hash are not given explicitly, -+ * which means default values are used. -+ */ -+ if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0) && -+ (!kbdev->l2_hash_values_override)) -+ return; -+ -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); -+ -+ if (kbdev->l2_size_override) { -+ val &= ~L2_CONFIG_SIZE_MASK; -+ val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT); -+ } -+ -+ if (kbdev->l2_hash_override) { -+ WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); -+ val &= ~L2_CONFIG_HASH_MASK; -+ val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); -+ } else if (kbdev->l2_hash_values_override) { -+ int i; -+ -+ WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); -+ val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; -+ val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; ++ kbase_disjoint_state_up(kbdev); ++} + -+ for (i = 0; i < ASN_HASH_COUNT; i++) { -+ dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", -+ kbdev->l2_hash_values[i], i); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), -+ kbdev->l2_hash_values[i]); -+ } ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom) ++{ ++ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { ++ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; ++ kbase_disjoint_state_down(kbdev); + } -+ -+ dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); +} + -+static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) +{ -+ struct kbase_pm_backend_data *const backend = &kbdev->pm.backend; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ queue_work(system_wq, &backend->gpu_clock_control_work); ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ return -EINVAL; +} + -+#if MALI_USE_CSF -+static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) +{ -+ const char *const strings[] = { -+#define KBASEP_MCU_STATE(n) #n, -+#include "mali_kbase_pm_mcu_states.h" -+#undef KBASEP_MCU_STATE -+ }; -+ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) -+ return "Bad MCU state"; -+ else -+ return strings[state]; ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ return -EINVAL; +} + -+static -+void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) ++void kbase_reset_gpu_allow(struct kbase_device *kbdev) +{ -+#if KBASE_KTRACE_ENABLE -+ switch (state) { -+#define KBASEP_MCU_STATE(n) \ -+ case KBASE_MCU_ ## n: \ -+ KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ -+ break; -+#include "mali_kbase_pm_mcu_states.h" -+#undef KBASEP_MCU_STATE -+ } 
-+#endif ++ WARN(true, "%s Not implemented for JM GPUs", __func__); +} + -+static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ bool timer_update; -+ bool core_mask_update; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ WARN_ON(backend->mcu_state != KBASE_MCU_ON); -+ -+ /* This function is only for cases where the MCU managing Cores, if -+ * the firmware mode is with host control, do nothing here. -+ */ -+ if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) -+ return false; -+ -+ core_mask_update = -+ backend->shaders_avail != backend->shaders_desired_mask; -+ -+ timer_update = kbdev->csf.mcu_core_pwroff_dur_count != -+ kbdev->csf.mcu_core_pwroff_reg_shadow; -+ -+ if (core_mask_update || timer_update) -+ kbase_csf_firmware_update_core_attr(kbdev, timer_update, -+ core_mask_update, backend->shaders_desired_mask); -+ -+ return (core_mask_update || timer_update); ++ WARN(true, "%s Not implemented for JM GPUs", __func__); +} + -+bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, -+ enum kbase_mcu_state state) ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return ((state == KBASE_MCU_OFF) || (state == KBASE_MCU_IN_SLEEP)); ++ WARN(true, "%s Not implemented for JM GPUs", __func__); +} + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on -+ * MCU side -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * This function is called to re-enable the Doorbell notification on MCU side -+ * when MCU needs to beome active again. -+ */ -+static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) ++static void kbase_debug_dump_registers(struct kbase_device *kbdev) +{ -+ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_CONTROL)); ++ int i; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_io_history_dump(kbdev); + -+ val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); ++ dev_err(kbdev->dev, "Register state:"); ++ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); ++ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); ++ for (i = 0; i < 3; i++) { ++ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", ++ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), ++ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); ++ } ++ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); ++ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); ++ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); ++ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, 
GPU_CONTROL_REG(SHADER_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); ++ dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); +} + -+/** -+ * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * This function is called to wait for the AS used by MCU FW to get configured -+ * before DB notification on MCU is enabled, as a workaround for HW issue. -+ */ -+static void wait_mcu_as_inactive(struct kbase_device *kbdev) ++static void kbasep_reset_timeout_worker(struct work_struct *data) +{ -+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) -+ return; ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ ktime_t end_timestamp = ktime_get_raw(); ++ struct kbasep_js_device_data *js_devdata; ++ bool silent = false; ++ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + -+ /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ -+ while (--max_loops && -+ kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & -+ AS_STATUS_AS_ACTIVE_INT) -+ ; ++ kbdev = container_of(data, struct kbase_device, ++ hwaccess.backend.reset_work); + -+ if (!WARN_ON_ONCE(max_loops == 0)) -+ return; ++ js_devdata = &kbdev->js_data; + -+ dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_SILENT) ++ silent = true; + -+ if (kbase_prepare_to_reset_gpu(kbdev, 0)) -+ kbase_reset_gpu(kbdev); -+} -+#endif ++ if (kbase_is_quick_reset_enabled(kbdev)) ++ silent = true; + -+/** -+ * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts -+ * from the firmware -+ * -+ * @kbdev: Pointer to the device -+ * @enable: boolean indicating to enable interrupts or not -+ * -+ * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on -+ * when FW is controlling the power for the shader cores. Correspondingly, the -+ * interrupts can be re-enabled after the MCU has been disabled before the -+ * power down of L2. -+ */ -+static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) -+{ -+ u32 irq_mask; ++ KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Disable GPU hardware counters. ++ * This call will block until counters are disabled. ++ */ ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ /* Make sure the timer has completed - this cannot be done from ++ * interrupt context, so this cannot be done within ++ * kbasep_try_reset_gpu_early. ++ */ ++ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + -+ if (enable) { -+ irq_mask |= POWER_CHANGED_ALL; -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); -+ } else { -+ irq_mask &= ~POWER_CHANGED_ALL; ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ /* This would re-activate the GPU. 
Since it's already idle, ++ * there's no need to reset it ++ */ ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); ++ kbase_disjoint_state_down(kbdev); ++ wake_up(&kbdev->hwaccess.backend.reset_wait); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return; + } + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); -+} ++ WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__); + -+static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ enum kbase_mcu_state prev_state; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock(&kbdev->mmu_mask_change); ++ kbase_pm_reset_start_locked(kbdev); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* We're about to flush out the IRQs and their bottom half's */ ++ kbdev->irq_reset_flush = true; + -+ /* -+ * Initial load of firmware should have been done to -+ * exercise the MCU state machine. ++ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the ++ * spinlock; this also clears any outstanding interrupts + */ -+ if (unlikely(!kbdev->csf.firmware_inited)) { -+ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); -+ return 0; -+ } ++ kbase_pm_disable_interrupts_nolock(kbdev); + -+ do { -+ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); -+ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++ spin_unlock(&kbdev->mmu_mask_change); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* mask off ready from trans in case transitions finished -+ * between the register reads -+ */ -+ shaders_trans &= ~shaders_ready; ++ /* Ensure that any IRQ handlers have finished ++ * Must be done without any locks IRQ handlers will take ++ */ ++ kbase_synchronize_irqs(kbdev); + -+ prev_state = backend->mcu_state; ++ /* Flush out any in-flight work items */ ++ kbase_flush_mmu_wqs(kbdev); + -+ switch (backend->mcu_state) { -+ case KBASE_MCU_OFF: -+ if (kbase_pm_is_mcu_desired(kbdev) && -+ !backend->policy_change_clamp_state_to_off && -+ backend->l2_state == KBASE_L2_ON) { -+ kbase_csf_firmware_trigger_reload(kbdev); -+ backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD; -+ } -+ break; ++ /* The flush has completed so reset the active indicator */ ++ kbdev->irq_reset_flush = false; + -+ case KBASE_MCU_PEND_ON_RELOAD: -+ if (kbdev->csf.firmware_reloaded) { -+ backend->shaders_desired_mask = -+ kbase_pm_ca_get_core_mask(kbdev); -+ kbase_csf_firmware_global_reinit(kbdev, -+ backend->shaders_desired_mask); -+ if (!kbdev->csf.firmware_hctl_core_pwr) -+ kbasep_pm_toggle_power_interrupt(kbdev, false); -+ backend->mcu_state = -+ KBASE_MCU_ON_GLB_REINIT_PEND; -+ } -+ break; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { ++ /* Ensure that L2 is not transitioning when we send the reset ++ * command ++ */ ++ while (--max_loops && kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_L2)) ++ ; + -+ case KBASE_MCU_ON_GLB_REINIT_PEND: -+ if (kbase_csf_firmware_global_reinit_complete(kbdev)) { -+ backend->shaders_avail = -+ backend->shaders_desired_mask; -+ backend->pm_shaders_core_mask = 0; -+ if (kbdev->csf.firmware_hctl_core_pwr) { -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ backend->shaders_avail, ACTION_PWRON); -+ backend->mcu_state = -+ KBASE_MCU_HCTL_SHADERS_PEND_ON; -+ } else -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+#if 
IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { -+ kbase_debug_coresight_csf_state_request( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); -+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; -+ } else if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { -+ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; -+ } -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+ } -+ break; ++ WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); ++ } + -+ case KBASE_MCU_HCTL_SHADERS_PEND_ON: -+ if (!shaders_trans && -+ shaders_ready == backend->shaders_avail) { -+ /* Cores now stable, notify MCU the stable mask */ -+ kbase_csf_firmware_update_core_attr(kbdev, -+ false, true, shaders_ready); ++ mutex_lock(&kbdev->pm.lock); ++ /* We hold the pm lock, so there ought to be a current policy */ ++ if (unlikely(!kbdev->pm.backend.pm_current_policy)) ++ dev_warn(kbdev->dev, "No power policy set!"); + -+ backend->pm_shaders_core_mask = shaders_ready; -+ backend->mcu_state = -+ KBASE_MCU_HCTL_CORES_NOTIFY_PEND; -+ } -+ break; ++ /* All slot have been soft-stopped and we've waited ++ * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we ++ * assume that anything that is still left on the GPU is stuck there and ++ * we'll kill it when we reset the GPU ++ */ + -+ case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: -+ /* Wait for the acknowledgement */ -+ if (kbase_csf_firmware_core_attr_updated(kbdev)) -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ break; ++ if (!silent) ++ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", ++ RESET_TIMEOUT); + -+ case KBASE_MCU_ON_HWCNT_ENABLE: -+ backend->hwcnt_desired = true; -+ if (backend->hwcnt_disabled) { -+ unsigned long flags; ++ /* Output the state of some interesting registers to help in the ++ * debugging of GPU resets ++ */ ++ if (!silent) ++ kbase_debug_dump_registers(kbdev); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ backend->hwcnt_disabled = false; -+ } -+ backend->mcu_state = KBASE_MCU_ON; -+ break; ++ /* Complete any jobs that were still on the GPU */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->protected_mode = false; ++ if (!kbdev->pm.backend.protected_entry_transition_override) ++ kbase_backend_reset(kbdev, &end_timestamp); ++ kbase_pm_metrics_update(kbdev, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ case KBASE_MCU_ON: -+ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); ++ /* Tell hardware counters a reset is about to occur. ++ * If the instr backend is in an unrecoverable error state (e.g. due to ++ * HW being unresponsive), this will transition the backend out of ++ * it, on the assumption a reset will fix whatever problem there was. 
++ */ ++ kbase_instr_hwcnt_on_before_reset(kbdev); + -+ if (!kbase_pm_is_mcu_desired(kbdev)) -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; -+ else if (kbdev->csf.firmware_hctl_core_pwr) { -+ /* Host control scale up/down cores as needed */ -+ if (backend->shaders_desired_mask != shaders_ready) { -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) -+ kbase_pm_trigger_hwcnt_disable(kbdev); -+ backend->mcu_state = -+ KBASE_MCU_HCTL_MCU_ON_RECHECK; -+ } -+ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) -+ backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ else if (kbdev->csf.coresight.disable_on_pmode_enter) { -+ kbase_debug_coresight_csf_state_request( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); -+ backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; -+ } else if (kbdev->csf.coresight.enable_on_pmode_exit) { -+ kbase_debug_coresight_csf_state_request( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); -+ backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; -+ } -+#endif -+ break; ++ /* Reset the GPU */ ++ kbase_pm_init_hw(kbdev, 0); + -+ case KBASE_MCU_HCTL_MCU_ON_RECHECK: -+ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); ++ mutex_unlock(&kbdev->pm.lock); + -+ if (!backend->hwcnt_disabled) { -+ /* Wait for being disabled */ -+ ; -+ } else if (!kbase_pm_is_mcu_desired(kbdev)) { -+ /* Converging to MCU powering down flow */ -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; -+ } else if (backend->shaders_desired_mask & ~shaders_ready) { -+ /* set cores ready but not available to -+ * meet SHADERS_PEND_ON check pass -+ */ -+ backend->shaders_avail = -+ (backend->shaders_desired_mask | shaders_ready); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ backend->shaders_avail & ~shaders_ready, -+ ACTION_PWRON); -+ backend->mcu_state = -+ KBASE_MCU_HCTL_SHADERS_PEND_ON; ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ } else if (~backend->shaders_desired_mask & shaders_ready) { -+ kbase_csf_firmware_update_core_attr(kbdev, false, true, -+ backend->shaders_desired_mask); -+ backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND; -+ } else { -+ backend->mcu_state = -+ KBASE_MCU_HCTL_SHADERS_PEND_ON; -+ } -+ break; ++ kbase_pm_enable_interrupts(kbdev); + -+ case KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: -+ if (kbase_csf_firmware_core_attr_updated(kbdev)) { -+ /* wait in queue until cores idle */ -+ queue_work(backend->core_idle_wq, &backend->core_idle_work); -+ backend->mcu_state = KBASE_MCU_HCTL_CORE_INACTIVE_PEND; -+ } -+ break; ++ kbase_disjoint_state_down(kbdev); + -+ case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: -+ { -+ u64 active_cores = kbase_pm_get_active_cores( -+ kbdev, -+ KBASE_PM_CORE_SHADER); -+ u64 cores_to_disable = shaders_ready & -+ ~backend->shaders_desired_mask; ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ if (!(cores_to_disable & active_cores)) { -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ cores_to_disable, -+ ACTION_PWROFF); -+ backend->shaders_avail = backend->shaders_desired_mask; -+ backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; -+ } -+ } -+ break; ++ mutex_lock(&kbdev->pm.lock); + -+ case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND: -+ if (!shaders_trans && shaders_ready == backend->shaders_avail) { -+ /* Cores 
now stable */ -+ backend->pm_shaders_core_mask = shaders_ready; -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ } -+ break; ++ kbase_pm_reset_complete(kbdev); + -+ case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: -+ if (kbase_csf_firmware_core_attr_updated(kbdev)) { -+ backend->shaders_avail = backend->shaders_desired_mask; -+ backend->mcu_state = KBASE_MCU_ON; -+ } -+ break; ++ /* Find out what cores are required now */ ++ kbase_pm_update_cores_state(kbdev); + -+ case KBASE_MCU_ON_HWCNT_DISABLE: -+ if (kbase_pm_is_mcu_desired(kbdev)) { -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ break; -+ } ++ /* Synchronously request and wait for those cores, because if ++ * instrumentation is enabled it would need them immediately. ++ */ ++ kbase_pm_wait_for_desired_state(kbdev); + -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) -+ kbase_pm_trigger_hwcnt_disable(kbdev); ++ mutex_unlock(&kbdev->pm.lock); + ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); + -+ if (backend->hwcnt_disabled) { -+#ifdef KBASE_PM_RUNTIME -+ if (backend->gpu_sleep_mode_active) -+ backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; -+ else { -+#endif -+ backend->mcu_state = KBASE_MCU_ON_HALT; -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ kbase_debug_coresight_csf_state_request( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); -+ backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+ } -+ } -+ break; ++ wake_up(&kbdev->hwaccess.backend.reset_wait); ++ if (!silent) ++ dev_err(kbdev->dev, "Reset complete"); + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: -+ if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { -+ backend->mcu_state = KBASE_MCU_ON; -+ kbdev->csf.coresight.disable_on_pmode_enter = false; -+ } -+ break; -+ case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: -+ if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { -+ backend->mcu_state = KBASE_MCU_ON; -+ kbdev->csf.coresight.enable_on_pmode_exit = false; -+ } -+ break; -+ case KBASE_MCU_CORESIGHT_DISABLE: -+ if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) -+ backend->mcu_state = KBASE_MCU_ON_HALT; -+ break; ++ /* Try submitting some jobs to restart processing */ ++ KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); ++ kbase_js_sched_all(kbdev); + -+ case KBASE_MCU_CORESIGHT_ENABLE: -+ if (kbase_debug_coresight_csf_state_check( -+ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ break; -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ /* Process any pending slot updates */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_backend_slot_update(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ case KBASE_MCU_ON_HALT: -+ if (!kbase_pm_is_mcu_desired(kbdev)) { -+ kbase_csf_firmware_trigger_mcu_halt(kbdev); -+ backend->mcu_state = KBASE_MCU_ON_PEND_HALT; -+ } else -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ break; ++ kbase_pm_context_idle(kbdev); + -+ case KBASE_MCU_ON_PEND_HALT: -+ if (kbase_csf_firmware_mcu_halted(kbdev)) { -+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, -+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); -+ if (kbdev->csf.firmware_hctl_core_pwr) -+ backend->mcu_state = -+ KBASE_MCU_HCTL_SHADERS_READY_OFF; -+ else -+ backend->mcu_state = KBASE_MCU_POWER_DOWN; -+ } -+ break; ++ /* Re-enable GPU 
hardware counters */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ case KBASE_MCU_HCTL_SHADERS_READY_OFF: -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ shaders_ready, ACTION_PWROFF); -+ backend->mcu_state = -+ KBASE_MCU_HCTL_SHADERS_PEND_OFF; -+ break; ++ KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); ++} + -+ case KBASE_MCU_HCTL_SHADERS_PEND_OFF: -+ if (!shaders_trans && !shaders_ready) { -+ backend->pm_shaders_core_mask = 0; -+ backend->mcu_state = KBASE_MCU_POWER_DOWN; -+ } -+ break; ++static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) ++{ ++ struct kbase_device *kbdev = container_of(timer, struct kbase_device, ++ hwaccess.backend.reset_timer); + -+ case KBASE_MCU_POWER_DOWN: -+ kbase_csf_firmware_disable_mcu(kbdev); -+ backend->mcu_state = KBASE_MCU_PEND_OFF; -+ break; ++ /* Reset still pending? */ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == ++ KBASE_RESET_GPU_COMMITTED) ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); + -+ case KBASE_MCU_PEND_OFF: -+ /* wait synchronously for the MCU to get disabled */ -+ kbase_csf_firmware_disable_mcu_wait(kbdev); -+ if (!kbdev->csf.firmware_hctl_core_pwr) -+ kbasep_pm_toggle_power_interrupt(kbdev, true); -+ backend->mcu_state = KBASE_MCU_OFF; -+ break; -+#ifdef KBASE_PM_RUNTIME -+ case KBASE_MCU_ON_SLEEP_INITIATE: -+ if (!kbase_pm_is_mcu_desired(kbdev)) { -+ kbase_csf_firmware_trigger_mcu_sleep(kbdev); -+ backend->mcu_state = KBASE_MCU_ON_PEND_SLEEP; -+ } else -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ break; ++ return HRTIMER_NORESTART; ++} + -+ case KBASE_MCU_ON_PEND_SLEEP: -+ if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { -+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, -+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); -+ backend->mcu_state = KBASE_MCU_IN_SLEEP; -+ kbase_pm_enable_db_mirror_interrupt(kbdev); -+ kbase_csf_scheduler_reval_idleness_post_sleep(kbdev); -+ /* Enable PM interrupt, after MCU has been put -+ * to sleep, for the power down of L2. -+ */ -+ if (!kbdev->csf.firmware_hctl_core_pwr) -+ kbasep_pm_toggle_power_interrupt(kbdev, true); -+ } -+ break; ++/* ++ * If all jobs are evicted from the GPU then we can reset the GPU ++ * immediately instead of waiting for the timeout to elapse ++ */ + -+ case KBASE_MCU_IN_SLEEP: -+ if (kbase_pm_is_mcu_desired(kbdev) && -+ backend->l2_state == KBASE_L2_ON) { -+ wait_mcu_as_inactive(kbdev); -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( -+ kbdev, kbase_backend_get_cycle_cnt(kbdev)); -+ kbase_pm_enable_mcu_db_notification(kbdev); -+ kbase_pm_disable_db_mirror_interrupt(kbdev); -+ /* Disable PM interrupt after L2 has been -+ * powered up for the wakeup of MCU. 
-+ */ -+ if (!kbdev->csf.firmware_hctl_core_pwr) -+ kbasep_pm_toggle_power_interrupt(kbdev, false); -+ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ } -+ break; -+#endif -+ case KBASE_MCU_RESET_WAIT: -+ /* Reset complete */ -+ if (!backend->in_reset) -+ backend->mcu_state = KBASE_MCU_OFF; ++static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) ++{ ++ unsigned int i; ++ int pending_jobs = 0; + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ kbdev->csf.coresight.disable_on_pmode_enter = false; -+ kbdev->csf.coresight.enable_on_pmode_exit = false; -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+ break; ++ /* Count the number of jobs */ ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); + -+ default: -+ WARN(1, "Invalid state in mcu_state: %d", -+ backend->mcu_state); -+ } ++ if (pending_jobs > 0) { ++ /* There are still jobs on the GPU - wait */ ++ return; ++ } + -+ if (backend->mcu_state != prev_state) { -+ dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", -+ kbase_mcu_state_to_string(prev_state), -+ kbase_mcu_state_to_string(backend->mcu_state)); -+ kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state); -+ } ++ /* To prevent getting incorrect registers when dumping failed job, ++ * skip early reset. ++ */ ++ if (atomic_read(&kbdev->job_fault_debug) > 0) ++ return; + -+ } while (backend->mcu_state != prev_state); ++ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has ++ * been called), and that no other thread beat this thread to starting ++ * the reset ++ */ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != ++ KBASE_RESET_GPU_COMMITTED) { ++ /* Reset has already occurred */ ++ return; ++ } + -+ return 0; ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); +} + -+static void core_idle_worker(struct work_struct *work) ++static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = -+ container_of(work, struct kbase_device, pm.backend.core_idle_work); -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) { -+ const unsigned int core_inactive_wait_ms = 1; -+ u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); -+ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); -+ u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask; ++ kbasep_try_reset_gpu_early_locked(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ if (!(cores_to_disable & active_cores)) { -+ kbase_pm_update_state(kbdev); -+ break; -+ } ++/** ++ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU ++ * @kbdev: kbase device ++ * @flags: Bitfield indicating impact of reset (see flag defines) ++ * ++ * This function soft-stops all the slots to ensure that as many jobs as ++ * possible are saved. ++ * ++ * Return: boolean which should be interpreted as follows: ++ * true - Prepared for reset, kbase_reset_gpu_locked should be called. ++ * false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. 
++ */ ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags) ++{ ++ int i; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ msleep(core_inactive_wait_ms); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ /* GPU access has been removed, reset will be done by ++ * Arbiter instead ++ */ ++ return false; + } -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} +#endif + -+static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) -+{ -+ const char *const strings[] = { -+#define KBASEP_L2_STATE(n) #n, -+#include "mali_kbase_pm_l2_states.h" -+#undef KBASEP_L2_STATE -+ }; -+ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) -+ return "Bad level 2 cache state"; -+ else -+ return strings[state]; ++ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) ++ kbase_instr_hwcnt_on_unrecoverable_error(kbdev); ++ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING, ++ KBASE_RESET_GPU_PREPARED) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return false; ++ } ++ ++ kbase_disjoint_state_up(kbdev); ++ ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ kbase_job_slot_softstop(kbdev, i, NULL); ++ ++ return true; +} + -+static -+void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state) ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) +{ -+#if KBASE_KTRACE_ENABLE -+ switch (state) { -+#define KBASEP_L2_STATE(n) \ -+ case KBASE_L2_ ## n: \ -+ KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ -+ break; -+#include "mali_kbase_pm_l2_states.h" -+#undef KBASEP_L2_STATE -+ } -+#endif ++ unsigned long lock_flags; ++ bool ret; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); ++ ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); ++ ++ return ret; +} ++KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + -+#if !MALI_USE_CSF -+/* On powering on the L2, the tracked kctx becomes stale and can be cleared. -+ * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER -+ * operation on the first submitted katom after the L2 powering on. ++/* ++ * This function should be called after kbase_prepare_to_reset_gpu if it ++ * returns true. It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu. ++ * ++ * After this function is called (or not called if kbase_prepare_to_reset_gpu ++ * returned false), the caller should wait for ++ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset ++ * has completed. 
+ */ -+static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbdev) ++void kbase_reset_gpu(struct kbase_device *kbdev) +{ -+ int js; ++ /* Note this is an assert/atomic_set because it is a software issue for ++ * a race to be occurring here ++ */ ++ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) ++ return; ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (!kbase_is_quick_reset_enabled(kbdev)) ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", ++ kbdev->reset_timeout_ms); + -+ /* Clear the slots' last katom submission kctx */ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; ++ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, ++ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), ++ HRTIMER_MODE_REL); ++ ++ /* Try resetting early */ ++ kbasep_try_reset_gpu_early(kbdev); +} -+#endif ++KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+static bool can_power_down_l2(struct kbase_device *kbdev) ++void kbase_reset_gpu_locked(struct kbase_device *kbdev) +{ -+#if MALI_USE_CSF -+ /* Due to the HW issue GPU2019-3878, need to prevent L2 power off -+ * whilst MMU command is in progress. -+ * Also defer the power-down if MMU is in process of page migration. ++ /* Note this is an assert/atomic_set because it is a software issue for ++ * a race to be occurring here + */ -+ return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; -+#else -+ return !kbdev->mmu_page_migrate_in_progress; -+#endif ++ if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED)) ++ return; ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED); ++ ++ if (!kbase_is_quick_reset_enabled(kbdev)) ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", ++ kbdev->reset_timeout_ms); ++ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, ++ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), ++ HRTIMER_MODE_REL); ++ ++ /* Try resetting early */ ++ kbasep_try_reset_gpu_early_locked(kbdev); +} + -+static bool can_power_up_l2(struct kbase_device *kbdev) ++int kbase_reset_gpu_silent(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING, ++ KBASE_RESET_GPU_SILENT) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return -EAGAIN; ++ } + -+ /* Avoiding l2 transition if MMU is undergoing page migration */ -+ return !kbdev->mmu_page_migrate_in_progress; ++ kbase_disjoint_state_up(kbdev); ++ ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); ++ ++ return 0; +} + -+static bool need_tiler_control(struct kbase_device *kbdev) ++bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) +{ -+#if MALI_USE_CSF -+ if (kbase_pm_no_mcu_core_pwroff(kbdev)) -+ return true; -+ else ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_NOT_PENDING) + return false; -+#else ++ + return true; -+#endif +} + -+static int kbase_pm_l2_update_state(struct kbase_device *kbdev) ++bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ u64 l2_present = 
kbdev->gpu_props.curr_config.l2_present; -+ u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; -+ bool l2_power_up_done; -+ enum kbase_l2_core_state prev_state; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING; ++} + -+ do { -+ /* Get current state */ -+ u64 l2_trans = kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_L2); -+ u64 l2_ready = kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_L2); ++int kbase_reset_gpu_wait(struct kbase_device *kbdev) ++{ ++ wait_event(kbdev->hwaccess.backend.reset_wait, ++ atomic_read(&kbdev->hwaccess.backend.reset_gpu) ++ == KBASE_RESET_GPU_NOT_PENDING); + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* -+ * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores -+ * are vulnerable to corruption if gpu is lost -+ */ -+ if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { -+ backend->shaders_state = -+ KBASE_SHADERS_OFF_CORESTACK_OFF; -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) { -+ /* Don't progress until hw counters are disabled -+ * This may involve waiting for a worker to complete. -+ * The HW counters backend disable code checks for the -+ * GPU removed case and will error out without touching -+ * the hardware. This step is needed to keep the HW -+ * counters in a consistent state after a GPU lost. -+ */ -+ backend->l2_state = -+ KBASE_L2_ON_HWCNT_DISABLE; -+ KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, -+ backend->l2_state); -+ kbase_pm_trigger_hwcnt_disable(kbdev); -+ } ++ return 0; ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); + -+ if (backend->hwcnt_disabled) { -+ backend->l2_state = KBASE_L2_OFF; -+ KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state); -+ dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); -+ } -+ break; -+ } -+#endif ++int kbase_reset_gpu_init(struct kbase_device *kbdev) ++{ ++ kbdev->hwaccess.backend.reset_workq = alloc_workqueue( ++ "Mali reset workqueue", 0, 1); ++ if (kbdev->hwaccess.backend.reset_workq == NULL) ++ return -ENOMEM; + -+ /* mask off ready from trans in case transitions finished -+ * between the register reads -+ */ -+ l2_trans &= ~l2_ready; ++ INIT_WORK(&kbdev->hwaccess.backend.reset_work, ++ kbasep_reset_timeout_worker); + -+ prev_state = backend->l2_state; ++ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ kbdev->hwaccess.backend.reset_timer.function = ++ kbasep_reset_timer_callback; + -+ switch (backend->l2_state) { -+ case KBASE_L2_OFF: -+ if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ /* Enable HW timer of IPA control before -+ * L2 cache is powered-up. -+ */ -+ kbase_ipa_control_handle_gpu_sleep_exit(kbdev); -+#endif -+ /* -+ * Set the desired config for L2 before -+ * powering it on -+ */ -+ kbase_pm_l2_config_override(kbdev); -+ kbase_pbha_write_settings(kbdev); ++ return 0; ++} + -+ /* If Host is controlling the power for shader -+ * cores, then it also needs to control the -+ * power for Tiler. -+ * Powering on the tiler will also power the -+ * L2 cache. -+ */ -+ if (need_tiler_control(kbdev)) { -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, -+ ACTION_PWRON); -+ } else { -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, -+ ACTION_PWRON); -+ } -+#if !MALI_USE_CSF -+ /* If we have more than one L2 cache then we -+ * must power them on explicitly. 
-+ */ -+ if (l2_present != 1) -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, -+ l2_present & ~1, -+ ACTION_PWRON); -+ /* Clear backend slot submission kctx */ -+ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); -+#endif -+ backend->l2_state = KBASE_L2_PEND_ON; -+ } -+ break; ++void kbase_reset_gpu_term(struct kbase_device *kbdev) ++{ ++ destroy_workqueue(kbdev->hwaccess.backend.reset_workq); ++} + -+ case KBASE_L2_PEND_ON: -+ l2_power_up_done = false; -+ if (!l2_trans && l2_ready == l2_present) { -+ if (need_tiler_control(kbdev)) { -+ u64 tiler_trans = kbase_pm_get_trans_cores( -+ kbdev, KBASE_PM_CORE_TILER); -+ u64 tiler_ready = kbase_pm_get_ready_cores( -+ kbdev, KBASE_PM_CORE_TILER); -+ tiler_trans &= ~tiler_ready; ++static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, ++ const u64 affinity, const u64 limited_core_mask) ++{ ++ const u64 result = affinity & limited_core_mask; + -+ if (!tiler_trans && tiler_ready == tiler_present) { -+ KBASE_KTRACE_ADD(kbdev, -+ PM_CORES_CHANGE_AVAILABLE_TILER, -+ NULL, tiler_ready); -+ l2_power_up_done = true; -+ } -+ } else { -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, -+ l2_ready); -+ l2_power_up_done = true; -+ } -+ } -+ if (l2_power_up_done) { -+ /* -+ * Ensure snoops are enabled after L2 is powered -+ * up. Note that kbase keeps track of the snoop -+ * state, so safe to repeatedly call. -+ */ -+ kbase_pm_cache_snoop_enable(kbdev); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ dev_dbg(kbdev->dev, ++ "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", ++ (unsigned long)affinity, ++ (unsigned long)result, ++ (unsigned long)limited_core_mask); ++#else ++ CSTD_UNUSED(kbdev); ++#endif + -+ /* With the L2 enabled, we can now enable -+ * hardware counters. -+ */ -+ if (kbdev->pm.backend.gpu_clock_slow_down_wa) -+ backend->l2_state = -+ KBASE_L2_RESTORE_CLOCKS; -+ else -+ backend->l2_state = -+ KBASE_L2_ON_HWCNT_ENABLE; ++ return result; ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +new file mode 100644 +index 000000000..bfd55a6e2 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +@@ -0,0 +1,148 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Now that the L2 is on, the shaders can start -+ * powering on if they're required. The obvious -+ * way to do this would be to call -+ * kbase_pm_shaders_update_state() here. -+ * However, that would make the two state -+ * machines mutually recursive, as the opposite -+ * would be needed for powering down. 
Instead, -+ * callers of this function should use the -+ * kbase_pm_update_state() wrapper, which will -+ * call the shader state machine immediately -+ * after the L2 (for power up), or -+ * automatically re-invoke the L2 state machine -+ * when the shaders power down. -+ */ -+ } -+ break; ++/* ++ * Job Manager backend-specific low-level APIs. ++ */ + -+ case KBASE_L2_RESTORE_CLOCKS: -+ /* We always assume only GPUs being affected by -+ * BASE_HW_ISSUE_GPU2017_1336 fall into this state -+ */ -+ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); ++#ifndef _KBASE_JM_HWACCESS_H_ ++#define _KBASE_JM_HWACCESS_H_ + -+ /* If L2 not needed, we need to make sure cancellation -+ * of any previously issued work to restore GPU clock. -+ * For it, move to KBASE_L2_SLOW_DOWN_CLOCKS state. -+ */ -+ if (!kbase_pm_is_l2_desired(kbdev)) { -+ backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; -+ break; -+ } ++#include ++#include ++#include + -+ backend->gpu_clock_slow_down_desired = false; -+ if (backend->gpu_clock_slowed_down) -+ kbase_pm_control_gpu_clock(kbdev); -+ else -+ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; -+ break; ++#include ++#include + -+ case KBASE_L2_ON_HWCNT_ENABLE: -+#if !MALI_USE_CSF -+ backend->hwcnt_desired = true; -+ if (backend->hwcnt_disabled) { -+ kbase_hwcnt_context_enable( -+ kbdev->hwcnt_gpu_ctx); -+ backend->hwcnt_disabled = false; -+ } -+#endif -+ backend->l2_state = KBASE_L2_ON; -+ break; ++/** ++ * kbase_job_done_slot() - Complete the head job on a particular job-slot ++ * @kbdev: Device pointer ++ * @s: Job slot ++ * @completion_code: Completion code of job reported by GPU ++ * @job_tail: Job tail address reported by GPU ++ * @end_timestamp: Timestamp of job completion ++ */ ++void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, ++ u64 job_tail, ktime_t *end_timestamp); + -+ case KBASE_L2_ON: -+ if (!kbase_pm_is_l2_desired(kbdev)) { -+#if !MALI_USE_CSF -+ /* Do not power off L2 until the shaders and -+ * core stacks are off. -+ */ -+ if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) -+ break; -+#else -+ /* Do not power off L2 until the MCU has been stopped */ -+ if ((backend->mcu_state != KBASE_MCU_OFF) && -+ (backend->mcu_state != KBASE_MCU_IN_SLEEP)) -+ break; ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) ++{ ++ snprintf(js_string, js_size, "job_slot_%u", js); ++ return js_string; ++} +#endif + -+ /* We need to make sure hardware counters are -+ * disabled before powering down the L2, to -+ * prevent loss of data. -+ * -+ * We waited until after the cores were powered -+ * down to prevent ping-ponging between hwcnt -+ * enabled and disabled, which would have -+ * happened if userspace submitted more work -+ * while we were trying to power down. -+ */ -+ backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; -+ } -+ break; ++/** ++ * kbase_job_hw_submit() - Submit a job to the GPU ++ * @kbdev: Device pointer ++ * @katom: Atom to submit ++ * @js: Job slot to submit on ++ * ++ * The caller must check kbasep_jm_is_submit_slots_free() != false before ++ * calling this. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold the hwaccess_lock ++ * ++ * Return: 0 if the job was successfully submitted to hardware, an error otherwise. 
++ */ ++int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); + -+ case KBASE_L2_ON_HWCNT_DISABLE: +#if !MALI_USE_CSF -+ /* If the L2 became desired while we were waiting on the -+ * worker to do the actual hwcnt disable (which might -+ * happen if some work was submitted immediately after -+ * the shaders powered off), then we need to early-out -+ * of this state and re-enable hwcnt. -+ * -+ * If we get lucky, the hwcnt disable might not have -+ * actually started yet, and the logic in the hwcnt -+ * enable state will prevent the worker from -+ * performing the disable entirely, preventing loss of -+ * any hardware counter data. -+ * -+ * If the hwcnt disable has started, then we'll lose -+ * a tiny amount of hardware counter data between the -+ * disable and the re-enable occurring. -+ * -+ * This loss of data is preferable to the alternative, -+ * which is to block the shader cores from doing any -+ * work until we're sure hwcnt has been re-enabled. -+ */ -+ if (kbase_pm_is_l2_desired(kbdev)) { -+ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; -+ break; -+ } -+ -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) -+ kbase_pm_trigger_hwcnt_disable(kbdev); -+#endif -+ -+ if (backend->hwcnt_disabled) { -+ if (kbdev->pm.backend.gpu_clock_slow_down_wa) -+ backend->l2_state = -+ KBASE_L2_SLOW_DOWN_CLOCKS; -+ else -+ backend->l2_state = KBASE_L2_POWER_DOWN; -+ } -+ break; -+ -+ case KBASE_L2_SLOW_DOWN_CLOCKS: -+ /* We always assume only GPUs being affected by -+ * BASE_HW_ISSUE_GPU2017_1336 fall into this state -+ */ -+ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); ++/** ++ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop ++ * on the specified atom ++ * @kbdev: Device pointer ++ * @js: Job slot to stop on ++ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or ++ * JSn_COMMAND_SOFT_STOP ++ * @core_reqs: Core requirements of atom to stop ++ * @target_katom: Atom to stop ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold the hwaccess_lock ++ */ ++void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, ++ u32 action, base_jd_core_req core_reqs, ++ struct kbase_jd_atom *target_katom); ++#endif /* !MALI_USE_CSF */ + -+ /* L2 needs to be powered up. And we need to make sure -+ * cancellation of any previously issued work to slow -+ * down GPU clock. For it, we move to the state, -+ * KBASE_L2_RESTORE_CLOCKS. -+ */ -+ if (kbase_pm_is_l2_desired(kbdev)) { -+ backend->l2_state = KBASE_L2_RESTORE_CLOCKS; -+ break; -+ } ++/** ++ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job ++ * slot belonging to a given context. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer. May be NULL ++ * @katom: Specific atom to stop. May be NULL ++ * @js: Job slot to hard stop ++ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or ++ * JSn_COMMAND_SOFT_STOP ++ * ++ * If no context is provided then all jobs on the slot will be soft or hard ++ * stopped. ++ * ++ * If a katom is provided then only that specific atom will be stopped. In this ++ * case the kctx parameter is ignored. ++ * ++ * Jobs that are on the slot but are not yet on the GPU will be unpulled and ++ * returned to the job scheduler. 
++ * ++ * Return: true if an atom was stopped, false otherwise ++ */ ++bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js, struct kbase_jd_atom *katom, u32 action); + -+ backend->gpu_clock_slow_down_desired = true; -+ if (!backend->gpu_clock_slowed_down) -+ kbase_pm_control_gpu_clock(kbdev); -+ else -+ backend->l2_state = KBASE_L2_POWER_DOWN; ++/** ++ * kbase_job_slot_init - Initialise job slot framework ++ * @kbdev: Device pointer ++ * ++ * Called on driver initialisation ++ * ++ * Return: 0 on success ++ */ ++int kbase_job_slot_init(struct kbase_device *kbdev); + -+ break; ++/** ++ * kbase_job_slot_halt - Halt the job slot framework ++ * @kbdev: Device pointer ++ * ++ * Should prevent any further job slot processing ++ */ ++void kbase_job_slot_halt(struct kbase_device *kbdev); + -+ case KBASE_L2_POWER_DOWN: -+ if (kbase_pm_is_l2_desired(kbdev)) -+ backend->l2_state = KBASE_L2_PEND_ON; -+ else if (can_power_down_l2(kbdev)) { -+ if (!backend->l2_always_on) -+ /* Powering off the L2 will also power off the -+ * tiler. -+ */ -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, -+ l2_present, -+ ACTION_PWROFF); -+ else -+ /* If L2 cache is powered then we must flush it -+ * before we power off the GPU. Normally this -+ * would have been handled when the L2 was -+ * powered off. -+ */ -+ kbase_gpu_start_cache_clean_nolock( -+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); -+#if !MALI_USE_CSF -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); -+#else -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u); -+#endif -+ backend->l2_state = KBASE_L2_PEND_OFF; -+ } -+ break; ++/** ++ * kbase_job_slot_term - Terminate job slot framework ++ * @kbdev: Device pointer ++ * ++ * Called on driver termination ++ */ ++void kbase_job_slot_term(struct kbase_device *kbdev); + -+ case KBASE_L2_PEND_OFF: -+ if (!backend->l2_always_on) { -+ /* We only need to check the L2 here - if the L2 -+ * is off then the tiler is definitely also off. -+ */ -+ if (!l2_trans && !l2_ready) { -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ /* Allow clock gating within the GPU and prevent it -+ * from being seen as active during sleep. -+ */ -+ kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -+#endif -+ /* L2 is now powered off */ -+ backend->l2_state = KBASE_L2_OFF; -+ } -+ } else { -+ if (!kbdev->cache_clean_in_progress) { -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ /* Allow clock gating within the GPU and prevent it -+ * from being seen as active during sleep. -+ */ -+ kbase_ipa_control_handle_gpu_sleep_enter(kbdev); -+#endif -+ backend->l2_state = KBASE_L2_OFF; -+ } -+ } -+ break; ++/** ++ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush ++ * @kbdev: Device pointer ++ * ++ * Caller must not be in IRQ context ++ */ ++void kbase_gpu_cache_clean(struct kbase_device *kbdev); + -+ case KBASE_L2_RESET_WAIT: -+ /* Reset complete */ -+ if (!backend->in_reset) -+ backend->l2_state = KBASE_L2_OFF; -+ break; ++#endif /* _KBASE_JM_HWACCESS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +new file mode 100644 +index 000000000..7db2b353b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +@@ -0,0 +1,1873 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ default: -+ WARN(1, "Invalid state in l2_state: %d", -+ backend->l2_state); -+ } ++/* ++ * Register-based HW access backend specific APIs ++ */ + -+ if (backend->l2_state != prev_state) { -+ dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", -+ kbase_l2_core_state_to_string(prev_state), -+ kbase_l2_core_state_to_string( -+ backend->l2_state)); -+ kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ } while (backend->l2_state != prev_state); ++/** ++ * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty. ++ * ++ * @rb: ring buffer ++ * ++ * Note: HW access lock must be held ++ */ ++#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) + -+ if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && -+ backend->l2_state == KBASE_L2_OFF) { -+ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; -+ queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, -+ &kbdev->pm.backend.gpu_poweroff_wait_work); -+ } ++/** ++ * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. 
++ * ++ * @rb: ring buffer ++ * ++ * Note: HW access lock must be held ++ */ ++#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) + -+ return 0; -+} ++static void kbase_gpu_release_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp); + -+static void shader_poweroff_timer_stop_callback(struct work_struct *data) ++/** ++ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer ++ * @kbdev: Device pointer ++ * @katom: Atom to enqueue ++ * ++ * Context: Caller must hold the HW access lock ++ */ ++static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ unsigned long flags; -+ struct kbasep_pm_tick_timer_state *stt = container_of(data, -+ struct kbasep_pm_tick_timer_state, work); -+ struct kbase_device *kbdev = container_of(stt, struct kbase_device, -+ pm.backend.shader_tick_timer); ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + -+ hrtimer_cancel(&stt->timer); ++ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ stt->cancel_queued = false; -+ if (kbdev->pm.backend.gpu_powered) -+ kbase_pm_update_state(kbdev); ++ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; ++ rb->write_idx++; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** -+ * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer -+ * @kbdev: pointer to kbase device -+ * -+ * Synchronization between the shader state machine and the timer thread is -+ * difficult. This is because situations may arise where the state machine -+ * wants to start the timer, but the callback is already running, and has -+ * already passed the point at which it checks whether it is required, and so -+ * cancels itself, even though the state machine may have just tried to call -+ * hrtimer_start. ++ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once ++ * it has been completed ++ * @kbdev: Device pointer ++ * @js: Job slot to remove atom from ++ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in ++ * which case current time will be used. + * -+ * This cannot be stopped by holding hwaccess_lock in the timer thread, -+ * because there are still infinitesimally small sections at the start and end -+ * of the callback where the lock is not held. ++ * Context: Caller must hold the HW access lock + * -+ * Instead, a new state is added to the shader state machine, -+ * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee -+ * that when the shaders are switched off, the timer has definitely been -+ * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the -+ * timer is started, it is guaranteed that either the timer is already running -+ * (from an availability change or cancelled timer), or hrtimer_start will -+ * succeed. It is critical to avoid ending up in -+ * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could -+ * hang there forever. 
++ * Return: Atom removed from ringbuffer + */ -+static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) ++static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, ++ ktime_t *end_timestamp) +{ -+ struct kbasep_pm_tick_timer_state *stt = -+ &kbdev->pm.backend.shader_tick_timer; ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ struct kbase_jd_atom *katom; ++ ++ if (SLOT_RB_EMPTY(rb)) { ++ WARN(1, "GPU ringbuffer unexpectedly empty\n"); ++ return NULL; ++ } + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ stt->needed = false; ++ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + -+ if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { -+ stt->cancel_queued = true; -+ queue_work(stt->wq, &stt->work); -+ } -+} ++ kbase_gpu_release_atom(kbdev, katom, end_timestamp); + -+#if !MALI_USE_CSF -+static const char *kbase_shader_core_state_to_string( -+ enum kbase_shader_core_state state) -+{ -+ const char *const strings[] = { -+#define KBASEP_SHADER_STATE(n) #n, -+#include "mali_kbase_pm_shader_states.h" -+#undef KBASEP_SHADER_STATE -+ }; -+ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) -+ return "Bad shader core state"; -+ else -+ return strings[state]; ++ rb->read_idx++; ++ ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; ++ ++ return katom; +} + -+static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) ++struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ struct kbasep_pm_tick_timer_state *stt = -+ &kbdev->pm.backend.shader_tick_timer; -+ enum kbase_shader_core_state prev_state; -+ u64 stacks_avail = 0; ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (corestack_driver_control) -+ /* Always power on all the corestacks. Disabling certain -+ * corestacks when their respective shaders are not in the -+ * available bitmap is not currently supported. 
-+ */ -+ stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); ++ if ((SLOT_RB_ENTRIES(rb) - 1) < idx) ++ return NULL; /* idx out of range */ + -+ do { -+ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); -+ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); -+ u64 stacks_trans = 0; -+ u64 stacks_ready = 0; ++ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; ++} + -+ if (corestack_driver_control) { -+ stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); -+ stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); -+ } ++struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) ++{ ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + -+ /* -+ * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores -+ * are vulnerable to corruption if gpu is lost -+ */ -+ if (kbase_is_gpu_removed(kbdev) -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ || kbase_pm_is_gpu_lost(kbdev)) { -+#else -+ ) { -+#endif -+ backend->shaders_state = -+ KBASE_SHADERS_OFF_CORESTACK_OFF; -+ dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); -+ break; -+ } ++ if (SLOT_RB_EMPTY(rb)) ++ return NULL; + -+ /* mask off ready from trans in case transitions finished -+ * between the register reads -+ */ -+ shaders_trans &= ~shaders_ready; -+ stacks_trans &= ~stacks_ready; ++ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; ++} + -+ prev_state = backend->shaders_state; ++bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) ++{ ++ unsigned int js; + -+ switch (backend->shaders_state) { -+ case KBASE_SHADERS_OFF_CORESTACK_OFF: -+ /* Ignore changes to the shader core availability -+ * except at certain points where we can handle it, -+ * i.e. off and SHADERS_ON_CORESTACK_ON. 
-+ */ -+ backend->shaders_desired_mask = -+ kbase_pm_ca_get_core_mask(kbdev); -+ backend->pm_shaders_core_mask = 0; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (backend->shaders_desired && -+ backend->l2_state == KBASE_L2_ON) { -+ if (backend->hwcnt_desired && -+ !backend->hwcnt_disabled) { -+ /* Trigger a hwcounter dump */ -+ backend->hwcnt_desired = false; -+ kbase_pm_trigger_hwcnt_disable(kbdev); -+ } ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int i; + -+ if (backend->hwcnt_disabled) { -+ if (corestack_driver_control) { -+ kbase_pm_invoke(kbdev, -+ KBASE_PM_CORE_STACK, -+ stacks_avail, -+ ACTION_PWRON); -+ } -+ backend->shaders_state = -+ KBASE_SHADERS_OFF_CORESTACK_PEND_ON; -+ } -+ } -+ break; ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: -+ if (!stacks_trans && stacks_ready == stacks_avail) { -+ backend->shaders_avail = -+ backend->shaders_desired_mask; -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ backend->shaders_avail, ACTION_PWRON); ++ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ return true; ++ } ++ } ++ return false; ++} + -+ if (backend->pm_current_policy && -+ backend->pm_current_policy->handle_event) -+ backend->pm_current_policy->handle_event( -+ kbdev, -+ KBASE_PM_POLICY_EVENT_POWER_ON); ++int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) ++{ ++ int nr = 0; ++ int i; + -+ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; -+ } -+ break; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ case KBASE_SHADERS_PEND_ON_CORESTACK_ON: -+ if (!shaders_trans && shaders_ready == backend->shaders_avail) { -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); -+ backend->pm_shaders_core_mask = shaders_ready; -+ backend->hwcnt_desired = true; -+ if (backend->hwcnt_disabled) { -+#if MALI_USE_CSF -+ unsigned long flags; ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ kbase_csf_scheduler_spin_lock(kbdev, -+ &flags); -+#endif -+ kbase_hwcnt_context_enable( -+ kbdev->hwcnt_gpu_ctx); -+#if MALI_USE_CSF -+ kbase_csf_scheduler_spin_unlock(kbdev, -+ flags); -+#endif -+ backend->hwcnt_disabled = false; -+ } ++ if (katom && (katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED)) ++ nr++; ++ } + -+ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; -+ } -+ break; ++ return nr; ++} + -+ case KBASE_SHADERS_ON_CORESTACK_ON: -+ backend->shaders_desired_mask = -+ kbase_pm_ca_get_core_mask(kbdev); ++int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) ++{ ++ int nr = 0; ++ int i; + -+ /* If shaders to change state, trigger a counter dump */ -+ if (!backend->shaders_desired || -+ (backend->shaders_desired_mask != shaders_ready)) { -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) -+ kbase_pm_trigger_hwcnt_disable(kbdev); -+ backend->shaders_state = -+ KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; -+ } -+ break; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: -+ backend->shaders_desired_mask = -+ kbase_pm_ca_get_core_mask(kbdev); ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ if (kbase_gpu_inspect(kbdev, js, i)) ++ nr++; ++ } + -+ if (!backend->hwcnt_disabled) { -+ /* Wait for being disabled */ -+ ; -+ } else if (!backend->shaders_desired) { -+ if (backend->pm_current_policy && -+ backend->pm_current_policy->handle_event) -+ 
backend->pm_current_policy->handle_event( -+ kbdev, -+ KBASE_PM_POLICY_EVENT_IDLE); ++ return nr; ++} + -+ if (kbdev->pm.backend.protected_transition_override || -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ kbase_pm_is_suspending(kbdev) || -+ kbase_pm_is_gpu_lost(kbdev) || -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ !stt->configured_ticks || -+ WARN_ON(stt->cancel_queued)) { -+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -+ } else { -+ stt->remaining_ticks = stt->configured_ticks; -+ stt->needed = true; ++static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, ++ enum kbase_atom_gpu_rb_state min_rb_state) ++{ ++ int nr = 0; ++ int i; + -+ /* The shader hysteresis timer is not -+ * done the obvious way, which would be -+ * to start an hrtimer when the shader -+ * power off is requested. Instead, -+ * use a 'tick' timer, and set the -+ * remaining number of ticks on a power -+ * off request. This avoids the -+ * latency of starting, then -+ * immediately cancelling an hrtimer -+ * when the shaders are re-requested -+ * before the timeout expires. -+ */ -+ if (!hrtimer_active(&stt->timer)) -+ hrtimer_start(&stt->timer, -+ stt->configured_interval, -+ HRTIMER_MODE_REL); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; -+ } -+ } else if (backend->shaders_desired_mask & ~shaders_ready) { -+ /* set cores ready but not available to -+ * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON -+ * check pass -+ */ -+ backend->shaders_avail = -+ (backend->shaders_desired_mask | shaders_ready); ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ backend->shaders_avail & ~shaders_ready, -+ ACTION_PWRON); -+ backend->shaders_state = -+ KBASE_SHADERS_PEND_ON_CORESTACK_ON; -+ } else if (shaders_ready & ~backend->shaders_desired_mask) { -+ backend->shaders_state = -+ KBASE_SHADERS_WAIT_GPU_IDLE; -+ } else { -+ backend->shaders_state = -+ KBASE_SHADERS_PEND_ON_CORESTACK_ON; -+ } -+ break; ++ if (katom && (katom->gpu_rb_state >= min_rb_state)) ++ nr++; ++ } + -+ case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: -+ if (WARN_ON(!hrtimer_active(&stt->timer))) { -+ stt->remaining_ticks = 0; -+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -+ } ++ return nr; ++} + -+ if (backend->shaders_desired) { -+ if (backend->pm_current_policy && -+ backend->pm_current_policy->handle_event) -+ backend->pm_current_policy->handle_event( -+ kbdev, -+ KBASE_PM_POLICY_EVENT_TIMER_HIT); ++/** ++ * check_secure_atom - Check if the given atom is in the given secure state and ++ * has a ringbuffer state of at least ++ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION ++ * @katom: Atom pointer ++ * @secure: Desired secure state ++ * ++ * Return: true if atom is in the given state, false otherwise ++ */ ++static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) ++{ ++ if (katom->gpu_rb_state >= ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && ++ ((kbase_jd_katom_is_protected(katom) && secure) || ++ (!kbase_jd_katom_is_protected(katom) && !secure))) ++ return true; + -+ stt->remaining_ticks = 0; -+ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; -+ } else if (stt->remaining_ticks == 0) { -+ if (backend->pm_current_policy && -+ backend->pm_current_policy->handle_event) -+ backend->pm_current_policy->handle_event( -+ kbdev, -+ KBASE_PM_POLICY_EVENT_TIMER_MISS); ++ return false; 
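/* Editor's note -- illustrative sketch, not part of the ARM driver sources.
 * The slot_rb ring buffers used throughout this file are indexed by
 * free-running u8 counters: SLOT_RB_EMPTY() and SLOT_RB_ENTRIES() above rely
 * on unsigned wraparound plus an s8 cast to report occupancy, and entries are
 * addressed with idx & SLOT_RB_MASK. The stand-alone user-space model below
 * mirrors that arithmetic under the assumption (implicit in those macros)
 * that the ring size is a small power of two with mask == size - 1; every
 * demo_* identifier is hypothetical and exists only for this example.
 */
#if 0   /* compile-tested illustration only; never built with the driver */
#include <stdint.h>
#include <stdio.h>

#define DEMO_RB_SIZE 2u                  /* assumed power of two */
#define DEMO_RB_MASK (DEMO_RB_SIZE - 1u)

struct demo_rb {
        uint8_t write_idx;               /* free-running, wraps at 256 */
        uint8_t read_idx;
};

static int demo_rb_entries(const struct demo_rb *rb)
{
        /* Same idea as SLOT_RB_ENTRIES(): the u8 difference stays correct
         * across wraparound while at most DEMO_RB_SIZE items are queued.
         */
        return (int)(int8_t)(rb->write_idx - rb->read_idx);
}

int main(void)
{
        struct demo_rb rb = { .write_idx = 255, .read_idx = 254 };

        printf("entries=%d\n", demo_rb_entries(&rb));        /* prints 1 */
        rb.write_idx++;                                      /* wraps to 0 */
        printf("entries=%d\n", demo_rb_entries(&rb));        /* prints 2 */
        printf("oldest slot=%u\n", rb.read_idx & DEMO_RB_MASK); /* slot 0 */
        return 0;
}
#endif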
++} + -+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ } else if (kbase_pm_is_suspending(kbdev) || -+ kbase_pm_is_gpu_lost(kbdev)) { -+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ } -+ break; ++/** ++ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given ++ * secure state in the ringbuffers of at least ++ * state ++ * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE ++ * @kbdev: Device pointer ++ * @secure: Desired secure state ++ * ++ * Return: true if any atoms are in the given state, false otherwise ++ */ ++static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, ++ bool secure) ++{ ++ unsigned int js; + -+ case KBASE_SHADERS_WAIT_GPU_IDLE: -+ /* If partial shader core off need to wait the job in -+ * running and next register finished then flush L2 -+ * or it might hit GPU2017-861 -+ */ -+ if (!kbase_gpu_atoms_submitted_any(kbdev)) { -+ backend->partial_shaderoff = true; -+ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; -+ } -+ break; ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int i; + -+ case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: -+ if (!backend->partial_shaderoff) -+ shader_poweroff_timer_queue_cancel(kbdev); ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, i); + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { -+ kbase_gpu_start_cache_clean_nolock( -+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); -+ backend->shaders_state = -+ KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; -+ } else { -+ backend->shaders_state = -+ KBASE_SHADERS_READY_OFF_CORESTACK_ON; ++ if (katom) { ++ if (check_secure_atom(katom, secure)) ++ return true; + } -+ break; ++ } ++ } + -+ case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: -+ if (!kbdev->cache_clean_in_progress) -+ backend->shaders_state = -+ KBASE_SHADERS_READY_OFF_CORESTACK_ON; ++ return false; ++} + -+ break; ++int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ case KBASE_SHADERS_READY_OFF_CORESTACK_ON: -+ if (backend->partial_shaderoff) { -+ backend->partial_shaderoff = false; -+ /* remove cores available but not ready to -+ * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON -+ * check pass -+ */ ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* The GPU is being reset - so prevent submission */ ++ return 0; ++ } + -+ /* shaders_desired_mask shall be a subset of -+ * shaders_ready -+ */ -+ WARN_ON(backend->shaders_desired_mask & ~shaders_ready); -+ WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); ++ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); ++} + -+ backend->shaders_avail = -+ backend->shaders_desired_mask; -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); -+ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); -+ } else { -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, -+ shaders_ready, ACTION_PWROFF); + -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); ++static void kbase_gpu_release_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ backend->shaders_state = 
KBASE_SHADERS_PEND_OFF_CORESTACK_ON; -+ } -+ break; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: -+ if (!shaders_trans && !shaders_ready) { -+ if (corestack_driver_control) -+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, -+ stacks_avail, ACTION_PWROFF); ++ switch (katom->gpu_rb_state) { ++ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: ++ /* Should be impossible */ ++ WARN(1, "Attempting to release atom not in ringbuffer\n"); ++ break; + -+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; -+ } -+ break; ++ case KBASE_ATOM_GPU_RB_SUBMITTED: ++ kbase_kinstr_jm_atom_hw_release(katom); ++ /* Inform power management at start/finish of atom so it can ++ * update its GPU utilisation metrics. Mark atom as not ++ * submitted beforehand. ++ */ ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; ++ kbase_pm_metrics_update(kbdev, end_timestamp); + -+ case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: -+ if (!stacks_trans && !stacks_ready) { -+ /* On powered off, re-enable the hwcnt */ -+ backend->pm_shaders_core_mask = 0; -+ backend->hwcnt_desired = true; -+ if (backend->hwcnt_disabled) { -+#if MALI_USE_CSF -+ unsigned long flags; ++ /* Inform platform at start/finish of atom */ ++ kbasep_platform_event_atom_complete(katom); + -+ kbase_csf_scheduler_spin_lock(kbdev, -+ &flags); -+#endif -+ kbase_hwcnt_context_enable( -+ kbdev->hwcnt_gpu_ctx); -+#if MALI_USE_CSF -+ kbase_csf_scheduler_spin_unlock(kbdev, -+ flags); -+#endif -+ backend->hwcnt_disabled = false; -+ } -+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; -+ } -+ break; ++ if (katom->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + -+ case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: -+ if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) -+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; -+ break; ++ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); ++ KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); ++ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); + -+ case KBASE_SHADERS_RESET_WAIT: -+ /* Reset complete */ -+ if (!backend->in_reset) -+ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; -+ break; -+ } ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_READY: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: ++ break; + -+ if (backend->shaders_state != prev_state) -+ dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", -+ kbase_shader_core_state_to_string(prev_state), -+ kbase_shader_core_state_to_string( -+ backend->shaders_state)); -+ -+ } while (backend->shaders_state != prev_state); ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: ++ if (kbase_jd_katom_is_protected(katom) && ++ (katom->protected_state.enter != ++ KBASE_ATOM_ENTER_PROTECTED_CHECK) && ++ (katom->protected_state.enter != ++ KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { ++ kbase_pm_protected_override_disable(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); ++ } ++ if (kbase_jd_katom_is_protected(katom) && ++ (katom->protected_state.enter == ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) ++ kbase_pm_protected_entry_override_disable(kbdev); ++ if (!kbase_jd_katom_is_protected(katom) && ++ (katom->protected_state.exit != ++ KBASE_ATOM_EXIT_PROTECTED_CHECK) && 
++ (katom->protected_state.exit != ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { ++ kbase_pm_protected_override_disable(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); ++ } + -+ return 0; -+} -+#endif /* !MALI_USE_CSF */ ++ if (katom->protected_state.enter != ++ KBASE_ATOM_ENTER_PROTECTED_CHECK || ++ katom->protected_state.exit != ++ KBASE_ATOM_EXIT_PROTECTED_CHECK) ++ kbdev->protected_mode_transition = false; + -+static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) -+{ -+ bool in_desired_state = true; ++ /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means ++ * one of two events prevented it from progressing to the next state and ++ * ultimately reach protected mode: ++ * - hwcnts were enabled, and the atom had to schedule a worker to ++ * disable them. ++ * - the hwcnts were already disabled, but some other error occurred. ++ * In the first case, if the worker has not yet completed ++ * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable ++ * them and signal to the worker they have already been enabled ++ */ ++ if (kbase_jd_katom_is_protected(katom) && ++ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { ++ kbdev->protected_mode_hwcnt_desired = true; ++ if (kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbdev->protected_mode_hwcnt_disabled = false; ++ } ++ } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* If the atom has suspended hwcnt but has not yet entered ++ * protected mode, then resume hwcnt now. If the GPU is now in ++ * protected mode then hwcnt will be resumed by GPU reset so ++ * don't resume it here. ++ */ ++ if (kbase_jd_katom_is_protected(katom) && ++ ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || ++ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || ++ (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { ++ WARN_ON(!kbdev->protected_mode_hwcnt_disabled); ++ kbdev->protected_mode_hwcnt_desired = true; ++ if (kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++ kbdev->protected_mode_hwcnt_disabled = false; ++ } ++ } + -+ in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { ++ if (katom->atom_flags & ++ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { ++ kbase_pm_protected_l2_override(kbdev, false); ++ katom->atom_flags &= ++ ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; ++ } ++ } + -+#if !MALI_USE_CSF -+ if (kbdev->pm.backend.shaders_desired && -+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) -+ in_desired_state = false; -+ else if (!kbdev->pm.backend.shaders_desired && -+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) -+ in_desired_state = false; -+#else -+ in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); -+#endif ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: ++ break; ++ } + -+ return in_desired_state; ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; +} + -+static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) ++static void 
kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ bool in_desired_state; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return in_desired_state; ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, ++ katom->kctx, katom, katom->jc, ++ katom->slot_nr, katom->event_code); ++ kbase_gpu_release_atom(kbdev, katom, NULL); ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + -+static bool kbase_pm_is_in_desired_state_with_l2_powered( -+ struct kbase_device *kbdev) ++/** ++ * other_slots_busy - Determine if any job slots other than @js are currently ++ * running atoms ++ * @kbdev: Device pointer ++ * @js: Job slot ++ * ++ * Return: true if any slots other than @js are busy, false otherwise ++ */ ++static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) +{ -+ bool in_desired_state = false; -+ unsigned long flags; ++ unsigned int slot; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbase_pm_is_in_desired_state_nolock(kbdev) && -+ (kbdev->pm.backend.l2_state == KBASE_L2_ON)) -+ in_desired_state = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { ++ if (slot == js) ++ continue; + -+ return in_desired_state; ++ if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, ++ KBASE_ATOM_GPU_RB_SUBMITTED)) ++ return true; ++ } ++ ++ return false; +} + -+static void kbase_pm_trace_power_state(struct kbase_device *kbdev) ++static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ kbdev, -+ KBASE_PM_CORE_L2, -+ kbase_pm_get_ready_cores( -+ kbdev, KBASE_PM_CORE_L2)); -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ kbdev, -+ KBASE_PM_CORE_SHADER, -+ kbase_pm_get_ready_cores( -+ kbdev, KBASE_PM_CORE_SHADER)); -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ kbdev, -+ KBASE_PM_CORE_TILER, -+ kbase_pm_get_ready_cores( -+ kbdev, -+ KBASE_PM_CORE_TILER)); -+ -+ if (corestack_driver_control) -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ kbdev, -+ KBASE_PM_CORE_STACK, -+ kbase_pm_get_ready_cores( -+ kbdev, -+ KBASE_PM_CORE_STACK)); ++ return kbdev->protected_mode; +} + -+void kbase_pm_update_state(struct kbase_device *kbdev) ++static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) +{ -+#if !MALI_USE_CSF -+ enum kbase_shader_core_state prev_shaders_state = -+ kbdev->pm.backend.shaders_state; -+#else -+ enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; -+#endif -+ + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (!kbdev->pm.backend.gpu_ready) -+ return; /* Do nothing if the GPU is not ready */ -+ -+ if (kbase_pm_l2_update_state(kbdev)) -+ return; -+ -+#if !MALI_USE_CSF -+ if (kbase_pm_shaders_update_state(kbdev)) -+ return; -+ -+ /* If the shaders just turned off, re-invoke the L2 state machine, in -+ * case it was waiting for the shaders to turn off before powering down -+ * the L2. ++ /* ++ * When entering into protected mode, we must ensure that the ++ * GPU is not operating in coherent mode as well. This is to ++ * ensure that no protected memory can be leaked. 
+ */ -+ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && -+ kbdev->pm.backend.shaders_state == -+ KBASE_SHADERS_OFF_CORESTACK_OFF) { -+ if (kbase_pm_l2_update_state(kbdev)) -+ return; -+ } -+#else -+ if (kbase_pm_mcu_update_state(kbdev)) -+ return; -+ -+ if (!kbase_pm_is_mcu_inactive(kbdev, prev_mcu_state) && -+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) { -+ if (kbase_pm_l2_update_state(kbdev)) -+ return; -+ } -+#endif -+ -+ if (kbase_pm_is_in_desired_state_nolock(kbdev)) { -+ KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, -+ kbdev->pm.backend.shaders_avail); -+ -+ kbase_pm_trace_power_state(kbdev); -+ -+ KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); -+ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); -+ } ++ if (kbdev->system_coherency == COHERENCY_ACE) ++ kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); +} + -+static enum hrtimer_restart -+shader_tick_timer_callback(struct hrtimer *timer) ++static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +{ -+ struct kbasep_pm_tick_timer_state *stt = container_of(timer, -+ struct kbasep_pm_tick_timer_state, timer); -+ struct kbase_device *kbdev = container_of(stt, struct kbase_device, -+ pm.backend.shader_tick_timer); -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ unsigned long flags; -+ enum hrtimer_restart restart = HRTIMER_NORESTART; ++ int err = -EINVAL; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (stt->remaining_ticks && -+ backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { -+ stt->remaining_ticks--; ++ WARN_ONCE(!kbdev->protected_ops, ++ "Cannot enter protected mode: protected callbacks not specified.\n"); + -+ /* If the remaining ticks just changed from 1 to 0, invoke the -+ * PM state machine to power off the shader cores. 
-+ */ -+ if (!stt->remaining_ticks && !backend->shaders_desired) -+ kbase_pm_update_state(kbdev); -+ } ++ if (kbdev->protected_ops) { ++ /* Switch GPU to protected mode */ ++ err = kbdev->protected_ops->protected_mode_enable( ++ kbdev->protected_dev); + -+ if (stt->needed) { -+ hrtimer_forward_now(timer, stt->configured_interval); -+ restart = HRTIMER_RESTART; ++ if (err) { ++ dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", ++ err); ++ } else { ++ kbdev->protected_mode = true; ++ kbase_ipa_protection_mode_switch_event(kbdev); ++ } + } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return restart; ++ return err; +} + -+int kbase_pm_state_machine_init(struct kbase_device *kbdev) ++static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) +{ -+ struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; -+ -+ stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!stt->wq) -+ return -ENOMEM; -+ -+ INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); -+ -+ stt->needed = false; -+ hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ stt->timer.function = shader_tick_timer_callback; -+ stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); -+ stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; -+ stt->configured_ticks = stt->default_ticks; -+ -+#if MALI_USE_CSF -+ kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!kbdev->pm.backend.core_idle_wq) { -+ destroy_workqueue(stt->wq); -+ return -ENOMEM; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ INIT_WORK(&kbdev->pm.backend.core_idle_work, core_idle_worker); -+#endif ++ WARN_ONCE(!kbdev->protected_ops, ++ "Cannot exit protected mode: protected callbacks not specified.\n"); + -+ return 0; -+} ++ if (!kbdev->protected_ops) ++ return -EINVAL; + -+void kbase_pm_state_machine_term(struct kbase_device *kbdev) -+{ -+#if MALI_USE_CSF -+ destroy_workqueue(kbdev->pm.backend.core_idle_wq); -+#endif -+ hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); -+ destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); ++ /* The protected mode disable callback will be called as part of reset ++ */ ++ return kbase_reset_gpu_silent(kbdev); +} + -+void kbase_pm_reset_start_locked(struct kbase_device *kbdev) ++static int kbase_jm_protected_entry(struct kbase_device *kbdev, ++ struct kbase_jd_atom **katom, int idx, int js) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ backend->in_reset = true; -+ backend->l2_state = KBASE_L2_RESET_WAIT; -+ KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state); -+#if !MALI_USE_CSF -+ backend->shaders_state = KBASE_SHADERS_RESET_WAIT; -+#else -+ /* MCU state machine is exercised only after the initial load/boot -+ * of the firmware. ++ err = kbase_gpu_protected_mode_enter(kbdev); ++ ++ /* ++ * Regardless of result before this call, we are no longer ++ * transitioning the GPU. 
+ */ -+ if (likely(kbdev->csf.firmware_inited)) { -+ backend->mcu_state = KBASE_MCU_RESET_WAIT; -+ KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state); -+#ifdef KBASE_PM_RUNTIME -+ backend->exit_gpu_sleep_mode = true; -+#endif -+ kbdev->csf.firmware_reload_needed = true; -+ } else { -+ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); ++ ++ kbdev->protected_mode_transition = false; ++ kbase_pm_protected_override_disable(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); ++ if (err) { ++ /* ++ * Failed to switch into protected mode. ++ * ++ * At this point we expect: ++ * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && ++ * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED ++ * ==> ++ * kbdev->protected_mode_hwcnt_disabled = false ++ */ ++ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); ++ /* ++ * Only return if head atom or previous atom ++ * already removed - as atoms must be returned ++ * in order. ++ */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } ++ ++ return -EINVAL; + } -+#endif + -+ /* We're in a reset, so hwcnt will have been synchronously disabled by -+ * this function's caller as part of the reset process. We therefore -+ * know that any call to kbase_hwcnt_context_disable_atomic, if -+ * required to sync the hwcnt refcount with our internal state, is -+ * guaranteed to succeed. ++ /* ++ * Protected mode sanity checks. + */ -+ backend->hwcnt_desired = false; -+ if (!backend->hwcnt_disabled) { -+ WARN_ON(!kbase_hwcnt_context_disable_atomic( -+ kbdev->hwcnt_gpu_ctx)); -+ backend->hwcnt_disabled = true; -+ } ++ WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), ++ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", ++ kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_READY; + -+ shader_poweroff_timer_queue_cancel(kbdev); ++ return err; +} + -+void kbase_pm_reset_complete(struct kbase_device *kbdev) ++static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, ++ struct kbase_jd_atom **katom, int idx, int js) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ unsigned long flags; -+ -+ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ int err = 0; + -+ /* As GPU has just been reset, that results in implicit flush of L2 -+ * cache, can safely mark the pending cache flush operation (if there -+ * was any) as complete and unblock the waiter. -+ * No work can be submitted whilst GPU reset is ongoing. -+ */ -+ kbase_gpu_cache_clean_wait_complete(kbdev); -+ backend->in_reset = false; -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ backend->gpu_wakeup_override = false; -+#endif -+ kbase_pm_update_state(kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ switch (katom[idx]->protected_state.enter) { ++ case KBASE_ATOM_ENTER_PROTECTED_CHECK: ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); ++ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV ++ * should ensure that we are not already transitiong, and that ++ * there are no atoms currently on the GPU. 
++ */ ++ WARN_ON(kbdev->protected_mode_transition); ++ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); ++ /* If hwcnt is disabled, it means we didn't clean up correctly ++ * during last exit from protected mode. ++ */ ++ WARN_ON(kbdev->protected_mode_hwcnt_disabled); + -+#if !MALI_USE_CSF -+/* Timeout in milliseconds for GPU Power Management to reach the desired -+ * Shader and L2 state. If the time spent waiting has exceeded this threshold -+ * then there is most likely a hardware issue. -+ */ -+#define PM_TIMEOUT_MS (5000) /* 5s */ -+#endif ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_HWCNT; + -+static void kbase_pm_timed_out(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ kbdev->protected_mode_transition = true; + -+ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); -+#if !MALI_USE_CSF -+ CSTD_UNUSED(flags); -+ dev_err(kbdev->dev, "Desired state :\n"); -+ dev_err(kbdev->dev, "\tShader=%016llx\n", -+ kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); -+#else -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ dev_err(kbdev->dev, "\tMCU desired = %d\n", -+ kbase_pm_is_mcu_desired(kbdev)); -+ dev_err(kbdev->dev, "\tMCU sw state = %d\n", -+ kbdev->pm.backend.mcu_state); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#endif -+ dev_err(kbdev->dev, "Current state :\n"); -+ dev_err(kbdev->dev, "\tShader=%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_READY_HI)), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_READY_LO))); -+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_READY_HI)), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_READY_LO))); -+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_READY_HI)), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_READY_LO))); -+#if MALI_USE_CSF -+ dev_err(kbdev->dev, "\tMCU status = %d\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); -+#endif -+ dev_err(kbdev->dev, "Cores transitioning :\n"); -+ dev_err(kbdev->dev, "\tShader=%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ SHADER_PWRTRANS_HI)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ SHADER_PWRTRANS_LO))); -+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ TILER_PWRTRANS_HI)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ TILER_PWRTRANS_LO))); -+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ L2_PWRTRANS_HI)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ L2_PWRTRANS_LO))); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_ENTER_PROTECTED_HWCNT: ++ /* See if we can get away with disabling hwcnt atomically */ ++ kbdev->protected_mode_hwcnt_desired = false; ++ if (!kbdev->protected_mode_hwcnt_disabled) { ++ if (kbase_hwcnt_context_disable_atomic( ++ kbdev->hwcnt_gpu_ctx)) ++ kbdev->protected_mode_hwcnt_disabled = true; ++ } + -+ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); -+ if (kbase_prepare_to_reset_gpu(kbdev, -+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+} ++ /* We couldn't disable atomically, so kick off a worker */ ++ if (!kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_queue_work( ++ kbdev->hwcnt_gpu_ctx, ++ &kbdev->protected_mode_hwcnt_disable_work); ++ return -EAGAIN; ++ } + -+int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ unsigned long timeout; -+ long remaining; -+ 
int err = 0; ++ /* Once reaching this point GPU must be switched to protected ++ * mode or hwcnt re-enabled. ++ */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (kbase_pm_protected_entry_override_enable(kbdev)) ++ return -EAGAIN; + -+#if MALI_USE_CSF -+ timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); -+#else -+ timeout = msecs_to_jiffies(PM_TIMEOUT_MS); -+#endif ++ /* ++ * Not in correct mode, begin protected mode switch. ++ * Entering protected mode requires us to power down the L2, ++ * and drop out of fully coherent mode. ++ */ ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + -+ /* Wait for cores */ -+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE -+ remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbase_pm_is_in_desired_state_with_l2_powered(kbdev), -+ timeout); -+#else -+ remaining = wait_event_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); -+#endif ++ kbase_pm_protected_override_enable(kbdev); ++ /* ++ * Only if the GPU reset hasn't been initiated, there is a need ++ * to invoke the state machine to explicitly power down the ++ * shader cores and L2. ++ */ ++ if (!kbdev->pm.backend.protected_entry_transition_override) ++ kbase_pm_update_cores_state_nolock(kbdev); + -+ if (!remaining) { -+ kbase_pm_timed_out(kbdev); -+ err = -ETIMEDOUT; -+ } else if (remaining < 0) { -+ dev_info( -+ kbdev->dev, -+ "Wait for desired PM state with L2 powered got interrupted"); -+ err = (int)remaining; -+ } ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: ++ /* Avoid unnecessary waiting on non-ACE platforms. */ ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ if (kbdev->pm.backend.l2_always_on) { ++ /* ++ * If the GPU reset hasn't completed, then L2 ++ * could still be powered up. ++ */ ++ if (kbase_reset_gpu_is_active(kbdev)) ++ return -EAGAIN; ++ } + -+ return err; -+} ++ if (kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_L2) || ++ kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_L2) || ++ kbase_is_gpu_removed(kbdev)) { ++ /* ++ * The L2 is still powered, wait for all ++ * the users to finish with it before doing ++ * the actual reset. ++ */ ++ return -EAGAIN; ++ } ++ } + -+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ long remaining; -+#if MALI_USE_CSF -+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); -+#else -+ long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); -+#endif -+ int err = 0; ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; + -+ /* Let the state machine latch the most recent desired state. */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: ++ /* ++ * When entering into protected mode, we must ensure that the ++ * GPU is not operating in coherent mode as well. This is to ++ * ensure that no protected memory can be leaked. 
++ */ ++ kbase_gpu_disable_coherent(kbdev); + -+ /* Wait for cores */ -+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE -+ remaining = wait_event_killable_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbase_pm_is_in_desired_state(kbdev), timeout); -+#else -+ remaining = wait_event_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbase_pm_is_in_desired_state(kbdev), timeout); -+#endif ++ kbase_pm_protected_entry_override_disable(kbdev); + -+ if (!remaining) { -+ kbase_pm_timed_out(kbdev); -+ err = -ETIMEDOUT; -+ } else if (remaining < 0) { -+ dev_info(kbdev->dev, -+ "Wait for desired PM state got interrupted"); -+ err = (int)remaining; -+ } ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { ++ /* ++ * Power on L2 caches; this will also result in the ++ * correct value written to coherency enable register. ++ */ ++ kbase_pm_protected_l2_override(kbdev, true); + -+ return err; -+} -+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); ++ /* ++ * Set the flag on the atom that additional ++ * L2 references are taken. ++ */ ++ katom[idx]->atom_flags |= ++ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; ++ } + -+#if MALI_USE_CSF -+/** -+ * core_mask_update_done - Check if downscaling of shader cores is done -+ * -+ * @kbdev: The kbase device structure for the device. -+ * -+ * This function checks if the downscaling of cores is effectively complete. -+ * -+ * Return: true if the downscale is done. -+ */ -+static bool core_mask_update_done(struct kbase_device *kbdev) -+{ -+ bool update_done = false; -+ unsigned long flags; ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_FINISHED; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* If MCU is in stable ON state then it implies that the downscale -+ * request had completed. -+ * If MCU is not active then it implies all cores are off, so can -+ * consider the downscale request as complete. -+ */ -+ if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || -+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) -+ update_done = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) ++ return -EAGAIN; + -+ return update_done; -+} ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_ENTER_PROTECTED_FINISHED: ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { ++ /* ++ * Check that L2 caches are powered and, if so, ++ * enter protected mode. ++ */ ++ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { ++ /* ++ * Remove additional L2 reference and reset ++ * the atom flag which denotes it. 
++ */ ++ if (katom[idx]->atom_flags & ++ KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { ++ kbase_pm_protected_l2_override(kbdev, ++ false); ++ katom[idx]->atom_flags &= ++ ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; ++ } + -+int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) -+{ -+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); -+ long remaining; -+ int err = 0; ++ err = kbase_jm_protected_entry(kbdev, katom, idx, js); + -+ /* Wait for core mask update to complete */ -+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE -+ remaining = wait_event_killable_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ core_mask_update_done(kbdev), timeout); -+#else -+ remaining = wait_event_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ core_mask_update_done(kbdev), timeout); -+#endif ++ if (err) ++ return err; ++ } else { ++ /* ++ * still waiting for L2 caches to power up ++ */ ++ return -EAGAIN; ++ } ++ } else { ++ err = kbase_jm_protected_entry(kbdev, katom, idx, js); + -+ if (!remaining) { -+ kbase_pm_timed_out(kbdev); -+ err = -ETIMEDOUT; -+ } else if (remaining < 0) { -+ dev_info( -+ kbdev->dev, -+ "Wait for cores down scaling got interrupted"); -+ err = (int)remaining; ++ if (err) ++ return err; ++ } + } + -+ return err; ++ return 0; +} -+#endif + -+void kbase_pm_enable_interrupts(struct kbase_device *kbdev) ++static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, ++ struct kbase_jd_atom **katom, int idx, int js) +{ -+ unsigned long flags; ++ int err = 0; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ /* -+ * Clear all interrupts, -+ * and unmask them all. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); ++ switch (katom[idx]->protected_state.exit) { ++ case KBASE_ATOM_EXIT_PROTECTED_CHECK: ++ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); ++ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV ++ * should ensure that we are not already transitiong, and that ++ * there are no atoms currently on the GPU. ++ */ ++ WARN_ON(kbdev->protected_mode_transition); ++ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); -+#if MALI_USE_CSF -+ /* Enable only the Page fault bits part */ -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); -+#else -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); -+#endif -+} ++ /* ++ * Exiting protected mode requires a reset, but first the L2 ++ * needs to be powered down to ensure it's not active when the ++ * reset is issued. ++ */ ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + -+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); ++ kbdev->protected_mode_transition = true; ++ kbase_pm_protected_override_enable(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); + -+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ /* -+ * Mask all interrupts, -+ * and clear them all. 
-+ */ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: ++ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { ++ /* ++ * The L2 is still powered, wait for all the users to ++ * finish with it before doing the actual reset. ++ */ ++ return -EAGAIN; ++ } ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_RESET; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_EXIT_PROTECTED_RESET: ++ /* L2 cache has been turned off (which is needed prior to the reset of GPU ++ * to exit the protected mode), so the override flag can be safely cleared. ++ * Even if L2 cache is powered up again before the actual reset, it should ++ * not be an issue (there are no jobs running on the GPU). ++ */ ++ kbase_pm_protected_override_disable(kbdev); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); -+} ++ /* Issue the reset to the GPU */ ++ err = kbase_gpu_protected_mode_reset(kbdev); + -+void kbase_pm_disable_interrupts(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ if (err == -EAGAIN) ++ return -EAGAIN; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_disable_interrupts_nolock(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ if (err) { ++ kbdev->protected_mode_transition = false; + -+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); ++ /* Failed to exit protected mode, fail atom */ ++ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); ++ /* Only return if head atom or previous atom ++ * already removed - as atoms must be returned in order ++ */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } + -+#if MALI_USE_CSF -+/** -+ * update_user_reg_page_mapping - Update the mapping for USER Register page -+ * -+ * @kbdev: The kbase device structure for the device. -+ * -+ * This function must be called to unmap the dummy or real page from USER Register page -+ * mapping whenever GPU is powered up or down. The dummy or real page would get -+ * appropriately mapped in when Userspace reads the LATEST_FLUSH value. -+ */ -+static void update_user_reg_page_mapping(struct kbase_device *kbdev) -+{ -+ struct kbase_context *kctx, *n; ++ /* If we're exiting from protected mode, hwcnt must have ++ * been disabled during entry. ++ */ ++ WARN_ON(!kbdev->protected_mode_hwcnt_disabled); ++ kbdev->protected_mode_hwcnt_desired = true; ++ if (kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++ kbdev->protected_mode_hwcnt_disabled = false; ++ } + -+ lockdep_assert_held(&kbdev->pm.lock); ++ return -EINVAL; ++ } + -+ mutex_lock(&kbdev->csf.reg_lock); -+ list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) { -+ /* This would zap the PTE corresponding to the mapping of User -+ * Register page of the kbase context. The mapping will be reestablished -+ * when the context (user process) needs to access to the page. 
++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: ++ /* A GPU reset is issued when exiting protected mode. Once the ++ * reset is done all atoms' state will also be reset. For this ++ * reason, if the atom is still in this state we can safely ++ * say that the reset has not completed i.e., we have not ++ * finished exiting protected mode yet. + */ -+ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping, -+ kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1); -+ list_del_init(&kctx->csf.user_reg.link); -+ dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid, -+ kctx->id); ++ return -EAGAIN; + } -+ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ return 0; +} -+#endif + -+/* -+ * pmu layout: -+ * 0x0000: PMU TAG (RO) (0xCAFECAFE) -+ * 0x0004: PMU VERSION ID (RO) (0x00000000) -+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) -+ */ -+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) ++void kbase_backend_slot_update(struct kbase_device *kbdev) +{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; -+ bool reset_required = is_resume; -+ unsigned long flags; ++ unsigned int js; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kbdev->js_data.runpool_mutex); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kbdev->pm.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { -+ dev_err(kbdev->dev, -+ "%s: Cannot power up while GPU lost", __func__); -+ return; -+ } -+#endif -+ -+ if (backend->gpu_powered) { -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ if (backend->gpu_idled) { -+ backend->callback_power_runtime_gpu_active(kbdev); -+ backend->gpu_idled = false; -+ } ++ if (kbase_reset_gpu_is_active(kbdev) || ++ kbase_is_gpu_removed(kbdev)) ++#else ++ if (kbase_reset_gpu_is_active(kbdev)) +#endif -+ /* Already turned on */ -+ if (kbdev->poweroff_pending) -+ kbase_pm_enable_interrupts(kbdev); -+ kbdev->poweroff_pending = false; -+ KBASE_DEBUG_ASSERT(!is_resume); + return; -+ } + -+ kbdev->poweroff_pending = false; ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ struct kbase_jd_atom *katom[2]; ++ int idx; + -+ KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u); ++ katom[0] = kbase_gpu_inspect(kbdev, js, 0); ++ katom[1] = kbase_gpu_inspect(kbdev, js, 1); ++ WARN_ON(katom[1] && !katom[0]); + -+ if (is_resume && backend->callback_power_resume) { -+ backend->callback_power_resume(kbdev); -+ return; -+ } else if (backend->callback_power_on) { -+ reset_required = backend->callback_power_on(kbdev); -+ } ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ bool cores_ready; ++ int ret; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->gpu_powered = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!katom[idx]) ++ continue; + -+#if MALI_USE_CSF -+ /* GPU has been turned on, can switch to actual register page */ -+ update_user_reg_page_mapping(kbdev); -+#endif ++ switch (katom[idx]->gpu_rb_state) { ++ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: ++ /* Should be impossible */ ++ WARN(1, "Attempting to update atom not in ringbuffer\n"); ++ break; + ++ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: ++ if (kbase_js_atom_blocked_on_x_dep(katom[idx])) ++ break; + -+ if (reset_required) { -+ /* GPU state was lost, reset GPU to ensure it is in a -+ * consistent state 
-+ */ -+ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); -+ } -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ else { -+ if (kbdev->arb.arb_if) { -+ struct kbase_arbiter_vm_state *arb_vm_state = -+ kbdev->pm.arb_vm_state; ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + -+ /* In the case that the GPU has just been granted by -+ * the Arbiter, a reset will have already been done. -+ * However, it is still necessary to initialize the GPU. -+ */ -+ if (arb_vm_state->vm_arb_starting) -+ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | -+ PM_NO_RESET); -+ } -+ } -+ /* -+ * This point means that the GPU trasitioned to ON. So there is a chance -+ * that a repartitioning occurred. In this case the current config -+ * should be read again. -+ */ -+ kbase_gpuprops_get_curr_config_props(kbdev, -+ &kbdev->gpu_props.curr_config); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: ++ if (kbase_gpu_check_secure_atoms(kbdev, ++ !kbase_jd_katom_is_protected( ++ katom[idx]))) ++ break; + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_restore_all_as(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ if ((idx == 1) && (kbase_jd_katom_is_protected( ++ katom[0]) != ++ kbase_jd_katom_is_protected( ++ katom[1]))) ++ break; + -+ if (kbdev->dummy_job_wa.flags & -+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_dummy_job_wa_execute(kbdev, -+ kbase_pm_get_present_cores(kbdev, -+ KBASE_PM_CORE_SHADER)); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++ if (kbdev->protected_mode_transition) ++ break; + -+ /* Enable the interrupts */ -+ kbase_pm_enable_interrupts(kbdev); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + -+ /* Turn on the L2 caches */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->gpu_ready = true; -+ backend->l2_desired = true; -+#if MALI_USE_CSF -+ if (reset_required) { -+ /* GPU reset was done after the power on, so send the post -+ * reset event instead. This is okay as GPU power off event -+ * is same as pre GPU reset event. -+ */ -+ kbase_ipa_control_handle_gpu_reset_post(kbdev); -+ } else { -+ kbase_ipa_control_handle_gpu_power_on(kbdev); -+ } -+#endif -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ /* GPU is now powered up. Invoke the GPU active callback as GPU idle -+ * callback would have been invoked before the power down. -+ */ -+ if (backend->gpu_idled) { -+ backend->callback_power_runtime_gpu_active(kbdev); -+ backend->gpu_idled = false; -+ } -+#endif -+} ++ /* ++ * Exiting protected mode must be done before ++ * the references on the cores are taken as ++ * a power down the L2 is required which ++ * can't happen after the references for this ++ * atom are taken. ++ */ + -+KBASE_EXPORT_TEST_API(kbase_pm_clock_on); ++ if (!kbase_gpu_in_protected_mode(kbdev) && ++ kbase_jd_katom_is_protected(katom[idx])) { ++ /* Atom needs to transition into protected mode. 
*/ ++ ret = kbase_jm_enter_protected_mode(kbdev, ++ katom, idx, js); ++ if (ret) ++ break; ++ } else if (kbase_gpu_in_protected_mode(kbdev) && ++ !kbase_jd_katom_is_protected(katom[idx])) { ++ /* Atom needs to transition out of protected mode. */ ++ ret = kbase_jm_exit_protected_mode(kbdev, ++ katom, idx, js); ++ if (ret) ++ break; ++ } ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_CHECK; + -+bool kbase_pm_clock_off(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ /* Atom needs no protected mode transition. */ + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ lockdep_assert_held(&kbdev->pm.lock); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + -+ /* ASSERT that the cores should now be unavailable. No lock needed. */ -+ WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: ++ if (katom[idx]->will_fail_event_code) { ++ kbase_gpu_mark_atom_for_return(kbdev, ++ katom[idx]); ++ /* Set EVENT_DONE so this atom will be ++ * completed, not unpulled. ++ */ ++ katom[idx]->event_code = ++ BASE_JD_EVENT_DONE; ++ /* Only return if head atom or previous ++ * atom already removed - as atoms must ++ * be returned in order. ++ */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } ++ break; ++ } + -+ kbdev->poweroff_pending = true; ++ cores_ready = kbase_pm_cores_requested(kbdev, ++ true); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* Already turned off */ -+ return true; -+ } ++ if (!cores_ready) ++ break; + -+ KBASE_KTRACE_ADD(kbdev, PM_GPU_OFF, NULL, 0u); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_READY; + -+ /* Disable interrupts. This also clears any outstanding interrupts */ -+ kbase_pm_disable_interrupts(kbdev); -+ /* Ensure that any IRQ handlers have finished */ -+ kbase_synchronize_irqs(kbdev); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_READY: + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (idx == 1) { ++ /* Only submit if head atom or previous ++ * atom already submitted ++ */ ++ if ((katom[0]->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED && ++ katom[0]->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) ++ break; + -+ if (atomic_read(&kbdev->faults_pending)) { -+ /* Page/bus faults are still being processed. 
The GPU can not -+ * be powered off until they have completed -+ */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return false; -+ } ++ /* If intra-slot serialization in use ++ * then don't submit atom to NEXT slot ++ */ ++ if (kbdev->serialize_jobs & ++ KBASE_SERIALIZE_INTRA_SLOT) ++ break; ++ } + -+ kbase_pm_cache_snoop_disable(kbdev); -+#if MALI_USE_CSF -+ kbase_ipa_control_handle_gpu_power_off(kbdev); -+#endif ++ /* If inter-slot serialization in use then don't ++ * submit atom if any other slots are in use ++ */ ++ if ((kbdev->serialize_jobs & ++ KBASE_SERIALIZE_INTER_SLOT) && ++ other_slots_busy(kbdev, js)) ++ break; + -+ if (kbase_is_gpu_removed(kbdev) -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ || kbase_pm_is_gpu_lost(kbdev)) { -+#else -+ ) { -+#endif -+ /* Ensure we unblock any threads that are stuck waiting -+ * for the GPU -+ */ -+ kbase_gpu_cache_clean_wait_complete(kbdev); -+ } ++ /* Check if this job needs the cycle counter ++ * enabled before submission ++ */ ++ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); + -+ kbdev->pm.backend.gpu_ready = false; ++ if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { ++ katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + -+ /* The GPU power may be turned off from this point */ -+ kbdev->pm.backend.gpu_powered = false; ++ /* Inform power management at start/finish of ++ * atom so it can update its GPU utilisation ++ * metrics. ++ */ ++ kbase_pm_metrics_update(kbdev, ++ &katom[idx]->start_timestamp); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Inform platform at start/finish of atom */ ++ kbasep_platform_event_atom_submit(katom[idx]); ++ } else { ++ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + -+#if MALI_USE_CSF -+ /* GPU is about to be turned off, switch to dummy page */ -+ update_user_reg_page_mapping(kbdev); -+#endif ++ break; ++ } + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ fallthrough; ++ case KBASE_ATOM_GPU_RB_SUBMITTED: ++ break; + -+ if (kbdev->pm.backend.callback_power_off) -+ kbdev->pm.backend.callback_power_off(kbdev); -+ return true; ++ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: ++ /* Only return if head atom or previous atom ++ * already removed - as atoms must be returned ++ * in order ++ */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, ++ katom[idx]); ++ } ++ break; ++ } ++ } ++ } +} + -+KBASE_EXPORT_TEST_API(kbase_pm_clock_off); -+ -+struct kbasep_reset_timeout_data { -+ struct hrtimer timer; -+ bool timed_out; -+ struct kbase_device *kbdev; -+}; + -+void kbase_pm_reset_done(struct kbase_device *kbdev) ++void kbase_backend_run_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ kbdev->pm.backend.reset_done = true; -+ wake_up(&kbdev->pm.backend.reset_done_wait); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); ++ ++ kbase_gpu_enqueue_atom(kbdev, katom); ++ kbase_backend_slot_update(kbdev); +} + +/** -+ * kbase_pm_wait_for_reset - Wait for a reset to happen ++ * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer ++ * might depend on another from the same kctx ++ * @katom_a: dependee 
atom ++ * @katom_b: atom to query + * -+ * @kbdev: Kbase device ++ * This can be used on atoms that belong to different slot ringbuffers + * -+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. ++ * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. + */ -+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) ++static inline bool ++kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, ++ const struct kbase_jd_atom *katom_b) +{ -+ lockdep_assert_held(&kbdev->pm.lock); -+ -+ wait_event(kbdev->pm.backend.reset_done_wait, -+ (kbdev->pm.backend.reset_done)); -+ kbdev->pm.backend.reset_done = false; ++ if (katom_a->kctx != katom_b->kctx) ++ return false; ++ return (katom_b->pre_dep || ++ (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | ++ KBASE_KATOM_FLAG_FAIL_BLOCKER))); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_reset_done); -+ -+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) ++/** ++ * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is ++ * related to a failed JSn_HEAD atom ++ * @kbdev: kbase device ++ * @js: job slot to check ++ * @completion_code: completion code of the failed atom ++ * ++ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but ++ * unlike other failure codes we _can_ re-run them. ++ * ++ * This forms step 1 in a 2-step process of removing any related atoms from a ++ * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have ++ * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). ++ * ++ * This step only removes the atoms from the HW, and marks them as ++ * (potentially) ready to run again. ++ * ++ * Step 2 is on marking the JSn_HEAD atom as complete ++ * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS ++ * as appropriate, or re-submit them. ++ * ++ * Hence, this function must evict at a minimum the atoms related to the atom ++ * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable ++ * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as ++ * the next kbase_backend_slot_update() will resubmit any remaining. ++ * ++ * Return: true if an atom was evicted, false otherwise. 
++ */ ++bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) +{ -+ struct kbasep_reset_timeout_data *rtdata = -+ container_of(timer, struct kbasep_reset_timeout_data, timer); -+ -+ rtdata->timed_out = true; ++ struct kbase_jd_atom *katom; ++ struct kbase_jd_atom *next_katom; + -+ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset -+ * hasn't completed -+ */ -+ kbase_pm_reset_done(rtdata->kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return HRTIMER_NORESTART; -+} ++ katom = kbase_gpu_inspect(kbdev, js, 0); ++ if (!katom) { ++ dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); ++ return false; ++ } ++ next_katom = kbase_gpu_inspect(kbdev, js, 1); + -+static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) -+{ -+#if MALI_USE_CSF -+ kbdev->hw_quirks_gpu = -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); -+#else -+ u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); ++ if (next_katom && ++ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && ++ (kbase_rb_atom_might_depend(katom, next_katom) || ++ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && ++ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || ++ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), ++ JS_COMMAND_NOP); ++ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + -+ if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { -+ /* Only for tMIx */ -+ u32 coherency_features; ++ if (completion_code == BASE_JD_EVENT_STOPPED) { ++ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [next_katom->slot_nr]); ++ KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as ++ [next_katom->kctx->as_nr]); ++ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [next_katom->slot_nr]); ++ } + -+ coherency_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(COHERENCY_FEATURES)); ++ if (next_katom->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + -+ /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly -+ * documented for tMIx so force correct value here. ++ /* On evicting the next_katom, the last submission kctx on the ++ * given job slot then reverts back to the one that owns katom. ++ * The aim is to enable the next submission that can determine ++ * if the read only shader core L1 cache should be invalidated. 
+ */ -+ if (coherency_features == -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { -+ hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) -+ << JM_FORCE_COHERENCY_FEATURES_SHIFT; -+ } -+ } -+ -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = ++ SLOT_RB_TAG_KCTX(katom->kctx); + -+ kbdev->hw_quirks_gpu = hw_quirks_gpu; ++ return true; ++ } + -+#endif /* !MALI_USE_CSF */ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { -+ int default_idvs_group_size = 0xF; -+ u32 group_size = 0; ++ return false; ++} + -+ if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", -+ &group_size)) -+ group_size = default_idvs_group_size; ++/** ++ * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD ++ * @kbdev: kbase device ++ * @js: job slot to check ++ * @completion_code: completion code of the completed atom ++ * @job_tail: value read from JSn_TAIL, for STOPPED atoms ++ * @end_timestamp: pointer to approximate ktime value when the katom completed ++ * ++ * Among other operations, this also executes step 2 of a 2-step process of ++ * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), ++ * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index ++ * 0). The first step is done in kbase_gpu_irq_evict(). ++ * ++ * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but ++ * unlike other failure codes we _can_ re-run them. ++ * ++ * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue ++ * and return to the JS some (usually all) of the atoms evicted from the HW ++ * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an ++ * atom, that atom must not have been running or must already be evicted, as ++ * otherwise we would be in the incorrect state of having an atom both running ++ * on the HW and returned to the JS. ++ */ + -+ if (group_size > IDVS_GROUP_MAX_SIZE) { -+ dev_err(kbdev->dev, -+ "idvs-group-size of %d is too large. 
Maximum value is %d", -+ group_size, IDVS_GROUP_MAX_SIZE); -+ group_size = default_idvs_group_size; -+ } ++void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, ++ u64 job_tail, ktime_t *end_timestamp) ++{ ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); ++ struct kbase_context *kctx = NULL; + -+ kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; ++ if (unlikely(!katom)) { ++ dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); ++ return; + } + -+#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) -+ if (corestack_driver_control) -+ kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; -+ -+ return 0; -+} ++ kctx = katom->kctx; + -+static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) -+{ -+ u32 hw_quirks_sc = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_CONFIG)); ++ dev_dbg(kbdev->dev, ++ "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", ++ (void *)katom, completion_code, job_tail, js); + -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ -+ hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; -+ else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ -+ hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; ++ /* ++ * When a hard-stop is followed close after a soft-stop, the completion ++ * code may be set to STOPPED, even though the job is terminated ++ */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { ++ if (completion_code == BASE_JD_EVENT_STOPPED && ++ (katom->atom_flags & ++ KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { ++ completion_code = BASE_JD_EVENT_TERMINATED; ++ } ++ } + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) -+ hw_quirks_sc |= SC_VAR_ALGORITHM; ++ if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && ++ completion_code != BASE_JD_EVENT_DONE && ++ !(completion_code & BASE_JD_SW_EVENT)) { ++ /* When a job chain fails, on a T60x or when ++ * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not ++ * flushed. To prevent future evictions causing possible memory ++ * corruption we need to flush the cache manually before any ++ * affected memory gets reused. ++ */ ++ katom->need_cache_flush_cores_retained = true; ++ } + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) -+ hw_quirks_sc |= SC_TLS_HASH_ENABLE; ++ katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + -+ kbdev->hw_quirks_sc = hw_quirks_sc; ++ if (completion_code == BASE_JD_EVENT_STOPPED) { ++ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, ++ 0); + -+ return 0; -+} ++ /* ++ * Dequeue next atom from ringbuffers on same slot if required. ++ * This atom will already have been removed from the NEXT ++ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that ++ * the atoms on this slot are returned in the correct order. 
++ */ ++ if (next_katom && ++ kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { ++ WARN_ON(next_katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED); ++ kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); ++ kbase_jm_return_atom_to_js(kbdev, next_katom); ++ } ++ } else if (completion_code != BASE_JD_EVENT_DONE) { ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ unsigned int i; + -+static int kbase_set_tiler_quirks(struct kbase_device *kbdev) -+{ -+ u32 hw_quirks_tiler = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_CONFIG)); ++ if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { ++ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", ++ js, completion_code, ++ kbase_gpu_exception_name( ++ completion_code)); + -+ if (kbase_is_gpu_removed(kbdev)) -+ return -EIO; ++ } + -+ /* Set tiler clock gate override if required */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) -+ hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; ++#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 ++ KBASE_KTRACE_DUMP(kbdev); ++#endif ++ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + -+ kbdev->hw_quirks_tiler = hw_quirks_tiler; ++ /* ++ * Remove all atoms on the same context from ringbuffers. This ++ * will not remove atoms that are already on the GPU, as these ++ * are guaranteed not to have fail dependencies on the failed ++ * atom. ++ */ ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { ++ struct kbase_jd_atom *katom_idx0 = ++ kbase_gpu_inspect(kbdev, i, 0); ++ struct kbase_jd_atom *katom_idx1 = ++ kbase_gpu_inspect(kbdev, i, 1); + -+ return 0; -+} ++ if (katom_idx0 && ++ kbase_rb_atom_might_depend(katom, katom_idx0) && ++ katom_idx0->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Dequeue katom_idx0 from ringbuffer */ ++ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); + -+static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) -+{ -+ struct device_node *np = kbdev->dev->of_node; -+ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ const u32 prod_id = -+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ int error = 0; ++ if (katom_idx1 && kbase_rb_atom_might_depend( ++ katom, katom_idx1) && ++ katom_idx0->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Dequeue katom_idx1 from ringbuffer */ ++ kbase_gpu_dequeue_atom(kbdev, i, ++ end_timestamp); + -+ kbdev->hw_quirks_gpu = 0; -+ kbdev->hw_quirks_sc = 0; -+ kbdev->hw_quirks_tiler = 0; -+ kbdev->hw_quirks_mmu = 0; ++ katom_idx1->event_code = ++ BASE_JD_EVENT_STOPPED; ++ kbase_jm_return_atom_to_js(kbdev, ++ katom_idx1); ++ } ++ katom_idx0->event_code = BASE_JD_EVENT_STOPPED; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx0); + -+ if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { -+ dev_info(kbdev->dev, -+ "Found quirks_gpu = [0x%x] in Devicetree\n", -+ kbdev->hw_quirks_gpu); -+ } else { -+ error = kbase_set_gpu_quirks(kbdev, prod_id); -+ if (error) -+ return error; ++ } else if (katom_idx1 && kbase_rb_atom_might_depend( ++ katom, katom_idx1) && ++ katom_idx1->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Can not dequeue this atom yet - will be ++ * dequeued when atom at idx0 completes ++ */ ++ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; ++ kbase_gpu_mark_atom_for_return(kbdev, ++ katom_idx1); ++ } ++ } + } + -+ if (!of_property_read_u32(np, "quirks_sc", -+ &kbdev->hw_quirks_sc)) { -+ dev_info(kbdev->dev, -+ "Found quirks_sc = [0x%x] in Devicetree\n", -+ kbdev->hw_quirks_sc); -+ } else { -+ error = 
kbase_set_sc_quirks(kbdev, prod_id); -+ if (error) -+ return error; -+ } ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); + -+ if (!of_property_read_u32(np, "quirks_tiler", -+ &kbdev->hw_quirks_tiler)) { -+ dev_info(kbdev->dev, -+ "Found quirks_tiler = [0x%x] in Devicetree\n", -+ kbdev->hw_quirks_tiler); -+ } else { -+ error = kbase_set_tiler_quirks(kbdev); -+ if (error) -+ return error; -+ } ++ if (job_tail != 0 && job_tail != katom->jc) { ++ /* Some of the job has been executed */ ++ dev_dbg(kbdev->dev, ++ "Update job chain address of atom %pK to resume from 0x%llx\n", ++ (void *)katom, job_tail); + -+ if (!of_property_read_u32(np, "quirks_mmu", -+ &kbdev->hw_quirks_mmu)) { -+ dev_info(kbdev->dev, -+ "Found quirks_mmu = [0x%x] in Devicetree\n", -+ kbdev->hw_quirks_mmu); -+ } else { -+ error = kbase_set_mmu_quirks(kbdev); ++ katom->jc = job_tail; ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, ++ katom, job_tail, js); + } + -+ return error; -+} -+ -+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) -+{ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), -+ kbdev->hw_quirks_sc); ++ /* Only update the event code for jobs that weren't cancelled */ ++ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) ++ katom->event_code = (enum base_jd_event_code)completion_code; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), -+ kbdev->hw_quirks_tiler); ++ /* Complete the job, and start new ones ++ * ++ * Also defer remaining work onto the workqueue: ++ * - Re-queue Soft-stopped jobs ++ * - For any other jobs, queue the job back into the dependency system ++ * - Schedule out the parent context if necessary, and schedule a new ++ * one in. ++ */ ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ { ++ /* The atom in the HEAD */ ++ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, ++ 0); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), -+ kbdev->hw_quirks_mmu); -+#if MALI_USE_CSF -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), -+ kbdev->hw_quirks_gpu); -+#else -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), -+ kbdev->hw_quirks_gpu); -+#endif -+} ++ if (next_katom && next_katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ char js_string[16]; + -+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) -+{ -+ if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && -+ !kbdev->cci_snoop_enabled) { -+#if IS_ENABLED(CONFIG_ARM64) -+ if (kbdev->snoop_enable_smc != 0) -+ kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); -+#endif /* CONFIG_ARM64 */ -+ dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); -+ kbdev->cci_snoop_enabled = true; -+ } -+} ++ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, ++ js_string, ++ sizeof(js_string)), ++ ktime_to_ns(*end_timestamp), ++ (u32)next_katom->kctx->id, 0, ++ next_katom->work_id); ++ } else { ++ char js_string[16]; + -+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) -+{ -+ if (kbdev->cci_snoop_enabled) { -+#if IS_ENABLED(CONFIG_ARM64) -+ if (kbdev->snoop_disable_smc != 0) { -+ mali_cci_flush_l2(kbdev); -+ kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); ++ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, ++ sizeof(js_string)), ++ ktime_to_ns(ktime_get_raw()), 0, 0, 0); + } -+#endif /* CONFIG_ARM64 */ -+ dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); -+ kbdev->cci_snoop_enabled = false; -+ } -+} -+ -+#if !MALI_USE_CSF -+static void reenable_protected_mode_hwcnt(struct 
kbase_device *kbdev) -+{ -+ unsigned long irq_flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ kbdev->protected_mode_hwcnt_desired = true; -+ if (kbdev->protected_mode_hwcnt_disabled) { -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbdev->protected_mode_hwcnt_disabled = false; + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+} +#endif + -+static int kbase_pm_do_reset(struct kbase_device *kbdev) -+{ -+ struct kbasep_reset_timeout_data rtdata; -+ int ret; -+ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); ++ if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) ++ kbase_reset_gpu_silent(kbdev); + -+ KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); ++ if (completion_code == BASE_JD_EVENT_STOPPED) ++ katom = kbase_jm_return_atom_to_js(kbdev, katom); ++ else ++ katom = kbase_jm_complete(kbdev, katom, end_timestamp); + -+ if (kbdev->pm.backend.callback_soft_reset) { -+ ret = kbdev->pm.backend.callback_soft_reset(kbdev); -+ if (ret < 0) -+ return ret; -+ else if (ret > 0) -+ return 0; -+ } else { -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_SOFT_RESET); -+ } ++ if (katom) { ++ dev_dbg(kbdev->dev, ++ "Cross-slot dependency %pK has become runnable.\n", ++ (void *)katom); + -+ /* Unmask the reset complete interrupt only */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); ++ /* Check if there are lower priority jobs to soft stop */ ++ kbase_job_slot_ctx_priority_check_locked(kctx, katom); + -+ /* Initialize a structure for tracking the status of the reset */ -+ rtdata.kbdev = kbdev; -+ rtdata.timed_out = false; ++ kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); ++ } + -+ /* Create a timer to use as a timeout on the reset */ -+ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rtdata.timer.function = kbasep_reset_timeout; ++ /* For partial shader core off L2 cache flush */ ++ kbase_pm_update_state(kbdev); + -+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), -+ HRTIMER_MODE_REL); ++ /* Job completion may have unblocked other atoms. Try to update all job ++ * slots ++ */ ++ kbase_backend_slot_update(kbdev); ++} + -+ /* Wait for the RESET_COMPLETED interrupt to be raised */ -+ kbase_pm_wait_for_reset(kbdev); ++void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) ++{ ++ unsigned int js; + -+ if (!rtdata.timed_out) { -+ /* GPU has been reset */ -+ hrtimer_cancel(&rtdata.timer); -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return 0; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* No interrupt has been received - check if the RAWSTAT register says -+ * the reset has completed -+ */ -+ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & -+ RESET_COMPLETED)) { -+ /* The interrupt is set in the RAWSTAT; this suggests that the -+ * interrupts are not getting to the CPU -+ */ -+ dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); -+ /* If interrupts aren't working we can't continue. 
*/ -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return -EINVAL; -+ } ++ /* Reset should always take the GPU out of protected mode */ ++ WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + -+ if (kbase_is_gpu_removed(kbdev)) { -+ dev_dbg(kbdev->dev, "GPU has been removed, reset no longer needed.\n"); -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return -EINVAL; -+ } ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int atom_idx = 0; ++ int idx; + -+ /* The GPU doesn't seem to be responding to the reset so try a hard -+ * reset, but only when NOT in arbitration mode. -+ */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (!kbdev->arb.arb_if) { -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", -+ RESET_TIMEOUT); -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_HARD_RESET); ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, atom_idx); ++ bool keep_in_jm_rb = false; + -+ /* Restart the timer to wait for the hard reset to complete */ -+ rtdata.timed_out = false; ++ if (!katom) ++ break; ++ if (katom->protected_state.exit == ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { ++ /* protected mode sanity checks */ ++ WARN(kbase_jd_katom_is_protected(katom) != ++ kbase_gpu_in_protected_mode(kbdev), ++ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", ++ kbase_jd_katom_is_protected(katom), ++ kbase_gpu_in_protected_mode(kbdev)); ++ WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && ++ kbase_jd_katom_is_protected(katom), ++ "Protected atom on JS%u not supported", js); ++ } ++ if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && ++ !kbase_ctx_flag(katom->kctx, KCTX_DYING)) ++ keep_in_jm_rb = true; + -+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), -+ HRTIMER_MODE_REL); ++ kbase_gpu_release_atom(kbdev, katom, NULL); + -+ /* Wait for the RESET_COMPLETED interrupt to be raised */ -+ kbase_pm_wait_for_reset(kbdev); ++ /* ++ * If the atom wasn't on HW when the reset was issued ++ * then leave it in the RB and next time we're kicked ++ * it will be processed again from the starting state. ++ */ ++ if (keep_in_jm_rb) { ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ /* As the atom was not removed, increment the ++ * index so that we read the correct atom in the ++ * next iteration. ++ */ ++ atom_idx++; ++ continue; ++ } + -+ if (!rtdata.timed_out) { -+ /* GPU has been reset */ -+ hrtimer_cancel(&rtdata.timer); -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return 0; ++ /* ++ * The atom was on the HW when the reset was issued ++ * all we can do is fail the atom. ++ */ ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ kbase_jm_complete(kbdev, katom, end_timestamp); + } + -+ destroy_hrtimer_on_stack(&rtdata.timer); ++ /* Clear the slot's last katom submission kctx on reset */ ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; ++ } + -+ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", -+ RESET_TIMEOUT); -+#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* Re-enable GPU hardware counters if we're resetting from protected ++ * mode. 
++ */ ++ kbdev->protected_mode_hwcnt_desired = true; ++ if (kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbdev->protected_mode_hwcnt_disabled = false; ++ ++ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev); + } -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ return -EINVAL; ++ kbdev->protected_mode_transition = false; ++ kbase_pm_protected_override_disable(kbdev); +} + -+int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) ++/** ++ * should_stop_next_atom - given a soft/hard stop action, determine if the next ++ * atom on a slot should be stopped ++ * @kbdev: kbase devices ++ * @head_katom: atom currently in the JSn_HEAD ++ * @next_katom: atom currently in the JSn_HEAD_NEXT ++ * @action: JS_COMMAND_<...> action for soft/hard-stop ++ * ++ * This is used in cases where @head_katom is the target of the soft/hard-stop. ++ * It only makes sense to call this when @head_katom and @next_katom are from ++ * the same slot. ++ * ++ * Return: true if @next_katom should also be stopped with the given action, ++ * false otherwise ++ */ ++static bool should_stop_next_atom(struct kbase_device *kbdev, ++ const struct kbase_jd_atom *head_katom, ++ const struct kbase_jd_atom *next_katom, ++ u32 action) +{ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_SET_PROTECTED_MODE); -+ return 0; ++ bool ret = false; ++ u32 hw_action = action & JS_COMMAND_MASK; ++ ++ switch (hw_action) { ++ case JS_COMMAND_SOFT_STOP: ++ ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, ++ 0u); ++ break; ++ case JS_COMMAND_HARD_STOP: ++ /* Unlike soft-stop, a hard-stop targeting a particular atom ++ * should not cause atoms from unrelated contexts to be ++ * removed ++ */ ++ ret = (head_katom->kctx == next_katom->kctx); ++ break; ++ default: ++ /* Other stop actions are possible, but the driver should not ++ * be generating them at this point in the call chain ++ */ ++ WARN(1, "Unexpected stop action: 0x%.8x", hw_action); ++ break; ++ } ++ return ret; +} + -+int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) ++static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, ++ struct kbase_jd_atom *katom, u32 action) +{ -+ lockdep_assert_held(&kbdev->pm.lock); ++ struct kbase_context *kctx = katom->kctx; ++ u32 hw_action = action & JS_COMMAND_MASK; + -+ return kbase_pm_do_reset(kbdev); ++ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); ++ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, ++ katom->core_req, katom); ++ kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); +} + -+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) ++static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ u32 action, ++ bool disjoint) +{ -+ unsigned long irq_flags; -+ int err = 0; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ lockdep_assert_held(&kbdev->pm.lock); -+ -+ /* Ensure the clock is on before attempting to access the hardware */ -+ if (!kbdev->pm.backend.gpu_powered) { -+ if (kbdev->pm.backend.callback_power_on) -+ kbdev->pm.backend.callback_power_on(kbdev); ++ struct kbase_context *kctx = katom->kctx; + -+ kbdev->pm.backend.gpu_powered = true; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Ensure interrupts are off to begin with, this also clears any -+ * outstanding interrupts -+ */ -+ kbase_pm_disable_interrupts(kbdev); -+ /* Ensure cache snoops are disabled before reset. 
*/ -+ kbase_pm_cache_snoop_disable(kbdev); -+ /* Prepare for the soft-reset */ -+ kbdev->pm.backend.reset_done = false; ++ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_gpu_mark_atom_for_return(kbdev, katom); ++ kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, ++ katom->sched_priority); + -+ /* The cores should be made unavailable due to the reset */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ if (disjoint) ++ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, ++ katom); ++} + -+ /* Soft reset the GPU */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (!(flags & PM_NO_RESET)) -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ err = kbdev->protected_ops->protected_mode_disable( -+ kbdev->protected_dev); ++static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) ++{ ++ if (katom->x_post_dep) { ++ struct kbase_jd_atom *dep_atom = katom->x_post_dep; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+#if MALI_USE_CSF -+ if (kbdev->protected_mode) { -+ unsigned long flags; ++ if (dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && ++ dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_RETURN_TO_JS) ++ return dep_atom->slot_nr; ++ } ++ return -1; ++} + -+ kbase_ipa_control_protm_exited(kbdev); ++bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js, struct kbase_jd_atom *katom, u32 action) ++{ ++ struct kbase_jd_atom *katom_idx0; ++ struct kbase_context *kctx_idx0 = NULL; ++ struct kbase_jd_atom *katom_idx1; ++ struct kbase_context *kctx_idx1 = NULL; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ } -+#endif -+ kbdev->protected_mode = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ bool katom_idx0_valid, katom_idx1_valid; + -+ if (err) -+ goto exit; ++ bool ret = false; + -+ if (flags & PM_HW_ISSUES_DETECT) { -+ err = kbase_pm_hw_issues_detect(kbdev); -+ if (err) -+ goto exit; -+ } ++ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; ++ int prio_idx0 = 0, prio_idx1 = 0; + -+ kbase_pm_hw_issues_apply(kbdev); -+ kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Sanity check protected mode was left after reset */ -+ WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & -+ GPU_STATUS_PROTECTED_MODE_ACTIVE); ++ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); ++ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + -+ /* If cycle counter was in use re-enable it, enable_irqs will only be -+ * false when called from kbase_pm_powerup -+ */ -+ if (kbdev->pm.backend.gpu_cycle_counter_requests && -+ (flags & PM_ENABLE_IRQS)) { -+ kbase_pm_enable_interrupts(kbdev); ++ if (katom_idx0) { ++ kctx_idx0 = katom_idx0->kctx; ++ prio_idx0 = katom_idx0->sched_priority; ++ } ++ if (katom_idx1) { ++ kctx_idx1 = katom_idx1->kctx; ++ prio_idx1 = katom_idx1->sched_priority; ++ } + -+ /* Re-enable the counters if we need to */ -+ spin_lock_irqsave( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); -+ if (kbdev->pm.backend.gpu_cycle_counter_requests) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_START); -+ spin_unlock_irqrestore( -+ 
&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); -+ -+ kbase_pm_disable_interrupts(kbdev); ++ if (katom) { ++ katom_idx0_valid = (katom_idx0 == katom); ++ if (katom_idx1) ++ katom_idx1_valid = (katom_idx1 == katom); ++ else ++ katom_idx1_valid = false; ++ } else { ++ katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); ++ katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); + } ++ /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided ++ * to stop, but we're stopping the JSn_HEAD atom, see if they are ++ * related/ordered in some way that would require the same stop action ++ */ ++ if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) ++ katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, ++ katom_idx1, action); + -+ if (flags & PM_ENABLE_IRQS) -+ kbase_pm_enable_interrupts(kbdev); ++ if (katom_idx0_valid) ++ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); ++ if (katom_idx1_valid) ++ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + -+exit: -+#if !MALI_USE_CSF -+ if (!kbdev->pm.backend.protected_entry_transition_override) { -+ /* Re-enable GPU hardware counters if we're resetting from -+ * protected mode. -+ */ -+ reenable_protected_mode_hwcnt(kbdev); -+ } -+#endif ++ if (katom_idx0_valid) { ++ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Simple case - just dequeue and return */ ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ if (katom_idx1_valid) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ katom_idx1->event_code = ++ BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx1); ++ kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, ++ prio_idx1); ++ } + -+ return err; -+} ++ katom_idx0->event_code = ++ BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx0); ++ kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, ++ prio_idx0); ++ } else { ++ /* katom_idx0 is on GPU */ ++ if (katom_idx1_valid && katom_idx1->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* katom_idx0 and katom_idx1 are on GPU */ + -+/** -+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters -+ * @kbdev: The kbase device structure of the device -+ * -+ * Increase the count of cycle counter users and turn the cycle counters on if -+ * they were previously off -+ * -+ * This function is designed to be called by -+ * kbase_pm_request_gpu_cycle_counter() or -+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only -+ * -+ * When this function is called the l2 cache must be on - i.e., the GPU must be -+ * on. 
-+ */ -+static void -+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT)) == 0) { ++ /* idx0 has already completed - stop ++ * idx1 if needed ++ */ ++ if (katom_idx1_valid) { ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } else { ++ /* idx1 is in NEXT registers - attempt ++ * to remove ++ */ ++ kbase_reg_write(kbdev, ++ JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), ++ JS_COMMAND_NOP); + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); -+ ++kbdev->pm.backend.gpu_cycle_counter_requests; ++ if (kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_LO)) ++ != 0 || ++ kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_HI)) ++ != 0) { ++ /* idx1 removed successfully, ++ * will be handled in IRQ ++ */ ++ kbase_gpu_remove_atom(kbdev, ++ katom_idx1, ++ action, true); ++ /* Revert the last_context. */ ++ kbdev->hwaccess.backend.slot_rb[js] ++ .last_kctx_tagged = ++ SLOT_RB_TAG_KCTX(katom_idx0->kctx); + -+ if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_START); -+ else { -+ /* This might happen after GPU reset. -+ * Then counter needs to be kicked. -+ */ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & -+ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_START); -+ } -+#endif -+ } ++ stop_x_dep_idx1 = ++ should_stop_x_dep_slot(katom_idx1); + -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); -+} ++ /* stop idx0 if still on GPU */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx0, ++ action); ++ ret = true; ++ } else if (katom_idx1_valid) { ++ /* idx0 has already completed, ++ * stop idx1 if needed ++ */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } ++ } else if (katom_idx1_valid) { ++ /* idx1 not on GPU but must be dequeued*/ + -+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ /* idx1 will be handled in IRQ */ ++ kbase_gpu_remove_atom(kbdev, katom_idx1, action, ++ false); ++ /* stop idx0 */ ++ /* This will be repeated for anything removed ++ * from the next registers, since their normal ++ * flow was also interrupted, and this function ++ * might not enter disjoint state e.g. 
if we ++ * don't actually do a hard stop on the head ++ * atom ++ */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx0, ++ action); ++ ret = true; ++ } else { ++ /* no atom in idx1 */ ++ /* just stop idx0 */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx0, ++ action); ++ ret = true; ++ } ++ } ++ } else if (katom_idx1_valid) { ++ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Mark for return */ ++ /* idx1 will be returned once idx0 completes */ ++ kbase_gpu_remove_atom(kbdev, katom_idx1, action, ++ false); ++ } else { ++ /* idx1 is on GPU */ ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT)) == 0) { ++ /* idx0 has already completed - stop idx1 */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx1, ++ action); ++ ret = true; ++ } else { ++ /* idx1 is in NEXT registers - attempt to ++ * remove ++ */ ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), ++ JS_COMMAND_NOP); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_LO)) != 0 || ++ kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_HI)) != 0) { ++ /* idx1 removed successfully, will be ++ * handled in IRQ once idx0 completes ++ */ ++ kbase_gpu_remove_atom(kbdev, katom_idx1, ++ action, ++ false); ++ /* Revert the last_context, or mark as purged */ ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = ++ kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : ++ SLOT_RB_TAG_PURGED; ++ } else { ++ /* idx0 has already completed - stop ++ * idx1 ++ */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } ++ } ++ } + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < -+ INT_MAX); + -+ kbase_pm_wait_for_l2_powered(kbdev); ++ if (stop_x_dep_idx0 != -1) ++ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, ++ NULL, action); + -+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); -+} ++ if (stop_x_dep_idx1 != -1) ++ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, ++ NULL, action); + -+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); ++ return ret; ++} + -+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) ++void kbase_backend_cache_clean(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ if (katom->need_cache_flush_cores_retained) { ++ kbase_gpu_start_cache_clean(kbdev, ++ GPU_COMMAND_CACHE_CLN_INV_FULL); ++ kbase_gpu_wait_cache_clean(kbdev); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < -+ INT_MAX); ++ katom->need_cache_flush_cores_retained = false; ++ } ++} + -+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++void kbase_backend_complete_wq(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ /* ++ * If cache flush required due to HW workaround then perform the flush ++ * now ++ */ ++ kbase_backend_cache_clean(kbdev, katom); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); ++void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, ++ base_jd_core_req core_req) ++{ ++ if (!kbdev->pm.active_count) { ++ kbase_pm_lock(kbdev); ++ kbase_pm_update_active(kbdev); ++ kbase_pm_unlock(kbdev); ++ } ++} + -+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) ++void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + unsigned long flags; ++ unsigned int js; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ 
lockdep_assert_held(&kbdev->hwaccess_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); ++ dev_info(kbdev->dev, "%s:\n", __func__); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int idx; + -+ --kbdev->pm.backend.gpu_cycle_counter_requests; ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, ++ idx); + -+ if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_STOP); ++ if (katom) ++ dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", ++ js, idx, katom, katom->gpu_rb_state); ++ else ++ dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); ++ } ++ } + -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) ++void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ unsigned long flags; ++ unsigned int js; ++ bool tracked = false; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { ++ /* Marking the slot kctx tracking field is purged */ ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; ++ tracked = true; ++ } ++ } + -+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h ++ if (tracked) { ++ /* The context had run some jobs before the purge, other slots ++ * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as ++ * purged as well. ++ */ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == ++ SLOT_RB_NULL_TAG_VAL) ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = ++ SLOT_RB_TAG_PURGED; ++ } ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 -index 000000000..e66ce57d3 +index 000000000..32be0bf44 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h -@@ -0,0 +1,1021 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h +@@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -133649,1015 +133061,669 @@ index 000000000..e66ce57d3 + */ + +/* -+ * Power management API definitions used internally by GPU backend ++ * Register-based HW access backend specific APIs + */ + -+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ -+#define _KBASE_BACKEND_PM_INTERNAL_H_ -+ -+#include -+ -+#include "backend/gpu/mali_kbase_pm_ca.h" -+#include "mali_kbase_pm_policy.h" -+ -+ -+/** -+ * kbase_pm_dev_idle - The GPU is idle. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * The OS may choose to turn off idle devices -+ */ -+void kbase_pm_dev_idle(struct kbase_device *kbdev); ++#ifndef _KBASE_HWACCESS_GPU_H_ ++#define _KBASE_HWACCESS_GPU_H_ + -+/** -+ * kbase_pm_dev_activate - The GPU is active. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * The OS should avoid opportunistically turning off the GPU while it is active -+ */ -+void kbase_pm_dev_activate(struct kbase_device *kbdev); ++#include + +/** -+ * kbase_pm_get_present_cores - Get details of the cores that are present in -+ * the device. ++ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) ++ * @kbdev: Device pointer ++ * @js: Job slot to evict from ++ * @completion_code: Event code from job that was run. + * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) present in the GPU device and also a count of -+ * the number of cores. ++ * Evict the atom in the NEXT slot for the specified job slot. This function is ++ * called from the job complete IRQ handler when the previous job has failed. + * -+ * Return: The bit mask of cores present ++ * Return: true if job evicted from NEXT registers, false otherwise + */ -+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); + +/** -+ * kbase_pm_get_active_cores - Get details of the cores that are currently -+ * active in the device. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are actively processing work (i.e. -+ * turned on *and* busy). ++ * kbase_gpu_complete_hw - Complete an atom on job slot js + * -+ * Return: The bit mask of active cores ++ * @kbdev: Device pointer ++ * @js: Job slot that has completed ++ * @completion_code: Event code from job that has completed ++ * @job_tail: The tail address from the hardware if the job has partially ++ * completed ++ * @end_timestamp: Time of completion + */ -+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, ++ u64 job_tail, ktime_t *end_timestamp); + +/** -+ * kbase_pm_get_trans_cores - Get details of the cores that are currently -+ * transitioning between power states. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are currently transitioning between -+ * power states. ++ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * -+ * Return: The bit mask of transitioning cores ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * @idx: Index into ringbuffer. 0 is the job currently running on ++ * the slot, 1 is the job waiting, all other values are invalid. ++ * Return: The atom at that position in the ringbuffer ++ * or NULL if no atom present + */ -+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); + +/** -+ * kbase_pm_get_ready_cores - Get details of the cores that are currently -+ * powered and ready for jobs. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are powered and ready for jobs (they may -+ * or may not be currently executing jobs). ++ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers + * -+ * Return: The bit mask of ready cores ++ * @kbdev: Device pointer + */ -+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++void kbase_gpu_dump_slots(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device -+ * interrupts. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_resume: true if clock on due to resume after suspend, false otherwise ++#endif /* _KBASE_HWACCESS_GPU_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +new file mode 100644 +index 000000000..cbc88f91a +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +@@ -0,0 +1,377 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * This function can be used by a power policy to turn the clock for the GPU on. -+ * It should be modified during integration to perform the necessary actions to -+ * ensure that the GPU is fully powered and clocked. -+ */ -+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); -+ -+/** -+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the -+ * device off. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * This function can be used by a power policy to turn the clock for the GPU -+ * off. It should be modified during integration to perform the necessary -+ * actions to turn the clock off (if this is possible in the integration). 
++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used -+ * then this function would usually be invoked from the runtime suspend -+ * callback function. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if clock was turned off, or -+ * false if clock can not be turned off due to pending page/bus fault -+ * workers. Caller must flush MMU workqueues and retry + */ -+bool kbase_pm_clock_off(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_enable_interrupts - Enable interrupts on the device. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Interrupts are also enabled after a call to kbase_pm_clock_on(). ++/* ++ * Register-based HW access backend specific job scheduler APIs + */ -+void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_disable_interrupts - Disable interrupts on the device. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This prevents delivery of Power Management interrupts to the CPU so that -+ * kbase_pm_update_state() will not be called from the IRQ handler -+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. -+ * -+ * Interrupts are also disabled after a call to kbase_pm_clock_off(). -+ */ -+void kbase_pm_disable_interrupts(struct kbase_device *kbdev); ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() -+ * that does not take the hwaccess_lock -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Caller must hold the hwaccess_lock. ++#if !MALI_USE_CSF ++/* ++ * Hold the runpool_mutex for this + */ -+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); ++static inline bool timer_callback_should_run(struct kbase_device *kbdev) ++{ ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ int nr_running_ctxs; + -+/** -+ * kbase_pm_init_hw - Initialize the hardware. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @flags: Flags specifying the type of PM init -+ * -+ * This function checks the GPU ID register to ensure that the GPU is supported -+ * by the driver and performs a reset on the device so that it is in a known -+ * state before the device is used. -+ * -+ * Return: 0 if the device is supported and successfully reset. -+ */ -+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); ++ lockdep_assert_held(&kbdev->js_data.runpool_mutex); + -+/** -+ * kbase_pm_reset_done - The GPU has been reset successfully. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function must be called by the GPU interrupt handler when the -+ * RESET_COMPLETED bit is set. It signals to the power management initialization -+ * code that the GPU has been successfully reset. 
-+ */ -+void kbase_pm_reset_done(struct kbase_device *kbdev); ++ /* Timer must stop if we are suspending */ ++ if (backend->suspend_timer) ++ return false; + -+#if MALI_USE_CSF -+/** -+ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be -+ * reached -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Wait for the L2 and MCU state machines to reach the states corresponding -+ * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'. -+ * -+ * The usual use-case for this is to ensure that all parts of GPU have been -+ * powered up after performing a GPU Reset. -+ * -+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, -+ * because this function will take that lock itself. -+ * -+ * NOTE: This may not wait until the correct state is reached if there is a -+ * power off in progress and kbase_pm_context_active() was called instead of -+ * kbase_csf_scheduler_pm_active(). -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); -+#else -+/** -+ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be -+ * reached -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Wait for the L2 and shader power state machines to reach the states -+ * corresponding to the values of 'l2_desired' and 'shaders_desired'. -+ * -+ * The usual use-case for this is to ensure cores are 'READY' after performing -+ * a GPU Reset. -+ * -+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, -+ * because this function will take that lock itself. -+ * -+ * NOTE: This may not wait until the correct state is reached if there is a -+ * power off in progress. To correctly wait for the desired state the caller -+ * must ensure that this is not the case by, for example, calling -+ * kbase_pm_wait_for_poweroff_work_complete() -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); -+#endif ++ /* nr_contexts_pullable is updated with the runpool_mutex. However, the ++ * locking in the caller gives us a barrier that ensures ++ * nr_contexts_pullable is up-to-date for reading ++ */ ++ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + -+/** -+ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Wait for the L2 to be powered on, and for the L2 and the state machines of -+ * its dependent stack components to stabilise. -+ * -+ * kbdev->pm.active_count must be non-zero when calling this function. -+ * -+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, -+ * because this function will take that lock itself. -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (kbdev->js_data.softstop_always) { ++ /* Debug support for allowing soft-stop on a single context */ ++ return true; ++ } ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+#if MALI_USE_CSF -+/** -+ * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function can be called to ensure that the downscaling of cores is -+ * effectively complete and it would be safe to lower the voltage. 
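A minimal sketch of the usual post-reset sequence implied by the comments above, assuming the caller already holds an active PM reference; the wrapper name is hypothetical:

static int example_wait_gpu_powered_after_reset(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Kick the L2/MCU (or shader) state machines towards the desired
	 * state while holding hwaccess_lock.
	 */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_update_state(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* The wait takes hwaccess_lock itself, so it must not be held here. */
	return kbase_pm_wait_for_desired_state(kbdev);
}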
-+ * The function assumes that caller had exercised the MCU state machine for the -+ * downscale request through the kbase_pm_update_state() function. -+ * -+ * This function needs to be used by the caller to safely wait for the completion -+ * of downscale request, instead of kbase_pm_wait_for_desired_state(). -+ * The downscale request would trigger a state change in MCU state machine -+ * and so when MCU reaches the stable ON state, it can be inferred that -+ * downscaling is complete. But it has been observed that the wake up of the -+ * waiting thread can get delayed by few milli seconds and by the time the -+ * thread wakes up the power down transition could have started (after the -+ * completion of downscale request). -+ * On the completion of power down transition another wake up signal would be -+ * sent, but again by the time thread wakes up the power up transition can begin. -+ * And the power up transition could then get blocked inside the platform specific -+ * callback_power_on() function due to the thread that called into Kbase (from the -+ * platform specific code) to perform the downscaling and then ended up waiting -+ * for the completion of downscale request. -+ * -+ * Return: 0 on success, error code on error or remaining jiffies on timeout. -+ */ -+int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); -+#endif ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { ++ /* Timeouts would have to be 4x longer (due to micro- ++ * architectural design) to support OpenCL conformance tests, so ++ * only run the timer when there's: ++ * - 2 or more CL contexts ++ * - 1 or more GLES contexts ++ * ++ * NOTE: We will treat a context that has both Compute and Non- ++ * Compute jobs will be treated as an OpenCL context (hence, we ++ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
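To make the down-scale hand-shake above concrete, a sketch of how platform DVFS code might wait for core down-scaling to settle before dropping the voltage; the wrapper and the voltage helper are assumptions, only the dedicated wait function comes from the patch:

static int example_complete_core_downscale(struct kbase_device *kbdev)
{
	/* The down-scale request is assumed to have already been pushed
	 * through kbase_pm_update_state(). Use the dedicated wait here,
	 * not kbase_pm_wait_for_desired_state(), for the reasons given
	 * above.
	 */
	int err = kbase_pm_wait_for_cores_down_scale(kbdev);

	if (!err)
		example_platform_lower_voltage(kbdev); /* hypothetical hook */

	return err;
}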
++ */ ++ { ++ int nr_compute_ctxs = ++ kbasep_js_ctx_attr_count_on_runpool(kbdev, ++ KBASEP_JS_CTX_ATTR_COMPUTE); ++ int nr_noncompute_ctxs = nr_running_ctxs - ++ nr_compute_ctxs; + -+/** -+ * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state -+ * machines after changing shader core -+ * availability -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * It can be called in any status, so need to check the l2 and shader core -+ * power status in this function or it will break shader/l2 state machine -+ * -+ * Caller must hold hwaccess_lock -+ */ -+void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev); ++ return (bool) (nr_compute_ctxs >= 2 || ++ nr_noncompute_ctxs > 0); ++ } ++ } else { ++ /* Run the timer callback whenever you have at least 1 context ++ */ ++ return (bool) (nr_running_ctxs > 0); ++ } ++} + -+/** -+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() -+ * where the caller must hold -+ * kbase_device.hwaccess_lock -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); ++static enum hrtimer_restart timer_callback(struct hrtimer *timer) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_backend_data *backend; ++ unsigned int s; ++ bool reset_needed = false; + -+/** -+ * kbase_pm_update_state - Update the L2 and shader power state machines -+ * @kbdev: Device pointer -+ */ -+void kbase_pm_update_state(struct kbase_device *kbdev); ++ KBASE_DEBUG_ASSERT(timer != NULL); + -+/** -+ * kbase_pm_state_machine_init - Initialize the state machines, primarily the -+ * shader poweroff timer -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbase_pm_state_machine_init(struct kbase_device *kbdev); ++ backend = container_of(timer, struct kbase_backend_data, ++ scheduling_timer); ++ kbdev = container_of(backend, struct kbase_device, hwaccess.backend); ++ js_devdata = &kbdev->js_data; + -+/** -+ * kbase_pm_state_machine_term - Clean up the PM state machines' data -+ * @kbdev: Device pointer -+ */ -+void kbase_pm_state_machine_term(struct kbase_device *kbdev); ++ /* Loop through the slots */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { ++ struct kbase_jd_atom *atom = NULL; + -+/** -+ * kbase_pm_update_cores_state - Update the desired state of shader cores from -+ * the Power Policy, and begin any power -+ * transitions. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function will update the desired_xx_state members of -+ * struct kbase_pm_device_data by calling into the current Power Policy. It will -+ * then begin power transitions to make the hardware acheive the desired shader -+ * core state. -+ */ -+void kbase_pm_update_cores_state(struct kbase_device *kbdev); ++ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { ++ atom = kbase_gpu_inspect(kbdev, s, 0); ++ KBASE_DEBUG_ASSERT(atom != NULL); ++ } + -+/** -+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This must be called before other metric gathering APIs are called. 
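For illustration, the hwaccess_lock convention for the _nolock core-state update described above can be sketched as follows; the wrapper name is an assumption:

static void example_update_cores_locked(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	/* With hwaccess_lock held, the _nolock variant must be used. */
	kbase_pm_update_cores_state_nolock(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}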
-+ * -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbasep_pm_metrics_init(struct kbase_device *kbdev); ++ if (atom != NULL) { ++ /* The current version of the model doesn't support ++ * Soft-Stop ++ */ ++ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { ++ u32 ticks = atom->ticks++; + -+/** -+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This must be called when metric gathering is no longer required. It is an -+ * error to call any metrics gathering function (other than -+ * kbasep_pm_metrics_init()) after calling this function. -+ */ -+void kbasep_pm_metrics_term(struct kbase_device *kbdev); ++#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) ++ u32 soft_stop_ticks, hard_stop_ticks, ++ gpu_reset_ticks; ++ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ soft_stop_ticks = ++ js_devdata->soft_stop_ticks_cl; ++ hard_stop_ticks = ++ js_devdata->hard_stop_ticks_cl; ++ gpu_reset_ticks = ++ js_devdata->gpu_reset_ticks_cl; ++ } else { ++ soft_stop_ticks = ++ js_devdata->soft_stop_ticks; ++ if (kbase_is_quick_reset_enabled(kbdev)) { ++ hard_stop_ticks = 2; ++ gpu_reset_ticks = 3; ++ } else { ++ hard_stop_ticks = ++ js_devdata->hard_stop_ticks_ss; ++ gpu_reset_ticks = ++ js_devdata->gpu_reset_ticks_ss; ++ } ++ } + -+/** -+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to -+ * update the vsync metric. -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @buffer_updated: True if the buffer has been updated on this VSync, -+ * false otherwise -+ * -+ * This function should be called by the frame buffer driver to update whether -+ * the system is hitting the vsync target or not. buffer_updated should be true -+ * if the vsync corresponded with a new frame being displayed, otherwise it -+ * should be false. This function does not need to be called every vsync, but -+ * only when the value of @buffer_updated differs from a previous call. -+ */ -+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); ++ /* If timeouts have been changed then ensure ++ * that atom tick count is not greater than the ++ * new soft_stop timeout. This ensures that ++ * atoms do not miss any of the timeouts due to ++ * races between this worker and the thread ++ * changing the timeouts. ++ */ ++ if (backend->timeouts_updated && ++ ticks > soft_stop_ticks) ++ ticks = atom->ticks = soft_stop_ticks; + -+/** -+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change -+ * the clock speed of the GPU. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function should be called regularly by the DVFS system to check whether -+ * the clock speed of the GPU needs updating. -+ */ -+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); ++ /* Job is Soft-Stoppable */ ++ if (ticks == soft_stop_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->soft_stop_ticks ticks. ++ * Soft stop the slot so we can run ++ * other jobs. 
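A sketch of how a frame buffer driver might feed the vsync metric described above; the hook name and the new_frame flag are assumptions:

static void example_display_vsync_hook(struct kbase_device *kbdev, bool new_frame)
{
	/* Only required when the value changes, but reporting on every
	 * vsync is harmless.
	 */
	kbase_pm_report_vsync(kbdev, new_frame ? 1 : 0);
}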
++ */ ++#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS ++ int disjoint_threshold = ++ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; ++ u32 softstop_flags = 0u; + -+/** -+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is -+ * needed -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * If the caller is the first caller then the GPU cycle counters will be enabled -+ * along with the l2 cache -+ * -+ * The GPU must be powered when calling this function (i.e. -+ * kbase_pm_context_active() must have been called). -+ * -+ */ -+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); ++ dev_dbg(kbdev->dev, "Soft-stop"); ++ /* nr_user_contexts_running is updated ++ * with the runpool_mutex, but we can't ++ * take that here. ++ * ++ * However, if it's about to be ++ * increased then the new context can't ++ * run any jobs until they take the ++ * hwaccess_lock, so it's OK to observe ++ * the older value. ++ * ++ * Similarly, if it's about to be ++ * decreased, the last job from another ++ * context has already finished, so ++ * it's not too bad that we observe the ++ * older value and register a disjoint ++ * event when we try soft-stopping ++ */ ++ if (js_devdata->nr_user_contexts_running ++ >= disjoint_threshold) ++ softstop_flags |= ++ JS_COMMAND_SW_CAUSES_DISJOINT; + -+/** -+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is -+ * needed (l2 cache already on) -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This is a version of the above function -+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the -+ * l2 cache is known to be on and assured to be on until the subsequent call of -+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does -+ * not sleep and can be called from atomic functions. -+ * -+ * The GPU must be powered when calling this function (i.e. -+ * kbase_pm_context_active() must have been called) and the l2 cache must be -+ * powered on. -+ */ -+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); ++ kbase_job_slot_softstop_swflags(kbdev, ++ s, atom, softstop_flags); ++#endif ++ } else if (ticks == hard_stop_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->hard_stop_ticks_ss ticks. ++ * It should have been soft-stopped by ++ * now. Hard stop the slot. ++ */ ++#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ int ms = ++ js_devdata->scheduling_period_ns ++ / 1000000u; ++ if (!kbase_is_quick_reset_enabled(kbdev)) ++ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", ++ (unsigned long)ticks, ++ (unsigned long)ms); ++ kbase_job_slot_hardstop(atom->kctx, s, ++ atom); ++#endif ++ } else if (ticks == gpu_reset_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->gpu_reset_ticks_ss ticks. ++ * It should have left the GPU by now. ++ * Signal that the GPU needs to be ++ * reset. ++ */ ++ reset_needed = true; ++ } ++#else /* !CONFIG_MALI_JOB_DUMP */ ++ /* NOTE: During CONFIG_MALI_JOB_DUMP, we use ++ * the alternate timeouts, which makes the hard- ++ * stop and GPU reset timeout much longer. We ++ * also ensure that we don't soft-stop at all. ++ */ ++ if (ticks == js_devdata->soft_stop_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->soft_stop_ticks. We do ++ * not soft-stop during ++ * CONFIG_MALI_JOB_DUMP, however. 
++ */ ++ dev_dbg(kbdev->dev, "Soft-stop"); ++ } else if (ticks == ++ js_devdata->hard_stop_ticks_dumping) { ++ /* Job has been scheduled for at least ++ * js_devdata->hard_stop_ticks_dumping ++ * ticks. Hard stop the slot. ++ */ ++#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ int ms = ++ js_devdata->scheduling_period_ns ++ / 1000000u; ++ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", ++ (unsigned long)ticks, ++ (unsigned long)ms); ++ kbase_job_slot_hardstop(atom->kctx, s, ++ atom); ++#endif ++ } else if (ticks == ++ js_devdata->gpu_reset_ticks_dumping) { ++ /* Job has been scheduled for at least ++ * js_devdata->gpu_reset_ticks_dumping ++ * ticks. It should have left the GPU by ++ * now. Signal that the GPU needs to be ++ * reset. ++ */ ++ reset_needed = true; ++ } ++#endif /* !CONFIG_MALI_JOB_DUMP */ ++ } ++ } ++ } ++ if (reset_needed) { ++ if (kbase_is_quick_reset_enabled(kbdev)) ++ dev_err(kbdev->dev, "quick reset"); ++ else { ++ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); ++ } + -+/** -+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no -+ * longer in use -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * If the caller is the last caller then the GPU cycle counters will be -+ * disabled. A request must have been made before a call to this. -+ * -+ * Caller must not hold the hwaccess_lock, as it will be taken in this function. -+ * If the caller is already holding this lock then -+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. -+ */ -+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu_locked(kbdev); ++ } ++ /* the timer is re-issued if there is contexts in the run-pool */ + -+/** -+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() -+ * that does not take hwaccess_lock -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Caller must hold the hwaccess_lock. -+ */ -+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); ++ if (backend->timer_running) ++ hrtimer_start(&backend->scheduling_timer, ++ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), ++ HRTIMER_MODE_REL); + -+/** -+ * kbase_pm_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to -+ * complete -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function effectively just waits for the @gpu_poweroff_wait_work work -+ * item to complete, if it was enqueued. GPU may not have been powered down -+ * before this function returns. -+ */ -+void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); ++ backend->timeouts_updated = false; + -+/** -+ * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function waits for the actual gpu power down to complete. 
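For illustration, a sketch of the request/release pairing for the GPU cycle counter documented above; the wrapper name and whatever is done while the counter is enabled are assumptions:

static void example_with_cycle_counter(struct kbase_device *kbdev)
{
	/* The GPU must already be powered, i.e. kbase_pm_context_active()
	 * has been called by this point.
	 */
	kbase_pm_request_gpu_cycle_counter(kbdev);

	/* ... read/use the cycle counter here ... */

	/* hwaccess_lock is not held here, so the plain release is fine;
	 * under that lock the _nolock variant would be required.
	 */
	kbase_pm_release_gpu_cycle_counter(kbdev);
}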
-+ */ -+void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Setup the power management callbacks and initialize/enable the runtime-pm -+ * for the Mali GPU platform device, using the callback function. This must be -+ * called before the kbase_pm_register_access_enable() function. -+ * -+ * Return: 0 on success, error code on error -+ */ -+int kbase_pm_runtime_init(struct kbase_device *kbdev); ++ return HRTIMER_NORESTART; ++} ++#endif /* !MALI_USE_CSF */ + -+/** -+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_runtime_term(struct kbase_device *kbdev); ++void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) ++{ ++#if !MALI_USE_CSF ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ unsigned long flags; + -+/** -+ * kbase_pm_register_access_enable - Enable access to GPU registers -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Enables access to the GPU registers before power management has powered up -+ * the GPU with kbase_pm_powerup(). -+ * -+ * This results in the power management callbacks provided in the driver -+ * configuration to get called to turn on power and/or clocks to the GPU. See -+ * kbase_pm_callback_conf. -+ * -+ * This should only be used before power management is powered up with -+ * kbase_pm_powerup() -+ */ -+void kbase_pm_register_access_enable(struct kbase_device *kbdev); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+/** -+ * kbase_pm_register_access_disable - Disable early register access -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Disables access to the GPU registers enabled earlier by a call to -+ * kbase_pm_register_access_enable(). -+ * -+ * This results in the power management callbacks provided in the driver -+ * configuration to get called to turn off power and/or clocks to the GPU. See -+ * kbase_pm_callback_conf -+ * -+ * This should only be used before power management is powered up with -+ * kbase_pm_powerup() -+ */ -+void kbase_pm_register_access_disable(struct kbase_device *kbdev); ++ if (!timer_callback_should_run(kbdev)) { ++ /* Take spinlock to force synchronisation with timer */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->timer_running = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* From now on, return value of timer_callback_should_run() ++ * will also cause the timer to not requeue itself. 
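A minimal sketch of the early-probe ordering described above (runtime PM init, then the register-access window, all before kbase_pm_powerup()); the probe helper name and the idea of reading IDs inside the window are assumptions:

static int example_early_register_probe(struct kbase_device *kbdev)
{
	int err = kbase_pm_runtime_init(kbdev); /* must precede enabling access */

	if (err)
		return err;

	kbase_pm_register_access_enable(kbdev); /* power/clock via callbacks */
	/* ... early register reads, e.g. discovering the GPU ID ... */
	kbase_pm_register_access_disable(kbdev);

	return 0; /* kbase_pm_powerup() happens later */
}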
Its return ++ * value cannot change, because it depends on variables updated ++ * with the runpool_mutex held, which the caller of this must ++ * also hold ++ */ ++ hrtimer_cancel(&backend->scheduling_timer); ++ } + -+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline -+ * function -+ */ ++ if (timer_callback_should_run(kbdev) && !backend->timer_running) { ++ /* Take spinlock to force synchronisation with timer */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->timer_running = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ hrtimer_start(&backend->scheduling_timer, ++ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), ++ HRTIMER_MODE_REL); + -+/** -+ * kbase_pm_metrics_is_active - Check if the power management metrics -+ * collection is active. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Note that this returns if the power management metrics collection was -+ * active at the time of calling, it is possible that after the call the metrics -+ * collection enable may have changed state. -+ * -+ * The caller must handle the consequence that the state may have changed. -+ * -+ * Return: true if metrics collection was active else false. -+ */ -+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); ++ KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); ++ } ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ ++} + -+/** -+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_resume: true if power on due to resume after suspend, -+ * false otherwise -+ */ -+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); ++int kbase_backend_timer_init(struct kbase_device *kbdev) ++{ ++#if !MALI_USE_CSF ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+/** -+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been -+ * requested. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_pm_do_poweroff(struct kbase_device *kbdev); ++ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ backend->scheduling_timer.function = timer_callback; ++ backend->timer_running = false; ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) -+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, -+ struct kbasep_pm_metrics *last, -+ struct kbasep_pm_metrics *diff); -+#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ ++ return 0; ++} + -+#ifdef CONFIG_MALI_BIFROST_DVFS ++void kbase_backend_timer_term(struct kbase_device *kbdev) ++{ ++#if !MALI_USE_CSF ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+#if MALI_USE_CSF -+/** -+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @utilisation: The current calculated utilisation by the metrics system. -+ * -+ * Function provided by platform specific code when DVFS is enabled to allow -+ * the power management metrics system to report utilisation. -+ * -+ * Return: Returns 0 on failure and non zero on success. 
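Since kbase_platform_dvfs_event() is provided by platform-specific code, one possible, purely illustrative implementation of the CSF variant is sketched below; the thresholds and the OPP helpers are assumptions:

int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation)
{
	/* utilisation comes from the metrics system; a platform would
	 * typically map it onto an operating point here.
	 */
	if (utilisation > 90)
		example_request_higher_opp(kbdev); /* hypothetical */
	else if (utilisation < 30)
		example_request_lower_opp(kbdev);  /* hypothetical */

	return 1; /* non-zero indicates success */
}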
-+ */ -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); -+#else -+/** -+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @utilisation: The current calculated utilisation by the metrics system. -+ * @util_gl_share: The current calculated gl share of utilisation. -+ * @util_cl_share: The current calculated cl share of utilisation per core -+ * group. -+ * Function provided by platform specific code when DVFS is enabled to allow -+ * the power management metrics system to report utilisation. -+ * -+ * Return: Returns 0 on failure and non zero on success. -+ */ -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, -+ u32 util_gl_share, u32 util_cl_share[2]); -+#endif ++ hrtimer_cancel(&backend->scheduling_timer); ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ ++} + -+#endif /* CONFIG_MALI_BIFROST_DVFS */ ++void kbase_backend_timer_suspend(struct kbase_device *kbdev) ++{ ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+void kbase_pm_power_changed(struct kbase_device *kbdev); ++ backend->suspend_timer = true; + -+/** -+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either -+ * about to be run or has just completed. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @now: Pointer to the timestamp of the change, or NULL to use current time -+ * -+ * Caller must hold hwaccess_lock -+ */ -+void kbase_pm_metrics_update(struct kbase_device *kbdev, -+ ktime_t *now); ++ kbase_backend_ctx_count_changed(kbdev); ++} + -+/** -+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU -+ * If the GPU does not have coherency this is a no-op -+ * @kbdev: Device pointer -+ * -+ * This function should be called after L2 power up. -+ */ ++void kbase_backend_timer_resume(struct kbase_device *kbdev) ++{ ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); ++ backend->suspend_timer = false; + -+/** -+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU -+ * If the GPU does not have coherency this is a no-op -+ * @kbdev: Device pointer -+ * -+ * This function should be called before L2 power off. -+ */ -+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); ++ kbase_backend_ctx_count_changed(kbdev); ++} + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+/** -+ * kbase_devfreq_set_core_mask - Set devfreq core mask -+ * @kbdev: Device pointer -+ * @core_mask: New core mask -+ * -+ * This function is used by devfreq to change the available core mask as -+ * required by Dynamic Core Scaling. -+ */ -+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); -+#endif ++void kbase_backend_timeouts_changed(struct kbase_device *kbdev) ++{ ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+/** -+ * kbase_pm_reset_start_locked - Signal that GPU reset has started -+ * @kbdev: Device pointer -+ * -+ * Normal power management operation will be suspended until the reset has -+ * completed. 
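For illustration, the reset notifications documented above bracket the actual reset roughly as in the sketch below; the worker name and the elided reset body are assumptions:

static void example_reset_gpu(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_reset_start_locked(kbdev); /* PM held off during the reset */
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* ... perform the actual GPU reset ... */

	kbase_pm_reset_complete(kbdev); /* PM re-evaluates which cores to power */
}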
++ backend->timeouts_updated = true; ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h +new file mode 100644 +index 000000000..4f7c371a1 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Caller must hold hwaccess_lock. -+ */ -+void kbase_pm_reset_start_locked(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_reset_complete - Signal that GPU reset has completed -+ * @kbdev: Device pointer ++ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. + * -+ * Normal power management operation will be resumed. The power manager will -+ * re-evaluate what cores are needed and power on or off as required. -+ */ -+void kbase_pm_reset_complete(struct kbase_device *kbdev); -+ -+#if !MALI_USE_CSF -+/** -+ * kbase_pm_protected_override_enable - Enable the protected mode override -+ * @kbdev: Device pointer ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * When the protected mode override is enabled, all shader cores are requested -+ * to power down, and the L2 power state can be controlled by -+ * kbase_pm_protected_l2_override(). ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * Caller must hold hwaccess_lock. -+ */ -+void kbase_pm_protected_override_enable(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_protected_override_disable - Disable the protected mode override -+ * @kbdev: Device pointer ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Caller must hold hwaccess_lock. + */ -+void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_protected_l2_override - Control the protected mode L2 override -+ * @kbdev: Device pointer -+ * @override: true to enable the override, false to disable -+ * -+ * When the driver is transitioning in or out of protected mode, the L2 cache is -+ * forced to power off. This can be overridden to force the L2 cache to power -+ * on. This is required to change coherency settings on some GPUs. ++/* ++ * Register-based HW access backend specific job scheduler APIs + */ -+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + -+/** -+ * kbase_pm_protected_entry_override_enable - Enable the protected mode entry -+ * override -+ * @kbdev: Device pointer -+ * -+ * Initiate a GPU reset and enable the protected mode entry override flag if -+ * l2_always_on WA is enabled and platform is fully coherent. If the GPU -+ * reset is already ongoing then protected mode entry override flag will not -+ * be enabled and function will have to be called again. -+ * -+ * When protected mode entry override flag is enabled to power down L2 via GPU -+ * reset, the GPU reset handling behavior gets changed. 
For example call to -+ * kbase_backend_reset() is skipped, Hw counters are not re-enabled and L2 -+ * isn't powered up again post reset. -+ * This is needed only as a workaround for a Hw issue where explicit power down -+ * of L2 causes a glitch. For entering protected mode on fully coherent -+ * platforms L2 needs to be powered down to switch to IO coherency mode, so to -+ * avoid the glitch GPU reset is used to power down L2. Hence, this function -+ * does nothing on systems where the glitch issue isn't present. -+ * -+ * Caller must hold hwaccess_lock. Should be only called during the transition -+ * to enter protected mode. -+ * -+ * Return: -EAGAIN if a GPU reset was required for the glitch workaround but -+ * was already ongoing, otherwise 0. -+ */ -+int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev); ++#ifndef _KBASE_JS_BACKEND_H_ ++#define _KBASE_JS_BACKEND_H_ + +/** -+ * kbase_pm_protected_entry_override_disable - Disable the protected mode entry -+ * override -+ * @kbdev: Device pointer ++ * kbase_backend_timer_init() - Initialise the JS scheduling timer ++ * @kbdev: Device pointer + * -+ * This shall be called once L2 has powered down and switch to IO coherency -+ * mode has been made. As with kbase_pm_protected_entry_override_enable(), -+ * this function does nothing on systems where the glitch issue isn't present. ++ * This function should be called at driver initialisation + * -+ * Caller must hold hwaccess_lock. Should be only called during the transition -+ * to enter protected mode. -+ */ -+void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev); -+#endif -+ -+/* If true, the driver should explicitly control corestack power management, -+ * instead of relying on the Power Domain Controller. ++ * Return: 0 on success + */ -+extern bool corestack_driver_control; ++int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** -+ * kbase_pm_is_l2_desired - Check whether l2 is desired -+ * -+ * @kbdev: Device pointer -+ * -+ * This shall be called to check whether l2 is needed to power on ++ * kbase_backend_timer_term() - Terminate the JS scheduling timer ++ * @kbdev: Device pointer + * -+ * Return: true if l2 need to power on ++ * This function should be called at driver termination + */ -+bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); ++void kbase_backend_timer_term(struct kbase_device *kbdev); + -+#if MALI_USE_CSF +/** -+ * kbase_pm_is_mcu_desired - Check whether MCU is desired -+ * ++ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling ++ * timer + * @kbdev: Device pointer + * -+ * This shall be called to check whether MCU needs to be enabled. -+ * -+ * Return: true if MCU needs to be enabled. -+ */ -+bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_is_mcu_inactive - Check if the MCU is inactive (i.e. either -+ * it is disabled or it is in sleep) -+ * -+ * @kbdev: kbase device -+ * @state: state of the MCU state machine. -+ * -+ * This function must be called with hwaccess_lock held. -+ * L2 cache can be turned off if this function returns true. ++ * This function should be called on suspend, after the active count has reached ++ * zero. This is required as the timer may have been started on job submission ++ * to the job scheduler, but before jobs are submitted to the GPU. + * -+ * Return: true if MCU is inactive ++ * Caller must hold runpool_mutex. 
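A sketch of the suspend-side caller implied above, assuming the PM active count has already reached zero; the surrounding hook name is an assumption:

static void example_js_suspend(struct kbase_device *kbdev)
{
	mutex_lock(&kbdev->js_data.runpool_mutex);
	/* Stop the scheduling tick so no soft/hard-stop can fire while the
	 * GPU is suspended; kbase_backend_timer_resume() undoes this.
	 */
	kbase_backend_timer_suspend(kbdev);
	mutex_unlock(&kbdev->js_data.runpool_mutex);
}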
+ */ -+bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, -+ enum kbase_mcu_state state); ++void kbase_backend_timer_suspend(struct kbase_device *kbdev); + +/** -+ * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be -+ * suspended to low power state when all -+ * the CSGs are idle -+ * ++ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS ++ * scheduling timer + * @kbdev: Device pointer + * -+ * Return: true if allowed to enter the suspended state. -+ */ -+static inline -+bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return !(kbdev->pm.backend.csf_pm_sched_flags & -+ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); -+} -+ -+/** -+ * kbase_pm_no_runnables_sched_suspendable - Check whether the scheduler can be -+ * suspended to low power state when -+ * there are no runnable CSGs. -+ * -+ * @kbdev: Device pointer ++ * This function should be called on resume. Note that is not guaranteed to ++ * re-start the timer, only evalute whether it should be re-started. + * -+ * Return: true if allowed to enter the suspended state. ++ * Caller must hold runpool_mutex. + */ -+static inline -+bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return !(kbdev->pm.backend.csf_pm_sched_flags & -+ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); -+} ++void kbase_backend_timer_resume(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the -+ * MCU shader Core powered in accordance to the active -+ * power management policy -+ * -+ * @kbdev: Device pointer ++#endif /* _KBASE_JS_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c +new file mode 100644 +index 000000000..9ce50758c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c +@@ -0,0 +1,131 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * Return: true if the MCU is to retain powered. -+ */ -+static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return kbdev->pm.backend.csf_pm_sched_flags & -+ CSF_DYNAMIC_PM_CORE_KEEP_ON; -+} -+ -+/** -+ * kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * -+ * @kbdev: Device pointer ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * Return: true if MCU is in stable ON/OFF state. -+ */ -+static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) -+{ -+ bool in_desired_state = true; -+ -+ if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) -+ in_desired_state = false; -+ else if (!kbase_pm_is_mcu_desired(kbdev) && -+ (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) && -+ (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP)) -+ in_desired_state = false; -+ -+ return in_desired_state; -+} -+ -+#endif -+ -+/** -+ * kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state. 
++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @kbdev: Device pointer ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if L2 is in stable ON/OFF state. + */ -+static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev) -+{ -+ bool in_desired_state = true; -+ -+ if (kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_ON) -+ in_desired_state = false; -+ else if (!kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_OFF) -+ in_desired_state = false; + -+ return in_desired_state; -+} ++#include ++#include ++#include ++#include ++#include "mali_kbase_l2_mmu_config.h" + +/** -+ * kbase_pm_lock - Lock all necessary mutexes to perform PM actions -+ * -+ * @kbdev: Device pointer ++ * struct l2_mmu_config_limit_region - L2 MMU limit field + * -+ * This function locks correct mutexes independent of GPU architecture. ++ * @value: The default value to load into the L2_MMU_CONFIG register ++ * @mask: The shifted mask of the field in the L2_MMU_CONFIG register ++ * @shift: The shift of where the field starts in the L2_MMU_CONFIG register ++ * This should be the same value as the smaller of the two mask ++ * values + */ -+static inline void kbase_pm_lock(struct kbase_device *kbdev) -+{ -+#if !MALI_USE_CSF -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+#endif /* !MALI_USE_CSF */ -+ mutex_lock(&kbdev->pm.lock); -+} ++struct l2_mmu_config_limit_region { ++ u32 value, mask, shift; ++}; + +/** -+ * kbase_pm_unlock - Unlock mutexes locked by kbase_pm_lock ++ * struct l2_mmu_config_limit - L2 MMU read and write limit + * -+ * @kbdev: Device pointer ++ * @product_model: The GPU for which this entry applies ++ * @read: Values for the read limit field ++ * @write: Values for the write limit field + */ -+static inline void kbase_pm_unlock(struct kbase_device *kbdev) -+{ -+ mutex_unlock(&kbdev->pm.lock); -+#if !MALI_USE_CSF -+ mutex_unlock(&kbdev->js_data.runpool_mutex); -+#endif /* !MALI_USE_CSF */ -+} ++struct l2_mmu_config_limit { ++ u32 product_model; ++ struct l2_mmu_config_limit_region read; ++ struct l2_mmu_config_limit_region write; ++}; + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+/** -+ * kbase_pm_gpu_sleep_allowed - Check if the GPU is allowed to be put in sleep -+ * -+ * @kbdev: Device pointer ++/* ++ * Zero represents no limit + * -+ * This function is called on GPU idle notification and if it returns false then -+ * GPU power down will be triggered by suspending the CSGs and halting the MCU. ++ * For LBEX TBEX TBAX TTRX and TNAX: ++ * The value represents the number of outstanding reads (6 bits) or writes (5 bits) + * -+ * Return: true if the GPU is allowed to be in the sleep state. ++ * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h + */ -+static inline bool kbase_pm_gpu_sleep_allowed(struct kbase_device *kbdev) -+{ -+ /* If the autosuspend_delay has been set to 0 then it doesn't make -+ * sense to first put GPU to sleep state and then power it down, -+ * instead would be better to power it down right away. 
-+ * Also need to do the same when autosuspend_delay is set to a negative -+ * value, which implies that runtime pm is effectively disabled by the -+ * kernel. -+ * A high positive value of autosuspend_delay can be used to keep the -+ * GPU in sleep state for a long time. -+ */ -+ if (unlikely(!kbdev->dev->power.autosuspend_delay || -+ (kbdev->dev->power.autosuspend_delay < 0))) -+ return false; -+ -+ return kbdev->pm.backend.gpu_sleep_supported; -+} ++static const struct l2_mmu_config_limit limits[] = { ++ /* GPU, read, write */ ++ {GPU_ID2_PRODUCT_LBEX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TBEX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TBAX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TTRX, ++ {0, GENMASK(12, 7), 7}, ++ {0, GENMASK(17, 13), 13} }, ++ {GPU_ID2_PRODUCT_TNAX, ++ {0, GENMASK(12, 7), 7}, ++ {0, GENMASK(17, 13), 13} }, ++ {GPU_ID2_PRODUCT_TGOX, ++ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, ++ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, ++ {GPU_ID2_PRODUCT_TNOX, ++ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, ++ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, ++}; + -+/** -+ * kbase_pm_enable_db_mirror_interrupt - Enable the doorbell mirror interrupt to -+ * detect the User doorbell rings. -+ * -+ * @kbdev: Device pointer -+ * -+ * This function is called just before sending the sleep request to MCU firmware -+ * so that User doorbell rings can be detected whilst GPU remains in the sleep -+ * state. -+ * -+ */ -+static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbdev) ++int kbase_set_mmu_quirks(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (!kbdev->pm.backend.db_mirror_interrupt_enabled) { -+ u32 irq_mask = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ /* All older GPUs had 2 bits for both fields, this is a default */ ++ struct l2_mmu_config_limit limit = { ++ 0, /* Any GPU not in the limits array defined above */ ++ {KBASE_AID_32, GENMASK(25, 24), 24}, ++ {KBASE_AID_32, GENMASK(27, 26), 26} ++ }; ++ u32 product_model, gpu_id; ++ u32 mmu_config; ++ int i; + -+ WARN_ON(irq_mask & DOORBELL_MIRROR); ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask | DOORBELL_MIRROR); -+ kbdev->pm.backend.db_mirror_interrupt_enabled = true; ++ /* Limit the GPU bus bandwidth if the platform needs this. */ ++ for (i = 0; i < ARRAY_SIZE(limits); i++) { ++ if (product_model == limits[i].product_model) { ++ limit = limits[i]; ++ break; ++ } + } -+} + -+/** -+ * kbase_pm_disable_db_mirror_interrupt - Disable the doorbell mirror interrupt. -+ * -+ * @kbdev: Device pointer -+ * -+ * This function is called when doorbell mirror interrupt is received or MCU -+ * needs to be reactivated by enabling the doorbell notification. 
-+ */ -+static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + -+ if (kbdev->pm.backend.db_mirror_interrupt_enabled) { -+ u32 irq_mask = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask & ~DOORBELL_MIRROR); -+ kbdev->pm.backend.db_mirror_interrupt_enabled = false; -+ } -+} -+#endif ++ mmu_config &= ~(limit.read.mask | limit.write.mask); ++ /* Can't use FIELD_PREP() macro here as the mask isn't constant */ ++ mmu_config |= (limit.read.value << limit.read.shift) | ++ (limit.write.value << limit.write.shift); + -+/** -+ * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Check whether the L2 state is in power transition phase or not. If it is, the MMU -+ * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU -+ * page migration is intended, immediately start the MMU migration action without -+ * dropping the lock. When page migration begins, a flag is set in kbdev that would -+ * prevent the L2 state machine traversing into power transition phases, until -+ * the MMU migration action ends. -+ * -+ * Return: true if MMU page migration is allowed -+ */ -+static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ kbdev->hw_quirks_mmu = mmu_config; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ /* Allow memory configuration disparity to be ignored, ++ * we optimize the use of shared memory and thus we ++ * expect some disparity in the memory configuration. ++ */ ++ kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; ++ } + -+ return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); ++ return 0; +} -+ -+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h new file mode 100644 -index 000000000..ef72f6083 +index 000000000..07014ad36 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h -@@ -0,0 +1,50 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h +@@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -134675,45 +133741,31 @@ index 000000000..ef72f6083 + * + */ + -+/* -+ * Backend-specific Power Manager level 2 cache state definitions. -+ * The function-like macro KBASEP_L2_STATE() must be defined before including -+ * this header file. This header file can be included multiple times in the -+ * same compilation unit with different definitions of KBASEP_L2_STATE(). 
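To show how a header of this shape is normally consumed, a sketch of the conventional X-macro expansion follows; the enum and string-table names are assumptions, only the KBASEP_L2_STATE() convention itself comes from the comment above:

enum example_l2_core_state {
#define KBASEP_L2_STATE(n) EXAMPLE_L2_##n,
#include "mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
};

static const char *const example_l2_state_names[] = {
#define KBASEP_L2_STATE(n) #n,
#include "mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
};

Each inclusion re-expands the same state list, which keeps the enum values and any debug strings in sync without repeating the list by hand.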
++#ifndef _KBASE_L2_MMU_CONFIG_H_ ++#define _KBASE_L2_MMU_CONFIG_H_ ++/** ++ * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev + * -+ * @OFF: The L2 cache and tiler are off -+ * @PEND_ON: The L2 cache and tiler are powering on -+ * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. -+ * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled -+ * @ON: The L2 cache and tiler are on, and hwcnt is enabled -+ * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled -+ * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. -+ * Conditionally used. -+ * @POWER_DOWN: The L2 cache and tiler are about to be powered off -+ * @PEND_OFF: The L2 cache and tiler are powering off -+ * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are -+ * unknown ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Use this function to initialise the hw_quirks_mmu field, for instance to set ++ * the MAX_READS and MAX_WRITES to sane defaults for each GPU. ++ * ++ * Return: Zero for succeess or a Linux error code + */ -+KBASEP_L2_STATE(OFF) -+KBASEP_L2_STATE(PEND_ON) -+KBASEP_L2_STATE(RESTORE_CLOCKS) -+KBASEP_L2_STATE(ON_HWCNT_ENABLE) -+KBASEP_L2_STATE(ON) -+KBASEP_L2_STATE(ON_HWCNT_DISABLE) -+KBASEP_L2_STATE(SLOW_DOWN_CLOCKS) -+KBASEP_L2_STATE(POWER_DOWN) -+KBASEP_L2_STATE(PEND_OFF) -+KBASEP_L2_STATE(RESET_WAIT) -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h ++int kbase_set_mmu_quirks(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_L2_MMU_CONFIG_H */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c new file mode 100644 -index 000000000..3b448e397 +index 000000000..6db703176 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h -@@ -0,0 +1,108 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +@@ -0,0 +1,2229 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -134731,1870 +133783,2224 @@ index 000000000..3b448e397 + * + */ + -+/* -+ * Backend-specific Power Manager MCU state definitions. -+ * The function-like macro KBASEP_MCU_STATE() must be defined before including -+ * this header file. This header file can be included multiple times in the -+ * same compilation unit with different definitions of KBASEP_MCU_STATE(). ++/* NOTES: ++ * - A default GPU can be compiled in during the build, by defining ++ * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that ++ * insmod'ing mali_kbase.ko with no arguments after a build with "scons ++ * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be ++ * overridden by passing the 'no_mali_gpu' argument to insmod. + * -+ * @OFF: The MCU is powered off. -+ * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with -+ * firmware reloading) is in progress. -+ * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration -+ * requests have been sent to the firmware. 
-+ * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now -+ * ready for use and hwcnt is being enabled. -+ * @ON: The MCU is active and hwcnt has been enabled. -+ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores -+ * is being updated. -+ * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. -+ * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU -+ * halt would be triggered. -+ * @ON_PEND_HALT: MCU halt in progress, confirmation pending. -+ * @POWER_DOWN: MCU halted operations, pending being disabled. -+ * @PEND_OFF: MCU is being disabled, pending on powering off. -+ * @RESET_WAIT: The GPU is resetting, MCU state is unknown. -+ * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware -+ * have completed and shaders have been requested to -+ * power on. -+ * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being -+ * notified of the mask of enabled shader cores. -+ * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered -+ * and checks are done to update the number of -+ * enabled cores. -+ * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down -+ * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down. -+ * @HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: Firmware has been informed to stop using -+ * specific cores, due to core_mask change request. -+ * After the ACK from FW, the wait will be done for -+ * undesired cores to become inactive. -+ * @HCTL_CORE_INACTIVE_PEND: Waiting for specific cores to become inactive. -+ * Once the cores become inactive their power down -+ * will be initiated. -+ * @HCTL_SHADERS_CORE_OFF_PEND: Waiting for specific cores to complete the -+ * transition to power down. Once powered down, -+ * HW counters will be re-enabled. -+ * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU -+ * is being put to sleep. -+ * @ON_PEND_SLEEP: MCU sleep is in progress. -+ * @IN_SLEEP: Sleep request is completed and MCU has halted. -+ * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to -+ * be requested, Coresight is being disabled. -+ * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened -+ * Coresight is being enabled. -+ * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. -+ * @CORESIGHT_ENABLE: The MCU is on, host does not have control and -+ * Coresight is being enabled. ++ * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is ++ * activated. 
+ */ -+KBASEP_MCU_STATE(OFF) -+KBASEP_MCU_STATE(PEND_ON_RELOAD) -+KBASEP_MCU_STATE(ON_GLB_REINIT_PEND) -+KBASEP_MCU_STATE(ON_HWCNT_ENABLE) -+KBASEP_MCU_STATE(ON) -+KBASEP_MCU_STATE(ON_CORE_ATTR_UPDATE_PEND) -+KBASEP_MCU_STATE(ON_HWCNT_DISABLE) -+KBASEP_MCU_STATE(ON_HALT) -+KBASEP_MCU_STATE(ON_PEND_HALT) -+KBASEP_MCU_STATE(POWER_DOWN) -+KBASEP_MCU_STATE(PEND_OFF) -+KBASEP_MCU_STATE(RESET_WAIT) -+/* Additional MCU states with HOST_CONTROL_SHADERS */ -+KBASEP_MCU_STATE(HCTL_SHADERS_PEND_ON) -+KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND) -+KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK) -+KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF) -+KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF) -+KBASEP_MCU_STATE(HCTL_CORES_DOWN_SCALE_NOTIFY_PEND) -+KBASEP_MCU_STATE(HCTL_CORE_INACTIVE_PEND) -+KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) -+/* Additional MCU states to support GPU sleep feature */ -+KBASEP_MCU_STATE(ON_SLEEP_INITIATE) -+KBASEP_MCU_STATE(ON_PEND_SLEEP) -+KBASEP_MCU_STATE(IN_SLEEP) -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+/* Additional MCU states for Coresight */ -+KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) -+KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) -+KBASEP_MCU_STATE(CORESIGHT_DISABLE) -+KBASEP_MCU_STATE(CORESIGHT_ENABLE) -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c -new file mode 100644 -index 000000000..865f526f6 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c -@@ -0,0 +1,529 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++ ++/* Implementation of failure injection system: + * -+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. ++ * Error conditions are generated by gpu_generate_error(). ++ * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either ++ * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or ++ * checks if there is (in error_track_list) an error configuration to be set for ++ * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined). ++ * Each error condition will trigger a specific "state" for a certain set of ++ * registers as per Midgard Architecture Specifications doc. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * According to Midgard Architecture Specifications doc the following registers ++ * are always affected by error conditions: + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * JOB Exception: ++ * JOB_IRQ_RAWSTAT ++ * JOB STATUS AREA + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * MMU Exception: ++ * MMU_IRQ_RAWSTAT ++ * AS_FAULTSTATUS ++ * AS_FAULTADDRESS + * ++ * GPU Exception: ++ * GPU_IRQ_RAWSTAT ++ * GPU_FAULTSTATUS ++ * GPU_FAULTADDRESS ++ * ++ * For further clarification on the model behaviour upon specific error ++ * conditions the user may refer to the Midgard Architecture Specification ++ * document + */ -+ -+/* -+ * Metrics for power management -+ */ -+ +#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include + +#if MALI_USE_CSF -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -+#include -+#else -+#include -+#endif /* !MALI_USE_CSF */ -+ -+#include -+#include ++#include + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) || !MALI_USE_CSF -+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns -+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly -+ * under 11s. Exceeding this will cause overflow ++/* Index of the last value register for each type of core, with the 1st value ++ * register being at index 0. + */ -+#define KBASE_PM_TIME_SHIFT 8 ++#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1) ++ ++/* Array for storing the value of SELECT register for each type of core */ ++static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; ++static bool ipa_control_timer_enabled; +#endif + -+#if MALI_USE_CSF -+/* To get the GPU_ACTIVE value in nano seconds unit */ -+#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) ++#define LO_MASK(M) ((M) & 0xFFFFFFFF) ++#if !MALI_USE_CSF ++#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) +#endif + -+/* -+ * Possible state transitions -+ * ON -> ON | OFF | STOPPED -+ * STOPPED -> ON | OFF -+ * OFF -> ON -+ * -+ * -+ * ┌─e─â”┌────────────f─────────────┠-+ * │ v│ v -+ * └───ON ──a──> STOPPED ──b──> OFF -+ * ^^ │ │ -+ * │└──────c─────┘ │ -+ * │ │ -+ * └─────────────d─────────────┘ -+ * -+ * Transition effects: -+ * a. None -+ * b. Timer expires without restart -+ * c. Timer is not stopped, timer period is unaffected -+ * d. Timer must be restarted -+ * e. Callback is executed and the timer is restarted -+ * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during -+ * tear-down and should not be subject to a race from an OFF->ON transition ++/* Construct a value for the THREAD_FEATURES register, *except* the two most ++ * significant bits, which are set to IMPLEMENTATION_MODEL in ++ * midgard_model_read_reg(). 
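To make the register sets in the error-injection overview above concrete, the sketch below shows what the model publishes for one injected MMU translation fault, following the same encodings that update_register_statuses() applies later in this file: AS_FAULTSTATUS takes 0xC0 | mmu_table_level, and MMU_IRQ_RAWSTAT gets the page-fault bit of the faulting address space (a TRANSTAB BUS FAULT would use bit 16 + as instead). The helper name and the standalone form are illustrative only, not part of the patch.

#include <stdint.h>

/* Illustrative restatement of the MMU error encoding used further down. */
static void sketch_mmu_translation_fault(uint32_t *mmu_irq_rawstat,
					 uint32_t *as_faultstatus,
					 unsigned int as,
					 unsigned int mmu_table_level)
{
	/* 0xCm: TRANSLATION FAULT, where m is the MMU table level */
	*as_faultstatus = (1u << 7) | (1u << 6) | mmu_table_level;
	/* page-fault bit for this address space; a bus fault would set
	 * bit (16 + as) instead
	 */
	*mmu_irq_rawstat |= 1u << as;
}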
+ */ -+enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON }; -+ -+#ifdef CONFIG_MALI_BIFROST_DVFS -+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) -+{ -+ struct kbasep_pm_metrics_state *metrics; -+ -+ if (WARN_ON(!timer)) -+ return HRTIMER_NORESTART; -+ -+ metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); -+ -+ /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */ -+ if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON) -+ return HRTIMER_NORESTART; -+ -+ kbase_pm_get_dvfs_action(metrics->kbdev); -+ -+ /* Set the new expiration time and restart (transition e) */ -+ hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period)); -+ return HRTIMER_RESTART; -+} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -+ -+int kbasep_pm_metrics_init(struct kbase_device *kbdev) -+{ +#if MALI_USE_CSF -+ struct kbase_ipa_control_perf_counter perf_counter; -+ int err; ++#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ++ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) ++#else ++#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ ++ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) ++#endif + -+ /* One counter group */ -+ const size_t NUM_PERF_COUNTERS = 1; ++struct error_status_t hw_error_status; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ kbdev->pm.backend.metrics.kbdev = kbdev; -+ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); -+ kbdev->pm.backend.metrics.values.time_busy = 0; -+ kbdev->pm.backend.metrics.values.time_idle = 0; -+ kbdev->pm.backend.metrics.values.time_in_protm = 0; -+ -+ perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; ++/** ++ * struct control_reg_values_t - control register values specific to the GPU being 'emulated' ++ * @name: GPU name ++ * @gpu_id: GPU ID to report ++ * @as_present: Bitmap of address spaces present ++ * @thread_max_threads: Maximum number of threads per core ++ * @thread_max_workgroup_size: Maximum number of threads per workgroup ++ * @thread_max_barrier_size: Maximum number of threads per barrier ++ * @thread_features: Thread features, NOT INCLUDING the 2 ++ * most-significant bits, which are always set to ++ * IMPLEMENTATION_MODEL. 
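As a worked example of the THREAD_FEATURES_PARTIAL() packing above, using the arguments the tMIx entry below passes (0x6000 registers, a task queue of 4, a thread-group split of 10): the CSF variant keeps the task queue at bit 24 and drops the split entirely, while the job-manager variant packs the queue at bit 16 and the split at bit 24. midgard_model_read_reg() then ORs IMPLEMENTATION_MODEL into the two top bits. The macros are restated locally so the assertions stand alone.

#include <assert.h>
#include <stdint.h>

#define TF_CSF(regs, tq)	((uint32_t)(regs) | ((uint32_t)(tq) << 24))
#define TF_JM(regs, tq, tg)	((uint32_t)(regs) | ((uint32_t)(tq) << 16) | ((uint32_t)(tg) << 24))

static void thread_features_partial_example(void)
{
	assert(TF_CSF(0x6000, 4) == 0x04006000);	/* MALI_USE_CSF build */
	assert(TF_JM(0x6000, 4, 10) == 0x0A046000);	/* job-manager build */
}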
++ * @core_features: Core features ++ * @tiler_features: Tiler features ++ * @mmu_features: MMU features ++ * @gpu_features_lo: GPU features (low) ++ * @gpu_features_hi: GPU features (high) ++ * @shader_present: Available shader bitmap ++ * @stack_present: Core stack present bitmap ++ * ++ */ ++struct control_reg_values_t { ++ const char *name; ++ u32 gpu_id; ++ u32 as_present; ++ u32 thread_max_threads; ++ u32 thread_max_workgroup_size; ++ u32 thread_max_barrier_size; ++ u32 thread_features; ++ u32 core_features; ++ u32 tiler_features; ++ u32 mmu_features; ++ u32 gpu_features_lo; ++ u32 gpu_features_hi; ++ u32 shader_present; ++ u32 stack_present; ++}; + -+ /* Normalize values by GPU frequency */ -+ perf_counter.gpu_norm = true; ++struct job_slot { ++ int job_active; ++ int job_queued; ++ int job_complete_irq_asserted; ++ int job_irq_mask; ++ int job_disabled; ++}; + -+ /* We need the GPU_ACTIVE counter, which is in the CSHW group */ -+ perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW; ++struct dummy_model_t { ++ int reset_completed; ++ int reset_completed_mask; ++#if !MALI_USE_CSF ++ int prfcnt_sample_completed; ++#endif /* !MALI_USE_CSF */ ++ int power_changed_mask; /* 2bits: _ALL,_SINGLE */ ++ int power_changed; /* 1bit */ ++ bool clean_caches_completed; ++ bool clean_caches_completed_irq_enabled; ++#if MALI_USE_CSF ++ bool flush_pa_range_completed; ++ bool flush_pa_range_completed_irq_enabled; ++#endif ++ int power_on; /* 6bits: SHADER[4],TILER,L2 */ ++ u32 stack_power_on_lo; ++ u32 coherency_enable; ++ unsigned int job_irq_js_state; ++ struct job_slot slots[NUM_SLOTS]; ++ const struct control_reg_values_t *control_reg_values; ++ u32 l2_config; ++ void *data; ++}; + -+ /* We need the GPU_ACTIVE counter */ -+ perf_counter.idx = GPU_ACTIVE_CNT_IDX; ++/* Array associating GPU names with control register values. The first ++ * one is used in the case of no match. 
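The table that follows is searched by GPU name, and the comment above pins down the fallback rule: if nothing matches, the first entry is used. A minimal lookup sketch along those lines; find_control_reg_values() later in this file is the real implementation, with extra tGOx revision handling on top.

#include <stddef.h>
#include <string.h>

static const struct control_reg_values_t *
pick_control_reg_values(const struct control_reg_values_t *table, size_t n,
			const char *gpu_name)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!strcmp(table[i].name, gpu_name))
			return &table[i];
	}
	return &table[0];	/* first entry doubles as the default */
}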
++ */ ++static const struct control_reg_values_t all_control_reg_values[] = { ++ { ++ .name = "tMIx", ++ .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tHEx", ++ .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tSIx", ++ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x300, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x209, ++ .mmu_features = 0x2821, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tDVx", ++ .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x300, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x209, ++ .mmu_features = 0x2821, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tNOx", ++ .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tGOx_r0p0", ++ .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tGOx_r1p0", ++ .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), ++ .core_features = 0x2, ++ .tiler_features = 0x209, ++ .mmu_features = 0x2823, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ 
.name = "tTRx", ++ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tNAx", ++ .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tBEx", ++ .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tBAx", ++ .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tODx", ++ .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tGRx", ++ .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .core_features = 0x0, /* core_1e16fma2tex */ ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tVAx", ++ .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x180, ++ .thread_max_workgroup_size = 0x180, ++ .thread_max_barrier_size = 0x180, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), ++ .core_features = 0x0, /* core_1e16fma2tex */ ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, ++ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, ++ }, ++ { ++ .name = "tTUx", ++ 
.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x800, ++ .thread_max_workgroup_size = 0x400, ++ .thread_max_barrier_size = 0x400, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), ++ .core_features = 0x0, /* core_1e32fma2tex */ ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0xf, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, ++ .stack_present = 0xF, ++ }, ++ { ++ .name = "tTIx", ++ .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0), ++ .as_present = 0xFF, ++ .thread_max_threads = 0x800, ++ .thread_max_workgroup_size = 0x400, ++ .thread_max_barrier_size = 0x400, ++ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), ++ .core_features = 0x1, /* core_1e64fma4tex */ ++ .tiler_features = 0x809, ++ .mmu_features = 0x2830, ++ .gpu_features_lo = 0xf, ++ .gpu_features_hi = 0, ++ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, ++ .stack_present = 0xF, ++ }, ++}; + -+ err = kbase_ipa_control_register( -+ kbdev, &perf_counter, NUM_PERF_COUNTERS, -+ &kbdev->pm.backend.metrics.ipa_control_client); -+ if (err) { -+ dev_err(kbdev->dev, -+ "Failed to register IPA with kbase_ipa_control: err=%d", -+ err); -+ return -1; -+ } -+#else -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ kbdev->pm.backend.metrics.kbdev = kbdev; -+ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); ++static struct { ++ spinlock_t access_lock; ++#if !MALI_USE_CSF ++ unsigned long prfcnt_base; ++#endif /* !MALI_USE_CSF */ ++ u32 *prfcnt_base_cpu; + -+ kbdev->pm.backend.metrics.gpu_active = false; -+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; ++ u32 time; + -+ kbdev->pm.backend.metrics.values.time_busy = 0; -+ kbdev->pm.backend.metrics.values.time_idle = 0; -+ kbdev->pm.backend.metrics.values.busy_cl[0] = 0; -+ kbdev->pm.backend.metrics.values.busy_cl[1] = 0; -+ kbdev->pm.backend.metrics.values.busy_gl = 0; ++ struct gpu_model_prfcnt_en prfcnt_en; + -+#endif -+ spin_lock_init(&kbdev->pm.backend.metrics.lock); ++ u64 l2_present; ++ u64 shader_present; + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ kbdev->pm.backend.metrics.timer.function = dvfs_callback; -+ kbdev->pm.backend.metrics.initialized = true; -+ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); -+ kbase_pm_metrics_start(kbdev); -+#endif /* CONFIG_MALI_BIFROST_DVFS */ ++#if !MALI_USE_CSF ++ u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; ++#else ++ u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; ++#endif /* !MALI_USE_CSF */ ++ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; ++ u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * ++ KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; ++ u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * ++ KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; ++} performance_counters; + -+#if MALI_USE_CSF -+ /* The sanity check on the GPU_ACTIVE performance counter -+ * is skipped for Juno platforms that have timing problems. 
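The counter arrays in performance_counters are flat, core-major buffers: each shader core or memsys slice owns KBASE_DUMMY_MODEL_COUNTER_PER_CORE consecutive u64 slots, which is the stride gpu_model_get_prfcnt_value() steps by when it sums one counter across cores. A minimal indexing sketch, with the stride value assumed here rather than taken from the model headers:

#include <stddef.h>
#include <stdint.h>

#define COUNTERS_PER_CORE 60	/* assumed stand-in for KBASE_DUMMY_MODEL_COUNTER_PER_CORE */

/* counters[] holds all of core 0's counters first, then core 1's, and so on. */
static uint64_t read_block_counter(const uint64_t *counters,
				   unsigned int core, unsigned int counter)
{
	return counters[(size_t)core * COUNTERS_PER_CORE + counter];
}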
-+ */ -+ kbdev->pm.backend.metrics.skip_gpu_active_sanity_check = -+ of_machine_is_compatible("arm,juno"); -+#endif ++static u32 get_implementation_register(u32 reg, ++ const struct control_reg_values_t *const control_reg_values) ++{ ++ switch (reg) { ++ case GPU_CONTROL_REG(SHADER_PRESENT_LO): ++ return LO_MASK(control_reg_values->shader_present); ++ case GPU_CONTROL_REG(TILER_PRESENT_LO): ++ return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); ++ case GPU_CONTROL_REG(L2_PRESENT_LO): ++ return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); ++ case GPU_CONTROL_REG(STACK_PRESENT_LO): ++ return LO_MASK(control_reg_values->stack_present); + -+ return 0; ++ case GPU_CONTROL_REG(SHADER_PRESENT_HI): ++ case GPU_CONTROL_REG(TILER_PRESENT_HI): ++ case GPU_CONTROL_REG(L2_PRESENT_HI): ++ case GPU_CONTROL_REG(STACK_PRESENT_HI): ++ /* *** FALLTHROUGH *** */ ++ default: ++ return 0; ++ } +} -+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + -+void kbasep_pm_metrics_term(struct kbase_device *kbdev) ++void gpu_device_set_data(void *model, void *data) +{ -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ /* Cancel the timer, and block if the callback is currently executing (transition f) */ -+ kbdev->pm.backend.metrics.initialized = false; -+ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); -+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer); -+#endif /* CONFIG_MALI_BIFROST_DVFS */ ++ struct dummy_model_t *dummy = (struct dummy_model_t *)model; + -+#if MALI_USE_CSF -+ kbase_ipa_control_unregister( -+ kbdev, kbdev->pm.backend.metrics.ipa_control_client); -+#endif ++ dummy->data = data; +} + -+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); -+ -+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this -+ * function -+ */ -+#if MALI_USE_CSF -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) -+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) ++void *gpu_device_get_data(void *model) +{ -+ int err; -+ u64 gpu_active_counter; -+ u64 protected_time; -+ ktime_t now; -+ -+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++ struct dummy_model_t *dummy = (struct dummy_model_t *)model; + -+ /* Query IPA_CONTROL for the latest GPU-active and protected-time -+ * info. -+ */ -+ err = kbase_ipa_control_query( -+ kbdev, kbdev->pm.backend.metrics.ipa_control_client, -+ &gpu_active_counter, 1, &protected_time); ++ return dummy->data; ++} + -+ /* Read the timestamp after reading the GPU_ACTIVE counter value. -+ * This ensures the time gap between the 2 reads is consistent for -+ * a meaningful comparison between the increment of GPU_ACTIVE and -+ * elapsed time. The lock taken inside kbase_ipa_control_query() -+ * function can cause lot of variation. -+ */ -+ now = ktime_get_raw(); ++#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Failed to query the increment of GPU_ACTIVE counter: err=%d", -+ err); -+ } else { -+ u64 diff_ns; -+ s64 diff_ns_signed; -+ u32 ns_time; -+ ktime_t diff = ktime_sub( -+ now, kbdev->pm.backend.metrics.time_period_start); ++/* SCons should pass in a default GPU, but other ways of building (e.g. ++ * in-tree) won't, so define one here in case. 
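get_implementation_register() above only ever reports the low word of each *_PRESENT pair; the *_HI cases fall through and return zero, so the bitmaps this dummy model exposes fit in 32 bits even though the driver tracks them as u64. A small sketch of how a reader reassembles a _LO/_HI register pair into one bitmap:

#include <stdint.h>

/* Recombine a *_PRESENT_LO/_HI register pair into a 64-bit bitmap. With this
 * dummy model the hi word is always 0.
 */
static uint64_t present_mask_from_pair(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | lo;
}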
++ */ ++#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU ++#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" ++#endif + -+ diff_ns_signed = ktime_to_ns(diff); ++static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; ++module_param(no_mali_gpu, charp, 0000); ++MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); + -+ if (diff_ns_signed < 0) -+ return; ++#if MALI_USE_CSF ++static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, ++ u32 cnt_idx, bool is_low_word) ++{ ++ u64 *counters_data; ++ u32 core_count = 0; ++ u32 event_index; ++ u64 value = 0; ++ u32 core; ++ unsigned long flags; + -+ diff_ns = (u64)diff_ns_signed; ++ if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) ++ return 0; + -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* The GPU_ACTIVE counter shouldn't clock-up more time than has -+ * actually elapsed - but still some margin needs to be given -+ * when doing the comparison. There could be some drift between -+ * the CPU and GPU clock. -+ * -+ * Can do the check only in a real driver build, as an arbitrary -+ * value for GPU_ACTIVE can be fed into dummy model in no_mali -+ * configuration which may not correspond to the real elapsed -+ * time. -+ */ -+ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { -+ /* The margin is scaled to allow for the worst-case -+ * scenario where the samples are maximally separated, -+ * plus a small offset for sampling errors. -+ */ -+ u64 const MARGIN_NS = -+ IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2; ++ if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) ++ return 0; + -+ if (gpu_active_counter > (diff_ns + MARGIN_NS)) { -+ dev_info( -+ kbdev->dev, -+ "GPU activity takes longer than time interval: %llu ns > %llu ns", -+ (unsigned long long)gpu_active_counter, -+ (unsigned long long)diff_ns); -+ } -+ } -+#endif -+ /* Calculate time difference in units of 256ns */ -+ ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT); ++ event_index = ++ (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; + -+ /* Add protected_time to gpu_active_counter so that time in -+ * protected mode is included in the apparent GPU active time, -+ * then convert it from units of 1ns to units of 256ns, to -+ * match what JM GPUs use. The assumption is made here that the -+ * GPU is 100% busy while in protected mode, so we should add -+ * this since the GPU can't (and thus won't) update these -+ * counters while it's actually in protected mode. -+ * -+ * Perform the add after dividing each value down, to reduce -+ * the chances of overflows. -+ */ -+ protected_time >>= KBASE_PM_TIME_SHIFT; -+ gpu_active_counter >>= KBASE_PM_TIME_SHIFT; -+ gpu_active_counter += protected_time; ++ /* Currently only primary counter blocks are supported */ ++ if (WARN_ON(event_index >= ++ (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) ++ return 0; + -+ /* Ensure the following equations don't go wrong if ns_time is -+ * slightly larger than gpu_active_counter somehow -+ */ -+ gpu_active_counter = MIN(gpu_active_counter, ns_time); ++ /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, ++ * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for ++ * IPA counters. If selected, the value returned for them will be zero. 
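The decode just above packs one 8-bit event index per counter slot into the 64-bit SELECT value, so slot n of a core type lives in bits [8n+7:8n], and anything below the four header dwords is a pseudo-counter that reads back as zero. A standalone restatement of that decode; the per-block counter count is assumed.

#include <stdbool.h>
#include <stdint.h>

#define HEADER_DWORDS	4	/* matches "events start index 4 onwards" above */
#define BLOCK_COUNTERS	8	/* assumed stand-in for KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS */

/* Returns false when the selected index is a header/pseudo counter. */
static bool decode_ipa_select(uint64_t select, unsigned int cnt_idx,
			      unsigned int *event_index)
{
	if (cnt_idx >= BLOCK_COUNTERS)
		return false;
	*event_index = (select >> (cnt_idx * 8)) & 0xFF;
	return *event_index >= HEADER_DWORDS;
}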
++ */ ++ if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) ++ return 0; + -+ kbdev->pm.backend.metrics.values.time_busy += -+ gpu_active_counter; ++ event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; + -+ kbdev->pm.backend.metrics.values.time_idle += -+ ns_time - gpu_active_counter; ++ spin_lock_irqsave(&performance_counters.access_lock, flags); + -+ /* Also make time in protected mode available explicitly, -+ * so users of this data have this info, too. -+ */ -+ kbdev->pm.backend.metrics.values.time_in_protm += -+ protected_time; ++ switch (core_type) { ++ case KBASE_IPA_CORE_TYPE_CSHW: ++ core_count = 1; ++ counters_data = performance_counters.cshw_counters; ++ break; ++ case KBASE_IPA_CORE_TYPE_MEMSYS: ++ core_count = hweight64(performance_counters.l2_present); ++ counters_data = performance_counters.l2_counters; ++ break; ++ case KBASE_IPA_CORE_TYPE_TILER: ++ core_count = 1; ++ counters_data = performance_counters.tiler_counters; ++ break; ++ case KBASE_IPA_CORE_TYPE_SHADER: ++ core_count = hweight64(performance_counters.shader_present); ++ counters_data = performance_counters.shader_counters; ++ break; ++ default: ++ WARN(1, "Invalid core_type %d\n", core_type); ++ break; + } + -+ kbdev->pm.backend.metrics.time_period_start = now; -+} -+#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ -+#else -+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, -+ ktime_t now) -+{ -+ ktime_t diff; ++ for (core = 0; core < core_count; core++) { ++ value += counters_data[event_index]; ++ event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; ++ } + -+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); + -+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); -+ if (ktime_to_ns(diff) < 0) -+ return; ++ if (is_low_word) ++ return (value & U32_MAX); ++ else ++ return (value >> 32); ++} ++#endif /* MALI_USE_CSF */ + -+ if (kbdev->pm.backend.metrics.gpu_active) { -+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); ++/** ++ * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values ++ * ++ * Sets all performance counter values to zero. The performance counter access ++ * lock must be held when calling this function. 
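gpu_model_get_prfcnt_value() hands the accumulated 64-bit total back one 32-bit word at a time, selected by is_low_word, so a caller that wants the whole value makes two calls and recombines them. A usage sketch as it would look inside this file (the wrapper below is invented; it relies on the enum and the static function defined above):

#include <stdint.h>
#include <stdbool.h>

/* Recombine the two 32-bit halves returned for one IPA counter. */
static uint64_t ipa_counter_full_value(enum kbase_ipa_core_type type, uint32_t idx)
{
	uint64_t lo = gpu_model_get_prfcnt_value(type, idx, true);
	uint64_t hi = gpu_model_get_prfcnt_value(type, idx, false);

	return (hi << 32) | lo;
}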
++ */ ++static void gpu_model_clear_prfcnt_values_nolock(void) ++{ ++ lockdep_assert_held(&performance_counters.access_lock); ++#if !MALI_USE_CSF ++ memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); ++#else ++ memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); ++#endif /* !MALI_USE_CSF */ ++ memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); ++ memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); ++ memset(performance_counters.shader_counters, 0, ++ sizeof(performance_counters.shader_counters)); ++} + -+ kbdev->pm.backend.metrics.values.time_busy += ns_time; -+ if (kbdev->pm.backend.metrics.active_cl_ctx[0]) -+ kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; -+ if (kbdev->pm.backend.metrics.active_cl_ctx[1]) -+ kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; -+ if (kbdev->pm.backend.metrics.active_gl_ctx[0]) -+ kbdev->pm.backend.metrics.values.busy_gl += ns_time; -+ if (kbdev->pm.backend.metrics.active_gl_ctx[1]) -+ kbdev->pm.backend.metrics.values.busy_gl += ns_time; -+ if (kbdev->pm.backend.metrics.active_gl_ctx[2]) -+ kbdev->pm.backend.metrics.values.busy_gl += ns_time; -+ } else { -+ kbdev->pm.backend.metrics.values.time_idle += -+ (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); -+ } ++#if MALI_USE_CSF ++void gpu_model_clear_prfcnt_values(void) ++{ ++ unsigned long flags; + -+ kbdev->pm.backend.metrics.time_period_start = now; ++ spin_lock_irqsave(&performance_counters.access_lock, flags); ++ gpu_model_clear_prfcnt_values_nolock(); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} -+#endif /* MALI_USE_CSF */ ++KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); ++#endif /* MALI_USE_CSF */ + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) -+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, -+ struct kbasep_pm_metrics *last, -+ struct kbasep_pm_metrics *diff) ++/** ++ * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer ++ * ++ * @values: Array of values to be written out ++ * @out_index: Index into performance counter buffer ++ * @block_count: Number of blocks to dump ++ * @prfcnt_enable_mask: Counter enable mask ++ * @blocks_present: Available blocks bit mask ++ * ++ * The performance counter access lock must be held before calling this ++ * function. 
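Each block that gpu_model_dump_prfcnt_blocks() writes out is a short header followed by the counter payload: dword 0 carries the model's running timestamp, dword 2 records the enable mask that was applied, and the counters start right after the header. A sketch of the per-block offset arithmetic, with the block geometry assumed (4 header dwords plus 60 counters, 64 u32 values per block):

#include <stdint.h>

#define HDR_DWORDS	 4	/* assumed: KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS */
#define CNT_PER_BLOCK	60	/* assumed: KBASE_DUMMY_MODEL_COUNTER_PER_CORE */
#define VALS_PER_BLOCK	(HDR_DWORDS + CNT_PER_BLOCK)

/* Index of counter c within block b of a dump buffer of u32 values. */
static uint32_t dump_buf_index(uint32_t block, uint32_t counter)
{
	return block * VALS_PER_BLOCK + HDR_DWORDS + counter;
}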
++ */ ++static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, ++ u32 prfcnt_enable_mask, u64 blocks_present) +{ -+ struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; -+ unsigned long flags; ++ u32 block_idx, counter; ++ u32 counter_value = 0; ++ u32 *prfcnt_base; ++ u32 index = 0; + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); -+#if MALI_USE_CSF -+ kbase_pm_get_dvfs_utilisation_calc(kbdev); -+#else -+ kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw()); -+#endif ++ lockdep_assert_held(&performance_counters.access_lock); + -+ memset(diff, 0, sizeof(*diff)); -+ diff->time_busy = cur->time_busy - last->time_busy; -+ diff->time_idle = cur->time_idle - last->time_idle; ++ prfcnt_base = performance_counters.prfcnt_base_cpu; + ++ for (block_idx = 0; block_idx < block_count; block_idx++) { ++ /* only dump values if core is present */ ++ if (!(blocks_present & (1 << block_idx))) { +#if MALI_USE_CSF -+ diff->time_in_protm = cur->time_in_protm - last->time_in_protm; -+#else -+ diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; -+ diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; -+ diff->busy_gl = cur->busy_gl - last->busy_gl; -+#endif ++ /* if CSF dump zeroed out block */ ++ memset(&prfcnt_base[*out_index], 0, ++ KBASE_DUMMY_MODEL_BLOCK_SIZE); ++ *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; ++#endif /* MALI_USE_CSF */ ++ continue; ++ } + -+ *last = *cur; ++ /* write the header */ ++ prfcnt_base[*out_index] = performance_counters.time++; ++ prfcnt_base[*out_index+2] = prfcnt_enable_mask; ++ *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; + -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++ /* write the counters */ ++ for (counter = 0; ++ counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; ++ counter++) { ++ /* HW counter values retrieved through ++ * PRFCNT_SAMPLE request are of 32 bits only. 
++ */ ++ counter_value = (u32)values[index++]; ++ if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( ++ prfcnt_enable_mask, (counter + ++ KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { ++ prfcnt_base[*out_index + counter] = ++ counter_value; ++ } ++ } ++ *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; ++ } +} -+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); -+#endif + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) ++static void gpu_model_dump_nolock(void) +{ -+ int utilisation; -+ struct kbasep_pm_metrics *diff; -+#if !MALI_USE_CSF -+ int busy; -+ int util_gl_share; -+ int util_cl_share[2]; -+#endif -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ diff = &kbdev->pm.backend.metrics.dvfs_diff; -+ -+ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, -+ diff); ++ u32 index = 0; + -+ utilisation = (100 * diff->time_busy) / -+ max(diff->time_busy + diff->time_idle, 1u); ++ lockdep_assert_held(&performance_counters.access_lock); + +#if !MALI_USE_CSF -+ busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); ++ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, ++ performance_counters.prfcnt_en.fe, 0x1); ++#else ++ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, ++ performance_counters.prfcnt_en.fe, 0x1); ++#endif /* !MALI_USE_CSF */ ++ gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, ++ &index, 1, ++ performance_counters.prfcnt_en.tiler, ++ DUMMY_IMPLEMENTATION_TILER_PRESENT); ++ gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, ++ KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, ++ performance_counters.prfcnt_en.l2, ++ performance_counters.l2_present); ++ gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, ++ &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, ++ performance_counters.prfcnt_en.shader, ++ performance_counters.shader_present); + -+ util_gl_share = (100 * diff->busy_gl) / busy; -+ util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; -+ util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; ++ /* Counter values are cleared after each dump */ ++ gpu_model_clear_prfcnt_values_nolock(); + -+ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, -+ util_cl_share); -+#else -+ /* Note that, at present, we don't pass protected-mode time to the -+ * platform here. It's unlikely to be useful, however, as the platform -+ * probably just cares whether the GPU is busy or not; time in -+ * protected mode is already added to busy-time at this point, though, -+ * so we should be good. 
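gpu_model_dump_nolock() above always emits the blocks in a fixed order: one front-end block (JM or CSHW), one tiler block, the full set of memsys blocks and the full set of shader blocks, so the sample buffer handed to gpu_model_prfcnt_dump_request() has to be sized for the worst case even when some cores are absent. A sizing sketch with the block counts assumed:

#include <stddef.h>
#include <stdint.h>

#define VALS_PER_BLOCK		64	/* assumed: KBASE_DUMMY_MODEL_VALUES_PER_BLOCK */
#define MAX_MEMSYS_BLOCKS	 8	/* assumed */
#define MAX_SHADER_CORES	32	/* assumed */

/* u32 slots needed for one dump: FE + tiler + memsys + shader blocks. */
static size_t dump_sample_len(void)
{
	return (size_t)(1 + 1 + MAX_MEMSYS_BLOCKS + MAX_SHADER_CORES) * VALS_PER_BLOCK;
}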
-+ */ -+ kbase_platform_dvfs_event(kbdev, utilisation); -+#endif ++ /* simulate a 'long' time between samples */ ++ performance_counters.time += 10; +} + -+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) ++#if !MALI_USE_CSF ++static void midgard_model_dump_prfcnt(void) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ unsigned long flags; + -+ return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON; ++ spin_lock_irqsave(&performance_counters.access_lock, flags); ++ gpu_model_dump_nolock(); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} -+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); -+ -+void kbase_pm_metrics_start(struct kbase_device *kbdev) ++#else ++void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) +{ -+ struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics; ++ unsigned long flags; + -+ if (unlikely(!metrics->initialized)) ++ if (WARN_ON(!sample_buf)) + return; + -+ /* Transition to ON, from a stopped state (transition c) */ -+ if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF) -+ /* Start the timer only if it's been fully stopped (transition d)*/ -+ hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), -+ HRTIMER_MODE_REL); ++ spin_lock_irqsave(&performance_counters.access_lock, flags); ++ performance_counters.prfcnt_base_cpu = sample_buf; ++ performance_counters.prfcnt_en = enable_maps; ++ gpu_model_dump_nolock(); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} + -+void kbase_pm_metrics_stop(struct kbase_device *kbdev) ++void gpu_model_glb_request_job_irq(void *model) +{ -+ if (unlikely(!kbdev->pm.backend.metrics.initialized)) -+ return; ++ unsigned long flags; + -+ /* Timer is Stopped if its currently on (transition a) */ -+ atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); ++ spin_lock_irqsave(&hw_error_status.access_lock, flags); ++ hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; ++ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); ++ gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); +} ++#endif /* !MALI_USE_CSF */ + -+ -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -+ -+#if !MALI_USE_CSF -+/** -+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently -+ * running atoms -+ * @kbdev: Device pointer -+ * -+ * The caller must hold kbdev->pm.backend.metrics.lock -+ */ -+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) ++static void init_register_statuses(struct dummy_model_t *dummy) +{ -+ unsigned int js; -+ -+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); -+ -+ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.gpu_active = false; -+ -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); ++ int i; + -+ /* Head atom may have just completed, so if it isn't running -+ * then try the next atom -+ */ -+ if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) -+ katom = kbase_gpu_inspect(kbdev, js, 1); ++ hw_error_status.errors_mask = 0; ++ hw_error_status.gpu_error_irq = 0; ++ hw_error_status.gpu_fault_status = 0; ++ hw_error_status.job_irq_rawstat = 0; ++ hw_error_status.job_irq_status = 0; ++ hw_error_status.mmu_irq_rawstat = 0; ++ 
hw_error_status.mmu_irq_mask = 0; + -+ if (katom && katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ int device_nr = (katom->core_req & -+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) -+ ? katom->device_nr : 0; -+ if (!WARN_ON(device_nr >= 2)) -+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1; -+ } else { -+ kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; -+ trace_sysgraph(SGR_ACTIVE, 0, js); -+ } -+ kbdev->pm.backend.metrics.gpu_active = true; -+ } else { -+ trace_sysgraph(SGR_INACTIVE, 0, js); -+ } ++ for (i = 0; i < NUM_SLOTS; i++) { ++ hw_error_status.js_status[i] = 0; ++ hw_error_status.job_irq_rawstat |= ++ (dummy->slots[i].job_complete_irq_asserted) << i; ++ hw_error_status.job_irq_status |= ++ (dummy->slots[i].job_complete_irq_asserted) << i; ++ } ++ for (i = 0; i < NUM_MMU_AS; i++) { ++ hw_error_status.as_command[i] = 0; ++ hw_error_status.as_faultstatus[i] = 0; ++ hw_error_status.mmu_irq_mask |= 1 << i; + } ++ ++ performance_counters.time = 0; +} + -+/* called when job is submitted to or removed from a GPU slot */ -+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) ++static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) +{ -+ unsigned long flags; -+ ktime_t now; ++ lockdep_assert_held(&hw_error_status.access_lock); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { ++ if (job_slot == hw_error_status.current_job_slot) { ++#if !MALI_USE_CSF ++ if (hw_error_status.js_status[job_slot] == 0) { ++ /* status reg is clean; it can be written */ + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ switch (hw_error_status.errors_mask & ++ IS_A_JOB_ERROR) { ++ case KBASE_JOB_INTERRUPTED: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INTERRUPTED; ++ break; + -+ if (!timestamp) { -+ now = ktime_get_raw(); -+ timestamp = &now; -+ } ++ case KBASE_JOB_STOPPED: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_STOPPED; ++ break; + -+ /* Track how much of time has been spent busy or idle. For JM GPUs, -+ * this also evaluates how long CL and/or GL jobs have been busy for. -+ */ -+ kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); ++ case KBASE_JOB_TERMINATED: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_TERMINATED; ++ break; + -+ kbase_pm_metrics_active_calc(kbdev); -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -+} -+#endif /* !MALI_USE_CSF */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c -new file mode 100644 -index 000000000..4788f0413 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c -@@ -0,0 +1,426 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
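init_register_statuses() above and the failure path below rely on one JOB_IRQ bit layout throughout the file: bit n of JOB_IRQ_RAWSTAT/STATUS means job slot n completed, and bit (n + 16) means it failed. Two one-line helpers spell that out:

#include <stdbool.h>
#include <stdint.h>

static bool job_slot_done(uint32_t job_irq_rawstat, unsigned int slot)
{
	return job_irq_rawstat & (1u << slot);		/* low half: completion */
}

static bool job_slot_failed(uint32_t job_irq_rawstat, unsigned int slot)
{
	return job_irq_rawstat & (1u << (slot + 16));	/* high half: failure */
}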
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ case KBASE_JOB_CONFIG_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_CONFIG_FAULT; ++ break; + -+/* -+ * Power policy API implementations -+ */ ++ case KBASE_JOB_POWER_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_POWER_FAULT; ++ break; + -+#include -+#include -+#include -+#include -+#include ++ case KBASE_JOB_READ_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_READ_FAULT; ++ break; + -+#if MALI_USE_CSF && defined CONFIG_MALI_BIFROST_DEBUG -+#include -+#endif ++ case KBASE_JOB_WRITE_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_WRITE_FAULT; ++ break; + -+#include ++ case KBASE_JOB_AFFINITY_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_AFFINITY_FAULT; ++ break; + -+static const struct kbase_pm_policy *const all_policy_list[] = { -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ &kbase_pm_always_on_policy_ops, -+ &kbase_pm_coarse_demand_policy_ops, -+#else /* CONFIG_MALI_BIFROST_NO_MALI */ -+ &kbase_pm_coarse_demand_policy_ops, -+ &kbase_pm_always_on_policy_ops, -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+}; ++ case KBASE_JOB_BUS_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_BUS_FAULT; ++ break; + -+void kbase_pm_policy_init(struct kbase_device *kbdev) -+{ -+ const struct kbase_pm_policy *default_policy = all_policy_list[0]; -+ struct device_node *np = kbdev->dev->of_node; -+ const char *power_policy_name; -+ unsigned long flags; -+ int i; ++ case KBASE_INSTR_INVALID_PC: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_INVALID_PC; ++ break; + -+ if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { -+ for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) -+ if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { -+ default_policy = all_policy_list[i]; -+ break; -+ } -+ } ++ case KBASE_INSTR_INVALID_ENC: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_INVALID_ENC; ++ break; + -+#if MALI_USE_CSF && defined(CONFIG_MALI_BIFROST_DEBUG) -+ /* Use always_on policy if module param fw_debug=1 is -+ * passed, to aid firmware debugging. 
-+ */ -+ if (fw_debug) -+ default_policy = &kbase_pm_always_on_policy_ops; -+#endif ++ case KBASE_INSTR_TYPE_MISMATCH: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_TYPE_MISMATCH; ++ break; + -+ default_policy->init(kbdev); ++ case KBASE_INSTR_OPERAND_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_OPERAND_FAULT; ++ break; + -+#if MALI_USE_CSF -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.pm_current_policy = default_policy; -+ kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#else -+ CSTD_UNUSED(flags); -+ kbdev->pm.backend.pm_current_policy = default_policy; -+#endif -+} ++ case KBASE_INSTR_TLS_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_TLS_FAULT; ++ break; + -+void kbase_pm_policy_term(struct kbase_device *kbdev) -+{ -+ kbdev->pm.backend.pm_current_policy->term(kbdev); -+} ++ case KBASE_INSTR_BARRIER_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_BARRIER_FAULT; ++ break; + -+void kbase_pm_update_active(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; -+ unsigned long flags; -+ bool active; ++ case KBASE_INSTR_ALIGN_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_INSTR_ALIGN_FAULT; ++ break; + -+ lockdep_assert_held(&pm->lock); ++ case KBASE_DATA_INVALID_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_DATA_INVALID_FAULT; ++ break; + -+ /* pm_current_policy will never be NULL while pm.lock is held */ -+ KBASE_DEBUG_ASSERT(backend->pm_current_policy); ++ case KBASE_TILE_RANGE_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_TILE_RANGE_FAULT; ++ break; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ case KBASE_ADDR_RANGE_FAULT: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_ADDRESS_RANGE_FAULT; ++ break; + -+ active = backend->pm_current_policy->get_core_active(kbdev); -+ WARN((kbase_pm_is_active(kbdev) && !active), -+ "GPU is active but policy '%s' is indicating that it can be powered off", -+ kbdev->pm.backend.pm_current_policy->name); ++ case KBASE_OUT_OF_MEMORY: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_OUT_OF_MEMORY; ++ break; + -+ if (active) { -+ /* Power on the GPU and any cores requested by the policy */ -+ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && -+ pm->backend.poweroff_wait_in_progress) { -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); -+ pm->backend.poweron_required = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ /* Cancel the invocation of -+ * kbase_pm_gpu_poweroff_wait_wq() from the L2 state -+ * machine. This is safe - it -+ * invoke_poweroff_wait_wq_when_l2_off is true, then -+ * the poweroff work hasn't even been queued yet, -+ * meaning we can go straight to powering on. 
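One detail worth pulling out of the failure reporting just below this switch: JOB_IRQ_RAWSTAT records the failure unconditionally, but the failure only reaches JOB_IRQ_STATUS if the slot's interrupt is unmasked, because the asserted bit is ANDed with the per-slot mask before being shifted into the upper half. A compressed restatement of that gating:

#include <stdint.h>

/* asserted and irq_mask are the per-slot 0/1 flags kept in struct job_slot. */
static void report_job_failure(uint32_t *rawstat, uint32_t *status,
			       unsigned int slot, int asserted, int irq_mask)
{
	*rawstat |= (uint32_t)asserted << (slot + 16);
	*status |= (((uint32_t)asserted << slot) &
		    ((uint32_t)irq_mask << slot)) << 16;
}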
-+ */ -+ pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; -+ pm->backend.poweroff_wait_in_progress = false; -+ pm->backend.l2_desired = true; -+#if MALI_USE_CSF -+ pm->backend.mcu_desired = true; -+#endif ++ case KBASE_UNKNOWN: ++ hw_error_status.js_status[job_slot] = ++ JS_STATUS_UNKNOWN; ++ break; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_pm_do_poweron(kbdev, false); ++ default: ++ model_error_log(KBASE_CORE, ++ "\nAtom Chain 0x%llx: Invalid Error Mask!", ++ hw_error_status.current_jc); ++ break; ++ } ++ } ++#endif /* !MALI_USE_CSF */ ++ ++ /* we set JOB_FAIL_ */ ++ hw_error_status.job_irq_rawstat |= ++ (dummy->slots[job_slot].job_complete_irq_asserted) << ++ (job_slot + 16); ++ hw_error_status.job_irq_status |= ++ (((dummy->slots[job_slot].job_complete_irq_asserted) << ++ (job_slot)) & ++ (dummy->slots[job_slot].job_irq_mask << ++ job_slot)) << 16; ++ } else { ++ hw_error_status.job_irq_rawstat |= ++ (dummy->slots[job_slot].job_complete_irq_asserted) << ++ job_slot; ++ hw_error_status.job_irq_status |= ++ ((dummy->slots[job_slot].job_complete_irq_asserted) << ++ (job_slot)) & ++ (dummy->slots[job_slot].job_irq_mask << ++ job_slot); + } + } else { -+ /* It is an error for the power policy to power off the GPU -+ * when there are contexts active -+ */ -+ KBASE_DEBUG_ASSERT(pm->active_count == 0); ++ hw_error_status.job_irq_rawstat |= ++ (dummy->slots[job_slot].job_complete_irq_asserted) << ++ job_slot; ++ hw_error_status.job_irq_status |= ++ ((dummy->slots[job_slot].job_complete_irq_asserted) << ++ (job_slot)) & ++ (dummy->slots[job_slot].job_irq_mask << job_slot); ++ } /* end of job register statuses */ + -+ pm->backend.poweron_required = false; ++ if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { ++ int i; + -+ /* Request power off */ -+ if (pm->backend.gpu_powered) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ for (i = 0; i < NUM_MMU_AS; i++) { ++ if (i == hw_error_status.faulty_mmu_as) { ++ if (hw_error_status.as_faultstatus[i] == 0) { ++ u32 status = ++ hw_error_status.as_faultstatus[i]; ++ /* status reg is clean; it can be ++ * written ++ */ ++ switch (hw_error_status.errors_mask & ++ IS_A_MMU_ERROR) { ++ case KBASE_TRANSLATION_FAULT: ++ /* 0xCm means TRANSLATION FAULT ++ * (m is mmu_table_level) ++ */ ++ status = ++ ((1 << 7) | (1 << 6) | ++ hw_error_status.mmu_table_level ++ ); ++ break; + -+ /* Power off the GPU immediately */ -+ kbase_pm_do_poweroff(kbdev); ++ case KBASE_PERMISSION_FAULT: ++ /*0xC8 means PERMISSION FAULT */ ++ status = ((1 << 7) | (1 << 6) | ++ (1 << 3)); ++ break; ++ ++ case KBASE_TRANSTAB_BUS_FAULT: ++ /* 0xDm means TRANSITION TABLE ++ * BUS FAULT (m is ++ * mmu_table_level) ++ */ ++ status = ((1 << 7) | (1 << 6) | ++ (1 << 4) | ++ hw_error_status.mmu_table_level ++ ); ++ break; ++ ++ case KBASE_ACCESS_FLAG: ++ /* 0xD8 means ACCESS FLAG */ ++ status = ((1 << 7) | (1 << 6) | ++ (1 << 4) | (1 << 3)); ++ break; ++ ++ default: ++ model_error_log(KBASE_CORE, ++ "\nAtom Chain 0x%llx: Invalid Error Mask!", ++ hw_error_status.current_jc); ++ break; ++ } ++ hw_error_status.as_faultstatus[i] = ++ status; ++ } ++ ++ if (hw_error_status.errors_mask & ++ KBASE_TRANSTAB_BUS_FAULT) ++ hw_error_status.mmu_irq_rawstat |= ++ 1 << (16 + i); /* bus error */ ++ else ++ hw_error_status.mmu_irq_rawstat |= ++ 1 << i; /* page fault */ ++ } ++ } ++ } /*end of mmu register statuses */ ++ if (hw_error_status.errors_mask & IS_A_GPU_ERROR) { ++ if (hw_error_status.gpu_fault_status) { ++ /* not the first GPU error reported */ ++ 
hw_error_status.gpu_error_irq |= (1 << 7); + } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ hw_error_status.gpu_error_irq |= 1; ++ switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) { ++ case KBASE_DELAYED_BUS_FAULT: ++ hw_error_status.gpu_fault_status = (1 << 7); ++ break; ++ ++ case KBASE_SHAREABILITY_FAULT: ++ hw_error_status.gpu_fault_status = (1 << 7) | ++ (1 << 3); ++ break; ++ ++ default: ++ model_error_log(KBASE_CORE, ++ "\nAtom Chain 0x%llx: Invalid Error Mask!", ++ hw_error_status.current_jc); ++ break; ++ } + } + } ++ hw_error_status.errors_mask = 0; /*clear error mask */ +} + -+void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) ++#if !MALI_USE_CSF ++static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) +{ -+ bool shaders_desired; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->pm.lock); -+ -+ if (kbdev->pm.backend.pm_current_policy == NULL) -+ return; -+ if (kbdev->pm.backend.poweroff_wait_in_progress) -+ return; ++ int i; + -+#if MALI_USE_CSF -+ CSTD_UNUSED(shaders_desired); -+ /* Invoke the MCU state machine to send a request to FW for updating -+ * the mask of shader cores that can be used for allocation of -+ * endpoints requested by CSGs. -+ */ -+ if (kbase_pm_is_mcu_desired(kbdev)) -+ kbase_pm_update_state(kbdev); -+#else -+ /* In protected transition, don't allow outside shader core request -+ * affect transition, return directly -+ */ -+ if (kbdev->pm.backend.protected_transition_override) -+ return; ++ lockdep_assert_held(&hw_error_status.access_lock); ++ pr_debug("%s", "Updating the JS_ACTIVE register"); + -+ shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); ++ for (i = 0; i < NUM_SLOTS; i++) { ++ int slot_active = dummy->slots[i].job_active; ++ int next_busy = dummy->slots[i].job_queued; + -+ if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) -+ kbase_pm_update_state(kbdev); -+#endif ++ if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { ++ /* clear the bits we're updating */ ++ dummy->job_irq_js_state &= ~((1 << (16 + i)) | ++ (1 << i)); ++ if (hw_error_status.js_status[i]) { ++ dummy->job_irq_js_state |= next_busy << ++ (i + 16); ++ if (mask & (1 << (i + 16))) { ++ /* clear job slot status */ ++ hw_error_status.js_status[i] = 0; ++ /* continue execution of jobchain */ ++ dummy->slots[i].job_active = ++ dummy->slots[i].job_queued; ++ } ++ } else { ++ /* set bits if needed */ ++ dummy->job_irq_js_state |= ((slot_active << i) | ++ (next_busy << (i + 16))); ++ } ++ } ++ } ++ pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state); +} ++#endif /* !MALI_USE_CSF */ + -+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) ++/** ++ * find_control_reg_values() - Look up constant control register values. ++ * @gpu: GPU name ++ * ++ * Look up the GPU name to find the correct set of control register values for ++ * that GPU. If not found, warn and use the first values in the array. ++ * ++ * Return: Pointer to control register values for that GPU. ++ */ ++static const struct control_reg_values_t *find_control_reg_values(const char *gpu) +{ -+ bool shaders_desired = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ size_t i; ++ const struct control_reg_values_t *ret = NULL; + -+ if (kbdev->pm.backend.pm_current_policy == NULL) -+ return; -+ if (kbdev->pm.backend.poweroff_wait_in_progress) -+ return; ++ /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1 ++ * revisions respectively. 
As none of them are named "tGOx" the name comparison ++ * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0" ++ * or "r1p0" and is derived from the DDK's build configuration. In cases ++ * where it is unavailable, it defaults to tGOx r1p0. ++ */ ++ if (!strcmp(gpu, "tGOx")) { ++#ifdef CONFIG_GPU_HWVER ++ if (!strcmp(CONFIG_GPU_HWVER, "r0p0")) ++ gpu = "tGOx_r0p0"; ++ else if (!strcmp(CONFIG_GPU_HWVER, "r1p0")) ++#endif /* CONFIG_GPU_HWVER defined */ ++ gpu = "tGOx_r1p0"; ++ } + -+#if !MALI_USE_CSF -+ if (kbdev->pm.backend.protected_transition_override) -+ /* We are trying to change in/out of protected mode - force all -+ * cores off so that the L2 powers down -+ */ -+ shaders_desired = false; -+ else -+ shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); -+#endif ++ for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { ++ const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; + -+ if (kbdev->pm.backend.shaders_desired != shaders_desired) { -+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); ++ if (!strcmp(fcrv->name, gpu)) { ++ ret = fcrv; ++ pr_debug("Found control register values for %s\n", gpu); ++ break; ++ } ++ } + -+ kbdev->pm.backend.shaders_desired = shaders_desired; -+ kbase_pm_update_state(kbdev); ++ if (!ret) { ++ ret = &all_control_reg_values[0]; ++ pr_warn("Couldn't find control register values for GPU %s; using default %s\n", ++ gpu, ret->name); + } ++ ++ return ret; +} + -+void kbase_pm_update_cores_state(struct kbase_device *kbdev) ++void *midgard_model_create(struct kbase_device *kbdev) +{ -+ unsigned long flags; ++ struct dummy_model_t *dummy = NULL; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock_init(&hw_error_status.access_lock); ++ spin_lock_init(&performance_counters.access_lock); + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ if (dummy) { ++ dummy->job_irq_js_state = 0; ++ init_register_statuses(dummy); ++ dummy->control_reg_values = find_control_reg_values(no_mali_gpu); ++ performance_counters.l2_present = get_implementation_register( ++ GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); ++ performance_counters.shader_present = get_implementation_register( ++ GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + -+int kbase_pm_list_policies(struct kbase_device *kbdev, -+ const struct kbase_pm_policy * const **list) -+{ -+ if (list) -+ *list = all_policy_list; ++ gpu_device_set_data(dummy, kbdev); + -+ return ARRAY_SIZE(all_policy_list); -+} ++ dev_info(kbdev->dev, "Using Dummy Model"); ++ } + -+KBASE_EXPORT_TEST_API(kbase_pm_list_policies); ++ return dummy; ++} + -+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) ++void midgard_model_destroy(void *h) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ return kbdev->pm.backend.pm_current_policy; ++ kfree((void *)h); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_policy); -+ -+#if MALI_USE_CSF -+static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) ++static void midgard_model_get_outputs(void *h) +{ -+ long remaining; -+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); -+ int err = 0; ++ struct dummy_model_t *dummy = (struct dummy_model_t *)h; + -+ /* Wait for L2 becoming off, by which the MCU is also implicitly off -+ * since the L2 state machine would only start its 
power-down -+ * sequence when the MCU is in off state. The L2 off is required -+ * as the tiler may need to be power cycled for MCU reconfiguration -+ * for host control of shader cores. -+ */ -+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE -+ remaining = wait_event_killable_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); -+#else -+ remaining = wait_event_timeout( -+ kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); -+#endif ++ lockdep_assert_held(&hw_error_status.access_lock); + -+ if (!remaining) { -+ err = -ETIMEDOUT; -+ } else if (remaining < 0) { -+ dev_info(kbdev->dev, -+ "Wait for L2_off got interrupted"); -+ err = (int)remaining; -+ } ++ if (hw_error_status.job_irq_status) ++ gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); + -+ dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, -+ err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); ++ if ((dummy->power_changed && dummy->power_changed_mask) || ++ (dummy->reset_completed & dummy->reset_completed_mask) || ++ hw_error_status.gpu_error_irq || ++#if !MALI_USE_CSF ++ dummy->prfcnt_sample_completed || ++#else ++ (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || ++#endif ++ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) ++ gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); + -+ return err; ++ if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) ++ gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); +} -+#endif + -+void kbase_pm_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_policy *new_policy) ++static void midgard_model_update(void *h) +{ -+ const struct kbase_pm_policy *old_policy; -+ unsigned long flags; -+#if MALI_USE_CSF -+ unsigned int new_policy_csf_pm_sched_flags; -+ bool sched_suspend; -+ bool reset_gpu = false; -+ bool reset_op_prevented = true; -+ struct kbase_csf_scheduler *scheduler = NULL; -+#endif ++ struct dummy_model_t *dummy = (struct dummy_model_t *)h; ++ int i; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(new_policy != NULL); ++ lockdep_assert_held(&hw_error_status.access_lock); + -+ KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); ++ for (i = 0; i < NUM_SLOTS; i++) { ++ if (!dummy->slots[i].job_active) ++ continue; + -+#if MALI_USE_CSF -+ scheduler = &kbdev->csf.scheduler; -+ KBASE_DEBUG_ASSERT(scheduler != NULL); ++ if (dummy->slots[i].job_disabled) { ++ update_register_statuses(dummy, i); ++ continue; ++ } + -+ /* Serialize calls on kbase_pm_set_policy() */ -+ mutex_lock(&kbdev->pm.backend.policy_change_lock); ++ /* If there are any pending interrupts that have not ++ * been cleared we cannot run the job in the next register ++ * as we will overwrite the register status of the job in ++ * the head registers - which has not yet been read ++ */ ++ if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || ++ (hw_error_status.job_irq_rawstat & (1 << i))) { ++ continue; ++ } + -+ if (kbase_reset_gpu_prevent_and_wait(kbdev)) { -+ dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset"); -+ reset_op_prevented = false; ++ /*this job is done assert IRQ lines */ ++ signal_int(dummy, i); ++#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT ++ midgard_set_error(i); ++#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */ ++ update_register_statuses(dummy, i); ++ /*if this job slot returned failures we cannot use it */ ++ if (hw_error_status.job_irq_rawstat & (1 
<< (i + 16))) { ++ dummy->slots[i].job_active = 0; ++ continue; ++ } ++ /*process next job */ ++ dummy->slots[i].job_active = dummy->slots[i].job_queued; ++ dummy->slots[i].job_queued = 0; ++ if (dummy->slots[i].job_active) { ++ if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) ++ model_error_log(KBASE_CORE, ++ "\natom %lld running a job on a dirty slot", ++ hw_error_status.current_jc); ++ } + } ++} + -+ /* In case of CSF, the scheduler may be invoked to suspend. In that -+ * case, there is a risk that the L2 may be turned on by the time we -+ * check it here. So we hold the scheduler lock to avoid other operations -+ * interfering with the policy change and vice versa. -+ */ -+ mutex_lock(&scheduler->lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* policy_change_clamp_state_to_off, when needed, is set/cleared in -+ * this function, a very limited temporal scope for covering the -+ * change transition. -+ */ -+ WARN_ON(kbdev->pm.backend.policy_change_clamp_state_to_off); -+ new_policy_csf_pm_sched_flags = new_policy->pm_sched_flags; ++static void invalidate_active_jobs(struct dummy_model_t *dummy) ++{ ++ int i; + -+ /* Requiring the scheduler PM suspend operation when changes involving -+ * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON -+ * flag bit. -+ */ -+ sched_suspend = reset_op_prevented && -+ (CSF_DYNAMIC_PM_CORE_KEEP_ON & -+ (new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags)); ++ lockdep_assert_held(&hw_error_status.access_lock); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ for (i = 0; i < NUM_SLOTS; i++) { ++ if (dummy->slots[i].job_active) { ++ hw_error_status.job_irq_rawstat |= (1 << (16 + i)); + -+ if (sched_suspend) { -+ /* Update the suspend flag to reflect actually suspend being done ! 
*/ -+ sched_suspend = !kbase_csf_scheduler_pm_suspend_no_lock(kbdev); -+ /* Set the reset recovery flag if the required suspend failed */ -+ reset_gpu = !sched_suspend; ++ hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ ++ } + } ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++void midgard_model_write_reg(void *h, u32 addr, u32 value) ++{ ++ unsigned long flags; ++ struct dummy_model_t *dummy = (struct dummy_model_t *)h; + -+ kbdev->pm.backend.policy_change_clamp_state_to_off = sched_suspend; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_lock_irqsave(&hw_error_status.access_lock, flags); + -+ if (sched_suspend) -+ reset_gpu = policy_change_wait_for_L2_off(kbdev); -+#endif ++#if !MALI_USE_CSF ++ if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && ++ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { ++ unsigned int slot_idx = (addr >> 7) & 0xf; + -+ /* During a policy change we pretend the GPU is active */ -+ /* A suspend won't happen here, because we're in a syscall from a -+ * userspace thread -+ */ -+ kbase_pm_context_active(kbdev); ++ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); ++ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { ++ hw_error_status.current_jc &= ++ ~((u64) (0xFFFFFFFF)); ++ hw_error_status.current_jc |= (u64) value; ++ } ++ if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { ++ hw_error_status.current_jc &= (u64) 0xFFFFFFFF; ++ hw_error_status.current_jc |= ++ ((u64) value) << 32; ++ } ++ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && ++ value == 1) { ++ pr_debug("%s", "start detected"); ++ KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || ++ !dummy->slots[slot_idx].job_queued); ++ if ((dummy->slots[slot_idx].job_active) || ++ (hw_error_status.job_irq_rawstat & ++ (1 << (slot_idx + 16)))) { ++ pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" ++ ); ++ dummy->slots[slot_idx].job_queued = 1; ++ } else { ++ dummy->slots[slot_idx].job_active = 1; ++ } ++ } + -+ kbase_pm_lock(kbdev); ++ if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == ++ 0) ++ dummy->slots[slot_idx].job_queued = 0; + -+ /* Remove the policy to prevent IRQ handlers from working on it */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ old_policy = kbdev->pm.backend.pm_current_policy; -+ kbdev->pm.backend.pm_current_policy = NULL; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && ++ (value == JS_COMMAND_SOFT_STOP || ++ value == JS_COMMAND_HARD_STOP)) { ++ /*dummy->slots[slot_idx].job_active = 0; */ ++ hw_error_status.current_job_slot = slot_idx; ++ if (value == JS_COMMAND_SOFT_STOP) { ++ hw_error_status.errors_mask = KBASE_JOB_STOPPED; ++ } else { /*value == 3 */ + -+ KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, old_policy->id); -+ if (old_policy->term) -+ old_policy->term(kbdev); ++ if (dummy->slots[slot_idx].job_disabled != 0) { ++ pr_debug("enabling slot after HARD_STOP" ++ ); ++ dummy->slots[slot_idx].job_disabled = 0; ++ } ++ hw_error_status.errors_mask = ++ KBASE_JOB_TERMINATED; ++ } ++ } ++ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { ++ int i; + -+ memset(&kbdev->pm.backend.pm_policy_data, 0, -+ sizeof(union kbase_pm_policy_data)); ++ for (i = 0; i < NUM_SLOTS; i++) { ++ if (value & ((1 << i) | (1 << (i + 16)))) ++ dummy->slots[i].job_complete_irq_asserted = 0; ++ /* hw_error_status.js_status[i] is cleared in ++ * update_job_irq_js_state ++ */ ++ } ++ pr_debug("%s", "job irq cleared"); ++ 
update_job_irq_js_state(dummy, value); ++ /*remove error condition for JOB */ ++ hw_error_status.job_irq_rawstat &= ~(value); ++ hw_error_status.job_irq_status &= ~(value); ++ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { ++ int i; + -+ KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); -+ if (new_policy->init) -+ new_policy->init(kbdev); ++ for (i = 0; i < NUM_SLOTS; i++) ++ dummy->slots[i].job_irq_mask = (value >> i) & 0x01; ++ pr_debug("job irq mask to value %x", value); ++ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { ++#else /* !MALI_USE_CSF */ ++ if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { ++ pr_debug("%s", "job irq cleared"); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.pm_current_policy = new_policy; ++ hw_error_status.job_irq_rawstat &= ~(value); ++ hw_error_status.job_irq_status &= ~(value); ++ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { ++ /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ ++ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { ++#endif /* !MALI_USE_CSF */ ++ pr_debug("GPU_IRQ_MASK set to 0x%x", value); ++ dummy->reset_completed_mask = (value >> 8) & 0x01; ++ dummy->power_changed_mask = (value >> 9) & 0x03; ++ dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; +#if MALI_USE_CSF -+ kbdev->pm.backend.csf_pm_sched_flags = new_policy_csf_pm_sched_flags; -+ /* New policy in place, release the clamping on mcu/L2 off state */ -+ kbdev->pm.backend.policy_change_clamp_state_to_off = false; -+ kbase_pm_update_state(kbdev); ++ dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u; +#endif -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ /* If any core power state changes were previously attempted, but -+ * couldn't be made because the policy was changing (current_policy was -+ * NULL), then re-try them here. 
-+ */ -+ kbase_pm_update_active(kbdev); -+ kbase_pm_update_cores_state(kbdev); -+ -+ kbase_pm_unlock(kbdev); ++ } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { ++ dummy->coherency_enable = value; ++ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { ++ if (value & (1 << 8)) { ++ pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); ++ dummy->reset_completed = 0; ++ } ++ if (value & (3 << 9)) ++ dummy->power_changed = 0; + -+ /* Now the policy change is finished, we release our fake context active -+ * reference -+ */ -+ kbase_pm_context_idle(kbdev); ++ if (value & (1 << 17)) ++ dummy->clean_caches_completed = false; + +#if MALI_USE_CSF -+ /* Reverse the suspension done */ -+ if (sched_suspend) -+ kbase_csf_scheduler_pm_resume_no_lock(kbdev); -+ mutex_unlock(&scheduler->lock); ++ if (value & (1u << 20)) ++ dummy->flush_pa_range_completed = false; ++#endif /* MALI_USE_CSF */ + -+ if (reset_op_prevented) -+ kbase_reset_gpu_allow(kbdev); ++#if !MALI_USE_CSF ++ if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */ ++ dummy->prfcnt_sample_completed = 0; ++#endif /* !MALI_USE_CSF */ + -+ if (reset_gpu) { -+ dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); -+ kbase_reset_gpu_wait(kbdev); ++ /*update error status */ ++ hw_error_status.gpu_error_irq &= ~(value); ++ } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { ++ switch (value) { ++ case GPU_COMMAND_SOFT_RESET: ++ case GPU_COMMAND_HARD_RESET: ++ pr_debug("gpu reset (%d) requested", value); ++ /* no more fault status */ ++ hw_error_status.gpu_fault_status = 0; ++ /* completed reset instantly */ ++ dummy->reset_completed = 1; ++ break; ++#if MALI_USE_CSF ++ case GPU_COMMAND_CACHE_CLN_INV_L2: ++ case GPU_COMMAND_CACHE_CLN_INV_L2_LSC: ++ case GPU_COMMAND_CACHE_CLN_INV_FULL: ++#else ++ case GPU_COMMAND_CLEAN_CACHES: ++ case GPU_COMMAND_CLEAN_INV_CACHES: ++#endif ++ pr_debug("clean caches requested"); ++ dummy->clean_caches_completed = true; ++ break; ++#if MALI_USE_CSF ++ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2: ++ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC: ++ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL: ++ pr_debug("pa range flush requested"); ++ dummy->flush_pa_range_completed = true; ++ break; ++#endif /* MALI_USE_CSF */ ++#if !MALI_USE_CSF ++ case GPU_COMMAND_PRFCNT_SAMPLE: ++ midgard_model_dump_prfcnt(); ++ dummy->prfcnt_sample_completed = 1; ++#endif /* !MALI_USE_CSF */ ++ default: ++ break; ++ } ++#if MALI_USE_CSF ++ } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) && ++ addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) { ++ /* Writes ignored */ ++#endif ++ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { ++ dummy->l2_config = value; + } ++#if MALI_USE_CSF ++ else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && ++ addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + ++ (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { ++ if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) ++ hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; ++ } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && ++ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { ++ /* Do nothing */ ++ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && ++ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { ++ /* Do nothing */ ++ } else if (addr == IPA_CONTROL_REG(COMMAND)) { ++ pr_debug("Received IPA_CONTROL command"); ++ } else if (addr == IPA_CONTROL_REG(TIMER)) { ++ ipa_control_timer_enabled = value ? 
true : false; ++ } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && ++ (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { ++ enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( ++ (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); ++ bool is_low_word = ++ !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); + -+ mutex_unlock(&kbdev->pm.backend.policy_change_lock); ++ if (is_low_word) { ++ ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; ++ ipa_ctl_select_config[core_type] |= value; ++ } else { ++ ipa_ctl_select_config[core_type] &= U32_MAX; ++ ipa_ctl_select_config[core_type] |= ((u64)value << 32); ++ } ++ } +#endif -+} -+ -+KBASE_EXPORT_TEST_API(kbase_pm_set_policy); -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h -new file mode 100644 -index 000000000..e8113659b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h -@@ -0,0 +1,105 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ -+ -+/* -+ * Power policy API definitions -+ */ -+ -+#ifndef _KBASE_PM_POLICY_H_ -+#define _KBASE_PM_POLICY_H_ ++ else if (addr == MMU_REG(MMU_IRQ_MASK)) { ++ hw_error_status.mmu_irq_mask = value; ++ } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { ++ hw_error_status.mmu_irq_rawstat &= (~value); ++ } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { ++ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) ++ >> 6; + -+/** -+ * kbase_pm_policy_init - Initialize power policy framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Must be called before calling any other policy function -+ */ -+void kbase_pm_policy_init(struct kbase_device *kbdev); ++ switch (addr & 0x3F) { ++ case AS_COMMAND: ++ switch (value) { ++ case AS_COMMAND_NOP: ++ hw_error_status.as_command[mem_addr_space] = ++ value; ++ break; + -+/** -+ * kbase_pm_policy_term - Terminate power policy framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_policy_term(struct kbase_device *kbdev); ++ case AS_COMMAND_UPDATE: ++ hw_error_status.as_command[mem_addr_space] = ++ value; ++ if ((hw_error_status.as_faultstatus[ ++ mem_addr_space]) ++ && ((hw_error_status.as_transtab[ ++ mem_addr_space] & 0x3) != 0)) { ++ model_error_log(KBASE_CORE, ++ "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n" ++ ); ++ } else if ((hw_error_status.as_faultstatus[ ++ mem_addr_space]) ++ && ((hw_error_status.as_transtab[ ++ mem_addr_space] & 0x3) == 0)) { + -+/** -+ * kbase_pm_update_active - Update the active power state of the GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Calls into the current power policy -+ */ -+void kbase_pm_update_active(struct kbase_device *kbdev); ++ /*invalidate all active jobs */ ++ invalidate_active_jobs(dummy); ++ /* error handled */ ++ hw_error_status.as_faultstatus[ ++ mem_addr_space] = 0; ++ } ++ break; + -+/** -+ * kbase_pm_update_cores - Update the desired core state of the GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Calls into the current power policy -+ */ -+void kbase_pm_update_cores(struct kbase_device *kbdev); ++ case AS_COMMAND_LOCK: ++ case AS_COMMAND_UNLOCK: ++ hw_error_status.as_command[mem_addr_space] = ++ value; ++ break; + -+/** -+ * kbase_pm_cores_requested - Check that a power request has been locked into -+ * the HW. -+ * @kbdev: Kbase device -+ * @shader_required: true if shaders are required -+ * -+ * Called by the scheduler to check if a power on request has been locked into -+ * the HW. -+ * -+ * Note that there is no guarantee that the cores are actually ready, however -+ * when the request has been locked into the HW, then it is safe to submit work -+ * since the HW will wait for the transition to ready. -+ * -+ * A reference must first be taken prior to making this call. -+ * -+ * Caller must hold the hwaccess_lock. -+ * -+ * Return: true if the request to the HW was successfully made else false if the -+ * request is still pending. 
-+ */ -+static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, -+ bool shader_required) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ case AS_COMMAND_FLUSH_PT: ++ case AS_COMMAND_FLUSH_MEM: ++ if (hw_error_status.as_command[mem_addr_space] ++ != AS_COMMAND_LOCK) ++ model_error_log(KBASE_CORE, ++ "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" ++ ); ++ else /* error handled if any */ ++ hw_error_status.as_faultstatus[ ++ mem_addr_space] = 0; ++ hw_error_status.as_command[mem_addr_space] = ++ value; ++ break; + -+ /* If the L2 & tiler are not on or pending, then the tiler is not yet -+ * available, and shaders are definitely not powered. -+ */ -+ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && -+ kbdev->pm.backend.l2_state != KBASE_L2_ON && -+ kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) -+ return false; ++ default: ++ model_error_log(KBASE_CORE, ++ "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", ++ value); ++ break; ++ } ++ break; + -+ if (shader_required && -+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && -+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && -+ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) -+ return false; ++ case AS_TRANSTAB_LO: ++ hw_error_status.as_transtab[mem_addr_space] &= ++ ~((u64) (0xffffffff)); ++ hw_error_status.as_transtab[mem_addr_space] |= ++ (u64) value; ++ break; + -+ return true; -+} ++ case AS_TRANSTAB_HI: ++ hw_error_status.as_transtab[mem_addr_space] &= ++ (u64) 0xffffffff; ++ hw_error_status.as_transtab[mem_addr_space] |= ++ ((u64) value) << 32; ++ break; + -+#endif /* _KBASE_PM_POLICY_H_ */ -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h -new file mode 100644 -index 000000000..8622ef78d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h -@@ -0,0 +1,79 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ case AS_LOCKADDR_LO: ++ case AS_LOCKADDR_HI: ++ case AS_MEMATTR_LO: ++ case AS_MEMATTR_HI: ++ case AS_TRANSCFG_LO: ++ case AS_TRANSCFG_HI: ++ /* Writes ignored */ ++ break; + -+/* -+ * Backend-specific Power Manager shader core state definitions. -+ * The function-like macro KBASEP_SHADER_STATE() must be defined before -+ * including this header file. This header file can be included multiple -+ * times in the same compilation unit with different definitions of -+ * KBASEP_SHADER_STATE(). 
-+ * -+ * @OFF_CORESTACK_OFF: The shaders and core stacks are off -+ * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been -+ * requested to power on and hwcnt is being -+ * disabled -+ * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been -+ * requested to power on. Or after doing -+ * partial shader on/off, checking whether -+ * it's the desired state. -+ * @ON_CORESTACK_ON: The shaders and core stacks are on, and -+ * hwcnt already enabled. -+ * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt -+ * disabled, and checks to powering down or -+ * re-enabling hwcnt. -+ * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power -+ * off, but they remain on for the duration -+ * of the hysteresis timer -+ * @WAIT_GPU_IDLE: The shaders partial poweroff needs to -+ * reach a state where jobs on the GPU are -+ * finished including jobs currently running -+ * and in the GPU queue because of -+ * GPU2017-861 -+ * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired -+ * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 -+ * cache is being flushed. -+ * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are -+ * ready to be powered off. -+ * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders -+ * have been requested to power off -+ * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks -+ * have been requested to power off -+ * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the -+ * tick timer cancellation is still pending. -+ * @RESET_WAIT: The GPU is resetting, shader and core -+ * stack power states are unknown -+ */ -+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) -+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) -+KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) -+KBASEP_SHADER_STATE(ON_CORESTACK_ON) -+KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) -+KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) ++ default: ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", ++ mem_addr_space, addr, value); ++ break; ++ } ++ } else { ++ switch (addr) { +#if !MALI_USE_CSF -+KBASEP_SHADER_STATE(WAIT_GPU_IDLE) ++ case PRFCNT_BASE_LO: ++ performance_counters.prfcnt_base = ++ HI_MASK(performance_counters.prfcnt_base) | value; ++ performance_counters.prfcnt_base_cpu = ++ (u32 *)(uintptr_t)performance_counters.prfcnt_base; ++ break; ++ case PRFCNT_BASE_HI: ++ performance_counters.prfcnt_base = ++ LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); ++ performance_counters.prfcnt_base_cpu = ++ (u32 *)(uintptr_t)performance_counters.prfcnt_base; ++ break; ++ case PRFCNT_JM_EN: ++ performance_counters.prfcnt_en.fe = value; ++ break; ++ case PRFCNT_SHADER_EN: ++ performance_counters.prfcnt_en.shader = value; ++ break; ++ case PRFCNT_TILER_EN: ++ performance_counters.prfcnt_en.tiler = value; ++ break; ++ case PRFCNT_MMU_L2_EN: ++ performance_counters.prfcnt_en.l2 = value; ++ break; +#endif /* !MALI_USE_CSF */ -+KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) -+KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) -+KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) -+KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON) -+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF) -+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF) -+KBASEP_SHADER_STATE(RESET_WAIT) -diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c -new file mode 100644 -index 000000000..1b3346179 ---- /dev/null -+++ 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c -@@ -0,0 +1,279 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ case TILER_PWRON_LO: ++ dummy->power_on |= (value & 1) << 1; ++ /* Also ensure L2 is powered on */ ++ dummy->power_on |= value & 1; ++ dummy->power_changed = 1; ++ break; ++ case SHADER_PWRON_LO: ++ dummy->power_on |= ++ (value & dummy->control_reg_values->shader_present) << 2; ++ dummy->power_changed = 1; ++ break; ++ case L2_PWRON_LO: ++ dummy->power_on |= value & 1; ++ dummy->power_changed = 1; ++ break; ++ case STACK_PWRON_LO: ++ dummy->stack_power_on_lo |= value; ++ dummy->power_changed = 1; ++ break; ++ case TILER_PWROFF_LO: ++ dummy->power_on &= ~((value & 1) << 1); ++ dummy->power_changed = 1; ++ break; ++ case SHADER_PWROFF_LO: ++ dummy->power_on &= ++ ~((value & dummy->control_reg_values->shader_present) << 2); ++ dummy->power_changed = 1; ++ break; ++ case L2_PWROFF_LO: ++ dummy->power_on &= ~(value & 1); ++ /* Also ensure tiler is powered off */ ++ dummy->power_on &= ~((value & 1) << 1); ++ dummy->power_changed = 1; ++ break; ++ case STACK_PWROFF_LO: ++ dummy->stack_power_on_lo &= ~value; ++ dummy->power_changed = 1; ++ break; + -+#include -+#include -+#if MALI_USE_CSF -+#include -+#include -+#include -+#endif -+#include -+#include -+#include ++ case TILER_PWROFF_HI: ++ case SHADER_PWROFF_HI: ++ case L2_PWROFF_HI: ++ case PWR_KEY: ++ case PWR_OVERRIDE0: ++#if !MALI_USE_CSF ++ case JM_CONFIG: ++ case PRFCNT_CONFIG: ++#else /* !MALI_USE_CSF */ ++ case CSF_CONFIG: ++#endif /* !MALI_USE_CSF */ ++ case SHADER_CONFIG: ++ case TILER_CONFIG: ++ case L2_MMU_CONFIG: ++ /* Writes ignored */ ++ break; ++ default: ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", ++ addr, value); ++ break; ++ } ++ } + -+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, -+ u64 *cycle_counter, -+ u64 *system_time, -+ struct timespec64 *ts) ++ midgard_model_update(dummy); ++ midgard_model_get_outputs(dummy); ++ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); ++} ++ ++void midgard_model_read_reg(void *h, u32 addr, u32 *const value) +{ -+ u32 hi1, hi2; ++ unsigned long flags; ++ struct dummy_model_t *dummy = (struct dummy_model_t *)h; + -+ if (cycle_counter) -+ *cycle_counter = kbase_backend_get_cycle_cnt(kbdev); ++ spin_lock_irqsave(&hw_error_status.access_lock, flags); + -+ if (system_time) { -+ /* Read hi, lo, hi to ensure a coherent u64 */ -+ do { -+ hi1 = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TIMESTAMP_HI)); -+ *system_time = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TIMESTAMP_LO)); -+ hi2 = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TIMESTAMP_HI)); -+ } while (hi1 != hi2); -+ *system_time |= 
(((u64) hi1) << 32); -+ } ++ *value = 0; /* 0 by default */ ++#if !MALI_USE_CSF ++ if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { ++ pr_debug("%s", "JS_ACTIVE being read"); + -+ /* Record the CPU's idea of current time */ -+ if (ts != NULL) -+#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) -+ *ts = ktime_to_timespec64(ktime_get_raw()); -+#else -+ ktime_get_raw_ts64(ts); -+#endif -+} ++ *value = dummy->job_irq_js_state; ++ } else if (addr == GPU_CONTROL_REG(GPU_ID)) { ++#else /* !MALI_USE_CSF */ ++ if (addr == GPU_CONTROL_REG(GPU_ID)) { ++#endif /* !MALI_USE_CSF */ + ++ *value = dummy->control_reg_values->gpu_id; ++ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { ++ *value = hw_error_status.job_irq_rawstat; ++ pr_debug("%s", "JS_IRQ_RAWSTAT being read"); ++ } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) { ++ *value = hw_error_status.job_irq_status; ++ pr_debug("JS_IRQ_STATUS being read %x", *value); ++ } +#if !MALI_USE_CSF -+/** -+ * timedwait_cycle_count_active() - Timed wait till CYCLE_COUNT_ACTIVE is active -+ * -+ * @kbdev: Kbase device -+ * -+ * Return: true if CYCLE_COUNT_ACTIVE is active within the timeout. -+ */ -+static bool timedwait_cycle_count_active(struct kbase_device *kbdev) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ return true; -+#else -+ bool success = false; -+ const unsigned int timeout = 100; -+ const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); ++ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { ++ int i; + -+ while (time_is_after_jiffies(remaining)) { -+ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & -+ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { -+ success = true; -+ break; -+ } ++ *value = 0; ++ for (i = 0; i < NUM_SLOTS; i++) ++ *value |= dummy->slots[i].job_irq_mask << i; ++ pr_debug("JS_IRQ_MASK being read %x", *value); + } -+ return success; ++#else /* !MALI_USE_CSF */ ++ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) ++ ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ ++#endif /* !MALI_USE_CSF */ ++ else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { ++ *value = (dummy->reset_completed_mask << 8) | ++ ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | ++#if MALI_USE_CSF ++ ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) | +#endif -+} ++ (dummy->power_changed_mask << 9) | (1 << 7) | 1; ++ pr_debug("GPU_IRQ_MASK read %x", *value); ++ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { ++ *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | ++ (dummy->reset_completed << 8) | ++#if !MALI_USE_CSF ++ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | ++#endif /* !MALI_USE_CSF */ ++ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | ++#if MALI_USE_CSF ++ ((dummy->flush_pa_range_completed ? 
1u : 0u) << 20) | +#endif -+ -+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, -+ u64 *system_time, struct timespec64 *ts) -+{ ++ hw_error_status.gpu_error_irq; ++ pr_debug("GPU_IRQ_RAWSTAT read %x", *value); ++ } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { ++ *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | ++ ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | ++ ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | +#if !MALI_USE_CSF -+ kbase_pm_request_gpu_cycle_counter(kbdev); -+ WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, -+ "L2 not powered up"); -+ WARN_ONCE((!timedwait_cycle_count_active(kbdev)), -+ "Timed out on CYCLE_COUNT_ACTIVE"); ++ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | ++#endif /* !MALI_USE_CSF */ ++ (((dummy->clean_caches_completed && ++ dummy->clean_caches_completed_irq_enabled) ? ++ 1u : ++ 0u) ++ << 17) | ++#if MALI_USE_CSF ++ (((dummy->flush_pa_range_completed && ++ dummy->flush_pa_range_completed_irq_enabled) ? ++ 1u : ++ 0u) ++ << 20) | +#endif -+ kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, -+ ts); ++ hw_error_status.gpu_error_irq; ++ pr_debug("GPU_IRQ_STAT read %x", *value); ++ } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { ++ *value = 0; +#if !MALI_USE_CSF -+ kbase_pm_release_gpu_cycle_counter(kbdev); ++ } else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) { ++ *value = 0; +#endif -+} ++ } else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) { ++ *value = hw_error_status.gpu_fault_status; ++ } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { ++ *value = dummy->l2_config; ++ } ++#if MALI_USE_CSF ++ else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && ++ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { ++ *value = 0; ++ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && ++ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { ++ *value = 0; ++ } ++#endif ++ else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && ++ (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { ++ switch (addr) { ++ case GPU_CONTROL_REG(SHADER_PRESENT_LO): ++ case GPU_CONTROL_REG(SHADER_PRESENT_HI): ++ case GPU_CONTROL_REG(TILER_PRESENT_LO): ++ case GPU_CONTROL_REG(TILER_PRESENT_HI): ++ case GPU_CONTROL_REG(L2_PRESENT_LO): ++ case GPU_CONTROL_REG(L2_PRESENT_HI): ++ case GPU_CONTROL_REG(STACK_PRESENT_LO): ++ case GPU_CONTROL_REG(STACK_PRESENT_HI): ++ *value = get_implementation_register(addr, dummy->control_reg_values); ++ break; ++ case GPU_CONTROL_REG(SHADER_READY_LO): ++ *value = (dummy->power_on >> 0x02) & ++ get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), ++ dummy->control_reg_values); ++ break; ++ case GPU_CONTROL_REG(TILER_READY_LO): ++ *value = (dummy->power_on >> 0x01) & ++ get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), ++ dummy->control_reg_values); ++ break; ++ case GPU_CONTROL_REG(L2_READY_LO): ++ *value = dummy->power_on & ++ get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), ++ dummy->control_reg_values); ++ break; ++ case GPU_CONTROL_REG(STACK_READY_LO): ++ *value = dummy->stack_power_on_lo & ++ get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), ++ dummy->control_reg_values); ++ break; + -+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, -+ enum kbase_timeout_selector selector) -+{ -+ /* Timeout calculation: -+ * dividing number of cycles by freq in KHz automatically gives value -+ * in milliseconds. 
nr_cycles will have to be multiplied by 1e3 to -+ * get result in microseconds, and 1e6 to get result in nanoseconds. -+ */ ++ case GPU_CONTROL_REG(SHADER_READY_HI): ++ case GPU_CONTROL_REG(TILER_READY_HI): ++ case GPU_CONTROL_REG(L2_READY_HI): ++ case GPU_CONTROL_REG(STACK_READY_HI): ++ *value = 0; ++ break; + -+ u64 timeout, nr_cycles = 0; -+ u64 freq_khz; ++ case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): ++ case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): ++ case GPU_CONTROL_REG(TILER_PWRTRANS_LO): ++ case GPU_CONTROL_REG(TILER_PWRTRANS_HI): ++ case GPU_CONTROL_REG(L2_PWRTRANS_LO): ++ case GPU_CONTROL_REG(L2_PWRTRANS_HI): ++ case GPU_CONTROL_REG(STACK_PWRTRANS_LO): ++ case GPU_CONTROL_REG(STACK_PWRTRANS_HI): ++ *value = 0; ++ break; + -+ /* Only for debug messages, safe default in case it's mis-maintained */ -+ const char *selector_str = "(unknown)"; ++ case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): ++ case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): ++ case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): ++ case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): ++ case GPU_CONTROL_REG(L2_PWRACTIVE_LO): ++ case GPU_CONTROL_REG(L2_PWRACTIVE_HI): ++ *value = 0; ++ break; + -+ if (!kbdev->lowest_gpu_freq_khz) { -+ dev_dbg(kbdev->dev, -+ "Lowest frequency uninitialized! Using reference frequency for scaling"); -+ freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; -+ } else { -+ freq_khz = kbdev->lowest_gpu_freq_khz; -+ } ++#if !MALI_USE_CSF ++ case GPU_CONTROL_REG(JM_CONFIG): ++#else /* !MALI_USE_CSF */ ++ case GPU_CONTROL_REG(CSF_CONFIG): ++#endif /* !MALI_USE_CSF */ + -+ switch (selector) { -+ case MMU_AS_INACTIVE_WAIT_TIMEOUT: -+ selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; -+ nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; -+ break; -+ case KBASE_TIMEOUT_SELECTOR_COUNT: -+ default: ++ case GPU_CONTROL_REG(SHADER_CONFIG): ++ case GPU_CONTROL_REG(TILER_CONFIG): ++ case GPU_CONTROL_REG(L2_MMU_CONFIG): ++ *value = 0; ++ break; ++ ++ case GPU_CONTROL_REG(COHERENCY_FEATURES): ++ *value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */ ++ break; ++ case GPU_CONTROL_REG(COHERENCY_ENABLE): ++ *value = dummy->coherency_enable; ++ break; ++ ++ case GPU_CONTROL_REG(THREAD_TLS_ALLOC): ++ *value = 0; ++ break; ++ ++ default: ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: Reading unknown control reg 0x%x\n", ++ addr); ++ break; ++ } +#if !MALI_USE_CSF -+ WARN(1, "Invalid timeout selector used! Using default value"); -+ nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; -+ break; -+ case JM_DEFAULT_JS_FREE_TIMEOUT: -+ selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; -+ nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; -+ break; -+#else -+ /* Use Firmware timeout if invalid selection */ -+ WARN(1, -+ "Invalid timeout selector used! Using CSF Firmware timeout"); -+ fallthrough; -+ case CSF_FIRMWARE_TIMEOUT: -+ selector_str = "CSF_FIRMWARE_TIMEOUT"; -+ /* Any FW timeout cannot be longer than the FW ping interval, after which -+ * the firmware_aliveness_monitor will be triggered and may restart -+ * the GPU if the FW is unresponsive. 
++ } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && ++ (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { ++ int slot_idx = (addr >> 7) & 0xf; ++ int sub_reg = addr & 0x7F; ++ ++ KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); ++ switch (sub_reg) { ++ case JS_HEAD_NEXT_LO: ++ *value = (u32) ((hw_error_status.current_jc) & ++ 0xFFFFFFFF); ++ break; ++ case JS_HEAD_NEXT_HI: ++ *value = (u32) (hw_error_status.current_jc >> 32); ++ break; ++ case JS_STATUS: ++ if (hw_error_status.js_status[slot_idx]) ++ *value = hw_error_status.js_status[slot_idx]; ++ else /* 0x08 means active, 0x00 idle */ ++ *value = (dummy->slots[slot_idx].job_active) ++ << 3; ++ break; ++ case JS_COMMAND_NEXT: ++ *value = dummy->slots[slot_idx].job_queued; ++ break; ++ ++ /* The dummy model does not implement these registers ++ * avoid printing error messages + */ -+ nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); ++ case JS_HEAD_HI: ++ case JS_HEAD_LO: ++ case JS_TAIL_HI: ++ case JS_TAIL_LO: ++ case JS_FLUSH_ID_NEXT: ++ break; + -+ if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) -+ dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", -+ selector_str); -+ break; -+ case CSF_PM_TIMEOUT: -+ selector_str = "CSF_PM_TIMEOUT"; -+ nr_cycles = CSF_PM_TIMEOUT_CYCLES; -+ break; -+ case CSF_GPU_RESET_TIMEOUT: -+ selector_str = "CSF_GPU_RESET_TIMEOUT"; -+ nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; -+ break; -+ case CSF_CSG_SUSPEND_TIMEOUT: -+ selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; -+ nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; -+ break; -+ case CSF_FIRMWARE_BOOT_TIMEOUT: -+ selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; -+ nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; -+ break; -+ case CSF_FIRMWARE_PING_TIMEOUT: -+ selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; -+ nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; -+ break; -+ case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: -+ selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; -+ nr_cycles = kbase_csf_timeout_get(kbdev); -+ break; ++ default: ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: unknown job slot reg 0x%02X being read\n", ++ sub_reg); ++ break; ++ } ++#endif /* !MALI_USE_CSF */ ++ } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { ++ *value = dummy->control_reg_values->as_present; ++#if !MALI_USE_CSF ++ } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { ++ *value = 0x7; ++#endif /* !MALI_USE_CSF */ ++ } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && ++ addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { ++ switch (addr) { ++ case GPU_CONTROL_REG(TEXTURE_FEATURES_0): ++ *value = 0xfffff; ++ break; ++ ++ case GPU_CONTROL_REG(TEXTURE_FEATURES_1): ++ *value = 0xffff; ++ break; ++ ++ case GPU_CONTROL_REG(TEXTURE_FEATURES_2): ++ *value = 0x9f81ffff; ++ break; ++ ++ case GPU_CONTROL_REG(TEXTURE_FEATURES_3): ++ *value = 0; ++ break; ++ } ++#if !MALI_USE_CSF ++ } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && ++ addr <= GPU_CONTROL_REG(JS15_FEATURES)) { ++ switch (addr) { ++ case GPU_CONTROL_REG(JS0_FEATURES): ++ *value = 0x20e; ++ break; ++ ++ case GPU_CONTROL_REG(JS1_FEATURES): ++ *value = 0x1fe; ++ break; ++ ++ case GPU_CONTROL_REG(JS2_FEATURES): ++ *value = 0x7e; ++ break; ++ ++ default: ++ *value = 0; ++ break; ++ } ++#endif /* !MALI_USE_CSF */ ++ } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) ++ && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { ++ switch (addr) { ++ case GPU_CONTROL_REG(L2_FEATURES): ++ *value = 0x6100206; ++ break; ++ ++ case GPU_CONTROL_REG(CORE_FEATURES): ++ *value = dummy->control_reg_values->core_features; ++ break; 
++ ++ case GPU_CONTROL_REG(TILER_FEATURES): ++ *value = dummy->control_reg_values->tiler_features; ++ break; ++ ++ case GPU_CONTROL_REG(MEM_FEATURES): ++ /* Bit 0: Core group is coherent */ ++ *value = 0x01; ++ /* Bits 11:8: L2 slice count - 1 */ ++ *value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8; ++ break; ++ ++ case GPU_CONTROL_REG(MMU_FEATURES): ++ *value = dummy->control_reg_values->mmu_features; ++ break; ++ } ++ } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) ++ && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { ++ switch (addr) { ++ case GPU_CONTROL_REG(THREAD_FEATURES): ++ *value = dummy->control_reg_values->thread_features ++ | (IMPLEMENTATION_MODEL << 30); ++ break; ++ case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): ++ *value = dummy->control_reg_values->thread_max_barrier_size; ++ break; ++ case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE): ++ *value = dummy->control_reg_values->thread_max_workgroup_size; ++ break; ++ case GPU_CONTROL_REG(THREAD_MAX_THREADS): ++ *value = dummy->control_reg_values->thread_max_threads; ++ break; ++ } ++ } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) ++ && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { ++ *value = 0; ++ } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) ++ && addr <= MMU_AS_REG(15, AS_STATUS)) { ++ int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) ++ >> 6; ++ ++ switch (addr & 0x3F) { ++ case AS_TRANSTAB_LO: ++ *value = (u32) ++ (hw_error_status.as_transtab[mem_addr_space] & ++ 0xffffffff); ++ break; ++ ++ case AS_TRANSTAB_HI: ++ *value = (u32) ++ (hw_error_status.as_transtab[mem_addr_space] >> ++ 32); ++ break; ++ ++ case AS_STATUS: ++ *value = 0; ++ break; ++ ++ case AS_FAULTSTATUS: ++ if (mem_addr_space == hw_error_status.faulty_mmu_as) ++ *value = hw_error_status.as_faultstatus[ ++ hw_error_status.faulty_mmu_as]; ++ else ++ *value = 0; ++ break; ++ ++ case AS_LOCKADDR_LO: ++ case AS_LOCKADDR_HI: ++ case AS_MEMATTR_LO: ++ case AS_MEMATTR_HI: ++ case AS_TRANSCFG_LO: ++ case AS_TRANSCFG_HI: ++ /* Read ignored */ ++ *value = 0; ++ break; ++ ++ default: ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: Reading unsupported MMU #%d register 0x%x. 
Returning 0\n", ++ mem_addr_space, addr); ++ *value = 0; ++ break; ++ } ++ } else if (addr == MMU_REG(MMU_IRQ_MASK)) { ++ *value = hw_error_status.mmu_irq_mask; ++ } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { ++ *value = hw_error_status.mmu_irq_rawstat; ++ } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { ++ *value = hw_error_status.mmu_irq_mask & ++ hw_error_status.mmu_irq_rawstat; ++ } ++#if MALI_USE_CSF ++ else if (addr == IPA_CONTROL_REG(STATUS)) { ++ *value = (ipa_control_timer_enabled << 31); ++ } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && ++ (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( ++ IPA_CTL_MAX_VAL_CNT_IDX)))) { ++ u32 counter_index = ++ (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; ++ bool is_low_word = ++ !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); ++ ++ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, ++ counter_index, is_low_word); ++ } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && ++ (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( ++ IPA_CTL_MAX_VAL_CNT_IDX)))) { ++ u32 counter_index = ++ (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; ++ bool is_low_word = ++ !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); ++ ++ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, ++ counter_index, is_low_word); ++ } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && ++ (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( ++ IPA_CTL_MAX_VAL_CNT_IDX)))) { ++ u32 counter_index = ++ (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; ++ bool is_low_word = ++ !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); ++ ++ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, ++ counter_index, is_low_word); ++ } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && ++ (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( ++ IPA_CTL_MAX_VAL_CNT_IDX)))) { ++ u32 counter_index = ++ (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; ++ bool is_low_word = ++ !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); ++ ++ *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, ++ counter_index, is_low_word); ++ } +#endif ++ else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { ++ *value = dummy->control_reg_values->gpu_features_lo; ++ } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { ++ *value = dummy->control_reg_values->gpu_features_hi; ++ } else { ++ model_error_log(KBASE_CORE, ++ "Dummy model register access: Reading unsupported register 0x%x. 
Returning 0\n", ++ addr); ++ *value = 0; + } + -+ timeout = div_u64(nr_cycles, freq_khz); -+ if (WARN(timeout > UINT_MAX, -+ "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", -+ (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) -+ timeout = UINT_MAX; -+ return (unsigned int)timeout; ++ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); ++ CSTD_UNUSED(dummy); +} -+KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); + -+u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) ++static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, ++ u32 usr_data_size, u32 core_count) +{ -+ u32 hi1, hi2, lo; ++ u32 sample_size; ++ u32 *usr_data = NULL; + -+ /* Read hi, lo, hi to ensure a coherent u64 */ -+ do { -+ hi1 = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(CYCLE_COUNT_HI)); -+ lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(CYCLE_COUNT_LO)); -+ hi2 = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(CYCLE_COUNT_HI)); -+ } while (hi1 != hi2); ++ lockdep_assert_held(&performance_counters.access_lock); + -+ return lo | (((u64) hi1) << 32); -+} ++ sample_size = ++ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); + -+#if MALI_USE_CSF -+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts) -+{ -+ if (WARN_ON(!kbdev)) -+ return 0; ++ if ((usr_data_size >= usr_data_offset) && ++ (sample_size <= usr_data_size - usr_data_offset)) ++ usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); + -+ return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + -+ kbdev->backend_time.offset; ++ if (!usr_data) ++ model_error_log(KBASE_CORE, "Unable to set counter sample 1"); ++ else { ++ u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE; ++ u32 i; ++ ++ for (i = 0; i < loop_cnt; i++) { ++ counters[i] = usr_data[i]; ++ } ++ } ++ ++ return usr_data_offset + sample_size; +} + -+/** -+ * get_cpu_gpu_time() - Get current CPU and GPU timestamps. -+ * -+ * @kbdev: Kbase device. -+ * @cpu_ts: Output CPU timestamp. -+ * @gpu_ts: Output GPU timestamp. -+ * @gpu_cycle: Output GPU cycle counts. 
-+ */ -+static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) ++static u32 set_kernel_sample_core_type(u64 *counters, ++ u64 *usr_data_start, u32 usr_data_offset, ++ u32 usr_data_size, u32 core_count) +{ -+ struct timespec64 ts; ++ u32 sample_size; ++ u64 *usr_data = NULL; + -+ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); ++ lockdep_assert_held(&performance_counters.access_lock); + -+ if (cpu_ts) -+ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; ++ sample_size = ++ core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); ++ ++ if ((usr_data_size >= usr_data_offset) && ++ (sample_size <= usr_data_size - usr_data_offset)) ++ usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); ++ ++ if (!usr_data) ++ model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1"); ++ else ++ memcpy(counters, usr_data, sample_size); ++ ++ return usr_data_offset + sample_size; +} -+#endif + -+int kbase_backend_time_init(struct kbase_device *kbdev) ++/* Counter values injected through ioctl are of 32 bits */ ++int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) +{ -+#if MALI_USE_CSF -+ u64 cpu_ts = 0; -+ u64 gpu_ts = 0; -+ u64 freq; -+ u64 common_factor; -+ -+ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); -+ freq = arch_timer_get_cntfrq(); ++ unsigned long flags; ++ u32 *user_data; ++ u32 offset = 0; + -+ if (!freq) { -+ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); ++ if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) + return -EINVAL; -+ } + -+ common_factor = gcd(NSEC_PER_SEC, freq); -+ -+ kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor); -+ kbdev->backend_time.divisor = div64_u64(freq, common_factor); ++ /* copy_from_user might sleep so can't be called from inside a spinlock ++ * allocate a temporary buffer for user data and copy to that before taking ++ * the lock ++ */ ++ user_data = kmalloc(size, GFP_KERNEL); ++ if (!user_data) ++ return -ENOMEM; + -+ if (!kbdev->backend_time.divisor) { -+ dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); ++ if (copy_from_user(user_data, data, size)) { ++ model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); ++ kfree(user_data); + return -EINVAL; + } + -+ kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, -+ kbdev->backend_time.divisor); -+#endif ++ spin_lock_irqsave(&performance_counters.access_lock, flags); ++#if !MALI_USE_CSF ++ offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, ++ size, 1); ++#else ++ offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, ++ size, 1); ++#endif /* !MALI_USE_CSF */ ++ offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, ++ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); ++ offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, ++ size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); ++ offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, ++ size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); + ++ kfree(user_data); + return 0; +} -diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp -new file mode 100755 -index 000000000..0a61a12d9 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/build.bp -@@ -0,0 +1,280 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+/* Kernel-side tests may include mali_kbase's headers. Therefore any config -+ * options which affect the sizes of any structs (e.g. adding extra members) -+ * must be included in these defaults, so that the structs are consistent in -+ * both mali_kbase and the test modules. */ -+bob_defaults { -+ name: "mali_kbase_shared_config_defaults", -+ defaults: [ -+ "kernel_defaults", -+ ], -+ mali_no_mali: { -+ kbuild_options: [ -+ "CONFIG_MALI_BIFROST_NO_MALI=y", -+ "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", -+ "CONFIG_GPU_HWVER={{.hwver}}", -+ ], -+ }, -+ mali_platform_dt_pin_rst: { -+ kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"], -+ }, -+ gpu_has_csf: { -+ kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"], -+ }, -+ mali_devfreq: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_DEVFREQ=y"], -+ }, -+ mali_midgard_dvfs: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_DVFS=y"], -+ }, -+ mali_gator_support: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_GATOR_SUPPORT=y"], -+ }, -+ mali_midgard_enable_trace: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"], -+ }, -+ mali_arbiter_support: { -+ kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], -+ }, -+ mali_dma_buf_map_on_demand: { -+ kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], -+ }, -+ mali_dma_buf_legacy_compat: { -+ kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], -+ }, -+ large_page_alloc_override: { -+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"], -+ }, -+ large_page_alloc: { -+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], -+ }, -+ mali_memory_fully_backed: { -+ kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], -+ }, -+ mali_corestack: { -+ kbuild_options: ["CONFIG_MALI_CORESTACK=y"], -+ }, -+ mali_real_hw: { -+ kbuild_options: ["CONFIG_MALI_REAL_HW=y"], -+ }, -+ mali_error_inject_none: { -+ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], -+ }, -+ mali_error_inject_track_list: { -+ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], -+ }, -+ mali_error_inject_random: { -+ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], -+ }, -+ mali_error_inject: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"], -+ }, -+ mali_debug: { -+ kbuild_options: [ -+ "CONFIG_MALI_BIFROST_DEBUG=y", -+ "MALI_KERNEL_TEST_API={{.debug}}", -+ ], -+ }, -+ mali_fence_debug: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_FENCE_DEBUG=y"], -+ }, -+ mali_system_trace: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"], -+ }, -+ buslog: { -+ kbuild_options: ["CONFIG_MALI_BUSLOG=y"], -+ }, -+ cinstr_vector_dump: { -+ kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], -+ }, -+ cinstr_gwt: { -+ kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], -+ }, -+ cinstr_primary_hwc: { -+ kbuild_options: 
["CONFIG_MALI_PRFCNT_SET_PRIMARY=y"], -+ }, -+ cinstr_secondary_hwc: { -+ kbuild_options: ["CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY=y"], -+ }, -+ cinstr_tertiary_hwc: { -+ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"], -+ }, -+ cinstr_hwc_set_select_via_debug_fs: { -+ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"], -+ }, -+ mali_job_dump: { -+ kbuild_options: ["CONFIG_MALI_JOB_DUMP"], -+ }, -+ mali_pwrsoft_765: { -+ kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], -+ }, -+ mali_hw_errata_1485982_not_affected: { -+ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], -+ }, -+ mali_hw_errata_1485982_use_clock_alternative: { -+ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], -+ }, -+ platform_is_fpga: { -+ kbuild_options: ["CONFIG_MALI_IS_FPGA=y"], -+ }, -+ mali_coresight: { -+ kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], -+ }, -+ kbuild_options: [ -+ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", -+ "MALI_CUSTOMER_RELEASE={{.release}}", -+ "MALI_UNIT_TEST={{.unit_test_code}}", -+ "MALI_USE_CSF={{.gpu_has_csf}}", -+ "MALI_JIT_PRESSURE_LIMIT_BASE={{.jit_pressure_limit_base}}", ++/* Counter values injected through kutf are of 64 bits */ ++void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) ++{ ++ unsigned long flags; ++ u32 offset = 0; + -+ // Start of CS experimental features definitions. -+ // If there is nothing below, definition should be added as follows: -+ // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" -+ // experimental_feature above comes from Mconfig in -+ // /product/base/ -+ // However, in Mconfig, experimental_feature should be looked up (for -+ // similar explanation to this one) as ALLCAPS, i.e. -+ // EXPERIMENTAL_FEATURE. -+ // -+ // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it -+ // is an umbrella feature that would be open for inappropriate use -+ // (catch-all for experimental CS code without separating it into -+ // different features). 
-+ "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}", -+ "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}", -+ ], ++ spin_lock_irqsave(&performance_counters.access_lock, flags); ++#if !MALI_USE_CSF ++ offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, ++ 1); ++#else ++ offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, ++ 1); ++#endif /* !MALI_USE_CSF */ ++ offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, ++ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); ++ offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, ++ hweight64(performance_counters.l2_present)); ++ offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, ++ size, hweight64(performance_counters.shader_present)); ++ spin_unlock_irqrestore(&performance_counters.access_lock, flags); +} ++KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); + -+bob_kernel_module { -+ name: "mali_kbase", -+ defaults: [ -+ "mali_kbase_shared_config_defaults", -+ ], -+ srcs: [ -+ "*.c", -+ "*.h", -+ "Kbuild", -+ "backend/gpu/*.c", -+ "backend/gpu/*.h", -+ "backend/gpu/Kbuild", -+ "context/*.c", -+ "context/*.h", -+ "context/Kbuild", -+ "hwcnt/*.c", -+ "hwcnt/*.h", -+ "hwcnt/backend/*.h", -+ "hwcnt/Kbuild", -+ "ipa/*.c", -+ "ipa/*.h", -+ "ipa/Kbuild", -+ "platform/*.h", -+ "platform/*/*.c", -+ "platform/*/*.h", -+ "platform/*/Kbuild", -+ "platform/*/*/*.c", -+ "platform/*/*/*.h", -+ "platform/*/*/Kbuild", -+ "platform/*/*/*.c", -+ "platform/*/*/*.h", -+ "platform/*/*/Kbuild", -+ "platform/*/*/*/*.c", -+ "platform/*/*/*/*.h", -+ "platform/*/*/*/Kbuild", -+ "thirdparty/*.c", -+ "thirdparty/Kbuild", -+ "debug/*.c", -+ "debug/*.h", -+ "debug/Kbuild", -+ "device/*.c", -+ "device/*.h", -+ "device/Kbuild", -+ "gpu/*.c", -+ "gpu/*.h", -+ "gpu/Kbuild", -+ "tl/*.c", -+ "tl/*.h", -+ "tl/Kbuild", -+ "mmu/*.c", -+ "mmu/*.h", -+ "mmu/Kbuild", -+ ], -+ gpu_has_job_manager: { -+ srcs: [ -+ "context/backend/*_jm.c", -+ "debug/backend/*_jm.c", -+ "debug/backend/*_jm.h", -+ "device/backend/*_jm.c", -+ "gpu/backend/*_jm.c", -+ "gpu/backend/*_jm.h", -+ "hwcnt/backend/*_jm.c", -+ "hwcnt/backend/*_jm.h", -+ "hwcnt/backend/*_jm_*.c", -+ "hwcnt/backend/*_jm_*.h", -+ "jm/*.h", -+ "tl/backend/*_jm.c", -+ "mmu/backend/*_jm.c", -+ "ipa/backend/*_jm.c", -+ "ipa/backend/*_jm.h", -+ ], -+ }, -+ gpu_has_csf: { -+ srcs: [ -+ "context/backend/*_csf.c", -+ "csf/*.c", -+ "csf/*.h", -+ "csf/Kbuild", -+ "csf/ipa_control/*.c", -+ "csf/ipa_control/*.h", -+ "csf/ipa_control/Kbuild", -+ "debug/backend/*_csf.c", -+ "debug/backend/*_csf.h", -+ "device/backend/*_csf.c", -+ "gpu/backend/*_csf.c", -+ "gpu/backend/*_csf.h", -+ "hwcnt/backend/*_csf.c", -+ "hwcnt/backend/*_csf.h", -+ "hwcnt/backend/*_csf_*.c", -+ "hwcnt/backend/*_csf_*.h", -+ "tl/backend/*_csf.c", -+ "mmu/backend/*_csf.c", -+ "ipa/backend/*_csf.c", -+ "ipa/backend/*_csf.h", -+ ], -+ }, -+ mali_arbiter_support: { -+ srcs: [ -+ "arbiter/*.c", -+ "arbiter/*.h", -+ "arbiter/Kbuild", -+ ], -+ }, -+ kbuild_options: [ -+ "CONFIG_MALI_BIFROST=m", -+ "CONFIG_MALI_KUTF=n", -+ ], -+ buslog: { -+ extra_symbols: [ -+ "bus_logger", -+ ], -+ }, ++void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, ++ u64 *l2_present, u64 *shader_present) ++{ ++ if (shader_present) ++ *shader_present = performance_counters.shader_present; ++ if (l2_present) ++ *l2_present = performance_counters.l2_present; +} -diff --git 
a/drivers/gpu/arm/bifrost/context/Kbuild b/drivers/gpu/arm/bifrost/context/Kbuild -new file mode 100755 -index 000000000..156b46a12 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/Kbuild -@@ -0,0 +1,27 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); + -+bifrost_kbase-y += context/mali_kbase_context.o ++void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, ++ u64 l2_present, u64 shader_present) ++{ ++ if (WARN_ON(!l2_present || !shader_present ++ || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS ++ || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES)) ++ return; + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += context/backend/mali_kbase_context_csf.o -+else -+ bifrost_kbase-y += context/backend/mali_kbase_context_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c ++ performance_counters.l2_present = l2_present; ++ performance_counters.shader_present = shader_present; ++ ++ /* Update the GPU properties used by vinstr to calculate the counter ++ * dump buffer size. ++ */ ++ kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; ++ kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); ++ kbdev->gpu_props.curr_config.shader_present = shader_present; ++} ++KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); ++ ++int gpu_model_control(void *model, ++ struct kbase_model_control_params *params) ++{ ++ struct dummy_model_t *dummy = (struct dummy_model_t *)model; ++ int i; ++ unsigned long flags; ++ ++ if (params->command == KBASE_MC_DISABLE_JOBS) { ++ for (i = 0; i < NUM_SLOTS; i++) ++ dummy->slots[i].job_disabled = params->value; ++ } else { ++ return -EINVAL; ++ } ++ ++ spin_lock_irqsave(&hw_error_status.access_lock, flags); ++ midgard_model_update(dummy); ++ midgard_model_get_outputs(dummy); ++ spin_unlock_irqrestore(&hw_error_status.access_lock, flags); ++ ++ return 0; ++} ++ ++/** ++ * kbase_is_gpu_removed - Has the GPU been removed. ++ * @kbdev: Kbase device pointer ++ * ++ * This function would return true if the GPU has been removed. 
++ * It is stubbed here ++ * Return: Always false ++ */ ++bool kbase_is_gpu_removed(struct kbase_device *kbdev) ++{ ++ return false; ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h new file mode 100644 -index 000000000..07d277b94 +index 000000000..84842291c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c -@@ -0,0 +1,207 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -136613,201 +136019,218 @@ index 000000000..07d277b94 + */ + +/* -+ * Base kernel context APIs for CSF GPUs ++ * Dummy Model interface ++ * ++ * Support for NO_MALI dummy Model interface. ++ * ++ * +-----------------------------------+ ++ * | Kbase read/write/IRQ | ++ * +-----------------------------------+ ++ * | Model Linux Framework | ++ * +-----------------------------------+ ++ * | Model Dummy interface definitions | ++ * +-----------------+-----------------+ ++ * | Fake R/W | Fake IRQ | ++ * +-----------------+-----------------+ + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#ifndef _KBASE_MODEL_DUMMY_H_ ++#define _KBASE_MODEL_DUMMY_H_ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include + -+void kbase_context_debugfs_init(struct kbase_context *const kctx) -+{ -+ kbase_debug_mem_view_init(kctx); -+ kbase_debug_mem_zones_init(kctx); -+ kbase_debug_mem_allocs_init(kctx); -+ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); -+ kbase_jit_debugfs_init(kctx); -+ kbase_csf_queue_group_debugfs_init(kctx); -+ kbase_csf_kcpu_debugfs_init(kctx); -+ kbase_csf_sync_debugfs_init(kctx); -+ kbase_csf_tiler_heap_debugfs_init(kctx); -+ kbase_csf_tiler_heap_total_debugfs_init(kctx); -+ kbase_csf_cpu_queue_debugfs_init(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++#define model_error_log(module, ...) 
pr_err(__VA_ARGS__) + -+void kbase_context_debugfs_term(struct kbase_context *const kctx) -+{ -+ debugfs_remove_recursive(kctx->kctx_dentry); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); -+#else -+void kbase_context_debugfs_init(struct kbase_context *const kctx) -+{ -+ CSTD_UNUSED(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++#define NUM_SLOTS 4 /*number of job slots */ + -+void kbase_context_debugfs_term(struct kbase_context *const kctx) -+{ -+ CSTD_UNUSED(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); -+#endif /* CONFIG_DEBUG_FS */ ++/*Errors Mask Codes*/ ++/* each bit of errors_mask is associated to a specific error: ++ * NON FAULT STATUS CODES: only the following are implemented since the others ++ * represent normal working statuses ++ */ ++#define KBASE_JOB_INTERRUPTED (1<<0) ++#define KBASE_JOB_STOPPED (1<<1) ++#define KBASE_JOB_TERMINATED (1<<2) + -+static void kbase_context_free(struct kbase_context *kctx) -+{ -+ kbase_timeline_post_kbase_context_destroy(kctx); ++/* JOB EXCEPTIONS: */ ++#define KBASE_JOB_CONFIG_FAULT (1<<3) ++#define KBASE_JOB_POWER_FAULT (1<<4) ++#define KBASE_JOB_READ_FAULT (1<<5) ++#define KBASE_JOB_WRITE_FAULT (1<<6) ++#define KBASE_JOB_AFFINITY_FAULT (1<<7) ++#define KBASE_JOB_BUS_FAULT (1<<8) ++#define KBASE_INSTR_INVALID_PC (1<<9) ++#define KBASE_INSTR_INVALID_ENC (1<<10) ++#define KBASE_INSTR_TYPE_MISMATCH (1<<11) ++#define KBASE_INSTR_OPERAND_FAULT (1<<12) ++#define KBASE_INSTR_TLS_FAULT (1<<13) ++#define KBASE_INSTR_BARRIER_FAULT (1<<14) ++#define KBASE_INSTR_ALIGN_FAULT (1<<15) ++#define KBASE_DATA_INVALID_FAULT (1<<16) ++#define KBASE_TILE_RANGE_FAULT (1<<17) ++#define KBASE_ADDR_RANGE_FAULT (1<<18) ++#define KBASE_OUT_OF_MEMORY (1<<19) ++#define KBASE_UNKNOWN (1<<20) + -+ vfree(kctx); -+} ++/* GPU EXCEPTIONS:*/ ++#define KBASE_DELAYED_BUS_FAULT (1<<21) ++#define KBASE_SHAREABILITY_FAULT (1<<22) + -+static const struct kbase_context_init context_init[] = { -+ { NULL, kbase_context_free, NULL }, -+ { kbase_context_common_init, kbase_context_common_term, -+ "Common context initialization failed" }, -+ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, -+ "Memory pool group initialization failed" }, -+ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, -+ "Memory evictable initialization failed" }, -+ { kbase_context_mmu_init, kbase_context_mmu_term, -+ "MMU initialization failed" }, -+ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, -+ "Memory alloc page failed" }, -+ { kbase_region_tracker_init, kbase_region_tracker_term, -+ "Region tracker initialization failed" }, -+ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, -+ "Sticky resource initialization failed" }, -+ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, -+ { kbase_csf_ctx_init, kbase_csf_ctx_term, -+ "CSF context initialization failed" }, -+ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, -+ "Adding kctx to device failed" }, ++/* MMU EXCEPTIONS:*/ ++#define KBASE_TRANSLATION_FAULT (1<<23) ++#define KBASE_PERMISSION_FAULT (1<<24) ++#define KBASE_TRANSTAB_BUS_FAULT (1<<25) ++#define KBASE_ACCESS_FLAG (1<<26) ++ ++/* generic useful bitmasks */ ++#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED) ++#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT) ++#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT) ++ ++/* number of possible MMU address spaces */ ++#define NUM_MMU_AS 16 /* total number of MMU 
address spaces as in ++ * MMU_IRQ_RAWSTAT register ++ */ ++ ++/* Forward declaration */ ++struct kbase_device; ++ ++/* ++ * the function below is used to trigger the simulation of a faulty ++ * HW condition for a specific job chain atom ++ */ ++ ++struct kbase_error_params { ++ u64 jc; ++ u32 errors_mask; ++ u32 mmu_table_level; ++ u16 faulty_mmu_as; ++ u16 padding[3]; +}; + -+static void kbase_context_term_partial( -+ struct kbase_context *kctx, -+ unsigned int i) -+{ -+ while (i-- > 0) { -+ if (context_init[i].term) -+ context_init[i].term(kctx); -+ } -+} ++enum kbase_model_control_command { ++ /* Disable/Enable job completion in the dummy model */ ++ KBASE_MC_DISABLE_JOBS ++}; + -+struct kbase_context *kbase_create_context(struct kbase_device *kbdev, -+ bool is_compat, -+ base_context_create_flags const flags, -+ unsigned long const api_version, -+ struct file *const filp) -+{ -+ struct kbase_context *kctx; -+ unsigned int i = 0; ++/* struct to control dummy model behavior */ ++struct kbase_model_control_params { ++ s32 command; ++ s32 value; ++}; + -+ if (WARN_ON(!kbdev)) -+ return NULL; ++/* struct to track faulty atoms */ ++struct kbase_error_atom { ++ struct kbase_error_params params; ++ struct kbase_error_atom *next; ++}; + -+ /* Validate flags */ -+ if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) -+ return NULL; ++/*struct to track the system error state*/ ++struct error_status_t { ++ spinlock_t access_lock; + -+ /* zero-inited as lot of code assume it's zero'ed out on create */ -+ kctx = vzalloc(sizeof(*kctx)); -+ if (WARN_ON(!kctx)) -+ return NULL; ++ u32 errors_mask; ++ u32 mmu_table_level; ++ int faulty_mmu_as; + -+ kctx->kbdev = kbdev; -+ kctx->api_version = api_version; -+ kctx->filp = filp; -+ kctx->create_flags = flags; ++ u64 current_jc; ++ int current_job_slot; + -+ if (is_compat) -+ kbase_ctx_flag_set(kctx, KCTX_COMPAT); -+#if defined(CONFIG_64BIT) -+ else -+ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -+#endif /* defined(CONFIG_64BIT) */ ++ u32 job_irq_rawstat; ++ u32 job_irq_status; ++ u32 js_status[NUM_SLOTS]; + -+ for (i = 0; i < ARRAY_SIZE(context_init); i++) { -+ int err = 0; ++ u32 mmu_irq_mask; ++ u32 mmu_irq_rawstat; + -+ if (context_init[i].init) -+ err = context_init[i].init(kctx); ++ u32 gpu_error_irq; ++ u32 gpu_fault_status; + -+ if (err) { -+ dev_err(kbdev->dev, "%s error = %d\n", -+ context_init[i].err_mes, err); ++ u32 as_faultstatus[NUM_MMU_AS]; ++ u32 as_command[NUM_MMU_AS]; ++ u64 as_transtab[NUM_MMU_AS]; ++}; + -+ /* kctx should be freed by kbase_context_free(). -+ * Otherwise it will result in memory leak. 
-+ */ -+ WARN_ON(i == 0); ++/** ++ * struct gpu_model_prfcnt_en - Performance counter enable masks ++ * @fe: Enable mask for front-end block ++ * @tiler: Enable mask for tiler block ++ * @l2: Enable mask for L2/Memory system blocks ++ * @shader: Enable mask for shader core blocks ++ */ ++struct gpu_model_prfcnt_en { ++ u32 fe; ++ u32 tiler; ++ u32 l2; ++ u32 shader; ++}; + -+ kbase_context_term_partial(kctx, i); -+ return NULL; -+ } -+ } ++void midgard_set_error(int job_slot); ++int job_atom_inject_error(struct kbase_error_params *params); ++int gpu_model_control(void *h, ++ struct kbase_model_control_params *params); + -+ return kctx; -+} -+KBASE_EXPORT_SYMBOL(kbase_create_context); ++/** ++ * gpu_model_set_dummy_prfcnt_user_sample() - Set performance counter values ++ * @data: Userspace pointer to array of counter values ++ * @size: Size of counter value array ++ * ++ * Counter values set by this function will be used for one sample dump only ++ * after which counters will be cleared back to zero. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size); + -+void kbase_destroy_context(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev; ++/** ++ * gpu_model_set_dummy_prfcnt_kernel_sample() - Set performance counter values ++ * @data: Pointer to array of counter values ++ * @size: Size of counter value array ++ * ++ * Counter values set by this function will be used for one sample dump only ++ * after which counters will be cleared back to zero. ++ */ ++void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size); + -+ if (WARN_ON(!kctx)) -+ return; ++void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, ++ u64 *l2_present, u64 *shader_present); ++void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, ++ u64 l2_present, u64 shader_present); + -+ kbdev = kctx->kbdev; -+ if (WARN_ON(!kbdev)) -+ return; ++/* Clear the counter values array maintained by the dummy model */ ++void gpu_model_clear_prfcnt_values(void); + -+ /* Context termination could happen whilst the system suspend of -+ * the GPU device is ongoing or has completed. It has been seen on -+ * Customer side that a hang could occur if context termination is -+ * not blocked until the resume of GPU device. -+ */ -+ while (kbase_pm_context_active_handle_suspend( -+ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { -+ dev_info(kbdev->dev, -+ "Suspend in progress when destroying context"); -+ wait_event(kbdev->pm.resume_wait, -+ !kbase_pm_is_suspending(kbdev)); -+ } ++#if MALI_USE_CSF ++/** ++ * gpu_model_prfcnt_dump_request() - Request performance counter sample dump. ++ * @sample_buf: Pointer to KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE sized array ++ * in which to store dumped performance counter values. ++ * @enable_maps: Physical enable maps for performance counter blocks. ++ */ ++void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt_en enable_maps); + -+ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); ++/** ++ * gpu_model_glb_request_job_irq() - Trigger job interrupt with global request ++ * flag set. ++ * @model: Model pointer returned by midgard_model_create(). 
++ */ ++void gpu_model_glb_request_job_irq(void *model); ++#endif /* MALI_USE_CSF */ + -+ kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); ++extern struct error_status_t hw_error_status; + -+ kbase_pm_context_idle(kbdev); -+} -+KBASE_EXPORT_SYMBOL(kbase_destroy_context); -diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c ++#endif +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c new file mode 100644 -index 000000000..f49b4734e +index 000000000..f310cc74c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c -@@ -0,0 +1,272 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +@@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -136825,267 +136248,178 @@ index 000000000..f49b4734e + * + */ + -+/* -+ * Base kernel context APIs for Job Manager GPUs -+ */ -+ -+#include -+#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include -+#include -+#include -+#include -+ -+void kbase_context_debugfs_init(struct kbase_context *const kctx) -+{ -+ kbase_debug_mem_view_init(kctx); -+ kbase_debug_mem_zones_init(kctx); -+ kbase_debug_mem_allocs_init(kctx); -+ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); -+ kbase_jit_debugfs_init(kctx); -+ kbasep_jd_debugfs_ctx_init(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); -+ -+void kbase_context_debugfs_term(struct kbase_context *const kctx) -+{ -+ debugfs_remove_recursive(kctx->kctx_dentry); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); -+#else -+void kbase_context_debugfs_init(struct kbase_context *const kctx) -+{ -+ CSTD_UNUSED(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); -+ -+void kbase_context_debugfs_term(struct kbase_context *const kctx) -+{ -+ CSTD_UNUSED(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); -+#endif /* CONFIG_DEBUG_FS */ -+ -+static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx) -+{ -+ return kbase_kinstr_jm_init(&kctx->kinstr_jm); -+} -+ -+static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) -+{ -+ kbase_kinstr_jm_term(kctx->kinstr_jm); -+} -+ -+static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) -+{ -+ kbase_timer_setup(&kctx->soft_job_timeout, -+ kbasep_soft_job_timeout_worker); -+ -+ return 0; -+} -+ -+static int kbase_context_submit_check(struct kbase_context *kctx) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; -+ unsigned long irq_flags = 0; -+ -+ base_context_create_flags const flags = kctx->create_flags; ++#include ++#include "backend/gpu/mali_kbase_model_linux.h" + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++static struct kbase_error_atom *error_track_list; + -+ /* Translate the flags */ -+ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) -+ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); ++#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM 
+ -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */ ++#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) ++#define prandom_u32 get_random_u32 ++#endif + -+ return 0; -+} ++/*following error probability are set quite high in order to stress the driver*/ ++static unsigned int error_probability = 50; /* to be set between 0 and 100 */ ++/* probability to have multiple error give that there is an error */ ++static unsigned int multiple_error_probability = 50; + -+static void kbase_context_flush_jobs(struct kbase_context *kctx) -+{ -+ kbase_jd_zap_context(kctx); -+ flush_workqueue(kctx->jctx.job_done_wq); -+} ++/* all the error conditions supported by the model */ ++#define TOTAL_FAULTS 27 ++/* maximum number of levels in the MMU translation table tree */ ++#define MAX_MMU_TABLE_LEVEL 4 ++/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ ++#define MAX_CONCURRENT_FAULTS 3 + +/** -+ * kbase_context_free - Free kcontext at its destruction -+ * -+ * @kctx: kcontext to be freed ++ * gpu_generate_error - Generate GPU error + */ -+static void kbase_context_free(struct kbase_context *kctx) ++static void gpu_generate_error(void) +{ -+ kbase_timeline_post_kbase_context_destroy(kctx); ++ unsigned int errors_num = 0; + -+ vfree(kctx); -+} ++ /*is there at least one error? */ ++ if ((prandom_u32() % 100) < error_probability) { ++ /* pick up a faulty mmu address space */ ++ hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS; ++ /* pick up an mmu table level */ ++ hw_error_status.mmu_table_level = ++ 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL); ++ hw_error_status.errors_mask = ++ (u32)(1 << (prandom_u32() % TOTAL_FAULTS)); + -+static const struct kbase_context_init context_init[] = { -+ { NULL, kbase_context_free, NULL }, -+ { kbase_context_common_init, kbase_context_common_term, -+ "Common context initialization failed" }, -+ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, -+ "Memory pool group initialization failed" }, -+ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, -+ "Memory evictable initialization failed" }, -+ { kbase_context_mmu_init, kbase_context_mmu_term, -+ "MMU initialization failed" }, -+ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, -+ "Memory alloc page failed" }, -+ { kbase_region_tracker_init, kbase_region_tracker_term, -+ "Region tracker initialization failed" }, -+ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, -+ "Sticky resource initialization failed" }, -+ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, -+ { kbase_context_kbase_kinstr_jm_init, -+ kbase_context_kbase_kinstr_jm_term, -+ "JM instrumentation initialization failed" }, -+ { kbase_context_kbase_timer_setup, NULL, -+ "Timers initialization failed" }, -+ { kbase_event_init, kbase_event_cleanup, -+ "Event initialization failed" }, -+ { kbasep_js_kctx_init, kbasep_js_kctx_term, -+ "JS kctx initialization failed" }, -+ { kbase_jd_init, kbase_jd_exit, "JD initialization failed" }, -+ { kbase_context_submit_check, NULL, "Enabling job submission failed" }, -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ { kbase_debug_job_fault_context_init, -+ kbase_debug_job_fault_context_term, -+ "Job fault context initialization failed" }, -+#endif -+ { NULL, kbase_context_flush_jobs, NULL }, -+ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, -+ "Adding kctx to device failed" }, -+ { 
kbasep_platform_context_init, kbasep_platform_context_term, -+ "Platform callback for kctx initialization failed" }, -+}; ++ /*is there also one or more errors? */ ++ if ((prandom_u32() % 100) < multiple_error_probability) { ++ errors_num = 1 + (prandom_u32() % ++ (MAX_CONCURRENT_FAULTS - 1)); ++ while (errors_num-- > 0) { ++ u32 temp_mask; + -+static void kbase_context_term_partial( -+ struct kbase_context *kctx, -+ unsigned int i) -+{ -+ while (i-- > 0) { -+ if (context_init[i].term) -+ context_init[i].term(kctx); ++ temp_mask = (u32)( ++ 1 << (prandom_u32() % TOTAL_FAULTS)); ++ /* below we check that no bit of the same error ++ * type is set again in the error mask ++ */ ++ if ((temp_mask & IS_A_JOB_ERROR) && ++ (hw_error_status.errors_mask & ++ IS_A_JOB_ERROR)) { ++ errors_num++; ++ continue; ++ } ++ if ((temp_mask & IS_A_MMU_ERROR) && ++ (hw_error_status.errors_mask & ++ IS_A_MMU_ERROR)) { ++ errors_num++; ++ continue; ++ } ++ if ((temp_mask & IS_A_GPU_ERROR) && ++ (hw_error_status.errors_mask & ++ IS_A_GPU_ERROR)) { ++ errors_num++; ++ continue; ++ } ++ /* this error mask is already set */ ++ if ((hw_error_status.errors_mask | temp_mask) == ++ hw_error_status.errors_mask) { ++ errors_num++; ++ continue; ++ } ++ hw_error_status.errors_mask |= temp_mask; ++ } ++ } + } +} ++#endif + -+struct kbase_context *kbase_create_context(struct kbase_device *kbdev, -+ bool is_compat, -+ base_context_create_flags const flags, -+ unsigned long const api_version, -+ struct file *const filp) ++int job_atom_inject_error(struct kbase_error_params *params) +{ -+ struct kbase_context *kctx; -+ unsigned int i = 0; -+ -+ if (WARN_ON(!kbdev)) -+ return NULL; -+ -+ /* Validate flags */ -+ if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) -+ return NULL; -+ -+ /* zero-inited as lot of code assume it's zero'ed out on create */ -+ kctx = vzalloc(sizeof(*kctx)); -+ if (WARN_ON(!kctx)) -+ return NULL; -+ -+ kctx->kbdev = kbdev; -+ kctx->api_version = api_version; -+ kctx->filp = filp; -+ kctx->create_flags = flags; ++ struct kbase_error_atom *new_elem; + -+ if (is_compat) -+ kbase_ctx_flag_set(kctx, KCTX_COMPAT); -+#if defined(CONFIG_64BIT) -+ else -+ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -+#endif /* defined(CONFIG_64BIT) */ ++ KBASE_DEBUG_ASSERT(params); + -+ for (i = 0; i < ARRAY_SIZE(context_init); i++) { -+ int err = 0; ++ new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL); + -+ if (context_init[i].init) -+ err = context_init[i].init(kctx); ++ if (!new_elem) { ++ model_error_log(KBASE_CORE, ++ "\njob_atom_inject_error: kzalloc failed for new_elem\n" ++ ); ++ return -ENOMEM; ++ } ++ new_elem->params.jc = params->jc; ++ new_elem->params.errors_mask = params->errors_mask; ++ new_elem->params.mmu_table_level = params->mmu_table_level; ++ new_elem->params.faulty_mmu_as = params->faulty_mmu_as; + -+ if (err) { -+ dev_err(kbdev->dev, "%s error = %d\n", -+ context_init[i].err_mes, err); ++ /*circular list below */ ++ if (error_track_list == NULL) { /*no elements */ ++ error_track_list = new_elem; ++ new_elem->next = error_track_list; ++ } else { ++ struct kbase_error_atom *walker = error_track_list; + -+ /* kctx should be freed by kbase_context_free(). -+ * Otherwise it will result in memory leak. 
-+ */ -+ WARN_ON(i == 0); ++ while (walker->next != error_track_list) ++ walker = walker->next; + -+ kbase_context_term_partial(kctx, i); -+ return NULL; -+ } ++ new_elem->next = error_track_list; ++ walker->next = new_elem; + } -+ -+ return kctx; ++ return 0; +} -+KBASE_EXPORT_SYMBOL(kbase_create_context); + -+void kbase_destroy_context(struct kbase_context *kctx) ++void midgard_set_error(int job_slot) +{ -+ struct kbase_device *kbdev; ++#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM ++ gpu_generate_error(); ++#else ++ struct kbase_error_atom *walker, *auxiliar; + -+ if (WARN_ON(!kctx)) -+ return; ++ if (error_track_list != NULL) { ++ walker = error_track_list->next; ++ auxiliar = error_track_list; ++ do { ++ if (walker->params.jc == hw_error_status.current_jc) { ++ /* found a faulty atom matching with the ++ * current one ++ */ ++ hw_error_status.errors_mask = ++ walker->params.errors_mask; ++ hw_error_status.mmu_table_level = ++ walker->params.mmu_table_level; ++ hw_error_status.faulty_mmu_as = ++ walker->params.faulty_mmu_as; ++ hw_error_status.current_job_slot = job_slot; + -+ kbdev = kctx->kbdev; -+ if (WARN_ON(!kbdev)) -+ return; ++ if (walker->next == walker) { ++ /* only one element */ ++ kfree(error_track_list); ++ error_track_list = NULL; ++ } else { ++ auxiliar->next = walker->next; ++ if (walker == error_track_list) ++ error_track_list = walker->next; + -+ /* Context termination could happen whilst the system suspend of -+ * the GPU device is ongoing or has completed. It has been seen on -+ * Customer side that a hang could occur if context termination is -+ * not blocked until the resume of GPU device. -+ */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ atomic_inc(&kbdev->pm.gpu_users_waiting); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ while (kbase_pm_context_active_handle_suspend( -+ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { -+ dev_dbg(kbdev->dev, -+ "Suspend in progress when destroying context"); -+ wait_event(kbdev->pm.resume_wait, -+ !kbase_pm_is_suspending(kbdev)); ++ kfree(walker); ++ } ++ break; ++ } ++ auxiliar = walker; ++ walker = walker->next; ++ } while (auxiliar->next != error_track_list); + } -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ atomic_dec(&kbdev->pm.gpu_users_waiting); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ -+ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); -+ -+ kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); -+ -+ kbase_pm_context_idle(kbdev); ++#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */ +} -+KBASE_EXPORT_SYMBOL(kbase_destroy_context); -diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c new file mode 100644 -index 000000000..88be6c2e7 +index 000000000..e90e4df2f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c -@@ -0,0 +1,392 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c +@@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -137104,386 +136438,238 @@ index 000000000..88be6c2e7 + */ + +/* -+ * Base kernel context APIs ++ * Model Linux Framework interfaces. + */ -+#include -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+#include -+#else -+#include -+#endif + +#include +#include -+#include -+#include -+#include -+#include -+#include -+#include + -+/** -+ * find_process_node - Used to traverse the process rb_tree to find if -+ * process exists already in process rb_tree. -+ * -+ * @node: Pointer to root node to start search. -+ * @tgid: Thread group PID to search for. -+ * -+ * Return: Pointer to kbase_process if exists otherwise NULL. -+ */ -+static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) ++#include "backend/gpu/mali_kbase_model_linux.h" ++#include "device/mali_kbase_device.h" ++#include "mali_kbase_irq_internal.h" ++ ++#include ++ ++struct model_irq_data { ++ struct kbase_device *kbdev; ++ struct work_struct work; ++}; ++ ++static void serve_job_irq(struct work_struct *work) +{ -+ struct kbase_process *kprcs = NULL; ++ struct model_irq_data *data = container_of(work, struct model_irq_data, ++ work); ++ struct kbase_device *kbdev = data->kbdev; + -+ /* Check if the kctx creation request is from a existing process.*/ -+ while (node) { -+ struct kbase_process *prcs_node = -+ rb_entry(node, struct kbase_process, kprcs_node); -+ if (prcs_node->tgid == tgid) { -+ kprcs = prcs_node; -+ break; -+ } ++ /* Make sure no worker is already serving this IRQ */ ++ while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) { ++ u32 val; + -+ if (tgid < prcs_node->tgid) -+ node = node->rb_left; -+ else -+ node = node->rb_right; ++ while ((val = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_STATUS)))) { ++ unsigned long flags; ++ ++ /* Handle the IRQ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#if MALI_USE_CSF ++ kbase_csf_interrupt(kbdev, val); ++#else ++ kbase_job_done(kbdev, val); ++#endif ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } + } + -+ return kprcs; ++ kmem_cache_free(kbdev->irq_slab, data); +} + -+/** -+ * kbase_insert_kctx_to_process - Initialise kbase process context. -+ * -+ * @kctx: Pointer to kbase context. -+ * -+ * Here we initialise per process rb_tree managed by kbase_device. -+ * We maintain a rb_tree of each unique process that gets created. -+ * and Each process maintains a list of kbase context. -+ * This setup is currently used by kernel trace functionality -+ * to trace and visualise gpu memory consumption. -+ * -+ * Return: 0 on success and error number on failure. 
-+ */ -+static int kbase_insert_kctx_to_process(struct kbase_context *kctx) ++static void serve_gpu_irq(struct work_struct *work) +{ -+ struct rb_root *const prcs_root = &kctx->kbdev->process_root; -+ const pid_t tgid = kctx->tgid; -+ struct kbase_process *kprcs = NULL; ++ struct model_irq_data *data = container_of(work, struct model_irq_data, ++ work); ++ struct kbase_device *kbdev = data->kbdev; + -+ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); ++ /* Make sure no worker is already serving this IRQ */ ++ while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) { ++ u32 val; + -+ kprcs = find_process_node(prcs_root->rb_node, tgid); ++ while ((val = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_STATUS)))) { ++ /* Handle the IRQ */ ++ kbase_gpu_interrupt(kbdev, val); ++ } ++ } + -+ /* if the kctx is from new process then create a new kbase_process -+ * and add it to the &kbase_device->rb_tree -+ */ -+ if (!kprcs) { -+ struct rb_node **new = &prcs_root->rb_node, *parent = NULL; -+ -+ kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL); -+ if (kprcs == NULL) -+ return -ENOMEM; -+ kprcs->tgid = tgid; -+ INIT_LIST_HEAD(&kprcs->kctx_list); -+ kprcs->dma_buf_root = RB_ROOT; -+ kprcs->total_gpu_pages = 0; -+ -+ while (*new) { -+ struct kbase_process *prcs_node; -+ -+ parent = *new; -+ prcs_node = rb_entry(parent, struct kbase_process, -+ kprcs_node); -+ if (tgid < prcs_node->tgid) -+ new = &(*new)->rb_left; -+ else -+ new = &(*new)->rb_right; -+ } -+ rb_link_node(&kprcs->kprcs_node, parent, new); -+ rb_insert_color(&kprcs->kprcs_node, prcs_root); -+ } -+ -+ kctx->kprcs = kprcs; -+ list_add(&kctx->kprcs_link, &kprcs->kctx_list); -+ -+ return 0; ++ kmem_cache_free(kbdev->irq_slab, data); +} + -+int kbase_context_common_init(struct kbase_context *kctx) ++static void serve_mmu_irq(struct work_struct *work) +{ -+ const unsigned long cookies_mask = KBASE_COOKIE_MASK; -+ int err = 0; -+ -+ /* creating a context is considered a disjoint event */ -+ kbase_disjoint_event(kctx->kbdev); -+ -+ kctx->process_mm = NULL; -+ kctx->task = NULL; -+ atomic_set(&kctx->nonmapped_pages, 0); -+ atomic_set(&kctx->permanent_mapped_pages, 0); -+ kctx->tgid = task_tgid_vnr(current); -+ kctx->pid = task_pid_vnr(current); -+ -+ /* Check if this is a Userspace created context */ -+ if (likely(kctx->filp)) { -+ struct pid *pid_struct; -+ -+ rcu_read_lock(); -+ pid_struct = find_get_pid(kctx->tgid); -+ if (likely(pid_struct)) { -+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); -+ -+ if (likely(task)) { -+ /* Take a reference on the task to avoid slow lookup -+ * later on from the page allocation loop. 
-+ */ -+ get_task_struct(task); -+ kctx->task = task; -+ } else { -+ dev_err(kctx->kbdev->dev, -+ "Failed to get task pointer for %s/%d", -+ current->comm, kctx->pid); -+ err = -ESRCH; -+ } -+ -+ put_pid(pid_struct); -+ } else { -+ dev_err(kctx->kbdev->dev, -+ "Failed to get pid pointer for %s/%d", -+ current->comm, kctx->pid); -+ err = -ESRCH; -+ } -+ rcu_read_unlock(); -+ -+ if (unlikely(err)) -+ return err; -+ -+ kbase_mem_mmgrab(); -+ kctx->process_mm = current->mm; -+ } -+ -+ atomic_set(&kctx->used_pages, 0); -+ -+ mutex_init(&kctx->reg_lock); -+ -+ spin_lock_init(&kctx->mem_partials_lock); -+ INIT_LIST_HEAD(&kctx->mem_partials); -+ -+ spin_lock_init(&kctx->waiting_soft_jobs_lock); -+ INIT_LIST_HEAD(&kctx->waiting_soft_jobs); -+ -+ init_waitqueue_head(&kctx->event_queue); -+ atomic_set(&kctx->event_count, 0); -+ -+#if !MALI_USE_CSF -+ atomic_set(&kctx->event_closed, false); -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ atomic_set(&kctx->jctx.work_id, 0); -+#endif -+#endif -+ -+#if MALI_USE_CSF -+ atomic64_set(&kctx->num_fixable_allocs, 0); -+ atomic64_set(&kctx->num_fixed_allocs, 0); -+#endif -+ -+ kbase_gpu_vm_lock(kctx); -+ bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); -+ kbase_gpu_vm_unlock(kctx); ++ struct model_irq_data *data = container_of(work, struct model_irq_data, ++ work); ++ struct kbase_device *kbdev = data->kbdev; + -+ kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; ++ /* Make sure no worker is already serving this IRQ */ ++ if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { ++ u32 val; + -+ mutex_lock(&kctx->kbdev->kctx_list_lock); -+ err = kbase_insert_kctx_to_process(kctx); -+ mutex_unlock(&kctx->kbdev->kctx_list_lock); -+ if (err) { -+ dev_err(kctx->kbdev->dev, -+ "(err:%d) failed to insert kctx to kbase_process", err); -+ if (likely(kctx->filp)) { -+ mmdrop(kctx->process_mm); -+ put_task_struct(kctx->task); ++ while ((val = kbase_reg_read(kbdev, ++ MMU_REG(MMU_IRQ_STATUS)))) { ++ /* Handle the IRQ */ ++ kbase_mmu_interrupt(kbdev, val); + } + } + -+ return err; ++ kmem_cache_free(kbdev->irq_slab, data); +} + -+int kbase_context_add_to_dev_list(struct kbase_context *kctx) ++void gpu_device_raise_irq(void *model, u32 irq) +{ -+ if (WARN_ON(!kctx)) -+ return -EINVAL; ++ struct model_irq_data *data; ++ struct kbase_device *kbdev = gpu_device_get_data(model); + -+ if (WARN_ON(!kctx->kbdev)) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ mutex_lock(&kctx->kbdev->kctx_list_lock); -+ list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); -+ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ data = kmem_cache_alloc(kbdev->irq_slab, GFP_ATOMIC); ++ if (data == NULL) ++ return; + -+ kbase_timeline_post_kbase_context_create(kctx); ++ data->kbdev = kbdev; + -+ return 0; ++ switch (irq) { ++ case MODEL_LINUX_JOB_IRQ: ++ INIT_WORK(&data->work, serve_job_irq); ++ atomic_set(&kbdev->serving_job_irq, 1); ++ break; ++ case MODEL_LINUX_GPU_IRQ: ++ INIT_WORK(&data->work, serve_gpu_irq); ++ atomic_set(&kbdev->serving_gpu_irq, 1); ++ break; ++ case MODEL_LINUX_MMU_IRQ: ++ INIT_WORK(&data->work, serve_mmu_irq); ++ atomic_set(&kbdev->serving_mmu_irq, 1); ++ break; ++ default: ++ dev_warn(kbdev->dev, "Unknown IRQ"); ++ kmem_cache_free(kbdev->irq_slab, data); ++ data = NULL; ++ break; ++ } ++ ++ if (data != NULL) ++ queue_work(kbdev->irq_workq, &data->work); +} + -+void kbase_context_remove_from_dev_list(struct kbase_context *kctx) ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ -+ if (WARN_ON(!kctx)) -+ return; -+ -+ if 
(WARN_ON(!kctx->kbdev)) -+ return; -+ -+ kbase_timeline_pre_kbase_context_destroy(kctx); ++ unsigned long flags; + -+ mutex_lock(&kctx->kbdev->kctx_list_lock); -+ list_del_init(&kctx->kctx_list_link); -+ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ spin_lock_irqsave(&kbdev->reg_op_lock, flags); ++ midgard_model_write_reg(kbdev->model, offset, value); ++ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); +} + -+/** -+ * kbase_remove_kctx_from_process - remove a terminating context from -+ * the process list. -+ * -+ * @kctx: Pointer to kbase context. -+ * -+ * Remove the tracking of context from the list of contexts maintained under -+ * kbase process and if the list if empty then there no outstanding contexts -+ * we can remove the process node as well. -+ */ ++KBASE_EXPORT_TEST_API(kbase_reg_write); + -+static void kbase_remove_kctx_from_process(struct kbase_context *kctx) ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ -+ struct kbase_process *kprcs = kctx->kprcs; ++ unsigned long flags; ++ u32 val; + -+ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); -+ list_del(&kctx->kprcs_link); ++ spin_lock_irqsave(&kbdev->reg_op_lock, flags); ++ midgard_model_read_reg(kbdev->model, offset, &val); ++ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + -+ /* if there are no outstanding contexts in current process node, -+ * we can remove it from the process rb_tree. -+ */ -+ if (list_empty(&kprcs->kctx_list)) { -+ rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root); -+ /* Add checks, so that the terminating process Should not -+ * hold any gpu_memory. -+ */ -+ spin_lock(&kctx->kbdev->gpu_mem_usage_lock); -+ WARN_ON(kprcs->total_gpu_pages); -+ spin_unlock(&kctx->kbdev->gpu_mem_usage_lock); -+ WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); -+ kfree(kprcs); -+ } ++ return val; +} ++KBASE_EXPORT_TEST_API(kbase_reg_read); + -+void kbase_context_common_term(struct kbase_context *kctx) ++int kbase_install_interrupts(struct kbase_device *kbdev) +{ -+ int pages; -+ -+ pages = atomic_read(&kctx->used_pages); -+ if (pages != 0) -+ dev_warn(kctx->kbdev->dev, -+ "%s: %d pages in use!\n", __func__, pages); ++ KBASE_DEBUG_ASSERT(kbdev); + -+ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); ++ atomic_set(&kbdev->serving_job_irq, 0); ++ atomic_set(&kbdev->serving_gpu_irq, 0); ++ atomic_set(&kbdev->serving_mmu_irq, 0); + -+ mutex_lock(&kctx->kbdev->kctx_list_lock); -+ kbase_remove_kctx_from_process(kctx); -+ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ kbdev->irq_workq = alloc_ordered_workqueue("dummy irq queue", 0); ++ if (kbdev->irq_workq == NULL) ++ return -ENOMEM; + -+ if (likely(kctx->filp)) { -+ mmdrop(kctx->process_mm); -+ put_task_struct(kctx->task); ++ kbdev->irq_slab = kmem_cache_create("dummy_irq_slab", ++ sizeof(struct model_irq_data), 0, 0, NULL); ++ if (kbdev->irq_slab == NULL) { ++ destroy_workqueue(kbdev->irq_workq); ++ return -ENOMEM; + } + -+ KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); ++ return 0; +} + -+int kbase_context_mem_pool_group_init(struct kbase_context *kctx) ++void kbase_release_interrupts(struct kbase_device *kbdev) +{ -+ return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev, -+ &kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools); ++ KBASE_DEBUG_ASSERT(kbdev); ++ destroy_workqueue(kbdev->irq_workq); ++ kmem_cache_destroy(kbdev->irq_slab); +} + -+void kbase_context_mem_pool_group_term(struct kbase_context *kctx) ++void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ -+ kbase_mem_pool_group_term(&kctx->mem_pools); ++ 
KBASE_DEBUG_ASSERT(kbdev); ++ flush_workqueue(kbdev->irq_workq); +} + -+int kbase_context_mmu_init(struct kbase_context *kctx) ++KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); ++ ++int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, ++ int irq_type) +{ -+ return kbase_mmu_init( -+ kctx->kbdev, &kctx->mmu, kctx, -+ kbase_context_mmu_group_id_get(kctx->create_flags)); ++ return 0; +} + -+void kbase_context_mmu_term(struct kbase_context *kctx) ++KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); ++ ++irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ -+ kbase_mmu_term(kctx->kbdev, &kctx->mmu); ++ if (!val) ++ return IRQ_NONE; ++ ++ return IRQ_HANDLED; +} + -+int kbase_context_mem_alloc_page(struct kbase_context *kctx) -+{ -+ struct page *p; ++KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + -+ p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); -+ if (!p) ++int kbase_gpu_device_create(struct kbase_device *kbdev) ++{ ++ kbdev->model = midgard_model_create(kbdev); ++ if (kbdev->model == NULL) + return -ENOMEM; + -+ kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); ++ spin_lock_init(&kbdev->reg_op_lock); + + return 0; +} + -+void kbase_context_mem_pool_free(struct kbase_context *kctx) -+{ -+ /* drop the aliasing sink page now that it can't be mapped anymore */ -+ kbase_mem_pool_free( -+ &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], -+ as_page(kctx->aliasing_sink_page), -+ false); -+} -+ -+void kbase_context_sticky_resource_term(struct kbase_context *kctx) ++/** ++ * kbase_gpu_device_destroy - Destroy GPU device ++ * ++ * @kbdev: kbase device ++ */ ++void kbase_gpu_device_destroy(struct kbase_device *kbdev) +{ -+ unsigned long pending_regions_to_clean; -+ -+ kbase_gpu_vm_lock(kctx); -+ kbase_sticky_resource_term(kctx); -+ -+ /* free pending region setups */ -+ pending_regions_to_clean = KBASE_COOKIE_MASK; -+ bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, -+ kctx->cookies, BITS_PER_LONG); -+ while (pending_regions_to_clean) { -+ unsigned int cookie = find_first_bit(&pending_regions_to_clean, -+ BITS_PER_LONG); -+ -+ if (!WARN_ON(!kctx->pending_regions[cookie])) { -+ dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); -+ kbase_mem_phy_alloc_put( -+ kctx->pending_regions[cookie]->cpu_alloc); -+ kbase_mem_phy_alloc_put( -+ kctx->pending_regions[cookie]->gpu_alloc); -+ kfree(kctx->pending_regions[cookie]); -+ -+ kctx->pending_regions[cookie] = NULL; -+ } -+ -+ bitmap_clear(&pending_regions_to_clean, cookie, 1); -+ } -+ kbase_gpu_vm_unlock(kctx); ++ midgard_model_destroy(kbdev->model); +} -diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h new file mode 100644 -index 000000000..7c90e2708 +index 000000000..8f09afe3d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h -@@ -0,0 +1,141 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +@@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -137501,136 +136687,146 @@ index 000000000..7c90e2708 + * + */ + -+#ifndef _KBASE_CONTEXT_H_ -+#define _KBASE_CONTEXT_H_ ++/* ++ * Model Linux Framework interfaces. ++ * ++ * This framework is used to provide generic Kbase Models interfaces. ++ * Note: Backends cannot be used together; the selection is done at build time. ++ * ++ * - Without Model Linux Framework: ++ * +-----------------------------+ ++ * | Kbase read/write/IRQ | ++ * +-----------------------------+ ++ * | HW interface definitions | ++ * +-----------------------------+ ++ * ++ * - With Model Linux Framework: ++ * +-----------------------------+ ++ * | Kbase read/write/IRQ | ++ * +-----------------------------+ ++ * | Model Linux Framework | ++ * +-----------------------------+ ++ * | Model interface definitions | ++ * +-----------------------------+ ++ */ + -+#include ++#ifndef _KBASE_MODEL_LINUX_H_ ++#define _KBASE_MODEL_LINUX_H_ ++ ++/* ++ * Include Model definitions ++ */ ++ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++#include ++#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + ++#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +/** -+ * kbase_context_debugfs_init - Initialize the kctx platform -+ * specific debugfs ++ * kbase_gpu_device_create() - Generic create function. + * -+ * @kctx: kbase context ++ * @kbdev: Kbase device. + * -+ * This initializes some debugfs interfaces specific to the platform the source -+ * is compiled for. ++ * Specific model hook is implemented by midgard_model_create() ++ * ++ * Return: 0 on success, error code otherwise. + */ -+void kbase_context_debugfs_init(struct kbase_context *const kctx); ++int kbase_gpu_device_create(struct kbase_device *kbdev); + +/** -+ * kbase_context_debugfs_term - Terminate the kctx platform -+ * specific debugfs ++ * kbase_gpu_device_destroy() - Generic create function. + * -+ * @kctx: kbase context ++ * @kbdev: Kbase device. + * -+ * This terminates some debugfs interfaces specific to the platform the source -+ * is compiled for. ++ * Specific model hook is implemented by midgard_model_destroy() + */ -+void kbase_context_debugfs_term(struct kbase_context *const kctx); ++void kbase_gpu_device_destroy(struct kbase_device *kbdev); + +/** -+ * kbase_create_context() - Create a kernel base context. ++ * midgard_model_create() - Private create function. + * -+ * @kbdev: Object representing an instance of GPU platform device, -+ * allocated from the probe method of the Mali driver. -+ * @is_compat: Force creation of a 32-bit context -+ * @flags: Flags to set, which shall be any combination of -+ * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. -+ * @api_version: Application program interface version, as encoded in -+ * a single integer by the KBASE_API_VERSION macro. -+ * @filp: Pointer to the struct file corresponding to device file -+ * /dev/malixx instance, passed to the file's open method. ++ * @kbdev: Kbase device. + * -+ * Up to one context can be created for each client that opens the device file -+ * /dev/malixx. Context creation is deferred until a special ioctl() system call -+ * is made on the device file. Each context has its own GPU address space. ++ * This hook is specific to the model built in Kbase. + * -+ * Return: new kbase context or NULL on failure ++ * Return: Model handle. 
+ */ -+struct kbase_context * -+kbase_create_context(struct kbase_device *kbdev, bool is_compat, -+ base_context_create_flags const flags, -+ unsigned long api_version, -+ struct file *filp); ++void *midgard_model_create(struct kbase_device *kbdev); + +/** -+ * kbase_destroy_context - Destroy a kernel base context. -+ * @kctx: Context to destroy ++ * midgard_model_destroy() - Private destroy function. + * -+ * Will release all outstanding regions. ++ * @h: Model handle. ++ * ++ * This hook is specific to the model built in Kbase. + */ -+void kbase_destroy_context(struct kbase_context *kctx); ++void midgard_model_destroy(void *h); + +/** -+ * kbase_ctx_flag - Check if @flag is set on @kctx -+ * @kctx: Pointer to kbase context to check -+ * @flag: Flag to check ++ * midgard_model_write_reg() - Private model write function. + * -+ * Return: true if @flag is set on @kctx, false if not. ++ * @h: Model handle. ++ * @addr: Address at which to write. ++ * @value: value to write. ++ * ++ * This hook is specific to the model built in Kbase. + */ -+static inline bool kbase_ctx_flag(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+ return atomic_read(&kctx->flags) & flag; -+} ++void midgard_model_write_reg(void *h, u32 addr, u32 value); + +/** -+ * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate -+ * in compatibility mode for 32-bit userspace. -+ * @kctx: kbase context ++ * midgard_model_read_reg() - Private model read function. + * -+ * Return: True if needs to maintain compatibility, False otherwise. ++ * @h: Model handle. ++ * @addr: Address from which to read. ++ * @value: Pointer where to store the read value. ++ * ++ * This hook is specific to the model built in Kbase. + */ -+static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx) -+{ -+ return !IS_ENABLED(CONFIG_64BIT) || -+ (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT)); -+} ++void midgard_model_read_reg(void *h, u32 addr, u32 *const value); + +/** -+ * kbase_ctx_flag_clear - Clear @flag on @kctx -+ * @kctx: Pointer to kbase context -+ * @flag: Flag to clear ++ * gpu_device_raise_irq() - Private IRQ raise function. + * -+ * Clear the @flag on @kctx. This is done atomically, so other flags being -+ * cleared or set at the same time will be safe. ++ * @model: Model handle. ++ * @irq: IRQ type to raise. + * -+ * Some flags have locking requirements, check the documentation for the -+ * respective flags. ++ * This hook is global to the model Linux framework. + */ -+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+ atomic_andnot(flag, &kctx->flags); -+} ++void gpu_device_raise_irq(void *model, u32 irq); + +/** -+ * kbase_ctx_flag_set - Set @flag on @kctx -+ * @kctx: Pointer to kbase context -+ * @flag: Flag to set ++ * gpu_device_set_data() - Private model set data function. + * -+ * Set the @flag on @kctx. This is done atomically, so other flags being -+ * cleared or set at the same time will be safe. ++ * @model: Model handle. ++ * @data: Data carried by model. + * -+ * Some flags have locking requirements, check the documentation for the -+ * respective flags. ++ * This hook is global to the model Linux framework. 
+ */ -+static inline void kbase_ctx_flag_set(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+ atomic_or(flag, &kctx->flags); -+} -+#endif /* _KBASE_CONTEXT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h ++void gpu_device_set_data(void *model, void *data); ++ ++/** ++ * gpu_device_get_data() - Private model get data function. ++ * ++ * @model: Model handle. ++ * ++ * This hook is global to the model Linux framework. ++ * ++ * Return: Pointer to the data carried by model. ++ */ ++void *gpu_device_get_data(void *model); ++#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++ ++#endif /* _KBASE_MODEL_LINUX_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 -index 000000000..1cde7394c +index 000000000..bbf629065 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h -@@ -0,0 +1,54 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c +@@ -0,0 +1,73 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -137648,141 +136844,68 @@ index 000000000..1cde7394c + * + */ + -+#include -+ -+typedef int kbase_context_init_method(struct kbase_context *kctx); -+typedef void kbase_context_term_method(struct kbase_context *kctx); -+ -+/** -+ * struct kbase_context_init - Device init/term methods. -+ * @init: Function pointer to a initialise method. -+ * @term: Function pointer to a terminate method. -+ * @err_mes: Error message to be printed when init method fails. ++/* ++ * "Always on" power management policy + */ -+struct kbase_context_init { -+ kbase_context_init_method *init; -+ kbase_context_term_method *term; -+ char *err_mes; -+}; -+ -+int kbase_context_common_init(struct kbase_context *kctx); -+void kbase_context_common_term(struct kbase_context *kctx); + -+int kbase_context_mem_pool_group_init(struct kbase_context *kctx); -+void kbase_context_mem_pool_group_term(struct kbase_context *kctx); -+ -+int kbase_context_mmu_init(struct kbase_context *kctx); -+void kbase_context_mmu_term(struct kbase_context *kctx); -+ -+int kbase_context_mem_alloc_page(struct kbase_context *kctx); -+void kbase_context_mem_pool_free(struct kbase_context *kctx); -+ -+void kbase_context_sticky_resource_term(struct kbase_context *kctx); ++#include ++#include + -+int kbase_context_add_to_dev_list(struct kbase_context *kctx); -+void kbase_context_remove_from_dev_list(struct kbase_context *kctx); -diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild -new file mode 100755 -index 000000000..44217dba1 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/Kbuild -@@ -0,0 +1,58 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. 
-+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++static bool always_on_shaders_needed(struct kbase_device *kbdev) ++{ ++ return true; ++} + -+bifrost_kbase-y += \ -+ csf/mali_kbase_csf_firmware_cfg.o \ -+ csf/mali_kbase_csf_trace_buffer.o \ -+ csf/mali_kbase_csf.o \ -+ csf/mali_kbase_csf_scheduler.o \ -+ csf/mali_kbase_csf_kcpu.o \ -+ csf/mali_kbase_csf_tiler_heap.o \ -+ csf/mali_kbase_csf_timeout.o \ -+ csf/mali_kbase_csf_tl_reader.o \ -+ csf/mali_kbase_csf_heap_context_alloc.o \ -+ csf/mali_kbase_csf_reset_gpu.o \ -+ csf/mali_kbase_csf_csg_debugfs.o \ -+ csf/mali_kbase_csf_kcpu_debugfs.o \ -+ csf/mali_kbase_csf_sync_debugfs.o \ -+ csf/mali_kbase_csf_protected_memory.o \ -+ csf/mali_kbase_csf_tiler_heap_debugfs.o \ -+ csf/mali_kbase_csf_cpu_queue_debugfs.o \ -+ csf/mali_kbase_csf_event.o \ -+ csf/mali_kbase_csf_firmware_log.o \ -+ csf/mali_kbase_csf_firmware_core_dump.o \ -+ csf/mali_kbase_csf_tiler_heap_reclaim.o \ -+ csf/mali_kbase_csf_mcu_shared_reg.o ++static bool always_on_get_core_active(struct kbase_device *kbdev) ++{ ++ return true; ++} + -+ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) -+bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o -+else -+bifrost_kbase-y += csf/mali_kbase_csf_firmware.o -+endif ++static void always_on_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o ++/** ++ * always_on_term - Term callback function for always-on power policy ++ * ++ * @kbdev: kbase device ++ */ ++static void always_on_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+ifeq ($(KBUILD_EXTMOD),) -+# in-tree -+ -include $(src)/csf/ipa_control/Kbuild -+else -+# out-of-tree -+ include $(src)/csf/ipa_control/Kbuild -+endif -diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild -new file mode 100755 -index 000000000..dc30281e4 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild -@@ -0,0 +1,22 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/* ++ * The struct kbase_pm_policy structure for the demand power policy. ++ * ++ * This is the static structure that defines the demand power policy's callback ++ * and name. 
++ */ ++const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { ++ "always_on", /* name */ ++ always_on_init, /* init */ ++ always_on_term, /* term */ ++ always_on_shaders_needed, /* shaders_needed */ ++ always_on_get_core_active, /* get_core_active */ ++ NULL, /* handle_event */ ++ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ ++#if MALI_USE_CSF ++ ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ ++#endif ++}; + -+bifrost_kbase-y += \ -+ csf/ipa_control/mali_kbase_csf_ipa_control.o -diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c ++KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 -index 000000000..c81d0a5a7 +index 000000000..98d35dabe --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c -@@ -0,0 +1,1063 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h +@@ -0,0 +1,78 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -137800,1058 +136923,1317 @@ index 000000000..c81d0a5a7 + * + */ + -+#include -+#include -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -+#include "mali_kbase_csf_ipa_control.h" -+ +/* -+ * Status flags from the STATUS register of the IPA Control interface. ++ * "Always on" power management policy + */ -+#define STATUS_COMMAND_ACTIVE ((u32)1 << 0) -+#define STATUS_PROTECTED_MODE ((u32)1 << 8) -+#define STATUS_RESET ((u32)1 << 9) -+#define STATUS_TIMER_ENABLED ((u32)1 << 31) + -+/* -+ * Commands for the COMMAND register of the IPA Control interface. -+ */ -+#define COMMAND_APPLY ((u32)1) -+#define COMMAND_SAMPLE ((u32)3) -+#define COMMAND_PROTECTED_ACK ((u32)4) -+#define COMMAND_RESET_ACK ((u32)5) ++#ifndef MALI_KBASE_PM_ALWAYS_ON_H ++#define MALI_KBASE_PM_ALWAYS_ON_H + -+/* -+ * Number of timer events per second. ++/** ++ * DOC: ++ * The "Always on" power management policy has the following ++ * characteristics: ++ * ++ * - When KBase indicates that the GPU will be powered up, but we don't yet ++ * know which Job Chains are to be run: ++ * Shader Cores are powered up, regardless of whether or not they will be ++ * needed later. ++ * ++ * - When KBase indicates that Shader Cores are needed to submit the currently ++ * queued Job Chains: ++ * Shader Cores are kept powered, regardless of whether or not they will be ++ * needed ++ * ++ * - When KBase indicates that the GPU need not be powered: ++ * The Shader Cores are kept powered, regardless of whether or not they will ++ * be needed. The GPU itself is also kept powered, even though it is not ++ * needed. ++ * ++ * This policy is automatically overridden during system suspend: the desired ++ * core state is ignored, and the cores are forced off regardless of what the ++ * policy requests. After resuming from suspend, new changes to the desired ++ * core state made by the policy are honored. 
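++ *
++ * As a simplified sketch of that suspend behaviour (based on the backend
++ * code added elsewhere in this patch, shown here for illustration only):
++ *
++ *   kbase_pm_do_poweroff(kbdev);     clears the desired core/L2 state,
++ *                                    ignoring the active policy
++ *   kbase_pm_wait_for_poweroff_work_complete(kbdev);
++ *   ... system suspend ...
++ *   kbase_pm_do_poweron(kbdev, true);  on resume the policy is consulted
++ *                                      again through the state machine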
++ * ++ * Note: ++ * ++ * - KBase indicates the GPU will be powered up when it has a User Process that ++ * has just started to submit Job Chains. ++ * ++ * - KBase indicates the GPU need not be powered when all the Job Chains from ++ * User Processes have finished, and it is waiting for a User Process to ++ * submit some more Job Chains. + */ -+#define TIMER_EVENTS_PER_SECOND ((u32)1000 / IPA_CONTROL_TIMER_DEFAULT_VALUE_MS) + -+/* -+ * Maximum number of loops polling the GPU before we assume the GPU has hung. ++/** ++ * struct kbasep_pm_policy_always_on - Private struct for policy instance data ++ * @dummy: unused dummy variable ++ * ++ * This contains data that is private to the particular power policy that is ++ * active. + */ -+#define IPA_INACTIVE_MAX_LOOPS (8000000U) ++struct kbasep_pm_policy_always_on { ++ int dummy; ++}; + -+/* -+ * Number of bits used to configure a performance counter in SELECT registers. -+ */ -+#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8) ++extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + -+/* -+ * Maximum value of a performance counter. -+ */ -+#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1) ++#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + -+/** -+ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency -+ * listener +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +new file mode 100644 +index 000000000..5c71fdf15 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +@@ -0,0 +1,1238 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @listener: GPU clock frequency listener. -+ * @kbdev: Pointer to kbase device. + */ -+struct kbase_ipa_control_listener_data { -+ struct kbase_clk_rate_listener listener; -+ struct kbase_device *kbdev; -+}; + -+static u32 timer_value(u32 gpu_rate) -+{ -+ return gpu_rate / TIMER_EVENTS_PER_SECOND; -+} ++/* ++ * GPU backend implementation of base kernel power management APIs ++ */ + -+static int wait_status(struct kbase_device *kbdev, u32 flags) -+{ -+ unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS; -+ u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++#include ++#include ++#include + -+ /* -+ * Wait for the STATUS register to indicate that flags have been -+ * cleared, in case a transition is pending. 
-+ */ -+ while (--max_loops && (status & flags)) -+ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); -+ if (max_loops == 0) { -+ dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck"); -+ return -EBUSY; -+ } ++#include ++#if !MALI_USE_CSF ++#include ++#include ++#include ++#else ++#include ++#include ++#endif /* !MALI_USE_CSF */ ++#include ++#include ++#include ++#include ++#include + -+ return 0; -+} ++static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); ++static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); ++static void kbase_pm_gpu_clock_control_worker(struct work_struct *data); + -+static int apply_select_config(struct kbase_device *kbdev, u64 *select) ++int kbase_pm_runtime_init(struct kbase_device *kbdev) +{ -+ int ret; -+ -+ u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX); -+ u32 select_cshw_hi = -+ (u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX); -+ u32 select_memsys_lo = -+ (u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX); -+ u32 select_memsys_hi = -+ (u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX); -+ u32 select_tiler_lo = -+ (u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX); -+ u32 select_tiler_hi = -+ (u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX); -+ u32 select_shader_lo = -+ (u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX); -+ u32 select_shader_hi = -+ (u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX); -+ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO), -+ select_memsys_lo); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI), -+ select_memsys_hi); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO), -+ select_tiler_lo); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI), -+ select_tiler_hi); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO), -+ select_shader_lo); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI), -+ select_shader_hi); ++ struct kbase_pm_callback_conf *callbacks; + -+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ if (callbacks) { ++ kbdev->pm.backend.callback_power_on = ++ callbacks->power_on_callback; ++ kbdev->pm.backend.callback_power_off = ++ callbacks->power_off_callback; ++ kbdev->pm.backend.callback_power_suspend = ++ callbacks->power_suspend_callback; ++ kbdev->pm.backend.callback_power_resume = ++ callbacks->power_resume_callback; ++ kbdev->pm.callback_power_runtime_init = ++ callbacks->power_runtime_init_callback; ++ kbdev->pm.callback_power_runtime_term = ++ callbacks->power_runtime_term_callback; ++ kbdev->pm.backend.callback_power_runtime_on = ++ callbacks->power_runtime_on_callback; ++ kbdev->pm.backend.callback_power_runtime_off = ++ callbacks->power_runtime_off_callback; ++ kbdev->pm.backend.callback_power_runtime_idle = ++ callbacks->power_runtime_idle_callback; ++ kbdev->pm.backend.callback_soft_reset = ++ callbacks->soft_reset_callback; ++ kbdev->pm.backend.callback_power_runtime_gpu_idle = ++ callbacks->power_runtime_gpu_idle_callback; ++ kbdev->pm.backend.callback_power_runtime_gpu_active = ++ callbacks->power_runtime_gpu_active_callback; + -+ if (!ret) { -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY); -+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); -+ } else { -+ dev_err(kbdev->dev, "Wait for the pending command 
failed"); ++ if (callbacks->power_runtime_init_callback) ++ return callbacks->power_runtime_init_callback(kbdev); ++ else ++ return 0; + } + -+ return ret; ++ kbdev->pm.backend.callback_power_on = NULL; ++ kbdev->pm.backend.callback_power_off = NULL; ++ kbdev->pm.backend.callback_power_suspend = NULL; ++ kbdev->pm.backend.callback_power_resume = NULL; ++ kbdev->pm.callback_power_runtime_init = NULL; ++ kbdev->pm.callback_power_runtime_term = NULL; ++ kbdev->pm.backend.callback_power_runtime_on = NULL; ++ kbdev->pm.backend.callback_power_runtime_off = NULL; ++ kbdev->pm.backend.callback_power_runtime_idle = NULL; ++ kbdev->pm.backend.callback_soft_reset = NULL; ++ kbdev->pm.backend.callback_power_runtime_gpu_idle = NULL; ++ kbdev->pm.backend.callback_power_runtime_gpu_active = NULL; ++ ++ return 0; +} + -+static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx) ++void kbase_pm_runtime_term(struct kbase_device *kbdev) +{ -+ u32 value_lo, value_hi; -+ -+ switch (type) { -+ case KBASE_IPA_CORE_TYPE_CSHW: -+ value_lo = kbase_reg_read( -+ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx))); -+ value_hi = kbase_reg_read( -+ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx))); -+ break; -+ case KBASE_IPA_CORE_TYPE_MEMSYS: -+ value_lo = kbase_reg_read( -+ kbdev, -+ IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx))); -+ value_hi = kbase_reg_read( -+ kbdev, -+ IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx))); -+ break; -+ case KBASE_IPA_CORE_TYPE_TILER: -+ value_lo = kbase_reg_read( -+ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx))); -+ value_hi = kbase_reg_read( -+ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx))); -+ break; -+ case KBASE_IPA_CORE_TYPE_SHADER: -+ value_lo = kbase_reg_read( -+ kbdev, -+ IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx))); -+ value_hi = kbase_reg_read( -+ kbdev, -+ IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx))); -+ break; -+ default: -+ WARN(1, "Unknown core type: %u\n", type); -+ value_lo = value_hi = 0; -+ break; -+ } -+ -+ return (((u64)value_hi << 32) | value_lo); ++ if (kbdev->pm.callback_power_runtime_term) ++ kbdev->pm.callback_power_runtime_term(kbdev); +} + -+static void build_select_config(struct kbase_ipa_control *ipa_ctrl, -+ u64 *select_config) ++void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ -+ size_t i; ++ struct kbase_pm_callback_conf *callbacks; + -+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { -+ size_t j; ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + -+ select_config[i] = 0ULL; ++ if (callbacks) ++ callbacks->power_on_callback(kbdev); + -+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { -+ struct kbase_ipa_control_prfcnt_config *prfcnt_config = -+ &ipa_ctrl->blocks[i].select[j]; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) ++ dev_err(kbdev->dev, "Attempting to power on while GPU lost\n"); ++#endif + -+ select_config[i] |= -+ ((u64)prfcnt_config->idx -+ << (IPA_CONTROL_SELECT_BITS_PER_CNT * j)); -+ } -+ } ++ kbdev->pm.backend.gpu_powered = true; +} + -+static int update_select_registers(struct kbase_device *kbdev) ++void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ -+ u64 select_config[KBASE_IPA_CORE_TYPE_NUM]; ++ struct kbase_pm_callback_conf *callbacks; + -+ lockdep_assert_held(&kbdev->csf.ipa_control.lock); ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + -+ build_select_config(&kbdev->csf.ipa_control, select_config); ++ kbdev->pm.backend.gpu_powered = 
false; + -+ return apply_select_config(kbdev, select_config); ++ if (callbacks) ++ callbacks->power_off_callback(kbdev); +} + -+static inline void calc_prfcnt_delta(struct kbase_device *kbdev, -+ struct kbase_ipa_control_prfcnt *prfcnt, -+ bool gpu_ready) ++int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ -+ u64 delta_value, raw_value; ++ int ret = 0; + -+ if (gpu_ready) -+ raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, -+ prfcnt->select_idx); -+ else -+ raw_value = prfcnt->latest_raw_value; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (raw_value < prfcnt->latest_raw_value) { -+ delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + -+ raw_value; -+ } else { -+ delta_value = raw_value - prfcnt->latest_raw_value; -+ } ++ mutex_init(&kbdev->pm.lock); + -+ delta_value *= prfcnt->scaling_factor; ++ kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!kbdev->pm.backend.gpu_poweroff_wait_wq) ++ return -ENOMEM; + -+ if (kbdev->csf.ipa_control.cur_gpu_rate == 0) { -+ static bool warned; ++ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, ++ kbase_pm_gpu_poweroff_wait_wq); + -+ if (!warned) { -+ dev_warn(kbdev->dev, "%s: GPU freq is unexpectedly 0", __func__); -+ warned = true; -+ } -+ } else if (prfcnt->gpu_norm) -+ delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate); ++ kbdev->pm.backend.ca_cores_enabled = ~0ull; ++ kbdev->pm.backend.gpu_powered = false; ++ kbdev->pm.backend.gpu_ready = false; ++ kbdev->pm.suspending = false; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ kbase_pm_set_gpu_lost(kbdev, false); ++#endif ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ kbdev->pm.backend.driver_ready_for_irqs = false; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + -+ prfcnt->latest_raw_value = raw_value; ++#if !MALI_USE_CSF ++ /* Initialise the metrics subsystem */ ++ ret = kbasep_pm_metrics_init(kbdev); ++ if (ret) ++ return ret; ++#else ++ mutex_init(&kbdev->pm.backend.policy_change_lock); ++ kbdev->pm.backend.policy_change_clamp_state_to_off = false; ++ /* Due to dependency on kbase_ipa_control, the metrics subsystem can't ++ * be initialized here. ++ */ ++ CSTD_UNUSED(ret); ++#endif + -+ /* Accumulate the difference */ -+ prfcnt->accumulated_diff += delta_value; -+} ++ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); ++ kbdev->pm.backend.reset_done = false; + -+/** -+ * kbase_ipa_control_rate_change_notify - GPU frequency change callback -+ * -+ * @listener: Clock frequency change listener. -+ * @clk_index: Index of the clock for which the change has occurred. -+ * @clk_rate_hz: Clock frequency(Hz). -+ * -+ * This callback notifies kbase_ipa_control about GPU frequency changes. -+ * Only top-level clock changes are meaningful. GPU frequency updates -+ * affect all performance counters which require GPU normalization -+ * in every session. 
-+ */ -+static void -+kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, -+ u32 clk_index, u32 clk_rate_hz) -+{ -+ if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) { -+ size_t i; -+ unsigned long flags; -+ struct kbase_ipa_control_listener_data *listener_data = -+ container_of(listener, -+ struct kbase_ipa_control_listener_data, -+ listener); -+ struct kbase_device *kbdev = listener_data->kbdev; -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ init_waitqueue_head(&kbdev->pm.zero_active_count_wait); ++ init_waitqueue_head(&kbdev->pm.resume_wait); ++ kbdev->pm.active_count = 0; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); + -+ if (!kbdev->pm.backend.gpu_ready) { -+ dev_err(kbdev->dev, -+ "%s: GPU frequency cannot change while GPU is off", -+ __func__); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return; -+ } ++ init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + -+ /* Interrupts are already disabled and interrupt state is also saved */ -+ spin_lock(&ipa_ctrl->lock); ++ if (kbase_pm_ca_init(kbdev) != 0) ++ goto workq_fail; + -+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { -+ struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; ++ kbase_pm_policy_init(kbdev); + -+ if (session->active) { -+ size_t j; ++ if (kbase_pm_state_machine_init(kbdev) != 0) ++ goto pm_state_machine_fail; + -+ for (j = 0; j < session->num_prfcnts; j++) { -+ struct kbase_ipa_control_prfcnt *prfcnt = -+ &session->prfcnts[j]; ++ kbdev->pm.backend.hwcnt_desired = false; ++ kbdev->pm.backend.hwcnt_disabled = true; ++ INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, ++ kbase_pm_hwcnt_disable_worker); ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + -+ if (prfcnt->gpu_norm) -+ calc_prfcnt_delta(kbdev, prfcnt, true); -+ } -+ } -+ } ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ kbdev->pm.backend.gpu_sleep_supported = ++ kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) && ++ !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) && ++ kbdev->pm.backend.callback_power_runtime_gpu_active && ++ kbdev->pm.backend.callback_power_runtime_gpu_idle; ++#endif + -+ ipa_ctrl->cur_gpu_rate = clk_rate_hz; ++ if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { ++ kbdev->pm.backend.l2_always_on = false; ++ kbdev->pm.backend.gpu_clock_slow_down_wa = false; + -+ /* Update the timer for automatic sampling if active sessions -+ * are present. Counters have already been manually sampled. 
-+ */ -+ if (ipa_ctrl->num_active_sessions > 0) { -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), -+ timer_value(ipa_ctrl->cur_gpu_rate)); -+ } ++ return 0; ++ } + -+ spin_unlock(&ipa_ctrl->lock); ++ /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ ++ if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { ++ kbdev->pm.backend.gpu_clock_slow_down_wa = false; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) ++ kbdev->pm.backend.l2_always_on = true; ++ else ++ kbdev->pm.backend.l2_always_on = false; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return 0; + } ++ ++ /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ ++ kbdev->pm.backend.l2_always_on = false; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { ++ kbdev->pm.backend.gpu_clock_slow_down_wa = true; ++ kbdev->pm.backend.gpu_clock_suspend_freq = 0; ++ kbdev->pm.backend.gpu_clock_slow_down_desired = true; ++ kbdev->pm.backend.gpu_clock_slowed_down = false; ++ INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, ++ kbase_pm_gpu_clock_control_worker); ++ } else ++ kbdev->pm.backend.gpu_clock_slow_down_wa = false; ++ ++ return 0; ++ ++pm_state_machine_fail: ++ kbase_pm_policy_term(kbdev); ++ kbase_pm_ca_term(kbdev); ++workq_fail: ++#if !MALI_USE_CSF ++ kbasep_pm_metrics_term(kbdev); ++#endif ++ return -EINVAL; +} + -+void kbase_ipa_control_init(struct kbase_device *kbdev) ++void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ struct kbase_ipa_control_listener_data *listener_data; -+ size_t i, j; ++ lockdep_assert_held(&kbdev->pm.lock); + -+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { -+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { -+ ipa_ctrl->blocks[i].select[j].idx = 0; -+ ipa_ctrl->blocks[i].select[j].refcount = 0; -+ } -+ ipa_ctrl->blocks[i].num_available_counters = -+ KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; -+ } ++ /* Turn clocks and interrupts on - no-op if we haven't done a previous ++ * kbase_pm_clock_off() ++ */ ++ kbase_pm_clock_on(kbdev, is_resume); + -+ spin_lock_init(&ipa_ctrl->lock); -+ ipa_ctrl->num_active_sessions = 0; -+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) -+ ipa_ctrl->sessions[i].active = false; ++ if (!is_resume) { ++ unsigned long flags; + -+ listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), -+ GFP_KERNEL); -+ if (listener_data) { -+ listener_data->listener.notify = -+ kbase_ipa_control_rate_change_notify; -+ listener_data->kbdev = kbdev; -+ ipa_ctrl->rtm_listener_data = listener_data; ++ /* Force update of L2 state - if we have abandoned a power off ++ * then this may be required to power the L2 back on. 
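++ *
++ * (For illustration, based on the two callers later in this file:
++ * kbase_hwaccess_pm_powerup() sets l2_desired (on CSF builds) and then
++ * calls kbase_pm_do_poweron(kbdev, false), whereas
++ * kbase_hwaccess_pm_resume() passes is_resume = true and skips this
++ * forced update.)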
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ spin_lock(&clk_rtm->lock); -+ if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) -+ ipa_ctrl->cur_gpu_rate = -+ clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val; -+ if (listener_data) -+ kbase_clk_rate_trace_manager_subscribe_no_lock( -+ clk_rtm, &listener_data->listener); -+ spin_unlock(&clk_rtm->lock); ++ /* Update core status as required by the policy */ ++ kbase_pm_update_cores_state(kbdev); ++ ++ /* NOTE: We don't wait to reach the desired state, since running atoms ++ * will wait for that state to be reached anyway ++ */ +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_init); + -+void kbase_ipa_control_term(struct kbase_device *kbdev) ++static void pm_handle_power_off(struct kbase_device *kbdev) +{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++#if MALI_USE_CSF ++ enum kbase_mcu_state mcu_state; ++#endif + unsigned long flags; -+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ struct kbase_ipa_control_listener_data *listener_data = -+ ipa_ctrl->rtm_listener_data; + -+ WARN_ON(ipa_ctrl->num_active_sessions); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ if (listener_data) -+ kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener); -+ kfree(ipa_ctrl->rtm_listener_data); ++ if (backend->poweron_required) ++ return; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbdev->pm.backend.gpu_powered) -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ if (kbdev->pm.backend.gpu_wakeup_override) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return; ++ } ++#endif ++ WARN_ON(backend->shaders_state != ++ KBASE_SHADERS_OFF_CORESTACK_OFF || ++ backend->l2_state != KBASE_L2_OFF); ++#if MALI_USE_CSF ++ mcu_state = backend->mcu_state; ++ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state)); ++#endif + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_term); + -+/** session_read_raw_values - Read latest raw values for a sessions -+ * @kbdev: Pointer to kbase device. -+ * @session: Pointer to the session whose performance counters shall be read. -+ * -+ * Read and update the latest raw values of all the performance counters -+ * belonging to a given session. -+ */ -+static void session_read_raw_values(struct kbase_device *kbdev, -+ struct kbase_ipa_control_session *session) -+{ -+ size_t i; ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ if (backend->callback_power_runtime_gpu_idle) { ++ WARN_ON(backend->gpu_idled); ++ backend->callback_power_runtime_gpu_idle(kbdev); ++ backend->gpu_idled = true; ++ return; ++ } ++#endif + -+ lockdep_assert_held(&kbdev->csf.ipa_control.lock); ++ /* Disable interrupts and turn the clock off */ ++ if (!kbase_pm_clock_off(kbdev)) { ++ /* ++ * Page/bus faults are pending, must drop locks to ++ * process. Interrupts are disabled so no more faults ++ * should be generated at this point. ++ */ ++ kbase_pm_unlock(kbdev); ++ kbase_flush_mmu_wqs(kbdev); ++ kbase_pm_lock(kbdev); + -+ for (i = 0; i < session->num_prfcnts; i++) { -+ struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; -+ u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, -+ prfcnt->select_idx); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* poweron_required may have changed while pm lock ++ * was released. 
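++ * (In other words: with the arbiter in use, a GPU-lost event may race
++ * with this power off, so the code below re-checks kbase_pm_is_gpu_lost()
++ * and drops poweron_required if the GPU has gone away.)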
++ */ ++ if (kbase_pm_is_gpu_lost(kbdev)) ++ backend->poweron_required = false; ++#endif + -+ prfcnt->latest_raw_value = raw_value; ++ /* Turn off clock now that fault have been handled. We ++ * dropped locks so poweron_required may have changed - ++ * power back on if this is the case (effectively only ++ * re-enabling of the interrupts would be done in this ++ * case, as the clocks to GPU were not withdrawn yet). ++ */ ++ if (backend->poweron_required) ++ kbase_pm_clock_on(kbdev, false); ++ else ++ WARN_ON(!kbase_pm_clock_off(kbdev)); + } +} + -+/** session_gpu_start - Start one or all sessions -+ * @kbdev: Pointer to kbase device. -+ * @ipa_ctrl: Pointer to IPA_CONTROL descriptor. -+ * @session: Pointer to the session to initialize, or NULL to initialize -+ * all sessions. -+ * -+ * This function starts one or all sessions by capturing a manual sample, -+ * reading the latest raw value of performance counters and possibly enabling -+ * the timer for automatic sampling if necessary. -+ * -+ * If a single session is given, it is assumed to be active, regardless of -+ * the number of active sessions. The number of performance counters belonging -+ * to the session shall be set in advance. -+ * -+ * If no session is given, the function shall start all sessions. -+ * The function does nothing if there are no active sessions. -+ * -+ * Return: 0 on success, or error code on failure. -+ */ -+static int session_gpu_start(struct kbase_device *kbdev, -+ struct kbase_ipa_control *ipa_ctrl, -+ struct kbase_ipa_control_session *session) ++static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) +{ -+ bool first_start = -+ (session != NULL) && (ipa_ctrl->num_active_sessions == 0); -+ int ret = 0; -+ -+ lockdep_assert_held(&kbdev->csf.ipa_control.lock); ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ pm.backend.gpu_poweroff_wait_work); ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; ++ unsigned long flags; + -+ /* -+ * Exit immediately if the caller intends to start all sessions -+ * but there are no active sessions. It's important that no operation -+ * is done on the IPA_CONTROL interface in that case. -+ */ -+ if (!session && ipa_ctrl->num_active_sessions == 0) -+ return ret; ++ KBASE_KTRACE_ADD(kbdev, PM_POWEROFF_WAIT_WQ, NULL, 0); + -+ /* -+ * Take a manual sample unconditionally if the caller intends -+ * to start all sessions. Otherwise, only take a manual sample -+ * if this is the first session to be initialized, for accumulator -+ * registers are empty and no timer has been configured for automatic -+ * sampling. ++#if !MALI_USE_CSF ++ /* Wait for power transitions to complete. We do this with no locks held ++ * so that we don't deadlock with any pending workqueues. + */ -+ if (!session || first_start) { -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), -+ COMMAND_SAMPLE); -+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); -+ if (ret) -+ dev_err(kbdev->dev, "%s: failed to sample new counters", -+ __func__); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), -+ timer_value(ipa_ctrl->cur_gpu_rate)); -+ } ++ kbase_pm_wait_for_desired_state(kbdev); ++#endif + -+ /* -+ * Read current raw value to start the session. -+ * This is necessary to put the first query in condition -+ * to generate a correct value by calculating the difference -+ * from the beginning of the session. This consideration -+ * is true regardless of the number of sessions the caller -+ * intends to start. 
-+ */ -+ if (!ret) { -+ if (session) { -+ /* On starting a session, value read is required for -+ * IPA power model's calculation initialization. -+ */ -+ session_read_raw_values(kbdev, session); -+ } else { -+ size_t session_idx; ++ kbase_pm_lock(kbdev); + -+ for (session_idx = 0; -+ session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; -+ session_idx++) { -+ struct kbase_ipa_control_session *session_to_check = &ipa_ctrl->sessions[session_idx]; ++ pm_handle_power_off(kbdev); + -+ if (session_to_check->active) -+ session_read_raw_values(kbdev, session_to_check); -+ } -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->poweroff_wait_in_progress = false; ++ if (backend->poweron_required) { ++ backend->poweron_required = false; ++ kbdev->pm.backend.l2_desired = true; ++#if MALI_USE_CSF ++ kbdev->pm.backend.mcu_desired = true; ++#endif ++ kbase_pm_update_state(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); ++#if !MALI_USE_CSF ++ kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return ret; ++ kbase_pm_unlock(kbdev); ++ ++ wake_up(&kbdev->pm.backend.poweroff_wait); +} + -+int kbase_ipa_control_register( -+ struct kbase_device *kbdev, -+ const struct kbase_ipa_control_perf_counter *perf_counters, -+ size_t num_counters, void **client) ++static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) +{ -+ int ret = 0; -+ size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM]; -+ bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS]; -+ bool new_config = false; -+ struct kbase_ipa_control *ipa_ctrl; -+ struct kbase_ipa_control_session *session = NULL; -+ unsigned long flags; -+ -+ if (WARN_ON(unlikely(kbdev == NULL))) -+ return -ENODEV; -+ -+ if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) || -+ WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) { -+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); -+ return -EINVAL; -+ } -+ -+ kbase_pm_context_active(kbdev); -+ -+ ipa_ctrl = &kbdev->csf.ipa_control; -+ spin_lock_irqsave(&ipa_ctrl->lock, flags); -+ -+ if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) { -+ dev_err(kbdev->dev, "%s: too many sessions", __func__); -+ ret = -EBUSY; -+ goto exit; -+ } ++#if defined(CONFIG_MALI_BIFROST_DVFS) ++ struct clk *clk = kbdev->clocks[0]; ++#endif + -+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) -+ req_counters[i] = 0; ++ if (!kbdev->pm.backend.gpu_clock_slow_down_wa) ++ return; + -+ /* -+ * Count how many counters would need to be configured in order to -+ * satisfy the request. Requested counters which happen to be already -+ * configured can be skipped. 
-+ */ -+ for (i = 0; i < num_counters; i++) { -+ size_t j; -+ enum kbase_ipa_core_type type = perf_counters[i].type; -+ u8 idx = perf_counters[i].idx; ++ /* No suspend clock is specified */ ++ if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq)) ++ return; + -+ if ((type >= KBASE_IPA_CORE_TYPE_NUM) || -+ (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) { -+ dev_err(kbdev->dev, -+ "%s: invalid requested type %u and/or index %u", -+ __func__, type, idx); -+ ret = -EINVAL; -+ goto exit; -+ } ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) + -+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { -+ struct kbase_ipa_control_prfcnt_config *prfcnt_config = -+ &ipa_ctrl->blocks[type].select[j]; ++ /* Suspend devfreq */ ++ devfreq_suspend_device(kbdev->devfreq); + -+ if (prfcnt_config->refcount > 0) { -+ if (prfcnt_config->idx == idx) { -+ already_configured[i] = true; -+ break; -+ } -+ } -+ } ++ /* Keep the current freq to restore it upon resume */ ++ kbdev->previous_frequency = kbdev->current_nominal_freq; + -+ if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) { -+ already_configured[i] = false; -+ req_counters[type]++; -+ new_config = true; -+ } -+ } ++ /* Slow down GPU clock to the suspend clock*/ ++ kbase_devfreq_force_freq(kbdev, ++ kbdev->pm.backend.gpu_clock_suspend_freq); + -+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) -+ if (req_counters[i] > -+ ipa_ctrl->blocks[i].num_available_counters) { -+ dev_err(kbdev->dev, -+ "%s: more counters (%zu) than available (%zu) have been requested for type %zu", -+ __func__, req_counters[i], -+ ipa_ctrl->blocks[i].num_available_counters, i); -+ ret = -EINVAL; -+ goto exit; -+ } ++#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+ /* -+ * The request has been validated. -+ * Firstly, find an available session and then set up the initial state -+ * of the session and update the configuration of performance counters -+ * in the internal state of kbase_ipa_control. 
-+ */ -+ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; -+ session_idx++) { -+ if (!ipa_ctrl->sessions[session_idx].active) { -+ session = &ipa_ctrl->sessions[session_idx]; -+ break; -+ } -+ } ++ if (WARN_ON_ONCE(!clk)) ++ return; + -+ if (!session) { -+ dev_err(kbdev->dev, "%s: wrong or corrupt session state", -+ __func__); -+ ret = -EBUSY; -+ goto exit; -+ } ++ /* Stop the metrics gathering framework */ ++ kbase_pm_metrics_stop(kbdev); + -+ for (i = 0; i < num_counters; i++) { -+ struct kbase_ipa_control_prfcnt_config *prfcnt_config; -+ size_t j; -+ u8 type = perf_counters[i].type; -+ u8 idx = perf_counters[i].idx; ++ /* Keep the current freq to restore it upon resume */ ++ kbdev->previous_frequency = clk_get_rate(clk); + -+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { -+ prfcnt_config = &ipa_ctrl->blocks[type].select[j]; ++ /* Slow down GPU clock to the suspend clock*/ ++ if (WARN_ON_ONCE(clk_set_rate(clk, ++ kbdev->pm.backend.gpu_clock_suspend_freq))) ++ dev_err(kbdev->dev, "Failed to set suspend freq\n"); + -+ if (already_configured[i]) { -+ if ((prfcnt_config->refcount > 0) && -+ (prfcnt_config->idx == idx)) { -+ break; -+ } -+ } else { -+ if (prfcnt_config->refcount == 0) -+ break; -+ } -+ } ++#endif /* CONFIG_MALI_BIFROST_DVFS */ ++} + -+ if (WARN_ON((prfcnt_config->refcount > 0 && -+ prfcnt_config->idx != idx) || -+ (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) { -+ dev_err(kbdev->dev, -+ "%s: invalid internal state: counter already configured or no counter available to configure", -+ __func__); -+ ret = -EBUSY; -+ goto exit; -+ } ++static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) ++{ ++#if defined(CONFIG_MALI_BIFROST_DVFS) ++ struct clk *clk = kbdev->clocks[0]; ++#endif + -+ if (prfcnt_config->refcount == 0) { -+ prfcnt_config->idx = idx; -+ ipa_ctrl->blocks[type].num_available_counters--; -+ } ++ if (!kbdev->pm.backend.gpu_clock_slow_down_wa) ++ return; + -+ session->prfcnts[i].accumulated_diff = 0; -+ session->prfcnts[i].type = type; -+ session->prfcnts[i].select_idx = j; -+ session->prfcnts[i].scaling_factor = -+ perf_counters[i].scaling_factor; -+ session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm; ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) + -+ /* Reports to this client for GPU time spent in protected mode -+ * should begin from the point of registration. -+ */ -+ session->last_query_time = ktime_get_raw_ns(); ++ /* Restore GPU clock to the previous one */ ++ kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency); + -+ /* Initially, no time has been spent in protected mode */ -+ session->protm_time = 0; ++ /* Resume devfreq */ ++ devfreq_resume_device(kbdev->devfreq); + -+ prfcnt_config->refcount++; -+ } ++#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+ /* -+ * Apply new configuration, if necessary. -+ * As a temporary solution, make sure that the GPU is on -+ * before applying the new configuration. 
-+ */ -+ if (new_config) { -+ ret = update_select_registers(kbdev); -+ if (ret) -+ dev_err(kbdev->dev, -+ "%s: failed to apply new SELECT configuration", -+ __func__); -+ } ++ if (WARN_ON_ONCE(!clk)) ++ return; + -+ if (!ret) { -+ session->num_prfcnts = num_counters; -+ ret = session_gpu_start(kbdev, ipa_ctrl, session); -+ } ++ /* Restore GPU clock */ ++ if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) ++ dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", ++ kbdev->previous_frequency); + -+ if (!ret) { -+ session->active = true; -+ ipa_ctrl->num_active_sessions++; -+ *client = session; -+ } ++ /* Restart the metrics gathering framework */ ++ kbase_pm_metrics_start(kbdev); + -+exit: -+ spin_unlock_irqrestore(&ipa_ctrl->lock, flags); -+ kbase_pm_context_idle(kbdev); -+ return ret; ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_register); + -+int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client) ++static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) +{ -+ struct kbase_ipa_control *ipa_ctrl; -+ struct kbase_ipa_control_session *session; -+ int ret = 0; -+ size_t i; ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ pm.backend.gpu_clock_control_work); ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; -+ bool new_config = false, valid_session = false; -+ -+ if (WARN_ON(unlikely(kbdev == NULL))) -+ return -ENODEV; ++ bool slow_down = false, normalize = false; + -+ if (WARN_ON(client == NULL)) { -+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); -+ return -EINVAL; ++ /* Determine if GPU clock control is required */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!backend->gpu_clock_slowed_down && ++ backend->gpu_clock_slow_down_desired) { ++ slow_down = true; ++ backend->gpu_clock_slowed_down = true; ++ } else if (backend->gpu_clock_slowed_down && ++ !backend->gpu_clock_slow_down_desired) { ++ normalize = true; ++ backend->gpu_clock_slowed_down = false; + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ kbase_pm_context_active(kbdev); -+ -+ ipa_ctrl = &kbdev->csf.ipa_control; -+ session = (struct kbase_ipa_control_session *)client; ++ /* Control GPU clock according to the request of L2 state machine. ++ * The GPU clock needs to be lowered for safe L2 power down ++ * and restored to previous speed at L2 power up. 
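++ *
++ * For illustration (the value below is an example, not from the ARM
++ * sources): the slowed-down rate comes from
++ * backend.gpu_clock_suspend_freq, which the platform is expected to have
++ * configured beforehand, e.g.
++ *
++ *   kbdev->pm.backend.gpu_clock_suspend_freq = 100 * 1000 * 1000;
++ *
++ * kbase_pm_l2_clock_slow() warns and bails out if it is still zero.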
++ */ ++ if (slow_down) ++ kbase_pm_l2_clock_slow(kbdev); ++ else if (normalize) ++ kbase_pm_l2_clock_normalize(kbdev); + -+ spin_lock_irqsave(&ipa_ctrl->lock, flags); ++ /* Tell L2 state machine to transit to next state */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { -+ if (session == &ipa_ctrl->sessions[i]) { -+ valid_session = true; -+ break; -+ } -+ } ++static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ pm.backend.hwcnt_disable_work); ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; ++ unsigned long flags; + -+ if (!valid_session) { -+ dev_err(kbdev->dev, "%s: invalid session handle", __func__); -+ ret = -EINVAL; -+ goto exit; -+ } ++ bool do_disable; + -+ if (ipa_ctrl->num_active_sessions == 0) { -+ dev_err(kbdev->dev, "%s: no active sessions found", __func__); -+ ret = -EINVAL; -+ goto exit; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (!session->active) { -+ dev_err(kbdev->dev, "%s: session is already inactive", -+ __func__); -+ ret = -EINVAL; -+ goto exit; -+ } ++ if (!do_disable) ++ return; + -+ for (i = 0; i < session->num_prfcnts; i++) { -+ struct kbase_ipa_control_prfcnt_config *prfcnt_config; -+ u8 type = session->prfcnts[i].type; -+ u8 idx = session->prfcnts[i].select_idx; ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + -+ prfcnt_config = &ipa_ctrl->blocks[type].select[idx]; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + -+ if (!WARN_ON(prfcnt_config->refcount == 0)) { -+ prfcnt_config->refcount--; -+ if (prfcnt_config->refcount == 0) { -+ new_config = true; -+ ipa_ctrl->blocks[type].num_available_counters++; -+ } -+ } -+ } ++ if (do_disable) { ++ /* PM state did not change while we were doing the disable, ++ * so commit the work we just performed and continue the state ++ * machine. ++ */ ++ backend->hwcnt_disabled = true; ++ kbase_pm_update_state(kbdev); ++#if !MALI_USE_CSF ++ kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ ++ } else { ++ /* PM state was updated while we were doing the disable, ++ * so we need to undo the disable we just performed. 
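++ * (This is the usual check, unlock, act, re-check pattern: the counter
++ * context is disabled without holding hwaccess_lock, so hwcnt_desired
++ * may have been set again in the meantime, and the enable below simply
++ * reverts the disable done above.)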
++ */ ++#if MALI_USE_CSF ++ unsigned long lock_flags; + -+ if (new_config) { -+ ret = update_select_registers(kbdev); -+ if (ret) -+ dev_err(kbdev->dev, -+ "%s: failed to apply SELECT configuration", -+ __func__); ++ kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); ++#endif ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); ++#endif + } + -+ session->num_prfcnts = 0; -+ session->active = false; -+ ipa_ctrl->num_active_sessions--; -+ -+exit: -+ spin_unlock_irqrestore(&ipa_ctrl->lock, flags); -+ kbase_pm_context_idle(kbdev); -+ return ret; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister); + -+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, -+ u64 *values, size_t num_values, u64 *protected_time) ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++/** ++ * kbase_pm_do_poweroff_sync - Do the synchronous power down of GPU ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function is called at the time of system suspend or device unload ++ * to power down the GPU synchronously. This is needed as the power down of GPU ++ * would usually happen from the runtime suspend callback function (if gpu_active ++ * and gpu_idle callbacks are used) and runtime suspend operation is disabled ++ * when system suspend takes place. ++ * The function first waits for the @gpu_poweroff_wait_work to complete, which ++ * could have been enqueued after the last PM reference was released. ++ * ++ * Return: 0 on success, negative value otherwise. ++ */ ++static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl; -+ struct kbase_ipa_control_session *session; -+ size_t i; ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; -+ bool gpu_ready; -+ -+ if (WARN_ON(unlikely(kbdev == NULL))) -+ return -ENODEV; -+ -+ if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) { -+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); -+ return -EINVAL; -+ } -+ -+ ipa_ctrl = &kbdev->csf.ipa_control; -+ session = (struct kbase_ipa_control_session *)client; ++ int ret = 0; + -+ if (!session->active) { -+ dev_err(kbdev->dev, -+ "%s: attempt to query inactive session", __func__); -+ return -EINVAL; -+ } ++ WARN_ON(kbdev->pm.active_count); + -+ if (WARN_ON(num_values < session->num_prfcnts)) { -+ dev_err(kbdev->dev, -+ "%s: not enough space (%zu) to return all counter values (%zu)", -+ __func__, num_values, session->num_prfcnts); -+ return -EINVAL; -+ } ++ kbase_pm_wait_for_poweroff_work_complete(kbdev); + ++ kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ gpu_ready = kbdev->pm.backend.gpu_ready; -+ -+ for (i = 0; i < session->num_prfcnts; i++) { -+ struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; ++ WARN_ON(backend->poweroff_wait_in_progress); ++ WARN_ON(backend->gpu_sleep_mode_active); ++ if (backend->gpu_powered) { + -+ calc_prfcnt_delta(kbdev, prfcnt, gpu_ready); -+ /* Return all the accumulated difference */ -+ values[i] = prfcnt->accumulated_diff; -+ prfcnt->accumulated_diff = 0; -+ } ++ backend->mcu_desired = false; ++ backend->l2_desired = false; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (protected_time) { -+ u64 time_now = ktime_get_raw_ns(); ++ ret = kbase_pm_wait_for_desired_state(kbdev); ++ if (ret) { ++ dev_warn( ++ kbdev->dev, 
++ "Wait for pm state change failed on synchronous power off"); ++ ret = -EBUSY; ++ goto out; ++ } + -+ /* This is the amount of protected-mode time spent prior to -+ * the current protm period. ++ /* Due to the power policy, GPU could have been kept active ++ * throughout and so need to invoke the idle callback before ++ * the power down. + */ -+ *protected_time = session->protm_time; ++ if (backend->callback_power_runtime_gpu_idle && ++ !backend->gpu_idled) { ++ backend->callback_power_runtime_gpu_idle(kbdev); ++ backend->gpu_idled = true; ++ } + -+ if (kbdev->protected_mode) { -+ *protected_time += -+ time_now - MAX(session->last_query_time, -+ ipa_ctrl->protm_start); ++ if (!kbase_pm_clock_off(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "Failed to turn off GPU clocks on synchronous power off, MMU faults pending"); ++ ret = -EBUSY; + } -+ session->last_query_time = time_now; -+ session->protm_time = 0; ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ for (i = session->num_prfcnts; i < num_values; i++) -+ values[i] = 0; -+ -+ return 0; ++out: ++ kbase_pm_unlock(kbdev); ++ return ret; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_query); ++#endif + -+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev) ++void kbase_pm_do_poweroff(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ size_t session_idx; -+ int ret; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* GPU should still be ready for use when this function gets called */ -+ WARN_ON(!kbdev->pm.backend.gpu_ready); ++ unsigned long flags; + -+ /* Interrupts are already disabled and interrupt state is also saved */ -+ spin_lock(&ipa_ctrl->lock); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ /* First disable the automatic sampling through TIMER */ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); -+ ret = wait_status(kbdev, STATUS_TIMER_ENABLED); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Wait for disabling of IPA control timer failed: %d", -+ ret); -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Now issue the manual SAMPLE command */ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE); -+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Wait for the completion of manual sample failed: %d", -+ ret); -+ } ++ if (!kbdev->pm.backend.gpu_powered) ++ goto unlock_hwaccess; + -+ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; -+ session_idx++) { ++ if (kbdev->pm.backend.poweroff_wait_in_progress) ++ goto unlock_hwaccess; + -+ struct kbase_ipa_control_session *session = -+ &ipa_ctrl->sessions[session_idx]; ++#if MALI_USE_CSF ++ kbdev->pm.backend.mcu_desired = false; ++#else ++ /* Force all cores off */ ++ kbdev->pm.backend.shaders_desired = false; ++#endif ++ kbdev->pm.backend.l2_desired = false; + -+ if (session->active) { -+ size_t i; ++ kbdev->pm.backend.poweroff_wait_in_progress = true; ++ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; + -+ for (i = 0; i < session->num_prfcnts; i++) { -+ struct kbase_ipa_control_prfcnt *prfcnt = -+ &session->prfcnts[i]; ++ /* l2_desired being false should cause the state machine to ++ * start powering off the L2. When it actually is powered off, ++ * the interrupt handler will call kbase_pm_l2_update_state() ++ * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. ++ * Callers of this function will need to wait on poweroff_wait. 
++ */ ++ kbase_pm_update_state(kbdev); + -+ calc_prfcnt_delta(kbdev, prfcnt, true); -+ } -+ } -+ } -+ spin_unlock(&ipa_ctrl->lock); ++unlock_hwaccess: ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev) ++static bool is_poweroff_in_progress(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ int ret; ++ bool ret; ++ unsigned long flags; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* GPU should have become ready for use when this function gets called */ -+ WARN_ON(!kbdev->pm.backend.gpu_ready); ++ return ret; ++} + -+ /* Interrupts are already disabled and interrupt state is also saved */ -+ spin_lock(&ipa_ctrl->lock); ++void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) ++{ ++ wait_event_killable(kbdev->pm.backend.poweroff_wait, ++ is_poweroff_in_progress(kbdev)); ++} ++KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + -+ ret = update_select_registers(kbdev); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Failed to reconfigure the select registers: %d", ret); -+ } ++/** ++ * is_gpu_powered_down - Check whether GPU is powered down ++ * ++ * @kbdev: kbase device ++ * ++ * Return: true if GPU is powered down, false otherwise ++ */ ++static bool is_gpu_powered_down(struct kbase_device *kbdev) ++{ ++ bool ret; ++ unsigned long flags; + -+ /* Accumulator registers would not contain any sample after GPU power -+ * cycle if the timer has not been enabled first. Initialize all sessions. -+ */ -+ ret = session_gpu_start(kbdev, ipa_ctrl, NULL); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = !kbdev->pm.backend.gpu_powered; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ spin_unlock(&ipa_ctrl->lock); ++ return ret; +} + -+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev) ++void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev) +{ -+ /* A soft reset is treated as a power down */ -+ kbase_ipa_control_handle_gpu_power_off(kbdev); ++ wait_event_killable(kbdev->pm.backend.poweroff_wait, ++ is_gpu_powered_down(kbdev)); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre); ++KBASE_EXPORT_TEST_API(kbase_pm_wait_for_gpu_power_down); + -+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) ++int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, ++ unsigned int flags) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ unsigned long irq_flags; + int ret; -+ u32 status; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ /* GPU should have become ready for use when this function gets called */ -+ WARN_ON(!kbdev->pm.backend.gpu_ready); ++ kbase_pm_lock(kbdev); + -+ /* Interrupts are already disabled and interrupt state is also saved */ -+ spin_lock(&ipa_ctrl->lock); ++ /* A suspend won't happen during startup/insmod */ ++ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + -+ /* Check the status reset bit is set before acknowledging it */ -+ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); -+ if (status & STATUS_RESET) { -+ /* Acknowledge the reset command */ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK); -+ ret = wait_status(kbdev, STATUS_RESET); -+ if (ret) { -+ dev_err(kbdev->dev, 
-+ "Wait for the reset ack command failed: %d", -+ ret); -+ } ++ /* Power up the GPU, don't enable IRQs as we are not ready to receive ++ * them ++ */ ++ ret = kbase_pm_init_hw(kbdev, flags); ++ if (ret) { ++ kbase_pm_unlock(kbdev); ++ return ret; + } ++#if MALI_USE_CSF ++ kbdev->pm.debug_core_mask = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ /* Set the initial value for 'shaders_avail'. It would be later ++ * modified only from the MCU state machine, when the shader core ++ * allocation enable mask request has completed. So its value would ++ * indicate the mask of cores that are currently being used by FW for ++ * the allocation of endpoints requested by CSGs. ++ */ ++ kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++#else ++ kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = ++ kbdev->pm.debug_core_mask[1] = ++ kbdev->pm.debug_core_mask[2] = ++ kbdev->gpu_props.props.raw_props.shader_present; ++#endif + -+ spin_unlock(&ipa_ctrl->lock); ++ /* Pretend the GPU is active to prevent a power policy turning the GPU ++ * cores off ++ */ ++ kbdev->pm.active_count = 1; ++#if MALI_USE_CSF && KBASE_PM_RUNTIME ++ if (kbdev->pm.backend.callback_power_runtime_gpu_active) { ++ /* Take the RPM reference count to match with the internal ++ * PM reference count ++ */ ++ kbdev->pm.backend.callback_power_runtime_gpu_active(kbdev); ++ WARN_ON(kbdev->pm.backend.gpu_idled); ++ } ++#endif + -+ kbase_ipa_control_handle_gpu_power_on(kbdev); ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ /* Ensure cycle counter is off */ ++ kbdev->pm.backend.gpu_cycle_counter_requests = 0; ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ ++ /* We are ready to receive IRQ's now as power policy is set up, so ++ * enable them now. ++ */ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ kbdev->pm.backend.driver_ready_for_irqs = true; ++#endif ++ kbase_pm_enable_interrupts(kbdev); ++ ++ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ /* GPU has been powered up (by kbase_pm_init_hw) and interrupts have ++ * been enabled, so GPU is ready for use and PM state machine can be ++ * exercised from this point onwards. 
++ */ ++ kbdev->pm.backend.gpu_ready = true; ++ ++ /* Turn on the GPU and any cores needed by the policy */ ++#if MALI_USE_CSF ++ /* Turn on the L2 caches, needed for firmware boot */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbdev->pm.backend.l2_desired = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++#endif ++ kbase_pm_do_poweron(kbdev, false); ++ kbase_pm_unlock(kbdev); ++ ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post); + -+#ifdef KBASE_PM_RUNTIME -+void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev) ++void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { -+ /* GPU Sleep is treated as a power down */ -+ kbase_ipa_control_handle_gpu_power_off(kbdev); ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ WARN_ON(kbase_pm_do_poweroff_sync(kbdev)); ++#else ++ mutex_lock(&kbdev->pm.lock); ++ kbase_pm_do_poweroff(kbdev); ++ mutex_unlock(&kbdev->pm.lock); + -+ /* SELECT_CSHW register needs to be cleared to prevent any -+ * IPA control message to be sent to the top level GPU HWCNT. -+ */ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0); -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0); ++ kbase_pm_wait_for_poweroff_work_complete(kbdev); ++#endif ++} + -+ /* No need to issue the APPLY command here */ ++KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); ++ ++void kbase_hwaccess_pm_term(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); ++ ++ cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); ++ ++ if (kbdev->pm.backend.hwcnt_disabled) { ++ unsigned long flags; ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++#else ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif ++ } ++ ++ /* Free any resources the policy allocated */ ++ kbase_pm_state_machine_term(kbdev); ++ kbase_pm_policy_term(kbdev); ++ kbase_pm_ca_term(kbdev); ++ ++#if !MALI_USE_CSF ++ /* Shut down the metrics subsystem */ ++ kbasep_pm_metrics_term(kbdev); ++#else ++ if (WARN_ON(mutex_is_locked(&kbdev->pm.backend.policy_change_lock))) { ++ mutex_lock(&kbdev->pm.backend.policy_change_lock); ++ mutex_unlock(&kbdev->pm.backend.policy_change_lock); + } ++ mutex_destroy(&kbdev->pm.backend.policy_change_lock); ++#endif ++ ++ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter); + -+void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev) ++void kbase_pm_power_changed(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ ++#if !MALI_USE_CSF ++ kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++#if MALI_USE_CSF ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) 
{ -+ /* To keep things simple, currently exit from -+ * GPU Sleep is treated as a power on event where -+ * all 4 SELECT registers are reconfigured. -+ * On exit from sleep, reconfiguration is needed -+ * only for the SELECT_CSHW register. -+ */ -+ kbase_ipa_control_handle_gpu_power_on(kbdev); ++ kbdev->pm.debug_core_mask = new_core_mask; ++ kbase_pm_update_dynamic_cores_onoff(kbdev); ++} ++KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); ++#else ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask_js0, u64 new_core_mask_js1, ++ u64 new_core_mask_js2) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ if (kbase_dummy_job_wa_enabled(kbdev)) { ++ dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); ++ new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; + } ++ ++ kbdev->pm.debug_core_mask[0] = new_core_mask_js0; ++ kbdev->pm.debug_core_mask[1] = new_core_mask_js1; ++ kbdev->pm.debug_core_mask[2] = new_core_mask_js2; ++ kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | ++ new_core_mask_js2; ++ ++ kbase_pm_update_dynamic_cores_onoff(kbdev); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit); -+#endif ++#endif /* MALI_USE_CSF */ + -+#if MALI_UNIT_TEST -+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, -+ u32 clk_index, u32 clk_rate_hz) ++void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ struct kbase_ipa_control_listener_data *listener_data = -+ ipa_ctrl->rtm_listener_data; ++ kbase_pm_update_active(kbdev); ++} + -+ kbase_ipa_control_rate_change_notify(&listener_data->listener, -+ clk_index, clk_rate_hz); ++void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) ++{ ++ kbase_pm_update_active(kbdev); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test); ++ ++int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ ret = kbase_pm_do_poweroff_sync(kbdev); ++ if (ret) ++ return ret; ++#else ++ /* Force power off the GPU and all cores (regardless of policy), only ++ * after the PM active count reaches zero (otherwise, we risk turning it ++ * off prematurely) ++ */ ++ kbase_pm_lock(kbdev); ++ ++ kbase_pm_do_poweroff(kbdev); ++ ++#if !MALI_USE_CSF ++ kbase_backend_timer_suspend(kbdev); ++#endif /* !MALI_USE_CSF */ ++ ++ kbase_pm_unlock(kbdev); ++ ++ kbase_pm_wait_for_poweroff_work_complete(kbdev); +#endif + -+void kbase_ipa_control_protm_entered(struct kbase_device *kbdev) ++ WARN_ON(kbdev->pm.backend.gpu_powered); ++ WARN_ON(atomic_read(&kbdev->faults_pending)); ++ ++ if (kbdev->pm.backend.callback_power_suspend) ++ kbdev->pm.backend.callback_power_suspend(kbdev); ++ ++ return ret; ++} ++ ++void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ kbase_pm_lock(kbdev); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ ipa_ctrl->protm_start = ktime_get_raw_ns(); ++ kbdev->pm.suspending = false; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ dev_dbg(kbdev->dev, "%s: GPU lost in progress\n", __func__); ++ kbase_pm_unlock(kbdev); ++ return; ++ } ++#endif ++ kbase_pm_do_poweron(kbdev, true); ++ ++#if !MALI_USE_CSF ++ kbase_backend_timer_resume(kbdev); ++#endif /* !MALI_USE_CSF */ ++ ++ wake_up_all(&kbdev->pm.resume_wait); ++ 
kbase_pm_unlock(kbdev); +} + -+void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; -+ size_t i; -+ u64 time_now = ktime_get_raw_ns(); -+ u32 status; ++ unsigned long flags; ++ ktime_t end_timestamp = ktime_get_raw(); ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (!kbdev->arb.arb_if) ++ return; + -+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { ++ mutex_lock(&kbdev->pm.lock); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ if (kbdev->pm.backend.gpu_powered && ++ !kbase_pm_is_gpu_lost(kbdev)) { ++ kbase_pm_set_gpu_lost(kbdev, true); + -+ struct kbase_ipa_control_session *session = -+ &ipa_ctrl->sessions[i]; ++ /* GPU is no longer mapped to VM. So no interrupts will ++ * be received and Mali registers have been replaced by ++ * dummy RAM ++ */ ++ WARN(!kbase_is_gpu_removed(kbdev), ++ "GPU is still available after GPU lost event\n"); + -+ if (session->active) { -+ u64 protm_time = time_now - MAX(session->last_query_time, -+ ipa_ctrl->protm_start); ++ /* Full GPU reset will have been done by hypervisor, so ++ * cancel ++ */ ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); ++ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); ++ kbase_synchronize_irqs(kbdev); + -+ session->protm_time += protm_time; ++ /* Clear all jobs running on the GPU */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->protected_mode = false; ++ kbase_backend_reset(kbdev, &end_timestamp); ++ kbase_pm_metrics_update(kbdev, NULL); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* Cancel any pending HWC dumps */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING || ++ kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); + } ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ mutex_unlock(&kbdev->pm.lock); ++} + -+ /* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS -+ * register ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* Set the override flag to force the power up of L2 cache */ ++ kbdev->pm.backend.gpu_wakeup_override = true; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return kbase_pm_wait_for_desired_state(kbdev); ++} ++ ++static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ int ret; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ /* In case of no active CSG on slot, powering up L2 could be skipped and ++ * proceed directly to suspend GPU. ++ * ToDo: firmware has to be reloaded after wake-up as no halt command ++ * has been sent when GPU was put to sleep mode. 
+ */ -+ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); -+ if (status & STATUS_PROTECTED_MODE) { -+ int ret; ++ if (!kbase_csf_scheduler_get_nr_active_csgs(kbdev)) ++ dev_info( ++ kbdev->dev, ++ "No active CSGs. Can skip the power up of L2 and go for suspension directly"); ++#endif + -+ /* Acknowledge the protm command */ -+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), -+ COMMAND_PROTECTED_ACK); -+ ret = wait_status(kbdev, STATUS_PROTECTED_MODE); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Wait for the protm ack command failed: %d", -+ ret); -+ } ++ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); ++ if (ret) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ dev_warn( ++ kbdev->dev, ++ "Waiting for MCU to wake up failed on runtime suspend"); ++ kbdev->pm.backend.gpu_wakeup_override = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return ret; ++ } ++ ++ /* Check if a Doorbell mirror interrupt occurred meanwhile */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbdev->pm.backend.gpu_sleep_mode_active && ++ kbdev->pm.backend.exit_gpu_sleep_mode) { ++ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend after L2 power up"); ++ kbdev->pm.backend.gpu_wakeup_override = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return -EBUSY; ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Need to release the kbdev->pm.lock to avoid lock ordering issue ++ * with kctx->reg.lock, which is taken if the sync wait condition is ++ * evaluated after the CSG suspend operation. ++ */ ++ kbase_pm_unlock(kbdev); ++ ret = kbase_csf_scheduler_handle_runtime_suspend(kbdev); ++ kbase_pm_lock(kbdev); ++ ++ /* Power down L2 cache */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.gpu_wakeup_override = false; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* After re-acquiring the kbdev->pm.lock, check if the device ++ * became active (or active then idle) meanwhile. ++ */ ++ if (kbdev->pm.active_count || ++ kbdev->pm.backend.poweroff_wait_in_progress) { ++ dev_dbg(kbdev->dev, ++ "Device became active on runtime suspend after suspending Scheduler"); ++ ret = -EBUSY; + } ++ ++ if (ret) ++ return ret; ++ ++ ret = kbase_pm_wait_for_desired_state(kbdev); ++ if (ret) ++ dev_warn(kbdev->dev, "Wait for power down failed on runtime suspend"); ++ ++ return ret; +} + -diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h ++int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev) ++{ ++ enum kbase_mcu_state mcu_state; ++ bool exit_early = false; ++ unsigned long flags; ++ int ret = 0; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* This check is needed for the case where Kbase had invoked the ++ * @power_off_callback directly. ++ */ ++ if (!kbdev->pm.backend.gpu_powered) { ++ dev_dbg(kbdev->dev, "GPU already powered down on runtime suspend"); ++ exit_early = true; ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (exit_early) ++ goto out; ++ ++ ret = kbase_reset_gpu_try_prevent(kbdev); ++ if (ret == -ENOMEM) { ++ dev_dbg(kbdev->dev, "Quit runtime suspend as GPU is in bad state"); ++ /* Finish the runtime suspend, no point in trying again as GPU is ++ * in irrecoverable bad state. 
++ */ ++ goto out; ++ } else if (ret) { ++ dev_dbg(kbdev->dev, "Quit runtime suspend for failing to prevent gpu reset"); ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_pm_lock(kbdev); ++ ++ /* ++ * This is to handle the case where GPU device becomes active and idle ++ * very quickly whilst the runtime suspend callback is executing. ++ * This is useful for the following scenario :- ++ * - GPU goes idle and pm_callback_runtime_gpu_idle() is called. ++ * - Auto-suspend timer expires and kbase_device_runtime_suspend() ++ * is called. ++ * - GPU becomes active and pm_callback_runtime_gpu_active() calls ++ * pm_runtime_get(). ++ * - Shortly after that GPU becomes idle again. ++ * - kbase_pm_handle_runtime_suspend() gets called. ++ * - pm_callback_runtime_gpu_idle() is called. ++ * ++ * We do not want to power down the GPU immediately after it goes idle. ++ * So if we notice that GPU had become active when the runtime suspend ++ * had already kicked in, we abort the runtime suspend. ++ * By aborting the runtime suspend, we defer the power down of GPU. ++ * ++ * This check also helps prevent warnings regarding L2 and MCU states ++ * inside the pm_handle_power_off() function. The warning stems from ++ * the fact that pm.lock is released before invoking Scheduler function ++ * to suspend the CSGs. ++ */ ++ if (kbdev->pm.active_count || ++ kbdev->pm.backend.poweroff_wait_in_progress) { ++ dev_dbg(kbdev->dev, "Device became active on runtime suspend"); ++ ret = -EBUSY; ++ goto unlock; ++ } ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbdev->pm.backend.gpu_sleep_mode_active && ++ kbdev->pm.backend.exit_gpu_sleep_mode) { ++ dev_dbg(kbdev->dev, "DB mirror interrupt occurred during runtime suspend before L2 power up"); ++ ret = -EBUSY; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ goto unlock; ++ } ++ ++ mcu_state = kbdev->pm.backend.mcu_state; ++ WARN_ON(!kbase_pm_is_mcu_inactive(kbdev, mcu_state)); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (mcu_state == KBASE_MCU_IN_SLEEP) { ++ ret = pm_handle_mcu_sleep_on_runtime_suspend(kbdev); ++ if (ret) ++ goto unlock; ++ } ++ ++ /* Disable interrupts and turn off the GPU clocks */ ++ if (!kbase_pm_clock_off(kbdev)) { ++ dev_warn(kbdev->dev, "Failed to turn off GPU clocks on runtime suspend, MMU faults pending"); ++ ++ WARN_ON(!kbdev->poweroff_pending); ++ /* Previous call to kbase_pm_clock_off() would have disabled ++ * the interrupts and also synchronized with the interrupt ++ * handlers, so more fault work items can't be enqueued. ++ * ++ * Can't wait for the completion of MMU fault work items as ++ * there is a possibility of a deadlock since the fault work ++ * items would do the group termination which requires the ++ * Scheduler lock. 
++ */ ++ ret = -EBUSY; ++ goto unlock; ++ } ++ ++ wake_up(&kbdev->pm.backend.poweroff_wait); ++ WARN_ON(kbdev->pm.backend.gpu_powered); ++ dev_dbg(kbdev->dev, "GPU power down complete"); ++ ++unlock: ++ kbase_pm_unlock(kbdev); ++ kbase_csf_scheduler_unlock(kbdev); ++ kbase_reset_gpu_allow(kbdev); ++out: ++ if (ret) { ++ ret = -EBUSY; ++ pm_runtime_mark_last_busy(kbdev->dev); ++ } ++ ++ return ret; ++} ++#endif +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c new file mode 100644 -index 000000000..69ff8973b +index 000000000..e2b0a9192 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h -@@ -0,0 +1,271 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +@@ -0,0 +1,150 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -138869,266 +138251,447 @@ index 000000000..69ff8973b + * + */ + -+#ifndef _KBASE_CSF_IPA_CONTROL_H_ -+#define _KBASE_CSF_IPA_CONTROL_H_ ++/* ++ * Base kernel core availability APIs ++ */ + +#include ++#include ++#include ++#include ++#include ++ ++int kbase_pm_ca_init(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; ++ ++ if (kbdev->current_core_mask) ++ pm_backend->ca_cores_enabled = kbdev->current_core_mask; ++ else ++ pm_backend->ca_cores_enabled = ++ kbdev->gpu_props.props.raw_props.shader_present; ++#endif ++ ++ return 0; ++} ++ ++void kbase_pm_ca_term(struct kbase_device *kbdev) ++{ ++} ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) ++{ ++ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; ++ unsigned long flags; ++#if MALI_USE_CSF ++ u64 old_core_mask = 0; ++#endif ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ if (!(core_mask & kbdev->pm.debug_core_mask)) { ++ dev_err(kbdev->dev, ++ "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", ++ core_mask, kbdev->pm.debug_core_mask); ++ goto unlock; ++ } ++ ++ old_core_mask = pm_backend->ca_cores_enabled; ++#else ++ if (!(core_mask & kbdev->pm.debug_core_mask_all)) { ++ dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", ++ core_mask, kbdev->pm.debug_core_mask_all); ++ goto unlock; ++ } ++ ++ if (kbase_dummy_job_wa_enabled(kbdev)) { ++ dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); ++ goto unlock; ++ } ++#endif /* MALI_USE_CSF */ ++ pm_backend->ca_cores_enabled = core_mask; ++ ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ /* Check if old_core_mask contained the undesired cores and wait ++ * for those cores to get powered down ++ */ ++ if ((core_mask & old_core_mask) != old_core_mask) { ++ if (kbase_pm_wait_for_cores_down_scale(kbdev)) { ++ dev_warn(kbdev->dev, ++ "Wait for update of core_mask from %llx to %llx failed", ++ old_core_mask, core_mask); ++ } ++ } ++#endif ++ ++ dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", ++ pm_backend->ca_cores_enabled); ++ ++ 
return; ++unlock: ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); ++#endif ++ ++u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) ++{ ++#if MALI_USE_CSF ++ u64 debug_core_mask = kbdev->pm.debug_core_mask; ++#else ++ u64 debug_core_mask = kbdev->pm.debug_core_mask_all; ++#endif ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ /* ++ * Although in the init we let the pm_backend->ca_cores_enabled to be ++ * the max config (it uses the base_gpu_props), at this function we need ++ * to limit it to be a subgroup of the curr config, otherwise the ++ * shaders state machine on the PM does not evolve. ++ */ ++ return kbdev->gpu_props.curr_config.shader_present & ++ kbdev->pm.backend.ca_cores_enabled & ++ debug_core_mask; ++#else ++ return kbdev->gpu_props.curr_config.shader_present & ++ debug_core_mask; ++#endif ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); ++ ++u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); ++#elif MALI_USE_CSF ++ return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++#else ++ return kbdev->pm.backend.pm_shaders_core_mask; ++#endif ++} +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h +new file mode 100644 +index 000000000..90dcaf5b6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + +/* -+ * Maximum index accepted to configure an IPA Control performance counter. ++ * Base kernel core availability APIs + */ -+#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3) ++ ++#ifndef _KBASE_PM_CA_H_ ++#define _KBASE_PM_CA_H_ + +/** -+ * struct kbase_ipa_control_perf_counter - Performance counter description ++ * kbase_pm_ca_init - Initialize core availability framework + * -+ * @scaling_factor: Scaling factor by which the counter's value shall be -+ * multiplied. A scaling factor of 1 corresponds to units -+ * of 1 second if values are normalised by GPU frequency. -+ * @gpu_norm: Indicating whether counter values shall be normalized by -+ * GPU frequency. If true, returned values represent -+ * an interval of time expressed in seconds (when the scaling -+ * factor is set to 1). -+ * @type: Type of counter block for performance counter. -+ * @idx: Index of the performance counter inside the block. -+ * It may be dependent on GPU architecture. -+ * It cannot be greater than KBASE_IPA_CONTROL_CNT_MAX_IDX. 
++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This structure is used by clients of the IPA Control component to describe -+ * a performance counter that they intend to read. The counter is identified -+ * by block and index. In addition to that, the client also specifies how -+ * values shall be represented. Raw values are a number of GPU cycles; -+ * if normalized, they are divided by GPU frequency and become an interval -+ * of time expressed in seconds, since the GPU frequency is given in Hz. -+ * The client may specify a scaling factor to multiply counter values before -+ * they are divided by frequency, in case the unit of time of 1 second is -+ * too low in resolution. For instance: a scaling factor of 1000 implies -+ * that the returned value is a time expressed in milliseconds; a scaling -+ * factor of 1000 * 1000 implies that the returned value is a time expressed -+ * in microseconds. ++ * Must be called before calling any other core availability function ++ * ++ * Return: 0 if the core availability framework was successfully initialized, ++ * -errno otherwise + */ -+struct kbase_ipa_control_perf_counter { -+ u64 scaling_factor; -+ bool gpu_norm; -+ enum kbase_ipa_core_type type; -+ u8 idx; -+}; ++int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** -+ * kbase_ipa_control_init - Initialize the IPA Control component ++ * kbase_pm_ca_term - Terminate core availability framework + * -+ * @kbdev: Pointer to Kbase device. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_ipa_control_init(struct kbase_device *kbdev); ++void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** -+ * kbase_ipa_control_term - Terminate the IPA Control component ++ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * -+ * @kbdev: Pointer to Kbase device. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Returns a mask of the currently available shader cores. ++ * Calls into the core availability policy ++ * ++ * Return: The bit mask of available cores + */ -+void kbase_ipa_control_term(struct kbase_device *kbdev); ++u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** -+ * kbase_ipa_control_register - Register a client to the IPA Control component ++ * kbase_pm_ca_update_core_status - Update core status + * -+ * @kbdev: Pointer to Kbase device. -+ * @perf_counters: Array of performance counters the client intends to read. -+ * For each counter the client specifies block, index, -+ * scaling factor and whether it must be normalized by GPU -+ * frequency. -+ * @num_counters: Number of performance counters. It cannot exceed the total -+ * number of counters that exist on the IPA Control interface. -+ * @client: Handle to an opaque structure set by IPA Control if -+ * the registration is successful. This handle identifies -+ * a client's session and shall be provided in its future -+ * queries. ++ * @kbdev: The kbase device structure for the device (must be ++ * a valid pointer) ++ * @cores_ready: The bit mask of cores ready for job submission ++ * @cores_transitioning: The bit mask of cores that are transitioning power ++ * state + * -+ * A client needs to subscribe to the IPA Control component by declaring which -+ * performance counters it intends to read, and specifying a scaling factor -+ * and whether normalization is requested for each performance counter. 
-+ * The function shall configure the IPA Control interface accordingly and start -+ * a session for the client that made the request. A unique handle is returned -+ * if registration is successful in order to identify the client's session -+ * and be used for future queries. ++ * Update core availability policy with current core power status + * -+ * Return: 0 on success, negative -errno on error ++ * Calls into the core availability policy + */ -+int kbase_ipa_control_register( -+ struct kbase_device *kbdev, -+ const struct kbase_ipa_control_perf_counter *perf_counters, -+ size_t num_counters, void **client); ++void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, ++ u64 cores_transitioning); + +/** -+ * kbase_ipa_control_unregister - Unregister a client from IPA Control ++ * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask + * -+ * @kbdev: Pointer to kbase device. -+ * @client: Handle to an opaque structure that identifies the client session -+ * to terminate, as returned by kbase_ipa_control_register. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: 0 on success, negative -errno on error ++ * Returns a mask of the PM state synchronised shader cores for arranging ++ * HW performance counter dumps ++ * ++ * Return: The bit mask of PM state synchronised cores + */ -+int kbase_ipa_control_unregister(struct kbase_device *kbdev, -+ const void *client); ++u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev); + -+/** -+ * kbase_ipa_control_query - Query performance counters ++#endif /* _KBASE_PM_CA_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h +new file mode 100644 +index 000000000..d1e4b5327 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: Pointer to kbase device. -+ * @client: Handle to an opaque structure that identifies the client -+ * session, as returned by kbase_ipa_control_register. -+ * @values: Array of values queried from performance counters, whose -+ * length depends on the number of counters requested at -+ * the time of registration. Values are scaled and normalized -+ * and represent the difference since the last query. -+ * @num_values: Number of entries in the array of values that has been -+ * passed by the caller. It must be at least equal to the -+ * number of performance counters the client registered itself -+ * to read. -+ * @protected_time: Time spent in protected mode since last query, -+ * expressed in nanoseconds. This pointer may be NULL if the -+ * client doesn't want to know about this. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * -+ * A client that has already opened a session by registering itself to read -+ * some performance counters may use this function to query the values of -+ * those counters. The values returned are normalized by GPU frequency if -+ * requested and then multiplied by the scaling factor provided at the time -+ * of registration. Values always represent a difference since the last query. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
+ * -+ * Performance counters are not updated while the GPU operates in protected -+ * mode. For this reason, returned values may be unreliable if the GPU has -+ * been in protected mode since the last query. The function returns success -+ * in that case, but it also gives a measure of how much time has been spent -+ * in protected mode. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, negative -errno on error + */ -+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, -+ u64 *values, size_t num_values, -+ u64 *protected_time); + -+/** -+ * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event -+ * -+ * @kbdev: Pointer to kbase device. -+ * -+ * This function is called after GPU has been powered and is ready for use. -+ * After the GPU power on, IPA Control component needs to ensure that the -+ * counters start incrementing again. ++/* ++ * A core availability policy for use with devfreq, where core masks are ++ * associated with OPPs. + */ -+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev); ++ ++#ifndef MALI_KBASE_PM_CA_DEVFREQ_H ++#define MALI_KBASE_PM_CA_DEVFREQ_H + +/** -+ * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event ++ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy + * -+ * @kbdev: Pointer to kbase device. ++ * @cores_desired: Cores that the policy wants to be available ++ * @cores_enabled: Cores that the policy is currently returning as available ++ * @cores_used: Cores currently powered or transitioning + * -+ * This function is called just before the GPU is powered off when it is still -+ * ready for use. -+ * IPA Control component needs to be aware of the GPU power off so that it can -+ * handle the query from Clients appropriately and return meaningful values -+ * to them. ++ * This contains data that is private to the devfreq core availability ++ * policy. + */ -+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev); ++struct kbasep_pm_ca_policy_devfreq { ++ u64 cores_desired; ++ u64 cores_enabled; ++ u64 cores_used; ++}; ++ ++extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** -+ * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event -+ * -+ * @kbdev: Pointer to kbase device. ++ * kbase_devfreq_set_core_mask - Set core mask for policy to use ++ * @kbdev: Device pointer ++ * @core_mask: New core mask + * -+ * This function is called when the GPU is about to be reset. ++ * The new core mask will have immediate effect if the GPU is powered, or will ++ * take effect when it is next powered on. 
+ */ -+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); ++void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + -+/** -+ * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event ++#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ ++ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c +new file mode 100644 +index 000000000..f40b75385 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c +@@ -0,0 +1,67 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @kbdev: Pointer to kbase device. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * -+ * This function is called after the GPU has been reset. -+ */ -+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); -+ -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kbdev: Pointer to kbase device. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * This function is called after MCU has been put to sleep state & L2 cache has -+ * been powered down. The top level part of GPU is still powered up when this -+ * function is called. + */ -+void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev); + -+/** -+ * kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event -+ * -+ * @kbdev: Pointer to kbase device. -+ * -+ * This function is called when L2 needs to be powered up and MCU can exit the -+ * sleep state. The top level part of GPU is powered up when this function is -+ * called. ++/* ++ * "Coarse Demand" power management policy ++ */ ++ ++#include ++#include ++ ++static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) ++{ ++ return kbase_pm_is_active(kbdev); ++} ++ ++static bool coarse_demand_get_core_active(struct kbase_device *kbdev) ++{ ++ return kbase_pm_is_active(kbdev); ++} ++ ++static void coarse_demand_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++ ++static void coarse_demand_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++ ++/* The struct kbase_pm_policy structure for the demand power policy. + * -+ * This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter() -+ * was called previously. ++ * This is the static structure that defines the demand power policy's callback ++ * and name. 
+ */ -+void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev); ++const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { ++ "coarse_demand", /* name */ ++ coarse_demand_init, /* init */ ++ coarse_demand_term, /* term */ ++ coarse_demand_shaders_needed, /* shaders_needed */ ++ coarse_demand_get_core_active, /* get_core_active */ ++ NULL, /* handle_event */ ++ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ ++#if MALI_USE_CSF ++ COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ +#endif ++}; + -+#if MALI_UNIT_TEST -+/** -+ * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change -+ * (only for testing) ++KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h +new file mode 100644 +index 000000000..a947e8f55 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h +@@ -0,0 +1,64 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: Pointer to kbase device. -+ * @clk_index: Index of the clock for which the change has occurred. -+ * @clk_rate_hz: Clock frequency(Hz). ++ * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Notify the IPA Control component about a GPU rate change. + */ -+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, -+ u32 clk_index, u32 clk_rate_hz); -+#endif /* MALI_UNIT_TEST */ ++ ++/* ++ * "Coarse Demand" power management policy ++ */ ++ ++#ifndef MALI_KBASE_PM_COARSE_DEMAND_H ++#define MALI_KBASE_PM_COARSE_DEMAND_H + +/** -+ * kbase_ipa_control_protm_entered - Tell IPA_CONTROL that protected mode -+ * has been entered. -+ * -+ * @kbdev: Pointer to kbase device. ++ * DOC: ++ * The "Coarse" demand power management policy has the following ++ * characteristics: ++ * - When KBase indicates that the GPU will be powered up, but we don't yet ++ * know which Job Chains are to be run: ++ * - Shader Cores are powered up, regardless of whether or not they will be ++ * needed later. ++ * - When KBase indicates that Shader Cores are needed to submit the currently ++ * queued Job Chains: ++ * - Shader Cores are kept powered, regardless of whether or not they will ++ * be needed ++ * - When KBase indicates that the GPU need not be powered: ++ * - The Shader Cores are powered off, and the GPU itself is powered off too. + * -+ * This function provides a means through which IPA_CONTROL can be informed -+ * that the GPU has entered protected mode. 
Since the GPU cannot access -+ * performance counters while in this mode, this information is useful as -+ * it implies (a) the values of these registers cannot change, so theres no -+ * point trying to read them, and (b) IPA_CONTROL has a means through which -+ * to record the duration of time the GPU is in protected mode, which can -+ * then be forwarded on to clients, who may wish, for example, to assume -+ * that the GPU was busy 100% of the time while in this mode. ++ * @note: ++ * - KBase indicates the GPU will be powered up when it has a User Process that ++ * has just started to submit Job Chains. ++ * - KBase indicates the GPU need not be powered when all the Job Chains from ++ * User Processes have finished, and it is waiting for a User Process to ++ * submit some more Job Chains. + */ -+void kbase_ipa_control_protm_entered(struct kbase_device *kbdev); + +/** -+ * kbase_ipa_control_protm_exited - Tell IPA_CONTROL that protected mode -+ * has been exited. -+ * -+ * @kbdev: Pointer to kbase device -+ * -+ * This function provides a means through which IPA_CONTROL can be informed -+ * that the GPU has exited from protected mode. ++ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand ++ * policy ++ * @dummy: Dummy member - no state needed ++ * This contains data that is private to the coarse demand power policy. + */ -+void kbase_ipa_control_protm_exited(struct kbase_device *kbdev); ++struct kbasep_pm_policy_coarse_demand { ++ int dummy; ++}; + -+#endif /* _KBASE_CSF_IPA_CONTROL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c ++extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; ++ ++#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h new file mode 100644 -index 000000000..7a939fc33 +index 000000000..75d99a30e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c -@@ -0,0 +1,3289 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h +@@ -0,0 +1,649 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -139146,3284 +138709,4067 @@ index 000000000..7a939fc33 + * + */ + -+#include -+#include -+#include -+#include "mali_kbase_csf.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include -+#include -+#include -+#include -+#include "mali_kbase_csf_tiler_heap.h" -+#include -+#include "mali_kbase_csf_timeout.h" -+#include -+#include -+#include "mali_kbase_csf_event.h" -+#include -+#include "mali_kbase_csf_mcu_shared_reg.h" ++/* ++ * Backend-specific Power Manager definitions ++ */ + -+#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) -+#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) ++#ifndef _KBASE_PM_HWACCESS_DEFS_H_ ++#define _KBASE_PM_HWACCESS_DEFS_H_ + -+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */ -+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096) ++#include "mali_kbase_pm_always_on.h" ++#include "mali_kbase_pm_coarse_demand.h" + -+#define PROTM_ALLOC_MAX_RETRIES ((u8)5) ++#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) ++#define KBASE_PM_RUNTIME 1 ++#endif + -+const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { -+ KBASE_QUEUE_GROUP_PRIORITY_HIGH, -+ KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, -+ KBASE_QUEUE_GROUP_PRIORITY_LOW, -+ KBASE_QUEUE_GROUP_PRIORITY_REALTIME ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; ++struct kbase_jd_atom; ++ ++/** ++ * enum kbase_pm_core_type - The types of core in a GPU. ++ * ++ * @KBASE_PM_CORE_L2: The L2 cache ++ * @KBASE_PM_CORE_SHADER: Shader cores ++ * @KBASE_PM_CORE_TILER: Tiler cores ++ * @KBASE_PM_CORE_STACK: Core stacks ++ * ++ * These enumerated values are used in calls to ++ * - kbase_pm_get_present_cores() ++ * - kbase_pm_get_active_cores() ++ * - kbase_pm_get_trans_cores() ++ * - kbase_pm_get_ready_cores(). ++ * ++ * They specify which type of core should be acted on. These values are set in ++ * a manner that allows core_type_to_reg() function to be simpler and more ++ * efficient. ++ */ ++enum kbase_pm_core_type { ++ KBASE_PM_CORE_L2 = L2_PRESENT_LO, ++ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, ++ KBASE_PM_CORE_TILER = TILER_PRESENT_LO, ++ KBASE_PM_CORE_STACK = STACK_PRESENT_LO +}; -+const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = { -+ BASE_QUEUE_GROUP_PRIORITY_REALTIME, -+ BASE_QUEUE_GROUP_PRIORITY_HIGH, -+ BASE_QUEUE_GROUP_PRIORITY_MEDIUM, -+ BASE_QUEUE_GROUP_PRIORITY_LOW ++ ++/* ++ * enum kbase_l2_core_state - The states used for the L2 cache & tiler power ++ * state machine. ++ */ ++enum kbase_l2_core_state { ++#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, ++#include "mali_kbase_pm_l2_states.h" ++#undef KBASEP_L2_STATE +}; + ++#if MALI_USE_CSF +/* -+ * struct irq_idle_and_protm_track - Object that tracks the idle and protected mode -+ * request information in an interrupt case across -+ * groups. -+ * -+ * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt. -+ * If NULL, no such case observed in the tracked interrupt case. -+ * @idle_seq: The highest priority group that notified idle. If no such instance in the -+ * interrupt case, marked with the largest field value: U32_MAX. -+ * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case. ++ * enum kbase_mcu_state - The states used for the MCU state machine. 
+ */ -+struct irq_idle_and_protm_track { -+ struct kbase_queue_group *protm_grp; -+ u32 idle_seq; -+ s8 idle_slot; ++enum kbase_mcu_state { ++#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, ++#include "mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE ++}; ++#endif ++ ++/* ++ * enum kbase_shader_core_state - The states used for the shaders' state machine. ++ */ ++enum kbase_shader_core_state { ++#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, ++#include "mali_kbase_pm_shader_states.h" ++#undef KBASEP_SHADER_STATE +}; + +/** -+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page. ++ * struct kbasep_pm_metrics - Metrics data collected for use by the power ++ * management framework. + * -+ * @kctx: Pointer to the kbase context ++ * @time_busy: the amount of time the GPU was busy executing jobs since the ++ * @time_period_start timestamp, in units of 256ns. This also includes ++ * time_in_protm, the time spent in protected mode, since it's assumed ++ * the GPU was busy 100% during this period. ++ * @time_idle: the amount of time the GPU was not executing jobs since the ++ * time_period_start timestamp, measured in units of 256ns. ++ * @time_in_protm: The amount of time the GPU has spent in protected mode since ++ * the time_period_start timestamp, measured in units of 256ns. ++ * @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that ++ * if two CL jobs were active for 256ns, this value would be updated ++ * with 2 (2x256ns). ++ * @busy_gl: the amount of time the GPU was busy executing GL jobs. Note that ++ * if two GL jobs were active for 256ns, this value would be updated ++ * with 2 (2x256ns). + */ -+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ -+ if (unlikely(kctx->csf.user_reg.vma)) -+ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d", -+ kctx->tgid, kctx->id); -+ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link))) -+ list_del_init(&kctx->csf.user_reg.link); -+} ++struct kbasep_pm_metrics { ++ u32 time_busy; ++ u32 time_idle; ++#if MALI_USE_CSF ++ u32 time_in_protm; ++#else ++ u32 busy_cl[2]; ++ u32 busy_gl; ++#endif ++}; + +/** -+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page. -+ * -+ * @kctx: Pointer to the kbase context -+ * -+ * @return: 0 on success. ++ * struct kbasep_pm_metrics_state - State required to collect the metrics in ++ * struct kbasep_pm_metrics ++ * @time_period_start: time at which busy/idle measurements started ++ * @ipa_control_client: Handle returned on registering DVFS as a ++ * kbase_ipa_control client ++ * @skip_gpu_active_sanity_check: Decide whether to skip GPU_ACTIVE sanity ++ * check in DVFS utilisation calculation ++ * @gpu_active: true when the GPU is executing jobs. false when ++ * not. Updated when the job scheduler informs us a job in submitted ++ * or removed from a GPU slot. ++ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. ++ * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. ++ * @lock: spinlock protecting the kbasep_pm_metrics_state structure ++ * @platform_data: pointer to data controlled by platform specific code ++ * @kbdev: pointer to kbase device for which metrics are collected ++ * @values: The current values of the power management metrics. 
The ++ * kbase_pm_get_dvfs_metrics() function is used to compare these ++ * current values with the saved values from a previous invocation. ++ * @initialized: tracks whether metrics_state has been initialized or not. ++ * @timer: timer to regularly make DVFS decisions based on the power ++ * management metrics. ++ * @timer_state: atomic indicating current @timer state, on, off, or stopped. ++ * @dvfs_last: values of the PM metrics from the last DVFS tick ++ * @dvfs_diff: different between the current and previous PM metrics. + */ -+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->csf.user_reg.link); -+ kctx->csf.user_reg.vma = NULL; -+ kctx->csf.user_reg.file_offset = 0; ++struct kbasep_pm_metrics_state { ++ ktime_t time_period_start; ++#if MALI_USE_CSF ++ void *ipa_control_client; ++ bool skip_gpu_active_sanity_check; ++#else ++ bool gpu_active; ++ u32 active_cl_ctx[2]; ++ u32 active_gl_ctx[3]; ++#endif ++ spinlock_t lock; + -+ return 0; -+} ++ void *platform_data; ++ struct kbase_device *kbdev; + -+static void put_user_pages_mmap_handle(struct kbase_context *kctx, -+ struct kbase_queue *queue) -+{ -+ unsigned long cookie_nr; ++ struct kbasep_pm_metrics values; + -+ lockdep_assert_held(&kctx->csf.lock); ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ bool initialized; ++ struct hrtimer timer; ++ atomic_t timer_state; ++ struct kbasep_pm_metrics dvfs_last; ++ struct kbasep_pm_metrics dvfs_diff; ++#endif ++}; + -+ if (queue->handle == BASEP_MEM_INVALID_HANDLE) -+ return; ++/** ++ * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer ++ * @wq: Work queue to wait for the timer to stopped ++ * @work: Work item which cancels the timer ++ * @timer: Timer for powering off the shader cores ++ * @configured_interval: Period of GPU poweroff timer ++ * @default_ticks: User-configured number of ticks to wait after the shader ++ * power down request is received before turning off the cores ++ * @configured_ticks: Power-policy configured number of ticks to wait after the ++ * shader power down request is received before turning off ++ * the cores. For simple power policies, this is equivalent ++ * to @default_ticks. ++ * @remaining_ticks: Number of remaining timer ticks until shaders are powered off ++ * @cancel_queued: True if the cancellation work item has been queued. This is ++ * required to ensure that it is not queued twice, e.g. after ++ * a reset, which could cause the timer to be incorrectly ++ * cancelled later by a delayed workitem. ++ * @needed: Whether the timer should restart itself ++ */ ++struct kbasep_pm_tick_timer_state { ++ struct workqueue_struct *wq; ++ struct work_struct work; ++ struct hrtimer timer; + -+ cookie_nr = -+ PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); ++ ktime_t configured_interval; ++ unsigned int default_ticks; ++ unsigned int configured_ticks; ++ unsigned int remaining_ticks; + -+ if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) { -+ /* free up cookie */ -+ kctx->csf.user_pages_info[cookie_nr] = NULL; -+ bitmap_set(kctx->csf.cookies, cookie_nr, 1); -+ } ++ bool cancel_queued; ++ bool needed; ++}; + -+ queue->handle = BASEP_MEM_INVALID_HANDLE; -+} ++union kbase_pm_policy_data { ++ struct kbasep_pm_policy_always_on always_on; ++ struct kbasep_pm_policy_coarse_demand coarse_demand; ++}; + -+/* Reserve a cookie, to be returned as a handle to userspace for creating -+ * the CPU mapping of the pair of input/output pages and Hw doorbell page. 
-+ * Will return 0 in case of success otherwise negative on failure. ++/** ++ * struct kbase_pm_backend_data - Data stored per device for power management. ++ * ++ * @pm_current_policy: The policy that is currently actively controlling the ++ * power state. ++ * @pm_policy_data: Private data for current PM policy. This is automatically ++ * zeroed when a policy change occurs. ++ * @reset_done: Flag when a reset is complete ++ * @reset_done_wait: Wait queue to wait for changes to @reset_done ++ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter ++ * users ++ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests ++ * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired ++ * state according to the L2 and shader power state ++ * machines ++ * @gpu_powered: Set to true when the GPU is powered and register ++ * accesses are possible, false otherwise. Access to this ++ * variable should be protected by: both the hwaccess_lock ++ * spinlock and the pm.lock mutex for writes; or at least ++ * one of either lock for reads. ++ * @gpu_ready: Indicates whether the GPU is in a state in which it is ++ * safe to perform PM changes. When false, the PM state ++ * machine needs to wait before making changes to the GPU ++ * power policy, DevFreq or core_mask, so as to avoid these ++ * changing while implicit GPU resets are ongoing. ++ * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It ++ * holds the cores enabled in a hardware counters dump, ++ * and may differ from @shaders_avail when under different ++ * states and transitions. ++ * @cg1_disabled: Set if the policy wants to keep the second core group ++ * powered off ++ * @driver_ready_for_irqs: Debug state indicating whether sufficient ++ * initialization of the driver has occurred to handle ++ * IRQs ++ * @metrics: Structure to hold metrics for the GPU ++ * @shader_tick_timer: Structure to hold the shader poweroff tick timer state ++ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. ++ * hwaccess_lock must be held when accessing ++ * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state ++ * machine should invoke the poweroff ++ * worker after the L2 has turned off. ++ * @poweron_required: true if a GPU power on is required. Should only be set ++ * when poweroff_wait_in_progress is true, and therefore the ++ * GPU can not immediately be powered on. pm.lock must be ++ * held when accessing ++ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off ++ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq ++ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete ++ * @callback_power_on: Callback when the GPU needs to be turned on. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_off: Callback when the GPU may be turned off. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to ++ * be turned off. See &struct kbase_pm_callback_conf ++ * @callback_power_resume: Callback when a resume occurs and the GPU needs to ++ * be turned on. See &struct kbase_pm_callback_conf ++ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_runtime_off: Callback when the GPU may be turned off. 
See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_runtime_idle: Optional callback invoked by runtime PM core ++ * when the GPU may be idle. See ++ * &struct kbase_pm_callback_conf ++ * @callback_soft_reset: Optional callback to software reset the GPU. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_runtime_gpu_idle: Callback invoked by Kbase when GPU has ++ * become idle. ++ * See &struct kbase_pm_callback_conf. ++ * @callback_power_runtime_gpu_active: Callback when GPU has become active and ++ * @callback_power_runtime_gpu_idle was ++ * called previously. ++ * See &struct kbase_pm_callback_conf. ++ * @ca_cores_enabled: Cores that are currently available ++ * @mcu_state: The current state of the micro-control unit, only applicable ++ * to GPUs that have such a component ++ * @l2_state: The current state of the L2 cache state machine. See ++ * &enum kbase_l2_core_state ++ * @l2_desired: True if the L2 cache should be powered on by the L2 cache state ++ * machine ++ * @l2_always_on: If true, disable powering down of l2 cache. ++ * @shaders_state: The current state of the shader state machine. ++ * @shaders_avail: This is updated by the state machine when it is in a state ++ * where it can write to the SHADER_PWRON or PWROFF registers ++ * to have the same set of available cores as specified by ++ * @shaders_desired_mask. So would precisely indicate the cores ++ * that are currently available. This is internal to shader ++ * state machine of JM GPUs and should *not* be modified ++ * elsewhere. ++ * @shaders_desired_mask: This is updated by the state machine when it is in ++ * a state where it can handle changes to the core ++ * availability (either by DVFS or sysfs). This is ++ * internal to the shader state machine and should ++ * *not* be modified elsewhere. ++ * @shaders_desired: True if the PM active count or power policy requires the ++ * shader cores to be on. This is used as an input to the ++ * shader power state machine. The current state of the ++ * cores may be different, but there should be transitions in ++ * progress that will eventually achieve this state (assuming ++ * that the policy doesn't change its mind in the mean time). ++ * @mcu_desired: True if the micro-control unit should be powered on ++ * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy ++ * change transition, needs the mcu/L2 to be brought back to the ++ * off state and remain in that state until the flag is cleared. ++ * @csf_pm_sched_flags: CSF Dynamic PM control flags in accordance to the ++ * current active PM policy. This field is updated whenever a ++ * new policy is activated. ++ * @policy_change_lock: Used to serialize the policy change calls. In CSF case, ++ * the change of policy may involve the scheduler to ++ * suspend running CSGs and then reconfigure the MCU. ++ * @core_idle_wq: Workqueue for executing the @core_idle_work. ++ * @core_idle_work: Work item used to wait for undesired cores to become inactive. ++ * The work item is enqueued when Host controls the power for ++ * shader cores and down scaling of cores is performed. ++ * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be ++ * supported by the kernel driver or not. If this ++ * flag is not set, then HW state is directly saved ++ * when GPU idle notification is received. ++ * @gpu_sleep_mode_active: Flag to indicate that the GPU needs to be in sleep ++ * mode. 
It is set when the GPU idle notification is ++ * received and is cleared when HW state has been ++ * saved in the runtime suspend callback function or ++ * when the GPU power down is aborted if GPU became ++ * active whilst it was in sleep mode. The flag is ++ * guarded with hwaccess_lock spinlock. ++ * @exit_gpu_sleep_mode: Flag to indicate the GPU can now exit the sleep ++ * mode due to the submission of work from Userspace. ++ * The flag is guarded with hwaccess_lock spinlock. ++ * The @gpu_sleep_mode_active flag is not immediately ++ * reset when this flag is set, this is to ensure that ++ * MCU doesn't gets disabled undesirably without the ++ * suspend of CSGs. That could happen when ++ * scheduler_pm_active() and scheduler_pm_idle() gets ++ * called before the Scheduler gets reactivated. ++ * @gpu_idled: Flag to ensure that the gpu_idle & gpu_active callbacks are ++ * always called in pair. The flag is guarded with pm.lock mutex. ++ * @gpu_wakeup_override: Flag to force the power up of L2 cache & reactivation ++ * of MCU. This is set during the runtime suspend ++ * callback function, when GPU needs to exit the sleep ++ * mode for the saving the HW state before power down. ++ * @db_mirror_interrupt_enabled: Flag tracking if the Doorbell mirror interrupt ++ * is enabled or not. ++ * @in_reset: True if a GPU is resetting and normal power manager operation is ++ * suspended ++ * @partial_shaderoff: True if we want to partial power off shader cores, ++ * it indicates a partial shader core off case, ++ * do some special operation for such case like flush ++ * L2 cache because of GPU2017-861 ++ * @protected_entry_transition_override : True if GPU reset is being used ++ * before entering the protected mode and so ++ * the reset handling behaviour is being ++ * overridden. ++ * @protected_transition_override : True if a protected mode transition is in ++ * progress and is overriding power manager ++ * behaviour. ++ * @protected_l2_override : Non-zero if the L2 cache is required during a ++ * protected mode transition. Has no effect if not ++ * transitioning. ++ * @hwcnt_desired: True if we want GPU hardware counters to be enabled. ++ * @hwcnt_disabled: True if GPU hardware counters are not enabled. ++ * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if ++ * atomic disable is not possible. ++ * @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table ++ * for safe L2 power cycle. ++ * If no opp-mali-errata-1485982 specified, ++ * the slowest clock will be taken. ++ * @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle. ++ * @gpu_clock_slow_down_desired: True if we want lower GPU clock ++ * for safe L2 power cycle. False if want GPU clock ++ * to back to normalized one. This is updated only ++ * in L2 state machine, kbase_pm_l2_update_state. ++ * @gpu_clock_slowed_down: During L2 power cycle, ++ * True if gpu clock is set at lower frequency ++ * for safe L2 power down, False if gpu clock gets ++ * restored to previous speed. This is updated only in ++ * work function, kbase_pm_gpu_clock_control_worker. ++ * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle ++ * using gpu_clock_control ++ * ++ * This structure contains data for the power management framework. There is one ++ * instance of this structure per device in the system. ++ * ++ * Note: ++ * During an IRQ, @pm_current_policy can be NULL when the policy is being ++ * changed with kbase_pm_set_policy(). 
The change is protected under ++ * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context ++ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will ++ * re-issue the policy functions that would have been done under IRQ. + */ -+static int get_user_pages_mmap_handle(struct kbase_context *kctx, -+ struct kbase_queue *queue) -+{ -+ unsigned long cookie, cookie_nr; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ if (bitmap_empty(kctx->csf.cookies, -+ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) { -+ dev_err(kctx->kbdev->dev, -+ "No csf cookies available for allocation!"); -+ return -ENOMEM; -+ } ++struct kbase_pm_backend_data { ++ const struct kbase_pm_policy *pm_current_policy; ++ union kbase_pm_policy_data pm_policy_data; ++ bool reset_done; ++ wait_queue_head_t reset_done_wait; ++ int gpu_cycle_counter_requests; ++ spinlock_t gpu_cycle_counter_requests_lock; + -+ /* allocate a cookie */ -+ cookie_nr = find_first_bit(kctx->csf.cookies, -+ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); -+ if (kctx->csf.user_pages_info[cookie_nr]) { -+ dev_err(kctx->kbdev->dev, -+ "Inconsistent state of csf cookies!"); -+ return -EINVAL; -+ } -+ kctx->csf.user_pages_info[cookie_nr] = queue; -+ bitmap_clear(kctx->csf.cookies, cookie_nr, 1); ++ wait_queue_head_t gpu_in_desired_state_wait; + -+ /* relocate to correct base */ -+ cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); -+ cookie <<= PAGE_SHIFT; ++ bool gpu_powered; ++ bool gpu_ready; + -+ queue->handle = (u64)cookie; ++ u64 pm_shaders_core_mask; + -+ return 0; -+} ++ bool cg1_disabled; + -+static void init_user_io_pages(struct kbase_queue *queue) -+{ -+ u32 *input_addr = (u32 *)(queue->user_io_addr); -+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ bool driver_ready_for_irqs; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ input_addr[CS_INSERT_LO/4] = 0; -+ input_addr[CS_INSERT_HI/4] = 0; ++ struct kbasep_pm_metrics_state metrics; + -+ input_addr[CS_EXTRACT_INIT_LO/4] = 0; -+ input_addr[CS_EXTRACT_INIT_HI/4] = 0; ++ struct kbasep_pm_tick_timer_state shader_tick_timer; + -+ output_addr[CS_EXTRACT_LO/4] = 0; -+ output_addr[CS_EXTRACT_HI/4] = 0; ++ bool poweroff_wait_in_progress; ++ bool invoke_poweroff_wait_wq_when_l2_off; ++ bool poweron_required; + -+ output_addr[CS_ACTIVE/4] = 0; -+} ++ struct workqueue_struct *gpu_poweroff_wait_wq; ++ struct work_struct gpu_poweroff_wait_work; + -+static void kernel_unmap_user_io_pages(struct kbase_context *kctx, -+ struct kbase_queue *queue) -+{ -+ kbase_gpu_vm_lock(kctx); ++ wait_queue_head_t poweroff_wait; + -+ vunmap(queue->user_io_addr); ++ int (*callback_power_on)(struct kbase_device *kbdev); ++ void (*callback_power_off)(struct kbase_device *kbdev); ++ void (*callback_power_suspend)(struct kbase_device *kbdev); ++ void (*callback_power_resume)(struct kbase_device *kbdev); ++ int (*callback_power_runtime_on)(struct kbase_device *kbdev); ++ void (*callback_power_runtime_off)(struct kbase_device *kbdev); ++ int (*callback_power_runtime_idle)(struct kbase_device *kbdev); ++ int (*callback_soft_reset)(struct kbase_device *kbdev); ++ void (*callback_power_runtime_gpu_idle)(struct kbase_device *kbdev); ++ void (*callback_power_runtime_gpu_active)(struct kbase_device *kbdev); + -+ WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); -+ atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); ++ u64 ca_cores_enabled; + -+ kbase_gpu_vm_unlock(kctx); -+} ++#if MALI_USE_CSF ++ enum 
kbase_mcu_state mcu_state; ++#endif ++ enum kbase_l2_core_state l2_state; ++ enum kbase_shader_core_state shaders_state; ++ u64 shaders_avail; ++ u64 shaders_desired_mask; ++#if MALI_USE_CSF ++ bool mcu_desired; ++ bool policy_change_clamp_state_to_off; ++ unsigned int csf_pm_sched_flags; ++ struct mutex policy_change_lock; ++ struct workqueue_struct *core_idle_wq; ++ struct work_struct core_idle_work; + -+static int kernel_map_user_io_pages(struct kbase_context *kctx, -+ struct kbase_queue *queue) -+{ -+ struct page *page_list[2]; -+ pgprot_t cpu_map_prot; -+ unsigned long flags; -+ char *user_io_addr; -+ int ret = 0; -+ size_t i; ++#ifdef KBASE_PM_RUNTIME ++ bool gpu_sleep_supported; ++ bool gpu_sleep_mode_active; ++ bool exit_gpu_sleep_mode; ++ bool gpu_idled; ++ bool gpu_wakeup_override; ++ bool db_mirror_interrupt_enabled; ++#endif ++#endif ++ bool l2_desired; ++ bool l2_always_on; ++ bool shaders_desired; + -+ kbase_gpu_vm_lock(kctx); ++ bool in_reset; + -+ if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - -+ atomic_read(&kctx->permanent_mapped_pages))) { -+ ret = -ENOMEM; -+ goto unlock; -+ } ++#if !MALI_USE_CSF ++ bool partial_shaderoff; + -+ /* The pages are mapped to Userspace also, so use the same mapping -+ * attributes as used inside the CPU page fault handler. -+ */ -+ if (kctx->kbdev->system_coherency == COHERENCY_NONE) -+ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); -+ else -+ cpu_map_prot = PAGE_KERNEL; ++ bool protected_entry_transition_override; ++ bool protected_transition_override; ++ int protected_l2_override; ++#endif + -+ for (i = 0; i < ARRAY_SIZE(page_list); i++) -+ page_list[i] = as_page(queue->phys[i]); ++ bool hwcnt_desired; ++ bool hwcnt_disabled; ++ struct work_struct hwcnt_disable_work; + -+ user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); ++ u64 gpu_clock_suspend_freq; ++ bool gpu_clock_slow_down_wa; ++ bool gpu_clock_slow_down_desired; ++ bool gpu_clock_slowed_down; ++ struct work_struct gpu_clock_control_work; ++}; + -+ if (!user_io_addr) { -+ dev_err(kctx->kbdev->dev, -+ "%s(): user_io_addr is NULL, queue: %p", -+ __func__, -+ queue); -+ ret = -ENOMEM; -+ } else { -+ atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages); -+ } ++#if MALI_USE_CSF ++/* CSF PM flag, signaling that the MCU shader Core should be kept on */ ++#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) ++/* CSF PM flag, signaling no scheduler suspension on idle groups */ ++#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) ++/* CSF PM flag, signaling no scheduler suspension on no runnable groups */ ++#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) + -+ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); -+ queue->user_io_addr = user_io_addr; -+ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); ++/* The following flags corresponds to existing defined PM policies */ ++#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ ++ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ ++ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) ++#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) ++#if !MALI_CUSTOMER_RELEASE ++#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE) ++#endif ++#endif + -+unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return ret; -+} ++/* List of policy IDs */ ++enum kbase_pm_policy_id { ++ KBASE_PM_POLICY_ID_COARSE_DEMAND, ++#if !MALI_CUSTOMER_RELEASE ++ KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, ++#endif ++ KBASE_PM_POLICY_ID_ALWAYS_ON ++}; + -+static void term_queue_group(struct kbase_queue_group *group); -+static void 
get_queue(struct kbase_queue *queue); -+static void release_queue(struct kbase_queue *queue); ++/** ++ * enum kbase_pm_policy_event - PM Policy event ID ++ */ ++enum kbase_pm_policy_event { ++ /** ++ * @KBASE_PM_POLICY_EVENT_IDLE: Indicates that the GPU power state ++ * model has determined that the GPU has gone idle. ++ */ ++ KBASE_PM_POLICY_EVENT_IDLE, ++ /** ++ * @KBASE_PM_POLICY_EVENT_POWER_ON: Indicates that the GPU state model ++ * is preparing to power on the GPU. ++ */ ++ KBASE_PM_POLICY_EVENT_POWER_ON, ++ /** ++ * @KBASE_PM_POLICY_EVENT_TIMER_HIT: Indicates that the GPU became ++ * active while the Shader Tick Timer was holding the GPU in a powered ++ * on state. ++ */ ++ KBASE_PM_POLICY_EVENT_TIMER_HIT, ++ /** ++ * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not ++ * become active before the Shader Tick Timer timeout occurred. ++ */ ++ KBASE_PM_POLICY_EVENT_TIMER_MISS, ++}; + +/** -+ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated -+ * for a queue at the time of bind. -+ * -+ * @kctx: Address of the kbase context within which the queue was created. -+ * @queue: Pointer to the queue to be unlinked. -+ * -+ * This function will free the pair of physical pages allocated for a GPU -+ * command queue, and also release the hardware doorbell page, that were mapped -+ * into the process address space to enable direct submission of commands to -+ * the hardware. Also releases the reference taken on the queue when the mapping -+ * was created. -+ * -+ * This function will be called only when the mapping is being removed and -+ * so the resources for queue will not get freed up until the mapping is -+ * removed even though userspace could have terminated the queue. -+ * Kernel will ensure that the termination of Kbase context would only be -+ * triggered after the mapping is removed. ++ * struct kbase_pm_policy - Power policy structure. + * -+ * If an explicit or implicit unbind was missed by the userspace then the -+ * mapping will persist. On process exit kernel itself will remove the mapping. ++ * @name: The name of this policy ++ * @init: Function called when the policy is selected ++ * @term: Function called when the policy is unselected ++ * @shaders_needed: Function called to find out if shader cores are needed ++ * @get_core_active: Function called to get the current overall GPU power ++ * state ++ * @handle_event: Function called when a PM policy event occurs. Should be ++ * set to NULL if the power policy doesn't require any ++ * event notifications. ++ * @id: Field indicating an ID for this policy. This is not ++ * necessarily the same as its index in the list returned ++ * by kbase_pm_list_policies(). ++ * It is used purely for debugging. ++ * @pm_sched_flags: Policy associated with CSF PM scheduling operational flags. ++ * Pre-defined required flags exist for each of the ++ * ARM released policies, such as 'always_on', 'coarse_demand' ++ * and etc. ++ * Each power policy exposes a (static) instance of this structure which ++ * contains function pointers to the policy's methods. 
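++ *
++ * As an illustrative sketch only (the names below are hypothetical and
++ * designated initializers are used for clarity), such a static instance
++ * might look like:
++ *
++ *   static const struct kbase_pm_policy example_policy_ops = {
++ *           .name            = "example",
++ *           .init            = example_init,
++ *           .term            = example_term,
++ *           .shaders_needed  = example_shaders_needed,
++ *           .get_core_active = example_get_core_active,
++ *           .handle_event    = NULL,
++ *           .id              = KBASE_PM_POLICY_ID_ALWAYS_ON,
++ *   };
++ *
++ * with @pm_sched_flags additionally set when MALI_USE_CSF is enabled.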
+ */ -+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) -+{ -+ kernel_unmap_user_io_pages(kctx, queue); -+ -+ kbase_mem_pool_free_pages( -+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], -+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); -+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); -+ -+ /* The user_io_gpu_va should have been unmapped inside the scheduler */ -+ WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping"); ++struct kbase_pm_policy { ++ char *name; + -+ /* If the queue has already been terminated by userspace -+ * then the ref count for queue object will drop to 0 here. ++ /* ++ * Function called when the policy is selected ++ * ++ * This should initialize the kbdev->pm.pm_policy_data structure. It ++ * should not attempt to make any changes to hardware state. ++ * ++ * It is undefined what state the cores are in when the function is ++ * called. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) + */ -+ release_queue(queue); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); -+ -+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ int ret; -+ -+ lockdep_assert_held(&kctx->csf.lock); ++ void (*init)(struct kbase_device *kbdev); + -+ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], -+ KBASEP_NUM_CS_USER_IO_PAGES, -+ queue->phys, false, kctx->task); -+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { -+ /* Marking both the phys to zero for indicating there is no phys allocated */ -+ queue->phys[0].tagged_addr = 0; -+ queue->phys[1].tagged_addr = 0; -+ return -ENOMEM; -+ } ++ /* ++ * Function called when the policy is unselected. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ */ ++ void (*term)(struct kbase_device *kbdev); + -+ ret = kernel_map_user_io_pages(kctx, queue); -+ if (ret) -+ goto kernel_map_failed; ++ /* ++ * Function called to find out if shader cores are needed ++ * ++ * This needs to at least satisfy kbdev->pm.backend.shaders_desired, ++ * and so must never return false when shaders_desired is true. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * ++ * Return: true if shader cores are needed, false otherwise ++ */ ++ bool (*shaders_needed)(struct kbase_device *kbdev); + -+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); -+ init_user_io_pages(queue); ++ /* ++ * Function called to get the current overall GPU power state ++ * ++ * This function must meet or exceed the requirements for power ++ * indicated by kbase_pm_is_active(). ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * ++ * Return: true if the GPU should be powered, false otherwise ++ */ ++ bool (*get_core_active)(struct kbase_device *kbdev); + -+ /* user_io_gpu_va is only mapped when scheduler decides to put the queue -+ * on slot at runtime. Initialize it to 0, signalling no mapping. 
++ /* ++ * Function called when a power event occurs ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @event: The id of the power event that has occurred + */ -+ queue->user_io_gpu_va = 0; ++ void (*handle_event)(struct kbase_device *kbdev, ++ enum kbase_pm_policy_event event); + -+ mutex_lock(&kbdev->csf.reg_lock); -+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) -+ kbdev->csf.db_file_offsets = 0; ++ enum kbase_pm_policy_id id; + -+ queue->db_file_offset = kbdev->csf.db_file_offsets; -+ kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; -+ WARN(kbase_refcount_read(&queue->refcount) != 1, -+ "Incorrect refcounting for queue object\n"); -+ /* This is the second reference taken on the queue object and -+ * would be dropped only when the IO mapping is removed either -+ * explicitly by userspace or implicitly by kernel on process exit. ++#if MALI_USE_CSF ++ /* Policy associated with CSF PM scheduling operational flags. ++ * There are pre-defined required flags exist for each of the ++ * ARM released policies, such as 'always_on', 'coarse_demand' ++ * and etc. + */ -+ get_queue(queue); -+ queue->bind_state = KBASE_CSF_QUEUE_BOUND; -+ mutex_unlock(&kbdev->csf.reg_lock); ++ unsigned int pm_sched_flags; ++#endif ++}; + -+ return 0; ++#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +new file mode 100644 +index 000000000..5be8acd75 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +@@ -0,0 +1,3417 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+kernel_map_failed: -+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], -+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); -+ /* Marking both the phys to zero for indicating there is no phys allocated */ -+ queue->phys[0].tagged_addr = 0; -+ queue->phys[1].tagged_addr = 0; ++/* ++ * Base kernel Power Management hardware control ++ */ + -+ return ret; -+} -+KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, -+ u8 group_handle) -+{ -+ uint index = group_handle; ++#if MALI_USE_CSF ++#include ++#else ++#include ++#endif /* !MALI_USE_CSF */ + -+ lockdep_assert_held(&kctx->csf.lock); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++#if MALI_USE_CSF ++#include ++#endif + -+ if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) { -+ if (WARN_ON(kctx->csf.queue_groups[index]->handle != index)) -+ return NULL; -+ return kctx->csf.queue_groups[index]; -+ } ++#if MALI_USE_CSF ++#include ++#endif + -+ return NULL; -+} ++#include + -+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) -+{ -+ return find_queue_group(kctx, group_handle); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); ++#ifdef CONFIG_MALI_CORESTACK ++bool corestack_driver_control = true; ++#else ++bool corestack_driver_control; /* Default value of 0/false */ ++#endif ++module_param(corestack_driver_control, bool, 0444); ++MODULE_PARM_DESC(corestack_driver_control, ++ "Let the driver power on/off the GPU core stack independently " ++ "without involving the Power Domain Controller. This should " ++ "only be enabled on platforms for which integration of the PDC " ++ "to the Mali GPU is known to be problematic."); ++KBASE_EXPORT_TEST_API(corestack_driver_control); + -+int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, -+ u8 group_handle) -+{ -+ struct kbase_queue_group *group; ++/** ++ * enum kbasep_pm_action - Actions that can be performed on a core. ++ * ++ * @ACTION_PRESENT: The cores that are present ++ * @ACTION_READY: The cores that are ready ++ * @ACTION_PWRON: Power on the cores specified ++ * @ACTION_PWROFF: Power off the cores specified ++ * @ACTION_PWRTRANS: The cores that are transitioning ++ * @ACTION_PWRACTIVE: The cores that are active ++ * ++ * This enumeration is private to the file. Its values are set to allow ++ * core_type_to_reg() function, which decodes this enumeration, to be simpler ++ * and more efficient. ++ */ ++enum kbasep_pm_action { ++ ACTION_PRESENT = 0, ++ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), ++ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), ++ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), ++ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), ++ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) ++}; + -+ mutex_lock(&kctx->csf.lock); -+ group = find_queue_group(kctx, group_handle); -+ mutex_unlock(&kctx->csf.lock); ++static u64 kbase_pm_get_state( ++ struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action); + -+ return group ? 
0 : -EINVAL; -+} ++static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev); + -+static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr) ++#if MALI_USE_CSF ++bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) +{ -+ struct kbase_queue *queue; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ list_for_each_entry(queue, &kctx->csf.queue_list, link) { -+ if (base_addr == queue->base_addr) -+ return queue; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return NULL; -+} ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return false; + -+static void get_queue(struct kbase_queue *queue) -+{ -+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); -+} ++ if (kbdev->csf.scheduler.pm_active_count && ++ kbdev->pm.backend.mcu_desired) ++ return true; + -+static void release_queue(struct kbase_queue *queue) -+{ -+ lockdep_assert_held(&queue->kctx->csf.lock); -+ if (kbase_refcount_dec_and_test(&queue->refcount)) { -+ /* The queue can't still be on the per context list. */ -+ WARN_ON(!list_empty(&queue->link)); -+ WARN_ON(queue->group); -+ dev_dbg(queue->kctx->kbdev->dev, -+ "Remove any pending command queue fatal from ctx %d_%d", -+ queue->kctx->tgid, queue->kctx->id); -+ kbase_csf_event_remove_error(queue->kctx, &queue->error); ++#ifdef KBASE_PM_RUNTIME ++ if (kbdev->pm.backend.gpu_wakeup_override) ++ return true; ++#endif + -+ /* After this the Userspace would be able to free the -+ * memory for GPU queue. In case the Userspace missed -+ * terminating the queue, the cleanup will happen on -+ * context termination where tear down of region tracker -+ * would free up the GPU queue memory. -+ */ -+ kbase_gpu_vm_lock(queue->kctx); -+ kbase_va_region_no_user_free_dec(queue->queue_reg); -+ kbase_gpu_vm_unlock(queue->kctx); ++ /* MCU is supposed to be ON, only when scheduler.pm_active_count is ++ * non zero. But for always_on policy, the MCU needs to be kept on, ++ * unless policy changing transition needs it off. ++ */ + -+ kfree(queue); -+ } ++ return (kbdev->pm.backend.mcu_desired && ++ kbase_pm_no_mcu_core_pwroff(kbdev) && ++ !kbdev->pm.backend.policy_change_clamp_state_to_off); +} ++#endif + -+static void oom_event_worker(struct work_struct *data); -+static void cs_error_worker(struct work_struct *data); -+ -+/* Between reg and reg_ex, one and only one must be null */ -+static int csf_queue_register_internal(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register *reg, -+ struct kbase_ioctl_cs_queue_register_ex *reg_ex) ++bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) +{ -+ struct kbase_queue *queue; -+ int ret = 0; -+ struct kbase_va_region *region; -+ u64 queue_addr; -+ size_t queue_size; -+ -+ /* Only one pointer expected, otherwise coding error */ -+ if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) { -+ dev_dbg(kctx->kbdev->dev, -+ "Error, one and only one param-ptr expected!"); -+ return -EINVAL; -+ } -+ -+ /* struct kbase_ioctl_cs_queue_register_ex contains a full -+ * struct kbase_ioctl_cs_queue_register at the start address. So -+ * the pointer can be safely cast to pointing to a -+ * kbase_ioctl_cs_queue_register object. 
-+ */ -+ if (reg_ex) -+ reg = (struct kbase_ioctl_cs_queue_register *)reg_ex; -+ -+ /* Validate the queue priority */ -+ if (reg->priority > BASE_QUEUE_MAX_PRIORITY) -+ return -EINVAL; -+ -+ queue_addr = reg->buffer_gpu_addr; -+ queue_size = reg->buffer_size >> PAGE_SHIFT; -+ -+ mutex_lock(&kctx->csf.lock); -+ -+ /* Check if queue is already registered */ -+ if (find_queue(kctx, queue_addr) != NULL) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ /* Check if the queue address is valid */ -+ kbase_gpu_vm_lock(kctx); -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, -+ queue_addr); -+ -+ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || -+ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { -+ ret = -ENOENT; -+ goto out_unlock_vm; -+ } -+ -+ if (queue_size > (region->nr_pages - -+ ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) { -+ ret = -EINVAL; -+ goto out_unlock_vm; -+ } -+ -+ /* Check address validity on cs_trace buffer etc. Don't care -+ * if not enabled (i.e. when size is 0). -+ */ -+ if (reg_ex && reg_ex->ex_buffer_size) { -+ int buf_pages = (reg_ex->ex_buffer_size + -+ (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; -+ struct kbase_va_region *region_ex = -+ kbase_region_tracker_find_region_enclosing_address(kctx, -+ reg_ex->ex_buffer_base); -+ -+ if (kbase_is_region_invalid_or_free(region_ex)) { -+ ret = -ENOENT; -+ goto out_unlock_vm; -+ } -+ -+ if (buf_pages > (region_ex->nr_pages - -+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) { -+ ret = -EINVAL; -+ goto out_unlock_vm; -+ } -+ -+ region_ex = kbase_region_tracker_find_region_enclosing_address( -+ kctx, reg_ex->ex_offset_var_addr); -+ if (kbase_is_region_invalid_or_free(region_ex)) { -+ ret = -ENOENT; -+ goto out_unlock_vm; -+ } -+ } -+ -+ queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL); -+ -+ if (!queue) { -+ ret = -ENOMEM; -+ goto out_unlock_vm; -+ } -+ -+ queue->kctx = kctx; -+ queue->base_addr = queue_addr; -+ -+ queue->queue_reg = region; -+ kbase_va_region_no_user_free_inc(region); -+ -+ queue->size = (queue_size << PAGE_SHIFT); -+ queue->csi_index = KBASEP_IF_NR_INVALID; -+ queue->enabled = false; -+ -+ queue->priority = reg->priority; -+ kbase_refcount_set(&queue->refcount, 1); ++#if !MALI_USE_CSF ++ if (kbdev->pm.backend.protected_entry_transition_override) ++ return false; + -+ queue->group = NULL; -+ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; -+ queue->handle = BASEP_MEM_INVALID_HANDLE; -+ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ if (kbdev->pm.backend.protected_transition_override && ++ kbdev->pm.backend.protected_l2_override) ++ return true; + -+ queue->status_wait = 0; -+ queue->sync_ptr = 0; -+ queue->sync_value = 0; ++ if (kbdev->pm.backend.protected_transition_override && ++ !kbdev->pm.backend.shaders_desired) ++ return false; ++#else ++ if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off)) ++ return false; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ queue->saved_cmd_ptr = 0; ++ /* Power up the L2 cache only when MCU is desired */ ++ if (likely(kbdev->csf.firmware_inited)) ++ return kbase_pm_is_mcu_desired(kbdev); +#endif + -+ queue->sb_status = 0; -+ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; -+ -+ atomic_set(&queue->pending, 0); -+ -+ INIT_LIST_HEAD(&queue->link); -+ INIT_LIST_HEAD(&queue->error.link); -+ INIT_WORK(&queue->oom_event_work, oom_event_worker); -+ INIT_WORK(&queue->cs_error_work, cs_error_worker); -+ list_add(&queue->link, &kctx->csf.queue_list); -+ -+ queue->extract_ofs = 0; -+ -+ 
region->user_data = queue; -+ -+ /* Initialize the cs_trace configuration parameters, When buffer_size -+ * is 0, trace is disabled. Here we only update the fields when -+ * enabled, otherwise leave them as default zeros. -+ */ -+ if (reg_ex && reg_ex->ex_buffer_size) { -+ u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET( -+ 0, reg_ex->ex_event_size); -+ cfg = CS_INSTR_CONFIG_EVENT_STATE_SET( -+ cfg, reg_ex->ex_event_state); -+ -+ queue->trace_cfg = cfg; -+ queue->trace_buffer_size = reg_ex->ex_buffer_size; -+ queue->trace_buffer_base = reg_ex->ex_buffer_base; -+ queue->trace_offset_ptr = reg_ex->ex_offset_var_addr; -+ } -+ -+out_unlock_vm: -+ kbase_gpu_vm_unlock(kctx); -+out: -+ mutex_unlock(&kctx->csf.lock); -+ -+ return ret; ++ return kbdev->pm.backend.l2_desired; +} + -+int kbase_csf_queue_register(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register *reg) ++#if !MALI_USE_CSF ++void kbase_pm_protected_override_enable(struct kbase_device *kbdev) +{ -+ /* Validate the ring buffer configuration parameters */ -+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || -+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || -+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || -+ reg->buffer_gpu_addr & ~PAGE_MASK) -+ return -EINVAL; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return csf_queue_register_internal(kctx, reg, NULL); ++ kbdev->pm.backend.protected_transition_override = true; +} -+ -+int kbase_csf_queue_register_ex(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register_ex *reg) ++void kbase_pm_protected_override_disable(struct kbase_device *kbdev) +{ -+ struct kbase_csf_global_iface const *const iface = -+ &kctx->kbdev->csf.global_iface; -+ u32 const glb_version = iface->version; -+ u32 instr = iface->instr_features; -+ u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); -+ u32 min_buf_size = (1u << reg->ex_event_size) * -+ GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); -+ -+ /* If cs_trace_command not supported, the call fails */ -+ if (glb_version < kbase_csf_interface_version(1, 1, 0)) -+ return -EINVAL; -+ -+ /* Validate the ring buffer configuration parameters */ -+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || -+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || -+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || -+ reg->buffer_gpu_addr & ~PAGE_MASK) -+ return -EINVAL; -+ -+ /* Validate the cs_trace configuration parameters */ -+ if (reg->ex_buffer_size && -+ ((reg->ex_event_size > max_size) || -+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || -+ (reg->ex_buffer_size < min_buf_size))) -+ return -EINVAL; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return csf_queue_register_internal(kctx, NULL, reg); ++ kbdev->pm.backend.protected_transition_override = false; +} + -+static void unbind_queue(struct kbase_context *kctx, -+ struct kbase_queue *queue); -+ -+void kbase_csf_queue_terminate(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_terminate *term) ++int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_queue *queue; -+ int err; -+ bool reset_prevented = false; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless", -+ term->buffer_gpu_addr); -+ else -+ reset_prevented = true; ++ 
WARN_ON(!kbdev->protected_mode_transition); + -+ mutex_lock(&kctx->csf.lock); -+ queue = find_queue(kctx, term->buffer_gpu_addr); ++ if (kbdev->pm.backend.l2_always_on && ++ (kbdev->system_coherency == COHERENCY_ACE)) { ++ WARN_ON(kbdev->pm.backend.protected_entry_transition_override); + -+ if (queue) { -+ /* As the GPU queue has been terminated by the -+ * user space, undo the actions that were performed when the -+ * queue was registered i.e. remove the queue from the per -+ * context list & release the initial reference. The subsequent -+ * lookups for the queue in find_queue() would fail. ++ /* ++ * If there is already a GPU reset pending then wait for it to ++ * complete before initiating a special reset for protected ++ * mode entry. + */ -+ list_del_init(&queue->link); -+ -+ /* Stop the CSI to which queue was bound */ -+ unbind_queue(kctx, queue); -+ -+ kbase_gpu_vm_lock(kctx); -+ if (!WARN_ON(!queue->queue_reg)) -+ queue->queue_reg->user_data = NULL; -+ kbase_gpu_vm_unlock(kctx); ++ if (kbase_reset_gpu_silent(kbdev)) ++ return -EAGAIN; + -+ release_queue(queue); ++ kbdev->pm.backend.protected_entry_transition_override = true; + } + -+ mutex_unlock(&kctx->csf.lock); -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); ++ return 0; +} + -+int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind) ++void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) +{ -+ struct kbase_queue *queue; -+ struct kbase_queue_group *group; -+ u8 max_streams; -+ int ret = -EINVAL; -+ -+ mutex_lock(&kctx->csf.lock); -+ -+ group = find_queue_group(kctx, bind->in.group_handle); -+ queue = find_queue(kctx, bind->in.buffer_gpu_addr); -+ -+ if (!group || !queue) -+ goto out; -+ -+ /* For the time being, all CSGs have the same number of CSs -+ * so we check CSG 0 for this number -+ */ -+ max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num; -+ -+ if (bind->in.csi_index >= max_streams) -+ goto out; -+ -+ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) -+ goto out; -+ -+ if (queue->group || group->bound_queues[bind->in.csi_index]) -+ goto out; -+ -+ ret = get_user_pages_mmap_handle(kctx, queue); -+ if (ret) -+ goto out; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ bind->out.mmap_handle = queue->handle; -+ group->bound_queues[bind->in.csi_index] = queue; -+ queue->group = group; -+ queue->csi_index = bind->in.csi_index; -+ queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; ++ WARN_ON(!kbdev->protected_mode_transition); + -+out: -+ mutex_unlock(&kctx->csf.lock); ++ if (kbdev->pm.backend.l2_always_on && ++ (kbdev->system_coherency == COHERENCY_ACE)) { ++ WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); + -+ return ret; ++ kbdev->pm.backend.protected_entry_transition_override = false; ++ } +} + -+static struct kbase_queue_group *get_bound_queue_group( -+ struct kbase_queue *queue) ++void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) +{ -+ struct kbase_context *kctx = queue->kctx; -+ struct kbase_queue_group *group; -+ -+ if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) -+ return NULL; -+ -+ if (!queue->group) -+ return NULL; -+ -+ if (queue->csi_index == KBASEP_IF_NR_INVALID) { -+ dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n"); -+ return NULL; -+ } -+ -+ group = queue->group; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (group->bound_queues[queue->csi_index] != queue) { -+ dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n"); -+ return 
NULL; ++ if (override) { ++ kbdev->pm.backend.protected_l2_override++; ++ WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); ++ } else { ++ kbdev->pm.backend.protected_l2_override--; ++ WARN_ON(kbdev->pm.backend.protected_l2_override < 0); + } + -+ return group; -+} -+ -+static void enqueue_gpu_submission_work(struct kbase_context *const kctx) -+{ -+ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); ++ kbase_pm_update_state(kbdev); +} ++#endif + +/** -+ * pending_submission_worker() - Work item to process pending kicked GPU command queues. ++ * core_type_to_reg - Decode a core type and action to a register. + * -+ * @work: Pointer to pending_submission_work. ++ * @core_type: The type of core ++ * @action: The type of action + * -+ * This function starts all pending queues, for which the work -+ * was previously submitted via ioctl call from application thread. -+ * If the queue is already scheduled and resident, it will be started -+ * right away, otherwise once the group is made resident. ++ * Given a core type (defined by kbase_pm_core_type) and an action (defined ++ * by kbasep_pm_action) this function will return the register offset that ++ * will perform the action on the core type. The register returned is the _LO ++ * register and an offset must be applied to use the _HI register. ++ * ++ * Return: The register offset of the _LO register that performs an action of ++ * type @action on a core of type @core_type. + */ -+static void pending_submission_worker(struct work_struct *work) ++static u32 core_type_to_reg(enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action) +{ -+ struct kbase_context *kctx = -+ container_of(work, struct kbase_context, csf.pending_submission_work); -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_queue *queue; -+ int err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ -+ if (err) { -+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); -+ return; -+ } -+ -+ mutex_lock(&kctx->csf.lock); -+ -+ /* Iterate through the queue list and schedule the pending ones for submission. 
*/ -+ list_for_each_entry(queue, &kctx->csf.queue_list, link) { -+ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { -+ struct kbase_queue_group *group = get_bound_queue_group(queue); -+ int ret; -+ -+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { -+ dev_dbg(kbdev->dev, "queue is not bound to a group"); -+ continue; -+ } -+ -+ ret = kbase_csf_scheduler_queue_start(queue); -+ if (unlikely(ret)) { -+ dev_dbg(kbdev->dev, "Failed to start queue"); -+ if (ret == -EBUSY) { -+ atomic_cmpxchg(&queue->pending, 0, 1); -+ enqueue_gpu_submission_work(kctx); -+ } ++ if (corestack_driver_control) { ++ if (core_type == KBASE_PM_CORE_STACK) { ++ switch (action) { ++ case ACTION_PRESENT: ++ return STACK_PRESENT_LO; ++ case ACTION_READY: ++ return STACK_READY_LO; ++ case ACTION_PWRON: ++ return STACK_PWRON_LO; ++ case ACTION_PWROFF: ++ return STACK_PWROFF_LO; ++ case ACTION_PWRTRANS: ++ return STACK_PWRTRANS_LO; ++ default: ++ WARN(1, "Invalid action for core type\n"); + } + } + } + -+ mutex_unlock(&kctx->csf.lock); -+ -+ kbase_reset_gpu_allow(kbdev); -+} -+ -+void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) -+{ -+ if (WARN_ON(slot < 0)) -+ return; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); -+} -+ -+void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, -+ u32 slot_bitmap) -+{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ const u32 allowed_bitmap = -+ (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); -+ u32 value; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ if (WARN_ON(slot_bitmap > allowed_bitmap)) -+ return; -+ -+ /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and -+ * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request -+ * or 2 CSI requests overlap and FW ends up missing the 2nd request. -+ * Memory barrier is required, both on Host and FW side, to guarantee the ordering. -+ * -+ * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. 
-+ */ -+ dmb(osh); -+ -+ value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); -+ value ^= slot_bitmap; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, -+ slot_bitmap); -+ -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+} -+ -+void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, -+ struct kbase_queue *queue) -+{ -+ mutex_lock(&kbdev->csf.reg_lock); -+ -+ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) -+ kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr); -+ -+ mutex_unlock(&kbdev->csf.reg_lock); ++ return (u32)core_type + (u32)action; +} + -+void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, -+ int csi_index, int csg_nr, -+ bool ring_csg_doorbell) ++#if IS_ENABLED(CONFIG_ARM64) ++static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ u32 value; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ if (WARN_ON(csg_nr < 0) || -+ WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) -+ return; -+ -+ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; -+ -+ if (WARN_ON(csi_index < 0) || -+ WARN_ON(csi_index >= ginfo->stream_num)) -+ return; ++ const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; ++ u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; ++ u32 raw; + -+ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to -+ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to -+ * FW before CS_REQ/ACK is set. -+ * -+ * 'osh' is used as CPU and GPU would be in the same outer shareable domain. ++ /* ++ * Note that we don't take the cache flush mutex here since ++ * we expect to be the last user of the L2, all other L2 users ++ * would have dropped their references, to initiate L2 power ++ * down, L2 power down being the only valid place for this ++ * to be called from. + */ -+ dmb(osh); -+ -+ value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); -+ value ^= (1 << csi_index); -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, -+ 1 << csi_index); -+ -+ if (likely(ring_csg_doorbell)) -+ kbase_csf_ring_csg_doorbell(kbdev, csg_nr); -+} + -+int kbase_csf_queue_kick(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_kick *kick) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool trigger_submission = false; -+ struct kbase_va_region *region; -+ int err = 0; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CACHE_CLN_INV_L2); + -+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); ++ raw = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + -+ /* GPU work submission happening asynchronously to prevent the contention with -+ * scheduler lock and as the result blocking application thread. For this reason, -+ * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr -+ * from the context list of active va_regions. -+ * Once the target queue is found the pending flag is set to one atomically avoiding -+ * a race between submission ioctl thread and the work item. ++ /* Wait for cache flush to complete before continuing, exit on ++ * gpu resets or loop expiry. 
+ */ -+ kbase_gpu_vm_lock(kctx); -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr); -+ if (!kbase_is_region_invalid_or_free(region)) { -+ struct kbase_queue *queue = region->user_data; -+ -+ if (queue) { -+ atomic_cmpxchg(&queue->pending, 0, 1); -+ trigger_submission = true; -+ } -+ } else { -+ dev_dbg(kbdev->dev, -+ "Attempt to kick GPU queue without a valid command buffer region"); -+ err = -EFAULT; ++ while (((raw & mask) == 0) && --loops) { ++ raw = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + } -+ kbase_gpu_vm_unlock(kctx); -+ -+ if (likely(trigger_submission)) -+ enqueue_gpu_submission_work(kctx); -+ -+ return err; +} ++#endif + -+static void unbind_stopped_queue(struct kbase_context *kctx, -+ struct kbase_queue *queue) -+{ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ if (WARN_ON(queue->csi_index < 0)) -+ return; -+ -+ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { -+ unsigned long flags; -+ -+ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); -+ bitmap_clear(queue->group->protm_pending_bitmap, -+ queue->csi_index, 1); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, -+ queue->group, queue, queue->group->protm_pending_bitmap[0]); -+ queue->group->bound_queues[queue->csi_index] = NULL; -+ queue->group = NULL; -+ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); -+ -+ put_user_pages_mmap_handle(kctx, queue); -+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID); -+ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; -+ } -+} +/** -+ * unbind_queue() - Remove the linkage between a GPU command queue and the group -+ * to which it was bound or being bound. -+ * -+ * @kctx: Address of the kbase context within which the queue was created. -+ * @queue: Pointer to the queue to be unlinked. ++ * kbase_pm_invoke - Invokes an action on a core set + * -+ * This function will also send the stop request to firmware for the CS -+ * if the group to which the GPU command queue was bound is scheduled. ++ * @kbdev: The kbase device structure of the device ++ * @core_type: The type of core that the action should be performed on ++ * @cores: A bit mask of cores to perform the action on (low 32 bits) ++ * @action: The action to perform on the cores + * -+ * This function would be called when :- -+ * - queue is being unbound. This would happen when the IO mapping -+ * created on bind is removed explicitly by userspace or the process -+ * is getting exited. -+ * - queue group is being terminated which still has queues bound -+ * to it. This could happen on an explicit terminate request from userspace -+ * or when the kbase context is being terminated. -+ * - queue is being terminated without completing the bind operation. -+ * This could happen if either the queue group is terminated -+ * after the CS_QUEUE_BIND ioctl but before the 2nd part of bind operation -+ * to create the IO mapping is initiated. -+ * - There is a failure in executing the 2nd part of bind operation, inside the -+ * mmap handler, which creates the IO mapping for queue. ++ * This function performs the action given by @action on a set of cores of a ++ * type given by @core_type. 
It is a static function used by ++ * kbase_pm_transition_core_type() + */ -+ -+static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) ++static void kbase_pm_invoke(struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ u64 cores, ++ enum kbasep_pm_action action) +{ -+ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { -+ if (queue->bind_state == KBASE_CSF_QUEUE_BOUND) -+ kbase_csf_scheduler_queue_stop(queue); -+ -+ unbind_stopped_queue(kctx, queue); -+ } -+} ++ u32 reg; ++ u32 lo = cores & 0xFFFFFFFF; ++ u32 hi = (cores >> 32) & 0xFFFFFFFF; + -+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) -+{ -+ /* The queue's phys are zeroed when allocation fails. Both of them being -+ * zero is an impossible condition for a successful allocated set of phy pages. -+ */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); -+} ++ reg = core_type_to_reg(core_type, action); + -+void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) -+{ -+ struct kbase_context *kctx = queue->kctx; ++ KBASE_DEBUG_ASSERT(reg); + -+ lockdep_assert_held(&kctx->csf.lock); ++ if (cores) { ++ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); + -+ /* As the process itself is exiting, the termination of queue group can -+ * be done which would be much faster than stopping of individual -+ * queues. This would ensure a faster exit for the process especially -+ * in the case where CSI gets stuck. -+ * The CSI STOP request will wait for the in flight work to drain -+ * whereas CSG TERM request would result in an immediate abort or -+ * cancellation of the pending work. 
-+ */ -+ if (process_exit) { -+ struct kbase_queue_group *group = get_bound_queue_group(queue); ++ if (action == ACTION_PWRON) ++ state |= cores; ++ else if (action == ACTION_PWROFF) ++ state &= ~cores; ++ KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); ++ } + -+ if (group) -+ term_queue_group(group); ++ /* Tracing */ ++ if (cores) { ++ if (action == ACTION_PWRON) ++ switch (core_type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_KTRACE_ADD(kbdev, PM_PWRON, NULL, cores); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_KTRACE_ADD(kbdev, PM_PWRON_TILER, NULL, cores); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_KTRACE_ADD(kbdev, PM_PWRON_L2, NULL, cores); ++ break; ++ default: ++ break; ++ } ++ else if (action == ACTION_PWROFF) ++ switch (core_type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_KTRACE_ADD(kbdev, PM_PWROFF, NULL, cores); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_KTRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, cores); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_KTRACE_ADD(kbdev, PM_PWROFF_L2, NULL, cores); ++ /* disable snoops before L2 is turned off */ ++ kbase_pm_cache_snoop_disable(kbdev); ++ break; ++ default: ++ break; ++ } ++ } + -+ WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND); ++ if (kbase_dummy_job_wa_enabled(kbdev) && ++ action == ACTION_PWRON && ++ core_type == KBASE_PM_CORE_SHADER && ++ !(kbdev->dummy_job_wa.flags & ++ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { ++ kbase_dummy_job_wa_execute(kbdev, cores); + } else { -+ unbind_queue(kctx, queue); ++ if (lo != 0) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); ++ if (hi != 0) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); + } -+ -+ /* Free the resources, if allocated phys for this queue */ -+ if (kbase_csf_queue_phys_allocated(queue)) -+ kbase_csf_free_command_stream_user_pages(kctx, queue); -+} -+ -+void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) -+{ -+ struct kbase_context *kctx = queue->kctx; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); -+ unbind_stopped_queue(kctx, queue); -+ -+ /* Free the resources, if allocated phys for this queue */ -+ if (kbase_csf_queue_phys_allocated(queue)) -+ kbase_csf_free_command_stream_user_pages(kctx, queue); +} + +/** -+ * find_free_group_handle() - Find a free handle for a queue group ++ * kbase_pm_get_state - Get information about a core set + * -+ * @kctx: Address of the kbase context within which the queue group -+ * is to be created. ++ * @kbdev: The kbase device structure of the device ++ * @core_type: The type of core that the should be queried ++ * @action: The property of the cores to query + * -+ * Return: a queue group handle on success, or a negative error code on failure. ++ * This function gets information (chosen by @action) about a set of cores of ++ * a type given by @core_type. It is a static function used by ++ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and ++ * kbase_pm_get_ready_cores(). 
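++ *
++ * For example, kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_READY)
++ * reads the SHADER_READY_LO/_HI register pair; this is how
++ * kbase_pm_get_ready_cores() below obtains the mask of powered-on shader
++ * cores.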
++ * ++ * Return: A bit mask specifying the state of the cores + */ -+static int find_free_group_handle(struct kbase_context *const kctx) ++static u64 kbase_pm_get_state(struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action) +{ -+ /* find the available index in the array of CSGs per this context */ -+ int idx, group_handle = -ENOMEM; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ for (idx = 0; -+ (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0); -+ idx++) { -+ if (!kctx->csf.queue_groups[idx]) -+ group_handle = idx; -+ } ++ u32 reg; ++ u32 lo, hi; + -+ return group_handle; -+} ++ reg = core_type_to_reg(core_type, action); + -+/** -+ * iface_has_enough_streams() - Check that at least one CSG supports -+ * a given number of CS -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @cs_min: Minimum number of CSs required. -+ * -+ * Return: true if at least one CSG supports the given number -+ * of CSs (or more); otherwise false. -+ */ -+static bool iface_has_enough_streams(struct kbase_device *const kbdev, -+ u32 const cs_min) -+{ -+ bool has_enough = false; -+ struct kbase_csf_cmd_stream_group_info *const groups = -+ kbdev->csf.global_iface.groups; -+ const u32 group_num = kbdev->csf.global_iface.group_num; -+ u32 i; ++ KBASE_DEBUG_ASSERT(reg); + -+ for (i = 0; (i < group_num) && !has_enough; i++) { -+ if (groups[i].stream_num >= cs_min) -+ has_enough = true; -+ } ++ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); ++ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); + -+ return has_enough; ++ return (((u64) hi) << 32) | ((u64) lo); +} + +/** -+ * create_normal_suspend_buffer() - Create normal-mode suspend buffer per -+ * queue group ++ * kbase_pm_get_present_cores - Get the cores that are present + * -+ * @kctx: Pointer to kbase context where the queue group is created at -+ * @s_buf: Pointer to suspend buffer that is attached to queue group ++ * @kbdev: Kbase device ++ * @type: The type of cores to query + * -+ * Return: 0 if phy-pages for the suspend buffer is successfully allocated. -+ * Otherwise -ENOMEM or error code. ++ * Return: Bitmask of the cores that are present + */ -+static int create_normal_suspend_buffer(struct kbase_context *const kctx, -+ struct kbase_normal_suspend_buffer *s_buf) ++u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) +{ -+ const size_t nr_pages = -+ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); -+ int err; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ /* The suspend buffer's mapping address is valid only when the CSG is to -+ * run on slot, initializing it 0, signalling the buffer is not mapped. 
-+ */ -+ s_buf->gpu_va = 0; -+ -+ s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); -+ -+ if (!s_buf->phy) -+ return -ENOMEM; -+ -+ /* Get physical page for a normal suspend buffer */ -+ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, -+ &s_buf->phy[0], false, kctx->task); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (err < 0) { -+ kfree(s_buf->phy); -+ return err; ++ switch (type) { ++ case KBASE_PM_CORE_L2: ++ return kbdev->gpu_props.curr_config.l2_present; ++ case KBASE_PM_CORE_SHADER: ++ return kbdev->gpu_props.curr_config.shader_present; ++ case KBASE_PM_CORE_TILER: ++ return kbdev->gpu_props.props.raw_props.tiler_present; ++ case KBASE_PM_CORE_STACK: ++ return kbdev->gpu_props.props.raw_props.stack_present; ++ default: ++ break; + } ++ KBASE_DEBUG_ASSERT(0); + -+ kbase_process_page_usage_inc(kctx, nr_pages); + return 0; +} + -+static void timer_event_worker(struct work_struct *data); -+static void protm_event_worker(struct work_struct *data); -+static void term_normal_suspend_buffer(struct kbase_context *const kctx, -+ struct kbase_normal_suspend_buffer *s_buf); ++KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); + +/** -+ * create_suspend_buffers - Setup normal and protected mode -+ * suspend buffers. ++ * kbase_pm_get_active_cores - Get the cores that are "active" ++ * (busy processing work) + * -+ * @kctx: Address of the kbase context within which the queue group -+ * is to be created. -+ * @group: Pointer to GPU command queue group data. ++ * @kbdev: Kbase device ++ * @type: The type of cores to query + * -+ * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM. ++ * Return: Bitmask of cores that are active + */ -+static int create_suspend_buffers(struct kbase_context *const kctx, -+ struct kbase_queue_group * const group) ++u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) +{ -+ if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { -+ dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); -+ return -ENOMEM; -+ } -+ -+ /* Protected suspend buffer, runtime binding so just initialize it */ -+ group->protected_suspend_buf.gpu_va = 0; -+ group->protected_suspend_buf.pma = NULL; -+ group->protected_suspend_buf.alloc_retries = 0; -+ -+ return 0; ++ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); +} + ++KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); ++ +/** -+ * generate_group_uid() - Makes an ID unique to all kernel base devices -+ * and contexts, for a queue group and CSG. ++ * kbase_pm_get_trans_cores - Get the cores that are transitioning between ++ * power states + * -+ * Return: A unique ID in the form of an unsigned 32-bit integer ++ * @kbdev: Kbase device ++ * @type: The type of cores to query ++ * ++ * Return: Bitmask of cores that are transitioning + */ -+static u32 generate_group_uid(void) ++u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) +{ -+ static atomic_t global_csg_uid = ATOMIC_INIT(0); -+ -+ return (u32)atomic_inc_return(&global_csg_uid); ++ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); +} + ++KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); ++ +/** -+ * create_queue_group() - Create a queue group ++ * kbase_pm_get_ready_cores - Get the cores that are powered on + * -+ * @kctx: Address of the kbase context within which the queue group -+ * is to be created. 
-+ * @create: Address of a structure which contains details of the -+ * queue group which is to be created. ++ * @kbdev: Kbase device ++ * @type: The type of cores to query + * -+ * Return: a queue group handle on success, or a negative error code on failure. ++ * Return: Bitmask of cores that are ready (powered on) + */ -+static int create_queue_group(struct kbase_context *const kctx, -+ union kbase_ioctl_cs_queue_group_create *const create) ++u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) +{ -+ int group_handle = find_free_group_handle(kctx); ++ u64 result; + -+ if (group_handle < 0) { -+ dev_dbg(kctx->kbdev->dev, -+ "All queue group handles are already in use"); -+ } else { -+ struct kbase_queue_group * const group = -+ kmalloc(sizeof(struct kbase_queue_group), -+ GFP_KERNEL); ++ result = kbase_pm_get_state(kbdev, type, ACTION_READY); + -+ lockdep_assert_held(&kctx->csf.lock); ++ switch (type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, result); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, result); ++ break; ++ default: ++ break; ++ } + -+ if (!group) { -+ dev_err(kctx->kbdev->dev, "Failed to allocate a queue\n"); -+ group_handle = -ENOMEM; -+ } else { -+ int err = 0; ++ return result; ++} + -+ group->kctx = kctx; -+ group->handle = group_handle; -+ group->csg_nr = KBASEP_CSG_NR_INVALID; ++KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); + -+ group->tiler_mask = create->in.tiler_mask; -+ group->fragment_mask = create->in.fragment_mask; -+ group->compute_mask = create->in.compute_mask; ++static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + -+ group->tiler_max = create->in.tiler_max; -+ group->fragment_max = create->in.fragment_max; -+ group->compute_max = create->in.compute_max; -+ group->csi_handlers = create->in.csi_handlers; -+ group->priority = kbase_csf_priority_queue_group_priority_to_relative( -+ kbase_csf_priority_check(kctx->kbdev, create->in.priority)); -+ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; -+ group->faulted = false; -+ group->cs_unrecoverable = false; -+ group->reevaluate_idle_status = false; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ group->csg_reg = NULL; -+ group->csg_reg_bind_retries = 0; ++ /* See if we can get away with disabling hwcnt ++ * atomically, otherwise kick off a worker. 
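++ * If the atomic disable succeeds, hwcnt_disabled is set immediately;
++ * otherwise the disable is deferred to hwcnt_disable_work via
++ * kbase_hwcnt_context_queue_work().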
++ */ ++ if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { ++ backend->hwcnt_disabled = true; + -+ group->dvs_buf = create->in.dvs_buf; ++ } else { ++ kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, ++ &backend->hwcnt_disable_work); ++ } ++} + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ group->deschedule_deferred_cnt = 0; -+#endif ++static void kbase_pm_l2_config_override(struct kbase_device *kbdev) ++{ ++ u32 val; + -+ group->group_uid = generate_group_uid(); -+ create->out.group_uid = group->group_uid; ++ /* ++ * Skip if it is not supported ++ */ ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) ++ return; + -+ INIT_LIST_HEAD(&group->link); -+ INIT_LIST_HEAD(&group->link_to_schedule); -+ INIT_LIST_HEAD(&group->error_fatal.link); -+ INIT_LIST_HEAD(&group->error_timeout.link); -+ INIT_LIST_HEAD(&group->error_tiler_oom.link); -+ INIT_WORK(&group->timer_event_work, timer_event_worker); -+ INIT_WORK(&group->protm_event_work, protm_event_worker); -+ bitmap_zero(group->protm_pending_bitmap, -+ MAX_SUPPORTED_STREAMS_PER_GROUP); ++#if MALI_USE_CSF ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), ++ L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); ++ } ++#endif /* MALI_USE_CSF */ + -+ group->run_state = KBASE_CSF_GROUP_INACTIVE; -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, -+ group->run_state); ++ /* ++ * Skip if size and hash are not given explicitly, ++ * which means default values are used. ++ */ ++ if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0) && ++ (!kbdev->l2_hash_values_override)) ++ return; + -+ err = create_suspend_buffers(kctx, group); ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + -+ if (err < 0) { -+ kfree(group); -+ group_handle = err; -+ } else { -+ int j; ++ if (kbdev->l2_size_override) { ++ val &= ~L2_CONFIG_SIZE_MASK; ++ val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT); ++ } + -+ kctx->csf.queue_groups[group_handle] = group; -+ for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP; -+ j++) -+ group->bound_queues[j] = NULL; -+ } ++ if (kbdev->l2_hash_override) { ++ WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); ++ val &= ~L2_CONFIG_HASH_MASK; ++ val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); ++ } else if (kbdev->l2_hash_values_override) { ++ int i; ++ ++ WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); ++ val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; ++ val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); ++ ++ for (i = 0; i < ASN_HASH_COUNT; i++) { ++ dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", ++ kbdev->l2_hash_values[i], i); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), ++ kbdev->l2_hash_values[i]); + } + } + -+ return group_handle; ++ dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); +} + -+static bool dvs_supported(u32 csf_version) ++static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) +{ -+ if (GLB_VERSION_MAJOR_GET(csf_version) < 3) -+ return false; ++ struct kbase_pm_backend_data *const backend = &kbdev->pm.backend; + -+ if (GLB_VERSION_MAJOR_GET(csf_version) == 3) -+ if (GLB_VERSION_MINOR_GET(csf_version) < 2) -+ return false; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return true; ++ queue_work(system_wq, &backend->gpu_clock_control_work); +} + -+int kbase_csf_queue_group_create(struct kbase_context 
*const kctx, -+ union kbase_ioctl_cs_queue_group_create *const create) ++#if MALI_USE_CSF ++static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) +{ -+ int err = 0; -+ const u32 tiler_count = hweight64(create->in.tiler_mask); -+ const u32 fragment_count = hweight64(create->in.fragment_mask); -+ const u32 compute_count = hweight64(create->in.compute_mask); -+ size_t i; ++ const char *const strings[] = { ++#define KBASEP_MCU_STATE(n) #n, ++#include "mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE ++ }; ++ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) ++ return "Bad MCU state"; ++ else ++ return strings[state]; ++} + -+ for (i = 0; i < sizeof(create->in.padding); i++) { -+ if (create->in.padding[i] != 0) { -+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); -+ return -EINVAL; -+ } ++static ++void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) ++{ ++#if KBASE_KTRACE_ENABLE ++ switch (state) { ++#define KBASEP_MCU_STATE(n) \ ++ case KBASE_MCU_ ## n: \ ++ KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ ++ break; ++#include "mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE + } ++#endif ++} + -+ mutex_lock(&kctx->csf.lock); ++static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ bool timer_update; ++ bool core_mask_update; + -+ if ((create->in.tiler_max > tiler_count) || -+ (create->in.fragment_max > fragment_count) || -+ (create->in.compute_max > compute_count)) { -+ dev_dbg(kctx->kbdev->dev, -+ "Invalid maximum number of endpoints for a queue group"); -+ err = -EINVAL; -+ } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) { -+ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u", -+ (unsigned int)create->in.priority); -+ err = -EINVAL; -+ } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) { -+ dev_dbg(kctx->kbdev->dev, -+ "No CSG has at least %d CSs", -+ create->in.cs_min); -+ err = -EINVAL; -+ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) { -+ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", -+ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); -+ err = -EINVAL; -+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && -+ create->in.dvs_buf) { -+ dev_warn( -+ kctx->kbdev->dev, -+ "GPU does not support DVS but userspace is trying to use it"); -+ err = -EINVAL; -+ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) && -+ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) && -+ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) { -+ dev_warn(kctx->kbdev->dev, -+ "DVS buffer pointer is null but size is not 0"); -+ err = -EINVAL; -+ } else { -+ /* For the CSG which satisfies the condition for having -+ * the needed number of CSs, check whether it also conforms -+ * with the requirements for at least one of its CSs having -+ * the iterator of the needed type -+ * (note: for CSF v1.0 all CSs in a CSG will have access to -+ * the same iterators) -+ */ -+ const int group_handle = create_queue_group(kctx, create); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (group_handle >= 0) -+ create->out.group_handle = group_handle; -+ else -+ err = group_handle; -+ } ++ WARN_ON(backend->mcu_state != KBASE_MCU_ON); + -+ mutex_unlock(&kctx->csf.lock); ++ /* This function is only for cases where the MCU managing Cores, if ++ * the firmware mode is with host control, do 
nothing here. ++ */ ++ if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) ++ return false; + -+ return err; -+} ++ core_mask_update = ++ backend->shaders_avail != backend->shaders_desired_mask; + -+/** -+ * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group -+ * -+ * @kctx: Pointer to kbase context where queue group belongs to -+ * @s_buf: Pointer to queue group suspend buffer to be freed -+ */ -+static void term_normal_suspend_buffer(struct kbase_context *const kctx, -+ struct kbase_normal_suspend_buffer *s_buf) -+{ -+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); ++ timer_update = kbdev->csf.mcu_core_pwroff_dur_count != ++ kbdev->csf.mcu_core_pwroff_reg_shadow; + -+ lockdep_assert_held(&kctx->csf.lock); ++ if (core_mask_update || timer_update) ++ kbase_csf_firmware_update_core_attr(kbdev, timer_update, ++ core_mask_update, backend->shaders_desired_mask); + -+ /* The group should not have a bind remaining on any suspend buf region */ -+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); ++ return (core_mask_update || timer_update); ++} + -+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, -+ &s_buf->phy[0], false, false); -+ kbase_process_page_usage_dec(kctx, nr_pages); ++bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, ++ enum kbase_mcu_state state) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kfree(s_buf->phy); -+ s_buf->phy = NULL; ++ return ((state == KBASE_MCU_OFF) || (state == KBASE_MCU_IN_SLEEP)); +} + ++#ifdef KBASE_PM_RUNTIME +/** -+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of -+ * queue group ++ * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on ++ * MCU side + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @sbuf: Pointer to queue group suspend buffer to be freed ++ * @kbdev: Pointer to the device. ++ * ++ * This function is called to re-enable the Doorbell notification on MCU side ++ * when MCU needs to beome active again. 
+ */ -+static void term_protected_suspend_buffer(struct kbase_device *const kbdev, -+ struct kbase_protected_suspend_buffer *sbuf) -+{ -+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!"); -+ if (sbuf->pma) { -+ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); -+ sbuf->pma = NULL; -+ } -+} -+ -+void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) ++static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = group->kctx; -+ -+ /* Currently each group supports the same number of CS */ -+ u32 max_streams = -+ kctx->kbdev->csf.global_iface.groups[0].stream_num; -+ u32 i; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE && -+ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); -+ -+ for (i = 0; i < max_streams; i++) { -+ struct kbase_queue *queue = -+ group->bound_queues[i]; -+ -+ /* The group is already being evicted from the scheduler */ -+ if (queue) -+ unbind_stopped_queue(kctx, queue); -+ } ++ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_CONTROL)); + -+ term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); -+ if (kctx->kbdev->csf.pma_dev) -+ term_protected_suspend_buffer(kctx->kbdev, -+ &group->protected_suspend_buf); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ group->run_state = KBASE_CSF_GROUP_TERMINATED; -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state); ++ val &= ~MCU_CNTRL_DOORBELL_DISABLE_MASK; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), val); +} + +/** -+ * term_queue_group - Terminate a GPU command queue group. ++ * wait_mcu_as_inactive - Wait for AS used by MCU FW to get configured + * -+ * @group: Pointer to GPU command queue group data. ++ * @kbdev: Pointer to the device. + * -+ * Terminates a GPU command queue group. From the userspace perspective the -+ * group will still exist but it can't bind new queues to it. Userspace can -+ * still add work in queues bound to the group but it won't be executed. (This -+ * is because the IO mapping created upon binding such queues is still intact.) ++ * This function is called to wait for the AS used by MCU FW to get configured ++ * before DB notification on MCU is enabled, as a workaround for HW issue. 
+ */ -+static void term_queue_group(struct kbase_queue_group *group) ++static void wait_mcu_as_inactive(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = group->kctx; ++ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + -+ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); -+ lockdep_assert_held(&kctx->csf.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Stop the group and evict it from the scheduler */ -+ kbase_csf_scheduler_group_deschedule(group); ++ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716)) ++ return; + -+ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) ++ /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ ++ while (--max_loops && ++ kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & ++ AS_STATUS_AS_ACTIVE_INT) ++ ; ++ ++ if (!WARN_ON_ONCE(max_loops == 0)) + return; + -+ dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle); ++ dev_err(kbdev->dev, "AS_ACTIVE_INT bit stuck for AS %d used by MCU FW", MCU_AS_NR); + -+ kbase_csf_term_descheduled_queue_group(group); ++ if (kbase_prepare_to_reset_gpu(kbdev, 0)) ++ kbase_reset_gpu(kbdev); +} ++#endif + +/** -+ * wait_group_deferred_deschedule_completion - Wait for refcount of the group to -+ * become 0 that was taken when the group deschedule had to be deferred. ++ * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts ++ * from the firmware + * -+ * @group: Pointer to GPU command queue group that is being deleted. ++ * @kbdev: Pointer to the device ++ * @enable: boolean indicating to enable interrupts or not + * -+ * This function is called when Userspace deletes the group and after the group -+ * has been descheduled. The function synchronizes with the other threads that were -+ * also trying to deschedule the group whilst the dumping was going on for a fault. -+ * Please refer the documentation of wait_for_dump_complete_on_group_deschedule() -+ * for more details. ++ * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on ++ * when FW is controlling the power for the shader cores. Correspondingly, the ++ * interrupts can be re-enabled after the MCU has been disabled before the ++ * power down of L2. 
+ */ -+static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group) ++static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) +{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_context *kctx = group->kctx; -+ -+ lockdep_assert_held(&kctx->csf.lock); -+ -+ if (likely(!group->deschedule_deferred_cnt)) -+ return; -+ -+ mutex_unlock(&kctx->csf.lock); -+ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt); -+ mutex_lock(&kctx->csf.lock); -+#endif -+} ++ u32 irq_mask; + -+static void cancel_queue_group_events(struct kbase_queue_group *group) -+{ -+ cancel_work_sync(&group->timer_event_work); -+ cancel_work_sync(&group->protm_event_work); -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+static void remove_pending_group_fatal_error(struct kbase_queue_group *group) -+{ -+ struct kbase_context *kctx = group->kctx; ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + -+ dev_dbg(kctx->kbdev->dev, -+ "Remove any pending group fatal error from context %pK\n", -+ (void *)group->kctx); ++ if (enable) { ++ irq_mask |= POWER_CHANGED_ALL; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); ++ } else { ++ irq_mask &= ~POWER_CHANGED_ALL; ++ } + -+ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); -+ kbase_csf_event_remove_error(kctx, &group->error_timeout); -+ kbase_csf_event_remove_error(kctx, &group->error_fatal); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); +} + -+void kbase_csf_queue_group_terminate(struct kbase_context *kctx, -+ u8 group_handle) ++static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) +{ -+ struct kbase_queue_group *group; -+ int err; -+ bool reset_prevented = false; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless", -+ group_handle); -+ else -+ reset_prevented = true; -+ -+ mutex_lock(&kctx->csf.lock); ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ enum kbase_mcu_state prev_state; + -+ group = find_queue_group(kctx, group_handle); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (group) { -+ kctx->csf.queue_groups[group_handle] = NULL; -+ /* Stop the running of the given group */ -+ term_queue_group(group); -+ mutex_unlock(&kctx->csf.lock); ++ /* ++ * Initial load of firmware should have been done to ++ * exercise the MCU state machine. ++ */ ++ if (unlikely(!kbdev->csf.firmware_inited)) { ++ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); ++ return 0; ++ } + -+ if (reset_prevented) { -+ /* Allow GPU reset before cancelling the group specific -+ * work item to avoid potential deadlock. -+ * Reset prevention isn't needed after group termination. -+ */ -+ kbase_reset_gpu_allow(kbdev); -+ reset_prevented = false; -+ } ++ do { ++ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + -+ /* Cancel any pending event callbacks. If one is in progress -+ * then this thread waits synchronously for it to complete (which -+ * is why we must unlock the context first). We already ensured -+ * that no more callbacks can be enqueued by terminating the group. 
++ /* mask off ready from trans in case transitions finished ++ * between the register reads + */ -+ cancel_queue_group_events(group); ++ shaders_trans &= ~shaders_ready; + -+ mutex_lock(&kctx->csf.lock); ++ prev_state = backend->mcu_state; + -+ /* Clean up after the termination */ -+ remove_pending_group_fatal_error(group); ++ switch (backend->mcu_state) { ++ case KBASE_MCU_OFF: ++ if (kbase_pm_is_mcu_desired(kbdev) && ++ !backend->policy_change_clamp_state_to_off && ++ backend->l2_state == KBASE_L2_ON) { ++ kbase_csf_firmware_trigger_reload(kbdev); ++ backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD; ++ } ++ break; + -+ wait_group_deferred_deschedule_completion(group); -+ } ++ case KBASE_MCU_PEND_ON_RELOAD: ++ if (kbdev->csf.firmware_reloaded) { ++ backend->shaders_desired_mask = ++ kbase_pm_ca_get_core_mask(kbdev); ++ kbase_csf_firmware_global_reinit(kbdev, ++ backend->shaders_desired_mask); ++ if (!kbdev->csf.firmware_hctl_core_pwr) ++ kbasep_pm_toggle_power_interrupt(kbdev, false); ++ backend->mcu_state = ++ KBASE_MCU_ON_GLB_REINIT_PEND; ++ } ++ break; + -+ mutex_unlock(&kctx->csf.lock); -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); ++ case KBASE_MCU_ON_GLB_REINIT_PEND: ++ if (kbase_csf_firmware_global_reinit_complete(kbdev)) { ++ backend->shaders_avail = ++ backend->shaders_desired_mask; ++ backend->pm_shaders_core_mask = 0; ++ if (kbdev->csf.firmware_hctl_core_pwr) { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail, ACTION_PWRON); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; ++ } else ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ if (kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { ++ kbase_debug_coresight_csf_state_request( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); ++ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; ++ } else if (kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { ++ backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; ++ } ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ } ++ break; + -+ kfree(group); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); ++ case KBASE_MCU_HCTL_SHADERS_PEND_ON: ++ if (!shaders_trans && ++ shaders_ready == backend->shaders_avail) { ++ /* Cores now stable, notify MCU the stable mask */ ++ kbase_csf_firmware_update_core_attr(kbdev, ++ false, true, shaders_ready); + -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+int kbase_csf_queue_group_suspend(struct kbase_context *kctx, -+ struct kbase_suspend_copy_buffer *sus_buf, -+ u8 group_handle) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ int err; -+ struct kbase_queue_group *group; ++ backend->pm_shaders_core_mask = shaders_ready; ++ backend->mcu_state = ++ KBASE_MCU_HCTL_CORES_NOTIFY_PEND; ++ } ++ break; + -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (err) { -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when suspending group %d", -+ group_handle); -+ return err; -+ } -+ mutex_lock(&kctx->csf.lock); ++ case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: ++ /* Wait for the acknowledgement */ ++ if (kbase_csf_firmware_core_attr_updated(kbdev)) ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; + -+ group = find_queue_group(kctx, group_handle); -+ if (group) -+ err = kbase_csf_scheduler_group_copy_suspend_buf(group, -+ sus_buf); -+ else -+ err = -EINVAL; ++ case KBASE_MCU_ON_HWCNT_ENABLE: ++ backend->hwcnt_desired = true; ++ if (backend->hwcnt_disabled) { ++ 
unsigned long flags; + -+ mutex_unlock(&kctx->csf.lock); -+ kbase_reset_gpu_allow(kbdev); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ backend->hwcnt_disabled = false; ++ } ++ backend->mcu_state = KBASE_MCU_ON; ++ break; + -+ return err; -+} ++ case KBASE_MCU_ON: ++ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); ++ ++ if (!kbase_pm_is_mcu_desired(kbdev)) ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; ++ else if (kbdev->csf.firmware_hctl_core_pwr) { ++ /* Host control scale up/down cores as needed */ ++ if (backend->shaders_desired_mask != shaders_ready) { ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_MCU_ON_RECHECK; ++ } ++ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) ++ backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ else if (kbdev->csf.coresight.disable_on_pmode_enter) { ++ kbase_debug_coresight_csf_state_request( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); ++ backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; ++ } else if (kbdev->csf.coresight.enable_on_pmode_exit) { ++ kbase_debug_coresight_csf_state_request( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); ++ backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; ++ } +#endif ++ break; + -+void kbase_csf_add_group_fatal_error( -+ struct kbase_queue_group *const group, -+ struct base_gpu_queue_group_error const *const err_payload) -+{ -+ struct base_csf_notification error; ++ case KBASE_MCU_HCTL_MCU_ON_RECHECK: ++ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); + -+ if (WARN_ON(!group)) -+ return; ++ if (!backend->hwcnt_disabled) { ++ /* Wait for being disabled */ ++ ; ++ } else if (!kbase_pm_is_mcu_desired(kbdev)) { ++ /* Converging to MCU powering down flow */ ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; ++ } else if (backend->shaders_desired_mask & ~shaders_ready) { ++ /* set cores ready but not available to ++ * meet SHADERS_PEND_ON check pass ++ */ ++ backend->shaders_avail = ++ (backend->shaders_desired_mask | shaders_ready); + -+ if (WARN_ON(!err_payload)) -+ return; ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail & ~shaders_ready, ++ ACTION_PWRON); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; + -+ error = (struct base_csf_notification) { -+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, -+ .payload = { -+ .csg_error = { -+ .handle = group->handle, -+ .error = *err_payload ++ } else if (~backend->shaders_desired_mask & shaders_ready) { ++ kbase_csf_firmware_update_core_attr(kbdev, false, true, ++ backend->shaders_desired_mask); ++ backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND; ++ } else { ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; + } -+ } -+ }; -+ -+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); -+} ++ break; + -+void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct list_head evicted_groups; -+ struct kbase_queue_group *group; -+ int i; ++ case KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: ++ if (kbase_csf_firmware_core_attr_updated(kbdev)) { ++ /* wait in queue until cores idle */ ++ queue_work(backend->core_idle_wq, &backend->core_idle_work); ++ backend->mcu_state = KBASE_MCU_HCTL_CORE_INACTIVE_PEND; ++ } ++ break; + -+ 
INIT_LIST_HEAD(&evicted_groups); ++ case KBASE_MCU_HCTL_CORE_INACTIVE_PEND: ++ { ++ u64 active_cores = kbase_pm_get_active_cores( ++ kbdev, ++ KBASE_PM_CORE_SHADER); ++ u64 cores_to_disable = shaders_ready & ++ ~backend->shaders_desired_mask; + -+ mutex_lock(&kctx->csf.lock); ++ if (!(cores_to_disable & active_cores)) { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ cores_to_disable, ++ ACTION_PWROFF); ++ backend->shaders_avail = backend->shaders_desired_mask; ++ backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND; ++ } ++ } ++ break; + -+ kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups); -+ while (!list_empty(&evicted_groups)) { -+ group = list_first_entry(&evicted_groups, -+ struct kbase_queue_group, link); ++ case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND: ++ if (!shaders_trans && shaders_ready == backend->shaders_avail) { ++ /* Cores now stable */ ++ backend->pm_shaders_core_mask = shaders_ready; ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ } ++ break; + -+ dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated", -+ kctx->tgid, kctx->id, group->handle); -+ kbase_csf_term_descheduled_queue_group(group); -+ list_del_init(&group->link); -+ } ++ case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: ++ if (kbase_csf_firmware_core_attr_updated(kbdev)) { ++ backend->shaders_avail = backend->shaders_desired_mask; ++ backend->mcu_state = KBASE_MCU_ON; ++ } ++ break; + -+ /* Acting on the queue groups that are pending to be terminated. */ -+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { -+ group = kctx->csf.queue_groups[i]; -+ if (group && -+ group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) -+ kbase_csf_term_descheduled_queue_group(group); -+ } ++ case KBASE_MCU_ON_HWCNT_DISABLE: ++ if (kbase_pm_is_mcu_desired(kbdev)) { ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; ++ } + -+ mutex_unlock(&kctx->csf.lock); -+} ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); + -+int kbase_csf_ctx_init(struct kbase_context *kctx) -+{ -+ int err = -ENOMEM; + -+ INIT_LIST_HEAD(&kctx->csf.queue_list); -+ INIT_LIST_HEAD(&kctx->csf.link); ++ if (backend->hwcnt_disabled) { ++#ifdef KBASE_PM_RUNTIME ++ if (backend->gpu_sleep_mode_active) ++ backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; ++ else { ++#endif ++ backend->mcu_state = KBASE_MCU_ON_HALT; ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ kbase_debug_coresight_csf_state_request( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); ++ backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ } ++ } ++ break; + -+ kbase_csf_event_init(kctx); ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: ++ if (kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { ++ backend->mcu_state = KBASE_MCU_ON; ++ kbdev->csf.coresight.disable_on_pmode_enter = false; ++ } ++ break; ++ case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: ++ if (kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { ++ backend->mcu_state = KBASE_MCU_ON; ++ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ } ++ break; ++ case KBASE_MCU_CORESIGHT_DISABLE: ++ if (kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) ++ backend->mcu_state = KBASE_MCU_ON_HALT; ++ break; + -+ /* Mark all the cookies as 'free' */ -+ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); ++ case KBASE_MCU_CORESIGHT_ENABLE: ++ if 
(kbase_debug_coresight_csf_state_check( ++ kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+ kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", -+ WQ_UNBOUND, 1); ++ case KBASE_MCU_ON_HALT: ++ if (!kbase_pm_is_mcu_desired(kbdev)) { ++ kbase_csf_firmware_trigger_mcu_halt(kbdev); ++ backend->mcu_state = KBASE_MCU_ON_PEND_HALT; ++ } else ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; + -+ if (likely(kctx->csf.wq)) { -+ err = kbase_csf_scheduler_context_init(kctx); ++ case KBASE_MCU_ON_PEND_HALT: ++ if (kbase_csf_firmware_mcu_halted(kbdev)) { ++ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL, ++ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_READY_OFF; ++ else ++ backend->mcu_state = KBASE_MCU_POWER_DOWN; ++ } ++ break; + -+ if (likely(!err)) { -+ err = kbase_csf_kcpu_queue_context_init(kctx); ++ case KBASE_MCU_HCTL_SHADERS_READY_OFF: ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ shaders_ready, ACTION_PWROFF); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_OFF; ++ break; + -+ if (likely(!err)) { -+ err = kbase_csf_tiler_heap_context_init(kctx); ++ case KBASE_MCU_HCTL_SHADERS_PEND_OFF: ++ if (!shaders_trans && !shaders_ready) { ++ backend->pm_shaders_core_mask = 0; ++ backend->mcu_state = KBASE_MCU_POWER_DOWN; ++ } ++ break; + -+ if (likely(!err)) { -+ mutex_init(&kctx->csf.lock); -+ INIT_WORK(&kctx->csf.pending_submission_work, -+ pending_submission_worker); ++ case KBASE_MCU_POWER_DOWN: ++ kbase_csf_firmware_disable_mcu(kbdev); ++ backend->mcu_state = KBASE_MCU_PEND_OFF; ++ break; + -+ err = kbasep_ctx_user_reg_page_mapping_init(kctx); ++ case KBASE_MCU_PEND_OFF: ++ /* wait synchronously for the MCU to get disabled */ ++ kbase_csf_firmware_disable_mcu_wait(kbdev); ++ if (!kbdev->csf.firmware_hctl_core_pwr) ++ kbasep_pm_toggle_power_interrupt(kbdev, true); ++ backend->mcu_state = KBASE_MCU_OFF; ++ break; ++#ifdef KBASE_PM_RUNTIME ++ case KBASE_MCU_ON_SLEEP_INITIATE: ++ if (!kbase_pm_is_mcu_desired(kbdev)) { ++ kbase_csf_firmware_trigger_mcu_sleep(kbdev); ++ backend->mcu_state = KBASE_MCU_ON_PEND_SLEEP; ++ } else ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; + -+ if (unlikely(err)) -+ kbase_csf_tiler_heap_context_term(kctx); -+ } ++ case KBASE_MCU_ON_PEND_SLEEP: ++ if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) { ++ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL, ++ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); ++ backend->mcu_state = KBASE_MCU_IN_SLEEP; ++ kbase_pm_enable_db_mirror_interrupt(kbdev); ++ kbase_csf_scheduler_reval_idleness_post_sleep(kbdev); ++ /* Enable PM interrupt, after MCU has been put ++ * to sleep, for the power down of L2. ++ */ ++ if (!kbdev->csf.firmware_hctl_core_pwr) ++ kbasep_pm_toggle_power_interrupt(kbdev, true); ++ } ++ break; + -+ if (unlikely(err)) -+ kbase_csf_kcpu_queue_context_term(kctx); ++ case KBASE_MCU_IN_SLEEP: ++ if (kbase_pm_is_mcu_desired(kbdev) && ++ backend->l2_state == KBASE_L2_ON) { ++ wait_mcu_as_inactive(kbdev); ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( ++ kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ kbase_pm_enable_mcu_db_notification(kbdev); ++ kbase_pm_disable_db_mirror_interrupt(kbdev); ++ /* Disable PM interrupt after L2 has been ++ * powered up for the wakeup of MCU. 
++ */ ++ if (!kbdev->csf.firmware_hctl_core_pwr) ++ kbasep_pm_toggle_power_interrupt(kbdev, false); ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + } ++ break; ++#endif ++ case KBASE_MCU_RESET_WAIT: ++ /* Reset complete */ ++ if (!backend->in_reset) ++ backend->mcu_state = KBASE_MCU_OFF; + -+ if (unlikely(err)) -+ kbase_csf_scheduler_context_term(kctx); ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ kbdev->csf.coresight.disable_on_pmode_enter = false; ++ kbdev->csf.coresight.enable_on_pmode_exit = false; ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ break; ++ ++ default: ++ WARN(1, "Invalid state in mcu_state: %d", ++ backend->mcu_state); + } + -+ if (unlikely(err)) -+ destroy_workqueue(kctx->csf.wq); -+ } ++ if (backend->mcu_state != prev_state) { ++ dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", ++ kbase_mcu_state_to_string(prev_state), ++ kbase_mcu_state_to_string(backend->mcu_state)); ++ kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state); ++ } + -+ return err; ++ } while (backend->mcu_state != prev_state); ++ ++ return 0; +} + -+void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, -+ struct kbase_fault *fault) ++static void core_idle_worker(struct work_struct *work) +{ -+ int gr; -+ bool reported = false; -+ struct base_gpu_queue_group_error err_payload; -+ int err; -+ struct kbase_device *kbdev; -+ -+ if (WARN_ON(!kctx)) -+ return; -+ -+ if (WARN_ON(!fault)) -+ return; ++ struct kbase_device *kbdev = ++ container_of(work, struct kbase_device, pm.backend.core_idle_work); ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ unsigned long flags; + -+ kbdev = kctx->kbdev; -+ err = kbase_reset_gpu_try_prevent(kbdev); -+ /* Regardless of whether reset failed or is currently happening, exit -+ * early -+ */ -+ if (err) -+ return; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) { ++ const unsigned int core_inactive_wait_ms = 1; ++ u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask; + -+ err_payload = (struct base_gpu_queue_group_error) { -+ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { -+ .fatal_group = { -+ .sideband = fault->addr, -+ .status = fault->status, -+ } ++ if (!(cores_to_disable & active_cores)) { ++ kbase_pm_update_state(kbdev); ++ break; + } -+ }; + -+ mutex_lock(&kctx->csf.lock); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ msleep(core_inactive_wait_ms); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ } + -+ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { -+ struct kbase_queue_group *const group = -+ kctx->csf.queue_groups[gr]; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++#endif + -+ if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { -+ term_queue_group(group); -+ kbase_csf_add_group_fatal_error(group, &err_payload); -+ reported = true; -+ } ++static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) ++{ ++ const char *const strings[] = { ++#define KBASEP_L2_STATE(n) #n, ++#include "mali_kbase_pm_l2_states.h" ++#undef KBASEP_L2_STATE ++ }; ++ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) ++ return "Bad level 2 cache state"; ++ else ++ return strings[state]; ++} ++ ++static ++void kbase_ktrace_log_l2_core_state(struct 
kbase_device *kbdev, enum kbase_l2_core_state state) ++{ ++#if KBASE_KTRACE_ENABLE ++ switch (state) { ++#define KBASEP_L2_STATE(n) \ ++ case KBASE_L2_ ## n: \ ++ KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ ++ break; ++#include "mali_kbase_pm_l2_states.h" ++#undef KBASEP_L2_STATE + } ++#endif ++} + -+ mutex_unlock(&kctx->csf.lock); ++#if !MALI_USE_CSF ++/* On powering on the L2, the tracked kctx becomes stale and can be cleared. ++ * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER ++ * operation on the first submitted katom after the L2 powering on. ++ */ ++static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbdev) ++{ ++ int js; + -+ if (reported) -+ kbase_event_wakeup(kctx); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_reset_gpu_allow(kbdev); ++ /* Clear the slots' last katom submission kctx */ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; +} ++#endif + -+void kbase_csf_ctx_term(struct kbase_context *kctx) ++static bool can_power_down_l2(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_as *as = NULL; -+ unsigned long flags; -+ u32 i; -+ int err; -+ bool reset_prevented = false; -+ -+ /* As the kbase context is terminating, its debugfs sub-directory would -+ * have been removed already and so would be the debugfs file created -+ * for queue groups & kcpu queues, hence no need to explicitly remove -+ * those debugfs files. ++#if MALI_USE_CSF ++ /* Due to the HW issue GPU2019-3878, need to prevent L2 power off ++ * whilst MMU command is in progress. ++ * Also defer the power-down if MMU is in process of page migration. + */ ++ return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; ++#else ++ return !kbdev->mmu_page_migrate_in_progress; ++#endif ++} + -+ /* Wait for a GPU reset if it is happening, prevent it if not happening */ -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless", -+ kctx->tgid, kctx->id); -+ else -+ reset_prevented = true; -+ -+ mutex_lock(&kctx->csf.lock); ++static bool can_power_up_l2(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Iterate through the queue groups that were not terminated by -+ * userspace and issue the term request to firmware for them. -+ */ -+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { -+ struct kbase_queue_group *group = kctx->csf.queue_groups[i]; ++ /* Avoiding l2 transition if MMU is undergoing page migration */ ++ return !kbdev->mmu_page_migrate_in_progress; ++} + -+ if (group) { -+ remove_pending_group_fatal_error(group); -+ term_queue_group(group); -+ } -+ } -+ mutex_unlock(&kctx->csf.lock); -+ -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); -+ -+ cancel_work_sync(&kctx->csf.pending_submission_work); -+ -+ /* Now that all queue groups have been terminated, there can be no -+ * more OoM or timer event interrupts but there can be inflight work -+ * items. Destroying the wq will implicitly flush those work items. -+ */ -+ destroy_workqueue(kctx->csf.wq); -+ -+ /* Wait for the firmware error work item to also finish as it could -+ * be affecting this outgoing context also. 
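[Illustrative aside, not part of the patch.] The kbase_mcu_state_to_string() and kbase_l2_core_state_to_string() helpers above build their name tables by redefining KBASEP_MCU_STATE/KBASEP_L2_STATE and re-including the state list header, so the enum and the strings cannot drift apart. A standalone sketch of that X-macro technique, using a list macro instead of a header purely for brevity:

#include <stdio.h>

#define PM_STATES \
	PM_STATE(OFF) \
	PM_STATE(PEND_ON) \
	PM_STATE(ON)

/* Expand the list once into the enum... */
enum pm_state {
#define PM_STATE(n) PM_##n,
	PM_STATES
#undef PM_STATE
	PM_STATE_COUNT
};

/* ...and once more into the matching name table. */
static const char *pm_state_to_string(enum pm_state state)
{
	static const char *const strings[] = {
#define PM_STATE(n) #n,
		PM_STATES
#undef PM_STATE
	};

	if ((unsigned int)state >= PM_STATE_COUNT)
		return "Bad PM state";
	return strings[state];
}

int main(void)
{
	printf("%s\n", pm_state_to_string(PM_PEND_ON)); /* prints "PEND_ON" */
	return 0;
}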
-+ */ -+ flush_work(&kctx->kbdev->csf.fw_error_work); -+ -+ /* A work item to handle page_fault/bus_fault/gpu_fault could be -+ * pending for the outgoing context. Flush the workqueue that will -+ * execute that work item. -+ */ -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID) -+ as = &kctx->kbdev->as[kctx->as_nr]; -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); -+ if (as) -+ flush_workqueue(as->pf_wq); -+ -+ mutex_lock(&kctx->csf.lock); ++static bool need_tiler_control(struct kbase_device *kbdev) ++{ ++#if MALI_USE_CSF ++ if (kbase_pm_no_mcu_core_pwroff(kbdev)) ++ return true; ++ else ++ return false; ++#else ++ return true; ++#endif ++} + -+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { -+ kfree(kctx->csf.queue_groups[i]); -+ kctx->csf.queue_groups[i] = NULL; -+ } ++static int kbase_pm_l2_update_state(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ u64 l2_present = kbdev->gpu_props.curr_config.l2_present; ++ u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; ++ bool l2_power_up_done; ++ enum kbase_l2_core_state prev_state; + -+ /* Iterate through the queues that were not terminated by -+ * userspace and do the required cleanup for them. -+ */ -+ while (!list_empty(&kctx->csf.queue_list)) { -+ struct kbase_queue *queue; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ queue = list_first_entry(&kctx->csf.queue_list, -+ struct kbase_queue, link); ++ do { ++ /* Get current state */ ++ u64 l2_trans = kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_L2); ++ u64 l2_ready = kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_L2); + -+ /* The reference held when the IO mapping was created on bind -+ * would have been dropped otherwise the termination of Kbase -+ * context itself wouldn't have kicked-in. So there shall be -+ * only one reference left that was taken when queue was -+ * registered. ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* ++ * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores ++ * are vulnerable to corruption if gpu is lost + */ -+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1); -+ list_del_init(&queue->link); -+ release_queue(queue); -+ } ++ if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { ++ backend->shaders_state = ++ KBASE_SHADERS_OFF_CORESTACK_OFF; ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) { ++ /* Don't progress until hw counters are disabled ++ * This may involve waiting for a worker to complete. ++ * The HW counters backend disable code checks for the ++ * GPU removed case and will error out without touching ++ * the hardware. This step is needed to keep the HW ++ * counters in a consistent state after a GPU lost. 
++ */ ++ backend->l2_state = ++ KBASE_L2_ON_HWCNT_DISABLE; ++ KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, ++ backend->l2_state); ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ } + -+ mutex_unlock(&kctx->csf.lock); ++ if (backend->hwcnt_disabled) { ++ backend->l2_state = KBASE_L2_OFF; ++ KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state); ++ dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); ++ } ++ break; ++ } ++#endif + -+ kbasep_ctx_user_reg_page_mapping_term(kctx); -+ kbase_csf_tiler_heap_context_term(kctx); -+ kbase_csf_kcpu_queue_context_term(kctx); -+ kbase_csf_scheduler_context_term(kctx); -+ kbase_csf_event_term(kctx); ++ /* mask off ready from trans in case transitions finished ++ * between the register reads ++ */ ++ l2_trans &= ~l2_ready; + -+ mutex_destroy(&kctx->csf.lock); -+} ++ prev_state = backend->l2_state; + -+/** -+ * handle_oom_event - Handle the OoM event generated by the firmware for the -+ * CSI. -+ * -+ * @group: Pointer to the CSG group the oom-event belongs to. -+ * @stream: Pointer to the structure containing info provided by the firmware -+ * about the CSI. -+ * -+ * This function will handle the OoM event request from the firmware for the -+ * CS. It will retrieve the address of heap context and heap's -+ * statistics (like number of render passes in-flight) from the CS's kernel -+ * output page and pass them to the tiler heap function to allocate a -+ * new chunk. -+ * It will also update the CS's kernel input page with the address -+ * of a new chunk that was allocated. -+ * -+ * Return: 0 if successfully handled the request, otherwise a negative error -+ * code on failure. -+ */ -+static int handle_oom_event(struct kbase_queue_group *const group, -+ struct kbase_csf_cmd_stream_info const *const stream) -+{ -+ struct kbase_context *const kctx = group->kctx; -+ u64 gpu_heap_va = -+ kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | -+ ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); -+ const u32 vt_start = -+ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START); -+ const u32 vt_end = -+ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END); -+ const u32 frag_end = -+ kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END); -+ u32 renderpasses_in_flight; -+ u32 pending_frag_count; -+ u64 new_chunk_ptr; -+ int err; -+ bool frag_end_err = false; ++ switch (backend->l2_state) { ++ case KBASE_L2_OFF: ++ if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ /* Enable HW timer of IPA control before ++ * L2 cache is powered-up. ++ */ ++ kbase_ipa_control_handle_gpu_sleep_exit(kbdev); ++#endif ++ /* ++ * Set the desired config for L2 before ++ * powering it on ++ */ ++ kbase_pm_l2_config_override(kbdev); ++ kbase_pbha_write_settings(kbdev); + -+ if ((frag_end > vt_end) || (vt_end >= vt_start)) { -+ frag_end_err = true; -+ dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", -+ vt_start, vt_end, frag_end); -+ } -+ if (frag_end_err) { -+ renderpasses_in_flight = 1; -+ pending_frag_count = 1; -+ } else { -+ renderpasses_in_flight = vt_start - frag_end; -+ pending_frag_count = vt_end - frag_end; -+ } ++ /* If Host is controlling the power for shader ++ * cores, then it also needs to control the ++ * power for Tiler. ++ * Powering on the tiler will also power the ++ * L2 cache. 
++ */ ++ if (need_tiler_control(kbdev)) { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present, ++ ACTION_PWRON); ++ } else { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present, ++ ACTION_PWRON); ++ } ++#if !MALI_USE_CSF ++ /* If we have more than one L2 cache then we ++ * must power them on explicitly. ++ */ ++ if (l2_present != 1) ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, ++ l2_present & ~1, ++ ACTION_PWRON); ++ /* Clear backend slot submission kctx */ ++ kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev); ++#endif ++ backend->l2_state = KBASE_L2_PEND_ON; ++ } ++ break; + -+ err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, -+ gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); ++ case KBASE_L2_PEND_ON: ++ l2_power_up_done = false; ++ if (!l2_trans && l2_ready == l2_present) { ++ if (need_tiler_control(kbdev)) { ++ u64 tiler_trans = kbase_pm_get_trans_cores( ++ kbdev, KBASE_PM_CORE_TILER); ++ u64 tiler_ready = kbase_pm_get_ready_cores( ++ kbdev, KBASE_PM_CORE_TILER); ++ tiler_trans &= ~tiler_ready; + -+ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) && -+ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) { -+ /* The group allows incremental rendering, trigger it */ -+ new_chunk_ptr = 0; -+ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n", -+ group->handle, group->csg_nr); -+ } else if (err == -EBUSY) { -+ /* Acknowledge with a NULL chunk (firmware will then wait for -+ * the fragment jobs to complete and release chunks) -+ */ -+ new_chunk_ptr = 0; -+ } else if (err) -+ return err; ++ if (!tiler_trans && tiler_ready == tiler_present) { ++ KBASE_KTRACE_ADD(kbdev, ++ PM_CORES_CHANGE_AVAILABLE_TILER, ++ NULL, tiler_ready); ++ l2_power_up_done = true; ++ } ++ } else { ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, ++ l2_ready); ++ l2_power_up_done = true; ++ } ++ } ++ if (l2_power_up_done) { ++ /* ++ * Ensure snoops are enabled after L2 is powered ++ * up. Note that kbase keeps track of the snoop ++ * state, so safe to repeatedly call. ++ */ ++ kbase_pm_cache_snoop_enable(kbdev); + -+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, -+ new_chunk_ptr & 0xFFFFFFFF); -+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI, -+ new_chunk_ptr >> 32); ++ /* With the L2 enabled, we can now enable ++ * hardware counters. ++ */ ++ if (kbdev->pm.backend.gpu_clock_slow_down_wa) ++ backend->l2_state = ++ KBASE_L2_RESTORE_CLOCKS; ++ else ++ backend->l2_state = ++ KBASE_L2_ON_HWCNT_ENABLE; + -+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO, -+ new_chunk_ptr & 0xFFFFFFFF); -+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI, -+ new_chunk_ptr >> 32); ++ /* Now that the L2 is on, the shaders can start ++ * powering on if they're required. The obvious ++ * way to do this would be to call ++ * kbase_pm_shaders_update_state() here. ++ * However, that would make the two state ++ * machines mutually recursive, as the opposite ++ * would be needed for powering down. Instead, ++ * callers of this function should use the ++ * kbase_pm_update_state() wrapper, which will ++ * call the shader state machine immediately ++ * after the L2 (for power up), or ++ * automatically re-invoke the L2 state machine ++ * when the shaders power down. 
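[Illustrative aside, not part of the patch.] The comment above explains why the L2 and shader/MCU state machines never call each other directly: callers invoke the kbase_pm_update_state() wrapper, which keeps stepping the individual machines until neither changes state. A minimal userspace sketch of that driver-loop pattern, with invented states and transitions:

#include <stdbool.h>
#include <stdio.h>

enum l2_state { L2_OFF, L2_PEND_ON, L2_ON };
enum shader_state { SHADERS_OFF, SHADERS_PEND_ON, SHADERS_ON };

struct pm_backend {
	enum l2_state l2;
	enum shader_state shaders;
	bool gpu_desired;
};

/* Each machine inspects only its own inputs and reports whether it moved. */
static bool l2_update(struct pm_backend *b)
{
	enum l2_state prev = b->l2;

	if (b->gpu_desired && b->l2 == L2_OFF)
		b->l2 = L2_PEND_ON;
	else if (b->l2 == L2_PEND_ON)
		b->l2 = L2_ON;		/* pretend the power-up IRQ arrived */
	else if (!b->gpu_desired && b->shaders == SHADERS_OFF)
		b->l2 = L2_OFF;

	return b->l2 != prev;
}

static bool shaders_update(struct pm_backend *b)
{
	enum shader_state prev = b->shaders;

	if (b->gpu_desired && b->l2 == L2_ON && b->shaders == SHADERS_OFF)
		b->shaders = SHADERS_PEND_ON;
	else if (b->shaders == SHADERS_PEND_ON)
		b->shaders = SHADERS_ON;
	else if (!b->gpu_desired)
		b->shaders = SHADERS_OFF;

	return b->shaders != prev;
}

/* The single entry point callers use, instead of mutual recursion. */
static void pm_update_state(struct pm_backend *b)
{
	bool changed;

	do {
		changed = l2_update(b);
		changed |= shaders_update(b);
	} while (changed);
}

int main(void)
{
	struct pm_backend b = { L2_OFF, SHADERS_OFF, true };

	pm_update_state(&b);
	printf("l2=%d shaders=%d\n", b.l2, b.shaders);	/* both end up ON */
	return 0;
}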
++ */ ++ } ++ break; + -+ return 0; -+} ++ case KBASE_L2_RESTORE_CLOCKS: ++ /* We always assume only GPUs being affected by ++ * BASE_HW_ISSUE_GPU2017_1336 fall into this state ++ */ ++ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + -+/** -+ * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event -+ * -+ * @group: Pointer to the GPU command queue group that encountered the error -+ */ -+static void report_tiler_oom_error(struct kbase_queue_group *group) -+{ -+ struct base_csf_notification const -+ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, -+ .payload = { -+ .csg_error = { -+ .handle = group->handle, -+ .error = { -+ .error_type = -+ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, -+ } } } }; ++ /* If L2 not needed, we need to make sure cancellation ++ * of any previously issued work to restore GPU clock. ++ * For it, move to KBASE_L2_SLOW_DOWN_CLOCKS state. ++ */ ++ if (!kbase_pm_is_l2_desired(kbdev)) { ++ backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; ++ break; ++ } + -+ kbase_csf_event_add_error(group->kctx, -+ &group->error_tiler_oom, -+ &error); -+ kbase_event_wakeup(group->kctx); -+} ++ backend->gpu_clock_slow_down_desired = false; ++ if (backend->gpu_clock_slowed_down) ++ kbase_pm_control_gpu_clock(kbdev); ++ else ++ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; ++ break; + -+static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) -+{ -+ int err; -+ const unsigned int cache_flush_wait_timeout_ms = 2000; ++ case KBASE_L2_ON_HWCNT_ENABLE: ++#if !MALI_USE_CSF ++ backend->hwcnt_desired = true; ++ if (backend->hwcnt_disabled) { ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++ backend->hwcnt_disabled = false; ++ } ++#endif ++ backend->l2_state = KBASE_L2_ON; ++ break; + -+ kbase_pm_lock(kbdev); -+ /* With the advent of partial cache flush, dirty cache lines could -+ * be left in the GPU L2 caches by terminating the queue group here -+ * without waiting for proper cache maintenance. A full cache flush -+ * here will prevent these dirty cache lines from being arbitrarily -+ * evicted later and possible causing memory corruption. -+ */ -+ if (kbdev->pm.backend.gpu_powered) { -+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); -+ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); ++ case KBASE_L2_ON: ++ if (!kbase_pm_is_l2_desired(kbdev)) { ++#if !MALI_USE_CSF ++ /* Do not power off L2 until the shaders and ++ * core stacks are off. ++ */ ++ if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) ++ break; ++#else ++ /* Do not power off L2 until the MCU has been stopped */ ++ if ((backend->mcu_state != KBASE_MCU_OFF) && ++ (backend->mcu_state != KBASE_MCU_IN_SLEEP)) ++ break; ++#endif + -+ if (err) { -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Timeout waiting for cache clean to complete after fatal error", -+ kbase_backend_get_cycle_cnt(kbdev)); ++ /* We need to make sure hardware counters are ++ * disabled before powering down the L2, to ++ * prevent loss of data. ++ * ++ * We waited until after the cores were powered ++ * down to prevent ping-ponging between hwcnt ++ * enabled and disabled, which would have ++ * happened if userspace submitted more work ++ * while we were trying to power down. 
++ */ ++ backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; ++ } ++ break; + -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+ } -+ } ++ case KBASE_L2_ON_HWCNT_DISABLE: ++#if !MALI_USE_CSF ++ /* If the L2 became desired while we were waiting on the ++ * worker to do the actual hwcnt disable (which might ++ * happen if some work was submitted immediately after ++ * the shaders powered off), then we need to early-out ++ * of this state and re-enable hwcnt. ++ * ++ * If we get lucky, the hwcnt disable might not have ++ * actually started yet, and the logic in the hwcnt ++ * enable state will prevent the worker from ++ * performing the disable entirely, preventing loss of ++ * any hardware counter data. ++ * ++ * If the hwcnt disable has started, then we'll lose ++ * a tiny amount of hardware counter data between the ++ * disable and the re-enable occurring. ++ * ++ * This loss of data is preferable to the alternative, ++ * which is to block the shader cores from doing any ++ * work until we're sure hwcnt has been re-enabled. ++ */ ++ if (kbase_pm_is_l2_desired(kbdev)) { ++ backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; ++ break; ++ } + -+ kbase_pm_unlock(kbdev); -+} ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++#endif + -+/** -+ * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. -+ * -+ * @queue: Pointer to queue for which out-of-memory event was received. -+ * -+ * Called with the CSF locked for the affected GPU virtual address space. -+ * Do not call in interrupt context. -+ * -+ * Handles tiler out-of-memory for a GPU command queue and then clears the -+ * notification to allow the firmware to report out-of-memory again in future. -+ * If the out-of-memory condition was successfully handled then this function -+ * rings the relevant doorbell to notify the firmware; otherwise, it terminates -+ * the GPU command queue group to which the queue is bound and notify a waiting -+ * user space client of the failure. -+ */ -+static void kbase_queue_oom_event(struct kbase_queue *const queue) -+{ -+ struct kbase_context *const kctx = queue->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_queue_group *group; -+ int slot_num, err; -+ struct kbase_csf_cmd_stream_group_info const *ginfo; -+ struct kbase_csf_cmd_stream_info const *stream; -+ int csi_index = queue->csi_index; -+ u32 cs_oom_ack, cs_oom_req; -+ unsigned long flags; ++ if (backend->hwcnt_disabled) { ++ if (kbdev->pm.backend.gpu_clock_slow_down_wa) ++ backend->l2_state = ++ KBASE_L2_SLOW_DOWN_CLOCKS; ++ else ++ backend->l2_state = KBASE_L2_POWER_DOWN; ++ } ++ break; + -+ lockdep_assert_held(&kctx->csf.lock); ++ case KBASE_L2_SLOW_DOWN_CLOCKS: ++ /* We always assume only GPUs being affected by ++ * BASE_HW_ISSUE_GPU2017_1336 fall into this state ++ */ ++ WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + -+ group = get_bound_queue_group(queue); -+ if (!group) { -+ dev_warn(kctx->kbdev->dev, "queue not bound\n"); -+ return; -+ } ++ /* L2 needs to be powered up. And we need to make sure ++ * cancellation of any previously issued work to slow ++ * down GPU clock. For it, we move to the state, ++ * KBASE_L2_RESTORE_CLOCKS. 
++ */ ++ if (kbase_pm_is_l2_desired(kbdev)) { ++ backend->l2_state = KBASE_L2_RESTORE_CLOCKS; ++ break; ++ } + -+ kbase_csf_scheduler_lock(kbdev); ++ backend->gpu_clock_slow_down_desired = true; ++ if (!backend->gpu_clock_slowed_down) ++ kbase_pm_control_gpu_clock(kbdev); ++ else ++ backend->l2_state = KBASE_L2_POWER_DOWN; + -+ slot_num = kbase_csf_scheduler_group_get_slot(group); ++ break; + -+ /* The group could have gone off slot before this work item got -+ * a chance to execute. -+ */ -+ if (slot_num < 0) -+ goto unlock; ++ case KBASE_L2_POWER_DOWN: ++ if (kbase_pm_is_l2_desired(kbdev)) ++ backend->l2_state = KBASE_L2_PEND_ON; ++ else if (can_power_down_l2(kbdev)) { ++ if (!backend->l2_always_on) ++ /* Powering off the L2 will also power off the ++ * tiler. ++ */ ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, ++ l2_present, ++ ACTION_PWROFF); ++ else ++ /* If L2 cache is powered then we must flush it ++ * before we power off the GPU. Normally this ++ * would have been handled when the L2 was ++ * powered off. ++ */ ++ kbase_gpu_start_cache_clean_nolock( ++ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); ++#if !MALI_USE_CSF ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); ++#else ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u); ++#endif ++ backend->l2_state = KBASE_L2_PEND_OFF; ++ } ++ break; + -+ /* If the bound group is on slot yet the kctx is marked with disabled -+ * on address-space fault, the group is pending to be killed. So skip -+ * the inflight oom operation. -+ */ -+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) -+ goto unlock; ++ case KBASE_L2_PEND_OFF: ++ if (!backend->l2_always_on) { ++ /* We only need to check the L2 here - if the L2 ++ * is off then the tiler is definitely also off. ++ */ ++ if (!l2_trans && !l2_ready) { ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ /* Allow clock gating within the GPU and prevent it ++ * from being seen as active during sleep. ++ */ ++ kbase_ipa_control_handle_gpu_sleep_enter(kbdev); ++#endif ++ /* L2 is now powered off */ ++ backend->l2_state = KBASE_L2_OFF; ++ } ++ } else { ++ if (!kbdev->cache_clean_in_progress) { ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ /* Allow clock gating within the GPU and prevent it ++ * from being seen as active during sleep. ++ */ ++ kbase_ipa_control_handle_gpu_sleep_enter(kbdev); ++#endif ++ backend->l2_state = KBASE_L2_OFF; ++ } ++ } ++ break; + -+ ginfo = &kbdev->csf.global_iface.groups[slot_num]; -+ stream = &ginfo->streams[csi_index]; -+ cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & -+ CS_ACK_TILER_OOM_MASK; -+ cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & -+ CS_REQ_TILER_OOM_MASK; ++ case KBASE_L2_RESET_WAIT: ++ /* Reset complete */ ++ if (!backend->in_reset) ++ backend->l2_state = KBASE_L2_OFF; ++ break; + -+ /* The group could have already undergone suspend-resume cycle before -+ * this work item got a chance to execute. On CSG resume the CS_ACK -+ * register is set by firmware to reflect the CS_REQ register, which -+ * implies that all events signaled before suspension are implicitly -+ * acknowledged. -+ * A new OoM event is expected to be generated after resume. 
-+ */ -+ if (cs_oom_ack == cs_oom_req) -+ goto unlock; ++ default: ++ WARN(1, "Invalid state in l2_state: %d", ++ backend->l2_state); ++ } + -+ err = handle_oom_event(group, stream); ++ if (backend->l2_state != prev_state) { ++ dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", ++ kbase_l2_core_state_to_string(prev_state), ++ kbase_l2_core_state_to_string( ++ backend->l2_state)); ++ kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); ++ } + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, -+ CS_REQ_TILER_OOM_MASK); -+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } while (backend->l2_state != prev_state); + -+ if (unlikely(err)) { -+ dev_warn( -+ kbdev->dev, -+ "Queue group to be terminated, couldn't handle the OoM event\n"); -+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM); -+ kbase_csf_scheduler_unlock(kbdev); -+ term_queue_group(group); -+ flush_gpu_cache_on_fatal_error(kbdev); -+ report_tiler_oom_error(group); -+ return; ++ if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && ++ backend->l2_state == KBASE_L2_OFF) { ++ kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; ++ queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, ++ &kbdev->pm.backend.gpu_poweroff_wait_work); + } -+unlock: -+ kbase_csf_scheduler_unlock(kbdev); ++ ++ return 0; +} + -+/** -+ * oom_event_worker - Tiler out-of-memory handler called from a workqueue. -+ * -+ * @data: Pointer to a work_struct embedded in GPU command queue data. -+ * -+ * Handles a tiler out-of-memory condition for a GPU command queue and then -+ * releases a reference that was added to prevent the queue being destroyed -+ * while this work item was pending on a workqueue. -+ */ -+static void oom_event_worker(struct work_struct *data) ++static void shader_poweroff_timer_stop_callback(struct work_struct *data) +{ -+ struct kbase_queue *queue = -+ container_of(data, struct kbase_queue, oom_event_work); -+ struct kbase_context *kctx = queue->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ int err = kbase_reset_gpu_try_prevent(kbdev); ++ unsigned long flags; ++ struct kbasep_pm_tick_timer_state *stt = container_of(data, ++ struct kbasep_pm_tick_timer_state, work); ++ struct kbase_device *kbdev = container_of(stt, struct kbase_device, ++ pm.backend.shader_tick_timer); + -+ /* Regardless of whether reset failed or is currently happening, exit -+ * early -+ */ -+ if (err) -+ return; ++ hrtimer_cancel(&stt->timer); + -+ mutex_lock(&kctx->csf.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ kbase_queue_oom_event(queue); -+ release_queue(queue); ++ stt->cancel_queued = false; ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_pm_update_state(kbdev); + -+ mutex_unlock(&kctx->csf.lock); -+ kbase_reset_gpu_allow(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/** -+ * report_group_timeout_error - Report the timeout error for the group to userspace. 
++ * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer ++ * @kbdev: pointer to kbase device + * -+ * @group: Pointer to the group for which timeout error occurred -+ */ -+static void report_group_timeout_error(struct kbase_queue_group *const group) -+{ -+ struct base_csf_notification const -+ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, -+ .payload = { -+ .csg_error = { -+ .handle = group->handle, -+ .error = { -+ .error_type = -+ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, -+ } } } }; -+ -+ dev_warn(group->kctx->kbdev->dev, -+ "Notify the event notification thread, forward progress timeout (%llu cycles)\n", -+ kbase_csf_timeout_get(group->kctx->kbdev)); -+ -+ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); -+ kbase_event_wakeup(group->kctx); -+} -+ -+/** -+ * timer_event_worker - Handle the progress timeout error for the group ++ * Synchronization between the shader state machine and the timer thread is ++ * difficult. This is because situations may arise where the state machine ++ * wants to start the timer, but the callback is already running, and has ++ * already passed the point at which it checks whether it is required, and so ++ * cancels itself, even though the state machine may have just tried to call ++ * hrtimer_start. + * -+ * @data: Pointer to a work_struct embedded in GPU command queue group data. ++ * This cannot be stopped by holding hwaccess_lock in the timer thread, ++ * because there are still infinitesimally small sections at the start and end ++ * of the callback where the lock is not held. + * -+ * Terminate the CSG and report the error to userspace ++ * Instead, a new state is added to the shader state machine, ++ * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee ++ * that when the shaders are switched off, the timer has definitely been ++ * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the ++ * timer is started, it is guaranteed that either the timer is already running ++ * (from an availability change or cancelled timer), or hrtimer_start will ++ * succeed. It is critical to avoid ending up in ++ * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could ++ * hang there forever. + */ -+static void timer_event_worker(struct work_struct *data) ++static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) +{ -+ struct kbase_queue_group *const group = -+ container_of(data, struct kbase_queue_group, timer_event_work); -+ struct kbase_context *const kctx = group->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ bool reset_prevented = false; -+ int err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", -+ group->handle); -+ else -+ reset_prevented = true; ++ struct kbasep_pm_tick_timer_state *stt = ++ &kbdev->pm.backend.shader_tick_timer; + -+ mutex_lock(&kctx->csf.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ term_queue_group(group); -+ flush_gpu_cache_on_fatal_error(kbdev); -+ report_group_timeout_error(group); ++ stt->needed = false; + -+ mutex_unlock(&kctx->csf.lock); -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); ++ if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { ++ stt->cancel_queued = true; ++ queue_work(stt->wq, &stt->work); ++ } +} + -+/** -+ * handle_progress_timer_event - Progress timer timeout event handler. 
-+ * -+ * @group: Pointer to GPU queue group for which the timeout event is received. -+ * -+ * Notify a waiting user space client of the timeout. -+ * Enqueue a work item to terminate the group and notify the event notification -+ * thread of progress timeout fault for the GPU command queue group. -+ */ -+static void handle_progress_timer_event(struct kbase_queue_group *const group) ++#if !MALI_USE_CSF ++static const char *kbase_shader_core_state_to_string( ++ enum kbase_shader_core_state state) +{ -+ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, -+ DF_PROGRESS_TIMER_TIMEOUT); -+ -+ queue_work(group->kctx->csf.wq, &group->timer_event_work); ++ const char *const strings[] = { ++#define KBASEP_SHADER_STATE(n) #n, ++#include "mali_kbase_pm_shader_states.h" ++#undef KBASEP_SHADER_STATE ++ }; ++ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) ++ return "Bad shader core state"; ++ else ++ return strings[state]; +} + -+/** -+ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected -+ * memory for the protected mode suspend buffer. -+ * @group: Pointer to the GPU queue group. -+ * -+ * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise -+ * negative error value. -+ */ -+static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) ++static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = group->kctx->kbdev; -+ struct kbase_context *kctx = group->kctx; -+ struct tagged_addr *phys = NULL; -+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; -+ size_t nr_pages; -+ int err = 0; -+ -+ if (likely(sbuf->pma)) -+ return 0; -+ -+ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); -+ if (unlikely(!phys)) { -+ err = -ENOMEM; -+ goto phys_free; -+ } ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ struct kbasep_pm_tick_timer_state *stt = ++ &kbdev->pm.backend.shader_tick_timer; ++ enum kbase_shader_core_state prev_state; ++ u64 stacks_avail = 0; + -+ mutex_lock(&kctx->csf.lock); -+ kbase_csf_scheduler_lock(kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (unlikely(!group->csg_reg)) { -+ /* The only chance of the bound csg_reg is removed from the group is -+ * that it has been put off slot by the scheduler and the csg_reg resource -+ * is contended by other groups. In this case, it needs another occasion for -+ * mapping the pma, which needs a bound csg_reg. Since the group is already -+ * off-slot, returning no error is harmless as the scheduler, when place the -+ * group back on-slot again would do the required MMU map operation on the -+ * allocated and retained pma. ++ if (corestack_driver_control) ++ /* Always power on all the corestacks. Disabling certain ++ * corestacks when their respective shaders are not in the ++ * available bitmap is not currently supported. 
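++		 * The present-core mask read into stacks_avail just below is
++		 * used unchanged for both the stack power-on and the later
++		 * stack power-off requests in this state machine.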
+ */ -+ WARN_ON(group->csg_nr >= 0); -+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", -+ group->kctx->tgid, group->kctx->id, group->handle); -+ goto unlock; -+ } -+ -+ /* Allocate the protected mode pages */ -+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); -+ if (unlikely(!sbuf->pma)) { -+ err = -ENOMEM; -+ goto unlock; -+ } ++ stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); + -+ /* Map the bound susp_reg to the just allocated pma pages */ -+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); ++ do { ++ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 stacks_trans = 0; ++ u64 stacks_ready = 0; + -+unlock: -+ kbase_csf_scheduler_unlock(kbdev); -+ mutex_unlock(&kctx->csf.lock); -+phys_free: -+ kfree(phys); -+ return err; -+} ++ if (corestack_driver_control) { ++ stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); ++ stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); ++ } + -+static void report_group_fatal_error(struct kbase_queue_group *const group) -+{ -+ struct base_gpu_queue_group_error const -+ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { .fatal_group = { -+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, -+ } } }; ++ /* ++ * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores ++ * are vulnerable to corruption if gpu is lost ++ */ ++ if (kbase_is_gpu_removed(kbdev) ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ || kbase_pm_is_gpu_lost(kbdev)) { ++#else ++ ) { ++#endif ++ backend->shaders_state = ++ KBASE_SHADERS_OFF_CORESTACK_OFF; ++ dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); ++ break; ++ } + -+ kbase_csf_add_group_fatal_error(group, &err_payload); -+ kbase_event_wakeup(group->kctx); -+} ++ /* mask off ready from trans in case transitions finished ++ * between the register reads ++ */ ++ shaders_trans &= ~shaders_ready; ++ stacks_trans &= ~stacks_ready; + -+/** -+ * protm_event_worker - Protected mode switch request event handler -+ * called from a workqueue. -+ * -+ * @data: Pointer to a work_struct embedded in GPU command queue group data. -+ * -+ * Request to switch to protected mode. -+ */ -+static void protm_event_worker(struct work_struct *data) -+{ -+ struct kbase_queue_group *const group = -+ container_of(data, struct kbase_queue_group, protm_event_work); -+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; -+ int err = 0; ++ prev_state = backend->shaders_state; + -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, -+ group, 0u); ++ switch (backend->shaders_state) { ++ case KBASE_SHADERS_OFF_CORESTACK_OFF: ++ /* Ignore changes to the shader core availability ++ * except at certain points where we can handle it, ++ * i.e. off and SHADERS_ON_CORESTACK_ON. 
++ */ ++ backend->shaders_desired_mask = ++ kbase_pm_ca_get_core_mask(kbdev); ++ backend->pm_shaders_core_mask = 0; + -+ err = alloc_grp_protected_suspend_buffer_pages(group); -+ if (!err) { -+ kbase_csf_scheduler_group_protm_enter(group); -+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { -+ sbuf->alloc_retries++; -+ /* try again to allocate pages */ -+ queue_work(group->kctx->csf.wq, &group->protm_event_work); -+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { -+ dev_err(group->kctx->kbdev->dev, -+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", -+ group->handle, group->kctx->tgid, group->kctx->id); -+ report_group_fatal_error(group); -+ } ++ if (backend->shaders_desired && ++ backend->l2_state == KBASE_L2_ON) { ++ if (backend->hwcnt_desired && ++ !backend->hwcnt_disabled) { ++ /* Trigger a hwcounter dump */ ++ backend->hwcnt_desired = false; ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ } + -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, -+ group, 0u); -+} ++ if (backend->hwcnt_disabled) { ++ if (corestack_driver_control) { ++ kbase_pm_invoke(kbdev, ++ KBASE_PM_CORE_STACK, ++ stacks_avail, ++ ACTION_PWRON); ++ } ++ backend->shaders_state = ++ KBASE_SHADERS_OFF_CORESTACK_PEND_ON; ++ } ++ } ++ break; + -+/** -+ * handle_fault_event - Handler for CS fault. -+ * -+ * @queue: Pointer to queue for which fault event was received. -+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for -+ * the queue. -+ * -+ * Print required information about the CS fault and notify the user space client -+ * about the fault. -+ */ -+static void -+handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) -+{ -+ struct kbase_device *const kbdev = queue->kctx->kbdev; -+ struct kbase_csf_cmd_stream_group_info const *ginfo = -+ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; -+ struct kbase_csf_cmd_stream_info const *stream = -+ &ginfo->streams[queue->csi_index]; -+ const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); -+ const u64 cs_fault_info = -+ kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | -+ ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) -+ << 32); -+ const u8 cs_fault_exception_type = -+ CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); -+ const u32 cs_fault_exception_data = -+ CS_FAULT_EXCEPTION_DATA_GET(cs_fault); -+ const u64 cs_fault_info_exception_data = -+ CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); ++ case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: ++ if (!stacks_trans && stacks_ready == stacks_avail) { ++ backend->shaders_avail = ++ backend->shaders_desired_mask; ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail, ACTION_PWRON); + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_POWER_ON); + -+ dev_warn(kbdev->dev, -+ "Ctx %d_%d Group %d CSG %d CSI: %d\n" -+ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" -+ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" -+ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", -+ queue->kctx->tgid, queue->kctx->id, queue->group->handle, -+ queue->group->csg_nr, queue->csi_index, -+ cs_fault_exception_type, -+ kbase_gpu_exception_name(cs_fault_exception_type), -+ cs_fault_exception_data, cs_fault_info_exception_data); ++ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; ++ } ++ break; + ++ case 
KBASE_SHADERS_PEND_ON_CORESTACK_ON: ++ if (!shaders_trans && shaders_ready == backend->shaders_avail) { ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); ++ backend->pm_shaders_core_mask = shaders_ready; ++ backend->hwcnt_desired = true; ++ if (backend->hwcnt_disabled) { ++#if MALI_USE_CSF ++ unsigned long flags; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the -+ * standpoint of dump on error. It is used to report fault for the CSIs -+ * that are associated with the same CSG as the CSI for which the actual -+ * fault was reported by the Iterator. -+ * Dumping would be triggered when the actual fault is reported. -+ * -+ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error -+ * in other types of queues (cpu/kcpu). If a fault had occurred in some -+ * other GPU queue then the dump would have been performed anyways when -+ * that fault was reported. -+ */ -+ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && -+ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { -+ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { -+ get_queue(queue); -+ queue->cs_error = cs_fault; -+ queue->cs_error_info = cs_fault_info; -+ queue->cs_error_fatal = false; -+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) -+ release_queue(queue); -+ return; -+ } -+ } ++ kbase_csf_scheduler_spin_lock(kbdev, ++ &flags); +#endif -+ -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, -+ CS_REQ_FAULT_MASK); -+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); -+} -+ -+static void report_queue_fatal_error(struct kbase_queue *const queue, -+ u32 cs_fatal, u64 cs_fatal_info, -+ u8 group_handle) -+{ -+ struct base_csf_notification error = { -+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, -+ .payload = { -+ .csg_error = { -+ .handle = group_handle, -+ .error = { -+ .error_type = -+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, -+ .payload = { -+ .fatal_queue = { -+ .sideband = cs_fatal_info, -+ .status = cs_fatal, -+ .csi_index = queue->csi_index, -+ } -+ } ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, ++ flags); ++#endif ++ backend->hwcnt_disabled = false; + } -+ } -+ } -+ }; -+ -+ kbase_csf_event_add_error(queue->kctx, &queue->error, &error); -+ kbase_event_wakeup(queue->kctx); -+} + -+/** -+ * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue -+ * -+ * @data: Pointer to a work_struct embedded in GPU command queue. -+ * -+ * Terminate the CSG and report the error to userspace. 
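-+ * When CONFIG_DEBUG_FS is enabled it also handles the non-fatal
-+ * CS_FAULT case, where it only acknowledges the fault and rings the
-+ * doorbell instead of terminating the group.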
-+ */ -+static void cs_error_worker(struct work_struct *const data) -+{ -+ struct kbase_queue *const queue = -+ container_of(data, struct kbase_queue, cs_error_work); -+ struct kbase_context *const kctx = queue->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_queue_group *group; -+ u8 group_handle; -+ bool reset_prevented = false; -+ int err; ++ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; ++ } ++ break; + -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ case KBASE_SHADERS_ON_CORESTACK_ON: ++ backend->shaders_desired_mask = ++ kbase_pm_ca_get_core_mask(kbdev); + -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless"); -+ else -+ reset_prevented = true; ++ /* If shaders to change state, trigger a counter dump */ ++ if (!backend->shaders_desired || ++ (backend->shaders_desired_mask != shaders_ready)) { ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ backend->shaders_state = ++ KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; ++ } ++ break; + -+ mutex_lock(&kctx->csf.lock); ++ case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: ++ backend->shaders_desired_mask = ++ kbase_pm_ca_get_core_mask(kbdev); + -+ group = get_bound_queue_group(queue); -+ if (!group) { -+ dev_warn(kbdev->dev, "queue not bound when handling fatal event"); -+ goto unlock; -+ } ++ if (!backend->hwcnt_disabled) { ++ /* Wait for being disabled */ ++ ; ++ } else if (!backend->shaders_desired) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_IDLE); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (!queue->cs_error_fatal) { -+ unsigned long flags; -+ int slot_num; ++ if (kbdev->pm.backend.protected_transition_override || ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ kbase_pm_is_suspending(kbdev) || ++ kbase_pm_is_gpu_lost(kbdev) || ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ !stt->configured_ticks || ++ WARN_ON(stt->cancel_queued)) { ++ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; ++ } else { ++ stt->remaining_ticks = stt->configured_ticks; ++ stt->needed = true; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group); -+ if (slot_num >= 0) { -+ struct kbase_csf_cmd_stream_group_info const *ginfo = -+ &kbdev->csf.global_iface.groups[slot_num]; -+ struct kbase_csf_cmd_stream_info const *stream = -+ &ginfo->streams[queue->csi_index]; -+ u32 const cs_ack = -+ kbase_csf_firmware_cs_output(stream, CS_ACK); ++ /* The shader hysteresis timer is not ++ * done the obvious way, which would be ++ * to start an hrtimer when the shader ++ * power off is requested. Instead, ++ * use a 'tick' timer, and set the ++ * remaining number of ticks on a power ++ * off request. This avoids the ++ * latency of starting, then ++ * immediately cancelling an hrtimer ++ * when the shaders are re-requested ++ * before the timeout expires. 
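++				 *
++				 * For example, with configured_ticks == 2 an
++				 * idle request just sets remaining_ticks = 2
++				 * and leaves the timer free-running (starting
++				 * it below if it was not already active); two
++				 * callbacks later shader_tick_timer_callback()
++				 * sees remaining_ticks hit 0 and kicks
++				 * kbase_pm_update_state() to power the cores
++				 * off. If the shaders are desired again before
++				 * that, the WAIT_OFF state simply moves back
++				 * to the RECHECK state, with no
++				 * hrtimer_cancel() on that path.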
++ */ ++ if (!hrtimer_active(&stt->timer)) ++ hrtimer_start(&stt->timer, ++ stt->configured_interval, ++ HRTIMER_MODE_REL); + -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, -+ CS_REQ_FAULT_MASK); -+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, -+ slot_num, true); -+ } -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ goto unlock; -+ } -+#endif ++ backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; ++ } ++ } else if (backend->shaders_desired_mask & ~shaders_ready) { ++ /* set cores ready but not available to ++ * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON ++ * check pass ++ */ ++ backend->shaders_avail = ++ (backend->shaders_desired_mask | shaders_ready); + -+ group_handle = group->handle; -+ term_queue_group(group); -+ flush_gpu_cache_on_fatal_error(kbdev); -+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, -+ group_handle); ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail & ~shaders_ready, ++ ACTION_PWRON); ++ backend->shaders_state = ++ KBASE_SHADERS_PEND_ON_CORESTACK_ON; ++ } else if (shaders_ready & ~backend->shaders_desired_mask) { ++ backend->shaders_state = ++ KBASE_SHADERS_WAIT_GPU_IDLE; ++ } else { ++ backend->shaders_state = ++ KBASE_SHADERS_PEND_ON_CORESTACK_ON; ++ } ++ break; + -+unlock: -+ release_queue(queue); -+ mutex_unlock(&kctx->csf.lock); -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); -+} ++ case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: ++ if (WARN_ON(!hrtimer_active(&stt->timer))) { ++ stt->remaining_ticks = 0; ++ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; ++ } + -+/** -+ * handle_fatal_event - Handler for CS fatal. -+ * -+ * @queue: Pointer to queue for which fatal event was received. -+ * @stream: Pointer to the structure containing info provided by the -+ * firmware about the CSI. -+ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for -+ * the queue. -+ * -+ * Notify a waiting user space client of the CS fatal and prints meaningful -+ * information. -+ * Enqueue a work item to terminate the group and report the fatal error -+ * to user space. 
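-+ * A firmware internal error is routed to the firmware error worker
-+ * instead, and a CS_UNRECOVERABLE fatal additionally triggers a full
-+ * GPU reset before the per-queue error work is queued.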
-+ */ -+static void -+handle_fatal_event(struct kbase_queue *const queue, -+ struct kbase_csf_cmd_stream_info const *const stream, -+ u32 cs_ack) -+{ -+ const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); -+ const u64 cs_fatal_info = -+ kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) | -+ ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) -+ << 32); -+ const u32 cs_fatal_exception_type = -+ CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); -+ const u32 cs_fatal_exception_data = -+ CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); -+ const u64 cs_fatal_info_exception_data = -+ CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); -+ struct kbase_device *const kbdev = queue->kctx->kbdev; ++ if (backend->shaders_desired) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_TIMER_HIT); + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ stt->remaining_ticks = 0; ++ backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; ++ } else if (stt->remaining_ticks == 0) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_TIMER_MISS); + -+ dev_warn(kbdev->dev, -+ "Ctx %d_%d Group %d CSG %d CSI: %d\n" -+ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" -+ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" -+ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", -+ queue->kctx->tgid, queue->kctx->id, queue->group->handle, -+ queue->group->csg_nr, queue->csi_index, -+ cs_fatal_exception_type, -+ kbase_gpu_exception_name(cs_fatal_exception_type), -+ cs_fatal_exception_data, cs_fatal_info_exception_data); ++ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ } else if (kbase_pm_is_suspending(kbdev) || ++ kbase_pm_is_gpu_lost(kbdev)) { ++ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ } ++ break; + -+ if (cs_fatal_exception_type == -+ CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { -+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR); -+ queue_work(system_wq, &kbdev->csf.fw_error_work); -+ } else { -+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL); -+ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { -+ queue->group->cs_unrecoverable = true; -+ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(queue->kctx->kbdev); -+ } -+ get_queue(queue); -+ queue->cs_error = cs_fatal; -+ queue->cs_error_info = cs_fatal_info; -+ queue->cs_error_fatal = true; -+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) -+ release_queue(queue); -+ } ++ case KBASE_SHADERS_WAIT_GPU_IDLE: ++ /* If partial shader core off need to wait the job in ++ * running and next register finished then flush L2 ++ * or it might hit GPU2017-861 ++ */ ++ if (!kbase_gpu_atoms_submitted_any(kbdev)) { ++ backend->partial_shaderoff = true; ++ backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; ++ } ++ break; + -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, -+ CS_REQ_FATAL_MASK); ++ case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: ++ if (!backend->partial_shaderoff) ++ shader_poweroff_timer_queue_cancel(kbdev); + -+} ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { ++ kbase_gpu_start_cache_clean_nolock( ++ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); ++ backend->shaders_state = ++ 
KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; ++ } else { ++ backend->shaders_state = ++ KBASE_SHADERS_READY_OFF_CORESTACK_ON; ++ } ++ break; + -+/** -+ * process_cs_interrupts - Process interrupts for a CS. -+ * -+ * @group: Pointer to GPU command queue group data. -+ * @ginfo: The CSG interface provided by the firmware. -+ * @irqreq: CSG's IRQ request bitmask (one bit per CS). -+ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). -+ * @track: Pointer that tracks the highest scanout priority idle CSG -+ * and any newly potentially viable protected mode requesting -+ * CSG in current IRQ context. -+ * -+ * If the interrupt request bitmask differs from the acknowledge bitmask -+ * then the firmware is notifying the host of an event concerning those -+ * CSs indicated by bits whose value differs. The actions required -+ * are then determined by examining which notification flags differ between -+ * the request and acknowledge registers for the individual CS(s). -+ */ -+static void process_cs_interrupts(struct kbase_queue_group *const group, -+ struct kbase_csf_cmd_stream_group_info const *const ginfo, -+ u32 const irqreq, u32 const irqack, -+ struct irq_idle_and_protm_track *track) -+{ -+ struct kbase_device *const kbdev = group->kctx->kbdev; -+ u32 remaining = irqreq ^ irqack; -+ bool protm_pend = false; -+ const bool group_suspending = -+ !kbase_csf_scheduler_group_events_enabled(kbdev, group); ++ case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: ++ if (!kbdev->cache_clean_in_progress) ++ backend->shaders_state = ++ KBASE_SHADERS_READY_OFF_CORESTACK_ON; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ break; + -+ while (remaining != 0) { -+ int const i = ffs(remaining) - 1; -+ struct kbase_queue *const queue = group->bound_queues[i]; ++ case KBASE_SHADERS_READY_OFF_CORESTACK_ON: ++ if (backend->partial_shaderoff) { ++ backend->partial_shaderoff = false; ++ /* remove cores available but not ready to ++ * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON ++ * check pass ++ */ + -+ remaining &= ~(1 << i); ++ /* shaders_desired_mask shall be a subset of ++ * shaders_ready ++ */ ++ WARN_ON(backend->shaders_desired_mask & ~shaders_ready); ++ WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); + -+ /* The queue pointer can be NULL, but if it isn't NULL then it -+ * cannot disappear since scheduler spinlock is held and before -+ * freeing a bound queue it has to be first unbound which -+ * requires scheduler spinlock. 
-+ */ -+ if (queue && !WARN_ON(queue->csi_index != i)) { -+ struct kbase_csf_cmd_stream_info const *const stream = -+ &ginfo->streams[i]; -+ u32 const cs_req = kbase_csf_firmware_cs_input_read( -+ stream, CS_REQ); -+ u32 const cs_ack = -+ kbase_csf_firmware_cs_output(stream, CS_ACK); -+ struct workqueue_struct *wq = group->kctx->csf.wq; ++ backend->shaders_avail = ++ backend->shaders_desired_mask; ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); ++ backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); ++ } else { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ shaders_ready, ACTION_PWROFF); + -+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, -+ group, queue, cs_req ^ cs_ack); -+ handle_fatal_event(queue, stream, cs_ack); -+ } ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); + -+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, -+ group, queue, cs_req ^ cs_ack); -+ handle_fault_event(queue, cs_ack); ++ backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; + } ++ break; + -+ /* PROTM_PEND and TILER_OOM can be safely ignored -+ * because they will be raised again if the group -+ * is assigned a CSG slot in future. -+ */ -+ if (group_suspending) { -+ u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; -+ u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; ++ case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: ++ if (!shaders_trans && !shaders_ready) { ++ if (corestack_driver_control) ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, ++ stacks_avail, ACTION_PWROFF); + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, -+ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, -+ group, queue, -+ cs_req_remain ^ cs_ack_remain); -+ continue; ++ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; + } ++ break; + -+ if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ -+ (cs_ack & CS_ACK_TILER_OOM_MASK))) { -+ get_queue(queue); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, -+ group, queue, cs_req ^ cs_ack); -+ if (!queue_work(wq, &queue->oom_event_work)) { -+ /* The work item shall not have been -+ * already queued, there can be only -+ * one pending OoM event for a -+ * queue. 
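-+				 * If queue_work() nevertheless returns false
-+				 * the item was already pending, so the
-+				 * reference taken above with get_queue() is
-+				 * dropped again via release_queue() below.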
-+ */ -+ dev_warn( -+ kbdev->dev, -+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", -+ queue->csi_index, group->handle, queue->kctx->tgid, -+ queue->kctx->id); -+ release_queue(queue); ++ case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: ++ if (!stacks_trans && !stacks_ready) { ++ /* On powered off, re-enable the hwcnt */ ++ backend->pm_shaders_core_mask = 0; ++ backend->hwcnt_desired = true; ++ if (backend->hwcnt_disabled) { ++#if MALI_USE_CSF ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, ++ &flags); ++#endif ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, ++ flags); ++#endif ++ backend->hwcnt_disabled = false; + } ++ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + } ++ break; + -+ if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ -+ (cs_ack & CS_ACK_PROTM_PEND_MASK)) { -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, -+ group, queue, cs_req ^ cs_ack); -+ -+ dev_dbg(kbdev->dev, -+ "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", -+ queue->csi_index, group->handle, -+ group->csg_nr); ++ case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: ++ if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) ++ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; ++ break; + -+ bitmap_set(group->protm_pending_bitmap, i, 1); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue, -+ group->protm_pending_bitmap[0]); -+ protm_pend = true; -+ } ++ case KBASE_SHADERS_RESET_WAIT: ++ /* Reset complete */ ++ if (!backend->in_reset) ++ backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; ++ break; + } -+ } -+ -+ if (protm_pend) { -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ if (scheduler->tick_protm_pending_seq > group->scan_seq_num) { -+ scheduler->tick_protm_pending_seq = group->scan_seq_num; -+ track->protm_grp = group; -+ } ++ if (backend->shaders_state != prev_state) ++ dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", ++ kbase_shader_core_state_to_string(prev_state), ++ kbase_shader_core_state_to_string( ++ backend->shaders_state)); + -+ if (!group->protected_suspend_buf.pma) -+ queue_work(group->kctx->csf.wq, &group->protm_event_work); ++ } while (backend->shaders_state != prev_state); + -+ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { -+ clear_bit(group->csg_nr, -+ scheduler->csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, -+ scheduler->csg_slots_idle_mask[0]); -+ dev_dbg(kbdev->dev, -+ "Group-%d on slot %d de-idled by protm request", -+ group->handle, group->csg_nr); -+ } -+ } ++ return 0; +} ++#endif /* !MALI_USE_CSF */ + -+/** -+ * process_csg_interrupts - Process interrupts for a CSG. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @csg_nr: CSG number. -+ * @track: Pointer that tracks the highest idle CSG and the newly possible viable -+ * protected mode requesting group, in current IRQ context. -+ * -+ * Handles interrupts for a CSG and for CSs within it. -+ * -+ * If the CSG's request register value differs from its acknowledge register -+ * then the firmware is notifying the host of an event concerning the whole -+ * group. The actions required are then determined by examining which -+ * notification flags differ between those two register values. -+ * -+ * See process_cs_interrupts() for details of per-stream interrupt handling. 
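-+ * A CSG-level event is acknowledged by copying the relevant CSG_ACK
-+ * bit back into CSG_REQ via kbase_csf_firmware_csg_input_mask(),
-+ * mirroring the per-CS convention.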
-+ */ -+static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr, -+ struct irq_idle_and_protm_track *track) ++static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) +{ -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ struct kbase_queue_group *group = NULL; -+ u32 req, ack, irqreq, irqack; ++ bool in_desired_state = true; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) -+ return; ++ in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev); + -+ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; -+ req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); -+ ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); -+ irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ); -+ irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK); ++#if !MALI_USE_CSF ++ if (kbdev->pm.backend.shaders_desired && ++ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) ++ in_desired_state = false; ++ else if (!kbdev->pm.backend.shaders_desired && ++ kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) ++ in_desired_state = false; ++#else ++ in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); ++#endif + -+ /* There may not be any pending CSG/CS interrupts to process */ -+ if ((req == ack) && (irqreq == irqack)) -+ return; ++ return in_desired_state; ++} + -+ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before -+ * examining the CS_ACK & CS_REQ bits. This would ensure that Host -+ * doesn't misses an interrupt for the CS in the race scenario where -+ * whilst Host is servicing an interrupt for the CS, firmware sends -+ * another interrupt for that CS. -+ */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq); ++static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) ++{ ++ bool in_desired_state; ++ unsigned long flags; + -+ group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* The group pointer can be NULL here if interrupts for the group -+ * (like SYNC_UPDATE, IDLE notification) were delayed and arrived -+ * just after the suspension of group completed. However if not NULL -+ * then the group pointer cannot disappear even if User tries to -+ * terminate the group whilst this loop is running as scheduler -+ * spinlock is held and for freeing a group that is resident on a CSG -+ * slot scheduler spinlock is required. 
-+ */ -+ if (!group) -+ return; ++ return in_desired_state; ++} + -+ if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) -+ return; ++static bool kbase_pm_is_in_desired_state_with_l2_powered( ++ struct kbase_device *kbdev) ++{ ++ bool in_desired_state = false; ++ unsigned long flags; + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbase_pm_is_in_desired_state_nolock(kbdev) && ++ (kbdev->pm.backend.l2_state == KBASE_L2_ON)) ++ in_desired_state = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { -+ kbase_csf_firmware_csg_input_mask(ginfo, -+ CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); ++ return in_desired_state; ++} + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); ++static void kbase_pm_trace_power_state(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* SYNC_UPDATE events shall invalidate GPU idle event */ -+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ kbdev, ++ KBASE_PM_CORE_L2, ++ kbase_pm_get_ready_cores( ++ kbdev, KBASE_PM_CORE_L2)); ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ kbdev, ++ KBASE_PM_CORE_SHADER, ++ kbase_pm_get_ready_cores( ++ kbdev, KBASE_PM_CORE_SHADER)); ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ kbdev, ++ KBASE_PM_CORE_TILER, ++ kbase_pm_get_ready_cores( ++ kbdev, ++ KBASE_PM_CORE_TILER)); + -+ kbase_csf_event_signal_cpu_only(group->kctx); -+ } ++ if (corestack_driver_control) ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ kbdev, ++ KBASE_PM_CORE_STACK, ++ kbase_pm_get_ready_cores( ++ kbdev, ++ KBASE_PM_CORE_STACK)); ++} + -+ if ((req ^ ack) & CSG_REQ_IDLE_MASK) { -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++void kbase_pm_update_state(struct kbase_device *kbdev) ++{ ++#if !MALI_USE_CSF ++ enum kbase_shader_core_state prev_shaders_state = ++ kbdev->pm.backend.shaders_state; ++#else ++ enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; ++#endif + -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( -+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, -+ CSG_REQ_IDLE_MASK); ++ if (!kbdev->pm.backend.gpu_ready) ++ return; /* Do nothing if the GPU is not ready */ + -+ set_bit(csg_nr, scheduler->csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, -+ scheduler->csg_slots_idle_mask[0]); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); -+ dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", -+ group->handle, csg_nr); ++ if (kbase_pm_l2_update_state(kbdev)) ++ return; + -+ if (atomic_read(&scheduler->non_idle_offslot_grps)) { -+ /* If there are non-idle CSGs waiting for a slot, fire -+ * a tock for a replacement. 
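-+			 * (a tock being an asynchronous scheduling pass
-+			 * requested via kbase_csf_scheduler_invoke_tock(),
-+			 * as opposed to the periodic tick).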
-+ */ -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, -+ group, req ^ ack); -+ kbase_csf_scheduler_invoke_tock(kbdev); -+ } else { -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, -+ group, req ^ ack); -+ } ++#if !MALI_USE_CSF ++ if (kbase_pm_shaders_update_state(kbdev)) ++ return; + -+ if (group->scan_seq_num < track->idle_seq) { -+ track->idle_seq = group->scan_seq_num; -+ track->idle_slot = csg_nr; ++ /* If the shaders just turned off, re-invoke the L2 state machine, in ++ * case it was waiting for the shaders to turn off before powering down ++ * the L2. ++ */ ++ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && ++ kbdev->pm.backend.shaders_state == ++ KBASE_SHADERS_OFF_CORESTACK_OFF) { ++ if (kbase_pm_l2_update_state(kbdev)) ++ return; + } ++#else ++ if (kbase_pm_mcu_update_state(kbdev)) ++ return; ++ ++ if (!kbase_pm_is_mcu_inactive(kbdev, prev_mcu_state) && ++ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) { ++ if (kbase_pm_l2_update_state(kbdev)) ++ return; + } ++#endif + -+ if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, -+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK); ++ if (kbase_pm_is_in_desired_state_nolock(kbdev)) { ++ KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, ++ kbdev->pm.backend.shaders_avail); + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, -+ req ^ ack); -+ dev_info( -+ kbdev->dev, -+ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n", -+ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, -+ group->kctx->id, csg_nr); ++ kbase_pm_trace_power_state(kbdev); + -+ handle_progress_timer_event(group); ++ KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); ++ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + } -+ -+ process_cs_interrupts(group, ginfo, irqreq, irqack, track); -+ -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, -+ ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); +} + -+/** -+ * process_prfcnt_interrupts - Process performance counter interrupts. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @glb_req: Global request register value. -+ * @glb_ack: Global acknowledge register value. -+ * -+ * Handles interrupts issued by the firmware that relate to the performance -+ * counters. For example, on completion of a performance counter sample. It is -+ * expected that the scheduler spinlock is already held on calling this -+ * function. -+ */ -+static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, -+ u32 glb_ack) ++static enum hrtimer_restart ++shader_tick_timer_callback(struct hrtimer *timer) +{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; ++ struct kbasep_pm_tick_timer_state *stt = container_of(timer, ++ struct kbasep_pm_tick_timer_state, timer); ++ struct kbase_device *kbdev = container_of(stt, struct kbase_device, ++ pm.backend.shader_tick_timer); ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ unsigned long flags; ++ enum hrtimer_restart restart = HRTIMER_NORESTART; + -+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Process PRFCNT_SAMPLE interrupt. 
*/ -+ if (kbdev->csf.hwcnt.request_pending && -+ ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == -+ (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { -+ kbdev->csf.hwcnt.request_pending = false; ++ if (stt->remaining_ticks && ++ backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { ++ stt->remaining_ticks--; + -+ dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); ++ /* If the remaining ticks just changed from 1 to 0, invoke the ++ * PM state machine to power off the shader cores. ++ */ ++ if (!stt->remaining_ticks && !backend->shaders_desired) ++ kbase_pm_update_state(kbdev); ++ } + -+ kbase_hwcnt_backend_csf_on_prfcnt_sample( -+ &kbdev->hwcnt_gpu_iface); ++ if (stt->needed) { ++ hrtimer_forward_now(timer, stt->configured_interval); ++ restart = HRTIMER_RESTART; + } + -+ /* Process PRFCNT_ENABLE interrupt. */ -+ if (kbdev->csf.hwcnt.enable_pending && -+ ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == -+ (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { -+ kbdev->csf.hwcnt.enable_pending = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ dev_dbg(kbdev->dev, -+ "PRFCNT_ENABLE status changed interrupt received."); ++ return restart; ++} + -+ if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) -+ kbase_hwcnt_backend_csf_on_prfcnt_enable( -+ &kbdev->hwcnt_gpu_iface); -+ else -+ kbase_hwcnt_backend_csf_on_prfcnt_disable( -+ &kbdev->hwcnt_gpu_iface); -+ } ++int kbase_pm_state_machine_init(struct kbase_device *kbdev) ++{ ++ struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; + -+ /* Process PRFCNT_THRESHOLD interrupt. */ -+ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { -+ dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); ++ stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!stt->wq) ++ return -ENOMEM; + -+ kbase_hwcnt_backend_csf_on_prfcnt_threshold( -+ &kbdev->hwcnt_gpu_iface); ++ INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); + -+ /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to -+ * the same value as GLB_ACK.PRFCNT_THRESHOLD -+ * flag in order to enable reporting of another -+ * PRFCNT_THRESHOLD event. -+ */ -+ kbase_csf_firmware_global_input_mask( -+ global_iface, GLB_REQ, glb_ack, -+ GLB_REQ_PRFCNT_THRESHOLD_MASK); -+ } ++ stt->needed = false; ++ hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ stt->timer.function = shader_tick_timer_callback; ++ stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); ++ stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; ++ stt->configured_ticks = stt->default_ticks; + -+ /* Process PRFCNT_OVERFLOW interrupt. */ -+ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { -+ dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); ++#if MALI_USE_CSF ++ kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!kbdev->pm.backend.core_idle_wq) { ++ destroy_workqueue(stt->wq); ++ return -ENOMEM; ++ } + -+ kbase_hwcnt_backend_csf_on_prfcnt_overflow( -+ &kbdev->hwcnt_gpu_iface); ++ INIT_WORK(&kbdev->pm.backend.core_idle_work, core_idle_worker); ++#endif + -+ /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to -+ * the same value as GLB_ACK.PRFCNT_OVERFLOW -+ * flag in order to enable reporting of another -+ * PRFCNT_OVERFLOW event. 
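-+		 * Like PRFCNT_THRESHOLD above, this is a toggle-style event:
-+		 * firmware flips the ACK bit to signal it, and copying the
-+		 * ACK bit back into REQ re-arms it.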
-+ */ -+ kbase_csf_firmware_global_input_mask( -+ global_iface, GLB_REQ, glb_ack, -+ GLB_REQ_PRFCNT_OVERFLOW_MASK); -+ } ++ return 0; +} + -+/** -+ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @glb_req: Global request register value. -+ * @glb_ack: Global acknowledge register value. -+ * -+ * This function checks if the PROTM_ENTER Global request had completed and -+ * appropriately sends notification about the protected mode entry to components -+ * like IPA, HWC, IPA_CONTROL. -+ */ -+static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, -+ u32 glb_req, u32 glb_ack) ++void kbase_pm_state_machine_term(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ if (likely(!kbdev->csf.scheduler.active_protm_grp)) -+ return; -+ -+ if (kbdev->protected_mode) -+ return; -+ -+ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) != -+ (glb_ack & GLB_REQ_PROTM_ENTER_MASK)) -+ return; -+ -+ dev_dbg(kbdev->dev, "Protected mode entry interrupt received"); -+ -+ kbdev->protected_mode = true; -+ kbase_ipa_protection_mode_switch_event(kbdev); -+ kbase_ipa_control_protm_entered(kbdev); -+ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); ++#if MALI_USE_CSF ++ destroy_workqueue(kbdev->pm.backend.core_idle_wq); ++#endif ++ hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); ++ destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); +} + -+/** -+ * process_protm_exit - Handle the protected mode exit interrupt -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @glb_ack: Global acknowledge register value. -+ * -+ * This function handles the PROTM_EXIT interrupt and sends notification -+ * about the protected mode exit to components like HWC, IPA_CONTROL. -+ */ -+static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) ++void kbase_pm_reset_start_locked(struct kbase_device *kbdev) +{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); -+ -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack, -+ GLB_REQ_PROTM_EXIT_MASK); + -+ if (likely(scheduler->active_protm_grp)) { -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, -+ scheduler->active_protm_grp, 0u); -+ scheduler->active_protm_grp = NULL; ++ backend->in_reset = true; ++ backend->l2_state = KBASE_L2_RESET_WAIT; ++ KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state); ++#if !MALI_USE_CSF ++ backend->shaders_state = KBASE_SHADERS_RESET_WAIT; ++#else ++ /* MCU state machine is exercised only after the initial load/boot ++ * of the firmware. 
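++	 * Before firmware_inited is set the MCU must still be in
++	 * KBASE_MCU_OFF, which the WARN_ON() in the else branch below
++	 * asserts.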
++ */ ++ if (likely(kbdev->csf.firmware_inited)) { ++ backend->mcu_state = KBASE_MCU_RESET_WAIT; ++ KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state); ++#ifdef KBASE_PM_RUNTIME ++ backend->exit_gpu_sleep_mode = true; ++#endif ++ kbdev->csf.firmware_reload_needed = true; + } else { -+ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group"); ++ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); + } ++#endif + -+ if (!WARN_ON(!kbdev->protected_mode)) { -+ kbdev->protected_mode = false; -+ kbase_ipa_control_protm_exited(kbdev); -+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); ++ /* We're in a reset, so hwcnt will have been synchronously disabled by ++ * this function's caller as part of the reset process. We therefore ++ * know that any call to kbase_hwcnt_context_disable_atomic, if ++ * required to sync the hwcnt refcount with our internal state, is ++ * guaranteed to succeed. ++ */ ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) { ++ WARN_ON(!kbase_hwcnt_context_disable_atomic( ++ kbdev->hwcnt_gpu_ctx)); ++ backend->hwcnt_disabled = true; + } + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev); -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ shader_poweroff_timer_queue_cancel(kbdev); +} + -+static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, -+ struct irq_idle_and_protm_track *track) ++void kbase_pm_reset_complete(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *group = track->protm_grp; -+ u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq; ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ unsigned long flags; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID)) -+ return; ++ /* As GPU has just been reset, that results in implicit flush of L2 ++ * cache, can safely mark the pending cache flush operation (if there ++ * was any) as complete and unblock the waiter. ++ * No work can be submitted whilst GPU reset is ongoing. ++ */ ++ kbase_gpu_cache_clean_wait_complete(kbdev); ++ backend->in_reset = false; ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ backend->gpu_wakeup_override = false; ++#endif ++ kbase_pm_update_state(kbdev); + -+ /* Handle protm from the tracked information */ -+ if (track->idle_seq < current_protm_pending_seq) { -+ /* If the protm enter was prevented due to groups priority, then fire a tock -+ * for the scheduler to re-examine the case. -+ */ -+ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot); -+ kbase_csf_scheduler_invoke_tock(kbdev); -+ } else if (group) { -+ u32 i, num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_queue_group *grp; -+ bool tock_triggered = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ /* A new protm request, and track->idle_seq is not sufficient, check across -+ * previously notified idle CSGs in the current tick/tock cycle. -+ */ -+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { -+ if (i == track->idle_slot) -+ continue; -+ grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i); -+ /* If not NULL then the group pointer cannot disappear as the -+ * scheduler spinlock is held. 
-+ */ -+ if (grp == NULL) -+ continue; ++#if !MALI_USE_CSF ++/* Timeout in milliseconds for GPU Power Management to reach the desired ++ * Shader and L2 state. If the time spent waiting has exceeded this threshold ++ * then there is most likely a hardware issue. ++ */ ++#define PM_TIMEOUT_MS (5000) /* 5s */ ++#endif + -+ if (grp->scan_seq_num < current_protm_pending_seq) { -+ tock_triggered = true; -+ dev_dbg(kbdev->dev, -+ "Attempt new protm from tick/tock idle slot %d\n", i); -+ kbase_csf_scheduler_invoke_tock(kbdev); -+ break; -+ } -+ } ++static void kbase_pm_timed_out(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ if (!tock_triggered) { -+ dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", -+ group->handle, group->csg_nr); -+ queue_work(group->kctx->csf.wq, &group->protm_event_work); -+ } -+ } ++ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); ++#if !MALI_USE_CSF ++ CSTD_UNUSED(flags); ++ dev_err(kbdev->dev, "Desired state :\n"); ++ dev_err(kbdev->dev, "\tShader=%016llx\n", ++ kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); ++#else ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ dev_err(kbdev->dev, "\tMCU desired = %d\n", ++ kbase_pm_is_mcu_desired(kbdev)); ++ dev_err(kbdev->dev, "\tMCU sw state = %d\n", ++ kbdev->pm.backend.mcu_state); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif ++ dev_err(kbdev->dev, "Current state :\n"); ++ dev_err(kbdev->dev, "\tShader=%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_READY_HI)), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_READY_LO))); ++ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_READY_HI)), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_READY_LO))); ++ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_READY_HI)), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_READY_LO))); ++#if MALI_USE_CSF ++ dev_err(kbdev->dev, "\tMCU status = %d\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); ++#endif ++ dev_err(kbdev->dev, "Cores transitioning :\n"); ++ dev_err(kbdev->dev, "\tShader=%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ SHADER_PWRTRANS_HI)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ SHADER_PWRTRANS_LO))); ++ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ TILER_PWRTRANS_HI)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ TILER_PWRTRANS_LO))); ++ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ L2_PWRTRANS_HI)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ L2_PWRTRANS_LO))); ++ ++ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); +} + -+static void order_job_irq_clear_with_iface_mem_read(void) ++int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) +{ -+ /* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the -+ * read from interface memory. The ordering is needed considering the way -+ * FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers -+ * without any synchronization. 
Without the barrier there is no guarantee -+ * about the ordering, the write to IRQ_CLEAR can take effect after the read -+ * from interface memory and that could cause a problem for the scenario where -+ * FW sends back to back notifications for the same CSG for events like -+ * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the -+ * first event. Similar thing can happen with glb events like CFG_ALLOC_EN -+ * acknowledgment and GPU idle notification. -+ * -+ * MCU CPU -+ * --------------- ---------------- -+ * Update interface memory Write to IRQ_CLEAR to clear current IRQ -+ * -+ * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory ++ unsigned long flags; ++ unsigned long timeout; ++ long remaining; ++ int err = 0; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); ++#else ++ timeout = msecs_to_jiffies(PM_TIMEOUT_MS); ++#endif ++ ++ /* Wait for cores */ ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state_with_l2_powered(kbdev), ++ timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); ++#endif ++ ++ if (!remaining) { ++ kbase_pm_timed_out(kbdev); ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info( ++ kbdev->dev, ++ "Wait for desired PM state with L2 powered got interrupted"); ++ err = (int)remaining; ++ } ++ ++ return err; ++} ++ ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ long remaining; ++#if MALI_USE_CSF ++ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); ++#else ++ long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); ++#endif ++ int err = 0; ++ ++ /* Let the state machine latch the most recent desired state. */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* Wait for cores */ ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state(kbdev), timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state(kbdev), timeout); ++#endif ++ ++ if (!remaining) { ++ kbase_pm_timed_out(kbdev); ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info(kbdev->dev, ++ "Wait for desired PM state got interrupted"); ++ err = (int)remaining; ++ } ++ ++ return err; ++} ++KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); ++ ++#if MALI_USE_CSF ++/** ++ * core_mask_update_done - Check if downscaling of shader cores is done ++ * ++ * @kbdev: The kbase device structure for the device. ++ * ++ * This function checks if the downscaling of cores is effectively complete. ++ * ++ * Return: true if the downscale is done. ++ */ ++static bool core_mask_update_done(struct kbase_device *kbdev) ++{ ++ bool update_done = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* If MCU is in stable ON state then it implies that the downscale ++ * request had completed. 
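/*
 * Editor's illustrative sketch (not part of the patch): both wait helpers
 * above use the same pattern - a killable, timed wait on a condition, where
 * a zero return maps to -ETIMEDOUT and a negative return means the wait was
 * interrupted by a fatal signal. A generic version of that mapping, with a
 * hypothetical condition callback, is shown below.
 */
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static int wait_for_condition(wait_queue_head_t *wq, bool (*cond)(void *),
			      void *data, unsigned int timeout_ms)
{
	long remaining = wait_event_killable_timeout(*wq, cond(data),
						     msecs_to_jiffies(timeout_ms));

	if (!remaining)
		return -ETIMEDOUT;	/* condition never became true */
	if (remaining < 0)
		return (int)remaining;	/* -ERESTARTSYS: fatal signal pending */
	return 0;			/* condition met within the timeout */
}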
++ * If MCU is not active then it implies all cores are off, so can ++ * consider the downscale request as complete. + */ ++ if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || ++ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) ++ update_done = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* CPU and GPU would be in the same Outer shareable domain */ -+ dmb(osh); ++ return update_done; +} + -+void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) ++int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) +{ -+ bool deferred_handling_glb_idle_irq = false; ++ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); ++ long remaining; ++ int err = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Wait for core mask update to complete */ ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ core_mask_update_done(kbdev), timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ core_mask_update_done(kbdev), timeout); ++#endif + -+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val); ++ if (!remaining) { ++ kbase_pm_timed_out(kbdev); ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info( ++ kbdev->dev, ++ "Wait for cores down scaling got interrupted"); ++ err = (int)remaining; ++ } + -+ do { -+ unsigned long flags; -+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; -+ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; -+ bool glb_idle_irq_received = false; ++ return err; ++} ++#endif + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); -+ order_job_irq_clear_with_iface_mem_read(); ++void kbase_pm_enable_interrupts(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ if (csg_interrupts != 0) { -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ /* Looping through and track the highest idle and protm groups */ -+ while (csg_interrupts != 0) { -+ int const csg_nr = ffs(csg_interrupts) - 1; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ /* ++ * Clear all interrupts, ++ * and unmask them all. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ process_csg_interrupts(kbdev, csg_nr, &track); -+ csg_interrupts &= ~(1 << csg_nr); -+ } ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + -+ /* Handle protm from the tracked information */ -+ process_tracked_info_for_protm(kbdev, &track); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ } ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); ++#if MALI_USE_CSF ++ /* Enable only the Page fault bits part */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); ++#else ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); ++#endif ++} + -+ if (val & JOB_IRQ_GLOBAL_IF) { -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; ++KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + -+ kbdev->csf.interrupt_received = true; ++void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ /* ++ * Mask all interrupts, ++ * and clear them all. 
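/*
 * Editor's illustrative sketch (not part of the patch): the interrupt
 * enable/disable helpers above always clear latched status before opening
 * the mask (and mask before clearing on the way down), so a stale interrupt
 * cannot fire the moment the mask is opened. Register offsets below are
 * hypothetical.
 */
#include <linux/io.h>

#define MY_IRQ_CLEAR	0x24	/* hypothetical offsets */
#define MY_IRQ_MASK	0x28

static void my_irq_enable_all(void __iomem *base)
{
	writel(0xFFFFFFFF, base + MY_IRQ_CLEAR);	/* drop stale status */
	writel(0xFFFFFFFF, base + MY_IRQ_MASK);		/* then unmask */
}

static void my_irq_disable_all(void __iomem *base)
{
	writel(0, base + MY_IRQ_MASK);			/* mask first */
	writel(0xFFFFFFFF, base + MY_IRQ_CLEAR);	/* then clear */
}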
++ */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (!kbdev->csf.firmware_reloaded) -+ kbase_csf_firmware_reload_completed(kbdev); -+ else if (global_iface->output) { -+ u32 glb_req, glb_ack; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ glb_req = -+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); -+ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK); -+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, -+ glb_req ^ glb_ack); ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); ++} + -+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack); ++void kbase_pm_disable_interrupts(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) -+ process_protm_exit(kbdev, glb_ack); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_disable_interrupts_nolock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ /* Handle IDLE Hysteresis notification event */ -+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { -+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); -+ kbase_csf_firmware_global_input_mask( -+ global_iface, GLB_REQ, glb_ack, -+ GLB_REQ_IDLE_EVENT_MASK); ++KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + -+ glb_idle_irq_received = true; -+ /* Defer handling this IRQ to account for a race condition -+ * where the idle worker could be executed before we have -+ * finished handling all pending IRQs (including CSG IDLE -+ * IRQs). -+ */ -+ deferred_handling_glb_idle_irq = true; -+ } ++#if MALI_USE_CSF ++/** ++ * update_user_reg_page_mapping - Update the mapping for USER Register page ++ * ++ * @kbdev: The kbase device structure for the device. ++ * ++ * This function must be called to unmap the dummy or real page from USER Register page ++ * mapping whenever GPU is powered up or down. The dummy or real page would get ++ * appropriately mapped in when Userspace reads the LATEST_FLUSH value. ++ */ ++static void update_user_reg_page_mapping(struct kbase_device *kbdev) ++{ ++ struct kbase_context *kctx, *n; + -+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ mutex_lock(&kbdev->csf.reg_lock); ++ list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) { ++ /* This would zap the PTE corresponding to the mapping of User ++ * Register page of the kbase context. The mapping will be reestablished ++ * when the context (user process) needs to access to the page. ++ */ ++ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping, ++ kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1); ++ list_del_init(&kctx->csf.user_reg.link); ++ dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid, ++ kctx->id); ++ } ++ mutex_unlock(&kbdev->csf.reg_lock); ++} ++#endif + -+ /* Invoke the MCU state machine as a state transition -+ * might have completed. 
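/*
 * Editor's illustrative sketch (not part of the patch): the USER-register
 * remapping helper above relies on unmap_mapping_range() to zap the page
 * table entry for one page of a file-backed mapping; the next user-space
 * access then faults, and the fault handler can decide whether to map the
 * real register page or the dummy page. The file pointer and page offset in
 * this stand-alone version are hypothetical.
 */
#include <linux/fs.h>
#include <linux/mm.h>

static void zap_one_user_page(struct file *filp, pgoff_t pgoff)
{
	/* even_cows = 1: also drop any private COW copies of the page */
	unmap_mapping_range(filp->f_inode->i_mapping,
			    (loff_t)pgoff << PAGE_SHIFT, PAGE_SIZE, 1);
}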
-+ */ -+ kbase_pm_update_state(kbdev); -+ } ++/* ++ * pmu layout: ++ * 0x0000: PMU TAG (RO) (0xCAFECAFE) ++ * 0x0004: PMU VERSION ID (RO) (0x00000000) ++ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) ++ */ ++void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ bool reset_required = is_resume; ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { ++ dev_err(kbdev->dev, ++ "%s: Cannot power up while GPU lost", __func__); ++ return; ++ } ++#endif ++ ++ if (backend->gpu_powered) { ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ if (backend->gpu_idled) { ++ backend->callback_power_runtime_gpu_active(kbdev); ++ backend->gpu_idled = false; + } ++#endif ++ /* Already turned on */ ++ if (kbdev->poweroff_pending) ++ kbase_pm_enable_interrupts(kbdev); ++ kbdev->poweroff_pending = false; ++ KBASE_DEBUG_ASSERT(!is_resume); ++ return; ++ } + -+ if (!glb_idle_irq_received) -+ break; -+ /* Attempt to serve potential IRQs that might have occurred -+ * whilst handling the previous IRQ. In case we have observed -+ * the GLB IDLE IRQ without all CSGs having been marked as -+ * idle, the GPU would be treated as no longer idle and left -+ * powered on. ++ kbdev->poweroff_pending = false; ++ ++ KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u); ++ ++ if (is_resume && backend->callback_power_resume) { ++ backend->callback_power_resume(kbdev); ++ return; ++ } else if (backend->callback_power_on) { ++ reset_required = backend->callback_power_on(kbdev); ++ } ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->gpu_powered = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ /* GPU has been turned on, can switch to actual register page */ ++ update_user_reg_page_mapping(kbdev); ++#endif ++ ++ ++ if (reset_required) { ++ /* GPU state was lost, reset GPU to ensure it is in a ++ * consistent state + */ -+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); -+ } while (val); ++ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); ++ } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ else { ++ if (kbdev->arb.arb_if) { ++ struct kbase_arbiter_vm_state *arb_vm_state = ++ kbdev->pm.arb_vm_state; + -+ if (deferred_handling_glb_idle_irq) { -+ unsigned long flags; ++ /* In the case that the GPU has just been granted by ++ * the Arbiter, a reset will have already been done. ++ * However, it is still necessary to initialize the GPU. ++ */ ++ if (arb_vm_state->vm_arb_starting) ++ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | ++ PM_NO_RESET); ++ } ++ } ++ /* ++ * This point means that the GPU trasitioned to ON. So there is a chance ++ * that a repartitioning occurred. In this case the current config ++ * should be read again. 
++ */ ++ kbase_gpuprops_get_curr_config_props(kbdev, ++ &kbdev->gpu_props.curr_config); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_csf_scheduler_process_gpu_idle_event(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ if (kbdev->dummy_job_wa.flags & ++ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_dummy_job_wa_execute(kbdev, ++ kbase_pm_get_present_cores(kbdev, ++ KBASE_PM_CORE_SHADER)); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ wake_up_all(&kbdev->csf.event_wait); ++ /* Enable the interrupts */ ++ kbase_pm_enable_interrupts(kbdev); + -+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); ++ /* Turn on the L2 caches */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->gpu_ready = true; ++ backend->l2_desired = true; ++#if MALI_USE_CSF ++ if (reset_required) { ++ /* GPU reset was done after the power on, so send the post ++ * reset event instead. This is okay as GPU power off event ++ * is same as pre GPU reset event. ++ */ ++ kbase_ipa_control_handle_gpu_reset_post(kbdev); ++ } else { ++ kbase_ipa_control_handle_gpu_power_on(kbdev); ++ } ++#endif ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ /* GPU is now powered up. Invoke the GPU active callback as GPU idle ++ * callback would have been invoked before the power down. ++ */ ++ if (backend->gpu_idled) { ++ backend->callback_power_runtime_gpu_active(kbdev); ++ backend->gpu_idled = false; ++ } ++#endif +} + -+void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) ++KBASE_EXPORT_TEST_API(kbase_pm_clock_on); ++ ++bool kbase_pm_clock_off(struct kbase_device *kbdev) +{ -+ if (kbdev->csf.db_filp) { -+ struct page *page = as_page(kbdev->csf.dummy_db_page); ++ unsigned long flags; + -+ kbase_mem_pool_free( -+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ page, false); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ fput(kbdev->csf.db_filp); ++ /* ASSERT that the cores should now be unavailable. No lock needed. */ ++ WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); ++ ++ kbdev->poweroff_pending = true; ++ ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* Already turned off */ ++ return true; ++ } ++ ++ KBASE_KTRACE_ADD(kbdev, PM_GPU_OFF, NULL, 0u); ++ ++ /* Disable interrupts. This also clears any outstanding interrupts */ ++ kbase_pm_disable_interrupts(kbdev); ++ /* Ensure that any IRQ handlers have finished */ ++ kbase_synchronize_irqs(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if (atomic_read(&kbdev->faults_pending)) { ++ /* Page/bus faults are still being processed. 
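/*
 * Editor's illustrative sketch (not part of the patch): kbase_pm_clock_off()
 * above refuses to cut power while page/bus fault workers are still
 * outstanding and reports that back to the caller, which is expected to
 * flush the MMU workqueues and retry. A minimal version of the same
 * "check a pending-work count under the lock before powering down" pattern,
 * with hypothetical my_* names:
 */
#include <linux/atomic.h>
#include <linux/spinlock.h>

struct my_dev {
	spinlock_t lock;
	atomic_t faults_pending;
	bool powered;
};

static bool my_try_power_off(struct my_dev *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->lock, flags);
	if (atomic_read(&dev->faults_pending)) {
		/* Fault handlers may still touch registers: bail out. */
		spin_unlock_irqrestore(&dev->lock, flags);
		return false;	/* caller flushes fault work and retries */
	}
	dev->powered = false;
	spin_unlock_irqrestore(&dev->lock, flags);
	return true;
}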
The GPU can not ++ * be powered off until they have completed ++ */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return false; ++ } ++ ++ kbase_pm_cache_snoop_disable(kbdev); ++#if MALI_USE_CSF ++ kbase_ipa_control_handle_gpu_power_off(kbdev); ++#endif ++ ++ if (kbase_is_gpu_removed(kbdev) ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ || kbase_pm_is_gpu_lost(kbdev)) { ++#else ++ ) { ++#endif ++ /* Ensure we unblock any threads that are stuck waiting ++ * for the GPU ++ */ ++ kbase_gpu_cache_clean_wait_complete(kbdev); + } ++ ++ kbdev->pm.backend.gpu_ready = false; ++ ++ /* The GPU power may be turned off from this point */ ++ kbdev->pm.backend.gpu_powered = false; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ /* GPU is about to be turned off, switch to dummy page */ ++ update_user_reg_page_mapping(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ ++ if (kbdev->pm.backend.callback_power_off) ++ kbdev->pm.backend.callback_power_off(kbdev); ++ return true; +} + -+int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) ++KBASE_EXPORT_TEST_API(kbase_pm_clock_off); ++ ++struct kbasep_reset_timeout_data { ++ struct hrtimer timer; ++ bool timed_out; ++ struct kbase_device *kbdev; ++}; ++ ++void kbase_pm_reset_done(struct kbase_device *kbdev) +{ -+ struct tagged_addr phys; -+ struct file *filp; -+ int ret; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbdev->pm.backend.reset_done = true; ++ wake_up(&kbdev->pm.backend.reset_done_wait); ++} + -+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE); -+ if (IS_ERR(filp)) -+ return PTR_ERR(filp); ++/** ++ * kbase_pm_wait_for_reset - Wait for a reset to happen ++ * ++ * @kbdev: Kbase device ++ * ++ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. ++ */ ++static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->pm.lock); + -+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, -+ false, NULL); ++ wait_event(kbdev->pm.backend.reset_done_wait, ++ (kbdev->pm.backend.reset_done)); ++ kbdev->pm.backend.reset_done = false; ++} + -+ if (ret <= 0) { -+ fput(filp); -+ return ret; ++KBASE_EXPORT_TEST_API(kbase_pm_reset_done); ++ ++static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) ++{ ++ struct kbasep_reset_timeout_data *rtdata = ++ container_of(timer, struct kbasep_reset_timeout_data, timer); ++ ++ rtdata->timed_out = true; ++ ++ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset ++ * hasn't completed ++ */ ++ kbase_pm_reset_done(rtdata->kbdev); ++ ++ return HRTIMER_NORESTART; ++} ++ ++static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) ++{ ++#if MALI_USE_CSF ++ kbdev->hw_quirks_gpu = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); ++#else ++ u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); ++ ++ if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { ++ /* Only for tMIx */ ++ u32 coherency_features; ++ ++ coherency_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(COHERENCY_FEATURES)); ++ ++ /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly ++ * documented for tMIx so force correct value here. 
++ */ ++ if (coherency_features == ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { ++ hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) ++ << JM_FORCE_COHERENCY_FEATURES_SHIFT; ++ } + } + -+ kbdev->csf.db_filp = filp; -+ kbdev->csf.dummy_db_page = phys; -+ kbdev->csf.db_file_offsets = 0; ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; ++ ++ kbdev->hw_quirks_gpu = hw_quirks_gpu; ++ ++#endif /* !MALI_USE_CSF */ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { ++ int default_idvs_group_size = 0xF; ++ u32 group_size = 0; ++ ++ if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", ++ &group_size)) ++ group_size = default_idvs_group_size; ++ ++ if (group_size > IDVS_GROUP_MAX_SIZE) { ++ dev_err(kbdev->dev, ++ "idvs-group-size of %d is too large. Maximum value is %d", ++ group_size, IDVS_GROUP_MAX_SIZE); ++ group_size = default_idvs_group_size; ++ } ++ ++ kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; ++ } ++ ++#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) ++ if (corestack_driver_control) ++ kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; + + return 0; +} + -+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) ++static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +{ -+ if (kbdev->csf.user_reg.filp) { -+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page); ++ u32 hw_quirks_sc = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_CONFIG)); + -+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); -+ fput(kbdev->csf.user_reg.filp); ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; ++ ++ if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ ++ hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; ++ else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ ++ hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; ++ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) ++ hw_quirks_sc |= SC_VAR_ALGORITHM; ++ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) ++ hw_quirks_sc |= SC_TLS_HASH_ENABLE; ++ ++ kbdev->hw_quirks_sc = hw_quirks_sc; ++ ++ return 0; ++} ++ ++static int kbase_set_tiler_quirks(struct kbase_device *kbdev) ++{ ++ u32 hw_quirks_tiler = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_CONFIG)); ++ ++ if (kbase_is_gpu_removed(kbdev)) ++ return -EIO; ++ ++ /* Set tiler clock gate override if required */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) ++ hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; ++ ++ kbdev->hw_quirks_tiler = hw_quirks_tiler; ++ ++ return 0; ++} ++ ++static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) ++{ ++ struct device_node *np = kbdev->dev->of_node; ++ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ const u32 prod_id = ++ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ int error = 0; ++ ++ kbdev->hw_quirks_gpu = 0; ++ kbdev->hw_quirks_sc = 0; ++ kbdev->hw_quirks_tiler = 0; ++ kbdev->hw_quirks_mmu = 0; ++ ++ if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { ++ dev_info(kbdev->dev, ++ "Found quirks_gpu = [0x%x] in Devicetree\n", ++ kbdev->hw_quirks_gpu); ++ } else { ++ error = kbase_set_gpu_quirks(kbdev, prod_id); ++ if (error) ++ return error; ++ } ++ ++ if (!of_property_read_u32(np, "quirks_sc", ++ &kbdev->hw_quirks_sc)) { ++ dev_info(kbdev->dev, ++ "Found quirks_sc = [0x%x] in Devicetree\n", ++ kbdev->hw_quirks_sc); ++ } else { ++ error = kbase_set_sc_quirks(kbdev, prod_id); ++ if (error) ++ return error; ++ } ++ ++ if 
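/*
 * Editor's illustrative sketch (not part of the patch): the quirk and
 * idvs-group-size handling above follows a common Devicetree-override
 * pattern - an optional u32 property supplies the value, otherwise a default
 * is used, and out-of-range values are clamped back to the default. The
 * property name and limit below are hypothetical.
 */
#include <linux/of.h>
#include <linux/types.h>

static u32 read_dt_u32_or_default(const struct device_node *np,
				  const char *prop, u32 def, u32 max)
{
	u32 val = def;

	/* of_property_read_u32() returns 0 only when the property exists
	 * and parses cleanly; otherwise keep the default.
	 */
	if (of_property_read_u32(np, prop, &val))
		return def;
	if (val > max)
		return def;	/* reject out-of-range values */
	return val;
}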
(!of_property_read_u32(np, "quirks_tiler", ++ &kbdev->hw_quirks_tiler)) { ++ dev_info(kbdev->dev, ++ "Found quirks_tiler = [0x%x] in Devicetree\n", ++ kbdev->hw_quirks_tiler); ++ } else { ++ error = kbase_set_tiler_quirks(kbdev); ++ if (error) ++ return error; ++ } ++ ++ if (!of_property_read_u32(np, "quirks_mmu", ++ &kbdev->hw_quirks_mmu)) { ++ dev_info(kbdev->dev, ++ "Found quirks_mmu = [0x%x] in Devicetree\n", ++ kbdev->hw_quirks_mmu); ++ } else { ++ error = kbase_set_mmu_quirks(kbdev); + } ++ ++ return error; +} + -+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) ++static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ -+ struct tagged_addr phys; -+ struct file *filp; -+ struct page *page; -+ u32 *addr; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), ++ kbdev->hw_quirks_sc); + -+ kbdev->csf.user_reg.filp = NULL; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), ++ kbdev->hw_quirks_tiler); + -+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE); -+ if (IS_ERR(filp)) { -+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg"); -+ return PTR_ERR(filp); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), ++ kbdev->hw_quirks_mmu); ++#if MALI_USE_CSF ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), ++ kbdev->hw_quirks_gpu); ++#else ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), ++ kbdev->hw_quirks_gpu); ++#endif ++} ++ ++void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) ++{ ++ if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && ++ !kbdev->cci_snoop_enabled) { ++#if IS_ENABLED(CONFIG_ARM64) ++ if (kbdev->snoop_enable_smc != 0) ++ kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); ++#endif /* CONFIG_ARM64 */ ++ dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); ++ kbdev->cci_snoop_enabled = true; + } ++} + -+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, -+ false, NULL) <= 0) { -+ fput(filp); -+ return -ENOMEM; ++void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) ++{ ++ if (kbdev->cci_snoop_enabled) { ++#if IS_ENABLED(CONFIG_ARM64) ++ if (kbdev->snoop_disable_smc != 0) { ++ mali_cci_flush_l2(kbdev); ++ kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); ++ } ++#endif /* CONFIG_ARM64 */ ++ dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); ++ kbdev->cci_snoop_enabled = false; + } ++} + -+ page = as_page(phys); -+ addr = kmap_atomic(page); ++#if !MALI_USE_CSF ++static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) ++{ ++ unsigned long irq_flags; + -+ /* Write a special value for the latest flush register inside the -+ * dummy page ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbdev->protected_mode_hwcnt_desired = true; ++ if (kbdev->protected_mode_hwcnt_disabled) { ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbdev->protected_mode_hwcnt_disabled = false; ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++} ++#endif ++ ++static int kbase_pm_do_reset(struct kbase_device *kbdev) ++{ ++ struct kbasep_reset_timeout_data rtdata; ++ int ret; ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); ++ ++ KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); ++ ++ if (kbdev->pm.backend.callback_soft_reset) { ++ ret = kbdev->pm.backend.callback_soft_reset(kbdev); ++ if (ret < 0) ++ return ret; ++ else if (ret > 0) ++ return 0; ++ } else { ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_SOFT_RESET); ++ } ++ ++ /* Unmask the reset 
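/*
 * Editor's illustrative sketch (not part of the patch): kbase_pm_do_reset()
 * above gives an optional platform callback first refusal at the soft
 * reset, using a three-way return convention - negative means error,
 * positive means the callback already handled the reset, zero means fall
 * back to the generic register write. The callback type and names here are
 * hypothetical.
 */
static int do_reset_with_override(int (*soft_reset_cb)(void *), void *dev,
				  void (*default_reset)(void *))
{
	if (soft_reset_cb) {
		int ret = soft_reset_cb(dev);

		if (ret < 0)
			return ret;	/* platform reset failed */
		if (ret > 0)
			return 0;	/* platform handled the reset fully */
		/* ret == 0: fall through to the generic path */
	}
	default_reset(dev);
	return 0;
}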
complete interrupt only */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); ++ ++ /* Initialize a structure for tracking the status of the reset */ ++ rtdata.kbdev = kbdev; ++ rtdata.timed_out = false; ++ ++ /* Create a timer to use as a timeout on the reset */ ++ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rtdata.timer.function = kbasep_reset_timeout; ++ ++ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), ++ HRTIMER_MODE_REL); ++ ++ /* Wait for the RESET_COMPLETED interrupt to be raised */ ++ kbase_pm_wait_for_reset(kbdev); ++ ++ if (!rtdata.timed_out) { ++ /* GPU has been reset */ ++ hrtimer_cancel(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return 0; ++ } ++ ++ /* No interrupt has been received - check if the RAWSTAT register says ++ * the reset has completed + */ -+ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; ++ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & ++ RESET_COMPLETED)) { ++ /* The interrupt is set in the RAWSTAT; this suggests that the ++ * interrupts are not getting to the CPU ++ */ ++ dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); ++ /* If interrupts aren't working we can't continue. */ ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return -EINVAL; ++ } + -+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), -+ DMA_BIDIRECTIONAL); -+ kunmap_atomic(addr); ++ if (kbase_is_gpu_removed(kbdev)) { ++ dev_dbg(kbdev->dev, "GPU has been removed, reset no longer needed.\n"); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return -EINVAL; ++ } + -+ kbdev->csf.user_reg.filp = filp; -+ kbdev->csf.user_reg.dummy_page = phys; -+ kbdev->csf.user_reg.file_offset = 0; ++ /* The GPU doesn't seem to be responding to the reset so try a hard ++ * reset, but only when NOT in arbitration mode. 
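/*
 * Editor's illustrative sketch (not part of the patch): when the
 * RESET_COMPLETED interrupt never arrives, the code above cross-checks the
 * raw status register, which separates "interrupt delivery is broken" from
 * "the GPU genuinely did not finish the reset". Offsets, the bit value and
 * the chosen error codes below are hypothetical.
 */
#include <linux/io.h>
#include <linux/errno.h>

#define MY_IRQ_RAWSTAT		0x20		/* hypothetical */
#define MY_RESET_COMPLETED	(1u << 8)	/* hypothetical */

/* -ENODEV: IRQ routing looks broken; -EAGAIN: reset not done, escalate */
static int my_check_reset_after_timeout(void __iomem *base)
{
	if (readl(base + MY_IRQ_RAWSTAT) & MY_RESET_COMPLETED)
		return -ENODEV;	/* reset finished but no IRQ reached the CPU */
	return -EAGAIN;		/* reset really not done: try a hard reset */
}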
++ */ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (!kbdev->arb.arb_if) { ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", ++ RESET_TIMEOUT); ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_HARD_RESET); ++ ++ /* Restart the timer to wait for the hard reset to complete */ ++ rtdata.timed_out = false; ++ ++ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), ++ HRTIMER_MODE_REL); ++ ++ /* Wait for the RESET_COMPLETED interrupt to be raised */ ++ kbase_pm_wait_for_reset(kbdev); ++ ++ if (!rtdata.timed_out) { ++ /* GPU has been reset */ ++ hrtimer_cancel(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return 0; ++ } ++ ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ ++ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", ++ RESET_TIMEOUT); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ ++ return -EINVAL; ++} ++ ++int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) ++{ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_SET_PROTECTED_MODE); + return 0; +} + -+u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) ++int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) +{ -+ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; -+ u8 out_priority = req_priority; ++ lockdep_assert_held(&kbdev->pm.lock); + -+ if (pcm_device) { -+ req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority); -+ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); -+ out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority); ++ return kbase_pm_do_reset(kbdev); ++} ++ ++int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) ++{ ++ unsigned long irq_flags; ++ int err = 0; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ /* Ensure the clock is on before attempting to access the hardware */ ++ if (!kbdev->pm.backend.gpu_powered) { ++ if (kbdev->pm.backend.callback_power_on) ++ kbdev->pm.backend.callback_power_on(kbdev); ++ ++ kbdev->pm.backend.gpu_powered = true; + } + -+ return out_priority; ++ /* Ensure interrupts are off to begin with, this also clears any ++ * outstanding interrupts ++ */ ++ kbase_pm_disable_interrupts(kbdev); ++ /* Ensure cache snoops are disabled before reset. 
*/ ++ kbase_pm_cache_snoop_disable(kbdev); ++ /* Prepare for the soft-reset */ ++ kbdev->pm.backend.reset_done = false; ++ ++ /* The cores should be made unavailable due to the reset */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ ++ /* Soft reset the GPU */ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (!(flags & PM_NO_RESET)) ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ err = kbdev->protected_ops->protected_mode_disable( ++ kbdev->protected_dev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++#if MALI_USE_CSF ++ if (kbdev->protected_mode) { ++ unsigned long flags; ++ ++ kbase_ipa_control_protm_exited(kbdev); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } ++#endif ++ kbdev->protected_mode = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ ++ if (err) ++ goto exit; ++ ++ if (flags & PM_HW_ISSUES_DETECT) { ++ err = kbase_pm_hw_issues_detect(kbdev); ++ if (err) ++ goto exit; ++ } ++ ++ kbase_pm_hw_issues_apply(kbdev); ++ kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); ++ ++ /* Sanity check protected mode was left after reset */ ++ WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_PROTECTED_MODE_ACTIVE); ++ ++ /* If cycle counter was in use re-enable it, enable_irqs will only be ++ * false when called from kbase_pm_powerup ++ */ ++ if (kbdev->pm.backend.gpu_cycle_counter_requests && ++ (flags & PM_ENABLE_IRQS)) { ++ kbase_pm_enable_interrupts(kbdev); ++ ++ /* Re-enable the counters if we need to */ ++ spin_lock_irqsave( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ if (kbdev->pm.backend.gpu_cycle_counter_requests) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START); ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ ++ kbase_pm_disable_interrupts(kbdev); ++ } ++ ++ if (flags & PM_ENABLE_IRQS) ++ kbase_pm_enable_interrupts(kbdev); ++ ++exit: ++#if !MALI_USE_CSF ++ if (!kbdev->pm.backend.protected_entry_transition_override) { ++ /* Re-enable GPU hardware counters if we're resetting from ++ * protected mode. ++ */ ++ reenable_protected_mode_hwcnt(kbdev); ++ } ++#endif ++ ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h ++ ++/** ++ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters ++ * @kbdev: The kbase device structure of the device ++ * ++ * Increase the count of cycle counter users and turn the cycle counters on if ++ * they were previously off ++ * ++ * This function is designed to be called by ++ * kbase_pm_request_gpu_cycle_counter() or ++ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only ++ * ++ * When this function is called the l2 cache must be on - i.e., the GPU must be ++ * on. 
++ */ ++static void ++kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++ ++kbdev->pm.backend.gpu_cycle_counter_requests; ++ ++ if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START); ++ else { ++ /* This might happen after GPU reset. ++ * Then counter needs to be kicked. ++ */ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START); ++ } ++#endif ++ } ++ ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++} ++ ++void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < ++ INT_MAX); ++ ++ kbase_pm_wait_for_l2_powered(kbdev); ++ ++ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); ++ ++void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < ++ INT_MAX); ++ ++ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); ++ ++void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); ++ ++ --kbdev->pm.backend.gpu_cycle_counter_requests; ++ ++ if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_STOP); ++ ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++} ++ ++void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h new file mode 100644 -index 000000000..dd947dcba +index 000000000..e66ce57d3 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h -@@ -0,0 +1,506 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +@@ -0,0 +1,1021 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
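/*
 * Editor's illustrative sketch (not part of the patch): the cycle-counter
 * request/release helpers above are a refcounting scheme - the hardware is
 * started on the 0 -> 1 transition and stopped on the 1 -> 0 transition,
 * with the count and the register writes serialised by one dedicated
 * spinlock. The struct and the hw_start/hw_stop hooks are hypothetical.
 */
#include <linux/spinlock.h>

struct my_counter {
	spinlock_t lock;
	int requests;
	void (*hw_start)(void);
	void (*hw_stop)(void);
};

static void my_counter_get(struct my_counter *c)
{
	unsigned long flags;

	spin_lock_irqsave(&c->lock, flags);
	if (++c->requests == 1)
		c->hw_start();		/* first user: turn the counter on */
	spin_unlock_irqrestore(&c->lock, flags);
}

static void my_counter_put(struct my_counter *c)
{
	unsigned long flags;

	spin_lock_irqsave(&c->lock, flags);
	if (--c->requests == 0)
		c->hw_stop();		/* last user gone: turn it off */
	spin_unlock_irqrestore(&c->lock, flags);
}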
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -142441,698 +142787,1016 @@ index 000000000..dd947dcba + * + */ + -+#ifndef _KBASE_CSF_H_ -+#define _KBASE_CSF_H_ -+ -+#include "mali_kbase_csf_kcpu.h" -+#include "mali_kbase_csf_scheduler.h" -+#include "mali_kbase_csf_firmware.h" -+#include "mali_kbase_csf_protected_memory.h" -+#include "mali_kbase_hwaccess_time.h" -+ -+/* Indicate invalid CS h/w interface -+ */ -+#define KBASEP_IF_NR_INVALID ((s8)-1) -+ -+/* Indicate invalid CSG number for a GPU command queue group ++/* ++ * Power management API definitions used internally by GPU backend + */ -+#define KBASEP_CSG_NR_INVALID ((s8)-1) + -+/* Indicate invalid user doorbell number for a GPU command queue -+ */ -+#define KBASEP_USER_DB_NR_INVALID ((s8)-1) ++#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ ++#define _KBASE_BACKEND_PM_INTERNAL_H_ + -+/* Number of pages used for GPU command queue's User input & output data */ -+#define KBASEP_NUM_CS_USER_IO_PAGES (2) ++#include + -+/* Indicates an invalid value for the scan out sequence number, used to -+ * signify there is no group that has protected mode execution pending. -+ */ -+#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) ++#include "backend/gpu/mali_kbase_pm_ca.h" ++#include "mali_kbase_pm_policy.h" + -+#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ + -+/* Idle hysteresis time can be scaled down when GPU sleep feature is used */ -+#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) ++/** ++ * kbase_pm_dev_idle - The GPU is idle. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * The OS may choose to turn off idle devices ++ */ ++void kbase_pm_dev_idle(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space. ++ * kbase_pm_dev_activate - The GPU is active. + * -+ * @kctx: Pointer to the kbase context which is being initialized. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: 0 if successful or a negative error code on failure. ++ * The OS should avoid opportunistically turning off the GPU while it is active + */ -+int kbase_csf_ctx_init(struct kbase_context *kctx); ++void kbase_pm_dev_activate(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon -+ * GPU bus fault, MMU page fault or similar. ++ * kbase_pm_get_present_cores - Get details of the cores that are present in ++ * the device. + * -+ * @kctx: Pointer to faulty kbase context. -+ * @fault: Pointer to the fault. ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * This function terminates all GPU command queue groups in the context and -+ * notifies the event notification thread of the fault. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) present in the GPU device and also a count of ++ * the number of cores. ++ * ++ * Return: The bit mask of cores present + */ -+void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, -+ struct kbase_fault *fault); ++u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. 
++ * kbase_pm_get_active_cores - Get details of the cores that are currently ++ * active in the device. + * -+ * @kctx: Pointer to the kbase context which is being terminated. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * This function terminates any remaining CSGs and CSs which weren't destroyed -+ * before context termination. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are actively processing work (i.e. ++ * turned on *and* busy). ++ * ++ * Return: The bit mask of active cores + */ -+void kbase_csf_ctx_term(struct kbase_context *kctx); ++u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * kbase_csf_queue_register - Register a GPU command queue. ++ * kbase_pm_get_trans_cores - Get details of the cores that are currently ++ * transitioning between power states. + * -+ * @kctx: Pointer to the kbase context within which the -+ * queue is to be registered. -+ * @reg: Pointer to the structure which contains details of the -+ * queue to be registered within the provided -+ * context. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * Return: 0 on success, or negative on failure. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are currently transitioning between ++ * power states. ++ * ++ * Return: The bit mask of transitioning cores + */ -+int kbase_csf_queue_register(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register *reg); ++u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * kbase_csf_queue_register_ex - Register a GPU command queue with -+ * extended format. ++ * kbase_pm_get_ready_cores - Get details of the cores that are currently ++ * powered and ready for jobs. + * -+ * @kctx: Pointer to the kbase context within which the -+ * queue is to be registered. -+ * @reg: Pointer to the structure which contains details of the -+ * queue to be registered within the provided -+ * context, together with the extended parameter fields -+ * for supporting cs trace command. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * Return: 0 on success, or negative on failure. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are powered and ready for jobs (they may ++ * or may not be currently executing jobs). ++ * ++ * Return: The bit mask of ready cores + */ -+int kbase_csf_queue_register_ex(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register_ex *reg); ++u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * kbase_csf_queue_terminate - Terminate a GPU command queue. ++ * kbase_pm_clock_on - Turn the clock for the device on, and enable device ++ * interrupts. + * -+ * @kctx: Pointer to the kbase context within which the -+ * queue is to be terminated. -+ * @term: Pointer to the structure which identifies which -+ * queue is to be terminated. 
++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_resume: true if clock on due to resume after suspend, false otherwise ++ * ++ * This function can be used by a power policy to turn the clock for the GPU on. ++ * It should be modified during integration to perform the necessary actions to ++ * ensure that the GPU is fully powered and clocked. + */ -+void kbase_csf_queue_terminate(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_terminate *term); ++void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** -+ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated -+ * for a queue at the time of bind. ++ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the ++ * device off. + * -+ * @kctx: Address of the kbase context within which the queue was created. -+ * @queue: Pointer to the queue to be unlinked. ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) + * -+ * This function will free the pair of physical pages allocated for a GPU -+ * command queue, and also release the hardware doorbell page, that were mapped -+ * into the process address space to enable direct submission of commands to -+ * the hardware. Also releases the reference taken on the queue when the mapping -+ * was created. ++ * This function can be used by a power policy to turn the clock for the GPU ++ * off. It should be modified during integration to perform the necessary ++ * actions to turn the clock off (if this is possible in the integration). + * -+ * If an explicit or implicit unbind was missed by the userspace then the -+ * mapping will persist. On process exit kernel itself will remove the mapping. ++ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used ++ * then this function would usually be invoked from the runtime suspend ++ * callback function. ++ * ++ * Return: true if clock was turned off, or ++ * false if clock can not be turned off due to pending page/bus fault ++ * workers. Caller must flush MMU workqueues and retry + */ -+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, -+ struct kbase_queue *queue); ++bool kbase_pm_clock_off(struct kbase_device *kbdev); + +/** -+ * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a -+ * GPU command queue. ++ * kbase_pm_enable_interrupts - Enable interrupts on the device. + * -+ * @kctx: Pointer to the kbase context within which the resources -+ * for the queue are being allocated. -+ * @queue: Pointer to the queue for which to allocate resources. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This function allocates a pair of User mode input/output pages for a -+ * GPU command queue and maps them in the shared interface segment of MCU -+ * firmware address space. Also reserves a hardware doorbell page for the queue. ++ * Interrupts are also enabled after a call to kbase_pm_clock_on(). ++ */ ++void kbase_pm_enable_interrupts(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_disable_interrupts - Disable interrupts on the device. + * -+ * Return: 0 on success, or negative on failure. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This prevents delivery of Power Management interrupts to the CPU so that ++ * kbase_pm_update_state() will not be called from the IRQ handler ++ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. 
++ * ++ * Interrupts are also disabled after a call to kbase_pm_clock_off(). + */ -+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, -+ struct kbase_queue *queue); ++void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** -+ * kbase_csf_queue_bind - Bind a GPU command queue to a queue group. ++ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() ++ * that does not take the hwaccess_lock + * -+ * @kctx: The kbase context. -+ * @bind: Pointer to the union which specifies a queue group and a -+ * queue to be bound to that group. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: 0 on success, or negative on failure. ++ * Caller must hold the hwaccess_lock. + */ -+int kbase_csf_queue_bind(struct kbase_context *kctx, -+ union kbase_ioctl_cs_queue_bind *bind); ++void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** -+ * kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group -+ * to which it has been bound and free -+ * resources allocated for this queue if there -+ * are any. ++ * kbase_pm_init_hw - Initialize the hardware. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @flags: Flags specifying the type of PM init + * -+ * @queue: Pointer to queue to be unbound. -+ * @process_exit: Flag to indicate if process exit is happening. ++ * This function checks the GPU ID register to ensure that the GPU is supported ++ * by the driver and performs a reset on the device so that it is in a known ++ * state before the device is used. ++ * ++ * Return: 0 if the device is supported and successfully reset. + */ -+void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit); ++int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** -+ * kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case -+ * where it was never started. -+ * @queue: Pointer to queue to be unbound. ++ * kbase_pm_reset_done - The GPU has been reset successfully. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Variant of kbase_csf_queue_unbind() for use on error paths for cleaning up -+ * queues that failed to fully bind. ++ * This function must be called by the GPU interrupt handler when the ++ * RESET_COMPLETED bit is set. It signals to the power management initialization ++ * code that the GPU has been successfully reset. + */ -+void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); ++void kbase_pm_reset_done(struct kbase_device *kbdev); + ++#if MALI_USE_CSF +/** -+ * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware ++ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be ++ * reached ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kctx: The kbase context. -+ * @kick: Pointer to the struct which specifies the queue -+ * that needs to be scheduled. ++ * Wait for the L2 and MCU state machines to reach the states corresponding ++ * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'. + * -+ * Return: 0 on success, or negative on failure. ++ * The usual use-case for this is to ensure that all parts of GPU have been ++ * powered up after performing a GPU Reset. ++ * ++ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, ++ * because this function will take that lock itself. 
++ * ++ * NOTE: This may not wait until the correct state is reached if there is a ++ * power off in progress and kbase_pm_context_active() was called instead of ++ * kbase_csf_scheduler_pm_active(). ++ * ++ * Return: 0 on success, error code on error + */ -+int kbase_csf_queue_kick(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_kick *kick); ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); ++#else ++/** ++ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be ++ * reached ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Wait for the L2 and shader power state machines to reach the states ++ * corresponding to the values of 'l2_desired' and 'shaders_desired'. ++ * ++ * The usual use-case for this is to ensure cores are 'READY' after performing ++ * a GPU Reset. ++ * ++ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, ++ * because this function will take that lock itself. ++ * ++ * NOTE: This may not wait until the correct state is reached if there is a ++ * power off in progress. To correctly wait for the desired state the caller ++ * must ensure that this is not the case by, for example, calling ++ * kbase_pm_wait_for_poweroff_work_complete() ++ * ++ * Return: 0 on success, error code on error ++ */ ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); ++#endif + +/** -+ * kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding -+ * to the indicated handle. ++ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on + * -+ * @kctx: The kbase context under which the queue group exists. -+ * @group_handle: Handle for the group which uniquely identifies it within -+ * the context with which it was created. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This function is used to find the queue group when passed a handle. ++ * Wait for the L2 to be powered on, and for the L2 and the state machines of ++ * its dependent stack components to stabilise. + * -+ * Return: Pointer to a queue group on success, NULL on failure ++ * kbdev->pm.active_count must be non-zero when calling this function. ++ * ++ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, ++ * because this function will take that lock itself. ++ * ++ * Return: 0 on success, error code on error + */ -+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); ++int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); + ++#if MALI_USE_CSF +/** -+ * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle -+ * is valid. ++ * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores + * -+ * @kctx: The kbase context under which the queue group exists. -+ * @group_handle: Handle for the group which uniquely identifies it within -+ * the context with which it was created. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This function is used to determine if the queue group handle is valid. ++ * This function can be called to ensure that the downscaling of cores is ++ * effectively complete and it would be safe to lower the voltage. ++ * The function assumes that caller had exercised the MCU state machine for the ++ * downscale request through the kbase_pm_update_state() function. + * -+ * Return: 0 on success, or negative on failure. 
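/*
 * Editor's rough usage sketch for the wait helpers documented above (not
 * part of the patch): take an active PM reference, latch and wait for the
 * desired state, and treat -ETIMEDOUT as a cue for recovery. This assumes
 * the driver-internal mali_kbase.h header and, per the NOTE above, that no
 * power-off is racing with the wait; on CSF the scheduler's own PM-active
 * call may be the more appropriate reference to hold.
 */
#include "mali_kbase.h"		/* driver-internal header (assumed) */
#include <linux/errno.h>

static int my_bringup_and_wait(struct kbase_device *kbdev)
{
	int err;

	kbase_pm_context_active(kbdev);		/* hold the GPU powered */
	err = kbase_pm_wait_for_desired_state(kbdev);
	if (err == -ETIMEDOUT)
		dev_err(kbdev->dev, "PM state machine stuck, consider a reset\n");
	return err;
}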
++ * This function needs to be used by the caller to safely wait for the completion ++ * of downscale request, instead of kbase_pm_wait_for_desired_state(). ++ * The downscale request would trigger a state change in MCU state machine ++ * and so when MCU reaches the stable ON state, it can be inferred that ++ * downscaling is complete. But it has been observed that the wake up of the ++ * waiting thread can get delayed by few milli seconds and by the time the ++ * thread wakes up the power down transition could have started (after the ++ * completion of downscale request). ++ * On the completion of power down transition another wake up signal would be ++ * sent, but again by the time thread wakes up the power up transition can begin. ++ * And the power up transition could then get blocked inside the platform specific ++ * callback_power_on() function due to the thread that called into Kbase (from the ++ * platform specific code) to perform the downscaling and then ended up waiting ++ * for the completion of downscale request. ++ * ++ * Return: 0 on success, error code on error or remaining jiffies on timeout. + */ -+int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, -+ u8 group_handle); ++int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); ++#endif + +/** -+ * kbase_csf_queue_group_create - Create a GPU command queue group. ++ * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state ++ * machines after changing shader core ++ * availability ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kctx: Pointer to the kbase context within which the -+ * queue group is to be created. -+ * @create: Pointer to the structure which contains details of the -+ * queue group which is to be created within the -+ * provided kbase context. ++ * It can be called in any status, so need to check the l2 and shader core ++ * power status in this function or it will break shader/l2 state machine + * -+ * Return: 0 on success, or negative on failure. ++ * Caller must hold hwaccess_lock + */ -+int kbase_csf_queue_group_create(struct kbase_context *kctx, -+ union kbase_ioctl_cs_queue_group_create *create); ++void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev); + +/** -+ * kbase_csf_queue_group_terminate - Terminate a GPU command queue group. ++ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() ++ * where the caller must hold ++ * kbase_device.hwaccess_lock + * -+ * @kctx: Pointer to the kbase context within which the -+ * queue group is to be terminated. -+ * @group_handle: Pointer to the structure which identifies the queue -+ * group which is to be terminated. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_csf_queue_group_terminate(struct kbase_context *kctx, -+ u8 group_handle); ++void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); + +/** -+ * kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue -+ * group that is not operational -+ * inside the scheduler. ++ * kbase_pm_update_state - Update the L2 and shader power state machines ++ * @kbdev: Device pointer ++ */ ++void kbase_pm_update_state(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_state_machine_init - Initialize the state machines, primarily the ++ * shader poweroff timer ++ * @kbdev: Device pointer + * -+ * @group: Pointer to the structure which identifies the queue -+ * group to be terminated. 
The function assumes that the caller -+ * is sure that the given group is not operational inside the -+ * scheduler. If in doubt, use its alternative: -+ * @ref kbase_csf_queue_group_terminate(). ++ * Return: 0 on success, error code on error + */ -+void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); ++int kbase_pm_state_machine_init(struct kbase_device *kbdev); + -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST +/** -+ * kbase_csf_queue_group_suspend - Suspend a GPU command queue group ++ * kbase_pm_state_machine_term - Clean up the PM state machines' data ++ * @kbdev: Device pointer ++ */ ++void kbase_pm_state_machine_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_update_cores_state - Update the desired state of shader cores from ++ * the Power Policy, and begin any power ++ * transitions. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kctx: The kbase context for which the queue group is to be -+ * suspended. -+ * @sus_buf: Pointer to the structure which contains details of the -+ * user buffer and its kernel pinned pages. -+ * @group_handle: Handle for the group which uniquely identifies it within -+ * the context within which it was created. ++ * This function will update the desired_xx_state members of ++ * struct kbase_pm_device_data by calling into the current Power Policy. It will ++ * then begin power transitions to make the hardware acheive the desired shader ++ * core state. ++ */ ++void kbase_pm_update_cores_state(struct kbase_device *kbdev); ++ ++/** ++ * kbasep_pm_metrics_init - Initialize the metrics gathering framework. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * This function is used to suspend a queue group and copy the suspend buffer. ++ * This must be called before other metric gathering APIs are called. + * -+ * Return: 0 on success or negative value if failed to suspend -+ * queue group and copy suspend buffer contents. ++ * ++ * Return: 0 on success, error code on error + */ -+int kbase_csf_queue_group_suspend(struct kbase_context *kctx, -+ struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); -+#endif ++int kbasep_pm_metrics_init(struct kbase_device *kbdev); + +/** -+ * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace ++ * kbasep_pm_metrics_term - Terminate the metrics gathering framework. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @group: GPU command queue group. -+ * @err_payload: Error payload to report. ++ * This must be called when metric gathering is no longer required. It is an ++ * error to call any metrics gathering function (other than ++ * kbasep_pm_metrics_init()) after calling this function. + */ -+void kbase_csf_add_group_fatal_error( -+ struct kbase_queue_group *const group, -+ struct base_gpu_queue_group_error const *const err_payload); ++void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** -+ * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. ++ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to ++ * update the vsync metric. 
++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @buffer_updated: True if the buffer has been updated on this VSync, ++ * false otherwise + * -+ * @kbdev: The kbase device to handle an IRQ for -+ * @val: The value of JOB IRQ status register which triggered the interrupt ++ * This function should be called by the frame buffer driver to update whether ++ * the system is hitting the vsync target or not. buffer_updated should be true ++ * if the vsync corresponded with a new frame being displayed, otherwise it ++ * should be false. This function does not need to be called every vsync, but ++ * only when the value of @buffer_updated differs from a previous call. + */ -+void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); ++void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** -+ * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates -+ * the update of userspace mapping of HW -+ * doorbell page. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change ++ * the clock speed of the GPU. + * -+ * The function creates a file and allocates a dummy page to facilitate the -+ * update of userspace mapping to point to the dummy page instead of the real -+ * HW doorbell page after the suspend of queue group. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: 0 on success, or negative on failure. ++ * This function should be called regularly by the DVFS system to check whether ++ * the clock speed of the GPU needs updating. + */ -+int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev); ++void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** -+ * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used -+ * to update the userspace mapping of HW doorbell page ++ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is ++ * needed ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * If the caller is the first caller then the GPU cycle counters will be enabled ++ * along with the l2 cache ++ * ++ * The GPU must be powered when calling this function (i.e. ++ * kbase_pm_context_active() must have been called). + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ -+void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev); ++void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** -+ * kbase_csf_setup_dummy_user_reg_page - Setup the dummy page that is accessed -+ * instead of the User register page after -+ * the GPU power down. ++ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is ++ * needed (l2 cache already on) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * This is a version of the above function ++ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the ++ * l2 cache is known to be on and assured to be on until the subsequent call of ++ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does ++ * not sleep and can be called from atomic functions. + * -+ * The function allocates a dummy page which is used to replace the User -+ * register page in the userspace mapping after the power down of GPU. 
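Because kbase_pm_report_vsync() only needs to be called when the reported value changes, a frame buffer driver hook can be as small as the sketch below; example_fb_vsync_hook() and its bookkeeping are hypothetical.

static void example_fb_vsync_hook(struct kbase_device *kbdev, bool frame_was_new)
{
	static int last_reported = -1;	/* -1: nothing reported yet */
	int buffer_updated = frame_was_new ? 1 : 0;

	if (buffer_updated != last_reported) {
		kbase_pm_report_vsync(kbdev, buffer_updated);
		last_reported = buffer_updated;
	}
}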
-+ * On the power up of GPU, the mapping is updated to point to the real -+ * User register page. The mapping is used to allow access to LATEST_FLUSH -+ * register from userspace. ++ * The GPU must be powered when calling this function (i.e. ++ * kbase_pm_context_active() must have been called) and the l2 cache must be ++ * powered on. ++ */ ++void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no ++ * longer in use ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: 0 on success, or negative on failure. ++ * If the caller is the last caller then the GPU cycle counters will be ++ * disabled. A request must have been made before a call to this. ++ * ++ * Caller must not hold the hwaccess_lock, as it will be taken in this function. ++ * If the caller is already holding this lock then ++ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. + */ -+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); ++void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); + +/** -+ * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used -+ * to replace the User register page ++ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() ++ * that does not take hwaccess_lock ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * Caller must hold the hwaccess_lock. + */ -+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); ++void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. ++ * kbase_pm_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to ++ * complete + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @slot: Index of CSG interface for ringing the door-bell. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * The function kicks a notification on the CSG interface to firmware. ++ * This function effectively just waits for the @gpu_poweroff_wait_work work ++ * item to complete, if it was enqueued. GPU may not have been powered down ++ * before this function returns. + */ -+void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); ++void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG -+ * interfaces. ++ * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * The function kicks a notification on a set of CSG interfaces to firmware. ++ * This function waits for the actual gpu power down to complete. 
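The cycle counter requests above are reference counted and must be issued while the GPU is powered. A minimal sketch of the pairing, assuming kbase_pm_context_idle() as the usual counterpart of kbase_pm_context_active():

static void example_measure_with_cycle_counter(struct kbase_device *kbdev)
{
	kbase_pm_context_active(kbdev);		/* GPU must be powered first */
	kbase_pm_request_gpu_cycle_counter(kbdev);

	/* ... sample cycle counts / run the workload being measured ... */

	/* hwaccess_lock must not be held here; otherwise use the _nolock variant */
	kbase_pm_release_gpu_cycle_counter(kbdev);
	kbase_pm_context_idle(kbdev);
}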
+ */ -+void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, -+ u32 slot_bitmap); ++void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI -+ * assigned to a GPU queue ++ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @csi_index: ID of the CSI assigned to the GPU queue. -+ * @csg_nr: Index of the CSG slot assigned to the queue -+ * group to which the GPU queue is bound. -+ * @ring_csg_doorbell: Flag to indicate if the CSG doorbell needs to be rung -+ * after updating the CSG_DB_REQ. So if this flag is false -+ * the doorbell interrupt will not be sent to FW. -+ * The flag is supposed be false only when the input page -+ * for bound GPU queues is programmed at the time of -+ * starting/resuming the group on a CSG slot. ++ * Setup the power management callbacks and initialize/enable the runtime-pm ++ * for the Mali GPU platform device, using the callback function. This must be ++ * called before the kbase_pm_register_access_enable() function. + * -+ * The function sends a doorbell interrupt notification to the firmware for -+ * a CSI assigned to a GPU queue. ++ * Return: 0 on success, error code on error + */ -+void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, -+ int csi_index, int csg_nr, -+ bool ring_csg_doorbell); ++int kbase_pm_runtime_init(struct kbase_device *kbdev); + +/** -+ * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a -+ * queue. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @queue: Pointer to the queue for ringing the door-bell. ++ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device + * -+ * The function kicks a notification to the firmware on the doorbell assigned -+ * to the queue. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, -+ struct kbase_queue *queue); ++void kbase_pm_runtime_term(struct kbase_device *kbdev); + +/** -+ * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU -+ * command queue groups associated with the context. ++ * kbase_pm_register_access_enable - Enable access to GPU registers ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @kctx: The kbase context. ++ * Enables access to the GPU registers before power management has powered up ++ * the GPU with kbase_pm_powerup(). + * -+ * This function will iterate through all the active/scheduled GPU command -+ * queue groups associated with the context, deschedule and mark them as -+ * terminated (which will then lead to unbinding of all the queues bound to -+ * them) and also no more work would be allowed to execute for them. ++ * This results in the power management callbacks provided in the driver ++ * configuration to get called to turn on power and/or clocks to the GPU. See ++ * kbase_pm_callback_conf. + * -+ * This is similar to the action taken in response to an unexpected OoM event. 
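The ordering constraints above (runtime-pm initialisation first, early register access only before kbase_pm_powerup()) add up to a probe-time sequence like the sketch below; discover_gpu_properties() is a hypothetical placeholder for the early register reads.

static int example_early_probe(struct kbase_device *kbdev)
{
	int err = kbase_pm_runtime_init(kbdev);

	if (err)
		return err;

	kbase_pm_register_access_enable(kbdev);	/* power/clocks on via callbacks */
	discover_gpu_properties(kbdev);		/* hypothetical early register reads */
	kbase_pm_register_access_disable(kbdev);

	/* Normal power management takes over later, via kbase_pm_powerup() */
	return 0;
}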
++ * This should only be used before power management is powered up with ++ * kbase_pm_powerup() + */ -+void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++void kbase_pm_register_access_enable(struct kbase_device *kbdev); + +/** -+ * kbase_csf_priority_check - Check the priority requested ++ * kbase_pm_register_access_disable - Disable early register access ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @kbdev: Device pointer -+ * @req_priority: Requested priority ++ * Disables access to the GPU registers enabled earlier by a call to ++ * kbase_pm_register_access_enable(). + * -+ * This will determine whether the requested priority can be satisfied. ++ * This results in the power management callbacks provided in the driver ++ * configuration to get called to turn off power and/or clocks to the GPU. See ++ * kbase_pm_callback_conf + * -+ * Return: The same or lower priority than requested. ++ * This should only be used before power management is powered up with ++ * kbase_pm_powerup() + */ -+u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority); ++void kbase_pm_register_access_disable(struct kbase_device *kbdev); + -+extern const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT]; -+extern const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; ++/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline ++ * function ++ */ + +/** -+ * kbase_csf_priority_relative_to_queue_group_priority - Convert relative to base priority ++ * kbase_pm_metrics_is_active - Check if the power management metrics ++ * collection is active. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * @priority: kbase relative priority ++ * Note that this returns if the power management metrics collection was ++ * active at the time of calling, it is possible that after the call the metrics ++ * collection enable may have changed state. + * -+ * This will convert the monotonically increasing realtive priority to the -+ * fixed base priority list. ++ * The caller must handle the consequence that the state may have changed. + * -+ * Return: base_queue_group_priority priority. ++ * Return: true if metrics collection was active else false. + */ -+static inline u8 kbase_csf_priority_relative_to_queue_group_priority(u8 priority) -+{ -+ if (priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT) -+ priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; -+ return kbasep_csf_relative_to_queue_group_priority[priority]; -+} ++bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + +/** -+ * kbase_csf_priority_queue_group_priority_to_relative - Convert base priority to relative -+ * -+ * @priority: base_queue_group_priority priority ++ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. + * -+ * This will convert the fixed base priority list to monotonically increasing realtive priority. ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_resume: true if power on due to resume after suspend, ++ * false otherwise ++ */ ++void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); ++ ++/** ++ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been ++ * requested. + * -+ * Return: kbase relative priority. 
++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) + */ -+static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority) -+{ -+ /* Apply low priority in case of invalid priority */ -+ if (priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) -+ priority = BASE_QUEUE_GROUP_PRIORITY_LOW; -+ return kbasep_csf_queue_group_priority_to_relative[priority]; -+} ++void kbase_pm_do_poweroff(struct kbase_device *kbdev); ++ ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) ++void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, ++ struct kbasep_pm_metrics *last, ++ struct kbasep_pm_metrics *diff); ++#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ ++ ++#ifdef CONFIG_MALI_BIFROST_DVFS + ++#if MALI_USE_CSF +/** -+ * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retreive the GPU cycle counter -+ * value for Ktrace purpose. ++ * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @utilisation: The current calculated utilisation by the metrics system. + * -+ * This function is just a wrapper to retreive the GPU cycle counter value, to -+ * avoid any overhead on Release builds where Ktrace is disabled by default. ++ * Function provided by platform specific code when DVFS is enabled to allow ++ * the power management metrics system to report utilisation. + * -+ * Return: Snapshot of the GPU cycle count register. ++ * Return: Returns 0 on failure and non zero on success. + */ -+static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) -+{ -+#if KBASE_KTRACE_ENABLE -+ return kbase_backend_get_cycle_cnt(kbdev); ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +#else -+ return 0; ++/** ++ * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @utilisation: The current calculated utilisation by the metrics system. ++ * @util_gl_share: The current calculated gl share of utilisation. ++ * @util_cl_share: The current calculated cl share of utilisation per core ++ * group. ++ * Function provided by platform specific code when DVFS is enabled to allow ++ * the power management metrics system to report utilisation. ++ * ++ * Return: Returns 0 on failure and non zero on success. ++ */ ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, ++ u32 util_gl_share, u32 util_cl_share[2]); +#endif -+} + -+#endif /* _KBASE_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c -new file mode 100644 -index 000000000..516a33ff7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c -@@ -0,0 +1,191 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++#endif /* CONFIG_MALI_BIFROST_DVFS */ ++ ++void kbase_pm_power_changed(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_metrics_update - Inform the metrics system that an atom is either ++ * about to be run or has just completed. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @now: Pointer to the timestamp of the change, or NULL to use current time + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
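For the CSF declaration above, the platform code supplies kbase_platform_dvfs_event() and decides what to do with the utilisation figure. A sketch with illustrative thresholds; platform_gpu_freq_up() and platform_gpu_freq_down() are hypothetical stand-ins for the platform's clock/OPP handling.

int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation)
{
	/* Thresholds are illustrative; utilisation is the value produced by the
	 * metrics code added elsewhere in this patch.
	 */
	if (utilisation > 90)
		platform_gpu_freq_up(kbdev);	/* hypothetical */
	else if (utilisation < 30)
		platform_gpu_freq_down(kbdev);	/* hypothetical */

	return 1;	/* non-zero indicates success */
}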
++ * Caller must hold hwaccess_lock ++ */ ++void kbase_pm_metrics_update(struct kbase_device *kbdev, ++ ktime_t *now); ++ ++/** ++ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU ++ * If the GPU does not have coherency this is a no-op ++ * @kbdev: Device pointer + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * This function should be called after L2 power up. ++ */ ++ ++void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU ++ * If the GPU does not have coherency this is a no-op ++ * @kbdev: Device pointer + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * This function should be called before L2 power off. ++ */ ++void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++/** ++ * kbase_devfreq_set_core_mask - Set devfreq core mask ++ * @kbdev: Device pointer ++ * @core_mask: New core mask + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is used by devfreq to change the available core mask as ++ * required by Dynamic Core Scaling. ++ */ ++void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); ++#endif ++ ++/** ++ * kbase_pm_reset_start_locked - Signal that GPU reset has started ++ * @kbdev: Device pointer + * ++ * Normal power management operation will be suspended until the reset has ++ * completed. ++ * ++ * Caller must hold hwaccess_lock. + */ ++void kbase_pm_reset_start_locked(struct kbase_device *kbdev); + -+#include "mali_kbase_csf_cpu_queue_debugfs.h" -+#include -+#include ++/** ++ * kbase_pm_reset_complete - Signal that GPU reset has completed ++ * @kbdev: Device pointer ++ * ++ * Normal power management operation will be resumed. The power manager will ++ * re-evaluate what cores are needed and power on or off as required. ++ */ ++void kbase_pm_reset_complete(struct kbase_device *kbdev); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++#if !MALI_USE_CSF ++/** ++ * kbase_pm_protected_override_enable - Enable the protected mode override ++ * @kbdev: Device pointer ++ * ++ * When the protected mode override is enabled, all shader cores are requested ++ * to power down, and the L2 power state can be controlled by ++ * kbase_pm_protected_l2_override(). ++ * ++ * Caller must hold hwaccess_lock. ++ */ ++void kbase_pm_protected_override_enable(struct kbase_device *kbdev); + -+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, -+ struct base_csf_notification *req) -+{ -+ if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, -+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED, -+ BASE_CSF_CPU_QUEUE_DUMP_PENDING) != -+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { -+ return false; -+ } ++/** ++ * kbase_pm_protected_override_disable - Disable the protected mode override ++ * @kbdev: Device pointer ++ * ++ * Caller must hold hwaccess_lock. 
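kbase_pm_reset_start_locked() and kbase_pm_reset_complete() bracket a GPU reset as in the sketch below; do_gpu_soft_reset() is a hypothetical placeholder for the actual backend reset work.

static void example_reset_bracketing(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_reset_start_locked(kbdev);	/* normal PM is now suspended */
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	do_gpu_soft_reset(kbdev);		/* hypothetical reset work */

	/* PM re-evaluates which cores are needed and powers them accordingly */
	kbase_pm_reset_complete(kbdev);
}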
++ */ ++void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + -+ req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; -+ return true; -+} ++/** ++ * kbase_pm_protected_l2_override - Control the protected mode L2 override ++ * @kbdev: Device pointer ++ * @override: true to enable the override, false to disable ++ * ++ * When the driver is transitioning in or out of protected mode, the L2 cache is ++ * forced to power off. This can be overridden to force the L2 cache to power ++ * on. This is required to change coherency settings on some GPUs. ++ */ ++void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + +/** -+ * kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context ++ * kbase_pm_protected_entry_override_enable - Enable the protected mode entry ++ * override ++ * @kbdev: Device pointer + * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * Initiate a GPU reset and enable the protected mode entry override flag if ++ * l2_always_on WA is enabled and platform is fully coherent. If the GPU ++ * reset is already ongoing then protected mode entry override flag will not ++ * be enabled and function will have to be called again. + * -+ * Return: Negative error code or 0 on success. ++ * When protected mode entry override flag is enabled to power down L2 via GPU ++ * reset, the GPU reset handling behavior gets changed. For example call to ++ * kbase_backend_reset() is skipped, Hw counters are not re-enabled and L2 ++ * isn't powered up again post reset. ++ * This is needed only as a workaround for a Hw issue where explicit power down ++ * of L2 causes a glitch. For entering protected mode on fully coherent ++ * platforms L2 needs to be powered down to switch to IO coherency mode, so to ++ * avoid the glitch GPU reset is used to power down L2. Hence, this function ++ * does nothing on systems where the glitch issue isn't present. ++ * ++ * Caller must hold hwaccess_lock. Should be only called during the transition ++ * to enter protected mode. ++ * ++ * Return: -EAGAIN if a GPU reset was required for the glitch workaround but ++ * was already ongoing, otherwise 0. + */ -+static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data) -+{ -+ struct kbase_context *kctx = file->private; -+ -+ mutex_lock(&kctx->csf.lock); -+ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != -+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { -+ seq_puts(file, "Dump request already started! (try again)\n"); -+ mutex_unlock(&kctx->csf.lock); -+ return -EBUSY; -+ } ++int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev); + -+ atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); -+ init_completion(&kctx->csf.cpu_queue.dump_cmp); -+ kbase_event_wakeup(kctx); -+ mutex_unlock(&kctx->csf.lock); ++/** ++ * kbase_pm_protected_entry_override_disable - Disable the protected mode entry ++ * override ++ * @kbdev: Device pointer ++ * ++ * This shall be called once L2 has powered down and switch to IO coherency ++ * mode has been made. As with kbase_pm_protected_entry_override_enable(), ++ * this function does nothing on systems where the glitch issue isn't present. ++ * ++ * Caller must hold hwaccess_lock. Should be only called during the transition ++ * to enter protected mode. 
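On JM (non-CSF) builds the two entry override calls pair up around the L2 power down and coherency switch, and the -EAGAIN case has to be retried. A simplified sketch of the enable step, with the waiting left elided:

static int example_protected_entry_step(struct kbase_device *kbdev)
{
	int err;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	err = kbase_pm_protected_entry_override_enable(kbdev);
	if (err == -EAGAIN) {
		/* A GPU reset is already in flight: retry this step later
		 * instead of treating it as a hard failure.
		 */
		return -EAGAIN;
	}

	/* Once L2 has powered down and the switch to IO coherency has been made
	 * (both elided here), the matching call is
	 * kbase_pm_protected_entry_override_disable(), again with hwaccess_lock held.
	 */
	return err;
}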
++ */ ++void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev); ++#endif + -+ seq_puts(file, -+ "CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n"); ++/* If true, the driver should explicitly control corestack power management, ++ * instead of relying on the Power Domain Controller. ++ */ ++extern bool corestack_driver_control; + -+ wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, -+ msecs_to_jiffies(3000)); ++/** ++ * kbase_pm_is_l2_desired - Check whether l2 is desired ++ * ++ * @kbdev: Device pointer ++ * ++ * This shall be called to check whether l2 is needed to power on ++ * ++ * Return: true if l2 need to power on ++ */ ++bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + -+ mutex_lock(&kctx->csf.lock); -+ if (kctx->csf.cpu_queue.buffer) { -+ WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != -+ BASE_CSF_CPU_QUEUE_DUMP_PENDING); ++#if MALI_USE_CSF ++/** ++ * kbase_pm_is_mcu_desired - Check whether MCU is desired ++ * ++ * @kbdev: Device pointer ++ * ++ * This shall be called to check whether MCU needs to be enabled. ++ * ++ * Return: true if MCU needs to be enabled. ++ */ ++bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); + -+ seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer); ++/** ++ * kbase_pm_is_mcu_inactive - Check if the MCU is inactive (i.e. either ++ * it is disabled or it is in sleep) ++ * ++ * @kbdev: kbase device ++ * @state: state of the MCU state machine. ++ * ++ * This function must be called with hwaccess_lock held. ++ * L2 cache can be turned off if this function returns true. ++ * ++ * Return: true if MCU is inactive ++ */ ++bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, ++ enum kbase_mcu_state state); + -+ kfree(kctx->csf.cpu_queue.buffer); -+ kctx->csf.cpu_queue.buffer = NULL; -+ kctx->csf.cpu_queue.buffer_size = 0; -+ } else -+ seq_puts(file, "Dump error! (time out)\n"); ++/** ++ * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be ++ * suspended to low power state when all ++ * the CSGs are idle ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if allowed to enter the suspended state. ++ */ ++static inline ++bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ atomic_set(&kctx->csf.cpu_queue.dump_req_status, -+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); ++ return !(kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); ++} + -+ mutex_unlock(&kctx->csf.lock); -+ return 0; ++/** ++ * kbase_pm_no_runnables_sched_suspendable - Check whether the scheduler can be ++ * suspended to low power state when ++ * there are no runnable CSGs. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if allowed to enter the suspended state. ++ */ ++static inline ++bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return !(kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); +} + -+static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file) ++/** ++ * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the ++ * MCU shader Core powered in accordance to the active ++ * power management policy ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if the MCU is to retain powered. 
++ */ ++static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) +{ -+ return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_CORE_KEEP_ON; +} + -+static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = { -+ .open = kbasep_csf_cpu_queue_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++/** ++ * kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if MCU is in stable ON/OFF state. ++ */ ++static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) ++{ ++ bool in_desired_state = true; + -+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) ++ if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) ++ in_desired_state = false; ++ else if (!kbase_pm_is_mcu_desired(kbdev) && ++ (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) && ++ (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP)) ++ in_desired_state = false; ++ ++ return in_desired_state; ++} ++ ++#endif ++ ++/** ++ * kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if L2 is in stable ON/OFF state. ++ */ ++static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev) +{ -+ struct dentry *file; ++ bool in_desired_state = true; + -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; ++ if (kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_ON) ++ in_desired_state = false; ++ else if (!kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_OFF) ++ in_desired_state = false; + -+ file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, -+ kctx, &kbasep_csf_cpu_queue_debugfs_fops); ++ return in_desired_state; ++} + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kctx->kbdev->dev, -+ "Unable to create cpu queue debugfs entry"); -+ } ++/** ++ * kbase_pm_lock - Lock all necessary mutexes to perform PM actions ++ * ++ * @kbdev: Device pointer ++ * ++ * This function locks correct mutexes independent of GPU architecture. ++ */ ++static inline void kbase_pm_lock(struct kbase_device *kbdev) ++{ ++#if !MALI_USE_CSF ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ ++ mutex_lock(&kbdev->pm.lock); ++} + -+ kctx->csf.cpu_queue.buffer = NULL; -+ kctx->csf.cpu_queue.buffer_size = 0; -+ atomic_set(&kctx->csf.cpu_queue.dump_req_status, -+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); ++/** ++ * kbase_pm_unlock - Unlock mutexes locked by kbase_pm_lock ++ * ++ * @kbdev: Device pointer ++ */ ++static inline void kbase_pm_unlock(struct kbase_device *kbdev) ++{ ++ mutex_unlock(&kbdev->pm.lock); ++#if !MALI_USE_CSF ++ mutex_unlock(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ +} + -+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, -+ u64 buffer, size_t buf_size) ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++/** ++ * kbase_pm_gpu_sleep_allowed - Check if the GPU is allowed to be put in sleep ++ * ++ * @kbdev: Device pointer ++ * ++ * This function is called on GPU idle notification and if it returns false then ++ * GPU power down will be triggered by suspending the CSGs and halting the MCU. ++ * ++ * Return: true if the GPU is allowed to be in the sleep state. 
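The *_is_in_desired_state() helpers above are the kind of stability predicate the wait functions earlier in this header are built around. A sketch of an overall check, taken under hwaccess_lock since the helpers read the backend state directly:

static bool example_pm_is_stable(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool stable;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	stable = kbase_pm_l2_is_in_desired_state(kbdev);
#if MALI_USE_CSF
	stable = stable && kbase_pm_mcu_is_in_desired_state(kbdev);
#endif
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return stable;
}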
++ */ ++static inline bool kbase_pm_gpu_sleep_allowed(struct kbase_device *kbdev) +{ -+ int err = 0; ++ /* If the autosuspend_delay has been set to 0 then it doesn't make ++ * sense to first put GPU to sleep state and then power it down, ++ * instead would be better to power it down right away. ++ * Also need to do the same when autosuspend_delay is set to a negative ++ * value, which implies that runtime pm is effectively disabled by the ++ * kernel. ++ * A high positive value of autosuspend_delay can be used to keep the ++ * GPU in sleep state for a long time. ++ */ ++ if (unlikely(!kbdev->dev->power.autosuspend_delay || ++ (kbdev->dev->power.autosuspend_delay < 0))) ++ return false; + -+ size_t alloc_size = buf_size; -+ char *dump_buffer; ++ return kbdev->pm.backend.gpu_sleep_supported; ++} + -+ if (!buffer || !alloc_size) -+ goto done; ++/** ++ * kbase_pm_enable_db_mirror_interrupt - Enable the doorbell mirror interrupt to ++ * detect the User doorbell rings. ++ * ++ * @kbdev: Device pointer ++ * ++ * This function is called just before sending the sleep request to MCU firmware ++ * so that User doorbell rings can be detected whilst GPU remains in the sleep ++ * state. ++ * ++ */ ++static inline void kbase_pm_enable_db_mirror_interrupt(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); -+ dump_buffer = kzalloc(alloc_size, GFP_KERNEL); -+ if (ZERO_OR_NULL_PTR(dump_buffer)) { -+ err = -ENOMEM; -+ goto done; -+ } ++ if (!kbdev->pm.backend.db_mirror_interrupt_enabled) { ++ u32 irq_mask = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_MASK)); + -+ WARN_ON(kctx->csf.cpu_queue.buffer != NULL); ++ WARN_ON(irq_mask & DOORBELL_MIRROR); + -+ err = copy_from_user(dump_buffer, -+ u64_to_user_ptr(buffer), -+ buf_size); -+ if (err) { -+ kfree(dump_buffer); -+ err = -EFAULT; -+ goto done; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask | DOORBELL_MIRROR); ++ kbdev->pm.backend.db_mirror_interrupt_enabled = true; + } ++} + -+ mutex_lock(&kctx->csf.lock); ++/** ++ * kbase_pm_disable_db_mirror_interrupt - Disable the doorbell mirror interrupt. ++ * ++ * @kbdev: Device pointer ++ * ++ * This function is called when doorbell mirror interrupt is received or MCU ++ * needs to be reactivated by enabling the doorbell notification. ++ */ ++static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kfree(kctx->csf.cpu_queue.buffer); ++ if (kbdev->pm.backend.db_mirror_interrupt_enabled) { ++ u32 irq_mask = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_MASK)); + -+ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == -+ BASE_CSF_CPU_QUEUE_DUMP_PENDING) { -+ kctx->csf.cpu_queue.buffer = dump_buffer; -+ kctx->csf.cpu_queue.buffer_size = buf_size; -+ complete_all(&kctx->csf.cpu_queue.dump_cmp); -+ } else { -+ kfree(dump_buffer); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~DOORBELL_MIRROR); ++ kbdev->pm.backend.db_mirror_interrupt_enabled = false; + } -+ -+ mutex_unlock(&kctx->csf.lock); -+done: -+ return err; +} -+#else -+/* -+ * Stub functions for when debugfs is disabled ++#endif ++ ++/** ++ * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Check whether the L2 state is in power transition phase or not. If it is, the MMU ++ * page migration should be deferred. 
The caller must hold hwaccess_lock, and, if MMU ++ * page migration is intended, immediately start the MMU migration action without ++ * dropping the lock. When page migration begins, a flag is set in kbdev that would ++ * prevent the L2 state machine traversing into power transition phases, until ++ * the MMU migration action ends. ++ * ++ * Return: true if MMU page migration is allowed + */ -+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) ++static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ -+} ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + -+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, -+ struct base_csf_notification *req) -+{ -+ return false; -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, -+ u64 buffer, size_t buf_size) -+{ -+ return 0; ++ return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h ++ ++#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h new file mode 100644 -index 000000000..435a99395 +index 000000000..ef72f6083 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h -@@ -0,0 +1,90 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h +@@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -143150,85 +143814,159 @@ index 000000000..435a99395 + * + */ + -+#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ -+#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ -+ -+#include -+#include -+ -+#include "mali_kbase.h" -+ -+/* Forward declaration */ -+struct base_csf_notification; -+ -+#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 -+ -+/* CPU queue dump status */ -+/* Dumping is done or no dumping is in progress. */ -+#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 -+/* Dumping request is pending. */ -+#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 -+/* Dumping request is issued to Userspace */ -+#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 -+ -+ -+/** -+ * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s) ++/* ++ * Backend-specific Power Manager level 2 cache state definitions. ++ * The function-like macro KBASEP_L2_STATE() must be defined before including ++ * this header file. This header file can be included multiple times in the ++ * same compilation unit with different definitions of KBASEP_L2_STATE(). + * -+ * @kctx: The kbase_context for which to create the debugfs entry ++ * @OFF: The L2 cache and tiler are off ++ * @PEND_ON: The L2 cache and tiler are powering on ++ * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. 
++ * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled ++ * @ON: The L2 cache and tiler are on, and hwcnt is enabled ++ * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled ++ * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. ++ * Conditionally used. ++ * @POWER_DOWN: The L2 cache and tiler are about to be powered off ++ * @PEND_OFF: The L2 cache and tiler are powering off ++ * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are ++ * unknown + */ -+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx); -+ -+/** -+ * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event ++KBASEP_L2_STATE(OFF) ++KBASEP_L2_STATE(PEND_ON) ++KBASEP_L2_STATE(RESTORE_CLOCKS) ++KBASEP_L2_STATE(ON_HWCNT_ENABLE) ++KBASEP_L2_STATE(ON) ++KBASEP_L2_STATE(ON_HWCNT_DISABLE) ++KBASEP_L2_STATE(SLOW_DOWN_CLOCKS) ++KBASEP_L2_STATE(POWER_DOWN) ++KBASEP_L2_STATE(PEND_OFF) ++KBASEP_L2_STATE(RESET_WAIT) +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h +new file mode 100644 +index 000000000..3b448e397 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h +@@ -0,0 +1,108 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kctx: The kbase_context which cpu queue dumpped belongs to -+ * @req: Notification with cpu queue dump request. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * -+ * Return: true if needs CPU queue dump, or false otherwise. -+ */ -+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, -+ struct base_csf_notification *req); -+ -+/** -+ * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if it needs cpu queue dump, or false otherwise. + */ -+static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) -+{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == -+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED); -+#else -+ return false; -+#endif -+} + -+/** -+ * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs -+ * -+ * @kctx: The kbase_context which cpu queue dumpped belongs to -+ * @buffer: Buffer containing the cpu queue information. -+ * @buf_size: Buffer size. ++/* ++ * Backend-specific Power Manager MCU state definitions. ++ * The function-like macro KBASEP_MCU_STATE() must be defined before including ++ * this header file. This header file can be included multiple times in the ++ * same compilation unit with different definitions of KBASEP_MCU_STATE(). + * -+ * Return: Return 0 for dump successfully, or error code. 
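The include-twice pattern these state headers are written for looks roughly like the sketch below, shown for the MCU states (the L2 header added above is consumed the same way through KBASEP_L2_STATE()). The generated constants match the KBASE_MCU_* values used elsewhere in this patch; the name table and its identifier are illustrative.

/* First inclusion: generate the enum used by the MCU state machine */
#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n,
enum kbase_mcu_state {
#include "mali_kbase_pm_mcu_states.h"
};
#undef KBASEP_MCU_STATE

/* Second inclusion: generate a matching name table, e.g. for debugfs logging */
#define KBASEP_MCU_STATE(n) #n,
static const char *const example_mcu_state_names[] = {
#include "mali_kbase_pm_mcu_states.h"
};
#undef KBASEP_MCU_STATE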
++ * @OFF: The MCU is powered off. ++ * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with ++ * firmware reloading) is in progress. ++ * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration ++ * requests have been sent to the firmware. ++ * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now ++ * ready for use and hwcnt is being enabled. ++ * @ON: The MCU is active and hwcnt has been enabled. ++ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores ++ * is being updated. ++ * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. ++ * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU ++ * halt would be triggered. ++ * @ON_PEND_HALT: MCU halt in progress, confirmation pending. ++ * @POWER_DOWN: MCU halted operations, pending being disabled. ++ * @PEND_OFF: MCU is being disabled, pending on powering off. ++ * @RESET_WAIT: The GPU is resetting, MCU state is unknown. ++ * @HCTL_SHADERS_PEND_ON: Global configuration requests sent to the firmware ++ * have completed and shaders have been requested to ++ * power on. ++ * @HCTL_CORES_NOTIFY_PEND: Shader cores have powered up and firmware is being ++ * notified of the mask of enabled shader cores. ++ * @HCTL_MCU_ON_RECHECK: MCU is on and hwcnt disabling is triggered ++ * and checks are done to update the number of ++ * enabled cores. ++ * @HCTL_SHADERS_READY_OFF: MCU has halted and cores need to be powered down ++ * @HCTL_SHADERS_PEND_OFF: Cores are transitioning to power down. ++ * @HCTL_CORES_DOWN_SCALE_NOTIFY_PEND: Firmware has been informed to stop using ++ * specific cores, due to core_mask change request. ++ * After the ACK from FW, the wait will be done for ++ * undesired cores to become inactive. ++ * @HCTL_CORE_INACTIVE_PEND: Waiting for specific cores to become inactive. ++ * Once the cores become inactive their power down ++ * will be initiated. ++ * @HCTL_SHADERS_CORE_OFF_PEND: Waiting for specific cores to complete the ++ * transition to power down. Once powered down, ++ * HW counters will be re-enabled. ++ * @ON_SLEEP_INITIATE: MCU is on and hwcnt has been disabled and MCU ++ * is being put to sleep. ++ * @ON_PEND_SLEEP: MCU sleep is in progress. ++ * @IN_SLEEP: Sleep request is completed and MCU has halted. ++ * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to ++ * be requested, Coresight is being disabled. ++ * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened ++ * Coresight is being enabled. ++ * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. ++ * @CORESIGHT_ENABLE: The MCU is on, host does not have control and ++ * Coresight is being enabled. 
+ */ -+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, -+ u64 buffer, size_t buf_size); -+#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c ++KBASEP_MCU_STATE(OFF) ++KBASEP_MCU_STATE(PEND_ON_RELOAD) ++KBASEP_MCU_STATE(ON_GLB_REINIT_PEND) ++KBASEP_MCU_STATE(ON_HWCNT_ENABLE) ++KBASEP_MCU_STATE(ON) ++KBASEP_MCU_STATE(ON_CORE_ATTR_UPDATE_PEND) ++KBASEP_MCU_STATE(ON_HWCNT_DISABLE) ++KBASEP_MCU_STATE(ON_HALT) ++KBASEP_MCU_STATE(ON_PEND_HALT) ++KBASEP_MCU_STATE(POWER_DOWN) ++KBASEP_MCU_STATE(PEND_OFF) ++KBASEP_MCU_STATE(RESET_WAIT) ++/* Additional MCU states with HOST_CONTROL_SHADERS */ ++KBASEP_MCU_STATE(HCTL_SHADERS_PEND_ON) ++KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND) ++KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK) ++KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF) ++KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF) ++KBASEP_MCU_STATE(HCTL_CORES_DOWN_SCALE_NOTIFY_PEND) ++KBASEP_MCU_STATE(HCTL_CORE_INACTIVE_PEND) ++KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) ++/* Additional MCU states to support GPU sleep feature */ ++KBASEP_MCU_STATE(ON_SLEEP_INITIATE) ++KBASEP_MCU_STATE(ON_PEND_SLEEP) ++KBASEP_MCU_STATE(IN_SLEEP) ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++/* Additional MCU states for Coresight */ ++KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) ++KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) ++KBASEP_MCU_STATE(CORESIGHT_DISABLE) ++KBASEP_MCU_STATE(CORESIGHT_ENABLE) ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 -index 000000000..e96044ae6 +index 000000000..865f526f6 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c -@@ -0,0 +1,767 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +@@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -143246,762 +143984,956 @@ index 000000000..e96044ae6 + * + */ + -+#include "mali_kbase_csf_csg_debugfs.h" ++/* ++ * Metrics for power management ++ */ ++ +#include -+#include -+#include ++#include ++#include +#include + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include "mali_kbase_csf_tl_reader.h" ++#if MALI_USE_CSF ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++#else ++#include ++#endif /* !MALI_USE_CSF */ + -+/* Wait time to be used cumulatively for all the CSG slots. -+ * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be -+ * any other Host request pending on the FW side and usually FW would be responsive -+ * to the Doorbell IRQs as it won't do any polling for a long time and also it won't -+ * have to wait for any HW state transition to complete for publishing the status. -+ * So it is reasonable to expect that handling of STATUS_UPDATE request would be -+ * relatively very quick. ++#include ++#include ++ ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) || !MALI_USE_CSF ++/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns ++ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly ++ * under 11s. 
Exceeding this will cause overflow + */ -+#define STATUS_UPDATE_WAIT_TIMEOUT 500 ++#define KBASE_PM_TIME_SHIFT 8 ++#endif + -+/* The bitmask of CSG slots for which the STATUS_UPDATE request completed. -+ * The access to it is serialized with scheduler lock, so at a time it would -+ * get used either for "active_groups" or per context "groups" debugfs file. ++#if MALI_USE_CSF ++/* To get the GPU_ACTIVE value in nano seconds unit */ ++#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) ++#endif ++ ++/* ++ * Possible state transitions ++ * ON -> ON | OFF | STOPPED ++ * STOPPED -> ON | OFF ++ * OFF -> ON ++ * ++ * ++ * ┌─e─â”┌────────────f─────────────┠++ * │ v│ v ++ * └───ON ──a──> STOPPED ──b──> OFF ++ * ^^ │ │ ++ * │└──────c─────┘ │ ++ * │ │ ++ * └─────────────d─────────────┘ ++ * ++ * Transition effects: ++ * a. None ++ * b. Timer expires without restart ++ * c. Timer is not stopped, timer period is unaffected ++ * d. Timer must be restarted ++ * e. Callback is executed and the timer is restarted ++ * f. Timer is cancelled, or the callback is waited on if currently executing. This is called during ++ * tear-down and should not be subject to a race from an OFF->ON transition + */ -+static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS); ++enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON }; + -+static -+bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr) ++#ifdef CONFIG_MALI_BIFROST_DVFS ++static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[csg_nr]; ++ struct kbasep_pm_metrics_state *metrics; + -+ return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ -+ kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & -+ CSG_REQ_STATUS_UPDATE_MASK); ++ if (WARN_ON(!timer)) ++ return HRTIMER_NORESTART; ++ ++ metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); ++ ++ /* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */ ++ if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON) ++ return HRTIMER_NORESTART; ++ ++ kbase_pm_get_dvfs_action(metrics->kbdev); ++ ++ /* Set the new expiration time and restart (transition e) */ ++ hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period)); ++ return HRTIMER_RESTART; +} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ + -+static -+bool csg_slots_status_update_finish(struct kbase_device *kbdev, -+ const unsigned long *slots_mask) ++int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ -+ const u32 max_csg_slots = kbdev->csf.global_iface.group_num; -+ bool changed = false; -+ u32 csg_nr; ++#if MALI_USE_CSF ++ struct kbase_ipa_control_perf_counter perf_counter; ++ int err; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* One counter group */ ++ const size_t NUM_PERF_COUNTERS = 1; + -+ for_each_set_bit(csg_nr, slots_mask, max_csg_slots) { -+ if (csg_slot_status_update_finish(kbdev, csg_nr)) { -+ set_bit(csg_nr, csg_slots_status_updated); -+ changed = true; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbdev->pm.backend.metrics.kbdev = kbdev; ++ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); ++ kbdev->pm.backend.metrics.values.time_busy = 0; ++ kbdev->pm.backend.metrics.values.time_idle = 0; ++ kbdev->pm.backend.metrics.values.time_in_protm = 0; ++ ++ perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; ++ ++ /* Normalize values by GPU frequency */ ++ perf_counter.gpu_norm = true; ++ 
++ /* We need the GPU_ACTIVE counter, which is in the CSHW group */ ++ perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW; ++ ++ /* We need the GPU_ACTIVE counter */ ++ perf_counter.idx = GPU_ACTIVE_CNT_IDX; ++ ++ err = kbase_ipa_control_register( ++ kbdev, &perf_counter, NUM_PERF_COUNTERS, ++ &kbdev->pm.backend.metrics.ipa_control_client); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to register IPA with kbase_ipa_control: err=%d", ++ err); ++ return -1; + } ++#else ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbdev->pm.backend.metrics.kbdev = kbdev; ++ kbdev->pm.backend.metrics.time_period_start = ktime_get_raw(); + -+ return changed; ++ kbdev->pm.backend.metrics.gpu_active = false; ++ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; ++ ++ kbdev->pm.backend.metrics.values.time_busy = 0; ++ kbdev->pm.backend.metrics.values.time_idle = 0; ++ kbdev->pm.backend.metrics.values.busy_cl[0] = 0; ++ kbdev->pm.backend.metrics.values.busy_cl[1] = 0; ++ kbdev->pm.backend.metrics.values.busy_gl = 0; ++ ++#endif ++ spin_lock_init(&kbdev->pm.backend.metrics.lock); ++ ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ kbdev->pm.backend.metrics.timer.function = dvfs_callback; ++ kbdev->pm.backend.metrics.initialized = true; ++ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); ++ kbase_pm_metrics_start(kbdev); ++#endif /* CONFIG_MALI_BIFROST_DVFS */ ++ ++#if MALI_USE_CSF ++ /* The sanity check on the GPU_ACTIVE performance counter ++ * is skipped for Juno platforms that have timing problems. 
++ */ ++ kbdev->pm.backend.metrics.skip_gpu_active_sanity_check = ++ of_machine_is_compatible("arm,juno"); ++#endif ++ ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + -+static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, -+ unsigned long *slots_mask) ++void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ -+ const u32 max_csg_slots = kbdev->csf.global_iface.group_num; -+ long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT); -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ bitmap_zero(csg_slots_status_updated, max_csg_slots); ++ /* Cancel the timer, and block if the callback is currently executing (transition f) */ ++ kbdev->pm.backend.metrics.initialized = false; ++ atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF); ++ hrtimer_cancel(&kbdev->pm.backend.metrics.timer); ++#endif /* CONFIG_MALI_BIFROST_DVFS */ + -+ while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) { -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ csg_slots_status_update_finish(kbdev, slots_mask), -+ remaining); -+ if (likely(remaining)) { -+ bitmap_andnot(slots_mask, slots_mask, -+ csg_slots_status_updated, max_csg_slots); -+ } else { -+ dev_warn(kbdev->dev, -+ "STATUS_UPDATE request timed out for slots 0x%lx", -+ slots_mask[0]); -+ } -+ } ++#if MALI_USE_CSF ++ kbase_ipa_control_unregister( ++ kbdev, kbdev->pm.backend.metrics.ipa_control_client); ++#endif +} + -+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) ++KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); ++ ++/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this ++ * function ++ */ ++#if MALI_USE_CSF ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) ++static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) +{ -+ u32 max_csg_slots = kbdev->csf.global_iface.group_num; -+ DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; -+ u32 csg_nr; -+ unsigned long flags; ++ int err; ++ u64 gpu_active_counter; ++ u64 protected_time; ++ ktime_t now; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + -+ /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell -+ * ring for Extract offset update, shall not be made when MCU has been -+ * put to sleep otherwise it will undesirably make MCU exit the sleep -+ * state. Also it isn't really needed as FW will implicitly update the -+ * status of all on-slot groups when MCU sleep request is sent to it. ++ /* Query IPA_CONTROL for the latest GPU-active and protected-time ++ * info. + */ -+ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { -+ /* Wait for the MCU sleep request to complete. */ -+ kbase_pm_wait_for_desired_state(kbdev); -+ bitmap_copy(csg_slots_status_updated, -+ kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); -+ return; -+ } ++ err = kbase_ipa_control_query( ++ kbdev, kbdev->pm.backend.metrics.ipa_control_client, ++ &gpu_active_counter, 1, &protected_time); + -+ for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) { -+ struct kbase_queue_group *const group = -+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; -+ if (!group) -+ continue; -+ /* Ring the User doorbell for FW to update the Extract offset */ -+ kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); -+ set_bit(csg_nr, used_csgs); ++ /* Read the timestamp after reading the GPU_ACTIVE counter value. 
++ * This ensures the time gap between the 2 reads is consistent for ++ * a meaningful comparison between the increment of GPU_ACTIVE and ++ * elapsed time. The lock taken inside kbase_ipa_control_query() ++ * function can cause lot of variation. ++ */ ++ now = ktime_get_raw(); ++ ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to query the increment of GPU_ACTIVE counter: err=%d", ++ err); ++ } else { ++ u64 diff_ns; ++ s64 diff_ns_signed; ++ u32 ns_time; ++ ktime_t diff = ktime_sub( ++ now, kbdev->pm.backend.metrics.time_period_start); ++ ++ diff_ns_signed = ktime_to_ns(diff); ++ ++ if (diff_ns_signed < 0) ++ return; ++ ++ diff_ns = (u64)diff_ns_signed; ++ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* The GPU_ACTIVE counter shouldn't clock-up more time than has ++ * actually elapsed - but still some margin needs to be given ++ * when doing the comparison. There could be some drift between ++ * the CPU and GPU clock. ++ * ++ * Can do the check only in a real driver build, as an arbitrary ++ * value for GPU_ACTIVE can be fed into dummy model in no_mali ++ * configuration which may not correspond to the real elapsed ++ * time. ++ */ ++ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { ++ /* The margin is scaled to allow for the worst-case ++ * scenario where the samples are maximally separated, ++ * plus a small offset for sampling errors. ++ */ ++ u64 const MARGIN_NS = ++ IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2; ++ ++ if (gpu_active_counter > (diff_ns + MARGIN_NS)) { ++ dev_info( ++ kbdev->dev, ++ "GPU activity takes longer than time interval: %llu ns > %llu ns", ++ (unsigned long long)gpu_active_counter, ++ (unsigned long long)diff_ns); ++ } ++ } ++#endif ++ /* Calculate time difference in units of 256ns */ ++ ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT); ++ ++ /* Add protected_time to gpu_active_counter so that time in ++ * protected mode is included in the apparent GPU active time, ++ * then convert it from units of 1ns to units of 256ns, to ++ * match what JM GPUs use. The assumption is made here that the ++ * GPU is 100% busy while in protected mode, so we should add ++ * this since the GPU can't (and thus won't) update these ++ * counters while it's actually in protected mode. ++ * ++ * Perform the add after dividing each value down, to reduce ++ * the chances of overflows. ++ */ ++ protected_time >>= KBASE_PM_TIME_SHIFT; ++ gpu_active_counter >>= KBASE_PM_TIME_SHIFT; ++ gpu_active_counter += protected_time; ++ ++ /* Ensure the following equations don't go wrong if ns_time is ++ * slightly larger than gpu_active_counter somehow ++ */ ++ gpu_active_counter = MIN(gpu_active_counter, ns_time); ++ ++ kbdev->pm.backend.metrics.values.time_busy += ++ gpu_active_counter; ++ ++ kbdev->pm.backend.metrics.values.time_idle += ++ ns_time - gpu_active_counter; ++ ++ /* Also make time in protected mode available explicitly, ++ * so users of this data have this info, too. 
++ */ ++ kbdev->pm.backend.metrics.values.time_in_protm += ++ protected_time; + } + -+ /* Return early if there are no on-slot groups */ -+ if (bitmap_empty(used_csgs, max_csg_slots)) ++ kbdev->pm.backend.metrics.time_period_start = now; ++} ++#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */ ++#else ++static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ++ ktime_t now) ++{ ++ ktime_t diff; ++ ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++ ++ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); ++ if (ktime_to_ns(diff) < 0) + return; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ for_each_set_bit(csg_nr, used_csgs, max_csg_slots) { -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[csg_nr]; -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, -+ ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), -+ CSG_REQ_STATUS_UPDATE_MASK); ++ if (kbdev->pm.backend.metrics.gpu_active) { ++ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); ++ ++ kbdev->pm.backend.metrics.values.time_busy += ns_time; ++ if (kbdev->pm.backend.metrics.active_cl_ctx[0]) ++ kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; ++ if (kbdev->pm.backend.metrics.active_cl_ctx[1]) ++ kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; ++ if (kbdev->pm.backend.metrics.active_gl_ctx[0]) ++ kbdev->pm.backend.metrics.values.busy_gl += ns_time; ++ if (kbdev->pm.backend.metrics.active_gl_ctx[1]) ++ kbdev->pm.backend.metrics.values.busy_gl += ns_time; ++ if (kbdev->pm.backend.metrics.active_gl_ctx[2]) ++ kbdev->pm.backend.metrics.values.busy_gl += ns_time; ++ } else { ++ kbdev->pm.backend.metrics.values.time_idle += ++ (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + } + -+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE)); -+ kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ wait_csg_slots_status_update_finish(kbdev, used_csgs); -+ /* Wait for the User doobell ring to take effect */ -+ msleep(100); ++ kbdev->pm.backend.metrics.time_period_start = now; +} ++#endif /* MALI_USE_CSF */ + -+#define MAX_SCHED_STATE_STRING_LEN (16) -+static const char *scheduler_state_to_string(struct kbase_device *kbdev, -+ enum kbase_csf_scheduler_state sched_state) ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) ++void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, ++ struct kbasep_pm_metrics *last, ++ struct kbasep_pm_metrics *diff) +{ -+ switch (sched_state) { -+ case SCHED_BUSY: -+ return "BUSY"; -+ case SCHED_INACTIVE: -+ return "INACTIVE"; -+ case SCHED_SUSPENDED: -+ return "SUSPENDED"; -+#ifdef KBASE_PM_RUNTIME -+ case SCHED_SLEEPING: -+ return "SLEEPING"; ++ struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++#if MALI_USE_CSF ++ kbase_pm_get_dvfs_utilisation_calc(kbdev); ++#else ++ kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw()); +#endif -+ default: -+ dev_warn(kbdev->dev, "Unknown Scheduler state %d", sched_state); -+ return NULL; -+ } -+} + -+/** -+ * blocked_reason_to_string() - Convert blocking reason id to a string -+ * -+ * @reason_id: blocked_reason -+ * -+ * Return: Suitable string -+ */ -+static const char *blocked_reason_to_string(u32 reason_id) -+{ -+ /* possible blocking reasons of a cs */ -+ static const char *const cs_blocked_reason[] = { -+ 
[CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", -+ [CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", -+ [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = -+ "PROGRESS_WAIT", -+ [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", -+ [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED", -+ [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", -+ [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" -+ }; ++ memset(diff, 0, sizeof(*diff)); ++ diff->time_busy = cur->time_busy - last->time_busy; ++ diff->time_idle = cur->time_idle - last->time_idle; + -+ if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) -+ return "UNKNOWN_BLOCKED_REASON_ID"; ++#if MALI_USE_CSF ++ diff->time_in_protm = cur->time_in_protm - last->time_in_protm; ++#else ++ diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; ++ diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; ++ diff->busy_gl = cur->busy_gl - last->busy_gl; ++#endif + -+ return cs_blocked_reason[reason_id]; ++ *last = *cur; ++ ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} ++KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); ++#endif + -+static bool sb_source_supported(u32 glb_version) ++#ifdef CONFIG_MALI_BIFROST_DVFS ++void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ -+ bool supported = false; ++ int utilisation; ++ struct kbasep_pm_metrics *diff; ++#if !MALI_USE_CSF ++ int busy; ++ int util_gl_share; ++ int util_cl_share[2]; ++#endif + -+ if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && -+ (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || -+ ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && -+ (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || -+ ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && -+ (GLB_VERSION_MINOR_GET(glb_version) >= 3))) -+ supported = true; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ return supported; ++ diff = &kbdev->pm.backend.metrics.dvfs_diff; ++ ++ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, ++ diff); ++ ++ utilisation = (100 * diff->time_busy) / ++ max(diff->time_busy + diff->time_idle, 1u); ++ ++#if !MALI_USE_CSF ++ busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); ++ ++ util_gl_share = (100 * diff->busy_gl) / busy; ++ util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; ++ util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; ++ ++ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, ++ util_cl_share); ++#else ++ /* Note that, at present, we don't pass protected-mode time to the ++ * platform here. It's unlikely to be useful, however, as the platform ++ * probably just cares whether the GPU is busy or not; time in ++ * protected mode is already added to busy-time at this point, though, ++ * so we should be good. ++ */ ++ kbase_platform_dvfs_event(kbdev, utilisation); ++#endif +} + -+static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( -+ struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value, -+ u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason) ++bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ -+#define WAITING "Waiting" -+#define NOT_WAITING "Not waiting" ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ seq_printf(file, "SB_MASK: %d\n", -+ CS_STATUS_WAIT_SB_MASK_GET(wait_status)); -+ if (sb_source_supported(glb_version)) -+ seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); -+ seq_printf(file, "PROGRESS_WAIT: %s\n", -+ CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? 
-+ WAITING : NOT_WAITING); -+ seq_printf(file, "PROTM_PEND: %s\n", -+ CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? -+ WAITING : NOT_WAITING); -+ seq_printf(file, "SYNC_WAIT: %s\n", -+ CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? -+ WAITING : NOT_WAITING); -+ seq_printf(file, "WAIT_CONDITION: %s\n", -+ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? -+ "greater than" : "less or equal"); -+ seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); -+ seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value); -+ seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); -+ seq_printf(file, "SB_STATUS: %u\n", -+ CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); -+ seq_printf(file, "BLOCKED_REASON: %s\n", -+ blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET( -+ blocked_reason))); ++ return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON; +} ++KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + -+static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, -+ struct kbase_csf_cmd_stream_info const *const stream) ++void kbase_pm_metrics_start(struct kbase_device *kbdev) +{ -+ u32 val = kbase_csf_firmware_cs_input_read(stream, -+ CS_INSTR_BUFFER_BASE_LO); -+ u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, -+ CS_INSTR_BUFFER_BASE_HI) << 32) | val; -+ val = kbase_csf_firmware_cs_input_read(stream, -+ CS_INSTR_BUFFER_SIZE); ++ struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics; + -+ seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); ++ if (unlikely(!metrics->initialized)) ++ return; + -+ /* Write offset variable address (pointer) */ -+ val = kbase_csf_firmware_cs_input_read(stream, -+ CS_INSTR_BUFFER_OFFSET_POINTER_LO); -+ addr = ((u64)kbase_csf_firmware_cs_input_read(stream, -+ CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val; -+ seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); ++ /* Transition to ON, from a stopped state (transition c) */ ++ if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF) ++ /* Start the timer only if it's been fully stopped (transition d)*/ ++ hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), ++ HRTIMER_MODE_REL); ++} + -+ /* EVENT_SIZE and EVENT_STATEs */ -+ val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); -+ seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n", -+ CS_INSTR_CONFIG_EVENT_SIZE_GET(val), -+ CS_INSTR_CONFIG_EVENT_STATE_GET(val)); ++void kbase_pm_metrics_stop(struct kbase_device *kbdev) ++{ ++ if (unlikely(!kbdev->pm.backend.metrics.initialized)) ++ return; ++ ++ /* Timer is Stopped if its currently on (transition a) */ ++ atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED); +} + ++ ++#endif /* CONFIG_MALI_BIFROST_DVFS */ ++ ++#if !MALI_USE_CSF +/** -+ * kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue -+ * debug information ++ * kbase_pm_metrics_active_calc - Update PM active counts based on currently ++ * running atoms ++ * @kbdev: Device pointer + * -+ * @file: seq_file for printing to -+ * @queue: Address of a GPU command queue to examine ++ * The caller must hold kbdev->pm.backend.metrics.lock + */ -+static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, -+ struct kbase_queue *queue) ++static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ -+ u32 *addr; -+ u64 cs_extract; -+ u64 cs_insert; -+ u32 cs_active; -+ u64 wait_sync_pointer; -+ u32 wait_status, wait_sync_value; -+ u32 sb_status; -+ u32 
blocked_reason; -+ struct kbase_vmap_struct *mapping; -+ u64 *evt; -+ u64 wait_sync_live_value; -+ u32 glb_version; ++ unsigned int js; + -+ if (!queue) -+ return; ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + -+ glb_version = queue->kctx->kbdev->csf.global_iface.version; ++ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.gpu_active = false; + -+ if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || -+ !queue->group)) -+ return; ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + -+ addr = (u32 *)queue->user_io_addr; -+ cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); ++ /* Head atom may have just completed, so if it isn't running ++ * then try the next atom ++ */ ++ if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) ++ katom = kbase_gpu_inspect(kbdev, js, 1); + -+ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); -+ cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); -+ cs_active = addr[CS_ACTIVE/4]; ++ if (katom && katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ int device_nr = (katom->core_req & ++ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ++ ? katom->device_nr : 0; ++ if (!WARN_ON(device_nr >= 2)) ++ kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1; ++ } else { ++ kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; ++ trace_sysgraph(SGR_ACTIVE, 0, js); ++ } ++ kbdev->pm.backend.metrics.gpu_active = true; ++ } else { ++ trace_sysgraph(SGR_INACTIVE, 0, js); ++ } ++ } ++} + -+#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ -+ "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" ++/* called when job is submitted to or removed from a GPU slot */ ++void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) ++{ ++ unsigned long flags; ++ ktime_t now; + -+ seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n", -+ queue->csi_index, queue->base_addr, -+ queue->size, -+ queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Print status information for blocked group waiting for sync object. For on-slot queues, -+ * if cs_trace is enabled, dump the interface's cs_trace configuration. 
-+ */ -+ if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { -+ seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr); -+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { -+ wait_status = queue->status_wait; -+ wait_sync_value = queue->sync_value; -+ wait_sync_pointer = queue->sync_ptr; -+ sb_status = queue->sb_status; -+ blocked_reason = queue->blocked_reason; ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + -+ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); -+ if (evt) { -+ wait_sync_live_value = evt[0]; -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+ } else { -+ wait_sync_live_value = U64_MAX; -+ } ++ if (!timestamp) { ++ now = ktime_get_raw(); ++ timestamp = &now; ++ } + -+ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( -+ file, glb_version, wait_status, wait_sync_value, -+ wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); -+ } -+ } else { -+ struct kbase_device const *const kbdev = -+ queue->group->kctx->kbdev; -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; -+ struct kbase_csf_cmd_stream_info const *const stream = -+ &ginfo->streams[queue->csi_index]; -+ u64 cmd_ptr; -+ u32 req_res; ++ /* Track how much of time has been spent busy or idle. For JM GPUs, ++ * this also evaluates how long CL and/or GL jobs have been busy for. ++ */ ++ kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + -+ if (WARN_ON(!stream)) -+ return; ++ kbase_pm_metrics_active_calc(kbdev); ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++} ++#endif /* !MALI_USE_CSF */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +new file mode 100644 +index 000000000..4788f0413 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +@@ -0,0 +1,426 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ cmd_ptr = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_CMD_PTR_LO); -+ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_CMD_PTR_HI) << 32; -+ req_res = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_REQ_RESOURCE); ++/* ++ * Power policy API implementations ++ */ + -+ seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr); -+ seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n", -+ CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); -+ seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n", -+ CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); -+ seq_printf(file, "REQ_RESOURCE [TILER]: %d\n", -+ CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); -+ seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n", -+ CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); ++#include ++#include ++#include ++#include ++#include + -+ wait_status = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT); -+ wait_sync_value = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_VALUE); -+ wait_sync_pointer = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_POINTER_LO); -+ wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++#if MALI_USE_CSF && defined CONFIG_MALI_BIFROST_DEBUG ++#include ++#endif + -+ sb_status = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_SCOREBOARDS); -+ blocked_reason = kbase_csf_firmware_cs_output( -+ stream, CS_STATUS_BLOCKED_REASON); ++#include + -+ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); -+ if (evt) { -+ wait_sync_live_value = evt[0]; -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+ } else { -+ wait_sync_live_value = U64_MAX; -+ } ++static const struct kbase_pm_policy *const all_policy_list[] = { ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ &kbase_pm_always_on_policy_ops, ++ &kbase_pm_coarse_demand_policy_ops, ++#else /* CONFIG_MALI_BIFROST_NO_MALI */ ++ &kbase_pm_coarse_demand_policy_ops, ++ &kbase_pm_always_on_policy_ops, ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++}; + -+ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( -+ file, glb_version, wait_status, wait_sync_value, wait_sync_live_value, -+ wait_sync_pointer, sb_status, blocked_reason); -+ /* Dealing with cs_trace */ -+ if (kbase_csf_scheduler_queue_has_trace(queue)) -+ kbasep_csf_scheduler_dump_active_cs_trace(file, stream); -+ else -+ seq_puts(file, "NO CS_TRACE\n"); ++void kbase_pm_policy_init(struct kbase_device *kbdev) ++{ ++ const struct kbase_pm_policy *default_policy = all_policy_list[0]; ++ struct device_node *np = kbdev->dev->of_node; ++ const char *power_policy_name; ++ unsigned long flags; ++ int i; ++ ++ if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { ++ for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) ++ if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { ++ default_policy = all_policy_list[i]; ++ break; ++ } + } + -+ seq_puts(file, "\n"); ++#if MALI_USE_CSF && defined(CONFIG_MALI_BIFROST_DEBUG) ++ /* Use always_on policy if module param fw_debug=1 is ++ * passed, to aid firmware debugging. 
++ */ ++ if (fw_debug) ++ default_policy = &kbase_pm_always_on_policy_ops; ++#endif ++ ++ default_policy->init(kbdev); ++ ++#if MALI_USE_CSF ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.pm_current_policy = default_policy; ++ kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#else ++ CSTD_UNUSED(flags); ++ kbdev->pm.backend.pm_current_policy = default_policy; ++#endif +} + -+static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, -+ struct kbase_queue_group *const group) ++void kbase_pm_policy_term(struct kbase_device *kbdev) +{ -+ if (kbase_csf_scheduler_group_get_slot(group) >= 0) { -+ struct kbase_device *const kbdev = group->kctx->kbdev; -+ u32 ep_c, ep_r; -+ char exclusive; -+ char idle = 'N'; -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[group->csg_nr]; -+ u8 slot_priority = -+ kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; ++ kbdev->pm.backend.pm_current_policy->term(kbdev); ++} + -+ ep_c = kbase_csf_firmware_csg_output(ginfo, -+ CSG_STATUS_EP_CURRENT); -+ ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); ++void kbase_pm_update_active(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; ++ unsigned long flags; ++ bool active; + -+ if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) -+ exclusive = 'C'; -+ else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) -+ exclusive = 'F'; -+ else -+ exclusive = '0'; ++ lockdep_assert_held(&pm->lock); + -+ if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & -+ CSG_STATUS_STATE_IDLE_MASK) -+ idle = 'Y'; ++ /* pm_current_policy will never be NULL while pm.lock is held */ ++ KBASE_DEBUG_ASSERT(backend->pm_current_policy); + -+ if (!test_bit(group->csg_nr, csg_slots_status_updated)) { -+ seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", -+ group->csg_nr); -+ seq_puts(file, "*** The following group-record is likely stale\n"); -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); -+ seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", -+ group->handle, -+ group->csg_nr, -+ slot_priority, -+ group->run_state, -+ group->priority, -+ CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), -+ CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), -+ CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), -+ CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), -+ CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), -+ CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), -+ exclusive, -+ idle); ++ active = backend->pm_current_policy->get_core_active(kbdev); ++ WARN((kbase_pm_is_active(kbdev) && !active), ++ "GPU is active but policy '%s' is indicating that it can be powered off", ++ kbdev->pm.backend.pm_current_policy->name); ++ ++ if (active) { ++ /* Power on the GPU and any cores requested by the policy */ ++ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && ++ pm->backend.poweroff_wait_in_progress) { ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ pm->backend.poweron_required = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ /* Cancel the invocation of ++ * kbase_pm_gpu_poweroff_wait_wq() from the L2 state ++ * machine. 
This is safe - it ++ * invoke_poweroff_wait_wq_when_l2_off is true, then ++ * the poweroff work hasn't even been queued yet, ++ * meaning we can go straight to powering on. ++ */ ++ pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; ++ pm->backend.poweroff_wait_in_progress = false; ++ pm->backend.l2_desired = true; ++#if MALI_USE_CSF ++ pm->backend.mcu_desired = true; ++#endif ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_pm_do_poweron(kbdev, false); ++ } + } else { -+ seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); -+ seq_printf(file, "%7d, %6d, %9d, %8d\n", -+ group->handle, -+ group->csg_nr, -+ group->run_state, -+ group->priority); -+ } ++ /* It is an error for the power policy to power off the GPU ++ * when there are contexts active ++ */ ++ KBASE_DEBUG_ASSERT(pm->active_count == 0); + -+ if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { -+ unsigned int i; ++ pm->backend.poweron_required = false; + -+ seq_puts(file, "Bound queues:\n"); ++ /* Request power off */ ++ if (pm->backend.gpu_powered) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { -+ kbasep_csf_scheduler_dump_active_queue(file, -+ group->bound_queues[i]); ++ /* Power off the GPU immediately */ ++ kbase_pm_do_poweroff(kbdev); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } -+ -+ seq_puts(file, "\n"); +} + -+/** -+ * kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue -+ * group debug information -+ * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase context -+ * -+ * Return: Negative error code or 0 on success. -+ */ -+static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, -+ void *data) ++void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) +{ -+ u32 gr; -+ struct kbase_context *const kctx = file->private; -+ struct kbase_device *kbdev; -+ -+ if (WARN_ON(!kctx)) -+ return -EINVAL; ++ bool shaders_desired; + -+ kbdev = kctx->kbdev; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", -+ MALI_CSF_CSG_DEBUGFS_VERSION); ++ if (kbdev->pm.backend.pm_current_policy == NULL) ++ return; ++ if (kbdev->pm.backend.poweroff_wait_in_progress) ++ return; + -+ mutex_lock(&kctx->csf.lock); -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_csf_debugfs_update_active_groups_status(kbdev); -+ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { -+ struct kbase_queue_group *const group = -+ kctx->csf.queue_groups[gr]; ++#if MALI_USE_CSF ++ CSTD_UNUSED(shaders_desired); ++ /* Invoke the MCU state machine to send a request to FW for updating ++ * the mask of shader cores that can be used for allocation of ++ * endpoints requested by CSGs. 
++ */ ++ if (kbase_pm_is_mcu_desired(kbdev)) ++ kbase_pm_update_state(kbdev); ++#else ++ /* In protected transition, don't allow outside shader core request ++ * affect transition, return directly ++ */ ++ if (kbdev->pm.backend.protected_transition_override) ++ return; + -+ if (group) -+ kbasep_csf_scheduler_dump_active_group(file, group); -+ } -+ kbase_csf_scheduler_unlock(kbdev); -+ mutex_unlock(&kctx->csf.lock); ++ shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + -+ return 0; ++ if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) ++ kbase_pm_update_state(kbdev); ++#endif +} + -+/** -+ * kbasep_csf_scheduler_dump_active_groups() - Print debug info for active -+ * GPU command queue groups -+ * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_device -+ * -+ * Return: Negative error code or 0 on success. -+ */ -+static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, -+ void *data) ++void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ -+ u32 csg_nr; -+ struct kbase_device *kbdev = file->private; -+ u32 num_groups = kbdev->csf.global_iface.group_num; ++ bool shaders_desired = false; + -+ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", -+ MALI_CSF_CSG_DEBUGFS_VERSION); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_csf_debugfs_update_active_groups_status(kbdev); -+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { -+ struct kbase_queue_group *const group = -+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ if (kbdev->pm.backend.pm_current_policy == NULL) ++ return; ++ if (kbdev->pm.backend.poweroff_wait_in_progress) ++ return; + -+ if (!group) -+ continue; ++#if !MALI_USE_CSF ++ if (kbdev->pm.backend.protected_transition_override) ++ /* We are trying to change in/out of protected mode - force all ++ * cores off so that the L2 powers down ++ */ ++ shaders_desired = false; ++ else ++ shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); ++#endif + -+ seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid, -+ group->kctx->id); ++ if (kbdev->pm.backend.shaders_desired != shaders_desired) { ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + -+ kbasep_csf_scheduler_dump_active_group(file, group); ++ kbdev->pm.backend.shaders_desired = shaders_desired; ++ kbase_pm_update_state(kbdev); + } -+ kbase_csf_scheduler_unlock(kbdev); -+ -+ return 0; +} + -+static int kbasep_csf_queue_group_debugfs_open(struct inode *in, -+ struct file *file) ++void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ -+ return single_open(file, kbasep_csf_queue_group_debugfs_show, -+ in->i_private); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, -+ struct file *file) ++int kbase_pm_list_policies(struct kbase_device *kbdev, ++ const struct kbase_pm_policy * const **list) +{ -+ return single_open(file, kbasep_csf_scheduler_dump_active_groups, -+ in->i_private); ++ if (list) ++ *list = all_policy_list; ++ ++ return ARRAY_SIZE(all_policy_list); +} + -+static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { -+ .open = kbasep_csf_queue_group_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; 
++KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + -+void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) ++const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ -+ struct dentry *file; -+ const mode_t mode = 0444; -+ -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; -+ -+ file = debugfs_create_file("groups", mode, -+ kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kctx->kbdev->dev, -+ "Unable to create per context queue groups debugfs entry"); -+ } ++ return kbdev->pm.backend.pm_current_policy; +} + -+static const struct file_operations -+ kbasep_csf_active_queue_groups_debugfs_fops = { -+ .open = kbasep_csf_active_queue_groups_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + -+static int kbasep_csf_debugfs_scheduling_timer_enabled_get( -+ void *data, u64 *val) ++#if MALI_USE_CSF ++static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = data; ++ long remaining; ++ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); ++ int err = 0; + -+ *val = kbase_csf_scheduler_timer_is_enabled(kbdev); ++ /* Wait for L2 becoming off, by which the MCU is also implicitly off ++ * since the L2 state machine would only start its power-down ++ * sequence when the MCU is in off state. The L2 off is required ++ * as the tiler may need to be power cycled for MCU reconfiguration ++ * for host control of shader cores. ++ */ ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); ++#endif + -+ return 0; ++ if (!remaining) { ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info(kbdev->dev, ++ "Wait for L2_off got interrupted"); ++ err = (int)remaining; ++ } ++ ++ dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, ++ err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); ++ ++ return err; +} ++#endif + -+static int kbasep_csf_debugfs_scheduling_timer_enabled_set( -+ void *data, u64 val) ++void kbase_pm_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_policy *new_policy) +{ -+ struct kbase_device *const kbdev = data; ++ const struct kbase_pm_policy *old_policy; ++ unsigned long flags; ++#if MALI_USE_CSF ++ unsigned int new_policy_csf_pm_sched_flags; ++ bool sched_suspend; ++ bool reset_gpu = false; ++ bool reset_op_prevented = true; ++ struct kbase_csf_scheduler *scheduler = NULL; ++#endif + -+ kbase_csf_scheduler_timer_set_enabled(kbdev, val != 0); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(new_policy != NULL); + -+ return 0; -+} ++ KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); + -+static int kbasep_csf_debugfs_scheduling_timer_kick_set( -+ void *data, u64 val) -+{ -+ struct kbase_device *const kbdev = data; ++#if MALI_USE_CSF ++ scheduler = &kbdev->csf.scheduler; ++ KBASE_DEBUG_ASSERT(scheduler != NULL); + -+ kbase_csf_scheduler_kick(kbdev); ++ /* Serialize calls on kbase_pm_set_policy() */ ++ mutex_lock(&kbdev->pm.backend.policy_change_lock); + -+ return 0; -+} ++ if 
(kbase_reset_gpu_prevent_and_wait(kbdev)) { ++ dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset"); ++ reset_op_prevented = false; ++ } + -+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, -+ &kbasep_csf_debugfs_scheduling_timer_enabled_get, -+ &kbasep_csf_debugfs_scheduling_timer_enabled_set, "%llu\n"); -+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL, -+ &kbasep_csf_debugfs_scheduling_timer_kick_set, "%llu\n"); ++ /* In case of CSF, the scheduler may be invoked to suspend. In that ++ * case, there is a risk that the L2 may be turned on by the time we ++ * check it here. So we hold the scheduler lock to avoid other operations ++ * interfering with the policy change and vice versa. ++ */ ++ mutex_lock(&scheduler->lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* policy_change_clamp_state_to_off, when needed, is set/cleared in ++ * this function, a very limited temporal scope for covering the ++ * change transition. ++ */ ++ WARN_ON(kbdev->pm.backend.policy_change_clamp_state_to_off); ++ new_policy_csf_pm_sched_flags = new_policy->pm_sched_flags; + -+/** -+ * kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler. -+ * -+ * @file: Object of the file that is being read. -+ * @user_buf: User buffer that contains the string. -+ * @count: Length of user buffer -+ * @ppos: Offset within file object -+ * -+ * This function will return the current Scheduler state to Userspace -+ * Scheduler may exit that state by the time the state string is received -+ * by the Userspace. -+ * -+ * Return: 0 if Scheduler was found in an unexpected state, or the -+ * size of the state string if it was copied successfully to the -+ * User buffer or a negative value in case of an error. -+ */ -+static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, -+ char __user *user_buf, size_t count, loff_t *ppos) -+{ -+ struct kbase_device *kbdev = file->private_data; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ const char *state_string; ++ /* Requiring the scheduler PM suspend operation when changes involving ++ * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON ++ * flag bit. ++ */ ++ sched_suspend = reset_op_prevented && ++ (CSF_DYNAMIC_PM_CORE_KEEP_ON & ++ (new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags)); + -+ kbase_csf_scheduler_lock(kbdev); -+ state_string = scheduler_state_to_string(kbdev, scheduler->state); -+ kbase_csf_scheduler_unlock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (!state_string) -+ count = 0; ++ if (sched_suspend) { ++ /* Update the suspend flag to reflect actually suspend being done ! */ ++ sched_suspend = !kbase_csf_scheduler_pm_suspend_no_lock(kbdev); ++ /* Set the reset recovery flag if the required suspend failed */ ++ reset_gpu = !sched_suspend; ++ } + -+ return simple_read_from_buffer(user_buf, count, ppos, -+ state_string, strlen(state_string)); -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/** -+ * kbase_csf_debugfs_scheduler_state_set() - Set the state of scheduler. -+ * -+ * @file: Object of the file that is being written to. -+ * @ubuf: User buffer that contains the string. -+ * @count: Length of user buffer -+ * @ppos: Offset within file object -+ * -+ * This function will update the Scheduler state as per the state string -+ * passed by the Userspace. Scheduler may or may not remain in new state -+ * for long. 
-+ * -+ * Return: Negative value if the string doesn't correspond to a valid Scheduler -+ * state or if copy from user buffer failed, otherwise the length of -+ * the User buffer. -+ */ -+static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) -+{ -+ struct kbase_device *kbdev = file->private_data; -+ char buf[MAX_SCHED_STATE_STRING_LEN]; -+ ssize_t ret = count; ++ kbdev->pm.backend.policy_change_clamp_state_to_off = sched_suspend; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ CSTD_UNUSED(ppos); ++ if (sched_suspend) ++ reset_gpu = policy_change_wait_for_L2_off(kbdev); ++#endif + -+ count = min_t(size_t, sizeof(buf) - 1, count); -+ if (copy_from_user(buf, ubuf, count)) -+ return -EFAULT; ++ /* During a policy change we pretend the GPU is active */ ++ /* A suspend won't happen here, because we're in a syscall from a ++ * userspace thread ++ */ ++ kbase_pm_context_active(kbdev); + -+ buf[count] = 0; ++ kbase_pm_lock(kbdev); + -+ if (sysfs_streq(buf, "SUSPENDED")) -+ kbase_csf_scheduler_pm_suspend(kbdev); -+#ifdef KBASE_PM_RUNTIME -+ else if (sysfs_streq(buf, "SLEEPING")) -+ kbase_csf_scheduler_force_sleep(kbdev); ++ /* Remove the policy to prevent IRQ handlers from working on it */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ old_policy = kbdev->pm.backend.pm_current_policy; ++ kbdev->pm.backend.pm_current_policy = NULL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, old_policy->id); ++ if (old_policy->term) ++ old_policy->term(kbdev); ++ ++ memset(&kbdev->pm.backend.pm_policy_data, 0, ++ sizeof(union kbase_pm_policy_data)); ++ ++ KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); ++ if (new_policy->init) ++ new_policy->init(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.pm_current_policy = new_policy; ++#if MALI_USE_CSF ++ kbdev->pm.backend.csf_pm_sched_flags = new_policy_csf_pm_sched_flags; ++ /* New policy in place, release the clamping on mcu/L2 off state */ ++ kbdev->pm.backend.policy_change_clamp_state_to_off = false; ++ kbase_pm_update_state(kbdev); +#endif -+ else if (sysfs_streq(buf, "INACTIVE")) -+ kbase_csf_scheduler_force_wakeup(kbdev); -+ else { -+ dev_dbg(kbdev->dev, "Bad scheduler state %s", buf); -+ ret = -EINVAL; -+ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return ret; -+} ++ /* If any core power state changes were previously attempted, but ++ * couldn't be made because the policy was changing (current_policy was ++ * NULL), then re-try them here. 
++ */ ++ kbase_pm_update_active(kbdev); ++ kbase_pm_update_cores_state(kbdev); + -+static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = { -+ .owner = THIS_MODULE, -+ .read = kbase_csf_debugfs_scheduler_state_get, -+ .write = kbase_csf_debugfs_scheduler_state_set, -+ .open = simple_open, -+ .llseek = default_llseek, -+}; ++ kbase_pm_unlock(kbdev); + -+void kbase_csf_debugfs_init(struct kbase_device *kbdev) -+{ -+ debugfs_create_file("active_groups", 0444, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_csf_active_queue_groups_debugfs_fops); ++ /* Now the policy change is finished, we release our fake context active ++ * reference ++ */ ++ kbase_pm_context_idle(kbdev); + -+ debugfs_create_file("scheduling_timer_enabled", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_csf_debugfs_scheduling_timer_enabled_fops); -+ debugfs_create_file("scheduling_timer_kick", 0200, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_csf_debugfs_scheduling_timer_kick_fops); -+ debugfs_create_file("scheduler_state", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_csf_debugfs_scheduler_state_fops); ++#if MALI_USE_CSF ++ /* Reverse the suspension done */ ++ if (sched_suspend) ++ kbase_csf_scheduler_pm_resume_no_lock(kbdev); ++ mutex_unlock(&scheduler->lock); + -+ kbase_csf_tl_reader_debugfs_init(kbdev); -+} ++ if (reset_op_prevented) ++ kbase_reset_gpu_allow(kbdev); + -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) -+{ -+} ++ if (reset_gpu) { ++ dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ kbase_reset_gpu_wait(kbdev); ++ } + -+void kbase_csf_debugfs_init(struct kbase_device *kbdev) -+{ ++ mutex_unlock(&kbdev->pm.backend.policy_change_lock); ++#endif +} + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h ++KBASE_EXPORT_TEST_API(kbase_pm_set_policy); +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 -index 000000000..16a548bf8 +index 000000000..e8113659b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h -@@ -0,0 +1,54 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h +@@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -144019,49 +144951,100 @@ index 000000000..16a548bf8 + * + */ + -+#ifndef _KBASE_CSF_CSG_DEBUGFS_H_ -+#define _KBASE_CSF_CSG_DEBUGFS_H_ ++/* ++ * Power policy API definitions ++ */ + -+/* Forward declarations */ -+struct kbase_device; -+struct kbase_context; -+struct kbase_queue_group; ++#ifndef _KBASE_PM_POLICY_H_ ++#define _KBASE_PM_POLICY_H_ + -+#define MALI_CSF_CSG_DEBUGFS_VERSION 0 ++/** ++ * kbase_pm_policy_init - Initialize power policy framework ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Must be called before calling any other policy function ++ */ ++void kbase_pm_policy_init(struct kbase_device *kbdev); + +/** -+ * kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups -+ * associated with @kctx. ++ * kbase_pm_policy_term - Terminate power policy framework + * -+ * @kctx: Pointer to kbase_context ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); ++void kbase_pm_policy_term(struct kbase_device *kbdev); + +/** -+ * kbase_csf_debugfs_init() - Add a global debugfs entry for queue groups ++ * kbase_pm_update_active - Update the active power state of the GPU + * -+ * @kbdev: Pointer to the device ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Calls into the current power policy + */ -+void kbase_csf_debugfs_init(struct kbase_device *kbdev); ++void kbase_pm_update_active(struct kbase_device *kbdev); + +/** -+ * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses ++ * kbase_pm_update_cores - Update the desired core state of the GPU + * -+ * @kbdev: Pointer to the device ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Calls into the current power policy + */ -+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); ++void kbase_pm_update_cores(struct kbase_device *kbdev); + -+#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h ++/** ++ * kbase_pm_cores_requested - Check that a power request has been locked into ++ * the HW. ++ * @kbdev: Kbase device ++ * @shader_required: true if shaders are required ++ * ++ * Called by the scheduler to check if a power on request has been locked into ++ * the HW. ++ * ++ * Note that there is no guarantee that the cores are actually ready, however ++ * when the request has been locked into the HW, then it is safe to submit work ++ * since the HW will wait for the transition to ready. ++ * ++ * A reference must first be taken prior to making this call. ++ * ++ * Caller must hold the hwaccess_lock. ++ * ++ * Return: true if the request to the HW was successfully made else false if the ++ * request is still pending. ++ */ ++static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, ++ bool shader_required) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* If the L2 & tiler are not on or pending, then the tiler is not yet ++ * available, and shaders are definitely not powered. 
++ */ ++ if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && ++ kbdev->pm.backend.l2_state != KBASE_L2_ON && ++ kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) ++ return false; ++ ++ if (shader_required && ++ kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && ++ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && ++ kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) ++ return false; ++ ++ return true; ++} ++ ++#endif /* _KBASE_PM_POLICY_H_ */ +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h new file mode 100644 -index 000000000..6fa0e27d6 +index 000000000..8622ef78d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h -@@ -0,0 +1,1666 @@ ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h +@@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -144079,1661 +145062,1169 @@ index 000000000..6fa0e27d6 + * + */ + -+/* Definitions (types, defines, etcs) common to the CSF. -+ * They are placed here to allow the hierarchy of header files to work. -+ */ -+ -+#ifndef _KBASE_CSF_DEFS_H_ -+#define _KBASE_CSF_DEFS_H_ -+ -+#include -+#include -+ -+#include "mali_kbase_csf_firmware.h" -+#include "mali_kbase_refcount_defs.h" -+#include "mali_kbase_csf_event.h" -+#include -+ -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+#include -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+ -+/* Maximum number of KCPU command queues to be created per GPU address space. -+ */ -+#define KBASEP_MAX_KCPU_QUEUES ((size_t)256) -+ -+/* Maximum number of GPU command queue groups to be created per GPU address -+ * space. -+ */ -+#define MAX_QUEUE_GROUP_NUM (256) -+ -+/* Maximum number of GPU tiler heaps to allow to be created per GPU address -+ * space. -+ */ -+#define MAX_TILER_HEAPS (128) -+ -+#define CSF_FIRMWARE_ENTRY_READ (1ul << 0) -+#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1) -+#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2) -+#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3) -+#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5) -+#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30) -+#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) -+ -+/** -+ * enum kbase_csf_queue_bind_state - bind state of the queue ++/* ++ * Backend-specific Power Manager shader core state definitions. ++ * The function-like macro KBASEP_SHADER_STATE() must be defined before ++ * including this header file. This header file can be included multiple ++ * times in the same compilation unit with different definitions of ++ * KBASEP_SHADER_STATE(). + * -+ * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link -+ * between queue and the group to which it was bound or being bound is removed. -+ * @KBASE_CSF_QUEUE_BIND_IN_PROGRESS: Set when the first part of bind operation -+ * has completed i.e. CS_QUEUE_BIND ioctl. -+ * @KBASE_CSF_QUEUE_BOUND: Set when the bind operation has completed i.e. IO -+ * pages have been mapped in the process address space. 
++ * @OFF_CORESTACK_OFF: The shaders and core stacks are off ++ * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been ++ * requested to power on and hwcnt is being ++ * disabled ++ * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been ++ * requested to power on. Or after doing ++ * partial shader on/off, checking whether ++ * it's the desired state. ++ * @ON_CORESTACK_ON: The shaders and core stacks are on, and ++ * hwcnt already enabled. ++ * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt ++ * disabled, and checks to powering down or ++ * re-enabling hwcnt. ++ * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power ++ * off, but they remain on for the duration ++ * of the hysteresis timer ++ * @WAIT_GPU_IDLE: The shaders partial poweroff needs to ++ * reach a state where jobs on the GPU are ++ * finished including jobs currently running ++ * and in the GPU queue because of ++ * GPU2017-861 ++ * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired ++ * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 ++ * cache is being flushed. ++ * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are ++ * ready to be powered off. ++ * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders ++ * have been requested to power off ++ * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks ++ * have been requested to power off ++ * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the ++ * tick timer cancellation is still pending. ++ * @RESET_WAIT: The GPU is resetting, shader and core ++ * stack power states are unknown + */ -+enum kbase_csf_queue_bind_state { -+ KBASE_CSF_QUEUE_UNBOUND, -+ KBASE_CSF_QUEUE_BIND_IN_PROGRESS, -+ KBASE_CSF_QUEUE_BOUND, -+}; -+ -+/** -+ * enum kbase_csf_reset_gpu_state - state of the gpu reset -+ * -+ * @KBASE_CSF_RESET_GPU_NOT_PENDING: Set when the GPU reset isn't pending -+ * -+ * @KBASE_CSF_RESET_GPU_PREPARED: Set when kbase_prepare_to_reset_gpu() has -+ * been called. This is just for debugging checks to encourage callers to call -+ * kbase_prepare_to_reset_gpu() before kbase_reset_gpu(). ++KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) ++KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) ++KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) ++KBASEP_SHADER_STATE(ON_CORESTACK_ON) ++KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) ++KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) ++#if !MALI_USE_CSF ++KBASEP_SHADER_STATE(WAIT_GPU_IDLE) ++#endif /* !MALI_USE_CSF */ ++KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) ++KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) ++KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) ++KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON) ++KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF) ++KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF) ++KBASEP_SHADER_STATE(RESET_WAIT) +diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +new file mode 100644 +index 000000000..1b3346179 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +@@ -0,0 +1,279 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @KBASE_CSF_RESET_GPU_COMMITTED: Set when the GPU reset process has been -+ * committed and so will definitely happen, but the procedure to reset the GPU -+ * has not yet begun. Other threads must finish accessing the HW before we -+ * reach %KBASE_CSF_RESET_GPU_HAPPENING. ++ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
+ * -+ * @KBASE_CSF_RESET_GPU_HAPPENING: Set when the GPU reset process is occurring -+ * (silent or otherwise), and is actively accessing the HW. Any changes to the -+ * HW in other threads might get lost, overridden, or corrupted. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @KBASE_CSF_RESET_GPU_COMMITTED_SILENT: Set when the GPU reset process has -+ * been committed but has not started happening. This is used when resetting -+ * the GPU as part of normal behavior (e.g. when exiting protected mode). -+ * Other threads must finish accessing the HW before we reach -+ * %KBASE_CSF_RESET_GPU_HAPPENING. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @KBASE_CSF_RESET_GPU_FAILED: Set when an error is encountered during the -+ * GPU reset process. No more work could then be executed on GPU, unloading -+ * the Driver module is the only option. -+ */ -+enum kbase_csf_reset_gpu_state { -+ KBASE_CSF_RESET_GPU_NOT_PENDING, -+ KBASE_CSF_RESET_GPU_PREPARED, -+ KBASE_CSF_RESET_GPU_COMMITTED, -+ KBASE_CSF_RESET_GPU_HAPPENING, -+ KBASE_CSF_RESET_GPU_COMMITTED_SILENT, -+ KBASE_CSF_RESET_GPU_FAILED, -+}; -+ -+/** -+ * enum kbase_csf_group_state - state of the GPU command queue group ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @KBASE_CSF_GROUP_INACTIVE: Group is inactive and won't be -+ * considered by scheduler for running on -+ * CSG slot. -+ * @KBASE_CSF_GROUP_RUNNABLE: Group is in the list of runnable groups -+ * and is subjected to time-slice based -+ * scheduling. A start request would be -+ * sent (or already has been sent) if the -+ * group is assigned the CS -+ * group slot for the fist time. -+ * @KBASE_CSF_GROUP_IDLE: Group is currently on a CSG slot -+ * but all the CSs bound to the group have -+ * become either idle or waiting on sync -+ * object. -+ * Group could be evicted from the slot on -+ * the next tick if there are no spare -+ * slots left after scheduling non-idle -+ * queue groups. If the group is kept on -+ * slot then it would be moved to the -+ * RUNNABLE state, also if one of the -+ * queues bound to the group is kicked it -+ * would be moved to the RUNNABLE state. -+ * If the group is evicted from the slot it -+ * would be moved to either -+ * KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or -+ * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC -+ * state. -+ * @KBASE_CSF_GROUP_SUSPENDED: Group was evicted from the CSG slot -+ * and is not running but is still in the -+ * list of runnable groups and subjected -+ * to time-slice based scheduling. A resume -+ * request would be sent when a CSG slot is -+ * re-assigned to the group and once the -+ * resume is complete group would be moved -+ * back to the RUNNABLE state. -+ * @KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: Same as KBASE_CSF_GROUP_SUSPENDED except -+ * that queue group also became idle before -+ * the suspension. This state helps -+ * Scheduler avoid scheduling the idle -+ * groups over the non-idle groups in the -+ * subsequent ticks. 
If one of the queues -+ * bound to the group is kicked it would be -+ * moved to the SUSPENDED state. -+ * @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE -+ * except that at least one CS -+ * bound to this group was -+ * waiting for synchronization object -+ * before the suspension. -+ * @KBASE_CSF_GROUP_FAULT_EVICTED: Group is evicted from the scheduler due -+ * to a fault condition, pending to be -+ * terminated. -+ * @KBASE_CSF_GROUP_TERMINATED: Group is no longer schedulable and is -+ * pending to be deleted by Client, all the -+ * queues bound to it have been unbound. + */ -+enum kbase_csf_group_state { -+ KBASE_CSF_GROUP_INACTIVE, -+ KBASE_CSF_GROUP_RUNNABLE, -+ KBASE_CSF_GROUP_IDLE, -+ KBASE_CSF_GROUP_SUSPENDED, -+ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE, -+ KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, -+ KBASE_CSF_GROUP_FAULT_EVICTED, -+ KBASE_CSF_GROUP_TERMINATED, -+}; + -+/** -+ * enum kbase_csf_csg_slot_state - state of the command queue group slots under -+ * the scheduler control. -+ * -+ * @CSG_SLOT_READY: The slot is clean and ready to be programmed with a -+ * queue group. -+ * @CSG_SLOT_READY2RUN: The slot has been programmed with a queue group, i.e. a -+ * start or resume request has been sent to the firmware. -+ * @CSG_SLOT_RUNNING: The queue group is running on the slot, acknowledgment -+ * of a start or resume request has been obtained from the -+ * firmware. -+ * @CSG_SLOT_DOWN2STOP: The suspend or terminate request for the queue group on -+ * the slot has been sent to the firmware. -+ * @CSG_SLOT_STOPPED: The queue group is removed from the slot, acknowledgment -+ * of suspend or terminate request has been obtained from -+ * the firmware. -+ * @CSG_SLOT_READY2RUN_TIMEDOUT: The start or resume request sent on the slot -+ * for the queue group timed out. -+ * @CSG_SLOT_DOWN2STOP_TIMEDOUT: The suspend or terminate request for queue -+ * group on the slot timed out. -+ */ -+enum kbase_csf_csg_slot_state { -+ CSG_SLOT_READY, -+ CSG_SLOT_READY2RUN, -+ CSG_SLOT_RUNNING, -+ CSG_SLOT_DOWN2STOP, -+ CSG_SLOT_STOPPED, -+ CSG_SLOT_READY2RUN_TIMEDOUT, -+ CSG_SLOT_DOWN2STOP_TIMEDOUT, -+}; ++#include ++#include ++#if MALI_USE_CSF ++#include ++#include ++#include ++#endif ++#include ++#include ++#include + -+/** -+ * enum kbase_csf_scheduler_state - state of the scheduler operational phases. -+ * -+ * @SCHED_BUSY: The scheduler is busy performing on tick schedule -+ * operations, the state of CSG slots -+ * can't be changed. -+ * @SCHED_INACTIVE: The scheduler is inactive, it is allowed to modify the -+ * state of CSG slots by in-cycle -+ * priority scheduling. -+ * @SCHED_SUSPENDED: The scheduler is in low-power mode with scheduling -+ * operations suspended and is not holding the power -+ * management reference. This can happen if the GPU -+ * becomes idle for a duration exceeding a threshold, -+ * or due to a system triggered suspend action. -+ * @SCHED_SLEEPING: The scheduler is in low-power mode with scheduling -+ * operations suspended and is not holding the power -+ * management reference. This state is set, only for the -+ * GPUs that supports the sleep feature, when GPU idle -+ * notification is received. The state is changed to -+ * @SCHED_SUSPENDED from the runtime suspend callback -+ * function after the suspend of CSGs. 
-+ */ -+enum kbase_csf_scheduler_state { -+ SCHED_BUSY, -+ SCHED_INACTIVE, -+ SCHED_SUSPENDED, -+ SCHED_SLEEPING, -+}; ++void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, ++ u64 *cycle_counter, ++ u64 *system_time, ++ struct timespec64 *ts) ++{ ++ u32 hi1, hi2; + -+/** -+ * enum kbase_queue_group_priority - Kbase internal relative priority list. -+ * -+ * @KBASE_QUEUE_GROUP_PRIORITY_REALTIME: The realtime queue group priority. -+ * @KBASE_QUEUE_GROUP_PRIORITY_HIGH: The high queue group priority. -+ * @KBASE_QUEUE_GROUP_PRIORITY_MEDIUM: The medium queue group priority. -+ * @KBASE_QUEUE_GROUP_PRIORITY_LOW: The low queue group priority. -+ * @KBASE_QUEUE_GROUP_PRIORITY_COUNT: The number of priority levels. -+ */ -+enum kbase_queue_group_priority { -+ KBASE_QUEUE_GROUP_PRIORITY_REALTIME = 0, -+ KBASE_QUEUE_GROUP_PRIORITY_HIGH, -+ KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, -+ KBASE_QUEUE_GROUP_PRIORITY_LOW, -+ KBASE_QUEUE_GROUP_PRIORITY_COUNT -+}; ++ if (cycle_counter) ++ *cycle_counter = kbase_backend_get_cycle_cnt(kbdev); + -+/** -+ * enum kbase_timeout_selector - The choice of which timeout to get scaled -+ * using the lowest GPU frequency. -+ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware. -+ * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired -+ * Shader, L2 and MCU state. -+ * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. -+ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. -+ * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. -+ * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond -+ * to a ping from KBase. -+ * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. -+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion -+ * of a MMU operation -+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in -+ * the enum. -+ */ -+enum kbase_timeout_selector { -+ CSF_FIRMWARE_TIMEOUT, -+ CSF_PM_TIMEOUT, -+ CSF_GPU_RESET_TIMEOUT, -+ CSF_CSG_SUSPEND_TIMEOUT, -+ CSF_FIRMWARE_BOOT_TIMEOUT, -+ CSF_FIRMWARE_PING_TIMEOUT, -+ CSF_SCHED_PROTM_PROGRESS_TIMEOUT, -+ MMU_AS_INACTIVE_WAIT_TIMEOUT, ++ if (system_time) { ++ /* Read hi, lo, hi to ensure a coherent u64 */ ++ do { ++ hi1 = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TIMESTAMP_HI)); ++ *system_time = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TIMESTAMP_LO)); ++ hi2 = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TIMESTAMP_HI)); ++ } while (hi1 != hi2); ++ *system_time |= (((u64) hi1) << 32); ++ } + -+ /* Must be the last in the enum */ -+ KBASE_TIMEOUT_SELECTOR_COUNT -+}; ++ /* Record the CPU's idea of current time */ ++ if (ts != NULL) ++#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) ++ *ts = ktime_to_timespec64(ktime_get_raw()); ++#else ++ ktime_get_raw_ts64(ts); ++#endif ++} + ++#if !MALI_USE_CSF +/** -+ * struct kbase_csf_notification - Event or error generated as part of command -+ * queue execution ++ * timedwait_cycle_count_active() - Timed wait till CYCLE_COUNT_ACTIVE is active + * -+ * @data: Event or error data returned to userspace -+ * @link: Link to the linked list, &struct_kbase_csf_context.error_list. -+ */ -+struct kbase_csf_notification { -+ struct base_csf_notification data; -+ struct list_head link; -+}; -+ -+/** -+ * struct kbase_queue - Object representing a GPU command queue. ++ * @kbdev: Kbase device + * -+ * @kctx: Pointer to the base context with which this GPU command queue -+ * is associated. 
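kbase_backend_get_gpu_time_norequest() above assembles a 64-bit timestamp from two 32-bit registers with a hi/lo/hi read: if the high word changed while the low word was being read, the sample is retried. A generic sketch of the pattern, where read_hi()/read_lo() are placeholders standing in for the kbase_reg_read(TIMESTAMP_HI/LO) calls:

u32 read_hi(void);   /* placeholder register accessors, illustration only */
u32 read_lo(void);

static u64 read_counter64(void)
{
	u32 hi1, hi2, lo;

	do {
		hi1 = read_hi();   /* sample high word */
		lo = read_lo();    /* sample low word */
		hi2 = read_hi();   /* confirm the high word did not change */
	} while (hi1 != hi2);      /* low word wrapped in between: retry */

	return ((u64)hi1 << 32) | lo;
}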
-+ * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only -+ * valid (i.e. not 0 ) when the queue is enabled and its owner -+ * group has a runtime bound csg_reg (group region). -+ * @phys: Pointer to the physical pages allocated for the -+ * pair or User mode input/output page -+ * @user_io_addr: Pointer to the permanent kernel mapping of User mode -+ * input/output pages. The pages can be accessed through -+ * the mapping without any cache maintenance. -+ * @handle: Handle returned with bind ioctl for creating a -+ * contiguous User mode mapping of input/output pages & -+ * the hardware doorbell page. -+ * @doorbell_nr: Index of the hardware doorbell page assigned to the -+ * queue. -+ * @db_file_offset: File offset value that is assigned to userspace mapping -+ * created on bind to access the doorbell page. -+ * It is in page units. -+ * @link: Link to the linked list of GPU command queues created per -+ * GPU address space. -+ * @refcount: Reference count, stands for the number of times the queue -+ * has been referenced. The reference is taken when it is -+ * created, when it is bound to the group and also when the -+ * @oom_event_work work item is queued -+ * for it. -+ * @group: Pointer to the group to which this queue is bound. -+ * @queue_reg: Pointer to the VA region allocated for CS buffer. -+ * @oom_event_work: Work item corresponding to the out of memory event for -+ * chunked tiler heap being used for this queue. -+ * @base_addr: Base address of the CS buffer. -+ * @size: Size of the CS buffer. -+ * @priority: Priority of this queue within the group. -+ * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state -+ * @csi_index: The ID of the assigned CS hardware interface. -+ * @enabled: Indicating whether the CS is running, or not. -+ * @status_wait: Value of CS_STATUS_WAIT register of the CS will -+ * be kept when the CS gets blocked by sync wait. -+ * CS_STATUS_WAIT provides information on conditions queue is -+ * blocking on. This is set when the group, to which queue is -+ * bound, is suspended after getting blocked, i.e. in -+ * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state. -+ * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS -+ * will be kept when the CS gets blocked by -+ * sync wait. CS_STATUS_WAIT_SYNC_POINTER contains the address -+ * of synchronization object being waited on. -+ * Valid only when @status_wait is set. -+ * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS -+ * will be kept when the CS gets blocked by -+ * sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value -+ * tested against the synchronization object. -+ * Valid only when @status_wait is set. -+ * @sb_status: Value indicates which of the scoreboard entries in the queue -+ * are non-zero -+ * @blocked_reason: Value shows if the queue is blocked, and if so, -+ * the reason why it is blocked -+ * @trace_buffer_base: CS trace buffer base address. -+ * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. -+ * @trace_buffer_size: CS trace buffer size for the queue. -+ * @trace_cfg: CS trace configuration parameters. -+ * @error: GPU command queue fatal information to pass to user space. -+ * @cs_error_work: Work item to handle the CS fatal event reported for this -+ * queue or the CS fault event if dump on fault is enabled -+ * and acknowledgment for CS fault event needs to be done -+ * after dumping is complete. 
-+ * @cs_error_info: Records additional information about the CS fatal event or -+ * about CS fault event if dump on fault is enabled. -+ * @cs_error: Records information about the CS fatal event or -+ * about CS fault event if dump on fault is enabled. -+ * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. -+ * @pending: Indicating whether the queue has new submitted work. -+ * @extract_ofs: The current EXTRACT offset, this is only updated when handling -+ * the GLB IDLE IRQ if the idle timeout value is non-0 in order -+ * to help detect a queue's true idle status. -+ * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the -+ * group to which queue is bound is suspended. -+ * This can be useful in certain cases to know that till which -+ * point the execution reached in the Linear command buffer. ++ * Return: true if CYCLE_COUNT_ACTIVE is active within the timeout. + */ -+struct kbase_queue { -+ struct kbase_context *kctx; -+ u64 user_io_gpu_va; -+ struct tagged_addr phys[2]; -+ char *user_io_addr; -+ u64 handle; -+ int doorbell_nr; -+ unsigned long db_file_offset; -+ struct list_head link; -+ kbase_refcount_t refcount; -+ struct kbase_queue_group *group; -+ struct kbase_va_region *queue_reg; -+ struct work_struct oom_event_work; -+ u64 base_addr; -+ u32 size; -+ u8 priority; -+ s8 csi_index; -+ enum kbase_csf_queue_bind_state bind_state; -+ bool enabled; -+ u32 status_wait; -+ u64 sync_ptr; -+ u32 sync_value; -+ u32 sb_status; -+ u32 blocked_reason; -+ u64 trace_buffer_base; -+ u64 trace_offset_ptr; -+ u32 trace_buffer_size; -+ u32 trace_cfg; -+ struct kbase_csf_notification error; -+ struct work_struct cs_error_work; -+ u64 cs_error_info; -+ u32 cs_error; -+ bool cs_error_fatal; -+ atomic_t pending; -+ u64 extract_ofs; -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ u64 saved_cmd_ptr; -+#endif /* CONFIG_DEBUG_FS */ -+}; ++static bool timedwait_cycle_count_active(struct kbase_device *kbdev) ++{ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ return true; ++#else ++ bool success = false; ++ const unsigned int timeout = 100; ++ const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); + -+/** -+ * struct kbase_normal_suspend_buffer - Object representing a normal -+ * suspend buffer for queue group. -+ * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this -+ * field is only valid when the owner group has a region bound at -+ * runtime. -+ * @phy: Array of physical memory pages allocated for the normal- -+ * mode suspend buffer. -+ */ -+struct kbase_normal_suspend_buffer { -+ u64 gpu_va; -+ struct tagged_addr *phy; -+}; ++ while (time_is_after_jiffies(remaining)) { ++ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { ++ success = true; ++ break; ++ } ++ } ++ return success; ++#endif ++} ++#endif + -+/** -+ * struct kbase_protected_suspend_buffer - Object representing a protected -+ * suspend buffer for queue group. -+ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer. -+ * Note, this field is only valid when the owner group has a region -+ * bound at runtime. -+ * @pma: Array of pointer to protected mode allocations containing -+ * information about memory pages allocated for protected mode -+ * suspend buffer. -+ * @alloc_retries: Number of times we retried allocing physical pages -+ * for protected suspend buffers. 
-+ */ -+struct kbase_protected_suspend_buffer { -+ u64 gpu_va; -+ struct protected_memory_allocation **pma; -+ u8 alloc_retries; -+}; ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts) ++{ ++#if !MALI_USE_CSF ++ kbase_pm_request_gpu_cycle_counter(kbdev); ++ WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, ++ "L2 not powered up"); ++ WARN_ONCE((!timedwait_cycle_count_active(kbdev)), ++ "Timed out on CYCLE_COUNT_ACTIVE"); ++#endif ++ kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, ++ ts); ++#if !MALI_USE_CSF ++ kbase_pm_release_gpu_cycle_counter(kbdev); ++#endif ++} + -+/** -+ * struct kbase_queue_group - Object representing a GPU command queue group. -+ * -+ * @kctx: Pointer to the kbase context with which this queue group -+ * is associated. -+ * @normal_suspend_buf: Object representing the normal suspend buffer. -+ * Normal-mode suspend buffer that is used for -+ * group context switch. -+ * @protected_suspend_buf: Object representing the protected suspend -+ * buffer. Protected-mode suspend buffer that is -+ * used for group context switch. -+ * @handle: Handle which identifies this queue group. -+ * @csg_nr: Number/index of the CSG to which this queue group is -+ * mapped; KBASEP_CSG_NR_INVALID indicates that the queue -+ * group is not scheduled. -+ * @priority: Priority of the queue group, 0 being the highest, -+ * BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest. -+ * @tiler_max: Maximum number of tiler endpoints the group is allowed -+ * to use. -+ * @fragment_max: Maximum number of fragment endpoints the group is -+ * allowed to use. -+ * @compute_max: Maximum number of compute endpoints the group is -+ * allowed to use. -+ * @csi_handlers: Requested CSI exception handler flags for the group. -+ * @tiler_mask: Mask of tiler endpoints the group is allowed to use. -+ * @fragment_mask: Mask of fragment endpoints the group is allowed to use. -+ * @compute_mask: Mask of compute endpoints the group is allowed to use. -+ * @group_uid: 32-bit wide unsigned identifier for the group, unique -+ * across all kbase devices and contexts. -+ * @link: Link to this queue group in the 'runnable_groups' list of -+ * the corresponding kctx. -+ * @link_to_schedule: Link to this queue group in the list of prepared groups -+ * to be scheduled, if the group is runnable/suspended. -+ * If the group is idle or waiting for CQS, it would be a -+ * link to the list of idle/blocked groups list. -+ * @run_state: Current state of the queue group. -+ * @prepared_seq_num: Indicates the position of queue group in the list of -+ * prepared groups to be scheduled. -+ * @scan_seq_num: Scan out sequence number before adjusting for dynamic -+ * idle conditions. It is used for setting a group's -+ * onslot priority. It could differ from prepared_seq_number -+ * when there are idle groups. -+ * @faulted: Indicates that a GPU fault occurred for the queue group. -+ * This flag persists until the fault has been queued to be -+ * reported to userspace. -+ * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in -+ * case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE -+ * @reevaluate_idle_status : Flag set when work is submitted for the normal group -+ * or it becomes unblocked during protected mode. The -+ * flag helps Scheduler confirm if the group actually -+ * became non idle or not. -+ * @bound_queues: Array of registered queues bound to this queue group. 
-+ * @doorbell_nr: Index of the hardware doorbell page assigned to the -+ * group. -+ * @protm_event_work: Work item corresponding to the protected mode entry -+ * event for this queue. -+ * @protm_pending_bitmap: Bit array to keep a track of CSs that -+ * have pending protected mode entry requests. -+ * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be -+ * returned to userspace if such an error has occurred. -+ * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT -+ * to be returned to userspace if such an error has occurred. -+ * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM -+ * to be returned to userspace if such an error has occurred. -+ * @timer_event_work: Work item to handle the progress timeout fatal event -+ * for the group. -+ * @deschedule_deferred_cnt: Counter keeping a track of the number of threads -+ * that tried to deschedule the group and had to defer -+ * the descheduling due to the dump on fault. -+ * @csg_reg: An opaque pointer to the runtime bound shared regions. It is -+ * dynamically managed by the scheduler and can be NULL if the -+ * group is off-slot. -+ * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts. -+ * It is accumulated on consecutive mapping attempt failures. On -+ * reaching a preset limit, the group is regarded as suffered -+ * a fatal error and triggers a fatal error notification. -+ */ -+struct kbase_queue_group { -+ struct kbase_context *kctx; -+ struct kbase_normal_suspend_buffer normal_suspend_buf; -+ struct kbase_protected_suspend_buffer protected_suspend_buf; -+ u8 handle; -+ s8 csg_nr; -+ u8 priority; ++unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, ++ enum kbase_timeout_selector selector) ++{ ++ /* Timeout calculation: ++ * dividing number of cycles by freq in KHz automatically gives value ++ * in milliseconds. nr_cycles will have to be multiplied by 1e3 to ++ * get result in microseconds, and 1e6 to get result in nanoseconds. ++ */ + -+ u8 tiler_max; -+ u8 fragment_max; -+ u8 compute_max; -+ u8 csi_handlers; ++ u64 timeout, nr_cycles = 0; ++ u64 freq_khz; + -+ u64 tiler_mask; -+ u64 fragment_mask; -+ u64 compute_mask; ++ /* Only for debug messages, safe default in case it's mis-maintained */ ++ const char *selector_str = "(unknown)"; + -+ u32 group_uid; ++ if (!kbdev->lowest_gpu_freq_khz) { ++ dev_dbg(kbdev->dev, ++ "Lowest frequency uninitialized! Using reference frequency for scaling"); ++ freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; ++ } else { ++ freq_khz = kbdev->lowest_gpu_freq_khz; ++ } + -+ struct list_head link; -+ struct list_head link_to_schedule; -+ enum kbase_csf_group_state run_state; -+ u32 prepared_seq_num; -+ u32 scan_seq_num; -+ bool faulted; -+ bool cs_unrecoverable; -+ bool reevaluate_idle_status; ++ switch (selector) { ++ case MMU_AS_INACTIVE_WAIT_TIMEOUT: ++ selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; ++ nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; ++ break; ++ case KBASE_TIMEOUT_SELECTOR_COUNT: ++ default: ++#if !MALI_USE_CSF ++ WARN(1, "Invalid timeout selector used! Using default value"); ++ nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; ++ break; ++ case JM_DEFAULT_JS_FREE_TIMEOUT: ++ selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; ++ nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; ++ break; ++#else ++ /* Use Firmware timeout if invalid selection */ ++ WARN(1, ++ "Invalid timeout selector used! 
Using CSF Firmware timeout"); ++ fallthrough; ++ case CSF_FIRMWARE_TIMEOUT: ++ selector_str = "CSF_FIRMWARE_TIMEOUT"; ++ /* Any FW timeout cannot be longer than the FW ping interval, after which ++ * the firmware_aliveness_monitor will be triggered and may restart ++ * the GPU if the FW is unresponsive. ++ */ ++ nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); + -+ struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; ++ if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) ++ dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", ++ selector_str); ++ break; ++ case CSF_PM_TIMEOUT: ++ selector_str = "CSF_PM_TIMEOUT"; ++ nr_cycles = CSF_PM_TIMEOUT_CYCLES; ++ break; ++ case CSF_GPU_RESET_TIMEOUT: ++ selector_str = "CSF_GPU_RESET_TIMEOUT"; ++ nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; ++ break; ++ case CSF_CSG_SUSPEND_TIMEOUT: ++ selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; ++ nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; ++ break; ++ case CSF_FIRMWARE_BOOT_TIMEOUT: ++ selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; ++ nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; ++ break; ++ case CSF_FIRMWARE_PING_TIMEOUT: ++ selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; ++ nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; ++ break; ++ case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: ++ selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; ++ nr_cycles = kbase_csf_timeout_get(kbdev); ++ break; ++#endif ++ } + -+ int doorbell_nr; -+ struct work_struct protm_event_work; -+ DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); ++ timeout = div_u64(nr_cycles, freq_khz); ++ if (WARN(timeout > UINT_MAX, ++ "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", ++ (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) ++ timeout = UINT_MAX; ++ return (unsigned int)timeout; ++} ++KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); + -+ struct kbase_csf_notification error_fatal; -+ struct kbase_csf_notification error_timeout; -+ struct kbase_csf_notification error_tiler_oom; ++u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev) ++{ ++ u32 hi1, hi2, lo; + -+ struct work_struct timer_event_work; ++ /* Read hi, lo, hi to ensure a coherent u64 */ ++ do { ++ hi1 = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(CYCLE_COUNT_HI)); ++ lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(CYCLE_COUNT_LO)); ++ hi2 = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(CYCLE_COUNT_HI)); ++ } while (hi1 != hi2); + -+ /** -+ * @dvs_buf: Address and size of scratch memory. -+ * -+ * Used to store intermediate DVS data by the GPU. -+ */ -+ u64 dvs_buf; -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ u32 deschedule_deferred_cnt; -+#endif -+ void *csg_reg; -+ u8 csg_reg_bind_retries; -+}; ++ return lo | (((u64) hi1) << 32); ++} + -+/** -+ * struct kbase_csf_kcpu_queue_context - Object representing the kernel CPU -+ * queues for a GPU address space. -+ * -+ * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. -+ * @array: Array of pointers to kernel CPU command queues. -+ * @in_use: Bitmap which indicates which kernel CPU command queues are in use. -+ * @cmd_seq_num: The sequence number assigned to an enqueued command, -+ * in incrementing order (older commands shall have a -+ * smaller number). -+ * @jit_lock: Lock to serialise JIT operations. -+ * @jit_cmds_head: A list of the just-in-time memory commands, both -+ * allocate & free, in submission order, protected -+ * by kbase_csf_kcpu_queue_context.lock. 
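The conversion above works because a frequency in kHz is a cycle count per millisecond, so kbase_get_timeout_ms() can divide the cycle budget by the lowest GPU frequency and cap the result. A small worked sketch of that arithmetic (the cycle and frequency values are illustrative, not taken from the patch):

static unsigned int cycles_to_ms(unsigned long long nr_cycles,
				 unsigned long long freq_khz)
{
	/* freq_khz cycles elapse per millisecond, as in the div_u64() above */
	unsigned long long ms = nr_cycles / freq_khz;

	return ms > 0xFFFFFFFFull ? 0xFFFFFFFFu : (unsigned int)ms; /* UINT_MAX cap */
}

/* e.g. a 5,000,000-cycle budget at 100 MHz (100,000 kHz) gives 50 ms */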
-+ * @jit_blocked_queues: A list of KCPU command queues blocked by a pending -+ * just-in-time memory allocation command which will be -+ * reattempted after the impending free of other active -+ * allocations. -+ */ -+struct kbase_csf_kcpu_queue_context { -+ struct mutex lock; -+ struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; -+ DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); -+ atomic64_t cmd_seq_num; ++#if MALI_USE_CSF ++u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts) ++{ ++ if (WARN_ON(!kbdev)) ++ return 0; + -+ struct mutex jit_lock; -+ struct list_head jit_cmds_head; -+ struct list_head jit_blocked_queues; -+}; ++ return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) + ++ kbdev->backend_time.offset; ++} + +/** -+ * struct kbase_csf_cpu_queue_context - Object representing the cpu queue -+ * information. ++ * get_cpu_gpu_time() - Get current CPU and GPU timestamps. + * -+ * @buffer: Buffer containing CPU queue information provided by Userspace. -+ * @buffer_size: The size of @buffer. -+ * @dump_req_status: Indicates the current status for CPU queues dump request. -+ * @dump_cmp: Dumping cpu queue completion event. ++ * @kbdev: Kbase device. ++ * @cpu_ts: Output CPU timestamp. ++ * @gpu_ts: Output GPU timestamp. ++ * @gpu_cycle: Output GPU cycle counts. + */ -+struct kbase_csf_cpu_queue_context { -+ char *buffer; -+ size_t buffer_size; -+ atomic_t dump_req_status; -+ struct completion dump_cmp; -+}; ++static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle) ++{ ++ struct timespec64 ts; + -+/** -+ * struct kbase_csf_heap_context_allocator - Allocator of heap contexts ++ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); ++ ++ if (cpu_ts) ++ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; ++} ++#endif ++ ++int kbase_backend_time_init(struct kbase_device *kbdev) ++{ ++#if MALI_USE_CSF ++ u64 cpu_ts = 0; ++ u64 gpu_ts = 0; ++ u64 freq; ++ u64 common_factor; ++ ++ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); ++ freq = arch_timer_get_cntfrq(); ++ ++ if (!freq) { ++ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); ++ return -EINVAL; ++ } ++ ++ common_factor = gcd(NSEC_PER_SEC, freq); ++ ++ kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor); ++ kbdev->backend_time.divisor = div64_u64(freq, common_factor); ++ ++ if (!kbdev->backend_time.divisor) { ++ dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); ++ return -EINVAL; ++ } ++ ++ kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, ++ kbdev->backend_time.divisor); ++#endif ++ ++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp +new file mode 100755 +index 000000000..0a61a12d9 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/build.bp +@@ -0,0 +1,280 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kctx: Pointer to the kbase context with which this allocator is -+ * associated. -+ * @region: Pointer to a GPU memory region from which heap context structures -+ * are allocated. NULL if no heap contexts have been allocated. -+ * @gpu_va: GPU virtual address of the start of the region from which heap -+ * context structures are allocated. 0 if no heap contexts have been -+ * allocated. -+ * @lock: Lock preventing concurrent access to the @in_use bitmap. 
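kbase_backend_time_init() above reduces NSEC_PER_SEC and the timer frequency by their gcd to obtain an exact rational scale, then records an offset so that kbase_backend_time_convert_gpu_to_cpu() only needs a multiply, a divide and an add per sample. A plain-C sketch of the same arithmetic (the helper names are illustrative; the real frequency comes from arch_timer_get_cntfrq()):

struct ts_map {
	unsigned long long mult;
	unsigned long long div;
	long long offset;
};

static unsigned long long gcd_ull(unsigned long long a, unsigned long long b)
{
	while (b) {
		unsigned long long t = a % b;

		a = b;
		b = t;
	}
	return a;
}

static struct ts_map ts_map_init(unsigned long long freq_hz,
				 unsigned long long cpu_ns,
				 unsigned long long gpu_ticks)
{
	const unsigned long long nsec_per_sec = 1000000000ull;
	const unsigned long long g = gcd_ull(nsec_per_sec, freq_hz);
	struct ts_map m = { nsec_per_sec / g, freq_hz / g, 0 };

	/* offset = cpu_ns - gpu_ticks * mult / div, as computed above */
	m.offset = (long long)(cpu_ns - gpu_ticks * m.mult / m.div);
	return m;
}

static unsigned long long ts_map_to_cpu_ns(const struct ts_map *m,
					   unsigned long long gpu_ticks)
{
	return gpu_ticks * m->mult / m->div + (unsigned long long)m->offset;
}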
-+ * @in_use: Bitmap that indicates which heap context structures are currently -+ * allocated (in @region). -+ * @heap_context_size_aligned: Size of a heap context structure, in bytes, -+ * aligned to GPU cacheline size. ++ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * -+ * Heap context structures are allocated by the kernel for use by the firmware. -+ * The current implementation subdivides a single GPU memory region for use as -+ * a sparse array. -+ */ -+struct kbase_csf_heap_context_allocator { -+ struct kbase_context *kctx; -+ struct kbase_va_region *region; -+ u64 gpu_va; -+ struct mutex lock; -+ DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); -+ u32 heap_context_size_aligned; -+}; -+ -+/** -+ * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps -+ * context for a GPU address space. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @lock: Lock to prevent the concurrent access to tiler heaps (after the -+ * initialization), a tiler heap can be terminated whilst an OoM -+ * event is being handled for it. -+ * @list: List of tiler heaps. -+ * @ctx_alloc: Allocator for heap context structures. -+ * @nr_of_heaps: Total number of tiler heaps that were added during the -+ * life time of the context. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * This contains all of the CSF state relating to chunked tiler heaps for one -+ * @kbase_context. It is not the same as a heap context structure allocated by -+ * the kernel for use by the firmware. + */ -+struct kbase_csf_tiler_heap_context { -+ struct mutex lock; -+ struct list_head list; -+ struct kbase_csf_heap_context_allocator ctx_alloc; -+ u64 nr_of_heaps; -+}; + -+/** -+ * struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of -+ * a kctx for tiler heap reclaim manger -+ * @mgr_link: Link for hooking up to the heap reclaim manger's kctx lists -+ * @nr_freed_pages: Number of freed pages from the the kctx, after its attachment -+ * to the reclaim manager. This is used for tracking reclaim's -+ * free operation progress. -+ * @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx -+ * when all its CSGs are off-slot, on attaching to the reclaim -+ * manager. -+ * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a -+ * kctx has groups on-slot, the scheduler will detach it from -+ * the tiler heap reclaim manager, i.e. no tiler heap memory -+ * reclaiming operations on the kctx. -+ */ -+struct kbase_csf_ctx_heap_reclaim_info { -+ struct list_head mgr_link; -+ u32 nr_freed_pages; -+ u32 nr_est_unused_pages; -+ u8 on_slot_grps; -+}; ++/* Kernel-side tests may include mali_kbase's headers. Therefore any config ++ * options which affect the sizes of any structs (e.g. adding extra members) ++ * must be included in these defaults, so that the structs are consistent in ++ * both mali_kbase and the test modules. 
*/ ++bob_defaults { ++ name: "mali_kbase_shared_config_defaults", ++ defaults: [ ++ "kernel_defaults", ++ ], ++ mali_no_mali: { ++ kbuild_options: [ ++ "CONFIG_MALI_BIFROST_NO_MALI=y", ++ "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", ++ "CONFIG_GPU_HWVER={{.hwver}}", ++ ], ++ }, ++ mali_platform_dt_pin_rst: { ++ kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"], ++ }, ++ gpu_has_csf: { ++ kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"], ++ }, ++ mali_devfreq: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_DEVFREQ=y"], ++ }, ++ mali_midgard_dvfs: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_DVFS=y"], ++ }, ++ mali_gator_support: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_GATOR_SUPPORT=y"], ++ }, ++ mali_midgard_enable_trace: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"], ++ }, ++ mali_arbiter_support: { ++ kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], ++ }, ++ mali_dma_buf_map_on_demand: { ++ kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], ++ }, ++ mali_dma_buf_legacy_compat: { ++ kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], ++ }, ++ large_page_alloc_override: { ++ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"], ++ }, ++ large_page_alloc: { ++ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], ++ }, ++ mali_memory_fully_backed: { ++ kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], ++ }, ++ mali_corestack: { ++ kbuild_options: ["CONFIG_MALI_CORESTACK=y"], ++ }, ++ mali_real_hw: { ++ kbuild_options: ["CONFIG_MALI_REAL_HW=y"], ++ }, ++ mali_error_inject_none: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], ++ }, ++ mali_error_inject_track_list: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], ++ }, ++ mali_error_inject_random: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], ++ }, ++ mali_error_inject: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"], ++ }, ++ mali_debug: { ++ kbuild_options: [ ++ "CONFIG_MALI_BIFROST_DEBUG=y", ++ "MALI_KERNEL_TEST_API={{.debug}}", ++ ], ++ }, ++ mali_fence_debug: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_FENCE_DEBUG=y"], ++ }, ++ mali_system_trace: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"], ++ }, ++ buslog: { ++ kbuild_options: ["CONFIG_MALI_BUSLOG=y"], ++ }, ++ cinstr_vector_dump: { ++ kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], ++ }, ++ cinstr_gwt: { ++ kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], ++ }, ++ cinstr_primary_hwc: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_PRIMARY=y"], ++ }, ++ cinstr_secondary_hwc: { ++ kbuild_options: ["CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY=y"], ++ }, ++ cinstr_tertiary_hwc: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"], ++ }, ++ cinstr_hwc_set_select_via_debug_fs: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"], ++ }, ++ mali_job_dump: { ++ kbuild_options: ["CONFIG_MALI_JOB_DUMP"], ++ }, ++ mali_pwrsoft_765: { ++ kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], ++ }, ++ mali_hw_errata_1485982_not_affected: { ++ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], ++ }, ++ mali_hw_errata_1485982_use_clock_alternative: { ++ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], ++ }, ++ platform_is_fpga: { ++ kbuild_options: ["CONFIG_MALI_IS_FPGA=y"], ++ }, ++ mali_coresight: { ++ kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], ++ }, ++ kbuild_options: [ ++ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", ++ "MALI_CUSTOMER_RELEASE={{.release}}", ++ "MALI_UNIT_TEST={{.unit_test_code}}", ++ "MALI_USE_CSF={{.gpu_has_csf}}", ++ 
"MALI_JIT_PRESSURE_LIMIT_BASE={{.jit_pressure_limit_base}}", + -+/** -+ * struct kbase_csf_scheduler_context - Object representing the scheduler's -+ * context for a GPU address space. -+ * -+ * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, -+ * one per queue group relative-priority level. -+ * @num_runnable_grps: Total number of runnable groups across all priority -+ * levels in @runnable_groups. -+ * @idle_wait_groups: A list of GPU command queue groups in which all enabled -+ * GPU command queues are idle and at least one of them -+ * is blocked on a sync wait operation. -+ * @num_idle_wait_grps: Length of the @idle_wait_groups list. -+ * @sync_update_wq: Dedicated workqueue to process work items corresponding -+ * to the sync_update events by sync_set/sync_add -+ * instruction execution on CSs bound to groups -+ * of @idle_wait_groups list. -+ * @sync_update_work: work item to process the sync_update events by -+ * sync_set / sync_add instruction execution on command -+ * streams bound to groups of @idle_wait_groups list. -+ * @ngrp_to_schedule: Number of groups added for the context to the -+ * 'groups_to_schedule' list of scheduler instance. -+ * @heap_info: Heap reclaim information data of the kctx. As the -+ * reclaim action needs to be coordinated with the scheduler -+ * operations, any manipulations on the data needs holding -+ * the scheduler's mutex lock. -+ */ -+struct kbase_csf_scheduler_context { -+ struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; -+ u32 num_runnable_grps; -+ struct list_head idle_wait_groups; -+ u32 num_idle_wait_grps; -+ struct workqueue_struct *sync_update_wq; -+ struct work_struct sync_update_work; -+ u32 ngrp_to_schedule; -+ struct kbase_csf_ctx_heap_reclaim_info heap_info; -+}; ++ // Start of CS experimental features definitions. ++ // If there is nothing below, definition should be added as follows: ++ // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" ++ // experimental_feature above comes from Mconfig in ++ // /product/base/ ++ // However, in Mconfig, experimental_feature should be looked up (for ++ // similar explanation to this one) as ALLCAPS, i.e. ++ // EXPERIMENTAL_FEATURE. ++ // ++ // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it ++ // is an umbrella feature that would be open for inappropriate use ++ // (catch-all for experimental CS code without separating it into ++ // different features). ++ "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}", ++ "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}", ++ ], ++} + -+/** -+ * enum kbase_csf_event_callback_action - return type for CSF event callbacks. 
++bob_kernel_module { ++ name: "mali_kbase", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ ], ++ srcs: [ ++ "*.c", ++ "*.h", ++ "Kbuild", ++ "backend/gpu/*.c", ++ "backend/gpu/*.h", ++ "backend/gpu/Kbuild", ++ "context/*.c", ++ "context/*.h", ++ "context/Kbuild", ++ "hwcnt/*.c", ++ "hwcnt/*.h", ++ "hwcnt/backend/*.h", ++ "hwcnt/Kbuild", ++ "ipa/*.c", ++ "ipa/*.h", ++ "ipa/Kbuild", ++ "platform/*.h", ++ "platform/*/*.c", ++ "platform/*/*.h", ++ "platform/*/Kbuild", ++ "platform/*/*/*.c", ++ "platform/*/*/*.h", ++ "platform/*/*/Kbuild", ++ "platform/*/*/*.c", ++ "platform/*/*/*.h", ++ "platform/*/*/Kbuild", ++ "platform/*/*/*/*.c", ++ "platform/*/*/*/*.h", ++ "platform/*/*/*/Kbuild", ++ "thirdparty/*.c", ++ "thirdparty/Kbuild", ++ "debug/*.c", ++ "debug/*.h", ++ "debug/Kbuild", ++ "device/*.c", ++ "device/*.h", ++ "device/Kbuild", ++ "gpu/*.c", ++ "gpu/*.h", ++ "gpu/Kbuild", ++ "tl/*.c", ++ "tl/*.h", ++ "tl/Kbuild", ++ "mmu/*.c", ++ "mmu/*.h", ++ "mmu/Kbuild", ++ ], ++ gpu_has_job_manager: { ++ srcs: [ ++ "context/backend/*_jm.c", ++ "debug/backend/*_jm.c", ++ "debug/backend/*_jm.h", ++ "device/backend/*_jm.c", ++ "gpu/backend/*_jm.c", ++ "gpu/backend/*_jm.h", ++ "hwcnt/backend/*_jm.c", ++ "hwcnt/backend/*_jm.h", ++ "hwcnt/backend/*_jm_*.c", ++ "hwcnt/backend/*_jm_*.h", ++ "jm/*.h", ++ "tl/backend/*_jm.c", ++ "mmu/backend/*_jm.c", ++ "ipa/backend/*_jm.c", ++ "ipa/backend/*_jm.h", ++ ], ++ }, ++ gpu_has_csf: { ++ srcs: [ ++ "context/backend/*_csf.c", ++ "csf/*.c", ++ "csf/*.h", ++ "csf/Kbuild", ++ "csf/ipa_control/*.c", ++ "csf/ipa_control/*.h", ++ "csf/ipa_control/Kbuild", ++ "debug/backend/*_csf.c", ++ "debug/backend/*_csf.h", ++ "device/backend/*_csf.c", ++ "gpu/backend/*_csf.c", ++ "gpu/backend/*_csf.h", ++ "hwcnt/backend/*_csf.c", ++ "hwcnt/backend/*_csf.h", ++ "hwcnt/backend/*_csf_*.c", ++ "hwcnt/backend/*_csf_*.h", ++ "tl/backend/*_csf.c", ++ "mmu/backend/*_csf.c", ++ "ipa/backend/*_csf.c", ++ "ipa/backend/*_csf.h", ++ ], ++ }, ++ mali_arbiter_support: { ++ srcs: [ ++ "arbiter/*.c", ++ "arbiter/*.h", ++ "arbiter/Kbuild", ++ ], ++ }, ++ kbuild_options: [ ++ "CONFIG_MALI_BIFROST=m", ++ "CONFIG_MALI_KUTF=n", ++ ], ++ buslog: { ++ extra_symbols: [ ++ "bus_logger", ++ ], ++ }, ++} +diff --git a/drivers/gpu/arm/bifrost/context/Kbuild b/drivers/gpu/arm/bifrost/context/Kbuild +new file mode 100755 +index 000000000..156b46a12 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/context/Kbuild +@@ -0,0 +1,27 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. 
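The KEEP/REMOVE convention documented above lets a CSF event callback unregister itself through its return value. A sketch of a one-shot callback; the callback prototype used here and the registration helper named in the trailing comment are assumptions for illustration, as this hunk does not show their real signatures:

static enum kbase_csf_event_callback_action one_shot_event_cb(void *param)
{
	struct completion *done = param;

	complete(done);                         /* notify the waiter once */
	return KBASE_CSF_EVENT_CALLBACK_REMOVE; /* then drop this registration */
}

/* Registration would go through the driver's event-wait helper, e.g. (assumed):
 *   kbase_csf_event_wait_add(kctx, one_shot_event_cb, &done);
 */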
++# ++# ++ ++bifrost_kbase-y += context/mali_kbase_context.o ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += context/backend/mali_kbase_context_csf.o ++else ++ bifrost_kbase-y += context/backend/mali_kbase_context_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +new file mode 100644 +index 000000000..07d277b94 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +@@ -0,0 +1,207 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. -+ * It doesn't correspond to any action or type of event callback. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * -+ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed -+ * immediately upon return. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. -+ * It doesn't correspond to any action or type of event callback. -+ */ -+enum kbase_csf_event_callback_action { -+ KBASE_CSF_EVENT_CALLBACK_FIRST = 0, -+ KBASE_CSF_EVENT_CALLBACK_KEEP, -+ KBASE_CSF_EVENT_CALLBACK_REMOVE, -+ KBASE_CSF_EVENT_CALLBACK_LAST, -+}; -+ -+/** -+ * struct kbase_csf_event - Object representing CSF event and error ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @callback_list: List of callbacks which are registered to serve CSF -+ * events. -+ * @error_list: List for CS fatal errors in CSF context. -+ * Link of fatal error is &struct_kbase_csf_notification.link. -+ * @lock: Lock protecting access to @callback_list and -+ * @error_list. + */ -+struct kbase_csf_event { -+ struct list_head callback_list; -+ struct list_head error_list; -+ spinlock_t lock; -+}; + -+/** -+ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping -+ * of USER Register page for a context. -+ * -+ * @vma: Pointer to the VMA corresponding to the virtual mapping -+ * of the USER register page. -+ * @file_offset: File offset value that is assigned to userspace mapping -+ * of the USER Register page. It is in page units. -+ * @link: Links the context to the device list when mapping is pointing to -+ * either the dummy or the real Register page. ++/* ++ * Base kernel context APIs for CSF GPUs + */ -+struct kbase_csf_user_reg_context { -+ struct vm_area_struct *vma; -+ u32 file_offset; -+ struct list_head link; -+}; + -+/** -+ * struct kbase_csf_context - Object representing CSF for a GPU address space. -+ * -+ * @event_pages_head: A list of pages allocated for the event memory used by -+ * the synchronization objects. A separate list would help -+ * in the fast lookup, since the list is expected to be short -+ * as one page would provide the memory for up to 1K -+ * synchronization objects. 
-+ * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES is the upper -+ * bound on the size of event memory. -+ * @cookies: Bitmask containing of KBASE_CSF_NUM_USER_IO_PAGES_HANDLE -+ * bits, used for creating the User mode CPU mapping in a -+ * deferred manner of a pair of User mode input/output pages -+ * & a hardware doorbell page. -+ * The pages are allocated when a GPU command queue is -+ * bound to a CSG in kbase_csf_queue_bind. -+ * This helps returning unique handles to Userspace from -+ * kbase_csf_queue_bind and later retrieving the pointer to -+ * queue in the mmap handler. -+ * @user_pages_info: Array containing pointers to queue -+ * structures, used in conjunction with cookies bitmask for -+ * providing a mechansim to create a CPU mapping of -+ * input/output pages & hardware doorbell page. -+ * @lock: Serializes accesses to all members, except for ones that -+ * have their own locks. -+ * @queue_groups: Array of registered GPU command queue groups. -+ * @queue_list: Linked list of GPU command queues not yet deregistered. -+ * Note that queues can persist after deregistration if the -+ * userspace mapping created for them on bind operation -+ * hasn't been removed. -+ * @kcpu_queues: Kernel CPU command queues. -+ * @event: CSF event object. -+ * @tiler_heaps: Chunked tiler memory heaps. -+ * @wq: Dedicated workqueue to process work items corresponding -+ * to the OoM events raised for chunked tiler heaps being -+ * used by GPU command queues, and progress timeout events. -+ * @link: Link to this csf context in the 'runnable_kctxs' list of -+ * the scheduler instance -+ * @sched: Object representing the scheduler's context -+ * @pending_submission_work: Work item to process pending kicked GPU command queues. -+ * @cpu_queue: CPU queue information. Only be available when DEBUG_FS -+ * is enabled. -+ * @user_reg: Collective information to support mapping to USER Register page. -+ */ -+struct kbase_csf_context { -+ struct list_head event_pages_head; -+ DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); -+ struct kbase_queue *user_pages_info[ -+ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; -+ struct mutex lock; -+ struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; -+ struct list_head queue_list; -+ struct kbase_csf_kcpu_queue_context kcpu_queues; -+ struct kbase_csf_event event; -+ struct kbase_csf_tiler_heap_context tiler_heaps; -+ struct workqueue_struct *wq; -+ struct list_head link; -+ struct kbase_csf_scheduler_context sched; -+ struct work_struct pending_submission_work; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ +#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_csf_cpu_queue_context cpu_queue; -+#endif -+ struct kbase_csf_user_reg_context user_reg; -+}; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * struct kbase_csf_reset_gpu - Object containing the members required for -+ * GPU reset handling. -+ * @workq: Workqueue to execute the GPU reset work item @work. -+ * @work: Work item for performing the GPU reset. -+ * @wait: Wait queue used to wait for the GPU reset completion. -+ * @sem: RW Semaphore to ensure no other thread attempts to use the -+ * GPU whilst a reset is in process. Unlike traditional -+ * semaphores and wait queues, this allows Linux's lockdep -+ * mechanism to check for deadlocks involving reset waits. -+ * @state: Tracks if the GPU reset is in progress or not. -+ * The state is represented by enum @kbase_csf_reset_gpu_state. 
-+ */ -+struct kbase_csf_reset_gpu { -+ struct workqueue_struct *workq; -+ struct work_struct work; -+ wait_queue_head_t wait; -+ struct rw_semaphore sem; -+ atomic_t state; -+}; ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ kbase_debug_mem_view_init(kctx); ++ kbase_debug_mem_zones_init(kctx); ++ kbase_debug_mem_allocs_init(kctx); ++ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); ++ kbase_jit_debugfs_init(kctx); ++ kbase_csf_queue_group_debugfs_init(kctx); ++ kbase_csf_kcpu_debugfs_init(kctx); ++ kbase_csf_sync_debugfs_init(kctx); ++ kbase_csf_tiler_heap_debugfs_init(kctx); ++ kbase_csf_tiler_heap_total_debugfs_init(kctx); ++ kbase_csf_cpu_queue_debugfs_init(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + -+/** -+ * struct kbase_csf_csg_slot - Object containing members for tracking the state -+ * of CSG slots. -+ * @resident_group: pointer to the queue group that is resident on the CSG slot. -+ * @state: state of the slot as per enum @kbase_csf_csg_slot_state. -+ * @trigger_jiffies: value of jiffies when change in slot state is recorded. -+ * @priority: dynamic priority assigned to CSG slot. -+ */ -+struct kbase_csf_csg_slot { -+ struct kbase_queue_group *resident_group; -+ atomic_t state; -+ unsigned long trigger_jiffies; -+ u8 priority; -+}; ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ debugfs_remove_recursive(kctx->kctx_dentry); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#else ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + -+/** -+ * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim -+ * kctx lists inside the CSF device's scheduler. -+ * -+ * @heap_reclaim: Tiler heap reclaim shrinker object. -+ * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The -+ * lists track the kctxs attached to the reclaim manager. -+ * @unused_pages: Estimated number of unused pages from the @ctxlist array. The -+ * number is indicative for use with reclaim shrinker's count method. -+ */ -+struct kbase_csf_sched_heap_reclaim_mgr { -+ struct shrinker heap_reclaim; -+ struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; -+ atomic_t unused_pages; -+}; ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#endif /* CONFIG_DEBUG_FS */ + -+/** -+ * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared -+ * interface segment regions for scheduler -+ * operations -+ * -+ * @array_csg_regs: Base pointer of an internally created array_csg_regs[]. -+ * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a -+ * group that is placed onto on-slot by the scheduler, it is dropped -+ * from the list (i.e busy active). The Scheduler will put an active -+ * item back when it's becoming off-slot (not in use). -+ * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal -+ * and pmode suspend buffers, as a default replacement of a CSG's pages -+ * for the MMU mapping when the csg_reg is not bound to a group. -+ * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with -+ * protected suspend buffer MMU map operations. -+ * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags. 
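context_init[] above pairs each initialisation step with its matching teardown; kbase_create_context() walks the table forward and kbase_context_term_partial() unwinds the already-initialised entries in reverse when a step fails. A generic sketch of that pattern with placeholder step names (struct obj, step_a/step_b are illustrative only):

struct obj;
int step_a_init(struct obj *o);
void step_a_term(struct obj *o);
int step_b_init(struct obj *o);
void step_b_term(struct obj *o);

struct init_step {
	int (*init)(struct obj *o);
	void (*term)(struct obj *o);
};

static const struct init_step steps[] = {
	{ step_a_init, step_a_term },
	{ step_b_init, step_b_term },
};

static int obj_init(struct obj *o)
{
	unsigned int i;

	for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
		int err = steps[i].init ? steps[i].init(o) : 0;

		if (err) {
			while (i-- > 0)    /* tear down completed steps in reverse */
				if (steps[i].term)
					steps[i].term(o);
			return err;
		}
	}
	return 0;
}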
-+ * @dummy_phys_allocated: Indicating the @p dummy_phy page is allocated when true. -+ */ -+struct kbase_csf_mcu_shared_regions { -+ void *array_csg_regs; -+ struct list_head unused_csg_regs; -+ struct tagged_addr *dummy_phys; -+ struct tagged_addr *pma_phys; -+ unsigned long userio_mem_rd_flags; -+ bool dummy_phys_allocated; -+}; ++static void kbase_context_free(struct kbase_context *kctx) ++{ ++ kbase_timeline_post_kbase_context_destroy(kctx); + -+/** -+ * struct kbase_csf_scheduler - Object representing the scheduler used for -+ * CSF for an instance of GPU platform device. -+ * @lock: Lock to serialize the scheduler operations and -+ * access to the data members. -+ * @interrupt_lock: Lock to protect members accessed by interrupt -+ * handler. -+ * @state: The operational phase the scheduler is in. Primarily -+ * used for indicating what in-cycle schedule actions -+ * are allowed. -+ * @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of -+ * which pages are currently available for assignment -+ * to clients. -+ * @csg_inuse_bitmap: Bitmap to keep a track of CSG slots -+ * that are currently in use. -+ * @csg_slots: The array for tracking the state of CS -+ * group slots. -+ * @runnable_kctxs: List of Kbase contexts that have runnable command -+ * queue groups. -+ * @groups_to_schedule: List of runnable queue groups prepared on every -+ * scheduler tick. The dynamic priority of the CSG -+ * slot assigned to a group will depend upon the -+ * position of group in the list. -+ * @ngrp_to_schedule: Number of groups in the @groups_to_schedule list, -+ * incremented when a group is added to the list, used -+ * to record the position of group in the list. -+ * @num_active_address_spaces: Number of GPU address space slots that would get -+ * used to program the groups in @groups_to_schedule -+ * list on all the available CSG -+ * slots. -+ * @num_csg_slots_for_tick: Number of CSG slots that can be -+ * active in the given tick/tock. This depends on the -+ * value of @num_active_address_spaces. -+ * @remaining_tick_slots: Tracking the number of remaining available slots -+ * for @num_csg_slots_for_tick during the scheduling -+ * operation in a tick/tock. -+ * @idle_groups_to_schedule: List of runnable queue groups, in which all GPU -+ * command queues became idle or are waiting for -+ * synchronization object, prepared on every -+ * scheduler tick. The groups in this list are -+ * appended to the tail of @groups_to_schedule list -+ * after the scan out so that the idle groups aren't -+ * preferred for scheduling over the non-idle ones. -+ * @csg_scan_count_for_tick: CSG scanout count for assign the scan_seq_num for -+ * each scanned out group during scheduling operation -+ * in a tick/tock. -+ * @total_runnable_grps: Total number of runnable groups across all KCTXs. -+ * @csgs_events_enable_mask: Use for temporary masking off asynchronous events -+ * from firmware (such as OoM events) before a group -+ * is suspended. -+ * @csg_slots_idle_mask: Bit array for storing the mask of CS -+ * group slots for which idle notification was -+ * received. -+ * @csg_slots_prio_update: Bit array for tracking slots that have an on-slot -+ * priority update operation. -+ * @last_schedule: Time in jiffies recorded when the last "tick" or -+ * "tock" schedule operation concluded. Used for -+ * evaluating the exclusion window for in-cycle -+ * schedule operation. -+ * @timer_enabled: Whether the CSF scheduler wakes itself up for -+ * periodic scheduling tasks. 
If this value is 0 -+ * then it will only perform scheduling under the -+ * influence of external factors e.g., IRQs, IOCTLs. -+ * @wq: Dedicated workqueue to execute the @tick_work. -+ * @tick_timer: High-resolution timer employed to schedule tick -+ * workqueue items (kernel-provided delayed_work -+ * items do not use hrtimer and for some reason do -+ * not provide sufficiently reliable periodicity). -+ * @tick_work: Work item that performs the "schedule on tick" -+ * operation to implement timeslice-based scheduling. -+ * @tock_work: Work item that would perform the schedule on tock -+ * operation to implement the asynchronous scheduling. -+ * @pending_tock_work: Indicates that the tock work item should re-execute -+ * once it's finished instead of going back to sleep. -+ * @ping_work: Work item that would ping the firmware at regular -+ * intervals, only if there is a single active CSG -+ * slot, to check if firmware is alive and would -+ * initiate a reset if the ping request isn't -+ * acknowledged. -+ * @top_ctx: Pointer to the Kbase context corresponding to the -+ * @top_grp. -+ * @top_grp: Pointer to queue group inside @groups_to_schedule -+ * list that was assigned the highest slot priority. -+ * @active_protm_grp: Indicates if firmware has been permitted to let GPU -+ * enter protected mode with the given group. On exit -+ * from protected mode the pointer is reset to NULL. -+ * This pointer is set and PROTM_ENTER request is sent -+ * atomically with @interrupt_lock held. -+ * This pointer being set doesn't necessarily indicates -+ * that GPU is in protected mode, kbdev->protected_mode -+ * needs to be checked for that. -+ * @idle_wq: Workqueue for executing GPU idle notification -+ * handler. -+ * @gpu_idle_work: Work item for facilitating the scheduler to bring -+ * the GPU to a low-power mode on becoming idle. -+ * @fast_gpu_idle_handling: Indicates whether to relax many of the checks -+ * normally done in the GPU idle worker. This is -+ * set to true when handling the GLB IDLE IRQ if the -+ * idle hysteresis timeout is 0, since it makes it -+ * possible to receive this IRQ before the extract -+ * offset is published (which would cause more -+ * extensive GPU idle checks to fail). -+ * @gpu_no_longer_idle: Effective only when the GPU idle worker has been -+ * queued for execution, this indicates whether the -+ * GPU has become non-idle since the last time the -+ * idle notification was received. -+ * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during -+ * the scheduler active phase in a tick. It then -+ * tracks the count of non-idle groups across all the -+ * other phases. -+ * @non_idle_scanout_grps: Count on the non-idle groups in the scan-out -+ * list at the scheduling prepare stage. -+ * @pm_active_count: Count indicating if the scheduler is owning a power -+ * management reference count. Reference is taken when -+ * the count becomes 1 and is dropped when the count -+ * becomes 0. It is used to enable the power up of MCU -+ * after GPU and L2 cache have been powered up. So when -+ * this count is zero, MCU will not be powered up. -+ * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. -+ * @tick_timer_active: Indicates whether the @tick_timer is effectively -+ * active or not, as the callback function of -+ * @tick_timer will enqueue @tick_work only if this -+ * flag is true. 
This is mainly useful for the case -+ * when scheduling tick needs to be advanced from -+ * interrupt context, without actually deactivating -+ * the @tick_timer first and then enqueing @tick_work. -+ * @tick_protm_pending_seq: Scan out sequence number of the group that has -+ * protected mode execution pending for the queue(s) -+ * bound to it and will be considered first for the -+ * protected mode execution compared to other such -+ * groups. It is updated on every tick/tock. -+ * @interrupt_lock is used to serialize the access. -+ * @protm_enter_time: GPU protected mode enter time. -+ * @reclaim_mgr: CSGs tiler heap manager object. -+ * @mcu_regs_data: Scheduler MCU shared regions data for managing the -+ * shared interface mappings for on-slot queues and -+ * CSG suspend buffers. -+ */ -+struct kbase_csf_scheduler { -+ struct mutex lock; -+ spinlock_t interrupt_lock; -+ enum kbase_csf_scheduler_state state; -+ DECLARE_BITMAP(doorbell_inuse_bitmap, CSF_NUM_DOORBELL); -+ DECLARE_BITMAP(csg_inuse_bitmap, MAX_SUPPORTED_CSGS); -+ struct kbase_csf_csg_slot *csg_slots; -+ struct list_head runnable_kctxs; -+ struct list_head groups_to_schedule; -+ u32 ngrp_to_schedule; -+ u32 num_active_address_spaces; -+ u32 num_csg_slots_for_tick; -+ u32 remaining_tick_slots; -+ struct list_head idle_groups_to_schedule; -+ u32 csg_scan_count_for_tick; -+ u32 total_runnable_grps; -+ DECLARE_BITMAP(csgs_events_enable_mask, MAX_SUPPORTED_CSGS); -+ DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); -+ DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); -+ unsigned long last_schedule; -+ bool timer_enabled; -+ struct workqueue_struct *wq; -+ struct hrtimer tick_timer; -+ struct work_struct tick_work; -+ struct delayed_work tock_work; -+ atomic_t pending_tock_work; -+ struct delayed_work ping_work; -+ struct kbase_context *top_ctx; -+ struct kbase_queue_group *top_grp; -+ struct kbase_queue_group *active_protm_grp; -+ struct workqueue_struct *idle_wq; -+ struct work_struct gpu_idle_work; -+ bool fast_gpu_idle_handling; -+ atomic_t gpu_no_longer_idle; -+ atomic_t non_idle_offslot_grps; -+ u32 non_idle_scanout_grps; -+ u32 pm_active_count; -+ unsigned int csg_scheduling_period_ms; -+ bool tick_timer_active; -+ u32 tick_protm_pending_seq; -+ ktime_t protm_enter_time; -+ struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; -+ struct kbase_csf_mcu_shared_regions mcu_regs_data; ++ vfree(kctx); ++} ++ ++static const struct kbase_context_init context_init[] = { ++ { NULL, kbase_context_free, NULL }, ++ { kbase_context_common_init, kbase_context_common_term, ++ "Common context initialization failed" }, ++ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, ++ "Memory pool group initialization failed" }, ++ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, ++ "Memory evictable initialization failed" }, ++ { kbase_context_mmu_init, kbase_context_mmu_term, ++ "MMU initialization failed" }, ++ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, ++ "Memory alloc page failed" }, ++ { kbase_region_tracker_init, kbase_region_tracker_term, ++ "Region tracker initialization failed" }, ++ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, ++ "Sticky resource initialization failed" }, ++ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, ++ { kbase_csf_ctx_init, kbase_csf_ctx_term, ++ "CSF context initialization failed" }, ++ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, ++ "Adding kctx to device failed" }, +}; + -+/* -+ * Number of 
GPU cycles per unit of the global progress timeout. -+ */ -+#define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024) ++static void kbase_context_term_partial( ++ struct kbase_context *kctx, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (context_init[i].term) ++ context_init[i].term(kctx); ++ } ++} + -+/* -+ * Maximum value of the global progress timeout. -+ */ -+#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ -+ ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \ -+ GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ -+ GLB_PROGRESS_TIMER_TIMEOUT_SCALE) ++struct kbase_context *kbase_create_context(struct kbase_device *kbdev, ++ bool is_compat, ++ base_context_create_flags const flags, ++ unsigned long const api_version, ++ struct file *const filp) ++{ ++ struct kbase_context *kctx; ++ unsigned int i = 0; + -+/* -+ * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. -+ */ -+#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) ++ if (WARN_ON(!kbdev)) ++ return NULL; + -+/* -+ * In typical operations, the management of the shader core power transitions -+ * is delegated to the MCU/firmware. However, if the host driver is configured -+ * to take direct control, one needs to disable the MCU firmware GLB_PWROFF -+ * timer. -+ */ -+#define DISABLE_GLB_PWROFF_TIMER (0) ++ /* Validate flags */ ++ if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) ++ return NULL; + -+/* Index of the GPU_ACTIVE counter within the CSHW counter block */ -+#define GPU_ACTIVE_CNT_IDX (4) ++ /* zero-inited as lot of code assume it's zero'ed out on create */ ++ kctx = vzalloc(sizeof(*kctx)); ++ if (WARN_ON(!kctx)) ++ return NULL; + -+/* -+ * Maximum number of sessions that can be managed by the IPA Control component. -+ */ -+#if MALI_UNIT_TEST -+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8) -+#else -+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2) -+#endif ++ kctx->kbdev = kbdev; ++ kctx->api_version = api_version; ++ kctx->filp = filp; ++ kctx->create_flags = flags; + -+/** -+ * enum kbase_ipa_core_type - Type of counter block for performance counters -+ * -+ * @KBASE_IPA_CORE_TYPE_CSHW: CS Hardware counters. -+ * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters. -+ * @KBASE_IPA_CORE_TYPE_TILER: Tiler counters. -+ * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters. -+ * @KBASE_IPA_CORE_TYPE_NUM: Number of core types. -+ */ -+enum kbase_ipa_core_type { -+ KBASE_IPA_CORE_TYPE_CSHW = 0, -+ KBASE_IPA_CORE_TYPE_MEMSYS, -+ KBASE_IPA_CORE_TYPE_TILER, -+ KBASE_IPA_CORE_TYPE_SHADER, -+ KBASE_IPA_CORE_TYPE_NUM -+}; ++ if (is_compat) ++ kbase_ctx_flag_set(kctx, KCTX_COMPAT); ++#if defined(CONFIG_64BIT) ++ else ++ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); ++#endif /* defined(CONFIG_64BIT) */ + -+/* -+ * Number of configurable counters per type of block on the IPA Control -+ * interface. -+ */ -+#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8) ++ for (i = 0; i < ARRAY_SIZE(context_init); i++) { ++ int err = 0; + -+/* -+ * Total number of configurable counters existing on the IPA Control interface. -+ */ -+#define KBASE_IPA_CONTROL_MAX_COUNTERS \ -+ ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) ++ if (context_init[i].init) ++ err = context_init[i].init(kctx); + -+/** -+ * struct kbase_ipa_control_prfcnt - Session for a single performance counter -+ * -+ * @latest_raw_value: Latest raw value read from the counter. -+ * @scaling_factor: Factor raw value shall be multiplied by. -+ * @accumulated_diff: Partial sum of scaled and normalized values from -+ * previous samples. 
This represent all the values -+ * that were read before the latest raw value. -+ * @type: Type of counter block for performance counter. -+ * @select_idx: Index of the performance counter as configured on -+ * the IPA Control interface. -+ * @gpu_norm: Indicating whether values shall be normalized by -+ * GPU frequency. If true, returned values represent -+ * an interval of time expressed in seconds (when the -+ * scaling factor is set to 1). -+ */ -+struct kbase_ipa_control_prfcnt { -+ u64 latest_raw_value; -+ u64 scaling_factor; -+ u64 accumulated_diff; -+ enum kbase_ipa_core_type type; -+ u8 select_idx; -+ bool gpu_norm; -+}; ++ if (err) { ++ dev_err(kbdev->dev, "%s error = %d\n", ++ context_init[i].err_mes, err); + -+/** -+ * struct kbase_ipa_control_session - Session for an IPA Control client -+ * -+ * @prfcnts: Sessions for individual performance counters. -+ * @num_prfcnts: Number of performance counters. -+ * @active: Indicates whether this slot is in use or not -+ * @last_query_time: Time of last query, in ns -+ * @protm_time: Amount of time (in ns) that GPU has been in protected -+ */ -+struct kbase_ipa_control_session { -+ struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS]; -+ size_t num_prfcnts; -+ bool active; -+ u64 last_query_time; -+ u64 protm_time; -+}; ++ /* kctx should be freed by kbase_context_free(). ++ * Otherwise it will result in memory leak. ++ */ ++ WARN_ON(i == 0); + -+/** -+ * struct kbase_ipa_control_prfcnt_config - Performance counter configuration -+ * -+ * @idx: Index of the performance counter inside the block, as specified -+ * in the GPU architecture. -+ * @refcount: Number of client sessions bound to this counter. -+ * -+ * This structure represents one configurable performance counter of -+ * the IPA Control interface. The entry may be mapped to a specific counter -+ * by one or more client sessions. The counter is considered to be unused -+ * if it isn't part of any client session. -+ */ -+struct kbase_ipa_control_prfcnt_config { -+ u8 idx; -+ u8 refcount; -+}; ++ kbase_context_term_partial(kctx, i); ++ return NULL; ++ } ++ } + -+/** -+ * struct kbase_ipa_control_prfcnt_block - Block of performance counters -+ * -+ * @select: Current performance counter configuration. -+ * @num_available_counters: Number of counters that are not already configured. -+ * -+ */ -+struct kbase_ipa_control_prfcnt_block { -+ struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS]; -+ size_t num_available_counters; -+}; ++ return kctx; ++} ++KBASE_EXPORT_SYMBOL(kbase_create_context); + -+/** -+ * struct kbase_ipa_control - Manager of the IPA Control interface. -+ * -+ * @blocks: Current configuration of performance counters -+ * for the IPA Control interface. -+ * @sessions: State of client sessions, storing information -+ * like performance counters the client subscribed to -+ * and latest value read from each counter. -+ * @lock: Spinlock to serialize access by concurrent clients. -+ * @rtm_listener_data: Private data for allocating a GPU frequency change -+ * listener. -+ * @num_active_sessions: Number of sessions opened by clients. -+ * @cur_gpu_rate: Current GPU top-level operating frequency, in Hz. -+ * @rtm_listener_data: Private data for allocating a GPU frequency change -+ * listener. 
-+ * @protm_start: Time (in ns) at which the GPU entered protected mode -+ */ -+struct kbase_ipa_control { -+ struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM]; -+ struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS]; -+ spinlock_t lock; -+ void *rtm_listener_data; -+ size_t num_active_sessions; -+ u32 cur_gpu_rate; -+ u64 protm_start; -+}; ++void kbase_destroy_context(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; + -+/** -+ * struct kbase_csf_firmware_interface - Interface in the MCU firmware -+ * -+ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces -+ * list using this list_head to link them -+ * @phys: Array of the physical (tagged) addresses making up this interface -+ * @reuse_pages: Flag used to identify if the FW interface entry reuses -+ * physical pages allocated for another FW interface entry. -+ * @is_small_page: Flag used to identify if small pages are used for -+ * the FW interface entry. -+ * @name: NULL-terminated string naming the interface -+ * @num_pages: Number of entries in @phys and @pma (and length of the interface) -+ * @num_pages_aligned: Same as @num_pages except for the case when @is_small_page -+ * is false and @reuse_pages is false and therefore will be -+ * aligned to NUM_4K_PAGES_IN_2MB_PAGE. -+ * @virtual: Starting GPU virtual address this interface is mapped at -+ * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes -+ * @data_start: Offset into firmware image at which the interface data starts -+ * @data_end: Offset into firmware image at which the interface data ends -+ * @virtual_exe_start: Starting GPU execution virtual address of this interface -+ * @kernel_map: A kernel mapping of the memory or NULL if not required to be -+ * mapped in the kernel -+ * @pma: Array of pointers to protected memory allocations. -+ */ -+struct kbase_csf_firmware_interface { -+ struct list_head node; -+ struct tagged_addr *phys; -+ bool reuse_pages; -+ bool is_small_page; -+ char *name; -+ u32 num_pages; -+ u32 num_pages_aligned; -+ u32 virtual; -+ u32 flags; -+ u32 data_start; -+ u32 data_end; -+ u32 virtual_exe_start; -+ void *kernel_map; -+ struct protected_memory_allocation **pma; -+}; ++ if (WARN_ON(!kctx)) ++ return; + -+/* -+ * struct kbase_csf_hwcnt - Object containing members for handling the dump of -+ * HW counters. -+ * -+ * @request_pending: Flag set when HWC requested and used for HWC sample -+ * done interrupt. -+ * @enable_pending: Flag set when HWC enable status change and used for -+ * enable done interrupt. -+ */ -+struct kbase_csf_hwcnt { -+ bool request_pending; -+ bool enable_pending; -+}; ++ kbdev = kctx->kbdev; ++ if (WARN_ON(!kbdev)) ++ return; ++ ++ /* Context termination could happen whilst the system suspend of ++ * the GPU device is ongoing or has completed. It has been seen on ++ * Customer side that a hang could occur if context termination is ++ * not blocked until the resume of GPU device. 
++ */ ++ while (kbase_pm_context_active_handle_suspend( ++ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ dev_info(kbdev->dev, ++ "Suspend in progress when destroying context"); ++ wait_event(kbdev->pm.resume_wait, ++ !kbase_pm_is_suspending(kbdev)); ++ } ++ ++ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); ++ ++ kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + ++ kbase_pm_context_idle(kbdev); ++} ++KBASE_EXPORT_SYMBOL(kbase_destroy_context); +diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +new file mode 100644 +index 000000000..f49b4734e +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +@@ -0,0 +1,272 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* -+ * struct kbase_csf_mcu_fw - Object containing device loaded MCU firmware data. + * -+ * @size: Loaded firmware data size. Meaningful only when the -+ * other field @p data is not NULL. -+ * @data: Pointer to the device retained firmware data. If NULL -+ * means not loaded yet or error in loading stage. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ -+struct kbase_csf_mcu_fw { -+ size_t size; -+ u8 *data; -+}; + +/* -+ * Firmware log polling period. ++ * Base kernel context APIs for Job Manager GPUs + */ -+#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 + -+/** -+ * enum kbase_csf_firmware_log_mode - Firmware log operating mode -+ * -+ * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read -+ * manually by the userspace (and it will also be dumped automatically into -+ * dmesg on GPU reset). -+ * -+ * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log -+ * will be periodically emptied into dmesg, manual reading through debugfs is -+ * disabled. -+ */ -+enum kbase_csf_firmware_log_mode { -+ KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, -+ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT -+}; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * struct kbase_csf_firmware_log - Object containing members for handling firmware log. -+ * -+ * @mode: Firmware log operating mode. -+ * @busy: Indicating whether a firmware log operation is in progress. -+ * @poll_work: Work item that would poll firmware log buffer -+ * at regular intervals to perform any periodic -+ * activities required by current log mode. -+ * @dump_buf: Buffer used for dumping the log. -+ * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. -+ * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. 
-+ */ -+struct kbase_csf_firmware_log { -+ enum kbase_csf_firmware_log_mode mode; -+ atomic_t busy; -+ struct delayed_work poll_work; -+ u8 *dump_buf; -+ u32 func_call_list_va_start; -+ u32 func_call_list_va_end; -+}; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include ++#include ++#include ++#include ++ ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ kbase_debug_mem_view_init(kctx); ++ kbase_debug_mem_zones_init(kctx); ++ kbase_debug_mem_allocs_init(kctx); ++ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); ++ kbase_jit_debugfs_init(kctx); ++ kbasep_jd_debugfs_ctx_init(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++ ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ debugfs_remove_recursive(kctx->kctx_dentry); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#else ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++ ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#endif /* CONFIG_DEBUG_FS */ ++ ++static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx) ++{ ++ return kbase_kinstr_jm_init(&kctx->kinstr_jm); ++} ++ ++static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) ++{ ++ kbase_kinstr_jm_term(kctx->kinstr_jm); ++} ++ ++static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) ++{ ++ kbase_timer_setup(&kctx->soft_job_timeout, ++ kbasep_soft_job_timeout_worker); ++ ++ return 0; ++} ++ ++static int kbase_context_submit_check(struct kbase_context *kctx) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; ++ unsigned long irq_flags = 0; ++ ++ base_context_create_flags const flags = kctx->create_flags; ++ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++ ++ /* Translate the flags */ ++ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) ++ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); ++ ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ ++ return 0; ++} ++ ++static void kbase_context_flush_jobs(struct kbase_context *kctx) ++{ ++ kbase_jd_zap_context(kctx); ++ flush_workqueue(kctx->jctx.job_done_wq); ++} + +/** -+ * struct kbase_csf_firmware_core_dump - Object containing members for handling -+ * firmware core dump. ++ * kbase_context_free - Free kcontext at its destruction + * -+ * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer -+ * in Firmware. -+ * @version: Version of the FW image header core dump data format. Bits -+ * 7:0 specify version minor and 15:8 specify version major. -+ * @available: Flag to identify if the FW core dump buffer is available. -+ * True if entry is available in the FW image header and version -+ * is supported, False otherwise. 
++ * @kctx: kcontext to be freed + */ -+struct kbase_csf_firmware_core_dump { -+ u32 mcu_regs_addr; -+ u16 version; -+ bool available; -+}; ++static void kbase_context_free(struct kbase_context *kctx) ++{ ++ kbase_timeline_post_kbase_context_destroy(kctx); ++ ++ vfree(kctx); ++} + ++static const struct kbase_context_init context_init[] = { ++ { NULL, kbase_context_free, NULL }, ++ { kbase_context_common_init, kbase_context_common_term, ++ "Common context initialization failed" }, ++ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, ++ "Memory pool group initialization failed" }, ++ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, ++ "Memory evictable initialization failed" }, ++ { kbase_context_mmu_init, kbase_context_mmu_term, ++ "MMU initialization failed" }, ++ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, ++ "Memory alloc page failed" }, ++ { kbase_region_tracker_init, kbase_region_tracker_term, ++ "Region tracker initialization failed" }, ++ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, ++ "Sticky resource initialization failed" }, ++ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, ++ { kbase_context_kbase_kinstr_jm_init, ++ kbase_context_kbase_kinstr_jm_term, ++ "JM instrumentation initialization failed" }, ++ { kbase_context_kbase_timer_setup, NULL, ++ "Timers initialization failed" }, ++ { kbase_event_init, kbase_event_cleanup, ++ "Event initialization failed" }, ++ { kbasep_js_kctx_init, kbasep_js_kctx_term, ++ "JS kctx initialization failed" }, ++ { kbase_jd_init, kbase_jd_exit, "JD initialization failed" }, ++ { kbase_context_submit_check, NULL, "Enabling job submission failed" }, +#if IS_ENABLED(CONFIG_DEBUG_FS) -+/** -+ * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon -+ * -+ * @error_code: Error code. -+ * @kctx_tgid: tgid value of the Kbase context for which the fault happened. -+ * @kctx_id: id of the Kbase context for which the fault happened. -+ * @enabled: Flag to indicate that 'csf_fault' debugfs has been opened -+ * so dump on fault is enabled. -+ * @fault_wait_wq: Waitqueue on which user space client is blocked till kbase -+ * reports a fault. -+ * @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client -+ * completes the dump on fault. -+ * @lock: Lock to protect this struct members from concurrent access. -+ */ -+struct kbase_csf_dump_on_fault { -+ enum dumpfault_error_type error_code; -+ u32 kctx_tgid; -+ u32 kctx_id; -+ atomic_t enabled; -+ wait_queue_head_t fault_wait_wq; -+ wait_queue_head_t dump_wait_wq; -+ spinlock_t lock; ++ { kbase_debug_job_fault_context_init, ++ kbase_debug_job_fault_context_term, ++ "Job fault context initialization failed" }, ++#endif ++ { NULL, kbase_context_flush_jobs, NULL }, ++ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, ++ "Adding kctx to device failed" }, ++ { kbasep_platform_context_init, kbasep_platform_context_term, ++ "Platform callback for kctx initialization failed" }, +}; -+#endif /* CONFIG_DEBUG_FS*/ + -+/** -+ * struct kbase_csf_user_reg - Object containing members to manage the mapping -+ * of USER Register page for all contexts -+ * -+ * @dummy_page: Address of a dummy page that is mapped in place -+ * of the real USER Register page just before the GPU -+ * is powered down. The USER Register page is mapped -+ * in the address space of every process, that created -+ * a Base context, to enable the access to LATEST_FLUSH -+ * register from userspace. 
-+ * @filp: Pointer to a dummy file, that along with @file_offset, -+ * facilitates the use of unique file offset for the userspace mapping -+ * created for USER Register page. -+ * The userspace mapping is made to point to this file -+ * inside the mmap handler. -+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of -+ * USER Register page, to provide a unique file offset range for -+ * @filp file, so that the CPU PTE of the Userspace mapping can be zapped -+ * through the kernel function unmap_mapping_range(). -+ * It is incremented in page units. -+ * @list: Linked list to maintain user processes(contexts) -+ * having the mapping to USER Register page. -+ * It's protected by &kbase_csf_device.reg_lock. -+ */ -+struct kbase_csf_user_reg { -+ struct tagged_addr dummy_page; -+ struct file *filp; -+ u32 file_offset; -+ struct list_head list; -+}; ++static void kbase_context_term_partial( ++ struct kbase_context *kctx, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (context_init[i].term) ++ context_init[i].term(kctx); ++ } ++} + -+/** -+ * struct kbase_csf_device - Object representing CSF for an instance of GPU -+ * platform device. -+ * -+ * @mcu_mmu: MMU page tables for the MCU firmware -+ * @firmware_interfaces: List of interfaces defined in the firmware image -+ * @firmware_config: List of configuration options within the firmware -+ * image -+ * @firmware_timeline_metadata: List of timeline meta-data within the firmware -+ * image -+ * @fw_cfg_kobj: Pointer to the kobject corresponding to the sysf -+ * directory that contains a sub-directory for each -+ * of the configuration option present in the -+ * firmware image. -+ * @firmware_trace_buffers: List of trace buffers described in the firmware -+ * image. -+ * @shared_interface: Pointer to the interface object containing info for -+ * the memory area shared between firmware & host. -+ * @shared_reg_rbtree: RB tree of the memory regions allocated from the -+ * shared interface segment in MCU firmware address -+ * space. -+ * @db_filp: Pointer to a dummy file, that alongwith -+ * @db_file_offsets, facilitates the use of unqiue -+ * file offset for the userspace mapping created -+ * for Hw Doorbell pages. The userspace mapping -+ * is made to point to this file inside the mmap -+ * handler. -+ * @db_file_offsets: Counter that is incremented every time a GPU -+ * command queue is bound to provide a unique file -+ * offset range for @db_filp file, so that pte of -+ * Doorbell page can be zapped through the kernel -+ * function unmap_mapping_range(). It is incremented -+ * in page units. -+ * @dummy_db_page: Address of the dummy page that is mapped in place -+ * of the real Hw doorbell page for the active GPU -+ * command queues after they are stopped or after the -+ * GPU is powered down. -+ * @reg_lock: Lock to serialize the MCU firmware related actions -+ * that affect all contexts such as allocation of -+ * regions from shared interface area, assignment of -+ * hardware doorbell pages, assignment of CSGs, -+ * sending global requests. -+ * @event_wait: Wait queue to wait for receiving csf events, i.e. -+ * the interrupt from CSF firmware, or scheduler state -+ * changes. -+ * @interrupt_received: Flag set when the interrupt is received from CSF fw -+ * @global_iface: The result of parsing the global interface -+ * structure set up by the firmware, including the -+ * CSGs, CSs, and their properties -+ * @scheduler: The CS scheduler instance. -+ * @reset: Contain members required for GPU reset handling. 
-+ * @progress_timeout: Maximum number of GPU clock cycles without forward -+ * progress to allow, for all tasks running on -+ * hardware endpoints (e.g. shader cores), before -+ * terminating a GPU command queue group. -+ * Must not exceed @GLB_PROGRESS_TIMER_TIMEOUT_MAX. -+ * @pma_dev: Pointer to protected memory allocator device. -+ * @firmware_inited: Flag for indicating that the cold-boot stage of -+ * the MCU has completed. -+ * @firmware_reloaded: Flag for indicating a firmware reload operation -+ * in GPU reset has completed. -+ * @firmware_reload_needed: Flag for indicating that the firmware needs to be -+ * reloaded as part of the GPU reset action. -+ * @firmware_full_reload_needed: Flag for indicating that the firmware needs to -+ * be fully re-loaded. This may be set when the -+ * boot or re-init of MCU fails after a successful -+ * soft reset. -+ * @firmware_hctl_core_pwr: Flag for indicating that the host diver is in -+ * charge of the shader core's power transitions, and -+ * the mcu_core_pwroff timeout feature is disabled -+ * (i.e. configured 0 in the register field). If -+ * false, the control is delegated to the MCU. -+ * @firmware_reload_work: Work item for facilitating the procedural actions -+ * on reloading the firmware. -+ * @glb_init_request_pending: Flag to indicate that Global requests have been -+ * sent to the FW after MCU was re-enabled and their -+ * acknowledgement is pending. -+ * @fw_error_work: Work item for handling the firmware internal error -+ * fatal event. -+ * @ipa_control: IPA Control component manager. -+ * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input -+ * in unit of micro-seconds. The firmware does not use -+ * it directly. -+ * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input -+ * in interface required format, ready to be used -+ * directly in the firmware. -+ * @mcu_core_pwroff_reg_shadow: The actual value that has been programed into -+ * the glb_pwoff register. This is separated from -+ * the @p mcu_core_pwroff_dur_count as an update -+ * to the latter is asynchronous. -+ * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time -+ * window in unit of microseconds. The firmware does not -+ * use it directly. -+ * @gpu_idle_dur_count: The counterpart of the hysteresis time window in -+ * interface required format, ready to be used -+ * directly in the firmware. -+ * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting -+ * for any request sent to the firmware. -+ * @hwcnt: Contain members required for handling the dump of -+ * HW counters. -+ * @fw: Copy of the loaded MCU firmware image. -+ * @fw_log: Contain members required for handling firmware log. -+ * @fw_core_dump: Contain members required for handling the firmware -+ * core dump. -+ * @dof: Structure for dump on fault. -+ * @user_reg: Collective information to support the mapping to -+ * USER Register page for user processes. 
-+ */ -+struct kbase_csf_device { -+ struct kbase_mmu_table mcu_mmu; -+ struct list_head firmware_interfaces; -+ struct list_head firmware_config; -+ struct list_head firmware_timeline_metadata; -+ struct kobject *fw_cfg_kobj; -+ struct kbase_csf_trace_buffers firmware_trace_buffers; -+ void *shared_interface; -+ struct rb_root shared_reg_rbtree; -+ struct file *db_filp; -+ u32 db_file_offsets; -+ struct tagged_addr dummy_db_page; -+ struct mutex reg_lock; -+ wait_queue_head_t event_wait; -+ bool interrupt_received; -+ struct kbase_csf_global_iface global_iface; -+ struct kbase_csf_scheduler scheduler; -+ struct kbase_csf_reset_gpu reset; -+ atomic64_t progress_timeout; -+ struct protected_memory_allocator_device *pma_dev; -+ bool firmware_inited; -+ bool firmware_reloaded; -+ bool firmware_reload_needed; -+ bool firmware_full_reload_needed; -+ bool firmware_hctl_core_pwr; -+ struct work_struct firmware_reload_work; -+ bool glb_init_request_pending; -+ struct work_struct fw_error_work; -+ struct kbase_ipa_control ipa_control; -+ u32 mcu_core_pwroff_dur_us; -+ u32 mcu_core_pwroff_dur_count; -+ u32 mcu_core_pwroff_reg_shadow; -+ u32 gpu_idle_hysteresis_us; -+ u32 gpu_idle_dur_count; -+ unsigned int fw_timeout_ms; -+ struct kbase_csf_hwcnt hwcnt; -+ struct kbase_csf_mcu_fw fw; -+ struct kbase_csf_firmware_log fw_log; -+ struct kbase_csf_firmware_core_dump fw_core_dump; -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_csf_dump_on_fault dof; -+#endif /* CONFIG_DEBUG_FS */ -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ /** -+ * @coresight: Coresight device structure. ++struct kbase_context *kbase_create_context(struct kbase_device *kbdev, ++ bool is_compat, ++ base_context_create_flags const flags, ++ unsigned long const api_version, ++ struct file *const filp) ++{ ++ struct kbase_context *kctx; ++ unsigned int i = 0; ++ ++ if (WARN_ON(!kbdev)) ++ return NULL; ++ ++ /* Validate flags */ ++ if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) ++ return NULL; ++ ++ /* zero-inited as lot of code assume it's zero'ed out on create */ ++ kctx = vzalloc(sizeof(*kctx)); ++ if (WARN_ON(!kctx)) ++ return NULL; ++ ++ kctx->kbdev = kbdev; ++ kctx->api_version = api_version; ++ kctx->filp = filp; ++ kctx->create_flags = flags; ++ ++ if (is_compat) ++ kbase_ctx_flag_set(kctx, KCTX_COMPAT); ++#if defined(CONFIG_64BIT) ++ else ++ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); ++#endif /* defined(CONFIG_64BIT) */ ++ ++ for (i = 0; i < ARRAY_SIZE(context_init); i++) { ++ int err = 0; ++ ++ if (context_init[i].init) ++ err = context_init[i].init(kctx); ++ ++ if (err) { ++ dev_err(kbdev->dev, "%s error = %d\n", ++ context_init[i].err_mes, err); ++ ++ /* kctx should be freed by kbase_context_free(). ++ * Otherwise it will result in memory leak. ++ */ ++ WARN_ON(i == 0); ++ ++ kbase_context_term_partial(kctx, i); ++ return NULL; ++ } ++ } ++ ++ return kctx; ++} ++KBASE_EXPORT_SYMBOL(kbase_create_context); ++ ++void kbase_destroy_context(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; ++ ++ if (WARN_ON(!kctx)) ++ return; ++ ++ kbdev = kctx->kbdev; ++ if (WARN_ON(!kbdev)) ++ return; ++ ++ /* Context termination could happen whilst the system suspend of ++ * the GPU device is ongoing or has completed. It has been seen on ++ * Customer side that a hang could occur if context termination is ++ * not blocked until the resume of GPU device. 
+ */ -+ struct kbase_debug_coresight_device coresight; -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+ struct kbase_csf_user_reg user_reg; -+}; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_inc(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ while (kbase_pm_context_active_handle_suspend( ++ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ dev_dbg(kbdev->dev, ++ "Suspend in progress when destroying context"); ++ wait_event(kbdev->pm.resume_wait, ++ !kbase_pm_is_suspending(kbdev)); ++ } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_dec(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+/** -+ * struct kbase_as - Object representing an address space of GPU. -+ * @number: Index at which this address space structure is present -+ * in an array of address space structures embedded inside -+ * the &struct kbase_device. -+ * @pf_wq: Workqueue for processing work items related to -+ * Page fault, Bus fault and GPU fault handling. -+ * @work_pagefault: Work item for the Page fault handling. -+ * @work_busfault: Work item for the Bus fault handling. -+ * @work_gpufault: Work item for the GPU fault handling. -+ * @pf_data: Data relating to Page fault. -+ * @bf_data: Data relating to Bus fault. -+ * @gf_data: Data relating to GPU fault. -+ * @current_setup: Stores the MMU configuration for this address space. -+ * @is_unresponsive: Flag to indicate MMU is not responding. -+ * Set if a MMU command isn't completed within -+ * &kbase_device:mmu_as_inactive_wait_time_ms. -+ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. -+ */ -+struct kbase_as { -+ int number; -+ struct workqueue_struct *pf_wq; -+ struct work_struct work_pagefault; -+ struct work_struct work_busfault; -+ struct work_struct work_gpufault; -+ struct kbase_fault pf_data; -+ struct kbase_fault bf_data; -+ struct kbase_fault gf_data; -+ struct kbase_mmu_setup current_setup; -+ bool is_unresponsive; -+}; ++ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + -+#endif /* _KBASE_CSF_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c ++ kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); ++ ++ kbase_pm_context_idle(kbdev); ++} ++KBASE_EXPORT_SYMBOL(kbase_destroy_context); +diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c new file mode 100644 -index 000000000..49e529384 +index 000000000..88be6c2e7 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c -@@ -0,0 +1,265 @@ ++++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +@@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -145750,261 +146241,388 @@ index 000000000..49e529384 + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ ++ ++/* ++ * Base kernel context APIs ++ */ ++#include ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++#include ++#else ++#include ++#endif ++ +#include -+#include "mali_kbase_csf_event.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +/** -+ * struct kbase_csf_event_cb - CSF event callback. 
++ * find_process_node - Used to traverse the process rb_tree to find if ++ * process exists already in process rb_tree. + * -+ * @link: Link to the rest of the list. -+ * @kctx: Pointer to the Kbase context this event belongs to. -+ * @callback: Callback function to call when a CSF event is signalled. -+ * @param: Parameter to pass to the callback function. ++ * @node: Pointer to root node to start search. ++ * @tgid: Thread group PID to search for. + * -+ * This structure belongs to the list of events which is part of a Kbase -+ * context, and describes a callback function with a custom parameter to pass -+ * to it when a CSF event is signalled. ++ * Return: Pointer to kbase_process if exists otherwise NULL. + */ -+struct kbase_csf_event_cb { -+ struct list_head link; -+ struct kbase_context *kctx; -+ kbase_csf_event_callback *callback; -+ void *param; -+}; ++static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) ++{ ++ struct kbase_process *kprcs = NULL; + -+int kbase_csf_event_wait_add(struct kbase_context *kctx, -+ kbase_csf_event_callback *callback, void *param) ++ /* Check if the kctx creation request is from a existing process.*/ ++ while (node) { ++ struct kbase_process *prcs_node = ++ rb_entry(node, struct kbase_process, kprcs_node); ++ if (prcs_node->tgid == tgid) { ++ kprcs = prcs_node; ++ break; ++ } ++ ++ if (tgid < prcs_node->tgid) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return kprcs; ++} ++ ++/** ++ * kbase_insert_kctx_to_process - Initialise kbase process context. ++ * ++ * @kctx: Pointer to kbase context. ++ * ++ * Here we initialise per process rb_tree managed by kbase_device. ++ * We maintain a rb_tree of each unique process that gets created. ++ * and Each process maintains a list of kbase context. ++ * This setup is currently used by kernel trace functionality ++ * to trace and visualise gpu memory consumption. ++ * ++ * Return: 0 on success and error number on failure. 
++ */ ++static int kbase_insert_kctx_to_process(struct kbase_context *kctx) +{ -+ int err = -ENOMEM; -+ struct kbase_csf_event_cb *event_cb = -+ kzalloc(sizeof(struct kbase_csf_event_cb), GFP_KERNEL); ++ struct rb_root *const prcs_root = &kctx->kbdev->process_root; ++ const pid_t tgid = kctx->tgid; ++ struct kbase_process *kprcs = NULL; + -+ if (event_cb) { -+ unsigned long flags; ++ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); + -+ event_cb->kctx = kctx; -+ event_cb->callback = callback; -+ event_cb->param = param; ++ kprcs = find_process_node(prcs_root->rb_node, tgid); + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); -+ list_add_tail(&event_cb->link, &kctx->csf.event.callback_list); -+ dev_dbg(kctx->kbdev->dev, -+ "Added event handler %pK with param %pK\n", event_cb, -+ event_cb->param); -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ /* if the kctx is from new process then create a new kbase_process ++ * and add it to the &kbase_device->rb_tree ++ */ ++ if (!kprcs) { ++ struct rb_node **new = &prcs_root->rb_node, *parent = NULL; + -+ err = 0; ++ kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL); ++ if (kprcs == NULL) ++ return -ENOMEM; ++ kprcs->tgid = tgid; ++ INIT_LIST_HEAD(&kprcs->kctx_list); ++ kprcs->dma_buf_root = RB_ROOT; ++ kprcs->total_gpu_pages = 0; ++ ++ while (*new) { ++ struct kbase_process *prcs_node; ++ ++ parent = *new; ++ prcs_node = rb_entry(parent, struct kbase_process, ++ kprcs_node); ++ if (tgid < prcs_node->tgid) ++ new = &(*new)->rb_left; ++ else ++ new = &(*new)->rb_right; ++ } ++ rb_link_node(&kprcs->kprcs_node, parent, new); ++ rb_insert_color(&kprcs->kprcs_node, prcs_root); + } + -+ return err; ++ kctx->kprcs = kprcs; ++ list_add(&kctx->kprcs_link, &kprcs->kctx_list); ++ ++ return 0; +} + -+void kbase_csf_event_wait_remove(struct kbase_context *kctx, -+ kbase_csf_event_callback *callback, void *param) ++int kbase_context_common_init(struct kbase_context *kctx) +{ -+ struct kbase_csf_event_cb *event_cb; -+ unsigned long flags; ++ const unsigned long cookies_mask = KBASE_COOKIE_MASK; ++ int err = 0; + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ /* creating a context is considered a disjoint event */ ++ kbase_disjoint_event(kctx->kbdev); + -+ list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) { -+ if ((event_cb->callback == callback) && (event_cb->param == param)) { -+ list_del(&event_cb->link); -+ dev_dbg(kctx->kbdev->dev, -+ "Removed event handler %pK with param %pK\n", -+ event_cb, event_cb->param); -+ kfree(event_cb); -+ break; ++ kctx->process_mm = NULL; ++ kctx->task = NULL; ++ atomic_set(&kctx->nonmapped_pages, 0); ++ atomic_set(&kctx->permanent_mapped_pages, 0); ++ kctx->tgid = task_tgid_vnr(current); ++ kctx->pid = task_pid_vnr(current); ++ ++ /* Check if this is a Userspace created context */ ++ if (likely(kctx->filp)) { ++ struct pid *pid_struct; ++ ++ rcu_read_lock(); ++ pid_struct = find_get_pid(kctx->tgid); ++ if (likely(pid_struct)) { ++ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID); ++ ++ if (likely(task)) { ++ /* Take a reference on the task to avoid slow lookup ++ * later on from the page allocation loop. 
++ */ ++ get_task_struct(task); ++ kctx->task = task; ++ } else { ++ dev_err(kctx->kbdev->dev, ++ "Failed to get task pointer for %s/%d", ++ current->comm, kctx->pid); ++ err = -ESRCH; ++ } ++ ++ put_pid(pid_struct); ++ } else { ++ dev_err(kctx->kbdev->dev, ++ "Failed to get pid pointer for %s/%d", ++ current->comm, kctx->pid); ++ err = -ESRCH; + } ++ rcu_read_unlock(); ++ ++ if (unlikely(err)) ++ return err; ++ ++ kbase_mem_mmgrab(); ++ kctx->process_mm = current->mm; + } -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); -+} + -+static void sync_update_notify_gpu(struct kbase_context *kctx) -+{ -+ bool can_notify_gpu; -+ unsigned long flags; ++ atomic_set(&kctx->used_pages, 0); + -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); -+ can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered; -+#ifdef KBASE_PM_RUNTIME -+ if (kctx->kbdev->pm.backend.gpu_sleep_mode_active) -+ can_notify_gpu = false; ++ mutex_init(&kctx->reg_lock); ++ ++ spin_lock_init(&kctx->mem_partials_lock); ++ INIT_LIST_HEAD(&kctx->mem_partials); ++ ++ spin_lock_init(&kctx->waiting_soft_jobs_lock); ++ INIT_LIST_HEAD(&kctx->waiting_soft_jobs); ++ ++ init_waitqueue_head(&kctx->event_queue); ++ atomic_set(&kctx->event_count, 0); ++ ++#if !MALI_USE_CSF ++ atomic_set(&kctx->event_closed, false); ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ atomic_set(&kctx->jctx.work_id, 0); ++#endif +#endif + -+ if (can_notify_gpu) { -+ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); -+ KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u); ++#if MALI_USE_CSF ++ atomic64_set(&kctx->num_fixable_allocs, 0); ++ atomic64_set(&kctx->num_fixed_allocs, 0); ++#endif ++ ++ kbase_gpu_vm_lock(kctx); ++ bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); ++ kbase_gpu_vm_unlock(kctx); ++ ++ kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; ++ ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ err = kbase_insert_kctx_to_process(kctx); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ if (err) { ++ dev_err(kctx->kbdev->dev, ++ "(err:%d) failed to insert kctx to kbase_process", err); ++ if (likely(kctx->filp)) { ++ mmdrop(kctx->process_mm); ++ put_task_struct(kctx->task); ++ } + } + -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ return err; +} + -+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) ++int kbase_context_add_to_dev_list(struct kbase_context *kctx) +{ -+ struct kbase_csf_event_cb *event_cb, *next_event_cb; -+ unsigned long flags; ++ if (WARN_ON(!kctx)) ++ return -EINVAL; + -+ dev_dbg(kctx->kbdev->dev, -+ "Signal event (%s GPU notify) for context %pK\n", -+ notify_gpu ? "with" : "without", (void *)kctx); ++ if (WARN_ON(!kctx->kbdev)) ++ return -EINVAL; + -+ /* First increment the signal count and wake up event thread. -+ */ -+ atomic_set(&kctx->event_count, 1); -+ kbase_event_wakeup(kctx); ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); + -+ /* Signal the CSF firmware. This is to ensure that pending command -+ * stream synch object wait operations are re-evaluated. -+ * Write to GLB_DOORBELL would suffice as spec says that all pending -+ * synch object wait operations are re-evaluated on a write to any -+ * CS_DOORBELL/GLB_DOORBELL register. -+ */ -+ if (notify_gpu) -+ sync_update_notify_gpu(kctx); ++ kbase_timeline_post_kbase_context_create(kctx); + -+ /* Now invoke the callbacks registered on backend side. 
-+ * Allow item removal inside the loop, if requested by the callback. -+ */ -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ return 0; ++} + -+ list_for_each_entry_safe( -+ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { -+ enum kbase_csf_event_callback_action action; ++void kbase_context_remove_from_dev_list(struct kbase_context *kctx) ++{ ++ if (WARN_ON(!kctx)) ++ return; + -+ dev_dbg(kctx->kbdev->dev, -+ "Calling event handler %pK with param %pK\n", -+ (void *)event_cb, event_cb->param); -+ action = event_cb->callback(event_cb->param); -+ if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { -+ list_del(&event_cb->link); -+ kfree(event_cb); -+ } -+ } ++ if (WARN_ON(!kctx->kbdev)) ++ return; + -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ kbase_timeline_pre_kbase_context_destroy(kctx); ++ ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ list_del_init(&kctx->kctx_list_link); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); +} + -+void kbase_csf_event_term(struct kbase_context *kctx) ++/** ++ * kbase_remove_kctx_from_process - remove a terminating context from ++ * the process list. ++ * ++ * @kctx: Pointer to kbase context. ++ * ++ * Remove the tracking of context from the list of contexts maintained under ++ * kbase process and if the list if empty then there no outstanding contexts ++ * we can remove the process node as well. ++ */ ++ ++static void kbase_remove_kctx_from_process(struct kbase_context *kctx) +{ -+ struct kbase_csf_event_cb *event_cb, *next_event_cb; -+ unsigned long flags; ++ struct kbase_process *kprcs = kctx->kprcs; + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); ++ list_del(&kctx->kprcs_link); + -+ list_for_each_entry_safe( -+ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { -+ list_del(&event_cb->link); -+ dev_warn(kctx->kbdev->dev, -+ "Removed event handler %pK with param %pK\n", -+ (void *)event_cb, event_cb->param); -+ kfree(event_cb); ++ /* if there are no outstanding contexts in current process node, ++ * we can remove it from the process rb_tree. ++ */ ++ if (list_empty(&kprcs->kctx_list)) { ++ rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root); ++ /* Add checks, so that the terminating process Should not ++ * hold any gpu_memory. 
++ */ ++ spin_lock(&kctx->kbdev->gpu_mem_usage_lock); ++ WARN_ON(kprcs->total_gpu_pages); ++ spin_unlock(&kctx->kbdev->gpu_mem_usage_lock); ++ WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); ++ kfree(kprcs); + } ++} + -+ WARN(!list_empty(&kctx->csf.event.error_list), -+ "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id); ++void kbase_context_common_term(struct kbase_context *kctx) ++{ ++ int pages; + -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ pages = atomic_read(&kctx->used_pages); ++ if (pages != 0) ++ dev_warn(kctx->kbdev->dev, ++ "%s: %d pages in use!\n", __func__, pages); ++ ++ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); ++ ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ kbase_remove_kctx_from_process(kctx); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ ++ if (likely(kctx->filp)) { ++ mmdrop(kctx->process_mm); ++ put_task_struct(kctx->task); ++ } ++ ++ KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); +} + -+void kbase_csf_event_init(struct kbase_context *const kctx) ++int kbase_context_mem_pool_group_init(struct kbase_context *kctx) +{ -+ INIT_LIST_HEAD(&kctx->csf.event.callback_list); -+ INIT_LIST_HEAD(&kctx->csf.event.error_list); -+ spin_lock_init(&kctx->csf.event.lock); ++ return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev, ++ &kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools); +} + -+void kbase_csf_event_remove_error(struct kbase_context *kctx, -+ struct kbase_csf_notification *error) ++void kbase_context_mem_pool_group_term(struct kbase_context *kctx) +{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); -+ list_del_init(&error->link); -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ kbase_mem_pool_group_term(&kctx->mem_pools); +} + -+bool kbase_csf_event_read_error(struct kbase_context *kctx, -+ struct base_csf_notification *event_data) ++int kbase_context_mmu_init(struct kbase_context *kctx) +{ -+ struct kbase_csf_notification *error_data = NULL; -+ unsigned long flags; ++ return kbase_mmu_init( ++ kctx->kbdev, &kctx->mmu, kctx, ++ kbase_context_mmu_group_id_get(kctx->create_flags)); ++} + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); -+ if (likely(!list_empty(&kctx->csf.event.error_list))) { -+ error_data = list_first_entry(&kctx->csf.event.error_list, -+ struct kbase_csf_notification, link); -+ list_del_init(&error_data->link); -+ *event_data = error_data->data; -+ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", -+ (void *)error_data, (void *)kctx); -+ } -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); -+ return !!error_data; ++void kbase_context_mmu_term(struct kbase_context *kctx) ++{ ++ kbase_mmu_term(kctx->kbdev, &kctx->mmu); +} + -+void kbase_csf_event_add_error(struct kbase_context *const kctx, -+ struct kbase_csf_notification *const error, -+ struct base_csf_notification const *const data) ++int kbase_context_mem_alloc_page(struct kbase_context *kctx) +{ -+ unsigned long flags; ++ struct page *p; + -+ if (WARN_ON(!kctx)) -+ return; ++ p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); ++ if (!p) ++ return -ENOMEM; + -+ if (WARN_ON(!error)) -+ return; ++ kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); + -+ if (WARN_ON(!data)) -+ return; ++ return 0; ++} + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); -+ if (list_empty(&error->link)) { -+ error->data = *data; -+ list_add_tail(&error->link, &kctx->csf.event.error_list); -+ dev_dbg(kctx->kbdev->dev, -+ "Added error %pK of type %d in context %pK\n", -+ 
(void *)error, data->type, (void *)kctx); -+ } else { -+ dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK", -+ (void *)error, error->data.type, (void *)kctx); -+ } -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++void kbase_context_mem_pool_free(struct kbase_context *kctx) ++{ ++ /* drop the aliasing sink page now that it can't be mapped anymore */ ++ kbase_mem_pool_free( ++ &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], ++ as_page(kctx->aliasing_sink_page), ++ false); +} + -+bool kbase_csf_event_error_pending(struct kbase_context *kctx) ++void kbase_context_sticky_resource_term(struct kbase_context *kctx) +{ -+ bool error_pending = false; -+ unsigned long flags; ++ unsigned long pending_regions_to_clean; + -+ /* Withhold the error event if the dump on fault is ongoing. -+ * This would prevent the Userspace from taking error recovery actions -+ * (which can potentially affect the state that is being dumped). -+ * Event handling thread would eventually notice the error event. -+ */ -+ if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev))) -+ return false; ++ kbase_gpu_vm_lock(kctx); ++ kbase_sticky_resource_term(kctx); + -+ spin_lock_irqsave(&kctx->csf.event.lock, flags); -+ error_pending = !list_empty(&kctx->csf.event.error_list); ++ /* free pending region setups */ ++ pending_regions_to_clean = KBASE_COOKIE_MASK; ++ bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, ++ kctx->cookies, BITS_PER_LONG); ++ while (pending_regions_to_clean) { ++ unsigned int cookie = find_first_bit(&pending_regions_to_clean, ++ BITS_PER_LONG); + -+ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", -+ error_pending ? "An" : "No", (void *)kctx); ++ if (!WARN_ON(!kctx->pending_regions[cookie])) { ++ dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); ++ kbase_mem_phy_alloc_put( ++ kctx->pending_regions[cookie]->cpu_alloc); ++ kbase_mem_phy_alloc_put( ++ kctx->pending_regions[cookie]->gpu_alloc); ++ kfree(kctx->pending_regions[cookie]); + -+ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ kctx->pending_regions[cookie] = NULL; ++ } + -+ return error_pending; ++ bitmap_clear(&pending_regions_to_clean, cookie, 1); ++ } ++ kbase_gpu_vm_unlock(kctx); +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h +diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h new file mode 100644 -index 000000000..52122a9ef +index 000000000..7c90e2708 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h -@@ -0,0 +1,171 @@ ++++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h +@@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -146022,166 +146640,136 @@ index 000000000..52122a9ef + * + */ + -+#ifndef _KBASE_CSF_EVENT_H_ -+#define _KBASE_CSF_EVENT_H_ -+ -+#include -+#include ++#ifndef _KBASE_CONTEXT_H_ ++#define _KBASE_CONTEXT_H_ + -+struct kbase_context; -+struct kbase_csf_event; -+enum kbase_csf_event_callback_action; ++#include + +/** -+ * kbase_csf_event_callback - type for callback functions to be -+ * called upon CSF events. 
-+ * @param: Generic parameter to pass to the callback function. ++ * kbase_context_debugfs_init - Initialize the kctx platform ++ * specific debugfs + * -+ * This is the type of callback functions that can be registered -+ * for CSF events. These function calls shall be triggered by any call -+ * to kbase_csf_event_signal. ++ * @kctx: kbase context + * -+ * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain -+ * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed. ++ * This initializes some debugfs interfaces specific to the platform the source ++ * is compiled for. + */ -+typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); ++void kbase_context_debugfs_init(struct kbase_context *const kctx); + +/** -+ * kbase_csf_event_wait_add - Add a CSF event callback -+ * -+ * @kctx: The Kbase context the @callback should be registered to. -+ * @callback: The callback function to register. -+ * @param: Custom parameter to be passed to the @callback function. ++ * kbase_context_debugfs_term - Terminate the kctx platform ++ * specific debugfs + * -+ * This function adds an event callback to the list of CSF event callbacks -+ * belonging to a given Kbase context, to be triggered when a CSF event is -+ * signalled by kbase_csf_event_signal. ++ * @kctx: kbase context + * -+ * Return: 0 on success, or negative on failure. ++ * This terminates some debugfs interfaces specific to the platform the source ++ * is compiled for. + */ -+int kbase_csf_event_wait_add(struct kbase_context *kctx, -+ kbase_csf_event_callback *callback, void *param); ++void kbase_context_debugfs_term(struct kbase_context *const kctx); + +/** -+ * kbase_csf_event_wait_remove - Remove a CSF event callback ++ * kbase_create_context() - Create a kernel base context. + * -+ * @kctx: The kbase context the @callback should be removed from. -+ * @callback: The callback function to remove. -+ * @param: Custom parameter that would have been passed to the @p callback -+ * function. ++ * @kbdev: Object representing an instance of GPU platform device, ++ * allocated from the probe method of the Mali driver. ++ * @is_compat: Force creation of a 32-bit context ++ * @flags: Flags to set, which shall be any combination of ++ * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. ++ * @api_version: Application program interface version, as encoded in ++ * a single integer by the KBASE_API_VERSION macro. ++ * @filp: Pointer to the struct file corresponding to device file ++ * /dev/malixx instance, passed to the file's open method. + * -+ * This function removes an event callback from the list of CSF event callbacks -+ * belonging to a given Kbase context. ++ * Up to one context can be created for each client that opens the device file ++ * /dev/malixx. Context creation is deferred until a special ioctl() system call ++ * is made on the device file. Each context has its own GPU address space. ++ * ++ * Return: new kbase context or NULL on failure + */ -+void kbase_csf_event_wait_remove(struct kbase_context *kctx, -+ kbase_csf_event_callback *callback, void *param); ++struct kbase_context * ++kbase_create_context(struct kbase_device *kbdev, bool is_compat, ++ base_context_create_flags const flags, ++ unsigned long api_version, ++ struct file *filp); + +/** -+ * kbase_csf_event_term - Removes all CSF event callbacks -+ * -+ * @kctx: The kbase context for which CSF event callbacks have to be removed. ++ * kbase_destroy_context - Destroy a kernel base context. 
++ * @kctx: Context to destroy + * -+ * This function empties the list of CSF event callbacks belonging to a given -+ * Kbase context. ++ * Will release all outstanding regions. + */ -+void kbase_csf_event_term(struct kbase_context *kctx); ++void kbase_destroy_context(struct kbase_context *kctx); + +/** -+ * kbase_csf_event_signal - Signal a CSF event -+ * -+ * @kctx: The kbase context whose CSF event callbacks shall be triggered. -+ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the -+ * signaling of event that happened on the Driver side, either -+ * the signal came from userspace or from kcpu queues. ++ * kbase_ctx_flag - Check if @flag is set on @kctx ++ * @kctx: Pointer to kbase context to check ++ * @flag: Flag to check + * -+ * This function triggers all the CSF event callbacks that are registered to -+ * a given Kbase context, and also signals the event handling thread of -+ * userspace driver waiting for the CSF event. ++ * Return: true if @flag is set on @kctx, false if not. + */ -+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); -+ -+static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) -+{ -+ kbase_csf_event_signal(kctx, true); -+} -+ -+static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) ++static inline bool kbase_ctx_flag(struct kbase_context *kctx, ++ enum kbase_context_flags flag) +{ -+ kbase_csf_event_signal(kctx, false); ++ return atomic_read(&kctx->flags) & flag; +} + +/** -+ * kbase_csf_event_init - Initialize event object -+ * -+ * @kctx: The kbase context whose event object will be initialized. -+ * -+ * This function initializes the event object. -+ */ -+void kbase_csf_event_init(struct kbase_context *const kctx); -+ -+struct kbase_csf_notification; -+struct base_csf_notification; -+/** -+ * kbase_csf_event_read_error - Read and remove an error from error list in event -+ * -+ * @kctx: The kbase context. -+ * @event_data: Caller-provided buffer to copy the fatal error to -+ * -+ * This function takes the CS fatal error from context's ordered -+ * error_list, copies its contents to @event_data. ++ * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate ++ * in compatibility mode for 32-bit userspace. ++ * @kctx: kbase context + * -+ * Return: true if error is read out or false if there is no error in error list. ++ * Return: True if needs to maintain compatibility, False otherwise. + */ -+bool kbase_csf_event_read_error(struct kbase_context *kctx, -+ struct base_csf_notification *event_data); ++static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx) ++{ ++ return !IS_ENABLED(CONFIG_64BIT) || ++ (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT)); ++} + +/** -+ * kbase_csf_event_add_error - Add an error into event error list -+ * -+ * @kctx: Address of a base context associated with a GPU address space. -+ * @error: Address of the item to be added to the context's pending error list. -+ * @data: Error data to be returned to userspace. -+ * -+ * Does not wake up the event queue blocking a user thread in kbase_poll. This -+ * is to make it more efficient to add multiple errors. -+ * -+ * The added error must not already be on the context's list of errors waiting -+ * to be reported (e.g. because a previous error concerning the same object has -+ * not yet been reported). 
++ * kbase_ctx_flag_clear - Clear @flag on @kctx ++ * @kctx: Pointer to kbase context ++ * @flag: Flag to clear + * -+ */ -+void kbase_csf_event_add_error(struct kbase_context *const kctx, -+ struct kbase_csf_notification *const error, -+ struct base_csf_notification const *const data); -+ -+/** -+ * kbase_csf_event_remove_error - Remove an error from event error list ++ * Clear the @flag on @kctx. This is done atomically, so other flags being ++ * cleared or set at the same time will be safe. + * -+ * @kctx: Address of a base context associated with a GPU address space. -+ * @error: Address of the item to be removed from the context's event error list. ++ * Some flags have locking requirements, check the documentation for the ++ * respective flags. + */ -+void kbase_csf_event_remove_error(struct kbase_context *kctx, -+ struct kbase_csf_notification *error); ++static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, ++ enum kbase_context_flags flag) ++{ ++ atomic_andnot(flag, &kctx->flags); ++} + +/** -+ * kbase_csf_event_error_pending - Check the error pending status ++ * kbase_ctx_flag_set - Set @flag on @kctx ++ * @kctx: Pointer to kbase context ++ * @flag: Flag to set + * -+ * @kctx: The kbase context to check fatal error upon. ++ * Set the @flag on @kctx. This is done atomically, so other flags being ++ * cleared or set at the same time will be safe. + * -+ * Return: true if there is error in the list. ++ * Some flags have locking requirements, check the documentation for the ++ * respective flags. + */ -+bool kbase_csf_event_error_pending(struct kbase_context *kctx); -+#endif /* _KBASE_CSF_EVENT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c ++static inline void kbase_ctx_flag_set(struct kbase_context *kctx, ++ enum kbase_context_flags flag) ++{ ++ atomic_or(flag, &kctx->flags); ++} ++#endif /* _KBASE_CONTEXT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h new file mode 100644 -index 000000000..42bff1e91 +index 000000000..1cde7394c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c -@@ -0,0 +1,3144 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h +@@ -0,0 +1,54 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -146199,4499 +146787,4782 @@ index 000000000..42bff1e91 + * + */ + -+#include "mali_kbase.h" -+#include "mali_kbase_csf_firmware_cfg.h" -+#include "mali_kbase_csf_firmware_log.h" -+#include "mali_kbase_csf_firmware_core_dump.h" -+#include "mali_kbase_csf_trace_buffer.h" -+#include "mali_kbase_csf_timeout.h" -+#include "mali_kbase_mem.h" -+#include "mali_kbase_mem_pool_group.h" -+#include "mali_kbase_reset_gpu.h" -+#include "mali_kbase_ctx_sched.h" -+#include "mali_kbase_csf_scheduler.h" -+#include -+#include "device/mali_kbase_device.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "tl/mali_kbase_timeline_priv.h" -+#include "tl/mali_kbase_tracepoints.h" -+#include "mali_kbase_csf_tl_reader.h" -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) -+#include -+#endif -+#include -+#include -+#include -+ -+#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) -+ -+static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; -+module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); -+MODULE_PARM_DESC(fw_name, "firmware image"); ++#include + -+/* The waiting time for firmware to boot */ -+static unsigned int csf_firmware_boot_timeout_ms; -+module_param(csf_firmware_boot_timeout_ms, uint, 0444); -+MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, -+ "Maximum time to wait for firmware to boot."); ++typedef int kbase_context_init_method(struct kbase_context *kctx); ++typedef void kbase_context_term_method(struct kbase_context *kctx); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+/* Makes Driver wait indefinitely for an acknowledgment for the different -+ * requests it sends to firmware. Otherwise the timeouts interfere with the -+ * use of debugger for source-level debugging of firmware as Driver initiates -+ * a GPU reset when a request times out, which always happen when a debugger -+ * is connected. ++/** ++ * struct kbase_context_init - Device init/term methods. ++ * @init: Function pointer to a initialise method. ++ * @term: Function pointer to a terminate method. ++ * @err_mes: Error message to be printed when init method fails. 
+ */ -+bool fw_debug; /* Default value of 0/false */ -+module_param(fw_debug, bool, 0444); -+MODULE_PARM_DESC(fw_debug, -+ "Enables effective use of a debugger for debugging firmware code."); -+#endif -+ ++struct kbase_context_init { ++ kbase_context_init_method *init; ++ kbase_context_term_method *term; ++ char *err_mes; ++}; + -+#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) -+#define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -+#define FIRMWARE_HEADER_VERSION_MINOR (3ul) -+#define FIRMWARE_HEADER_LENGTH (0x14ul) ++int kbase_context_common_init(struct kbase_context *kctx); ++void kbase_context_common_term(struct kbase_context *kctx); + -+#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ -+ (CSF_FIRMWARE_ENTRY_READ | \ -+ CSF_FIRMWARE_ENTRY_WRITE | \ -+ CSF_FIRMWARE_ENTRY_EXECUTE | \ -+ CSF_FIRMWARE_ENTRY_PROTECTED | \ -+ CSF_FIRMWARE_ENTRY_SHARED | \ -+ CSF_FIRMWARE_ENTRY_ZERO | \ -+ CSF_FIRMWARE_ENTRY_CACHE_MODE) ++int kbase_context_mem_pool_group_init(struct kbase_context *kctx); ++void kbase_context_mem_pool_group_term(struct kbase_context *kctx); + -+#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -+#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -+#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -+#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) -+#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -+#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) -+#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) ++int kbase_context_mmu_init(struct kbase_context *kctx); ++void kbase_context_mmu_term(struct kbase_context *kctx); + -+#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) -+#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) -+#define CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT (2ul << 3) -+#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) ++int kbase_context_mem_alloc_page(struct kbase_context *kctx); ++void kbase_context_mem_pool_free(struct kbase_context *kctx); + -+#define INTERFACE_ENTRY_NAME_OFFSET (0x14) ++void kbase_context_sticky_resource_term(struct kbase_context *kctx); + -+#define TL_METADATA_ENTRY_NAME_OFFSET (0x8) ++int kbase_context_add_to_dev_list(struct kbase_context *kctx); ++void kbase_context_remove_from_dev_list(struct kbase_context *kctx); +diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild +new file mode 100755 +index 000000000..44217dba1 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/Kbuild +@@ -0,0 +1,58 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. 
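The header above pairs each init method with a term method and an error message in struct kbase_context_init, presumably so that context creation can walk a table of such steps forward and unwind the completed steps in reverse on failure or on destroy. A minimal standalone sketch of that table-of-steps pattern, not part of the patch; the step names below are hypothetical:

    /* Illustrative sketch of the init/term table pattern; the step names
     * are invented and do not appear in the patch.
     */
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    struct ctx;

    typedef int  ctx_init_method(struct ctx *c);
    typedef void ctx_term_method(struct ctx *c);

    struct ctx_init_step {
            ctx_init_method *init;
            ctx_term_method *term;
            const char *err_mes;
    };

    static int  step_a_init(struct ctx *c) { (void)c; return 0; }
    static void step_a_term(struct ctx *c) { (void)c; }
    static int  step_b_init(struct ctx *c) { (void)c; return 0; }
    static void step_b_term(struct ctx *c) { (void)c; }

    static const struct ctx_init_step steps[] = {
            { step_a_init, step_a_term, "step A failed" },
            { step_b_init, step_b_term, "step B failed" },
    };

    static int ctx_create(struct ctx *c)
    {
            unsigned int i;

            for (i = 0; i < ARRAY_SIZE(steps); i++) {
                    int err = steps[i].init(c);

                    if (err) {
                            /* unwind the steps that already succeeded, newest first */
                            while (i--)
                                    steps[i].term(c);
                            return err;
                    }
            }
            return 0;
    }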
++# ++# + -+#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4) -+#define BUILD_INFO_GIT_SHA_LEN (40U) -+#define BUILD_INFO_GIT_DIRTY_LEN (1U) -+#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: " ++bifrost_kbase-y += \ ++ csf/mali_kbase_csf_firmware_cfg.o \ ++ csf/mali_kbase_csf_trace_buffer.o \ ++ csf/mali_kbase_csf.o \ ++ csf/mali_kbase_csf_scheduler.o \ ++ csf/mali_kbase_csf_kcpu.o \ ++ csf/mali_kbase_csf_tiler_heap.o \ ++ csf/mali_kbase_csf_timeout.o \ ++ csf/mali_kbase_csf_tl_reader.o \ ++ csf/mali_kbase_csf_heap_context_alloc.o \ ++ csf/mali_kbase_csf_reset_gpu.o \ ++ csf/mali_kbase_csf_csg_debugfs.o \ ++ csf/mali_kbase_csf_kcpu_debugfs.o \ ++ csf/mali_kbase_csf_sync_debugfs.o \ ++ csf/mali_kbase_csf_protected_memory.o \ ++ csf/mali_kbase_csf_tiler_heap_debugfs.o \ ++ csf/mali_kbase_csf_cpu_queue_debugfs.o \ ++ csf/mali_kbase_csf_event.o \ ++ csf/mali_kbase_csf_firmware_log.o \ ++ csf/mali_kbase_csf_firmware_core_dump.o \ ++ csf/mali_kbase_csf_tiler_heap_reclaim.o \ ++ csf/mali_kbase_csf_mcu_shared_reg.o + -+#define CSF_MAX_FW_STOP_LOOPS (100000) ++ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) ++bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o ++else ++bifrost_kbase-y += csf/mali_kbase_csf_firmware.o ++endif + -+#define CSF_GLB_REQ_CFG_MASK \ -+ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ -+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) ++bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o + -+static inline u32 input_page_read(const u32 *const input, const u32 offset) -+{ -+ WARN_ON(offset % sizeof(u32)); ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++ -include $(src)/csf/ipa_control/Kbuild ++else ++# out-of-tree ++ include $(src)/csf/ipa_control/Kbuild ++endif +diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild +new file mode 100755 +index 000000000..dc30281e4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild +@@ -0,0 +1,22 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ return input[offset / sizeof(u32)]; -+} ++bifrost_kbase-y += \ ++ csf/ipa_control/mali_kbase_csf_ipa_control.o +diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c +new file mode 100644 +index 000000000..c81d0a5a7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c +@@ -0,0 +1,1063 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+static inline void input_page_write(u32 *const input, const u32 offset, -+ const u32 value) -+{ -+ WARN_ON(offset % sizeof(u32)); ++#include ++#include ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include "mali_kbase_csf_ipa_control.h" + -+ input[offset / sizeof(u32)] = value; -+} ++/* ++ * Status flags from the STATUS register of the IPA Control interface. ++ */ ++#define STATUS_COMMAND_ACTIVE ((u32)1 << 0) ++#define STATUS_PROTECTED_MODE ((u32)1 << 8) ++#define STATUS_RESET ((u32)1 << 9) ++#define STATUS_TIMER_ENABLED ((u32)1 << 31) + -+static inline void input_page_partial_write(u32 *const input, const u32 offset, -+ u32 value, u32 mask) -+{ -+ WARN_ON(offset % sizeof(u32)); ++/* ++ * Commands for the COMMAND register of the IPA Control interface. ++ */ ++#define COMMAND_APPLY ((u32)1) ++#define COMMAND_SAMPLE ((u32)3) ++#define COMMAND_PROTECTED_ACK ((u32)4) ++#define COMMAND_RESET_ACK ((u32)5) + -+ input[offset / sizeof(u32)] = -+ (input_page_read(input, offset) & ~mask) | (value & mask); -+} ++/* ++ * Number of timer events per second. ++ */ ++#define TIMER_EVENTS_PER_SECOND ((u32)1000 / IPA_CONTROL_TIMER_DEFAULT_VALUE_MS) + -+static inline u32 output_page_read(const u32 *const output, const u32 offset) -+{ -+ WARN_ON(offset % sizeof(u32)); ++/* ++ * Maximum number of loops polling the GPU before we assume the GPU has hung. ++ */ ++#define IPA_INACTIVE_MAX_LOOPS (8000000U) + -+ return output[offset / sizeof(u32)]; -+} ++/* ++ * Number of bits used to configure a performance counter in SELECT registers. ++ */ ++#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8) + -+static unsigned int entry_type(u32 header) -+{ -+ return header & 0xFF; -+} -+static unsigned int entry_size(u32 header) -+{ -+ return (header >> 8) & 0xFF; -+} -+static bool entry_update(u32 header) -+{ -+ return (header >> 30) & 0x1; -+} -+static bool entry_optional(u32 header) -+{ -+ return (header >> 31) & 0x1; -+} ++/* ++ * Maximum value of a performance counter. ++ */ ++#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1) + +/** -+ * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware ++ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency ++ * listener + * -+ * @node: List head linking all timeline metadata to -+ * kbase_device:csf.firmware_timeline_metadata. -+ * @name: NUL-terminated string naming the metadata. -+ * @data: Metadata content. -+ * @size: Metadata size. ++ * @listener: GPU clock frequency listener. ++ * @kbdev: Pointer to kbase device. 
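IPA_CONTROL_SELECT_BITS_PER_CNT above reserves 8 bits per counter slot in the 64-bit SELECT configuration, which build_select_config() further down assembles before the value is split into the SELECT_*_LO/_HI register writes. A minimal standalone sketch of that packing, not part of the patch, with made-up counter indices:

    #include <stdint.h>

    #define SELECT_BITS_PER_CNT 8  /* mirrors IPA_CONTROL_SELECT_BITS_PER_CNT */

    /* Pack one 8-bit performance-counter index per slot into a 64-bit
     * SELECT value; slot 0 occupies the least significant byte.
     */
    static uint64_t pack_select(const uint8_t *idx, unsigned int num_slots)
    {
            uint64_t select = 0;
            unsigned int slot;

            for (slot = 0; slot < num_slots; slot++)
                    select |= (uint64_t)idx[slot] << (SELECT_BITS_PER_CNT * slot);

            return select;
    }

    /* Example: indices {0x04, 0x11, 0x23} in slots 0..2 give 0x231104;
     * the low 32 bits would go to a SELECT_*_LO register and the high
     * 32 bits to the corresponding SELECT_*_HI register.
     */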
+ */ -+struct firmware_timeline_metadata { -+ struct list_head node; -+ char *name; -+ char *data; -+ size_t size; ++struct kbase_ipa_control_listener_data { ++ struct kbase_clk_rate_listener listener; ++ struct kbase_device *kbdev; +}; + -+/* The shared interface area, used for communicating with firmware, is managed -+ * like a virtual memory zone. Reserve the virtual space from that zone -+ * corresponding to shared interface entry parsed from the firmware image. -+ * The shared_reg_rbtree should have been initialized before calling this -+ * function. -+ */ -+static int setup_shared_iface_static_region(struct kbase_device *kbdev) ++static u32 timer_value(u32 gpu_rate) +{ -+ struct kbase_csf_firmware_interface *interface = -+ kbdev->csf.shared_interface; -+ struct kbase_va_region *reg; -+ int ret = -ENOMEM; -+ -+ if (!interface) -+ return -EINVAL; -+ -+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, -+ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); -+ if (reg) { -+ mutex_lock(&kbdev->csf.reg_lock); -+ ret = kbase_add_va_region_rbtree(kbdev, reg, -+ interface->virtual, interface->num_pages_aligned, 1); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ if (ret) -+ kfree(reg); -+ else -+ reg->flags &= ~KBASE_REG_FREE; -+ } -+ -+ return ret; ++ return gpu_rate / TIMER_EVENTS_PER_SECOND; +} + -+static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) ++static int wait_status(struct kbase_device *kbdev, u32 flags) +{ -+ u32 max_loops = CSF_MAX_FW_STOP_LOOPS; ++ unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS; ++ u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); + -+ /* wait for the MCU_STATUS register to reach the given status value */ -+ while (--max_loops && -+ (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) != val)) { ++ /* ++ * Wait for the STATUS register to indicate that flags have been ++ * cleared, in case a transition is pending. ++ */ ++ while (--max_loops && (status & flags)) ++ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ if (max_loops == 0) { ++ dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck"); ++ return -EBUSY; + } + -+ return (max_loops == 0) ? 
-1 : 0; ++ return 0; +} + -+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) ++static int apply_select_config(struct kbase_device *kbdev, u64 *select) +{ -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ int ret; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); -+} ++ u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX); ++ u32 select_cshw_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX); ++ u32 select_memsys_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX); ++ u32 select_memsys_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX); ++ u32 select_tiler_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX); ++ u32 select_tiler_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX); ++ u32 select_shader_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX); ++ u32 select_shader_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX); + -+static void wait_for_firmware_stop(struct kbase_device *kbdev) -+{ -+ if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) { -+ /* This error shall go away once MIDJM-2371 is closed */ -+ dev_err(kbdev->dev, "Firmware failed to stop"); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO), ++ select_memsys_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI), ++ select_memsys_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO), ++ select_tiler_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI), ++ select_tiler_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO), ++ select_shader_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI), ++ select_shader_hi); ++ ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ ++ if (!ret) { ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY); ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ } else { ++ dev_err(kbdev->dev, "Wait for the pending command failed"); + } + -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ return ret; +} + -+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) ++static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx) +{ -+ wait_for_firmware_stop(kbdev); -+} ++ u32 value_lo, value_hi; + -+static void stop_csf_firmware(struct kbase_device *kbdev) -+{ -+ /* Stop the MCU firmware */ -+ kbase_csf_firmware_disable_mcu(kbdev); ++ switch (type) { ++ case KBASE_IPA_CORE_TYPE_CSHW: ++ value_lo = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_MEMSYS: ++ value_lo = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_TILER: ++ value_lo = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_SHADER: ++ value_lo = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, ++ 
IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx))); ++ break; ++ default: ++ WARN(1, "Unknown core type: %u\n", type); ++ value_lo = value_hi = 0; ++ break; ++ } + -+ wait_for_firmware_stop(kbdev); ++ return (((u64)value_hi << 32) | value_lo); +} + -+static void wait_for_firmware_boot(struct kbase_device *kbdev) ++static void build_select_config(struct kbase_ipa_control *ipa_ctrl, ++ u64 *select_config) +{ -+ long wait_timeout; -+ long remaining; -+ -+ if (!csf_firmware_boot_timeout_ms) -+ csf_firmware_boot_timeout_ms = -+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT); ++ size_t i; + -+ wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { ++ size_t j; + -+ /* Firmware will generate a global interface interrupt once booting -+ * is complete -+ */ -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ kbdev->csf.interrupt_received == true, wait_timeout); ++ select_config[i] = 0ULL; + -+ if (!remaining) -+ dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config = ++ &ipa_ctrl->blocks[i].select[j]; + -+ kbdev->csf.interrupt_received = false; ++ select_config[i] |= ++ ((u64)prfcnt_config->idx ++ << (IPA_CONTROL_SELECT_BITS_PER_CNT * j)); ++ } ++ } +} + -+static void boot_csf_firmware(struct kbase_device *kbdev) ++static int update_select_registers(struct kbase_device *kbdev) +{ -+ kbase_csf_firmware_enable_mcu(kbdev); ++ u64 select_config[KBASE_IPA_CORE_TYPE_NUM]; + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); ++ lockdep_assert_held(&kbdev->csf.ipa_control.lock); + -+ if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) -+ dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ build_select_config(&kbdev->csf.ipa_control, select_config); + -+ wait_for_firmware_boot(kbdev); ++ return apply_select_config(kbdev, select_config); +} + -+/** -+ * wait_ready() - Wait for previously issued MMU command to complete. -+ * -+ * @kbdev: Kbase device to wait for a MMU command to complete. -+ * -+ * Reset GPU if the wait for previously issued command times out. -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int wait_ready(struct kbase_device *kbdev) ++static inline void calc_prfcnt_delta(struct kbase_device *kbdev, ++ struct kbase_ipa_control_prfcnt *prfcnt, ++ bool gpu_ready) +{ -+ const ktime_t wait_loop_start = ktime_get_raw(); -+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; -+ s64 diff; -+ -+ do { -+ unsigned int i; -+ -+ for (i = 0; i < 1000; i++) { -+ /* Wait for the MMU status to indicate there is no active command */ -+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & -+ AS_STATUS_AS_ACTIVE)) -+ return 0; -+ } -+ -+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); -+ } while (diff < mmu_as_inactive_wait_time_ms); -+ -+ dev_err(kbdev->dev, -+ "AS_ACTIVE bit stuck for MCU AS. 
Might be caused by unstable GPU clk/pwr or faulty system"); ++ u64 delta_value, raw_value; + -+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu_locked(kbdev); ++ if (gpu_ready) ++ raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, ++ prfcnt->select_idx); ++ else ++ raw_value = prfcnt->latest_raw_value; + -+ return -ETIMEDOUT; -+} ++ if (raw_value < prfcnt->latest_raw_value) { ++ delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + ++ raw_value; ++ } else { ++ delta_value = raw_value - prfcnt->latest_raw_value; ++ } + -+static void unload_mmu_tables(struct kbase_device *kbdev) -+{ -+ unsigned long irq_flags; ++ delta_value *= prfcnt->scaling_factor; + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (kbdev->pm.backend.gpu_powered) -+ kbase_mmu_disable_as(kbdev, MCU_AS_NR); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+} ++ if (kbdev->csf.ipa_control.cur_gpu_rate == 0) { ++ static bool warned; + -+static int load_mmu_tables(struct kbase_device *kbdev) -+{ -+ unsigned long irq_flags; ++ if (!warned) { ++ dev_warn(kbdev->dev, "%s: GPU freq is unexpectedly 0", __func__); ++ warned = true; ++ } ++ } else if (prfcnt->gpu_norm) ++ delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate); + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, MCU_AS_NR); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ prfcnt->latest_raw_value = raw_value; + -+ /* Wait for a while for the update command to take effect */ -+ return wait_ready(kbdev); ++ /* Accumulate the difference */ ++ prfcnt->accumulated_diff += delta_value; +} + +/** -+ * convert_mem_flags() - Convert firmware memory flags to GPU region flags ++ * kbase_ipa_control_rate_change_notify - GPU frequency change callback + * -+ * Return: GPU memory region flags ++ * @listener: Clock frequency change listener. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). + * -+ * @kbdev: Instance of GPU platform device (used to determine system coherency) -+ * @flags: Flags of an "interface memory setup" section in a firmware image -+ * @cm: appropriate cache mode chosen for the "interface memory setup" -+ * section, which could be different from the cache mode requested by -+ * firmware. ++ * This callback notifies kbase_ipa_control about GPU frequency changes. ++ * Only top-level clock changes are meaningful. GPU frequency updates ++ * affect all performance counters which require GPU normalization ++ * in every session. + */ -+static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, -+ const u32 flags, u32 *cm) ++static void ++kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, ++ u32 clk_index, u32 clk_rate_hz) +{ -+ unsigned long mem_flags = 0; -+ u32 cache_mode = flags & CSF_FIRMWARE_ENTRY_CACHE_MODE; -+ bool is_shared = (flags & CSF_FIRMWARE_ENTRY_SHARED) ? 
true : false; -+ -+ /* The memory flags control the access permissions for the MCU, the -+ * shader cores/tiler are not expected to access this memory -+ */ -+ if (flags & CSF_FIRMWARE_ENTRY_READ) -+ mem_flags |= KBASE_REG_GPU_RD; -+ -+ if (flags & CSF_FIRMWARE_ENTRY_WRITE) -+ mem_flags |= KBASE_REG_GPU_WR; -+ -+ if ((flags & CSF_FIRMWARE_ENTRY_EXECUTE) == 0) -+ mem_flags |= KBASE_REG_GPU_NX; -+ -+ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) -+ mem_flags |= KBASE_REG_PROTECTED; -+ -+ /* Substitute uncached coherent memory for cached coherent memory if -+ * the system does not support ACE coherency. -+ */ -+ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) && -+ (kbdev->system_coherency != COHERENCY_ACE)) -+ cache_mode = CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT; ++ if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) { ++ size_t i; ++ unsigned long flags; ++ struct kbase_ipa_control_listener_data *listener_data = ++ container_of(listener, ++ struct kbase_ipa_control_listener_data, ++ listener); ++ struct kbase_device *kbdev = listener_data->kbdev; ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + -+ /* Substitute uncached incoherent memory for uncached coherent memory -+ * if the system does not support ACE-Lite coherency. -+ */ -+ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT) && -+ (kbdev->system_coherency == COHERENCY_NONE)) -+ cache_mode = CSF_FIRMWARE_CACHE_MODE_NONE; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ *cm = cache_mode; ++ if (!kbdev->pm.backend.gpu_ready) { ++ dev_err(kbdev->dev, ++ "%s: GPU frequency cannot change while GPU is off", ++ __func__); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return; ++ } + -+ switch (cache_mode) { -+ case CSF_FIRMWARE_CACHE_MODE_NONE: -+ mem_flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -+ break; -+ case CSF_FIRMWARE_CACHE_MODE_CACHED: -+ mem_flags |= -+ KBASE_REG_MEMATTR_INDEX( -+ AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); -+ break; -+ case CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT: -+ case CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT: -+ WARN_ON(!is_shared); -+ mem_flags |= KBASE_REG_SHARE_BOTH | -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); -+ break; -+ default: -+ dev_err(kbdev->dev, -+ "Firmware contains interface with unsupported cache mode\n"); -+ break; -+ } -+ return mem_flags; -+} ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); + -+static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, -+ struct tagged_addr *phys, u32 num_pages, u32 flags, -+ u32 data_start, u32 data_end) -+{ -+ u32 data_pos = data_start; -+ u32 data_len = data_end - data_start; -+ u32 page_num; -+ u32 page_limit; ++ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { ++ struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; + -+ if (flags & CSF_FIRMWARE_ENTRY_ZERO) -+ page_limit = num_pages; -+ else -+ page_limit = (data_len + PAGE_SIZE - 1) / PAGE_SIZE; ++ if (session->active) { ++ size_t j; + -+ for (page_num = 0; page_num < page_limit; ++page_num) { -+ struct page *const page = as_page(phys[page_num]); -+ char *const p = kmap_atomic(page); -+ u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); ++ for (j = 0; j < session->num_prfcnts; j++) { ++ struct kbase_ipa_control_prfcnt *prfcnt = ++ &session->prfcnts[j]; + -+ if (copy_len > 0) { -+ memcpy(p, data + data_pos, copy_len); -+ data_pos += copy_len; -+ data_len -= copy_len; ++ if (prfcnt->gpu_norm) ++ 
calc_prfcnt_delta(kbdev, prfcnt, true); ++ } ++ } + } + -+ if (flags & CSF_FIRMWARE_ENTRY_ZERO) { -+ u32 const zi_len = PAGE_SIZE - copy_len; ++ ipa_ctrl->cur_gpu_rate = clk_rate_hz; + -+ memset(p + copy_len, 0, zi_len); ++ /* Update the timer for automatic sampling if active sessions ++ * are present. Counters have already been manually sampled. ++ */ ++ if (ipa_ctrl->num_active_sessions > 0) { ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), ++ timer_value(ipa_ctrl->cur_gpu_rate)); + } + -+ kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), -+ PAGE_SIZE, DMA_TO_DEVICE); -+ kunmap_atomic(p); ++ spin_unlock(&ipa_ctrl->lock); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + -+static int reload_fw_image(struct kbase_device *kbdev) ++void kbase_ipa_control_init(struct kbase_device *kbdev) +{ -+ const u32 magic = FIRMWARE_HEADER_MAGIC; -+ struct kbase_csf_firmware_interface *interface; -+ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; -+ int ret = 0; ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ struct kbase_ipa_control_listener_data *listener_data; ++ size_t i, j; + -+ if (WARN_ON(mcu_fw->data == NULL)) { -+ dev_err(kbdev->dev, "Firmware image copy not loaded\n"); -+ ret = -EINVAL; -+ goto out; ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ ipa_ctrl->blocks[i].select[j].idx = 0; ++ ipa_ctrl->blocks[i].select[j].refcount = 0; ++ } ++ ipa_ctrl->blocks[i].num_available_counters = ++ KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; + } + -+ /* Do a basic sanity check on MAGIC signature */ -+ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { -+ dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); -+ ret = -EINVAL; -+ goto out; ++ spin_lock_init(&ipa_ctrl->lock); ++ ipa_ctrl->num_active_sessions = 0; ++ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) ++ ipa_ctrl->sessions[i].active = false; ++ ++ listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), ++ GFP_KERNEL); ++ if (listener_data) { ++ listener_data->listener.notify = ++ kbase_ipa_control_rate_change_notify; ++ listener_data->kbdev = kbdev; ++ ipa_ctrl->rtm_listener_data = listener_data; + } + -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ /* Dont skip re-loading any section if full reload was requested */ -+ if (!kbdev->csf.firmware_full_reload_needed) { -+ /* Skip reload of text & read only data sections */ -+ if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || -+ !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) -+ continue; -+ } ++ spin_lock(&clk_rtm->lock); ++ if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) ++ ipa_ctrl->cur_gpu_rate = ++ clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val; ++ if (listener_data) ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ clk_rtm, &listener_data->listener); ++ spin_unlock(&clk_rtm->lock); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_init); + -+ load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages, -+ interface->flags, interface->data_start, interface->data_end); -+ } ++void kbase_ipa_control_term(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ struct kbase_ipa_control_listener_data *listener_data = ++ 
ipa_ctrl->rtm_listener_data; + -+ kbdev->csf.firmware_full_reload_needed = false; ++ WARN_ON(ipa_ctrl->num_active_sessions); + -+ kbase_csf_firmware_reload_trace_buffers_data(kbdev); -+out: -+ return ret; ++ if (listener_data) ++ kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener); ++ kfree(ipa_ctrl->rtm_listener_data); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_term); + -+/** -+ * entry_find_large_page_to_reuse() - Find if the large page of previously parsed -+ * FW interface entry can be reused to store -+ * the contents of new FW interface entry. -+ * -+ * @kbdev: Kbase device structure -+ * @virtual_start: Start of the virtual address range required for an entry allocation -+ * @virtual_end: End of the virtual address range required for an entry allocation -+ * @flags: Firmware entry flags for comparison with the reusable pages found -+ * @phys: Pointer to the array of physical (tagged) addresses making up the new -+ * FW interface entry. It is an output parameter which would be made to -+ * point to an already existing array allocated for the previously parsed -+ * FW interface entry using large page(s). If no appropriate entry is -+ * found it is set to NULL. -+ * @pma: Pointer to a protected memory allocation. It is an output parameter -+ * which would be made to the protected memory allocation of a previously -+ * parsed FW interface entry using large page(s) from protected memory. -+ * If no appropriate entry is found it is set to NULL. -+ * @num_pages: Number of pages requested. -+ * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages -+ * within the 2MB pages aligned allocation. -+ * @is_small_page: This is an output flag used to select between the small and large page -+ * to be used for the FW entry allocation. -+ * -+ * Go through all the already initialized interfaces and find if a previously -+ * allocated large page can be used to store contents of new FW interface entry. ++/** session_read_raw_values - Read latest raw values for a sessions ++ * @kbdev: Pointer to kbase device. ++ * @session: Pointer to the session whose performance counters shall be read. + * -+ * Return: true if a large page can be reused, false otherwise. ++ * Read and update the latest raw values of all the performance counters ++ * belonging to a given session. 
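calc_prfcnt_delta() above accumulates the difference between successive raw reads of a counter and allows for the 48-bit VALUE registers wrapping (MAX_PRFCNT_VALUE is 2^48 - 1). A minimal standalone sketch of just that wraparound arithmetic, not part of the patch, mirroring the formula used above:

    #include <stdint.h>

    #define MAX_PRFCNT_VALUE (((uint64_t)1 << 48) - 1)

    /* Difference between two successive reads of a 48-bit counter,
     * tolerating a single wrap between the reads (same formula as
     * calc_prfcnt_delta() in the patch).
     */
    static uint64_t prfcnt_delta(uint64_t prev_raw, uint64_t cur_raw)
    {
            if (cur_raw < prev_raw)
                    return (MAX_PRFCNT_VALUE - prev_raw) + cur_raw;

            return cur_raw - prev_raw;
    }

    /* Example: prev_raw = 0xFFFFFFFFFFF0, cur_raw = 0x10
     * -> (0xFFFFFFFFFFFF - 0xFFFFFFFFFFF0) + 0x10 = 0x1F
     */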
+ */ -+static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, -+ const u32 virtual_start, const u32 virtual_end, -+ const u32 flags, struct tagged_addr **phys, -+ struct protected_memory_allocation ***pma, -+ u32 num_pages, u32 *num_pages_aligned, -+ bool *is_small_page) ++static void session_read_raw_values(struct kbase_device *kbdev, ++ struct kbase_ipa_control_session *session) +{ -+ struct kbase_csf_firmware_interface *interface = NULL; -+ struct kbase_csf_firmware_interface *target_interface = NULL; -+ u32 virtual_diff_min = U32_MAX; -+ bool reuse_large_page = false; ++ size_t i; + -+ CSTD_UNUSED(interface); -+ CSTD_UNUSED(target_interface); -+ CSTD_UNUSED(virtual_diff_min); -+ -+ *num_pages_aligned = num_pages; -+ *is_small_page = true; -+ *phys = NULL; -+ *pma = NULL; ++ lockdep_assert_held(&kbdev->csf.ipa_control.lock); + ++ for (i = 0; i < session->num_prfcnts; i++) { ++ struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; ++ u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, ++ prfcnt->select_idx); + -+ /* If the section starts at 2MB aligned boundary, -+ * then use 2MB page(s) for it. -+ */ -+ if (!(virtual_start & (SZ_2M - 1))) { -+ *num_pages_aligned = -+ round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); -+ *is_small_page = false; -+ goto out; ++ prfcnt->latest_raw_value = raw_value; + } ++} + -+ /* If the section doesn't lie within the same 2MB aligned boundary, -+ * then use 4KB pages as it would be complicated to use a 2MB page -+ * for such section. -+ */ -+ if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1))) -+ goto out; -+ -+ /* Find the nearest 2MB aligned section which comes before the current -+ * section. -+ */ -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ const u32 virtual_diff = virtual_start - interface->virtual; ++/** session_gpu_start - Start one or all sessions ++ * @kbdev: Pointer to kbase device. ++ * @ipa_ctrl: Pointer to IPA_CONTROL descriptor. ++ * @session: Pointer to the session to initialize, or NULL to initialize ++ * all sessions. ++ * ++ * This function starts one or all sessions by capturing a manual sample, ++ * reading the latest raw value of performance counters and possibly enabling ++ * the timer for automatic sampling if necessary. ++ * ++ * If a single session is given, it is assumed to be active, regardless of ++ * the number of active sessions. The number of performance counters belonging ++ * to the session shall be set in advance. ++ * ++ * If no session is given, the function shall start all sessions. ++ * The function does nothing if there are no active sessions. ++ * ++ * Return: 0 on success, or error code on failure. ++ */ ++static int session_gpu_start(struct kbase_device *kbdev, ++ struct kbase_ipa_control *ipa_ctrl, ++ struct kbase_ipa_control_session *session) ++{ ++ bool first_start = ++ (session != NULL) && (ipa_ctrl->num_active_sessions == 0); ++ int ret = 0; + -+ if (interface->virtual > virtual_end) -+ continue; ++ lockdep_assert_held(&kbdev->csf.ipa_control.lock); + -+ if (interface->virtual & (SZ_2M - 1)) -+ continue; ++ /* ++ * Exit immediately if the caller intends to start all sessions ++ * but there are no active sessions. It's important that no operation ++ * is done on the IPA_CONTROL interface in that case. 
++ */ ++ if (!session && ipa_ctrl->num_active_sessions == 0) ++ return ret; + -+ if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { -+ target_interface = interface; -+ virtual_diff_min = virtual_diff; -+ } ++ /* ++ * Take a manual sample unconditionally if the caller intends ++ * to start all sessions. Otherwise, only take a manual sample ++ * if this is the first session to be initialized, for accumulator ++ * registers are empty and no timer has been configured for automatic ++ * sampling. ++ */ ++ if (!session || first_start) { ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), ++ COMMAND_SAMPLE); ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ if (ret) ++ dev_err(kbdev->dev, "%s: failed to sample new counters", ++ __func__); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), ++ timer_value(ipa_ctrl->cur_gpu_rate)); + } + -+ if (target_interface) { -+ const u32 page_index = virtual_diff_min >> PAGE_SHIFT; -+ -+ if (page_index >= target_interface->num_pages_aligned) -+ goto out; -+ -+ if (target_interface->phys) -+ *phys = &target_interface->phys[page_index]; ++ /* ++ * Read current raw value to start the session. ++ * This is necessary to put the first query in condition ++ * to generate a correct value by calculating the difference ++ * from the beginning of the session. This consideration ++ * is true regardless of the number of sessions the caller ++ * intends to start. ++ */ ++ if (!ret) { ++ if (session) { ++ /* On starting a session, value read is required for ++ * IPA power model's calculation initialization. ++ */ ++ session_read_raw_values(kbdev, session); ++ } else { ++ size_t session_idx; + -+ if (target_interface->pma) -+ *pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE]; ++ for (session_idx = 0; ++ session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; ++ session_idx++) { ++ struct kbase_ipa_control_session *session_to_check = &ipa_ctrl->sessions[session_idx]; + -+ *is_small_page = false; -+ reuse_large_page = true; ++ if (session_to_check->active) ++ session_read_raw_values(kbdev, session_to_check); ++ } ++ } + } + -+out: -+ return reuse_large_page; ++ return ret; +} + -+/** -+ * parse_memory_setup_entry() - Process an "interface memory setup" section -+ * -+ * @kbdev: Kbase device structure -+ * @fw: The firmware image containing the section -+ * @entry: Pointer to the start of the section -+ * @size: Size (in bytes) of the section -+ * -+ * Read an "interface memory setup" section from the firmware image and create -+ * the necessary memory region including the MMU page tables. If successful -+ * the interface will be added to the kbase_device:csf.firmware_interfaces list. 
-+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+static int parse_memory_setup_entry(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, const u32 *entry, -+ unsigned int size) ++int kbase_ipa_control_register( ++ struct kbase_device *kbdev, ++ const struct kbase_ipa_control_perf_counter *perf_counters, ++ size_t num_counters, void **client) +{ + int ret = 0; -+ const u32 flags = entry[0]; -+ const u32 virtual_start = entry[1]; -+ const u32 virtual_end = entry[2]; -+ const u32 data_start = entry[3]; -+ const u32 data_end = entry[4]; -+ u32 num_pages; -+ u32 num_pages_aligned; -+ char *name; -+ void *name_entry; -+ unsigned int name_len; -+ struct tagged_addr *phys = NULL; -+ struct kbase_csf_firmware_interface *interface = NULL; -+ bool allocated_pages = false, protected_mode = false; -+ unsigned long mem_flags = 0; -+ u32 cache_mode = 0; -+ struct protected_memory_allocation **pma = NULL; -+ bool reuse_pages = false; -+ bool is_small_page = true; -+ bool ignore_page_migration = true; ++ size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM]; ++ bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS]; ++ bool new_config = false; ++ struct kbase_ipa_control *ipa_ctrl; ++ struct kbase_ipa_control_session *session = NULL; ++ unsigned long flags; + -+ if (data_end < data_start) { -+ dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", -+ data_end, data_start); -+ return -EINVAL; -+ } -+ if (virtual_end < virtual_start) { -+ dev_err(kbdev->dev, "Firmware corrupt, virtual_end < virtual_start (0x%x<0x%x)\n", -+ virtual_end, virtual_start); -+ return -EINVAL; -+ } -+ if (data_end > fw->size) { -+ dev_err(kbdev->dev, "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx\n", -+ data_end, fw->size); -+ return -EINVAL; -+ } ++ if (WARN_ON(unlikely(kbdev == NULL))) ++ return -ENODEV; + -+ if ((virtual_start & ~PAGE_MASK) != 0 || -+ (virtual_end & ~PAGE_MASK) != 0) { -+ dev_err(kbdev->dev, "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", -+ virtual_start, virtual_end); ++ if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) || ++ WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) { ++ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); + return -EINVAL; + } + -+ if ((flags & CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS) != flags) { -+ dev_err(kbdev->dev, "Firmware contains interface with unsupported flags (0x%x)\n", -+ flags); -+ return -EINVAL; -+ } ++ kbase_pm_context_active(kbdev); + -+ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) -+ protected_mode = true; ++ ipa_ctrl = &kbdev->csf.ipa_control; ++ spin_lock_irqsave(&ipa_ctrl->lock, flags); + -+ if (protected_mode && kbdev->csf.pma_dev == NULL) { -+ dev_dbg(kbdev->dev, -+ "Protected memory allocator not found, Firmware protected mode entry will not be supported"); -+ return 0; ++ if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) { ++ dev_err(kbdev->dev, "%s: too many sessions", __func__); ++ ret = -EBUSY; ++ goto exit; + } + -+ num_pages = (virtual_end - virtual_start) -+ >> PAGE_SHIFT; -+ -+ reuse_pages = -+ entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, -+ &pma, num_pages, &num_pages_aligned, &is_small_page); -+ if (!reuse_pages) -+ phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) ++ req_counters[i] = 0; + -+ if (!phys) -+ return -ENOMEM; ++ /* ++ * Count how many counters would need to be configured in order to ++ * 
satisfy the request. Requested counters which happen to be already ++ * configured can be skipped. ++ */ ++ for (i = 0; i < num_counters; i++) { ++ size_t j; ++ enum kbase_ipa_core_type type = perf_counters[i].type; ++ u8 idx = perf_counters[i].idx; + -+ if (protected_mode) { -+ if (!reuse_pages) { -+ pma = kbase_csf_protected_memory_alloc( -+ kbdev, phys, num_pages_aligned, is_small_page); ++ if ((type >= KBASE_IPA_CORE_TYPE_NUM) || ++ (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) { ++ dev_err(kbdev->dev, ++ "%s: invalid requested type %u and/or index %u", ++ __func__, type, idx); ++ ret = -EINVAL; ++ goto exit; + } + -+ if (!pma) -+ ret = -ENOMEM; -+ } else { -+ if (!reuse_pages) { -+ ret = kbase_mem_pool_alloc_pages( -+ kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, -+ is_small_page), -+ num_pages_aligned, phys, false, NULL); -+ ignore_page_migration = false; ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config = ++ &ipa_ctrl->blocks[type].select[j]; ++ ++ if (prfcnt_config->refcount > 0) { ++ if (prfcnt_config->idx == idx) { ++ already_configured[i] = true; ++ break; ++ } ++ } + } -+ } + -+ if (ret < 0) { -+ dev_err(kbdev->dev, -+ "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", -+ num_pages_aligned, virtual_start); -+ goto out; ++ if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) { ++ already_configured[i] = false; ++ req_counters[type]++; ++ new_config = true; ++ } + } + -+ allocated_pages = true; -+ load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, -+ data_start, data_end); ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) ++ if (req_counters[i] > ++ ipa_ctrl->blocks[i].num_available_counters) { ++ dev_err(kbdev->dev, ++ "%s: more counters (%zu) than available (%zu) have been requested for type %zu", ++ __func__, req_counters[i], ++ ipa_ctrl->blocks[i].num_available_counters, i); ++ ret = -EINVAL; ++ goto exit; ++ } + -+ /* Allocate enough memory for the struct kbase_csf_firmware_interface and -+ * the name of the interface. ++ /* ++ * The request has been validated. ++ * Firstly, find an available session and then set up the initial state ++ * of the session and update the configuration of performance counters ++ * in the internal state of kbase_ipa_control. 
+ */ -+ name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET; -+ name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET); -+ if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) { -+ dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start"); -+ ret = -EINVAL; -+ goto out; ++ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; ++ session_idx++) { ++ if (!ipa_ctrl->sessions[session_idx].active) { ++ session = &ipa_ctrl->sessions[session_idx]; ++ break; ++ } + } + -+ interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL); -+ if (!interface) { -+ ret = -ENOMEM; -+ goto out; ++ if (!session) { ++ dev_err(kbdev->dev, "%s: wrong or corrupt session state", ++ __func__); ++ ret = -EBUSY; ++ goto exit; + } -+ name = (void *)(interface + 1); -+ memcpy(name, name_entry, name_len); -+ name[name_len] = 0; -+ -+ interface->name = name; -+ interface->phys = phys; -+ interface->reuse_pages = reuse_pages; -+ interface->is_small_page = is_small_page; -+ interface->num_pages = num_pages; -+ interface->num_pages_aligned = num_pages_aligned; -+ interface->virtual = virtual_start; -+ interface->kernel_map = NULL; -+ interface->flags = flags; -+ interface->data_start = data_start; -+ interface->data_end = data_end; -+ interface->pma = pma; + -+ /* Discover the virtual execution address field after the end of the name -+ * field taking into account the NULL-termination character. -+ */ -+ interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1)); ++ for (i = 0; i < num_counters; i++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config; ++ size_t j; ++ u8 type = perf_counters[i].type; ++ u8 idx = perf_counters[i].idx; + -+ mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ prfcnt_config = &ipa_ctrl->blocks[type].select[j]; + -+ if (flags & CSF_FIRMWARE_ENTRY_SHARED) { -+ struct page **page_list; -+ u32 i; -+ pgprot_t cpu_map_prot; -+ u32 mem_attr_index = KBASE_REG_MEMATTR_VALUE(mem_flags); ++ if (already_configured[i]) { ++ if ((prfcnt_config->refcount > 0) && ++ (prfcnt_config->idx == idx)) { ++ break; ++ } ++ } else { ++ if (prfcnt_config->refcount == 0) ++ break; ++ } ++ } + -+ /* Since SHARED memory type was used for mapping shared memory -+ * on GPU side, it can be mapped as cached on CPU side on both -+ * types of coherent platforms. 
-+ */ -+ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) || -+ (cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT)) { -+ WARN_ON(mem_attr_index != -+ AS_MEMATTR_INDEX_SHARED); -+ cpu_map_prot = PAGE_KERNEL; -+ } else { -+ WARN_ON(mem_attr_index != -+ AS_MEMATTR_INDEX_NON_CACHEABLE); -+ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); ++ if (WARN_ON((prfcnt_config->refcount > 0 && ++ prfcnt_config->idx != idx) || ++ (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) { ++ dev_err(kbdev->dev, ++ "%s: invalid internal state: counter already configured or no counter available to configure", ++ __func__); ++ ret = -EBUSY; ++ goto exit; + } + -+ page_list = kmalloc_array(num_pages, sizeof(*page_list), -+ GFP_KERNEL); -+ if (!page_list) { -+ ret = -ENOMEM; -+ goto out; ++ if (prfcnt_config->refcount == 0) { ++ prfcnt_config->idx = idx; ++ ipa_ctrl->blocks[type].num_available_counters--; + } + -+ for (i = 0; i < num_pages; i++) -+ page_list[i] = as_page(phys[i]); ++ session->prfcnts[i].accumulated_diff = 0; ++ session->prfcnts[i].type = type; ++ session->prfcnts[i].select_idx = j; ++ session->prfcnts[i].scaling_factor = ++ perf_counters[i].scaling_factor; ++ session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm; + -+ interface->kernel_map = vmap(page_list, num_pages, VM_MAP, -+ cpu_map_prot); ++ /* Reports to this client for GPU time spent in protected mode ++ * should begin from the point of registration. ++ */ ++ session->last_query_time = ktime_get_raw_ns(); + -+ kfree(page_list); ++ /* Initially, no time has been spent in protected mode */ ++ session->protm_time = 0; + -+ if (!interface->kernel_map) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ prfcnt_config->refcount++; + } + -+ /* Start location of the shared interface area is fixed and is -+ * specified in firmware spec, and so there shall only be a -+ * single entry with that start address. ++ /* ++ * Apply new configuration, if necessary. ++ * As a temporary solution, make sure that the GPU is on ++ * before applying the new configuration. 
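kbase_ipa_control_register() above hands back an opaque client handle once SELECT slots have been claimed for the requested counters, and the matching kbase_ipa_control_unregister() releases them. A hedged usage sketch from a hypothetical in-kernel caller, not part of the patch; the field layout of struct kbase_ipa_control_perf_counter is inferred from the accesses in the registration code, and the counter index 0x21 is invented for the example:

    /* Hypothetical caller of the IPA_CONTROL client interface added by
     * this patch; the values below are illustrative only.
     */
    static void *ipa_client;

    static int example_register_counter(struct kbase_device *kbdev)
    {
            struct kbase_ipa_control_perf_counter cnt = {
                    .type = KBASE_IPA_CORE_TYPE_SHADER,
                    .idx = 0x21,            /* invented hardware counter index */
                    .scaling_factor = 1,
                    .gpu_norm = true,       /* normalise against current GPU frequency */
            };

            return kbase_ipa_control_register(kbdev, &cnt, 1, &ipa_client);
    }

    static void example_unregister_counter(struct kbase_device *kbdev)
    {
            kbase_ipa_control_unregister(kbdev, ipa_client);
    }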
+ */ -+ if (virtual_start == (KBASE_REG_ZONE_MCU_SHARED_BASE << PAGE_SHIFT)) -+ kbdev->csf.shared_interface = interface; -+ -+ list_add(&interface->node, &kbdev->csf.firmware_interfaces); -+ -+ if (!reuse_pages) { -+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, -+ virtual_start >> PAGE_SHIFT, phys, -+ num_pages_aligned, mem_flags, -+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, -+ ignore_page_migration); -+ -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to insert firmware pages\n"); -+ /* The interface has been added to the list, so cleanup will -+ * be handled by firmware unloading -+ */ -+ } ++ if (new_config) { ++ ret = update_select_registers(kbdev); ++ if (ret) ++ dev_err(kbdev->dev, ++ "%s: failed to apply new SELECT configuration", ++ __func__); + } + -+ dev_dbg(kbdev->dev, "Processed section '%s'", name); -+ -+ return ret; -+ -+out: -+ if (allocated_pages) { -+ if (!reuse_pages) { -+ if (protected_mode) { -+ kbase_csf_protected_memory_free( -+ kbdev, pma, num_pages_aligned, is_small_page); -+ } else { -+ kbase_mem_pool_free_pages( -+ kbase_mem_pool_group_select( -+ kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), -+ num_pages_aligned, phys, false, false); -+ } -+ } ++ if (!ret) { ++ session->num_prfcnts = num_counters; ++ ret = session_gpu_start(kbdev, ipa_ctrl, session); + } + -+ if (!reuse_pages) -+ kfree(phys); ++ if (!ret) { ++ session->active = true; ++ ipa_ctrl->num_active_sessions++; ++ *client = session; ++ } + -+ kfree(interface); ++exit: ++ spin_unlock_irqrestore(&ipa_ctrl->lock, flags); ++ kbase_pm_context_idle(kbdev); + return ret; +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_register); + -+/** -+ * parse_timeline_metadata_entry() - Process a "timeline metadata" section -+ * -+ * Return: 0 if successful, negative error code on failure -+ * -+ * @kbdev: Kbase device structure -+ * @fw: Firmware image containing the section -+ * @entry: Pointer to the section -+ * @size: Size (in bytes) of the section -+ */ -+static int parse_timeline_metadata_entry(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, const u32 *entry, -+ unsigned int size) ++int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client) +{ -+ const u32 data_start = entry[0]; -+ const u32 data_size = entry[1]; -+ const u32 data_end = data_start + data_size; -+ const char *name = (char *)&entry[2]; -+ struct firmware_timeline_metadata *metadata; -+ const unsigned int name_len = -+ size - TL_METADATA_ENTRY_NAME_OFFSET; -+ size_t allocation_size = sizeof(*metadata) + name_len + 1 + data_size; ++ struct kbase_ipa_control *ipa_ctrl; ++ struct kbase_ipa_control_session *session; ++ int ret = 0; ++ size_t i; ++ unsigned long flags; ++ bool new_config = false, valid_session = false; + -+ if (data_end > fw->size) { -+ dev_err(kbdev->dev, -+ "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx", -+ data_end, fw->size); ++ if (WARN_ON(unlikely(kbdev == NULL))) ++ return -ENODEV; ++ ++ if (WARN_ON(client == NULL)) { ++ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); + return -EINVAL; + } + -+ /* Allocate enough space for firmware_timeline_metadata, -+ * its name and the content. -+ */ -+ metadata = kmalloc(allocation_size, GFP_KERNEL); -+ if (!metadata) -+ return -ENOMEM; -+ -+ metadata->name = (char *)(metadata + 1); -+ metadata->data = (char *)(metadata + 1) + name_len + 1; -+ metadata->size = data_size; -+ -+ memcpy(metadata->name, name, name_len); -+ metadata->name[name_len] = 0; -+ -+ /* Copy metadata's content. 
*/ -+ memcpy(metadata->data, fw->data + data_start, data_size); -+ -+ list_add(&metadata->node, &kbdev->csf.firmware_timeline_metadata); -+ -+ dev_dbg(kbdev->dev, "Timeline metadata '%s'", metadata->name); -+ -+ return 0; -+} -+ -+/** -+ * parse_build_info_metadata_entry() - Process a "build info metadata" section -+ * @kbdev: Kbase device structure -+ * @fw: Firmware image containing the section -+ * @entry: Pointer to the section -+ * @size: Size (in bytes) of the section -+ * -+ * This prints the git SHA of the firmware on frimware load. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+static int parse_build_info_metadata_entry(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, -+ const u32 *entry, unsigned int size) -+{ -+ const u32 meta_start_addr = entry[0]; -+ char *ptr = NULL; -+ size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN); -+ -+ /* Only print git SHA to avoid releasing sensitive information */ -+ ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN); -+ /* Check that we won't overrun the found string */ -+ if (ptr && -+ strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) { -+ char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1]; -+ int i = 0; ++ kbase_pm_context_active(kbdev); + -+ /* Move ptr to start of SHA */ -+ ptr += sha_pattern_len; -+ for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) { -+ /* Ensure that the SHA is made up of hex digits */ -+ if (!isxdigit(ptr[i])) -+ break; ++ ipa_ctrl = &kbdev->csf.ipa_control; ++ session = (struct kbase_ipa_control_session *)client; + -+ git_sha[i] = ptr[i]; -+ } ++ spin_lock_irqsave(&ipa_ctrl->lock, flags); + -+ /* Check if the next char indicates git SHA is dirty */ -+ if (ptr[i] == ' ' || ptr[i] == '+') { -+ git_sha[i] = ptr[i]; -+ i++; ++ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { ++ if (session == &ipa_ctrl->sessions[i]) { ++ valid_session = true; ++ break; + } -+ git_sha[i] = '\0'; -+ -+ dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha); -+ } else -+ dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n"); -+ -+ return 0; -+} -+ -+/** -+ * load_firmware_entry() - Process an entry from a firmware image -+ * -+ * @kbdev: Kbase device -+ * @fw: Firmware image containing the entry -+ * @offset: Byte offset within the image of the entry to load -+ * @header: Header word of the entry -+ * -+ * Read an entry from a firmware image and do any necessary work (e.g. loading -+ * the data into page accessible to the MCU). -+ * -+ * Unknown entries are ignored if the 'optional' flag is set within the entry, -+ * otherwise the function will fail with -EINVAL -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw, -+ u32 offset, u32 header) -+{ -+ const unsigned int type = entry_type(header); -+ unsigned int size = entry_size(header); -+ const bool optional = entry_optional(header); -+ /* Update is used with configuration and tracebuffer entries to -+ * initiate a FIRMWARE_CONFIG_UPDATE, instead of triggering a -+ * silent reset. 
-+ */ -+ const bool updatable = entry_update(header); -+ const u32 *entry = (void *)(fw->data + offset); -+ -+ if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) { -+ dev_err(kbdev->dev, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", -+ offset, size); -+ return -EINVAL; + } + -+ if (size < sizeof(*entry)) { -+ dev_err(kbdev->dev, "Size field too small: %u\n", size); -+ return -EINVAL; ++ if (!valid_session) { ++ dev_err(kbdev->dev, "%s: invalid session handle", __func__); ++ ret = -EINVAL; ++ goto exit; + } + -+ /* Remove the header */ -+ entry++; -+ size -= sizeof(*entry); -+ -+ switch (type) { -+ case CSF_FIRMWARE_ENTRY_TYPE_INTERFACE: -+ /* Interface memory setup */ -+ if (size < INTERFACE_ENTRY_NAME_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Interface memory setup entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ return parse_memory_setup_entry(kbdev, fw, entry, size); -+ case CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION: -+ /* Configuration option */ -+ if (size < CONFIGURATION_ENTRY_NAME_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Configuration option entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ return kbase_csf_firmware_cfg_option_entry_parse( -+ kbdev, fw, entry, size, updatable); -+ case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: -+ /* Trace buffer */ -+ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ return kbase_csf_firmware_parse_trace_buffer_entry( -+ kbdev, entry, size, updatable); -+ case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA: -+ /* Meta data section */ -+ if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Timeline metadata entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ return parse_timeline_metadata_entry(kbdev, fw, entry, size); -+ case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA: -+ if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ return parse_build_info_metadata_entry(kbdev, fw, entry, size); -+ case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: -+ /* Function call list section */ -+ if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", -+ size); -+ return -EINVAL; -+ } -+ kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); -+ return 0; -+ case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: -+ /* Core Dump section */ -+ if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { -+ dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); -+ return -EINVAL; -+ } -+ return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); -+ default: -+ if (!optional) { -+ dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", -+ type); -+ return -EINVAL; -+ } ++ if (ipa_ctrl->num_active_sessions == 0) { ++ dev_err(kbdev->dev, "%s: no active sessions found", __func__); ++ ret = -EINVAL; ++ goto exit; + } + -+ return 0; -+} -+ -+static void free_global_iface(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ if (!session->active) { ++ dev_err(kbdev->dev, "%s: session is already inactive", ++ __func__); ++ ret = -EINVAL; ++ goto exit; ++ } + -+ if (iface->groups) { -+ unsigned int gid; ++ for (i = 0; i < session->num_prfcnts; i++) { ++ struct 
kbase_ipa_control_prfcnt_config *prfcnt_config; ++ u8 type = session->prfcnts[i].type; ++ u8 idx = session->prfcnts[i].select_idx; + -+ for (gid = 0; gid < iface->group_num; ++gid) -+ kfree(iface->groups[gid].streams); ++ prfcnt_config = &ipa_ctrl->blocks[type].select[idx]; + -+ kfree(iface->groups); -+ iface->groups = NULL; ++ if (!WARN_ON(prfcnt_config->refcount == 0)) { ++ prfcnt_config->refcount--; ++ if (prfcnt_config->refcount == 0) { ++ new_config = true; ++ ipa_ctrl->blocks[type].num_available_counters++; ++ } ++ } + } -+} -+ -+/** -+ * iface_gpu_va_to_cpu - Convert a GPU VA address within the shared interface -+ * region to a CPU address, using the existing mapping. -+ * @kbdev: Device pointer -+ * @gpu_va: GPU VA to convert -+ * -+ * Return: A CPU pointer to the location within the shared interface region, or -+ * NULL on failure. -+ */ -+static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) -+{ -+ struct kbase_csf_firmware_interface *interface = -+ kbdev->csf.shared_interface; -+ u8 *kernel_base = interface->kernel_map; + -+ if (gpu_va < interface->virtual || -+ gpu_va >= interface->virtual + interface->num_pages * PAGE_SIZE) { -+ dev_err(kbdev->dev, -+ "Interface address 0x%x not within %u-page region at 0x%x", -+ gpu_va, interface->num_pages, -+ interface->virtual); -+ return NULL; ++ if (new_config) { ++ ret = update_select_registers(kbdev); ++ if (ret) ++ dev_err(kbdev->dev, ++ "%s: failed to apply SELECT configuration", ++ __func__); + } + -+ return (void *)(kernel_base + (gpu_va - interface->virtual)); -+} -+ -+static int parse_cmd_stream_info(struct kbase_device *kbdev, -+ struct kbase_csf_cmd_stream_info *sinfo, -+ u32 *stream_base) -+{ -+ sinfo->kbdev = kbdev; -+ sinfo->features = stream_base[STREAM_FEATURES/4]; -+ sinfo->input = iface_gpu_va_to_cpu(kbdev, -+ stream_base[STREAM_INPUT_VA/4]); -+ sinfo->output = iface_gpu_va_to_cpu(kbdev, -+ stream_base[STREAM_OUTPUT_VA/4]); -+ -+ if (sinfo->input == NULL || sinfo->output == NULL) -+ return -EINVAL; ++ session->num_prfcnts = 0; ++ session->active = false; ++ ipa_ctrl->num_active_sessions--; + -+ return 0; ++exit: ++ spin_unlock_irqrestore(&ipa_ctrl->lock, flags); ++ kbase_pm_context_idle(kbdev); ++ return ret; +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister); + -+static int parse_cmd_stream_group_info(struct kbase_device *kbdev, -+ struct kbase_csf_cmd_stream_group_info *ginfo, -+ u32 *group_base, u32 group_stride) ++int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, ++ u64 *values, size_t num_values, u64 *protected_time) +{ -+ unsigned int sid; ++ struct kbase_ipa_control *ipa_ctrl; ++ struct kbase_ipa_control_session *session; ++ size_t i; ++ unsigned long flags; ++ bool gpu_ready; + -+ ginfo->kbdev = kbdev; -+ ginfo->features = group_base[GROUP_FEATURES/4]; -+ ginfo->input = iface_gpu_va_to_cpu(kbdev, -+ group_base[GROUP_INPUT_VA/4]); -+ ginfo->output = iface_gpu_va_to_cpu(kbdev, -+ group_base[GROUP_OUTPUT_VA/4]); ++ if (WARN_ON(unlikely(kbdev == NULL))) ++ return -ENODEV; + -+ if (ginfo->input == NULL || ginfo->output == NULL) -+ return -ENOMEM; ++ if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) { ++ dev_err(kbdev->dev, "%s: wrong input arguments", __func__); ++ return -EINVAL; ++ } + -+ ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE/4]; -+ ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE/4]; -+ ginfo->stream_num = group_base[GROUP_STREAM_NUM/4]; ++ ipa_ctrl = &kbdev->csf.ipa_control; ++ session = (struct kbase_ipa_control_session 
*)client; + -+ if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP || -+ ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) { -+ dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u", -+ ginfo->stream_num, -+ MIN_SUPPORTED_STREAMS_PER_GROUP, -+ MAX_SUPPORTED_STREAMS_PER_GROUP); ++ if (!session->active) { ++ dev_err(kbdev->dev, ++ "%s: attempt to query inactive session", __func__); + return -EINVAL; + } + -+ ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE/4]; -+ -+ if (ginfo->stream_num * ginfo->stream_stride > group_stride) { ++ if (WARN_ON(num_values < session->num_prfcnts)) { + dev_err(kbdev->dev, -+ "group stride of 0x%x exceeded by %u CSs with stride 0x%x", -+ group_stride, ginfo->stream_num, -+ ginfo->stream_stride); ++ "%s: not enough space (%zu) to return all counter values (%zu)", ++ __func__, num_values, session->num_prfcnts); + return -EINVAL; + } + -+ ginfo->streams = kmalloc_array(ginfo->stream_num, -+ sizeof(*ginfo->streams), GFP_KERNEL); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ gpu_ready = kbdev->pm.backend.gpu_ready; + -+ if (!ginfo->streams) -+ return -ENOMEM; ++ for (i = 0; i < session->num_prfcnts; i++) { ++ struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i]; + -+ for (sid = 0; sid < ginfo->stream_num; sid++) { -+ int err; -+ u32 *stream_base = group_base + (STREAM_CONTROL_0 + -+ ginfo->stream_stride * sid) / 4; ++ calc_prfcnt_delta(kbdev, prfcnt, gpu_ready); ++ /* Return all the accumulated difference */ ++ values[i] = prfcnt->accumulated_diff; ++ prfcnt->accumulated_diff = 0; ++ } + -+ err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid], -+ stream_base); -+ if (err < 0) { -+ /* caller will free the memory for CSs array */ -+ return err; ++ if (protected_time) { ++ u64 time_now = ktime_get_raw_ns(); ++ ++ /* This is the amount of protected-mode time spent prior to ++ * the current protm period. ++ */ ++ *protected_time = session->protm_time; ++ ++ if (kbdev->protected_mode) { ++ *protected_time += ++ time_now - MAX(session->last_query_time, ++ ipa_ctrl->protm_start); + } ++ session->last_query_time = time_now; ++ session->protm_time = 0; + } + -+ return 0; -+} ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+static u32 get_firmware_version(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_firmware_interface *interface = -+ kbdev->csf.shared_interface; -+ u32 *shared_info = interface->kernel_map; ++ for (i = session->num_prfcnts; i < num_values; i++) ++ values[i] = 0; + -+ return shared_info[GLB_VERSION/4]; ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_query); + -+static int parse_capabilities(struct kbase_device *kbdev) ++void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev) +{ -+ struct kbase_csf_firmware_interface *interface = -+ kbdev->csf.shared_interface; -+ u32 *shared_info = interface->kernel_map; -+ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; -+ unsigned int gid; -+ -+ /* All offsets are in bytes, so divide by 4 for access via a u32 pointer -+ */ -+ -+ /* The version number of the global interface is expected to be a -+ * non-zero value. If it's not, the firmware may not have booted. -+ */ -+ iface->version = get_firmware_version(kbdev); -+ if (!iface->version) { -+ dev_err(kbdev->dev, "Version check failed. 
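A hedged sketch of how a client might poll its session through kbase_ipa_control_query(), matching the implementation above; the single-counter setup and helper name are assumptions made for the example:

static u64 example_read_counter(struct kbase_device *kbdev, void *client)
{
	u64 value = 0;
	u64 protected_time = 0;

	if (kbase_ipa_control_query(kbdev, client, &value, 1, &protected_time))
		return 0;

	/* value holds the scaled (and, if requested, frequency-normalised)
	 * delta since the previous query; protected_time reports the
	 * nanoseconds spent in protected mode, during which the counter
	 * could not advance.
	 */
	return value;
}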
Firmware may have failed to boot."); -+ return -EINVAL; -+ } -+ ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ size_t session_idx; ++ int ret; + -+ iface->kbdev = kbdev; -+ iface->features = shared_info[GLB_FEATURES/4]; -+ iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA/4]); -+ iface->output = iface_gpu_va_to_cpu(kbdev, -+ shared_info[GLB_OUTPUT_VA/4]); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (iface->input == NULL || iface->output == NULL) -+ return -ENOMEM; ++ /* GPU should still be ready for use when this function gets called */ ++ WARN_ON(!kbdev->pm.backend.gpu_ready); + -+ iface->group_num = shared_info[GLB_GROUP_NUM/4]; ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); + -+ if (iface->group_num < MIN_SUPPORTED_CSGS || -+ iface->group_num > MAX_SUPPORTED_CSGS) { ++ /* First disable the automatic sampling through TIMER */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0); ++ ret = wait_status(kbdev, STATUS_TIMER_ENABLED); ++ if (ret) { + dev_err(kbdev->dev, -+ "Interface containing %u CSGs outside of range %u-%u", -+ iface->group_num, MIN_SUPPORTED_CSGS, -+ MAX_SUPPORTED_CSGS); -+ return -EINVAL; ++ "Wait for disabling of IPA control timer failed: %d", ++ ret); + } + -+ iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; -+ iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; -+ -+ if (iface->version >= kbase_csf_interface_version(1, 1, 0)) -+ iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; -+ else -+ iface->instr_features = 0; -+ -+ if ((GROUP_CONTROL_0 + -+ (unsigned long)iface->group_num * iface->group_stride) > -+ (interface->num_pages * PAGE_SIZE)) { ++ /* Now issue the manual SAMPLE command */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE); ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ if (ret) { + dev_err(kbdev->dev, -+ "interface size of %u pages exceeded by %u CSGs with stride 0x%x", -+ interface->num_pages, iface->group_num, -+ iface->group_stride); -+ return -EINVAL; ++ "Wait for the completion of manual sample failed: %d", ++ ret); + } + -+ WARN_ON(iface->groups); ++ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; ++ session_idx++) { + -+ iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), -+ GFP_KERNEL); -+ if (!iface->groups) -+ return -ENOMEM; ++ struct kbase_ipa_control_session *session = ++ &ipa_ctrl->sessions[session_idx]; + -+ for (gid = 0; gid < iface->group_num; gid++) { -+ int err; -+ u32 *group_base = shared_info + (GROUP_CONTROL_0 + -+ iface->group_stride * gid) / 4; ++ if (session->active) { ++ size_t i; + -+ err = parse_cmd_stream_group_info(kbdev, &iface->groups[gid], -+ group_base, iface->group_stride); -+ if (err < 0) { -+ free_global_iface(kbdev); -+ return err; ++ for (i = 0; i < session->num_prfcnts; i++) { ++ struct kbase_ipa_control_prfcnt *prfcnt = ++ &session->prfcnts[i]; ++ ++ calc_prfcnt_delta(kbdev, prfcnt, true); ++ } + } + } -+ -+ return 0; ++ spin_unlock(&ipa_ctrl->lock); +} + -+static inline void access_firmware_memory_common(struct kbase_device *kbdev, -+ struct kbase_csf_firmware_interface *interface, u32 offset_bytes, -+ u32 *value, const bool read) ++void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev) +{ -+ u32 page_num = offset_bytes >> PAGE_SHIFT; -+ u32 offset_in_page = offset_bytes & ~PAGE_MASK; -+ struct page *target_page = as_page(interface->phys[page_num]); -+ uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); -+ u32 
*addr = (u32 *)(cpu_addr + offset_in_page); ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ int ret; + -+ if (read) { -+ kbase_sync_single_for_device(kbdev, -+ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, -+ sizeof(u32), DMA_BIDIRECTIONAL); -+ *value = *addr; -+ } else { -+ *addr = *value; -+ kbase_sync_single_for_device(kbdev, -+ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, -+ sizeof(u32), DMA_BIDIRECTIONAL); -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kunmap_atomic((u32 *)cpu_addr); -+} ++ /* GPU should have become ready for use when this function gets called */ ++ WARN_ON(!kbdev->pm.backend.gpu_ready); + -+static inline void access_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value, const bool read) -+{ -+ struct kbase_csf_firmware_interface *interface, *access_interface = NULL; -+ u32 offset_bytes = 0; ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); + -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ if ((gpu_addr >= interface->virtual) && -+ (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { -+ offset_bytes = gpu_addr - interface->virtual; -+ access_interface = interface; -+ break; -+ } ++ ret = update_select_registers(kbdev); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Failed to reconfigure the select registers: %d", ret); + } + -+ if (access_interface) -+ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); -+ else -+ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); -+} -+ -+static inline void access_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value, const bool read) -+{ -+ struct kbase_csf_firmware_interface *interface, *access_interface = NULL; -+ u32 offset_bytes = 0; -+ -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ if ((gpu_addr >= interface->virtual_exe_start) && -+ (gpu_addr < interface->virtual_exe_start + -+ (interface->num_pages << PAGE_SHIFT))) { -+ offset_bytes = gpu_addr - interface->virtual_exe_start; -+ access_interface = interface; -+ -+ /* If there's an overlap in execution address range between a moved and a -+ * non-moved areas, always prefer the moved one. The idea is that FW may -+ * move sections around during init time, but after the layout is settled, -+ * any moved sections are going to override non-moved areas at the same -+ * location. -+ */ -+ if (interface->virtual_exe_start != interface->virtual) -+ break; -+ } -+ } ++ /* Accumulator registers would not contain any sample after GPU power ++ * cycle if the timer has not been enabled first. Initialize all sessions. 
++ */ ++ ret = session_gpu_start(kbdev, ipa_ctrl, NULL); + -+ if (access_interface) -+ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); -+ else -+ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); ++ spin_unlock(&ipa_ctrl->lock); +} + -+void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value) ++void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev) +{ -+ access_firmware_memory(kbdev, gpu_addr, value, true); ++ /* A soft reset is treated as a power down */ ++ kbase_ipa_control_handle_gpu_power_off(kbdev); +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre); + -+void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value) ++void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) +{ -+ access_firmware_memory(kbdev, gpu_addr, &value, false); -+} ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ int ret; ++ u32 status; + -+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value) -+{ -+ access_firmware_memory_exe(kbdev, gpu_addr, value, true); -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value) -+{ -+ access_firmware_memory_exe(kbdev, gpu_addr, &value, false); -+} ++ /* GPU should have become ready for use when this function gets called */ ++ WARN_ON(!kbdev->pm.backend.gpu_ready); + -+void kbase_csf_firmware_cs_input( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, -+ const u32 value) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); + -+ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); -+ input_page_write(info->input, offset, value); -+} ++ /* Check the status reset bit is set before acknowledging it */ ++ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ if (status & STATUS_RESET) { ++ /* Acknowledge the reset command */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK); ++ ret = wait_status(kbdev, STATUS_RESET); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Wait for the reset ack command failed: %d", ++ ret); ++ } ++ } + -+u32 kbase_csf_firmware_cs_input_read( -+ const struct kbase_csf_cmd_stream_info *const info, -+ const u32 offset) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = input_page_read(info->input, offset); ++ spin_unlock(&ipa_ctrl->lock); + -+ dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); -+ return val; ++ kbase_ipa_control_handle_gpu_power_on(kbdev); +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post); + -+void kbase_csf_firmware_cs_input_mask( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, -+ const u32 value, const u32 mask) ++#ifdef KBASE_PM_RUNTIME ++void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); -+ input_page_partial_write(info->input, offset, value, mask); -+} ++ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { ++ /* GPU Sleep is treated as a power down */ ++ kbase_ipa_control_handle_gpu_power_off(kbdev); + -+u32 kbase_csf_firmware_cs_output( 
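The reset handlers above expect a particular calling order; the sketch below is simplified pseudocode of that order under the stated assumptions (hwaccess_lock held, GPU ready on both sides of the reset) and is not the driver's actual reset machinery:

static void example_gpu_reset_sequence(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_ipa_control_handle_gpu_reset_pre(kbdev);	/* final sample, timer off */
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* ... the actual GPU soft reset would happen here ... */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_ipa_control_handle_gpu_reset_post(kbdev);	/* ack reset, reprogram SELECTs */
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}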
-+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = output_page_read(info->output, offset); ++ /* SELECT_CSHW register needs to be cleared to prevent any ++ * IPA control message to be sent to the top level GPU HWCNT. ++ */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0); + -+ dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); -+ return val; ++ /* No need to issue the APPLY command here */ ++ } +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter); + -+void kbase_csf_firmware_csg_input( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset, const u32 value) ++void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", -+ offset, value); -+ input_page_write(info->input, offset, value); ++ if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { ++ /* To keep things simple, currently exit from ++ * GPU Sleep is treated as a power on event where ++ * all 4 SELECT registers are reconfigured. ++ * On exit from sleep, reconfiguration is needed ++ * only for the SELECT_CSHW register. ++ */ ++ kbase_ipa_control_handle_gpu_power_on(kbdev); ++ } +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit); ++#endif + -+u32 kbase_csf_firmware_csg_input_read( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset) ++#if MALI_UNIT_TEST ++void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, ++ u32 clk_index, u32 clk_rate_hz) +{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = input_page_read(info->input, offset); ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ struct kbase_ipa_control_listener_data *listener_data = ++ ipa_ctrl->rtm_listener_data; + -+ dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); -+ return val; ++ kbase_ipa_control_rate_change_notify(&listener_data->listener, ++ clk_index, clk_rate_hz); +} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test); ++#endif + -+void kbase_csf_firmware_csg_input_mask( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset, const u32 value, const u32 mask) ++void kbase_ipa_control_protm_entered(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + -+ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); -+ input_page_partial_write(info->input, offset, value, mask); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ipa_ctrl->protm_start = ktime_get_raw_ns(); +} + -+u32 kbase_csf_firmware_csg_output( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset) ++void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = output_page_read(info->output, offset); ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ size_t i; ++ u64 time_now = ktime_get_raw_ns(); ++ u32 status; + -+ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); -+ return val; -+} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); ++ 
lockdep_assert_held(&kbdev->hwaccess_lock); + -+void kbase_csf_firmware_global_input( -+ const struct kbase_csf_global_iface *const iface, const u32 offset, -+ const u32 value) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; ++ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { + -+ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); -+ input_page_write(iface->input, offset, value); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); ++ struct kbase_ipa_control_session *session = ++ &ipa_ctrl->sessions[i]; + -+void kbase_csf_firmware_global_input_mask( -+ const struct kbase_csf_global_iface *const iface, const u32 offset, -+ const u32 value, const u32 mask) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; ++ if (session->active) { ++ u64 protm_time = time_now - MAX(session->last_query_time, ++ ipa_ctrl->protm_start); + -+ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); -+ input_page_partial_write(iface->input, offset, value, mask); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); ++ session->protm_time += protm_time; ++ } ++ } + -+u32 kbase_csf_firmware_global_input_read( -+ const struct kbase_csf_global_iface *const iface, const u32 offset) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ u32 const val = input_page_read(iface->input, offset); ++ /* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS ++ * register ++ */ ++ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ if (status & STATUS_PROTECTED_MODE) { ++ int ret; + -+ dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); -+ return val; ++ /* Acknowledge the protm command */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), ++ COMMAND_PROTECTED_ACK); ++ ret = wait_status(kbdev, STATUS_PROTECTED_MODE); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Wait for the protm ack command failed: %d", ++ ret); ++ } ++ } +} + -+u32 kbase_csf_firmware_global_output( -+ const struct kbase_csf_global_iface *const iface, const u32 offset) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ u32 const val = output_page_read(iface->output, offset); +diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h +new file mode 100644 +index 000000000..69ff8973b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h +@@ -0,0 +1,271 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); -+ return val; -+} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); ++#ifndef _KBASE_CSF_IPA_CONTROL_H_ ++#define _KBASE_CSF_IPA_CONTROL_H_ ++ ++#include ++ ++/* ++ * Maximum index accepted to configure an IPA Control performance counter. ++ */ ++#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3) + +/** -+ * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell -+ * @doorbell_nr: Doorbell number ++ * struct kbase_ipa_control_perf_counter - Performance counter description + * -+ * Return: CSF host register offset for the specified doorbell number. ++ * @scaling_factor: Scaling factor by which the counter's value shall be ++ * multiplied. A scaling factor of 1 corresponds to units ++ * of 1 second if values are normalised by GPU frequency. ++ * @gpu_norm: Indicating whether counter values shall be normalized by ++ * GPU frequency. If true, returned values represent ++ * an interval of time expressed in seconds (when the scaling ++ * factor is set to 1). ++ * @type: Type of counter block for performance counter. ++ * @idx: Index of the performance counter inside the block. ++ * It may be dependent on GPU architecture. ++ * It cannot be greater than KBASE_IPA_CONTROL_CNT_MAX_IDX. ++ * ++ * This structure is used by clients of the IPA Control component to describe ++ * a performance counter that they intend to read. The counter is identified ++ * by block and index. In addition to that, the client also specifies how ++ * values shall be represented. Raw values are a number of GPU cycles; ++ * if normalized, they are divided by GPU frequency and become an interval ++ * of time expressed in seconds, since the GPU frequency is given in Hz. ++ * The client may specify a scaling factor to multiply counter values before ++ * they are divided by frequency, in case the unit of time of 1 second is ++ * too low in resolution. For instance: a scaling factor of 1000 implies ++ * that the returned value is a time expressed in milliseconds; a scaling ++ * factor of 1000 * 1000 implies that the returned value is a time expressed ++ * in microseconds. + */ -+static u32 csf_doorbell_offset(int doorbell_nr) -+{ -+ WARN_ON(doorbell_nr < 0); -+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++struct kbase_ipa_control_perf_counter { ++ u64 scaling_factor; ++ bool gpu_norm; ++ enum kbase_ipa_core_type type; ++ u8 idx; ++}; + -+ return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); -+} ++/** ++ * kbase_ipa_control_init - Initialize the IPA Control component ++ * ++ * @kbdev: Pointer to Kbase device. ++ */ ++void kbase_ipa_control_init(struct kbase_device *kbdev); + -+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) -+{ -+ kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); -+} -+EXPORT_SYMBOL(kbase_csf_ring_doorbell); ++/** ++ * kbase_ipa_control_term - Terminate the IPA Control component ++ * ++ * @kbdev: Pointer to Kbase device. ++ */ ++void kbase_ipa_control_term(struct kbase_device *kbdev); + +/** -+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault. ++ * kbase_ipa_control_register - Register a client to the IPA Control component + * -+ * @kbdev: Pointer to kbase device ++ * @kbdev: Pointer to Kbase device. ++ * @perf_counters: Array of performance counters the client intends to read. ++ * For each counter the client specifies block, index, ++ * scaling factor and whether it must be normalized by GPU ++ * frequency. 
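A worked example of the scaling semantics described for this structure (illustrative numbers only; the block type and index are placeholders):

/* With gpu_norm = true and scaling_factor = 1000 * 1000, a counter that
 * accumulated 500,000,000 cycles at a 500 MHz GPU clock is reported as
 *
 *	500000000 * 1000000 / 500000000 = 1000000
 *
 * i.e. one second, expressed in microseconds.
 */
static const struct kbase_ipa_control_perf_counter example_usec_counter = {
	.scaling_factor = 1000 * 1000,	/* report time in microseconds */
	.gpu_norm = true,		/* normalise by GPU frequency */
	.type = 0,			/* placeholder block type */
	.idx = 0,			/* placeholder counter index */
};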
++ * @num_counters: Number of performance counters. It cannot exceed the total ++ * number of counters that exist on the IPA Control interface. ++ * @client: Handle to an opaque structure set by IPA Control if ++ * the registration is successful. This handle identifies ++ * a client's session and shall be provided in its future ++ * queries. + * -+ * Report group fatal error to user space for all GPU command queue groups -+ * in the device, terminate them and reset GPU. ++ * A client needs to subscribe to the IPA Control component by declaring which ++ * performance counters it intends to read, and specifying a scaling factor ++ * and whether normalization is requested for each performance counter. ++ * The function shall configure the IPA Control interface accordingly and start ++ * a session for the client that made the request. A unique handle is returned ++ * if registration is successful in order to identify the client's session ++ * and be used for future queries. ++ * ++ * Return: 0 on success, negative -errno on error + */ -+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) -+{ -+ int as; -+ -+ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { -+ unsigned long flags; -+ struct kbase_context *kctx; -+ struct kbase_fault fault; -+ -+ if (as == MCU_AS_NR) -+ continue; ++int kbase_ipa_control_register( ++ struct kbase_device *kbdev, ++ const struct kbase_ipa_control_perf_counter *perf_counters, ++ size_t num_counters, void **client); + -+ /* Only handle the fault for an active address space. Lock is -+ * taken here to atomically get reference to context in an -+ * active address space and retain its refcount. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); ++/** ++ * kbase_ipa_control_unregister - Unregister a client from IPA Control ++ * ++ * @kbdev: Pointer to kbase device. ++ * @client: Handle to an opaque structure that identifies the client session ++ * to terminate, as returned by kbase_ipa_control_register. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_ipa_control_unregister(struct kbase_device *kbdev, ++ const void *client); + -+ if (kctx) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ continue; -+ } ++/** ++ * kbase_ipa_control_query - Query performance counters ++ * ++ * @kbdev: Pointer to kbase device. ++ * @client: Handle to an opaque structure that identifies the client ++ * session, as returned by kbase_ipa_control_register. ++ * @values: Array of values queried from performance counters, whose ++ * length depends on the number of counters requested at ++ * the time of registration. Values are scaled and normalized ++ * and represent the difference since the last query. ++ * @num_values: Number of entries in the array of values that has been ++ * passed by the caller. It must be at least equal to the ++ * number of performance counters the client registered itself ++ * to read. ++ * @protected_time: Time spent in protected mode since last query, ++ * expressed in nanoseconds. This pointer may be NULL if the ++ * client doesn't want to know about this. ++ * ++ * A client that has already opened a session by registering itself to read ++ * some performance counters may use this function to query the values of ++ * those counters. 
The values returned are normalized by GPU frequency if ++ * requested and then multiplied by the scaling factor provided at the time ++ * of registration. Values always represent a difference since the last query. ++ * ++ * Performance counters are not updated while the GPU operates in protected ++ * mode. For this reason, returned values may be unreliable if the GPU has ++ * been in protected mode since the last query. The function returns success ++ * in that case, but it also gives a measure of how much time has been spent ++ * in protected mode. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, ++ u64 *values, size_t num_values, ++ u64 *protected_time); + -+ fault = (struct kbase_fault) { -+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, -+ }; ++/** ++ * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called after GPU has been powered and is ready for use. ++ * After the GPU power on, IPA Control component needs to ensure that the ++ * counters start incrementing again. ++ */ ++void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev); + -+ kbase_csf_ctx_handle_fault(kctx, &fault); -+ kbase_ctx_sched_release_ctx_lock(kctx); -+ } ++/** ++ * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called just before the GPU is powered off when it is still ++ * ready for use. ++ * IPA Control component needs to be aware of the GPU power off so that it can ++ * handle the query from Clients appropriately and return meaningful values ++ * to them. ++ */ ++void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev); + -+ if (kbase_prepare_to_reset_gpu(kbdev, -+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+} ++/** ++ * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called when the GPU is about to be reset. ++ */ ++void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); + +/** -+ * firmware_error_worker - Worker function for handling firmware internal error ++ * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event + * -+ * @data: Pointer to a work_struct embedded in kbase device. ++ * @kbdev: Pointer to kbase device. + * -+ * Handle the CS internal firmware error ++ * This function is called after the GPU has been reset. + */ -+static void firmware_error_worker(struct work_struct *const data) -+{ -+ struct kbase_device *const kbdev = -+ container_of(data, struct kbase_device, csf.fw_error_work); ++void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); + -+ handle_internal_firmware_fatal(kbdev); -+} ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called after MCU has been put to sleep state & L2 cache has ++ * been powered down. The top level part of GPU is still powered up when this ++ * function is called. 
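As a hedged sketch only: one way a client could fold the reported protected-mode time into a busy-time estimate, given that counters do not advance in protected mode; the single-counter setup, nanosecond scaling and helper name are assumptions for illustration:

static u64 example_busy_time_ns(struct kbase_device *kbdev, void *client)
{
	u64 busy_time_ns = 0;	/* counter registered with gpu_norm and ns scaling */
	u64 protm_time_ns = 0;	/* ns spent in protected mode since the last query */

	if (kbase_ipa_control_query(kbdev, client, &busy_time_ns, 1, &protm_time_ns))
		return 0;

	/* Treat the whole protected-mode interval as busy time, one
	 * reasonable policy since the GPU cannot be observed in that mode.
	 */
	return busy_time_ns + protm_time_ns;
}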
++ */ ++void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev); + -+static bool global_request_complete(struct kbase_device *const kbdev, -+ u32 const req_mask) -+{ -+ struct kbase_csf_global_iface *global_iface = -+ &kbdev->csf.global_iface; -+ bool complete = false; -+ unsigned long flags; ++/** ++ * kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called when L2 needs to be powered up and MCU can exit the ++ * sleep state. The top level part of GPU is powered up when this function is ++ * called. ++ * ++ * This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter() ++ * was called previously. ++ */ ++void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev); ++#endif + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++#if MALI_UNIT_TEST ++/** ++ * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change ++ * (only for testing) ++ * ++ * @kbdev: Pointer to kbase device. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). ++ * ++ * Notify the IPA Control component about a GPU rate change. ++ */ ++void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, ++ u32 clk_index, u32 clk_rate_hz); ++#endif /* MALI_UNIT_TEST */ + -+ if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & -+ req_mask) == -+ (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & -+ req_mask)) -+ complete = true; ++/** ++ * kbase_ipa_control_protm_entered - Tell IPA_CONTROL that protected mode ++ * has been entered. ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function provides a means through which IPA_CONTROL can be informed ++ * that the GPU has entered protected mode. Since the GPU cannot access ++ * performance counters while in this mode, this information is useful as ++ * it implies (a) the values of these registers cannot change, so theres no ++ * point trying to read them, and (b) IPA_CONTROL has a means through which ++ * to record the duration of time the GPU is in protected mode, which can ++ * then be forwarded on to clients, who may wish, for example, to assume ++ * that the GPU was busy 100% of the time while in this mode. ++ */ ++void kbase_ipa_control_protm_entered(struct kbase_device *kbdev); + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++/** ++ * kbase_ipa_control_protm_exited - Tell IPA_CONTROL that protected mode ++ * has been exited. ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * This function provides a means through which IPA_CONTROL can be informed ++ * that the GPU has exited from protected mode. ++ */ ++void kbase_ipa_control_protm_exited(struct kbase_device *kbdev); + -+ return complete; -+} ++#endif /* _KBASE_CSF_IPA_CONTROL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +new file mode 100644 +index 000000000..7a939fc33 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +@@ -0,0 +1,3289 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev, -+ u32 const req_mask, unsigned int timeout_ms) -+{ -+ const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms); -+ long remaining; -+ int err = 0; ++#include ++#include ++#include ++#include "mali_kbase_csf.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include ++#include ++#include ++#include ++#include "mali_kbase_csf_tiler_heap.h" ++#include ++#include "mali_kbase_csf_timeout.h" ++#include ++#include ++#include "mali_kbase_csf_event.h" ++#include ++#include "mali_kbase_csf_mcu_shared_reg.h" + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ global_request_complete(kbdev, req_mask), -+ wait_timeout); ++#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) ++#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) + -+ if (!remaining) { -+ dev_warn(kbdev->dev, -+ "[%llu] Timeout (%d ms) waiting for global request %x to complete", -+ kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask); -+ err = -ETIMEDOUT; ++#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */ ++#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096) + -+ } ++#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + -+ return err; -+} ++const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { ++ KBASE_QUEUE_GROUP_PRIORITY_HIGH, ++ KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, ++ KBASE_QUEUE_GROUP_PRIORITY_LOW, ++ KBASE_QUEUE_GROUP_PRIORITY_REALTIME ++}; ++const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = { ++ BASE_QUEUE_GROUP_PRIORITY_REALTIME, ++ BASE_QUEUE_GROUP_PRIORITY_HIGH, ++ BASE_QUEUE_GROUP_PRIORITY_MEDIUM, ++ BASE_QUEUE_GROUP_PRIORITY_LOW ++}; + -+static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask) -+{ -+ return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms); -+} ++/* ++ * struct irq_idle_and_protm_track - Object that tracks the idle and protected mode ++ * request information in an interrupt case across ++ * groups. ++ * ++ * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt. ++ * If NULL, no such case observed in the tracked interrupt case. ++ * @idle_seq: The highest priority group that notified idle. If no such instance in the ++ * interrupt case, marked with the largest field value: U32_MAX. ++ * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case. ++ */ ++struct irq_idle_and_protm_track { ++ struct kbase_queue_group *protm_grp; ++ u32 idle_seq; ++ s8 idle_slot; ++}; + -+static void set_global_request( -+ const struct kbase_csf_global_iface *const global_iface, -+ u32 const req_mask) ++/** ++ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page. 
++ * ++ * @kctx: Pointer to the kbase context ++ */ ++static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx) +{ -+ u32 glb_req; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); ++ struct kbase_device *kbdev = kctx->kbdev; + -+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); -+ glb_req ^= req_mask; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, -+ req_mask); ++ if (unlikely(kctx->csf.user_reg.vma)) ++ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d", ++ kctx->tgid, kctx->id); ++ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link))) ++ list_del_init(&kctx->csf.user_reg.link); +} + -+static void enable_endpoints_global( -+ const struct kbase_csf_global_iface *const global_iface, -+ u64 const shader_core_mask) ++/** ++ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page. ++ * ++ * @kctx: Pointer to the kbase context ++ * ++ * @return: 0 on success. ++ */ ++static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx) +{ -+ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, -+ shader_core_mask & U32_MAX); -+ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, -+ shader_core_mask >> 32); ++ INIT_LIST_HEAD(&kctx->csf.user_reg.link); ++ kctx->csf.user_reg.vma = NULL; ++ kctx->csf.user_reg.file_offset = 0; + -+ set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); ++ return 0; +} + -+static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, -+ const struct kbase_csf_global_iface *const global_iface) ++static void put_user_pages_mmap_handle(struct kbase_context *kctx, ++ struct kbase_queue *queue) +{ -+ u32 pwroff_reg; ++ unsigned long cookie_nr; + -+ if (kbdev->csf.firmware_hctl_core_pwr) -+ pwroff_reg = -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; ++ lockdep_assert_held(&kctx->csf.lock); + -+ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, -+ pwroff_reg); -+ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); ++ if (queue->handle == BASEP_MEM_INVALID_HANDLE) ++ return; + -+ /* Save the programed reg value in its shadow field */ -+ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; ++ cookie_nr = ++ PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + -+ dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg); ++ if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) { ++ /* free up cookie */ ++ kctx->csf.user_pages_info[cookie_nr] = NULL; ++ bitmap_set(kctx->csf.cookies, cookie_nr, 1); ++ } ++ ++ queue->handle = BASEP_MEM_INVALID_HANDLE; +} + -+static void set_timeout_global( -+ const struct kbase_csf_global_iface *const global_iface, -+ u64 const timeout) ++/* Reserve a cookie, to be returned as a handle to userspace for creating ++ * the CPU mapping of the pair of input/output pages and Hw doorbell page. ++ * Will return 0 in case of success otherwise negative on failure. 
++ */ ++static int get_user_pages_mmap_handle(struct kbase_context *kctx, ++ struct kbase_queue *queue) +{ -+ kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, -+ timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); ++ unsigned long cookie, cookie_nr; + -+ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); -+} ++ lockdep_assert_held(&kctx->csf.lock); + -+static void enable_gpu_idle_timer(struct kbase_device *const kbdev) -+{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ if (bitmap_empty(kctx->csf.cookies, ++ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) { ++ dev_err(kctx->kbdev->dev, ++ "No csf cookies available for allocation!"); ++ return -ENOMEM; ++ } + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ /* allocate a cookie */ ++ cookie_nr = find_first_bit(kctx->csf.cookies, ++ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); ++ if (kctx->csf.user_pages_info[cookie_nr]) { ++ dev_err(kctx->kbdev->dev, ++ "Inconsistent state of csf cookies!"); ++ return -EINVAL; ++ } ++ kctx->csf.user_pages_info[cookie_nr] = queue; ++ bitmap_clear(kctx->csf.cookies, cookie_nr, 1); + -+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, -+ kbdev->csf.gpu_idle_dur_count); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, -+ GLB_REQ_IDLE_ENABLE_MASK); -+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", -+ kbdev->csf.gpu_idle_dur_count); ++ /* relocate to correct base */ ++ cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); ++ cookie <<= PAGE_SHIFT; ++ ++ queue->handle = (u64)cookie; ++ ++ return 0; +} + -+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) ++static void init_user_io_pages(struct kbase_queue *queue) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ bool complete = false; -+ unsigned long flags; ++ u32 *input_addr = (u32 *)(queue->user_io_addr); ++ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ input_addr[CS_INSERT_LO/4] = 0; ++ input_addr[CS_INSERT_HI/4] = 0; + -+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == -+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) -+ complete = true; ++ input_addr[CS_EXTRACT_INIT_LO/4] = 0; ++ input_addr[CS_EXTRACT_INIT_HI/4] = 0; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ output_addr[CS_EXTRACT_LO/4] = 0; ++ output_addr[CS_EXTRACT_HI/4] = 0; + -+ return complete; ++ output_addr[CS_ACTIVE/4] = 0; +} + -+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, -+ u32 const req_mask) ++static void kernel_unmap_user_io_pages(struct kbase_context *kctx, ++ struct kbase_queue *queue) +{ -+ u32 glb_debug_req; ++ kbase_gpu_vm_lock(kctx); + -+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); ++ vunmap(queue->user_io_addr); + -+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); -+ glb_debug_req ^= req_mask; ++ WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); ++ atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); + -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); ++ kbase_gpu_vm_unlock(kctx); +} + -+static void request_fw_core_dump( -+ const struct kbase_csf_global_iface *const global_iface) ++static int kernel_map_user_io_pages(struct kbase_context 
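For illustration of how the cookie produced by get_user_pages_mmap_handle() above is meant to be consumed, a hypothetical userspace-side sketch follows; the file-descriptor name and mapping length are assumptions, not taken from the patch:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

/* queue->handle is already a page-aligned byte offset (the cookie was
 * shifted by PAGE_SHIFT before being returned), so it can be passed
 * straight through as the mmap() offset on the kbase device fd.
 */
static void *map_queue_user_io(int kbase_fd, uint64_t handle, size_t length)
{
	return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
		    kbase_fd, (off_t)handle);
}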
*kctx, ++ struct kbase_queue *queue) +{ -+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); ++ struct page *page_list[2]; ++ pgprot_t cpu_map_prot; ++ unsigned long flags; ++ char *user_io_addr; ++ int ret = 0; ++ size_t i; + -+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); ++ kbase_gpu_vm_lock(kctx); + -+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); -+} ++ if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - ++ atomic_read(&kctx->permanent_mapped_pages))) { ++ ret = -ENOMEM; ++ goto unlock; ++ } + -+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) -+{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; -+ int ret; ++ /* The pages are mapped to Userspace also, so use the same mapping ++ * attributes as used inside the CPU page fault handler. ++ */ ++ if (kctx->kbdev->system_coherency == COHERENCY_NONE) ++ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); ++ else ++ cpu_map_prot = PAGE_KERNEL; + -+ /* Serialize CORE_DUMP requests. */ -+ mutex_lock(&kbdev->csf.reg_lock); ++ for (i = 0; i < ARRAY_SIZE(page_list); i++) ++ page_list[i] = as_page(queue->phys[i]); + -+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ request_fw_core_dump(global_iface); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); + -+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */ -+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); -+ if (!ret) -+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); ++ if (!user_io_addr) { ++ dev_err(kctx->kbdev->dev, ++ "%s(): user_io_addr is NULL, queue: %p", ++ __func__, ++ queue); ++ ret = -ENOMEM; ++ } else { ++ atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages); ++ } + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); ++ queue->user_io_addr = user_io_addr; ++ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); + ++unlock: ++ kbase_gpu_vm_unlock(kctx); + return ret; +} + ++static void term_queue_group(struct kbase_queue_group *group); ++static void get_queue(struct kbase_queue *queue); ++static void release_queue(struct kbase_queue *queue); ++ +/** -+ * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core ++ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated ++ * for a queue at the time of bind. + * -+ * @kbdev: The kbase device structure of the device ++ * @kctx: Address of the kbase context within which the queue was created. ++ * @queue: Pointer to the queue to be unlinked. + * -+ * This function needs to be called to enable the Ray Tracing Unit -+ * by writing SHADER_PWRFEATURES only when host controls shader cores power. ++ * This function will free the pair of physical pages allocated for a GPU ++ * command queue, and also release the hardware doorbell page, that were mapped ++ * into the process address space to enable direct submission of commands to ++ * the hardware. Also releases the reference taken on the queue when the mapping ++ * was created. 
++ * ++ * This function will be called only when the mapping is being removed and ++ * so the resources for queue will not get freed up until the mapping is ++ * removed even though userspace could have terminated the queue. ++ * Kernel will ensure that the termination of Kbase context would only be ++ * triggered after the mapping is removed. ++ * ++ * If an explicit or implicit unbind was missed by the userspace then the ++ * mapping will persist. On process exit kernel itself will remove the mapping. + */ -+static void kbasep_enable_rtu(struct kbase_device *kbdev) ++void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) +{ -+ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ kernel_unmap_user_io_pages(kctx, queue); + -+ if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0)) -+ return; ++ kbase_mem_pool_free_pages( ++ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], ++ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); ++ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); + -+ if (kbdev->csf.firmware_hctl_core_pwr) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1); ++ /* The user_io_gpu_va should have been unmapped inside the scheduler */ ++ WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping"); ++ ++ /* If the queue has already been terminated by userspace ++ * then the ref count for queue object will drop to 0 here. ++ */ ++ release_queue(queue); +} ++KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); + -+static void global_init(struct kbase_device *const kbdev, u64 core_mask) ++int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) +{ -+ u32 const ack_irq_mask = -+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | -+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | -+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | -+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | -+ GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; -+ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int ret; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ lockdep_assert_held(&kctx->csf.lock); + -+ kbasep_enable_rtu(kbdev); ++ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], ++ KBASEP_NUM_CS_USER_IO_PAGES, ++ queue->phys, false, kctx->task); ++ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { ++ /* Marking both the phys to zero for indicating there is no phys allocated */ ++ queue->phys[0].tagged_addr = 0; ++ queue->phys[1].tagged_addr = 0; ++ return -ENOMEM; ++ } + -+ /* Update shader core allocation enable mask */ -+ enable_endpoints_global(global_iface, core_mask); -+ enable_shader_poweroff_timer(kbdev, global_iface); ++ ret = kernel_map_user_io_pages(kctx, queue); ++ if (ret) ++ goto kernel_map_failed; + -+ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); ++ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); ++ init_user_io_pages(queue); + -+ /* The GPU idle timer is always enabled for simplicity. Checks will be -+ * done before scheduling the GPU idle worker to see if it is -+ * appropriate for the current power policy. ++ /* user_io_gpu_va is only mapped when scheduler decides to put the queue ++ * on slot at runtime. 
Initialize it to 0, signalling no mapping. + */ -+ enable_gpu_idle_timer(kbdev); ++ queue->user_io_gpu_va = 0; + -+ /* Unmask the interrupts */ -+ kbase_csf_firmware_global_input(global_iface, -+ GLB_ACK_IRQ_MASK, ack_irq_mask); ++ mutex_lock(&kbdev->csf.reg_lock); ++ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) ++ kbdev->csf.db_file_offsets = 0; + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ /* Enable FW MCU read/write debug interfaces */ -+ kbase_csf_firmware_global_input_mask( -+ global_iface, GLB_DEBUG_ACK_IRQ_MASK, -+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, -+ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ queue->db_file_offset = kbdev->csf.db_file_offsets; ++ kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; ++ WARN(kbase_refcount_read(&queue->refcount) != 1, ++ "Incorrect refcounting for queue object\n"); ++ /* This is the second reference taken on the queue object and ++ * would be dropped only when the IO mapping is removed either ++ * explicitly by userspace or implicitly by kernel on process exit. ++ */ ++ get_queue(queue); ++ queue->bind_state = KBASE_CSF_QUEUE_BOUND; ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ return 0; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++kernel_map_failed: ++ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], ++ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); ++ /* Marking both the phys to zero for indicating there is no phys allocated */ ++ queue->phys[0].tagged_addr = 0; ++ queue->phys[1].tagged_addr = 0; ++ ++ return ret; +} ++KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); + -+/** -+ * global_init_on_boot - Sends a global request to control various features. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface -+ * -+ * Currently only the request to enable endpoints and timeout for GPU progress -+ * timer is sent. -+ * -+ * Return: 0 on success, or negative on failure. 
-+ */ -+static int global_init_on_boot(struct kbase_device *const kbdev) ++static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, ++ u8 group_handle) +{ -+ unsigned long flags; -+ u64 core_mask; ++ uint index = group_handle; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ core_mask = kbase_pm_ca_get_core_mask(kbdev); -+ kbdev->csf.firmware_hctl_core_pwr = -+ kbase_pm_no_mcu_core_pwroff(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kctx->csf.lock); + -+ global_init(kbdev, core_mask); ++ if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) { ++ if (WARN_ON(kctx->csf.queue_groups[index]->handle != index)) ++ return NULL; ++ return kctx->csf.queue_groups[index]; ++ } + -+ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); ++ return NULL; +} + -+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, -+ u64 core_mask) ++struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ kbdev->csf.glb_init_request_pending = true; -+ kbdev->csf.firmware_hctl_core_pwr = -+ kbase_pm_no_mcu_core_pwroff(kbdev); -+ global_init(kbdev, core_mask); ++ return find_queue_group(kctx, group_handle); +} ++KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + -+bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) ++int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, ++ u8 group_handle) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ WARN_ON(!kbdev->csf.glb_init_request_pending); ++ struct kbase_queue_group *group; + -+ if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) -+ kbdev->csf.glb_init_request_pending = false; ++ mutex_lock(&kctx->csf.lock); ++ group = find_queue_group(kctx, group_handle); ++ mutex_unlock(&kctx->csf.lock); + -+ return !kbdev->csf.glb_init_request_pending; ++ return group ? 0 : -EINVAL; +} + -+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, -+ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) ++static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr) +{ -+ unsigned long flags; ++ struct kbase_queue *queue; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->csf.lock); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ if (update_core_mask) -+ enable_endpoints_global(&kbdev->csf.global_iface, core_mask); -+ if (update_core_pwroff_timer) -+ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); ++ list_for_each_entry(queue, &kctx->csf.queue_list, link) { ++ if (base_addr == queue->base_addr) ++ return queue; ++ } + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ return NULL; +} + -+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) ++static void get_queue(struct kbase_queue *queue) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | -+ GLB_REQ_CFG_PWROFF_TIMER_MASK); ++ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); +} + -+/** -+ * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU -+ * @work: CSF Work item for reloading the firmware. -+ * -+ * This helper function will reload the firmware image and re-enable the MCU. -+ * It is supposed to be called after MCU(GPU) has been reset. -+ * Unlike the initial boot the firmware binary image is not parsed completely. 
-+ * Only the data sections, which were loaded in memory during the initial boot, -+ * are re-initialized either by zeroing them or copying their data from the -+ * firmware binary image. The memory allocation for the firmware pages and -+ * MMU programming is not needed for the reboot, presuming the firmware binary -+ * file on the filesystem would not change. -+ */ -+static void kbase_csf_firmware_reload_worker(struct work_struct *work) ++static void release_queue(struct kbase_queue *queue) +{ -+ struct kbase_device *kbdev = container_of(work, struct kbase_device, -+ csf.firmware_reload_work); -+ int err; ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ if (kbase_refcount_dec_and_test(&queue->refcount)) { ++ /* The queue can't still be on the per context list. */ ++ WARN_ON(!list_empty(&queue->link)); ++ WARN_ON(queue->group); ++ dev_dbg(queue->kctx->kbdev->dev, ++ "Remove any pending command queue fatal from ctx %d_%d", ++ queue->kctx->tgid, queue->kctx->id); ++ kbase_csf_event_remove_error(queue->kctx, &queue->error); + -+ dev_info(kbdev->dev, "reloading firmware"); ++ /* After this the Userspace would be able to free the ++ * memory for GPU queue. In case the Userspace missed ++ * terminating the queue, the cleanup will happen on ++ * context termination where tear down of region tracker ++ * would free up the GPU queue memory. ++ */ ++ kbase_gpu_vm_lock(queue->kctx); ++ kbase_va_region_no_user_free_dec(queue->queue_reg); ++ kbase_gpu_vm_unlock(queue->kctx); + -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ kfree(queue); ++ } ++} + -+ /* Reload just the data sections from firmware binary image */ -+ err = reload_fw_image(kbdev); -+ if (err) -+ return; ++static void oom_event_worker(struct work_struct *data); ++static void cs_error_worker(struct work_struct *data); + -+ kbase_csf_tl_reader_reset(&kbdev->timeline->csf_tl_reader); ++/* Between reg and reg_ex, one and only one must be null */ ++static int csf_queue_register_internal(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg, ++ struct kbase_ioctl_cs_queue_register_ex *reg_ex) ++{ ++ struct kbase_queue *queue; ++ int ret = 0; ++ struct kbase_va_region *region; ++ u64 queue_addr; ++ size_t queue_size; + -+ /* Reboot the firmware */ -+ kbase_csf_firmware_enable_mcu(kbdev); -+} ++ /* Only one pointer expected, otherwise coding error */ ++ if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) { ++ dev_dbg(kctx->kbdev->dev, ++ "Error, one and only one param-ptr expected!"); ++ return -EINVAL; ++ } + -+void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* struct kbase_ioctl_cs_queue_register_ex contains a full ++ * struct kbase_ioctl_cs_queue_register at the start address. So ++ * the pointer can be safely cast to pointing to a ++ * kbase_ioctl_cs_queue_register object. 
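++ * The common ring buffer fields (buffer_gpu_addr, buffer_size, priority)
++ * are then handled through 'reg', while the ex_* cs_trace fields are only
++ * consumed when reg_ex is non-NULL and ex_buffer_size is non-zero.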
++ */ ++ if (reg_ex) ++ reg = (struct kbase_ioctl_cs_queue_register *)reg_ex; + -+ kbdev->csf.firmware_reloaded = false; ++ /* Validate the queue priority */ ++ if (reg->priority > BASE_QUEUE_MAX_PRIORITY) ++ return -EINVAL; + -+ if (kbdev->csf.firmware_reload_needed) { -+ kbdev->csf.firmware_reload_needed = false; -+ queue_work(system_wq, &kbdev->csf.firmware_reload_work); -+ } else { -+ kbase_csf_firmware_enable_mcu(kbdev); ++ queue_addr = reg->buffer_gpu_addr; ++ queue_size = reg->buffer_size >> PAGE_SHIFT; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ /* Check if queue is already registered */ ++ if (find_queue(kctx, queue_addr) != NULL) { ++ ret = -EINVAL; ++ goto out; + } -+} + -+void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) -+{ -+ u32 version; ++ /* Check if the queue address is valid */ ++ kbase_gpu_vm_lock(kctx); ++ region = kbase_region_tracker_find_region_enclosing_address(kctx, ++ queue_addr); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || ++ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ++ ret = -ENOENT; ++ goto out_unlock_vm; ++ } + -+ if (unlikely(!kbdev->csf.firmware_inited)) -+ return; ++ if (queue_size > (region->nr_pages - ++ ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) { ++ ret = -EINVAL; ++ goto out_unlock_vm; ++ } + -+ /* Check firmware rebooted properly: we do not expect -+ * the version number to change with a running reboot. ++ /* Check address validity on cs_trace buffer etc. Don't care ++ * if not enabled (i.e. when size is 0). + */ -+ version = get_firmware_version(kbdev); -+ -+ if (version != kbdev->csf.global_iface.version) -+ dev_err(kbdev->dev, "Version check failed in firmware reboot."); ++ if (reg_ex && reg_ex->ex_buffer_size) { ++ int buf_pages = (reg_ex->ex_buffer_size + ++ (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; ++ struct kbase_va_region *region_ex = ++ kbase_region_tracker_find_region_enclosing_address(kctx, ++ reg_ex->ex_buffer_base); + -+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u); ++ if (kbase_is_region_invalid_or_free(region_ex)) { ++ ret = -ENOENT; ++ goto out_unlock_vm; ++ } + -+ /* Tell MCU state machine to transit to next state */ -+ kbdev->csf.firmware_reloaded = true; -+ kbase_pm_update_state(kbdev); -+} ++ if (buf_pages > (region_ex->nr_pages - ++ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) { ++ ret = -EINVAL; ++ goto out_unlock_vm; ++ } + -+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) -+{ -+#define HYSTERESIS_VAL_UNIT_SHIFT (10) -+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ -+ u64 freq = arch_timer_get_cntfrq(); -+ u64 dur_val = dur_us; -+ u32 cnt_val_u32, reg_val_u32; -+ bool src_system_timestamp = freq > 0; ++ region_ex = kbase_region_tracker_find_region_enclosing_address( ++ kctx, reg_ex->ex_offset_var_addr); ++ if (kbase_is_region_invalid_or_free(region_ex)) { ++ ret = -ENOENT; ++ goto out_unlock_vm; ++ } ++ } + -+ if (!src_system_timestamp) { -+ /* Get the cycle_counter source alternative */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ if (kbdev->pm.clk_rtm.clks[0]) -+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; -+ else -+ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL); + -+ dev_info( -+ kbdev->dev, -+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); ++ if 
(!queue) { ++ ret = -ENOMEM; ++ goto out_unlock_vm; + } + -+ /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ -+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; -+ dur_val = div_u64(dur_val, 1000000); ++ queue->kctx = kctx; ++ queue->base_addr = queue_addr; + -+ /* Interface limits the value field to S32_MAX */ -+ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ queue->queue_reg = region; ++ kbase_va_region_no_user_free_inc(region); + -+ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); -+ /* add the source flag */ -+ if (src_system_timestamp) -+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ queue->size = (queue_size << PAGE_SHIFT); ++ queue->csi_index = KBASEP_IF_NR_INVALID; ++ queue->enabled = false; + -+ return reg_val_u32; -+} ++ queue->priority = reg->priority; ++ kbase_refcount_set(&queue->refcount, 1); + -+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ u32 dur; ++ queue->group = NULL; ++ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; ++ queue->handle = BASEP_MEM_INVALID_HANDLE; ++ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ dur = kbdev->csf.gpu_idle_hysteresis_us; -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ queue->status_wait = 0; ++ queue->sync_ptr = 0; ++ queue->sync_value = 0; + -+ return dur; -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ queue->saved_cmd_ptr = 0; ++#endif + -+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) -+{ -+ unsigned long flags; -+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); ++ queue->sb_status = 0; ++ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + -+ /* The 'fw_load_lock' is taken to synchronize against the deferred -+ * loading of FW, where the idle timer will be enabled. -+ */ -+ mutex_lock(&kbdev->fw_load_lock); -+ if (unlikely(!kbdev->csf.firmware_inited)) { -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbdev->csf.gpu_idle_hysteresis_us = dur; -+ kbdev->csf.gpu_idle_dur_count = hysteresis_val; -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ mutex_unlock(&kbdev->fw_load_lock); -+ goto end; -+ } -+ mutex_unlock(&kbdev->fw_load_lock); ++ atomic_set(&queue->pending, 0); + -+ kbase_csf_scheduler_pm_active(kbdev); -+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { -+ dev_err(kbdev->dev, -+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); -+ kbase_csf_scheduler_pm_idle(kbdev); -+ return kbdev->csf.gpu_idle_dur_count; -+ } ++ INIT_LIST_HEAD(&queue->link); ++ INIT_LIST_HEAD(&queue->error.link); ++ INIT_WORK(&queue->oom_event_work, oom_event_worker); ++ INIT_WORK(&queue->cs_error_work, cs_error_worker); ++ list_add(&queue->link, &kctx->csf.queue_list); + -+ /* The 'reg_lock' is also taken and is held till the update is not -+ * complete, to ensure the update of idle timer value by multiple Users -+ * gets serialized. -+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ /* The firmware only reads the new idle timer value when the timer is -+ * disabled. 
-+ */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ /* Ensure that the request has taken effect */ -+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); ++ queue->extract_ofs = 0; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbdev->csf.gpu_idle_hysteresis_us = dur; -+ kbdev->csf.gpu_idle_dur_count = hysteresis_val; -+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ region->user_data = queue; + -+ kbase_csf_scheduler_pm_idle(kbdev); ++ /* Initialize the cs_trace configuration parameters, When buffer_size ++ * is 0, trace is disabled. Here we only update the fields when ++ * enabled, otherwise leave them as default zeros. ++ */ ++ if (reg_ex && reg_ex->ex_buffer_size) { ++ u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET( ++ 0, reg_ex->ex_event_size); ++ cfg = CS_INSTR_CONFIG_EVENT_STATE_SET( ++ cfg, reg_ex->ex_event_state); + -+end: -+ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", -+ hysteresis_val); ++ queue->trace_cfg = cfg; ++ queue->trace_buffer_size = reg_ex->ex_buffer_size; ++ queue->trace_buffer_base = reg_ex->ex_buffer_base; ++ queue->trace_offset_ptr = reg_ex->ex_offset_var_addr; ++ } + -+ return hysteresis_val; ++out_unlock_vm: ++ kbase_gpu_vm_unlock(kctx); ++out: ++ mutex_unlock(&kctx->csf.lock); ++ ++ return ret; +} + -+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) ++int kbase_csf_queue_register(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg) +{ -+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ -+ u64 freq = arch_timer_get_cntfrq(); -+ u64 dur_val = dur_us; -+ u32 cnt_val_u32, reg_val_u32; -+ bool src_system_timestamp = freq > 0; ++ /* Validate the ring buffer configuration parameters */ ++ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || ++ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || ++ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || ++ reg->buffer_gpu_addr & ~PAGE_MASK) ++ return -EINVAL; + -+ if (!src_system_timestamp) { -+ /* Get the cycle_counter source alternative */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ if (kbdev->pm.clk_rtm.clks[0]) -+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; -+ else -+ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ return csf_queue_register_internal(kctx, reg, NULL); ++} + -+ dev_info( -+ kbdev->dev, -+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); -+ } ++int kbase_csf_queue_register_ex(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register_ex *reg) ++{ ++ struct kbase_csf_global_iface const *const iface = ++ &kctx->kbdev->csf.global_iface; ++ u32 const glb_version = iface->version; ++ u32 instr = iface->instr_features; ++ u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); ++ u32 min_buf_size = (1u << reg->ex_event_size) * ++ GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + -+ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ -+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; -+ dur_val = div_u64(dur_val, 1000000); ++ /* If cs_trace_command not supported, the call fails */ ++ if (glb_version < kbase_csf_interface_version(1, 1, 0)) ++ return -EINVAL; + -+ /* Interface limits the 
value field to S32_MAX */ -+ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ /* Validate the ring buffer configuration parameters */ ++ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || ++ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || ++ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || ++ reg->buffer_gpu_addr & ~PAGE_MASK) ++ return -EINVAL; + -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); -+ /* add the source flag */ -+ if (src_system_timestamp) -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ /* Validate the cs_trace configuration parameters */ ++ if (reg->ex_buffer_size && ++ ((reg->ex_event_size > max_size) || ++ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || ++ (reg->ex_buffer_size < min_buf_size))) ++ return -EINVAL; + -+ return reg_val_u32; ++ return csf_queue_register_internal(kctx, NULL, reg); +} + -+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) ++static void unbind_queue(struct kbase_context *kctx, ++ struct kbase_queue *queue); ++ ++void kbase_csf_queue_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_terminate *term) +{ -+ u32 pwroff; -+ unsigned long flags; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_queue *queue; ++ int err; ++ bool reset_prevented = false; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless", ++ term->buffer_gpu_addr); ++ else ++ reset_prevented = true; + -+ return pwroff; -+} ++ mutex_lock(&kctx->csf.lock); ++ queue = find_queue(kctx, term->buffer_gpu_addr); + -+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) -+{ -+ unsigned long flags; -+ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); ++ if (queue) { ++ /* As the GPU queue has been terminated by the ++ * user space, undo the actions that were performed when the ++ * queue was registered i.e. remove the queue from the per ++ * context list & release the initial reference. The subsequent ++ * lookups for the queue in find_queue() would fail. ++ */ ++ list_del_init(&queue->link); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->csf.mcu_core_pwroff_dur_us = dur; -+ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Stop the CSI to which queue was bound */ ++ unbind_queue(kctx, queue); + -+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); ++ kbase_gpu_vm_lock(kctx); ++ if (!WARN_ON(!queue->queue_reg)) ++ queue->queue_reg->user_data = NULL; ++ kbase_gpu_vm_unlock(kctx); + -+ return pwroff; ++ release_queue(queue); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); +} + -+/** -+ * kbase_device_csf_iterator_trace_init - Send request to enable iterator -+ * trace port. -+ * @kbdev: Kernel base device pointer -+ * -+ * Return: 0 on success (or if enable request is not sent), or error -+ * code -EINVAL on failure of GPU to acknowledge enable request. 
-+ */ -+static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) ++int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind) +{ -+ /* Enable the iterator trace port if supported by the GPU. -+ * It requires the GPU to have a nonzero "iter_trace_enable" -+ * property in the device tree, and the FW must advertise -+ * this feature in GLB_FEATURES. -+ */ -+ if (kbdev->pm.backend.gpu_powered) { -+ /* check device tree for iterator trace enable property */ -+ const void *iter_trace_param = of_get_property( -+ kbdev->dev->of_node, -+ "iter_trace_enable", NULL); ++ struct kbase_queue *queue; ++ struct kbase_queue_group *group; ++ u8 max_streams; ++ int ret = -EINVAL; + -+ const struct kbase_csf_global_iface *iface = -+ &kbdev->csf.global_iface; ++ mutex_lock(&kctx->csf.lock); + -+ if (iter_trace_param) { -+ u32 iter_trace_value = be32_to_cpup(iter_trace_param); ++ group = find_queue_group(kctx, bind->in.group_handle); ++ queue = find_queue(kctx, bind->in.buffer_gpu_addr); + -+ if ((iface->features & -+ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) && -+ iter_trace_value) { -+ long ack_timeout; ++ if (!group || !queue) ++ goto out; + -+ ack_timeout = kbase_csf_timeout_in_jiffies( -+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)); ++ /* For the time being, all CSGs have the same number of CSs ++ * so we check CSG 0 for this number ++ */ ++ max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num; + -+ /* write enable request to global input */ -+ kbase_csf_firmware_global_input_mask( -+ iface, GLB_REQ, -+ GLB_REQ_ITER_TRACE_ENABLE_MASK, -+ GLB_REQ_ITER_TRACE_ENABLE_MASK); -+ /* Ring global doorbell */ -+ kbase_csf_ring_doorbell(kbdev, -+ CSF_KERNEL_DOORBELL_NR); ++ if (bind->in.csi_index >= max_streams) ++ goto out; + -+ ack_timeout = wait_event_timeout( -+ kbdev->csf.event_wait, -+ !((kbase_csf_firmware_global_input_read( -+ iface, GLB_REQ) ^ -+ kbase_csf_firmware_global_output( -+ iface, GLB_ACK)) & -+ GLB_REQ_ITER_TRACE_ENABLE_MASK), -+ ack_timeout); ++ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) ++ goto out; + -+ return ack_timeout ? 
0 : -EINVAL; ++ if (queue->group || group->bound_queues[bind->in.csi_index]) ++ goto out; + -+ } -+ } ++ ret = get_user_pages_mmap_handle(kctx, queue); ++ if (ret) ++ goto out; + -+ } -+ return 0; ++ bind->out.mmap_handle = queue->handle; ++ group->bound_queues[bind->in.csi_index] = queue; ++ queue->group = group; ++ queue->csi_index = bind->in.csi_index; ++ queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; ++ ++out: ++ mutex_unlock(&kctx->csf.lock); ++ ++ return ret; +} + -+int kbase_csf_firmware_early_init(struct kbase_device *kbdev) ++static struct kbase_queue_group *get_bound_queue_group( ++ struct kbase_queue *queue) +{ -+ init_waitqueue_head(&kbdev->csf.event_wait); -+ kbdev->csf.interrupt_received = false; -+ -+ kbdev->csf.fw_timeout_ms = -+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); ++ struct kbase_context *kctx = queue->kctx; ++ struct kbase_queue_group *group; + -+ kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; -+ kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( -+ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); ++ if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) ++ return NULL; + -+ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); -+ INIT_LIST_HEAD(&kbdev->csf.firmware_config); -+ INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); -+ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); -+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list); -+ INIT_WORK(&kbdev->csf.firmware_reload_work, -+ kbase_csf_firmware_reload_worker); -+ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); ++ if (!queue->group) ++ return NULL; + -+ mutex_init(&kbdev->csf.reg_lock); ++ if (queue->csi_index == KBASEP_IF_NR_INVALID) { ++ dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n"); ++ return NULL; ++ } + -+ kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; ++ group = queue->group; + -+ return 0; -+} ++ if (group->bound_queues[queue->csi_index] != queue) { ++ dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n"); ++ return NULL; ++ } + -+void kbase_csf_firmware_early_term(struct kbase_device *kbdev) -+{ -+ mutex_destroy(&kbdev->csf.reg_lock); ++ return group; +} + -+int kbase_csf_firmware_late_init(struct kbase_device *kbdev) ++static void enqueue_gpu_submission_work(struct kbase_context *const kctx) +{ -+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; -+#ifdef KBASE_PM_RUNTIME -+ if (kbase_pm_gpu_sleep_allowed(kbdev)) -+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; -+#endif -+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); -+ kbdev->csf.gpu_idle_dur_count = -+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); -+ -+ return 0; ++ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); +} + -+int kbase_csf_firmware_load_init(struct kbase_device *kbdev) ++/** ++ * pending_submission_worker() - Work item to process pending kicked GPU command queues. ++ * ++ * @work: Pointer to pending_submission_work. ++ * ++ * This function starts all pending queues, for which the work ++ * was previously submitted via ioctl call from application thread. ++ * If the queue is already scheduled and resident, it will be started ++ * right away, otherwise once the group is made resident. 
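++ *
++ * Pending queues are identified by their 'pending' flag, which is set
++ * atomically in kbase_csf_queue_kick() and cleared here with
++ * atomic_cmpxchg() before the queue is started. If the start attempt
++ * returns -EBUSY, the flag is set again and the work item is re-queued.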
++ */ ++static void pending_submission_worker(struct work_struct *work) +{ -+ const struct firmware *firmware = NULL; -+ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; -+ const u32 magic = FIRMWARE_HEADER_MAGIC; -+ u8 version_major, version_minor; -+ u32 version_hash; -+ u32 entry_end_offset; -+ u32 entry_offset; -+ int ret; -+ -+ lockdep_assert_held(&kbdev->fw_load_lock); -+ -+ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) -+ return -EINVAL; -+ kbdev->as_free &= ~MCU_AS_BITMASK; -+ -+ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, -+ BASE_MEM_GROUP_DEFAULT); ++ struct kbase_context *kctx = ++ container_of(work, struct kbase_context, csf.pending_submission_work); ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_queue *queue; ++ int err = kbase_reset_gpu_prevent_and_wait(kbdev); + -+ if (ret != 0) { -+ /* Release the address space */ -+ kbdev->as_free |= MCU_AS_BITMASK; -+ return ret; ++ if (err) { ++ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); ++ return; + } + -+ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, -+ "Failed to setup the rb tree for managing shared interface segment\n"); -+ goto err_out; -+ } ++ mutex_lock(&kctx->csf.lock); + -+ if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { -+ dev_err(kbdev->dev, -+ "Failed to load firmware image '%s'\n", -+ fw_name); -+ ret = -ENOENT; -+ } else { -+ /* Try to save a copy and then release the loaded firmware image */ -+ mcu_fw->size = firmware->size; -+ mcu_fw->data = vmalloc((unsigned long)mcu_fw->size); ++ /* Iterate through the queue list and schedule the pending ones for submission. */ ++ list_for_each_entry(queue, &kctx->csf.queue_list, link) { ++ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { ++ struct kbase_queue_group *group = get_bound_queue_group(queue); ++ int ret; + -+ if (mcu_fw->data == NULL) { -+ ret = -ENOMEM; -+ } else { -+ memcpy(mcu_fw->data, firmware->data, mcu_fw->size); -+ dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n", -+ mcu_fw->size); -+ } ++ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { ++ dev_dbg(kbdev->dev, "queue is not bound to a group"); ++ continue; ++ } + -+ release_firmware(firmware); ++ ret = kbase_csf_scheduler_queue_start(queue); ++ if (unlikely(ret)) { ++ dev_dbg(kbdev->dev, "Failed to start queue"); ++ if (ret == -EBUSY) { ++ atomic_cmpxchg(&queue->pending, 0, 1); ++ enqueue_gpu_submission_work(kctx); ++ } ++ } ++ } + } + -+ /* If error in loading or saving the image, branches to error out */ -+ if (ret) -+ goto err_out; -+ -+ if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) { -+ dev_err(kbdev->dev, "Firmware too small\n"); -+ ret = -EINVAL; -+ goto err_out; -+ } ++ mutex_unlock(&kctx->csf.lock); + -+ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { -+ dev_err(kbdev->dev, "Incorrect firmware magic\n"); -+ ret = -EINVAL; -+ goto err_out; -+ } ++ kbase_reset_gpu_allow(kbdev); ++} + -+ version_minor = mcu_fw->data[4]; -+ version_major = mcu_fw->data[5]; ++void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) ++{ ++ if (WARN_ON(slot < 0)) ++ return; + -+ if (version_major != FIRMWARE_HEADER_VERSION_MAJOR || -+ version_minor != FIRMWARE_HEADER_VERSION_MINOR) { -+ dev_err(kbdev->dev, -+ "Firmware header version %d.%d not understood\n", -+ version_major, version_minor); -+ ret = -EINVAL; -+ goto err_out; -+ } ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ memcpy(&version_hash, &mcu_fw->data[8], 
sizeof(version_hash)); ++ kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); ++} + -+ dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); ++void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, ++ u32 slot_bitmap) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ const u32 allowed_bitmap = ++ (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); ++ u32 value; + -+ memcpy(&entry_end_offset, &mcu_fw->data[0x10], sizeof(entry_end_offset)); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ if (entry_end_offset > mcu_fw->size) { -+ dev_err(kbdev->dev, "Firmware image is truncated\n"); -+ ret = -EINVAL; -+ goto err_out; -+ } ++ if (WARN_ON(slot_bitmap > allowed_bitmap)) ++ return; + -+ entry_offset = FIRMWARE_HEADER_LENGTH; -+ while (entry_offset < entry_end_offset) { -+ u32 header; -+ unsigned int size; ++ /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and ++ * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request ++ * or 2 CSI requests overlap and FW ends up missing the 2nd request. ++ * Memory barrier is required, both on Host and FW side, to guarantee the ordering. ++ * ++ * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. ++ */ ++ dmb(osh); + -+ memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header)); ++ value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); ++ value ^= slot_bitmap; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, ++ slot_bitmap); + -+ size = entry_size(header); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} + -+ ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to load firmware image\n"); -+ goto err_out; -+ } -+ entry_offset += size; -+ } ++void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ mutex_lock(&kbdev->csf.reg_lock); + -+ if (!kbdev->csf.shared_interface) { -+ dev_err(kbdev->dev, "Shared interface region not found\n"); -+ ret = -EINVAL; -+ goto err_out; -+ } else { -+ ret = setup_shared_iface_static_region(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); -+ goto err_out; -+ } -+ } ++ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) ++ kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr); + -+ ret = kbase_csf_firmware_trace_buffers_init(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); -+ goto err_out; -+ } ++ mutex_unlock(&kbdev->csf.reg_lock); ++} + -+ /* Make sure L2 cache is powered up */ -+ kbase_pm_wait_for_l2_powered(kbdev); ++void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, ++ int csi_index, int csg_nr, ++ bool ring_csg_doorbell) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 value; + -+ /* Load the MMU tables into the selected address space */ -+ ret = load_mmu_tables(kbdev); -+ if (ret != 0) -+ goto err_out; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ boot_csf_firmware(kbdev); ++ if (WARN_ON(csg_nr < 0) || ++ WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) ++ return; + -+ ret = parse_capabilities(kbdev); -+ if (ret != 0) -+ goto err_out; ++ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; + -+ ret = kbase_csf_doorbell_mapping_init(kbdev); -+ if (ret != 0) -+ goto err_out; ++ if (WARN_ON(csi_index < 0) || ++ WARN_ON(csi_index >= 
ginfo->stream_num)) ++ return; + -+ ret = kbase_csf_scheduler_init(kbdev); -+ if (ret != 0) -+ goto err_out; ++ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to ++ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to ++ * FW before CS_REQ/ACK is set. ++ * ++ * 'osh' is used as CPU and GPU would be in the same outer shareable domain. ++ */ ++ dmb(osh); + -+ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); -+ if (ret != 0) -+ goto err_out; ++ value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); ++ value ^= (1 << csi_index); ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, ++ 1 << csi_index); + -+ ret = kbase_csf_timeout_init(kbdev); -+ if (ret != 0) -+ goto err_out; ++ if (likely(ring_csg_doorbell)) ++ kbase_csf_ring_csg_doorbell(kbdev, csg_nr); ++} + -+ ret = global_init_on_boot(kbdev); -+ if (ret != 0) -+ goto err_out; ++int kbase_csf_queue_kick(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_kick *kick) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool trigger_submission = false; ++ struct kbase_va_region *region; ++ int err = 0; + -+ ret = kbase_csf_firmware_cfg_init(kbdev); -+ if (ret != 0) -+ goto err_out; ++ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); + -+ ret = kbase_device_csf_iterator_trace_init(kbdev); -+ if (ret != 0) -+ goto err_out; ++ /* GPU work submission happening asynchronously to prevent the contention with ++ * scheduler lock and as the result blocking application thread. For this reason, ++ * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr ++ * from the context list of active va_regions. ++ * Once the target queue is found the pending flag is set to one atomically avoiding ++ * a race between submission ioctl thread and the work item. 
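++ * The actual start of the queue is then deferred to
++ * pending_submission_worker() on the system_highpri_wq, scheduled via
++ * enqueue_gpu_submission_work() once the vm_lock has been dropped.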
++ */ ++ kbase_gpu_vm_lock(kctx); ++ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr); ++ if (!kbase_is_region_invalid_or_free(region)) { ++ struct kbase_queue *queue = region->user_data; + -+ ret = kbase_csf_firmware_log_init(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); -+ goto err_out; ++ if (queue) { ++ atomic_cmpxchg(&queue->pending, 0, 1); ++ trigger_submission = true; ++ } ++ } else { ++ dev_dbg(kbdev->dev, ++ "Attempt to kick GPU queue without a valid command buffer region"); ++ err = -EFAULT; + } ++ kbase_gpu_vm_unlock(kctx); + -+ if (kbdev->csf.fw_core_dump.available) -+ kbase_csf_firmware_core_dump_init(kbdev); -+ -+ /* Firmware loaded successfully, ret = 0 */ -+ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, -+ (((u64)version_hash) << 32) | -+ (((u64)version_major) << 8) | version_minor); -+ return 0; ++ if (likely(trigger_submission)) ++ enqueue_gpu_submission_work(kctx); + -+err_out: -+ kbase_csf_firmware_unload_term(kbdev); -+ return ret; ++ return err; +} + -+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) ++static void unbind_stopped_queue(struct kbase_context *kctx, ++ struct kbase_queue *queue) +{ -+ unsigned long flags; -+ int ret = 0; ++ lockdep_assert_held(&kctx->csf.lock); + -+ cancel_work_sync(&kbdev->csf.fw_error_work); ++ if (WARN_ON(queue->csi_index < 0)) ++ return; + -+ ret = kbase_reset_gpu_wait(kbdev); ++ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { ++ unsigned long flags; + -+ WARN(ret, "failed to wait for GPU reset"); ++ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); ++ bitmap_clear(queue->group->protm_pending_bitmap, ++ queue->csi_index, 1); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, ++ queue->group, queue, queue->group->protm_pending_bitmap[0]); ++ queue->group->bound_queues[queue->csi_index] = NULL; ++ queue->group = NULL; ++ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); + -+ kbase_csf_firmware_log_term(kbdev); ++ put_user_pages_mmap_handle(kctx, queue); ++ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID); ++ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; ++ } ++} ++/** ++ * unbind_queue() - Remove the linkage between a GPU command queue and the group ++ * to which it was bound or being bound. ++ * ++ * @kctx: Address of the kbase context within which the queue was created. ++ * @queue: Pointer to the queue to be unlinked. ++ * ++ * This function will also send the stop request to firmware for the CS ++ * if the group to which the GPU command queue was bound is scheduled. ++ * ++ * This function would be called when :- ++ * - queue is being unbound. This would happen when the IO mapping ++ * created on bind is removed explicitly by userspace or the process ++ * is getting exited. ++ * - queue group is being terminated which still has queues bound ++ * to it. This could happen on an explicit terminate request from userspace ++ * or when the kbase context is being terminated. ++ * - queue is being terminated without completing the bind operation. ++ * This could happen if either the queue group is terminated ++ * after the CS_QUEUE_BIND ioctl but before the 2nd part of bind operation ++ * to create the IO mapping is initiated. ++ * - There is a failure in executing the 2nd part of bind operation, inside the ++ * mmap handler, which creates the IO mapping for queue. 
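++ *
++ * If the queue is still in the bound state, the CS is first stopped via
++ * kbase_csf_scheduler_queue_stop(); the remaining teardown, i.e. clearing
++ * the group linkage and releasing the mmap handle, is done by
++ * unbind_stopped_queue().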
++ */ + -+ kbase_csf_firmware_cfg_term(kbdev); ++static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) ++{ ++ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); ++ lockdep_assert_held(&kctx->csf.lock); + -+ kbase_csf_timeout_term(kbdev); ++ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { ++ if (queue->bind_state == KBASE_CSF_QUEUE_BOUND) ++ kbase_csf_scheduler_queue_stop(queue); + -+ kbase_csf_free_dummy_user_reg_page(kbdev); ++ unbind_stopped_queue(kctx, queue); ++ } ++} + -+ kbase_csf_scheduler_term(kbdev); ++static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) ++{ ++ /* The queue's phys are zeroed when allocation fails. Both of them being ++ * zero is an impossible condition for a successful allocated set of phy pages. ++ */ + -+ kbase_csf_doorbell_mapping_term(kbdev); ++ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); ++} + -+ /* Explicitly trigger the disabling of MCU through the state machine and -+ * wait for its completion. It may not have been disabled yet due to the -+ * power policy. ++void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) ++{ ++ struct kbase_context *kctx = queue->kctx; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* As the process itself is exiting, the termination of queue group can ++ * be done which would be much faster than stopping of individual ++ * queues. This would ensure a faster exit for the process especially ++ * in the case where CSI gets stuck. ++ * The CSI STOP request will wait for the in flight work to drain ++ * whereas CSG TERM request would result in an immediate abort or ++ * cancellation of the pending work. + */ -+ kbdev->pm.backend.mcu_desired = false; -+ kbase_pm_wait_for_desired_state(kbdev); ++ if (process_exit) { ++ struct kbase_queue_group *group = get_bound_queue_group(queue); + -+ free_global_iface(kbdev); ++ if (group) ++ term_queue_group(group); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->csf.firmware_inited = false; -+ if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) { -+ kbdev->pm.backend.mcu_state = KBASE_MCU_OFF; -+ stop_csf_firmware(kbdev); ++ WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND); ++ } else { ++ unbind_queue(kctx, queue); + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ unload_mmu_tables(kbdev); ++ /* Free the resources, if allocated phys for this queue */ ++ if (kbase_csf_queue_phys_allocated(queue)) ++ kbase_csf_free_command_stream_user_pages(kctx, queue); ++} + -+ kbase_csf_firmware_trace_buffers_term(kbdev); ++void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) ++{ ++ struct kbase_context *kctx = queue->kctx; + -+ while (!list_empty(&kbdev->csf.firmware_interfaces)) { -+ struct kbase_csf_firmware_interface *interface; ++ lockdep_assert_held(&kctx->csf.lock); + -+ interface = -+ list_first_entry(&kbdev->csf.firmware_interfaces, -+ struct kbase_csf_firmware_interface, -+ node); -+ list_del(&interface->node); ++ WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); ++ unbind_stopped_queue(kctx, queue); + -+ vunmap(interface->kernel_map); ++ /* Free the resources, if allocated phys for this queue */ ++ if (kbase_csf_queue_phys_allocated(queue)) ++ kbase_csf_free_command_stream_user_pages(kctx, queue); ++} + -+ if (!interface->reuse_pages) { -+ if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { -+ kbase_csf_protected_memory_free( -+ kbdev, interface->pma, interface->num_pages_aligned, -+ interface->is_small_page); -+ } else { -+ 
kbase_mem_pool_free_pages( -+ kbase_mem_pool_group_select( -+ kbdev, KBASE_MEM_GROUP_CSF_FW, -+ interface->is_small_page), -+ interface->num_pages_aligned, -+ interface->phys, -+ true, false); -+ } ++/** ++ * find_free_group_handle() - Find a free handle for a queue group ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * ++ * Return: a queue group handle on success, or a negative error code on failure. ++ */ ++static int find_free_group_handle(struct kbase_context *const kctx) ++{ ++ /* find the available index in the array of CSGs per this context */ ++ int idx, group_handle = -ENOMEM; + -+ kfree(interface->phys); -+ } ++ lockdep_assert_held(&kctx->csf.lock); + -+ kfree(interface); ++ for (idx = 0; ++ (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0); ++ idx++) { ++ if (!kctx->csf.queue_groups[idx]) ++ group_handle = idx; + } + -+ while (!list_empty(&kbdev->csf.firmware_timeline_metadata)) { -+ struct firmware_timeline_metadata *metadata; ++ return group_handle; ++} + -+ metadata = list_first_entry( -+ &kbdev->csf.firmware_timeline_metadata, -+ struct firmware_timeline_metadata, -+ node); -+ list_del(&metadata->node); ++/** ++ * iface_has_enough_streams() - Check that at least one CSG supports ++ * a given number of CS ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @cs_min: Minimum number of CSs required. ++ * ++ * Return: true if at least one CSG supports the given number ++ * of CSs (or more); otherwise false. ++ */ ++static bool iface_has_enough_streams(struct kbase_device *const kbdev, ++ u32 const cs_min) ++{ ++ bool has_enough = false; ++ struct kbase_csf_cmd_stream_group_info *const groups = ++ kbdev->csf.global_iface.groups; ++ const u32 group_num = kbdev->csf.global_iface.group_num; ++ u32 i; + -+ kfree(metadata); ++ for (i = 0; (i < group_num) && !has_enough; i++) { ++ if (groups[i].stream_num >= cs_min) ++ has_enough = true; + } + -+ if (kbdev->csf.fw.data) { -+ /* Free the copy of the firmware image */ -+ vfree(kbdev->csf.fw.data); -+ kbdev->csf.fw.data = NULL; -+ dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size); -+ } ++ return has_enough; ++} + -+ /* This will also free up the region allocated for the shared interface -+ * entry parsed from the firmware image. ++/** ++ * create_normal_suspend_buffer() - Create normal-mode suspend buffer per ++ * queue group ++ * ++ * @kctx: Pointer to kbase context where the queue group is created at ++ * @s_buf: Pointer to suspend buffer that is attached to queue group ++ * ++ * Return: 0 if phy-pages for the suspend buffer is successfully allocated. ++ * Otherwise -ENOMEM or error code. ++ */ ++static int create_normal_suspend_buffer(struct kbase_context *const kctx, ++ struct kbase_normal_suspend_buffer *s_buf) ++{ ++ const size_t nr_pages = ++ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); ++ int err; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* The suspend buffer's mapping address is valid only when the CSG is to ++ * run on slot, initializing it 0, signalling the buffer is not mapped. 
+ */ -+ kbase_mcu_shared_interface_region_tracker_term(kbdev); ++ s_buf->gpu_va = 0; + -+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); ++ s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); + -+ /* Release the address space */ -+ kbdev->as_free |= MCU_AS_BITMASK; ++ if (!s_buf->phy) ++ return -ENOMEM; ++ ++ /* Get physical page for a normal suspend buffer */ ++ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, ++ &s_buf->phy[0], false, kctx->task); ++ ++ if (err < 0) { ++ kfree(s_buf->phy); ++ return err; ++ } ++ ++ kbase_process_page_usage_inc(kctx, nr_pages); ++ return 0; +} + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 const reg_val) ++static void timer_event_worker(struct work_struct *data); ++static void protm_event_worker(struct work_struct *data); ++static void term_normal_suspend_buffer(struct kbase_context *const kctx, ++ struct kbase_normal_suspend_buffer *s_buf); ++ ++/** ++ * create_suspend_buffers - Setup normal and protected mode ++ * suspend buffers. ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * @group: Pointer to GPU command queue group data. ++ * ++ * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM. ++ */ ++static int create_suspend_buffers(struct kbase_context *const kctx, ++ struct kbase_queue_group * const group) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err; -+ u32 glb_req; ++ if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { ++ dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); ++ return -ENOMEM; ++ } + -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ /* Protected suspend buffer, runtime binding so just initialize it */ ++ group->protected_suspend_buf.gpu_va = 0; ++ group->protected_suspend_buf.pma = NULL; ++ group->protected_suspend_buf.alloc_retries = 0; + -+ /* Set the address and value to write */ -+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); -+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); ++ return 0; ++} + -+ /* Set the Global Debug request for FW MCU write */ -+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); -+ glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, -+ GLB_DEBUG_REQ_FW_AS_WRITE_MASK); ++/** ++ * generate_group_uid() - Makes an ID unique to all kernel base devices ++ * and contexts, for a queue group and CSG. ++ * ++ * Return: A unique ID in the form of an unsigned 32-bit integer ++ */ ++static u32 generate_group_uid(void) ++{ ++ static atomic_t global_csg_uid = ATOMIC_INIT(0); + -+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ return (u32)atomic_inc_return(&global_csg_uid); ++} + -+ /* Notify FW about the Global Debug request */ -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++/** ++ * create_queue_group() - Create a queue group ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * @create: Address of a structure which contains details of the ++ * queue group which is to be created. ++ * ++ * Return: a queue group handle on success, or a negative error code on failure. 
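++ * On success the handle is the index of the new group in the context's
++ * csf.queue_groups array, as chosen by find_free_group_handle(); the
++ * group's normal-mode suspend buffer is allocated via
++ * create_suspend_buffers() and the group starts in the
++ * KBASE_CSF_GROUP_INACTIVE run state.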
++ */ ++static int create_queue_group(struct kbase_context *const kctx, ++ union kbase_ioctl_cs_queue_group_create *const create) ++{ ++ int group_handle = find_free_group_handle(kctx); + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (group_handle < 0) { ++ dev_dbg(kctx->kbdev->dev, ++ "All queue group handles are already in use"); ++ } else { ++ struct kbase_queue_group * const group = ++ kmalloc(sizeof(struct kbase_queue_group), ++ GFP_KERNEL); + -+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ lockdep_assert_held(&kctx->csf.lock); + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ if (!group) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate a queue\n"); ++ group_handle = -ENOMEM; ++ } else { ++ int err = 0; + -+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); ++ group->kctx = kctx; ++ group->handle = group_handle; ++ group->csg_nr = KBASEP_CSG_NR_INVALID; + -+ return err; -+} ++ group->tiler_mask = create->in.tiler_mask; ++ group->fragment_mask = create->in.fragment_mask; ++ group->compute_mask = create->in.compute_mask; + -+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 *reg_val) -+{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err; -+ u32 glb_req; ++ group->tiler_max = create->in.tiler_max; ++ group->fragment_max = create->in.fragment_max; ++ group->compute_max = create->in.compute_max; ++ group->csi_handlers = create->in.csi_handlers; ++ group->priority = kbase_csf_priority_queue_group_priority_to_relative( ++ kbase_csf_priority_check(kctx->kbdev, create->in.priority)); ++ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ group->faulted = false; ++ group->cs_unrecoverable = false; ++ group->reevaluate_idle_status = false; + -+ if (WARN_ON(reg_val == NULL)) -+ return -EINVAL; ++ group->csg_reg = NULL; ++ group->csg_reg_bind_retries = 0; + -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ group->dvs_buf = create->in.dvs_buf; + -+ /* Set the address to read */ -+ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ group->deschedule_deferred_cnt = 0; ++#endif + -+ /* Set the Global Debug request for FW MCU read */ -+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); -+ glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, -+ GLB_DEBUG_REQ_FW_AS_READ_MASK); ++ group->group_uid = generate_group_uid(); ++ create->out.group_uid = group->group_uid; + -+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ INIT_LIST_HEAD(&group->link); ++ INIT_LIST_HEAD(&group->link_to_schedule); ++ INIT_LIST_HEAD(&group->error_fatal.link); ++ INIT_LIST_HEAD(&group->error_timeout.link); ++ INIT_LIST_HEAD(&group->error_tiler_oom.link); ++ INIT_WORK(&group->timer_event_work, timer_event_worker); ++ INIT_WORK(&group->protm_event_work, protm_event_worker); ++ bitmap_zero(group->protm_pending_bitmap, ++ MAX_SUPPORTED_STREAMS_PER_GROUP); + -+ /* Notify FW about the Global Debug request */ -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ group->run_state = KBASE_CSF_GROUP_INACTIVE; ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, ++ group->run_state); + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ err = create_suspend_buffers(kctx, group); + -+ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ if 
(err < 0) { ++ kfree(group); ++ group_handle = err; ++ } else { ++ int j; + -+ if (!err) { -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ kctx->csf.queue_groups[group_handle] = group; ++ for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP; ++ j++) ++ group->bound_queues[j] = NULL; ++ } ++ } + } + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ return group_handle; ++} + -+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); ++static bool dvs_supported(u32 csf_version) ++{ ++ if (GLB_VERSION_MAJOR_GET(csf_version) < 3) ++ return false; + -+ return err; ++ if (GLB_VERSION_MAJOR_GET(csf_version) == 3) ++ if (GLB_VERSION_MINOR_GET(csf_version) < 2) ++ return false; ++ ++ return true; +} + -+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 const val_mask, u32 const reg_val) ++int kbase_csf_queue_group_create(struct kbase_context *const kctx, ++ union kbase_ioctl_cs_queue_group_create *const create) +{ -+ unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; -+ u32 read_val; ++ int err = 0; ++ const u32 tiler_count = hweight64(create->in.tiler_mask); ++ const u32 fragment_count = hweight64(create->in.fragment_mask); ++ const u32 compute_count = hweight64(create->in.compute_mask); ++ size_t i; + -+ dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); ++ for (i = 0; i < sizeof(create->in.padding); i++) { ++ if (create->in.padding[i] != 0) { ++ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); ++ return -EINVAL; ++ } ++ } + -+ while (time_before(jiffies, remaining)) { -+ int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); ++ mutex_lock(&kctx->csf.lock); + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Error reading MCU register value (read_val = %u, expect = %u)\n", -+ read_val, reg_val); -+ return err; -+ } ++ if ((create->in.tiler_max > tiler_count) || ++ (create->in.fragment_max > fragment_count) || ++ (create->in.compute_max > compute_count)) { ++ dev_dbg(kctx->kbdev->dev, ++ "Invalid maximum number of endpoints for a queue group"); ++ err = -EINVAL; ++ } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) { ++ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u", ++ (unsigned int)create->in.priority); ++ err = -EINVAL; ++ } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) { ++ dev_dbg(kctx->kbdev->dev, ++ "No CSG has at least %d CSs", ++ create->in.cs_min); ++ err = -EINVAL; ++ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) { ++ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", ++ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); ++ err = -EINVAL; ++ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && ++ create->in.dvs_buf) { ++ dev_warn( ++ kctx->kbdev->dev, ++ "GPU does not support DVS but userspace is trying to use it"); ++ err = -EINVAL; ++ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) && ++ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) && ++ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) { ++ dev_warn(kctx->kbdev->dev, ++ "DVS buffer pointer is null but size is not 0"); ++ err = -EINVAL; ++ } else { ++ /* For the CSG which satisfies the condition for having ++ * the needed number of CSs, check whether it also conforms ++ * with the 
requirements for at least one of its CSs having ++ * the iterator of the needed type ++ * (note: for CSF v1.0 all CSs in a CSG will have access to ++ * the same iterators) ++ */ ++ const int group_handle = create_queue_group(kctx, create); + -+ if ((read_val & val_mask) == reg_val) -+ return 0; ++ if (group_handle >= 0) ++ create->out.group_handle = group_handle; ++ else ++ err = group_handle; + } + -+ dev_err(kbdev->dev, -+ "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n", -+ read_val, reg_val); ++ mutex_unlock(&kctx->csf.lock); + -+ return -ETIMEDOUT; ++ return err; +} -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) ++/** ++ * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group ++ * ++ * @kctx: Pointer to kbase context where queue group belongs to ++ * @s_buf: Pointer to queue group suspend buffer to be freed ++ */ ++static void term_normal_suspend_buffer(struct kbase_context *const kctx, ++ struct kbase_normal_suspend_buffer *s_buf) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ /* The scheduler is assumed to only call the enable when its internal -+ * state indicates that the idle timer has previously been disabled. So -+ * on entry the expected field values are: -+ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 -+ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 -+ */ -+ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) -+ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); ++ lockdep_assert_held(&kctx->csf.lock); + -+ enable_gpu_idle_timer(kbdev); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ /* The group should not have a bind remaining on any suspend buf region */ ++ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); ++ ++ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, ++ &s_buf->phy[0], false, false); ++ kbase_process_page_usage_dec(kctx, nr_pages); ++ ++ kfree(s_buf->phy); ++ s_buf->phy = NULL; +} + -+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++/** ++ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of ++ * queue group ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
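For illustration only — this sketch is not part of the patch. The endpoint checks in kbase_csf_queue_group_create() above reject a request whose tiler_max/fragment_max/compute_max exceeds the number of bits set in the corresponding affinity mask (hweight64). A minimal standalone C equivalent of that comparison, with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's hweight64(): number of set bits. */
static unsigned int popcount64(uint64_t w)
{
        unsigned int n = 0;

        for (; w; w &= w - 1)
                n++;
        return n;
}

int main(void)
{
        const uint64_t compute_mask = 0x3F; /* 6 compute endpoints requested */
        const uint32_t compute_max = 8;     /* more than the mask provides */

        if (compute_max > popcount64(compute_mask))
                printf("rejected: compute_max (%u) exceeds endpoints in mask (%u)\n",
                       compute_max, popcount64(compute_mask));
        return 0;
}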
++ * @sbuf: Pointer to queue group suspend buffer to be freed ++ */ ++static void term_protected_suspend_buffer(struct kbase_device *const kbdev, ++ struct kbase_protected_suspend_buffer *sbuf) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!"); ++ if (sbuf->pma) { ++ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); ++ sbuf->pma = NULL; ++ } ++} + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; + -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, -+ GLB_REQ_REQ_IDLE_DISABLE, -+ GLB_REQ_IDLE_DISABLE_MASK); -+ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); ++ /* Currently each group supports the same number of CS */ ++ u32 max_streams = ++ kctx->kbdev->csf.global_iface.groups[0].stream_num; ++ u32 i; + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+} ++ lockdep_assert_held(&kctx->csf.lock); + -+void kbase_csf_firmware_ping(struct kbase_device *const kbdev) -+{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE && ++ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_global_request(global_iface, GLB_REQ_PING_MASK); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ for (i = 0; i < max_streams; i++) { ++ struct kbase_queue *queue = ++ group->bound_queues[i]; ++ ++ /* The group is already being evicted from the scheduler */ ++ if (queue) ++ unbind_stopped_queue(kctx, queue); ++ } ++ ++ term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); ++ if (kctx->kbdev->csf.pma_dev) ++ term_protected_suspend_buffer(kctx->kbdev, ++ &group->protected_suspend_buf); ++ ++ group->run_state = KBASE_CSF_GROUP_TERMINATED; ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state); +} + -+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) ++/** ++ * term_queue_group - Terminate a GPU command queue group. ++ * ++ * @group: Pointer to GPU command queue group data. ++ * ++ * Terminates a GPU command queue group. From the userspace perspective the ++ * group will still exist but it can't bind new queues to it. Userspace can ++ * still add work in queues bound to the group but it won't be executed. (This ++ * is because the IO mapping created upon binding such queues is still intact.) 
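For illustration only — not part of the patch. dvs_supported() earlier in this file allows DVS only when the CSF interface version is at least 3.2. A standalone sketch of that gate; the bit layout assumed here (major in bits 31:24, minor in bits 23:16) is for illustration, the real GLB_VERSION_*_GET macros come from the driver's register headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed field layout, for illustration only: major 31:24, minor 23:16. */
#define GLB_VERSION_MAJOR_GET(v) (((v) >> 24) & 0xFFu)
#define GLB_VERSION_MINOR_GET(v) (((v) >> 16) & 0xFFu)

/* Same shape as the driver's dvs_supported(): DVS needs interface >= 3.2. */
static bool dvs_supported(uint32_t csf_version)
{
        if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
                return false;
        if (GLB_VERSION_MAJOR_GET(csf_version) == 3 &&
            GLB_VERSION_MINOR_GET(csf_version) < 2)
                return false;
        return true;
}

int main(void)
{
        printf("3.2 -> %d, 3.1 -> %d, 2.9 -> %d\n",
               dvs_supported(0x03020000u), dvs_supported(0x03010000u),
               dvs_supported(0x02090000u));
        return 0;
}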
++ */ ++static void term_queue_group(struct kbase_queue_group *group) +{ -+ kbase_csf_firmware_ping(kbdev); ++ struct kbase_context *kctx = group->kctx; + -+ return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); ++ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* Stop the group and evict it from the scheduler */ ++ kbase_csf_scheduler_group_deschedule(group); ++ ++ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) ++ return; ++ ++ dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle); ++ ++ kbase_csf_term_descheduled_queue_group(group); +} + -+int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, -+ u64 const timeout) ++/** ++ * wait_group_deferred_deschedule_completion - Wait for refcount of the group to ++ * become 0 that was taken when the group deschedule had to be deferred. ++ * ++ * @group: Pointer to GPU command queue group that is being deleted. ++ * ++ * This function is called when Userspace deletes the group and after the group ++ * has been descheduled. The function synchronizes with the other threads that were ++ * also trying to deschedule the group whilst the dumping was going on for a fault. ++ * Please refer the documentation of wait_for_dump_complete_on_group_deschedule() ++ * for more details. ++ */ ++static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group) +{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_context *kctx = group->kctx; + -+ /* The 'reg_lock' is also taken and is held till the update is not -+ * complete, to ensure the update of timeout value by multiple Users -+ * gets serialized. 
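For illustration only — not part of the patch. The suspend-buffer teardown helpers above size their page arrays as PFN_UP(suspend_size), i.e. the firmware-reported buffer size rounded up to whole pages. A standalone sketch of that arithmetic, assuming the common 4 KiB page size:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's page helpers (assumes 4 KiB pages). */
#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
        /* e.g. a per-CSG suspend buffer size reported by the firmware */
        unsigned long suspend_size = 9000;

        /* 9000 bytes round up to 3 whole pages */
        printf("suspend buffer pages: %lu\n", PFN_UP(suspend_size));
        return 0;
}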
-+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_timeout_global(global_iface, timeout); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ lockdep_assert_held(&kctx->csf.lock); + -+ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ if (likely(!group->deschedule_deferred_cnt)) ++ return; + -+ return err; ++ mutex_unlock(&kctx->csf.lock); ++ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt); ++ mutex_lock(&kctx->csf.lock); ++#endif +} + -+void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) ++static void cancel_queue_group_events(struct kbase_queue_group *group) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ cancel_work_sync(&group->timer_event_work); ++ cancel_work_sync(&group->protm_event_work); ++} + -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); ++static void remove_pending_group_fatal_error(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); -+ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ dev_dbg(kctx->kbdev->dev, ++ "Remove any pending group fatal error from context %pK\n", ++ (void *)group->kctx); ++ ++ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); ++ kbase_csf_event_remove_error(kctx, &group->error_timeout); ++ kbase_csf_event_remove_error(kctx, &group->error_fatal); +} + -+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) ++void kbase_csf_queue_group_terminate(struct kbase_context *kctx, ++ u8 group_handle) +{ ++ struct kbase_queue_group *group; + int err; ++ bool reset_prevented = false; ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless", ++ group_handle); ++ else ++ reset_prevented = true; + -+ err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); ++ mutex_lock(&kctx->csf.lock); + -+ if (!err) { -+#define WAIT_TIMEOUT 5000 /* 50ms timeout */ -+#define DELAY_TIME_IN_US 10 -+ const int max_iterations = WAIT_TIMEOUT; -+ int loop; ++ group = find_queue_group(kctx, group_handle); + -+ /* Wait for the GPU to actually enter protected mode */ -+ for (loop = 0; loop < max_iterations; loop++) { -+ unsigned long flags; -+ bool pmode_exited; ++ if (group) { ++ kctx->csf.queue_groups[group_handle] = NULL; ++ /* Stop the running of the given group */ ++ term_queue_group(group); ++ mutex_unlock(&kctx->csf.lock); + -+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & -+ GPU_STATUS_PROTECTED_MODE_ACTIVE) -+ break; ++ if (reset_prevented) { ++ /* Allow GPU reset before cancelling the group specific ++ * work item to avoid potential deadlock. ++ * Reset prevention isn't needed after group termination. 
++ */ ++ kbase_reset_gpu_allow(kbdev); ++ reset_prevented = false; ++ } + -+ /* Check if GPU already exited the protected mode */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ pmode_exited = -+ !kbase_csf_scheduler_protected_mode_in_use(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ if (pmode_exited) -+ break; ++ /* Cancel any pending event callbacks. If one is in progress ++ * then this thread waits synchronously for it to complete (which ++ * is why we must unlock the context first). We already ensured ++ * that no more callbacks can be enqueued by terminating the group. ++ */ ++ cancel_queue_group_events(group); + -+ udelay(DELAY_TIME_IN_US); -+ } ++ mutex_lock(&kctx->csf.lock); + -+ if (loop == max_iterations) { -+ dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack"); -+ err = -ETIMEDOUT; -+ } -+ } ++ /* Clean up after the termination */ ++ remove_pending_group_fatal_error(group); + -+ if (unlikely(err)) { -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); ++ wait_group_deferred_deschedule_completion(group); + } + -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); + -+ return err; ++ kfree(group); +} ++KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); + -+void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++int kbase_csf_queue_group_suspend(struct kbase_context *kctx, ++ struct kbase_suspend_copy_buffer *sus_buf, ++ u8 group_handle) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int err; ++ struct kbase_queue_group *group; + -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) { ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when suspending group %d", ++ group_handle); ++ return err; ++ } ++ mutex_lock(&kctx->csf.lock); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ /* Validate there are no on-slot groups when sending the -+ * halt request to firmware. -+ */ -+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); -+ set_global_request(global_iface, GLB_REQ_HALT_MASK); -+ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+} ++ group = find_queue_group(kctx, group_handle); ++ if (group) ++ err = kbase_csf_scheduler_group_copy_suspend_buf(group, ++ sus_buf); ++ else ++ err = -EINVAL; + -+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) -+{ -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ mutex_unlock(&kctx->csf.lock); ++ kbase_reset_gpu_allow(kbdev); + -+ /* Trigger the boot of MCU firmware, Use the AUTO mode as -+ * otherwise on fast reset, to exit protected mode, MCU will -+ * not reboot by itself to enter normal mode. 
-+ */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); ++ return err; +} ++#endif + -+#ifdef KBASE_PM_RUNTIME -+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) ++void kbase_csf_add_group_fatal_error( ++ struct kbase_queue_group *const group, ++ struct base_gpu_queue_group_error const *const err_payload) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; ++ struct base_csf_notification error; + -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ if (WARN_ON(!group)) ++ return; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK); -+ dev_dbg(kbdev->dev, "Sending sleep request to MCU"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+} ++ if (WARN_ON(!err_payload)) ++ return; + -+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ error = (struct base_csf_notification) { ++ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = *err_payload ++ } ++ } ++ }; + -+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && -+ kbase_csf_firmware_mcu_halted(kbdev)); ++ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); +} -+#endif + -+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) ++void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err = 0; ++ struct list_head evicted_groups; ++ struct kbase_queue_group *group; ++ int i; + -+ /* Ensure GPU is powered-up until we complete config update.*/ -+ kbase_csf_scheduler_pm_active(kbdev); -+ kbase_csf_scheduler_wait_mcu_active(kbdev); ++ INIT_LIST_HEAD(&evicted_groups); + -+ /* The 'reg_lock' is also taken and is held till the update is -+ * complete, to ensure the config update gets serialized. -+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ mutex_lock(&kctx->csf.lock); + -+ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); -+ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups); ++ while (!list_empty(&evicted_groups)) { ++ group = list_first_entry(&evicted_groups, ++ struct kbase_queue_group, link); + -+ err = wait_for_global_request(kbdev, -+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated", ++ kctx->tgid, kctx->id, group->handle); ++ kbase_csf_term_descheduled_queue_group(group); ++ list_del_init(&group->link); ++ } + -+ kbase_csf_scheduler_pm_idle(kbdev); -+ return err; ++ /* Acting on the queue groups that are pending to be terminated. */ ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ group = kctx->csf.queue_groups[i]; ++ if (group && ++ group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) ++ kbase_csf_term_descheduled_queue_group(group); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); +} + -+/** -+ * copy_grp_and_stm - Copy CS and/or group data -+ * -+ * @iface: Global CSF interface provided by the firmware. 
-+ * @group_data: Pointer where to store all the group data -+ * (sequentially). -+ * @max_group_num: The maximum number of groups to be read. Can be 0, in -+ * which case group_data is unused. -+ * @stream_data: Pointer where to store all the CS data -+ * (sequentially). -+ * @max_total_stream_num: The maximum number of CSs to be read. -+ * Can be 0, in which case stream_data is unused. -+ * -+ * Return: Total number of CSs, summed across all groups. -+ */ -+static u32 copy_grp_and_stm( -+ const struct kbase_csf_global_iface * const iface, -+ struct basep_cs_group_control * const group_data, -+ u32 max_group_num, -+ struct basep_cs_stream_control * const stream_data, -+ u32 max_total_stream_num) ++int kbase_csf_ctx_init(struct kbase_context *kctx) +{ -+ u32 i, total_stream_num = 0; ++ int err = -ENOMEM; + -+ if (WARN_ON((max_group_num > 0) && !group_data)) -+ max_group_num = 0; ++ INIT_LIST_HEAD(&kctx->csf.queue_list); ++ INIT_LIST_HEAD(&kctx->csf.link); + -+ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) -+ max_total_stream_num = 0; ++ kbase_csf_event_init(kctx); + -+ for (i = 0; i < iface->group_num; i++) { -+ u32 j; ++ /* Mark all the cookies as 'free' */ ++ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); + -+ if (i < max_group_num) { -+ group_data[i].features = iface->groups[i].features; -+ group_data[i].stream_num = iface->groups[i].stream_num; -+ group_data[i].suspend_size = -+ iface->groups[i].suspend_size; -+ } -+ for (j = 0; j < iface->groups[i].stream_num; j++) { -+ if (total_stream_num < max_total_stream_num) -+ stream_data[total_stream_num].features = -+ iface->groups[i].streams[j].features; -+ total_stream_num++; -+ } -+ } ++ kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", ++ WQ_UNBOUND, 1); + -+ return total_stream_num; -+} ++ if (likely(kctx->csf.wq)) { ++ err = kbase_csf_scheduler_context_init(kctx); + -+u32 kbase_csf_firmware_get_glb_iface( -+ struct kbase_device *kbdev, -+ struct basep_cs_group_control *const group_data, -+ u32 const max_group_num, -+ struct basep_cs_stream_control *const stream_data, -+ u32 const max_total_stream_num, u32 *const glb_version, -+ u32 *const features, u32 *const group_num, u32 *const prfcnt_size, -+ u32 *instr_features) -+{ -+ const struct kbase_csf_global_iface * const iface = -+ &kbdev->csf.global_iface; ++ if (likely(!err)) { ++ err = kbase_csf_kcpu_queue_context_init(kctx); + -+ if (WARN_ON(!glb_version) || WARN_ON(!features) || -+ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || -+ WARN_ON(!instr_features)) -+ return 0; ++ if (likely(!err)) { ++ err = kbase_csf_tiler_heap_context_init(kctx); + -+ *glb_version = iface->version; -+ *features = iface->features; -+ *group_num = iface->group_num; -+ *prfcnt_size = iface->prfcnt_size; -+ *instr_features = iface->instr_features; ++ if (likely(!err)) { ++ mutex_init(&kctx->csf.lock); ++ INIT_WORK(&kctx->csf.pending_submission_work, ++ pending_submission_worker); + -+ return copy_grp_and_stm(iface, group_data, max_group_num, -+ stream_data, max_total_stream_num); -+} ++ err = kbasep_ctx_user_reg_page_mapping_init(kctx); + -+const char *kbase_csf_firmware_get_timeline_metadata( -+ struct kbase_device *kbdev, const char *name, size_t *size) -+{ -+ struct firmware_timeline_metadata *metadata; ++ if (unlikely(err)) ++ kbase_csf_tiler_heap_context_term(kctx); ++ } + -+ list_for_each_entry( -+ metadata, &kbdev->csf.firmware_timeline_metadata, node) { -+ if (!strcmp(metadata->name, name)) { -+ *size = metadata->size; -+ return metadata->data; ++ if (unlikely(err)) ++ 
kbase_csf_kcpu_queue_context_term(kctx); ++ } ++ ++ if (unlikely(err)) ++ kbase_csf_scheduler_context_term(kctx); + } ++ ++ if (unlikely(err)) ++ destroy_workqueue(kctx->csf.wq); + } + -+ *size = 0; -+ return NULL; ++ return err; +} + -+int kbase_csf_firmware_mcu_shared_mapping_init( -+ struct kbase_device *kbdev, -+ unsigned int num_pages, -+ unsigned long cpu_map_properties, -+ unsigned long gpu_map_properties, -+ struct kbase_csf_mapping *csf_mapping) ++void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, ++ struct kbase_fault *fault) +{ -+ struct tagged_addr *phys; -+ struct kbase_va_region *va_reg; -+ struct page **page_list; -+ void *cpu_addr; -+ int i, ret = 0; -+ pgprot_t cpu_map_prot = PAGE_KERNEL; -+ unsigned long gpu_map_prot; -+ -+ if (cpu_map_properties & PROT_READ) -+ cpu_map_prot = PAGE_KERNEL_RO; ++ int gr; ++ bool reported = false; ++ struct base_gpu_queue_group_error err_payload; ++ int err; ++ struct kbase_device *kbdev; + -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ gpu_map_prot = -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); -+ } else { -+ gpu_map_prot = -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -+ cpu_map_prot = pgprot_writecombine(cpu_map_prot); -+ } ++ if (WARN_ON(!kctx)) ++ return; + -+ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); -+ if (!phys) -+ goto out; ++ if (WARN_ON(!fault)) ++ return; + -+ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); -+ if (!page_list) -+ goto page_list_alloc_error; ++ kbdev = kctx->kbdev; ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; + -+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, -+ phys, false, NULL); -+ if (ret <= 0) -+ goto phys_mem_pool_alloc_error; ++ err_payload = (struct base_gpu_queue_group_error) { ++ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { ++ .fatal_group = { ++ .sideband = fault->addr, ++ .status = fault->status, ++ } ++ } ++ }; + -+ for (i = 0; i < num_pages; i++) -+ page_list[i] = as_page(phys[i]); ++ mutex_lock(&kctx->csf.lock); + -+ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); -+ if (!cpu_addr) -+ goto vmap_error; ++ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { ++ struct kbase_queue_group *const group = ++ kctx->csf.queue_groups[gr]; + -+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, -+ KBASE_REG_ZONE_MCU_SHARED); -+ if (!va_reg) -+ goto va_region_alloc_error; ++ if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { ++ term_queue_group(group); ++ kbase_csf_add_group_fatal_error(group, &err_payload); ++ reported = true; ++ } ++ } + -+ mutex_lock(&kbdev->csf.reg_lock); -+ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); -+ va_reg->flags &= ~KBASE_REG_FREE; -+ if (ret) -+ goto va_region_add_error; -+ mutex_unlock(&kbdev->csf.reg_lock); ++ mutex_unlock(&kctx->csf.lock); + -+ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); -+ gpu_map_properties |= gpu_map_prot; ++ if (reported) ++ kbase_event_wakeup(kctx); + -+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, -+ &phys[0], num_pages, gpu_map_properties, -+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); -+ if (ret) -+ goto mmu_insert_pages_error; ++ kbase_reset_gpu_allow(kbdev); ++} + -+ kfree(page_list); -+ csf_mapping->phys = phys; -+ csf_mapping->cpu_addr = cpu_addr; -+ 
csf_mapping->va_reg = va_reg; -+ csf_mapping->num_pages = num_pages; ++void kbase_csf_ctx_term(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_as *as = NULL; ++ unsigned long flags; ++ u32 i; ++ int err; ++ bool reset_prevented = false; + -+ return 0; ++ /* As the kbase context is terminating, its debugfs sub-directory would ++ * have been removed already and so would be the debugfs file created ++ * for queue groups & kcpu queues, hence no need to explicitly remove ++ * those debugfs files. ++ */ + -+mmu_insert_pages_error: -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, va_reg); -+va_region_add_error: -+ kbase_free_alloced_region(va_reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+va_region_alloc_error: -+ vunmap(cpu_addr); -+vmap_error: -+ kbase_mem_pool_free_pages( -+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ num_pages, phys, false, false); -+ -+phys_mem_pool_alloc_error: -+ kfree(page_list); -+page_list_alloc_error: -+ kfree(phys); -+out: -+ /* Zero-initialize the mapping to make sure that the termination -+ * function doesn't try to unmap or free random addresses. -+ */ -+ csf_mapping->phys = NULL; -+ csf_mapping->cpu_addr = NULL; -+ csf_mapping->va_reg = NULL; -+ csf_mapping->num_pages = 0; ++ /* Wait for a GPU reset if it is happening, prevent it if not happening */ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless", ++ kctx->tgid, kctx->id); ++ else ++ reset_prevented = true; + -+ return -ENOMEM; -+} ++ mutex_lock(&kctx->csf.lock); + -+void kbase_csf_firmware_mcu_shared_mapping_term( -+ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) -+{ -+ if (csf_mapping->va_reg) { -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, csf_mapping->va_reg); -+ kbase_free_alloced_region(csf_mapping->va_reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ } ++ /* Iterate through the queue groups that were not terminated by ++ * userspace and issue the term request to firmware for them. ++ */ ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ struct kbase_queue_group *group = kctx->csf.queue_groups[i]; + -+ if (csf_mapping->phys) { -+ kbase_mem_pool_free_pages( -+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ csf_mapping->num_pages, csf_mapping->phys, false, -+ false); ++ if (group) { ++ remove_pending_group_fatal_error(group); ++ term_queue_group(group); ++ } + } ++ mutex_unlock(&kctx->csf.lock); + -+ vunmap(csf_mapping->cpu_addr); -+ kfree(csf_mapping->phys); -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h -new file mode 100644 -index 000000000..714a14001 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h -@@ -0,0 +1,916 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_CSF_FIRMWARE_H_ -+#define _KBASE_CSF_FIRMWARE_H_ -+ -+#include "device/mali_kbase_device.h" -+#include -+ -+/* -+ * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: -+ * Commit a3266bd49c721e2e0a71f352d83713fbd60caadb -+ * Author: Luis R. Rodriguez -+ * Date: Fri Aug 17 15:46:29 2018 -0700 -+ * -+ * mm: provide a fallback for PAGE_KERNEL_RO for architectures -+ * -+ * Some architectures do not define certain PAGE_KERNEL_* flags, this is -+ * either because: -+ * -+ * a) The way to implement some of these flags is *not yet ported*, or -+ * b) The architecture *has no way* to describe them -+ * -+ * [snip] -+ * -+ * This can be removed once support of 32bit ARM kernels predating 4.19 is no -+ * longer required. -+ */ -+#ifndef PAGE_KERNEL_RO -+#define PAGE_KERNEL_RO PAGE_KERNEL -+#endif ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); + -+/* Address space number to claim for the firmware. */ -+#define MCU_AS_NR 0 -+#define MCU_AS_BITMASK (1 << MCU_AS_NR) ++ cancel_work_sync(&kctx->csf.pending_submission_work); + -+/* Number of available Doorbells */ -+#define CSF_NUM_DOORBELL ((u8)24) ++ /* Now that all queue groups have been terminated, there can be no ++ * more OoM or timer event interrupts but there can be inflight work ++ * items. Destroying the wq will implicitly flush those work items. ++ */ ++ destroy_workqueue(kctx->csf.wq); + -+/* Offset to the first HW doorbell page */ -+#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) ++ /* Wait for the firmware error work item to also finish as it could ++ * be affecting this outgoing context also. ++ */ ++ flush_work(&kctx->kbdev->csf.fw_error_work); + -+/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ -+#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) ++ /* A work item to handle page_fault/bus_fault/gpu_fault could be ++ * pending for the outgoing context. Flush the workqueue that will ++ * execute that work item. ++ */ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) ++ as = &kctx->kbdev->as[kctx->as_nr]; ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ if (as) ++ flush_workqueue(as->pf_wq); + -+/* Doorbell 0 is used by the driver. */ -+#define CSF_KERNEL_DOORBELL_NR ((u32)0) ++ mutex_lock(&kctx->csf.lock); + -+/* Offset of name inside a trace buffer entry in the firmware image */ -+#define TRACE_BUFFER_ENTRY_NAME_OFFSET (0x1C) ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ kfree(kctx->csf.queue_groups[i]); ++ kctx->csf.queue_groups[i] = NULL; ++ } + -+/* All implementations of the host interface with major version 0 must comply -+ * with these restrictions: -+ */ -+/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */ -+#define MIN_SUPPORTED_CSGS 3 -+#define MAX_SUPPORTED_CSGS 31 -+/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */ -+#define MIN_SUPPORTED_STREAMS_PER_GROUP 8 -+/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */ -+#define MAX_SUPPORTED_STREAMS_PER_GROUP 32 ++ /* Iterate through the queues that were not terminated by ++ * userspace and do the required cleanup for them. 
++ */ ++ while (!list_empty(&kctx->csf.queue_list)) { ++ struct kbase_queue *queue; + -+struct kbase_device; ++ queue = list_first_entry(&kctx->csf.queue_list, ++ struct kbase_queue, link); + ++ /* The reference held when the IO mapping was created on bind ++ * would have been dropped otherwise the termination of Kbase ++ * context itself wouldn't have kicked-in. So there shall be ++ * only one reference left that was taken when queue was ++ * registered. ++ */ ++ WARN_ON(kbase_refcount_read(&queue->refcount) != 1); ++ list_del_init(&queue->link); ++ release_queue(queue); ++ } + -+/** -+ * struct kbase_csf_mapping - Memory mapping for CSF memory. -+ * @phys: Physical memory allocation used by the mapping. -+ * @cpu_addr: Starting CPU address for the mapping. -+ * @va_reg: GPU virtual address region for the mapping. -+ * @num_pages: Size of the mapping, in memory pages. -+ */ -+struct kbase_csf_mapping { -+ struct tagged_addr *phys; -+ void *cpu_addr; -+ struct kbase_va_region *va_reg; -+ unsigned int num_pages; -+}; ++ mutex_unlock(&kctx->csf.lock); + -+/** -+ * struct kbase_csf_trace_buffers - List and state of firmware trace buffers. -+ * @list: List of trace buffers descriptors. -+ * @mcu_rw: Metadata for the MCU shared memory mapping used for -+ * GPU-readable,writable/CPU-writable variables. -+ * @mcu_write: Metadata for the MCU shared memory mapping used for -+ * GPU-writable/CPU-readable variables. -+ */ -+struct kbase_csf_trace_buffers { -+ struct list_head list; -+ struct kbase_csf_mapping mcu_rw; -+ struct kbase_csf_mapping mcu_write; -+}; ++ kbasep_ctx_user_reg_page_mapping_term(kctx); ++ kbase_csf_tiler_heap_context_term(kctx); ++ kbase_csf_kcpu_queue_context_term(kctx); ++ kbase_csf_scheduler_context_term(kctx); ++ kbase_csf_event_term(kctx); + -+/** -+ * struct kbase_csf_cmd_stream_info - CSI provided by the firmware. -+ * -+ * @kbdev: Address of the instance of a GPU platform device that implements -+ * this interface. -+ * @features: Bit field of CS features (e.g. which types of jobs -+ * are supported). Bits 7:0 specify the number of work registers(-1). -+ * Bits 11:8 specify the number of scoreboard entries(-1). -+ * @input: Address of CSI input page. -+ * @output: Address of CSI output page. -+ */ -+struct kbase_csf_cmd_stream_info { -+ struct kbase_device *kbdev; -+ u32 features; -+ void *input; -+ void *output; -+}; ++ mutex_destroy(&kctx->csf.lock); ++} + +/** -+ * kbase_csf_firmware_cs_input() - Set a word in a CS's input page ++ * handle_oom_event - Handle the OoM event generated by the firmware for the ++ * CSI. + * -+ * @info: CSI provided by the firmware. -+ * @offset: Offset of the word to be written, in bytes. -+ * @value: Value to be written. -+ */ -+void kbase_csf_firmware_cs_input( -+ const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value); -+ -+/** -+ * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page ++ * @group: Pointer to the CSG group the oom-event belongs to. ++ * @stream: Pointer to the structure containing info provided by the firmware ++ * about the CSI. + * -+ * Return: Value of the word read from the CS's input page. ++ * This function will handle the OoM event request from the firmware for the ++ * CS. It will retrieve the address of heap context and heap's ++ * statistics (like number of render passes in-flight) from the CS's kernel ++ * output page and pass them to the tiler heap function to allocate a ++ * new chunk. 
++ * It will also update the CS's kernel input page with the address ++ * of a new chunk that was allocated. + * -+ * @info: CSI provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. ++ * Return: 0 if successfully handled the request, otherwise a negative error ++ * code on failure. + */ -+u32 kbase_csf_firmware_cs_input_read( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset); ++static int handle_oom_event(struct kbase_queue_group *const group, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ struct kbase_context *const kctx = group->kctx; ++ u64 gpu_heap_va = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); ++ const u32 vt_start = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START); ++ const u32 vt_end = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END); ++ const u32 frag_end = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END); ++ u32 renderpasses_in_flight; ++ u32 pending_frag_count; ++ u64 new_chunk_ptr; ++ int err; ++ bool frag_end_err = false; + -+/** -+ * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page -+ * -+ * @info: CSI provided by the firmware. -+ * @offset: Offset of the word to be modified, in bytes. -+ * @value: Value to be written. -+ * @mask: Bitmask with the bits to be modified set. -+ */ -+void kbase_csf_firmware_cs_input_mask( -+ const struct kbase_csf_cmd_stream_info *info, u32 offset, -+ u32 value, u32 mask); ++ if ((frag_end > vt_end) || (vt_end >= vt_start)) { ++ frag_end_err = true; ++ dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", ++ vt_start, vt_end, frag_end); ++ } ++ if (frag_end_err) { ++ renderpasses_in_flight = 1; ++ pending_frag_count = 1; ++ } else { ++ renderpasses_in_flight = vt_start - frag_end; ++ pending_frag_count = vt_end - frag_end; ++ } + -+/** -+ * kbase_csf_firmware_cs_output() - Read a word in a CS's output page -+ * -+ * Return: Value of the word read from the CS's output page. -+ * -+ * @info: CSI provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. -+ */ -+u32 kbase_csf_firmware_cs_output( -+ const struct kbase_csf_cmd_stream_info *info, u32 offset); -+/** -+ * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the -+ * firmware. -+ * -+ * @kbdev: Address of the instance of a GPU platform device that implements -+ * this interface. -+ * @features: Bit mask of features. Reserved bits should be 0, and should -+ * be ignored. -+ * @input: Address of global interface input page. -+ * @output: Address of global interface output page. -+ * @suspend_size: Size in bytes for normal suspend buffer for the CSG -+ * @protm_suspend_size: Size in bytes for protected mode suspend buffer -+ * for the CSG. -+ * @stream_num: Number of CSs in the CSG. -+ * @stream_stride: Stride in bytes in JASID0 virtual address between -+ * CS capability structures. -+ * @streams: Address of an array of CS capability structures. 
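For illustration only — not part of the patch. handle_oom_event() above turns the vt_start/vt_end/frag_end counters read from the CS output page into the statistics passed to kbase_csf_tiler_heap_alloc_new_chunk(). A standalone illustration of that arithmetic with made-up counter values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Example heap statistics as read from the CS output page
         * (values chosen purely for illustration).
         */
        uint32_t vt_start = 12;  /* vertex/tiling passes started */
        uint32_t vt_end   = 10;  /* vertex/tiling passes ended */
        uint32_t frag_end = 7;   /* fragment passes ended */

        /* Same derivation as handle_oom_event() above. */
        uint32_t renderpasses_in_flight = vt_start - frag_end; /* 5 */
        uint32_t pending_frag_count     = vt_end - frag_end;   /* 3 */

        printf("in flight: %u, pending fragment: %u\n",
               renderpasses_in_flight, pending_frag_count);
        return 0;
}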
-+ */ -+struct kbase_csf_cmd_stream_group_info { -+ struct kbase_device *kbdev; -+ u32 features; -+ void *input; -+ void *output; -+ u32 suspend_size; -+ u32 protm_suspend_size; -+ u32 stream_num; -+ u32 stream_stride; -+ struct kbase_csf_cmd_stream_info *streams; -+}; ++ err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, ++ gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); + -+/** -+ * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page -+ * -+ * @info: CSG interface provided by the firmware. -+ * @offset: Offset of the word to be written, in bytes. -+ * @value: Value to be written. -+ */ -+void kbase_csf_firmware_csg_input( -+ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, -+ u32 value); ++ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) && ++ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) { ++ /* The group allows incremental rendering, trigger it */ ++ new_chunk_ptr = 0; ++ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n", ++ group->handle, group->csg_nr); ++ } else if (err == -EBUSY) { ++ /* Acknowledge with a NULL chunk (firmware will then wait for ++ * the fragment jobs to complete and release chunks) ++ */ ++ new_chunk_ptr = 0; ++ } else if (err) ++ return err; + -+/** -+ * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page -+ * -+ * Return: Value of the word read from the CSG's input page. -+ * -+ * @info: CSG interface provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. -+ */ -+u32 kbase_csf_firmware_csg_input_read( -+ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, ++ new_chunk_ptr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI, ++ new_chunk_ptr >> 32); + -+/** -+ * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's -+ * input page -+ * -+ * @info: CSG interface provided by the firmware. -+ * @offset: Offset of the word to be modified, in bytes. -+ * @value: Value to be written. -+ * @mask: Bitmask with the bits to be modified set. -+ */ -+void kbase_csf_firmware_csg_input_mask( -+ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, -+ u32 value, u32 mask); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO, ++ new_chunk_ptr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI, ++ new_chunk_ptr >> 32); + -+/** -+ * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page -+ * -+ * Return: Value of the word read from the CSG's output page. -+ * -+ * @info: CSG interface provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. -+ */ -+u32 kbase_csf_firmware_csg_output( -+ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); ++ return 0; ++} + +/** -+ * struct kbase_csf_global_iface - Global CSF interface -+ * provided by the firmware. ++ * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event + * -+ * @kbdev: Address of the instance of a GPU platform device that implements -+ * this interface. -+ * @version: Bits 31:16 hold the major version number and 15:0 hold the minor -+ * version number. A higher minor version is backwards-compatible -+ * with a lower minor version for the same major version. -+ * @features: Bit mask of features (e.g. whether certain types of job can -+ * be suspended). Reserved bits should be 0, and should be ignored. -+ * @input: Address of global interface input page. 
-+ * @output: Address of global interface output page. -+ * @group_num: Number of CSGs supported. -+ * @group_stride: Stride in bytes in JASID0 virtual address between -+ * CSG capability structures. -+ * @prfcnt_size: Performance counters size. -+ * @instr_features: Instrumentation features. (csf >= 1.1.0) -+ * @groups: Address of an array of CSG capability structures. ++ * @group: Pointer to the GPU command queue group that encountered the error + */ -+struct kbase_csf_global_iface { -+ struct kbase_device *kbdev; -+ u32 version; -+ u32 features; -+ void *input; -+ void *output; -+ u32 group_num; -+ u32 group_stride; -+ u32 prfcnt_size; -+ u32 instr_features; -+ struct kbase_csf_cmd_stream_group_info *groups; -+}; ++static void report_tiler_oom_error(struct kbase_queue_group *group) ++{ ++ struct base_csf_notification const ++ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, ++ } } } }; + -+/** -+ * kbase_csf_firmware_global_input() - Set a word in the global input page -+ * -+ * @iface: CSF interface provided by the firmware. -+ * @offset: Offset of the word to be written, in bytes. -+ * @value: Value to be written. -+ */ -+void kbase_csf_firmware_global_input( -+ const struct kbase_csf_global_iface *iface, u32 offset, u32 value); ++ kbase_csf_event_add_error(group->kctx, ++ &group->error_tiler_oom, ++ &error); ++ kbase_event_wakeup(group->kctx); ++} + -+/** -+ * kbase_csf_firmware_global_input_mask() - Set part of a word in the global -+ * input page -+ * -+ * @iface: CSF interface provided by the firmware. -+ * @offset: Offset of the word to be modified, in bytes. -+ * @value: Value to be written. -+ * @mask: Bitmask with the bits to be modified set. -+ */ -+void kbase_csf_firmware_global_input_mask( -+ const struct kbase_csf_global_iface *iface, u32 offset, -+ u32 value, u32 mask); ++static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) ++{ ++ int err; ++ const unsigned int cache_flush_wait_timeout_ms = 2000; + -+/** -+ * kbase_csf_firmware_global_input_read() - Read a word in a global input page -+ * -+ * Return: Value of the word read from the global input page. -+ * -+ * @info: CSG interface provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. -+ */ -+u32 kbase_csf_firmware_global_input_read( -+ const struct kbase_csf_global_iface *info, u32 offset); ++ kbase_pm_lock(kbdev); ++ /* With the advent of partial cache flush, dirty cache lines could ++ * be left in the GPU L2 caches by terminating the queue group here ++ * without waiting for proper cache maintenance. A full cache flush ++ * here will prevent these dirty cache lines from being arbitrarily ++ * evicted later and possible causing memory corruption. ++ */ ++ if (kbdev->pm.backend.gpu_powered) { ++ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); ++ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); + -+/** -+ * kbase_csf_firmware_global_output() - Read a word in the global output page -+ * -+ * Return: Value of the word read from the global output page. -+ * -+ * @iface: CSF interface provided by the firmware. -+ * @offset: Offset of the word to be read, in bytes. 
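For illustration only — not part of the patch. The *_input_mask() helpers documented around here (and used by the queue-group code above, e.g. kbase_csf_firmware_cs_input_mask()) take a value/mask pair, which suggests a masked read-modify-write: only the bits set in the mask are updated, the rest keep their current contents. A standalone sketch of that operation:

#include <stdint.h>
#include <stdio.h>

/* Masked read-modify-write: bits set in 'mask' are taken from 'value',
 * all other bits are preserved.
 */
static uint32_t masked_write(uint32_t current, uint32_t value, uint32_t mask)
{
        return (current & ~mask) | (value & mask);
}

int main(void)
{
        uint32_t word = 0xAABBCCDDu;

        word = masked_write(word, 0x00110000u, 0x00FF0000u);
        printf("0x%08X\n", word); /* prints 0xAA11CCDD */
        return 0;
}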
-+ */ -+u32 kbase_csf_firmware_global_output( -+ const struct kbase_csf_global_iface *iface, u32 offset); ++ if (err) { ++ dev_warn( ++ kbdev->dev, ++ "[%llu] Timeout waiting for cache clean to complete after fatal error", ++ kbase_backend_get_cycle_cnt(kbdev)); + -+/** -+ * kbase_csf_ring_doorbell() - Ring the doorbell -+ * -+ * @kbdev: An instance of the GPU platform device -+ * @doorbell_nr: Index of the HW doorbell page -+ */ -+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } ++ } + -+/** -+ * kbase_csf_read_firmware_memory - Read a value in a GPU address -+ * -+ * @kbdev: Device pointer -+ * @gpu_addr: GPU address to read -+ * @value: output pointer to which the read value will be written. -+ * -+ * This function read a value in a GPU address that belongs to -+ * a private firmware memory region. The function assumes that the location -+ * is not permanently mapped on the CPU address space, therefore it maps it -+ * and then unmaps it to access it independently. -+ */ -+void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value); ++ kbase_pm_unlock(kbdev); ++} + +/** -+ * kbase_csf_update_firmware_memory - Write a value in a GPU address -+ * -+ * @kbdev: Device pointer -+ * @gpu_addr: GPU address to write -+ * @value: Value to write ++ * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. + * -+ * This function writes a given value in a GPU address that belongs to -+ * a private firmware memory region. The function assumes that the destination -+ * is not permanently mapped on the CPU address space, therefore it maps it -+ * and then unmaps it to access it independently. -+ */ -+void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value); -+ -+/** -+ * kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the -+ * region of its final execution location. ++ * @queue: Pointer to queue for which out-of-memory event was received. + * -+ * @kbdev: Device pointer -+ * @gpu_addr: GPU address to read -+ * @value: Output pointer to which the read value will be written ++ * Called with the CSF locked for the affected GPU virtual address space. ++ * Do not call in interrupt context. + * -+ * This function read a value in a GPU address that belongs to a private loaded -+ * firmware memory region based on its final execution location. The function -+ * assumes that the location is not permanently mapped on the CPU address space, -+ * therefore it maps it and then unmaps it to access it independently. This function -+ * needs to be used when accessing firmware memory regions which will be moved to -+ * their final execution location during firmware boot using an address based on the -+ * final execution location. ++ * Handles tiler out-of-memory for a GPU command queue and then clears the ++ * notification to allow the firmware to report out-of-memory again in future. ++ * If the out-of-memory condition was successfully handled then this function ++ * rings the relevant doorbell to notify the firmware; otherwise, it terminates ++ * the GPU command queue group to which the queue is bound and notify a waiting ++ * user space client of the failure. 
+ */ -+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value); ++static void kbase_queue_oom_event(struct kbase_queue *const queue) ++{ ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_queue_group *group; ++ int slot_num, err; ++ struct kbase_csf_cmd_stream_group_info const *ginfo; ++ struct kbase_csf_cmd_stream_info const *stream; ++ int csi_index = queue->csi_index; ++ u32 cs_oom_ack, cs_oom_req; ++ unsigned long flags; + -+/** -+ * kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the -+ * region of its final execution location. -+ * -+ * @kbdev: Device pointer -+ * @gpu_addr: GPU address to write -+ * @value: Value to write -+ * -+ * This function writes a value in a GPU address that belongs to a private loaded -+ * firmware memory region based on its final execution location. The function -+ * assumes that the location is not permanently mapped on the CPU address space, -+ * therefore it maps it and then unmaps it to access it independently. This function -+ * needs to be used when accessing firmware memory regions which will be moved to -+ * their final execution location during firmware boot using an address based on the -+ * final execution location. -+ */ -+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value); ++ lockdep_assert_held(&kctx->csf.lock); + -+/** -+ * kbase_csf_firmware_early_init() - Early initialization for the firmware. -+ * @kbdev: Kbase device -+ * -+ * Initialize resources related to the firmware. Must be called at kbase probe. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+int kbase_csf_firmware_early_init(struct kbase_device *kbdev); ++ group = get_bound_queue_group(queue); ++ if (!group) { ++ dev_warn(kctx->kbdev->dev, "queue not bound\n"); ++ return; ++ } + -+/** -+ * kbase_csf_firmware_early_term() - Terminate resources related to the firmware -+ * after the firmware unload has been done. -+ * -+ * @kbdev: Device pointer -+ * -+ * This should be called only when kbase probe fails or gets rmmoded. -+ */ -+void kbase_csf_firmware_early_term(struct kbase_device *kbdev); ++ kbase_csf_scheduler_lock(kbdev); + -+/** -+ * kbase_csf_firmware_late_init() - Late initialization for the firmware. -+ * @kbdev: Kbase device -+ * -+ * Initialize resources related to the firmware. But must be called after -+ * backend late init is done. Must be used at probe time only. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+int kbase_csf_firmware_late_init(struct kbase_device *kbdev); ++ slot_num = kbase_csf_scheduler_group_get_slot(group); + -+/** -+ * kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU -+ * @kbdev: Kbase device -+ * -+ * Request the firmware from user space and load it into memory. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+int kbase_csf_firmware_load_init(struct kbase_device *kbdev); ++ /* The group could have gone off slot before this work item got ++ * a chance to execute. ++ */ ++ if (slot_num < 0) ++ goto unlock; + -+/** -+ * kbase_csf_firmware_unload_term() - Unload the firmware -+ * @kbdev: Kbase device -+ * -+ * Frees the memory allocated by kbase_csf_firmware_load_init() -+ */ -+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); ++ /* If the bound group is on slot yet the kctx is marked with disabled ++ * on address-space fault, the group is pending to be killed. 
So skip ++ * the inflight oom operation. ++ */ ++ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) ++ goto unlock; + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+/** -+ * kbase_csf_firmware_mcu_register_write - Write to MCU register -+ * -+ * @kbdev: Instance of a gpu platform device that implements a csf interface. -+ * @reg_addr: Register address to write into -+ * @reg_val: Value to be written -+ * -+ * Write a desired value to a register in MCU address space. -+ * -+ * return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 const reg_val); -+/** -+ * kbase_csf_firmware_mcu_register_read - Read from MCU register -+ * -+ * @kbdev: Instance of a gpu platform device that implements a csf interface. -+ * @reg_addr: Register address to read from -+ * @reg_val: Value as present in reg_addr register -+ * -+ * Read a value from MCU address space. -+ * -+ * return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 *reg_val); ++ ginfo = &kbdev->csf.global_iface.groups[slot_num]; ++ stream = &ginfo->streams[csi_index]; ++ cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & ++ CS_ACK_TILER_OOM_MASK; ++ cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & ++ CS_REQ_TILER_OOM_MASK; + -+/** -+ * kbase_csf_firmware_mcu_register_poll - Poll MCU register -+ * -+ * @kbdev: Instance of a gpu platform device that implements a csf interface. -+ * @reg_addr: Register address to read from -+ * @val_mask: Value to mask the read value for comparison -+ * @reg_val: Value to be compared against -+ * -+ * Continue to read a value from MCU address space until it matches given mask and value. -+ * -+ * return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, -+ u32 const val_mask, u32 const reg_val); -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ /* The group could have already undergone suspend-resume cycle before ++ * this work item got a chance to execute. On CSG resume the CS_ACK ++ * register is set by firmware to reflect the CS_REQ register, which ++ * implies that all events signaled before suspension are implicitly ++ * acknowledged. ++ * A new OoM event is expected to be generated after resume. ++ */ ++ if (cs_oom_ack == cs_oom_req) ++ goto unlock; + -+/** -+ * kbase_csf_firmware_ping - Send the ping request to firmware. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * The function sends the ping request to firmware. -+ */ -+void kbase_csf_firmware_ping(struct kbase_device *kbdev); ++ err = handle_oom_event(group, stream); + -+/** -+ * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW. -+ * -+ * The function sends the ping request to firmware and waits to confirm it is -+ * alive. -+ * -+ * Return: 0 on success, or negative on failure. 
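For illustration only — not part of the patch. kbase_queue_oom_event() above treats a tiler-OoM event as outstanding only while the TILER_OOM bits of CS_REQ and CS_ACK differ, and acknowledges it by mirroring the CS_ACK bit back into CS_REQ before ringing the doorbell. A standalone model of that request/acknowledge toggle; the bit position assumed for the mask is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed bit position, for illustration only. */
#define CS_REQ_TILER_OOM_MASK (1u << 26)

/* An event is outstanding while the REQ and ACK copies of the bit differ. */
static bool tiler_oom_pending(uint32_t cs_req, uint32_t cs_ack)
{
        return ((cs_req ^ cs_ack) & CS_REQ_TILER_OOM_MASK) != 0;
}

int main(void)
{
        uint32_t cs_req = 0;
        uint32_t cs_ack = CS_REQ_TILER_OOM_MASK; /* firmware toggled the bit */

        printf("pending: %d\n", tiler_oom_pending(cs_req, cs_ack)); /* 1 */

        /* Host acknowledgement: copy the ACK bit into REQ (masked write). */
        cs_req = (cs_req & ~CS_REQ_TILER_OOM_MASK) | (cs_ack & CS_REQ_TILER_OOM_MASK);
        printf("pending: %d\n", tiler_oom_pending(cs_req, cs_ack)); /* 0 */
        return 0;
}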
-+ */ -+int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, ++ CS_REQ_TILER_OOM_MASK); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+/** -+ * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @timeout: The maximum number of GPU cycles that is allowed to elapse -+ * without forward progress before the driver terminates a GPU -+ * command queue group. -+ * -+ * Configures the progress timeout value used by the firmware to decide -+ * when to report that a task is not making progress on an endpoint. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout); ++ if (unlikely(err)) { ++ dev_warn( ++ kbdev->dev, ++ "Queue group to be terminated, couldn't handle the OoM event\n"); ++ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM); ++ kbase_csf_scheduler_unlock(kbdev); ++ term_queue_group(group); ++ flush_gpu_cache_on_fatal_error(kbdev); ++ report_tiler_oom_error(group); ++ return; ++ } ++unlock: ++ kbase_csf_scheduler_unlock(kbdev); ++} + +/** -+ * kbase_csf_enter_protected_mode - Send the Global request to firmware to -+ * enter protected mode. ++ * oom_event_worker - Tiler out-of-memory handler called from a workqueue. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @data: Pointer to a work_struct embedded in GPU command queue data. + * -+ * The function must be called with kbdev->csf.scheduler.interrupt_lock held -+ * and it does not wait for the protected mode entry to complete. ++ * Handles a tiler out-of-memory condition for a GPU command queue and then ++ * releases a reference that was added to prevent the queue being destroyed ++ * while this work item was pending on a workqueue. + */ -+void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); ++static void oom_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue *queue = ++ container_of(data, struct kbase_queue, oom_event_work); ++ struct kbase_context *kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; + -+/** -+ * kbase_csf_wait_protected_mode_enter - Wait for the completion of PROTM_ENTER -+ * Global request sent to firmware. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function needs to be called after kbase_csf_enter_protected_mode() to -+ * wait for the GPU to actually enter protected mode. GPU reset is triggered if -+ * the wait is unsuccessful. -+ * -+ * Return: 0 on success, or negative on failure. 
-+ */ -+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); ++ int err = kbase_reset_gpu_try_prevent(kbdev); + -+static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ return true; -+#else -+ return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == -+ MCU_STATUS_HALTED); -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+} ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; + -+/** -+ * kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to -+ * halt its operation and bring itself -+ * into a known internal state for warm -+ * boot later. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev); ++ mutex_lock(&kctx->csf.lock); + -+/** -+ * kbase_csf_firmware_enable_mcu - Send the command to enable MCU -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev); ++ kbase_queue_oom_event(queue); ++ release_queue(queue); + -+/** -+ * kbase_csf_firmware_disable_mcu - Send the command to disable MCU -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev); ++ mutex_unlock(&kctx->csf.lock); ++ kbase_reset_gpu_allow(kbdev); ++} + +/** -+ * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled -+ * status. ++ * report_group_timeout_error - Report the timeout error for the group to userspace. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the group for which timeout error occurred + */ -+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev); ++static void report_group_timeout_error(struct kbase_queue_group *const group) ++{ ++ struct base_csf_notification const ++ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, ++ } } } }; + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_csf_firmware_trigger_mcu_sleep - Send the command to put MCU in sleep -+ * state. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev); ++ dev_warn(group->kctx->kbdev->dev, ++ "Notify the event notification thread, forward progress timeout (%llu cycles)\n", ++ kbase_csf_timeout_get(group->kctx->kbdev)); ++ ++ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); ++ kbase_event_wakeup(group->kctx); ++} + +/** -+ * kbase_csf_firmware_is_mcu_in_sleep - Check if sleep request has completed -+ * and MCU has halted. ++ * timer_event_worker - Handle the progress timeout error for the group + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @data: Pointer to a work_struct embedded in GPU command queue group data. + * -+ * Return: true if sleep request has completed, otherwise false. 
++ * Terminate the CSG and report the error to userspace + */ -+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); -+#endif ++static void timer_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue_group *const group = ++ container_of(data, struct kbase_queue_group, timer_event_work); ++ struct kbase_context *const kctx = group->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ bool reset_prevented = false; ++ int err = kbase_reset_gpu_prevent_and_wait(kbdev); + -+/** -+ * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for -+ * the cold boot case firmware image would -+ * be reloaded from filesystem into memory. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", ++ group->handle); ++ else ++ reset_prevented = true; + -+/** -+ * kbase_csf_firmware_reload_completed - The reboot of MCU firmware has -+ * completed. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev); ++ mutex_lock(&kctx->csf.lock); + -+/** -+ * kbase_csf_firmware_global_reinit - Send the Global configuration requests -+ * after the reboot of MCU firmware. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @core_mask: Mask of the enabled shader cores. -+ */ -+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, -+ u64 core_mask); ++ term_queue_group(group); ++ flush_gpu_cache_on_fatal_error(kbdev); ++ report_group_timeout_error(group); ++ ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++} + +/** -+ * kbase_csf_firmware_global_reinit_complete - Check the Global configuration -+ * requests, sent after the reboot of MCU firmware, have -+ * completed or not. ++ * handle_progress_timer_event - Progress timer timeout event handler. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to GPU queue group for which the timeout event is received. + * -+ * Return: true if the Global configuration requests completed otherwise false. ++ * Notify a waiting user space client of the timeout. ++ * Enqueue a work item to terminate the group and notify the event notification ++ * thread of progress timeout fault for the GPU command queue group. + */ -+bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); ++static void handle_progress_timer_event(struct kbase_queue_group *const group) ++{ ++ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, ++ DF_PROGRESS_TIMER_TIMEOUT); + -+/** -+ * kbase_csf_firmware_update_core_attr - Send the Global configuration request -+ * to update the requested core attribute -+ * changes. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @update_core_pwroff_timer: If true, signal the firmware needs to update -+ * the MCU power-off timer value. -+ * @update_core_mask: If true, need to do the core_mask update with -+ * the supplied core_mask value. -+ * @core_mask: New core mask value if update_core_mask is true, -+ * otherwise unused. 
-+ */ -+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, -+ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask); ++ queue_work(group->kctx->csf.wq, &group->timer_event_work); ++} + +/** -+ * kbase_csf_firmware_core_attr_updated - Check the Global configuration -+ * request has completed or not, that was sent to update -+ * the core attributes. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected ++ * memory for the protected mode suspend buffer. ++ * @group: Pointer to the GPU queue group. + * -+ * Return: true if the Global configuration request to update the core -+ * attributes has completed, otherwise false. ++ * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise ++ * negative error value. + */ -+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); ++static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) ++{ ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ struct kbase_context *kctx = group->kctx; ++ struct tagged_addr *phys = NULL; ++ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; ++ size_t nr_pages; ++ int err = 0; + -+/** -+ * kbase_csf_firmware_get_glb_iface - Request the global control block of CSF -+ * interface capabilities -+ * -+ * @kbdev: Kbase device. -+ * @group_data: Pointer where to store all the group data -+ * (sequentially). -+ * @max_group_num: The maximum number of groups to be read. -+ * Can be 0, in which case group_data is unused. -+ * @stream_data: Pointer where to store all the CS data -+ * (sequentially). -+ * @max_total_stream_num: The maximum number of CSs to be read. -+ * Can be 0, in which case stream_data is unused. -+ * @glb_version: Where to store the global interface version. -+ * @features: Where to store a bit mask of features (e.g. -+ * whether certain types of job can be suspended). -+ * @group_num: Where to store the number of CSGs -+ * supported. -+ * @prfcnt_size: Where to store the size of CSF performance counters, -+ * in bytes. Bits 31:16 hold the size of firmware -+ * performance counter data and 15:0 hold the size of -+ * hardware performance counter data. -+ * @instr_features: Instrumentation features. Bits 7:4 hold the max size -+ * of events. Bits 3:0 hold the offset update rate. -+ * (csf >= 1,1,0) -+ * -+ * Return: Total number of CSs, summed across all groups. -+ */ -+u32 kbase_csf_firmware_get_glb_iface( -+ struct kbase_device *kbdev, struct basep_cs_group_control *group_data, -+ u32 max_group_num, struct basep_cs_stream_control *stream_data, -+ u32 max_total_stream_num, u32 *glb_version, u32 *features, -+ u32 *group_num, u32 *prfcnt_size, u32 *instr_features); ++ if (likely(sbuf->pma)) ++ return 0; + -+/** -+ * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline -+ * metadata content -+ * -+ * @kbdev: Kbase device. -+ * @name: Name of the metadata which metadata content to be returned. -+ * @size: Metadata size if specified metadata found. -+ * -+ * Return: The firmware timeline metadata content which match @p name. 
-+ */ -+const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, -+ const char *name, size_t *size); ++ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); ++ if (unlikely(!phys)) { ++ err = -ENOMEM; ++ goto phys_free; ++ } + -+/** -+ * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory. -+ * -+ * @kbdev: Kbase device the memory mapping shall belong to. -+ * @num_pages: Number of memory pages to map. -+ * @cpu_map_properties: Either PROT_READ or PROT_WRITE. -+ * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. -+ * @csf_mapping: Object where to write metadata for the memory mapping. -+ * -+ * This helper function allocates memory and maps it on both the CPU -+ * and the GPU address spaces. Most of the properties of the mapping -+ * are implicit and will be automatically determined by the function, -+ * e.g. whether memory is cacheable. -+ * -+ * The client is only expected to specify whether the mapping is readable -+ * or writable in the CPU and the GPU address spaces; any other flag -+ * will be ignored by the function. -+ * -+ * Return: 0 if success, or an error code on failure. -+ */ -+int kbase_csf_firmware_mcu_shared_mapping_init( -+ struct kbase_device *kbdev, -+ unsigned int num_pages, -+ unsigned long cpu_map_properties, -+ unsigned long gpu_map_properties, -+ struct kbase_csf_mapping *csf_mapping); ++ mutex_lock(&kctx->csf.lock); ++ kbase_csf_scheduler_lock(kbdev); + -+/** -+ * kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory. -+ * -+ * @kbdev: Device pointer. -+ * @csf_mapping: Metadata of the memory mapping to terminate. -+ */ -+void kbase_csf_firmware_mcu_shared_mapping_term( -+ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); ++ if (unlikely(!group->csg_reg)) { ++ /* The only chance of the bound csg_reg is removed from the group is ++ * that it has been put off slot by the scheduler and the csg_reg resource ++ * is contended by other groups. In this case, it needs another occasion for ++ * mapping the pma, which needs a bound csg_reg. Since the group is already ++ * off-slot, returning no error is harmless as the scheduler, when place the ++ * group back on-slot again would do the required MMU map operation on the ++ * allocated and retained pma. ++ */ ++ WARN_ON(group->csg_nr >= 0); ++ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", ++ group->kctx->tgid, group->kctx->id, group->handle); ++ goto unlock; ++ } + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+extern bool fw_debug; -+#endif ++ /* Allocate the protected mode pages */ ++ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); ++ if (unlikely(!sbuf->pma)) { ++ err = -ENOMEM; ++ goto unlock; ++ } + -+static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs) -+{ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ return (fw_debug ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(msecs)); -+#else -+ return msecs_to_jiffies(msecs); -+#endif ++ /* Map the bound susp_reg to the just allocated pma pages */ ++ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); ++ ++unlock: ++ kbase_csf_scheduler_unlock(kbdev); ++ mutex_unlock(&kctx->csf.lock); ++phys_free: ++ kfree(phys); ++ return err; +} + -+/** -+ * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis -+ * monitoring operation -+ * -+ * @kbdev: Kbase device structure -+ * -+ * Program the firmware interface with its configured hysteresis count value -+ * and enable the firmware to act on it. The Caller is -+ * assumed to hold the kbdev->csf.scheduler.interrupt_lock. -+ */ -+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); ++static void report_group_fatal_error(struct kbase_queue_group *const group) ++{ ++ struct base_gpu_queue_group_error const ++ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { .fatal_group = { ++ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, ++ } } }; + -+/** -+ * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time -+ * hysteresis monitoring operation -+ * -+ * @kbdev: Kbase device structure -+ * -+ * Program the firmware interface to disable the idle hysteresis timer. The -+ * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock. -+ */ -+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev); ++ kbase_csf_add_group_fatal_error(group, &err_payload); ++ kbase_event_wakeup(group->kctx); ++} + +/** -+ * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle -+ * detection hysteresis duration ++ * protm_event_worker - Protected mode switch request event handler ++ * called from a workqueue. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @data: Pointer to a work_struct embedded in GPU command queue group data. + * -+ * Return: the internally recorded hysteresis (nominal) value. ++ * Request to switch to protected mode. + */ -+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev); ++static void protm_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue_group *const group = ++ container_of(data, struct kbase_queue_group, protm_event_work); ++ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; ++ int err = 0; + -+/** -+ * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle -+ * detection hysteresis duration -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @dur: The duration value (unit: milliseconds) for the configuring -+ * hysteresis field for GPU idle detection -+ * -+ * The supplied value will be recorded internally without any change. But the -+ * actual field value will be subject to hysteresis source frequency scaling -+ * and maximum value limiting. The default source will be SYSTEM_TIMESTAMP -+ * counter. But in case the platform is not able to supply it, the GPU -+ * CYCLE_COUNTER source will be used as an alternative. Bit-31 on the -+ * returned value is the source configuration flag, and it is set to '1' -+ * when CYCLE_COUNTER alternative source is used. -+ * -+ * Return: the actual internally configured hysteresis field value. 
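The comment above describes a return-value convention used by the GPU idle-hysteresis helper (and, further down, by the MCU core power-off timer helper): bit 31 of the returned word is a source flag, set when the GPU CYCLE_COUNTER is used instead of the SYSTEM_TIMESTAMP counter, and the remaining bits carry the scaled count. A minimal decoding sketch in plain C follows; the macro name is an illustrative assumption, not an identifier from this patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout: bit 31 = timer-source flag, bits 30:0 = configured count. */
#define TIMER_SRC_CYCLE_COUNTER (UINT32_C(1) << 31)

static void decode_timer_value(uint32_t val)
{
	bool cycle_counter = (val & TIMER_SRC_CYCLE_COUNTER) != 0;
	uint32_t count = val & ~TIMER_SRC_CYCLE_COUNTER;

	printf("source=%s, count=%u\n",
	       cycle_counter ? "GPU cycle counter" : "system timestamp", count);
}

int main(void)
{
	decode_timer_value(0x400);                           /* SYSTEM_TIMESTAMP source */
	decode_timer_value(TIMER_SRC_CYCLE_COUNTER | 0x400); /* CYCLE_COUNTER fallback */
	return 0;
}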
-+ */ -+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur); ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, ++ group, 0u); + -+/** -+ * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU shader Core power-off -+ * time value -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Return: the internally recorded MCU shader Core power-off (nominal) timeout value. The unit -+ * of the value is in micro-seconds. -+ */ -+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); ++ err = alloc_grp_protected_suspend_buffer_pages(group); ++ if (!err) { ++ kbase_csf_scheduler_group_protm_enter(group); ++ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { ++ sbuf->alloc_retries++; ++ /* try again to allocate pages */ ++ queue_work(group->kctx->csf.wq, &group->protm_event_work); ++ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { ++ dev_err(group->kctx->kbdev->dev, ++ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", ++ group->handle, group->kctx->tgid, group->kctx->id); ++ report_group_fatal_error(group); ++ } + -+/** -+ * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU shader Core power-off -+ * time value -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @dur: The duration value (unit: micro-seconds) for configuring MCU -+ * core power-off timer, when the shader cores' power -+ * transitions are delegated to the MCU (normal operational -+ * mode) -+ * -+ * The supplied value will be recorded internally without any change. But the -+ * actual field value will be subject to core power-off timer source frequency -+ * scaling and maximum value limiting. The default source will be -+ * SYSTEM_TIMESTAMP counter. But in case the platform is not able to supply it, -+ * the GPU CYCLE_COUNTER source will be used as an alternative. Bit-31 on the -+ * returned value is the source configuration flag, and it is set to '1' -+ * when CYCLE_COUNTER alternative source is used. -+ * -+ * The configured MCU shader Core power-off timer will only have effect when the host -+ * driver has delegated the shader cores' power management to MCU. -+ * -+ * Return: the actual internal core power-off timer value in register defined -+ * format. -+ */ -+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, ++ group, 0u); ++} + +/** -+ * kbase_csf_interface_version - Helper function to build the full firmware -+ * interface version in a format compatible with -+ * GLB_VERSION register ++ * handle_fault_event - Handler for CS fault. + * -+ * @major: major version of csf interface -+ * @minor: minor version of csf interface -+ * @patch: patch version of csf interface ++ * @queue: Pointer to queue for which fault event was received. ++ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for ++ * the queue. + * -+ * Return: firmware interface version ++ * Print required information about the CS fault and notify the user space client ++ * about the fault. 
+ */ -+static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) ++static void ++handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) +{ -+ return ((major << GLB_VERSION_MAJOR_SHIFT) | -+ (minor << GLB_VERSION_MINOR_SHIFT) | -+ (patch << GLB_VERSION_PATCH_SHIFT)); -+} ++ struct kbase_device *const kbdev = queue->kctx->kbdev; ++ struct kbase_csf_cmd_stream_group_info const *ginfo = ++ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; ++ struct kbase_csf_cmd_stream_info const *stream = ++ &ginfo->streams[queue->csi_index]; ++ const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); ++ const u64 cs_fault_info = ++ kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) ++ << 32); ++ const u8 cs_fault_exception_type = ++ CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); ++ const u32 cs_fault_exception_data = ++ CS_FAULT_EXCEPTION_DATA_GET(cs_fault); ++ const u64 cs_fault_info_exception_data = ++ CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); + -+/** -+ * kbase_csf_trigger_firmware_config_update - Send a firmware config update. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Any changes done to firmware configuration entry or tracebuffer entry -+ * requires a GPU silent reset to reflect the configuration changes -+ * requested, but if Firmware.header.entry.bit(30) is set then we can request a -+ * FIRMWARE_CONFIG_UPDATE rather than doing a silent reset. -+ * -+ * Return: 0 if success, or negative error code on failure. -+ */ -+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+/** -+ * kbase_csf_firmware_req_core_dump - Request a firmware core dump -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Request a firmware core dump and wait for for firmware to acknowledge. -+ * Firmware will enter infinite loop after the firmware core dump is created. -+ * -+ * Return: 0 if success, or negative error code on failure. -+ */ -+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); ++ dev_warn(kbdev->dev, ++ "Ctx %d_%d Group %d CSG %d CSI: %d\n" ++ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" ++ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", ++ queue->kctx->tgid, queue->kctx->id, queue->group->handle, ++ queue->group->csg_nr, queue->csi_index, ++ cs_fault_exception_type, ++ kbase_gpu_exception_name(cs_fault_exception_type), ++ cs_fault_exception_data, cs_fault_info_exception_data); + -+#endif -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c -new file mode 100644 -index 000000000..c895b0801 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c -@@ -0,0 +1,354 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#include -+#include -+#include ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the ++ * standpoint of dump on error. It is used to report fault for the CSIs ++ * that are associated with the same CSG as the CSI for which the actual ++ * fault was reported by the Iterator. ++ * Dumping would be triggered when the actual fault is reported. ++ * ++ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error ++ * in other types of queues (cpu/kcpu). If a fault had occurred in some ++ * other GPU queue then the dump would have been performed anyways when ++ * that fault was reported. ++ */ ++ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && ++ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { ++ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { ++ get_queue(queue); ++ queue->cs_error = cs_fault; ++ queue->cs_error_info = cs_fault_info; ++ queue->cs_error_fatal = false; ++ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) ++ release_queue(queue); ++ return; ++ } ++ } ++#endif + -+#include "mali_kbase_csf_firmware_cfg.h" -+#include "mali_kbase_csf_firmware_log.h" ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FAULT_MASK); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); ++} + -+#if CONFIG_SYSFS -+#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" ++static void report_queue_fatal_error(struct kbase_queue *const queue, ++ u32 cs_fatal, u64 cs_fatal_info, ++ u8 group_handle) ++{ ++ struct base_csf_notification error = { ++ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group_handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, ++ .payload = { ++ .fatal_queue = { ++ .sideband = cs_fatal_info, ++ .status = cs_fatal, ++ .csi_index = queue->csi_index, ++ } ++ } ++ } ++ } ++ } ++ }; + -+#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity" ++ kbase_csf_event_add_error(queue->kctx, &queue->error, &error); ++ kbase_event_wakeup(queue->kctx); ++} + +/** -+ * struct firmware_config - Configuration item within the MCU firmware ++ * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue + * -+ * @node: List head linking all options to -+ * kbase_device:csf.firmware_config -+ * @kbdev: Pointer to the Kbase device -+ * @kobj: Kobject corresponding to the sysfs sub-directory, -+ * inside CSF_FIRMWARE_CFG_SYSFS_DIR_NAME directory, -+ * representing the configuration option @name. -+ * @kobj_inited: kobject initialization state -+ * @updatable: Indicates whether config items can be updated with -+ * FIRMWARE_CONFIG_UPDATE -+ * @name: NUL-terminated string naming the option -+ * @address: The address in the firmware image of the configuration option -+ * @min: The lowest legal value of the configuration option -+ * @max: The maximum legal value of the configuration option -+ * @cur_val: The current value of the configuration option ++ * @data: Pointer to a work_struct embedded in GPU command queue. + * -+ * The firmware may expose configuration options. 
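The firmware_config support carried in this hunk exposes each parsed option as a sysfs sub-directory holding read-only min and max attributes plus a writable cur attribute (see show_fw_cfg() and store_fw_cfg() further down). As a rough userspace illustration only, with a placeholder device path and assuming the "Log verbosity" entry named in this file, such an option could be inspected like this:

#include <stdio.h>

/* Placeholder path; the real parent directory depends on the GPU platform device. */
#define OPT_DIR "/sys/devices/platform/example-gpu/firmware_config/Log verbosity"

static int read_u32(const char *path, unsigned int *out)
{
	FILE *f = fopen(path, "r");
	int ok;

	if (!f)
		return -1;
	ok = (fscanf(f, "%u", out) == 1) ? 0 : -1;
	fclose(f);
	return ok;
}

int main(void)
{
	unsigned int min, max, cur;

	if (read_u32(OPT_DIR "/min", &min) || read_u32(OPT_DIR "/max", &max) ||
	    read_u32(OPT_DIR "/cur", &cur)) {
		fprintf(stderr, "option not available on this system\n");
		return 1;
	}
	printf("allowed range %u-%u, current value %u\n", min, max, cur);
	/* Writing a value inside [min, max] to "cur" goes through store_fw_cfg(),
	 * which either triggers FIRMWARE_CONFIG_UPDATE or a silent GPU reset.
	 */
	return 0;
}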
Each option has a name, the -+ * address where the option is controlled and the minimum and maximum values -+ * that the option can take. ++ * Terminate the CSG and report the error to userspace. + */ -+struct firmware_config { -+ struct list_head node; -+ struct kbase_device *kbdev; -+ struct kobject kobj; -+ bool kobj_inited; -+ bool updatable; -+ char *name; -+ u32 address; -+ u32 min; -+ u32 max; -+ u32 cur_val; -+}; ++static void cs_error_worker(struct work_struct *const data) ++{ ++ struct kbase_queue *const queue = ++ container_of(data, struct kbase_queue, cs_error_work); ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_queue_group *group; ++ u8 group_handle; ++ bool reset_prevented = false; ++ int err; + -+#define FW_CFG_ATTR(_name, _mode) \ -+ struct attribute fw_cfg_attr_##_name = { \ -+ .name = __stringify(_name), \ -+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless"); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ group = get_bound_queue_group(queue); ++ if (!group) { ++ dev_warn(kbdev->dev, "queue not bound when handling fatal event"); ++ goto unlock; + } + -+static FW_CFG_ATTR(min, 0444); -+static FW_CFG_ATTR(max, 0444); -+static FW_CFG_ATTR(cur, 0644); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (!queue->cs_error_fatal) { ++ unsigned long flags; ++ int slot_num; + -+static void fw_cfg_kobj_release(struct kobject *kobj) -+{ -+ struct firmware_config *config = -+ container_of(kobj, struct firmware_config, kobj); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ slot_num = kbase_csf_scheduler_group_get_slot_locked(group); ++ if (slot_num >= 0) { ++ struct kbase_csf_cmd_stream_group_info const *ginfo = ++ &kbdev->csf.global_iface.groups[slot_num]; ++ struct kbase_csf_cmd_stream_info const *stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 const cs_ack = ++ kbase_csf_firmware_cs_output(stream, CS_ACK); + -+ kfree(config); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FAULT_MASK); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, ++ slot_num, true); ++ } ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ goto unlock; ++ } ++#endif ++ ++ group_handle = group->handle; ++ term_queue_group(group); ++ flush_gpu_cache_on_fatal_error(kbdev); ++ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, ++ group_handle); ++ ++unlock: ++ release_queue(queue); ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); +} + -+static ssize_t show_fw_cfg(struct kobject *kobj, -+ struct attribute *attr, char *buf) ++/** ++ * handle_fatal_event - Handler for CS fatal. ++ * ++ * @queue: Pointer to queue for which fatal event was received. ++ * @stream: Pointer to the structure containing info provided by the ++ * firmware about the CSI. ++ * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for ++ * the queue. ++ * ++ * Notify a waiting user space client of the CS fatal and prints meaningful ++ * information. ++ * Enqueue a work item to terminate the group and report the fatal error ++ * to user space. 
++ */ ++static void ++handle_fatal_event(struct kbase_queue *const queue, ++ struct kbase_csf_cmd_stream_info const *const stream, ++ u32 cs_ack) +{ -+ struct firmware_config *config = -+ container_of(kobj, struct firmware_config, kobj); -+ struct kbase_device *kbdev = config->kbdev; -+ u32 val = 0; ++ const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); ++ const u64 cs_fatal_info = ++ kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) ++ << 32); ++ const u32 cs_fatal_exception_type = ++ CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); ++ const u32 cs_fatal_exception_data = ++ CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); ++ const u64 cs_fatal_info_exception_data = ++ CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); ++ struct kbase_device *const kbdev = queue->kctx->kbdev; + -+ if (!kbdev) -+ return -ENODEV; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ if (attr == &fw_cfg_attr_max) -+ val = config->max; -+ else if (attr == &fw_cfg_attr_min) -+ val = config->min; -+ else if (attr == &fw_cfg_attr_cur) { -+ unsigned long flags; ++ dev_warn(kbdev->dev, ++ "Ctx %d_%d Group %d CSG %d CSI: %d\n" ++ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" ++ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", ++ queue->kctx->tgid, queue->kctx->id, queue->group->handle, ++ queue->group->csg_nr, queue->csi_index, ++ cs_fatal_exception_type, ++ kbase_gpu_exception_name(cs_fatal_exception_type), ++ cs_fatal_exception_data, cs_fatal_info_exception_data); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ val = config->cur_val; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (cs_fatal_exception_type == ++ CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { ++ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR); ++ queue_work(system_wq, &kbdev->csf.fw_error_work); + } else { -+ dev_warn(kbdev->dev, -+ "Unexpected read from entry %s/%s", -+ config->name, attr->name); -+ return -EINVAL; ++ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL); ++ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { ++ queue->group->cs_unrecoverable = true; ++ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(queue->kctx->kbdev); ++ } ++ get_queue(queue); ++ queue->cs_error = cs_fatal; ++ queue->cs_error_info = cs_fatal_info; ++ queue->cs_error_fatal = true; ++ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) ++ release_queue(queue); + } + -+ return snprintf(buf, PAGE_SIZE, "%u\n", val); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FATAL_MASK); ++ +} + -+static ssize_t store_fw_cfg(struct kobject *kobj, -+ struct attribute *attr, -+ const char *buf, -+ size_t count) ++/** ++ * process_cs_interrupts - Process interrupts for a CS. ++ * ++ * @group: Pointer to GPU command queue group data. ++ * @ginfo: The CSG interface provided by the firmware. ++ * @irqreq: CSG's IRQ request bitmask (one bit per CS). ++ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). ++ * @track: Pointer that tracks the highest scanout priority idle CSG ++ * and any newly potentially viable protected mode requesting ++ * CSG in current IRQ context. ++ * ++ * If the interrupt request bitmask differs from the acknowledge bitmask ++ * then the firmware is notifying the host of an event concerning those ++ * CSs indicated by bits whose value differs. 
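The interrupt handling added in this hunk repeatedly uses the same request/acknowledge handshake: bits that differ between a REQ register and the corresponding ACK register mark pending events, and the host acknowledges an event by copying the ACK bit back into REQ under that event's mask (via kbase_csf_firmware_cs_input_mask() and related helpers). A small self-contained sketch of the idea, using plain variables in place of the firmware interface registers and made-up event bits, is given below.

#include <stdint.h>
#include <stdio.h>

/* Made-up event bits; the real CS_REQ/CS_ACK layouts are defined by the CSF interface. */
#define EVT_FAULT     (1u << 0)
#define EVT_TILER_OOM (1u << 1)

/* Detect events as req^ack differences, handle them, then re-align REQ with ACK. */
static void service_events(uint32_t *req, uint32_t ack)
{
	uint32_t pending = *req ^ ack;

	if (pending & EVT_FAULT)
		printf("handling fault event\n");
	if (pending & EVT_TILER_OOM)
		printf("handling tiler out-of-memory event\n");

	/* Acknowledge: masked copy of the ACK bits into REQ, analogous to what the
	 * driver does with kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, mask).
	 */
	*req = (*req & ~pending) | (ack & pending);
}

int main(void)
{
	uint32_t req = 0, ack = EVT_FAULT | EVT_TILER_OOM; /* firmware toggled both bits */

	service_events(&req, ack);
	printf("req=0x%x ack=0x%x -> nothing pending\n", req, ack);
	return 0;
}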
The actions required ++ * are then determined by examining which notification flags differ between ++ * the request and acknowledge registers for the individual CS(s). ++ */ ++static void process_cs_interrupts(struct kbase_queue_group *const group, ++ struct kbase_csf_cmd_stream_group_info const *const ginfo, ++ u32 const irqreq, u32 const irqack, ++ struct irq_idle_and_protm_track *track) +{ -+ struct firmware_config *config = -+ container_of(kobj, struct firmware_config, kobj); -+ struct kbase_device *kbdev = config->kbdev; ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ u32 remaining = irqreq ^ irqack; ++ bool protm_pend = false; ++ const bool group_suspending = ++ !kbase_csf_scheduler_group_events_enabled(kbdev, group); + -+ if (!kbdev) -+ return -ENODEV; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ if (attr == &fw_cfg_attr_cur) { -+ unsigned long flags; -+ u32 val, cur_val; -+ int ret = kstrtouint(buf, 0, &val); ++ while (remaining != 0) { ++ int const i = ffs(remaining) - 1; ++ struct kbase_queue *const queue = group->bound_queues[i]; + -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Couldn't process %s/%s write operation.\n" -+ "Use format \n", -+ config->name, attr->name); -+ return -EINVAL; -+ } ++ remaining &= ~(1 << i); + -+ if ((val < config->min) || (val > config->max)) -+ return -EINVAL; ++ /* The queue pointer can be NULL, but if it isn't NULL then it ++ * cannot disappear since scheduler spinlock is held and before ++ * freeing a bound queue it has to be first unbound which ++ * requires scheduler spinlock. ++ */ ++ if (queue && !WARN_ON(queue->csi_index != i)) { ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[i]; ++ u32 const cs_req = kbase_csf_firmware_cs_input_read( ++ stream, CS_REQ); ++ u32 const cs_ack = ++ kbase_csf_firmware_cs_output(stream, CS_ACK); ++ struct workqueue_struct *wq = group->kctx->csf.wq; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, ++ group, queue, cs_req ^ cs_ack); ++ handle_fatal_event(queue, stream, cs_ack); ++ } + -+ cur_val = config->cur_val; -+ if (cur_val == val) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return count; -+ } ++ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, ++ group, queue, cs_req ^ cs_ack); ++ handle_fault_event(queue, cs_ack); ++ } + -+ /* If configuration update cannot be performed with -+ * FIRMWARE_CONFIG_UPDATE then we need to do a -+ * silent reset before we update the memory. -+ */ -+ if (!config->updatable) { -+ /* -+ * If there is already a GPU reset pending then inform -+ * the User to retry the write. ++ /* PROTM_PEND and TILER_OOM can be safely ignored ++ * because they will be raised again if the group ++ * is assigned a CSG slot in future. + */ -+ if (kbase_reset_gpu_silent(kbdev)) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ return -EAGAIN; ++ if (group_suspending) { ++ u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; ++ u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, ++ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, ++ group, queue, ++ cs_req_remain ^ cs_ack_remain); ++ continue; + } -+ } + -+ /* -+ * GPU reset request has been placed, now update the -+ * firmware image. GPU reset will take place only after -+ * hwaccess_lock is released. 
-+ * Update made to firmware image in memory would not -+ * be lost on GPU reset as configuration entries reside -+ * in the RONLY section of firmware image, which is not -+ * reloaded on firmware reboot due to GPU reset. -+ */ -+ kbase_csf_update_firmware_memory( -+ kbdev, config->address, val); ++ if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ ++ (cs_ack & CS_ACK_TILER_OOM_MASK))) { ++ get_queue(queue); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, ++ group, queue, cs_req ^ cs_ack); ++ if (!queue_work(wq, &queue->oom_event_work)) { ++ /* The work item shall not have been ++ * already queued, there can be only ++ * one pending OoM event for a ++ * queue. ++ */ ++ dev_warn( ++ kbdev->dev, ++ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", ++ queue->csi_index, group->handle, queue->kctx->tgid, ++ queue->kctx->id); ++ release_queue(queue); ++ } ++ } + -+ config->cur_val = val; ++ if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ ++ (cs_ack & CS_ACK_PROTM_PEND_MASK)) { ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, ++ group, queue, cs_req ^ cs_ack); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ dev_dbg(kbdev->dev, ++ "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", ++ queue->csi_index, group->handle, ++ group->csg_nr); + -+ /* Enable FW logging only if Log verbosity is non-zero */ -+ if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && -+ (!cur_val || !val)) { -+ ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val); -+ if (ret) { -+ /* Undo FW configuration changes */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ config->cur_val = cur_val; -+ kbase_csf_update_firmware_memory(kbdev, config->address, cur_val); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return ret; ++ bitmap_set(group->protm_pending_bitmap, i, 1); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue, ++ group->protm_pending_bitmap[0]); ++ protm_pend = true; + } + } ++ } + -+ /* If we can update the config without firmware reset then -+ * we need to just trigger FIRMWARE_CONFIG_UPDATE. -+ */ -+ if (config->updatable) { -+ ret = kbase_csf_trigger_firmware_config_update(kbdev); -+ if (ret) -+ return ret; ++ if (protm_pend) { ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ if (scheduler->tick_protm_pending_seq > group->scan_seq_num) { ++ scheduler->tick_protm_pending_seq = group->scan_seq_num; ++ track->protm_grp = group; + } + -+ /* Wait for the config update to take effect */ -+ if (!config->updatable) -+ kbase_reset_gpu_wait(kbdev); -+ } else { -+ dev_warn(kbdev->dev, -+ "Unexpected write to entry %s/%s", -+ config->name, attr->name); -+ return -EINVAL; -+ } ++ if (!group->protected_suspend_buf.pma) ++ queue_work(group->kctx->csf.wq, &group->protm_event_work); + -+ return count; ++ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { ++ clear_bit(group->csg_nr, ++ scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); ++ dev_dbg(kbdev->dev, ++ "Group-%d on slot %d de-idled by protm request", ++ group->handle, group->csg_nr); ++ } ++ } +} + -+static const struct sysfs_ops fw_cfg_ops = { -+ .show = &show_fw_cfg, -+ .store = &store_fw_cfg, -+}; ++/** ++ * process_csg_interrupts - Process interrupts for a CSG. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @csg_nr: CSG number. 
++ * @track: Pointer that tracks the highest idle CSG and the newly possible viable ++ * protected mode requesting group, in current IRQ context. ++ * ++ * Handles interrupts for a CSG and for CSs within it. ++ * ++ * If the CSG's request register value differs from its acknowledge register ++ * then the firmware is notifying the host of an event concerning the whole ++ * group. The actions required are then determined by examining which ++ * notification flags differ between those two register values. ++ * ++ * See process_cs_interrupts() for details of per-stream interrupt handling. ++ */ ++static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr, ++ struct irq_idle_and_protm_track *track) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_queue_group *group = NULL; ++ u32 req, ack, irqreq, irqack; + -+static struct attribute *fw_cfg_attrs[] = { -+ &fw_cfg_attr_min, -+ &fw_cfg_attr_max, -+ &fw_cfg_attr_cur, -+ NULL, -+}; -+#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) -+ATTRIBUTE_GROUPS(fw_cfg); -+#endif ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+static struct kobj_type fw_cfg_kobj_type = { -+ .release = &fw_cfg_kobj_release, -+ .sysfs_ops = &fw_cfg_ops, -+#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) -+ .default_groups = fw_cfg_groups, -+#else -+ .default_attrs = fw_cfg_attrs, -+#endif -+}; ++ if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) ++ return; + -+int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) -+{ -+ struct firmware_config *config; ++ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; ++ req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ++ ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ); ++ irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK); + -+ kbdev->csf.fw_cfg_kobj = kobject_create_and_add( -+ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); -+ if (!kbdev->csf.fw_cfg_kobj) { -+ kobject_put(kbdev->csf.fw_cfg_kobj); -+ dev_err(kbdev->dev, -+ "Creation of %s sysfs sub-directory failed\n", -+ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME); -+ return -ENOMEM; -+ } ++ /* There may not be any pending CSG/CS interrupts to process */ ++ if ((req == ack) && (irqreq == irqack)) ++ return; + -+ list_for_each_entry(config, &kbdev->csf.firmware_config, node) { -+ int err; ++ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before ++ * examining the CS_ACK & CS_REQ bits. This would ensure that Host ++ * doesn't misses an interrupt for the CS in the race scenario where ++ * whilst Host is servicing an interrupt for the CS, firmware sends ++ * another interrupt for that CS. ++ */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq); + -+ kbase_csf_read_firmware_memory(kbdev, config->address, -+ &config->cur_val); ++ group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr); + -+ err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, -+ kbdev->csf.fw_cfg_kobj, "%s", config->name); -+ if (err) { -+ kobject_put(&config->kobj); -+ dev_err(kbdev->dev, -+ "Creation of %s sysfs sub-directory failed\n", -+ config->name); -+ return err; -+ } ++ /* The group pointer can be NULL here if interrupts for the group ++ * (like SYNC_UPDATE, IDLE notification) were delayed and arrived ++ * just after the suspension of group completed. 
However if not NULL ++ * then the group pointer cannot disappear even if User tries to ++ * terminate the group whilst this loop is running as scheduler ++ * spinlock is held and for freeing a group that is resident on a CSG ++ * slot scheduler spinlock is required. ++ */ ++ if (!group) ++ return; + -+ config->kobj_inited = true; -+ } ++ if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) ++ return; + -+ return 0; -+} ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); + -+void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) -+{ -+ while (!list_empty(&kbdev->csf.firmware_config)) { -+ struct firmware_config *config; ++ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { ++ kbase_csf_firmware_csg_input_mask(ginfo, ++ CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); + -+ config = list_first_entry(&kbdev->csf.firmware_config, -+ struct firmware_config, node); -+ list_del(&config->node); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); + -+ if (config->kobj_inited) { -+ kobject_del(&config->kobj); -+ kobject_put(&config->kobj); -+ } else -+ kfree(config); ++ /* SYNC_UPDATE events shall invalidate GPU idle event */ ++ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); ++ ++ kbase_csf_event_signal_cpu_only(group->kctx); + } + -+ kobject_del(kbdev->csf.fw_cfg_kobj); -+ kobject_put(kbdev->csf.fw_cfg_kobj); -+} ++ if ((req ^ ack) & CSG_REQ_IDLE_MASK) { ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, -+ const u32 *entry, unsigned int size, bool updatable) -+{ -+ const char *name = (char *)&entry[3]; -+ struct firmware_config *config; -+ const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET; ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( ++ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr); + -+ /* Allocate enough space for struct firmware_config and the -+ * configuration option name (with NULL termination) -+ */ -+ config = kzalloc(sizeof(*config) + name_len + 1, GFP_KERNEL); ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, ++ CSG_REQ_IDLE_MASK); + -+ if (!config) -+ return -ENOMEM; ++ set_bit(csg_nr, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, ++ scheduler->csg_slots_idle_mask[0]); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); ++ dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", ++ group->handle, csg_nr); + -+ config->kbdev = kbdev; -+ config->updatable = updatable; -+ config->name = (char *)(config+1); -+ config->address = entry[0]; -+ config->min = entry[1]; -+ config->max = entry[2]; ++ if (atomic_read(&scheduler->non_idle_offslot_grps)) { ++ /* If there are non-idle CSGs waiting for a slot, fire ++ * a tock for a replacement. 
++ */ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, ++ group, req ^ ack); ++ kbase_csf_scheduler_invoke_tock(kbdev); ++ } else { ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, ++ group, req ^ ack); ++ } + -+ memcpy(config->name, name, name_len); -+ config->name[name_len] = 0; ++ if (group->scan_seq_num < track->idle_seq) { ++ track->idle_seq = group->scan_seq_num; ++ track->idle_slot = csg_nr; ++ } ++ } + -+ list_add(&config->node, &kbdev->csf.firmware_config); ++ if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, ++ CSG_REQ_PROGRESS_TIMER_EVENT_MASK); + -+ dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", -+ config->name, config->address, -+ config->min, config->max); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, ++ req ^ ack); ++ dev_info( ++ kbdev->dev, ++ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n", ++ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, ++ group->kctx->id, csg_nr); + -+ return 0; -+} -+#else -+int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) -+{ -+ return 0; -+} ++ handle_progress_timer_event(group); ++ } + -+void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) -+{ -+ /* !CONFIG_SYSFS: Nothing to do here */ -+} ++ process_cs_interrupts(group, ginfo, irqreq, irqack, track); + -+int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, -+ const u32 *entry, unsigned int size) -+{ -+ return 0; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ++ ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); +} -+#endif /* CONFIG_SYSFS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h -new file mode 100644 -index 000000000..b227cf158 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h -@@ -0,0 +1,72 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * process_prfcnt_interrupts - Process performance counter interrupts. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @glb_req: Global request register value. ++ * @glb_ack: Global acknowledge register value. + * ++ * Handles interrupts issued by the firmware that relate to the performance ++ * counters. For example, on completion of a performance counter sample. It is ++ * expected that the scheduler spinlock is already held on calling this ++ * function. 
+ */ ++static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ++ u32 glb_ack) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; + -+#ifndef _KBASE_CSF_FIRMWARE_CFG_H_ -+#define _KBASE_CSF_FIRMWARE_CFG_H_ ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + -+#include -+#include "mali_kbase_csf_firmware.h" -+#include ++ /* Process PRFCNT_SAMPLE interrupt. */ ++ if (kbdev->csf.hwcnt.request_pending && ++ ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == ++ (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { ++ kbdev->csf.hwcnt.request_pending = false; + -+#define CONFIGURATION_ENTRY_NAME_OFFSET (0xC) ++ dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); + -+/** -+ * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration -+ * options present in firmware image. -+ * -+ * @kbdev: Pointer to the Kbase device -+ * -+ * This function would create a sysfs directory and populate it with a -+ * sub-directory, that would contain a file per attribute, for every -+ * configuration option parsed from firmware image. -+ * -+ * Return: The initialization error code. -+ */ -+int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev); ++ kbase_hwcnt_backend_csf_on_prfcnt_sample( ++ &kbdev->hwcnt_gpu_iface); ++ } ++ ++ /* Process PRFCNT_ENABLE interrupt. */ ++ if (kbdev->csf.hwcnt.enable_pending && ++ ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == ++ (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { ++ kbdev->csf.hwcnt.enable_pending = false; ++ ++ dev_dbg(kbdev->dev, ++ "PRFCNT_ENABLE status changed interrupt received."); ++ ++ if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) ++ kbase_hwcnt_backend_csf_on_prfcnt_enable( ++ &kbdev->hwcnt_gpu_iface); ++ else ++ kbase_hwcnt_backend_csf_on_prfcnt_disable( ++ &kbdev->hwcnt_gpu_iface); ++ } ++ ++ /* Process PRFCNT_THRESHOLD interrupt. */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { ++ dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); ++ ++ kbase_hwcnt_backend_csf_on_prfcnt_threshold( ++ &kbdev->hwcnt_gpu_iface); ++ ++ /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to ++ * the same value as GLB_ACK.PRFCNT_THRESHOLD ++ * flag in order to enable reporting of another ++ * PRFCNT_THRESHOLD event. ++ */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PRFCNT_THRESHOLD_MASK); ++ } ++ ++ /* Process PRFCNT_OVERFLOW interrupt. */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { ++ dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); ++ ++ kbase_hwcnt_backend_csf_on_prfcnt_overflow( ++ &kbdev->hwcnt_gpu_iface); ++ ++ /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to ++ * the same value as GLB_ACK.PRFCNT_OVERFLOW ++ * flag in order to enable reporting of another ++ * PRFCNT_OVERFLOW event. ++ */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PRFCNT_OVERFLOW_MASK); ++ } ++} + +/** -+ * kbase_csf_firmware_cfg_term - Delete the sysfs directory that was created -+ * for firmware configuration options. ++ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed + * -+ * @kbdev: Pointer to the Kbase device ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @glb_req: Global request register value. ++ * @glb_ack: Global acknowledge register value. 
+ * ++ * This function checks if the PROTM_ENTER Global request had completed and ++ * appropriately sends notification about the protected mode entry to components ++ * like IPA, HWC, IPA_CONTROL. + */ -+void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); ++static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, ++ u32 glb_req, u32 glb_ack) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ if (likely(!kbdev->csf.scheduler.active_protm_grp)) ++ return; ++ ++ if (kbdev->protected_mode) ++ return; ++ ++ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) != ++ (glb_ack & GLB_REQ_PROTM_ENTER_MASK)) ++ return; ++ ++ dev_dbg(kbdev->dev, "Protected mode entry interrupt received"); ++ ++ kbdev->protected_mode = true; ++ kbase_ipa_protection_mode_switch_event(kbdev); ++ kbase_ipa_control_protm_entered(kbdev); ++ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); ++} + +/** -+ * kbase_csf_firmware_cfg_option_entry_parse() - Process a -+ * "configuration option" section. -+ * -+ * @kbdev: Kbase device structure -+ * @fw: Firmware image containing the section -+ * @entry: Pointer to the section -+ * @size: Size (in bytes) of the section -+ * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE ++ * process_protm_exit - Handle the protected mode exit interrupt + * -+ * Read a "configuration option" section adding it to the -+ * kbase_device:csf.firmware_config list. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @glb_ack: Global acknowledge register value. + * -+ * Return: 0 if successful, negative error code on failure ++ * This function handles the PROTM_EXIT interrupt and sends notification ++ * about the protected mode exit to components like HWC, IPA_CONTROL. 
+ */ -+int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, -+ const struct kbase_csf_mcu_fw *const fw, -+ const u32 *entry, unsigned int size, bool updatable); -+#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c ++static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); ++ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PROTM_EXIT_MASK); ++ ++ if (likely(scheduler->active_protm_grp)) { ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, ++ scheduler->active_protm_grp, 0u); ++ scheduler->active_protm_grp = NULL; ++ } else { ++ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group"); ++ } ++ ++ if (!WARN_ON(!kbdev->protected_mode)) { ++ kbdev->protected_mode = false; ++ kbase_ipa_control_protm_exited(kbdev); ++ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); ++ } ++ ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ kbase_debug_coresight_csf_enable_pmode_exit(kbdev); ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++} ++ ++static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, ++ struct irq_idle_and_protm_track *track) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group = track->protm_grp; ++ u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID)) ++ return; ++ ++ /* Handle protm from the tracked information */ ++ if (track->idle_seq < current_protm_pending_seq) { ++ /* If the protm enter was prevented due to groups priority, then fire a tock ++ * for the scheduler to re-examine the case. ++ */ ++ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot); ++ kbase_csf_scheduler_invoke_tock(kbdev); ++ } else if (group) { ++ u32 i, num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_queue_group *grp; ++ bool tock_triggered = false; ++ ++ /* A new protm request, and track->idle_seq is not sufficient, check across ++ * previously notified idle CSGs in the current tick/tock cycle. ++ */ ++ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { ++ if (i == track->idle_slot) ++ continue; ++ grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i); ++ /* If not NULL then the group pointer cannot disappear as the ++ * scheduler spinlock is held. 
++ */ ++ if (grp == NULL) ++ continue; ++ ++ if (grp->scan_seq_num < current_protm_pending_seq) { ++ tock_triggered = true; ++ dev_dbg(kbdev->dev, ++ "Attempt new protm from tick/tock idle slot %d\n", i); ++ kbase_csf_scheduler_invoke_tock(kbdev); ++ break; ++ } ++ } ++ ++ if (!tock_triggered) { ++ dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", ++ group->handle, group->csg_nr); ++ queue_work(group->kctx->csf.wq, &group->protm_event_work); ++ } ++ } ++} ++ ++static void order_job_irq_clear_with_iface_mem_read(void) ++{ ++ /* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the ++ * read from interface memory. The ordering is needed considering the way ++ * FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers ++ * without any synchronization. Without the barrier there is no guarantee ++ * about the ordering, the write to IRQ_CLEAR can take effect after the read ++ * from interface memory and that could cause a problem for the scenario where ++ * FW sends back to back notifications for the same CSG for events like ++ * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the ++ * first event. Similar thing can happen with glb events like CFG_ALLOC_EN ++ * acknowledgment and GPU idle notification. ++ * ++ * MCU CPU ++ * --------------- ---------------- ++ * Update interface memory Write to IRQ_CLEAR to clear current IRQ ++ * ++ * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory ++ */ ++ ++ /* CPU and GPU would be in the same Outer shareable domain */ ++ dmb(osh); ++} ++ ++void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) ++{ ++ bool deferred_handling_glb_idle_irq = false; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val); ++ ++ do { ++ unsigned long flags; ++ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; ++ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; ++ bool glb_idle_irq_received = false; ++ ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); ++ order_job_irq_clear_with_iface_mem_read(); ++ ++ if (csg_interrupts != 0) { ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ /* Looping through and track the highest idle and protm groups */ ++ while (csg_interrupts != 0) { ++ int const csg_nr = ffs(csg_interrupts) - 1; ++ ++ process_csg_interrupts(kbdev, csg_nr, &track); ++ csg_interrupts &= ~(1 << csg_nr); ++ } ++ ++ /* Handle protm from the tracked information */ ++ process_tracked_info_for_protm(kbdev, &track); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } ++ ++ if (val & JOB_IRQ_GLOBAL_IF) { ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ ++ kbdev->csf.interrupt_received = true; ++ ++ if (!kbdev->csf.firmware_reloaded) ++ kbase_csf_firmware_reload_completed(kbdev); ++ else if (global_iface->output) { ++ u32 glb_req, glb_ack; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ glb_req = ++ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK); ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, ++ glb_req ^ glb_ack); ++ ++ check_protm_enter_req_complete(kbdev, glb_req, glb_ack); ++ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) ++ process_protm_exit(kbdev, glb_ack); ++ ++ /* Handle IDLE Hysteresis notification event */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { ++ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); ++ 
kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_IDLE_EVENT_MASK); ++ ++ glb_idle_irq_received = true; ++ /* Defer handling this IRQ to account for a race condition ++ * where the idle worker could be executed before we have ++ * finished handling all pending IRQs (including CSG IDLE ++ * IRQs). ++ */ ++ deferred_handling_glb_idle_irq = true; ++ } ++ ++ process_prfcnt_interrupts(kbdev, glb_req, glb_ack); ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ /* Invoke the MCU state machine as a state transition ++ * might have completed. ++ */ ++ kbase_pm_update_state(kbdev); ++ } ++ } ++ ++ if (!glb_idle_irq_received) ++ break; ++ /* Attempt to serve potential IRQs that might have occurred ++ * whilst handling the previous IRQ. In case we have observed ++ * the GLB IDLE IRQ without all CSGs having been marked as ++ * idle, the GPU would be treated as no longer idle and left ++ * powered on. ++ */ ++ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); ++ } while (val); ++ ++ if (deferred_handling_glb_idle_irq) { ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_csf_scheduler_process_gpu_idle_event(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } ++ ++ wake_up_all(&kbdev->csf.event_wait); ++ ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); ++} ++ ++void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.db_filp) { ++ struct page *page = as_page(kbdev->csf.dummy_db_page); ++ ++ kbase_mem_pool_free( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ page, false); ++ ++ fput(kbdev->csf.db_filp); ++ } ++} ++ ++int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) ++{ ++ struct tagged_addr phys; ++ struct file *filp; ++ int ret; ++ ++ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, ++ false, NULL); ++ ++ if (ret <= 0) { ++ fput(filp); ++ return ret; ++ } ++ ++ kbdev->csf.db_filp = filp; ++ kbdev->csf.dummy_db_page = phys; ++ kbdev->csf.db_file_offsets = 0; ++ ++ return 0; ++} ++ ++void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.user_reg.filp) { ++ struct page *page = as_page(kbdev->csf.user_reg.dummy_page); ++ ++ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); ++ fput(kbdev->csf.user_reg.filp); ++ } ++} ++ ++int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) ++{ ++ struct tagged_addr phys; ++ struct file *filp; ++ struct page *page; ++ u32 *addr; ++ ++ kbdev->csf.user_reg.filp = NULL; ++ ++ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE); ++ if (IS_ERR(filp)) { ++ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg"); ++ return PTR_ERR(filp); ++ } ++ ++ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, ++ false, NULL) <= 0) { ++ fput(filp); ++ return -ENOMEM; ++ } ++ ++ page = as_page(phys); ++ addr = kmap_atomic(page); ++ ++ /* Write a special value for the latest flush register inside the ++ * dummy page ++ */ ++ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; ++ ++ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), ++ DMA_BIDIRECTIONAL); ++ kunmap_atomic(addr); ++ ++ kbdev->csf.user_reg.filp = filp; ++ kbdev->csf.user_reg.dummy_page = phys; ++ 
kbdev->csf.user_reg.file_offset = 0; ++ return 0; ++} ++ ++u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) ++{ ++ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; ++ u8 out_priority = req_priority; ++ ++ if (pcm_device) { ++ req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority); ++ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); ++ out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority); ++ } ++ ++ return out_priority; ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h new file mode 100644 -index 000000000..f0a10d197 +index 000000000..dd947dcba --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c -@@ -0,0 +1,807 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +@@ -0,0 +1,506 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -150709,802 +151580,698 @@ index 000000000..f0a10d197 + * + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_kbase.h" -+#include "mali_kbase_csf_firmware_core_dump.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" ++#ifndef _KBASE_CSF_H_ ++#define _KBASE_CSF_H_ + -+/* Page size in bytes in use by MCU. */ -+#define FW_PAGE_SIZE 4096 ++#include "mali_kbase_csf_kcpu.h" ++#include "mali_kbase_csf_scheduler.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_protected_memory.h" ++#include "mali_kbase_hwaccess_time.h" + -+/* -+ * FW image header core dump data format supported. -+ * Currently only version 0.1 is supported. ++/* Indicate invalid CS h/w interface + */ -+#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 -+#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 -+ -+/* Full version of the image header core dump data format */ -+#define FW_CORE_DUMP_DATA_VERSION \ -+ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) ++#define KBASEP_IF_NR_INVALID ((s8)-1) + -+/* Validity flag to indicate if the MCU registers in the buffer are valid */ -+#define FW_MCU_STATUS_MASK 0x1 -+#define FW_MCU_STATUS_VALID (1 << 0) ++/* Indicate invalid CSG number for a GPU command queue group ++ */ ++#define KBASEP_CSG_NR_INVALID ((s8)-1) + -+/* Core dump entry fields */ -+#define FW_CORE_DUMP_VERSION_INDEX 0 -+#define FW_CORE_DUMP_START_ADDR_INDEX 1 ++/* Indicate invalid user doorbell number for a GPU command queue ++ */ ++#define KBASEP_USER_DB_NR_INVALID ((s8)-1) + -+/* MCU registers stored by a firmware core dump */ -+struct fw_core_dump_mcu { -+ u32 r0; -+ u32 r1; -+ u32 r2; -+ u32 r3; -+ u32 r4; -+ u32 r5; -+ u32 r6; -+ u32 r7; -+ u32 r8; -+ u32 r9; -+ u32 r10; -+ u32 r11; -+ u32 r12; -+ u32 sp; -+ u32 lr; -+ u32 pc; -+}; ++/* Number of pages used for GPU command queue's User input & output data */ ++#define KBASEP_NUM_CS_USER_IO_PAGES (2) + -+/* Any ELF definitions used in this file are from elf.h/elfcore.h except -+ * when specific 32-bit versions are required (mainly for the -+ * ELF_PRSTATUS32 note that is used to contain the MCU registers). 
++/* Indicates an invalid value for the scan out sequence number, used to ++ * signify there is no group that has protected mode execution pending. + */ ++#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) + -+/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ -+struct prstatus32_timeval { -+ int tv_sec; -+ int tv_usec; -+}; ++#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ + -+/* - Structure defining ELF32 PRSTATUS note contents, as defined by the -+ * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. -+ * Note: GDB checks for the size of this structure to be 0x94. -+ * Modified pr_reg (array containing the Arm 32-bit MCU registers) to -+ * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. -+ */ -+struct elf_prstatus32 { -+ struct elf_siginfo pr_info; /* Info associated with signal. */ -+ short int pr_cursig; /* Current signal. */ -+ unsigned int pr_sigpend; /* Set of pending signals. */ -+ unsigned int pr_sighold; /* Set of held signals. */ -+ pid_t pr_pid; -+ pid_t pr_ppid; -+ pid_t pr_pgrp; -+ pid_t pr_sid; -+ struct prstatus32_timeval pr_utime; /* User time. */ -+ struct prstatus32_timeval pr_stime; /* System time. */ -+ struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ -+ struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ -+ u32 pr_reg[18]; /* GP registers. */ -+ int pr_fpvalid; /* True if math copro being used. */ -+}; ++/* Idle hysteresis time can be scaled down when GPU sleep feature is used */ ++#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) + +/** -+ * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' -+ * debugfs file. -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space. ++ * ++ * @kctx: Pointer to the kbase context which is being initialized. ++ * ++ * Return: 0 if successful or a negative error code on failure. + */ -+struct fw_core_dump_data { -+ struct kbase_device *kbdev; -+}; ++int kbase_csf_ctx_init(struct kbase_context *kctx); + -+/* -+ * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' -+ * debugfs file. -+ * @interface: current firmware memory interface -+ * @page_num: current page number (0..) within @interface ++/** ++ * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon ++ * GPU bus fault, MMU page fault or similar. ++ * ++ * @kctx: Pointer to faulty kbase context. ++ * @fault: Pointer to the fault. ++ * ++ * This function terminates all GPU command queue groups in the context and ++ * notifies the event notification thread of the fault. + */ -+struct fw_core_dump_seq_off { -+ struct kbase_csf_firmware_interface *interface; -+ u32 page_num; -+}; ++void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, ++ struct kbase_fault *fault); + +/** -+ * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump ++ * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @regs: Pointer to a core dump mcu struct where the MCU registers are copied -+ * to. Should be allocated by the called. ++ * @kctx: Pointer to the kbase context which is being terminated. + * -+ * Return: 0 if successfully copied the MCU registers, negative error code otherwise. 
++ * This function terminates any remaining CSGs and CSs which weren't destroyed ++ * before context termination. + */ -+static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) -+{ -+ unsigned int i; -+ u32 status = 0; -+ u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; -+ u32 *data = (u32 *)regs; -+ -+ /* Check if the core dump entry exposed the buffer */ -+ if (!regs || !kbdev->csf.fw_core_dump.available) -+ return -EPERM; -+ -+ /* Check if the data in the buffer is valid, if not, return error */ -+ kbase_csf_read_firmware_memory(kbdev, data_addr, &status); -+ if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) -+ return -EPERM; -+ -+ /* According to image header documentation, the MCU registers core dump -+ * buffer is 32-bit aligned. -+ */ -+ for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) -+ kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); -+ -+ return 0; -+} ++void kbase_csf_ctx_term(struct kbase_context *kctx); + +/** -+ * fw_core_dump_fill_elf_header - Initializes an ELF32 header -+ * @hdr: ELF32 header to initialize -+ * @sections: Number of entries in the ELF program header table ++ * kbase_csf_queue_register - Register a GPU command queue. + * -+ * Initializes an ELF32 header for an ARM 32-bit little-endian -+ * 'Core file' object file. ++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be registered. ++ * @reg: Pointer to the structure which contains details of the ++ * queue to be registered within the provided ++ * context. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) -+{ -+ /* Reset all members in header. */ -+ memset(hdr, 0, sizeof(*hdr)); -+ -+ /* Magic number identifying file as an ELF object. */ -+ memcpy(hdr->e_ident, ELFMAG, SELFMAG); -+ -+ /* Identify file as 32-bit, little-endian, using current -+ * ELF header version, with no OS or ABI specific ELF -+ * extensions used. -+ */ -+ hdr->e_ident[EI_CLASS] = ELFCLASS32; -+ hdr->e_ident[EI_DATA] = ELFDATA2LSB; -+ hdr->e_ident[EI_VERSION] = EV_CURRENT; -+ hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; -+ -+ /* 'Core file' type of object file. */ -+ hdr->e_type = ET_CORE; -+ -+ /* ARM 32-bit architecture (AARCH32) */ -+ hdr->e_machine = EM_ARM; -+ -+ /* Object file version: the original format. */ -+ hdr->e_version = EV_CURRENT; -+ -+ /* Offset of program header table in file. */ -+ hdr->e_phoff = sizeof(struct elf32_hdr); -+ -+ /* No processor specific flags. */ -+ hdr->e_flags = 0; -+ -+ /* Size of the ELF header in bytes. */ -+ hdr->e_ehsize = sizeof(struct elf32_hdr); -+ -+ /* Size of the ELF program header entry in bytes. */ -+ hdr->e_phentsize = sizeof(struct elf32_phdr); -+ -+ /* Number of entries in the program header table. */ -+ hdr->e_phnum = sections; -+} ++int kbase_csf_queue_register(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg); + +/** -+ * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header -+ * for holding auxiliary information -+ * @phdr: ELF32 program header -+ * @file_offset: Location of the note in the file in bytes -+ * @size: Size of the note in bytes. ++ * kbase_csf_queue_register_ex - Register a GPU command queue with ++ * extended format. + * -+ * Initializes an ELF32 program header describing auxiliary information (containing -+ * one or more notes) of @size bytes alltogether located in the file at offset -+ * @file_offset. 
++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be registered. ++ * @reg: Pointer to the structure which contains details of the ++ * queue to be registered within the provided ++ * context, together with the extended parameter fields ++ * for supporting cs trace command. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, -+ u32 size) -+{ -+ /* Auxiliary information (note) in program header. */ -+ phdr->p_type = PT_NOTE; -+ -+ /* Location of first note in file in bytes. */ -+ phdr->p_offset = file_offset; -+ -+ /* Size of all notes combined in bytes. */ -+ phdr->p_filesz = size; -+ -+ /* Other members not relevant for a note. */ -+ phdr->p_vaddr = 0; -+ phdr->p_paddr = 0; -+ phdr->p_memsz = 0; -+ phdr->p_align = 0; -+ phdr->p_flags = 0; -+} ++int kbase_csf_queue_register_ex(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register_ex *reg); + +/** -+ * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment -+ * @phdr: ELF32 program header to initialize. -+ * @file_offset: Location of loadable segment in file in bytes -+ * (aligned to FW_PAGE_SIZE bytes) -+ * @vaddr: 32-bit virtual address where to write the segment -+ * (aligned to FW_PAGE_SIZE bytes) -+ * @size: Size of the segment in bytes. -+ * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. ++ * kbase_csf_queue_terminate - Terminate a GPU command queue. + * -+ * Initializes an ELF32 program header describing a loadable segment of -+ * @size bytes located in the file at offset @file_offset to be loaded -+ * at virtual address @vaddr with access permissions as described by -+ * CSF_FIRMWARE_ENTRY_* flags in @flags. ++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be terminated. ++ * @term: Pointer to the structure which identifies which ++ * queue is to be terminated. + */ -+static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, -+ u32 vaddr, u32 size, u32 flags) -+{ -+ /* Loadable segment in program header. */ -+ phdr->p_type = PT_LOAD; -+ -+ /* Location of segment in file in bytes. Aligned to p_align bytes. */ -+ phdr->p_offset = file_offset; -+ -+ /* Virtual address of segment. Aligned to p_align bytes. */ -+ phdr->p_vaddr = vaddr; -+ -+ /* Physical address of segment. Not relevant. */ -+ phdr->p_paddr = 0; -+ -+ /* Size of segment in file and memory. */ -+ phdr->p_filesz = size; -+ phdr->p_memsz = size; -+ -+ /* Alignment of segment in the file and memory in bytes (integral power of 2). */ -+ phdr->p_align = FW_PAGE_SIZE; -+ -+ /* Set segment access permissions. */ -+ phdr->p_flags = 0; -+ if (flags & CSF_FIRMWARE_ENTRY_READ) -+ phdr->p_flags |= PF_R; -+ if (flags & CSF_FIRMWARE_ENTRY_WRITE) -+ phdr->p_flags |= PF_W; -+ if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) -+ phdr->p_flags |= PF_X; -+} ++void kbase_csf_queue_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_terminate *term); + +/** -+ * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note -+ * @name: Name given to the PRSTATUS note. ++ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated ++ * for a queue at the time of bind. + * -+ * Calculates the size of a 32-bit PRSTATUS note (which contains information -+ * about a process like the current MCU registers) taking into account -+ * @name must be padded to a 4-byte multiple. 
++ * @kctx: Address of the kbase context within which the queue was created. ++ * @queue: Pointer to the queue to be unlinked. + * -+ * Return: size of 32-bit PRSTATUS note in bytes. ++ * This function will free the pair of physical pages allocated for a GPU ++ * command queue, and also release the hardware doorbell page, that were mapped ++ * into the process address space to enable direct submission of commands to ++ * the hardware. Also releases the reference taken on the queue when the mapping ++ * was created. ++ * ++ * If an explicit or implicit unbind was missed by the userspace then the ++ * mapping will persist. On process exit kernel itself will remove the mapping. + */ -+static unsigned int fw_core_dump_get_prstatus_note_size(char *name) -+{ -+ return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + -+ sizeof(struct elf_prstatus32); -+} ++void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, ++ struct kbase_queue *queue); + +/** -+ * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure -+ * @prs: ELF32 PRSTATUS note to initialize -+ * @regs: MCU registers to copy into the PRSTATUS note ++ * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a ++ * GPU command queue. + * -+ * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. -+ * Other process information is N/A for CSF Firmware. ++ * @kctx: Pointer to the kbase context within which the resources ++ * for the queue are being allocated. ++ * @queue: Pointer to the queue for which to allocate resources. ++ * ++ * This function allocates a pair of User mode input/output pages for a ++ * GPU command queue and maps them in the shared interface segment of MCU ++ * firmware address space. Also reserves a hardware doorbell page for the queue. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, -+ struct fw_core_dump_mcu *regs) -+{ -+ /* Only fill in registers (32-bit) of PRSTATUS note. */ -+ memset(prs, 0, sizeof(*prs)); -+ prs->pr_reg[0] = regs->r0; -+ prs->pr_reg[1] = regs->r1; -+ prs->pr_reg[2] = regs->r2; -+ prs->pr_reg[3] = regs->r3; -+ prs->pr_reg[4] = regs->r4; -+ prs->pr_reg[5] = regs->r5; -+ prs->pr_reg[6] = regs->r0; -+ prs->pr_reg[7] = regs->r7; -+ prs->pr_reg[8] = regs->r8; -+ prs->pr_reg[9] = regs->r9; -+ prs->pr_reg[10] = regs->r10; -+ prs->pr_reg[11] = regs->r11; -+ prs->pr_reg[12] = regs->r12; -+ prs->pr_reg[13] = regs->sp; -+ prs->pr_reg[14] = regs->lr; -+ prs->pr_reg[15] = regs->pc; -+} ++int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ++ struct kbase_queue *queue); + +/** -+ * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note -+ * @name: Name for the PRSTATUS note -+ * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note -+ * @created_prstatus_note: -+ * Pointer to the allocated ELF32 PRSTATUS note ++ * kbase_csf_queue_bind - Bind a GPU command queue to a queue group. + * -+ * Creates an ELF32 note with one PRSTATUS entry containing the -+ * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in -+ * @created_prstatus_note. ++ * @kctx: The kbase context. ++ * @bind: Pointer to the union which specifies a queue group and a ++ * queue to be bound to that group. + * -+ * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. ++ * Return: 0 on success, or negative on failure. 
+ */ -+static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, -+ struct elf32_note **created_prstatus_note) -+{ -+ struct elf32_note *note; -+ unsigned int note_name_sz; -+ unsigned int note_sz; ++int kbase_csf_queue_bind(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_bind *bind); + -+ /* Allocate memory for ELF32 note containing a PRSTATUS note. */ -+ note_name_sz = strlen(name) + 1; -+ note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + -+ sizeof(struct elf_prstatus32); -+ note = kmalloc(note_sz, GFP_KERNEL); -+ if (!note) -+ return 0; ++/** ++ * kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group ++ * to which it has been bound and free ++ * resources allocated for this queue if there ++ * are any. ++ * ++ * @queue: Pointer to queue to be unbound. ++ * @process_exit: Flag to indicate if process exit is happening. ++ */ ++void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit); + -+ /* Fill in ELF32 note with one entry for a PRSTATUS note. */ -+ note->n_namesz = note_name_sz; -+ note->n_descsz = sizeof(struct elf_prstatus32); -+ note->n_type = NT_PRSTATUS; -+ memcpy(note + 1, name, note_name_sz); -+ memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); ++/** ++ * kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case ++ * where it was never started. ++ * @queue: Pointer to queue to be unbound. ++ * ++ * Variant of kbase_csf_queue_unbind() for use on error paths for cleaning up ++ * queues that failed to fully bind. ++ */ ++void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); + -+ /* Return pointer and size of the created ELF32 note. */ -+ *created_prstatus_note = note; -+ return note_sz; -+} ++/** ++ * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware ++ * ++ * @kctx: The kbase context. ++ * @kick: Pointer to the struct which specifies the queue ++ * that needs to be scheduled. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_kick(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_kick *kick); + +/** -+ * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump -+ * @m: the seq_file handle ++ * kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding ++ * to the indicated handle. + * -+ * Writes the ELF header of the core dump including program headers for -+ * memory sections and a note containing the current MCU register -+ * values. ++ * @kctx: The kbase context under which the queue group exists. ++ * @group_handle: Handle for the group which uniquely identifies it within ++ * the context with which it was created. + * -+ * Excludes memory sections without read access permissions or -+ * are for protected memory. ++ * This function is used to find the queue group when passed a handle. + * -+ * The data written is as follows: -+ * - ELF header -+ * - ELF PHDRs for memory sections -+ * - ELF PHDR for program header NOTE -+ * - ELF PRSTATUS note -+ * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE ++ * Return: Pointer to a queue group on success, NULL on failure ++ */ ++struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); ++ ++/** ++ * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle ++ * is valid. + * -+ * The actual memory section dumps should follow this (not written -+ * by this function). ++ * @kctx: The kbase context under which the queue group exists. 
++ * @group_handle: Handle for the group which uniquely identifies it within ++ * the context with which it was created. + * -+ * Retrieves the necessary information via the struct -+ * fw_core_dump_data stored in the private member of the seq_file -+ * handle. ++ * This function is used to determine if the queue group handle is valid. + * -+ * Return: -+ * * 0 - success -+ * * -ENOMEM - not enough memory for allocating ELF32 note ++ * Return: 0 on success, or negative on failure. + */ -+static int fw_core_dump_write_elf_header(struct seq_file *m) -+{ -+ struct elf32_hdr hdr; -+ struct elf32_phdr phdr; -+ struct fw_core_dump_data *dump_data = m->private; -+ struct kbase_device *const kbdev = dump_data->kbdev; -+ struct kbase_csf_firmware_interface *interface; -+ struct elf_prstatus32 elf_prs; -+ struct elf32_note *elf_prstatus_note; -+ unsigned int sections = 0; -+ unsigned int elf_prstatus_note_size; -+ u32 elf_prstatus_offset; -+ u32 elf_phdr_note_offset; -+ u32 elf_memory_sections_data_offset; -+ u32 total_pages = 0; -+ u32 padding_size, *padding; -+ struct fw_core_dump_mcu regs = { 0 }; -+ -+ /* Count number of memory sections. */ -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ /* Skip memory sections that cannot be read or are protected. */ -+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || -+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) -+ continue; -+ sections++; -+ } -+ -+ /* Prepare ELF header. */ -+ fw_core_dump_fill_elf_header(&hdr, sections + 1); -+ seq_write(m, &hdr, sizeof(struct elf32_hdr)); -+ -+ elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); -+ /* PHDRs of PT_LOAD type. */ -+ elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); -+ /* PHDR of PT_NOTE type. */ -+ elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); -+ elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; ++int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, ++ u8 group_handle); + -+ /* Calculate padding size to page offset. */ -+ padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - -+ elf_memory_sections_data_offset; -+ elf_memory_sections_data_offset += padding_size; ++/** ++ * kbase_csf_queue_group_create - Create a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue group is to be created. ++ * @create: Pointer to the structure which contains details of the ++ * queue group which is to be created within the ++ * provided kbase context. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_group_create(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_group_create *create); + -+ /* Prepare ELF program header table. */ -+ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { -+ /* Skip memory sections that cannot be read or are protected. */ -+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || -+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) -+ continue; ++/** ++ * kbase_csf_queue_group_terminate - Terminate a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue group is to be terminated. ++ * @group_handle: Pointer to the structure which identifies the queue ++ * group which is to be terminated. 
++ */ ++void kbase_csf_queue_group_terminate(struct kbase_context *kctx, ++ u8 group_handle); + -+ fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, -+ interface->virtual, -+ interface->num_pages * FW_PAGE_SIZE, -+ interface->flags); ++/** ++ * kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue ++ * group that is not operational ++ * inside the scheduler. ++ * ++ * @group: Pointer to the structure which identifies the queue ++ * group to be terminated. The function assumes that the caller ++ * is sure that the given group is not operational inside the ++ * scheduler. If in doubt, use its alternative: ++ * @ref kbase_csf_queue_group_terminate(). ++ */ ++void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); + -+ seq_write(m, &phdr, sizeof(struct elf32_phdr)); ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++/** ++ * kbase_csf_queue_group_suspend - Suspend a GPU command queue group ++ * ++ * @kctx: The kbase context for which the queue group is to be ++ * suspended. ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages. ++ * @group_handle: Handle for the group which uniquely identifies it within ++ * the context within which it was created. ++ * ++ * This function is used to suspend a queue group and copy the suspend buffer. ++ * ++ * Return: 0 on success or negative value if failed to suspend ++ * queue group and copy suspend buffer contents. ++ */ ++int kbase_csf_queue_group_suspend(struct kbase_context *kctx, ++ struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); ++#endif + -+ elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; -+ total_pages += interface->num_pages; -+ } ++/** ++ * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace ++ * ++ * @group: GPU command queue group. ++ * @err_payload: Error payload to report. ++ */ ++void kbase_csf_add_group_fatal_error( ++ struct kbase_queue_group *const group, ++ struct base_gpu_queue_group_error const *const err_payload); + -+ /* Prepare PHDR of PT_NOTE type. */ -+ fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, -+ elf_prstatus_note_size); -+ seq_write(m, &phdr, sizeof(struct elf32_phdr)); ++/** ++ * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. ++ * ++ * @kbdev: The kbase device to handle an IRQ for ++ * @val: The value of JOB IRQ status register which triggered the interrupt ++ */ ++void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); + -+ /* Prepare ELF note of PRSTATUS type. */ -+ if (fw_get_core_dump_mcu(kbdev, ®s)) -+ dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); -+ /* Even if MCU Registers are not available the ELF prstatus is still -+ * filled with the registers equal to zero. -+ */ -+ fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); -+ elf_prstatus_note_size = -+ fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); -+ if (elf_prstatus_note_size == 0) -+ return -ENOMEM; ++/** ++ * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates ++ * the update of userspace mapping of HW ++ * doorbell page. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * The function creates a file and allocates a dummy page to facilitate the ++ * update of userspace mapping to point to the dummy page instead of the real ++ * HW doorbell page after the suspend of queue group. 
++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev); + -+ seq_write(m, elf_prstatus_note, elf_prstatus_note_size); -+ kfree(elf_prstatus_note); ++/** ++ * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used ++ * to update the userspace mapping of HW doorbell page ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev); + -+ /* Pad file to page size. */ -+ padding = kzalloc(padding_size, GFP_KERNEL); -+ seq_write(m, padding, padding_size); -+ kfree(padding); ++/** ++ * kbase_csf_setup_dummy_user_reg_page - Setup the dummy page that is accessed ++ * instead of the User register page after ++ * the GPU power down. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * The function allocates a dummy page which is used to replace the User ++ * register page in the userspace mapping after the power down of GPU. ++ * On the power up of GPU, the mapping is updated to point to the real ++ * User register page. The mapping is used to allow access to LATEST_FLUSH ++ * register from userspace. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); + -+ return 0; -+} ++/** ++ * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used ++ * to replace the User register page ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); + +/** -+ * fw_core_dump_create - Requests firmware to save state for a firmware core dump ++ * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. ++ * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @slot: Index of CSG interface for ringing the door-bell. + * -+ * Return: 0 on success, error code otherwise. ++ * The function kicks a notification on the CSG interface to firmware. + */ -+static int fw_core_dump_create(struct kbase_device *kbdev) -+{ -+ int err; ++void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); + -+ /* Ensure MCU is active before requesting the core dump. */ -+ kbase_csf_scheduler_pm_active(kbdev); -+ err = kbase_csf_scheduler_wait_mcu_active(kbdev); -+ if (!err) -+ err = kbase_csf_firmware_req_core_dump(kbdev); ++/** ++ * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG ++ * interfaces. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc. ++ * ++ * The function kicks a notification on a set of CSG interfaces to firmware. ++ */ ++void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, ++ u32 slot_bitmap); + -+ kbase_csf_scheduler_pm_idle(kbdev); ++/** ++ * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI ++ * assigned to a GPU queue ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @csi_index: ID of the CSI assigned to the GPU queue. ++ * @csg_nr: Index of the CSG slot assigned to the queue ++ * group to which the GPU queue is bound. ++ * @ring_csg_doorbell: Flag to indicate if the CSG doorbell needs to be rung ++ * after updating the CSG_DB_REQ. So if this flag is false ++ * the doorbell interrupt will not be sent to FW. 
++ * The flag is supposed be false only when the input page ++ * for bound GPU queues is programmed at the time of ++ * starting/resuming the group on a CSG slot. ++ * ++ * The function sends a doorbell interrupt notification to the firmware for ++ * a CSI assigned to a GPU queue. ++ */ ++void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, ++ int csi_index, int csg_nr, ++ bool ring_csg_doorbell); + -+ return err; -+} ++/** ++ * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a ++ * queue. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @queue: Pointer to the queue for ringing the door-bell. ++ * ++ * The function kicks a notification to the firmware on the doorbell assigned ++ * to the queue. ++ */ ++void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, ++ struct kbase_queue *queue); + +/** -+ * fw_core_dump_seq_start - seq_file start operation for firmware core dump file -+ * @m: the seq_file handle -+ * @_pos: holds the current position in pages -+ * (0 or most recent position used in previous session) ++ * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU ++ * command queue groups associated with the context. + * -+ * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 -+ * within the firmware interface memory sections. @_pos value 0 is used to indicate the -+ * position of the ELF header at the start of the file. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kctx: The kbase context. + * -+ * Retrieves the necessary information via the struct fw_core_dump_data stored in -+ * the private member of the seq_file handle. ++ * This function will iterate through all the active/scheduled GPU command ++ * queue groups associated with the context, deschedule and mark them as ++ * terminated (which will then lead to unbinding of all the queues bound to ++ * them) and also no more work would be allowed to execute for them. + * -+ * Return: -+ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off -+ * * SEQ_START_TOKEN - special iterator pointer indicating its is the start of the file -+ * * NULL - iterator could not be allocated ++ * This is similar to the action taken in response to an unexpected OoM event. + */ -+static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) -+{ -+ struct fw_core_dump_data *dump_data = m->private; -+ struct fw_core_dump_seq_off *data; -+ struct kbase_csf_firmware_interface *interface; -+ loff_t pos = *_pos; -+ -+ if (pos == 0) -+ return SEQ_START_TOKEN; -+ -+ /* Move iterator in the right position based on page number within -+ * available pages of firmware interface memory sections. -+ */ -+ pos--; /* ignore start token */ -+ list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { -+ /* Skip memory sections that cannot be read or are protected. 
*/ -+ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || -+ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) -+ continue; ++void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ if (pos >= interface->num_pages) { -+ pos -= interface->num_pages; -+ } else { -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return NULL; -+ data->interface = interface; -+ data->page_num = pos; -+ return data; -+ } -+ } ++/** ++ * kbase_csf_priority_check - Check the priority requested ++ * ++ * @kbdev: Device pointer ++ * @req_priority: Requested priority ++ * ++ * This will determine whether the requested priority can be satisfied. ++ * ++ * Return: The same or lower priority than requested. ++ */ ++u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority); + -+ return NULL; -+} ++extern const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT]; ++extern const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + +/** -+ * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file -+ * @m: the seq_file handle -+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) ++ * kbase_csf_priority_relative_to_queue_group_priority - Convert relative to base priority + * -+ * Closes the current session and frees any memory related. ++ * @priority: kbase relative priority ++ * ++ * This will convert the monotonically increasing realtive priority to the ++ * fixed base priority list. ++ * ++ * Return: base_queue_group_priority priority. + */ -+static void fw_core_dump_seq_stop(struct seq_file *m, void *v) ++static inline u8 kbase_csf_priority_relative_to_queue_group_priority(u8 priority) +{ -+ kfree(v); ++ if (priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT) ++ priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; ++ return kbasep_csf_relative_to_queue_group_priority[priority]; +} + +/** -+ * fw_core_dump_seq_next - seq_file next operation for firmware core dump file -+ * @m: the seq_file handle -+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) -+ * @pos: holds the current position in pages -+ * (0 or most recent position used in previous session) ++ * kbase_csf_priority_queue_group_priority_to_relative - Convert base priority to relative + * -+ * Moves the iterator @v forward to the next page within the firmware interface -+ * memory sections and returns the updated position in @pos. -+ * @v value SEQ_START_TOKEN indicates the ELF header position. ++ * @priority: base_queue_group_priority priority + * -+ * Return: -+ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off -+ * * NULL - iterator could not be allocated ++ * This will convert the fixed base priority list to monotonically increasing realtive priority. ++ * ++ * Return: kbase relative priority. + */ -+static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) ++static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority) +{ -+ struct fw_core_dump_data *dump_data = m->private; -+ struct fw_core_dump_seq_off *data = v; -+ struct kbase_csf_firmware_interface *interface; -+ struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; -+ -+ /* Is current position at the ELF header ? */ -+ if (v == SEQ_START_TOKEN) { -+ if (list_empty(interfaces)) -+ return NULL; -+ -+ /* Prepare iterator for starting at first page in firmware interface -+ * memory sections. 
-+ */ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return NULL; -+ data->interface = -+ list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); -+ data->page_num = 0; -+ ++*pos; -+ return data; -+ } -+ -+ /* First attempt to satisfy from current firmware interface memory section. */ -+ interface = data->interface; -+ if (data->page_num + 1 < interface->num_pages) { -+ data->page_num++; -+ ++*pos; -+ return data; -+ } -+ -+ /* Need next firmware interface memory section. This could be the last one. */ -+ if (list_is_last(&interface->node, interfaces)) { -+ kfree(data); -+ return NULL; -+ } -+ -+ /* Move to first page in next firmware interface memory section. */ -+ data->interface = list_next_entry(interface, node); -+ data->page_num = 0; -+ ++*pos; -+ -+ return data; ++ /* Apply low priority in case of invalid priority */ ++ if (priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) ++ priority = BASE_QUEUE_GROUP_PRIORITY_LOW; ++ return kbasep_csf_queue_group_priority_to_relative[priority]; +} + +/** -+ * fw_core_dump_seq_show - seq_file show operation for firmware core dump file -+ * @m: the seq_file handle -+ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) ++ * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retreive the GPU cycle counter ++ * value for Ktrace purpose. + * -+ * Writes the current page in a firmware interface memory section indicated -+ * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF -+ * header is written. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * Return: 0 on success, error code otherwise. ++ * This function is just a wrapper to retreive the GPU cycle counter value, to ++ * avoid any overhead on Release builds where Ktrace is disabled by default. ++ * ++ * Return: Snapshot of the GPU cycle count register. + */ -+static int fw_core_dump_seq_show(struct seq_file *m, void *v) ++static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) +{ -+ struct fw_core_dump_seq_off *data = v; -+ struct page *page; -+ u32 *p; -+ -+ /* Either write the ELF header or current page. */ -+ if (v == SEQ_START_TOKEN) -+ return fw_core_dump_write_elf_header(m); -+ -+ /* Write the current page. */ -+ page = as_page(data->interface->phys[data->page_num]); -+ p = kmap_atomic(page); -+ seq_write(m, p, FW_PAGE_SIZE); -+ kunmap_atomic(p); -+ ++#if KBASE_KTRACE_ENABLE ++ return kbase_backend_get_cycle_cnt(kbdev); ++#else + return 0; ++#endif +} + -+/* Sequence file operations for firmware core dump file. */ -+static const struct seq_operations fw_core_dump_seq_ops = { -+ .start = fw_core_dump_seq_start, -+ .next = fw_core_dump_seq_next, -+ .stop = fw_core_dump_seq_stop, -+ .show = fw_core_dump_seq_show, -+}; -+ -+/** -+ * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file -+ * @inode: inode of the file -+ * @file: file pointer ++#endif /* _KBASE_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c +new file mode 100644 +index 000000000..516a33ff7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c +@@ -0,0 +1,191 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * Prepares for servicing a write request to request a core dump from firmware and -+ * a read request to retrieve the core dump. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * -+ * Returns an error if the firmware is not initialized yet. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, error code otherwise. + */ -+static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) -+{ -+ struct kbase_device *const kbdev = inode->i_private; -+ struct fw_core_dump_data *dump_data; -+ int ret; + -+ /* Fail if firmware is not initialized yet. */ -+ if (!kbdev->csf.firmware_inited) { -+ ret = -ENODEV; -+ goto open_fail; -+ } ++#include "mali_kbase_csf_cpu_queue_debugfs.h" ++#include ++#include + -+ /* Open a sequence file for iterating through the pages in the -+ * firmware interface memory pages. seq_open stores a -+ * struct seq_file * in the private_data field of @file. -+ */ -+ ret = seq_open(file, &fw_core_dump_seq_ops); -+ if (ret) -+ goto open_fail; ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+ /* Allocate a context for sequence file operations. */ -+ dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); -+ if (!dump_data) { -+ ret = -ENOMEM; -+ goto out; ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req) ++{ ++ if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED, ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING) != ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { ++ return false; + } + -+ /* Kbase device will be shared with sequence file operations. */ -+ dump_data->kbdev = kbdev; -+ -+ /* Link our sequence file context. */ -+ ((struct seq_file *)file->private_data)->private = dump_data; -+ -+ return 0; -+out: -+ seq_release(inode, file); -+open_fail: -+ return ret; ++ req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; ++ return true; +} + +/** -+ * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file -+ * @file: file pointer -+ * @ubuf: user buffer containing data to store -+ * @count: number of bytes in user buffer -+ * @ppos: file position ++ * kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context + * -+ * Any data written to the file triggers a firmware core dump request which -+ * subsequently can be retrieved by reading from the file. ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Return: Negative error code or 0 on success. 
+ */ -+static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, -+ loff_t *ppos) ++static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data) +{ -+ int err; -+ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; -+ struct kbase_device *const kbdev = dump_data->kbdev; ++ struct kbase_context *kctx = file->private; + -+ CSTD_UNUSED(ppos); ++ mutex_lock(&kctx->csf.lock); ++ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { ++ seq_puts(file, "Dump request already started! (try again)\n"); ++ mutex_unlock(&kctx->csf.lock); ++ return -EBUSY; ++ } + -+ err = fw_core_dump_create(kbdev); ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); ++ init_completion(&kctx->csf.cpu_queue.dump_cmp); ++ kbase_event_wakeup(kctx); ++ mutex_unlock(&kctx->csf.lock); + -+ return err ? err : count; -+} ++ seq_puts(file, ++ "CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n"); + -+/** -+ * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file -+ * @inode: inode of the file -+ * @file: file pointer -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) -+{ -+ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; ++ wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, ++ msecs_to_jiffies(3000)); + -+ seq_release(inode, file); ++ mutex_lock(&kctx->csf.lock); ++ if (kctx->csf.cpu_queue.buffer) { ++ WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING); + -+ kfree(dump_data); ++ seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer); ++ ++ kfree(kctx->csf.cpu_queue.buffer); ++ kctx->csf.cpu_queue.buffer = NULL; ++ kctx->csf.cpu_queue.buffer_size = 0; ++ } else ++ seq_puts(file, "Dump error! (time out)\n"); ++ ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); + ++ mutex_unlock(&kctx->csf.lock); + return 0; +} -+/* Debugfs file operations for firmware core dump file. 
*/ -+static const struct file_operations kbase_csf_fw_core_dump_fops = { -+ .owner = THIS_MODULE, -+ .open = fw_core_dump_debugfs_open, ++ ++static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = { ++ .open = kbasep_csf_cpu_queue_debugfs_open, + .read = seq_read, -+ .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, -+ .release = fw_core_dump_debugfs_release, ++ .release = single_release, +}; + -+void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) +{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, -+ &kbase_csf_fw_core_dump_fops); -+#endif /* CONFIG_DEBUG_FS */ ++ struct dentry *file; ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, ++ kctx, &kbasep_csf_cpu_queue_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create cpu queue debugfs entry"); ++ } ++ ++ kctx->csf.cpu_queue.buffer = NULL; ++ kctx->csf.cpu_queue.buffer_size = 0; ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); +} + -+int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) ++int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size) +{ -+ /* Casting to u16 as version is defined by bits 15:0 */ -+ kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; ++ int err = 0; + -+ if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) -+ return -EPERM; ++ size_t alloc_size = buf_size; ++ char *dump_buffer; + -+ kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; -+ kbdev->csf.fw_core_dump.available = true; ++ if (!buffer || !alloc_size) ++ goto done; ++ ++ alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); ++ dump_buffer = kzalloc(alloc_size, GFP_KERNEL); ++ if (ZERO_OR_NULL_PTR(dump_buffer)) { ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ WARN_ON(kctx->csf.cpu_queue.buffer != NULL); ++ ++ err = copy_from_user(dump_buffer, ++ u64_to_user_ptr(buffer), ++ buf_size); ++ if (err) { ++ kfree(dump_buffer); ++ err = -EFAULT; ++ goto done; ++ } ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ kfree(kctx->csf.cpu_queue.buffer); ++ ++ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING) { ++ kctx->csf.cpu_queue.buffer = dump_buffer; ++ kctx->csf.cpu_queue.buffer_size = buf_size; ++ complete_all(&kctx->csf.cpu_queue.dump_cmp); ++ } else { ++ kfree(dump_buffer); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++done: ++ return err; ++} ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req) ++{ ++ return false; ++} + ++int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size) ++{ + return 0; +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h new file mode 100644 -index 000000000..0537dca4f +index 000000000..435a99395 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h -@@ -0,0 +1,65 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h +@@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -151522,60 +152289,85 @@ index 000000000..0537dca4f + * + */ + -+#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ -+#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ ++#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ ++#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ + -+struct kbase_device; ++#include ++#include ++ ++#include "mali_kbase.h" ++ ++/* Forward declaration */ ++struct base_csf_notification; ++ ++#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 ++ ++/* CPU queue dump status */ ++/* Dumping is done or no dumping is in progress. */ ++#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 ++/* Dumping request is pending. */ ++#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 ++/* Dumping request is issued to Userspace */ ++#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 + -+/** Offset of the last field of core dump entry from the image header */ -+#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** -+ * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from -+ * the image header. ++ * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s) + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @entry: Pointer to section. ++ * @kctx: The kbase_context for which to create the debugfs entry ++ */ ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event + * -+ * Read a "core dump" entry from the image header, check the version for -+ * compatibility and store the address pointer. ++ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * @req: Notification with cpu queue dump request. + * -+ * Return: 0 if successfully parse entry, negative error code otherwise. ++ * Return: true if needs CPU queue dump, or false otherwise. + */ -+int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req); + +/** -+ * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * Must be zero-initialized. ++ * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump + * -+ * Creates the fw_core_dump debugfs file through which to request a firmware -+ * core dump. The created debugfs file is cleaned up as part of kbdev debugfs -+ * cleanup. ++ * @kctx: The kbase_context which cpu queue dumpped belongs to + * -+ * The fw_core_dump debugs file that case be used in the following way: ++ * Return: true if it needs cpu queue dump, or false otherwise. 
++ */ ++static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) ++{ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED); ++#else ++ return false; ++#endif ++} ++ ++/** ++ * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs + * -+ * To explicitly request core dump: -+ * echo 1 >/sys/kernel/debug/mali0/fw_core_dump ++ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * @buffer: Buffer containing the cpu queue information. ++ * @buf_size: Buffer size. + * -+ * To output current core dump (after explicitly requesting a core dump, or -+ * kernel driver reported an internal firmware error): -+ * cat /sys/kernel/debug/mali0/fw_core_dump ++ * Return: Return 0 for dump successfully, or error code. + */ -+void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); -+ -+#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c ++int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size); ++#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c new file mode 100644 -index 000000000..6e0d3c2f5 +index 000000000..e96044ae6 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c -@@ -0,0 +1,451 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +@@ -0,0 +1,767 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -151593,5120 +152385,6092 @@ index 000000000..6e0d3c2f5 + * + */ + ++#include "mali_kbase_csf_csg_debugfs.h" +#include -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address. -+ */ -+#define ARMV7_T1_BL_IMM_INSTR 0xd800f000 ++#include ++#include ++#include + -+/* -+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum -+ * negative jump offset. -+ */ -+#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216 ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "mali_kbase_csf_tl_reader.h" + -+/* -+ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum -+ * positive jump offset. ++/* Wait time to be used cumulatively for all the CSG slots. ++ * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be ++ * any other Host request pending on the FW side and usually FW would be responsive ++ * to the Doorbell IRQs as it won't do any polling for a long time and also it won't ++ * have to wait for any HW state transition to complete for publishing the status. ++ * So it is reasonable to expect that handling of STATUS_UPDATE request would be ++ * relatively very quick. + */ -+#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214 ++#define STATUS_UPDATE_WAIT_TIMEOUT 500 + -+/* -+ * ARMv7 instruction: Double NOP instructions. ++/* The bitmask of CSG slots for which the STATUS_UPDATE request completed. 
++ * The access to it is serialized with scheduler lock, so at a time it would ++ * get used either for "active_groups" or per context "groups" debugfs file. + */ -+#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00 -+ -+#if defined(CONFIG_DEBUG_FS) ++static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS); + -+static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) ++static ++bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr) +{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct firmware_trace_buffer *tb = -+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[csg_nr]; + -+ if (tb == NULL) { -+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); -+ return -EIO; -+ } -+ /* The enabled traces limited to u64 here, regarded practical */ -+ *val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb); -+ return 0; ++ return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ ++ kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & ++ CSG_REQ_STATUS_UPDATE_MASK); +} + -+static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) ++static ++bool csg_slots_status_update_finish(struct kbase_device *kbdev, ++ const unsigned long *slots_mask) +{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct firmware_trace_buffer *tb = -+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); -+ u64 new_mask; -+ unsigned int enable_bits_count; ++ const u32 max_csg_slots = kbdev->csf.global_iface.group_num; ++ bool changed = false; ++ u32 csg_nr; + -+ if (tb == NULL) { -+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); -+ return -EIO; -+ } ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ /* Ignore unsupported types */ -+ enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); -+ if (enable_bits_count > 64) { -+ dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); -+ enable_bits_count = 64; ++ for_each_set_bit(csg_nr, slots_mask, max_csg_slots) { ++ if (csg_slot_status_update_finish(kbdev, csg_nr)) { ++ set_bit(csg_nr, csg_slots_status_updated); ++ changed = true; ++ } + } -+ new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); + -+ if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) -+ return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); -+ else -+ return 0; ++ return changed; +} + -+static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file) ++static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, ++ unsigned long *slots_mask) +{ -+ struct kbase_device *kbdev = in->i_private; ++ const u32 max_csg_slots = kbdev->csf.global_iface.group_num; ++ long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT); + -+ file->private_data = kbdev; -+ dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ return 0; ++ bitmap_zero(csg_slots_status_updated, max_csg_slots); ++ ++ while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) { ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ csg_slots_status_update_finish(kbdev, slots_mask), ++ remaining); ++ if (likely(remaining)) { ++ bitmap_andnot(slots_mask, slots_mask, ++ csg_slots_status_updated, max_csg_slots); ++ } else { ++ dev_warn(kbdev->dev, ++ "STATUS_UPDATE request timed out 
for slots 0x%lx", ++ slots_mask[0]); ++ } ++ } +} + -+static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) ++void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = file->private_data; -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ unsigned int n_read; -+ unsigned long not_copied; -+ /* Limit reads to the kernel dump buffer size */ -+ size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE); -+ int ret; ++ u32 max_csg_slots = kbdev->csf.global_iface.group_num; ++ DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; ++ u32 csg_nr; ++ unsigned long flags; + -+ struct firmware_trace_buffer *tb = -+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (tb == NULL) { -+ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); -+ return -EIO; ++ /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell ++ * ring for Extract offset update, shall not be made when MCU has been ++ * put to sleep otherwise it will undesirably make MCU exit the sleep ++ * state. Also it isn't really needed as FW will implicitly update the ++ * status of all on-slot groups when MCU sleep request is sent to it. ++ */ ++ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { ++ /* Wait for the MCU sleep request to complete. */ ++ kbase_pm_wait_for_desired_state(kbdev); ++ bitmap_copy(csg_slots_status_updated, ++ kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); ++ return; + } + -+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) -+ return -EBUSY; -+ -+ /* Reading from userspace is only allowed in manual mode */ -+ if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { -+ ret = -EINVAL; -+ goto out; ++ for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ if (!group) ++ continue; ++ /* Ring the User doorbell for FW to update the Extract offset */ ++ kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); ++ set_bit(csg_nr, used_csgs); + } + -+ n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem); -+ -+ /* Do the copy, if we have obtained some trace data */ -+ not_copied = (n_read) ? 
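/*
 * A minimal sketch (not part of the patch) of the request/acknowledge
 * convention that the STATUS_UPDATE handling in this file relies on: the
 * host flips a request bit in CSG_REQ relative to the current CSG_ACK
 * value and rings a doorbell, and the firmware acknowledges by flipping
 * the same bit in CSG_ACK. "Complete" therefore means the two registers
 * agree under the mask, exactly as tested by
 * csg_slot_status_update_finish() above and as set up by the CSG_REQ
 * update a few lines below.
 */
static inline bool csf_req_acked(u32 req, u32 ack, u32 mask)
{
	/* No masked bits differ once the firmware has acknowledged */
	return ((req ^ ack) & mask) == 0;
}

static inline u32 csf_toggle_req(u32 req, u32 ack, u32 mask)
{
	/* Make the masked bits of REQ differ from ACK, leaving the other
	 * bits untouched; this mirrors the
	 * kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ~ack, mask)
	 * call used below.
	 */
	return (req & ~mask) | (~ack & mask);
}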
copy_to_user(buf, fw_log->dump_buf, n_read) : 0; ++ /* Return early if there are no on-slot groups */ ++ if (bitmap_empty(used_csgs, max_csg_slots)) ++ return; + -+ if (not_copied) { -+ dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); -+ ret = -EFAULT; -+ goto out; ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ for_each_set_bit(csg_nr, used_csgs, max_csg_slots) { ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[csg_nr]; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ++ ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), ++ CSG_REQ_STATUS_UPDATE_MASK); + } + -+ *ppos += n_read; -+ ret = n_read; -+ -+out: -+ atomic_set(&fw_log->busy, 0); -+ return ret; -+} -+ -+static int kbase_csf_firmware_log_mode_read(void *data, u64 *val) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ -+ *val = fw_log->mode; -+ return 0; ++ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE)); ++ kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ wait_csg_slots_status_update_finish(kbdev, used_csgs); ++ /* Wait for the User doobell ring to take effect */ ++ msleep(100); +} + -+static int kbase_csf_firmware_log_mode_write(void *data, u64 val) ++#define MAX_SCHED_STATE_STRING_LEN (16) ++static const char *scheduler_state_to_string(struct kbase_device *kbdev, ++ enum kbase_csf_scheduler_state sched_state) +{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ int ret = 0; -+ -+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) -+ return -EBUSY; -+ -+ if (val == fw_log->mode) -+ goto out; -+ -+ switch (val) { -+ case KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: -+ cancel_delayed_work_sync(&fw_log->poll_work); -+ break; -+ case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: -+ schedule_delayed_work(&fw_log->poll_work, -+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); -+ break; ++ switch (sched_state) { ++ case SCHED_BUSY: ++ return "BUSY"; ++ case SCHED_INACTIVE: ++ return "INACTIVE"; ++ case SCHED_SUSPENDED: ++ return "SUSPENDED"; ++#ifdef KBASE_PM_RUNTIME ++ case SCHED_SLEEPING: ++ return "SLEEPING"; ++#endif + default: -+ ret = -EINVAL; -+ goto out; ++ dev_warn(kbdev->dev, "Unknown Scheduler state %d", sched_state); ++ return NULL; + } -+ -+ fw_log->mode = val; -+ -+out: -+ atomic_set(&fw_log->busy, 0); -+ return ret; +} + -+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, -+ kbase_csf_firmware_log_enable_mask_read, -+ kbase_csf_firmware_log_enable_mask_write, "%llx\n"); -+ -+static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_csf_firmware_log_debugfs_open, -+ .read = kbasep_csf_firmware_log_debugfs_read, -+ .llseek = no_llseek, -+}; ++/** ++ * blocked_reason_to_string() - Convert blocking reason id to a string ++ * ++ * @reason_id: blocked_reason ++ * ++ * Return: Suitable string ++ */ ++static const char *blocked_reason_to_string(u32 reason_id) ++{ ++ /* possible blocking reasons of a cs */ ++ static const char *const cs_blocked_reason[] = { ++ [CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", ++ [CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = ++ "PROGRESS_WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] 
= "DEFERRED", ++ [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", ++ [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" ++ }; + -+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, -+ kbase_csf_firmware_log_mode_write, "%llu\n"); ++ if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) ++ return "UNKNOWN_BLOCKED_REASON_ID"; + -+#endif /* CONFIG_DEBUG_FS */ ++ return cs_blocked_reason[reason_id]; ++} + -+static void kbase_csf_firmware_log_poll(struct work_struct *work) ++static bool sb_source_supported(u32 glb_version) +{ -+ struct kbase_device *kbdev = -+ container_of(work, struct kbase_device, csf.fw_log.poll_work.work); -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ bool supported = false; + -+ schedule_delayed_work(&fw_log->poll_work, -+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); ++ if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && ++ (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || ++ ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && ++ (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || ++ ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && ++ (GLB_VERSION_MINOR_GET(glb_version) >= 3))) ++ supported = true; + -+ kbase_csf_firmware_log_dump_buffer(kbdev); ++ return supported; +} + -+int kbase_csf_firmware_log_init(struct kbase_device *kbdev) ++static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value, ++ u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason) +{ -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ -+ /* Add one byte for null-termination */ -+ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); -+ if (fw_log->dump_buf == NULL) -+ return -ENOMEM; ++#define WAITING "Waiting" ++#define NOT_WAITING "Not waiting" + -+ /* Ensure null-termination for all strings */ -+ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; ++ seq_printf(file, "SB_MASK: %d\n", ++ CS_STATUS_WAIT_SB_MASK_GET(wait_status)); ++ if (sb_source_supported(glb_version)) ++ seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); ++ seq_printf(file, "PROGRESS_WAIT: %s\n", ++ CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? ++ WAITING : NOT_WAITING); ++ seq_printf(file, "PROTM_PEND: %s\n", ++ CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? ++ WAITING : NOT_WAITING); ++ seq_printf(file, "SYNC_WAIT: %s\n", ++ CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? ++ WAITING : NOT_WAITING); ++ seq_printf(file, "WAIT_CONDITION: %s\n", ++ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? 
++ "greater than" : "less or equal"); ++ seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); ++ seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value); ++ seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); ++ seq_printf(file, "SB_STATUS: %u\n", ++ CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); ++ seq_printf(file, "BLOCKED_REASON: %s\n", ++ blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET( ++ blocked_reason))); ++} + -+ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; ++static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ u32 val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_BASE_LO); ++ u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_BASE_HI) << 32) | val; ++ val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_SIZE); + -+ atomic_set(&fw_log->busy, 0); -+ INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); ++ seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); + -+#if defined(CONFIG_DEBUG_FS) -+ debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, -+ &kbase_csf_firmware_log_enable_mask_fops); -+ debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_csf_firmware_log_debugfs_fops); -+ debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, -+ &kbase_csf_firmware_log_mode_fops); -+#endif /* CONFIG_DEBUG_FS */ ++ /* Write offset variable address (pointer) */ ++ val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_OFFSET_POINTER_LO); ++ addr = ((u64)kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val; ++ seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); + -+ return 0; ++ /* EVENT_SIZE and EVENT_STATEs */ ++ val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); ++ seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n", ++ CS_INSTR_CONFIG_EVENT_SIZE_GET(val), ++ CS_INSTR_CONFIG_EVENT_STATE_GET(val)); +} + -+void kbase_csf_firmware_log_term(struct kbase_device *kbdev) ++/** ++ * kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue ++ * debug information ++ * ++ * @file: seq_file for printing to ++ * @queue: Address of a GPU command queue to examine ++ */ ++static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, ++ struct kbase_queue *queue) +{ -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ u32 *addr; ++ u64 cs_extract; ++ u64 cs_insert; ++ u32 cs_active; ++ u64 wait_sync_pointer; ++ u32 wait_status, wait_sync_value; ++ u32 sb_status; ++ u32 blocked_reason; ++ struct kbase_vmap_struct *mapping; ++ u64 *evt; ++ u64 wait_sync_live_value; ++ u32 glb_version; + -+ if (fw_log->dump_buf) { -+ cancel_delayed_work_sync(&fw_log->poll_work); -+ kfree(fw_log->dump_buf); -+ fw_log->dump_buf = NULL; -+ } -+} ++ if (!queue) ++ return; + -+void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; -+ unsigned int read_size, remaining_size; -+ struct firmware_trace_buffer *tb = -+ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); ++ glb_version = queue->kctx->kbdev->csf.global_iface.version; + -+ if (tb == NULL) { -+ dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); ++ if 
(WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || ++ !queue->group)) + return; -+ } + -+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) -+ return; ++ addr = (u32 *)queue->user_io_addr; ++ cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); + -+ /* FW should only print complete messages, so there's no need to handle -+ * partial messages over multiple invocations of this function -+ */ ++ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); ++ cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); ++ cs_active = addr[CS_ACTIVE/4]; + -+ p = buf; -+ pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE]; ++#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ ++ "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" + -+ while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) { -+ pend = p + read_size; -+ p = buf; ++ seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n", ++ queue->csi_index, queue->base_addr, ++ queue->size, ++ queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr); + -+ while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { -+ /* Null-terminate the string */ -+ *pnewline = 0; ++ /* Print status information for blocked group waiting for sync object. For on-slot queues, ++ * if cs_trace is enabled, dump the interface's cs_trace configuration. ++ */ ++ if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { ++ seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr); ++ if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { ++ wait_status = queue->status_wait; ++ wait_sync_value = queue->sync_value; ++ wait_sync_pointer = queue->sync_ptr; ++ sb_status = queue->sb_status; ++ blocked_reason = queue->blocked_reason; + -+ dev_err(kbdev->dev, "FW> %s", p); ++ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); ++ if (evt) { ++ wait_sync_live_value = evt[0]; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ } else { ++ wait_sync_live_value = U64_MAX; ++ } + -+ p = pnewline + 1; ++ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ file, glb_version, wait_status, wait_sync_value, ++ wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); + } ++ } else { ++ struct kbase_device const *const kbdev = ++ queue->group->kctx->kbdev; ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u64 cmd_ptr; ++ u32 req_res; + -+ remaining_size = pend - p; ++ if (WARN_ON(!stream)) ++ return; + -+ if (!remaining_size) { -+ p = buf; -+ } else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) { -+ /* Copy unfinished string to the start of the buffer */ -+ memmove(buf, p, remaining_size); -+ p = &buf[remaining_size]; ++ cmd_ptr = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_CMD_PTR_LO); ++ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_CMD_PTR_HI) << 32; ++ req_res = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_REQ_RESOURCE); ++ ++ seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr); ++ seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n", ++ CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n", ++ CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [TILER]: %d\n", ++ 
CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n", ++ CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); ++ ++ wait_status = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT); ++ wait_sync_value = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_VALUE); ++ wait_sync_pointer = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_LO); ++ wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ ++ sb_status = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS); ++ blocked_reason = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_BLOCKED_REASON); ++ ++ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); ++ if (evt) { ++ wait_sync_live_value = evt[0]; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } else { -+ /* Print abnormally long string without newlines */ -+ dev_err(kbdev->dev, "FW> %s", buf); -+ p = buf; ++ wait_sync_live_value = U64_MAX; + } -+ } + -+ if (p != buf) { -+ /* Null-terminate and print last unfinished string */ -+ *p = 0; -+ dev_err(kbdev->dev, "FW> %s", buf); ++ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ file, glb_version, wait_status, wait_sync_value, wait_sync_live_value, ++ wait_sync_pointer, sb_status, blocked_reason); ++ /* Dealing with cs_trace */ ++ if (kbase_csf_scheduler_queue_has_trace(queue)) ++ kbasep_csf_scheduler_dump_active_cs_trace(file, stream); ++ else ++ seq_puts(file, "NO CS_TRACE\n"); + } + -+ atomic_set(&fw_log->busy, 0); -+} -+ -+void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, -+ const uint32_t *entry) -+{ -+ kbdev->csf.fw_log.func_call_list_va_start = entry[0]; -+ kbdev->csf.fw_log.func_call_list_va_end = entry[1]; ++ seq_puts(file, "\n"); +} + -+/** -+ * toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @enable: Whether to enable or disable the function calls. 
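/*
 * A small illustrative helper (not from the patch): several reads in
 * kbasep_csf_scheduler_dump_active_queue() above rebuild a 64-bit value
 * from a LO/HI pair of 32-bit words, e.g. CS_INSERT_LO/HI and
 * CS_EXTRACT_LO/HI from the queue's user I/O pages, or
 * CS_STATUS_CMD_PTR_LO/HI and CS_STATUS_WAIT_SYNC_POINTER_LO/HI from the
 * CS output page. The composition is always the same:
 */
static inline u64 csf_make_u64(u32 lo, u32 hi)
{
	/* Low word in bits [31:0], high word in bits [63:32] */
	return (u64)lo | ((u64)hi << 32);
}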
-+ */ -+static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable) ++static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, ++ struct kbase_queue_group *const group) +{ -+ uint32_t bl_instruction, diff; -+ uint32_t imm11, imm10, i1, i2, j1, j2, sign; -+ uint32_t calling_address = 0, callee_address = 0; -+ uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start; -+ const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end; -+ -+ if (list_entry == 0 || list_va_end == 0) -+ return; -+ -+ if (enable) { -+ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { -+ /* Read calling address */ -+ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); -+ /* Read callee address */ -+ kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t), -+ &callee_address); ++ if (kbase_csf_scheduler_group_get_slot(group) >= 0) { ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ u32 ep_c, ep_r; ++ char exclusive; ++ char idle = 'N'; ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[group->csg_nr]; ++ u8 slot_priority = ++ kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; + -+ diff = callee_address - calling_address - 4; -+ sign = !!(diff & 0x80000000); -+ if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || -+ ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { -+ dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", -+ calling_address); -+ continue; -+ } ++ ep_c = kbase_csf_firmware_csg_output(ginfo, ++ CSG_STATUS_EP_CURRENT); ++ ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); + -+ i1 = (diff & 0x00800000) >> 23; -+ j1 = !i1 ^ sign; -+ i2 = (diff & 0x00400000) >> 22; -+ j2 = !i2 ^ sign; -+ imm11 = (diff & 0xffe) >> 1; -+ imm10 = (diff & 0x3ff000) >> 12; ++ if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) ++ exclusive = 'C'; ++ else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) ++ exclusive = 'F'; ++ else ++ exclusive = '0'; + -+ /* Compose BL instruction */ -+ bl_instruction = ARMV7_T1_BL_IMM_INSTR; -+ bl_instruction |= j1 << 29; -+ bl_instruction |= j2 << 27; -+ bl_instruction |= imm11 << 16; -+ bl_instruction |= sign << 10; -+ bl_instruction |= imm10; ++ if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & ++ CSG_STATUS_STATE_IDLE_MASK) ++ idle = 'Y'; + -+ /* Patch logging func calls in their load location */ -+ dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address, -+ bl_instruction); -+ kbase_csf_update_firmware_memory_exe(kbdev, calling_address, -+ bl_instruction); ++ if (!test_bit(group->csg_nr, csg_slots_status_updated)) { ++ seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", ++ group->csg_nr); ++ seq_puts(file, "*** The following group-record is likely stale\n"); + } -+ } else { -+ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { -+ /* Read calling address */ -+ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); + -+ /* Overwrite logging func calls with 2 NOP instructions */ -+ kbase_csf_update_firmware_memory_exe(kbdev, calling_address, -+ ARMV7_DOUBLE_NOP_INSTR); -+ } ++ seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); ++ seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", ++ group->handle, ++ group->csg_nr, ++ slot_priority, ++ group->run_state, ++ group->priority, ++ 
CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), ++ CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), ++ CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), ++ exclusive, ++ idle); ++ } else { ++ seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); ++ seq_printf(file, "%7d, %6d, %9d, %8d\n", ++ group->handle, ++ group->csg_nr, ++ group->run_state, ++ group->priority); + } -+} -+ -+int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val) -+{ -+ unsigned long flags; -+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; -+ bool mcu_inactive; -+ bool resume_needed = false; -+ int ret = 0; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ -+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) -+ return -EBUSY; -+ -+ /* Suspend all the active CS groups */ -+ dev_dbg(kbdev->dev, "Suspend all the active CS groups"); + -+ kbase_csf_scheduler_lock(kbdev); -+ while (scheduler->state != SCHED_SUSPENDED) { -+ kbase_csf_scheduler_unlock(kbdev); -+ kbase_csf_scheduler_pm_suspend(kbdev); -+ kbase_csf_scheduler_lock(kbdev); -+ resume_needed = true; -+ } ++ if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { ++ unsigned int i; + -+ /* Wait for the MCU to get disabled */ -+ dev_info(kbdev->dev, "Wait for the MCU to get disabled"); -+ ret = kbase_pm_wait_for_desired_state(kbdev); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "wait for PM state failed when toggling FW logging calls"); -+ ret = -EAGAIN; -+ goto out; -+ } ++ seq_puts(file, "Bound queues:\n"); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ mcu_inactive = -+ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (!mcu_inactive) { -+ dev_err(kbdev->dev, -+ "MCU not inactive after PM state wait when toggling FW logging calls"); -+ ret = -EAGAIN; -+ goto out; ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ kbasep_csf_scheduler_dump_active_queue(file, ++ group->bound_queues[i]); ++ } + } + -+ /* Toggle FW logging call in the loaded FW image */ -+ toggle_logging_calls_in_loaded_image(kbdev, val); -+ dev_dbg(kbdev->dev, "FW logging: %s", val ? "enabled" : "disabled"); -+ -+out: -+ kbase_csf_scheduler_unlock(kbdev); -+ if (resume_needed) -+ /* Resume queue groups and start mcu */ -+ kbase_csf_scheduler_pm_resume(kbdev); -+ atomic_set(&fw_log->busy, 0); -+ return ret; ++ seq_puts(file, "\n"); +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h -new file mode 100644 -index 000000000..100832046 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h -@@ -0,0 +1,77 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
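/*
 * Reader's note on the table printed by
 * kbasep_csf_scheduler_dump_active_group() above (a summary of the code,
 * not vendor documentation): for an on-slot group the
 * "C_EP/F_EP/T_EP (Alloc/Req)" columns pair the current endpoint counts
 * from CSG_STATUS_EP_CURRENT with the requested counts from
 * CSG_STATUS_EP_REQ for compute, fragment and tiler endpoints;
 * "Exclusive" is 'C' or 'F' when the exclusive-compute or
 * exclusive-fragment bit is set in CSG_STATUS_EP_REQ, otherwise '0'; and
 * "Idle" is 'Y' when CSG_STATUS_STATE reports the idle bit. Groups that
 * are not resident on a CSG slot only report GroupID, CSG NR, Run State
 * and Priority.
 */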
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_CSF_FIRMWARE_LOG_H_ -+#define _KBASE_CSF_FIRMWARE_LOG_H_ -+ -+#include -+ -+/** Offset of the last field of functions call list entry from the image header */ -+#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) -+ -+/* -+ * Firmware log dumping buffer size. -+ */ -+#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE -+ -+/** -+ * kbase_csf_firmware_log_init - Initialize firmware log handling. -+ * -+ * @kbdev: Pointer to the Kbase device -+ * -+ * Return: The initialization error code. -+ */ -+int kbase_csf_firmware_log_init(struct kbase_device *kbdev); + +/** -+ * kbase_csf_firmware_log_term - Terminate firmware log handling. ++ * kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue ++ * group debug information + * -+ * @kbdev: Pointer to the Kbase device -+ */ -+void kbase_csf_firmware_log_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log -+ * buffer and print it to dmesg. ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase context + * -+ * @kbdev: Pointer to the Kbase device ++ * Return: Negative error code or 0 on success. + */ -+void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev); ++static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, ++ void *data) ++{ ++ u32 gr; ++ struct kbase_context *const kctx = file->private; ++ struct kbase_device *kbdev; + -+/** -+ * kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @entry: Pointer to section. -+ */ -+void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, -+ const uint32_t *entry); -+/** -+ * kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @val: Configuration option value. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val); ++ if (WARN_ON(!kctx)) ++ return -EINVAL; + -+#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c -new file mode 100644 -index 000000000..833947fac ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c -@@ -0,0 +1,1661 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ kbdev = kctx->kbdev; + -+#include "mali_kbase.h" -+#include "mali_kbase_csf_firmware.h" -+#include "mali_kbase_csf_trace_buffer.h" -+#include "mali_kbase_csf_timeout.h" -+#include "mali_kbase_mem.h" -+#include "mali_kbase_reset_gpu.h" -+#include "mali_kbase_ctx_sched.h" -+#include "device/mali_kbase_device.h" -+#include -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "mali_kbase_csf_scheduler.h" -+#include "mmu/mali_kbase_mmu.h" -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -+#include -+#include ++ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", ++ MALI_CSF_CSG_DEBUGFS_VERSION); + -+#include -+#include -+#include -+#include -+#include -+#include -+#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) -+#include -+#endif -+#include ++ mutex_lock(&kctx->csf.lock); ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_csf_debugfs_update_active_groups_status(kbdev); ++ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { ++ struct kbase_queue_group *const group = ++ kctx->csf.queue_groups[gr]; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+/* Makes Driver wait indefinitely for an acknowledgment for the different -+ * requests it sends to firmware. Otherwise the timeouts interfere with the -+ * use of debugger for source-level debugging of firmware as Driver initiates -+ * a GPU reset when a request times out, which always happen when a debugger -+ * is connected. -+ */ -+bool fw_debug; /* Default value of 0/false */ -+module_param(fw_debug, bool, 0444); -+MODULE_PARM_DESC(fw_debug, -+ "Enables effective use of a debugger for debugging firmware code."); -+#endif ++ if (group) ++ kbasep_csf_scheduler_dump_active_group(file, group); ++ } ++ kbase_csf_scheduler_unlock(kbdev); ++ mutex_unlock(&kctx->csf.lock); + -+#define DUMMY_FW_PAGE_SIZE SZ_4K ++ return 0; ++} + +/** -+ * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs ++ * kbasep_csf_scheduler_dump_active_groups() - Print debug info for active ++ * GPU command queue groups + * -+ * @cs_kernel_input: CS kernel input memory region -+ * @cs_kernel_output: CS kernel output memory region -+ */ -+struct dummy_firmware_csi { -+ u8 cs_kernel_input[DUMMY_FW_PAGE_SIZE]; -+ u8 cs_kernel_output[DUMMY_FW_PAGE_SIZE]; -+}; -+ -+/** -+ * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_device + * -+ * @csg_input: CSG kernel input memory region -+ * @csg_output: CSG kernel output memory region -+ * @csi: Dummy firmware CSIs ++ * Return: Negative error code or 0 on success. 
+ */ -+struct dummy_firmware_csg { -+ u8 csg_input[DUMMY_FW_PAGE_SIZE]; -+ u8 csg_output[DUMMY_FW_PAGE_SIZE]; -+ struct dummy_firmware_csi csi[8]; -+} dummy_firmware_csg; ++static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, ++ void *data) ++{ ++ u32 csg_nr; ++ struct kbase_device *kbdev = file->private; ++ u32 num_groups = kbdev->csf.global_iface.group_num; + -+/** -+ * struct dummy_firmware_interface - Represents a dummy interface in the MCU firmware -+ * -+ * @global_input: Global input memory region -+ * @global_output: Global output memory region -+ * @csg: Dummy firmware CSGs -+ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces -+ * list using this list_head to link them -+ */ -+struct dummy_firmware_interface { -+ u8 global_input[DUMMY_FW_PAGE_SIZE]; -+ u8 global_output[DUMMY_FW_PAGE_SIZE]; -+ struct dummy_firmware_csg csg[8]; -+ struct list_head node; -+} dummy_firmware_interface; ++ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", ++ MALI_CSF_CSG_DEBUGFS_VERSION); + -+#define CSF_GLB_REQ_CFG_MASK \ -+ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ -+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_csf_debugfs_update_active_groups_status(kbdev); ++ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + -+static inline u32 input_page_read(const u32 *const input, const u32 offset) -+{ -+ WARN_ON(offset % sizeof(u32)); ++ if (!group) ++ continue; + -+ return input[offset / sizeof(u32)]; -+} ++ seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid, ++ group->kctx->id); + -+static inline void input_page_write(u32 *const input, const u32 offset, -+ const u32 value) -+{ -+ WARN_ON(offset % sizeof(u32)); ++ kbasep_csf_scheduler_dump_active_group(file, group); ++ } ++ kbase_csf_scheduler_unlock(kbdev); + -+ input[offset / sizeof(u32)] = value; ++ return 0; +} + -+static inline u32 output_page_read(const u32 *const output, const u32 offset) ++static int kbasep_csf_queue_group_debugfs_open(struct inode *in, ++ struct file *file) +{ -+ WARN_ON(offset % sizeof(u32)); -+ -+ return output[offset / sizeof(u32)]; ++ return single_open(file, kbasep_csf_queue_group_debugfs_show, ++ in->i_private); +} + -+static inline void output_page_write(u32 *const output, const u32 offset, -+ const u32 value) ++static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, ++ struct file *file) +{ -+ WARN_ON(offset % sizeof(u32)); -+ -+ output[offset / sizeof(u32)] = value; ++ return single_open(file, kbasep_csf_scheduler_dump_active_groups, ++ in->i_private); +} + -+/** -+ * invent_memory_setup_entry() - Invent an "interface memory setup" section -+ * -+ * @kbdev: Kbase device structure -+ * -+ * Invent an "interface memory setup" section similar to one from a firmware -+ * image. If successful the interface will be added to the -+ * kbase_device:csf.firmware_interfaces list. -+ * -+ * Return: 0 if successful, negative error code on failure -+ */ -+static int invent_memory_setup_entry(struct kbase_device *kbdev) -+{ -+ struct dummy_firmware_interface *interface = NULL; -+ -+ /* Allocate enough memory for the struct dummy_firmware_interface. 
-+ */ -+ interface = kzalloc(sizeof(*interface), GFP_KERNEL); -+ if (!interface) -+ return -ENOMEM; -+ -+ kbdev->csf.shared_interface = interface; -+ list_add(&interface->node, &kbdev->csf.firmware_interfaces); -+ -+ /* NO_MALI: Don't insert any firmware pages */ -+ return 0; -+} ++static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { ++ .open = kbasep_csf_queue_group_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+static void free_global_iface(struct kbase_device *kbdev) ++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) +{ -+ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ struct dentry *file; ++ const mode_t mode = 0444; + -+ if (iface->groups) { -+ unsigned int gid; ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+ for (gid = 0; gid < iface->group_num; ++gid) -+ kfree(iface->groups[gid].streams); ++ file = debugfs_create_file("groups", mode, ++ kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops); + -+ kfree(iface->groups); -+ iface->groups = NULL; ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create per context queue groups debugfs entry"); + } +} + -+static int invent_cmd_stream_group_info(struct kbase_device *kbdev, -+ struct kbase_csf_cmd_stream_group_info *ginfo, -+ struct dummy_firmware_csg *csg) -+{ -+ unsigned int sid; -+ -+ ginfo->input = csg->csg_input; -+ ginfo->output = csg->csg_output; -+ -+ ginfo->kbdev = kbdev; -+ ginfo->features = 0; -+ ginfo->suspend_size = 64; -+ ginfo->protm_suspend_size = 64; -+ ginfo->stream_num = ARRAY_SIZE(csg->csi); -+ ginfo->stream_stride = 0; -+ -+ ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); -+ if (ginfo->streams == NULL) -+ return -ENOMEM; -+ -+ for (sid = 0; sid < ginfo->stream_num; ++sid) { -+ struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid]; -+ struct dummy_firmware_csi *csi = &csg->csi[sid]; ++static const struct file_operations ++ kbasep_csf_active_queue_groups_debugfs_fops = { ++ .open = kbasep_csf_active_queue_groups_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ stream->input = csi->cs_kernel_input; -+ stream->output = csi->cs_kernel_output; ++static int kbasep_csf_debugfs_scheduling_timer_enabled_get( ++ void *data, u64 *val) ++{ ++ struct kbase_device *const kbdev = data; + -+ stream->kbdev = kbdev; -+ stream->features = -+ STREAM_FEATURES_WORK_REGISTERS_SET(0, 80) | -+ STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | -+ STREAM_FEATURES_COMPUTE_SET(0, 1) | -+ STREAM_FEATURES_FRAGMENT_SET(0, 1) | -+ STREAM_FEATURES_TILER_SET(0, 1); -+ } ++ *val = kbase_csf_scheduler_timer_is_enabled(kbdev); + + return 0; +} + -+static int invent_capabilities(struct kbase_device *kbdev) ++static int kbasep_csf_debugfs_scheduling_timer_enabled_set( ++ void *data, u64 val) +{ -+ struct dummy_firmware_interface *interface = kbdev->csf.shared_interface; -+ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; -+ unsigned int gid; -+ -+ iface->input = interface->global_input; -+ iface->output = interface->global_output; -+ -+ iface->version = 1; -+ iface->kbdev = kbdev; -+ iface->features = 0; -+ iface->prfcnt_size = -+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE); -+ -+ if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { -+ /* update rate=1, max event size = 1<<8 = 256 */ -+ iface->instr_features = 0x81; -+ } else { -+ iface->instr_features 
= 0; -+ } -+ -+ iface->group_num = ARRAY_SIZE(interface->csg); -+ iface->group_stride = 0; -+ -+ iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); -+ if (iface->groups == NULL) -+ return -ENOMEM; -+ -+ for (gid = 0; gid < iface->group_num; ++gid) { -+ int err; ++ struct kbase_device *const kbdev = data; + -+ err = invent_cmd_stream_group_info(kbdev, &iface->groups[gid], -+ &interface->csg[gid]); -+ if (err < 0) { -+ free_global_iface(kbdev); -+ return err; -+ } -+ } ++ kbase_csf_scheduler_timer_set_enabled(kbdev, val != 0); + + return 0; +} + -+void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value) -+{ -+ /* NO_MALI: Nothing to do here */ -+} -+ -+ -+void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value) ++static int kbasep_csf_debugfs_scheduling_timer_kick_set( ++ void *data, u64 val) +{ -+ /* NO_MALI: Nothing to do here */ -+} ++ struct kbase_device *const kbdev = data; + -+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 *value) -+{ -+ /* NO_MALI: Nothing to do here */ -+} ++ kbase_csf_scheduler_kick(kbdev); + -+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, -+ u32 gpu_addr, u32 value) -+{ -+ /* NO_MALI: Nothing to do here */ ++ return 0; +} + -+void kbase_csf_firmware_cs_input( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, -+ const u32 value) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ -+ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); -+ input_page_write(info->input, offset, value); -+ -+ if (offset == CS_REQ) { -+ /* NO_MALI: Immediately acknowledge requests */ -+ output_page_write(info->output, CS_ACK, value); -+ } -+} ++DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_get, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_set, "%llu\n"); ++DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL, ++ &kbasep_csf_debugfs_scheduling_timer_kick_set, "%llu\n"); + -+u32 kbase_csf_firmware_cs_input_read( -+ const struct kbase_csf_cmd_stream_info *const info, -+ const u32 offset) ++/** ++ * kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler. ++ * ++ * @file: Object of the file that is being read. ++ * @user_buf: User buffer that contains the string. ++ * @count: Length of user buffer ++ * @ppos: Offset within file object ++ * ++ * This function will return the current Scheduler state to Userspace ++ * Scheduler may exit that state by the time the state string is received ++ * by the Userspace. ++ * ++ * Return: 0 if Scheduler was found in an unexpected state, or the ++ * size of the state string if it was copied successfully to the ++ * User buffer or a negative value in case of an error. 
++ */ ++static ssize_t kbase_csf_debugfs_scheduler_state_get(struct file *file, ++ char __user *user_buf, size_t count, loff_t *ppos) +{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = input_page_read(info->input, offset); -+ -+ dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); -+ return val; -+} ++ struct kbase_device *kbdev = file->private_data; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ const char *state_string; + -+void kbase_csf_firmware_cs_input_mask( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, -+ const u32 value, const u32 mask) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ kbase_csf_scheduler_lock(kbdev); ++ state_string = scheduler_state_to_string(kbdev, scheduler->state); ++ kbase_csf_scheduler_unlock(kbdev); + -+ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); ++ if (!state_string) ++ count = 0; + -+ /* NO_MALI: Go through kbase_csf_firmware_cs_input to capture writes */ -+ kbase_csf_firmware_cs_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); ++ return simple_read_from_buffer(user_buf, count, ppos, ++ state_string, strlen(state_string)); +} + -+u32 kbase_csf_firmware_cs_output( -+ const struct kbase_csf_cmd_stream_info *const info, const u32 offset) ++/** ++ * kbase_csf_debugfs_scheduler_state_set() - Set the state of scheduler. ++ * ++ * @file: Object of the file that is being written to. ++ * @ubuf: User buffer that contains the string. ++ * @count: Length of user buffer ++ * @ppos: Offset within file object ++ * ++ * This function will update the Scheduler state as per the state string ++ * passed by the Userspace. Scheduler may or may not remain in new state ++ * for long. ++ * ++ * Return: Negative value if the string doesn't correspond to a valid Scheduler ++ * state or if copy from user buffer failed, otherwise the length of ++ * the User buffer. 
++ */ ++static ssize_t kbase_csf_debugfs_scheduler_state_set(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) +{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = output_page_read(info->output, offset); ++ struct kbase_device *kbdev = file->private_data; ++ char buf[MAX_SCHED_STATE_STRING_LEN]; ++ ssize_t ret = count; + -+ dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); -+ return val; -+} ++ CSTD_UNUSED(ppos); + -+void kbase_csf_firmware_csg_input( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset, const u32 value) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; ++ count = min_t(size_t, sizeof(buf) - 1, count); ++ if (copy_from_user(buf, ubuf, count)) ++ return -EFAULT; + -+ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", -+ offset, value); -+ input_page_write(info->input, offset, value); ++ buf[count] = 0; + -+ if (offset == CSG_REQ) { -+ /* NO_MALI: Immediately acknowledge requests */ -+ output_page_write(info->output, CSG_ACK, value); ++ if (sysfs_streq(buf, "SUSPENDED")) ++ kbase_csf_scheduler_pm_suspend(kbdev); ++#ifdef KBASE_PM_RUNTIME ++ else if (sysfs_streq(buf, "SLEEPING")) ++ kbase_csf_scheduler_force_sleep(kbdev); ++#endif ++ else if (sysfs_streq(buf, "INACTIVE")) ++ kbase_csf_scheduler_force_wakeup(kbdev); ++ else { ++ dev_dbg(kbdev->dev, "Bad scheduler state %s", buf); ++ ret = -EINVAL; + } -+} -+ -+u32 kbase_csf_firmware_csg_input_read( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = input_page_read(info->input, offset); + -+ dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); -+ return val; ++ return ret; +} + -+void kbase_csf_firmware_csg_input_mask( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset, const u32 value, const u32 mask) -+{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ -+ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); -+ -+ /* NO_MALI: Go through kbase_csf_firmware_csg_input to capture writes */ -+ kbase_csf_firmware_csg_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); -+} ++static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = { ++ .owner = THIS_MODULE, ++ .read = kbase_csf_debugfs_scheduler_state_get, ++ .write = kbase_csf_debugfs_scheduler_state_set, ++ .open = simple_open, ++ .llseek = default_llseek, ++}; + -+u32 kbase_csf_firmware_csg_output( -+ const struct kbase_csf_cmd_stream_group_info *const info, -+ const u32 offset) ++void kbase_csf_debugfs_init(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = info->kbdev; -+ u32 const val = output_page_read(info->output, offset); ++ debugfs_create_file("active_groups", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_active_queue_groups_debugfs_fops); + -+ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); -+ return val; ++ debugfs_create_file("scheduling_timer_enabled", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_fops); ++ debugfs_create_file("scheduling_timer_kick", 0200, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduling_timer_kick_fops); ++ debugfs_create_file("scheduler_state", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduler_state_fops); ++ 
++ kbase_csf_tl_reader_debugfs_init(kbdev); +} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); + -+void kbase_csf_firmware_global_input( -+ const struct kbase_csf_global_iface *const iface, const u32 offset, -+ const u32 value) ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) +{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ -+ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); -+ input_page_write(iface->input, offset, value); -+ -+ if (offset == GLB_REQ) { -+ /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE -+ * and PRFCNT_SAMPLE. These will be processed along with the -+ * corresponding performance counter registers when the global doorbell -+ * is rung in order to emulate the performance counter sampling behavior -+ * of the real firmware. -+ */ -+ const u32 ack = output_page_read(iface->output, GLB_ACK); -+ const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK); -+ const u32 toggled = (value ^ ack) & req_mask; -+ -+ output_page_write(iface->output, GLB_ACK, ack ^ toggled); -+ } +} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); + -+void kbase_csf_firmware_global_input_mask( -+ const struct kbase_csf_global_iface *const iface, const u32 offset, -+ const u32 value, const u32 mask) ++void kbase_csf_debugfs_init(struct kbase_device *kbdev) +{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ -+ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", -+ offset, value, mask); -+ -+ /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ -+ kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); +} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); + -+u32 kbase_csf_firmware_global_input_read( -+ const struct kbase_csf_global_iface *const iface, const u32 offset) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ u32 const val = input_page_read(iface->input, offset); ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +new file mode 100644 +index 000000000..16a548bf8 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +@@ -0,0 +1,54 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
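/*
 * Example usage of the debugfs entries created above, assuming the default
 * kbase debugfs root (the same /sys/kernel/debug/mali0 directory referred
 * to elsewhere in this patch). The per-context directory name is the
 * context's tgid/id pair and may differ between kernel versions:
 *
 *   # Dump all on-slot queue groups for the whole device:
 *   cat /sys/kernel/debug/mali0/active_groups
 *
 *   # Dump the queue groups and CPU queues of one context:
 *   cat /sys/kernel/debug/mali0/ctx/<tgid>_<id>/groups
 *   cat /sys/kernel/debug/mali0/ctx/<tgid>_<id>/cpu_queue
 *
 *   # Disable the scheduling tick timer and kick the scheduler manually:
 *   echo 0 > /sys/kernel/debug/mali0/scheduling_timer_enabled
 *   echo 1 > /sys/kernel/debug/mali0/scheduling_timer_kick
 *
 *   # Query or force the scheduler state; accepted values are the strings
 *   # handled by kbase_csf_debugfs_scheduler_state_set() above:
 *   cat /sys/kernel/debug/mali0/scheduler_state
 *   echo SUSPENDED > /sys/kernel/debug/mali0/scheduler_state
 */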
++ * ++ */ + -+ dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); -+ return val; -+} ++#ifndef _KBASE_CSF_CSG_DEBUGFS_H_ ++#define _KBASE_CSF_CSG_DEBUGFS_H_ + -+u32 kbase_csf_firmware_global_output( -+ const struct kbase_csf_global_iface *const iface, const u32 offset) -+{ -+ const struct kbase_device * const kbdev = iface->kbdev; -+ u32 const val = output_page_read(iface->output, offset); ++/* Forward declarations */ ++struct kbase_device; ++struct kbase_context; ++struct kbase_queue_group; + -+ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); -+ return val; -+} -+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); ++#define MALI_CSF_CSG_DEBUGFS_VERSION 0 + +/** -+ * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request ++ * kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups ++ * associated with @kctx. + * -+ * @kbdev: An instance of the GPU platform device ++ * @kctx: Pointer to kbase_context + */ -+static void csf_doorbell_prfcnt(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_global_iface *iface; -+ u32 req; -+ u32 ack; -+ u32 extract_index; -+ -+ if (WARN_ON(!kbdev)) -+ return; -+ -+ iface = &kbdev->csf.global_iface; -+ -+ req = input_page_read(iface->input, GLB_REQ); -+ ack = output_page_read(iface->output, GLB_ACK); -+ extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT); ++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); + -+ /* Process enable bit toggle */ -+ if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) { -+ if (req & GLB_REQ_PRFCNT_ENABLE_MASK) { -+ /* Reset insert index to zero on enable bit set */ -+ output_page_write(iface->output, GLB_PRFCNT_INSERT, 0); -+ WARN_ON(extract_index != 0); -+ } -+ ack ^= GLB_REQ_PRFCNT_ENABLE_MASK; -+ } ++/** ++ * kbase_csf_debugfs_init() - Add a global debugfs entry for queue groups ++ * ++ * @kbdev: Pointer to the device ++ */ ++void kbase_csf_debugfs_init(struct kbase_device *kbdev); + -+ /* Process sample request */ -+ if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { -+ const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET( -+ input_page_read(iface->input, GLB_PRFCNT_CONFIG)); -+ u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT); ++/** ++ * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses ++ * ++ * @kbdev: Pointer to the device ++ */ ++void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); + -+ const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; -+ const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; ++#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +new file mode 100644 +index 000000000..6fa0e27d6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +@@ -0,0 +1,1666 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */ -+ if (insert_index - extract_index >= ring_size) { -+ WARN_ON(insert_index - extract_index > ring_size); -+ if (!prev_overflow) -+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; -+ } else { -+ struct gpu_model_prfcnt_en enable_maps = { -+ .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN), -+ .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN), -+ .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN), -+ .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN), -+ }; ++/* Definitions (types, defines, etcs) common to the CSF. ++ * They are placed here to allow the hierarchy of header files to work. ++ */ + -+ const u64 prfcnt_base = -+ input_page_read(iface->input, GLB_PRFCNT_BASE_LO) + -+ ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32); ++#ifndef _KBASE_CSF_DEFS_H_ ++#define _KBASE_CSF_DEFS_H_ + -+ u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base + -+ (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE * -+ (insert_index % ring_size)); ++#include ++#include + -+ /* trigger sample dump in the dummy model */ -+ gpu_model_prfcnt_dump_request(sample_base, enable_maps); ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_refcount_defs.h" ++#include "mali_kbase_csf_event.h" ++#include + -+ /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */ -+ output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index); -+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK; -+ } ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++#include ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+ /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */ -+ if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2))) -+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; -+ } ++/* Maximum number of KCPU command queues to be created per GPU address space. ++ */ ++#define KBASEP_MAX_KCPU_QUEUES ((size_t)256) + -+ /* Update GLB_ACK */ -+ output_page_write(iface->output, GLB_ACK, ack); -+} ++/* Maximum number of GPU command queue groups to be created per GPU address ++ * space. ++ */ ++#define MAX_QUEUE_GROUP_NUM (256) + -+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) -+{ -+ WARN_ON(doorbell_nr < 0); -+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++/* Maximum number of GPU tiler heaps to allow to be created per GPU address ++ * space. ++ */ ++#define MAX_TILER_HEAPS (128) + -+ if (WARN_ON(!kbdev)) -+ return; ++#define CSF_FIRMWARE_ENTRY_READ (1ul << 0) ++#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1) ++#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2) ++#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3) ++#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5) ++#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30) ++#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) + -+ if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) { -+ csf_doorbell_prfcnt(kbdev); -+ gpu_model_glb_request_job_irq(kbdev->model); -+ } -+} -+EXPORT_SYMBOL(kbase_csf_ring_doorbell); ++/** ++ * enum kbase_csf_queue_bind_state - bind state of the queue ++ * ++ * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link ++ * between queue and the group to which it was bound or being bound is removed. 
++ * @KBASE_CSF_QUEUE_BIND_IN_PROGRESS: Set when the first part of bind operation ++ * has completed i.e. CS_QUEUE_BIND ioctl. ++ * @KBASE_CSF_QUEUE_BOUND: Set when the bind operation has completed i.e. IO ++ * pages have been mapped in the process address space. ++ */ ++enum kbase_csf_queue_bind_state { ++ KBASE_CSF_QUEUE_UNBOUND, ++ KBASE_CSF_QUEUE_BIND_IN_PROGRESS, ++ KBASE_CSF_QUEUE_BOUND, ++}; + +/** -+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault. ++ * enum kbase_csf_reset_gpu_state - state of the gpu reset + * -+ * @kbdev: Pointer to kbase device ++ * @KBASE_CSF_RESET_GPU_NOT_PENDING: Set when the GPU reset isn't pending + * -+ * Report group fatal error to user space for all GPU command queue groups -+ * in the device, terminate them and reset GPU. ++ * @KBASE_CSF_RESET_GPU_PREPARED: Set when kbase_prepare_to_reset_gpu() has ++ * been called. This is just for debugging checks to encourage callers to call ++ * kbase_prepare_to_reset_gpu() before kbase_reset_gpu(). ++ * ++ * @KBASE_CSF_RESET_GPU_COMMITTED: Set when the GPU reset process has been ++ * committed and so will definitely happen, but the procedure to reset the GPU ++ * has not yet begun. Other threads must finish accessing the HW before we ++ * reach %KBASE_CSF_RESET_GPU_HAPPENING. ++ * ++ * @KBASE_CSF_RESET_GPU_HAPPENING: Set when the GPU reset process is occurring ++ * (silent or otherwise), and is actively accessing the HW. Any changes to the ++ * HW in other threads might get lost, overridden, or corrupted. ++ * ++ * @KBASE_CSF_RESET_GPU_COMMITTED_SILENT: Set when the GPU reset process has ++ * been committed but has not started happening. This is used when resetting ++ * the GPU as part of normal behavior (e.g. when exiting protected mode). ++ * Other threads must finish accessing the HW before we reach ++ * %KBASE_CSF_RESET_GPU_HAPPENING. ++ * ++ * @KBASE_CSF_RESET_GPU_FAILED: Set when an error is encountered during the ++ * GPU reset process. No more work could then be executed on GPU, unloading ++ * the Driver module is the only option. + */ -+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) -+{ -+ int as; -+ -+ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { -+ unsigned long flags; -+ struct kbase_context *kctx; -+ struct kbase_fault fault; ++enum kbase_csf_reset_gpu_state { ++ KBASE_CSF_RESET_GPU_NOT_PENDING, ++ KBASE_CSF_RESET_GPU_PREPARED, ++ KBASE_CSF_RESET_GPU_COMMITTED, ++ KBASE_CSF_RESET_GPU_HAPPENING, ++ KBASE_CSF_RESET_GPU_COMMITTED_SILENT, ++ KBASE_CSF_RESET_GPU_FAILED, ++}; + -+ if (as == MCU_AS_NR) -+ continue; ++/** ++ * enum kbase_csf_group_state - state of the GPU command queue group ++ * ++ * @KBASE_CSF_GROUP_INACTIVE: Group is inactive and won't be ++ * considered by scheduler for running on ++ * CSG slot. ++ * @KBASE_CSF_GROUP_RUNNABLE: Group is in the list of runnable groups ++ * and is subjected to time-slice based ++ * scheduling. A start request would be ++ * sent (or already has been sent) if the ++ * group is assigned the CS ++ * group slot for the fist time. ++ * @KBASE_CSF_GROUP_IDLE: Group is currently on a CSG slot ++ * but all the CSs bound to the group have ++ * become either idle or waiting on sync ++ * object. ++ * Group could be evicted from the slot on ++ * the next tick if there are no spare ++ * slots left after scheduling non-idle ++ * queue groups. 
If the group is kept on ++ * slot then it would be moved to the ++ * RUNNABLE state, also if one of the ++ * queues bound to the group is kicked it ++ * would be moved to the RUNNABLE state. ++ * If the group is evicted from the slot it ++ * would be moved to either ++ * KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or ++ * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC ++ * state. ++ * @KBASE_CSF_GROUP_SUSPENDED: Group was evicted from the CSG slot ++ * and is not running but is still in the ++ * list of runnable groups and subjected ++ * to time-slice based scheduling. A resume ++ * request would be sent when a CSG slot is ++ * re-assigned to the group and once the ++ * resume is complete group would be moved ++ * back to the RUNNABLE state. ++ * @KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: Same as KBASE_CSF_GROUP_SUSPENDED except ++ * that queue group also became idle before ++ * the suspension. This state helps ++ * Scheduler avoid scheduling the idle ++ * groups over the non-idle groups in the ++ * subsequent ticks. If one of the queues ++ * bound to the group is kicked it would be ++ * moved to the SUSPENDED state. ++ * @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE ++ * except that at least one CS ++ * bound to this group was ++ * waiting for synchronization object ++ * before the suspension. ++ * @KBASE_CSF_GROUP_FAULT_EVICTED: Group is evicted from the scheduler due ++ * to a fault condition, pending to be ++ * terminated. ++ * @KBASE_CSF_GROUP_TERMINATED: Group is no longer schedulable and is ++ * pending to be deleted by Client, all the ++ * queues bound to it have been unbound. ++ */ ++enum kbase_csf_group_state { ++ KBASE_CSF_GROUP_INACTIVE, ++ KBASE_CSF_GROUP_RUNNABLE, ++ KBASE_CSF_GROUP_IDLE, ++ KBASE_CSF_GROUP_SUSPENDED, ++ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE, ++ KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, ++ KBASE_CSF_GROUP_FAULT_EVICTED, ++ KBASE_CSF_GROUP_TERMINATED, ++}; + -+ /* Only handle the fault for an active address space. Lock is -+ * taken here to atomically get reference to context in an -+ * active address space and retain its refcount. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); ++/** ++ * enum kbase_csf_csg_slot_state - state of the command queue group slots under ++ * the scheduler control. ++ * ++ * @CSG_SLOT_READY: The slot is clean and ready to be programmed with a ++ * queue group. ++ * @CSG_SLOT_READY2RUN: The slot has been programmed with a queue group, i.e. a ++ * start or resume request has been sent to the firmware. ++ * @CSG_SLOT_RUNNING: The queue group is running on the slot, acknowledgment ++ * of a start or resume request has been obtained from the ++ * firmware. ++ * @CSG_SLOT_DOWN2STOP: The suspend or terminate request for the queue group on ++ * the slot has been sent to the firmware. ++ * @CSG_SLOT_STOPPED: The queue group is removed from the slot, acknowledgment ++ * of suspend or terminate request has been obtained from ++ * the firmware. ++ * @CSG_SLOT_READY2RUN_TIMEDOUT: The start or resume request sent on the slot ++ * for the queue group timed out. ++ * @CSG_SLOT_DOWN2STOP_TIMEDOUT: The suspend or terminate request for queue ++ * group on the slot timed out. 
++ */ ++enum kbase_csf_csg_slot_state { ++ CSG_SLOT_READY, ++ CSG_SLOT_READY2RUN, ++ CSG_SLOT_RUNNING, ++ CSG_SLOT_DOWN2STOP, ++ CSG_SLOT_STOPPED, ++ CSG_SLOT_READY2RUN_TIMEDOUT, ++ CSG_SLOT_DOWN2STOP_TIMEDOUT, ++}; + -+ if (kctx) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ continue; -+ } ++/** ++ * enum kbase_csf_scheduler_state - state of the scheduler operational phases. ++ * ++ * @SCHED_BUSY: The scheduler is busy performing on tick schedule ++ * operations, the state of CSG slots ++ * can't be changed. ++ * @SCHED_INACTIVE: The scheduler is inactive, it is allowed to modify the ++ * state of CSG slots by in-cycle ++ * priority scheduling. ++ * @SCHED_SUSPENDED: The scheduler is in low-power mode with scheduling ++ * operations suspended and is not holding the power ++ * management reference. This can happen if the GPU ++ * becomes idle for a duration exceeding a threshold, ++ * or due to a system triggered suspend action. ++ * @SCHED_SLEEPING: The scheduler is in low-power mode with scheduling ++ * operations suspended and is not holding the power ++ * management reference. This state is set, only for the ++ * GPUs that supports the sleep feature, when GPU idle ++ * notification is received. The state is changed to ++ * @SCHED_SUSPENDED from the runtime suspend callback ++ * function after the suspend of CSGs. ++ */ ++enum kbase_csf_scheduler_state { ++ SCHED_BUSY, ++ SCHED_INACTIVE, ++ SCHED_SUSPENDED, ++ SCHED_SLEEPING, ++}; + -+ fault = (struct kbase_fault) { -+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, -+ }; ++/** ++ * enum kbase_queue_group_priority - Kbase internal relative priority list. ++ * ++ * @KBASE_QUEUE_GROUP_PRIORITY_REALTIME: The realtime queue group priority. ++ * @KBASE_QUEUE_GROUP_PRIORITY_HIGH: The high queue group priority. ++ * @KBASE_QUEUE_GROUP_PRIORITY_MEDIUM: The medium queue group priority. ++ * @KBASE_QUEUE_GROUP_PRIORITY_LOW: The low queue group priority. ++ * @KBASE_QUEUE_GROUP_PRIORITY_COUNT: The number of priority levels. ++ */ ++enum kbase_queue_group_priority { ++ KBASE_QUEUE_GROUP_PRIORITY_REALTIME = 0, ++ KBASE_QUEUE_GROUP_PRIORITY_HIGH, ++ KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, ++ KBASE_QUEUE_GROUP_PRIORITY_LOW, ++ KBASE_QUEUE_GROUP_PRIORITY_COUNT ++}; + -+ kbase_csf_ctx_handle_fault(kctx, &fault); -+ kbase_ctx_sched_release_ctx_lock(kctx); -+ } ++/** ++ * enum kbase_timeout_selector - The choice of which timeout to get scaled ++ * using the lowest GPU frequency. ++ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware. ++ * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired ++ * Shader, L2 and MCU state. ++ * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. ++ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. ++ * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. ++ * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond ++ * to a ping from KBase. ++ * @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. ++ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion ++ * of a MMU operation ++ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in ++ * the enum. 
++ */ ++enum kbase_timeout_selector { ++ CSF_FIRMWARE_TIMEOUT, ++ CSF_PM_TIMEOUT, ++ CSF_GPU_RESET_TIMEOUT, ++ CSF_CSG_SUSPEND_TIMEOUT, ++ CSF_FIRMWARE_BOOT_TIMEOUT, ++ CSF_FIRMWARE_PING_TIMEOUT, ++ CSF_SCHED_PROTM_PROGRESS_TIMEOUT, ++ MMU_AS_INACTIVE_WAIT_TIMEOUT, + -+ if (kbase_prepare_to_reset_gpu(kbdev, -+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+} ++ /* Must be the last in the enum */ ++ KBASE_TIMEOUT_SELECTOR_COUNT ++}; + +/** -+ * firmware_error_worker - Worker function for handling firmware internal error -+ * -+ * @data: Pointer to a work_struct embedded in kbase device. ++ * struct kbase_csf_notification - Event or error generated as part of command ++ * queue execution + * -+ * Handle the CS internal firmware error ++ * @data: Event or error data returned to userspace ++ * @link: Link to the linked list, &struct_kbase_csf_context.error_list. + */ -+static void firmware_error_worker(struct work_struct *const data) -+{ -+ struct kbase_device *const kbdev = -+ container_of(data, struct kbase_device, csf.fw_error_work); ++struct kbase_csf_notification { ++ struct base_csf_notification data; ++ struct list_head link; ++}; + -+ handle_internal_firmware_fatal(kbdev); -+} ++/** ++ * struct kbase_queue - Object representing a GPU command queue. ++ * ++ * @kctx: Pointer to the base context with which this GPU command queue ++ * is associated. ++ * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only ++ * valid (i.e. not 0 ) when the queue is enabled and its owner ++ * group has a runtime bound csg_reg (group region). ++ * @phys: Pointer to the physical pages allocated for the ++ * pair or User mode input/output page ++ * @user_io_addr: Pointer to the permanent kernel mapping of User mode ++ * input/output pages. The pages can be accessed through ++ * the mapping without any cache maintenance. ++ * @handle: Handle returned with bind ioctl for creating a ++ * contiguous User mode mapping of input/output pages & ++ * the hardware doorbell page. ++ * @doorbell_nr: Index of the hardware doorbell page assigned to the ++ * queue. ++ * @db_file_offset: File offset value that is assigned to userspace mapping ++ * created on bind to access the doorbell page. ++ * It is in page units. ++ * @link: Link to the linked list of GPU command queues created per ++ * GPU address space. ++ * @refcount: Reference count, stands for the number of times the queue ++ * has been referenced. The reference is taken when it is ++ * created, when it is bound to the group and also when the ++ * @oom_event_work work item is queued ++ * for it. ++ * @group: Pointer to the group to which this queue is bound. ++ * @queue_reg: Pointer to the VA region allocated for CS buffer. ++ * @oom_event_work: Work item corresponding to the out of memory event for ++ * chunked tiler heap being used for this queue. ++ * @base_addr: Base address of the CS buffer. ++ * @size: Size of the CS buffer. ++ * @priority: Priority of this queue within the group. ++ * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state ++ * @csi_index: The ID of the assigned CS hardware interface. ++ * @enabled: Indicating whether the CS is running, or not. ++ * @status_wait: Value of CS_STATUS_WAIT register of the CS will ++ * be kept when the CS gets blocked by sync wait. ++ * CS_STATUS_WAIT provides information on conditions queue is ++ * blocking on. This is set when the group, to which queue is ++ * bound, is suspended after getting blocked, i.e. 
in ++ * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state. ++ * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS ++ * will be kept when the CS gets blocked by ++ * sync wait. CS_STATUS_WAIT_SYNC_POINTER contains the address ++ * of synchronization object being waited on. ++ * Valid only when @status_wait is set. ++ * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS ++ * will be kept when the CS gets blocked by ++ * sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value ++ * tested against the synchronization object. ++ * Valid only when @status_wait is set. ++ * @sb_status: Value indicates which of the scoreboard entries in the queue ++ * are non-zero ++ * @blocked_reason: Value shows if the queue is blocked, and if so, ++ * the reason why it is blocked ++ * @trace_buffer_base: CS trace buffer base address. ++ * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. ++ * @trace_buffer_size: CS trace buffer size for the queue. ++ * @trace_cfg: CS trace configuration parameters. ++ * @error: GPU command queue fatal information to pass to user space. ++ * @cs_error_work: Work item to handle the CS fatal event reported for this ++ * queue or the CS fault event if dump on fault is enabled ++ * and acknowledgment for CS fault event needs to be done ++ * after dumping is complete. ++ * @cs_error_info: Records additional information about the CS fatal event or ++ * about CS fault event if dump on fault is enabled. ++ * @cs_error: Records information about the CS fatal event or ++ * about CS fault event if dump on fault is enabled. ++ * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. ++ * @pending: Indicating whether the queue has new submitted work. ++ * @extract_ofs: The current EXTRACT offset, this is only updated when handling ++ * the GLB IDLE IRQ if the idle timeout value is non-0 in order ++ * to help detect a queue's true idle status. ++ * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the ++ * group to which queue is bound is suspended. ++ * This can be useful in certain cases to know that till which ++ * point the execution reached in the Linear command buffer. ++ */ ++struct kbase_queue { ++ struct kbase_context *kctx; ++ u64 user_io_gpu_va; ++ struct tagged_addr phys[2]; ++ char *user_io_addr; ++ u64 handle; ++ int doorbell_nr; ++ unsigned long db_file_offset; ++ struct list_head link; ++ kbase_refcount_t refcount; ++ struct kbase_queue_group *group; ++ struct kbase_va_region *queue_reg; ++ struct work_struct oom_event_work; ++ u64 base_addr; ++ u32 size; ++ u8 priority; ++ s8 csi_index; ++ enum kbase_csf_queue_bind_state bind_state; ++ bool enabled; ++ u32 status_wait; ++ u64 sync_ptr; ++ u32 sync_value; ++ u32 sb_status; ++ u32 blocked_reason; ++ u64 trace_buffer_base; ++ u64 trace_offset_ptr; ++ u32 trace_buffer_size; ++ u32 trace_cfg; ++ struct kbase_csf_notification error; ++ struct work_struct cs_error_work; ++ u64 cs_error_info; ++ u32 cs_error; ++ bool cs_error_fatal; ++ atomic_t pending; ++ u64 extract_ofs; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ u64 saved_cmd_ptr; ++#endif /* CONFIG_DEBUG_FS */ ++}; + -+static bool global_request_complete(struct kbase_device *const kbdev, -+ u32 const req_mask) -+{ -+ struct kbase_csf_global_iface *global_iface = -+ &kbdev->csf.global_iface; -+ bool complete = false; -+ unsigned long flags; ++/** ++ * struct kbase_normal_suspend_buffer - Object representing a normal ++ * suspend buffer for queue group. 
++ * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this ++ * field is only valid when the owner group has a region bound at ++ * runtime. ++ * @phy: Array of physical memory pages allocated for the normal- ++ * mode suspend buffer. ++ */ ++struct kbase_normal_suspend_buffer { ++ u64 gpu_va; ++ struct tagged_addr *phy; ++}; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++/** ++ * struct kbase_protected_suspend_buffer - Object representing a protected ++ * suspend buffer for queue group. ++ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer. ++ * Note, this field is only valid when the owner group has a region ++ * bound at runtime. ++ * @pma: Array of pointer to protected mode allocations containing ++ * information about memory pages allocated for protected mode ++ * suspend buffer. ++ * @alloc_retries: Number of times we retried allocing physical pages ++ * for protected suspend buffers. ++ */ ++struct kbase_protected_suspend_buffer { ++ u64 gpu_va; ++ struct protected_memory_allocation **pma; ++ u8 alloc_retries; ++}; + -+ if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & -+ req_mask) == -+ (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & -+ req_mask)) -+ complete = true; ++/** ++ * struct kbase_queue_group - Object representing a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context with which this queue group ++ * is associated. ++ * @normal_suspend_buf: Object representing the normal suspend buffer. ++ * Normal-mode suspend buffer that is used for ++ * group context switch. ++ * @protected_suspend_buf: Object representing the protected suspend ++ * buffer. Protected-mode suspend buffer that is ++ * used for group context switch. ++ * @handle: Handle which identifies this queue group. ++ * @csg_nr: Number/index of the CSG to which this queue group is ++ * mapped; KBASEP_CSG_NR_INVALID indicates that the queue ++ * group is not scheduled. ++ * @priority: Priority of the queue group, 0 being the highest, ++ * BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest. ++ * @tiler_max: Maximum number of tiler endpoints the group is allowed ++ * to use. ++ * @fragment_max: Maximum number of fragment endpoints the group is ++ * allowed to use. ++ * @compute_max: Maximum number of compute endpoints the group is ++ * allowed to use. ++ * @csi_handlers: Requested CSI exception handler flags for the group. ++ * @tiler_mask: Mask of tiler endpoints the group is allowed to use. ++ * @fragment_mask: Mask of fragment endpoints the group is allowed to use. ++ * @compute_mask: Mask of compute endpoints the group is allowed to use. ++ * @group_uid: 32-bit wide unsigned identifier for the group, unique ++ * across all kbase devices and contexts. ++ * @link: Link to this queue group in the 'runnable_groups' list of ++ * the corresponding kctx. ++ * @link_to_schedule: Link to this queue group in the list of prepared groups ++ * to be scheduled, if the group is runnable/suspended. ++ * If the group is idle or waiting for CQS, it would be a ++ * link to the list of idle/blocked groups list. ++ * @run_state: Current state of the queue group. ++ * @prepared_seq_num: Indicates the position of queue group in the list of ++ * prepared groups to be scheduled. ++ * @scan_seq_num: Scan out sequence number before adjusting for dynamic ++ * idle conditions. It is used for setting a group's ++ * onslot priority. It could differ from prepared_seq_number ++ * when there are idle groups. 
++ * @faulted: Indicates that a GPU fault occurred for the queue group. ++ * This flag persists until the fault has been queued to be ++ * reported to userspace. ++ * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in ++ * case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE ++ * @reevaluate_idle_status : Flag set when work is submitted for the normal group ++ * or it becomes unblocked during protected mode. The ++ * flag helps Scheduler confirm if the group actually ++ * became non idle or not. ++ * @bound_queues: Array of registered queues bound to this queue group. ++ * @doorbell_nr: Index of the hardware doorbell page assigned to the ++ * group. ++ * @protm_event_work: Work item corresponding to the protected mode entry ++ * event for this queue. ++ * @protm_pending_bitmap: Bit array to keep a track of CSs that ++ * have pending protected mode entry requests. ++ * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be ++ * returned to userspace if such an error has occurred. ++ * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT ++ * to be returned to userspace if such an error has occurred. ++ * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM ++ * to be returned to userspace if such an error has occurred. ++ * @timer_event_work: Work item to handle the progress timeout fatal event ++ * for the group. ++ * @deschedule_deferred_cnt: Counter keeping a track of the number of threads ++ * that tried to deschedule the group and had to defer ++ * the descheduling due to the dump on fault. ++ * @csg_reg: An opaque pointer to the runtime bound shared regions. It is ++ * dynamically managed by the scheduler and can be NULL if the ++ * group is off-slot. ++ * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts. ++ * It is accumulated on consecutive mapping attempt failures. On ++ * reaching a preset limit, the group is regarded as suffered ++ * a fatal error and triggers a fatal error notification. 
++ */ ++struct kbase_queue_group { ++ struct kbase_context *kctx; ++ struct kbase_normal_suspend_buffer normal_suspend_buf; ++ struct kbase_protected_suspend_buffer protected_suspend_buf; ++ u8 handle; ++ s8 csg_nr; ++ u8 priority; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ u8 tiler_max; ++ u8 fragment_max; ++ u8 compute_max; ++ u8 csi_handlers; + -+ return complete; -+} ++ u64 tiler_mask; ++ u64 fragment_mask; ++ u64 compute_mask; + -+static int wait_for_global_request(struct kbase_device *const kbdev, -+ u32 const req_mask) -+{ -+ const long wait_timeout = -+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ long remaining; -+ int err = 0; ++ u32 group_uid; + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ global_request_complete(kbdev, req_mask), -+ wait_timeout); ++ struct list_head link; ++ struct list_head link_to_schedule; ++ enum kbase_csf_group_state run_state; ++ u32 prepared_seq_num; ++ u32 scan_seq_num; ++ bool faulted; ++ bool cs_unrecoverable; ++ bool reevaluate_idle_status; + -+ if (!remaining) { -+ dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", -+ req_mask); -+ err = -ETIMEDOUT; ++ struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; + ++ int doorbell_nr; ++ struct work_struct protm_event_work; ++ DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); + -+ } ++ struct kbase_csf_notification error_fatal; ++ struct kbase_csf_notification error_timeout; ++ struct kbase_csf_notification error_tiler_oom; + -+ return err; -+} ++ struct work_struct timer_event_work; + -+static void set_global_request( -+ const struct kbase_csf_global_iface *const global_iface, -+ u32 const req_mask) -+{ -+ u32 glb_req; ++ /** ++ * @dvs_buf: Address and size of scratch memory. ++ * ++ * Used to store intermediate DVS data by the GPU. ++ */ ++ u64 dvs_buf; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ u32 deschedule_deferred_cnt; ++#endif ++ void *csg_reg; ++ u8 csg_reg_bind_retries; ++}; + -+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); ++/** ++ * struct kbase_csf_kcpu_queue_context - Object representing the kernel CPU ++ * queues for a GPU address space. ++ * ++ * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. ++ * @array: Array of pointers to kernel CPU command queues. ++ * @in_use: Bitmap which indicates which kernel CPU command queues are in use. ++ * @cmd_seq_num: The sequence number assigned to an enqueued command, ++ * in incrementing order (older commands shall have a ++ * smaller number). ++ * @jit_lock: Lock to serialise JIT operations. ++ * @jit_cmds_head: A list of the just-in-time memory commands, both ++ * allocate & free, in submission order, protected ++ * by kbase_csf_kcpu_queue_context.lock. ++ * @jit_blocked_queues: A list of KCPU command queues blocked by a pending ++ * just-in-time memory allocation command which will be ++ * reattempted after the impending free of other active ++ * allocations. 
++ */ ++struct kbase_csf_kcpu_queue_context { ++ struct mutex lock; ++ struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; ++ DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); ++ atomic64_t cmd_seq_num; + -+ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); -+ glb_req ^= req_mask; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, -+ req_mask); -+} ++ struct mutex jit_lock; ++ struct list_head jit_cmds_head; ++ struct list_head jit_blocked_queues; ++}; + -+static void enable_endpoints_global( -+ const struct kbase_csf_global_iface *const global_iface, -+ u64 const shader_core_mask) -+{ -+ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, -+ shader_core_mask & U32_MAX); -+ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, -+ shader_core_mask >> 32); ++/** ++ * struct kbase_csf_cpu_queue_context - Object representing the cpu queue ++ * information. ++ * ++ * @buffer: Buffer containing CPU queue information provided by Userspace. ++ * @buffer_size: The size of @buffer. ++ * @dump_req_status: Indicates the current status for CPU queues dump request. ++ * @dump_cmp: Dumping cpu queue completion event. ++ */ ++struct kbase_csf_cpu_queue_context { ++ char *buffer; ++ size_t buffer_size; ++ atomic_t dump_req_status; ++ struct completion dump_cmp; ++}; + -+ set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); -+} ++/** ++ * struct kbase_csf_heap_context_allocator - Allocator of heap contexts ++ * ++ * @kctx: Pointer to the kbase context with which this allocator is ++ * associated. ++ * @region: Pointer to a GPU memory region from which heap context structures ++ * are allocated. NULL if no heap contexts have been allocated. ++ * @gpu_va: GPU virtual address of the start of the region from which heap ++ * context structures are allocated. 0 if no heap contexts have been ++ * allocated. ++ * @lock: Lock preventing concurrent access to the @in_use bitmap. ++ * @in_use: Bitmap that indicates which heap context structures are currently ++ * allocated (in @region). ++ * @heap_context_size_aligned: Size of a heap context structure, in bytes, ++ * aligned to GPU cacheline size. ++ * ++ * Heap context structures are allocated by the kernel for use by the firmware. ++ * The current implementation subdivides a single GPU memory region for use as ++ * a sparse array. ++ */ ++struct kbase_csf_heap_context_allocator { ++ struct kbase_context *kctx; ++ struct kbase_va_region *region; ++ u64 gpu_va; ++ struct mutex lock; ++ DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); ++ u32 heap_context_size_aligned; ++}; + -+static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, -+ const struct kbase_csf_global_iface *const global_iface) -+{ -+ u32 pwroff_reg; ++/** ++ * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps ++ * context for a GPU address space. ++ * ++ * @lock: Lock to prevent the concurrent access to tiler heaps (after the ++ * initialization), a tiler heap can be terminated whilst an OoM ++ * event is being handled for it. ++ * @list: List of tiler heaps. ++ * @ctx_alloc: Allocator for heap context structures. ++ * @nr_of_heaps: Total number of tiler heaps that were added during the ++ * life time of the context. ++ * ++ * This contains all of the CSF state relating to chunked tiler heaps for one ++ * @kbase_context. It is not the same as a heap context structure allocated by ++ * the kernel for use by the firmware. 
++ */ ++struct kbase_csf_tiler_heap_context { ++ struct mutex lock; ++ struct list_head list; ++ struct kbase_csf_heap_context_allocator ctx_alloc; ++ u64 nr_of_heaps; ++}; + -+ if (kbdev->csf.firmware_hctl_core_pwr) -+ pwroff_reg = -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; ++/** ++ * struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of ++ * a kctx for tiler heap reclaim manger ++ * @mgr_link: Link for hooking up to the heap reclaim manger's kctx lists ++ * @nr_freed_pages: Number of freed pages from the the kctx, after its attachment ++ * to the reclaim manager. This is used for tracking reclaim's ++ * free operation progress. ++ * @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx ++ * when all its CSGs are off-slot, on attaching to the reclaim ++ * manager. ++ * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a ++ * kctx has groups on-slot, the scheduler will detach it from ++ * the tiler heap reclaim manager, i.e. no tiler heap memory ++ * reclaiming operations on the kctx. ++ */ ++struct kbase_csf_ctx_heap_reclaim_info { ++ struct list_head mgr_link; ++ u32 nr_freed_pages; ++ u32 nr_est_unused_pages; ++ u8 on_slot_grps; ++}; + -+ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, -+ pwroff_reg); -+ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); ++/** ++ * struct kbase_csf_scheduler_context - Object representing the scheduler's ++ * context for a GPU address space. ++ * ++ * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, ++ * one per queue group relative-priority level. ++ * @num_runnable_grps: Total number of runnable groups across all priority ++ * levels in @runnable_groups. ++ * @idle_wait_groups: A list of GPU command queue groups in which all enabled ++ * GPU command queues are idle and at least one of them ++ * is blocked on a sync wait operation. ++ * @num_idle_wait_grps: Length of the @idle_wait_groups list. ++ * @sync_update_wq: Dedicated workqueue to process work items corresponding ++ * to the sync_update events by sync_set/sync_add ++ * instruction execution on CSs bound to groups ++ * of @idle_wait_groups list. ++ * @sync_update_work: work item to process the sync_update events by ++ * sync_set / sync_add instruction execution on command ++ * streams bound to groups of @idle_wait_groups list. ++ * @ngrp_to_schedule: Number of groups added for the context to the ++ * 'groups_to_schedule' list of scheduler instance. ++ * @heap_info: Heap reclaim information data of the kctx. As the ++ * reclaim action needs to be coordinated with the scheduler ++ * operations, any manipulations on the data needs holding ++ * the scheduler's mutex lock. ++ */ ++struct kbase_csf_scheduler_context { ++ struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; ++ u32 num_runnable_grps; ++ struct list_head idle_wait_groups; ++ u32 num_idle_wait_grps; ++ struct workqueue_struct *sync_update_wq; ++ struct work_struct sync_update_work; ++ u32 ngrp_to_schedule; ++ struct kbase_csf_ctx_heap_reclaim_info heap_info; ++}; + -+ /* Save the programed reg value in its shadow field */ -+ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; -+} ++/** ++ * enum kbase_csf_event_callback_action - return type for CSF event callbacks. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. 
++ * It doesn't correspond to any action or type of event callback. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed ++ * immediately upon return. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. ++ * It doesn't correspond to any action or type of event callback. ++ */ ++enum kbase_csf_event_callback_action { ++ KBASE_CSF_EVENT_CALLBACK_FIRST = 0, ++ KBASE_CSF_EVENT_CALLBACK_KEEP, ++ KBASE_CSF_EVENT_CALLBACK_REMOVE, ++ KBASE_CSF_EVENT_CALLBACK_LAST, ++}; + -+static void set_timeout_global( -+ const struct kbase_csf_global_iface *const global_iface, -+ u64 const timeout) -+{ -+ kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, -+ timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); ++/** ++ * struct kbase_csf_event - Object representing CSF event and error ++ * ++ * @callback_list: List of callbacks which are registered to serve CSF ++ * events. ++ * @error_list: List for CS fatal errors in CSF context. ++ * Link of fatal error is &struct_kbase_csf_notification.link. ++ * @lock: Lock protecting access to @callback_list and ++ * @error_list. ++ */ ++struct kbase_csf_event { ++ struct list_head callback_list; ++ struct list_head error_list; ++ spinlock_t lock; ++}; + -+ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); -+} -+ -+static void enable_gpu_idle_timer(struct kbase_device *const kbdev) -+{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ -+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, -+ kbdev->csf.gpu_idle_dur_count); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, -+ GLB_REQ_IDLE_ENABLE_MASK); -+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", -+ kbdev->csf.gpu_idle_dur_count); -+} -+ -+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) -+{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ bool complete = false; -+ unsigned long flags; -+ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ -+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == -+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) -+ complete = true; ++/** ++ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping ++ * of USER Register page for a context. ++ * ++ * @vma: Pointer to the VMA corresponding to the virtual mapping ++ * of the USER register page. ++ * @file_offset: File offset value that is assigned to userspace mapping ++ * of the USER Register page. It is in page units. ++ * @link: Links the context to the device list when mapping is pointing to ++ * either the dummy or the real Register page. ++ */ ++struct kbase_csf_user_reg_context { ++ struct vm_area_struct *vma; ++ u32 file_offset; ++ struct list_head link; ++}; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++/** ++ * struct kbase_csf_context - Object representing CSF for a GPU address space. ++ * ++ * @event_pages_head: A list of pages allocated for the event memory used by ++ * the synchronization objects. A separate list would help ++ * in the fast lookup, since the list is expected to be short ++ * as one page would provide the memory for up to 1K ++ * synchronization objects. ++ * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES is the upper ++ * bound on the size of event memory. 
++ * @cookies: Bitmask containing of KBASE_CSF_NUM_USER_IO_PAGES_HANDLE ++ * bits, used for creating the User mode CPU mapping in a ++ * deferred manner of a pair of User mode input/output pages ++ * & a hardware doorbell page. ++ * The pages are allocated when a GPU command queue is ++ * bound to a CSG in kbase_csf_queue_bind. ++ * This helps returning unique handles to Userspace from ++ * kbase_csf_queue_bind and later retrieving the pointer to ++ * queue in the mmap handler. ++ * @user_pages_info: Array containing pointers to queue ++ * structures, used in conjunction with cookies bitmask for ++ * providing a mechansim to create a CPU mapping of ++ * input/output pages & hardware doorbell page. ++ * @lock: Serializes accesses to all members, except for ones that ++ * have their own locks. ++ * @queue_groups: Array of registered GPU command queue groups. ++ * @queue_list: Linked list of GPU command queues not yet deregistered. ++ * Note that queues can persist after deregistration if the ++ * userspace mapping created for them on bind operation ++ * hasn't been removed. ++ * @kcpu_queues: Kernel CPU command queues. ++ * @event: CSF event object. ++ * @tiler_heaps: Chunked tiler memory heaps. ++ * @wq: Dedicated workqueue to process work items corresponding ++ * to the OoM events raised for chunked tiler heaps being ++ * used by GPU command queues, and progress timeout events. ++ * @link: Link to this csf context in the 'runnable_kctxs' list of ++ * the scheduler instance ++ * @sched: Object representing the scheduler's context ++ * @pending_submission_work: Work item to process pending kicked GPU command queues. ++ * @cpu_queue: CPU queue information. Only be available when DEBUG_FS ++ * is enabled. ++ * @user_reg: Collective information to support mapping to USER Register page. ++ */ ++struct kbase_csf_context { ++ struct list_head event_pages_head; ++ DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); ++ struct kbase_queue *user_pages_info[ ++ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; ++ struct mutex lock; ++ struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; ++ struct list_head queue_list; ++ struct kbase_csf_kcpu_queue_context kcpu_queues; ++ struct kbase_csf_event event; ++ struct kbase_csf_tiler_heap_context tiler_heaps; ++ struct workqueue_struct *wq; ++ struct list_head link; ++ struct kbase_csf_scheduler_context sched; ++ struct work_struct pending_submission_work; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_csf_cpu_queue_context cpu_queue; ++#endif ++ struct kbase_csf_user_reg_context user_reg; ++}; + -+ return complete; -+} ++/** ++ * struct kbase_csf_reset_gpu - Object containing the members required for ++ * GPU reset handling. ++ * @workq: Workqueue to execute the GPU reset work item @work. ++ * @work: Work item for performing the GPU reset. ++ * @wait: Wait queue used to wait for the GPU reset completion. ++ * @sem: RW Semaphore to ensure no other thread attempts to use the ++ * GPU whilst a reset is in process. Unlike traditional ++ * semaphores and wait queues, this allows Linux's lockdep ++ * mechanism to check for deadlocks involving reset waits. ++ * @state: Tracks if the GPU reset is in progress or not. ++ * The state is represented by enum @kbase_csf_reset_gpu_state. 
++ */ ++struct kbase_csf_reset_gpu { ++ struct workqueue_struct *workq; ++ struct work_struct work; ++ wait_queue_head_t wait; ++ struct rw_semaphore sem; ++ atomic_t state; ++}; + -+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, -+ u32 const req_mask) -+{ -+ u32 glb_debug_req; ++/** ++ * struct kbase_csf_csg_slot - Object containing members for tracking the state ++ * of CSG slots. ++ * @resident_group: pointer to the queue group that is resident on the CSG slot. ++ * @state: state of the slot as per enum @kbase_csf_csg_slot_state. ++ * @trigger_jiffies: value of jiffies when change in slot state is recorded. ++ * @priority: dynamic priority assigned to CSG slot. ++ */ ++struct kbase_csf_csg_slot { ++ struct kbase_queue_group *resident_group; ++ atomic_t state; ++ unsigned long trigger_jiffies; ++ u8 priority; ++}; + -+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); ++/** ++ * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim ++ * kctx lists inside the CSF device's scheduler. ++ * ++ * @heap_reclaim: Tiler heap reclaim shrinker object. ++ * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The ++ * lists track the kctxs attached to the reclaim manager. ++ * @unused_pages: Estimated number of unused pages from the @ctxlist array. The ++ * number is indicative for use with reclaim shrinker's count method. ++ */ ++struct kbase_csf_sched_heap_reclaim_mgr { ++ struct shrinker heap_reclaim; ++ struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; ++ atomic_t unused_pages; ++}; + -+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); -+ glb_debug_req ^= req_mask; ++/** ++ * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared ++ * interface segment regions for scheduler ++ * operations ++ * ++ * @array_csg_regs: Base pointer of an internally created array_csg_regs[]. ++ * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a ++ * group that is placed onto on-slot by the scheduler, it is dropped ++ * from the list (i.e busy active). The Scheduler will put an active ++ * item back when it's becoming off-slot (not in use). ++ * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal ++ * and pmode suspend buffers, as a default replacement of a CSG's pages ++ * for the MMU mapping when the csg_reg is not bound to a group. ++ * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with ++ * protected suspend buffer MMU map operations. ++ * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags. ++ * @dummy_phys_allocated: Indicating the @p dummy_phy page is allocated when true. ++ */ ++struct kbase_csf_mcu_shared_regions { ++ void *array_csg_regs; ++ struct list_head unused_csg_regs; ++ struct tagged_addr *dummy_phys; ++ struct tagged_addr *pma_phys; ++ unsigned long userio_mem_rd_flags; ++ bool dummy_phys_allocated; ++}; + -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); -+} ++/** ++ * struct kbase_csf_scheduler - Object representing the scheduler used for ++ * CSF for an instance of GPU platform device. ++ * @lock: Lock to serialize the scheduler operations and ++ * access to the data members. ++ * @interrupt_lock: Lock to protect members accessed by interrupt ++ * handler. ++ * @state: The operational phase the scheduler is in. 
Primarily ++ * used for indicating what in-cycle schedule actions ++ * are allowed. ++ * @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of ++ * which pages are currently available for assignment ++ * to clients. ++ * @csg_inuse_bitmap: Bitmap to keep a track of CSG slots ++ * that are currently in use. ++ * @csg_slots: The array for tracking the state of CS ++ * group slots. ++ * @runnable_kctxs: List of Kbase contexts that have runnable command ++ * queue groups. ++ * @groups_to_schedule: List of runnable queue groups prepared on every ++ * scheduler tick. The dynamic priority of the CSG ++ * slot assigned to a group will depend upon the ++ * position of group in the list. ++ * @ngrp_to_schedule: Number of groups in the @groups_to_schedule list, ++ * incremented when a group is added to the list, used ++ * to record the position of group in the list. ++ * @num_active_address_spaces: Number of GPU address space slots that would get ++ * used to program the groups in @groups_to_schedule ++ * list on all the available CSG ++ * slots. ++ * @num_csg_slots_for_tick: Number of CSG slots that can be ++ * active in the given tick/tock. This depends on the ++ * value of @num_active_address_spaces. ++ * @remaining_tick_slots: Tracking the number of remaining available slots ++ * for @num_csg_slots_for_tick during the scheduling ++ * operation in a tick/tock. ++ * @idle_groups_to_schedule: List of runnable queue groups, in which all GPU ++ * command queues became idle or are waiting for ++ * synchronization object, prepared on every ++ * scheduler tick. The groups in this list are ++ * appended to the tail of @groups_to_schedule list ++ * after the scan out so that the idle groups aren't ++ * preferred for scheduling over the non-idle ones. ++ * @csg_scan_count_for_tick: CSG scanout count for assign the scan_seq_num for ++ * each scanned out group during scheduling operation ++ * in a tick/tock. ++ * @total_runnable_grps: Total number of runnable groups across all KCTXs. ++ * @csgs_events_enable_mask: Use for temporary masking off asynchronous events ++ * from firmware (such as OoM events) before a group ++ * is suspended. ++ * @csg_slots_idle_mask: Bit array for storing the mask of CS ++ * group slots for which idle notification was ++ * received. ++ * @csg_slots_prio_update: Bit array for tracking slots that have an on-slot ++ * priority update operation. ++ * @last_schedule: Time in jiffies recorded when the last "tick" or ++ * "tock" schedule operation concluded. Used for ++ * evaluating the exclusion window for in-cycle ++ * schedule operation. ++ * @timer_enabled: Whether the CSF scheduler wakes itself up for ++ * periodic scheduling tasks. If this value is 0 ++ * then it will only perform scheduling under the ++ * influence of external factors e.g., IRQs, IOCTLs. ++ * @wq: Dedicated workqueue to execute the @tick_work. ++ * @tick_timer: High-resolution timer employed to schedule tick ++ * workqueue items (kernel-provided delayed_work ++ * items do not use hrtimer and for some reason do ++ * not provide sufficiently reliable periodicity). ++ * @tick_work: Work item that performs the "schedule on tick" ++ * operation to implement timeslice-based scheduling. ++ * @tock_work: Work item that would perform the schedule on tock ++ * operation to implement the asynchronous scheduling. ++ * @pending_tock_work: Indicates that the tock work item should re-execute ++ * once it's finished instead of going back to sleep. 
++ * @ping_work: Work item that would ping the firmware at regular ++ * intervals, only if there is a single active CSG ++ * slot, to check if firmware is alive and would ++ * initiate a reset if the ping request isn't ++ * acknowledged. ++ * @top_ctx: Pointer to the Kbase context corresponding to the ++ * @top_grp. ++ * @top_grp: Pointer to queue group inside @groups_to_schedule ++ * list that was assigned the highest slot priority. ++ * @active_protm_grp: Indicates if firmware has been permitted to let GPU ++ * enter protected mode with the given group. On exit ++ * from protected mode the pointer is reset to NULL. ++ * This pointer is set and PROTM_ENTER request is sent ++ * atomically with @interrupt_lock held. ++ * This pointer being set doesn't necessarily indicates ++ * that GPU is in protected mode, kbdev->protected_mode ++ * needs to be checked for that. ++ * @idle_wq: Workqueue for executing GPU idle notification ++ * handler. ++ * @gpu_idle_work: Work item for facilitating the scheduler to bring ++ * the GPU to a low-power mode on becoming idle. ++ * @fast_gpu_idle_handling: Indicates whether to relax many of the checks ++ * normally done in the GPU idle worker. This is ++ * set to true when handling the GLB IDLE IRQ if the ++ * idle hysteresis timeout is 0, since it makes it ++ * possible to receive this IRQ before the extract ++ * offset is published (which would cause more ++ * extensive GPU idle checks to fail). ++ * @gpu_no_longer_idle: Effective only when the GPU idle worker has been ++ * queued for execution, this indicates whether the ++ * GPU has become non-idle since the last time the ++ * idle notification was received. ++ * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during ++ * the scheduler active phase in a tick. It then ++ * tracks the count of non-idle groups across all the ++ * other phases. ++ * @non_idle_scanout_grps: Count on the non-idle groups in the scan-out ++ * list at the scheduling prepare stage. ++ * @pm_active_count: Count indicating if the scheduler is owning a power ++ * management reference count. Reference is taken when ++ * the count becomes 1 and is dropped when the count ++ * becomes 0. It is used to enable the power up of MCU ++ * after GPU and L2 cache have been powered up. So when ++ * this count is zero, MCU will not be powered up. ++ * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. ++ * @tick_timer_active: Indicates whether the @tick_timer is effectively ++ * active or not, as the callback function of ++ * @tick_timer will enqueue @tick_work only if this ++ * flag is true. This is mainly useful for the case ++ * when scheduling tick needs to be advanced from ++ * interrupt context, without actually deactivating ++ * the @tick_timer first and then enqueing @tick_work. ++ * @tick_protm_pending_seq: Scan out sequence number of the group that has ++ * protected mode execution pending for the queue(s) ++ * bound to it and will be considered first for the ++ * protected mode execution compared to other such ++ * groups. It is updated on every tick/tock. ++ * @interrupt_lock is used to serialize the access. ++ * @protm_enter_time: GPU protected mode enter time. ++ * @reclaim_mgr: CSGs tiler heap manager object. ++ * @mcu_regs_data: Scheduler MCU shared regions data for managing the ++ * shared interface mappings for on-slot queues and ++ * CSG suspend buffers. 
++ */ ++struct kbase_csf_scheduler { ++ struct mutex lock; ++ spinlock_t interrupt_lock; ++ enum kbase_csf_scheduler_state state; ++ DECLARE_BITMAP(doorbell_inuse_bitmap, CSF_NUM_DOORBELL); ++ DECLARE_BITMAP(csg_inuse_bitmap, MAX_SUPPORTED_CSGS); ++ struct kbase_csf_csg_slot *csg_slots; ++ struct list_head runnable_kctxs; ++ struct list_head groups_to_schedule; ++ u32 ngrp_to_schedule; ++ u32 num_active_address_spaces; ++ u32 num_csg_slots_for_tick; ++ u32 remaining_tick_slots; ++ struct list_head idle_groups_to_schedule; ++ u32 csg_scan_count_for_tick; ++ u32 total_runnable_grps; ++ DECLARE_BITMAP(csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); ++ DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); ++ unsigned long last_schedule; ++ bool timer_enabled; ++ struct workqueue_struct *wq; ++ struct hrtimer tick_timer; ++ struct work_struct tick_work; ++ struct delayed_work tock_work; ++ atomic_t pending_tock_work; ++ struct delayed_work ping_work; ++ struct kbase_context *top_ctx; ++ struct kbase_queue_group *top_grp; ++ struct kbase_queue_group *active_protm_grp; ++ struct workqueue_struct *idle_wq; ++ struct work_struct gpu_idle_work; ++ bool fast_gpu_idle_handling; ++ atomic_t gpu_no_longer_idle; ++ atomic_t non_idle_offslot_grps; ++ u32 non_idle_scanout_grps; ++ u32 pm_active_count; ++ unsigned int csg_scheduling_period_ms; ++ bool tick_timer_active; ++ u32 tick_protm_pending_seq; ++ ktime_t protm_enter_time; ++ struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; ++ struct kbase_csf_mcu_shared_regions mcu_regs_data; ++}; + -+static void request_fw_core_dump( -+ const struct kbase_csf_global_iface *const global_iface) -+{ -+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); ++/* ++ * Number of GPU cycles per unit of the global progress timeout. ++ */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024) + -+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); ++/* ++ * Maximum value of the global progress timeout. ++ */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ ++ ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \ ++ GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ ++ GLB_PROGRESS_TIMER_TIMEOUT_SCALE) + -+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); -+} ++/* ++ * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. ++ */ ++#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) + -+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) -+{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; -+ int ret; ++/* ++ * In typical operations, the management of the shader core power transitions ++ * is delegated to the MCU/firmware. However, if the host driver is configured ++ * to take direct control, one needs to disable the MCU firmware GLB_PWROFF ++ * timer. ++ */ ++#define DISABLE_GLB_PWROFF_TIMER (0) + -+ /* Serialize CORE_DUMP requests. */ -+ mutex_lock(&kbdev->csf.reg_lock); ++/* Index of the GPU_ACTIVE counter within the CSHW counter block */ ++#define GPU_ACTIVE_CNT_IDX (4) + -+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ request_fw_core_dump(global_iface); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++/* ++ * Maximum number of sessions that can be managed by the IPA Control component. 
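++ * (8 for MALI_UNIT_TEST builds, 2 otherwise; see the conditional
++ * definition of KBASE_IPA_CONTROL_MAX_SESSIONS below.)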
++ */ ++#if MALI_UNIT_TEST ++#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8) ++#else ++#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2) ++#endif + -+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */ -+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); -+ if (!ret) -+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); ++/** ++ * enum kbase_ipa_core_type - Type of counter block for performance counters ++ * ++ * @KBASE_IPA_CORE_TYPE_CSHW: CS Hardware counters. ++ * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters. ++ * @KBASE_IPA_CORE_TYPE_TILER: Tiler counters. ++ * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters. ++ * @KBASE_IPA_CORE_TYPE_NUM: Number of core types. ++ */ ++enum kbase_ipa_core_type { ++ KBASE_IPA_CORE_TYPE_CSHW = 0, ++ KBASE_IPA_CORE_TYPE_MEMSYS, ++ KBASE_IPA_CORE_TYPE_TILER, ++ KBASE_IPA_CORE_TYPE_SHADER, ++ KBASE_IPA_CORE_TYPE_NUM ++}; + -+ mutex_unlock(&kbdev->csf.reg_lock); ++/* ++ * Number of configurable counters per type of block on the IPA Control ++ * interface. ++ */ ++#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8) + -+ return ret; -+} ++/* ++ * Total number of configurable counters existing on the IPA Control interface. ++ */ ++#define KBASE_IPA_CONTROL_MAX_COUNTERS \ ++ ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) + -+static void global_init(struct kbase_device *const kbdev, u64 core_mask) -+{ -+ u32 const ack_irq_mask = -+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | -+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | -+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | -+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | -+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; ++/** ++ * struct kbase_ipa_control_prfcnt - Session for a single performance counter ++ * ++ * @latest_raw_value: Latest raw value read from the counter. ++ * @scaling_factor: Factor raw value shall be multiplied by. ++ * @accumulated_diff: Partial sum of scaled and normalized values from ++ * previous samples. This represent all the values ++ * that were read before the latest raw value. ++ * @type: Type of counter block for performance counter. ++ * @select_idx: Index of the performance counter as configured on ++ * the IPA Control interface. ++ * @gpu_norm: Indicating whether values shall be normalized by ++ * GPU frequency. If true, returned values represent ++ * an interval of time expressed in seconds (when the ++ * scaling factor is set to 1). ++ */ ++struct kbase_ipa_control_prfcnt { ++ u64 latest_raw_value; ++ u64 scaling_factor; ++ u64 accumulated_diff; ++ enum kbase_ipa_core_type type; ++ u8 select_idx; ++ bool gpu_norm; ++}; + -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; ++/** ++ * struct kbase_ipa_control_session - Session for an IPA Control client ++ * ++ * @prfcnts: Sessions for individual performance counters. ++ * @num_prfcnts: Number of performance counters. 
++ * @active: Indicates whether this slot is in use or not ++ * @last_query_time: Time of last query, in ns ++ * @protm_time: Amount of time (in ns) that GPU has been in protected ++ */ ++struct kbase_ipa_control_session { ++ struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS]; ++ size_t num_prfcnts; ++ bool active; ++ u64 last_query_time; ++ u64 protm_time; ++}; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++/** ++ * struct kbase_ipa_control_prfcnt_config - Performance counter configuration ++ * ++ * @idx: Index of the performance counter inside the block, as specified ++ * in the GPU architecture. ++ * @refcount: Number of client sessions bound to this counter. ++ * ++ * This structure represents one configurable performance counter of ++ * the IPA Control interface. The entry may be mapped to a specific counter ++ * by one or more client sessions. The counter is considered to be unused ++ * if it isn't part of any client session. ++ */ ++struct kbase_ipa_control_prfcnt_config { ++ u8 idx; ++ u8 refcount; ++}; + -+ /* Update shader core allocation enable mask */ -+ enable_endpoints_global(global_iface, core_mask); -+ enable_shader_poweroff_timer(kbdev, global_iface); ++/** ++ * struct kbase_ipa_control_prfcnt_block - Block of performance counters ++ * ++ * @select: Current performance counter configuration. ++ * @num_available_counters: Number of counters that are not already configured. ++ * ++ */ ++struct kbase_ipa_control_prfcnt_block { ++ struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS]; ++ size_t num_available_counters; ++}; + -+ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); ++/** ++ * struct kbase_ipa_control - Manager of the IPA Control interface. ++ * ++ * @blocks: Current configuration of performance counters ++ * for the IPA Control interface. ++ * @sessions: State of client sessions, storing information ++ * like performance counters the client subscribed to ++ * and latest value read from each counter. ++ * @lock: Spinlock to serialize access by concurrent clients. ++ * @rtm_listener_data: Private data for allocating a GPU frequency change ++ * listener. ++ * @num_active_sessions: Number of sessions opened by clients. ++ * @cur_gpu_rate: Current GPU top-level operating frequency, in Hz. ++ * @rtm_listener_data: Private data for allocating a GPU frequency change ++ * listener. ++ * @protm_start: Time (in ns) at which the GPU entered protected mode ++ */ ++struct kbase_ipa_control { ++ struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM]; ++ struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS]; ++ spinlock_t lock; ++ void *rtm_listener_data; ++ size_t num_active_sessions; ++ u32 cur_gpu_rate; ++ u64 protm_start; ++}; + -+ /* The GPU idle timer is always enabled for simplicity. Checks will be -+ * done before scheduling the GPU idle worker to see if it is -+ * appropriate for the current power policy. -+ */ -+ enable_gpu_idle_timer(kbdev); ++/** ++ * struct kbase_csf_firmware_interface - Interface in the MCU firmware ++ * ++ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces ++ * list using this list_head to link them ++ * @phys: Array of the physical (tagged) addresses making up this interface ++ * @reuse_pages: Flag used to identify if the FW interface entry reuses ++ * physical pages allocated for another FW interface entry. ++ * @is_small_page: Flag used to identify if small pages are used for ++ * the FW interface entry. 
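To make the interplay of @latest_raw_value, @scaling_factor, @accumulated_diff and @gpu_norm concrete, here is a simplified accumulation step for one counter. It is a sketch only (counter wrap-around is ignored) and the helper name is hypothetical; the field semantics follow the structures above.

static void ipa_prfcnt_accumulate(struct kbase_ipa_control_prfcnt *prfcnt,
				  u64 raw_value, u32 gpu_rate_hz)
{
	u64 delta = (raw_value - prfcnt->latest_raw_value) * prfcnt->scaling_factor;

	/* With @gpu_norm set, divide by the GPU frequency so the accumulated
	 * value represents an interval of time rather than raw counts.
	 */
	if (prfcnt->gpu_norm && gpu_rate_hz)
		delta = div_u64(delta, gpu_rate_hz);

	prfcnt->latest_raw_value = raw_value;
	prfcnt->accumulated_diff += delta;
}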
++ * @name: NULL-terminated string naming the interface ++ * @num_pages: Number of entries in @phys and @pma (and length of the interface) ++ * @num_pages_aligned: Same as @num_pages except for the case when @is_small_page ++ * is false and @reuse_pages is false and therefore will be ++ * aligned to NUM_4K_PAGES_IN_2MB_PAGE. ++ * @virtual: Starting GPU virtual address this interface is mapped at ++ * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes ++ * @data_start: Offset into firmware image at which the interface data starts ++ * @data_end: Offset into firmware image at which the interface data ends ++ * @virtual_exe_start: Starting GPU execution virtual address of this interface ++ * @kernel_map: A kernel mapping of the memory or NULL if not required to be ++ * mapped in the kernel ++ * @pma: Array of pointers to protected memory allocations. ++ */ ++struct kbase_csf_firmware_interface { ++ struct list_head node; ++ struct tagged_addr *phys; ++ bool reuse_pages; ++ bool is_small_page; ++ char *name; ++ u32 num_pages; ++ u32 num_pages_aligned; ++ u32 virtual; ++ u32 flags; ++ u32 data_start; ++ u32 data_end; ++ u32 virtual_exe_start; ++ void *kernel_map; ++ struct protected_memory_allocation **pma; ++}; + -+ /* Unmask the interrupts */ -+ kbase_csf_firmware_global_input(global_iface, -+ GLB_ACK_IRQ_MASK, ack_irq_mask); ++/* ++ * struct kbase_csf_hwcnt - Object containing members for handling the dump of ++ * HW counters. ++ * ++ * @request_pending: Flag set when HWC requested and used for HWC sample ++ * done interrupt. ++ * @enable_pending: Flag set when HWC enable status change and used for ++ * enable done interrupt. ++ */ ++struct kbase_csf_hwcnt { ++ bool request_pending; ++ bool enable_pending; ++}; + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++/* ++ * struct kbase_csf_mcu_fw - Object containing device loaded MCU firmware data. ++ * ++ * @size: Loaded firmware data size. Meaningful only when the ++ * other field @p data is not NULL. ++ * @data: Pointer to the device retained firmware data. If NULL ++ * means not loaded yet or error in loading stage. ++ */ ++struct kbase_csf_mcu_fw { ++ size_t size; ++ u8 *data; ++}; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+} ++/* ++ * Firmware log polling period. ++ */ ++#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 + +/** -+ * global_init_on_boot - Sends a global request to control various features. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * enum kbase_csf_firmware_log_mode - Firmware log operating mode + * -+ * Currently only the request to enable endpoints and cycle counter is sent. ++ * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read ++ * manually by the userspace (and it will also be dumped automatically into ++ * dmesg on GPU reset). + * -+ * Return: 0 on success, or negative on failure. ++ * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log ++ * will be periodically emptied into dmesg, manual reading through debugfs is ++ * disabled. 
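For context, a structure like kbase_csf_mcu_fw above typically receives its retained copy from the kernel firmware loader. The sketch below (not the load path from this patch; helper name hypothetical, requires <linux/firmware.h>) shows one plausible way to populate it: request the image, duplicate its payload, then release the loader's buffer.

static int mcu_fw_retain_image(struct kbase_device *kbdev,
			       struct kbase_csf_mcu_fw *fw, const char *name)
{
	const struct firmware *image;
	int err;

	err = request_firmware(&image, name, kbdev->dev);
	if (err)
		return err;

	fw->data = kmemdup(image->data, image->size, GFP_KERNEL);
	fw->size = fw->data ? image->size : 0;

	/* The loader's copy is no longer needed once we hold our own */
	release_firmware(image);

	return fw->data ? 0 : -ENOMEM;
}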
+ */ -+static int global_init_on_boot(struct kbase_device *const kbdev) -+{ -+ unsigned long flags; -+ u64 core_mask; ++enum kbase_csf_firmware_log_mode { ++ KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, ++ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT ++}; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ core_mask = kbase_pm_ca_get_core_mask(kbdev); -+ kbdev->csf.firmware_hctl_core_pwr = -+ kbase_pm_no_mcu_core_pwroff(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/** ++ * struct kbase_csf_firmware_log - Object containing members for handling firmware log. ++ * ++ * @mode: Firmware log operating mode. ++ * @busy: Indicating whether a firmware log operation is in progress. ++ * @poll_work: Work item that would poll firmware log buffer ++ * at regular intervals to perform any periodic ++ * activities required by current log mode. ++ * @dump_buf: Buffer used for dumping the log. ++ * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. ++ * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. ++ */ ++struct kbase_csf_firmware_log { ++ enum kbase_csf_firmware_log_mode mode; ++ atomic_t busy; ++ struct delayed_work poll_work; ++ u8 *dump_buf; ++ u32 func_call_list_va_start; ++ u32 func_call_list_va_end; ++}; + -+ global_init(kbdev, core_mask); ++/** ++ * struct kbase_csf_firmware_core_dump - Object containing members for handling ++ * firmware core dump. ++ * ++ * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer ++ * in Firmware. ++ * @version: Version of the FW image header core dump data format. Bits ++ * 7:0 specify version minor and 15:8 specify version major. ++ * @available: Flag to identify if the FW core dump buffer is available. ++ * True if entry is available in the FW image header and version ++ * is supported, False otherwise. ++ */ ++struct kbase_csf_firmware_core_dump { ++ u32 mcu_regs_addr; ++ u16 version; ++ bool available; ++}; + -+ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon ++ * ++ * @error_code: Error code. ++ * @kctx_tgid: tgid value of the Kbase context for which the fault happened. ++ * @kctx_id: id of the Kbase context for which the fault happened. ++ * @enabled: Flag to indicate that 'csf_fault' debugfs has been opened ++ * so dump on fault is enabled. ++ * @fault_wait_wq: Waitqueue on which user space client is blocked till kbase ++ * reports a fault. ++ * @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client ++ * completes the dump on fault. ++ * @lock: Lock to protect this struct members from concurrent access. ++ */ ++struct kbase_csf_dump_on_fault { ++ enum dumpfault_error_type error_code; ++ u32 kctx_tgid; ++ u32 kctx_id; ++ atomic_t enabled; ++ wait_queue_head_t fault_wait_wq; ++ wait_queue_head_t dump_wait_wq; ++ spinlock_t lock; ++}; ++#endif /* CONFIG_DEBUG_FS*/ + -+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, -+ u64 core_mask) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/** ++ * struct kbase_csf_user_reg - Object containing members to manage the mapping ++ * of USER Register page for all contexts ++ * ++ * @dummy_page: Address of a dummy page that is mapped in place ++ * of the real USER Register page just before the GPU ++ * is powered down. 
The USER Register page is mapped ++ * in the address space of every process, that created ++ * a Base context, to enable the access to LATEST_FLUSH ++ * register from userspace. ++ * @filp: Pointer to a dummy file, that along with @file_offset, ++ * facilitates the use of unique file offset for the userspace mapping ++ * created for USER Register page. ++ * The userspace mapping is made to point to this file ++ * inside the mmap handler. ++ * @file_offset: Counter that is incremented every time Userspace creates a mapping of ++ * USER Register page, to provide a unique file offset range for ++ * @filp file, so that the CPU PTE of the Userspace mapping can be zapped ++ * through the kernel function unmap_mapping_range(). ++ * It is incremented in page units. ++ * @list: Linked list to maintain user processes(contexts) ++ * having the mapping to USER Register page. ++ * It's protected by &kbase_csf_device.reg_lock. ++ */ ++struct kbase_csf_user_reg { ++ struct tagged_addr dummy_page; ++ struct file *filp; ++ u32 file_offset; ++ struct list_head list; ++}; + -+ kbdev->csf.glb_init_request_pending = true; -+ kbdev->csf.firmware_hctl_core_pwr = -+ kbase_pm_no_mcu_core_pwroff(kbdev); -+ global_init(kbdev, core_mask); -+} ++/** ++ * struct kbase_csf_device - Object representing CSF for an instance of GPU ++ * platform device. ++ * ++ * @mcu_mmu: MMU page tables for the MCU firmware ++ * @firmware_interfaces: List of interfaces defined in the firmware image ++ * @firmware_config: List of configuration options within the firmware ++ * image ++ * @firmware_timeline_metadata: List of timeline meta-data within the firmware ++ * image ++ * @fw_cfg_kobj: Pointer to the kobject corresponding to the sysf ++ * directory that contains a sub-directory for each ++ * of the configuration option present in the ++ * firmware image. ++ * @firmware_trace_buffers: List of trace buffers described in the firmware ++ * image. ++ * @shared_interface: Pointer to the interface object containing info for ++ * the memory area shared between firmware & host. ++ * @shared_reg_rbtree: RB tree of the memory regions allocated from the ++ * shared interface segment in MCU firmware address ++ * space. ++ * @db_filp: Pointer to a dummy file, that alongwith ++ * @db_file_offsets, facilitates the use of unqiue ++ * file offset for the userspace mapping created ++ * for Hw Doorbell pages. The userspace mapping ++ * is made to point to this file inside the mmap ++ * handler. ++ * @db_file_offsets: Counter that is incremented every time a GPU ++ * command queue is bound to provide a unique file ++ * offset range for @db_filp file, so that pte of ++ * Doorbell page can be zapped through the kernel ++ * function unmap_mapping_range(). It is incremented ++ * in page units. ++ * @dummy_db_page: Address of the dummy page that is mapped in place ++ * of the real Hw doorbell page for the active GPU ++ * command queues after they are stopped or after the ++ * GPU is powered down. ++ * @reg_lock: Lock to serialize the MCU firmware related actions ++ * that affect all contexts such as allocation of ++ * regions from shared interface area, assignment of ++ * hardware doorbell pages, assignment of CSGs, ++ * sending global requests. ++ * @event_wait: Wait queue to wait for receiving csf events, i.e. ++ * the interrupt from CSF firmware, or scheduler state ++ * changes. 
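The @filp/@file_offset scheme described above exists so that the CPU page-table entries of the USER register mappings can be invalidated before the GPU is powered down. A minimal sketch of that zap step follows; the helper name is hypothetical, and offsets are tracked in page units as stated in the field description.

static void user_reg_zap_cpu_mappings(struct kbase_csf_user_reg *user_reg)
{
	/* Remove the CPU PTEs of every userspace mapping backed by the dummy
	 * file (offsets 0 .. @file_offset pages). Subsequent faulting accesses
	 * can then be re-routed by the mmap fault handler, e.g. to @dummy_page
	 * while the GPU is off.
	 */
	unmap_mapping_range(user_reg->filp->f_mapping, 0,
			    (loff_t)user_reg->file_offset << PAGE_SHIFT, 1);
}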
++ * @interrupt_received: Flag set when the interrupt is received from CSF fw ++ * @global_iface: The result of parsing the global interface ++ * structure set up by the firmware, including the ++ * CSGs, CSs, and their properties ++ * @scheduler: The CS scheduler instance. ++ * @reset: Contain members required for GPU reset handling. ++ * @progress_timeout: Maximum number of GPU clock cycles without forward ++ * progress to allow, for all tasks running on ++ * hardware endpoints (e.g. shader cores), before ++ * terminating a GPU command queue group. ++ * Must not exceed @GLB_PROGRESS_TIMER_TIMEOUT_MAX. ++ * @pma_dev: Pointer to protected memory allocator device. ++ * @firmware_inited: Flag for indicating that the cold-boot stage of ++ * the MCU has completed. ++ * @firmware_reloaded: Flag for indicating a firmware reload operation ++ * in GPU reset has completed. ++ * @firmware_reload_needed: Flag for indicating that the firmware needs to be ++ * reloaded as part of the GPU reset action. ++ * @firmware_full_reload_needed: Flag for indicating that the firmware needs to ++ * be fully re-loaded. This may be set when the ++ * boot or re-init of MCU fails after a successful ++ * soft reset. ++ * @firmware_hctl_core_pwr: Flag for indicating that the host diver is in ++ * charge of the shader core's power transitions, and ++ * the mcu_core_pwroff timeout feature is disabled ++ * (i.e. configured 0 in the register field). If ++ * false, the control is delegated to the MCU. ++ * @firmware_reload_work: Work item for facilitating the procedural actions ++ * on reloading the firmware. ++ * @glb_init_request_pending: Flag to indicate that Global requests have been ++ * sent to the FW after MCU was re-enabled and their ++ * acknowledgement is pending. ++ * @fw_error_work: Work item for handling the firmware internal error ++ * fatal event. ++ * @ipa_control: IPA Control component manager. ++ * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input ++ * in unit of micro-seconds. The firmware does not use ++ * it directly. ++ * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input ++ * in interface required format, ready to be used ++ * directly in the firmware. ++ * @mcu_core_pwroff_reg_shadow: The actual value that has been programed into ++ * the glb_pwoff register. This is separated from ++ * the @p mcu_core_pwroff_dur_count as an update ++ * to the latter is asynchronous. ++ * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time ++ * window in unit of microseconds. The firmware does not ++ * use it directly. ++ * @gpu_idle_dur_count: The counterpart of the hysteresis time window in ++ * interface required format, ready to be used ++ * directly in the firmware. ++ * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting ++ * for any request sent to the firmware. ++ * @hwcnt: Contain members required for handling the dump of ++ * HW counters. ++ * @fw: Copy of the loaded MCU firmware image. ++ * @fw_log: Contain members required for handling firmware log. ++ * @fw_core_dump: Contain members required for handling the firmware ++ * core dump. ++ * @dof: Structure for dump on fault. ++ * @user_reg: Collective information to support the mapping to ++ * USER Register page for user processes. 
++ */ ++struct kbase_csf_device { ++ struct kbase_mmu_table mcu_mmu; ++ struct list_head firmware_interfaces; ++ struct list_head firmware_config; ++ struct list_head firmware_timeline_metadata; ++ struct kobject *fw_cfg_kobj; ++ struct kbase_csf_trace_buffers firmware_trace_buffers; ++ void *shared_interface; ++ struct rb_root shared_reg_rbtree; ++ struct file *db_filp; ++ u32 db_file_offsets; ++ struct tagged_addr dummy_db_page; ++ struct mutex reg_lock; ++ wait_queue_head_t event_wait; ++ bool interrupt_received; ++ struct kbase_csf_global_iface global_iface; ++ struct kbase_csf_scheduler scheduler; ++ struct kbase_csf_reset_gpu reset; ++ atomic64_t progress_timeout; ++ struct protected_memory_allocator_device *pma_dev; ++ bool firmware_inited; ++ bool firmware_reloaded; ++ bool firmware_reload_needed; ++ bool firmware_full_reload_needed; ++ bool firmware_hctl_core_pwr; ++ struct work_struct firmware_reload_work; ++ bool glb_init_request_pending; ++ struct work_struct fw_error_work; ++ struct kbase_ipa_control ipa_control; ++ u32 mcu_core_pwroff_dur_us; ++ u32 mcu_core_pwroff_dur_count; ++ u32 mcu_core_pwroff_reg_shadow; ++ u32 gpu_idle_hysteresis_us; ++ u32 gpu_idle_dur_count; ++ unsigned int fw_timeout_ms; ++ struct kbase_csf_hwcnt hwcnt; ++ struct kbase_csf_mcu_fw fw; ++ struct kbase_csf_firmware_log fw_log; ++ struct kbase_csf_firmware_core_dump fw_core_dump; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_csf_dump_on_fault dof; ++#endif /* CONFIG_DEBUG_FS */ ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ /** ++ * @coresight: Coresight device structure. ++ */ ++ struct kbase_debug_coresight_device coresight; ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ struct kbase_csf_user_reg user_reg; ++}; + -+bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ WARN_ON(!kbdev->csf.glb_init_request_pending); ++/** ++ * struct kbase_as - Object representing an address space of GPU. ++ * @number: Index at which this address space structure is present ++ * in an array of address space structures embedded inside ++ * the &struct kbase_device. ++ * @pf_wq: Workqueue for processing work items related to ++ * Page fault, Bus fault and GPU fault handling. ++ * @work_pagefault: Work item for the Page fault handling. ++ * @work_busfault: Work item for the Bus fault handling. ++ * @work_gpufault: Work item for the GPU fault handling. ++ * @pf_data: Data relating to Page fault. ++ * @bf_data: Data relating to Bus fault. ++ * @gf_data: Data relating to GPU fault. ++ * @current_setup: Stores the MMU configuration for this address space. ++ * @is_unresponsive: Flag to indicate MMU is not responding. ++ * Set if a MMU command isn't completed within ++ * &kbase_device:mmu_as_inactive_wait_time_ms. ++ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. 
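As an illustration of how the work items listed above are wired up (a simplified, hypothetical setup, not the driver's actual init path): each fault type gets its own work item processed on @pf_wq, and the interrupt path queues the relevant item after filling in the fault data.

static void demo_page_fault_worker(struct work_struct *work)
{
	struct kbase_as *as = container_of(work, struct kbase_as, work_pagefault);

	/* Resolve the fault previously recorded in as->pf_data ... */
	(void)as;
}

static int demo_as_init(struct kbase_as *as, int number)
{
	as->number = number;
	as->is_unresponsive = false;

	as->pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 0, number);
	if (!as->pf_wq)
		return -ENOMEM;

	INIT_WORK(&as->work_pagefault, demo_page_fault_worker);
	return 0;
}

/* From the MMU IRQ handler, once as->pf_data has been written:
 *	queue_work(as->pf_wq, &as->work_pagefault);
 */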
++ */ ++struct kbase_as { ++ int number; ++ struct workqueue_struct *pf_wq; ++ struct work_struct work_pagefault; ++ struct work_struct work_busfault; ++ struct work_struct work_gpufault; ++ struct kbase_fault pf_data; ++ struct kbase_fault bf_data; ++ struct kbase_fault gf_data; ++ struct kbase_mmu_setup current_setup; ++ bool is_unresponsive; ++}; + -+ if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) -+ kbdev->csf.glb_init_request_pending = false; ++#endif /* _KBASE_CSF_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c +new file mode 100644 +index 000000000..49e529384 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c +@@ -0,0 +1,265 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++#include ++#include "mali_kbase_csf_event.h" + -+ return !kbdev->csf.glb_init_request_pending; -+} ++/** ++ * struct kbase_csf_event_cb - CSF event callback. ++ * ++ * @link: Link to the rest of the list. ++ * @kctx: Pointer to the Kbase context this event belongs to. ++ * @callback: Callback function to call when a CSF event is signalled. ++ * @param: Parameter to pass to the callback function. ++ * ++ * This structure belongs to the list of events which is part of a Kbase ++ * context, and describes a callback function with a custom parameter to pass ++ * to it when a CSF event is signalled. 
++ */ ++struct kbase_csf_event_cb { ++ struct list_head link; ++ struct kbase_context *kctx; ++ kbase_csf_event_callback *callback; ++ void *param; ++}; + -+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, -+ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) ++int kbase_csf_event_wait_add(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param) +{ -+ unsigned long flags; ++ int err = -ENOMEM; ++ struct kbase_csf_event_cb *event_cb = ++ kzalloc(sizeof(struct kbase_csf_event_cb), GFP_KERNEL); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (event_cb) { ++ unsigned long flags; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ if (update_core_mask) -+ enable_endpoints_global(&kbdev->csf.global_iface, core_mask); -+ if (update_core_pwroff_timer) -+ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); ++ event_cb->kctx = kctx; ++ event_cb->callback = callback; ++ event_cb->param = param; + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+} ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ list_add_tail(&event_cb->link, &kctx->csf.event.callback_list); ++ dev_dbg(kctx->kbdev->dev, ++ "Added event handler %pK with param %pK\n", event_cb, ++ event_cb->param); ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); + -+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ err = 0; ++ } + -+ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | -+ GLB_REQ_CFG_PWROFF_TIMER_MASK); ++ return err; +} + -+static void kbase_csf_firmware_reload_worker(struct work_struct *work) ++void kbase_csf_event_wait_remove(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param) +{ -+ struct kbase_device *kbdev = container_of(work, struct kbase_device, -+ csf.firmware_reload_work); ++ struct kbase_csf_event_cb *event_cb; + unsigned long flags; + -+ /* Reboot the firmware */ -+ kbase_csf_firmware_enable_mcu(kbdev); -+ -+ /* Tell MCU state machine to transit to next state */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->csf.firmware_reloaded = true; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} -+ -+void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ kbdev->csf.firmware_reloaded = false; ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); + -+ if (kbdev->csf.firmware_reload_needed) { -+ kbdev->csf.firmware_reload_needed = false; -+ queue_work(system_wq, &kbdev->csf.firmware_reload_work); -+ } else { -+ kbase_csf_firmware_enable_mcu(kbdev); -+ kbdev->csf.firmware_reloaded = true; ++ list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) { ++ if ((event_cb->callback == callback) && (event_cb->param == param)) { ++ list_del(&event_cb->link); ++ dev_dbg(kctx->kbdev->dev, ++ "Removed event handler %pK with param %pK\n", ++ event_cb, event_cb->param); ++ kfree(event_cb); ++ break; ++ } + } ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + -+void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (unlikely(!kbdev->csf.firmware_inited)) -+ return; -+ -+ /* Tell MCU state machine to transit to next state */ -+ kbdev->csf.firmware_reloaded = true; -+ kbase_pm_update_state(kbdev); -+} -+ -+static u32 convert_dur_to_idle_count(struct kbase_device 
*kbdev, const u32 dur_ms) ++static void sync_update_notify_gpu(struct kbase_context *kctx) +{ -+#define HYSTERESIS_VAL_UNIT_SHIFT (10) -+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ -+ u64 freq = arch_timer_get_cntfrq(); -+ u64 dur_val = dur_ms; -+ u32 cnt_val_u32, reg_val_u32; -+ bool src_system_timestamp = freq > 0; ++ bool can_notify_gpu; ++ unsigned long flags; + -+ if (!src_system_timestamp) { -+ /* Get the cycle_counter source alternative */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ if (kbdev->pm.clk_rtm.clks[0]) -+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; -+ else -+ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered; ++#ifdef KBASE_PM_RUNTIME ++ if (kctx->kbdev->pm.backend.gpu_sleep_mode_active) ++ can_notify_gpu = false; ++#endif + -+ dev_info( -+ kbdev->dev, -+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); ++ if (can_notify_gpu) { ++ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); ++ KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u); + } + -+ /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ -+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; -+ dur_val = div_u64(dur_val, 1000); -+ -+ /* Interface limits the value field to S32_MAX */ -+ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; -+ -+ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); -+ /* add the source flag */ -+ if (src_system_timestamp) -+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); -+ -+ return reg_val_u32; ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} + -+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) ++void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) +{ ++ struct kbase_csf_event_cb *event_cb, *next_event_cb; + unsigned long flags; -+ u32 dur; + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ dur = kbdev->csf.gpu_idle_hysteresis_us; -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ -+ return dur; -+} -+ -+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) -+{ -+ unsigned long flags; -+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); ++ dev_dbg(kctx->kbdev->dev, ++ "Signal event (%s GPU notify) for context %pK\n", ++ notify_gpu ? "with" : "without", (void *)kctx); + -+ /* The 'fw_load_lock' is taken to synchronize against the deferred -+ * loading of FW, where the idle timer will be enabled. ++ /* First increment the signal count and wake up event thread. 
+ */ -+ mutex_lock(&kbdev->fw_load_lock); -+ if (unlikely(!kbdev->csf.firmware_inited)) { -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbdev->csf.gpu_idle_hysteresis_us = dur; -+ kbdev->csf.gpu_idle_dur_count = hysteresis_val; -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ mutex_unlock(&kbdev->fw_load_lock); -+ goto end; -+ } -+ mutex_unlock(&kbdev->fw_load_lock); -+ -+ kbase_csf_scheduler_pm_active(kbdev); -+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { -+ dev_err(kbdev->dev, -+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); -+ kbase_csf_scheduler_pm_idle(kbdev); -+ return kbdev->csf.gpu_idle_dur_count; -+ } ++ atomic_set(&kctx->event_count, 1); ++ kbase_event_wakeup(kctx); + -+ /* The 'reg_lock' is also taken and is held till the update is not -+ * complete, to ensure the update of idle timer value by multiple Users -+ * gets serialized. -+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ /* The firmware only reads the new idle timer value when the timer is -+ * disabled. ++ /* Signal the CSF firmware. This is to ensure that pending command ++ * stream synch object wait operations are re-evaluated. ++ * Write to GLB_DOORBELL would suffice as spec says that all pending ++ * synch object wait operations are re-evaluated on a write to any ++ * CS_DOORBELL/GLB_DOORBELL register. + */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ /* Ensure that the request has taken effect */ -+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); ++ if (notify_gpu) ++ sync_update_notify_gpu(kctx); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbdev->csf.gpu_idle_hysteresis_us = dur; -+ kbdev->csf.gpu_idle_dur_count = hysteresis_val; -+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ /* Now invoke the callbacks registered on backend side. ++ * Allow item removal inside the loop, if requested by the callback. 
++ */ ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); + -+ kbase_csf_scheduler_pm_idle(kbdev); ++ list_for_each_entry_safe( ++ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { ++ enum kbase_csf_event_callback_action action; + -+end: -+ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", -+ hysteresis_val); ++ dev_dbg(kctx->kbdev->dev, ++ "Calling event handler %pK with param %pK\n", ++ (void *)event_cb, event_cb->param); ++ action = event_cb->callback(event_cb->param); ++ if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { ++ list_del(&event_cb->link); ++ kfree(event_cb); ++ } ++ } + -+ return hysteresis_val; ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + -+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) ++void kbase_csf_event_term(struct kbase_context *kctx) +{ -+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ -+ u64 freq = arch_timer_get_cntfrq(); -+ u64 dur_val = dur_us; -+ u32 cnt_val_u32, reg_val_u32; -+ bool src_system_timestamp = freq > 0; ++ struct kbase_csf_event_cb *event_cb, *next_event_cb; ++ unsigned long flags; + -+ if (!src_system_timestamp) { -+ /* Get the cycle_counter source alternative */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ if (kbdev->pm.clk_rtm.clks[0]) -+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; -+ else -+ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); + -+ dev_info( -+ kbdev->dev, -+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); ++ list_for_each_entry_safe( ++ event_cb, next_event_cb, &kctx->csf.event.callback_list, link) { ++ list_del(&event_cb->link); ++ dev_warn(kctx->kbdev->dev, ++ "Removed event handler %pK with param %pK\n", ++ (void *)event_cb, event_cb->param); ++ kfree(event_cb); + } + -+ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ -+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; -+ dur_val = div_u64(dur_val, 1000000); -+ -+ /* Interface limits the value field to S32_MAX */ -+ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; ++ WARN(!list_empty(&kctx->csf.event.error_list), ++ "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id); + -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); -+ /* add the source flag */ -+ if (src_system_timestamp) -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); -+ else -+ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, -+ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++} + -+ return reg_val_u32; ++void kbase_csf_event_init(struct kbase_context *const kctx) ++{ ++ INIT_LIST_HEAD(&kctx->csf.event.callback_list); ++ INIT_LIST_HEAD(&kctx->csf.event.error_list); ++ spin_lock_init(&kctx->csf.event.lock); +} + -+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) ++void kbase_csf_event_remove_error(struct kbase_context *kctx, ++ struct kbase_csf_notification *error) +{ -+ u32 pwroff; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return pwroff; ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ list_del_init(&error->link); ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + -+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) ++bool kbase_csf_event_read_error(struct kbase_context *kctx, ++ struct base_csf_notification *event_data) +{ ++ struct kbase_csf_notification *error_data = NULL; + unsigned long flags; -+ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->csf.mcu_core_pwroff_dur_us = dur; -+ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); + -+ return pwroff; ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ if (likely(!list_empty(&kctx->csf.event.error_list))) { ++ error_data = list_first_entry(&kctx->csf.event.error_list, ++ struct kbase_csf_notification, link); ++ list_del_init(&error_data->link); ++ *event_data = error_data->data; ++ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", ++ (void *)error_data, (void *)kctx); ++ } ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); ++ return !!error_data; +} + -+int kbase_csf_firmware_early_init(struct kbase_device *kbdev) ++void kbase_csf_event_add_error(struct kbase_context *const kctx, ++ struct kbase_csf_notification *const error, ++ struct base_csf_notification const *const data) +{ -+ init_waitqueue_head(&kbdev->csf.event_wait); -+ kbdev->csf.interrupt_received = false; ++ unsigned long flags; + -+ kbdev->csf.fw_timeout_ms = -+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); ++ if (WARN_ON(!kctx)) ++ return; + -+ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); -+ INIT_LIST_HEAD(&kbdev->csf.firmware_config); -+ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); -+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list); -+ INIT_WORK(&kbdev->csf.firmware_reload_work, -+ kbase_csf_firmware_reload_worker); -+ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); ++ if (WARN_ON(!error)) ++ return; + -+ mutex_init(&kbdev->csf.reg_lock); ++ if (WARN_ON(!data)) ++ return; + -+ return 0; ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ if (list_empty(&error->link)) { ++ error->data = *data; ++ 
list_add_tail(&error->link, &kctx->csf.event.error_list); ++ dev_dbg(kctx->kbdev->dev, ++ "Added error %pK of type %d in context %pK\n", ++ (void *)error, data->type, (void *)kctx); ++ } else { ++ dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK", ++ (void *)error, error->data.type, (void *)kctx); ++ } ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); +} + -+void kbase_csf_firmware_early_term(struct kbase_device *kbdev) ++bool kbase_csf_event_error_pending(struct kbase_context *kctx) +{ -+ mutex_destroy(&kbdev->csf.reg_lock); -+} ++ bool error_pending = false; ++ unsigned long flags; + -+int kbase_csf_firmware_late_init(struct kbase_device *kbdev) -+{ -+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; -+#ifdef KBASE_PM_RUNTIME -+ if (kbase_pm_gpu_sleep_allowed(kbdev)) -+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; -+#endif -+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); -+ kbdev->csf.gpu_idle_dur_count = -+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); ++ /* Withhold the error event if the dump on fault is ongoing. ++ * This would prevent the Userspace from taking error recovery actions ++ * (which can potentially affect the state that is being dumped). ++ * Event handling thread would eventually notice the error event. ++ */ ++ if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev))) ++ return false; + -+ return 0; -+} ++ spin_lock_irqsave(&kctx->csf.event.lock, flags); ++ error_pending = !list_empty(&kctx->csf.event.error_list); + -+int kbase_csf_firmware_load_init(struct kbase_device *kbdev) -+{ -+ int ret; ++ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", ++ error_pending ? "An" : "No", (void *)kctx); + -+ lockdep_assert_held(&kbdev->fw_load_lock); ++ spin_unlock_irqrestore(&kctx->csf.event.lock, flags); + -+ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) -+ return -EINVAL; -+ kbdev->as_free &= ~MCU_AS_BITMASK; ++ return error_pending; ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h +new file mode 100644 +index 000000000..52122a9ef +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, -+ BASE_MEM_GROUP_DEFAULT); ++#ifndef _KBASE_CSF_EVENT_H_ ++#define _KBASE_CSF_EVENT_H_ + -+ if (ret != 0) { -+ /* Release the address space */ -+ kbdev->as_free |= MCU_AS_BITMASK; -+ return ret; -+ } ++#include ++#include + -+ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, -+ "Failed to setup the rb tree for managing shared interface segment\n"); -+ goto error; -+ } ++struct kbase_context; ++struct kbase_csf_event; ++enum kbase_csf_event_callback_action; + -+ ret = invent_memory_setup_entry(kbdev); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to load firmware entry\n"); -+ goto error; -+ } ++/** ++ * kbase_csf_event_callback - type for callback functions to be ++ * called upon CSF events. ++ * @param: Generic parameter to pass to the callback function. ++ * ++ * This is the type of callback functions that can be registered ++ * for CSF events. These function calls shall be triggered by any call ++ * to kbase_csf_event_signal. ++ * ++ * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain ++ * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed. ++ */ ++typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); + -+ /* Make sure L2 cache is powered up */ -+ kbase_pm_wait_for_l2_powered(kbdev); ++/** ++ * kbase_csf_event_wait_add - Add a CSF event callback ++ * ++ * @kctx: The Kbase context the @callback should be registered to. ++ * @callback: The callback function to register. ++ * @param: Custom parameter to be passed to the @callback function. ++ * ++ * This function adds an event callback to the list of CSF event callbacks ++ * belonging to a given Kbase context, to be triggered when a CSF event is ++ * signalled by kbase_csf_event_signal. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_event_wait_add(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param); + -+ /* NO_MALI: Don't init trace buffers */ ++/** ++ * kbase_csf_event_wait_remove - Remove a CSF event callback ++ * ++ * @kctx: The kbase context the @callback should be removed from. ++ * @callback: The callback function to remove. ++ * @param: Custom parameter that would have been passed to the @p callback ++ * function. ++ * ++ * This function removes an event callback from the list of CSF event callbacks ++ * belonging to a given Kbase context. ++ */ ++void kbase_csf_event_wait_remove(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param); + -+ /* NO_MALI: Don't load the MMU tables or boot CSF firmware */ ++/** ++ * kbase_csf_event_term - Removes all CSF event callbacks ++ * ++ * @kctx: The kbase context for which CSF event callbacks have to be removed. ++ * ++ * This function empties the list of CSF event callbacks belonging to a given ++ * Kbase context. ++ */ ++void kbase_csf_event_term(struct kbase_context *kctx); + -+ ret = invent_capabilities(kbdev); -+ if (ret != 0) -+ goto error; ++/** ++ * kbase_csf_event_signal - Signal a CSF event ++ * ++ * @kctx: The kbase context whose CSF event callbacks shall be triggered. ++ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the ++ * signaling of event that happened on the Driver side, either ++ * the signal came from userspace or from kcpu queues. 
++ * ++ * This function triggers all the CSF event callbacks that are registered to ++ * a given Kbase context, and also signals the event handling thread of ++ * userspace driver waiting for the CSF event. ++ */ ++void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); + -+ ret = kbase_csf_doorbell_mapping_init(kbdev); -+ if (ret != 0) -+ goto error; ++static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal(kctx, true); ++} + -+ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); -+ if (ret != 0) -+ goto error; ++static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal(kctx, false); ++} + -+ ret = kbase_csf_scheduler_init(kbdev); -+ if (ret != 0) -+ goto error; ++/** ++ * kbase_csf_event_init - Initialize event object ++ * ++ * @kctx: The kbase context whose event object will be initialized. ++ * ++ * This function initializes the event object. ++ */ ++void kbase_csf_event_init(struct kbase_context *const kctx); + -+ ret = kbase_csf_timeout_init(kbdev); -+ if (ret != 0) -+ goto error; ++struct kbase_csf_notification; ++struct base_csf_notification; ++/** ++ * kbase_csf_event_read_error - Read and remove an error from error list in event ++ * ++ * @kctx: The kbase context. ++ * @event_data: Caller-provided buffer to copy the fatal error to ++ * ++ * This function takes the CS fatal error from context's ordered ++ * error_list, copies its contents to @event_data. ++ * ++ * Return: true if error is read out or false if there is no error in error list. ++ */ ++bool kbase_csf_event_read_error(struct kbase_context *kctx, ++ struct base_csf_notification *event_data); + -+ ret = global_init_on_boot(kbdev); -+ if (ret != 0) -+ goto error; ++/** ++ * kbase_csf_event_add_error - Add an error into event error list ++ * ++ * @kctx: Address of a base context associated with a GPU address space. ++ * @error: Address of the item to be added to the context's pending error list. ++ * @data: Error data to be returned to userspace. ++ * ++ * Does not wake up the event queue blocking a user thread in kbase_poll. This ++ * is to make it more efficient to add multiple errors. ++ * ++ * The added error must not already be on the context's list of errors waiting ++ * to be reported (e.g. because a previous error concerning the same object has ++ * not yet been reported). ++ * ++ */ ++void kbase_csf_event_add_error(struct kbase_context *const kctx, ++ struct kbase_csf_notification *const error, ++ struct base_csf_notification const *const data); + -+ return 0; ++/** ++ * kbase_csf_event_remove_error - Remove an error from event error list ++ * ++ * @kctx: Address of a base context associated with a GPU address space. ++ * @error: Address of the item to be removed from the context's event error list. ++ */ ++void kbase_csf_event_remove_error(struct kbase_context *kctx, ++ struct kbase_csf_notification *error); + -+error: -+ kbase_csf_firmware_unload_term(kbdev); -+ return ret; -+} ++/** ++ * kbase_csf_event_error_pending - Check the error pending status ++ * ++ * @kctx: The kbase context to check fatal error upon. ++ * ++ * Return: true if there is error in the list. 
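A short usage sketch for the callback API declared in this header: the waiter below registers a one-shot handler and blocks until kbase_csf_event_signal() runs it. The waiter structure and both functions are hypothetical (and need <linux/completion.h>); only the kbase_csf_event_* calls and the KBASE_CSF_EVENT_CALLBACK_REMOVE action come from the driver.

struct my_waiter {
	struct completion done;
};

static enum kbase_csf_event_callback_action my_event_cb(void *param)
{
	struct my_waiter *waiter = param;

	complete(&waiter->done);
	return KBASE_CSF_EVENT_CALLBACK_REMOVE; /* one-shot: drop the callback */
}

static int wait_for_next_csf_event(struct kbase_context *kctx)
{
	struct my_waiter waiter;
	int err;

	init_completion(&waiter.done);

	err = kbase_csf_event_wait_add(kctx, my_event_cb, &waiter);
	if (err)
		return err;

	/* Woken by any kbase_csf_event_signal() on this context */
	wait_for_completion(&waiter.done);
	return 0;
}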
++ */ ++bool kbase_csf_event_error_pending(struct kbase_context *kctx); ++#endif /* _KBASE_CSF_EVENT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +new file mode 100644 +index 000000000..42bff1e91 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +@@ -0,0 +1,3144 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) -+{ -+ cancel_work_sync(&kbdev->csf.fw_error_work); ++#include "mali_kbase.h" ++#include "mali_kbase_csf_firmware_cfg.h" ++#include "mali_kbase_csf_firmware_log.h" ++#include "mali_kbase_csf_firmware_core_dump.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_mem_pool_group.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_ctx_sched.h" ++#include "mali_kbase_csf_scheduler.h" ++#include ++#include "device/mali_kbase_device.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "tl/mali_kbase_timeline_priv.h" ++#include "tl/mali_kbase_tracepoints.h" ++#include "mali_kbase_csf_tl_reader.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) ++#include ++#endif ++#include ++#include ++#include + -+ kbase_csf_timeout_term(kbdev); ++#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) + -+ /* NO_MALI: Don't stop firmware or unload MMU tables */ ++static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; ++module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); ++MODULE_PARM_DESC(fw_name, "firmware image"); + -+ kbase_csf_scheduler_term(kbdev); ++/* The waiting time for firmware to boot */ ++static unsigned int csf_firmware_boot_timeout_ms; ++module_param(csf_firmware_boot_timeout_ms, uint, 0444); ++MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, ++ "Maximum time to wait for firmware to boot."); + -+ kbase_csf_free_dummy_user_reg_page(kbdev); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++/* Makes Driver wait indefinitely for an acknowledgment for the different ++ * requests it sends to firmware. Otherwise the timeouts interfere with the ++ * use of debugger for source-level debugging of firmware as Driver initiates ++ * a GPU reset when a request times out, which always happen when a debugger ++ * is connected. 
++ */ ++bool fw_debug; /* Default value of 0/false */ ++module_param(fw_debug, bool, 0444); ++MODULE_PARM_DESC(fw_debug, ++ "Enables effective use of a debugger for debugging firmware code."); ++#endif + -+ kbase_csf_doorbell_mapping_term(kbdev); + -+ free_global_iface(kbdev); ++#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) ++#define FIRMWARE_HEADER_VERSION_MAJOR (0ul) ++#define FIRMWARE_HEADER_VERSION_MINOR (3ul) ++#define FIRMWARE_HEADER_LENGTH (0x14ul) + -+ /* Release the address space */ -+ kbdev->as_free |= MCU_AS_BITMASK; ++#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ ++ (CSF_FIRMWARE_ENTRY_READ | \ ++ CSF_FIRMWARE_ENTRY_WRITE | \ ++ CSF_FIRMWARE_ENTRY_EXECUTE | \ ++ CSF_FIRMWARE_ENTRY_PROTECTED | \ ++ CSF_FIRMWARE_ENTRY_SHARED | \ ++ CSF_FIRMWARE_ENTRY_ZERO | \ ++ CSF_FIRMWARE_ENTRY_CACHE_MODE) + -+ while (!list_empty(&kbdev->csf.firmware_interfaces)) { -+ struct dummy_firmware_interface *interface; ++#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) ++#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) ++#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) ++#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) ++#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) ++#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) ++#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) + -+ interface = list_first_entry(&kbdev->csf.firmware_interfaces, -+ struct dummy_firmware_interface, node); -+ list_del(&interface->node); ++#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT (2ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) + -+ /* NO_MALI: No cleanup in dummy interface necessary */ ++#define INTERFACE_ENTRY_NAME_OFFSET (0x14) + -+ kfree(interface); -+ } ++#define TL_METADATA_ENTRY_NAME_OFFSET (0x8) + -+ /* NO_MALI: No trace buffers to terminate */ ++#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4) ++#define BUILD_INFO_GIT_SHA_LEN (40U) ++#define BUILD_INFO_GIT_DIRTY_LEN (1U) ++#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: " + -+ /* This will also free up the region allocated for the shared interface -+ * entry parsed from the firmware image. -+ */ -+ kbase_mcu_shared_interface_region_tracker_term(kbdev); ++#define CSF_MAX_FW_STOP_LOOPS (100000) + -+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); -+} ++#define CSF_GLB_REQ_CFG_MASK \ ++ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ ++ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) + -+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) ++static inline u32 input_page_read(const u32 *const input, const u32 offset) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ /* The scheduler is assumed to only call the enable when its internal -+ * state indicates that the idle timer has previously been disabled. So -+ * on entry the expected field values are: -+ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 -+ * 2. 
GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 -+ */ -+ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) -+ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); ++ WARN_ON(offset % sizeof(u32)); + -+ enable_gpu_idle_timer(kbdev); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ return input[offset / sizeof(u32)]; +} + -+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++static inline void input_page_write(u32 *const input, const u32 offset, ++ const u32 value) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ WARN_ON(offset % sizeof(u32)); + -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, -+ GLB_REQ_REQ_IDLE_DISABLE, -+ GLB_REQ_IDLE_DISABLE_MASK); ++ input[offset / sizeof(u32)] = value; ++} + -+ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); ++static inline void input_page_partial_write(u32 *const input, const u32 offset, ++ u32 value, u32 mask) ++{ ++ WARN_ON(offset % sizeof(u32)); + -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ input[offset / sizeof(u32)] = ++ (input_page_read(input, offset) & ~mask) | (value & mask); +} + -+void kbase_csf_firmware_ping(struct kbase_device *const kbdev) ++static inline u32 output_page_read(const u32 *const output, const u32 offset) +{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; ++ WARN_ON(offset % sizeof(u32)); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_global_request(global_iface, GLB_REQ_PING_MASK); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ return output[offset / sizeof(u32)]; +} + -+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) ++static unsigned int entry_type(u32 header) +{ -+ CSTD_UNUSED(wait_timeout_ms); -+ kbase_csf_firmware_ping(kbdev); -+ return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); ++ return header & 0xFF; ++} ++static unsigned int entry_size(u32 header) ++{ ++ return (header >> 8) & 0xFF; ++} ++static bool entry_update(u32 header) ++{ ++ return (header >> 30) & 0x1; ++} ++static bool entry_optional(u32 header) ++{ ++ return (header >> 31) & 0x1; +} + -+int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, -+ u64 const timeout) ++/** ++ * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware ++ * ++ * @node: List head linking all timeline metadata to ++ * kbase_device:csf.firmware_timeline_metadata. ++ * @name: NUL-terminated string naming the metadata. ++ * @data: Metadata content. ++ * @size: Metadata size. ++ */ ++struct firmware_timeline_metadata { ++ struct list_head node; ++ char *name; ++ char *data; ++ size_t size; ++}; ++ ++/* The shared interface area, used for communicating with firmware, is managed ++ * like a virtual memory zone. Reserve the virtual space from that zone ++ * corresponding to shared interface entry parsed from the firmware image. ++ * The shared_reg_rbtree should have been initialized before calling this ++ * function. 
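A concrete decoding example for the entry-header helpers above, with an arbitrarily chosen value for illustration: for header = 0x40000C09, entry_type() yields 0x09, entry_size() yields 0x0C bytes, entry_update() is true (bit 30 set) and entry_optional() is false (bit 31 clear). The wrapper function is hypothetical.

static void decode_entry_header_example(struct kbase_device *kbdev)
{
	const u32 header = 0x40000C09; /* example value only */

	dev_dbg(kbdev->dev, "entry: type=%u size=%u update=%d optional=%d",
		entry_type(header), entry_size(header),
		(int)entry_update(header), (int)entry_optional(header));
}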
++ */ ++static int setup_shared_iface_static_region(struct kbase_device *kbdev) +{ -+ const struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err; ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ struct kbase_va_region *reg; ++ int ret = -ENOMEM; + -+ /* The 'reg_lock' is also taken and is held till the update is not -+ * complete, to ensure the update of timeout value by multiple Users -+ * gets serialized. -+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_timeout_global(global_iface, timeout); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (!interface) ++ return -EINVAL; + -+ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, ++ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); ++ if (reg) { ++ mutex_lock(&kbdev->csf.reg_lock); ++ ret = kbase_add_va_region_rbtree(kbdev, reg, ++ interface->virtual, interface->num_pages_aligned, 1); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ if (ret) ++ kfree(reg); ++ else ++ reg->flags &= ~KBASE_REG_FREE; ++ } + -+ return err; ++ return ret; +} + -+void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) ++static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ u32 max_loops = CSF_MAX_FW_STOP_LOOPS; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); -+ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ /* wait for the MCU_STATUS register to reach the given status value */ ++ while (--max_loops && ++ (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) != val)) { ++ } ++ ++ return (max_loops == 0) ? -1 : 0; +} + -+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) ++void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) +{ -+ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); -+ -+ if (err) { -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); -+ } ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + -+ return err; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); +} + -+void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) ++static void wait_for_firmware_stop(struct kbase_device *kbdev) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; ++ if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) { ++ /* This error shall go away once MIDJM-2371 is closed */ ++ dev_err(kbdev->dev, "Firmware failed to stop"); ++ } + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ /* Validate there are no on-slot groups when sending the -+ * halt request to firmware. 
-+ */ -+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); -+ set_global_request(global_iface, GLB_REQ_HALT_MASK); -+ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev)); +} + -+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +{ -+ /* Trigger the boot of MCU firmware, Use the AUTO mode as -+ * otherwise on fast reset, to exit protected mode, MCU will -+ * not reboot by itself to enter normal mode. -+ */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); ++ wait_for_firmware_stop(kbdev); +} + -+#ifdef KBASE_PM_RUNTIME -+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) ++static void stop_csf_firmware(struct kbase_device *kbdev) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; ++ /* Stop the MCU firmware */ ++ kbase_csf_firmware_disable_mcu(kbdev); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK); -+ dev_dbg(kbdev->dev, "Sending sleep request to MCU"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ wait_for_firmware_stop(kbdev); +} + -+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) ++static void wait_for_firmware_boot(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ long wait_timeout; ++ long remaining; + -+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && -+ kbase_csf_firmware_mcu_halted(kbdev)); ++ if (!csf_firmware_boot_timeout_ms) ++ csf_firmware_boot_timeout_ms = ++ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_BOOT_TIMEOUT); ++ ++ wait_timeout = kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); ++ ++ /* Firmware will generate a global interface interrupt once booting ++ * is complete ++ */ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ kbdev->csf.interrupt_received == true, wait_timeout); ++ ++ if (!remaining) ++ dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); ++ ++ kbdev->csf.interrupt_received = false; +} -+#endif + -+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) ++static void boot_csf_firmware(struct kbase_device *kbdev) +{ -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ unsigned long flags; -+ int err = 0; ++ kbase_csf_firmware_enable_mcu(kbdev); + -+ /* The 'reg_lock' is also taken and is held till the update is -+ * complete, to ensure the config update gets serialized. 
-+ */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + -+ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); -+ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) ++ dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+ err = wait_for_global_request(kbdev, -+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ return err; ++ wait_for_firmware_boot(kbdev); +} + +/** -+ * copy_grp_and_stm - Copy CS and/or group data ++ * wait_ready() - Wait for previously issued MMU command to complete. + * -+ * @iface: Global CSF interface provided by -+ * the firmware. -+ * @group_data: Pointer where to store all the group data -+ * (sequentially). -+ * @max_group_num: The maximum number of groups to be read. Can be 0, in -+ * which case group_data is unused. -+ * @stream_data: Pointer where to store all the stream data -+ * (sequentially). -+ * @max_total_stream_num: The maximum number of streams to be read. -+ * Can be 0, in which case stream_data is unused. ++ * @kbdev: Kbase device to wait for a MMU command to complete. + * -+ * Return: Total number of CSs, summed across all groups. ++ * Reset GPU if the wait for previously issued command times out. ++ * ++ * Return: 0 on success, error code otherwise. + */ -+static u32 copy_grp_and_stm( -+ const struct kbase_csf_global_iface * const iface, -+ struct basep_cs_group_control * const group_data, -+ u32 max_group_num, -+ struct basep_cs_stream_control * const stream_data, -+ u32 max_total_stream_num) ++static int wait_ready(struct kbase_device *kbdev) +{ -+ u32 i, total_stream_num = 0; ++ const ktime_t wait_loop_start = ktime_get_raw(); ++ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; ++ s64 diff; + -+ if (WARN_ON((max_group_num > 0) && !group_data)) -+ max_group_num = 0; ++ do { ++ unsigned int i; + -+ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) -+ max_total_stream_num = 0; ++ for (i = 0; i < 1000; i++) { ++ /* Wait for the MMU status to indicate there is no active command */ ++ if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & ++ AS_STATUS_AS_ACTIVE)) ++ return 0; ++ } + -+ for (i = 0; i < iface->group_num; i++) { -+ u32 j; ++ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); ++ } while (diff < mmu_as_inactive_wait_time_ms); + -+ if (i < max_group_num) { -+ group_data[i].features = iface->groups[i].features; -+ group_data[i].stream_num = iface->groups[i].stream_num; -+ group_data[i].suspend_size = -+ iface->groups[i].suspend_size; -+ } -+ for (j = 0; j < iface->groups[i].stream_num; j++) { -+ if (total_stream_num < max_total_stream_num) -+ stream_data[total_stream_num].features = -+ iface->groups[i].streams[j].features; -+ total_stream_num++; -+ } -+ } -+ -+ return total_stream_num; -+} -+ -+u32 kbase_csf_firmware_get_glb_iface( -+ struct kbase_device *kbdev, -+ struct basep_cs_group_control *const group_data, -+ u32 const max_group_num, -+ struct basep_cs_stream_control *const stream_data, -+ u32 const max_total_stream_num, u32 *const glb_version, -+ u32 *const features, u32 *const group_num, u32 
*const prfcnt_size, -+ u32 *const instr_features) -+{ -+ const struct kbase_csf_global_iface * const iface = -+ &kbdev->csf.global_iface; -+ -+ if (WARN_ON(!glb_version) || WARN_ON(!features) || -+ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || -+ WARN_ON(!instr_features)) -+ return 0; ++ dev_err(kbdev->dev, ++ "AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system"); + -+ *glb_version = iface->version; -+ *features = iface->features; -+ *group_num = iface->group_num; -+ *prfcnt_size = iface->prfcnt_size; -+ *instr_features = iface->instr_features; ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu_locked(kbdev); + -+ return copy_grp_and_stm(iface, group_data, max_group_num, -+ stream_data, max_total_stream_num); ++ return -ETIMEDOUT; +} + -+const char *kbase_csf_firmware_get_timeline_metadata( -+ struct kbase_device *kbdev, const char *name, size_t *size) ++static void unload_mmu_tables(struct kbase_device *kbdev) +{ -+ if (WARN_ON(!kbdev) || -+ WARN_ON(!name) || -+ WARN_ON(!size)) { -+ return NULL; -+ } ++ unsigned long irq_flags; + -+ *size = 0; -+ return NULL; ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_mmu_disable_as(kbdev, MCU_AS_NR); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); +} + -+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) ++static int load_mmu_tables(struct kbase_device *kbdev) +{ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); -+} ++ unsigned long irq_flags; + -+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) -+{ -+ /* NO_MALI: Nothing to do here */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, MCU_AS_NR); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ /* Wait for a while for the update command to take effect */ ++ return wait_ready(kbdev); +} + -+int kbase_csf_firmware_mcu_shared_mapping_init( -+ struct kbase_device *kbdev, -+ unsigned int num_pages, -+ unsigned long cpu_map_properties, -+ unsigned long gpu_map_properties, -+ struct kbase_csf_mapping *csf_mapping) ++/** ++ * convert_mem_flags() - Convert firmware memory flags to GPU region flags ++ * ++ * Return: GPU memory region flags ++ * ++ * @kbdev: Instance of GPU platform device (used to determine system coherency) ++ * @flags: Flags of an "interface memory setup" section in a firmware image ++ * @cm: appropriate cache mode chosen for the "interface memory setup" ++ * section, which could be different from the cache mode requested by ++ * firmware. 
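++ *
++ * For example, a CACHED_COHERENT request is downgraded to UNCACHED_COHERENT
++ * if the system does not support ACE coherency, and an UNCACHED_COHERENT
++ * request is downgraded to NONE if the system reports COHERENCY_NONE.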
++ */ ++static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, ++ const u32 flags, u32 *cm) +{ -+ struct tagged_addr *phys; -+ struct kbase_va_region *va_reg; -+ struct page **page_list; -+ void *cpu_addr; -+ int i, ret = 0; -+ pgprot_t cpu_map_prot = PAGE_KERNEL; -+ unsigned long gpu_map_prot; -+ -+ if (cpu_map_properties & PROT_READ) -+ cpu_map_prot = PAGE_KERNEL_RO; -+ -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ gpu_map_prot = -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); -+ } else { -+ gpu_map_prot = -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -+ cpu_map_prot = pgprot_writecombine(cpu_map_prot); -+ } -+ -+ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); -+ if (!phys) -+ goto out; ++ unsigned long mem_flags = 0; ++ u32 cache_mode = flags & CSF_FIRMWARE_ENTRY_CACHE_MODE; ++ bool is_shared = (flags & CSF_FIRMWARE_ENTRY_SHARED) ? true : false; + -+ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); -+ if (!page_list) -+ goto page_list_alloc_error; ++ /* The memory flags control the access permissions for the MCU, the ++ * shader cores/tiler are not expected to access this memory ++ */ ++ if (flags & CSF_FIRMWARE_ENTRY_READ) ++ mem_flags |= KBASE_REG_GPU_RD; + -+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, -+ phys, false, NULL); -+ if (ret <= 0) -+ goto phys_mem_pool_alloc_error; ++ if (flags & CSF_FIRMWARE_ENTRY_WRITE) ++ mem_flags |= KBASE_REG_GPU_WR; + -+ for (i = 0; i < num_pages; i++) -+ page_list[i] = as_page(phys[i]); ++ if ((flags & CSF_FIRMWARE_ENTRY_EXECUTE) == 0) ++ mem_flags |= KBASE_REG_GPU_NX; + -+ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); -+ if (!cpu_addr) -+ goto vmap_error; ++ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) ++ mem_flags |= KBASE_REG_PROTECTED; + -+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, -+ KBASE_REG_ZONE_MCU_SHARED); -+ if (!va_reg) -+ goto va_region_alloc_error; ++ /* Substitute uncached coherent memory for cached coherent memory if ++ * the system does not support ACE coherency. ++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) && ++ (kbdev->system_coherency != COHERENCY_ACE)) ++ cache_mode = CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT; + -+ mutex_lock(&kbdev->csf.reg_lock); -+ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); -+ va_reg->flags &= ~KBASE_REG_FREE; -+ if (ret) -+ goto va_region_add_error; -+ mutex_unlock(&kbdev->csf.reg_lock); ++ /* Substitute uncached incoherent memory for uncached coherent memory ++ * if the system does not support ACE-Lite coherency. 
++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT) && ++ (kbdev->system_coherency == COHERENCY_NONE)) ++ cache_mode = CSF_FIRMWARE_CACHE_MODE_NONE; + -+ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); -+ gpu_map_properties |= gpu_map_prot; ++ *cm = cache_mode; + -+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, -+ &phys[0], num_pages, gpu_map_properties, -+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); -+ if (ret) -+ goto mmu_insert_pages_error; ++ switch (cache_mode) { ++ case CSF_FIRMWARE_CACHE_MODE_NONE: ++ mem_flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ break; ++ case CSF_FIRMWARE_CACHE_MODE_CACHED: ++ mem_flags |= ++ KBASE_REG_MEMATTR_INDEX( ++ AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); ++ break; ++ case CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT: ++ case CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT: ++ WARN_ON(!is_shared); ++ mem_flags |= KBASE_REG_SHARE_BOTH | ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "Firmware contains interface with unsupported cache mode\n"); ++ break; ++ } ++ return mem_flags; ++} + -+ kfree(page_list); -+ csf_mapping->phys = phys; -+ csf_mapping->cpu_addr = cpu_addr; -+ csf_mapping->va_reg = va_reg; -+ csf_mapping->num_pages = num_pages; ++static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, ++ struct tagged_addr *phys, u32 num_pages, u32 flags, ++ u32 data_start, u32 data_end) ++{ ++ u32 data_pos = data_start; ++ u32 data_len = data_end - data_start; ++ u32 page_num; ++ u32 page_limit; + -+ return 0; ++ if (flags & CSF_FIRMWARE_ENTRY_ZERO) ++ page_limit = num_pages; ++ else ++ page_limit = (data_len + PAGE_SIZE - 1) / PAGE_SIZE; + -+mmu_insert_pages_error: -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, va_reg); -+va_region_add_error: -+ kbase_free_alloced_region(va_reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+va_region_alloc_error: -+ vunmap(cpu_addr); -+vmap_error: -+ kbase_mem_pool_free_pages( -+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ num_pages, phys, false, false); ++ for (page_num = 0; page_num < page_limit; ++page_num) { ++ struct page *const page = as_page(phys[page_num]); ++ char *const p = kmap_atomic(page); ++ u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); + -+phys_mem_pool_alloc_error: -+ kfree(page_list); -+page_list_alloc_error: -+ kfree(phys); -+out: -+ /* Zero-initialize the mapping to make sure that the termination -+ * function doesn't try to unmap or free random addresses. 
-+ */ -+ csf_mapping->phys = NULL; -+ csf_mapping->cpu_addr = NULL; -+ csf_mapping->va_reg = NULL; -+ csf_mapping->num_pages = 0; ++ if (copy_len > 0) { ++ memcpy(p, data + data_pos, copy_len); ++ data_pos += copy_len; ++ data_len -= copy_len; ++ } + -+ return -ENOMEM; -+} ++ if (flags & CSF_FIRMWARE_ENTRY_ZERO) { ++ u32 const zi_len = PAGE_SIZE - copy_len; + -+void kbase_csf_firmware_mcu_shared_mapping_term( -+ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) -+{ -+ if (csf_mapping->va_reg) { -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, csf_mapping->va_reg); -+ kbase_free_alloced_region(csf_mapping->va_reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ } ++ memset(p + copy_len, 0, zi_len); ++ } + -+ if (csf_mapping->phys) { -+ kbase_mem_pool_free_pages( -+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ csf_mapping->num_pages, csf_mapping->phys, false, -+ false); ++ kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), ++ PAGE_SIZE, DMA_TO_DEVICE); ++ kunmap_atomic(p); + } -+ -+ vunmap(csf_mapping->cpu_addr); -+ kfree(csf_mapping->phys); +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c -new file mode 100644 -index 000000000..7c14b8eb5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c -@@ -0,0 +1,236 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#include -+#include "mali_kbase_csf_heap_context_alloc.h" -+ -+/* Size of one heap context structure, in bytes. */ -+#define HEAP_CTX_SIZE ((u32)32) + -+/** -+ * sub_alloc - Sub-allocate a heap context from a GPU memory region -+ * -+ * @ctx_alloc: Pointer to the heap context allocator. -+ * -+ * Return: GPU virtual address of the allocated heap context or 0 on failure. 
-+ */ -+static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) ++static int reload_fw_image(struct kbase_device *kbdev) +{ -+ struct kbase_context *const kctx = ctx_alloc->kctx; -+ unsigned long heap_nr = 0; -+ u32 ctx_offset = 0; -+ u64 heap_gpu_va = 0; -+ struct kbase_vmap_struct mapping; -+ void *ctx_ptr = NULL; -+ -+ lockdep_assert_held(&ctx_alloc->lock); -+ -+ heap_nr = find_first_zero_bit(ctx_alloc->in_use, -+ MAX_TILER_HEAPS); ++ const u32 magic = FIRMWARE_HEADER_MAGIC; ++ struct kbase_csf_firmware_interface *interface; ++ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; ++ int ret = 0; + -+ if (unlikely(heap_nr >= MAX_TILER_HEAPS)) { -+ dev_dbg(kctx->kbdev->dev, -+ "No free tiler heap contexts in the pool"); -+ return 0; ++ if (WARN_ON(mcu_fw->data == NULL)) { ++ dev_err(kbdev->dev, "Firmware image copy not loaded\n"); ++ ret = -EINVAL; ++ goto out; + } + -+ ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; -+ heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; -+ ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, -+ ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); -+ -+ if (unlikely(!ctx_ptr)) { -+ dev_err(kctx->kbdev->dev, -+ "Failed to map tiler heap context %lu (0x%llX)\n", -+ heap_nr, heap_gpu_va); -+ return 0; ++ /* Do a basic sanity check on MAGIC signature */ ++ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { ++ dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); ++ ret = -EINVAL; ++ goto out; + } + -+ memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); -+ kbase_vunmap(ctx_ptr, &mapping); ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ /* Dont skip re-loading any section if full reload was requested */ ++ if (!kbdev->csf.firmware_full_reload_needed) { ++ /* Skip reload of text & read only data sections */ ++ if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || ++ !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) ++ continue; ++ } + -+ bitmap_set(ctx_alloc->in_use, heap_nr, 1); ++ load_fw_image_section(kbdev, mcu_fw->data, interface->phys, interface->num_pages, ++ interface->flags, interface->data_start, interface->data_end); ++ } + -+ dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", -+ heap_nr, heap_gpu_va); ++ kbdev->csf.firmware_full_reload_needed = false; + -+ return heap_gpu_va; ++ kbase_csf_firmware_reload_trace_buffers_data(kbdev); ++out: ++ return ret; +} + +/** -+ * evict_heap_context - Evict the data of heap context from GPU's L2 cache. ++ * entry_find_large_page_to_reuse() - Find if the large page of previously parsed ++ * FW interface entry can be reused to store ++ * the contents of new FW interface entry. + * -+ * @ctx_alloc: Pointer to the heap context allocator. -+ * @heap_gpu_va: The GPU virtual address of a heap context structure to free. ++ * @kbdev: Kbase device structure ++ * @virtual_start: Start of the virtual address range required for an entry allocation ++ * @virtual_end: End of the virtual address range required for an entry allocation ++ * @flags: Firmware entry flags for comparison with the reusable pages found ++ * @phys: Pointer to the array of physical (tagged) addresses making up the new ++ * FW interface entry. It is an output parameter which would be made to ++ * point to an already existing array allocated for the previously parsed ++ * FW interface entry using large page(s). If no appropriate entry is ++ * found it is set to NULL. ++ * @pma: Pointer to a protected memory allocation. 
It is an output parameter ++ * which would be made to the protected memory allocation of a previously ++ * parsed FW interface entry using large page(s) from protected memory. ++ * If no appropriate entry is found it is set to NULL. ++ * @num_pages: Number of pages requested. ++ * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages ++ * within the 2MB pages aligned allocation. ++ * @is_small_page: This is an output flag used to select between the small and large page ++ * to be used for the FW entry allocation. + * -+ * This function is called when memory for the heap context is freed. It uses the -+ * FLUSH_PA_RANGE command to evict the data of heap context, so on older CSF GPUs -+ * there is nothing done. The whole GPU cache is anyways expected to be flushed -+ * on older GPUs when initial chunks of the heap are freed just before the memory -+ * for heap context is freed. -+ */ -+static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ u64 const heap_gpu_va) -+{ -+ struct kbase_context *const kctx = ctx_alloc->kctx; -+ u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); -+ u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; -+ u32 page_index = offset_in_bytes >> PAGE_SHIFT; -+ struct tagged_addr page = -+ kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; -+ phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; -+ -+ lockdep_assert_held(&ctx_alloc->lock); -+ -+ /* There is no need to take vm_lock here as the ctx_alloc region is protected -+ * via a nonzero no_user_free_count. The region and the backing page can't -+ * disappear whilst this function is executing. Flush type is passed as FLUSH_PT -+ * to CLN+INV L2 only. -+ */ -+ kbase_mmu_flush_pa_range(kctx->kbdev, kctx, -+ heap_context_pa, ctx_alloc->heap_context_size_aligned, -+ KBASE_MMU_OP_FLUSH_PT); -+} -+ -+/** -+ * sub_free - Free a heap context sub-allocated from a GPU memory region ++ * Go through all the already initialized interfaces and find if a previously ++ * allocated large page can be used to store contents of new FW interface entry. + * -+ * @ctx_alloc: Pointer to the heap context allocator. -+ * @heap_gpu_va: The GPU virtual address of a heap context structure to free. ++ * Return: true if a large page can be reused, false otherwise. 
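++ *
++ * A large page is only reused when the new entry does not itself start on a
++ * 2MB boundary, lies entirely within one 2MB aligned window, and a
++ * previously parsed 2MB aligned entry with identical flags already has
++ * pages covering that window.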
+ */ -+static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ u64 const heap_gpu_va) ++static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, ++ const u32 virtual_start, const u32 virtual_end, ++ const u32 flags, struct tagged_addr **phys, ++ struct protected_memory_allocation ***pma, ++ u32 num_pages, u32 *num_pages_aligned, ++ bool *is_small_page) +{ -+ struct kbase_context *const kctx = ctx_alloc->kctx; -+ u32 ctx_offset = 0; -+ unsigned int heap_nr = 0; -+ -+ lockdep_assert_held(&ctx_alloc->lock); -+ -+ if (WARN_ON(!ctx_alloc->region)) -+ return; -+ -+ if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) -+ return; -+ -+ ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); -+ -+ if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || -+ WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) -+ return; -+ -+ evict_heap_context(ctx_alloc, heap_gpu_va); ++ struct kbase_csf_firmware_interface *interface = NULL; ++ struct kbase_csf_firmware_interface *target_interface = NULL; ++ u32 virtual_diff_min = U32_MAX; ++ bool reuse_large_page = false; + -+ heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; -+ dev_dbg(kctx->kbdev->dev, -+ "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); ++ CSTD_UNUSED(interface); ++ CSTD_UNUSED(target_interface); ++ CSTD_UNUSED(virtual_diff_min); + -+ bitmap_clear(ctx_alloc->in_use, heap_nr, 1); -+} ++ *num_pages_aligned = num_pages; ++ *is_small_page = true; ++ *phys = NULL; ++ *pma = NULL; + -+int kbase_csf_heap_context_allocator_init( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ struct kbase_context *const kctx) -+{ -+ const u32 gpu_cache_line_size = -+ (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + -+ /* We cannot pre-allocate GPU memory here because the -+ * custom VA zone may not have been created yet. ++ /* If the section starts at 2MB aligned boundary, ++ * then use 2MB page(s) for it. + */ -+ ctx_alloc->kctx = kctx; -+ ctx_alloc->region = NULL; -+ ctx_alloc->gpu_va = 0; -+ ctx_alloc->heap_context_size_aligned = -+ (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); -+ -+ mutex_init(&ctx_alloc->lock); -+ bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); -+ -+ dev_dbg(kctx->kbdev->dev, -+ "Initialized a tiler heap context allocator\n"); ++ if (!(virtual_start & (SZ_2M - 1))) { ++ *num_pages_aligned = ++ round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); ++ *is_small_page = false; ++ goto out; ++ } + -+ return 0; -+} ++ /* If the section doesn't lie within the same 2MB aligned boundary, ++ * then use 4KB pages as it would be complicated to use a 2MB page ++ * for such section. ++ */ ++ if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1))) ++ goto out; + -+void kbase_csf_heap_context_allocator_term( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc) -+{ -+ struct kbase_context *const kctx = ctx_alloc->kctx; ++ /* Find the nearest 2MB aligned section which comes before the current ++ * section. 
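++ * (i.e. the 2MB aligned entry with matching flags whose start address has
++ * the smallest positive distance from virtual_start)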
++ */ ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ const u32 virtual_diff = virtual_start - interface->virtual; + -+ dev_dbg(kctx->kbdev->dev, -+ "Terminating tiler heap context allocator\n"); ++ if (interface->virtual > virtual_end) ++ continue; + -+ if (ctx_alloc->region) { -+ kbase_gpu_vm_lock(kctx); -+ WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region)); ++ if (interface->virtual & (SZ_2M - 1)) ++ continue; + -+ kbase_va_region_no_user_free_dec(ctx_alloc->region); -+ kbase_mem_free_region(kctx, ctx_alloc->region); -+ kbase_gpu_vm_unlock(kctx); ++ if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { ++ target_interface = interface; ++ virtual_diff_min = virtual_diff; ++ } + } + -+ mutex_destroy(&ctx_alloc->lock); -+} ++ if (target_interface) { ++ const u32 page_index = virtual_diff_min >> PAGE_SHIFT; + -+u64 kbase_csf_heap_context_allocator_alloc( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc) -+{ -+ struct kbase_context *const kctx = ctx_alloc->kctx; -+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | -+ BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; -+ u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); -+ u64 heap_gpu_va = 0; ++ if (page_index >= target_interface->num_pages_aligned) ++ goto out; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ if (target_interface->phys) ++ *phys = &target_interface->phys[page_index]; + -+ mutex_lock(&ctx_alloc->lock); ++ if (target_interface->pma) ++ *pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE]; + -+ /* If the pool of heap contexts wasn't already allocated then -+ * allocate it. -+ */ -+ if (!ctx_alloc->region) { -+ ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, -+ &ctx_alloc->gpu_va, mmu_sync_info); ++ *is_small_page = false; ++ reuse_large_page = true; + } + -+ /* If the pool still isn't allocated then an error occurred. */ -+ if (unlikely(!ctx_alloc->region)) -+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts"); -+ else -+ heap_gpu_va = sub_alloc(ctx_alloc); -+ -+ mutex_unlock(&ctx_alloc->lock); -+ -+ return heap_gpu_va; -+} -+ -+void kbase_csf_heap_context_allocator_free( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ u64 const heap_gpu_va) -+{ -+ mutex_lock(&ctx_alloc->lock); -+ sub_free(ctx_alloc, heap_gpu_va); -+ mutex_unlock(&ctx_alloc->lock); ++out: ++ return reuse_large_page; +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h -new file mode 100644 -index 000000000..9aab7abfb ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h -@@ -0,0 +1,75 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#include -+ -+#ifndef _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ -+#define _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ -+ -+/** -+ * kbase_csf_heap_context_allocator_init - Initialize an allocator for heap -+ * contexts -+ * @ctx_alloc: Pointer to the heap context allocator to initialize. -+ * @kctx: Pointer to the kbase context. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+int kbase_csf_heap_context_allocator_init( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ struct kbase_context *const kctx); -+ -+/** -+ * kbase_csf_heap_context_allocator_term - Terminate an allocator for heap -+ * contexts -+ * @ctx_alloc: Pointer to the heap context allocator to terminate. -+ */ -+void kbase_csf_heap_context_allocator_term( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc); + +/** -+ * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure ++ * parse_memory_setup_entry() - Process an "interface memory setup" section + * -+ * @ctx_alloc: Pointer to the heap context allocator. ++ * @kbdev: Kbase device structure ++ * @fw: The firmware image containing the section ++ * @entry: Pointer to the start of the section ++ * @size: Size (in bytes) of the section + * -+ * If this function is successful then it returns the address of a -+ * zero-initialized heap context structure for use by the firmware. ++ * Read an "interface memory setup" section from the firmware image and create ++ * the necessary memory region including the MMU page tables. If successful ++ * the interface will be added to the kbase_device:csf.firmware_interfaces list. + * -+ * Return: GPU virtual address of the allocated heap context or 0 on failure. ++ * Return: 0 if successful, negative error code on failure + */ -+u64 kbase_csf_heap_context_allocator_alloc( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc); ++static int parse_memory_setup_entry(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, const u32 *entry, ++ unsigned int size) ++{ ++ int ret = 0; ++ const u32 flags = entry[0]; ++ const u32 virtual_start = entry[1]; ++ const u32 virtual_end = entry[2]; ++ const u32 data_start = entry[3]; ++ const u32 data_end = entry[4]; ++ u32 num_pages; ++ u32 num_pages_aligned; ++ char *name; ++ void *name_entry; ++ unsigned int name_len; ++ struct tagged_addr *phys = NULL; ++ struct kbase_csf_firmware_interface *interface = NULL; ++ bool allocated_pages = false, protected_mode = false; ++ unsigned long mem_flags = 0; ++ u32 cache_mode = 0; ++ struct protected_memory_allocation **pma = NULL; ++ bool reuse_pages = false; ++ bool is_small_page = true; ++ bool ignore_page_migration = true; + -+/** -+ * kbase_csf_heap_context_allocator_free - Free a heap context structure -+ * -+ * @ctx_alloc: Pointer to the heap context allocator. -+ * @heap_gpu_va: The GPU virtual address of a heap context structure that -+ * was allocated for the firmware. -+ * -+ * This function returns a heap context structure to the free pool of unused -+ * contexts for possible reuse by a future call to -+ * @kbase_csf_heap_context_allocator_alloc. 
-+ */ -+void kbase_csf_heap_context_allocator_free( -+ struct kbase_csf_heap_context_allocator *const ctx_alloc, -+ u64 const heap_gpu_va); ++ if (data_end < data_start) { ++ dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", ++ data_end, data_start); ++ return -EINVAL; ++ } ++ if (virtual_end < virtual_start) { ++ dev_err(kbdev->dev, "Firmware corrupt, virtual_end < virtual_start (0x%x<0x%x)\n", ++ virtual_end, virtual_start); ++ return -EINVAL; ++ } ++ if (data_end > fw->size) { ++ dev_err(kbdev->dev, "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx\n", ++ data_end, fw->size); ++ return -EINVAL; ++ } + -+#endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c -new file mode 100644 -index 000000000..da8dde239 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c -@@ -0,0 +1,2595 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if ((virtual_start & ~PAGE_MASK) != 0 || ++ (virtual_end & ~PAGE_MASK) != 0) { ++ dev_err(kbdev->dev, "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", ++ virtual_start, virtual_end); ++ return -EINVAL; ++ } + -+#include -+#include -+#include -+#include "device/mali_kbase_device.h" -+#include "mali_kbase_csf.h" -+#include ++ if ((flags & CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS) != flags) { ++ dev_err(kbdev->dev, "Firmware contains interface with unsupported flags (0x%x)\n", ++ flags); ++ return -EINVAL; ++ } + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include "mali_kbase_fence.h" -+#include "mali_kbase_sync.h" ++ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) ++ protected_mode = true; + -+static DEFINE_SPINLOCK(kbase_csf_fence_lock); -+#endif ++ if (protected_mode && kbdev->csf.pma_dev == NULL) { ++ dev_dbg(kbdev->dev, ++ "Protected memory allocator not found, Firmware protected mode entry will not be supported"); ++ return 0; ++ } + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+#define FENCE_WAIT_TIMEOUT_MS 3000 -+#endif ++ num_pages = (virtual_end - virtual_start) ++ >> PAGE_SHIFT; + -+static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, -+ bool drain_queue); ++ reuse_pages = ++ entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, ++ &pma, num_pages, &num_pages_aligned, &is_small_page); ++ if (!reuse_pages) ++ phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); + -+static void kcpu_queue_process_worker(struct work_struct *data); ++ if (!phys) ++ return -ENOMEM; + -+static int kbase_kcpu_map_import_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_import_info *import_info, -+ struct kbase_kcpu_command *current_command) -+{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ struct kbase_va_region *reg; -+ struct kbase_mem_phy_alloc *alloc; -+ struct page **pages; -+ struct tagged_addr *pa; -+ long i; -+ int ret = 0; ++ if (protected_mode) { ++ if (!reuse_pages) { ++ pma = kbase_csf_protected_memory_alloc( ++ kbdev, phys, num_pages_aligned, is_small_page); ++ } + -+ lockdep_assert_held(&kcpu_queue->lock); ++ if (!pma) ++ ret = -ENOMEM; ++ } else { ++ if (!reuse_pages) { ++ ret = kbase_mem_pool_alloc_pages( ++ kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, ++ is_small_page), ++ num_pages_aligned, phys, false, NULL); ++ ignore_page_migration = false; ++ } ++ } + -+ /* Take the processes mmap lock */ -+ down_read(kbase_mem_get_process_mmap_lock()); -+ kbase_gpu_vm_lock(kctx); ++ if (ret < 0) { ++ dev_err(kbdev->dev, ++ "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", ++ num_pages_aligned, virtual_start); ++ goto out; ++ } + -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ import_info->handle); ++ allocated_pages = true; ++ load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, ++ data_start, data_end); + -+ if (kbase_is_region_invalid_or_free(reg) || -+ !kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ /* Allocate enough memory for the struct kbase_csf_firmware_interface and ++ * the name of the interface. 
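++ * The name starts at byte offset INTERFACE_ENTRY_NAME_OFFSET within the
++ * entry; it is copied into the tail of the same allocation, immediately
++ * after the structure, and explicitly NUL-terminated.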
++ */ ++ name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET; ++ name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET); ++ if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) { ++ dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start"); + ret = -EINVAL; + goto out; + } + -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { -+ /* Pin the physical pages backing the user buffer while -+ * we are in the process context and holding the mmap lock. -+ * The dma mapping & GPU mapping of the pages would be done -+ * when the MAP_IMPORT operation is executed. -+ * -+ * Though the pages would be pinned, no reference is taken -+ * on the physical pages tracking object. When the last -+ * reference to the tracking object is dropped the pages -+ * would be unpinned if they weren't unpinned before. -+ * -+ * Region should be CPU cached: abort if it isn't. -+ */ -+ if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ ret = kbase_jd_user_buf_pin_pages(kctx, reg); -+ if (ret) -+ goto out; -+ -+ alloc = reg->gpu_alloc; -+ pa = kbase_get_gpu_phy_pages(reg); -+ pages = alloc->imported.user_buf.pages; -+ -+ for (i = 0; i < alloc->nents; i++) -+ pa[i] = as_tagged(page_to_phys(pages[i])); ++ interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL); ++ if (!interface) { ++ ret = -ENOMEM; ++ goto out; + } ++ name = (void *)(interface + 1); ++ memcpy(name, name_entry, name_len); ++ name[name_len] = 0; + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; -+ current_command->info.import.gpu_va = import_info->handle; ++ interface->name = name; ++ interface->phys = phys; ++ interface->reuse_pages = reuse_pages; ++ interface->is_small_page = is_small_page; ++ interface->num_pages = num_pages; ++ interface->num_pages_aligned = num_pages_aligned; ++ interface->virtual = virtual_start; ++ interface->kernel_map = NULL; ++ interface->flags = flags; ++ interface->data_start = data_start; ++ interface->data_end = data_end; ++ interface->pma = pma; + -+out: -+ kbase_gpu_vm_unlock(kctx); -+ /* Release the processes mmap lock */ -+ up_read(kbase_mem_get_process_mmap_lock()); ++ /* Discover the virtual execution address field after the end of the name ++ * field taking into account the NULL-termination character. ++ */ ++ interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1)); + -+ return ret; -+} ++ mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); + -+static int kbase_kcpu_unmap_import_prepare_internal( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_import_info *import_info, -+ struct kbase_kcpu_command *current_command, -+ enum base_kcpu_command_type type) -+{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ struct kbase_va_region *reg; -+ int ret = 0; ++ if (flags & CSF_FIRMWARE_ENTRY_SHARED) { ++ struct page **page_list; ++ u32 i; ++ pgprot_t cpu_map_prot; ++ u32 mem_attr_index = KBASE_REG_MEMATTR_VALUE(mem_flags); + -+ lockdep_assert_held(&kcpu_queue->lock); ++ /* Since SHARED memory type was used for mapping shared memory ++ * on GPU side, it can be mapped as cached on CPU side on both ++ * types of coherent platforms. 
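++ * Shared sections that are not coherent are expected to use the
++ * NON_CACHEABLE attribute and are instead mapped on the CPU side with
++ * write-combine attributes.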
++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) || ++ (cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT)) { ++ WARN_ON(mem_attr_index != ++ AS_MEMATTR_INDEX_SHARED); ++ cpu_map_prot = PAGE_KERNEL; ++ } else { ++ WARN_ON(mem_attr_index != ++ AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); ++ } + -+ kbase_gpu_vm_lock(kctx); ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), ++ GFP_KERNEL); ++ if (!page_list) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ import_info->handle); ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); + -+ if (kbase_is_region_invalid_or_free(reg) || -+ !kbase_mem_is_imported(reg->gpu_alloc->type)) { -+ ret = -EINVAL; -+ goto out; -+ } ++ interface->kernel_map = vmap(page_list, num_pages, VM_MAP, ++ cpu_map_prot); + -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { -+ /* The pages should have been pinned when MAP_IMPORT -+ * was enqueued previously. -+ */ -+ if (reg->gpu_alloc->nents != -+ reg->gpu_alloc->imported.user_buf.nr_pages) { -+ ret = -EINVAL; ++ kfree(page_list); ++ ++ if (!interface->kernel_map) { ++ ret = -ENOMEM; + goto out; + } + } + -+ current_command->type = type; -+ current_command->info.import.gpu_va = import_info->handle; -+ -+out: -+ kbase_gpu_vm_unlock(kctx); -+ -+ return ret; -+} ++ /* Start location of the shared interface area is fixed and is ++ * specified in firmware spec, and so there shall only be a ++ * single entry with that start address. ++ */ ++ if (virtual_start == (KBASE_REG_ZONE_MCU_SHARED_BASE << PAGE_SHIFT)) ++ kbdev->csf.shared_interface = interface; + -+static int kbase_kcpu_unmap_import_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_import_info *import_info, -+ struct kbase_kcpu_command *current_command) -+{ -+ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, -+ import_info, current_command, -+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); -+} ++ list_add(&interface->node, &kbdev->csf.firmware_interfaces); + -+static int kbase_kcpu_unmap_import_force_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_import_info *import_info, -+ struct kbase_kcpu_command *current_command) -+{ -+ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, -+ import_info, current_command, -+ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); -+} ++ if (!reuse_pages) { ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, ++ virtual_start >> PAGE_SHIFT, phys, ++ num_pages_aligned, mem_flags, ++ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, ++ ignore_page_migration); + -+/** -+ * kbase_jit_add_to_pending_alloc_list() - Pend JIT allocation -+ * -+ * @queue: The queue containing this JIT allocation -+ * @cmd: The JIT allocation that is blocking this queue -+ */ -+static void kbase_jit_add_to_pending_alloc_list( -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command *cmd) -+{ -+ struct kbase_context *const kctx = queue->kctx; -+ struct list_head *target_list_head = -+ &kctx->csf.kcpu_queues.jit_blocked_queues; -+ struct kbase_kcpu_command_queue *blocked_queue; ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to insert firmware pages\n"); ++ /* The interface has been added to the list, so cleanup will ++ * be handled by firmware unloading ++ */ ++ } ++ } + -+ lockdep_assert_held(&queue->lock); -+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); ++ dev_dbg(kbdev->dev, "Processed section 
'%s'", name); + -+ list_for_each_entry(blocked_queue, -+ &kctx->csf.kcpu_queues.jit_blocked_queues, -+ jit_blocked) { -+ struct kbase_kcpu_command const *const jit_alloc_cmd = -+ &blocked_queue->commands[blocked_queue->start_offset]; ++ return ret; + -+ WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC); -+ if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) { -+ target_list_head = &blocked_queue->jit_blocked; -+ break; ++out: ++ if (allocated_pages) { ++ if (!reuse_pages) { ++ if (protected_mode) { ++ kbase_csf_protected_memory_free( ++ kbdev, pma, num_pages_aligned, is_small_page); ++ } else { ++ kbase_mem_pool_free_pages( ++ kbase_mem_pool_group_select( ++ kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), ++ num_pages_aligned, phys, false, false); ++ } + } + } + -+ list_add_tail(&queue->jit_blocked, target_list_head); ++ if (!reuse_pages) ++ kfree(phys); ++ ++ kfree(interface); ++ return ret; +} + +/** -+ * kbase_kcpu_jit_allocate_process() - Process JIT allocation ++ * parse_timeline_metadata_entry() - Process a "timeline metadata" section + * -+ * @queue: The queue containing this JIT allocation -+ * @cmd: The JIT allocation command ++ * Return: 0 if successful, negative error code on failure + * -+ * Return: -+ * * 0 - allocation OK -+ * * -EINVAL - missing info or JIT ID still in use -+ * * -EAGAIN - Retry -+ * * -ENOMEM - no memory. unable to allocate ++ * @kbdev: Kbase device structure ++ * @fw: Firmware image containing the section ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section + */ -+static int kbase_kcpu_jit_allocate_process( -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command *cmd) ++static int parse_timeline_metadata_entry(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, const u32 *entry, ++ unsigned int size) +{ -+ struct kbase_context *const kctx = queue->kctx; -+ struct kbase_kcpu_command_jit_alloc_info *alloc_info = -+ &cmd->info.jit_alloc; -+ struct base_jit_alloc_info *info = alloc_info->info; -+ struct kbase_vmap_struct mapping; -+ struct kbase_va_region *reg; -+ u32 count = alloc_info->count; -+ u64 *ptr, new_addr; -+ u32 i; -+ int ret; -+ -+ lockdep_assert_held(&queue->lock); ++ const u32 data_start = entry[0]; ++ const u32 data_size = entry[1]; ++ const u32 data_end = data_start + data_size; ++ const char *name = (char *)&entry[2]; ++ struct firmware_timeline_metadata *metadata; ++ const unsigned int name_len = ++ size - TL_METADATA_ENTRY_NAME_OFFSET; ++ size_t allocation_size = sizeof(*metadata) + name_len + 1 + data_size; + -+ if (WARN_ON(!info)) ++ if (data_end > fw->size) { ++ dev_err(kbdev->dev, ++ "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx", ++ data_end, fw->size); + return -EINVAL; -+ -+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); -+ -+ /* Check if all JIT IDs are not in use */ -+ for (i = 0; i < count; i++, info++) { -+ /* The JIT ID is still in use so fail the allocation */ -+ if (kctx->jit_alloc[info->id]) { -+ dev_dbg(kctx->kbdev->dev, "JIT ID still in use"); -+ ret = -EINVAL; -+ goto fail; -+ } + } + -+ if (alloc_info->blocked) { -+ list_del(&queue->jit_blocked); -+ alloc_info->blocked = false; -+ } ++ /* Allocate enough space for firmware_timeline_metadata, ++ * its name and the content. 
++ */ ++ metadata = kmalloc(allocation_size, GFP_KERNEL); ++ if (!metadata) ++ return -ENOMEM; + -+ /* Now start the allocation loop */ -+ for (i = 0, info = alloc_info->info; i < count; i++, info++) { -+ /* Create a JIT allocation */ -+ reg = kbase_jit_allocate(kctx, info, true); -+ if (!reg) { -+ bool can_block = false; -+ struct kbase_kcpu_command const *jit_cmd; ++ metadata->name = (char *)(metadata + 1); ++ metadata->data = (char *)(metadata + 1) + name_len + 1; ++ metadata->size = data_size; + -+ list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { -+ if (jit_cmd == cmd) -+ break; ++ memcpy(metadata->name, name, name_len); ++ metadata->name[name_len] = 0; + -+ if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) { -+ u8 const *const free_ids = jit_cmd->info.jit_free.ids; ++ /* Copy metadata's content. */ ++ memcpy(metadata->data, fw->data + data_start, data_size); + -+ if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { -+ /* -+ * A JIT free which is active -+ * and submitted before this -+ * command. -+ */ -+ can_block = true; -+ break; -+ } -+ } -+ } ++ list_add(&metadata->node, &kbdev->csf.firmware_timeline_metadata); + -+ if (!can_block) { -+ /* -+ * No prior JIT_FREE command is active. Roll -+ * back previous allocations and fail. -+ */ -+ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); -+ ret = -ENOMEM; -+ goto fail_rollback; -+ } ++ dev_dbg(kbdev->dev, "Timeline metadata '%s'", metadata->name); + -+ /* There are pending frees for an active allocation -+ * so we should wait to see whether they free the -+ * memory. Add to the list of atoms for which JIT -+ * allocation is pending. -+ */ -+ kbase_jit_add_to_pending_alloc_list(queue, cmd); -+ alloc_info->blocked = true; ++ return 0; ++} + -+ /* Rollback, the whole set will be re-attempted */ -+ while (i-- > 0) { -+ info--; -+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); -+ kctx->jit_alloc[info->id] = NULL; -+ } ++/** ++ * parse_build_info_metadata_entry() - Process a "build info metadata" section ++ * @kbdev: Kbase device structure ++ * @fw: Firmware image containing the section ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ * ++ * This prints the git SHA of the firmware on frimware load. ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++static int parse_build_info_metadata_entry(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, ++ const u32 *entry, unsigned int size) ++{ ++ const u32 meta_start_addr = entry[0]; ++ char *ptr = NULL; ++ size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN); + -+ ret = -EAGAIN; -+ goto fail; -+ } ++ /* Only print git SHA to avoid releasing sensitive information */ ++ ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN); ++ /* Check that we won't overrun the found string */ ++ if (ptr && ++ strlen(ptr) >= BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + sha_pattern_len) { ++ char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1]; ++ int i = 0; + -+ /* Bind it to the user provided ID. */ -+ kctx->jit_alloc[info->id] = reg; -+ } ++ /* Move ptr to start of SHA */ ++ ptr += sha_pattern_len; ++ for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) { ++ /* Ensure that the SHA is made up of hex digits */ ++ if (!isxdigit(ptr[i])) ++ break; + -+ for (i = 0, info = alloc_info->info; i < count; i++, info++) { -+ /* -+ * Write the address of the JIT allocation to the user provided -+ * GPU allocation. 
-+ */ -+ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), -+ KBASE_REG_CPU_WR, &mapping); -+ if (!ptr) { -+ ret = -ENOMEM; -+ goto fail_rollback; ++ git_sha[i] = ptr[i]; + } + -+ reg = kctx->jit_alloc[info->id]; -+ new_addr = reg->start_pfn << PAGE_SHIFT; -+ *ptr = new_addr; -+ kbase_vunmap(kctx, &mapping); -+ } ++ /* Check if the next char indicates git SHA is dirty */ ++ if (ptr[i] == ' ' || ptr[i] == '+') { ++ git_sha[i] = ptr[i]; ++ i++; ++ } ++ git_sha[i] = '\0'; + -+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); ++ dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha); ++ } else ++ dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n"); + + return 0; -+ -+fail_rollback: -+ /* Roll back completely */ -+ for (i = 0, info = alloc_info->info; i < count; i++, info++) { -+ /* Free the allocations that were successful. -+ * Mark all the allocations including the failed one and the -+ * other un-attempted allocations in the set, so we know they -+ * are in use. -+ */ -+ if (kctx->jit_alloc[info->id]) -+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); -+ -+ kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; -+ } -+fail: -+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); -+ -+ return ret; +} + -+static int kbase_kcpu_jit_allocate_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_jit_alloc_info *alloc_info, -+ struct kbase_kcpu_command *current_command) ++/** ++ * load_firmware_entry() - Process an entry from a firmware image ++ * ++ * @kbdev: Kbase device ++ * @fw: Firmware image containing the entry ++ * @offset: Byte offset within the image of the entry to load ++ * @header: Header word of the entry ++ * ++ * Read an entry from a firmware image and do any necessary work (e.g. loading ++ * the data into page accessible to the MCU). ++ * ++ * Unknown entries are ignored if the 'optional' flag is set within the entry, ++ * otherwise the function will fail with -EINVAL ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_csf_mcu_fw *const fw, ++ u32 offset, u32 header) +{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ void __user *data = u64_to_user_ptr(alloc_info->info); -+ struct base_jit_alloc_info *info = NULL; -+ u32 count = alloc_info->count; -+ int ret = 0; -+ u32 i; -+ -+ lockdep_assert_held(&kcpu_queue->lock); -+ -+ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) || -+ (count > kcpu_queue->kctx->jit_max_allocations) || (!data) || -+ !kbase_mem_allow_alloc(kctx)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); -+ if (!info) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ const unsigned int type = entry_type(header); ++ unsigned int size = entry_size(header); ++ const bool optional = entry_optional(header); ++ /* Update is used with configuration and tracebuffer entries to ++ * initiate a FIRMWARE_CONFIG_UPDATE, instead of triggering a ++ * silent reset. 
++ */ ++ const bool updatable = entry_update(header); ++ const u32 *entry = (void *)(fw->data + offset); + -+ if (copy_from_user(info, data, sizeof(*info) * count) != 0) { -+ ret = -EINVAL; -+ goto out_free; ++ if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) { ++ dev_err(kbdev->dev, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", ++ offset, size); ++ return -EINVAL; + } + -+ for (i = 0; i < count; i++) { -+ ret = kbasep_jit_alloc_validate(kctx, &info[i]); -+ if (ret) -+ goto out_free; ++ if (size < sizeof(*entry)) { ++ dev_err(kbdev->dev, "Size field too small: %u\n", size); ++ return -EINVAL; + } + -+ /* Search for duplicate JIT ids */ -+ for (i = 0; i < (count - 1); i++) { -+ u32 j; ++ /* Remove the header */ ++ entry++; ++ size -= sizeof(*entry); + -+ for (j = (i + 1); j < count; j++) { -+ if (info[i].id == info[j].id) { -+ ret = -EINVAL; -+ goto out_free; -+ } ++ switch (type) { ++ case CSF_FIRMWARE_ENTRY_TYPE_INTERFACE: ++ /* Interface memory setup */ ++ if (size < INTERFACE_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Interface memory setup entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return parse_memory_setup_entry(kbdev, fw, entry, size); ++ case CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION: ++ /* Configuration option */ ++ if (size < CONFIGURATION_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Configuration option entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return kbase_csf_firmware_cfg_option_entry_parse( ++ kbdev, fw, entry, size, updatable); ++ case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: ++ /* Trace buffer */ ++ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return kbase_csf_firmware_parse_trace_buffer_entry( ++ kbdev, entry, size, updatable); ++ case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA: ++ /* Meta data section */ ++ if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Timeline metadata entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return parse_timeline_metadata_entry(kbdev, fw, entry, size); ++ case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA: ++ if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return parse_build_info_metadata_entry(kbdev, fw, entry, size); ++ case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: ++ /* Function call list section */ ++ if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); ++ return 0; ++ case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: ++ /* Core Dump section */ ++ if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); ++ return -EINVAL; ++ } ++ return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); ++ default: ++ if (!optional) { ++ dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", ++ type); ++ return -EINVAL; + } + } + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC; -+ current_command->info.jit_alloc.info = info; -+ current_command->info.jit_alloc.count = count; -+ current_command->info.jit_alloc.blocked = false; -+ 
mutex_lock(&kctx->csf.kcpu_queues.jit_lock); -+ list_add_tail(¤t_command->info.jit_alloc.node, -+ &kctx->csf.kcpu_queues.jit_cmds_head); -+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); -+ + return 0; -+out_free: -+ kfree(info); -+out: -+ return ret; +} + -+/** -+ * kbase_kcpu_jit_allocate_finish() - Finish handling the JIT_ALLOC command -+ * -+ * @queue: The queue containing this JIT allocation -+ * @cmd: The JIT allocation command -+ */ -+static void kbase_kcpu_jit_allocate_finish( -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command *cmd) ++static void free_global_iface(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&queue->lock); ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + -+ mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock); ++ if (iface->groups) { ++ unsigned int gid; + -+ /* Remove this command from the jit_cmds_head list */ -+ list_del(&cmd->info.jit_alloc.node); ++ for (gid = 0; gid < iface->group_num; ++gid) ++ kfree(iface->groups[gid].streams); + -+ /* -+ * If we get to this point we must have already cleared the blocked -+ * flag, otherwise it'd be a bug. -+ */ -+ if (WARN_ON(cmd->info.jit_alloc.blocked)) { -+ list_del(&queue->jit_blocked); -+ cmd->info.jit_alloc.blocked = false; ++ kfree(iface->groups); ++ iface->groups = NULL; + } -+ -+ mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock); -+ -+ kfree(cmd->info.jit_alloc.info); +} + +/** -+ * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands ++ * iface_gpu_va_to_cpu - Convert a GPU VA address within the shared interface ++ * region to a CPU address, using the existing mapping. ++ * @kbdev: Device pointer ++ * @gpu_va: GPU VA to convert + * -+ * @kctx: The context containing the blocked JIT_ALLOC commands ++ * Return: A CPU pointer to the location within the shared interface region, or ++ * NULL on failure. + */ -+static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) ++static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) +{ -+ struct kbase_kcpu_command_queue *blocked_queue; ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ u8 *kernel_base = interface->kernel_map; + -+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); ++ if (gpu_va < interface->virtual || ++ gpu_va >= interface->virtual + interface->num_pages * PAGE_SIZE) { ++ dev_err(kbdev->dev, ++ "Interface address 0x%x not within %u-page region at 0x%x", ++ gpu_va, interface->num_pages, ++ interface->virtual); ++ return NULL; ++ } + -+ /* -+ * Reschedule all queues blocked by JIT_ALLOC commands. -+ * NOTE: This code traverses the list of blocked queues directly. It -+ * only works as long as the queued works are not executed at the same -+ * time. This precondition is true since we're holding the -+ * kbase_csf_kcpu_queue_context.jit_lock . 
-+ */ -+ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) -+ queue_work(blocked_queue->wq, &blocked_queue->work); ++ return (void *)(kernel_base + (gpu_va - interface->virtual)); +} + -+static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command *const cmd) ++static int parse_cmd_stream_info(struct kbase_device *kbdev, ++ struct kbase_csf_cmd_stream_info *sinfo, ++ u32 *stream_base) +{ -+ struct kbase_kcpu_command_jit_free_info const *const free_info = -+ &cmd->info.jit_free; -+ u8 const *const ids = free_info->ids; -+ u32 const count = free_info->count; -+ u32 i; -+ int rc = 0; -+ struct kbase_context *kctx = queue->kctx; ++ sinfo->kbdev = kbdev; ++ sinfo->features = stream_base[STREAM_FEATURES/4]; ++ sinfo->input = iface_gpu_va_to_cpu(kbdev, ++ stream_base[STREAM_INPUT_VA/4]); ++ sinfo->output = iface_gpu_va_to_cpu(kbdev, ++ stream_base[STREAM_OUTPUT_VA/4]); + -+ if (WARN_ON(!ids)) ++ if (sinfo->input == NULL || sinfo->output == NULL) + return -EINVAL; + -+ lockdep_assert_held(&queue->lock); -+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); ++ return 0; ++} + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev, -+ queue); ++static int parse_cmd_stream_group_info(struct kbase_device *kbdev, ++ struct kbase_csf_cmd_stream_group_info *ginfo, ++ u32 *group_base, u32 group_stride) ++{ ++ unsigned int sid; + -+ for (i = 0; i < count; i++) { -+ u64 pages_used = 0; -+ int item_err = 0; ++ ginfo->kbdev = kbdev; ++ ginfo->features = group_base[GROUP_FEATURES/4]; ++ ginfo->input = iface_gpu_va_to_cpu(kbdev, ++ group_base[GROUP_INPUT_VA/4]); ++ ginfo->output = iface_gpu_va_to_cpu(kbdev, ++ group_base[GROUP_OUTPUT_VA/4]); + -+ if (!kctx->jit_alloc[ids[i]]) { -+ dev_dbg(kctx->kbdev->dev, "invalid JIT free ID"); -+ rc = -EINVAL; -+ item_err = rc; -+ } else { -+ struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]]; ++ if (ginfo->input == NULL || ginfo->output == NULL) ++ return -ENOMEM; + -+ /* -+ * If the ID is valid but the allocation request failed, still -+ * succeed this command but don't try and free the allocation. -+ */ -+ if (reg != KBASE_RESERVED_REG_JIT_ALLOC) { -+ pages_used = reg->gpu_alloc->nents; -+ kbase_jit_free(kctx, reg); -+ } ++ ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE/4]; ++ ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE/4]; ++ ginfo->stream_num = group_base[GROUP_STREAM_NUM/4]; + -+ kctx->jit_alloc[ids[i]] = NULL; -+ } ++ if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP || ++ ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) { ++ dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u", ++ ginfo->stream_num, ++ MIN_SUPPORTED_STREAMS_PER_GROUP, ++ MAX_SUPPORTED_STREAMS_PER_GROUP); ++ return -EINVAL; ++ } + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( -+ queue->kctx->kbdev, queue, item_err, pages_used); ++ ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE/4]; ++ ++ if (ginfo->stream_num * ginfo->stream_stride > group_stride) { ++ dev_err(kbdev->dev, ++ "group stride of 0x%x exceeded by %u CSs with stride 0x%x", ++ group_stride, ginfo->stream_num, ++ ginfo->stream_stride); ++ return -EINVAL; + } + -+ /* -+ * Remove this command from the jit_cmds_head list and retry pending -+ * allocations. 
-+ */ -+ list_del(&cmd->info.jit_free.node); -+ kbase_kcpu_jit_retry_pending_allocs(kctx); ++ ginfo->streams = kmalloc_array(ginfo->stream_num, ++ sizeof(*ginfo->streams), GFP_KERNEL); + -+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); ++ if (!ginfo->streams) ++ return -ENOMEM; + -+ /* Free the list of ids */ -+ kfree(ids); ++ for (sid = 0; sid < ginfo->stream_num; sid++) { ++ int err; ++ u32 *stream_base = group_base + (STREAM_CONTROL_0 + ++ ginfo->stream_stride * sid) / 4; + -+ return rc; ++ err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid], ++ stream_base); ++ if (err < 0) { ++ /* caller will free the memory for CSs array */ ++ return err; ++ } ++ } ++ ++ return 0; +} + -+static int kbase_kcpu_jit_free_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_jit_free_info *free_info, -+ struct kbase_kcpu_command *current_command) ++static u32 get_firmware_version(struct kbase_device *kbdev) +{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ void __user *data = u64_to_user_ptr(free_info->ids); -+ u8 *ids; -+ u32 count = free_info->count; -+ int ret; -+ u32 i; -+ -+ lockdep_assert_held(&kcpu_queue->lock); ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ u32 *shared_info = interface->kernel_map; + -+ /* Sanity checks */ -+ if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) { -+ ret = -EINVAL; -+ goto out; -+ } ++ return shared_info[GLB_VERSION/4]; ++} + -+ /* Copy the information for safe access and future storage */ -+ ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); -+ if (!ids) { -+ ret = -ENOMEM; -+ goto out; -+ } ++static int parse_capabilities(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ u32 *shared_info = interface->kernel_map; ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ unsigned int gid; + -+ if (!data) { -+ ret = -EINVAL; -+ goto out_free; -+ } ++ /* All offsets are in bytes, so divide by 4 for access via a u32 pointer ++ */ + -+ if (copy_from_user(ids, data, sizeof(*ids) * count)) { -+ ret = -EINVAL; -+ goto out_free; ++ /* The version number of the global interface is expected to be a ++ * non-zero value. If it's not, the firmware may not have booted. ++ */ ++ iface->version = get_firmware_version(kbdev); ++ if (!iface->version) { ++ dev_err(kbdev->dev, "Version check failed. 
Firmware may have failed to boot."); ++ return -EINVAL; + } + -+ for (i = 0; i < count; i++) { -+ /* Fail the command if ID sent is zero */ -+ if (!ids[i]) { -+ ret = -EINVAL; -+ goto out_free; -+ } -+ } + -+ /* Search for duplicate JIT ids */ -+ for (i = 0; i < (count - 1); i++) { -+ u32 j; ++ iface->kbdev = kbdev; ++ iface->features = shared_info[GLB_FEATURES/4]; ++ iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA/4]); ++ iface->output = iface_gpu_va_to_cpu(kbdev, ++ shared_info[GLB_OUTPUT_VA/4]); + -+ for (j = (i + 1); j < count; j++) { -+ if (ids[i] == ids[j]) { -+ ret = -EINVAL; -+ goto out_free; -+ } -+ } -+ } ++ if (iface->input == NULL || iface->output == NULL) ++ return -ENOMEM; + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE; -+ current_command->info.jit_free.ids = ids; -+ current_command->info.jit_free.count = count; -+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); -+ list_add_tail(¤t_command->info.jit_free.node, -+ &kctx->csf.kcpu_queues.jit_cmds_head); -+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); ++ iface->group_num = shared_info[GLB_GROUP_NUM/4]; + -+ return 0; -+out_free: -+ kfree(ids); -+out: -+ return ret; -+} ++ if (iface->group_num < MIN_SUPPORTED_CSGS || ++ iface->group_num > MAX_SUPPORTED_CSGS) { ++ dev_err(kbdev->dev, ++ "Interface containing %u CSGs outside of range %u-%u", ++ iface->group_num, MIN_SUPPORTED_CSGS, ++ MAX_SUPPORTED_CSGS); ++ return -EINVAL; ++ } + -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+static int kbase_csf_queue_group_suspend_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_group_suspend_info *suspend_buf, -+ struct kbase_kcpu_command *current_command) -+{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ struct kbase_suspend_copy_buffer *sus_buf = NULL; -+ const u32 csg_suspend_buf_size = -+ kctx->kbdev->csf.global_iface.groups[0].suspend_size; -+ u64 addr = suspend_buf->buffer; -+ u64 page_addr = addr & PAGE_MASK; -+ u64 end_addr = addr + csg_suspend_buf_size - 1; -+ u64 last_page_addr = end_addr & PAGE_MASK; -+ int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1; -+ int pinned_pages = 0, ret = 0; -+ struct kbase_va_region *reg; ++ iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; ++ iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; + -+ lockdep_assert_held(&kcpu_queue->lock); ++ if (iface->version >= kbase_csf_interface_version(1, 1, 0)) ++ iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; ++ else ++ iface->instr_features = 0; + -+ if (suspend_buf->size < csg_suspend_buf_size) ++ if ((GROUP_CONTROL_0 + ++ (unsigned long)iface->group_num * iface->group_stride) > ++ (interface->num_pages * PAGE_SIZE)) { ++ dev_err(kbdev->dev, ++ "interface size of %u pages exceeded by %u CSGs with stride 0x%x", ++ interface->num_pages, iface->group_num, ++ iface->group_stride); + return -EINVAL; ++ } + -+ ret = kbase_csf_queue_group_handle_is_valid(kctx, -+ suspend_buf->group_handle); -+ if (ret) -+ return ret; ++ WARN_ON(iface->groups); + -+ sus_buf = kzalloc(sizeof(*sus_buf), GFP_KERNEL); -+ if (!sus_buf) ++ iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), ++ GFP_KERNEL); ++ if (!iface->groups) + return -ENOMEM; + -+ sus_buf->size = csg_suspend_buf_size; -+ sus_buf->nr_pages = nr_pages; -+ sus_buf->offset = addr & ~PAGE_MASK; ++ for (gid = 0; gid < iface->group_num; gid++) { ++ int err; ++ u32 *group_base = shared_info + (GROUP_CONTROL_0 + ++ iface->group_stride * gid) / 4; + -+ sus_buf->pages = kcalloc(nr_pages, 
sizeof(struct page *), GFP_KERNEL); -+ if (!sus_buf->pages) { -+ ret = -ENOMEM; -+ goto out_clean_sus_buf; ++ err = parse_cmd_stream_group_info(kbdev, &iface->groups[gid], ++ group_base, iface->group_stride); ++ if (err < 0) { ++ free_global_iface(kbdev); ++ return err; ++ } + } + -+ /* Check if the page_addr is a valid GPU VA from SAME_VA zone, -+ * otherwise consider it is a CPU VA corresponding to the Host -+ * memory allocated by userspace. -+ */ -+ kbase_gpu_vm_lock(kctx); -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ page_addr); ++ return 0; ++} + -+ if (kbase_is_region_invalid_or_free(reg)) { -+ kbase_gpu_vm_unlock(kctx); -+ pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, -+ sus_buf->pages); -+ kbase_gpu_vm_lock(kctx); ++static inline void access_firmware_memory_common(struct kbase_device *kbdev, ++ struct kbase_csf_firmware_interface *interface, u32 offset_bytes, ++ u32 *value, const bool read) ++{ ++ u32 page_num = offset_bytes >> PAGE_SHIFT; ++ u32 offset_in_page = offset_bytes & ~PAGE_MASK; ++ struct page *target_page = as_page(interface->phys[page_num]); ++ uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); ++ u32 *addr = (u32 *)(cpu_addr + offset_in_page); + -+ if (pinned_pages < 0) { -+ ret = pinned_pages; -+ goto out_clean_pages; -+ } -+ if (pinned_pages != nr_pages) { -+ ret = -EINVAL; -+ goto out_clean_pages; -+ } ++ if (read) { ++ kbase_sync_single_for_device(kbdev, ++ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, ++ sizeof(u32), DMA_BIDIRECTIONAL); ++ *value = *addr; + } else { -+ struct tagged_addr *page_array; -+ u64 start, end, i; ++ *addr = *value; ++ kbase_sync_single_for_device(kbdev, ++ kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, ++ sizeof(u32), DMA_BIDIRECTIONAL); ++ } + -+ if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || -+ (kbase_reg_current_backed_size(reg) < nr_pages) || -+ !(reg->flags & KBASE_REG_CPU_WR) || -+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || -+ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) { -+ ret = -EINVAL; -+ goto out_clean_pages; -+ } ++ kunmap_atomic((u32 *)cpu_addr); ++} + -+ start = PFN_DOWN(page_addr) - reg->start_pfn; -+ end = start + nr_pages; ++static inline void access_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value, const bool read) ++{ ++ struct kbase_csf_firmware_interface *interface, *access_interface = NULL; ++ u32 offset_bytes = 0; + -+ if (end > reg->nr_pages) { -+ ret = -EINVAL; -+ goto out_clean_pages; ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ if ((gpu_addr >= interface->virtual) && ++ (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { ++ offset_bytes = gpu_addr - interface->virtual; ++ access_interface = interface; ++ break; + } ++ } + -+ sus_buf->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); -+ page_array = kbase_get_cpu_phy_pages(reg); -+ page_array += start; ++ if (access_interface) ++ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); ++ else ++ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); ++} + -+ for (i = 0; i < nr_pages; i++, page_array++) -+ sus_buf->pages[i] = as_page(*page_array); -+ } ++static inline void access_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value, const bool read) ++{ ++ struct kbase_csf_firmware_interface *interface, 
*access_interface = NULL; ++ u32 offset_bytes = 0; + -+ kbase_gpu_vm_unlock(kctx); -+ current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND; -+ current_command->info.suspend_buf_copy.sus_buf = sus_buf; -+ current_command->info.suspend_buf_copy.group_handle = -+ suspend_buf->group_handle; -+ return ret; ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ if ((gpu_addr >= interface->virtual_exe_start) && ++ (gpu_addr < interface->virtual_exe_start + ++ (interface->num_pages << PAGE_SHIFT))) { ++ offset_bytes = gpu_addr - interface->virtual_exe_start; ++ access_interface = interface; + -+out_clean_pages: -+ kbase_gpu_vm_unlock(kctx); -+ kfree(sus_buf->pages); -+out_clean_sus_buf: -+ kfree(sus_buf); ++ /* If there's an overlap in execution address range between a moved and a ++ * non-moved areas, always prefer the moved one. The idea is that FW may ++ * move sections around during init time, but after the layout is settled, ++ * any moved sections are going to override non-moved areas at the same ++ * location. ++ */ ++ if (interface->virtual_exe_start != interface->virtual) ++ break; ++ } ++ } + -+ return ret; ++ if (access_interface) ++ access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); ++ else ++ dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); +} + -+static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, -+ struct kbase_suspend_copy_buffer *sus_buf, -+ u8 group_handle) ++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value) +{ -+ return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle); ++ access_firmware_memory(kbdev, gpu_addr, value, true); +} -+#endif + -+static enum kbase_csf_event_callback_action event_cqs_callback(void *param) ++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value) +{ -+ struct kbase_kcpu_command_queue *kcpu_queue = -+ (struct kbase_kcpu_command_queue *)param; -+ -+ queue_work(kcpu_queue->wq, &kcpu_queue->work); ++ access_firmware_memory(kbdev, gpu_addr, &value, false); ++} + -+ return KBASE_CSF_EVENT_CALLBACK_KEEP; ++void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value) ++{ ++ access_firmware_memory_exe(kbdev, gpu_addr, value, true); +} + -+static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) ++void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value) +{ -+ WARN_ON(!cqs_wait->nr_objs); -+ WARN_ON(!cqs_wait->objs); -+ WARN_ON(!cqs_wait->signaled); -+ WARN_ON(!queue->cqs_wait_count); ++ access_firmware_memory_exe(kbdev, gpu_addr, &value, false); ++} + -+ if (--queue->cqs_wait_count == 0) { -+ kbase_csf_event_wait_remove(queue->kctx, -+ event_cqs_callback, queue); -+ } ++void kbase_csf_firmware_cs_input( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+ kfree(cqs_wait->signaled); -+ kfree(cqs_wait->objs); -+ cqs_wait->signaled = NULL; -+ cqs_wait->objs = NULL; ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); ++ input_page_write(info->input, offset, value); +} + -+static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) ++u32 kbase_csf_firmware_cs_input_read( ++ const struct kbase_csf_cmd_stream_info 
*const info, ++ const u32 offset) +{ -+ u32 i; -+ -+ lockdep_assert_held(&queue->lock); ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); + -+ if (WARN_ON(!cqs_wait->objs)) -+ return -EINVAL; ++ dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+ /* Skip the CQS waits that have already been signaled when processing */ -+ for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) { -+ if (!test_bit(i, cqs_wait->signaled)) { -+ struct kbase_vmap_struct *mapping; -+ bool sig_set; -+ u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, -+ cqs_wait->objs[i].addr, &mapping); ++void kbase_csf_firmware_cs_input_mask( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+ if (!queue->command_started) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev, -+ queue); -+ queue->command_started = true; -+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, -+ queue, cqs_wait->nr_objs, 0); -+ } ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ input_page_partial_write(info->input, offset, value, mask); ++} + -+ if (!evt) { -+ dev_warn(kbdev->dev, -+ "Sync memory %llx already freed", cqs_wait->objs[i].addr); -+ queue->has_error = true; -+ return -EINVAL; -+ } ++u32 kbase_csf_firmware_cs_output( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); + -+ sig_set = -+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; -+ if (sig_set) { -+ bool error = false; ++ dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+ bitmap_set(cqs_wait->signaled, i, 1); -+ if ((cqs_wait->inherit_err_flags & (1U << i)) && -+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { -+ queue->has_error = true; -+ error = true; -+ } ++void kbase_csf_firmware_csg_input( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset, const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, -+ queue, cqs_wait->objs[i].addr, -+ error); ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", ++ offset, value); ++ input_page_write(info->input, offset, value); ++} + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( -+ kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); -+ queue->command_started = false; -+ } ++u32 kbase_csf_firmware_csg_input_read( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); + -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+ if (!sig_set) -+ break; -+ } -+ } ++void kbase_csf_firmware_csg_input_mask( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset, const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+ /* For the queue to progress further, all cqs objects should get -+ * signaled. 
-+ */ -+ return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ input_page_partial_write(info->input, offset, value, mask); +} + -+static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) ++u32 kbase_csf_firmware_csg_output( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) +{ -+ return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); ++ ++ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); ++ return val; +} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); + -+static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) ++void kbase_csf_firmware_global_input( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value) +{ -+ BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); -+ BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); -+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); ++ const struct kbase_device * const kbdev = iface->kbdev; + -+ switch (data_type) { -+ default: -+ return false; -+ case BASEP_CQS_DATA_TYPE_U32: -+ return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; -+ case BASEP_CQS_DATA_TYPE_U64: -+ return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; -+ } ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); ++ input_page_write(iface->input, offset, value); +} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); + -+static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, -+ struct base_kcpu_command_cqs_wait_info *cqs_wait_info, -+ struct kbase_kcpu_command *current_command) ++void kbase_csf_firmware_global_input_mask( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value, const u32 mask) +{ -+ struct base_cqs_wait_info *objs; -+ unsigned int nr_objs = cqs_wait_info->nr_objs; -+ unsigned int i; ++ const struct kbase_device * const kbdev = iface->kbdev; + -+ lockdep_assert_held(&queue->lock); ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ input_page_partial_write(iface->input, offset, value, mask); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); + -+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) -+ return -EINVAL; ++u32 kbase_csf_firmware_global_input_read( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = input_page_read(iface->input, offset); + -+ if (!nr_objs) -+ return -EINVAL; ++ dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); -+ if (!objs) -+ return -ENOMEM; ++u32 kbase_csf_firmware_global_output( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = output_page_read(iface->output, offset); + -+ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), -+ nr_objs * sizeof(*objs))) { -+ kfree(objs); -+ return -ENOMEM; -+ } ++ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); + -+ /* Check the CQS objects as early as possible. 
By checking their alignment -+ * (required alignment equals to size for Sync32 and Sync64 objects), we can -+ * prevent overrunning the supplied event page. -+ */ -+ for (i = 0; i < nr_objs; i++) { -+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { -+ kfree(objs); -+ return -EINVAL; -+ } -+ } ++/** ++ * csf_doorbell_offset() - Calculate the offset to the CSF host doorbell ++ * @doorbell_nr: Doorbell number ++ * ++ * Return: CSF host register offset for the specified doorbell number. ++ */ ++static u32 csf_doorbell_offset(int doorbell_nr) ++{ ++ WARN_ON(doorbell_nr < 0); ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + -+ if (++queue->cqs_wait_count == 1) { -+ if (kbase_csf_event_wait_add(queue->kctx, -+ event_cqs_callback, queue)) { -+ kfree(objs); -+ queue->cqs_wait_count--; -+ return -ENOMEM; -+ } -+ } ++ return CSF_HW_DOORBELL_PAGE_OFFSET + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); ++} + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT; -+ current_command->info.cqs_wait.nr_objs = nr_objs; -+ current_command->info.cqs_wait.objs = objs; -+ current_command->info.cqs_wait.inherit_err_flags = -+ cqs_wait_info->inherit_err_flags; ++void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) ++{ ++ kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); ++} ++EXPORT_SYMBOL(kbase_csf_ring_doorbell); + -+ current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs), -+ sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL); -+ if (!current_command->info.cqs_wait.signaled) { -+ if (--queue->cqs_wait_count == 0) { -+ kbase_csf_event_wait_remove(queue->kctx, -+ event_cqs_callback, queue); ++/** ++ * handle_internal_firmware_fatal - Handler for CS internal firmware fault. ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * Report group fatal error to user space for all GPU command queue groups ++ * in the device, terminate them and reset GPU. ++ */ ++static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) ++{ ++ int as; ++ ++ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { ++ unsigned long flags; ++ struct kbase_context *kctx; ++ struct kbase_fault fault; ++ ++ if (as == MCU_AS_NR) ++ continue; ++ ++ /* Only handle the fault for an active address space. Lock is ++ * taken here to atomically get reference to context in an ++ * active address space and retain its refcount. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); ++ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ continue; + } + -+ kfree(objs); -+ return -ENOMEM; ++ fault = (struct kbase_fault) { ++ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, ++ }; ++ ++ kbase_csf_ctx_handle_fault(kctx, &fault); ++ kbase_ctx_sched_release_ctx_lock(kctx); + } + -+ return 0; ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); +} + -+static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_set_info *cqs_set) ++/** ++ * firmware_error_worker - Worker function for handling firmware internal error ++ * ++ * @data: Pointer to a work_struct embedded in kbase device. 
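++ *        (i.e. the csf.fw_error_work member of struct kbase_device).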
++ * ++ * Handle the CS internal firmware error ++ */ ++static void firmware_error_worker(struct work_struct *const data) +{ -+ unsigned int i; ++ struct kbase_device *const kbdev = ++ container_of(data, struct kbase_device, csf.fw_error_work); + -+ lockdep_assert_held(&queue->lock); ++ handle_internal_firmware_fatal(kbdev); ++} + -+ if (WARN_ON(!cqs_set->objs)) -+ return; ++static bool global_request_complete(struct kbase_device *const kbdev, ++ u32 const req_mask) ++{ ++ struct kbase_csf_global_iface *global_iface = ++ &kbdev->csf.global_iface; ++ bool complete = false; ++ unsigned long flags; + -+ for (i = 0; i < cqs_set->nr_objs; i++) { -+ struct kbase_vmap_struct *mapping; -+ u32 *evt; ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ evt = (u32 *)kbase_phy_alloc_mapping_get( -+ queue->kctx, cqs_set->objs[i].addr, &mapping); ++ if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & ++ req_mask) == ++ (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & ++ req_mask)) ++ complete = true; + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (!evt) { -+ dev_warn(kbdev->dev, -+ "Sync memory %llx already freed", cqs_set->objs[i].addr); -+ queue->has_error = true; -+ } else { -+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; -+ /* Set to signaled */ -+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ return complete; ++} ++ ++static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev, ++ u32 const req_mask, unsigned int timeout_ms) ++{ ++ const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms); ++ long remaining; ++ int err = 0; ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ global_request_complete(kbdev, req_mask), ++ wait_timeout); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, ++ "[%llu] Timeout (%d ms) waiting for global request %x to complete", ++ kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask); ++ err = -ETIMEDOUT; + -+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, -+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); -+ } + } + -+ kbase_csf_event_signal_notify_gpu(queue->kctx); ++ return err; ++} + -+ kfree(cqs_set->objs); -+ cqs_set->objs = NULL; ++static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask) ++{ ++ return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms); +} + -+static int kbase_kcpu_cqs_set_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_cqs_set_info *cqs_set_info, -+ struct kbase_kcpu_command *current_command) ++static void set_global_request( ++ const struct kbase_csf_global_iface *const global_iface, ++ u32 const req_mask) +{ -+ struct base_cqs_set *objs; -+ unsigned int nr_objs = cqs_set_info->nr_objs; -+ unsigned int i; ++ u32 glb_req; + -+ lockdep_assert_held(&kcpu_queue->lock); ++ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + -+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) -+ return -EINVAL; ++ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); ++ glb_req ^= req_mask; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, ++ req_mask); ++} + -+ if (!nr_objs) -+ return -EINVAL; ++static void enable_endpoints_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const shader_core_mask) ++{ ++ 
kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, ++ shader_core_mask & U32_MAX); ++ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, ++ shader_core_mask >> 32); + -+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); -+ if (!objs) -+ return -ENOMEM; ++ set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); ++} + -+ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), -+ nr_objs * sizeof(*objs))) { -+ kfree(objs); -+ return -ENOMEM; -+ } ++static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, ++ const struct kbase_csf_global_iface *const global_iface) ++{ ++ u32 pwroff_reg; + -+ /* Check the CQS objects as early as possible. By checking their alignment -+ * (required alignment equals to size for Sync32 and Sync64 objects), we can -+ * prevent overrunning the supplied event page. -+ */ -+ for (i = 0; i < nr_objs; i++) { -+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { -+ kfree(objs); -+ return -EINVAL; -+ } -+ } ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ pwroff_reg = ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; -+ current_command->info.cqs_set.nr_objs = nr_objs; -+ current_command->info.cqs_set.objs = objs; ++ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, ++ pwroff_reg); ++ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); + -+ return 0; ++ /* Save the programed reg value in its shadow field */ ++ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; ++ ++ dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg); +} + -+static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) ++static void set_timeout_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const timeout) +{ -+ WARN_ON(!cqs_wait_operation->nr_objs); -+ WARN_ON(!cqs_wait_operation->objs); -+ WARN_ON(!cqs_wait_operation->signaled); -+ WARN_ON(!queue->cqs_wait_count); ++ kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, ++ timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + -+ if (--queue->cqs_wait_count == 0) { -+ kbase_csf_event_wait_remove(queue->kctx, -+ event_cqs_callback, queue); -+ } ++ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++} + -+ kfree(cqs_wait_operation->signaled); -+ kfree(cqs_wait_operation->objs); -+ cqs_wait_operation->signaled = NULL; -+ cqs_wait_operation->objs = NULL; ++static void enable_gpu_idle_timer(struct kbase_device *const kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, ++ kbdev->csf.gpu_idle_dur_count); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, ++ GLB_REQ_IDLE_ENABLE_MASK); ++ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", ++ kbdev->csf.gpu_idle_dur_count); +} + -+static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) ++static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ -+ u32 i; ++ struct kbase_csf_global_iface 
*global_iface = &kbdev->csf.global_iface; ++ bool complete = false; ++ unsigned long flags; + -+ lockdep_assert_held(&queue->lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ if (WARN_ON(!cqs_wait_operation->objs)) -+ return -EINVAL; ++ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == ++ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) ++ complete = true; + -+ /* Skip the CQS waits that have already been signaled when processing */ -+ for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { -+ if (!test_bit(i, cqs_wait_operation->signaled)) { -+ struct kbase_vmap_struct *mapping; -+ bool sig_set; -+ uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( -+ queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); -+ u64 val = 0; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (!queue->command_started) { -+ queue->command_started = true; -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( -+ kbdev, queue); -+ } ++ return complete; ++} + ++static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, ++ u32 const req_mask) ++{ ++ u32 glb_debug_req; + -+ if (!evt) { -+ dev_warn(kbdev->dev, -+ "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); -+ queue->has_error = true; -+ return -EINVAL; -+ } ++ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + -+ switch (cqs_wait_operation->objs[i].data_type) { -+ default: -+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( -+ cqs_wait_operation->objs[i].data_type)); -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+ queue->has_error = true; -+ return -EINVAL; -+ case BASEP_CQS_DATA_TYPE_U32: -+ val = *(u32 *)evt; -+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; -+ break; -+ case BASEP_CQS_DATA_TYPE_U64: -+ val = *(u64 *)evt; -+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; -+ break; -+ } ++ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); ++ glb_debug_req ^= req_mask; + -+ switch (cqs_wait_operation->objs[i].operation) { -+ case BASEP_CQS_WAIT_OPERATION_LE: -+ sig_set = val <= cqs_wait_operation->objs[i].val; -+ break; -+ case BASEP_CQS_WAIT_OPERATION_GT: -+ sig_set = val > cqs_wait_operation->objs[i].val; -+ break; -+ default: -+ dev_dbg(kbdev->dev, -+ "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); ++} + -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+ queue->has_error = true; ++static void request_fw_core_dump( ++ const struct kbase_csf_global_iface *const global_iface) ++{ ++ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + -+ return -EINVAL; -+ } ++ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + -+ if (sig_set) { -+ bitmap_set(cqs_wait_operation->signaled, i, 1); -+ if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && -+ *(u32 *)evt > 0) { -+ queue->has_error = true; -+ } ++ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); ++} + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( -+ kbdev, queue, *(u32 *)evt); ++int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int ret; + -+ 
queue->command_started = false; -+ } ++ /* Serialize CORE_DUMP requests. */ ++ mutex_lock(&kbdev->csf.reg_lock); + -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ request_fw_core_dump(global_iface); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (!sig_set) -+ break; -+ } -+ } ++ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */ ++ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ if (!ret) ++ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + -+ /* For the queue to progress further, all cqs objects should get -+ * signaled. -+ */ -+ return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ return ret; +} + -+static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, -+ struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, -+ struct kbase_kcpu_command *current_command) ++/** ++ * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core ++ * ++ * @kbdev: The kbase device structure of the device ++ * ++ * This function needs to be called to enable the Ray Tracing Unit ++ * by writing SHADER_PWRFEATURES only when host controls shader cores power. ++ */ ++static void kbasep_enable_rtu(struct kbase_device *kbdev) +{ -+ struct base_cqs_wait_operation_info *objs; -+ unsigned int nr_objs = cqs_wait_operation_info->nr_objs; -+ unsigned int i; ++ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + -+ lockdep_assert_held(&queue->lock); ++ if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0)) ++ return; + -+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) -+ return -EINVAL; ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1); ++} + -+ if (!nr_objs) -+ return -EINVAL; ++static void global_init(struct kbase_device *const kbdev, u64 core_mask) ++{ ++ u32 const ack_irq_mask = ++ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | ++ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | ++ GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + -+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); -+ if (!objs) -+ return -ENOMEM; ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; + -+ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), -+ nr_objs * sizeof(*objs))) { -+ kfree(objs); -+ return -ENOMEM; -+ } ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ /* Check the CQS objects as early as possible. By checking their alignment -+ * (required alignment equals to size for Sync32 and Sync64 objects), we can -+ * prevent overrunning the supplied event page. ++ kbasep_enable_rtu(kbdev); ++ ++ /* Update shader core allocation enable mask */ ++ enable_endpoints_global(global_iface, core_mask); ++ enable_shader_poweroff_timer(kbdev, global_iface); ++ ++ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); ++ ++ /* The GPU idle timer is always enabled for simplicity. 
Checks will be ++ * done before scheduling the GPU idle worker to see if it is ++ * appropriate for the current power policy. + */ -+ for (i = 0; i < nr_objs; i++) { -+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || -+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { -+ kfree(objs); -+ return -EINVAL; -+ } -+ } ++ enable_gpu_idle_timer(kbdev); + -+ if (++queue->cqs_wait_count == 1) { -+ if (kbase_csf_event_wait_add(queue->kctx, -+ event_cqs_callback, queue)) { -+ kfree(objs); -+ queue->cqs_wait_count--; -+ return -ENOMEM; -+ } -+ } ++ /* Unmask the interrupts */ ++ kbase_csf_firmware_global_input(global_iface, ++ GLB_ACK_IRQ_MASK, ack_irq_mask); + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION; -+ current_command->info.cqs_wait_operation.nr_objs = nr_objs; -+ current_command->info.cqs_wait_operation.objs = objs; -+ current_command->info.cqs_wait_operation.inherit_err_flags = -+ cqs_wait_operation_info->inherit_err_flags; ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ /* Enable FW MCU read/write debug interfaces */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_DEBUG_ACK_IRQ_MASK, ++ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, ++ GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+ current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), -+ sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); -+ if (!current_command->info.cqs_wait_operation.signaled) { -+ if (--queue->cqs_wait_count == 0) { -+ kbase_csf_event_wait_remove(queue->kctx, -+ event_cqs_callback, queue); -+ } ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + -+ kfree(objs); -+ return -ENOMEM; -+ } ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ return 0; ++/** ++ * global_init_on_boot - Sends a global request to control various features. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface ++ * ++ * Currently only the request to enable endpoints and timeout for GPU progress ++ * timer is sent. ++ * ++ * Return: 0 on success, or negative on failure. 
++ */ ++static int global_init_on_boot(struct kbase_device *const kbdev) ++{ ++ unsigned long flags; ++ u64 core_mask; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ core_mask = kbase_pm_ca_get_core_mask(kbdev); ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ global_init(kbdev, core_mask); ++ ++ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); +} + -+static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, -+ uintptr_t evt, u8 operation, u64 val) ++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, ++ u64 core_mask) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ switch (operation) { -+ case BASEP_CQS_SET_OPERATION_ADD: -+ *(u32 *)evt += (u32)val; -+ break; -+ case BASEP_CQS_SET_OPERATION_SET: -+ *(u32 *)evt = val; -+ break; -+ default: -+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); -+ queue->has_error = true; -+ break; -+ } ++ kbdev->csf.glb_init_request_pending = true; ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ global_init(kbdev, core_mask); +} + -+static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, -+ uintptr_t evt, u8 operation, u64 val) ++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON(!kbdev->csf.glb_init_request_pending); + -+ switch (operation) { -+ case BASEP_CQS_SET_OPERATION_ADD: -+ *(u64 *)evt += val; -+ break; -+ case BASEP_CQS_SET_OPERATION_SET: -+ *(u64 *)evt = val; -+ break; -+ default: -+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); -+ queue->has_error = true; -+ break; -+ } ++ if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) ++ kbdev->csf.glb_init_request_pending = false; ++ ++ return !kbdev->csf.glb_init_request_pending; +} + -+static void kbase_kcpu_cqs_set_operation_process( -+ struct kbase_device *kbdev, -+ struct kbase_kcpu_command_queue *queue, -+ struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) ++void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +{ -+ unsigned int i; ++ unsigned long flags; + -+ lockdep_assert_held(&queue->lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (WARN_ON(!cqs_set_operation->objs)) -+ return; ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (update_core_mask) ++ enable_endpoints_global(&kbdev->csf.global_iface, core_mask); ++ if (update_core_pwroff_timer) ++ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + -+ for (i = 0; i < cqs_set_operation->nr_objs; i++) { -+ struct kbase_vmap_struct *mapping; -+ uintptr_t evt; ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ evt = (uintptr_t)kbase_phy_alloc_mapping_get( -+ queue->kctx, cqs_set_operation->objs[i].addr, &mapping); ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (!evt) { -+ dev_warn(kbdev->dev, -+ "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); -+ queue->has_error = true; -+ } else { -+ struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; ++ return global_request_complete(kbdev, 
GLB_REQ_CFG_ALLOC_EN_MASK | ++ GLB_REQ_CFG_PWROFF_TIMER_MASK); ++} + -+ switch (obj->data_type) { -+ default: -+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); -+ queue->has_error = true; -+ goto skip_err_propagation; -+ case BASEP_CQS_DATA_TYPE_U32: -+ kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, -+ obj->val); -+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; -+ break; -+ case BASEP_CQS_DATA_TYPE_U64: -+ kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, -+ obj->val); -+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; -+ break; -+ } ++/** ++ * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU ++ * @work: CSF Work item for reloading the firmware. ++ * ++ * This helper function will reload the firmware image and re-enable the MCU. ++ * It is supposed to be called after MCU(GPU) has been reset. ++ * Unlike the initial boot the firmware binary image is not parsed completely. ++ * Only the data sections, which were loaded in memory during the initial boot, ++ * are re-initialized either by zeroing them or copying their data from the ++ * firmware binary image. The memory allocation for the firmware pages and ++ * MMU programming is not needed for the reboot, presuming the firmware binary ++ * file on the filesystem would not change. ++ */ ++static void kbase_csf_firmware_reload_worker(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.firmware_reload_work); ++ int err; + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( -+ kbdev, queue, *(u32 *)evt ? 1 : 0); ++ dev_info(kbdev->dev, "reloading firmware"); + -+ /* Always propagate errors */ -+ *(u32 *)evt = queue->has_error; ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + -+skip_err_propagation: -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+ } -+ } ++ /* Reload just the data sections from firmware binary image */ ++ err = reload_fw_image(kbdev); ++ if (err) ++ return; + -+ kbase_csf_event_signal_notify_gpu(queue->kctx); ++ kbase_csf_tl_reader_reset(&kbdev->timeline->csf_tl_reader); + -+ kfree(cqs_set_operation->objs); -+ cqs_set_operation->objs = NULL; ++ /* Reboot the firmware */ ++ kbase_csf_firmware_enable_mcu(kbdev); +} + -+static int kbase_kcpu_cqs_set_operation_prepare( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, -+ struct kbase_kcpu_command *current_command) ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) +{ -+ struct base_cqs_set_operation_info *objs; -+ unsigned int nr_objs = cqs_set_operation_info->nr_objs; -+ unsigned int i; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ lockdep_assert_held(&kcpu_queue->lock); ++ kbdev->csf.firmware_reloaded = false; + -+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) -+ return -EINVAL; ++ if (kbdev->csf.firmware_reload_needed) { ++ kbdev->csf.firmware_reload_needed = false; ++ queue_work(system_wq, &kbdev->csf.firmware_reload_work); ++ } else { ++ kbase_csf_firmware_enable_mcu(kbdev); ++ } ++} + -+ if (!nr_objs) -+ return -EINVAL; ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) ++{ ++ u32 version; + -+ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); -+ if (!objs) -+ return -ENOMEM; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), -+ nr_objs * sizeof(*objs))) { -+ kfree(objs); -+ 
return -ENOMEM; -+ } ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return; + -+ /* Check the CQS objects as early as possible. By checking their alignment -+ * (required alignment equals to size for Sync32 and Sync64 objects), we can -+ * prevent overrunning the supplied event page. ++ /* Check firmware rebooted properly: we do not expect ++ * the version number to change with a running reboot. + */ -+ for (i = 0; i < nr_objs; i++) { -+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || -+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { -+ kfree(objs); -+ return -EINVAL; -+ } -+ } ++ version = get_firmware_version(kbdev); + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; -+ current_command->info.cqs_set_operation.nr_objs = nr_objs; -+ current_command->info.cqs_set_operation.objs = objs; ++ if (version != kbdev->csf.global_iface.version) ++ dev_err(kbdev->dev, "Version check failed in firmware reboot."); + -+ return 0; ++ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u); ++ ++ /* Tell MCU state machine to transit to next state */ ++ kbdev->csf.firmware_reloaded = true; ++ kbase_pm_update_state(kbdev); +} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+static void kbase_csf_fence_wait_callback(struct fence *fence, -+ struct fence_cb *cb) -+#else -+static void kbase_csf_fence_wait_callback(struct dma_fence *fence, -+ struct dma_fence_cb *cb) -+#endif ++static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) +{ -+ struct kbase_kcpu_command_fence_info *fence_info = container_of(cb, -+ struct kbase_kcpu_command_fence_info, fence_cb); -+ struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; -+ struct kbase_context *const kctx = kcpu_queue->kctx; ++#define HYSTERESIS_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_us; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ /* Fence gets signaled. Deactivate the timer for fence-wait timeout */ -+ del_timer(&kcpu_queue->fence_timeout); -+#endif -+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, -+ fence->context, fence->seqno); ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); + -+ /* Resume kcpu command queue processing. */ -+ queue_work(kcpu_queue->wq, &kcpu_queue->work); ++ dev_info( ++ kbdev->dev, ++ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); ++ } ++ ++ /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; +} + -+static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command_fence_info *fence_info) ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) +{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; ++ unsigned long flags; ++ u32 dur; + -+ lockdep_assert_held(&kcpu_queue->lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ dur = kbdev->csf.gpu_idle_hysteresis_us; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (WARN_ON(!fence_info->fence)) -+ return; ++ return dur; ++} + -+ if (kcpu_queue->fence_wait_processed) { -+ bool removed = dma_fence_remove_callback(fence_info->fence, -+ &fence_info->fence_cb); ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) ++{ ++ unsigned long flags; ++ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ /* Fence-wait cancelled or fence signaled. In the latter case -+ * the timer would already have been deactivated inside -+ * kbase_csf_fence_wait_callback(). -+ */ -+ del_timer_sync(&kcpu_queue->fence_timeout); -+#endif -+ if (removed) -+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, -+ kcpu_queue, fence_info->fence->context, -+ fence_info->fence->seqno); ++ /* The 'fw_load_lock' is taken to synchronize against the deferred ++ * loading of FW, where the idle timer will be enabled. ++ */ ++ mutex_lock(&kbdev->fw_load_lock); ++ if (unlikely(!kbdev->csf.firmware_inited)) { ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_us = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ mutex_unlock(&kbdev->fw_load_lock); ++ goto end; + } ++ mutex_unlock(&kbdev->fw_load_lock); + -+ /* Release the reference which is kept by the kcpu_queue */ -+ kbase_fence_put(fence_info->fence); -+ kcpu_queue->fence_wait_processed = false; ++ kbase_csf_scheduler_pm_active(kbdev); ++ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { ++ dev_err(kbdev->dev, ++ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); ++ kbase_csf_scheduler_pm_idle(kbdev); ++ return kbdev->csf.gpu_idle_dur_count; ++ } + -+ fence_info->fence = NULL; ++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of idle timer value by multiple Users ++ * gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ /* The firmware only reads the new idle timer value when the timer is ++ * disabled. 
++ */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_csf_firmware_disable_gpu_idle_timer(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ /* Ensure that the request has taken effect */ ++ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_us = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_firmware_enable_gpu_idle_timer(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ kbase_csf_scheduler_pm_idle(kbdev); ++ ++end: ++ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", ++ hysteresis_val); ++ ++ return hysteresis_val; +} + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+/** -+ * fence_timeout_callback() - Timeout callback function for fence-wait -+ * -+ * @timer: Timer struct -+ * -+ * Context and seqno of the timed-out fence will be displayed in dmesg. -+ * If the fence has been signalled a work will be enqueued to process -+ * the fence-wait without displaying debugging information. -+ */ -+static void fence_timeout_callback(struct timer_list *timer) ++static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +{ -+ struct kbase_kcpu_command_queue *kcpu_queue = -+ container_of(timer, struct kbase_kcpu_command_queue, fence_timeout); -+ struct kbase_context *const kctx = kcpu_queue->kctx; -+ struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset]; -+ struct kbase_kcpu_command_fence_info *fence_info; -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ struct kbase_sync_fence_info info; ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_us; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; + -+ if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) { -+ dev_err(kctx->kbdev->dev, -+ "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__, -+ cmd->type, kctx->tgid, kctx->id, kcpu_queue->id); -+ return; ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info( ++ kbdev->dev, ++ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); + } + -+ fence_info = &cmd->info.fence; ++ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000000); + -+ fence = kbase_fence_get(fence_info); -+ if (!fence) { -+ dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, -+ kctx->id, kcpu_queue->id); -+ return; -+ } ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; + -+ kbase_sync_fence_info_get(fence, &info); ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + -+ if (info.status == 1) { -+ queue_work(kcpu_queue->wq, &kcpu_queue->work); -+ } else if (info.status == 0) { -+ dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", -+ FENCE_WAIT_TIMEOUT_MS); -+ dev_warn(kctx->kbdev->dev, -+ "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s", -+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name); -+ } else { -+ dev_warn(kctx->kbdev->dev, "fence has got error"); -+ dev_warn(kctx->kbdev->dev, -+ "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)", -+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status); -+ } ++ return reg_val_u32; ++} + -+ kbase_fence_put(fence); ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) ++{ ++ u32 pwroff; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ pwroff = kbdev->csf.mcu_core_pwroff_dur_us; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return pwroff; +} + -+/** -+ * fence_timeout_start() - Start a timer to check fence-wait timeout -+ * -+ * @cmd: KCPU command queue -+ * -+ * Activate a timer to check whether a fence-wait command in the queue -+ * gets completed within FENCE_WAIT_TIMEOUT_MS -+ */ -+static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +{ -+ mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); ++ unsigned long flags; ++ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.mcu_core_pwroff_dur_us = dur; ++ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); ++ ++ return pwroff; +} -+#endif + +/** -+ * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command -+ * -+ * @kcpu_queue: The queue containing the fence wait command -+ * @fence_info: Reference to a fence for which the command is waiting ++ * kbase_device_csf_iterator_trace_init - Send request to enable iterator ++ * trace port. ++ * @kbdev: Kernel base device pointer + * -+ * Return: 0 if fence wait is blocked, 1 if it is unblocked, negative error if -+ * an error has occurred and fence should no longer be waited on. ++ * Return: 0 on success (or if enable request is not sent), or error ++ * code -EINVAL on failure of GPU to acknowledge enable request. + */ -+static int kbase_kcpu_fence_wait_process( -+ struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command_fence_info *fence_info) ++static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) +{ -+ int fence_status = 0; -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ struct kbase_context *const kctx = kcpu_queue->kctx; ++ /* Enable the iterator trace port if supported by the GPU. 
++ * It requires the GPU to have a nonzero "iter_trace_enable" ++ * property in the device tree, and the FW must advertise ++ * this feature in GLB_FEATURES. ++ */ ++ if (kbdev->pm.backend.gpu_powered) { ++ /* check device tree for iterator trace enable property */ ++ const void *iter_trace_param = of_get_property( ++ kbdev->dev->of_node, ++ "iter_trace_enable", NULL); + -+ lockdep_assert_held(&kcpu_queue->lock); ++ const struct kbase_csf_global_iface *iface = ++ &kbdev->csf.global_iface; + -+ if (WARN_ON(!fence_info->fence)) -+ return -EINVAL; ++ if (iter_trace_param) { ++ u32 iter_trace_value = be32_to_cpup(iter_trace_param); + -+ fence = fence_info->fence; ++ if ((iface->features & ++ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) && ++ iter_trace_value) { ++ long ack_timeout; + -+ if (kcpu_queue->fence_wait_processed) { -+ fence_status = dma_fence_get_status(fence); -+ } else { -+ int cb_err = dma_fence_add_callback(fence, -+ &fence_info->fence_cb, -+ kbase_csf_fence_wait_callback); ++ ack_timeout = kbase_csf_timeout_in_jiffies( ++ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)); + -+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, -+ KCPU_FENCE_WAIT_START, kcpu_queue, -+ fence->context, fence->seqno); -+ fence_status = cb_err; -+ if (cb_err == 0) { -+ kcpu_queue->fence_wait_processed = true; -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ fence_timeout_start(kcpu_queue); -+#endif -+ } else if (cb_err == -ENOENT) { -+ fence_status = dma_fence_get_status(fence); -+ if (!fence_status) { -+ struct kbase_sync_fence_info info; ++ /* write enable request to global input */ ++ kbase_csf_firmware_global_input_mask( ++ iface, GLB_REQ, ++ GLB_REQ_ITER_TRACE_ENABLE_MASK, ++ GLB_REQ_ITER_TRACE_ENABLE_MASK); ++ /* Ring global doorbell */ ++ kbase_csf_ring_doorbell(kbdev, ++ CSF_KERNEL_DOORBELL_NR); ++ ++ ack_timeout = wait_event_timeout( ++ kbdev->csf.event_wait, ++ !((kbase_csf_firmware_global_input_read( ++ iface, GLB_REQ) ^ ++ kbase_csf_firmware_global_output( ++ iface, GLB_ACK)) & ++ GLB_REQ_ITER_TRACE_ENABLE_MASK), ++ ack_timeout); ++ ++ return ack_timeout ? 0 : -EINVAL; + -+ kbase_sync_fence_info_get(fence, &info); -+ dev_warn(kctx->kbdev->dev, -+ "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", -+ info.name, kctx->tgid, kctx->id, kcpu_queue->id); + } + } ++ + } ++ return 0; ++} + -+ /* -+ * At this point fence status can contain 3 types of values: -+ * - Value 0 to represent that fence in question is not signalled yet -+ * - Value 1 to represent that fence in question is signalled without -+ * errors -+ * - Negative error code to represent that some error has occurred such -+ * that waiting on it is no longer valid. 
-+ */ ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev) ++{ ++ init_waitqueue_head(&kbdev->csf.event_wait); ++ kbdev->csf.interrupt_received = false; + -+ if (fence_status) -+ kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); ++ kbdev->csf.fw_timeout_ms = ++ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + -+ return fence_status; -+} ++ kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; ++ kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( ++ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + -+static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_fence_info *fence_info, -+ struct kbase_kcpu_command *current_command) -+{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence_in; -+#else -+ struct dma_fence *fence_in; -+#endif -+ struct base_fence fence; ++ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_config); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); ++ INIT_LIST_HEAD(&kbdev->csf.user_reg.list); ++ INIT_WORK(&kbdev->csf.firmware_reload_work, ++ kbase_csf_firmware_reload_worker); ++ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + -+ lockdep_assert_held(&kcpu_queue->lock); ++ mutex_init(&kbdev->csf.reg_lock); + -+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) -+ return -ENOMEM; ++ kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; + -+ fence_in = sync_file_get_fence(fence.basep.fd); ++ return 0; ++} + -+ if (!fence_in) -+ return -ENOENT; ++void kbase_csf_firmware_early_term(struct kbase_device *kbdev) ++{ ++ mutex_destroy(&kbdev->csf.reg_lock); ++} ++ ++int kbase_csf_firmware_late_init(struct kbase_device *kbdev) ++{ ++ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; ++#ifdef KBASE_PM_RUNTIME ++ if (kbase_pm_gpu_sleep_allowed(kbdev)) ++ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; ++#endif ++ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); ++ kbdev->csf.gpu_idle_dur_count = ++ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; -+ current_command->info.fence.fence = fence_in; -+ current_command->info.fence.kcpu_queue = kcpu_queue; + return 0; +} + -+static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command_fence_info *fence_info) ++int kbase_csf_firmware_load_init(struct kbase_device *kbdev) +{ -+ struct kbase_context *const kctx = kcpu_queue->kctx; ++ const struct firmware *firmware = NULL; ++ struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; ++ const u32 magic = FIRMWARE_HEADER_MAGIC; ++ u8 version_major, version_minor; ++ u32 version_hash; ++ u32 entry_end_offset; ++ u32 entry_offset; + int ret; + -+ if (WARN_ON(!fence_info->fence)) ++ lockdep_assert_held(&kbdev->fw_load_lock); ++ ++ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) + return -EINVAL; ++ kbdev->as_free &= ~MCU_AS_BITMASK; + -+ ret = dma_fence_signal(fence_info->fence); ++ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, ++ BASE_MEM_GROUP_DEFAULT); + -+ if (unlikely(ret < 0)) { -+ dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); -+ /* Treated as a success */ -+ ret = 0; ++ if (ret != 0) { ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++ return ret; + } + -+ 
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, -+ fence_info->fence->context, -+ fence_info->fence->seqno); ++ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, ++ "Failed to setup the rb tree for managing shared interface segment\n"); ++ goto err_out; ++ } + -+ /* dma_fence refcount needs to be decreased to release it. */ -+ kbase_fence_put(fence_info->fence); -+ fence_info->fence = NULL; ++ if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { ++ dev_err(kbdev->dev, ++ "Failed to load firmware image '%s'\n", ++ fw_name); ++ ret = -ENOENT; ++ } else { ++ /* Try to save a copy and then release the loaded firmware image */ ++ mcu_fw->size = firmware->size; ++ mcu_fw->data = vmalloc((unsigned long)mcu_fw->size); + -+ return ret; -+} ++ if (mcu_fw->data == NULL) { ++ ret = -ENOMEM; ++ } else { ++ memcpy(mcu_fw->data, firmware->data, mcu_fw->size); ++ dev_dbg(kbdev->dev, "Firmware image (%zu-bytes) retained in csf.fw\n", ++ mcu_fw->size); ++ } + -+static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command *current_command, -+ struct base_fence *fence, struct sync_file **sync_file, -+ int *fd) -+{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence_out; -+#else -+ struct dma_fence *fence_out; -+#endif -+ struct kbase_kcpu_dma_fence *kcpu_fence; -+ int ret = 0; ++ release_firmware(firmware); ++ } + -+ lockdep_assert_held(&kcpu_queue->lock); ++ /* If error in loading or saving the image, branches to error out */ ++ if (ret) ++ goto err_out; + -+ kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); -+ if (!kcpu_fence) -+ return -ENOMEM; ++ if (mcu_fw->size < FIRMWARE_HEADER_LENGTH) { ++ dev_err(kbdev->dev, "Firmware too small\n"); ++ ret = -EINVAL; ++ goto err_out; ++ } + -+ /* Set reference to KCPU metadata and increment refcount */ -+ kcpu_fence->metadata = kcpu_queue->metadata; -+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); ++ if (memcmp(mcu_fw->data, &magic, sizeof(magic)) != 0) { ++ dev_err(kbdev->dev, "Incorrect firmware magic\n"); ++ ret = -EINVAL; ++ goto err_out; ++ } + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ fence_out = (struct fence *)kcpu_fence; -+#else -+ fence_out = (struct dma_fence *)kcpu_fence; -+#endif ++ version_minor = mcu_fw->data[4]; ++ version_major = mcu_fw->data[5]; + -+ dma_fence_init(fence_out, -+ &kbase_fence_ops, -+ &kbase_csf_fence_lock, -+ kcpu_queue->fence_context, -+ ++kcpu_queue->fence_seqno); ++ if (version_major != FIRMWARE_HEADER_VERSION_MAJOR || ++ version_minor != FIRMWARE_HEADER_VERSION_MINOR) { ++ dev_err(kbdev->dev, ++ "Firmware header version %d.%d not understood\n", ++ version_major, version_minor); ++ ret = -EINVAL; ++ goto err_out; ++ } + -+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) -+ /* Take an extra reference to the fence on behalf of the sync file. -+ * This is only needded on older kernels where sync_file_create() -+ * does not take its own reference. This was changed in v4.9.68 -+ * where sync_file_create() now takes its own reference. 
-+ */ -+ dma_fence_get(fence_out); -+#endif ++ memcpy(&version_hash, &mcu_fw->data[8], sizeof(version_hash)); + -+ /* create a sync_file fd representing the fence */ -+ *sync_file = sync_file_create(fence_out); -+ if (!(*sync_file)) { -+ ret = -ENOMEM; -+ goto file_create_fail; ++ dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); ++ ++ memcpy(&entry_end_offset, &mcu_fw->data[0x10], sizeof(entry_end_offset)); ++ ++ if (entry_end_offset > mcu_fw->size) { ++ dev_err(kbdev->dev, "Firmware image is truncated\n"); ++ ret = -EINVAL; ++ goto err_out; + } + -+ *fd = get_unused_fd_flags(O_CLOEXEC); -+ if (*fd < 0) { -+ ret = *fd; -+ goto fd_flags_fail; ++ entry_offset = FIRMWARE_HEADER_LENGTH; ++ while (entry_offset < entry_end_offset) { ++ u32 header; ++ unsigned int size; ++ ++ memcpy(&header, &mcu_fw->data[entry_offset], sizeof(header)); ++ ++ size = entry_size(header); ++ ++ ret = load_firmware_entry(kbdev, mcu_fw, entry_offset, header); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to load firmware image\n"); ++ goto err_out; ++ } ++ entry_offset += size; + } + -+ fence->basep.fd = *fd; ++ if (!kbdev->csf.shared_interface) { ++ dev_err(kbdev->dev, "Shared interface region not found\n"); ++ ret = -EINVAL; ++ goto err_out; ++ } else { ++ ret = setup_shared_iface_static_region(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); ++ goto err_out; ++ } ++ } + -+ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; -+ current_command->info.fence.fence = fence_out; ++ ret = kbase_csf_firmware_trace_buffers_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); ++ goto err_out; ++ } + -+ return 0; ++ /* Make sure L2 cache is powered up */ ++ kbase_pm_wait_for_l2_powered(kbdev); + -+fd_flags_fail: -+ fput((*sync_file)->file); -+file_create_fail: -+ /* -+ * Upon failure, dma_fence refcount that was increased by -+ * dma_fence_get() or sync_file_create() needs to be decreased -+ * to release it. 
-+ */ -+ kbase_fence_put(fence_out); -+ current_command->info.fence.fence = NULL; ++ /* Load the MMU tables into the selected address space */ ++ ret = load_mmu_tables(kbdev); ++ if (ret != 0) ++ goto err_out; + -+ return ret; -+} ++ boot_csf_firmware(kbdev); + -+static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct base_kcpu_command_fence_info *fence_info, -+ struct kbase_kcpu_command *current_command) -+{ -+ struct base_fence fence; -+ struct sync_file *sync_file = NULL; -+ int fd; -+ int ret = 0; ++ ret = parse_capabilities(kbdev); ++ if (ret != 0) ++ goto err_out; + -+ lockdep_assert_held(&kcpu_queue->lock); ++ ret = kbase_csf_doorbell_mapping_init(kbdev); ++ if (ret != 0) ++ goto err_out; + -+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) -+ return -EFAULT; ++ ret = kbase_csf_scheduler_init(kbdev); ++ if (ret != 0) ++ goto err_out; + -+ ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); -+ if (ret) -+ return ret; ++ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); ++ if (ret != 0) ++ goto err_out; + -+ if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, -+ sizeof(fence))) { -+ ret = -EFAULT; -+ goto fail; ++ ret = kbase_csf_timeout_init(kbdev); ++ if (ret != 0) ++ goto err_out; ++ ++ ret = global_init_on_boot(kbdev); ++ if (ret != 0) ++ goto err_out; ++ ++ ret = kbase_csf_firmware_cfg_init(kbdev); ++ if (ret != 0) ++ goto err_out; ++ ++ ret = kbase_device_csf_iterator_trace_init(kbdev); ++ if (ret != 0) ++ goto err_out; ++ ++ ret = kbase_csf_firmware_log_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); ++ goto err_out; + } + -+ /* 'sync_file' pointer can't be safely dereferenced once 'fd' is -+ * installed, so the install step needs to be done at the last -+ * before returning success. 
-+ */ -+ fd_install(fd, sync_file->file); -+ return 0; ++ if (kbdev->csf.fw_core_dump.available) ++ kbase_csf_firmware_core_dump_init(kbdev); + -+fail: -+ fput(sync_file->file); -+ kbase_fence_put(current_command->info.fence.fence); -+ current_command->info.fence.fence = NULL; ++ /* Firmware loaded successfully, ret = 0 */ ++ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, ++ (((u64)version_hash) << 32) | ++ (((u64)version_major) << 8) | version_minor); ++ return 0; + ++err_out: ++ kbase_csf_firmware_unload_term(kbdev); + return ret; +} + -+int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command_fence_info *fence_info) ++void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) +{ -+ if (!kcpu_queue || !fence_info) -+ return -EINVAL; -+ -+ return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); -+} -+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); ++ unsigned long flags; ++ int ret = 0; + -+int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command *current_command, -+ struct base_fence *fence, struct sync_file **sync_file, int *fd) -+{ -+ if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) -+ return -EINVAL; ++ cancel_work_sync(&kbdev->csf.fw_error_work); + -+ return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); -+} -+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); -+#endif /* CONFIG_SYNC_FILE */ ++ ret = kbase_reset_gpu_wait(kbdev); + -+static void kcpu_queue_process_worker(struct work_struct *data) -+{ -+ struct kbase_kcpu_command_queue *queue = container_of(data, -+ struct kbase_kcpu_command_queue, work); ++ WARN(ret, "failed to wait for GPU reset"); + -+ mutex_lock(&queue->lock); -+ kcpu_queue_process(queue, false); -+ mutex_unlock(&queue->lock); -+} ++ kbase_csf_firmware_log_term(kbdev); + -+static int delete_queue(struct kbase_context *kctx, u32 id) -+{ -+ int err = 0; ++ kbase_csf_firmware_cfg_term(kbdev); + -+ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ kbase_csf_timeout_term(kbdev); + -+ if ((id < KBASEP_MAX_KCPU_QUEUES) && kctx->csf.kcpu_queues.array[id]) { -+ struct kbase_kcpu_command_queue *queue = -+ kctx->csf.kcpu_queues.array[id]; ++ kbase_csf_free_dummy_user_reg_page(kbdev); + -+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, -+ queue, queue->num_pending_cmds, queue->cqs_wait_count); ++ kbase_csf_scheduler_term(kbdev); + -+ /* Disassociate the queue from the system to prevent further -+ * submissions. Draining pending commands would be acceptable -+ * even if a new queue is created using the same ID. -+ */ -+ kctx->csf.kcpu_queues.array[id] = NULL; -+ bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); ++ kbase_csf_doorbell_mapping_term(kbdev); + -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ /* Explicitly trigger the disabling of MCU through the state machine and ++ * wait for its completion. It may not have been disabled yet due to the ++ * power policy. ++ */ ++ kbdev->pm.backend.mcu_desired = false; ++ kbase_pm_wait_for_desired_state(kbdev); + -+ mutex_lock(&queue->lock); ++ free_global_iface(kbdev); + -+ /* Metadata struct may outlive KCPU queue. 
*/ -+ kbase_kcpu_dma_fence_meta_put(queue->metadata); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.firmware_inited = false; ++ if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) { ++ kbdev->pm.backend.mcu_state = KBASE_MCU_OFF; ++ stop_csf_firmware(kbdev); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* Drain the remaining work for this queue first and go past -+ * all the waits. -+ */ -+ kcpu_queue_process(queue, true); ++ unload_mmu_tables(kbdev); + -+ /* All commands should have been processed */ -+ WARN_ON(queue->num_pending_cmds); ++ kbase_csf_firmware_trace_buffers_term(kbdev); + -+ /* All CQS wait commands should have been cleaned up */ -+ WARN_ON(queue->cqs_wait_count); ++ while (!list_empty(&kbdev->csf.firmware_interfaces)) { ++ struct kbase_csf_firmware_interface *interface; + -+ /* Fire the tracepoint with the mutex held to enforce correct -+ * ordering with the summary stream. -+ */ -+ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); ++ interface = ++ list_first_entry(&kbdev->csf.firmware_interfaces, ++ struct kbase_csf_firmware_interface, ++ node); ++ list_del(&interface->node); + -+ mutex_unlock(&queue->lock); ++ vunmap(interface->kernel_map); + -+ cancel_work_sync(&queue->work); -+ destroy_workqueue(queue->wq); ++ if (!interface->reuse_pages) { ++ if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { ++ kbase_csf_protected_memory_free( ++ kbdev, interface->pma, interface->num_pages_aligned, ++ interface->is_small_page); ++ } else { ++ kbase_mem_pool_free_pages( ++ kbase_mem_pool_group_select( ++ kbdev, KBASE_MEM_GROUP_CSF_FW, ++ interface->is_small_page), ++ interface->num_pages_aligned, ++ interface->phys, ++ true, false); ++ } + -+ mutex_destroy(&queue->lock); ++ kfree(interface->phys); ++ } + -+ kfree(queue); -+ } else { -+ dev_dbg(kctx->kbdev->dev, -+ "Attempt to delete a non-existent KCPU queue"); -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); -+ err = -EINVAL; ++ kfree(interface); + } -+ return err; -+} + -+static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( -+ struct kbase_device *kbdev, -+ const struct kbase_kcpu_command_queue *queue, -+ const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, -+ int alloc_status) -+{ -+ u8 i; ++ while (!list_empty(&kbdev->csf.firmware_timeline_metadata)) { ++ struct firmware_timeline_metadata *metadata; + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); -+ for (i = 0; i < jit_alloc->count; i++) { -+ const u8 id = jit_alloc->info[i].id; -+ const struct kbase_va_region *reg = queue->kctx->jit_alloc[id]; -+ u64 gpu_alloc_addr = 0; -+ u64 mmu_flags = 0; ++ metadata = list_first_entry( ++ &kbdev->csf.firmware_timeline_metadata, ++ struct firmware_timeline_metadata, ++ node); ++ list_del(&metadata->node); + -+ if ((alloc_status == 0) && !WARN_ON(!reg) && -+ !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) { -+#ifdef CONFIG_MALI_VECTOR_DUMP -+ struct tagged_addr phy = {0}; -+#endif /* CONFIG_MALI_VECTOR_DUMP */ ++ kfree(metadata); ++ } + -+ gpu_alloc_addr = reg->start_pfn << PAGE_SHIFT; -+#ifdef CONFIG_MALI_VECTOR_DUMP -+ mmu_flags = kbase_mmu_create_ate(kbdev, -+ phy, reg->flags, -+ MIDGARD_MMU_BOTTOMLEVEL, -+ queue->kctx->jit_group_id); -+#endif /* CONFIG_MALI_VECTOR_DUMP */ -+ } -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( -+ kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags); ++ if (kbdev->csf.fw.data) { ++ /* Free the copy of the firmware image */ ++ vfree(kbdev->csf.fw.data); ++ 
kbdev->csf.fw.data = NULL; ++ dev_dbg(kbdev->dev, "Free retained image csf.fw (%zu-bytes)\n", kbdev->csf.fw.size); + } -+} + -+static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( -+ struct kbase_device *kbdev, -+ const struct kbase_kcpu_command_queue *queue) -+{ -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); ++ /* This will also free up the region allocated for the shared interface ++ * entry parsed from the firmware image. ++ */ ++ kbase_mcu_shared_interface_region_tracker_term(kbdev); ++ ++ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); ++ ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; +} + -+static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( -+ struct kbase_device *kbdev, -+ const struct kbase_kcpu_command_queue *queue) ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 const reg_val) +{ -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; ++ u32 glb_req; ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ ++ /* Set the address and value to write */ ++ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); ++ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); ++ ++ /* Set the Global Debug request for FW MCU write */ ++ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); ++ glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, ++ GLB_DEBUG_REQ_FW_AS_WRITE_MASK); ++ ++ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ ++ /* Notify FW about the Global Debug request */ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); ++ ++ return err; +} + -+static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, -+ bool drain_queue) ++int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 *reg_val) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ bool process_next = true; -+ size_t i; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; ++ u32 glb_req; + -+ lockdep_assert_held(&queue->lock); ++ if (WARN_ON(reg_val == NULL)) ++ return -EINVAL; + -+ for (i = 0; i != queue->num_pending_cmds; ++i) { -+ struct kbase_kcpu_command *cmd = -+ &queue->commands[(u8)(queue->start_offset + i)]; -+ int status; ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ switch (cmd->type) { -+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: -+ if (!queue->command_started) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev, -+ queue); -+ queue->command_started = true; -+ } ++ /* Set the address to read */ ++ kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + -+ status = 0; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ if (drain_queue) { -+ kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); -+ } else { -+ status = kbase_kcpu_fence_wait_process(queue, -+ &cmd->info.fence); ++ /* Set 
the Global Debug request for FW MCU read */ ++ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); ++ glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, ++ GLB_DEBUG_REQ_FW_AS_READ_MASK); + -+ if (status == 0) -+ process_next = false; -+ else if (status < 0) -+ queue->has_error = true; -+ } -+#else -+ dev_warn(kbdev->dev, -+ "unexpected fence wait command found\n"); ++ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + -+ status = -EINVAL; -+ queue->has_error = true; -+#endif ++ /* Notify FW about the Global Debug request */ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + -+ if (process_next) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( -+ kbdev, queue, status < 0 ? status : 0); -+ queue->command_started = false; -+ } -+ break; -+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ status = 0; ++ err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); ++ if (!err) { ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } + -+ if (status < 0) -+ queue->has_error = true; -+#else -+ dev_warn(kbdev->dev, -+ "unexpected fence signal command found\n"); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ status = -EINVAL; -+ queue->has_error = true; -+#endif ++ dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue, -+ status); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: -+ status = kbase_kcpu_cqs_wait_process(kbdev, queue, -+ &cmd->info.cqs_wait); ++ return err; ++} + -+ if (!status && !drain_queue) { -+ process_next = false; -+ } else { -+ /* Either all CQS objects were signaled or -+ * there was an error or the queue itself is -+ * being deleted. -+ * In all cases can move to the next command. -+ * TBD: handle the error -+ */ -+ cleanup_cqs_wait(queue, &cmd->info.cqs_wait); -+ } ++int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 const val_mask, u32 const reg_val) ++{ ++ unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; ++ u32 read_val; + -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET: -+ kbase_kcpu_cqs_set_process(kbdev, queue, -+ &cmd->info.cqs_set); ++ dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); + -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: -+ status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, -+ &cmd->info.cqs_wait_operation); ++ while (time_before(jiffies, remaining)) { ++ int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); + -+ if (!status && !drain_queue) { -+ process_next = false; -+ } else { -+ /* Either all CQS objects were signaled or -+ * there was an error or the queue itself is -+ * being deleted. -+ * In all cases can move to the next command. 
-+ * TBD: handle the error -+ */ -+ cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); -+ } ++ if (err) { ++ dev_err(kbdev->dev, ++ "Error reading MCU register value (read_val = %u, expect = %u)\n", ++ read_val, reg_val); ++ return err; ++ } + -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: -+ kbase_kcpu_cqs_set_operation_process(kbdev, queue, -+ &cmd->info.cqs_set_operation); ++ if ((read_val & val_mask) == reg_val) ++ return 0; ++ } + -+ break; -+ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: -+ /* Clear the queue's error state */ -+ queue->has_error = false; ++ dev_err(kbdev->dev, ++ "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n", ++ read_val, reg_val); + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: { -+ struct kbase_ctx_ext_res_meta *meta = NULL; ++ return -ETIMEDOUT; ++} ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + -+ if (!drain_queue) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, -+ queue); ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + -+ kbase_gpu_vm_lock(queue->kctx); -+ meta = kbase_sticky_resource_acquire( -+ queue->kctx, cmd->info.import.gpu_va); -+ kbase_gpu_vm_unlock(queue->kctx); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ /* The scheduler is assumed to only call the enable when its internal ++ * state indicates that the idle timer has previously been disabled. So ++ * on entry the expected field values are: ++ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 ++ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 ++ */ ++ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) ++ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); + -+ if (meta == NULL) { -+ queue->has_error = true; -+ dev_dbg( -+ kbdev->dev, -+ "failed to map an external resource"); -+ } ++ enable_gpu_idle_timer(kbdev); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( -+ kbdev, queue, meta ? 0 : 1); -+ } -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: { -+ bool ret; ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ kbase_gpu_vm_lock(queue->kctx); -+ ret = kbase_sticky_resource_release( -+ queue->kctx, NULL, cmd->info.import.gpu_va); -+ kbase_gpu_vm_unlock(queue->kctx); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_DISABLE, ++ GLB_REQ_IDLE_DISABLE_MASK); ++ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); + -+ if (!ret) { -+ queue->has_error = true; -+ dev_dbg(kbdev->dev, -+ "failed to release the reference. resource not found"); -+ } ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue, -+ ret ? 
0 : 1); -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: { -+ bool ret; ++void kbase_csf_firmware_ping(struct kbase_device *const kbdev) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, -+ queue); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PING_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ kbase_gpu_vm_lock(queue->kctx); -+ ret = kbase_sticky_resource_release_force( -+ queue->kctx, NULL, cmd->info.import.gpu_va); -+ kbase_gpu_vm_unlock(queue->kctx); ++int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) ++{ ++ kbase_csf_firmware_ping(kbdev); + -+ if (!ret) { -+ queue->has_error = true; -+ dev_dbg(kbdev->dev, -+ "failed to release the reference. resource not found"); -+ } ++ return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); ++} + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( -+ kbdev, queue, ret ? 0 : 1); -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: -+ { -+ if (drain_queue) { -+ /* We still need to call this function to clean the JIT alloc info up */ -+ kbase_kcpu_jit_allocate_finish(queue, cmd); -+ } else { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev, -+ queue); ++int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, ++ u64 const timeout) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; + -+ status = kbase_kcpu_jit_allocate_process(queue, -+ cmd); -+ if (status == -EAGAIN) { -+ process_next = false; -+ } else { -+ if (status != 0) -+ queue->has_error = true; ++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of timeout value by multiple Users ++ * gets serialized. 
++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_timeout_global(global_iface, timeout); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( -+ kbdev, queue, -+ &cmd->info.jit_alloc, status); ++ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ kbase_kcpu_jit_allocate_finish(queue, -+ cmd); -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( -+ kbdev, queue); -+ } -+ } ++ return err; ++} + -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue); ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + -+ status = kbase_kcpu_jit_free_process(queue, cmd); -+ if (status) -+ queue->has_error = true; ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( -+ kbdev, queue); -+ break; -+ } -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { -+ struct kbase_suspend_copy_buffer *sus_buf = -+ cmd->info.suspend_buf_copy.sus_buf; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); ++ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} + -+ if (!drain_queue) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( -+ kbdev, queue); ++int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) ++{ ++ int err; + -+ status = kbase_csf_queue_group_suspend_process( -+ queue->kctx, sus_buf, -+ cmd->info.suspend_buf_copy.group_handle); -+ if (status) -+ queue->has_error = true; ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( -+ kbdev, queue, status); -+ } ++ err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + -+ if (!sus_buf->cpu_alloc) { -+ int i; ++ if (!err) { ++#define WAIT_TIMEOUT 5000 /* 50ms timeout */ ++#define DELAY_TIME_IN_US 10 ++ const int max_iterations = WAIT_TIMEOUT; ++ int loop; + -+ for (i = 0; i < sus_buf->nr_pages; i++) -+ put_page(sus_buf->pages[i]); -+ } else { -+ kbase_mem_phy_alloc_kernel_unmapped( -+ sus_buf->cpu_alloc); -+ kbase_mem_phy_alloc_put( -+ sus_buf->cpu_alloc); -+ } ++ /* Wait for the GPU to actually enter protected mode */ ++ for (loop = 0; loop < max_iterations; loop++) { ++ unsigned long flags; ++ bool pmode_exited; + -+ kfree(sus_buf->pages); -+ kfree(sus_buf); -+ break; ++ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_PROTECTED_MODE_ACTIVE) ++ break; ++ ++ /* Check if GPU already exited the protected mode */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ pmode_exited = ++ !kbase_csf_scheduler_protected_mode_in_use(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (pmode_exited) ++ break; ++ ++ udelay(DELAY_TIME_IN_US); + } -+#endif -+ default: -+ dev_dbg(kbdev->dev, -+ "Unrecognized command type"); -+ break; -+ } /* switch */ + -+ /*TBD: error handling */ ++ if (loop == max_iterations) { ++ dev_err(kbdev->dev, "Timeout for actual pmode entry after PROTM_ENTER ack"); ++ err = -ETIMEDOUT; ++ } ++ } + -+ if (!process_next) -+ break; ++ if (unlikely(err)) { ++ if 
(kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); + } + -+ if (i > 0) { -+ queue->start_offset += i; -+ queue->num_pending_cmds -= i; ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); + -+ /* If an attempt to enqueue commands failed then we must raise -+ * an event in case the client wants to retry now that there is -+ * free space in the buffer. -+ */ -+ if (queue->enqueue_failed) { -+ queue->enqueue_failed = false; -+ kbase_csf_event_signal_cpu_only(queue->kctx); -+ } -+ } ++ return err; +} + -+static size_t kcpu_queue_get_space(struct kbase_kcpu_command_queue *queue) ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) +{ -+ return KBASEP_KCPU_QUEUE_SIZE - queue->num_pending_cmds; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev)); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ /* Validate there are no on-slot groups when sending the ++ * halt request to firmware. ++ */ ++ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); ++ set_global_request(global_iface, GLB_REQ_HALT_MASK); ++ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + -+static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( -+ const struct kbase_kcpu_command_queue *queue, -+ const struct kbase_kcpu_command *cmd) ++void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + -+ switch (cmd->type) { -+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue, -+ cmd->info.fence.fence); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue, -+ cmd->info.fence.fence); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: -+ { -+ const struct base_cqs_wait_info *waits = -+ cmd->info.cqs_wait.objs; -+ u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags; -+ unsigned int i; -+ -+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( -+ kbdev, queue, waits[i].addr, waits[i].val, -+ (inherit_err_flags & ((u32)1 << i)) ? 1 : 0); -+ } -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET: -+ { -+ const struct base_cqs_set *sets = cmd->info.cqs_set.objs; -+ unsigned int i; -+ -+ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue, -+ sets[i].addr); -+ } -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: -+ { -+ const struct base_cqs_wait_operation_info *waits = -+ cmd->info.cqs_wait_operation.objs; -+ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags; -+ unsigned int i; ++ /* Trigger the boot of MCU firmware, Use the AUTO mode as ++ * otherwise on fast reset, to exit protected mode, MCU will ++ * not reboot by itself to enter normal mode. 
++ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); ++} + -+ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( -+ kbdev, queue, waits[i].addr, waits[i].val, -+ waits[i].operation, waits[i].data_type, -+ (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0); -+ } -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: -+ { -+ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs; -+ unsigned int i; ++#ifdef KBASE_PM_RUNTIME ++void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; + -+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( -+ kbdev, queue, sets[i].addr, sets[i].val, -+ sets[i].operation, sets[i].data_type); -+ } -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue, -+ cmd->info.import.gpu_va); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue, -+ cmd->info.import.gpu_va); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( -+ kbdev, queue, cmd->info.import.gpu_va); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: -+ { -+ u8 i; ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev)); + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); -+ for (i = 0; i < cmd->info.jit_alloc.count; i++) { -+ const struct base_jit_alloc_info *info = -+ &cmd->info.jit_alloc.info[i]; ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_SLEEP_MASK); ++ dev_dbg(kbdev->dev, "Sending sleep request to MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( -+ kbdev, queue, info->gpu_alloc_addr, info->va_pages, -+ info->commit_pages, info->extension, info->id, info->bin_id, -+ info->max_allocations, info->flags, info->usage_id); -+ } -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); -+ break; -+ } -+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: -+ { -+ u8 i; ++bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); -+ for (i = 0; i < cmd->info.jit_free.count; i++) { -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( -+ kbdev, queue, cmd->info.jit_free.ids[i]); -+ } -+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); -+ break; -+ } -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( -+ kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, -+ cmd->info.suspend_buf_copy.group_handle); -+ break; -+#endif -+ default: -+ dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type); -+ break; -+ } ++ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && ++ 
kbase_csf_firmware_mcu_halted(kbdev)); +} ++#endif + -+int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_enqueue *enq) ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) +{ -+ struct kbase_kcpu_command_queue *queue = NULL; -+ void __user *user_cmds = u64_to_user_ptr(enq->addr); -+ int ret = 0; -+ u32 i; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err = 0; + -+ /* The offset to the first command that is being processed or yet to -+ * be processed is of u8 type, so the number of commands inside the -+ * queue cannot be more than 256. The current implementation expects -+ * exactly 256, any other size will require the addition of wrapping -+ * logic. -+ */ -+ BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256); ++ /* Ensure GPU is powered-up until we complete config update.*/ ++ kbase_csf_scheduler_pm_active(kbdev); ++ kbase_csf_scheduler_wait_mcu_active(kbdev); + -+ /* Whilst the backend interface allows enqueueing multiple commands in -+ * a single operation, the Base interface does not expose any mechanism -+ * to do so. And also right now the handling is missing for the case -+ * where multiple commands are submitted and the enqueue of one of the -+ * command in the set fails after successfully enqueuing other commands -+ * in the set. ++ /* The 'reg_lock' is also taken and is held till the update is ++ * complete, to ensure the config update gets serialized. + */ -+ if (enq->nr_commands != 1) { -+ dev_dbg(kctx->kbdev->dev, -+ "More than one commands enqueued"); -+ return -EINVAL; -+ } ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ /* There might be a race between one thread trying to enqueue commands to the queue -+ * and other thread trying to delete the same queue. -+ * This racing could lead to use-after-free problem by enqueuing thread if -+ * resources for the queue has already been freed by deleting thread. -+ * -+ * To prevent the issue, two mutexes are acquired/release asymmetrically as follows. -+ * -+ * Lock A (kctx mutex) -+ * Lock B (queue mutex) -+ * Unlock A -+ * Unlock B -+ * -+ * With the kctx mutex being held, enqueuing thread will check the queue -+ * and will return error code if the queue had already been deleted. -+ */ -+ mutex_lock(&kctx->csf.kcpu_queues.lock); -+ queue = kctx->csf.kcpu_queues.array[enq->id]; -+ if (queue == NULL) { -+ dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); -+ return -EINVAL; -+ } -+ mutex_lock(&queue->lock); -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (kcpu_queue_get_space(queue) < enq->nr_commands) { -+ ret = -EBUSY; -+ queue->enqueue_failed = true; -+ goto out; -+ } ++ err = wait_for_global_request(kbdev, ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ /* Copy all command's info to the command buffer. -+ * Note: it would be more efficient to process all commands in-line -+ * until we encounter an unresolved CQS_ / FENCE_WAIT, however, the -+ * interface allows multiple commands to be enqueued so we must account -+ * for the possibility to roll back. 
-+ */ ++ kbase_csf_scheduler_pm_idle(kbdev); ++ return err; ++} + -+ for (i = 0; (i != enq->nr_commands) && !ret; ++i) { -+ struct kbase_kcpu_command *kcpu_cmd = -+ &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; -+ struct base_kcpu_command command; -+ unsigned int j; ++/** ++ * copy_grp_and_stm - Copy CS and/or group data ++ * ++ * @iface: Global CSF interface provided by the firmware. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). ++ * @max_group_num: The maximum number of groups to be read. Can be 0, in ++ * which case group_data is unused. ++ * @stream_data: Pointer where to store all the CS data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of CSs to be read. ++ * Can be 0, in which case stream_data is unused. ++ * ++ * Return: Total number of CSs, summed across all groups. ++ */ ++static u32 copy_grp_and_stm( ++ const struct kbase_csf_global_iface * const iface, ++ struct basep_cs_group_control * const group_data, ++ u32 max_group_num, ++ struct basep_cs_stream_control * const stream_data, ++ u32 max_total_stream_num) ++{ ++ u32 i, total_stream_num = 0; + -+ if (copy_from_user(&command, user_cmds, sizeof(command))) { -+ ret = -EFAULT; -+ goto out; -+ } ++ if (WARN_ON((max_group_num > 0) && !group_data)) ++ max_group_num = 0; + -+ user_cmds = (void __user *)((uintptr_t)user_cmds + -+ sizeof(struct base_kcpu_command)); ++ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) ++ max_total_stream_num = 0; + -+ for (j = 0; j < sizeof(command.padding); j++) { -+ if (command.padding[j] != 0) { -+ dev_dbg(kctx->kbdev->dev, -+ "base_kcpu_command padding not 0\n"); -+ ret = -EINVAL; -+ goto out; -+ } -+ } ++ for (i = 0; i < iface->group_num; i++) { ++ u32 j; + -+ kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num); -+ switch (command.type) { -+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ ret = kbase_kcpu_fence_wait_prepare(queue, -+ &command.info.fence, kcpu_cmd); -+#else -+ ret = -EINVAL; -+ dev_warn(kctx->kbdev->dev, "fence wait command unsupported\n"); -+#endif -+ break; -+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ ret = kbase_kcpu_fence_signal_prepare(queue, -+ &command.info.fence, kcpu_cmd); -+#else -+ ret = -EINVAL; -+ dev_warn(kctx->kbdev->dev, "fence signal command unsupported\n"); -+#endif -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: -+ ret = kbase_kcpu_cqs_wait_prepare(queue, -+ &command.info.cqs_wait, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET: -+ ret = kbase_kcpu_cqs_set_prepare(queue, -+ &command.info.cqs_set, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: -+ ret = kbase_kcpu_cqs_wait_operation_prepare(queue, -+ &command.info.cqs_wait_operation, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: -+ ret = kbase_kcpu_cqs_set_operation_prepare(queue, -+ &command.info.cqs_set_operation, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: -+ kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; -+ ret = 0; -+ break; -+ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: -+ ret = kbase_kcpu_map_import_prepare(queue, -+ &command.info.import, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: -+ ret = kbase_kcpu_unmap_import_prepare(queue, -+ &command.info.import, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: -+ ret = kbase_kcpu_unmap_import_force_prepare(queue, -+ &command.info.import, 
kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: -+ ret = kbase_kcpu_jit_allocate_prepare(queue, -+ &command.info.jit_alloc, kcpu_cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: -+ ret = kbase_kcpu_jit_free_prepare(queue, -+ &command.info.jit_free, kcpu_cmd); -+ break; -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: -+ ret = kbase_csf_queue_group_suspend_prepare(queue, -+ &command.info.suspend_buf_copy, -+ kcpu_cmd); -+ break; -+#endif -+ default: -+ dev_dbg(queue->kctx->kbdev->dev, -+ "Unknown command type %u", command.type); -+ ret = -EINVAL; -+ break; ++ if (i < max_group_num) { ++ group_data[i].features = iface->groups[i].features; ++ group_data[i].stream_num = iface->groups[i].stream_num; ++ group_data[i].suspend_size = ++ iface->groups[i].suspend_size; + } -+ } -+ -+ if (!ret) { -+ /* We only instrument the enqueues after all commands have been -+ * successfully enqueued, as if we do them during the enqueue -+ * and there is an error, we won't be able to roll them back -+ * like is done for the command enqueues themselves. -+ */ -+ for (i = 0; i != enq->nr_commands; ++i) { -+ u8 cmd_idx = (u8)(queue->start_offset + queue->num_pending_cmds + i); -+ -+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( -+ queue, &queue->commands[cmd_idx]); ++ for (j = 0; j < iface->groups[i].stream_num; j++) { ++ if (total_stream_num < max_total_stream_num) ++ stream_data[total_stream_num].features = ++ iface->groups[i].streams[j].features; ++ total_stream_num++; + } -+ -+ queue->num_pending_cmds += enq->nr_commands; -+ kcpu_queue_process(queue, false); + } + -+out: -+ mutex_unlock(&queue->lock); -+ -+ return ret; ++ return total_stream_num; +} + -+int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, ++ struct basep_cs_group_control *const group_data, ++ u32 const max_group_num, ++ struct basep_cs_stream_control *const stream_data, ++ u32 const max_total_stream_num, u32 *const glb_version, ++ u32 *const features, u32 *const group_num, u32 *const prfcnt_size, ++ u32 *instr_features) +{ -+ int idx; -+ -+ bitmap_zero(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); -+ -+ for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) -+ kctx->csf.kcpu_queues.array[idx] = NULL; ++ const struct kbase_csf_global_iface * const iface = ++ &kbdev->csf.global_iface; + -+ mutex_init(&kctx->csf.kcpu_queues.lock); ++ if (WARN_ON(!glb_version) || WARN_ON(!features) || ++ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || ++ WARN_ON(!instr_features)) ++ return 0; + -+ atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0); ++ *glb_version = iface->version; ++ *features = iface->features; ++ *group_num = iface->group_num; ++ *prfcnt_size = iface->prfcnt_size; ++ *instr_features = iface->instr_features; + -+ return 0; ++ return copy_grp_and_stm(iface, group_data, max_group_num, ++ stream_data, max_total_stream_num); +} + -+void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) ++const char *kbase_csf_firmware_get_timeline_metadata( ++ struct kbase_device *kbdev, const char *name, size_t *size) +{ -+ while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, -+ KBASEP_MAX_KCPU_QUEUES)) { -+ int id = find_first_bit(kctx->csf.kcpu_queues.in_use, -+ KBASEP_MAX_KCPU_QUEUES); ++ struct firmware_timeline_metadata *metadata; + -+ if (WARN_ON(!kctx->csf.kcpu_queues.array[id])) -+ clear_bit(id, kctx->csf.kcpu_queues.in_use); -+ else -+ (void)delete_queue(kctx, id); ++ 
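kbase_csf_firmware_get_glb_iface() above copies at most max_group_num group entries and max_total_stream_num CS entries, but it always reports the real counts, so a caller that does not know the sizes in advance can query first and copy second. A hypothetical two-pass caller (this helper does not exist in the patch; error handling kept minimal):

static int example_read_glb_iface(struct kbase_device *kbdev)
{
	struct basep_cs_group_control *groups;
	struct basep_cs_stream_control *streams;
	u32 glb_version, features, group_num, prfcnt_size, instr_features;
	u32 total_streams;

	/* First pass: only learn the counts (max sizes of 0 mean "do not copy"). */
	total_streams = kbase_csf_firmware_get_glb_iface(kbdev, NULL, 0, NULL, 0,
							 &glb_version, &features, &group_num,
							 &prfcnt_size, &instr_features);

	groups = kmalloc_array(group_num, sizeof(*groups), GFP_KERNEL);
	streams = kmalloc_array(total_streams, sizeof(*streams), GFP_KERNEL);
	if (!groups || !streams) {
		kfree(streams);
		kfree(groups);
		return -ENOMEM;
	}

	/* Second pass: copy the per-group and per-CS capability data. */
	kbase_csf_firmware_get_glb_iface(kbdev, groups, group_num, streams, total_streams,
					 &glb_version, &features, &group_num,
					 &prfcnt_size, &instr_features);

	/* ... use the data ... */

	kfree(streams);
	kfree(groups);
	return 0;
}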
list_for_each_entry( ++ metadata, &kbdev->csf.firmware_timeline_metadata, node) { ++ if (!strcmp(metadata->name, name)) { ++ *size = metadata->size; ++ return metadata->data; ++ } + } + -+ mutex_destroy(&kctx->csf.kcpu_queues.lock); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); -+ -+int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_delete *del) -+{ -+ return delete_queue(kctx, (u32)del->id); ++ *size = 0; ++ return NULL; +} + -+int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_new *newq) ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping) +{ -+ struct kbase_kcpu_command_queue *queue; -+ int idx; -+ int n; -+ int ret = 0; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ struct kbase_kcpu_dma_fence_meta *metadata; -+#endif -+ /* The queue id is of u8 type and we use the index of the kcpu_queues -+ * array as an id, so the number of elements in the array can't be -+ * more than 256. -+ */ -+ BUILD_BUG_ON(KBASEP_MAX_KCPU_QUEUES > 256); ++ struct tagged_addr *phys; ++ struct kbase_va_region *va_reg; ++ struct page **page_list; ++ void *cpu_addr; ++ int i, ret = 0; ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ unsigned long gpu_map_prot; + -+ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ if (cpu_map_properties & PROT_READ) ++ cpu_map_prot = PAGE_KERNEL_RO; + -+ idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, -+ KBASEP_MAX_KCPU_QUEUES); -+ if (idx >= (int)KBASEP_MAX_KCPU_QUEUES) { -+ ret = -ENOMEM; -+ goto out; ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ } else { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(cpu_map_prot); + } + -+ if (WARN_ON(kctx->csf.kcpu_queues.array[idx])) { -+ ret = -EINVAL; ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) + goto out; -+ } + -+ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; + -+ if (!queue) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, ++ phys, false, NULL); ++ if (ret <= 0) ++ goto phys_mem_pool_alloc_error; + -+ queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx); -+ if (queue->wq == NULL) { -+ kfree(queue); -+ ret = -ENOMEM; ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); + -+ goto out; -+ } ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; + -+ bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); -+ kctx->csf.kcpu_queues.array[idx] = queue; -+ mutex_init(&queue->lock); -+ queue->kctx = kctx; -+ queue->start_offset = 0; -+ queue->num_pending_cmds = 0; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ queue->fence_context = dma_fence_context_alloc(1); -+ queue->fence_seqno = 0; -+ queue->fence_wait_processed = false; ++ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, ++ KBASE_REG_ZONE_MCU_SHARED); ++ if (!va_reg) ++ goto va_region_alloc_error; + -+ metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); -+ if (!metadata) { -+ destroy_workqueue(queue->wq); -+ kfree(queue); -+ ret = 
-ENOMEM; -+ goto out; -+ } ++ mutex_lock(&kbdev->csf.reg_lock); ++ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); ++ va_reg->flags &= ~KBASE_REG_FREE; ++ if (ret) ++ goto va_region_add_error; ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ metadata->kbdev = kctx->kbdev; -+ metadata->kctx_id = kctx->id; -+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", -+ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); -+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) { -+ destroy_workqueue(queue->wq); -+ kfree(queue); -+ kfree(metadata); -+ ret = -EINVAL; -+ goto out; -+ } ++ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); ++ gpu_map_properties |= gpu_map_prot; + -+ kbase_refcount_set(&metadata->refcount, 1); -+ queue->metadata = metadata; -+ atomic_inc(&kctx->kbdev->live_fence_metadata); -+#endif /* CONFIG_SYNC_FILE */ -+ queue->enqueue_failed = false; -+ queue->command_started = false; -+ INIT_LIST_HEAD(&queue->jit_blocked); -+ queue->has_error = false; -+ INIT_WORK(&queue->work, kcpu_queue_process_worker); -+ queue->id = idx; ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, ++ &phys[0], num_pages, gpu_map_properties, ++ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); ++ if (ret) ++ goto mmu_insert_pages_error; + -+ newq->id = idx; ++ kfree(page_list); ++ csf_mapping->phys = phys; ++ csf_mapping->cpu_addr = cpu_addr; ++ csf_mapping->va_reg = va_reg; ++ csf_mapping->num_pages = num_pages; + -+ /* Fire the tracepoint with the mutex held to enforce correct ordering -+ * with the summary stream. -+ */ -+ KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id, -+ queue->num_pending_cmds); ++ return 0; + -+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, -+ queue->fence_context, 0); -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); -+#endif ++mmu_insert_pages_error: ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, va_reg); ++va_region_add_error: ++ kbase_free_alloced_region(va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++va_region_alloc_error: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false, false); ++ ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); +out: -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ /* Zero-initialize the mapping to make sure that the termination ++ * function doesn't try to unmap or free random addresses. 
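As the comment above notes, the failure path zero-fills the csf_mapping, so kbase_csf_firmware_mcu_shared_mapping_term() can always be called on the same object regardless of whether the init step succeeded. A hypothetical caller pairing the two (the helper name and usage are illustrative only):

static int example_shared_page(struct kbase_device *kbdev)
{
	struct kbase_csf_mapping map;
	int err;

	/* One CPU-writable, GPU-readable page of MCU shared memory. */
	err = kbase_csf_firmware_mcu_shared_mapping_init(kbdev, 1, PROT_WRITE,
							 KBASE_REG_GPU_RD, &map);
	if (!err)
		memset(map.cpu_addr, 0, PAGE_SIZE);

	/* Safe even after a failed init: phys/cpu_addr/va_reg are all NULL then. */
	kbase_csf_firmware_mcu_shared_mapping_term(kbdev, &map);
	return err;
}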
++ */ ++ csf_mapping->phys = NULL; ++ csf_mapping->cpu_addr = NULL; ++ csf_mapping->va_reg = NULL; ++ csf_mapping->num_pages = 0; + -+ return ret; ++ return -ENOMEM; +} -+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h ++ ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) ++{ ++ if (csf_mapping->va_reg) { ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, csf_mapping->va_reg); ++ kbase_free_alloced_region(csf_mapping->va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ } ++ ++ if (csf_mapping->phys) { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ csf_mapping->num_pages, csf_mapping->phys, false, ++ false); ++ } ++ ++ vunmap(csf_mapping->cpu_addr); ++ kfree(csf_mapping->phys); ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h new file mode 100644 -index 000000000..5cad8b200 +index 000000000..714a14001 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h -@@ -0,0 +1,384 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +@@ -0,0 +1,916 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -156724,621 +158488,911 @@ index 000000000..5cad8b200 + * + */ + -+#ifndef _KBASE_CSF_KCPU_H_ -+#define _KBASE_CSF_KCPU_H_ ++#ifndef _KBASE_CSF_FIRMWARE_H_ ++#define _KBASE_CSF_FIRMWARE_H_ + -+#include -+#include ++#include "device/mali_kbase_device.h" ++#include + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+#include -+#else -+#include -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++/* ++ * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: ++ * Commit a3266bd49c721e2e0a71f352d83713fbd60caadb ++ * Author: Luis R. Rodriguez ++ * Date: Fri Aug 17 15:46:29 2018 -0700 ++ * ++ * mm: provide a fallback for PAGE_KERNEL_RO for architectures ++ * ++ * Some architectures do not define certain PAGE_KERNEL_* flags, this is ++ * either because: ++ * ++ * a) The way to implement some of these flags is *not yet ported*, or ++ * b) The architecture *has no way* to describe them ++ * ++ * [snip] ++ * ++ * This can be removed once support of 32bit ARM kernels predating 4.19 is no ++ * longer required. ++ */ ++#ifndef PAGE_KERNEL_RO ++#define PAGE_KERNEL_RO PAGE_KERNEL ++#endif + -+/* The maximum number of KCPU commands in flight, enqueueing more commands -+ * than this value shall block. ++/* Address space number to claim for the firmware. */ ++#define MCU_AS_NR 0 ++#define MCU_AS_BITMASK (1 << MCU_AS_NR) ++ ++/* Number of available Doorbells */ ++#define CSF_NUM_DOORBELL ((u8)24) ++ ++/* Offset to the first HW doorbell page */ ++#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) ++ ++/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ ++#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) ++ ++/* Doorbell 0 is used by the driver. 
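Per the comments on the defines above, doorbell pages start at CSF_HW_DOORBELL_PAGE_OFFSET and are laid out back to back, CSF_HW_DOORBELL_PAGE_SIZE bytes apart, with page 0 reserved for the driver. A self-contained sketch of the resulting offset calculation (the helper itself is not part of the patch):

#include <stdint.h>

#define CSF_NUM_DOORBELL            ((uint8_t)24)
#define CSF_HW_DOORBELL_PAGE_OFFSET ((uint32_t)0x80000)
#define CSF_HW_DOORBELL_PAGE_SIZE   ((uint32_t)0x10000)

/* Byte offset of a HW doorbell page within the GPU register space.
 * 'doorbell_nr' is expected to be below CSF_NUM_DOORBELL.
 */
static uint32_t doorbell_page_offset(uint32_t doorbell_nr)
{
	return CSF_HW_DOORBELL_PAGE_OFFSET +
	       doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE;
}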
*/ ++#define CSF_KERNEL_DOORBELL_NR ((u32)0) ++ ++/* Offset of name inside a trace buffer entry in the firmware image */ ++#define TRACE_BUFFER_ENTRY_NAME_OFFSET (0x1C) ++ ++/* All implementations of the host interface with major version 0 must comply ++ * with these restrictions: + */ -+#define KBASEP_KCPU_QUEUE_SIZE ((size_t)256) ++/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */ ++#define MIN_SUPPORTED_CSGS 3 ++#define MAX_SUPPORTED_CSGS 31 ++/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */ ++#define MIN_SUPPORTED_STREAMS_PER_GROUP 8 ++/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */ ++#define MAX_SUPPORTED_STREAMS_PER_GROUP 32 ++ ++struct kbase_device; ++ + +/** -+ * struct kbase_kcpu_command_import_info - Structure which holds information -+ * about the buffer to be imported -+ * -+ * @gpu_va: Address of the buffer to be imported. ++ * struct kbase_csf_mapping - Memory mapping for CSF memory. ++ * @phys: Physical memory allocation used by the mapping. ++ * @cpu_addr: Starting CPU address for the mapping. ++ * @va_reg: GPU virtual address region for the mapping. ++ * @num_pages: Size of the mapping, in memory pages. + */ -+struct kbase_kcpu_command_import_info { -+ u64 gpu_va; ++struct kbase_csf_mapping { ++ struct tagged_addr *phys; ++ void *cpu_addr; ++ struct kbase_va_region *va_reg; ++ unsigned int num_pages; +}; + +/** -+ * struct kbase_kcpu_command_fence_info - Structure which holds information about the -+ * fence object enqueued in the kcpu command queue -+ * -+ * @fence_cb: Fence callback -+ * @fence: Fence -+ * @kcpu_queue: kcpu command queue ++ * struct kbase_csf_trace_buffers - List and state of firmware trace buffers. ++ * @list: List of trace buffers descriptors. ++ * @mcu_rw: Metadata for the MCU shared memory mapping used for ++ * GPU-readable,writable/CPU-writable variables. ++ * @mcu_write: Metadata for the MCU shared memory mapping used for ++ * GPU-writable/CPU-readable variables. + */ -+struct kbase_kcpu_command_fence_info { -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence_cb fence_cb; -+ struct fence *fence; -+#else -+ struct dma_fence_cb fence_cb; -+ struct dma_fence *fence; -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ -+ struct kbase_kcpu_command_queue *kcpu_queue; ++struct kbase_csf_trace_buffers { ++ struct list_head list; ++ struct kbase_csf_mapping mcu_rw; ++ struct kbase_csf_mapping mcu_write; +}; + +/** -+ * struct kbase_kcpu_command_cqs_set_info - Structure which holds information -+ * about CQS objects for the kcpu CQS set command ++ * struct kbase_csf_cmd_stream_info - CSI provided by the firmware. + * -+ * @objs: Array of structures which define CQS objects to be used by -+ * the kcpu command. -+ * @nr_objs: Number of CQS objects in the array. ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @features: Bit field of CS features (e.g. which types of jobs ++ * are supported). Bits 7:0 specify the number of work registers(-1). ++ * Bits 11:8 specify the number of scoreboard entries(-1). ++ * @input: Address of CSI input page. ++ * @output: Address of CSI output page. 
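The features word described above encodes counts minus one, so decoding it is a matter of masking, shifting and adding one back. A self-contained sketch based on the bit layout given in the comment (these helpers do not exist in the driver):

#include <stdint.h>

/* Bits 7:0 of the CS 'features' word hold the number of work registers - 1. */
static uint32_t cs_num_work_registers(uint32_t features)
{
	return (features & 0xFFu) + 1;
}

/* Bits 11:8 hold the number of scoreboard entries - 1. */
static uint32_t cs_num_scoreboard_entries(uint32_t features)
{
	return ((features >> 8) & 0xFu) + 1;
}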
+ */ -+struct kbase_kcpu_command_cqs_set_info { -+ struct base_cqs_set *objs; -+ unsigned int nr_objs; ++struct kbase_csf_cmd_stream_info { ++ struct kbase_device *kbdev; ++ u32 features; ++ void *input; ++ void *output; +}; + +/** -+ * struct kbase_kcpu_command_cqs_wait_info - Structure which holds information -+ * about CQS objects for the kcpu CQS wait command ++ * kbase_csf_firmware_cs_input() - Set a word in a CS's input page + * -+ * @objs: Array of structures which define CQS objects to be used by -+ * the kcpu command. -+ * @signaled: Bit array used to report the status of the CQS wait objects. -+ * 1 is signaled, 0 otherwise. -+ * @nr_objs: Number of CQS objects in the array. -+ * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field -+ * to be served as the source for importing into the -+ * queue's error-state. ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. ++ * @value: Value to be written. + */ -+struct kbase_kcpu_command_cqs_wait_info { -+ struct base_cqs_wait_info *objs; -+ unsigned long *signaled; -+ unsigned int nr_objs; -+ u32 inherit_err_flags; -+}; ++void kbase_csf_firmware_cs_input( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value); + +/** -+ * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information -+ * about CQS objects for the kcpu CQS timeline set command ++ * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page + * -+ * @objs: Array of structures which define CQS timeline objects to be used by -+ * the kcpu command. -+ * @nr_objs: Number of CQS objects in the array. ++ * Return: Value of the word read from the CS's input page. ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. + */ -+struct kbase_kcpu_command_cqs_set_operation_info { -+ struct base_cqs_set_operation_info *objs; -+ unsigned int nr_objs; -+}; ++u32 kbase_csf_firmware_cs_input_read( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset); + +/** -+ * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information -+ * about CQS objects for the kcpu CQS timeline wait command ++ * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page + * -+ * @objs: Array of structures which define CQS timeline objects to be used by -+ * the kcpu command. -+ * @signaled: Bit array used to report the status of the CQS wait objects. -+ * 1 is signaled, 0 otherwise. -+ * @nr_objs: Number of CQS objects in the array. -+ * @inherit_err_flags: Bit-pattern for CQSs in the array who's error field is to -+ * be used as the source to import into the queue's error-state ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. ++ * @mask: Bitmask with the bits to be modified set. + */ -+struct kbase_kcpu_command_cqs_wait_operation_info { -+ struct base_cqs_wait_operation_info *objs; -+ unsigned long *signaled; -+ unsigned int nr_objs; -+ u32 inherit_err_flags; -+}; ++void kbase_csf_firmware_cs_input_mask( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset, ++ u32 value, u32 mask); + +/** -+ * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information -+ * needed for the kcpu command for jit allocations ++ * kbase_csf_firmware_cs_output() - Read a word in a CS's output page + * -+ * @node: Used to keep track of all JIT free/alloc commands in submission -+ * order. 
This must be located in the front of this struct to -+ * match that of kbase_kcpu_command_jit_free_info. -+ * @info: Array of objects of the struct base_jit_alloc_info type which -+ * specify jit allocations to be made by the kcpu command. -+ * @count: Number of jit alloc objects in the array. -+ * @blocked: Whether this allocation has been put into the pending list to -+ * be retried later. ++ * Return: Value of the word read from the CS's output page. ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. + */ -+struct kbase_kcpu_command_jit_alloc_info { -+ struct list_head node; -+ struct base_jit_alloc_info *info; -+ u8 count; -+ bool blocked; ++u32 kbase_csf_firmware_cs_output( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset); ++/** ++ * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the ++ * firmware. ++ * ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @features: Bit mask of features. Reserved bits should be 0, and should ++ * be ignored. ++ * @input: Address of global interface input page. ++ * @output: Address of global interface output page. ++ * @suspend_size: Size in bytes for normal suspend buffer for the CSG ++ * @protm_suspend_size: Size in bytes for protected mode suspend buffer ++ * for the CSG. ++ * @stream_num: Number of CSs in the CSG. ++ * @stream_stride: Stride in bytes in JASID0 virtual address between ++ * CS capability structures. ++ * @streams: Address of an array of CS capability structures. ++ */ ++struct kbase_csf_cmd_stream_group_info { ++ struct kbase_device *kbdev; ++ u32 features; ++ void *input; ++ void *output; ++ u32 suspend_size; ++ u32 protm_suspend_size; ++ u32 stream_num; ++ u32 stream_stride; ++ struct kbase_csf_cmd_stream_info *streams; +}; + +/** -+ * struct kbase_kcpu_command_jit_free_info - Structure which holds information -+ * needed for the kcpu jit free command ++ * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page + * -+ * @node: Used to keep track of all JIT free/alloc commands in submission -+ * order. This must be located in the front of this struct to -+ * match that of kbase_kcpu_command_jit_alloc_info. -+ * @ids: Array of identifiers of jit allocations which are to be freed -+ * by the kcpu command. -+ * @count: Number of elements in the array. ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. ++ * @value: Value to be written. + */ -+struct kbase_kcpu_command_jit_free_info { -+ struct list_head node; -+ u8 *ids; -+ u8 count; -+}; ++void kbase_csf_firmware_csg_input( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, ++ u32 value); + +/** -+ * struct kbase_suspend_copy_buffer - information about the suspend buffer -+ * to be copied. ++ * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page + * -+ * @size: size of the suspend buffer in bytes. -+ * @pages: pointer to an array of pointers to the pages which contain -+ * the user buffer. -+ * @nr_pages: number of pages. -+ * @offset: offset into the pages -+ * @cpu_alloc: Reference to physical pages of suspend buffer allocation. ++ * Return: Value of the word read from the CSG's input page. ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. 
+ */ -+struct kbase_suspend_copy_buffer { -+ size_t size; -+ struct page **pages; -+ int nr_pages; -+ size_t offset; -+ struct kbase_mem_phy_alloc *cpu_alloc; -+}; ++u32 kbase_csf_firmware_csg_input_read( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST +/** -+ * struct kbase_kcpu_command_group_suspend_info - structure which contains -+ * suspend buffer data captured for a suspended queue group. ++ * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's ++ * input page + * -+ * @sus_buf: Pointer to the structure which contains details of the -+ * user buffer and its kernel pinned pages. -+ * @group_handle: Handle to the mapping of CSG. ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. ++ * @mask: Bitmask with the bits to be modified set. + */ -+struct kbase_kcpu_command_group_suspend_info { -+ struct kbase_suspend_copy_buffer *sus_buf; -+ u8 group_handle; -+}; -+#endif ++void kbase_csf_firmware_csg_input_mask( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, ++ u32 value, u32 mask); + ++/** ++ * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page ++ * ++ * Return: Value of the word read from the CSG's output page. ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_csg_output( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + +/** -+ * struct kbase_kcpu_command - Command which is to be part of the kernel -+ * command queue ++ * struct kbase_csf_global_iface - Global CSF interface ++ * provided by the firmware. + * -+ * @type: Type of the command. -+ * @enqueue_ts: Denotes the relative time of enqueueing, a smaller value -+ * indicates that it has been enqueued earlier. -+ * @info: Structure which holds information about the command -+ * dependent on the command type. -+ * @info.fence: Fence -+ * @info.cqs_wait: CQS wait -+ * @info.cqs_set: CQS set -+ * @info.cqs_wait_operation: CQS wait operation -+ * @info.cqs_set_operation: CQS set operation -+ * @info.import: import -+ * @info.jit_alloc: JIT allocation -+ * @info.jit_free: JIT deallocation -+ * @info.suspend_buf_copy: suspend buffer copy -+ * @info.sample_time: sample time ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @version: Bits 31:16 hold the major version number and 15:0 hold the minor ++ * version number. A higher minor version is backwards-compatible ++ * with a lower minor version for the same major version. ++ * @features: Bit mask of features (e.g. whether certain types of job can ++ * be suspended). Reserved bits should be 0, and should be ignored. ++ * @input: Address of global interface input page. ++ * @output: Address of global interface output page. ++ * @group_num: Number of CSGs supported. ++ * @group_stride: Stride in bytes in JASID0 virtual address between ++ * CSG capability structures. ++ * @prfcnt_size: Performance counters size. ++ * @instr_features: Instrumentation features. (csf >= 1.1.0) ++ * @groups: Address of an array of CSG capability structures. 
+ */ -+struct kbase_kcpu_command { -+ enum base_kcpu_command_type type; -+ u64 enqueue_ts; -+ union { -+ struct kbase_kcpu_command_fence_info fence; -+ struct kbase_kcpu_command_cqs_wait_info cqs_wait; -+ struct kbase_kcpu_command_cqs_set_info cqs_set; -+ struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; -+ struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; -+ struct kbase_kcpu_command_import_info import; -+ struct kbase_kcpu_command_jit_alloc_info jit_alloc; -+ struct kbase_kcpu_command_jit_free_info jit_free; -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ struct kbase_kcpu_command_group_suspend_info suspend_buf_copy; -+#endif -+ } info; ++struct kbase_csf_global_iface { ++ struct kbase_device *kbdev; ++ u32 version; ++ u32 features; ++ void *input; ++ void *output; ++ u32 group_num; ++ u32 group_stride; ++ u32 prfcnt_size; ++ u32 instr_features; ++ struct kbase_csf_cmd_stream_group_info *groups; +}; + +/** -+ * struct kbase_kcpu_command_queue - a command queue executed by the kernel ++ * kbase_csf_firmware_global_input() - Set a word in the global input page + * -+ * @lock: Lock to protect accesses to this queue. -+ * @kctx: The context to which this command queue belongs. -+ * @commands: Array of commands which have been successfully -+ * enqueued to this command queue. -+ * @wq: Dedicated workqueue for processing commands. -+ * @work: struct work_struct which contains a pointer to -+ * the function which handles processing of kcpu -+ * commands enqueued into a kcpu command queue; -+ * part of kernel API for processing workqueues -+ * @start_offset: Index of the command to be executed next -+ * @id: KCPU command queue ID. -+ * @num_pending_cmds: The number of commands enqueued but not yet -+ * executed or pending -+ * @cqs_wait_count: Tracks the number of CQS wait commands enqueued -+ * @fence_context: The dma-buf fence context number for this kcpu -+ * queue. A unique context number is allocated for -+ * each kcpu queue. -+ * @fence_seqno: The dma-buf fence sequence number for the fence -+ * that is returned on the enqueue of fence signal -+ * command. This is increased every time the -+ * fence signal command is queued. -+ * @fence_wait_processed: Used to avoid reprocessing of the fence wait -+ * command which has blocked the processing of -+ * commands that follow it. -+ * @enqueue_failed: Indicates that no space has become available in -+ * the buffer since an enqueue operation failed -+ * because of insufficient free space. -+ * @command_started: Indicates that the command at the front of the -+ * queue has been started in a previous queue -+ * process, but was not completed due to some -+ * unmet dependencies. Ensures that instrumentation -+ * of the execution start of these commands is only -+ * fired exactly once. -+ * @has_error: Indicates that the kcpu queue is in error mode -+ * or without errors since last cleaned. -+ * @jit_blocked: Used to keep track of command queues blocked -+ * by a pending JIT allocation command. -+ * @fence_timeout: Timer used to detect the fence wait timeout. -+ * @metadata: Metadata structure containing basic information about -+ * this queue for any fence objects associated with this queue. ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. ++ * @value: Value to be written. 
+ */ -+struct kbase_kcpu_command_queue { -+ struct mutex lock; -+ struct kbase_context *kctx; -+ struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; -+ struct workqueue_struct *wq; -+ struct work_struct work; -+ u8 start_offset; -+ u8 id; -+ u16 num_pending_cmds; -+ u32 cqs_wait_count; -+ u64 fence_context; -+ unsigned int fence_seqno; -+ bool fence_wait_processed; -+ bool enqueue_failed; -+ bool command_started; -+ struct list_head jit_blocked; -+ bool has_error; -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ struct timer_list fence_timeout; -+#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ struct kbase_kcpu_dma_fence_meta *metadata; -+#endif /* CONFIG_SYNC_FILE */ -+}; ++void kbase_csf_firmware_global_input( ++ const struct kbase_csf_global_iface *iface, u32 offset, u32 value); + +/** -+ * kbase_csf_kcpu_queue_new - Create new KCPU command queue. ++ * kbase_csf_firmware_global_input_mask() - Set part of a word in the global ++ * input page + * -+ * @kctx: Pointer to the kbase context within which the KCPU command -+ * queue will be created. -+ * @newq: Pointer to the structure which contains information about -+ * the new KCPU command queue to be created. ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. ++ * @mask: Bitmask with the bits to be modified set. ++ */ ++void kbase_csf_firmware_global_input_mask( ++ const struct kbase_csf_global_iface *iface, u32 offset, ++ u32 value, u32 mask); ++ ++/** ++ * kbase_csf_firmware_global_input_read() - Read a word in a global input page + * -+ * Return: 0 if successful or a negative error code on failure. ++ * Return: Value of the word read from the global input page. ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. + */ -+int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_new *newq); ++u32 kbase_csf_firmware_global_input_read( ++ const struct kbase_csf_global_iface *info, u32 offset); + +/** -+ * kbase_csf_kcpu_queue_delete - Delete KCPU command queue. ++ * kbase_csf_firmware_global_output() - Read a word in the global output page + * -+ * @kctx: Pointer to the kbase context from which the KCPU command -+ * queue is to be deleted. -+ * @del: Pointer to the structure which specifies the KCPU command -+ * queue to be deleted. ++ * Return: Value of the word read from the global output page. + * -+ * Return: 0 if successful or a negative error code on failure. ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. + */ -+int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_delete *del); ++u32 kbase_csf_firmware_global_output( ++ const struct kbase_csf_global_iface *iface, u32 offset); + +/** -+ * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command -+ * queue. ++ * kbase_csf_ring_doorbell() - Ring the doorbell + * -+ * @kctx: Pointer to the kbase context within which the KCPU command -+ * is to be enqueued into the KCPU command queue. -+ * @enq: Pointer to the structure which specifies the KCPU command -+ * as well as the KCPU command queue into which the command -+ * is to be enqueued. 
++ * @kbdev: An instance of the GPU platform device ++ * @doorbell_nr: Index of the HW doorbell page ++ */ ++void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr); ++ ++/** ++ * kbase_csf_read_firmware_memory - Read a value in a GPU address + * -+ * Return: 0 if successful or a negative error code on failure. ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to read ++ * @value: output pointer to which the read value will be written. ++ * ++ * This function read a value in a GPU address that belongs to ++ * a private firmware memory region. The function assumes that the location ++ * is not permanently mapped on the CPU address space, therefore it maps it ++ * and then unmaps it to access it independently. + */ -+int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_enqueue *enq); ++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value); + +/** -+ * kbase_csf_kcpu_queue_context_init - Initialize the kernel CPU queues context -+ * for a GPU address space ++ * kbase_csf_update_firmware_memory - Write a value in a GPU address + * -+ * @kctx: Pointer to the kbase context being initialized. ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to write ++ * @value: Value to write + * -+ * Return: 0 if successful or a negative error code on failure. ++ * This function writes a given value in a GPU address that belongs to ++ * a private firmware memory region. The function assumes that the destination ++ * is not permanently mapped on the CPU address space, therefore it maps it ++ * and then unmaps it to access it independently. + */ -+int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); ++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value); + +/** -+ * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context -+ * for a GPU address space -+ * @kctx: Pointer to the kbase context being terminated. ++ * kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the ++ * region of its final execution location. + * -+ * This function deletes any kernel CPU queues that weren't deleted before -+ * context termination. ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to read ++ * @value: Output pointer to which the read value will be written + * ++ * This function read a value in a GPU address that belongs to a private loaded ++ * firmware memory region based on its final execution location. The function ++ * assumes that the location is not permanently mapped on the CPU address space, ++ * therefore it maps it and then unmaps it to access it independently. This function ++ * needs to be used when accessing firmware memory regions which will be moved to ++ * their final execution location during firmware boot using an address based on the ++ * final execution location. + */ -+void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); ++void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value); + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+/* Test wrappers for dma fence operations. */ -+int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command_fence_info *fence_info); ++/** ++ * kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the ++ * region of its final execution location. 
++ * ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to write ++ * @value: Value to write ++ * ++ * This function writes a value in a GPU address that belongs to a private loaded ++ * firmware memory region based on its final execution location. The function ++ * assumes that the location is not permanently mapped on the CPU address space, ++ * therefore it maps it and then unmaps it to access it independently. This function ++ * needs to be used when accessing firmware memory regions which will be moved to ++ * their final execution location during firmware boot using an address based on the ++ * final execution location. ++ */ ++void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value); + -+int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, -+ struct kbase_kcpu_command *current_command, -+ struct base_fence *fence, struct sync_file **sync_file, int *fd); -+#endif /* CONFIG_SYNC_FILE */ ++/** ++ * kbase_csf_firmware_early_init() - Early initialization for the firmware. ++ * @kbdev: Kbase device ++ * ++ * Initialize resources related to the firmware. Must be called at kbase probe. ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev); + -+#endif /* _KBASE_CSF_KCPU_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c -new file mode 100644 -index 000000000..fa877778c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c -@@ -0,0 +1,193 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++/** ++ * kbase_csf_firmware_early_term() - Terminate resources related to the firmware ++ * after the firmware unload has been done. + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * @kbdev: Device pointer + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * This should be called only when kbase probe fails or gets rmmoded. ++ */ ++void kbase_csf_firmware_early_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_late_init() - Late initialization for the firmware. ++ * @kbdev: Kbase device + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Initialize resources related to the firmware. But must be called after ++ * backend late init is done. Must be used at probe time only. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_late_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU ++ * @kbdev: Kbase device + * ++ * Request the firmware from user space and load it into memory. 
++ * ++ * Return: 0 if successful, negative error code on failure + */ ++int kbase_csf_firmware_load_init(struct kbase_device *kbdev); + -+#include "mali_kbase_csf_kcpu_debugfs.h" -+#include -+#include ++/** ++ * kbase_csf_firmware_unload_term() - Unload the firmware ++ * @kbdev: Kbase device ++ * ++ * Frees the memory allocated by kbase_csf_firmware_load_init() ++ */ ++void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include "mali_kbase_sync.h" -+#endif ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++/** ++ * kbase_csf_firmware_mcu_register_write - Write to MCU register ++ * ++ * @kbdev: Instance of a gpu platform device that implements a csf interface. ++ * @reg_addr: Register address to write into ++ * @reg_val: Value to be written ++ * ++ * Write a desired value to a register in MCU address space. ++ * ++ * return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 const reg_val); ++/** ++ * kbase_csf_firmware_mcu_register_read - Read from MCU register ++ * ++ * @kbdev: Instance of a gpu platform device that implements a csf interface. ++ * @reg_addr: Register address to read from ++ * @reg_val: Value as present in reg_addr register ++ * ++ * Read a value from MCU address space. ++ * ++ * return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 *reg_val); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_csf_firmware_mcu_register_poll - Poll MCU register ++ * ++ * @kbdev: Instance of a gpu platform device that implements a csf interface. ++ * @reg_addr: Register address to read from ++ * @val_mask: Value to mask the read value for comparison ++ * @reg_val: Value to be compared against ++ * ++ * Continue to read a value from MCU address space until it matches given mask and value. ++ * ++ * return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, ++ u32 const val_mask, u32 const reg_val); ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + +/** -+ * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU -+ * queues blocked on CQS wait commands. ++ * kbase_csf_firmware_ping - Send the ping request to firmware. + * -+ * @file: The seq_file to print to -+ * @kctx: The context of the KCPU queue -+ * @waits: Pointer to the KCPU CQS wait command info ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * The function sends the ping request to firmware. + */ -+static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, -+ struct kbase_context *kctx, -+ struct kbase_kcpu_command_cqs_wait_info *waits) -+{ -+ unsigned int i; ++void kbase_csf_firmware_ping(struct kbase_device *kbdev); + -+ for (i = 0; i < waits->nr_objs; i++) { -+ struct kbase_vmap_struct *mapping; -+ u32 val; -+ char const *msg; -+ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, -+ waits->objs[i].addr, &mapping); ++/** ++ * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW. ++ * ++ * The function sends the ping request to firmware and waits to confirm it is ++ * alive. ++ * ++ * Return: 0 on success, or negative on failure. 
++ */ ++int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms); + -+ if (!cpu_ptr) -+ return; ++/** ++ * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @timeout: The maximum number of GPU cycles that is allowed to elapse ++ * without forward progress before the driver terminates a GPU ++ * command queue group. ++ * ++ * Configures the progress timeout value used by the firmware to decide ++ * when to report that a task is not making progress on an endpoint. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout); + -+ val = *cpu_ptr; ++/** ++ * kbase_csf_enter_protected_mode - Send the Global request to firmware to ++ * enter protected mode. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * The function must be called with kbdev->csf.scheduler.interrupt_lock held ++ * and it does not wait for the protected mode entry to complete. ++ */ ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); + -+ kbase_phy_alloc_mapping_put(kctx, mapping); ++/** ++ * kbase_csf_wait_protected_mode_enter - Wait for the completion of PROTM_ENTER ++ * Global request sent to firmware. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function needs to be called after kbase_csf_enter_protected_mode() to ++ * wait for the GPU to actually enter protected mode. GPU reset is triggered if ++ * the wait is unsuccessful. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); + -+ msg = (waits->inherit_err_flags && (1U << i)) ? "true" : -+ "false"; -+ seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", -+ waits->objs[i].addr, val, waits->objs[i].val, msg); -+ } ++static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) ++{ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ return true; ++#else ++ return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == ++ MCU_STATUS_HALTED); ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +} + +/** -+ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue ++ * kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to ++ * halt its operation and bring itself ++ * into a known internal state for warm ++ * boot later. + * -+ * @file: The seq_file to print to -+ * @kctx: The context of the KCPU queue -+ * @queue: Pointer to the KCPU queue ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ -+static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, -+ struct kbase_context *kctx, -+ struct kbase_kcpu_command_queue *queue) -+{ -+ if (WARN_ON(!queue)) -+ return; -+ -+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev); + -+ seq_printf(file, "%16u, %11u, %7u, %13llu %8u", -+ queue->num_pending_cmds, queue->enqueue_failed, -+ queue->command_started ? 1 : 0, -+ queue->fence_context, queue->fence_seqno); ++/** ++ * kbase_csf_firmware_enable_mcu - Send the command to enable MCU ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
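kbase_csf_firmware_trigger_mcu_halt() only sends the halt request; kbase_csf_firmware_mcu_halted() reports whether MCU_STATUS has reached the halted state. The driver waits for this through its own PM state machine, so the polling loop below is only a hypothetical illustration of how the two primitives relate:

static int example_halt_mcu(struct kbase_device *kbdev, unsigned int timeout_ms)
{
	const unsigned long end = jiffies + msecs_to_jiffies(timeout_ms);

	kbase_csf_firmware_trigger_mcu_halt(kbdev);

	while (!kbase_csf_firmware_mcu_halted(kbdev)) {
		if (time_after(jiffies, end))
			return -ETIMEDOUT;
		usleep_range(100, 200);
	}

	return 0;
}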
++ */ ++void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev); + -+ if (queue->command_started) { -+ struct kbase_kcpu_command *cmd = -+ &queue->commands[queue->start_offset]; -+ switch (cmd->type) { -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: -+ { -+ struct kbase_sync_fence_info info; ++/** ++ * kbase_csf_firmware_disable_mcu - Send the command to disable MCU ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev); + -+ kbase_sync_fence_info_get(cmd->info.fence.fence, &info); -+ seq_printf(file, ", Fence %pK %s %s", -+ info.fence, info.name, -+ kbase_sync_status_string(info.status)); -+ break; -+ } -+#endif -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: -+ seq_puts(file, ", CQS "); -+ kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, -+ &cmd->info.cqs_wait); -+ break; -+ default: -+ seq_puts(file, ", U, Unknown blocking command"); -+ break; -+ } -+ } ++/** ++ * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled ++ * status. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev); + -+ seq_puts(file, "\n"); -+} ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_csf_firmware_trigger_mcu_sleep - Send the command to put MCU in sleep ++ * state. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev); + +/** -+ * kbasep_csf_kcpu_debugfs_show() - Print the KCPU queues debug information ++ * kbase_csf_firmware_is_mcu_in_sleep - Check if sleep request has completed ++ * and MCU has halted. + * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * Return: Negative error code or 0 on success. ++ * Return: true if sleep request has completed, otherwise false. + */ -+static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, void *data) -+{ -+ struct kbase_context *kctx = file->private; -+ unsigned long idx; ++bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev); ++#endif + -+ seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION); -+ seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); -+ mutex_lock(&kctx->csf.kcpu_queues.lock); ++/** ++ * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for ++ * the cold boot case firmware image would ++ * be reloaded from filesystem into memory. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev); + -+ idx = find_first_bit(kctx->csf.kcpu_queues.in_use, -+ KBASEP_MAX_KCPU_QUEUES); ++/** ++ * kbase_csf_firmware_reload_completed - The reboot of MCU firmware has ++ * completed. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev); + -+ while (idx < KBASEP_MAX_KCPU_QUEUES) { -+ struct kbase_kcpu_command_queue *queue = -+ kctx->csf.kcpu_queues.array[idx]; ++/** ++ * kbase_csf_firmware_global_reinit - Send the Global configuration requests ++ * after the reboot of MCU firmware. 
++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @core_mask: Mask of the enabled shader cores. ++ */ ++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, ++ u64 core_mask); + -+ seq_printf(file, "%9lu( %s ), ", idx, -+ queue->has_error ? "InErr" : "NoErr"); -+ kbasep_csf_kcpu_debugfs_print_queue(file, kctx, -+ kctx->csf.kcpu_queues.array[idx]); ++/** ++ * kbase_csf_firmware_global_reinit_complete - Check the Global configuration ++ * requests, sent after the reboot of MCU firmware, have ++ * completed or not. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: true if the Global configuration requests completed otherwise false. ++ */ ++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); + -+ idx = find_next_bit(kctx->csf.kcpu_queues.in_use, -+ KBASEP_MAX_KCPU_QUEUES, idx + 1); -+ } ++/** ++ * kbase_csf_firmware_update_core_attr - Send the Global configuration request ++ * to update the requested core attribute ++ * changes. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @update_core_pwroff_timer: If true, signal the firmware needs to update ++ * the MCU power-off timer value. ++ * @update_core_mask: If true, need to do the core_mask update with ++ * the supplied core_mask value. ++ * @core_mask: New core mask value if update_core_mask is true, ++ * otherwise unused. ++ */ ++void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask); + -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); -+ return 0; -+} ++/** ++ * kbase_csf_firmware_core_attr_updated - Check the Global configuration ++ * request has completed or not, that was sent to update ++ * the core attributes. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: true if the Global configuration request to update the core ++ * attributes has completed, otherwise false. ++ */ ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); + -+static int kbasep_csf_kcpu_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_csf_kcpu_debugfs_show, in->i_private); -+} ++/** ++ * kbase_csf_firmware_get_glb_iface - Request the global control block of CSF ++ * interface capabilities ++ * ++ * @kbdev: Kbase device. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). ++ * @max_group_num: The maximum number of groups to be read. ++ * Can be 0, in which case group_data is unused. ++ * @stream_data: Pointer where to store all the CS data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of CSs to be read. ++ * Can be 0, in which case stream_data is unused. ++ * @glb_version: Where to store the global interface version. ++ * @features: Where to store a bit mask of features (e.g. ++ * whether certain types of job can be suspended). ++ * @group_num: Where to store the number of CSGs ++ * supported. ++ * @prfcnt_size: Where to store the size of CSF performance counters, ++ * in bytes. Bits 31:16 hold the size of firmware ++ * performance counter data and 15:0 hold the size of ++ * hardware performance counter data. ++ * @instr_features: Instrumentation features. Bits 7:4 hold the max size ++ * of events. Bits 3:0 hold the offset update rate. ++ * (csf >= 1,1,0) ++ * ++ * Return: Total number of CSs, summed across all groups. 
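The packed prfcnt_size and instr_features words documented above can be split with simple shifts and masks. A self-contained sketch following the bit layout in the comment (helpers invented for illustration):

#include <stdint.h>

/* prfcnt_size: bits 31:16 = firmware counter data size, bits 15:0 = hardware
 * counter data size.
 */
static uint32_t prfcnt_fw_size(uint32_t prfcnt_size)
{
	return prfcnt_size >> 16;
}

static uint32_t prfcnt_hw_size(uint32_t prfcnt_size)
{
	return prfcnt_size & 0xFFFFu;
}

/* instr_features (csf >= 1.1.0): bits 7:4 = max event size,
 * bits 3:0 = offset update rate.
 */
static uint32_t instr_max_event_size(uint32_t instr_features)
{
	return (instr_features >> 4) & 0xFu;
}

static uint32_t instr_offset_update_rate(uint32_t instr_features)
{
	return instr_features & 0xFu;
}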
++ */ ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, struct basep_cs_group_control *group_data, ++ u32 max_group_num, struct basep_cs_stream_control *stream_data, ++ u32 max_total_stream_num, u32 *glb_version, u32 *features, ++ u32 *group_num, u32 *prfcnt_size, u32 *instr_features); + -+static const struct file_operations kbasep_csf_kcpu_debugfs_fops = { -+ .open = kbasep_csf_kcpu_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++/** ++ * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline ++ * metadata content ++ * ++ * @kbdev: Kbase device. ++ * @name: Name of the metadata which metadata content to be returned. ++ * @size: Metadata size if specified metadata found. ++ * ++ * Return: The firmware timeline metadata content which match @p name. ++ */ ++const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, ++ const char *name, size_t *size); + -+void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) -+{ -+ struct dentry *file; -+ const mode_t mode = 0444; ++/** ++ * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory. ++ * ++ * @kbdev: Kbase device the memory mapping shall belong to. ++ * @num_pages: Number of memory pages to map. ++ * @cpu_map_properties: Either PROT_READ or PROT_WRITE. ++ * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. ++ * @csf_mapping: Object where to write metadata for the memory mapping. ++ * ++ * This helper function allocates memory and maps it on both the CPU ++ * and the GPU address spaces. Most of the properties of the mapping ++ * are implicit and will be automatically determined by the function, ++ * e.g. whether memory is cacheable. ++ * ++ * The client is only expected to specify whether the mapping is readable ++ * or writable in the CPU and the GPU address spaces; any other flag ++ * will be ignored by the function. ++ * ++ * Return: 0 if success, or an error code on failure. ++ */ ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping); + -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; ++/** ++ * kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory. ++ * ++ * @kbdev: Device pointer. ++ * @csf_mapping: Metadata of the memory mapping to terminate. ++ */ ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); + -+ file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, -+ kctx, &kbasep_csf_kcpu_debugfs_fops); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++extern bool fw_debug; ++#endif + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kctx->kbdev->dev, -+ "Unable to create KCPU debugfs entry"); -+ } ++static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs) ++{ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ return (fw_debug ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(msecs)); ++#else ++ return msecs_to_jiffies(msecs); ++#endif +} + ++/** ++ * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis ++ * monitoring operation ++ * ++ * @kbdev: Kbase device structure ++ * ++ * Program the firmware interface with its configured hysteresis count value ++ * and enable the firmware to act on it. The Caller is ++ * assumed to hold the kbdev->csf.scheduler.interrupt_lock. 
++ */ ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); + -+#else -+/* -+ * Stub functions for when debugfs is disabled ++/** ++ * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time ++ * hysteresis monitoring operation ++ * ++ * @kbdev: Kbase device structure ++ * ++ * Program the firmware interface to disable the idle hysteresis timer. The ++ * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock. + */ -+void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) -+{ -+} ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev); + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h -new file mode 100644 -index 000000000..08f2fda03 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle ++ * detection hysteresis duration + * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Return: the internally recorded hysteresis (nominal) value. ++ */ ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle ++ * detection hysteresis duration + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @dur: The duration value (unit: milliseconds) for the configuring ++ * hysteresis field for GPU idle detection + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * The supplied value will be recorded internally without any change. But the ++ * actual field value will be subject to hysteresis source frequency scaling ++ * and maximum value limiting. The default source will be SYSTEM_TIMESTAMP ++ * counter. But in case the platform is not able to supply it, the GPU ++ * CYCLE_COUNTER source will be used as an alternative. Bit-31 on the ++ * returned value is the source configuration flag, and it is set to '1' ++ * when CYCLE_COUNTER alternative source is used. + * ++ * Return: the actual internally configured hysteresis field value. + */ ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur); + -+#ifndef _KBASE_CSF_KCPU_DEBUGFS_H_ -+#define _KBASE_CSF_KCPU_DEBUGFS_H_ ++/** ++ * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU shader Core power-off ++ * time value ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: the internally recorded MCU shader Core power-off (nominal) timeout value. The unit ++ * of the value is in micro-seconds. 
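++ *
++ * For illustration only: the nominal value read back here is the duration
++ * most recently recorded through the matching set function, e.g.
++ *
++ *   kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, 200);
++ *   pwroff_us = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
++ *
++ * where pwroff_us is a caller-local u32 used purely as an example name.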
++ */ ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); + -+/* Forward declaration */ -+struct kbase_context; ++/** ++ * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU shader Core power-off ++ * time value ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @dur: The duration value (unit: micro-seconds) for configuring MCU ++ * core power-off timer, when the shader cores' power ++ * transitions are delegated to the MCU (normal operational ++ * mode) ++ * ++ * The supplied value will be recorded internally without any change. But the ++ * actual field value will be subject to core power-off timer source frequency ++ * scaling and maximum value limiting. The default source will be ++ * SYSTEM_TIMESTAMP counter. But in case the platform is not able to supply it, ++ * the GPU CYCLE_COUNTER source will be used as an alternative. Bit-31 on the ++ * returned value is the source configuration flag, and it is set to '1' ++ * when CYCLE_COUNTER alternative source is used. ++ * ++ * The configured MCU shader Core power-off timer will only have effect when the host ++ * driver has delegated the shader cores' power management to MCU. ++ * ++ * Return: the actual internal core power-off timer value in register defined ++ * format. ++ */ ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); + -+#define MALI_CSF_KCPU_DEBUGFS_VERSION 0 ++/** ++ * kbase_csf_interface_version - Helper function to build the full firmware ++ * interface version in a format compatible with ++ * GLB_VERSION register ++ * ++ * @major: major version of csf interface ++ * @minor: minor version of csf interface ++ * @patch: patch version of csf interface ++ * ++ * Return: firmware interface version ++ */ ++static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) ++{ ++ return ((major << GLB_VERSION_MAJOR_SHIFT) | ++ (minor << GLB_VERSION_MINOR_SHIFT) | ++ (patch << GLB_VERSION_PATCH_SHIFT)); ++} + +/** -+ * kbase_csf_kcpu_debugfs_init() - Create a debugfs entry for KCPU queues ++ * kbase_csf_trigger_firmware_config_update - Send a firmware config update. + * -+ * @kctx: The kbase_context for which to create the debugfs entry ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Any changes done to firmware configuration entry or tracebuffer entry ++ * requires a GPU silent reset to reflect the configuration changes ++ * requested, but if Firmware.header.entry.bit(30) is set then we can request a ++ * FIRMWARE_CONFIG_UPDATE rather than doing a silent reset. ++ * ++ * Return: 0 if success, or negative error code on failure. + */ -+void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx); ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); + -+#endif /* _KBASE_CSF_KCPU_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c ++/** ++ * kbase_csf_firmware_req_core_dump - Request a firmware core dump ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Request a firmware core dump and wait for for firmware to acknowledge. ++ * Firmware will enter infinite loop after the firmware core dump is created. ++ * ++ * Return: 0 if success, or negative error code on failure. 
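++ *
++ * Illustrative usage only; the MCU is assumed to be kept active around the
++ * request, e.g.
++ *
++ *   kbase_csf_scheduler_pm_active(kbdev);
++ *   err = kbase_csf_scheduler_wait_mcu_active(kbdev);
++ *   if (!err)
++ *           err = kbase_csf_firmware_req_core_dump(kbdev);
++ *   kbase_csf_scheduler_pm_idle(kbdev);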
++ */ ++int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c new file mode 100644 -index 000000000..4056a9d93 +index 000000000..c895b0801 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c -@@ -0,0 +1,817 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c +@@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -157356,812 +159410,1240 @@ index 000000000..4056a9d93 + * + */ + -+#include +#include -+#include "mali_kbase_csf.h" -+#include "mali_kbase_csf_mcu_shared_reg.h" -+#include -+ -+/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */ -+#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8) -+ -+/* MCU shared region map attempt limit */ -+#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4) -+ -+/* Convert a VPFN to its start addr */ -+#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT) ++#include ++#include + -+/* Macros for extract the corresponding VPFNs from a CSG_REG */ -+#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn) -+#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages) -+#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi)) ++#include "mali_kbase_csf_firmware_cfg.h" ++#include "mali_kbase_csf_firmware_log.h" + -+/* MCU shared segment dummy page mapping flags */ -+#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) ++#if CONFIG_SYSFS ++#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" + -+/* MCU shared segment suspend buffer mapping flags */ -+#define SUSP_PAGE_MAP_FLAGS \ -+ (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) ++#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity" + +/** -+ * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime -+ * resources for suspend buffer pages, userio pages -+ * and their corresponding mapping GPU VA addresses -+ * from the MCU shared interface segment ++ * struct firmware_config - Configuration item within the MCU firmware + * -+ * @link: Link to the managing list for the wrapper object. -+ * @reg: pointer to the region allocated from the shared interface segment, which -+ * covers the normal/P-mode suspend buffers, userio pages of the queues -+ * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free). -+ * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's -+ * protected mode suspend buffer pages. ++ * @node: List head linking all options to ++ * kbase_device:csf.firmware_config ++ * @kbdev: Pointer to the Kbase device ++ * @kobj: Kobject corresponding to the sysfs sub-directory, ++ * inside CSF_FIRMWARE_CFG_SYSFS_DIR_NAME directory, ++ * representing the configuration option @name. 
++ * @kobj_inited: kobject initialization state ++ * @updatable: Indicates whether config items can be updated with ++ * FIRMWARE_CONFIG_UPDATE ++ * @name: NUL-terminated string naming the option ++ * @address: The address in the firmware image of the configuration option ++ * @min: The lowest legal value of the configuration option ++ * @max: The maximum legal value of the configuration option ++ * @cur_val: The current value of the configuration option ++ * ++ * The firmware may expose configuration options. Each option has a name, the ++ * address where the option is controlled and the minimum and maximum values ++ * that the option can take. + */ -+struct kbase_csg_shared_region { -+ struct list_head link; -+ struct kbase_va_region *reg; -+ struct kbase_queue_group *grp; -+ bool pmode_mapped; ++struct firmware_config { ++ struct list_head node; ++ struct kbase_device *kbdev; ++ struct kobject kobj; ++ bool kobj_inited; ++ bool updatable; ++ char *name; ++ u32 address; ++ u32 min; ++ u32 max; ++ u32 cur_val; +}; + -+static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) -+{ -+ unsigned long userio_map_flags; -+ -+ if (kbdev->system_coherency == COHERENCY_NONE) -+ userio_map_flags = -+ KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -+ else -+ userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); ++#define FW_CFG_ATTR(_name, _mode) \ ++ struct attribute fw_cfg_attr_##_name = { \ ++ .name = __stringify(_name), \ ++ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ ++ } + -+ return (userio_map_flags | KBASE_REG_GPU_NX); -+} ++static FW_CFG_ATTR(min, 0444); ++static FW_CFG_ATTR(max, 0444); ++static FW_CFG_ATTR(cur, 0644); + -+static void set_page_meta_status_not_movable(struct tagged_addr phy) ++static void fw_cfg_kobj_release(struct kobject *kobj) +{ -+ if (kbase_page_migration_enabled) { -+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); + -+ if (page_md) { -+ spin_lock(&page_md->migrate_lock); -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ spin_unlock(&page_md->migrate_lock); -+ } -+ } ++ kfree(config); +} + -+static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group) ++static ssize_t show_fw_cfg(struct kobject *kobj, ++ struct attribute *attr, char *buf) +{ -+ return (struct kbase_csg_shared_region *)group->csg_reg; -+} ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); ++ struct kbase_device *kbdev = config->kbdev; ++ u32 val = 0; + -+static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn, -+ u32 nr_pages) -+{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; ++ if (!kbdev) ++ return -ENODEV; + -+ return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages, -+ mem_flags, KBASE_MEM_GROUP_CSF_FW); -+} ++ if (attr == &fw_cfg_attr_max) ++ val = config->max; ++ else if (attr == &fw_cfg_attr_min) ++ val = config->min; ++ else if (attr == &fw_cfg_attr_cur) { ++ unsigned long flags; + -+static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages) -+{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; -+ const 
enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ val = config->cur_val; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ dev_warn(kbdev->dev, ++ "Unexpected read from entry %s/%s", ++ config->name, attr->name); ++ return -EINVAL; ++ } + -+ return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, -+ mmu_sync_info, NULL, false); ++ return snprintf(buf, PAGE_SIZE, "%u\n", val); +} + -+/* Reset consecutive retry count to zero */ -+static void notify_group_csg_reg_map_done(struct kbase_queue_group *group) ++static ssize_t store_fw_cfg(struct kobject *kobj, ++ struct attribute *attr, ++ const char *buf, ++ size_t count) +{ -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); -+ -+ /* Just clear the internal map retry count */ -+ group->csg_reg_bind_retries = 0; -+} ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); ++ struct kbase_device *kbdev = config->kbdev; + -+/* Return true if a fatal group error has already been triggered */ -+static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group) -+{ -+ struct kbase_device *kbdev = group->kctx->kbdev; ++ if (!kbdev) ++ return -ENODEV; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ if (attr == &fw_cfg_attr_cur) { ++ unsigned long flags; ++ u32 val, cur_val; ++ int ret = kstrtouint(buf, 0, &val); + -+ if (group->csg_reg_bind_retries < U8_MAX) -+ group->csg_reg_bind_retries++; ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Couldn't process %s/%s write operation.\n" ++ "Use format \n", ++ config->name, attr->name); ++ return -EINVAL; ++ } + -+ /* Allow only one fatal error notification */ -+ if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) { -+ struct base_gpu_queue_group_error const err_payload = { -+ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } } -+ }; ++ if ((val < config->min) || (val > config->max)) ++ return -EINVAL; + -+ dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit", -+ group->kctx->tgid, group->kctx->id, group->handle); -+ kbase_csf_add_group_fatal_error(group, &err_payload); -+ kbase_event_wakeup(group->kctx); -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT; -+} ++ cur_val = config->cur_val; ++ if (cur_val == val) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return count; ++ } + -+/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping. -+ * If phys is NULL, the internal dummy_phys is used, which effectively -+ * restores back to the initialized state for the given queue's userio_pages -+ * (i.e. mapped to the default dummy page). -+ * In case of CSF mmu update error on a queue, the dummy phy is used to restore -+ * back the default 'unbound' (i.e. mapped to dummy) condition. -+ * -+ * It's the caller's responsibility to ensure that the given vpfn is extracted -+ * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN(). -+ */ -+static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys) ++ /* If configuration update cannot be performed with ++ * FIRMWARE_CONFIG_UPDATE then we need to do a ++ * silent reset before we update the memory. 
++ */ ++ if (!config->updatable) { ++ /* ++ * If there is already a GPU reset pending then inform ++ * the User to retry the write. ++ */ ++ if (kbase_reset_gpu_silent(kbdev)) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ return -EAGAIN; ++ } ++ } ++ ++ /* ++ * GPU reset request has been placed, now update the ++ * firmware image. GPU reset will take place only after ++ * hwaccess_lock is released. ++ * Update made to firmware image in memory would not ++ * be lost on GPU reset as configuration entries reside ++ * in the RONLY section of firmware image, which is not ++ * reloaded on firmware reboot due to GPU reset. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, config->address, val); ++ ++ config->cur_val = val; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* Enable FW logging only if Log verbosity is non-zero */ ++ if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && ++ (!cur_val || !val)) { ++ ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val); ++ if (ret) { ++ /* Undo FW configuration changes */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ config->cur_val = cur_val; ++ kbase_csf_update_firmware_memory(kbdev, config->address, cur_val); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return ret; ++ } ++ } ++ ++ /* If we can update the config without firmware reset then ++ * we need to just trigger FIRMWARE_CONFIG_UPDATE. ++ */ ++ if (config->updatable) { ++ ret = kbase_csf_trigger_firmware_config_update(kbdev); ++ if (ret) ++ return ret; ++ } ++ ++ /* Wait for the config update to take effect */ ++ if (!config->updatable) ++ kbase_reset_gpu_wait(kbdev); ++ } else { ++ dev_warn(kbdev->dev, ++ "Unexpected write to entry %s/%s", ++ config->name, attr->name); ++ return -EINVAL; ++ } ++ ++ return count; ++} ++ ++static const struct sysfs_ops fw_cfg_ops = { ++ .show = &show_fw_cfg, ++ .store = &store_fw_cfg, ++}; ++ ++static struct attribute *fw_cfg_attrs[] = { ++ &fw_cfg_attr_min, ++ &fw_cfg_attr_max, ++ &fw_cfg_attr_cur, ++ NULL, ++}; ++#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) ++ATTRIBUTE_GROUPS(fw_cfg); ++#endif ++ ++static struct kobj_type fw_cfg_kobj_type = { ++ .release = &fw_cfg_kobj_release, ++ .sysfs_ops = &fw_cfg_ops, ++#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE) ++ .default_groups = fw_cfg_groups, ++#else ++ .default_attrs = fw_cfg_attrs, ++#endif ++}; ++ ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) +{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ int err = 0, err1; ++ struct firmware_config *config; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ kbdev->csf.fw_cfg_kobj = kobject_create_and_add( ++ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); ++ if (!kbdev->csf.fw_cfg_kobj) { ++ kobject_put(kbdev->csf.fw_cfg_kobj); ++ dev_err(kbdev->dev, ++ "Creation of %s sysfs sub-directory failed\n", ++ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME); ++ return -ENOMEM; ++ } + -+ if (phys) { -+ unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags; -+ unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR; ++ list_for_each_entry(config, &kbdev->csf.firmware_config, node) { ++ int err; + -+ /* Dealing with a queue's INPUT page */ -+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input, -+ KBASE_MEM_GROUP_CSF_IO); -+ /* Dealing with a queue's OUTPUT page */ -+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1, -+ mem_flags_output, 
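++		/* Illustrative note: cur_val is seeded from the firmware image
++		 * before the option's kobject is added, so the first sysfs read
++		 * of "cur" reports the value currently held in firmware memory.
++		 */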
KBASE_MEM_GROUP_CSF_IO); -+ if (unlikely(err1)) -+ err = err1; ++ kbase_csf_read_firmware_memory(kbdev, config->address, ++ &config->cur_val); ++ ++ err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, ++ kbdev->csf.fw_cfg_kobj, "%s", config->name); ++ if (err) { ++ kobject_put(&config->kobj); ++ dev_err(kbdev->dev, ++ "Creation of %s sysfs sub-directory failed\n", ++ config->name); ++ return err; ++ } ++ ++ config->kobj_inited = true; + } + -+ if (unlikely(err) || !phys) { -+ /* Restore back to dummy_userio_phy */ -+ update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); ++ return 0; ++} ++ ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) ++{ ++ while (!list_empty(&kbdev->csf.firmware_config)) { ++ struct firmware_config *config; ++ ++ config = list_first_entry(&kbdev->csf.firmware_config, ++ struct firmware_config, node); ++ list_del(&config->node); ++ ++ if (config->kobj_inited) { ++ kobject_del(&config->kobj); ++ kobject_put(&config->kobj); ++ } else ++ kfree(config); + } + -+ return err; ++ kobject_del(kbdev->csf.fw_cfg_kobj); ++ kobject_put(kbdev->csf.fw_cfg_kobj); +} + -+/* Update a group's queues' mappings for a group with its runtime bound group region */ -+static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group, -+ struct kbase_queue_group *prev_grp) ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, ++ const u32 *entry, unsigned int size, bool updatable) +{ -+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; -+ struct tagged_addr *phy; -+ int err = 0, err1; -+ u32 i; ++ const char *name = (char *)&entry[3]; ++ struct firmware_config *config; ++ const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Allocate enough space for struct firmware_config and the ++ * configuration option name (with NULL termination) ++ */ ++ config = kzalloc(sizeof(*config) + name_len + 1, GFP_KERNEL); + -+ if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg")) -+ return -EINVAL; ++ if (!config) ++ return -ENOMEM; + -+ for (i = 0; i < nr_csis; i++) { -+ struct kbase_queue *queue = group->bound_queues[i]; -+ struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL; ++ config->kbdev = kbdev; ++ config->updatable = updatable; ++ config->name = (char *)(config+1); ++ config->address = entry[0]; ++ config->min = entry[1]; ++ config->max = entry[2]; + -+ /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */ -+ phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL; ++ memcpy(config->name, name, name_len); ++ config->name[name_len] = 0; + -+ /* Either phy is valid, or this update is for a transition change from -+ * prev_group, and the prev_queue was mapped, so an update is required. 
-+ */ -+ if (phy || (prev_queue && prev_queue->user_io_gpu_va)) { -+ u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages); ++ list_add(&config->node, &kbdev->csf.firmware_config); + -+ err1 = userio_pages_replace_phys(kbdev, vpfn, phy); ++ dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", ++ config->name, config->address, ++ config->min, config->max); + -+ if (unlikely(err1)) { -+ dev_warn(kbdev->dev, -+ "%s: Error in update queue-%d mapping for csg_%d_%d_%d", -+ __func__, i, group->kctx->tgid, group->kctx->id, -+ group->handle); -+ err = err1; -+ } else if (phy) -+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn); ++ return 0; ++} ++#else ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) ++{ ++ return 0; ++} + -+ /* Mark prev_group's queue has lost its mapping */ -+ if (prev_queue) -+ prev_queue->user_io_gpu_va = 0; -+ } -+ } ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) ++{ ++ /* !CONFIG_SYSFS: Nothing to do here */ ++} + -+ return err; ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, ++ const u32 *entry, unsigned int size) ++{ ++ return 0; +} ++#endif /* CONFIG_SYSFS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h +new file mode 100644 +index 000000000..b227cf158 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced -+ * with the given group's phy pages, or, if no replacement, the default dummy pages. -+ * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its -+ * new binding owner in this function. At the end, the prev_grp would be completely -+ * detached away from the previously bound csg_reg. ++#ifndef _KBASE_CSF_FIRMWARE_CFG_H_ ++#define _KBASE_CSF_FIRMWARE_CFG_H_ ++ ++#include ++#include "mali_kbase_csf_firmware.h" ++#include ++ ++#define CONFIGURATION_ENTRY_NAME_OFFSET (0xC) ++ ++/** ++ * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration ++ * options present in firmware image. ++ * ++ * @kbdev: Pointer to the Kbase device ++ * ++ * This function would create a sysfs directory and populate it with a ++ * sub-directory, that would contain a file per attribute, for every ++ * configuration option parsed from firmware image. ++ * ++ * Return: The initialization error code. 
+ */ -+static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group, -+ struct kbase_csg_shared_region *csg_reg) -+{ -+ const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ struct kbase_queue_group *prev_grp = csg_reg->grp; -+ struct kbase_va_region *reg = csg_reg->reg; -+ struct tagged_addr *phy; -+ int err = 0, err1; ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++/** ++ * kbase_csf_firmware_cfg_term - Delete the sysfs directory that was created ++ * for firmware configuration options. ++ * ++ * @kbdev: Pointer to the Kbase device ++ * ++ */ ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); + -+ /* The csg_reg is expected still on the unused list so its link is not empty */ -+ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) { -+ dev_dbg(kbdev->dev, "csg_reg is marked in active use"); -+ return -EINVAL; -+ } ++/** ++ * kbase_csf_firmware_cfg_option_entry_parse() - Process a ++ * "configuration option" section. ++ * ++ * @kbdev: Kbase device structure ++ * @fw: Firmware image containing the section ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE ++ * ++ * Read a "configuration option" section adding it to the ++ * kbase_device:csf.firmware_config list. ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct kbase_csf_mcu_fw *const fw, ++ const u32 *entry, unsigned int size, bool updatable); ++#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c +new file mode 100644 +index 000000000..f0a10d197 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c +@@ -0,0 +1,807 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) { -+ dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group"); -+ prev_grp->csg_reg = NULL; -+ return -EINVAL; -+ } ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* Replacing the csg_reg bound group to the newly given one */ -+ csg_reg->grp = group; -+ group->csg_reg = csg_reg; ++#include "mali_kbase.h" ++#include "mali_kbase_csf_firmware_core_dump.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" + -+ /* Resolving mappings, deal with protected mode first */ -+ if (group->protected_suspend_buf.pma) { -+ /* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping -+ * status is now stale during this transition of ownership. For the new owner, -+ * its mapping would have been updated away when it lost its binding previously. -+ * So it needs an update to this pma map. By clearing here the mapped flag -+ * ensures it reflects the new owner's condition. -+ */ -+ csg_reg->pmode_mapped = false; -+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); -+ } else if (csg_reg->pmode_mapped) { -+ /* Need to unmap the previous one, use the dummy pages */ -+ err = update_mapping_with_dummy_pages( -+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); ++/* Page size in bytes in use by MCU. */ ++#define FW_PAGE_SIZE 4096 + -+ if (unlikely(err)) -+ dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d", -+ __func__, group->kctx->tgid, group->kctx->id, group->handle); ++/* ++ * FW image header core dump data format supported. ++ * Currently only version 0.1 is supported. ++ */ ++#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 ++#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + -+ csg_reg->pmode_mapped = false; -+ } ++/* Full version of the image header core dump data format */ ++#define FW_CORE_DUMP_DATA_VERSION \ ++ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + -+ /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is -+ * actually reflected by a specific mapped flag (due to phys[] is only allocated on -+ * in-need basis). So the GPU_VA is always updated to the bound region's corresponding -+ * VA, as a reflection of the binding to the csg_reg. 
-+ */ -+ group->protected_suspend_buf.gpu_va = -+ GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)); ++/* Validity flag to indicate if the MCU registers in the buffer are valid */ ++#define FW_MCU_STATUS_MASK 0x1 ++#define FW_MCU_STATUS_VALID (1 << 0) + -+ /* Deal with normal mode suspend buffer */ -+ phy = group->normal_suspend_buf.phy; -+ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy, -+ nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW); ++/* Core dump entry fields */ ++#define FW_CORE_DUMP_VERSION_INDEX 0 ++#define FW_CORE_DUMP_START_ADDR_INDEX 1 + -+ if (unlikely(err1)) { -+ dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d", -+ __func__, group->kctx->tgid, group->kctx->id, group->handle); ++/* MCU registers stored by a firmware core dump */ ++struct fw_core_dump_mcu { ++ u32 r0; ++ u32 r1; ++ u32 r2; ++ u32 r3; ++ u32 r4; ++ u32 r5; ++ u32 r6; ++ u32 r7; ++ u32 r8; ++ u32 r9; ++ u32 r10; ++ u32 r11; ++ u32 r12; ++ u32 sp; ++ u32 lr; ++ u32 pc; ++}; + -+ /* Attempt a restore to default dummy for removing previous mapping */ -+ if (prev_grp) -+ update_mapping_with_dummy_pages( -+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); -+ err = err1; -+ /* Marking the normal suspend buffer is not mapped (due to error) */ -+ group->normal_suspend_buf.gpu_va = 0; -+ } else { -+ /* Marking the normal suspend buffer is actually mapped */ -+ group->normal_suspend_buf.gpu_va = -+ GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)); -+ } ++/* Any ELF definitions used in this file are from elf.h/elfcore.h except ++ * when specific 32-bit versions are required (mainly for the ++ * ELF_PRSTATUS32 note that is used to contain the MCU registers). ++ */ + -+ /* Deal with queue uerio_pages */ -+ err1 = csg_reg_update_on_csis(kbdev, group, prev_grp); -+ if (likely(!err1)) -+ err = err1; ++/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ ++struct prstatus32_timeval { ++ int tv_sec; ++ int tv_usec; ++}; + -+ /* Reset the previous group's suspend buffers' GPU_VAs as it has lost its bound */ -+ if (prev_grp) { -+ prev_grp->normal_suspend_buf.gpu_va = 0; -+ prev_grp->protected_suspend_buf.gpu_va = 0; -+ prev_grp->csg_reg = NULL; -+ } ++/* - Structure defining ELF32 PRSTATUS note contents, as defined by the ++ * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. ++ * Note: GDB checks for the size of this structure to be 0x94. ++ * Modified pr_reg (array containing the Arm 32-bit MCU registers) to ++ * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. ++ */ ++struct elf_prstatus32 { ++ struct elf_siginfo pr_info; /* Info associated with signal. */ ++ short int pr_cursig; /* Current signal. */ ++ unsigned int pr_sigpend; /* Set of pending signals. */ ++ unsigned int pr_sighold; /* Set of held signals. */ ++ pid_t pr_pid; ++ pid_t pr_ppid; ++ pid_t pr_pgrp; ++ pid_t pr_sid; ++ struct prstatus32_timeval pr_utime; /* User time. */ ++ struct prstatus32_timeval pr_stime; /* System time. */ ++ struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ ++ struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ ++ u32 pr_reg[18]; /* GP registers. */ ++ int pr_fpvalid; /* True if math copro being used. */ ++}; + -+ return err; -+} ++/** ++ * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' ++ * debugfs file. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
++ */ ++struct fw_core_dump_data { ++ struct kbase_device *kbdev; ++}; + -+/* Notify the group is placed on-slot, hence the bound csg_reg is active in use */ -+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++/* ++ * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' ++ * debugfs file. ++ * @interface: current firmware memory interface ++ * @page_num: current page number (0..) within @interface ++ */ ++struct fw_core_dump_seq_off { ++ struct kbase_csf_firmware_interface *interface; ++ u32 page_num; ++}; ++ ++/** ++ * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @regs: Pointer to a core dump mcu struct where the MCU registers are copied ++ * to. Should be allocated by the called. ++ * ++ * Return: 0 if successfully copied the MCU registers, negative error code otherwise. ++ */ ++static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ -+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); ++ unsigned int i; ++ u32 status = 0; ++ u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; ++ u32 *data = (u32 *)regs; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Check if the core dump entry exposed the buffer */ ++ if (!regs || !kbdev->csf.fw_core_dump.available) ++ return -EPERM; + -+ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding", -+ group->kctx->tgid, group->kctx->id, group->handle)) -+ return; ++ /* Check if the data in the buffer is valid, if not, return error */ ++ kbase_csf_read_firmware_memory(kbdev, data_addr, &status); ++ if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) ++ return -EPERM; + -+ /* By dropping out the csg_reg from the unused list, it becomes active and is tracked -+ * by its bound group that is on-slot. The design is that, when this on-slot group is -+ * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the -+ * unused list. ++ /* According to image header documentation, the MCU registers core dump ++ * buffer is 32-bit aligned. + */ -+ if (!WARN_ON_ONCE(list_empty(&csg_reg->link))) -+ list_del_init(&csg_reg->link); ++ for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) ++ kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); ++ ++ return 0; +} + -+/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use -+ * anymore. Existing bounding/mappings are left untouched. These would only be dealt with -+ * if the bound csg_reg is to be reused with another group. ++/** ++ * fw_core_dump_fill_elf_header - Initializes an ELF32 header ++ * @hdr: ELF32 header to initialize ++ * @sections: Number of entries in the ELF program header table ++ * ++ * Initializes an ELF32 header for an ARM 32-bit little-endian ++ * 'Core file' object file. + */ -+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ -+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Reset all members in header. 
*/ ++ memset(hdr, 0, sizeof(*hdr)); + -+ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound", -+ group->kctx->tgid, group->kctx->id, group->handle)) -+ return; ++ /* Magic number identifying file as an ELF object. */ ++ memcpy(hdr->e_ident, ELFMAG, SELFMAG); + -+ /* By adding back the csg_reg to the unused list, it becomes available for another -+ * group to break its existing binding and set up a new one. ++ /* Identify file as 32-bit, little-endian, using current ++ * ELF header version, with no OS or ABI specific ELF ++ * extensions used. + */ -+ if (!list_empty(&csg_reg->link)) { -+ WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot"); -+ list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs); -+ } else -+ list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs); -+} ++ hdr->e_ident[EI_CLASS] = ELFCLASS32; ++ hdr->e_ident[EI_DATA] = ELFDATA2LSB; ++ hdr->e_ident[EI_VERSION] = EV_CURRENT; ++ hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + -+/* Adding a new queue to an existing on-slot group */ -+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue) -+{ -+ struct kbase_queue_group *group = queue->group; -+ struct kbase_csg_shared_region *csg_reg; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ u64 vpfn; -+ int err; ++ /* 'Core file' type of object file. */ ++ hdr->e_type = ET_CORE; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* ARM 32-bit architecture (AARCH32) */ ++ hdr->e_machine = EM_ARM; + -+ if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot")) -+ return -EIO; ++ /* Object file version: the original format. */ ++ hdr->e_version = EV_CURRENT; + -+ csg_reg = get_group_bound_csg_reg(group); -+ if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link), -+ "No bound csg_reg, or in wrong state")) -+ return -EIO; ++ /* Offset of program header table in file. */ ++ hdr->e_phoff = sizeof(struct elf32_hdr); + -+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); -+ err = userio_pages_replace_phys(kbdev, vpfn, queue->phys); -+ if (likely(!err)) { -+ /* Mark the queue has been successfully mapped */ -+ queue->user_io_gpu_va = GET_VPFN_VA(vpfn); -+ } else { -+ /* Mark the queue has no mapping on its phys[] */ -+ queue->user_io_gpu_va = 0; -+ dev_dbg(kbdev->dev, -+ "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__, -+ queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle); ++ /* No processor specific flags. */ ++ hdr->e_flags = 0; + -+ /* notify the error for the bound group */ -+ if (notify_group_csg_reg_map_error(group)) -+ err = -EIO; -+ } ++ /* Size of the ELF header in bytes. */ ++ hdr->e_ehsize = sizeof(struct elf32_hdr); + -+ return err; ++ /* Size of the ELF program header entry in bytes. */ ++ hdr->e_phentsize = sizeof(struct elf32_phdr); ++ ++ /* Number of entries in the program header table. */ ++ hdr->e_phnum = sections; +} + -+/* Unmap a given queue's userio pages, when the queue is deleted */ -+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue) ++/** ++ * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header ++ * for holding auxiliary information ++ * @phdr: ELF32 program header ++ * @file_offset: Location of the note in the file in bytes ++ * @size: Size of the note in bytes. 
++ * ++ * Initializes an ELF32 program header describing auxiliary information (containing ++ * one or more notes) of @size bytes alltogether located in the file at offset ++ * @file_offset. ++ */ ++static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, ++ u32 size) +{ -+ struct kbase_queue_group *group; -+ struct kbase_csg_shared_region *csg_reg; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ u64 vpfn; ++ /* Auxiliary information (note) in program header. */ ++ phdr->p_type = PT_NOTE; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Location of first note in file in bytes. */ ++ phdr->p_offset = file_offset; + -+ /* The queue has no existing mapping, nothing to do */ -+ if (!queue || !queue->user_io_gpu_va) -+ return; ++ /* Size of all notes combined in bytes. */ ++ phdr->p_filesz = size; + -+ group = queue->group; -+ if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region")) -+ return; ++ /* Other members not relevant for a note. */ ++ phdr->p_vaddr = 0; ++ phdr->p_paddr = 0; ++ phdr->p_memsz = 0; ++ phdr->p_align = 0; ++ phdr->p_flags = 0; ++} + -+ csg_reg = get_group_bound_csg_reg(group); ++/** ++ * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment ++ * @phdr: ELF32 program header to initialize. ++ * @file_offset: Location of loadable segment in file in bytes ++ * (aligned to FW_PAGE_SIZE bytes) ++ * @vaddr: 32-bit virtual address where to write the segment ++ * (aligned to FW_PAGE_SIZE bytes) ++ * @size: Size of the segment in bytes. ++ * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. ++ * ++ * Initializes an ELF32 program header describing a loadable segment of ++ * @size bytes located in the file at offset @file_offset to be loaded ++ * at virtual address @vaddr with access permissions as described by ++ * CSF_FIRMWARE_ENTRY_* flags in @flags. ++ */ ++static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, ++ u32 vaddr, u32 size, u32 flags) ++{ ++ /* Loadable segment in program header. */ ++ phdr->p_type = PT_LOAD; + -+ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); ++ /* Location of segment in file in bytes. Aligned to p_align bytes. */ ++ phdr->p_offset = file_offset; + -+ WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL), -+ "Unexpected restoring to dummy map update error"); -+ queue->user_io_gpu_va = 0; ++ /* Virtual address of segment. Aligned to p_align bytes. */ ++ phdr->p_vaddr = vaddr; ++ ++ /* Physical address of segment. Not relevant. */ ++ phdr->p_paddr = 0; ++ ++ /* Size of segment in file and memory. */ ++ phdr->p_filesz = size; ++ phdr->p_memsz = size; ++ ++ /* Alignment of segment in the file and memory in bytes (integral power of 2). */ ++ phdr->p_align = FW_PAGE_SIZE; ++ ++ /* Set segment access permissions. */ ++ phdr->p_flags = 0; ++ if (flags & CSF_FIRMWARE_ENTRY_READ) ++ phdr->p_flags |= PF_R; ++ if (flags & CSF_FIRMWARE_ENTRY_WRITE) ++ phdr->p_flags |= PF_W; ++ if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) ++ phdr->p_flags |= PF_X; +} + -+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++/** ++ * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note ++ * @name: Name given to the PRSTATUS note. 
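++ *   (Worked example, illustrative only: the name "CORE" used later in this
++ *   file needs 5 bytes including its NUL terminator, which rounds up to 8,
++ *   so the note occupies sizeof(struct elf32_note) + 8 +
++ *   sizeof(struct elf_prstatus32) bytes.)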
++ * ++ * Calculates the size of a 32-bit PRSTATUS note (which contains information ++ * about a process like the current MCU registers) taking into account ++ * @name must be padded to a 4-byte multiple. ++ * ++ * Return: size of 32-bit PRSTATUS note in bytes. ++ */ ++static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ int err = 0, err1; ++ return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + ++ sizeof(struct elf_prstatus32); ++} + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++/** ++ * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure ++ * @prs: ELF32 PRSTATUS note to initialize ++ * @regs: MCU registers to copy into the PRSTATUS note ++ * ++ * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. ++ * Other process information is N/A for CSF Firmware. ++ */ ++static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, ++ struct fw_core_dump_mcu *regs) ++{ ++ /* Only fill in registers (32-bit) of PRSTATUS note. */ ++ memset(prs, 0, sizeof(*prs)); ++ prs->pr_reg[0] = regs->r0; ++ prs->pr_reg[1] = regs->r1; ++ prs->pr_reg[2] = regs->r2; ++ prs->pr_reg[3] = regs->r3; ++ prs->pr_reg[4] = regs->r4; ++ prs->pr_reg[5] = regs->r5; ++ prs->pr_reg[6] = regs->r0; ++ prs->pr_reg[7] = regs->r7; ++ prs->pr_reg[8] = regs->r8; ++ prs->pr_reg[9] = regs->r9; ++ prs->pr_reg[10] = regs->r10; ++ prs->pr_reg[11] = regs->r11; ++ prs->pr_reg[12] = regs->r12; ++ prs->pr_reg[13] = regs->sp; ++ prs->pr_reg[14] = regs->lr; ++ prs->pr_reg[15] = regs->pc; ++} + -+ if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL")) -+ return -EINVAL; ++/** ++ * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note ++ * @name: Name for the PRSTATUS note ++ * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note ++ * @created_prstatus_note: ++ * Pointer to the allocated ELF32 PRSTATUS note ++ * ++ * Creates an ELF32 note with one PRSTATUS entry containing the ++ * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in ++ * @created_prstatus_note. ++ * ++ * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. ++ */ ++static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, ++ struct elf32_note **created_prstatus_note) ++{ ++ struct elf32_note *note; ++ unsigned int note_name_sz; ++ unsigned int note_sz; + -+ /* If the pmode already mapped, nothing to do */ -+ if (csg_reg->pmode_mapped) ++ /* Allocate memory for ELF32 note containing a PRSTATUS note. 
*/ ++ note_name_sz = strlen(name) + 1; ++ note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + ++ sizeof(struct elf_prstatus32); ++ note = kmalloc(note_sz, GFP_KERNEL); ++ if (!note) + return 0; + -+ /* P-mode map not in place and the group has allocated P-mode pages, map it */ -+ if (group->protected_suspend_buf.pma) { -+ unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; -+ struct tagged_addr *phy = shared_regs->pma_phys; -+ struct kbase_va_region *reg = csg_reg->reg; -+ u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); -+ u32 i; -+ -+ /* Populate the protected phys from pma to phy[] */ -+ for (i = 0; i < nr_susp_pages; i++) -+ phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa); ++ /* Fill in ELF32 note with one entry for a PRSTATUS note. */ ++ note->n_namesz = note_name_sz; ++ note->n_descsz = sizeof(struct elf_prstatus32); ++ note->n_type = NT_PRSTATUS; ++ memcpy(note + 1, name, note_name_sz); ++ memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + -+ /* Add the P-mode suspend buffer mapping */ -+ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags, -+ KBASE_MEM_GROUP_CSF_FW); ++ /* Return pointer and size of the created ELF32 note. */ ++ *created_prstatus_note = note; ++ return note_sz; ++} + -+ /* If error, restore to default dummpy */ -+ if (unlikely(err)) { -+ err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages); -+ if (unlikely(err1)) -+ dev_warn( -+ kbdev->dev, -+ "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d", -+ __func__, group->kctx->tgid, group->kctx->id, -+ group->handle); ++/** ++ * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump ++ * @m: the seq_file handle ++ * ++ * Writes the ELF header of the core dump including program headers for ++ * memory sections and a note containing the current MCU register ++ * values. ++ * ++ * Excludes memory sections without read access permissions or ++ * are for protected memory. ++ * ++ * The data written is as follows: ++ * - ELF header ++ * - ELF PHDRs for memory sections ++ * - ELF PHDR for program header NOTE ++ * - ELF PRSTATUS note ++ * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE ++ * ++ * The actual memory section dumps should follow this (not written ++ * by this function). ++ * ++ * Retrieves the necessary information via the struct ++ * fw_core_dump_data stored in the private member of the seq_file ++ * handle. ++ * ++ * Return: ++ * * 0 - success ++ * * -ENOMEM - not enough memory for allocating ELF32 note ++ */ ++static int fw_core_dump_write_elf_header(struct seq_file *m) ++{ ++ struct elf32_hdr hdr; ++ struct elf32_phdr phdr; ++ struct fw_core_dump_data *dump_data = m->private; ++ struct kbase_device *const kbdev = dump_data->kbdev; ++ struct kbase_csf_firmware_interface *interface; ++ struct elf_prstatus32 elf_prs; ++ struct elf32_note *elf_prstatus_note; ++ unsigned int sections = 0; ++ unsigned int elf_prstatus_note_size; ++ u32 elf_prstatus_offset; ++ u32 elf_phdr_note_offset; ++ u32 elf_memory_sections_data_offset; ++ u32 total_pages = 0; ++ u32 padding_size, *padding; ++ struct fw_core_dump_mcu regs = { 0 }; + -+ csg_reg->pmode_mapped = false; -+ } else -+ csg_reg->pmode_mapped = true; ++ /* Count number of memory sections. */ ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ /* Skip memory sections that cannot be read or are protected. 
*/ ++ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || ++ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) ++ continue; ++ sections++; + } + -+ return err; -+} ++ /* Prepare ELF header. */ ++ fw_core_dump_fill_elf_header(&hdr, sections + 1); ++ seq_write(m, &hdr, sizeof(struct elf32_hdr)); + -+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) -+{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); -+ struct kbase_va_region *reg; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; -+ int err = 0; -+ u32 i; ++ elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); ++ /* PHDRs of PT_LOAD type. */ ++ elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); ++ /* PHDR of PT_NOTE type. */ ++ elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); ++ elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Calculate padding size to page offset. */ ++ padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - ++ elf_memory_sections_data_offset; ++ elf_memory_sections_data_offset += padding_size; + -+ /* Nothing to do for clearing up if no bound csg_reg */ -+ if (!csg_reg) -+ return; ++ /* Prepare ELF program header table. */ ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ /* Skip memory sections that cannot be read or are protected. */ ++ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || ++ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) ++ continue; + -+ reg = csg_reg->reg; -+ /* Restore mappings default dummy pages for any mapped pages */ -+ if (csg_reg->pmode_mapped) { -+ err = update_mapping_with_dummy_pages( -+ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); -+ WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping"); ++ fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, ++ interface->virtual, ++ interface->num_pages * FW_PAGE_SIZE, ++ interface->flags); + -+ csg_reg->pmode_mapped = false; -+ } ++ seq_write(m, &phdr, sizeof(struct elf32_phdr)); + -+ if (group->normal_suspend_buf.gpu_va) { -+ err = update_mapping_with_dummy_pages( -+ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); -+ WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping"); ++ elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; ++ total_pages += interface->num_pages; + } + -+ /* Deal with queue uerio pages */ -+ for (i = 0; i < nr_csis; i++) -+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]); ++ /* Prepare PHDR of PT_NOTE type. */ ++ fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, ++ elf_prstatus_note_size); ++ seq_write(m, &phdr, sizeof(struct elf32_phdr)); + -+ group->normal_suspend_buf.gpu_va = 0; -+ group->protected_suspend_buf.gpu_va = 0; ++ /* Prepare ELF note of PRSTATUS type. */ ++ if (fw_get_core_dump_mcu(kbdev, ®s)) ++ dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); ++ /* Even if MCU Registers are not available the ELF prstatus is still ++ * filled with the registers equal to zero. 
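++	 * The PRSTATUS note is therefore always emitted and tools parsing
++	 * the dump always find it; only the register contents are
++	 * meaningless in that case.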
++ */ ++ fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); ++ elf_prstatus_note_size = ++ fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); ++ if (elf_prstatus_note_size == 0) ++ return -ENOMEM; + -+ /* Break the binding */ -+ group->csg_reg = NULL; -+ csg_reg->grp = NULL; ++ seq_write(m, elf_prstatus_note, elf_prstatus_note_size); ++ kfree(elf_prstatus_note); + -+ /* Put the csg_reg to the front of the unused list */ -+ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) -+ list_add(&csg_reg->link, &shared_regs->unused_csg_regs); -+ else -+ list_move(&csg_reg->link, &shared_regs->unused_csg_regs); ++ /* Pad file to page size. */ ++ padding = kzalloc(padding_size, GFP_KERNEL); ++ seq_write(m, padding, padding_size); ++ kfree(padding); ++ ++ return 0; +} + -+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++/** ++ * fw_core_dump_create - Requests firmware to save state for a firmware core dump ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++static int fw_core_dump_create(struct kbase_device *kbdev) +{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ struct kbase_csg_shared_region *csg_reg; + int err; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ csg_reg = get_group_bound_csg_reg(group); -+ if (!csg_reg) -+ csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs, -+ struct kbase_csg_shared_region, link); -+ -+ if (!WARN_ON_ONCE(!csg_reg)) { -+ struct kbase_queue_group *prev_grp = csg_reg->grp; -+ -+ /* Deal with the previous binding and lazy unmap, i.e if the previous mapping not -+ * the required one, unmap it. -+ */ -+ if (prev_grp == group) { -+ /* Update existing bindings, if there have been some changes */ -+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); -+ if (likely(!err)) -+ err = csg_reg_update_on_csis(kbdev, group, NULL); -+ } else -+ err = group_bind_csg_reg(kbdev, group, csg_reg); -+ } else { -+ /* This should not have been possible if the code operates rightly */ -+ dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d", -+ __func__, group->handle, group->kctx->tgid, group->kctx->id); -+ return -EIO; -+ } ++ /* Ensure MCU is active before requesting the core dump. */ ++ kbase_csf_scheduler_pm_active(kbdev); ++ err = kbase_csf_scheduler_wait_mcu_active(kbdev); ++ if (!err) ++ err = kbase_csf_firmware_req_core_dump(kbdev); + -+ if (likely(!err)) -+ notify_group_csg_reg_map_done(group); -+ else -+ notify_group_csg_reg_map_error(group); ++ kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + -+static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, -+ struct kbase_csg_shared_region *csg_reg) ++/** ++ * fw_core_dump_seq_start - seq_file start operation for firmware core dump file ++ * @m: the seq_file handle ++ * @_pos: holds the current position in pages ++ * (0 or most recent position used in previous session) ++ * ++ * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 ++ * within the firmware interface memory sections. @_pos value 0 is used to indicate the ++ * position of the ELF header at the start of the file. ++ * ++ * Retrieves the necessary information via the struct fw_core_dump_data stored in ++ * the private member of the seq_file handle. 
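++ *
++ * Worked example (illustrative numbers only): with two readable interfaces
++ * of 4 and 8 pages, *_pos == 0 yields SEQ_START_TOKEN for the ELF header,
++ * *_pos == 3 maps to page 2 of the first interface, and *_pos == 5 maps to
++ * page 0 of the second interface.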
++ * ++ * Return: ++ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off ++ * * SEQ_START_TOKEN - special iterator pointer indicating its is the start of the file ++ * * NULL - iterator could not be allocated ++ */ ++static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; -+ const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); -+ struct kbase_va_region *reg; -+ u64 vpfn; -+ int err, i; ++ struct fw_core_dump_data *dump_data = m->private; ++ struct fw_core_dump_seq_off *data; ++ struct kbase_csf_firmware_interface *interface; ++ loff_t pos = *_pos; + -+ INIT_LIST_HEAD(&csg_reg->link); -+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, -+ KBASE_REG_ZONE_MCU_SHARED); ++ if (pos == 0) ++ return SEQ_START_TOKEN; + -+ if (!reg) { -+ dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", -+ __func__, nr_csg_reg_pages); -+ return -ENOMEM; -+ } ++ /* Move iterator in the right position based on page number within ++ * available pages of firmware interface memory sections. ++ */ ++ pos--; /* ignore start token */ ++ list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { ++ /* Skip memory sections that cannot be read or are protected. */ ++ if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || ++ (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) ++ continue; + -+ /* Insert the region into rbtree, so it becomes ready to use */ -+ mutex_lock(&kbdev->csf.reg_lock); -+ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1); -+ reg->flags &= ~KBASE_REG_FREE; -+ mutex_unlock(&kbdev->csf.reg_lock); -+ if (err) { -+ kfree(reg); -+ dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__, -+ nr_csg_reg_pages); -+ return err; ++ if (pos >= interface->num_pages) { ++ pos -= interface->num_pages; ++ } else { ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return NULL; ++ data->interface = interface; ++ data->page_num = pos; ++ return data; ++ } + } + -+ /* Initialize the mappings so MMU only need to update the the corresponding -+ * mapped phy-pages at runtime. -+ * Map the normal suspend buffer pages to the prepared dummy phys[]. -+ */ -+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); -+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); -+ -+ if (unlikely(err)) -+ goto fail_susp_map_fail; ++ return NULL; ++} + -+ /* Map the protected suspend buffer pages to the prepared dummy phys[] */ -+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); -+ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); ++/** ++ * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file ++ * @m: the seq_file handle ++ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) ++ * ++ * Closes the current session and frees any memory related. 
++ */ ++static void fw_core_dump_seq_stop(struct seq_file *m, void *v) ++{ ++ kfree(v); ++} + -+ if (unlikely(err)) -+ goto fail_pmod_map_fail; ++/** ++ * fw_core_dump_seq_next - seq_file next operation for firmware core dump file ++ * @m: the seq_file handle ++ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) ++ * @pos: holds the current position in pages ++ * (0 or most recent position used in previous session) ++ * ++ * Moves the iterator @v forward to the next page within the firmware interface ++ * memory sections and returns the updated position in @pos. ++ * @v value SEQ_START_TOKEN indicates the ELF header position. ++ * ++ * Return: ++ * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off ++ * * NULL - iterator could not be allocated ++ */ ++static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct fw_core_dump_data *dump_data = m->private; ++ struct fw_core_dump_seq_off *data = v; ++ struct kbase_csf_firmware_interface *interface; ++ struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + -+ for (i = 0; i < nr_csis; i++) { -+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); -+ err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); ++ /* Is current position at the ELF header ? */ ++ if (v == SEQ_START_TOKEN) { ++ if (list_empty(interfaces)) ++ return NULL; + -+ if (unlikely(err)) -+ goto fail_userio_pages_map_fail; ++ /* Prepare iterator for starting at first page in firmware interface ++ * memory sections. ++ */ ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return NULL; ++ data->interface = ++ list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); ++ data->page_num = 0; ++ ++*pos; ++ return data; + } + -+ /* Replace the previous NULL-valued field with the successully initialized reg */ -+ csg_reg->reg = reg; -+ -+ return 0; ++ /* First attempt to satisfy from current firmware interface memory section. */ ++ interface = data->interface; ++ if (data->page_num + 1 < interface->num_pages) { ++ data->page_num++; ++ ++*pos; ++ return data; ++ } + -+fail_userio_pages_map_fail: -+ while (i-- > 0) { -+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, -+ MCU_AS_NR, true); ++ /* Need next firmware interface memory section. This could be the last one. */ ++ if (list_is_last(&interface->node, interfaces)) { ++ kfree(data); ++ return NULL; + } + -+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); -+fail_pmod_map_fail: -+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); -+fail_susp_map_fail: -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ kfree(reg); ++ /* Move to first page in next firmware interface memory section. 
*/ ++ data->interface = list_next_entry(interface, node); ++ data->page_num = 0; ++ ++*pos; + -+ return err; ++ return data; +} + -+/* Note, this helper can only be called on scheduler shutdown */ -+static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, -+ struct kbase_csg_shared_region *csg_reg) ++/** ++ * fw_core_dump_seq_show - seq_file show operation for firmware core dump file ++ * @m: the seq_file handle ++ * @v: the current iterator (pointer to struct fw_core_dump_seq_off) ++ * ++ * Writes the current page in a firmware interface memory section indicated ++ * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF ++ * header is written. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ -+ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; -+ struct kbase_va_region *reg = csg_reg->reg; -+ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; -+ u64 vpfn; -+ int i; ++ struct fw_core_dump_seq_off *data = v; ++ struct page *page; ++ u32 *p; + -+ for (i = 0; i < nr_csis; i++) { -+ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, -+ MCU_AS_NR, true); -+ } ++ /* Either write the ELF header or current page. */ ++ if (v == SEQ_START_TOKEN) ++ return fw_core_dump_write_elf_header(m); + -+ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); -+ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); -+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, -+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); ++ /* Write the current page. */ ++ page = as_page(data->interface->phys[data->page_num]); ++ p = kmap_atomic(page); ++ seq_write(m, p, FW_PAGE_SIZE); ++ kunmap_atomic(p); + -+ mutex_lock(&kbdev->csf.reg_lock); -+ kbase_remove_va_region(kbdev, reg); -+ mutex_unlock(&kbdev->csf.reg_lock); -+ kfree(reg); ++ return 0; +} + -+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) ++/* Sequence file operations for firmware core dump file. */ ++static const struct seq_operations fw_core_dump_seq_ops = { ++ .start = fw_core_dump_seq_start, ++ .next = fw_core_dump_seq_next, ++ .stop = fw_core_dump_seq_stop, ++ .show = fw_core_dump_seq_show, ++}; ++ ++/** ++ * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file ++ * @inode: inode of the file ++ * @file: file pointer ++ * ++ * Prepares for servicing a write request to request a core dump from firmware and ++ * a read request to retrieve the core dump. ++ * ++ * Returns an error if the firmware is not initialized yet. ++ * ++ * Return: 0 on success, error code otherwise. 
++ */ ++static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; -+ struct kbase_csg_shared_region *array_csg_regs; -+ const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); -+ const u32 nr_groups = kbdev->csf.global_iface.group_num; -+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; -+ const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES); -+ u32 i; -+ int err; ++ struct kbase_device *const kbdev = inode->i_private; ++ struct fw_core_dump_data *dump_data; ++ int ret; + -+ shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev); -+ INIT_LIST_HEAD(&shared_regs->unused_csg_regs); ++ /* Fail if firmware is not initialized yet. */ ++ if (!kbdev->csf.firmware_inited) { ++ ret = -ENODEV; ++ goto open_fail; ++ } + -+ shared_regs->dummy_phys = -+ kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL); -+ if (!shared_regs->dummy_phys) -+ return -ENOMEM; ++ /* Open a sequence file for iterating through the pages in the ++ * firmware interface memory pages. seq_open stores a ++ * struct seq_file * in the private_data field of @file. ++ */ ++ ret = seq_open(file, &fw_core_dump_seq_ops); ++ if (ret) ++ goto open_fail; + -+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, -+ &shared_regs->dummy_phys[0], false, NULL) <= 0) -+ return -ENOMEM; ++ /* Allocate a context for sequence file operations. */ ++ dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); ++ if (!dump_data) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ shared_regs->dummy_phys_allocated = true; -+ set_page_meta_status_not_movable(shared_regs->dummy_phys[0]); ++ /* Kbase device will be shared with sequence file operations. */ ++ dump_data->kbdev = kbdev; + -+ /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */ -+ for (i = 1; i < nr_dummy_phys; i++) -+ shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0]; ++ /* Link our sequence file context. */ ++ ((struct seq_file *)file->private_data)->private = dump_data; + -+ shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL); -+ if (!shared_regs->pma_phys) -+ return -ENOMEM; ++ return 0; ++out: ++ seq_release(inode, file); ++open_fail: ++ return ret; ++} + -+ array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL); -+ if (!array_csg_regs) -+ return -ENOMEM; -+ shared_regs->array_csg_regs = array_csg_regs; ++/** ++ * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file ++ * @file: file pointer ++ * @ubuf: user buffer containing data to store ++ * @count: number of bytes in user buffer ++ * @ppos: file position ++ * ++ * Any data written to the file triggers a firmware core dump request which ++ * subsequently can be retrieved by reading from the file. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ int err; ++ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; ++ struct kbase_device *const kbdev = dump_data->kbdev; + -+ /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs -+ * are properly populated and ready to use. 
Now initialize the items in -+ * shared_regs->array_csg_regs[] -+ */ -+ for (i = 0; i < nr_csg_regs; i++) { -+ err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]); -+ if (err) -+ return err; ++ CSTD_UNUSED(ppos); + -+ list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs); -+ } ++ err = fw_core_dump_create(kbdev); + -+ return 0; ++ return err ? err : count; +} + -+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev) ++/** ++ * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file ++ * @inode: inode of the file ++ * @file: file pointer ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; -+ struct kbase_csg_shared_region *array_csg_regs = -+ (struct kbase_csg_shared_region *)shared_regs->array_csg_regs; -+ const u32 nr_groups = kbdev->csf.global_iface.group_num; -+ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; ++ struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + -+ if (array_csg_regs) { -+ struct kbase_csg_shared_region *csg_reg; -+ u32 i, cnt_csg_regs = 0; ++ seq_release(inode, file); + -+ for (i = 0; i < nr_csg_regs; i++) { -+ csg_reg = &array_csg_regs[i]; -+ /* There should not be any group mapping bindings */ -+ WARN_ONCE(csg_reg->grp, "csg_reg has a bound group"); ++ kfree(dump_data); + -+ if (csg_reg->reg) { -+ shared_mcu_csg_reg_term(kbdev, csg_reg); -+ cnt_csg_regs++; -+ } -+ } ++ return 0; ++} ++/* Debugfs file operations for firmware core dump file. */ ++static const struct file_operations kbase_csf_fw_core_dump_fops = { ++ .owner = THIS_MODULE, ++ .open = fw_core_dump_debugfs_open, ++ .read = seq_read, ++ .write = fw_core_dump_debugfs_write, ++ .llseek = seq_lseek, ++ .release = fw_core_dump_debugfs_release, ++}; + -+ /* The nr_susp_regs counts should match the array_csg_regs' length */ -+ list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link) -+ cnt_csg_regs--; ++void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) ++{ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, ++ &kbase_csf_fw_core_dump_fops); ++#endif /* CONFIG_DEBUG_FS */ ++} + -+ WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs"); -+ kfree(shared_regs->array_csg_regs); -+ } ++int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) ++{ ++ /* Casting to u16 as version is defined by bits 15:0 */ ++ kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + -+ if (shared_regs->dummy_phys_allocated) { -+ struct page *page = as_page(shared_regs->dummy_phys[0]); ++ if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) ++ return -EPERM; + -+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); -+ } ++ kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; ++ kbdev->csf.fw_core_dump.available = true; + -+ kfree(shared_regs->dummy_phys); -+ kfree(shared_regs->pma_phys); ++ return 0; +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 -index 000000000..61943cbbf +index 000000000..0537dca4f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h -@@ -0,0 +1,139 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h +@@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -158179,134 +160661,60 @@ index 000000000..61943cbbf + * + */ + -+#ifndef _KBASE_CSF_MCU_SHARED_REG_H_ -+#define _KBASE_CSF_MCU_SHARED_REG_H_ -+ -+/** -+ * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with -+ * scheduling action. Essential runtime resources -+ * are bound with the group for it to run -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @group: Pointer to the group that is placed into active on-slot running by the scheduler. -+ * -+ */ -+void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); -+ -+/** -+ * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with -+ * scheduling action. Some of bound runtime -+ * resources can be reallocated for others to use -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @group: Pointer to the group that is placed off-slot by the scheduler. -+ * -+ */ -+void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); -+ -+/** -+ * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected -+ * suspend buffer pages to be mapped for supporting -+ * protected mode operations. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping. -+ * -+ * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an -+ * error code is returned. -+ */ -+int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); -+ -+/** -+ * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the -+ * given group is evicted out of the runtime -+ * operations. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @group: Pointer to the group that has been evicted out of set of operational groups. -+ * -+ * This function will taken away any of the bindings/mappings immediately so the resources -+ * are not tied up to the given group, which has been evicted out of scheduling action for -+ * termination. -+ */ -+void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); ++#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ ++#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + -+/** -+ * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be -+ * run on slot. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
-+ * @queue: Pointer to the queue that requires some runtime resource to be bound for joining -+ * others that are already running on-slot with their bound group. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue); ++struct kbase_device; + -+/** -+ * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped -+ * from its operational state from a group. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @queue: Pointer to the queue that has been stopped from operational state. -+ * -+ */ -+void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue); ++/** Offset of the last field of core dump entry from the image header */ ++#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** -+ * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given -+ * group for ready to run on-slot. ++ * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from ++ * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @group: Pointer to the queue group that requires the runtime resources. ++ * @entry: Pointer to section. + * -+ * This function binds/maps the required suspend buffer pages and userio pages for the given -+ * group, readying it to run on-slot. ++ * Read a "core dump" entry from the image header, check the version for ++ * compatibility and store the address pointer. + * -+ * Return: 0 on success, or negative on failure. ++ * Return: 0 if successfully parse entry, negative error code otherwise. + */ -+int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); ++int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** -+ * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for -+ * the given device. ++ * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * Must be zero-initialized. + * -+ * This function allocate and initialize the MCU shared VA regions for runtime operations -+ * of the CSF scheduler. ++ * Creates the fw_core_dump debugfs file through which to request a firmware ++ * core dump. The created debugfs file is cleaned up as part of kbdev debugfs ++ * cleanup. + * -+ * Return: 0 on success, or an error code. -+ */ -+int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for -+ * the given device. ++ * The fw_core_dump debugs file that case be used in the following way: + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * To explicitly request core dump: ++ * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * -+ * This function terminates the MCU shared VA regions allocated for runtime operations -+ * of the CSF scheduler. 
++ * To output current core dump (after explicitly requesting a core dump, or ++ * kernel driver reported an internal firmware error): ++ * cat /sys/kernel/debug/mali0/fw_core_dump + */ -+void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev); ++void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + -+#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c ++#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c new file mode 100644 -index 000000000..bf1835b5b +index 000000000..6e0d3c2f5 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c -@@ -0,0 +1,163 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c +@@ -0,0 +1,451 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -158324,9461 +160732,9297 @@ index 000000000..bf1835b5b + * + */ + -+#include "mali_kbase_csf_protected_memory.h" -+#include ++#include ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include ++#include ++#include ++#include ++#include + -+#if IS_ENABLED(CONFIG_OF) -+#include -+#endif ++/* ++ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address. ++ */ ++#define ARMV7_T1_BL_IMM_INSTR 0xd800f000 + -+int kbase_csf_protected_memory_init(struct kbase_device *const kbdev) -+{ -+ int err = 0; ++/* ++ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum ++ * negative jump offset. ++ */ ++#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216 + -+#if IS_ENABLED(CONFIG_OF) -+ struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node, -+ "protected-memory-allocator", 0); -+ if (!pma_node) { -+ dev_info(kbdev->dev, "Protected memory allocator not available\n"); -+ } else { -+ struct platform_device *const pdev = -+ of_find_device_by_node(pma_node); ++/* ++ * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum ++ * positive jump offset. ++ */ ++#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214 + -+ kbdev->csf.pma_dev = NULL; -+ if (!pdev) { -+ dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n"); -+ } else { -+ kbdev->csf.pma_dev = platform_get_drvdata(pdev); -+ if (!kbdev->csf.pma_dev) { -+ dev_info(kbdev->dev, "Protected memory allocator is not ready\n"); -+ err = -EPROBE_DEFER; -+ } else if (!try_module_get(kbdev->csf.pma_dev->owner)) { -+ dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n"); -+ err = -ENODEV; -+ } else { -+ dev_info(kbdev->dev, "Protected memory allocator successfully loaded\n"); -+ } -+ } -+ of_node_put(pma_node); -+ } -+#endif ++/* ++ * ARMv7 instruction: Double NOP instructions. 
++ */ ++#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00 + -+ return err; -+} ++#if defined(CONFIG_DEBUG_FS) + -+void kbase_csf_protected_memory_term(struct kbase_device *const kbdev) ++static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) +{ -+ if (kbdev->csf.pma_dev) -+ module_put(kbdev->csf.pma_dev->owner); ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); ++ ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } ++ /* The enabled traces limited to u64 here, regarded practical */ ++ *val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb); ++ return 0; +} + -+struct protected_memory_allocation ** -+ kbase_csf_protected_memory_alloc( -+ struct kbase_device *const kbdev, -+ struct tagged_addr *phys, -+ size_t num_pages, -+ bool is_small_page) ++static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) +{ -+ size_t i; -+ struct protected_memory_allocator_device *pma_dev = -+ kbdev->csf.pma_dev; -+ struct protected_memory_allocation **pma = NULL; -+ unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER; -+ unsigned int num_pages_order; ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); ++ u64 new_mask; ++ unsigned int enable_bits_count; + -+ if (is_small_page) -+ order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER; ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } + -+ num_pages_order = (1u << order); ++ /* Ignore unsupported types */ ++ enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); ++ if (enable_bits_count > 64) { ++ dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); ++ enable_bits_count = 64; ++ } ++ new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); + -+ /* Ensure the requested num_pages is aligned with -+ * the order type passed as argument. -+ * -+ * pma_alloc_page() will then handle the granularity -+ * of the allocation based on order. 
-+ */ -+ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order); ++ if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) ++ return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); ++ else ++ return 0; ++} + -+ pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL); ++static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; + -+ if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma) -+ return NULL; ++ file->private_data = kbdev; ++ dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); + -+ for (i = 0; i < num_pages; i++) { -+ phys_addr_t phys_addr; ++ return 0; ++} + -+ pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order); -+ if (!pma[i]) -+ break; ++static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct kbase_device *kbdev = file->private_data; ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ unsigned int n_read; ++ unsigned long not_copied; ++ /* Limit reads to the kernel dump buffer size */ ++ size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE); ++ int ret; + -+ phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]); ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + -+ if (order) { -+ size_t j; ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } + -+ *phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE); ++ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) ++ return -EBUSY; + -+ for (j = 1; j < num_pages_order; j++) { -+ *phys++ = as_tagged_tag(phys_addr + -+ PAGE_SIZE * j, -+ HUGE_PAGE); -+ } -+ } else { -+ phys[i] = as_tagged(phys_addr); -+ } ++ /* Reading from userspace is only allowed in manual mode */ ++ if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { ++ ret = -EINVAL; ++ goto out; + } + -+ if (i != num_pages) { -+ kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page); -+ return NULL; ++ n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem); ++ ++ /* Do the copy, if we have obtained some trace data */ ++ not_copied = (n_read) ? 
copy_to_user(buf, fw_log->dump_buf, n_read) : 0; ++ ++ if (not_copied) { ++ dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); ++ ret = -EFAULT; ++ goto out; + } + -+ return pma; ++ *ppos += n_read; ++ ret = n_read; ++ ++out: ++ atomic_set(&fw_log->busy, 0); ++ return ret; +} + -+void kbase_csf_protected_memory_free( -+ struct kbase_device *const kbdev, -+ struct protected_memory_allocation **pma, -+ size_t num_pages, -+ bool is_small_page) ++static int kbase_csf_firmware_log_mode_read(void *data, u64 *val) +{ -+ size_t i; -+ struct protected_memory_allocator_device *pma_dev = -+ kbdev->csf.pma_dev; -+ unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER); ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + -+ if (is_small_page) -+ num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); ++ *val = fw_log->mode; ++ return 0; ++} + -+ if (WARN_ON(!pma_dev) || WARN_ON(!pma)) -+ return; ++static int kbase_csf_firmware_log_mode_write(void *data, u64 val) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ int ret = 0; + -+ /* Ensure the requested num_pages is aligned with -+ * the order type passed as argument. -+ * -+ * pma_alloc_page() will then handle the granularity -+ * of the allocation based on order. -+ */ -+ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order); ++ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) ++ return -EBUSY; + -+ for (i = 0; i < num_pages; i++) -+ pma_dev->ops.pma_free_page(pma_dev, pma[i]); ++ if (val == fw_log->mode) ++ goto out; + -+ kfree(pma); -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h -new file mode 100644 -index 000000000..8c1aa919f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h -@@ -0,0 +1,75 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ switch (val) { ++ case KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: ++ cancel_delayed_work_sync(&fw_log->poll_work); ++ break; ++ case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: ++ schedule_delayed_work(&fw_log->poll_work, ++ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); ++ break; ++ default: ++ ret = -EINVAL; ++ goto out; ++ } + -+#ifndef _KBASE_CSF_PROTECTED_MEMORY_H_ -+#define _KBASE_CSF_PROTECTED_MEMORY_H_ ++ fw_log->mode = val; + -+#include "mali_kbase.h" -+/** -+ * kbase_csf_protected_memory_init - Initilaise protected memory allocator. -+ * -+ * @kbdev: Device pointer. -+ * -+ * Return: 0 if success, or an error code on failure. 
-+ */ -+int kbase_csf_protected_memory_init(struct kbase_device *const kbdev); ++out: ++ atomic_set(&fw_log->busy, 0); ++ return ret; ++} + -+/** -+ * kbase_csf_protected_memory_term - Terminate prtotected memory allocator. -+ * -+ * @kbdev: Device pointer. -+ */ -+void kbase_csf_protected_memory_term(struct kbase_device *const kbdev); ++DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, ++ kbase_csf_firmware_log_enable_mask_read, ++ kbase_csf_firmware_log_enable_mask_write, "%llx\n"); + -+/** -+ * kbase_csf_protected_memory_alloc - Allocate protected memory pages. -+ * -+ * @kbdev: Device pointer. -+ * @phys: Array of physical addresses to be filled in by the protected -+ * memory allocator. -+ * @num_pages: Number of pages requested to be allocated. -+ * @is_small_page: Flag used to select the order of protected memory page. -+ * -+ * Return: Pointer to an array of protected memory allocations on success, -+ * or NULL on failure. -+ */ -+struct protected_memory_allocation ** -+ kbase_csf_protected_memory_alloc( -+ struct kbase_device *const kbdev, -+ struct tagged_addr *phys, -+ size_t num_pages, -+ bool is_small_page); ++static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_csf_firmware_log_debugfs_open, ++ .read = kbasep_csf_firmware_log_debugfs_read, ++ .llseek = no_llseek, ++}; + -+/** -+ * kbase_csf_protected_memory_free - Free the allocated -+ * protected memory pages -+ * -+ * @kbdev: Device pointer. -+ * @pma: Array of pointer to protected memory allocations. -+ * @num_pages: Number of pages to be freed. -+ * @is_small_page: Flag used to select the order of protected memory page. -+ */ -+void kbase_csf_protected_memory_free( -+ struct kbase_device *const kbdev, -+ struct protected_memory_allocation **pma, -+ size_t num_pages, -+ bool is_small_page); -+#endif -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h -new file mode 100644 -index 000000000..b5bf7bbbc ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h -@@ -0,0 +1,1678 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, ++ kbase_csf_firmware_log_mode_write, "%llu\n"); + -+/* -+ * This header was originally autogenerated, but it is now ok (and -+ * expected) to have to add to it. 
-+ */ ++#endif /* CONFIG_DEBUG_FS */ + -+#ifndef _KBASE_CSF_REGISTERS_H_ -+#define _KBASE_CSF_REGISTERS_H_ ++static void kbase_csf_firmware_log_poll(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = ++ container_of(work, struct kbase_device, csf.fw_log.poll_work.work); ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + -+/* -+ * Begin register sets -+ */ ++ schedule_delayed_work(&fw_log->poll_work, ++ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + -+/* CS_KERNEL_INPUT_BLOCK base address */ -+#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 -+#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) ++ kbase_csf_firmware_log_dump_buffer(kbdev); ++} + -+/* CS_KERNEL_OUTPUT_BLOCK base address */ -+#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000 -+#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r)) ++int kbase_csf_firmware_log_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + -+/* CS_USER_INPUT_BLOCK base address */ -+#define CS_USER_INPUT_BLOCK_BASE 0x0000 -+#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r)) ++ /* Add one byte for null-termination */ ++ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); ++ if (fw_log->dump_buf == NULL) ++ return -ENOMEM; + -+/* CS_USER_OUTPUT_BLOCK base address */ -+#define CS_USER_OUTPUT_BLOCK_BASE 0x0000 -+#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r)) ++ /* Ensure null-termination for all strings */ ++ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + -+/* CSG_INPUT_BLOCK base address */ -+#define CSG_INPUT_BLOCK_BASE 0x0000 -+#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r)) ++ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; + -+/* CSG_OUTPUT_BLOCK base address */ -+#define CSG_OUTPUT_BLOCK_BASE 0x0000 -+#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r)) ++ atomic_set(&fw_log->busy, 0); ++ INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); + -+/* GLB_CONTROL_BLOCK base address */ -+#define GLB_CONTROL_BLOCK_BASE 0x04000000 -+#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r)) ++#if defined(CONFIG_DEBUG_FS) ++ debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, ++ &kbase_csf_firmware_log_enable_mask_fops); ++ debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_firmware_log_debugfs_fops); ++ debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, ++ &kbase_csf_firmware_log_mode_fops); ++#endif /* CONFIG_DEBUG_FS */ + -+/* GLB_INPUT_BLOCK base address */ -+#define GLB_INPUT_BLOCK_BASE 0x0000 -+#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r)) ++ return 0; ++} + -+/* GLB_OUTPUT_BLOCK base address */ -+#define GLB_OUTPUT_BLOCK_BASE 0x0000 -+#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) ++void kbase_csf_firmware_log_term(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + -+/* End register sets */ ++ if (fw_log->dump_buf) { ++ cancel_delayed_work_sync(&fw_log->poll_work); ++ kfree(fw_log->dump_buf); ++ fw_log->dump_buf = NULL; ++ } ++} + -+/* -+ * Begin register offsets -+ */ ++void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; ++ unsigned int read_size, remaining_size; ++ struct firmware_trace_buffer *tb = ++ 
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + -+/* DOORBELLS register offsets */ -+#define DOORBELL_0 0x0000 /* () Doorbell 0 register */ -+#define DOORBELL(n) (DOORBELL_0 + (n)*65536) -+#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r)) -+#define DOORBELL_COUNT 1024 ++ if (tb == NULL) { ++ dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); ++ return; ++ } + -+/* DOORBELL_BLOCK register offsets */ -+#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */ ++ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) ++ return; + -+/* CS_KERNEL_INPUT_BLOCK register offsets */ -+#define CS_REQ 0x0000 /* () CS request flags */ -+#define CS_CONFIG 0x0004 /* () CS configuration */ -+#define CS_ACK_IRQ_MASK 0x000C /* () Command steam interrupt mask */ -+#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */ -+#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */ -+#define CS_SIZE 0x0018 /* () Size of the ring buffer */ -+#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */ -+#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */ -+#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */ -+#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */ -+#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */ -+#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */ -+#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode input page address, low word */ -+#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */ -+#define CS_INSTR_CONFIG 0x0040 /* () Instrumentation buffer configuration */ -+#define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */ -+#define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */ -+#define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */ -+#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ -+#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ ++ /* FW should only print complete messages, so there's no need to handle ++ * partial messages over multiple invocations of this function ++ */ + -+/* CS_KERNEL_OUTPUT_BLOCK register offsets */ -+#define CS_ACK 0x0000 /* () CS acknowledge flags */ -+#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */ -+#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */ -+#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */ -+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */ -+#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */ -+#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */ -+#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */ -+#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */ -+#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */ -+#define CS_FAULT 0x0080 /* () Recoverable fault information */ -+#define CS_FATAL 0x0084 /* () Unrecoverable fault information */ -+#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ -+#define CS_FAULT_INFO_HI 0x008C /* () Additional 
information about a recoverable fault, high word */ -+#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ -+#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ -+#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ -+#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ -+#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ -+#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */ -+#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */ ++ p = buf; ++ pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE]; + -+/* CS_USER_INPUT_BLOCK register offsets */ -+#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */ -+#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */ -+#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */ -+#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */ ++ while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) { ++ pend = p + read_size; ++ p = buf; + -+/* CS_USER_OUTPUT_BLOCK register offsets */ -+#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */ -+#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */ -+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */ ++ while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { ++ /* Null-terminate the string */ ++ *pnewline = 0; + -+/* CSG_INPUT_BLOCK register offsets */ -+#define CSG_REQ 0x0000 /* () CSG request */ -+#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ -+#define CSG_DB_REQ 0x0008 /* () Global doorbell request */ -+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ -+#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ -+#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ -+#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ -+#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ -+#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ -+#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ -+#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ -+#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ -+#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ -+#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ -+#define CSG_CONFIG 0x0050 /* () CSG configuration options */ -+#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ -+#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */ -+#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */ ++ dev_err(kbdev->dev, "FW> %s", p); + -+/* CSG_OUTPUT_BLOCK register offsets */ -+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */ -+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */ -+#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */ -+#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ -+#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ 
-+#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ ++ p = pnewline + 1; ++ } + -+/* GLB_CONTROL_BLOCK register offsets */ -+#define GLB_VERSION 0x0000 /* () Global interface version */ -+#define GLB_FEATURES 0x0004 /* () Global interface features */ -+#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ -+#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ -+#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ -+#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ -+#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ -+#define GLB_INSTR_FEATURES \ -+ 0x001C /* () TRACE_POINT instrumentation. (csf >= 1.1.0) */ -+#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ -+#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) -+#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) -+#define GROUP_CONTROL_COUNT 16 ++ remaining_size = pend - p; + -+/* STREAM_CONTROL_BLOCK register offsets */ -+#define STREAM_FEATURES 0x0000 /* () CSI features */ -+#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ -+#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ ++ if (!remaining_size) { ++ p = buf; ++ } else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) { ++ /* Copy unfinished string to the start of the buffer */ ++ memmove(buf, p, remaining_size); ++ p = &buf[remaining_size]; ++ } else { ++ /* Print abnormally long string without newlines */ ++ dev_err(kbdev->dev, "FW> %s", buf); ++ p = buf; ++ } ++ } + -+/* GROUP_CONTROL_BLOCK register offsets */ -+#define GROUP_FEATURES 0x0000 /* () CSG interface features */ -+#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ -+#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ -+#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ -+#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ -+#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ -+#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ -+#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ -+#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) -+#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) -+#define STREAM_CONTROL_COUNT 16 ++ if (p != buf) { ++ /* Null-terminate and print last unfinished string */ ++ *p = 0; ++ dev_err(kbdev->dev, "FW> %s", buf); ++ } + -+/* GLB_INPUT_BLOCK register offsets */ -+#define GLB_REQ 0x0000 /* () Global request */ -+#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ -+#define GLB_DB_REQ 0x0008 /* () Global doorbell request */ -+#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ -+#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ -+#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ -+#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ ++ atomic_set(&fw_log->busy, 0); ++} + -+#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ -+#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ -+#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ -+#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */ -+#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */ 
-+#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ -+#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ -+#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ -+#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */ -+#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ -+#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ -+#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ ++void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, ++ const uint32_t *entry) ++{ ++ kbdev->csf.fw_log.func_call_list_va_start = entry[0]; ++ kbdev->csf.fw_log.func_call_list_va_end = entry[1]; ++} + -+#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ -+#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ -+#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ -+#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ ++/** ++ * toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @enable: Whether to enable or disable the function calls. ++ */ ++static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable) ++{ ++ uint32_t bl_instruction, diff; ++ uint32_t imm11, imm10, i1, i2, j1, j2, sign; ++ uint32_t calling_address = 0, callee_address = 0; ++ uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start; ++ const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end; + -+/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ -+#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ -+#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ -+#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ -+#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ ++ if (list_entry == 0 || list_va_end == 0) ++ return; + -+#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ -+#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ ++ if (enable) { ++ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { ++ /* Read calling address */ ++ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); ++ /* Read callee address */ ++ kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t), ++ &callee_address); + -+/* GLB_OUTPUT_BLOCK register offsets */ -+#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ -+#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ -+#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */ -+#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ ++ diff = callee_address - calling_address - 4; ++ sign = !!(diff & 0x80000000); ++ if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ++ ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { ++ dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", ++ calling_address); ++ continue; ++ } + -+#define GLB_ACK 0x0000 /* () Global acknowledge */ -+#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ -+#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ -+#define 
GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ -+#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -+#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ -+#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ ++ i1 = (diff & 0x00800000) >> 23; ++ j1 = !i1 ^ sign; ++ i2 = (diff & 0x00400000) >> 22; ++ j2 = !i2 ^ sign; ++ imm11 = (diff & 0xffe) >> 1; ++ imm10 = (diff & 0x3ff000) >> 12; + -+#ifdef CONFIG_MALI_CORESIGHT -+#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4 -+#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT) -+#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5 -+#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT) -+#define GLB_DEBUG_ARG_IN0 0x0FE0 -+#define GLB_DEBUG_ARG_IN1 0x0FE4 -+#define GLB_DEBUG_ARG_OUT0 0x0FE0 -+#endif /* CONFIG_MALI_CORESIGHT */ ++ /* Compose BL instruction */ ++ bl_instruction = ARMV7_T1_BL_IMM_INSTR; ++ bl_instruction |= j1 << 29; ++ bl_instruction |= j2 << 27; ++ bl_instruction |= imm11 << 16; ++ bl_instruction |= sign << 10; ++ bl_instruction |= imm10; + -+/* End register offsets */ ++ /* Patch logging func calls in their load location */ ++ dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address, ++ bl_instruction); ++ kbase_csf_update_firmware_memory_exe(kbdev, calling_address, ++ bl_instruction); ++ } ++ } else { ++ for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { ++ /* Read calling address */ ++ kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); + -+/* CS_KERNEL_INPUT_BLOCK register set definitions */ -+/* GLB_VERSION register */ -+#define GLB_VERSION_PATCH_SHIFT (0) -+#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT) -+#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) -+#define GLB_VERSION_PATCH_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) -+#define GLB_VERSION_MINOR_SHIFT (16) -+#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT) -+#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) -+#define GLB_VERSION_MINOR_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) -+#define GLB_VERSION_MAJOR_SHIFT (24) -+#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT) -+#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) -+#define GLB_VERSION_MAJOR_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) ++ /* Overwrite logging func calls with 2 NOP instructions */ ++ kbase_csf_update_firmware_memory_exe(kbdev, calling_address, ++ ARMV7_DOUBLE_NOP_INSTR); ++ } ++ } ++} + -+/* CS_REQ register */ -+#define CS_REQ_STATE_SHIFT 0 -+#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) -+#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) -+#define CS_REQ_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) -+/* CS_REQ_STATE values */ -+#define CS_REQ_STATE_STOP 0x0 -+#define CS_REQ_STATE_START 0x1 -+/* End of CS_REQ_STATE values */ -+#define CS_REQ_EXTRACT_EVENT_SHIFT 4 -+#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) -+#define 
CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) -+#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) ++int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val) ++{ ++ unsigned long flags; ++ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; ++ bool mcu_inactive; ++ bool resume_needed = false; ++ int ret = 0; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 -+#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) -+#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) -+#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ -+ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK)) -+#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 -+#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) -+#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) -+#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ -+ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) -+#define CS_REQ_IDLE_EMPTY_SHIFT 10 -+#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) -+#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) -+#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) -+#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 -+#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) -+#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ -+ (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) -+#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ -+ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) -+#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 -+#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) -+#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ -+ (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) -+#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ -+ (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) -+#define CS_REQ_TILER_OOM_SHIFT 26 -+#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) -+#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) -+#define CS_REQ_TILER_OOM_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) -+#define CS_REQ_PROTM_PEND_SHIFT 27 -+#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) -+#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) -+#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) -+#define CS_REQ_FATAL_SHIFT 30 -+#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) -+#define CS_REQ_FATAL_GET(reg_val) 
(((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) -+#define CS_REQ_FATAL_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK)) -+#define CS_REQ_FAULT_SHIFT 31 -+#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT) -+#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT) -+#define CS_REQ_FAULT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK)) ++ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) ++ return -EBUSY; + -+/* CS_CONFIG register */ -+#define CS_CONFIG_PRIORITY_SHIFT 0 -+#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) -+#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) -+#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ -+ (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) -+#define CS_CONFIG_USER_DOORBELL_SHIFT 8 -+#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) -+#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) -+#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ -+ (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ -+ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) ++ /* Suspend all the active CS groups */ ++ dev_dbg(kbdev->dev, "Suspend all the active CS groups"); + -+/* CS_ACK_IRQ_MASK register */ -+#define CS_ACK_IRQ_MASK_STATE_SHIFT 0 -+#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) -+#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) -+#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) -+/* CS_ACK_IRQ_MASK_STATE values */ -+#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 -+#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7 -+/* End of CS_ACK_IRQ_MASK_STATE values */ -+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4 -+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) -+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ -+ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) -+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) -+#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 -+#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) -+#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ -+ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) -+#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) -+#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 -+#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) -+#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ -+ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) -+#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) 
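For illustration, the Thumb-2 BL immediate encoding that toggle_logging_calls_in_loaded_image() above writes into the loaded firmware image can be reproduced as a small standalone sketch. encode_thumb2_bl() is not a driver function, and the ARMV7_T1_BL_IMM_INSTR value and the sample addresses below are assumptions made only so the sketch compiles and runs on its own:

#include <stdint.h>
#include <stdio.h>

/* Assumed value of the driver's ARMV7_T1_BL_IMM_INSTR: the two BL halfwords
 * (0xF000.. and 0xD000..) as they appear in one little-endian 32-bit load. */
#define ARMV7_T1_BL_IMM_INSTR 0xD000F000u

/* Mirrors the offset-to-immediate packing done by
 * toggle_logging_calls_in_loaded_image() when logging is enabled. */
static uint32_t encode_thumb2_bl(uint32_t calling_address, uint32_t callee_address)
{
	/* The branch offset is PC-relative and the PC is the BL address + 4 */
	uint32_t diff = callee_address - calling_address - 4;
	uint32_t sign = !!(diff & 0x80000000);
	uint32_t i1 = (diff & 0x00800000) >> 23;
	uint32_t j1 = !i1 ^ sign;
	uint32_t i2 = (diff & 0x00400000) >> 22;
	uint32_t j2 = !i2 ^ sign;
	uint32_t imm11 = (diff & 0xffe) >> 1;
	uint32_t imm10 = (diff & 0x3ff000) >> 12;
	uint32_t bl_instruction = ARMV7_T1_BL_IMM_INSTR;

	bl_instruction |= j1 << 29;    /* J1: bit 13 of the second halfword */
	bl_instruction |= j2 << 27;    /* J2: bit 11 of the second halfword */
	bl_instruction |= imm11 << 16; /* imm11: low 11 bits of the second halfword */
	bl_instruction |= sign << 10;  /* S: bit 10 of the first halfword */
	bl_instruction |= imm10;       /* imm10: low 10 bits of the first halfword */
	return bl_instruction;
}

int main(void)
{
	/* Hypothetical caller/callee addresses inside the firmware image */
	printf("BL word: 0x%08x\n",
	       (unsigned int)encode_thumb2_bl(0x20000100u, 0x20004000u));
	return 0;
}

Disabling logging takes the reverse path: each patched call site is simply overwritten with ARMV7_DOUBLE_NOP_INSTR, as the else branch of the function above shows.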
-+#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 -+#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) -+#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) -+#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) -+#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 -+#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) -+#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) -+#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ -+ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) ++ kbase_csf_scheduler_lock(kbdev); ++ while (scheduler->state != SCHED_SUSPENDED) { ++ kbase_csf_scheduler_unlock(kbdev); ++ kbase_csf_scheduler_pm_suspend(kbdev); ++ kbase_csf_scheduler_lock(kbdev); ++ resume_needed = true; ++ } + -+/* CS_BASE register */ -+#define CS_BASE_POINTER_SHIFT 0 -+#define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT) -+#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) -+#define CS_BASE_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) ++ /* Wait for the MCU to get disabled */ ++ dev_info(kbdev->dev, "Wait for the MCU to get disabled"); ++ ret = kbase_pm_wait_for_desired_state(kbdev); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "wait for PM state failed when toggling FW logging calls"); ++ ret = -EAGAIN; ++ goto out; ++ } + -+/* CS_SIZE register */ -+#define CS_SIZE_SIZE_SHIFT 0 -+#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT) -+#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT) -+#define CS_SIZE_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK)) ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ mcu_inactive = ++ kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!mcu_inactive) { ++ dev_err(kbdev->dev, ++ "MCU not inactive after PM state wait when toggling FW logging calls"); ++ ret = -EAGAIN; ++ goto out; ++ } + -+/* CS_TILER_HEAP_START register */ -+#define CS_TILER_HEAP_START_POINTER_SHIFT 0 -+#define CS_TILER_HEAP_START_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT) -+#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) -+#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ -+ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) -+/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ -+/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ ++ /* Toggle FW logging call in the loaded FW image */ ++ toggle_logging_calls_in_loaded_image(kbdev, val); ++ dev_dbg(kbdev->dev, "FW logging: %s", val ? 
"enabled" : "disabled"); + -+/* CS_TILER_HEAP_END register */ -+#define CS_TILER_HEAP_END_POINTER_SHIFT 0 -+#define CS_TILER_HEAP_END_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT) -+#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) -+#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ -+ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) -+/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ -+/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ ++out: ++ kbase_csf_scheduler_unlock(kbdev); ++ if (resume_needed) ++ /* Resume queue groups and start mcu */ ++ kbase_csf_scheduler_pm_resume(kbdev); ++ atomic_set(&fw_log->busy, 0); ++ return ret; ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h +new file mode 100644 +index 000000000..100832046 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h +@@ -0,0 +1,77 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+/* CS_USER_INPUT register */ -+#define CS_USER_INPUT_POINTER_SHIFT 0 -+#define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT) -+#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) -+#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ -+ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) ++#ifndef _KBASE_CSF_FIRMWARE_LOG_H_ ++#define _KBASE_CSF_FIRMWARE_LOG_H_ + -+/* CS_USER_OUTPUT register */ -+#define CS_USER_OUTPUT_POINTER_SHIFT 0 -+#define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT) -+#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) -+#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ -+ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) ++#include + -+/* CS_INSTR_CONFIG register */ -+#define CS_INSTR_CONFIG_JASID_SHIFT (0) -+#define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT) -+#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT) -+#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \ -+ (((value) << CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK)) -+#define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4) -+#define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) -+#define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \ -+ (((reg_val)&CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) -+#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \ -+ (((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK)) -+#define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16) -+#define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) -+#define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \ -+ (((reg_val)&CS_INSTR_CONFIG_EVENT_STATE_MASK) >> CS_INSTR_CONFIG_EVENT_STATE_SHIFT) -+#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \ -+ (((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK)) ++/** Offset of the last field of functions call list entry from the image header */ ++#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + -+/* CS_INSTR_BUFFER_SIZE register */ -+#define CS_INSTR_BUFFER_SIZE_SIZE_SHIFT (0) -+#define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) -+#define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \ -+ (((reg_val)&CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) -+#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \ -+ (((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK)) ++/* ++ * Firmware log dumping buffer size. 
++ */ ++#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE + -+/* CS_INSTR_BUFFER_BASE register */ -+#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0) -+#define CS_INSTR_BUFFER_BASE_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) -+#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT) -+#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \ -+ (((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK)) ++/** ++ * kbase_csf_firmware_log_init - Initialize firmware log handling. ++ * ++ * @kbdev: Pointer to the Kbase device ++ * ++ * Return: The initialization error code. ++ */ ++int kbase_csf_firmware_log_init(struct kbase_device *kbdev); + -+/* CS_INSTR_BUFFER_OFFSET_POINTER register */ -+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0) -+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ -+ ((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) -+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) -+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \ -+ (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK)) ++/** ++ * kbase_csf_firmware_log_term - Terminate firmware log handling. ++ * ++ * @kbdev: Pointer to the Kbase device ++ */ ++void kbase_csf_firmware_log_term(struct kbase_device *kbdev); + -+/* End of CS_KERNEL_INPUT_BLOCK register set definitions */ ++/** ++ * kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log ++ * buffer and print it to dmesg. ++ * ++ * @kbdev: Pointer to the Kbase device ++ */ ++void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev); + -+/* CS_KERNEL_OUTPUT_BLOCK register set definitions */ ++/** ++ * kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @entry: Pointer to section. ++ */ ++void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, ++ const uint32_t *entry); ++/** ++ * kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @val: Configuration option value. 
++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val); + -+/* CS_ACK register */ -+#define CS_ACK_STATE_SHIFT 0 -+#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT) -+#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT) -+#define CS_ACK_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK)) -+/* CS_ACK_STATE values */ -+#define CS_ACK_STATE_STOP 0x0 -+#define CS_ACK_STATE_START 0x1 -+/* End of CS_ACK_STATE values */ -+#define CS_ACK_EXTRACT_EVENT_SHIFT 4 -+#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) -+#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) -+#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) -+#define CS_ACK_TILER_OOM_SHIFT 26 -+#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) -+#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) -+#define CS_ACK_TILER_OOM_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) -+#define CS_ACK_PROTM_PEND_SHIFT 27 -+#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) -+#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) -+#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) -+#define CS_ACK_FATAL_SHIFT 30 -+#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) -+#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) -+#define CS_ACK_FATAL_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK)) -+#define CS_ACK_FAULT_SHIFT 31 -+#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT) -+#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT) -+#define CS_ACK_FAULT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK)) ++#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +new file mode 100644 +index 000000000..833947fac +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +@@ -0,0 +1,1661 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+/* CS_STATUS_CMD_PTR register */ -+#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 -+#define CS_STATUS_CMD_PTR_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT) -+#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) -+#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ -+ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) ++#include "mali_kbase.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_ctx_sched.h" ++#include "device/mali_kbase_device.h" ++#include ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "mali_kbase_csf_scheduler.h" ++#include "mmu/mali_kbase_mmu.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++#include + -+/* CS_STATUS_WAIT register */ -+#define CS_STATUS_WAIT_SB_MASK_SHIFT 0 -+#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) -+#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) -+#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) -+#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16 -+#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT) -+#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT) -+#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK)) -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) -+/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 -+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5 -+/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ -+#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 -+#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) -+#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) -+#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ -+ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) -+#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 -+#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) 
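Every field in these kernel input/output pages follows the same SHIFT/MASK/GET/SET accessor pattern. As a minimal standalone sketch (the CS_STATUS_WAIT macros are restated from the definitions above and wait_status is a made-up value), decoding the sync-wait condition from a CS_STATUS_WAIT word looks like this:

#include <stdio.h>

typedef unsigned int u32;

/* Restated from the CS_STATUS_WAIT definitions above so the sketch is standalone */
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
	(((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1

int main(void)
{
	/* Hypothetical CS_STATUS_WAIT value read back from a CS kernel output page:
	 * bit 31 (SYNC_WAIT) set, condition field = GT. */
	u32 wait_status = 0x81000000u;

	if (CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ==
	    CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT)
		printf("CS is blocked until the sync object exceeds the wait value\n");
	return 0;
}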
-+#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) -+#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ -+ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) -+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 -+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) -+#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 -+#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT) -+#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) ++#include ++#include ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) ++#include ++#endif ++#include + -+/* CS_STATUS_REQ_RESOURCE register */ -+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 -+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ -+ (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) -+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 -+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ -+ (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) -+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 -+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ -+ (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) -+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 -+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ -+ 
(((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) -+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ -+ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++/* Makes Driver wait indefinitely for an acknowledgment for the different ++ * requests it sends to firmware. Otherwise the timeouts interfere with the ++ * use of debugger for source-level debugging of firmware as Driver initiates ++ * a GPU reset when a request times out, which always happen when a debugger ++ * is connected. ++ */ ++bool fw_debug; /* Default value of 0/false */ ++module_param(fw_debug, bool, 0444); ++MODULE_PARM_DESC(fw_debug, ++ "Enables effective use of a debugger for debugging firmware code."); ++#endif + -+/* CS_STATUS_WAIT_SYNC_POINTER register */ -+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 -+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) -+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) ++#define DUMMY_FW_PAGE_SIZE SZ_4K + -+/* CS_STATUS_WAIT_SYNC_VALUE register */ -+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 -+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) -+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) -+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ -+ (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) ++/** ++ * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs ++ * ++ * @cs_kernel_input: CS kernel input memory region ++ * @cs_kernel_output: CS kernel output memory region ++ */ ++struct dummy_firmware_csi { ++ u8 cs_kernel_input[DUMMY_FW_PAGE_SIZE]; ++ u8 cs_kernel_output[DUMMY_FW_PAGE_SIZE]; ++}; + -+/* CS_STATUS_SCOREBOARDS register */ -+#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) -+#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ -+ ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -+#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ -+ CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) -+#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ -+ (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ -+ CS_STATUS_SCOREBOARDS_NONZERO_MASK)) ++/** ++ * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs ++ * ++ * @csg_input: CSG kernel input memory region ++ * @csg_output: CSG kernel output memory region ++ * @csi: Dummy firmware CSIs ++ */ ++struct dummy_firmware_csg { ++ u8 csg_input[DUMMY_FW_PAGE_SIZE]; ++ u8 csg_output[DUMMY_FW_PAGE_SIZE]; ++ struct dummy_firmware_csi csi[8]; ++} dummy_firmware_csg; + -+/* CS_STATUS_BLOCKED_REASON register */ -+#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) -+#define 
CS_STATUS_BLOCKED_REASON_REASON_MASK \ -+ ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -+#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ -+ (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ -+ CS_STATUS_BLOCKED_REASON_REASON_SHIFT) -+#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ -+ (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ -+ (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ -+ CS_STATUS_BLOCKED_REASON_REASON_MASK)) -+/* CS_STATUS_BLOCKED_REASON_reason values */ -+#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 -+#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 -+#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 -+#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 -+#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 -+#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5 -+#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6 -+/* End of CS_STATUS_BLOCKED_REASON_reason values */ ++/** ++ * struct dummy_firmware_interface - Represents a dummy interface in the MCU firmware ++ * ++ * @global_input: Global input memory region ++ * @global_output: Global output memory region ++ * @csg: Dummy firmware CSGs ++ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces ++ * list using this list_head to link them ++ */ ++struct dummy_firmware_interface { ++ u8 global_input[DUMMY_FW_PAGE_SIZE]; ++ u8 global_output[DUMMY_FW_PAGE_SIZE]; ++ struct dummy_firmware_csg csg[8]; ++ struct list_head node; ++} dummy_firmware_interface; + -+/* CS_FAULT register */ -+#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 -+#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) -+#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) -+#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ -+ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) -+/* CS_FAULT_EXCEPTION_TYPE values */ -+#define CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05 -+#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F -+#define CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 -+#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B -+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 -+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 -+#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 -+#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 -+#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 -+#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A -+#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B -+#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 -+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0 -+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1 -+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2 -+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3 -+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4 -+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8 -+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9 -+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA -+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB -+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9 -+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA -+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB -+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0 -+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4 -+#define 
CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5 -+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6 -+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7 -+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8 -+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9 -+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA -+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB -+/* End of CS_FAULT_EXCEPTION_TYPE values */ -+#define CS_FAULT_EXCEPTION_DATA_SHIFT 8 -+#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) -+#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) -+#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ -+ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) ++#define CSF_GLB_REQ_CFG_MASK \ ++ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ ++ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) + -+/* CS_FATAL register */ -+#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 -+#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) -+#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) -+#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ -+ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) -+/* CS_FATAL_EXCEPTION_TYPE values */ -+#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 -+#define CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41 -+#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 -+#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 -+#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 -+#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A -+#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68 -+/* End of CS_FATAL_EXCEPTION_TYPE values */ -+#define CS_FATAL_EXCEPTION_DATA_SHIFT 8 -+#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) -+#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) -+#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ -+ (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK)) ++static inline u32 input_page_read(const u32 *const input, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); + -+/* CS_FAULT_INFO register */ -+#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 -+#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) -+#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ -+ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) -+#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ -+ (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK)) ++ return input[offset / sizeof(u32)]; ++} + -+/* CS_FATAL_INFO register */ -+#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 -+#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) -+#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ -+ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) -+#define 
CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ -+ (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ -+ (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK)) ++static inline void input_page_write(u32 *const input, const u32 offset, ++ const u32 value) ++{ ++ WARN_ON(offset % sizeof(u32)); + -+/* CS_HEAP_VT_START register */ -+#define CS_HEAP_VT_START_VALUE_SHIFT 0 -+#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT) -+#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) -+#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \ -+ (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK)) ++ input[offset / sizeof(u32)] = value; ++} + -+/* CS_HEAP_VT_END register */ -+#define CS_HEAP_VT_END_VALUE_SHIFT 0 -+#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT) -+#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) -+#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) ++static inline u32 output_page_read(const u32 *const output, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); + -+/* CS_HEAP_FRAG_END register */ -+#define CS_HEAP_FRAG_END_VALUE_SHIFT 0 -+#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT) -+#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) -+#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ -+ (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK)) ++ return output[offset / sizeof(u32)]; ++} + -+/* CS_HEAP_ADDRESS register */ -+#define CS_HEAP_ADDRESS_POINTER_SHIFT 0 -+#define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT) -+#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) -+#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ -+ (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK)) -+/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */ ++static inline void output_page_write(u32 *const output, const u32 offset, ++ const u32 value) ++{ ++ WARN_ON(offset % sizeof(u32)); + -+/* CS_USER_INPUT_BLOCK register set definitions */ ++ output[offset / sizeof(u32)] = value; ++} + -+/* CS_INSERT register */ -+#define CS_INSERT_VALUE_SHIFT 0 -+#define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT) -+#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) -+#define CS_INSERT_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) ++/** ++ * invent_memory_setup_entry() - Invent an "interface memory setup" section ++ * ++ * @kbdev: Kbase device structure ++ * ++ * Invent an "interface memory setup" section similar to one from a firmware ++ * image. If successful the interface will be added to the ++ * kbase_device:csf.firmware_interfaces list. 
++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++static int invent_memory_setup_entry(struct kbase_device *kbdev) ++{ ++ struct dummy_firmware_interface *interface = NULL; + -+/* CS_EXTRACT_INIT register */ -+#define CS_EXTRACT_INIT_VALUE_SHIFT 0 -+#define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT) -+#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) -+#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ -+ (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK)) -+/* End of CS_USER_INPUT_BLOCK register set definitions */ ++ /* Allocate enough memory for the struct dummy_firmware_interface. ++ */ ++ interface = kzalloc(sizeof(*interface), GFP_KERNEL); ++ if (!interface) ++ return -ENOMEM; + -+/* CS_USER_OUTPUT_BLOCK register set definitions */ ++ kbdev->csf.shared_interface = interface; ++ list_add(&interface->node, &kbdev->csf.firmware_interfaces); + -+/* CS_EXTRACT register */ -+#define CS_EXTRACT_VALUE_SHIFT 0 -+#define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT) -+#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT) -+#define CS_EXTRACT_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) ++ /* NO_MALI: Don't insert any firmware pages */ ++ return 0; ++} + -+/* CS_ACTIVE register */ -+#define CS_ACTIVE_HW_ACTIVE_SHIFT 0 -+#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT) -+#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) -+#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ -+ (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) -+/* End of CS_USER_OUTPUT_BLOCK register set definitions */ ++static void free_global_iface(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + -+/* CSG_INPUT_BLOCK register set definitions */ ++ if (iface->groups) { ++ unsigned int gid; + -+/* CSG_REQ register */ -+#define CSG_REQ_STATE_SHIFT 0 -+#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT) -+#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT) -+#define CSG_REQ_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) -+/* CSG_REQ_STATE values */ -+#define CSG_REQ_STATE_TERMINATE 0x0 -+#define CSG_REQ_STATE_START 0x1 -+#define CSG_REQ_STATE_SUSPEND 0x2 -+#define CSG_REQ_STATE_RESUME 0x3 -+/* End of CSG_REQ_STATE values */ -+#define CSG_REQ_EP_CFG_SHIFT 4 -+#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT) -+#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT) -+#define CSG_REQ_EP_CFG_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) -+#define CSG_REQ_STATUS_UPDATE_SHIFT 5 -+#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT) -+#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) -+#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ -+ (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK)) 
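Each *_REQ register in this interface is paired with a *_ACK register; as the GLB_DEBUG_REQ/GLB_DEBUG_ACK FWUTF_RUN comment earlier suggests, a request is treated as outstanding while the REQ and ACK copies of a field differ. A minimal sketch of that check for the CSG state field, with csg_state_change_pending() and the sample values being illustrative only, could look like this:

#include <stdio.h>

typedef unsigned int u32;

/* Restated from the CSG_REQ definitions above so the sketch is standalone */
#define CSG_REQ_STATE_SHIFT 0
#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
#define CSG_REQ_STATE_TERMINATE 0x0
#define CSG_REQ_STATE_START 0x1

/* A field's request is treated as pending while its REQ and ACK copies differ */
static int csg_state_change_pending(u32 req, u32 ack)
{
	return ((req ^ ack) & CSG_REQ_STATE_MASK) != 0;
}

int main(void)
{
	u32 req = CSG_REQ_STATE_START;     /* hypothetical CSG_REQ: START requested */
	u32 ack = CSG_REQ_STATE_TERMINATE; /* hypothetical CSG_ACK: still TERMINATE */

	printf("state change pending: %d\n", csg_state_change_pending(req, ack));
	return 0;
}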
-+#define CSG_REQ_SYNC_UPDATE_SHIFT 28 -+#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT) -+#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) -+#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) -+#define CSG_REQ_IDLE_SHIFT 29 -+#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT) -+#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) -+#define CSG_REQ_IDLE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK)) -+#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31 -+#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ -+ (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ -+ (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK)) ++ for (gid = 0; gid < iface->group_num; ++gid) ++ kfree(iface->groups[gid].streams); + -+/* CSG_ACK_IRQ_MASK register */ -+#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0 -+#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK)) -+/* CSG_ACK_IRQ_MASK_STATE values */ -+#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 -+#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7 -+/* End of CSG_ACK_IRQ_MASK_STATE values */ -+#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 -+#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -+#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) -+#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) -+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 -+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ -+ (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) -+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 -+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ -+ (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) -+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) -+#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 -+#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) -+#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> 
CSG_ACK_IRQ_MASK_IDLE_SHIFT) -+#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) -+#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 -+#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) -+#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ -+ (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) -+#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) -+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 -+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ -+ (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ -+ (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) ++ kfree(iface->groups); ++ iface->groups = NULL; ++ } ++} + -+/* CSG_EP_REQ register */ -+#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 -+#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) -+#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) -+#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ -+ (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) -+#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 -+#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) -+#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) -+#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ -+ (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) -+#define CSG_EP_REQ_TILER_EP_SHIFT 16 -+#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) -+#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) -+#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) -+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ -+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ -+ (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) -+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ -+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ -+ (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) -+#define 
CSG_EP_REQ_PRIORITY_SHIFT 28 -+#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) -+#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) -+#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) ++static int invent_cmd_stream_group_info(struct kbase_device *kbdev, ++ struct kbase_csf_cmd_stream_group_info *ginfo, ++ struct dummy_firmware_csg *csg) ++{ ++ unsigned int sid; + -+/* CSG_SUSPEND_BUF register */ -+#define CSG_SUSPEND_BUF_POINTER_SHIFT 0 -+#define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT) -+#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) -+#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ -+ (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) ++ ginfo->input = csg->csg_input; ++ ginfo->output = csg->csg_output; + -+/* CSG_PROTM_SUSPEND_BUF register */ -+#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 -+#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) -+#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ -+ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) -+#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ -+ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) ++ ginfo->kbdev = kbdev; ++ ginfo->features = 0; ++ ginfo->suspend_size = 64; ++ ginfo->protm_suspend_size = 64; ++ ginfo->stream_num = ARRAY_SIZE(csg->csi); ++ ginfo->stream_stride = 0; + -+/* CSG_DVS_BUF_BUFFER register */ -+#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0) -+#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) -+#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT) -+#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \ -+ (((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK)) -+#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12) -+#define CSG_DVS_BUF_BUFFER_POINTER_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) -+#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \ -+ (((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT) -+#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \ -+ (((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK)) ++ ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); ++ if (ginfo->streams == NULL) ++ return -ENOMEM; + -+/* End of CSG_INPUT_BLOCK register set definitions */ ++ for (sid = 0; sid < ginfo->stream_num; ++sid) { ++ struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid]; ++ struct dummy_firmware_csi *csi = &csg->csi[sid]; + -+/* CSG_OUTPUT_BLOCK register set definitions */ ++ stream->input = csi->cs_kernel_input; ++ stream->output = csi->cs_kernel_output; + -+/* CSG_ACK register */ -+#define CSG_ACK_STATE_SHIFT 0 -+#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) -+#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> 
CSG_ACK_STATE_SHIFT) -+#define CSG_ACK_STATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) -+/* CSG_ACK_STATE values */ -+#define CSG_ACK_STATE_TERMINATE 0x0 -+#define CSG_ACK_STATE_START 0x1 -+#define CSG_ACK_STATE_SUSPEND 0x2 -+#define CSG_ACK_STATE_RESUME 0x3 -+/* End of CSG_ACK_STATE values */ -+#define CSG_ACK_EP_CFG_SHIFT 4 -+#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) -+#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) -+#define CSG_ACK_EP_CFG_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) -+#define CSG_ACK_STATUS_UPDATE_SHIFT 5 -+#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) -+#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) -+#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ -+ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) -+#define CSG_ACK_SYNC_UPDATE_SHIFT 28 -+#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) -+#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) -+#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) -+#define CSG_ACK_IDLE_SHIFT 29 -+#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) -+#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) -+#define CSG_ACK_IDLE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) -+#define CSG_ACK_DOORBELL_SHIFT 30 -+#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) -+#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) -+#define CSG_ACK_DOORBELL_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) -+#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 -+#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ -+ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) -+#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ -+ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) ++ stream->kbdev = kbdev; ++ stream->features = ++ STREAM_FEATURES_WORK_REGISTERS_SET(0, 80) | ++ STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | ++ STREAM_FEATURES_COMPUTE_SET(0, 1) | ++ STREAM_FEATURES_FRAGMENT_SET(0, 1) | ++ STREAM_FEATURES_TILER_SET(0, 1); ++ } + -+/* CSG_STATUS_EP_CURRENT register */ -+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 -+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) -+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 -+#define 
CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) -+#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 -+#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) -+#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) ++ return 0; ++} + -+/* CSG_STATUS_EP_REQ register */ -+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 -+#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) -+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 -+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) -+#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 -+#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) -+#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ -+ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ -+ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ -+ (((value) << 
CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) ++static int invent_capabilities(struct kbase_device *kbdev) ++{ ++ struct dummy_firmware_interface *interface = kbdev->csf.shared_interface; ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ unsigned int gid; + -+/* End of CSG_OUTPUT_BLOCK register set definitions */ ++ iface->input = interface->global_input; ++ iface->output = interface->global_output; + -+/* STREAM_CONTROL_BLOCK register set definitions */ ++ iface->version = 1; ++ iface->kbdev = kbdev; ++ iface->features = 0; ++ iface->prfcnt_size = ++ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE); + -+/* STREAM_FEATURES register */ -+#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 -+#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) -+#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ -+ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) -+#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ -+ (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) -+#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 -+#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) -+#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ -+ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) -+#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ -+ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) -+#define STREAM_FEATURES_COMPUTE_SHIFT 16 -+#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) -+#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) -+#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ -+ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) -+#define STREAM_FEATURES_FRAGMENT_SHIFT 17 -+#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) -+#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ -+ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) -+#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ -+ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) -+#define STREAM_FEATURES_TILER_SHIFT 18 -+#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) -+#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) -+#define STREAM_FEATURES_TILER_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ -+ (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) ++ if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { ++ /* update rate=1, max event size = 1<<8 = 256 */ ++ iface->instr_features = 0x81; ++ } else { ++ iface->instr_features = 0; ++ } + -+/* STREAM_INPUT_VA register */ -+#define STREAM_INPUT_VA_VALUE_SHIFT 0 -+#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) -+#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) -+#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ -+ (((reg_val) & 
~STREAM_INPUT_VA_VALUE_MASK) | \ -+ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) ++ iface->group_num = ARRAY_SIZE(interface->csg); ++ iface->group_stride = 0; + -+/* STREAM_OUTPUT_VA register */ -+#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 -+#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) -+#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) -+#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ -+ (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ -+ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) -+/* End of STREAM_CONTROL_BLOCK register set definitions */ ++ iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); ++ if (iface->groups == NULL) ++ return -ENOMEM; + -+/* GLB_INPUT_BLOCK register set definitions */ ++ for (gid = 0; gid < iface->group_num; ++gid) { ++ int err; + -+/* GLB_REQ register */ -+#define GLB_REQ_HALT_SHIFT 0 -+#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) -+#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) -+#define GLB_REQ_HALT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) -+#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 -+#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ -+ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) -+#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 -+#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) -+#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) -+#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) -+#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 -+#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) -+#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) -+#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ -+ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) -+#define GLB_REQ_PROTM_ENTER_SHIFT 4 -+#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) -+#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) -+#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) -+#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 -+#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) -+#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) -+#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ -+ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) -+#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 -+#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) -+#define 
GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) -+#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ -+ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) -+#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 -+#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) -+#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) -+#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ -+ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) -+#define GLB_REQ_PING_SHIFT 8 -+#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) -+#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) -+#define GLB_REQ_PING_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) -+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ -+ (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ -+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) -+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ -+ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ -+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) -+#define GLB_REQ_SLEEP_SHIFT 12 -+#define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT) -+#define GLB_REQ_SLEEP_GET(reg_val) \ -+ (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT) -+#define GLB_REQ_SLEEP_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \ -+ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK)) -+#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 -+#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) -+#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) -+#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ -+ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) -+#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 -+#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) -+#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) -+#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ -+ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) -+#define GLB_REQ_INACTIVE_TILER_SHIFT 22 -+#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) -+#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) -+#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ -+ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) -+#define GLB_REQ_PROTM_EXIT_SHIFT 23 -+#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) -+#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) -+#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << 
GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) -+#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 -+#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) -+#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ -+ GLB_REQ_PRFCNT_THRESHOLD_SHIFT) -+#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ -+ (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ -+ GLB_REQ_PRFCNT_THRESHOLD_MASK)) -+#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 -+#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -+#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ -+ GLB_REQ_PRFCNT_OVERFLOW_SHIFT) -+#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ -+ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ -+ GLB_REQ_PRFCNT_OVERFLOW_MASK)) -+#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 -+#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) -+#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) -+#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ -+ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) -+#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 -+#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT) -+#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) -+#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ -+ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) ++ err = invent_cmd_stream_group_info(kbdev, &iface->groups[gid], ++ &interface->csg[gid]); ++ if (err < 0) { ++ free_global_iface(kbdev); ++ return err; ++ } ++ } + -+/* GLB_ACK_IRQ_MASK register */ -+#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 -+#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) -+#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) -+#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) -+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 -+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) -+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 -+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) -+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 -+#define 
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) -+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) -+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 -+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) -+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) -+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) -+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 -+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) -+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 -+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) -+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 -+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) -+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) -+#define GLB_ACK_IRQ_MASK_PING_SHIFT 8 -+#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) -+#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) -+#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) -+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 -+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ -+ (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ -+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) -+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ -+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) -+#define 
GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 -+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) -+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 -+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) -+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 -+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) -+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) -+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 -+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) -+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) -+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) -+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 -+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ -+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ -+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ -+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) -+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 -+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ -+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ -+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) -+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ -+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) -+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 -+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) -+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) -+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ -+ (((reg_val) & 
~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) -+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 -+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) -+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) -+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ -+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) ++ return 0; ++} + -+/* GLB_PROGRESS_TIMER register */ -+#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 -+#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) -+#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ -+ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) -+#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ -+ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) ++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} + -+/* GLB_PWROFF_TIMER register */ -+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0 -+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) -+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ -+ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) -+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ -+ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ -+ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ -+ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) -+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */ -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 -+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 -+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ + -+/* GLB_ALLOC_EN register */ -+#define GLB_ALLOC_EN_MASK_SHIFT 0 -+#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) -+#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) -+#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) ++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} + -+/* GLB_OUTPUT_BLOCK register set definitions */ ++void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} + -+/* GLB_ACK register */ -+#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 -+#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ -+ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> 
GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) -+#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ -+ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) -+#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 -+#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) -+#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) -+#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) -+/* End of GLB_OUTPUT_BLOCK register set definitions */ ++void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} + -+/* The following register and fields are for headers before 10.x.7/11.x.4 */ -+#define GLB_REQ_IDLE_ENABLE_SHIFT (10) -+#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) -+#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) -+#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) -+#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) -+#define GLB_REQ_IDLE_EVENT_SHIFT (26) -+#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) -+#define GLB_ACK_IDLE_ENABLE_SHIFT (10) -+#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) -+#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) -+#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) -+#define GLB_ACK_IDLE_EVENT_SHIFT (26) -+#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) ++void kbase_csf_firmware_cs_input( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) -+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); ++ input_page_write(info->input, offset, value); + -+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10) -+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT) ++ if (offset == CS_REQ) { ++ /* NO_MALI: Immediately acknowledge requests */ ++ output_page_write(info->output, CS_ACK, value); ++ } ++} + -+#define GLB_IDLE_TIMER (0x0080) -+/* GLB_IDLE_TIMER register */ -+#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) -+#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) -+#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) -+#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ -+ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) -+#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) -+#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) -+#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ -+ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) -+#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ -+ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) -+/* GLB_IDLE_TIMER_TIMER_SOURCE values */ -+#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 -+#define 
GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 -+/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ ++u32 kbase_csf_firmware_cs_input_read( ++ const struct kbase_csf_cmd_stream_info *const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); + -+/* GLB_INSTR_FEATURES register */ -+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) -+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) -+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ -+ (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) -+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ -+ (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) -+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4) -+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) -+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ -+ (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) -+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ -+ (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) ++ dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+#define CSG_STATUS_STATE (0x0018) /* CSG state status register */ -+/* CSG_STATUS_STATE register */ -+#define CSG_STATUS_STATE_IDLE_SHIFT (0) -+#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) -+#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ -+ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) -+#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ -+ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ -+ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) ++void kbase_csf_firmware_cs_input_mask( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+/* GLB_FEATURES_ITER_TRACE_SUPPORTED register */ -+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4) -+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \ -+ (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) -+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \ -+ (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \ -+ GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) -+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \ -+ (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \ -+ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK)) ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); + -+/* GLB_REQ_ITER_TRACE_ENABLE register */ -+#define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11) -+#define GLB_REQ_ITER_TRACE_ENABLE_MASK \ -+ (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) -+#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \ -+ (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \ -+ GLB_REQ_ITER_TRACE_ENABLE_SHIFT) -+#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \ -+ (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \ -+ 
GLB_REQ_ITER_TRACE_ENABLE_MASK)) ++ /* NO_MALI: Go through kbase_csf_firmware_cs_input to capture writes */ ++ kbase_csf_firmware_cs_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); ++} + -+/* GLB_PRFCNT_CONFIG register */ -+#define GLB_PRFCNT_CONFIG_SIZE_SHIFT (0) -+#define GLB_PRFCNT_CONFIG_SIZE_MASK (0xFF << GLB_PRFCNT_CONFIG_SIZE_SHIFT) -+#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \ -+ (((reg_val)&GLB_PRFCNT_CONFIG_SIZE_MASK) >> GLB_PRFCNT_CONFIG_SIZE_SHIFT) -+#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \ -+ (((value) << GLB_PRFCNT_CONFIG_SIZE_SHIFT) & GLB_PRFCNT_CONFIG_SIZE_MASK)) -+#define GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT GPU_U(8) -+#define GLB_PRFCNT_CONFIG_SET_SELECT_MASK (GPU_U(0x3) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) -+#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \ -+ (((reg_val)&GLB_PRFCNT_CONFIG_SET_SELECT_MASK) >> GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) -+#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \ -+ (((value) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) & GLB_PRFCNT_CONFIG_SET_SELECT_MASK)) ++u32 kbase_csf_firmware_cs_output( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); + -+/* GLB_PRFCNT_SIZE register */ -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) ((value) >> 8) -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(value) ((value) << 8) -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT GPU_U(0) -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \ -+ (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \ -+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT)) -+#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \ -+ ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \ -+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK)) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \ -+ (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \ -+ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)) -+#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \ -+ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ -+ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) ++ dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+/* GLB_DEBUG_REQ register */ -+#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) -+#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) -+#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ -+ (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) -+#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ -+ (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) ++void kbase_csf_firmware_csg_input( ++ const 
struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset, const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) -+#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) -+#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ -+ (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) -+#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ -+ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", ++ offset, value); ++ input_page_write(info->input, offset, value); + -+/* GLB_DEBUG_ACK register */ -+#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) -+#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) -+#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ -+ (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) -+#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ -+ (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) ++ if (offset == CSG_REQ) { ++ /* NO_MALI: Immediately acknowledge requests */ ++ output_page_write(info->output, CSG_ACK, value); ++ } ++} + -+#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) -+#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) -+#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ -+ (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) -+#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ -+ (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ -+ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) ++u32 kbase_csf_firmware_csg_input_read( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); + -+/* RUN_MODE values */ -+#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 -+#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 -+/* End of RUN_MODE values */ ++ dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); ++ return val; ++} + -+#endif /* _KBASE_CSF_REGISTERS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c -new file mode 100644 -index 000000000..fe3b91a48 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c -@@ -0,0 +1,637 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++void kbase_csf_firmware_csg_input_mask( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset, const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; + -+enum kbasep_soft_reset_status { -+ RESET_SUCCESS = 0, -+ SOFT_RESET_FAILED, -+ L2_ON_FAILED, -+ MCU_REINIT_FAILED -+}; ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); + -+static inline bool -+kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) -+{ -+ return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); ++ /* NO_MALI: Go through kbase_csf_firmware_csg_input to capture writes */ ++ kbase_csf_firmware_csg_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); +} + -+static inline bool -+kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) ++u32 kbase_csf_firmware_csg_output( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) +{ -+ return (state == KBASE_CSF_RESET_GPU_COMMITTED || -+ state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); -+} ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); + -+static inline bool -+kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) -+{ -+ return (state == KBASE_CSF_RESET_GPU_HAPPENING); ++ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); ++ return val; +} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output); + -+/** -+ * DOC: Mechanism for coherent access to the HW with respect to GPU reset -+ * -+ * Access to the HW from non-atomic context outside of the reset thread must -+ * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent(). -+ * -+ * This currently works by taking the &kbase_device's csf.reset.sem, for -+ * 'write' access by the GPU reset thread and 'read' access by every other -+ * thread. The use of this rw_semaphore means: -+ * -+ * - there will be mutual exclusion (and thus waiting) between the thread doing -+ * reset ('writer') and threads trying to access the GPU for 'normal' -+ * operations ('readers') -+ * -+ * - multiple threads may prevent reset from happening without serializing each -+ * other prematurely. Note that at present the wait for reset to finish has -+ * to be done higher up in the driver than actual GPU access, at a point -+ * where it won't cause lock ordering issues. At such a point, some paths may -+ * actually lead to no GPU access, but we would prefer to avoid serializing -+ * at that level -+ * -+ * - lockdep (if enabled in the kernel) will check such uses for deadlock -+ * -+ * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on -+ * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls -+ * to make use of lockdep checking in all places where the &wait_queue_head_t -+ * is waited upon or signaled. -+ * -+ * Indeed places where we wait on &kbase_device's csf.reset.wait (such as -+ * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to -+ * lockdep, and they are made on the existing rw_semaphore. -+ * -+ * For non-atomic access, the &kbase_device's csf.reset.state member should be -+ * checked instead, such as by using kbase_reset_gpu_is_active(). 
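/*
 * [Editor's sketch - illustration only, not part of the patch]
 * A minimal example of the usage pattern the DOC comment above describes:
 * a non-atomic caller takes the 'read' side of csf.reset.sem through
 * kbase_reset_gpu_prevent_and_wait(), performs its GPU access, and then
 * releases it with kbase_reset_gpu_allow(). The caller name
 * example_hw_access() is hypothetical and used only for illustration.
 */
static int example_hw_access(struct kbase_device *kbdev)
{
	int err;

	/* Blocks until any in-flight reset completes; fails if a reset failed */
	err = kbase_reset_gpu_prevent_and_wait(kbdev);
	if (err)
		return err;

	/* ... GPU registers may be accessed here: a reset cannot start ... */

	/* Drop the 'read' side so the reset thread can take 'write' access */
	kbase_reset_gpu_allow(kbdev);

	return 0;
}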
-+ * -+ * Ideally the &rw_semaphore should be replaced in future with a single mutex -+ * that protects any access to the GPU, via reset or otherwise. -+ */ -+ -+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) ++void kbase_csf_firmware_global_input( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value) +{ -+ down_read(&kbdev->csf.reset.sem); ++ const struct kbase_device * const kbdev = iface->kbdev; + -+ if (atomic_read(&kbdev->csf.reset.state) == -+ KBASE_CSF_RESET_GPU_FAILED) { -+ up_read(&kbdev->csf.reset.sem); -+ return -ENOMEM; -+ } ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); ++ input_page_write(iface->input, offset, value); + -+ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { -+ up_read(&kbdev->csf.reset.sem); -+ return -EFAULT; -+ } ++ if (offset == GLB_REQ) { ++ /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE ++ * and PRFCNT_SAMPLE. These will be processed along with the ++ * corresponding performance counter registers when the global doorbell ++ * is rung in order to emulate the performance counter sampling behavior ++ * of the real firmware. ++ */ ++ const u32 ack = output_page_read(iface->output, GLB_ACK); ++ const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK); ++ const u32 toggled = (value ^ ack) & req_mask; + -+ return 0; ++ output_page_write(iface->output, GLB_ACK, ack ^ toggled); ++ } +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait); ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input); + -+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) ++void kbase_csf_firmware_global_input_mask( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value, const u32 mask) +{ -+ if (!down_read_trylock(&kbdev->csf.reset.sem)) -+ return -EAGAIN; -+ -+ if (atomic_read(&kbdev->csf.reset.state) == -+ KBASE_CSF_RESET_GPU_FAILED) { -+ up_read(&kbdev->csf.reset.sem); -+ return -ENOMEM; -+ } ++ const struct kbase_device * const kbdev = iface->kbdev; + -+ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { -+ up_read(&kbdev->csf.reset.sem); -+ return -EFAULT; -+ } ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); + -+ return 0; ++ /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ ++ kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); +} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask); + -+void kbase_reset_gpu_allow(struct kbase_device *kbdev) ++u32 kbase_csf_firmware_global_input_read( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) +{ -+ up_read(&kbdev->csf.reset.sem); -+} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow); ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = input_page_read(iface->input, offset); + -+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) -+{ -+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ lockdep_assert_held_read(&kbdev->csf.reset.sem); -+#else -+ lockdep_assert_held(&kbdev->csf.reset.sem); -+#endif -+ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++ dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); ++ return val; +} + -+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) ++u32 kbase_csf_firmware_global_output( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) +{ -+ if (atomic_read(&kbdev->csf.reset.state) 
== KBASE_CSF_RESET_GPU_FAILED) -+ return; ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = output_page_read(iface->output, offset); + -+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ lockdep_assert_held_read(&kbdev->csf.reset.sem); -+#else -+ lockdep_assert_held(&kbdev->csf.reset.sem); -+#endif -+ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); ++ return val; +} ++KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output); + -+/* Mark the reset as now happening, and synchronize with other threads that -+ * might be trying to access the GPU ++/** ++ * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request ++ * ++ * @kbdev: An instance of the GPU platform device + */ -+static void kbase_csf_reset_begin_hw_access_sync( -+ struct kbase_device *kbdev, -+ enum kbase_csf_reset_gpu_state initial_reset_state) ++static void csf_doorbell_prfcnt(struct kbase_device *kbdev) +{ -+ unsigned long hwaccess_lock_flags; -+ unsigned long scheduler_spin_lock_flags; ++ struct kbase_csf_global_iface *iface; ++ u32 req; ++ u32 ack; ++ u32 extract_index; + -+ /* Note this is a WARN/atomic_set because it is a software issue for a -+ * race to be occurring here -+ */ -+ WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state)); ++ if (WARN_ON(!kbdev)) ++ return; + -+ down_write(&kbdev->csf.reset.sem); ++ iface = &kbdev->csf.global_iface; + -+ /* Threads in atomic context accessing the HW will hold one of these -+ * locks, so synchronize with them too. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); -+ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); -+ atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); -+ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); -+} ++ req = input_page_read(iface->input, GLB_REQ); ++ ack = output_page_read(iface->output, GLB_ACK); ++ extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT); + -+/* Mark the reset as finished and allow others threads to once more access the -+ * GPU -+ */ -+static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, -+ int err_during_reset, -+ bool firmware_inited) -+{ -+ unsigned long hwaccess_lock_flags; -+ unsigned long scheduler_spin_lock_flags; ++ /* Process enable bit toggle */ ++ if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) { ++ if (req & GLB_REQ_PRFCNT_ENABLE_MASK) { ++ /* Reset insert index to zero on enable bit set */ ++ output_page_write(iface->output, GLB_PRFCNT_INSERT, 0); ++ WARN_ON(extract_index != 0); ++ } ++ ack ^= GLB_REQ_PRFCNT_ENABLE_MASK; ++ } + -+ WARN_ON(!kbase_csf_reset_state_is_active( -+ atomic_read(&kbdev->csf.reset.state))); ++ /* Process sample request */ ++ if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { ++ const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET( ++ input_page_read(iface->input, GLB_PRFCNT_CONFIG)); ++ u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT); + -+ /* Once again, we synchronize with atomic context threads accessing the -+ * HW, as otherwise any actions they defer could get lost -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); -+ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); ++ const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; ++ const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; + -+ if 
(!err_during_reset) { -+ atomic_set(&kbdev->csf.reset.state, -+ KBASE_CSF_RESET_GPU_NOT_PENDING); -+ } else { -+ dev_err(kbdev->dev, "Reset failed to complete"); -+ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED); -+ } ++ /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */ ++ if (insert_index - extract_index >= ring_size) { ++ WARN_ON(insert_index - extract_index > ring_size); ++ if (!prev_overflow) ++ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK; ++ } else { ++ struct gpu_model_prfcnt_en enable_maps = { ++ .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN), ++ .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN), ++ .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN), ++ .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN), ++ }; + -+ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ const u64 prfcnt_base = ++ input_page_read(iface->input, GLB_PRFCNT_BASE_LO) + ++ ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32); + -+ /* Invoke the scheduling tick after formally finishing the reset, -+ * otherwise the tick might start too soon and notice that reset -+ * is still in progress. -+ */ -+ up_write(&kbdev->csf.reset.sem); -+ wake_up(&kbdev->csf.reset.wait); ++ u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base + ++ (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE * ++ (insert_index % ring_size)); + -+ if (!err_during_reset && likely(firmware_inited)) -+ kbase_csf_scheduler_enable_tick_timer(kbdev); -+} ++ /* trigger sample dump in the dummy model */ ++ gpu_model_prfcnt_dump_request(sample_base, enable_maps); + -+static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) -+{ -+ kbase_io_history_dump(kbdev); ++ /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */ ++ output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index); ++ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK; ++ } + -+ dev_err(kbdev->dev, "Register state:"); -+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); -+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); -+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), -+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); -+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); -+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); ++ /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */ ++ if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2))) ++ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK; ++ } ++ ++ /* Update GLB_ACK */ ++ output_page_write(iface->output, GLB_ACK, ack); +} + -+/** -+ 
* kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the -+ * event of an error during GPU reset. -+ * @kbdev: Pointer to KBase device -+ */ -+static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) ++void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ -+ unsigned long flags; ++ WARN_ON(doorbell_nr < 0); ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + -+ /* Treat this as an unrecoverable error for HWCNT */ -+ kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); ++ if (WARN_ON(!kbdev)) ++ return; + -+ /* Re-enable counters to ensure matching enable/disable pair. -+ * This might reduce the hwcnt disable count to 0, and therefore -+ * trigger actual re-enabling of hwcnt. -+ * However, as the backend is now in the unrecoverable error state, -+ * re-enabling will immediately fail and put the context into the error -+ * state, preventing the hardware from being touched (which could have -+ * risked a hang). -+ */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) { ++ csf_doorbell_prfcnt(kbdev); ++ gpu_model_glb_request_job_irq(kbdev->model); ++ } +} ++EXPORT_SYMBOL(kbase_csf_ring_doorbell); + -+static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev, -+ bool firmware_inited, bool silent) ++/** ++ * handle_internal_firmware_fatal - Handler for CS internal firmware fault. ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * Report group fatal error to user space for all GPU command queue groups ++ * in the device, terminate them and reset GPU. ++ */ ++static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) +{ -+ unsigned long flags; -+ int err; -+ enum kbasep_soft_reset_status ret = RESET_SUCCESS; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ spin_lock(&kbdev->mmu_mask_change); -+ kbase_pm_reset_start_locked(kbdev); -+ -+ dev_dbg(kbdev->dev, -+ "We're about to flush out the IRQs and their bottom halves\n"); -+ kbdev->irq_reset_flush = true; -+ -+ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the -+ * spinlock; this also clears any outstanding interrupts -+ */ -+ kbase_pm_disable_interrupts_nolock(kbdev); ++ int as; + -+ spin_unlock(&kbdev->mmu_mask_change); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { ++ unsigned long flags; ++ struct kbase_context *kctx; ++ struct kbase_fault fault; + -+ dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n"); -+ /* Must be done without any locks IRQ handlers will take. */ -+ kbase_synchronize_irqs(kbdev); ++ if (as == MCU_AS_NR) ++ continue; + -+ dev_dbg(kbdev->dev, "Flush out any in-flight work items\n"); -+ kbase_flush_mmu_wqs(kbdev); ++ /* Only handle the fault for an active address space. Lock is ++ * taken here to atomically get reference to context in an ++ * active address space and retain its refcount. 
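/*
 * [Editor's sketch - illustration only, not part of the patch]
 * The dummy PRFCNT ring buffer handling in csf_doorbell_prfcnt() above uses
 * free-running u32 insert/extract indices, so the unsigned subtraction
 * insert_index - extract_index gives the occupancy even across wrap-around.
 * This hypothetical helper (prfcnt_ring_state() is not a real kbase symbol)
 * restates the same occupancy, overflow and threshold arithmetic.
 */
static void prfcnt_ring_state(u32 insert_index, u32 extract_index,
			      u32 ring_size, bool *full, bool *past_threshold)
{
	/* Samples currently queued; insert always runs ahead of extract */
	u32 occupancy = insert_index - extract_index;

	/* A full ring makes the model toggle PRFCNT_OVERFLOW and skip the sample */
	*full = (occupancy >= ring_size);

	/* Reaching 50% capacity makes the model toggle PRFCNT_THRESHOLD */
	*past_threshold = (occupancy >= ring_size / 2);
}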
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); + -+ dev_dbg(kbdev->dev, -+ "The flush has completed so reset the active indicator\n"); -+ kbdev->irq_reset_flush = false; ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ continue; ++ } + -+ if (!silent) -+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", -+ RESET_TIMEOUT); ++ fault = (struct kbase_fault) { ++ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, ++ }; + -+ /* Output the state of some interesting registers to help in the -+ * debugging of GPU resets, and dump the firmware trace buffer -+ */ -+ if (!silent) { -+ kbase_csf_debug_dump_registers(kbdev); -+ if (likely(firmware_inited)) -+ kbase_csf_firmware_log_dump_buffer(kbdev); ++ kbase_csf_ctx_handle_fault(kctx, &fault); ++ kbase_ctx_sched_release_ctx_lock(kctx); + } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ipa_control_handle_gpu_reset_pre(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ /* Tell hardware counters a reset is about to occur. -+ * If the backend is in an unrecoverable error state (e.g. due to -+ * firmware being unresponsive) this will transition the backend out of -+ * it, on the assumption a reset will fix whatever problem there was. -+ */ -+ kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface); -+ -+ mutex_lock(&kbdev->pm.lock); -+ /* Reset the GPU */ -+ err = kbase_pm_init_hw(kbdev, 0); -+ -+ mutex_unlock(&kbdev->pm.lock); -+ -+ if (WARN_ON(err)) -+ return SOFT_RESET_FAILED; -+ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_restore_all_as(kbdev); -+ kbase_ipa_control_handle_gpu_reset_post(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ -+ kbase_pm_enable_interrupts(kbdev); -+ -+ mutex_lock(&kbdev->pm.lock); -+ kbase_pm_reset_complete(kbdev); -+ /* Synchronously wait for the reload of firmware to complete */ -+ err = kbase_pm_wait_for_desired_state(kbdev); -+ mutex_unlock(&kbdev->pm.lock); ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++} + -+ if (err) { -+ if (!kbase_pm_l2_is_in_desired_state(kbdev)) -+ ret = L2_ON_FAILED; -+ else if (!kbase_pm_mcu_is_in_desired_state(kbdev)) -+ ret = MCU_REINIT_FAILED; -+ } ++/** ++ * firmware_error_worker - Worker function for handling firmware internal error ++ * ++ * @data: Pointer to a work_struct embedded in kbase device. 
++ * ++ * Handle the CS internal firmware error ++ */ ++static void firmware_error_worker(struct work_struct *const data) ++{ ++ struct kbase_device *const kbdev = ++ container_of(data, struct kbase_device, csf.fw_error_work); + -+ return ret; ++ handle_internal_firmware_fatal(kbdev); +} + -+static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent) ++static bool global_request_complete(struct kbase_device *const kbdev, ++ u32 const req_mask) +{ ++ struct kbase_csf_global_iface *global_iface = ++ &kbdev->csf.global_iface; ++ bool complete = false; + unsigned long flags; -+ enum kbasep_soft_reset_status ret; -+ -+ WARN_ON(kbdev->irq_reset_flush); -+ /* The reset must now be happening otherwise other threads will not -+ * have been synchronized with to stop their access to the HW -+ */ -+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE -+ lockdep_assert_held_write(&kbdev->csf.reset.sem); -+#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ lockdep_assert_held_exclusive(&kbdev->csf.reset.sem); -+#else -+ lockdep_assert_held(&kbdev->csf.reset.sem); -+#endif -+ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + -+ /* Reset the scheduler state before disabling the interrupts as suspend -+ * of active CSG slots would also be done as a part of reset. -+ */ -+ if (likely(firmware_inited)) -+ kbase_csf_scheduler_reset(kbdev); -+ cancel_work_sync(&kbdev->csf.firmware_reload_work); -+ -+ dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); -+ /* This call will block until counters are disabled. */ -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent); -+ if (ret == SOFT_RESET_FAILED) { -+ dev_err(kbdev->dev, "Soft-reset failed"); -+ goto err; -+ } else if (ret == L2_ON_FAILED) { -+ dev_err(kbdev->dev, "L2 power up failed after the soft-reset"); -+ goto err; -+ } else if (ret == MCU_REINIT_FAILED) { -+ dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload"); -+ /* Since MCU reinit failed despite successful soft reset, we can try -+ * the firmware full reload. 
-+ */ -+ kbdev->csf.firmware_full_reload_needed = true; -+ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true); -+ if (ret != RESET_SUCCESS) { -+ dev_err(kbdev->dev, -+ "MCU Re-init failed even after trying full firmware reload, ret = [%d]", -+ ret); -+ goto err; -+ } -+ } ++ if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & ++ req_mask) == ++ (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & ++ req_mask)) ++ complete = true; + -+ /* Re-enable GPU hardware counters */ -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ if (!silent) -+ dev_err(kbdev->dev, "Reset complete"); -+ return 0; -+err: + -+ kbase_csf_hwcnt_on_reset_error(kbdev); -+ return -1; ++ return complete; +} + -+static void kbase_csf_reset_gpu_worker(struct work_struct *data) ++static int wait_for_global_request(struct kbase_device *const kbdev, ++ u32 const req_mask) +{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ csf.reset.work); -+ bool gpu_sleep_mode_active = false; -+ bool firmware_inited; -+ unsigned long flags; ++ const long wait_timeout = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ long remaining; + int err = 0; -+ const enum kbase_csf_reset_gpu_state initial_reset_state = -+ atomic_read(&kbdev->csf.reset.state); -+ const bool silent = -+ kbase_csf_reset_state_is_silent(initial_reset_state); + -+ /* Ensure any threads (e.g. executing the CSF scheduler) have finished -+ * using the HW -+ */ -+ kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state); ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ global_request_complete(kbdev, req_mask), ++ wait_timeout); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ firmware_inited = kbdev->csf.firmware_inited; -+#ifdef KBASE_PM_RUNTIME -+ gpu_sleep_mode_active = kbdev->pm.backend.gpu_sleep_mode_active; -+#endif -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", ++ req_mask); ++ err = -ETIMEDOUT; + -+ if (unlikely(gpu_sleep_mode_active)) { -+#ifdef KBASE_PM_RUNTIME -+ /* As prior to GPU reset all on-slot groups are suspended, -+ * need to wake up the MCU from sleep. -+ * No pm active reference is taken here since GPU is in sleep -+ * state and both runtime & system suspend synchronize with the -+ * GPU reset before they wake up the GPU to suspend on-slot -+ * groups. GPUCORE-29850 would add the proper handling. 
-+ */ -+ kbase_pm_lock(kbdev); -+ if (kbase_pm_force_mcu_wakeup_after_sleep(kbdev)) -+ dev_warn(kbdev->dev, "Wait for MCU wake up failed on GPU reset"); -+ kbase_pm_unlock(kbdev); + -+ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); -+#endif -+ } else if (!kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); -+ kbase_pm_context_idle(kbdev); + } + -+ kbase_disjoint_state_down(kbdev); -+ -+ /* Allow other threads to once again use the GPU */ -+ kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited); ++ return err; +} + -+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) ++static void set_global_request( ++ const struct kbase_csf_global_iface *const global_iface, ++ u32 const req_mask) +{ -+ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) -+ kbase_hwcnt_backend_csf_on_unrecoverable_error( -+ &kbdev->hwcnt_gpu_iface); ++ u32 glb_req; + -+ if (atomic_cmpxchg(&kbdev->csf.reset.state, -+ KBASE_CSF_RESET_GPU_NOT_PENDING, -+ KBASE_CSF_RESET_GPU_PREPARED) != -+ KBASE_CSF_RESET_GPU_NOT_PENDING) -+ /* Some other thread is already resetting the GPU */ -+ return false; ++ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + -+ return true; ++ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); ++ glb_req ^= req_mask; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, ++ req_mask); +} -+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + -+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, -+ unsigned int flags) ++static void enable_endpoints_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const shader_core_mask) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, ++ shader_core_mask & U32_MAX); ++ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, ++ shader_core_mask >> 32); + -+ return kbase_prepare_to_reset_gpu(kbdev, flags); ++ set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); +} + -+void kbase_reset_gpu(struct kbase_device *kbdev) ++static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, ++ const struct kbase_csf_global_iface *const global_iface) +{ -+ /* Note this is a WARN/atomic_set because it is a software issue for -+ * a race to be occurring here -+ */ -+ if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != -+ KBASE_RESET_GPU_PREPARED)) -+ return; ++ u32 pwroff_reg; + -+ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED); -+ dev_err(kbdev->dev, "Preparing to soft-reset GPU\n"); ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ pwroff_reg = ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; + -+ kbase_disjoint_state_up(kbdev); ++ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, ++ pwroff_reg); ++ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); + -+ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); ++ /* Save the programed reg value in its shadow field */ ++ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+void kbase_reset_gpu_locked(struct kbase_device *kbdev) ++static void set_timeout_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const timeout) +{ -+ 
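set_global_request() above raises a request by toggling the selected GLB_REQ bits to the opposite of the current GLB_ACK bits, and global_request_complete() / wait_for_global_request() treat the request as acknowledged once firmware has made the two fields agree again under the same mask. The standalone sketch below models that toggle handshake in plain C; it is illustrative only, not part of the patch, and the request bit position and the in-process firmware helper are assumptions chosen for the example.

/* Illustrative sketch only, not driver code: the GLB_REQ/GLB_ACK toggle
 * handshake used by set_global_request() and global_request_complete().
 * A request is pending while the REQ and ACK bits differ; the firmware
 * acknowledges by toggling ACK to match REQ again.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t glb_req, glb_ack; /* stand-ins for the shared interface words */

static void host_raise_request(uint32_t mask)
{
        /* like set_global_request(): within the mask, REQ = ACK ^ mask */
        glb_req = (glb_req & ~mask) | ((glb_ack ^ mask) & mask);
}

static bool host_request_complete(uint32_t mask)
{
        /* like global_request_complete(): done when REQ and ACK agree */
        return (glb_req & mask) == (glb_ack & mask);
}

static void firmware_acknowledge(uint32_t mask)
{
        glb_ack = (glb_ack & ~mask) | (glb_req & mask);
}

int main(void)
{
        const uint32_t req_mask = 1u << 8; /* hypothetical request bit */

        host_raise_request(req_mask);
        printf("after request: complete=%d\n", host_request_complete(req_mask)); /* 0 */
        firmware_acknowledge(req_mask);
        printf("after ack:     complete=%d\n", host_request_complete(req_mask)); /* 1 */
        return 0;
}

Because completion is just bit equality under the mask, a single wait can cover several request bits at once.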
lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, ++ timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + -+ kbase_reset_gpu(kbdev); ++ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); +} + -+int kbase_reset_gpu_silent(struct kbase_device *kbdev) ++static void enable_gpu_idle_timer(struct kbase_device *const kbdev) +{ -+ if (atomic_cmpxchg(&kbdev->csf.reset.state, -+ KBASE_CSF_RESET_GPU_NOT_PENDING, -+ KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != -+ KBASE_CSF_RESET_GPU_NOT_PENDING) { -+ /* Some other thread is already resetting the GPU */ -+ return -EAGAIN; -+ } -+ -+ kbase_disjoint_state_up(kbdev); ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + -+ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ return 0; ++ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, ++ kbdev->csf.gpu_idle_dur_count); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, ++ GLB_REQ_IDLE_ENABLE_MASK); ++ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", ++ kbdev->csf.gpu_idle_dur_count); +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent); + -+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) ++static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ -+ enum kbase_csf_reset_gpu_state reset_state = -+ atomic_read(&kbdev->csf.reset.state); ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ bool complete = false; ++ unsigned long flags; + -+ /* For CSF, the reset is considered active only when the reset worker -+ * is actually executing and other threads would have to wait for it to -+ * complete -+ */ -+ return kbase_csf_reset_state_is_active(reset_state); -+} ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) -+{ -+ return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING; ++ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == ++ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) ++ complete = true; ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ return complete; +} + -+int kbase_reset_gpu_wait(struct kbase_device *kbdev) ++static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, ++ u32 const req_mask) +{ -+ const long wait_timeout = -+ kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT)); -+ long remaining; ++ u32 glb_debug_req; + -+ /* Inform lockdep we might be trying to wait on a reset (as -+ * would've been done with down_read() - which has no 'timeout' -+ * variant), then use wait_event_timeout() to implement the timed -+ * wait. 
-+ * -+ * in CONFIG_PROVE_LOCKING builds, this should catch potential 'time -+ * bound' deadlocks such as: -+ * - incorrect lock order with respect to others locks -+ * - current thread has prevented reset -+ * - current thread is executing the reset worker -+ */ -+ might_lock_read(&kbdev->csf.reset.sem); ++ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + -+ remaining = wait_event_timeout( -+ kbdev->csf.reset.wait, -+ (atomic_read(&kbdev->csf.reset.state) == -+ KBASE_CSF_RESET_GPU_NOT_PENDING) || -+ (atomic_read(&kbdev->csf.reset.state) == -+ KBASE_CSF_RESET_GPU_FAILED), -+ wait_timeout); ++ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); ++ glb_debug_req ^= req_mask; + -+ if (!remaining) { -+ dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete"); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); ++} + ++static void request_fw_core_dump( ++ const struct kbase_csf_global_iface *const global_iface) ++{ ++ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + -+ return -ETIMEDOUT; -+ } else if (atomic_read(&kbdev->csf.reset.state) == -+ KBASE_CSF_RESET_GPU_FAILED) { -+ return -ENOMEM; -+ } ++ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + -+ return 0; ++ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); + -+int kbase_reset_gpu_init(struct kbase_device *kbdev) ++int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ -+ kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1); -+ if (kbdev->csf.reset.workq == NULL) -+ return -ENOMEM; -+ -+ INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker); ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int ret; + -+ init_waitqueue_head(&kbdev->csf.reset.wait); -+ init_rwsem(&kbdev->csf.reset.sem); ++ /* Serialize CORE_DUMP requests. */ ++ mutex_lock(&kbdev->csf.reg_lock); + -+ return 0; -+} ++ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ request_fw_core_dump(global_iface); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+void kbase_reset_gpu_term(struct kbase_device *kbdev) -+{ -+ destroy_workqueue(kbdev->csf.reset.workq); -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c -new file mode 100644 -index 000000000..edaa6d17e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c -@@ -0,0 +1,6889 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */ ++ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); ++ if (!ret) ++ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + -+#include -+#include "mali_kbase_config_defaults.h" -+#include -+#include -+#include -+#include "mali_kbase_csf.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mali_kbase_csf_tiler_heap_reclaim.h" -+#include "mali_kbase_csf_mcu_shared_reg.h" ++ mutex_unlock(&kbdev->csf.reg_lock); + -+/* Value to indicate that a queue group is not groups_to_schedule list */ -+#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) ++ return ret; ++} + -+/* This decides the upper limit on the waiting time for the Scheduler -+ * to exit the sleep state. Usually the value of autosuspend_delay is -+ * expected to be around 100 milli seconds. -+ */ -+#define MAX_AUTO_SUSPEND_DELAY_MS (5000) ++static void global_init(struct kbase_device *const kbdev, u64 core_mask) ++{ ++ u32 const ack_irq_mask = ++ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | ++ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | ++ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; + -+/* Maximum number of endpoints which may run tiler jobs. */ -+#define CSG_TILER_MAX ((u8)1) ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; + -+/* Maximum dynamic CSG slot priority value */ -+#define MAX_CSG_SLOT_PRIORITY ((u8)15) ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+/* CSF scheduler time slice value */ -+#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ ++ /* Update shader core allocation enable mask */ ++ enable_endpoints_global(global_iface, core_mask); ++ enable_shader_poweroff_timer(kbdev, global_iface); + -+/* A GPU address space slot is reserved for MCU. */ -+#define NUM_RESERVED_AS_SLOTS (1) ++ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); + -+/* Time to wait for completion of PING req before considering MCU as hung */ -+#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) ++ /* The GPU idle timer is always enabled for simplicity. Checks will be ++ * done before scheduling the GPU idle worker to see if it is ++ * appropriate for the current power policy. 
++ */ ++ enable_gpu_idle_timer(kbdev); + -+/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ -+#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT ++ /* Unmask the interrupts */ ++ kbase_csf_firmware_global_input(global_iface, ++ GLB_ACK_IRQ_MASK, ack_irq_mask); + -+static int scheduler_group_schedule(struct kbase_queue_group *group); -+static void remove_group_from_idle_wait(struct kbase_queue_group *const group); -+static -+void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, -+ struct kbase_queue_group *const group, -+ enum kbase_csf_group_state run_state); -+static struct kbase_queue_group *scheduler_get_protm_enter_async_group( -+ struct kbase_device *const kbdev, -+ struct kbase_queue_group *const group); -+static struct kbase_queue_group *get_tock_top_group( -+ struct kbase_csf_scheduler *const scheduler); -+static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); -+static int suspend_active_queue_groups(struct kbase_device *kbdev, -+ unsigned long *slot_mask); -+static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, -+ bool system_suspend); -+static void schedule_in_cycle(struct kbase_queue_group *group, bool force); -+static bool queue_group_scheduled_locked(struct kbase_queue_group *group); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + -+#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + +/** -+ * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and -+ * scheduling tick/tock to complete before the group deschedule. ++ * global_init_on_boot - Sends a global request to control various features. + * -+ * @group: Pointer to the group that is being descheduled. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * This function blocks the descheduling of the group until the dump on fault is -+ * completed and scheduling tick/tock has completed. -+ * To deschedule an on slot group CSG termination request would be sent and that -+ * might time out if the fault had occurred and also potentially affect the state -+ * being dumped. Moreover the scheduler lock would be held, so the access to debugfs -+ * files would get blocked. -+ * Scheduler lock and 'kctx->csf.lock' are released before this function starts -+ * to wait. When a request sent by the Scheduler to the FW times out, Scheduler -+ * would also wait for the dumping to complete and release the Scheduler lock -+ * before the wait. Meanwhile Userspace can try to delete the group, this function -+ * would ensure that the group doesn't exit the Scheduler until scheduling -+ * tick/tock has completed. Though very unlikely, group deschedule can be triggered -+ * from multiple threads around the same time and after the wait Userspace thread -+ * can win the race and get the group descheduled and free the memory for group -+ * pointer before the other threads wake up and notice that group has already been -+ * descheduled. To avoid the freeing in such a case, a sort of refcount is used -+ * for the group which is incremented & decremented across the wait. ++ * Currently only the request to enable endpoints and cycle counter is sent. ++ * ++ * Return: 0 on success, or negative on failure. 
+ */ -+static -+void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) ++static int global_init_on_boot(struct kbase_device *const kbdev) +{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ u64 core_mask; + -+ lockdep_assert_held(&kctx->csf.lock); -+ lockdep_assert_held(&scheduler->lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ core_mask = kbase_pm_ca_get_core_mask(kbdev); ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) -+ return; ++ global_init(kbdev, core_mask); + -+ while ((!kbase_debug_csf_fault_dump_complete(kbdev) || -+ (scheduler->state == SCHED_BUSY)) && -+ queue_group_scheduled_locked(group)) { -+ group->deschedule_deferred_cnt++; -+ mutex_unlock(&scheduler->lock); -+ mutex_unlock(&kctx->csf.lock); -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ mutex_lock(&kctx->csf.lock); -+ mutex_lock(&scheduler->lock); -+ group->deschedule_deferred_cnt--; -+ } -+#endif ++ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); +} + -+/** -+ * schedule_actions_trigger_df() - Notify the client about the fault and -+ * wait for the dumping to complete. -+ * -+ * @kbdev: Pointer to the device -+ * @kctx: Pointer to the context associated with the CSG slot for which -+ * the timeout was seen. -+ * @error: Error code indicating the type of timeout that occurred. -+ * -+ * This function notifies the Userspace client waiting for the faults and wait -+ * for the Client to complete the dumping. -+ * The function is called only from Scheduling tick/tock when a request sent by -+ * the Scheduler to FW times out or from the protm event work item of the group -+ * when the protected mode entry request times out. -+ * In the latter case there is no wait done as scheduler lock would be released -+ * immediately. In the former case the function waits and releases the scheduler -+ * lock before the wait. It has been ensured that the Scheduler view of the groups -+ * won't change meanwhile, so no group can enter/exit the Scheduler, become -+ * runnable or go off slot. 
-+ */ -+static void schedule_actions_trigger_df(struct kbase_device *kbdev, -+ struct kbase_context *kctx, enum dumpfault_error_type error) ++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, ++ u64 core_mask) +{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ lockdep_assert_held(&scheduler->lock); ++ kbdev->csf.glb_init_request_pending = true; ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ global_init(kbdev, core_mask); ++} + -+ if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) -+ return; ++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON(!kbdev->csf.glb_init_request_pending); + -+ if (unlikely(scheduler->state != SCHED_BUSY)) { -+ WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); -+ return; -+ } ++ if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) ++ kbdev->csf.glb_init_request_pending = false; + -+ mutex_unlock(&scheduler->lock); -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ mutex_lock(&scheduler->lock); -+ WARN_ON(scheduler->state != SCHED_BUSY); -+#endif ++ return !kbdev->csf.glb_init_request_pending; +} + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the -+ * sleeping state. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function waits until the Scheduler has exited the sleep state and -+ * it is called when an on-slot group is terminated or when the suspend -+ * buffer of an on-slot group needs to be captured. -+ * -+ * Return: 0 when the wait is successful, otherwise an error code. -+ */ -+static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev) ++void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ int autosuspend_delay = kbdev->dev->power.autosuspend_delay; -+ unsigned int sleep_exit_wait_time; -+ long remaining; -+ int ret = 0; -+ -+ lockdep_assert_held(&scheduler->lock); -+ WARN_ON(scheduler->state != SCHED_SLEEPING); -+ -+ /* No point in waiting if autosuspend_delay value is negative. -+ * For the negative value of autosuspend_delay Driver will directly -+ * go for the suspend of Scheduler, but the autosuspend_delay value -+ * could have been changed after the sleep was initiated. -+ */ -+ if (autosuspend_delay < 0) -+ return -EINVAL; ++ unsigned long flags; + -+ if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS) -+ autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Usually Scheduler would remain in sleeping state until the -+ * auto-suspend timer expires and all active CSGs are suspended. 
-+ */ -+ sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms; ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (update_core_mask) ++ enable_endpoints_global(&kbdev->csf.global_iface, core_mask); ++ if (update_core_pwroff_timer) ++ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + -+ remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ while ((scheduler->state == SCHED_SLEEPING) && !ret) { -+ mutex_unlock(&scheduler->lock); -+ remaining = wait_event_timeout( -+ kbdev->csf.event_wait, -+ (scheduler->state != SCHED_SLEEPING), -+ remaining); -+ mutex_lock(&scheduler->lock); -+ if (!remaining && (scheduler->state == SCHED_SLEEPING)) -+ ret = -ETIMEDOUT; -+ } ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return ret; ++ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | ++ GLB_REQ_CFG_PWROFF_TIMER_MASK); +} + -+/** -+ * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function will force the Scheduler to exit the sleep state by doing the -+ * wake up of MCU and suspension of on-slot groups. It is called at the time of -+ * system suspend. -+ * -+ * Return: 0 on success. -+ */ -+static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) ++static void kbase_csf_firmware_reload_worker(struct work_struct *work) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.firmware_reload_work); + unsigned long flags; -+ int ret = 0; -+ -+ lockdep_assert_held(&scheduler->lock); -+ WARN_ON(scheduler->state != SCHED_SLEEPING); -+ WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active); -+ -+ kbase_pm_lock(kbdev); -+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); -+ kbase_pm_unlock(kbdev); -+ if (ret) { -+ dev_warn(kbdev->dev, -+ "[%llu] Wait for MCU wake up failed on forced scheduler suspend", -+ kbase_backend_get_cycle_cnt(kbdev)); -+ goto out; -+ } + -+ ret = suspend_active_groups_on_powerdown(kbdev, true); -+ if (ret) -+ goto out; ++ /* Reboot the firmware */ ++ kbase_csf_firmware_enable_mcu(kbdev); + -+ kbase_pm_lock(kbdev); ++ /* Tell MCU state machine to transit to next state */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.gpu_sleep_mode_active = false; -+ kbdev->pm.backend.gpu_wakeup_override = false; ++ kbdev->csf.firmware_reloaded = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ ret = kbase_pm_wait_for_desired_state(kbdev); -+ kbase_pm_unlock(kbdev); -+ if (ret) { -+ dev_warn(kbdev->dev, -+ "[%llu] Wait for pm state change failed on forced scheduler suspend", -+ kbase_backend_get_cycle_cnt(kbdev)); -+ goto out; -+ } -+ -+ scheduler->state = SCHED_SUSPENDED; -+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); ++} + -+ return 0; ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+out: -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.exit_gpu_sleep_mode = true; -+ kbdev->pm.backend.gpu_wakeup_override = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_csf_scheduler_invoke_tick(kbdev); ++ kbdev->csf.firmware_reloaded = false; + -+ return ret; ++ if 
(kbdev->csf.firmware_reload_needed) { ++ kbdev->csf.firmware_reload_needed = false; ++ queue_work(system_wq, &kbdev->csf.firmware_reload_work); ++ } else { ++ kbase_csf_firmware_enable_mcu(kbdev); ++ kbdev->csf.firmware_reloaded = true; ++ } +} -+#endif + -+/** -+ * tick_timer_callback() - Callback function for the scheduling tick hrtimer -+ * -+ * @timer: Pointer to the scheduling tick hrtimer -+ * -+ * This function will enqueue the scheduling tick work item for immediate -+ * execution, if it has not been queued already. -+ * -+ * Return: enum value to indicate that timer should not be restarted. -+ */ -+static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = container_of(timer, struct kbase_device, -+ csf.scheduler.tick_timer); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_csf_scheduler_tick_advance(kbdev); -+ return HRTIMER_NORESTART; ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return; ++ ++ /* Tell MCU state machine to transit to next state */ ++ kbdev->csf.firmware_reloaded = true; ++ kbase_pm_update_state(kbdev); +} + -+/** -+ * start_tick_timer() - Start the scheduling tick hrtimer. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function will start the scheduling tick hrtimer and is supposed to -+ * be called only from the tick work item function. The tick hrtimer should -+ * not be active already. -+ */ -+static void start_tick_timer(struct kbase_device *kbdev) ++static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; -+ -+ lockdep_assert_held(&scheduler->lock); ++#define HYSTERESIS_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_ms; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ WARN_ON(scheduler->tick_timer_active); -+ if (likely(!work_pending(&scheduler->tick_work))) { -+ scheduler->tick_timer_active = true; ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); + -+ hrtimer_start(&scheduler->tick_timer, -+ HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), -+ HRTIMER_MODE_REL); ++ dev_info( ++ kbdev->dev, ++ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); + } -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+} + -+/** -+ * cancel_tick_timer() - Cancel the scheduling tick hrtimer -+ * -+ * @kbdev: Pointer to the device -+ */ -+static void cancel_tick_timer(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; ++ /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000); + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ scheduler->tick_timer_active = false; -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ hrtimer_cancel(&scheduler->tick_timer); ++ /* Interface limits the value field to S32_MAX 
*/ ++ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; +} + -+/** -+ * enqueue_tick_work() - Enqueue the scheduling tick work item -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function will queue the scheduling tick work item for immediate -+ * execution. This shall only be called when both the tick hrtimer and tick -+ * work item are not active/pending. -+ */ -+static void enqueue_tick_work(struct kbase_device *kbdev) ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ u32 dur; + -+ lockdep_assert_held(&scheduler->lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ dur = kbdev->csf.gpu_idle_hysteresis_us; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ kbase_csf_scheduler_invoke_tick(kbdev); ++ return dur; +} + -+static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +{ -+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++ unsigned long flags; ++ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); -+} ++ /* The 'fw_load_lock' is taken to synchronize against the deferred ++ * loading of FW, where the idle timer will be enabled. ++ */ ++ mutex_lock(&kbdev->fw_load_lock); ++ if (unlikely(!kbdev->csf.firmware_inited)) { ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_us = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ mutex_unlock(&kbdev->fw_load_lock); ++ goto end; ++ } ++ mutex_unlock(&kbdev->fw_load_lock); + -+static int acquire_doorbell(struct kbase_device *kbdev) -+{ -+ int doorbell_nr; ++ kbase_csf_scheduler_pm_active(kbdev); ++ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { ++ dev_err(kbdev->dev, ++ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); ++ kbase_csf_scheduler_pm_idle(kbdev); ++ return kbdev->csf.gpu_idle_dur_count; ++ } + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of idle timer value by multiple Users ++ * gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ /* The firmware only reads the new idle timer value when the timer is ++ * disabled. 
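As a quick sanity check of the conversion above: with a hypothetical 24 MHz timestamp source and a 10 ms hysteresis, (10 * 24000000) >> 10 = 234375, and dividing by 1000 gives an idle-timer count of 234. The short standalone program below, illustrative only and not part of the patch, reproduces that integer arithmetic; the frequency and duration are assumed values.

/* Illustrative sketch only, not driver code: the integer arithmetic of
 * convert_dur_to_idle_count() for assumed inputs (24 MHz source, 10 ms).
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t freq_hz = 24000000; /* hypothetical cntfrq_el0 value */
        const uint64_t dur_ms = 10;        /* hypothetical hysteresis time */
        uint64_t dur_val = (dur_ms * freq_hz) >> 10; /* 234375 */

        dur_val /= 1000; /* 234, packed into the GLB_IDLE_TIMER timeout field */
        printf("idle timer count: %llu\n", (unsigned long long)dur_val);
        return 0;
}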
++ */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_csf_firmware_disable_gpu_idle_timer(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ /* Ensure that the request has taken effect */ ++ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); + -+ doorbell_nr = find_first_zero_bit( -+ kbdev->csf.scheduler.doorbell_inuse_bitmap, -+ CSF_NUM_DOORBELL); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_us = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_firmware_enable_gpu_idle_timer(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ if (doorbell_nr >= CSF_NUM_DOORBELL) -+ return KBASEP_USER_DB_NR_INVALID; ++ kbase_csf_scheduler_pm_idle(kbdev); + -+ set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); ++end: ++ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", ++ hysteresis_val); + -+ return doorbell_nr; ++ return hysteresis_val; +} + -+static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_us; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; + -+ if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { -+ release_doorbell(kbdev, group->doorbell_nr); -+ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info( ++ kbdev->dev, ++ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); + } ++ ++ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; +} + -+static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, -+ struct kbase_queue *queue) ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ mutex_lock(&kbdev->csf.reg_lock); ++ u32 pwroff; ++ unsigned long flags; + -+ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { -+ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; -+ /* After this the dummy page would be mapped in */ -+ unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, -+ queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ pwroff = kbdev->csf.mcu_core_pwroff_dur_us; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ return pwroff; +} + -+static void assign_user_doorbell_to_group(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ unsigned long flags; ++ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + -+ if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) -+ group->doorbell_nr = acquire_doorbell(kbdev); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.mcu_core_pwroff_dur_us = dur; ++ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); ++ ++ return pwroff; +} + -+static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, -+ struct kbase_queue *const queue) ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ init_waitqueue_head(&kbdev->csf.event_wait); ++ kbdev->csf.interrupt_received = false; + -+ mutex_lock(&kbdev->csf.reg_lock); ++ kbdev->csf.fw_timeout_ms = ++ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + -+ /* If bind operation for the queue hasn't completed yet, then the -+ * CSI can't be programmed for the queue -+ * (even in stopped state) and so the doorbell also can't be assigned -+ * to it. 
-+ */ -+ if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && -+ (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { -+ WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); -+ queue->doorbell_nr = queue->group->doorbell_nr; ++ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_config); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); ++ INIT_LIST_HEAD(&kbdev->csf.user_reg.list); ++ INIT_WORK(&kbdev->csf.firmware_reload_work, ++ kbase_csf_firmware_reload_worker); ++ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + -+ /* After this the real Hw doorbell page would be mapped in */ -+ unmap_mapping_range( -+ kbdev->csf.db_filp->f_inode->i_mapping, -+ queue->db_file_offset << PAGE_SHIFT, -+ PAGE_SIZE, 1); -+ } ++ mutex_init(&kbdev->csf.reg_lock); + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ return 0; +} + -+static void scheduler_doorbell_init(struct kbase_device *kbdev) ++void kbase_csf_firmware_early_term(struct kbase_device *kbdev) +{ -+ int doorbell_nr; -+ -+ bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, -+ CSF_NUM_DOORBELL); ++ mutex_destroy(&kbdev->csf.reg_lock); ++} + -+ mutex_lock(&kbdev->csf.scheduler.lock); -+ /* Reserve doorbell 0 for use by kernel driver */ -+ doorbell_nr = acquire_doorbell(kbdev); -+ mutex_unlock(&kbdev->csf.scheduler.lock); ++int kbase_csf_firmware_late_init(struct kbase_device *kbdev) ++{ ++ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; ++#ifdef KBASE_PM_RUNTIME ++ if (kbase_pm_gpu_sleep_allowed(kbdev)) ++ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; ++#endif ++ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); ++ kbdev->csf.gpu_idle_dur_count = ++ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + -+ WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); ++ return 0; +} + -+/** -+ * update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function updates the EXTRACT offset for all queues which groups have -+ * been assigned a physical slot. These values could be used to detect a -+ * queue's true idleness status. This is intended to be an additional check -+ * on top of the GPU idle notification to account for race conditions. -+ * This function is supposed to be called only when GPU idle notification -+ * interrupt is received. -+ */ -+static void update_on_slot_queues_offsets(struct kbase_device *kbdev) ++int kbase_csf_firmware_load_init(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ /* All CSGs have the same number of CSs */ -+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num; -+ size_t i; ++ int ret; + -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ lockdep_assert_held(&kbdev->fw_load_lock); + -+ /* csg_slots_idle_mask is not used here for the looping, as it could get -+ * updated concurrently when Scheduler re-evaluates the idle status of -+ * the CSGs for which idle notification was received previously. 
-+ */ -+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) { -+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; -+ size_t j; ++ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) ++ return -EINVAL; ++ kbdev->as_free &= ~MCU_AS_BITMASK; + -+ if (WARN_ON(!group)) -+ continue; ++ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, ++ BASE_MEM_GROUP_DEFAULT); + -+ for (j = 0; j < max_streams; ++j) { -+ struct kbase_queue *const queue = group->bound_queues[j]; ++ if (ret != 0) { ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++ return ret; ++ } + -+ if (queue) { -+ if (queue->user_io_addr) { -+ u64 const *const output_addr = -+ (u64 const *)(queue->user_io_addr + PAGE_SIZE); ++ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, ++ "Failed to setup the rb tree for managing shared interface segment\n"); ++ goto error; ++ } + -+ queue->extract_ofs = -+ output_addr[CS_EXTRACT_LO / sizeof(u64)]; -+ } else { -+ dev_warn(kbdev->dev, -+ "%s(): queue->user_io_addr is NULL, queue: %p", -+ __func__, -+ queue); -+ } -+ } -+ } ++ ret = invent_memory_setup_entry(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to load firmware entry\n"); ++ goto error; + } -+} + -+static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) -+{ -+ atomic_set(&scheduler->gpu_no_longer_idle, false); -+ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); -+} ++ /* Make sure L2 cache is powered up */ ++ kbase_pm_wait_for_l2_powered(kbdev); + -+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ int non_idle_offslot_grps; -+ bool can_suspend_on_idle; ++ /* NO_MALI: Don't init trace buffers */ + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ /* NO_MALI: Don't load the MMU tables or boot CSF firmware */ + -+ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); -+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, -+ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); ++ ret = invent_capabilities(kbdev); ++ if (ret != 0) ++ goto error; + -+ if (!non_idle_offslot_grps) { -+ if (can_suspend_on_idle) { -+ /* fast_gpu_idle_handling is protected by the -+ * interrupt_lock, which would prevent this from being -+ * updated whilst gpu_idle_worker() is executing. -+ */ -+ scheduler->fast_gpu_idle_handling = -+ (kbdev->csf.gpu_idle_hysteresis_us == 0) || -+ !kbase_csf_scheduler_all_csgs_idle(kbdev); ++ ret = kbase_csf_doorbell_mapping_init(kbdev); ++ if (ret != 0) ++ goto error; + -+ /* The GPU idle worker relies on update_on_slot_queues_offsets() to have -+ * finished. It's queued before to reduce the time it takes till execution -+ * but it'll eventually be blocked by the scheduler->interrupt_lock. 
-+ */ -+ enqueue_gpu_idle_work(scheduler); ++ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); ++ if (ret != 0) ++ goto error; + -+ /* The extract offsets are unused in fast GPU idle handling */ -+ if (!scheduler->fast_gpu_idle_handling) -+ update_on_slot_queues_offsets(kbdev); -+ } -+ } else { -+ /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ -+ kbase_csf_scheduler_tick_advance_nolock(kbdev); -+ } -+} ++ ret = kbase_csf_scheduler_init(kbdev); ++ if (ret != 0) ++ goto error; + -+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) -+{ -+ u32 nr_active_csgs; ++ ret = kbase_csf_timeout_init(kbdev); ++ if (ret != 0) ++ goto error; + -+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++ ret = global_init_on_boot(kbdev); ++ if (ret != 0) ++ goto error; + -+ nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, -+ kbdev->csf.global_iface.group_num); ++ return 0; + -+ return nr_active_csgs; ++error: ++ kbase_csf_firmware_unload_term(kbdev); ++ return ret; +} + -+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev) ++void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) +{ -+ u32 nr_active_csgs; -+ unsigned long flags; ++ cancel_work_sync(&kbdev->csf.fw_error_work); + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ kbase_csf_timeout_term(kbdev); + -+ return nr_active_csgs; -+} ++ /* NO_MALI: Don't stop firmware or unload MMU tables */ + -+/** -+ * csg_slot_in_use - returns true if a queue group has been programmed on a -+ * given CSG slot. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @slot: Index/number of the CSG slot in question. -+ * -+ * Return: the interface is actively engaged flag. -+ * -+ * Note: Caller must hold the scheduler lock. 
-+ */ -+static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) -+{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ kbase_csf_scheduler_term(kbdev); + -+ return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); -+} ++ kbase_csf_free_dummy_user_reg_page(kbdev); + -+static bool queue_group_suspended_locked(struct kbase_queue_group *group) -+{ -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ kbase_csf_doorbell_mapping_term(kbdev); + -+ return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || -+ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || -+ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); -+} ++ free_global_iface(kbdev); + -+static bool queue_group_idle_locked(struct kbase_queue_group *group) -+{ -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; + -+ return (group->run_state == KBASE_CSF_GROUP_IDLE || -+ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); -+} ++ while (!list_empty(&kbdev->csf.firmware_interfaces)) { ++ struct dummy_firmware_interface *interface; + -+static bool on_slot_group_idle_locked(struct kbase_queue_group *group) -+{ -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ interface = list_first_entry(&kbdev->csf.firmware_interfaces, ++ struct dummy_firmware_interface, node); ++ list_del(&interface->node); + -+ return (group->run_state == KBASE_CSF_GROUP_IDLE); -+} ++ /* NO_MALI: No cleanup in dummy interface necessary */ + -+static bool can_schedule_idle_group(struct kbase_queue_group *group) -+{ -+ return (on_slot_group_idle_locked(group) || -+ (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)); -+} ++ kfree(interface); ++ } + -+static bool queue_group_scheduled(struct kbase_queue_group *group) -+{ -+ return (group->run_state != KBASE_CSF_GROUP_INACTIVE && -+ group->run_state != KBASE_CSF_GROUP_TERMINATED && -+ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); -+} ++ /* NO_MALI: No trace buffers to terminate */ + -+static bool queue_group_scheduled_locked(struct kbase_queue_group *group) -+{ -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ /* This will also free up the region allocated for the shared interface ++ * entry parsed from the firmware image. ++ */ ++ kbase_mcu_shared_interface_region_tracker_term(kbdev); + -+ return queue_group_scheduled(group); ++ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); +} + -+/** -+ * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. -+ * -+ * @kbdev: Pointer to the GPU device -+ * -+ * This function waits for the GPU to exit protected mode which is confirmed -+ * when active_protm_grp is set to NULL. -+ * -+ * Return: true on success, false otherwise. -+ */ -+static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ long remaining; -+ bool success = true; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + -+ lockdep_assert_held(&scheduler->lock); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ /* The scheduler is assumed to only call the enable when its internal ++ * state indicates that the idle timer has previously been disabled. 
So ++ * on entry the expected field values are: ++ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 ++ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 ++ */ ++ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) ++ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt)); ++ enable_gpu_idle_timer(kbdev); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + -+ if (unlikely(!remaining)) { -+ struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp; -+ struct kbase_context *kctx = group ? group->kctx : NULL; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", -+ kbase_backend_get_cycle_cnt(kbdev), -+ kbdev->csf.fw_timeout_ms); -+ schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); -+ success = false; -+ } ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_DISABLE, ++ GLB_REQ_IDLE_DISABLE_MASK); + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining)); ++ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); + -+ return success; ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + -+/** -+ * scheduler_force_protm_exit() - Force GPU to exit protected mode. -+ * -+ * @kbdev: Pointer to the GPU device -+ * -+ * This function sends a ping request to the firmware and waits for the GPU -+ * to exit protected mode. -+ * -+ * If the GPU does not exit protected mode, it is considered as hang. -+ * A GPU reset would then be triggered. -+ */ -+static void scheduler_force_protm_exit(struct kbase_device *kbdev) ++void kbase_csf_firmware_ping(struct kbase_device *const kbdev) +{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; + unsigned long flags; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PING_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + ++int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) ++{ ++ CSTD_UNUSED(wait_timeout_ms); + kbase_csf_firmware_ping(kbdev); ++ return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); ++} + -+ if (scheduler_protm_wait_quit(kbdev)) -+ return; ++int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, ++ u64 const timeout) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; + -+ dev_err(kbdev->dev, "Possible GPU hang in Protected mode"); ++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of timeout value by multiple Users ++ * gets serialized. 
++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_timeout_global(global_iface, timeout); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ if (kbdev->csf.scheduler.active_protm_grp) { -+ dev_err(kbdev->dev, -+ "Group-%d of context %d_%d ran in protected mode for too long on slot %d", -+ kbdev->csf.scheduler.active_protm_grp->handle, -+ kbdev->csf.scheduler.active_protm_grp->kctx->tgid, -+ kbdev->csf.scheduler.active_protm_grp->kctx->id, -+ kbdev->csf.scheduler.active_protm_grp->csg_nr); -+ } -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ /* The GPU could be stuck in Protected mode. To prevent a hang, -+ * a GPU reset is performed. -+ */ -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ return err; +} + -+/** -+ * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up -+ * automatically for periodic tasks. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the -+ * CSF scheduler lock to already have been held. -+ * -+ * Return: true if the scheduler is configured to wake up periodically -+ */ -+static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + -+ return kbdev->csf.scheduler.timer_enabled; ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); ++ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + -+/** -+ * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for -+ * Scheduler -+ * -+ * @kbdev: Pointer to the device -+ * @suspend_handler: Handler code for how to handle a suspend that might occur. -+ * -+ * This function is usually called when Scheduler needs to be activated. -+ * The PM reference count is acquired for the Scheduler and the power on -+ * of GPU is initiated. -+ * -+ * Return: 0 if successful or a negative error code on failure. 
-+ */ -+static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, -+ enum kbase_pm_suspend_handler suspend_handler) ++int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ u32 prev_count; -+ int ret = 0; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ prev_count = kbdev->csf.scheduler.pm_active_count; -+ if (!WARN_ON(prev_count == U32_MAX)) -+ kbdev->csf.scheduler.pm_active_count++; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + -+ /* On 0 => 1, make a pm_ctx_active request */ -+ if (!prev_count) { -+ ret = kbase_pm_context_active_handle_suspend(kbdev, -+ suspend_handler); -+ /* Invoke the PM state machines again as the change in MCU -+ * desired status, due to the update of scheduler.pm_active_count, -+ * may be missed by the thread that called pm_wait_for_desired_state() -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (ret) -+ kbdev->csf.scheduler.pm_active_count--; -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (err) { ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); + } + -+ return ret; ++ return err; +} + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * scheduler_pm_active_after_sleep() - Acquire the PM reference count for -+ * Scheduler -+ * -+ * @kbdev: Pointer to the device -+ * @flags: Pointer to the flags variable containing the interrupt state -+ * when hwaccess lock was acquired. -+ * -+ * This function is called when Scheduler needs to be activated from the -+ * sleeping state. -+ * The PM reference count is acquired for the Scheduler and the wake up of -+ * MCU is initiated. It resets the flag that indicates to the MCU state -+ * machine that MCU needs to be put in sleep state. -+ * -+ * Note: This function shall be called with hwaccess lock held and it may -+ * release that lock and reacquire it. -+ * -+ * Return: zero when the PM reference was taken and non-zero when the -+ * system is being suspending/suspended. -+ */ -+static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, -+ unsigned long *flags) ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) +{ -+ u32 prev_count; -+ int ret = 0; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ /* Validate there are no on-slot groups when sending the ++ * halt request to firmware. ++ */ ++ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev)); ++ set_global_request(global_iface, GLB_REQ_HALT_MASK); ++ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ prev_count = kbdev->csf.scheduler.pm_active_count; -+ if (!WARN_ON(prev_count == U32_MAX)) -+ kbdev->csf.scheduler.pm_active_count++; ++void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) ++{ ++ /* Trigger the boot of MCU firmware, Use the AUTO mode as ++ * otherwise on fast reset, to exit protected mode, MCU will ++ * not reboot by itself to enter normal mode. 
++ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); ++} + -+ /* On 0 => 1, make a pm_ctx_active request */ -+ if (!prev_count) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); ++#ifdef KBASE_PM_RUNTIME ++void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; + -+ ret = kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_SLEEP_MASK); ++ dev_dbg(kbdev->dev, "Sending sleep request to MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); -+ if (ret) -+ kbdev->csf.scheduler.pm_active_count--; -+ else -+ kbdev->pm.backend.gpu_sleep_mode_active = false; -+ kbase_pm_update_state(kbdev); -+ } ++bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return ret; ++ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) && ++ kbase_csf_firmware_mcu_halted(kbdev)); +} +#endif + -+/** -+ * scheduler_pm_idle() - Release the PM reference count held by Scheduler -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function is usually called after Scheduler is suspended. -+ * The PM reference count held by the Scheduler is released to trigger the -+ * power down of GPU. -+ */ -+static void scheduler_pm_idle(struct kbase_device *kbdev) ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) +{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; -+ u32 prev_count; ++ int err = 0; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* The 'reg_lock' is also taken and is held till the update is ++ * complete, to ensure the config update gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ prev_count = kbdev->csf.scheduler.pm_active_count; -+ if (!WARN_ON(prev_count == 0)) -+ kbdev->csf.scheduler.pm_active_count--; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ if (prev_count == 1) { -+ kbase_pm_context_idle(kbdev); -+ /* Invoke the PM state machines again as the change in MCU -+ * desired status, due to the update of scheduler.pm_active_count, -+ * may be missed by the thread that called pm_wait_for_desired_state() -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++ err = wait_for_global_request(kbdev, ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ return err; +} + -+#ifdef KBASE_PM_RUNTIME +/** -+ * scheduler_pm_idle_before_sleep() - Release the PM reference count and -+ * trigger the tranistion to sleep state. ++ * copy_grp_and_stm - Copy CS and/or group data + * -+ * @kbdev: Pointer to the device ++ * @iface: Global CSF interface provided by ++ * the firmware. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). 
++ * @max_group_num: The maximum number of groups to be read. Can be 0, in ++ * which case group_data is unused. ++ * @stream_data: Pointer where to store all the stream data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of streams to be read. ++ * Can be 0, in which case stream_data is unused. + * -+ * This function is called on the GPU idle notification. It releases the -+ * Scheduler's PM reference count and sets the flag to indicate to the -+ * MCU state machine that MCU needs to be put in sleep state. ++ * Return: Total number of CSs, summed across all groups. + */ -+static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev) ++static u32 copy_grp_and_stm( ++ const struct kbase_csf_global_iface * const iface, ++ struct basep_cs_group_control * const group_data, ++ u32 max_group_num, ++ struct basep_cs_stream_control * const stream_data, ++ u32 max_total_stream_num) +{ -+ unsigned long flags; -+ u32 prev_count; ++ u32 i, total_stream_num = 0; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ if (WARN_ON((max_group_num > 0) && !group_data)) ++ max_group_num = 0; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ prev_count = kbdev->csf.scheduler.pm_active_count; -+ if (!WARN_ON(prev_count == 0)) -+ kbdev->csf.scheduler.pm_active_count--; -+ kbdev->pm.backend.gpu_sleep_mode_active = true; -+ kbdev->pm.backend.exit_gpu_sleep_mode = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) ++ max_total_stream_num = 0; + -+ if (prev_count == 1) { -+ kbase_pm_context_idle(kbdev); -+ /* Invoke the PM state machines again as the change in MCU -+ * desired status, due to the update of scheduler.pm_active_count, -+ * may be missed by the thread that called pm_wait_for_desired_state() -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++ for (i = 0; i < iface->group_num; i++) { ++ u32 j; ++ ++ if (i < max_group_num) { ++ group_data[i].features = iface->groups[i].features; ++ group_data[i].stream_num = iface->groups[i].stream_num; ++ group_data[i].suspend_size = ++ iface->groups[i].suspend_size; ++ } ++ for (j = 0; j < iface->groups[i].stream_num; j++) { ++ if (total_stream_num < max_total_stream_num) ++ stream_data[total_stream_num].features = ++ iface->groups[i].streams[j].features; ++ total_stream_num++; ++ } ++ } ++ ++ return total_stream_num; +} -+#endif + -+static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, ++ struct basep_cs_group_control *const group_data, ++ u32 const max_group_num, ++ struct basep_cs_stream_control *const stream_data, ++ u32 const max_total_stream_num, u32 *const glb_version, ++ u32 *const features, u32 *const group_num, u32 *const prfcnt_size, ++ u32 *const instr_features) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ int ret; -+ -+ lockdep_assert_held(&scheduler->lock); ++ const struct kbase_csf_global_iface * const iface = ++ &kbdev->csf.global_iface; + -+ if ((scheduler->state != SCHED_SUSPENDED) && -+ (scheduler->state != SCHED_SLEEPING)) -+ return; ++ if (WARN_ON(!glb_version) || WARN_ON(!features) || ++ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || ++ WARN_ON(!instr_features)) ++ return 0; + -+ if (scheduler->state == SCHED_SUSPENDED) { -+ dev_dbg(kbdev->dev, -+ "Re-activating the Scheduler after suspend"); -+ ret = 
scheduler_pm_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); -+ } else { -+#ifdef KBASE_PM_RUNTIME -+ unsigned long flags; ++ *glb_version = iface->version; ++ *features = iface->features; ++ *group_num = iface->group_num; ++ *prfcnt_size = iface->prfcnt_size; ++ *instr_features = iface->instr_features; + -+ dev_dbg(kbdev->dev, -+ "Re-activating the Scheduler out of sleep"); ++ return copy_grp_and_stm(iface, group_data, max_group_num, ++ stream_data, max_total_stream_num); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = scheduler_pm_active_after_sleep(kbdev, &flags); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#endif ++const char *kbase_csf_firmware_get_timeline_metadata( ++ struct kbase_device *kbdev, const char *name, size_t *size) ++{ ++ if (WARN_ON(!kbdev) || ++ WARN_ON(!name) || ++ WARN_ON(!size)) { ++ return NULL; + } + -+ if (ret) { -+ /* GPUCORE-29850 would add the handling for the case where -+ * Scheduler could not be activated due to system suspend. -+ */ -+ dev_info(kbdev->dev, -+ "Couldn't wakeup Scheduler due to system suspend"); -+ return; -+ } ++ *size = 0; ++ return NULL; ++} + -+ scheduler->state = SCHED_INACTIVE; -+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); ++void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) ++{ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); ++} + -+ if (kick) -+ scheduler_enable_tick_timer_nolock(kbdev); ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) ++{ ++ /* NO_MALI: Nothing to do here */ +} + -+static void scheduler_suspend(struct kbase_device *kbdev) ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct tagged_addr *phys; ++ struct kbase_va_region *va_reg; ++ struct page **page_list; ++ void *cpu_addr; ++ int i, ret = 0; ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ unsigned long gpu_map_prot; + -+ lockdep_assert_held(&scheduler->lock); ++ if (cpu_map_properties & PROT_READ) ++ cpu_map_prot = PAGE_KERNEL_RO; + -+ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { -+ dev_dbg(kbdev->dev, "Suspending the Scheduler"); -+ scheduler_pm_idle(kbdev); -+ scheduler->state = SCHED_SUSPENDED; -+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ } else { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(cpu_map_prot); + } -+} + -+/** -+ * update_idle_suspended_group_state() - Move the queue group to a non-idle -+ * suspended state. -+ * @group: Pointer to the queue group. -+ * -+ * This function is called to change the state of queue group to non-idle -+ * suspended state, if the group was suspended when all the queues bound to it -+ * became empty or when some queues got blocked on a sync wait & others became -+ * empty. The group is also moved to the runnable list from idle wait list in -+ * the latter case. -+ * So the function gets called when a queue is kicked or sync wait condition -+ * gets satisfied. 
-+ */ -+static void update_idle_suspended_group_state(struct kbase_queue_group *group) -+{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ int new_val; ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ goto out; + -+ lockdep_assert_held(&scheduler->lock); ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; + -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { -+ remove_group_from_idle_wait(group); -+ insert_group_to_runnable(scheduler, group, -+ KBASE_CSF_GROUP_SUSPENDED); -+ } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { -+ group->run_state = KBASE_CSF_GROUP_SUSPENDED; -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, -+ group->run_state); ++ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, ++ phys, false, NULL); ++ if (ret <= 0) ++ goto phys_mem_pool_alloc_error; + -+ /* If scheduler is not suspended and the given group's -+ * static priority (reflected by the scan_seq_num) is inside -+ * the current tick slot-range, or there are some on_slot -+ * idle groups, schedule an async tock. -+ */ -+ if (scheduler->state != SCHED_SUSPENDED) { -+ unsigned long flags; -+ int n_idle; -+ int n_used; -+ int n_slots = -+ group->kctx->kbdev->csf.global_iface.group_num; ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ n_idle = bitmap_weight(scheduler->csg_slots_idle_mask, -+ n_slots); -+ n_used = bitmap_weight(scheduler->csg_inuse_bitmap, -+ n_slots); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, -+ flags); ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; + -+ if (n_idle || -+ n_used < scheduler->num_csg_slots_for_tick || -+ group->scan_seq_num < -+ scheduler->num_csg_slots_for_tick) -+ schedule_in_cycle(group, true); -+ } -+ } else -+ return; ++ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, ++ KBASE_REG_ZONE_MCU_SHARED); ++ if (!va_reg) ++ goto va_region_alloc_error; + -+ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, -+ new_val); -+} ++ mutex_lock(&kbdev->csf.reg_lock); ++ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); ++ va_reg->flags &= ~KBASE_REG_FREE; ++ if (ret) ++ goto va_region_add_error; ++ mutex_unlock(&kbdev->csf.reg_lock); + -+int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) -+{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ int slot_num = group->csg_nr; ++ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); ++ gpu_map_properties |= gpu_map_prot; + -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, ++ &phys[0], num_pages, gpu_map_properties, ++ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); ++ if (ret) ++ goto mmu_insert_pages_error; + -+ if (slot_num >= 0) { -+ if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != -+ group)) -+ return -1; -+ } ++ kfree(page_list); ++ csf_mapping->phys = phys; ++ csf_mapping->cpu_addr = cpu_addr; ++ csf_mapping->va_reg = va_reg; ++ csf_mapping->num_pages = num_pages; + -+ return slot_num; ++ return 0; ++ ++mmu_insert_pages_error: ++ 
mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, va_reg); ++va_region_add_error: ++ kbase_free_alloced_region(va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++va_region_alloc_error: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false, false); ++ ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); ++out: ++ /* Zero-initialize the mapping to make sure that the termination ++ * function doesn't try to unmap or free random addresses. ++ */ ++ csf_mapping->phys = NULL; ++ csf_mapping->cpu_addr = NULL; ++ csf_mapping->va_reg = NULL; ++ csf_mapping->num_pages = 0; ++ ++ return -ENOMEM; +} + -+int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) +{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ unsigned long flags; -+ int slot_num; ++ if (csf_mapping->va_reg) { ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, csf_mapping->va_reg); ++ kbase_free_alloced_region(csf_mapping->va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ } + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ if (csf_mapping->phys) { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ csf_mapping->num_pages, csf_mapping->phys, false, ++ false); ++ } + -+ return slot_num; ++ vunmap(csf_mapping->cpu_addr); ++ kfree(csf_mapping->phys); +} -+ -+/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot. +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +new file mode 100644 +index 000000000..7c14b8eb5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +@@ -0,0 +1,236 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @group: GPU queue group to be checked ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * -+ * This function needs to be called with scheduler's lock held ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if @group is on slot. + */ -+static bool kbasep_csf_scheduler_group_is_on_slot_locked( -+ struct kbase_queue_group *group) ++ ++#include ++#include "mali_kbase_csf_heap_context_alloc.h" ++ ++/* Size of one heap context structure, in bytes. */ ++#define HEAP_CTX_SIZE ((u32)32) ++ ++/** ++ * sub_alloc - Sub-allocate a heap context from a GPU memory region ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. 
++ * ++ * Return: GPU virtual address of the allocated heap context or 0 on failure. ++ */ ++static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) +{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ int slot_num = group->csg_nr; ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ unsigned long heap_nr = 0; ++ u32 ctx_offset = 0; ++ u64 heap_gpu_va = 0; ++ struct kbase_vmap_struct mapping; ++ void *ctx_ptr = NULL; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&ctx_alloc->lock); + -+ if (slot_num >= 0) { -+ if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != -+ group)) -+ return true; ++ heap_nr = find_first_zero_bit(ctx_alloc->in_use, ++ MAX_TILER_HEAPS); ++ ++ if (unlikely(heap_nr >= MAX_TILER_HEAPS)) { ++ dev_dbg(kctx->kbdev->dev, ++ "No free tiler heap contexts in the pool"); ++ return 0; + } + -+ return false; -+} ++ ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; ++ heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; ++ ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, ++ ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); + -+bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) -+{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ int slot_num = group->csg_nr; ++ if (unlikely(!ctx_ptr)) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to map tiler heap context %lu (0x%llX)\n", ++ heap_nr, heap_gpu_va); ++ return 0; ++ } + -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); ++ kbase_vunmap(ctx_ptr, &mapping); + -+ if (WARN_ON(slot_num < 0)) -+ return false; ++ bitmap_set(ctx_alloc->in_use, heap_nr, 1); + -+ return test_bit(slot_num, scheduler->csgs_events_enable_mask); ++ dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", ++ heap_nr, heap_gpu_va); ++ ++ return heap_gpu_va; +} + -+struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( -+ struct kbase_device *kbdev, int slot) ++/** ++ * evict_heap_context - Evict the data of heap context from GPU's L2 cache. ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * @heap_gpu_va: The GPU virtual address of a heap context structure to free. ++ * ++ * This function is called when memory for the heap context is freed. It uses the ++ * FLUSH_PA_RANGE command to evict the data of heap context, so on older CSF GPUs ++ * there is nothing done. The whole GPU cache is anyways expected to be flushed ++ * on older GPUs when initial chunks of the heap are freed just before the memory ++ * for heap context is freed. ++ */ ++static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va) +{ -+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); ++ u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; ++ u32 page_index = offset_in_bytes >> PAGE_SHIFT; ++ struct tagged_addr page = ++ kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; ++ phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; + -+ return kbdev->csf.scheduler.csg_slots[slot].resident_group; ++ lockdep_assert_held(&ctx_alloc->lock); ++ ++ /* There is no need to take vm_lock here as the ctx_alloc region is protected ++ * via a nonzero no_user_free_count. 
The region and the backing page can't ++ * disappear whilst this function is executing. Flush type is passed as FLUSH_PT ++ * to CLN+INV L2 only. ++ */ ++ kbase_mmu_flush_pa_range(kctx->kbdev, kctx, ++ heap_context_pa, ctx_alloc->heap_context_size_aligned, ++ KBASE_MMU_OP_FLUSH_PT); +} + -+static int halt_stream_sync(struct kbase_queue *queue) ++/** ++ * sub_free - Free a heap context sub-allocated from a GPU memory region ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * @heap_gpu_va: The GPU virtual address of a heap context structure to free. ++ */ ++static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va) +{ -+ struct kbase_queue_group *group = queue->group; -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ struct kbase_csf_cmd_stream_info *stream; -+ int csi_index = queue->csi_index; -+ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ unsigned long flags; ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ u32 ctx_offset = 0; ++ unsigned int heap_nr = 0; + -+ if (WARN_ON(!group) || -+ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) -+ return -EINVAL; ++ lockdep_assert_held(&ctx_alloc->lock); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ ginfo = &global_iface->groups[group->csg_nr]; -+ stream = &ginfo->streams[csi_index]; ++ if (WARN_ON(!ctx_alloc->region)) ++ return; + -+ if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == -+ CS_REQ_STATE_START) { ++ if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) ++ return; + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) -+ == CS_ACK_STATE_START), remaining); ++ ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); + -+ if (!remaining) { -+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, -+ csi_index, group->handle, group->csg_nr); -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || ++ WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) ++ return; + ++ evict_heap_context(ctx_alloc, heap_gpu_va); + -+ return -ETIMEDOUT; -+ } ++ heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; ++ dev_dbg(kctx->kbdev->dev, ++ "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); + -+ remaining = -+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ } ++ bitmap_clear(ctx_alloc->in_use, heap_nr, 1); ++} + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ /* Set state to STOP */ -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, -+ CS_REQ_STATE_MASK); ++int kbase_csf_heap_context_allocator_init( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ struct kbase_context *const kctx) ++{ ++ const u32 gpu_cache_line_size = ++ (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + -+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ /* We cannot pre-allocate GPU memory here because the ++ * custom VA zone may not have been created yet. 
++ */ ++ ctx_alloc->kctx = kctx; ++ ctx_alloc->region = NULL; ++ ctx_alloc->gpu_va = 0; ++ ctx_alloc->heap_context_size_aligned = ++ (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); ++ mutex_init(&ctx_alloc->lock); ++ bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); + -+ /* Timed wait */ -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) -+ == CS_ACK_STATE_STOP), remaining); ++ dev_dbg(kctx->kbdev->dev, ++ "Initialized a tiler heap context allocator\n"); + -+ if (!remaining) { -+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, -+ queue->csi_index, group->handle, group->csg_nr); ++ return 0; ++} + -+ /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU -+ * will be reset as a work-around. -+ */ -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++void kbase_csf_heap_context_allocator_term( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc) ++{ ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ ++ dev_dbg(kctx->kbdev->dev, ++ "Terminating tiler heap context allocator\n"); + ++ if (ctx_alloc->region) { ++ kbase_gpu_vm_lock(kctx); ++ WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region)); + ++ kbase_va_region_no_user_free_dec(ctx_alloc->region); ++ kbase_mem_free_region(kctx, ctx_alloc->region); ++ kbase_gpu_vm_unlock(kctx); + } -+ return (remaining) ? 0 : -ETIMEDOUT; ++ ++ mutex_destroy(&ctx_alloc->lock); +} + -+static bool can_halt_stream(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) ++u64 kbase_csf_heap_context_allocator_alloc( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc) +{ -+ struct kbase_csf_csg_slot *const csg_slot = -+ kbdev->csf.scheduler.csg_slots; -+ unsigned long flags; -+ bool can_halt; -+ int slot; ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | ++ BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; ++ u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); ++ u64 heap_gpu_va = 0; + -+ if (!queue_group_scheduled(group)) -+ return true; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ slot = kbase_csf_scheduler_group_get_slot_locked(group); -+ can_halt = (slot >= 0) && -+ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, -+ flags); ++ mutex_lock(&ctx_alloc->lock); + -+ return can_halt; ++ /* If the pool of heap contexts wasn't already allocated then ++ * allocate it. ++ */ ++ if (!ctx_alloc->region) { ++ ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, ++ &ctx_alloc->gpu_va, mmu_sync_info); ++ } ++ ++ /* If the pool still isn't allocated then an error occurred. 
*/ ++ if (unlikely(!ctx_alloc->region)) ++ dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts"); ++ else ++ heap_gpu_va = sub_alloc(ctx_alloc); ++ ++ mutex_unlock(&ctx_alloc->lock); ++ ++ return heap_gpu_va; ++} ++ ++void kbase_csf_heap_context_allocator_free( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va) ++{ ++ mutex_lock(&ctx_alloc->lock); ++ sub_free(ctx_alloc, heap_gpu_va); ++ mutex_unlock(&ctx_alloc->lock); +} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h +new file mode 100644 +index 000000000..9aab7abfb +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++ ++#ifndef _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ ++#define _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ + +/** -+ * sched_halt_stream() - Stop a GPU queue when its queue group is not running -+ * on a CSG slot. -+ * @queue: Pointer to the GPU queue to stop. ++ * kbase_csf_heap_context_allocator_init - Initialize an allocator for heap ++ * contexts ++ * @ctx_alloc: Pointer to the heap context allocator to initialize. ++ * @kctx: Pointer to the kbase context. + * -+ * This function handles stopping gpu queues for groups that are either not on -+ * a CSG slot or are on the slot but undergoing transition to -+ * resume or suspend states. -+ * It waits until the queue group is scheduled on a slot and starts running, -+ * which is needed as groups that were suspended may need to resume all queues -+ * that were enabled and running at the time of suspension. ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_heap_context_allocator_init( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ struct kbase_context *const kctx); ++ ++/** ++ * kbase_csf_heap_context_allocator_term - Terminate an allocator for heap ++ * contexts ++ * @ctx_alloc: Pointer to the heap context allocator to terminate. ++ */ ++void kbase_csf_heap_context_allocator_term( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc); ++ ++/** ++ * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure + * -+ * Return: 0 on success, or negative on failure. ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * ++ * If this function is successful then it returns the address of a ++ * zero-initialized heap context structure for use by the firmware. ++ * ++ * Return: GPU virtual address of the allocated heap context or 0 on failure. 
+ */ -+static int sched_halt_stream(struct kbase_queue *queue) -+{ -+ struct kbase_queue_group *group = queue->group; -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = -+ &kbdev->csf.scheduler; -+ struct kbase_csf_csg_slot *const csg_slot = -+ kbdev->csf.scheduler.csg_slots; -+ bool retry_needed = false; -+ bool retried = false; -+ long remaining; -+ int slot; -+ int err = 0; -+ const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); ++u64 kbase_csf_heap_context_allocator_alloc( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc); + -+ if (WARN_ON(!group)) -+ return -EINVAL; ++/** ++ * kbase_csf_heap_context_allocator_free - Free a heap context structure ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * @heap_gpu_va: The GPU virtual address of a heap context structure that ++ * was allocated for the firmware. ++ * ++ * This function returns a heap context structure to the free pool of unused ++ * contexts for possible reuse by a future call to ++ * @kbase_csf_heap_context_allocator_alloc. ++ */ ++void kbase_csf_heap_context_allocator_free( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va); + -+ lockdep_assert_held(&queue->kctx->csf.lock); -+ lockdep_assert_held(&scheduler->lock); ++#endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +new file mode 100644 +index 000000000..da8dde239 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +@@ -0,0 +1,2595 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ slot = kbase_csf_scheduler_group_get_slot(group); ++#include ++#include ++#include ++#include "device/mali_kbase_device.h" ++#include "mali_kbase_csf.h" ++#include + -+ if (slot >= 0) { -+ WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_fence.h" ++#include "mali_kbase_sync.h" + -+ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { -+ dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", -+ queue->csi_index, group->handle); -+ retry_needed = true; -+ } -+ } -+retry: -+ /* Update the group state so that it can get scheduled soon */ -+ update_idle_suspended_group_state(group); ++static DEFINE_SPINLOCK(kbase_csf_fence_lock); ++#endif + -+ mutex_unlock(&scheduler->lock); ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++#define FENCE_WAIT_TIMEOUT_MS 3000 ++#endif + -+ /* This function is called when the queue group is either not on a CSG -+ * slot or is on the slot but undergoing transition. 
-+ * -+ * To stop the queue, the function needs to wait either for the queue -+ * group to be assigned a CSG slot (and that slot has to reach the -+ * running state) or for the eviction of the queue group from the -+ * scheduler's list. -+ * -+ * In order to evaluate the latter condition, the function doesn't -+ * really need to lock the scheduler, as any update to the run_state -+ * of the queue group by sched_evict_group() would be visible due -+ * to implicit barriers provided by the kernel waitqueue macros. -+ * -+ * The group pointer cannot disappear meanwhile, as the high level -+ * CSF context is locked. Therefore, the scheduler would be -+ * the only one to update the run_state of the group. -+ */ -+ remaining = wait_event_timeout( -+ kbdev->csf.event_wait, can_halt_stream(kbdev, group), -+ kbase_csf_timeout_in_jiffies(group_schedule_timeout)); ++static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, ++ bool drain_queue); + -+ mutex_lock(&scheduler->lock); ++static void kcpu_queue_process_worker(struct work_struct *data); + -+ if (remaining && queue_group_scheduled_locked(group)) { -+ slot = kbase_csf_scheduler_group_get_slot(group); ++static int kbase_kcpu_map_import_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_va_region *reg; ++ struct kbase_mem_phy_alloc *alloc; ++ struct page **pages; ++ struct tagged_addr *pa; ++ long i; ++ int ret = 0; + -+ /* If the group is still on slot and slot is in running state -+ * then explicitly stop the CSI of the -+ * queue. Otherwise there are different cases to consider ++ lockdep_assert_held(&kcpu_queue->lock); ++ ++ /* Take the processes mmap lock */ ++ down_read(kbase_mem_get_process_mmap_lock()); ++ kbase_gpu_vm_lock(kctx); ++ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ import_info->handle); ++ ++ if (kbase_is_region_invalid_or_free(reg) || ++ !kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ /* Pin the physical pages backing the user buffer while ++ * we are in the process context and holding the mmap lock. ++ * The dma mapping & GPU mapping of the pages would be done ++ * when the MAP_IMPORT operation is executed. + * -+ * - If the queue group was already undergoing transition to -+ * resume/start state when this function was entered then it -+ * would not have disabled the CSI of the -+ * queue being stopped and the previous wait would have ended -+ * once the slot was in a running state with CS -+ * interface still enabled. -+ * Now the group is going through another transition either -+ * to a suspend state or to a resume state (it could have -+ * been suspended before the scheduler lock was grabbed). -+ * In both scenarios need to wait again for the group to -+ * come on a slot and that slot to reach the running state, -+ * as that would guarantee that firmware will observe the -+ * CSI as disabled. ++ * Though the pages would be pinned, no reference is taken ++ * on the physical pages tracking object. When the last ++ * reference to the tracking object is dropped the pages ++ * would be unpinned if they weren't unpinned before. 
+ * -+ * - If the queue group was either off the slot or was -+ * undergoing transition to suspend state on entering this -+ * function, then the group would have been resumed with the -+ * queue's CSI in disabled state. -+ * So now if the group is undergoing another transition -+ * (after the resume) then just need to wait for the state -+ * bits in the ACK register of CSI to be -+ * set to STOP value. It is expected that firmware will -+ * process the stop/disable request of the CS -+ * interface after resuming the group before it processes -+ * another state change request of the group. ++ * Region should be CPU cached: abort if it isn't. + */ -+ if ((slot >= 0) && -+ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { -+ err = halt_stream_sync(queue); -+ } else if (retry_needed && !retried) { -+ retried = true; -+ goto retry; -+ } else if (slot >= 0) { -+ struct kbase_csf_global_iface *global_iface = -+ &kbdev->csf.global_iface; -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &global_iface->groups[slot]; -+ struct kbase_csf_cmd_stream_info *stream = -+ &ginfo->streams[queue->csi_index]; -+ u32 cs_req = -+ kbase_csf_firmware_cs_input_read(stream, CS_REQ); ++ if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { ++ ret = -EINVAL; ++ goto out; ++ } + -+ if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != -+ CS_REQ_STATE_STOP)) { -+ /* Timed wait */ -+ remaining = wait_event_timeout( -+ kbdev->csf.event_wait, -+ (CS_ACK_STATE_GET( -+ kbase_csf_firmware_cs_output( -+ stream, CS_ACK)) == -+ CS_ACK_STATE_STOP), -+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); ++ ret = kbase_jd_user_buf_pin_pages(kctx, reg); ++ if (ret) ++ goto out; + -+ if (!remaining) { -+ dev_warn(kbdev->dev, -+ "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, -+ queue->csi_index, -+ group->handle, group->csg_nr); ++ alloc = reg->gpu_alloc; ++ pa = kbase_get_gpu_phy_pages(reg); ++ pages = alloc->imported.user_buf.pages; + ++ for (i = 0; i < alloc->nents; i++) ++ pa[i] = as_tagged(page_to_phys(pages[i])); ++ } + -+ err = -ETIMEDOUT; -+ } -+ } -+ } -+ } else if (!remaining) { -+ dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)", -+ kbase_backend_get_cycle_cnt(kbdev), -+ group->handle, queue->csi_index, -+ group_schedule_timeout); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; ++ current_command->info.import.gpu_va = import_info->handle; + ++out: ++ kbase_gpu_vm_unlock(kctx); ++ /* Release the processes mmap lock */ ++ up_read(kbase_mem_get_process_mmap_lock()); + -+ err = -ETIMEDOUT; ++ return ret; ++} ++ ++static int kbase_kcpu_unmap_import_prepare_internal( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command, ++ enum base_kcpu_command_type type) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_va_region *reg; ++ int ret = 0; ++ ++ lockdep_assert_held(&kcpu_queue->lock); ++ ++ kbase_gpu_vm_lock(kctx); ++ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ import_info->handle); ++ ++ if (kbase_is_region_invalid_or_free(reg) || ++ !kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ ret = -EINVAL; ++ goto out; + } + -+ return err; ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ /* The pages should have been pinned when MAP_IMPORT ++ * was enqueued previously. 
++ */ ++ if (reg->gpu_alloc->nents != ++ reg->gpu_alloc->imported.user_buf.nr_pages) { ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++ current_command->type = type; ++ current_command->info.import.gpu_va = import_info->handle; ++ ++out: ++ kbase_gpu_vm_unlock(kctx); ++ ++ return ret; ++} ++ ++static int kbase_kcpu_unmap_import_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, ++ import_info, current_command, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); ++} ++ ++static int kbase_kcpu_unmap_import_force_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, ++ import_info, current_command, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); +} + +/** -+ * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU -+ * queue needs to be stopped. -+ * -+ * @queue: Pointer the GPU command queue ++ * kbase_jit_add_to_pending_alloc_list() - Pend JIT allocation + * -+ * This function is called when the CSI to which GPU queue is bound needs to -+ * be stopped. For that the corresponding queue group needs to be resident on -+ * the CSG slot and MCU firmware should be running. So this function makes the -+ * Scheduler exit the sleeping or suspended state. ++ * @queue: The queue containing this JIT allocation ++ * @cmd: The JIT allocation that is blocking this queue + */ -+static void scheduler_activate_on_queue_stop(struct kbase_queue *queue) ++static void kbase_jit_add_to_pending_alloc_list( ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_context *const kctx = queue->kctx; ++ struct list_head *target_list_head = ++ &kctx->csf.kcpu_queues.jit_blocked_queues; ++ struct kbase_kcpu_command_queue *blocked_queue; + -+ scheduler_wakeup(kbdev, true); ++ lockdep_assert_held(&queue->lock); ++ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); + -+ /* Wait for MCU firmware to start running */ -+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), -+ queue->csi_index, queue->group->handle, -+ queue->kctx->tgid, queue->kctx->id, -+ queue->group->csg_nr); ++ list_for_each_entry(blocked_queue, ++ &kctx->csf.kcpu_queues.jit_blocked_queues, ++ jit_blocked) { ++ struct kbase_kcpu_command const *const jit_alloc_cmd = ++ &blocked_queue->commands[blocked_queue->start_offset]; ++ ++ WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC); ++ if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) { ++ target_list_head = &blocked_queue->jit_blocked; ++ break; ++ } + } ++ ++ list_add_tail(&queue->jit_blocked, target_list_head); +} + -+int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) ++/** ++ * kbase_kcpu_jit_allocate_process() - Process JIT allocation ++ * ++ * @queue: The queue containing this JIT allocation ++ * @cmd: The JIT allocation command ++ * ++ * Return: ++ * * 0 - allocation OK ++ * * -EINVAL - missing info or JIT ID still in use ++ * * -EAGAIN - Retry ++ * * -ENOMEM - no memory. 
unable to allocate ++ */ ++static int kbase_kcpu_jit_allocate_process( ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) +{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ struct kbase_queue_group *group = queue->group; -+ bool const cs_enabled = queue->enabled; -+ int err = 0; ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_kcpu_command_jit_alloc_info *alloc_info = ++ &cmd->info.jit_alloc; ++ struct base_jit_alloc_info *info = alloc_info->info; ++ struct kbase_vmap_struct mapping; ++ struct kbase_va_region *reg; ++ u32 count = alloc_info->count; ++ u64 *ptr, new_addr; ++ u32 i; ++ int ret; + -+ if (WARN_ON(!group)) ++ lockdep_assert_held(&queue->lock); ++ ++ if (WARN_ON(!info)) + return -EINVAL; + -+ kbase_reset_gpu_assert_failed_or_prevented(kbdev); -+ lockdep_assert_held(&queue->kctx->csf.lock); -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + -+ queue->enabled = false; -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); ++ /* Check if all JIT IDs are not in use */ ++ for (i = 0; i < count; i++, info++) { ++ /* The JIT ID is still in use so fail the allocation */ ++ if (kctx->jit_alloc[info->id]) { ++ dev_dbg(kctx->kbdev->dev, "JIT ID still in use"); ++ ret = -EINVAL; ++ goto fail; ++ } ++ } + -+ if (cs_enabled && queue_group_scheduled_locked(group)) { -+ struct kbase_csf_csg_slot *const csg_slot = -+ kbdev->csf.scheduler.csg_slots; -+ int slot = kbase_csf_scheduler_group_get_slot(group); ++ if (alloc_info->blocked) { ++ list_del(&queue->jit_blocked); ++ alloc_info->blocked = false; ++ } + -+ /* Since the group needs to be resumed in order to stop the queue, -+ * check if GPU needs to be powered up. -+ */ -+ scheduler_activate_on_queue_stop(queue); ++ /* Now start the allocation loop */ ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* Create a JIT allocation */ ++ reg = kbase_jit_allocate(kctx, info, true); ++ if (!reg) { ++ bool can_block = false; ++ struct kbase_kcpu_command const *jit_cmd; + -+ if ((slot >= 0) && -+ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) -+ err = halt_stream_sync(queue); -+ else -+ err = sched_halt_stream(queue); ++ list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { ++ if (jit_cmd == cmd) ++ break; + -+ unassign_user_doorbell_from_queue(kbdev, queue); -+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); -+ } ++ if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) { ++ u8 const *const free_ids = jit_cmd->info.jit_free.ids; + -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state); -+ return err; -+} ++ if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { ++ /* ++ * A JIT free which is active ++ * and submitted before this ++ * command. ++ */ ++ can_block = true; ++ break; ++ } ++ } ++ } + -+static void update_hw_active(struct kbase_queue *queue, bool active) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ if (queue && queue->enabled) { -+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); ++ if (!can_block) { ++ /* ++ * No prior JIT_FREE command is active. Roll ++ * back previous allocations and fail. 
++ */ ++ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ++ ret = -ENOMEM; ++ goto fail_rollback; ++ } + -+ output_addr[CS_ACTIVE / sizeof(u32)] = active; -+ } -+#else -+ CSTD_UNUSED(queue); -+ CSTD_UNUSED(active); -+#endif -+} ++ /* There are pending frees for an active allocation ++ * so we should wait to see whether they free the ++ * memory. Add to the list of atoms for which JIT ++ * allocation is pending. ++ */ ++ kbase_jit_add_to_pending_alloc_list(queue, cmd); ++ alloc_info->blocked = true; + -+static void program_cs_extract_init(struct kbase_queue *queue) -+{ -+ u64 *input_addr = (u64 *)queue->user_io_addr; -+ u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); ++ /* Rollback, the whole set will be re-attempted */ ++ while (i-- > 0) { ++ info--; ++ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); ++ kctx->jit_alloc[info->id] = NULL; ++ } + -+ input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = -+ output_addr[CS_EXTRACT_LO / sizeof(u64)]; -+} ++ ret = -EAGAIN; ++ goto fail; ++ } + -+static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, -+ struct kbase_queue *queue) -+{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ u32 const glb_version = kbdev->csf.global_iface.version; ++ /* Bind it to the user provided ID. */ ++ kctx->jit_alloc[info->id] = reg; ++ } + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* ++ * Write the address of the JIT allocation to the user provided ++ * GPU allocation. ++ */ ++ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), ++ KBASE_REG_CPU_WR, &mapping); ++ if (!ptr) { ++ ret = -ENOMEM; ++ goto fail_rollback; ++ } + -+ /* If cs_trace_command not supported, nothing to program */ -+ if (glb_version < kbase_csf_interface_version(1, 1, 0)) -+ return; ++ reg = kctx->jit_alloc[info->id]; ++ new_addr = reg->start_pfn << PAGE_SHIFT; ++ *ptr = new_addr; ++ kbase_vunmap(kctx, &mapping); ++ } + -+ /* Program for cs_trace if enabled. In the current arrangement, it is -+ * possible for the context to enable the cs_trace after some queues -+ * has been registered in cs_trace in disabled state. This is tracked by -+ * the queue's trace buffer base address, which had been validated at the -+ * queue's register_ex call. -+ */ -+ if (kbase_csf_scheduler_queue_has_trace(queue)) { -+ u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( -+ queue->trace_cfg, queue->kctx->as_nr); ++ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, -+ queue->trace_buffer_size); ++ return 0; + -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, -+ queue->trace_buffer_base & U32_MAX); -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, -+ queue->trace_buffer_base >> 32); ++fail_rollback: ++ /* Roll back completely */ ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* Free the allocations that were successful. ++ * Mark all the allocations including the failed one and the ++ * other un-attempted allocations in the set, so we know they ++ * are in use. 
++ */ ++ if (kctx->jit_alloc[info->id]) ++ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + -+ kbase_csf_firmware_cs_input( -+ stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, -+ queue->trace_offset_ptr & U32_MAX); -+ kbase_csf_firmware_cs_input( -+ stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, -+ queue->trace_offset_ptr >> 32); -+ } else { -+ /* Place the configuration to the disabled condition */ -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); -+ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); ++ kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; + } ++fail: ++ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); ++ ++ return ret; +} + -+static void program_cs(struct kbase_device *kbdev, -+ struct kbase_queue *queue, bool ring_csg_doorbell) ++static int kbase_kcpu_jit_allocate_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_jit_alloc_info *alloc_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_queue_group *group = queue->group; -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ struct kbase_csf_cmd_stream_info *stream; -+ int csi_index = queue->csi_index; -+ unsigned long flags; -+ u64 user_input; -+ u64 user_output; ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ void __user *data = u64_to_user_ptr(alloc_info->info); ++ struct base_jit_alloc_info *info = NULL; ++ u32 count = alloc_info->count; ++ int ret = 0; ++ u32 i; + -+ if (WARN_ON(!group)) -+ return; ++ lockdep_assert_held(&kcpu_queue->lock); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) || ++ (count > kcpu_queue->kctx->jit_max_allocations) || (!data) || ++ !kbase_mem_allow_alloc(kctx)) { ++ ret = -EINVAL; ++ goto out; ++ } + -+ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) -+ return; ++ info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); ++ if (!info) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; ++ if (copy_from_user(info, data, sizeof(*info) * count) != 0) { ++ ret = -EINVAL; ++ goto out_free; ++ } + -+ if (WARN_ON(csi_index < 0) || -+ WARN_ON(csi_index >= ginfo->stream_num)) -+ return; ++ for (i = 0; i < count; i++) { ++ ret = kbasep_jit_alloc_validate(kctx, &info[i]); ++ if (ret) ++ goto out_free; ++ } + -+ if (queue->enabled) { -+ assign_user_doorbell_to_queue(kbdev, queue); -+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) -+ return; ++ /* Search for duplicate JIT ids */ ++ for (i = 0; i < (count - 1); i++) { ++ u32 j; + -+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); ++ for (j = (i + 1); j < count; j++) { ++ if (info[i].id == info[j].id) { ++ ret = -EINVAL; ++ goto out_free; ++ } ++ } + } + -+ if (queue->enabled && queue_group_suspended_locked(group)) -+ program_cs_extract_init(queue); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC; ++ current_command->info.jit_alloc.info = info; ++ current_command->info.jit_alloc.count = count; ++ current_command->info.jit_alloc.blocked = false; ++ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); ++ list_add_tail(¤t_command->info.jit_alloc.node, ++ &kctx->csf.kcpu_queues.jit_cmds_head); ++ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + -+ stream = &ginfo->streams[csi_index]; ++ return 0; ++out_free: ++ kfree(info); ++out: ++ return ret; ++} + -+ kbase_csf_firmware_cs_input(stream, CS_BASE_LO, -+ queue->base_addr & 0xFFFFFFFF); -+ kbase_csf_firmware_cs_input(stream, CS_BASE_HI, -+ queue->base_addr >> 32); -+ 
kbase_csf_firmware_cs_input(stream, CS_SIZE, -+ queue->size); ++/** ++ * kbase_kcpu_jit_allocate_finish() - Finish handling the JIT_ALLOC command ++ * ++ * @queue: The queue containing this JIT allocation ++ * @cmd: The JIT allocation command ++ */ ++static void kbase_kcpu_jit_allocate_finish( ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) ++{ ++ lockdep_assert_held(&queue->lock); + -+ user_input = queue->user_io_gpu_va; -+ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); ++ mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock); + -+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); -+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32); ++ /* Remove this command from the jit_cmds_head list */ ++ list_del(&cmd->info.jit_alloc.node); + -+ user_output = user_input + PAGE_SIZE; -+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); -+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); ++ /* ++ * If we get to this point we must have already cleared the blocked ++ * flag, otherwise it'd be a bug. ++ */ ++ if (WARN_ON(cmd->info.jit_alloc.blocked)) { ++ list_del(&queue->jit_blocked); ++ cmd->info.jit_alloc.blocked = false; ++ } + -+ kbase_csf_firmware_cs_input(stream, CS_CONFIG, -+ (queue->doorbell_nr << 8) | (queue->priority & 0xF)); ++ mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock); + -+ /* Program the queue's cs_trace configuration */ -+ program_cs_trace_cfg(stream, queue); ++ kfree(cmd->info.jit_alloc.info); ++} + -+ /* Enable all interrupts for now */ -+ kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); ++/** ++ * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands ++ * ++ * @kctx: The context containing the blocked JIT_ALLOC commands ++ */ ++static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) ++{ ++ struct kbase_kcpu_command_queue *blocked_queue; + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); + -+ /* The fault bit could be misaligned between CS_REQ and CS_ACK if the -+ * acknowledgment was deferred due to dump on fault and the group was -+ * removed from the CSG slot before the fault could be acknowledged. ++ /* ++ * Reschedule all queues blocked by JIT_ALLOC commands. ++ * NOTE: This code traverses the list of blocked queues directly. It ++ * only works as long as the queued works are not executed at the same ++ * time. This precondition is true since we're holding the ++ * kbase_csf_kcpu_queue_context.jit_lock . + */ -+ if (queue->enabled) { -+ u32 const cs_ack = -+ kbase_csf_firmware_cs_output(stream, CS_ACK); ++ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) ++ queue_work(blocked_queue->wq, &blocked_queue->work); ++} + -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, -+ CS_REQ_FAULT_MASK); -+ } ++static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *const cmd) ++{ ++ struct kbase_kcpu_command_jit_free_info const *const free_info = ++ &cmd->info.jit_free; ++ u8 const *const ids = free_info->ids; ++ u32 const count = free_info->count; ++ u32 i; ++ int rc = 0; ++ struct kbase_context *kctx = queue->kctx; + -+ /* -+ * Enable the CSG idle notification once the CS's ringbuffer -+ * becomes empty or the CS becomes sync_idle, waiting sync update -+ * or protected mode switch. 
-+ */ -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, -+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | -+ CS_REQ_IDLE_SHARED_SB_DEC_MASK, -+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | -+ CS_REQ_IDLE_SHARED_SB_DEC_MASK); ++ if (WARN_ON(!ids)) ++ return -EINVAL; + -+ /* Set state to START/STOP */ -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, -+ queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, -+ CS_REQ_STATE_MASK); -+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, -+ ring_csg_doorbell); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ lockdep_assert_held(&queue->lock); ++ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev, ++ queue); + -+ update_hw_active(queue, true); -+} ++ for (i = 0; i < count; i++) { ++ u64 pages_used = 0; ++ int item_err = 0; + -+static int onslot_csg_add_new_queue(struct kbase_queue *queue) -+{ -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ int err; ++ if (!kctx->jit_alloc[ids[i]]) { ++ dev_dbg(kctx->kbdev->dev, "invalid JIT free ID"); ++ rc = -EINVAL; ++ item_err = rc; ++ } else { ++ struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]]; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* ++ * If the ID is valid but the allocation request failed, still ++ * succeed this command but don't try and free the allocation. ++ */ ++ if (reg != KBASE_RESERVED_REG_JIT_ALLOC) { ++ pages_used = reg->gpu_alloc->nents; ++ kbase_jit_free(kctx, reg); ++ } + -+ err = kbase_csf_mcu_shared_add_queue(kbdev, queue); -+ if (!err) -+ program_cs(kbdev, queue, true); ++ kctx->jit_alloc[ids[i]] = NULL; ++ } + -+ return err; ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ queue->kctx->kbdev, queue, item_err, pages_used); ++ } ++ ++ /* ++ * Remove this command from the jit_cmds_head list and retry pending ++ * allocations. 
++ */ ++ list_del(&cmd->info.jit_free.node); ++ kbase_kcpu_jit_retry_pending_allocs(kctx); ++ ++ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); ++ ++ /* Free the list of ids */ ++ kfree(ids); ++ ++ return rc; +} + -+int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) ++static int kbase_kcpu_jit_free_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_jit_free_info *free_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_queue_group *group = queue->group; -+ struct kbase_device *kbdev = queue->kctx->kbdev; -+ bool const cs_enabled = queue->enabled; -+ int err = 0; -+ bool evicted = false; ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ void __user *data = u64_to_user_ptr(free_info->ids); ++ u8 *ids; ++ u32 count = free_info->count; ++ int ret; ++ u32 i; + -+ kbase_reset_gpu_assert_prevented(kbdev); -+ lockdep_assert_held(&queue->kctx->csf.lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) -+ return -EINVAL; ++ /* Sanity checks */ ++ if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) { ++ ret = -EINVAL; ++ goto out; ++ } + -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ /* Copy the information for safe access and future storage */ ++ ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); ++ if (!ids) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) { -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ return -EBUSY; ++ if (!data) { ++ ret = -EINVAL; ++ goto out_free; + } -+#endif + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, -+ group->run_state); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, -+ queue->status_wait); ++ if (copy_from_user(ids, data, sizeof(*ids) * count)) { ++ ret = -EINVAL; ++ goto out_free; ++ } + -+ if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { -+ err = -EIO; -+ evicted = true; -+ } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) -+ && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { -+ dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", -+ queue->csi_index, group->handle); -+ } else { -+ err = scheduler_group_schedule(group); ++ for (i = 0; i < count; i++) { ++ /* Fail the command if ID sent is zero */ ++ if (!ids[i]) { ++ ret = -EINVAL; ++ goto out_free; ++ } ++ } + -+ if (!err) { -+ queue->enabled = true; -+ if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { -+ if (cs_enabled) { -+ /* In normal situation, when a queue is -+ * already running, the queue update -+ * would be a doorbell kick on user -+ * side. However, if such a kick is -+ * shortly following a start or resume, -+ * the queue may actually in transition -+ * hence the said kick would enter the -+ * kernel as the hw_active flag is yet -+ * to be set. The sheduler needs to -+ * give a kick to the corresponding -+ * user door-bell on such a case. -+ */ -+ kbase_csf_ring_cs_user_doorbell(kbdev, queue); -+ } else { -+ err = onslot_csg_add_new_queue(queue); -+ /* For an on slot CSG, the only error in adding a new -+ * queue to run is that the scheduler could not map -+ * the required userio pages due to likely some resource -+ * issues. In such a case, and if the group is yet -+ * to enter its fatal error state, we return a -EBUSY -+ * to the submitter for another kick. 
The queue itself -+ * has yet to be programmed hence needs to remain its -+ * previous (disabled) state. If the error persists, -+ * the group will eventually reports a fatal error by -+ * the group's error reporting mechanism, when the MCU -+ * shared region map retry limit of the group is -+ * exceeded. For such a case, the expected error value -+ * is -EIO. -+ */ -+ if (unlikely(err)) { -+ queue->enabled = cs_enabled; -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ return (err != -EIO) ? -EBUSY : err; -+ } -+ } ++ /* Search for duplicate JIT ids */ ++ for (i = 0; i < (count - 1); i++) { ++ u32 j; ++ ++ for (j = (i + 1); j < count; j++) { ++ if (ids[i] == ids[j]) { ++ ret = -EINVAL; ++ goto out_free; + } -+ queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, -+ msecs_to_jiffies(kbase_get_timeout_ms( -+ kbdev, CSF_FIRMWARE_PING_TIMEOUT))); + } + } + -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ -+ if (evicted) -+ kbase_csf_term_descheduled_queue_group(group); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE; ++ current_command->info.jit_free.ids = ids; ++ current_command->info.jit_free.count = count; ++ mutex_lock(&kctx->csf.kcpu_queues.jit_lock); ++ list_add_tail(¤t_command->info.jit_free.node, ++ &kctx->csf.kcpu_queues.jit_cmds_head); ++ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + -+ return err; ++ return 0; ++out_free: ++ kfree(ids); ++out: ++ return ret; +} + -+static enum kbase_csf_csg_slot_state update_csg_slot_status( -+ struct kbase_device *kbdev, s8 slot) ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++static int kbase_csf_queue_group_suspend_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_group_suspend_info *suspend_buf, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_csf_csg_slot *csg_slot = -+ &kbdev->csf.scheduler.csg_slots[slot]; -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[slot]; -+ u32 state; -+ enum kbase_csf_csg_slot_state slot_state; ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_suspend_copy_buffer *sus_buf = NULL; ++ const u32 csg_suspend_buf_size = ++ kctx->kbdev->csf.global_iface.groups[0].suspend_size; ++ u64 addr = suspend_buf->buffer; ++ u64 page_addr = addr & PAGE_MASK; ++ u64 end_addr = addr + csg_suspend_buf_size - 1; ++ u64 last_page_addr = end_addr & PAGE_MASK; ++ int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1; ++ int pinned_pages = 0, ret = 0; ++ struct kbase_va_region *reg; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, -+ CSG_ACK)); -+ slot_state = atomic_read(&csg_slot->state); ++ if (suspend_buf->size < csg_suspend_buf_size) ++ return -EINVAL; + -+ switch (slot_state) { -+ case CSG_SLOT_READY2RUN: -+ if ((state == CSG_ACK_STATE_START) || -+ (state == CSG_ACK_STATE_RESUME)) { -+ slot_state = CSG_SLOT_RUNNING; -+ atomic_set(&csg_slot->state, slot_state); -+ csg_slot->trigger_jiffies = jiffies; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group, -+ state); -+ dev_dbg(kbdev->dev, "Group %u running on slot %d\n", -+ csg_slot->resident_group->handle, slot); ++ ret = kbase_csf_queue_group_handle_is_valid(kctx, ++ suspend_buf->group_handle); ++ if (ret) ++ return ret; ++ ++ sus_buf = kzalloc(sizeof(*sus_buf), GFP_KERNEL); ++ if (!sus_buf) ++ return -ENOMEM; ++ ++ sus_buf->size = csg_suspend_buf_size; ++ sus_buf->nr_pages = nr_pages; ++ 
sus_buf->offset = addr & ~PAGE_MASK; ++ ++ sus_buf->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); ++ if (!sus_buf->pages) { ++ ret = -ENOMEM; ++ goto out_clean_sus_buf; ++ } ++ ++ /* Check if the page_addr is a valid GPU VA from SAME_VA zone, ++ * otherwise consider it is a CPU VA corresponding to the Host ++ * memory allocated by userspace. ++ */ ++ kbase_gpu_vm_lock(kctx); ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ page_addr); ++ ++ if (kbase_is_region_invalid_or_free(reg)) { ++ kbase_gpu_vm_unlock(kctx); ++ pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, ++ sus_buf->pages); ++ kbase_gpu_vm_lock(kctx); ++ ++ if (pinned_pages < 0) { ++ ret = pinned_pages; ++ goto out_clean_pages; + } -+ break; -+ case CSG_SLOT_DOWN2STOP: -+ if ((state == CSG_ACK_STATE_SUSPEND) || -+ (state == CSG_ACK_STATE_TERMINATE)) { -+ slot_state = CSG_SLOT_STOPPED; -+ atomic_set(&csg_slot->state, slot_state); -+ csg_slot->trigger_jiffies = jiffies; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); -+ dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", -+ csg_slot->resident_group->handle, slot); ++ if (pinned_pages != nr_pages) { ++ ret = -EINVAL; ++ goto out_clean_pages; + } -+ break; -+ case CSG_SLOT_DOWN2STOP_TIMEDOUT: -+ case CSG_SLOT_READY2RUN_TIMEDOUT: -+ case CSG_SLOT_READY: -+ case CSG_SLOT_RUNNING: -+ case CSG_SLOT_STOPPED: -+ break; -+ default: -+ dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); -+ break; ++ } else { ++ struct tagged_addr *page_array; ++ u64 start, end, i; ++ ++ if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || ++ (kbase_reg_current_backed_size(reg) < nr_pages) || ++ !(reg->flags & KBASE_REG_CPU_WR) || ++ (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || ++ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) { ++ ret = -EINVAL; ++ goto out_clean_pages; ++ } ++ ++ start = PFN_DOWN(page_addr) - reg->start_pfn; ++ end = start + nr_pages; ++ ++ if (end > reg->nr_pages) { ++ ret = -EINVAL; ++ goto out_clean_pages; ++ } ++ ++ sus_buf->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); ++ page_array = kbase_get_cpu_phy_pages(reg); ++ page_array += start; ++ ++ for (i = 0; i < nr_pages; i++, page_array++) ++ sus_buf->pages[i] = as_page(*page_array); + } + -+ return slot_state; -+} ++ kbase_gpu_vm_unlock(kctx); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND; ++ current_command->info.suspend_buf_copy.sus_buf = sus_buf; ++ current_command->info.suspend_buf_copy.group_handle = ++ suspend_buf->group_handle; ++ return ret; + -+static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) -+{ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++out_clean_pages: ++ kbase_gpu_vm_unlock(kctx); ++ kfree(sus_buf->pages); ++out_clean_sus_buf: ++ kfree(sus_buf); + -+ return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); ++ return ret; +} + -+static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) ++static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx, ++ struct kbase_suspend_copy_buffer *sus_buf, ++ u8 group_handle) +{ -+ enum kbase_csf_csg_slot_state slot_state; ++ return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle); ++} ++#endif + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++static enum kbase_csf_event_callback_action event_cqs_callback(void *param) ++{ ++ struct kbase_kcpu_command_queue *kcpu_queue = ++ (struct 
kbase_kcpu_command_queue *)param; + -+ slot_state = update_csg_slot_status(kbdev, slot); ++ queue_work(kcpu_queue->wq, &kcpu_queue->work); + -+ return (slot_state == CSG_SLOT_STOPPED || -+ slot_state == CSG_SLOT_READY); ++ return KBASE_CSF_EVENT_CALLBACK_KEEP; +} + -+static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) ++static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) +{ -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[slot]; -+ u32 state; -+ -+ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, -+ CSG_ACK)); ++ WARN_ON(!cqs_wait->nr_objs); ++ WARN_ON(!cqs_wait->objs); ++ WARN_ON(!cqs_wait->signaled); ++ WARN_ON(!queue->cqs_wait_count); + -+ if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); -+ dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); -+ return true; ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); + } + -+ return false; ++ kfree(cqs_wait->signaled); ++ kfree(cqs_wait->objs); ++ cqs_wait->signaled = NULL; ++ cqs_wait->objs = NULL; +} + -+static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) ++static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) +{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ struct kbase_csf_csg_slot *csg_slot = -+ kbdev->csf.scheduler.csg_slots; -+ s8 slot; ++ u32 i; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&queue->lock); + -+ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) -+ return; ++ if (WARN_ON(!cqs_wait->objs)) ++ return -EINVAL; + -+ slot = group->csg_nr; ++ /* Skip the CQS waits that have already been signaled when processing */ ++ for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) { ++ if (!test_bit(i, cqs_wait->signaled)) { ++ struct kbase_vmap_struct *mapping; ++ bool sig_set; ++ u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, ++ cqs_wait->objs[i].addr, &mapping); + -+ /* When in transition, wait for it to complete */ -+ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { -+ long remaining = -+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ if (!queue->command_started) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev, ++ queue); ++ queue->command_started = true; ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, ++ queue, cqs_wait->nr_objs, 0); ++ } + -+ dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ csg_slot_running(kbdev, slot), remaining); -+ if (!remaining) -+ dev_warn(kbdev->dev, -+ "[%llu] slot %d timeout (%d ms) on up-running\n", -+ kbase_backend_get_cycle_cnt(kbdev), -+ slot, kbdev->csf.fw_timeout_ms); -+ } ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_wait->objs[i].addr); ++ queue->has_error = true; ++ return -EINVAL; ++ } + -+ if (csg_slot_running(kbdev, slot)) { -+ unsigned long flags; -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &global_iface->groups[slot]; ++ sig_set = ++ evt[BASEP_EVENT32_VAL_OFFSET / 
sizeof(u32)] > cqs_wait->objs[i].val; ++ if (sig_set) { ++ bool error = false; + -+ u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : -+ CSG_REQ_STATE_TERMINATE; ++ bitmap_set(cqs_wait->signaled, i, 1); ++ if ((cqs_wait->inherit_err_flags & (1U << i)) && ++ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { ++ queue->has_error = true; ++ error = true; ++ } + -+ dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", -+ suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, ++ queue, cqs_wait->objs[i].addr, ++ error); + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ /* Set state to SUSPEND/TERMINATE */ -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, -+ CSG_REQ_STATE_MASK); -+ kbase_csf_ring_csg_doorbell(kbdev, slot); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, -+ flags); -+ atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); -+ csg_slot[slot].trigger_jiffies = jiffies; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( ++ kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); ++ queue->command_started = false; ++ } + -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( -+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend); ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ ++ if (!sig_set) ++ break; ++ } + } -+} + -+static void term_csg_slot(struct kbase_queue_group *group) -+{ -+ halt_csg_slot(group, false); ++ /* For the queue to progress further, all cqs objects should get ++ * signaled. ++ */ ++ return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); +} + -+static void suspend_csg_slot(struct kbase_queue_group *group) ++static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) +{ -+ halt_csg_slot(group, true); ++ return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; +} + -+static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev) ++static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) +{ -+ const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version); -+ const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version); ++ BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); ++ BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); ++ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); + -+ switch (glb_major) { -+ case 0: -+ break; -+ case 1: -+ if (glb_minor >= 4) -+ return true; -+ break; -+ case 2: -+ if (glb_minor >= 6) -+ return true; -+ break; -+ case 3: -+ if (glb_minor >= 6) -+ return true; -+ break; ++ switch (data_type) { + default: -+ return true; ++ return false; ++ case BASEP_CQS_DATA_TYPE_U32: ++ return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; ++ case BASEP_CQS_DATA_TYPE_U64: ++ return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; + } -+ return false; +} -+/** -+ * evaluate_sync_update() - Evaluate the sync wait condition the GPU command -+ * queue has been blocked on. -+ * -+ * @queue: Pointer to the GPU command queue -+ * -+ * Return: true if sync wait condition is satisfied. 
-+ */ -+static bool evaluate_sync_update(struct kbase_queue *queue) -+{ -+ struct kbase_vmap_struct *mapping; -+ bool updated = false; -+ u32 *sync_ptr; -+ u32 sync_wait_size; -+ u32 sync_wait_align_mask; -+ u32 sync_wait_cond; -+ u32 sync_current_val; -+ struct kbase_device *kbdev; -+ bool sync_wait_align_valid = false; -+ bool sync_wait_cond_valid = false; -+ -+ if (WARN_ON(!queue)) -+ return false; + -+ kbdev = queue->kctx->kbdev; ++static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, ++ struct base_kcpu_command_cqs_wait_info *cqs_wait_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct base_cqs_wait_info *objs; ++ unsigned int nr_objs = cqs_wait_info->nr_objs; ++ unsigned int i; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&queue->lock); + -+ sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait); -+ sync_wait_align_mask = -+ (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; -+ sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; -+ if (!sync_wait_align_valid) { -+ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", -+ queue->sync_ptr); -+ goto out; -+ } ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; + -+ sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, -+ &mapping); ++ if (!nr_objs) ++ return -EINVAL; + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue, -+ queue->sync_ptr); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue, -+ queue->blocked_reason); ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; + -+ if (!sync_ptr) { -+ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", -+ queue->sync_ptr); -+ goto out; ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; + } + -+ sync_wait_cond = -+ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); -+ sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) || -+ (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) || -+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && -+ csf_wait_ge_condition_supported(kbdev)); ++ /* Check the CQS objects as early as possible. By checking their alignment ++ * (required alignment equals to size for Sync32 and Sync64 objects), we can ++ * prevent overrunning the supplied event page. 
++ */ ++ for (i = 0; i < nr_objs; i++) { ++ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { ++ kfree(objs); ++ return -EINVAL; ++ } ++ } + -+ WARN_ON(!sync_wait_cond_valid); ++ if (++queue->cqs_wait_count == 1) { ++ if (kbase_csf_event_wait_add(queue->kctx, ++ event_cqs_callback, queue)) { ++ kfree(objs); ++ queue->cqs_wait_count--; ++ return -ENOMEM; ++ } ++ } + -+ sync_current_val = READ_ONCE(*sync_ptr); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue, -+ sync_current_val); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT; ++ current_command->info.cqs_wait.nr_objs = nr_objs; ++ current_command->info.cqs_wait.objs = objs; ++ current_command->info.cqs_wait.inherit_err_flags = ++ cqs_wait_info->inherit_err_flags; + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue, -+ queue->sync_value); ++ current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs), ++ sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL); ++ if (!current_command->info.cqs_wait.signaled) { ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); ++ } + -+ if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && -+ (sync_current_val > queue->sync_value)) || -+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && -+ (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) || -+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && -+ (sync_current_val <= queue->sync_value))) { -+ /* The sync wait condition is satisfied so the group to which -+ * queue is bound can be re-scheduled. -+ */ -+ updated = true; -+ } else { -+ dev_dbg(queue->kctx->kbdev->dev, -+ "sync memory not updated yet(%u)", sync_current_val); ++ kfree(objs); ++ return -ENOMEM; + } + -+ kbase_phy_alloc_mapping_put(queue->kctx, mapping); -+out: -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated); -+ return updated; ++ return 0; +} + -+/** -+ * save_slot_cs() - Save the state for blocked GPU command queue. -+ * -+ * @ginfo: Pointer to the CSG interface used by the group -+ * the queue is bound to. -+ * @queue: Pointer to the GPU command queue. -+ * -+ * This function will check if GPU command queue is blocked on a sync wait and -+ * evaluate the wait condition. If the wait condition isn't satisfied it would -+ * save the state needed to reevaluate the condition in future. -+ * The group to which queue is bound shall be in idle state. -+ * -+ * Return: true if the queue is blocked on a sync wait operation. 
-+ */ -+static -+bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, -+ struct kbase_queue *queue) ++static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_set_info *cqs_set) +{ -+ struct kbase_csf_cmd_stream_info *const stream = -+ &ginfo->streams[queue->csi_index]; -+ u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); -+ bool is_waiting = false; ++ unsigned int i; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO); ++ lockdep_assert_held(&queue->lock); + -+ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32; -+ queue->saved_cmd_ptr = cmd_ptr; -+#endif ++ if (WARN_ON(!cqs_set->objs)) ++ return; + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, -+ queue, status); ++ for (i = 0; i < cqs_set->nr_objs; i++) { ++ struct kbase_vmap_struct *mapping; ++ u32 *evt; + -+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { -+ queue->status_wait = status; -+ queue->sync_ptr = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_POINTER_LO); -+ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; -+ queue->sync_value = kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_WAIT_SYNC_VALUE); ++ evt = (u32 *)kbase_phy_alloc_mapping_get( ++ queue->kctx, cqs_set->objs[i].addr, &mapping); + -+ queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( -+ kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_SCOREBOARDS)); -+ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( -+ kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_BLOCKED_REASON)); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1); + -+ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || -+ !evaluate_sync_update(queue)) { -+ is_waiting = true; ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_set->objs[i].addr); ++ queue->has_error = true; + } else { -+ /* Sync object already got updated & met the condition -+ * thus it doesn't need to be reevaluated and so can -+ * clear the 'status_wait' here. -+ */ -+ queue->status_wait = 0; ++ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; ++ /* Set to signaled */ ++ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, ++ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); + } -+ } else { -+ /* Invalidate wait status info that would have been recorded if -+ * this queue was blocked when the group (in idle state) was -+ * suspended previously. After that the group could have been -+ * unblocked due to the kicking of another queue bound to it & -+ * so the wait status info would have stuck with this queue. 
-+ */ -+ queue->status_wait = 0; + } + -+ return is_waiting; ++ kbase_csf_event_signal_notify_gpu(queue->kctx); ++ ++ kfree(cqs_set->objs); ++ cqs_set->objs = NULL; +} + -+static void schedule_in_cycle(struct kbase_queue_group *group, bool force) ++static int kbase_kcpu_cqs_set_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_cqs_set_info *cqs_set_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct base_cqs_set *objs; ++ unsigned int nr_objs = cqs_set_info->nr_objs; ++ unsigned int i; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ /* Only try to schedule work for this event if no requests are pending, -+ * otherwise the function will end up canceling previous work requests, -+ * and scheduler is configured to wake up periodically (or the schedule -+ * of work needs to be enforced in situation such as entering into -+ * protected mode). ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; ++ ++ if (!nr_objs) ++ return -EINVAL; ++ ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; ++ ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } ++ ++ /* Check the CQS objects as early as possible. By checking their alignment ++ * (required alignment equals to size for Sync32 and Sync64 objects), we can ++ * prevent overrunning the supplied event page. + */ -+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { -+ dev_dbg(kbdev->dev, "Kicking async for group %d\n", -+ group->handle); -+ kbase_csf_scheduler_invoke_tock(kbdev); ++ for (i = 0; i < nr_objs; i++) { ++ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { ++ kfree(objs); ++ return -EINVAL; ++ } + } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; ++ current_command->info.cqs_set.nr_objs = nr_objs; ++ current_command->info.cqs_set.objs = objs; ++ ++ return 0; +} + -+static void ktrace_log_group_state(struct kbase_queue_group *const group) ++static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ -+ switch (group->run_state) { -+ case KBASE_CSF_GROUP_INACTIVE: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_RUNNABLE: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_IDLE: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_SUSPENDED: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, -+ group, group->run_state); -+ break; -+ case KBASE_CSF_GROUP_FAULT_EVICTED: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group, -+ group->run_state); -+ break; -+ case KBASE_CSF_GROUP_TERMINATED: -+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, 
CSF_GROUP_TERMINATED, group, -+ group->run_state); -+ break; ++ WARN_ON(!cqs_wait_operation->nr_objs); ++ WARN_ON(!cqs_wait_operation->objs); ++ WARN_ON(!cqs_wait_operation->signaled); ++ WARN_ON(!queue->cqs_wait_count); ++ ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); + } ++ ++ kfree(cqs_wait_operation->signaled); ++ kfree(cqs_wait_operation->objs); ++ cqs_wait_operation->signaled = NULL; ++ cqs_wait_operation->objs = NULL; +} + -+static -+void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, -+ struct kbase_queue_group *const group, -+ enum kbase_csf_group_state run_state) ++static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ -+ struct kbase_context *const kctx = group->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; ++ u32 i; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&queue->lock); + -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); ++ if (WARN_ON(!cqs_wait_operation->objs)) ++ return -EINVAL; + -+ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) -+ return; ++ /* Skip the CQS waits that have already been signaled when processing */ ++ for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { ++ if (!test_bit(i, cqs_wait_operation->signaled)) { ++ struct kbase_vmap_struct *mapping; ++ bool sig_set; ++ uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( ++ queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); ++ u64 val = 0; + -+ group->run_state = run_state; ++ if (!queue->command_started) { ++ queue->command_started = true; ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( ++ kbdev, queue); ++ } + -+ ktrace_log_group_state(group); + -+ if (run_state == KBASE_CSF_GROUP_RUNNABLE) -+ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); ++ queue->has_error = true; ++ return -EINVAL; ++ } + -+ list_add_tail(&group->link, -+ &kctx->csf.sched.runnable_groups[group->priority]); -+ kctx->csf.sched.num_runnable_grps++; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group, -+ kctx->csf.sched.num_runnable_grps); ++ switch (cqs_wait_operation->objs[i].data_type) { ++ default: ++ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( ++ cqs_wait_operation->objs[i].data_type)); ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ queue->has_error = true; ++ return -EINVAL; ++ case BASEP_CQS_DATA_TYPE_U32: ++ val = *(u32 *)evt; ++ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; ++ break; ++ case BASEP_CQS_DATA_TYPE_U64: ++ val = *(u64 *)evt; ++ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; ++ break; ++ } + -+ /* Add the kctx if not yet in runnable kctxs */ -+ if (kctx->csf.sched.num_runnable_grps == 1) { -+ /* First runnable csg, adds to the runnable_kctxs */ -+ INIT_LIST_HEAD(&kctx->csf.link); -+ list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u); -+ } ++ switch (cqs_wait_operation->objs[i].operation) { ++ case BASEP_CQS_WAIT_OPERATION_LE: ++ sig_set = val <= cqs_wait_operation->objs[i].val; ++ break; ++ case BASEP_CQS_WAIT_OPERATION_GT: ++ sig_set = val > cqs_wait_operation->objs[i].val; 
++ break; ++ default: ++ dev_dbg(kbdev->dev, ++ "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); + -+ scheduler->total_runnable_grps++; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ queue->has_error = true; + -+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && -+ (scheduler->total_runnable_grps == 1 || -+ scheduler->state == SCHED_SUSPENDED || -+ scheduler->state == SCHED_SLEEPING)) { -+ dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); -+ /* Fire a scheduling to start the time-slice */ -+ enqueue_tick_work(kbdev); -+ } else -+ schedule_in_cycle(group, false); ++ return -EINVAL; ++ } + -+ /* Since a new group has become runnable, check if GPU needs to be -+ * powered up. ++ if (sig_set) { ++ bitmap_set(cqs_wait_operation->signaled, i, 1); ++ if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && ++ *(u32 *)evt > 0) { ++ queue->has_error = true; ++ } ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( ++ kbdev, queue, *(u32 *)evt); ++ ++ queue->command_started = false; ++ } ++ ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ ++ if (!sig_set) ++ break; ++ } ++ } ++ ++ /* For the queue to progress further, all cqs objects should get ++ * signaled. + */ -+ scheduler_wakeup(kbdev, false); ++ return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); +} + -+static -+void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, -+ struct kbase_queue_group *group, -+ enum kbase_csf_group_state run_state) ++static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, ++ struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_queue_group *new_head_grp; -+ struct list_head *list = -+ &kctx->csf.sched.runnable_groups[group->priority]; -+ unsigned long flags; ++ struct base_cqs_wait_operation_info *objs; ++ unsigned int nr_objs = cqs_wait_operation_info->nr_objs; ++ unsigned int i; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&queue->lock); + -+ WARN_ON(!queue_group_scheduled_locked(group)); ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; + -+ group->run_state = run_state; ++ if (!nr_objs) ++ return -EINVAL; + -+ ktrace_log_group_state(group); ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; + -+ list_del_init(&group->link); ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ /* The below condition will be true when the group running in protected -+ * mode is being terminated but the protected mode exit interrupt was't -+ * received. This can happen if the FW got stuck during protected mode -+ * for some reason (like GPU page fault or some internal error). -+ * In normal cases FW is expected to send the protected mode exit -+ * interrupt before it handles the CSG termination request. ++ /* Check the CQS objects as early as possible. By checking their alignment ++ * (required alignment equals to size for Sync32 and Sync64 objects), we can ++ * prevent overrunning the supplied event page. 
+ */ -+ if (unlikely(scheduler->active_protm_grp == group)) { -+ /* CSG slot cleanup should have happened for the pmode group */ -+ WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group)); -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); -+ /* Initiate a GPU reset, in case it wasn't initiated yet, -+ * in order to rectify the anomaly. -+ */ -+ if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kctx->kbdev); -+ -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT, -+ scheduler->active_protm_grp, 0u); -+ scheduler->active_protm_grp = NULL; ++ for (i = 0; i < nr_objs; i++) { ++ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || ++ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { ++ kfree(objs); ++ return -EINVAL; ++ } + } -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ -+ if (scheduler->top_grp == group) { -+ /* -+ * Note: this disables explicit rotation in the next scheduling -+ * cycle. However, removing the top_grp is the same as an -+ * implicit rotation (e.g. if we instead rotated the top_ctx -+ * and then remove top_grp) -+ * -+ * This implicit rotation is assumed by the scheduler rotate -+ * functions. -+ */ -+ scheduler->top_grp = NULL; + -+ /* -+ * Trigger a scheduling tock for a CSG containing protected -+ * content in case there has been any in order to minimise -+ * latency. -+ */ -+ group = scheduler_get_protm_enter_async_group(kctx->kbdev, -+ NULL); -+ if (group) -+ schedule_in_cycle(group, true); ++ if (++queue->cqs_wait_count == 1) { ++ if (kbase_csf_event_wait_add(queue->kctx, ++ event_cqs_callback, queue)) { ++ kfree(objs); ++ queue->cqs_wait_count--; ++ return -ENOMEM; ++ } + } + -+ kctx->csf.sched.num_runnable_grps--; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group, -+ kctx->csf.sched.num_runnable_grps); -+ new_head_grp = (!list_empty(list)) ? -+ list_first_entry(list, struct kbase_queue_group, link) : -+ NULL; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION; ++ current_command->info.cqs_wait_operation.nr_objs = nr_objs; ++ current_command->info.cqs_wait_operation.objs = objs; ++ current_command->info.cqs_wait_operation.inherit_err_flags = ++ cqs_wait_operation_info->inherit_err_flags; + -+ if (kctx->csf.sched.num_runnable_grps == 0) { -+ struct kbase_context *new_head_kctx; -+ struct list_head *kctx_list = &scheduler->runnable_kctxs; -+ /* drop the kctx */ -+ list_del_init(&kctx->csf.link); -+ if (scheduler->top_ctx == kctx) -+ scheduler->top_ctx = NULL; -+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u); -+ new_head_kctx = (!list_empty(kctx_list)) ? 
-+ list_first_entry(kctx_list, struct kbase_context, csf.link) : -+ NULL; -+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); -+ } ++ current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), ++ sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); ++ if (!current_command->info.cqs_wait_operation.signaled) { ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); ++ } + -+ WARN_ON(scheduler->total_runnable_grps == 0); -+ scheduler->total_runnable_grps--; -+ if (!scheduler->total_runnable_grps) { -+ dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); -+ cancel_tick_timer(kctx->kbdev); -+ WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); -+ if (scheduler->state != SCHED_SUSPENDED) -+ enqueue_gpu_idle_work(scheduler); ++ kfree(objs); ++ return -ENOMEM; + } -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, -+ scheduler->num_active_address_spaces | -+ (((u64)scheduler->total_runnable_grps) << 32)); ++ ++ return 0; +} + -+static void insert_group_to_idle_wait(struct kbase_queue_group *const group) ++static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, ++ uintptr_t evt, u8 operation, u64 val) +{ -+ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = queue->kctx->kbdev; + -+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ switch (operation) { ++ case BASEP_CQS_SET_OPERATION_ADD: ++ *(u32 *)evt += (u32)val; ++ break; ++ case BASEP_CQS_SET_OPERATION_SET: ++ *(u32 *)evt = val; ++ break; ++ default: ++ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); ++ queue->has_error = true; ++ break; ++ } ++} + -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); ++static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, ++ uintptr_t evt, u8 operation, u64 val) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; + -+ list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); -+ kctx->csf.sched.num_idle_wait_grps++; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group, -+ kctx->csf.sched.num_idle_wait_grps); -+ group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group, -+ group->run_state); -+ dev_dbg(kctx->kbdev->dev, -+ "Group-%d suspended on sync_wait, total wait_groups: %u\n", -+ group->handle, kctx->csf.sched.num_idle_wait_grps); ++ switch (operation) { ++ case BASEP_CQS_SET_OPERATION_ADD: ++ *(u64 *)evt += val; ++ break; ++ case BASEP_CQS_SET_OPERATION_SET: ++ *(u64 *)evt = val; ++ break; ++ default: ++ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); ++ queue->has_error = true; ++ break; ++ } +} + -+static void remove_group_from_idle_wait(struct kbase_queue_group *const group) ++static void kbase_kcpu_cqs_set_operation_process( ++ struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct list_head *list = &kctx->csf.sched.idle_wait_groups; -+ struct kbase_queue_group *new_head_grp; ++ unsigned int i; + -+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&queue->lock); + -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); ++ if (WARN_ON(!cqs_set_operation->objs)) ++ return; + -+ 
list_del_init(&group->link); -+ WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); -+ kctx->csf.sched.num_idle_wait_grps--; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group, -+ kctx->csf.sched.num_idle_wait_grps); -+ new_head_grp = (!list_empty(list)) ? -+ list_first_entry(list, struct kbase_queue_group, link) : -+ NULL; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); -+ group->run_state = KBASE_CSF_GROUP_INACTIVE; -+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state); -+} ++ for (i = 0; i < cqs_set_operation->nr_objs; i++) { ++ struct kbase_vmap_struct *mapping; ++ uintptr_t evt; + -+static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, -+ struct kbase_queue_group *group) -+{ -+ lockdep_assert_held(&scheduler->lock); ++ evt = (uintptr_t)kbase_phy_alloc_mapping_get( ++ queue->kctx, cqs_set_operation->objs[i].addr, &mapping); + -+ if (WARN_ON(!group)) -+ return; ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); ++ queue->has_error = true; ++ } else { ++ struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; + -+ remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); -+ insert_group_to_idle_wait(group); -+} ++ switch (obj->data_type) { ++ default: ++ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); ++ queue->has_error = true; ++ goto skip_err_propagation; ++ case BASEP_CQS_DATA_TYPE_U32: ++ kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, ++ obj->val); ++ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; ++ break; ++ case BASEP_CQS_DATA_TYPE_U64: ++ kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, ++ obj->val); ++ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; ++ break; ++ } + -+static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) -+{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( ++ kbdev, queue, *(u32 *)evt ? 
1 : 0); + -+ lockdep_assert_held(&scheduler->lock); ++ /* Always propagate errors */ ++ *(u32 *)evt = queue->has_error; + -+ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { -+ int new_val = -+ atomic_dec_return(&scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); ++skip_err_propagation: ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ } + } ++ ++ kbase_csf_event_signal_notify_gpu(queue->kctx); ++ ++ kfree(cqs_set_operation->objs); ++ cqs_set_operation->objs = NULL; +} + -+static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) ++static int kbase_kcpu_cqs_set_operation_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct base_cqs_set_operation_info *objs; ++ unsigned int nr_objs = cqs_set_operation_info->nr_objs; ++ unsigned int i; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ WARN_ON(group->csg_nr < 0); ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; + -+ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { -+ int new_val = -+ atomic_dec_return(&scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); -+ } -+} ++ if (!nr_objs) ++ return -EINVAL; + -+static void update_offslot_non_idle_cnt_on_grp_suspend( -+ struct kbase_queue_group *group) -+{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; + -+ lockdep_assert_held(&scheduler->lock); ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } + -+ if (scheduler->state == SCHED_BUSY) { -+ /* active phase or, async entering the protected mode */ -+ if (group->prepared_seq_num >= -+ scheduler->non_idle_scanout_grps) { -+ /* At scanout, it was tagged as on-slot idle */ -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { -+ int new_val = atomic_inc_return( -+ &scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, -+ group, new_val); -+ } -+ } else { -+ if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { -+ int new_val = atomic_dec_return( -+ &scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, -+ group, new_val); -+ } -+ } -+ } else { -+ /* async phases */ -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { -+ int new_val = atomic_inc_return( -+ &scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, -+ new_val); ++ /* Check the CQS objects as early as possible. By checking their alignment ++ * (required alignment equals to size for Sync32 and Sync64 objects), we can ++ * prevent overrunning the supplied event page. 
++ */ ++ for (i = 0; i < nr_objs; i++) { ++ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || ++ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { ++ kfree(objs); ++ return -EINVAL; + } + } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; ++ current_command->info.cqs_set_operation.nr_objs = nr_objs; ++ current_command->info.cqs_set_operation.objs = objs; ++ ++ return 0; +} + -+static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static void kbase_csf_fence_wait_callback(struct fence *fence, ++ struct fence_cb *cb) ++#else ++static void kbase_csf_fence_wait_callback(struct dma_fence *fence, ++ struct dma_fence_cb *cb) ++#endif +{ -+ bool cs_empty; -+ bool cs_idle; -+ u32 sb_status = 0; ++ struct kbase_kcpu_command_fence_info *fence_info = container_of(cb, ++ struct kbase_kcpu_command_fence_info, fence_cb); ++ struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; ++ struct kbase_context *const kctx = kcpu_queue->kctx; + -+ struct kbase_device const *const kbdev = queue->group->kctx->kbdev; -+ struct kbase_csf_global_iface const *const iface = -+ &kbdev->csf.global_iface; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ /* Fence gets signaled. Deactivate the timer for fence-wait timeout */ ++ del_timer(&kcpu_queue->fence_timeout); ++#endif ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, ++ fence->context, fence->seqno); + -+ u32 glb_version = iface->version; ++ /* Resume kcpu command queue processing. */ ++ queue_work(kcpu_queue->wq, &kcpu_queue->work); ++} + -+ u64 const *input_addr = (u64 const *)queue->user_io_addr; -+ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); ++static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; + -+ if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { -+ /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; -+ struct kbase_csf_cmd_stream_info const *const stream = -+ &ginfo->streams[queue->csi_index]; ++ lockdep_assert_held(&kcpu_queue->lock); + -+ sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( -+ kbase_csf_firmware_cs_output(stream, -+ CS_STATUS_SCOREBOARDS)); ++ if (WARN_ON(!fence_info->fence)) ++ return; ++ ++ if (kcpu_queue->fence_wait_processed) { ++ bool removed = dma_fence_remove_callback(fence_info->fence, ++ &fence_info->fence_cb); ++ ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ /* Fence-wait cancelled or fence signaled. In the latter case ++ * the timer would already have been deactivated inside ++ * kbase_csf_fence_wait_callback(). 
++ */ ++ del_timer_sync(&kcpu_queue->fence_timeout); ++#endif ++ if (removed) ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, ++ kcpu_queue, fence_info->fence->context, ++ fence_info->fence->seqno); + } + -+ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == -+ output_addr[CS_EXTRACT_LO / sizeof(u64)]); -+ cs_idle = cs_empty && (!sb_status); ++ /* Release the reference which is kept by the kcpu_queue */ ++ kbase_fence_put(fence_info->fence); ++ kcpu_queue->fence_wait_processed = false; + -+ return cs_idle; ++ fence_info->fence = NULL; +} + -+static void save_csg_slot(struct kbase_queue_group *group) ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++/** ++ * fence_timeout_callback() - Timeout callback function for fence-wait ++ * ++ * @timer: Timer struct ++ * ++ * Context and seqno of the timed-out fence will be displayed in dmesg. ++ * If the fence has been signalled a work will be enqueued to process ++ * the fence-wait without displaying debugging information. ++ */ ++static void fence_timeout_callback(struct timer_list *timer) +{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ u32 state; -+ -+ lockdep_assert_held(&scheduler->lock); ++ struct kbase_kcpu_command_queue *kcpu_queue = ++ container_of(timer, struct kbase_kcpu_command_queue, fence_timeout); ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset]; ++ struct kbase_kcpu_command_fence_info *fence_info; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ struct kbase_sync_fence_info info; + -+ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) { ++ dev_err(kctx->kbdev->dev, ++ "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__, ++ cmd->type, kctx->tgid, kctx->id, kcpu_queue->id); + return; ++ } + -+ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; -+ -+ state = -+ CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); ++ fence_info = &cmd->info.fence; + -+ if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && -+ (state != CSG_ACK_STATE_TERMINATE))) { -+ u32 max_streams = ginfo->stream_num; -+ u32 i; -+ bool sync_wait = false; -+ bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & -+ CSG_STATUS_STATE_IDLE_MASK; -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ for (i = 0; i < max_streams; i++) -+ update_hw_active(group->bound_queues[i], false); -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ for (i = 0; idle && i < max_streams; i++) { -+ struct kbase_queue *const queue = -+ group->bound_queues[i]; ++ fence = kbase_fence_get(fence_info); ++ if (!fence) { ++ dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, ++ kctx->id, kcpu_queue->id); ++ return; ++ } + -+ if (!queue || !queue->enabled) -+ continue; ++ kbase_sync_fence_info_get(fence, &info); + -+ if (save_slot_cs(ginfo, queue)) { -+ /* sync_wait is only true if the queue is blocked on -+ * a CQS and not a scoreboard. -+ */ -+ if (queue->blocked_reason != -+ CS_STATUS_BLOCKED_ON_SB_WAIT) -+ sync_wait = true; -+ } else { -+ /* Need to confirm if ringbuffer of the GPU -+ * queue is empty or not. A race can arise -+ * between the flush of GPU queue and suspend -+ * of CSG. 
If a queue is flushed after FW has -+ * set the IDLE bit in CSG_STATUS_STATE, then -+ * Scheduler will incorrectly consider CSG -+ * as idle. And there may not be any further -+ * flush call for the GPU queue, which would -+ * have de-idled the CSG. -+ */ -+ idle = confirm_cmd_buf_empty(queue); -+ } -+ } ++ if (info.status == 1) { ++ queue_work(kcpu_queue->wq, &kcpu_queue->work); ++ } else if (info.status == 0) { ++ dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", ++ FENCE_WAIT_TIMEOUT_MS); ++ dev_warn(kctx->kbdev->dev, ++ "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s", ++ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name); ++ } else { ++ dev_warn(kctx->kbdev->dev, "fence has got error"); ++ dev_warn(kctx->kbdev->dev, ++ "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)", ++ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status); ++ } + -+ if (idle) { -+ /* Take the suspended group out of the runnable_groups -+ * list of the context and move it to the -+ * idle_wait_groups list. -+ */ -+ if (sync_wait) -+ deschedule_idle_wait_group(scheduler, group); -+ else { -+ group->run_state = -+ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, -+ group->run_state); -+ dev_dbg(kbdev->dev, "Group-%d suspended: idle", -+ group->handle); -+ } -+ } else { -+ group->run_state = KBASE_CSF_GROUP_SUSPENDED; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, -+ group->run_state); -+ } ++ kbase_fence_put(fence); ++} + -+ update_offslot_non_idle_cnt_on_grp_suspend(group); -+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group); -+ } ++/** ++ * fence_timeout_start() - Start a timer to check fence-wait timeout ++ * ++ * @cmd: KCPU command queue ++ * ++ * Activate a timer to check whether a fence-wait command in the queue ++ * gets completed within FENCE_WAIT_TIMEOUT_MS ++ */ ++static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) ++{ ++ mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); +} ++#endif + -+/* Cleanup_csg_slot after it has been vacated, ready for next csg run. -+ * Return whether there is a kctx address fault associated with the group -+ * for which the clean-up is done. ++/** ++ * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command ++ * ++ * @kcpu_queue: The queue containing the fence wait command ++ * @fence_info: Reference to a fence for which the command is waiting ++ * ++ * Return: 0 if fence wait is blocked, 1 if it is unblocked, negative error if ++ * an error has occurred and fence should no longer be waited on. 
+ */ -+static bool cleanup_csg_slot(struct kbase_queue_group *group) ++static int kbase_kcpu_fence_wait_process( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ s8 slot; -+ struct kbase_csf_csg_slot *csg_slot; -+ unsigned long flags; -+ u32 i; -+ bool as_fault = false; ++ int fence_status = 0; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ struct kbase_context *const kctx = kcpu_queue->kctx; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) -+ return as_fault; ++ if (WARN_ON(!fence_info->fence)) ++ return -EINVAL; + -+ slot = group->csg_nr; -+ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; -+ ginfo = &global_iface->groups[slot]; ++ fence = fence_info->fence; + -+ /* Now loop through all the bound CSs, and clean them via a stop */ -+ for (i = 0; i < ginfo->stream_num; i++) { -+ struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; ++ if (kcpu_queue->fence_wait_processed) { ++ fence_status = dma_fence_get_status(fence); ++ } else { ++ int cb_err = dma_fence_add_callback(fence, ++ &fence_info->fence_cb, ++ kbase_csf_fence_wait_callback); + -+ if (group->bound_queues[i]) { -+ if (group->bound_queues[i]->enabled) { -+ kbase_csf_firmware_cs_input_mask(stream, -+ CS_REQ, CS_REQ_STATE_STOP, -+ CS_REQ_STATE_MASK); -+ } ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, ++ KCPU_FENCE_WAIT_START, kcpu_queue, ++ fence->context, fence->seqno); ++ fence_status = cb_err; ++ if (cb_err == 0) { ++ kcpu_queue->fence_wait_processed = true; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ fence_timeout_start(kcpu_queue); ++#endif ++ } else if (cb_err == -ENOENT) { ++ fence_status = dma_fence_get_status(fence); ++ if (!fence_status) { ++ struct kbase_sync_fence_info info; + -+ unassign_user_doorbell_from_queue(kbdev, -+ group->bound_queues[i]); ++ kbase_sync_fence_info_get(fence, &info); ++ dev_warn(kctx->kbdev->dev, ++ "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", ++ info.name, kctx->tgid, kctx->id, kcpu_queue->id); ++ } + } + } + -+ unassign_user_doorbell_from_group(kbdev, group); -+ -+ /* The csg does not need cleanup other than drop its AS */ -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); -+ as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); -+ kbase_ctx_sched_release_ctx(kctx); -+ if (unlikely(group->faulted)) -+ as_fault = true; -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); -+ -+ /* now marking the slot is vacant */ -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; -+ clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, -+ kbdev->csf.scheduler.csg_slots_idle_mask[0]); -+ -+ group->csg_nr = KBASEP_CSG_NR_INVALID; -+ set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); -+ clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); -+ -+ csg_slot->trigger_jiffies = jiffies; -+ atomic_set(&csg_slot->state, CSG_SLOT_READY); -+ -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, 
CSG_SLOT_CLEANED, group, slot); -+ dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", -+ group->handle, slot); -+ -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, -+ kbdev->gpu_props.props.raw_props.gpu_id, slot); -+ -+ /* Notify the group is off-slot and the csg_reg might be available for -+ * resue with other groups in a 'lazy unbinding' style. ++ /* ++ * At this point fence status can contain 3 types of values: ++ * - Value 0 to represent that fence in question is not signalled yet ++ * - Value 1 to represent that fence in question is signalled without ++ * errors ++ * - Negative error code to represent that some error has occurred such ++ * that waiting on it is no longer valid. + */ -+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + -+ return as_fault; ++ if (fence_status) ++ kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); ++ ++ return fence_status; +} + -+static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) ++static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_fence_info *fence_info, ++ struct kbase_kcpu_command *current_command) +{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_csg_slot *csg_slot; -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ s8 slot; -+ u8 prev_prio; -+ u32 ep_cfg; -+ u32 csg_req; -+ unsigned long flags; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence_in; ++#else ++ struct dma_fence *fence_in; ++#endif ++ struct base_fence fence; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) -+ return; ++ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) ++ return -ENOMEM; + -+ slot = group->csg_nr; -+ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; -+ ginfo = &kbdev->csf.global_iface.groups[slot]; ++ fence_in = sync_file_get_fence(fence.basep.fd); + -+ /* CSGs remaining on-slot can be either idle or runnable. -+ * This also applies in protected mode. 
-+ */ -+ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || -+ (group->run_state == KBASE_CSF_GROUP_IDLE))); ++ if (!fence_in) ++ return -ENOENT; + -+ /* Update consumes a group from scanout */ -+ update_offslot_non_idle_cnt_for_onslot_grp(group); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; ++ current_command->info.fence.fence = fence_in; ++ current_command->info.fence.kcpu_queue = kcpu_queue; ++ return 0; ++} + -+ if (csg_slot->priority == prio) -+ return; ++static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ int ret; + -+ /* Read the csg_ep_cfg back for updating the priority field */ -+ ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); -+ prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); -+ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); -+ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); ++ if (WARN_ON(!fence_info->fence)) ++ return -EINVAL; + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); -+ csg_req ^= CSG_REQ_EP_CFG_MASK; -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, -+ CSG_REQ_EP_CFG_MASK); -+ kbase_csf_ring_csg_doorbell(kbdev, slot); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ ret = dma_fence_signal(fence_info->fence); + -+ csg_slot->priority = prio; ++ if (unlikely(ret < 0)) { ++ dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); ++ /* Treated as a success */ ++ ret = 0; ++ } + -+ dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", -+ group->handle, group->kctx->tgid, group->kctx->id, slot, -+ prev_prio, prio); ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, ++ fence_info->fence->context, ++ fence_info->fence->seqno); + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); ++ /* dma_fence refcount needs to be decreased to release it. 
*/ ++ kbase_fence_put(fence_info->fence); ++ fence_info->fence = NULL; + -+ set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); ++ return ret; +} + -+static void program_csg_slot(struct kbase_queue_group *group, s8 slot, -+ u8 prio) ++static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command *current_command, ++ struct base_fence *fence, struct sync_file **sync_file, ++ int *fd) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; -+ const u64 shader_core_mask = -+ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); -+ const u64 tiler_core_mask = -+ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); -+ const u64 compute_mask = shader_core_mask & group->compute_mask; -+ const u64 fragment_mask = shader_core_mask & group->fragment_mask; -+ const u64 tiler_mask = tiler_core_mask & group->tiler_mask; -+ const u8 num_cores = kbdev->gpu_props.num_cores; -+ const u8 compute_max = min(num_cores, group->compute_max); -+ const u8 fragment_max = min(num_cores, group->fragment_max); -+ const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); -+ struct kbase_csf_cmd_stream_group_info *ginfo; -+ u32 ep_cfg = 0; -+ u32 csg_req; -+ u32 state; -+ int i; -+ unsigned long flags; -+ u64 normal_suspend_buf; -+ u64 protm_suspend_buf; -+ struct kbase_csf_csg_slot *csg_slot = -+ &kbdev->csf.scheduler.csg_slots[slot]; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence_out; ++#else ++ struct dma_fence *fence_out; ++#endif ++ struct kbase_kcpu_dma_fence *kcpu_fence; ++ int ret = 0; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ if (WARN_ON(slot < 0) && -+ WARN_ON(slot >= global_iface->group_num)) -+ return; ++ kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); ++ if (!kcpu_fence) ++ return -ENOMEM; + -+ WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); ++ /* Set reference to KCPU metadata and increment refcount */ ++ kcpu_fence->metadata = kcpu_queue->metadata; ++ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); + -+ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { -+ dev_warn(kbdev->dev, -+ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", -+ group->handle, group->kctx->tgid, kctx->id, slot); -+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); -+ return; -+ } ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ fence_out = (struct fence *)kcpu_fence; ++#else ++ fence_out = (struct dma_fence *)kcpu_fence; ++#endif + -+ /* The suspend buf has already been mapped through binding to csg_reg */ -+ normal_suspend_buf = group->normal_suspend_buf.gpu_va; -+ protm_suspend_buf = group->protected_suspend_buf.gpu_va; -+ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); ++ dma_fence_init(fence_out, ++ &kbase_fence_ops, ++ &kbase_csf_fence_lock, ++ kcpu_queue->fence_context, ++ ++kcpu_queue->fence_seqno); + -+ ginfo = &global_iface->groups[slot]; ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ /* Take an extra reference to the fence on behalf of the sync file. ++ * This is only needded on older kernels where sync_file_create() ++ * does not take its own reference. This was changed in v4.9.68 ++ * where sync_file_create() now takes its own reference. 
++ */ ++ dma_fence_get(fence_out); ++#endif + -+ /* Pick an available address space for this context */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_retain_ctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* create a sync_file fd representing the fence */ ++ *sync_file = sync_file_create(fence_out); ++ if (!(*sync_file)) { ++ ret = -ENOMEM; ++ goto file_create_fail; ++ } + -+ if (kctx->as_nr == KBASEP_AS_NR_INVALID) { -+ dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", -+ group->handle, kctx->tgid, kctx->id, slot); -+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); -+ return; ++ *fd = get_unused_fd_flags(O_CLOEXEC); ++ if (*fd < 0) { ++ ret = *fd; ++ goto fd_flags_fail; + } + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); -+ kbdev->csf.scheduler.csg_slots[slot].resident_group = group; -+ group->csg_nr = slot; -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ fence->basep.fd = *fd; + -+ assign_user_doorbell_to_group(kbdev, group); ++ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; ++ current_command->info.fence.fence = fence_out; + -+ /* Now loop through all the bound & kicked CSs, and program them */ -+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { -+ struct kbase_queue *queue = group->bound_queues[i]; ++ return 0; + -+ if (queue) -+ program_cs(kbdev, queue, false); -+ } ++fd_flags_fail: ++ fput((*sync_file)->file); ++file_create_fail: ++ /* ++ * Upon failure, dma_fence refcount that was increased by ++ * dma_fence_get() or sync_file_create() needs to be decreased ++ * to release it. 
++ */ ++ kbase_fence_put(fence_out); ++ current_command->info.fence.fence = NULL; + ++ return ret; ++} + -+ /* Endpoint programming for CSG */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, -+ compute_mask & U32_MAX); -+ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, -+ compute_mask >> 32); -+ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, -+ fragment_mask & U32_MAX); -+ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, -+ fragment_mask >> 32); -+ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, -+ tiler_mask & U32_MAX); ++static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_fence_info *fence_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct base_fence fence; ++ struct sync_file *sync_file = NULL; ++ int fd; ++ int ret = 0; + -+ /* Register group UID with firmware */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG, -+ group->group_uid); ++ lockdep_assert_held(&kcpu_queue->lock); + -+ ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); -+ ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); -+ ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); -+ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); -+ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); ++ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) ++ return -EFAULT; + -+ /* Program the address space number assigned to the context */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); ++ ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); ++ if (ret) ++ return ret; + -+ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, -+ normal_suspend_buf & U32_MAX); -+ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, -+ normal_suspend_buf >> 32); ++ if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, ++ sizeof(fence))) { ++ ret = -EFAULT; ++ goto fail; ++ } + -+ /* Note, we program the P-mode buffer pointer here, but actual runtime -+ * enter into pmode execution is controlled by the P-mode phy pages are -+ * allocated and mapped with the bound csg_reg, which has a specific flag -+ * for indicating this P-mode runnable condition before a group is -+ * granted its p-mode section entry. Without a P-mode entry, the buffer -+ * pointed is not going to be accessed at all. ++ /* 'sync_file' pointer can't be safely dereferenced once 'fd' is ++ * installed, so the install step needs to be done at the last ++ * before returning success. 
+ */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); -+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); ++ fd_install(fd, sync_file->file); ++ return 0; + -+ if (group->dvs_buf) { -+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, -+ group->dvs_buf & U32_MAX); -+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI, -+ group->dvs_buf >> 32); -+ } ++fail: ++ fput(sync_file->file); ++ kbase_fence_put(current_command->info.fence.fence); ++ current_command->info.fence.fence = NULL; + -+ /* Enable all interrupts for now */ -+ kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); ++ return ret; ++} + -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); -+ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); -+ csg_req ^= CSG_REQ_EP_CFG_MASK; -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, -+ CSG_REQ_EP_CFG_MASK); ++int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ if (!kcpu_queue || !fence_info) ++ return -EINVAL; + -+ /* Set state to START/RESUME */ -+ if (queue_group_suspended_locked(group)) { -+ state = CSG_REQ_STATE_RESUME; -+ } else { -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); -+ state = CSG_REQ_STATE_START; -+ } ++ return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); ++} ++KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, -+ state, CSG_REQ_STATE_MASK); -+ kbase_csf_ring_csg_doorbell(kbdev, slot); -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command *current_command, ++ struct base_fence *fence, struct sync_file **sync_file, int *fd) ++{ ++ if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) ++ return -EINVAL; + -+ /* Update status before rings the door-bell, marking ready => run */ -+ atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); -+ csg_slot->trigger_jiffies = jiffies; -+ csg_slot->priority = prio; ++ return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); ++} ++KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); ++#endif /* CONFIG_SYNC_FILE */ + -+ /* Trace the programming of the CSG on the slot */ -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( -+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id, -+ group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 
1 : 0); ++static void kcpu_queue_process_worker(struct work_struct *data) ++{ ++ struct kbase_kcpu_command_queue *queue = container_of(data, ++ struct kbase_kcpu_command_queue, work); + -+ dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", -+ group->handle, kctx->tgid, kctx->id, slot, prio); ++ mutex_lock(&queue->lock); ++ kcpu_queue_process(queue, false); ++ mutex_unlock(&queue->lock); ++} + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group, -+ (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) | -+ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); ++static int delete_queue(struct kbase_context *kctx, u32 id) ++{ ++ int err = 0; + -+ /* Update the heap reclaim manager */ -+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group); ++ mutex_lock(&kctx->csf.kcpu_queues.lock); + -+ /* Programming a slot consumes a group from scanout */ -+ update_offslot_non_idle_cnt_for_onslot_grp(group); ++ if ((id < KBASEP_MAX_KCPU_QUEUES) && kctx->csf.kcpu_queues.array[id]) { ++ struct kbase_kcpu_command_queue *queue = ++ kctx->csf.kcpu_queues.array[id]; + -+ /* Notify the group's bound csg_reg is now in active use */ -+ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); -+} ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, ++ queue, queue->num_pending_cmds, queue->cqs_wait_count); + -+static void remove_scheduled_group(struct kbase_device *kbdev, -+ struct kbase_queue_group *group) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ /* Disassociate the queue from the system to prevent further ++ * submissions. Draining pending commands would be acceptable ++ * even if a new queue is created using the same ID. ++ */ ++ kctx->csf.kcpu_queues.array[id] = NULL; ++ bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); + -+ lockdep_assert_held(&scheduler->lock); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); + -+ WARN_ON(group->prepared_seq_num == -+ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); -+ WARN_ON(list_empty(&group->link_to_schedule)); ++ mutex_lock(&queue->lock); + -+ list_del_init(&group->link_to_schedule); -+ scheduler->ngrp_to_schedule--; -+ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; -+ group->kctx->csf.sched.ngrp_to_schedule--; -+} ++ /* Metadata struct may outlive KCPU queue. */ ++ kbase_kcpu_dma_fence_meta_put(queue->metadata); + -+static void sched_evict_group(struct kbase_queue_group *group, bool fault, -+ bool update_non_idle_offslot_grps_cnt_from_run_state) -+{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ /* Drain the remaining work for this queue first and go past ++ * all the waits. ++ */ ++ kcpu_queue_process(queue, true); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* All commands should have been processed */ ++ WARN_ON(queue->num_pending_cmds); + -+ if (queue_group_scheduled_locked(group)) { -+ u32 i; ++ /* All CQS wait commands should have been cleaned up */ ++ WARN_ON(queue->cqs_wait_count); + -+ if (update_non_idle_offslot_grps_cnt_from_run_state && -+ (group->run_state == KBASE_CSF_GROUP_SUSPENDED || -+ group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { -+ int new_val = atomic_dec_return( -+ &scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, -+ new_val); -+ } ++ /* Fire the tracepoint with the mutex held to enforce correct ++ * ordering with the summary stream. 
++ */ ++ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); + -+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { -+ if (group->bound_queues[i]) -+ group->bound_queues[i]->enabled = false; -+ } ++ mutex_unlock(&queue->lock); + -+ if (group->prepared_seq_num != -+ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { -+ if (!update_non_idle_offslot_grps_cnt_from_run_state) -+ update_offslot_non_idle_cnt(group); -+ remove_scheduled_group(kbdev, group); -+ } ++ cancel_work_sync(&queue->work); ++ destroy_workqueue(queue->wq); + -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) -+ remove_group_from_idle_wait(group); -+ else { -+ remove_group_from_runnable(scheduler, group, -+ KBASE_CSF_GROUP_INACTIVE); -+ } ++ mutex_destroy(&queue->lock); + -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); ++ kfree(queue); ++ } else { ++ dev_dbg(kctx->kbdev->dev, ++ "Attempt to delete a non-existent KCPU queue"); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ err = -EINVAL; ++ } ++ return err; ++} + -+ if (fault) { -+ group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group, -+ scheduler->total_runnable_grps); -+ } ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue, ++ const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, ++ int alloc_status) ++{ ++ u8 i; + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group, -+ (((u64)scheduler->total_runnable_grps) << 32) | -+ ((u32)group->run_state)); -+ dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", -+ group->handle, scheduler->total_runnable_grps); -+ /* Notify a group has been evicted */ -+ wake_up_all(&kbdev->csf.event_wait); -+ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); ++ for (i = 0; i < jit_alloc->count; i++) { ++ const u8 id = jit_alloc->info[i].id; ++ const struct kbase_va_region *reg = queue->kctx->jit_alloc[id]; ++ u64 gpu_alloc_addr = 0; ++ u64 mmu_flags = 0; + -+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); ++ if ((alloc_status == 0) && !WARN_ON(!reg) && ++ !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) { ++#ifdef CONFIG_MALI_VECTOR_DUMP ++ struct tagged_addr phy = {0}; ++#endif /* CONFIG_MALI_VECTOR_DUMP */ + -+ /* Clear all the bound shared regions and unmap any in-place MMU maps */ -+ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); ++ gpu_alloc_addr = reg->start_pfn << PAGE_SHIFT; ++#ifdef CONFIG_MALI_VECTOR_DUMP ++ mmu_flags = kbase_mmu_create_ate(kbdev, ++ phy, reg->flags, ++ MIDGARD_MMU_BOTTOMLEVEL, ++ queue->kctx->jit_group_id); ++#endif /* CONFIG_MALI_VECTOR_DUMP */ ++ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags); ++ } +} + -+static int term_group_sync(struct kbase_queue_group *group) ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue) +{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ int err = 0; ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); ++} + -+ term_csg_slot(group); ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue) ++{ ++ 
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); ++} + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), -+ remaining); ++static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, ++ bool drain_queue) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ bool process_next = true; ++ size_t i; + -+ if (unlikely(!remaining)) { -+ enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT; ++ lockdep_assert_held(&queue->lock); + -+ dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, -+ group->handle, group->kctx->tgid, -+ group->kctx->id, group->csg_nr); -+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ for (i = 0; i != queue->num_pending_cmds; ++i) { ++ struct kbase_kcpu_command *cmd = ++ &queue->commands[(u8)(queue->start_offset + i)]; ++ int status; + ++ switch (cmd->type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ if (!queue->command_started) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev, ++ queue); ++ queue->command_started = true; ++ } + -+ err = -ETIMEDOUT; -+ } ++ status = 0; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ if (drain_queue) { ++ kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); ++ } else { ++ status = kbase_kcpu_fence_wait_process(queue, ++ &cmd->info.fence); + -+ return err; -+} ++ if (status == 0) ++ process_next = false; ++ else if (status < 0) ++ queue->has_error = true; ++ } ++#else ++ dev_warn(kbdev->dev, ++ "unexpected fence wait command found\n"); + -+void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) -+{ -+ struct kbase_device *kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ bool wait_for_termination = true; -+ bool on_slot; ++ status = -EINVAL; ++ queue->has_error = true; ++#endif + -+ kbase_reset_gpu_assert_failed_or_prevented(kbdev); -+ lockdep_assert_held(&group->kctx->csf.lock); -+ mutex_lock(&scheduler->lock); ++ if (process_next) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( ++ kbdev, queue, status < 0 ? status : 0); ++ queue->command_started = false; ++ } ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue); + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); -+ wait_for_dump_complete_on_group_deschedule(group); -+ if (!queue_group_scheduled_locked(group)) -+ goto unlock; ++ status = 0; + -+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); + -+#ifdef KBASE_PM_RUNTIME -+ /* If the queue group is on slot and Scheduler is in SLEEPING state, -+ * then we need to wake up the Scheduler to exit the sleep state rather -+ * than waiting for the runtime suspend or power down of GPU. -+ * The group termination is usually triggered in the context of Application -+ * thread and it has been seen that certain Apps can destroy groups at -+ * random points and not necessarily when the App is exiting. 
-+ */ -+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { -+ scheduler_wakeup(kbdev, true); ++ if (status < 0) ++ queue->has_error = true; ++#else ++ dev_warn(kbdev->dev, ++ "unexpected fence signal command found\n"); + -+ /* Wait for MCU firmware to start running */ -+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), -+ group->handle, group->kctx->tgid, -+ group->kctx->id, group->csg_nr); -+ /* No point in waiting for CSG termination if MCU didn't -+ * become active. -+ */ -+ wait_for_termination = false; -+ } -+ } ++ status = -EINVAL; ++ queue->has_error = true; +#endif -+ if (!on_slot) { -+ sched_evict_group(group, false, true); -+ } else { -+ bool as_faulty; + -+ if (likely(wait_for_termination)) -+ term_group_sync(group); -+ else -+ term_csg_slot(group); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue, ++ status); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ status = kbase_kcpu_cqs_wait_process(kbdev, queue, ++ &cmd->info.cqs_wait); + -+ /* Treat the csg been terminated */ -+ as_faulty = cleanup_csg_slot(group); -+ /* remove from the scheduler list */ -+ sched_evict_group(group, as_faulty, false); -+ } ++ if (!status && !drain_queue) { ++ process_next = false; ++ } else { ++ /* Either all CQS objects were signaled or ++ * there was an error or the queue itself is ++ * being deleted. ++ * In all cases can move to the next command. ++ * TBD: handle the error ++ */ ++ cleanup_cqs_wait(queue, &cmd->info.cqs_wait); ++ } + -+ WARN_ON(queue_group_scheduled_locked(group)); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ kbase_kcpu_cqs_set_process(kbdev, queue, ++ &cmd->info.cqs_set); + -+unlock: -+ mutex_unlock(&scheduler->lock); -+} ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, ++ &cmd->info.cqs_wait_operation); + -+/** -+ * scheduler_group_schedule() - Schedule a GPU command queue group on firmware -+ * -+ * @group: Pointer to the queue group to be scheduled. -+ * -+ * This function would enable the scheduling of GPU command queue group on -+ * firmware. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+static int scheduler_group_schedule(struct kbase_queue_group *group) -+{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ if (!status && !drain_queue) { ++ process_next = false; ++ } else { ++ /* Either all CQS objects were signaled or ++ * there was an error or the queue itself is ++ * being deleted. ++ * In all cases can move to the next command. 
++ * TBD: handle the error ++ */ ++ cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); ++ } + -+ lockdep_assert_held(&kctx->csf.lock); -+ lockdep_assert_held(&scheduler->lock); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ kbase_kcpu_cqs_set_operation_process(kbdev, queue, ++ &cmd->info.cqs_set_operation); + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) -+ update_idle_suspended_group_state(group); -+ else if (queue_group_idle_locked(group)) { -+ WARN_ON(kctx->csf.sched.num_runnable_grps == 0); -+ WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ /* Clear the queue's error state */ ++ queue->has_error = false; + -+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) -+ update_idle_suspended_group_state(group); -+ else { -+ struct kbase_queue_group *protm_grp; -+ unsigned long flags; ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: { ++ struct kbase_ctx_ext_res_meta *meta = NULL; + -+ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( -+ group)); ++ if (!drain_queue) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, ++ queue); + -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); ++ kbase_gpu_vm_lock(queue->kctx); ++ meta = kbase_sticky_resource_acquire( ++ queue->kctx, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); + -+ /* A normal mode CSG could be idle onslot during -+ * protected mode. In this case clear the -+ * appropriate bit in csg_slots_idle_mask. -+ */ -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ protm_grp = scheduler->active_protm_grp; -+ if (protm_grp && protm_grp != group) { -+ clear_bit((unsigned int)group->csg_nr, -+ scheduler->csg_slots_idle_mask); -+ /* Request the update to confirm the condition inferred. */ -+ group->reevaluate_idle_status = true; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, -+ scheduler->csg_slots_idle_mask[0]); -+ } -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, -+ flags); ++ if (meta == NULL) { ++ queue->has_error = true; ++ dev_dbg( ++ kbdev->dev, ++ "failed to map an external resource"); ++ } + -+ /* If GPU is in protected mode then any doorbells rang -+ * would have no effect. Check if GPU is in protected -+ * mode and if this group has higher priority than the -+ * active protected mode group. If so prompt the FW -+ * to exit protected mode. -+ */ -+ if (protm_grp && -+ group->scan_seq_num < protm_grp->scan_seq_num) { -+ /* Prompt the FW to exit protected mode */ -+ scheduler_force_protm_exit(kbdev); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( ++ kbdev, queue, meta ? 0 : 1); + } ++ break; + } -+ } else if (!queue_group_scheduled_locked(group)) { -+ int new_val; -+ -+ insert_group_to_runnable(&kbdev->csf.scheduler, group, -+ KBASE_CSF_GROUP_RUNNABLE); -+ /* A new group into the scheduler */ -+ new_val = atomic_inc_return( -+ &kbdev->csf.scheduler.non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); -+ } ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: { ++ bool ret; + -+ /* Since a group has become active now, check if GPU needs to be -+ * powered up. Also rekick the Scheduler. 
-+ */ -+ scheduler_wakeup(kbdev, true); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); + -+ return 0; -+} ++ kbase_gpu_vm_lock(queue->kctx); ++ ret = kbase_sticky_resource_release( ++ queue->kctx, NULL, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); + -+/** -+ * set_max_csg_slots() - Set the number of available CSG slots -+ * -+ * @kbdev: Pointer of the GPU device. -+ * -+ * This function would set/limit the number of CSG slots that -+ * can be used in the given tick/tock. It would be less than the total CSG -+ * slots supported by firmware if the number of GPU address space slots -+ * required to utilize all the CSG slots is more than the available -+ * address space slots. -+ */ -+static inline void set_max_csg_slots(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; -+ unsigned int max_address_space_slots = -+ kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; ++ if (!ret) { ++ queue->has_error = true; ++ dev_dbg(kbdev->dev, ++ "failed to release the reference. resource not found"); ++ } + -+ WARN_ON(scheduler->num_active_address_spaces > total_csg_slots); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue, ++ ret ? 0 : 1); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: { ++ bool ret; + -+ if (likely(scheduler->num_active_address_spaces <= -+ max_address_space_slots)) -+ scheduler->num_csg_slots_for_tick = total_csg_slots; -+} ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, ++ queue); + -+/** -+ * count_active_address_space() - Count the number of GPU address space slots -+ * -+ * @kbdev: Pointer of the GPU device. -+ * @kctx: Pointer of the Kbase context. -+ * -+ * This function would update the counter that is tracking the number of GPU -+ * address space slots that would be required to program the CS -+ * group slots from the groups at the head of groups_to_schedule list. -+ */ -+static inline void count_active_address_space(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; -+ unsigned int max_address_space_slots = -+ kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; ++ kbase_gpu_vm_lock(queue->kctx); ++ ret = kbase_sticky_resource_release_force( ++ queue->kctx, NULL, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); + -+ if (scheduler->ngrp_to_schedule <= total_csg_slots) { -+ if (kctx->csf.sched.ngrp_to_schedule == 1) -+ scheduler->num_active_address_spaces++; ++ if (!ret) { ++ queue->has_error = true; ++ dev_dbg(kbdev->dev, ++ "failed to release the reference. resource not found"); ++ } + -+ if (scheduler->num_active_address_spaces <= -+ max_address_space_slots) -+ scheduler->num_csg_slots_for_tick++; -+ } -+} ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( ++ kbdev, queue, ret ? 0 : 1); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ { ++ if (drain_queue) { ++ /* We still need to call this function to clean the JIT alloc info up */ ++ kbase_kcpu_jit_allocate_finish(queue, cmd); ++ } else { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev, ++ queue); + -+/* Two schemes are used in assigning the priority to CSG slots for a given -+ * CSG from the 'groups_to_schedule' list. 
-+ * This is needed as an idle on-slot group is deprioritized by moving it to -+ * the tail of 'groups_to_schedule' list. As a result it can either get -+ * evicted from the CSG slot in current tick/tock dealing, or its position -+ * can be after the lower priority non-idle groups in the 'groups_to_schedule' -+ * list. The latter case can result in the on-slot subset containing both -+ * non-idle and idle CSGs, and is handled through the 2nd scheme described -+ * below. -+ * -+ * First scheme :- If all the slots are going to be occupied by the non-idle or -+ * idle groups, then a simple assignment of the priority is done as per the -+ * position of a group in the 'groups_to_schedule' list. So maximum priority -+ * gets assigned to the slot of a group which is at the head of the list. -+ * Here the 'groups_to_schedule' list would effectively be ordered as per the -+ * static priority of groups. -+ * -+ * Second scheme :- If the slots are going to be occupied by a mix of idle and -+ * non-idle groups then the priority assignment needs to ensure that the -+ * priority of a slot belonging to a higher priority idle group will always be -+ * greater than the priority of a slot belonging to a lower priority non-idle -+ * group, reflecting the original position of a group in the scan order (i.e -+ * static priority) 'scan_seq_num', which is set during the prepare phase of a -+ * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it -+ * is idle. -+ * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned with the first -+ * 'slots_for_tick' groups in the original scan order are assigned a priority in -+ * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick), -+ * whereas rest of the groups are assigned the priority in the subrange -+ * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher -+ * priority group ends up after the non-idle lower priority groups in the -+ * 'groups_to_schedule' list, it will get a higher slot priority. And this will -+ * enable the FW to quickly start the execution of higher priority group when it -+ * gets de-idled. -+ */ -+static u8 get_slot_priority(struct kbase_queue_group *group) -+{ -+ struct kbase_csf_scheduler *scheduler = -+ &group->kctx->kbdev->csf.scheduler; -+ u8 slot_prio; -+ u32 slots_for_tick = scheduler->num_csg_slots_for_tick; -+ u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; -+ /* Check if all the slots are going to be occupied by the non-idle or -+ * idle groups. -+ */ -+ if (scheduler->non_idle_scanout_grps >= slots_for_tick || -+ !scheduler->non_idle_scanout_grps) { -+ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots); -+ } else { -+ /* There will be a mix of idle and non-idle groups. */ -+ if (group->scan_seq_num < slots_for_tick) -+ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - -+ group->scan_seq_num); -+ else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) -+ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); -+ else -+ slot_prio = 0; -+ } -+ return slot_prio; -+} ++ status = kbase_kcpu_jit_allocate_process(queue, ++ cmd); ++ if (status == -EAGAIN) { ++ process_next = false; ++ } else { ++ if (status != 0) ++ queue->has_error = true; + -+/** -+ * update_resident_groups_priority() - Update the priority of resident groups -+ * -+ * @kbdev: The GPU device. 
-+ * -+ * This function will update the priority of all resident queue groups -+ * that are at the head of groups_to_schedule list, preceding the first -+ * non-resident group. -+ * -+ * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on -+ * the priority update. -+ */ -+static void update_resident_groups_priority(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = scheduler->num_csg_slots_for_tick; ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( ++ kbdev, queue, ++ &cmd->info.jit_alloc, status); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ while (!list_empty(&scheduler->groups_to_schedule)) { -+ struct kbase_queue_group *group = -+ list_first_entry(&scheduler->groups_to_schedule, -+ struct kbase_queue_group, -+ link_to_schedule); -+ bool resident = -+ kbasep_csf_scheduler_group_is_on_slot_locked(group); ++ kbase_kcpu_jit_allocate_finish(queue, ++ cmd); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue); ++ } ++ } + -+ if ((group->prepared_seq_num >= num_groups) || !resident) + break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue); + -+ update_csg_slot_priority(group, -+ get_slot_priority(group)); ++ status = kbase_kcpu_jit_free_process(queue, cmd); ++ if (status) ++ queue->has_error = true; + -+ /* Drop the head group from the list */ -+ remove_scheduled_group(kbdev, group); -+ scheduler->remaining_tick_slots--; -+ } -+} ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ kbdev, queue); ++ break; ++ } ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { ++ struct kbase_suspend_copy_buffer *sus_buf = ++ cmd->info.suspend_buf_copy.sus_buf; + -+/** -+ * program_group_on_vacant_csg_slot() - Program a non-resident group on the -+ * given vacant CSG slot. -+ * @kbdev: Pointer to the GPU device. -+ * @slot: Vacant CSG slot number. -+ * -+ * This function will program a non-resident group at the head of -+ * kbase_csf_scheduler.groups_to_schedule list on the given vacant -+ * CSG slot, provided the initial position of the non-resident -+ * group in the list is less than the number of CSG slots and there is -+ * an available GPU address space slot. -+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after -+ * programming the slot. -+ */ -+static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, -+ s8 slot) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *const group = -+ list_empty(&scheduler->groups_to_schedule) ? 
NULL : -+ list_first_entry(&scheduler->groups_to_schedule, -+ struct kbase_queue_group, -+ link_to_schedule); -+ u32 num_groups = scheduler->num_csg_slots_for_tick; ++ if (!drain_queue) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( ++ kbdev, queue); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ if (group && (group->prepared_seq_num < num_groups)) { -+ bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group); ++ status = kbase_csf_queue_group_suspend_process( ++ queue->kctx, sus_buf, ++ cmd->info.suspend_buf_copy.group_handle); ++ if (status) ++ queue->has_error = true; + -+ if (!WARN_ON(ret)) { -+ if (kctx_as_enabled(group->kctx) && !group->faulted) { -+ program_csg_slot(group, slot, -+ get_slot_priority(group)); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( ++ kbdev, queue, status); ++ } + -+ if (likely(csg_slot_in_use(kbdev, slot))) { -+ /* Drop the head group from the list */ -+ remove_scheduled_group(kbdev, group); -+ scheduler->remaining_tick_slots--; -+ } ++ if (!sus_buf->cpu_alloc) { ++ int i; ++ ++ for (i = 0; i < sus_buf->nr_pages; i++) ++ put_page(sus_buf->pages[i]); + } else { -+ update_offslot_non_idle_cnt(group); -+ remove_scheduled_group(kbdev, group); ++ kbase_mem_phy_alloc_kernel_unmapped( ++ sus_buf->cpu_alloc); ++ kbase_mem_phy_alloc_put( ++ sus_buf->cpu_alloc); + } -+ } -+ } -+} + -+/** -+ * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident -+ * group and update the priority of resident groups. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * @slot: Vacant CSG slot number. -+ * -+ * This function will first update the priority of all resident queue groups -+ * that are at the head of groups_to_schedule list, preceding the first -+ * non-resident group, it will then try to program the given CS -+ * group slot with the non-resident group. Finally update the priority of all -+ * resident queue groups following the non-resident group. -+ * -+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted. -+ */ -+static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ struct kbase_csf_csg_slot *const csg_slot = -+ scheduler->csg_slots; ++ kfree(sus_buf->pages); ++ kfree(sus_buf); ++ break; ++ } ++#endif ++ default: ++ dev_dbg(kbdev->dev, ++ "Unrecognized command type"); ++ break; ++ } /* switch */ + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); ++ /*TBD: error handling */ + -+ /* First update priority for already resident groups (if any) -+ * before the non-resident group -+ */ -+ update_resident_groups_priority(kbdev); ++ if (!process_next) ++ break; ++ } + -+ /* Now consume the vacant slot for the non-resident group */ -+ program_group_on_vacant_csg_slot(kbdev, slot); ++ if (i > 0) { ++ queue->start_offset += i; ++ queue->num_pending_cmds -= i; + -+ /* Now update priority for already resident groups (if any) -+ * following the non-resident group -+ */ -+ update_resident_groups_priority(kbdev); ++ /* If an attempt to enqueue commands failed then we must raise ++ * an event in case the client wants to retry now that there is ++ * free space in the buffer. 
++ */ ++ if (queue->enqueue_failed) { ++ queue->enqueue_failed = false; ++ kbase_csf_event_signal_cpu_only(queue->kctx); ++ } ++ } +} + -+static bool slots_state_changed(struct kbase_device *kbdev, -+ unsigned long *slots_mask, -+ bool (*state_check_func)(struct kbase_device *, s8)) ++static size_t kcpu_queue_get_space(struct kbase_kcpu_command_queue *queue) +{ -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; -+ bool changed = false; -+ u32 i; -+ -+ for_each_set_bit(i, slots_mask, num_groups) { -+ if (state_check_func(kbdev, (s8)i)) { -+ set_bit(i, changed_slots); -+ changed = true; -+ } -+ } -+ -+ if (changed) -+ bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS); -+ -+ return changed; ++ return KBASEP_KCPU_QUEUE_SIZE - queue->num_pending_cmds; +} + -+/** -+ * program_suspending_csg_slots() - Program the CSG slots vacated on suspension -+ * of queue groups running on them. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * -+ * This function will first wait for the ongoing suspension to complete on a -+ * CSG slot and will then program the vacant slot with the -+ * non-resident queue group inside the groups_to_schedule list. -+ * The programming of the non-resident queue group on the vacant slot could -+ * fail due to unavailability of free GPU address space slot and so the -+ * programming is re-attempted after the ongoing suspension has completed -+ * for all the CSG slots. -+ * The priority of resident groups before and after the non-resident group -+ * in the groups_to_schedule list would also be updated. -+ * This would be repeated for all the slots undergoing suspension. -+ * GPU reset would be initiated if the wait for suspend times out. -+ */ -+static void program_suspending_csg_slots(struct kbase_device *kbdev) ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( ++ const struct kbase_kcpu_command_queue *queue, ++ const struct kbase_kcpu_command *cmd) +{ -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); -+ DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; -+ bool suspend_wait_failed = false; -+ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ /* In the current implementation, csgs_events_enable_mask would be used -+ * only to indicate suspending CSGs. -+ */ -+ bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, -+ MAX_SUPPORTED_CSGS); -+ -+ while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { -+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); -+ -+ bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); -+ -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ slots_state_changed(kbdev, changed, -+ csg_slot_stopped_raw), -+ remaining); -+ -+ if (likely(remaining)) { -+ u32 i; -+ -+ for_each_set_bit(i, changed, num_groups) { -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[i].resident_group; -+ -+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) -+ continue; -+ -+ /* The on slot csg is now stopped */ -+ clear_bit(i, slot_mask); -+ -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( -+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); -+ -+ if (likely(group)) { -+ bool as_fault; -+ /* Only do save/cleanup if the -+ * group is not terminated during -+ * the sleep. 
-+ */ -+ save_csg_slot(group); -+ as_fault = cleanup_csg_slot(group); -+ /* If AS fault detected, evict it */ -+ if (as_fault) { -+ sched_evict_group(group, true, true); -+ set_bit(i, evicted_mask); -+ } -+ } -+ -+ program_vacant_csg_slot(kbdev, (s8)i); -+ } -+ } else { -+ u32 i; -+ -+ /* Groups that have failed to suspend in time shall -+ * raise a fatal error as they could no longer be -+ * safely resumed. -+ */ -+ for_each_set_bit(i, slot_mask, num_groups) { -+ struct kbase_queue_group *const group = -+ scheduler->csg_slots[i].resident_group; -+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; -+ -+ struct base_gpu_queue_group_error const -+ err_payload = { .error_type = -+ BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { -+ .fatal_group = { -+ .status = -+ GPU_EXCEPTION_TYPE_SW_FAULT_2, -+ } } }; ++ struct kbase_device *kbdev = queue->kctx->kbdev; + -+ if (unlikely(group == NULL)) -+ continue; ++ switch (cmd->type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue, ++ cmd->info.fence.fence); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue, ++ cmd->info.fence.fence); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ { ++ const struct base_cqs_wait_info *waits = ++ cmd->info.cqs_wait.objs; ++ u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags; ++ unsigned int i; + -+ /* TODO GPUCORE-25328: The CSG can't be -+ * terminated, the GPU will be reset as a -+ * work-around. -+ */ -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)", -+ kbase_backend_get_cycle_cnt(kbdev), -+ group->handle, group->kctx->tgid, -+ group->kctx->id, i, -+ kbdev->csf.fw_timeout_ms); -+ if (kbase_csf_firmware_ping_wait(kbdev, -+ FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ schedule_actions_trigger_df(kbdev, group->kctx, error_type); ++ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( ++ kbdev, queue, waits[i].addr, waits[i].val, ++ (inherit_err_flags & ((u32)1 << i)) ? 1 : 0); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ { ++ const struct base_cqs_set *sets = cmd->info.cqs_set.objs; ++ unsigned int i; + -+ kbase_csf_add_group_fatal_error(group, &err_payload); -+ kbase_event_wakeup(group->kctx); ++ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue, ++ sets[i].addr); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ { ++ const struct base_cqs_wait_operation_info *waits = ++ cmd->info.cqs_wait_operation.objs; ++ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags; ++ unsigned int i; + -+ /* The group has failed suspension, stop -+ * further examination. -+ */ -+ clear_bit(i, slot_mask); -+ set_bit(i, scheduler->csgs_events_enable_mask); -+ } ++ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( ++ kbdev, queue, waits[i].addr, waits[i].val, ++ waits[i].operation, waits[i].data_type, ++ (inherit_err_flags & ((uint32_t)1 << i)) ? 
1 : 0); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ { ++ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs; ++ unsigned int i; + -+ suspend_wait_failed = true; ++ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( ++ kbdev, queue, sets[i].addr, sets[i].val, ++ sets[i].operation, sets[i].data_type); + } ++ break; + } ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue, ++ cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue, ++ cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( ++ kbdev, queue, cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ { ++ u8 i; + -+ if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS)) -+ dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", -+ num_groups, evicted_mask); ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); ++ for (i = 0; i < cmd->info.jit_alloc.count; i++) { ++ const struct base_jit_alloc_info *info = ++ &cmd->info.jit_alloc.info[i]; + -+ if (likely(!suspend_wait_failed)) { -+ u32 i; ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( ++ kbdev, queue, info->gpu_alloc_addr, info->va_pages, ++ info->commit_pages, info->extension, info->id, info->bin_id, ++ info->max_allocations, info->flags, info->usage_id); ++ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: ++ { ++ u8 i; + -+ while (scheduler->ngrp_to_schedule && -+ scheduler->remaining_tick_slots) { -+ i = find_first_zero_bit(scheduler->csg_inuse_bitmap, -+ num_groups); -+ if (WARN_ON(i == num_groups)) -+ break; -+ program_vacant_csg_slot(kbdev, (s8)i); -+ if (!csg_slot_in_use(kbdev, (int)i)) { -+ dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); -+ break; -+ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); ++ for (i = 0; i < cmd->info.jit_free.count; i++) { ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( ++ kbdev, queue, cmd->info.jit_free.ids[i]); + } -+ } else { -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); ++ break; ++ } ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( ++ kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, ++ cmd->info.suspend_buf_copy.group_handle); ++ break; ++#endif ++ default: ++ dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type); ++ break; + } +} + -+static void suspend_queue_group(struct kbase_queue_group *group) ++int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enq) +{ -+ unsigned long flags; -+ struct kbase_csf_scheduler *const scheduler = -+ &group->kctx->kbdev->csf.scheduler; ++ struct kbase_kcpu_command_queue *queue = NULL; ++ void __user *user_cmds = u64_to_user_ptr(enq->addr); ++ int ret = 0; ++ u32 i; + -+ 
spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ /* This shall be used in program_suspending_csg_slots() where we -+ * assume that whilst CSGs are being suspended, this bitmask is not -+ * used by anything else i.e., it indicates only the CSGs going -+ * through suspension. ++ /* The offset to the first command that is being processed or yet to ++ * be processed is of u8 type, so the number of commands inside the ++ * queue cannot be more than 256. The current implementation expects ++ * exactly 256, any other size will require the addition of wrapping ++ * logic. + */ -+ clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ -+ /* If AS fault detected, terminate the group */ -+ if (!kctx_as_enabled(group->kctx) || group->faulted) -+ term_csg_slot(group); -+ else -+ suspend_csg_slot(group); -+} ++ BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256); + -+static void wait_csg_slots_start(struct kbase_device *kbdev) -+{ -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; -+ u32 i; ++ /* Whilst the backend interface allows enqueueing multiple commands in ++ * a single operation, the Base interface does not expose any mechanism ++ * to do so. And also right now the handling is missing for the case ++ * where multiple commands are submitted and the enqueue of one of the ++ * command in the set fails after successfully enqueuing other commands ++ * in the set. ++ */ ++ if (enq->nr_commands != 1) { ++ dev_dbg(kctx->kbdev->dev, ++ "More than one commands enqueued"); ++ return -EINVAL; ++ } + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* There might be a race between one thread trying to enqueue commands to the queue ++ * and other thread trying to delete the same queue. ++ * This racing could lead to use-after-free problem by enqueuing thread if ++ * resources for the queue has already been freed by deleting thread. ++ * ++ * To prevent the issue, two mutexes are acquired/release asymmetrically as follows. ++ * ++ * Lock A (kctx mutex) ++ * Lock B (queue mutex) ++ * Unlock A ++ * Unlock B ++ * ++ * With the kctx mutex being held, enqueuing thread will check the queue ++ * and will return error code if the queue had already been deleted. ++ */ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ queue = kctx->csf.kcpu_queues.array[enq->id]; ++ if (queue == NULL) { ++ dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ return -EINVAL; ++ } ++ mutex_lock(&queue->lock); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); + -+ /* extract start slot flags for check */ -+ for (i = 0; i < num_groups; i++) { -+ if (atomic_read(&scheduler->csg_slots[i].state) == -+ CSG_SLOT_READY2RUN) -+ set_bit(i, slot_mask); ++ if (kcpu_queue_get_space(queue) < enq->nr_commands) { ++ ret = -EBUSY; ++ queue->enqueue_failed = true; ++ goto out; + } + -+ while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { -+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); ++ /* Copy all command's info to the command buffer. ++ * Note: it would be more efficient to process all commands in-line ++ * until we encounter an unresolved CQS_ / FENCE_WAIT, however, the ++ * interface allows multiple commands to be enqueued so we must account ++ * for the possibility to roll back. 
++ */ + -+ bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); ++ for (i = 0; (i != enq->nr_commands) && !ret; ++i) { ++ struct kbase_kcpu_command *kcpu_cmd = ++ &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; ++ struct base_kcpu_command command; ++ unsigned int j; + -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ slots_state_changed(kbdev, changed, csg_slot_running), -+ remaining); ++ if (copy_from_user(&command, user_cmds, sizeof(command))) { ++ ret = -EFAULT; ++ goto out; ++ } + -+ if (likely(remaining)) { -+ for_each_set_bit(i, changed, num_groups) { -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[i].resident_group; ++ user_cmds = (void __user *)((uintptr_t)user_cmds + ++ sizeof(struct base_kcpu_command)); + -+ /* The on slot csg is now running */ -+ clear_bit(i, slot_mask); -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); ++ for (j = 0; j < sizeof(command.padding); j++) { ++ if (command.padding[j] != 0) { ++ dev_dbg(kctx->kbdev->dev, ++ "base_kcpu_command padding not 0\n"); ++ ret = -EINVAL; ++ goto out; + } -+ } else { -+ const int csg_nr = ffs(slot_mask[0]) - 1; -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[csg_nr].resident_group; -+ enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT; -+ -+ dev_err(kbdev->dev, -+ "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n", -+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, -+ num_groups, slot_mask); -+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ schedule_actions_trigger_df(kbdev, group->kctx, error_type); ++ } + -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num); ++ switch (command.type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ ret = kbase_kcpu_fence_wait_prepare(queue, ++ &command.info.fence, kcpu_cmd); ++#else ++ ret = -EINVAL; ++ dev_warn(kctx->kbdev->dev, "fence wait command unsupported\n"); ++#endif ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ ret = kbase_kcpu_fence_signal_prepare(queue, ++ &command.info.fence, kcpu_cmd); ++#else ++ ret = -EINVAL; ++ dev_warn(kctx->kbdev->dev, "fence signal command unsupported\n"); ++#endif ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ ret = kbase_kcpu_cqs_wait_prepare(queue, ++ &command.info.cqs_wait, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ ret = kbase_kcpu_cqs_set_prepare(queue, ++ &command.info.cqs_set, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ ret = kbase_kcpu_cqs_wait_operation_prepare(queue, ++ &command.info.cqs_wait_operation, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ ret = kbase_kcpu_cqs_set_operation_prepare(queue, ++ &command.info.cqs_set_operation, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; ++ ret = 0; ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: ++ ret = kbase_kcpu_map_import_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: ++ ret = kbase_kcpu_unmap_import_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: ++ ret = 
kbase_kcpu_unmap_import_force_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ ret = kbase_kcpu_jit_allocate_prepare(queue, ++ &command.info.jit_alloc, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: ++ ret = kbase_kcpu_jit_free_prepare(queue, ++ &command.info.jit_free, kcpu_cmd); ++ break; ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ++ ret = kbase_csf_queue_group_suspend_prepare(queue, ++ &command.info.suspend_buf_copy, ++ kcpu_cmd); ++ break; ++#endif ++ default: ++ dev_dbg(queue->kctx->kbdev->dev, ++ "Unknown command type %u", command.type); ++ ret = -EINVAL; + break; + } + } -+} + -+/** -+ * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state -+ * flagged after the completion of a CSG status -+ * update command -+ * -+ * @kbdev: Pointer to the GPU device. -+ * @slot: The given slot for checking an occupying resident group's idle -+ * state. -+ * -+ * This function is called at the start of scheduling tick to check the -+ * idle status of a queue group resident on a CSG slot. -+ * The caller must make sure the corresponding status update command has -+ * been called and completed before checking this status. -+ * -+ * Return: true if the group resident on slot is idle, otherwise false. -+ */ -+static bool group_on_slot_is_idle(struct kbase_device *kbdev, -+ unsigned long slot) -+{ -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[slot]; -+ bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & -+ CSG_STATUS_STATE_IDLE_MASK; ++ if (!ret) { ++ /* We only instrument the enqueues after all commands have been ++ * successfully enqueued, as if we do them during the enqueue ++ * and there is an error, we won't be able to roll them back ++ * like is done for the command enqueues themselves. ++ */ ++ for (i = 0; i != enq->nr_commands; ++i) { ++ u8 cmd_idx = (u8)(queue->start_offset + queue->num_pending_cmds + i); + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( ++ queue, &queue->commands[cmd_idx]); ++ } + -+ return idle; ++ queue->num_pending_cmds += enq->nr_commands; ++ kcpu_queue_process(queue, false); ++ } ++ ++out: ++ mutex_unlock(&queue->lock); ++ ++ return ret; +} + -+/** -+ * slots_update_state_changed() - Check the handshake state of a subset of -+ * command group slots. -+ * -+ * @kbdev: The GPU device. -+ * @field_mask: The field mask for checking the state in the csg_req/ack. -+ * @slots_mask: A bit_map specifying the slots to check. -+ * @slots_done: A cleared bit_map for returning the slots that -+ * have finished update. -+ * -+ * Checks the state of a subset of slots selected through the slots_mask -+ * bit_map. Records which slots' handshake completed and send it back in the -+ * slots_done bit_map. -+ * -+ * Return: true if the slots_done is set for at least one slot. -+ * Otherwise false. 
-+ */ -+static -+bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, -+ const unsigned long *slots_mask, unsigned long *slots_done) ++int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) +{ -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ bool changed = false; -+ u32 i; ++ int idx; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ bitmap_zero(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + -+ for_each_set_bit(i, slots_mask, num_groups) { -+ struct kbase_csf_cmd_stream_group_info const *const ginfo = -+ &kbdev->csf.global_iface.groups[i]; -+ u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ++ for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) ++ kctx->csf.kcpu_queues.array[idx] = NULL; + -+ state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ mutex_init(&kctx->csf.kcpu_queues.lock); + -+ if (!(state & field_mask)) { -+ set_bit(i, slots_done); -+ changed = true; -+ } -+ } ++ atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0); + -+ return changed; ++ return 0; +} + -+/** -+ * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on -+ * the specified groups. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * @field_mask: The field mask for checking the state in the csg_req/ack. -+ * @slot_mask: Bitmap reflecting the slots, the function will modify -+ * the acknowledged slots by clearing their corresponding -+ * bits. -+ * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out. -+ * -+ * This function waits for the acknowledgment of the request that have -+ * already been placed for the CSG slots by the caller. Currently used for -+ * the CSG priority update and status update requests. -+ * -+ * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For -+ * timed out condition with unacknowledged slots, their bits remain -+ * set in the slot_mask. 
-+ */ -+static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, -+ u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies) ++void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) +{ -+ const u32 num_groups = kbdev->csf.global_iface.group_num; -+ long remaining = wait_in_jiffies; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ while (!bitmap_empty(slot_mask, num_groups) && -+ !kbase_reset_gpu_is_active(kbdev)) { -+ DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 }; -+ -+ remaining = wait_event_timeout(kbdev->csf.event_wait, -+ slots_update_state_changed(kbdev, field_mask, -+ slot_mask, dones), -+ remaining); -+ -+ if (likely(remaining)) -+ bitmap_andnot(slot_mask, slot_mask, dones, num_groups); -+ else { ++ while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES)) { ++ int id = find_first_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); + -+ /* Timed-out on the wait */ -+ return -ETIMEDOUT; -+ } ++ if (WARN_ON(!kctx->csf.kcpu_queues.array[id])) ++ clear_bit(id, kctx->csf.kcpu_queues.in_use); ++ else ++ (void)delete_queue(kctx, id); + } + -+ return 0; ++ mutex_destroy(&kctx->csf.kcpu_queues.lock); +} ++KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); + -+static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) ++int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *del) +{ -+ unsigned long *slot_mask = -+ kbdev->csf.scheduler.csg_slots_prio_update; -+ long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, -+ slot_mask, wait_time); -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ return delete_queue(kctx, (u32)del->id); ++} + -+ if (unlikely(ret != 0)) { -+ const int csg_nr = ffs(slot_mask[0]) - 1; -+ struct kbase_queue_group *group = -+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; -+ enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT; ++int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *newq) ++{ ++ struct kbase_kcpu_command_queue *queue; ++ int idx; ++ int n; ++ int ret = 0; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ struct kbase_kcpu_dma_fence_meta *metadata; ++#endif ++ /* The queue id is of u8 type and we use the index of the kcpu_queues ++ * array as an id, so the number of elements in the array can't be ++ * more than 256. ++ */ ++ BUILD_BUG_ON(KBASEP_MAX_KCPU_QUEUES > 256); + -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", -+ kbase_backend_get_cycle_cnt(kbdev), -+ kbdev->csf.fw_timeout_ms, -+ slot_mask[0]); -+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ schedule_actions_trigger_df(kbdev, group->kctx, error_type); ++ mutex_lock(&kctx->csf.kcpu_queues.lock); + -+ /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. 
*/ -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); ++ idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); ++ if (idx >= (int)KBASEP_MAX_KCPU_QUEUES) { ++ ret = -ENOMEM; ++ goto out; + } -+} + -+static void report_csg_termination(struct kbase_queue_group *const group) -+{ -+ struct base_gpu_queue_group_error -+ err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { .fatal_group = { -+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_2, -+ } } }; ++ if (WARN_ON(kctx->csf.kcpu_queues.array[idx])) { ++ ret = -EINVAL; ++ goto out; ++ } + -+ kbase_csf_add_group_fatal_error(group, &err); -+} ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); + -+void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct list_head *evicted_groups) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *group; -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ u32 slot; -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; ++ if (!queue) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ lockdep_assert_held(&kctx->csf.lock); -+ mutex_lock(&scheduler->lock); ++ queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx); ++ if (queue->wq == NULL) { ++ kfree(queue); ++ ret = -ENOMEM; + -+ /* This code is only called during reset, so we don't wait for the CSG -+ * slots to be stopped -+ */ -+ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++ goto out; ++ } + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u); -+ for (slot = 0; slot < num_groups; slot++) { -+ group = kbdev->csf.scheduler.csg_slots[slot].resident_group; -+ if (group && group->kctx == kctx) { -+ bool as_fault; ++ bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); ++ kctx->csf.kcpu_queues.array[idx] = queue; ++ mutex_init(&queue->lock); ++ queue->kctx = kctx; ++ queue->start_offset = 0; ++ queue->num_pending_cmds = 0; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ queue->fence_context = dma_fence_context_alloc(1); ++ queue->fence_seqno = 0; ++ queue->fence_wait_processed = false; + -+ dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset", -+ group->handle, group->csg_nr); ++ metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); ++ if (!metadata) { ++ destroy_workqueue(queue->wq); ++ kfree(queue); ++ ret = -ENOMEM; ++ goto out; ++ } + -+ term_csg_slot(group); -+ as_fault = cleanup_csg_slot(group); -+ /* remove the group from the scheduler list */ -+ sched_evict_group(group, as_fault, false); -+ /* signal Userspace that CSG is being terminated */ -+ report_csg_termination(group); -+ /* return the evicted group to the caller */ -+ list_add_tail(&group->link, evicted_groups); -+ set_bit(slot, slot_mask); -+ } ++ metadata->kbdev = kctx->kbdev; ++ metadata->kctx_id = kctx->id; ++ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", ++ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); ++ if (WARN_ON(n >= MAX_TIMELINE_NAME)) { ++ destroy_workqueue(queue->wq); ++ kfree(queue); ++ kfree(metadata); ++ ret = -EINVAL; ++ goto out; + } + -+ dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", -+ kctx->tgid, kctx->id, num_groups, slot_mask); ++ kbase_refcount_set(&metadata->refcount, 1); ++ queue->metadata = metadata; ++ atomic_inc(&kctx->kbdev->live_fence_metadata); ++#endif /* CONFIG_SYNC_FILE */ ++ queue->enqueue_failed = false; ++ queue->command_started = false; ++ 
INIT_LIST_HEAD(&queue->jit_blocked); ++ queue->has_error = false; ++ INIT_WORK(&queue->work, kcpu_queue_process_worker); ++ queue->id = idx; + -+ /* Fatal errors may have been the cause of the GPU reset -+ * taking place, in which case we want to make sure that -+ * we wake up the fatal event queue to notify userspace -+ * only once. Otherwise, we may have duplicate event -+ * notifications between the time the first notification -+ * occurs and the time the GPU is reset. ++ newq->id = idx; ++ ++ /* Fire the tracepoint with the mutex held to enforce correct ordering ++ * with the summary stream. + */ -+ kbase_event_wakeup(kctx); ++ KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id, ++ queue->num_pending_cmds); + -+ mutex_unlock(&scheduler->lock); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups); -+} ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, ++ queue->fence_context, 0); ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); ++#endif ++out: ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); + -+/** -+ * scheduler_slot_protm_ack - Acknowledging the protected region requests -+ * from the resident group on a given slot. ++ return ret; ++} ++KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +new file mode 100644 +index 000000000..5cad8b200 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +@@ -0,0 +1,384 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: Pointer to the GPU device. -+ * @group: Pointer to the resident group on the given slot. -+ * @slot: The slot that the given group is actively operating on. ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * -+ * The function assumes that the given slot is in stable running state and -+ * has already been judged by the caller on that any pending protected region -+ * requests of the resident group should be acknowledged. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if the group has pending protm request(s) and is acknowledged. -+ * The caller should arrange to enter the protected mode for servicing -+ * it. Otherwise return false, indicating the group has no pending protm -+ * request. 
+ */ -+static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, -+ struct kbase_queue_group *const group, -+ const int slot) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ bool protm_ack = false; -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[slot]; -+ u32 max_csi; -+ int i; -+ -+ if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) -+ return protm_ack; + -+ lockdep_assert_held(&scheduler->lock); -+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); ++#ifndef _KBASE_CSF_KCPU_H_ ++#define _KBASE_CSF_KCPU_H_ + -+ max_csi = ginfo->stream_num; -+ for (i = find_first_bit(group->protm_pending_bitmap, max_csi); -+ i < max_csi; -+ i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { -+ struct kbase_queue *queue = group->bound_queues[i]; ++#include ++#include + -+ clear_bit(i, group->protm_pending_bitmap); -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue, -+ group->protm_pending_bitmap[0]); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++#include ++#else ++#include ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + -+ if (!WARN_ON(!queue) && queue->enabled) { -+ struct kbase_csf_cmd_stream_info *stream = -+ &ginfo->streams[i]; -+ u32 cs_protm_ack = kbase_csf_firmware_cs_output( -+ stream, CS_ACK) & -+ CS_ACK_PROTM_PEND_MASK; -+ u32 cs_protm_req = kbase_csf_firmware_cs_input_read( -+ stream, CS_REQ) & -+ CS_REQ_PROTM_PEND_MASK; ++/* The maximum number of KCPU commands in flight, enqueueing more commands ++ * than this value shall block. ++ */ ++#define KBASEP_KCPU_QUEUE_SIZE ((size_t)256) + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, -+ queue, cs_protm_ack ^ cs_protm_req); ++/** ++ * struct kbase_kcpu_command_import_info - Structure which holds information ++ * about the buffer to be imported ++ * ++ * @gpu_va: Address of the buffer to be imported. ++ */ ++struct kbase_kcpu_command_import_info { ++ u64 gpu_va; ++}; + -+ if (cs_protm_ack == cs_protm_req) { -+ dev_dbg(kbdev->dev, -+ "PROTM-ack already done for queue-%d group-%d slot-%d", -+ queue->csi_index, group->handle, slot); -+ continue; -+ } ++/** ++ * struct kbase_kcpu_command_fence_info - Structure which holds information about the ++ * fence object enqueued in the kcpu command queue ++ * ++ * @fence_cb: Fence callback ++ * @fence: Fence ++ * @kcpu_queue: kcpu command queue ++ */ ++struct kbase_kcpu_command_fence_info { ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence_cb fence_cb; ++ struct fence *fence; ++#else ++ struct dma_fence_cb fence_cb; ++ struct dma_fence *fence; ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++ struct kbase_kcpu_command_queue *kcpu_queue; ++}; + -+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, -+ cs_protm_ack, -+ CS_ACK_PROTM_PEND_MASK); -+ protm_ack = true; -+ dev_dbg(kbdev->dev, -+ "PROTM-ack for queue-%d, group-%d slot-%d", -+ queue->csi_index, group->handle, slot); -+ } -+ } ++/** ++ * struct kbase_kcpu_command_cqs_set_info - Structure which holds information ++ * about CQS objects for the kcpu CQS set command ++ * ++ * @objs: Array of structures which define CQS objects to be used by ++ * the kcpu command. ++ * @nr_objs: Number of CQS objects in the array. 
++ */ ++struct kbase_kcpu_command_cqs_set_info { ++ struct base_cqs_set *objs; ++ unsigned int nr_objs; ++}; + -+ return protm_ack; -+} ++/** ++ * struct kbase_kcpu_command_cqs_wait_info - Structure which holds information ++ * about CQS objects for the kcpu CQS wait command ++ * ++ * @objs: Array of structures which define CQS objects to be used by ++ * the kcpu command. ++ * @signaled: Bit array used to report the status of the CQS wait objects. ++ * 1 is signaled, 0 otherwise. ++ * @nr_objs: Number of CQS objects in the array. ++ * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field ++ * to be served as the source for importing into the ++ * queue's error-state. ++ */ ++struct kbase_kcpu_command_cqs_wait_info { ++ struct base_cqs_wait_info *objs; ++ unsigned long *signaled; ++ unsigned int nr_objs; ++ u32 inherit_err_flags; ++}; + +/** -+ * protm_enter_set_next_pending_seq - Update the scheduler's field of -+ * tick_protm_pending_seq to that from the next available on-slot protm -+ * pending CSG. ++ * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information ++ * about CQS objects for the kcpu CQS timeline set command + * -+ * @kbdev: Pointer to the GPU device. ++ * @objs: Array of structures which define CQS timeline objects to be used by ++ * the kcpu command. ++ * @nr_objs: Number of CQS objects in the array. ++ */ ++struct kbase_kcpu_command_cqs_set_operation_info { ++ struct base_cqs_set_operation_info *objs; ++ unsigned int nr_objs; ++}; ++ ++/** ++ * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information ++ * about CQS objects for the kcpu CQS timeline wait command + * -+ * If applicable, the function updates the scheduler's tick_protm_pending_seq -+ * field from the next available on-slot protm pending CSG. If not, the field -+ * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID. ++ * @objs: Array of structures which define CQS timeline objects to be used by ++ * the kcpu command. ++ * @signaled: Bit array used to report the status of the CQS wait objects. ++ * 1 is signaled, 0 otherwise. ++ * @nr_objs: Number of CQS objects in the array. ++ * @inherit_err_flags: Bit-pattern for CQSs in the array who's error field is to ++ * be used as the source to import into the queue's error-state + */ -+static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; -+ DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; -+ u32 i; ++struct kbase_kcpu_command_cqs_wait_operation_info { ++ struct base_cqs_wait_operation_info *objs; ++ unsigned long *signaled; ++ unsigned int nr_objs; ++ u32 inherit_err_flags; ++}; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++/** ++ * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information ++ * needed for the kcpu command for jit allocations ++ * ++ * @node: Used to keep track of all JIT free/alloc commands in submission ++ * order. This must be located in the front of this struct to ++ * match that of kbase_kcpu_command_jit_free_info. ++ * @info: Array of objects of the struct base_jit_alloc_info type which ++ * specify jit allocations to be made by the kcpu command. ++ * @count: Number of jit alloc objects in the array. ++ * @blocked: Whether this allocation has been put into the pending list to ++ * be retried later. 
++ */ ++struct kbase_kcpu_command_jit_alloc_info { ++ struct list_head node; ++ struct base_jit_alloc_info *info; ++ u8 count; ++ bool blocked; ++}; + -+ bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, -+ num_groups); -+ /* Reset the tick's pending protm seq number to invalid initially */ -+ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; -+ for_each_set_bit(i, active_csgs, num_groups) { -+ struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; ++/** ++ * struct kbase_kcpu_command_jit_free_info - Structure which holds information ++ * needed for the kcpu jit free command ++ * ++ * @node: Used to keep track of all JIT free/alloc commands in submission ++ * order. This must be located in the front of this struct to ++ * match that of kbase_kcpu_command_jit_alloc_info. ++ * @ids: Array of identifiers of jit allocations which are to be freed ++ * by the kcpu command. ++ * @count: Number of elements in the array. ++ */ ++struct kbase_kcpu_command_jit_free_info { ++ struct list_head node; ++ u8 *ids; ++ u8 count; ++}; + -+ /* Set to the next pending protm group's scan_seq_number */ -+ if ((group != scheduler->active_protm_grp) && -+ (!bitmap_empty(group->protm_pending_bitmap, num_csis)) && -+ (group->scan_seq_num < scheduler->tick_protm_pending_seq)) -+ scheduler->tick_protm_pending_seq = group->scan_seq_num; -+ } -+} ++/** ++ * struct kbase_suspend_copy_buffer - information about the suspend buffer ++ * to be copied. ++ * ++ * @size: size of the suspend buffer in bytes. ++ * @pages: pointer to an array of pointers to the pages which contain ++ * the user buffer. ++ * @nr_pages: number of pages. ++ * @offset: offset into the pages ++ * @cpu_alloc: Reference to physical pages of suspend buffer allocation. ++ */ ++struct kbase_suspend_copy_buffer { ++ size_t size; ++ struct page **pages; ++ int nr_pages; ++ size_t offset; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++}; + ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST +/** -+ * scheduler_group_check_protm_enter - Request the given group to be evaluated -+ * for triggering the protected mode. ++ * struct kbase_kcpu_command_group_suspend_info - structure which contains ++ * suspend buffer data captured for a suspended queue group. + * -+ * @kbdev: Pointer to the GPU device. -+ * @input_grp: Pointer to the GPU queue group. ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages. ++ * @group_handle: Handle to the mapping of CSG. ++ */ ++struct kbase_kcpu_command_group_suspend_info { ++ struct kbase_suspend_copy_buffer *sus_buf; ++ u8 group_handle; ++}; ++#endif ++ ++ ++/** ++ * struct kbase_kcpu_command - Command which is to be part of the kernel ++ * command queue + * -+ * The function assumes the given group is either an active running group or -+ * the scheduler internally maintained field scheduler->top_grp. ++ * @type: Type of the command. ++ * @enqueue_ts: Denotes the relative time of enqueueing, a smaller value ++ * indicates that it has been enqueued earlier. ++ * @info: Structure which holds information about the command ++ * dependent on the command type. 
++ * @info.fence: Fence ++ * @info.cqs_wait: CQS wait ++ * @info.cqs_set: CQS set ++ * @info.cqs_wait_operation: CQS wait operation ++ * @info.cqs_set_operation: CQS set operation ++ * @info.import: import ++ * @info.jit_alloc: JIT allocation ++ * @info.jit_free: JIT deallocation ++ * @info.suspend_buf_copy: suspend buffer copy ++ * @info.sample_time: sample time ++ */ ++struct kbase_kcpu_command { ++ enum base_kcpu_command_type type; ++ u64 enqueue_ts; ++ union { ++ struct kbase_kcpu_command_fence_info fence; ++ struct kbase_kcpu_command_cqs_wait_info cqs_wait; ++ struct kbase_kcpu_command_cqs_set_info cqs_set; ++ struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; ++ struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; ++ struct kbase_kcpu_command_import_info import; ++ struct kbase_kcpu_command_jit_alloc_info jit_alloc; ++ struct kbase_kcpu_command_jit_free_info jit_free; ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ struct kbase_kcpu_command_group_suspend_info suspend_buf_copy; ++#endif ++ } info; ++}; ++ ++/** ++ * struct kbase_kcpu_command_queue - a command queue executed by the kernel + * -+ * If the GPU is not already running in protected mode and the input group -+ * has protected region requests from its bound queues, the requests are -+ * acknowledged and the GPU is instructed to enter the protected mode. ++ * @lock: Lock to protect accesses to this queue. ++ * @kctx: The context to which this command queue belongs. ++ * @commands: Array of commands which have been successfully ++ * enqueued to this command queue. ++ * @wq: Dedicated workqueue for processing commands. ++ * @work: struct work_struct which contains a pointer to ++ * the function which handles processing of kcpu ++ * commands enqueued into a kcpu command queue; ++ * part of kernel API for processing workqueues ++ * @start_offset: Index of the command to be executed next ++ * @id: KCPU command queue ID. ++ * @num_pending_cmds: The number of commands enqueued but not yet ++ * executed or pending ++ * @cqs_wait_count: Tracks the number of CQS wait commands enqueued ++ * @fence_context: The dma-buf fence context number for this kcpu ++ * queue. A unique context number is allocated for ++ * each kcpu queue. ++ * @fence_seqno: The dma-buf fence sequence number for the fence ++ * that is returned on the enqueue of fence signal ++ * command. This is increased every time the ++ * fence signal command is queued. ++ * @fence_wait_processed: Used to avoid reprocessing of the fence wait ++ * command which has blocked the processing of ++ * commands that follow it. ++ * @enqueue_failed: Indicates that no space has become available in ++ * the buffer since an enqueue operation failed ++ * because of insufficient free space. ++ * @command_started: Indicates that the command at the front of the ++ * queue has been started in a previous queue ++ * process, but was not completed due to some ++ * unmet dependencies. Ensures that instrumentation ++ * of the execution start of these commands is only ++ * fired exactly once. ++ * @has_error: Indicates that the kcpu queue is in error mode ++ * or without errors since last cleaned. ++ * @jit_blocked: Used to keep track of command queues blocked ++ * by a pending JIT allocation command. ++ * @fence_timeout: Timer used to detect the fence wait timeout. ++ * @metadata: Metadata structure containing basic information about ++ * this queue for any fence objects associated with this queue. 
+ */ -+static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, -+ struct kbase_queue_group *const input_grp) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; -+ unsigned long flags; -+ bool protm_in_use; ++struct kbase_kcpu_command_queue { ++ struct mutex lock; ++ struct kbase_context *kctx; ++ struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; ++ struct workqueue_struct *wq; ++ struct work_struct work; ++ u8 start_offset; ++ u8 id; ++ u16 num_pending_cmds; ++ u32 cqs_wait_count; ++ u64 fence_context; ++ unsigned int fence_seqno; ++ bool fence_wait_processed; ++ bool enqueue_failed; ++ bool command_started; ++ struct list_head jit_blocked; ++ bool has_error; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ struct timer_list fence_timeout; ++#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ struct kbase_kcpu_dma_fence_meta *metadata; ++#endif /* CONFIG_SYNC_FILE */ ++}; + -+ lockdep_assert_held(&scheduler->lock); ++/** ++ * kbase_csf_kcpu_queue_new - Create new KCPU command queue. ++ * ++ * @kctx: Pointer to the kbase context within which the KCPU command ++ * queue will be created. ++ * @newq: Pointer to the structure which contains information about ++ * the new KCPU command queue to be created. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *newq); + -+ /* Return early if the physical pages have not been allocated yet */ -+ if (unlikely(!sbuf->pma)) -+ return; ++/** ++ * kbase_csf_kcpu_queue_delete - Delete KCPU command queue. ++ * ++ * @kctx: Pointer to the kbase context from which the KCPU command ++ * queue is to be deleted. ++ * @del: Pointer to the structure which specifies the KCPU command ++ * queue to be deleted. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *del); + -+ /* This lock is taken to prevent the issuing of MMU command during the -+ * transition to protected mode. This helps avoid the scenario where the -+ * entry to protected mode happens with a memory region being locked and -+ * the same region is then accessed by the GPU in protected mode. -+ */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++/** ++ * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command ++ * queue. ++ * ++ * @kctx: Pointer to the kbase context within which the KCPU command ++ * is to be enqueued into the KCPU command queue. ++ * @enq: Pointer to the structure which specifies the KCPU command ++ * as well as the KCPU command queue into which the command ++ * is to be enqueued. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enq); + -+ /* Check if the previous transition to enter & exit the protected -+ * mode has completed or not. 
-+ */ -+ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) || -+ kbdev->protected_mode; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use); ++/** ++ * kbase_csf_kcpu_queue_context_init - Initialize the kernel CPU queues context ++ * for a GPU address space ++ * ++ * @kctx: Pointer to the kbase context being initialized. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); + -+ /* Firmware samples the PROTM_PEND ACK bit for CSs when -+ * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit -+ * is set for a CS after Host has sent the PROTM_ENTER -+ * Global request, then there is no guarantee that firmware will -+ * notice that prior to switching to protected mode. And firmware -+ * may not again raise the PROTM_PEND interrupt for that CS -+ * later on. To avoid that uncertainty PROTM_PEND ACK bit -+ * is not set for a CS if the request to enter protected -+ * mode has already been sent. It will be set later (after the exit -+ * from protected mode has taken place) when the group to which -+ * CS is bound becomes the top group. -+ * -+ * The actual decision of entering protected mode is hinging on the -+ * input group is the top priority group, or, in case the previous -+ * top-group is evicted from the scheduler during the tick, its would -+ * be replacement, and that it is currently in a stable state (i.e. the -+ * slot state is running). -+ */ -+ if (!protm_in_use && !WARN_ON(!input_grp)) { -+ const int slot = -+ kbase_csf_scheduler_group_get_slot_locked(input_grp); ++/** ++ * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context ++ * for a GPU address space ++ * @kctx: Pointer to the kbase context being terminated. ++ * ++ * This function deletes any kernel CPU queues that weren't deleted before ++ * context termination. ++ * ++ */ ++void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); + -+ /* check the input_grp is running and requesting protected mode -+ */ -+ if (slot >= 0 && -+ atomic_read(&scheduler->csg_slots[slot].state) == -+ CSG_SLOT_RUNNING) { -+ if (kctx_as_enabled(input_grp->kctx) && -+ scheduler_slot_protm_ack(kbdev, input_grp, slot)) { -+ int err; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++/* Test wrappers for dma fence operations. */ ++int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info); + -+ /* Option of acknowledging to multiple -+ * CSGs from the same kctx is dropped, -+ * after consulting with the -+ * architecture team. See the comment in -+ * GPUCORE-21394. -+ */ ++int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command *current_command, ++ struct base_fence *fence, struct sync_file **sync_file, int *fd); ++#endif /* CONFIG_SYNC_FILE */ + -+ /* Switch to protected mode */ -+ scheduler->active_protm_grp = input_grp; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, -+ 0u); ++#endif /* _KBASE_CSF_KCPU_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c +new file mode 100644 +index 000000000..fa877778c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c +@@ -0,0 +1,193 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++#include "mali_kbase_csf_kcpu_debugfs.h" ++#include ++#include + -+ /* Coresight must be disabled before entering protected mode. */ -+ kbase_debug_coresight_csf_disable_pmode_enter(kbdev); ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_sync.h" ++#endif + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+ kbase_csf_enter_protected_mode(kbdev); -+ /* Set the pending protm seq number to the next one */ -+ protm_enter_set_next_pending_seq(kbdev); ++/** ++ * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU ++ * queues blocked on CQS wait commands. ++ * ++ * @file: The seq_file to print to ++ * @kctx: The context of the KCPU queue ++ * @waits: Pointer to the KCPU CQS wait command info ++ */ ++static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, ++ struct kbase_context *kctx, ++ struct kbase_kcpu_command_cqs_wait_info *waits) ++{ ++ unsigned int i; + -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ for (i = 0; i < waits->nr_objs; i++) { ++ struct kbase_vmap_struct *mapping; ++ u32 val; ++ char const *msg; ++ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, ++ waits->objs[i].addr, &mapping); + -+ err = kbase_csf_wait_protected_mode_enter(kbdev); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ if (!cpu_ptr) ++ return; + -+ if (err) -+ schedule_actions_trigger_df(kbdev, input_grp->kctx, -+ DF_PROTECTED_MODE_ENTRY_FAILURE); ++ val = *cpu_ptr; + -+ scheduler->protm_enter_time = ktime_get_raw(); ++ kbase_phy_alloc_mapping_put(kctx, mapping); + -+ return; -+ } -+ } ++ msg = (waits->inherit_err_flags && (1U << i)) ? "true" : ++ "false"; ++ seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", ++ waits->objs[i].addr, val, waits->objs[i].val, msg); + } -+ -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); +} + +/** -+ * scheduler_check_pmode_progress - Check if protected mode execution is progressing -+ * -+ * @kbdev: Pointer to the GPU device. -+ * -+ * This function is called when the GPU is in protected mode. ++ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue + * -+ * It will check if the time spent in protected mode is less -+ * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT -+ * request is sent to the FW. 
++ * @file: The seq_file to print to ++ * @kctx: The context of the KCPU queue ++ * @queue: Pointer to the KCPU queue + */ -+static void scheduler_check_pmode_progress(struct kbase_device *kbdev) ++static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, ++ struct kbase_context *kctx, ++ struct kbase_kcpu_command_queue *queue) +{ -+ u64 protm_spent_time_ms; -+ u64 protm_progress_timeout = -+ kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT); -+ s64 diff_ms_signed = -+ ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time); -+ -+ if (diff_ms_signed < 0) -+ return; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ protm_spent_time_ms = (u64)diff_ms_signed; -+ if (protm_spent_time_ms < protm_progress_timeout) ++ if (WARN_ON(!queue)) + return; + -+ dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu", -+ protm_spent_time_ms, protm_progress_timeout); -+ -+ /* Prompt the FW to exit protected mode */ -+ scheduler_force_protm_exit(kbdev); -+} ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + -+static void scheduler_apply(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ const u32 total_csg_slots = kbdev->csf.global_iface.group_num; -+ const u32 available_csg_slots = scheduler->num_csg_slots_for_tick; -+ u32 suspend_cnt = 0; -+ u32 remain_cnt = 0; -+ u32 resident_cnt = 0; -+ struct kbase_queue_group *group; -+ u32 i; -+ u32 spare; ++ seq_printf(file, "%16u, %11u, %7u, %13llu %8u", ++ queue->num_pending_cmds, queue->enqueue_failed, ++ queue->command_started ? 1 : 0, ++ queue->fence_context, queue->fence_seqno); + -+ lockdep_assert_held(&scheduler->lock); ++ if (queue->command_started) { ++ struct kbase_kcpu_command *cmd = ++ &queue->commands[queue->start_offset]; ++ switch (cmd->type) { ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ { ++ struct kbase_sync_fence_info info; + -+ /* Suspend those resident groups not in the run list */ -+ for (i = 0; i < total_csg_slots; i++) { -+ group = scheduler->csg_slots[i].resident_group; -+ if (group) { -+ resident_cnt++; -+ if (group->prepared_seq_num >= available_csg_slots) { -+ suspend_queue_group(group); -+ suspend_cnt++; -+ } else -+ remain_cnt++; ++ kbase_sync_fence_info_get(cmd->info.fence.fence, &info); ++ seq_printf(file, ", Fence %pK %s %s", ++ info.fence, info.name, ++ kbase_sync_status_string(info.status)); ++ break; ++ } ++#endif ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ seq_puts(file, ", CQS "); ++ kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, ++ &cmd->info.cqs_wait); ++ break; ++ default: ++ seq_puts(file, ", U, Unknown blocking command"); ++ break; + } + } + -+ /* Initialize the remaining available csg slots for the tick/tock */ -+ scheduler->remaining_tick_slots = available_csg_slots; ++ seq_puts(file, "\n"); ++} + -+ /* If there are spare slots, apply heads in the list */ -+ spare = (available_csg_slots > resident_cnt) ? -+ (available_csg_slots - resident_cnt) : 0; -+ while (!list_empty(&scheduler->groups_to_schedule)) { -+ group = list_first_entry(&scheduler->groups_to_schedule, -+ struct kbase_queue_group, -+ link_to_schedule); ++/** ++ * kbasep_csf_kcpu_debugfs_show() - Print the KCPU queues debug information ++ * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * ++ * Return: Negative error code or 0 on success. 
++ */ ++static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; ++ unsigned long idx; + -+ if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && -+ group->prepared_seq_num < available_csg_slots) { -+ /* One of the resident remainders */ -+ update_csg_slot_priority(group, -+ get_slot_priority(group)); -+ } else if (spare != 0) { -+ s8 slot = (s8)find_first_zero_bit( -+ kbdev->csf.scheduler.csg_inuse_bitmap, -+ total_csg_slots); ++ seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION); ++ seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); ++ mutex_lock(&kctx->csf.kcpu_queues.lock); + -+ if (WARN_ON(slot >= (s8)total_csg_slots)) -+ break; ++ idx = find_first_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); + -+ if (!kctx_as_enabled(group->kctx) || group->faulted) { -+ /* Drop the head group and continue */ -+ update_offslot_non_idle_cnt(group); -+ remove_scheduled_group(kbdev, group); -+ continue; -+ } -+ program_csg_slot(group, slot, -+ get_slot_priority(group)); -+ if (unlikely(!csg_slot_in_use(kbdev, slot))) -+ break; ++ while (idx < KBASEP_MAX_KCPU_QUEUES) { ++ struct kbase_kcpu_command_queue *queue = ++ kctx->csf.kcpu_queues.array[idx]; + -+ spare--; -+ } else -+ break; ++ seq_printf(file, "%9lu( %s ), ", idx, ++ queue->has_error ? "InErr" : "NoErr"); ++ kbasep_csf_kcpu_debugfs_print_queue(file, kctx, ++ kctx->csf.kcpu_queues.array[idx]); + -+ /* Drop the head csg from the list */ -+ remove_scheduled_group(kbdev, group); -+ if (!WARN_ON(!scheduler->remaining_tick_slots)) -+ scheduler->remaining_tick_slots--; ++ idx = find_next_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES, idx + 1); + } + -+ /* Dealing with groups currently going through suspend */ -+ program_suspending_csg_slots(kbdev); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ return 0; +} + -+static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, -+ struct kbase_context *kctx, int priority) ++static int kbasep_csf_kcpu_debugfs_open(struct inode *in, struct file *file) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *group; ++ return single_open(file, kbasep_csf_kcpu_debugfs_show, in->i_private); ++} + -+ lockdep_assert_held(&scheduler->lock); -+ lockdep_assert_held(&scheduler->interrupt_lock); -+ if (WARN_ON(priority < 0) || -+ WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) -+ return; ++static const struct file_operations kbasep_csf_kcpu_debugfs_fops = { ++ .open = kbasep_csf_kcpu_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ if (!kctx_as_enabled(kctx)) ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++ const mode_t mode = 0444; ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + -+ list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], -+ link) { -+ if (WARN_ON(!list_empty(&group->link_to_schedule))) -+ /* This would be a bug */ -+ list_del_init(&group->link_to_schedule); ++ file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, ++ kctx, &kbasep_csf_kcpu_debugfs_fops); + -+ if (unlikely(group->faulted)) -+ continue; ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create KCPU debugfs entry"); ++ } ++} + -+ /* Set the scanout sequence number, starting from 0 */ -+ 
group->scan_seq_num = scheduler->csg_scan_count_for_tick++; + -+ if (scheduler->tick_protm_pending_seq == -+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { -+ if (!bitmap_empty(group->protm_pending_bitmap, -+ kbdev->csf.global_iface.groups[0].stream_num)) -+ scheduler->tick_protm_pending_seq = -+ group->scan_seq_num; -+ } ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) ++{ ++} + -+ if (queue_group_idle_locked(group)) { -+ if (can_schedule_idle_group(group)) -+ list_add_tail(&group->link_to_schedule, -+ &scheduler->idle_groups_to_schedule); -+ continue; -+ } ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h +new file mode 100644 +index 000000000..08f2fda03 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h +@@ -0,0 +1,37 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (!scheduler->ngrp_to_schedule) { -+ /* keep the top csg's origin */ -+ scheduler->top_ctx = kctx; -+ scheduler->top_grp = group; -+ } ++#ifndef _KBASE_CSF_KCPU_DEBUGFS_H_ ++#define _KBASE_CSF_KCPU_DEBUGFS_H_ + -+ list_add_tail(&group->link_to_schedule, -+ &scheduler->groups_to_schedule); -+ group->prepared_seq_num = scheduler->ngrp_to_schedule++; ++/* Forward declaration */ ++struct kbase_context; + -+ kctx->csf.sched.ngrp_to_schedule++; -+ count_active_address_space(kbdev, kctx); -+ } -+} ++#define MALI_CSF_KCPU_DEBUGFS_VERSION 0 + +/** -+ * scheduler_rotate_groups() - Rotate the runnable queue groups to provide -+ * fairness of scheduling within a single -+ * kbase_context. -+ * -+ * @kbdev: Pointer to the GPU device. ++ * kbase_csf_kcpu_debugfs_init() - Create a debugfs entry for KCPU queues + * -+ * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned -+ * the highest slot priority) is guaranteed to get the resources that it -+ * needs we only rotate the kbase_context corresponding to it - -+ * kbase_csf_scheduler's top_ctx. 
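Once kbase_csf_kcpu_debugfs_init() has created the "kcpu_queues" entry under the context's debugfs directory, it can be read like any other debugfs file. The userspace sketch below is illustrative only; the /sys/kernel/debug/mali0/ctx/<tgid>_<id>/ prefix is an assumption about where kctx->kctx_dentry typically lands and may differ per platform and debugfs mount point.

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    /* Path is hypothetical; pass the real node as argv[1] on the target. */
    const char *path = (argc > 1) ? argv[1] :
        "/sys/kernel/debug/mali0/ctx/1234_5/kcpu_queues";
    char line[512];
    FILE *f = fopen(path, "r");

    if (!f) {
        perror("fopen");
        return EXIT_FAILURE;
    }
    while (fgets(line, sizeof(line), f))
        fputs(line, stdout); /* one row per in-use KCPU queue */
    fclose(f);
    return EXIT_SUCCESS;
}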
++ * @kctx: The kbase_context for which to create the debugfs entry ++ */ ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx); ++ ++#endif /* _KBASE_CSF_KCPU_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c +new file mode 100644 +index 000000000..4056a9d93 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c +@@ -0,0 +1,817 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * The priority level chosen for rotation is the one containing the previous -+ * scheduling cycle's kbase_csf_scheduler's top_grp. ++ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * -+ * In a 'fresh-slice-cycle' this always corresponds to the highest group -+ * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority -+ * level of the previous scheduling cycle's first runnable kbase_context. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * We choose this priority level because when higher priority work is -+ * scheduled, we should always cause the scheduler to run and do a scan. The -+ * scan always enumerates the highest priority work first (whether that be -+ * based on process priority or group priority), and thus -+ * kbase_csf_scheduler's top_grp will point to the first of those high priority -+ * groups, which necessarily must be the highest priority group in -+ * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick -+ * up that group appropriately. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), -+ * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but -+ * will set up kbase_csf_scheduler's top_ctx again for the next scheduling -+ * cycle. Implicitly, a rotation had already occurred by removing -+ * the kbase_csf_scheduler's top_grp ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * If kbase_csf_scheduler's top_grp became idle and all other groups belonging -+ * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's -+ * top_ctx are also idle, then the effect of this will be to rotate idle -+ * groups, which might not actually become resident in the next -+ * scheduling slice. However this is acceptable since a queue group becoming -+ * idle is implicitly a rotation (as above with evicted queue groups), as it -+ * automatically allows a new queue group to take the maximum slot priority -+ * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of -+ * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will -+ * be for a group in the next lowest priority level or in absence of those the -+ * next kbase_context's queue groups. 
+ */ -+static void scheduler_rotate_groups(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_context *const top_ctx = scheduler->top_ctx; -+ struct kbase_queue_group *const top_grp = scheduler->top_grp; -+ -+ lockdep_assert_held(&scheduler->lock); -+ if (top_ctx && top_grp) { -+ struct list_head *list = -+ &top_ctx->csf.sched.runnable_groups[top_grp->priority]; + -+ WARN_ON(top_grp->kctx != top_ctx); -+ if (!WARN_ON(list_empty(list))) { -+ struct kbase_queue_group *new_head_grp; ++#include ++#include ++#include "mali_kbase_csf.h" ++#include "mali_kbase_csf_mcu_shared_reg.h" ++#include + -+ list_move_tail(&top_grp->link, list); -+ new_head_grp = (!list_empty(list)) ? -+ list_first_entry(list, struct kbase_queue_group, link) : -+ NULL; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp, -+ top_ctx->csf.sched.num_runnable_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); -+ dev_dbg(kbdev->dev, -+ "groups rotated for a context, num_runnable_groups: %u\n", -+ scheduler->top_ctx->csf.sched.num_runnable_grps); -+ } -+ } -+} ++/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */ ++#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8) + -+static void scheduler_rotate_ctxs(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct list_head *list = &scheduler->runnable_kctxs; ++/* MCU shared region map attempt limit */ ++#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4) + -+ lockdep_assert_held(&scheduler->lock); -+ if (scheduler->top_ctx) { -+ if (!WARN_ON(list_empty(list))) { -+ struct kbase_context *pos; -+ bool found = false; ++/* Convert a VPFN to its start addr */ ++#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT) + -+ /* Locate the ctx on the list */ -+ list_for_each_entry(pos, list, csf.link) { -+ if (scheduler->top_ctx == pos) { -+ found = true; -+ break; -+ } -+ } ++/* Macros for extract the corresponding VPFNs from a CSG_REG */ ++#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn) ++#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages) ++#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi)) + -+ if (!WARN_ON(!found)) { -+ struct kbase_context *new_head_kctx; ++/* MCU shared segment dummy page mapping flags */ ++#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) + -+ list_move_tail(&pos->csf.link, list); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u); -+ new_head_kctx = (!list_empty(list)) ? -+ list_first_entry(list, struct kbase_context, csf.link) : -+ NULL; -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, -+ 0u); -+ dev_dbg(kbdev->dev, "contexts rotated\n"); -+ } -+ } -+ } -+} ++/* MCU shared segment suspend buffer mapping flags */ ++#define SUSP_PAGE_MAP_FLAGS \ ++ (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) + +/** -+ * scheduler_update_idle_slots_status() - Get the status update for the CSG -+ * slots for which the IDLE notification was received -+ * previously. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * @csg_bitmap: Bitmap of the CSG slots for which -+ * the status update request completed successfully. -+ * @failed_csg_bitmap: Bitmap of the idle CSG slots for which -+ * the status update request timedout. 
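The CSG_REG_*_VPFN macros above imply a fixed layout inside each pre-allocated shared region: the normal suspend buffer, then the protected-mode suspend buffer, then two userio pages per CSI. The standalone sketch below walks that layout with made-up page counts (they are not values read from any firmware interface) and confirms that the region spans 2 * (nr_susp_pages + nr_csis) pages, matching the size used when the regions are allocated.

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the driver macros, with a plain start PFN instead of a
 * kbase_va_region; nr_susp is unused by the first macro, as in the original.
 */
#define SUSP_BUF_VPFN(start, nr_susp)    ((start))
#define PMOD_BUF_VPFN(start, nr_susp)    ((start) + (nr_susp))
#define USERIO_VPFN(start, csi, nr_susp) ((start) + 2 * ((nr_susp) + (csi)))

int main(void)
{
    const uint64_t start_pfn = 0x1000; /* hypothetical region start */
    const uint32_t nr_susp_pages = 4;  /* hypothetical suspend buffer pages */
    const uint32_t nr_csis = 8;        /* hypothetical streams per group */
    uint32_t csi;

    printf("suspend buf at PFN 0x%llx\n",
           (unsigned long long)SUSP_BUF_VPFN(start_pfn, nr_susp_pages));
    printf("P-mode  buf at PFN 0x%llx\n",
           (unsigned long long)PMOD_BUF_VPFN(start_pfn, nr_susp_pages));
    for (csi = 0; csi < nr_csis; csi++)
        printf("csi %u userio pages at PFN 0x%llx\n", csi,
               (unsigned long long)USERIO_VPFN(start_pfn, csi, nr_susp_pages));

    printf("region size: %u pages\n", 2 * (nr_susp_pages + nr_csis));
    return 0;
}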
-+ * -+ * This function sends a CSG status update request for all the CSG slots -+ * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if -+ * the group's 'reevaluate_idle_status' field is set, the nominally non-idle -+ * slots are also included in the status update for a confirmation of their -+ * status. The function wait for the status update request to complete and -+ * returns the update completed slots bitmap and any timed out idle-flagged -+ * slots bitmap. ++ * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime ++ * resources for suspend buffer pages, userio pages ++ * and their corresponding mapping GPU VA addresses ++ * from the MCU shared interface segment + * -+ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by -+ * this function. ++ * @link: Link to the managing list for the wrapper object. ++ * @reg: pointer to the region allocated from the shared interface segment, which ++ * covers the normal/P-mode suspend buffers, userio pages of the queues ++ * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free). ++ * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's ++ * protected mode suspend buffer pages. + */ -+static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, -+ unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) ++struct kbase_csg_shared_region { ++ struct list_head link; ++ struct kbase_va_region *reg; ++ struct kbase_queue_group *grp; ++ bool pmode_mapped; ++}; ++ ++static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ const u32 num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_csf_global_iface *const global_iface = -+ &kbdev->csf.global_iface; -+ unsigned long flags, i; -+ u32 active_chk = 0; ++ unsigned long userio_map_flags; + -+ lockdep_assert_held(&scheduler->lock); ++ if (kbdev->system_coherency == COHERENCY_NONE) ++ userio_map_flags = ++ KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ else ++ userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ return (userio_map_flags | KBASE_REG_GPU_NX); ++} + -+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { -+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; -+ struct kbase_queue_group *group = csg_slot->resident_group; -+ struct kbase_csf_cmd_stream_group_info *const ginfo = -+ &global_iface->groups[i]; -+ u32 csg_req; -+ bool idle_flag; ++static void set_page_meta_status_not_movable(struct tagged_addr phy) ++{ ++ if (kbase_page_migration_enabled) { ++ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); + -+ if (WARN_ON(!group)) { -+ clear_bit(i, scheduler->csg_inuse_bitmap); -+ clear_bit(i, scheduler->csg_slots_idle_mask); -+ continue; ++ if (page_md) { ++ spin_lock(&page_md->migrate_lock); ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ spin_unlock(&page_md->migrate_lock); + } ++ } ++} + -+ idle_flag = test_bit(i, scheduler->csg_slots_idle_mask); -+ if (idle_flag || group->reevaluate_idle_status) { -+ if (idle_flag) { -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (!bitmap_empty(group->protm_pending_bitmap, -+ ginfo->stream_num)) { -+ dev_warn(kbdev->dev, -+ "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution", -+ 
group->handle, group->kctx->tgid, -+ group->kctx->id, (int)i); -+ } -+#endif -+ clear_bit(i, scheduler->csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, -+ scheduler->csg_slots_idle_mask[0]); -+ } else { -+ /* Updates include slots for which reevaluation is needed. -+ * Here one tracks the extra included slots in active_chk. -+ * For protm pending slots, their status of activeness are -+ * assured so no need to request an update. -+ */ -+ active_chk |= BIT(i); -+ group->reevaluate_idle_status = false; -+ } ++static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group) ++{ ++ return (struct kbase_csg_shared_region *)group->csg_reg; ++} + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i); -+ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); -+ csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; -+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, -+ CSG_REQ_STATUS_UPDATE_MASK); ++static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn, ++ u32 nr_pages) ++{ ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + -+ /* Track the slot update requests in csg_bitmap. -+ * Note, if the scheduler requested extended update, the resulting -+ * csg_bitmap would be the idle_flags + active_chk. Otherwise it's -+ * identical to the idle_flags. -+ */ -+ set_bit(i, csg_bitmap); -+ } else { -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); -+ } -+ } ++ return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages, ++ mem_flags, KBASE_MEM_GROUP_CSF_FW); ++} ++ ++static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages) ++{ ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + ++ return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, ++ mmu_sync_info, NULL, false); ++} + -+ /* The groups are aggregated into a single kernel doorbell request */ -+ if (!bitmap_empty(csg_bitmap, num_groups)) { -+ long wt = -+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ u32 db_slots = (u32)csg_bitmap[0]; ++/* Reset consecutive retry count to zero */ ++static void notify_group_csg_reg_map_done(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + -+ kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ /* Just clear the internal map retry count */ ++ group->csg_reg_bind_retries = 0; ++} + -+ if (wait_csg_slots_handshake_ack(kbdev, -+ CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { -+ const int csg_nr = ffs(csg_bitmap[0]) - 1; -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[csg_nr].resident_group; ++/* Return true if a fatal group error has already been triggered */ ++static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; + -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", -+ kbase_backend_get_cycle_cnt(kbdev), -+ 
kbdev->csf.fw_timeout_ms, -+ csg_bitmap[0]); -+ schedule_actions_trigger_df(kbdev, group->kctx, -+ DF_CSG_STATUS_UPDATE_TIMEOUT); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ /* Store the bitmap of timed out slots */ -+ bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); -+ csg_bitmap[0] = ~csg_bitmap[0] & db_slots; ++ if (group->csg_reg_bind_retries < U8_MAX) ++ group->csg_reg_bind_retries++; + -+ /* Mask off any failed bit position contributed from active ones, as the -+ * intention is to retain the failed bit pattern contains only those from -+ * idle flags reporting back to the caller. This way, any failed to update -+ * original idle flag would be kept as 'idle' (an informed guess, as the -+ * update did not come to a conclusive result). So will be the failed -+ * active ones be treated as still 'non-idle'. This is for a graceful -+ * handling to the unexpected timeout condition. -+ */ -+ failed_csg_bitmap[0] &= ~active_chk; ++ /* Allow only one fatal error notification */ ++ if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) { ++ struct base_gpu_queue_group_error const err_payload = { ++ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } } ++ }; + -+ } else { -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots); -+ csg_bitmap[0] = db_slots; -+ } -+ } else { -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit", ++ group->kctx->tgid, group->kctx->id, group->handle); ++ kbase_csf_add_group_fatal_error(group, &err_payload); ++ kbase_event_wakeup(group->kctx); + } ++ ++ return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT; +} + -+/** -+ * scheduler_handle_idle_slots() - Update the idle status of queue groups -+ * resident on CSG slots for which the -+ * IDLE notification was received previously. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * -+ * This function is called at the start of scheduling tick/tock to reconfirm -+ * the idle status of queue groups resident on CSG slots for -+ * which idle notification was received previously, i.e. all the CSG slots -+ * present in the bitmap scheduler->csg_slots_idle_mask. -+ * The confirmation is done by sending the CSG status update request to the -+ * firmware. On completion, the firmware will mark the idleness at the -+ * slot's interface CSG_STATUS_STATE register accordingly. ++/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping. ++ * If phys is NULL, the internal dummy_phys is used, which effectively ++ * restores back to the initialized state for the given queue's userio_pages ++ * (i.e. mapped to the default dummy page). ++ * In case of CSF mmu update error on a queue, the dummy phy is used to restore ++ * back the default 'unbound' (i.e. mapped to dummy) condition. + * -+ * The run state of the groups resident on still idle CSG slots is changed to -+ * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is -+ * updated accordingly. -+ * The bits corresponding to slots for which the status update request timedout -+ * remain set in scheduler->csg_slots_idle_mask. ++ * It's the caller's responsibility to ensure that the given vpfn is extracted ++ * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN(). 
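notify_group_csg_reg_map_error() above caps the per-group retry counter and raises the fatal group error exactly once, when the limit is first crossed. The plain C sketch below distils that saturate-and-notify-once behaviour with an illustrative limit; it is a model of the pattern, not driver code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIND_ATTEMPT_LIMIT 4 /* mirrors MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT */

struct retry_state {
    uint8_t retries;
};

static bool note_map_error(struct retry_state *st)
{
    if (st->retries < UINT8_MAX)
        st->retries++;

    /* Fire the "fatal" notification only on the attempt that hits the limit */
    if (st->retries == BIND_ATTEMPT_LIMIT)
        printf("fatal: map retry limit reached, notifying once\n");

    return st->retries >= BIND_ATTEMPT_LIMIT;
}

int main(void)
{
    struct retry_state st = { 0 };
    int attempt;

    for (attempt = 1; attempt <= 6; attempt++)
        printf("attempt %d -> give up: %d\n", attempt, note_map_error(&st));
    return 0;
}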
+ */ -+static void scheduler_handle_idle_slots(struct kbase_device *kbdev) ++static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ unsigned long flags, i; -+ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; -+ DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ int err = 0, err1; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ scheduler_update_idle_slots_status(kbdev, csg_bitmap, -+ failed_csg_bitmap); ++ if (phys) { ++ unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags; ++ unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR; + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ for_each_set_bit(i, csg_bitmap, num_groups) { -+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; -+ struct kbase_queue_group *group = csg_slot->resident_group; ++ /* Dealing with a queue's INPUT page */ ++ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input, ++ KBASE_MEM_GROUP_CSF_IO); ++ /* Dealing with a queue's OUTPUT page */ ++ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1, ++ mem_flags_output, KBASE_MEM_GROUP_CSF_IO); ++ if (unlikely(err1)) ++ err = err1; ++ } + -+ if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING)) -+ continue; -+ if (WARN_ON(!group)) -+ continue; -+ if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && -+ group->run_state != KBASE_CSF_GROUP_IDLE)) -+ continue; -+ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) -+ continue; -+ -+ if (group_on_slot_is_idle(kbdev, i)) { -+ group->run_state = KBASE_CSF_GROUP_IDLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); -+ set_bit(i, scheduler->csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, -+ group, scheduler->csg_slots_idle_mask[0]); -+ } else { -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); -+ } ++ if (unlikely(err) || !phys) { ++ /* Restore back to dummy_userio_phy */ ++ update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + } + -+ bitmap_or(scheduler->csg_slots_idle_mask, -+ scheduler->csg_slots_idle_mask, -+ failed_csg_bitmap, num_groups); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL, -+ scheduler->csg_slots_idle_mask[0]); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ return err; +} + -+static void scheduler_scan_idle_groups(struct kbase_device *kbdev) ++/* Update a group's queues' mappings for a group with its runtime bound group region */ ++static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group, ++ struct kbase_queue_group *prev_grp) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *group, *n; -+ -+ list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, -+ link_to_schedule) { -+ WARN_ON(!can_schedule_idle_group(group)); -+ -+ if (!scheduler->ngrp_to_schedule) { -+ /* keep the top csg's origin */ -+ scheduler->top_ctx = group->kctx; -+ scheduler->top_grp = group; -+ } -+ -+ group->prepared_seq_num = scheduler->ngrp_to_schedule++; -+ list_move_tail(&group->link_to_schedule, 
-+ &scheduler->groups_to_schedule); ++ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; ++ struct tagged_addr *phy; ++ int err = 0, err1; ++ u32 i; + -+ group->kctx->csf.sched.ngrp_to_schedule++; -+ count_active_address_space(kbdev, group->kctx); -+ } -+} ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+static void scheduler_rotate(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg")) ++ return -EINVAL; + -+ lockdep_assert_held(&scheduler->lock); ++ for (i = 0; i < nr_csis; i++) { ++ struct kbase_queue *queue = group->bound_queues[i]; ++ struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL; + -+ /* Dealing with rotation */ -+ scheduler_rotate_groups(kbdev); -+ scheduler_rotate_ctxs(kbdev); -+} ++ /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */ ++ phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL; + -+static struct kbase_queue_group *get_tock_top_group( -+ struct kbase_csf_scheduler *const scheduler) -+{ -+ struct kbase_context *kctx; -+ int i; ++ /* Either phy is valid, or this update is for a transition change from ++ * prev_group, and the prev_queue was mapped, so an update is required. ++ */ ++ if (phy || (prev_queue && prev_queue->user_io_gpu_va)) { ++ u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages); + -+ lockdep_assert_held(&scheduler->lock); -+ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { -+ list_for_each_entry(kctx, -+ &scheduler->runnable_kctxs, csf.link) { -+ struct kbase_queue_group *group; ++ err1 = userio_pages_replace_phys(kbdev, vpfn, phy); + -+ list_for_each_entry(group, -+ &kctx->csf.sched.runnable_groups[i], -+ link) { -+ if (queue_group_idle_locked(group)) -+ continue; ++ if (unlikely(err1)) { ++ dev_warn(kbdev->dev, ++ "%s: Error in update queue-%d mapping for csg_%d_%d_%d", ++ __func__, i, group->kctx->tgid, group->kctx->id, ++ group->handle); ++ err = err1; ++ } else if (phy) ++ queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + -+ return group; -+ } ++ /* Mark prev_group's queue has lost its mapping */ ++ if (prev_queue) ++ prev_queue->user_io_gpu_va = 0; + } + } + -+ return NULL; ++ return err; +} + -+/** -+ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon -+ * suspend or GPU IDLE. -+ * -+ * @kbdev: Pointer to the device -+ * @system_suspend: Flag to indicate it's for system suspend. -+ * -+ * This function will suspend all active CSG groups upon either -+ * system suspend, runtime suspend or GPU IDLE. -+ * -+ * Return: 0 on success, -1 otherwise. ++/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced ++ * with the given group's phy pages, or, if no replacement, the default dummy pages. ++ * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its ++ * new binding owner in this function. At the end, the prev_grp would be completely ++ * detached away from the previously bound csg_reg. 
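csg_reg_update_on_csis() above records a queue's mapping by converting a region page-frame number to a GPU virtual address with GET_VPFN_VA(), and the suspend-buffer page count comes from PFN_UP() on the firmware-reported suspend_size. A small standalone illustration of that arithmetic follows; PAGE_SHIFT and the sample suspend_size are assumptions for the example, in the kernel they come from the architecture headers and the firmware interface respectively.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define GET_VPFN_VA(vpfn) ((uint64_t)(vpfn) << PAGE_SHIFT)

int main(void)
{
    uint64_t suspend_size = 9000; /* hypothetical per-CSG suspend_size in bytes */
    uint64_t nr_susp_pages = PFN_UP(suspend_size);
    uint64_t vpfn = 0x1000 + 2 * nr_susp_pages; /* e.g. first userio page of CSI 0 */

    printf("suspend_size %llu -> %llu pages\n",
           (unsigned long long)suspend_size, (unsigned long long)nr_susp_pages);
    printf("vpfn 0x%llx -> GPU VA 0x%llx\n",
           (unsigned long long)vpfn, (unsigned long long)GET_VPFN_VA(vpfn));
    return 0;
}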
+ */ -+static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, -+ bool system_suspend) ++static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group, ++ struct kbase_csg_shared_region *csg_reg) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; ++ const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ struct kbase_queue_group *prev_grp = csg_reg->grp; ++ struct kbase_va_region *reg = csg_reg->reg; ++ struct tagged_addr *phy; ++ int err = 0, err1; + -+ int ret = suspend_active_queue_groups(kbdev, slot_mask); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (unlikely(ret)) { -+ const int csg_nr = ffs(slot_mask[0]) - 1; -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[csg_nr].resident_group; -+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; ++ /* The csg_reg is expected still on the unused list so its link is not empty */ ++ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) { ++ dev_dbg(kbdev->dev, "csg_reg is marked in active use"); ++ return -EINVAL; ++ } + -+ /* The suspend of CSGs failed, -+ * trigger the GPU reset to be in a deterministic state. ++ if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) { ++ dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group"); ++ prev_grp->csg_reg = NULL; ++ return -EINVAL; ++ } ++ ++ /* Replacing the csg_reg bound group to the newly given one */ ++ csg_reg->grp = group; ++ group->csg_reg = csg_reg; ++ ++ /* Resolving mappings, deal with protected mode first */ ++ if (group->protected_suspend_buf.pma) { ++ /* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping ++ * status is now stale during this transition of ownership. For the new owner, ++ * its mapping would have been updated away when it lost its binding previously. ++ * So it needs an update to this pma map. By clearing here the mapped flag ++ * ensures it reflects the new owner's condition. + */ -+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", -+ kbase_backend_get_cycle_cnt(kbdev), -+ kbdev->csf.fw_timeout_ms, -+ kbdev->csf.global_iface.group_num, slot_mask); -+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ schedule_actions_trigger_df(kbdev, group->kctx, error_type); ++ csg_reg->pmode_mapped = false; ++ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); ++ } else if (csg_reg->pmode_mapped) { ++ /* Need to unmap the previous one, use the dummy pages */ ++ err = update_mapping_with_dummy_pages( ++ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); ++ if (unlikely(err)) ++ dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d", ++ __func__, group->kctx->tgid, group->kctx->id, group->handle); + -+ return -1; ++ csg_reg->pmode_mapped = false; + } + -+ /* Check if the groups became active whilst the suspend was ongoing, -+ * but only for the case where the system suspend is not in progress ++ /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is ++ * actually reflected by a specific mapped flag (due to phys[] is only allocated on ++ * in-need basis). 
So the GPU_VA is always updated to the bound region's corresponding ++ * VA, as a reflection of the binding to the csg_reg. + */ -+ if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps)) -+ return -1; -+ -+ return 0; -+} -+ -+/** -+ * all_on_slot_groups_remained_idle - Live check for all groups' idleness -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * Returns false if any of the queues inside any of the groups that have been -+ * assigned a physical CSG slot have work to execute, or have executed work -+ * since having received a GPU idle notification. This function is used to -+ * handle a rance condition between firmware reporting GPU idle and userspace -+ * submitting more work by directly ringing a doorbell. -+ * -+ * Return: false if any queue inside any resident group has work to be processed -+ * or has processed work since GPU idle event, true otherwise. -+ */ -+static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ /* All CSGs have the same number of CSs */ -+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num; -+ size_t i; ++ group->protected_suspend_buf.gpu_va = ++ GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)); + -+ lockdep_assert_held(&scheduler->lock); -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ /* Deal with normal mode suspend buffer */ ++ phy = group->normal_suspend_buf.phy; ++ err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy, ++ nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW); + -+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, -+ kbdev->csf.global_iface.group_num) { -+ struct kbase_queue_group *const group = -+ scheduler->csg_slots[i].resident_group; -+ size_t j; ++ if (unlikely(err1)) { ++ dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d", ++ __func__, group->kctx->tgid, group->kctx->id, group->handle); + -+ for (j = 0; j < max_streams; ++j) { -+ struct kbase_queue const *const queue = -+ group->bound_queues[j]; -+ u64 const *output_addr; -+ u64 cur_extract_ofs; ++ /* Attempt a restore to default dummy for removing previous mapping */ ++ if (prev_grp) ++ update_mapping_with_dummy_pages( ++ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); ++ err = err1; ++ /* Marking the normal suspend buffer is not mapped (due to error) */ ++ group->normal_suspend_buf.gpu_va = 0; ++ } else { ++ /* Marking the normal suspend buffer is actually mapped */ ++ group->normal_suspend_buf.gpu_va = ++ GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)); ++ } + -+ if (!queue || !queue->user_io_addr) -+ continue; ++ /* Deal with queue uerio_pages */ ++ err1 = csg_reg_update_on_csis(kbdev, group, prev_grp); ++ if (likely(!err1)) ++ err = err1; + -+ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); -+ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; -+ if (cur_extract_ofs != queue->extract_ofs) { -+ /* More work has been executed since the idle -+ * notification. 
-+ */ -+ return false; -+ } -+ } ++ /* Reset the previous group's suspend buffers' GPU_VAs as it has lost its bound */ ++ if (prev_grp) { ++ prev_grp->normal_suspend_buf.gpu_va = 0; ++ prev_grp->protected_suspend_buf.gpu_va = 0; ++ prev_grp->csg_reg = NULL; + } + -+ return true; ++ return err; +} + -+static bool scheduler_idle_suspendable(struct kbase_device *kbdev) ++/* Notify the group is placed on-slot, hence the bound csg_reg is active in use */ ++void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ bool suspend; -+ unsigned long flags; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ -+ lockdep_assert_held(&scheduler->lock); -+ -+ if ((scheduler->state == SCHED_SUSPENDED) || -+ (scheduler->state == SCHED_SLEEPING)) -+ return false; ++ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ spin_lock(&scheduler->interrupt_lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (scheduler->fast_gpu_idle_handling) { -+ scheduler->fast_gpu_idle_handling = false; ++ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding", ++ group->kctx->tgid, group->kctx->id, group->handle)) ++ return; + -+ if (scheduler->total_runnable_grps) { -+ suspend = !atomic_read(&scheduler->non_idle_offslot_grps) && -+ kbase_pm_idle_groups_sched_suspendable(kbdev); -+ } else -+ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); -+ spin_unlock(&scheduler->interrupt_lock); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* By dropping out the csg_reg from the unused list, it becomes active and is tracked ++ * by its bound group that is on-slot. The design is that, when this on-slot group is ++ * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the ++ * unused list. ++ */ ++ if (!WARN_ON_ONCE(list_empty(&csg_reg->link))) ++ list_del_init(&csg_reg->link); ++} + -+ return suspend; -+ } ++/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use ++ * anymore. Existing bounding/mappings are left untouched. These would only be dealt with ++ * if the bound csg_reg is to be reused with another group. ++ */ ++void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + -+ if (scheduler->total_runnable_grps) { ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ /* Check both on-slots and off-slots groups idle status */ -+ suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && -+ !atomic_read(&scheduler->non_idle_offslot_grps) && -+ kbase_pm_idle_groups_sched_suspendable(kbdev); -+ } else -+ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); ++ if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound", ++ group->kctx->tgid, group->kctx->id, group->handle)) ++ return; + -+ /* Confirm that all groups are actually idle before proceeding with -+ * suspension as groups might potentially become active again without -+ * informing the scheduler in case userspace rings a doorbell directly. ++ /* By adding back the csg_reg to the unused list, it becomes available for another ++ * group to break its existing binding and set up a new one. 
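Taken together, kbase_csf_mcu_shared_set_group_csg_reg_active() and _unused() above, plus the binding helpers, implement a simple recycling policy for the pre-allocated regions: an on-slot region leaves the unused list, an off-slot region is appended to the tail with its lazy mappings kept, a cleaned region (as done later for evicted groups) goes to the front, and a binder always recycles the head. The toy model below is not driver code; it only demonstrates the resulting reuse order with integer stand-ins for regions.

#include <stdio.h>

#define MAX_REGS 8

static int unused[MAX_REGS];
static int count;

static void push_tail(int reg) { unused[count++] = reg; }

static void push_front(int reg)
{
    for (int i = count; i > 0; i--)
        unused[i] = unused[i - 1];
    unused[0] = reg;
    count++;
}

static int pop_head(void)
{
    int reg = unused[0];

    for (int i = 1; i < count; i++)
        unused[i - 1] = unused[i];
    count--;
    return reg;
}

int main(void)
{
    push_tail(0); push_tail(1); push_tail(2); /* three pre-allocated regions */

    int a = pop_head(); /* group A goes on-slot: takes region 0 */
    push_tail(a);       /* A goes off-slot: region 0 to the tail, mappings kept */
    int b = pop_head(); /* group B goes on-slot: takes region 1 */
    push_front(b);      /* B is evicted: cleaned region 1 goes to the front */
    int c = pop_head(); /* group C binds next: reuses the cleaned region 1 first */

    printf("A=%d B=%d C=%d; region 0 keeps its lazy mappings longest\n", a, b, c);
    return 0;
}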
+ */ -+ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) || -+ unlikely(!all_on_slot_groups_remained_idle(kbdev)))) -+ suspend = false; -+ -+ spin_unlock(&scheduler->interrupt_lock); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return suspend; ++ if (!list_empty(&csg_reg->link)) { ++ WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot"); ++ list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs); ++ } else ++ list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs); +} + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU -+ * becoming idle. -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * This function is called on GPU idle notification to trigger the transition of -+ * GPU to sleep state, where MCU firmware pauses execution and L2 cache is -+ * turned off. Scheduler's state is changed to sleeping and all the active queue -+ * groups remain on the CSG slots. -+ */ -+static void scheduler_sleep_on_idle(struct kbase_device *kbdev) ++/* Adding a new queue to an existing on-slot group */ ++int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_csg_shared_region *csg_reg; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ u64 vpfn; ++ int err; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ dev_dbg(kbdev->dev, -+ "Scheduler to be put to sleep on GPU becoming idle"); -+ cancel_tick_timer(kbdev); -+ scheduler_pm_idle_before_sleep(kbdev); -+ scheduler->state = SCHED_SLEEPING; -+ KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); -+} -+#endif ++ if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot")) ++ return -EIO; + -+/** -+ * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU -+ * becoming idle. -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * This function is called on GPU idle notification to trigger the power down of -+ * GPU. Scheduler's state is changed to suspended and all the active queue -+ * groups are suspended before halting the MCU firmware. -+ * -+ * Return: true if scheduler will be suspended or false if suspend is aborted. 
-+ */ -+static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) -+{ -+ int ret = suspend_active_groups_on_powerdown(kbdev, false); ++ csg_reg = get_group_bound_csg_reg(group); ++ if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link), ++ "No bound csg_reg, or in wrong state")) ++ return -EIO; + -+ if (ret) { -+ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", -+ atomic_read( -+ &kbdev->csf.scheduler.non_idle_offslot_grps)); -+ /* Bring forward the next tick */ -+ kbase_csf_scheduler_tick_advance(kbdev); -+ return false; ++ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); ++ err = userio_pages_replace_phys(kbdev, vpfn, queue->phys); ++ if (likely(!err)) { ++ /* Mark the queue has been successfully mapped */ ++ queue->user_io_gpu_va = GET_VPFN_VA(vpfn); ++ } else { ++ /* Mark the queue has no mapping on its phys[] */ ++ queue->user_io_gpu_va = 0; ++ dev_dbg(kbdev->dev, ++ "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__, ++ queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle); ++ ++ /* notify the error for the bound group */ ++ if (notify_group_csg_reg_map_error(group)) ++ err = -EIO; + } + -+ dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); -+ scheduler_suspend(kbdev); -+ cancel_tick_timer(kbdev); -+ return true; ++ return err; +} + -+static void gpu_idle_worker(struct work_struct *work) ++/* Unmap a given queue's userio pages, when the queue is deleted */ ++void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ -+ struct kbase_device *kbdev = container_of( -+ work, struct kbase_device, csf.scheduler.gpu_idle_work); -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ bool scheduler_is_idle_suspendable = false; -+ bool all_groups_suspended = false; -+ -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); ++ struct kbase_queue_group *group; ++ struct kbase_csg_shared_region *csg_reg; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ u64 vpfn; + -+#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ -+ (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (kbase_reset_gpu_try_prevent(kbdev)) { -+ dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, -+ __ENCODE_KTRACE_INFO(true, false, false)); ++ /* The queue has no existing mapping, nothing to do */ ++ if (!queue || !queue->user_io_gpu_va) + return; -+ } -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ mutex_lock(&scheduler->lock); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(scheduler->state == SCHED_BUSY)) { -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); ++ group = queue->group; ++ if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region")) + return; -+ } -+#endif + -+ scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); -+ if (scheduler_is_idle_suspendable) { -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL, -+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); -+#ifdef KBASE_PM_RUNTIME -+ if (kbase_pm_gpu_sleep_allowed(kbdev) && -+ kbase_csf_scheduler_get_nr_active_csgs(kbdev)) -+ scheduler_sleep_on_idle(kbdev); -+ else -+#endif -+ all_groups_suspended = scheduler_suspend_on_idle(kbdev); ++ csg_reg = get_group_bound_csg_reg(group); + -+ KBASE_KTRACE_ADD(kbdev, 
SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u); -+ } ++ vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, -+ __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, -+ all_groups_suspended)); -+#undef __ENCODE_KTRACE_INFO ++ WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL), ++ "Unexpected restoring to dummy map update error"); ++ queue->user_io_gpu_va = 0; +} + -+static int scheduler_prepare(struct kbase_device *kbdev) ++int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; -+ int i; -+ -+ lockdep_assert_held(&scheduler->lock); -+ -+ /* Empty the groups_to_schedule */ -+ while (!list_empty(&scheduler->groups_to_schedule)) { -+ struct kbase_queue_group *grp = -+ list_first_entry(&scheduler->groups_to_schedule, -+ struct kbase_queue_group, -+ link_to_schedule); ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ int err = 0, err1; + -+ remove_scheduled_group(kbdev, grp); -+ } ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ /* Pre-scan init scheduler fields */ -+ if (WARN_ON(scheduler->ngrp_to_schedule != 0)) -+ scheduler->ngrp_to_schedule = 0; -+ scheduler->top_ctx = NULL; -+ scheduler->top_grp = NULL; -+ scheduler->csg_scan_count_for_tick = 0; -+ WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); -+ scheduler->num_active_address_spaces = 0; -+ scheduler->num_csg_slots_for_tick = 0; -+ bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); ++ if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL")) ++ return -EINVAL; + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ scheduler->tick_protm_pending_seq = -+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; -+ /* Scan out to run groups */ -+ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { -+ struct kbase_context *kctx; ++ /* If the pmode already mapped, nothing to do */ ++ if (csg_reg->pmode_mapped) ++ return 0; + -+ list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) -+ scheduler_ctx_scan_groups(kbdev, kctx, i); -+ } -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ /* P-mode map not in place and the group has allocated P-mode pages, map it */ ++ if (group->protected_suspend_buf.pma) { ++ unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; ++ struct tagged_addr *phy = shared_regs->pma_phys; ++ struct kbase_va_region *reg = csg_reg->reg; ++ u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); ++ u32 i; + -+ /* Update this tick's non-idle groups */ -+ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; ++ /* Populate the protected phys from pma to phy[] */ ++ for (i = 0; i < nr_susp_pages; i++) ++ phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa); + -+ /* Initial number of non-idle off-slot groups, before the scheduler's -+ * scheduler_apply() operation. This gives a sensible start point view -+ * of the tick. It will be subject to up/downs during the scheduler -+ * active phase. 
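The idle worker's KTRACE calls pack three booleans into a single value with __ENCODE_KTRACE_INFO(), at bit offsets 0, 4 and 8. The standalone sketch below mirrors that packing and adds an illustrative decode; the decode expressions are an addition for clarity, not something the driver provides.

#include <stdio.h>
#include <stdint.h>

#define ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
    (((uint32_t)(reset)) | (((uint32_t)(idle)) << 4) | (((uint32_t)(all_suspend)) << 8))

int main(void)
{
    uint32_t info = ENCODE_KTRACE_INFO(0, 1, 1);

    printf("encoded: 0x%03x\n", info);
    printf("reset=%u idle=%u all_suspend=%u\n",
           info & 0xf, (info >> 4) & 0xf, (info >> 8) & 0xf);
    return 0;
}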
-+ */ -+ atomic_set(&scheduler->non_idle_offslot_grps, -+ scheduler->non_idle_scanout_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL, -+ scheduler->non_idle_scanout_grps); ++ /* Add the P-mode suspend buffer mapping */ ++ err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags, ++ KBASE_MEM_GROUP_CSF_FW); + -+ /* Adds those idle but runnable groups to the scanout list */ -+ scheduler_scan_idle_groups(kbdev); ++ /* If error, restore to default dummpy */ ++ if (unlikely(err)) { ++ err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages); ++ if (unlikely(err1)) ++ dev_warn( ++ kbdev->dev, ++ "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d", ++ __func__, group->kctx->tgid, group->kctx->id, ++ group->handle); + -+ WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); ++ csg_reg->pmode_mapped = false; ++ } else ++ csg_reg->pmode_mapped = true; ++ } + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, -+ scheduler->num_active_address_spaces | -+ (((u64)scheduler->ngrp_to_schedule) << 32)); -+ set_max_csg_slots(kbdev); -+ dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", -+ scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); -+ return 0; ++ return err; +} + -+/** -+ * keep_lru_on_slots() - Check the condition for LRU is met. -+ * -+ * @kbdev: Pointer to the device. -+ * -+ * This function tries to maintain the Last-Recent-Use case on slots, when -+ * the scheduler has no non-idle off-slot CSGs for a replacement -+ * consideration. This effectively extends the previous scheduling results -+ * for the new one. That is, the last recent used CSGs are retained on slots -+ * for the new tick/tock action. -+ * -+ * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU), -+ * otherwise false. -+ */ -+static bool keep_lru_on_slots(struct kbase_device *kbdev) ++void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ bool keep_lru = false; -+ int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap, -+ kbdev->csf.global_iface.group_num); ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); ++ struct kbase_va_region *reg; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; ++ int err = 0; ++ u32 i; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) { -+ unsigned long flags; ++ /* Nothing to do for clearing up if no bound csg_reg */ ++ if (!csg_reg) ++ return; + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ /* All on-slots are idle, no non-idle off-slot CSGs available -+ * for considering a meaningful change. Set keep_lru. 
-+ */ -+ keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev); ++ reg = csg_reg->reg; ++ /* Restore mappings default dummy pages for any mapped pages */ ++ if (csg_reg->pmode_mapped) { ++ err = update_mapping_with_dummy_pages( ++ kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); ++ WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping"); + -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ csg_reg->pmode_mapped = false; ++ } + -+ dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n", -+ keep_lru, on_slots); ++ if (group->normal_suspend_buf.gpu_va) { ++ err = update_mapping_with_dummy_pages( ++ kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); ++ WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping"); + } + -+ return keep_lru; ++ /* Deal with queue uerio pages */ ++ for (i = 0; i < nr_csis; i++) ++ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]); ++ ++ group->normal_suspend_buf.gpu_va = 0; ++ group->protected_suspend_buf.gpu_va = 0; ++ ++ /* Break the binding */ ++ group->csg_reg = NULL; ++ csg_reg->grp = NULL; ++ ++ /* Put the csg_reg to the front of the unused list */ ++ if (WARN_ON_ONCE(list_empty(&csg_reg->link))) ++ list_add(&csg_reg->link, &shared_regs->unused_csg_regs); ++ else ++ list_move(&csg_reg->link, &shared_regs->unused_csg_regs); +} + -+/** -+ * prepare_fast_local_tock() - making preparation arrangement for exercizing -+ * a fast local tock inside scheduling-actions. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * -+ * The function assumes that a scheduling action of firing a fast local tock -+ * call (i.e. an equivalent tock action without dropping the lock) is desired -+ * if there are idle onslot CSGs. The function updates those affected CSGs' -+ * run-state as a preparation. This should only be called from inside the -+ * schedule_actions(), where the previous idle-flags are still considered to -+ * be reflective, following its earlier idle confirmation operational call, -+ * plus some potential newly idle CSGs in the scheduling action committing -+ * steps. -+ * -+ * Return: number of on-slots CSGs that can be considered for replacing. -+ */ -+static int prepare_fast_local_tock(struct kbase_device *kbdev) ++int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ unsigned long flags, i; -+ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ struct kbase_csg_shared_region *csg_reg; ++ int err; + -+ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ csg_reg = get_group_bound_csg_reg(group); ++ if (!csg_reg) ++ csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs, ++ struct kbase_csg_shared_region, link); + -+ /* Marking the flagged idle CSGs' run state to IDLE, so -+ * the intended fast local tock can replacing them with off-slots -+ * non-idle CSGs. 
-+ */ -+ for_each_set_bit(i, csg_bitmap, num_groups) { -+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; -+ struct kbase_queue_group *group = csg_slot->resident_group; ++ if (!WARN_ON_ONCE(!csg_reg)) { ++ struct kbase_queue_group *prev_grp = csg_reg->grp; + -+ if (!queue_group_idle_locked(group)) { -+ group->run_state = KBASE_CSF_GROUP_IDLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); -+ } ++ /* Deal with the previous binding and lazy unmap, i.e if the previous mapping not ++ * the required one, unmap it. ++ */ ++ if (prev_grp == group) { ++ /* Update existing bindings, if there have been some changes */ ++ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); ++ if (likely(!err)) ++ err = csg_reg_update_on_csis(kbdev, group, NULL); ++ } else ++ err = group_bind_csg_reg(kbdev, group, csg_reg); ++ } else { ++ /* This should not have been possible if the code operates rightly */ ++ dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d", ++ __func__, group->handle, group->kctx->tgid, group->kctx->id); ++ return -EIO; + } + -+ /* Return the number of idle slots for potential replacement */ -+ return bitmap_weight(csg_bitmap, num_groups); ++ if (likely(!err)) ++ notify_group_csg_reg_map_done(group); ++ else ++ notify_group_csg_reg_map_error(group); ++ ++ return err; +} + -+static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, -+ unsigned int timeout_ms) ++static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, ++ struct kbase_csg_shared_region *csg_reg) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ int err = 0; -+ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; ++ const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); ++ struct kbase_va_region *reg; ++ u64 vpfn; ++ int err, i; + -+ lockdep_assert_held(&scheduler->lock); ++ INIT_LIST_HEAD(&csg_reg->link); ++ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, ++ KBASE_REG_ZONE_MCU_SHARED); + -+ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); ++ if (!reg) { ++ dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", ++ __func__, nr_csg_reg_pages); ++ return -ENOMEM; ++ } + -+ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { -+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); ++ /* Insert the region into rbtree, so it becomes ready to use */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1); ++ reg->flags &= ~KBASE_REG_FREE; ++ mutex_unlock(&kbdev->csf.reg_lock); ++ if (err) { ++ kfree(reg); ++ dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__, ++ nr_csg_reg_pages); ++ return err; ++ } + -+ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); ++ /* Initialize the mappings so MMU only need to update the the corresponding ++ * mapped phy-pages at runtime. ++ * Map the normal suspend buffer pages to the prepared dummy phys[]. 
++ */ ++ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); ++ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + -+ remaining = wait_event_timeout( -+ kbdev->csf.event_wait, -+ slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); ++ if (unlikely(err)) ++ goto fail_susp_map_fail; + -+ if (likely(remaining)) { -+ u32 i; ++ /* Map the protected suspend buffer pages to the prepared dummy phys[] */ ++ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); ++ err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + -+ for_each_set_bit(i, changed, num_groups) { -+ struct kbase_queue_group *group; ++ if (unlikely(err)) ++ goto fail_pmod_map_fail; + -+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) -+ continue; ++ for (i = 0; i < nr_csis; i++) { ++ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); ++ err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + -+ /* The on slot csg is now stopped */ -+ clear_bit(i, slot_mask_local); ++ if (unlikely(err)) ++ goto fail_userio_pages_map_fail; ++ } + -+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( -+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); ++ /* Replace the previous NULL-valued field with the successully initialized reg */ ++ csg_reg->reg = reg; + -+ group = scheduler->csg_slots[i].resident_group; -+ if (likely(group)) { -+ /* Only do save/cleanup if the -+ * group is not terminated during -+ * the sleep. -+ */ -+ save_csg_slot(group); -+ if (cleanup_csg_slot(group)) -+ sched_evict_group(group, true, true); -+ } -+ } -+ } else { -+ dev_warn( -+ kbdev->dev, -+ "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx", -+ kbase_backend_get_cycle_cnt(kbdev), slot_mask[0], -+ slot_mask_local[0]); -+ /* Return the bitmask of the timed out slots to the caller */ -+ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); ++ return 0; + -+ err = -ETIMEDOUT; -+ } ++fail_userio_pages_map_fail: ++ while (i-- > 0) { ++ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, ++ MCU_AS_NR, true); + } + ++ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); ++fail_pmod_map_fail: ++ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); ++fail_susp_map_fail: ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ kfree(reg); ++ + return err; +} + -+/** -+ * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG -+ * -+ * @kbdev: Pointer to the device -+ * -+ * Used to allow for speedier starting/resumption of another CSG. The worst-case -+ * scenario of the evicted CSG being scheduled next is expected to be rare. -+ * Also, the eviction will not be applied if the GPU is running in protected mode. -+ * Otherwise the the eviction attempt would force the MCU to quit the execution of -+ * the protected mode, and likely re-request to enter it again. 
-+ */ -+static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) ++/* Note, this helper can only be called on scheduler shutdown */ ++static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, ++ struct kbase_csg_shared_region *csg_reg) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ size_t i; -+ struct kbase_queue_group *lru_idle_group = NULL; -+ const u32 total_csg_slots = kbdev->csf.global_iface.group_num; -+ const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >= -+ (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS)); -+ u8 as_usage[BASE_MAX_NR_AS] = { 0 }; ++ struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; ++ struct kbase_va_region *reg = csg_reg->reg; ++ const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; ++ u64 vpfn; ++ int i; + -+ lockdep_assert_held(&scheduler->lock); -+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) -+ return; ++ for (i = 0; i < nr_csis; i++) { ++ vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, ++ MCU_AS_NR, true); ++ } + -+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE)); -+ if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots) -+ return; /* Some CSG slots remain unused */ ++ vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); ++ vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); ++ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, ++ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + -+ if (all_addr_spaces_used) { -+ for (i = 0; i != total_csg_slots; ++i) { -+ if (scheduler->csg_slots[i].resident_group != NULL) { -+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr < -+ 0)) -+ continue; ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(kbdev, reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ kfree(reg); ++} + -+ as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++; -+ } -+ } -+ } ++int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; ++ struct kbase_csg_shared_region *array_csg_regs; ++ const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ const u32 nr_groups = kbdev->csf.global_iface.group_num; ++ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; ++ const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES); ++ u32 i; ++ int err; + -+ for (i = 0; i != total_csg_slots; ++i) { -+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; ++ shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev); ++ INIT_LIST_HEAD(&shared_regs->unused_csg_regs); + -+ /* We expect that by this point all groups would normally be -+ * assigned a physical CSG slot, but if circumstances have -+ * changed then bail out of this optimisation. 
-+ */ -+ if (group == NULL) -+ return; ++ shared_regs->dummy_phys = ++ kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL); ++ if (!shared_regs->dummy_phys) ++ return -ENOMEM; + -+ /* Real-time priority CSGs must be kept on-slot even when -+ * idle. -+ */ -+ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && -+ (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && -+ ((lru_idle_group == NULL) || -+ (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { -+ if (WARN_ON(group->kctx->as_nr < 0)) -+ continue; ++ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, ++ &shared_regs->dummy_phys[0], false, NULL) <= 0) ++ return -ENOMEM; + -+ /* If all address spaces are used, we need to ensure the group does not -+ * share the AS with other active CSGs. Or CSG would be freed without AS -+ * and this optimization would not work. -+ */ -+ if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1)) -+ lru_idle_group = group; -+ } -+ } ++ shared_regs->dummy_phys_allocated = true; ++ set_page_meta_status_not_movable(shared_regs->dummy_phys[0]); + -+ if (lru_idle_group != NULL) { -+ unsigned long slot_mask = 1 << lru_idle_group->csg_nr; ++ /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */ ++ for (i = 1; i < nr_dummy_phys; i++) ++ shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0]; + -+ dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d", -+ lru_idle_group->handle, lru_idle_group->kctx->tgid, -+ lru_idle_group->kctx->id, lru_idle_group->csg_nr); -+ suspend_queue_group(lru_idle_group); -+ if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { -+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; ++ shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL); ++ if (!shared_regs->pma_phys) ++ return -ENOMEM; + -+ dev_warn( -+ kbdev->dev, -+ "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)", -+ kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle, -+ lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, -+ lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms); -+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) -+ error_type = DF_PING_REQUEST_TIMEOUT; -+ schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type); -+ } ++ array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL); ++ if (!array_csg_regs) ++ return -ENOMEM; ++ shared_regs->array_csg_regs = array_csg_regs; ++ ++ /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs ++ * are properly populated and ready to use. 
Now initialize the items in ++ * shared_regs->array_csg_regs[] ++ */ ++ for (i = 0; i < nr_csg_regs; i++) { ++ err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]); ++ if (err) ++ return err; ++ ++ list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs); + } ++ ++ return 0; +} + -+static void schedule_actions(struct kbase_device *kbdev, bool is_tick) ++void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; -+ struct kbase_queue_group *protm_grp; -+ int ret; -+ bool skip_scheduling_actions; -+ bool skip_idle_slots_update; -+ bool new_protm_top_grp = false; -+ int local_tock_slots = 0; -+ -+ kbase_reset_gpu_assert_prevented(kbdev); -+ lockdep_assert_held(&scheduler->lock); ++ struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; ++ struct kbase_csg_shared_region *array_csg_regs = ++ (struct kbase_csg_shared_region *)shared_regs->array_csg_regs; ++ const u32 nr_groups = kbdev->csf.global_iface.group_num; ++ const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + -+ ret = kbase_csf_scheduler_wait_mcu_active(kbdev); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Wait for MCU power on failed on scheduling tick/tock"); -+ return; -+ } ++ if (array_csg_regs) { ++ struct kbase_csg_shared_region *csg_reg; ++ u32 i, cnt_csg_regs = 0; + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); -+ skip_scheduling_actions = -+ !skip_idle_slots_update && kbdev->protected_mode; -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ for (i = 0; i < nr_csg_regs; i++) { ++ csg_reg = &array_csg_regs[i]; ++ /* There should not be any group mapping bindings */ ++ WARN_ONCE(csg_reg->grp, "csg_reg has a bound group"); + -+ /* Skip scheduling actions as GPU reset hasn't been performed yet to -+ * rectify the anomaly that happened when pmode exit interrupt wasn't -+ * received before the termination of group running in pmode. -+ */ -+ if (unlikely(skip_scheduling_actions)) { -+ dev_info(kbdev->dev, -+ "Scheduling actions skipped due to anomaly in pmode"); -+ return; -+ } ++ if (csg_reg->reg) { ++ shared_mcu_csg_reg_term(kbdev, csg_reg); ++ cnt_csg_regs++; ++ } ++ } + -+ if (!skip_idle_slots_update) { -+ /* Updating on-slot idle CSGs when not in protected mode. */ -+ scheduler_handle_idle_slots(kbdev); ++ /* The nr_susp_regs counts should match the array_csg_regs' length */ ++ list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link) ++ cnt_csg_regs--; + -+ /* Determine whether the condition is met for keeping the -+ * Last-Recent-Use. If true, skipping the remaining action -+ * steps and thus extending the previous tick's arrangement, -+ * in particular, no alterations to on-slot CSGs. -+ */ -+ if (keep_lru_on_slots(kbdev)) -+ return; ++ WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs"); ++ kfree(shared_regs->array_csg_regs); + } + -+ if (is_tick) -+ scheduler_rotate(kbdev); ++ if (shared_regs->dummy_phys_allocated) { ++ struct page *page = as_page(shared_regs->dummy_phys[0]); + -+redo_local_tock: -+ scheduler_prepare(kbdev); -+ /* Need to specifically enqueue the GPU idle work if there are no groups -+ * to schedule despite the runnable groups. This scenario will happen -+ * if System suspend is done when all groups are idle and and no work -+ * is submitted for the groups after the System resume. 
-+ */ -+ if (unlikely(!scheduler->ngrp_to_schedule && -+ scheduler->total_runnable_grps)) { -+ dev_dbg(kbdev->dev, "No groups to schedule in the tick"); -+ enqueue_gpu_idle_work(scheduler); -+ return; ++ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); + } -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ protm_grp = scheduler->active_protm_grp; -+ -+ /* Avoid update if the top-group remains unchanged and in protected -+ * mode. For the said case, all the slots update is effectively -+ * competing against the active protected mode group (typically the -+ * top-group). If we update other slots, even on leaving the -+ * top-group slot untouched, the firmware would exit the protected mode -+ * for interacting with the host-driver. After it, as the top-group -+ * would again raise the request for entering protected mode, we would -+ * be actively doing the switching over twice without progressing the -+ * queue jobs. -+ */ -+ if (protm_grp && scheduler->top_grp == protm_grp) { -+ dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", -+ protm_grp->handle); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ -+ update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); -+ remove_scheduled_group(kbdev, protm_grp); -+ scheduler_check_pmode_progress(kbdev); -+ } else if (scheduler->top_grp) { -+ if (protm_grp) -+ dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", -+ protm_grp->handle); + -+ if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, -+ kbdev->csf.global_iface.groups[0].stream_num)) { -+ dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", -+ scheduler->top_grp->handle, -+ scheduler->top_grp->kctx->tgid, -+ scheduler->top_grp->kctx->id); ++ kfree(shared_regs->dummy_phys); ++ kfree(shared_regs->pma_phys); ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h +new file mode 100644 +index 000000000..61943cbbf +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h +@@ -0,0 +1,139 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* When entering protected mode all CSG slots can be occupied -+ * but only the protected mode CSG will be running. Any event -+ * that would trigger the execution of an on-slot idle CSG will -+ * need to be handled by the host during protected mode. -+ */ -+ new_protm_top_grp = true; -+ } ++#ifndef _KBASE_CSF_MCU_SHARED_REG_H_ ++#define _KBASE_CSF_MCU_SHARED_REG_H_ + -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++/** ++ * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with ++ * scheduling action. 
Essential runtime resources ++ * are bound with the group for it to run ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the group that is placed into active on-slot running by the scheduler. ++ * ++ */ ++void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + -+ scheduler_apply(kbdev); ++/** ++ * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with ++ * scheduling action. Some of bound runtime ++ * resources can be reallocated for others to use ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the group that is placed off-slot by the scheduler. ++ * ++ */ ++void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + -+ /* Scheduler is dropping the exec of the previous protm_grp, -+ * Until the protm quit completes, the GPU is effectively -+ * locked in the secure mode. -+ */ -+ if (protm_grp) -+ scheduler_force_protm_exit(kbdev); ++/** ++ * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected ++ * suspend buffer pages to be mapped for supporting ++ * protected mode operations. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping. ++ * ++ * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an ++ * error code is returned. ++ */ ++int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + -+ wait_csg_slots_start(kbdev); -+ wait_csg_slots_finish_prio_update(kbdev); ++/** ++ * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the ++ * given group is evicted out of the runtime ++ * operations. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the group that has been evicted out of set of operational groups. ++ * ++ * This function will taken away any of the bindings/mappings immediately so the resources ++ * are not tied up to the given group, which has been evicted out of scheduling action for ++ * termination. ++ */ ++void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + -+ if (new_protm_top_grp) { -+ scheduler_group_check_protm_enter(kbdev, -+ scheduler->top_grp); -+ } else if (!local_tock_slots && -+ atomic_read(&scheduler->non_idle_offslot_grps)) { -+ /* If during the scheduling action, we have off-slot -+ * non-idle CSGs in waiting, if it happens to have -+ * some new idle slots emerging during the committed -+ * action steps, trigger a one-off fast local tock. -+ */ -+ local_tock_slots = prepare_fast_local_tock(kbdev); ++/** ++ * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be ++ * run on slot. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @queue: Pointer to the queue that requires some runtime resource to be bound for joining ++ * others that are already running on-slot with their bound group. ++ * ++ * Return: 0 on success, or negative on failure. 
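++ *
++ * See also kbase_csf_mcu_shared_drop_stopped_queue() below, the counterpart
++ * used once a queue has been stopped from its operational state.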
++ */ ++int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + -+ if (local_tock_slots) { -+ dev_dbg(kbdev->dev, -+ "In-cycle %d idle slots available\n", -+ local_tock_slots); -+ goto redo_local_tock; -+ } -+ } -+ } else { -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ } ++/** ++ * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped ++ * from its operational state from a group. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @queue: Pointer to the queue that has been stopped from operational state. ++ * ++ */ ++void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + -+ evict_lru_or_blocked_csg(kbdev); -+} ++/** ++ * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given ++ * group for ready to run on-slot. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the queue group that requires the runtime resources. ++ * ++ * This function binds/maps the required suspend buffer pages and userio pages for the given ++ * group, readying it to run on-slot. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + +/** -+ * can_skip_scheduling() - Check if the scheduling actions can be skipped. ++ * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for ++ * the given device. + * -+ * @kbdev: Pointer to the device ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * This function is called on a scheduling tick or tock to determine if the -+ * scheduling actions can be skipped. -+ * If Scheduler is in sleeping state and exit from the sleep state is allowed -+ * then activation of MCU will be triggered. The tick or tock work item could -+ * have been in flight when the state of Scheduler was changed to sleeping. ++ * This function allocate and initialize the MCU shared VA regions for runtime operations ++ * of the CSF scheduler. + * -+ * Return: true if the scheduling actions can be skipped. ++ * Return: 0 on success, or an error code. + */ -+static bool can_skip_scheduling(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev); + -+ lockdep_assert_held(&scheduler->lock); ++/** ++ * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for ++ * the given device. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function terminates the MCU shared VA regions allocated for runtime operations ++ * of the CSF scheduler. ++ */ ++void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev); + -+ if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev))) -+ return true; ++#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c +new file mode 100644 +index 000000000..bf1835b5b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c +@@ -0,0 +1,163 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (scheduler->state == SCHED_SUSPENDED) -+ return true; ++#include "mali_kbase_csf_protected_memory.h" ++#include + -+#ifdef KBASE_PM_RUNTIME -+ if (scheduler->state == SCHED_SLEEPING) { -+ unsigned long flags; ++#if IS_ENABLED(CONFIG_OF) ++#include ++#endif + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbdev->pm.backend.exit_gpu_sleep_mode) { -+ int ret = scheduler_pm_active_after_sleep(kbdev, &flags); ++int kbase_csf_protected_memory_init(struct kbase_device *const kbdev) ++{ ++ int err = 0; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (!ret) { -+ scheduler->state = SCHED_INACTIVE; -+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); -+ return false; -+ } ++#if IS_ENABLED(CONFIG_OF) ++ struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node, ++ "protected-memory-allocator", 0); ++ if (!pma_node) { ++ dev_info(kbdev->dev, "Protected memory allocator not available\n"); ++ } else { ++ struct platform_device *const pdev = ++ of_find_device_by_node(pma_node); + -+ dev_info(kbdev->dev, -+ "Skip scheduling due to system suspend"); -+ return true; ++ kbdev->csf.pma_dev = NULL; ++ if (!pdev) { ++ dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n"); ++ } else { ++ kbdev->csf.pma_dev = platform_get_drvdata(pdev); ++ if (!kbdev->csf.pma_dev) { ++ dev_info(kbdev->dev, "Protected memory allocator is not ready\n"); ++ err = -EPROBE_DEFER; ++ } else if (!try_module_get(kbdev->csf.pma_dev->owner)) { ++ dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n"); ++ err = -ENODEV; ++ } else { ++ dev_info(kbdev->dev, "Protected memory allocator successfully loaded\n"); ++ } + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return true; ++ of_node_put(pma_node); + } +#endif + -+ return false; ++ return err; +} + -+static void schedule_on_tock(struct work_struct *work) ++void kbase_csf_protected_memory_term(struct kbase_device *const kbdev) +{ -+ struct kbase_device *kbdev = -+ container_of(work, struct kbase_device, csf.scheduler.tock_work.work); -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ int err; -+ -+ err = kbase_reset_gpu_try_prevent(kbdev); -+ /* Regardless of whether reset failed or is currently happening, exit -+ * early -+ */ -+ if (err) -+ return; -+ -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ mutex_lock(&scheduler->lock); -+ if (can_skip_scheduling(kbdev)) -+ { -+ atomic_set(&scheduler->pending_tock_work, false); -+ goto exit_no_schedule_unlock; -+ } -+ -+ WARN_ON(!(scheduler->state == SCHED_INACTIVE)); -+ scheduler->state = SCHED_BUSY; -+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); -+ -+ /* Undertaking schedule action steps */ -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u); -+ while 
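++	 *
++	 * For example (illustrative only, assuming 4 KiB base pages so the
++	 * 2 MiB order gives num_pages_order = 512): a request for 700 pages
++	 * is rounded up to div64_u64(700 + 511, 512) = 2 order-sized
++	 * allocations.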
(atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true) -+ schedule_actions(kbdev, false); -+ -+ /* Record time information on a non-skipped tock */ -+ scheduler->last_schedule = jiffies; -+ -+ scheduler->state = SCHED_INACTIVE; -+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); -+ if (!scheduler->total_runnable_grps) -+ enqueue_gpu_idle_work(scheduler); -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); -+ -+ dev_dbg(kbdev->dev, -+ "Waking up for event after schedule-on-tock completes."); -+ wake_up_all(&kbdev->csf.event_wait); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); -+ return; -+ -+exit_no_schedule_unlock: -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); ++ if (kbdev->csf.pma_dev) ++ module_put(kbdev->csf.pma_dev->owner); +} + -+static void schedule_on_tick(struct work_struct *work) ++struct protected_memory_allocation ** ++ kbase_csf_protected_memory_alloc( ++ struct kbase_device *const kbdev, ++ struct tagged_addr *phys, ++ size_t num_pages, ++ bool is_small_page) +{ -+ struct kbase_device *kbdev = -+ container_of(work, struct kbase_device, csf.scheduler.tick_work); -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ -+ int err = kbase_reset_gpu_try_prevent(kbdev); -+ /* Regardless of whether reset failed or is currently happening, exit -+ * early -+ */ -+ if (err) -+ return; -+ -+ kbase_debug_csf_fault_wait_completion(kbdev); -+ mutex_lock(&scheduler->lock); -+ -+ WARN_ON(scheduler->tick_timer_active); -+ if (can_skip_scheduling(kbdev)) -+ goto exit_no_schedule_unlock; ++ size_t i; ++ struct protected_memory_allocator_device *pma_dev = ++ kbdev->csf.pma_dev; ++ struct protected_memory_allocation **pma = NULL; ++ unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER; ++ unsigned int num_pages_order; + -+ scheduler->state = SCHED_BUSY; -+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); ++ if (is_small_page) ++ order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER; + -+ /* Undertaking schedule action steps */ -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps); -+ schedule_actions(kbdev, true); ++ num_pages_order = (1u << order); + -+ /* Record time information */ -+ scheduler->last_schedule = jiffies; ++ /* Ensure the requested num_pages is aligned with ++ * the order type passed as argument. ++ * ++ * pma_alloc_page() will then handle the granularity ++ * of the allocation based on order. 
++ */ ++ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order); + -+ /* Kicking next scheduling if needed */ -+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && -+ (scheduler->total_runnable_grps > 0)) { -+ start_tick_timer(kbdev); -+ dev_dbg(kbdev->dev, -+ "scheduling for next tick, num_runnable_groups:%u\n", -+ scheduler->total_runnable_grps); -+ } else if (!scheduler->total_runnable_grps) { -+ enqueue_gpu_idle_work(scheduler); -+ } ++ pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL); + -+ scheduler->state = SCHED_INACTIVE; -+ mutex_unlock(&scheduler->lock); -+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); -+ kbase_reset_gpu_allow(kbdev); ++ if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma) ++ return NULL; + -+ dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); -+ wake_up_all(&kbdev->csf.event_wait); -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, -+ scheduler->total_runnable_grps); -+ return; ++ for (i = 0; i < num_pages; i++) { ++ phys_addr_t phys_addr; + -+exit_no_schedule_unlock: -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); -+} ++ pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order); ++ if (!pma[i]) ++ break; + -+static int suspend_active_queue_groups(struct kbase_device *kbdev, -+ unsigned long *slot_mask) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = kbdev->csf.global_iface.group_num; -+ u32 slot_num; -+ int ret; ++ phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]); + -+ lockdep_assert_held(&scheduler->lock); ++ if (order) { ++ size_t j; + -+ for (slot_num = 0; slot_num < num_groups; slot_num++) { -+ struct kbase_queue_group *group = -+ scheduler->csg_slots[slot_num].resident_group; ++ *phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE); + -+ if (group) { -+ suspend_queue_group(group); -+ set_bit(slot_num, slot_mask); ++ for (j = 1; j < num_pages_order; j++) { ++ *phys++ = as_tagged_tag(phys_addr + ++ PAGE_SIZE * j, ++ HUGE_PAGE); ++ } ++ } else { ++ phys[i] = as_tagged(phys_addr); + } + } + -+ ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); -+ return ret; ++ if (i != num_pages) { ++ kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page); ++ return NULL; ++ } ++ ++ return pma; +} + -+static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) ++void kbase_csf_protected_memory_free( ++ struct kbase_device *const kbdev, ++ struct protected_memory_allocation **pma, ++ size_t num_pages, ++ bool is_small_page) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; -+ int ret; -+ int ret2; -+ -+ mutex_lock(&scheduler->lock); ++ size_t i; ++ struct protected_memory_allocator_device *pma_dev = ++ kbdev->csf.pma_dev; ++ unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER); + -+ ret = suspend_active_queue_groups(kbdev, slot_mask); ++ if (is_small_page) ++ num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); + -+ if (ret) { -+ dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", -+ kbdev->csf.global_iface.group_num, slot_mask); -+ } ++ if (WARN_ON(!pma_dev) || WARN_ON(!pma)) ++ return; + -+ /* Need to flush the GPU cache to ensure suspend buffer -+ * contents are not lost on reset of GPU. -+ * Do this even if suspend operation had timed out for some of -+ * the CSG slots. 
-+ * In case the scheduler already in suspended state, the -+ * cache clean is required as the async reset request from -+ * the debugfs may race against the scheduler suspend operation -+ * due to the extra context ref-count, which prevents the -+ * L2 powering down cache clean operation in the non racing -+ * case. -+ * LSC is being flushed together to cover buslogging usecase, -+ * where GPU reset is done regularly to avoid the log buffer -+ * overflow. ++ /* Ensure the requested num_pages is aligned with ++ * the order type passed as argument. ++ * ++ * pma_alloc_page() will then handle the granularity ++ * of the allocation based on order. + */ -+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); -+ ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, -+ kbdev->reset_timeout_ms); -+ if (ret2) { -+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", -+ kbase_backend_get_cycle_cnt(kbdev)); -+ if (!ret) -+ ret = ret2; -+ } ++ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order); + -+ mutex_unlock(&scheduler->lock); ++ for (i = 0; i < num_pages; i++) ++ pma_dev->ops.pma_free_page(pma_dev, pma[i]); + -+ return ret; ++ kfree(pma); +} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h +new file mode 100644 +index 000000000..8c1aa919f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_PROTECTED_MEMORY_H_ ++#define _KBASE_CSF_PROTECTED_MEMORY_H_ + ++#include "mali_kbase.h" +/** -+ * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode -+ * groups when reset is done during -+ * protected mode execution. ++ * kbase_csf_protected_memory_init - Initilaise protected memory allocator. + * -+ * @kbdev: Pointer to the device. ++ * @kbdev: Device pointer. + * -+ * This function is called at the time of GPU reset, before the suspension of -+ * queue groups, to handle the case when the reset is getting performed whilst -+ * GPU is in protected mode. -+ * On entry to protected mode all the groups, except the top group that executes -+ * in protected mode, are implicitly suspended by the FW. Thus this function -+ * simply marks the normal mode groups as suspended (and cleans up the -+ * corresponding CSG slots) to prevent their potential forceful eviction from -+ * the Scheduler. So if GPU was in protected mode and there was no fault, then -+ * only the protected mode group would be suspended in the regular way post exit -+ * from this function. 
And if GPU was in normal mode, then all on-slot groups -+ * will get suspended in the regular way. ++ * Return: 0 if success, or an error code on failure. ++ */ ++int kbase_csf_protected_memory_init(struct kbase_device *const kbdev); ++ ++/** ++ * kbase_csf_protected_memory_term - Terminate prtotected memory allocator. + * -+ * Return: true if the groups remaining on the CSG slots need to be suspended in -+ * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. ++ * @kbdev: Device pointer. + */ -+static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 const num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_queue_group *protm_grp; -+ bool suspend_on_slot_groups = true; -+ bool pmode_active; -+ unsigned long flags; -+ u32 csg_nr; ++void kbase_csf_protected_memory_term(struct kbase_device *const kbdev); + -+ mutex_lock(&scheduler->lock); ++/** ++ * kbase_csf_protected_memory_alloc - Allocate protected memory pages. ++ * ++ * @kbdev: Device pointer. ++ * @phys: Array of physical addresses to be filled in by the protected ++ * memory allocator. ++ * @num_pages: Number of pages requested to be allocated. ++ * @is_small_page: Flag used to select the order of protected memory page. ++ * ++ * Return: Pointer to an array of protected memory allocations on success, ++ * or NULL on failure. ++ */ ++struct protected_memory_allocation ** ++ kbase_csf_protected_memory_alloc( ++ struct kbase_device *const kbdev, ++ struct tagged_addr *phys, ++ size_t num_pages, ++ bool is_small_page); + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ protm_grp = scheduler->active_protm_grp; -+ pmode_active = kbdev->protected_mode; ++/** ++ * kbase_csf_protected_memory_free - Free the allocated ++ * protected memory pages ++ * ++ * @kbdev: Device pointer. ++ * @pma: Array of pointer to protected memory allocations. ++ * @num_pages: Number of pages to be freed. ++ * @is_small_page: Flag used to select the order of protected memory page. ++ */ ++void kbase_csf_protected_memory_free( ++ struct kbase_device *const kbdev, ++ struct protected_memory_allocation **pma, ++ size_t num_pages, ++ bool is_small_page); ++#endif +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +new file mode 100644 +index 000000000..b5bf7bbbc +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +@@ -0,0 +1,1678 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (likely(!protm_grp && !pmode_active)) { -+ /* Case 1: GPU is not in protected mode or it successfully -+ * exited protected mode. 
All on-slot groups can be suspended in -+ * the regular way before reset. -+ */ -+ suspend_on_slot_groups = true; -+ } else if (protm_grp && pmode_active) { -+ /* Case 2: GPU went successfully into protected mode and hasn't -+ * exited from it yet and the protected mode group is still -+ * active. If there was no fault for the protected mode group -+ * then it can be suspended in the regular way before reset. -+ * The other normal mode on-slot groups were already implicitly -+ * suspended on entry to protected mode so they can be marked as -+ * suspended right away. -+ */ -+ suspend_on_slot_groups = !protm_grp->faulted; -+ } else if (!protm_grp && pmode_active) { -+ /* Case 3: GPU went successfully into protected mode and hasn't -+ * exited from it yet but the protected mode group got deleted. -+ * This would have happened if the FW got stuck during protected -+ * mode for some reason (like GPU page fault or some internal -+ * error). In normal cases FW is expected to send the pmode exit -+ * interrupt before it handles the CSG termination request. -+ * The other normal mode on-slot groups would already have been -+ * implicitly suspended on entry to protected mode so they can be -+ * marked as suspended right away. -+ */ -+ suspend_on_slot_groups = false; -+ } else if (protm_grp && !pmode_active) { -+ /* Case 4: GPU couldn't successfully enter protected mode, i.e. -+ * PROTM_ENTER request had timed out. -+ * All the on-slot groups need to be suspended in the regular -+ * way before reset. -+ */ -+ suspend_on_slot_groups = true; -+ } ++/* ++ * This header was originally autogenerated, but it is now ok (and ++ * expected) to have to add to it. ++ */ + -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++#ifndef _KBASE_CSF_REGISTERS_H_ ++#define _KBASE_CSF_REGISTERS_H_ + -+ if (likely(!pmode_active)) -+ goto unlock; ++/* ++ * Begin register sets ++ */ + -+ /* GPU hasn't exited protected mode, so all the on-slot groups barring -+ * the protected mode group can be marked as suspended right away. -+ */ -+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { -+ struct kbase_queue_group *const group = -+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; -+ int new_val; ++/* CS_KERNEL_INPUT_BLOCK base address */ ++#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 ++#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) + -+ if (!group || (group == protm_grp)) -+ continue; ++/* CS_KERNEL_OUTPUT_BLOCK base address */ ++#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000 ++#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r)) + -+ cleanup_csg_slot(group); -+ group->run_state = KBASE_CSF_GROUP_SUSPENDED; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state); ++/* CS_USER_INPUT_BLOCK base address */ ++#define CS_USER_INPUT_BLOCK_BASE 0x0000 ++#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r)) + -+ /* Simply treat the normal mode groups as non-idle. The tick -+ * scheduled after the reset will re-initialize the counter -+ * anyways. 
-+ */ -+ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); -+ } ++/* CS_USER_OUTPUT_BLOCK base address */ ++#define CS_USER_OUTPUT_BLOCK_BASE 0x0000 ++#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r)) + -+unlock: -+ mutex_unlock(&scheduler->lock); -+ return suspend_on_slot_groups; -+} ++/* CSG_INPUT_BLOCK base address */ ++#define CSG_INPUT_BLOCK_BASE 0x0000 ++#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r)) + -+static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) -+{ -+ cancel_work_sync(&scheduler->tick_work); -+} ++/* CSG_OUTPUT_BLOCK base address */ ++#define CSG_OUTPUT_BLOCK_BASE 0x0000 ++#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r)) + -+static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) -+{ -+ atomic_set(&scheduler->pending_tock_work, false); -+ cancel_delayed_work_sync(&scheduler->tock_work); -+} ++/* GLB_CONTROL_BLOCK base address */ ++#define GLB_CONTROL_BLOCK_BASE 0x04000000 ++#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r)) + -+static void scheduler_inner_reset(struct kbase_device *kbdev) -+{ -+ u32 const num_groups = kbdev->csf.global_iface.group_num; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; ++/* GLB_INPUT_BLOCK base address */ ++#define GLB_INPUT_BLOCK_BASE 0x0000 ++#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r)) + -+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); ++/* GLB_OUTPUT_BLOCK base address */ ++#define GLB_OUTPUT_BLOCK_BASE 0x0000 ++#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) + -+ /* Cancel any potential queued delayed work(s) */ -+ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); -+ cancel_tick_timer(kbdev); -+ cancel_tick_work(scheduler); -+ cancel_tock_work(scheduler); -+ cancel_delayed_work_sync(&scheduler->ping_work); ++/* End register sets */ + -+ mutex_lock(&scheduler->lock); ++/* ++ * Begin register offsets ++ */ + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); -+ if (scheduler->active_protm_grp) -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, -+ 0u); -+ scheduler->active_protm_grp = NULL; -+ memset(kbdev->csf.scheduler.csg_slots, 0, -+ num_groups * sizeof(struct kbase_csf_csg_slot)); -+ bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++/* DOORBELLS register offsets */ ++#define DOORBELL_0 0x0000 /* () Doorbell 0 register */ ++#define DOORBELL(n) (DOORBELL_0 + (n)*65536) ++#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r)) ++#define DOORBELL_COUNT 1024 + -+ scheduler->top_ctx = NULL; -+ scheduler->top_grp = NULL; ++/* DOORBELL_BLOCK register offsets */ ++#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */ + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, -+ scheduler->num_active_address_spaces | -+ (((u64)scheduler->total_runnable_grps) << 32)); ++/* CS_KERNEL_INPUT_BLOCK register offsets */ ++#define CS_REQ 0x0000 /* () CS request flags */ ++#define CS_CONFIG 0x0004 /* () CS configuration */ ++#define CS_ACK_IRQ_MASK 0x000C /* () Command steam interrupt mask */ ++#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */ ++#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */ 
++#define CS_SIZE 0x0018 /* () Size of the ring buffer */ ++#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */ ++#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */ ++#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */ ++#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */ ++#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */ ++#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */ ++#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode input page address, low word */ ++#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */ ++#define CS_INSTR_CONFIG 0x0040 /* () Instrumentation buffer configuration */ ++#define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */ ++#define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */ ++#define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ + -+ mutex_unlock(&scheduler->lock); -+} ++/* CS_KERNEL_OUTPUT_BLOCK register offsets */ ++#define CS_ACK 0x0000 /* () CS acknowledge flags */ ++#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */ ++#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */ ++#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */ ++#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */ ++#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */ ++#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */ ++#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */ ++#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */ ++#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */ ++#define CS_FAULT 0x0080 /* () Recoverable fault information */ ++#define CS_FATAL 0x0084 /* () Unrecoverable fault information */ ++#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ ++#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */ ++#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ ++#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ ++#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ ++#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ ++#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ ++#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */ ++#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */ + -+void kbase_csf_scheduler_reset(struct kbase_device *kbdev) -+{ -+ struct kbase_context *kctx; ++/* CS_USER_INPUT_BLOCK register offsets */ ++#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */ ++#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */ ++#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */ ++#define 
CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */ + -+ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++/* CS_USER_OUTPUT_BLOCK register offsets */ ++#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */ ++#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */ ++#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */ + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); ++/* CSG_INPUT_BLOCK register offsets */ ++#define CSG_REQ 0x0000 /* () CSG request */ ++#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ ++#define CSG_DB_REQ 0x0008 /* () Global doorbell request */ ++#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ ++#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ ++#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ ++#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ ++#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ ++#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ ++#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ ++#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ ++#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ ++#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ ++#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ ++#define CSG_CONFIG 0x0050 /* () CSG configuration options */ ++#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ ++#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */ ++#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */ + -+ kbase_debug_csf_fault_wait_completion(kbdev); ++/* CSG_OUTPUT_BLOCK register offsets */ ++#define CSG_ACK 0x0000 /* () CSG acknowledge flags */ ++#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */ ++#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */ ++#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ ++#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ ++#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ + -+ if (scheduler_handle_reset_in_protected_mode(kbdev) && -+ !suspend_active_queue_groups_on_reset(kbdev)) { -+ /* As all groups have been successfully evicted from the CSG -+ * slots, clear out thee scheduler data fields and return -+ */ -+ scheduler_inner_reset(kbdev); -+ return; -+ } ++/* GLB_CONTROL_BLOCK register offsets */ ++#define GLB_VERSION 0x0000 /* () Global interface version */ ++#define GLB_FEATURES 0x0004 /* () Global interface features */ ++#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ ++#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ ++#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ ++#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ ++#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ ++#define GLB_INSTR_FEATURES \ ++ 0x001C /* () TRACE_POINT instrumentation. 
(csf >= 1.1.0) */ ++#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ ++#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) ++#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) ++#define GROUP_CONTROL_COUNT 16 + -+ mutex_lock(&kbdev->kctx_list_lock); ++/* STREAM_CONTROL_BLOCK register offsets */ ++#define STREAM_FEATURES 0x0000 /* () CSI features */ ++#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ ++#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ + -+ /* The loop to iterate over the kbase contexts is present due to lock -+ * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock. -+ * CSF ioctls first take kctx->csf.lock which is context-specific and -+ * then take kbdev->csf.scheduler.lock for global actions like assigning -+ * a CSG slot. -+ * If the lock ordering constraint was not there then could have -+ * directly looped over the active queue groups. -+ */ -+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { -+ /* Firmware reload would reinitialize the CSG & CS interface IO -+ * pages, so just need to internally mark the currently active -+ * queue groups as terminated (similar to the unexpected OoM -+ * event case). -+ * No further work can now get executed for the active groups -+ * (new groups would have to be created to execute work) and -+ * in near future Clients would be duly informed of this -+ * reset. The resources (like User IO pages, GPU queue memory) -+ * allocated for the associated queues would be freed when the -+ * Clients do the teardown when they become aware of the reset. -+ */ -+ kbase_csf_active_queue_groups_reset(kbdev, kctx); -+ } ++/* GROUP_CONTROL_BLOCK register offsets */ ++#define GROUP_FEATURES 0x0000 /* () CSG interface features */ ++#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ ++#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ ++#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ ++#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ ++#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ ++#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ ++#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ ++#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) ++#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) ++#define STREAM_CONTROL_COUNT 16 + -+ mutex_unlock(&kbdev->kctx_list_lock); ++/* GLB_INPUT_BLOCK register offsets */ ++#define GLB_REQ 0x0000 /* () Global request */ ++#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ ++#define GLB_DB_REQ 0x0008 /* () Global doorbell request */ ++#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ ++#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ ++#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ ++#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u); ++#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ ++#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ ++#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ ++#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */ ++#define GLB_PRFCNT_CONFIG 
0x0040 /* () Performance counter configuration */ ++#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ ++#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ ++#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ ++#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */ ++#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ ++#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ ++#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ + -+ /* After queue groups reset, the scheduler data fields clear out */ -+ scheduler_inner_reset(kbdev); -+} ++#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ ++#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ ++#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ ++#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + -+static void firmware_aliveness_monitor(struct work_struct *work) -+{ -+ struct kbase_device *kbdev = container_of(work, struct kbase_device, -+ csf.scheduler.ping_work.work); -+ int err; -+ -+ /* Ensure that reset will not be occurring while this function is being -+ * executed as otherwise calling kbase_reset_gpu when reset is already -+ * occurring is a programming error. -+ * -+ * We must use the 'try' variant as the Reset worker can try to flush -+ * this workqueue, which would otherwise deadlock here if we tried to -+ * wait for the reset (and thus ourselves) to complete. -+ */ -+ err = kbase_reset_gpu_try_prevent(kbdev); -+ if (err) { -+ /* It doesn't matter whether the value was -EAGAIN or a fatal -+ * error, just stop processing. 
In case of -EAGAIN, the Reset -+ * worker will restart the scheduler later to resume ping -+ */ -+ return; -+ } -+ -+ mutex_lock(&kbdev->csf.scheduler.lock); -+ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (fw_debug) { -+ /* ping requests cause distraction in firmware debugging */ -+ goto exit; -+ } -+#endif -+ -+ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED || -+ kbdev->csf.scheduler.state == SCHED_SLEEPING) -+ goto exit; -+ -+ if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1) -+ goto exit; -+ -+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) -+ goto exit; ++/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ ++#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ ++#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ ++#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ ++#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + -+ if (kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { -+ /* Suspend pending - no real need to ping */ -+ goto exit; -+ } ++#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ ++#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ + -+ kbase_csf_scheduler_wait_mcu_active(kbdev); ++/* GLB_OUTPUT_BLOCK register offsets */ ++#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ ++#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ ++#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */ ++#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + -+ err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms); ++#define GLB_ACK 0x0000 /* () Global acknowledge */ ++#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ ++#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ ++#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ ++#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ ++#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ ++#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ + -+ if (err) { -+ /* It is acceptable to enqueue a reset whilst we've prevented -+ * them, it will happen after we've allowed them again -+ */ -+ if (kbase_prepare_to_reset_gpu( -+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+ } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) { -+ queue_delayed_work( -+ system_long_wq, &kbdev->csf.scheduler.ping_work, -+ msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT))); -+ } ++#ifdef CONFIG_MALI_CORESIGHT ++#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4 ++#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT) ++#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5 ++#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT) ++#define GLB_DEBUG_ARG_IN0 0x0FE0 ++#define GLB_DEBUG_ARG_IN1 0x0FE4 ++#define GLB_DEBUG_ARG_OUT0 0x0FE0 ++#endif /* CONFIG_MALI_CORESIGHT */ + -+ kbase_pm_context_idle(kbdev); -+exit: -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ kbase_reset_gpu_allow(kbdev); -+} ++/* End register offsets */ + -+int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, -+ struct kbase_suspend_copy_buffer *sus_buf) -+{ -+ struct kbase_context *const kctx = group->kctx; -+ struct kbase_device 
*const kbdev = kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ bool on_slot; -+ int err = 0; ++/* CS_KERNEL_INPUT_BLOCK register set definitions */ ++/* GLB_VERSION register */ ++#define GLB_VERSION_PATCH_SHIFT (0) ++#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT) ++#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT) ++#define GLB_VERSION_PATCH_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK)) ++#define GLB_VERSION_MINOR_SHIFT (16) ++#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT) ++#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT) ++#define GLB_VERSION_MINOR_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK)) ++#define GLB_VERSION_MAJOR_SHIFT (24) ++#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT) ++#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT) ++#define GLB_VERSION_MAJOR_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK)) + -+ kbase_reset_gpu_assert_prevented(kbdev); -+ lockdep_assert_held(&kctx->csf.lock); -+ mutex_lock(&scheduler->lock); ++/* CS_REQ register */ ++#define CS_REQ_STATE_SHIFT 0 ++#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) ++#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) ++#define CS_REQ_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) ++/* CS_REQ_STATE values */ ++#define CS_REQ_STATE_STOP 0x0 ++#define CS_REQ_STATE_START 0x1 ++/* End of CS_REQ_STATE values */ ++#define CS_REQ_EXTRACT_EVENT_SHIFT 4 ++#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) ++#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) ++#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) + -+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); ++#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 ++#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) ++#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) ++#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ ++ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK)) ++#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 ++#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) ++#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) ++#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ ++ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) ++#define CS_REQ_IDLE_EMPTY_SHIFT 10 ++#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) ++#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) ++#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << 
CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) ++#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 ++#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) ++#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ ++ (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) ++#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ ++ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) ++#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 ++#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) ++#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ ++ (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) ++#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ ++ (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) ++#define CS_REQ_TILER_OOM_SHIFT 26 ++#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) ++#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) ++#define CS_REQ_TILER_OOM_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) ++#define CS_REQ_PROTM_PEND_SHIFT 27 ++#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) ++#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) ++#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) ++#define CS_REQ_FATAL_SHIFT 30 ++#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) ++#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) ++#define CS_REQ_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK)) ++#define CS_REQ_FAULT_SHIFT 31 ++#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT) ++#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT) ++#define CS_REQ_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK)) + -+#ifdef KBASE_PM_RUNTIME -+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { -+ if (wait_for_scheduler_to_exit_sleep(kbdev)) { -+ dev_warn( -+ kbdev->dev, -+ "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot %d", -+ group->handle, group->kctx->tgid, -+ group->kctx->id, group->csg_nr); ++/* CS_CONFIG register */ ++#define CS_CONFIG_PRIORITY_SHIFT 0 ++#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) ++#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) ++#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ ++ (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) ++#define CS_CONFIG_USER_DOORBELL_SHIFT 8 ++#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) ++#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) ++#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ ++ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) + -+ scheduler_wakeup(kbdev, true); ++/* 
CS_ACK_IRQ_MASK register */ ++#define CS_ACK_IRQ_MASK_STATE_SHIFT 0 ++#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) ++#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT) ++#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) ++/* CS_ACK_IRQ_MASK_STATE values */ ++#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 ++#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7 ++/* End of CS_ACK_IRQ_MASK_STATE values */ ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4 ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) ++#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 ++#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) ++#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) ++#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 ++#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) ++#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 ++#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) ++#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT) ++#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) ++#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 ++#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) ++#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) ++#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) + -+ /* Wait for MCU firmware to start running */ -+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) -+ dev_warn( -+ kbdev->dev, -+ "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d", -+ group->handle, group->kctx->tgid, -+ group->kctx->id, group->csg_nr); -+ } ++/* CS_BASE register */ ++#define CS_BASE_POINTER_SHIFT 0 ++#define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT) ++#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) ++#define CS_BASE_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) + 
-+ /* Check the group state again as scheduler lock would have been -+ * released when waiting for the exit from SLEEPING state. -+ */ -+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); -+ } -+#endif -+ if (on_slot) { -+ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; ++/* CS_SIZE register */ ++#define CS_SIZE_SIZE_SHIFT 0 ++#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT) ++#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT) ++#define CS_SIZE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK)) + -+ set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); ++/* CS_TILER_HEAP_START register */ ++#define CS_TILER_HEAP_START_POINTER_SHIFT 0 ++#define CS_TILER_HEAP_START_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT) ++#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) ++#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ ++ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) ++/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ ++/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ + -+ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) -+ suspend_queue_group(group); -+ err = wait_csg_slots_suspend(kbdev, slot_mask, -+ kbdev->csf.fw_timeout_ms); -+ if (err) { -+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d", -+ kbase_backend_get_cycle_cnt(kbdev), -+ group->handle, group->csg_nr); -+ goto exit; -+ } -+ } ++/* CS_TILER_HEAP_END register */ ++#define CS_TILER_HEAP_END_POINTER_SHIFT 0 ++#define CS_TILER_HEAP_END_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT) ++#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) ++#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ ++ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) ++/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ ++/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ + -+ if (queue_group_suspended_locked(group)) { -+ unsigned int target_page_nr = 0, i = 0; -+ u64 offset = sus_buf->offset; -+ size_t to_copy = sus_buf->size; -+ const u32 csg_suspend_buf_nr_pages = -+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++/* CS_USER_INPUT register */ ++#define CS_USER_INPUT_POINTER_SHIFT 0 ++#define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT) ++#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) ++#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ ++ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) + -+ if (scheduler->state != SCHED_SUSPENDED) { -+ /* Similar to the case of HW counters, need to flush -+ * the GPU L2 cache before reading from the suspend buffer -+ * pages as they are mapped and cached on GPU side. -+ * Flushing LSC is not done here, since only the flush of -+ * CSG suspend buffer contents is needed from the L2 cache. 
-+ */ -+ kbase_gpu_start_cache_clean( -+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); -+ kbase_gpu_wait_cache_clean(kbdev); -+ } else { -+ /* Make sure power down transitions have completed, -+ * i.e. L2 has been powered off as that would ensure -+ * its contents are flushed to memory. -+ * This is needed as Scheduler doesn't wait for the -+ * power down to finish. -+ */ -+ kbase_pm_wait_for_desired_state(kbdev); -+ } ++/* CS_USER_OUTPUT register */ ++#define CS_USER_OUTPUT_POINTER_SHIFT 0 ++#define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT) ++#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) ++#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ ++ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) + -+ for (i = 0; i < csg_suspend_buf_nr_pages && -+ target_page_nr < sus_buf->nr_pages; i++) { -+ struct page *pg = -+ as_page(group->normal_suspend_buf.phy[i]); -+ void *sus_page = kmap(pg); ++/* CS_INSTR_CONFIG register */ ++#define CS_INSTR_CONFIG_JASID_SHIFT (0) ++#define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT) ++#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT) ++#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \ ++ (((value) << CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK)) ++#define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4) ++#define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \ ++ (((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK)) ++#define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16) ++#define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_CONFIG_EVENT_STATE_MASK) >> CS_INSTR_CONFIG_EVENT_STATE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \ ++ (((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK)) + -+ if (sus_page) { -+ kbase_sync_single_for_cpu(kbdev, -+ kbase_dma_addr(pg), -+ PAGE_SIZE, DMA_BIDIRECTIONAL); ++/* CS_INSTR_BUFFER_SIZE register */ ++#define CS_INSTR_BUFFER_SIZE_SIZE_SHIFT (0) ++#define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) ++#define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) ++#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \ ++ (((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK)) + -+ err = kbase_mem_copy_to_pinned_user_pages( -+ sus_buf->pages, sus_page, -+ &to_copy, sus_buf->nr_pages, -+ &target_page_nr, offset); -+ kunmap(pg); -+ if (err) -+ break; -+ } else { -+ err = -ENOMEM; -+ break; -+ } -+ } -+ schedule_in_cycle(group, false); -+ } else { -+ /* If addr-space fault, the group may have been evicted */ -+ err = -EIO; -+ } ++/* CS_INSTR_BUFFER_BASE register */ ++#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT 
(0) ++#define CS_INSTR_BUFFER_BASE_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \ ++ (((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK)) + -+exit: -+ mutex_unlock(&scheduler->lock); -+ return err; -+} ++/* CS_INSTR_BUFFER_OFFSET_POINTER register */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0) ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ ++ ((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \ ++ (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK)) + -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf); ++/* End of CS_KERNEL_INPUT_BLOCK register set definitions */ + -+/** -+ * group_sync_updated() - Evaluate sync wait condition of all blocked command -+ * queues of the group. -+ * -+ * @group: Pointer to the command queue group that has blocked command queue(s) -+ * bound to it. -+ * -+ * Return: true if sync wait condition is satisfied for at least one blocked -+ * queue of the group. -+ */ -+static bool group_sync_updated(struct kbase_queue_group *group) -+{ -+ bool updated = false; -+ int stream; ++/* CS_KERNEL_OUTPUT_BLOCK register set definitions */ + -+ /* Groups can also be blocked on-slot during protected mode. 
*/ -+ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && -+ group->run_state != KBASE_CSF_GROUP_IDLE); ++/* CS_ACK register */ ++#define CS_ACK_STATE_SHIFT 0 ++#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT) ++#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT) ++#define CS_ACK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK)) ++/* CS_ACK_STATE values */ ++#define CS_ACK_STATE_STOP 0x0 ++#define CS_ACK_STATE_START 0x1 ++/* End of CS_ACK_STATE values */ ++#define CS_ACK_EXTRACT_EVENT_SHIFT 4 ++#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) ++#define CS_ACK_TILER_OOM_SHIFT 26 ++#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) ++#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) ++#define CS_ACK_TILER_OOM_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) ++#define CS_ACK_PROTM_PEND_SHIFT 27 ++#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) ++#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) ++#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) ++#define CS_ACK_FATAL_SHIFT 30 ++#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) ++#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) ++#define CS_ACK_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK)) ++#define CS_ACK_FAULT_SHIFT 31 ++#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT) ++#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT) ++#define CS_ACK_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK)) + -+ for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { -+ struct kbase_queue *const queue = group->bound_queues[stream]; ++/* CS_STATUS_CMD_PTR register */ ++#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 ++#define CS_STATUS_CMD_PTR_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT) ++#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) ++#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ ++ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) + -+ /* To check the necessity of sync-wait evaluation, -+ * we rely on the cached 'status_wait' instead of reading it -+ * directly from shared memory as the CSG has been already -+ * evicted from the CSG slot, thus this CSG doesn't have -+ * valid information in the shared memory. 
-+ */ -+ if (queue && queue->enabled && -+ CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) -+ if (evaluate_sync_update(queue)) { -+ updated = true; -+ queue->status_wait = 0; -+ } -+ } ++/* CS_STATUS_WAIT register */ ++#define CS_STATUS_WAIT_SB_MASK_SHIFT 0 ++#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) ++#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) ++#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) ++#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16 ++#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT) ++#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT) ++#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK)) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) ++/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5 ++/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ ++#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 ++#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) ++#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) ++#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ ++ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) ++#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 ++#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) ++#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) ++#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ ++ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) ++#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 ++#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) ++#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 ++#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ ++ 
(((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) + -+ return updated; -+} ++/* CS_STATUS_REQ_RESOURCE register */ ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) + -+/** -+ * scheduler_get_protm_enter_async_group() - Check if the GPU queue group -+ * can be now allowed to execute in protected mode. -+ * -+ * @kbdev: Pointer to the GPU device. -+ * @group: Pointer to the GPU queue group. -+ * -+ * This function is called outside the scheduling tick/tock to determine -+ * if the given GPU queue group can now execute in protected mode or not. -+ * If the group pointer passed is NULL then the evaluation is done for the -+ * highest priority group on the scheduler maintained group lists without -+ * tick associated rotation actions. This is referred as the 'top-group' -+ * in a tock action sense. 
-+ * -+ * It returns the same group pointer, that was passed as an argument, if that -+ * group matches the highest priority group and has pending protected region -+ * requests otherwise NULL is returned. -+ * -+ * If the group pointer passed is NULL then the internal evaluated highest -+ * priority group is returned if that has pending protected region requests -+ * otherwise NULL is returned. -+ * -+ * The evaluated highest priority group may not necessarily be the same as the -+ * scheduler->top_grp. This can happen if there is dynamic de-idle update -+ * during the tick interval for some on-slots groups that were idle during the -+ * scheduler normal scheduling action, where the scheduler->top_grp was set. -+ * The recorded scheduler->top_grp is untouched by this evualuation, so will not -+ * affect the scheduler context/priority list rotation arrangement. -+ * -+ * Return: the pointer to queue group that can currently execute in protected -+ * mode or NULL. -+ */ -+static struct kbase_queue_group *scheduler_get_protm_enter_async_group( -+ struct kbase_device *const kbdev, -+ struct kbase_queue_group *const group) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *match_grp, *input_grp; ++/* CS_STATUS_WAIT_SYNC_POINTER register */ ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) + -+ lockdep_assert_held(&scheduler->lock); ++/* CS_STATUS_WAIT_SYNC_VALUE register */ ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) + -+ if (scheduler->state != SCHED_INACTIVE) -+ return NULL; ++/* CS_STATUS_SCOREBOARDS register */ ++#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) ++#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ ++ ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) ++#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ ++ CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) ++#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ ++ (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ ++ CS_STATUS_SCOREBOARDS_NONZERO_MASK)) + -+ match_grp = get_tock_top_group(scheduler); -+ input_grp = group ? 
group : match_grp; ++/* CS_STATUS_BLOCKED_REASON register */ ++#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) ++#define CS_STATUS_BLOCKED_REASON_REASON_MASK \ ++ ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) ++#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ ++ CS_STATUS_BLOCKED_REASON_REASON_SHIFT) ++#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ ++ (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ ++ CS_STATUS_BLOCKED_REASON_REASON_MASK)) ++/* CS_STATUS_BLOCKED_REASON_reason values */ ++#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 ++#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 ++#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 ++#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 ++#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 ++#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5 ++#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6 ++/* End of CS_STATUS_BLOCKED_REASON_reason values */ + -+ if (input_grp && (input_grp == match_grp)) { -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[0]; -+ unsigned long *pending = -+ input_grp->protm_pending_bitmap; -+ unsigned long flags; ++/* CS_FAULT register */ ++#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 ++#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) ++#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) ++#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ ++ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) ++/* CS_FAULT_EXCEPTION_TYPE values */ ++#define CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05 ++#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F ++#define CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 ++#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 ++#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 ++#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 ++#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A ++#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B ++#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 ++#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0 ++#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1 ++#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2 ++#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3 ++#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4 ++#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8 ++#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9 ++#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA ++#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB ++#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9 ++#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA ++#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB ++#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0 ++#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4 ++#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5 ++#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6 ++#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7 ++#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 
0xE8 ++#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9 ++#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA ++#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB ++/* End of CS_FAULT_EXCEPTION_TYPE values */ ++#define CS_FAULT_EXCEPTION_DATA_SHIFT 8 ++#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ ++ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++/* CS_FATAL register */ ++#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 ++#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) ++#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) ++#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ ++ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) ++/* CS_FATAL_EXCEPTION_TYPE values */ ++#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 ++#define CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41 ++#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 ++#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 ++#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 ++#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A ++#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68 ++/* End of CS_FATAL_EXCEPTION_TYPE values */ ++#define CS_FATAL_EXCEPTION_DATA_SHIFT 8 ++#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) ++#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT) ++#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \ ++ (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK)) + -+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || -+ bitmap_empty(pending, ginfo->stream_num)) -+ input_grp = NULL; ++/* CS_FAULT_INFO register */ ++#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0 ++#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \ ++ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \ ++ (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK)) + -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -+ } else { -+ input_grp = NULL; -+ } ++/* CS_FATAL_INFO register */ ++#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0 ++#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) ++#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \ ++ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) ++#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \ ++ (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK)) + -+ return input_grp; -+} ++/* CS_HEAP_VT_START register */ ++#define CS_HEAP_VT_START_VALUE_SHIFT 0 
++#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT) ++#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT) ++#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \ ++ (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK)) + -+void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) -+{ -+ struct kbase_device *const kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++/* CS_HEAP_VT_END register */ ++#define CS_HEAP_VT_END_VALUE_SHIFT 0 ++#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT) ++#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT) ++#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK)) + -+ int err = kbase_reset_gpu_try_prevent(kbdev); -+ /* Regardless of whether reset failed or is currently happening, exit -+ * early -+ */ -+ if (err) -+ return; ++/* CS_HEAP_FRAG_END register */ ++#define CS_HEAP_FRAG_END_VALUE_SHIFT 0 ++#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT) ++#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT) ++#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \ ++ (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK)) + -+ mutex_lock(&scheduler->lock); ++/* CS_HEAP_ADDRESS register */ ++#define CS_HEAP_ADDRESS_POINTER_SHIFT 0 ++#define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT) ++#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT) ++#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \ ++ (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK)) ++/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */ + -+ if (group->run_state == KBASE_CSF_GROUP_IDLE) { -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); -+ } -+ /* Check if the group is now eligible for execution in protected mode. */ -+ if (scheduler_get_protm_enter_async_group(kbdev, group)) -+ scheduler_group_check_protm_enter(kbdev, group); ++/* CS_USER_INPUT_BLOCK register set definitions */ + -+ mutex_unlock(&scheduler->lock); -+ kbase_reset_gpu_allow(kbdev); -+} ++/* CS_INSERT register */ ++#define CS_INSERT_VALUE_SHIFT 0 ++#define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT) ++#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT) ++#define CS_INSERT_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK)) + -+/** -+ * check_sync_update_for_on_slot_group() - Check the sync wait condition -+ * for all the queues bound to -+ * the given on-slot group. -+ * -+ * @group: Pointer to the on-slot group that requires evaluation. 
-+ * -+ * This function is called if the GPU is in protected mode and there are on -+ * slot idle groups with higher priority than the active protected mode group -+ * or this function is called when CQS object is signaled whilst GPU is in -+ * sleep state. -+ * This function will evaluate the sync condition, if any, of all the queues -+ * bound to the given group. -+ * -+ * Return: true if the sync condition of at least one queue has been satisfied. -+ */ -+static bool check_sync_update_for_on_slot_group( -+ struct kbase_queue_group *group) -+{ -+ struct kbase_device *const kbdev = group->kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = -+ &kbdev->csf.scheduler; -+ bool sync_update_done = false; -+ int i; ++/* CS_EXTRACT_INIT register */ ++#define CS_EXTRACT_INIT_VALUE_SHIFT 0 ++#define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT) ++#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT) ++#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \ ++ (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK)) ++/* End of CS_USER_INPUT_BLOCK register set definitions */ + -+ lockdep_assert_held(&scheduler->lock); ++/* CS_USER_OUTPUT_BLOCK register set definitions */ + -+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { -+ struct kbase_queue *queue = group->bound_queues[i]; ++/* CS_EXTRACT register */ ++#define CS_EXTRACT_VALUE_SHIFT 0 ++#define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT) ++#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT) ++#define CS_EXTRACT_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK)) + -+ if (queue && queue->enabled && !sync_update_done) { -+ struct kbase_csf_cmd_stream_group_info *const ginfo = -+ &kbdev->csf.global_iface.groups[group->csg_nr]; -+ struct kbase_csf_cmd_stream_info *const stream = -+ &ginfo->streams[queue->csi_index]; -+ u32 status = kbase_csf_firmware_cs_output( -+ stream, CS_STATUS_WAIT); -+ unsigned long flags; ++/* CS_ACTIVE register */ ++#define CS_ACTIVE_HW_ACTIVE_SHIFT 0 ++#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT) ++#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT) ++#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK)) ++/* End of CS_USER_OUTPUT_BLOCK register set definitions */ + -+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, -+ queue->group, queue, status); ++/* CSG_INPUT_BLOCK register set definitions */ + -+ if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) -+ continue; ++/* CSG_REQ register */ ++#define CSG_REQ_STATE_SHIFT 0 ++#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT) ++#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT) ++#define CSG_REQ_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK)) ++/* CSG_REQ_STATE values */ ++#define CSG_REQ_STATE_TERMINATE 0x0 ++#define CSG_REQ_STATE_START 0x1 ++#define CSG_REQ_STATE_SUSPEND 0x2 ++#define CSG_REQ_STATE_RESUME 0x3 ++/* End of CSG_REQ_STATE values */ ++#define CSG_REQ_EP_CFG_SHIFT 4 ++#define CSG_REQ_EP_CFG_MASK (0x1 << 
CSG_REQ_EP_CFG_SHIFT) ++#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT) ++#define CSG_REQ_EP_CFG_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK)) ++#define CSG_REQ_STATUS_UPDATE_SHIFT 5 ++#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT) ++#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT) ++#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \ ++ (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK)) ++#define CSG_REQ_SYNC_UPDATE_SHIFT 28 ++#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT) ++#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT) ++#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK)) ++#define CSG_REQ_IDLE_SHIFT 29 ++#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT) ++#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) ++#define CSG_REQ_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK)) ++#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31 ++#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ ++ (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \ ++ (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK)) + -+ /* Save the information of sync object of the command -+ * queue so the callback function, 'group_sync_updated' -+ * can evaluate the sync object when it gets updated -+ * later. 
-+ */ -+ queue->status_wait = status; -+ queue->sync_ptr = kbase_csf_firmware_cs_output( -+ stream, CS_STATUS_WAIT_SYNC_POINTER_LO); -+ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( -+ stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; -+ queue->sync_value = kbase_csf_firmware_cs_output( -+ stream, CS_STATUS_WAIT_SYNC_VALUE); -+ queue->blocked_reason = -+ CS_STATUS_BLOCKED_REASON_REASON_GET( -+ kbase_csf_firmware_cs_output( -+ stream, -+ CS_STATUS_BLOCKED_REASON)); ++/* CSG_ACK_IRQ_MASK register */ ++#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0 ++#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK)) ++/* CSG_ACK_IRQ_MASK_STATE values */ ++#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 ++#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7 ++/* End of CSG_ACK_IRQ_MASK_STATE values */ ++#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 ++#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) ++#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) ++#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) ++#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 ++#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) ++#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 ++#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) ++#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 ++#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) ++#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) ++#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) ++#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 ++#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) ++#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) ++#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) ++#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 ++#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << 
CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ ++ (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) + -+ if (!evaluate_sync_update(queue)) -+ continue; ++/* CSG_EP_REQ register */ ++#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 ++#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ ++ (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) ++#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 ++#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ ++ (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) ++#define CSG_EP_REQ_TILER_EP_SHIFT 16 ++#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) ++#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) ++#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) ++#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 ++#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ ++ (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) ++#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 ++#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ ++ (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) ++#define CSG_EP_REQ_PRIORITY_SHIFT 28 ++#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) ++#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) ++#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + -+ /* Update csg_slots_idle_mask and group's run_state */ -+ if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) { -+ /* Only clear the group's idle flag if it has been dealt -+ * with by the scheduler's tick/tock action, otherwise -+ * leave it untouched. 
-+ */ -+ spin_lock_irqsave(&scheduler->interrupt_lock, -+ flags); -+ clear_bit((unsigned int)group->csg_nr, -+ scheduler->csg_slots_idle_mask); -+ KBASE_KTRACE_ADD_CSF_GRP( -+ kbdev, CSG_SLOT_IDLE_CLEAR, group, -+ scheduler->csg_slots_idle_mask[0]); -+ spin_unlock_irqrestore( -+ &scheduler->interrupt_lock, flags); -+ /* Request the scheduler to confirm the condition inferred -+ * here inside the protected mode. -+ */ -+ group->reevaluate_idle_status = true; -+ group->run_state = KBASE_CSF_GROUP_RUNNABLE; -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, -+ group->run_state); -+ } ++/* CSG_SUSPEND_BUF register */ ++#define CSG_SUSPEND_BUF_POINTER_SHIFT 0 ++#define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT) ++#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) ++#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ ++ (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) + -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); -+ sync_update_done = true; -+ } -+ } ++/* CSG_PROTM_SUSPEND_BUF register */ ++#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 ++#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) ++#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ ++ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) ++#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ ++ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) + -+ return sync_update_done; -+} ++/* CSG_DVS_BUF_BUFFER register */ ++#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0) ++#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) ++#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT) ++#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \ ++ (((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK)) ++#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12) ++#define CSG_DVS_BUF_BUFFER_POINTER_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) ++#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \ ++ (((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT) ++#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \ ++ (((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK)) + -+/** -+ * check_sync_update_for_idle_groups_protm() - Check the sync wait condition -+ * for the idle groups on slot -+ * during protected mode. -+ * -+ * @kbdev: Pointer to the GPU device -+ * -+ * This function checks the gpu queues of all the idle groups on slot during -+ * protected mode that has a higher priority than the active protected mode -+ * group. -+ * -+ * Return: true if the sync condition of at least one queue in a group has been -+ * satisfied. 
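
The CSG_DVS_BUF_BUFFER_SIZE and CSG_DVS_BUF_BUFFER_POINTER fields pack a size and a pointer into a single 64-bit word: the size sits in bits [11:0] and the pointer field starts at bit 12. A hedged sketch of composing such a word, assuming (from the 12-bit shift alone, the patch does not say so) that the pointer field carries a 4 KiB-aligned GPU address shifted right by 12 bits:

	u64 dvs_buf = 0;
	u64 buf_gpu_va = 0x8000000;	/* illustrative, 4 KiB aligned */
	u32 buf_size = 256;		/* must fit the 12-bit SIZE field */

	dvs_buf = CSG_DVS_BUF_BUFFER_SIZE_SET(dvs_buf, buf_size);
	dvs_buf = CSG_DVS_BUF_BUFFER_POINTER_SET(dvs_buf, buf_gpu_va >> 12);
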
-+ */ -+static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ struct kbase_queue_group *protm_grp; -+ bool exit_protm = false; -+ unsigned long flags; -+ u32 num_groups; -+ u32 i; ++/* End of CSG_INPUT_BLOCK register set definitions */ + -+ lockdep_assert_held(&scheduler->lock); ++/* CSG_OUTPUT_BLOCK register set definitions */ + -+ spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ protm_grp = scheduler->active_protm_grp; -+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++/* CSG_ACK register */ ++#define CSG_ACK_STATE_SHIFT 0 ++#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) ++#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT) ++#define CSG_ACK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) ++/* CSG_ACK_STATE values */ ++#define CSG_ACK_STATE_TERMINATE 0x0 ++#define CSG_ACK_STATE_START 0x1 ++#define CSG_ACK_STATE_SUSPEND 0x2 ++#define CSG_ACK_STATE_RESUME 0x3 ++/* End of CSG_ACK_STATE values */ ++#define CSG_ACK_EP_CFG_SHIFT 4 ++#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) ++#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) ++#define CSG_ACK_EP_CFG_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) ++#define CSG_ACK_STATUS_UPDATE_SHIFT 5 ++#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ ++ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) ++#define CSG_ACK_SYNC_UPDATE_SHIFT 28 ++#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) ++#define CSG_ACK_IDLE_SHIFT 29 ++#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) ++#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) ++#define CSG_ACK_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) ++#define CSG_ACK_DOORBELL_SHIFT 30 ++#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) ++#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) ++#define CSG_ACK_DOORBELL_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) ++#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 ++#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ ++ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) + -+ if (!protm_grp) -+ return exit_protm; ++/* CSG_STATUS_EP_CURRENT register */ ++#define 
CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 ++#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + -+ num_groups = kbdev->csf.global_iface.group_num; ++/* CSG_STATUS_EP_REQ register */ ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) ++#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 ++#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ 
++ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + -+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { -+ struct kbase_csf_csg_slot *csg_slot = -+ &scheduler->csg_slots[i]; -+ struct kbase_queue_group *group = csg_slot->resident_group; ++/* End of CSG_OUTPUT_BLOCK register set definitions */ + -+ if (group->scan_seq_num < protm_grp->scan_seq_num) { -+ /* If sync update has been performed for the group that -+ * has a higher priority than the protm group, then we -+ * need to exit protected mode. -+ */ -+ if (check_sync_update_for_on_slot_group(group)) -+ exit_protm = true; -+ } -+ } ++/* STREAM_CONTROL_BLOCK register set definitions */ + -+ return exit_protm; -+} ++/* STREAM_FEATURES register */ ++#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 ++#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) ++#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) ++#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ ++ (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) ++#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 ++#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) ++#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) ++#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ ++ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) ++#define STREAM_FEATURES_COMPUTE_SHIFT 16 ++#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) ++#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) ++#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ ++ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) ++#define STREAM_FEATURES_FRAGMENT_SHIFT 17 ++#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) ++#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) ++#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ ++ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) ++#define STREAM_FEATURES_TILER_SHIFT 18 ++#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) ++#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) ++#define STREAM_FEATURES_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ ++ 
(((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) + -+static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 const num_groups = kbdev->csf.global_iface.group_num; -+ u32 csg_nr; ++/* STREAM_INPUT_VA register */ ++#define STREAM_INPUT_VA_VALUE_SHIFT 0 ++#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) ++#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) ++#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ ++ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) + -+ lockdep_assert_held(&scheduler->lock); ++/* STREAM_OUTPUT_VA register */ ++#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 ++#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) ++#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) ++#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ ++ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) ++/* End of STREAM_CONTROL_BLOCK register set definitions */ + -+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { -+ struct kbase_queue_group *const group = -+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++/* GLB_INPUT_BLOCK register set definitions */ + -+ if (!group) -+ continue; ++/* GLB_REQ register */ ++#define GLB_REQ_HALT_SHIFT 0 ++#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) ++#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) ++#define GLB_REQ_HALT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) ++#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 ++#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) ++#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) ++#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) ++#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 ++#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ ++ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) ++#define GLB_REQ_PROTM_ENTER_SHIFT 4 ++#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) ++#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) ++#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) 
& GLB_REQ_PROTM_ENTER_MASK)) ++#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 ++#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) ++#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) ++#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) ++#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 ++#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) ++#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) ++#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) ++#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 ++#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) ++#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) ++#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ ++ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) ++#define GLB_REQ_PING_SHIFT 8 ++#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) ++#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) ++#define GLB_REQ_PING_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ ++ (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ ++ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) ++#define GLB_REQ_SLEEP_SHIFT 12 ++#define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT) ++#define GLB_REQ_SLEEP_GET(reg_val) \ ++ (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT) ++#define GLB_REQ_SLEEP_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \ ++ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK)) ++#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 ++#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) ++#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) ++#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) ++#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 ++#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) ++#define GLB_REQ_INACTIVE_TILER_SHIFT 22 ++#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) ++#define GLB_REQ_INACTIVE_TILER_GET(reg_val) 
(((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) ++#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) ++#define GLB_REQ_PROTM_EXIT_SHIFT 23 ++#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) ++#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) ++#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK)) ++#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 ++#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ ++ GLB_REQ_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ ++ GLB_REQ_PRFCNT_THRESHOLD_MASK)) ++#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 ++#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ ++ GLB_REQ_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ ++ GLB_REQ_PRFCNT_OVERFLOW_MASK)) ++#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 ++#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) ++#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) ++#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ ++ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) ++#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 ++#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT) ++#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) ++#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ ++ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) + -+ if (check_sync_update_for_on_slot_group(group)) { -+ scheduler_wakeup(kbdev, true); -+ return; -+ } -+ } -+} ++/* GLB_ACK_IRQ_MASK register */ ++#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 ++#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) ++#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT) ++#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 ++#define 
GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) ++#define GLB_ACK_IRQ_MASK_PING_SHIFT 8 ++#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) ++#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) ++#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ ++ (0x1 << 
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ ++ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ ++ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ ++ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ ++ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ 
++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) + -+/** -+ * check_group_sync_update_worker() - Check the sync wait condition for all the -+ * blocked queue groups -+ * -+ * @work: Pointer to the context-specific work item for evaluating the wait -+ * condition for all the queue groups in idle_wait_groups list. -+ * -+ * This function checks the gpu queues of all the groups present in both -+ * idle_wait_groups list of a context and all on slot idle groups (if GPU -+ * is in protected mode). -+ * If the sync wait condition for at least one queue bound to the group has -+ * been satisfied then the group is moved to the per context list of -+ * runnable groups so that Scheduler can consider scheduling the group -+ * in next tick or exit protected mode. 
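
Each GLB_ACK_IRQ_MASK field mirrors a GLB_REQ/GLB_ACK field of the same name, and a set bit is what allows that acknowledgement to raise a host interrupt; that reading follows from the naming rather than from anything stated in the patch. A short illustrative sketch of building such a mask:

	u32 glb_irq_mask = 0;

	/* Let only PING and PROTM_EXIT acknowledgements interrupt the host. */
	glb_irq_mask = GLB_ACK_IRQ_MASK_PING_SET(glb_irq_mask, 1);
	glb_irq_mask = GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(glb_irq_mask, 1);
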
-+ */ -+static void check_group_sync_update_worker(struct work_struct *work) -+{ -+ struct kbase_context *const kctx = container_of(work, -+ struct kbase_context, csf.sched.sync_update_work); -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ bool sync_updated = false; ++/* GLB_PROGRESS_TIMER register */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 ++#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) ++#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ ++ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) ++#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ ++ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) + -+ mutex_lock(&scheduler->lock); ++/* GLB_PWROFF_TIMER register */ ++#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0 ++#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) ++#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ ++ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) ++#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ ++ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ ++ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ ++ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) ++/* GLB_PWROFF_TIMER_TIMER_SOURCE values */ ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 ++/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(scheduler->state == SCHED_BUSY)) { -+ queue_work(kctx->csf.sched.sync_update_wq, -+ &kctx->csf.sched.sync_update_work); -+ mutex_unlock(&scheduler->lock); -+ return; -+ } -+#endif ++/* GLB_ALLOC_EN register */ ++#define GLB_ALLOC_EN_MASK_SHIFT 0 ++#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) ++#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) ++#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); -+ if (kctx->csf.sched.num_idle_wait_grps != 0) { -+ struct kbase_queue_group *group, *temp; ++/* GLB_OUTPUT_BLOCK register set definitions */ + -+ list_for_each_entry_safe(group, temp, -+ &kctx->csf.sched.idle_wait_groups, link) { -+ if (group_sync_updated(group)) { -+ sync_updated = true; -+ /* Move this group back in to the runnable -+ * groups list of the context. 
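
GLB_PWROFF_TIMER combines a 31-bit timeout with a TIMER_SOURCE select bit whose two defined values choose between the system timestamp and the GPU counter. A hedged sketch of programming it; the timeout count is illustrative and its units are not defined in this excerpt:

	u32 pwroff = 0;

	pwroff = GLB_PWROFF_TIMER_TIMEOUT_SET(pwroff, 0x1000);
	pwroff = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
			pwroff, GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
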
-+ */ -+ update_idle_suspended_group_state(group); -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); -+ } -+ } -+ } else { -+ WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); -+ } ++/* GLB_ACK register */ ++#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 ++#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) ++/* End of GLB_OUTPUT_BLOCK register set definitions */ + -+ if (check_sync_update_for_idle_groups_protm(kbdev)) { -+ scheduler_force_protm_exit(kbdev); -+ sync_updated = true; -+ } ++/* The following register and fields are for headers before 10.x.7/11.x.4 */ ++#define GLB_REQ_IDLE_ENABLE_SHIFT (10) ++#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_EVENT_SHIFT (26) ++#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) ++#define GLB_ACK_IDLE_ENABLE_SHIFT (10) ++#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_IDLE_EVENT_SHIFT (26) ++#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) + -+ /* If scheduler is in sleep or suspended state, re-activate it -+ * to serve on-slot CSGs blocked on CQS which has been signaled. 
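
GLB_ACK in the output block repeats the GLB_REQ field layout, which suggests the usual request/acknowledge handshake: the host makes a field in GLB_REQ differ from the same field in GLB_ACK, and the firmware signals completion by making them equal again. A hedged sketch for the CFG_ALLOC_EN field; the two plain variables stand in for the shared interface words, which this snippet does not actually access:

	u32 glb_req = 0, glb_ack = 0;

	/* Issue the request by flipping the field relative to the ack. */
	glb_req = GLB_REQ_CFG_ALLOC_EN_SET(glb_req,
			!GLB_ACK_CFG_ALLOC_EN_GET(glb_ack));

	/* The request is complete once the firmware has matched the field. */
	if (GLB_REQ_CFG_ALLOC_EN_GET(glb_req) ==
	    GLB_ACK_CFG_ALLOC_EN_GET(glb_ack))
		; /* acknowledged */
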
-+ */ -+ if (!sync_updated && (scheduler->state == SCHED_SLEEPING)) -+ check_sync_update_in_sleep_mode(kbdev); ++#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) ++#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); ++#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10) ++#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT) + -+ mutex_unlock(&scheduler->lock); -+} ++#define GLB_IDLE_TIMER (0x0080) ++/* GLB_IDLE_TIMER register */ ++#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) ++#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) ++#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) ++#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ ++ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ ++ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ ++ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) ++/* GLB_IDLE_TIMER_TIMER_SOURCE values */ ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 ++#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 ++/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ + -+static -+enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) -+{ -+ struct kbase_context *const kctx = param; ++/* GLB_INSTR_FEATURES register */ ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ ++ (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ ++ (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ ++ (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ ++ (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) + -+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); ++#define CSG_STATUS_STATE (0x0018) /* CSG state status register */ ++/* CSG_STATUS_STATE register */ ++#define CSG_STATUS_STATE_IDLE_SHIFT (0) ++#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) ++#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) ++#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ ++ (((value) << 
CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) + -+ queue_work(kctx->csf.sched.sync_update_wq, -+ &kctx->csf.sched.sync_update_work); ++/* GLB_FEATURES_ITER_TRACE_SUPPORTED register */ ++#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4) ++#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \ ++ (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) ++#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \ ++ (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \ ++ GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) ++#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \ ++ (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \ ++ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK)) + -+ return KBASE_CSF_EVENT_CALLBACK_KEEP; -+} ++/* GLB_REQ_ITER_TRACE_ENABLE register */ ++#define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11) ++#define GLB_REQ_ITER_TRACE_ENABLE_MASK \ ++ (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) ++#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \ ++ GLB_REQ_ITER_TRACE_ENABLE_SHIFT) ++#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \ ++ (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \ ++ GLB_REQ_ITER_TRACE_ENABLE_MASK)) + -+int kbase_csf_scheduler_context_init(struct kbase_context *kctx) -+{ -+ int priority; -+ int err; ++/* GLB_PRFCNT_CONFIG register */ ++#define GLB_PRFCNT_CONFIG_SIZE_SHIFT (0) ++#define GLB_PRFCNT_CONFIG_SIZE_MASK (0xFF << GLB_PRFCNT_CONFIG_SIZE_SHIFT) ++#define GLB_PRFCNT_CONFIG_SIZE_GET(reg_val) \ ++ (((reg_val)&GLB_PRFCNT_CONFIG_SIZE_MASK) >> GLB_PRFCNT_CONFIG_SIZE_SHIFT) ++#define GLB_PRFCNT_CONFIG_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PRFCNT_CONFIG_SIZE_MASK) | \ ++ (((value) << GLB_PRFCNT_CONFIG_SIZE_SHIFT) & GLB_PRFCNT_CONFIG_SIZE_MASK)) ++#define GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT GPU_U(8) ++#define GLB_PRFCNT_CONFIG_SET_SELECT_MASK (GPU_U(0x3) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) ++#define GLB_PRFCNT_CONFIG_SET_SELECT_GET(reg_val) \ ++ (((reg_val)&GLB_PRFCNT_CONFIG_SET_SELECT_MASK) >> GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) ++#define GLB_PRFCNT_CONFIG_SET_SELECT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PRFCNT_CONFIG_SET_SELECT_MASK) | \ ++ (((value) << GLB_PRFCNT_CONFIG_SET_SELECT_SHIFT) & GLB_PRFCNT_CONFIG_SET_SELECT_MASK)) + -+ kbase_ctx_sched_init_ctx(kctx); ++/* GLB_PRFCNT_SIZE register */ ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) ((value) >> 8) ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(value) ((value) << 8) ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT GPU_U(0) ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(reg_val) \ ++ (GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) >> \ ++ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT)) ++#define GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \ ++ ((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \ ++ GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK)) ++#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8) ++#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8) ++#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16) ++#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK (GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) ++#define 
GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val) \ ++ (GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >> \ ++ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)) ++#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) | \ ++ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ ++ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) + -+ for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; -+ ++priority) { -+ INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); -+ } ++/* GLB_DEBUG_REQ register */ ++#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) ++#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) ++#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ ++ (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) ++#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ ++ (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + -+ kctx->csf.sched.num_runnable_grps = 0; -+ INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); -+ kctx->csf.sched.num_idle_wait_grps = 0; -+ kctx->csf.sched.ngrp_to_schedule = 0; ++#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) ++#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) ++#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ ++ (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) ++#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ ++ (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + -+ kctx->csf.sched.sync_update_wq = -+ alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", -+ WQ_HIGHPRI); -+ if (!kctx->csf.sched.sync_update_wq) { -+ dev_err(kctx->kbdev->dev, -+ "Failed to initialize scheduler context workqueue"); -+ err = -ENOMEM; -+ goto alloc_wq_failed; -+ } ++/* GLB_DEBUG_ACK register */ ++#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) ++#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) ++#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ ++ (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) ++#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ ++ (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + -+ INIT_WORK(&kctx->csf.sched.sync_update_work, -+ check_group_sync_update_worker); ++#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) ++#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) ++#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ ++ (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) ++#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ ++ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + -+ kbase_csf_tiler_heap_reclaim_ctx_init(kctx); ++/* RUN_MODE values */ ++#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 ++#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 ++/* End of RUN_MODE values */ + -+ err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); ++#endif /* _KBASE_CSF_REGISTERS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +new file mode 100644 +index 000000000..fe3b91a48 +--- /dev/null ++++ 
b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +@@ -0,0 +1,637 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (err) { -+ dev_err(kctx->kbdev->dev, -+ "Failed to register a sync update callback"); -+ goto event_wait_add_failed; -+ } ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ return err; ++enum kbasep_soft_reset_status { ++ RESET_SUCCESS = 0, ++ SOFT_RESET_FAILED, ++ L2_ON_FAILED, ++ MCU_REINIT_FAILED ++}; + -+event_wait_add_failed: -+ destroy_workqueue(kctx->csf.sched.sync_update_wq); -+alloc_wq_failed: -+ kbase_ctx_sched_remove_ctx(kctx); -+ return err; ++static inline bool ++kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) ++{ ++ return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); +} + -+void kbase_csf_scheduler_context_term(struct kbase_context *kctx) ++static inline bool ++kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) +{ -+ kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); -+ cancel_work_sync(&kctx->csf.sched.sync_update_work); -+ destroy_workqueue(kctx->csf.sched.sync_update_wq); -+ -+ kbase_ctx_sched_remove_ctx(kctx); ++ return (state == KBASE_CSF_RESET_GPU_COMMITTED || ++ state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); +} + -+int kbase_csf_scheduler_init(struct kbase_device *kbdev) ++static inline bool ++kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u32 num_groups = kbdev->csf.global_iface.group_num; ++ return (state == KBASE_CSF_RESET_GPU_HAPPENING); ++} + -+ bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); -+ bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); ++/** ++ * DOC: Mechanism for coherent access to the HW with respect to GPU reset ++ * ++ * Access to the HW from non-atomic context outside of the reset thread must ++ * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent(). ++ * ++ * This currently works by taking the &kbase_device's csf.reset.sem, for ++ * 'write' access by the GPU reset thread and 'read' access by every other ++ * thread. The use of this rw_semaphore means: ++ * ++ * - there will be mutual exclusion (and thus waiting) between the thread doing ++ * reset ('writer') and threads trying to access the GPU for 'normal' ++ * operations ('readers') ++ * ++ * - multiple threads may prevent reset from happening without serializing each ++ * other prematurely. Note that at present the wait for reset to finish has ++ * to be done higher up in the driver than actual GPU access, at a point ++ * where it won't cause lock ordering issues. 
At such a point, some paths may ++ * actually lead to no GPU access, but we would prefer to avoid serializing ++ * at that level ++ * ++ * - lockdep (if enabled in the kernel) will check such uses for deadlock ++ * ++ * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on ++ * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls ++ * to make use of lockdep checking in all places where the &wait_queue_head_t ++ * is waited upon or signaled. ++ * ++ * Indeed places where we wait on &kbase_device's csf.reset.wait (such as ++ * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to ++ * lockdep, and they are made on the existing rw_semaphore. ++ * ++ * For non-atomic access, the &kbase_device's csf.reset.state member should be ++ * checked instead, such as by using kbase_reset_gpu_is_active(). ++ * ++ * Ideally the &rw_semaphore should be replaced in future with a single mutex ++ * that protects any access to the GPU, via reset or otherwise. ++ */ + -+ scheduler->csg_slots = kcalloc(num_groups, -+ sizeof(*scheduler->csg_slots), GFP_KERNEL); -+ if (!scheduler->csg_slots) { -+ dev_err(kbdev->dev, -+ "Failed to allocate memory for csg slot status array\n"); ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) ++{ ++ down_read(&kbdev->csf.reset.sem); ++ ++ if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ up_read(&kbdev->csf.reset.sem); + return -ENOMEM; + } + -+ return kbase_csf_mcu_shared_regs_data_init(kbdev); ++ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { ++ up_read(&kbdev->csf.reset.sem); ++ return -EFAULT; ++ } ++ ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait); + -+int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ -+ scheduler->timer_enabled = true; ++ if (!down_read_trylock(&kbdev->csf.reset.sem)) ++ return -EAGAIN; + -+ scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); -+ if (!scheduler->wq) { -+ dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); ++ if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ up_read(&kbdev->csf.reset.sem); + return -ENOMEM; + } -+ scheduler->idle_wq = alloc_ordered_workqueue( -+ "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); -+ if (!scheduler->idle_wq) { -+ dev_err(kbdev->dev, -+ "Failed to allocate GPU idle scheduler workqueue\n"); -+ destroy_workqueue(kbdev->csf.scheduler.wq); -+ return -ENOMEM; ++ ++ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { ++ up_read(&kbdev->csf.reset.sem); ++ return -EFAULT; + } + -+ INIT_WORK(&scheduler->tick_work, schedule_on_tick); -+ INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); -+ atomic_set(&scheduler->pending_tock_work, false); ++ return 0; ++} + -+ INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); ++void kbase_reset_gpu_allow(struct kbase_device *kbdev) ++{ ++ up_read(&kbdev->csf.reset.sem); ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow); + -+ mutex_init(&scheduler->lock); -+ spin_lock_init(&scheduler->interrupt_lock); ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) ++{ ++#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_read(&kbdev->csf.reset.sem); ++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); ++#endif ++ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++} + -+ /* Internal lists */ -+ 
INIT_LIST_HEAD(&scheduler->runnable_kctxs); -+ INIT_LIST_HEAD(&scheduler->groups_to_schedule); -+ INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) ++{ ++ if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) ++ return; + -+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > -+ (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); -+ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); -+ scheduler->state = SCHED_SUSPENDED; -+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); -+ scheduler->pm_active_count = 0; -+ scheduler->ngrp_to_schedule = 0; -+ scheduler->total_runnable_grps = 0; -+ scheduler->top_ctx = NULL; -+ scheduler->top_grp = NULL; -+ scheduler->last_schedule = 0; -+ scheduler->active_protm_grp = NULL; -+ scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; -+ scheduler_doorbell_init(kbdev); ++#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_read(&kbdev->csf.reset.sem); ++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); ++#endif ++ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++} + -+ INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); -+ scheduler->fast_gpu_idle_handling = false; -+ atomic_set(&scheduler->gpu_no_longer_idle, false); -+ atomic_set(&scheduler->non_idle_offslot_grps, 0); ++/* Mark the reset as now happening, and synchronize with other threads that ++ * might be trying to access the GPU ++ */ ++static void kbase_csf_reset_begin_hw_access_sync( ++ struct kbase_device *kbdev, ++ enum kbase_csf_reset_gpu_state initial_reset_state) ++{ ++ unsigned long hwaccess_lock_flags; ++ unsigned long scheduler_spin_lock_flags; + -+ hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ scheduler->tick_timer.function = tick_timer_callback; -+ scheduler->tick_timer_active = false; ++ /* Note this is a WARN/atomic_set because it is a software issue for a ++ * race to be occurring here ++ */ ++ WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state)); + -+ kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); ++ down_write(&kbdev->csf.reset.sem); + -+ return 0; ++ /* Threads in atomic context accessing the HW will hold one of these ++ * locks, so synchronize with them too. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); ++ atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); ++ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); +} + -+void kbase_csf_scheduler_term(struct kbase_device *kbdev) ++/* Mark the reset as finished and allow others threads to once more access the ++ * GPU ++ */ ++static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, ++ int err_during_reset, ++ bool firmware_inited) +{ -+ if (kbdev->csf.scheduler.csg_slots) { -+ WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); -+ /* The unload of Driver can take place only when all contexts have -+ * been terminated. The groups that were not terminated by the User -+ * are terminated on context termination. So no CSGs are expected -+ * to be active at the time of Driver unload. 
-+ */ -+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -+ flush_work(&kbdev->csf.scheduler.gpu_idle_work); -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ unsigned long hwaccess_lock_flags; ++ unsigned long scheduler_spin_lock_flags; + -+ if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { -+ unsigned long flags; -+ /* The power policy could prevent the Scheduler from -+ * getting suspended when GPU becomes idle. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev)); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ scheduler_suspend(kbdev); -+ } ++ WARN_ON(!kbase_csf_reset_state_is_active( ++ atomic_read(&kbdev->csf.reset.state))); + -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+ cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); -+ cancel_tick_timer(kbdev); -+ cancel_tick_work(&kbdev->csf.scheduler); -+ cancel_tock_work(&kbdev->csf.scheduler); -+ kfree(kbdev->csf.scheduler.csg_slots); -+ kbdev->csf.scheduler.csg_slots = NULL; ++ /* Once again, we synchronize with atomic context threads accessing the ++ * HW, as otherwise any actions they defer could get lost ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); ++ ++ if (!err_during_reset) { ++ atomic_set(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING); ++ } else { ++ dev_err(kbdev->dev, "Reset failed to complete"); ++ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED); + } -+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, -+ kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -+ /* Terminating the MCU shared regions, following the release of slots */ -+ kbase_csf_mcu_shared_regs_data_term(kbdev); ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ ++ /* Invoke the scheduling tick after formally finishing the reset, ++ * otherwise the tick might start too soon and notice that reset ++ * is still in progress. 
++ */ ++ up_write(&kbdev->csf.reset.sem); ++ wake_up(&kbdev->csf.reset.wait); ++ ++ if (!err_during_reset && likely(firmware_inited)) ++ kbase_csf_scheduler_enable_tick_timer(kbdev); +} + -+void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) ++static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) +{ -+ if (kbdev->csf.scheduler.idle_wq) -+ destroy_workqueue(kbdev->csf.scheduler.idle_wq); -+ if (kbdev->csf.scheduler.wq) -+ destroy_workqueue(kbdev->csf.scheduler.wq); ++ kbase_io_history_dump(kbdev); + -+ kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); -+ mutex_destroy(&kbdev->csf.scheduler.lock); ++ dev_err(kbdev->dev, "Register state:"); ++ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); ++ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); ++ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); ++ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); ++ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); +} + +/** -+ * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will restart the scheduler tick so that regular scheduling can -+ * be resumed without any explicit trigger (like kicking of GPU queues). This -+ * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the -+ * CSF scheduler lock to already have been held. ++ * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the ++ * event of an error during GPU reset. ++ * @kbdev: Pointer to KBase device + */ -+static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) ++static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); -+ -+ if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) -+ return; ++ unsigned long flags; + -+ WARN_ON((scheduler->state != SCHED_INACTIVE) && -+ (scheduler->state != SCHED_SUSPENDED) && -+ (scheduler->state != SCHED_SLEEPING)); ++ /* Treat this as an unrecoverable error for HWCNT */ ++ kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); + -+ if (scheduler->total_runnable_grps > 0) { -+ enqueue_tick_work(kbdev); -+ dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); -+ } else if (scheduler->state != SCHED_SUSPENDED) { -+ enqueue_gpu_idle_work(scheduler); -+ } ++ /* Re-enable counters to ensure matching enable/disable pair. 
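As a reading aid for the two synchronization helpers above: a normal reset walks csf.reset.state through NOT_PENDING, PREPARED (set by kbase_prepare_to_reset_gpu(), further below), COMMITTED or COMMITTED_SILENT, then HAPPENING, and back to NOT_PENDING, with FAILED as the terminal error state. The helper below is only an illustrative sketch, not part of the patch; it simply names the states of enum kbase_csf_reset_gpu_state in that order:

/* Illustrative only: maps each reset state to a printable name. */
static const char *example_reset_state_name(enum kbase_csf_reset_gpu_state state)
{
	switch (state) {
	case KBASE_CSF_RESET_GPU_NOT_PENDING:
		return "not pending";
	case KBASE_CSF_RESET_GPU_PREPARED:
		return "prepared";
	case KBASE_CSF_RESET_GPU_COMMITTED:
		return "committed";
	case KBASE_CSF_RESET_GPU_COMMITTED_SILENT:
		return "committed (silent)";
	case KBASE_CSF_RESET_GPU_HAPPENING:
		return "happening";
	case KBASE_CSF_RESET_GPU_FAILED:
		return "failed";
	default:
		return "unknown";
	}
}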
++ * This might reduce the hwcnt disable count to 0, and therefore ++ * trigger actual re-enabling of hwcnt. ++ * However, as the backend is now in the unrecoverable error state, ++ * re-enabling will immediately fail and put the context into the error ++ * state, preventing the hardware from being touched (which could have ++ * risked a hang). ++ */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + -+void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) ++static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev, ++ bool firmware_inited, bool silent) +{ -+ mutex_lock(&kbdev->csf.scheduler.lock); -+ scheduler_enable_tick_timer_nolock(kbdev); -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+} ++ unsigned long flags; ++ int err; ++ enum kbasep_soft_reset_status ret = RESET_SUCCESS; + -+bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ bool enabled; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock(&kbdev->mmu_mask_change); ++ kbase_pm_reset_start_locked(kbdev); + -+ mutex_lock(&scheduler->lock); -+ enabled = scheduler_timer_is_enabled_nolock(kbdev); -+ mutex_unlock(&scheduler->lock); ++ dev_dbg(kbdev->dev, ++ "We're about to flush out the IRQs and their bottom halves\n"); ++ kbdev->irq_reset_flush = true; + -+ return enabled; -+} ++ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the ++ * spinlock; this also clears any outstanding interrupts ++ */ ++ kbase_pm_disable_interrupts_nolock(kbdev); + -+void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, -+ bool enable) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ bool currently_enabled; ++ spin_unlock(&kbdev->mmu_mask_change); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ mutex_lock(&scheduler->lock); ++ dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n"); ++ /* Must be done without any locks IRQ handlers will take. */ ++ kbase_synchronize_irqs(kbdev); + -+ currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); -+ if (currently_enabled && !enable) { -+ scheduler->timer_enabled = false; -+ cancel_tick_timer(kbdev); -+ mutex_unlock(&scheduler->lock); -+ /* The non-sync version to cancel the normal work item is not -+ * available, so need to drop the lock before cancellation. 
-+ */ -+ cancel_tick_work(scheduler); -+ cancel_tock_work(scheduler); -+ return; -+ } ++ dev_dbg(kbdev->dev, "Flush out any in-flight work items\n"); ++ kbase_flush_mmu_wqs(kbdev); + -+ if (!currently_enabled && enable) { -+ scheduler->timer_enabled = true; ++ dev_dbg(kbdev->dev, ++ "The flush has completed so reset the active indicator\n"); ++ kbdev->irq_reset_flush = false; + -+ scheduler_enable_tick_timer_nolock(kbdev); ++ if (!silent) ++ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", ++ RESET_TIMEOUT); ++ ++ /* Output the state of some interesting registers to help in the ++ * debugging of GPU resets, and dump the firmware trace buffer ++ */ ++ if (!silent) { ++ kbase_csf_debug_dump_registers(kbdev); ++ if (likely(firmware_inited)) ++ kbase_csf_firmware_log_dump_buffer(kbdev); + } + -+ mutex_unlock(&scheduler->lock); -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ipa_control_handle_gpu_reset_pre(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+void kbase_csf_scheduler_kick(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ /* Tell hardware counters a reset is about to occur. ++ * If the backend is in an unrecoverable error state (e.g. due to ++ * firmware being unresponsive) this will transition the backend out of ++ * it, on the assumption a reset will fix whatever problem there was. ++ */ ++ kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface); + -+ mutex_lock(&scheduler->lock); ++ mutex_lock(&kbdev->pm.lock); ++ /* Reset the GPU */ ++ err = kbase_pm_init_hw(kbdev, 0); + -+ if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) -+ goto out; ++ mutex_unlock(&kbdev->pm.lock); + -+ if (scheduler->total_runnable_grps > 0) { -+ enqueue_tick_work(kbdev); -+ dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); ++ if (WARN_ON(err)) ++ return SOFT_RESET_FAILED; ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ kbase_ipa_control_handle_gpu_reset_post(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ kbase_pm_enable_interrupts(kbdev); ++ ++ mutex_lock(&kbdev->pm.lock); ++ kbase_pm_reset_complete(kbdev); ++ /* Synchronously wait for the reload of firmware to complete */ ++ err = kbase_pm_wait_for_desired_state(kbdev); ++ mutex_unlock(&kbdev->pm.lock); ++ ++ if (err) { ++ if (!kbase_pm_l2_is_in_desired_state(kbdev)) ++ ret = L2_ON_FAILED; ++ else if (!kbase_pm_mcu_is_in_desired_state(kbdev)) ++ ret = MCU_REINIT_FAILED; + } + -+out: -+ mutex_unlock(&scheduler->lock); ++ return ret; +} + -+int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) ++static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ int result = 0; -+ -+ lockdep_assert_held(&scheduler->lock); ++ unsigned long flags; ++ enum kbasep_soft_reset_status ret; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(scheduler->state == SCHED_BUSY)) -+ return -EBUSY; ++ WARN_ON(kbdev->irq_reset_flush); ++ /* The reset must now be happening otherwise other threads will not ++ * have been synchronized with to stop their access to the HW ++ */ ++#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_write(&kbdev->csf.reset.sem); ++#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_exclusive(&kbdev->csf.reset.sem); 
++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); +#endif ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + -+#ifdef KBASE_PM_RUNTIME -+ /* If scheduler is in sleeping state, then MCU needs to be activated -+ * to suspend CSGs. ++ /* Reset the scheduler state before disabling the interrupts as suspend ++ * of active CSG slots would also be done as a part of reset. + */ -+ if (scheduler->state == SCHED_SLEEPING) { -+ dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend"); -+ result = force_scheduler_to_exit_sleep(kbdev); -+ if (result) { -+ dev_warn(kbdev->dev, "Scheduler failed to exit from sleep"); -+ goto exit; -+ } -+ } -+#endif -+ if (scheduler->state != SCHED_SUSPENDED) { -+ result = suspend_active_groups_on_powerdown(kbdev, true); -+ if (result) { -+ dev_warn(kbdev->dev, "failed to suspend active groups"); -+ goto exit; -+ } else { -+ dev_info(kbdev->dev, "Scheduler PM suspend"); -+ scheduler_suspend(kbdev); -+ cancel_tick_timer(kbdev); ++ if (likely(firmware_inited)) ++ kbase_csf_scheduler_reset(kbdev); ++ cancel_work_sync(&kbdev->csf.firmware_reload_work); ++ ++ dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); ++ /* This call will block until counters are disabled. */ ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ ++ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent); ++ if (ret == SOFT_RESET_FAILED) { ++ dev_err(kbdev->dev, "Soft-reset failed"); ++ goto err; ++ } else if (ret == L2_ON_FAILED) { ++ dev_err(kbdev->dev, "L2 power up failed after the soft-reset"); ++ goto err; ++ } else if (ret == MCU_REINIT_FAILED) { ++ dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload"); ++ /* Since MCU reinit failed despite successful soft reset, we can try ++ * the firmware full reload. ++ */ ++ kbdev->csf.firmware_full_reload_needed = true; ++ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true); ++ if (ret != RESET_SUCCESS) { ++ dev_err(kbdev->dev, ++ "MCU Re-init failed even after trying full firmware reload, ret = [%d]", ++ ret); ++ goto err; + } + } + -+exit: -+ return result; ++ /* Re-enable GPU hardware counters */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ if (!silent) ++ dev_err(kbdev->dev, "Reset complete"); ++ return 0; ++err: ++ ++ kbase_csf_hwcnt_on_reset_error(kbdev); ++ return -1; +} + -+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) ++static void kbase_csf_reset_gpu_worker(struct work_struct *data) +{ -+ int result = 0; -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ csf.reset.work); ++ bool gpu_sleep_mode_active = false; ++ bool firmware_inited; ++ unsigned long flags; ++ int err = 0; ++ const enum kbase_csf_reset_gpu_state initial_reset_state = ++ atomic_read(&kbdev->csf.reset.state); ++ const bool silent = ++ kbase_csf_reset_state_is_silent(initial_reset_state); + -+ /* Cancel any potential queued delayed work(s) */ -+ cancel_tick_work(scheduler); -+ cancel_tock_work(scheduler); ++ /* Ensure any threads (e.g. 
executing the CSF scheduler) have finished ++ * using the HW ++ */ ++ kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state); + -+ result = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (result) { -+ dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n"); -+ return result; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ firmware_inited = kbdev->csf.firmware_inited; ++#ifdef KBASE_PM_RUNTIME ++ gpu_sleep_mode_active = kbdev->pm.backend.gpu_sleep_mode_active; ++#endif ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ mutex_lock(&scheduler->lock); ++ if (unlikely(gpu_sleep_mode_active)) { ++#ifdef KBASE_PM_RUNTIME ++ /* As prior to GPU reset all on-slot groups are suspended, ++ * need to wake up the MCU from sleep. ++ * No pm active reference is taken here since GPU is in sleep ++ * state and both runtime & system suspend synchronize with the ++ * GPU reset before they wake up the GPU to suspend on-slot ++ * groups. GPUCORE-29850 would add the proper handling. ++ */ ++ kbase_pm_lock(kbdev); ++ if (kbase_pm_force_mcu_wakeup_after_sleep(kbdev)) ++ dev_warn(kbdev->dev, "Wait for MCU wake up failed on GPU reset"); ++ kbase_pm_unlock(kbdev); + -+ result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev); -+ mutex_unlock(&scheduler->lock); ++ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); ++#endif ++ } else if (!kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); ++ kbase_pm_context_idle(kbdev); ++ } + -+ kbase_reset_gpu_allow(kbdev); ++ kbase_disjoint_state_down(kbdev); + -+ return result; ++ /* Allow other threads to once again use the GPU */ ++ kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited); +} -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); + -+void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev) ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ -+ lockdep_assert_held(&scheduler->lock); -+ if ((scheduler->total_runnable_grps > 0) && -+ (scheduler->state == SCHED_SUSPENDED)) { -+ dev_info(kbdev->dev, "Scheduler PM resume"); -+ scheduler_wakeup(kbdev, true); -+ } -+} ++ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) ++ kbase_hwcnt_backend_csf_on_unrecoverable_error( ++ &kbdev->hwcnt_gpu_iface); + -+void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ if (atomic_cmpxchg(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING, ++ KBASE_CSF_RESET_GPU_PREPARED) != ++ KBASE_CSF_RESET_GPU_NOT_PENDING) ++ /* Some other thread is already resetting the GPU */ ++ return false; + -+ kbase_csf_scheduler_pm_resume_no_lock(kbdev); -+ mutex_unlock(&kbdev->csf.scheduler.lock); ++ return true; +} -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume); ++KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + -+void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags) +{ -+ /* Here the lock is taken to synchronize against the runtime suspend -+ * callback function, which may need to wake up the MCU for suspending -+ * the CSGs before powering down the GPU. 
-+ */ -+ mutex_lock(&kbdev->csf.scheduler.lock); -+ scheduler_pm_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+} -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) -+{ -+ /* Here the lock is taken just to maintain symmetry with -+ * kbase_csf_scheduler_pm_active(). -+ */ -+ mutex_lock(&kbdev->csf.scheduler.lock); -+ scheduler_pm_idle(kbdev); -+ mutex_unlock(&kbdev->csf.scheduler.lock); ++ return kbase_prepare_to_reset_gpu(kbdev, flags); +} -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); + -+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) ++void kbase_reset_gpu(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; -+ int err; -+ -+ kbase_pm_lock(kbdev); -+ WARN_ON(!kbdev->pm.active_count); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(!scheduler->pm_active_count); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_pm_unlock(kbdev); ++ /* Note this is a WARN/atomic_set because it is a software issue for ++ * a race to be occurring here ++ */ ++ if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != ++ KBASE_RESET_GPU_PREPARED)) ++ return; + -+ kbase_pm_wait_for_poweroff_work_complete(kbdev); ++ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED); ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU\n"); + -+ err = kbase_pm_wait_for_desired_state(kbdev); -+ if (!err) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++ kbase_disjoint_state_up(kbdev); + -+ return err; ++ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); +} -+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active); ++KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+#ifdef KBASE_PM_RUNTIME -+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) ++void kbase_reset_gpu_locked(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; -+ int ret; -+ -+ dev_dbg(kbdev->dev, "Handling runtime suspend"); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_reset_gpu_assert_prevented(kbdev); -+ lockdep_assert_held(&scheduler->lock); -+ WARN_ON(scheduler->pm_active_count); ++ kbase_reset_gpu(kbdev); ++} + -+ if (scheduler->state == SCHED_SUSPENDED) { -+ WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active); -+ return 0; ++int kbase_reset_gpu_silent(struct kbase_device *kbdev) ++{ ++ if (atomic_cmpxchg(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING, ++ KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != ++ KBASE_CSF_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return -EAGAIN; + } + -+ ret = suspend_active_groups_on_powerdown(kbdev, false); ++ kbase_disjoint_state_up(kbdev); + -+ if (ret) { -+ dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)", -+ atomic_read(&scheduler->non_idle_offslot_grps)); ++ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.exit_gpu_sleep_mode = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return 0; ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent); + -+ kbase_csf_scheduler_invoke_tick(kbdev); -+ return ret; -+ } ++bool 
kbase_reset_gpu_is_active(struct kbase_device *kbdev) ++{ ++ enum kbase_csf_reset_gpu_state reset_state = ++ atomic_read(&kbdev->csf.reset.state); + -+ scheduler->state = SCHED_SUSPENDED; -+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.gpu_sleep_mode_active = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* For CSF, the reset is considered active only when the reset worker ++ * is actually executing and other threads would have to wait for it to ++ * complete ++ */ ++ return kbase_csf_reset_state_is_active(reset_state); ++} + -+ wake_up_all(&kbdev->csf.event_wait); -+ return 0; ++bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) ++{ ++ return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING; +} + -+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) ++int kbase_reset_gpu_wait(struct kbase_device *kbdev) +{ -+ u32 csg_nr; ++ const long wait_timeout = ++ kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT)); ++ long remaining; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Inform lockdep we might be trying to wait on a reset (as ++ * would've been done with down_read() - which has no 'timeout' ++ * variant), then use wait_event_timeout() to implement the timed ++ * wait. ++ * ++ * in CONFIG_PROVE_LOCKING builds, this should catch potential 'time ++ * bound' deadlocks such as: ++ * - incorrect lock order with respect to others locks ++ * - current thread has prevented reset ++ * - current thread is executing the reset worker ++ */ ++ might_lock_read(&kbdev->csf.reset.sem); + -+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP); ++ remaining = wait_event_timeout( ++ kbdev->csf.reset.wait, ++ (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_NOT_PENDING) || ++ (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED), ++ wait_timeout); + -+ for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) { -+ struct kbase_csf_cmd_stream_group_info *ginfo = -+ &kbdev->csf.global_iface.groups[csg_nr]; -+ bool csg_idle; ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete"); + -+ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group) -+ continue; + -+ csg_idle = -+ kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & -+ CSG_STATUS_STATE_IDLE_MASK; -+ if (!csg_idle) { -+ dev_dbg(kbdev->dev, -+ "Re-activate Scheduler after MCU sleep"); -+ kbdev->pm.backend.exit_gpu_sleep_mode = true; -+ kbase_csf_scheduler_invoke_tick(kbdev); -+ break; -+ } ++ return -ETIMEDOUT; ++ } else if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ return -ENOMEM; + } ++ ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); + -+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev) ++int kbase_reset_gpu_init(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1); ++ if (kbdev->csf.reset.workq == NULL) ++ return -ENOMEM; + -+ mutex_lock(&scheduler->lock); -+ if (kbase_pm_gpu_sleep_allowed(kbdev) && -+ (scheduler->state == SCHED_INACTIVE)) -+ scheduler_sleep_on_idle(kbdev); -+ mutex_unlock(&scheduler->lock); ++ INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker); ++ ++ init_waitqueue_head(&kbdev->csf.reset.wait); ++ init_rwsem(&kbdev->csf.reset.sem); 
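The commit side of the state machine is driven by kbase_prepare_to_reset_gpu(), kbase_reset_gpu() and kbase_reset_gpu_wait() shown above. The fragment below is an illustrative sketch, not part of the patch: the fault-handling context and the function name are hypothetical, and the flags value 0 simply means that none of the RESET_FLAGS_* bits are needed:

/* Hypothetical error path showing the expected calling sequence. */
static void example_recover_from_gpu_fault(struct kbase_device *kbdev)
{
	/* NOT_PENDING -> PREPARED; false means another thread already won. */
	if (kbase_prepare_to_reset_gpu(kbdev, 0 /* no special reset flags */))
		kbase_reset_gpu(kbdev); /* PREPARED -> COMMITTED, queues csf.reset.work */

	/* Sleeps until the worker reaches NOT_PENDING, or reports failure/timeout. */
	if (kbase_reset_gpu_wait(kbdev))
		dev_err(kbdev->dev, "example: GPU reset did not complete cleanly");
}

kbase_reset_gpu_silent() combines the prepare and commit steps for resets that should not be reported, and returns -EAGAIN when another thread already owns the reset.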
++ ++ return 0; +} -+#endif + -+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev) ++void kbase_reset_gpu_term(struct kbase_device *kbdev) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ -+ mutex_lock(&scheduler->lock); -+ scheduler_wakeup(kbdev, true); -+ mutex_unlock(&scheduler->lock); ++ destroy_workqueue(kbdev->csf.reset.workq); +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c new file mode 100644 -index 000000000..d22d7c8b9 +index 000000000..edaa6d17e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h -@@ -0,0 +1,681 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +@@ -0,0 +1,6889 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -167796,6771 +170040,6884 @@ index 000000000..d22d7c8b9 + * + */ + -+#ifndef _KBASE_CSF_SCHEDULER_H_ -+#define _KBASE_CSF_SCHEDULER_H_ -+ ++#include ++#include "mali_kbase_config_defaults.h" ++#include ++#include ++#include +#include "mali_kbase_csf.h" -+#include "mali_kbase_csf_event.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mali_kbase_csf_tiler_heap_reclaim.h" ++#include "mali_kbase_csf_mcu_shared_reg.h" + -+/** -+ * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue -+ * on firmware. -+ * -+ * @queue: Pointer to the GPU command queue to be started. -+ * -+ * This function would enable the start of a CSI, within a -+ * CSG, to which the @queue was bound. -+ * If the CSG is already scheduled and resident, the CSI will be started -+ * right away, otherwise once the group is made resident. -+ * -+ * Return: 0 on success, or negative on failure. -EBUSY is returned to -+ * indicate to the caller that queue could not be enabled due to Scheduler -+ * state and the caller can try to enable the queue after sometime. -+ */ -+int kbase_csf_scheduler_queue_start(struct kbase_queue *queue); ++/* Value to indicate that a queue group is not groups_to_schedule list */ ++#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) + -+/** -+ * kbase_csf_scheduler_queue_stop() - Disable the running of GPU command queue -+ * on firmware. -+ * -+ * @queue: Pointer to the GPU command queue to be stopped. -+ * -+ * This function would stop the CSI, within a CSG, to which @queue was bound. -+ * -+ * Return: 0 on success, or negative on failure. ++/* This decides the upper limit on the waiting time for the Scheduler ++ * to exit the sleep state. Usually the value of autosuspend_delay is ++ * expected to be around 100 milli seconds. + */ -+int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue); ++#define MAX_AUTO_SUSPEND_DELAY_MS (5000) + -+/** -+ * kbase_csf_scheduler_group_protm_enter - Handle the protm enter event for the -+ * GPU command queue group. -+ * -+ * @group: The command queue group. 
-+ * -+ * This function could request the firmware to enter the protected mode -+ * and allow the execution of protected region instructions for all the -+ * bound queues of the group that have protm pending bit set in their -+ * respective CS_ACK register. -+ */ -+void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group); ++/* Maximum number of endpoints which may run tiler jobs. */ ++#define CSG_TILER_MAX ((u8)1) + -+/** -+ * kbase_csf_scheduler_group_get_slot() - Checks if a queue group is -+ * programmed on a firmware CSG slot -+ * and returns the slot number. -+ * -+ * @group: The command queue group. -+ * -+ * Return: The slot number, if the group is programmed on a slot. -+ * Otherwise returns a negative number. -+ * -+ * Note: This function should not be used if the interrupt_lock is held. Use -+ * kbase_csf_scheduler_group_get_slot_locked() instead. -+ */ -+int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group); ++/* Maximum dynamic CSG slot priority value */ ++#define MAX_CSG_SLOT_PRIORITY ((u8)15) + -+/** -+ * kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is -+ * programmed on a firmware CSG slot -+ * and returns the slot number. -+ * -+ * @group: The command queue group. -+ * -+ * Return: The slot number, if the group is programmed on a slot. -+ * Otherwise returns a negative number. -+ * -+ * Note: Caller must hold the interrupt_lock. -+ */ -+int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group); ++/* CSF scheduler time slice value */ ++#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ + -+/** -+ * kbase_csf_scheduler_group_events_enabled() - Checks if interrupt events -+ * should be handled for a queue group. -+ * -+ * @kbdev: The device of the group. -+ * @group: The queue group. -+ * -+ * Return: true if interrupt events should be handled. -+ * -+ * Note: Caller must hold the interrupt_lock. -+ */ -+bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, -+ struct kbase_queue_group *group); ++/* A GPU address space slot is reserved for MCU. */ ++#define NUM_RESERVED_AS_SLOTS (1) + -+/** -+ * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been -+ * programmed to a firmware CSG slot. -+ * -+ * @kbdev: The GPU device. -+ * @slot: The slot for which to get the queue group. -+ * -+ * Return: Pointer to the programmed queue group. -+ * -+ * Note: Caller must hold the interrupt_lock. -+ */ -+struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( -+ struct kbase_device *kbdev, int slot); ++/* Time to wait for completion of PING req before considering MCU as hung */ ++#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) + -+/** -+ * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue -+ * group from the firmware. -+ * -+ * @group: Pointer to the queue group to be descheduled. -+ * -+ * This function would disable the scheduling of GPU command queue group on -+ * firmware. -+ */ -+void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group); ++/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ ++#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT + -+/** -+ * kbase_csf_scheduler_evict_ctx_slots() - Evict all GPU command queue groups -+ * of a given context that are active -+ * running from the firmware. -+ * -+ * @kbdev: The GPU device. -+ * @kctx: Kbase context for the evict operation. -+ * @evicted_groups: List_head for returning evicted active queue groups. 
-+ * -+ * This function would disable the scheduling of GPU command queue groups active -+ * on firmware slots from the given Kbase context. The affected groups are -+ * added to the supplied list_head argument. -+ */ -+void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct list_head *evicted_groups); ++static int scheduler_group_schedule(struct kbase_queue_group *group); ++static void remove_group_from_idle_wait(struct kbase_queue_group *const group); ++static ++void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *const group, ++ enum kbase_csf_group_state run_state); ++static struct kbase_queue_group *scheduler_get_protm_enter_async_group( ++ struct kbase_device *const kbdev, ++ struct kbase_queue_group *const group); ++static struct kbase_queue_group *get_tock_top_group( ++ struct kbase_csf_scheduler *const scheduler); ++static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); ++static int suspend_active_queue_groups(struct kbase_device *kbdev, ++ unsigned long *slot_mask); ++static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, ++ bool system_suspend); ++static void schedule_in_cycle(struct kbase_queue_group *group, bool force); ++static bool queue_group_scheduled_locked(struct kbase_queue_group *group); + -+/** -+ * kbase_csf_scheduler_context_init() - Initialize the context-specific part -+ * for CSF scheduler. -+ * -+ * @kctx: Pointer to kbase context that is being created. -+ * -+ * This function must be called during Kbase context creation. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_scheduler_context_init(struct kbase_context *kctx); ++#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) + +/** -+ * kbase_csf_scheduler_init - Initialize the CSF scheduler -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and ++ * scheduling tick/tock to complete before the group deschedule. + * -+ * The scheduler does the arbitration for the CSG slots -+ * provided by the firmware between the GPU command queue groups created -+ * by the Clients. -+ * This function must be called after loading firmware and parsing its capabilities. ++ * @group: Pointer to the group that is being descheduled. + * -+ * Return: 0 on success, or negative on failure. ++ * This function blocks the descheduling of the group until the dump on fault is ++ * completed and scheduling tick/tock has completed. ++ * To deschedule an on slot group CSG termination request would be sent and that ++ * might time out if the fault had occurred and also potentially affect the state ++ * being dumped. Moreover the scheduler lock would be held, so the access to debugfs ++ * files would get blocked. ++ * Scheduler lock and 'kctx->csf.lock' are released before this function starts ++ * to wait. When a request sent by the Scheduler to the FW times out, Scheduler ++ * would also wait for the dumping to complete and release the Scheduler lock ++ * before the wait. Meanwhile Userspace can try to delete the group, this function ++ * would ensure that the group doesn't exit the Scheduler until scheduling ++ * tick/tock has completed. 
Though very unlikely, group deschedule can be triggered ++ * from multiple threads around the same time and after the wait Userspace thread ++ * can win the race and get the group descheduled and free the memory for group ++ * pointer before the other threads wake up and notice that group has already been ++ * descheduled. To avoid the freeing in such a case, a sort of refcount is used ++ * for the group which is incremented & decremented across the wait. + */ -+int kbase_csf_scheduler_init(struct kbase_device *kbdev); ++static ++void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) ++{ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+/** -+ * kbase_csf_scheduler_early_init - Early initialization for the CSF scheduler -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Initialize necessary resources such as locks, workqueue for CSF scheduler. -+ * This must be called at kbase probe. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_scheduler_early_init(struct kbase_device *kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ lockdep_assert_held(&scheduler->lock); + -+/** -+ * kbase_csf_scheduler_context_term() - Terminate the context-specific part -+ * for CSF scheduler. -+ * -+ * @kctx: Pointer to kbase context that is being terminated. -+ * -+ * This function must be called during Kbase context termination. -+ */ -+void kbase_csf_scheduler_context_term(struct kbase_context *kctx); ++ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) ++ return; + -+/** -+ * kbase_csf_scheduler_term - Terminate the CSF scheduler. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This should be called when unload of firmware is done on device -+ * termination. -+ */ -+void kbase_csf_scheduler_term(struct kbase_device *kbdev); ++ while ((!kbase_debug_csf_fault_dump_complete(kbdev) || ++ (scheduler->state == SCHED_BUSY)) && ++ queue_group_scheduled_locked(group)) { ++ group->deschedule_deferred_cnt++; ++ mutex_unlock(&scheduler->lock); ++ mutex_unlock(&kctx->csf.lock); ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ mutex_lock(&kctx->csf.lock); ++ mutex_lock(&scheduler->lock); ++ group->deschedule_deferred_cnt--; ++ } ++#endif ++} + +/** -+ * kbase_csf_scheduler_early_term - Early termination of the CSF scheduler. ++ * schedule_actions_trigger_df() - Notify the client about the fault and ++ * wait for the dumping to complete. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kbdev: Pointer to the device ++ * @kctx: Pointer to the context associated with the CSG slot for which ++ * the timeout was seen. ++ * @error: Error code indicating the type of timeout that occurred. + * -+ * This should be called only when kbase probe fails or gets rmmoded. ++ * This function notifies the Userspace client waiting for the faults and wait ++ * for the Client to complete the dumping. ++ * The function is called only from Scheduling tick/tock when a request sent by ++ * the Scheduler to FW times out or from the protm event work item of the group ++ * when the protected mode entry request times out. ++ * In the latter case there is no wait done as scheduler lock would be released ++ * immediately. In the former case the function waits and releases the scheduler ++ * lock before the wait. 
It has been ensured that the Scheduler view of the groups ++ * won't change meanwhile, so no group can enter/exit the Scheduler, become ++ * runnable or go off slot. + */ -+void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); ++static void schedule_actions_trigger_df(struct kbase_device *kbdev, ++ struct kbase_context *kctx, enum dumpfault_error_type error) ++{ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+/** -+ * kbase_csf_scheduler_reset - Reset the state of all active GPU command -+ * queue groups. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will first iterate through all the active/scheduled GPU -+ * command queue groups and suspend them (to avoid losing work for groups -+ * that are not stuck). The groups that could not get suspended would be -+ * descheduled and marked as terminated (which will then lead to unbinding -+ * of all the queues bound to them) and also no more work would be allowed -+ * to execute for them. -+ * -+ * This is similar to the action taken in response to an unexpected OoM event. -+ * No explicit re-initialization is done for CSG & CS interface I/O pages; -+ * instead, that happens implicitly on firmware reload. -+ * -+ * Should be called only after initiating the GPU reset. -+ */ -+void kbase_csf_scheduler_reset(struct kbase_device *kbdev); ++ lockdep_assert_held(&scheduler->lock); + -+/** -+ * kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will restart the scheduler tick so that regular scheduling can -+ * be resumed without any explicit trigger (like kicking of GPU queues). -+ */ -+void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); ++ if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) ++ return; + -+/** -+ * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue -+ * group and copy suspend buffer. -+ * -+ * @group: Pointer to the queue group to be suspended. -+ * @sus_buf: Pointer to the structure which contains details of the -+ * user buffer and its kernel pinned pages to which we need to copy -+ * the group suspend buffer. -+ * -+ * This function is called to suspend a queue group and copy the suspend_buffer -+ * contents to the input buffer provided. -+ * -+ * Return: 0 on success, or negative on failure. -+ */ -+int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, -+ struct kbase_suspend_copy_buffer *sus_buf); ++ if (unlikely(scheduler->state != SCHED_BUSY)) { ++ WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); ++ return; ++ } + -+/** -+ * kbase_csf_scheduler_lock - Acquire the global Scheduler lock. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will take the global scheduler lock, in order to serialize -+ * against the Scheduler actions, for access to CS IO pages. -+ */ -+static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ mutex_unlock(&scheduler->lock); ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ mutex_lock(&scheduler->lock); ++ WARN_ON(scheduler->state != SCHED_BUSY); ++#endif +} + ++#ifdef KBASE_PM_RUNTIME +/** -+ * kbase_csf_scheduler_unlock - Release the global Scheduler lock. ++ * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the ++ * sleeping state. 
+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev) -+{ -+ mutex_unlock(&kbdev->csf.scheduler.lock); -+} -+ -+/** -+ * kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock. ++ * @kbdev: Pointer to the device + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @flags: Pointer to the memory location that would store the previous -+ * interrupt state. ++ * This function waits until the Scheduler has exited the sleep state and ++ * it is called when an on-slot group is terminated or when the suspend ++ * buffer of an on-slot group needs to be captured. + * -+ * This function will take the global scheduler lock, in order to serialize -+ * against the Scheduler actions, for access to CS IO pages. ++ * Return: 0 when the wait is successful, otherwise an error code. + */ -+static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, -+ unsigned long *flags) ++static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev) +{ -+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags); -+} ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ int autosuspend_delay = kbdev->dev->power.autosuspend_delay; ++ unsigned int sleep_exit_wait_time; ++ long remaining; ++ int ret = 0; + -+/** -+ * kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @flags: Previously stored interrupt state when Scheduler interrupt -+ * spinlock was acquired. -+ */ -+static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, -+ unsigned long flags) -+{ -+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); -+} ++ lockdep_assert_held(&scheduler->lock); ++ WARN_ON(scheduler->state != SCHED_SLEEPING); + -+/** -+ * kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler -+ * interrupt spinlock is held. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+static inline void -+kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); -+} ++ /* No point in waiting if autosuspend_delay value is negative. ++ * For the negative value of autosuspend_delay Driver will directly ++ * go for the suspend of Scheduler, but the autosuspend_delay value ++ * could have been changed after the sleep was initiated. ++ */ ++ if (autosuspend_delay < 0) ++ return -EINVAL; + -+/** -+ * kbase_csf_scheduler_timer_is_enabled() - Check if the scheduler wakes up -+ * automatically for periodic tasks. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * Return: true if the scheduler is configured to wake up periodically -+ */ -+bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); ++ if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS) ++ autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS; + -+/** -+ * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic -+ * scheduler tasks. -+ * -+ * @kbdev: Pointer to the device -+ * @enable: Whether to enable periodic scheduler tasks -+ */ -+void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, -+ bool enable); ++ /* Usually Scheduler would remain in sleeping state until the ++ * auto-suspend timer expires and all active CSGs are suspended. 
++ */ ++ sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms; + -+/** -+ * kbase_csf_scheduler_kick - Perform pending scheduling tasks once. -+ * -+ * Note: This function is only effective if the scheduling timer is disabled. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_scheduler_kick(struct kbase_device *kbdev); ++ remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time); + -+/** -+ * kbase_csf_scheduler_protected_mode_in_use() - Check if the scheduler is -+ * running with protected mode tasks. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * Return: true if the scheduler is running with protected mode tasks -+ */ -+static inline bool kbase_csf_scheduler_protected_mode_in_use( -+ struct kbase_device *kbdev) -+{ -+ return (kbdev->csf.scheduler.active_protm_grp != NULL); ++ while ((scheduler->state == SCHED_SLEEPING) && !ret) { ++ mutex_unlock(&scheduler->lock); ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, ++ (scheduler->state != SCHED_SLEEPING), ++ remaining); ++ mutex_lock(&scheduler->lock); ++ if (!remaining && (scheduler->state == SCHED_SLEEPING)) ++ ret = -ETIMEDOUT; ++ } ++ ++ return ret; +} + +/** -+ * kbase_csf_scheduler_pm_active - Perform scheduler power active operation ++ * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state + * -+ * Note: This function will increase the scheduler's internal pm_active_count -+ * value, ensuring that both GPU and MCU are powered for access. The MCU may -+ * not have actually become active when this function returns, so need to -+ * call kbase_csf_scheduler_wait_mcu_active() for that. ++ * @kbdev: Pointer to the device + * -+ * This function should not be called with global scheduler lock held. ++ * This function will force the Scheduler to exit the sleep state by doing the ++ * wake up of MCU and suspension of on-slot groups. It is called at the time of ++ * system suspend. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * Return: 0 on success. + */ -+void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev); ++static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ int ret = 0; + -+/** -+ * kbase_csf_scheduler_pm_idle - Perform the scheduler power idle operation -+ * -+ * Note: This function will decrease the scheduler's internal pm_active_count -+ * value. On reaching 0, the MCU and GPU could be powered off. This function -+ * should not be called with global scheduler lock held. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ */ -+void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev); ++ lockdep_assert_held(&scheduler->lock); ++ WARN_ON(scheduler->state != SCHED_SLEEPING); ++ WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active); + -+/** -+ * kbase_csf_scheduler_wait_mcu_active - Wait for the MCU to actually become active -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will wait for the MCU to actually become active. It is supposed -+ * to be called after calling kbase_csf_scheduler_pm_active(). It is needed as -+ * kbase_csf_scheduler_pm_active() may not make the MCU active right away. -+ * -+ * Return: 0 if the MCU was successfully activated otherwise an error code. 
-+ */ -+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev); ++ kbase_pm_lock(kbdev); ++ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); ++ kbase_pm_unlock(kbdev); ++ if (ret) { ++ dev_warn(kbdev->dev, ++ "[%llu] Wait for MCU wake up failed on forced scheduler suspend", ++ kbase_backend_get_cycle_cnt(kbdev)); ++ goto out; ++ } + -+/** -+ * kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will make the scheduler resume the scheduling of queue groups -+ * and take the power managemenet reference, if there are any runnable groups. -+ * The caller must have acquired the global Scheduler lock. -+ */ -+void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev); ++ ret = suspend_active_groups_on_powerdown(kbdev, true); ++ if (ret) ++ goto out; + -+/** -+ * kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will make the scheduler resume the scheduling of queue groups -+ * and take the power managemenet reference, if there are any runnable groups. -+ */ -+void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev); ++ kbase_pm_lock(kbdev); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.gpu_sleep_mode_active = false; ++ kbdev->pm.backend.gpu_wakeup_override = false; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ret = kbase_pm_wait_for_desired_state(kbdev); ++ kbase_pm_unlock(kbdev); ++ if (ret) { ++ dev_warn(kbdev->dev, ++ "[%llu] Wait for pm state change failed on forced scheduler suspend", ++ kbase_backend_get_cycle_cnt(kbdev)); ++ goto out; ++ } + -+/** -+ * kbase_csf_scheduler_pm_suspend_no_lock - Idle the scheduler on system suspend -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will make the scheduler suspend all the running queue groups -+ * and drop its power managemenet reference. -+ * The caller must have acquired the global Scheduler lock. -+ * -+ * Return: 0 on success. -+ */ -+int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev); ++ scheduler->state = SCHED_SUSPENDED; ++ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); + -+/** -+ * kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * This function will make the scheduler suspend all the running queue groups -+ * and drop its power managemenet reference. -+ * -+ * Return: 0 on success. -+ */ -+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev); ++ return 0; + -+/** -+ * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal -+ * runtime used slots are all tagged as idle command queue groups. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * Return: true if all the used slots are tagged as idle CSGs. 
-+ */ -+static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); -+ return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask, -+ kbdev->csf.scheduler.csg_inuse_bitmap, -+ kbdev->csf.global_iface.group_num); ++out: ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.exit_gpu_sleep_mode = true; ++ kbdev->pm.backend.gpu_wakeup_override = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_csf_scheduler_invoke_tick(kbdev); ++ ++ return ret; +} ++#endif + +/** -+ * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick ++ * tick_timer_callback() - Callback function for the scheduling tick hrtimer + * -+ * @kbdev: Pointer to the device ++ * @timer: Pointer to the scheduling tick hrtimer + * -+ * This function advances the scheduling tick by enqueing the tick work item for -+ * immediate execution, but only if the tick hrtimer is active. If the timer -+ * is inactive then the tick work item is already in flight. -+ * The caller must hold the interrupt lock. ++ * This function will enqueue the scheduling tick work item for immediate ++ * execution, if it has not been queued already. ++ * ++ * Return: enum value to indicate that timer should not be restarted. + */ -+static inline void -+kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev) ++static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ -+ lockdep_assert_held(&scheduler->interrupt_lock); ++ struct kbase_device *kbdev = container_of(timer, struct kbase_device, ++ csf.scheduler.tick_timer); + -+ if (scheduler->tick_timer_active) { -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u); -+ scheduler->tick_timer_active = false; -+ queue_work(scheduler->wq, &scheduler->tick_work); -+ } else { -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u); -+ } ++ kbase_csf_scheduler_tick_advance(kbdev); ++ return HRTIMER_NORESTART; +} + +/** -+ * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick ++ * start_tick_timer() - Start the scheduling tick hrtimer. + * + * @kbdev: Pointer to the device + * -+ * This function advances the scheduling tick by enqueing the tick work item for -+ * immediate execution, but only if the tick hrtimer is active. If the timer -+ * is inactive then the tick work item is already in flight. ++ * This function will start the scheduling tick hrtimer and is supposed to ++ * be called only from the tick work item function. The tick hrtimer should ++ * not be active already. 
+ */ -+static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) ++static void start_tick_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + ++ lockdep_assert_held(&scheduler->lock); ++ + spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ kbase_csf_scheduler_tick_advance_nolock(kbdev); ++ WARN_ON(scheduler->tick_timer_active); ++ if (likely(!work_pending(&scheduler->tick_work))) { ++ scheduler->tick_timer_active = true; ++ ++ hrtimer_start(&scheduler->tick_timer, ++ HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), ++ HRTIMER_MODE_REL); ++ } + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +/** -+ * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick ++ * cancel_tick_timer() - Cancel the scheduling tick hrtimer + * + * @kbdev: Pointer to the device -+ * -+ * This function will queue the scheduling tick work item for immediate -+ * execution if tick timer is not active. This can be called from interrupt -+ * context to resume the scheduling after GPU was put to sleep. + */ -+static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) ++static void cancel_tick_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); + spin_lock_irqsave(&scheduler->interrupt_lock, flags); -+ if (!scheduler->tick_timer_active) -+ queue_work(scheduler->wq, &scheduler->tick_work); ++ scheduler->tick_timer_active = false; + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ hrtimer_cancel(&scheduler->tick_timer); +} + +/** -+ * kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock ++ * enqueue_tick_work() - Enqueue the scheduling tick work item + * + * @kbdev: Pointer to the device + * -+ * This function will queue the scheduling tock work item for immediate -+ * execution. ++ * This function will queue the scheduling tick work item for immediate ++ * execution. This shall only be called when both the tick hrtimer and tick ++ * work item are not active/pending. + */ -+static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) ++static void enqueue_tick_work(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); -+ if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) -+ mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); ++ lockdep_assert_held(&scheduler->lock); ++ ++ kbase_csf_scheduler_invoke_tick(kbdev); +} + -+/** -+ * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been -+ * configured to operate with the -+ * cs_trace feature. -+ * -+ * @queue: Pointer to the queue. -+ * -+ * Return: True if the gpu queue is configured to operate with the cs_trace -+ * feature, otherwise false. -+ */ -+static inline bool kbase_csf_scheduler_queue_has_trace(struct kbase_queue *queue) ++static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ -+ lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock); -+ /* In the current arrangement, it is possible for the context to enable -+ * the cs_trace after some queues have been registered with cs_trace in -+ * disabled state. So each queue has its own enabled/disabled condition. 
-+ */ -+ return (queue->trace_buffer_size && queue->trace_buffer_base); -+} ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_csf_scheduler_reval_idleness_post_sleep() - Check GPU's idleness after -+ * putting MCU to sleep state -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function re-evaluates the idleness of on-slot queue groups after MCU -+ * was put to the sleep state and invokes the scheduling tick if any of the -+ * on-slot queue group became non-idle. -+ * CSG_OUTPUT_BLOCK.CSG_STATUS_STATE.IDLE bit is checked to determine the -+ * idleness which is updated by MCU firmware on handling of the sleep request. -+ * -+ * This function is needed to detect if more work was flushed in the window -+ * between the GPU idle notification and the enabling of Doorbell mirror -+ * interrupt (from MCU state machine). Once Doorbell mirror interrupt is -+ * enabled, Host can receive the notification on User doorbell rings. -+ */ -+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); ++} + -+/** -+ * kbase_csf_scheduler_handle_runtime_suspend() - Handle runtime suspend by -+ * suspending CSGs. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function is called from the runtime suspend callback function for -+ * suspending all the on-slot queue groups. If any of the group is found to -+ * be non-idle after the completion of CSG suspend operation or the CSG -+ * suspend operation times out, then the scheduling tick is invoked and an -+ * error is returned so that the GPU power down can be aborted. -+ * -+ * Return: 0 if all the CSGs were suspended, otherwise an error code. -+ */ -+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev); -+#endif ++static int acquire_doorbell(struct kbase_device *kbdev) ++{ ++ int doorbell_nr; + -+/** -+ * kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function is called when a GPU idle IRQ has been raised. -+ */ -+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/** -+ * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function calculates the number of CSG slots that have a queue group -+ * resident on them. -+ * -+ * Note: This function should not be used if the interrupt_lock is held. Use -+ * kbase_csf_scheduler_get_nr_active_csgs_locked() instead. -+ * -+ * Return: number of active CSGs. -+ */ -+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev); ++ doorbell_nr = find_first_zero_bit( ++ kbdev->csf.scheduler.doorbell_inuse_bitmap, ++ CSF_NUM_DOORBELL); + -+/** -+ * kbase_csf_scheduler_get_nr_active_csgs_locked() - Get the number of active -+ * CSGs -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function calculates the number of CSG slots that have a queue group -+ * resident on them. -+ * -+ * Note: This function should be called with interrupt_lock held. -+ * -+ * Return: number of active CSGs. 
-+ */ -+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev); ++ if (doorbell_nr >= CSF_NUM_DOORBELL) ++ return KBASEP_USER_DB_NR_INVALID; + -+/** -+ * kbase_csf_scheduler_force_wakeup() - Forcefully resume the scheduling of CSGs -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function is called to forcefully resume the scheduling of CSGs, even -+ * when there wasn't any work submitted for them. -+ * This function is only used for testing purpose. -+ */ -+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev); ++ set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_csf_scheduler_force_sleep() - Forcefully put the Scheduler to sleeping -+ * state. -+ * -+ * @kbdev: Pointer to the device -+ * -+ * This function is called to forcefully put the Scheduler to sleeping state -+ * and trigger the sleep of MCU. If the CSGs are not idle, then the Scheduler -+ * would get reactivated again immediately. -+ * This function is only used for testing purpose. -+ */ -+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev); -+#endif ++ return doorbell_nr; ++} + -+#endif /* _KBASE_CSF_SCHEDULER_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c -new file mode 100644 -index 000000000..a5e0ab5ea ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c -@@ -0,0 +1,788 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+#include "mali_kbase_csf_sync_debugfs.h" -+#include "mali_kbase_csf_csg_debugfs.h" -+#include -+#include ++ if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { ++ release_doorbell(kbdev, group->doorbell_nr); ++ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ } ++} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include "mali_kbase_sync.h" -+#endif ++static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ mutex_lock(&kbdev->csf.reg_lock); + -+#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" ++ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { ++ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ /* After this the dummy page would be mapped in */ ++ unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, ++ queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); ++ } + -+/* GPU queue related values */ -+#define GPU_CSF_MOVE_OPCODE ((u64)0x1) -+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) -+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) -+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) -+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) -+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) -+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) -+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) -+#define GPU_CSF_CALL_OPCODE ((u64)0x20) ++ mutex_unlock(&kbdev->csf.reg_lock); ++} + -+#define MAX_NR_GPU_CALLS (5) -+#define INSTR_OPCODE_MASK ((u64)0xFF << 56) -+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) -+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) -+#define MOVE_DEST_MASK ((u64)0xFF << 48) -+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) -+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) -+#define SYNC_SRC0_MASK ((u64)0xFF << 40) -+#define SYNC_SRC1_MASK ((u64)0xFF << 32) -+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) -+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) -+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) -+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) ++static void assign_user_doorbell_to_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/* Enumeration for types of GPU queue sync events for -+ * the purpose of dumping them through debugfs. -+ */ -+enum debugfs_gpu_sync_type { -+ DEBUGFS_GPU_SYNC_WAIT, -+ DEBUGFS_GPU_SYNC_SET, -+ DEBUGFS_GPU_SYNC_ADD, -+ NUM_DEBUGFS_GPU_SYNC_TYPES -+}; ++ if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ group->doorbell_nr = acquire_doorbell(kbdev); ++} + -+/** -+ * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. -+ * -+ * @kctx: The context of the queue. -+ * @obj_addr: Pointer to the CQS live 32-bit value. -+ * @live_val: Pointer to the u32 that will be set to the CQS object's current, live -+ * value. -+ * -+ * Return: 0 if successful or a negative error code on failure. 
-+ */ -+static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, -+ u32 *live_val) ++static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, ++ struct kbase_queue *const queue) +{ -+ struct kbase_vmap_struct *mapping; -+ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (!cpu_ptr) -+ return -1; ++ mutex_lock(&kbdev->csf.reg_lock); + -+ *live_val = *cpu_ptr; -+ kbase_phy_alloc_mapping_put(kctx, mapping); -+ return 0; ++ /* If bind operation for the queue hasn't completed yet, then the ++ * CSI can't be programmed for the queue ++ * (even in stopped state) and so the doorbell also can't be assigned ++ * to it. ++ */ ++ if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && ++ (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { ++ WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); ++ queue->doorbell_nr = queue->group->doorbell_nr; ++ ++ /* After this the real Hw doorbell page would be mapped in */ ++ unmap_mapping_range( ++ kbdev->csf.db_filp->f_inode->i_mapping, ++ queue->db_file_offset << PAGE_SHIFT, ++ PAGE_SIZE, 1); ++ } ++ ++ mutex_unlock(&kbdev->csf.reg_lock); +} + -+/** -+ * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. -+ * -+ * @kctx: The context of the queue. -+ * @obj_addr: Pointer to the CQS live value (32 or 64-bit). -+ * @live_val: Pointer to the u64 that will be set to the CQS object's current, live -+ * value. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, -+ u64 *live_val) ++static void scheduler_doorbell_init(struct kbase_device *kbdev) +{ -+ struct kbase_vmap_struct *mapping; -+ u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); ++ int doorbell_nr; + -+ if (!cpu_ptr) -+ return -1; ++ bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, ++ CSF_NUM_DOORBELL); + -+ *live_val = *cpu_ptr; -+ kbase_phy_alloc_mapping_put(kctx, mapping); -+ return 0; ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ /* Reserve doorbell 0 for use by kernel driver */ ++ doorbell_nr = acquire_doorbell(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); +} + +/** -+ * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait -+ * or Fence Signal command, contained in a -+ * KCPU queue. ++ * update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs + * -+ * @file: The seq_file for printing to. -+ * @cmd: The KCPU Command to be printed. -+ * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function updates the EXTRACT offset for all queues which groups have ++ * been assigned a physical slot. These values could be used to detect a ++ * queue's true idleness status. This is intended to be an additional check ++ * on top of the GPU idle notification to account for race conditions. ++ * This function is supposed to be called only when GPU idle notification ++ * interrupt is received. 
+ */ -+static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, -+ struct kbase_kcpu_command *cmd, -+ const char *cmd_name) ++static void update_on_slot_queues_offsets(struct kbase_device *kbdev) +{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence = NULL; -+#else -+ struct dma_fence *fence = NULL; -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ /* All CSGs have the same number of CSs */ ++ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num; ++ size_t i; + -+ struct kbase_sync_fence_info info; -+ const char *timeline_name = NULL; -+ bool is_signaled = false; ++ lockdep_assert_held(&scheduler->interrupt_lock); + -+ fence = cmd->info.fence.fence; -+ if (WARN_ON(!fence)) -+ return; ++ /* csg_slots_idle_mask is not used here for the looping, as it could get ++ * updated concurrently when Scheduler re-evaluates the idle status of ++ * the CSGs for which idle notification was received previously. ++ */ ++ for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) { ++ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; ++ size_t j; + -+ kbase_sync_fence_info_get(cmd->info.fence.fence, &info); -+ timeline_name = fence->ops->get_timeline_name(fence); -+ is_signaled = info.status > 0; ++ if (WARN_ON(!group)) ++ continue; + -+ seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, -+ is_signaled); ++ for (j = 0; j < max_streams; ++j) { ++ struct kbase_queue *const queue = group->bound_queues[j]; + -+ /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ -+ seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", -+ timeline_name, fence->context, (u64)fence->seqno); ++ if (queue) { ++ if (queue->user_io_addr) { ++ u64 const *const output_addr = ++ (u64 const *)(queue->user_io_addr + PAGE_SIZE); ++ ++ queue->extract_ofs = ++ output_addr[CS_EXTRACT_LO / sizeof(u64)]; ++ } else { ++ dev_warn(kbdev->dev, ++ "%s(): queue->user_io_addr is NULL, queue: %p", ++ __func__, ++ queue); ++ } ++ } ++ } ++ } +} + -+/** -+ * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, -+ * contained in a KCPU queue. -+ * -+ * @file: The seq_file for printing to. -+ * @cmd: The KCPU Command to be printed. 
-+ */ -+static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, -+ struct kbase_kcpu_command *cmd) ++static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) +{ -+ struct kbase_context *kctx = file->private; -+ size_t i; ++ atomic_set(&scheduler->gpu_no_longer_idle, false); ++ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); ++} + -+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { -+ struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; ++void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ int non_idle_offslot_grps; ++ bool can_suspend_on_idle; + -+ u32 live_val; -+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); -+ bool live_val_valid = (ret >= 0); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&scheduler->interrupt_lock); + -+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); ++ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); ++ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, ++ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); + -+ if (live_val_valid) -+ seq_printf(file, "0x%.16llx", (u64)live_val); -+ else -+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); ++ if (!non_idle_offslot_grps) { ++ if (can_suspend_on_idle) { ++ /* fast_gpu_idle_handling is protected by the ++ * interrupt_lock, which would prevent this from being ++ * updated whilst gpu_idle_worker() is executing. ++ */ ++ scheduler->fast_gpu_idle_handling = ++ (kbdev->csf.gpu_idle_hysteresis_us == 0) || ++ !kbase_csf_scheduler_all_csgs_idle(kbdev); + -+ seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); ++ /* The GPU idle worker relies on update_on_slot_queues_offsets() to have ++ * finished. It's queued before to reduce the time it takes till execution ++ * but it'll eventually be blocked by the scheduler->interrupt_lock. ++ */ ++ enqueue_gpu_idle_work(scheduler); ++ ++ /* The extract offsets are unused in fast GPU idle handling */ ++ if (!scheduler->fast_gpu_idle_handling) ++ update_on_slot_queues_offsets(kbdev); ++ } ++ } else { ++ /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ ++ kbase_csf_scheduler_tick_advance_nolock(kbdev); + } +} + -+/** -+ * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS -+ * Set command, contained in a KCPU queue. -+ * -+ * @file: The seq_file for printing to. -+ * @cmd: The KCPU Command to be printed. 
-+ */ -+static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, -+ struct kbase_kcpu_command *cmd) ++u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = file->private; -+ size_t i; ++ u32 nr_active_csgs; + -+ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { -+ struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + -+ u32 live_val; -+ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); -+ bool live_val_valid = (ret >= 0); ++ nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, ++ kbdev->csf.global_iface.group_num); + -+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); ++ return nr_active_csgs; ++} + -+ if (live_val_valid) -+ seq_printf(file, "0x%.16llx", (u64)live_val); -+ else -+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); ++u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev) ++{ ++ u32 nr_active_csgs; ++ unsigned long flags; + -+ seq_printf(file, " | op:add arg_value:0x%.8x", 1); -+ } ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ ++ return nr_active_csgs; +} + +/** -+ * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. ++ * csg_slot_in_use - returns true if a queue group has been programmed on a ++ * given CSG slot. + * -+ * @op: The numerical value of operation. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @slot: Index/number of the CSG slot in question. + * -+ * Return: const static pointer to the command name, or '??' if unknown. ++ * Return: the interface is actively engaged flag. ++ * ++ * Note: Caller must hold the scheduler lock. + */ -+static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) ++static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) +{ -+ const char *string; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ switch (op) { -+ case BASEP_CQS_WAIT_OPERATION_LE: -+ string = "le"; -+ break; -+ case BASEP_CQS_WAIT_OPERATION_GT: -+ string = "gt"; -+ break; -+ default: -+ string = "??"; -+ break; -+ } -+ return string; ++ return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); +} + -+/** -+ * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. -+ * -+ * @op: The numerical value of operation. -+ * -+ * Return: const static pointer to the command name, or '??' if unknown. -+ */ -+static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) ++static bool queue_group_suspended_locked(struct kbase_queue_group *group) +{ -+ const char *string; ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + -+ switch (op) { -+ case BASEP_CQS_SET_OPERATION_ADD: -+ string = "add"; -+ break; -+ case BASEP_CQS_SET_OPERATION_SET: -+ string = "set"; -+ break; -+ default: -+ string = "???"; -+ break; -+ } -+ return string; ++ return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); +} + -+/** -+ * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS -+ * Wait Operation command, contained -+ * in a KCPU queue. -+ * -+ * @file: The seq_file for printing to. 
-+ * @cmd: The KCPU Command to be printed. -+ */ -+static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, -+ struct kbase_kcpu_command *cmd) ++static bool queue_group_idle_locked(struct kbase_queue_group *group) +{ -+ size_t i; -+ struct kbase_context *kctx = file->private; ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + -+ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { -+ struct base_cqs_wait_operation_info *wait_op = -+ &cmd->info.cqs_wait_operation.objs[i]; -+ const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); ++ return (group->run_state == KBASE_CSF_GROUP_IDLE || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); ++} + -+ u64 live_val; -+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); ++static bool on_slot_group_idle_locked(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + -+ bool live_val_valid = (ret >= 0); ++ return (group->run_state == KBASE_CSF_GROUP_IDLE); ++} + -+ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); ++static bool can_schedule_idle_group(struct kbase_queue_group *group) ++{ ++ return (on_slot_group_idle_locked(group) || ++ (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)); ++} + -+ if (live_val_valid) -+ seq_printf(file, "0x%.16llx", live_val); -+ else -+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); ++static bool queue_group_scheduled(struct kbase_queue_group *group) ++{ ++ return (group->run_state != KBASE_CSF_GROUP_INACTIVE && ++ group->run_state != KBASE_CSF_GROUP_TERMINATED && ++ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); ++} + -+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); -+ } ++static bool queue_group_scheduled_locked(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ ++ return queue_group_scheduled(group); +} + +/** -+ * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS -+ * Set Operation command, contained -+ * in a KCPU queue. ++ * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. + * -+ * @file: The seq_file for printing to. -+ * @cmd: The KCPU Command to be printed. ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function waits for the GPU to exit protected mode which is confirmed ++ * when active_protm_grp is set to NULL. ++ * ++ * Return: true on success, false otherwise. 
+ */ -+static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, -+ struct kbase_kcpu_command *cmd) ++static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) +{ -+ size_t i; -+ struct kbase_context *kctx = file->private; -+ -+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { -+ struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; -+ const char *op_name = kbasep_csf_sync_get_set_op_name( -+ (basep_cqs_set_operation_op)set_op->operation); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ long remaining; ++ bool success = true; + -+ u64 live_val; -+ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); ++ lockdep_assert_held(&scheduler->lock); + -+ bool live_val_valid = (ret >= 0); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt)); + -+ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); + -+ if (live_val_valid) -+ seq_printf(file, "0x%.16llx", live_val); -+ else -+ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); ++ if (unlikely(!remaining)) { ++ struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp; ++ struct kbase_context *kctx = group ? group->kctx : NULL; + -+ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); ++ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", ++ kbase_backend_get_cycle_cnt(kbdev), ++ kbdev->csf.fw_timeout_ms); ++ schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); ++ success = false; + } ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining)); ++ ++ return success; +} + +/** -+ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue ++ * scheduler_force_protm_exit() - Force GPU to exit protected mode. + * -+ * @file: The seq_file to print to. -+ * @queue: Pointer to the KCPU queue. ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function sends a ping request to the firmware and waits for the GPU ++ * to exit protected mode. ++ * ++ * If the GPU does not exit protected mode, it is considered as hang. ++ * A GPU reset would then be triggered. + */ -+static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, -+ struct kbase_kcpu_command_queue *queue) ++static void scheduler_force_protm_exit(struct kbase_device *kbdev) +{ -+ char started_or_pending; -+ struct kbase_kcpu_command *cmd; -+ struct kbase_context *kctx = file->private; -+ size_t i; ++ unsigned long flags; + -+ if (WARN_ON(!queue)) -+ return; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); -+ mutex_lock(&queue->lock); ++ kbase_csf_firmware_ping(kbdev); + -+ for (i = 0; i != queue->num_pending_cmds; ++i) { -+ started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; -+ seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, -+ started_or_pending); ++ if (scheduler_protm_wait_quit(kbdev)) ++ return; + -+ cmd = &queue->commands[queue->start_offset + i]; -+ switch (cmd->type) { -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: -+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: -+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); -+ break; -+#endif -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: -+ kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET: -+ kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: -+ kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); -+ break; -+ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: -+ kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); -+ break; -+ default: -+ seq_puts(file, ", U, Unknown blocking command"); -+ break; -+ } ++ dev_err(kbdev->dev, "Possible GPU hang in Protected mode"); + -+ seq_puts(file, "\n"); ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ if (kbdev->csf.scheduler.active_protm_grp) { ++ dev_err(kbdev->dev, ++ "Group-%d of context %d_%d ran in protected mode for too long on slot %d", ++ kbdev->csf.scheduler.active_protm_grp->handle, ++ kbdev->csf.scheduler.active_protm_grp->kctx->tgid, ++ kbdev->csf.scheduler.active_protm_grp->kctx->id, ++ kbdev->csf.scheduler.active_protm_grp->csg_nr); + } ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+ mutex_unlock(&queue->lock); ++ /* The GPU could be stuck in Protected mode. To prevent a hang, ++ * a GPU reset is performed. ++ */ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); +} + +/** -+ * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info ++ * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up ++ * automatically for periodic tasks. + * -+ * @file: The seq_file for printing to. ++ * @kbdev: Pointer to the device + * -+ * Return: Negative error code or 0 on success. ++ * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the ++ * CSF scheduler lock to already have been held. ++ * ++ * Return: true if the scheduler is configured to wake up periodically + */ -+static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) ++static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = file->private; -+ unsigned long queue_idx; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ mutex_lock(&kctx->csf.kcpu_queues.lock); -+ seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); ++ return kbdev->csf.scheduler.timer_enabled; ++} + -+ queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); ++/** ++ * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for ++ * Scheduler ++ * ++ * @kbdev: Pointer to the device ++ * @suspend_handler: Handler code for how to handle a suspend that might occur. ++ * ++ * This function is usually called when Scheduler needs to be activated. ++ * The PM reference count is acquired for the Scheduler and the power on ++ * of GPU is initiated. ++ * ++ * Return: 0 if successful or a negative error code on failure. 
++ */ ++static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, ++ enum kbase_pm_suspend_handler suspend_handler) ++{ ++ unsigned long flags; ++ u32 prev_count; ++ int ret = 0; + -+ while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { -+ kbasep_csf_sync_kcpu_debugfs_print_queue(file, -+ kctx->csf.kcpu_queues.array[queue_idx]); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, -+ queue_idx + 1); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ prev_count = kbdev->csf.scheduler.pm_active_count; ++ if (!WARN_ON(prev_count == U32_MAX)) ++ kbdev->csf.scheduler.pm_active_count++; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* On 0 => 1, make a pm_ctx_active request */ ++ if (!prev_count) { ++ ret = kbase_pm_context_active_handle_suspend(kbdev, ++ suspend_handler); ++ /* Invoke the PM state machines again as the change in MCU ++ * desired status, due to the update of scheduler.pm_active_count, ++ * may be missed by the thread that called pm_wait_for_desired_state() ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (ret) ++ kbdev->csf.scheduler.pm_active_count--; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); -+ return 0; ++ return ret; +} + ++#ifdef KBASE_PM_RUNTIME +/** -+ * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations -+ * from a MOVE instruction. ++ * scheduler_pm_active_after_sleep() - Acquire the PM reference count for ++ * Scheduler + * -+ * @move_cmd: Raw MOVE instruction. -+ * @sync_addr_reg: Register identifier from SYNC_* instruction. -+ * @compare_val_reg: Register identifier from SYNC_* instruction. -+ * @sync_val: Pointer to store CQS object address for sync operation. -+ * @compare_val: Pointer to store compare value for sync operation. ++ * @kbdev: Pointer to the device ++ * @flags: Pointer to the flags variable containing the interrupt state ++ * when hwaccess lock was acquired. + * -+ * Return: True if value is obtained by checking for correct register identifier, -+ * or false otherwise. ++ * This function is called when Scheduler needs to be activated from the ++ * sleeping state. ++ * The PM reference count is acquired for the Scheduler and the wake up of ++ * MCU is initiated. It resets the flag that indicates to the MCU state ++ * machine that MCU needs to be put in sleep state. ++ * ++ * Note: This function shall be called with hwaccess lock held and it may ++ * release that lock and reacquire it. ++ * ++ * Return: zero when the PM reference was taken and non-zero when the ++ * system is being suspending/suspended. 
+ */ -+static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, -+ u64 compare_val_reg, u64 *sync_val, -+ u64 *compare_val) ++static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, ++ unsigned long *flags) +{ -+ u64 imm_mask; ++ u32 prev_count; ++ int ret = 0; + -+ /* Verify MOVE instruction and get immediate mask */ -+ if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) -+ imm_mask = MOVE32_IMM_MASK; -+ else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) -+ imm_mask = MOVE_IMM_MASK; -+ else -+ /* Error return */ -+ return false; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Verify value from MOVE instruction and assign to variable */ -+ if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) -+ *sync_val = move_cmd & imm_mask; -+ else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) -+ *compare_val = move_cmd & imm_mask; -+ else -+ /* Error return */ -+ return false; ++ prev_count = kbdev->csf.scheduler.pm_active_count; ++ if (!WARN_ON(prev_count == U32_MAX)) ++ kbdev->csf.scheduler.pm_active_count++; + -+ return true; ++ /* On 0 => 1, make a pm_ctx_active request */ ++ if (!prev_count) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); ++ ++ ret = kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); ++ if (ret) ++ kbdev->csf.scheduler.pm_active_count--; ++ else ++ kbdev->pm.backend.gpu_sleep_mode_active = false; ++ kbase_pm_update_state(kbdev); ++ } ++ ++ return ret; +} ++#endif + -+/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided -+ * offset. ++/** ++ * scheduler_pm_idle() - Release the PM reference count held by Scheduler + * -+ * @queue: Pointer to the queue. -+ * @ringbuff_offset: Ringbuffer offset. ++ * @kbdev: Pointer to the device + * -+ * Return: the u64 in the ringbuffer at the desired offset. ++ * This function is usually called after Scheduler is suspended. ++ * The PM reference count held by the Scheduler is released to trigger the ++ * power down of GPU. + */ -+static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) ++static void scheduler_pm_idle(struct kbase_device *kbdev) +{ -+ u64 page_off = ringbuff_offset >> PAGE_SHIFT; -+ u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; -+ struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); -+ u64 *ringbuffer = kmap_atomic(page); -+ u64 value = ringbuffer[offset_within_page / sizeof(u64)]; ++ unsigned long flags; ++ u32 prev_count; + -+ kunmap_atomic(ringbuffer); -+ return value; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ prev_count = kbdev->csf.scheduler.pm_active_count; ++ if (!WARN_ON(prev_count == 0)) ++ kbdev->csf.scheduler.pm_active_count--; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (prev_count == 1) { ++ kbase_pm_context_idle(kbdev); ++ /* Invoke the PM state machines again as the change in MCU ++ * desired status, due to the update of scheduler.pm_active_count, ++ * may be missed by the thread that called pm_wait_for_desired_state() ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } +} + ++#ifdef KBASE_PM_RUNTIME +/** -+ * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. 
++ * scheduler_pm_idle_before_sleep() - Release the PM reference count and
++ * trigger the transition to sleep state.
+ *
-+ * @file: Pointer to debugfs seq_file file struct for writing output.
-+ * @kctx: Pointer to kbase context.
-+ * @queue: Pointer to the GPU command queue.
-+ * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command.
-+ * (Useful for finding preceding MOVE commands)
-+ * @sync_cmd: Entire u64 of the sync command, which has both sync address and
-+ * comparison-value encoded in it.
-+ * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT).
-+ * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false).
-+ * @follows_wait: Bool to indicate if the operation follows at least one wait
-+ * operation. Used to determine whether it's pending or started.
++ * @kbdev: Pointer to the device
++ *
++ * This function is called on the GPU idle notification. It releases the
++ * Scheduler's PM reference count and sets the flag to indicate to the
++ * MCU state machine that MCU needs to be put in sleep state.
+ */
-+static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx,
-+ struct kbase_queue *queue, u32 ringbuff_offset,
-+ u64 sync_cmd, enum debugfs_gpu_sync_type type,
-+ bool is_64bit, bool follows_wait)
++static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
+{
-+ u64 sync_addr = 0, compare_val = 0, live_val = 0;
-+ u64 move_cmd;
-+ u8 sync_addr_reg, compare_val_reg, wait_condition = 0;
-+ int err;
++ unsigned long flags;
++ u32 prev_count;
+
-+ static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" };
-+ static const char *const gpu_sync_type_op[] = {
-+ "wait", /* This should never be printed, only included to simplify indexing */
-+ "set", "add"
-+ };
++ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
-+ if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) {
-+ dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!");
-+ return;
-+ }
++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++ prev_count = kbdev->csf.scheduler.pm_active_count;
++ if (!WARN_ON(prev_count == 0))
++ kbdev->csf.scheduler.pm_active_count--;
++ kbdev->pm.backend.gpu_sleep_mode_active = true;
++ kbdev->pm.backend.exit_gpu_sleep_mode = false;
++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
-+ /* We expect there to be at least 2 preceding MOVE instructions, and
-+ * Base will always arrange for the 2 MOVE + SYNC instructions to be
-+ * contiguously located, and is therefore never expected to be wrapped
-+ * around the ringbuffer boundary.
-+ */
-+ if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) {
-+ dev_warn(kctx->kbdev->dev,
-+ "Unexpected wraparound detected between %s & MOVE instruction",
-+ gpu_sync_type_name[type]);
-+ return;
++ if (prev_count == 1) {
++ kbase_pm_context_idle(kbdev);
++ /* Invoke the PM state machines again as the change in MCU
++ * desired status, due to the update of scheduler.pm_active_count,
++ * may be missed by the thread that called pm_wait_for_desired_state()
++ */
++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++ kbase_pm_update_state(kbdev);
++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
++}
++#endif
+
-+ /* 1. 
Get Register identifiers from SYNC_* instruction */ -+ sync_addr_reg = SYNC_SRC0_GET(sync_cmd); -+ compare_val_reg = SYNC_SRC1_GET(sync_cmd); ++static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ int ret; + -+ /* 2. Get values from first MOVE command */ -+ ringbuff_offset -= sizeof(u64); -+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); -+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, -+ &sync_addr, &compare_val)) -+ return; ++ lockdep_assert_held(&scheduler->lock); + -+ /* 3. Get values from next MOVE command */ -+ ringbuff_offset -= sizeof(u64); -+ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); -+ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, -+ &sync_addr, &compare_val)) ++ if ((scheduler->state != SCHED_SUSPENDED) && ++ (scheduler->state != SCHED_SLEEPING)) + return; + -+ /* 4. Get CQS object value */ -+ if (is_64bit) -+ err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); -+ else -+ err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); ++ if (scheduler->state == SCHED_SUSPENDED) { ++ dev_dbg(kbdev->dev, ++ "Re-activating the Scheduler after suspend"); ++ ret = scheduler_pm_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); ++ } else { ++#ifdef KBASE_PM_RUNTIME ++ unsigned long flags; + -+ if (err) ++ dev_dbg(kbdev->dev, ++ "Re-activating the Scheduler out of sleep"); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = scheduler_pm_active_after_sleep(kbdev, &flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif ++ } ++ ++ if (ret) { ++ /* GPUCORE-29850 would add the handling for the case where ++ * Scheduler could not be activated due to system suspend. ++ */ ++ dev_info(kbdev->dev, ++ "Couldn't wakeup Scheduler due to system suspend"); + return; ++ } + -+ /* 5. Print info */ -+ seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, -+ queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P', -+ gpu_sync_type_name[type]); ++ scheduler->state = SCHED_INACTIVE; ++ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); + -+ if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) -+ seq_puts(file, "slot:-"); -+ else -+ seq_printf(file, "slot:%d", (int)queue->group->csg_nr); ++ if (kick) ++ scheduler_enable_tick_timer_nolock(kbdev); ++} + -+ seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); ++static void scheduler_suspend(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ if (type == DEBUGFS_GPU_SYNC_WAIT) { -+ wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); -+ seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); -+ } else -+ seq_printf(file, "op:%s ", gpu_sync_type_op[type]); ++ lockdep_assert_held(&scheduler->lock); + -+ seq_printf(file, "arg_value:0x%.16llx\n", compare_val); ++ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { ++ dev_dbg(kbdev->dev, "Suspending the Scheduler"); ++ scheduler_pm_idle(kbdev); ++ scheduler->state = SCHED_SUSPENDED; ++ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); ++ } +} + +/** -+ * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. -+ * -+ * @file: seq_file for printing to. -+ * @queue: Address of a GPU command queue to examine. 
++ * update_idle_suspended_group_state() - Move the queue group to a non-idle ++ * suspended state. ++ * @group: Pointer to the queue group. + * -+ * This function will iterate through each command in the ring buffer of the given GPU queue from -+ * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and -+ * print relevant information to the debugfs file. -+ * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. -+ * when there are no more commands to view) or a number of consumed GPU CALL commands have -+ * been observed. ++ * This function is called to change the state of queue group to non-idle ++ * suspended state, if the group was suspended when all the queues bound to it ++ * became empty or when some queues got blocked on a sync wait & others became ++ * empty. The group is also moved to the runnable list from idle wait list in ++ * the latter case. ++ * So the function gets called when a queue is kicked or sync wait condition ++ * gets satisfied. + */ -+static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) ++static void update_idle_suspended_group_state(struct kbase_queue_group *group) +{ -+ struct kbase_context *kctx; -+ u32 *addr; -+ u64 cs_extract, cs_insert, instr, cursor; -+ bool follows_wait = false; -+ int nr_calls = 0; -+ -+ if (!queue) -+ return; ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int new_val; + -+ kctx = queue->kctx; ++ lockdep_assert_held(&scheduler->lock); + -+ addr = (u32 *)queue->user_io_addr; -+ cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { ++ remove_group_from_idle_wait(group); ++ insert_group_to_runnable(scheduler, group, ++ KBASE_CSF_GROUP_SUSPENDED); ++ } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, ++ group->run_state); + -+ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); -+ cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); ++ /* If scheduler is not suspended and the given group's ++ * static priority (reflected by the scan_seq_num) is inside ++ * the current tick slot-range, or there are some on_slot ++ * idle groups, schedule an async tock. ++ */ ++ if (scheduler->state != SCHED_SUSPENDED) { ++ unsigned long flags; ++ int n_idle; ++ int n_used; ++ int n_slots = ++ group->kctx->kbdev->csf.global_iface.group_num; + -+ cursor = cs_extract; ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ n_idle = bitmap_weight(scheduler->csg_slots_idle_mask, ++ n_slots); ++ n_used = bitmap_weight(scheduler->csg_inuse_bitmap, ++ n_slots); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, ++ flags); + -+ if (!is_power_of_2(queue->size)) { -+ dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", -+ queue->csi_index, queue->size); ++ if (n_idle || ++ n_used < scheduler->num_csg_slots_for_tick || ++ group->scan_seq_num < ++ scheduler->num_csg_slots_for_tick) ++ schedule_in_cycle(group, true); ++ } ++ } else + return; -+ } -+ -+ while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { -+ bool instr_is_64_bit = false; -+ /* Calculate offset into ringbuffer from the absolute cursor, -+ * by finding the remainder of the cursor divided by the -+ * ringbuffer size. 
The ringbuffer size is guaranteed to be -+ * a power of 2, so the remainder can be calculated without an -+ * explicit modulo. queue->size - 1 is the ringbuffer mask. -+ */ -+ u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); + -+ /* Find instruction that cursor is currently on */ -+ instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); ++ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, ++ new_val); ++} + -+ switch (INSTR_OPCODE_GET(instr)) { -+ case GPU_CSF_SYNC_ADD64_OPCODE: -+ case GPU_CSF_SYNC_SET64_OPCODE: -+ case GPU_CSF_SYNC_WAIT64_OPCODE: -+ instr_is_64_bit = true; -+ default: -+ break; -+ } ++int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; + -+ switch (INSTR_OPCODE_GET(instr)) { -+ case GPU_CSF_SYNC_ADD_OPCODE: -+ case GPU_CSF_SYNC_ADD64_OPCODE: -+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, -+ instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, -+ follows_wait); -+ break; -+ case GPU_CSF_SYNC_SET_OPCODE: -+ case GPU_CSF_SYNC_SET64_OPCODE: -+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, -+ instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, -+ follows_wait); -+ break; -+ case GPU_CSF_SYNC_WAIT_OPCODE: -+ case GPU_CSF_SYNC_WAIT64_OPCODE: -+ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, -+ instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, -+ follows_wait); -+ follows_wait = true; /* Future commands will follow at least one wait */ -+ break; -+ case GPU_CSF_CALL_OPCODE: -+ nr_calls++; -+ /* Fallthrough */ -+ default: -+ /* Unrecognized command, skip past it */ -+ break; -+ } ++ lockdep_assert_held(&scheduler->interrupt_lock); + -+ cursor += sizeof(u64); ++ if (slot_num >= 0) { ++ if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != ++ group)) ++ return -1; + } ++ ++ return slot_num; +} + -+/** -+ * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of -+ * the provided queue group. -+ * -+ * @file: seq_file for printing to. -+ * @group: Address of a GPU command group to iterate through. -+ * -+ * This function will iterate through each queue in the provided GPU queue group and -+ * print its SYNC related commands. -+ */ -+static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, -+ struct kbase_queue_group *const group) ++int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) +{ -+ struct kbase_context *kctx = file->private; -+ unsigned int i; ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ unsigned long flags; ++ int slot_num; + -+ seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, -+ group->csg_nr, kctx->tgid, kctx->id); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ slot_num = kbase_csf_scheduler_group_get_slot_locked(group); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) -+ kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); ++ return slot_num; +} + -+/** -+ * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info ++/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot. + * -+ * @file: The seq_file for printing to. 
++ * @group: GPU queue group to be checked + * -+ * Return: Negative error code or 0 on success. ++ * This function needs to be called with scheduler's lock held ++ * ++ * Return: true if @group is on slot. + */ -+static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) ++static bool kbasep_csf_scheduler_group_is_on_slot_locked( ++ struct kbase_queue_group *group) +{ -+ u32 gr; -+ struct kbase_context *kctx = file->private; -+ struct kbase_device *kbdev; -+ -+ if (WARN_ON(!kctx)) -+ return -EINVAL; ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; + -+ kbdev = kctx->kbdev; -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_csf_debugfs_update_active_groups_status(kbdev); ++ lockdep_assert_held(&scheduler->lock); + -+ for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { -+ struct kbase_queue_group *const group = -+ kbdev->csf.scheduler.csg_slots[gr].resident_group; -+ if (!group || group->kctx != kctx) -+ continue; -+ kbasep_csf_dump_active_group_sync_state(file, group); ++ if (slot_num >= 0) { ++ if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != ++ group)) ++ return true; + } + -+ kbase_csf_scheduler_unlock(kbdev); -+ return 0; ++ return false; +} + -+/** -+ * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information -+ * -+ * @file: The seq_file for printing to. -+ * @data: The debugfs dentry private data, a pointer to kbase_context. -+ * -+ * Return: Negative error code or 0 on success. -+ */ -+static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) ++bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; + -+ kbasep_csf_sync_kcpu_debugfs_show(file); -+ kbasep_csf_sync_gpu_debugfs_show(file); -+ return 0; -+} ++ lockdep_assert_held(&scheduler->interrupt_lock); + -+static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private); -+} ++ if (WARN_ON(slot_num < 0)) ++ return false; + -+static const struct file_operations kbasep_csf_sync_debugfs_fops = { -+ .open = kbasep_csf_sync_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ return test_bit(slot_num, scheduler->csgs_events_enable_mask); ++} + -+/** -+ * kbase_csf_sync_debugfs_init() - Initialise debugfs file. -+ * -+ * @kctx: Kernel context pointer. 
-+ */ -+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) ++struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( ++ struct kbase_device *kbdev, int slot) +{ -+ struct dentry *file; -+ const mode_t mode = 0444; -+ -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; -+ -+ file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx, -+ &kbasep_csf_sync_debugfs_fops); ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + -+ if (IS_ERR_OR_NULL(file)) -+ dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry"); ++ return kbdev->csf.scheduler.csg_slots[slot].resident_group; +} + -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) ++static int halt_stream_sync(struct kbase_queue *queue) +{ -+} ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_csf_cmd_stream_info *stream; ++ int csi_index = queue->csi_index; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ unsigned long flags; + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h -new file mode 100644 -index 000000000..177e15d85 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if (WARN_ON(!group) || ++ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return -EINVAL; + -+#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_ -+#define _KBASE_CSF_SYNC_DEBUGFS_H_ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ginfo = &global_iface->groups[group->csg_nr]; ++ stream = &ginfo->streams[csi_index]; + -+/* Forward declaration */ -+struct kbase_context; ++ if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == ++ CS_REQ_STATE_START) { + -+#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) ++ == CS_ACK_STATE_START), remaining); + -+/** -+ * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info -+ * -+ * @kctx: The kbase_context for which to create the debugfs entry -+ */ -+void kbase_csf_sync_debugfs_init(struct kbase_context *kctx); ++ if (!remaining) { ++ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, ++ csi_index, group->handle, group->csg_nr); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); + -+#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c -new file mode 100644 -index 000000000..8072a8bd2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c -@@ -0,0 +1,1372 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ + -+#include ++ return -ETIMEDOUT; ++ } + -+#include "mali_kbase_csf_tiler_heap.h" -+#include "mali_kbase_csf_tiler_heap_def.h" -+#include "mali_kbase_csf_heap_context_alloc.h" ++ remaining = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ } + -+/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */ -+#define HEAP_SHRINK_STOP_LIMIT (1) ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ /* Set state to STOP */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, ++ CS_REQ_STATE_MASK); + -+/** -+ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap -+ * -+ * @cdsbp_0: Descriptor_type and buffer_type -+ * @size: The size of the current heap chunk -+ * @pointer: Pointer to the current heap chunk -+ * @low_pointer: Pointer to low end of current heap chunk -+ * @high_pointer: Pointer to high end of current heap chunk -+ */ -+struct kbase_csf_gpu_buffer_heap { -+ u32 cdsbp_0; -+ u32 size; -+ u64 pointer; -+ u64 low_pointer; -+ u64 high_pointer; -+} __packed; ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+/** -+ * encode_chunk_ptr - Encode the address and size of a chunk as an integer. -+ * -+ * @chunk_size: Size of a tiler heap chunk, in bytes. -+ * @chunk_addr: GPU virtual address of the same tiler heap chunk. -+ * -+ * The size and address of the next chunk in a list are packed into a single -+ * 64-bit value for storage in a chunk's header. This function returns that -+ * value. -+ * -+ * Return: Next chunk pointer suitable for writing into a chunk header. -+ */ -+static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) -+{ -+ u64 encoded_size, encoded_addr; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); + -+ WARN_ON(chunk_size & ~CHUNK_SIZE_MASK); -+ WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK); ++ /* Timed wait */ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) ++ == CS_ACK_STATE_STOP), remaining); + -+ encoded_size = -+ (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) << -+ CHUNK_HDR_NEXT_SIZE_POS; ++ if (!remaining) { ++ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, ++ queue->csi_index, group->handle, group->csg_nr); + -+ encoded_addr = -+ (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) << -+ CHUNK_HDR_NEXT_ADDR_POS; ++ /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU ++ * will be reset as a work-around. ++ */ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); + -+ return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) | -+ (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK); ++ ++ } ++ return (remaining) ? 0 : -ETIMEDOUT; +} + -+/** -+ * get_last_chunk - Get the last chunk of a tiler heap -+ * -+ * @heap: Pointer to the tiler heap. -+ * -+ * Return: The address of the most recently-linked chunk, or NULL if none. 
-+ */ -+static struct kbase_csf_tiler_heap_chunk *get_last_chunk( -+ struct kbase_csf_tiler_heap *const heap) ++static bool can_halt_stream(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) +{ -+ if (list_empty(&heap->chunks_list)) -+ return NULL; ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ unsigned long flags; ++ bool can_halt; ++ int slot; + -+ return list_last_entry(&heap->chunks_list, -+ struct kbase_csf_tiler_heap_chunk, link); ++ if (!queue_group_scheduled(group)) ++ return true; ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ slot = kbase_csf_scheduler_group_get_slot_locked(group); ++ can_halt = (slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, ++ flags); ++ ++ return can_halt; +} + +/** -+ * remove_external_chunk_mappings - Remove external mappings from a chunk that -+ * is being transitioned to the tiler heap -+ * memory system. ++ * sched_halt_stream() - Stop a GPU queue when its queue group is not running ++ * on a CSG slot. ++ * @queue: Pointer to the GPU queue to stop. + * -+ * @kctx: kbase context the chunk belongs to. -+ * @chunk: The chunk whose external mappings are going to be removed. ++ * This function handles stopping gpu queues for groups that are either not on ++ * a CSG slot or are on the slot but undergoing transition to ++ * resume or suspend states. ++ * It waits until the queue group is scheduled on a slot and starts running, ++ * which is needed as groups that were suspended may need to resume all queues ++ * that were enabled and running at the time of suspension. + * -+ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates -+ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other -+ * parts of kbase outside of tiler heap management should not take references on its physical -+ * pages, and should not modify them. ++ * Return: 0 on success, or negative on failure. 
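++ * -ETIMEDOUT is returned if the group neither reaches a running CSG slot
++ * nor gets evicted from the scheduler within the CSF_CSG_SUSPEND_TIMEOUT
++ * based wait, or if the subsequent STOP request is not acknowledged in
++ * time.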
+ */ -+static void remove_external_chunk_mappings(struct kbase_context *const kctx, -+ struct kbase_csf_tiler_heap_chunk *chunk) ++static int sched_halt_stream(struct kbase_queue *queue) +{ -+ lockdep_assert_held(&kctx->reg_lock); ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = ++ &kbdev->csf.scheduler; ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ bool retry_needed = false; ++ bool retried = false; ++ long remaining; ++ int slot; ++ int err = 0; ++ const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); + -+ if (chunk->region->cpu_alloc != NULL) { -+ kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0, -+ chunk->region->cpu_alloc->nents); ++ if (WARN_ON(!group)) ++ return -EINVAL; ++ ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ lockdep_assert_held(&scheduler->lock); ++ ++ slot = kbase_csf_scheduler_group_get_slot(group); ++ ++ if (slot >= 0) { ++ WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); ++ ++ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { ++ dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", ++ queue->csi_index, group->handle); ++ retry_needed = true; ++ } + } -+#if !defined(CONFIG_MALI_VECTOR_DUMP) -+ chunk->region->flags |= KBASE_REG_DONT_NEED; -+#endif ++retry: ++ /* Update the group state so that it can get scheduled soon */ ++ update_idle_suspended_group_state(group); + -+ dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va); -+} ++ mutex_unlock(&scheduler->lock); + -+/** -+ * link_chunk - Link a chunk into a tiler heap -+ * -+ * @heap: Pointer to the tiler heap. -+ * @chunk: Pointer to the heap chunk to be linked. -+ * -+ * Unless the @chunk is the first in the kernel's list of chunks belonging to -+ * a given tiler heap, this function stores the size and address of the @chunk -+ * in the header of the preceding chunk. This requires the GPU memory region -+ * containing the header to be mapped temporarily, which can fail. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int link_chunk(struct kbase_csf_tiler_heap *const heap, -+ struct kbase_csf_tiler_heap_chunk *const chunk) -+{ -+ struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap); ++ /* This function is called when the queue group is either not on a CSG ++ * slot or is on the slot but undergoing transition. ++ * ++ * To stop the queue, the function needs to wait either for the queue ++ * group to be assigned a CSG slot (and that slot has to reach the ++ * running state) or for the eviction of the queue group from the ++ * scheduler's list. ++ * ++ * In order to evaluate the latter condition, the function doesn't ++ * really need to lock the scheduler, as any update to the run_state ++ * of the queue group by sched_evict_group() would be visible due ++ * to implicit barriers provided by the kernel waitqueue macros. ++ * ++ * The group pointer cannot disappear meanwhile, as the high level ++ * CSF context is locked. Therefore, the scheduler would be ++ * the only one to update the run_state of the group. 
++ */ ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, can_halt_stream(kbdev, group), ++ kbase_csf_timeout_in_jiffies(group_schedule_timeout)); + -+ if (prev) { -+ struct kbase_context *const kctx = heap->kctx; -+ u64 *prev_hdr = prev->map.addr; ++ mutex_lock(&scheduler->lock); + -+ WARN((prev->region->flags & KBASE_REG_CPU_CACHED), -+ "Cannot support CPU cached chunks without sync operations"); ++ if (remaining && queue_group_scheduled_locked(group)) { ++ slot = kbase_csf_scheduler_group_get_slot(group); + -+ *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); ++ /* If the group is still on slot and slot is in running state ++ * then explicitly stop the CSI of the ++ * queue. Otherwise there are different cases to consider ++ * ++ * - If the queue group was already undergoing transition to ++ * resume/start state when this function was entered then it ++ * would not have disabled the CSI of the ++ * queue being stopped and the previous wait would have ended ++ * once the slot was in a running state with CS ++ * interface still enabled. ++ * Now the group is going through another transition either ++ * to a suspend state or to a resume state (it could have ++ * been suspended before the scheduler lock was grabbed). ++ * In both scenarios need to wait again for the group to ++ * come on a slot and that slot to reach the running state, ++ * as that would guarantee that firmware will observe the ++ * CSI as disabled. ++ * ++ * - If the queue group was either off the slot or was ++ * undergoing transition to suspend state on entering this ++ * function, then the group would have been resumed with the ++ * queue's CSI in disabled state. ++ * So now if the group is undergoing another transition ++ * (after the resume) then just need to wait for the state ++ * bits in the ACK register of CSI to be ++ * set to STOP value. It is expected that firmware will ++ * process the stop/disable request of the CS ++ * interface after resuming the group before it processes ++ * another state change request of the group. 
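++	 *
++	 * In short: if the group is back on a slot in the running state then
++	 * the STOP request is issued synchronously via halt_stream_sync(); if
++	 * the group was caught mid-transition to the running state on entry
++	 * then the wait above is retried once; otherwise only the STOP state
++	 * in the CS_ACK register of the CSI is waited for.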
++ */ ++ if ((slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { ++ err = halt_stream_sync(queue); ++ } else if (retry_needed && !retried) { ++ retried = true; ++ goto retry; ++ } else if (slot >= 0) { ++ struct kbase_csf_global_iface *global_iface = ++ &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &global_iface->groups[slot]; ++ struct kbase_csf_cmd_stream_info *stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 cs_req = ++ kbase_csf_firmware_cs_input_read(stream, CS_REQ); + -+ dev_dbg(kctx->kbdev->dev, -+ "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", -+ prev->gpu_va, chunk->gpu_va); ++ if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != ++ CS_REQ_STATE_STOP)) { ++ /* Timed wait */ ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, ++ (CS_ACK_STATE_GET( ++ kbase_csf_firmware_cs_output( ++ stream, CS_ACK)) == ++ CS_ACK_STATE_STOP), ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, ++ "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, ++ queue->csi_index, ++ group->handle, group->csg_nr); ++ ++ ++ err = -ETIMEDOUT; ++ } ++ } ++ } ++ } else if (!remaining) { ++ dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)", ++ kbase_backend_get_cycle_cnt(kbdev), ++ group->handle, queue->csi_index, ++ group_schedule_timeout); ++ ++ ++ err = -ETIMEDOUT; + } + -+ return 0; ++ return err; +} + +/** -+ * init_chunk - Initialize and link a tiler heap chunk -+ * -+ * @heap: Pointer to the tiler heap. -+ * @chunk: Pointer to the heap chunk to be initialized and linked. -+ * @link_with_prev: Flag to indicate if the new chunk needs to be linked with -+ * the previously allocated chunk. ++ * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU ++ * queue needs to be stopped. + * -+ * Zero-initialize a new chunk's header (including its pointer to the next -+ * chunk, which doesn't exist yet) and then update the previous chunk's -+ * header to link the new chunk into the chunk list. ++ * @queue: Pointer the GPU command queue + * -+ * Return: 0 if successful or a negative error code on failure. ++ * This function is called when the CSI to which GPU queue is bound needs to ++ * be stopped. For that the corresponding queue group needs to be resident on ++ * the CSG slot and MCU firmware should be running. So this function makes the ++ * Scheduler exit the sleeping or suspended state. 
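++ *
++ * A failure to see the MCU become active is only reported as a warning;
++ * the caller still goes on to attempt stopping the queue.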
+ */ -+static int init_chunk(struct kbase_csf_tiler_heap *const heap, -+ struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) ++static void scheduler_activate_on_queue_stop(struct kbase_queue *queue) +{ -+ int err = 0; -+ u64 *chunk_hdr; -+ struct kbase_context *const kctx = heap->kctx; ++ struct kbase_device *kbdev = queue->kctx->kbdev; + -+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ scheduler_wakeup(kbdev, true); + -+ if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { -+ dev_err(kctx->kbdev->dev, -+ "Tiler heap chunk address is unusable\n"); -+ return -EINVAL; ++ /* Wait for MCU firmware to start running */ ++ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), ++ queue->csi_index, queue->group->handle, ++ queue->kctx->tgid, queue->kctx->id, ++ queue->group->csg_nr); + } ++} + -+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), -+ "Cannot support CPU cached chunks without sync operations"); -+ chunk_hdr = chunk->map.addr; -+ if (WARN(chunk->map.size < CHUNK_HDR_SIZE, -+ "Tiler chunk kernel mapping was not large enough for zero-init")) { ++int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_queue_group *group = queue->group; ++ bool const cs_enabled = queue->enabled; ++ int err = 0; ++ ++ if (WARN_ON(!group)) + return -EINVAL; -+ } + -+ memset(chunk_hdr, 0, CHUNK_HDR_SIZE); -+ INIT_LIST_HEAD(&chunk->link); ++ kbase_reset_gpu_assert_failed_or_prevented(kbdev); ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+ if (link_with_prev) -+ err = link_chunk(heap, chunk); ++ queue->enabled = false; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); + -+ if (unlikely(err)) { -+ dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n"); -+ return -EINVAL; -+ } ++ if (cs_enabled && queue_group_scheduled_locked(group)) { ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ int slot = kbase_csf_scheduler_group_get_slot(group); + -+ list_add_tail(&chunk->link, &heap->chunks_list); -+ heap->chunk_count++; ++ /* Since the group needs to be resumed in order to stop the queue, ++ * check if GPU needs to be powered up. ++ */ ++ scheduler_activate_on_queue_stop(queue); ++ ++ if ((slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) ++ err = halt_stream_sync(queue); ++ else ++ err = sched_halt_stream(queue); ++ ++ unassign_user_doorbell_from_queue(kbdev, queue); ++ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); ++ } + ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state); + return err; +} + -+/** -+ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a -+ * heap. -+ * -+ * @kctx: Kbase context that was used to allocate the memory. -+ * @chunk: Chunk that has been allocated, but not linked into a heap. 
-+ */ -+static void remove_unlinked_chunk(struct kbase_context *kctx, -+ struct kbase_csf_tiler_heap_chunk *chunk) ++static void update_hw_active(struct kbase_queue *queue, bool active) +{ -+ if (WARN_ON(!list_empty(&chunk->link))) -+ return; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ if (queue && queue->enabled) { ++ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + -+ kbase_gpu_vm_lock(kctx); -+ kbase_vunmap(kctx, &chunk->map); -+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT -+ * regions), and so we must clear that flag too before freeing. -+ * For "no user free count", we check that the count is 1 as it is a shrinkable region; -+ * no other code part within kbase can take a reference to it. -+ */ -+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1); -+ kbase_va_region_no_user_free_dec(chunk->region); -+#if !defined(CONFIG_MALI_VECTOR_DUMP) -+ chunk->region->flags &= ~KBASE_REG_DONT_NEED; ++ output_addr[CS_ACTIVE / sizeof(u32)] = active; ++ } ++#else ++ CSTD_UNUSED(queue); ++ CSTD_UNUSED(active); +#endif -+ kbase_mem_free_region(kctx, chunk->region); -+ kbase_gpu_vm_unlock(kctx); ++} + -+ kfree(chunk); ++static void program_cs_extract_init(struct kbase_queue *queue) ++{ ++ u64 *input_addr = (u64 *)queue->user_io_addr; ++ u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); ++ ++ input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = ++ output_addr[CS_EXTRACT_LO / sizeof(u64)]; +} + -+/** -+ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA -+ * region for the chunk, and provide a kernel mapping. -+ * @kctx: kbase context with which the chunk will be linked -+ * @chunk_size: the size of the chunk from the corresponding heap -+ * -+ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the -+ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so -+ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap -+ * lock). -+ * -+ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we -+ * ensure that it is protected from being mapped by other parts of kbase. -+ * -+ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the -+ * shrinker path, as it may be otherwise shrunk at any time. -+ * -+ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer -+ * on failure -+ */ -+static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx, -+ u64 chunk_size) ++static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, ++ struct kbase_queue *queue) +{ -+ u64 nr_pages = PFN_UP(chunk_size); -+ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | -+ BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; -+ /* The chunk kernel mapping needs to be large enough to: -+ * - initially zero the CHUNK_HDR_SIZE area -+ * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area -+ */ -+ const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE); ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ u32 const glb_version = kbdev->csf.global_iface.version; + -+ /* Calls to this function are inherently synchronous, with respect to -+ * MMU operations. 
++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ /* If cs_trace_command not supported, nothing to program */ ++ if (glb_version < kbase_csf_interface_version(1, 1, 0)) ++ return; ++ ++ /* Program for cs_trace if enabled. In the current arrangement, it is ++ * possible for the context to enable the cs_trace after some queues ++ * has been registered in cs_trace in disabled state. This is tracked by ++ * the queue's trace buffer base address, which had been validated at the ++ * queue's register_ex call. + */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; -+ flags |= kbase_mem_group_id_set(kctx->jit_group_id); ++ if (kbase_csf_scheduler_queue_has_trace(queue)) { ++ u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( ++ queue->trace_cfg, queue->kctx->as_nr); + -+ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); -+ if (unlikely(!chunk)) { -+ dev_err(kctx->kbdev->dev, -+ "No kernel memory for a new tiler heap chunk\n"); -+ return NULL; -+ } ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, ++ queue->trace_buffer_size); + -+ /* Allocate GPU memory for the new chunk. */ -+ chunk->region = -+ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, ++ queue->trace_buffer_base & U32_MAX); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, ++ queue->trace_buffer_base >> 32); + -+ if (unlikely(!chunk->region)) { -+ dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n"); -+ goto unroll_chunk; ++ kbase_csf_firmware_cs_input( ++ stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, ++ queue->trace_offset_ptr & U32_MAX); ++ kbase_csf_firmware_cs_input( ++ stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, ++ queue->trace_offset_ptr >> 32); ++ } else { ++ /* Place the configuration to the disabled condition */ ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); + } ++} + -+ kbase_gpu_vm_lock(kctx); ++static void program_cs(struct kbase_device *kbdev, ++ struct kbase_queue *queue, bool ring_csg_doorbell) ++{ ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_csf_cmd_stream_info *stream; ++ int csi_index = queue->csi_index; ++ unsigned long flags; ++ u64 user_input; ++ u64 user_output; + -+ /* Some checks done here as NO_USER_FREE still allows such things to be made -+ * whilst we had dropped the region lock -+ */ -+ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { -+ dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n"); -+ goto unroll_region; -+ } ++ if (WARN_ON(!group)) ++ return; + -+ /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another -+ * thread can have the "no user free" refcount increased between kbase_mem_alloc -+ * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by -+ * remove_external_chunk_mappings (below). -+ * -+ * It should be fine and not a security risk if we let the region leak till -+ * region tracker termination in such a case. 
-+ */ -+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) { -+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n"); -+ goto unroll_region; -+ } ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE -+ * being requested, it's useful to document in code what those restrictions are, and ensure -+ * they remain in place in future. -+ */ -+ if (WARN(!chunk->region->gpu_alloc, -+ "NO_USER_FREE chunks should not have had their alloc freed")) { -+ goto unroll_region; -+ } ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; + -+ if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, -+ "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { -+ goto unroll_region; -+ } ++ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; + -+ if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), -+ "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { -+ goto unroll_region; -+ } ++ if (WARN_ON(csi_index < 0) || ++ WARN_ON(csi_index >= ginfo->stream_num)) ++ return; + -+ if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), -+ "NO_USER_FREE chunks should not have been made ephemeral")) { -+ goto unroll_region; -+ } ++ if (queue->enabled) { ++ assign_user_doorbell_to_queue(kbdev, queue); ++ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ return; + -+ if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, -+ "NO_USER_FREE chunks should not have been aliased")) { -+ goto unroll_region; ++ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + } + -+ if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size, -+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map, -+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) { -+ dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n"); -+ goto unroll_region; -+ } ++ if (queue->enabled && queue_group_suspended_locked(group)) ++ program_cs_extract_init(queue); + -+ remove_external_chunk_mappings(kctx, chunk); -+ kbase_gpu_vm_unlock(kctx); ++ stream = &ginfo->streams[csi_index]; + -+ /* If page migration is enabled, we don't want to migrate tiler heap pages. -+ * This does not change if the constituent pages are already marked as isolated. -+ */ -+ if (kbase_page_migration_enabled) -+ kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); ++ kbase_csf_firmware_cs_input(stream, CS_BASE_LO, ++ queue->base_addr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_BASE_HI, ++ queue->base_addr >> 32); ++ kbase_csf_firmware_cs_input(stream, CS_SIZE, ++ queue->size); + -+ return chunk; ++ user_input = queue->user_io_gpu_va; ++ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); + -+unroll_region: -+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT -+ * regions), and so we must clear that flag too before freeing. 
-+ */ -+ kbase_va_region_no_user_free_dec(chunk->region); -+#if !defined(CONFIG_MALI_VECTOR_DUMP) -+ chunk->region->flags &= ~KBASE_REG_DONT_NEED; -+#endif -+ kbase_mem_free_region(kctx, chunk->region); -+ kbase_gpu_vm_unlock(kctx); -+unroll_chunk: -+ kfree(chunk); -+ return NULL; -+} ++ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32); + -+/** -+ * create_chunk - Create a tiler heap chunk -+ * -+ * @heap: Pointer to the tiler heap for which to allocate memory. -+ * -+ * This function allocates a chunk of memory for a tiler heap, adds it to the -+ * the list of chunks associated with that heap both on the host side and in GPU -+ * memory. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int create_chunk(struct kbase_csf_tiler_heap *const heap) -+{ -+ int err = 0; -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ user_output = user_input + PAGE_SIZE; ++ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); + -+ chunk = alloc_new_chunk(heap->kctx, heap->chunk_size); -+ if (unlikely(!chunk)) { -+ err = -ENOMEM; -+ goto allocation_failure; -+ } ++ kbase_csf_firmware_cs_input(stream, CS_CONFIG, ++ (queue->doorbell_nr << 8) | (queue->priority & 0xF)); + -+ mutex_lock(&heap->kctx->csf.tiler_heaps.lock); -+ err = init_chunk(heap, chunk, true); -+ mutex_unlock(&heap->kctx->csf.tiler_heaps.lock); ++ /* Program the queue's cs_trace configuration */ ++ program_cs_trace_cfg(stream, queue); + -+ if (unlikely(err)) -+ goto initialization_failure; ++ /* Enable all interrupts for now */ ++ kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); + -+ dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va); ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + -+ return 0; -+initialization_failure: -+ remove_unlinked_chunk(heap->kctx, chunk); -+allocation_failure: -+ return err; -+} ++ /* The fault bit could be misaligned between CS_REQ and CS_ACK if the ++ * acknowledgment was deferred due to dump on fault and the group was ++ * removed from the CSG slot before the fault could be acknowledged. ++ */ ++ if (queue->enabled) { ++ u32 const cs_ack = ++ kbase_csf_firmware_cs_output(stream, CS_ACK); + -+/** -+ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap -+ * -+ * @heap: Pointer to a tiler heap. -+ * -+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks -+ * previously allocated by @create_chunk. -+ * -+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the -+ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct -+ * kbase_context.region_lock. -+ * -+ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the -+ * list of chunks used by the GPU, therefore it is only safe to use this function when -+ * deleting a heap. 
-+ */ -+static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) -+{ -+ struct kbase_context *const kctx = heap->kctx; -+ struct list_head *entry = NULL, *tmp = NULL; ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FAULT_MASK); ++ } + -+ WARN(!list_empty(&heap->link), -+ "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller"); ++ /* ++ * Enable the CSG idle notification once the CS's ringbuffer ++ * becomes empty or the CS becomes sync_idle, waiting sync update ++ * or protected mode switch. ++ */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | ++ CS_REQ_IDLE_SHARED_SB_DEC_MASK, ++ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | ++ CS_REQ_IDLE_SHARED_SB_DEC_MASK); + -+ list_for_each_safe(entry, tmp, &heap->chunks_list) { -+ struct kbase_csf_tiler_heap_chunk *chunk = list_entry( -+ entry, struct kbase_csf_tiler_heap_chunk, link); ++ /* Set state to START/STOP */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, ++ CS_REQ_STATE_MASK); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, ++ ring_csg_doorbell); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+ list_del_init(&chunk->link); -+ heap->chunk_count--; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); + -+ remove_unlinked_chunk(kctx, chunk); -+ } ++ update_hw_active(queue, true); +} + -+/** -+ * create_initial_chunks - Create the initial list of chunks for a tiler heap -+ * -+ * @heap: Pointer to the tiler heap for which to allocate memory. -+ * @nchunks: Number of chunks to create. -+ * -+ * This function allocates a given number of chunks for a tiler heap and -+ * adds them to the list of chunks associated with that heap. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, -+ u32 const nchunks) ++static int onslot_csg_add_new_queue(struct kbase_queue *queue) +{ -+ int err = 0; -+ u32 i; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ int err; + -+ for (i = 0; (i < nchunks) && likely(!err); i++) -+ err = create_chunk(heap); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (unlikely(err)) -+ delete_all_chunks(heap); ++ err = kbase_csf_mcu_shared_add_queue(kbdev, queue); ++ if (!err) ++ program_cs(kbdev, queue, true); + + return err; +} + -+/** -+ * delete_heap - Delete an unlinked tiler heap -+ * -+ * @heap: Pointer to a tiler heap to be deleted. -+ * -+ * This function frees any chunks allocated for a tiler heap previously -+ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by -+ * the firmware is also freed. -+ * -+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the -+ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct -+ * kbase_context.region_lock. 
-+ */ -+static void delete_heap(struct kbase_csf_tiler_heap *heap) ++int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) +{ -+ struct kbase_context *const kctx = heap->kctx; ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ bool const cs_enabled = queue->enabled; ++ int err = 0; ++ bool evicted = false; + -+ dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va); ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&queue->kctx->csf.lock); + -+ WARN(!list_empty(&heap->link), -+ "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller"); ++ if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) ++ return -EINVAL; + -+ /* Make sure that all of the VA regions corresponding to the chunks are -+ * freed at this time and that the work queue is not trying to access freed -+ * memory. -+ * -+ * Note: since the heap is unlinked, and that no references are made to chunks other -+ * than from their heap, there is no need to separately move the chunks out of the -+ * heap->chunks_list to delete them. -+ */ -+ delete_all_chunks(heap); ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+ kbase_vunmap(kctx, &heap->gpu_va_map); -+ /* We could optimize context destruction by not freeing leaked heap -+ * contexts but it doesn't seem worth the extra complexity. After this -+ * point, the suballocation is returned to the heap context allocator and -+ * may be overwritten with new data, meaning heap->gpu_va should not -+ * be used past this point. -+ */ -+ kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, -+ heap->gpu_va); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) { ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ return -EBUSY; ++ } ++#endif + -+ WARN_ON(heap->chunk_count); -+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, -+ heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, -+ heap->target_in_flight, 0); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, ++ group->run_state); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, ++ queue->status_wait); + -+ if (heap->buf_desc_reg) { -+ kbase_vunmap(kctx, &heap->buf_desc_map); -+ kbase_gpu_vm_lock(kctx); -+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg); -+ kbase_gpu_vm_unlock(kctx); ++ if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { ++ err = -EIO; ++ evicted = true; ++ } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) ++ && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { ++ dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", ++ queue->csi_index, group->handle); ++ } else { ++ err = scheduler_group_schedule(group); ++ ++ if (!err) { ++ queue->enabled = true; ++ if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { ++ if (cs_enabled) { ++ /* In normal situation, when a queue is ++ * already running, the queue update ++ * would be a doorbell kick on user ++ * side. However, if such a kick is ++ * shortly following a start or resume, ++ * the queue may actually in transition ++ * hence the said kick would enter the ++ * kernel as the hw_active flag is yet ++ * to be set. The sheduler needs to ++ * give a kick to the corresponding ++ * user door-bell on such a case. 
++ */ ++ kbase_csf_ring_cs_user_doorbell(kbdev, queue); ++ } else { ++ err = onslot_csg_add_new_queue(queue); ++ /* For an on slot CSG, the only error in adding a new ++ * queue to run is that the scheduler could not map ++ * the required userio pages due to likely some resource ++ * issues. In such a case, and if the group is yet ++ * to enter its fatal error state, we return a -EBUSY ++ * to the submitter for another kick. The queue itself ++ * has yet to be programmed hence needs to remain its ++ * previous (disabled) state. If the error persists, ++ * the group will eventually reports a fatal error by ++ * the group's error reporting mechanism, when the MCU ++ * shared region map retry limit of the group is ++ * exceeded. For such a case, the expected error value ++ * is -EIO. ++ */ ++ if (unlikely(err)) { ++ queue->enabled = cs_enabled; ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ return (err != -EIO) ? -EBUSY : err; ++ } ++ } ++ } ++ queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, ++ msecs_to_jiffies(kbase_get_timeout_ms( ++ kbdev, CSF_FIRMWARE_PING_TIMEOUT))); ++ } + } + -+ kfree(heap); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ if (evicted) ++ kbase_csf_term_descheduled_queue_group(group); ++ ++ return err; +} + -+/** -+ * find_tiler_heap - Find a tiler heap from the address of its heap context -+ * -+ * @kctx: Pointer to the kbase context to search for a tiler heap. -+ * @heap_gpu_va: GPU virtual address of a heap context structure. -+ * -+ * Each tiler heap managed by the kernel has an associated heap context -+ * structure used by the firmware. This function finds a tiler heap object from -+ * the GPU virtual address of its associated heap context. The heap context -+ * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the -+ * same @kctx. -+ * -+ * Return: pointer to the tiler heap object, or NULL if not found. 
-+ */ -+static struct kbase_csf_tiler_heap *find_tiler_heap( -+ struct kbase_context *const kctx, u64 const heap_gpu_va) ++static enum kbase_csf_csg_slot_state update_csg_slot_status( ++ struct kbase_device *kbdev, s8 slot) +{ -+ struct kbase_csf_tiler_heap *heap = NULL; ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 state; ++ enum kbase_csf_csg_slot_state slot_state; + -+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { -+ if (heap_gpu_va == heap->gpu_va) -+ return heap; -+ } ++ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, ++ CSG_ACK)); ++ slot_state = atomic_read(&csg_slot->state); + -+ dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", -+ heap_gpu_va); ++ switch (slot_state) { ++ case CSG_SLOT_READY2RUN: ++ if ((state == CSG_ACK_STATE_START) || ++ (state == CSG_ACK_STATE_RESUME)) { ++ slot_state = CSG_SLOT_RUNNING; ++ atomic_set(&csg_slot->state, slot_state); ++ csg_slot->trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group, ++ state); ++ dev_dbg(kbdev->dev, "Group %u running on slot %d\n", ++ csg_slot->resident_group->handle, slot); ++ } ++ break; ++ case CSG_SLOT_DOWN2STOP: ++ if ((state == CSG_ACK_STATE_SUSPEND) || ++ (state == CSG_ACK_STATE_TERMINATE)) { ++ slot_state = CSG_SLOT_STOPPED; ++ atomic_set(&csg_slot->state, slot_state); ++ csg_slot->trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); ++ dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", ++ csg_slot->resident_group->handle, slot); ++ } ++ break; ++ case CSG_SLOT_DOWN2STOP_TIMEDOUT: ++ case CSG_SLOT_READY2RUN_TIMEDOUT: ++ case CSG_SLOT_READY: ++ case CSG_SLOT_RUNNING: ++ case CSG_SLOT_STOPPED: ++ break; ++ default: ++ dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); ++ break; ++ } + -+ return NULL; ++ return slot_state; +} + -+static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap, -+ u64 const chunk_gpu_va) ++static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) +{ -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); ++ return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); ++} + -+ list_for_each_entry(chunk, &heap->chunks_list, link) { -+ if (chunk->gpu_va == chunk_gpu_va) -+ return chunk; -+ } ++static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) ++{ ++ enum kbase_csf_csg_slot_state slot_state; + -+ dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ return NULL; ++ slot_state = update_csg_slot_status(kbdev, slot); ++ ++ return (slot_state == CSG_SLOT_STOPPED || ++ slot_state == CSG_SLOT_READY); +} + -+int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) ++static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) +{ -+ int err = kbase_csf_heap_context_allocator_init( -+ &kctx->csf.tiler_heaps.ctx_alloc, kctx); -+ -+ if (unlikely(err)) -+ return err; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 state; + -+ INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list); 
-+ mutex_init(&kctx->csf.tiler_heaps.lock); ++ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, ++ CSG_ACK)); + -+ dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n"); ++ if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); ++ dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); ++ return true; ++ } + -+ return 0; ++ return false; +} + -+void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) ++static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) +{ -+ LIST_HEAD(local_heaps_list); -+ struct list_head *entry = NULL, *tmp = NULL; ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_csg_slot *csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ s8 slot; + -+ dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n"); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ mutex_lock(&kctx->csf.tiler_heaps.lock); -+ list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; + -+ list_for_each_safe(entry, tmp, &local_heaps_list) { -+ struct kbase_csf_tiler_heap *heap = list_entry( -+ entry, struct kbase_csf_tiler_heap, link); ++ slot = group->csg_nr; + -+ list_del_init(&heap->link); -+ delete_heap(heap); -+ } ++ /* When in transition, wait for it to complete */ ++ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { ++ long remaining = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + -+ mutex_destroy(&kctx->csf.tiler_heaps.lock); ++ dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ csg_slot_running(kbdev, slot), remaining); ++ if (!remaining) ++ dev_warn(kbdev->dev, ++ "[%llu] slot %d timeout (%d ms) on up-running\n", ++ kbase_backend_get_cycle_cnt(kbdev), ++ slot, kbdev->csf.fw_timeout_ms); ++ } + -+ kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); -+} ++ if (csg_slot_running(kbdev, slot)) { ++ unsigned long flags; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &global_iface->groups[slot]; + -+/** -+ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house -+ * the tiler heap buffer descriptor -+ * is suitable for the purpose. -+ * @kctx: kbase context of the tiler heap -+ * @reg: VA region being checked for suitability -+ * -+ * The tiler heap buffer descriptor memory does not admit page faults according -+ * to its design, so it must have the entirety of the backing upon allocation, -+ * and it has to remain alive as long as the tiler heap is alive, meaning it -+ * cannot be allocated from JIT/Ephemeral, or user freeable memory. -+ * -+ * Return: true on suitability, false otherwise. -+ */ -+static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx, -+ struct kbase_va_region *const reg) -+{ -+ if (kbase_is_region_invalid_or_free(reg)) { -+ dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n"); -+ return false; -+ } ++ u32 halt_cmd = suspend ? 
CSG_REQ_STATE_SUSPEND : ++ CSG_REQ_STATE_TERMINATE; + -+ if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || -+ (reg->flags & KBASE_REG_PF_GROW)) { -+ dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); -+ return false; -+ } ++ dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", ++ suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); + -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { -+ dev_err(kctx->kbdev->dev, "Region has invalid type!\n"); -+ return false; -+ } ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ /* Set state to SUSPEND/TERMINATE */ ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, ++ CSG_REQ_STATE_MASK); ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, ++ flags); ++ atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); ++ csg_slot[slot].trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); + -+ if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) || -+ (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) { -+ dev_err(kctx->kbdev->dev, "Region has invalid backing!\n"); -+ return false; ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( ++ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend); + } ++} + -+ return true; ++static void term_csg_slot(struct kbase_queue_group *group) ++{ ++ halt_csg_slot(group, false); +} + -+#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap)) ++static void suspend_csg_slot(struct kbase_queue_group *group) ++{ ++ halt_csg_slot(group, true); ++} + -+int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size, -+ u32 const initial_chunks, u32 const max_chunks, -+ u16 const target_in_flight, u64 const buf_desc_va, -+ u64 *const heap_gpu_va, u64 *const first_chunk_va) ++static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev) +{ -+ int err = 0; -+ struct kbase_csf_tiler_heap *heap = NULL; -+ struct kbase_csf_heap_context_allocator *const ctx_alloc = -+ &kctx->csf.tiler_heaps.ctx_alloc; -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; -+ struct kbase_va_region *gpu_va_reg = NULL; -+ void *vmap_ptr = NULL; ++ const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version); ++ const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version); + -+ dev_dbg(kctx->kbdev->dev, -+ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n", -+ initial_chunks, max_chunks, chunk_size, buf_desc_va); ++ switch (glb_major) { ++ case 0: ++ break; ++ case 1: ++ if (glb_minor >= 4) ++ return true; ++ break; ++ case 2: ++ if (glb_minor >= 6) ++ return true; ++ break; ++ case 3: ++ if (glb_minor >= 6) ++ return true; ++ break; ++ default: ++ return true; ++ } ++ return false; ++} ++/** ++ * evaluate_sync_update() - Evaluate the sync wait condition the GPU command ++ * queue has been blocked on. ++ * ++ * @queue: Pointer to the GPU command queue ++ * ++ * Return: true if sync wait condition is satisfied. 
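++ * A GT wait is satisfied once the current value of the sync object
++ * exceeds the wait value, an LE wait once it is less than or equal to
++ * it, and a GE wait (supported only on newer firmware interfaces) once
++ * it is greater than or equal to it.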
++ */ ++static bool evaluate_sync_update(struct kbase_queue *queue) ++{ ++ struct kbase_vmap_struct *mapping; ++ bool updated = false; ++ u32 *sync_ptr; ++ u32 sync_wait_size; ++ u32 sync_wait_align_mask; ++ u32 sync_wait_cond; ++ u32 sync_current_val; ++ struct kbase_device *kbdev; ++ bool sync_wait_align_valid = false; ++ bool sync_wait_cond_valid = false; + -+ if (!kbase_mem_allow_alloc(kctx)) -+ return -EINVAL; ++ if (WARN_ON(!queue)) ++ return false; + -+ if (chunk_size == 0) -+ return -EINVAL; ++ kbdev = queue->kctx->kbdev; + -+ if (chunk_size & ~CHUNK_SIZE_MASK) -+ return -EINVAL; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (initial_chunks == 0) -+ return -EINVAL; ++ sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait); ++ sync_wait_align_mask = ++ (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; ++ sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; ++ if (!sync_wait_align_valid) { ++ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", ++ queue->sync_ptr); ++ goto out; ++ } + -+ if (initial_chunks > max_chunks) -+ return -EINVAL; ++ sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, ++ &mapping); + -+ if (target_in_flight == 0) -+ return -EINVAL; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue, ++ queue->sync_ptr); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue, ++ queue->blocked_reason); + -+ heap = kzalloc(sizeof(*heap), GFP_KERNEL); -+ if (unlikely(!heap)) { -+ dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap"); -+ return -ENOMEM; ++ if (!sync_ptr) { ++ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", ++ queue->sync_ptr); ++ goto out; + } + -+ heap->kctx = kctx; -+ heap->chunk_size = chunk_size; -+ heap->max_chunks = max_chunks; -+ heap->target_in_flight = target_in_flight; -+ heap->buf_desc_checked = false; -+ INIT_LIST_HEAD(&heap->chunks_list); -+ INIT_LIST_HEAD(&heap->link); ++ sync_wait_cond = ++ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); ++ sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) || ++ (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) || ++ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && ++ csf_wait_ge_condition_supported(kbdev)); + -+ /* Check on the buffer descriptor virtual Address */ -+ if (buf_desc_va) { -+ struct kbase_va_region *buf_desc_reg; ++ WARN_ON(!sync_wait_cond_valid); + -+ kbase_gpu_vm_lock(kctx); -+ buf_desc_reg = -+ kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va); ++ sync_current_val = READ_ONCE(*sync_ptr); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue, ++ sync_current_val); + -+ if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) { -+ kbase_gpu_vm_unlock(kctx); -+ dev_err(kctx->kbdev->dev, -+ "Could not find a suitable VA region for the tiler heap buf desc!\n"); -+ err = -EINVAL; -+ goto buf_desc_not_suitable; -+ } ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue, ++ queue->sync_value); + -+ /* If we don't prevent userspace from unmapping this, we may run into -+ * use-after-free, as we don't check for the existence of the region throughout. 
++ if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && ++ (sync_current_val > queue->sync_value)) || ++ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && ++ (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) || ++ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && ++ (sync_current_val <= queue->sync_value))) { ++ /* The sync wait condition is satisfied so the group to which ++ * queue is bound can be re-scheduled. + */ ++ updated = true; ++ } else { ++ dev_dbg(queue->kctx->kbdev->dev, ++ "sync memory not updated yet(%u)", sync_current_val); ++ } + -+ heap->buf_desc_va = buf_desc_va; -+ heap->buf_desc_reg = buf_desc_reg; -+ kbase_va_region_no_user_free_inc(buf_desc_reg); -+ -+ vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, -+ KBASE_REG_CPU_RD, &heap->buf_desc_map, -+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++out: ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated); ++ return updated; ++} + -+ if (kbase_page_migration_enabled) -+ kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); ++/** ++ * save_slot_cs() - Save the state for blocked GPU command queue. ++ * ++ * @ginfo: Pointer to the CSG interface used by the group ++ * the queue is bound to. ++ * @queue: Pointer to the GPU command queue. ++ * ++ * This function will check if GPU command queue is blocked on a sync wait and ++ * evaluate the wait condition. If the wait condition isn't satisfied it would ++ * save the state needed to reevaluate the condition in future. ++ * The group to which queue is bound shall be in idle state. ++ * ++ * Return: true if the queue is blocked on a sync wait operation. 
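++ * A queue whose blocked reason is CS_STATUS_BLOCKED_ON_SB_WAIT is
++ * reported as waiting without re-evaluating the sync condition.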
++ */ ++static ++bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, ++ struct kbase_queue *queue) ++{ ++ struct kbase_csf_cmd_stream_info *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); ++ bool is_waiting = false; + -+ kbase_gpu_vm_unlock(kctx); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO); + -+ if (unlikely(!vmap_ptr)) { -+ dev_err(kctx->kbdev->dev, -+ "Could not vmap buffer descriptor into kernel memory (err %d)\n", -+ err); -+ err = -ENOMEM; -+ goto buf_desc_vmap_failed; -+ } -+ } ++ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32; ++ queue->saved_cmd_ptr = cmd_ptr; ++#endif + -+ heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); -+ if (unlikely(!heap->gpu_va)) { -+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n"); -+ err = -ENOMEM; -+ goto heap_context_alloc_failed; -+ } ++ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, ++ queue, status); + -+ gpu_va_reg = ctx_alloc->region; ++ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { ++ queue->status_wait = status; ++ queue->sync_ptr = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_LO); ++ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ queue->sync_value = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_VALUE); + -+ kbase_gpu_vm_lock(kctx); -+ /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens -+ * on kctx termination (after all syscalls on kctx have finished), and so it is safe to -+ * assume that gpu_va_reg is still present. 
-+ */ -+ vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE, -+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map, -+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); -+ kbase_gpu_vm_unlock(kctx); -+ if (unlikely(!vmap_ptr)) { -+ dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n"); -+ err = -ENOMEM; -+ goto heap_context_vmap_failed; -+ } -+ -+ err = create_initial_chunks(heap, initial_chunks); -+ if (unlikely(err)) { -+ dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n"); -+ goto create_chunks_failed; -+ } -+ chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); -+ -+ *heap_gpu_va = heap->gpu_va; -+ *first_chunk_va = chunk->gpu_va; -+ -+ mutex_lock(&kctx->csf.tiler_heaps.lock); -+ kctx->csf.tiler_heaps.nr_of_heaps++; -+ heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; -+ list_add(&heap->link, &kctx->csf.tiler_heaps.list); -+ -+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, -+ PFN_UP(heap->chunk_size * heap->max_chunks), -+ PFN_UP(heap->chunk_size * heap->chunk_count), -+ heap->max_chunks, heap->chunk_size, heap->chunk_count, -+ heap->target_in_flight, 0); ++ queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS)); ++ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_BLOCKED_REASON)); + -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+ list_for_each_entry(chunk, &heap->chunks_list, link) { -+ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id, -+ chunk->gpu_va); ++ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || ++ !evaluate_sync_update(queue)) { ++ is_waiting = true; ++ } else { ++ /* Sync object already got updated & met the condition ++ * thus it doesn't need to be reevaluated and so can ++ * clear the 'status_wait' here. ++ */ ++ queue->status_wait = 0; ++ } ++ } else { ++ /* Invalidate wait status info that would have been recorded if ++ * this queue was blocked when the group (in idle state) was ++ * suspended previously. After that the group could have been ++ * unblocked due to the kicking of another queue bound to it & ++ * so the wait status info would have stuck with this queue. 
++ */ ++ queue->status_wait = 0; + } -+#endif -+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; -+ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count; -+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) -+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; -+ -+ dev_dbg(kctx->kbdev->dev, -+ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va, -+ buf_desc_va, kctx->tgid, kctx->id); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ -+ return 0; + -+create_chunks_failed: -+ kbase_vunmap(kctx, &heap->gpu_va_map); -+heap_context_vmap_failed: -+ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); -+heap_context_alloc_failed: -+ if (heap->buf_desc_reg) -+ kbase_vunmap(kctx, &heap->buf_desc_map); -+buf_desc_vmap_failed: -+ if (heap->buf_desc_reg) { -+ kbase_gpu_vm_lock(kctx); -+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg); -+ kbase_gpu_vm_unlock(kctx); -+ } -+buf_desc_not_suitable: -+ kfree(heap); -+ return err; ++ return is_waiting; +} + -+int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, -+ u64 const heap_gpu_va) ++static void schedule_in_cycle(struct kbase_queue_group *group, bool force) +{ -+ int err = 0; -+ struct kbase_csf_tiler_heap *heap = NULL; -+ u32 chunk_count = 0; -+ u64 heap_size = 0; -+ -+ mutex_lock(&kctx->csf.tiler_heaps.lock); -+ heap = find_tiler_heap(kctx, heap_gpu_va); -+ if (likely(heap)) { -+ chunk_count = heap->chunk_count; -+ heap_size = heap->chunk_size * chunk_count; -+ -+ list_del_init(&heap->link); -+ } else { -+ err = -EINVAL; -+ } ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list -+ * at all times -+ */ -+ if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) -+ kctx->running_total_tiler_heap_memory -= heap_size; -+ else -+ dev_warn(kctx->kbdev->dev, -+ "Running total tiler heap memory lower than expected!"); -+ if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count)) -+ kctx->running_total_tiler_heap_nr_chunks -= chunk_count; -+ else -+ dev_warn(kctx->kbdev->dev, -+ "Running total tiler chunk count lower than expected!"); -+ if (!err) -+ dev_dbg(kctx->kbdev->dev, -+ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", -+ heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&scheduler->lock); + -+ /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from -+ * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock ++ /* Only try to schedule work for this event if no requests are pending, ++ * otherwise the function will end up canceling previous work requests, ++ * and scheduler is configured to wake up periodically (or the schedule ++ * of work needs to be enforced in situation such as entering into ++ * protected mode). + */ -+ if (likely(heap)) -+ delete_heap(heap); -+ -+ return err; ++ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { ++ dev_dbg(kbdev->dev, "Kicking async for group %d\n", ++ group->handle); ++ kbase_csf_scheduler_invoke_tock(kbdev); ++ } +} + -+/** -+ * validate_allocation_request - Check whether the chunk allocation request -+ * received on tiler OOM should be handled at -+ * current time. 
-+ * -+ * @heap: The tiler heap the OOM is associated with -+ * @nr_in_flight: Number of fragment jobs in flight -+ * @pending_frag_count: Number of pending fragment jobs -+ * -+ * Context: must hold the tiler heap lock to guarantee its lifetime -+ * -+ * Return: -+ * * 0 - allowed to allocate an additional chunk -+ * * -EINVAL - invalid -+ * * -EBUSY - there are fragment jobs still in flight, which may free chunks -+ * after completing -+ * * -ENOMEM - the targeted number of in-flight chunks has been reached and -+ * no new ones will be allocated -+ */ -+static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, -+ u32 pending_frag_count) ++static void ktrace_log_group_state(struct kbase_queue_group *const group) +{ -+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); -+ -+ if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight)) -+ return -EINVAL; -+ -+ if (nr_in_flight <= heap->target_in_flight) { -+ if (heap->chunk_count < heap->max_chunks) { -+ /* Not exceeded the target number of render passes yet so be -+ * generous with memory. -+ */ -+ return 0; -+ } else if (pending_frag_count > 0) { -+ return -EBUSY; -+ } else { -+ return -ENOMEM; -+ } -+ } else { -+ /* Reached target number of render passes in flight. -+ * Wait for some of them to finish -+ */ -+ return -EBUSY; ++ switch (group->run_state) { ++ case KBASE_CSF_GROUP_INACTIVE: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_RUNNABLE: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_IDLE: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_SUSPENDED: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, ++ group, group->run_state); ++ break; ++ case KBASE_CSF_GROUP_FAULT_EVICTED: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group, ++ group->run_state); ++ break; ++ case KBASE_CSF_GROUP_TERMINATED: ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, ++ group->run_state); ++ break; + } -+ return -ENOMEM; +} + -+int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, -+ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) ++static ++void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *const group, ++ enum kbase_csf_group_state run_state) +{ -+ struct kbase_csf_tiler_heap *heap; -+ struct kbase_csf_tiler_heap_chunk *chunk; -+ int err = -EINVAL; -+ u64 chunk_size = 0; -+ u64 heap_id = 0; ++ struct kbase_context *const kctx = group->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ /* To avoid potential locking issues during allocation, this is handled -+ * in three phases: -+ * 1. Take the lock, find the corresponding heap, and find its chunk size -+ * (this is always 2 MB, but may change down the line). -+ * 2. Allocate memory for the chunk and its region. -+ * 3. If the heap still exists, link it to the end of the list. 
If it -+ * doesn't, roll back the allocation. -+ */ ++ lockdep_assert_held(&scheduler->lock); + -+ mutex_lock(&kctx->csf.tiler_heaps.lock); -+ heap = find_tiler_heap(kctx, gpu_heap_va); -+ if (likely(heap)) { -+ chunk_size = heap->chunk_size; -+ heap_id = heap->heap_id; -+ } else { -+ dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto prelink_failure; -+ } ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); + -+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); -+ if (unlikely(err)) { -+ /* The allocation request can be legitimate, but be invoked on a heap -+ * that has already reached the maximum pre-configured capacity. This -+ * is useful debug information, but should not be treated as an error, -+ * since the request will be re-sent at a later point. -+ */ -+ dev_dbg(kctx->kbdev->dev, -+ "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)", -+ gpu_heap_va, err); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto prelink_failure; -+ } -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ /* this heap must not be used whilst we have dropped the lock */ -+ heap = NULL; ++ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ return; + -+ chunk = alloc_new_chunk(kctx, chunk_size); -+ if (unlikely(!chunk)) { -+ dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d", -+ chunk_size, kctx->tgid, kctx->id); -+ goto prelink_failure; -+ } ++ group->run_state = run_state; + -+ /* After this point, the heap that we were targeting could already have had the needed -+ * chunks allocated, if we were handling multiple OoM events on multiple threads, so -+ * we need to revalidate the need for the allocation. 
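-+ * For example (illustrative only): two OoM events for the same heap may race
-+ * here; both drop the lock and allocate a chunk, but the second one to retake
-+ * the lock can find that the heap no longer needs the extra chunk and must be
-+ * able to release its freshly allocated, still unlinked chunk.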
-+ */ -+ mutex_lock(&kctx->csf.tiler_heaps.lock); -+ heap = find_tiler_heap(kctx, gpu_heap_va); ++ ktrace_log_group_state(group); + -+ if (unlikely(!heap)) { -+ dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto unroll_chunk; -+ } ++ if (run_state == KBASE_CSF_GROUP_RUNNABLE) ++ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; + -+ if (heap_id != heap->heap_id) { -+ dev_err(kctx->kbdev->dev, -+ "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!", -+ gpu_heap_va, kctx->tgid, kctx->id, chunk_size); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto unroll_chunk; -+ } ++ list_add_tail(&group->link, ++ &kctx->csf.sched.runnable_groups[group->priority]); ++ kctx->csf.sched.num_runnable_grps++; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group, ++ kctx->csf.sched.num_runnable_grps); + -+ if (WARN_ON(chunk_size != heap->chunk_size)) { -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto unroll_chunk; ++ /* Add the kctx if not yet in runnable kctxs */ ++ if (kctx->csf.sched.num_runnable_grps == 1) { ++ /* First runnable csg, adds to the runnable_kctxs */ ++ INIT_LIST_HEAD(&kctx->csf.link); ++ list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u); + } + -+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); -+ if (unlikely(err)) { -+ dev_warn( -+ kctx->kbdev->dev, -+ "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)", -+ gpu_heap_va, err); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto unroll_chunk; -+ } ++ scheduler->total_runnable_grps++; + -+ err = init_chunk(heap, chunk, false); ++ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && ++ (scheduler->total_runnable_grps == 1 || ++ scheduler->state == SCHED_SUSPENDED || ++ scheduler->state == SCHED_SLEEPING)) { ++ dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); ++ /* Fire a scheduling to start the time-slice */ ++ enqueue_tick_work(kbdev); ++ } else ++ schedule_in_cycle(group, false); + -+ /* On error, the chunk would not be linked, so we can still treat it as an unlinked -+ * chunk for error handling. ++ /* Since a new group has become runnable, check if GPU needs to be ++ * powered up. 
+ */ -+ if (unlikely(err)) { -+ dev_err(kctx->kbdev->dev, -+ "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d", -+ chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err); -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ goto unroll_chunk; -+ } -+ -+ *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); -+ -+ /* update total and peak tiler heap memory record */ -+ kctx->running_total_tiler_heap_nr_chunks++; -+ kctx->running_total_tiler_heap_memory += heap->chunk_size; -+ -+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) -+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; -+ -+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, -+ PFN_UP(heap->chunk_size * heap->max_chunks), -+ PFN_UP(heap->chunk_size * heap->chunk_count), -+ heap->max_chunks, heap->chunk_size, heap->chunk_count, -+ heap->target_in_flight, nr_in_flight); -+ -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ -+ return err; -+unroll_chunk: -+ remove_unlinked_chunk(kctx, chunk); -+prelink_failure: -+ return err; ++ scheduler_wakeup(kbdev, false); +} + -+static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va, -+ u64 *hdr_val) ++static ++void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *group, ++ enum kbase_csf_group_state run_state) +{ -+ int err; -+ u64 *chunk_hdr; -+ struct kbase_context *kctx = heap->kctx; -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_queue_group *new_head_grp; ++ struct list_head *list = ++ &kctx->csf.sched.runnable_groups[group->priority]; ++ unsigned long flags; + -+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&scheduler->lock); + -+ chunk = find_chunk(heap, chunk_gpu_va); -+ if (unlikely(!chunk)) { -+ dev_warn(kctx->kbdev->dev, -+ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n", -+ heap->gpu_va, chunk_gpu_va); -+ return false; -+ } ++ WARN_ON(!queue_group_scheduled_locked(group)); + -+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), -+ "Cannot support CPU cached chunks without sync operations"); -+ chunk_hdr = chunk->map.addr; -+ *hdr_val = *chunk_hdr; ++ group->run_state = run_state; + -+ dev_dbg(kctx->kbdev->dev, -+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", -+ chunk_gpu_va, heap->gpu_va, *hdr_val); ++ ktrace_log_group_state(group); + -+ err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents); -+ if (unlikely(err)) { -+ dev_warn( -+ kctx->kbdev->dev, -+ "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n", -+ chunk_gpu_va, heap->gpu_va, err); ++ list_del_init(&group->link); + -+ /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on -+ * the heap's chunk list and try a different heap. ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ /* The below condition will be true when the group running in protected ++ * mode is being terminated but the protected mode exit interrupt was't ++ * received. This can happen if the FW got stuck during protected mode ++ * for some reason (like GPU page fault or some internal error). ++ * In normal cases FW is expected to send the protected mode exit ++ * interrupt before it handles the CSG termination request. 
++ */ ++ if (unlikely(scheduler->active_protm_grp == group)) { ++ /* CSG slot cleanup should have happened for the pmode group */ ++ WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group)); ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); ++ /* Initiate a GPU reset, in case it wasn't initiated yet, ++ * in order to rectify the anomaly. + */ ++ if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kctx->kbdev); + -+ return false; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT, ++ scheduler->active_protm_grp, 0u); ++ scheduler->active_protm_grp = NULL; + } -+ /* Destroy the mapping before the physical pages which are mapped are destroyed. */ -+ kbase_vunmap(kctx, &chunk->map); -+ -+ err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc, -+ chunk->region->gpu_alloc->nents); -+ if (unlikely(err)) { -+ dev_warn( -+ kctx->kbdev->dev, -+ "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n", -+ chunk_gpu_va, heap->gpu_va, err); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs -+ * anyway, so continue instead of returning early. ++ if (scheduler->top_grp == group) { ++ /* ++ * Note: this disables explicit rotation in the next scheduling ++ * cycle. However, removing the top_grp is the same as an ++ * implicit rotation (e.g. if we instead rotated the top_ctx ++ * and then remove top_grp) + * -+ * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has -+ * its mapping removed, as that could lead to problems. It's safest to instead -+ * continue with deferred destruction of the chunk. ++ * This implicit rotation is assumed by the scheduler rotate ++ * functions. + */ -+ } ++ scheduler->top_grp = NULL; + -+ dev_dbg(kctx->kbdev->dev, -+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", -+ chunk_gpu_va, heap->gpu_va, *hdr_val); ++ /* ++ * Trigger a scheduling tock for a CSG containing protected ++ * content in case there has been any in order to minimise ++ * latency. ++ */ ++ group = scheduler_get_protm_enter_async_group(kctx->kbdev, ++ NULL); ++ if (group) ++ schedule_in_cycle(group, true); ++ } + -+ mutex_lock(&heap->kctx->jit_evict_lock); -+ list_move(&chunk->region->jit_node, &kctx->jit_destroy_head); -+ mutex_unlock(&heap->kctx->jit_evict_lock); ++ kctx->csf.sched.num_runnable_grps--; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group, ++ kctx->csf.sched.num_runnable_grps); ++ new_head_grp = (!list_empty(list)) ? ++ list_first_entry(list, struct kbase_queue_group, link) : ++ NULL; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); + -+ list_del(&chunk->link); -+ heap->chunk_count--; -+ kfree(chunk); ++ if (kctx->csf.sched.num_runnable_grps == 0) { ++ struct kbase_context *new_head_kctx; ++ struct list_head *kctx_list = &scheduler->runnable_kctxs; ++ /* drop the kctx */ ++ list_del_init(&kctx->csf.link); ++ if (scheduler->top_ctx == kctx) ++ scheduler->top_ctx = NULL; ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u); ++ new_head_kctx = (!list_empty(kctx_list)) ? 
++ list_first_entry(kctx_list, struct kbase_context, csf.link) : ++ NULL; ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); ++ } + -+ return true; ++ WARN_ON(scheduler->total_runnable_grps == 0); ++ scheduler->total_runnable_grps--; ++ if (!scheduler->total_runnable_grps) { ++ dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); ++ cancel_tick_timer(kctx->kbdev); ++ WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); ++ if (scheduler->state != SCHED_SUSPENDED) ++ enqueue_gpu_idle_work(scheduler); ++ } ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->total_runnable_grps) << 32)); +} + -+static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, -+ struct kbase_csf_gpu_buffer_heap *desc) ++static void insert_group_to_idle_wait(struct kbase_queue_group *const group) +{ -+ u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; -+ -+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); ++ struct kbase_context *kctx = group->kctx; + -+ if (first_hoarded_chunk_gpu_va) { -+ struct kbase_csf_tiler_heap_chunk *chunk = -+ find_chunk(heap, first_hoarded_chunk_gpu_va); ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + -+ if (likely(chunk)) { -+ dev_dbg(heap->kctx->kbdev->dev, -+ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n", -+ heap->buf_desc_va); ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); + -+ heap->buf_desc_checked = true; -+ return; -+ } -+ } -+ /* If there is no match, defer the check to next time */ -+ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n", -+ heap->buf_desc_va); ++ list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); ++ kctx->csf.sched.num_idle_wait_grps++; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group, ++ kctx->csf.sched.num_idle_wait_grps); ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group, ++ group->run_state); ++ dev_dbg(kctx->kbdev->dev, ++ "Group-%d suspended on sync_wait, total wait_groups: %u\n", ++ group->handle, kctx->csf.sched.num_idle_wait_grps); +} + -+static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr) ++static void remove_group_from_idle_wait(struct kbase_queue_group *const group) +{ -+ struct kbase_context *kctx = heap->kctx; ++ struct kbase_context *kctx = group->kctx; ++ struct list_head *list = &kctx->csf.sched.idle_wait_groups; ++ struct kbase_queue_group *new_head_grp; + -+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + -+ /* Initialize the descriptor pointer value to 0 */ -+ *chunk_gpu_va_ptr = 0; ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); + -+ /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */ -+ if (heap->buf_desc_reg && !heap->buf_desc_checked) { -+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; ++ list_del_init(&group->link); ++ WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); ++ kctx->csf.sched.num_idle_wait_grps--; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group, ++ kctx->csf.sched.num_idle_wait_grps); ++ new_head_grp = (!list_empty(list)) ? 
++ list_first_entry(list, struct kbase_queue_group, link) : ++ NULL; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); ++ group->run_state = KBASE_CSF_GROUP_INACTIVE; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state); ++} + -+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ -+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) -+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU); ++static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&scheduler->lock); + -+ sanity_check_gpu_buffer_heap(heap, desc); -+ if (heap->buf_desc_checked) -+ *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK; -+ } ++ if (WARN_ON(!group)) ++ return; + -+ return heap->buf_desc_checked; ++ remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); ++ insert_group_to_idle_wait(group); +} + -+static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap) ++static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) +{ -+ u32 freed = 0; -+ u64 chunk_gpu_va = 0; -+ struct kbase_context *kctx = heap->kctx; -+ struct kbase_csf_tiler_heap_chunk *chunk = NULL; -+ -+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); -+ -+ if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) { -+ u64 chunk_hdr_val; -+ u64 *hw_hdr; ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ if (!chunk_gpu_va) { -+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; ++ lockdep_assert_held(&scheduler->lock); + -+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ -+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) -+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, -+ KBASE_SYNC_TO_CPU); -+ chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; ++ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { ++ int new_val = ++ atomic_dec_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); ++ } ++} + -+ if (!chunk_gpu_va) { -+ dev_dbg(kctx->kbdev->dev, -+ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n", -+ heap->buf_desc_va); -+ goto out; -+ } -+ } ++static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ chunk = find_chunk(heap, chunk_gpu_va); -+ if (unlikely(!chunk)) -+ goto out; ++ lockdep_assert_held(&scheduler->lock); + -+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), -+ "Cannot support CPU cached chunks without sync operations"); -+ hw_hdr = chunk->map.addr; ++ WARN_ON(group->csg_nr < 0); + -+ /* Move onto the next chunk relevant information */ -+ chunk_hdr_val = *hw_hdr; -+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { ++ int new_val = ++ atomic_dec_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); ++ } ++} + -+ while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { -+ bool success = -+ delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val); ++static void update_offslot_non_idle_cnt_on_grp_suspend( ++ struct kbase_queue_group *group) ++{ ++ struct kbase_device 
*kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ if (!success) -+ break; ++ lockdep_assert_held(&scheduler->lock); + -+ freed++; -+ /* On success, chunk_hdr_val is updated, extract the next chunk address */ -+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ if (scheduler->state == SCHED_BUSY) { ++ /* active phase or, async entering the protected mode */ ++ if (group->prepared_seq_num >= ++ scheduler->non_idle_scanout_grps) { ++ /* At scanout, it was tagged as on-slot idle */ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_inc_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, ++ group, new_val); ++ } ++ } else { ++ if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_dec_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, ++ group, new_val); ++ } + } -+ -+ /* Update the existing hardware chunk header, after reclaim deletion of chunks */ -+ *hw_hdr = chunk_hdr_val; -+ -+ dev_dbg(heap->kctx->kbdev->dev, -+ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed, -+ chunk_hdr_val); + } else { -+ dev_dbg(kctx->kbdev->dev, -+ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n", -+ heap->buf_desc_va); ++ /* async phases */ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_inc_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, ++ new_val); ++ } + } -+out: -+ return freed; +} + -+static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap) ++static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) +{ -+ u32 freed_chunks = 0; -+ u64 freed_pages = 0; -+ u64 chunk_gpu_va; -+ u64 chunk_hdr_val; -+ struct kbase_context *kctx = heap->kctx; -+ u64 *ctx_ptr; -+ -+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); -+ -+ WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED, -+ "Cannot support CPU cached heap context without sync operations"); ++ bool cs_empty; ++ bool cs_idle; ++ u32 sb_status = 0; + -+ ctx_ptr = heap->gpu_va_map.addr; ++ struct kbase_device const *const kbdev = queue->group->kctx->kbdev; ++ struct kbase_csf_global_iface const *const iface = ++ &kbdev->csf.global_iface; + -+ /* Extract the first chunk address from the context's free_list_head */ -+ chunk_hdr_val = *ctx_ptr; -+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ u32 glb_version = iface->version; + -+ while (chunk_gpu_va) { -+ u64 hdr_val; -+ bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val); ++ u64 const *input_addr = (u64 const *)queue->user_io_addr; ++ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + -+ if (!success) -+ break; ++ if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { ++ /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[queue->csi_index]; + -+ freed_chunks++; -+ chunk_hdr_val = hdr_val; -+ /* extract the next chunk address */ -+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS)); + } + -+ /* Update the post-scan deletion to context header */ -+ *ctx_ptr = 
chunk_hdr_val; -+ -+ /* Try to scan the HW hoarded list of unused chunks */ -+ freed_chunks += delete_hoarded_chunks(heap); -+ freed_pages = freed_chunks * PFN_UP(heap->chunk_size); -+ dev_dbg(heap->kctx->kbdev->dev, -+ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n", -+ freed_chunks, freed_pages, chunk_hdr_val); ++ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == ++ output_addr[CS_EXTRACT_LO / sizeof(u64)]); ++ cs_idle = cs_empty && (!sb_status); + -+ /* Update context tiler heaps memory usage */ -+ kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT; -+ kctx->running_total_tiler_heap_nr_chunks -= freed_chunks; -+ return freed_pages; ++ return cs_idle; +} + -+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free) ++static void save_csg_slot(struct kbase_queue_group *group) +{ -+ u64 freed = 0; -+ struct kbase_csf_tiler_heap *heap; ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 state; + -+ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&scheduler->lock); + -+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { -+ freed += delete_unused_chunk_pages(heap); ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; + -+ /* If freed enough, then stop here */ -+ if (freed >= to_free) -+ break; -+ } ++ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; + -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); -+ /* The scan is surely not more than 4-G pages, but for logic flow limit it */ -+ if (WARN_ON(unlikely(freed > U32_MAX))) -+ return U32_MAX; -+ else -+ return (u32)freed; -+} ++ state = ++ CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); + -+static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap) -+{ -+ u32 chunk_cnt = 0; -+ u64 page_cnt = 0; ++ if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && ++ (state != CSG_ACK_STATE_TERMINATE))) { ++ u32 max_streams = ginfo->stream_num; ++ u32 i; ++ bool sync_wait = false; ++ bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & ++ CSG_STATUS_STATE_IDLE_MASK; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ for (i = 0; i < max_streams; i++) ++ update_hw_active(group->bound_queues[i], false); ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++ for (i = 0; idle && i < max_streams; i++) { ++ struct kbase_queue *const queue = ++ group->bound_queues[i]; + -+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); ++ if (!queue || !queue->enabled) ++ continue; + -+ /* Here the count is basically an informed estimate, avoiding the costly mapping/unmaping -+ * in the chunk list walk. The downside is that the number is a less reliable guide for -+ * later on scan (free) calls on this heap for what actually is freeable. -+ */ -+ if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { -+ chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT; -+ page_cnt = chunk_cnt * PFN_UP(heap->chunk_size); -+ } ++ if (save_slot_cs(ginfo, queue)) { ++ /* sync_wait is only true if the queue is blocked on ++ * a CQS and not a scoreboard. ++ */ ++ if (queue->blocked_reason != ++ CS_STATUS_BLOCKED_ON_SB_WAIT) ++ sync_wait = true; ++ } else { ++ /* Need to confirm if ringbuffer of the GPU ++ * queue is empty or not. A race can arise ++ * between the flush of GPU queue and suspend ++ * of CSG. 
If a queue is flushed after FW has ++ * set the IDLE bit in CSG_STATUS_STATE, then ++ * Scheduler will incorrectly consider CSG ++ * as idle. And there may not be any further ++ * flush call for the GPU queue, which would ++ * have de-idled the CSG. ++ */ ++ idle = confirm_cmd_buf_empty(queue); ++ } ++ } + -+ dev_dbg(heap->kctx->kbdev->dev, -+ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt, -+ page_cnt, heap->gpu_va); ++ if (idle) { ++ /* Take the suspended group out of the runnable_groups ++ * list of the context and move it to the ++ * idle_wait_groups list. ++ */ ++ if (sync_wait) ++ deschedule_idle_wait_group(scheduler, group); ++ else { ++ group->run_state = ++ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, ++ group->run_state); ++ dev_dbg(kbdev->dev, "Group-%d suspended: idle", ++ group->handle); ++ } ++ } else { ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, ++ group->run_state); ++ } + -+ return page_cnt; ++ update_offslot_non_idle_cnt_on_grp_suspend(group); ++ kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group); ++ } +} + -+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx) ++/* Cleanup_csg_slot after it has been vacated, ready for next csg run. ++ * Return whether there is a kctx address fault associated with the group ++ * for which the clean-up is done. ++ */ ++static bool cleanup_csg_slot(struct kbase_queue_group *group) +{ -+ u64 page_cnt = 0; -+ struct kbase_csf_tiler_heap *heap; ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ s8 slot; ++ struct kbase_csf_csg_slot *csg_slot; ++ unsigned long flags; ++ u32 i; ++ bool as_fault = false; + -+ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) -+ page_cnt += count_unused_heap_pages(heap); ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return as_fault; + -+ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ slot = group->csg_nr; ++ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ++ ginfo = &global_iface->groups[slot]; + -+ /* The count is surely not more than 4-G pages, but for logic flow limit it */ -+ if (WARN_ON(unlikely(page_cnt > U32_MAX))) -+ return U32_MAX; -+ else -+ return (u32)page_cnt; -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h -new file mode 100644 -index 000000000..1b5cb5608 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h -@@ -0,0 +1,142 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, you can access it online at
-+ * http://www.gnu.org/licenses/gpl-2.0.html.
-+ *
-+ */
+
-+#ifndef _KBASE_CSF_TILER_HEAP_H_
-+#define _KBASE_CSF_TILER_HEAP_H_
+
-+#include 
-+/**
-+ * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a
-+ *                                     GPU address space
-+ *
-+ * @kctx: Pointer to the kbase context being initialized.
-+ *
-+ * Return: 0 if successful or a negative error code on failure.
-+ */
-+int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx);
+
-+/**
-+ * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a
-+ *                                     GPU address space
-+ *
-+ * @kctx: Pointer to the kbase context being terminated.
-+ *
-+ * This function deletes any chunked tiler heaps that weren't deleted before
-+ * context termination.
-+ */
-+void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
+
-+/**
-+ * kbase_csf_tiler_heap_init - Initialize a chunked tiler memory heap.
-+ *
-+ * @kctx: Pointer to the kbase context in which to allocate resources for the
-+ *        tiler heap.
-+ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned.
-+ * @initial_chunks: The initial number of chunks to allocate. Must not be
-+ *                  zero or greater than @max_chunks.
-+ * @max_chunks: The maximum number of chunks that the heap should be allowed
-+ *              to use. Must not be less than @initial_chunks.
-+ * @target_in_flight: Number of render-passes that the driver should attempt to
-+ *                    keep in flight for which allocation of new chunks is
-+ *                    allowed. Must not be zero.
-+ * @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint
-+ *               indicating that the caller intends to reclaim tiler heap
-+ *               chunks that are hoarded by the hardware while the
-+ *               associated shader activities are suspended and the CSGs are
-+ *               off slot. If such reclaiming is not desired, this can be
-+ *               set to 0.
-+ * @gpu_heap_va: Where to store the GPU virtual address of the context that was
-+ *               set up for the tiler heap.
-+ * @first_chunk_va: Where to store the GPU virtual address of the first chunk
-+ *                  allocated for the heap. This points to the header of the
-+ *                  heap chunk and not to the low address of free memory in it.
-+ *
-+ * Return: 0 if successful or a negative error code on failure.
-+ */
-+int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks,
-+                              u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va,
-+                              u64 *gpu_heap_va, u64 *first_chunk_va);
+
-+/**
-+ * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap.
-+ *
-+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
-+ * @gpu_heap_va: The GPU virtual address of the context that was set up for the
-+ *               tiler heap.
-+ *
-+ * This function will terminate a chunked tiler heap and cause all the chunks
-+ * (initial and those added during out-of-memory processing) to be freed.
-+ * It is the caller's responsibility to ensure no further operations on this
-+ * heap will happen before calling this function.
-+ *
-+ * Return: 0 if successful or a negative error code on failure.
-+ */
-+int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
+
-+/**
-+ * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap.
-+ *
-+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
-+ * @gpu_heap_va: GPU virtual address of the heap context.
-+ * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
-+ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
-+ *                      The minimum value is zero but it must be less than or
-+ *                      equal to the total number of render passes in flight.
-+ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
-+ *                 chunk allocated for the heap.
-+ *
-+ * This function will allocate a new chunk for the chunked tiler heap depending
-+ * on the settings provided by userspace when the heap was created and the
-+ * heap's statistics (like number of render passes in-flight).
-+ * It would return an appropriate error code if a new chunk couldn't be
-+ * allocated.
-+ *
-+ * Return: 0 if a new chunk was allocated, otherwise an appropriate negative
-+ *         error code (like -EBUSY when a free chunk is expected to be
-+ *         available upon completion of a render pass and -EINVAL when an
-+ *         invalid value was passed for one of the arguments).
-+ */
-+int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
-+		u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
+
-+/**
-+ * kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker reclaim's
-+ *                                               scan (free) functionality.
-+ *
-+ * @kctx: Pointer to the kbase context for which the tiler heap reclaim is to be
-+ *        performed.
-+ * @to_free: Number of pages suggested for the reclaim scan (free) method to reach.
-+ *
-+ * Return: the actual number of pages the scan method has freed from the call.
-+ */
-+u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free);
+
-+/**
-+ * kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker reclaim's
-+ *                                                count functionality.
-+ *
-+ * @kctx: Pointer to the kbase context for which the tiler heap reclaim is to be
-+ *        performed.
-+ *
-+ * Return: a number of pages that could likely be freed on the subsequent scan method call.
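-+ *
-+ * Illustrative use only (assumed wiring, not defined in this header): the
-+ * count/scan pair mirrors the kernel shrinker callbacks, e.g.
-+ *
-+ *   static unsigned long heap_count_objects(struct shrinker *s,
-+ *                                           struct shrink_control *sc)
-+ *   {
-+ *           return kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);
-+ *   }
-+ *
-+ *   static unsigned long heap_scan_objects(struct shrinker *s,
-+ *                                          struct shrink_control *sc)
-+ *   {
-+ *           return kbase_csf_tiler_heap_scan_kctx_unused_pages(kctx,
-+ *                                                              sc->nr_to_scan);
-+ *   }
-+ *
-+ * where heap_count_objects(), heap_scan_objects() and kctx are hypothetical
-+ * names standing in for whatever the registering code ties to the shrinker.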
-+ */ -+u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx); -+#endif -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c -new file mode 100644 -index 000000000..96e0f2829 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c -@@ -0,0 +1,162 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); ++ dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", ++ group->handle, slot); + -+#include "mali_kbase_csf_tiler_heap_debugfs.h" -+#include "mali_kbase_csf_tiler_heap_def.h" -+#include -+#include ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, slot); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ /* Notify the group is off-slot and the csg_reg might be available for ++ * resue with other groups in a 'lazy unbinding' style. ++ */ ++ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + -+/** -+ * kbasep_csf_tiler_heap_debugfs_show() - Print tiler heap information for per context -+ * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_context -+ * -+ * Return: 0 in any case. 
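-+ *
-+ * Example output (illustrative values only):
-+ *
-+ *   MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v0
-+ *   HEAP(gpu_va = 0x41000000):
-+ *           chunk_size = 2097152
-+ *           chunk_count = 2
-+ *           max_chunks = 64
-+ *           target_in_flight = 8
-+ *                   chunk gpu_va = 0x42000000
-+ *                   chunk gpu_va = 0x42200000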
-+ */ -+static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) ++ return as_fault; ++} ++ ++static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) +{ -+ struct kbase_context *kctx = file->private; -+ struct kbase_csf_tiler_heap_context *tiler_heaps_p = &kctx->csf.tiler_heaps; -+ struct kbase_csf_tiler_heap *heap; -+ struct kbase_csf_tiler_heap_chunk *chunk; ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_csg_slot *csg_slot; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ s8 slot; ++ u8 prev_prio; ++ u32 ep_cfg; ++ u32 csg_req; ++ unsigned long flags; + -+ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ mutex_lock(&tiler_heaps_p->lock); ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; + -+ list_for_each_entry(heap, &tiler_heaps_p->list, link) { -+ if (heap->kctx != kctx) -+ continue; ++ slot = group->csg_nr; ++ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ++ ginfo = &kbdev->csf.global_iface.groups[slot]; + -+ seq_printf(file, "HEAP(gpu_va = 0x%llx):\n", heap->gpu_va); -+ seq_printf(file, "\tchunk_size = %u\n", heap->chunk_size); -+ seq_printf(file, "\tchunk_count = %u\n", heap->chunk_count); -+ seq_printf(file, "\tmax_chunks = %u\n", heap->max_chunks); -+ seq_printf(file, "\ttarget_in_flight = %u\n", heap->target_in_flight); ++ /* CSGs remaining on-slot can be either idle or runnable. ++ * This also applies in protected mode. ++ */ ++ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || ++ (group->run_state == KBASE_CSF_GROUP_IDLE))); + -+ list_for_each_entry(chunk, &heap->chunks_list, link) -+ seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", -+ chunk->gpu_va); -+ } ++ /* Update consumes a group from scanout */ ++ update_offslot_non_idle_cnt_for_onslot_grp(group); + -+ mutex_unlock(&tiler_heaps_p->lock); ++ if (csg_slot->priority == prio) ++ return; + -+ return 0; -+} ++ /* Read the csg_ep_cfg back for updating the priority field */ ++ ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); ++ prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ++ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); ++ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + -+/** -+ * kbasep_csf_tiler_heap_total_debugfs_show() - Print the total memory allocated -+ * for all tiler heaps in a context. -+ * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_context -+ * -+ * Return: 0 in any case. 
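-+ *
-+ * Example output (illustrative values only):
-+ *
-+ *   MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v0
-+ *   Total number of chunks of all heaps in the context: 4
-+ *   Total allocated memory of all heaps in the context: 8388608
-+ *   Peak allocated tiler heap memory in the context: 16777216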
-+ */ -+static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void *data) -+{ -+ struct kbase_context *kctx = file->private; ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_EP_CFG_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_EP_CFG_MASK); ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", -+ MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); -+ seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n", -+ (unsigned long)kctx->running_total_tiler_heap_nr_chunks); -+ seq_printf(file, "Total allocated memory of all heaps in the context: %llu\n", -+ (unsigned long long)kctx->running_total_tiler_heap_memory); -+ seq_printf(file, "Peak allocated tiler heap memory in the context: %llu\n", -+ (unsigned long long)kctx->peak_total_tiler_heap_memory); ++ csg_slot->priority = prio; + -+ return 0; -+} ++ dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", ++ group->handle, group->kctx->tgid, group->kctx->id, slot, ++ prev_prio, prio); + -+static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private); -+} ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); + -+static int kbasep_csf_tiler_heap_total_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_csf_tiler_heap_total_debugfs_show, in->i_private); ++ set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); +} + -+static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { -+ .open = kbasep_csf_tiler_heap_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+static const struct file_operations kbasep_csf_tiler_heap_total_debugfs_fops = { -+ .open = kbasep_csf_tiler_heap_total_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) ++static void program_csg_slot(struct kbase_queue_group *group, s8 slot, ++ u8 prio) +{ -+ struct dentry *file; ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ const u64 shader_core_mask = ++ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); ++ const u64 tiler_core_mask = ++ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); ++ const u64 compute_mask = shader_core_mask & group->compute_mask; ++ const u64 fragment_mask = shader_core_mask & group->fragment_mask; ++ const u64 tiler_mask = tiler_core_mask & group->tiler_mask; ++ const u8 num_cores = kbdev->gpu_props.num_cores; ++ const u8 compute_max = min(num_cores, group->compute_max); ++ const u8 fragment_max = min(num_cores, group->fragment_max); ++ const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 ep_cfg = 0; ++ u32 csg_req; ++ u32 state; ++ int i; ++ unsigned long flags; ++ u64 normal_suspend_buf; ++ u64 protm_suspend_buf; ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; + -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ 
lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(slot < 0) && ++ WARN_ON(slot >= global_iface->group_num)) + return; + -+ file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, -+ kctx, &kbasep_csf_tiler_heap_debugfs_fops); ++ WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kctx->kbdev->dev, -+ "Unable to create tiler heap debugfs entry"); ++ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { ++ dev_warn(kbdev->dev, ++ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", ++ group->handle, group->kctx->tgid, kctx->id, slot); ++ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); ++ return; + } -+} + -+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) -+{ -+ struct dentry *file; ++ /* The suspend buf has already been mapped through binding to csg_reg */ ++ normal_suspend_buf = group->normal_suspend_buf.gpu_va; ++ protm_suspend_buf = group->protected_suspend_buf.gpu_va; ++ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); + -+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; ++ ginfo = &global_iface->groups[slot]; + -+ file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry, -+ kctx, &kbasep_csf_tiler_heap_total_debugfs_fops); ++ /* Pick an available address space for this context */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kctx->kbdev->dev, -+ "Unable to create total tiler heap allocated memory debugfs entry"); ++ if (kctx->as_nr == KBASEP_AS_NR_INVALID) { ++ dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", ++ group->handle, kctx->tgid, kctx->id, slot); ++ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); ++ return; + } -+} -+ -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) -+{ -+} -+ -+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) -+{ -+} -+ -+#endif /* CONFIG_DEBUG_FS */ + -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h -new file mode 100644 -index 000000000..4a1b413ef ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); ++ kbdev->csf.scheduler.csg_slots[slot].resident_group = group; ++ group->csg_nr = slot; ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+#ifndef _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ -+#define _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ ++ assign_user_doorbell_to_group(kbdev, group); + -+/* Forward declaration */ -+struct kbase_context; ++ /* Now loop through all the bound & kicked CSs, and program them */ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ struct kbase_queue *queue = group->bound_queues[i]; + -+#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0 ++ if (queue) ++ program_cs(kbdev, queue, false); ++ } + -+/** -+ * kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap -+ * -+ * @kctx: The kbase_context for which to create the debugfs entry -+ */ -+void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx); + -+/** -+ * kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap -+ * -+ * @kctx: The kbase_context for which to create the debugfs entry -+ */ -+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx); ++ /* Endpoint programming for CSG */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, ++ compute_mask & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, ++ compute_mask >> 32); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, ++ fragment_mask & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, ++ fragment_mask >> 32); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, ++ tiler_mask & U32_MAX); + -+#endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h -new file mode 100644 -index 000000000..96f2b03d2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h -@@ -0,0 +1,140 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ /* Register group UID with firmware */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG, ++ group->group_uid); + -+#ifndef _KBASE_CSF_TILER_HEAP_DEF_H_ -+#define _KBASE_CSF_TILER_HEAP_DEF_H_ ++ ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); ++ ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ++ ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ++ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); ++ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + -+#include ++ /* Program the address space number assigned to the context */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); + -+/* Size of a tiler heap chunk header, in bytes. */ -+#define CHUNK_HDR_SIZE ((size_t)64) ++ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, ++ normal_suspend_buf & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, ++ normal_suspend_buf >> 32); + -+/* Bit-position of the next chunk's size when stored in a chunk header. */ -+#define CHUNK_HDR_NEXT_SIZE_POS (0) ++ /* Note, we program the P-mode buffer pointer here, but actual runtime ++ * enter into pmode execution is controlled by the P-mode phy pages are ++ * allocated and mapped with the bound csg_reg, which has a specific flag ++ * for indicating this P-mode runnable condition before a group is ++ * granted its p-mode section entry. Without a P-mode entry, the buffer ++ * pointed is not going to be accessed at all. ++ */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); + -+/* Bit-position of the next chunk's address when stored in a chunk header. */ -+#define CHUNK_HDR_NEXT_ADDR_POS (12) ++ if (group->dvs_buf) { ++ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, ++ group->dvs_buf & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI, ++ group->dvs_buf >> 32); ++ } + -+/* Bitmask of the next chunk's size when stored in a chunk header. */ -+#define CHUNK_HDR_NEXT_SIZE_MASK (((u64)1 << CHUNK_HDR_NEXT_ADDR_POS) - 1u) ++ /* Enable all interrupts for now */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); + -+/* Bitmask of the address of the next chunk when stored in a chunk header. */ -+#define CHUNK_HDR_NEXT_ADDR_MASK (~CHUNK_HDR_NEXT_SIZE_MASK) ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_EP_CFG_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_EP_CFG_MASK); + -+/* Right-shift before storing the next chunk's size in a chunk header. */ -+#define CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT (12) ++ /* Set state to START/RESUME */ ++ if (queue_group_suspended_locked(group)) { ++ state = CSG_REQ_STATE_RESUME; ++ } else { ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); ++ state = CSG_REQ_STATE_START; ++ } + -+/* Right-shift before storing the next chunk's address in a chunk header. */ -+#define CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT (12) ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ++ state, CSG_REQ_STATE_MASK); ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + -+/* Bitmask of valid chunk sizes. This is also the maximum chunk size, in bytes. 
-+ */ -+#define CHUNK_SIZE_MASK \ -+ ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << \ -+ CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) ++ /* Update status before rings the door-bell, marking ready => run */ ++ atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); ++ csg_slot->trigger_jiffies = jiffies; ++ csg_slot->priority = prio; + -+/* Bitmask of valid chunk addresses. This is also the highest address. */ -+#define CHUNK_ADDR_MASK \ -+ ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ -+ CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) ++ /* Trace the programming of the CSG on the slot */ ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( ++ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id, ++ group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0); + -+/* The size of the area needed to be vmapped prior to handing the tiler heap -+ * over to the tiler, so that the shrinker could be invoked. -+ */ -+#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64)) ++ dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", ++ group->handle, kctx->tgid, kctx->id, slot, prio); + -+/** -+ * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel -+ * -+ * @link: Link to this chunk in a list of chunks belonging to a -+ * @kbase_csf_tiler_heap. -+ * @region: Pointer to the GPU memory region allocated for the chunk. -+ * @map: Kernel VA mapping so that we would not need to use vmap in the -+ * shrinker callback, which can allocate. This maps only the header -+ * of the chunk, so it could be traversed. -+ * @gpu_va: GPU virtual address of the start of the memory region. -+ * This points to the header of the chunk and not to the low address -+ * of free memory within it. -+ * -+ * Chunks are allocated upon initialization of a tiler heap or in response to -+ * out-of-memory events from the firmware. Chunks are always fully backed by -+ * physical memory to avoid the overhead of processing GPU page faults. The -+ * allocated GPU memory regions are linked together independent of the list of -+ * kernel objects of this type. -+ */ -+struct kbase_csf_tiler_heap_chunk { -+ struct list_head link; -+ struct kbase_va_region *region; -+ struct kbase_vmap_struct map; -+ u64 gpu_va; -+}; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group, ++ (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) | ++ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); + -+#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0) ++ /* Update the heap reclaim manager */ ++ kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group); + -+/** -+ * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel -+ * -+ * @kctx: Pointer to the kbase context with which this heap is -+ * associated. -+ * @link: Link to this heap in a list of tiler heaps belonging to -+ * the @kbase_csf_tiler_heap_context. -+ * @chunks_list: Linked list of allocated chunks. -+ * @gpu_va: The GPU virtual address of the heap context structure that -+ * was allocated for the firmware. This is also used to -+ * uniquely identify the heap. -+ * @heap_id: Unique id representing the heap, assigned during heap -+ * initialization. -+ * @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatible -+ * to earlier version base interfaces. -+ * @buf_desc_reg: Pointer to the VA region that covers the provided buffer -+ * descriptor memory object pointed to by buf_desc_va. -+ * @gpu_va_map: Kernel VA mapping of the GPU VA region. 
-+ * @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from -+ * during the tiler heap shrinker. Sync operations may need -+ * to be done before each read. -+ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. -+ * @chunk_count: The number of chunks currently allocated. Must not be -+ * zero or greater than @max_chunks. -+ * @max_chunks: The maximum number of chunks that the heap should be -+ * allowed to use. Must not be less than @chunk_count. -+ * @target_in_flight: Number of render-passes that the driver should attempt -+ * to keep in flight for which allocation of new chunks is -+ * allowed. Must not be zero. -+ * @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done. -+ */ -+struct kbase_csf_tiler_heap { -+ struct kbase_context *kctx; -+ struct list_head link; -+ struct list_head chunks_list; -+ u64 gpu_va; -+ u64 heap_id; -+ u64 buf_desc_va; -+ struct kbase_va_region *buf_desc_reg; -+ struct kbase_vmap_struct buf_desc_map; -+ struct kbase_vmap_struct gpu_va_map; -+ u32 chunk_size; -+ u32 chunk_count; -+ u32 max_chunks; -+ u16 target_in_flight; -+ bool buf_desc_checked; -+}; ++ /* Programming a slot consumes a group from scanout */ ++ update_offslot_non_idle_cnt_for_onslot_grp(group); + -+#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c -new file mode 100644 -index 000000000..6357e3518 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c -@@ -0,0 +1,359 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ /* Notify the group's bound csg_reg is now in active use */ ++ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); ++} + -+#include -+#include "mali_kbase_csf.h" -+#include "mali_kbase_csf_tiler_heap.h" -+#include "mali_kbase_csf_tiler_heap_reclaim.h" ++static void remove_scheduled_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */ -+#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2) ++ lockdep_assert_held(&scheduler->lock); + -+/* Tiler heap shrinker batch value */ -+#define HEAP_SHRINKER_BATCH (512) ++ WARN_ON(group->prepared_seq_num == ++ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); ++ WARN_ON(list_empty(&group->link_to_schedule)); + -+/* Tiler heap reclaim scan (free) method size for limiting a scan run length */ -+#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7) ++ list_del_init(&group->link_to_schedule); ++ scheduler->ngrp_to_schedule--; ++ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; ++ group->kctx->csf.sched.ngrp_to_schedule--; ++} + -+static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx) ++static void sched_evict_group(struct kbase_queue_group *group, bool fault, ++ bool update_non_idle_offslot_grps_cnt_from_run_state) +{ -+ u8 prio; ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW; -+ prio++) -+ if (!list_empty(&kctx->csf.sched.runnable_groups[prio])) -+ break; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ if (prio != KBASE_QUEUE_GROUP_PRIORITY_REALTIME && kctx->csf.sched.num_idle_wait_grps) { -+ struct kbase_queue_group *group; ++ if (queue_group_scheduled_locked(group)) { ++ u32 i; + -+ list_for_each_entry(group, &kctx->csf.sched.idle_wait_groups, link) { -+ if (group->priority < prio) -+ prio = group->priority; ++ if (update_non_idle_offslot_grps_cnt_from_run_state && ++ (group->run_state == KBASE_CSF_GROUP_SUSPENDED || ++ group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { ++ int new_val = atomic_dec_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, ++ new_val); + } -+ } + -+ return prio; -+} ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ if (group->bound_queues[i]) ++ group->bound_queues[i]->enabled = false; ++ } + -+static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx) -+{ -+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; -+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; ++ if (group->prepared_seq_num != ++ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { ++ if (!update_non_idle_offslot_grps_cnt_from_run_state) ++ update_offslot_non_idle_cnt(group); ++ remove_scheduled_group(kbdev, group); ++ } + -+ lockdep_assert_held(&scheduler->lock); ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) ++ remove_group_from_idle_wait(group); ++ else { ++ remove_group_from_runnable(scheduler, group, ++ KBASE_CSF_GROUP_INACTIVE); ++ } + -+ if (!list_empty(&info->mgr_link)) { -+ u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ? 
-+ info->nr_est_unused_pages - info->nr_freed_pages : -+ 0; ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); + -+ list_del_init(&info->mgr_link); -+ if (remaining) -+ WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) < -+ 0); ++ if (fault) { ++ group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group, ++ scheduler->total_runnable_grps); ++ } + -+ dev_dbg(kctx->kbdev->dev, -+ "Reclaim_mgr_detach: ctx_%d_%d, est_pages=0%u, freed_pages=%u", kctx->tgid, -+ kctx->id, info->nr_est_unused_pages, info->nr_freed_pages); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group, ++ (((u64)scheduler->total_runnable_grps) << 32) | ++ ((u32)group->run_state)); ++ dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", ++ group->handle, scheduler->total_runnable_grps); ++ /* Notify a group has been evicted */ ++ wake_up_all(&kbdev->csf.event_wait); + } -+} -+ -+static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx) -+{ -+ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; -+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; -+ u8 const prio = get_kctx_highest_csg_priority(kctx); + -+ lockdep_assert_held(&scheduler->lock); ++ kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); + -+ if (WARN_ON(!list_empty(&info->mgr_link))) -+ list_del_init(&info->mgr_link); ++ /* Clear all the bound shared regions and unmap any in-place MMU maps */ ++ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); ++} + -+ /* Count the pages that could be freed */ -+ info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx); -+ /* Initialize the scan operation tracking pages */ -+ info->nr_freed_pages = 0; ++static int term_group_sync(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ int err = 0; + -+ list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]); -+ /* Accumulate the estimated pages to the manager total field */ -+ atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages); ++ term_csg_slot(group); + -+ dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid, -+ kctx->id, info->nr_est_unused_pages); -+} ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), ++ remaining); + -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group) -+{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; ++ if (unlikely(!remaining)) { ++ enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT; + -+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, ++ group->handle, group->kctx->tgid, ++ group->kctx->id, group->csg_nr); ++ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); + -+ info->on_slot_grps++; -+ /* If the kctx has an on-slot change from 0 => 1, detach it from 
reclaim_mgr */ -+ if (info->on_slot_grps == 1) { -+ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager", -+ group->kctx->tgid, group->kctx->id, group->handle); + -+ detach_ctx_from_heap_reclaim_mgr(kctx); ++ err = -ETIMEDOUT; + } ++ ++ return err; +} + -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group) ++void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) +{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; -+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; -+ const u32 num_groups = kctx->kbdev->csf.global_iface.group_num; -+ u32 on_slot_grps = 0; -+ u32 i; -+ -+ lockdep_assert_held(&scheduler->lock); -+ -+ /* Group eviction from the scheduler is a bit more complex, but fairly less -+ * frequent in operations. Taking the opportunity to actually count the -+ * on-slot CSGs from the given kctx, for robustness and clearer code logic. -+ */ -+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { -+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; -+ struct kbase_queue_group *grp = csg_slot->resident_group; -+ -+ if (unlikely(!grp)) -+ continue; ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ bool wait_for_termination = true; ++ bool on_slot; + -+ if (grp->kctx == kctx) -+ on_slot_grps++; -+ } ++ kbase_reset_gpu_assert_failed_or_prevented(kbdev); ++ lockdep_assert_held(&group->kctx->csf.lock); ++ mutex_lock(&scheduler->lock); + -+ info->on_slot_grps = on_slot_grps; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); ++ wait_for_dump_complete_on_group_deschedule(group); ++ if (!queue_group_scheduled_locked(group)) ++ goto unlock; + -+ /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */ -+ if (!info->on_slot_grps) { -+ if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) { -+ /* The kctx has other operational CSGs, attach it if not yet done */ -+ if (list_empty(&info->mgr_link)) { -+ dev_dbg(kctx->kbdev->dev, -+ "CSG_%d_%d_%d evict, add kctx to reclaim manager", -+ group->kctx->tgid, group->kctx->id, group->handle); ++ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); + -+ attach_ctx_to_heap_reclaim_mgr(kctx); -+ } -+ } else { -+ /* The kctx is a zombie after the group eviction, drop it out */ -+ dev_dbg(kctx->kbdev->dev, -+ "CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager", -+ group->kctx->tgid, group->kctx->id, group->handle); ++#ifdef KBASE_PM_RUNTIME ++ /* If the queue group is on slot and Scheduler is in SLEEPING state, ++ * then we need to wake up the Scheduler to exit the sleep state rather ++ * than waiting for the runtime suspend or power down of GPU. ++ * The group termination is usually triggered in the context of Application ++ * thread and it has been seen that certain Apps can destroy groups at ++ * random points and not necessarily when the App is exiting. 
++ */ ++ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { ++ scheduler_wakeup(kbdev, true); + -+ detach_ctx_from_heap_reclaim_mgr(kctx); ++ /* Wait for MCU firmware to start running */ ++ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), ++ group->handle, group->kctx->tgid, ++ group->kctx->id, group->csg_nr); ++ /* No point in waiting for CSG termination if MCU didn't ++ * become active. ++ */ ++ wait_for_termination = false; + } + } -+} ++#endif ++ if (!on_slot) { ++ sched_evict_group(group, false, true); ++ } else { ++ bool as_faulty; + -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group) -+{ -+ struct kbase_context *kctx = group->kctx; -+ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; ++ if (likely(wait_for_termination)) ++ term_group_sync(group); ++ else ++ term_csg_slot(group); + -+ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ /* Treat the csg been terminated */ ++ as_faulty = cleanup_csg_slot(group); ++ /* remove from the scheduler list */ ++ sched_evict_group(group, as_faulty, false); ++ } + -+ if (!WARN_ON(info->on_slot_grps == 0)) -+ info->on_slot_grps--; -+ /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */ -+ if (info->on_slot_grps == 0) { -+ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager", -+ group->kctx->tgid, group->kctx->id, group->handle); ++ WARN_ON(queue_group_scheduled_locked(group)); + -+ attach_ctx_to_heap_reclaim_mgr(kctx); -+ } ++unlock: ++ mutex_unlock(&scheduler->lock); +} + -+static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev) ++/** ++ * scheduler_group_schedule() - Schedule a GPU command queue group on firmware ++ * ++ * @group: Pointer to the queue group to be scheduled. ++ * ++ * This function would enable the scheduling of GPU command queue group on ++ * firmware. ++ * ++ * Return: 0 on success, or negative on failure. 
++ */ ++static int scheduler_group_schedule(struct kbase_queue_group *group) +{ -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; -+ struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr; -+ unsigned long total_freed_pages = 0; -+ int prio; -+ -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW; -+ total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE && -+ prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME; -+ prio--) { -+ struct kbase_csf_ctx_heap_reclaim_info *info, *tmp; -+ u32 cnt_ctxs = 0; ++ lockdep_assert_held(&kctx->csf.lock); ++ lockdep_assert_held(&scheduler->lock); + -+ list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio], -+ mgr_link) { -+ struct kbase_context *kctx = -+ container_of(info, struct kbase_context, csf.sched.heap_info); -+ u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages( -+ kctx, info->nr_est_unused_pages); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) ++ update_idle_suspended_group_state(group); ++ else if (queue_group_idle_locked(group)) { ++ WARN_ON(kctx->csf.sched.num_runnable_grps == 0); ++ WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0); + -+ if (freed_pages) { -+ /* Remove the freed pages from the manager retained estimate. The -+ * accumulated removals from the kctx should not exceed the kctx -+ * initially notified contribution amount: -+ * info->nr_est_unused_pages. -+ */ -+ u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages, -+ freed_pages); ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) ++ update_idle_suspended_group_state(group); ++ else { ++ struct kbase_queue_group *protm_grp; ++ unsigned long flags; + -+ WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0); ++ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( ++ group)); + -+ /* tracking the freed pages, before a potential detach call */ -+ info->nr_freed_pages += freed_pages; -+ total_freed_pages += freed_pages; ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); + -+ schedule_work(&kctx->jit_work); ++ /* A normal mode CSG could be idle onslot during ++ * protected mode. In this case clear the ++ * appropriate bit in csg_slots_idle_mask. ++ */ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ if (protm_grp && protm_grp != group) { ++ clear_bit((unsigned int)group->csg_nr, ++ scheduler->csg_slots_idle_mask); ++ /* Request the update to confirm the condition inferred. */ ++ group->reevaluate_idle_status = true; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); + } ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, ++ flags); + -+ /* If the kctx can't offer anymore, drop it from the reclaim manger, -+ * otherwise leave it remaining in. If the kctx changes its state (i.e. -+ * some CSGs becoming on-slot), the scheduler will pull it out. ++ /* If GPU is in protected mode then any doorbells rang ++ * would have no effect. Check if GPU is in protected ++ * mode and if this group has higher priority than the ++ * active protected mode group. If so prompt the FW ++ * to exit protected mode. 
+ */ -+ if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0) -+ detach_ctx_from_heap_reclaim_mgr(kctx); -+ -+ cnt_ctxs++; -+ -+ /* Enough has been freed, break to avoid holding the lock too long */ -+ if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE) -+ break; ++ if (protm_grp && ++ group->scan_seq_num < protm_grp->scan_seq_num) { ++ /* Prompt the FW to exit protected mode */ ++ scheduler_force_protm_exit(kbdev); ++ } + } ++ } else if (!queue_group_scheduled_locked(group)) { ++ int new_val; + -+ dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)", -+ total_freed_pages, cnt_ctxs, prio); ++ insert_group_to_runnable(&kbdev->csf.scheduler, group, ++ KBASE_CSF_GROUP_RUNNABLE); ++ /* A new group into the scheduler */ ++ new_val = atomic_inc_return( ++ &kbdev->csf.scheduler.non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); + } + -+ dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)", -+ total_freed_pages); ++ /* Since a group has become active now, check if GPU needs to be ++ * powered up. Also rekick the Scheduler. ++ */ ++ scheduler_wakeup(kbdev, true); + -+ return total_freed_pages; ++ return 0; +} + -+static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev, -+ struct shrink_control *sc) ++/** ++ * set_max_csg_slots() - Set the number of available CSG slots ++ * ++ * @kbdev: Pointer of the GPU device. ++ * ++ * This function would set/limit the number of CSG slots that ++ * can be used in the given tick/tock. It would be less than the total CSG ++ * slots supported by firmware if the number of GPU address space slots ++ * required to utilize all the CSG slots is more than the available ++ * address space slots. ++ */ ++static inline void set_max_csg_slots(struct kbase_device *kbdev) +{ -+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; -+ unsigned long page_cnt = atomic_read(&mgr->unused_pages); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; ++ unsigned int max_address_space_slots = ++ kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; + -+ dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt); ++ WARN_ON(scheduler->num_active_address_spaces > total_csg_slots); + -+ return page_cnt; ++ if (likely(scheduler->num_active_address_spaces <= ++ max_address_space_slots)) ++ scheduler->num_csg_slots_for_tick = total_csg_slots; +} + -+static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev, -+ struct shrink_control *sc) ++/** ++ * count_active_address_space() - Count the number of GPU address space slots ++ * ++ * @kbdev: Pointer of the GPU device. ++ * @kctx: Pointer of the Kbase context. ++ * ++ * This function would update the counter that is tracking the number of GPU ++ * address space slots that would be required to program the CS ++ * group slots from the groups at the head of groups_to_schedule list. 
++ */ ++static inline void count_active_address_space(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; -+ unsigned long freed = 0; -+ unsigned long avail = 0; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; ++ unsigned int max_address_space_slots = ++ kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; + -+ /* If Scheduler is busy in action, return 0 */ -+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { -+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ if (scheduler->ngrp_to_schedule <= total_csg_slots) { ++ if (kctx->csf.sched.ngrp_to_schedule == 1) ++ scheduler->num_active_address_spaces++; + -+ /* Wait for roughly 2-ms */ -+ wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY), -+ msecs_to_jiffies(2)); -+ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { -+ dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)"); -+ return 0; -+ } ++ if (scheduler->num_active_address_spaces <= ++ max_address_space_slots) ++ scheduler->num_csg_slots_for_tick++; + } ++} + -+ avail = atomic_read(&mgr->unused_pages); -+ if (avail) -+ freed = reclaim_unused_heap_pages(kbdev); ++/* Two schemes are used in assigning the priority to CSG slots for a given ++ * CSG from the 'groups_to_schedule' list. ++ * This is needed as an idle on-slot group is deprioritized by moving it to ++ * the tail of 'groups_to_schedule' list. As a result it can either get ++ * evicted from the CSG slot in current tick/tock dealing, or its position ++ * can be after the lower priority non-idle groups in the 'groups_to_schedule' ++ * list. The latter case can result in the on-slot subset containing both ++ * non-idle and idle CSGs, and is handled through the 2nd scheme described ++ * below. ++ * ++ * First scheme :- If all the slots are going to be occupied by the non-idle or ++ * idle groups, then a simple assignment of the priority is done as per the ++ * position of a group in the 'groups_to_schedule' list. So maximum priority ++ * gets assigned to the slot of a group which is at the head of the list. ++ * Here the 'groups_to_schedule' list would effectively be ordered as per the ++ * static priority of groups. ++ * ++ * Second scheme :- If the slots are going to be occupied by a mix of idle and ++ * non-idle groups then the priority assignment needs to ensure that the ++ * priority of a slot belonging to a higher priority idle group will always be ++ * greater than the priority of a slot belonging to a lower priority non-idle ++ * group, reflecting the original position of a group in the scan order (i.e ++ * static priority) 'scan_seq_num', which is set during the prepare phase of a ++ * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it ++ * is idle. ++ * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned with the first ++ * 'slots_for_tick' groups in the original scan order are assigned a priority in ++ * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick), ++ * whereas rest of the groups are assigned the priority in the subrange ++ * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher ++ * priority group ends up after the non-idle lower priority groups in the ++ * 'groups_to_schedule' list, it will get a higher slot priority. 
And this will ++ * enable the FW to quickly start the execution of higher priority group when it ++ * gets de-idled. ++ */ ++static u8 get_slot_priority(struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ u8 slot_prio; ++ u32 slots_for_tick = scheduler->num_csg_slots_for_tick; ++ u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; ++ /* Check if all the slots are going to be occupied by the non-idle or ++ * idle groups. ++ */ ++ if (scheduler->non_idle_scanout_grps >= slots_for_tick || ++ !scheduler->non_idle_scanout_grps) { ++ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots); ++ } else { ++ /* There will be a mix of idle and non-idle groups. */ ++ if (group->scan_seq_num < slots_for_tick) ++ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - ++ group->scan_seq_num); ++ else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) ++ slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); ++ else ++ slot_prio = 0; ++ } ++ return slot_prio; ++} + -+ mutex_unlock(&kbdev->csf.scheduler.lock); ++/** ++ * update_resident_groups_priority() - Update the priority of resident groups ++ * ++ * @kbdev: The GPU device. ++ * ++ * This function will update the priority of all resident queue groups ++ * that are at the head of groups_to_schedule list, preceding the first ++ * non-resident group. ++ * ++ * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on ++ * the priority update. ++ */ ++static void update_resident_groups_priority(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = scheduler->num_csg_slots_for_tick; + -+#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) -+ if (freed > sc->nr_to_scan) -+ sc->nr_scanned = freed; -+#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ while (!list_empty(&scheduler->groups_to_schedule)) { ++ struct kbase_queue_group *group = ++ list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); ++ bool resident = ++ kbasep_csf_scheduler_group_is_on_slot_locked(group); + -+ dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", freed, -+ avail); ++ if ((group->prepared_seq_num >= num_groups) || !resident) ++ break; + -+ /* On estimate suggesting available, yet actual free failed, return STOP */ -+ if (avail && !freed) -+ return SHRINK_STOP; -+ else -+ return freed; ++ update_csg_slot_priority(group, ++ get_slot_priority(group)); ++ ++ /* Drop the head group from the list */ ++ remove_scheduled_group(kbdev, group); ++ scheduler->remaining_tick_slots--; ++ } +} + -+static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, -+ struct shrink_control *sc) ++/** ++ * program_group_on_vacant_csg_slot() - Program a non-resident group on the ++ * given vacant CSG slot. ++ * @kbdev: Pointer to the GPU device. ++ * @slot: Vacant CSG slot number. ++ * ++ * This function will program a non-resident group at the head of ++ * kbase_csf_scheduler.groups_to_schedule list on the given vacant ++ * CSG slot, provided the initial position of the non-resident ++ * group in the list is less than the number of CSG slots and there is ++ * an available GPU address space slot. ++ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after ++ * programming the slot. 
++ */ ++static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, ++ s8 slot) +{ -+ struct kbase_device *kbdev = -+ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *const group = ++ list_empty(&scheduler->groups_to_schedule) ? NULL : ++ list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); ++ u32 num_groups = scheduler->num_csg_slots_for_tick; + -+ return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); -+} ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ if (group && (group->prepared_seq_num < num_groups)) { ++ bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group); + -+static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, -+ struct shrink_control *sc) -+{ -+ struct kbase_device *kbdev = -+ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); ++ if (!WARN_ON(ret)) { ++ if (kctx_as_enabled(group->kctx) && !group->faulted) { ++ program_csg_slot(group, slot, ++ get_slot_priority(group)); + -+ return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); ++ if (likely(csg_slot_in_use(kbdev, slot))) { ++ /* Drop the head group from the list */ ++ remove_scheduled_group(kbdev, group); ++ scheduler->remaining_tick_slots--; ++ } ++ } else { ++ update_offslot_non_idle_cnt(group); ++ remove_scheduled_group(kbdev, group); ++ } ++ } ++ } +} + -+void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) ++/** ++ * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident ++ * group and update the priority of resident groups. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * @slot: Vacant CSG slot number. ++ * ++ * This function will first update the priority of all resident queue groups ++ * that are at the head of groups_to_schedule list, preceding the first ++ * non-resident group, it will then try to program the given CS ++ * group slot with the non-resident group. Finally update the priority of all ++ * resident queue groups following the non-resident group. ++ * ++ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted. 
++ */ ++static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) +{ -+ /* Per-kctx heap_info object initialization */ -+ memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info)); -+ INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); -+} ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_csf_csg_slot *const csg_slot = ++ scheduler->csg_slots; + -+void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) -+{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; -+ u8 prio; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); + -+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; -+ prio++) -+ INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]); ++ /* First update priority for already resident groups (if any) ++ * before the non-resident group ++ */ ++ update_resident_groups_priority(kbdev); + -+ atomic_set(&scheduler->reclaim_mgr.unused_pages, 0); ++ /* Now consume the vacant slot for the non-resident group */ ++ program_group_on_vacant_csg_slot(kbdev, slot); + -+ reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects; -+ reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; -+ reclaim->seeks = HEAP_SHRINKER_SEEKS; -+ reclaim->batch = HEAP_SHRINKER_BATCH; ++ /* Now update priority for already resident groups (if any) ++ * following the non-resident group ++ */ ++ update_resident_groups_priority(kbdev); +} + -+void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) ++static bool slots_state_changed(struct kbase_device *kbdev, ++ unsigned long *slots_mask, ++ bool (*state_check_func)(struct kbase_device *, s8)) +{ -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ u8 prio; -+ -+ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; -+ prio++) -+ WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio])); ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; ++ bool changed = false; ++ u32 i; + -+ WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages)); -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h -new file mode 100644 -index 000000000..b6e580e48 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h -@@ -0,0 +1,80 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ for_each_set_bit(i, slots_mask, num_groups) { ++ if (state_check_func(kbdev, (s8)i)) { ++ set_bit(i, changed_slots); ++ changed = true; ++ } ++ } + -+#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_ -+#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_ ++ if (changed) ++ bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS); + -+#include ++ return changed; ++} + +/** -+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler -+ * to use when a group is put on-slot. ++ * program_suspending_csg_slots() - Program the CSG slots vacated on suspension ++ * of queue groups running on them. + * -+ * @group: Pointer to the group object that has been placed on-slot for running. ++ * @kbdev: Pointer to the GPU device. + * ++ * This function will first wait for the ongoing suspension to complete on a ++ * CSG slot and will then program the vacant slot with the ++ * non-resident queue group inside the groups_to_schedule list. ++ * The programming of the non-resident queue group on the vacant slot could ++ * fail due to unavailability of free GPU address space slot and so the ++ * programming is re-attempted after the ongoing suspension has completed ++ * for all the CSG slots. ++ * The priority of resident groups before and after the non-resident group ++ * in the groups_to_schedule list would also be updated. ++ * This would be repeated for all the slots undergoing suspension. ++ * GPU reset would be initiated if the wait for suspend times out. + */ -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group); ++static void program_suspending_csg_slots(struct kbase_device *kbdev) ++{ ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); ++ DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; ++ bool suspend_wait_failed = false; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + -+/** -+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler -+ * to use when a group is evicted out of the schedulder's scope, i.e no run of -+ * the group is possible afterwards. -+ * -+ * @group: Pointer to the group object that has been evicted. -+ * -+ */ -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/** -+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler -+ * to use when a group is suspended from running, but could resume in future. -+ * -+ * @group: Pointer to the group object that is in suspended state. -+ * -+ */ -+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group); ++ /* In the current implementation, csgs_events_enable_mask would be used ++ * only to indicate suspending CSGs. ++ */ ++ bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, ++ MAX_SUPPORTED_CSGS); + -+/** -+ * kbase_csf_tiler_heap_reclaim_ctx_init - Initializer on per context data fields for use -+ * with the tiler heap reclaim manager. -+ * -+ * @kctx: Pointer to the kbase_context. -+ * -+ */ -+void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx); ++ while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { ++ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + -+/** -+ * kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manger. 
-+ * -+ * @kbdev: Pointer to the device. -+ * -+ */ -+void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); ++ bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); + -+/** -+ * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. -+ * -+ * @kbdev: Pointer to the device. -+ * -+ */ -+void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev); ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ slots_state_changed(kbdev, changed, ++ csg_slot_stopped_raw), ++ remaining); + -+#endif -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c -new file mode 100644 -index 000000000..ea6c11624 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c -@@ -0,0 +1,177 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (likely(remaining)) { ++ u32 i; + -+#include -+#include -+#include -+#include -+#include ++ for_each_set_bit(i, changed, num_groups) { ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[i].resident_group; + -+#include "mali_kbase.h" -+#include "mali_kbase_config_defaults.h" -+#include "mali_kbase_csf_firmware.h" -+#include "mali_kbase_csf_timeout.h" -+#include "mali_kbase_reset_gpu.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" ++ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) ++ continue; + -+/** -+ * set_timeout - set a new global progress timeout. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @timeout: the maximum number of GPU cycles without forward progress to allow -+ * to elapse before terminating a GPU command queue group. -+ * -+ * Return: 0 on success, or negative on failure -+ * (e.g. -ERANGE if the requested timeout is too large). -+ */ -+static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) -+{ -+ if (timeout > GLB_PROGRESS_TIMER_TIMEOUT_MAX) { -+ dev_err(kbdev->dev, "Timeout %llu is too large.\n", timeout); -+ return -ERANGE; -+ } ++ /* The on slot csg is now stopped */ ++ clear_bit(i, slot_mask); + -+ dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout); ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( ++ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); + -+ atomic64_set(&kbdev->csf.progress_timeout, timeout); ++ if (likely(group)) { ++ bool as_fault; ++ /* Only do save/cleanup if the ++ * group is not terminated during ++ * the sleep. 
++ */ ++ save_csg_slot(group); ++ as_fault = cleanup_csg_slot(group); ++ /* If AS fault detected, evict it */ ++ if (as_fault) { ++ sched_evict_group(group, true, true); ++ set_bit(i, evicted_mask); ++ } ++ } + -+ return 0; -+} ++ program_vacant_csg_slot(kbdev, (s8)i); ++ } ++ } else { ++ u32 i; + -+/** -+ * progress_timeout_store - Store the progress_timeout device attribute. -+ * @dev: The device that has the attribute. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. -+ * -+ * This function is called when the progress_timeout sysfs file is written to. -+ * It checks the data written, and if valid updates the progress timeout value. -+ * The function also checks gpu reset status, if the gpu is in reset process, -+ * the function will return an error code (-EBUSY), and no change for timeout -+ * value. -+ * -+ * Return: @count if the function succeeded. An error code on failure. -+ */ -+static ssize_t progress_timeout_store(struct device * const dev, -+ struct device_attribute * const attr, const char * const buf, -+ size_t const count) -+{ -+ struct kbase_device *const kbdev = dev_get_drvdata(dev); -+ int err; -+ u64 timeout; ++ /* Groups that have failed to suspend in time shall ++ * raise a fatal error as they could no longer be ++ * safely resumed. ++ */ ++ for_each_set_bit(i, slot_mask, num_groups) { ++ struct kbase_queue_group *const group = ++ scheduler->csg_slots[i].resident_group; ++ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + -+ if (!kbdev) -+ return -ENODEV; ++ struct base_gpu_queue_group_error const ++ err_payload = { .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { ++ .fatal_group = { ++ .status = ++ GPU_EXCEPTION_TYPE_SW_FAULT_2, ++ } } }; + -+ err = kbase_reset_gpu_try_prevent(kbdev); -+ if (err) { -+ dev_warn(kbdev->dev, -+ "Couldn't process progress_timeout write operation for GPU reset.\n"); -+ return -EBUSY; -+ } ++ if (unlikely(group == NULL)) ++ continue; + -+ err = kstrtou64(buf, 0, &timeout); -+ if (err) -+ dev_err(kbdev->dev, -+ "Couldn't process progress_timeout write operation.\n" -+ "Use format \n"); -+ else -+ err = set_timeout(kbdev, timeout); ++ /* TODO GPUCORE-25328: The CSG can't be ++ * terminated, the GPU will be reset as a ++ * work-around. ++ */ ++ dev_warn( ++ kbdev->dev, ++ "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)", ++ kbase_backend_get_cycle_cnt(kbdev), ++ group->handle, group->kctx->tgid, ++ group->kctx->id, i, ++ kbdev->csf.fw_timeout_ms); ++ if (kbase_csf_firmware_ping_wait(kbdev, ++ FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ schedule_actions_trigger_df(kbdev, group->kctx, error_type); + -+ if (!err) { -+ kbase_csf_scheduler_pm_active(kbdev); ++ kbase_csf_add_group_fatal_error(group, &err_payload); ++ kbase_event_wakeup(group->kctx); + -+ err = kbase_csf_scheduler_wait_mcu_active(kbdev); -+ if (!err) -+ err = kbase_csf_firmware_set_timeout(kbdev, timeout); ++ /* The group has failed suspension, stop ++ * further examination. 
++ */ ++ clear_bit(i, slot_mask); ++ set_bit(i, scheduler->csgs_events_enable_mask); ++ } + -+ kbase_csf_scheduler_pm_idle(kbdev); ++ suspend_wait_failed = true; ++ } + } + -+ kbase_reset_gpu_allow(kbdev); -+ if (err) -+ return err; ++ if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS)) ++ dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", ++ num_groups, evicted_mask); + -+ return count; ++ if (likely(!suspend_wait_failed)) { ++ u32 i; ++ ++ while (scheduler->ngrp_to_schedule && ++ scheduler->remaining_tick_slots) { ++ i = find_first_zero_bit(scheduler->csg_inuse_bitmap, ++ num_groups); ++ if (WARN_ON(i == num_groups)) ++ break; ++ program_vacant_csg_slot(kbdev, (s8)i); ++ if (!csg_slot_in_use(kbdev, (int)i)) { ++ dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); ++ break; ++ } ++ } ++ } else { ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ } +} + -+/** -+ * progress_timeout_show - Show the progress_timeout device attribute. -+ * @dev: The device that has the attribute. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the global timeout. -+ * -+ * This function is called to get the progress timeout value. -+ * -+ * Return: The number of bytes output to @buf. -+ */ -+static ssize_t progress_timeout_show(struct device * const dev, -+ struct device_attribute * const attr, char * const buf) ++static void suspend_queue_group(struct kbase_queue_group *group) +{ -+ struct kbase_device *const kbdev = dev_get_drvdata(dev); -+ int err; ++ unsigned long flags; ++ struct kbase_csf_scheduler *const scheduler = ++ &group->kctx->kbdev->csf.scheduler; + -+ if (!kbdev) -+ return -ENODEV; ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ /* This shall be used in program_suspending_csg_slots() where we ++ * assume that whilst CSGs are being suspended, this bitmask is not ++ * used by anything else i.e., it indicates only the CSGs going ++ * through suspension. 
++ */ ++ clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ err = scnprintf(buf, PAGE_SIZE, "%llu\n", kbase_csf_timeout_get(kbdev)); ++ /* If AS fault detected, terminate the group */ ++ if (!kctx_as_enabled(group->kctx) || group->faulted) ++ term_csg_slot(group); ++ else ++ suspend_csg_slot(group); ++} + -+ return err; ++static void wait_csg_slots_start(struct kbase_device *kbdev) ++{ ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; ++ u32 i; + -+} ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+static DEVICE_ATTR_RW(progress_timeout); ++ /* extract start slot flags for check */ ++ for (i = 0; i < num_groups; i++) { ++ if (atomic_read(&scheduler->csg_slots[i].state) == ++ CSG_SLOT_READY2RUN) ++ set_bit(i, slot_mask); ++ } + -+int kbase_csf_timeout_init(struct kbase_device *const kbdev) -+{ -+ u64 timeout = DEFAULT_PROGRESS_TIMEOUT; -+ int err; ++ while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { ++ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + -+#if IS_ENABLED(CONFIG_OF) -+ err = of_property_read_u64(kbdev->dev->of_node, -+ "progress_timeout", &timeout); -+ if (!err) -+ dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", -+ timeout); -+#endif ++ bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); + -+ err = set_timeout(kbdev, timeout); -+ if (err) -+ return err; ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ slots_state_changed(kbdev, changed, csg_slot_running), ++ remaining); + -+ err = sysfs_create_file(&kbdev->dev->kobj, -+ &dev_attr_progress_timeout.attr); -+ if (err) -+ dev_err(kbdev->dev, "SysFS file creation failed\n"); ++ if (likely(remaining)) { ++ for_each_set_bit(i, changed, num_groups) { ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[i].resident_group; + -+ return err; -+} ++ /* The on slot csg is now running */ ++ clear_bit(i, slot_mask); ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); ++ } ++ } else { ++ const int csg_nr = ffs(slot_mask[0]) - 1; ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[csg_nr].resident_group; ++ enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT; + -+void kbase_csf_timeout_term(struct kbase_device * const kbdev) -+{ -+ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); -+} ++ dev_err(kbdev->dev, ++ "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n", ++ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, ++ num_groups, slot_mask); ++ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ schedule_actions_trigger_df(kbdev, group->kctx, error_type); + -+u64 kbase_csf_timeout_get(struct kbase_device *const kbdev) -+{ -+ return atomic64_read(&kbdev->csf.progress_timeout); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ break; ++ } ++ } +} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h -new file mode 100644 -index 000000000..b406eaad2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h -@@ -0,0 +1,66 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 
-+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ ++/** ++ * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state ++ * flagged after the completion of a CSG status ++ * update command + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kbdev: Pointer to the GPU device. ++ * @slot: The given slot for checking an occupying resident group's idle ++ * state. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called at the start of scheduling tick to check the ++ * idle status of a queue group resident on a CSG slot. ++ * The caller must make sure the corresponding status update command has ++ * been called and completed before checking this status. + * ++ * Return: true if the group resident on slot is idle, otherwise false. + */ ++static bool group_on_slot_is_idle(struct kbase_device *kbdev, ++ unsigned long slot) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & ++ CSG_STATUS_STATE_IDLE_MASK; + -+#ifndef _KBASE_CSF_TIMEOUT_H_ -+#define _KBASE_CSF_TIMEOUT_H_ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+struct kbase_device; ++ return idle; ++} + +/** -+ * kbase_csf_timeout_init - Initialize the progress timeout. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * Must be zero-initialized. ++ * slots_update_state_changed() - Check the handshake state of a subset of ++ * command group slots. + * -+ * The progress timeout is the number of GPU clock cycles allowed to elapse -+ * before the driver terminates a GPU command queue group in which a task is -+ * making no forward progress on an endpoint (e.g. a shader core). This function -+ * determines the initial value and also creates a sysfs file to allow the -+ * timeout to be reconfigured later. ++ * @kbdev: The GPU device. ++ * @field_mask: The field mask for checking the state in the csg_req/ack. ++ * @slots_mask: A bit_map specifying the slots to check. ++ * @slots_done: A cleared bit_map for returning the slots that ++ * have finished update. + * -+ * Reconfigures the global firmware interface to enable the current timeout. ++ * Checks the state of a subset of slots selected through the slots_mask ++ * bit_map. Records which slots' handshake completed and send it back in the ++ * slots_done bit_map. + * -+ * Return: 0 on success, or negative on failure. ++ * Return: true if the slots_done is set for at least one slot. ++ * Otherwise false. + */ -+int kbase_csf_timeout_init(struct kbase_device *kbdev); ++static ++bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, ++ const unsigned long *slots_mask, unsigned long *slots_done) ++{ ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ bool changed = false; ++ u32 i; + -+/** -+ * kbase_csf_timeout_term - Terminate the progress timeout. 
-+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Removes the sysfs file which allowed the timeout to be reconfigured. -+ * Does nothing if called on a zero-initialized object. -+ */ -+void kbase_csf_timeout_term(struct kbase_device *kbdev); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/** -+ * kbase_csf_timeout_get - get the current global progress timeout. -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * -+ * Return: the maximum number of GPU cycles that is allowed to elapse without -+ * forward progress before the driver terminates a GPU command queue -+ * group. -+ */ -+u64 kbase_csf_timeout_get(struct kbase_device *const kbdev); ++ for_each_set_bit(i, slots_mask, num_groups) { ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[i]; ++ u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); + -+#endif /* _KBASE_CSF_TIMEOUT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c -new file mode 100644 -index 000000000..6859d6529 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c -@@ -0,0 +1,441 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ ++ if (!(state & field_mask)) { ++ set_bit(i, slots_done); ++ changed = true; ++ } ++ } ++ ++ return changed; ++} ++ ++/** ++ * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on ++ * the specified groups. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kbdev: Pointer to the GPU device. ++ * @field_mask: The field mask for checking the state in the csg_req/ack. ++ * @slot_mask: Bitmap reflecting the slots, the function will modify ++ * the acknowledged slots by clearing their corresponding ++ * bits. ++ * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function waits for the acknowledgment of the request that have ++ * already been placed for the CSG slots by the caller. Currently used for ++ * the CSG priority update and status update requests. + * ++ * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For ++ * timed out condition with unacknowledged slots, their bits remain ++ * set in the slot_mask. 
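
Editor's note: the request/acknowledge handshake that slots_update_state_changed() and wait_csg_slots_handshake_ack() rely on can be hard to see from the register reads alone. Below is a minimal standalone C model of that toggle protocol, not driver code: the struct name csg_iface_model, the helper names and the EP_CFG bit position are invented for illustration; only the XOR-and-mask completion test mirrors the patch above.

/* Standalone model of the CSG_REQ/CSG_ACK toggle handshake: the host toggles
 * a field in the request register, the firmware echoes it in the acknowledge
 * register, and the request is complete once the two agree in that field.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EP_CFG_MASK (1u << 27) /* illustrative bit position only */

struct csg_iface_model {
	uint32_t req; /* host-written request register */
	uint32_t ack; /* firmware-written acknowledge register */
};

/* Host side: issue a request by toggling the field in REQ. */
static void host_request(struct csg_iface_model *g, uint32_t mask)
{
	g->req ^= mask;
}

/* Firmware side: acknowledge by copying the field from REQ into ACK. */
static void firmware_acknowledge(struct csg_iface_model *g, uint32_t mask)
{
	g->ack = (g->ack & ~mask) | (g->req & mask);
}

/* Completion test used while waiting: REQ and ACK agree in the field. */
static bool handshake_done(const struct csg_iface_model *g, uint32_t mask)
{
	return ((g->req ^ g->ack) & mask) == 0;
}

int main(void)
{
	struct csg_iface_model g = { 0 };

	host_request(&g, EP_CFG_MASK);
	printf("after request: done=%d\n", handshake_done(&g, EP_CFG_MASK));
	firmware_acknowledge(&g, EP_CFG_MASK);
	printf("after ack:     done=%d\n", handshake_done(&g, EP_CFG_MASK));
	return 0;
}

The same pattern is what the wait loop polls per slot: a slot's bit is cleared from the wait mask once its field has been echoed back, and bits still set at timeout identify the unacknowledged slots.
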
+ */ ++static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, ++ u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies) ++{ ++ const u32 num_groups = kbdev->csf.global_iface.group_num; ++ long remaining = wait_in_jiffies; + -+#include "mali_kbase_csf_tl_reader.h" ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+#include "mali_kbase_csf_trace_buffer.h" -+#include "mali_kbase_reset_gpu.h" ++ while (!bitmap_empty(slot_mask, num_groups) && ++ !kbase_reset_gpu_is_active(kbdev)) { ++ DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 }; + -+#include "tl/mali_kbase_tlstream.h" -+#include "tl/mali_kbase_tl_serialize.h" -+#include "tl/mali_kbase_tracepoints.h" ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ slots_update_state_changed(kbdev, field_mask, ++ slot_mask, dones), ++ remaining); + -+#include "mali_kbase_pm.h" -+#include "mali_kbase_hwaccess_time.h" ++ if (likely(remaining)) ++ bitmap_andnot(slot_mask, slot_mask, dones, num_groups); ++ else { + -+#include ++ /* Timed-out on the wait */ ++ return -ETIMEDOUT; ++ } ++ } + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include "tl/mali_kbase_timeline_priv.h" -+#include -+#endif ++ return 0; ++} + -+/* Name of the CSFFW timeline tracebuffer. */ -+#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline" -+/* Name of the timeline header metatadata */ -+#define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header" ++static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) ++{ ++ unsigned long *slot_mask = ++ kbdev->csf.scheduler.csg_slots_prio_update; ++ long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, ++ slot_mask, wait_time); ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (unlikely(ret != 0)) { ++ const int csg_nr = ffs(slot_mask[0]) - 1; ++ struct kbase_queue_group *group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT; ++ ++ dev_warn( ++ kbdev->dev, ++ "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", ++ kbase_backend_get_cycle_cnt(kbdev), ++ kbdev->csf.fw_timeout_ms, ++ slot_mask[0]); ++ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ schedule_actions_trigger_df(kbdev, group->kctx, error_type); ++ ++ /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. 
*/ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } ++} ++ ++static void report_csg_termination(struct kbase_queue_group *const group) ++{ ++ struct base_gpu_queue_group_error ++ err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { .fatal_group = { ++ .status = GPU_EXCEPTION_TYPE_SW_FAULT_2, ++ } } }; ++ ++ kbase_csf_add_group_fatal_error(group, &err); ++} ++ ++void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct list_head *evicted_groups) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ u32 slot; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ mutex_lock(&scheduler->lock); ++ ++ /* This code is only called during reset, so we don't wait for the CSG ++ * slots to be stopped ++ */ ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u); ++ for (slot = 0; slot < num_groups; slot++) { ++ group = kbdev->csf.scheduler.csg_slots[slot].resident_group; ++ if (group && group->kctx == kctx) { ++ bool as_fault; ++ ++ dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset", ++ group->handle, group->csg_nr); ++ ++ term_csg_slot(group); ++ as_fault = cleanup_csg_slot(group); ++ /* remove the group from the scheduler list */ ++ sched_evict_group(group, as_fault, false); ++ /* signal Userspace that CSG is being terminated */ ++ report_csg_termination(group); ++ /* return the evicted group to the caller */ ++ list_add_tail(&group->link, evicted_groups); ++ set_bit(slot, slot_mask); ++ } ++ } ++ ++ dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", ++ kctx->tgid, kctx->id, num_groups, slot_mask); ++ ++ /* Fatal errors may have been the cause of the GPU reset ++ * taking place, in which case we want to make sure that ++ * we wake up the fatal event queue to notify userspace ++ * only once. Otherwise, we may have duplicate event ++ * notifications between the time the first notification ++ * occurs and the time the GPU is reset. ++ */ ++ kbase_event_wakeup(kctx); ++ ++ mutex_unlock(&scheduler->lock); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups); ++} + +/** -+ * struct kbase_csffw_tl_message - CSFFW timeline message. ++ * scheduler_slot_protm_ack - Acknowledging the protected region requests ++ * from the resident group on a given slot. + * -+ * @msg_id: Message ID. -+ * @timestamp: Timestamp of the event. -+ * @cycle_counter: Cycle number of the event. ++ * @kbdev: Pointer to the GPU device. ++ * @group: Pointer to the resident group on the given slot. ++ * @slot: The slot that the given group is actively operating on. + * -+ * Contain fields that are common for all CSFFW timeline messages. ++ * The function assumes that the given slot is in stable running state and ++ * has already been judged by the caller on that any pending protected region ++ * requests of the resident group should be acknowledged. ++ * ++ * Return: true if the group has pending protm request(s) and is acknowledged. ++ * The caller should arrange to enter the protected mode for servicing ++ * it. Otherwise return false, indicating the group has no pending protm ++ * request. 
+ */ -+struct kbase_csffw_tl_message { -+ u32 msg_id; -+ u64 timestamp; -+ u64 cycle_counter; -+} __packed __aligned(4); -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+static int kbase_csf_tl_debugfs_poll_interval_read(void *data, u64 *val) ++static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, ++ struct kbase_queue_group *const group, ++ const int slot) +{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool protm_ack = false; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 max_csi; ++ int i; + -+ *val = self->timer_interval; ++ if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) ++ return protm_ack; + -+ return 0; -+} ++ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); + -+static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; -+ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; ++ max_csi = ginfo->stream_num; ++ for (i = find_first_bit(group->protm_pending_bitmap, max_csi); ++ i < max_csi; ++ i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { ++ struct kbase_queue *queue = group->bound_queues[i]; + -+ if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) -+ return -EINVAL; ++ clear_bit(i, group->protm_pending_bitmap); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue, ++ group->protm_pending_bitmap[0]); + -+ self->timer_interval = (u32)val; ++ if (!WARN_ON(!queue) && queue->enabled) { ++ struct kbase_csf_cmd_stream_info *stream = ++ &ginfo->streams[i]; ++ u32 cs_protm_ack = kbase_csf_firmware_cs_output( ++ stream, CS_ACK) & ++ CS_ACK_PROTM_PEND_MASK; ++ u32 cs_protm_req = kbase_csf_firmware_cs_input_read( ++ stream, CS_REQ) & ++ CS_REQ_PROTM_PEND_MASK; + -+ return 0; -+} ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, ++ queue, cs_protm_ack ^ cs_protm_req); + -+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, -+ kbase_csf_tl_debugfs_poll_interval_read, -+ kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); ++ if (cs_protm_ack == cs_protm_req) { ++ dev_dbg(kbdev->dev, ++ "PROTM-ack already done for queue-%d group-%d slot-%d", ++ queue->csi_index, group->handle, slot); ++ continue; ++ } + -+void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) -+{ -+ debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, -+ kbdev->debugfs_instr_directory, kbdev, -+ &kbase_csf_tl_poll_interval_fops); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ cs_protm_ack, ++ CS_ACK_PROTM_PEND_MASK); ++ protm_ack = true; ++ dev_dbg(kbdev->dev, ++ "PROTM-ack for queue-%d, group-%d slot-%d", ++ queue->csi_index, group->handle, slot); ++ } ++ } ++ ++ return protm_ack; +} -+#endif + +/** -+ * tl_reader_overflow_notify() - Emit stream overflow tracepoint. ++ * protm_enter_set_next_pending_seq - Update the scheduler's field of ++ * tick_protm_pending_seq to that from the next available on-slot protm ++ * pending CSG. + * -+ * @self: CSFFW TL Reader instance. -+ * @msg_buf_start: Start of the message. -+ * @msg_buf_end: End of the message buffer. ++ * @kbdev: Pointer to the GPU device. 
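
Editor's note: a compact standalone sketch of the per-stream acknowledgment performed by scheduler_slot_protm_ack() may help here. It is not driver code; struct cs_model, the helper name and the single-bit PROTM_PEND mask are simplifications, but the logic (clear the pending bit, skip streams whose CS_REQ and CS_ACK already agree, otherwise copy the ACK value of the field into REQ) follows the function above.

/* Standalone sketch of acknowledging pending protected-mode requests per CS. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PROTM_PEND_MASK 0x1u /* bit 0 stands in for the PROTM_PEND field */
#define MAX_CS          8

struct cs_model {
	bool enabled;
	uint32_t req;
	uint32_t ack;
};

static bool ack_pending_protm(struct cs_model *streams, uint32_t *pending_bitmap)
{
	bool acked = false;

	for (int i = 0; i < MAX_CS; i++) {
		if (!(*pending_bitmap & (1u << i)))
			continue;

		*pending_bitmap &= ~(1u << i); /* clear the pending bit */

		if (!streams[i].enabled)
			continue;

		/* Already acknowledged for this CS: nothing more to do. */
		if (((streams[i].req ^ streams[i].ack) & PROTM_PEND_MASK) == 0)
			continue;

		/* Copy the ACK state of the field into REQ. */
		streams[i].req = (streams[i].req & ~PROTM_PEND_MASK) |
				 (streams[i].ack & PROTM_PEND_MASK);
		acked = true;
	}
	return acked;
}

int main(void)
{
	struct cs_model streams[MAX_CS] = { [2] = { .enabled = true, .req = 0, .ack = 1 } };
	uint32_t pending = 1u << 2;

	printf("acked=%d\n", ack_pending_protm(streams, &pending)); /* 1 */
	return 0;
}
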
++ * ++ * If applicable, the function updates the scheduler's tick_protm_pending_seq ++ * field from the next available on-slot protm pending CSG. If not, the field ++ * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID. + */ -+static void tl_reader_overflow_notify( -+ const struct kbase_csf_tl_reader *self, -+ u8 *const msg_buf_start, -+ u8 *const msg_buf_end) ++static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) +{ -+ struct kbase_device *kbdev = self->kbdev; -+ struct kbase_csffw_tl_message message = {0}; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; ++ DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; ++ u32 i; + -+ /* Reuse the timestamp and cycle count from current event if possible */ -+ if (msg_buf_start + sizeof(message) <= msg_buf_end) -+ memcpy(&message, msg_buf_start, sizeof(message)); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); + -+ KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( -+ kbdev, message.timestamp, message.cycle_counter); ++ bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, ++ num_groups); ++ /* Reset the tick's pending protm seq number to invalid initially */ ++ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; ++ for_each_set_bit(i, active_csgs, num_groups) { ++ struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; ++ ++ /* Set to the next pending protm group's scan_seq_number */ ++ if ((group != scheduler->active_protm_grp) && ++ (!bitmap_empty(group->protm_pending_bitmap, num_csis)) && ++ (group->scan_seq_num < scheduler->tick_protm_pending_seq)) ++ scheduler->tick_protm_pending_seq = group->scan_seq_num; ++ } +} + +/** -+ * tl_reader_overflow_check() - Check if an overflow has happened ++ * scheduler_group_check_protm_enter - Request the given group to be evaluated ++ * for triggering the protected mode. + * -+ * @self: CSFFW TL Reader instance. -+ * @event_id: Incoming event id. ++ * @kbdev: Pointer to the GPU device. ++ * @input_grp: Pointer to the GPU queue group. + * -+ * Return: True, if an overflow has happened, False otherwise. ++ * The function assumes the given group is either an active running group or ++ * the scheduler internally maintained field scheduler->top_grp. ++ * ++ * If the GPU is not already running in protected mode and the input group ++ * has protected region requests from its bound queues, the requests are ++ * acknowledged and the GPU is instructed to enter the protected mode. + */ -+static bool tl_reader_overflow_check( -+ struct kbase_csf_tl_reader *self, -+ u16 event_id) ++static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, ++ struct kbase_queue_group *const input_grp) +{ -+ struct kbase_device *kbdev = self->kbdev; -+ bool has_overflow = false; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; ++ unsigned long flags; ++ bool protm_in_use; + -+ /* 0 is a special event_id and reserved for the very first tracepoint -+ * after reset, we should skip overflow check when reset happened. 
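
Editor's note: the selection done by protm_enter_set_next_pending_seq() reduces to "minimum scan sequence number over in-use, non-idle slots that still have pending PROTM bits". The standalone sketch below models that; struct slot_model is invented, the sentinel is modelled as UINT32_MAX (the real value is whatever KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID is defined to), and the exclusion of the currently active protected-mode group is omitted for brevity.

/* Standalone sketch: pick the smallest scan_seq_num among active slots with
 * pending PROTM requests, starting from an "invalid" sentinel.
 */
#include <stdint.h>
#include <stdio.h>

#define SEQ_INVALID UINT32_MAX
#define NUM_SLOTS   4

struct slot_model {
	uint32_t inuse;         /* slot has a resident group */
	uint32_t idle;          /* slot flagged idle */
	uint32_t protm_pending; /* resident group has pending PROTM bits */
	uint32_t scan_seq_num;  /* scan-out order of the resident group */
};

static uint32_t next_pending_seq(const struct slot_model *slots, int n)
{
	uint32_t seq = SEQ_INVALID;

	for (int i = 0; i < n; i++) {
		/* "active" means in use and not reported idle */
		if (!slots[i].inuse || slots[i].idle)
			continue;
		if (slots[i].protm_pending && slots[i].scan_seq_num < seq)
			seq = slots[i].scan_seq_num;
	}
	return seq;
}

int main(void)
{
	struct slot_model slots[NUM_SLOTS] = {
		{ 1, 0, 1, 7 }, { 1, 1, 1, 2 }, { 1, 0, 1, 3 }, { 0, 0, 0, 0 },
	};

	printf("next pending seq: %u\n", next_pending_seq(slots, NUM_SLOTS)); /* 3 */
	return 0;
}
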
++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Return early if the physical pages have not been allocated yet */ ++ if (unlikely(!sbuf->pma)) ++ return; ++ ++ /* This lock is taken to prevent the issuing of MMU command during the ++ * transition to protected mode. This helps avoid the scenario where the ++ * entry to protected mode happens with a memory region being locked and ++ * the same region is then accessed by the GPU in protected mode. + */ -+ if (event_id != 0) { -+ has_overflow = self->got_first_event -+ && self->expected_event_id != event_id; ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); + -+ if (has_overflow) -+ dev_warn(kbdev->dev, -+ "CSFFW overflow, event_id: %u, expected: %u.", -+ event_id, self->expected_event_id); -+ } ++ /* Check if the previous transition to enter & exit the protected ++ * mode has completed or not. ++ */ ++ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) || ++ kbdev->protected_mode; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use); + -+ self->got_first_event = true; -+ self->expected_event_id = event_id + 1; -+ /* When event_id reaches its max value, it skips 0 and wraps to 1. */ -+ if (self->expected_event_id == 0) -+ self->expected_event_id++; ++ /* Firmware samples the PROTM_PEND ACK bit for CSs when ++ * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit ++ * is set for a CS after Host has sent the PROTM_ENTER ++ * Global request, then there is no guarantee that firmware will ++ * notice that prior to switching to protected mode. And firmware ++ * may not again raise the PROTM_PEND interrupt for that CS ++ * later on. To avoid that uncertainty PROTM_PEND ACK bit ++ * is not set for a CS if the request to enter protected ++ * mode has already been sent. It will be set later (after the exit ++ * from protected mode has taken place) when the group to which ++ * CS is bound becomes the top group. ++ * ++ * The actual decision of entering protected mode is hinging on the ++ * input group is the top priority group, or, in case the previous ++ * top-group is evicted from the scheduler during the tick, its would ++ * be replacement, and that it is currently in a stable state (i.e. the ++ * slot state is running). ++ */ ++ if (!protm_in_use && !WARN_ON(!input_grp)) { ++ const int slot = ++ kbase_csf_scheduler_group_get_slot_locked(input_grp); + -+ return has_overflow; ++ /* check the input_grp is running and requesting protected mode ++ */ ++ if (slot >= 0 && ++ atomic_read(&scheduler->csg_slots[slot].state) == ++ CSG_SLOT_RUNNING) { ++ if (kctx_as_enabled(input_grp->kctx) && ++ scheduler_slot_protm_ack(kbdev, input_grp, slot)) { ++ int err; ++ ++ /* Option of acknowledging to multiple ++ * CSGs from the same kctx is dropped, ++ * after consulting with the ++ * architecture team. See the comment in ++ * GPUCORE-21394. ++ */ ++ ++ /* Switch to protected mode */ ++ scheduler->active_protm_grp = input_grp; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, ++ 0u); ++ ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ /* Coresight must be disabled before entering protected mode. 
*/ ++ kbase_debug_coresight_csf_disable_pmode_enter(kbdev); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++ ++ kbase_csf_enter_protected_mode(kbdev); ++ /* Set the pending protm seq number to the next one */ ++ protm_enter_set_next_pending_seq(kbdev); ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ err = kbase_csf_wait_protected_mode_enter(kbdev); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ if (err) ++ schedule_actions_trigger_df(kbdev, input_grp->kctx, ++ DF_PROTECTED_MODE_ENTRY_FAILURE); ++ ++ scheduler->protm_enter_time = ktime_get_raw(); ++ ++ return; ++ } ++ } ++ } ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); +} + +/** -+ * tl_reader_reset() - Reset timeline tracebuffer reader state machine. ++ * scheduler_check_pmode_progress - Check if protected mode execution is progressing + * -+ * @self: CSFFW TL Reader instance. ++ * @kbdev: Pointer to the GPU device. + * -+ * Reset the reader to the default state, i.e. set all the -+ * mutable fields to zero. ++ * This function is called when the GPU is in protected mode. ++ * ++ * It will check if the time spent in protected mode is less ++ * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT ++ * request is sent to the FW. + */ -+static void tl_reader_reset(struct kbase_csf_tl_reader *self) ++static void scheduler_check_pmode_progress(struct kbase_device *kbdev) +{ -+ self->got_first_event = false; -+ self->is_active = false; -+ self->expected_event_id = 0; -+ self->tl_header.btc = 0; -+} ++ u64 protm_spent_time_ms; ++ u64 protm_progress_timeout = ++ kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT); ++ s64 diff_ms_signed = ++ ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time); + -+int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) -+{ -+ int ret = 0; -+ struct kbase_device *kbdev = self->kbdev; -+ struct kbase_tlstream *stream = self->stream; ++ if (diff_ms_signed < 0) ++ return; + -+ u8 *read_buffer = self->read_buffer; -+ const size_t read_buffer_size = sizeof(self->read_buffer); ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ u32 bytes_read; -+ u8 *csffw_data_begin; -+ u8 *csffw_data_end; -+ u8 *csffw_data_it; ++ protm_spent_time_ms = (u64)diff_ms_signed; ++ if (protm_spent_time_ms < protm_progress_timeout) ++ return; + -+ unsigned long flags; ++ dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu", ++ protm_spent_time_ms, protm_progress_timeout); + -+ spin_lock_irqsave(&self->read_lock, flags); ++ /* Prompt the FW to exit protected mode */ ++ scheduler_force_protm_exit(kbdev); ++} + -+ /* If not running, early exit. 
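
Editor's note: the protected-mode progress check above is a plain elapsed-time comparison. The sketch below restates it standalone, with millisecond values passed in directly instead of ktime_ms_delta(); the negative-delta early return mirrors the driver's guard. It is illustrative only.

/* Standalone sketch of the protected-mode progress timeout decision. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool pmode_progress_timed_out(int64_t now_ms, int64_t enter_ms,
				     uint64_t timeout_ms)
{
	int64_t diff_ms = now_ms - enter_ms;

	/* A negative delta is treated as "no timeout", as in the driver. */
	if (diff_ms < 0)
		return false;

	return (uint64_t)diff_ms >= timeout_ms;
}

int main(void)
{
	printf("%d\n", pmode_progress_timed_out(10500, 10000, 1000)); /* 0 */
	printf("%d\n", pmode_progress_timed_out(12000, 10000, 1000)); /* 1: force PROTM exit */
	return 0;
}
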
*/ -+ if (!self->is_active) { -+ spin_unlock_irqrestore(&self->read_lock, flags); -+ return -EBUSY; ++static void scheduler_apply(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ const u32 total_csg_slots = kbdev->csf.global_iface.group_num; ++ const u32 available_csg_slots = scheduler->num_csg_slots_for_tick; ++ u32 suspend_cnt = 0; ++ u32 remain_cnt = 0; ++ u32 resident_cnt = 0; ++ struct kbase_queue_group *group; ++ u32 i; ++ u32 spare; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Suspend those resident groups not in the run list */ ++ for (i = 0; i < total_csg_slots; i++) { ++ group = scheduler->csg_slots[i].resident_group; ++ if (group) { ++ resident_cnt++; ++ if (group->prepared_seq_num >= available_csg_slots) { ++ suspend_queue_group(group); ++ suspend_cnt++; ++ } else ++ remain_cnt++; ++ } + } + -+ /* Copying the whole buffer in a single shot. We assume -+ * that the buffer will not contain partially written messages. -+ */ -+ bytes_read = kbase_csf_firmware_trace_buffer_read_data( -+ self->trace_buffer, read_buffer, read_buffer_size); -+ csffw_data_begin = read_buffer; -+ csffw_data_end = read_buffer + bytes_read; ++ /* Initialize the remaining available csg slots for the tick/tock */ ++ scheduler->remaining_tick_slots = available_csg_slots; + -+ for (csffw_data_it = csffw_data_begin; -+ csffw_data_it < csffw_data_end;) { -+ u32 event_header; -+ u16 event_id; -+ u16 event_size; -+ unsigned long acq_flags; -+ char *buffer; ++ /* If there are spare slots, apply heads in the list */ ++ spare = (available_csg_slots > resident_cnt) ? ++ (available_csg_slots - resident_cnt) : 0; ++ while (!list_empty(&scheduler->groups_to_schedule)) { ++ group = list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); + -+ /* Can we safely read event_id? */ -+ if (csffw_data_it + sizeof(event_header) > csffw_data_end) { -+ dev_warn( -+ kbdev->dev, -+ "Unable to parse CSFFW tracebuffer event header."); -+ ret = -EBUSY; -+ break; -+ } ++ if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && ++ group->prepared_seq_num < available_csg_slots) { ++ /* One of the resident remainders */ ++ update_csg_slot_priority(group, ++ get_slot_priority(group)); ++ } else if (spare != 0) { ++ s8 slot = (s8)find_first_zero_bit( ++ kbdev->csf.scheduler.csg_inuse_bitmap, ++ total_csg_slots); + -+ /* Read and parse the event header. */ -+ memcpy(&event_header, csffw_data_it, sizeof(event_header)); -+ event_id = (event_header >> 0) & 0xFFFF; -+ event_size = (event_header >> 16) & 0xFFFF; -+ csffw_data_it += sizeof(event_header); ++ if (WARN_ON(slot >= (s8)total_csg_slots)) ++ break; + -+ /* Detect if an overflow has happened. */ -+ if (tl_reader_overflow_check(self, event_id)) -+ tl_reader_overflow_notify(self, -+ csffw_data_it, -+ csffw_data_end); ++ if (!kctx_as_enabled(group->kctx) || group->faulted) { ++ /* Drop the head group and continue */ ++ update_offslot_non_idle_cnt(group); ++ remove_scheduled_group(kbdev, group); ++ continue; ++ } ++ program_csg_slot(group, slot, ++ get_slot_priority(group)); ++ if (unlikely(!csg_slot_in_use(kbdev, slot))) ++ break; + -+ /* Can we safely read the message body? */ -+ if (csffw_data_it + event_size > csffw_data_end) { -+ dev_warn(kbdev->dev, -+ "event_id: %u, can't read with event_size: %u.", -+ event_id, event_size); -+ ret = -EBUSY; ++ spare--; ++ } else + break; -+ } -+ -+ /* Convert GPU timestamp to CPU timestamp. 
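
Editor's note: the core policy of scheduler_apply() is that the scan assigns every group a prepared sequence number and only the first num_csg_slots_for_tick ranks may stay resident; resident groups ranked beyond that are suspended off their slots before spare slots are programmed. The following standalone sketch shows just that first step; struct resident_model and the helper name are invented, and suspension is modelled by clearing a flag rather than calling suspend_queue_group().

/* Standalone sketch: suspend resident groups whose rank exceeds the number of
 * CSG slots available for this tick.
 */
#include <stdbool.h>
#include <stdio.h>

#define NUM_SLOTS 4

struct resident_model {
	bool occupied;
	unsigned int prepared_seq_num; /* rank from the scan, 0 is best */
};

static int suspend_overflowing_groups(struct resident_model *slots,
				      unsigned int available_slots)
{
	int suspended = 0;

	for (int i = 0; i < NUM_SLOTS; i++) {
		if (!slots[i].occupied)
			continue;
		if (slots[i].prepared_seq_num >= available_slots) {
			slots[i].occupied = false; /* stands in for suspend_queue_group() */
			suspended++;
		}
	}
	return suspended;
}

int main(void)
{
	struct resident_model slots[NUM_SLOTS] = {
		{ true, 0 }, { true, 3 }, { true, 1 }, { false, 0 },
	};

	/* Only 2 slots are available this tick: the group ranked 3 is suspended. */
	printf("suspended=%d\n", suspend_overflowing_groups(slots, 2)); /* 1 */
	return 0;
}
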
*/ -+ { -+ struct kbase_csffw_tl_message *msg = -+ (struct kbase_csffw_tl_message *) csffw_data_it; -+ msg->timestamp = -+ kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp); -+ } + -+ /* Copy the message out to the tl_stream. */ -+ buffer = kbase_tlstream_msgbuf_acquire( -+ stream, event_size, &acq_flags); -+ kbasep_serialize_bytes(buffer, 0, csffw_data_it, event_size); -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+ csffw_data_it += event_size; ++ /* Drop the head csg from the list */ ++ remove_scheduled_group(kbdev, group); ++ if (!WARN_ON(!scheduler->remaining_tick_slots)) ++ scheduler->remaining_tick_slots--; + } + -+ spin_unlock_irqrestore(&self->read_lock, flags); -+ return ret; ++ /* Dealing with groups currently going through suspend */ ++ program_suspending_csg_slots(kbdev); +} + -+static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) ++static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, ++ struct kbase_context *kctx, int priority) +{ -+ struct kbase_csf_tl_reader *self = -+ container_of(timer, struct kbase_csf_tl_reader, read_timer); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group; + -+ int rcode; ++ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&scheduler->interrupt_lock); ++ if (WARN_ON(priority < 0) || ++ WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ return; + -+ kbase_csf_tl_reader_flush_buffer(self); ++ if (!kctx_as_enabled(kctx)) ++ return; + -+ rcode = mod_timer(&self->read_timer, -+ jiffies + msecs_to_jiffies(self->timer_interval)); ++ list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], ++ link) { ++ if (WARN_ON(!list_empty(&group->link_to_schedule))) ++ /* This would be a bug */ ++ list_del_init(&group->link_to_schedule); + -+ CSTD_UNUSED(rcode); ++ if (unlikely(group->faulted)) ++ continue; ++ ++ /* Set the scanout sequence number, starting from 0 */ ++ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; ++ ++ if (scheduler->tick_protm_pending_seq == ++ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { ++ if (!bitmap_empty(group->protm_pending_bitmap, ++ kbdev->csf.global_iface.groups[0].stream_num)) ++ scheduler->tick_protm_pending_seq = ++ group->scan_seq_num; ++ } ++ ++ if (queue_group_idle_locked(group)) { ++ if (can_schedule_idle_group(group)) ++ list_add_tail(&group->link_to_schedule, ++ &scheduler->idle_groups_to_schedule); ++ continue; ++ } ++ ++ if (!scheduler->ngrp_to_schedule) { ++ /* keep the top csg's origin */ ++ scheduler->top_ctx = kctx; ++ scheduler->top_grp = group; ++ } ++ ++ list_add_tail(&group->link_to_schedule, ++ &scheduler->groups_to_schedule); ++ group->prepared_seq_num = scheduler->ngrp_to_schedule++; ++ ++ kctx->csf.sched.ngrp_to_schedule++; ++ count_active_address_space(kbdev, kctx); ++ } +} + +/** -+ * tl_reader_init_late() - Late CSFFW TL Reader initialization. ++ * scheduler_rotate_groups() - Rotate the runnable queue groups to provide ++ * fairness of scheduling within a single ++ * kbase_context. + * -+ * @self: CSFFW TL Reader instance. -+ * @kbdev: Kbase device. ++ * @kbdev: Pointer to the GPU device. + * -+ * Late initialization is done once at kbase_csf_tl_reader_start() time. -+ * This is because the firmware image is not parsed -+ * by the kbase_csf_tl_reader_init() time. ++ * Since only kbase_csf_scheduler's top_grp (i.e. 
the queue group assigned ++ * the highest slot priority) is guaranteed to get the resources that it ++ * needs we only rotate the kbase_context corresponding to it - ++ * kbase_csf_scheduler's top_ctx. + * -+ * Return: Zero on success, -1 otherwise. ++ * The priority level chosen for rotation is the one containing the previous ++ * scheduling cycle's kbase_csf_scheduler's top_grp. ++ * ++ * In a 'fresh-slice-cycle' this always corresponds to the highest group ++ * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority ++ * level of the previous scheduling cycle's first runnable kbase_context. ++ * ++ * We choose this priority level because when higher priority work is ++ * scheduled, we should always cause the scheduler to run and do a scan. The ++ * scan always enumerates the highest priority work first (whether that be ++ * based on process priority or group priority), and thus ++ * kbase_csf_scheduler's top_grp will point to the first of those high priority ++ * groups, which necessarily must be the highest priority group in ++ * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick ++ * up that group appropriately. ++ * ++ * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), ++ * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but ++ * will set up kbase_csf_scheduler's top_ctx again for the next scheduling ++ * cycle. Implicitly, a rotation had already occurred by removing ++ * the kbase_csf_scheduler's top_grp ++ * ++ * If kbase_csf_scheduler's top_grp became idle and all other groups belonging ++ * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's ++ * top_ctx are also idle, then the effect of this will be to rotate idle ++ * groups, which might not actually become resident in the next ++ * scheduling slice. However this is acceptable since a queue group becoming ++ * idle is implicitly a rotation (as above with evicted queue groups), as it ++ * automatically allows a new queue group to take the maximum slot priority ++ * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of ++ * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will ++ * be for a group in the next lowest priority level or in absence of those the ++ * next kbase_context's queue groups. + */ -+static int tl_reader_init_late( -+ struct kbase_csf_tl_reader *self, -+ struct kbase_device *kbdev) ++static void scheduler_rotate_groups(struct kbase_device *kbdev) +{ -+ struct firmware_trace_buffer *tb; -+ size_t hdr_size = 0; -+ const char *hdr = NULL; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_context *const top_ctx = scheduler->top_ctx; ++ struct kbase_queue_group *const top_grp = scheduler->top_grp; + -+ if (self->kbdev) -+ return 0; ++ lockdep_assert_held(&scheduler->lock); ++ if (top_ctx && top_grp) { ++ struct list_head *list = ++ &top_ctx->csf.sched.runnable_groups[top_grp->priority]; + -+ tb = kbase_csf_firmware_get_trace_buffer( -+ kbdev, KBASE_CSFFW_TRACEBUFFER_NAME); -+ hdr = kbase_csf_firmware_get_timeline_metadata( -+ kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size); ++ WARN_ON(top_grp->kctx != top_ctx); ++ if (!WARN_ON(list_empty(list))) { ++ struct kbase_queue_group *new_head_grp; + -+ if (!tb) { -+ dev_warn( -+ kbdev->dev, -+ "'%s' tracebuffer is not present in the firmware image.", -+ KBASE_CSFFW_TRACEBUFFER_NAME); -+ return -1; ++ list_move_tail(&top_grp->link, list); ++ new_head_grp = (!list_empty(list)) ? 
++						list_first_entry(list, struct kbase_queue_group, link) :
++						NULL;
++			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
++						 top_ctx->csf.sched.num_runnable_grps);
++			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
++			dev_dbg(kbdev->dev,
++			    "groups rotated for a context, num_runnable_groups: %u\n",
++			    scheduler->top_ctx->csf.sched.num_runnable_grps);
++		}
++	}
++}
++
++static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	struct list_head *list = &scheduler->runnable_kctxs;
++
++	lockdep_assert_held(&scheduler->lock);
++	if (scheduler->top_ctx) {
++		if (!WARN_ON(list_empty(list))) {
++			struct kbase_context *pos;
++			bool found = false;
++
++			/* Locate the ctx on the list */
++			list_for_each_entry(pos, list, csf.link) {
++				if (scheduler->top_ctx == pos) {
++					found = true;
++					break;
++				}
++			}
++
++			if (!WARN_ON(!found)) {
++				struct kbase_context *new_head_kctx;
++
++				list_move_tail(&pos->csf.link, list);
++				KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
++				new_head_kctx = (!list_empty(list)) ?
++					list_first_entry(list, struct kbase_context, csf.link) :
++					NULL;
++				KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
++						 0u);
++				dev_dbg(kbdev->dev, "contexts rotated\n");
++			}
++		}
++	}
++}
++
++/**
++ * scheduler_update_idle_slots_status() - Get the status update for the CSG
++ *                                        slots for which the IDLE notification
++ *                                        was received previously.
++ *
++ * @kbdev:             Pointer to the GPU device.
++ * @csg_bitmap:        Bitmap of the CSG slots for which
++ *                     the status update request completed successfully.
++ * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
++ *                     the status update request timed out.
++ *
++ * This function sends a CSG status update request for all the CSG slots
++ * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
++ * the group's 'reevaluate_idle_status' field is set, the nominally non-idle
++ * slots are also included in the status update for a confirmation of their
++ * status. The function waits for the status update request to complete and
++ * returns the update completed slots bitmap and any timed out idle-flagged
++ * slots bitmap.
++ *
++ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
++ * this function.
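
Editor's note: the long rationale above for rotating kbase_csf_scheduler's top_grp and top_ctx boils down to moving the previous head entry to the tail of its runnable list so the next entry gets first pick. A minimal standalone sketch follows; plain arrays stand in for the driver's intrusive lists and list_move_tail().

/* Standalone sketch of the fairness rotation: move this cycle's top entry to
 * the tail so the next entry becomes the new head for the next cycle.
 */
#include <stdio.h>

/* Move the element at index `top` to the end, shifting the rest forward. */
static void rotate_to_tail(int *list, int len, int top)
{
	int moved = list[top];

	for (int i = top; i < len - 1; i++)
		list[i] = list[i + 1];
	list[len - 1] = moved;
}

int main(void)
{
	int runnable_groups[] = { 10, 11, 12, 13 }; /* 10 was this cycle's top */

	rotate_to_tail(runnable_groups, 4, 0);

	for (int i = 0; i < 4; i++)
		printf("%d ", runnable_groups[i]); /* 11 12 13 10 */
	printf("\n");
	return 0;
}
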
+ */ -+static int tl_reader_update_enable_bit( -+ struct kbase_csf_tl_reader *self, -+ bool value) ++static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, ++ unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) +{ -+ int err = 0; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ const u32 num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags, i; ++ u32 active_chk = 0; + -+ err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit( -+ self->trace_buffer, 0, value); ++ lockdep_assert_held(&scheduler->lock); + -+ return err; -+} ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); + -+void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, -+ struct kbase_tlstream *stream) -+{ -+ self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; ++ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ struct kbase_csf_cmd_stream_group_info *const ginfo = ++ &global_iface->groups[i]; ++ u32 csg_req; ++ bool idle_flag; + -+ kbase_timer_setup(&self->read_timer, -+ kbasep_csf_tl_reader_read_callback); ++ if (WARN_ON(!group)) { ++ clear_bit(i, scheduler->csg_inuse_bitmap); ++ clear_bit(i, scheduler->csg_slots_idle_mask); ++ continue; ++ } + -+ self->stream = stream; ++ idle_flag = test_bit(i, scheduler->csg_slots_idle_mask); ++ if (idle_flag || group->reevaluate_idle_status) { ++ if (idle_flag) { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (!bitmap_empty(group->protm_pending_bitmap, ++ ginfo->stream_num)) { ++ dev_warn(kbdev->dev, ++ "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution", ++ group->handle, group->kctx->tgid, ++ group->kctx->id, (int)i); ++ } ++#endif ++ clear_bit(i, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); ++ } else { ++ /* Updates include slots for which reevaluation is needed. ++ * Here one tracks the extra included slots in active_chk. ++ * For protm pending slots, their status of activeness are ++ * assured so no need to request an update. ++ */ ++ active_chk |= BIT(i); ++ group->reevaluate_idle_status = false; ++ } + -+ /* This will be initialized by tl_reader_init_late() */ -+ self->kbdev = NULL; -+ self->trace_buffer = NULL; -+ self->tl_header.data = NULL; -+ self->tl_header.size = 0; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i); ++ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_STATUS_UPDATE_MASK); + -+ spin_lock_init(&self->read_lock); ++ /* Track the slot update requests in csg_bitmap. ++ * Note, if the scheduler requested extended update, the resulting ++ * csg_bitmap would be the idle_flags + active_chk. Otherwise it's ++ * identical to the idle_flags. 
++			 */
++			set_bit(i, csg_bitmap);
++		} else {
++			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
++			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
++						 group->run_state);
++		}
++	}
+
+
++	/* The groups are aggregated into a single kernel doorbell request */
++	if (!bitmap_empty(csg_bitmap, num_groups)) {
++		long wt =
++			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
++		u32 db_slots = (u32)csg_bitmap[0];
+
++		kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
++		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
++		if (wait_csg_slots_handshake_ack(kbdev,
++				CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
++			const int csg_nr = ffs(csg_bitmap[0]) - 1;
++			struct kbase_queue_group *group =
++				scheduler->csg_slots[csg_nr].resident_group;
+
++			dev_warn(
++				kbdev->dev,
++				"[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
++				kbase_backend_get_cycle_cnt(kbdev),
++				kbdev->csf.fw_timeout_ms,
++				csg_bitmap[0]);
++			schedule_actions_trigger_df(kbdev, group->kctx,
++						    DF_CSG_STATUS_UPDATE_TIMEOUT);
++
++			/* Store the bitmap of timed out slots */
++			bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
++			csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
++
++			/* Mask off any failed bits contributed by the active slots, so
++			 * that the failed bitmap reported back to the caller contains
++			 * only slots that were idle-flagged. This way, an idle flag
++			 * whose status update failed is kept as 'idle' (an informed
++			 * guess, as the update did not come to a conclusive result),
++			 * while a failed active slot is still treated as 'non-idle'.
++			 * This gives graceful handling of the unexpected timeout
++			 * condition.
++			 */
++			failed_csg_bitmap[0] &= ~active_chk;
++
++		} else {
++			KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
++			csg_bitmap[0] = db_slots;
++		}
++	} else {
++		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
++	}
++}
++
++/**
++ * scheduler_handle_idle_slots() - Update the idle status of queue groups
++ *                                 resident on CSG slots for which the
++ *                                 IDLE notification was received previously.
++ *
++ * @kbdev: Pointer to the GPU device.
++ *
++ * This function is called at the start of scheduling tick/tock to reconfirm
++ * the idle status of queue groups resident on CSG slots for
++ * which idle notification was received previously, i.e. all the CSG slots
++ * present in the bitmap scheduler->csg_slots_idle_mask.
++ * The confirmation is done by sending the CSG status update request to the
++ * firmware. On completion, the firmware will mark the idleness at the
++ * slot's interface CSG_STATUS_STATE register accordingly.
++ *
++ * The run state of the groups resident on still idle CSG slots is changed to
++ * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
++ * updated accordingly.
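
Editor's note: the bookkeeping on the status-update timeout path is easier to follow with concrete bit patterns. The standalone check below uses invented slot numbers: slots 0 and 1 were idle-flagged, slot 2 was included only for re-evaluation (active_chk), and the acknowledgment timed out for slots 0 and 2.

/* Standalone arithmetic check of the timeout handling above. */
#include <stdio.h>

int main(void)
{
	unsigned long idle_flags = 0x3; /* slots 0,1 */
	unsigned long active_chk = 0x4; /* slot 2 */
	unsigned long db_slots   = idle_flags | active_chk; /* 0x7 */
	unsigned long unacked    = 0x5; /* slots 0,2 timed out */

	unsigned long failed  = unacked;             /* 0x5 */
	unsigned long updated = ~unacked & db_slots; /* 0x2: slot 1 confirmed */

	failed &= ~active_chk; /* 0x1: only the idle-flagged failure is kept */

	printf("updated=0x%lx failed_idle=0x%lx\n", updated, failed);
	return 0;
}

So the caller sees slot 1 as having a completed update, slot 0 as a failed (still idle-flagged) slot, and slot 2's failure is dropped so it stays treated as non-idle.
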
++ * The bits corresponding to slots for which the status update request timedout ++ * remain set in scheduler->csg_slots_idle_mask. ++ */ ++static void scheduler_handle_idle_slots(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ unsigned long flags, i; ++ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; ++ DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; + -+ self->is_active = true; -+ /* Set bytes to copy to the header size. This is to trigger copying -+ * of the header to the user space. -+ */ -+ self->tl_header.btc = self->tl_header.size; ++ lockdep_assert_held(&scheduler->lock); + -+ /* Enable the tracebuffer on the CSFFW side. */ -+ rcode = tl_reader_update_enable_bit(self, true); -+ if (rcode != 0) -+ return rcode; ++ scheduler_update_idle_slots_status(kbdev, csg_bitmap, ++ failed_csg_bitmap); + -+ rcode = mod_timer(&self->read_timer, -+ jiffies + msecs_to_jiffies(self->timer_interval)); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ for_each_set_bit(i, csg_bitmap, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; + -+ return 0; ++ if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING)) ++ continue; ++ if (WARN_ON(!group)) ++ continue; ++ if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && ++ group->run_state != KBASE_CSF_GROUP_IDLE)) ++ continue; ++ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ continue; ++ ++ if (group_on_slot_is_idle(kbdev, i)) { ++ group->run_state = KBASE_CSF_GROUP_IDLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); ++ set_bit(i, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, ++ group, scheduler->csg_slots_idle_mask[0]); ++ } else { ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); ++ } ++ } ++ ++ bitmap_or(scheduler->csg_slots_idle_mask, ++ scheduler->csg_slots_idle_mask, ++ failed_csg_bitmap, num_groups); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL, ++ scheduler->csg_slots_idle_mask[0]); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + -+void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self) ++static void scheduler_scan_idle_groups(struct kbase_device *kbdev) +{ -+ unsigned long flags; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group, *n; + -+ /* If is not running, early exit. */ -+ if (!self->is_active) -+ return; ++ list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, ++ link_to_schedule) { ++ WARN_ON(!can_schedule_idle_group(group)); + -+ /* Disable the tracebuffer on the CSFFW side. 
*/ -+ tl_reader_update_enable_bit(self, false); ++ if (!scheduler->ngrp_to_schedule) { ++ /* keep the top csg's origin */ ++ scheduler->top_ctx = group->kctx; ++ scheduler->top_grp = group; ++ } + -+ del_timer_sync(&self->read_timer); ++ group->prepared_seq_num = scheduler->ngrp_to_schedule++; ++ list_move_tail(&group->link_to_schedule, ++ &scheduler->groups_to_schedule); + -+ spin_lock_irqsave(&self->read_lock, flags); ++ group->kctx->csf.sched.ngrp_to_schedule++; ++ count_active_address_space(kbdev, group->kctx); ++ } ++} + -+ tl_reader_reset(self); ++static void scheduler_rotate(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ spin_unlock_irqrestore(&self->read_lock, flags); ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Dealing with rotation */ ++ scheduler_rotate_groups(kbdev); ++ scheduler_rotate_ctxs(kbdev); +} + -+void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self) ++static struct kbase_queue_group *get_tock_top_group( ++ struct kbase_csf_scheduler *const scheduler) +{ -+ kbase_csf_tl_reader_flush_buffer(self); -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h -new file mode 100644 -index 000000000..2f8eb1dd4 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ struct kbase_context *kctx; ++ int i; + -+#ifndef _KBASE_CSFFW_TL_READER_H_ -+#define _KBASE_CSFFW_TL_READER_H_ ++ lockdep_assert_held(&scheduler->lock); ++ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { ++ list_for_each_entry(kctx, ++ &scheduler->runnable_kctxs, csf.link) { ++ struct kbase_queue_group *group; + -+#include -+#include -+#include ++ list_for_each_entry(group, ++ &kctx->csf.sched.runnable_groups[i], ++ link) { ++ if (queue_group_idle_locked(group)) ++ continue; + -+/* The number of pages used for CSFFW trace buffer. Can be tweaked. */ -+#define KBASE_CSF_TL_BUFFER_NR_PAGES 128 -+/* CSFFW Timeline read polling minimum period in milliseconds. */ -+#define KBASE_CSF_TL_READ_INTERVAL_MIN 20 -+/* CSFFW Timeline read polling default period in milliseconds. */ -+#define KBASE_CSF_TL_READ_INTERVAL_DEFAULT 200 -+/* CSFFW Timeline read polling maximum period in milliseconds. */ -+#define KBASE_CSF_TL_READ_INTERVAL_MAX (60 * 1000) ++ return group; ++ } ++ } ++ } + -+struct firmware_trace_buffer; -+struct kbase_tlstream; -+struct kbase_device; ++ return NULL; ++} + +/** -+ * struct kbase_csf_tl_reader - CSFFW timeline reader state. ++ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon ++ * suspend or GPU IDLE. 
+ * -+ * @read_timer: Timer used for periodical tracebufer reading. -+ * @timer_interval: Timer polling period in milliseconds. -+ * @stream: Timeline stream where to the tracebuffer content -+ * is copied. -+ * @kbdev: KBase device. -+ * @trace_buffer: CSF Firmware timeline tracebuffer. -+ * @tl_header: CSFFW Timeline header -+ * @tl_header.data: CSFFW Timeline header content. -+ * @tl_header.size: CSFFW Timeline header size. -+ * @tl_header.btc: CSFFW Timeline header remaining bytes to copy to -+ * the user space. -+ * @ts_converter: Timestamp converter state. -+ * @got_first_event: True, if a CSFFW timelime session has been enabled -+ * and the first event was received. -+ * @is_active: True, if a CSFFW timelime session has been enabled. -+ * @expected_event_id: The last 16 bit event ID received from CSFFW. It -+ * is only valid when got_first_event is true. -+ * @read_buffer: Temporary buffer used for CSFFW timeline data -+ * reading from the tracebufer. -+ * @read_lock: CSFFW timeline reader lock. ++ * @kbdev: Pointer to the device ++ * @system_suspend: Flag to indicate it's for system suspend. ++ * ++ * This function will suspend all active CSG groups upon either ++ * system suspend, runtime suspend or GPU IDLE. ++ * ++ * Return: 0 on success, -1 otherwise. + */ -+struct kbase_csf_tl_reader { -+ struct timer_list read_timer; -+ u32 timer_interval; -+ struct kbase_tlstream *stream; ++static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, ++ bool system_suspend) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; + -+ struct kbase_device *kbdev; -+ struct firmware_trace_buffer *trace_buffer; -+ struct { -+ const char *data; -+ size_t size; -+ size_t btc; -+ } tl_header; ++ int ret = suspend_active_queue_groups(kbdev, slot_mask); + -+ bool got_first_event; -+ bool is_active; -+ u16 expected_event_id; ++ if (unlikely(ret)) { ++ const int csg_nr = ffs(slot_mask[0]) - 1; ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[csg_nr].resident_group; ++ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + -+ u8 read_buffer[PAGE_SIZE * KBASE_CSF_TL_BUFFER_NR_PAGES]; -+ spinlock_t read_lock; -+}; ++ /* The suspend of CSGs failed, ++ * trigger the GPU reset to be in a deterministic state. ++ */ ++ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", ++ kbase_backend_get_cycle_cnt(kbdev), ++ kbdev->csf.fw_timeout_ms, ++ kbdev->csf.global_iface.group_num, slot_mask); ++ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ schedule_actions_trigger_df(kbdev, group->kctx, error_type); + -+/** -+ * kbase_csf_tl_reader_init() - Initialize CSFFW Timelime Stream Reader. -+ * -+ * @self: CSFFW TL Reader instance. -+ * @stream: Destination timeline stream. -+ */ -+void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); + -+/** -+ * kbase_csf_tl_reader_term() - Terminate CSFFW Timelime Stream Reader. -+ * -+ * @self: CSFFW TL Reader instance. -+ */ -+void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self); ++ return -1; ++ } + -+/** -+ * kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream. -+ * -+ * @self: CSFFW TL Reader instance. 
-+ * -+ * Return: Zero on success, negative error code (EBUSY) otherwise -+ */ -+int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self); ++ /* Check if the groups became active whilst the suspend was ongoing, ++ * but only for the case where the system suspend is not in progress ++ */ ++ if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps)) ++ return -1; ++ ++ return 0; ++} + +/** -+ * kbase_csf_tl_reader_start() - Start asynchronous copying of CSFFW timeline stream. ++ * all_on_slot_groups_remained_idle - Live check for all groups' idleness + * -+ * @self: CSFFW TL Reader instance. -+ * @kbdev: Kbase device. ++ * @kbdev: Pointer to the device. + * -+ * Return: zero on success, a negative error code otherwise. -+ */ -+int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev); -+ -+/** -+ * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream. ++ * Returns false if any of the queues inside any of the groups that have been ++ * assigned a physical CSG slot have work to execute, or have executed work ++ * since having received a GPU idle notification. This function is used to ++ * handle a rance condition between firmware reporting GPU idle and userspace ++ * submitting more work by directly ringing a doorbell. + * -+ * @self: CSFFW TL Reader instance. ++ * Return: false if any queue inside any resident group has work to be processed ++ * or has processed work since GPU idle event, true otherwise. + */ -+void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self); ++static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ /* All CSGs have the same number of CSs */ ++ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num; ++ size_t i; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+/** -+ * kbase_csf_tl_reader_debugfs_init() - Initialize debugfs for CSFFW Timelime Stream Reader. -+ * -+ * @kbdev: Kbase device. -+ */ -+void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev); -+#endif ++ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&scheduler->interrupt_lock); + -+/** -+ * kbase_csf_tl_reader_reset() - Reset CSFFW timeline reader, it should be called before reset CSFFW. -+ * -+ * @self: CSFFW TL Reader instance. -+ */ -+void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self); ++ for_each_set_bit(i, scheduler->csg_slots_idle_mask, ++ kbdev->csf.global_iface.group_num) { ++ struct kbase_queue_group *const group = ++ scheduler->csg_slots[i].resident_group; ++ size_t j; + -+#endif /* _KBASE_CSFFW_TL_READER_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c -new file mode 100644 -index 000000000..46872f937 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c -@@ -0,0 +1,555 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ for (j = 0; j < max_streams; ++j) { ++ struct kbase_queue const *const queue = ++ group->bound_queues[j]; ++ u64 const *output_addr; ++ u64 cur_extract_ofs; + -+#include "mali_kbase.h" -+#include "mali_kbase_defs.h" -+#include "mali_kbase_csf_firmware.h" -+#include "mali_kbase_csf_trace_buffer.h" -+#include "mali_kbase_reset_gpu.h" -+#include "mali_kbase_csf_tl_reader.h" ++ if (!queue || !queue->user_io_addr) ++ continue; + -+#include -+#include ++ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); ++ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; ++ if (cur_extract_ofs != queue->extract_ofs) { ++ /* More work has been executed since the idle ++ * notification. ++ */ ++ return false; ++ } ++ } ++ } + -+/** -+ * struct firmware_trace_buffer - Trace Buffer within the MCU firmware -+ * -+ * @kbdev: Pointer to the Kbase device. -+ * @node: List head linking all trace buffers to -+ * kbase_device:csf.firmware_trace_buffers -+ * @data_mapping: MCU shared memory mapping used for the data buffer. -+ * @updatable: Indicates whether config items can be updated with -+ * FIRMWARE_CONFIG_UPDATE -+ * @type: The type of the trace buffer. -+ * @trace_enable_entry_count: Number of Trace Enable bits. -+ * @gpu_va: Structure containing all the Firmware addresses -+ * that are accessed by the MCU. -+ * @gpu_va.size_address: The address where the MCU shall read the size of -+ * the data buffer. -+ * @gpu_va.insert_address: The address that shall be dereferenced by the MCU -+ * to write the Insert offset. -+ * @gpu_va.extract_address: The address that shall be dereferenced by the MCU -+ * to read the Extract offset. -+ * @gpu_va.data_address: The address that shall be dereferenced by the MCU -+ * to write the Trace Buffer. -+ * @gpu_va.trace_enable: The address where the MCU shall read the array of -+ * Trace Enable bits describing which trace points -+ * and features shall be enabled. -+ * @cpu_va: Structure containing CPU addresses of variables -+ * which are permanently mapped on the CPU address -+ * space. -+ * @cpu_va.insert_cpu_va: CPU virtual address of the Insert variable. -+ * @cpu_va.extract_cpu_va: CPU virtual address of the Extract variable. -+ * @num_pages: Size of the data buffer, in pages. -+ * @trace_enable_init_mask: Initial value for the trace enable bit mask. -+ * @name: NULL terminated string which contains the name of the trace buffer. -+ * -+ * The firmware relays information to the host by writing on memory buffers -+ * which are allocated and partially configured by the host. These buffers -+ * are called Trace Buffers: each of them has a specific purpose and is -+ * identified by a name and a set of memory addresses where the host can -+ * set pointers to host-allocated structures. 
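
Editor's note: the doorbell-race check in all_on_slot_groups_remained_idle() compares each queue's live CS_EXTRACT value against the extract offset snapshotted at the idle notification. A minimal standalone sketch of that comparison follows; struct queue_model and the helper are invented, and the user I/O page indexing is reduced to plain fields.

/* Standalone sketch: detect whether any queue made progress (i.e. the GPU
 * executed more work) since the idle notification was received.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct queue_model {
	bool bound;                /* queue exists and has user I/O mapped */
	uint64_t extract_snapshot; /* extract offset captured at idle time */
	uint64_t extract_now;      /* current CS_EXTRACT value in the output page */
};

static bool groups_remained_idle(const struct queue_model *queues, int n)
{
	for (int i = 0; i < n; i++) {
		if (!queues[i].bound)
			continue;
		if (queues[i].extract_now != queues[i].extract_snapshot)
			return false; /* work executed since the idle event */
	}
	return true;
}

int main(void)
{
	struct queue_model queues[2] = {
		{ true, 64, 64 },
		{ true, 128, 192 }, /* this queue made progress */
	};

	printf("%d\n", groups_remained_idle(queues, 2)); /* 0 */
	return 0;
}
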
-+ */ -+struct firmware_trace_buffer { -+ struct kbase_device *kbdev; -+ struct list_head node; -+ struct kbase_csf_mapping data_mapping; -+ bool updatable; -+ u32 type; -+ u32 trace_enable_entry_count; -+ struct gpu_va { -+ u32 size_address; -+ u32 insert_address; -+ u32 extract_address; -+ u32 data_address; -+ u32 trace_enable; -+ } gpu_va; -+ struct cpu_va { -+ u32 *insert_cpu_va; -+ u32 *extract_cpu_va; -+ } cpu_va; -+ u32 num_pages; -+ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; -+ char name[1]; /* this field must be last */ -+}; ++ return true; ++} + -+/** -+ * struct firmware_trace_buffer_data - Configuration data for trace buffers -+ * -+ * @name: Name identifier of the trace buffer -+ * @trace_enable_init_mask: Initial value to assign to the trace enable bits -+ * @size: Size of the data buffer to allocate for the trace buffer, in pages. -+ * The size of a data buffer must always be a power of 2. -+ * -+ * Describe how to set up a trace buffer interface. -+ * Trace buffers are identified by name and they require a data buffer and -+ * an initial mask of values for the trace enable bits. -+ */ -+struct firmware_trace_buffer_data { -+ char name[64]; -+ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; -+ size_t size; -+}; ++static bool scheduler_idle_suspendable(struct kbase_device *kbdev) ++{ ++ bool suspend; ++ unsigned long flags; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+/* -+ * Table of configuration data for trace buffers. -+ * -+ * This table contains the configuration data for the trace buffers that are -+ * expected to be parsed from the firmware. -+ */ -+static const struct firmware_trace_buffer_data trace_buffer_data[] = { -+#if MALI_UNIT_TEST -+ { "fwutf", { 0 }, 1 }, -+#endif -+ { FIRMWARE_LOG_BUF_NAME, { 0 }, 4 }, -+ { "benchmark", { 0 }, 2 }, -+ { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, -+}; ++ lockdep_assert_held(&scheduler->lock); + -+int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) -+{ -+ struct firmware_trace_buffer *trace_buffer; -+ int ret = 0; -+ u32 mcu_rw_offset = 0, mcu_write_offset = 0; -+ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); ++ if ((scheduler->state == SCHED_SUSPENDED) || ++ (scheduler->state == SCHED_SLEEPING)) ++ return false; + -+ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) { -+ dev_dbg(kbdev->dev, "No trace buffers to initialise\n"); -+ return 0; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock(&scheduler->interrupt_lock); + -+ /* GPU-readable,writable memory used for Extract variables */ -+ ret = kbase_csf_firmware_mcu_shared_mapping_init( -+ kbdev, 1, PROT_WRITE, -+ KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, -+ &kbdev->csf.firmware_trace_buffers.mcu_rw); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to map GPU-rw MCU shared memory\n"); -+ goto out; -+ } ++ if (scheduler->fast_gpu_idle_handling) { ++ scheduler->fast_gpu_idle_handling = false; + -+ /* GPU-writable memory used for Insert variables */ -+ ret = kbase_csf_firmware_mcu_shared_mapping_init( -+ kbdev, 1, PROT_READ, KBASE_REG_GPU_WR, -+ &kbdev->csf.firmware_trace_buffers.mcu_write); -+ if (ret != 0) { -+ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory\n"); -+ goto out; ++ if (scheduler->total_runnable_grps) { ++ suspend = !atomic_read(&scheduler->non_idle_offslot_grps) && ++ kbase_pm_idle_groups_sched_suspendable(kbdev); ++ } else ++ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); ++ 
spin_unlock(&scheduler->interrupt_lock); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return suspend; + } + -+ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { -+ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, -+ trace_enable_size_dwords; -+ u32 *extract_cpu_va, *insert_cpu_va; -+ unsigned int i; ++ if (scheduler->total_runnable_grps) { + -+ /* GPU-writable data buffer for the individual trace buffer */ -+ ret = kbase_csf_firmware_mcu_shared_mapping_init( -+ kbdev, trace_buffer->num_pages, PROT_READ, KBASE_REG_GPU_WR, -+ &trace_buffer->data_mapping); -+ if (ret) { -+ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory for a trace buffer\n"); -+ goto out; -+ } ++ /* Check both on-slots and off-slots groups idle status */ ++ suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && ++ !atomic_read(&scheduler->non_idle_offslot_grps) && ++ kbase_pm_idle_groups_sched_suspendable(kbdev); ++ } else ++ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); + -+ extract_gpu_va = -+ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + -+ mcu_rw_offset; -+ extract_cpu_va = (u32 *)( -+ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + -+ mcu_rw_offset); -+ insert_gpu_va = -+ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + -+ mcu_write_offset; -+ insert_cpu_va = (u32 *)( -+ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + -+ mcu_write_offset); -+ data_buffer_gpu_va = -+ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); ++ /* Confirm that all groups are actually idle before proceeding with ++ * suspension as groups might potentially become active again without ++ * informing the scheduler in case userspace rings a doorbell directly. ++ */ ++ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) || ++ unlikely(!all_on_slot_groups_remained_idle(kbdev)))) ++ suspend = false; + -+ /* Initialize the Extract variable */ -+ *extract_cpu_va = 0; ++ spin_unlock(&scheduler->interrupt_lock); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* Each FW address shall be mapped and set individually, as we can't -+ * assume anything about their location in the memory address space. -+ */ -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.size_address, -+ trace_buffer->num_pages << PAGE_SHIFT); ++ return suspend; ++} + -+ trace_enable_size_dwords = -+ (trace_buffer->trace_enable_entry_count + 31) >> 5; ++#ifdef KBASE_PM_RUNTIME ++/** ++ * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU ++ * becoming idle. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ * This function is called on GPU idle notification to trigger the transition of ++ * GPU to sleep state, where MCU firmware pauses execution and L2 cache is ++ * turned off. Scheduler's state is changed to sleeping and all the active queue ++ * groups remain on the CSG slots. 
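++ *
++ * The caller must hold the scheduler lock.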
++ */ ++static void scheduler_sleep_on_idle(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ for (i = 0; i < trace_enable_size_dwords; i++) { -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.trace_enable + i*4, -+ trace_buffer->trace_enable_init_mask[i]); -+ } ++ lockdep_assert_held(&scheduler->lock); + -+ /* Store CPU virtual addresses for permanently mapped variables */ -+ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; -+ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; ++ dev_dbg(kbdev->dev, ++ "Scheduler to be put to sleep on GPU becoming idle"); ++ cancel_tick_timer(kbdev); ++ scheduler_pm_idle_before_sleep(kbdev); ++ scheduler->state = SCHED_SLEEPING; ++ KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); ++} ++#endif + -+ /* Update offsets */ -+ mcu_write_offset += cache_line_alignment; -+ mcu_rw_offset += cache_line_alignment; ++/** ++ * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU ++ * becoming idle. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ * This function is called on GPU idle notification to trigger the power down of ++ * GPU. Scheduler's state is changed to suspended and all the active queue ++ * groups are suspended before halting the MCU firmware. ++ * ++ * Return: true if scheduler will be suspended or false if suspend is aborted. ++ */ ++static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) ++{ ++ int ret = suspend_active_groups_on_powerdown(kbdev, false); ++ ++ if (ret) { ++ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", ++ atomic_read( ++ &kbdev->csf.scheduler.non_idle_offslot_grps)); ++ /* Bring forward the next tick */ ++ kbase_csf_scheduler_tick_advance(kbdev); ++ return false; + } + -+out: -+ return ret; ++ dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); ++ scheduler_suspend(kbdev); ++ cancel_tick_timer(kbdev); ++ return true; +} + -+void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev) ++static void gpu_idle_worker(struct work_struct *work) +{ -+ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) ++ struct kbase_device *kbdev = container_of( ++ work, struct kbase_device, csf.scheduler.gpu_idle_work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool scheduler_is_idle_suspendable = false; ++ bool all_groups_suspended = false; ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u); ++ ++#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ ++ (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) ++ ++ if (kbase_reset_gpu_try_prevent(kbdev)) { ++ dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, ++ __ENCODE_KTRACE_INFO(true, false, false)); + return; ++ } ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ mutex_lock(&scheduler->lock); + -+ while (!list_empty(&kbdev->csf.firmware_trace_buffers.list)) { -+ struct firmware_trace_buffer *trace_buffer; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(scheduler->state == SCHED_BUSY)) { ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++ return; ++ } ++#endif + -+ trace_buffer = list_first_entry(&kbdev->csf.firmware_trace_buffers.list, -+ struct firmware_trace_buffer, node); -+ kbase_csf_firmware_mcu_shared_mapping_term(kbdev, &trace_buffer->data_mapping); -+ list_del(&trace_buffer->node); ++ scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); 
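++	/* If an idle suspension is possible, either transition the GPU to
++	 * sleep (when GPU sleep is allowed and CSGs are still active on
++	 * slots) or suspend all the active groups before powering down.
++	 */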
++ if (scheduler_is_idle_suspendable) { ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL, ++ kbase_csf_ktrace_gpu_cycle_cnt(kbdev)); ++#ifdef KBASE_PM_RUNTIME ++ if (kbase_pm_gpu_sleep_allowed(kbdev) && ++ kbase_csf_scheduler_get_nr_active_csgs(kbdev)) ++ scheduler_sleep_on_idle(kbdev); ++ else ++#endif ++ all_groups_suspended = scheduler_suspend_on_idle(kbdev); + -+ kfree(trace_buffer); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u); + } + -+ kbase_csf_firmware_mcu_shared_mapping_term( -+ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_rw); -+ kbase_csf_firmware_mcu_shared_mapping_term( -+ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_write); ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL, ++ __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable, ++ all_groups_suspended)); ++#undef __ENCODE_KTRACE_INFO +} + -+int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, -+ const u32 *entry, -+ unsigned int size, -+ bool updatable) ++static int scheduler_prepare(struct kbase_device *kbdev) +{ -+ const char *name = (char *)&entry[7]; -+ const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET; -+ struct firmware_trace_buffer *trace_buffer; -+ unsigned int i; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ int i; + -+ /* Allocate enough space for struct firmware_trace_buffer and the -+ * trace buffer name (with NULL termination). -+ */ -+ trace_buffer = -+ kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); ++ lockdep_assert_held(&scheduler->lock); + -+ if (!trace_buffer) -+ return -ENOMEM; ++ /* Empty the groups_to_schedule */ ++ while (!list_empty(&scheduler->groups_to_schedule)) { ++ struct kbase_queue_group *grp = ++ list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); + -+ memcpy(&trace_buffer->name, name, name_len); -+ trace_buffer->name[name_len] = '\0'; ++ remove_scheduled_group(kbdev, grp); ++ } + -+ for (i = 0; i < ARRAY_SIZE(trace_buffer_data); i++) { -+ if (!strcmp(trace_buffer_data[i].name, trace_buffer->name)) { -+ unsigned int j; ++ /* Pre-scan init scheduler fields */ ++ if (WARN_ON(scheduler->ngrp_to_schedule != 0)) ++ scheduler->ngrp_to_schedule = 0; ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; ++ scheduler->csg_scan_count_for_tick = 0; ++ WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); ++ scheduler->num_active_address_spaces = 0; ++ scheduler->num_csg_slots_for_tick = 0; ++ bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + -+ trace_buffer->kbdev = kbdev; -+ trace_buffer->updatable = updatable; -+ trace_buffer->type = entry[0]; -+ trace_buffer->gpu_va.size_address = entry[1]; -+ trace_buffer->gpu_va.insert_address = entry[2]; -+ trace_buffer->gpu_va.extract_address = entry[3]; -+ trace_buffer->gpu_va.data_address = entry[4]; -+ trace_buffer->gpu_va.trace_enable = entry[5]; -+ trace_buffer->trace_enable_entry_count = entry[6]; -+ trace_buffer->num_pages = trace_buffer_data[i].size; ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ scheduler->tick_protm_pending_seq = ++ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; ++ /* Scan out to run groups */ ++ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { ++ struct kbase_context *kctx; + -+ for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { -+ trace_buffer->trace_enable_init_mask[j] = -+ 
trace_buffer_data[i].trace_enable_init_mask[j]; -+ } -+ break; -+ } ++ list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) ++ scheduler_ctx_scan_groups(kbdev, kctx, i); + } ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ if (i < ARRAY_SIZE(trace_buffer_data)) { -+ list_add(&trace_buffer->node, &kbdev->csf.firmware_trace_buffers.list); -+ dev_dbg(kbdev->dev, "Trace buffer '%s'", trace_buffer->name); -+ } else { -+ dev_dbg(kbdev->dev, "Unknown trace buffer '%s'", trace_buffer->name); -+ kfree(trace_buffer); -+ } ++ /* Update this tick's non-idle groups */ ++ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; ++ ++ /* Initial number of non-idle off-slot groups, before the scheduler's ++ * scheduler_apply() operation. This gives a sensible start point view ++ * of the tick. It will be subject to up/downs during the scheduler ++ * active phase. ++ */ ++ atomic_set(&scheduler->non_idle_offslot_grps, ++ scheduler->non_idle_scanout_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL, ++ scheduler->non_idle_scanout_grps); ++ ++ /* Adds those idle but runnable groups to the scanout list */ ++ scheduler_scan_idle_groups(kbdev); ++ ++ WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); + ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->ngrp_to_schedule) << 32)); ++ set_max_csg_slots(kbdev); ++ dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", ++ scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); + return 0; +} + -+void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) ++/** ++ * keep_lru_on_slots() - Check the condition for LRU is met. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ * This function tries to maintain the Last-Recent-Use case on slots, when ++ * the scheduler has no non-idle off-slot CSGs for a replacement ++ * consideration. This effectively extends the previous scheduling results ++ * for the new one. That is, the last recent used CSGs are retained on slots ++ * for the new tick/tock action. ++ * ++ * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU), ++ * otherwise false. 
++ */ ++static bool keep_lru_on_slots(struct kbase_device *kbdev) +{ -+ struct firmware_trace_buffer *trace_buffer; -+ u32 mcu_rw_offset = 0, mcu_write_offset = 0; -+ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); -+ -+ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { -+ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, -+ trace_enable_size_dwords; -+ u32 *extract_cpu_va, *insert_cpu_va; -+ unsigned int i; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ bool keep_lru = false; ++ int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap, ++ kbdev->csf.global_iface.group_num); + -+ /* Rely on the fact that all required mappings already exist */ -+ extract_gpu_va = -+ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + -+ mcu_rw_offset; -+ extract_cpu_va = (u32 *)( -+ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + -+ mcu_rw_offset); -+ insert_gpu_va = -+ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + -+ mcu_write_offset; -+ insert_cpu_va = (u32 *)( -+ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + -+ mcu_write_offset); -+ data_buffer_gpu_va = -+ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); ++ lockdep_assert_held(&scheduler->lock); + -+ /* Notice that the function only re-updates firmware memory locations -+ * with information that allows access to the trace buffers without -+ * really resetting their state. For instance, the Insert offset will -+ * not change and, as a consequence, the Extract offset is not going -+ * to be reset to keep consistency. -+ */ ++ if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) { ++ unsigned long flags; + -+ /* Each FW address shall be mapped and set individually, as we can't -+ * assume anything about their location in the memory address space. ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ /* All on-slots are idle, no non-idle off-slot CSGs available ++ * for considering a meaningful change. Set keep_lru. + */ -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.size_address, -+ trace_buffer->num_pages << PAGE_SHIFT); -+ -+ trace_enable_size_dwords = -+ (trace_buffer->trace_enable_entry_count + 31) >> 5; -+ -+ for (i = 0; i < trace_enable_size_dwords; i++) { -+ kbase_csf_update_firmware_memory( -+ kbdev, trace_buffer->gpu_va.trace_enable + i*4, -+ trace_buffer->trace_enable_init_mask[i]); -+ } ++ keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev); + -+ /* Store CPU virtual addresses for permanently mapped variables, -+ * as they might have slightly changed. 
-+ */ -+ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; -+ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ /* Update offsets */ -+ mcu_write_offset += cache_line_alignment; -+ mcu_rw_offset += cache_line_alignment; ++ dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n", ++ keep_lru, on_slots); + } ++ ++ return keep_lru; +} + -+struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( -+ struct kbase_device *kbdev, const char *name) ++/** ++ * prepare_fast_local_tock() - making preparation arrangement for exercizing ++ * a fast local tock inside scheduling-actions. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * ++ * The function assumes that a scheduling action of firing a fast local tock ++ * call (i.e. an equivalent tock action without dropping the lock) is desired ++ * if there are idle onslot CSGs. The function updates those affected CSGs' ++ * run-state as a preparation. This should only be called from inside the ++ * schedule_actions(), where the previous idle-flags are still considered to ++ * be reflective, following its earlier idle confirmation operational call, ++ * plus some potential newly idle CSGs in the scheduling action committing ++ * steps. ++ * ++ * Return: number of on-slots CSGs that can be considered for replacing. ++ */ ++static int prepare_fast_local_tock(struct kbase_device *kbdev) +{ -+ struct firmware_trace_buffer *trace_buffer; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ unsigned long flags, i; ++ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; + -+ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { -+ if (!strcmp(trace_buffer->name, name)) -+ return trace_buffer; ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ /* Marking the flagged idle CSGs' run state to IDLE, so ++ * the intended fast local tock can replacing them with off-slots ++ * non-idle CSGs. 
++ */ ++ for_each_set_bit(i, csg_bitmap, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ ++ if (!queue_group_idle_locked(group)) { ++ group->run_state = KBASE_CSF_GROUP_IDLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); ++ } + } + -+ return NULL; ++ /* Return the number of idle slots for potential replacement */ ++ return bitmap_weight(csg_bitmap, num_groups); +} -+EXPORT_SYMBOL(kbase_csf_firmware_get_trace_buffer); + -+unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( -+ const struct firmware_trace_buffer *trace_buffer) ++static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, ++ unsigned int timeout_ms) +{ -+ return trace_buffer->trace_enable_entry_count; -+} -+EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ int err = 0; ++ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); + -+static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( -+ struct firmware_trace_buffer *tb, unsigned int bit, bool value) -+{ -+ struct kbase_device *kbdev = tb->kbdev; ++ lockdep_assert_held(&scheduler->lock); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); + -+ if (bit < tb->trace_enable_entry_count) { -+ unsigned int trace_enable_reg_offset = bit >> 5; -+ u32 trace_enable_bit_mask = 1u << (bit & 0x1F); ++ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { ++ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + -+ if (value) { -+ tb->trace_enable_init_mask[trace_enable_reg_offset] |= -+ trace_enable_bit_mask; -+ } else { -+ tb->trace_enable_init_mask[trace_enable_reg_offset] &= -+ ~trace_enable_bit_mask; -+ } ++ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); + -+ /* This is not strictly needed as the caller is supposed to -+ * reload the firmware image (through GPU reset) after updating -+ * the bitmask. Otherwise there is no guarantee that firmware -+ * will take into account the updated bitmask for all types of -+ * trace buffers, since firmware could continue to use the -+ * value of bitmask it cached after the boot. -+ */ -+ kbase_csf_update_firmware_memory( -+ kbdev, -+ tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, -+ tb->trace_enable_init_mask[trace_enable_reg_offset]); -+ } -+} ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, ++ slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); + -+int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( -+ struct firmware_trace_buffer *tb, unsigned int bit, bool value) -+{ -+ struct kbase_device *kbdev = tb->kbdev; -+ int err = 0; -+ unsigned long flags; ++ if (likely(remaining)) { ++ u32 i; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for_each_set_bit(i, changed, num_groups) { ++ struct kbase_queue_group *group; + -+ /* If trace buffer update cannot be performed with -+ * FIRMWARE_CONFIG_UPDATE then we need to do a -+ * silent reset before we update the memory. -+ */ -+ if (!tb->updatable) { -+ /* If there is already a GPU reset pending then inform -+ * the User to retry the update. 
-+ */ -+ if (kbase_reset_gpu_silent(kbdev)) { ++ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) ++ continue; ++ ++ /* The on slot csg is now stopped */ ++ clear_bit(i, slot_mask_local); ++ ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( ++ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); ++ ++ group = scheduler->csg_slots[i].resident_group; ++ if (likely(group)) { ++ /* Only do save/cleanup if the ++ * group is not terminated during ++ * the sleep. ++ */ ++ save_csg_slot(group); ++ if (cleanup_csg_slot(group)) ++ sched_evict_group(group, true, true); ++ } ++ } ++ } else { + dev_warn( + kbdev->dev, -+ "GPU reset already in progress when enabling firmware timeline."); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return -EAGAIN; ++ "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx", ++ kbase_backend_get_cycle_cnt(kbdev), slot_mask[0], ++ slot_mask_local[0]); ++ /* Return the bitmask of the timed out slots to the caller */ ++ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); ++ ++ err = -ETIMEDOUT; + } + } + -+ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, -+ value); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ if (tb->updatable) -+ err = kbase_csf_trigger_firmware_config_update(kbdev); -+ + return err; +} -+EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit); -+ -+bool kbase_csf_firmware_trace_buffer_is_empty( -+ const struct firmware_trace_buffer *trace_buffer) -+{ -+ return *(trace_buffer->cpu_va.insert_cpu_va) == -+ *(trace_buffer->cpu_va.extract_cpu_va); -+} -+EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_is_empty); + -+unsigned int kbase_csf_firmware_trace_buffer_read_data( -+ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes) ++/** ++ * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG ++ * ++ * @kbdev: Pointer to the device ++ * ++ * Used to allow for speedier starting/resumption of another CSG. The worst-case ++ * scenario of the evicted CSG being scheduled next is expected to be rare. ++ * Also, the eviction will not be applied if the GPU is running in protected mode. ++ * Otherwise the the eviction attempt would force the MCU to quit the execution of ++ * the protected mode, and likely re-request to enter it again. 
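++ *
++ * The caller must hold the scheduler lock.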
++ */ ++static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) +{ -+ unsigned int bytes_copied; -+ u8 *data_cpu_va = trace_buffer->data_mapping.cpu_addr; -+ u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); -+ u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); -+ u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ size_t i; ++ struct kbase_queue_group *lru_idle_group = NULL; ++ const u32 total_csg_slots = kbdev->csf.global_iface.group_num; ++ const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >= ++ (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS)); ++ u8 as_usage[BASE_MAX_NR_AS] = { 0 }; + -+ if (insert_offset >= extract_offset) { -+ bytes_copied = min_t(unsigned int, num_bytes, -+ (insert_offset - extract_offset)); -+ memcpy(data, &data_cpu_va[extract_offset], bytes_copied); -+ extract_offset += bytes_copied; -+ } else { -+ unsigned int bytes_copied_head, bytes_copied_tail; ++ lockdep_assert_held(&scheduler->lock); ++ if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) ++ return; + -+ bytes_copied_tail = min_t(unsigned int, num_bytes, -+ (buffer_size - extract_offset)); -+ memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); ++ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE)); ++ if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots) ++ return; /* Some CSG slots remain unused */ + -+ bytes_copied_head = min_t(unsigned int, -+ (num_bytes - bytes_copied_tail), insert_offset); -+ memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); ++ if (all_addr_spaces_used) { ++ for (i = 0; i != total_csg_slots; ++i) { ++ if (scheduler->csg_slots[i].resident_group != NULL) { ++ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr < ++ 0)) ++ continue; + -+ bytes_copied = bytes_copied_head + bytes_copied_tail; -+ extract_offset += bytes_copied; -+ if (extract_offset >= buffer_size) -+ extract_offset = bytes_copied_head; ++ as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++; ++ } ++ } + } + -+ *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; ++ for (i = 0; i != total_csg_slots; ++i) { ++ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; + -+ return bytes_copied; -+} -+EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); ++ /* We expect that by this point all groups would normally be ++ * assigned a physical CSG slot, but if circumstances have ++ * changed then bail out of this optimisation. ++ */ ++ if (group == NULL) ++ return; + -+static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) -+{ -+ unsigned int i; ++ /* Real-time priority CSGs must be kept on-slot even when ++ * idle. ++ */ ++ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && ++ (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && ++ ((lru_idle_group == NULL) || ++ (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { ++ if (WARN_ON(group->kctx->as_nr < 0)) ++ continue; + -+ for (i = 0; i < tb->trace_enable_entry_count; i++) -+ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, (mask >> i) & 1); -+} ++ /* If all address spaces are used, we need to ensure the group does not ++ * share the AS with other active CSGs. Or CSG would be freed without AS ++ * and this optimization would not work. 
++ */ ++ if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1)) ++ lru_idle_group = group; ++ } ++ } + -+#define U32_BITS 32 -+u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb) -+{ -+ u64 active_mask = tb->trace_enable_init_mask[0]; ++ if (lru_idle_group != NULL) { ++ unsigned long slot_mask = 1 << lru_idle_group->csg_nr; + -+ if (tb->trace_enable_entry_count > U32_BITS) -+ active_mask |= (u64)tb->trace_enable_init_mask[1] << U32_BITS; ++ dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d", ++ lru_idle_group->handle, lru_idle_group->kctx->tgid, ++ lru_idle_group->kctx->id, lru_idle_group->csg_nr); ++ suspend_queue_group(lru_idle_group); ++ if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { ++ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + -+ return active_mask; ++ dev_warn( ++ kbdev->dev, ++ "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)", ++ kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle, ++ lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, ++ lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms); ++ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) ++ error_type = DF_PING_REQUEST_TIMEOUT; ++ schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type); ++ } ++ } +} + -+int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask) ++static void schedule_actions(struct kbase_device *kbdev, bool is_tick) +{ -+ struct kbase_device *kbdev = tb->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; -+ int err = 0; ++ struct kbase_queue_group *protm_grp; ++ int ret; ++ bool skip_scheduling_actions; ++ bool skip_idle_slots_update; ++ bool new_protm_top_grp = false; ++ int local_tock_slots = 0; + -+ if (!tb->updatable) { -+ /* If there is already a GPU reset pending, need a retry */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbase_reset_gpu_silent(kbdev)) -+ err = -EAGAIN; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&scheduler->lock); ++ ++ ret = kbase_csf_scheduler_wait_mcu_active(kbdev); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Wait for MCU power on failed on scheduling tick/tock"); ++ return; + } + -+ if (!err) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ update_trace_buffer_active_mask64(tb, mask); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); ++ skip_scheduling_actions = ++ !skip_idle_slots_update && kbdev->protected_mode; ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ /* if we can update the config we need to just trigger -+ * FIRMWARE_CONFIG_UPDATE. -+ */ -+ if (tb->updatable) -+ err = kbase_csf_trigger_firmware_config_update(kbdev); ++ /* Skip scheduling actions as GPU reset hasn't been performed yet to ++ * rectify the anomaly that happened when pmode exit interrupt wasn't ++ * received before the termination of group running in pmode. 
++ */ ++ if (unlikely(skip_scheduling_actions)) { ++ dev_info(kbdev->dev, ++ "Scheduling actions skipped due to anomaly in pmode"); ++ return; + } + -+ return err; -+} -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h -new file mode 100644 -index 000000000..0389d093a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h -@@ -0,0 +1,187 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (!skip_idle_slots_update) { ++ /* Updating on-slot idle CSGs when not in protected mode. */ ++ scheduler_handle_idle_slots(kbdev); + -+#ifndef _KBASE_CSF_TRACE_BUFFER_H_ -+#define _KBASE_CSF_TRACE_BUFFER_H_ ++ /* Determine whether the condition is met for keeping the ++ * Last-Recent-Use. If true, skipping the remaining action ++ * steps and thus extending the previous tick's arrangement, ++ * in particular, no alterations to on-slot CSGs. ++ */ ++ if (keep_lru_on_slots(kbdev)) ++ return; ++ } + -+#include ++ if (is_tick) ++ scheduler_rotate(kbdev); + -+#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -+#define FIRMWARE_LOG_BUF_NAME "fwlog" ++redo_local_tock: ++ scheduler_prepare(kbdev); ++ /* Need to specifically enqueue the GPU idle work if there are no groups ++ * to schedule despite the runnable groups. This scenario will happen ++ * if System suspend is done when all groups are idle and and no work ++ * is submitted for the groups after the System resume. ++ */ ++ if (unlikely(!scheduler->ngrp_to_schedule && ++ scheduler->total_runnable_grps)) { ++ dev_dbg(kbdev->dev, "No groups to schedule in the tick"); ++ enqueue_gpu_idle_work(scheduler); ++ return; ++ } ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; + -+/* Forward declarations */ -+struct firmware_trace_buffer; -+struct kbase_device; ++ /* Avoid update if the top-group remains unchanged and in protected ++ * mode. For the said case, all the slots update is effectively ++ * competing against the active protected mode group (typically the ++ * top-group). If we update other slots, even on leaving the ++ * top-group slot untouched, the firmware would exit the protected mode ++ * for interacting with the host-driver. After it, as the top-group ++ * would again raise the request for entering protected mode, we would ++ * be actively doing the switching over twice without progressing the ++ * queue jobs. 
++ */ ++ if (protm_grp && scheduler->top_grp == protm_grp) { ++ dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", ++ protm_grp->handle); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+/** -+ * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers -+ * -+ * @kbdev: Device pointer -+ * -+ * Allocate resources for trace buffers. In particular: -+ * - One memory page of GPU-readable, CPU-writable memory is used for -+ * the Extract variables of all trace buffers. -+ * - One memory page of GPU-writable, CPU-readable memory is used for -+ * the Insert variables of all trace buffers. -+ * - A data buffer of GPU-writable, CPU-readable memory is allocated -+ * for each trace buffer. -+ * -+ * After that, firmware addresses are written with pointers to the -+ * insert, extract and data buffer variables. The size and the trace -+ * enable bits are not dereferenced by the GPU and shall be written -+ * in the firmware addresses directly. -+ * -+ * This function relies on the assumption that the list of -+ * firmware_trace_buffer elements in the device has already been -+ * populated with data from the firmware image parsing. -+ * -+ * Return: 0 if success, or an error code on failure. -+ */ -+int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); ++ update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); ++ remove_scheduled_group(kbdev, protm_grp); ++ scheduler_check_pmode_progress(kbdev); ++ } else if (scheduler->top_grp) { ++ if (protm_grp) ++ dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", ++ protm_grp->handle); + -+/** -+ * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers -+ * -+ * @kbdev: Device pointer -+ */ -+void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); ++ if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, ++ kbdev->csf.global_iface.groups[0].stream_num)) { ++ dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", ++ scheduler->top_grp->handle, ++ scheduler->top_grp->kctx->tgid, ++ scheduler->top_grp->kctx->id); + -+/** -+ * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section -+ * -+ * @kbdev: Kbase device structure -+ * @entry: Pointer to the section -+ * @size: Size (in bytes) of the section -+ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE -+ * -+ * Read a "trace buffer" section adding metadata for the related trace buffer -+ * to the kbase_device:csf.firmware_trace_buffers list. -+ * -+ * Unexpected trace buffers will not be parsed and, as a consequence, -+ * will not be initialized. -+ * -+ * Return: 0 if successful, negative error code on failure. -+ */ -+int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, -+ const u32 *entry, -+ unsigned int size, -+ bool updatable); ++ /* When entering protected mode all CSG slots can be occupied ++ * but only the protected mode CSG will be running. Any event ++ * that would trigger the execution of an on-slot idle CSG will ++ * need to be handled by the host during protected mode. ++ */ ++ new_protm_top_grp = true; ++ } + -+/** -+ * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot -+ * -+ * @kbdev: Device pointer -+ * -+ * Helper function used when rebooting the firmware to reload the initial setup -+ * for all the trace buffers which have been previously parsed and initialized. 
-+ * -+ * Almost all of the operations done in the initialization process are -+ * replicated, with the difference that they might be done in a different order -+ * and that the variables of a given trace buffer may be mapped to different -+ * offsets within the same existing mappings. -+ * -+ * In other words, the re-initialization done by this function will be -+ * equivalent but not necessarily identical to the original initialization. -+ */ -+void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+/** -+ * kbase_csf_firmware_get_trace_buffer - Get a trace buffer -+ * -+ * @kbdev: Device pointer -+ * @name: Name of the trace buffer to find -+ * -+ * Return: handle to a trace buffer, given the name, or NULL if a trace buffer -+ * with that name couldn't be found. -+ */ -+struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( -+ struct kbase_device *kbdev, const char *name); ++ scheduler_apply(kbdev); + -+/** -+ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer -+ * -+ * @trace_buffer: Trace buffer handle -+ * -+ * Return: Number of trace enable bits in a trace buffer. -+ */ -+unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( -+ const struct firmware_trace_buffer *trace_buffer); ++ /* Scheduler is dropping the exec of the previous protm_grp, ++ * Until the protm quit completes, the GPU is effectively ++ * locked in the secure mode. ++ */ ++ if (protm_grp) ++ scheduler_force_protm_exit(kbdev); + -+/** -+ * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - Update a trace enable bit -+ * -+ * @trace_buffer: Trace buffer handle -+ * @bit: Bit to update -+ * @value: New value for the given bit -+ * -+ * Update the value of a given trace enable bit. -+ * -+ * Return: 0 if successful, negative error code on failure. -+ */ -+int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( -+ struct firmware_trace_buffer *trace_buffer, unsigned int bit, -+ bool value); ++ wait_csg_slots_start(kbdev); ++ wait_csg_slots_finish_prio_update(kbdev); + -+/** -+ * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate -+ * -+ * @trace_buffer: Trace buffer handle -+ * -+ * Return: True if the trace buffer is empty, or false otherwise. -+ */ -+bool kbase_csf_firmware_trace_buffer_is_empty( -+ const struct firmware_trace_buffer *trace_buffer); ++ if (new_protm_top_grp) { ++ scheduler_group_check_protm_enter(kbdev, ++ scheduler->top_grp); ++ } else if (!local_tock_slots && ++ atomic_read(&scheduler->non_idle_offslot_grps)) { ++ /* If during the scheduling action, we have off-slot ++ * non-idle CSGs in waiting, if it happens to have ++ * some new idle slots emerging during the committed ++ * action steps, trigger a one-off fast local tock. ++ */ ++ local_tock_slots = prepare_fast_local_tock(kbdev); + -+/** -+ * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer -+ * -+ * @trace_buffer: Trace buffer handle -+ * @data: Pointer to a client-allocated where data shall be written. -+ * @num_bytes: Maximum number of bytes to read from the trace buffer. -+ * -+ * Read available data from a trace buffer. The client provides a data buffer -+ * of a given size and the maximum number of bytes to read. -+ * -+ * Return: Number of bytes read from the trace buffer. 
-+ */ -+unsigned int kbase_csf_firmware_trace_buffer_read_data( -+ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); ++ if (local_tock_slots) { ++ dev_dbg(kbdev->dev, ++ "In-cycle %d idle slots available\n", ++ local_tock_slots); ++ goto redo_local_tock; ++ } ++ } ++ } else { ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ } + -+/** -+ * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask -+ * -+ * @tb: Trace buffer handle -+ * -+ * Return: Trace buffer active mask. -+ */ -+u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb); ++ evict_lru_or_blocked_csg(kbdev); ++} + +/** -+ * kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask -+ * -+ * @tb: Trace buffer handle -+ * @mask: New active mask -+ * -+ * Return: 0 if successful, negative error code on failure. -+ */ -+int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask); -+ -+#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c -new file mode 100644 -index 000000000..185779c16 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c -@@ -0,0 +1,271 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * can_skip_scheduling() - Check if the scheduling actions can be skipped. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kbdev: Pointer to the device + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called on a scheduling tick or tock to determine if the ++ * scheduling actions can be skipped. ++ * If Scheduler is in sleeping state and exit from the sleep state is allowed ++ * then activation of MCU will be triggered. The tick or tock work item could ++ * have been in flight when the state of Scheduler was changed to sleeping. + * ++ * Return: true if the scheduling actions can be skipped. + */ ++static bool can_skip_scheduling(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+#include ++ lockdep_assert_held(&scheduler->lock); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev))) ++ return true; + -+/** -+ * kbasep_fault_occurred - Check if fault occurred. -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: true if a fault occurred. 
-+ */ -+static bool kbasep_fault_occurred(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ bool ret; ++ if (scheduler->state == SCHED_SUSPENDED) ++ return true; + -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); -+ ret = (kbdev->csf.dof.error_code != DF_NO_ERROR); -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++#ifdef KBASE_PM_RUNTIME ++ if (scheduler->state == SCHED_SLEEPING) { ++ unsigned long flags; + -+ return ret; -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbdev->pm.backend.exit_gpu_sleep_mode) { ++ int ret = scheduler_pm_active_after_sleep(kbdev, &flags); + -+void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) -+{ -+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) { -+ dev_dbg(kbdev->dev, "No userspace client for dumping exists"); -+ return; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!ret) { ++ scheduler->state = SCHED_INACTIVE; ++ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); ++ return false; ++ } ++ ++ dev_info(kbdev->dev, ++ "Skip scheduling due to system suspend"); ++ return true; ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return true; + } ++#endif + -+ wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev)); ++ return false; +} -+KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion); + -+/** -+ * kbase_debug_csf_fault_wakeup - Wake up a waiting user space client. -+ * -+ * @kbdev: Kbase device -+ */ -+static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev) ++static void schedule_on_tock(struct work_struct *work) +{ -+ wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq); -+} ++ struct kbase_device *kbdev = ++ container_of(work, struct kbase_device, csf.scheduler.tock_work.work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ int err; + -+bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, -+ struct kbase_context *kctx, enum dumpfault_error_type error) -+{ -+ unsigned long flags; ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; + -+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) -+ return false; ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ mutex_lock(&scheduler->lock); ++ if (can_skip_scheduling(kbdev)) ++ { ++ atomic_set(&scheduler->pending_tock_work, false); ++ goto exit_no_schedule_unlock; ++ } + -+ if (WARN_ON(error == DF_NO_ERROR)) -+ return false; ++ WARN_ON(!(scheduler->state == SCHED_INACTIVE)); ++ scheduler->state = SCHED_BUSY; ++ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); + -+ if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) { -+ dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported", -+ kctx->tgid, kctx->id, error); -+ kctx = NULL; -+ } ++ /* Undertaking schedule action steps */ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u); ++ while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true) ++ schedule_actions(kbdev, false); + -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ /* Record time information on a non-skipped tock */ ++ scheduler->last_schedule = jiffies; + -+ /* Only one fault at a time can be processed */ -+ if (kbdev->csf.dof.error_code) { -+ dev_info(kbdev->dev, "skip this fault as there's a pending fault"); -+ goto unlock; -+ } ++ scheduler->state = SCHED_INACTIVE; ++ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); ++ if 
(!scheduler->total_runnable_grps) ++ enqueue_gpu_idle_work(scheduler); ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); + -+ kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0; -+ kbdev->csf.dof.kctx_id = kctx ? kctx->id : 0; -+ kbdev->csf.dof.error_code = error; -+ kbase_debug_csf_fault_wakeup(kbdev); ++ dev_dbg(kbdev->dev, ++ "Waking up for event after schedule-on-tock completes."); ++ wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); ++ return; + -+unlock: -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); -+ return true; ++exit_no_schedule_unlock: ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); +} + -+static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size, -+ loff_t *f_pos) ++static void schedule_on_tick(struct work_struct *work) +{ -+#define BUF_SIZE 64 -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ int count; -+ char buf[BUF_SIZE]; -+ u32 tgid, ctx_id; -+ enum dumpfault_error_type error_code; ++ struct kbase_device *kbdev = ++ container_of(work, struct kbase_device, csf.scheduler.tick_work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ if (unlikely(!file)) { -+ pr_warn("%s: file is NULL", __func__); -+ return -EINVAL; -+ } ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; + -+ kbdev = file->private_data; -+ if (unlikely(!buffer)) { -+ dev_warn(kbdev->dev, "%s: buffer is NULL", __func__); -+ return -EINVAL; -+ } ++ kbase_debug_csf_fault_wait_completion(kbdev); ++ mutex_lock(&scheduler->lock); + -+ if (unlikely(*f_pos < 0)) { -+ dev_warn(kbdev->dev, "%s: f_pos is negative", __func__); -+ return -EINVAL; -+ } ++ WARN_ON(scheduler->tick_timer_active); ++ if (can_skip_scheduling(kbdev)) ++ goto exit_no_schedule_unlock; + -+ if (size < sizeof(buf)) { -+ dev_warn(kbdev->dev, "%s: buffer is too small", __func__); -+ return -EINVAL; ++ scheduler->state = SCHED_BUSY; ++ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); ++ ++ /* Undertaking schedule action steps */ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps); ++ schedule_actions(kbdev, true); ++ ++ /* Record time information */ ++ scheduler->last_schedule = jiffies; ++ ++ /* Kicking next scheduling if needed */ ++ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && ++ (scheduler->total_runnable_grps > 0)) { ++ start_tick_timer(kbdev); ++ dev_dbg(kbdev->dev, ++ "scheduling for next tick, num_runnable_groups:%u\n", ++ scheduler->total_runnable_grps); ++ } else if (!scheduler->total_runnable_grps) { ++ enqueue_gpu_idle_work(scheduler); + } + -+ if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev))) -+ return -ERESTARTSYS; ++ scheduler->state = SCHED_INACTIVE; ++ mutex_unlock(&scheduler->lock); ++ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); ++ kbase_reset_gpu_allow(kbdev); + -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); -+ tgid = kbdev->csf.dof.kctx_tgid; -+ ctx_id = kbdev->csf.dof.kctx_id; -+ error_code = kbdev->csf.dof.error_code; -+ BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code))); -+ count = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code); -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); ++ 
wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, ++ scheduler->total_runnable_grps); ++ return; + -+ dev_info(kbdev->dev, "debug csf fault info read"); -+ return simple_read_from_buffer(buffer, size, f_pos, buf, count); ++exit_no_schedule_unlock: ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); +} + -+static int debug_csf_fault_open(struct inode *in, struct file *file) ++static int suspend_active_queue_groups(struct kbase_device *kbdev, ++ unsigned long *slot_mask) +{ -+ struct kbase_device *kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ u32 slot_num; ++ int ret; + -+ if (unlikely(!in)) { -+ pr_warn("%s: inode is NULL", __func__); -+ return -EINVAL; -+ } ++ lockdep_assert_held(&scheduler->lock); + -+ kbdev = in->i_private; -+ if (unlikely(!file)) { -+ dev_warn(kbdev->dev, "%s: file is NULL", __func__); -+ return -EINVAL; -+ } ++ for (slot_num = 0; slot_num < num_groups; slot_num++) { ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[slot_num].resident_group; + -+ if (atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1) == 1) { -+ dev_warn(kbdev->dev, "Only one client is allowed for dump on fault"); -+ return -EBUSY; ++ if (group) { ++ suspend_queue_group(group); ++ set_bit(slot_num, slot_mask); ++ } + } + -+ dev_info(kbdev->dev, "debug csf fault file open"); -+ -+ return simple_open(in, file); ++ ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); ++ return ret; +} + -+static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count, -+ loff_t *ppos) ++static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ unsigned long flags; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; ++ int ret; ++ int ret2; + -+ if (unlikely(!file)) { -+ pr_warn("%s: file is NULL", __func__); -+ return -EINVAL; -+ } ++ mutex_lock(&scheduler->lock); + -+ kbdev = file->private_data; -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); -+ kbdev->csf.dof.error_code = DF_NO_ERROR; -+ kbdev->csf.dof.kctx_tgid = 0; -+ kbdev->csf.dof.kctx_id = 0; -+ dev_info(kbdev->dev, "debug csf fault dump complete"); -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ ret = suspend_active_queue_groups(kbdev, slot_mask); + -+ /* User space finished the dump. -+ * Wake up blocked kernel threads to proceed. ++ if (ret) { ++ dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", ++ kbdev->csf.global_iface.group_num, slot_mask); ++ } ++ ++ /* Need to flush the GPU cache to ensure suspend buffer ++ * contents are not lost on reset of GPU. ++ * Do this even if suspend operation had timed out for some of ++ * the CSG slots. ++ * In case the scheduler already in suspended state, the ++ * cache clean is required as the async reset request from ++ * the debugfs may race against the scheduler suspend operation ++ * due to the extra context ref-count, which prevents the ++ * L2 powering down cache clean operation in the non racing ++ * case. ++ * LSC is being flushed together to cover buslogging usecase, ++ * where GPU reset is done regularly to avoid the log buffer ++ * overflow. 
+ */ -+ wake_up(&kbdev->csf.dof.dump_wait_wq); ++ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); ++ ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, ++ kbdev->reset_timeout_ms); ++ if (ret2) { ++ dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", ++ kbase_backend_get_cycle_cnt(kbdev)); ++ if (!ret) ++ ret = ret2; ++ } + -+ return count; ++ mutex_unlock(&scheduler->lock); ++ ++ return ret; +} + -+static int debug_csf_fault_release(struct inode *in, struct file *file) ++/** ++ * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode ++ * groups when reset is done during ++ * protected mode execution. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ * This function is called at the time of GPU reset, before the suspension of ++ * queue groups, to handle the case when the reset is getting performed whilst ++ * GPU is in protected mode. ++ * On entry to protected mode all the groups, except the top group that executes ++ * in protected mode, are implicitly suspended by the FW. Thus this function ++ * simply marks the normal mode groups as suspended (and cleans up the ++ * corresponding CSG slots) to prevent their potential forceful eviction from ++ * the Scheduler. So if GPU was in protected mode and there was no fault, then ++ * only the protected mode group would be suspended in the regular way post exit ++ * from this function. And if GPU was in normal mode, then all on-slot groups ++ * will get suspended in the regular way. ++ * ++ * Return: true if the groups remaining on the CSG slots need to be suspended in ++ * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. ++ */ ++static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 const num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_queue_group *protm_grp; ++ bool suspend_on_slot_groups = true; ++ bool pmode_active; + unsigned long flags; ++ u32 csg_nr; + -+ if (unlikely(!in)) { -+ pr_warn("%s: inode is NULL", __func__); -+ return -EINVAL; ++ mutex_lock(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ pmode_active = kbdev->protected_mode; ++ ++ if (likely(!protm_grp && !pmode_active)) { ++ /* Case 1: GPU is not in protected mode or it successfully ++ * exited protected mode. All on-slot groups can be suspended in ++ * the regular way before reset. ++ */ ++ suspend_on_slot_groups = true; ++ } else if (protm_grp && pmode_active) { ++ /* Case 2: GPU went successfully into protected mode and hasn't ++ * exited from it yet and the protected mode group is still ++ * active. If there was no fault for the protected mode group ++ * then it can be suspended in the regular way before reset. ++ * The other normal mode on-slot groups were already implicitly ++ * suspended on entry to protected mode so they can be marked as ++ * suspended right away. ++ */ ++ suspend_on_slot_groups = !protm_grp->faulted; ++ } else if (!protm_grp && pmode_active) { ++ /* Case 3: GPU went successfully into protected mode and hasn't ++ * exited from it yet but the protected mode group got deleted. ++ * This would have happened if the FW got stuck during protected ++ * mode for some reason (like GPU page fault or some internal ++ * error). In normal cases FW is expected to send the pmode exit ++ * interrupt before it handles the CSG termination request. 
++ * The other normal mode on-slot groups would already have been ++ * implicitly suspended on entry to protected mode so they can be ++ * marked as suspended right away. ++ */ ++ suspend_on_slot_groups = false; ++ } else if (protm_grp && !pmode_active) { ++ /* Case 4: GPU couldn't successfully enter protected mode, i.e. ++ * PROTM_ENTER request had timed out. ++ * All the on-slot groups need to be suspended in the regular ++ * way before reset. ++ */ ++ suspend_on_slot_groups = true; + } + -+ kbdev = in->i_private; -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); -+ kbdev->csf.dof.kctx_tgid = 0; -+ kbdev->csf.dof.kctx_id = 0; -+ kbdev->csf.dof.error_code = DF_NO_ERROR; -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ atomic_set(&kbdev->csf.dof.enabled, 0); -+ dev_info(kbdev->dev, "debug csf fault file close"); ++ if (likely(!pmode_active)) ++ goto unlock; + -+ /* User space closed the debugfs file. -+ * Wake up blocked kernel threads to resume. ++ /* GPU hasn't exited protected mode, so all the on-slot groups barring ++ * the protected mode group can be marked as suspended right away. + */ -+ wake_up(&kbdev->csf.dof.dump_wait_wq); -+ -+ return 0; -+} ++ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ int new_val; + -+static const struct file_operations kbasep_debug_csf_fault_fops = { -+ .owner = THIS_MODULE, -+ .open = debug_csf_fault_open, -+ .read = debug_csf_fault_read, -+ .write = debug_csf_fault_write, -+ .llseek = default_llseek, -+ .release = debug_csf_fault_release, -+}; ++ if (!group || (group == protm_grp)) ++ continue; + -+void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev) -+{ -+ const char *fname = "csf_fault"; ++ cleanup_csg_slot(group); ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state); + -+ if (unlikely(!kbdev)) { -+ pr_warn("%s: kbdev is NULL", __func__); -+ return; ++ /* Simply treat the normal mode groups as non-idle. The tick ++ * scheduled after the reset will re-initialize the counter ++ * anyways. 
++ */ ++ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val); + } + -+ debugfs_create_file(fname, 0600, kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_debug_csf_fault_fops); ++unlock: ++ mutex_unlock(&scheduler->lock); ++ return suspend_on_slot_groups; +} + -+int kbase_debug_csf_fault_init(struct kbase_device *kbdev) ++static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ -+ if (unlikely(!kbdev)) { -+ pr_warn("%s: kbdev is NULL", __func__); -+ return -EINVAL; -+ } -+ -+ init_waitqueue_head(&(kbdev->csf.dof.fault_wait_wq)); -+ init_waitqueue_head(&(kbdev->csf.dof.dump_wait_wq)); -+ spin_lock_init(&kbdev->csf.dof.lock); -+ kbdev->csf.dof.kctx_tgid = 0; -+ kbdev->csf.dof.kctx_id = 0; -+ kbdev->csf.dof.error_code = DF_NO_ERROR; -+ atomic_set(&kbdev->csf.dof.enabled, 0); -+ -+ return 0; ++ cancel_work_sync(&scheduler->tick_work); +} + -+void kbase_debug_csf_fault_term(struct kbase_device *kbdev) ++static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) +{ ++ atomic_set(&scheduler->pending_tock_work, false); ++ cancel_delayed_work_sync(&scheduler->tock_work); +} -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h -new file mode 100644 -index 000000000..6e9b1a9d5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h -@@ -0,0 +1,137 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#ifndef _KBASE_DEBUG_CSF_FAULT_H -+#define _KBASE_DEBUG_CSF_FAULT_H ++static void scheduler_inner_reset(struct kbase_device *kbdev) ++{ ++ u32 const num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+/** -+ * kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev); ++ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); + -+/** -+ * kbase_debug_csf_fault_init - Create the fault event wait queue per device -+ * and initialize the required resources. -+ * @kbdev: Device pointer -+ * -+ * Return: Zero on success or a negative error code. 
-+ */ -+int kbase_debug_csf_fault_init(struct kbase_device *kbdev); ++ /* Cancel any potential queued delayed work(s) */ ++ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); ++ cancel_tick_timer(kbdev); ++ cancel_tick_work(scheduler); ++ cancel_tock_work(scheduler); ++ cancel_delayed_work_sync(&scheduler->ping_work); + -+/** -+ * kbase_debug_csf_fault_term - Clean up resources created by -+ * @kbase_debug_csf_fault_init. -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_csf_fault_term(struct kbase_device *kbdev); ++ mutex_lock(&scheduler->lock); + -+/** -+ * kbase_debug_csf_fault_wait_completion - Wait for the client to complete. -+ * -+ * @kbdev: Device Pointer -+ * -+ * Wait for the user space client to finish reading the fault information. -+ * This function must be called in thread context. -+ */ -+void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ if (scheduler->active_protm_grp) ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp, ++ 0u); ++ scheduler->active_protm_grp = NULL; ++ memset(kbdev->csf.scheduler.csg_slots, 0, ++ num_groups * sizeof(struct kbase_csf_csg_slot)); ++ bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+/** -+ * kbase_debug_csf_fault_notify - Notify client of a fault. -+ * -+ * @kbdev: Device pointer -+ * @kctx: Faulty context (can be NULL) -+ * @error: Error code. -+ * -+ * Store fault information and wake up the user space client. -+ * -+ * Return: true if a dump on fault was initiated or was is in progress and -+ * so caller can opt to wait for the dumping to complete. -+ */ -+bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, -+ struct kbase_context *kctx, enum dumpfault_error_type error); ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; + -+/** -+ * kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled. -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: true if debugfs file is opened so dump on fault is enabled. -+ */ -+static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) -+{ -+ return atomic_read(&kbdev->csf.dof.enabled); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->total_runnable_grps) << 32)); ++ ++ mutex_unlock(&scheduler->lock); +} + -+/** -+ * kbase_debug_csf_fault_dump_complete - Check if dump on fault is completed. -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: true if dump on fault completes or file is closed. 
-+ */ -+static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) ++void kbase_csf_scheduler_reset(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ bool ret; ++ struct kbase_context *kctx; + -+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) -+ return true; ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + -+ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); -+ ret = (kbdev->csf.dof.error_code == DF_NO_ERROR); -+ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); + -+ return ret; -+} -+#else /* CONFIG_DEBUG_FS */ -+static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev) -+{ -+ return 0; -+} ++ kbase_debug_csf_fault_wait_completion(kbdev); + -+static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev) -+{ -+} ++ if (scheduler_handle_reset_in_protected_mode(kbdev) && ++ !suspend_active_queue_groups_on_reset(kbdev)) { ++ /* As all groups have been successfully evicted from the CSG ++ * slots, clear out thee scheduler data fields and return ++ */ ++ scheduler_inner_reset(kbdev); ++ return; ++ } + -+static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) -+{ -+} ++ mutex_lock(&kbdev->kctx_list_lock); + -+static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, -+ struct kbase_context *kctx, enum dumpfault_error_type error) -+{ -+ return false; -+} ++ /* The loop to iterate over the kbase contexts is present due to lock ++ * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock. ++ * CSF ioctls first take kctx->csf.lock which is context-specific and ++ * then take kbdev->csf.scheduler.lock for global actions like assigning ++ * a CSG slot. ++ * If the lock ordering constraint was not there then could have ++ * directly looped over the active queue groups. ++ */ ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ /* Firmware reload would reinitialize the CSG & CS interface IO ++ * pages, so just need to internally mark the currently active ++ * queue groups as terminated (similar to the unexpected OoM ++ * event case). ++ * No further work can now get executed for the active groups ++ * (new groups would have to be created to execute work) and ++ * in near future Clients would be duly informed of this ++ * reset. The resources (like User IO pages, GPU queue memory) ++ * allocated for the associated queues would be freed when the ++ * Clients do the teardown when they become aware of the reset. ++ */ ++ kbase_csf_active_queue_groups_reset(kbdev, kctx); ++ } + -+static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) -+{ -+ return false; -+} ++ mutex_unlock(&kbdev->kctx_list_lock); + -+static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) -+{ -+ return true; -+} -+#endif /* CONFIG_DEBUG_FS */ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u); + -+#endif /*_KBASE_DEBUG_CSF_FAULT_H*/ -diff --git a/drivers/gpu/arm/bifrost/debug/Kbuild b/drivers/gpu/arm/bifrost/debug/Kbuild -new file mode 100755 -index 000000000..ebf3ddb76 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/Kbuild -@@ -0,0 +1,28 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ /* After queue groups reset, the scheduler data fields clear out */ ++ scheduler_inner_reset(kbdev); ++} + -+bifrost_kbase-y += debug/mali_kbase_debug_ktrace.o ++static void firmware_aliveness_monitor(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.scheduler.ping_work.work); ++ int err; + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o -+ bifrost_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o -+else -+ bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c -new file mode 100644 -index 000000000..ff5f947e2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c -@@ -0,0 +1,851 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Ensure that reset will not be occurring while this function is being ++ * executed as otherwise calling kbase_reset_gpu when reset is already ++ * occurring is a programming error. ++ * ++ * We must use the 'try' variant as the Reset worker can try to flush ++ * this workqueue, which would otherwise deadlock here if we tried to ++ * wait for the reset (and thus ourselves) to complete. ++ */ ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ if (err) { ++ /* It doesn't matter whether the value was -EAGAIN or a fatal ++ * error, just stop processing. 
In case of -EAGAIN, the Reset ++ * worker will restart the scheduler later to resume ping ++ */ ++ return; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state) -+{ -+ switch (state) { -+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: -+ return "DISABLED"; -+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: -+ return "ENABLED"; -+ default: -+ break; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (fw_debug) { ++ /* ping requests cause distraction in firmware debugging */ ++ goto exit; + } ++#endif + -+ return "UNKNOWN"; -+} ++ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED || ++ kbdev->csf.scheduler.state == SCHED_SLEEPING) ++ goto exit; + -+static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client, -+ struct kbase_device *kbdev, u32 reg_addr, u8 op_type) -+{ -+ int i; ++ if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1) ++ goto exit; + -+ if (reg_addr & 0x3) { -+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned", -+ op_type, reg_addr); -+ return false; ++ if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) ++ goto exit; ++ ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ /* Suspend pending - no real need to ping */ ++ goto exit; + } + -+ for (i = 0; i < client->nr_ranges; i++) { -+ struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i]; ++ kbase_csf_scheduler_wait_mcu_active(kbdev); + -+ if ((range->start <= reg_addr) && (reg_addr <= range->end)) -+ return true; -+ } ++ err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms); + -+ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type, -+ reg_addr); ++ if (err) { ++ /* It is acceptable to enqueue a reset whilst we've prevented ++ * them, it will happen after we've allowed them again ++ */ ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) { ++ queue_delayed_work( ++ system_long_wq, &kbdev->csf.scheduler.ping_work, ++ msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT))); ++ } + -+ return false; ++ kbase_pm_context_idle(kbdev); ++exit: ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ kbase_reset_gpu_allow(kbdev); +} + -+static bool validate_op(struct kbase_debug_coresight_csf_client *client, -+ struct kbase_debug_coresight_csf_op *op) ++int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, ++ struct kbase_suspend_copy_buffer *sus_buf) +{ -+ struct kbase_device *kbdev; -+ u32 reg; ++ struct kbase_context *const kctx = group->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool on_slot; ++ int err = 0; + -+ if (!op) -+ return false; ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ mutex_lock(&scheduler->lock); + -+ if (!client) -+ return false; ++ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); + -+ kbdev = (struct kbase_device *)client->drv_data; ++#ifdef KBASE_PM_RUNTIME ++ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { ++ if (wait_for_scheduler_to_exit_sleep(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot 
%d", ++ group->handle, group->kctx->tgid, ++ group->kctx->id, group->csg_nr); + -+ switch (op->type) { -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: -+ return true; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: -+ if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type)) -+ return true; ++ scheduler_wakeup(kbdev, true); + -+ break; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: -+ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; -+ reg += sizeof(u32)) { -+ if (!validate_reg_addr(client, kbdev, reg, op->type)) -+ return false; ++ /* Wait for MCU firmware to start running */ ++ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) ++ dev_warn( ++ kbdev->dev, ++ "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d", ++ group->handle, group->kctx->tgid, ++ group->kctx->id, group->csg_nr); + } + -+ return true; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: -+ if (!op->op.write.ptr) { -+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); -+ break; -+ } ++ /* Check the group state again as scheduler lock would have been ++ * released when waiting for the exit from SLEEPING state. ++ */ ++ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); ++ } ++#endif ++ if (on_slot) { ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; + -+ if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type)) -+ return true; ++ set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); + -+ break; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: -+ if (!op->op.read.ptr) { -+ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); -+ break; ++ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) ++ suspend_queue_group(group); ++ err = wait_csg_slots_suspend(kbdev, slot_mask, ++ kbdev->csf.fw_timeout_ms); ++ if (err) { ++ dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d", ++ kbase_backend_get_cycle_cnt(kbdev), ++ group->handle, group->csg_nr); ++ goto exit; + } ++ } + -+ if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type)) -+ return true; ++ if (queue_group_suspended_locked(group)) { ++ unsigned int target_page_nr = 0, i = 0; ++ u64 offset = sus_buf->offset; ++ size_t to_copy = sus_buf->size; ++ const u32 csg_suspend_buf_nr_pages = ++ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + -+ break; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: -+ if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type)) -+ return true; ++ if (scheduler->state != SCHED_SUSPENDED) { ++ /* Similar to the case of HW counters, need to flush ++ * the GPU L2 cache before reading from the suspend buffer ++ * pages as they are mapped and cached on GPU side. ++ * Flushing LSC is not done here, since only the flush of ++ * CSG suspend buffer contents is needed from the L2 cache. ++ */ ++ kbase_gpu_start_cache_clean( ++ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2); ++ kbase_gpu_wait_cache_clean(kbdev); ++ } else { ++ /* Make sure power down transitions have completed, ++ * i.e. L2 has been powered off as that would ensure ++ * its contents are flushed to memory. ++ * This is needed as Scheduler doesn't wait for the ++ * power down to finish. 
++ */ ++ kbase_pm_wait_for_desired_state(kbdev); ++ } + -+ break; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: -+ fallthrough; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: -+ fallthrough; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: -+ fallthrough; -+ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: -+ if (op->op.bitw.ptr != NULL) -+ return true; ++ for (i = 0; i < csg_suspend_buf_nr_pages && ++ target_page_nr < sus_buf->nr_pages; i++) { ++ struct page *pg = ++ as_page(group->normal_suspend_buf.phy[i]); ++ void *sus_page = kmap(pg); + -+ dev_err(kbdev->dev, "Invalid bitwise operation pointer"); ++ if (sus_page) { ++ kbase_sync_single_for_cpu(kbdev, ++ kbase_dma_addr(pg), ++ PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ break; -+ default: -+ dev_err(kbdev->dev, "Invalid operation %d", op->type); -+ break; ++ err = kbase_mem_copy_to_pinned_user_pages( ++ sus_buf->pages, sus_page, ++ &to_copy, sus_buf->nr_pages, ++ &target_page_nr, offset); ++ kunmap(pg); ++ if (err) ++ break; ++ } else { ++ err = -ENOMEM; ++ break; ++ } ++ } ++ schedule_in_cycle(group, false); ++ } else { ++ /* If addr-space fault, the group may have been evicted */ ++ err = -EIO; + } + -+ return false; ++exit: ++ mutex_unlock(&scheduler->lock); ++ return err; +} + -+static bool validate_seq(struct kbase_debug_coresight_csf_client *client, -+ struct kbase_debug_coresight_csf_sequence *seq) ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf); ++ ++/** ++ * group_sync_updated() - Evaluate sync wait condition of all blocked command ++ * queues of the group. ++ * ++ * @group: Pointer to the command queue group that has blocked command queue(s) ++ * bound to it. ++ * ++ * Return: true if sync wait condition is satisfied for at least one blocked ++ * queue of the group. ++ */ ++static bool group_sync_updated(struct kbase_queue_group *group) +{ -+ struct kbase_debug_coresight_csf_op *ops = seq->ops; -+ int nr_ops = seq->nr_ops; -+ int i; ++ bool updated = false; ++ int stream; + -+ for (i = 0; i < nr_ops; i++) { -+ if (!validate_op(client, &ops[i])) -+ return false; ++ /* Groups can also be blocked on-slot during protected mode. */ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && ++ group->run_state != KBASE_CSF_GROUP_IDLE); ++ ++ for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { ++ struct kbase_queue *const queue = group->bound_queues[stream]; ++ ++ /* To check the necessity of sync-wait evaluation, ++ * we rely on the cached 'status_wait' instead of reading it ++ * directly from shared memory as the CSG has been already ++ * evicted from the CSG slot, thus this CSG doesn't have ++ * valid information in the shared memory. ++ */ ++ if (queue && queue->enabled && ++ CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) ++ if (evaluate_sync_update(queue)) { ++ updated = true; ++ queue->status_wait = 0; ++ } + } + -+ return true; ++ return updated; +} + -+static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op) ++/** ++ * scheduler_get_protm_enter_async_group() - Check if the GPU queue group ++ * can be now allowed to execute in protected mode. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * @group: Pointer to the GPU queue group. ++ * ++ * This function is called outside the scheduling tick/tock to determine ++ * if the given GPU queue group can now execute in protected mode or not. 
++ * If the group pointer passed is NULL then the evaluation is done for the
++ * highest priority group on the scheduler-maintained group lists without
++ * tick-associated rotation actions. This is referred to as the 'top-group'
++ * in a tock action sense.
++ *
++ * It returns the same group pointer that was passed as an argument if that
++ * group matches the highest priority group and has pending protected region
++ * requests, otherwise NULL is returned.
++ *
++ * If the group pointer passed is NULL then the internally evaluated highest
++ * priority group is returned if it has pending protected region requests,
++ * otherwise NULL is returned.
++ *
++ * The evaluated highest priority group may not necessarily be the same as the
++ * scheduler->top_grp. This can happen if there is a dynamic de-idle update
++ * during the tick interval for some on-slot groups that were idle during the
++ * scheduler's normal scheduling action, where the scheduler->top_grp was set.
++ * The recorded scheduler->top_grp is untouched by this evaluation, so it will
++ * not affect the scheduler context/priority list rotation arrangement.
++ *
++ * Return: the pointer to queue group that can currently execute in protected
++ * mode or NULL.
++ */
++static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
++	struct kbase_device *const kbdev,
++	struct kbase_queue_group *const group)
+{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	struct kbase_queue_group *match_grp, *input_grp;
+
++	lockdep_assert_held(&scheduler->lock);
+
++	if (scheduler->state != SCHED_INACTIVE)
++		return NULL;
+
++	match_grp = get_tock_top_group(scheduler);
++	input_grp = group ? 
group : match_grp; ++ ++ if (input_grp && (input_grp == match_grp)) { ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[0]; ++ unsigned long *pending = ++ input_grp->protm_pending_bitmap; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ ++ if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || ++ bitmap_empty(pending, ginfo->stream_num)) ++ input_grp = NULL; ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ } else { ++ input_grp = NULL; + } + -+ return result; ++ return input_grp; +} + -+static int coresight_config_enable(struct kbase_device *kbdev, -+ struct kbase_debug_coresight_csf_config *config) ++void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) +{ -+ int ret = 0; -+ int i; -+ -+ if (!config) -+ return -EINVAL; ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + -+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) -+ return ret; ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; + -+ for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++) -+ ret = execute_op(kbdev, &config->enable_seq->ops[i]); ++ mutex_lock(&scheduler->lock); + -+ if (!ret) { -+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, -+ coresight_state_to_string(config->state), -+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); -+ config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED; ++ if (group->run_state == KBASE_CSF_GROUP_IDLE) { ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); + } ++ /* Check if the group is now eligible for execution in protected mode. */ ++ if (scheduler_get_protm_enter_async_group(kbdev, group)) ++ scheduler_group_check_protm_enter(kbdev, group); + -+ /* Always assign the return code during config enable. -+ * It gets propagated when calling config disable. -+ */ -+ config->error = ret; -+ -+ return ret; ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); +} + -+static int coresight_config_disable(struct kbase_device *kbdev, -+ struct kbase_debug_coresight_csf_config *config) ++/** ++ * check_sync_update_for_on_slot_group() - Check the sync wait condition ++ * for all the queues bound to ++ * the given on-slot group. ++ * ++ * @group: Pointer to the on-slot group that requires evaluation. ++ * ++ * This function is called if the GPU is in protected mode and there are on ++ * slot idle groups with higher priority than the active protected mode group ++ * or this function is called when CQS object is signaled whilst GPU is in ++ * sleep state. ++ * This function will evaluate the sync condition, if any, of all the queues ++ * bound to the given group. ++ * ++ * Return: true if the sync condition of at least one queue has been satisfied. 
++ */ ++static bool check_sync_update_for_on_slot_group( ++ struct kbase_queue_group *group) +{ -+ int ret = 0; ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = ++ &kbdev->csf.scheduler; ++ bool sync_update_done = false; + int i; + -+ if (!config) -+ return -EINVAL; ++ lockdep_assert_held(&scheduler->lock); + -+ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) -+ return ret; ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ struct kbase_queue *queue = group->bound_queues[i]; + -+ for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++) -+ ret = execute_op(kbdev, &config->disable_seq->ops[i]); ++ if (queue && queue->enabled && !sync_update_done) { ++ struct kbase_csf_cmd_stream_group_info *const ginfo = ++ &kbdev->csf.global_iface.groups[group->csg_nr]; ++ struct kbase_csf_cmd_stream_info *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 status = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT); ++ unsigned long flags; + -+ if (!ret) { -+ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, -+ coresight_state_to_string(config->state), -+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); -+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; -+ } else { -+ /* Only assign the error if ret is not 0. -+ * As we don't want to overwrite an error from config enable -+ */ -+ if (!config->error) -+ config->error = ret; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, ++ queue->group, queue, status); ++ ++ if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) ++ continue; ++ ++ /* Save the information of sync object of the command ++ * queue so the callback function, 'group_sync_updated' ++ * can evaluate the sync object when it gets updated ++ * later. ++ */ ++ queue->status_wait = status; ++ queue->sync_ptr = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_POINTER_LO); ++ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ queue->sync_value = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_VALUE); ++ queue->blocked_reason = ++ CS_STATUS_BLOCKED_REASON_REASON_GET( ++ kbase_csf_firmware_cs_output( ++ stream, ++ CS_STATUS_BLOCKED_REASON)); ++ ++ if (!evaluate_sync_update(queue)) ++ continue; ++ ++ /* Update csg_slots_idle_mask and group's run_state */ ++ if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) { ++ /* Only clear the group's idle flag if it has been dealt ++ * with by the scheduler's tick/tock action, otherwise ++ * leave it untouched. ++ */ ++ spin_lock_irqsave(&scheduler->interrupt_lock, ++ flags); ++ clear_bit((unsigned int)group->csg_nr, ++ scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP( ++ kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); ++ spin_unlock_irqrestore( ++ &scheduler->interrupt_lock, flags); ++ /* Request the scheduler to confirm the condition inferred ++ * here inside the protected mode. 
++ */ ++ group->reevaluate_idle_status = true; ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, ++ group->run_state); ++ } ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); ++ sync_update_done = true; ++ } + } + -+ return ret; ++ return sync_update_done; +} + -+void *kbase_debug_coresight_csf_register(void *drv_data, -+ struct kbase_debug_coresight_csf_address_range *ranges, -+ int nr_ranges) ++/** ++ * check_sync_update_for_idle_groups_protm() - Check the sync wait condition ++ * for the idle groups on slot ++ * during protected mode. ++ * ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function checks the gpu queues of all the idle groups on slot during ++ * protected mode that has a higher priority than the active protected mode ++ * group. ++ * ++ * Return: true if the sync condition of at least one queue in a group has been ++ * satisfied. ++ */ ++static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) +{ -+ struct kbase_debug_coresight_csf_client *client, *client_entry; -+ struct kbase_device *kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *protm_grp; ++ bool exit_protm = false; + unsigned long flags; -+ int k; ++ u32 num_groups; ++ u32 i; + -+ if (unlikely(!drv_data)) { -+ pr_err("NULL drv_data"); -+ return NULL; -+ } ++ lockdep_assert_held(&scheduler->lock); + -+ kbdev = (struct kbase_device *)drv_data; ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + -+ if (unlikely(!ranges)) { -+ dev_err(kbdev->dev, "NULL ranges"); -+ return NULL; -+ } ++ if (!protm_grp) ++ return exit_protm; + -+ if (unlikely(!nr_ranges)) { -+ dev_err(kbdev->dev, "nr_ranges is 0"); -+ return NULL; -+ } ++ num_groups = kbdev->csf.global_iface.group_num; + -+ for (k = 0; k < nr_ranges; k++) { -+ if (ranges[k].end < ranges[k].start) { -+ dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x", -+ ranges[k].start, ranges[k].end); -+ return NULL; ++ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = ++ &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ ++ if (group->scan_seq_num < protm_grp->scan_seq_num) { ++ /* If sync update has been performed for the group that ++ * has a higher priority than the protm group, then we ++ * need to exit protected mode. 
++ */ ++ if (check_sync_update_for_on_slot_group(group)) ++ exit_protm = true; + } + } + -+ client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL); ++ return exit_protm; ++} + -+ if (!client) -+ return NULL; ++static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 const num_groups = kbdev->csf.global_iface.group_num; ++ u32 csg_nr; + -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) { -+ struct kbase_debug_coresight_csf_address_range *client_ranges = -+ client_entry->addr_ranges; -+ int i; ++ lockdep_assert_held(&scheduler->lock); + -+ for (i = 0; i < client_entry->nr_ranges; i++) { -+ int j; ++ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + -+ for (j = 0; j < nr_ranges; j++) { -+ if ((ranges[j].start < client_ranges[i].end) && -+ (client_ranges[i].start < ranges[j].end)) { -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ kfree(client); -+ dev_err(kbdev->dev, -+ "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x", -+ client_ranges[i].start, client_ranges[i].end, -+ ranges[j].start, ranges[j].end); ++ if (!group) ++ continue; + -+ return NULL; -+ } -+ } ++ if (check_sync_update_for_on_slot_group(group)) { ++ scheduler_wakeup(kbdev, true); ++ return; + } + } -+ -+ client->drv_data = drv_data; -+ client->addr_ranges = ranges; -+ client->nr_ranges = nr_ranges; -+ list_add(&client->link, &kbdev->csf.coresight.clients); -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ -+ return client; +} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_register); + -+void kbase_debug_coresight_csf_unregister(void *client_data) ++/** ++ * check_group_sync_update_worker() - Check the sync wait condition for all the ++ * blocked queue groups ++ * ++ * @work: Pointer to the context-specific work item for evaluating the wait ++ * condition for all the queue groups in idle_wait_groups list. ++ * ++ * This function checks the gpu queues of all the groups present in both ++ * idle_wait_groups list of a context and all on slot idle groups (if GPU ++ * is in protected mode). ++ * If the sync wait condition for at least one queue bound to the group has ++ * been satisfied then the group is moved to the per context list of ++ * runnable groups so that Scheduler can consider scheduling the group ++ * in next tick or exit protected mode. 
++ */ ++static void check_group_sync_update_worker(struct work_struct *work) +{ -+ struct kbase_debug_coresight_csf_client *client; -+ struct kbase_debug_coresight_csf_config *config_entry; -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ bool retry = true; -+ -+ if (unlikely(!client_data)) { -+ pr_err("NULL client"); -+ return; -+ } ++ struct kbase_context *const kctx = container_of(work, ++ struct kbase_context, csf.sched.sync_update_work); ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool sync_updated = false; + -+ client = (struct kbase_debug_coresight_csf_client *)client_data; ++ mutex_lock(&scheduler->lock); + -+ kbdev = (struct kbase_device *)client->drv_data; -+ if (unlikely(!kbdev)) { -+ pr_err("NULL drv_data in client"); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(scheduler->state == SCHED_BUSY)) { ++ queue_work(kctx->csf.sched.sync_update_wq, ++ &kctx->csf.sched.sync_update_work); ++ mutex_unlock(&scheduler->lock); + return; + } ++#endif + -+ /* check for active config from client */ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ list_del_init(&client->link); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); ++ if (kctx->csf.sched.num_idle_wait_grps != 0) { ++ struct kbase_queue_group *group, *temp; + -+ while (retry && !list_empty(&kbdev->csf.coresight.configs)) { -+ retry = false; -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ if (config_entry->client == client) { -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ kbase_debug_coresight_csf_config_free(config_entry); -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ retry = true; -+ break; ++ list_for_each_entry_safe(group, temp, ++ &kctx->csf.sched.idle_wait_groups, link) { ++ if (group_sync_updated(group)) { ++ sync_updated = true; ++ /* Move this group back in to the runnable ++ * groups list of the context. ++ */ ++ update_idle_suspended_group_state(group); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + } + } ++ } else { ++ WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); + } -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+ kfree(client); ++ if (check_sync_update_for_idle_groups_protm(kbdev)) { ++ scheduler_force_protm_exit(kbdev); ++ sync_updated = true; ++ } ++ ++ /* If scheduler is in sleep or suspended state, re-activate it ++ * to serve on-slot CSGs blocked on CQS which has been signaled. 
++ */ ++ if (!sync_updated && (scheduler->state == SCHED_SLEEPING)) ++ check_sync_update_in_sleep_mode(kbdev); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); ++ ++ mutex_unlock(&scheduler->lock); +} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister); + -+void * -+kbase_debug_coresight_csf_config_create(void *client_data, -+ struct kbase_debug_coresight_csf_sequence *enable_seq, -+ struct kbase_debug_coresight_csf_sequence *disable_seq) ++static ++enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) +{ -+ struct kbase_debug_coresight_csf_client *client; -+ struct kbase_debug_coresight_csf_config *config; -+ struct kbase_device *kbdev; ++ struct kbase_context *const kctx = param; + -+ if (unlikely(!client_data)) { -+ pr_err("NULL client"); -+ return NULL; -+ } ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u); + -+ client = (struct kbase_debug_coresight_csf_client *)client_data; ++ queue_work(kctx->csf.sched.sync_update_wq, ++ &kctx->csf.sched.sync_update_work); + -+ kbdev = (struct kbase_device *)client->drv_data; -+ if (unlikely(!kbdev)) { -+ pr_err("NULL drv_data in client"); -+ return NULL; -+ } ++ return KBASE_CSF_EVENT_CALLBACK_KEEP; ++} + -+ if (enable_seq) { -+ if (!validate_seq(client, enable_seq)) { -+ dev_err(kbdev->dev, "Invalid enable_seq"); -+ return NULL; -+ } -+ } ++int kbase_csf_scheduler_context_init(struct kbase_context *kctx) ++{ ++ int priority; ++ int err; + -+ if (disable_seq) { -+ if (!validate_seq(client, disable_seq)) { -+ dev_err(kbdev->dev, "Invalid disable_seq"); -+ return NULL; -+ } ++ kbase_ctx_sched_init_ctx(kctx); ++ ++ for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++ ++priority) { ++ INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); + } + -+ config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); -+ if (WARN_ON(!client)) -+ return NULL; ++ kctx->csf.sched.num_runnable_grps = 0; ++ INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); ++ kctx->csf.sched.num_idle_wait_grps = 0; ++ kctx->csf.sched.ngrp_to_schedule = 0; + -+ config->client = client; -+ config->enable_seq = enable_seq; -+ config->disable_seq = disable_seq; -+ config->error = 0; -+ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; ++ kctx->csf.sched.sync_update_wq = ++ alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", ++ WQ_HIGHPRI); ++ if (!kctx->csf.sched.sync_update_wq) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to initialize scheduler context workqueue"); ++ err = -ENOMEM; ++ goto alloc_wq_failed; ++ } + -+ INIT_LIST_HEAD(&config->link); ++ INIT_WORK(&kctx->csf.sched.sync_update_work, ++ check_group_sync_update_worker); + -+ return config; -+} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create); ++ kbase_csf_tiler_heap_reclaim_ctx_init(kctx); + -+void kbase_debug_coresight_csf_config_free(void *config_data) -+{ -+ struct kbase_debug_coresight_csf_config *config; ++ err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); + -+ if (unlikely(!config_data)) { -+ pr_err("NULL config"); -+ return; ++ if (err) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to register a sync update callback"); ++ goto event_wait_add_failed; + } + -+ config = (struct kbase_debug_coresight_csf_config *)config_data; ++ return err; + -+ kbase_debug_coresight_csf_config_disable(config); ++event_wait_add_failed: ++ destroy_workqueue(kctx->csf.sched.sync_update_wq); ++alloc_wq_failed: ++ kbase_ctx_sched_remove_ctx(kctx); ++ return err; ++} + -+ kfree(config); ++void 
kbase_csf_scheduler_context_term(struct kbase_context *kctx) ++{ ++ kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); ++ cancel_work_sync(&kctx->csf.sched.sync_update_work); ++ destroy_workqueue(kctx->csf.sched.sync_update_wq); ++ ++ kbase_ctx_sched_remove_ctx(kctx); +} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free); + -+int kbase_debug_coresight_csf_config_enable(void *config_data) ++int kbase_csf_scheduler_init(struct kbase_device *kbdev) +{ -+ struct kbase_debug_coresight_csf_config *config; -+ struct kbase_debug_coresight_csf_client *client; -+ struct kbase_device *kbdev; -+ struct kbase_debug_coresight_csf_config *config_entry; -+ unsigned long flags; -+ int ret = 0; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; + -+ if (unlikely(!config_data)) { -+ pr_err("NULL config"); -+ return -EINVAL; ++ bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); ++ bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); ++ ++ scheduler->csg_slots = kcalloc(num_groups, ++ sizeof(*scheduler->csg_slots), GFP_KERNEL); ++ if (!scheduler->csg_slots) { ++ dev_err(kbdev->dev, ++ "Failed to allocate memory for csg slot status array\n"); ++ return -ENOMEM; + } + -+ config = (struct kbase_debug_coresight_csf_config *)config_data; -+ client = (struct kbase_debug_coresight_csf_client *)config->client; ++ return kbase_csf_mcu_shared_regs_data_init(kbdev); ++} + -+ if (unlikely(!client)) { -+ pr_err("NULL client in config"); -+ return -EINVAL; -+ } ++int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ kbdev = (struct kbase_device *)client->drv_data; -+ if (unlikely(!kbdev)) { -+ pr_err("NULL drv_data in client"); -+ return -EINVAL; -+ } ++ scheduler->timer_enabled = true; + -+ /* Check to prevent double entry of config */ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ if (config_entry == config) { -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ dev_err(kbdev->dev, "Config already enabled"); -+ return -EINVAL; -+ } ++ scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); ++ if (!scheduler->wq) { ++ dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); ++ return -ENOMEM; ++ } ++ scheduler->idle_wq = alloc_ordered_workqueue( ++ "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); ++ if (!scheduler->idle_wq) { ++ dev_err(kbdev->dev, ++ "Failed to allocate GPU idle scheduler workqueue\n"); ++ destroy_workqueue(kbdev->csf.scheduler.wq); ++ return -ENOMEM; + } -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ INIT_WORK(&scheduler->tick_work, schedule_on_tick); ++ INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); ++ atomic_set(&scheduler->pending_tock_work, false); + -+ /* Check the state of Scheduler to confirm the desired state of MCU */ -+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && -+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) && -+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || -+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ /* Wait for MCU to reach the stable ON state */ -+ ret = kbase_pm_wait_for_desired_state(kbdev); ++ INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); + -+ if 
(ret) -+ dev_err(kbdev->dev, -+ "Wait for PM state failed when enabling coresight config"); -+ else -+ ret = coresight_config_enable(kbdev, config); ++ mutex_init(&scheduler->lock); ++ spin_lock_init(&scheduler->interrupt_lock); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ } ++ /* Internal lists */ ++ INIT_LIST_HEAD(&scheduler->runnable_kctxs); ++ INIT_LIST_HEAD(&scheduler->groups_to_schedule); ++ INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); + -+ /* Add config to next enable sequence */ -+ if (!ret) { -+ spin_lock(&kbdev->csf.coresight.lock); -+ list_add(&config->link, &kbdev->csf.coresight.configs); -+ spin_unlock(&kbdev->csf.coresight.lock); -+ } ++ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > ++ (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); ++ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ scheduler->state = SCHED_SUSPENDED; ++ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); ++ scheduler->pm_active_count = 0; ++ scheduler->ngrp_to_schedule = 0; ++ scheduler->total_runnable_grps = 0; ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; ++ scheduler->last_schedule = 0; ++ scheduler->active_protm_grp = NULL; ++ scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; ++ scheduler_doorbell_init(kbdev); + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ kbase_csf_scheduler_unlock(kbdev); ++ INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); ++ scheduler->fast_gpu_idle_handling = false; ++ atomic_set(&scheduler->gpu_no_longer_idle, false); ++ atomic_set(&scheduler->non_idle_offslot_grps, 0); + -+ return ret; ++ hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ scheduler->tick_timer.function = tick_timer_callback; ++ scheduler->tick_timer_active = false; ++ ++ kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); ++ ++ return 0; +} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable); + -+int kbase_debug_coresight_csf_config_disable(void *config_data) ++void kbase_csf_scheduler_term(struct kbase_device *kbdev) +{ -+ struct kbase_debug_coresight_csf_config *config; -+ struct kbase_debug_coresight_csf_client *client; -+ struct kbase_device *kbdev; -+ struct kbase_debug_coresight_csf_config *config_entry; -+ bool found_in_list = false; -+ unsigned long flags; -+ int ret = 0; ++ if (kbdev->csf.scheduler.csg_slots) { ++ WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); ++ /* The unload of Driver can take place only when all contexts have ++ * been terminated. The groups that were not terminated by the User ++ * are terminated on context termination. So no CSGs are expected ++ * to be active at the time of Driver unload. ++ */ ++ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); ++ flush_work(&kbdev->csf.scheduler.gpu_idle_work); ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+ if (unlikely(!config_data)) { -+ pr_err("NULL config"); -+ return -EINVAL; ++ if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { ++ unsigned long flags; ++ /* The power policy could prevent the Scheduler from ++ * getting suspended when GPU becomes idle. 
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev)); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ scheduler_suspend(kbdev); ++ } ++ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); ++ cancel_tick_timer(kbdev); ++ cancel_tick_work(&kbdev->csf.scheduler); ++ cancel_tock_work(&kbdev->csf.scheduler); ++ kfree(kbdev->csf.scheduler.csg_slots); ++ kbdev->csf.scheduler.csg_slots = NULL; + } ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, ++ kbase_csf_scheduler_get_nr_active_csgs(kbdev)); ++ /* Terminating the MCU shared regions, following the release of slots */ ++ kbase_csf_mcu_shared_regs_data_term(kbdev); ++} + -+ config = (struct kbase_debug_coresight_csf_config *)config_data; ++void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.scheduler.idle_wq) ++ destroy_workqueue(kbdev->csf.scheduler.idle_wq); ++ if (kbdev->csf.scheduler.wq) ++ destroy_workqueue(kbdev->csf.scheduler.wq); + -+ /* Exit early if not enabled prior */ -+ if (list_empty(&config->link)) -+ return ret; ++ kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); ++ mutex_destroy(&kbdev->csf.scheduler.lock); ++} + -+ client = (struct kbase_debug_coresight_csf_client *)config->client; ++/** ++ * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will restart the scheduler tick so that regular scheduling can ++ * be resumed without any explicit trigger (like kicking of GPU queues). This ++ * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the ++ * CSF scheduler lock to already have been held. 
++ */ ++static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ if (unlikely(!client)) { -+ pr_err("NULL client in config"); -+ return -EINVAL; -+ } ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+ kbdev = (struct kbase_device *)client->drv_data; -+ if (unlikely(!kbdev)) { -+ pr_err("NULL drv_data in client"); -+ return -EINVAL; -+ } ++ if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) ++ return; + -+ /* Check if the config is in the correct list */ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ if (config_entry == config) { -+ found_in_list = true; -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ WARN_ON((scheduler->state != SCHED_INACTIVE) && ++ (scheduler->state != SCHED_SUSPENDED) && ++ (scheduler->state != SCHED_SLEEPING)); + -+ if (!found_in_list) { -+ dev_err(kbdev->dev, "Config looks corrupted"); -+ return -EINVAL; ++ if (scheduler->total_runnable_grps > 0) { ++ enqueue_tick_work(kbdev); ++ dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); ++ } else if (scheduler->state != SCHED_SUSPENDED) { ++ enqueue_gpu_idle_work(scheduler); + } ++} + -+ kbase_csf_scheduler_lock(kbdev); -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ scheduler_enable_tick_timer_nolock(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} + -+ /* Check the state of Scheduler to confirm the desired state of MCU */ -+ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && -+ (kbdev->csf.scheduler.state != SCHED_SLEEPING) && -+ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || -+ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ /* Wait for MCU to reach the stable ON state */ -+ ret = kbase_pm_wait_for_desired_state(kbdev); ++bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ bool enabled; + -+ if (ret) -+ dev_err(kbdev->dev, -+ "Wait for PM state failed when disabling coresight config"); -+ else -+ ret = coresight_config_disable(kbdev, config); ++ mutex_lock(&scheduler->lock); ++ enabled = scheduler_timer_is_enabled_nolock(kbdev); ++ mutex_unlock(&scheduler->lock); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { -+ /* MCU is OFF, so the disable sequence was already executed. -+ * -+ * Propagate any error that would have occurred during the enable -+ * or disable sequence. -+ * -+ * This is done as part of the disable sequence, since the call from -+ * client is synchronous. ++ return enabled; ++} ++ ++void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, ++ bool enable) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool currently_enabled; ++ ++ mutex_lock(&scheduler->lock); ++ ++ currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); ++ if (currently_enabled && !enable) { ++ scheduler->timer_enabled = false; ++ cancel_tick_timer(kbdev); ++ mutex_unlock(&scheduler->lock); ++ /* The non-sync version to cancel the normal work item is not ++ * available, so need to drop the lock before cancellation. 
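++		 * (Editorial note, an inference not stated in the original vendor
++		 * patch: the tick/tock work items themselves acquire scheduler->lock,
++		 * so a synchronous cancellation performed while still holding the
++		 * lock could deadlock.)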
+ */ -+ ret = config->error; ++ cancel_tick_work(scheduler); ++ cancel_tock_work(scheduler); ++ return; + } + -+ /* Remove config from next disable sequence */ -+ spin_lock(&kbdev->csf.coresight.lock); -+ list_del_init(&config->link); -+ spin_unlock(&kbdev->csf.coresight.lock); ++ if (!currently_enabled && enable) { ++ scheduler->timer_enabled = true; + -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); -+ kbase_csf_scheduler_unlock(kbdev); ++ scheduler_enable_tick_timer_nolock(kbdev); ++ } + -+ return ret; ++ mutex_unlock(&scheduler->lock); +} -+EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable); + -+static void coresight_config_enable_all(struct work_struct *data) ++void kbase_csf_scheduler_kick(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = -+ container_of(data, struct kbase_device, csf.coresight.enable_work); -+ struct kbase_debug_coresight_csf_config *config_entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ if (coresight_config_enable(kbdev, config_entry)) -+ dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry); -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ } ++ mutex_lock(&scheduler->lock); + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) ++ goto out; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (scheduler->total_runnable_grps > 0) { ++ enqueue_tick_work(kbdev); ++ dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); ++ } + -+ wake_up_all(&kbdev->csf.coresight.event_wait); ++out: ++ mutex_unlock(&scheduler->lock); +} + -+static void coresight_config_disable_all(struct work_struct *data) ++int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = -+ container_of(data, struct kbase_device, csf.coresight.disable_work); -+ struct kbase_debug_coresight_csf_config *config_entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ int result = 0; + -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ if (coresight_config_disable(kbdev, config_entry)) -+ dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry); -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); -+ } ++ lockdep_assert_held(&scheduler->lock); + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(scheduler->state == SCHED_BUSY)) ++ return -EBUSY; ++#endif + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#ifdef KBASE_PM_RUNTIME ++ /* If scheduler is in sleeping state, then MCU needs to be activated ++ * to suspend CSGs. 
++ */ ++ if (scheduler->state == SCHED_SLEEPING) { ++ dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend"); ++ result = force_scheduler_to_exit_sleep(kbdev); ++ if (result) { ++ dev_warn(kbdev->dev, "Scheduler failed to exit from sleep"); ++ goto exit; ++ } ++ } ++#endif ++ if (scheduler->state != SCHED_SUSPENDED) { ++ result = suspend_active_groups_on_powerdown(kbdev, true); ++ if (result) { ++ dev_warn(kbdev->dev, "failed to suspend active groups"); ++ goto exit; ++ } else { ++ dev_info(kbdev->dev, "Scheduler PM suspend"); ++ scheduler_suspend(kbdev); ++ cancel_tick_timer(kbdev); ++ } ++ } + -+ wake_up_all(&kbdev->csf.coresight.event_wait); ++exit: ++ return result; +} + -+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev) ++int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ -+ dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter", -+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); ++ int result = 0; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ /* Cancel any potential queued delayed work(s) */ ++ cancel_tick_work(scheduler); ++ cancel_tock_work(scheduler); + -+ kbase_pm_lock(kbdev); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ result = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (result) { ++ dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n"); ++ return result; ++ } + -+ kbdev->csf.coresight.disable_on_pmode_enter = true; -+ kbdev->csf.coresight.enable_on_pmode_exit = false; -+ kbase_pm_update_state(kbdev); ++ mutex_lock(&scheduler->lock); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev); ++ mutex_unlock(&scheduler->lock); + -+ kbase_pm_wait_for_desired_state(kbdev); ++ kbase_reset_gpu_allow(kbdev); + -+ kbase_pm_unlock(kbdev); ++ return result; +} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); + -+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev) ++void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev) +{ -+ dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit", -+ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter); ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + -+ kbdev->csf.coresight.enable_on_pmode_exit = true; -+ kbase_pm_update_state(kbdev); ++ lockdep_assert_held(&scheduler->lock); ++ if ((scheduler->total_runnable_grps > 0) && ++ (scheduler->state == SCHED_SUSPENDED)) { ++ dev_info(kbdev->dev, "Scheduler PM resume"); ++ scheduler_wakeup(kbdev, true); ++ } +} + -+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state) ++void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev) +{ -+ if (unlikely(!kbdev)) -+ return; ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+ if (unlikely(!kbdev->csf.coresight.workq)) -+ return; ++ kbase_csf_scheduler_pm_resume_no_lock(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume); + -+ dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state)); ++void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) ++{ ++ /* Here the lock is taken to synchronize against the runtime suspend ++ * callback 
function, which may need to wake up the MCU for suspending ++ * the CSGs before powering down the GPU. ++ */ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ scheduler_pm_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); + -+ switch (state) { -+ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: -+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work); -+ break; -+ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: -+ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work); -+ break; -+ default: -+ dev_err(kbdev->dev, "Invalid Coresight state %d", state); -+ break; -+ } ++void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) ++{ ++ /* Here the lock is taken just to maintain symmetry with ++ * kbase_csf_scheduler_pm_active(). ++ */ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ scheduler_pm_idle(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); +} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); + -+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state) ++int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +{ -+ struct kbase_debug_coresight_csf_config *config_entry; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; -+ bool success = true; ++ int err; + -+ dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state)); ++ kbase_pm_lock(kbdev); ++ WARN_ON(!kbdev->pm.active_count); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(!scheduler->pm_active_count); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_pm_unlock(kbdev); + -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ kbase_pm_wait_for_poweroff_work_complete(kbdev); + -+ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { -+ if (state != config_entry->state) { -+ success = false; -+ break; -+ } ++ err = kbase_pm_wait_for_desired_state(kbdev); ++ if (!err) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ -+ return success; ++ return err; +} -+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check); ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active); + -+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state) ++#ifdef KBASE_PM_RUNTIME ++int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) +{ -+ const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); -+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; -+ bool success = true; -+ -+ dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state)); -+ -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ int ret; + -+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, -+ link) { -+ const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; -+ long remaining; ++ dev_dbg(kbdev->dev, "Handling runtime suspend"); + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); -+ remaining = 
wait_event_timeout(kbdev->csf.coresight.event_wait, -+ state == config_entry->state, wait_timeout); -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&scheduler->lock); ++ WARN_ON(scheduler->pm_active_count); + -+ if (!remaining) { -+ success = false; -+ dev_err(kbdev->dev, -+ "Timeout waiting for Coresight state transition %s to %s", -+ coresight_state_to_string(prev_state), -+ coresight_state_to_string(state)); -+ } ++ if (scheduler->state == SCHED_SUSPENDED) { ++ WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active); ++ return 0; + } + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ ret = suspend_active_groups_on_powerdown(kbdev, false); + -+ return success; -+} -+KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait); ++ if (ret) { ++ dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)", ++ atomic_read(&scheduler->non_idle_offslot_grps)); + -+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev) -+{ -+ kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0); -+ if (kbdev->csf.coresight.workq == NULL) -+ return -ENOMEM; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.exit_gpu_sleep_mode = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ INIT_LIST_HEAD(&kbdev->csf.coresight.clients); -+ INIT_LIST_HEAD(&kbdev->csf.coresight.configs); -+ INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all); -+ INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all); -+ init_waitqueue_head(&kbdev->csf.coresight.event_wait); -+ spin_lock_init(&kbdev->csf.coresight.lock); ++ kbase_csf_scheduler_invoke_tick(kbdev); ++ return ret; ++ } + -+ kbdev->csf.coresight.disable_on_pmode_enter = false; -+ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ scheduler->state = SCHED_SUSPENDED; ++ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.gpu_sleep_mode_active = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + ++ wake_up_all(&kbdev->csf.event_wait); + return 0; +} + -+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) ++void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev) +{ -+ struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry; -+ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; -+ unsigned long flags; ++ u32 csg_nr; + -+ kbdev->csf.coresight.disable_on_pmode_enter = false; -+ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ cancel_work_sync(&kbdev->csf.coresight.enable_work); -+ cancel_work_sync(&kbdev->csf.coresight.disable_work); -+ destroy_workqueue(kbdev->csf.coresight.workq); -+ kbdev->csf.coresight.workq = NULL; ++ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP); + -+ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) { ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[csg_nr]; ++ bool csg_idle; + -+ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, -+ link) { -+ list_del_init(&config_entry->link); -+ kfree(config_entry); -+ } ++ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group) ++ continue; + -+ list_for_each_entry_safe(client_entry, next_client_entry, 
&kbdev->csf.coresight.clients, -+ link) { -+ list_del_init(&client_entry->link); -+ kfree(client_entry); ++ csg_idle = ++ kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & ++ CSG_STATUS_STATE_IDLE_MASK; ++ if (!csg_idle) { ++ dev_dbg(kbdev->dev, ++ "Re-activate Scheduler after MCU sleep"); ++ kbdev->pm.backend.exit_gpu_sleep_mode = true; ++ kbase_csf_scheduler_invoke_tick(kbdev); ++ break; ++ } + } ++} + -+ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ mutex_lock(&scheduler->lock); ++ if (kbase_pm_gpu_sleep_allowed(kbdev) && ++ (scheduler->state == SCHED_INACTIVE)) ++ scheduler_sleep_on_idle(kbdev); ++ mutex_unlock(&scheduler->lock); +} -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h ++#endif ++ ++void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ mutex_lock(&scheduler->lock); ++ scheduler_wakeup(kbdev, true); ++ mutex_unlock(&scheduler->lock); ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h new file mode 100644 -index 000000000..06d62dc70 +index 000000000..d22d7c8b9 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h -@@ -0,0 +1,182 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +@@ -0,0 +1,681 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -174578,717 +176935,676 @@ index 000000000..06d62dc70 + * + */ + -+#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ -+#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ ++#ifndef _KBASE_CSF_SCHEDULER_H_ ++#define _KBASE_CSF_SCHEDULER_H_ + -+#include -+#include ++#include "mali_kbase_csf.h" ++#include "mali_kbase_csf_event.h" + +/** -+ * struct kbase_debug_coresight_csf_client - Coresight client definition ++ * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue ++ * on firmware. + * -+ * @drv_data: Pointer to driver device data. -+ * @addr_ranges: Arrays of address ranges used by the registered client. -+ * @nr_ranges: Size of @addr_ranges array. -+ * @link: Link item of a Coresight client. -+ * Linked to &struct_kbase_device.csf.coresight.clients. ++ * @queue: Pointer to the GPU command queue to be started. ++ * ++ * This function would enable the start of a CSI, within a ++ * CSG, to which the @queue was bound. ++ * If the CSG is already scheduled and resident, the CSI will be started ++ * right away, otherwise once the group is made resident. ++ * ++ * Return: 0 on success, or negative on failure. -EBUSY is returned to ++ * indicate to the caller that queue could not be enabled due to Scheduler ++ * state and the caller can try to enable the queue after sometime. 
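++ *
++ * Editorial sketch, not part of the original vendor patch; "err" is a
++ * hypothetical local used to show the -EBUSY retry contract described
++ * above:
++ *
++ *   int err = kbase_csf_scheduler_queue_start(queue);
++ *   if (err == -EBUSY) {
++ *           /* Scheduler was busy; retry once its state has changed */
++ *           err = kbase_csf_scheduler_queue_start(queue);
++ *   }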
+ */ -+struct kbase_debug_coresight_csf_client { -+ void *drv_data; -+ struct kbase_debug_coresight_csf_address_range *addr_ranges; -+ u32 nr_ranges; -+ struct list_head link; -+}; ++int kbase_csf_scheduler_queue_start(struct kbase_queue *queue); + +/** -+ * enum kbase_debug_coresight_csf_state - Coresight configuration states ++ * kbase_csf_scheduler_queue_stop() - Disable the running of GPU command queue ++ * on firmware. + * -+ * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled. -+ * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled. ++ * @queue: Pointer to the GPU command queue to be stopped. ++ * ++ * This function would stop the CSI, within a CSG, to which @queue was bound. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+enum kbase_debug_coresight_csf_state { -+ KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0, -+ KBASE_DEBUG_CORESIGHT_CSF_ENABLED, -+}; ++int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue); + +/** -+ * struct kbase_debug_coresight_csf_config - Coresight configuration definition ++ * kbase_csf_scheduler_group_protm_enter - Handle the protm enter event for the ++ * GPU command queue group. + * -+ * @client: Pointer to the client for which the configuration is created. -+ * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL. -+ * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL. -+ * @state: Current Coresight configuration state. -+ * @error: Error code used to know if an error occurred during the execution -+ * of the enable or disable sequences. -+ * @link: Link item of a Coresight configuration. -+ * Linked to &struct_kbase_device.csf.coresight.configs. ++ * @group: The command queue group. ++ * ++ * This function could request the firmware to enter the protected mode ++ * and allow the execution of protected region instructions for all the ++ * bound queues of the group that have protm pending bit set in their ++ * respective CS_ACK register. + */ -+struct kbase_debug_coresight_csf_config { -+ void *client; -+ struct kbase_debug_coresight_csf_sequence *enable_seq; -+ struct kbase_debug_coresight_csf_sequence *disable_seq; -+ enum kbase_debug_coresight_csf_state state; -+ int error; -+ struct list_head link; -+}; ++void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group); + +/** -+ * struct kbase_debug_coresight_device - Object representing the Coresight device ++ * kbase_csf_scheduler_group_get_slot() - Checks if a queue group is ++ * programmed on a firmware CSG slot ++ * and returns the slot number. + * -+ * @clients: List head to maintain Coresight clients. -+ * @configs: List head to maintain Coresight configs. -+ * @lock: A lock to protect client/config lists. -+ * Lists can be accessed concurrently by -+ * Coresight kernel modules and kernel threads. -+ * @workq: Work queue for Coresight enable/disable execution. -+ * @enable_work: Work item used to enable Coresight. -+ * @disable_work: Work item used to disable Coresight. -+ * @event_wait: Wait queue for Coresight events. -+ * @enable_on_pmode_exit: Flag used by the PM state machine to -+ * identify if Coresight enable is needed. -+ * @disable_on_pmode_enter: Flag used by the PM state machine to -+ * identify if Coresight disable is needed. ++ * @group: The command queue group. ++ * ++ * Return: The slot number, if the group is programmed on a slot. ++ * Otherwise returns a negative number. 
++ * ++ * Note: This function should not be used if the interrupt_lock is held. Use ++ * kbase_csf_scheduler_group_get_slot_locked() instead. + */ -+struct kbase_debug_coresight_device { -+ struct list_head clients; -+ struct list_head configs; -+ spinlock_t lock; -+ struct workqueue_struct *workq; -+ struct work_struct enable_work; -+ struct work_struct disable_work; -+ wait_queue_head_t event_wait; -+ bool enable_on_pmode_exit; -+ bool disable_on_pmode_enter; -+}; ++int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group); + +/** -+ * kbase_debug_coresight_csf_init - Initialize Coresight resources. ++ * kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is ++ * programmed on a firmware CSG slot ++ * and returns the slot number. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: The command queue group. + * -+ * This function should be called once at device initialization. ++ * Return: The slot number, if the group is programmed on a slot. ++ * Otherwise returns a negative number. + * -+ * Return: 0 on success. ++ * Note: Caller must hold the interrupt_lock. + */ -+int kbase_debug_coresight_csf_init(struct kbase_device *kbdev); ++int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group); + +/** -+ * kbase_debug_coresight_csf_term - Terminate Coresight resources. ++ * kbase_csf_scheduler_group_events_enabled() - Checks if interrupt events ++ * should be handled for a queue group. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kbdev: The device of the group. ++ * @group: The queue group. + * -+ * This function should be called at device termination to prevent any -+ * memory leaks if Coresight module would have been removed without calling -+ * kbasep_debug_coresight_csf_trace_disable(). ++ * Return: true if interrupt events should be handled. ++ * ++ * Note: Caller must hold the interrupt_lock. + */ -+void kbase_debug_coresight_csf_term(struct kbase_device *kbdev); ++bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); + +/** -+ * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected -+ * mode enter. ++ * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been ++ * programmed to a firmware CSG slot. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kbdev: The GPU device. ++ * @slot: The slot for which to get the queue group. + * -+ * This function should be called just before requesting to enter protected mode. -+ * It will trigger a PM state machine transition from MCU_ON -+ * to ON_PMODE_ENTER_CORESIGHT_DISABLE. ++ * Return: Pointer to the programmed queue group. ++ * ++ * Note: Caller must hold the interrupt_lock. + */ -+void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev); ++struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( ++ struct kbase_device *kbdev, int slot); + +/** -+ * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected -+ * mode enter. ++ * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue ++ * group from the firmware. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @group: Pointer to the queue group to be descheduled. + * -+ * This function should be called after protected mode exit is acknowledged. 
-+ * It will trigger a PM state machine transition from MCU_ON -+ * to ON_PMODE_EXIT_CORESIGHT_ENABLE. ++ * This function would disable the scheduling of GPU command queue group on ++ * firmware. + */ -+void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev); ++void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group); + +/** -+ * kbase_debug_coresight_csf_state_request - Request Coresight state transition. ++ * kbase_csf_scheduler_evict_ctx_slots() - Evict all GPU command queue groups ++ * of a given context that are active ++ * running from the firmware. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @state: Coresight state to check for. ++ * @kbdev: The GPU device. ++ * @kctx: Kbase context for the evict operation. ++ * @evicted_groups: List_head for returning evicted active queue groups. ++ * ++ * This function would disable the scheduling of GPU command queue groups active ++ * on firmware slots from the given Kbase context. The affected groups are ++ * added to the supplied list_head argument. + */ -+void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state); ++void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct list_head *evicted_groups); + +/** -+ * kbase_debug_coresight_csf_state_check - Check Coresight state. ++ * kbase_csf_scheduler_context_init() - Initialize the context-specific part ++ * for CSF scheduler. + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @state: Coresight state to check for. ++ * @kctx: Pointer to kbase context that is being created. + * -+ * Return: true if all states of configs are @state. ++ * This function must be called during Kbase context creation. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state); ++int kbase_csf_scheduler_context_init(struct kbase_context *kctx); + +/** -+ * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete. ++ * kbase_csf_scheduler_init - Initialize the CSF scheduler + * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. -+ * @state: Coresight state to wait for. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * Return: true if all configs become @state in pre-defined time period. ++ * The scheduler does the arbitration for the CSG slots ++ * provided by the firmware between the GPU command queue groups created ++ * by the Clients. ++ * This function must be called after loading firmware and parsing its capabilities. ++ * ++ * Return: 0 on success, or negative on failure. 
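++ *
++ * Editorial note, not part of the original vendor patch: taken together
++ * with the kernel-docs below, the intended ordering is
++ * kbase_csf_scheduler_early_init() at probe time, then firmware load and
++ * capability parsing, then kbase_csf_scheduler_init(); on a failed probe,
++ * kbase_csf_scheduler_early_term() undoes the early initialization.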
+ */ -+bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, -+ enum kbase_debug_coresight_csf_state state); ++int kbase_csf_scheduler_init(struct kbase_device *kbdev); + -+#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h -new file mode 100644 -index 000000000..41b2b00f1 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h -@@ -0,0 +1,322 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_csf_scheduler_early_init - Early initialization for the CSF scheduler + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Initialize necessary resources such as locks, workqueue for CSF scheduler. ++ * This must be called at kbase probe. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_early_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_context_term() - Terminate the context-specific part ++ * for CSF scheduler. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kctx: Pointer to kbase context that is being terminated. + * ++ * This function must be called during Kbase context termination. + */ ++void kbase_csf_scheduler_context_term(struct kbase_context *kctx); + -+/* -+ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** -+ * ***** DO NOT INCLUDE DIRECTLY ***** -+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** ++/** ++ * kbase_csf_scheduler_term - Terminate the CSF scheduler. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This should be called when unload of firmware is done on device ++ * termination. + */ ++void kbase_csf_scheduler_term(struct kbase_device *kbdev); + -+/* -+ * The purpose of this header file is just to contain a list of trace code -+ * identifiers ++/** ++ * kbase_csf_scheduler_early_term - Early termination of the CSF scheduler. + * -+ * When updating this file, also remember to update -+ * mali_kbase_debug_linux_ktrace_csf.h ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * -+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT -+ * DESCRIBED IN mali_kbase_debug_ktrace_codes.h ++ * This should be called only when kbase probe fails or gets rmmoded. 
+ */ ++void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); + -+#if 0 /* Dummy section to avoid breaking formatting */ -+int dummy_array[] = { -+#endif -+ /* -+ * Generic CSF events -+ */ -+ /* info_val = 0 */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), -+ /* info_val == number of CSGs supported */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END), -+ /* info_val[0:7] == fw version_minor -+ * info_val[15:8] == fw version_major -+ * info_val[63:32] == fw version_hash -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), -+ /* info_val == total number of runnable groups across all kctxs */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END), -+ /* info_val = timeout in ms */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START), -+ /* info_val = remaining ms timeout, or 0 if timedout */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_END), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_EVENT), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT), -+ -+ /* info_val = JOB_IRQ_STATUS */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_START), -+ /* info_val = JOB_IRQ_STATUS */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END), -+ /* info_val = JOB_IRQ_STATUS */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_START), -+ /* info_val = GLB_REQ ^ GLB_ACQ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_GLB_REQ_ACK), -+ /* info_val[31:0] = num non idle offslot groups -+ * info_val[32] = scheduler can suspend on idle -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_ADVANCE), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_NOADVANCE), -+ /* kctx is added to the back of the list */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_INSERT), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_REMOVE), -+ /* kctx is moved to the back of the list */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_ROTATE), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_HEAD), -+ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_START), -+ /* 4-bit encoding of boolean values (ease of reading as hex values) -+ * -+ * info_val[3:0] = was reset active/failed to be prevented -+ * info_val[7:4] = whether scheduler was both idle and suspendable -+ * info_val[11:8] = whether all groups were suspended -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_END), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END), -+ -+ /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_UPDATE_IDLE_SLOTS_ACK), -+ -+ /* info_val[63:0] = GPU cycle counter, used mainly for benchmarking -+ * purpose. 
-+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END), -+ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP), -+ -+ /* -+ * Group events -+ */ -+ /* info_val[2:0] == CSG_REQ state issued -+ * info_val[19:16] == as_nr -+ * info_val[63:32] == endpoint config (max number of endpoints allowed) -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START_REQ), -+ /* info_val == CSG_REQ state issued */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP_REQ), -+ /* info_val == CSG_ACK state */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_RUNNING), -+ /* info_val == CSG_ACK state */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED), -+ /* info_val == slot cleaned */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED), -+ /* info_val = slot requesting STATUS_UPDATE */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_UPDATE_IDLE_SLOT_REQ), -+ /* info_val = scheduler's new csg_slots_idle_mask[0] -+ * group->csg_nr indicates which bit was set -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS), -+ /* info_val = scheduler's new csg_slots_idle_mask[0] -+ * group->csg_nr indicates which bit was cleared -+ * -+ * in case of no group, multiple bits may have been updated -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR), -+ /* info_val == previous priority */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_PRIO_UPDATE), -+ /* info_val == CSG_REQ ^ CSG_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_SYNC_UPDATE), -+ /* info_val == CSG_REQ ^ CSG_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_IDLE), -+ /* info_val == CSG_REQ ^ CSG_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROGRESS_TIMER_EVENT), -+ /* info_val[31:0] == CSG_REQ ^ CSG_ACQ -+ * info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_END), -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_DONE), -+ /* info_val == run state of the group */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_DESCHEDULE), -+ /* info_val == run state of the group */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SCHEDULE), -+ /* info_val[31:0] == new run state of the evicted group -+ * info_val[63:32] == number of runnable groups -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT), -+ -+ /* info_val == new num_runnable_grps -+ * group is added to the back of the list for its priority level -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_INSERT), -+ /* info_val == new num_runnable_grps -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_REMOVE), -+ /* info_val == num_runnable_grps -+ * group is moved to the back of the list for its priority level -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_ROTATE), -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_HEAD), -+ /* info_val == new num_idle_wait_grps -+ * group is added to the back of the list -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_INSERT), -+ /* info_val == new num_idle_wait_grps -+ * group is added to the back of the list -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_REMOVE), -+ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_HEAD), -+ -+ /* info_val == is scheduler running with protected mode tasks */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER_CHECK), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_EXIT), -+ /* info_val[31:0] == number of GPU 
address space slots in use -+ * info_val[63:32] == number of runnable groups -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOP_GRP), -+ /* info_val == new count of off-slot non-idle groups -+ * no group indicates it was set rather than incremented -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC), -+ /* info_val == new count of off-slot non-idle groups */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC), -+ /* info_val = scheduler's new csg_slots_idle_mask[0] -+ * group->csg_nr indicates which bit was set -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS), -+ -+ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), -+ -+ /* info_val = scheduler state */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), -+ -+ /* info_val = mcu state */ -+#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n), -+#include "backend/gpu/mali_kbase_pm_mcu_states.h" -+#undef KBASEP_MCU_STATE -+ -+ /* info_val = number of runnable groups */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC), -+ /* info_val = new run state of the evicted group */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED), -+ /* info_val = get the number of active CSGs */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED), -+ -+ /* -+ * Group + Queue events -+ */ -+ /* info_val == queue->enabled */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), -+ /* info_val == queue->enabled before stop */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ), -+ /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group -+ * being suspended -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED), -+ /* info_val == CS_REQ ^ CS_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_FAULT), -+ /* info_val == CS_REQ ^ CS_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_TILER_OOM), -+ /* info_val == CS_REQ ^ CS_ACK */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_PROTM_PEND), -+ /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), -+ /* info_val == group->run_State (for group the queue is bound to) */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), -+ /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START), -+ /* info_val == bool for result of the evaluation */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_END), -+ /* info_val == contents of CS_STATUS_WAIT */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_WAIT_STATUS), -+ /* info_val == current sync value pointed to by queue->sync_ptr */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_CUR_VAL), -+ /* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_TEST_VAL), -+ /* info_val == current value of CS_STATUS_BLOCKED_REASON */ -+ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_BLOCKED_REASON), -+ /* info_val = group's new protm_pending_bitmap[0] -+ * queue->csi_index indicates which bit was set -+ 
*/ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_SET), -+ /* info_val = group's new protm_pending_bitmap[0] -+ * queue->csi_index indicates which bit was cleared -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_CLEAR), -+ -+ /* -+ * KCPU queue events -+ */ -+ /* KTrace info_val == KCPU queue fence context -+ * KCPU extra_info_val == N/A. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_CREATE), -+ /* KTrace info_val == Number of pending commands in KCPU queue when -+ * it is destroyed. -+ * KCPU extra_info_val == Number of CQS wait operations present in -+ * the KCPU queue when it is destroyed. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DELETE), -+ /* KTrace info_val == CQS event memory address -+ * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents -+ * of error field. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_SET), -+ /* KTrace info_val == Number of CQS objects to be waited upon -+ * KCPU extra_info_val == N/A. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_START), -+ /* KTrace info_val == CQS event memory address -+ * KCPU extra_info_val == 1 if CQS was signaled with an error and queue -+ * inherited the error, otherwise 0. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_END), -+ /* KTrace info_val == Fence context -+ * KCPU extra_info_val == Fence seqno. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_SIGNAL), -+ /* KTrace info_val == Fence context -+ * KCPU extra_info_val == Fence seqno. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_START), -+ /* KTrace info_val == Fence context -+ * KCPU extra_info_val == Fence seqno. -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_END), ++/** ++ * kbase_csf_scheduler_reset - Reset the state of all active GPU command ++ * queue groups. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will first iterate through all the active/scheduled GPU ++ * command queue groups and suspend them (to avoid losing work for groups ++ * that are not stuck). The groups that could not get suspended would be ++ * descheduled and marked as terminated (which will then lead to unbinding ++ * of all the queues bound to them) and also no more work would be allowed ++ * to execute for them. ++ * ++ * This is similar to the action taken in response to an unexpected OoM event. ++ * No explicit re-initialization is done for CSG & CS interface I/O pages; ++ * instead, that happens implicitly on firmware reload. ++ * ++ * Should be called only after initiating the GPU reset. ++ */ ++void kbase_csf_scheduler_reset(struct kbase_device *kbdev); + -+#if 0 /* Dummy section to avoid breaking formatting */ -+}; -+#endif ++/** ++ * kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will restart the scheduler tick so that regular scheduling can ++ * be resumed without any explicit trigger (like kicking of GPU queues). 
++ */ ++void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); + -+ /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h -new file mode 100644 -index 000000000..6ba98b7c4 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h -@@ -0,0 +1,206 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue ++ * group and copy suspend buffer. + * -+ * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. ++ * @group: Pointer to the queue group to be suspended. ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages to which we need to copy ++ * the group suspend buffer. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * This function is called to suspend a queue group and copy the suspend_buffer ++ * contents to the input buffer provided. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, ++ struct kbase_suspend_copy_buffer *sus_buf); ++ ++/** ++ * kbase_csf_scheduler_lock - Acquire the global Scheduler lock. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * ++ * This function will take the global scheduler lock, in order to serialize ++ * against the Scheduler actions, for access to CS IO pages. + */ ++static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++} + -+/* -+ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** -+ * ***** DO NOT INCLUDE DIRECTLY ***** -+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** ++/** ++ * kbase_csf_scheduler_unlock - Release the global Scheduler lock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ ++static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev) ++{ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} + -+/* -+ * The purpose of this header file is just to contain a list of trace code -+ * identifiers ++/** ++ * kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock. + * -+ * When updating this file, also remember to update -+ * mali_kbase_debug_linux_ktrace_jm.h ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @flags: Pointer to the memory location that would store the previous ++ * interrupt state. 
+ * -+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT -+ * DESCRIBED IN mali_kbase_debug_ktrace_codes.h ++ * This function will take the global scheduler lock, in order to serialize ++ * against the Scheduler actions, for access to CS IO pages. + */ ++static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, ++ unsigned long *flags) ++{ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags); ++} + -+#if 0 /* Dummy section to avoid breaking formatting */ -+int dummy_array[] = { -+#endif ++/** ++ * kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @flags: Previously stored interrupt state when Scheduler interrupt ++ * spinlock was acquired. ++ */ ++static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, ++ unsigned long flags) ++{ ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++} + -+ /* -+ * Job Slot management events -+ */ -+ /* info_val==irq rawstat at start */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), -+ /* info_val==jobs processed */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), -+ /* In the following: -+ * -+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some -+ * soft-stop cases) -+ * - uatom==kernel-side mapped uatom address (for correlation with -+ * user-side) -+ */ -+ /* info_val==exit code; gpu_addr==chain gpuaddr */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE), -+ /* gpu_addr==JS_HEAD read -+ * info_val==event code -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_RETURN_ATOM_TO_JS), -+ /* gpu_addr==JS_HEAD read -+ * info_val==event code -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_MARK_FOR_RETURN_TO_JS), -+ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of -+ * affinity -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT), -+ /* gpu_addr is as follows: -+ * - If JS_STATUS active after soft-stop, val==gpu addr written to -+ * JS_HEAD on submit -+ * - otherwise gpu_addr==0 -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), -+ /* gpu_addr==JS_TAIL read */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), -+ /* gpu_addr is as follows: -+ * - If JS_STATUS active before soft-stop, val==JS_HEAD -+ * - otherwise gpu_addr==0 -+ */ -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), -+ /* info_val == is_scheduled */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), -+ /* info_val == is_scheduled */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), -+ /* info_val == nr jobs submitted */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), -+ /* gpu_addr==JS_HEAD_NEXT last written */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), -+ KBASE_KTRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), -+ /* -+ * Job dispatch events -+ */ -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE), -+ /* gpu_addr==value to write into JS_HEAD */ -+ 
KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), -+ /* gpu_addr==0, info_val==0, uatom==0 */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), -+ /* -+ * Scheduler Core events -+ */ -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB), -+ /* gpu_addr==last value written/would be written to JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), -+ /* gpu_addr==value to write into JS_HEAD -+ * info_val==priority of atom as a KBASE_JS_ATOM_SCHED_PRIO_<...> value -+ * (0 highest) -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_PULL_JOB), -+ /* gpu_addr==value that would be written to JS_HEAD if run again */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_UNPULL_JOB), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), -+ /* info_val == lower 32 bits of rechecked affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), -+ /* info_val == lower 32 bits of rechecked affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), -+ /* info_val == the ctx attribute now on ctx */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), -+ /* info_val == the ctx attribute now on runpool */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), -+ /* info_val == the ctx attribute now off ctx */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), -+ /* info_val == the ctx attribute now off runpool */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER_END), -+ /* info_val==priority level blocked (0 highest) */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_BLOCKED), -+ /* info_val==priority level unblocked (0 highest) -+ * note that the priority level may still be blocked on higher levels -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_UNBLOCKED), -+ /* gpu_addr==value to write into JS_HEAD -+ * info_val==priority level unblocked - priorities at this and higher -+ * are unblocked (0 highest) -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED), -+ /* gpu_addr==value to write into JS_HEAD -+ * info_val==priority level blocked (0 highest) -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_IS_BLOCKED), -+ /* -+ * Scheduler Policy events ++/** ++ * kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler ++ * interrupt 
spinlock is held. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++static inline void ++kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++} ++ ++/** ++ * kbase_csf_scheduler_timer_is_enabled() - Check if the scheduler wakes up ++ * automatically for periodic tasks. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * Return: true if the scheduler is configured to wake up periodically ++ */ ++bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic ++ * scheduler tasks. ++ * ++ * @kbdev: Pointer to the device ++ * @enable: Whether to enable periodic scheduler tasks ++ */ ++void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, ++ bool enable); ++ ++/** ++ * kbase_csf_scheduler_kick - Perform pending scheduling tasks once. ++ * ++ * Note: This function is only effective if the scheduling timer is disabled. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_scheduler_kick(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_protected_mode_in_use() - Check if the scheduler is ++ * running with protected mode tasks. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * Return: true if the scheduler is running with protected mode tasks ++ */ ++static inline bool kbase_csf_scheduler_protected_mode_in_use( ++ struct kbase_device *kbdev) ++{ ++ return (kbdev->csf.scheduler.active_protm_grp != NULL); ++} ++ ++/** ++ * kbase_csf_scheduler_pm_active - Perform scheduler power active operation ++ * ++ * Note: This function will increase the scheduler's internal pm_active_count ++ * value, ensuring that both GPU and MCU are powered for access. The MCU may ++ * not have actually become active when this function returns, so need to ++ * call kbase_csf_scheduler_wait_mcu_active() for that. ++ * ++ * This function should not be called with global scheduler lock held. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_pm_idle - Perform the scheduler power idle operation ++ * ++ * Note: This function will decrease the scheduler's internal pm_active_count ++ * value. On reaching 0, the MCU and GPU could be powered off. This function ++ * should not be called with global scheduler lock held. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_wait_mcu_active - Wait for the MCU to actually become active ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will wait for the MCU to actually become active. It is supposed ++ * to be called after calling kbase_csf_scheduler_pm_active(). It is needed as ++ * kbase_csf_scheduler_pm_active() may not make the MCU active right away. ++ * ++ * Return: 0 if the MCU was successfully activated otherwise an error code. ++ */ ++int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
++ *
++ * This function will make the scheduler resume the scheduling of queue groups
++ * and take the power management reference, if there are any runnable groups.
++ * The caller must have acquired the global Scheduler lock.
++ */
++void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ *
++ * This function will make the scheduler resume the scheduling of queue groups
++ * and take the power management reference, if there are any runnable groups.
++ */
++void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_pm_suspend_no_lock - Idle the scheduler on system suspend
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ *
++ * This function will make the scheduler suspend all the running queue groups
++ * and drop its power management reference.
++ * The caller must have acquired the global Scheduler lock.
++ *
++ * Return: 0 on success.
++ */
++int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ *
++ * This function will make the scheduler suspend all the running queue groups
++ * and drop its power management reference.
++ *
++ * Return: 0 on success.
++ */
++int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal
++ * runtime used slots are all tagged as idle command queue groups.
++ *
++ * @kbdev: Pointer to the device
++ *
++ * Return: true if all the used slots are tagged as idle CSGs.
++ */
++static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
++{
++	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
++	return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask,
++			    kbdev->csf.scheduler.csg_inuse_bitmap,
++			    kbdev->csf.global_iface.group_num);
++}
++
++/**
++ * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
++ *
++ * @kbdev: Pointer to the device
++ *
++ * This function advances the scheduling tick by enqueuing the tick work item for
++ * immediate execution, but only if the tick hrtimer is active. If the timer
++ * is inactive then the tick work item is already in flight.
++ * The caller must hold the interrupt lock.
++ */
++static inline void
++kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
++
++	lockdep_assert_held(&scheduler->interrupt_lock);
++
++	if (scheduler->tick_timer_active) {
++		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
++		scheduler->tick_timer_active = false;
++		queue_work(scheduler->wq, &scheduler->tick_work);
++	} else {
++		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
++	}
++}
++
++/**
++ * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
++ *
++ * @kbdev: Pointer to the device
++ *
++ * This function advances the scheduling tick by enqueuing the tick work item for
++ * immediate execution, but only if the tick hrtimer is active. If the timer
++ * is inactive then the tick work item is already in flight.
++ */ ++static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ kbase_csf_scheduler_tick_advance_nolock(kbdev); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++/** ++ * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function will queue the scheduling tick work item for immediate ++ * execution if tick timer is not active. This can be called from interrupt ++ * context to resume the scheduling after GPU was put to sleep. ++ */ ++static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ if (!scheduler->tick_timer_active) ++ queue_work(scheduler->wq, &scheduler->tick_work); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++/** ++ * kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function will queue the scheduling tock work item for immediate ++ * execution. ++ */ ++static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); ++ if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) ++ mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); ++} ++ ++/** ++ * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been ++ * configured to operate with the ++ * cs_trace feature. ++ * ++ * @queue: Pointer to the queue. ++ * ++ * Return: True if the gpu queue is configured to operate with the cs_trace ++ * feature, otherwise false. ++ */ ++static inline bool kbase_csf_scheduler_queue_has_trace(struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock); ++ /* In the current arrangement, it is possible for the context to enable ++ * the cs_trace after some queues have been registered with cs_trace in ++ * disabled state. So each queue has its own enabled/disabled condition. 
+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), -+ /* info_val == whether it was evicted */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), -+ /* gpu_addr==JS_HEAD to write if the job were run */ -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), -+ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), ++ return (queue->trace_buffer_size && queue->trace_buffer_base); ++} + -+#if 0 /* Dummy section to avoid breaking formatting */ -+}; ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_csf_scheduler_reval_idleness_post_sleep() - Check GPU's idleness after ++ * putting MCU to sleep state ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function re-evaluates the idleness of on-slot queue groups after MCU ++ * was put to the sleep state and invokes the scheduling tick if any of the ++ * on-slot queue group became non-idle. ++ * CSG_OUTPUT_BLOCK.CSG_STATUS_STATE.IDLE bit is checked to determine the ++ * idleness which is updated by MCU firmware on handling of the sleep request. ++ * ++ * This function is needed to detect if more work was flushed in the window ++ * between the GPU idle notification and the enabling of Doorbell mirror ++ * interrupt (from MCU state machine). Once Doorbell mirror interrupt is ++ * enabled, Host can receive the notification on User doorbell rings. ++ */ ++void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_handle_runtime_suspend() - Handle runtime suspend by ++ * suspending CSGs. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function is called from the runtime suspend callback function for ++ * suspending all the on-slot queue groups. If any of the group is found to ++ * be non-idle after the completion of CSG suspend operation or the CSG ++ * suspend operation times out, then the scheduling tick is invoked and an ++ * error is returned so that the GPU power down can be aborted. ++ * ++ * Return: 0 if all the CSGs were suspended, otherwise an error code. ++ */ ++int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev); +#endif + -+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c ++/** ++ * kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function is called when a GPU idle IRQ has been raised. ++ */ ++void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function calculates the number of CSG slots that have a queue group ++ * resident on them. ++ * ++ * Note: This function should not be used if the interrupt_lock is held. Use ++ * kbase_csf_scheduler_get_nr_active_csgs_locked() instead. ++ * ++ * Return: number of active CSGs. 
++ */ ++u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_get_nr_active_csgs_locked() - Get the number of active ++ * CSGs ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function calculates the number of CSG slots that have a queue group ++ * resident on them. ++ * ++ * Note: This function should be called with interrupt_lock held. ++ * ++ * Return: number of active CSGs. ++ */ ++u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_force_wakeup() - Forcefully resume the scheduling of CSGs ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function is called to forcefully resume the scheduling of CSGs, even ++ * when there wasn't any work submitted for them. ++ * This function is only used for testing purpose. ++ */ ++void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev); ++ ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_csf_scheduler_force_sleep() - Forcefully put the Scheduler to sleeping ++ * state. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function is called to forcefully put the Scheduler to sleeping state ++ * and trigger the sleep of MCU. If the CSGs are not idle, then the Scheduler ++ * would get reactivated again immediately. ++ * This function is only used for testing purpose. ++ */ ++void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev); ++#endif ++ ++#endif /* _KBASE_CSF_SCHEDULER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c new file mode 100644 -index 000000000..cff6f8959 +index 000000000..a5e0ab5ea --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c -@@ -0,0 +1,199 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c +@@ -0,0 +1,788 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -175306,525 +177622,783 @@ index 000000000..cff6f8959 + * + */ + ++#include "mali_kbase_csf_sync_debugfs.h" ++#include "mali_kbase_csf_csg_debugfs.h" +#include -+#include "debug/mali_kbase_debug_ktrace_internal.h" -+#include "debug/backend/mali_kbase_debug_ktrace_csf.h" -+ -+#if KBASE_KTRACE_TARGET_RBUF ++#include + -+void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) -+{ -+ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), -+ "group,slot,prio,csi,kcpu"), 0); -+} ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_sync.h" ++#endif + -+void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, -+ char *buffer, int sz, s32 *written) -+{ -+ const union kbase_ktrace_backend * const be_msg = &trace_msg->backend; -+ /* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the -+ * other backend-specific flags currently imply this anyway -+ */ ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+ /* group parts */ -+ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { -+ const s8 slot = be_msg->gpu.csg_nr; -+ /* group,slot, */ -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "%u,%d,", be_msg->gpu.group_handle, slot), 0); ++#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" + -+ /* prio */ -+ if (slot >= 0) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "%u", be_msg->gpu.slot_prio), 0); ++/* GPU queue related values */ ++#define GPU_CSF_MOVE_OPCODE ((u64)0x1) ++#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) ++#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) ++#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) ++#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) ++#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) ++#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) ++#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) ++#define GPU_CSF_CALL_OPCODE ((u64)0x20) + -+ /* , */ -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ ","), 0); -+ } else { -+ /* No group,slot,prio fields, but ensure ending with "," */ -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ ",,,"), 0); -+ } ++#define MAX_NR_GPU_CALLS (5) ++#define INSTR_OPCODE_MASK ((u64)0xFF << 56) ++#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) ++#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) ++#define MOVE_DEST_MASK ((u64)0xFF << 48) ++#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) ++#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) ++#define SYNC_SRC0_MASK ((u64)0xFF << 40) ++#define SYNC_SRC1_MASK ((u64)0xFF << 32) ++#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) ++#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) ++#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) ++#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) + -+ /* queue parts: csi */ -+ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "%d", be_msg->gpu.csi_index), 0); ++/* Enumeration for types of GPU queue sync events for ++ * the purpose of dumping them through debugfs. 
++ */ ++enum debugfs_gpu_sync_type { ++ DEBUGFS_GPU_SYNC_WAIT, ++ DEBUGFS_GPU_SYNC_SET, ++ DEBUGFS_GPU_SYNC_ADD, ++ NUM_DEBUGFS_GPU_SYNC_TYPES ++}; + -+ /* , */ -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ ","), 0); ++/** ++ * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. ++ * ++ * @kctx: The context of the queue. ++ * @obj_addr: Pointer to the CQS live 32-bit value. ++ * @live_val: Pointer to the u32 that will be set to the CQS object's current, live ++ * value. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, ++ u32 *live_val) ++{ ++ struct kbase_vmap_struct *mapping; ++ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + -+ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { -+ /* kcpu data */ -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "kcpu %d (0x%llx)", -+ be_msg->kcpu.id, -+ be_msg->kcpu.extra_info_val), 0); -+ } ++ if (!cpu_ptr) ++ return -1; + -+ /* Don't end with a trailing "," - this is a 'standalone' formatted -+ * msg, caller will handle the delimiters -+ */ ++ *live_val = *cpu_ptr; ++ kbase_phy_alloc_mapping_put(kctx, mapping); ++ return 0; +} + -+void kbasep_ktrace_add_csf(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, struct kbase_queue_group *group, -+ struct kbase_queue *queue, kbase_ktrace_flag_t flags, -+ u64 info_val) ++/** ++ * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. ++ * ++ * @kctx: The context of the queue. ++ * @obj_addr: Pointer to the CQS live value (32 or 64-bit). ++ * @live_val: Pointer to the u64 that will be set to the CQS object's current, live ++ * value. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, ++ u64 *live_val) +{ -+ unsigned long irqflags; -+ struct kbase_ktrace_msg *trace_msg; -+ struct kbase_context *kctx = NULL; ++ struct kbase_vmap_struct *mapping; ++ u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + -+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) -+ return; ++ if (!cpu_ptr) ++ return -1; + -+ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); ++ *live_val = *cpu_ptr; ++ kbase_phy_alloc_mapping_put(kctx, mapping); ++ return 0; ++} + -+ /* Reserve and update indices */ -+ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++/** ++ * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait ++ * or Fence Signal command, contained in a ++ * KCPU queue. ++ * ++ * @file: The seq_file for printing to. ++ * @cmd: The KCPU Command to be printed. ++ * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
++ */ ++static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, ++ struct kbase_kcpu_command *cmd, ++ const char *cmd_name) ++{ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence = NULL; ++#else ++ struct dma_fence *fence = NULL; ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + -+ /* Determine the kctx */ -+ if (group) -+ kctx = group->kctx; -+ else if (queue) -+ kctx = queue->kctx; ++ struct kbase_sync_fence_info info; ++ const char *timeline_name = NULL; ++ bool is_signaled = false; + -+ /* Fill the common part of the message (including backend.gpu.flags) */ -+ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, -+ info_val); ++ fence = cmd->info.fence.fence; ++ if (WARN_ON(!fence)) ++ return; + -+ /* Indicate to the common code that backend-specific parts will be -+ * valid -+ */ -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ kbase_sync_fence_info_get(cmd->info.fence.fence, &info); ++ timeline_name = fence->ops->get_timeline_name(fence); ++ is_signaled = info.status > 0; + -+ /* Fill the CSF-specific parts of the message -+ * -+ * Generally, no need to use default initializers when queue/group not -+ * present - can usually check the flags instead. -+ */ ++ seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, ++ is_signaled); + -+ if (queue) { -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE; -+ trace_msg->backend.gpu.csi_index = queue->csi_index; -+ } ++ /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ ++ seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", ++ timeline_name, fence->context, (u64)fence->seqno); ++} + -+ if (group) { -+ const s8 slot = group->csg_nr; ++/** ++ * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, ++ * contained in a KCPU queue. ++ * ++ * @file: The seq_file for printing to. ++ * @cmd: The KCPU Command to be printed. 
++ */ ++static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, ++ struct kbase_kcpu_command *cmd) ++{ ++ struct kbase_context *kctx = file->private; ++ size_t i; + -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_GROUP; ++ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { ++ struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; + -+ trace_msg->backend.gpu.csg_nr = slot; ++ u32 live_val; ++ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); ++ bool live_val_valid = (ret >= 0); + -+ if (slot >= 0) { -+ struct kbase_csf_csg_slot *csg_slot = -+ &kbdev->csf.scheduler.csg_slots[slot]; ++ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + -+ trace_msg->backend.gpu.slot_prio = -+ csg_slot->priority; -+ } -+ /* slot >=0 indicates whether slot_prio valid, so no need to -+ * initialize in the case where it's invalid -+ */ ++ if (live_val_valid) ++ seq_printf(file, "0x%.16llx", (u64)live_val); ++ else ++ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + -+ trace_msg->backend.gpu.group_handle = group->handle; ++ seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + } -+ -+ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); -+ -+ /* Done */ -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + -+void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, -+ struct kbase_kcpu_command_queue *queue, -+ u64 info_val1, u64 info_val2) ++/** ++ * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS ++ * Set command, contained in a KCPU queue. ++ * ++ * @file: The seq_file for printing to. ++ * @cmd: The KCPU Command to be printed. ++ */ ++static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, ++ struct kbase_kcpu_command *cmd) +{ -+ unsigned long irqflags; -+ struct kbase_ktrace_msg *trace_msg; -+ struct kbase_context *kctx = queue->kctx; -+ -+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) -+ return; -+ -+ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); -+ -+ /* Reserve and update indices */ -+ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++ struct kbase_context *kctx = file->private; ++ size_t i; + -+ /* Fill the common part of the message */ -+ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, -+ info_val1); ++ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { ++ struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; + -+ /* Indicate to the common code that backend-specific parts will be -+ * valid -+ */ -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ u32 live_val; ++ int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); ++ bool live_val_valid = (ret >= 0); + -+ /* Fill the KCPU-specific parts of the message */ -+ trace_msg->backend.kcpu.id = queue->id; -+ trace_msg->backend.kcpu.extra_info_val = info_val2; -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_KCPU; ++ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + -+ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); ++ if (live_val_valid) ++ seq_printf(file, "0x%.16llx", (u64)live_val); ++ else ++ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + -+ /* Done */ -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++ seq_printf(file, " | op:add arg_value:0x%.8x", 1); ++ } +} + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h 
b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h -new file mode 100644 -index 000000000..e3d037307 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h -@@ -0,0 +1,203 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++/** ++ * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @op: The numerical value of operation. + * ++ * Return: const static pointer to the command name, or '??' if unknown. + */ ++static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) ++{ ++ const char *string; + -+#ifndef _KBASE_DEBUG_KTRACE_CSF_H_ -+#define _KBASE_DEBUG_KTRACE_CSF_H_ ++ switch (op) { ++ case BASEP_CQS_WAIT_OPERATION_LE: ++ string = "le"; ++ break; ++ case BASEP_CQS_WAIT_OPERATION_GT: ++ string = "gt"; ++ break; ++ default: ++ string = "??"; ++ break; ++ } ++ return string; ++} + -+/* -+ * KTrace target for internal ringbuffer -+ */ -+#if KBASE_KTRACE_TARGET_RBUF +/** -+ * kbasep_ktrace_add_csf - internal function to add trace about CSF -+ * @kbdev: kbase device -+ * @code: trace code -+ * @group: queue group, or NULL if no queue group -+ * @queue: queue, or NULL if no queue -+ * @flags: flags about the message -+ * @info_val: generic information about @code to add to the trace ++ * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. + * -+ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF() instead. ++ * @op: The numerical value of operation. ++ * ++ * Return: const static pointer to the command name, or '??' if unknown. + */ ++static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) ++{ ++ const char *string; + -+void kbasep_ktrace_add_csf(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, struct kbase_queue_group *group, -+ struct kbase_queue *queue, kbase_ktrace_flag_t flags, -+ u64 info_val); ++ switch (op) { ++ case BASEP_CQS_SET_OPERATION_ADD: ++ string = "add"; ++ break; ++ case BASEP_CQS_SET_OPERATION_SET: ++ string = "set"; ++ break; ++ default: ++ string = "???"; ++ break; ++ } ++ return string; ++} + +/** -+ * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF -+ * KCPU queues. -+ * @kbdev: kbase device -+ * @code: trace code -+ * @queue: queue, or NULL if no queue -+ * @info_val1: Main infoval variable with information based on the KCPU -+ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h -+ * for information on the infoval values. -+ * @info_val2: Extra infoval variable with information based on the KCPU -+ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h -+ * for information on the infoval values. 
++ * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS ++ * Wait Operation command, contained ++ * in a KCPU queue. + * -+ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead. ++ * @file: The seq_file for printing to. ++ * @cmd: The KCPU Command to be printed. + */ -+void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, -+ struct kbase_kcpu_command_queue *queue, -+ u64 info_val1, u64 info_val2); ++static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, ++ struct kbase_kcpu_command *cmd) ++{ ++ size_t i; ++ struct kbase_context *kctx = file->private; + -+#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ -+ kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \ -+ flags, info_val) ++ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { ++ struct base_cqs_wait_operation_info *wait_op = ++ &cmd->info.cqs_wait_operation.objs[i]; ++ const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); + -+#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \ -+ info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \ -+ queue, info_val1, info_val2) ++ u64 live_val; ++ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); + -+#else /* KBASE_KTRACE_TARGET_RBUF */ ++ bool live_val_valid = (ret >= 0); + -+#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(group);\ -+ CSTD_UNUSED(queue);\ -+ CSTD_UNUSED(flags);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + -+#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(queue);\ -+ CSTD_UNUSED(info_val1);\ -+ CSTD_UNUSED(info_val2);\ -+ } while (0) ++ if (live_val_valid) ++ seq_printf(file, "0x%.16llx", live_val); ++ else ++ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); ++ } ++} + -+/* -+ * KTrace target for Linux's ftrace ++/** ++ * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS ++ * Set Operation command, contained ++ * in a KCPU queue. + * -+ * Note: the header file(s) that define the trace_mali_<...> tracepoints are -+ * included by the parent header file ++ * @file: The seq_file for printing to. ++ * @cmd: The KCPU Command to be printed. 
+ */ -+#if KBASE_KTRACE_TARGET_FTRACE -+ -+#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ -+ trace_mali_##code(kbdev, group, queue, info_val) ++static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, ++ struct kbase_kcpu_command *cmd) ++{ ++ size_t i; ++ struct kbase_context *kctx = file->private; + -+#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ -+ trace_mali_##code(queue, info_val1, info_val2) ++ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { ++ struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; ++ const char *op_name = kbasep_csf_sync_get_set_op_name( ++ (basep_cqs_set_operation_op)set_op->operation); + -+#else /* KBASE_KTRACE_TARGET_FTRACE */ ++ u64 live_val; ++ int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); + -+#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(group);\ -+ CSTD_UNUSED(queue);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ bool live_val_valid = (ret >= 0); + -+#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ -+ do {\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(queue);\ -+ CSTD_UNUSED(info_val1);\ -+ CSTD_UNUSED(info_val2);\ -+ } while (0) ++ seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + -+#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++ if (live_val_valid) ++ seq_printf(file, "0x%.16llx", live_val); ++ else ++ seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + -+/* -+ * Master set of macros to route KTrace to any of the targets -+ */ ++ seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); ++ } ++} + +/** -+ * KBASE_KTRACE_ADD_CSF_GRP - Add trace values about a group, with info -+ * @kbdev: kbase device -+ * @code: trace code -+ * @group: queue group, or NULL if no queue group -+ * @info_val: generic information about @code to add to the trace ++ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. ++ * @file: The seq_file to print to. ++ * @queue: Pointer to the KCPU queue. + */ -+#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure fn calls */ \ -+ struct kbase_queue_group *__group = group; \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \ -+ __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, \ -+ __info_val); \ -+ } while (0) ++static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, ++ struct kbase_kcpu_command_queue *queue) ++{ ++ char started_or_pending; ++ struct kbase_kcpu_command *cmd; ++ struct kbase_context *kctx = file->private; ++ size_t i; ++ ++ if (WARN_ON(!queue)) ++ return; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ mutex_lock(&queue->lock); ++ ++ for (i = 0; i != queue->num_pending_cmds; ++i) { ++ started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; ++ seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, ++ started_or_pending); ++ ++ cmd = &queue->commands[queue->start_offset + i]; ++ switch (cmd->type) { ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); ++ break; ++#endif ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); ++ break; ++ default: ++ seq_puts(file, ", U, Unknown blocking command"); ++ break; ++ } ++ ++ seq_puts(file, "\n"); ++ } ++ ++ mutex_unlock(&queue->lock); ++} + +/** -+ * KBASE_KTRACE_ADD_CSF_GRP_Q - Add trace values about a group, queue, with info -+ * @kbdev: kbase device -+ * @code: trace code -+ * @group: queue group, or NULL if no queue group -+ * @queue: queue, or NULL if no queue -+ * @info_val: generic information about @code to add to the trace ++ * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. ++ * @file: The seq_file for printing to. ++ * ++ * Return: Negative error code or 0 on success. 
+ */ -+#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure fn calls */ \ -+ struct kbase_queue_group *__group = group; \ -+ struct kbase_queue *__queue = queue; \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, \ -+ __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, \ -+ __queue, __info_val); \ -+ } while (0) ++static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) ++{ ++ struct kbase_context *kctx = file->private; ++ unsigned long queue_idx; + ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); + -+#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ -+ do { \ -+ /* capture values that could come from non-pure fn calls */ \ -+ struct kbase_kcpu_command_queue *__queue = queue; \ -+ u64 __info_val1 = info_val1; \ -+ u64 __info_val2 = info_val2; \ -+ KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \ -+ __info_val1, __info_val2); \ -+ KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \ -+ __info_val1, __info_val2); \ -+ } while (0) ++ queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + -+#endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h -new file mode 100644 -index 000000000..1896e10ed ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h -@@ -0,0 +1,116 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++ while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { ++ kbasep_csf_sync_kcpu_debugfs_print_queue(file, ++ kctx->csf.kcpu_queues.array[queue_idx]); ++ ++ queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, ++ queue_idx + 1); ++ } ++ ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ return 0; ++} ++ ++/** ++ * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations ++ * from a MOVE instruction. + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * @move_cmd: Raw MOVE instruction. ++ * @sync_addr_reg: Register identifier from SYNC_* instruction. ++ * @compare_val_reg: Register identifier from SYNC_* instruction. ++ * @sync_val: Pointer to store CQS object address for sync operation. ++ * @compare_val: Pointer to store compare value for sync operation. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Return: True if value is obtained by checking for correct register identifier, ++ * or false otherwise. 
++ */ ++static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, ++ u64 compare_val_reg, u64 *sync_val, ++ u64 *compare_val) ++{ ++ u64 imm_mask; ++ ++ /* Verify MOVE instruction and get immediate mask */ ++ if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) ++ imm_mask = MOVE32_IMM_MASK; ++ else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) ++ imm_mask = MOVE_IMM_MASK; ++ else ++ /* Error return */ ++ return false; ++ ++ /* Verify value from MOVE instruction and assign to variable */ ++ if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) ++ *sync_val = move_cmd & imm_mask; ++ else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) ++ *compare_val = move_cmd & imm_mask; ++ else ++ /* Error return */ ++ return false; ++ ++ return true; ++} ++ ++/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided ++ * offset. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @queue: Pointer to the queue. ++ * @ringbuff_offset: Ringbuffer offset. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Return: the u64 in the ringbuffer at the desired offset. ++ */ ++static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) ++{ ++ u64 page_off = ringbuff_offset >> PAGE_SHIFT; ++ u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; ++ struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); ++ u64 *ringbuffer = kmap_atomic(page); ++ u64 value = ringbuffer[offset_within_page / sizeof(u64)]; ++ ++ kunmap_atomic(ringbuffer); ++ return value; ++} ++ ++/** ++ * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. + * ++ * @file: Pointer to debugfs seq_file file struct for writing output. ++ * @kctx: Pointer to kbase context. ++ * @queue: Pointer to the GPU command queue. ++ * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. ++ * (Useful for finding preceding MOVE commands) ++ * @sync_cmd: Entire u64 of the sync command, which has both sync address and ++ * comparison-value encoded in it. ++ * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). ++ * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). ++ * @follows_wait: Bool to indicate if the operation follows at least one wait ++ * operation. Used to determine whether it's pending or started. 
+ */ ++static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx, ++ struct kbase_queue *queue, u32 ringbuff_offset, ++ u64 sync_cmd, enum debugfs_gpu_sync_type type, ++ bool is_64bit, bool follows_wait) ++{ ++ u64 sync_addr = 0, compare_val = 0, live_val = 0; ++ u64 move_cmd; ++ u8 sync_addr_reg, compare_val_reg, wait_condition = 0; ++ int err; + -+#ifndef _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ -+#define _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ ++ static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; ++ static const char *const gpu_sync_type_op[] = { ++ "wait", /* This should never be printed, only included to simplify indexing */ ++ "set", "add" ++ }; ++ ++ if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) { ++ dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); ++ return; ++ } ++ ++ /* We expect there to be at least 2 preceding MOVE instructions, and ++ * Base will always arrange for the 2 MOVE + SYNC instructions to be ++ * contiguously located, and is therefore never expected to be wrapped ++ * around the ringbuffer boundary. ++ */ ++ if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) { ++ dev_warn(kctx->kbdev->dev, ++ "Unexpected wraparound detected between %s & MOVE instruction", ++ gpu_sync_type_name[type]); ++ return; ++ } ++ ++ /* 1. Get Register identifiers from SYNC_* instruction */ ++ sync_addr_reg = SYNC_SRC0_GET(sync_cmd); ++ compare_val_reg = SYNC_SRC1_GET(sync_cmd); ++ ++ /* 2. Get values from first MOVE command */ ++ ringbuff_offset -= sizeof(u64); ++ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); ++ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, ++ &sync_addr, &compare_val)) ++ return; ++ ++ /* 3. Get values from next MOVE command */ ++ ringbuff_offset -= sizeof(u64); ++ move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); ++ if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, ++ &sync_addr, &compare_val)) ++ return; ++ ++ /* 4. Get CQS object value */ ++ if (is_64bit) ++ err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); ++ else ++ err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); ++ ++ if (err) ++ return; ++ ++ /* 5. Print info */ ++ seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, ++ queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P', ++ gpu_sync_type_name[type]); ++ ++ if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) ++ seq_puts(file, "slot:-"); ++ else ++ seq_printf(file, "slot:%d", (int)queue->group->csg_nr); ++ ++ seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); ++ ++ if (type == DEBUGFS_GPU_SYNC_WAIT) { ++ wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); ++ seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); ++ } else ++ seq_printf(file, "op:%s ", gpu_sync_type_op[type]); ++ ++ seq_printf(file, "arg_value:0x%.16llx\n", compare_val); ++} + -+#if KBASE_KTRACE_TARGET_RBUF +/** -+ * DOC: KTrace version history, CSF variant ++ * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. + * -+ * 1.0: -+ * First version, with version information in the header. ++ * @file: seq_file for printing to. ++ * @queue: Address of a GPU command queue to examine. 
+ * -+ * 1.1: -+ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as -+ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), -+ * (unique kctx id) ++ * This function will iterate through each command in the ring buffer of the given GPU queue from ++ * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and ++ * print relevant information to the debugfs file. ++ * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. ++ * when there are no more commands to view) or a number of consumed GPU CALL commands have ++ * been observed. ++ */ ++static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) ++{ ++ struct kbase_context *kctx; ++ u32 *addr; ++ u64 cs_extract, cs_insert, instr, cursor; ++ bool follows_wait = false; ++ int nr_calls = 0; ++ ++ if (!queue) ++ return; ++ ++ kctx = queue->kctx; ++ ++ addr = (u32 *)queue->user_io_addr; ++ cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); ++ ++ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); ++ cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); ++ ++ cursor = cs_extract; ++ ++ if (!is_power_of_2(queue->size)) { ++ dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", ++ queue->csi_index, queue->size); ++ return; ++ } ++ ++ while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { ++ bool instr_is_64_bit = false; ++ /* Calculate offset into ringbuffer from the absolute cursor, ++ * by finding the remainder of the cursor divided by the ++ * ringbuffer size. The ringbuffer size is guaranteed to be ++ * a power of 2, so the remainder can be calculated without an ++ * explicit modulo. queue->size - 1 is the ringbuffer mask. ++ */ ++ u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); ++ ++ /* Find instruction that cursor is currently on */ ++ instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); ++ ++ switch (INSTR_OPCODE_GET(instr)) { ++ case GPU_CSF_SYNC_ADD64_OPCODE: ++ case GPU_CSF_SYNC_SET64_OPCODE: ++ case GPU_CSF_SYNC_WAIT64_OPCODE: ++ instr_is_64_bit = true; ++ default: ++ break; ++ } ++ ++ switch (INSTR_OPCODE_GET(instr)) { ++ case GPU_CSF_SYNC_ADD_OPCODE: ++ case GPU_CSF_SYNC_ADD64_OPCODE: ++ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, ++ instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, ++ follows_wait); ++ break; ++ case GPU_CSF_SYNC_SET_OPCODE: ++ case GPU_CSF_SYNC_SET64_OPCODE: ++ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, ++ instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, ++ follows_wait); ++ break; ++ case GPU_CSF_SYNC_WAIT_OPCODE: ++ case GPU_CSF_SYNC_WAIT64_OPCODE: ++ kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, ++ instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, ++ follows_wait); ++ follows_wait = true; /* Future commands will follow at least one wait */ ++ break; ++ case GPU_CSF_CALL_OPCODE: ++ nr_calls++; ++ /* Fallthrough */ ++ default: ++ /* Unrecognized command, skip past it */ ++ break; ++ } ++ ++ cursor += sizeof(u64); ++ } ++} ++ ++/** ++ * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of ++ * the provided queue group. + * -+ * ftrace backend now outputs kctx field (as %d_%u format). ++ * @file: seq_file for printing to. ++ * @group: Address of a GPU command group to iterate through. 
+ * -+ * Add fields group, slot, prio, csi into backend-specific part. ++ * This function will iterate through each queue in the provided GPU queue group and ++ * print its SYNC related commands. ++ */ ++static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, ++ struct kbase_queue_group *const group) ++{ ++ struct kbase_context *kctx = file->private; ++ unsigned int i; ++ ++ seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, ++ group->csg_nr, kctx->tgid, kctx->id); ++ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) ++ kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); ++} ++ ++/** ++ * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * -+ * 1.2: -+ * There is a new class of KCPU traces; with this, a new KCPU column in the -+ * ringbuffer RBUF (mali_trace) between csi and info_val, which is empty -+ * for non-kcpu related traces, and usually displays the KCPU Queue ID and -+ * an extra information value. ftrace also displays these KCPU traces. ++ * @file: The seq_file for printing to. + * -+ * 1.3: -+ * Add a lot of extra new traces. Tweak some existing scheduler related traces -+ * to contain extra information information/happen at slightly different times. -+ * SCHEDULER_PROTM_EXIT now has group information ++ * Return: Negative error code or 0 on success. + */ -+#define KBASE_KTRACE_VERSION_MAJOR 1 -+#define KBASE_KTRACE_VERSION_MINOR 3 ++static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) ++{ ++ u32 gr; ++ struct kbase_context *kctx = file->private; ++ struct kbase_device *kbdev; + -+/* indicates if the trace message has valid queue-group related info. */ -+#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) ++ if (WARN_ON(!kctx)) ++ return -EINVAL; + -+/* indicates if the trace message has valid queue related info. */ -+#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) ++ kbdev = kctx->kbdev; ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_csf_debugfs_update_active_groups_status(kbdev); + -+/* indicates if the trace message has valid KCPU-queue related info. */ -+#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) ++ for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[gr].resident_group; ++ if (!group || group->kctx != kctx) ++ continue; ++ kbasep_csf_dump_active_group_sync_state(file, group); ++ } + -+/* Collect all the flags together for debug checking */ -+#define KBASE_KTRACE_FLAG_BACKEND_ALL \ -+ (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \ -+ KBASE_KTRACE_FLAG_CSF_KCPU) ++ kbase_csf_scheduler_unlock(kbdev); ++ return 0; ++} + +/** -+ * union kbase_ktrace_backend - backend specific part of a trace message -+ * @kcpu: kcpu union member -+ * @kcpu.code: Identifies the event, refer to enum kbase_ktrace_code. -+ * @kcpu.flags: indicates information about the trace message itself. Used -+ * during dumping of the message. -+ * @kcpu.id: ID of the KCPU queue. -+ * @kcpu.extra_info_val: value specific to the type of KCPU event being traced. -+ * Refer to the KPU specific code in enum kbase_ktrace_code in -+ * mali_kbase_debug_ktrace_codes_csf.h -+ * @gpu: gpu union member -+ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. -+ * @gpu.flags: indicates information about the trace message itself. Used -+ * during dumping of the message. -+ * @gpu.group_handle: Handle identifying the associated queue group. 
Only valid -+ * when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. -+ * @gpu.csg_nr: Number/index of the associated queue group's CS group to -+ * which it is mapped, or negative if none associated. Only -+ * valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. -+ * @gpu.slot_prio: The priority of the slot for the associated group, if it -+ * was scheduled. Hence, only valid when @csg_nr >=0 and -+ * @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. -+ * @gpu.csi_index: ID of the associated queue's CS HW interface. -+ * Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE. ++ * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information ++ * ++ * @file: The seq_file for printing to. ++ * @data: The debugfs dentry private data, a pointer to kbase_context. ++ * ++ * Return: Negative error code or 0 on success. + */ ++static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) ++{ ++ seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + -+union kbase_ktrace_backend { -+ /* Place 64 and 32-bit members together */ -+ /* Pack smaller members together */ -+ struct { -+ kbase_ktrace_code_t code; -+ kbase_ktrace_flag_t flags; -+ u8 id; -+ u64 extra_info_val; -+ } kcpu; ++ kbasep_csf_sync_kcpu_debugfs_show(file); ++ kbasep_csf_sync_gpu_debugfs_show(file); ++ return 0; ++} + -+ struct { -+ kbase_ktrace_code_t code; -+ kbase_ktrace_flag_t flags; -+ u8 group_handle; -+ s8 csg_nr; -+ u8 slot_prio; -+ s8 csi_index; -+ } gpu; ++static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_sync_debugfs_fops = { ++ .open = kbasep_csf_sync_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, +}; + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+#endif /* _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h ++/** ++ * kbase_csf_sync_debugfs_init() - Initialise debugfs file. ++ * ++ * @kctx: Kernel context pointer. ++ */ ++void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++ const mode_t mode = 0444; ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx, ++ &kbasep_csf_sync_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) ++ dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry"); ++} ++ ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h new file mode 100644 -index 000000000..efa8ab05b +index 000000000..177e15d85 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h -@@ -0,0 +1,112 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h +@@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -175842,107 +178416,32 @@ index 000000000..efa8ab05b + * + */ + -+#ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ -+#define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ -+ -+#if KBASE_KTRACE_TARGET_RBUF -+/** -+ * DOC: KTrace version history, JM variant -+ * -+ * 1.0: -+ * Original version (implicit, header did not carry version information). -+ * -+ * 2.0: -+ * Introduced version information into the header. -+ * -+ * Some changes of parameter names in header. -+ * -+ * Trace now uses all 64-bits of info_val. -+ * -+ * Non-JM specific parts moved to using info_val instead of refcount/gpu_addr. -+ * -+ * 2.1: -+ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as -+ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), -+ * (unique kctx id). -+ * -+ * ftrace backend now outputs kctx field (as %d_%u format). -+ * -+ * 2.2: -+ * Add tracing codes for pulling, unpulling, and returns atoms to JS for -+ * diagnosing soft-stop path and preemption problems -+ */ -+#define KBASE_KTRACE_VERSION_MAJOR 2 -+#define KBASE_KTRACE_VERSION_MINOR 2 -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+ -+/* -+ * Note: mali_kbase_debug_ktrace_jm.h needs these value even if the RBUF target -+ * is disabled (they get discarded with CSTD_UNUSED(), but they're still -+ * referenced) -+ */ ++#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_ ++#define _KBASE_CSF_SYNC_DEBUGFS_H_ + -+/* indicates if the trace message has a valid refcount member */ -+#define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) -+/* indicates if the trace message has a valid jobslot member */ -+#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) -+/* indicates if the trace message has valid atom related info. */ -+#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) ++/* Forward declaration */ ++struct kbase_context; + -+#if KBASE_KTRACE_TARGET_RBUF -+/* Collect all the flags together for debug checking */ -+#define KBASE_KTRACE_FLAG_BACKEND_ALL \ -+ (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT \ -+ | KBASE_KTRACE_FLAG_JM_ATOM) ++#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 + +/** -+ * union kbase_ktrace_backend - backend specific part of a trace message -+ * Contains only a struct but is a union such that it is compatible with -+ * generic JM and CSF KTrace calls. ++ * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info + * -+ * @gpu: gpu union member -+ * @gpu.atom_udata: Copy of the user data sent for the atom in base_jd_submit. -+ * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags -+ * @gpu.gpu_addr: GPU address, usually of the job-chain represented by an -+ * atom. -+ * @gpu.atom_number: id of the atom for which trace message was added. Only -+ * valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags -+ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. -+ * @gpu.flags: indicates information about the trace message itself. Used -+ * during dumping of the message. -+ * @gpu.jobslot: job-slot for which trace message was added, valid only for -+ * job-slot management events. -+ * @gpu.refcount: reference count for the context, valid for certain events -+ * related to scheduler core and policy. 
++ * @kctx: The kbase_context for which to create the debugfs entry + */ -+union kbase_ktrace_backend { -+ struct { -+ /* Place 64 and 32-bit members together */ -+ u64 atom_udata[2]; /* Only valid for -+ * KBASE_KTRACE_FLAG_JM_ATOM -+ */ -+ u64 gpu_addr; -+ int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ -+ /* Pack smaller members together */ -+ kbase_ktrace_code_t code; -+ kbase_ktrace_flag_t flags; -+ u8 jobslot; -+ u8 refcount; -+ } gpu; -+}; -+#endif /* KBASE_KTRACE_TARGET_RBUF */ ++void kbase_csf_sync_debugfs_init(struct kbase_context *kctx); + -+#endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c ++#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c new file mode 100644 -index 000000000..6597a15e5 +index 000000000..8072a8bd2 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c -@@ -0,0 +1,121 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +@@ -0,0 +1,1372 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -175960,1251 +178459,1367 @@ index 000000000..6597a15e5 + * + */ + -+#include -+#include "debug/mali_kbase_debug_ktrace_internal.h" -+#include "debug/backend/mali_kbase_debug_ktrace_jm.h" ++#include + -+#if KBASE_KTRACE_TARGET_RBUF ++#include "mali_kbase_csf_tiler_heap.h" ++#include "mali_kbase_csf_tiler_heap_def.h" ++#include "mali_kbase_csf_heap_context_alloc.h" + -+void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) -+{ -+ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), -+ "katom,gpu_addr,jobslot,refcount"), 0); -+} ++/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */ ++#define HEAP_SHRINK_STOP_LIMIT (1) + -+void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, -+ char *buffer, int sz, s32 *written) ++/** ++ * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap ++ * ++ * @cdsbp_0: Descriptor_type and buffer_type ++ * @size: The size of the current heap chunk ++ * @pointer: Pointer to the current heap chunk ++ * @low_pointer: Pointer to low end of current heap chunk ++ * @high_pointer: Pointer to high end of current heap chunk ++ */ ++struct kbase_csf_gpu_buffer_heap { ++ u32 cdsbp_0; ++ u32 size; ++ u64 pointer; ++ u64 low_pointer; ++ u64 high_pointer; ++} __packed; ++ ++/** ++ * encode_chunk_ptr - Encode the address and size of a chunk as an integer. ++ * ++ * @chunk_size: Size of a tiler heap chunk, in bytes. ++ * @chunk_addr: GPU virtual address of the same tiler heap chunk. ++ * ++ * The size and address of the next chunk in a list are packed into a single ++ * 64-bit value for storage in a chunk's header. This function returns that ++ * value. ++ * ++ * Return: Next chunk pointer suitable for writing into a chunk header. 
++ */ ++static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) +{ -+ /* katom */ -+ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "atom %d (ud: 0x%llx 0x%llx)", -+ trace_msg->backend.gpu.atom_number, -+ trace_msg->backend.gpu.atom_udata[0], -+ trace_msg->backend.gpu.atom_udata[1]), 0); ++ u64 encoded_size, encoded_addr; + -+ /* gpu_addr */ -+ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ ",%.8llx,", trace_msg->backend.gpu.gpu_addr), -+ 0); -+ else -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ ",,"), 0); ++ WARN_ON(chunk_size & ~CHUNK_SIZE_MASK); ++ WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK); + -+ /* jobslot */ -+ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "%d", trace_msg->backend.gpu.jobslot), 0); ++ encoded_size = ++ (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) << ++ CHUNK_HDR_NEXT_SIZE_POS; + -+ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), -+ ","), 0); ++ encoded_addr = ++ (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) << ++ CHUNK_HDR_NEXT_ADDR_POS; + -+ /* refcount */ -+ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) -+ *written += MAX(snprintf(buffer + *written, -+ MAX(sz - *written, 0), -+ "%d", trace_msg->backend.gpu.refcount), 0); ++ return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) | ++ (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK); +} + -+void kbasep_ktrace_add_jm(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, -+ struct kbase_context *kctx, -+ const struct kbase_jd_atom *katom, u64 gpu_addr, -+ kbase_ktrace_flag_t flags, int refcount, int jobslot, -+ u64 info_val) ++/** ++ * get_last_chunk - Get the last chunk of a tiler heap ++ * ++ * @heap: Pointer to the tiler heap. ++ * ++ * Return: The address of the most recently-linked chunk, or NULL if none. ++ */ ++static struct kbase_csf_tiler_heap_chunk *get_last_chunk( ++ struct kbase_csf_tiler_heap *const heap) +{ -+ unsigned long irqflags; -+ struct kbase_ktrace_msg *trace_msg; ++ if (list_empty(&heap->chunks_list)) ++ return NULL; + -+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) -+ return; ++ return list_last_entry(&heap->chunks_list, ++ struct kbase_csf_tiler_heap_chunk, link); ++} + -+ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); ++/** ++ * remove_external_chunk_mappings - Remove external mappings from a chunk that ++ * is being transitioned to the tiler heap ++ * memory system. ++ * ++ * @kctx: kbase context the chunk belongs to. ++ * @chunk: The chunk whose external mappings are going to be removed. ++ * ++ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates ++ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other ++ * parts of kbase outside of tiler heap management should not take references on its physical ++ * pages, and should not modify them. 
++ */ ++static void remove_external_chunk_mappings(struct kbase_context *const kctx, ++ struct kbase_csf_tiler_heap_chunk *chunk) ++{ ++ lockdep_assert_held(&kctx->reg_lock); + -+ /* Reserve and update indices */ -+ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++ if (chunk->region->cpu_alloc != NULL) { ++ kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0, ++ chunk->region->cpu_alloc->nents); ++ } ++#if !defined(CONFIG_MALI_VECTOR_DUMP) ++ chunk->region->flags |= KBASE_REG_DONT_NEED; ++#endif + -+ /* Fill the common part of the message (including backend.gpu.flags) */ -+ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, -+ info_val); ++ dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va); ++} + -+ /* Indicate to the common code that backend-specific parts will be -+ * valid -+ */ -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++/** ++ * link_chunk - Link a chunk into a tiler heap ++ * ++ * @heap: Pointer to the tiler heap. ++ * @chunk: Pointer to the heap chunk to be linked. ++ * ++ * Unless the @chunk is the first in the kernel's list of chunks belonging to ++ * a given tiler heap, this function stores the size and address of the @chunk ++ * in the header of the preceding chunk. This requires the GPU memory region ++ * containing the header to be mapped temporarily, which can fail. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++static int link_chunk(struct kbase_csf_tiler_heap *const heap, ++ struct kbase_csf_tiler_heap_chunk *const chunk) ++{ ++ struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap); + -+ /* Fill the JM-specific parts of the message */ -+ if (katom) { -+ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM; ++ if (prev) { ++ struct kbase_context *const kctx = heap->kctx; ++ u64 *prev_hdr = prev->map.addr; + -+ trace_msg->backend.gpu.atom_number = -+ kbase_jd_atom_id(katom->kctx, katom); -+ trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0]; -+ trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1]; -+ } ++ WARN((prev->region->flags & KBASE_REG_CPU_CACHED), ++ "Cannot support CPU cached chunks without sync operations"); + -+ trace_msg->backend.gpu.gpu_addr = gpu_addr; -+ trace_msg->backend.gpu.jobslot = jobslot; -+ /* Clamp refcount */ -+ trace_msg->backend.gpu.refcount = MIN((unsigned int)refcount, 0xFF); ++ *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); + -+ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); ++ dev_dbg(kctx->kbdev->dev, ++ "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", ++ prev->gpu_va, chunk->gpu_va); ++ } + -+ /* Done */ -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++ return 0; +} + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h -new file mode 100644 -index 000000000..41be7a120 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h -@@ -0,0 +1,309 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
++/** ++ * init_chunk - Initialize and link a tiler heap chunk + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @heap: Pointer to the tiler heap. ++ * @chunk: Pointer to the heap chunk to be initialized and linked. ++ * @link_with_prev: Flag to indicate if the new chunk needs to be linked with ++ * the previously allocated chunk. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Zero-initialize a new chunk's header (including its pointer to the next ++ * chunk, which doesn't exist yet) and then update the previous chunk's ++ * header to link the new chunk into the chunk list. + * ++ * Return: 0 if successful or a negative error code on failure. + */ ++static int init_chunk(struct kbase_csf_tiler_heap *const heap, ++ struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) ++{ ++ int err = 0; ++ u64 *chunk_hdr; ++ struct kbase_context *const kctx = heap->kctx; + -+#ifndef _KBASE_DEBUG_KTRACE_JM_H_ -+#define _KBASE_DEBUG_KTRACE_JM_H_ ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + -+/* -+ * KTrace target for internal ringbuffer -+ */ -+#if KBASE_KTRACE_TARGET_RBUF -+/** -+ * kbasep_ktrace_add_jm - internal function to add trace about Job Management -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @flags: flags about the message -+ * @refcount: reference count information to add to the trace -+ * @jobslot: jobslot information to add to the trace -+ * @info_val: generic information about @code to add to the trace -+ * -+ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. 
-+ */ -+void kbasep_ktrace_add_jm(struct kbase_device *kbdev, -+ enum kbase_ktrace_code code, -+ struct kbase_context *kctx, -+ const struct kbase_jd_atom *katom, u64 gpu_addr, -+ kbase_ktrace_flag_t flags, int refcount, int jobslot, -+ u64 info_val); ++ if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { ++ dev_err(kctx->kbdev->dev, ++ "Tiler heap chunk address is unusable\n"); ++ return -EINVAL; ++ } + -+#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ -+ refcount, jobslot, info_val) \ -+ kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ -+ gpu_addr, flags, refcount, jobslot, info_val) ++ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), ++ "Cannot support CPU cached chunks without sync operations"); ++ chunk_hdr = chunk->map.addr; ++ if (WARN(chunk->map.size < CHUNK_HDR_SIZE, ++ "Tiler chunk kernel mapping was not large enough for zero-init")) { ++ return -EINVAL; ++ } + -+#else /* KBASE_KTRACE_TARGET_RBUF */ ++ memset(chunk_hdr, 0, CHUNK_HDR_SIZE); ++ INIT_LIST_HEAD(&chunk->link); + -+#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ -+ refcount, jobslot, info_val) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(flags);\ -+ CSTD_UNUSED(refcount);\ -+ CSTD_UNUSED(jobslot);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) -+#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ if (link_with_prev) ++ err = link_chunk(heap, chunk); + -+/* -+ * KTrace target for Linux's ftrace ++ if (unlikely(err)) { ++ dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n"); ++ return -EINVAL; ++ } ++ ++ list_add_tail(&chunk->link, &heap->chunks_list); ++ heap->chunk_count++; ++ ++ return err; ++} ++ ++/** ++ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a ++ * heap. + * -+ * Note: the header file(s) that define the trace_mali_<...> tracepoints are -+ * included by the parent header file ++ * @kctx: Kbase context that was used to allocate the memory. ++ * @chunk: Chunk that has been allocated, but not linked into a heap. + */ -+#if KBASE_KTRACE_TARGET_FTRACE -+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ -+ jobslot) \ -+ trace_mali_##code(kctx, jobslot, 0) ++static void remove_unlinked_chunk(struct kbase_context *kctx, ++ struct kbase_csf_tiler_heap_chunk *chunk) ++{ ++ if (WARN_ON(!list_empty(&chunk->link))) ++ return; + -+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ -+ gpu_addr, jobslot, info_val) \ -+ trace_mali_##code(kctx, jobslot, info_val) ++ kbase_gpu_vm_lock(kctx); ++ kbase_vunmap(kctx, &chunk->map); ++ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT ++ * regions), and so we must clear that flag too before freeing. ++ * For "no user free count", we check that the count is 1 as it is a shrinkable region; ++ * no other code part within kbase can take a reference to it. 
++ */ ++ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1); ++ kbase_va_region_no_user_free_dec(chunk->region); ++#if !defined(CONFIG_MALI_VECTOR_DUMP) ++ chunk->region->flags &= ~KBASE_REG_DONT_NEED; ++#endif ++ kbase_mem_free_region(kctx, chunk->region); ++ kbase_gpu_vm_unlock(kctx); + -+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ -+ gpu_addr, refcount) \ -+ trace_mali_##code(kctx, refcount, 0) ++ kfree(chunk); ++} + -+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ -+ gpu_addr, refcount, info_val) \ -+ trace_mali_##code(kctx, refcount, info_val) ++/** ++ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA ++ * region for the chunk, and provide a kernel mapping. ++ * @kctx: kbase context with which the chunk will be linked ++ * @chunk_size: the size of the chunk from the corresponding heap ++ * ++ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the ++ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so ++ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap ++ * lock). ++ * ++ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we ++ * ensure that it is protected from being mapped by other parts of kbase. ++ * ++ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the ++ * shrinker path, as it may be otherwise shrunk at any time. ++ * ++ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer ++ * on failure ++ */ ++static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx, ++ u64 chunk_size) ++{ ++ u64 nr_pages = PFN_UP(chunk_size); ++ u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | ++ BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; ++ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ /* The chunk kernel mapping needs to be large enough to: ++ * - initially zero the CHUNK_HDR_SIZE area ++ * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area ++ */ ++ const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE); + -+#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ -+ info_val) \ -+ trace_mali_##code(kctx, gpu_addr, info_val) -+#else /* KBASE_KTRACE_TARGET_FTRACE */ -+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ -+ jobslot) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(jobslot);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ /* Calls to this function are inherently synchronous, with respect to ++ * MMU operations. 
++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; ++ flags |= kbase_mem_group_id_set(kctx->jit_group_id); + -+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ -+ gpu_addr, jobslot, info_val) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(jobslot);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); ++ if (unlikely(!chunk)) { ++ dev_err(kctx->kbdev->dev, ++ "No kernel memory for a new tiler heap chunk\n"); ++ return NULL; ++ } + -+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ -+ gpu_addr, refcount) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(refcount);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ /* Allocate GPU memory for the new chunk. */ ++ chunk->region = ++ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info); + -+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ -+ gpu_addr, refcount, info_val) \ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++ if (unlikely(!chunk->region)) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n"); ++ goto unroll_chunk; ++ } + -+#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ -+ info_val)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(kctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) -+#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++ kbase_gpu_vm_lock(kctx); + -+/* -+ * Master set of macros to route KTrace to any of the targets -+ */ ++ /* Some checks done here as NO_USER_FREE still allows such things to be made ++ * whilst we had dropped the region lock ++ */ ++ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { ++ dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n"); ++ goto unroll_region; ++ } + -+/** -+ * KBASE_KTRACE_ADD_JM_SLOT - Add trace values about a job-slot -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @jobslot: jobslot information to add to the trace -+ * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. 
-+ */ -+#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ -+ jobslot) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __gpu_addr = gpu_addr; \ -+ int __jobslot = jobslot; \ -+ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ -+ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ -+ 0); \ -+ KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ -+ } while (0) ++ /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another ++ * thread can have the "no user free" refcount increased between kbase_mem_alloc ++ * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by ++ * remove_external_chunk_mappings (below). ++ * ++ * It should be fine and not a security risk if we let the region leak till ++ * region tracker termination in such a case. ++ */ ++ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) { ++ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n"); ++ goto unroll_region; ++ } ++ ++ /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE ++ * being requested, it's useful to document in code what those restrictions are, and ensure ++ * they remain in place in future. ++ */ ++ if (WARN(!chunk->region->gpu_alloc, ++ "NO_USER_FREE chunks should not have had their alloc freed")) { ++ goto unroll_region; ++ } ++ ++ if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, ++ "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { ++ goto unroll_region; ++ } ++ ++ if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), ++ "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { ++ goto unroll_region; ++ } ++ ++ if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), ++ "NO_USER_FREE chunks should not have been made ephemeral")) { ++ goto unroll_region; ++ } ++ ++ if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, ++ "NO_USER_FREE chunks should not have been aliased")) { ++ goto unroll_region; ++ } ++ ++ if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size, ++ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map, ++ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) { ++ dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n"); ++ goto unroll_region; ++ } ++ ++ remove_external_chunk_mappings(kctx, chunk); ++ kbase_gpu_vm_unlock(kctx); ++ ++ /* If page migration is enabled, we don't want to migrate tiler heap pages. ++ * This does not change if the constituent pages are already marked as isolated. ++ */ ++ if (kbase_page_migration_enabled) ++ kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); ++ ++ return chunk; ++ ++unroll_region: ++ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT ++ * regions), and so we must clear that flag too before freeing. 
++ */ ++ kbase_va_region_no_user_free_dec(chunk->region); ++#if !defined(CONFIG_MALI_VECTOR_DUMP) ++ chunk->region->flags &= ~KBASE_REG_DONT_NEED; ++#endif ++ kbase_mem_free_region(kctx, chunk->region); ++ kbase_gpu_vm_unlock(kctx); ++unroll_chunk: ++ kfree(chunk); ++ return NULL; ++} + +/** -+ * KBASE_KTRACE_ADD_JM_SLOT_INFO - Add trace values about a job-slot, with info -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @jobslot: jobslot information to add to the trace -+ * @info_val: generic information about @code to add to the trace ++ * create_chunk - Create a tiler heap chunk + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. ++ * @heap: Pointer to the tiler heap for which to allocate memory. ++ * ++ * This function allocates a chunk of memory for a tiler heap, adds it to the ++ * the list of chunks associated with that heap both on the host side and in GPU ++ * memory. ++ * ++ * Return: 0 if successful or a negative error code on failure. + */ -+#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ -+ jobslot, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __gpu_addr = gpu_addr; \ -+ int __jobslot = jobslot; \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ -+ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ -+ __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ -+ } while (0) ++static int create_chunk(struct kbase_csf_tiler_heap *const heap) ++{ ++ int err = 0; ++ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ ++ chunk = alloc_new_chunk(heap->kctx, heap->chunk_size); ++ if (unlikely(!chunk)) { ++ err = -ENOMEM; ++ goto allocation_failure; ++ } ++ ++ mutex_lock(&heap->kctx->csf.tiler_heaps.lock); ++ err = init_chunk(heap, chunk, true); ++ mutex_unlock(&heap->kctx->csf.tiler_heaps.lock); ++ ++ if (unlikely(err)) ++ goto initialization_failure; ++ ++ dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va); ++ ++ return 0; ++initialization_failure: ++ remove_unlinked_chunk(heap->kctx, chunk); ++allocation_failure: ++ return err; ++} + +/** -+ * KBASE_KTRACE_ADD_JM_REFCOUNT - Add trace values about a kctx refcount -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @refcount: reference count information to add to the trace ++ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. ++ * @heap: Pointer to a tiler heap. 
++ * ++ * This function empties the list of chunks associated with a tiler heap by freeing all chunks ++ * previously allocated by @create_chunk. ++ * ++ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the ++ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct ++ * kbase_context.region_lock. ++ * ++ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the ++ * list of chunks used by the GPU, therefore it is only safe to use this function when ++ * deleting a heap. + */ -+#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ -+ refcount) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __gpu_addr = gpu_addr; \ -+ int __refcount = refcount; \ -+ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ -+ KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ -+ 0u); \ -+ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ -+ } while (0) ++static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) ++{ ++ struct kbase_context *const kctx = heap->kctx; ++ struct list_head *entry = NULL, *tmp = NULL; ++ ++ WARN(!list_empty(&heap->link), ++ "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller"); ++ ++ list_for_each_safe(entry, tmp, &heap->chunks_list) { ++ struct kbase_csf_tiler_heap_chunk *chunk = list_entry( ++ entry, struct kbase_csf_tiler_heap_chunk, link); ++ ++ list_del_init(&chunk->link); ++ heap->chunk_count--; ++ ++ remove_unlinked_chunk(kctx, chunk); ++ } ++} + +/** -+ * KBASE_KTRACE_ADD_JM_REFCOUNT_INFO - Add trace values about a kctx refcount, -+ * and info -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @refcount: reference count information to add to the trace -+ * @info_val: generic information about @code to add to the trace ++ * create_initial_chunks - Create the initial list of chunks for a tiler heap + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. ++ * @heap: Pointer to the tiler heap for which to allocate memory. ++ * @nchunks: Number of chunks to create. ++ * ++ * This function allocates a given number of chunks for a tiler heap and ++ * adds them to the list of chunks associated with that heap. ++ * ++ * Return: 0 if successful or a negative error code on failure. 
+ */ -+#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ -+ gpu_addr, refcount, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __gpu_addr = gpu_addr; \ -+ int __refcount = refcount; \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ -+ KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ -+ __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ -+ } while (0) ++static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, ++ u32 const nchunks) ++{ ++ int err = 0; ++ u32 i; ++ ++ for (i = 0; (i < nchunks) && likely(!err); i++) ++ err = create_chunk(heap); ++ ++ if (unlikely(err)) ++ delete_all_chunks(heap); ++ ++ return err; ++} + +/** -+ * KBASE_KTRACE_ADD_JM - Add trace values (no slot or refcount) -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @katom: kbase atom, or NULL if no atom -+ * @gpu_addr: GPU address, usually related to @katom -+ * @info_val: generic information about @code to add to the trace ++ * delete_heap - Delete an unlinked tiler heap + * -+ * Note: Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. -+ */ -+#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __gpu_addr = gpu_addr; \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ -+ 0u, 0, 0, __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ -+ } while (0) -+ -+#endif /* _KBASE_DEBUG_KTRACE_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h -new file mode 100644 -index 000000000..ddcac906c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h -@@ -0,0 +1,267 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++ * @heap: Pointer to a tiler heap to be deleted. + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * This function frees any chunks allocated for a tiler heap previously ++ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by ++ * the firmware is also freed. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the ++ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct ++ * kbase_context.region_lock. 
++ */ ++static void delete_heap(struct kbase_csf_tiler_heap *heap) ++{ ++ struct kbase_context *const kctx = heap->kctx; ++ ++ dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va); ++ ++ WARN(!list_empty(&heap->link), ++ "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller"); ++ ++ /* Make sure that all of the VA regions corresponding to the chunks are ++ * freed at this time and that the work queue is not trying to access freed ++ * memory. ++ * ++ * Note: since the heap is unlinked, and that no references are made to chunks other ++ * than from their heap, there is no need to separately move the chunks out of the ++ * heap->chunks_list to delete them. ++ */ ++ delete_all_chunks(heap); ++ ++ kbase_vunmap(kctx, &heap->gpu_va_map); ++ /* We could optimize context destruction by not freeing leaked heap ++ * contexts but it doesn't seem worth the extra complexity. After this ++ * point, the suballocation is returned to the heap context allocator and ++ * may be overwritten with new data, meaning heap->gpu_va should not ++ * be used past this point. ++ */ ++ kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, ++ heap->gpu_va); ++ ++ WARN_ON(heap->chunk_count); ++ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, ++ heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, ++ heap->target_in_flight, 0); ++ ++ if (heap->buf_desc_reg) { ++ kbase_vunmap(kctx, &heap->buf_desc_map); ++ kbase_gpu_vm_lock(kctx); ++ kbase_va_region_no_user_free_dec(heap->buf_desc_reg); ++ kbase_gpu_vm_unlock(kctx); ++ } ++ ++ kfree(heap); ++} ++ ++/** ++ * find_tiler_heap - Find a tiler heap from the address of its heap context + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kctx: Pointer to the kbase context to search for a tiler heap. ++ * @heap_gpu_va: GPU virtual address of a heap context structure. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Each tiler heap managed by the kernel has an associated heap context ++ * structure used by the firmware. This function finds a tiler heap object from ++ * the GPU virtual address of its associated heap context. The heap context ++ * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the ++ * same @kctx. + * ++ * Return: pointer to the tiler heap object, or NULL if not found. 
+ */ ++static struct kbase_csf_tiler_heap *find_tiler_heap( ++ struct kbase_context *const kctx, u64 const heap_gpu_va) ++{ ++ struct kbase_csf_tiler_heap *heap = NULL; + -+/* -+ * NOTE: This must **only** be included through mali_linux_trace.h, -+ * otherwise it will fail to setup tracepoints correctly -+ */ ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + -+#if !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) -+#define _KBASE_DEBUG_LINUX_KTRACE_CSF_H_ ++ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { ++ if (heap_gpu_va == heap->gpu_va) ++ return heap; ++ } + -+/* -+ * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT -+ */ -+DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END); -+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT); -+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT); -+DEFINE_MALI_ADD_EVENT(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT); -+DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_START); -+DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END); -+DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_GLB_REQ_ACK); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_ADVANCE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_NOADVANCE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_INSERT); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_REMOVE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_ROTATE); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_HEAD); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START); -+DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END); -+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED); -+DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP); -+DEFINE_MALI_ADD_EVENT(SCHED_BUSY); -+DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE); -+DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED); -+DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING); -+#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n); -+#include "backend/gpu/mali_kbase_pm_mcu_states.h" -+#undef KBASEP_MCU_STATE ++ dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", ++ heap_gpu_va); + -+DECLARE_EVENT_CLASS(mali_csf_grp_q_template, -+ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, -+ struct kbase_queue *queue, u64 info_val), -+ TP_ARGS(kbdev, group, queue, info_val), -+ TP_STRUCT__entry( -+ __field(u64, info_val) -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(u8, group_handle) -+ __field(s8, csg_nr) -+ __field(u8, slot_prio) -+ __field(s8, csi_index) -+ ), -+ TP_fast_assign( -+ { -+ struct kbase_context *kctx = NULL; ++ return NULL; ++} + -+ __entry->info_val = info_val; -+ /* Note: if required in future, we could record some 
-+ * flags in __entry about whether the group/queue parts -+ * are valid, and add that to the trace message e.g. -+ * by using __print_flags()/__print_symbolic() -+ */ -+ if (queue) { -+ /* Note: kctx overridden by group->kctx later if group is valid */ -+ kctx = queue->kctx; -+ __entry->csi_index = queue->csi_index; -+ } else { -+ __entry->csi_index = -1; -+ } ++static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap, ++ u64 const chunk_gpu_va) ++{ ++ struct kbase_csf_tiler_heap_chunk *chunk = NULL; + -+ if (group) { -+ kctx = group->kctx; -+ __entry->group_handle = group->handle; -+ __entry->csg_nr = group->csg_nr; -+ if (group->csg_nr >= 0) -+ __entry->slot_prio = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; -+ else -+ __entry->slot_prio = 0u; -+ } else { -+ __entry->group_handle = 0u; -+ __entry->csg_nr = -1; -+ __entry->slot_prio = 0u; -+ } -+ __entry->kctx_id = (kctx) ? kctx->id : 0u; -+ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; -+ } ++ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + -+ ), -+ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", -+ __entry->kctx_tgid, __entry->kctx_id, -+ __entry->group_handle, __entry->csg_nr, -+ __entry->slot_prio, __entry->csi_index, -+ __entry->info_val) -+); ++ list_for_each_entry(chunk, &heap->chunks_list, link) { ++ if (chunk->gpu_va == chunk_gpu_va) ++ return chunk; ++ } + -+/* -+ * Group events -+ */ -+#define DEFINE_MALI_CSF_GRP_EVENT(name) \ -+ DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ -+ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ -+ struct kbase_queue *queue, u64 info_val), \ -+ TP_ARGS(kbdev, group, queue, info_val), \ -+ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ -+ __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ -+ __entry->csg_nr, __entry->slot_prio, __entry->info_val)) ++ dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va); + -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_RUNNING); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_IDLE); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROGRESS_TIMER_EVENT); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_START); -+DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_INSERT); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_REMOVE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_ROTATE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_HEAD); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_INSERT); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_REMOVE); -+DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_HEAD); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER_CHECK); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER); 
-+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC); -+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS); -+DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START); -+DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED); -+DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED); ++ return NULL; ++} + -+#undef DEFINE_MALI_CSF_GRP_EVENT ++int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) ++{ ++ int err = kbase_csf_heap_context_allocator_init( ++ &kctx->csf.tiler_heaps.ctx_alloc, kctx); + -+/* -+ * Group + Queue events -+ */ -+#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ -+ DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ -+ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ -+ struct kbase_queue *queue, u64 info_val), \ -+ TP_ARGS(kbdev, group, queue, info_val)) ++ if (unlikely(err)) ++ return err; + -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQ); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_FAULT); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_TILER_OOM); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_PROTM_PEND); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_START); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_END); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_WAIT_STATUS); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_CUR_VAL); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_TEST_VAL); -+DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_BLOCKED_REASON); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_SET); -+DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR); ++ INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list); ++ mutex_init(&kctx->csf.tiler_heaps.lock); + -+#undef DEFINE_MALI_CSF_GRP_Q_EVENT ++ dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n"); + -+/* -+ * KCPU queue events -+ */ -+DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, -+ TP_PROTO(struct kbase_kcpu_command_queue *queue, -+ u64 info_val1, u64 info_val2), -+ TP_ARGS(queue, info_val1, info_val2), -+ TP_STRUCT__entry( -+ __field(u64, info_val1) -+ __field(u64, info_val2) -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(u8, id) -+ ), -+ TP_fast_assign( -+ { -+ __entry->info_val1 = info_val1; -+ __entry->info_val2 = info_val2; -+ __entry->kctx_id = queue->kctx->id; -+ __entry->kctx_tgid = queue->kctx->tgid; -+ __entry->id = queue->id; -+ } ++ return 0; ++} + -+ ), -+ TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", -+ __entry->kctx_tgid, __entry->kctx_id, __entry->id, -+ __entry->info_val1, __entry->info_val2) -+); ++void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) ++{ ++ LIST_HEAD(local_heaps_list); ++ struct list_head *entry = NULL, *tmp = NULL; + -+#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ -+ 
DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ -+ TP_PROTO(struct kbase_kcpu_command_queue *queue, \ -+ u64 info_val1, u64 info_val2), \ -+ TP_ARGS(queue, info_val1, info_val2)) ++ dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n"); + -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_SET); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_START); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_END); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_SIGNAL); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_START); -+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_END); ++ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); + -+#undef DEFINE_MALI_CSF_KCPU_EVENT ++ list_for_each_safe(entry, tmp, &local_heaps_list) { ++ struct kbase_csf_tiler_heap *heap = list_entry( ++ entry, struct kbase_csf_tiler_heap, link); + -+#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) */ -diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h -new file mode 100644 -index 000000000..8fa4e2a7c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h -@@ -0,0 +1,174 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ list_del_init(&heap->link); ++ delete_heap(heap); ++ } ++ ++ mutex_destroy(&kctx->csf.tiler_heaps.lock); ++ ++ kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); ++} ++ ++/** ++ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house ++ * the tiler heap buffer descriptor ++ * is suitable for the purpose. ++ * @kctx: kbase context of the tiler heap ++ * @reg: VA region being checked for suitability + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * The tiler heap buffer descriptor memory does not admit page faults according ++ * to its design, so it must have the entirety of the backing upon allocation, ++ * and it has to remain alive as long as the tiler heap is alive, meaning it ++ * cannot be allocated from JIT/Ephemeral, or user freeable memory. + * ++ * Return: true on suitability, false otherwise. 
+ */ ++static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg) ++{ ++ if (kbase_is_region_invalid_or_free(reg)) { ++ dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n"); ++ return false; ++ } + -+/* -+ * NOTE: This must **only** be included through mali_linux_trace.h, -+ * otherwise it will fail to setup tracepoints correctly -+ */ ++ if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || ++ (reg->flags & KBASE_REG_PF_GROW)) { ++ dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); ++ return false; ++ } + -+#if !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ) -+#define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ++ dev_err(kctx->kbdev->dev, "Region has invalid type!\n"); ++ return false; ++ } + -+DECLARE_EVENT_CLASS(mali_jm_slot_template, -+ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), -+ TP_ARGS(kctx, jobslot, info_val), -+ TP_STRUCT__entry( -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(unsigned int, jobslot) -+ __field(u64, info_val) -+ ), -+ TP_fast_assign( -+ __entry->kctx_id = (kctx) ? kctx->id : 0u; -+ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; -+ __entry->jobslot = jobslot; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, -+ __entry->kctx_id, __entry->jobslot, __entry->info_val) -+); ++ if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) || ++ (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) { ++ dev_err(kctx->kbdev->dev, "Region has invalid backing!\n"); ++ return false; ++ } + -+#define DEFINE_MALI_JM_SLOT_EVENT(name) \ -+DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ -+ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ -+ TP_ARGS(kctx, jobslot, info_val)) -+DEFINE_MALI_JM_SLOT_EVENT(JM_RETURN_ATOM_TO_JS); -+DEFINE_MALI_JM_SLOT_EVENT(JM_MARK_FOR_RETURN_TO_JS); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); -+DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); -+DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); -+DEFINE_MALI_JM_SLOT_EVENT(JM_CHECK_HEAD); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_0); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_1); -+DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP); -+DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_0); -+DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_1); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); -+DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_EVICT); -+DEFINE_MALI_JM_SLOT_EVENT(JM_BEGIN_RESET_WORKER); -+DEFINE_MALI_JM_SLOT_EVENT(JM_END_RESET_WORKER); -+DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT); -+DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); -+DEFINE_MALI_JM_SLOT_EVENT(JS_PULL_JOB); -+DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); -+DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); -+DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); -+DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); -+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_BLOCKED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_UNBLOCKED); 
-+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED); -+DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_IS_BLOCKED); -+#undef DEFINE_MALI_JM_SLOT_EVENT ++ return true; ++} + -+DECLARE_EVENT_CLASS(mali_jm_refcount_template, -+ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), -+ TP_ARGS(kctx, refcount, info_val), -+ TP_STRUCT__entry( -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(unsigned int, refcount) -+ __field(u64, info_val) -+ ), -+ TP_fast_assign( -+ __entry->kctx_id = (kctx) ? kctx->id : 0u; -+ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; -+ __entry->refcount = refcount; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, -+ __entry->kctx_id, __entry->refcount, __entry->info_val) -+); ++#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap)) + -+#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ -+DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ -+ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ -+ TP_ARGS(kctx, refcount, info_val)) -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); -+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); -+#undef DEFINE_MALI_JM_REFCOUNT_EVENT ++int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size, ++ u32 const initial_chunks, u32 const max_chunks, ++ u16 const target_in_flight, u64 const buf_desc_va, ++ u64 *const heap_gpu_va, u64 *const first_chunk_va) ++{ ++ int err = 0; ++ struct kbase_csf_tiler_heap *heap = NULL; ++ struct kbase_csf_heap_context_allocator *const ctx_alloc = ++ &kctx->csf.tiler_heaps.ctx_alloc; ++ struct kbase_csf_tiler_heap_chunk *chunk = NULL; ++ struct kbase_va_region *gpu_va_reg = NULL; ++ void *vmap_ptr = NULL; + -+DECLARE_EVENT_CLASS(mali_jm_add_template, -+ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), -+ TP_ARGS(kctx, gpu_addr, info_val), -+ TP_STRUCT__entry( -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(u64, gpu_addr) -+ __field(u64, info_val) -+ ), -+ TP_fast_assign( -+ __entry->kctx_id = (kctx) ? kctx->id : 0u; -+ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; -+ __entry->gpu_addr = gpu_addr; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, -+ __entry->kctx_id, __entry->gpu_addr, __entry->info_val) -+); ++ dev_dbg(kctx->kbdev->dev, ++ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n", ++ initial_chunks, max_chunks, chunk_size, buf_desc_va); + -+#define DEFINE_MALI_JM_ADD_EVENT(name) \ -+DEFINE_EVENT(mali_jm_add_template, mali_##name, \ -+ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ -+ TP_ARGS(kctx, gpu_addr, info_val)) -+DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); -+DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); -+DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); -+DEFINE_MALI_JM_ADD_EVENT(JD_DONE); -+DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL); -+DEFINE_MALI_JM_ADD_EVENT(JD_ZAP_CONTEXT); -+DEFINE_MALI_JM_ADD_EVENT(JM_IRQ); -+DEFINE_MALI_JM_ADD_EVENT(JM_IRQ_END); -+DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS); -+DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS_DONE); -+DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_NON_SCHEDULED); -+DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED); -+DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE); -+DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET); -+DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE); -+DEFINE_MALI_JM_ADD_EVENT(JS_UNPULL_JOB); -+DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); -+DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); -+DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); -+DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); -+DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER); -+DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER_END); -+DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END); -+DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START); -+DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); -+#undef DEFINE_MALI_JM_ADD_EVENT ++ if (!kbase_mem_allow_alloc(kctx)) ++ return -EINVAL; + -+#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ)*/ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c -new file mode 100644 -index 000000000..f521b4712 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c -@@ -0,0 +1,361 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if (chunk_size == 0) ++ return -EINVAL; + -+#include -+#include "debug/mali_kbase_debug_ktrace_internal.h" ++ if (chunk_size & ~CHUNK_SIZE_MASK) ++ return -EINVAL; + -+int kbase_ktrace_init(struct kbase_device *kbdev) -+{ -+#if KBASE_KTRACE_TARGET_RBUF -+ struct kbase_ktrace_msg *rbuf; ++ if (initial_chunks == 0) ++ return -EINVAL; + -+ spin_lock_init(&kbdev->ktrace.lock); -+ rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); ++ if (initial_chunks > max_chunks) ++ return -EINVAL; + -+ if (!rbuf) ++ if (target_in_flight == 0) + return -EINVAL; + -+ kbdev->ktrace.rbuf = rbuf; -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+ return 0; -+} ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (unlikely(!heap)) { ++ dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap"); ++ return -ENOMEM; ++ } + -+void kbase_ktrace_term(struct kbase_device *kbdev) -+{ -+#if KBASE_KTRACE_TARGET_RBUF -+ kfree(kbdev->ktrace.rbuf); -+ kbdev->ktrace.rbuf = NULL; -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+} ++ heap->kctx = kctx; ++ heap->chunk_size = chunk_size; ++ heap->max_chunks = max_chunks; ++ heap->target_in_flight = target_in_flight; ++ heap->buf_desc_checked = false; ++ INIT_LIST_HEAD(&heap->chunks_list); ++ INIT_LIST_HEAD(&heap->link); + -+void kbase_ktrace_hook_wrapper(void *param) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)param; ++ /* Check on the buffer descriptor virtual Address */ ++ if (buf_desc_va) { ++ struct kbase_va_region *buf_desc_reg; + -+ KBASE_KTRACE_DUMP(kbdev); -+} ++ kbase_gpu_vm_lock(kctx); ++ buf_desc_reg = ++ kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va); + -+#if KBASE_KTRACE_TARGET_RBUF ++ if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) { ++ kbase_gpu_vm_unlock(kctx); ++ dev_err(kctx->kbdev->dev, ++ "Could not find a suitable VA region for the tiler heap buf desc!\n"); ++ err = -EINVAL; ++ goto buf_desc_not_suitable; ++ } + -+static const char * const kbasep_ktrace_code_string[] = { -+ /* -+ * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE -+ * THIS MUST BE USED AT THE START OF THE ARRAY ++ /* If we don't prevent userspace from unmapping this, we may run into ++ * use-after-free, as we don't check for the existence of the region throughout. ++ */ ++ ++ heap->buf_desc_va = buf_desc_va; ++ heap->buf_desc_reg = buf_desc_reg; ++ kbase_va_region_no_user_free_inc(buf_desc_reg); ++ ++ vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, ++ KBASE_REG_CPU_RD, &heap->buf_desc_map, ++ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); ++ ++ if (kbase_page_migration_enabled) ++ kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); ++ ++ kbase_gpu_vm_unlock(kctx); ++ ++ if (unlikely(!vmap_ptr)) { ++ dev_err(kctx->kbdev->dev, ++ "Could not vmap buffer descriptor into kernel memory (err %d)\n", ++ err); ++ err = -ENOMEM; ++ goto buf_desc_vmap_failed; ++ } ++ } ++ ++ heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); ++ if (unlikely(!heap->gpu_va)) { ++ dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n"); ++ err = -ENOMEM; ++ goto heap_context_alloc_failed; ++ } ++ ++ gpu_va_reg = ctx_alloc->region; ++ ++ kbase_gpu_vm_lock(kctx); ++ /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens ++ * on kctx termination (after all syscalls on kctx have finished), and so it is safe to ++ * assume that gpu_va_reg is still present. 
+ */ -+#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X -+#include "debug/mali_kbase_debug_ktrace_codes.h" -+#undef KBASE_KTRACE_CODE_MAKE_CODE -+}; ++ vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE, ++ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map, ++ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); ++ kbase_gpu_vm_unlock(kctx); ++ if (unlikely(!vmap_ptr)) { ++ dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n"); ++ err = -ENOMEM; ++ goto heap_context_vmap_failed; ++ } + -+static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) -+{ -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ "secs,thread_id,cpu,code,kctx,"), 0); ++ err = create_initial_chunks(heap, initial_chunks); ++ if (unlikely(err)) { ++ dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n"); ++ goto create_chunks_failed; ++ } ++ chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); + -+ kbasep_ktrace_backend_format_header(buffer, sz, &written); ++ *heap_gpu_va = heap->gpu_va; ++ *first_chunk_va = chunk->gpu_va; + -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ ",info_val,ktrace_version=%u.%u", -+ KBASE_KTRACE_VERSION_MAJOR, -+ KBASE_KTRACE_VERSION_MINOR), 0); ++ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ kctx->csf.tiler_heaps.nr_of_heaps++; ++ heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; ++ list_add(&heap->link, &kctx->csf.tiler_heaps.list); + -+ buffer[sz - 1] = 0; ++ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, ++ PFN_UP(heap->chunk_size * heap->max_chunks), ++ PFN_UP(heap->chunk_size * heap->chunk_count), ++ heap->max_chunks, heap->chunk_size, heap->chunk_count, ++ heap->target_in_flight, 0); ++ ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++ list_for_each_entry(chunk, &heap->chunks_list, link) { ++ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id, ++ chunk->gpu_va); ++ } ++#endif ++ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; ++ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count; ++ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) ++ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; ++ ++ dev_dbg(kctx->kbdev->dev, ++ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va, ++ buf_desc_va, kctx->tgid, kctx->id); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ ++ return 0; ++ ++create_chunks_failed: ++ kbase_vunmap(kctx, &heap->gpu_va_map); ++heap_context_vmap_failed: ++ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); ++heap_context_alloc_failed: ++ if (heap->buf_desc_reg) ++ kbase_vunmap(kctx, &heap->buf_desc_map); ++buf_desc_vmap_failed: ++ if (heap->buf_desc_reg) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_va_region_no_user_free_dec(heap->buf_desc_reg); ++ kbase_gpu_vm_unlock(kctx); ++ } ++buf_desc_not_suitable: ++ kfree(heap); ++ return err; +} + -+static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, -+ char *buffer, int sz) ++int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, ++ u64 const heap_gpu_va) +{ -+ s32 written = 0; ++ int err = 0; ++ struct kbase_csf_tiler_heap *heap = NULL; ++ u32 chunk_count = 0; ++ u64 heap_size = 0; + -+ /* Initial part of message: -+ * -+ * secs,thread_id,cpu,code, -+ */ -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ "%d.%.6d,%d,%d,%s,", -+ 
(int)trace_msg->timestamp.tv_sec, -+ (int)(trace_msg->timestamp.tv_nsec / 1000), -+ trace_msg->thread_id, trace_msg->cpu, -+ kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), -+ 0); ++ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ heap = find_tiler_heap(kctx, heap_gpu_va); ++ if (likely(heap)) { ++ chunk_count = heap->chunk_count; ++ heap_size = heap->chunk_size * chunk_count; + -+ /* kctx part: */ -+ if (trace_msg->kctx_tgid) { -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ "%d_%u", -+ trace_msg->kctx_tgid, trace_msg->kctx_id), 0); ++ list_del_init(&heap->link); ++ } else { ++ err = -EINVAL; + } -+ /* Trailing comma */ -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ ","), 0); + -+ /* Backend parts */ -+ kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, -+ &written); ++ /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list ++ * at all times ++ */ ++ if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) ++ kctx->running_total_tiler_heap_memory -= heap_size; ++ else ++ dev_warn(kctx->kbdev->dev, ++ "Running total tiler heap memory lower than expected!"); ++ if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count)) ++ kctx->running_total_tiler_heap_nr_chunks -= chunk_count; ++ else ++ dev_warn(kctx->kbdev->dev, ++ "Running total tiler chunk count lower than expected!"); ++ if (!err) ++ dev_dbg(kctx->kbdev->dev, ++ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", ++ heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); + -+ /* Rest of message: -+ * -+ * ,info_val -+ * -+ * Note that the last column is empty, it's simply to hold the ktrace -+ * version in the header ++ /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from ++ * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock + */ -+ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), -+ ",0x%.16llx", -+ (unsigned long long)trace_msg->info_val), 0); -+ buffer[sz - 1] = 0; ++ if (likely(heap)) ++ delete_heap(heap); ++ ++ return err; +} + -+static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, -+ struct kbase_ktrace_msg *trace_msg) ++/** ++ * validate_allocation_request - Check whether the chunk allocation request ++ * received on tiler OOM should be handled at ++ * current time. 
++ * ++ * @heap: The tiler heap the OOM is associated with ++ * @nr_in_flight: Number of fragment jobs in flight ++ * @pending_frag_count: Number of pending fragment jobs ++ * ++ * Context: must hold the tiler heap lock to guarantee its lifetime ++ * ++ * Return: ++ * * 0 - allowed to allocate an additional chunk ++ * * -EINVAL - invalid ++ * * -EBUSY - there are fragment jobs still in flight, which may free chunks ++ * after completing ++ * * -ENOMEM - the targeted number of in-flight chunks has been reached and ++ * no new ones will be allocated ++ */ ++static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, ++ u32 pending_frag_count) +{ -+ char buffer[KTRACE_DUMP_MESSAGE_SIZE]; ++ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + -+ lockdep_assert_held(&kbdev->ktrace.lock); ++ if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight)) ++ return -EINVAL; + -+ kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); -+ dev_dbg(kbdev->dev, "%s", buffer); ++ if (nr_in_flight <= heap->target_in_flight) { ++ if (heap->chunk_count < heap->max_chunks) { ++ /* Not exceeded the target number of render passes yet so be ++ * generous with memory. ++ */ ++ return 0; ++ } else if (pending_frag_count > 0) { ++ return -EBUSY; ++ } else { ++ return -ENOMEM; ++ } ++ } else { ++ /* Reached target number of render passes in flight. ++ * Wait for some of them to finish ++ */ ++ return -EBUSY; ++ } ++ return -ENOMEM; +} + -+struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) ++int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, ++ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) +{ -+ struct kbase_ktrace_msg *trace_msg; ++ struct kbase_csf_tiler_heap *heap; ++ struct kbase_csf_tiler_heap_chunk *chunk; ++ int err = -EINVAL; ++ u64 chunk_size = 0; ++ u64 heap_id = 0; + -+ lockdep_assert_held(&ktrace->lock); ++ /* To avoid potential locking issues during allocation, this is handled ++ * in three phases: ++ * 1. Take the lock, find the corresponding heap, and find its chunk size ++ * (this is always 2 MB, but may change down the line). ++ * 2. Allocate memory for the chunk and its region. ++ * 3. If the heap still exists, link it to the end of the list. If it ++ * doesn't, roll back the allocation. ++ */ + -+ trace_msg = &ktrace->rbuf[ktrace->next_in]; ++ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ heap = find_tiler_heap(kctx, gpu_heap_va); ++ if (likely(heap)) { ++ chunk_size = heap->chunk_size; ++ heap_id = heap->heap_id; ++ } else { ++ dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto prelink_failure; ++ } + -+ /* Update the ringbuffer indices */ -+ ktrace->next_in = (ktrace->next_in + 1) & KBASE_KTRACE_MASK; -+ if (ktrace->next_in == ktrace->first_out) -+ ktrace->first_out = (ktrace->first_out + 1) & KBASE_KTRACE_MASK; ++ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); ++ if (unlikely(err)) { ++ /* The allocation request can be legitimate, but be invoked on a heap ++ * that has already reached the maximum pre-configured capacity. This ++ * is useful debug information, but should not be treated as an error, ++ * since the request will be re-sent at a later point. 
++ */ ++ dev_dbg(kctx->kbdev->dev, ++ "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)", ++ gpu_heap_va, err); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto prelink_failure; ++ } ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ /* this heap must not be used whilst we have dropped the lock */ ++ heap = NULL; + -+ return trace_msg; -+} -+void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, -+ struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, -+ struct kbase_context *kctx, kbase_ktrace_flag_t flags, -+ u64 info_val) -+{ -+ lockdep_assert_held(&ktrace->lock); ++ chunk = alloc_new_chunk(kctx, chunk_size); ++ if (unlikely(!chunk)) { ++ dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d", ++ chunk_size, kctx->tgid, kctx->id); ++ goto prelink_failure; ++ } + -+ trace_msg->thread_id = task_pid_nr(current); -+ trace_msg->cpu = task_cpu(current); ++ /* After this point, the heap that we were targeting could already have had the needed ++ * chunks allocated, if we were handling multiple OoM events on multiple threads, so ++ * we need to revalidate the need for the allocation. ++ */ ++ mutex_lock(&kctx->csf.tiler_heaps.lock); ++ heap = find_tiler_heap(kctx, gpu_heap_va); + -+ ktime_get_real_ts64(&trace_msg->timestamp); ++ if (unlikely(!heap)) { ++ dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto unroll_chunk; ++ } + -+ /* No need to store a flag about whether there was a kctx, tgid==0 is -+ * sufficient ++ if (heap_id != heap->heap_id) { ++ dev_err(kctx->kbdev->dev, ++ "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!", ++ gpu_heap_va, kctx->tgid, kctx->id, chunk_size); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto unroll_chunk; ++ } ++ ++ if (WARN_ON(chunk_size != heap->chunk_size)) { ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto unroll_chunk; ++ } ++ ++ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); ++ if (unlikely(err)) { ++ dev_warn( ++ kctx->kbdev->dev, ++ "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)", ++ gpu_heap_va, err); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto unroll_chunk; ++ } ++ ++ err = init_chunk(heap, chunk, false); ++ ++ /* On error, the chunk would not be linked, so we can still treat it as an unlinked ++ * chunk for error handling. 
+ */ -+ if (kctx) { -+ trace_msg->kctx_tgid = kctx->tgid; -+ trace_msg->kctx_id = kctx->id; -+ } else { -+ trace_msg->kctx_tgid = 0; -+ trace_msg->kctx_id = 0; ++ if (unlikely(err)) { ++ dev_err(kctx->kbdev->dev, ++ "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d", ++ chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err); ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ goto unroll_chunk; + } -+ trace_msg->info_val = info_val; -+ trace_msg->backend.gpu.code = code; -+ trace_msg->backend.gpu.flags = flags; ++ ++ *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); ++ ++ /* update total and peak tiler heap memory record */ ++ kctx->running_total_tiler_heap_nr_chunks++; ++ kctx->running_total_tiler_heap_memory += heap->chunk_size; ++ ++ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) ++ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; ++ ++ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, ++ PFN_UP(heap->chunk_size * heap->max_chunks), ++ PFN_UP(heap->chunk_size * heap->chunk_count), ++ heap->max_chunks, heap->chunk_size, heap->chunk_count, ++ heap->target_in_flight, nr_in_flight); ++ ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ ++ return err; ++unroll_chunk: ++ remove_unlinked_chunk(kctx, chunk); ++prelink_failure: ++ return err; +} + -+void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, -+ struct kbase_context *kctx, kbase_ktrace_flag_t flags, -+ u64 info_val) ++static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va, ++ u64 *hdr_val) +{ -+ unsigned long irqflags; -+ struct kbase_ktrace_msg *trace_msg; ++ int err; ++ u64 *chunk_hdr; ++ struct kbase_context *kctx = heap->kctx; ++ struct kbase_csf_tiler_heap_chunk *chunk = NULL; + -+ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) -+ return; ++ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + -+ WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL)); ++ chunk = find_chunk(heap, chunk_gpu_va); ++ if (unlikely(!chunk)) { ++ dev_warn(kctx->kbdev->dev, ++ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n", ++ heap->gpu_va, chunk_gpu_va); ++ return false; ++ } + -+ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); ++ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), ++ "Cannot support CPU cached chunks without sync operations"); ++ chunk_hdr = chunk->map.addr; ++ *hdr_val = *chunk_hdr; + -+ /* Reserve and update indices */ -+ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++ dev_dbg(kctx->kbdev->dev, ++ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", ++ chunk_gpu_va, heap->gpu_va, *hdr_val); + -+ /* Fill the common part of the message (including backend.gpu.flags) */ -+ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, -+ info_val); ++ err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents); ++ if (unlikely(err)) { ++ dev_warn( ++ kctx->kbdev->dev, ++ "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n", ++ chunk_gpu_va, heap->gpu_va, err); + -+ /* Done */ -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++ /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on ++ * the heap's chunk list and try a different heap. ++ */ ++ ++ return false; ++ } ++ /* Destroy the mapping before the physical pages which are mapped are destroyed. 
*/ ++ kbase_vunmap(kctx, &chunk->map); ++ ++ err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc, ++ chunk->region->gpu_alloc->nents); ++ if (unlikely(err)) { ++ dev_warn( ++ kctx->kbdev->dev, ++ "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n", ++ chunk_gpu_va, heap->gpu_va, err); ++ ++ /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs ++ * anyway, so continue instead of returning early. ++ * ++ * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has ++ * its mapping removed, as that could lead to problems. It's safest to instead ++ * continue with deferred destruction of the chunk. ++ */ ++ } ++ ++ dev_dbg(kctx->kbdev->dev, ++ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", ++ chunk_gpu_va, heap->gpu_va, *hdr_val); ++ ++ mutex_lock(&heap->kctx->jit_evict_lock); ++ list_move(&chunk->region->jit_node, &kctx->jit_destroy_head); ++ mutex_unlock(&heap->kctx->jit_evict_lock); ++ ++ list_del(&chunk->link); ++ heap->chunk_count--; ++ kfree(chunk); ++ ++ return true; +} + -+static void kbasep_ktrace_clear_locked(struct kbase_device *kbdev) ++static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, ++ struct kbase_csf_gpu_buffer_heap *desc) +{ -+ lockdep_assert_held(&kbdev->ktrace.lock); -+ kbdev->ktrace.first_out = kbdev->ktrace.next_in; ++ u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; ++ ++ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); ++ ++ if (first_hoarded_chunk_gpu_va) { ++ struct kbase_csf_tiler_heap_chunk *chunk = ++ find_chunk(heap, first_hoarded_chunk_gpu_va); ++ ++ if (likely(chunk)) { ++ dev_dbg(heap->kctx->kbdev->dev, ++ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n", ++ heap->buf_desc_va); ++ ++ heap->buf_desc_checked = true; ++ return; ++ } ++ } ++ /* If there is no match, defer the check to next time */ ++ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n", ++ heap->buf_desc_va); +} -+void kbasep_ktrace_clear(struct kbase_device *kbdev) ++ ++static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr) +{ -+ unsigned long flags; ++ struct kbase_context *kctx = heap->kctx; + -+ spin_lock_irqsave(&kbdev->ktrace.lock, flags); -+ kbasep_ktrace_clear_locked(kbdev); -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ ++ /* Initialize the descriptor pointer value to 0 */ ++ *chunk_gpu_va_ptr = 0; ++ ++ /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */ ++ if (heap->buf_desc_reg && !heap->buf_desc_checked) { ++ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; ++ ++ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ ++ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) ++ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU); ++ ++ sanity_check_gpu_buffer_heap(heap, desc); ++ if (heap->buf_desc_checked) ++ *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK; ++ } ++ ++ return heap->buf_desc_checked; +} + -+void kbasep_ktrace_dump(struct kbase_device *kbdev) ++static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap) +{ -+ unsigned long flags; -+ u32 start; -+ u32 end; -+ char buffer[KTRACE_DUMP_MESSAGE_SIZE] = "Dumping trace:\n"; ++ u32 freed = 0; ++ u64 chunk_gpu_va = 0; ++ struct kbase_context *kctx = heap->kctx; ++ struct 
kbase_csf_tiler_heap_chunk *chunk = NULL; + -+ kbasep_ktrace_format_header(buffer, sizeof(buffer), strlen(buffer)); -+ dev_dbg(kbdev->dev, "%s", buffer); ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + -+ spin_lock_irqsave(&kbdev->ktrace.lock, flags); -+ start = kbdev->ktrace.first_out; -+ end = kbdev->ktrace.next_in; ++ if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) { ++ u64 chunk_hdr_val; ++ u64 *hw_hdr; + -+ while (start != end) { -+ struct kbase_ktrace_msg *trace_msg = &kbdev->ktrace.rbuf[start]; ++ if (!chunk_gpu_va) { ++ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; + -+ kbasep_ktrace_dump_msg(kbdev, trace_msg); ++ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ ++ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) ++ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, ++ KBASE_SYNC_TO_CPU); ++ chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; + -+ start = (start + 1) & KBASE_KTRACE_MASK; -+ } -+ dev_dbg(kbdev->dev, "TRACE_END"); ++ if (!chunk_gpu_va) { ++ dev_dbg(kctx->kbdev->dev, ++ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n", ++ heap->buf_desc_va); ++ goto out; ++ } ++ } + -+ kbasep_ktrace_clear_locked(kbdev); ++ chunk = find_chunk(heap, chunk_gpu_va); ++ if (unlikely(!chunk)) ++ goto out; + -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); -+} ++ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), ++ "Cannot support CPU cached chunks without sync operations"); ++ hw_hdr = chunk->map.addr; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+struct trace_seq_state { -+ struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; -+ u32 start; -+ u32 end; -+}; ++ /* Move onto the next chunk relevant information */ ++ chunk_hdr_val = *hw_hdr; ++ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + -+static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) -+{ -+ struct trace_seq_state *state = s->private; -+ int i; ++ while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { ++ bool success = ++ delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val); + -+ if (*pos == 0) -+ /* See Documentation/filesystems/seq_file.txt */ -+ return SEQ_START_TOKEN; ++ if (!success) ++ break; + -+ if (*pos > KBASE_KTRACE_SIZE) -+ return NULL; -+ i = state->start + *pos; -+ if ((state->end >= state->start && i >= state->end) || -+ i >= state->end + KBASE_KTRACE_SIZE) -+ return NULL; ++ freed++; ++ /* On success, chunk_hdr_val is updated, extract the next chunk address */ ++ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ } + -+ i &= KBASE_KTRACE_MASK; ++ /* Update the existing hardware chunk header, after reclaim deletion of chunks */ ++ *hw_hdr = chunk_hdr_val; + -+ return &state->trace_buf[i]; ++ dev_dbg(heap->kctx->kbdev->dev, ++ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed, ++ chunk_hdr_val); ++ } else { ++ dev_dbg(kctx->kbdev->dev, ++ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n", ++ heap->buf_desc_va); ++ } ++out: ++ return freed; +} + -+static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) ++static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap) +{ -+} ++ u32 freed_chunks = 0; ++ u64 freed_pages = 0; ++ u64 chunk_gpu_va; ++ u64 chunk_hdr_val; ++ struct kbase_context *kctx = heap->kctx; ++ u64 *ctx_ptr; + -+static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) -+{ -+ struct trace_seq_state *state = s->private; -+ int i; ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + -+ if (data != 
SEQ_START_TOKEN) -+ (*pos)++; ++ WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED, ++ "Cannot support CPU cached heap context without sync operations"); + -+ i = (state->start + *pos) & KBASE_KTRACE_MASK; -+ if (i == state->end) -+ return NULL; ++ ctx_ptr = heap->gpu_va_map.addr; + -+ return &state->trace_buf[i]; ++ /* Extract the first chunk address from the context's free_list_head */ ++ chunk_hdr_val = *ctx_ptr; ++ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ ++ while (chunk_gpu_va) { ++ u64 hdr_val; ++ bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val); ++ ++ if (!success) ++ break; ++ ++ freed_chunks++; ++ chunk_hdr_val = hdr_val; ++ /* extract the next chunk address */ ++ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; ++ } ++ ++ /* Update the post-scan deletion to context header */ ++ *ctx_ptr = chunk_hdr_val; ++ ++ /* Try to scan the HW hoarded list of unused chunks */ ++ freed_chunks += delete_hoarded_chunks(heap); ++ freed_pages = freed_chunks * PFN_UP(heap->chunk_size); ++ dev_dbg(heap->kctx->kbdev->dev, ++ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n", ++ freed_chunks, freed_pages, chunk_hdr_val); ++ ++ /* Update context tiler heaps memory usage */ ++ kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT; ++ kctx->running_total_tiler_heap_nr_chunks -= freed_chunks; ++ return freed_pages; +} + -+static int kbasep_ktrace_seq_show(struct seq_file *s, void *data) ++u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free) +{ -+ struct kbase_ktrace_msg *trace_msg = data; -+ char buffer[KTRACE_DUMP_MESSAGE_SIZE]; ++ u64 freed = 0; ++ struct kbase_csf_tiler_heap *heap; + -+ /* If this is the start, print a header */ -+ if (data == SEQ_START_TOKEN) -+ kbasep_ktrace_format_header(buffer, sizeof(buffer), 0); -+ else -+ kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); ++ mutex_lock(&kctx->csf.tiler_heaps.lock); + -+ seq_printf(s, "%s\n", buffer); -+ return 0; -+} ++ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { ++ freed += delete_unused_chunk_pages(heap); + -+static const struct seq_operations kbasep_ktrace_seq_ops = { -+ .start = kbasep_ktrace_seq_start, -+ .next = kbasep_ktrace_seq_next, -+ .stop = kbasep_ktrace_seq_stop, -+ .show = kbasep_ktrace_seq_show, -+}; ++ /* If freed enough, then stop here */ ++ if (freed >= to_free) ++ break; ++ } + -+static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ /* The scan is surely not more than 4-G pages, but for logic flow limit it */ ++ if (WARN_ON(unlikely(freed > U32_MAX))) ++ return U32_MAX; ++ else ++ return (u32)freed; ++} ++ ++static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap) +{ -+ struct kbase_device *kbdev = inode->i_private; -+ unsigned long flags; ++ u32 chunk_cnt = 0; ++ u64 page_cnt = 0; + -+ struct trace_seq_state *state; ++ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + -+ state = __seq_open_private(file, &kbasep_ktrace_seq_ops, -+ sizeof(*state)); -+ if (!state) -+ return -ENOMEM; ++ /* Here the count is basically an informed estimate, avoiding the costly mapping/unmaping ++ * in the chunk list walk. The downside is that the number is a less reliable guide for ++ * later on scan (free) calls on this heap for what actually is freeable. 
++ */ ++ if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { ++ chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT; ++ page_cnt = chunk_cnt * PFN_UP(heap->chunk_size); ++ } + -+ spin_lock_irqsave(&kbdev->ktrace.lock, flags); -+ state->start = kbdev->ktrace.first_out; -+ state->end = kbdev->ktrace.next_in; -+ memcpy(state->trace_buf, kbdev->ktrace.rbuf, sizeof(state->trace_buf)); -+ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); ++ dev_dbg(heap->kctx->kbdev->dev, ++ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt, ++ page_cnt, heap->gpu_va); + -+ return 0; ++ return page_cnt; +} + -+static const struct file_operations kbasep_ktrace_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_ktrace_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release_private, -+}; -+ -+void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) ++u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx) +{ -+ debugfs_create_file("mali_trace", 0444, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_ktrace_debugfs_fops); -+} -+#endif /* CONFIG_DEBUG_FS */ ++ u64 page_cnt = 0; ++ struct kbase_csf_tiler_heap *heap; + -+#else /* KBASE_KTRACE_TARGET_RBUF */ ++ mutex_lock(&kctx->csf.tiler_heaps.lock); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); ++ list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) ++ page_cnt += count_unused_heap_pages(heap); ++ ++ mutex_unlock(&kctx->csf.tiler_heaps.lock); ++ ++ /* The count is surely not more than 4-G pages, but for logic flow limit it */ ++ if (WARN_ON(unlikely(page_cnt > U32_MAX))) ++ return U32_MAX; ++ else ++ return (u32)page_cnt; +} -+#endif /* CONFIG_DEBUG_FS */ -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h new file mode 100644 -index 000000000..11f0b5c42 +index 000000000..1b5cb5608 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h -@@ -0,0 +1,238 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h +@@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -177222,233 +179837,137 @@ index 000000000..11f0b5c42 + * + */ + -+/* -+ * DOC: Kbase's own trace, 'KTrace' -+ * -+ * Low overhead trace specific to kbase, aimed at: -+ * - common use-cases for tracing kbase specific functionality to do with -+ * running work on the GPU -+ * - easy 1-line addition of new types of trace -+ * -+ * KTrace can be recorded in one or more of the following targets: -+ * - KBASE_KTRACE_TARGET_RBUF: low overhead ringbuffer protected by an -+ * irq-spinlock, output available via dev_dbg() and debugfs file -+ * - KBASE_KTRACE_TARGET_FTRACE: ftrace based tracepoints under 'mali' events -+ */ -+ -+#ifndef _KBASE_DEBUG_KTRACE_H_ -+#define _KBASE_DEBUG_KTRACE_H_ -+ -+#if KBASE_KTRACE_TARGET_FTRACE -+#include "mali_linux_trace.h" -+#endif -+ -+#if MALI_USE_CSF -+#include "debug/backend/mali_kbase_debug_ktrace_csf.h" -+#else -+#include "debug/backend/mali_kbase_debug_ktrace_jm.h" -+#endif ++#ifndef _KBASE_CSF_TILER_HEAP_H_ ++#define _KBASE_CSF_TILER_HEAP_H_ + ++#include +/** -+ * kbase_ktrace_init - initialize kbase ktrace. -+ * @kbdev: kbase device ++ * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a ++ * GPU address space ++ * ++ * @kctx: Pointer to the kbase context being initialized. ++ * + * Return: 0 if successful or a negative error code on failure. + */ -+int kbase_ktrace_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_ktrace_term - terminate kbase ktrace. -+ * @kbdev: kbase device -+ */ -+void kbase_ktrace_term(struct kbase_device *kbdev); ++int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx); + +/** -+ * kbase_ktrace_hook_wrapper - wrapper so that dumping ktrace can be done via a -+ * callback. -+ * @param: kbase device, cast to void pointer ++ * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a ++ * GPU address space ++ * ++ * @kctx: Pointer to the kbase context being terminated. ++ * ++ * This function deletes any chunked tiler heaps that weren't deleted before ++ * context termination. + */ -+void kbase_ktrace_hook_wrapper(void *param); ++void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) +/** -+ * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if -+ * the selected targets support it. -+ * @kbdev: kbase device ++ * kbase_csf_tiler_heap_init - Initialize a chunked tiler memory heap. + * -+ * There is no matching 'term' call, debugfs_remove_recursive() is sufficient. ++ * @kctx: Pointer to the kbase context in which to allocate resources for the ++ * tiler heap. ++ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. ++ * @initial_chunks: The initial number of chunks to allocate. Must not be ++ * zero or greater than @max_chunks. ++ * @max_chunks: The maximum number of chunks that the heap should be allowed ++ * to use. Must not be less than @initial_chunks. ++ * @target_in_flight: Number of render-passes that the driver should attempt to ++ * keep in flight for which allocation of new chunks is ++ * allowed. Must not be zero. ++ * @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint for ++ * indicating that the caller is intending to perform tiler heap ++ * chunks reclaim for those that are hoarded with hardware while ++ * the associated shader activites are suspended and the CSGs are ++ * off slots. 
If the referred reclaiming is not desired, it can
++ * be set to 0.
++ * @gpu_heap_va: Where to store the GPU virtual address of the context that was
++ * set up for the tiler heap.
++ * @first_chunk_va: Where to store the GPU virtual address of the first chunk
++ * allocated for the heap. This points to the header of the
++ * heap chunk and not to the low address of free memory in it.
++ *
++ * Return: 0 if successful or a negative error code on failure.
++ */
++int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks,
++ u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va,
++ u64 *gpu_heap_va, u64 *first_chunk_va);
+
+/**
++ * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap.
+ *
++ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
++ * @gpu_heap_va: The GPU virtual address of the context that was set up for the
++ * tiler heap.
+ *
++ * This function will terminate a chunked tiler heap and cause all the chunks
++ * (initial and those added during out-of-memory processing) to be freed.
++ * It is the caller's responsibility to ensure no further operations on this
++ * heap will happen before calling this function.
+ *
++ * Return: 0 if successful or a negative error code on failure.
++ */
++int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
+
+/**
++ * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap.
+ *
++ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
++ * @gpu_heap_va: GPU virtual address of the heap context.
++ * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
++ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
++ * The minimum value is zero but it must be less than or equal to
++ * the total number of render passes in flight.
++ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
++ * chunk allocated for the heap.
+ *
++ * This function will allocate a new chunk for the chunked tiler heap depending
++ * on the settings provided by userspace when the heap was created and the
++ * heap's statistics (like number of render passes in-flight).
++ * It returns an appropriate error code if a new chunk could not be
++ * allocated.
+ *
++ * Return: 0 if a new chunk was allocated otherwise an appropriate negative
++ * error code (like -EBUSY when a free chunk is expected to be
++ * available upon completion of a render pass and -EINVAL when
++ * an invalid value was passed for one of the arguments).
+ */ -+void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, -+ struct kbase_context *kctx, kbase_ktrace_flag_t flags, -+ u64 info_val); ++int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, ++ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); + +/** -+ * kbasep_ktrace_clear - clear the trace ringbuffer -+ * @kbdev: kbase device ++ * kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker calim's scan ++ * functionality. + * -+ * PRIVATE: do not use directly. Use KBASE_KTRACE_CLEAR() instead. ++ * @kctx: Pointer to the kbase context for which the tiler heap recalim is to be ++ * operated with. ++ * @to_free: Number of pages suggested for the reclaim scan (free) method to reach. ++ * ++ * Return: the actual number of pages the scan method has freed from the call. + */ -+void kbasep_ktrace_clear(struct kbase_device *kbdev); ++u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free); + +/** -+ * kbasep_ktrace_dump - dump ktrace ringbuffer to dev_dbg(), then clear it -+ * @kbdev: kbase device ++ * kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker calim's count ++ * functionality. + * -+ * PRIVATE: do not use directly. Use KBASE_KTRACE_DUMP() instead. ++ * @kctx: Pointer to the kbase context for which the tiler heap recalim is to be ++ * operated with. ++ * ++ * Return: a number of pages that could likely be freed on the subsequent scan method call. + */ -+void kbasep_ktrace_dump(struct kbase_device *kbdev); -+ -+#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ -+ kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ -+ info_val) \ -+ -+#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ -+ kbasep_ktrace_clear(kbdev) -+ -+#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ -+ kbasep_ktrace_dump(kbdev) -+ -+#else /* KBASE_KTRACE_TARGET_RBUF */ -+ -+#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(code); \ -+ CSTD_UNUSED(kctx); \ -+ CSTD_UNUSED(info_val); \ -+ CSTD_NOP(0); \ -+ } while (0) -+ -+#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(0); \ -+ } while (0) -+#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(0); \ -+ } while (0) -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+ -+/* -+ * KTrace target for Linux's ftrace -+ */ -+#if KBASE_KTRACE_TARGET_FTRACE -+ -+#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ -+ trace_mali_##code(kctx, info_val) -+ -+#else /* KBASE_KTRACE_TARGET_FTRACE */ -+#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(code); \ -+ CSTD_UNUSED(kctx); \ -+ CSTD_UNUSED(info_val); \ -+ CSTD_NOP(0); \ -+ } while (0) -+#endif /* KBASE_KTRACE_TARGET_FTRACE */ -+ -+/* No 'clear' implementation for ftrace yet */ -+#define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(0); \ -+ } while (0) -+ -+/* No 'dump' implementation for ftrace yet */ -+#define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ -+ do { \ -+ CSTD_UNUSED(kbdev); \ -+ CSTD_NOP(0); \ -+ } while (0) -+ -+/* -+ * Master set of macros to route KTrace to any of the targets -+ */ -+ -+/** -+ * KBASE_KTRACE_ADD - Add trace values -+ * @kbdev: kbase device -+ * @code: trace code -+ * @kctx: kbase context, or NULL if no context -+ * @info_val: generic information about @code to add to the trace -+ * -+ * Note: Any functions called through this macro 
will still be evaluated in -+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when -+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied -+ * to this macro must: -+ * a) be static or static inline, and -+ * b) just return 0 and have no other statements present in the body. -+ */ -+#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ -+ do { \ -+ /* capture values that could come from non-pure function calls */ \ -+ u64 __info_val = info_val; \ -+ KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ -+ KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ -+ } while (0) -+ -+/** -+ * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) -+ * @kbdev: kbase device -+ */ -+#define KBASE_KTRACE_CLEAR(kbdev) \ -+ do { \ -+ KBASE_KTRACE_RBUF_CLEAR(kbdev); \ -+ KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ -+ } while (0) -+ -+/** -+ * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) -+ * @kbdev: kbase device -+ */ -+#define KBASE_KTRACE_DUMP(kbdev) \ -+ do { \ -+ KBASE_KTRACE_RBUF_DUMP(kbdev); \ -+ KBASE_KTRACE_FTRACE_DUMP(kbdev); \ -+ } while (0) -+ -+#endif /* _KBASE_DEBUG_KTRACE_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h ++u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx); ++#endif +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c new file mode 100644 -index 000000000..6103c3ee0 +index 000000000..96e0f2829 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h -@@ -0,0 +1,181 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c +@@ -0,0 +1,162 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -177466,369 +179985,157 @@ index 000000000..6103c3ee0 + * + */ + -+/* -+ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** -+ * ***** DO NOT INCLUDE DIRECTLY ***** -+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** -+ */ ++#include "mali_kbase_csf_tiler_heap_debugfs.h" ++#include "mali_kbase_csf_tiler_heap_def.h" ++#include ++#include + -+/* -+ * The purpose of this header file is just to contain a list of trace code -+ * identifiers -+ * -+ * When updating this file, also remember to update -+ * mali_kbase_debug_linux_ktrace.h -+ * -+ * Each identifier is wrapped in a macro, so that its string form and enum form -+ * can be created -+ * -+ * Each macro is separated with a comma, to allow insertion into an array -+ * initializer or enum definition block. -+ * -+ * This allows automatic creation of an enum and a corresponding array of -+ * strings -+ * -+ * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. -+ * After #including, the includer MUST #under KBASE_KTRACE_CODE_MAKE_CODE. 
-+ * -+ * e.g.: -+ * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X -+ * typedef enum -+ * { -+ * #define KBASE_KTRACE_CODE_MAKE_CODE( X ) KBASE_KTRACE_CODE( X ) -+ * #include "mali_kbase_debug_ktrace_codes.h" -+ * #undef KBASE_KTRACE_CODE_MAKE_CODE -+ * } kbase_ktrace_code; -+ * -+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE -+ * -+ * -+ * The use of the macro here is: -+ * - KBASE_KTRACE_CODE_MAKE_CODE( X ) -+ * -+ * Which produces: -+ * - For an enum, KBASE_KTRACE_CODE_X -+ * - For a string, "X" ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++/** ++ * kbasep_csf_tiler_heap_debugfs_show() - Print tiler heap information for per context + * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context + * -+ * For example: -+ * - KBASE_KTRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: -+ * - KBASE_KTRACE_CODE_JM_JOB_COMPLETE for the enum -+ * - "JM_JOB_COMPLETE" for the string -+ * - To use it to trace an event, do: -+ * - KBASE_KTRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); ++ * Return: 0 in any case. + */ ++static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; ++ struct kbase_csf_tiler_heap_context *tiler_heaps_p = &kctx->csf.tiler_heaps; ++ struct kbase_csf_tiler_heap *heap; ++ struct kbase_csf_tiler_heap_chunk *chunk; + -+#if 0 /* Dummy section to avoid breaking formatting */ -+int dummy_array[] = { -+#endif -+ -+ /* -+ * Core events -+ */ -+ /* no info_val */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), -+ /* no info_val */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), -+ /* info_val == GPU_IRQ_STATUS register */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), -+ /* info_val == bits cleared */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), -+ /* info_val == GPU_IRQ_STATUS register */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), -+ /* info_val == dump address */ -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), ++ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); + -+ /* -+ * Power Management Events -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), -+ 
KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), -+ /* info_val == kbdev->pm.active_count*/ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), -+ /* info_val == kbdev->pm.active_count*/ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), -+ /* info_val == policy number, or -1 for "Already changing" */ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), -+ /* info_val == policy number */ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), -+ /* info_val == policy number */ -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), ++ mutex_lock(&tiler_heaps_p->lock); + -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_POWEROFF_WAIT_WQ), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK), -+ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK), ++ list_for_each_entry(heap, &tiler_heaps_p->list, link) { ++ if (heap->kctx != kctx) ++ continue; + -+ /* info_val = l2 state */ -+#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n), -+#include "backend/gpu/mali_kbase_pm_l2_states.h" -+#undef KBASEP_L2_STATE ++ seq_printf(file, "HEAP(gpu_va = 0x%llx):\n", heap->gpu_va); ++ seq_printf(file, "\tchunk_size = %u\n", heap->chunk_size); ++ seq_printf(file, "\tchunk_count = %u\n", heap->chunk_count); ++ seq_printf(file, "\tmax_chunks = %u\n", heap->max_chunks); ++ seq_printf(file, "\ttarget_in_flight = %u\n", heap->target_in_flight); + -+ /* -+ * Context Scheduler events -+ */ -+ /* info_val == kctx->refcount */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), -+ /* info_val == kctx->refcount */ -+ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* -+ * Arbitration events -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), -+ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), -+ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), -+#endif ++ list_for_each_entry(chunk, &heap->chunks_list, link) ++ seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", ++ chunk->gpu_va); ++ } + -+#if MALI_USE_CSF -+#include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h" -+#else -+#include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" -+#endif -+ /* -+ * Unused code just to make it easier to not have a comma at the end. -+ * All other codes MUST come before this -+ */ -+ KBASE_KTRACE_CODE_MAKE_CODE(DUMMY) ++ mutex_unlock(&tiler_heaps_p->lock); + -+#if 0 /* Dummy section to avoid breaking formatting */ -+}; -+#endif ++ return 0; ++} + -+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h -new file mode 100644 -index 000000000..a0fc9e51d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h -@@ -0,0 +1,187 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbasep_csf_tiler_heap_total_debugfs_show() - Print the total memory allocated ++ * for all tiler heaps in a context. + * -+ */ -+ -+#ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ -+#define _KBASE_DEBUG_KTRACE_DEFS_H_ -+ -+/* Enable SW tracing when set */ -+#if defined(CONFIG_MALI_BIFROST_ENABLE_TRACE) || defined(CONFIG_MALI_BIFROST_SYSTEM_TRACE) -+#define KBASE_KTRACE_ENABLE 1 -+#endif -+ -+#ifndef KBASE_KTRACE_ENABLE -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define KBASE_KTRACE_ENABLE 1 -+#else /* CONFIG_MALI_BIFROST_DEBUG */ -+#define KBASE_KTRACE_ENABLE 0 -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+#endif /* KBASE_KTRACE_ENABLE */ -+ -+/* Select targets for recording of trace: ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context + * ++ * Return: 0 in any case. + */ -+#if KBASE_KTRACE_ENABLE -+ -+#ifdef CONFIG_MALI_BIFROST_SYSTEM_TRACE -+#define KBASE_KTRACE_TARGET_FTRACE 1 -+#else /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ -+#define KBASE_KTRACE_TARGET_FTRACE 0 -+#endif /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ ++static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; + -+#ifdef CONFIG_MALI_BIFROST_ENABLE_TRACE -+#define KBASE_KTRACE_TARGET_RBUF 1 -+#else /* CONFIG_MALI_BIFROST_ENABLE_TRACE*/ -+#define KBASE_KTRACE_TARGET_RBUF 0 -+#endif /* CONFIG_MALI_BIFROST_ENABLE_TRACE */ ++ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", ++ MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); ++ seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n", ++ (unsigned long)kctx->running_total_tiler_heap_nr_chunks); ++ seq_printf(file, "Total allocated memory of all heaps in the context: %llu\n", ++ (unsigned long long)kctx->running_total_tiler_heap_memory); ++ seq_printf(file, "Peak allocated tiler heap memory in the context: %llu\n", ++ (unsigned long long)kctx->peak_total_tiler_heap_memory); + -+#else /* KBASE_KTRACE_ENABLE */ -+#define KBASE_KTRACE_TARGET_FTRACE 0 -+#define KBASE_KTRACE_TARGET_RBUF 0 -+#endif /* KBASE_KTRACE_ENABLE */ ++ return 0; ++} + -+/* -+ * Note: Some backends define flags in this type even if the RBUF target is -+ * disabled (they get discarded with CSTD_UNUSED(), but they're still -+ * referenced) -+ */ -+typedef u8 kbase_ktrace_flag_t; ++static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private); ++} + -+#if KBASE_KTRACE_TARGET_RBUF -+typedef u8 kbase_ktrace_code_t; ++static int kbasep_csf_tiler_heap_total_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_tiler_heap_total_debugfs_show, in->i_private); ++} + -+/* -+ * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in -+ * the backend, since updates can be made to one backend in a way that doesn't -+ * affect the other. -+ * -+ * However, modifying the common part could require both backend versions to be -+ * updated. 
-+ */ ++static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { ++ .open = kbasep_csf_tiler_heap_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+/* -+ * union kbase_ktrace_backend - backend specific part of a trace message. -+ * At the very least, this must contain a kbase_ktrace_code_t 'code' member -+ * and a kbase_ktrace_flag_t 'flags' inside a "gpu" sub-struct. Should a -+ * backend need several sub structs in its union to optimize the data storage -+ * for different message types, then it can use a "common initial sequence" to -+ * allow 'flags' and 'code' to pack optimally without corrupting them. -+ * Different backends need not share common initial sequences between them, they -+ * only need to ensure they have gpu.flags and gpu.code members, it -+ * is up to the backend then how to order these. -+ */ -+union kbase_ktrace_backend; ++static const struct file_operations kbasep_csf_tiler_heap_total_debugfs_fops = { ++ .open = kbasep_csf_tiler_heap_total_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; + -+#if MALI_USE_CSF -+#include "debug/backend/mali_kbase_debug_ktrace_defs_csf.h" -+#else -+#include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" -+#endif ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+#if KBASE_KTRACE_TARGET_RBUF -+/* Indicates if the trace message has backend related info. -+ * -+ * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg -+ * as uninitialized, apart from the mandatory parts: -+ * - code -+ * - flags -+ */ -+#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) ++ file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, ++ kctx, &kbasep_csf_tiler_heap_debugfs_fops); + -+/* Collect all the common flags together for debug checking */ -+#define KBASE_KTRACE_FLAG_COMMON_ALL \ -+ (KBASE_KTRACE_FLAG_BACKEND) ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create tiler heap debugfs entry"); ++ } ++} + -+#define KBASE_KTRACE_FLAG_ALL \ -+ (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) ++void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; + -+#define KBASE_KTRACE_SHIFT (9) /* 512 entries */ -+#define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) -+#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X ++ file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry, ++ kctx, &kbasep_csf_tiler_heap_total_debugfs_fops); + -+/* Note: compiletime_assert() about this against kbase_ktrace_code_t is in -+ * kbase_ktrace_init() -+ */ -+enum kbase_ktrace_code { -+ /* -+ * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE -+ * THIS MUST BE USED AT THE START OF THE ENUM -+ */ -+#define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) -+#include -+#undef KBASE_KTRACE_CODE_MAKE_CODE -+ /* Comma on its own, to extend the list */ -+ , -+ /* Must be the last in the enum */ -+ KBASE_KTRACE_CODE_COUNT -+}; ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create total tiler heap allocated memory debugfs entry"); ++ } ++} + -+/** -+ * struct kbase_ktrace_msg - object representing a trace 
message added to trace -+ * buffer trace_rbuf in &kbase_device -+ * @timestamp: CPU timestamp at which the trace message was added. -+ * @thread_id: id of the thread in the context of which trace message was -+ * added. -+ * @cpu: indicates which CPU the @thread_id was scheduled on when the -+ * trace message was added. -+ * @kctx_tgid: Thread group ID of the &kbase_context associated with the -+ * message, or 0 if none associated. -+ * @kctx_id: Unique identifier of the &kbase_context associated with the -+ * message. Only valid if @kctx_tgid != 0. -+ * @info_val: value specific to the type of event being traced. Refer to the -+ * specific code in enum kbase_ktrace_code. -+ * @backend: backend-specific trace information. All backends must implement -+ * a minimum common set of members. ++#else ++/* ++ * Stub functions for when debugfs is disabled + */ -+struct kbase_ktrace_msg { -+ struct timespec64 timestamp; -+ u32 thread_id; -+ u32 cpu; -+ pid_t kctx_tgid; -+ u32 kctx_id; -+ u64 info_val; -+ union kbase_ktrace_backend backend; -+}; -+ -+struct kbase_ktrace { -+ spinlock_t lock; -+ u16 first_out; -+ u16 next_in; -+ struct kbase_ktrace_msg *rbuf; -+}; -+ -+ -+static inline void kbase_ktrace_compiletime_asserts(void) ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) +{ -+ /* See also documentation of enum kbase_ktrace_code */ -+ compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || -+ KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), -+ "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); -+ compiletime_assert((KBASE_KTRACE_FLAG_BACKEND_ALL & KBASE_KTRACE_FLAG_COMMON_ALL) == 0, -+ "KTrace backend flags intersect with KTrace common flags"); ++} + ++void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx) ++{ +} + -+#endif /* KBASE_KTRACE_TARGET_RBUF */ -+#endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h ++#endif /* CONFIG_DEBUG_FS */ ++ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h new file mode 100644 -index 000000000..ba93f29fe +index 000000000..4a1b413ef --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h -@@ -0,0 +1,90 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h +@@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -177846,85 +180153,39 @@ index 000000000..ba93f29fe + * + */ + -+#ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ -+#define _KBASE_DEBUG_KTRACE_INTERNAL_H_ -+ -+#if KBASE_KTRACE_TARGET_RBUF -+ -+#define KTRACE_DUMP_MESSAGE_SIZE 256 -+ -+/** -+ * kbasep_ktrace_backend_format_header - format the backend part of the header -+ * @buffer: buffer to write to -+ * @sz: size of @buffer in bytes -+ * @written: pointer to storage for updating bytes written so far to @buffer -+ * -+ * The backend must format only the non-common backend specific parts of the -+ * header. It must format them as though they were standalone. 
The caller will -+ * handle adding any delimiters around this. -+ */ -+void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); ++#ifndef _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ ++#define _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ + -+/** -+ * kbasep_ktrace_backend_format_msg - format the backend part of the message -+ * @trace_msg: ktrace message -+ * @buffer: buffer to write to -+ * @sz: size of @buffer in bytes -+ * @written: pointer to storage for updating bytes written so far to @buffer -+ * -+ * The backend must format only the non-common backend specific parts of the -+ * message. It must format them as though they were standalone. The caller will -+ * handle adding any delimiters around this. -+ * -+ * A caller may have the flags member of @trace_msg with -+ * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting -+ * appropriately. -+ */ -+void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, -+ char *buffer, int sz, s32 *written); ++/* Forward declaration */ ++struct kbase_context; + ++#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0 + +/** -+ * kbasep_ktrace_reserve - internal function to reserve space for a ktrace -+ * message -+ * @ktrace: kbase device's ktrace -+ * -+ * This may also empty the oldest entry in the ringbuffer to make space. ++ * kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap + * -+ * Return: ktrace message ++ * @kctx: The kbase_context for which to create the debugfs entry + */ -+struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx); + +/** -+ * kbasep_ktrace_msg_init - internal function to initialize just the common -+ * part of a ktrace message -+ * @ktrace: kbase device's ktrace -+ * @trace_msg: ktrace message to initialize -+ * @code: ktrace code -+ * @kctx: kbase context, or NULL if no context -+ * @flags: flags about the message -+ * @info_val: generic information about @code to add to the trace ++ * kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap + * -+ * The common part includes the mandatory parts of the backend part ++ * @kctx: The kbase_context for which to create the debugfs entry + */ -+void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, -+ struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, -+ struct kbase_context *kctx, kbase_ktrace_flag_t flags, -+ u64 info_val); -+ -+#endif /* KBASE_KTRACE_TARGET_RBUF */ ++void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx); + -+#endif /* _KBASE_DEBUG_KTRACE_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h ++#endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h new file mode 100644 -index 000000000..6d9664716 +index 000000000..96f2b03d2 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h -@@ -0,0 +1,123 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h +@@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -177942,157 +180203,135 @@ index 000000000..6d9664716 + * + */ + -+/* -+ * NOTE: This must **only** be included through mali_linux_trace.h, -+ * otherwise it will fail to setup tracepoints correctly -+ */ ++#ifndef _KBASE_CSF_TILER_HEAP_DEF_H_ ++#define _KBASE_CSF_TILER_HEAP_DEF_H_ + -+#if !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) -+#define _KBASE_DEBUG_LINUX_KTRACE_H_ ++#include + -+#if KBASE_KTRACE_TARGET_FTRACE ++/* Size of a tiler heap chunk header, in bytes. */ ++#define CHUNK_HDR_SIZE ((size_t)64) + -+DECLARE_EVENT_CLASS(mali_add_template, -+ TP_PROTO(struct kbase_context *kctx, u64 info_val), -+ TP_ARGS(kctx, info_val), -+ TP_STRUCT__entry( -+ __field(pid_t, kctx_tgid) -+ __field(u32, kctx_id) -+ __field(u64, info_val) -+ ), -+ TP_fast_assign( -+ __entry->kctx_id = (kctx) ? kctx->id : 0u; -+ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, -+ __entry->kctx_id, __entry->info_val) -+); ++/* Bit-position of the next chunk's size when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_SIZE_POS (0) + -+/* DEFINE_MALI_ADD_EVENT is available also to backends for backend-specific -+ * simple trace codes -+ */ -+#define DEFINE_MALI_ADD_EVENT(name) \ -+DEFINE_EVENT(mali_add_template, mali_##name, \ -+ TP_PROTO(struct kbase_context *kctx, u64 info_val), \ -+ TP_ARGS(kctx, info_val)) -+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); -+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); -+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); -+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); -+DEFINE_MALI_ADD_EVENT(PM_PWRON); -+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); -+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); -+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); -+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); -+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); -+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2); -+DEFINE_MALI_ADD_EVENT(PM_GPU_ON); -+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); -+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); -+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); -+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); -+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); -+DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE); -+DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); 
-+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); -+DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ); -+DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK); -+DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK); -+#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n); -+#include "backend/gpu/mali_kbase_pm_l2_states.h" -+#undef KBASEP_L2_STATE -+DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); -+DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); -+#ifdef CONFIG_MALI_ARBITER_SUPPORT ++/* Bit-position of the next chunk's address when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_POS (12) + -+DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); -+DEFINE_MALI_ADD_EVENT(ARB_VM_STATE); -+DEFINE_MALI_ADD_EVENT(ARB_VM_EVT); ++/* Bitmask of the next chunk's size when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_SIZE_MASK (((u64)1 << CHUNK_HDR_NEXT_ADDR_POS) - 1u) + -+#endif -+#if MALI_USE_CSF -+#include "backend/mali_kbase_debug_linux_ktrace_csf.h" -+#else -+#include "backend/mali_kbase_debug_linux_ktrace_jm.h" -+#endif ++/* Bitmask of the address of the next chunk when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_MASK (~CHUNK_HDR_NEXT_SIZE_MASK) + -+#undef DEFINE_MALI_ADD_EVENT ++/* Right-shift before storing the next chunk's size in a chunk header. */ ++#define CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT (12) + -+#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++/* Right-shift before storing the next chunk's address in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT (12) + -+#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) */ -diff --git a/drivers/gpu/arm/bifrost/device/Kbuild b/drivers/gpu/arm/bifrost/device/Kbuild -new file mode 100755 -index 000000000..723ffd215 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/Kbuild -@@ -0,0 +1,33 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/* Bitmask of valid chunk sizes. This is also the maximum chunk size, in bytes. ++ */ ++#define CHUNK_SIZE_MASK \ ++ ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << \ ++ CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) + -+bifrost_kbase-y += \ -+ device/mali_kbase_device.o \ -+ device/mali_kbase_device_hw.o ++/* Bitmask of valid chunk addresses. This is also the highest address. 
*/ ++#define CHUNK_ADDR_MASK \ ++ ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ ++ CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += \ -+ device/backend/mali_kbase_device_csf.o \ -+ device/backend/mali_kbase_device_hw_csf.o -+else -+ bifrost_kbase-y += \ -+ device/backend/mali_kbase_device_jm.o \ -+ device/backend/mali_kbase_device_hw_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c ++/* The size of the area needed to be vmapped prior to handing the tiler heap ++ * over to the tiler, so that the shrinker could be invoked. ++ */ ++#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64)) ++ ++/** ++ * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel ++ * ++ * @link: Link to this chunk in a list of chunks belonging to a ++ * @kbase_csf_tiler_heap. ++ * @region: Pointer to the GPU memory region allocated for the chunk. ++ * @map: Kernel VA mapping so that we would not need to use vmap in the ++ * shrinker callback, which can allocate. This maps only the header ++ * of the chunk, so it could be traversed. ++ * @gpu_va: GPU virtual address of the start of the memory region. ++ * This points to the header of the chunk and not to the low address ++ * of free memory within it. ++ * ++ * Chunks are allocated upon initialization of a tiler heap or in response to ++ * out-of-memory events from the firmware. Chunks are always fully backed by ++ * physical memory to avoid the overhead of processing GPU page faults. The ++ * allocated GPU memory regions are linked together independent of the list of ++ * kernel objects of this type. ++ */ ++struct kbase_csf_tiler_heap_chunk { ++ struct list_head link; ++ struct kbase_va_region *region; ++ struct kbase_vmap_struct map; ++ u64 gpu_va; ++}; ++ ++#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0) ++ ++/** ++ * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel ++ * ++ * @kctx: Pointer to the kbase context with which this heap is ++ * associated. ++ * @link: Link to this heap in a list of tiler heaps belonging to ++ * the @kbase_csf_tiler_heap_context. ++ * @chunks_list: Linked list of allocated chunks. ++ * @gpu_va: The GPU virtual address of the heap context structure that ++ * was allocated for the firmware. This is also used to ++ * uniquely identify the heap. ++ * @heap_id: Unique id representing the heap, assigned during heap ++ * initialization. ++ * @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatible ++ * to earlier version base interfaces. ++ * @buf_desc_reg: Pointer to the VA region that covers the provided buffer ++ * descriptor memory object pointed to by buf_desc_va. ++ * @gpu_va_map: Kernel VA mapping of the GPU VA region. ++ * @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from ++ * during the tiler heap shrinker. Sync operations may need ++ * to be done before each read. ++ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. ++ * @chunk_count: The number of chunks currently allocated. Must not be ++ * zero or greater than @max_chunks. ++ * @max_chunks: The maximum number of chunks that the heap should be ++ * allowed to use. Must not be less than @chunk_count. ++ * @target_in_flight: Number of render-passes that the driver should attempt ++ * to keep in flight for which allocation of new chunks is ++ * allowed. Must not be zero. 
++ * @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done. ++ */ ++struct kbase_csf_tiler_heap { ++ struct kbase_context *kctx; ++ struct list_head link; ++ struct list_head chunks_list; ++ u64 gpu_va; ++ u64 heap_id; ++ u64 buf_desc_va; ++ struct kbase_va_region *buf_desc_reg; ++ struct kbase_vmap_struct buf_desc_map; ++ struct kbase_vmap_struct gpu_va_map; ++ u32 chunk_size; ++ u32 chunk_count; ++ u32 max_chunks; ++ u16 target_in_flight; ++ bool buf_desc_checked; ++}; ++ ++#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c new file mode 100644 -index 000000000..f7054f5b0 +index 000000000..6357e3518 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c -@@ -0,0 +1,525 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +@@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -178110,520 +180349,354 @@ index 000000000..f7054f5b0 + * + */ + -+#include -+#include ++#include ++#include "mali_kbase_csf.h" ++#include "mali_kbase_csf_tiler_heap.h" ++#include "mali_kbase_csf_tiler_heap_reclaim.h" + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */ ++#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2) + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++/* Tiler heap shrinker batch value */ ++#define HEAP_SHRINKER_BATCH (512) + -+/** -+ * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC -+ * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. -+ * -+ * When a kbase driver is removed, terminate CSF firmware and hardware counter -+ * components. -+ */ -+static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) ++/* Tiler heap reclaim scan (free) method size for limiting a scan run length */ ++#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7) ++ ++static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx) +{ -+ if (kbdev->csf.firmware_inited) { -+ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx); -+ kbase_vinstr_term(kbdev->vinstr_ctx); -+ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); -+ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); -+ kbase_csf_firmware_unload_term(kbdev); ++ u8 prio; ++ ++ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW; ++ prio++) ++ if (!list_empty(&kctx->csf.sched.runnable_groups[prio])) ++ break; ++ ++ if (prio != KBASE_QUEUE_GROUP_PRIORITY_REALTIME && kctx->csf.sched.num_idle_wait_grps) { ++ struct kbase_queue_group *group; ++ ++ list_for_each_entry(group, &kctx->csf.sched.idle_wait_groups, link) { ++ if (group->priority < prio) ++ prio = group->priority; ++ } + } ++ ++ return prio; +} + -+/** -+ * kbase_backend_late_init - Perform any backend-specific initialization. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, or an error code on failure. 
-+ */ -+static int kbase_backend_late_init(struct kbase_device *kbdev) ++static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx) +{ -+ int err; -+ -+ err = kbase_hwaccess_pm_init(kbdev); -+ if (err) -+ return err; ++ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; ++ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; + -+ err = kbase_reset_gpu_init(kbdev); -+ if (err) -+ goto fail_reset_gpu_init; ++ lockdep_assert_held(&scheduler->lock); + -+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); -+ if (err) -+ goto fail_pm_powerup; ++ if (!list_empty(&info->mgr_link)) { ++ u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ? ++ info->nr_est_unused_pages - info->nr_freed_pages : ++ 0; + -+ err = kbase_backend_timer_init(kbdev); -+ if (err) -+ goto fail_timer; ++ list_del_init(&info->mgr_link); ++ if (remaining) ++ WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) < ++ 0); + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { -+ dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); -+ err = -EINVAL; -+ goto fail_interrupt_test; ++ dev_dbg(kctx->kbdev->dev, ++ "Reclaim_mgr_detach: ctx_%d_%d, est_pages=0%u, freed_pages=%u", kctx->tgid, ++ kctx->id, info->nr_est_unused_pages, info->nr_freed_pages); + } -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++} + -+ kbase_ipa_control_init(kbdev); ++static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx) ++{ ++ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; ++ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; ++ u8 const prio = get_kctx_highest_csg_priority(kctx); + -+ /* Initialise the metrics subsystem, it couldn't be initialized earlier -+ * due to dependency on kbase_ipa_control. -+ */ -+ err = kbasep_pm_metrics_init(kbdev); -+ if (err) -+ goto fail_pm_metrics_init; ++ lockdep_assert_held(&scheduler->lock); + -+ /* Do the initialisation of devfreq. -+ * Devfreq needs backend_timer_init() for completion of its -+ * initialisation and it also needs to catch the first callback -+ * occurrence of the runtime_suspend event for maintaining state -+ * coherence with the backend power management, hence needs to be -+ * placed before the kbase_pm_context_idle(). 
-+ */ -+ err = kbase_backend_devfreq_init(kbdev); -+ if (err) -+ goto fail_devfreq_init; ++ if (WARN_ON(!list_empty(&info->mgr_link))) ++ list_del_init(&info->mgr_link); + -+ /* Update gpuprops with L2_FEATURES if applicable */ -+ err = kbase_gpuprops_update_l2_features(kbdev); -+ if (err) -+ goto fail_update_l2_features; ++ /* Count the pages that could be freed */ ++ info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx); ++ /* Initialize the scan operation tracking pages */ ++ info->nr_freed_pages = 0; + -+ err = kbase_backend_time_init(kbdev); -+ if (err) -+ goto fail_update_l2_features; ++ list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]); ++ /* Accumulate the estimated pages to the manager total field */ ++ atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages); + -+ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++ dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid, ++ kctx->id, info->nr_est_unused_pages); ++} + -+ kbase_pm_context_idle(kbdev); ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; + -+ mutex_init(&kbdev->fw_load_lock); ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + -+ return 0; ++ info->on_slot_grps++; ++ /* If the kctx has an on-slot change from 0 => 1, detach it from reclaim_mgr */ ++ if (info->on_slot_grps == 1) { ++ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager", ++ group->kctx->tgid, group->kctx->id, group->handle); + -+fail_update_l2_features: -+ kbase_backend_devfreq_term(kbdev); -+fail_devfreq_init: -+ kbasep_pm_metrics_term(kbdev); -+fail_pm_metrics_init: -+ kbase_ipa_control_term(kbdev); ++ detach_ctx_from_heap_reclaim_mgr(kctx); ++ } ++} + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+fail_interrupt_test: -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; ++ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; ++ const u32 num_groups = kctx->kbdev->csf.global_iface.group_num; ++ u32 on_slot_grps = 0; ++ u32 i; + -+ kbase_backend_timer_term(kbdev); -+fail_timer: -+ kbase_pm_context_idle(kbdev); -+ kbase_hwaccess_pm_halt(kbdev); -+fail_pm_powerup: -+ kbase_reset_gpu_term(kbdev); -+fail_reset_gpu_init: -+ kbase_hwaccess_pm_term(kbdev); ++ lockdep_assert_held(&scheduler->lock); + -+ return err; -+} ++ /* Group eviction from the scheduler is a bit more complex, but fairly less ++ * frequent in operations. Taking the opportunity to actually count the ++ * on-slot CSGs from the given kctx, for robustness and clearer code logic. ++ */ ++ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *grp = csg_slot->resident_group; + -+/** -+ * kbase_backend_late_term - Perform any backend-specific termination. 
-+ * @kbdev: Device pointer -+ */ -+static void kbase_backend_late_term(struct kbase_device *kbdev) -+{ -+ kbase_backend_devfreq_term(kbdev); -+ kbasep_pm_metrics_term(kbdev); -+ kbase_ipa_control_term(kbdev); -+ kbase_hwaccess_pm_halt(kbdev); -+ kbase_reset_gpu_term(kbdev); -+ kbase_hwaccess_pm_term(kbdev); -+} ++ if (unlikely(!grp)) ++ continue; + -+/** -+ * kbase_csf_early_init - Early initialization for firmware & scheduler. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int kbase_csf_early_init(struct kbase_device *kbdev) -+{ -+ int err = kbase_csf_firmware_early_init(kbdev); ++ if (grp->kctx == kctx) ++ on_slot_grps++; ++ } + -+ if (err) -+ return err; ++ info->on_slot_grps = on_slot_grps; + -+ err = kbase_csf_scheduler_early_init(kbdev); ++ /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */ ++ if (!info->on_slot_grps) { ++ if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) { ++ /* The kctx has other operational CSGs, attach it if not yet done */ ++ if (list_empty(&info->mgr_link)) { ++ dev_dbg(kctx->kbdev->dev, ++ "CSG_%d_%d_%d evict, add kctx to reclaim manager", ++ group->kctx->tgid, group->kctx->id, group->handle); + -+ return err; -+} ++ attach_ctx_to_heap_reclaim_mgr(kctx); ++ } ++ } else { ++ /* The kctx is a zombie after the group eviction, drop it out */ ++ dev_dbg(kctx->kbdev->dev, ++ "CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager", ++ group->kctx->tgid, group->kctx->id, group->handle); + -+/** -+ * kbase_csf_early_term() - Early termination for firmware & scheduler. -+ * @kbdev: Device pointer -+ */ -+static void kbase_csf_early_term(struct kbase_device *kbdev) -+{ -+ kbase_csf_scheduler_early_term(kbdev); -+ kbase_csf_firmware_early_term(kbdev); ++ detach_ctx_from_heap_reclaim_mgr(kctx); ++ } ++ } +} + -+/** -+ * kbase_csf_late_init - late initialization for firmware. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int kbase_csf_late_init(struct kbase_device *kbdev) ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group) +{ -+ int err = kbase_csf_firmware_late_init(kbdev); ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; + -+ return err; -+} ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + -+/** -+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog -+ * interface. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) -+{ -+ return kbase_hwcnt_watchdog_if_timer_create( -+ &kbdev->hwcnt_watchdog_timer); -+} ++ if (!WARN_ON(info->on_slot_grps == 0)) ++ info->on_slot_grps--; ++ /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */ ++ if (info->on_slot_grps == 0) { ++ dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager", ++ group->kctx->tgid, group->kctx->id, group->handle); + -+/** -+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog -+ * interface. 
-+ * @kbdev: Device pointer -+ */ -+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer); ++ attach_ctx_to_heap_reclaim_mgr(kctx); ++ } +} + -+/** -+ * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend -+ * firmware interface. -+ * @kbdev: Device pointer -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) ++static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev) +{ -+ return kbase_hwcnt_backend_csf_if_fw_create( -+ kbdev, &kbdev->hwcnt_backend_csf_if_fw); -+} ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr; ++ unsigned long total_freed_pages = 0; ++ int prio; + -+/** -+ * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend -+ * firmware interface. -+ * @kbdev: Device pointer -+ */ -+static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); -+} ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/** -+ * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) -+{ -+ return kbase_hwcnt_backend_csf_create( -+ &kbdev->hwcnt_backend_csf_if_fw, -+ KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, -+ &kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface); -+} ++ for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW; ++ total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE && ++ prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME; ++ prio--) { ++ struct kbase_csf_ctx_heap_reclaim_info *info, *tmp; ++ u32 cnt_ctxs = 0; + -+/** -+ * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. 
-+ * @kbdev: Device pointer -+ */ -+static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); -+} ++ list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio], ++ mgr_link) { ++ struct kbase_context *kctx = ++ container_of(info, struct kbase_context, csf.sched.heap_info); ++ u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages( ++ kctx, info->nr_est_unused_pages); + -+static const struct kbase_device_init dev_init[] = { -+#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -+ { kbase_gpu_device_create, kbase_gpu_device_destroy, -+ "Dummy model initialization failed" }, -+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+ { assign_irqs, NULL, "IRQ search failed" }, -+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ { registers_map, registers_unmap, "Register map failed" }, -+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -+ { power_control_init, power_control_term, "Power control initialization failed" }, -+ { kbase_device_io_history_init, kbase_device_io_history_term, -+ "Register access history initialization failed" }, -+ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, -+ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, -+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, -+ { kbase_device_misc_init, kbase_device_misc_term, -+ "Miscellaneous device initialization failed" }, -+ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, -+ "Priority control manager initialization failed" }, -+ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, -+ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, -+ { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, -+ "Protected memory allocator initialization failed" }, -+ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, -+ { kbase_protected_mode_init, kbase_protected_mode_term, -+ "Protected mode subsystem initialization failed" }, -+ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, -+ { kbase_device_timeline_init, kbase_device_timeline_term, -+ "Timeline stream initialization failed" }, -+ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, -+ "Clock rate trace manager initialization failed" }, -+ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, -+ "GPU hwcnt backend watchdog interface creation failed" }, -+ { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term, -+ "GPU hwcnt backend CSF interface creation failed" }, -+ { kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term, -+ "GPU hwcnt backend creation failed" }, -+ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, -+ "GPU hwcnt context initialization failed" }, -+ { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" }, -+ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, -+ { kbase_csf_late_init, NULL, "Late CSF initialization failed" }, -+ { NULL, kbase_device_firmware_hwcnt_term, NULL }, -+ { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term, -+ "CSF fault debug initialization failed" }, -+ { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, -+ /* Sysfs init needs to happen before registering the device 
with -+ * misc_register(), otherwise it causes a race condition between -+ * registering the device and a uevent event being generated for -+ * userspace, causing udev rules to run which might expect certain -+ * sysfs attributes present. As a result of the race condition -+ * we avoid, some Mali sysfs entries may have appeared to udev -+ * to not exist. -+ * For more information, see -+ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the -+ * paragraph that starts with "Word of warning", currently the -+ * second-last paragraph. -+ */ -+ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, -+ { kbase_device_misc_register, kbase_device_misc_deregister, -+ "Misc device registration failed" }, -+ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, -+ "GPU property population failed" }, -+ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, -+#if IS_ENABLED(CONFIG_MALI_CORESIGHT) -+ { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term, -+ "Coresight initialization failed" }, -+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ -+}; ++ if (freed_pages) { ++ /* Remove the freed pages from the manager retained estimate. The ++ * accumulated removals from the kctx should not exceed the kctx ++ * initially notified contribution amount: ++ * info->nr_est_unused_pages. ++ */ ++ u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages, ++ freed_pages); + -+static void kbase_device_term_partial(struct kbase_device *kbdev, -+ unsigned int i) -+{ -+ while (i-- > 0) { -+ if (dev_init[i].term) -+ dev_init[i].term(kbdev); -+ } -+} ++ WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0); + -+void kbase_device_term(struct kbase_device *kbdev) -+{ -+ kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); -+ kbase_mem_halt(kbdev); -+} ++ /* tracking the freed pages, before a potential detach call */ ++ info->nr_freed_pages += freed_pages; ++ total_freed_pages += freed_pages; + -+int kbase_device_init(struct kbase_device *kbdev) -+{ -+ int err = 0; -+ unsigned int i = 0; ++ schedule_work(&kctx->jit_work); ++ } + -+ dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); ++ /* If the kctx can't offer anymore, drop it from the reclaim manger, ++ * otherwise leave it remaining in. If the kctx changes its state (i.e. ++ * some CSGs becoming on-slot), the scheduler will pull it out. ++ */ ++ if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0) ++ detach_ctx_from_heap_reclaim_mgr(kctx); + -+ kbase_device_id_init(kbdev); -+ kbase_disjoint_init(kbdev); ++ cnt_ctxs++; + -+ for (i = 0; i < ARRAY_SIZE(dev_init); i++) { -+ if (dev_init[i].init) { -+ err = dev_init[i].init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "%s error = %d\n", -+ dev_init[i].err_mes, err); -+ kbase_device_term_partial(kbdev, i); ++ /* Enough has been freed, break to avoid holding the lock too long */ ++ if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE) + break; -+ } + } ++ ++ dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)", ++ total_freed_pages, cnt_ctxs, prio); + } + -+ return err; ++ dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)", ++ total_freed_pages); ++ ++ return total_freed_pages; +} + -+/** -+ * kbase_device_hwcnt_csf_deferred_init - Initialize CSF deferred HWC components -+ * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. 
-+ * -+ * Hardware counter components depending on firmware are initialized after CSF -+ * firmware is loaded. -+ * -+ * Return: 0 on success. An error code on failure. -+ */ -+static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev) ++static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev, ++ struct shrink_control *sc) +{ -+ int ret = 0; ++ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; ++ unsigned long page_cnt = atomic_read(&mgr->unused_pages); + -+ /* For CSF GPUs, HWC metadata needs to query information from CSF -+ * firmware, so the initialization of HWC metadata only can be called -+ * after firmware initialized, but firmware initialization depends on -+ * HWC backend initialization, so we need to separate HWC backend -+ * metadata initialization from HWC backend initialization. -+ */ -+ ret = kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "GPU hwcnt backend metadata creation failed"); -+ return ret; -+ } ++ dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt); + -+ ret = kbase_hwcnt_virtualizer_init( -+ kbdev->hwcnt_gpu_ctx, -+ KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, -+ &kbdev->hwcnt_gpu_virt); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "GPU hwcnt virtualizer initialization failed"); -+ goto virt_fail; -+ } ++ return page_cnt; ++} + -+ ret = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Virtual instrumentation initialization failed"); -+ goto vinstr_fail; -+ } ++static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev, ++ struct shrink_control *sc) ++{ ++ struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; ++ unsigned long freed = 0; ++ unsigned long avail = 0; + -+ ret = kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, -+ &kbdev->kinstr_prfcnt_ctx); -+ if (ret) { -+ dev_err(kbdev->dev, -+ "Performance counter instrumentation initialization failed"); -+ goto kinstr_prfcnt_fail; ++ /* If Scheduler is busy in action, return 0 */ ++ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ /* Wait for roughly 2-ms */ ++ wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY), ++ msecs_to_jiffies(2)); ++ if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { ++ dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)"); ++ return 0; ++ } + } + -+ return ret; ++ avail = atomic_read(&mgr->unused_pages); ++ if (avail) ++ freed = reclaim_unused_heap_pages(kbdev); + -+kinstr_prfcnt_fail: -+ kbase_vinstr_term(kbdev->vinstr_ctx); ++ mutex_unlock(&kbdev->csf.scheduler.lock); + -+vinstr_fail: -+ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); ++#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) ++ if (freed > sc->nr_to_scan) ++ sc->nr_scanned = freed; ++#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */ + -+virt_fail: -+ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); -+ return ret; ++ dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", freed, ++ avail); ++ ++ /* On estimate suggesting available, yet actual free failed, return STOP */ ++ if (avail && !freed) ++ return SHRINK_STOP; ++ else ++ return freed; +} + -+/** -+ * kbase_csf_firmware_deferred_init - Load and initialize CSF firmware -+ * -+ * @kbdev: An instance of the GPU platform device, 
allocated from the probe -+ * method of the driver. -+ * -+ * Called when a device file is opened for the first time. -+ * To meet Android GKI vendor guideline, firmware load is deferred at -+ * the time when @ref kbase_open is called for the first time. -+ * -+ * Return: 0 on success. An error code on failure. -+ */ -+static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) ++static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ int err = 0; -+ -+ lockdep_assert_held(&kbdev->fw_load_lock); ++ struct kbase_device *kbdev = ++ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + -+ err = kbase_csf_firmware_load_init(kbdev); -+ if (!err) { -+ unsigned long flags; ++ return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.mcu_state = KBASE_MCU_ON; -+ kbdev->csf.firmware_inited = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ dev_err(kbdev->dev, "Firmware initialization failed"); -+ } ++static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, ++ struct shrink_control *sc) ++{ ++ struct kbase_device *kbdev = ++ container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + -+ return err; ++ return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); +} + -+int kbase_device_firmware_init_once(struct kbase_device *kbdev) ++void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) +{ -+ int ret = 0; ++ /* Per-kctx heap_info object initialization */ ++ memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info)); ++ INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); ++} + -+ mutex_lock(&kbdev->fw_load_lock); ++void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; ++ u8 prio; + -+ if (!kbdev->csf.firmware_inited) { -+ kbase_pm_context_active(kbdev); ++ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++ prio++) ++ INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]); + -+ ret = kbase_csf_firmware_deferred_init(kbdev); -+ if (ret) -+ goto out; ++ atomic_set(&scheduler->reclaim_mgr.unused_pages, 0); + -+ ret = kbase_device_hwcnt_csf_deferred_init(kbdev); -+ if (ret) { -+ kbase_csf_firmware_unload_term(kbdev); -+ goto out; -+ } ++ reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects; ++ reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects; ++ reclaim->seeks = HEAP_SHRINKER_SEEKS; ++ reclaim->batch = HEAP_SHRINKER_BATCH; ++} + -+ kbase_csf_debugfs_init(kbdev); -+ kbase_timeline_io_debugfs_init(kbdev); -+out: -+ kbase_pm_context_idle(kbdev); -+ } ++void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u8 prio; + -+ mutex_unlock(&kbdev->fw_load_lock); ++ for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++ prio++) ++ WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio])); + -+ return ret; ++ WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages)); +} -+KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once); -diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c 
b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h new file mode 100644 -index 000000000..2abd62aaa +index 000000000..b6e580e48 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c -@@ -0,0 +1,252 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h +@@ -0,0 +1,80 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -178641,247 +180714,258 @@ index 000000000..2abd62aaa + * + */ + ++#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_ ++#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_ ++ +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + +/** -+ * kbase_report_gpu_fault - Report a GPU fault of the device. ++ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler ++ * to use when a group is put on-slot. + * -+ * @kbdev: Kbase device pointer -+ * @status: Fault status -+ * @as_nr: Faulty address space -+ * @as_valid: true if address space is valid ++ * @group: Pointer to the group object that has been placed on-slot for running. + * -+ * This function is called from the interrupt handler when a GPU fault occurs. + */ -+static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, -+ u32 as_nr, bool as_valid) -+{ -+ u64 address = (u64) kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group); + -+ address |= kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++/** ++ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler ++ * to use when a group is evicted out of the schedulder's scope, i.e no run of ++ * the group is possible afterwards. ++ * ++ * @group: Pointer to the group object that has been evicted. ++ * ++ */ ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group); + -+ /* Report GPU fault for all contexts in case either -+ * the address space is invalid or it's MCU address space. -+ */ -+ kbase_mmu_gpu_fault_interrupt(kbdev, status, as_nr, address, as_valid); -+} ++/** ++ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler ++ * to use when a group is suspended from running, but could resume in future. ++ * ++ * @group: Pointer to the group object that is in suspended state. 
++ * ++ */ ++void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group); + -+static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) -+{ -+ const u32 status = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTSTATUS)); -+ const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; -+ const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> -+ GPU_FAULTSTATUS_JASID_SHIFT; -+ bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == -+ GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; ++/** ++ * kbase_csf_tiler_heap_reclaim_ctx_init - Initializer on per context data fields for use ++ * with the tiler heap reclaim manager. ++ * ++ * @kctx: Pointer to the kbase_context. ++ * ++ */ ++void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx); + -+ if (bus_fault) { -+ /* If as_valid, reset gpu when ASID is for MCU. */ -+ if (!as_valid || (as_nr == MCU_AS_NR)) { -+ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); ++/** ++ * kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manger. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ */ ++void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); + -+ dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); -+ if (kbase_prepare_to_reset_gpu( -+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); -+ } else { -+ /* Handle Bus fault */ -+ if (kbase_mmu_bus_fault_interrupt(kbdev, status, as_nr)) -+ dev_warn(kbdev->dev, -+ "fail to handle GPU bus fault ...\n"); -+ } -+ } else -+ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); ++/** ++ * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ */ ++void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev); + -+} ++#endif +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c +new file mode 100644 +index 000000000..ea6c11624 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c +@@ -0,0 +1,177 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) -+{ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); -+ if (val & GPU_FAULT) -+ kbase_gpu_fault_interrupt(kbdev); ++#include ++#include ++#include ++#include ++#include + -+ if (val & GPU_PROTECTED_FAULT) { -+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; -+ unsigned long flags; ++#include "mali_kbase.h" ++#include "mali_kbase_config_defaults.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_reset_gpu.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" + -+ dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode"); ++/** ++ * set_timeout - set a new global progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @timeout: the maximum number of GPU cycles without forward progress to allow ++ * to elapse before terminating a GPU command queue group. ++ * ++ * Return: 0 on success, or negative on failure ++ * (e.g. -ERANGE if the requested timeout is too large). ++ */ ++static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) ++{ ++ if (timeout > GLB_PROGRESS_TIMER_TIMEOUT_MAX) { ++ dev_err(kbdev->dev, "Timeout %llu is too large.\n", timeout); ++ return -ERANGE; ++ } + -+ /* Mask the protected fault interrupt to avoid the potential -+ * deluge of such interrupts. It will be unmasked on GPU reset. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout); + -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use( -+ kbdev))) { -+ struct base_gpu_queue_group_error const -+ err_payload = { .error_type = -+ BASE_GPU_QUEUE_GROUP_ERROR_FATAL, -+ .payload = { -+ .fatal_group = { -+ .status = -+ GPU_EXCEPTION_TYPE_SW_FAULT_0, -+ } } }; ++ atomic64_set(&kbdev->csf.progress_timeout, timeout); + -+ kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx, -+ DF_GPU_PROTECTED_FAULT); ++ return 0; ++} + -+ scheduler->active_protm_grp->faulted = true; -+ kbase_csf_add_group_fatal_error( -+ scheduler->active_protm_grp, &err_payload); -+ kbase_event_wakeup(scheduler->active_protm_grp->kctx); -+ } -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++/** ++ * progress_timeout_store - Store the progress_timeout device attribute. ++ * @dev: The device that has the attribute. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called when the progress_timeout sysfs file is written to. ++ * It checks the data written, and if valid updates the progress timeout value. ++ * The function also checks gpu reset status, if the gpu is in reset process, ++ * the function will return an error code (-EBUSY), and no change for timeout ++ * value. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t progress_timeout_store(struct device * const dev, ++ struct device_attribute * const attr, const char * const buf, ++ size_t const count) ++{ ++ struct kbase_device *const kbdev = dev_get_drvdata(dev); ++ int err; ++ u64 timeout; + -+ if (kbase_prepare_to_reset_gpu( -+ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); ++ if (!kbdev) ++ return -ENODEV; + -+ /* Defer the clearing to the GPU reset sequence */ -+ val &= ~GPU_PROTECTED_FAULT; ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ if (err) { ++ dev_warn(kbdev->dev, ++ "Couldn't process progress_timeout write operation for GPU reset.\n"); ++ return -EBUSY; + } + -+ if (val & RESET_COMPLETED) -+ kbase_pm_reset_done(kbdev); -+ -+ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done. -+ * We need to acquire hwaccess_lock to avoid a race condition with -+ * kbase_gpu_cache_flush_and_busy_wait -+ */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); -+ -+#ifdef KBASE_PM_RUNTIME -+ if (val & DOORBELL_MIRROR) { -+ unsigned long flags; -+ -+ dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_disable_db_mirror_interrupt(kbdev); -+ kbdev->pm.backend.exit_gpu_sleep_mode = true; -+ kbase_csf_scheduler_invoke_tick(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } -+#endif ++ err = kstrtou64(buf, 0, &timeout); ++ if (err) ++ dev_err(kbdev->dev, ++ "Couldn't process progress_timeout write operation.\n" ++ "Use format \n"); ++ else ++ err = set_timeout(kbdev, timeout); + -+ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must -+ * be called after the IRQ has been cleared. This is because it might -+ * trigger further power transitions and we don't want to miss the -+ * interrupt raised to notify us that these further transitions have -+ * finished. The same applies to kbase_clean_caches_done() - if another -+ * clean was queued, it might trigger another clean, which might -+ * generate another interrupt which shouldn't be missed. -+ */ ++ if (!err) { ++ kbase_csf_scheduler_pm_active(kbdev); + -+ if (val & CLEAN_CACHES_COMPLETED) -+ kbase_clean_caches_done(kbdev); ++ err = kbase_csf_scheduler_wait_mcu_active(kbdev); ++ if (!err) ++ err = kbase_csf_firmware_set_timeout(kbdev, timeout); + -+ if (val & (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)) { -+ kbase_pm_power_changed(kbdev); -+ } else if (val & CLEAN_CACHES_COMPLETED) { -+ /* If cache line evict messages can be lost when shader cores -+ * power down then we need to flush the L2 cache before powering -+ * down cores. When the flush completes, the shaders' state -+ * machine needs to be re-invoked to proceed with powering down -+ * cores. 
-+ */ -+ if (kbdev->pm.backend.l2_always_on || -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) -+ kbase_pm_power_changed(kbdev); ++ kbase_csf_scheduler_pm_idle(kbdev); + } + -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); -+} -+ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+bool kbase_is_register_accessible(u32 offset) -+{ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || -+ ((offset >= GPU_CONTROL_MCU_BASE) && (offset < USER_BASE))) { -+ WARN(1, "Invalid register offset 0x%x", offset); -+ return false; -+ } -+#endif ++ kbase_reset_gpu_allow(kbdev); ++ if (err) ++ return err; + -+ return true; ++ return count; +} -+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) ++/** ++ * progress_timeout_show - Show the progress_timeout device attribute. ++ * @dev: The device that has the attribute. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the global timeout. ++ * ++ * This function is called to get the progress timeout value. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t progress_timeout_show(struct device * const dev, ++ struct device_attribute * const attr, char * const buf) +{ -+ if (WARN_ON(!kbdev->pm.backend.gpu_powered)) -+ return; ++ struct kbase_device *const kbdev = dev_get_drvdata(dev); ++ int err; + -+ if (WARN_ON(kbdev->dev == NULL)) -+ return; ++ if (!kbdev) ++ return -ENODEV; + -+ if (!kbase_is_register_accessible(offset)) -+ return; ++ err = scnprintf(buf, PAGE_SIZE, "%llu\n", kbase_csf_timeout_get(kbdev)); + -+ writel(value, kbdev->reg + offset); ++ return err; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ value, 1); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +} -+KBASE_EXPORT_TEST_API(kbase_reg_write); + -+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) ++static DEVICE_ATTR_RW(progress_timeout); ++ ++int kbase_csf_timeout_init(struct kbase_device *const kbdev) +{ -+ u32 val; ++ u64 timeout = DEFAULT_PROGRESS_TIMEOUT; ++ int err; + -+ if (WARN_ON(!kbdev->pm.backend.gpu_powered)) -+ return 0; ++#if IS_ENABLED(CONFIG_OF) ++ err = of_property_read_u64(kbdev->dev->of_node, ++ "progress_timeout", &timeout); ++ if (!err) ++ dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", ++ timeout); ++#endif + -+ if (WARN_ON(kbdev->dev == NULL)) -+ return 0; ++ err = set_timeout(kbdev, timeout); ++ if (err) ++ return err; + -+ if (!kbase_is_register_accessible(offset)) -+ return 0; ++ err = sysfs_create_file(&kbdev->dev->kobj, ++ &dev_attr_progress_timeout.attr); ++ if (err) ++ dev_err(kbdev->dev, "SysFS file creation failed\n"); + -+ val = readl(kbdev->reg + offset); ++ return err; ++} + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ val, 0); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); ++void kbase_csf_timeout_term(struct kbase_device * const kbdev) ++{ ++ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); ++} + -+ return val; ++u64 kbase_csf_timeout_get(struct kbase_device *const kbdev) ++{ ++ return atomic64_read(&kbdev->csf.progress_timeout); +} -+KBASE_EXPORT_TEST_API(kbase_reg_read); 
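Note on the progress_timeout interface added in the hunk above: reads of the new sysfs attribute go through progress_timeout_show(), writes go through progress_timeout_store(), which rejects values larger than GLB_PROGRESS_TIMER_TIMEOUT_MAX with -ERANGE and returns -EBUSY while a GPU reset is in progress, and kbase_csf_timeout_init() can also seed the value from an optional "progress_timeout" devicetree property. The userspace sketch below is illustrative only and is not part of the patch; it takes the attribute path as a command-line argument because the exact sysfs location depends on how the Mali platform device is named on a given board.

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	unsigned long long timeout;
	FILE *f;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <path-to-progress_timeout> [new_value]\n", argv[0]);
		return EXIT_FAILURE;
	}

	/* Read the current value; progress_timeout_show() prints it as "%llu\n". */
	f = fopen(argv[1], "r");
	if (!f || fscanf(f, "%llu", &timeout) != 1) {
		fprintf(stderr, "failed to read %s\n", argv[1]);
		return EXIT_FAILURE;
	}
	fclose(f);
	printf("current progress timeout: %llu GPU cycles\n", timeout);

	/* Optionally write a new value; progress_timeout_store() parses it with
	 * kstrtou64() and fails with ERANGE for values above
	 * GLB_PROGRESS_TIMER_TIMEOUT_MAX or EBUSY during a GPU reset.
	 */
	if (argc > 2) {
		f = fopen(argv[1], "w");
		if (!f || fprintf(f, "%s\n", argv[2]) < 0 || fclose(f) != 0) {
			fprintf(stderr, "failed to write %s\n", argv[1]);
			return EXIT_FAILURE;
		}
	}
	return EXIT_SUCCESS;
}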
-+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h new file mode 100644 -index 000000000..38223af21 +index 000000000..b406eaad2 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c -@@ -0,0 +1,143 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h +@@ -0,0 +1,66 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -178899,488 +180983,508 @@ index 000000000..38223af21 + * + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#ifndef _KBASE_CSF_TIMEOUT_H_ ++#define _KBASE_CSF_TIMEOUT_H_ ++ ++struct kbase_device; + +/** -+ * kbase_report_gpu_fault - Report a GPU fault. -+ * @kbdev: Kbase device pointer -+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS -+ * was also set ++ * kbase_csf_timeout_init - Initialize the progress timeout. + * -+ * This function is called from the interrupt handler when a GPU fault occurs. -+ * It reports the details of the fault using dev_warn(). ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * Must be zero-initialized. ++ * ++ * The progress timeout is the number of GPU clock cycles allowed to elapse ++ * before the driver terminates a GPU command queue group in which a task is ++ * making no forward progress on an endpoint (e.g. a shader core). This function ++ * determines the initial value and also creates a sysfs file to allow the ++ * timeout to be reconfigured later. ++ * ++ * Reconfigures the global firmware interface to enable the current timeout. ++ * ++ * Return: 0 on success, or negative on failure. + */ -+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) -+{ -+ u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); -+ u64 address = (u64) kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; -+ -+ address |= kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++int kbase_csf_timeout_init(struct kbase_device *kbdev); + -+ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", -+ status, -+ kbase_gpu_exception_name(status & 0xFF), -+ address); -+ if (multiple) -+ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); ++/** ++ * kbase_csf_timeout_term - Terminate the progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Removes the sysfs file which allowed the timeout to be reconfigured. ++ * Does nothing if called on a zero-initialized object. ++ */ ++void kbase_csf_timeout_term(struct kbase_device *kbdev); + -+} ++/** ++ * kbase_csf_timeout_get - get the current global progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
++ * ++ * Return: the maximum number of GPU cycles that is allowed to elapse without ++ * forward progress before the driver terminates a GPU command queue ++ * group. ++ */ ++u64 kbase_csf_timeout_get(struct kbase_device *const kbdev); + -+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) -+{ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); -+ if (val & GPU_FAULT) -+ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); ++#endif /* _KBASE_CSF_TIMEOUT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +new file mode 100644 +index 000000000..6859d6529 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +@@ -0,0 +1,441 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (val & RESET_COMPLETED) -+ kbase_pm_reset_done(kbdev); ++#include "mali_kbase_csf_tl_reader.h" + -+ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done. -+ * We need to acquire hwaccess_lock to avoid a race condition with -+ * kbase_gpu_cache_flush_and_busy_wait -+ */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_reset_gpu.h" + -+ /* kbase_instr_hwcnt_sample_done frees the HWCNT pipeline to request another -+ * sample. Therefore this must be called after clearing the IRQ to avoid a -+ * race between clearing and the next sample raising the IRQ again. -+ */ -+ if (val & PRFCNT_SAMPLE_COMPLETED) -+ kbase_instr_hwcnt_sample_done(kbdev); ++#include "tl/mali_kbase_tlstream.h" ++#include "tl/mali_kbase_tl_serialize.h" ++#include "tl/mali_kbase_tracepoints.h" + -+ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must -+ * be called after the IRQ has been cleared. This is because it might -+ * trigger further power transitions and we don't want to miss the -+ * interrupt raised to notify us that these further transitions have -+ * finished. The same applies to kbase_clean_caches_done() - if another -+ * clean was queued, it might trigger another clean, which might -+ * generate another interrupt which shouldn't be missed. -+ */ ++#include "mali_kbase_pm.h" ++#include "mali_kbase_hwaccess_time.h" + -+ if (val & CLEAN_CACHES_COMPLETED) -+ kbase_clean_caches_done(kbdev); ++#include + -+ if (val & POWER_CHANGED_ALL) { -+ kbase_pm_power_changed(kbdev); -+ } else if (val & CLEAN_CACHES_COMPLETED) { -+ /* If cache line evict messages can be lost when shader cores -+ * power down then we need to flush the L2 cache before powering -+ * down cores. 
When the flush completes, the shaders' state -+ * machine needs to be re-invoked to proceed with powering down -+ * cores. -+ */ -+ if (kbdev->pm.backend.l2_always_on || -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) -+ kbase_pm_power_changed(kbdev); -+ } ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "tl/mali_kbase_timeline_priv.h" ++#include ++#endif + -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); -+} ++/* Name of the CSFFW timeline tracebuffer. */ ++#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline" ++/* Name of the timeline header metatadata */ ++#define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header" + -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) ++/** ++ * struct kbase_csffw_tl_message - CSFFW timeline message. ++ * ++ * @msg_id: Message ID. ++ * @timestamp: Timestamp of the event. ++ * @cycle_counter: Cycle number of the event. ++ * ++ * Contain fields that are common for all CSFFW timeline messages. ++ */ ++struct kbase_csffw_tl_message { ++ u32 msg_id; ++ u64 timestamp; ++ u64 cycle_counter; ++} __packed __aligned(4); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++static int kbase_csf_tl_debugfs_poll_interval_read(void *data, u64 *val) +{ -+ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; + -+ writel(value, kbdev->reg + offset); ++ *val = self->timer_interval; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ value, 1); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_reg_write); + -+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) ++static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val) +{ -+ u32 val; ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; + -+ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) ++ return -EINVAL; + -+ val = readl(kbdev->reg + offset); ++ self->timer_interval = (u32)val; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ val, 0); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); ++ return 0; ++} + -+ return val; ++DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, ++ kbase_csf_tl_debugfs_poll_interval_read, ++ kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); ++ ++void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, ++ kbdev->debugfs_instr_directory, kbdev, ++ &kbase_csf_tl_poll_interval_fops); +} -+KBASE_EXPORT_TEST_API(kbase_reg_read); -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c -new file mode 100644 -index 000000000..2d3672383 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c -@@ -0,0 +1,344 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++#endif ++ ++/** ++ * tl_reader_overflow_notify() - Emit stream overflow tracepoint. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @self: CSFFW TL Reader instance. ++ * @msg_buf_start: Start of the message. ++ * @msg_buf_end: End of the message buffer. ++ */ ++static void tl_reader_overflow_notify( ++ const struct kbase_csf_tl_reader *self, ++ u8 *const msg_buf_start, ++ u8 *const msg_buf_end) ++{ ++ struct kbase_device *kbdev = self->kbdev; ++ struct kbase_csffw_tl_message message = {0}; ++ ++ /* Reuse the timestamp and cycle count from current event if possible */ ++ if (msg_buf_start + sizeof(message) <= msg_buf_end) ++ memcpy(&message, msg_buf_start, sizeof(message)); ++ ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( ++ kbdev, message.timestamp, message.cycle_counter); ++} ++ ++/** ++ * tl_reader_overflow_check() - Check if an overflow has happened + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @self: CSFFW TL Reader instance. ++ * @event_id: Incoming event id. + * ++ * Return: True, if an overflow has happened, False otherwise. + */ ++static bool tl_reader_overflow_check( ++ struct kbase_csf_tl_reader *self, ++ u16 event_id) ++{ ++ struct kbase_device *kbdev = self->kbdev; ++ bool has_overflow = false; + -+#include -+#include -+#include ++ /* 0 is a special event_id and reserved for the very first tracepoint ++ * after reset, we should skip overflow check when reset happened. ++ */ ++ if (event_id != 0) { ++ has_overflow = self->got_first_event ++ && self->expected_event_id != event_id; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ if (has_overflow) ++ dev_warn(kbdev->dev, ++ "CSFFW overflow, event_id: %u, expected: %u.", ++ event_id, self->expected_event_id); ++ } + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include -+#endif ++ self->got_first_event = true; ++ self->expected_event_id = event_id + 1; ++ /* When event_id reaches its max value, it skips 0 and wraps to 1. */ ++ if (self->expected_event_id == 0) ++ self->expected_event_id++; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ return has_overflow; ++} + +/** -+ * kbase_backend_late_init - Perform any backend-specific initialization. -+ * @kbdev: Device pointer ++ * tl_reader_reset() - Reset timeline tracebuffer reader state machine. + * -+ * Return: 0 on success, or an error code on failure. ++ * @self: CSFFW TL Reader instance. ++ * ++ * Reset the reader to the default state, i.e. set all the ++ * mutable fields to zero. 
+ */ -+static int kbase_backend_late_init(struct kbase_device *kbdev) ++static void tl_reader_reset(struct kbase_csf_tl_reader *self) +{ -+ int err; ++ self->got_first_event = false; ++ self->is_active = false; ++ self->expected_event_id = 0; ++ self->tl_header.btc = 0; ++} + -+ err = kbase_hwaccess_pm_init(kbdev); -+ if (err) -+ return err; ++int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) ++{ ++ int ret = 0; ++ struct kbase_device *kbdev = self->kbdev; ++ struct kbase_tlstream *stream = self->stream; + -+ err = kbase_reset_gpu_init(kbdev); -+ if (err) -+ goto fail_reset_gpu_init; ++ u8 *read_buffer = self->read_buffer; ++ const size_t read_buffer_size = sizeof(self->read_buffer); + -+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); -+ if (err) -+ goto fail_pm_powerup; ++ u32 bytes_read; ++ u8 *csffw_data_begin; ++ u8 *csffw_data_end; ++ u8 *csffw_data_it; + -+ err = kbase_backend_timer_init(kbdev); -+ if (err) -+ goto fail_timer; ++ unsigned long flags; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { -+ dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); -+ err = -EINVAL; -+ goto fail_interrupt_test; -+ } -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ spin_lock_irqsave(&self->read_lock, flags); + -+ err = kbase_job_slot_init(kbdev); -+ if (err) -+ goto fail_job_slot; ++ /* If not running, early exit. */ ++ if (!self->is_active) { ++ spin_unlock_irqrestore(&self->read_lock, flags); ++ return -EBUSY; ++ } + -+ /* Do the initialisation of devfreq. -+ * Devfreq needs backend_timer_init() for completion of its -+ * initialisation and it also needs to catch the first callback -+ * occurrence of the runtime_suspend event for maintaining state -+ * coherence with the backend power management, hence needs to be -+ * placed before the kbase_pm_context_idle(). ++ /* Copying the whole buffer in a single shot. We assume ++ * that the buffer will not contain partially written messages. + */ -+ err = kbase_backend_devfreq_init(kbdev); -+ if (err) -+ goto fail_devfreq_init; -+ -+ /* Update gpuprops with L2_FEATURES if applicable */ -+ err = kbase_gpuprops_update_l2_features(kbdev); -+ if (err) -+ goto fail_update_l2_features; -+ -+ err = kbase_backend_time_init(kbdev); -+ if (err) -+ goto fail_update_l2_features; ++ bytes_read = kbase_csf_firmware_trace_buffer_read_data( ++ self->trace_buffer, read_buffer, read_buffer_size); ++ csffw_data_begin = read_buffer; ++ csffw_data_end = read_buffer + bytes_read; + -+ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++ for (csffw_data_it = csffw_data_begin; ++ csffw_data_it < csffw_data_end;) { ++ u32 event_header; ++ u16 event_id; ++ u16 event_size; ++ unsigned long acq_flags; ++ char *buffer; + -+ /* Idle the GPU and/or cores, if the policy wants it to */ -+ kbase_pm_context_idle(kbdev); ++ /* Can we safely read event_id? */ ++ if (csffw_data_it + sizeof(event_header) > csffw_data_end) { ++ dev_warn( ++ kbdev->dev, ++ "Unable to parse CSFFW tracebuffer event header."); ++ ret = -EBUSY; ++ break; ++ } + -+ mutex_init(&kbdev->fw_load_lock); ++ /* Read and parse the event header. */ ++ memcpy(&event_header, csffw_data_it, sizeof(event_header)); ++ event_id = (event_header >> 0) & 0xFFFF; ++ event_size = (event_header >> 16) & 0xFFFF; ++ csffw_data_it += sizeof(event_header); + -+ return 0; ++ /* Detect if an overflow has happened. 
*/ ++ if (tl_reader_overflow_check(self, event_id)) ++ tl_reader_overflow_notify(self, ++ csffw_data_it, ++ csffw_data_end); + -+fail_update_l2_features: -+ kbase_backend_devfreq_term(kbdev); -+fail_devfreq_init: -+ kbase_job_slot_term(kbdev); -+fail_job_slot: ++ /* Can we safely read the message body? */ ++ if (csffw_data_it + event_size > csffw_data_end) { ++ dev_warn(kbdev->dev, ++ "event_id: %u, can't read with event_size: %u.", ++ event_id, event_size); ++ ret = -EBUSY; ++ break; ++ } + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#if IS_ENABLED(CONFIG_MALI_REAL_HW) -+fail_interrupt_test: -+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ /* Convert GPU timestamp to CPU timestamp. */ ++ { ++ struct kbase_csffw_tl_message *msg = ++ (struct kbase_csffw_tl_message *) csffw_data_it; ++ msg->timestamp = ++ kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp); ++ } + -+ kbase_backend_timer_term(kbdev); -+fail_timer: -+ kbase_pm_context_idle(kbdev); -+ kbase_hwaccess_pm_halt(kbdev); -+fail_pm_powerup: -+ kbase_reset_gpu_term(kbdev); -+fail_reset_gpu_init: -+ kbase_hwaccess_pm_term(kbdev); ++ /* Copy the message out to the tl_stream. */ ++ buffer = kbase_tlstream_msgbuf_acquire( ++ stream, event_size, &acq_flags); ++ kbasep_serialize_bytes(buffer, 0, csffw_data_it, event_size); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ csffw_data_it += event_size; ++ } + -+ return err; ++ spin_unlock_irqrestore(&self->read_lock, flags); ++ return ret; +} + -+/** -+ * kbase_backend_late_term - Perform any backend-specific termination. -+ * @kbdev: Device pointer -+ */ -+static void kbase_backend_late_term(struct kbase_device *kbdev) ++static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) +{ -+ kbase_backend_devfreq_term(kbdev); -+ kbase_job_slot_halt(kbdev); -+ kbase_job_slot_term(kbdev); -+ kbase_backend_timer_term(kbdev); -+ kbase_hwaccess_pm_halt(kbdev); -+ kbase_reset_gpu_term(kbdev); -+ kbase_hwaccess_pm_term(kbdev); -+} ++ struct kbase_csf_tl_reader *self = ++ container_of(timer, struct kbase_csf_tl_reader, read_timer); + -+/** -+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog -+ * interface. -+ * @kbdev: Device pointer -+ * Return: 0 on success, or an error code on failure. -+ */ -+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) -+{ -+ return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer); -+} ++ int rcode; + -+/** -+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog -+ * interface. -+ * @kbdev: Device pointer -+ */ -+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer); -+} ++ kbase_csf_tl_reader_flush_buffer(self); + -+/** -+ * kbase_device_hwcnt_backend_jm_init - Create hardware counter backend. -+ * @kbdev: Device pointer -+ * Return: 0 on success, or an error code on failure. -+ */ -+static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) -+{ -+ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend); ++ rcode = mod_timer(&self->read_timer, ++ jiffies + msecs_to_jiffies(self->timer_interval)); ++ ++ CSTD_UNUSED(rcode); +} + +/** -+ * kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend. -+ * @kbdev: Device pointer ++ * tl_reader_init_late() - Late CSFFW TL Reader initialization. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @kbdev: Kbase device. 
++ * ++ * Late initialization is done once at kbase_csf_tl_reader_start() time. ++ * This is because the firmware image is not parsed ++ * by the kbase_csf_tl_reader_init() time. ++ * ++ * Return: Zero on success, -1 otherwise. + */ -+static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) ++static int tl_reader_init_late( ++ struct kbase_csf_tl_reader *self, ++ struct kbase_device *kbdev) +{ -+ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend); ++ struct firmware_trace_buffer *tb; ++ size_t hdr_size = 0; ++ const char *hdr = NULL; ++ ++ if (self->kbdev) ++ return 0; ++ ++ tb = kbase_csf_firmware_get_trace_buffer( ++ kbdev, KBASE_CSFFW_TRACEBUFFER_NAME); ++ hdr = kbase_csf_firmware_get_timeline_metadata( ++ kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size); ++ ++ if (!tb) { ++ dev_warn( ++ kbdev->dev, ++ "'%s' tracebuffer is not present in the firmware image.", ++ KBASE_CSFFW_TRACEBUFFER_NAME); ++ return -1; ++ } ++ ++ if (!hdr) { ++ dev_warn( ++ kbdev->dev, ++ "'%s' timeline metadata is not present in the firmware image.", ++ KBASE_CSFFW_TIMELINE_HEADER_NAME); ++ return -1; ++ } ++ ++ self->kbdev = kbdev; ++ self->trace_buffer = tb; ++ self->tl_header.data = hdr; ++ self->tl_header.size = hdr_size; ++ ++ return 0; +} + +/** -+ * kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend. -+ * @kbdev: Device pointer -+ * Return: 0 on success, or an error code on failure. ++ * tl_reader_update_enable_bit() - Update the first bit of a CSFFW tracebuffer. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @value: The value to set. ++ * ++ * Update the first bit of a CSFFW tracebufer and then reset the GPU. ++ * This is to make these changes visible to the MCU. ++ * ++ * Return: 0 on success, or negative error code for failure. + */ -+static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev) ++static int tl_reader_update_enable_bit( ++ struct kbase_csf_tl_reader *self, ++ bool value) +{ -+ return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend, -+ &kbdev->hwcnt_watchdog_timer, -+ &kbdev->hwcnt_gpu_iface); ++ int err = 0; ++ ++ err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ self->trace_buffer, 0, value); ++ ++ return err; +} + -+/** -+ * kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend. 
-+ * @kbdev: Device pointer -+ */ -+static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev) ++void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, ++ struct kbase_tlstream *stream) +{ -+ kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface); -+} ++ self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; + -+static const struct kbase_device_init dev_init[] = { -+#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -+#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+ { assign_irqs, NULL, "IRQ search failed" }, -+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ { registers_map, registers_unmap, "Register map failed" }, -+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -+ { kbase_device_io_history_init, kbase_device_io_history_term, -+ "Register access history initialization failed" }, -+ { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, -+ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, -+ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, -+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, -+ { kbase_device_misc_init, kbase_device_misc_term, -+ "Miscellaneous device initialization failed" }, -+ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, -+ "Priority control manager initialization failed" }, -+ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, -+ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, -+ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, -+ { kbase_protected_mode_init, kbase_protected_mode_term, -+ "Protected mode subsystem initialization failed" }, -+ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, -+ { kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" }, -+ { kbase_device_timeline_init, kbase_device_timeline_term, -+ "Timeline stream initialization failed" }, -+ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, -+ "Clock rate trace manager initialization failed" }, -+ { kbase_instr_backend_init, kbase_instr_backend_term, -+ "Instrumentation backend initialization failed" }, -+ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, -+ "GPU hwcnt backend watchdog interface creation failed" }, -+ { kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term, -+ "GPU hwcnt backend creation failed" }, -+ { kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term, -+ "GPU hwcnt watchdog backend creation failed" }, -+ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, -+ "GPU hwcnt context initialization failed" }, -+ { kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term, -+ "GPU hwcnt virtualizer initialization failed" }, -+ { kbase_device_vinstr_init, kbase_device_vinstr_term, -+ "Virtual instrumentation initialization failed" }, -+ { kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term, -+ "Performance counter instrumentation initialization failed" }, -+ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, -+ { kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, -+ "Job fault debug initialization failed" }, -+ { 
kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, -+ /* Sysfs init needs to happen before registering the device with -+ * misc_register(), otherwise it causes a race condition between -+ * registering the device and a uevent event being generated for -+ * userspace, causing udev rules to run which might expect certain -+ * sysfs attributes present. As a result of the race condition -+ * we avoid, some Mali sysfs entries may have appeared to udev -+ * to not exist. -+ * For more information, see -+ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the -+ * paragraph that starts with "Word of warning", currently the -+ * second-last paragraph. -+ */ -+ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, -+ { kbase_device_misc_register, kbase_device_misc_deregister, -+ "Misc device registration failed" }, -+ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, -+ "GPU property population failed" }, -+ { NULL, kbase_dummy_job_wa_cleanup, NULL }, -+ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, -+}; ++ kbase_timer_setup(&self->read_timer, ++ kbasep_csf_tl_reader_read_callback); + -+static void kbase_device_term_partial(struct kbase_device *kbdev, -+ unsigned int i) -+{ -+ while (i-- > 0) { -+ if (dev_init[i].term) -+ dev_init[i].term(kbdev); -+ } ++ self->stream = stream; ++ ++ /* This will be initialized by tl_reader_init_late() */ ++ self->kbdev = NULL; ++ self->trace_buffer = NULL; ++ self->tl_header.data = NULL; ++ self->tl_header.size = 0; ++ ++ spin_lock_init(&self->read_lock); ++ ++ tl_reader_reset(self); +} + -+void kbase_device_term(struct kbase_device *kbdev) ++void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) +{ -+ kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); -+ kbasep_js_devdata_halt(kbdev); -+ kbase_mem_halt(kbdev); ++ del_timer_sync(&self->read_timer); +} + -+int kbase_device_init(struct kbase_device *kbdev) ++int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, ++ struct kbase_device *kbdev) +{ -+ int err = 0; -+ unsigned int i = 0; -+ -+ dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); ++ int rcode; + -+ kbase_device_id_init(kbdev); -+ kbase_disjoint_init(kbdev); ++ /* If already running, early exit. */ ++ if (self->is_active) ++ return 0; + -+ for (i = 0; i < ARRAY_SIZE(dev_init); i++) { -+ if (dev_init[i].init) { -+ err = dev_init[i].init(kbdev); -+ if (err) { -+ if (err != -EPROBE_DEFER) -+ dev_err(kbdev->dev, "%s error = %d\n", -+ dev_init[i].err_mes, err); -+ kbase_device_term_partial(kbdev, i); -+ break; -+ } -+ } ++ if (tl_reader_init_late(self, kbdev)) { ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ dev_warn( ++ kbdev->dev, ++ "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!"); ++ return 0; ++#else ++ return -EINVAL; ++#endif + } + -+ return err; ++ tl_reader_reset(self); ++ ++ self->is_active = true; ++ /* Set bytes to copy to the header size. This is to trigger copying ++ * of the header to the user space. ++ */ ++ self->tl_header.btc = self->tl_header.size; ++ ++ /* Enable the tracebuffer on the CSFFW side. 
*/ ++ rcode = tl_reader_update_enable_bit(self, true); ++ if (rcode != 0) ++ return rcode; ++ ++ rcode = mod_timer(&self->read_timer, ++ jiffies + msecs_to_jiffies(self->timer_interval)); ++ ++ return 0; +} + -+int kbase_device_firmware_init_once(struct kbase_device *kbdev) ++void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self) +{ -+ int ret = 0; ++ unsigned long flags; + -+ mutex_lock(&kbdev->fw_load_lock); ++ /* If is not running, early exit. */ ++ if (!self->is_active) ++ return; + -+ if (!kbdev->dummy_job_wa_loaded) { -+ ret = kbase_dummy_job_wa_load(kbdev); -+ if (!ret) -+ kbdev->dummy_job_wa_loaded = true; -+ } ++ /* Disable the tracebuffer on the CSFFW side. */ ++ tl_reader_update_enable_bit(self, false); + -+ mutex_unlock(&kbdev->fw_load_lock); ++ del_timer_sync(&self->read_timer); + -+ return ret; ++ spin_lock_irqsave(&self->read_lock, flags); ++ ++ tl_reader_reset(self); ++ ++ spin_unlock_irqrestore(&self->read_lock, flags); +} -diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c ++ ++void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self) ++{ ++ kbase_csf_tl_reader_flush_buffer(self); ++} +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h new file mode 100644 -index 000000000..b2b0cfd6c +index 000000000..2f8eb1dd4 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c -@@ -0,0 +1,611 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h +@@ -0,0 +1,144 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -179398,606 +181502,700 @@ index 000000000..b2b0cfd6c + * + */ + -+/* -+ * Base kernel device APIs -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "mali_kbase_kinstr_prfcnt.h" -+#include "mali_kbase_vinstr.h" -+#include "hwcnt/mali_kbase_hwcnt_context.h" -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#ifndef _KBASE_CSFFW_TL_READER_H_ ++#define _KBASE_CSFFW_TL_READER_H_ + -+#include "mali_kbase_device.h" -+#include "mali_kbase_device_internal.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "backend/gpu/mali_kbase_irq_internal.h" -+#include "mali_kbase_regs_history_debugfs.h" -+#include "mali_kbase_pbha.h" ++#include ++#include ++#include + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include "arbiter/mali_kbase_arbiter_pm.h" -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++/* The number of pages used for CSFFW trace buffer. Can be tweaked. */ ++#define KBASE_CSF_TL_BUFFER_NR_PAGES 128 ++/* CSFFW Timeline read polling minimum period in milliseconds. */ ++#define KBASE_CSF_TL_READ_INTERVAL_MIN 20 ++/* CSFFW Timeline read polling default period in milliseconds. */ ++#define KBASE_CSF_TL_READ_INTERVAL_DEFAULT 200 ++/* CSFFW Timeline read polling maximum period in milliseconds. 
*/ ++#define KBASE_CSF_TL_READ_INTERVAL_MAX (60 * 1000) + -+#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++struct firmware_trace_buffer; ++struct kbase_tlstream; ++struct kbase_device; + -+/* Number of register accesses for the buffer that we allocate during -+ * initialization time. The buffer size can be changed later via debugfs. ++/** ++ * struct kbase_csf_tl_reader - CSFFW timeline reader state. ++ * ++ * @read_timer: Timer used for periodical tracebufer reading. ++ * @timer_interval: Timer polling period in milliseconds. ++ * @stream: Timeline stream where to the tracebuffer content ++ * is copied. ++ * @kbdev: KBase device. ++ * @trace_buffer: CSF Firmware timeline tracebuffer. ++ * @tl_header: CSFFW Timeline header ++ * @tl_header.data: CSFFW Timeline header content. ++ * @tl_header.size: CSFFW Timeline header size. ++ * @tl_header.btc: CSFFW Timeline header remaining bytes to copy to ++ * the user space. ++ * @ts_converter: Timestamp converter state. ++ * @got_first_event: True, if a CSFFW timelime session has been enabled ++ * and the first event was received. ++ * @is_active: True, if a CSFFW timelime session has been enabled. ++ * @expected_event_id: The last 16 bit event ID received from CSFFW. It ++ * is only valid when got_first_event is true. ++ * @read_buffer: Temporary buffer used for CSFFW timeline data ++ * reading from the tracebufer. ++ * @read_lock: CSFFW timeline reader lock. + */ -+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) ++struct kbase_csf_tl_reader { ++ struct timer_list read_timer; ++ u32 timer_interval; ++ struct kbase_tlstream *stream; + -+#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ struct kbase_device *kbdev; ++ struct firmware_trace_buffer *trace_buffer; ++ struct { ++ const char *data; ++ size_t size; ++ size_t btc; ++ } tl_header; + -+static DEFINE_MUTEX(kbase_dev_list_lock); -+static LIST_HEAD(kbase_dev_list); -+static int kbase_dev_nr; ++ bool got_first_event; ++ bool is_active; ++ u16 expected_event_id; + -+struct kbase_device *kbase_device_alloc(void) -+{ -+ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); -+} ++ u8 read_buffer[PAGE_SIZE * KBASE_CSF_TL_BUFFER_NR_PAGES]; ++ spinlock_t read_lock; ++}; + +/** -+ * kbase_device_all_as_init() - Initialise address space objects of the device. -+ * -+ * @kbdev: Pointer to kbase device. ++ * kbase_csf_tl_reader_init() - Initialize CSFFW Timelime Stream Reader. + * -+ * Return: 0 on success otherwise non-zero. ++ * @self: CSFFW TL Reader instance. ++ * @stream: Destination timeline stream. + */ -+static int kbase_device_all_as_init(struct kbase_device *kbdev) -+{ -+ int i, err = 0; -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ err = kbase_mmu_as_init(kbdev, i); -+ if (err) -+ break; -+ } -+ -+ if (err) { -+ while (i-- > 0) -+ kbase_mmu_as_term(kbdev, i); -+ } -+ -+ return err; -+} ++void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream); + -+static void kbase_device_all_as_term(struct kbase_device *kbdev) -+{ -+ int i; ++/** ++ * kbase_csf_tl_reader_term() - Terminate CSFFW Timelime Stream Reader. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self); + -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) -+ kbase_mmu_as_term(kbdev, i); -+} ++/** ++ * kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. 
++ * ++ * Return: Zero on success, negative error code (EBUSY) otherwise ++ */ ++int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self); + -+int kbase_device_pcm_dev_init(struct kbase_device *const kbdev) -+{ -+ int err = 0; ++/** ++ * kbase_csf_tl_reader_start() - Start asynchronous copying of CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @kbdev: Kbase device. ++ * ++ * Return: zero on success, a negative error code otherwise. ++ */ ++int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev); + -+#if IS_ENABLED(CONFIG_OF) -+ struct device_node *prio_ctrl_node; ++/** ++ * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self); + -+ /* Check to see whether or not a platform specific priority control manager -+ * is available. -+ */ -+ prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, -+ "priority-control-manager", 0); -+ if (!prio_ctrl_node) { -+ dev_info(kbdev->dev, -+ "No priority control manager is configured"); -+ } else { -+ struct platform_device *const pdev = -+ of_find_device_by_node(prio_ctrl_node); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_csf_tl_reader_debugfs_init() - Initialize debugfs for CSFFW Timelime Stream Reader. ++ * ++ * @kbdev: Kbase device. ++ */ ++void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev); ++#endif + -+ if (!pdev) { -+ dev_err(kbdev->dev, -+ "The configured priority control manager was not found"); -+ } else { -+ struct priority_control_manager_device *pcm_dev = -+ platform_get_drvdata(pdev); -+ if (!pcm_dev) { -+ dev_info(kbdev->dev, "Priority control manager is not ready"); -+ err = -EPROBE_DEFER; -+ } else if (!try_module_get(pcm_dev->owner)) { -+ dev_err(kbdev->dev, "Failed to get priority control manager module"); -+ err = -ENODEV; -+ } else { -+ dev_info(kbdev->dev, "Priority control manager successfully loaded"); -+ kbdev->pcm_dev = pcm_dev; -+ } -+ } -+ of_node_put(prio_ctrl_node); -+ } -+#endif /* CONFIG_OF */ ++/** ++ * kbase_csf_tl_reader_reset() - Reset CSFFW timeline reader, it should be called before reset CSFFW. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self); + -+ return err; -+} ++#endif /* _KBASE_CSFFW_TL_READER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +new file mode 100644 +index 000000000..46872f937 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +@@ -0,0 +1,555 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+void kbase_device_pcm_dev_term(struct kbase_device *const kbdev) -+{ -+ if (kbdev->pcm_dev) -+ module_put(kbdev->pcm_dev->owner); -+} ++#include "mali_kbase.h" ++#include "mali_kbase_defs.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_csf_tl_reader.h" + -+#define KBASE_PAGES_TO_KIB(pages) (((unsigned int)pages) << (PAGE_SHIFT - 10)) ++#include ++#include + +/** -+ * mali_oom_notifier_handler - Mali driver out-of-memory handler -+ * -+ * @nb: notifier block - used to retrieve kbdev pointer -+ * @action: action (unused) -+ * @data: data pointer (unused) ++ * struct firmware_trace_buffer - Trace Buffer within the MCU firmware + * -+ * This function simply lists memory usage by the Mali driver, per GPU device, -+ * for diagnostic purposes. ++ * @kbdev: Pointer to the Kbase device. ++ * @node: List head linking all trace buffers to ++ * kbase_device:csf.firmware_trace_buffers ++ * @data_mapping: MCU shared memory mapping used for the data buffer. ++ * @updatable: Indicates whether config items can be updated with ++ * FIRMWARE_CONFIG_UPDATE ++ * @type: The type of the trace buffer. ++ * @trace_enable_entry_count: Number of Trace Enable bits. ++ * @gpu_va: Structure containing all the Firmware addresses ++ * that are accessed by the MCU. ++ * @gpu_va.size_address: The address where the MCU shall read the size of ++ * the data buffer. ++ * @gpu_va.insert_address: The address that shall be dereferenced by the MCU ++ * to write the Insert offset. ++ * @gpu_va.extract_address: The address that shall be dereferenced by the MCU ++ * to read the Extract offset. ++ * @gpu_va.data_address: The address that shall be dereferenced by the MCU ++ * to write the Trace Buffer. ++ * @gpu_va.trace_enable: The address where the MCU shall read the array of ++ * Trace Enable bits describing which trace points ++ * and features shall be enabled. ++ * @cpu_va: Structure containing CPU addresses of variables ++ * which are permanently mapped on the CPU address ++ * space. ++ * @cpu_va.insert_cpu_va: CPU virtual address of the Insert variable. ++ * @cpu_va.extract_cpu_va: CPU virtual address of the Extract variable. ++ * @num_pages: Size of the data buffer, in pages. ++ * @trace_enable_init_mask: Initial value for the trace enable bit mask. ++ * @name: NULL terminated string which contains the name of the trace buffer. + * -+ * Return: NOTIFY_OK on success, NOTIFY_BAD otherwise. ++ * The firmware relays information to the host by writing on memory buffers ++ * which are allocated and partially configured by the host. These buffers ++ * are called Trace Buffers: each of them has a specific purpose and is ++ * identified by a name and a set of memory addresses where the host can ++ * set pointers to host-allocated structures. 
+ */ -+static int mali_oom_notifier_handler(struct notifier_block *nb, -+ unsigned long action, void *data) -+{ ++struct firmware_trace_buffer { + struct kbase_device *kbdev; -+ struct kbase_context *kctx = NULL; -+ unsigned long kbdev_alloc_total; -+ -+ if (WARN_ON(nb == NULL)) -+ return NOTIFY_BAD; -+ -+ kbdev = container_of(nb, struct kbase_device, oom_notifier_block); -+ -+ kbdev_alloc_total = -+ KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); -+ -+ dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, -+ kbdev_alloc_total); -+ -+ mutex_lock(&kbdev->kctx_list_lock); ++ struct list_head node; ++ struct kbase_csf_mapping data_mapping; ++ bool updatable; ++ u32 type; ++ u32 trace_enable_entry_count; ++ struct gpu_va { ++ u32 size_address; ++ u32 insert_address; ++ u32 extract_address; ++ u32 data_address; ++ u32 trace_enable; ++ } gpu_va; ++ struct cpu_va { ++ u32 *insert_cpu_va; ++ u32 *extract_cpu_va; ++ } cpu_va; ++ u32 num_pages; ++ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; ++ char name[1]; /* this field must be last */ ++}; + -+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { -+ struct pid *pid_struct; -+ struct task_struct *task; -+ struct pid *tgid_struct; -+ struct task_struct *tgid_task; ++/** ++ * struct firmware_trace_buffer_data - Configuration data for trace buffers ++ * ++ * @name: Name identifier of the trace buffer ++ * @trace_enable_init_mask: Initial value to assign to the trace enable bits ++ * @size: Size of the data buffer to allocate for the trace buffer, in pages. ++ * The size of a data buffer must always be a power of 2. ++ * ++ * Describe how to set up a trace buffer interface. ++ * Trace buffers are identified by name and they require a data buffer and ++ * an initial mask of values for the trace enable bits. ++ */ ++struct firmware_trace_buffer_data { ++ char name[64]; ++ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; ++ size_t size; ++}; + -+ unsigned long task_alloc_total = -+ KBASE_PAGES_TO_KIB(atomic_read(&(kctx->used_pages))); ++/* ++ * Table of configuration data for trace buffers. ++ * ++ * This table contains the configuration data for the trace buffers that are ++ * expected to be parsed from the firmware. ++ */ ++static const struct firmware_trace_buffer_data trace_buffer_data[] = { ++#if MALI_UNIT_TEST ++ { "fwutf", { 0 }, 1 }, ++#endif ++ { FIRMWARE_LOG_BUF_NAME, { 0 }, 4 }, ++ { "benchmark", { 0 }, 2 }, ++ { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, ++}; + -+ rcu_read_lock(); -+ pid_struct = find_get_pid(kctx->pid); -+ task = pid_task(pid_struct, PIDTYPE_PID); -+ tgid_struct = find_get_pid(kctx->tgid); -+ tgid_task = pid_task(tgid_struct, PIDTYPE_PID); ++int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) ++{ ++ struct firmware_trace_buffer *trace_buffer; ++ int ret = 0; ++ u32 mcu_rw_offset = 0, mcu_write_offset = 0; ++ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); + -+ dev_err(kbdev->dev, -+ "OOM notifier: tsk %s:%s tgid (%u) pid (%u) %lu kB\n", -+ tgid_task ? tgid_task->comm : "[null task]", -+ task ? 
task->comm : "[null comm]", kctx->tgid, -+ kctx->pid, task_alloc_total); ++ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) { ++ dev_dbg(kbdev->dev, "No trace buffers to initialise\n"); ++ return 0; ++ } + -+ put_pid(pid_struct); -+ rcu_read_unlock(); ++ /* GPU-readable,writable memory used for Extract variables */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, 1, PROT_WRITE, ++ KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, ++ &kbdev->csf.firmware_trace_buffers.mcu_rw); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to map GPU-rw MCU shared memory\n"); ++ goto out; + } + -+ mutex_unlock(&kbdev->kctx_list_lock); -+ return NOTIFY_OK; -+} ++ /* GPU-writable memory used for Insert variables */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, 1, PROT_READ, KBASE_REG_GPU_WR, ++ &kbdev->csf.firmware_trace_buffers.mcu_write); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory\n"); ++ goto out; ++ } + -+int kbase_device_misc_init(struct kbase_device * const kbdev) -+{ -+ int err; -+#if IS_ENABLED(CONFIG_ARM64) -+ struct device_node *np = NULL; -+#endif /* CONFIG_ARM64 */ ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, ++ trace_enable_size_dwords; ++ u32 *extract_cpu_va, *insert_cpu_va; ++ unsigned int i; + -+ spin_lock_init(&kbdev->mmu_mask_change); -+ mutex_init(&kbdev->mmu_hw_mutex); -+#if IS_ENABLED(CONFIG_ARM64) -+ kbdev->cci_snoop_enabled = false; -+ np = kbdev->dev->of_node; -+ if (np != NULL) { -+ if (of_property_read_u32(np, "snoop_enable_smc", -+ &kbdev->snoop_enable_smc)) -+ kbdev->snoop_enable_smc = 0; -+ if (of_property_read_u32(np, "snoop_disable_smc", -+ &kbdev->snoop_disable_smc)) -+ kbdev->snoop_disable_smc = 0; -+ /* Either both or none of the calls should be provided. 
*/ -+ if (!((kbdev->snoop_disable_smc == 0 -+ && kbdev->snoop_enable_smc == 0) -+ || (kbdev->snoop_disable_smc != 0 -+ && kbdev->snoop_enable_smc != 0))) { -+ WARN_ON(1); -+ err = -EINVAL; -+ goto fail; ++ /* GPU-writable data buffer for the individual trace buffer */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, trace_buffer->num_pages, PROT_READ, KBASE_REG_GPU_WR, ++ &trace_buffer->data_mapping); ++ if (ret) { ++ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory for a trace buffer\n"); ++ goto out; + } -+ } -+#endif /* CONFIG_ARM64 */ + -+ /* Get the list of workarounds for issues on the current HW -+ * (identified by the GPU_ID register) -+ */ -+ err = kbase_hw_set_issues_mask(kbdev); -+ if (err) -+ goto fail; ++ extract_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_rw_offset; ++ extract_cpu_va = (u32 *)( ++ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + ++ mcu_rw_offset); ++ insert_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_write_offset; ++ insert_cpu_va = (u32 *)( ++ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + ++ mcu_write_offset); ++ data_buffer_gpu_va = ++ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + -+ /* Set the list of features available on the current HW -+ * (identified by the GPU_ID register) -+ */ -+ kbase_hw_set_features_mask(kbdev); ++ /* Initialize the Extract variable */ ++ *extract_cpu_va = 0; + -+ err = kbase_gpuprops_set_features(kbdev); -+ if (err) -+ goto fail; ++ /* Each FW address shall be mapped and set individually, as we can't ++ * assume anything about their location in the memory address space. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.size_address, ++ trace_buffer->num_pages << PAGE_SHIFT); + -+ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our -+ * device structure was created by device-tree -+ */ -+ if (!kbdev->dev->dma_mask) -+ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; ++ trace_enable_size_dwords = ++ (trace_buffer->trace_enable_entry_count + 31) >> 5; + -+ err = dma_set_mask(kbdev->dev, -+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); -+ if (err) -+ goto dma_set_mask_failed; ++ for (i = 0; i < trace_enable_size_dwords; i++) { ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.trace_enable + i*4, ++ trace_buffer->trace_enable_init_mask[i]); ++ } + -+ err = dma_set_coherent_mask(kbdev->dev, -+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); -+ if (err) -+ goto dma_set_mask_failed; ++ /* Store CPU virtual addresses for permanently mapped variables */ ++ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; ++ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; + ++ /* Update offsets */ ++ mcu_write_offset += cache_line_alignment; ++ mcu_rw_offset += cache_line_alignment; ++ } + -+ /* There is no limit for Mali, so set to max. 
*/ -+ if (kbdev->dev->dma_parms) -+ err = dma_set_max_seg_size(kbdev->dev, UINT_MAX); -+ if (err) -+ goto dma_set_mask_failed; ++out: ++ return ret; ++} + -+ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; ++void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev) ++{ ++ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) ++ return; + -+ err = kbase_device_all_as_init(kbdev); -+ if (err) -+ goto dma_set_mask_failed; ++ while (!list_empty(&kbdev->csf.firmware_trace_buffers.list)) { ++ struct firmware_trace_buffer *trace_buffer; + -+ err = kbase_pbha_read_dtb(kbdev); -+ if (err) -+ goto term_as; ++ trace_buffer = list_first_entry(&kbdev->csf.firmware_trace_buffers.list, ++ struct firmware_trace_buffer, node); ++ kbase_csf_firmware_mcu_shared_mapping_term(kbdev, &trace_buffer->data_mapping); ++ list_del(&trace_buffer->node); + -+ init_waitqueue_head(&kbdev->cache_clean_wait); ++ kfree(trace_buffer); ++ } + -+ kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev); ++ kbase_csf_firmware_mcu_shared_mapping_term( ++ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_rw); ++ kbase_csf_firmware_mcu_shared_mapping_term( ++ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_write); ++} + -+ atomic_set(&kbdev->ctx_num, 0); ++int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, ++ const u32 *entry, ++ unsigned int size, ++ bool updatable) ++{ ++ const char *name = (char *)&entry[7]; ++ const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET; ++ struct firmware_trace_buffer *trace_buffer; ++ unsigned int i; + -+ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; ++ /* Allocate enough space for struct firmware_trace_buffer and the ++ * trace buffer name (with NULL termination). ++ */ ++ trace_buffer = ++ kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); + -+#if MALI_USE_CSF -+ kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); -+#else -+ kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS; -+#endif /* MALI_USE_CSF */ ++ if (!trace_buffer) ++ return -ENOMEM; + -+ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); -+ kbdev->mmu_as_inactive_wait_time_ms = -+ kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT); -+ mutex_init(&kbdev->kctx_list_lock); -+ INIT_LIST_HEAD(&kbdev->kctx_list); ++ memcpy(&trace_buffer->name, name, name_len); ++ trace_buffer->name[name_len] = '\0'; + -+ dev_dbg(kbdev->dev, "Registering mali_oom_notifier_handlern"); -+ kbdev->oom_notifier_block.notifier_call = mali_oom_notifier_handler; -+ err = register_oom_notifier(&kbdev->oom_notifier_block); ++ for (i = 0; i < ARRAY_SIZE(trace_buffer_data); i++) { ++ if (!strcmp(trace_buffer_data[i].name, trace_buffer->name)) { ++ unsigned int j; + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Unable to register OOM notifier for Mali - but will continue\n"); -+ kbdev->oom_notifier_block.notifier_call = NULL; ++ trace_buffer->kbdev = kbdev; ++ trace_buffer->updatable = updatable; ++ trace_buffer->type = entry[0]; ++ trace_buffer->gpu_va.size_address = entry[1]; ++ trace_buffer->gpu_va.insert_address = entry[2]; ++ trace_buffer->gpu_va.extract_address = entry[3]; ++ trace_buffer->gpu_va.data_address = entry[4]; ++ trace_buffer->gpu_va.trace_enable = entry[5]; ++ trace_buffer->trace_enable_entry_count = entry[6]; ++ trace_buffer->num_pages = trace_buffer_data[i].size; ++ ++ for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { ++ trace_buffer->trace_enable_init_mask[j] = ++ trace_buffer_data[i].trace_enable_init_mask[j]; ++ } ++ 
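The trace-buffer setup above sizes the enable array as (trace_enable_entry_count + 31) >> 5 dwords and writes one 32-bit mask per dword at trace_enable + i*4; the same bit-to-dword mapping is used further down when individual enable bits are toggled. A minimal stand-alone sketch of that arithmetic (illustrative only, not part of the patch; the numeric values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int entry_count = 70;                 /* assumed example value */
	unsigned int dwords = (entry_count + 31) >> 5; /* round up to whole 32-bit words */
	unsigned int bit = 37;                         /* assumed example bit index */
	unsigned int word = bit >> 5;                  /* dword that holds the bit */
	uint32_t mask = 1u << (bit & 0x1F);            /* mask within that dword */

	printf("%u enable bits -> %u dwords; bit %u -> dword %u, mask 0x%08x, byte offset %u\n",
	       entry_count, dwords, bit, word, mask, word * 4u);
	return 0;
}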
break; ++ } + } + -+#if !MALI_USE_CSF -+ spin_lock_init(&kbdev->quick_reset_lock); -+ kbdev->quick_reset_enabled = true; -+ kbdev->num_of_atoms_hw_completed = 0; -+#endif ++ if (i < ARRAY_SIZE(trace_buffer_data)) { ++ list_add(&trace_buffer->node, &kbdev->csf.firmware_trace_buffers.list); ++ dev_dbg(kbdev->dev, "Trace buffer '%s'", trace_buffer->name); ++ } else { ++ dev_dbg(kbdev->dev, "Unknown trace buffer '%s'", trace_buffer->name); ++ kfree(trace_buffer); ++ } + -+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) -+ atomic_set(&kbdev->live_fence_metadata, 0); -+#endif + return 0; -+ -+term_as: -+ kbase_device_all_as_term(kbdev); -+dma_set_mask_failed: -+fail: -+ return err; +} + -+void kbase_device_misc_term(struct kbase_device *kbdev) ++void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev); -+ -+ WARN_ON(!list_empty(&kbdev->kctx_list)); -+ -+#if KBASE_KTRACE_ENABLE -+ kbase_debug_assert_register_hook(NULL, NULL); -+#endif -+ kbase_device_all_as_term(kbdev); -+ ++ struct firmware_trace_buffer *trace_buffer; ++ u32 mcu_rw_offset = 0, mcu_write_offset = 0; ++ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); + -+ if (kbdev->oom_notifier_block.notifier_call) -+ unregister_oom_notifier(&kbdev->oom_notifier_block); ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, ++ trace_enable_size_dwords; ++ u32 *extract_cpu_va, *insert_cpu_va; ++ unsigned int i; + -+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) -+ if (atomic_read(&kbdev->live_fence_metadata) > 0) -+ dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); -+#endif -+} ++ /* Rely on the fact that all required mappings already exist */ ++ extract_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_rw_offset; ++ extract_cpu_va = (u32 *)( ++ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + ++ mcu_rw_offset); ++ insert_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_write_offset; ++ insert_cpu_va = (u32 *)( ++ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + ++ mcu_write_offset); ++ data_buffer_gpu_va = ++ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + -+#if !MALI_USE_CSF -+void kbase_enable_quick_reset(struct kbase_device *kbdev) -+{ -+ spin_lock(&kbdev->quick_reset_lock); ++ /* Notice that the function only re-updates firmware memory locations ++ * with information that allows access to the trace buffers without ++ * really resetting their state. For instance, the Insert offset will ++ * not change and, as a consequence, the Extract offset is not going ++ * to be reset to keep consistency. ++ */ + -+ kbdev->quick_reset_enabled = true; -+ kbdev->num_of_atoms_hw_completed = 0; ++ /* Each FW address shall be mapped and set individually, as we can't ++ * assume anything about their location in the memory address space. 
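kbase_csf_firmware_parse_trace_buffer_entry() above consumes seven consecutive 32-bit words (type, size/insert/extract/data addresses, trace-enable address, enable-bit count) and takes the NUL-terminated buffer name from the words that follow, as in the (char *)&entry[7] cast. A rough stand-alone sketch of that layout, with made-up field values and assuming the name simply starts at the eighth word:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/*
	 * Assumed example entry: seven 32-bit header words, then the name.
	 * Index 0: type, 1: size address, 2: insert address, 3: extract address,
	 * 4: data address, 5: trace-enable address, 6: enable-bit count.
	 */
	uint32_t entry[9] = { 1, 0x100, 0x104, 0x108, 0x200, 0x300, 64, 0, 0 };

	memcpy(&entry[7], "fwlog", 6); /* name bytes occupy the trailing words */

	printf("name '%s', %u enable bits, data address 0x%x\n",
	       (const char *)&entry[7], entry[6], entry[4]);
	return 0;
}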
++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.size_address, ++ trace_buffer->num_pages << PAGE_SHIFT); + -+ spin_unlock(&kbdev->quick_reset_lock); -+} ++ trace_enable_size_dwords = ++ (trace_buffer->trace_enable_entry_count + 31) >> 5; + -+void kbase_disable_quick_reset(struct kbase_device *kbdev) -+{ -+ spin_lock(&kbdev->quick_reset_lock); ++ for (i = 0; i < trace_enable_size_dwords; i++) { ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.trace_enable + i*4, ++ trace_buffer->trace_enable_init_mask[i]); ++ } + -+ kbdev->quick_reset_enabled = false; -+ kbdev->num_of_atoms_hw_completed = 0; ++ /* Store CPU virtual addresses for permanently mapped variables, ++ * as they might have slightly changed. ++ */ ++ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; ++ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; + -+ spin_unlock(&kbdev->quick_reset_lock); ++ /* Update offsets */ ++ mcu_write_offset += cache_line_alignment; ++ mcu_rw_offset += cache_line_alignment; ++ } +} + -+bool kbase_is_quick_reset_enabled(struct kbase_device *kbdev) ++struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( ++ struct kbase_device *kbdev, const char *name) +{ -+ return kbdev->quick_reset_enabled; -+} -+#endif ++ struct firmware_trace_buffer *trace_buffer; + -+void kbase_device_free(struct kbase_device *kbdev) -+{ -+ kfree(kbdev); -+} ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ if (!strcmp(trace_buffer->name, name)) ++ return trace_buffer; ++ } + -+void kbase_device_id_init(struct kbase_device *kbdev) -+{ -+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, -+ kbase_dev_nr); -+ kbdev->id = kbase_dev_nr; ++ return NULL; +} ++EXPORT_SYMBOL(kbase_csf_firmware_get_trace_buffer); + -+void kbase_increment_device_id(void) ++unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( ++ const struct firmware_trace_buffer *trace_buffer) +{ -+ kbase_dev_nr++; ++ return trace_buffer->trace_enable_entry_count; +} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count); + -+int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) ++static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *tb, unsigned int bit, bool value) +{ -+ return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, -+ &kbdev->hwcnt_gpu_ctx); -+} ++ struct kbase_device *kbdev = tb->kbdev; + -+void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev) -+{ -+ return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, -+ KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, -+ &kbdev->hwcnt_gpu_virt); -+} ++ if (bit < tb->trace_enable_entry_count) { ++ unsigned int trace_enable_reg_offset = bit >> 5; ++ u32 trace_enable_bit_mask = 1u << (bit & 0x1F); + -+void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) -+{ -+ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); -+} ++ if (value) { ++ tb->trace_enable_init_mask[trace_enable_reg_offset] |= ++ 
trace_enable_bit_mask; ++ } else { ++ tb->trace_enable_init_mask[trace_enable_reg_offset] &= ++ ~trace_enable_bit_mask; ++ } + -+int kbase_device_timeline_init(struct kbase_device *kbdev) -+{ -+ atomic_set(&kbdev->timeline_flags, 0); -+ return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags); ++ /* This is not strictly needed as the caller is supposed to ++ * reload the firmware image (through GPU reset) after updating ++ * the bitmask. Otherwise there is no guarantee that firmware ++ * will take into account the updated bitmask for all types of ++ * trace buffers, since firmware could continue to use the ++ * value of bitmask it cached after the boot. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, ++ tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, ++ tb->trace_enable_init_mask[trace_enable_reg_offset]); ++ } +} + -+void kbase_device_timeline_term(struct kbase_device *kbdev) ++int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *tb, unsigned int bit, bool value) +{ -+ kbase_timeline_term(kbdev->timeline); -+} ++ struct kbase_device *kbdev = tb->kbdev; ++ int err = 0; ++ unsigned long flags; + -+int kbase_device_vinstr_init(struct kbase_device *kbdev) -+{ -+ return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+void kbase_device_vinstr_term(struct kbase_device *kbdev) -+{ -+ kbase_vinstr_term(kbdev->vinstr_ctx); -+} ++ /* If trace buffer update cannot be performed with ++ * FIRMWARE_CONFIG_UPDATE then we need to do a ++ * silent reset before we update the memory. ++ */ ++ if (!tb->updatable) { ++ /* If there is already a GPU reset pending then inform ++ * the User to retry the update. ++ */ ++ if (kbase_reset_gpu_silent(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "GPU reset already in progress when enabling firmware timeline."); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return -EAGAIN; ++ } ++ } + -+int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev) -+{ -+ return kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, -+ &kbdev->kinstr_prfcnt_ctx); -+} ++ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, ++ value); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev) -+{ -+ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx); -+} ++ if (tb->updatable) ++ err = kbase_csf_trigger_firmware_config_update(kbdev); + -+int kbase_device_io_history_init(struct kbase_device *kbdev) -+{ -+ return kbase_io_history_init(&kbdev->io_history, -+ KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); ++ return err; +} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit); + -+void kbase_device_io_history_term(struct kbase_device *kbdev) ++bool kbase_csf_firmware_trace_buffer_is_empty( ++ const struct firmware_trace_buffer *trace_buffer) +{ -+ kbase_io_history_term(&kbdev->io_history); ++ return *(trace_buffer->cpu_va.insert_cpu_va) == ++ *(trace_buffer->cpu_va.extract_cpu_va); +} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_is_empty); + -+int kbase_device_misc_register(struct kbase_device *kbdev) ++unsigned int kbase_csf_firmware_trace_buffer_read_data( ++ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes) +{ -+ return misc_register(&kbdev->mdev); -+} ++ unsigned int bytes_copied; ++ u8 *data_cpu_va = trace_buffer->data_mapping.cpu_addr; ++ u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); ++ u32 insert_offset = 
*(trace_buffer->cpu_va.insert_cpu_va); ++ u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; + -+void kbase_device_misc_deregister(struct kbase_device *kbdev) -+{ -+ misc_deregister(&kbdev->mdev); -+} ++ if (insert_offset >= extract_offset) { ++ bytes_copied = min_t(unsigned int, num_bytes, ++ (insert_offset - extract_offset)); ++ memcpy(data, &data_cpu_va[extract_offset], bytes_copied); ++ extract_offset += bytes_copied; ++ } else { ++ unsigned int bytes_copied_head, bytes_copied_tail; + -+int kbase_device_list_init(struct kbase_device *kbdev) -+{ -+ const struct list_head *dev_list; ++ bytes_copied_tail = min_t(unsigned int, num_bytes, ++ (buffer_size - extract_offset)); ++ memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); + -+ dev_list = kbase_device_get_list(); -+ list_add(&kbdev->entry, &kbase_dev_list); -+ kbase_device_put_list(dev_list); ++ bytes_copied_head = min_t(unsigned int, ++ (num_bytes - bytes_copied_tail), insert_offset); ++ memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); + -+ return 0; -+} ++ bytes_copied = bytes_copied_head + bytes_copied_tail; ++ extract_offset += bytes_copied; ++ if (extract_offset >= buffer_size) ++ extract_offset = bytes_copied_head; ++ } + -+void kbase_device_list_term(struct kbase_device *kbdev) -+{ -+ const struct list_head *dev_list; ++ *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + -+ dev_list = kbase_device_get_list(); -+ list_del(&kbdev->entry); -+ kbase_device_put_list(dev_list); ++ return bytes_copied; +} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); + -+const struct list_head *kbase_device_get_list(void) ++static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) +{ -+ mutex_lock(&kbase_dev_list_lock); -+ return &kbase_dev_list; -+} -+KBASE_EXPORT_TEST_API(kbase_device_get_list); ++ unsigned int i; + -+void kbase_device_put_list(const struct list_head *dev_list) -+{ -+ mutex_unlock(&kbase_dev_list_lock); ++ for (i = 0; i < tb->trace_enable_entry_count; i++) ++ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, (mask >> i) & 1); +} -+KBASE_EXPORT_TEST_API(kbase_device_put_list); + -+int kbase_device_early_init(struct kbase_device *kbdev) ++#define U32_BITS 32 ++u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb) +{ -+ int err; -+ -+ err = kbase_ktrace_init(kbdev); -+ if (err) -+ return err; -+ -+ -+ err = kbasep_platform_device_init(kbdev); -+ if (err) -+ goto ktrace_term; -+ -+ err = kbase_pm_runtime_init(kbdev); -+ if (err) -+ goto fail_runtime_pm; -+ -+ /* This spinlock is initialized before doing the first access to GPU -+ * registers and installing interrupt handlers. -+ */ -+ spin_lock_init(&kbdev->hwaccess_lock); -+ -+ /* Ensure we can access the GPU registers */ -+ kbase_pm_register_access_enable(kbdev); -+ -+ /* -+ * Find out GPU properties based on the GPU feature registers. -+ * Note that this does not populate the few properties that depend on -+ * hw_features being initialized. Those are set by kbase_gpuprops_set_features -+ * soon after this in the init process. -+ */ -+ kbase_gpuprops_set(kbdev); -+ -+ /* We're done accessing the GPU registers for now. 
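kbase_csf_firmware_trace_buffer_read_data() above drains the region between the Extract and Insert offsets, copying in two pieces when the valid data wraps past the end of the buffer and then advancing Extract. A compact user-space sketch of the same copy/wrap logic (illustrative only; the buffer contents and offsets are made-up values, and the wrap handling is expressed with a modulo rather than the driver's explicit reset):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned int ring_read(const uint8_t *buf, unsigned int buf_size,
			      unsigned int *extract, unsigned int insert,
			      uint8_t *out, unsigned int max_bytes)
{
	unsigned int copied;

	if (insert >= *extract) {
		/* Contiguous region: [extract, insert) */
		copied = insert - *extract;
		if (copied > max_bytes)
			copied = max_bytes;
		memcpy(out, buf + *extract, copied);
		*extract += copied;
	} else {
		/* Wrapped region: [extract, end) followed by [0, insert) */
		unsigned int tail = buf_size - *extract;
		unsigned int head;

		if (tail > max_bytes)
			tail = max_bytes;
		memcpy(out, buf + *extract, tail);

		head = max_bytes - tail;
		if (head > insert)
			head = insert;
		memcpy(out + tail, buf, head);

		copied = tail + head;
		*extract = (*extract + copied) % buf_size;
	}

	return copied;
}

int main(void)
{
	/* 8-byte buffer holding "ABCDE" wrapped across the end (at 6,7,0,1,2). */
	uint8_t buf[8] = { 'C', 'D', 'E', 0, 0, 0, 'A', 'B' };
	uint8_t out[8] = { 0 };
	unsigned int extract = 6, insert = 3;
	unsigned int n = ring_read(buf, sizeof(buf), &extract, insert, out, sizeof(out));

	printf("read %u bytes: %.*s (extract now %u)\n", n, (int)n, out, extract);
	return 0;
}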
*/ -+ kbase_pm_register_access_disable(kbdev); -+ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) -+ err = kbase_arbiter_pm_install_interrupts(kbdev); -+ else -+ err = kbase_install_interrupts(kbdev); -+#else -+ err = kbase_install_interrupts(kbdev); -+#endif -+ if (err) -+ goto fail_interrupts; -+ -+ return 0; ++ u64 active_mask = tb->trace_enable_init_mask[0]; + -+fail_interrupts: -+ kbase_pm_runtime_term(kbdev); -+fail_runtime_pm: -+ kbasep_platform_device_term(kbdev); -+ktrace_term: -+ kbase_ktrace_term(kbdev); ++ if (tb->trace_enable_entry_count > U32_BITS) ++ active_mask |= (u64)tb->trace_enable_init_mask[1] << U32_BITS; + -+ return err; ++ return active_mask; +} + -+void kbase_device_early_term(struct kbase_device *kbdev) ++int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask) +{ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) -+ kbase_arbiter_pm_release_interrupts(kbdev); -+ else -+ kbase_release_interrupts(kbdev); -+#else -+ kbase_release_interrupts(kbdev); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ kbase_pm_runtime_term(kbdev); -+ kbasep_platform_device_term(kbdev); -+ kbase_ktrace_term(kbdev); -+} ++ struct kbase_device *kbdev = tb->kbdev; ++ unsigned long flags; ++ int err = 0; + -+int kbase_device_late_init(struct kbase_device *kbdev) -+{ -+ int err; ++ if (!tb->updatable) { ++ /* If there is already a GPU reset pending, need a retry */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbase_reset_gpu_silent(kbdev)) ++ err = -EAGAIN; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } + -+ err = kbasep_platform_device_late_init(kbdev); ++ if (!err) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ update_trace_buffer_active_mask64(tb, mask); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return err; -+} ++ /* if we can update the config we need to just trigger ++ * FIRMWARE_CONFIG_UPDATE. ++ */ ++ if (tb->updatable) ++ err = kbase_csf_trigger_firmware_config_update(kbdev); ++ } + -+void kbase_device_late_term(struct kbase_device *kbdev) -+{ -+ kbasep_platform_device_late_term(kbdev); ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h new file mode 100644 -index 000000000..f02501100 +index 000000000..0389d093a --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h -@@ -0,0 +1,227 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h +@@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -180015,222 +182213,182 @@ index 000000000..f02501100 + * + */ + -+#include ++#ifndef _KBASE_CSF_TRACE_BUFFER_H_ ++#define _KBASE_CSF_TRACE_BUFFER_H_ + -+/** -+ * kbase_device_get_list - get device list. -+ * Get access to device list. -+ * -+ * Return: Pointer to the linked list head. 
-+ */ -+const struct list_head *kbase_device_get_list(void); ++#include ++ ++#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) ++#define FIRMWARE_LOG_BUF_NAME "fwlog" ++ ++/* Forward declarations */ ++struct firmware_trace_buffer; ++struct kbase_device; + +/** -+ * kbase_device_put_list - put device list. ++ * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers + * -+ * @dev_list: head of linked list containing device list. ++ * @kbdev: Device pointer + * -+ * Put access to the device list. -+ */ -+void kbase_device_put_list(const struct list_head *dev_list); -+ -+/** -+ * kbase_increment_device_id - increment device id. ++ * Allocate resources for trace buffers. In particular: ++ * - One memory page of GPU-readable, CPU-writable memory is used for ++ * the Extract variables of all trace buffers. ++ * - One memory page of GPU-writable, CPU-readable memory is used for ++ * the Insert variables of all trace buffers. ++ * - A data buffer of GPU-writable, CPU-readable memory is allocated ++ * for each trace buffer. + * -+ * Used to increment device id on successful initialization of the device. ++ * After that, firmware addresses are written with pointers to the ++ * insert, extract and data buffer variables. The size and the trace ++ * enable bits are not dereferenced by the GPU and shall be written ++ * in the firmware addresses directly. ++ * ++ * This function relies on the assumption that the list of ++ * firmware_trace_buffer elements in the device has already been ++ * populated with data from the firmware image parsing. ++ * ++ * Return: 0 if success, or an error code on failure. + */ -+void kbase_increment_device_id(void); ++int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); + +/** -+ * kbase_device_firmware_init_once - Initialize firmware and HWC -+ * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. -+ * -+ * When a device file is opened for the first time, -+ * load firmware and initialize hardware counter components. ++ * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers + * -+ * Return: 0 on success. An error code on failure. ++ * @kbdev: Device pointer + */ -+int kbase_device_firmware_init_once(struct kbase_device *kbdev); ++void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); + +/** -+ * kbase_device_init - Device initialisation. ++ * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @kbdev: Kbase device structure ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE + * -+ * This is called from device probe to initialise various other -+ * components needed. ++ * Read a "trace buffer" section adding metadata for the related trace buffer ++ * to the kbase_device:csf.firmware_trace_buffers list. + * -+ * Return: 0 on success and non-zero value on failure. ++ * Unexpected trace buffers will not be parsed and, as a consequence, ++ * will not be initialized. ++ * ++ * Return: 0 if successful, negative error code on failure. + */ -+int kbase_device_init(struct kbase_device *kbdev); ++int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, ++ const u32 *entry, ++ unsigned int size, ++ bool updatable); + +/** -+ * kbase_device_term - Device termination. 
++ * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @kbdev: Device pointer + * -+ * This is called from device remove to terminate various components that -+ * were initialised during kbase_device_init. -+ */ -+void kbase_device_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_reg_write - write to GPU register -+ * @kbdev: Kbase device pointer -+ * @offset: Offset of register -+ * @value: Value to write ++ * Helper function used when rebooting the firmware to reload the initial setup ++ * for all the trace buffers which have been previously parsed and initialized. + * -+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). ++ * Almost all of the operations done in the initialization process are ++ * replicated, with the difference that they might be done in a different order ++ * and that the variables of a given trace buffer may be mapped to different ++ * offsets within the same existing mappings. ++ * ++ * In other words, the re-initialization done by this function will be ++ * equivalent but not necessarily identical to the original initialization. + */ -+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); ++void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); + +/** -+ * kbase_reg_read - read from GPU register -+ * @kbdev: Kbase device pointer -+ * @offset: Offset of register ++ * kbase_csf_firmware_get_trace_buffer - Get a trace buffer + * -+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). ++ * @kbdev: Device pointer ++ * @name: Name of the trace buffer to find + * -+ * Return: Value in desired register ++ * Return: handle to a trace buffer, given the name, or NULL if a trace buffer ++ * with that name couldn't be found. + */ -+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); ++struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( ++ struct kbase_device *kbdev, const char *name); + +/** -+ * kbase_is_gpu_removed() - Has the GPU been removed. -+ * @kbdev: Kbase device pointer ++ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer + * -+ * When Kbase takes too long to give up the GPU, the Arbiter -+ * can remove it. This will then be followed by a GPU lost event. -+ * This function will return true if the GPU has been removed. -+ * When this happens register reads will be zero. A zero GPU_ID is -+ * invalid so this is used to detect when GPU is removed. ++ * @trace_buffer: Trace buffer handle + * -+ * Return: True if GPU removed ++ * Return: Number of trace enable bits in a trace buffer. + */ -+bool kbase_is_gpu_removed(struct kbase_device *kbdev); ++unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( ++ const struct firmware_trace_buffer *trace_buffer); + +/** -+ * kbase_gpu_cache_flush_pa_range_and_busy_wait() - Start a cache physical range flush -+ * and busy wait ++ * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - Update a trace enable bit + * -+ * @kbdev: kbase device to issue the MMU operation on. -+ * @phys: Starting address of the physical range to start the operation on. -+ * @nr_bytes: Number of bytes to work on. 
-+ * @flush_op: Flush command register value to be sent to HW ++ * @trace_buffer: Trace buffer handle ++ * @bit: Bit to update ++ * @value: New value for the given bit + * -+ * Issue a cache flush physical range command, then busy wait an irq status. -+ * This function will clear FLUSH_PA_RANGE_COMPLETED irq mask bit -+ * and busy-wait the rawstat register. ++ * Update the value of a given trace enable bit. + * -+ * Return: 0 if successful or a negative error code on failure. ++ * Return: 0 if successful, negative error code on failure. + */ -+#if MALI_USE_CSF -+int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, -+ size_t nr_bytes, u32 flush_op); -+#endif /* MALI_USE_CSF */ ++int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *trace_buffer, unsigned int bit, ++ bool value); + +/** -+ * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait -+ * @kbdev: Kbase device -+ * @flush_op: Flush command register value to be sent to HW ++ * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate + * -+ * Issue a cache flush command to hardware, then busy wait an irq status. -+ * This function will clear CLEAN_CACHES_COMPLETED irq mask bit set by other -+ * threads through kbase_gpu_start_cache_clean(), and wake them up manually -+ * after the busy-wait is done. Any pended cache flush commands raised by -+ * other thread are handled in this function. -+ * hwaccess_lock must be held by the caller. ++ * @trace_buffer: Trace buffer handle + * -+ * Return: 0 if successful or a negative error code on failure. ++ * Return: True if the trace buffer is empty, or false otherwise. + */ -+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, -+ u32 flush_op); ++bool kbase_csf_firmware_trace_buffer_is_empty( ++ const struct firmware_trace_buffer *trace_buffer); + +/** -+ * kbase_gpu_start_cache_clean - Start a cache clean -+ * @kbdev: Kbase device -+ * @flush_op: Flush command register value to be sent to HW ++ * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer + * -+ * Issue a given cache flush command to hardware. -+ * This function will take hwaccess_lock. -+ */ -+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op); -+ -+/** -+ * kbase_gpu_start_cache_clean_nolock - Start a cache clean -+ * @kbdev: Kbase device -+ * @flush_op: Flush command register value to be sent to HW ++ * @trace_buffer: Trace buffer handle ++ * @data: Pointer to a client-allocated where data shall be written. ++ * @num_bytes: Maximum number of bytes to read from the trace buffer. + * -+ * Issue a given cache flush command to hardware. -+ * hwaccess_lock must be held by the caller. -+ */ -+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, -+ u32 flush_op); -+ -+/** -+ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish -+ * @kbdev: Kbase device ++ * Read available data from a trace buffer. The client provides a data buffer ++ * of a given size and the maximum number of bytes to read. + * -+ * This function will take hwaccess_lock, and may sleep. ++ * Return: Number of bytes read from the trace buffer. 
+ */ -+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); ++unsigned int kbase_csf_firmware_trace_buffer_read_data( ++ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); + +/** -+ * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache -+ * cleaning to finish -+ * @kbdev: Kbase device -+ * @wait_timeout_ms: Time in milliseconds, to wait for cache clean to complete. ++ * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask + * -+ * This function will take hwaccess_lock, and may sleep. This is supposed to be -+ * called from paths (like GPU reset) where an indefinite wait for the -+ * completion of cache clean operation can cause deadlock, as the operation may -+ * never complete. ++ * @tb: Trace buffer handle + * -+ * Return: 0 if successful or a negative error code on failure. ++ * Return: Trace buffer active mask. + */ -+int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, -+ unsigned int wait_timeout_ms); ++u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb); + +/** -+ * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is -+ * finished. Would also be called after -+ * the GPU reset. -+ * @kbdev: Kbase device ++ * kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask + * -+ * Caller must hold the hwaccess_lock. -+ */ -+void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); -+ -+/** -+ * kbase_clean_caches_done - Issue previously queued cache clean request or -+ * wake up the requester that issued cache clean. -+ * @kbdev: Kbase device ++ * @tb: Trace buffer handle ++ * @mask: New active mask + * -+ * Caller must hold the hwaccess_lock. ++ * Return: 0 if successful, negative error code on failure. + */ -+void kbase_clean_caches_done(struct kbase_device *kbdev); ++int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask); + -+/** -+ * kbase_gpu_interrupt - GPU interrupt handler -+ * @kbdev: Kbase device pointer -+ * @val: The value of the GPU IRQ status register which triggered the call -+ * -+ * This function is called from the interrupt handler when a GPU irq is to be -+ * handled. -+ */ -+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); -diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c ++#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c new file mode 100644 -index 000000000..d55495045 +index 000000000..185779c16 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c -@@ -0,0 +1,291 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c +@@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -180249,285 +182407,265 @@ index 000000000..d55495045 + */ + +#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+bool kbase_is_gpu_removed(struct kbase_device *kbdev) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++/** ++ * kbasep_fault_occurred - Check if fault occurred. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if a fault occurred. ++ */ ++static bool kbasep_fault_occurred(struct kbase_device *kbdev) +{ -+ u32 val; ++ unsigned long flags; ++ bool ret; + -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ ret = (kbdev->csf.dof.error_code != DF_NO_ERROR); ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + -+ return val == 0; ++ return ret; +} -+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) ++void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) +{ -+ char *irq_flag_name; -+ /* Previously MMU-AS command was used for L2 cache flush on page-table update. -+ * And we're using the same max-loops count for GPU command, because amount of -+ * L2 cache flush overhead are same between them. -+ */ -+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; -+ -+ /* Wait for the GPU cache clean operation to complete */ -+ while (--max_loops && -+ !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) { -+ ; ++ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) { ++ dev_dbg(kbdev->dev, "No userspace client for dumping exists"); ++ return; + } + -+ /* reset gpu if time-out occurred */ -+ if (max_loops == 0) { -+ switch (irq_bit) { -+ case CLEAN_CACHES_COMPLETED: -+ irq_flag_name = "CLEAN_CACHES_COMPLETED"; -+ break; -+ case FLUSH_PA_RANGE_COMPLETED: -+ irq_flag_name = "FLUSH_PA_RANGE_COMPLETED"; -+ break; -+ default: -+ irq_flag_name = "UNKNOWN"; -+ break; -+ } ++ wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev)); ++} ++KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion); + -+ dev_err(kbdev->dev, -+ "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n", -+ irq_flag_name); -+ -+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu_locked(kbdev); -+ return -EBUSY; -+ } -+ -+ /* Clear the interrupt bit. */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit); -+ -+ return 0; ++/** ++ * kbase_debug_csf_fault_wakeup - Wake up a waiting user space client. ++ * ++ * @kbdev: Kbase device ++ */ ++static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev) ++{ ++ wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq); +} + -+#if MALI_USE_CSF -+#define U64_LO_MASK ((1ULL << 32) - 1) -+#define U64_HI_MASK (~U64_LO_MASK) -+ -+int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, -+ size_t nr_bytes, u32 flush_op) ++bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, ++ struct kbase_context *kctx, enum dumpfault_error_type error) +{ -+ u64 start_pa, end_pa; -+ int ret = 0; ++ unsigned long flags; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) ++ return false; + -+ /* 1. 
Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED); ++ if (WARN_ON(error == DF_NO_ERROR)) ++ return false; + -+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. */ -+ start_pa = phys; -+ end_pa = start_pa + nr_bytes - 1; ++ if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) { ++ dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported", ++ kctx->tgid, kctx->id, error); ++ kctx = NULL; ++ } + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI), -+ (start_pa & U64_HI_MASK) >> 32); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + -+ /* 3. Busy-wait irq status to be enabled. */ -+ ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED); ++ /* Only one fault at a time can be processed */ ++ if (kbdev->csf.dof.error_code) { ++ dev_info(kbdev->dev, "skip this fault as there's a pending fault"); ++ goto unlock; ++ } + -+ return ret; ++ kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0; ++ kbdev->csf.dof.kctx_id = kctx ? kctx->id : 0; ++ kbdev->csf.dof.error_code = error; ++ kbase_debug_csf_fault_wakeup(kbdev); ++ ++unlock: ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ return true; +} -+#endif /* MALI_USE_CSF */ + -+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, -+ u32 flush_op) ++static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size, ++ loff_t *f_pos) +{ -+ int need_to_wake_up = 0; -+ int ret = 0; -+ -+ /* hwaccess_lock must be held to avoid any sync issue with -+ * kbase_gpu_start_cache_clean() / kbase_clean_caches_done() -+ */ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* 1. Check if kbdev->cache_clean_in_progress is set. -+ * If it is set, it means there are threads waiting for -+ * CLEAN_CACHES_COMPLETED irq to be raised and that the -+ * corresponding irq mask bit is set. -+ * We'll clear the irq mask bit and busy-wait for the cache -+ * clean operation to complete before submitting the cache -+ * clean command required after the GPU page table update. -+ * Pended flush commands will be merged to requested command. -+ */ -+ if (kbdev->cache_clean_in_progress) { -+ /* disable irq first */ -+ u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask & ~CLEAN_CACHES_COMPLETED); ++#define BUF_SIZE 64 ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ int count; ++ char buf[BUF_SIZE]; ++ u32 tgid, ctx_id; ++ enum dumpfault_error_type error_code; + -+ /* busy wait irq status to be enabled */ -+ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); -+ if (ret) -+ return ret; ++ if (unlikely(!file)) { ++ pr_warn("%s: file is NULL", __func__); ++ return -EINVAL; ++ } + -+ /* merge pended command if there's any */ -+ flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE( -+ kbdev->cache_clean_queued, flush_op); ++ kbdev = file->private_data; ++ if (unlikely(!buffer)) { ++ dev_warn(kbdev->dev, "%s: buffer is NULL", __func__); ++ return -EINVAL; ++ } + -+ /* enable wake up notify flag */ -+ need_to_wake_up = 1; -+ } else { -+ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. 
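The FLUSH_PA_RANGE path shown above programs a 64-bit physical range as two 32-bit register pairs, with end_pa = start_pa + nr_bytes - 1 and the LO/HI masks splitting each address into its low and high words before the GPU_COMMAND_ARG writes. A throwaway stand-alone sketch of that split, using assumed example addresses:

#include <stdint.h>
#include <stdio.h>

#define U64_LO_MASK ((1ULL << 32) - 1)
#define U64_HI_MASK (~U64_LO_MASK)

int main(void)
{
	uint64_t start_pa = 0x123456789000ULL; /* assumed example physical address */
	uint64_t nr_bytes = 0x4000;            /* assumed example length */
	uint64_t end_pa = start_pa + nr_bytes - 1;

	uint32_t start_lo = (uint32_t)(start_pa & U64_LO_MASK);
	uint32_t start_hi = (uint32_t)((start_pa & U64_HI_MASK) >> 32);
	uint32_t end_lo = (uint32_t)(end_pa & U64_LO_MASK);
	uint32_t end_hi = (uint32_t)((end_pa & U64_HI_MASK) >> 32);

	printf("ARG0 = 0x%08x:0x%08x, ARG1 = 0x%08x:0x%08x\n",
	       start_hi, start_lo, end_hi, end_lo);
	return 0;
}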
*/ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), -+ CLEAN_CACHES_COMPLETED); ++ if (unlikely(*f_pos < 0)) { ++ dev_warn(kbdev->dev, "%s: f_pos is negative", __func__); ++ return -EINVAL; + } + -+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); ++ if (size < sizeof(buf)) { ++ dev_warn(kbdev->dev, "%s: buffer is too small", __func__); ++ return -EINVAL; ++ } + -+ /* 3. Busy-wait irq status to be enabled. */ -+ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); -+ if (ret) -+ return ret; ++ if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev))) ++ return -ERESTARTSYS; + -+ /* 4. Wake-up blocked threads when there is any. */ -+ if (need_to_wake_up) -+ kbase_gpu_cache_clean_wait_complete(kbdev); ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ tgid = kbdev->csf.dof.kctx_tgid; ++ ctx_id = kbdev->csf.dof.kctx_id; ++ error_code = kbdev->csf.dof.error_code; ++ BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code))); ++ count = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code); ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + -+ return ret; ++ dev_info(kbdev->dev, "debug csf fault info read"); ++ return simple_read_from_buffer(buffer, size, f_pos, buf, count); +} + -+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, -+ u32 flush_op) ++static int debug_csf_fault_open(struct inode *in, struct file *file) +{ -+ u32 irq_mask; ++ struct kbase_device *kbdev; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (unlikely(!in)) { ++ pr_warn("%s: inode is NULL", __func__); ++ return -EINVAL; ++ } + -+ if (kbdev->cache_clean_in_progress) { -+ /* If this is called while another clean is in progress, we -+ * can't rely on the current one to flush any new changes in -+ * the cache. Instead, accumulate all cache clean operations -+ * and trigger that immediately after this one finishes. 
-+ */ -+ kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE( -+ kbdev->cache_clean_queued, flush_op); -+ return; ++ kbdev = in->i_private; ++ if (unlikely(!file)) { ++ dev_warn(kbdev->dev, "%s: file is NULL", __func__); ++ return -EINVAL; + } + -+ /* Enable interrupt */ -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask | CLEAN_CACHES_COMPLETED); ++ if (atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1) == 1) { ++ dev_warn(kbdev->dev, "Only one client is allowed for dump on fault"); ++ return -EBUSY; ++ } + -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); ++ dev_info(kbdev->dev, "debug csf fault file open"); + -+ kbdev->cache_clean_in_progress = true; ++ return simple_open(in, file); +} + -+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op) ++static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos) +{ ++ struct kbase_device *kbdev; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_gpu_start_cache_clean_nolock(kbdev, flush_op); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ if (unlikely(!file)) { ++ pr_warn("%s: file is NULL", __func__); ++ return -EINVAL; ++ } + -+void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbdev = file->private_data; ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ kbdev->csf.dof.error_code = DF_NO_ERROR; ++ kbdev->csf.dof.kctx_tgid = 0; ++ kbdev->csf.dof.kctx_id = 0; ++ dev_info(kbdev->dev, "debug csf fault dump complete"); ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + -+ kbdev->cache_clean_queued = 0; -+ kbdev->cache_clean_in_progress = false; -+ wake_up(&kbdev->cache_clean_wait); ++ /* User space finished the dump. ++ * Wake up blocked kernel threads to proceed. ++ */ ++ wake_up(&kbdev->csf.dof.dump_wait_wq); ++ ++ return count; +} + -+void kbase_clean_caches_done(struct kbase_device *kbdev) ++static int debug_csf_fault_release(struct inode *in, struct file *file) +{ -+ u32 irq_mask; ++ struct kbase_device *kbdev; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (kbdev->cache_clean_in_progress) { -+ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set. -+ * It might have already been done by kbase_gpu_cache_flush_and_busy_wait. 
-+ */ -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); -+ -+ if (kbdev->cache_clean_queued) { -+ u32 pended_flush_op = kbdev->cache_clean_queued; ++ if (unlikely(!in)) { ++ pr_warn("%s: inode is NULL", __func__); ++ return -EINVAL; ++ } + -+ kbdev->cache_clean_queued = 0; ++ kbdev = in->i_private; ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ kbdev->csf.dof.kctx_tgid = 0; ++ kbdev->csf.dof.kctx_id = 0; ++ kbdev->csf.dof.error_code = DF_NO_ERROR; ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + -+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op); -+ } else { -+ /* Disable interrupt */ -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask & ~CLEAN_CACHES_COMPLETED); ++ atomic_set(&kbdev->csf.dof.enabled, 0); ++ dev_info(kbdev->dev, "debug csf fault file close"); + -+ kbase_gpu_cache_clean_wait_complete(kbdev); -+ } -+ } ++ /* User space closed the debugfs file. ++ * Wake up blocked kernel threads to resume. ++ */ ++ wake_up(&kbdev->csf.dof.dump_wait_wq); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return 0; +} + -+static inline bool get_cache_clean_flag(struct kbase_device *kbdev) ++static const struct file_operations kbasep_debug_csf_fault_fops = { ++ .owner = THIS_MODULE, ++ .open = debug_csf_fault_open, ++ .read = debug_csf_fault_read, ++ .write = debug_csf_fault_write, ++ .llseek = default_llseek, ++ .release = debug_csf_fault_release, ++}; ++ ++void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev) +{ -+ bool cache_clean_in_progress; -+ unsigned long flags; ++ const char *fname = "csf_fault"; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ cache_clean_in_progress = kbdev->cache_clean_in_progress; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (unlikely(!kbdev)) { ++ pr_warn("%s: kbdev is NULL", __func__); ++ return; ++ } + -+ return cache_clean_in_progress; ++ debugfs_create_file(fname, 0600, kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_debug_csf_fault_fops); +} + -+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) ++int kbase_debug_csf_fault_init(struct kbase_device *kbdev) +{ -+ while (get_cache_clean_flag(kbdev)) { -+ wait_event_interruptible(kbdev->cache_clean_wait, -+ !kbdev->cache_clean_in_progress); ++ if (unlikely(!kbdev)) { ++ pr_warn("%s: kbdev is NULL", __func__); ++ return -EINVAL; + } -+} + -+int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, -+ unsigned int wait_timeout_ms) -+{ -+ long remaining = msecs_to_jiffies(wait_timeout_ms); ++ init_waitqueue_head(&(kbdev->csf.dof.fault_wait_wq)); ++ init_waitqueue_head(&(kbdev->csf.dof.dump_wait_wq)); ++ spin_lock_init(&kbdev->csf.dof.lock); ++ kbdev->csf.dof.kctx_tgid = 0; ++ kbdev->csf.dof.kctx_id = 0; ++ kbdev->csf.dof.error_code = DF_NO_ERROR; ++ atomic_set(&kbdev->csf.dof.enabled, 0); + -+ while (remaining && get_cache_clean_flag(kbdev)) { -+ remaining = wait_event_timeout(kbdev->cache_clean_wait, -+ !kbdev->cache_clean_in_progress, -+ remaining); -+ } ++ return 0; ++} + -+ return (remaining ? 
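The dump-on-fault debugfs interface above exposes a single "csf_fault" file: opening it enables dump on fault, a blocking read returns "tgid_ctxid_errorcode" once a fault is recorded, and any write tells the driver that the dump has completed. A hedged sketch of a minimal user-space client; the full path is an assumption (here /sys/kernel/debug/mali0/csf_fault), since the driver only fixes the "csf_fault" file name under its own debugfs directory:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Path is an assumption; only the "csf_fault" name comes from the driver. */
	const char *path = "/sys/kernel/debug/mali0/csf_fault";
	char buf[64] = { 0 };
	unsigned int tgid, ctx_id, error_code;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Blocks until the driver records a fault, then returns "tgid_ctx_error". */
	if (read(fd, buf, sizeof(buf) - 1) <= 0) {
		perror("read");
		close(fd);
		return 1;
	}

	if (sscanf(buf, "%u_%u_%u", &tgid, &ctx_id, &error_code) == 3)
		printf("fault: tgid=%u ctx=%u error=%u\n", tgid, ctx_id, error_code);

	/* ... collect whatever dump state is needed here ... */

	/* Any write signals the driver that the dump is complete. */
	if (write(fd, "done", 4) < 0)
		perror("write");

	close(fd);
	return 0;
}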
0 : -ETIMEDOUT); ++void kbase_debug_csf_fault_term(struct kbase_device *kbdev) ++{ +} -diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h new file mode 100644 -index 000000000..6f77592df +index 000000000..6e9b1a9d5 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h -@@ -0,0 +1,101 @@ ++++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h +@@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -180545,95 +182683,131 @@ index 000000000..6f77592df + * + */ + -+#include -+ -+typedef int kbase_device_init_method(struct kbase_device *kbdev); -+typedef void kbase_device_term_method(struct kbase_device *kbdev); ++#ifndef _KBASE_DEBUG_CSF_FAULT_H ++#define _KBASE_DEBUG_CSF_FAULT_H + ++#if IS_ENABLED(CONFIG_DEBUG_FS) +/** -+ * struct kbase_device_init - Device init/term methods. -+ * @init: Function pointer to a initialise method. -+ * @term: Function pointer to a terminate method. -+ * @err_mes: Error message to be printed when init method fails. ++ * kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs ++ * @kbdev: Device pointer + */ -+struct kbase_device_init { -+ kbase_device_init_method *init; -+ kbase_device_term_method *term; -+ char *err_mes; -+}; -+ -+int kbase_device_vinstr_init(struct kbase_device *kbdev); -+void kbase_device_vinstr_term(struct kbase_device *kbdev); -+ -+int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev); -+void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev); -+ -+int kbase_device_timeline_init(struct kbase_device *kbdev); -+void kbase_device_timeline_term(struct kbase_device *kbdev); -+ -+int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); -+void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); -+ -+int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev); -+void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev); -+ -+int kbase_device_list_init(struct kbase_device *kbdev); -+void kbase_device_list_term(struct kbase_device *kbdev); -+ -+int kbase_device_io_history_init(struct kbase_device *kbdev); -+void kbase_device_io_history_term(struct kbase_device *kbdev); -+ -+int kbase_device_misc_register(struct kbase_device *kbdev); -+void kbase_device_misc_deregister(struct kbase_device *kbdev); -+ -+void kbase_device_id_init(struct kbase_device *kbdev); ++void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev); + +/** -+ * kbase_device_early_init - Perform any device-specific initialization. -+ * @kbdev: Device pointer ++ * kbase_debug_csf_fault_init - Create the fault event wait queue per device ++ * and initialize the required resources. ++ * @kbdev: Device pointer + * -+ * Return: 0 on success, or an error code on failure. ++ * Return: Zero on success or a negative error code. + */ -+int kbase_device_early_init(struct kbase_device *kbdev); ++int kbase_debug_csf_fault_init(struct kbase_device *kbdev); + +/** -+ * kbase_device_early_term - Perform any device-specific termination. 
-+ * @kbdev: Device pointer ++ * kbase_debug_csf_fault_term - Clean up resources created by ++ * @kbase_debug_csf_fault_init. ++ * @kbdev: Device pointer + */ -+void kbase_device_early_term(struct kbase_device *kbdev); ++void kbase_debug_csf_fault_term(struct kbase_device *kbdev); + +/** -+ * kbase_device_late_init - Complete any device-specific initialization. -+ * @kbdev: Device pointer ++ * kbase_debug_csf_fault_wait_completion - Wait for the client to complete. + * -+ * Return: 0 on success, or an error code on failure. ++ * @kbdev: Device Pointer ++ * ++ * Wait for the user space client to finish reading the fault information. ++ * This function must be called in thread context. + */ -+int kbase_device_late_init(struct kbase_device *kbdev); ++void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev); + +/** -+ * kbase_device_late_term - Complete any device-specific termination. -+ * @kbdev: Device pointer ++ * kbase_debug_csf_fault_notify - Notify client of a fault. ++ * ++ * @kbdev: Device pointer ++ * @kctx: Faulty context (can be NULL) ++ * @error: Error code. ++ * ++ * Store fault information and wake up the user space client. ++ * ++ * Return: true if a dump on fault was initiated or was is in progress and ++ * so caller can opt to wait for the dumping to complete. + */ -+void kbase_device_late_term(struct kbase_device *kbdev); ++bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, ++ struct kbase_context *kctx, enum dumpfault_error_type error); + -+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +/** -+ * kbase_is_register_accessible - Checks if register is accessible -+ * @offset: Register offset ++ * kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled. + * -+ * Return: true if the register is accessible, false otherwise. ++ * @kbdev: Device pointer ++ * ++ * Return: true if debugfs file is opened so dump on fault is enabled. + */ -+bool kbase_is_register_accessible(u32 offset); -+#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -diff --git a/drivers/gpu/arm/bifrost/gpu/Kbuild b/drivers/gpu/arm/bifrost/gpu/Kbuild ++static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) ++{ ++ return atomic_read(&kbdev->csf.dof.enabled); ++} ++ ++/** ++ * kbase_debug_csf_fault_dump_complete - Check if dump on fault is completed. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if dump on fault completes or file is closed. 
++ */ ++static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ bool ret; ++ ++ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) ++ return true; ++ ++ spin_lock_irqsave(&kbdev->csf.dof.lock, flags); ++ ret = (kbdev->csf.dof.error_code == DF_NO_ERROR); ++ spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); ++ ++ return ret; ++} ++#else /* CONFIG_DEBUG_FS */ ++static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev) ++{ ++ return 0; ++} ++ ++static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev) ++{ ++} ++ ++static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) ++{ ++} ++ ++static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, ++ struct kbase_context *kctx, enum dumpfault_error_type error) ++{ ++ return false; ++} ++ ++static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) ++{ ++ return false; ++} ++ ++static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) ++{ ++ return true; ++} ++#endif /* CONFIG_DEBUG_FS */ ++ ++#endif /*_KBASE_DEBUG_CSF_FAULT_H*/ +diff --git a/drivers/gpu/arm/bifrost/debug/Kbuild b/drivers/gpu/arm/bifrost/debug/Kbuild new file mode 100755 -index 000000000..f3a48caf6 +index 000000000..ebf3ddb76 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/Kbuild -@@ -0,0 +1,27 @@ ++++ b/drivers/gpu/arm/bifrost/debug/Kbuild +@@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# -+# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software @@ -180651,23 +182825,24 @@ index 000000000..f3a48caf6 +# +# + -+bifrost_kbase-y += gpu/mali_kbase_gpu.o ++bifrost_kbase-y += debug/mali_kbase_debug_ktrace.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += gpu/backend/mali_kbase_gpu_fault_csf.o ++ bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o ++ bifrost_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o +else -+ bifrost_kbase-y += gpu/backend/mali_kbase_gpu_fault_jm.o ++ bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o +endif -diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c new file mode 100644 -index 000000000..60ba9beab +index 000000000..ff5f947e2 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c -@@ -0,0 +1,179 @@ ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c +@@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -180686,1125 +182861,845 @@ index 000000000..60ba9beab + */ + +#include ++#include +#include -+#include ++#include ++#include ++#include ++#include + -+const char *kbase_gpu_exception_name(u32 const exception_code) ++static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state) +{ -+ const char *e; -+ -+ switch (exception_code) { -+ /* CS exceptions */ -+ case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED: -+ e = "CS_RESOURCE_TERMINATED"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT: -+ e = "CS_INHERIT_FAULT"; -+ break; -+ /* CS fatal exceptions */ -+ case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT: -+ e = "CS_CONFIG_FAULT"; -+ break; -+ case CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT: -+ e = "FATAL_CS_ENDPOINT_FAULT"; -+ break; -+ case CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION: -+ e = "FATAL_CS_INVALID_INSTRUCTION"; -+ break; -+ case CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW: -+ e = "FATAL_CS_CALL_STACK_OVERFLOW"; -+ break; -+ /* -+ * CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT and CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT share the same error code -+ * Type of CS_BUS_FAULT will be differentiated by CSF exception handler -+ */ -+ case CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT: -+ e = "CS_BUS_FAULT"; -+ break; -+ /* Shader exceptions */ -+ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC: -+ e = "INSTR_INVALID_PC"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC: -+ e = "INSTR_INVALID_ENC"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT: -+ e = "INSTR_BARRIER_FAULT"; -+ break; -+ /* Iterator exceptions */ -+ case CS_FAULT_EXCEPTION_TYPE_KABOOM: -+ e = "KABOOM"; -+ break; -+ /* Misc exceptions */ -+ case CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT: -+ e = "DATA_INVALID_FAULT"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT: -+ e = "TILE_RANGE_FAULT"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT: -+ e = "ADDR_RANGE_FAULT"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT: -+ e = "IMPRECISE_FAULT"; -+ break; -+ /* FW exceptions */ -+ case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: -+ e = "FIRMWARE_INTERNAL_ERROR"; -+ break; -+ case CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE: -+ e = "CS_UNRECOVERABLE"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: -+ e = "RESOURCE_EVICTION_TIMEOUT"; -+ break; -+ /* GPU Fault */ -+ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT: -+ e = "GPU_BUS_FAULT"; -+ break; -+ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT: -+ e = "GPU_SHAREABILITY_FAULT"; -+ break; -+ case GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT: -+ e = "SYSTEM_SHAREABILITY_FAULT"; -+ break; -+ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT: -+ e = "GPU_CACHEABILITY_FAULT"; -+ break; -+ /* MMU Fault */ -+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0: -+ e = "TRANSLATION_FAULT at level 0"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1: -+ e = "TRANSLATION_FAULT at level 1"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2: -+ e = "TRANSLATION_FAULT at level 2"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3: -+ e = "TRANSLATION_FAULT at level 3"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4: -+ e = "TRANSLATION_FAULT"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0: -+ e = "PERMISSION_FAULT at level 0"; -+ 
break; -+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1: -+ e = "PERMISSION_FAULT at level 1"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2: -+ e = "PERMISSION_FAULT at level 2"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3: -+ e = "PERMISSION_FAULT at level 3"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1: -+ e = "ACCESS_FLAG at level 1"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2: -+ e = "ACCESS_FLAG at level 2"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3: -+ e = "ACCESS_FLAG at level 3"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN: -+ e = "ADDRESS_SIZE_FAULT_IN"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0: -+ e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1: -+ e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2: -+ e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3: -+ e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0: -+ e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1: -+ e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2: -+ e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2"; -+ break; -+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3: -+ e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3"; -+ break; -+ /* Any other exception code is unknown */ ++ switch (state) { ++ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: ++ return "DISABLED"; ++ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: ++ return "ENABLED"; + default: -+ e = "UNKNOWN"; + break; + } + -+ return e; ++ return "UNKNOWN"; +} -diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c -new file mode 100644 -index 000000000..7f3743ca6 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c -@@ -0,0 +1,176 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ + -+#include ++static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client, ++ struct kbase_device *kbdev, u32 reg_addr, u8 op_type) ++{ ++ int i; + -+#include ++ if (reg_addr & 0x3) { ++ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned", ++ op_type, reg_addr); ++ return false; ++ } + -+const char *kbase_gpu_exception_name(u32 const exception_code) ++ for (i = 0; i < client->nr_ranges; i++) { ++ struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i]; ++ ++ if ((range->start <= reg_addr) && (reg_addr <= range->end)) ++ return true; ++ } ++ ++ dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type, ++ reg_addr); ++ ++ return false; ++} ++ ++static bool validate_op(struct kbase_debug_coresight_csf_client *client, ++ struct kbase_debug_coresight_csf_op *op) +{ -+ const char *e; ++ struct kbase_device *kbdev; ++ u32 reg; ++ ++ if (!op) ++ return false; ++ ++ if (!client) ++ return false; ++ ++ kbdev = (struct kbase_device *)client->drv_data; ++ ++ switch (op->type) { ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: ++ return true; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: ++ if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type)) ++ return true; + -+ switch (exception_code) { -+ /* Non-Fault Status code */ -+ case 0x00: -+ e = "NOT_STARTED/IDLE/OK"; -+ break; -+ case 0x01: -+ e = "DONE"; -+ break; -+ case 0x02: -+ e = "INTERRUPTED"; -+ break; -+ case 0x03: -+ e = "STOPPED"; -+ break; -+ case 0x04: -+ e = "TERMINATED"; -+ break; -+ case 0x08: -+ e = "ACTIVE"; -+ break; -+ /* Job exceptions */ -+ case 0x40: -+ e = "JOB_CONFIG_FAULT"; -+ break; -+ case 0x41: -+ e = "JOB_POWER_FAULT"; -+ break; -+ case 0x42: -+ e = "JOB_READ_FAULT"; -+ break; -+ case 0x43: -+ e = "JOB_WRITE_FAULT"; -+ break; -+ case 0x44: -+ e = "JOB_AFFINITY_FAULT"; -+ break; -+ case 0x48: -+ e = "JOB_BUS_FAULT"; -+ break; -+ case 0x50: -+ e = "INSTR_INVALID_PC"; -+ break; -+ case 0x51: -+ e = "INSTR_INVALID_ENC"; -+ break; -+ case 0x52: -+ e = "INSTR_TYPE_MISMATCH"; -+ break; -+ case 0x53: -+ e = "INSTR_OPERAND_FAULT"; + break; -+ case 0x54: -+ e = "INSTR_TLS_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: ++ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; ++ reg += sizeof(u32)) { ++ if (!validate_reg_addr(client, kbdev, reg, op->type)) ++ return false; ++ } ++ ++ return true; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: ++ if (!op->op.write.ptr) { ++ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); ++ break; ++ } ++ ++ if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type)) ++ return true; ++ + break; -+ case 0x55: -+ e = "INSTR_BARRIER_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: ++ if (!op->op.read.ptr) { ++ dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); ++ break; ++ } ++ ++ if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type)) ++ return true; ++ + break; -+ case 0x56: -+ e = "INSTR_ALIGN_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: ++ if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type)) ++ return true; ++ + break; -+ case 0x58: -+ e = "DATA_INVALID_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: ++ fallthrough; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: ++ fallthrough; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: ++ fallthrough; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: ++ if 
(op->op.bitw.ptr != NULL) ++ return true; ++ ++ dev_err(kbdev->dev, "Invalid bitwise operation pointer"); ++ + break; -+ case 0x59: -+ e = "TILE_RANGE_FAULT"; ++ default: ++ dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; -+ case 0x5A: -+ e = "ADDR_RANGE_FAULT"; ++ } ++ ++ return false; ++} ++ ++static bool validate_seq(struct kbase_debug_coresight_csf_client *client, ++ struct kbase_debug_coresight_csf_sequence *seq) ++{ ++ struct kbase_debug_coresight_csf_op *ops = seq->ops; ++ int nr_ops = seq->nr_ops; ++ int i; ++ ++ for (i = 0; i < nr_ops; i++) { ++ if (!validate_op(client, &ops[i])) ++ return false; ++ } ++ ++ return true; ++} ++ ++static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op) ++{ ++ int result = -EINVAL; ++ u32 reg; ++ ++ dev_dbg(kbdev->dev, "Execute operation %d", op->type); ++ ++ switch (op->type) { ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: ++ result = 0; + break; -+ case 0x60: -+ e = "OUT_OF_MEMORY"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: ++ result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, ++ op->op.write_imm.val); + break; -+ /* GPU exceptions */ -+ case 0x80: -+ e = "DELAYED_BUS_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: ++ for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; ++ reg += sizeof(u32)) { ++ result = kbase_csf_firmware_mcu_register_write(kbdev, reg, ++ op->op.write_imm_range.val); ++ if (!result) ++ break; ++ } + break; -+ case 0x88: -+ e = "SHAREABILITY_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: ++ result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, ++ *op->op.write.ptr); + break; -+ /* MMU exceptions */ -+ case 0xC0: -+ case 0xC1: -+ case 0xC2: -+ case 0xC3: -+ case 0xC4: -+ case 0xC5: -+ case 0xC6: -+ case 0xC7: -+ e = "TRANSLATION_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: ++ result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr, ++ op->op.read.ptr); + break; -+ case 0xC8: -+ case 0xC9: -+ case 0xCA: -+ case 0xCB: -+ case 0xCC: -+ case 0xCD: -+ case 0xCE: -+ case 0xCF: -+ e = "PERMISSION_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: ++ result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr, ++ op->op.poll.mask, op->op.poll.val); + break; -+ case 0xD0: -+ case 0xD1: -+ case 0xD2: -+ case 0xD3: -+ case 0xD4: -+ case 0xD5: -+ case 0xD6: -+ case 0xD7: -+ e = "TRANSTAB_BUS_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: ++ *op->op.bitw.ptr &= op->op.bitw.val; ++ result = 0; + break; -+ case 0xD8: -+ case 0xD9: -+ case 0xDA: -+ case 0xDB: -+ case 0xDC: -+ case 0xDD: -+ case 0xDE: -+ case 0xDF: -+ e = "ACCESS_FLAG"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: ++ *op->op.bitw.ptr |= op->op.bitw.val; ++ result = 0; + break; -+ case 0xE0: -+ case 0xE1: -+ case 0xE2: -+ case 0xE3: -+ case 0xE4: -+ case 0xE5: -+ case 0xE6: -+ case 0xE7: -+ e = "ADDRESS_SIZE_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: ++ *op->op.bitw.ptr ^= op->op.bitw.val; ++ result = 0; + break; -+ case 0xE8: -+ case 0xE9: -+ case 0xEA: -+ case 0xEB: -+ case 0xEC: -+ case 0xED: -+ case 0xEE: -+ case 0xEF: -+ e = "MEMORY_ATTRIBUTES_FAULT"; ++ case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: ++ *op->op.bitw.ptr = ~(*op->op.bitw.ptr); ++ result = 0; + break; + default: -+ e = "UNKNOWN"; ++ dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + -+ return e; ++ return result; +} -diff --git 
a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h -new file mode 100644 -index 000000000..e7457ddb5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h -@@ -0,0 +1,381 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#ifndef _KBASE_GPU_REGMAP_CSF_H_ -+#define _KBASE_GPU_REGMAP_CSF_H_ ++static int coresight_config_enable(struct kbase_device *kbdev, ++ struct kbase_debug_coresight_csf_config *config) ++{ ++ int ret = 0; ++ int i; + -+#include ++ if (!config) ++ return -EINVAL; + -+#if !MALI_USE_CSF && defined(__KERNEL__) -+#error "Cannot be compiled with JM" -+#endif ++ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) ++ return ret; + -+/* GPU_CONTROL_MCU base address */ -+#define GPU_CONTROL_MCU_BASE 0x3000 ++ for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++) ++ ret = execute_op(kbdev, &config->enable_seq->ops[i]); + -+/* MCU_SUBSYSTEM base address */ -+#define MCU_SUBSYSTEM_BASE 0x20000 ++ if (!ret) { ++ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, ++ coresight_state_to_string(config->state), ++ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); ++ config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED; ++ } + -+/* IPA control registers */ -+#define COMMAND 0x000 /* (WO) Command register */ -+#define TIMER 0x008 /* (RW) Timer control register */ ++ /* Always assign the return code during config enable. ++ * It gets propagated when calling config disable. 
++ */ ++ config->error = ret; + -+#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ -+#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ -+#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ -+#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ -+#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ -+#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ -+#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ -+#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ ++ return ret; ++} + -+/* Accumulated counter values for CS hardware */ -+#define VALUE_CSHW_BASE 0x100 -+#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -+#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++static int coresight_config_disable(struct kbase_device *kbdev, ++ struct kbase_debug_coresight_csf_config *config) ++{ ++ int ret = 0; ++ int i; + -+/* Accumulated counter values for memory system */ -+#define VALUE_MEMSYS_BASE 0x140 -+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ if (!config) ++ return -EINVAL; + -+#define VALUE_TILER_BASE 0x180 -+#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -+#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) ++ return ret; + -+#define VALUE_SHADER_BASE 0x1C0 -+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++) ++ ret = execute_op(kbdev, &config->disable_seq->ops[i]); + -+#define AS_STATUS_AS_ACTIVE_INT 0x2 ++ if (!ret) { ++ dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, ++ coresight_state_to_string(config->state), ++ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); ++ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; ++ } else { ++ /* Only assign the error if ret is not 0. ++ * As we don't want to overwrite an error from config enable ++ */ ++ if (!config->error) ++ config->error = ret; ++ } + -+/* Set to implementation defined, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -+/* Set to write back memory, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -+/* Set to inner non-cacheable, outer-non-cacheable -+ * Setting defined by the alloc bits is ignored, but set to a valid encoding: -+ * - no-alloc on read -+ * - no alloc on write -+ */ -+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull -+/* Set to shared memory, that is inner cacheable on ACE and inner or outer -+ * shared, otherwise inner non-cacheable. -+ * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. 
-+ */ -+#define AS_MEMATTR_AARCH64_SHARED 0x8ull ++ return ret; ++} + -+/* Symbols for default MEMATTR to use -+ * Default is - HW implementation defined caching -+ */ -+#define AS_MEMATTR_INDEX_DEFAULT 0 -+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 ++void *kbase_debug_coresight_csf_register(void *drv_data, ++ struct kbase_debug_coresight_csf_address_range *ranges, ++ int nr_ranges) ++{ ++ struct kbase_debug_coresight_csf_client *client, *client_entry; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ int k; + -+/* HW implementation defined caching */ -+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -+/* Force cache on */ -+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -+/* Write-alloc */ -+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -+/* Outer coherent, inner implementation defined policy */ -+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -+/* Outer coherent, write alloc inner */ -+#define AS_MEMATTR_INDEX_OUTER_WA 4 -+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 -+/* Normal memory, shared between MCU and Host */ -+#define AS_MEMATTR_INDEX_SHARED 6 ++ if (unlikely(!drv_data)) { ++ pr_err("NULL drv_data"); ++ return NULL; ++ } + -+/* Configuration bits for the CSF. */ -+#define CSF_CONFIG 0xF00 ++ kbdev = (struct kbase_device *)drv_data; + -+/* CSF_CONFIG register */ -+#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 ++ if (unlikely(!ranges)) { ++ dev_err(kbdev->dev, "NULL ranges"); ++ return NULL; ++ } + -+/* GPU control registers */ -+#define CORE_FEATURES 0x008 /* () Shader Core Features */ -+#define MCU_CONTROL 0x700 -+#define MCU_STATUS 0x704 ++ if (unlikely(!nr_ranges)) { ++ dev_err(kbdev->dev, "nr_ranges is 0"); ++ return NULL; ++ } + -+#define MCU_CNTRL_ENABLE (1 << 0) -+#define MCU_CNTRL_AUTO (1 << 1) -+#define MCU_CNTRL_DISABLE (0) ++ for (k = 0; k < nr_ranges; k++) { ++ if (ranges[k].end < ranges[k].start) { ++ dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x", ++ ranges[k].start, ranges[k].end); ++ return NULL; ++ } ++ } + -+#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) -+#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) ++ client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL); + -+#define MCU_STATUS_HALTED (1 << 1) ++ if (!client) ++ return NULL; + -+#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) -+#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) -+#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ -+ (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) -+#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ -+ (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ -+ (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) { ++ struct kbase_debug_coresight_csf_address_range *client_ranges = ++ client_entry->addr_ranges; ++ int i; + -+/* JOB IRQ flags */ -+#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ ++ for (i = 0; i < client_entry->nr_ranges; i++) { ++ int j; + -+/* GPU_COMMAND codes */ -+#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ -+#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ -+#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ -+#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ -+#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ -+#define 
GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ -+#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ -+#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */ ++ for (j = 0; j < nr_ranges; j++) { ++ if ((ranges[j].start < client_ranges[i].end) && ++ (client_ranges[i].start < ranges[j].end)) { ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ kfree(client); ++ dev_err(kbdev->dev, ++ "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x", ++ client_ranges[i].start, client_ranges[i].end, ++ ranges[j].start, ranges[j].end); + -+/* GPU_COMMAND_RESET payloads */ ++ return NULL; ++ } ++ } ++ } ++ } + -+/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. -+ * Power domains will remain powered on. -+ */ -+#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 ++ client->drv_data = drv_data; ++ client->addr_ranges = ranges; ++ client->nr_ranges = nr_ranges; ++ list_add(&client->link, &kbdev->csf.coresight.clients); ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and -+ * idle state. -+ */ -+#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 ++ return client; ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_register); + -+/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave -+ * the system bus in an inconsistent state. Use only as a last resort when nothing else works. -+ */ -+#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 ++void kbase_debug_coresight_csf_unregister(void *client_data) ++{ ++ struct kbase_debug_coresight_csf_client *client; ++ struct kbase_debug_coresight_csf_config *config_entry; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ bool retry = true; + -+/* GPU_COMMAND_TIME payloads */ -+#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ -+#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ ++ if (unlikely(!client_data)) { ++ pr_err("NULL client"); ++ return; ++ } + -+/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ ++ client = (struct kbase_debug_coresight_csf_client *)client_data; + -+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ ++ kbdev = (struct kbase_device *)client->drv_data; ++ if (unlikely(!kbdev)) { ++ pr_err("NULL drv_data in client"); ++ return; ++ } + -+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */ -+#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ ++ /* check for active config from client */ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ list_del_init(&client->link); + -+/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 
0x01 /* CLN only */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */ ++ while (retry && !list_empty(&kbdev->csf.coresight.configs)) { ++ retry = false; ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ if (config_entry->client == client) { ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ kbase_debug_coresight_csf_config_free(config_entry); ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ retry = true; ++ break; ++ } ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */ ++ kfree(client); ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister); + -+/* GPU_COMMAND command + payload */ -+#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ -+ ((__u32)opcode | ((__u32)payload << 8)) ++void * ++kbase_debug_coresight_csf_config_create(void *client_data, ++ struct kbase_debug_coresight_csf_sequence *enable_seq, ++ struct kbase_debug_coresight_csf_sequence *disable_seq) ++{ ++ struct kbase_debug_coresight_csf_client *client; ++ struct kbase_debug_coresight_csf_config *config; ++ struct kbase_device *kbdev; + -+/* Final GPU_COMMAND form */ -+/* No operation, nothing happens */ -+#define GPU_COMMAND_NOP \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) ++ if (unlikely(!client_data)) { ++ pr_err("NULL client"); ++ return NULL; ++ } + -+/* Stop all external bus interfaces, and then reset the entire GPU. */ -+#define GPU_COMMAND_SOFT_RESET \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) ++ client = (struct kbase_debug_coresight_csf_client *)client_data; + -+/* Immediately reset the entire GPU. 
*/ -+#define GPU_COMMAND_HARD_RESET \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) ++ kbdev = (struct kbase_device *)client->drv_data; ++ if (unlikely(!kbdev)) { ++ pr_err("NULL drv_data in client"); ++ return NULL; ++ } + -+/* Starts the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CYCLE_COUNT_START \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) ++ if (enable_seq) { ++ if (!validate_seq(client, enable_seq)) { ++ dev_err(kbdev->dev, "Invalid enable_seq"); ++ return NULL; ++ } ++ } + -+/* Stops the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CYCLE_COUNT_STOP \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) ++ if (disable_seq) { ++ if (!validate_seq(client, disable_seq)) { ++ dev_err(kbdev->dev, "Invalid disable_seq"); ++ return NULL; ++ } ++ } + -+/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */ -+#define GPU_COMMAND_CACHE_CLN_INV_L2 \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ -+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) ++ config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); ++ if (WARN_ON(!client)) ++ return NULL; + -+/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */ -+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ -+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) ++ config->client = client; ++ config->enable_seq = enable_seq; ++ config->disable_seq = disable_seq; ++ config->error = 0; ++ config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + -+/* Clean and invalidate L2, LSC, and Other caches */ -+#define GPU_COMMAND_CACHE_CLN_INV_FULL \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ -+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE)) ++ INIT_LIST_HEAD(&config->link); + -+/* Clean and invalidate only LSC cache */ -+#define GPU_COMMAND_CACHE_CLN_INV_LSC \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ -+ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) ++ return config; ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create); + -+/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ -+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) ++void kbase_debug_coresight_csf_config_free(void *config_data) ++{ ++ struct kbase_debug_coresight_csf_config *config; + -+/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ -+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) ++ if (unlikely(!config_data)) { ++ pr_err("NULL config"); ++ return; ++ } + -+/* Clean and 
invalidate physical range L2, LSC and Other caches */ -+#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ -+ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ -+ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) ++ config = (struct kbase_debug_coresight_csf_config *)config_data; + -+/* Merge cache flush commands */ -+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2)) ++ kbase_debug_coresight_csf_config_disable(config); + -+/* Places the GPU in protected mode */ -+#define GPU_COMMAND_SET_PROTECTED_MODE \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) ++ kfree(config); ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free); + -+/* Halt CSF */ -+#define GPU_COMMAND_FINISH_HALT \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) ++int kbase_debug_coresight_csf_config_enable(void *config_data) ++{ ++ struct kbase_debug_coresight_csf_config *config; ++ struct kbase_debug_coresight_csf_client *client; ++ struct kbase_device *kbdev; ++ struct kbase_debug_coresight_csf_config *config_entry; ++ unsigned long flags; ++ int ret = 0; + -+/* Clear GPU faults */ -+#define GPU_COMMAND_CLEAR_FAULT \ -+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) ++ if (unlikely(!config_data)) { ++ pr_err("NULL config"); ++ return -EINVAL; ++ } + -+/* End Command Values */ ++ config = (struct kbase_debug_coresight_csf_config *)config_data; ++ client = (struct kbase_debug_coresight_csf_client *)config->client; + -+/* GPU_FAULTSTATUS register */ -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ -+ (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ -+ >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -+#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -+#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ -+ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++ if (unlikely(!client)) { ++ pr_err("NULL client in config"); ++ return -EINVAL; ++ } + -+#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 -+#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ -+ (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) ++ kbdev = (struct kbase_device *)client->drv_data; ++ if (unlikely(!kbdev)) { ++ pr_err("NULL drv_data in client"); ++ return -EINVAL; ++ } + -+#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 -+#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ -+ (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) ++ /* Check to prevent double entry of config */ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ if (config_entry == config) { ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ dev_err(kbdev->dev, "Config already enabled"); ++ return -EINVAL; ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+#define GPU_FAULTSTATUS_JASID_SHIFT 12 -+#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) -+#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ -+ (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) -+#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ -+ (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ -+ (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 
-+#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ -+ (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) -+/* End GPU_FAULTSTATUS register */ ++ /* Check the state of Scheduler to confirm the desired state of MCU */ ++ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && ++ (kbdev->csf.scheduler.state != SCHED_SLEEPING) && ++ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || ++ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ /* Wait for MCU to reach the stable ON state */ ++ ret = kbase_pm_wait_for_desired_state(kbdev); + -+/* GPU_FAULTSTATUS_ACCESS_TYPE values */ -+#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 -+#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 -+#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 -+#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 -+/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ ++ if (ret) ++ dev_err(kbdev->dev, ++ "Wait for PM state failed when enabling coresight config"); ++ else ++ ret = coresight_config_enable(kbdev, config); + -+/* Implementation-dependent exception codes used to indicate CSG -+ * and CS errors that are not specified in the specs. -+ */ -+#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) -+#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) -+#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ } + -+/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 -+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A -+/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ ++ /* Add config to next enable sequence */ ++ if (!ret) { ++ spin_lock(&kbdev->csf.coresight.lock); ++ list_add(&config->link, &kbdev->csf.coresight.configs); ++ spin_unlock(&kbdev->csf.coresight.lock); ++ } + -+#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) -+#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -+#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ -+ (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -+#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ -+ (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ -+ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ kbase_csf_scheduler_unlock(kbdev); + -+/* IRQ flags */ -+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -+#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ -+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -+#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ -+#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ -+#define FLUSH_PA_RANGE_COMPLETED \ -+ (1 << 20) /* Set when a physical range cache clean operation has completed. 
*/ ++ return ret; ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable); + -+/* -+ * In Debug build, -+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interupts sources of GPU_IRQ -+ * by writing it onto GPU_IRQ_CLEAR/MASK registers. -+ * -+ * In Release build, -+ * GPU_IRQ_REG_COMMON is used. -+ * -+ * Note: -+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. -+ * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON -+ * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen -+ */ -+#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ -+ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) ++int kbase_debug_coresight_csf_config_disable(void *config_data) ++{ ++ struct kbase_debug_coresight_csf_config *config; ++ struct kbase_debug_coresight_csf_client *client; ++ struct kbase_device *kbdev; ++ struct kbase_debug_coresight_csf_config *config_entry; ++ bool found_in_list = false; ++ unsigned long flags; ++ int ret = 0; + -+/* GPU_FEATURES register */ -+#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2) -+#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT) -+#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \ -+ (((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT) -+/* End of GPU_FEATURES register */ ++ if (unlikely(!config_data)) { ++ pr_err("NULL config"); ++ return -EINVAL; ++ } + -+#endif /* _KBASE_GPU_REGMAP_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h -new file mode 100644 -index 000000000..f86f493c7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h -@@ -0,0 +1,276 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ -+#ifndef _KBASE_GPU_REGMAP_JM_H_ -+#define _KBASE_GPU_REGMAP_JM_H_ ++ config = (struct kbase_debug_coresight_csf_config *)config_data; + -+#if MALI_USE_CSF && defined(__KERNEL__) -+#error "Cannot be compiled with CSF" -+#endif ++ /* Exit early if not enabled prior */ ++ if (list_empty(&config->link)) ++ return ret; + -+/* Set to implementation defined, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -+/* Set to write back memory, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -+/* Set to inner non-cacheable, outer-non-cacheable -+ * Setting defined by the alloc bits is ignored, but set to a valid encoding: -+ * - no-alloc on read -+ * - no alloc on write -+ */ -+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull ++ client = (struct kbase_debug_coresight_csf_client *)config->client; + -+/* Symbols for default MEMATTR to use -+ * Default is - HW implementation defined caching -+ */ -+#define AS_MEMATTR_INDEX_DEFAULT 0 -+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 ++ if (unlikely(!client)) { ++ pr_err("NULL client in config"); ++ return -EINVAL; ++ } + -+/* HW implementation defined caching */ -+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -+/* Force cache on */ -+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -+/* Write-alloc */ -+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -+/* Outer coherent, inner implementation defined policy */ -+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -+/* Outer coherent, write alloc inner */ -+#define AS_MEMATTR_INDEX_OUTER_WA 4 -+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 ++ kbdev = (struct kbase_device *)client->drv_data; ++ if (unlikely(!kbdev)) { ++ pr_err("NULL drv_data in client"); ++ return -EINVAL; ++ } + -+/* GPU control registers */ ++ /* Check if the config is in the correct list */ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ if (config_entry == config) { ++ found_in_list = true; ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ -+#define JS_PRESENT 0x01C /* (RO) Job slots present */ ++ if (!found_in_list) { ++ dev_err(kbdev->dev, "Config looks corrupted"); ++ return -EINVAL; ++ } + -+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory -+ * region base address, low word -+ */ -+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory -+ * region base address, high word -+ */ -+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter -+ * configuration -+ */ -+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable -+ * flags for Job Manager -+ */ -+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable -+ * flags for shader cores -+ */ -+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable -+ * flags for tiler -+ */ -+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable -+ * flags for MMU/L2 cache -+ */ ++ kbase_csf_scheduler_lock(kbdev); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); + -+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -+#define JS6_FEATURES 0x0D8 /* 
(RO) Features of job slot 6 */ -+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ ++ /* Check the state of Scheduler to confirm the desired state of MCU */ ++ if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && ++ (kbdev->csf.scheduler.state != SCHED_SLEEPING) && ++ !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || ++ kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ /* Wait for MCU to reach the stable ON state */ ++ ret = kbase_pm_wait_for_desired_state(kbdev); + -+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) ++ if (ret) ++ dev_err(kbdev->dev, ++ "Wait for PM state failed when disabling coresight config"); ++ else ++ ret = coresight_config_disable(kbdev, config); + -+#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { ++ /* MCU is OFF, so the disable sequence was already executed. ++ * ++ * Propagate any error that would have occurred during the enable ++ * or disable sequence. ++ * ++ * This is done as part of the disable sequence, since the call from ++ * client is synchronous. ++ */ ++ ret = config->error; ++ } + -+/* Job control registers */ ++ /* Remove config from next disable sequence */ ++ spin_lock(&kbdev->csf.coresight.lock); ++ list_del_init(&config->link); ++ spin_unlock(&kbdev->csf.coresight.lock); + -+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ kbase_csf_scheduler_unlock(kbdev); + -+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ ++ return ret; ++} ++EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable); + -+#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ ++static void coresight_config_enable_all(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = ++ container_of(data, struct kbase_device, csf.coresight.enable_work); ++ struct kbase_debug_coresight_csf_config *config_entry; ++ unsigned long flags; + -+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + -+#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ if (coresight_config_enable(kbdev, config_entry)) ++ dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry); ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ } + -+#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+/* No JM-specific MMU control registers */ -+/* No JM-specific MMU address space control registers */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/* JS_COMMAND register commands */ -+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -+#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ -+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ ++ wake_up_all(&kbdev->csf.coresight.event_wait); ++} + -+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ ++static void coresight_config_disable_all(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = ++ container_of(data, struct kbase_device, csf.coresight.disable_work); ++ struct kbase_debug_coresight_csf_config *config_entry; ++ unsigned long flags; + -+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -+#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) -+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -+#define JS_CONFIG_START_MMU (1u << 10) -+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + -+/* JS_XAFFINITY register values */ -+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -+#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -+#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ if (coresight_config_disable(kbdev, config_entry)) ++ dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry); ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ } + -+/* JS_STATUS register values */ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + -+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. -+ * The values are separated to avoid dependency of userspace and kernel code. 
-+ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/* Group of values representing the job status instead of a particular fault */ -+#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ ++ wake_up_all(&kbdev->csf.coresight.event_wait); ++} + -+/* General fault values */ -+#define JS_STATUS_FAULT_BASE 0x40 -+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ ++void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+/* Instruction or data faults */ -+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -+/* NOTE: No fault with 0x57 code defined in spec. 
*/ -+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ ++ dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter", ++ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + -+/* Other faults */ -+#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); + -+/* JS_FEATURES register */ -+#define JS_FEATURE_NULL_JOB (1u << 1) -+#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -+#define JS_FEATURE_COMPUTE_JOB (1u << 4) -+#define JS_FEATURE_VERTEX_JOB (1u << 5) -+#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -+#define JS_FEATURE_TILER_JOB (1u << 7) -+#define JS_FEATURE_FUSED_JOB (1u << 8) -+#define JS_FEATURE_FRAGMENT_JOB (1u << 9) ++ kbase_pm_lock(kbdev); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/* JM_CONFIG register */ -+#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -+#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) ++ kbdev->csf.coresight.disable_on_pmode_enter = true; ++ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ kbase_pm_update_state(kbdev); + -+/* GPU_COMMAND values */ -+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ -+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/* GPU_COMMAND cache flush alias to CSF command payload */ -+#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES -+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES -+#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES -+#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES ++ kbase_pm_wait_for_desired_state(kbdev); + -+/* Merge cache flush commands */ -+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ -+ ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) ++ kbase_pm_unlock(kbdev); ++} + -+/* IRQ flags */ -+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. 
*/ -+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ -+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -+#define FLUSH_PA_RANGE_COMPLETED \ -+ (1 << 20) /* Set when a physical range cache clean operation has completed. */ ++void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev) ++{ ++ dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit", ++ coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + -+/* -+ * In Debug build, -+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interupts sources of GPU_IRQ -+ * by writing it onto GPU_IRQ_CLEAR/MASK registers. -+ * -+ * In Release build, -+ * GPU_IRQ_REG_COMMON is used. -+ * -+ * Note: -+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. -+ */ -+#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ -+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#endif /* _KBASE_GPU_REGMAP_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c -new file mode 100644 -index 000000000..8a84ef541 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c -@@ -0,0 +1,41 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter); + -+#include -+#include -+#include ++ kbdev->csf.coresight.enable_on_pmode_exit = true; ++ kbase_pm_update_state(kbdev); ++} + -+const char *kbase_gpu_access_type_name(u32 fault_status) ++void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state) +{ -+ switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status)) { -+ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: -+ return "ATOMIC"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_READ: -+ return "READ"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: -+ return "WRITE"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_EX: -+ return "EXECUTE"; ++ if (unlikely(!kbdev)) ++ return; ++ ++ if (unlikely(!kbdev->csf.coresight.workq)) ++ return; ++ ++ dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state)); ++ ++ switch (state) { ++ case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: ++ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work); ++ break; ++ case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: ++ queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work); ++ break; + default: -+ WARN_ON(1); -+ return NULL; ++ dev_err(kbdev->dev, "Invalid Coresight state %d", state); ++ break; + } +} -diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h -new file mode 100644 -index 000000000..6a937a5ed ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#ifndef _KBASE_GPU_FAULT_H_ -+#define _KBASE_GPU_FAULT_H_ ++bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state) ++{ ++ struct kbase_debug_coresight_csf_config *config_entry; ++ unsigned long flags; ++ bool success = true; + -+/** -+ * kbase_gpu_exception_name() - Returns associated string of the exception code -+ * -+ * @exception_code: exception code -+ * -+ * This function is called by error handlers when GPU reports an error. -+ * -+ * Return: Error string associated with the exception code -+ */ -+const char *kbase_gpu_exception_name(u32 exception_code); ++ dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state)); + -+/** -+ * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE -+ * into string. -+ * @fault_status: value of FAULTSTATUS register. -+ * -+ * After MMU fault, this function can be used to get readable information about -+ * access_type of the MMU fault. -+ * -+ * Return: String of the access type. 
-+ */ -+const char *kbase_gpu_access_type_name(u32 fault_status); ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + -+#endif /* _KBASE_GPU_FAULT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h ++ list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { ++ if (state != config_entry->state) { ++ success = false; ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ ++ return success; ++} ++KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check); ++ ++bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state) ++{ ++ const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; ++ unsigned long flags; ++ bool success = true; ++ ++ dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state)); ++ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ ++ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, ++ link) { ++ const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; ++ long remaining; ++ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ remaining = wait_event_timeout(kbdev->csf.coresight.event_wait, ++ state == config_entry->state, wait_timeout); ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ ++ if (!remaining) { ++ success = false; ++ dev_err(kbdev->dev, ++ "Timeout waiting for Coresight state transition %s to %s", ++ coresight_state_to_string(prev_state), ++ coresight_state_to_string(state)); ++ } ++ } ++ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++ ++ return success; ++} ++KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait); ++ ++int kbase_debug_coresight_csf_init(struct kbase_device *kbdev) ++{ ++ kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0); ++ if (kbdev->csf.coresight.workq == NULL) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&kbdev->csf.coresight.clients); ++ INIT_LIST_HEAD(&kbdev->csf.coresight.configs); ++ INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all); ++ INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all); ++ init_waitqueue_head(&kbdev->csf.coresight.event_wait); ++ spin_lock_init(&kbdev->csf.coresight.lock); ++ ++ kbdev->csf.coresight.disable_on_pmode_enter = false; ++ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ ++ return 0; ++} ++ ++void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) ++{ ++ struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry; ++ struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; ++ unsigned long flags; ++ ++ kbdev->csf.coresight.disable_on_pmode_enter = false; ++ kbdev->csf.coresight.enable_on_pmode_exit = false; ++ ++ cancel_work_sync(&kbdev->csf.coresight.enable_work); ++ cancel_work_sync(&kbdev->csf.coresight.disable_work); ++ destroy_workqueue(kbdev->csf.coresight.workq); ++ kbdev->csf.coresight.workq = NULL; ++ ++ spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); ++ ++ list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, ++ link) { ++ list_del_init(&config_entry->link); ++ kfree(config_entry); ++ } ++ ++ list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients, ++ link) { ++ 
list_del_init(&client_entry->link); ++ kfree(client_entry); ++ } ++ ++ spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); ++} +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h new file mode 100644 -index 000000000..6cef2bdd1 +index 000000000..06d62dc70 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h -@@ -0,0 +1,637 @@ ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h +@@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -181822,675 +183717,922 @@ index 000000000..6cef2bdd1 + * + */ + -+#ifndef _KBASE_GPU_REGMAP_H_ -+#define _KBASE_GPU_REGMAP_H_ -+ -+#include -+#include -+#include -+#if MALI_USE_CSF -+#include "backend/mali_kbase_gpu_regmap_csf.h" -+#else -+#include "backend/mali_kbase_gpu_regmap_jm.h" -+#endif -+ -+/* GPU_U definition */ -+#ifdef __ASSEMBLER__ -+#define GPU_U(x) x -+#define GPU_UL(x) x -+#define GPU_ULL(x) x -+#else -+#define GPU_U(x) x##u -+#define GPU_UL(x) x##ul -+#define GPU_ULL(x) x##ull -+#endif /* __ASSEMBLER__ */ -+ -+/* Begin Register Offsets */ -+/* GPU control registers */ -+ -+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -+#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -+#define MMU_FEATURES 0x014 /* (RO) MMU features */ -+#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -+#define GPU_IRQ_MASK 0x028 /* (RW) */ -+ -+#define GPU_COMMAND 0x030 /* (WO) */ -+#define GPU_STATUS 0x034 /* (RO) */ -+ -+#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ -+ -+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ -+ -+#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ -+ -+#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -+#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core -+ * supergroup are l2 coherent -+ */ -+ -+#define PWR_KEY 0x050 /* (WO) Power manager key register */ -+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ -+#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ -+#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ -+#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */ -+#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ -+#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ -+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ -+ -+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* 
(RO) Maximum workgroup size */ -+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -+#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ -+ -+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -+#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ -+ -+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) -+ -+#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ -+#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ -+#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ -+#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ ++#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ ++#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ + -+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ ++#include ++#include + -+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ ++/** ++ * struct kbase_debug_coresight_csf_client - Coresight client definition ++ * ++ * @drv_data: Pointer to driver device data. ++ * @addr_ranges: Arrays of address ranges used by the registered client. ++ * @nr_ranges: Size of @addr_ranges array. ++ * @link: Link item of a Coresight client. ++ * Linked to &struct_kbase_device.csf.coresight.clients. ++ */ ++struct kbase_debug_coresight_csf_client { ++ void *drv_data; ++ struct kbase_debug_coresight_csf_address_range *addr_ranges; ++ u32 nr_ranges; ++ struct list_head link; ++}; + -+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ ++/** ++ * enum kbase_debug_coresight_csf_state - Coresight configuration states ++ * ++ * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled. ++ * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled. ++ */ ++enum kbase_debug_coresight_csf_state { ++ KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0, ++ KBASE_DEBUG_CORESIGHT_CSF_ENABLED, ++}; + -+#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -+#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ ++/** ++ * struct kbase_debug_coresight_csf_config - Coresight configuration definition ++ * ++ * @client: Pointer to the client for which the configuration is created. ++ * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL. ++ * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL. ++ * @state: Current Coresight configuration state. ++ * @error: Error code used to know if an error occurred during the execution ++ * of the enable or disable sequences. ++ * @link: Link item of a Coresight configuration. ++ * Linked to &struct_kbase_device.csf.coresight.configs. 
++ */ ++struct kbase_debug_coresight_csf_config { ++ void *client; ++ struct kbase_debug_coresight_csf_sequence *enable_seq; ++ struct kbase_debug_coresight_csf_sequence *disable_seq; ++ enum kbase_debug_coresight_csf_state state; ++ int error; ++ struct list_head link; ++}; + -+#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -+#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ ++/** ++ * struct kbase_debug_coresight_device - Object representing the Coresight device ++ * ++ * @clients: List head to maintain Coresight clients. ++ * @configs: List head to maintain Coresight configs. ++ * @lock: A lock to protect client/config lists. ++ * Lists can be accessed concurrently by ++ * Coresight kernel modules and kernel threads. ++ * @workq: Work queue for Coresight enable/disable execution. ++ * @enable_work: Work item used to enable Coresight. ++ * @disable_work: Work item used to disable Coresight. ++ * @event_wait: Wait queue for Coresight events. ++ * @enable_on_pmode_exit: Flag used by the PM state machine to ++ * identify if Coresight enable is needed. ++ * @disable_on_pmode_enter: Flag used by the PM state machine to ++ * identify if Coresight disable is needed. ++ */ ++struct kbase_debug_coresight_device { ++ struct list_head clients; ++ struct list_head configs; ++ spinlock_t lock; ++ struct workqueue_struct *workq; ++ struct work_struct enable_work; ++ struct work_struct disable_work; ++ wait_queue_head_t event_wait; ++ bool enable_on_pmode_exit; ++ bool disable_on_pmode_enter; ++}; + -+#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ ++/** ++ * kbase_debug_coresight_csf_init - Initialize Coresight resources. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function should be called once at device initialization. ++ * ++ * Return: 0 on success. ++ */ ++int kbase_debug_coresight_csf_init(struct kbase_device *kbdev); + -+#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -+#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_term - Terminate Coresight resources. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function should be called at device termination to prevent any ++ * memory leaks if Coresight module would have been removed without calling ++ * kbasep_debug_coresight_csf_trace_disable(). ++ */ ++void kbase_debug_coresight_csf_term(struct kbase_device *kbdev); + -+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected ++ * mode enter. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function should be called just before requesting to enter protected mode. ++ * It will trigger a PM state machine transition from MCU_ON ++ * to ON_PMODE_ENTER_CORESIGHT_DISABLE. ++ */ ++void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev); + -+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected ++ * mode enter. 
++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function should be called after protected mode exit is acknowledged. ++ * It will trigger a PM state machine transition from MCU_ON ++ * to ON_PMODE_EXIT_CORESIGHT_ENABLE. ++ */ ++void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev); + -+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_state_request - Request Coresight state transition. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @state: Coresight state to check for. ++ */ ++void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state); + -+#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -+#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_state_check - Check Coresight state. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @state: Coresight state to check for. ++ * ++ * Return: true if all states of configs are @state. ++ */ ++bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state); + -+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ ++/** ++ * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @state: Coresight state to wait for. ++ * ++ * Return: true if all configs become @state in pre-defined time period. ++ */ ++bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, ++ enum kbase_debug_coresight_csf_state state); + -+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ ++#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +new file mode 100644 +index 000000000..41b2b00f1 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +@@ -0,0 +1,322 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ ++/* ++ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** ++ */ + -+#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ -+#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) -+#define ASN_HASH_COUNT 3 ++/* ++ * The purpose of this header file is just to contain a list of trace code ++ * identifiers ++ * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace_csf.h ++ * ++ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT ++ * DESCRIBED IN mali_kbase_debug_ktrace_codes.h ++ */ + -+#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */ -+#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4) -+#define SYSC_ALLOC_COUNT 8 ++#if 0 /* Dummy section to avoid breaking formatting */ ++int dummy_array[] = { ++#endif ++ /* ++ * Generic CSF events ++ */ ++ /* info_val = 0 */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), ++ /* info_val == number of CSGs supported */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END), ++ /* info_val[0:7] == fw version_minor ++ * info_val[15:8] == fw version_major ++ * info_val[63:32] == fw version_hash ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), ++ /* info_val == total number of runnable groups across all kctxs */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END), ++ /* info_val = timeout in ms */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START), ++ /* info_val = remaining ms timeout, or 0 if timedout */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_EVENT), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT), + -+#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ -+#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_START), ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END), ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_START), ++ /* info_val = GLB_REQ ^ GLB_ACQ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_GLB_REQ_ACK), ++ /* info_val[31:0] = num non idle offslot groups ++ * info_val[32] = scheduler can suspend on idle ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_ADVANCE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_NOADVANCE), ++ /* kctx is added to the back of the list */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_INSERT), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_REMOVE), ++ /* kctx is moved to the back of the list */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_ROTATE), ++ 
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_HEAD), + -+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_START), ++ /* 4-bit encoding of boolean values (ease of reading as hex values) ++ * ++ * info_val[3:0] = was reset active/failed to be prevented ++ * info_val[7:4] = whether scheduler was both idle and suspendable ++ * info_val[11:8] = whether all groups were suspended ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END), + -+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ ++ /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_UPDATE_IDLE_SLOTS_ACK), + -+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ ++ /* info_val[63:0] = GPU cycle counter, used mainly for benchmarking ++ * purpose. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END), + -+#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -+#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP), + -+#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */ -+#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */ ++ /* ++ * Group events ++ */ ++ /* info_val[2:0] == CSG_REQ state issued ++ * info_val[19:16] == as_nr ++ * info_val[63:32] == endpoint config (max number of endpoints allowed) ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START_REQ), ++ /* info_val == CSG_REQ state issued */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP_REQ), ++ /* info_val == CSG_ACK state */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_RUNNING), ++ /* info_val == CSG_ACK state */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED), ++ /* info_val == slot cleaned */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED), ++ /* info_val = slot requesting STATUS_UPDATE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_UPDATE_IDLE_SLOT_REQ), ++ /* info_val = scheduler's new csg_slots_idle_mask[0] ++ * group->csg_nr indicates which bit was set ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS), ++ /* info_val = scheduler's new csg_slots_idle_mask[0] ++ * group->csg_nr indicates which bit was cleared ++ * ++ * in case of no group, multiple bits may have been updated ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR), ++ /* info_val == previous priority */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_PRIO_UPDATE), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_SYNC_UPDATE), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_IDLE), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROGRESS_TIMER_EVENT), ++ /* info_val[31:0] == CSG_REQ ^ CSG_ACQ ++ * info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK ++ */ 
++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_DONE), ++ /* info_val == run state of the group */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_DESCHEDULE), ++ /* info_val == run state of the group */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SCHEDULE), ++ /* info_val[31:0] == new run state of the evicted group ++ * info_val[63:32] == number of runnable groups ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT), + -+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ -+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ -+#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ ++ /* info_val == new num_runnable_grps ++ * group is added to the back of the list for its priority level ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_INSERT), ++ /* info_val == new num_runnable_grps ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_REMOVE), ++ /* info_val == num_runnable_grps ++ * group is moved to the back of the list for its priority level ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_ROTATE), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_HEAD), ++ /* info_val == new num_idle_wait_grps ++ * group is added to the back of the list ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_INSERT), ++ /* info_val == new num_idle_wait_grps ++ * group is added to the back of the list ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_REMOVE), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_HEAD), + -+/* Job control registers */ ++ /* info_val == is scheduler running with protected mode tasks */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER_CHECK), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_EXIT), ++ /* info_val[31:0] == number of GPU address space slots in use ++ * info_val[63:32] == number of runnable groups ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOP_GRP), ++ /* info_val == new count of off-slot non-idle groups ++ * no group indicates it was set rather than incremented ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC), ++ /* info_val == new count of off-slot non-idle groups */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC), ++ /* info_val = scheduler's new csg_slots_idle_mask[0] ++ * group->csg_nr indicates which bit was set ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS), + -+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), + -+/* MMU control registers */ ++ /* info_val = scheduler state */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), + -+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -+#define 
MMU_AS9 0x640 /* Configuration registers for address space 9 */ -+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ ++ /* info_val = mcu state */ ++#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n), ++#include "backend/gpu/mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE + -+/* MMU address space control registers */ -+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ ++ /* info_val = number of runnable groups */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC), ++ /* info_val = new run state of the evicted group */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED), ++ /* info_val = get the number of active CSGs */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED), + -+/* (RO) Secondary fault address for address space n, low word */ -+#define AS_FAULTEXTRA_LO 0x38 -+/* (RO) Secondary fault address for address space n, high word */ -+#define AS_FAULTEXTRA_HI 0x3C ++ /* ++ * Group + Queue events ++ */ ++ /* info_val == queue->enabled */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), ++ /* info_val == queue->enabled before stop */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ), ++ /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group ++ * being suspended ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_FAULT), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_TILER_OOM), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_PROTM_PEND), ++ /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), ++ /* info_val == group->run_State (for group the queue is bound to) */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), ++ /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START), ++ /* info_val == bool for result of the evaluation */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_END), ++ /* info_val == contents of CS_STATUS_WAIT */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_WAIT_STATUS), ++ /* info_val == current sync value pointed to by queue->sync_ptr */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_CUR_VAL), ++ /* info_val == current value of 
CS_STATUS_WAIT_SYNC_VALUE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_TEST_VAL), ++ /* info_val == current value of CS_STATUS_BLOCKED_REASON */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_BLOCKED_REASON), ++ /* info_val = group's new protm_pending_bitmap[0] ++ * queue->csi_index indicates which bit was set ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_SET), ++ /* info_val = group's new protm_pending_bitmap[0] ++ * queue->csi_index indicates which bit was cleared ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_CLEAR), + -+/* End Register Offsets */ ++ /* ++ * KCPU queue events ++ */ ++ /* KTrace info_val == KCPU queue fence context ++ * KCPU extra_info_val == N/A. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_CREATE), ++ /* KTrace info_val == Number of pending commands in KCPU queue when ++ * it is destroyed. ++ * KCPU extra_info_val == Number of CQS wait operations present in ++ * the KCPU queue when it is destroyed. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DELETE), ++ /* KTrace info_val == CQS event memory address ++ * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents ++ * of error field. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_SET), ++ /* KTrace info_val == Number of CQS objects to be waited upon ++ * KCPU extra_info_val == N/A. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_START), ++ /* KTrace info_val == CQS event memory address ++ * KCPU extra_info_val == 1 if CQS was signaled with an error and queue ++ * inherited the error, otherwise 0. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_END), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_SIGNAL), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_START), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_END), + -+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) ++#if 0 /* Dummy section to avoid breaking formatting */ ++}; ++#endif + ++ /* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +new file mode 100644 +index 000000000..6ba98b7c4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +@@ -0,0 +1,206 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * MMU_IRQ_RAWSTAT register values. Values are valid also for -+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. ++ * ++ * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * + */ + -+#define MMU_PAGE_FAULT_FLAGS 16 -+ -+/* Macros returning a bitmask to retrieve page fault or bus error flags from -+ * MMU registers ++/* ++ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ -+#define MMU_PAGE_FAULT(n) (1UL << (n)) -+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* -+ * Begin AARCH64 MMU TRANSTAB register values ++ * The purpose of this header file is just to contain a list of trace code ++ * identifiers ++ * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace_jm.h ++ * ++ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT ++ * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ -+#define MMU_HW_OUTA_BITS 40 -+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + ++#if 0 /* Dummy section to avoid breaking formatting */ ++int dummy_array[] = { ++#endif ++ ++ /* ++ * Job Slot management events ++ */ ++ /* info_val==irq rawstat at start */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), ++ /* info_val==jobs processed */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), ++ /* In the following: ++ * ++ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some ++ * soft-stop cases) ++ * - uatom==kernel-side mapped uatom address (for correlation with ++ * user-side) ++ */ ++ /* info_val==exit code; gpu_addr==chain gpuaddr */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE), ++ /* gpu_addr==JS_HEAD read ++ * info_val==event code ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_RETURN_ATOM_TO_JS), ++ /* gpu_addr==JS_HEAD read ++ * info_val==event code ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_MARK_FOR_RETURN_TO_JS), ++ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of ++ * affinity ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT), ++ /* gpu_addr is as follows: ++ * - If JS_STATUS active after soft-stop, val==gpu addr written to ++ * JS_HEAD on submit ++ * - otherwise gpu_addr==0 ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), ++ /* gpu_addr==JS_TAIL read */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), ++ /* gpu_addr is as follows: ++ * - If JS_STATUS active before soft-stop, val==JS_HEAD ++ * - otherwise gpu_addr==0 ++ */ ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), ++ /* info_val == is_scheduled */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), ++ /* info_val == is_scheduled */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), ++ /* info_val == nr jobs submitted */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), ++ /* gpu_addr==JS_HEAD_NEXT last written */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), ++ KBASE_KTRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), ++ /* ++ * Job dispatch events ++ */ ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE), ++ /* gpu_addr==value to write into JS_HEAD */ ++ 
KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), ++ /* gpu_addr==0, info_val==0, uatom==0 */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), ++ /* ++ * Scheduler Core events ++ */ ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB), ++ /* gpu_addr==last value written/would be written to JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), ++ /* gpu_addr==value to write into JS_HEAD ++ * info_val==priority of atom as a KBASE_JS_ATOM_SCHED_PRIO_<...> value ++ * (0 highest) ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_PULL_JOB), ++ /* gpu_addr==value that would be written to JS_HEAD if run again */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_UNPULL_JOB), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), ++ /* info_val == lower 32 bits of rechecked affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), ++ /* info_val == lower 32 bits of rechecked affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), ++ /* info_val == the ctx attribute now on ctx */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), ++ /* info_val == the ctx attribute now on runpool */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), ++ /* info_val == the ctx attribute now off ctx */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), ++ /* info_val == the ctx attribute now off runpool */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_RETURN_WORKER_END), ++ /* info_val==priority level blocked (0 highest) */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_BLOCKED), ++ /* info_val==priority level unblocked (0 highest) ++ * note that the priority level may still be blocked on higher levels ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_UNBLOCKED), ++ /* gpu_addr==value to write into JS_HEAD ++ * info_val==priority level unblocked - priorities at this and higher ++ * are unblocked (0 highest) ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED), ++ /* gpu_addr==value to write into JS_HEAD ++ * info_val==priority level blocked (0 highest) ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_SLOT_PRIO_IS_BLOCKED), ++ /* ++ * Scheduler Policy events ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), ++ 
KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), ++ /* info_val == whether it was evicted */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), ++ /* gpu_addr==JS_HEAD to write if the job were run */ ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), ++ ++#if 0 /* Dummy section to avoid breaking formatting */ ++}; ++#endif ++ ++/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c +new file mode 100644 +index 000000000..cff6f8959 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c +@@ -0,0 +1,199 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* -+ * Begin MMU STATUS register values ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * + */ -+#define AS_STATUS_AS_ACTIVE 0x01 + -+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) ++#include ++#include "debug/mali_kbase_debug_ktrace_internal.h" ++#include "debug/backend/mali_kbase_debug_ktrace_csf.h" + -+#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -+#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -+#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ -+ (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -+#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 ++#if KBASE_KTRACE_TARGET_RBUF + -+#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) -+#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ -+ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) ++{ ++ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ++ "group,slot,prio,csi,kcpu"), 0); ++} + -+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) -+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) -+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) ++void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, ++ char *buffer, int sz, s32 *written) ++{ ++ const union kbase_ktrace_backend * const be_msg = &trace_msg->backend; ++ /* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the ++ * other backend-specific flags currently imply this anyway ++ */ + -+#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 -+#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) -+#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ -+ (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) ++ /* group parts */ ++ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { ++ const s8 slot = be_msg->gpu.csg_nr; ++ /* group,slot, */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%u,%d,", be_msg->gpu.group_handle, slot), 0); + -+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0) -+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \ -+ ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) -+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \ -+ (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \ -+ PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) ++ /* prio */ ++ if (slot >= 0) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%u", be_msg->gpu.slot_prio), 0); + -+/* -+ * Begin MMU TRANSCFG register values -+ */ -+#define AS_TRANSCFG_ADRMODE_LEGACY 0 -+#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -+#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -+#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 ++ /* , */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ","), 0); ++ } else { ++ /* No group,slot,prio fields, but ensure ending with "," */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ",,,"), 0); ++ } + -+#define 
AS_TRANSCFG_ADRMODE_MASK 0xF ++ /* queue parts: csi */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%d", be_msg->gpu.csi_index), 0); + -+/* -+ * Begin TRANSCFG register values -+ */ -+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) ++ /* , */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ","), 0); + -+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) ++ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { ++ /* kcpu data */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "kcpu %d (0x%llx)", ++ be_msg->kcpu.id, ++ be_msg->kcpu.extra_info_val), 0); ++ } + -+/* -+ * Begin Command Values -+ */ ++ /* Don't end with a trailing "," - this is a 'standalone' formatted ++ * msg, caller will handle the delimiters ++ */ ++} + -+/* AS_COMMAND register commands */ -+#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -+/* Flush all L2 caches then issue a flush region command to all MMUs */ -+#define AS_COMMAND_FLUSH_PT 0x04 -+/* Wait for memory accesses to complete, flush all the L1s cache then flush all -+ * L2 caches then issue a flush region command to all MMUs -+ */ -+#define AS_COMMAND_FLUSH_MEM 0x05 ++void kbasep_ktrace_add_csf(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, struct kbase_queue_group *group, ++ struct kbase_queue *queue, kbase_ktrace_flag_t flags, ++ u64 info_val) ++{ ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; ++ struct kbase_context *kctx = NULL; + -+/* AS_LOCKADDR register */ -+#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) -+#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ -+ (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) -+#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ -+ (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ -+ AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) -+#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ -+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ -+ (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ -+ AS_LOCKADDR_LOCKADDR_SIZE_MASK)) -+#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) -+#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ -+ (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) -+#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ -+ (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ -+ AS_LOCKADDR_LOCKADDR_BASE_SHIFT) -+#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ -+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ -+ (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ -+ AS_LOCKADDR_LOCKADDR_BASE_MASK)) -+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6) -+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) -+#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \ -+ (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ -+ ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) ++ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) ++ return; + -+/* GPU_STATUS values */ 
-+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -+#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -+#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + -+/* PRFCNT_CONFIG register values */ -+#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -+#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + -+/* The performance counters are disabled. */ -+#define PRFCNT_CONFIG_MODE_OFF 0 -+/* The performance counters are enabled, but are only written out when a -+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. -+ */ -+#define PRFCNT_CONFIG_MODE_MANUAL 1 -+/* The performance counters are enabled, and are written out each time a tile -+ * finishes rendering. -+ */ -+#define PRFCNT_CONFIG_MODE_TILE 2 ++ /* Determine the kctx */ ++ if (group) ++ kctx = group->kctx; ++ else if (queue) ++ kctx = queue->kctx; + -+/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ -+/* Use GPU implementation-defined caching policy. */ -+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -+/* The attribute set to force all resources to be cached. */ -+#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -+/* Inner write-alloc cache setup, no outer caching */ -+#define AS_MEMATTR_WRITE_ALLOC 0x8Dull ++ /* Fill the common part of the message (including backend.gpu.flags) */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, ++ info_val); + -+/* Use GPU implementation-defined caching policy. */ -+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -+/* The attribute set to force all resources to be cached. */ -+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -+/* Inner write-alloc cache setup, no outer caching */ -+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -+/* Set to implementation defined, outer caching */ -+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -+/* Set to write back memory, outer caching */ -+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -+/* There is no LPAE support for non-cacheable, since the memory type is always -+ * write-back. -+ * Marking this setting as reserved for LPAE -+ */ -+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED ++ /* Indicate to the common code that backend-specific parts will be ++ * valid ++ */ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; + -+/* L2_MMU_CONFIG register */ -+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) ++ /* Fill the CSF-specific parts of the message ++ * ++ * Generally, no need to use default initializers when queue/group not ++ * present - can usually check the flags instead. 
++ */ + -+/* End L2_MMU_CONFIG register */ ++ if (queue) { ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE; ++ trace_msg->backend.gpu.csi_index = queue->csi_index; ++ } + -+/* THREAD_* registers */ ++ if (group) { ++ const s8 slot = group->csg_nr; + -+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -+#define IMPLEMENTATION_UNSPECIFIED 0 -+#define IMPLEMENTATION_SILICON 1 -+#define IMPLEMENTATION_FPGA 2 -+#define IMPLEMENTATION_MODEL 3 ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_GROUP; + -+/* Default values when registers are not supported by the implemented hardware */ -+#define THREAD_MT_DEFAULT 256 -+#define THREAD_MWS_DEFAULT 256 -+#define THREAD_MBS_DEFAULT 256 -+#define THREAD_MR_DEFAULT 1024 -+#define THREAD_MTQ_DEFAULT 4 -+#define THREAD_MTGS_DEFAULT 10 ++ trace_msg->backend.gpu.csg_nr = slot; + -+/* End THREAD_* registers */ ++ if (slot >= 0) { ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; + -+/* SHADER_CONFIG register */ -+#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -+#define SC_TLS_HASH_ENABLE (1ul << 17) -+#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) -+#define SC_VAR_ALGORITHM (1ul << 29) -+/* End SHADER_CONFIG register */ ++ trace_msg->backend.gpu.slot_prio = ++ csg_slot->priority; ++ } ++ /* slot >=0 indicates whether slot_prio valid, so no need to ++ * initialize in the case where it's invalid ++ */ + -+/* TILER_CONFIG register */ -+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) -+/* End TILER_CONFIG register */ ++ trace_msg->backend.gpu.group_handle = group->handle; ++ } + -+/* L2_CONFIG register */ -+#define L2_CONFIG_SIZE_SHIFT 16 -+#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) -+#define L2_CONFIG_HASH_SHIFT 24 -+#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) -+#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 -+#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) -+/* End L2_CONFIG register */ ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + -+/* AMBA_FEATURES register */ -+#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0) -+#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT) -+#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \ -+ (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \ -+ AMBA_FEATURES_ACE_LITE_SHIFT) -+#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \ -+ (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \ -+ AMBA_FEATURES_ACE_LITE_MASK)) -+#define AMBA_FEATURES_ACE_SHIFT GPU_U(1) -+#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT) -+#define AMBA_FEATURES_ACE_GET(reg_val) \ -+ (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT) -+#define AMBA_FEATURES_ACE_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \ -+ (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK)) -+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) -+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \ -+ (GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) -+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \ -+ (((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \ -+ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) -+#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ -+ (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ -+ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) -+#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) -+#define 
AMBA_FEATURES_INVALIDATE_HINT_MASK \ -+ (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -+#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ -+ (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ -+ AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -+#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ -+ (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ -+ AMBA_FEATURES_INVALIDATE_HINT_MASK)) ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++} + -+/* AMBA_ENABLE register */ -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \ -+ (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \ -+ (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \ -+ AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \ -+ (((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \ -+ AMBA_ENABLE_COHERENCY_PROTOCOL_MASK)) -+/* AMBA_ENABLE_coherency_protocol values */ -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0 -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1 -+#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F -+/* End of AMBA_ENABLE_coherency_protocol values */ -+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) -+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \ -+ (GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) -+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \ -+ (((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \ -+ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) -+#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ -+ (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ -+ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) -+#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) -+#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ -+ (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -+#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ -+ (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ -+ AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -+#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ -+ (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ -+ (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ -+ AMBA_ENABLE_INVALIDATE_HINT_MASK)) ++void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2) ++{ ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; ++ struct kbase_context *kctx = queue->kctx; + -+/* IDVS_GROUP register */ -+#define IDVS_GROUP_SIZE_SHIFT (16) -+#define IDVS_GROUP_MAX_SIZE (0x3F) ++ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) ++ return; + -+/* SYSC_ALLOC read IDs */ -+#define SYSC_ALLOC_ID_R_OTHER 0x00 -+#define SYSC_ALLOC_ID_R_CSF 0x02 -+#define SYSC_ALLOC_ID_R_MMU 0x04 -+#define SYSC_ALLOC_ID_R_TILER_VERT 0x08 -+#define SYSC_ALLOC_ID_R_TILER_PTR 0x09 -+#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A -+#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B -+#define SYSC_ALLOC_ID_R_IC 0x10 -+#define SYSC_ALLOC_ID_R_ATTR 0x11 -+#define SYSC_ALLOC_ID_R_SCM 0x12 -+#define SYSC_ALLOC_ID_R_FSDC 0x13 -+#define SYSC_ALLOC_ID_R_VL 0x14 -+#define SYSC_ALLOC_ID_R_PLR 0x15 -+#define SYSC_ALLOC_ID_R_TEX 0x18 -+#define SYSC_ALLOC_ID_R_LSC 0x1c ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + -+/* SYSC_ALLOC write IDs */ 
-+#define SYSC_ALLOC_ID_W_OTHER 0x00 -+#define SYSC_ALLOC_ID_W_CSF 0x02 -+#define SYSC_ALLOC_ID_W_PCB 0x07 -+#define SYSC_ALLOC_ID_W_TILER_PTR 0x09 -+#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A -+#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B -+#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C -+#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D -+#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10 -+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11 -+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12 -+#define SYSC_ALLOC_ID_W_TIB_CRC 0x13 -+#define SYSC_ALLOC_ID_W_TIB_DS 0x14 -+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15 -+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16 -+#define SYSC_ALLOC_ID_W_LSC 0x1C ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + -+/* SYSC_ALLOC values */ -+#define SYSC_ALLOC_L2_ALLOC 0x0 -+#define SYSC_ALLOC_NEVER_ALLOC 0x2 -+#define SYSC_ALLOC_ALWAYS_ALLOC 0x3 -+#define SYSC_ALLOC_PTL_ALLOC 0x4 -+#define SYSC_ALLOC_L2_PTL_ALLOC 0x5 ++ /* Fill the common part of the message */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, ++ info_val1); + -+/* SYSC_ALLOC register */ -+#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0) -+#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \ -+ SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \ -+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \ -+ SYSC_ALLOC_R_SYSC_ALLOC0_MASK)) -+/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */ -+#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4) -+#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \ -+ SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \ -+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \ -+ SYSC_ALLOC_W_SYSC_ALLOC0_MASK)) -+/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */ -+#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8) -+#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \ -+ SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \ -+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \ -+ SYSC_ALLOC_R_SYSC_ALLOC1_MASK)) -+/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */ -+#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12) -+#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \ -+ SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \ -+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \ -+ SYSC_ALLOC_W_SYSC_ALLOC1_MASK)) -+/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */ -+#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16) -+#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \ -+ SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \ -+ (((value) << 
SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \ -+ SYSC_ALLOC_R_SYSC_ALLOC2_MASK)) -+/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */ -+#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20) -+#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \ -+ SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \ -+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \ -+ SYSC_ALLOC_W_SYSC_ALLOC2_MASK)) -+/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */ -+#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24) -+#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \ -+ SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) -+#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \ -+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \ -+ SYSC_ALLOC_R_SYSC_ALLOC3_MASK)) -+/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */ -+#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28) -+#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \ -+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \ -+ SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) -+#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \ -+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \ -+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \ -+ SYSC_ALLOC_W_SYSC_ALLOC3_MASK)) -+/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */ ++ /* Indicate to the common code that backend-specific parts will be ++ * valid ++ */ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; + -+/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#undef GPU_IRQ_REG_ALL -+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ /* Fill the KCPU-specific parts of the message */ ++ trace_msg->backend.kcpu.id = queue->id; ++ trace_msg->backend.kcpu.extra_info_val = info_val2; ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_KCPU; + -+#endif /* _KBASE_GPU_REGMAP_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/Kbuild b/drivers/gpu/arm/bifrost/hwcnt/Kbuild -new file mode 100755 -index 000000000..c1a381b24 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/Kbuild -@@ -0,0 +1,37 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + -+bifrost_kbase-y += \ -+ hwcnt/mali_kbase_hwcnt.o \ -+ hwcnt/mali_kbase_hwcnt_gpu.o \ -+ hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ -+ hwcnt/mali_kbase_hwcnt_types.o \ -+ hwcnt/mali_kbase_hwcnt_virtualizer.o \ -+ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++} + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += \ -+ hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \ -+ hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o -+else -+ bifrost_kbase-y += \ -+ hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \ -+ hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o -+endif -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h ++#endif /* KBASE_KTRACE_TARGET_RBUF */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h new file mode 100644 -index 000000000..6cfa6f5ee +index 000000000..e3d037307 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h -@@ -0,0 +1,225 @@ ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h +@@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -182508,220 +184650,320 @@ index 000000000..6cfa6f5ee + * + */ + ++#ifndef _KBASE_DEBUG_KTRACE_CSF_H_ ++#define _KBASE_DEBUG_KTRACE_CSF_H_ ++ +/* -+ * Virtual interface for hardware counter backends. ++ * KTrace target for internal ringbuffer ++ */ ++#if KBASE_KTRACE_TARGET_RBUF ++/** ++ * kbasep_ktrace_add_csf - internal function to add trace about CSF ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @queue: queue, or NULL if no queue ++ * @flags: flags about the message ++ * @info_val: generic information about @code to add to the trace ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF() instead. + */ + -+#ifndef _KBASE_HWCNT_BACKEND_H_ -+#define _KBASE_HWCNT_BACKEND_H_ ++void kbasep_ktrace_add_csf(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, struct kbase_queue_group *group, ++ struct kbase_queue *queue, kbase_ktrace_flag_t flags, ++ u64 info_val); + -+#include ++/** ++ * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF ++ * KCPU queues. ++ * @kbdev: kbase device ++ * @code: trace code ++ * @queue: queue, or NULL if no queue ++ * @info_val1: Main infoval variable with information based on the KCPU ++ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h ++ * for information on the infoval values. ++ * @info_val2: Extra infoval variable with information based on the KCPU ++ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h ++ * for information on the infoval values. ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead. 
++ */ ++void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2); + -+struct kbase_hwcnt_metadata; -+struct kbase_hwcnt_enable_map; -+struct kbase_hwcnt_dump_buffer; ++#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ ++ kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \ ++ flags, info_val) + -+/* -+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to -+ * create an instance of a hardware counter -+ * backend. -+ */ -+struct kbase_hwcnt_backend_info; ++#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \ ++ info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \ ++ queue, info_val1, info_val2) + -+/* -+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter -+ * backend, used to perform dumps. -+ */ -+struct kbase_hwcnt_backend; ++#else /* KBASE_KTRACE_TARGET_RBUF */ ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(group);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(flags);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val1);\ ++ CSTD_UNUSED(info_val2);\ ++ } while (0) ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* -+ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter -+ * metadata that describes the layout -+ * of the counter data structures. -+ * @info: Non-NULL pointer to backend info. -+ * -+ * Multiple calls to this function with the same info are guaranteed to return -+ * the same metadata object each time. ++ * KTrace target for Linux's ftrace + * -+ * Return: Non-NULL pointer to immutable hardware counter metadata. ++ * Note: the header file(s) that define the trace_mali_<...> tracepoints are ++ * included by the parent header file + */ -+typedef const struct kbase_hwcnt_metadata * -+kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); ++#if KBASE_KTRACE_TARGET_FTRACE + -+/** -+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. -+ * @info: Non-NULL pointer to backend info. -+ * @out_backend: Non-NULL pointer to where backend is stored on success. -+ * -+ * All uses of the created hardware counter backend must be externally -+ * synchronised. -+ * -+ * Return: 0 on success, else error code. -+ */ -+typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, -+ struct kbase_hwcnt_backend **out_backend); ++#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ ++ trace_mali_##code(kbdev, group, queue, info_val) + -+/** -+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. -+ * @backend: Pointer to backend to be terminated. 
++#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ ++ trace_mali_##code(queue, info_val1, info_val2) ++ ++#else /* KBASE_KTRACE_TARGET_FTRACE */ ++ ++#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(group);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++ ++#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ ++ do {\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val1);\ ++ CSTD_UNUSED(info_val2);\ ++ } while (0) ++ ++#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++ ++/* ++ * Master set of macros to route KTrace to any of the targets + */ -+typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend -+ * timestamp. -+ * @backend: Non-NULL pointer to backend. ++ * KBASE_KTRACE_ADD_CSF_GRP - Add trace values about a group, with info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @info_val: generic information about @code to add to the trace + * -+ * Return: Backend timestamp in nanoseconds. ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. + */ -+typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); ++#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_queue_group *__group = group; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, \ ++ __info_val); \ ++ } while (0) + +/** -+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the -+ * backend. -+ * @backend: Non-NULL pointer to backend. -+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. -+ * -+ * The enable_map must have been created using the interface's metadata. -+ * If the backend has already been enabled, an error is returned. -+ * -+ * May be called in an atomic context. ++ * KBASE_KTRACE_ADD_CSF_GRP_Q - Add trace values about a group, queue, with info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @queue: queue, or NULL if no queue ++ * @info_val: generic information about @code to add to the trace + * -+ * Return: 0 on success, else error code. ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. 
+ */ -+typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map); ++#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_queue_group *__group = group; \ ++ struct kbase_queue *__queue = queue; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, \ ++ __queue, __info_val); \ ++ } while (0) + -+/** -+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping -+ * with the backend. -+ * @backend: Non-NULL pointer to backend. -+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ ++#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_kcpu_command_queue *__queue = queue; \ ++ u64 __info_val1 = info_val1; \ ++ u64 __info_val2 = info_val2; \ ++ KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \ ++ __info_val1, __info_val2); \ ++ KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \ ++ __info_val1, __info_val2); \ ++ } while (0) ++ ++#endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +new file mode 100644 +index 000000000..1896e10ed +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +@@ -0,0 +1,116 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be -+ * called in an atomic context with the spinlock documented by the specific -+ * backend interface held. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * -+ * Return: 0 on success, else error code. -+ */ -+typedef int -+kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map); -+ -+/** -+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with -+ * the backend. -+ * @backend: Non-NULL pointer to backend. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * If the backend is already disabled, does nothing. -+ * Any undumped counter values since the last dump get will be lost. -+ */ -+typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); -+ -+/** -+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped -+ * counters. -+ * @backend: Non-NULL pointer to backend. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * If the backend is not enabled, returns an error. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, else error code. 
+ */ -+typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); + ++#ifndef _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ ++#define _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ ++ ++#if KBASE_KTRACE_TARGET_RBUF +/** -+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter -+ * dump. -+ * @backend: Non-NULL pointer to backend. -+ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was -+ * requested will be written out to on success. ++ * DOC: KTrace version history, CSF variant + * -+ * If the backend is not enabled or another dump is already in progress, -+ * returns an error. ++ * 1.0: ++ * First version, with version information in the header. + * -+ * Return: 0 on success, else error code. -+ */ -+typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, -+ u64 *dump_time_ns); -+ -+/** -+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested -+ * counter dump has completed. -+ * @backend: Non-NULL pointer to backend. ++ * 1.1: ++ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as ++ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), ++ * (unique kctx id) + * -+ * If the backend is not enabled, returns an error. ++ * ftrace backend now outputs kctx field (as %d_%u format). + * -+ * Return: 0 on success, else error code. -+ */ -+typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); -+ -+/** -+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the -+ * counters dumped after the last dump -+ * request into the dump buffer. -+ * @backend: Non-NULL pointer to backend. -+ * @dump_buffer: Non-NULL pointer to destination dump buffer. -+ * @enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * @accumulate: True if counters should be accumulated into dump_buffer, rather -+ * than copied. ++ * Add fields group, slot, prio, csi into backend-specific part. + * -+ * The resultant contents of the dump buffer are only well defined if a prior -+ * call to dump_wait returned successfully, and a new dump has not yet been -+ * requested by a call to dump_request. ++ * 1.2: ++ * There is a new class of KCPU traces; with this, a new KCPU column in the ++ * ringbuffer RBUF (mali_trace) between csi and info_val, which is empty ++ * for non-kcpu related traces, and usually displays the KCPU Queue ID and ++ * an extra information value. ftrace also displays these KCPU traces. + * -+ * Return: 0 on success, else error code. ++ * 1.3: ++ * Add a lot of extra new traces. Tweak some existing scheduler related traces ++ * to contain extra information information/happen at slightly different times. ++ * SCHEDULER_PROTM_EXIT now has group information + */ -+typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, -+ struct kbase_hwcnt_dump_buffer *dump_buffer, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ bool accumulate); ++#define KBASE_KTRACE_VERSION_MAJOR 1 ++#define KBASE_KTRACE_VERSION_MINOR 3 ++ ++/* indicates if the trace message has valid queue-group related info. */ ++#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) ++ ++/* indicates if the trace message has valid queue related info. */ ++#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) ++ ++/* indicates if the trace message has valid KCPU-queue related info. 
*/ ++#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) ++ ++/* Collect all the flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_BACKEND_ALL \ ++ (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \ ++ KBASE_KTRACE_FLAG_CSF_KCPU) + +/** -+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual -+ * interface. -+ * @info: Immutable info used to initialise an instance of the -+ * backend. -+ * @metadata: Function ptr to get the immutable hardware counter -+ * metadata. -+ * @init: Function ptr to initialise an instance of the backend. -+ * @term: Function ptr to terminate an instance of the backend. -+ * @timestamp_ns: Function ptr to get the current backend timestamp. -+ * @dump_enable: Function ptr to enable dumping. -+ * @dump_enable_nolock: Function ptr to enable dumping while the -+ * backend-specific spinlock is already held. -+ * @dump_disable: Function ptr to disable dumping. -+ * @dump_clear: Function ptr to clear counters. -+ * @dump_request: Function ptr to request a dump. -+ * @dump_wait: Function ptr to wait until dump to complete. -+ * @dump_get: Function ptr to copy or accumulate dump into a dump -+ * buffer. ++ * union kbase_ktrace_backend - backend specific part of a trace message ++ * @kcpu: kcpu union member ++ * @kcpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @kcpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. ++ * @kcpu.id: ID of the KCPU queue. ++ * @kcpu.extra_info_val: value specific to the type of KCPU event being traced. ++ * Refer to the KPU specific code in enum kbase_ktrace_code in ++ * mali_kbase_debug_ktrace_codes_csf.h ++ * @gpu: gpu union member ++ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @gpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. ++ * @gpu.group_handle: Handle identifying the associated queue group. Only valid ++ * when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.csg_nr: Number/index of the associated queue group's CS group to ++ * which it is mapped, or negative if none associated. Only ++ * valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.slot_prio: The priority of the slot for the associated group, if it ++ * was scheduled. Hence, only valid when @csg_nr >=0 and ++ * @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.csi_index: ID of the associated queue's CS HW interface. ++ * Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE. 
+ */ -+struct kbase_hwcnt_backend_interface { -+ const struct kbase_hwcnt_backend_info *info; -+ kbase_hwcnt_backend_metadata_fn *metadata; -+ kbase_hwcnt_backend_init_fn *init; -+ kbase_hwcnt_backend_term_fn *term; -+ kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; -+ kbase_hwcnt_backend_dump_enable_fn *dump_enable; -+ kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; -+ kbase_hwcnt_backend_dump_disable_fn *dump_disable; -+ kbase_hwcnt_backend_dump_clear_fn *dump_clear; -+ kbase_hwcnt_backend_dump_request_fn *dump_request; -+ kbase_hwcnt_backend_dump_wait_fn *dump_wait; -+ kbase_hwcnt_backend_dump_get_fn *dump_get; ++ ++union kbase_ktrace_backend { ++ /* Place 64 and 32-bit members together */ ++ /* Pack smaller members together */ ++ struct { ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 id; ++ u64 extra_info_val; ++ } kcpu; ++ ++ struct { ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 group_handle; ++ s8 csg_nr; ++ u8 slot_prio; ++ s8 csi_index; ++ } gpu; +}; + -+#endif /* _KBASE_HWCNT_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++#endif /* _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h new file mode 100644 -index 000000000..4a429a6cd +index 000000000..efa8ab05b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c -@@ -0,0 +1,1896 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h +@@ -0,0 +1,112 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -182739,1891 +184981,1800 @@ index 000000000..4a429a6cd + * + */ + -+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifndef BASE_MAX_NR_CLOCKS_REGULATORS -+#define BASE_MAX_NR_CLOCKS_REGULATORS 4 -+#endif -+ -+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+/* Backend watch dog timer interval in milliseconds: 18 seconds. */ -+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000) -+#else -+/* Backend watch dog timer interval in milliseconds: 1 second. */ -+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) -+#endif /* IS_FPGA && !NO_MALI */ ++#ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ ++#define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + ++#if KBASE_KTRACE_TARGET_RBUF +/** -+ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. ++ * DOC: KTrace version history, JM variant + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is -+ * an error. ++ * 1.0: ++ * Original version (implicit, header did not carry version information). 
+ * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and -+ * we are waiting for an ACK, this ACK could come from either PRFCNT_ACK, -+ * PROTMODE_ENTER_ACK, or if an error occurs. ++ * 2.0: ++ * Introduced version information into the header. + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been -+ * requested and we're waiting for an ACK - this ACK could come from either -+ * PRFCNT_ACK, or if an error occurs, PROTMODE_ENTER_ACK is not applied here -+ * since watchdog request can't be triggered in protected mode. ++ * Some changes of parameter names in header. + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert -+ * immediately after receiving the ACK, so we know which index corresponds to -+ * the buffer we requested. ++ * Trace now uses all 64-bits of info_val. + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and -+ * now we have kicked off the worker. ++ * Non-JM specific parts moved to using info_val instead of refcount/gpu_addr. + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now -+ * we have kicked off the worker to accumulate up to that insert and then copy -+ * the delta to the user buffer to prepare for dump_get(). ++ * 2.1: ++ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as ++ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), ++ * (unique kctx id). + * -+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully. ++ * ftrace backend now outputs kctx field (as %d_%u format). + * -+ * Valid state transitions: -+ * IDLE -> REQUESTED (on user dump request) -+ * IDLE -> WATCHDOG_REQUESTED (on watchdog request) -+ * IDLE -> QUERYING_INSERT (on user dump request in protected mode) -+ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware) -+ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request) -+ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request) -+ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) -+ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) -+ * ACCUMULATING -> COMPLETED (on accumulation completion) -+ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode) -+ * COMPLETED -> REQUESTED (on user dump request) -+ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request) -+ * COMPLETED -> IDLE (on disable) -+ * ANY -> IDLE (on error) ++ * 2.2: ++ * Add tracing codes for pulling, unpulling, and returns atoms to JS for ++ * diagnosing soft-stop path and preemption problems + */ -+enum kbase_hwcnt_backend_csf_dump_state { -+ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, -+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED, -+}; ++#define KBASE_KTRACE_VERSION_MAJOR 2 ++#define KBASE_KTRACE_VERSION_MINOR 2 ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ ++/* ++ * Note: mali_kbase_debug_ktrace_jm.h needs these value even if the RBUF target ++ * is disabled (they get discarded with CSTD_UNUSED(), but they're still ++ * referenced) ++ */ ++ ++/* indicates if the trace message has a valid refcount member */ ++#define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) ++/* indicates if the trace message has a valid jobslot member */ ++#define 
KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) ++/* indicates if the trace message has valid atom related info. */ ++#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) ++ ++#if KBASE_KTRACE_TARGET_RBUF ++/* Collect all the flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_BACKEND_ALL \ ++ (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT \ ++ | KBASE_KTRACE_FLAG_JM_ATOM) + +/** -+ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states. -+ * -+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend -+ * is disabled. -+ * -+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in -+ * progress, waiting for firmware acknowledgment. ++ * union kbase_ktrace_backend - backend specific part of a trace message ++ * Contains only a struct but is a union such that it is compatible with ++ * generic JM and CSF KTrace calls. + * -+ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged, -+ * enable is done. ++ * @gpu: gpu union member ++ * @gpu.atom_udata: Copy of the user data sent for the atom in base_jd_submit. ++ * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags ++ * @gpu.gpu_addr: GPU address, usually of the job-chain represented by an ++ * atom. ++ * @gpu.atom_number: id of the atom for which trace message was added. Only ++ * valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags ++ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @gpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. ++ * @gpu.jobslot: job-slot for which trace message was added, valid only for ++ * job-slot management events. ++ * @gpu.refcount: reference count for the context, valid for certain events ++ * related to scheduler core and policy. ++ */ ++union kbase_ktrace_backend { ++ struct { ++ /* Place 64 and 32-bit members together */ ++ u64 atom_udata[2]; /* Only valid for ++ * KBASE_KTRACE_FLAG_JM_ATOM ++ */ ++ u64 gpu_addr; ++ int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ ++ /* Pack smaller members together */ ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 jobslot; ++ u8 refcount; ++ } gpu; ++}; ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ ++#endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c +new file mode 100644 +index 000000000..6597a15e5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c +@@ -0,0 +1,121 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in -+ * progress, waiting for firmware acknowledgment. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * -+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been -+ * acknowledged, waiting for dump workers to be finished. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An -+ * unrecoverable error happened, waiting for dump workers to be finished. 
++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error -+ * happened, and dump workers have finished, waiting for reset. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Valid state transitions: -+ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable) -+ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack) -+ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable) -+ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack) -+ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed) -+ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error) -+ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable -+ * error) -+ * UNRECOVERABLE_ERROR -> DISABLED (on before reset) -+ */ -+enum kbase_hwcnt_backend_csf_enable_state { -+ KBASE_HWCNT_BACKEND_CSF_DISABLED, -+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED, -+ KBASE_HWCNT_BACKEND_CSF_ENABLED, -+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED, -+ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER, -+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER, -+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR, -+}; -+ -+/** -+ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance -+ * of a CSF hardware counter backend. -+ * @backend: Pointer to access CSF backend. -+ * @fw_in_protected_mode: True if FW is running in protected mode, else -+ * false. -+ * @unrecoverable_error_happened: True if an recoverable error happened, else -+ * false. -+ * @csf_if: CSF interface object pointer. -+ * @ring_buf_cnt: Dump buffer count in the ring buffer. -+ * @counter_set: The performance counter set to use. -+ * @metadata: Hardware counter metadata. -+ * @prfcnt_info: Performance counter information. -+ * @watchdog_if: Watchdog interface object pointer. + */ -+struct kbase_hwcnt_backend_csf_info { -+ struct kbase_hwcnt_backend_csf *backend; -+ bool fw_in_protected_mode; -+ bool unrecoverable_error_happened; -+ struct kbase_hwcnt_backend_csf_if *csf_if; -+ u32 ring_buf_cnt; -+ enum kbase_hwcnt_set counter_set; -+ const struct kbase_hwcnt_metadata *metadata; -+ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; -+ struct kbase_hwcnt_watchdog_interface *watchdog_if; -+}; + -+/** -+ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout -+ * information. -+ * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are -+ * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. -+ * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. -+ * @fe_cnt: Front end block count. -+ * @tiler_cnt: Tiler block count. -+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. -+ * @shader_cnt: Shader Core block count. -+ * @fw_block_cnt: Total number of firmware counters blocks. -+ * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). -+ * @shader_avail_mask: Bitmap of all shader cores in the system. -+ * @enable_mask_offset: Offset in array elements of enable mask in each block -+ * starting from the beginning of block. 
-+ * @headers_per_block: For any block, the number of counters designated as block's header. -+ * @counters_per_block: For any block, the number of counters designated as block's payload. -+ * @values_per_block: For any block, the number of counters in total (header + payload). -+ */ -+struct kbase_hwcnt_csf_physical_layout { -+ u8 hw_block_cnt; -+ u8 fe_cnt; -+ u8 tiler_cnt; -+ u8 mmu_l2_cnt; -+ u8 shader_cnt; -+ u8 fw_block_cnt; -+ u8 block_cnt; -+ u64 shader_avail_mask; -+ size_t enable_mask_offset; -+ size_t headers_per_block; -+ size_t counters_per_block; -+ size_t values_per_block; -+}; ++#include ++#include "debug/mali_kbase_debug_ktrace_internal.h" ++#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + -+/** -+ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend. -+ * @info: CSF Info used to create the backend. -+ * @dump_state: The dumping state of the backend. -+ * @enable_state: The CSF backend internal enabled state. -+ * @insert_index_to_accumulate: The insert index in the ring buffer which need -+ * to accumulate up to. -+ * @enable_state_waitq: Wait queue object used to notify the enable -+ * changing flag is done. -+ * @to_user_buf: HWC sample buffer for client user, size -+ * metadata.dump_buf_bytes. -+ * @accum_buf: HWC sample buffer used as an internal -+ * accumulator, size metadata.dump_buf_bytes. -+ * @old_sample_buf: HWC sample buffer to save the previous values -+ * for delta calculation, size -+ * prfcnt_info.dump_bytes. -+ * @watchdog_last_seen_insert_idx: The insert index which watchdog has last -+ * seen, to check any new firmware automatic -+ * samples generated during the watchdog -+ * period. -+ * @ring_buf: Opaque pointer for ring buffer object. -+ * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. -+ * @clk_enable_map: The enable map specifying enabled clock domains. -+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period. -+ * @prev_cycle_count: Previous cycle count to calculate the cycle -+ * count for sample period. -+ * @phys_layout: Physical memory layout information of HWC -+ * sample buffer. -+ * @dump_completed: Completion signaled by the dump worker when -+ * it is completed accumulating up to the -+ * insert_index_to_accumulate. -+ * Should be initialized to the "complete" state. -+ * @user_requested: Flag to indicate a dump_request called from -+ * user. -+ * @hwc_dump_workq: Single threaded work queue for HWC workers -+ * execution. -+ * @hwc_dump_work: Worker to accumulate samples. -+ * @hwc_threshold_work: Worker for consuming available samples when -+ * threshold interrupt raised. 
-+ */ -+struct kbase_hwcnt_backend_csf { -+ struct kbase_hwcnt_backend_csf_info *info; -+ enum kbase_hwcnt_backend_csf_dump_state dump_state; -+ enum kbase_hwcnt_backend_csf_enable_state enable_state; -+ u32 insert_index_to_accumulate; -+ wait_queue_head_t enable_state_waitq; -+ u64 *to_user_buf; -+ u64 *accum_buf; -+ u32 *old_sample_buf; -+ u32 watchdog_last_seen_insert_idx; -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; -+ void *ring_buf_cpu_base; -+ u64 clk_enable_map; -+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ struct kbase_hwcnt_csf_physical_layout phys_layout; -+ struct completion dump_completed; -+ bool user_requested; -+ struct workqueue_struct *hwc_dump_workq; -+ struct work_struct hwc_dump_work; -+ struct work_struct hwc_threshold_work; -+}; ++#if KBASE_KTRACE_TARGET_RBUF + -+static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) ++void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) +{ -+ WARN_ON(!csf_info); -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ return (csf_info->backend != NULL); ++ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ++ "katom,gpu_addr,jobslot,refcount"), 0); +} + -+/** -+ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count -+ * tracking. -+ * -+ * @backend_csf: Non-NULL pointer to backend. -+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. -+ */ -+static void -+kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf, -+ const struct kbase_hwcnt_enable_map *enable_map) ++void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, ++ char *buffer, int sz, s32 *written) +{ -+ u64 clk_enable_map = enable_map->clk_enable_map; -+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ size_t clk; ++ /* katom */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "atom %d (ud: 0x%llx 0x%llx)", ++ trace_msg->backend.gpu.atom_number, ++ trace_msg->backend.gpu.atom_udata[0], ++ trace_msg->backend.gpu.atom_udata[1]), 0); + -+ memset(cycle_counts, 0, sizeof(cycle_counts)); ++ /* gpu_addr */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ",%.8llx,", trace_msg->backend.gpu.gpu_addr), ++ 0); ++ else ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ",,"), 0); + -+ /* Read cycle count from CSF interface for both clock domains. */ -+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, -+ clk_enable_map); ++ /* jobslot */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%d", trace_msg->backend.gpu.jobslot), 0); + -+ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) -+ { -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) -+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; -+ } ++ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ++ ","), 0); + -+ /* Keep clk_enable_map for dump_request. 
*/ -+ backend_csf->clk_enable_map = clk_enable_map; ++ /* refcount */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%d", trace_msg->backend.gpu.refcount), 0); +} + -+static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) ++void kbasep_ktrace_add_jm(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_context *kctx, ++ const struct kbase_jd_atom *katom, u64 gpu_addr, ++ kbase_ktrace_flag_t flags, int refcount, int jobslot, ++ u64 info_val) +{ -+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ size_t clk; ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; + -+ memset(cycle_counts, 0, sizeof(cycle_counts)); ++ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) ++ return; + -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + -+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, -+ backend_csf->clk_enable_map); ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + -+ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) -+ { -+ if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) { -+ backend_csf->cycle_count_elapsed[clk] = -+ cycle_counts[clk] - backend_csf->prev_cycle_count[clk]; -+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; -+ } ++ /* Fill the common part of the message (including backend.gpu.flags) */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, ++ info_val); ++ ++ /* Indicate to the common code that backend-specific parts will be ++ * valid ++ */ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ ++ /* Fill the JM-specific parts of the message */ ++ if (katom) { ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM; ++ ++ trace_msg->backend.gpu.atom_number = ++ kbase_jd_atom_id(katom->kctx, katom); ++ trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0]; ++ trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1]; + } -+} + -+/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -+static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) -+{ -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ trace_msg->backend.gpu.gpu_addr = gpu_addr; ++ trace_msg->backend.gpu.jobslot = jobslot; ++ /* Clamp refcount */ ++ trace_msg->backend.gpu.refcount = MIN((unsigned int)refcount, 0xFF); + -+ if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) -+ return 0; ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + -+ return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx); ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + -+/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to -+ * guarantee headers are -+ * enabled if any counter is -+ * required. -+ *@phys_enable_map: HWC physical enable map to be processed. 
++#endif /* KBASE_KTRACE_TARGET_RBUF */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h +new file mode 100644 +index 000000000..41be7a120 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h +@@ -0,0 +1,309 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ -+static void -+kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map) -+{ -+ WARN_ON(!phys_enable_map); + -+ /* Enable header if any counter is required from user, the header is -+ * controlled by bit 0 of the enable mask. -+ */ -+ if (phys_enable_map->fe_bm) -+ phys_enable_map->fe_bm |= 1; ++#ifndef _KBASE_DEBUG_KTRACE_JM_H_ ++#define _KBASE_DEBUG_KTRACE_JM_H_ + -+ if (phys_enable_map->tiler_bm) -+ phys_enable_map->tiler_bm |= 1; ++/* ++ * KTrace target for internal ringbuffer ++ */ ++#if KBASE_KTRACE_TARGET_RBUF ++/** ++ * kbasep_ktrace_add_jm - internal function to add trace about Job Management ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @flags: flags about the message ++ * @refcount: reference count information to add to the trace ++ * @jobslot: jobslot information to add to the trace ++ * @info_val: generic information about @code to add to the trace ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. 
++ */ ++void kbasep_ktrace_add_jm(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_context *kctx, ++ const struct kbase_jd_atom *katom, u64 gpu_addr, ++ kbase_ktrace_flag_t flags, int refcount, int jobslot, ++ u64 info_val); + -+ if (phys_enable_map->mmu_l2_bm) -+ phys_enable_map->mmu_l2_bm |= 1; ++#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ ++ refcount, jobslot, info_val) \ ++ kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ ++ gpu_addr, flags, refcount, jobslot, info_val) + -+ if (phys_enable_map->shader_bm) -+ phys_enable_map->shader_bm |= 1; -+} ++#else /* KBASE_KTRACE_TARGET_RBUF */ + -+static void kbasep_hwcnt_backend_csf_init_layout( -+ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, -+ struct kbase_hwcnt_csf_physical_layout *phys_layout) -+{ -+ size_t shader_core_cnt; -+ size_t values_per_block; -+ size_t fw_blocks_count; -+ size_t hw_blocks_count; ++#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ ++ refcount, jobslot, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(flags);\ ++ CSTD_UNUSED(refcount);\ ++ CSTD_UNUSED(jobslot);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + -+ WARN_ON(!prfcnt_info); -+ WARN_ON(!phys_layout); ++/* ++ * KTrace target for Linux's ftrace ++ * ++ * Note: the header file(s) that define the trace_mali_<...> tracepoints are ++ * included by the parent header file ++ */ ++#if KBASE_KTRACE_TARGET_FTRACE ++#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ ++ jobslot) \ ++ trace_mali_##code(kctx, jobslot, 0) + -+ shader_core_cnt = fls64(prfcnt_info->core_mask); -+ values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; -+ fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); -+ hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); ++#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ ++ gpu_addr, jobslot, info_val) \ ++ trace_mali_##code(kctx, jobslot, info_val) + -+ /* The number of hardware counters reported by the GPU matches the legacy guess-work we -+ * have done in the past -+ */ -+ WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + -+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT + -+ prfcnt_info->l2_count + shader_core_cnt); ++#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ ++ gpu_addr, refcount) \ ++ trace_mali_##code(kctx, refcount, 0) + -+ *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ -+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, -+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, -+ .mmu_l2_cnt = prfcnt_info->l2_count, -+ .shader_cnt = shader_core_cnt, -+ .fw_block_cnt = fw_blocks_count, -+ .hw_block_cnt = hw_blocks_count, -+ .block_cnt = fw_blocks_count + hw_blocks_count, -+ .shader_avail_mask = prfcnt_info->core_mask, -+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, -+ .values_per_block = values_per_block, -+ .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, -+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, -+ }; -+} ++#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ ++ gpu_addr, refcount, info_val) \ ++ trace_mali_##code(kctx, refcount, info_val) + -+static void -+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct 
kbase_hwcnt_backend_csf *backend_csf) -+{ -+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; ++#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ ++ info_val) \ ++ trace_mali_##code(kctx, gpu_addr, info_val) ++#else /* KBASE_KTRACE_TARGET_FTRACE */ ++#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ ++ jobslot) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(jobslot);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ memset(backend_csf->to_user_buf, 0, user_buf_bytes); -+ memset(backend_csf->accum_buf, 0, user_buf_bytes); -+ memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); -+} ++#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ ++ gpu_addr, jobslot, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(jobslot);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+static void -+kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf, -+ u32 *sample) -+{ -+ u32 block_idx; -+ const struct kbase_hwcnt_csf_physical_layout *phys_layout; -+ u32 *block_buf; ++#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ ++ gpu_addr, refcount) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(refcount);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ phys_layout = &backend_csf->phys_layout; ++#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ ++ gpu_addr, refcount, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { -+ block_buf = sample + block_idx * phys_layout->values_per_block; -+ block_buf[phys_layout->enable_mask_offset] = 0; -+ } -+} ++#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ ++ info_val)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(kctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++#endif /* KBASE_KTRACE_TARGET_FTRACE */ + -+static void -+kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf) -+{ -+ u32 idx; -+ u32 *sample; -+ char *cpu_dump_base; -+ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; ++/* ++ * Master set of macros to route KTrace to any of the targets ++ */ + -+ cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; ++/** ++ * KBASE_KTRACE_ADD_JM_SLOT - Add trace values about a job-slot ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @jobslot: jobslot information to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). 
Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. ++ */ ++#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ ++ jobslot) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __gpu_addr = gpu_addr; \ ++ int __jobslot = jobslot; \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ ++ 0); \ ++ KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ ++ } while (0) + -+ for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { -+ sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; -+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); -+ } -+} ++/** ++ * KBASE_KTRACE_ADD_JM_SLOT_INFO - Add trace values about a job-slot, with info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @jobslot: jobslot information to add to the trace ++ * @info_val: generic information about @code to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. ++ */ ++#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ ++ jobslot, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __gpu_addr = gpu_addr; \ ++ int __jobslot = jobslot; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ ++ } while (0) + -+static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) -+{ -+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; ++/** ++ * KBASE_KTRACE_ADD_JM_REFCOUNT - Add trace values about a kctx refcount ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @refcount: reference count information to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ ++ refcount) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __gpu_addr = gpu_addr; \ ++ int __refcount = refcount; \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ ++ 0u); \ ++ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ ++ } while (0) + -+ /* Copy the data into the sample and wait for the user to get it. */ -+ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); ++/** ++ * KBASE_KTRACE_ADD_JM_REFCOUNT_INFO - Add trace values about a kctx refcount, ++ * and info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @refcount: reference count information to add to the trace ++ * @info_val: generic information about @code to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. ++ */ ++#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ ++ gpu_addr, refcount, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __gpu_addr = gpu_addr; \ ++ int __refcount = refcount; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ ++ } while (0) + -+ /* After copied data into user sample, clear the accumulator values to -+ * prepare for the next accumulator, such as the next request or -+ * threshold. -+ */ -+ memset(backend_csf->accum_buf, 0, user_buf_bytes); -+} ++/** ++ * KBASE_KTRACE_ADD_JM - Add trace values (no slot or refcount) ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @katom: kbase atom, or NULL if no atom ++ * @gpu_addr: GPU address, usually related to @katom ++ * @info_val: generic information about @code to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __gpu_addr = gpu_addr; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ 0u, 0, 0, __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ ++ } while (0) + -+static void kbasep_hwcnt_backend_csf_accumulate_sample( -+ const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, -+ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) -+{ -+ size_t block_idx; -+ const u32 *old_block = old_sample_buf; -+ const u32 *new_block = new_sample_buf; -+ u64 *acc_block = accum_buf; -+ const size_t values_per_block = phys_layout->values_per_block; ++#endif /* _KBASE_DEBUG_KTRACE_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +new file mode 100644 +index 000000000..ddcac906c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +@@ -0,0 +1,267 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Performance counter blocks for firmware are stored before blocks for hardware. -+ * We skip over the firmware's performance counter blocks (counters dumping is not -+ * supported for firmware blocks, only hardware ones). -+ */ -+ old_block += values_per_block * phys_layout->fw_block_cnt; -+ new_block += values_per_block * phys_layout->fw_block_cnt; ++/* ++ * NOTE: This must **only** be included through mali_linux_trace.h, ++ * otherwise it will fail to setup tracepoints correctly ++ */ + -+ for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; -+ block_idx++) { -+ const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; -+ const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; ++#if !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) ++#define _KBASE_DEBUG_LINUX_KTRACE_CSF_H_ + -+ if (new_enable_mask == 0) { -+ /* Hardware block was unavailable or we didn't turn on -+ * any counters. Do nothing. -+ */ -+ } else { -+ /* Hardware block was available and it had some counters -+ * enabled. We need to update the accumulation buffer. 
-+ */ -+ size_t ctr_idx; ++/* ++ * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT ++ */ ++DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END); ++DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT); ++DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT); ++DEFINE_MALI_ADD_EVENT(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT); ++DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_START); ++DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END); ++DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_GLB_REQ_ACK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_ADVANCE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_NOADVANCE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_INSERT); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_REMOVE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_ROTATE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_HEAD); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END); ++DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED); ++DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP); ++DEFINE_MALI_ADD_EVENT(SCHED_BUSY); ++DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE); ++DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED); ++DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING); ++#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n); ++#include "backend/gpu/mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE + -+ /* Unconditionally copy the headers. */ -+ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { -+ acc_block[ctr_idx] = new_block[ctr_idx]; -+ } ++DECLARE_EVENT_CLASS(mali_csf_grp_q_template, ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, ++ struct kbase_queue *queue, u64 info_val), ++ TP_ARGS(kbdev, group, queue, info_val), ++ TP_STRUCT__entry( ++ __field(u64, info_val) ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u8, group_handle) ++ __field(s8, csg_nr) ++ __field(u8, slot_prio) ++ __field(s8, csi_index) ++ ), ++ TP_fast_assign( ++ { ++ struct kbase_context *kctx = NULL; + -+ /* Accumulate counter samples -+ * -+ * When accumulating samples we need to take into -+ * account whether the counter sampling method involves -+ * clearing counters back to zero after each sample is -+ * taken. -+ * -+ * The intention for CSF was that all HW should use -+ * counters which wrap to zero when their maximum value -+ * is reached. This, combined with non-clearing -+ * sampling, enables multiple concurrent users to -+ * request samples without interfering with each other. 
-+ * -+ * However some early HW may not support wrapping -+ * counters, for these GPUs counters must be cleared on -+ * sample to avoid loss of data due to counters -+ * saturating at their maximum value. ++ __entry->info_val = info_val; ++ /* Note: if required in future, we could record some ++ * flags in __entry about whether the group/queue parts ++ * are valid, and add that to the trace message e.g. ++ * by using __print_flags()/__print_symbolic() + */ -+ if (!clearing_samples) { -+ if (old_enable_mask == 0) { -+ /* Hardware block was previously -+ * unavailable. Accumulate the new -+ * counters only, as we know previous -+ * values are zeroes. -+ */ -+ for (ctr_idx = phys_layout->headers_per_block; -+ ctr_idx < values_per_block; ctr_idx++) { -+ acc_block[ctr_idx] += new_block[ctr_idx]; -+ } -+ } else { -+ /* Hardware block was previously -+ * available. Accumulate the delta -+ * between old and new counter values. -+ */ -+ for (ctr_idx = phys_layout->headers_per_block; -+ ctr_idx < values_per_block; ctr_idx++) { -+ acc_block[ctr_idx] += -+ new_block[ctr_idx] - old_block[ctr_idx]; -+ } -+ } ++ if (queue) { ++ /* Note: kctx overridden by group->kctx later if group is valid */ ++ kctx = queue->kctx; ++ __entry->csi_index = queue->csi_index; + } else { -+ for (ctr_idx = phys_layout->headers_per_block; -+ ctr_idx < values_per_block; ctr_idx++) { -+ acc_block[ctr_idx] += new_block[ctr_idx]; -+ } ++ __entry->csi_index = -1; + } -+ } -+ old_block += values_per_block; -+ new_block += values_per_block; -+ acc_block += values_per_block; -+ } -+ -+ WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); -+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); -+ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - -+ (values_per_block * phys_layout->fw_block_cnt)); -+ (void)dump_bytes; -+} -+ -+static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf, -+ u32 extract_index_to_start, -+ u32 insert_index_to_stop) -+{ -+ u32 raw_idx; -+ unsigned long flags = 0UL; -+ u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; -+ const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; -+ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; -+ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; -+ u32 *old_sample_buf = backend_csf->old_sample_buf; -+ u32 *new_sample_buf = old_sample_buf; -+ -+ if (extract_index_to_start == insert_index_to_stop) -+ /* No samples to accumulate. Early out. */ -+ return; -+ -+ /* Sync all the buffers to CPU side before read the data. */ -+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf, extract_index_to_start, -+ insert_index_to_stop, true); + -+ /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ -+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { -+ /* The logical "&" acts as a modulo operation since buf_count -+ * must be a power of two. -+ */ -+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); ++ if (group) { ++ kctx = group->kctx; ++ __entry->group_handle = group->handle; ++ __entry->csg_nr = group->csg_nr; ++ if (group->csg_nr >= 0) ++ __entry->slot_prio = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; ++ else ++ __entry->slot_prio = 0u; ++ } else { ++ __entry->group_handle = 0u; ++ __entry->csg_nr = -1; ++ __entry->slot_prio = 0u; ++ } ++ __entry->kctx_id = (kctx) ? 
kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; ++ } + -+ new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++ ), ++ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", ++ __entry->kctx_tgid, __entry->kctx_id, ++ __entry->group_handle, __entry->csg_nr, ++ __entry->slot_prio, __entry->csi_index, ++ __entry->info_val) ++); + -+ kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, -+ buf_dump_bytes, backend_csf->accum_buf, -+ old_sample_buf, new_sample_buf, -+ clearing_samples); ++/* ++ * Group events ++ */ ++#define DEFINE_MALI_CSF_GRP_EVENT(name) \ ++ DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ ++ struct kbase_queue *queue, u64 info_val), \ ++ TP_ARGS(kbdev, group, queue, info_val), \ ++ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ ++ __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ ++ __entry->csg_nr, __entry->slot_prio, __entry->info_val)) + -+ old_sample_buf = new_sample_buf; -+ } ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_RUNNING); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_IDLE); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROGRESS_TIMER_EVENT); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_START); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_INSERT); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_REMOVE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_ROTATE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_HEAD); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_INSERT); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_REMOVE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_HEAD); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER_CHECK); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS); ++DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START); ++DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED); ++DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED); + -+ /* Save the newest buffer as the old buffer for next time. 
*/ -+ memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); ++#undef DEFINE_MALI_CSF_GRP_EVENT + -+ /* Reset the prfcnt_en header on each sample before releasing them. */ -+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { -+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); -+ u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++/* ++ * Group + Queue events ++ */ ++#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ ++ DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ ++ struct kbase_queue *queue, u64 info_val), \ ++ TP_ARGS(kbdev, group, queue, info_val)) + -+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); -+ } ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQ); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_FAULT); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_TILER_OOM); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_PROTM_PEND); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_START); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_END); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_WAIT_STATUS); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_CUR_VAL); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_TEST_VAL); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_BLOCKED_REASON); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_SET); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR); + -+ /* Sync zeroed buffers to avoid coherency issues on future use. */ -+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf, extract_index_to_start, -+ insert_index_to_stop, false); ++#undef DEFINE_MALI_CSF_GRP_Q_EVENT + -+ /* After consuming all samples between extract_idx and insert_idx, -+ * set the raw extract index to insert_idx so that the sample buffers -+ * can be released back to the ring buffer pool. -+ */ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx, -+ insert_index_to_stop); -+ /* Update the watchdog last seen index to check any new FW auto samples -+ * in next watchdog callback. 
-+ */ -+ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop; -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+} ++/* ++ * KCPU queue events ++ */ ++DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, ++ TP_PROTO(struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2), ++ TP_ARGS(queue, info_val1, info_val2), ++ TP_STRUCT__entry( ++ __field(u64, info_val1) ++ __field(u64, info_val2) ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u8, id) ++ ), ++ TP_fast_assign( ++ { ++ __entry->info_val1 = info_val1; ++ __entry->info_val2 = info_val2; ++ __entry->kctx_id = queue->kctx->id; ++ __entry->kctx_tgid = queue->kctx->tgid; ++ __entry->id = queue->id; ++ } + -+static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ struct kbase_hwcnt_backend_csf *backend_csf, -+ enum kbase_hwcnt_backend_csf_enable_state new_state) -+{ -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ), ++ TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", ++ __entry->kctx_tgid, __entry->kctx_id, __entry->id, ++ __entry->info_val1, __entry->info_val2) ++); + -+ if (backend_csf->enable_state != new_state) { -+ backend_csf->enable_state = new_state; ++#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ ++ DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ ++ TP_PROTO(struct kbase_kcpu_command_queue *queue, \ ++ u64 info_val1, u64 info_val2), \ ++ TP_ARGS(queue, info_val1, info_val2)) + -+ wake_up(&backend_csf->enable_state_waitq); -+ } -+} ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_SET); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_START); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_END); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_SIGNAL); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_START); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_END); + -+static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info = info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ unsigned long flags = 0UL; ++#undef DEFINE_MALI_CSF_KCPU_EVENT + -+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) */ +diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +new file mode 100644 +index 000000000..8fa4e2a7c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +@@ -0,0 +1,174 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) { -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+ return; -+ } ++/* ++ * NOTE: This must **only** be included through mali_linux_trace.h, ++ * otherwise it will fail to setup tracepoints correctly ++ */ + -+ backend_csf = csf_info->backend; ++#if !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ) ++#define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ + -+ /* Only do watchdog request when all conditions are met: */ -+ if (/* 1. Backend is enabled. */ -+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && -+ /* 2. FW is not in protected mode. */ -+ (!csf_info->fw_in_protected_mode) && -+ /* 3. dump state indicates no other dumping is in progress. */ -+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || -+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { -+ u32 extract_index = 0U; -+ u32 insert_index = 0U; ++DECLARE_EVENT_CLASS(mali_jm_slot_template, ++ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), ++ TP_ARGS(kctx, jobslot, info_val), ++ TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(unsigned int, jobslot) ++ __field(u64, info_val) ++ ), ++ TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; ++ __entry->jobslot = jobslot; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->jobslot, __entry->info_val) ++); + -+ /* Read the raw extract and insert indexes from the CSF interface. */ -+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index); ++#define DEFINE_MALI_JM_SLOT_EVENT(name) \ ++DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ ++ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ ++ TP_ARGS(kctx, jobslot, info_val)) ++DEFINE_MALI_JM_SLOT_EVENT(JM_RETURN_ATOM_TO_JS); ++DEFINE_MALI_JM_SLOT_EVENT(JM_MARK_FOR_RETURN_TO_JS); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); ++DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); ++DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); ++DEFINE_MALI_JM_SLOT_EVENT(JM_CHECK_HEAD); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_0); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_1); ++DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP); ++DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_0); ++DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_1); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); ++DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_EVICT); ++DEFINE_MALI_JM_SLOT_EVENT(JM_BEGIN_RESET_WORKER); ++DEFINE_MALI_JM_SLOT_EVENT(JM_END_RESET_WORKER); ++DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT); ++DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); ++DEFINE_MALI_JM_SLOT_EVENT(JS_PULL_JOB); ++DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); ++DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); ++DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); ++DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); ++DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_BLOCKED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_UNBLOCKED); 
++DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED); ++DEFINE_MALI_JM_SLOT_EVENT(JS_SLOT_PRIO_IS_BLOCKED); ++#undef DEFINE_MALI_JM_SLOT_EVENT + -+ /* Do watchdog request if no new FW auto samples. */ -+ if (insert_index == backend_csf->watchdog_last_seen_insert_idx) { -+ /* Trigger the watchdog request. */ -+ csf_info->csf_if->dump_request(csf_info->csf_if->ctx); ++DECLARE_EVENT_CLASS(mali_jm_refcount_template, ++ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), ++ TP_ARGS(kctx, refcount, info_val), ++ TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(unsigned int, refcount) ++ __field(u64, info_val) ++ ), ++ TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; ++ __entry->refcount = refcount; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->refcount, __entry->info_val) ++); + -+ /* A watchdog dump is required, change the state to -+ * start the request process. -+ */ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; -+ } -+ } ++#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ ++DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ ++ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ ++ TP_ARGS(kctx, refcount, info_val)) ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); ++DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); ++#undef DEFINE_MALI_JM_REFCOUNT_EVENT + -+ /* Must schedule another callback when in the transitional state because -+ * this function can be called for the first time before the performance -+ * counter enabled interrupt. -+ */ -+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) || -+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { -+ /* Reschedule the timer for next watchdog callback. */ -+ csf_info->watchdog_if->modify(csf_info->watchdog_if->timer, -+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); -+ } ++DECLARE_EVENT_CLASS(mali_jm_add_template, ++ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), ++ TP_ARGS(kctx, gpu_addr, info_val), ++ TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u64, gpu_addr) ++ __field(u64, info_val) ++ ), ++ TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; ++ __entry->gpu_addr = gpu_addr; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->gpu_addr, __entry->info_val) ++); + -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+} ++#define DEFINE_MALI_JM_ADD_EVENT(name) \ ++DEFINE_EVENT(mali_jm_add_template, mali_##name, \ ++ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ ++ TP_ARGS(kctx, gpu_addr, info_val)) ++DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); ++DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); ++DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); ++DEFINE_MALI_JM_ADD_EVENT(JD_DONE); ++DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL); ++DEFINE_MALI_JM_ADD_EVENT(JD_ZAP_CONTEXT); ++DEFINE_MALI_JM_ADD_EVENT(JM_IRQ); ++DEFINE_MALI_JM_ADD_EVENT(JM_IRQ_END); ++DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS); ++DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS_DONE); ++DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_NON_SCHEDULED); ++DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED); ++DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE); ++DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET); ++DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE); ++DEFINE_MALI_JM_ADD_EVENT(JS_UNPULL_JOB); ++DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); ++DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); ++DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); ++DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); ++DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER); ++DEFINE_MALI_JM_ADD_EVENT(JS_RETURN_WORKER_END); ++DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END); ++DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START); ++DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); ++#undef DEFINE_MALI_JM_ADD_EVENT + -+/** -+ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. -+ * @work: Work structure. ++#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ)*/ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c +new file mode 100644 +index 000000000..f521b4712 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c +@@ -0,0 +1,361 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * To accumulate all available samples in the ring buffer when a request has -+ * been done. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ -+static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) -+{ -+ unsigned long flags = 0ULL; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ u32 insert_index_to_acc; -+ u32 extract_index = 0U; -+ u32 insert_index = 0U; + -+ WARN_ON(!work); -+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ /* Assert the backend is not destroyed. */ -+ WARN_ON(backend_csf != backend_csf->info->backend); ++#include ++#include "debug/mali_kbase_debug_ktrace_internal.h" + -+ /* The backend was disabled or had an error while the worker was being -+ * launched. -+ */ -+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); -+ WARN_ON(!completion_done(&backend_csf->dump_completed)); -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return; -+ } ++int kbase_ktrace_init(struct kbase_device *kbdev) ++{ ++#if KBASE_KTRACE_TARGET_RBUF ++ struct kbase_ktrace_msg *rbuf; + -+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); ++ spin_lock_init(&kbdev->ktrace.lock); ++ rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; -+ insert_index_to_acc = backend_csf->insert_index_to_accumulate; ++ if (!rbuf) ++ return -EINVAL; + -+ /* Read the raw extract and insert indexes from the CSF interface. */ -+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, -+ &insert_index); ++ kbdev->ktrace.rbuf = rbuf; ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ return 0; ++} + -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++void kbase_ktrace_term(struct kbase_device *kbdev) ++{ ++#if KBASE_KTRACE_TARGET_RBUF ++ kfree(kbdev->ktrace.rbuf); ++ kbdev->ktrace.rbuf = NULL; ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++} + -+ /* Accumulate up to the insert we grabbed at the prfcnt request -+ * interrupt. -+ */ -+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, -+ insert_index_to_acc); ++void kbase_ktrace_hook_wrapper(void *param) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)param; + -+ /* Copy to the user buffer so if a threshold interrupt fires -+ * between now and get(), the accumulations are untouched. -+ */ -+ kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); ++ KBASE_KTRACE_DUMP(kbdev); ++} + -+ /* Dump done, set state back to COMPLETED for next request. */ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ /* Assert the backend is not destroyed. */ -+ WARN_ON(backend_csf != backend_csf->info->backend); ++#if KBASE_KTRACE_TARGET_RBUF + -+ /* The backend was disabled or had an error while we were accumulating. ++static const char * const kbasep_ktrace_code_string[] = { ++ /* ++ * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE ++ * THIS MUST BE USED AT THE START OF THE ARRAY + */ -+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); -+ WARN_ON(!completion_done(&backend_csf->dump_completed)); -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return; -+ } -+ -+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); -+ -+ /* Our work here is done - set the wait object and unblock waiters. 
*/ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; -+ complete_all(&backend_csf->dump_completed); -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+} ++#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X ++#include "debug/mali_kbase_debug_ktrace_codes.h" ++#undef KBASE_KTRACE_CODE_MAKE_CODE ++}; + -+/** -+ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker. -+ * -+ * @work: Work structure. -+ * -+ * Called when a HWC threshold interrupt raised to consume all available samples -+ * in the ring buffer. -+ */ -+static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) ++static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) +{ -+ unsigned long flags = 0ULL; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ u32 extract_index = 0U; -+ u32 insert_index = 0U; ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ "secs,thread_id,cpu,code,kctx,"), 0); + -+ WARN_ON(!work); ++ kbasep_ktrace_backend_format_header(buffer, sz, &written); + -+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ ",info_val,ktrace_version=%u.%u", ++ KBASE_KTRACE_VERSION_MAJOR, ++ KBASE_KTRACE_VERSION_MINOR), 0); + -+ /* Assert the backend is not destroyed. */ -+ WARN_ON(backend_csf != backend_csf->info->backend); ++ buffer[sz - 1] = 0; ++} + -+ /* Read the raw extract and insert indexes from the CSF interface. */ -+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, -+ &insert_index); ++static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, ++ char *buffer, int sz) ++{ ++ s32 written = 0; + -+ /* The backend was disabled or had an error while the worker was being -+ * launched. ++ /* Initial part of message: ++ * ++ * secs,thread_id,cpu,code, + */ -+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return; -+ } ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ "%d.%.6d,%d,%d,%s,", ++ (int)trace_msg->timestamp.tv_sec, ++ (int)(trace_msg->timestamp.tv_nsec / 1000), ++ trace_msg->thread_id, trace_msg->cpu, ++ kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), ++ 0); + -+ /* Early out if we are not in the IDLE state or COMPLETED state, as this -+ * means a concurrent dump is in progress and we don't want to -+ * interfere. -+ */ -+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && -+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return; ++ /* kctx part: */ ++ if (trace_msg->kctx_tgid) { ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ "%d_%u", ++ trace_msg->kctx_tgid, trace_msg->kctx_id), 0); + } -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ /* Trailing comma */ ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ ","), 0); + -+ /* Accumulate everything we possibly can. We grabbed the insert index -+ * immediately after we acquired the lock but before we checked whether -+ * a concurrent dump was triggered. 
This ensures that if a concurrent -+ * dump was triggered between releasing the lock and now, we know for a -+ * fact that our insert will not exceed the concurrent dump's -+ * insert_to_accumulate, so we don't risk accumulating too much data. -+ */ -+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); ++ /* Backend parts */ ++ kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, ++ &written); + -+ /* No need to wake up anything since it is not a user dump request. */ ++ /* Rest of message: ++ * ++ * ,info_val ++ * ++ * Note that the last column is empty, it's simply to hold the ktrace ++ * version in the header ++ */ ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ ",0x%.16llx", ++ (unsigned long long)trace_msg->info_val), 0); ++ buffer[sz - 1] = 0; +} + -+static void -+kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info) ++static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, ++ struct kbase_ktrace_msg *trace_msg) +{ -+ u32 extract_index; -+ -+ WARN_ON(!csf_info); -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ -+ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); -+ WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED); -+ WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); ++ char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + -+ /* Save insert index now so that the dump worker only accumulates the -+ * HWC data associated with this request. Extract index is not stored -+ * as that needs to be checked when accumulating to prevent re-reading -+ * buffers that have already been read and returned to the GPU. -+ */ -+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, -+ &csf_info->backend->insert_index_to_accumulate); -+ csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; ++ lockdep_assert_held(&kbdev->ktrace.lock); + -+ /* Submit the accumulator task into the work queue. */ -+ queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work); ++ kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); ++ dev_dbg(kbdev->dev, "%s", buffer); +} + -+static void -+kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ struct kbase_hwcnt_backend_csf_if_enable *enable) ++struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) +{ -+ enum kbase_hwcnt_physical_set phys_counter_set; -+ struct kbase_hwcnt_physical_enable_map phys_enable_map; ++ struct kbase_ktrace_msg *trace_msg; + -+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ lockdep_assert_held(&ktrace->lock); + -+ /* process the enable_map to guarantee the block header is enabled which -+ * is needed for delta calculation. -+ */ -+ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); ++ trace_msg = &ktrace->rbuf[ktrace->next_in]; + -+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set); ++ /* Update the ringbuffer indices */ ++ ktrace->next_in = (ktrace->next_in + 1) & KBASE_KTRACE_MASK; ++ if (ktrace->next_in == ktrace->first_out) ++ ktrace->first_out = (ktrace->first_out + 1) & KBASE_KTRACE_MASK; + -+ /* Use processed enable_map to enable HWC in HW level. 
*/ -+ enable->fe_bm = phys_enable_map.fe_bm; -+ enable->shader_bm = phys_enable_map.shader_bm; -+ enable->tiler_bm = phys_enable_map.tiler_bm; -+ enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; -+ enable->counter_set = phys_counter_set; -+ enable->clk_enable_map = enable_map->clk_enable_map; ++ return trace_msg; +} -+ -+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -+static int -+kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) ++void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, ++ struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, ++ struct kbase_context *kctx, kbase_ktrace_flag_t flags, ++ u64 info_val) +{ -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ struct kbase_hwcnt_backend_csf_if_enable enable; -+ int err; ++ lockdep_assert_held(&ktrace->lock); + -+ if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) -+ return -EINVAL; ++ trace_msg->thread_id = task_pid_nr(current); ++ trace_msg->cpu = task_cpu(current); + -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ktime_get_real_ts64(&trace_msg->timestamp); + -+ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); ++ /* No need to store a flag about whether there was a kctx, tgid==0 is ++ * sufficient ++ */ ++ if (kctx) { ++ trace_msg->kctx_tgid = kctx->tgid; ++ trace_msg->kctx_id = kctx->id; ++ } else { ++ trace_msg->kctx_tgid = 0; ++ trace_msg->kctx_id = 0; ++ } ++ trace_msg->info_val = info_val; ++ trace_msg->backend.gpu.code = code; ++ trace_msg->backend.gpu.flags = flags; ++} + -+ /* enable_state should be DISABLED before we transfer it to enabled */ -+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) -+ return -EIO; ++void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, ++ struct kbase_context *kctx, kbase_ktrace_flag_t flags, ++ u64 info_val) ++{ ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; + -+ err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer, -+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, -+ kbasep_hwcnt_backend_watchdog_timer_cb, -+ backend_csf->info); -+ if (err) -+ return err; ++ if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace))) ++ return; + -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; -+ WARN_ON(!completion_done(&backend_csf->dump_completed)); -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); ++ WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL)); + -+ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf, &enable); ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + -+ kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + -+ return 0; ++ /* Fill the common part of the message (including backend.gpu.flags) */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, ++ info_val); ++ ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + -+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -+static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) ++static void 
kbasep_ktrace_clear_locked(struct kbase_device *kbdev) +{ -+ int errcode; -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ -+ if (!backend_csf) -+ return -EINVAL; -+ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map); -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return errcode; ++ lockdep_assert_held(&kbdev->ktrace.lock); ++ kbdev->ktrace.first_out = kbdev->ktrace.next_in; +} -+ -+static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( -+ struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) ++void kbasep_ktrace_clear(struct kbase_device *kbdev) +{ -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); -+ -+ while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || -+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags); -+ -+ wait_event(backend_csf->enable_state_waitq, -+ (backend_csf->enable_state != -+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && -+ (backend_csf->enable_state != -+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); ++ unsigned long flags; + -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags); -+ } ++ spin_lock_irqsave(&kbdev->ktrace.lock, flags); ++ kbasep_ktrace_clear_locked(kbdev); ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + -+/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) ++void kbasep_ktrace_dump(struct kbase_device *kbdev) +{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ bool do_disable = false; ++ unsigned long flags; ++ u32 start; ++ u32 end; ++ char buffer[KTRACE_DUMP_MESSAGE_SIZE] = "Dumping trace:\n"; + -+ WARN_ON(!backend_csf); ++ kbasep_ktrace_format_header(buffer, sizeof(buffer), strlen(buffer)); ++ dev_dbg(kbdev->dev, "%s", buffer); + -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ spin_lock_irqsave(&kbdev->ktrace.lock, flags); ++ start = kbdev->ktrace.first_out; ++ end = kbdev->ktrace.next_in; + -+ /* Make sure we wait until any previous enable or disable have completed -+ * before doing anything. -+ */ -+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); ++ while (start != end) { ++ struct kbase_ktrace_msg *trace_msg = &kbdev->ktrace.rbuf[start]; + -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || -+ backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { -+ /* If we are already disabled or in an unrecoverable error -+ * state, there is nothing for us to do. 
-+ */ -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return; -+ } ++ kbasep_ktrace_dump_msg(kbdev, trace_msg); + -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; -+ complete_all(&backend_csf->dump_completed); -+ /* Only disable if we were previously enabled - in all other -+ * cases the call to disable will have already been made. -+ */ -+ do_disable = true; ++ start = (start + 1) & KBASE_KTRACE_MASK; + } ++ dev_dbg(kbdev->dev, "TRACE_END"); + -+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); -+ WARN_ON(!completion_done(&backend_csf->dump_completed)); -+ -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ -+ /* Deregister the timer and block until any timer callback has completed. -+ * We've transitioned out of the ENABLED state so we can guarantee it -+ * won't reschedule itself. -+ */ -+ backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer); -+ -+ /* Block until any async work has completed. We have transitioned out of -+ * the ENABLED state so we can guarantee no new work will concurrently -+ * be submitted. -+ */ -+ flush_workqueue(backend_csf->hwc_dump_workq); -+ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ -+ if (do_disable) -+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); ++ kbasep_ktrace_clear_locked(kbdev); + -+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); ++} + -+ switch (backend_csf->enable_state) { -+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); -+ break; -+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); -+ break; -+ default: -+ WARN_ON(true); -+ break; -+ } ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++struct trace_seq_state { ++ struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; ++ u32 start; ++ u32 end; ++}; + -+ backend_csf->user_requested = false; -+ backend_csf->watchdog_last_seen_insert_idx = 0; ++static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) ++{ ++ struct trace_seq_state *state = s->private; ++ int i; + -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ if (*pos == 0) ++ /* See Documentation/filesystems/seq_file.txt */ ++ return SEQ_START_TOKEN; + -+ /* After disable, zero the header of all buffers in the ring buffer back -+ * to 0 to prepare for the next enable. -+ */ -+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); ++ if (*pos > KBASE_KTRACE_SIZE) ++ return NULL; ++ i = state->start + *pos; ++ if ((state->end >= state->start && i >= state->end) || ++ i >= state->end + KBASE_KTRACE_SIZE) ++ return NULL; + -+ /* Sync zeroed buffers to avoid coherency issues on future use. */ -+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf, 0, -+ backend_csf->info->ring_buf_cnt, false); ++ i &= KBASE_KTRACE_MASK; + -+ /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare -+ * for next enable. 
-+ */ -+ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); ++ return &state->trace_buf[i]; +} + -+/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ -+static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, -+ u64 *dump_time_ns) ++static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) +{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ bool do_request = false; -+ bool watchdog_dumping = false; -+ -+ if (!backend_csf) -+ return -EINVAL; ++} + -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) ++{ ++ struct trace_seq_state *state = s->private; ++ int i; + -+ /* If we're transitioning to enabled there's nothing to accumulate, and -+ * the user dump buffer is already zeroed. We can just short circuit to -+ * the DUMP_COMPLETED state. -+ */ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; -+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); -+ kbasep_hwcnt_backend_csf_cc_update(backend_csf); -+ backend_csf->user_requested = true; -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return 0; -+ } ++ if (data != SEQ_START_TOKEN) ++ (*pos)++; + -+ /* Otherwise, make sure we're already enabled. */ -+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ return -EIO; -+ } ++ i = (state->start + *pos) & KBASE_KTRACE_MASK; ++ if (i == state->end) ++ return NULL; + -+ /* Make sure that this is either the first request since enable or the -+ * previous user dump has completed or a watchdog dump is in progress, -+ * so we can avoid midway through a user dump. -+ * If user request comes while a watchdog dumping is in progress, -+ * the user request takes the ownership of the watchdog dumping sample by -+ * changing the dump_state so the interrupt for the watchdog -+ * request can be processed instead of ignored. -+ */ -+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && -+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) && -+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) { -+ /* HWC is disabled or another user dump is ongoing, -+ * or we're on fault. -+ */ -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ /* HWC is disabled or another dump is ongoing, or we are on -+ * fault. -+ */ -+ return -EIO; -+ } ++ return &state->trace_buf[i]; ++} + -+ /* Reset the completion so dump_wait() has something to wait on. 
*/ -+ reinit_completion(&backend_csf->dump_completed); ++static int kbasep_ktrace_seq_show(struct seq_file *s, void *data) ++{ ++ struct kbase_ktrace_msg *trace_msg = data; ++ char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + -+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) -+ watchdog_dumping = true; ++ /* If this is the start, print a header */ ++ if (data == SEQ_START_TOKEN) ++ kbasep_ktrace_format_header(buffer, sizeof(buffer), 0); ++ else ++ kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + -+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && -+ !backend_csf->info->fw_in_protected_mode) { -+ /* Only do the request if we are fully enabled and not in -+ * protected mode. -+ */ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; -+ do_request = true; -+ } else { -+ /* Skip the request and waiting for ack and go straight to -+ * checking the insert and kicking off the worker to do the dump -+ */ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; -+ } ++ seq_printf(s, "%s\n", buffer); ++ return 0; ++} + -+ /* CSF firmware might enter protected mode now, but still call request. -+ * That is fine, as we changed state while holding the lock, so the -+ * protected mode enter function will query the insert and launch the -+ * dumping worker. -+ * At some point we will get the dump request ACK saying a dump is done, -+ * but we can ignore it if we are not in the REQUESTED state and process -+ * it in next round dumping worker. -+ */ ++static const struct seq_operations kbasep_ktrace_seq_ops = { ++ .start = kbasep_ktrace_seq_start, ++ .next = kbasep_ktrace_seq_next, ++ .stop = kbasep_ktrace_seq_stop, ++ .show = kbasep_ktrace_seq_show, ++}; + -+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); -+ kbasep_hwcnt_backend_csf_cc_update(backend_csf); -+ backend_csf->user_requested = true; ++static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) ++{ ++ struct kbase_device *kbdev = inode->i_private; ++ unsigned long flags; + -+ if (do_request) { -+ /* If a watchdog dumping is in progress, don't need to do -+ * another request, just update the dump_state and take the -+ * ownership of the sample which watchdog requested. -+ */ -+ if (!watchdog_dumping) -+ backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx); -+ } else -+ kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); ++ struct trace_seq_state *state; + -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ state = __seq_open_private(file, &kbasep_ktrace_seq_ops, ++ sizeof(*state)); ++ if (!state) ++ return -ENOMEM; + -+ /* Modify watchdog timer to delay the regular check time since -+ * just requested. 
-+ */ -+ backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer, -+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); ++ spin_lock_irqsave(&kbdev->ktrace.lock, flags); ++ state->start = kbdev->ktrace.first_out; ++ state->end = kbdev->ktrace.next_in; ++ memcpy(state->trace_buf, kbdev->ktrace.rbuf, sizeof(state->trace_buf)); ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); + + return 0; +} + -+/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -+static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) -+{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ int errcode; -+ -+ if (!backend_csf) -+ return -EINVAL; ++static const struct file_operations kbasep_ktrace_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_ktrace_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; + -+ wait_for_completion(&backend_csf->dump_completed); ++void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("mali_trace", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_ktrace_debugfs_fops); ++} ++#endif /* CONFIG_DEBUG_FS */ + -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ /* Make sure the last dump actually succeeded when user requested is -+ * set. -+ */ -+ if (backend_csf->user_requested && -+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || -+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) -+ errcode = 0; -+ else -+ errcode = -EIO; ++#else /* KBASE_KTRACE_TARGET_RBUF */ + -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); -+ -+ return errcode; -+} -+ -+/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -+static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ int errcode; -+ u64 ts; -+ -+ if (!backend_csf) -+ return -EINVAL; ++ CSTD_UNUSED(kbdev); ++} ++#endif /* CONFIG_DEBUG_FS */ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h +new file mode 100644 +index 000000000..11f0b5c42 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h +@@ -0,0 +1,238 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Request a dump so we can clear all current counters. 
*/ -+ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); -+ if (!errcode) -+ /* Wait for the manual dump or auto dump to be done and -+ * accumulator to be updated. -+ */ -+ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); ++/* ++ * DOC: Kbase's own trace, 'KTrace' ++ * ++ * Low overhead trace specific to kbase, aimed at: ++ * - common use-cases for tracing kbase specific functionality to do with ++ * running work on the GPU ++ * - easy 1-line addition of new types of trace ++ * ++ * KTrace can be recorded in one or more of the following targets: ++ * - KBASE_KTRACE_TARGET_RBUF: low overhead ringbuffer protected by an ++ * irq-spinlock, output available via dev_dbg() and debugfs file ++ * - KBASE_KTRACE_TARGET_FTRACE: ftrace based tracepoints under 'mali' events ++ */ + -+ return errcode; -+} ++#ifndef _KBASE_DEBUG_KTRACE_H_ ++#define _KBASE_DEBUG_KTRACE_H_ + -+/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ -+static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend, -+ struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, -+ bool accumulate) -+{ -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; -+ int ret; -+ size_t clk; ++#if KBASE_KTRACE_TARGET_FTRACE ++#include "mali_linux_trace.h" ++#endif + -+ if (!backend_csf || !dst || !dst_enable_map || -+ (backend_csf->info->metadata != dst->metadata) || -+ (dst_enable_map->metadata != dst->metadata)) -+ return -EINVAL; ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_csf.h" ++#else ++#include "debug/backend/mali_kbase_debug_ktrace_jm.h" ++#endif + -+ /* Extract elapsed cycle count for each clock domain if enabled. */ -+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) -+ { -+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) -+ continue; ++/** ++ * kbase_ktrace_init - initialize kbase ktrace. ++ * @kbdev: kbase device ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_ktrace_init(struct kbase_device *kbdev); + -+ /* Reset the counter to zero if accumulation is off. */ -+ if (!accumulate) -+ dst->clk_cnt_buf[clk] = 0; -+ dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk]; -+ } ++/** ++ * kbase_ktrace_term - terminate kbase ktrace. ++ * @kbdev: kbase device ++ */ ++void kbase_ktrace_term(struct kbase_device *kbdev); + -+ /* We just return the user buffer without checking the current state, -+ * as it is undefined to call this function without a prior succeeding -+ * one to dump_wait(). -+ */ -+ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); ++/** ++ * kbase_ktrace_hook_wrapper - wrapper so that dumping ktrace can be done via a ++ * callback. ++ * @param: kbase device, cast to void pointer ++ */ ++void kbase_ktrace_hook_wrapper(void *param); + -+ return ret; -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if ++ * the selected targets support it. ++ * @kbdev: kbase device ++ * ++ * There is no matching 'term' call, debugfs_remove_recursive() is sufficient. ++ */ ++void kbase_ktrace_debugfs_init(struct kbase_device *kbdev); ++#endif /* CONFIG_DEBUG_FS */ + ++/* ++ * KTrace target for internal ringbuffer ++ */ ++#if KBASE_KTRACE_TARGET_RBUF +/** -+ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend. -+ * @backend_csf: Pointer to CSF backend to destroy. 
++ * kbasep_ktrace_initialized - Check whether kbase ktrace is initialized + * -+ * Can be safely called on a backend in any state of partial construction. ++ * @ktrace: ktrace of kbase device. + * ++ * Return: true if ktrace has been initialized. + */ -+static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) ++static inline bool kbasep_ktrace_initialized(struct kbase_ktrace *ktrace) +{ -+ if (!backend_csf) -+ return; -+ -+ destroy_workqueue(backend_csf->hwc_dump_workq); -+ -+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf); -+ -+ kfree(backend_csf->accum_buf); -+ backend_csf->accum_buf = NULL; -+ -+ kfree(backend_csf->old_sample_buf); -+ backend_csf->old_sample_buf = NULL; -+ -+ kfree(backend_csf->to_user_buf); -+ backend_csf->to_user_buf = NULL; -+ -+ kfree(backend_csf); ++ return ktrace->rbuf != NULL; +} + +/** -+ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance. -+ * -+ * @csf_info: Non-NULL pointer to backend info. -+ * @out_backend: Non-NULL pointer to where backend is stored on success. ++ * kbasep_ktrace_add - internal function to add trace to the ringbuffer. ++ * @kbdev: kbase device ++ * @code: ktrace code ++ * @kctx: kbase context, or NULL if no context ++ * @flags: flags about the message ++ * @info_val: generic information about @code to add to the trace + * -+ * Return: 0 on success, else error code. ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD() instead. + */ -+static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, -+ struct kbase_hwcnt_backend_csf **out_backend) -+{ -+ struct kbase_hwcnt_backend_csf *backend_csf = NULL; -+ int errcode = -ENOMEM; -+ -+ WARN_ON(!csf_info); -+ WARN_ON(!out_backend); -+ -+ backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL); -+ if (!backend_csf) -+ goto alloc_error; -+ -+ backend_csf->info = csf_info; -+ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); -+ -+ backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); -+ if (!backend_csf->accum_buf) -+ goto err_alloc_acc_buf; -+ -+ backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); -+ if (!backend_csf->old_sample_buf) -+ goto err_alloc_pre_sample_buf; -+ -+ backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); -+ if (!backend_csf->to_user_buf) -+ goto err_alloc_user_sample_buf; -+ -+ errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, -+ &backend_csf->ring_buf_cpu_base, -+ &backend_csf->ring_buf); -+ if (errcode) -+ goto err_ring_buf_alloc; -+ errcode = -ENOMEM; -+ -+ /* Zero all performance enable header to prepare for first enable. */ -+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); -+ -+ /* Sync zeroed buffers to avoid coherency issues on use. */ -+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf, 0, -+ backend_csf->info->ring_buf_cnt, false); -+ -+ init_completion(&backend_csf->dump_completed); -+ -+ init_waitqueue_head(&backend_csf->enable_state_waitq); -+ -+ /* Allocate a single threaded work queue for dump worker and threshold -+ * worker. 
-+ */ -+ backend_csf->hwc_dump_workq = -+ alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!backend_csf->hwc_dump_workq) -+ goto err_alloc_workqueue; -+ -+ INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker); -+ INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker); ++void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, ++ struct kbase_context *kctx, kbase_ktrace_flag_t flags, ++ u64 info_val); + -+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; -+ complete_all(&backend_csf->dump_completed); -+ backend_csf->user_requested = false; -+ backend_csf->watchdog_last_seen_insert_idx = 0; ++/** ++ * kbasep_ktrace_clear - clear the trace ringbuffer ++ * @kbdev: kbase device ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_CLEAR() instead. ++ */ ++void kbasep_ktrace_clear(struct kbase_device *kbdev); + -+ *out_backend = backend_csf; -+ return 0; ++/** ++ * kbasep_ktrace_dump - dump ktrace ringbuffer to dev_dbg(), then clear it ++ * @kbdev: kbase device ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_DUMP() instead. ++ */ ++void kbasep_ktrace_dump(struct kbase_device *kbdev); + -+err_alloc_workqueue: -+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, -+ backend_csf->ring_buf); -+err_ring_buf_alloc: -+ kfree(backend_csf->to_user_buf); -+ backend_csf->to_user_buf = NULL; -+err_alloc_user_sample_buf: -+ kfree(backend_csf->old_sample_buf); -+ backend_csf->old_sample_buf = NULL; -+err_alloc_pre_sample_buf: -+ kfree(backend_csf->accum_buf); -+ backend_csf->accum_buf = NULL; -+err_alloc_acc_buf: -+ kfree(backend_csf); -+alloc_error: -+ return errcode; -+} ++#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ ++ kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ ++ info_val) \ + -+/* CSF backend implementation of kbase_hwcnt_backend_init_fn */ -+static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, -+ struct kbase_hwcnt_backend **out_backend) -+{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = NULL; -+ struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; -+ int errcode; -+ bool success = false; ++#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ ++ kbasep_ktrace_clear(kbdev) + -+ if (!info || !out_backend) -+ return -EINVAL; ++#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ ++ kbasep_ktrace_dump(kbdev) + -+ /* Create the backend. */ -+ errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf); -+ if (errcode) -+ return errcode; ++#else /* KBASE_KTRACE_TARGET_RBUF */ + -+ /* If it was not created before, attach it to csf_info. -+ * Use spin lock to avoid concurrent initialization. 
-+ */ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ if (csf_info->backend == NULL) { -+ csf_info->backend = backend_csf; -+ *out_backend = (struct kbase_hwcnt_backend *)backend_csf; -+ success = true; -+ if (csf_info->unrecoverable_error_happened) -+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; -+ } -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(code); \ ++ CSTD_UNUSED(kctx); \ ++ CSTD_UNUSED(info_val); \ ++ CSTD_NOP(0); \ ++ } while (0) + -+ /* Destroy the new created backend if the backend has already created -+ * before. In normal case, this won't happen if the client call init() -+ * function properly. -+ */ -+ if (!success) { -+ kbasep_hwcnt_backend_csf_destroy(backend_csf); -+ return -EBUSY; -+ } ++#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(0); \ ++ } while (0) ++#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(0); \ ++ } while (0) ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + -+ return 0; -+} ++/* ++ * KTrace target for Linux's ftrace ++ */ ++#if KBASE_KTRACE_TARGET_FTRACE + -+/* CSF backend implementation of kbase_hwcnt_backend_term_fn */ -+static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) -+{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ ++ trace_mali_##code(kctx, info_val) + -+ if (!backend) -+ return; ++#else /* KBASE_KTRACE_TARGET_FTRACE */ ++#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(code); \ ++ CSTD_UNUSED(kctx); \ ++ CSTD_UNUSED(info_val); \ ++ CSTD_NOP(0); \ ++ } while (0) ++#endif /* KBASE_KTRACE_TARGET_FTRACE */ + -+ kbasep_hwcnt_backend_csf_dump_disable(backend); ++/* No 'clear' implementation for ftrace yet */ ++#define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(0); \ ++ } while (0) + -+ /* Set the backend in csf_info to NULL so we won't handle any external -+ * notification anymore since we are terminating. -+ */ -+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); -+ backend_csf->info->backend = NULL; -+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++/* No 'dump' implementation for ftrace yet */ ++#define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ ++ do { \ ++ CSTD_UNUSED(kbdev); \ ++ CSTD_NOP(0); \ ++ } while (0) + -+ kbasep_hwcnt_backend_csf_destroy(backend_csf); -+} ++/* ++ * Master set of macros to route KTrace to any of the targets ++ */ + +/** -+ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. -+ * @info: Pointer to info to destroy. -+ * -+ * Can be safely called on a backend info in any state of partial construction. ++ * KBASE_KTRACE_ADD - Add trace values ++ * @kbdev: kbase device ++ * @code: trace code ++ * @kctx: kbase context, or NULL if no context ++ * @info_val: generic information about @code to add to the trace + * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). 
Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. + */ -+static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info) -+{ -+ if (!info) -+ return; -+ -+ /* The backend should be destroyed before the info object destroy. */ -+ WARN_ON(info->backend != NULL); -+ -+ /* The metadata should be destroyed before the info object destroy. */ -+ WARN_ON(info->metadata != NULL); ++#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure function calls */ \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ ++ } while (0) + -+ kfree(info); -+} ++/** ++ * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) ++ * @kbdev: kbase device ++ */ ++#define KBASE_KTRACE_CLEAR(kbdev) \ ++ do { \ ++ KBASE_KTRACE_RBUF_CLEAR(kbdev); \ ++ KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ ++ } while (0) + +/** -+ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info. ++ * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) ++ * @kbdev: kbase device ++ */ ++#define KBASE_KTRACE_DUMP(kbdev) \ ++ do { \ ++ KBASE_KTRACE_RBUF_DUMP(kbdev); \ ++ KBASE_KTRACE_FTRACE_DUMP(kbdev); \ ++ } while (0) ++ ++#endif /* _KBASE_DEBUG_KTRACE_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h +new file mode 100644 +index 000000000..6103c3ee0 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h +@@ -0,0 +1,181 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure -+ * used to create backend interface. -+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer. -+ * MUST be power of 2. -+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create -+ * backend interface. -+ * @out_info: Non-NULL pointer to where info is stored on success. ++ * (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, else error code. 
+ */ -+static int -+kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, -+ struct kbase_hwcnt_watchdog_interface *watchdog_if, -+ const struct kbase_hwcnt_backend_csf_info **out_info) -+{ -+ struct kbase_hwcnt_backend_csf_info *info = NULL; + -+ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) || -+ WARN_ON(!is_power_of_2(ring_buf_cnt))) -+ return -EINVAL; ++/* ++ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** ++ */ + -+ info = kmalloc(sizeof(*info), GFP_KERNEL); -+ if (!info) -+ return -ENOMEM; ++/* ++ * The purpose of this header file is just to contain a list of trace code ++ * identifiers ++ * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace.h ++ * ++ * Each identifier is wrapped in a macro, so that its string form and enum form ++ * can be created ++ * ++ * Each macro is separated with a comma, to allow insertion into an array ++ * initializer or enum definition block. ++ * ++ * This allows automatic creation of an enum and a corresponding array of ++ * strings ++ * ++ * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. ++ * After #including, the includer MUST #under KBASE_KTRACE_CODE_MAKE_CODE. ++ * ++ * e.g.: ++ * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X ++ * typedef enum ++ * { ++ * #define KBASE_KTRACE_CODE_MAKE_CODE( X ) KBASE_KTRACE_CODE( X ) ++ * #include "mali_kbase_debug_ktrace_codes.h" ++ * #undef KBASE_KTRACE_CODE_MAKE_CODE ++ * } kbase_ktrace_code; ++ * ++ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE ++ * ++ * ++ * The use of the macro here is: ++ * - KBASE_KTRACE_CODE_MAKE_CODE( X ) ++ * ++ * Which produces: ++ * - For an enum, KBASE_KTRACE_CODE_X ++ * - For a string, "X" ++ * ++ * ++ * For example: ++ * - KBASE_KTRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: ++ * - KBASE_KTRACE_CODE_JM_JOB_COMPLETE for the enum ++ * - "JM_JOB_COMPLETE" for the string ++ * - To use it to trace an event, do: ++ * - KBASE_KTRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); ++ */ + -+ *info = (struct kbase_hwcnt_backend_csf_info) -+ { -+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) -+ .counter_set = KBASE_HWCNT_SET_SECONDARY, -+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) -+ .counter_set = KBASE_HWCNT_SET_TERTIARY, -+#else -+ /* Default to primary */ -+ .counter_set = KBASE_HWCNT_SET_PRIMARY, ++#if 0 /* Dummy section to avoid breaking formatting */ ++int dummy_array[] = { +#endif -+ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt, -+ .fw_in_protected_mode = false, .unrecoverable_error_happened = false, -+ .watchdog_if = watchdog_if, -+ }; -+ *out_info = info; -+ -+ return 0; -+} -+ -+/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */ -+static const struct kbase_hwcnt_metadata * -+kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) -+{ -+ if (!info) -+ return NULL; -+ -+ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata); -+ -+ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata; -+} -+ -+static void -+kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) -+{ -+ bool do_disable = false; -+ -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); -+ -+ /* We are already in or transitioning to the unrecoverable error 
state. -+ * Early out. -+ */ -+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || -+ (backend_csf->enable_state == -+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER)) -+ return; -+ -+ /* If we are disabled, we know we have no pending workers, so skip the -+ * waiting state. -+ */ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); -+ return; -+ } -+ -+ /* Trigger a disable only if we are not already transitioning to -+ * disabled, we don't want to disable twice if an unrecoverable error -+ * happens while we are disabling. -+ */ -+ do_disable = -+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); -+ -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); -+ -+ /* Transition the dump to the IDLE state and unblock any waiters. The -+ * IDLE state signifies an error. -+ */ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; -+ complete_all(&backend_csf->dump_completed); + -+ /* Trigger a disable only if we are not already transitioning to -+ * disabled, - we don't want to disable twice if an unrecoverable error -+ * happens while we are disabling. ++ /* ++ * Core events + */ -+ if (do_disable) -+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); -+} -+ -+static void -+kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) -+{ -+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); -+ -+ switch (backend_csf->enable_state) { -+ case KBASE_HWCNT_BACKEND_CSF_DISABLED: -+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: -+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: -+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: -+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: -+ /* Already disabled or disabling, or in an unrecoverable error. -+ * Nothing to be done to handle the error. -+ */ -+ return; -+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: -+ /* A seemingly recoverable error that occurs while we are -+ * transitioning to enabled is probably unrecoverable. -+ */ -+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf); -+ return; -+ case KBASE_HWCNT_BACKEND_CSF_ENABLED: -+ /* Start transitioning to the disabled state. We can't wait for -+ * it as this recoverable error might be triggered from an -+ * interrupt. The wait will be done in the eventual call to -+ * disable(). -+ */ -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); -+ /* Transition the dump to the IDLE state and unblock any -+ * waiters. The IDLE state signifies an error. 
-+ */ -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; -+ complete_all(&backend_csf->dump_completed); -+ -+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); -+ return; -+ } -+} -+ -+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info = -+ (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ csf_info->fw_in_protected_mode = true; ++ /* no info_val */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), ++ /* no info_val */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), ++ /* info_val == GPU_IRQ_STATUS register */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), ++ /* info_val == bits cleared */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), ++ /* info_val == GPU_IRQ_STATUS register */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), ++ /* info_val == dump address */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), + -+ /* Call on_prfcnt_sample() to trigger collection of the protected mode -+ * entry auto-sample if there is currently a pending dump request. ++ /* ++ * Power Management Events + */ -+ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); -+} -+ -+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ csf_info->fw_in_protected_mode = false; -+} -+ -+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface) -+{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ -+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); -+ csf_info->unrecoverable_error_happened = true; -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+ return; -+ } -+ -+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); -+ -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+} -+ -+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface) -+{ -+ unsigned long flags = 0UL; -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ -+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); -+ csf_info->unrecoverable_error_happened = false; -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+ return; -+ } -+ backend_csf = csf_info->backend; -+ -+ if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && -+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { -+ /* Before a reset occurs, we must either have been disabled -+ * (else we lose data) or we should have encountered an -+ * unrecoverable error. 
Either way, we will have disabled the -+ * interface and waited for any workers that might have still -+ * been in flight. -+ * If not in these states, fire off one more disable to make -+ * sure everything is turned off before the power is pulled. -+ * We can't wait for this disable to complete, but it doesn't -+ * really matter, the power is being pulled. -+ */ -+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); -+ } -+ -+ /* A reset is the only way to exit the unrecoverable error state */ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); -+ } -+ -+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); -+} -+ -+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), ++ /* info_val == kbdev->pm.active_count*/ ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), ++ /* info_val == kbdev->pm.active_count*/ ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), ++ /* info_val == policy number, or -1 for "Already changing" */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), ++ /* info_val == policy number */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), ++ /* info_val == policy number */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), + -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) -+ return; -+ backend_csf = csf_info->backend; ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_POWEROFF_WAIT_WQ), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK), + -+ /* Skip the dump_work if it's a watchdog request. 
*/ -+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; -+ return; -+ } ++ /* info_val = l2 state */ ++#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n), ++#include "backend/gpu/mali_kbase_pm_l2_states.h" ++#undef KBASEP_L2_STATE + -+ /* If the current state is not REQUESTED, this HWC sample will be -+ * skipped and processed in next dump_request. ++ /* ++ * Context Scheduler events + */ -+ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) -+ return; -+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; -+ -+ kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); -+} -+ -+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) -+ return; -+ backend_csf = csf_info->backend; -+ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) -+ /* Submit the threshold work into the work queue to consume the -+ * available samples. -+ */ -+ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); -+} -+ -+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) -+ return; -+ -+ /* Called when an overflow occurs. We treat this as a recoverable error, -+ * so we start transitioning to the disabled state. -+ * We could try and handle it while enabled, but in a real system we -+ * never expect an overflow to occur so there is no point implementing -+ * complex recovery code when we can just turn ourselves off instead for -+ * a while. ++ /* info_val == kctx->refcount */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), ++ /* info_val == kctx->refcount */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* ++ * Arbitration events + */ -+ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); -+} -+ -+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) -+ return; -+ backend_csf = csf_info->backend; -+ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); -+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { -+ /* Unexpected, but we are already in the right state so just -+ * ignore it. -+ */ -+ } else { -+ /* Unexpected state change, assume everything is broken until -+ * we reset. 
-+ */ -+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); -+ } -+} -+ -+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_backend_csf *backend_csf; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); -+ -+ /* Early out if the backend does not exist. */ -+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) -+ return; -+ backend_csf = csf_info->backend; -+ -+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { -+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( -+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); -+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { -+ /* Unexpected, but we are already in the right state so just -+ * ignore it. -+ */ -+ } else { -+ /* Unexpected state change, assume everything is broken until -+ * we reset. -+ */ -+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); -+ } -+} -+ -+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ struct kbase_hwcnt_gpu_info gpu_info; -+ -+ if (!iface) -+ return -EINVAL; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ -+ WARN_ON(!csf_info->csf_if->get_prfcnt_info); -+ -+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info); ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), ++#endif + -+ /* The clock domain counts should not exceed the number of maximum -+ * number of clock regulators. ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h" ++#else ++#include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" ++#endif ++ /* ++ * Unused code just to make it easier to not have a comma at the end. 
++ * All other codes MUST come before this + */ -+ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) -+ return -EIO; -+ -+ gpu_info.l2_count = csf_info->prfcnt_info.l2_count; -+ gpu_info.core_mask = csf_info->prfcnt_info.core_mask; -+ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; -+ gpu_info.prfcnt_values_per_block = -+ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; -+ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set, -+ &csf_info->metadata); -+} -+ -+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface) -+{ -+ struct kbase_hwcnt_backend_csf_info *csf_info; -+ -+ if (!iface) -+ return; -+ -+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; -+ if (csf_info->metadata) { -+ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); -+ csf_info->metadata = NULL; -+ } -+} -+ -+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, -+ struct kbase_hwcnt_watchdog_interface *watchdog_if, -+ struct kbase_hwcnt_backend_interface *iface) -+{ -+ int errcode; -+ const struct kbase_hwcnt_backend_csf_info *info = NULL; -+ -+ if (!iface || !csf_if || !watchdog_if) -+ return -EINVAL; -+ -+ /* The buffer count must be power of 2 */ -+ if (!is_power_of_2(ring_buf_cnt)) -+ return -EINVAL; -+ -+ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info); -+ if (errcode) -+ return errcode; -+ -+ iface->info = (struct kbase_hwcnt_backend_info *)info; -+ iface->metadata = kbasep_hwcnt_backend_csf_metadata; -+ iface->init = kbasep_hwcnt_backend_csf_init; -+ iface->term = kbasep_hwcnt_backend_csf_term; -+ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns; -+ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable; -+ iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock; -+ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable; -+ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear; -+ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request; -+ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait; -+ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get; -+ -+ return 0; -+} ++ KBASE_KTRACE_CODE_MAKE_CODE(DUMMY) + -+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) -+{ -+ if (!iface) -+ return; ++#if 0 /* Dummy section to avoid breaking formatting */ ++}; ++#endif + -+ kbasep_hwcnt_backend_csf_info_destroy( -+ (const struct kbase_hwcnt_backend_csf_info *)iface->info); -+ memset(iface, 0, sizeof(*iface)); -+} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h ++/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h new file mode 100644 -index 000000000..9c5a5c996 +index 000000000..a0fc9e51d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h -@@ -0,0 +1,153 @@ ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h +@@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -184641,148 +186792,182 @@ index 000000000..9c5a5c996 + * + */ + -+/* -+ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF -+ * backend. -+ */ ++#ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ ++#define _KBASE_DEBUG_KTRACE_DEFS_H_ + -+#ifndef _KBASE_HWCNT_BACKEND_CSF_H_ -+#define _KBASE_HWCNT_BACKEND_CSF_H_ ++/* Enable SW tracing when set */ ++#if defined(CONFIG_MALI_BIFROST_ENABLE_TRACE) || defined(CONFIG_MALI_BIFROST_SYSTEM_TRACE) ++#define KBASE_KTRACE_ENABLE 1 ++#endif + -+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" -+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" -+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" ++#ifndef KBASE_KTRACE_ENABLE ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define KBASE_KTRACE_ENABLE 1 ++#else /* CONFIG_MALI_BIFROST_DEBUG */ ++#define KBASE_KTRACE_ENABLE 0 ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++#endif /* KBASE_KTRACE_ENABLE */ + -+/** -+ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend -+ * interface. -+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure -+ * used to create backend interface. -+ * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring -+ * buffer, MUST be power of 2. -+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used -+ * to create backend interface. -+ * @iface: Non-NULL pointer to backend interface structure that is filled -+ * in on creation success. -+ * -+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. ++/* Select targets for recording of trace: + * -+ * Return: 0 on success, else error code. + */ -+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, -+ struct kbase_hwcnt_watchdog_interface *watchdog_if, -+ struct kbase_hwcnt_backend_interface *iface); ++#if KBASE_KTRACE_ENABLE + -+/** -+ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF -+ * hardware counter backend. -+ * @iface: Non-NULL pointer to backend interface structure -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface); ++#ifdef CONFIG_MALI_BIFROST_SYSTEM_TRACE ++#define KBASE_KTRACE_TARGET_FTRACE 1 ++#else /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ ++#define KBASE_KTRACE_TARGET_FTRACE 0 ++#endif /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ + -+/** -+ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF -+ * hardware counter backend. -+ * @iface: Non-NULL pointer to backend interface structure. ++#ifdef CONFIG_MALI_BIFROST_ENABLE_TRACE ++#define KBASE_KTRACE_TARGET_RBUF 1 ++#else /* CONFIG_MALI_BIFROST_ENABLE_TRACE*/ ++#define KBASE_KTRACE_TARGET_RBUF 0 ++#endif /* CONFIG_MALI_BIFROST_ENABLE_TRACE */ ++ ++#else /* KBASE_KTRACE_ENABLE */ ++#define KBASE_KTRACE_TARGET_FTRACE 0 ++#define KBASE_KTRACE_TARGET_RBUF 0 ++#endif /* KBASE_KTRACE_ENABLE */ ++ ++/* ++ * Note: Some backends define flags in this type even if the RBUF target is ++ * disabled (they get discarded with CSTD_UNUSED(), but they're still ++ * referenced) + */ -+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface); ++typedef u8 kbase_ktrace_flag_t; + -+/** -+ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend -+ * interface. -+ * @iface: Pointer to interface to destroy. 
++#if KBASE_KTRACE_TARGET_RBUF ++typedef u8 kbase_ktrace_code_t; ++ ++/* ++ * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in ++ * the backend, since updates can be made to one backend in a way that doesn't ++ * affect the other. + * -+ * Can be safely called on an all-zeroed interface, or on an already destroyed -+ * interface. ++ * However, modifying the common part could require both backend versions to be ++ * updated. + */ -+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface); + -+/** -+ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive -+ * notification that protected mode -+ * has been entered. -+ * @iface: Non-NULL pointer to HWC backend interface. ++/* ++ * union kbase_ktrace_backend - backend specific part of a trace message. ++ * At the very least, this must contain a kbase_ktrace_code_t 'code' member ++ * and a kbase_ktrace_flag_t 'flags' inside a "gpu" sub-struct. Should a ++ * backend need several sub structs in its union to optimize the data storage ++ * for different message types, then it can use a "common initial sequence" to ++ * allow 'flags' and 'code' to pack optimally without corrupting them. ++ * Different backends need not share common initial sequences between them, they ++ * only need to ensure they have gpu.flags and gpu.code members, it ++ * is up to the backend then how to order these. + */ -+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface); ++union kbase_ktrace_backend; + -+/** -+ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive -+ * notification that protected mode has -+ * been exited. -+ * @iface: Non-NULL pointer to HWC backend interface. -+ */ -+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface); ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + -+/** -+ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function -+ * called when unrecoverable -+ * errors are detected. -+ * @iface: Non-NULL pointer to HWC backend interface. ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_defs_csf.h" ++#else ++#include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" ++#endif ++ ++#if KBASE_KTRACE_TARGET_RBUF ++/* Indicates if the trace message has backend related info. + * -+ * This should be called on encountering errors that can only be recovered from -+ * with reset, or that may put HWC logic in state that could result in hang. For -+ * example, on bus error, or when FW becomes unresponsive. ++ * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg ++ * as uninitialized, apart from the mandatory parts: ++ * - code ++ * - flags + */ -+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface); ++#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) + -+/** -+ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be -+ * called immediately before a -+ * reset. Takes us out of the -+ * unrecoverable error state, if we -+ * were in it. -+ * @iface: Non-NULL pointer to HWC backend interface. -+ */ -+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); ++/* Collect all the common flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_COMMON_ALL \ ++ (KBASE_KTRACE_FLAG_BACKEND) + -+/** -+ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample -+ * complete interrupt handler. 
-+ * @iface: Non-NULL pointer to HWC backend interface. -+ */ -+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface); ++#define KBASE_KTRACE_FLAG_ALL \ ++ (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) + -+/** -+ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter -+ * buffer reach threshold -+ * interrupt handler. -+ * @iface: Non-NULL pointer to HWC backend interface. -+ */ -+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface); ++#define KBASE_KTRACE_SHIFT (9) /* 512 entries */ ++#define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) ++#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) + -+/** -+ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer -+ * overflow interrupt handler. -+ * @iface: Non-NULL pointer to HWC backend interface. -+ */ -+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface); ++#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X + -+/** -+ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled -+ * interrupt handler. -+ * @iface: Non-NULL pointer to HWC backend interface. ++/* Note: compiletime_assert() about this against kbase_ktrace_code_t is in ++ * kbase_ktrace_init() + */ -+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface); ++enum kbase_ktrace_code { ++ /* ++ * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE ++ * THIS MUST BE USED AT THE START OF THE ENUM ++ */ ++#define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) ++#include ++#undef KBASE_KTRACE_CODE_MAKE_CODE ++ /* Comma on its own, to extend the list */ ++ , ++ /* Must be the last in the enum */ ++ KBASE_KTRACE_CODE_COUNT ++}; + +/** -+ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter -+ * disabled interrupt handler. -+ * @iface: Non-NULL pointer to HWC backend interface. ++ * struct kbase_ktrace_msg - object representing a trace message added to trace ++ * buffer trace_rbuf in &kbase_device ++ * @timestamp: CPU timestamp at which the trace message was added. ++ * @thread_id: id of the thread in the context of which trace message was ++ * added. ++ * @cpu: indicates which CPU the @thread_id was scheduled on when the ++ * trace message was added. ++ * @kctx_tgid: Thread group ID of the &kbase_context associated with the ++ * message, or 0 if none associated. ++ * @kctx_id: Unique identifier of the &kbase_context associated with the ++ * message. Only valid if @kctx_tgid != 0. ++ * @info_val: value specific to the type of event being traced. Refer to the ++ * specific code in enum kbase_ktrace_code. ++ * @backend: backend-specific trace information. All backends must implement ++ * a minimum common set of members. 
+ */ -+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); ++struct kbase_ktrace_msg { ++ struct timespec64 timestamp; ++ u32 thread_id; ++ u32 cpu; ++ pid_t kctx_tgid; ++ u32 kctx_id; ++ u64 info_val; ++ union kbase_ktrace_backend backend; ++}; + -+#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h ++struct kbase_ktrace { ++ spinlock_t lock; ++ u16 first_out; ++ u16 next_in; ++ struct kbase_ktrace_msg *rbuf; ++}; ++ ++ ++static inline void kbase_ktrace_compiletime_asserts(void) ++{ ++ /* See also documentation of enum kbase_ktrace_code */ ++ compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || ++ KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), ++ "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); ++ compiletime_assert((KBASE_KTRACE_FLAG_BACKEND_ALL & KBASE_KTRACE_FLAG_COMMON_ALL) == 0, ++ "KTrace backend flags intersect with KTrace common flags"); ++ ++} ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++#endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h new file mode 100644 -index 000000000..382a3adaa +index 000000000..ba93f29fe --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h -@@ -0,0 +1,302 @@ ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h +@@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -184800,297 +186985,253 @@ index 000000000..382a3adaa + * + */ + -+/* -+ * Virtual interface for CSF hardware counter backend. -+ */ -+ -+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_ -+#define _KBASE_HWCNT_BACKEND_CSF_IF_H_ -+ -+#include -+ -+struct kbase_hwcnt_backend_csf_if_ctx; -+ -+struct kbase_hwcnt_backend_csf_if_ring_buf; -+ -+/** -+ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection -+ * structure. -+ * @fe_bm: Front End counters selection bitmask. -+ * @shader_bm: Shader counters selection bitmask. -+ * @tiler_bm: Tiler counters selection bitmask. -+ * @mmu_l2_bm: MMU_L2 counters selection bitmask. -+ * @counter_set: The performance counter set to enable. -+ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle -+ * counter for a given clock domain. -+ */ -+struct kbase_hwcnt_backend_csf_if_enable { -+ u32 fe_bm; -+ u32 shader_bm; -+ u32 tiler_bm; -+ u32 mmu_l2_bm; -+ u8 counter_set; -+ u64 clk_enable_map; -+}; ++#ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ ++#define _KBASE_DEBUG_KTRACE_INTERNAL_H_ + -+/** -+ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter -+ * information. -+ * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware -+ * counters are sub-divided into 4 classes: front-end, shader, tiler, and -+ * memory system (l2 cache + MMU). -+ * @prfcnt_fw_size: Total length in bytes of all the firmware counters data. -+ * @dump_bytes: Bytes of GPU memory required to perform a performance -+ * counter dump. 
dump_bytes = prfcnt_hw_size + prfcnt_fw_size. -+ * @prfcnt_block_size: Bytes of each performance counter block. -+ * @l2_count: The MMU L2 cache count. -+ * @core_mask: Shader core mask. -+ * @clk_cnt: Clock domain count in the system. -+ * @clearing_samples: Indicates whether counters are cleared after each sample -+ * is taken. -+ */ -+struct kbase_hwcnt_backend_csf_if_prfcnt_info { -+ size_t prfcnt_hw_size; -+ size_t prfcnt_fw_size; -+ size_t dump_bytes; -+ size_t prfcnt_block_size; -+ size_t l2_count; -+ u64 core_mask; -+ u8 clk_cnt; -+ bool clearing_samples; -+}; ++#if KBASE_KTRACE_TARGET_RBUF + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the -+ * backend spinlock is -+ * held. -+ * @ctx: Non-NULL pointer to a CSF context. -+ */ -+typedef void -+kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); ++#define KTRACE_DUMP_MESSAGE_SIZE 256 + +/** -+ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. ++ * kbasep_ktrace_backend_format_header - format the backend part of the header ++ * @buffer: buffer to write to ++ * @sz: size of @buffer in bytes ++ * @written: pointer to storage for updating bytes written so far to @buffer + * -+ * @ctx: Non-NULL pointer to a CSF context. -+ * @flags: Pointer to the memory location that would store the previous -+ * interrupt state. ++ * The backend must format only the non-common backend specific parts of the ++ * header. It must format them as though they were standalone. The caller will ++ * handle adding any delimiters around this. + */ -+typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ unsigned long *flags); ++void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); + +/** -+ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. ++ * kbasep_ktrace_backend_format_msg - format the backend part of the message ++ * @trace_msg: ktrace message ++ * @buffer: buffer to write to ++ * @sz: size of @buffer in bytes ++ * @written: pointer to storage for updating bytes written so far to @buffer + * -+ * @ctx: Non-NULL pointer to a CSF context. -+ * @flags: Previously stored interrupt state when Scheduler interrupt -+ * spinlock was acquired. ++ * The backend must format only the non-common backend specific parts of the ++ * message. It must format them as though they were standalone. The caller will ++ * handle adding any delimiters around this. ++ * ++ * A caller may have the flags member of @trace_msg with ++ * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting ++ * appropriately. + */ -+typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ unsigned long flags); ++void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, ++ char *buffer, int sz, s32 *written); + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance -+ * counter information. -+ * @ctx: Non-NULL pointer to a CSF context. -+ * @prfcnt_info: Non-NULL pointer to struct where performance counter -+ * information should be stored. -+ */ -+typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( -+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); + +/** -+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer -+ * for CSF interface. -+ * @ctx: Non-NULL pointer to a CSF context. 
-+ * @buf_count: The buffer count in the ring buffer to be allocated, -+ * MUST be power of 2. -+ * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is -+ * stored when success. -+ * @ring_buf: Non-NULL pointer to where ring buffer is stored when success. ++ * kbasep_ktrace_reserve - internal function to reserve space for a ktrace ++ * message ++ * @ktrace: kbase device's ktrace + * -+ * A ring buffer is needed by the CSF interface to do manual HWC sample and -+ * automatic HWC samples, the buffer count in the ring buffer MUST be power -+ * of 2 to meet the hardware requirement. ++ * This may also empty the oldest entry in the ringbuffer to make space. + * -+ * Return: 0 on success, else error code. ++ * Return: ktrace message + */ -+typedef int -+kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u32 buf_count, void **cpu_dump_base, -+ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); ++struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); + +/** -+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers -+ * memory. -+ * @ctx: Non-NULL pointer to a CSF context. -+ * @ring_buf: Non-NULL pointer to the ring buffer. -+ * @buf_index_first: The first buffer index in the ring buffer to be synced, -+ * inclusive. -+ * @buf_index_last: The last buffer index in the ring buffer to be synced, -+ * exclusive. -+ * @for_cpu: The direction of sync to be applied, set to true when CPU -+ * cache needs invalidating before reading the buffer, and set -+ * to false after CPU writes to flush these before this memory -+ * is overwritten by the GPU. ++ * kbasep_ktrace_msg_init - internal function to initialize just the common ++ * part of a ktrace message ++ * @ktrace: kbase device's ktrace ++ * @trace_msg: ktrace message to initialize ++ * @code: ktrace code ++ * @kctx: kbase context, or NULL if no context ++ * @flags: flags about the message ++ * @info_val: generic information about @code to add to the trace + * -+ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU -+ * are correctly observed. ++ * The common part includes the mandatory parts of the backend part + */ -+typedef void -+kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, -+ u32 buf_index_first, u32 buf_index_last, bool for_cpu); ++void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, ++ struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, ++ struct kbase_context *kctx, kbase_ktrace_flag_t flags, ++ u64 info_val); + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for -+ * the CSF interface. -+ * -+ * @ctx: Non-NULL pointer to a CSF interface context. -+ * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. -+ */ -+typedef void -+kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current -+ * timestamp of the CSF -+ * interface. -+ * @ctx: Non-NULL pointer to a CSF interface context. 
++#endif /* _KBASE_DEBUG_KTRACE_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h +new file mode 100644 +index 000000000..6d9664716 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h +@@ -0,0 +1,123 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Return: CSF interface timestamp in nanoseconds. -+ */ -+typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); -+ -+/** -+ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware -+ * counter in CSF interface. -+ * @ctx: Non-NULL pointer to a CSF interface context. -+ * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. -+ * @enable: Non-NULL pointer to the enable map of HWC. ++ * (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved. + * -+ * Requires lock to be taken before calling. -+ */ -+typedef void -+kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, -+ struct kbase_hwcnt_backend_csf_if_enable *enable); -+ -+/** -+ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter -+ * in CSF interface. -+ * @ctx: Non-NULL pointer to a CSF interface context. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * Requires lock to be taken before calling. -+ */ -+typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); -+ -+/** -+ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @ctx: Non-NULL pointer to the interface context. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Requires lock to be taken before calling. + */ -+typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and -+ * insert indexes of the -+ * ring buffer. -+ * -+ * @ctx: Non-NULL pointer to a CSF interface context. -+ * @extract_index: Non-NULL pointer where current extract index to be saved. -+ * @insert_index: Non-NULL pointer where current insert index to be saved. -+ * -+ * Requires lock to be taken before calling. ++/* ++ * NOTE: This must **only** be included through mali_linux_trace.h, ++ * otherwise it will fail to setup tracepoints correctly + */ -+typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u32 *extract_index, u32 *insert_index); + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract -+ * index of the ring -+ * buffer. -+ * -+ * @ctx: Non-NULL pointer to a CSF interface context. -+ * @extract_index: New extract index to be set. -+ * -+ * Requires lock to be taken before calling. 
-+ */ -+typedef void -+kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u32 extract_index); ++#if !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) ++#define _KBASE_DEBUG_LINUX_KTRACE_H_ + -+/** -+ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current -+ * GPU cycle count. -+ * @ctx: Non-NULL pointer to a CSF interface context. -+ * @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved, -+ * the array size should be at least as big as the number of -+ * clock domains returned by get_prfcnt_info interface. -+ * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock -+ * domain. -+ * -+ * Requires lock to be taken before calling. -+ */ -+typedef void -+kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u64 *cycle_counts, u64 clk_enable_map); ++#if KBASE_KTRACE_TARGET_FTRACE + -+/** -+ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual -+ * interface. -+ * @ctx: CSF interface context. -+ * @assert_lock_held: Function ptr to assert backend spinlock is held. -+ * @lock: Function ptr to acquire backend spinlock. -+ * @unlock: Function ptr to release backend spinlock. -+ * @get_prfcnt_info: Function ptr to get performance counter related -+ * information. -+ * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. -+ * @ring_buf_sync: Function ptr to sync ring buffer to CPU. -+ * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. -+ * @timestamp_ns: Function ptr to get the current CSF interface -+ * timestamp. -+ * @dump_enable: Function ptr to enable dumping. -+ * @dump_disable: Function ptr to disable dumping. -+ * @dump_request: Function ptr to request a dump. -+ * @get_indexes: Function ptr to get extract and insert indexes of the -+ * ring buffer. -+ * @set_extract_index: Function ptr to set extract index of ring buffer. -+ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count. ++DECLARE_EVENT_CLASS(mali_add_template, ++ TP_PROTO(struct kbase_context *kctx, u64 info_val), ++ TP_ARGS(kctx, info_val), ++ TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u64, info_val) ++ ), ++ TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->info_val) ++); ++ ++/* DEFINE_MALI_ADD_EVENT is available also to backends for backend-specific ++ * simple trace codes + */ -+struct kbase_hwcnt_backend_csf_if { -+ struct kbase_hwcnt_backend_csf_if_ctx *ctx; -+ kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; -+ kbase_hwcnt_backend_csf_if_lock_fn *lock; -+ kbase_hwcnt_backend_csf_if_unlock_fn *unlock; -+ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; -+ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; -+ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; -+ kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; -+ kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; -+ kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; -+ kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; -+ kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; -+ kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; -+ kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; -+ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; -+}; ++#define DEFINE_MALI_ADD_EVENT(name) \ ++DEFINE_EVENT(mali_add_template, mali_##name, \ ++ TP_PROTO(struct kbase_context *kctx, u64 info_val), \ ++ TP_ARGS(kctx, info_val)) ++DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); ++DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); ++DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); ++DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); ++DEFINE_MALI_ADD_EVENT(PM_PWRON); ++DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); ++DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); ++DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); ++DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); ++DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); ++DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2); ++DEFINE_MALI_ADD_EVENT(PM_GPU_ON); ++DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); ++DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); ++DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); ++DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); ++DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); ++DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE); ++DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); ++DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); ++DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ); ++DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK); ++DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK); ++#define KBASEP_L2_STATE(n) 
DEFINE_MALI_ADD_EVENT(PM_L2_ ## n); ++#include "backend/gpu/mali_kbase_pm_l2_states.h" ++#undef KBASEP_L2_STATE ++DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); ++DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT + -+#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c ++DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); ++DEFINE_MALI_ADD_EVENT(ARB_VM_STATE); ++DEFINE_MALI_ADD_EVENT(ARB_VM_EVT); ++ ++#endif ++#if MALI_USE_CSF ++#include "backend/mali_kbase_debug_linux_ktrace_csf.h" ++#else ++#include "backend/mali_kbase_debug_linux_ktrace_jm.h" ++#endif ++ ++#undef DEFINE_MALI_ADD_EVENT ++ ++#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++ ++#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) */ +diff --git a/drivers/gpu/arm/bifrost/device/Kbuild b/drivers/gpu/arm/bifrost/device/Kbuild +new file mode 100755 +index 000000000..723ffd215 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/device/Kbuild +@@ -0,0 +1,33 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++bifrost_kbase-y += \ ++ device/mali_kbase_device.o \ ++ device/mali_kbase_device_hw.o ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += \ ++ device/backend/mali_kbase_device_csf.o \ ++ device/backend/mali_kbase_device_hw_csf.o ++else ++ bifrost_kbase-y += \ ++ device/backend/mali_kbase_device_jm.o \ ++ device/backend/mali_kbase_device_hw_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c new file mode 100644 -index 000000000..b11f3a4e5 +index 000000000..f7054f5b0 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c -@@ -0,0 +1,787 @@ ++++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +@@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -185108,782 +187249,927 @@ index 000000000..b11f3a4e5 + * + */ + -+/* -+ * CSF GPU HWC backend firmware interface APIs. 
-+ */ -+ -+#include -+#include ++#include +#include -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+#include + -+#include "csf/mali_kbase_csf_firmware.h" -+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" -+#include "mali_kbase_hwaccess_time.h" -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include + -+#include -+#include "mali_kbase_ccswe.h" -+ -+ -+/* Ring buffer virtual address start at 4GB */ -+#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +/** -+ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface -+ * used to save the manual and -+ * auto HWC samples from -+ * firmware. -+ * @gpu_dump_base: Starting GPU base address of the ring buffer. -+ * @cpu_dump_base: Starting CPU address for the mapping. -+ * @buf_count: Buffer count in the ring buffer, MUST be power of 2. -+ * @as_nr: Address space number for the memory mapping. -+ * @phys: Physical memory allocation used by the mapping. -+ * @num_pages: Size of the mapping, in memory pages. ++ * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * When a kbase driver is removed, terminate CSF firmware and hardware counter ++ * components. + */ -+struct kbase_hwcnt_backend_csf_if_fw_ring_buf { -+ u64 gpu_dump_base; -+ void *cpu_dump_base; -+ size_t buf_count; -+ u32 as_nr; -+ struct tagged_addr *phys; -+ size_t num_pages; -+}; ++static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.firmware_inited) { ++ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx); ++ kbase_vinstr_term(kbdev->vinstr_ctx); ++ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); ++ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); ++ kbase_csf_firmware_unload_term(kbdev); ++ } ++} + +/** -+ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF -+ * interface, used to communicate -+ * with firmware. -+ * @kbdev: KBase device. -+ * @buf_bytes: The size in bytes for each buffer in the ring buffer. -+ * @clk_cnt: The number of clock domains in the system. -+ * The maximum is 64. -+ * @clk_enable_map: Bitmask of enabled clocks -+ * @rate_listener: Clock rate listener callback state. -+ * @ccswe_shader_cores: Shader cores cycle count software estimator. ++ * kbase_backend_late_init - Perform any backend-specific initialization. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, or an error code on failure. 
+ */ -+struct kbase_hwcnt_backend_csf_if_fw_ctx { -+ struct kbase_device *kbdev; -+ size_t buf_bytes; -+ u8 clk_cnt; -+ u64 clk_enable_map; -+ struct kbase_clk_rate_listener rate_listener; -+ struct kbase_ccswe ccswe_shader_cores; -+}; -+ -+static void -+kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++static int kbase_backend_late_init(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; -+ struct kbase_device *kbdev; ++ int err; + -+ WARN_ON(!ctx); ++ err = kbase_hwaccess_pm_init(kbdev); ++ if (err) ++ return err; + -+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ kbdev = fw_ctx->kbdev; ++ err = kbase_reset_gpu_init(kbdev); ++ if (err) ++ goto fail_reset_gpu_init; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+} ++ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); ++ if (err) ++ goto fail_pm_powerup; + -+static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ unsigned long *flags) -+ __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) -+ ctx->kbdev->csf.scheduler.interrupt_lock) -+{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; -+ struct kbase_device *kbdev; ++ err = kbase_backend_timer_init(kbdev); ++ if (err) ++ goto fail_timer; + -+ WARN_ON(!ctx); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { ++ dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); ++ err = -EINVAL; ++ goto fail_interrupt_test; ++ } ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ kbdev = fw_ctx->kbdev; ++ kbase_ipa_control_init(kbdev); + -+ kbase_csf_scheduler_spin_lock(kbdev, flags); -+} ++ /* Initialise the metrics subsystem, it couldn't be initialized earlier ++ * due to dependency on kbase_ipa_control. ++ */ ++ err = kbasep_pm_metrics_init(kbdev); ++ if (err) ++ goto fail_pm_metrics_init; + -+static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ unsigned long flags) -+ __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) -+ ctx->kbdev->csf.scheduler.interrupt_lock) -+{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; -+ struct kbase_device *kbdev; ++ /* Do the initialisation of devfreq. ++ * Devfreq needs backend_timer_init() for completion of its ++ * initialisation and it also needs to catch the first callback ++ * occurrence of the runtime_suspend event for maintaining state ++ * coherence with the backend power management, hence needs to be ++ * placed before the kbase_pm_context_idle(). 
++ */ ++ err = kbase_backend_devfreq_init(kbdev); ++ if (err) ++ goto fail_devfreq_init; + -+ WARN_ON(!ctx); ++ /* Update gpuprops with L2_FEATURES if applicable */ ++ err = kbase_gpuprops_update_l2_features(kbdev); ++ if (err) ++ goto fail_update_l2_features; + -+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ kbdev = fw_ctx->kbdev; ++ err = kbase_backend_time_init(kbdev); ++ if (err) ++ goto fail_update_l2_features; + -+ kbase_csf_scheduler_spin_lock_assert_held(kbdev); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++ ++ kbase_pm_context_idle(kbdev); ++ ++ mutex_init(&kbdev->fw_load_lock); ++ ++ return 0; ++ ++fail_update_l2_features: ++ kbase_backend_devfreq_term(kbdev); ++fail_devfreq_init: ++ kbasep_pm_metrics_term(kbdev); ++fail_pm_metrics_init: ++ kbase_ipa_control_term(kbdev); ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++fail_interrupt_test: ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ ++ kbase_backend_timer_term(kbdev); ++fail_timer: ++ kbase_pm_context_idle(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++fail_pm_powerup: ++ kbase_reset_gpu_term(kbdev); ++fail_reset_gpu_init: ++ kbase_hwaccess_pm_term(kbdev); ++ ++ return err; +} + +/** -+ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback -+ * -+ * @rate_listener: Callback state -+ * @clk_index: Clock index -+ * @clk_rate_hz: Clock frequency(hz) ++ * kbase_backend_late_term - Perform any backend-specific termination. ++ * @kbdev: Device pointer + */ -+static void -+kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener, -+ u32 clk_index, u32 clk_rate_hz) ++static void kbase_backend_late_term(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of( -+ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener); -+ u64 timestamp_ns; -+ -+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) -+ return; -+ -+ timestamp_ns = ktime_get_raw_ns(); -+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz); ++ kbase_backend_devfreq_term(kbdev); ++ kbasep_pm_metrics_term(kbdev); ++ kbase_ipa_control_term(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++ kbase_reset_gpu_term(kbdev); ++ kbase_hwaccess_pm_term(kbdev); +} + +/** -+ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking ++ * kbase_csf_early_init - Early initialization for firmware & scheduler. ++ * @kbdev: Device pointer + * -+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context. -+ * @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ * Return: 0 on success, error code otherwise. 
+ */ -+static void -+kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, -+ u64 clk_enable_map) ++static int kbase_csf_early_init(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = fw_ctx->kbdev; -+ -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { -+ /* software estimation for non-top clock domains */ -+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; -+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; -+ u32 cur_freq; -+ unsigned long flags; -+ u64 timestamp_ns; -+ -+ timestamp_ns = ktime_get_raw_ns(); -+ -+ spin_lock_irqsave(&rtm->lock, flags); ++ int err = kbase_csf_firmware_early_init(kbdev); + -+ cur_freq = (u32)clk_data->clock_val; -+ kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); -+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq); ++ if (err) ++ return err; + -+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener); ++ err = kbase_csf_scheduler_early_init(kbdev); + -+ spin_unlock_irqrestore(&rtm->lock, flags); -+ } ++ return err; ++} + -+ fw_ctx->clk_enable_map = clk_enable_map; ++/** ++ * kbase_csf_early_term() - Early termination for firmware & scheduler. ++ * @kbdev: Device pointer ++ */ ++static void kbase_csf_early_term(struct kbase_device *kbdev) ++{ ++ kbase_csf_scheduler_early_term(kbdev); ++ kbase_csf_firmware_early_term(kbdev); +} + +/** -+ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking ++ * kbase_csf_late_init - late initialization for firmware. ++ * @kbdev: Device pointer + * -+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context. ++ * Return: 0 on success, error code otherwise. + */ -+static void -+kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) ++static int kbase_csf_late_init(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = fw_ctx->kbdev; -+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; -+ u64 clk_enable_map = fw_ctx->clk_enable_map; ++ int err = kbase_csf_firmware_late_init(kbdev); + -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) -+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); ++ return err; +} + -+static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( -+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++/** ++ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog ++ * interface. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if successful or a negative error code on failure. 
++ */ ++static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_watchdog_if_timer_create( ++ &kbdev->hwcnt_watchdog_timer); ++} + -+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ -+ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, -+ .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, -+ .prfcnt_hw_size = -+ KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, -+ .prfcnt_fw_size = -+ KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, -+ .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE, -+ .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE, -+ .clk_cnt = 1, -+ .clearing_samples = true, -+ }; ++/** ++ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog ++ * interface. ++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer); ++} + -+ fw_ctx->buf_bytes = prfcnt_info->dump_bytes; -+#else -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; -+ struct kbase_device *kbdev; -+ u32 prfcnt_size; -+ u32 prfcnt_hw_size; -+ u32 prfcnt_fw_size; -+ u32 prfcnt_block_size = -+ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; ++/** ++ * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend ++ * firmware interface. ++ * @kbdev: Device pointer ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_csf_if_fw_create( ++ kbdev, &kbdev->hwcnt_backend_csf_if_fw); ++} + -+ WARN_ON(!ctx); -+ WARN_ON(!prfcnt_info); ++/** ++ * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend ++ * firmware interface. ++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); ++} + -+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ kbdev = fw_ctx->kbdev; -+ prfcnt_size = kbdev->csf.global_iface.prfcnt_size; -+ prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); -+ prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); -+ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; ++/** ++ * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_csf_create( ++ &kbdev->hwcnt_backend_csf_if_fw, ++ KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, ++ &kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface); ++} + -+ /* Read the block size if the GPU has the register PRFCNT_FEATURES -+ * which was introduced in architecture version 11.x.7. -+ */ -+ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= -+ GPU_ID2_PRODUCT_TTUX) { -+ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) -+ << 8; -+ } ++/** ++ * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. 
++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); ++} + -+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ -+ .prfcnt_hw_size = prfcnt_hw_size, -+ .prfcnt_fw_size = prfcnt_fw_size, -+ .dump_bytes = fw_ctx->buf_bytes, -+ .prfcnt_block_size = prfcnt_block_size, -+ .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, -+ .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, -+ .clk_cnt = fw_ctx->clk_cnt, -+ .clearing_samples = true, -+ }; ++static const struct kbase_device_init dev_init[] = { ++#if !IS_ENABLED(CONFIG_MALI_REAL_HW) ++ { kbase_gpu_device_create, kbase_gpu_device_destroy, ++ "Dummy model initialization failed" }, ++#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++ { assign_irqs, NULL, "IRQ search failed" }, ++#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ { registers_map, registers_unmap, "Register map failed" }, ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ { power_control_init, power_control_term, "Power control initialization failed" }, ++ { kbase_device_io_history_init, kbase_device_io_history_term, ++ "Register access history initialization failed" }, ++ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, ++ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, ++ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, ++ { kbase_device_misc_init, kbase_device_misc_term, ++ "Miscellaneous device initialization failed" }, ++ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, ++ "Priority control manager initialization failed" }, ++ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, ++ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, ++ { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, ++ "Protected memory allocator initialization failed" }, ++ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, ++ { kbase_protected_mode_init, kbase_protected_mode_term, ++ "Protected mode subsystem initialization failed" }, ++ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, ++ { kbase_device_timeline_init, kbase_device_timeline_term, ++ "Timeline stream initialization failed" }, ++ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, ++ "Clock rate trace manager initialization failed" }, ++ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, ++ "GPU hwcnt backend watchdog interface creation failed" }, ++ { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term, ++ "GPU hwcnt backend CSF interface creation failed" }, ++ { kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term, ++ "GPU hwcnt backend creation failed" }, ++ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, ++ "GPU hwcnt context initialization failed" }, ++ { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" }, ++ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, ++ { kbase_csf_late_init, NULL, "Late CSF initialization failed" }, ++ { NULL, kbase_device_firmware_hwcnt_term, NULL }, ++ { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term, ++ "CSF fault debug initialization failed" }, ++ { 
kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, ++ /* Sysfs init needs to happen before registering the device with ++ * misc_register(), otherwise it causes a race condition between ++ * registering the device and a uevent event being generated for ++ * userspace, causing udev rules to run which might expect certain ++ * sysfs attributes present. As a result of the race condition ++ * we avoid, some Mali sysfs entries may have appeared to udev ++ * to not exist. ++ * For more information, see ++ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the ++ * paragraph that starts with "Word of warning", currently the ++ * second-last paragraph. ++ */ ++ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, ++ { kbase_device_misc_register, kbase_device_misc_deregister, ++ "Misc device registration failed" }, ++ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, ++ "GPU property population failed" }, ++ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, ++#if IS_ENABLED(CONFIG_MALI_CORESIGHT) ++ { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term, ++ "Coresight initialization failed" }, ++#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ ++}; + -+ /* Block size must be multiple of counter size. */ -+ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); -+ /* Total size must be multiple of block size. */ -+ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0); -+#endif ++static void kbase_device_term_partial(struct kbase_device *kbdev, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (dev_init[i].term) ++ dev_init[i].term(kbdev); ++ } +} + -+static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( -+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, -+ struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) ++void kbase_device_term(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ struct tagged_addr *phys; -+ struct page **page_list; -+ void *cpu_addr; -+ int ret; -+ int i; -+ size_t num_pages; -+ u64 flags; -+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; -+ -+ pgprot_t cpu_map_prot = PAGE_KERNEL; -+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; ++ kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); ++ kbase_mem_halt(kbdev); ++} + -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++int kbase_device_init(struct kbase_device *kbdev) ++{ ++ int err = 0; ++ unsigned int i = 0; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. 
-+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); + -+ WARN_ON(!ctx); -+ WARN_ON(!cpu_dump_base); -+ WARN_ON(!out_ring_buf); ++ kbase_device_id_init(kbdev); ++ kbase_disjoint_init(kbdev); + -+ kbdev = fw_ctx->kbdev; ++ for (i = 0; i < ARRAY_SIZE(dev_init); i++) { ++ if (dev_init[i].init) { ++ err = dev_init[i].init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "%s error = %d\n", ++ dev_init[i].err_mes, err); ++ kbase_device_term_partial(kbdev, i); ++ break; ++ } ++ } ++ } + -+ /* The buffer count must be power of 2 */ -+ if (!is_power_of_2(buf_count)) -+ return -EINVAL; ++ return err; ++} + -+ /* alignment failure */ -+ if (gpu_va_base & (2048 - 1)) -+ return -EINVAL; ++/** ++ * kbase_device_hwcnt_csf_deferred_init - Initialize CSF deferred HWC components ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * Hardware counter components depending on firmware are initialized after CSF ++ * firmware is loaded. ++ * ++ * Return: 0 on success. An error code on failure. ++ */ ++static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev) ++{ ++ int ret = 0; + -+ fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); -+ if (!fw_ring_buf) -+ return -ENOMEM; ++ /* For CSF GPUs, HWC metadata needs to query information from CSF ++ * firmware, so the initialization of HWC metadata only can be called ++ * after firmware initialized, but firmware initialization depends on ++ * HWC backend initialization, so we need to separate HWC backend ++ * metadata initialization from HWC backend initialization. ++ */ ++ ret = kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "GPU hwcnt backend metadata creation failed"); ++ return ret; ++ } + -+ num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); -+ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); -+ if (!phys) -+ goto phys_alloc_error; ++ ret = kbase_hwcnt_virtualizer_init( ++ kbdev->hwcnt_gpu_ctx, ++ KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, ++ &kbdev->hwcnt_gpu_virt); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "GPU hwcnt virtualizer initialization failed"); ++ goto virt_fail; ++ } + -+ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); -+ if (!page_list) -+ goto page_list_alloc_error; ++ ret = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Virtual instrumentation initialization failed"); ++ goto vinstr_fail; ++ } + -+ /* Get physical page for the buffer */ -+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, -+ phys, false, NULL); -+ if (ret != num_pages) -+ goto phys_mem_pool_alloc_error; ++ ret = kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, ++ &kbdev->kinstr_prfcnt_ctx); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Performance counter instrumentation initialization failed"); ++ goto kinstr_prfcnt_fail; ++ } + -+ /* Get the CPU virtual address */ -+ for (i = 0; i < num_pages; i++) -+ page_list[i] = as_page(phys[i]); ++ return ret; + -+ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); -+ if (!cpu_addr) -+ goto vmap_error; ++kinstr_prfcnt_fail: ++ kbase_vinstr_term(kbdev->vinstr_ctx); + -+ flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++vinstr_fail: ++ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); + -+ /* Update MMU table */ -+ 
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, -+ num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, -+ mmu_sync_info, NULL, false); -+ if (ret) -+ goto mmu_insert_failed; ++virt_fail: ++ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); ++ return ret; ++} + -+ kfree(page_list); ++/** ++ * kbase_csf_firmware_deferred_init - Load and initialize CSF firmware ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * Called when a device file is opened for the first time. ++ * To meet Android GKI vendor guideline, firmware load is deferred at ++ * the time when @ref kbase_open is called for the first time. ++ * ++ * Return: 0 on success. An error code on failure. ++ */ ++static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) ++{ ++ int err = 0; + -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr; -+#else -+ fw_ring_buf->gpu_dump_base = gpu_va_base; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ fw_ring_buf->cpu_dump_base = cpu_addr; -+ fw_ring_buf->phys = phys; -+ fw_ring_buf->num_pages = num_pages; -+ fw_ring_buf->buf_count = buf_count; -+ fw_ring_buf->as_nr = MCU_AS_NR; ++ lockdep_assert_held(&kbdev->fw_load_lock); + -+ *cpu_dump_base = fw_ring_buf->cpu_dump_base; -+ *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; ++ err = kbase_csf_firmware_load_init(kbdev); ++ if (!err) { ++ unsigned long flags; + -+ return 0; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.mcu_state = KBASE_MCU_ON; ++ kbdev->csf.firmware_inited = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ dev_err(kbdev->dev, "Firmware initialization failed"); ++ } + -+mmu_insert_failed: -+ vunmap(cpu_addr); -+vmap_error: -+ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, -+ false, false); -+phys_mem_pool_alloc_error: -+ kfree(page_list); -+page_list_alloc_error: -+ kfree(phys); -+phys_alloc_error: -+ kfree(fw_ring_buf); -+ return -ENOMEM; ++ return err; +} + -+static void -+kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, -+ u32 buf_index_first, u32 buf_index_last, bool for_cpu) ++int kbase_device_firmware_init_once(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = -+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ size_t i; -+ size_t pg_first; -+ size_t pg_last; -+ u64 start_address; -+ u64 stop_address; -+ u32 ring_buf_index_first; -+ u32 ring_buf_index_last; -+ -+ WARN_ON(!ctx); -+ WARN_ON(!ring_buf); -+ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* When using the dummy backend syncing the ring buffer is unnecessary as -+ * the ring buffer is only accessed by the CPU. It may also cause data loss -+ * due to cache invalidation so return early. -+ */ -+ return; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ -+ /* The index arguments for this function form an inclusive, exclusive -+ * range. -+ * However, when masking back to the available buffers we will make this -+ * inclusive at both ends so full flushes are not 0 -> 0. 
-+ */ -+ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); -+ ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); ++ int ret = 0; + -+ /* The start address is the offset of the first buffer. */ -+ start_address = fw_ctx->buf_bytes * ring_buf_index_first; -+ pg_first = start_address >> PAGE_SHIFT; ++ mutex_lock(&kbdev->fw_load_lock); + -+ /* The stop address is the last byte in the final buffer. */ -+ stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; -+ pg_last = stop_address >> PAGE_SHIFT; ++ if (!kbdev->csf.firmware_inited) { ++ kbase_pm_context_active(kbdev); + -+ /* Check whether the buffer range wraps. */ -+ if (start_address > stop_address) { -+ /* sync the first part to the end of ring buffer. */ -+ for (i = pg_first; i < fw_ring_buf->num_pages; i++) { -+ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ ret = kbase_csf_firmware_deferred_init(kbdev); ++ if (ret) ++ goto out; + -+ if (for_cpu) { -+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), -+ PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } else { -+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), -+ PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } ++ ret = kbase_device_hwcnt_csf_deferred_init(kbdev); ++ if (ret) { ++ kbase_csf_firmware_unload_term(kbdev); ++ goto out; + } + -+ /* second part starts from page 0. */ -+ pg_first = 0; ++ kbase_csf_debugfs_init(kbdev); ++ kbase_timeline_io_debugfs_init(kbdev); ++out: ++ kbase_pm_context_idle(kbdev); + } + -+ for (i = pg_first; i <= pg_last; i++) { -+ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ mutex_unlock(&kbdev->fw_load_lock); + -+ if (for_cpu) { -+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, -+ DMA_BIDIRECTIONAL); -+ } else { -+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, -+ DMA_BIDIRECTIONAL); -+ } -+ } ++ return ret; +} ++KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once); +diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +new file mode 100644 +index 000000000..2abd62aaa +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +@@ -0,0 +1,252 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx) -+{ -+ CSTD_UNUSED(ctx); -+ return ktime_get_raw_ns(); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static void -+kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) ++/** ++ * kbase_report_gpu_fault - Report a GPU fault of the device. ++ * ++ * @kbdev: Kbase device pointer ++ * @status: Fault status ++ * @as_nr: Faulty address space ++ * @as_valid: true if address space is valid ++ * ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ */ ++static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, bool as_valid) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = -+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ -+ if (!fw_ring_buf) -+ return; ++ u64 address = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + -+ if (fw_ring_buf->phys) { -+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; ++ address |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + -+ WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, -+ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, -+ fw_ring_buf->num_pages, fw_ring_buf->num_pages, -+ MCU_AS_NR, true)); ++ /* Report GPU fault for all contexts in case either ++ * the address space is invalid or it's MCU address space. ++ */ ++ kbase_mmu_gpu_fault_interrupt(kbdev, status, as_nr, address, as_valid); ++} + -+ vunmap(fw_ring_buf->cpu_dump_base); ++static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) ++{ ++ const u32 status = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTSTATUS)); ++ const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; ++ const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> ++ GPU_FAULTSTATUS_JASID_SHIFT; ++ bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; + -+ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], -+ fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); ++ if (bus_fault) { ++ /* If as_valid, reset gpu when ASID is for MCU. 
*/ ++ if (!as_valid || (as_nr == MCU_AS_NR)) { ++ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); + -+ kfree(fw_ring_buf->phys); ++ dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } else { ++ /* Handle Bus fault */ ++ if (kbase_mmu_bus_fault_interrupt(kbdev, status, as_nr)) ++ dev_warn(kbdev->dev, ++ "fail to handle GPU bus fault ...\n"); ++ } ++ } else ++ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); + -+ kfree(fw_ring_buf); -+ } +} + -+static void -+kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, -+ struct kbase_hwcnt_backend_csf_if_enable *enable) ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ -+ u32 prfcnt_config; -+ struct kbase_device *kbdev; -+ struct kbase_csf_global_iface *global_iface; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = -+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; -+ -+ WARN_ON(!ctx); -+ WARN_ON(!ring_buf); -+ WARN_ON(!enable); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); -+ -+ kbdev = fw_ctx->kbdev; -+ global_iface = &kbdev->csf.global_iface; -+ -+ /* Configure */ -+ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); -+ prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set); ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); ++ if (val & GPU_FAULT) ++ kbase_gpu_fault_interrupt(kbdev); + -+ /* Configure the ring buffer base address */ -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, -+ fw_ring_buf->gpu_dump_base & U32_MAX); -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, -+ fw_ring_buf->gpu_dump_base >> 32); ++ if (val & GPU_PROTECTED_FAULT) { ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; + -+ /* Set extract position to 0 */ -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); ++ dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode"); + -+ /* Configure the enable bitmap */ -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm); -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); ++ /* Mask the protected fault interrupt to avoid the potential ++ * deluge of such interrupts. It will be unmasked on GPU reset. 
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* Configure the HWC set and buffer size */ -+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use( ++ kbdev))) { ++ struct base_gpu_queue_group_error const ++ err_payload = { .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { ++ .fatal_group = { ++ .status = ++ GPU_EXCEPTION_TYPE_SW_FAULT_0, ++ } } }; + -+ kbdev->csf.hwcnt.enable_pending = true; ++ kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx, ++ DF_GPU_PROTECTED_FAULT); + -+ /* Unmask the interrupts */ -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, -+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); ++ scheduler->active_protm_grp->faulted = true; ++ kbase_csf_add_group_fatal_error( ++ scheduler->active_protm_grp, &err_payload); ++ kbase_event_wakeup(scheduler->active_protm_grp->kctx); ++ } ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); + -+ /* Enable the HWC */ -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, -+ (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), -+ GLB_REQ_PRFCNT_ENABLE_MASK); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); + -+ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); ++ /* Defer the clearing to the GPU reset sequence */ ++ val &= ~GPU_PROTECTED_FAULT; ++ } + -+ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); -+} ++ if (val & RESET_COMPLETED) ++ kbase_pm_reset_done(kbdev); + -+static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx) -+{ -+ struct kbase_device *kbdev; -+ struct kbase_csf_global_iface *global_iface; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done. 
++ * We need to acquire hwaccess_lock to avoid a race condition with ++ * kbase_gpu_cache_flush_and_busy_wait ++ */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); + -+ WARN_ON(!ctx); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++#ifdef KBASE_PM_RUNTIME ++ if (val & DOORBELL_MIRROR) { ++ unsigned long flags; + -+ kbdev = fw_ctx->kbdev; -+ global_iface = &kbdev->csf.global_iface; ++ dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_disable_db_mirror_interrupt(kbdev); ++ kbdev->pm.backend.exit_gpu_sleep_mode = true; ++ kbase_csf_scheduler_invoke_tick(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++#endif + -+ /* Disable the HWC */ -+ kbdev->csf.hwcnt.enable_pending = true; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must ++ * be called after the IRQ has been cleared. This is because it might ++ * trigger further power transitions and we don't want to miss the ++ * interrupt raised to notify us that these further transitions have ++ * finished. The same applies to kbase_clean_caches_done() - if another ++ * clean was queued, it might trigger another clean, which might ++ * generate another interrupt which shouldn't be missed. ++ */ + -+ /* mask the interrupts */ -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, -+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, -+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, -+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); ++ if (val & CLEAN_CACHES_COMPLETED) ++ kbase_clean_caches_done(kbdev); + -+ /* In case we have a previous request in flight when the disable -+ * happens. -+ */ -+ kbdev->csf.hwcnt.request_pending = false; ++ if (val & (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)) { ++ kbase_pm_power_changed(kbdev); ++ } else if (val & CLEAN_CACHES_COMPLETED) { ++ /* If cache line evict messages can be lost when shader cores ++ * power down then we need to flush the L2 cache before powering ++ * down cores. When the flush completes, the shaders' state ++ * machine needs to be re-invoked to proceed with powering down ++ * cores. 
++ */ ++ if (kbdev->pm.backend.l2_always_on || ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) ++ kbase_pm_power_changed(kbdev); ++ } + -+ kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +} + -+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++bool kbase_is_register_accessible(u32 offset) +{ -+ u32 glb_req; -+ struct kbase_device *kbdev; -+ struct kbase_csf_global_iface *global_iface; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ -+ WARN_ON(!ctx); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); -+ -+ kbdev = fw_ctx->kbdev; -+ global_iface = &kbdev->csf.global_iface; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || ++ ((offset >= GPU_CONTROL_MCU_BASE) && (offset < USER_BASE))) { ++ WARN(1, "Invalid register offset 0x%x", offset); ++ return false; ++ } ++#endif + -+ /* Trigger dumping */ -+ kbdev->csf.hwcnt.request_pending = true; -+ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); -+ glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; -+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, -+ GLB_REQ_PRFCNT_SAMPLE_MASK); -+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ return true; +} ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u32 *extract_index, u32 *insert_index) ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ if (WARN_ON(!kbdev->pm.backend.gpu_powered)) ++ return; + -+ WARN_ON(!ctx); -+ WARN_ON(!extract_index); -+ WARN_ON(!insert_index); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ if (WARN_ON(kbdev->dev == NULL)) ++ return; + -+ *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface, -+ GLB_PRFCNT_EXTRACT); -+ *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface, -+ GLB_PRFCNT_INSERT); ++ if (!kbase_is_register_accessible(offset)) ++ return; ++ ++ writel(value, kbdev->reg + offset); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ value, 1); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +} ++KBASE_EXPORT_TEST_API(kbase_reg_write); + -+static void -+kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u32 extract_idx) ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ u32 val; + -+ WARN_ON(!ctx); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ if (WARN_ON(!kbdev->pm.backend.gpu_powered)) ++ return 0; + -+ /* Set the raw extract index to release the buffer back to the ring -+ * buffer. 
-+ */ -+ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT, -+ extract_idx); -+} ++ if (WARN_ON(kbdev->dev == NULL)) ++ return 0; + -+static void -+kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx, -+ u64 *cycle_counts, u64 clk_enable_map) -+{ -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; -+ u8 clk; -+ u64 timestamp_ns = ktime_get_raw_ns(); ++ if (!kbase_is_register_accessible(offset)) ++ return 0; + -+ WARN_ON(!ctx); -+ WARN_ON(!cycle_counts); -+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ val = readl(kbdev->reg + offset); + -+ for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { -+ if (!(clk_enable_map & (1ull << clk))) -+ continue; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ val, 0); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); + -+ if (clk == KBASE_CLOCK_DOMAIN_TOP) { -+ /* Read cycle count for top clock domain. */ -+ kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk], -+ NULL, NULL); -+ } else { -+ /* Estimate cycle count for non-top clock domain. */ -+ cycle_counts[clk] = -+ kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns); -+ } -+ } ++ return val; +} ++KBASE_EXPORT_TEST_API(kbase_reg_read); ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +new file mode 100644 +index 000000000..38223af21 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +@@ -0,0 +1,143 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +/** -+ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context. ++ * kbase_report_gpu_fault - Report a GPU fault. ++ * @kbdev: Kbase device pointer ++ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS ++ * was also set + * -+ * @fw_ctx: Pointer to context to destroy. ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ * It reports the details of the fault using dev_warn(). 
+ */ -+static void -+kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) ++static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ -+ if (!fw_ctx) -+ return; ++ u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); ++ u64 address = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++ ++ address |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++ ++ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", ++ status, ++ kbase_gpu_exception_name(status & 0xFF), ++ address); ++ if (multiple) ++ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); + -+ kfree(fw_ctx); +} + -+/** -+ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context. -+ * -+ * @kbdev: Non_NULL pointer to kbase device. -+ * @out_ctx: Non-NULL pointer to where info is stored on success. -+ * Return: 0 on success, else error code. -+ */ -+static int -+kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev, -+ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ -+ u8 clk; -+ int errcode = -ENOMEM; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); ++ if (val & GPU_FAULT) ++ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + -+ WARN_ON(!kbdev); -+ WARN_ON(!out_ctx); ++ if (val & RESET_COMPLETED) ++ kbase_pm_reset_done(kbdev); + -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ goto error; ++ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done. ++ * We need to acquire hwaccess_lock to avoid a race condition with ++ * kbase_gpu_cache_flush_and_busy_wait ++ */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED); + -+ ctx->kbdev = kbdev; ++ /* kbase_instr_hwcnt_sample_done frees the HWCNT pipeline to request another ++ * sample. Therefore this must be called after clearing the IRQ to avoid a ++ * race between clearing and the next sample raising the IRQ again. ++ */ ++ if (val & PRFCNT_SAMPLE_COMPLETED) ++ kbase_instr_hwcnt_sample_done(kbdev); + -+ /* Determine the number of available clock domains. */ -+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { -+ if (kbdev->pm.clk_rtm.clks[clk] == NULL) -+ break; -+ } -+ ctx->clk_cnt = clk; ++ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must ++ * be called after the IRQ has been cleared. This is because it might ++ * trigger further power transitions and we don't want to miss the ++ * interrupt raised to notify us that these further transitions have ++ * finished. The same applies to kbase_clean_caches_done() - if another ++ * clean was queued, it might trigger another clean, which might ++ * generate another interrupt which shouldn't be missed. ++ */ + -+ ctx->clk_enable_map = 0; -+ kbase_ccswe_init(&ctx->ccswe_shader_cores); -+ ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change; ++ if (val & CLEAN_CACHES_COMPLETED) ++ kbase_clean_caches_done(kbdev); + -+ *out_ctx = ctx; ++ if (val & POWER_CHANGED_ALL) { ++ kbase_pm_power_changed(kbdev); ++ } else if (val & CLEAN_CACHES_COMPLETED) { ++ /* If cache line evict messages can be lost when shader cores ++ * power down then we need to flush the L2 cache before powering ++ * down cores. 
When the flush completes, the shaders' state ++ * machine needs to be re-invoked to proceed with powering down ++ * cores. ++ */ ++ if (kbdev->pm.backend.l2_always_on || ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) ++ kbase_pm_power_changed(kbdev); ++ } + -+ return 0; -+error: -+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); -+ return errcode; ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +} + -+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw) ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ -+ if (!if_fw) -+ return; ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( -+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); -+ memset(if_fw, 0, sizeof(*if_fw)); ++ writel(value, kbdev->reg + offset); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ value, 1); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +} ++KBASE_EXPORT_TEST_API(kbase_reg_write); + -+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, -+ struct kbase_hwcnt_backend_csf_if *if_fw) ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ -+ int errcode; -+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; ++ u32 val; + -+ if (!kbdev || !if_fw) -+ return -EINVAL; ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+ errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); -+ if (errcode) -+ return errcode; ++ val = readl(kbdev->reg + offset); + -+ if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; -+ if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; -+ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; -+ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; -+ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; -+ if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; -+ if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; -+ if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; -+ if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; -+ if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; -+ if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; -+ if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; -+ if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; -+ if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; -+ if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ val, 0); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); + -+ return 0; ++ return val; +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h ++KBASE_EXPORT_TEST_API(kbase_reg_read); ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ +diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c new file mode 100644 -index 000000000..71d150669 +index 000000000..2d3672383 --- /dev/null -+++ 
b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h -@@ -0,0 +1,49 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +@@ -0,0 +1,344 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -185901,44 +188187,339 @@ index 000000000..71d150669 + * + */ + -+/* -+ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * kbase_backend_late_init - Perform any backend-specific initialization. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, or an error code on failure. + */ ++static int kbase_backend_late_init(struct kbase_device *kbdev) ++{ ++ int err; + -+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ -+#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ ++ err = kbase_hwaccess_pm_init(kbdev); ++ if (err) ++ return err; + -+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" ++ err = kbase_reset_gpu_init(kbdev); ++ if (err) ++ goto fail_reset_gpu_init; ++ ++ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); ++ if (err) ++ goto fail_pm_powerup; ++ ++ err = kbase_backend_timer_init(kbdev); ++ if (err) ++ goto fail_timer; ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { ++ dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); ++ err = -EINVAL; ++ goto fail_interrupt_test; ++ } ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ ++ err = kbase_job_slot_init(kbdev); ++ if (err) ++ goto fail_job_slot; ++ ++ /* Do the initialisation of devfreq. ++ * Devfreq needs backend_timer_init() for completion of its ++ * initialisation and it also needs to catch the first callback ++ * occurrence of the runtime_suspend event for maintaining state ++ * coherence with the backend power management, hence needs to be ++ * placed before the kbase_pm_context_idle(). 
++ */ ++ err = kbase_backend_devfreq_init(kbdev); ++ if (err) ++ goto fail_devfreq_init; ++ ++ /* Update gpuprops with L2_FEATURES if applicable */ ++ err = kbase_gpuprops_update_l2_features(kbdev); ++ if (err) ++ goto fail_update_l2_features; ++ ++ err = kbase_backend_time_init(kbdev); ++ if (err) ++ goto fail_update_l2_features; ++ ++ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++ ++ /* Idle the GPU and/or cores, if the policy wants it to */ ++ kbase_pm_context_idle(kbdev); ++ ++ mutex_init(&kbdev->fw_load_lock); ++ ++ return 0; ++ ++fail_update_l2_features: ++ kbase_backend_devfreq_term(kbdev); ++fail_devfreq_init: ++ kbase_job_slot_term(kbdev); ++fail_job_slot: ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#if IS_ENABLED(CONFIG_MALI_REAL_HW) ++fail_interrupt_test: ++#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ ++ kbase_backend_timer_term(kbdev); ++fail_timer: ++ kbase_pm_context_idle(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++fail_pm_powerup: ++ kbase_reset_gpu_term(kbdev); ++fail_reset_gpu_init: ++ kbase_hwaccess_pm_term(kbdev); ++ ++ return err; ++} + +/** -+ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface -+ * of hardware counter backend. -+ * @kbdev: Non-NULL pointer to Kbase device. -+ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on -+ * creation success. -+ * Return: 0 on success, else error code. ++ * kbase_backend_late_term - Perform any backend-specific termination. ++ * @kbdev: Device pointer + */ -+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, -+ struct kbase_hwcnt_backend_csf_if *if_fw); ++static void kbase_backend_late_term(struct kbase_device *kbdev) ++{ ++ kbase_backend_devfreq_term(kbdev); ++ kbase_job_slot_halt(kbdev); ++ kbase_job_slot_term(kbdev); ++ kbase_backend_timer_term(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++ kbase_reset_gpu_term(kbdev); ++ kbase_hwaccess_pm_term(kbdev); ++} + +/** -+ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of -+ * hardware counter backend. -+ * @if_fw: Pointer to a CSF interface to destroy. ++ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog ++ * interface. ++ * @kbdev: Device pointer ++ * Return: 0 on success, or an error code on failure. + */ -+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw); ++static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer); ++} + -+#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c ++/** ++ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog ++ * interface. ++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_jm_init - Create hardware counter backend. ++ * @kbdev: Device pointer ++ * Return: 0 on success, or an error code on failure. ++ */ ++static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend. 
++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend. ++ * @kbdev: Device pointer ++ * Return: 0 on success, or an error code on failure. ++ */ ++static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend, ++ &kbdev->hwcnt_watchdog_timer, ++ &kbdev->hwcnt_gpu_iface); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend. ++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface); ++} ++ ++static const struct kbase_device_init dev_init[] = { ++#if !IS_ENABLED(CONFIG_MALI_REAL_HW) ++ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, ++#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++ { assign_irqs, NULL, "IRQ search failed" }, ++#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ { registers_map, registers_unmap, "Register map failed" }, ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ { kbase_device_io_history_init, kbase_device_io_history_term, ++ "Register access history initialization failed" }, ++ { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, ++ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, ++ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, ++ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, ++ { kbase_device_misc_init, kbase_device_misc_term, ++ "Miscellaneous device initialization failed" }, ++ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, ++ "Priority control manager initialization failed" }, ++ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, ++ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, ++ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, ++ { kbase_protected_mode_init, kbase_protected_mode_term, ++ "Protected mode subsystem initialization failed" }, ++ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, ++ { kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" }, ++ { kbase_device_timeline_init, kbase_device_timeline_term, ++ "Timeline stream initialization failed" }, ++ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, ++ "Clock rate trace manager initialization failed" }, ++ { kbase_instr_backend_init, kbase_instr_backend_term, ++ "Instrumentation backend initialization failed" }, ++ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, ++ "GPU hwcnt backend watchdog interface creation failed" }, ++ { kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term, ++ "GPU hwcnt backend creation failed" }, ++ { kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term, ++ "GPU hwcnt watchdog backend creation failed" }, ++ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, ++ "GPU hwcnt context initialization failed" }, ++ { kbase_device_hwcnt_virtualizer_init, 
kbase_device_hwcnt_virtualizer_term, ++ "GPU hwcnt virtualizer initialization failed" }, ++ { kbase_device_vinstr_init, kbase_device_vinstr_term, ++ "Virtual instrumentation initialization failed" }, ++ { kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term, ++ "Performance counter instrumentation initialization failed" }, ++ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, ++ { kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, ++ "Job fault debug initialization failed" }, ++ { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, ++ /* Sysfs init needs to happen before registering the device with ++ * misc_register(), otherwise it causes a race condition between ++ * registering the device and a uevent event being generated for ++ * userspace, causing udev rules to run which might expect certain ++ * sysfs attributes present. As a result of the race condition ++ * we avoid, some Mali sysfs entries may have appeared to udev ++ * to not exist. ++ * For more information, see ++ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the ++ * paragraph that starts with "Word of warning", currently the ++ * second-last paragraph. ++ */ ++ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, ++ { kbase_device_misc_register, kbase_device_misc_deregister, ++ "Misc device registration failed" }, ++ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, ++ "GPU property population failed" }, ++ { NULL, kbase_dummy_job_wa_cleanup, NULL }, ++ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, ++}; ++ ++static void kbase_device_term_partial(struct kbase_device *kbdev, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (dev_init[i].term) ++ dev_init[i].term(kbdev); ++ } ++} ++ ++void kbase_device_term(struct kbase_device *kbdev) ++{ ++ kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); ++ kbasep_js_devdata_halt(kbdev); ++ kbase_mem_halt(kbdev); ++} ++ ++int kbase_device_init(struct kbase_device *kbdev) ++{ ++ int err = 0; ++ unsigned int i = 0; ++ ++ dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); ++ ++ kbase_device_id_init(kbdev); ++ kbase_disjoint_init(kbdev); ++ ++ for (i = 0; i < ARRAY_SIZE(dev_init); i++) { ++ if (dev_init[i].init) { ++ err = dev_init[i].init(kbdev); ++ if (err) { ++ if (err != -EPROBE_DEFER) ++ dev_err(kbdev->dev, "%s error = %d\n", ++ dev_init[i].err_mes, err); ++ kbase_device_term_partial(kbdev, i); ++ break; ++ } ++ } ++ } ++ ++ return err; ++} ++ ++int kbase_device_firmware_init_once(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++ mutex_lock(&kbdev->fw_load_lock); ++ ++ if (!kbdev->dummy_job_wa_loaded) { ++ ret = kbase_dummy_job_wa_load(kbdev); ++ if (!ret) ++ kbdev->dummy_job_wa_loaded = true; ++ } ++ ++ mutex_unlock(&kbdev->fw_load_lock); ++ ++ return ret; ++} +diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c new file mode 100644 -index 000000000..669701c29 +index 000000000..b2b0cfd6c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c -@@ -0,0 +1,860 @@ ++++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +@@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -185956,855 +188537,606 @@ index 000000000..669701c29 + * + */ + -+#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h" -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+#include "mali_kbase.h" -+#include "backend/gpu/mali_kbase_pm_ca.h" -+#include "mali_kbase_hwaccess_instr.h" -+#include "mali_kbase_hwaccess_time.h" -+#include "mali_kbase_ccswe.h" -+#include "backend/gpu/mali_kbase_model_linux.h" -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++/* ++ * Base kernel device APIs ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "mali_kbase_kinstr_prfcnt.h" ++#include "mali_kbase_vinstr.h" ++#include "hwcnt/mali_kbase_hwcnt_context.h" ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" + ++#include "mali_kbase_device.h" ++#include "mali_kbase_device_internal.h" +#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "backend/gpu/mali_kbase_irq_internal.h" ++#include "mali_kbase_regs_history_debugfs.h" ++#include "mali_kbase_pbha.h" + -+/** -+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance -+ * of a JM hardware counter backend. -+ * @kbdev: KBase device. -+ * @counter_set: The performance counter set to use. -+ * @metadata: Hardware counter metadata. -+ * @dump_bytes: Bytes of GPU memory required to perform a -+ * hardware counter dump. -+ * @hwcnt_gpu_info: Hardware counter block information. -+ */ -+struct kbase_hwcnt_backend_jm_info { -+ struct kbase_device *kbdev; -+ enum kbase_hwcnt_set counter_set; -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t dump_bytes; -+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info; -+}; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include "arbiter/mali_kbase_arbiter_pm.h" ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+/** -+ * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout -+ * information. -+ * @fe_cnt: Front end block count. -+ * @tiler_cnt: Tiler block count. -+ * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. -+ * @shader_cnt: Shader Core block count. -+ * @block_cnt: Total block count (sum of all other block counts). -+ * @shader_avail_mask: Bitmap of all shader cores in the system. -+ * @enable_mask_offset: Offset in array elements of enable mask in each block -+ * starting from the beginning of block. -+ * @headers_per_block: Header size per block. -+ * @counters_per_block: Counters size per block. -+ * @values_per_block: Total size per block. -+ */ -+struct kbase_hwcnt_jm_physical_layout { -+ u8 fe_cnt; -+ u8 tiler_cnt; -+ u8 mmu_l2_cnt; -+ u8 shader_cnt; -+ u8 block_cnt; -+ u64 shader_avail_mask; -+ size_t enable_mask_offset; -+ size_t headers_per_block; -+ size_t counters_per_block; -+ size_t values_per_block; -+}; ++#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + -+/** -+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. -+ * @info: Info used to create the backend. -+ * @kctx: KBase context used for GPU memory allocation and -+ * counter dumping. -+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address. -+ * @cpu_dump_va: CPU mapping of gpu_dump_va. -+ * @vmap: Dump buffer vmap. 
-+ * @to_user_buf: HWC sample buffer for client user, size -+ * metadata.dump_buf_bytes. -+ * @enabled: True if dumping has been enabled, else false. -+ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled -+ * dumping. -+ * @curr_config: Current allocated hardware resources to correctly map the -+ * source raw dump buffer to the destination dump buffer. -+ * @clk_enable_map: The enable map specifying enabled clock domains. -+ * @cycle_count_elapsed: -+ * Cycle count elapsed for a given sample period. -+ * The top clock cycle, index 0, is read directly from -+ * hardware, but the other clock domains need to be -+ * calculated with software estimation. -+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for -+ * sample period. -+ * @rate_listener: Clock rate listener callback state. -+ * @ccswe_shader_cores: Shader cores cycle count software estimator. -+ * @phys_layout: Physical memory layout information of HWC sample buffer. ++/* Number of register accesses for the buffer that we allocate during ++ * initialization time. The buffer size can be changed later via debugfs. + */ -+struct kbase_hwcnt_backend_jm { -+ const struct kbase_hwcnt_backend_jm_info *info; -+ struct kbase_context *kctx; -+ u64 gpu_dump_va; -+ void *cpu_dump_va; -+ struct kbase_vmap_struct *vmap; -+ u64 *to_user_buf; -+ bool enabled; -+ u64 pm_core_mask; -+ struct kbase_hwcnt_curr_config curr_config; -+ u64 clk_enable_map; -+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ struct kbase_clk_rate_listener rate_listener; -+ struct kbase_ccswe ccswe_shader_cores; -+ struct kbase_hwcnt_jm_physical_layout phys_layout; -+}; ++#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) ++ ++#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ ++static DEFINE_MUTEX(kbase_dev_list_lock); ++static LIST_HEAD(kbase_dev_list); ++static int kbase_dev_nr; ++ ++struct kbase_device *kbase_device_alloc(void) ++{ ++ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); ++} + +/** -+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used -+ * to create the hwcnt metadata. -+ * @kbdev: Non-NULL pointer to kbase device. -+ * @info: Non-NULL pointer to data structure to be filled in. ++ * kbase_device_all_as_init() - Initialise address space objects of the device. + * -+ * The initialised info struct will only be valid for use while kbdev is valid. ++ * @kbdev: Pointer to kbase device. + * -+ * Return: 0 on success, else error code. ++ * Return: 0 on success otherwise non-zero. 
+ */ -+static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, -+ struct kbase_hwcnt_gpu_info *info) ++static int kbase_device_all_as_init(struct kbase_device *kbdev) +{ -+ size_t clk; -+ -+ if (!kbdev || !info) -+ return -EINVAL; -+ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; -+ info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; -+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; -+#else /* CONFIG_MALI_BIFROST_NO_MALI */ -+ { -+ const struct base_gpu_props *props = &kbdev->gpu_props.props; -+ const size_t l2_count = props->l2_props.num_l2_slices; -+ const size_t core_mask = props->coherency_info.group[0].core_mask; ++ int i, err = 0; + -+ info->l2_count = l2_count; -+ info->core_mask = core_mask; -+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ err = kbase_mmu_as_init(kbdev, i); ++ if (err) ++ break; + } -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + -+ /* Determine the number of available clock domains. */ -+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { -+ if (kbdev->pm.clk_rtm.clks[clk] == NULL) -+ break; ++ if (err) { ++ while (i-- > 0) ++ kbase_mmu_as_term(kbdev, i); + } -+ info->clk_cnt = clk; + -+ return 0; ++ return err; +} + -+static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info, -+ struct kbase_hwcnt_jm_physical_layout *phys_layout) ++static void kbase_device_all_as_term(struct kbase_device *kbdev) +{ -+ u8 shader_core_cnt; -+ -+ WARN_ON(!gpu_info); -+ WARN_ON(!phys_layout); -+ -+ shader_core_cnt = fls64(gpu_info->core_mask); ++ int i; + -+ *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ -+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, -+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, -+ .mmu_l2_cnt = gpu_info->l2_count, -+ .shader_cnt = shader_core_cnt, -+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + -+ gpu_info->l2_count + shader_core_cnt, -+ .shader_avail_mask = gpu_info->core_mask, -+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, -+ .values_per_block = gpu_info->prfcnt_values_per_block, -+ .counters_per_block = -+ gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, -+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, -+ }; ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) ++ kbase_mmu_as_term(kbdev, i); +} + -+static void -+kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm) ++int kbase_device_pcm_dev_init(struct kbase_device *const kbdev) +{ -+ size_t block_idx; -+ const u32 *new_sample_buf = backend_jm->cpu_dump_va; -+ const u32 *new_block = new_sample_buf; -+ u64 *dst_buf = backend_jm->to_user_buf; -+ u64 *dst_block = dst_buf; -+ const size_t values_per_block = backend_jm->phys_layout.values_per_block; -+ const size_t dump_bytes = backend_jm->info->dump_bytes; ++ int err = 0; + -+ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) { -+ size_t ctr_idx; ++#if IS_ENABLED(CONFIG_OF) ++ struct device_node *prio_ctrl_node; + -+ for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++) -+ dst_block[ctr_idx] = new_block[ctr_idx]; ++ /* Check to see whether or not a platform specific priority control manager ++ * is available. 
++ */ ++ prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, ++ "priority-control-manager", 0); ++ if (!prio_ctrl_node) { ++ dev_info(kbdev->dev, ++ "No priority control manager is configured"); ++ } else { ++ struct platform_device *const pdev = ++ of_find_device_by_node(prio_ctrl_node); + -+ new_block += values_per_block; -+ dst_block += values_per_block; ++ if (!pdev) { ++ dev_err(kbdev->dev, ++ "The configured priority control manager was not found"); ++ } else { ++ struct priority_control_manager_device *pcm_dev = ++ platform_get_drvdata(pdev); ++ if (!pcm_dev) { ++ dev_info(kbdev->dev, "Priority control manager is not ready"); ++ err = -EPROBE_DEFER; ++ } else if (!try_module_get(pcm_dev->owner)) { ++ dev_err(kbdev->dev, "Failed to get priority control manager module"); ++ err = -ENODEV; ++ } else { ++ dev_info(kbdev->dev, "Priority control manager successfully loaded"); ++ kbdev->pcm_dev = pcm_dev; ++ } ++ } ++ of_node_put(prio_ctrl_node); + } ++#endif /* CONFIG_OF */ + -+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); -+ WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); ++ return err; +} + -+/** -+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback -+ * -+ * @rate_listener: Callback state -+ * @clk_index: Clock index -+ * @clk_rate_hz: Clock frequency(hz) -+ */ -+static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener, -+ u32 clk_index, u32 clk_rate_hz) ++void kbase_device_pcm_dev_term(struct kbase_device *const kbdev) +{ -+ struct kbase_hwcnt_backend_jm *backend_jm = -+ container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); -+ u64 timestamp_ns; -+ -+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) -+ return; -+ -+ timestamp_ns = ktime_get_raw_ns(); -+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); ++ if (kbdev->pcm_dev) ++ module_put(kbdev->pcm_dev->owner); +} + ++#define KBASE_PAGES_TO_KIB(pages) (((unsigned int)pages) << (PAGE_SHIFT - 10)) ++ +/** -+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking ++ * mali_oom_notifier_handler - Mali driver out-of-memory handler + * -+ * @backend_jm: Non-NULL pointer to backend. -+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. -+ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. ++ * @nb: notifier block - used to retrieve kbdev pointer ++ * @action: action (unused) ++ * @data: data pointer (unused) ++ * ++ * This function simply lists memory usage by the Mali driver, per GPU device, ++ * for diagnostic purposes. ++ * ++ * Return: NOTIFY_OK on success, NOTIFY_BAD otherwise. + */ -+static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ u64 timestamp_ns) ++static int mali_oom_notifier_handler(struct notifier_block *nb, ++ unsigned long action, void *data) +{ -+ struct kbase_device *kbdev = backend_jm->kctx->kbdev; -+ u64 clk_enable_map = enable_map->clk_enable_map; -+ u64 cycle_count; ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx = NULL; ++ unsigned long kbdev_alloc_total; + -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { -+ /* turn on the cycle counter */ -+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); -+ /* Read cycle count for top clock domain. 
*/ -+ kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL); ++ if (WARN_ON(nb == NULL)) ++ return NOTIFY_BAD; + -+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count; -+ } ++ kbdev = container_of(nb, struct kbase_device, oom_notifier_block); + -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { -+ /* software estimation for non-top clock domains */ -+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; -+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; -+ u32 cur_freq; -+ unsigned long flags; ++ kbdev_alloc_total = ++ KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); + -+ spin_lock_irqsave(&rtm->lock, flags); ++ dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, ++ kbdev_alloc_total); + -+ cur_freq = (u32)clk_data->clock_val; -+ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); -+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq); ++ mutex_lock(&kbdev->kctx_list_lock); + -+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener); ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ struct pid *pid_struct; ++ struct task_struct *task; ++ struct pid *tgid_struct; ++ struct task_struct *tgid_task; + -+ spin_unlock_irqrestore(&rtm->lock, flags); ++ unsigned long task_alloc_total = ++ KBASE_PAGES_TO_KIB(atomic_read(&(kctx->used_pages))); + -+ /* ccswe was reset. The estimated cycle is zero. */ -+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; ++ rcu_read_lock(); ++ pid_struct = find_get_pid(kctx->pid); ++ task = pid_task(pid_struct, PIDTYPE_PID); ++ tgid_struct = find_get_pid(kctx->tgid); ++ tgid_task = pid_task(tgid_struct, PIDTYPE_PID); ++ ++ dev_err(kbdev->dev, ++ "OOM notifier: tsk %s:%s tgid (%u) pid (%u) %lu kB\n", ++ tgid_task ? tgid_task->comm : "[null task]", ++ task ? task->comm : "[null comm]", kctx->tgid, ++ kctx->pid, task_alloc_total); ++ ++ put_pid(pid_struct); ++ rcu_read_unlock(); + } + -+ /* Keep clk_enable_map for dump_request. */ -+ backend_jm->clk_enable_map = clk_enable_map; ++ mutex_unlock(&kbdev->kctx_list_lock); ++ return NOTIFY_OK; +} + -+/** -+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking -+ * -+ * @backend_jm: Non-NULL pointer to backend. -+ */ -+static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm) ++int kbase_device_misc_init(struct kbase_device * const kbdev) +{ -+ struct kbase_device *kbdev = backend_jm->kctx->kbdev; -+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; -+ u64 clk_enable_map = backend_jm->clk_enable_map; ++ int err; ++#if IS_ENABLED(CONFIG_ARM64) ++ struct device_node *np = NULL; ++#endif /* CONFIG_ARM64 */ + -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { -+ /* turn off the cycle counter */ -+ kbase_pm_release_gpu_cycle_counter(kbdev); ++ spin_lock_init(&kbdev->mmu_mask_change); ++ mutex_init(&kbdev->mmu_hw_mutex); ++#if IS_ENABLED(CONFIG_ARM64) ++ kbdev->cci_snoop_enabled = false; ++ np = kbdev->dev->of_node; ++ if (np != NULL) { ++ if (of_property_read_u32(np, "snoop_enable_smc", ++ &kbdev->snoop_enable_smc)) ++ kbdev->snoop_enable_smc = 0; ++ if (of_property_read_u32(np, "snoop_disable_smc", ++ &kbdev->snoop_disable_smc)) ++ kbdev->snoop_disable_smc = 0; ++ /* Either both or none of the calls should be provided. 
*/ ++ if (!((kbdev->snoop_disable_smc == 0 ++ && kbdev->snoop_enable_smc == 0) ++ || (kbdev->snoop_disable_smc != 0 ++ && kbdev->snoop_enable_smc != 0))) { ++ WARN_ON(1); ++ err = -EINVAL; ++ goto fail; ++ } + } ++#endif /* CONFIG_ARM64 */ + -+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { -+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener); -+ } -+} ++ /* Get the list of workarounds for issues on the current HW ++ * (identified by the GPU_ID register) ++ */ ++ err = kbase_hw_set_issues_mask(kbdev); ++ if (err) ++ goto fail; + -+/** -+ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with -+ * current config information. -+ * @kbdev: Non-NULL pointer to kbase device. -+ * @curr_config: Non-NULL pointer to return the current configuration of -+ * hardware allocated to the GPU. -+ * -+ * The current configuration information is used for architectures where the -+ * max_config interface is available from the Arbiter. In this case the current -+ * allocated hardware is not always the same, so the current config information -+ * is used to correctly map the current allocated resources to the memory layout -+ * that is copied to the user space. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev, -+ struct kbase_hwcnt_curr_config *curr_config) -+{ -+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) -+ return -EINVAL; ++ /* Set the list of features available on the current HW ++ * (identified by the GPU_ID register) ++ */ ++ kbase_hw_set_features_mask(kbdev); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ err = kbase_gpuprops_set_features(kbdev); ++ if (err) ++ goto fail; + -+ curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices; -+ curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present; -+ return 0; -+} ++ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our ++ * device structure was created by device-tree ++ */ ++ if (!kbdev->dev->dma_mask) ++ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; + -+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend) -+{ -+ (void)backend; -+ return ktime_get_raw_ns(); -+} ++ err = dma_set_mask(kbdev->dev, ++ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); ++ if (err) ++ goto dma_set_mask_failed; + -+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -+static int -+kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) -+{ -+ int errcode; -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct kbase_hwcnt_physical_enable_map phys_enable_map; -+ enum kbase_hwcnt_physical_set phys_counter_set; -+ struct kbase_instr_hwcnt_enable enable; -+ u64 timestamp_ns; ++ err = dma_set_coherent_mask(kbdev->dev, ++ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); ++ if (err) ++ goto dma_set_mask_failed; + -+ if (!backend_jm || !enable_map || backend_jm->enabled || -+ (enable_map->metadata != backend_jm->info->metadata)) -+ return -EINVAL; + -+ kctx = backend_jm->kctx; -+ kbdev = backend_jm->kctx->kbdev; ++ /* There is no limit for Mali, so set to max. 
*/ ++ if (kbdev->dev->dma_parms) ++ err = dma_set_max_seg_size(kbdev->dev, UINT_MAX); ++ if (err) ++ goto dma_set_mask_failed; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + -+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ err = kbase_device_all_as_init(kbdev); ++ if (err) ++ goto dma_set_mask_failed; + -+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set); ++ err = kbase_pbha_read_dtb(kbdev); ++ if (err) ++ goto term_as; + -+ enable.fe_bm = phys_enable_map.fe_bm; -+ enable.shader_bm = phys_enable_map.shader_bm; -+ enable.tiler_bm = phys_enable_map.tiler_bm; -+ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; -+ enable.counter_set = phys_counter_set; -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* The dummy model needs the CPU mapping. */ -+ enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va; -+#else -+ enable.dump_buffer = backend_jm->gpu_dump_va; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ enable.dump_buffer_bytes = backend_jm->info->dump_bytes; ++ init_waitqueue_head(&kbdev->cache_clean_wait); + -+ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ++ kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev); + -+ /* Update the current configuration information. */ -+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); -+ if (errcode) -+ goto error; ++ atomic_set(&kbdev->ctx_num, 0); + -+ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); -+ if (errcode) -+ goto error; ++ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + -+ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); ++#if MALI_USE_CSF ++ kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); ++#else ++ kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS; ++#endif /* MALI_USE_CSF */ + -+ backend_jm->enabled = true; ++ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); ++ kbdev->mmu_as_inactive_wait_time_ms = ++ kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT); ++ mutex_init(&kbdev->kctx_list_lock); ++ INIT_LIST_HEAD(&kbdev->kctx_list); + -+ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); ++ dev_dbg(kbdev->dev, "Registering mali_oom_notifier_handlern"); ++ kbdev->oom_notifier_block.notifier_call = mali_oom_notifier_handler; ++ err = register_oom_notifier(&kbdev->oom_notifier_block); ++ ++ if (err) { ++ dev_err(kbdev->dev, ++ "Unable to register OOM notifier for Mali - but will continue\n"); ++ kbdev->oom_notifier_block.notifier_call = NULL; ++ } ++ ++#if !MALI_USE_CSF ++ spin_lock_init(&kbdev->quick_reset_lock); ++ kbdev->quick_reset_enabled = true; ++ kbdev->num_of_atoms_hw_completed = 0; ++#endif + ++#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) ++ atomic_set(&kbdev->live_fence_metadata, 0); ++#endif + return 0; -+error: -+ return errcode; ++ ++term_as: ++ kbase_device_all_as_term(kbdev); ++dma_set_mask_failed: ++fail: ++ return err; +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -+static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) ++void kbase_device_misc_term(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ int errcode; -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; -+ struct kbase_device *kbdev; -+ -+ if (!backend_jm) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ kbdev = 
backend_jm->kctx->kbdev; ++ WARN_ON(!list_empty(&kbdev->kctx_list)); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#if KBASE_KTRACE_ENABLE ++ kbase_debug_assert_register_hook(NULL, NULL); ++#endif ++ kbase_device_all_as_term(kbdev); + -+ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (kbdev->oom_notifier_block.notifier_call) ++ unregister_oom_notifier(&kbdev->oom_notifier_block); + -+ return errcode; ++#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) ++ if (atomic_read(&kbdev->live_fence_metadata) > 0) ++ dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); ++#endif +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) ++#if !MALI_USE_CSF ++void kbase_enable_quick_reset(struct kbase_device *kbdev) +{ -+ int errcode; -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ spin_lock(&kbdev->quick_reset_lock); + -+ if (WARN_ON(!backend_jm) || !backend_jm->enabled) -+ return; ++ kbdev->quick_reset_enabled = true; ++ kbdev->num_of_atoms_hw_completed = 0; + -+ kbasep_hwcnt_backend_jm_cc_disable(backend_jm); ++ spin_unlock(&kbdev->quick_reset_lock); ++} + -+ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); -+ WARN_ON(errcode); ++void kbase_disable_quick_reset(struct kbase_device *kbdev) ++{ ++ spin_lock(&kbdev->quick_reset_lock); + -+ backend_jm->enabled = false; ++ kbdev->quick_reset_enabled = false; ++ kbdev->num_of_atoms_hw_completed = 0; ++ ++ spin_unlock(&kbdev->quick_reset_lock); +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -+static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend) ++bool kbase_is_quick_reset_enabled(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ return kbdev->quick_reset_enabled; ++} ++#endif + -+ if (!backend_jm || !backend_jm->enabled) -+ return -EINVAL; ++void kbase_device_free(struct kbase_device *kbdev) ++{ ++ kfree(kbdev); ++} + -+ return kbase_instr_hwcnt_clear(backend_jm->kctx); ++void kbase_device_id_init(struct kbase_device *kbdev) ++{ ++ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, ++ kbase_dev_nr); ++ kbdev->id = kbase_dev_nr; +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ -+static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend, -+ u64 *dump_time_ns) ++void kbase_increment_device_id(void) +{ -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; -+ struct kbase_device *kbdev; -+ const struct kbase_hwcnt_metadata *metadata; -+ u64 current_cycle_count; -+ size_t clk; -+ int ret; -+ -+ if (!backend_jm || !backend_jm->enabled || !dump_time_ns) -+ return -EINVAL; -+ -+ kbdev = backend_jm->kctx->kbdev; -+ metadata = backend_jm->info->metadata; -+ -+ /* Disable pre-emption, to make the timestamp as accurate as possible */ -+ preempt_disable(); -+ { -+ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); -+ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); -+ -+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) -+ { -+ if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk)) -+ continue; -+ -+ if (clk == KBASE_CLOCK_DOMAIN_TOP) { -+ /* Read cycle count for top clock domain. 
*/ -+ kbase_backend_get_gpu_time_norequest(kbdev, ¤t_cycle_count, -+ NULL, NULL); -+ } else { -+ /* -+ * Estimate cycle count for non-top clock -+ * domain. -+ */ -+ current_cycle_count = kbase_ccswe_cycle_at( -+ &backend_jm->ccswe_shader_cores, *dump_time_ns); -+ } -+ backend_jm->cycle_count_elapsed[clk] = -+ current_cycle_count - backend_jm->prev_cycle_count[clk]; -+ -+ /* -+ * Keep the current cycle count for later calculation. -+ */ -+ backend_jm->prev_cycle_count[clk] = current_cycle_count; -+ } -+ } -+ preempt_enable(); -+ -+ return ret; ++ kbase_dev_nr++; +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -+static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) ++int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; -+ -+ if (!backend_jm || !backend_jm->enabled) -+ return -EINVAL; -+ -+ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); ++ return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, ++ &kbdev->hwcnt_gpu_ctx); +} + -+/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ -+static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, -+ struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, -+ bool accumulate) ++void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; -+ size_t clk; -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ int errcode; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ -+ if (!backend_jm || !dst || !dst_enable_map || -+ (backend_jm->info->metadata != dst->metadata) || -+ (dst_enable_map->metadata != dst->metadata)) -+ return -EINVAL; -+ -+ /* Invalidate the kernel buffer before reading from it. */ -+ kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); -+ -+ /* Dump sample to the internal 64-bit user buffer. */ -+ kbasep_hwcnt_backend_jm_dump_sample(backend_jm); -+ -+ /* Extract elapsed cycle count for each clock domain if enabled. */ -+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) -+ { -+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) -+ continue; -+ -+ /* Reset the counter to zero if accumulation is off. */ -+ if (!accumulate) -+ dst->clk_cnt_buf[clk] = 0; -+ dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk]; -+ } -+ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ kbdev = backend_jm->kctx->kbdev; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ /* Update the current configuration information. */ -+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ if (errcode) -+ return errcode; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, -+ backend_jm->pm_core_mask, &backend_jm->curr_config, -+ accumulate); ++ kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); +} + -+/** -+ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. -+ * @info: Non-NULL pointer to JM backend info. -+ * @kctx: Non-NULL pointer to kbase context. -+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address -+ * is stored on success. -+ * -+ * Return: 0 on success, else error code. 
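The removed kbasep_hwcnt_backend_jm_dump_request() above wraps its timestamp read and dump trigger in preempt_disable()/preempt_enable() so the two cannot be separated by a reschedule. A small sketch of that pattern with a caller-supplied sampling hook; the names are illustrative only.

    #include <linux/ktime.h>
    #include <linux/preempt.h>
    #include <linux/timekeeping.h>
    #include <linux/types.h>

    /* Timestamp a short sampling callback without allowing preemption in between. */
    static int timed_sample_sketch(int (*sample)(void *ctx), void *ctx, u64 *ts_ns)
    {
        int ret;

        preempt_disable();
        *ts_ns = ktime_get_raw_ns();    /* same clock the JM backend used */
        ret = sample(ctx);
        preempt_enable();

        return ret;
    }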
-+ */ -+static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info, -+ struct kbase_context *kctx, u64 *gpu_dump_va) ++int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev) +{ -+ struct kbase_va_region *reg; -+ u64 flags; -+ u64 nr_pages; -+ -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ -+ WARN_ON(!info); -+ WARN_ON(!kctx); -+ WARN_ON(!gpu_dump_va); -+ -+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING | -+ BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU; -+ -+ nr_pages = PFN_UP(info->dump_bytes); -+ -+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info); -+ -+ if (!reg) -+ return -ENOMEM; -+ -+ return 0; ++ return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, ++ KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, ++ &kbdev->hwcnt_gpu_virt); +} + -+/** -+ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. -+ * @kctx: Non-NULL pointer to kbase context. -+ * @gpu_dump_va: GPU dump buffer virtual address. -+ */ -+static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va) ++void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) +{ -+ WARN_ON(!kctx); -+ if (gpu_dump_va) -+ kbase_mem_free(kctx, gpu_dump_va); ++ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); +} + -+/** -+ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. -+ * @backend: Pointer to JM backend to destroy. -+ * -+ * Can be safely called on a backend in any state of partial construction. -+ */ -+static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend) ++int kbase_device_timeline_init(struct kbase_device *kbdev) +{ -+ if (!backend) -+ return; -+ -+ if (backend->kctx) { -+ struct kbase_context *kctx = backend->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ -+ if (backend->cpu_dump_va) -+ kbase_phy_alloc_mapping_put(kctx, backend->vmap); -+ -+ if (backend->gpu_dump_va) -+ kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va); -+ -+ kbasep_js_release_privileged_ctx(kbdev, kctx); -+ kbase_destroy_context(kctx); -+ } -+ -+ kfree(backend->to_user_buf); -+ -+ kfree(backend); ++ atomic_set(&kbdev->timeline_flags, 0); ++ return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags); +} + -+/** -+ * kbasep_hwcnt_backend_jm_create() - Create a JM backend. -+ * @info: Non-NULL pointer to backend info. -+ * @out_backend: Non-NULL pointer to where backend is stored on success. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info, -+ struct kbase_hwcnt_backend_jm **out_backend) ++void kbase_device_timeline_term(struct kbase_device *kbdev) +{ -+ int errcode; -+ struct kbase_device *kbdev; -+ struct kbase_hwcnt_backend_jm *backend = NULL; -+ -+ WARN_ON(!info); -+ WARN_ON(!out_backend); -+ -+ kbdev = info->kbdev; -+ -+ backend = kzalloc(sizeof(*backend), GFP_KERNEL); -+ if (!backend) -+ goto alloc_error; -+ -+ backend->info = info; -+ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout); -+ -+ backend->kctx = kbase_create_context(kbdev, true, -+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); -+ if (!backend->kctx) -+ goto alloc_error; -+ -+ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); -+ -+ errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va); -+ if (errcode) -+ goto error; -+ -+ backend->cpu_dump_va = -+ kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap); -+ if (!backend->cpu_dump_va || !backend->vmap) -+ goto alloc_error; ++ kbase_timeline_term(kbdev->timeline); ++} + -+ backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL); -+ if (!backend->to_user_buf) -+ goto alloc_error; ++int kbase_device_vinstr_init(struct kbase_device *kbdev) ++{ ++ return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); ++} + -+ kbase_ccswe_init(&backend->ccswe_shader_cores); -+ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; ++void kbase_device_vinstr_term(struct kbase_device *kbdev) ++{ ++ kbase_vinstr_term(kbdev->vinstr_ctx); ++} + -+ *out_backend = backend; -+ return 0; ++int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev) ++{ ++ return kbase_kinstr_prfcnt_init(kbdev->hwcnt_gpu_virt, ++ &kbdev->kinstr_prfcnt_ctx); ++} + -+alloc_error: -+ errcode = -ENOMEM; -+error: -+ kbasep_hwcnt_backend_jm_destroy(backend); -+ return errcode; ++void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev) ++{ ++ kbase_kinstr_prfcnt_term(kbdev->kinstr_prfcnt_ctx); +} + -+/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */ -+static const struct kbase_hwcnt_metadata * -+kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) ++int kbase_device_io_history_init(struct kbase_device *kbdev) +{ -+ if (!info) -+ return NULL; ++ return kbase_io_history_init(&kbdev->io_history, ++ KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); ++} + -+ return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata; ++void kbase_device_io_history_term(struct kbase_device *kbdev) ++{ ++ kbase_io_history_term(&kbdev->io_history); +} + -+/* JM backend implementation of kbase_hwcnt_backend_init_fn */ -+static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info, -+ struct kbase_hwcnt_backend **out_backend) ++int kbase_device_misc_register(struct kbase_device *kbdev) +{ -+ int errcode; -+ struct kbase_hwcnt_backend_jm *backend = NULL; ++ return misc_register(&kbdev->mdev); ++} + -+ if (!info || !out_backend) -+ return -EINVAL; ++void kbase_device_misc_deregister(struct kbase_device *kbdev) ++{ ++ misc_deregister(&kbdev->mdev); ++} + -+ errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info, -+ &backend); -+ if (errcode) -+ return errcode; ++int kbase_device_list_init(struct kbase_device *kbdev) ++{ ++ const struct list_head *dev_list; + -+ *out_backend = (struct kbase_hwcnt_backend *)backend; ++ dev_list = 
kbase_device_get_list(); ++ list_add(&kbdev->entry, &kbase_dev_list); ++ kbase_device_put_list(dev_list); + + return 0; +} + -+/* JM backend implementation of kbase_hwcnt_backend_term_fn */ -+static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) ++void kbase_device_list_term(struct kbase_device *kbdev) +{ -+ if (!backend) -+ return; ++ const struct list_head *dev_list; + -+ kbasep_hwcnt_backend_jm_dump_disable(backend); -+ kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); ++ dev_list = kbase_device_get_list(); ++ list_del(&kbdev->entry); ++ kbase_device_put_list(dev_list); +} + -+/** -+ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. -+ * @info: Pointer to info to destroy. -+ * -+ * Can be safely called on a backend info in any state of partial construction. -+ */ -+static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info) ++const struct list_head *kbase_device_get_list(void) +{ -+ if (!info) -+ return; ++ mutex_lock(&kbase_dev_list_lock); ++ return &kbase_dev_list; ++} ++KBASE_EXPORT_TEST_API(kbase_device_get_list); + -+ kbase_hwcnt_jm_metadata_destroy(info->metadata); -+ kfree(info); ++void kbase_device_put_list(const struct list_head *dev_list) ++{ ++ mutex_unlock(&kbase_dev_list_lock); +} ++KBASE_EXPORT_TEST_API(kbase_device_put_list); + -+/** -+ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. -+ * @kbdev: Non_NULL pointer to kbase device. -+ * @out_info: Non-NULL pointer to where info is stored on success. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev, -+ const struct kbase_hwcnt_backend_jm_info **out_info) ++int kbase_device_early_init(struct kbase_device *kbdev) +{ -+ int errcode = -ENOMEM; -+ struct kbase_hwcnt_backend_jm_info *info = NULL; ++ int err; + -+ WARN_ON(!kbdev); -+ WARN_ON(!out_info); ++ err = kbase_ktrace_init(kbdev); ++ if (err) ++ return err; + -+ info = kzalloc(sizeof(*info), GFP_KERNEL); -+ if (!info) -+ return errcode; + -+ info->kbdev = kbdev; ++ err = kbasep_platform_device_init(kbdev); ++ if (err) ++ goto ktrace_term; + -+#ifdef CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY -+ info->counter_set = KBASE_HWCNT_SET_SECONDARY; -+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) -+ info->counter_set = KBASE_HWCNT_SET_TERTIARY; -+#else -+ /* Default to primary */ -+ info->counter_set = KBASE_HWCNT_SET_PRIMARY; -+#endif ++ err = kbase_pm_runtime_init(kbdev); ++ if (err) ++ goto fail_runtime_pm; + -+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info); -+ if (errcode) -+ goto error; ++ /* This spinlock is initialized before doing the first access to GPU ++ * registers and installing interrupt handlers. ++ */ ++ spin_lock_init(&kbdev->hwaccess_lock); + -+ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set, -+ &info->metadata, &info->dump_bytes); -+ if (errcode) -+ goto error; ++ /* Ensure we can access the GPU registers */ ++ kbase_pm_register_access_enable(kbdev); + -+ *out_info = info; ++ /* ++ * Find out GPU properties based on the GPU feature registers. ++ * Note that this does not populate the few properties that depend on ++ * hw_features being initialized. Those are set by kbase_gpuprops_set_features ++ * soon after this in the init process. ++ */ ++ kbase_gpuprops_set(kbdev); ++ ++ /* We're done accessing the GPU registers for now. 
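kbase_device_get_list()/kbase_device_put_list() above follow a simple convention: "get" returns the global device list with its mutex held and "put" drops it, so every add, remove or walk of the list is serialised. A stripped-down sketch of the same idiom with illustrative names:

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct sketch_dev {
        struct list_head entry;
        int id;
    };

    static DEFINE_MUTEX(sketch_list_lock);
    static LIST_HEAD(sketch_list);

    /* Returns the list with the lock held; pair every call with sketch_put_list(). */
    static struct list_head *sketch_get_list(void)
    {
        mutex_lock(&sketch_list_lock);
        return &sketch_list;
    }

    static void sketch_put_list(void)
    {
        mutex_unlock(&sketch_list_lock);
    }

    static void sketch_add_dev(struct sketch_dev *sdev)
    {
        struct list_head *list = sketch_get_list();

        list_add(&sdev->entry, list);
        sketch_put_list();
    }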
*/ ++ kbase_pm_register_access_disable(kbdev); ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) ++ err = kbase_arbiter_pm_install_interrupts(kbdev); ++ else ++ err = kbase_install_interrupts(kbdev); ++#else ++ err = kbase_install_interrupts(kbdev); ++#endif ++ if (err) ++ goto fail_interrupts; + + return 0; -+error: -+ kbasep_hwcnt_backend_jm_info_destroy(info); -+ return errcode; -+} + -+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, -+ struct kbase_hwcnt_backend_interface *iface) -+{ -+ int errcode; -+ const struct kbase_hwcnt_backend_jm_info *info = NULL; ++fail_interrupts: ++ kbase_pm_runtime_term(kbdev); ++fail_runtime_pm: ++ kbasep_platform_device_term(kbdev); ++ktrace_term: ++ kbase_ktrace_term(kbdev); + -+ if (!kbdev || !iface) -+ return -EINVAL; ++ return err; ++} + -+ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); ++void kbase_device_early_term(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) ++ kbase_arbiter_pm_release_interrupts(kbdev); ++ else ++ kbase_release_interrupts(kbdev); ++#else ++ kbase_release_interrupts(kbdev); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ kbase_pm_runtime_term(kbdev); ++ kbasep_platform_device_term(kbdev); ++ kbase_ktrace_term(kbdev); ++} + -+ if (errcode) -+ return errcode; ++int kbase_device_late_init(struct kbase_device *kbdev) ++{ ++ int err; + -+ iface->info = (struct kbase_hwcnt_backend_info *)info; -+ iface->metadata = kbasep_hwcnt_backend_jm_metadata; -+ iface->init = kbasep_hwcnt_backend_jm_init; -+ iface->term = kbasep_hwcnt_backend_jm_term; -+ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; -+ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; -+ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; -+ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; -+ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; -+ iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; -+ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; -+ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; ++ err = kbasep_platform_device_late_init(kbdev); + -+ return 0; ++ return err; +} + -+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface) ++void kbase_device_late_term(struct kbase_device *kbdev) +{ -+ if (!iface) -+ return; -+ -+ kbasep_hwcnt_backend_jm_info_destroy( -+ (const struct kbase_hwcnt_backend_jm_info *)iface->info); -+ memset(iface, 0, sizeof(*iface)); ++ kbasep_platform_device_late_term(kbdev); +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h +diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h new file mode 100644 -index 000000000..4a6293c25 +index 000000000..f02501100 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h -@@ -0,0 +1,58 @@ ++++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +@@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -186822,888 +189154,519 @@ index 000000000..4a6293c25 + * + */ + -+/* -+ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM -+ * backend. -+ */ -+ -+#ifndef _KBASE_HWCNT_BACKEND_JM_H_ -+#define _KBASE_HWCNT_BACKEND_JM_H_ -+ -+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" ++#include + -+struct kbase_device; ++/** ++ * kbase_device_get_list - get device list. ++ * Get access to device list. ++ * ++ * Return: Pointer to the linked list head. ++ */ ++const struct list_head *kbase_device_get_list(void); + +/** -+ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend -+ * interface. -+ * @kbdev: Non-NULL pointer to kbase device. -+ * @iface: Non-NULL pointer to backend interface structure that is filled in -+ * on creation success. ++ * kbase_device_put_list - put device list. + * -+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. ++ * @dev_list: head of linked list containing device list. + * -+ * Return: 0 on success, else error code. ++ * Put access to the device list. + */ -+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, -+ struct kbase_hwcnt_backend_interface *iface); ++void kbase_device_put_list(const struct list_head *dev_list); + +/** -+ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend -+ * interface. -+ * @iface: Pointer to interface to destroy. ++ * kbase_increment_device_id - increment device id. + * -+ * Can be safely called on an all-zeroed interface, or on an already destroyed -+ * interface. ++ * Used to increment device id on successful initialization of the device. + */ -+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface); ++void kbase_increment_device_id(void); + -+#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c -new file mode 100644 -index 000000000..564700b2d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c -@@ -0,0 +1,829 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++/** ++ * kbase_device_firmware_init_once - Initialize firmware and HWC + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * When a device file is opened for the first time, ++ * load firmware and initialize hardware counter components. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Return: 0 on success. An error code on failure. ++ */ ++int kbase_device_firmware_init_once(struct kbase_device *kbdev); ++ ++/** ++ * kbase_device_init - Device initialisation. 
+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * ++ * This is called from device probe to initialise various other ++ * components needed. ++ * ++ * Return: 0 on success and non-zero value on failure. + */ ++int kbase_device_init(struct kbase_device *kbdev); + -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+/* Backend watch dog timer interval in milliseconds: 18 seconds. */ -+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000; -+#else -+/* Backend watch dog timer interval in milliseconds: 1 second. */ -+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000; -+#endif /* IS_FPGA && !NO_MALI */ -+ -+/* -+ * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request. -+ * IDLE_BUFFER_EMPTY -> TIMER_DUMPING after -+ * hwcnt_backend_watchdog_timer_interval_ms -+ * milliseconds, if no dump_request has been -+ * called in the meantime. -+ * IDLE_BUFFER_FULL -> USER_DUMPING_BUFFER_FULL on dump_request. -+ * IDLE_BUFFER_FULL -> TIMER_DUMPING after -+ * hwcnt_backend_watchdog_timer_interval_ms -+ * milliseconds, if no dump_request has been -+ * called in the meantime. -+ * IDLE_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped -+ * counter values since the last dump_get. -+ * IDLE_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager -+ * backend dump_clear. -+ * IDLE_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager -+ * backend dump_clear. -+ * USER_DUMPING_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager -+ * backend dump_clear. -+ * USER_DUMPING_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager -+ * backend dump_clear. -+ * BUFFER_CLEARING -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend -+ * dump_clear completion. -+ * TIMER_DUMPING -> IDLE_BUFFER_FULL on timer's callback completion. -+ * TIMER_DUMPING -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback thread -+ * that there is no need for dumping the buffer -+ * anymore, and that the client will proceed -+ * clearing the buffer. -+ * TIMER_DUMPING_USER_CLEAR -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user -+ * requested a dump_clear. -+ * TIMER_DUMPING -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client performs a -+ * dump request while the timer is dumping (the -+ * timer will perform the dump and (once -+ * completed) the client will retrieve the value -+ * from the buffer). -+ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the -+ * user reads the periodic dump buffer. -+ * Any -> ERROR if the job manager backend returns an error -+ * (of any kind). -+ * USER_DUMPING_BUFFER_EMPTY -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the -+ * periodic dump buffer and returns). -+ * USER_DUMPING_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with -+ * periodic dump buffer and returns). ++/** ++ * kbase_device_term - Device termination. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This is called from device remove to terminate various components that ++ * were initialised during kbase_device_init. 
+ */ ++void kbase_device_term(struct kbase_device *kbdev); + -+/** enum backend_watchdog_state State used to synchronize timer callbacks with the main thread. -+ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls. -+ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is -+ * empty. -+ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full. -+ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback -+ * thread should just ignore and reschedule another callback in -+ * hwcnt_backend_watchdog_timer_interval_ms milliseconds. -+ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump. -+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user -+ * requested a dump. -+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a -+ * dump_clear. The timer has to complete the periodic dump -+ * and clear buffer (internal and job manager backend). -+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic -+ * dump buffer is empty. -+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump -+ * buffer is full. ++/** ++ * kbase_reg_write - write to GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * @value: Value to write + * -+ * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is -+ * allowed to call the job manager backend layer. ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + */ -+enum backend_watchdog_state { -+ HWCNT_JM_WD_ERROR, -+ HWCNT_JM_WD_IDLE_BUFFER_EMPTY, -+ HWCNT_JM_WD_IDLE_BUFFER_FULL, -+ HWCNT_JM_WD_BUFFER_CLEARING, -+ HWCNT_JM_WD_TIMER_DUMPING, -+ HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, -+ HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR, -+ HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY, -+ HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL, -+}; ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); + -+/** enum wd_init_state - State machine for initialization / termination of the backend resources ++/** ++ * kbase_reg_read - read from GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). ++ * ++ * Return: Value in desired register + */ -+enum wd_init_state { -+ HWCNT_JM_WD_INIT_START, -+ HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START, -+ HWCNT_JM_WD_INIT_ENABLE_MAP, -+ HWCNT_JM_WD_INIT_DUMP_BUFFER, -+ HWCNT_JM_WD_INIT_END -+}; ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); + +/** -+ * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an -+ * instance of the job manager watchdog backend. -+ * @jm_backend_iface: Hardware counter backend interface. This module extends -+ * this interface with a watchdog that performs regular -+ * dumps. The new interface this module provides complies -+ * with the old backend interface. -+ * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the -+ * hardware counter in case no reads are requested within -+ * a certain time, used to avoid hardware counter's buffer -+ * saturation. ++ * kbase_is_gpu_removed() - Has the GPU been removed. 
++ * @kbdev: Kbase device pointer ++ * ++ * When Kbase takes too long to give up the GPU, the Arbiter ++ * can remove it. This will then be followed by a GPU lost event. ++ * This function will return true if the GPU has been removed. ++ * When this happens register reads will be zero. A zero GPU_ID is ++ * invalid so this is used to detect when GPU is removed. ++ * ++ * Return: True if GPU removed + */ -+struct kbase_hwcnt_backend_jm_watchdog_info { -+ struct kbase_hwcnt_backend_interface *jm_backend_iface; -+ struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface; -+}; ++bool kbase_is_gpu_removed(struct kbase_device *kbdev); + +/** -+ * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend. -+ * @info: Immutable information used to create the job manager watchdog backend. -+ * @jm_backend: Job manager's backend internal state. To be passed as argument during parent calls. -+ * @timeout_ms: Time period in milliseconds for hardware counters dumping. -+ * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are -+ * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, -+ * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL. -+ * @wd_enable_map: Watchdog backend internal buffer mask, initialized during dump_enable copying -+ * the enable_map passed as argument. -+ * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled -+ * during watchdog timer dumps. -+ * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls. -+ * @locked: Members protected from concurrent access by different threads. -+ * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual -+ * exclusion). -+ * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the -+ * watchdog timer are both enabled. If false then both are disabled (or soon -+ * will be). Races between enable and disable have undefined behavior. -+ * @locked.state: State used to synchronize timer callbacks with the main thread. ++ * kbase_gpu_cache_flush_pa_range_and_busy_wait() - Start a cache physical range flush ++ * and busy wait ++ * ++ * @kbdev: kbase device to issue the MMU operation on. ++ * @phys: Starting address of the physical range to start the operation on. ++ * @nr_bytes: Number of bytes to work on. ++ * @flush_op: Flush command register value to be sent to HW ++ * ++ * Issue a cache flush physical range command, then busy wait an irq status. ++ * This function will clear FLUSH_PA_RANGE_COMPLETED irq mask bit ++ * and busy-wait the rawstat register. ++ * ++ * Return: 0 if successful or a negative error code on failure. 
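kbase_reg_read()/kbase_reg_write() documented above are the accessors used for GPU control registers; callers must ensure the GPU is powered, and read-modify-write sequences on shared registers such as GPU_IRQ_MASK are done under hwaccess_lock elsewhere in this patch. A short usage sketch that masks the cache-clean interrupt, mirroring what mali_kbase_device_hw.c does further below; sketch_mask_clean_caches_irq() is a hypothetical helper and the driver's internal headers (mali_kbase.h and friends) are assumed.

    /* Assumes the driver-internal headers providing kbase_device and register macros. */
    static void sketch_mask_clean_caches_irq(struct kbase_device *kbdev)
    {
        unsigned long flags;
        u32 irq_mask;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                irq_mask & ~CLEAN_CACHES_COMPLETED);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
    }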
+ */ -+struct kbase_hwcnt_backend_jm_watchdog { -+ const struct kbase_hwcnt_backend_jm_watchdog_info *info; -+ struct kbase_hwcnt_backend *jm_backend; -+ u32 timeout_ms; -+ struct kbase_hwcnt_dump_buffer wd_dump_buffer; -+ struct kbase_hwcnt_enable_map wd_enable_map; -+ u64 wd_dump_timestamp; -+ struct completion watchdog_complete; -+ struct { -+ spinlock_t watchdog_lock; -+ bool is_enabled; -+ enum backend_watchdog_state state; -+ } locked; -+}; -+ -+/* timer's callback function */ -+static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend) -+{ -+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend; -+ unsigned long flags; -+ bool wd_accumulate; -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return; -+ } -+ -+ if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY || -+ wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) { -+ /*resetting the timer. Calling modify on a disabled timer enables it.*/ -+ wd_backend->info->dump_watchdog_iface->modify( -+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms); -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return; -+ } -+ /*start performing the dump*/ -+ -+ /* if there has been a previous timeout use accumulating dump_get() -+ * otherwise use non-accumulating to overwrite buffer -+ */ -+ wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL); -+ -+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING; -+ -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend, -+ &wd_backend->wd_dump_timestamp) || -+ wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) || -+ wd_backend->info->jm_backend_iface->dump_get( -+ wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map, -+ wd_accumulate)) { -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING && -+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && -+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); -+ wd_backend->locked.state = HWCNT_JM_WD_ERROR; -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ /* Unblock user if it's waiting. */ -+ complete_all(&wd_backend->watchdog_complete); -+ return; -+ } -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING && -+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && -+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); -+ -+ if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) { -+ /* If there is no user request/clear, transit to HWCNT_JM_WD_IDLE_BUFFER_FULL -+ * to indicate timer dump is done and the buffer is full. If state changed to -+ * HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or -+ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR then user will transit the state -+ * machine to next state. -+ */ -+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL; -+ } -+ if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) { -+ /* reset the timer to schedule another callback. Calling modify on a -+ * disabled timer enables it. 
-+ */ -+ /*The spin lock needs to be held in case the client calls dump_enable*/ -+ wd_backend->info->dump_watchdog_iface->modify( -+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms); -+ } -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ -+ /* Unblock user if it's waiting. */ -+ complete_all(&wd_backend->watchdog_complete); -+} -+ -+/* helper methods, info structure creation and destruction*/ -+ -+static struct kbase_hwcnt_backend_jm_watchdog_info * -+kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface, -+ struct kbase_hwcnt_watchdog_interface *watchdog_iface) -+{ -+ struct kbase_hwcnt_backend_jm_watchdog_info *const info = -+ kmalloc(sizeof(*info), GFP_KERNEL); -+ -+ if (!info) -+ return NULL; -+ -+ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface, -+ .dump_watchdog_iface = -+ watchdog_iface }; ++#if MALI_USE_CSF ++int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, ++ size_t nr_bytes, u32 flush_op); ++#endif /* MALI_USE_CSF */ + -+ return info; -+} ++/** ++ * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait ++ * @kbdev: Kbase device ++ * @flush_op: Flush command register value to be sent to HW ++ * ++ * Issue a cache flush command to hardware, then busy wait an irq status. ++ * This function will clear CLEAN_CACHES_COMPLETED irq mask bit set by other ++ * threads through kbase_gpu_start_cache_clean(), and wake them up manually ++ * after the busy-wait is done. Any pended cache flush commands raised by ++ * other thread are handled in this function. ++ * hwaccess_lock must be held by the caller. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, ++ u32 flush_op); + -+/****** kbase_hwcnt_backend_interface implementation *******/ ++/** ++ * kbase_gpu_start_cache_clean - Start a cache clean ++ * @kbdev: Kbase device ++ * @flush_op: Flush command register value to be sent to HW ++ * ++ * Issue a given cache flush command to hardware. ++ * This function will take hwaccess_lock. ++ */ ++void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op); + -+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */ -+static const struct kbase_hwcnt_metadata * -+kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info) -+{ -+ const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info; ++/** ++ * kbase_gpu_start_cache_clean_nolock - Start a cache clean ++ * @kbdev: Kbase device ++ * @flush_op: Flush command register value to be sent to HW ++ * ++ * Issue a given cache flush command to hardware. ++ * hwaccess_lock must be held by the caller. ++ */ ++void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, ++ u32 flush_op); + -+ if (WARN_ON(!info)) -+ return NULL; ++/** ++ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish ++ * @kbdev: Kbase device ++ * ++ * This function will take hwaccess_lock, and may sleep. ++ */ ++void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); + -+ return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); -+} ++/** ++ * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache ++ * cleaning to finish ++ * @kbdev: Kbase device ++ * @wait_timeout_ms: Time in milliseconds, to wait for cache clean to complete. 
++ * ++ * This function will take hwaccess_lock, and may sleep. This is supposed to be ++ * called from paths (like GPU reset) where an indefinite wait for the ++ * completion of cache clean operation can cause deadlock, as the operation may ++ * never complete. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, ++ unsigned int wait_timeout_ms); + -+static void -+kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend, -+ enum wd_init_state state) -+{ -+ if (!wd_backend) -+ return; ++/** ++ * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is ++ * finished. Would also be called after ++ * the GPU reset. ++ * @kbdev: Kbase device ++ * ++ * Caller must hold the hwaccess_lock. ++ */ ++void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); + -+ WARN_ON(state > HWCNT_JM_WD_INIT_END); ++/** ++ * kbase_clean_caches_done - Issue previously queued cache clean request or ++ * wake up the requester that issued cache clean. ++ * @kbdev: Kbase device ++ * ++ * Caller must hold the hwaccess_lock. ++ */ ++void kbase_clean_caches_done(struct kbase_device *kbdev); + -+ while (state-- > HWCNT_JM_WD_INIT_START) { -+ switch (state) { -+ case HWCNT_JM_WD_INIT_BACKEND: -+ wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend); -+ break; -+ case HWCNT_JM_WD_INIT_ENABLE_MAP: -+ kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map); -+ break; -+ case HWCNT_JM_WD_INIT_DUMP_BUFFER: -+ kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer); -+ break; -+ case HWCNT_JM_WD_INIT_END: -+ break; -+ } -+ } ++/** ++ * kbase_gpu_interrupt - GPU interrupt handler ++ * @kbdev: Kbase device pointer ++ * @val: The value of the GPU IRQ status register which triggered the call ++ * ++ * This function is called from the interrupt handler when a GPU irq is to be ++ * handled. ++ */ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); +diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +new file mode 100644 +index 000000000..d55495045 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +@@ -0,0 +1,291 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ kfree(wd_backend); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn -+ * Calling term does *not* destroy the interface -+ */ -+static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend) ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = -+ (struct kbase_hwcnt_backend_jm_watchdog *)backend; -+ -+ if (!backend) -+ return; ++ u32 val; + -+ /* disable timer thread to avoid concurrent access to shared resources */ -+ wd_backend->info->dump_watchdog_iface->disable( -+ wd_backend->info->dump_watchdog_iface->timer); ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + -+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END); ++ return val == 0; +} ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */ -+static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info, -+ struct kbase_hwcnt_backend **out_backend) ++static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) +{ -+ int errcode = 0; -+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL; -+ struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info; -+ const struct kbase_hwcnt_backend_info *jm_info; -+ const struct kbase_hwcnt_metadata *metadata; -+ enum wd_init_state state = HWCNT_JM_WD_INIT_START; -+ -+ if (WARN_ON(!info) || WARN_ON(!out_backend)) -+ return -EINVAL; -+ -+ jm_info = wd_info->jm_backend_iface->info; -+ metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); ++ char *irq_flag_name; ++ /* Previously MMU-AS command was used for L2 cache flush on page-table update. ++ * And we're using the same max-loops count for GPU command, because amount of ++ * L2 cache flush overhead are same between them. 
++ */ ++ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + -+ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL); -+ if (!wd_backend) { -+ *out_backend = NULL; -+ return -ENOMEM; ++ /* Wait for the GPU cache clean operation to complete */ ++ while (--max_loops && ++ !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) { ++ ; + } + -+ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){ -+ .info = wd_info, -+ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms, -+ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false } -+ }; -+ -+ while (state < HWCNT_JM_WD_INIT_END && !errcode) { -+ switch (state) { -+ case HWCNT_JM_WD_INIT_BACKEND: -+ errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend); -+ break; -+ case HWCNT_JM_WD_INIT_ENABLE_MAP: -+ errcode = -+ kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map); ++ /* reset gpu if time-out occurred */ ++ if (max_loops == 0) { ++ switch (irq_bit) { ++ case CLEAN_CACHES_COMPLETED: ++ irq_flag_name = "CLEAN_CACHES_COMPLETED"; + break; -+ case HWCNT_JM_WD_INIT_DUMP_BUFFER: -+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, -+ &wd_backend->wd_dump_buffer); ++ case FLUSH_PA_RANGE_COMPLETED: ++ irq_flag_name = "FLUSH_PA_RANGE_COMPLETED"; + break; -+ case HWCNT_JM_WD_INIT_END: ++ default: ++ irq_flag_name = "UNKNOWN"; + break; + } -+ if (!errcode) -+ state++; -+ } + -+ if (errcode) { -+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state); -+ *out_backend = NULL; -+ return errcode; -+ } ++ dev_err(kbdev->dev, ++ "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n", ++ irq_flag_name); + -+ WARN_ON(state != HWCNT_JM_WD_INIT_END); ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu_locked(kbdev); ++ return -EBUSY; ++ } + -+ spin_lock_init(&wd_backend->locked.watchdog_lock); -+ init_completion(&wd_backend->watchdog_complete); ++ /* Clear the interrupt bit. */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit); + -+ *out_backend = (struct kbase_hwcnt_backend *)wd_backend; + return 0; +} + -+/* Job manager watchdog backend, implementation of timestamp_ns */ -+static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend) -+{ -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ -+ return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend); -+} ++#if MALI_USE_CSF ++#define U64_LO_MASK ((1ULL << 32) - 1) ++#define U64_HI_MASK (~U64_LO_MASK) + -+static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( -+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend, -+ const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler) ++int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, ++ size_t nr_bytes, u32 flush_op) +{ -+ int errcode = -EPERM; -+ unsigned long flags; -+ -+ if (WARN_ON(!wd_backend) || WARN_ON(!enable_map)) -+ return -EINVAL; ++ u64 start_pa, end_pa; ++ int ret = 0; + -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* If the backend is already enabled return an error */ -+ if (wd_backend->locked.is_enabled) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return -EPERM; -+ } ++ /* 1. Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. 
*/ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED); + -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. */ ++ start_pa = phys; ++ end_pa = start_pa + nr_bytes - 1; + -+ /*We copy the enable map into our watchdog backend copy, for future usage*/ -+ kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI), ++ (start_pa & U64_HI_MASK) >> 32); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + -+ errcode = enabler(wd_backend->jm_backend, enable_map); -+ if (!errcode) { -+ /*Enable dump watchdog*/ -+ errcode = wd_backend->info->dump_watchdog_iface->enable( -+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms, -+ kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend); -+ if (!errcode) { -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ WARN_ON(wd_backend->locked.is_enabled); -+ wd_backend->locked.is_enabled = true; -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ } else -+ /*Reverting the job manager backend back to disabled*/ -+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); -+ } ++ /* 3. Busy-wait irq status to be enabled. */ ++ ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED); + -+ return errcode; ++ return ret; +} ++#endif /* MALI_USE_CSF */ + -+/* Job manager watchdog backend, implementation of dump_enable */ -+static int -+kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) ++int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, ++ u32 flush_op) +{ -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ -+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( -+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable); -+} ++ int need_to_wake_up = 0; ++ int ret = 0; + -+/* Job manager watchdog backend, implementation of dump_enable_nolock */ -+static int -+kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend, -+ const struct kbase_hwcnt_enable_map *enable_map) -+{ -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ /* hwaccess_lock must be held to avoid any sync issue with ++ * kbase_gpu_start_cache_clean() / kbase_clean_caches_done() ++ */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( -+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock); -+} ++ /* 1. Check if kbdev->cache_clean_in_progress is set. ++ * If it is set, it means there are threads waiting for ++ * CLEAN_CACHES_COMPLETED irq to be raised and that the ++ * corresponding irq mask bit is set. ++ * We'll clear the irq mask bit and busy-wait for the cache ++ * clean operation to complete before submitting the cache ++ * clean command required after the GPU page table update. ++ * Pended flush commands will be merged to requested command. 
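The cache-flush paths in this file never let two flush commands race: kbase_gpu_cache_flush_and_busy_wait() merges commands that were pended while a clean was in flight (the step-1 comment above), and kbase_gpu_start_cache_clean_nolock() further below accumulates requests in cache_clean_queued until the current clean completes. A generic sketch of that accumulate-then-kick pattern with hypothetical names; the real driver merges command encodings with GPU_COMMAND_FLUSH_CACHE_MERGE() rather than a plain bitwise OR.

    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct flush_sketch {
        spinlock_t lock;
        bool in_progress;
        u32 queued_ops;         /* requests that arrived while busy */
        void (*issue)(u32 op);  /* caller-provided: kicks one flush in HW */
    };

    static void flush_sketch_request(struct flush_sketch *f, u32 op)
    {
        unsigned long flags;

        spin_lock_irqsave(&f->lock, flags);
        if (f->in_progress) {
            f->queued_ops |= op;    /* merge with pending work */
        } else {
            f->in_progress = true;
            f->issue(op);
        }
        spin_unlock_irqrestore(&f->lock, flags);
    }

    /* Called from the completion interrupt of the flush that just finished. */
    static void flush_sketch_done(struct flush_sketch *f)
    {
        unsigned long flags;

        spin_lock_irqsave(&f->lock, flags);
        if (f->queued_ops) {
            u32 op = f->queued_ops;

            f->queued_ops = 0;
            f->issue(op);       /* stay in_progress for the merged op */
        } else {
            f->in_progress = false;
        }
        spin_unlock_irqrestore(&f->lock, flags);
    }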
++ */ ++ if (kbdev->cache_clean_in_progress) { ++ /* disable irq first */ ++ u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~CLEAN_CACHES_COMPLETED); + -+/* Job manager watchdog backend, implementation of dump_disable */ -+static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend) -+{ -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ unsigned long flags; ++ /* busy wait irq status to be enabled */ ++ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); ++ if (ret) ++ return ret; + -+ if (WARN_ON(!backend)) -+ return; ++ /* merge pended command if there's any */ ++ flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE( ++ kbdev->cache_clean_queued, flush_op); + -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ if (!wd_backend->locked.is_enabled) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return; ++ /* enable wake up notify flag */ ++ need_to_wake_up = 1; ++ } else { ++ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), ++ CLEAN_CACHES_COMPLETED); + } + -+ wd_backend->locked.is_enabled = false; -+ -+ /* Discard undumped counter values since the last dump_get. */ -+ if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL) -+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; ++ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ /* 3. Busy-wait irq status to be enabled. */ ++ ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED); ++ if (ret) ++ return ret; + -+ wd_backend->info->dump_watchdog_iface->disable( -+ wd_backend->info->dump_watchdog_iface->timer); ++ /* 4. Wake-up blocked threads when there is any. */ ++ if (need_to_wake_up) ++ kbase_gpu_cache_clean_wait_complete(kbdev); + -+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); ++ return ret; +} + -+/* Job manager watchdog backend, implementation of dump_clear */ -+static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend) ++void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev, ++ u32 flush_op) +{ -+ int errcode = -EPERM; -+ bool clear_wd_wait_completion = false; -+ unsigned long flags; -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ -+ if (WARN_ON(!backend)) -+ return -EINVAL; ++ u32 irq_mask; + -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ if (!wd_backend->locked.is_enabled) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return -EPERM; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ switch (wd_backend->locked.state) { -+ case HWCNT_JM_WD_IDLE_BUFFER_FULL: -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: -+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: -+ wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING; -+ errcode = 0; -+ break; -+ case HWCNT_JM_WD_TIMER_DUMPING: -+ /* The timer asked for a dump request, when complete, the job manager backend -+ * buffer will be zero -+ */ -+ clear_wd_wait_completion = true; -+ /* This thread will have to wait for the callback to terminate and then call a -+ * dump_clear on the job manager backend. 
We change the state to -+ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is -+ * no more need to dump the buffer (since we will clear it right after anyway). -+ * We set up a wait queue to synchronize with the callback. ++ if (kbdev->cache_clean_in_progress) { ++ /* If this is called while another clean is in progress, we ++ * can't rely on the current one to flush any new changes in ++ * the cache. Instead, accumulate all cache clean operations ++ * and trigger that immediately after this one finishes. + */ -+ reinit_completion(&wd_backend->watchdog_complete); -+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR; -+ errcode = 0; -+ break; -+ default: -+ errcode = -EPERM; -+ break; ++ kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE( ++ kbdev->cache_clean_queued, flush_op); ++ return; + } -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (!errcode) { -+ if (clear_wd_wait_completion) { -+ /* Waiting for the callback to finish */ -+ wait_for_completion(&wd_backend->watchdog_complete); -+ } -+ -+ /* Clearing job manager backend buffer */ -+ errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend); -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ -+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && -+ wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING && -+ wd_backend->locked.state != HWCNT_JM_WD_ERROR); -+ -+ WARN_ON(!wd_backend->locked.is_enabled); + -+ if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) { -+ /* Setting the internal buffer state to EMPTY */ -+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; -+ /* Resetting the timer. Calling modify on a disabled timer -+ * enables it. 
-+ */ -+ wd_backend->info->dump_watchdog_iface->modify( -+ wd_backend->info->dump_watchdog_iface->timer, -+ wd_backend->timeout_ms); -+ } else { -+ wd_backend->locked.state = HWCNT_JM_WD_ERROR; -+ errcode = -EPERM; -+ } ++ /* Enable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask | CLEAN_CACHES_COMPLETED); + -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ } ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + -+ return errcode; ++ kbdev->cache_clean_in_progress = true; +} + -+/* Job manager watchdog backend, implementation of dump_request */ -+static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend, -+ u64 *dump_time_ns) ++void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op) +{ -+ bool call_dump_request = false; -+ int errcode = 0; + unsigned long flags; -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ -+ if (WARN_ON(!backend) || WARN_ON(!dump_time_ns)) -+ return -EINVAL; -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (!wd_backend->locked.is_enabled) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return -EPERM; -+ } -+ -+ switch (wd_backend->locked.state) { -+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: -+ /* progressing the state to avoid callbacks running while calling the job manager -+ * backend -+ */ -+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY; -+ call_dump_request = true; -+ break; -+ case HWCNT_JM_WD_IDLE_BUFFER_FULL: -+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL; -+ call_dump_request = true; -+ break; -+ case HWCNT_JM_WD_TIMER_DUMPING: -+ /* Retrieve timing information from previous dump_request */ -+ *dump_time_ns = wd_backend->wd_dump_timestamp; -+ /* On the next client call (dump_wait) the thread will have to wait for the -+ * callback to finish the dumping. -+ * We set up a wait queue to synchronize with the callback. -+ */ -+ reinit_completion(&wd_backend->watchdog_complete); -+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED; -+ break; -+ default: -+ errcode = -EPERM; -+ break; -+ } -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (call_dump_request) { -+ errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend, -+ dump_time_ns); -+ if (!errcode) { -+ /*resetting the timer. 
Calling modify on a disabled timer enables it*/ -+ wd_backend->info->dump_watchdog_iface->modify( -+ wd_backend->info->dump_watchdog_iface->timer, -+ wd_backend->timeout_ms); -+ } else { -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ WARN_ON(!wd_backend->locked.is_enabled); -+ wd_backend->locked.state = HWCNT_JM_WD_ERROR; -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ } -+ } + -+ return errcode; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_gpu_start_cache_clean_nolock(kbdev, flush_op); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+/* Job manager watchdog backend, implementation of dump_wait */ -+static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend) ++void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) +{ -+ int errcode = -EPERM; -+ bool wait_for_auto_dump = false, wait_for_user_dump = false; -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; -+ unsigned long flags; -+ -+ if (WARN_ON(!backend)) -+ return -EINVAL; -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ if (!wd_backend->locked.is_enabled) { -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ return -EPERM; -+ } -+ -+ switch (wd_backend->locked.state) { -+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: -+ wait_for_auto_dump = true; -+ errcode = 0; -+ break; -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: -+ wait_for_user_dump = true; -+ errcode = 0; -+ break; -+ default: -+ errcode = -EPERM; -+ break; -+ } -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ -+ if (wait_for_auto_dump) -+ wait_for_completion(&wd_backend->watchdog_complete); -+ else if (wait_for_user_dump) { -+ errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend); -+ if (errcode) { -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ WARN_ON(!wd_backend->locked.is_enabled); -+ wd_backend->locked.state = HWCNT_JM_WD_ERROR; -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); -+ } -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return errcode; ++ kbdev->cache_clean_queued = 0; ++ kbdev->cache_clean_in_progress = false; ++ wake_up(&kbdev->cache_clean_wait); +} + -+/* Job manager watchdog backend, implementation of dump_get */ -+static int kbasep_hwcnt_backend_jm_watchdog_dump_get( -+ struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer, -+ const struct kbase_hwcnt_enable_map *enable_map, bool accumulate) ++void kbase_clean_caches_done(struct kbase_device *kbdev) +{ -+ bool call_dump_get = false; -+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ u32 irq_mask; + unsigned long flags; -+ int errcode = 0; -+ -+ if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map)) -+ return -EINVAL; -+ -+ /* The resultant contents of the dump buffer are only well defined if a prior -+ * call to dump_wait returned successfully, and a new dump has not yet been -+ * requested by a call to dump_request. 
-+ */ -+ -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); -+ -+ switch (wd_backend->locked.state) { -+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: -+ /*we assume dump_wait has been called and completed successfully*/ -+ if (accumulate) -+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer, -+ enable_map); -+ else -+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer, -+ enable_map); -+ -+ /*use state to indicate the the buffer is now empty*/ -+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; -+ break; -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: -+ /*accumulate or copy watchdog data to user buffer first so that dump_get can set -+ * the header correctly -+ */ -+ if (accumulate) -+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer, -+ enable_map); -+ else -+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer, -+ enable_map); -+ -+ /*accumulate backend data into user buffer on top of watchdog data*/ -+ accumulate = true; -+ call_dump_get = true; -+ break; -+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: -+ call_dump_get = true; -+ break; -+ default: -+ errcode = -EPERM; -+ break; -+ } + -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (call_dump_get && !errcode) { -+ /*we just dump the job manager backend into the user buffer, following -+ *accumulate flag ++ if (kbdev->cache_clean_in_progress) { ++ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set. ++ * It might have already been done by kbase_gpu_cache_flush_and_busy_wait. + */ -+ errcode = wd_backend->info->jm_backend_iface->dump_get( -+ wd_backend->jm_backend, dump_buffer, enable_map, accumulate); ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); + -+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ if (kbdev->cache_clean_queued) { ++ u32 pended_flush_op = kbdev->cache_clean_queued; + -+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY && -+ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL && -+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); ++ kbdev->cache_clean_queued = 0; + -+ if (!errcode) -+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; -+ else -+ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op); ++ } else { ++ /* Disable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~CLEAN_CACHES_COMPLETED); + -+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ kbase_gpu_cache_clean_wait_complete(kbdev); ++ } + } + -+ return errcode; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+/* exposed methods */ -+ -+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface, -+ struct kbase_hwcnt_watchdog_interface *watchdog_iface, -+ struct kbase_hwcnt_backend_interface *out_iface) ++static inline bool get_cache_clean_flag(struct kbase_device *kbdev) +{ -+ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL; -+ -+ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface)) -+ return -EINVAL; -+ -+ info = 
kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface); -+ if (!info) -+ return -ENOMEM; ++ bool cache_clean_in_progress; ++ unsigned long flags; + -+ /*linking the info table with the output iface, to allow the callbacks below to access the -+ *info object later on -+ */ -+ *out_iface = (struct kbase_hwcnt_backend_interface){ -+ .info = (void *)info, -+ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata, -+ .init = kbasep_hwcnt_backend_jm_watchdog_init, -+ .term = kbasep_hwcnt_backend_jm_watchdog_term, -+ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns, -+ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable, -+ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock, -+ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable, -+ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear, -+ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request, -+ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait, -+ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get -+ }; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ cache_clean_in_progress = kbdev->cache_clean_in_progress; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /*registering watchdog backend module methods on the output interface*/ ++ return cache_clean_in_progress; ++} + -+ return 0; ++void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) ++{ ++ while (get_cache_clean_flag(kbdev)) { ++ wait_event_interruptible(kbdev->cache_clean_wait, ++ !kbdev->cache_clean_in_progress); ++ } +} + -+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface) ++int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, ++ unsigned int wait_timeout_ms) +{ -+ if (!iface || !iface->info) -+ return; ++ long remaining = msecs_to_jiffies(wait_timeout_ms); + -+ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info); ++ while (remaining && get_cache_clean_flag(kbdev)) { ++ remaining = wait_event_timeout(kbdev->cache_clean_wait, ++ !kbdev->cache_clean_in_progress, ++ remaining); ++ } + -+ /*blanking the watchdog backend interface*/ -+ memset(iface, 0, sizeof(*iface)); ++ return (remaining ? 0 : -ETIMEDOUT); +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h +diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h new file mode 100644 -index 000000000..02a7952cc +index 000000000..6f77592df --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h -@@ -0,0 +1,65 @@ ++++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h +@@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -187721,60 +189684,129 @@ index 000000000..02a7952cc + * + */ + -+/* -+ * Concrete implementation of mali_kbase_hwcnt_backend interface for job manager -+ * backend. This module functionally interleaves between the hardware counter -+ * (hwcnt_accumulator) module (the interface consumer) and the job manager -+ * backend module (hwcnt_backend_jm). 
This module provides buffering -+ * functionality for the dumping requests requested by the hwcnt_accumulator -+ * consumer. This module is NOT multi-thread safe. The programmer must -+ * ensure the exposed methods are called by at most one thread at any time. ++#include ++ ++typedef int kbase_device_init_method(struct kbase_device *kbdev); ++typedef void kbase_device_term_method(struct kbase_device *kbdev); ++ ++/** ++ * struct kbase_device_init - Device init/term methods. ++ * @init: Function pointer to a initialise method. ++ * @term: Function pointer to a terminate method. ++ * @err_mes: Error message to be printed when init method fails. + */ ++struct kbase_device_init { ++ kbase_device_init_method *init; ++ kbase_device_term_method *term; ++ char *err_mes; ++}; + -+#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ -+#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ ++int kbase_device_vinstr_init(struct kbase_device *kbdev); ++void kbase_device_vinstr_term(struct kbase_device *kbdev); + -+#include -+#include ++int kbase_device_kinstr_prfcnt_init(struct kbase_device *kbdev); ++void kbase_device_kinstr_prfcnt_term(struct kbase_device *kbdev); ++ ++int kbase_device_timeline_init(struct kbase_device *kbdev); ++void kbase_device_timeline_term(struct kbase_device *kbdev); ++ ++int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); ++void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); ++ ++int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev); ++void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev); ++ ++int kbase_device_list_init(struct kbase_device *kbdev); ++void kbase_device_list_term(struct kbase_device *kbdev); ++ ++int kbase_device_io_history_init(struct kbase_device *kbdev); ++void kbase_device_io_history_term(struct kbase_device *kbdev); ++ ++int kbase_device_misc_register(struct kbase_device *kbdev); ++void kbase_device_misc_deregister(struct kbase_device *kbdev); ++ ++void kbase_device_id_init(struct kbase_device *kbdev); + +/** -+ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog -+ * backend interface. -+ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will -+ * extend. -+ * @watchdog_iface: Non-NULL pointer to an hardware counter watchdog interface. -+ * @out_iface: Non-NULL pointer to backend interface structure that is filled in -+ * on creation success. ++ * kbase_device_early_init - Perform any device-specific initialization. ++ * @kbdev: Device pointer + * -+ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock held. ++ * Return: 0 on success, or an error code on failure. ++ */ ++int kbase_device_early_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_device_early_term - Perform any device-specific termination. ++ * @kbdev: Device pointer ++ */ ++void kbase_device_early_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_device_late_init - Complete any device-specific initialization. ++ * @kbdev: Device pointer + * -+ * Return: 0 on success, error otherwise. ++ * Return: 0 on success, or an error code on failure. + */ -+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface, -+ struct kbase_hwcnt_watchdog_interface *watchdog_iface, -+ struct kbase_hwcnt_backend_interface *out_iface); ++int kbase_device_late_init(struct kbase_device *kbdev); + +/** -+ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog -+ * backend interface. 
-+ * @iface: Pointer to interface to destroy. ++ * kbase_device_late_term - Complete any device-specific termination. ++ * @kbdev: Device pointer ++ */ ++void kbase_device_late_term(struct kbase_device *kbdev); ++ ++#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++/** ++ * kbase_is_register_accessible - Checks if register is accessible ++ * @offset: Register offset + * -+ * Can be safely called on an all-zeroed interface, or on an already destroyed -+ * interface. ++ * Return: true if the register is accessible, false otherwise. + */ -+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface); ++bool kbase_is_register_accessible(u32 offset); ++#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +diff --git a/drivers/gpu/arm/bifrost/gpu/Kbuild b/drivers/gpu/arm/bifrost/gpu/Kbuild +new file mode 100755 +index 000000000..f3a48caf6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/gpu/Kbuild +@@ -0,0 +1,27 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c ++bifrost_kbase-y += gpu/mali_kbase_gpu.o ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += gpu/backend/mali_kbase_gpu_fault_csf.o ++else ++ bifrost_kbase-y += gpu/backend/mali_kbase_gpu_fault_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c new file mode 100644 -index 000000000..34deb5d9e +index 000000000..60ba9beab --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c -@@ -0,0 +1,775 @@ ++++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c +@@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -187792,770 +189824,1025 @@ index 000000000..34deb5d9e + * + */ + -+/* -+ * Implementation of hardware counter context and accumulator APIs. 
-+ */ ++#include ++#include ++#include + -+#include "hwcnt/mali_kbase_hwcnt_context.h" -+#include "hwcnt/mali_kbase_hwcnt_accumulator.h" -+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" ++const char *kbase_gpu_exception_name(u32 const exception_code) ++{ ++ const char *e; + -+#include -+#include -+#include ++ switch (exception_code) { ++ /* CS exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED: ++ e = "CS_RESOURCE_TERMINATED"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT: ++ e = "CS_INHERIT_FAULT"; ++ break; ++ /* CS fatal exceptions */ ++ case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT: ++ e = "CS_CONFIG_FAULT"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT: ++ e = "FATAL_CS_ENDPOINT_FAULT"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION: ++ e = "FATAL_CS_INVALID_INSTRUCTION"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW: ++ e = "FATAL_CS_CALL_STACK_OVERFLOW"; ++ break; ++ /* ++ * CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT and CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT share the same error code ++ * Type of CS_BUS_FAULT will be differentiated by CSF exception handler ++ */ ++ case CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT: ++ e = "CS_BUS_FAULT"; ++ break; ++ /* Shader exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC: ++ e = "INSTR_INVALID_PC"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC: ++ e = "INSTR_INVALID_ENC"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT: ++ e = "INSTR_BARRIER_FAULT"; ++ break; ++ /* Iterator exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_KABOOM: ++ e = "KABOOM"; ++ break; ++ /* Misc exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT: ++ e = "DATA_INVALID_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT: ++ e = "TILE_RANGE_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT: ++ e = "ADDR_RANGE_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT: ++ e = "IMPRECISE_FAULT"; ++ break; ++ /* FW exceptions */ ++ case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: ++ e = "FIRMWARE_INTERNAL_ERROR"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE: ++ e = "CS_UNRECOVERABLE"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: ++ e = "RESOURCE_EVICTION_TIMEOUT"; ++ break; ++ /* GPU Fault */ ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT: ++ e = "GPU_BUS_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT: ++ e = "GPU_SHAREABILITY_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT: ++ e = "SYSTEM_SHAREABILITY_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT: ++ e = "GPU_CACHEABILITY_FAULT"; ++ break; ++ /* MMU Fault */ ++ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0: ++ e = "TRANSLATION_FAULT at level 0"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1: ++ e = "TRANSLATION_FAULT at level 1"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2: ++ e = "TRANSLATION_FAULT at level 2"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3: ++ e = "TRANSLATION_FAULT at level 3"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4: ++ e = "TRANSLATION_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0: ++ e = "PERMISSION_FAULT at level 0"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1: ++ e = "PERMISSION_FAULT at level 1"; ++ break; ++ case 
CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2: ++ e = "PERMISSION_FAULT at level 2"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3: ++ e = "PERMISSION_FAULT at level 3"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1: ++ e = "ACCESS_FLAG at level 1"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2: ++ e = "ACCESS_FLAG at level 2"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3: ++ e = "ACCESS_FLAG at level 3"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN: ++ e = "ADDRESS_SIZE_FAULT_IN"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0: ++ e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1: ++ e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2: ++ e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3: ++ e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0: ++ e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1: ++ e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2: ++ e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3: ++ e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3"; ++ break; ++ /* Any other exception code is unknown */ ++ default: ++ e = "UNKNOWN"; ++ break; ++ } + -+/** -+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. -+ * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. -+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. -+ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are -+ * any enabled counters. ++ return e; ++} +diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c +new file mode 100644 +index 000000000..7f3743ca6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c +@@ -0,0 +1,176 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ -+enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED }; + -+/** -+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. -+ * @metadata: Pointer to immutable hwcnt metadata. -+ * @backend: Pointer to created counter backend. -+ * @state: The current state of the accumulator. -+ * - State transition from disabled->enabled or -+ * disabled->error requires state_lock. 
-+ * - State transition from enabled->disabled or -+ * enabled->error requires both accum_lock and -+ * state_lock. -+ * - Error state persists until next disable. -+ * @enable_map: The current set of enabled counters. -+ * - Must only be modified while holding both -+ * accum_lock and state_lock. -+ * - Can be read while holding either lock. -+ * - Must stay in sync with enable_map_any_enabled. -+ * @enable_map_any_enabled: True if any counters in the map are enabled, else -+ * false. If true, and state is ACCUM_STATE_ENABLED, -+ * then the counter backend will be enabled. -+ * - Must only be modified while holding both -+ * accum_lock and state_lock. -+ * - Can be read while holding either lock. -+ * - Must stay in sync with enable_map. -+ * @scratch_map: Scratch enable map, used as temporary enable map -+ * storage during dumps. -+ * - Must only be read or modified while holding -+ * accum_lock. -+ * @accum_buf: Accumulation buffer, where dumps will be accumulated -+ * into on transition to a disable state. -+ * - Must only be read or modified while holding -+ * accum_lock. -+ * @accumulated: True if the accumulation buffer has been accumulated -+ * into and not subsequently read from yet, else false. -+ * - Must only be read or modified while holding -+ * accum_lock. -+ * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent -+ * dump that was requested by the user. -+ * - Must only be read or modified while holding -+ * accum_lock. -+ */ -+struct kbase_hwcnt_accumulator { -+ const struct kbase_hwcnt_metadata *metadata; -+ struct kbase_hwcnt_backend *backend; -+ enum kbase_hwcnt_accum_state state; -+ struct kbase_hwcnt_enable_map enable_map; -+ bool enable_map_any_enabled; -+ struct kbase_hwcnt_enable_map scratch_map; -+ struct kbase_hwcnt_dump_buffer accum_buf; -+ bool accumulated; -+ u64 ts_last_dump_ns; -+}; ++#include + -+/** -+ * struct kbase_hwcnt_context - Hardware counter context structure. -+ * @iface: Pointer to hardware counter backend interface. -+ * @state_lock: Spinlock protecting state. -+ * @disable_count: Disable count of the context. Initialised to 1. -+ * Decremented when the accumulator is acquired, and incremented -+ * on release. Incremented on calls to -+ * kbase_hwcnt_context_disable[_atomic], and decremented on -+ * calls to kbase_hwcnt_context_enable. -+ * - Must only be read or modified while holding state_lock. -+ * @accum_lock: Mutex protecting accumulator. -+ * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or -+ * termination. Set to true before accumulator initialisation, -+ * and false after accumulator termination. -+ * - Must only be modified while holding both accum_lock and -+ * state_lock. -+ * - Can be read while holding either lock. -+ * @accum: Hardware counter accumulator structure. -+ * @wq: Centralized workqueue for users of hardware counters to -+ * submit async hardware counter related work. Never directly -+ * called, but it's expected that a lot of the functions in this -+ * API will end up called from the enqueued async work. 
-+ */ -+struct kbase_hwcnt_context { -+ const struct kbase_hwcnt_backend_interface *iface; -+ spinlock_t state_lock; -+ size_t disable_count; -+ struct mutex accum_lock; -+ bool accum_inited; -+ struct kbase_hwcnt_accumulator accum; -+ struct workqueue_struct *wq; -+}; ++#include + -+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, -+ struct kbase_hwcnt_context **out_hctx) ++const char *kbase_gpu_exception_name(u32 const exception_code) +{ -+ struct kbase_hwcnt_context *hctx = NULL; -+ -+ if (!iface || !out_hctx) -+ return -EINVAL; -+ -+ hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); -+ if (!hctx) -+ goto err_alloc_hctx; ++ const char *e; + -+ hctx->iface = iface; -+ spin_lock_init(&hctx->state_lock); -+ hctx->disable_count = 1; -+ mutex_init(&hctx->accum_lock); -+ hctx->accum_inited = false; -+ -+ hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); -+ if (!hctx->wq) -+ goto err_alloc_workqueue; -+ -+ *out_hctx = hctx; -+ -+ return 0; -+ -+err_alloc_workqueue: -+ kfree(hctx); -+err_alloc_hctx: -+ return -ENOMEM; -+} -+ -+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) -+{ -+ if (!hctx) -+ return; -+ -+ /* Make sure we didn't leak the accumulator */ -+ WARN_ON(hctx->accum_inited); -+ -+ /* We don't expect any work to be pending on this workqueue. -+ * Regardless, this will safely drain and complete the work. -+ */ -+ destroy_workqueue(hctx->wq); -+ kfree(hctx); -+} -+ -+/** -+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. -+ * @hctx: Non-NULL pointer to hardware counter context. -+ */ -+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) -+{ -+ WARN_ON(!hctx); -+ WARN_ON(!hctx->accum_inited); ++ switch (exception_code) { ++ /* Non-Fault Status code */ ++ case 0x00: ++ e = "NOT_STARTED/IDLE/OK"; ++ break; ++ case 0x01: ++ e = "DONE"; ++ break; ++ case 0x02: ++ e = "INTERRUPTED"; ++ break; ++ case 0x03: ++ e = "STOPPED"; ++ break; ++ case 0x04: ++ e = "TERMINATED"; ++ break; ++ case 0x08: ++ e = "ACTIVE"; ++ break; ++ /* Job exceptions */ ++ case 0x40: ++ e = "JOB_CONFIG_FAULT"; ++ break; ++ case 0x41: ++ e = "JOB_POWER_FAULT"; ++ break; ++ case 0x42: ++ e = "JOB_READ_FAULT"; ++ break; ++ case 0x43: ++ e = "JOB_WRITE_FAULT"; ++ break; ++ case 0x44: ++ e = "JOB_AFFINITY_FAULT"; ++ break; ++ case 0x48: ++ e = "JOB_BUS_FAULT"; ++ break; ++ case 0x50: ++ e = "INSTR_INVALID_PC"; ++ break; ++ case 0x51: ++ e = "INSTR_INVALID_ENC"; ++ break; ++ case 0x52: ++ e = "INSTR_TYPE_MISMATCH"; ++ break; ++ case 0x53: ++ e = "INSTR_OPERAND_FAULT"; ++ break; ++ case 0x54: ++ e = "INSTR_TLS_FAULT"; ++ break; ++ case 0x55: ++ e = "INSTR_BARRIER_FAULT"; ++ break; ++ case 0x56: ++ e = "INSTR_ALIGN_FAULT"; ++ break; ++ case 0x58: ++ e = "DATA_INVALID_FAULT"; ++ break; ++ case 0x59: ++ e = "TILE_RANGE_FAULT"; ++ break; ++ case 0x5A: ++ e = "ADDR_RANGE_FAULT"; ++ break; ++ case 0x60: ++ e = "OUT_OF_MEMORY"; ++ break; ++ /* GPU exceptions */ ++ case 0x80: ++ e = "DELAYED_BUS_FAULT"; ++ break; ++ case 0x88: ++ e = "SHAREABILITY_FAULT"; ++ break; ++ /* MMU exceptions */ ++ case 0xC0: ++ case 0xC1: ++ case 0xC2: ++ case 0xC3: ++ case 0xC4: ++ case 0xC5: ++ case 0xC6: ++ case 0xC7: ++ e = "TRANSLATION_FAULT"; ++ break; ++ case 0xC8: ++ case 0xC9: ++ case 0xCA: ++ case 0xCB: ++ case 0xCC: ++ case 0xCD: ++ case 0xCE: ++ case 0xCF: ++ e = "PERMISSION_FAULT"; ++ break; ++ case 0xD0: ++ case 0xD1: ++ case 0xD2: ++ case 0xD3: ++ case 0xD4: ++ case 0xD5: ++ case 0xD6: ++ case 0xD7: ++ e = 
"TRANSTAB_BUS_FAULT"; ++ break; ++ case 0xD8: ++ case 0xD9: ++ case 0xDA: ++ case 0xDB: ++ case 0xDC: ++ case 0xDD: ++ case 0xDE: ++ case 0xDF: ++ e = "ACCESS_FLAG"; ++ break; ++ case 0xE0: ++ case 0xE1: ++ case 0xE2: ++ case 0xE3: ++ case 0xE4: ++ case 0xE5: ++ case 0xE6: ++ case 0xE7: ++ e = "ADDRESS_SIZE_FAULT"; ++ break; ++ case 0xE8: ++ case 0xE9: ++ case 0xEA: ++ case 0xEB: ++ case 0xEC: ++ case 0xED: ++ case 0xEE: ++ case 0xEF: ++ e = "MEMORY_ATTRIBUTES_FAULT"; ++ break; ++ default: ++ e = "UNKNOWN"; ++ break; ++ } + -+ kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); -+ kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); -+ kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); -+ hctx->iface->term(hctx->accum.backend); -+ memset(&hctx->accum, 0, sizeof(hctx->accum)); ++ return e; +} -+ -+/** -+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. -+ * @hctx: Non-NULL pointer to hardware counter context. +diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h +new file mode 100644 +index 000000000..e7457ddb5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h +@@ -0,0 +1,381 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) -+{ -+ int errcode; -+ -+ WARN_ON(!hctx); -+ WARN_ON(!hctx->accum_inited); -+ -+ errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend); -+ if (errcode) -+ goto error; -+ -+ hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); -+ hctx->accum.state = ACCUM_STATE_ERROR; -+ -+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map); -+ if (errcode) -+ goto error; -+ -+ hctx->accum.enable_map_any_enabled = false; -+ -+ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf); -+ if (errcode) -+ goto error; -+ -+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map); -+ if (errcode) -+ goto error; -+ -+ hctx->accum.accumulated = false; -+ -+ hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend); -+ -+ return 0; -+ -+error: -+ kbasep_hwcnt_accumulator_term(hctx); -+ return errcode; -+} -+ -+/** -+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the -+ * disabled state, from the enabled or -+ * error states. -+ * @hctx: Non-NULL pointer to hardware counter context. -+ * @accumulate: True if we should accumulate before disabling, else false. 
+ */ -+static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate) -+{ -+ int errcode = 0; -+ bool backend_enabled = false; -+ struct kbase_hwcnt_accumulator *accum; -+ unsigned long flags; -+ u64 dump_time_ns; -+ -+ WARN_ON(!hctx); -+ lockdep_assert_held(&hctx->accum_lock); -+ WARN_ON(!hctx->accum_inited); -+ -+ accum = &hctx->accum; -+ -+ spin_lock_irqsave(&hctx->state_lock, flags); -+ -+ WARN_ON(hctx->disable_count != 0); -+ WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); -+ -+ if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled)) -+ backend_enabled = true; + -+ if (!backend_enabled) -+ hctx->accum.state = ACCUM_STATE_DISABLED; ++#ifndef _KBASE_GPU_REGMAP_CSF_H_ ++#define _KBASE_GPU_REGMAP_CSF_H_ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); ++#include + -+ /* Early out if the backend is not already enabled */ -+ if (!backend_enabled) -+ return; ++#if !MALI_USE_CSF && defined(__KERNEL__) ++#error "Cannot be compiled with JM" ++#endif + -+ if (!accumulate) -+ goto disable; ++/* GPU_CONTROL_MCU base address */ ++#define GPU_CONTROL_MCU_BASE 0x3000 + -+ /* Try and accumulate before disabling */ -+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); -+ if (errcode) -+ goto disable; ++/* MCU_SUBSYSTEM base address */ ++#define MCU_SUBSYSTEM_BASE 0x20000 + -+ errcode = hctx->iface->dump_wait(accum->backend); -+ if (errcode) -+ goto disable; ++/* IPA control registers */ ++#define COMMAND 0x000 /* (WO) Command register */ ++#define TIMER 0x008 /* (RW) Timer control register */ + -+ errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map, -+ accum->accumulated); -+ if (errcode) -+ goto disable; ++#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ ++#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ ++#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ ++#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ ++#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ ++#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ ++#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ ++#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ + -+ accum->accumulated = true; ++/* Accumulated counter values for CS hardware */ ++#define VALUE_CSHW_BASE 0x100 ++#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + -+disable: -+ hctx->iface->dump_disable(accum->backend); ++/* Accumulated counter values for memory system */ ++#define VALUE_MEMSYS_BASE 0x140 ++#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + -+ /* Regardless of any errors during the accumulate, put the accumulator -+ * in the disabled state. 
-+ */ -+ spin_lock_irqsave(&hctx->state_lock, flags); ++#define VALUE_TILER_BASE 0x180 ++#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + -+ hctx->accum.state = ACCUM_STATE_DISABLED; ++#define VALUE_SHADER_BASE 0x1C0 ++#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+} ++#define AS_STATUS_AS_ACTIVE_INT 0x2 + -+/** -+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the -+ * enabled state, from the disabled state. -+ * @hctx: Non-NULL pointer to hardware counter context. ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull ++/* Set to inner non-cacheable, outer-non-cacheable ++ * Setting defined by the alloc bits is ignored, but set to a valid encoding: ++ * - no-alloc on read ++ * - no alloc on write + */ -+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) -+{ -+ int errcode = 0; -+ struct kbase_hwcnt_accumulator *accum; -+ -+ WARN_ON(!hctx); -+ lockdep_assert_held(&hctx->state_lock); -+ WARN_ON(!hctx->accum_inited); -+ WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); -+ -+ accum = &hctx->accum; -+ -+ /* The backend only needs enabling if any counters are enabled */ -+ if (accum->enable_map_any_enabled) -+ errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map); -+ -+ if (!errcode) -+ accum->state = ACCUM_STATE_ENABLED; -+ else -+ accum->state = ACCUM_STATE_ERROR; -+} -+ -+/** -+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date -+ * values of enabled counters possible, and -+ * optionally update the set of enabled -+ * counters. -+ * @hctx: Non-NULL pointer to the hardware counter context -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * @new_map: Pointer to the new counter enable map. If non-NULL, must have -+ * the same metadata as the accumulator. If NULL, the set of -+ * enabled counters will be unchanged. -+ * -+ * Return: 0 on success, else error code. ++#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull ++/* Set to shared memory, that is inner cacheable on ACE and inner or outer ++ * shared, otherwise inner non-cacheable. ++ * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. 
+ */ -+static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns, -+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf, -+ const struct kbase_hwcnt_enable_map *new_map) -+{ -+ int errcode = 0; -+ unsigned long flags; -+ enum kbase_hwcnt_accum_state state; -+ bool dump_requested = false; -+ bool dump_written = false; -+ bool cur_map_any_enabled; -+ struct kbase_hwcnt_enable_map *cur_map; -+ bool new_map_any_enabled = false; -+ u64 dump_time_ns = 0; -+ struct kbase_hwcnt_accumulator *accum; -+ -+ WARN_ON(!hctx); -+ WARN_ON(!ts_start_ns); -+ WARN_ON(!ts_end_ns); -+ WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata)); -+ WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata)); -+ WARN_ON(!hctx->accum_inited); -+ lockdep_assert_held(&hctx->accum_lock); -+ -+ accum = &hctx->accum; -+ cur_map = &accum->scratch_map; -+ -+ /* Save out info about the current enable map */ -+ cur_map_any_enabled = accum->enable_map_any_enabled; -+ kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); -+ -+ if (new_map) -+ new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map); -+ -+ /* -+ * We're holding accum_lock, so the accumulator state might transition -+ * from disabled to enabled during this function (as enabling is lock -+ * free), but it will never disable (as disabling needs to hold the -+ * accum_lock), nor will it ever transition from enabled to error (as -+ * an enable while we're already enabled is impossible). -+ * -+ * If we're already disabled, we'll only look at the accumulation buffer -+ * rather than do a real dump, so a concurrent enable does not affect -+ * us. -+ * -+ * If a concurrent enable fails, we might transition to the error -+ * state, but again, as we're only looking at the accumulation buffer, -+ * it's not an issue. -+ */ -+ spin_lock_irqsave(&hctx->state_lock, flags); -+ -+ state = accum->state; -+ -+ /* -+ * Update the new map now, such that if an enable occurs during this -+ * dump then that enable will set the new map. If we're already enabled, -+ * then we'll do it ourselves after the dump. -+ */ -+ if (new_map) { -+ kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map); -+ accum->enable_map_any_enabled = new_map_any_enabled; -+ } -+ -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+ -+ /* Error state, so early out. No need to roll back any map updates */ -+ if (state == ACCUM_STATE_ERROR) -+ return -EIO; -+ -+ /* Initiate the dump if the backend is enabled. 
*/ -+ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { -+ if (dump_buf) { -+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); -+ dump_requested = true; -+ } else { -+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend); -+ errcode = hctx->iface->dump_clear(accum->backend); -+ } -+ -+ if (errcode) -+ goto error; -+ } else { -+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend); -+ } ++#define AS_MEMATTR_AARCH64_SHARED 0x8ull + -+ /* Copy any accumulation into the dest buffer */ -+ if (accum->accumulated && dump_buf) { -+ kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map); -+ dump_written = true; -+ } ++/* Symbols for default MEMATTR to use ++ * Default is - HW implementation defined caching ++ */ ++#define AS_MEMATTR_INDEX_DEFAULT 0 ++#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + -+ /* Wait for any requested dumps to complete */ -+ if (dump_requested) { -+ WARN_ON(state != ACCUM_STATE_ENABLED); -+ errcode = hctx->iface->dump_wait(accum->backend); -+ if (errcode) -+ goto error; -+ } ++/* HW implementation defined caching */ ++#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 ++/* Force cache on */ ++#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 ++/* Write-alloc */ ++#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 ++/* Outer coherent, inner implementation defined policy */ ++#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 ++/* Outer coherent, write alloc inner */ ++#define AS_MEMATTR_INDEX_OUTER_WA 4 ++/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ ++#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 ++/* Normal memory, shared between MCU and Host */ ++#define AS_MEMATTR_INDEX_SHARED 6 + -+ /* If we're enabled and there's a new enable map, change the enabled set -+ * as soon after the dump has completed as possible. -+ */ -+ if ((state == ACCUM_STATE_ENABLED) && new_map) { -+ /* Backend is only enabled if there were any enabled counters */ -+ if (cur_map_any_enabled) -+ hctx->iface->dump_disable(accum->backend); ++/* Configuration bits for the CSF. */ ++#define CSF_CONFIG 0xF00 + -+ /* (Re-)enable the backend if the new map has enabled counters. -+ * No need to acquire the spinlock, as concurrent enable while -+ * we're already enabled and holding accum_lock is impossible. -+ */ -+ if (new_map_any_enabled) { -+ errcode = hctx->iface->dump_enable(accum->backend, new_map); -+ if (errcode) -+ goto error; -+ } -+ } ++/* CSF_CONFIG register */ ++#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 + -+ /* Copy, accumulate, or zero into the dest buffer to finish */ -+ if (dump_buf) { -+ /* If we dumped, copy or accumulate it into the destination */ -+ if (dump_requested) { -+ WARN_ON(state != ACCUM_STATE_ENABLED); -+ errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map, -+ dump_written); -+ if (errcode) -+ goto error; -+ dump_written = true; -+ } ++/* GPU control registers */ ++#define CORE_FEATURES 0x008 /* () Shader Core Features */ ++#define MCU_CONTROL 0x700 ++#define MCU_STATUS 0x704 + -+ /* If we've not written anything into the dump buffer so far, it -+ * means there was nothing to write. Zero any enabled counters. 
-+ */ -+ if (!dump_written) -+ kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); -+ } ++#define MCU_CNTRL_ENABLE (1 << 0) ++#define MCU_CNTRL_AUTO (1 << 1) ++#define MCU_CNTRL_DISABLE (0) + -+ /* Write out timestamps */ -+ *ts_start_ns = accum->ts_last_dump_ns; -+ *ts_end_ns = dump_time_ns; ++#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) ++#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) + -+ accum->accumulated = false; -+ accum->ts_last_dump_ns = dump_time_ns; ++#define MCU_STATUS_HALTED (1 << 1) + -+ return 0; -+error: -+ /* An error was only physically possible if the backend was enabled */ -+ WARN_ON(state != ACCUM_STATE_ENABLED); ++#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) ++#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) ++#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ ++ (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) ++#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ ++ (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ ++ (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) + -+ /* Disable the backend, and transition to the error state */ -+ hctx->iface->dump_disable(accum->backend); -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* JOB IRQ flags */ ++#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ + -+ accum->state = ACCUM_STATE_ERROR; ++/* GPU_COMMAND codes */ ++#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ ++#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ ++#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ ++#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ ++#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ ++#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ ++#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ ++#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); ++/* GPU_COMMAND_RESET payloads */ + -+ return errcode; -+} ++/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. ++ * Power domains will remain powered on. ++ */ ++#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 + -+/** -+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context. -+ * @hctx: Non-NULL pointer to hardware counter context. -+ * @accumulate: True if we should accumulate before disabling, else false. ++/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and ++ * idle state. + */ -+static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate) -+{ -+ unsigned long flags; ++#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 + -+ WARN_ON(!hctx); -+ lockdep_assert_held(&hctx->accum_lock); ++/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave ++ * the system bus in an inconsistent state. Use only as a last resort when nothing else works. 
++ */ ++#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 + -+ if (!kbase_hwcnt_context_disable_atomic(hctx)) { -+ kbasep_hwcnt_accumulator_disable(hctx, accumulate); ++/* GPU_COMMAND_TIME payloads */ ++#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ ++#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ + -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */ + -+ /* Atomic disable failed and we're holding the mutex, so current -+ * disable count must be 0. -+ */ -+ WARN_ON(hctx->disable_count != 0); -+ hctx->disable_count++; ++/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+ } -+} ++/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */ ++#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */ + -+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, -+ struct kbase_hwcnt_accumulator **accum) -+{ -+ int errcode = 0; -+ unsigned long flags; ++/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */ + -+ if (!hctx || !accum) -+ return -EINVAL; ++/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */ + -+ mutex_lock(&hctx->accum_lock); -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* GPU_COMMAND command + payload */ ++#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ ++ ((__u32)opcode | ((__u32)payload << 8)) + -+ if (!hctx->accum_inited) -+ /* Set accum initing now to prevent concurrent init */ -+ hctx->accum_inited = true; -+ else -+ /* Already have an accum, or already being inited */ -+ errcode = -EBUSY; ++/* Final GPU_COMMAND form */ ++/* No operation, nothing happens */ ++#define GPU_COMMAND_NOP \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+ mutex_unlock(&hctx->accum_lock); ++/* Stop all external bus interfaces, and then reset the entire GPU. */ ++#define GPU_COMMAND_SOFT_RESET \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) + -+ if (errcode) -+ return errcode; ++/* Immediately reset the entire GPU. 
*/ ++#define GPU_COMMAND_HARD_RESET \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) + -+ errcode = kbasep_hwcnt_accumulator_init(hctx); ++/* Starts the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_START \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) + -+ if (errcode) { -+ mutex_lock(&hctx->accum_lock); -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* Stops the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_STOP \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) + -+ hctx->accum_inited = false; ++/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */ ++#define GPU_COMMAND_CACHE_CLN_INV_L2 \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ ++ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+ mutex_unlock(&hctx->accum_lock); ++/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */ ++#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ ++ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) + -+ return errcode; -+ } ++/* Clean and invalidate L2, LSC, and Other caches */ ++#define GPU_COMMAND_CACHE_CLN_INV_FULL \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ ++ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE)) + -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* Clean and invalidate only LSC cache */ ++#define GPU_COMMAND_CACHE_CLN_INV_LSC \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \ ++ (GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE)) + -+ WARN_ON(hctx->disable_count == 0); -+ WARN_ON(hctx->accum.enable_map_any_enabled); ++/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ ++ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) + -+ /* Decrement the disable count to allow the accumulator to be accessible -+ * now that it's fully constructed. -+ */ -+ hctx->disable_count--; ++/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ ++ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) + -+ /* -+ * Make sure the accumulator is initialised to the correct state. -+ * Regardless of initial state, counters don't need to be enabled via -+ * the backend, as the initial enable map has no enabled counters. -+ */ -+ hctx->accum.state = (hctx->disable_count == 0) ? 
ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED; ++/* Clean and invalidate physical range L2, LSC and Other caches */ ++#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \ ++ (GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \ ++ GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE)) + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); ++/* Merge cache flush commands */ ++#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2)) + -+ *accum = &hctx->accum; ++/* Places the GPU in protected mode */ ++#define GPU_COMMAND_SET_PROTECTED_MODE \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) + -+ return 0; -+} ++/* Halt CSF */ ++#define GPU_COMMAND_FINISH_HALT \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) + -+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) -+{ -+ unsigned long flags; -+ struct kbase_hwcnt_context *hctx; ++/* Clear GPU faults */ ++#define GPU_COMMAND_CLEAR_FAULT \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) + -+ if (!accum) -+ return; ++/* End Command Values */ + -+ hctx = container_of(accum, struct kbase_hwcnt_context, accum); ++/* GPU_FAULTSTATUS register */ ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ ++ >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ ++ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) + -+ mutex_lock(&hctx->accum_lock); ++#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 ++#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ ++ (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) + -+ /* Double release is a programming error */ -+ WARN_ON(!hctx->accum_inited); ++#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 ++#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ ++ (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) + -+ /* Disable the context to ensure the accumulator is inaccesible while -+ * we're destroying it. This performs the corresponding disable count -+ * increment to the decrement done during acquisition. -+ */ -+ kbasep_hwcnt_context_disable(hctx, false); ++#define GPU_FAULTSTATUS_JASID_SHIFT 12 ++#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) ++#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) ++#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ ++ (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ ++ (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) + -+ mutex_unlock(&hctx->accum_lock); ++#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 ++#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ ++ (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) ++/* End GPU_FAULTSTATUS register */ + -+ kbasep_hwcnt_accumulator_term(hctx); ++/* GPU_FAULTSTATUS_ACCESS_TYPE values */ ++#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 ++/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ + -+ mutex_lock(&hctx->accum_lock); -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* Implementation-dependent exception codes used to indicate CSG ++ * and CS errors that are not specified in the specs. 
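/*
 * Editor's sketch (not part of the patch): every CSF GPU_COMMAND value above packs the
 * 8-bit command opcode into bits [7:0] and a command-specific payload into bits [31:8],
 * and a cache-flush payload is simply the per-cache fields OR-ed together. Note the
 * extra parentheses around the OR-ed payload; the composite macros above need them too,
 * because GPU_COMMAND_CODE_PAYLOAD() does not parenthesise its arguments. The
 * kbase_reg_write() helper and the GPU_CONTROL_REG()/GPU_COMMAND offsets are assumed
 * from elsewhere in this register map.
 */
static inline void example_issue_full_cache_flush(struct kbase_device *kbdev)
{
	u32 cmd = GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES,
		(GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE |
		 GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE |
		 GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE)); /* payload 0x233 */

	/* Equivalent to writing GPU_COMMAND_CACHE_CLN_INV_FULL directly. */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), cmd);
}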
++ */ ++#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) ++#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) ++#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) + -+ hctx->accum_inited = false; ++/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A ++/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+ mutex_unlock(&hctx->accum_lock); -+} ++#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ ++ (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ ++ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) + -+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) -+{ -+ if (WARN_ON(!hctx)) -+ return; ++/* IRQ flags */ ++#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ ++#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ ++#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ ++#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ ++#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ ++#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ ++#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ ++#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ ++#define FLUSH_PA_RANGE_COMPLETED \ ++ (1 << 20) /* Set when a physical range cache clean operation has completed. */ + -+ /* Try and atomically disable first, so we can avoid locking the mutex -+ * if we don't need to. -+ */ -+ if (kbase_hwcnt_context_disable_atomic(hctx)) -+ return; ++/* ++ * In Debug build, ++ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interupts sources of GPU_IRQ ++ * by writing it onto GPU_IRQ_CLEAR/MASK registers. ++ * ++ * In Release build, ++ * GPU_IRQ_REG_COMMON is used. ++ * ++ * Note: ++ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. 
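/*
 * Editor's sketch (not part of the patch): the GPU_FAULTSTATUS fields above all follow
 * the same mask/shift pattern, so a raw register value can be unpacked as below. The
 * function and variable names are illustrative only; pr_err() is the standard kernel
 * logging helper.
 */
static inline void example_decode_gpu_faultstatus(u32 fault_status)
{
	u32 exception_type = GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status);
	u32 access_type = (fault_status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >>
			  GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT;
	u32 as_id = GPU_FAULTSTATUS_JASID_GET(fault_status);
	bool addr_valid = fault_status & GPU_FAULTSTATUS_ADDR_VALID_FLAG;

	if (exception_type == GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT && addr_valid)
		pr_err("GPU bus fault, access type %u, JASID %u\n", access_type, as_id);
}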
++ * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON ++ * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen ++ */ ++#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ ++ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) + -+ mutex_lock(&hctx->accum_lock); ++/* GPU_FEATURES register */ ++#define GPU_FEATURES_RAY_TRACING_SHIFT GPU_U(2) ++#define GPU_FEATURES_RAY_TRACING_MASK (GPU_U(0x1) << GPU_FEATURES_RAY_TRACING_SHIFT) ++#define GPU_FEATURES_RAY_TRACING_GET(reg_val) \ ++ (((reg_val)&GPU_FEATURES_RAY_TRACING_MASK) >> GPU_FEATURES_RAY_TRACING_SHIFT) ++/* End of GPU_FEATURES register */ + -+ kbasep_hwcnt_context_disable(hctx, true); ++#endif /* _KBASE_GPU_REGMAP_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +new file mode 100644 +index 000000000..f86f493c7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +@@ -0,0 +1,276 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
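/*
 * Editor's sketch (not part of the patch): one plausible way the Debug/Release note
 * above translates into code. The kbase_reg_write() helper and the GPU_IRQ_CLEAR
 * offset (referenced by the note) are assumed from the common register map, and
 * CONFIG_MALI_BIFROST_DEBUG is assumed to be the driver's debug Kconfig symbol.
 */
static inline void example_enable_gpu_irqs(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_BIFROST_DEBUG
	const u32 irq_mask = GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE;
#else
	const u32 irq_mask = GPU_IRQ_REG_COMMON;
#endif

	/* Clear any stale sources first, then unmask the selected ones. */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_mask);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
}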
++ * ++ */ ++#ifndef _KBASE_GPU_REGMAP_JM_H_ ++#define _KBASE_GPU_REGMAP_JM_H_ + -+ mutex_unlock(&hctx->accum_lock); -+} ++#if MALI_USE_CSF && defined(__KERNEL__) ++#error "Cannot be compiled with CSF" ++#endif + -+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) -+{ -+ unsigned long flags; -+ bool atomic_disabled = false; ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull ++/* Set to inner non-cacheable, outer-non-cacheable ++ * Setting defined by the alloc bits is ignored, but set to a valid encoding: ++ * - no-alloc on read ++ * - no alloc on write ++ */ ++#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull + -+ if (WARN_ON(!hctx)) -+ return false; ++/* Symbols for default MEMATTR to use ++ * Default is - HW implementation defined caching ++ */ ++#define AS_MEMATTR_INDEX_DEFAULT 0 ++#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + -+ spin_lock_irqsave(&hctx->state_lock, flags); ++/* HW implementation defined caching */ ++#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 ++/* Force cache on */ ++#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 ++/* Write-alloc */ ++#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 ++/* Outer coherent, inner implementation defined policy */ ++#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 ++/* Outer coherent, write alloc inner */ ++#define AS_MEMATTR_INDEX_OUTER_WA 4 ++/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ ++#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 + -+ if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { -+ /* -+ * If disable count is non-zero, we can just bump the disable -+ * count. -+ * -+ * Otherwise, we can't disable in an atomic context. 
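/*
 * Editor's sketch (not part of the patch): the AS_MEMATTR_INDEX_* symbols above are
 * byte positions inside the per-address-space MEMATTR value, while the
 * AS_MEMATTR_AARCH64_* constants are the byte encodings written into those positions;
 * page table entries then select an attribute by index. A partial composition using
 * only the encodings defined in this header (indices 0-2 would be filled the same way
 * with encodings defined elsewhere):
 */
static inline u64 example_compose_memattr(void)
{
	return (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << (8 * AS_MEMATTR_INDEX_OUTER_IMPL_DEF)) |
	       (AS_MEMATTR_AARCH64_OUTER_WA << (8 * AS_MEMATTR_INDEX_OUTER_WA)) |
	       (AS_MEMATTR_AARCH64_NON_CACHEABLE << (8 * AS_MEMATTR_INDEX_NON_CACHEABLE));
}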
-+ */ -+ if (hctx->disable_count != 0) { -+ hctx->disable_count++; -+ atomic_disabled = true; -+ } -+ } ++/* GPU control registers */ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); ++#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ ++#define JS_PRESENT 0x01C /* (RO) Job slots present */ + -+ return atomic_disabled; -+} ++#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory ++ * region base address, low word ++ */ ++#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory ++ * region base address, high word ++ */ ++#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter ++ * configuration ++ */ ++#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable ++ * flags for Job Manager ++ */ ++#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable ++ * flags for shader cores ++ */ ++#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable ++ * flags for tiler ++ */ ++#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable ++ * flags for MMU/L2 cache ++ */ + -+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) -+{ -+ unsigned long flags; ++#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ ++#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ ++#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ ++#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ ++#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ ++#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ ++#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ ++#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ ++#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ ++#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ ++#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ ++#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ ++#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ ++#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ ++#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ ++#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + -+ if (WARN_ON(!hctx)) -+ return; ++#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + -+ spin_lock_irqsave(&hctx->state_lock, flags); ++#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + -+ if (!WARN_ON(hctx->disable_count == 0)) { -+ if (hctx->disable_count == 1) -+ kbasep_hwcnt_accumulator_enable(hctx); ++/* Job control registers */ + -+ hctx->disable_count--; -+ } ++#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ ++#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ + -+ spin_unlock_irqrestore(&hctx->state_lock, flags); -+} ++#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ ++#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ ++#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ ++#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ ++#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ ++#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ ++#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ ++#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ ++#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ ++#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ ++#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ ++#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ ++#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ ++#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ ++#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + -+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx) -+{ -+ if (!hctx) -+ return NULL; ++#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ + -+ return hctx->iface->metadata(hctx->iface->info); -+} ++#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ ++#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + -+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work) -+{ -+ if (WARN_ON(!hctx) || WARN_ON(!work)) -+ return false; ++#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ + -+ return queue_work(hctx->wq, work); -+} ++#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + -+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, -+ const struct kbase_hwcnt_enable_map *new_map, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf) -+{ -+ int errcode; -+ struct kbase_hwcnt_context *hctx; ++/* No JM-specific MMU control registers */ ++/* No JM-specific MMU address space control registers */ + -+ if (!accum || !new_map || !ts_start_ns || !ts_end_ns) -+ return -EINVAL; ++/* JS_COMMAND register commands */ ++#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ ++#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
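/*
 * Editor's sketch (not part of the patch): the JOB_SLOTn banks above sit at a fixed
 * 0x80 stride, so per-slot registers such as JS_COMMAND are normally addressed as
 * slot base plus register offset. The helpers below are hypothetical; JOB_SLOT0 at
 * 0x800 is implied by the stride but not listed in this hunk.
 */
#define EXAMPLE_JOB_SLOT_BASE(n) (0x800 + ((n) << 7)) /* 0x880 for slot 1, 0x900 for slot 2, ... */
#define EXAMPLE_JS_REG(n, r) (EXAMPLE_JOB_SLOT_BASE(n) + (r)) /* EXAMPLE_JS_REG(1, JS_COMMAND) == 0x8A0 */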
Writing this value is ignored */ ++#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ ++#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ ++#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ ++#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ ++#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ ++#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + -+ hctx = container_of(accum, struct kbase_hwcnt_context, accum); ++#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + -+ if ((new_map->metadata != hctx->accum.metadata) || -+ (dump_buf && (dump_buf->metadata != hctx->accum.metadata))) -+ return -EINVAL; ++/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ ++#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) ++#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) ++#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8) ++#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) ++#define JS_CONFIG_START_MMU (1u << 10) ++#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) ++#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION ++#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) ++#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) ++#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) ++#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) ++#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + -+ mutex_lock(&hctx->accum_lock); ++/* JS_XAFFINITY register values */ ++#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) ++#define JS_XAFFINITY_TILER_ENABLE (1u << 8) ++#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + -+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); ++/* JS_STATUS register values */ + -+ mutex_unlock(&hctx->accum_lock); ++/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. ++ * The values are separated to avoid dependency of userspace and kernel code. 
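/*
 * Editor's sketch (not part of the patch): the _0/_1 soft and hard stop variants only
 * take effect when the flag programmed via JS_CONFIG_JOB_CHAIN_FLAG matches, which lets
 * the driver stop one specific job chain without racing against whatever was loaded
 * into the _NEXT registers. The kbase_reg_write() helper and a JOB_SLOT_REG(slot, reg)
 * offset helper are assumed here.
 */
static inline void example_soft_stop_slot(struct kbase_device *kbdev, int js,
					  bool job_chain_flag)
{
	u32 cmd = job_chain_flag ? JS_COMMAND_SOFT_STOP_1 : JS_COMMAND_SOFT_STOP_0;

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), cmd);
}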
++ */ + -+ return errcode; -+} ++/* Group of values representing the job status instead of a particular fault */ ++#define JS_STATUS_NO_EXCEPTION_BASE 0x00 ++#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ ++#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ ++#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + -+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, -+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) -+{ -+ int errcode; -+ struct kbase_hwcnt_context *hctx; ++/* General fault values */ ++#define JS_STATUS_FAULT_BASE 0x40 ++#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ ++#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ ++#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ ++#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ ++#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ ++#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + -+ if (!accum || !ts_start_ns || !ts_end_ns) -+ return -EINVAL; ++/* Instruction or data faults */ ++#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 ++#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ ++#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ ++#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ ++#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ ++#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ ++#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ ++#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ ++/* NOTE: No fault with 0x57 code defined in spec. 
*/ ++#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ ++#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ ++#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + -+ hctx = container_of(accum, struct kbase_hwcnt_context, accum); ++/* Other faults */ ++#define JS_STATUS_MEMORY_FAULT_BASE 0x60 ++#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ ++#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + -+ if (dump_buf && (dump_buf->metadata != hctx->accum.metadata)) -+ return -EINVAL; ++/* JS_FEATURES register */ ++#define JS_FEATURE_NULL_JOB (1u << 1) ++#define JS_FEATURE_SET_VALUE_JOB (1u << 2) ++#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) ++#define JS_FEATURE_COMPUTE_JOB (1u << 4) ++#define JS_FEATURE_VERTEX_JOB (1u << 5) ++#define JS_FEATURE_GEOMETRY_JOB (1u << 6) ++#define JS_FEATURE_TILER_JOB (1u << 7) ++#define JS_FEATURE_FUSED_JOB (1u << 8) ++#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + -+ mutex_lock(&hctx->accum_lock); ++/* JM_CONFIG register */ ++#define JM_TIMESTAMP_OVERRIDE (1ul << 0) ++#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) ++#define JM_JOB_THROTTLE_ENABLE (1ul << 2) ++#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) ++#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) ++#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) + -+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); ++/* GPU_COMMAND values */ ++#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ ++#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ ++#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ ++#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ ++#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ ++#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ ++#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ ++#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + -+ mutex_unlock(&hctx->accum_lock); ++/* GPU_COMMAND cache flush alias to CSF command payload */ ++#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES ++#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES ++#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES ++#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES + -+ return errcode; -+} ++/* Merge cache flush commands */ ++#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \ ++ ((cmd1) > (cmd2) ? (cmd1) : (cmd2)) + -+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) -+{ -+ struct kbase_hwcnt_context *hctx; ++/* IRQ flags */ ++#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ ++#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ ++#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ ++#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. 
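/*
 * Editor's sketch (not part of the patch): on the Job Manager register map the flush
 * commands are plain opcodes, and GPU_COMMAND_FLUSH_CACHE_MERGE() keeps the larger of
 * the two, which appears to rely on the stronger flush having the higher opcode; the
 * CSF variant earlier in this patch instead ORs payload bits. For example:
 */
static inline u32 example_merge_pending_flushes(void)
{
	/* Evaluates to GPU_COMMAND_CLEAN_INV_CACHES (0x08), the stronger of the two. */
	return GPU_COMMAND_FLUSH_CACHE_MERGE(GPU_COMMAND_CLEAN_CACHES,
					     GPU_COMMAND_CLEAN_INV_CACHES);
}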
*/ ++#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ ++#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ ++#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ ++#define FLUSH_PA_RANGE_COMPLETED \ ++ (1 << 20) /* Set when a physical range cache clean operation has completed. */ + -+ if (WARN_ON(!accum)) -+ return 0; ++/* ++ * In Debug build, ++ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interupts sources of GPU_IRQ ++ * by writing it onto GPU_IRQ_CLEAR/MASK registers. ++ * ++ * In Release build, ++ * GPU_IRQ_REG_COMMON is used. ++ * ++ * Note: ++ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. ++ */ ++#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ ++ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + -+ hctx = container_of(accum, struct kbase_hwcnt_context, accum); -+ return hctx->iface->timestamp_ns(accum->backend); -+} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h ++#endif /* _KBASE_GPU_REGMAP_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c new file mode 100644 -index 000000000..069e02068 +index 000000000..8a84ef541 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h -@@ -0,0 +1,139 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c +@@ -0,0 +1,41 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -188573,134 +190860,36 @@ index 000000000..069e02068 + * + */ + -+/* -+ * Hardware counter accumulator API. -+ */ -+ -+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ -+#define _KBASE_HWCNT_ACCUMULATOR_H_ -+ -+#include -+ -+struct kbase_hwcnt_context; -+struct kbase_hwcnt_accumulator; -+struct kbase_hwcnt_enable_map; -+struct kbase_hwcnt_dump_buffer; -+ -+/** -+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator -+ * for a hardware counter context. -+ * @hctx: Non-NULL pointer to a hardware counter context. -+ * @accum: Non-NULL pointer to where the pointer to the created accumulator -+ * will be stored on success. -+ * -+ * There can exist at most one instance of the hardware counter accumulator per -+ * context at a time. -+ * -+ * If multiple clients need access to the hardware counters at the same time, -+ * then an abstraction built on top of the single instance to the hardware -+ * counter accumulator is required. -+ * -+ * No counters will be enabled with the returned accumulator. A subsequent call -+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. -+ * -+ * There are four components to a hardware counter dump: -+ * - A set of enabled counters -+ * - A start time -+ * - An end time -+ * - A dump buffer containing the accumulated counter values for all enabled -+ * counters between the start and end times. -+ * -+ * For each dump, it is guaranteed that all enabled counters were active for the -+ * entirety of the period between the start and end times. 
-+ * -+ * It is also guaranteed that the start time of dump "n" is always equal to the -+ * end time of dump "n - 1". -+ * -+ * For all dumps, the values of any counters that were not enabled is undefined. -+ * -+ * Return: 0 on success or error code. -+ */ -+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, -+ struct kbase_hwcnt_accumulator **accum); -+ -+/** -+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. -+ * @accum: Non-NULL pointer to the hardware counter accumulator. -+ * -+ * The accumulator must be released before the context the accumulator was -+ * created from is terminated. -+ */ -+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); -+ -+/** -+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently -+ * enabled counters, and enable a new -+ * set of counters that will be used -+ * for subsequent dumps. -+ * @accum: Non-NULL pointer to the hardware counter accumulator. -+ * @new_map: Non-NULL pointer to the new counter enable map. Must have the -+ * same metadata as the accumulator. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * -+ * If this function fails for some unexpected reason (i.e. anything other than -+ * invalid args), then the accumulator will be put into the error state until -+ * the parent context is next disabled. -+ * -+ * Return: 0 on success or error code. -+ */ -+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, -+ const struct kbase_hwcnt_enable_map *new_map, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf); -+ -+/** -+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled -+ * counters. -+ * @accum: Non-NULL pointer to the hardware counter accumulator. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * -+ * If this function fails for some unexpected reason (i.e. anything other than -+ * invalid args), then the accumulator will be put into the error state until -+ * the parent context is next disabled. -+ * -+ * Return: 0 on success or error code. -+ */ -+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, -+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf); -+ -+/** -+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend -+ * timestamp. -+ * @accum: Non-NULL pointer to the hardware counter accumulator. -+ * -+ * Return: Accumulator backend timestamp in nanoseconds. 
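/*
 * Editor's sketch (not part of the patch): a typical consumer of the two helpers
 * declared above, formatting an MMU fault report. dev_err() is the standard kernel
 * logging helper, kbdev->dev is assumed to be the driver's struct device pointer, and
 * AS_FAULTSTATUS_EXCEPTION_TYPE_GET() is defined further down in the register map.
 */
static void example_report_mmu_fault(struct kbase_device *kbdev, u32 fault_status)
{
	dev_err(kbdev->dev, "MMU fault: %s, access type: %s\n",
		kbase_gpu_exception_name(AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status)),
		kbase_gpu_access_type_name(fault_status));
}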
-+ */ -+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum); ++#include ++#include ++#include + -+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h ++const char *kbase_gpu_access_type_name(u32 fault_status) ++{ ++ switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status)) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ return "ATOMIC"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ return "READ"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ return "WRITE"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ return "EXECUTE"; ++ default: ++ WARN_ON(1); ++ return NULL; ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h new file mode 100644 -index 000000000..89732a908 +index 000000000..6a937a5ed --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h -@@ -0,0 +1,148 @@ ++++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h +@@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -188718,143 +190907,43 @@ index 000000000..89732a908 + * + */ + -+/* -+ * Hardware counter context API. -+ */ -+ -+#ifndef _KBASE_HWCNT_CONTEXT_H_ -+#define _KBASE_HWCNT_CONTEXT_H_ -+ -+#include -+#include -+ -+struct kbase_hwcnt_backend_interface; -+struct kbase_hwcnt_context; -+ -+/** -+ * kbase_hwcnt_context_init() - Initialise a hardware counter context. -+ * @iface: Non-NULL pointer to a hardware counter backend interface. -+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will -+ * be stored on success. -+ * -+ * On creation, the disable count of the context will be 0. -+ * A hardware counter accumulator can be acquired using a created context. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, -+ struct kbase_hwcnt_context **out_hctx); -+ -+/** -+ * kbase_hwcnt_context_term() - Terminate a hardware counter context. -+ * @hctx: Pointer to context to be terminated. -+ */ -+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); -+ -+/** -+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by -+ * the context, so related counter data -+ * structures can be created. -+ * @hctx: Non-NULL pointer to the hardware counter context. -+ * -+ * Return: Non-NULL pointer to metadata, or NULL on error. -+ */ -+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx); -+ -+/** -+ * kbase_hwcnt_context_disable() - Increment the disable count of the context. -+ * @hctx: Non-NULL pointer to the hardware counter context. -+ * -+ * If a call to this function increments the disable count from 0 to 1, and -+ * an accumulator has been acquired, then a counter dump will be performed -+ * before counters are disabled via the backend interface. -+ * -+ * Subsequent dumps via the accumulator while counters are disabled will first -+ * return the accumulated dump, then will return dumps with zeroed counters. 
-+ * -+ * After this function call returns, it is guaranteed that counters will not be -+ * enabled via the backend interface. -+ */ -+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); -+ -+/** -+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the -+ * context if possible in an atomic -+ * context. -+ * @hctx: Non-NULL pointer to the hardware counter context. -+ * -+ * This function will only succeed if hardware counters are effectively already -+ * disabled, i.e. there is no accumulator, the disable count is already -+ * non-zero, or the accumulator has no counters set. -+ * -+ * After this function call returns true, it is guaranteed that counters will -+ * not be enabled via the backend interface. -+ * -+ * Return: True if the disable count was incremented, else False. -+ */ -+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); ++#ifndef _KBASE_GPU_FAULT_H_ ++#define _KBASE_GPU_FAULT_H_ + +/** -+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context. -+ * @hctx: Non-NULL pointer to the hardware counter context. -+ * -+ * If a call to this function decrements the disable count from 1 to 0, and -+ * an accumulator has been acquired, then counters will be re-enabled via the -+ * backend interface. ++ * kbase_gpu_exception_name() - Returns associated string of the exception code + * -+ * If an accumulator has been acquired and enabling counters fails for some -+ * reason, the accumulator will be placed into an error state. ++ * @exception_code: exception code + * -+ * It is only valid to call this function one time for each prior returned call -+ * to kbase_hwcnt_context_disable. ++ * This function is called by error handlers when GPU reports an error. + * -+ * The spinlock documented in the backend interface that was passed in to -+ * kbase_hwcnt_context_init() must be held before calling this function. ++ * Return: Error string associated with the exception code + */ -+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); ++const char *kbase_gpu_exception_name(u32 exception_code); + +/** -+ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async -+ * work on a workqueue specialized for -+ * hardware counters. -+ * @hctx: Non-NULL pointer to the hardware counter context. -+ * @work: Non-NULL pointer to work to queue. -+ * -+ * Return: false if work was already on a queue, true otherwise. -+ * -+ * Performance counter related work is high priority, short running, and -+ * generally CPU locality is unimportant. There is no standard workqueue that -+ * can service this flavor of work. -+ * -+ * Rather than have each user of counters define their own workqueue, we have -+ * a centralized one in here that anybody using this hardware counter API -+ * should use. ++ * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE ++ * into string. ++ * @fault_status: value of FAULTSTATUS register. + * -+ * Before the context is destroyed, all work submitted must have been completed. -+ * Given that the work enqueued via this function is likely to be hardware -+ * counter related and will therefore use the context object, this is likely -+ * to be behavior that will occur naturally. ++ * After MMU fault, this function can be used to get readable information about ++ * access_type of the MMU fault. + * -+ * Historical note: prior to this centralized workqueue, the system_highpri_wq -+ * was used. 
This was generally fine, except when a particularly long running, -+ * higher priority thread ended up scheduled on the enqueuing CPU core. Given -+ * that hardware counters requires tight integration with power management, -+ * this meant progress through the power management states could be stalled -+ * for however long that higher priority thread took. ++ * Return: String of the access type. + */ -+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work); ++const char *kbase_gpu_access_type_name(u32 fault_status); + -+#endif /* _KBASE_HWCNT_CONTEXT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c ++#endif /* _KBASE_GPU_FAULT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h new file mode 100644 -index 000000000..74916dab0 +index 000000000..6cef2bdd1 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c -@@ -0,0 +1,738 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h +@@ -0,0 +1,637 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -188872,729 +190961,671 @@ index 000000000..74916dab0 + * + */ + -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" ++#ifndef _KBASE_GPU_REGMAP_H_ ++#define _KBASE_GPU_REGMAP_H_ + -+#include ++#include ++#include ++#include ++#if MALI_USE_CSF ++#include "backend/mali_kbase_gpu_regmap_csf.h" ++#else ++#include "backend/mali_kbase_gpu_regmap_jm.h" ++#endif + -+/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements -+ */ -+enum enable_map_idx { -+ EM_LO, -+ EM_HI, -+ EM_COUNT, -+}; ++/* GPU_U definition */ ++#ifdef __ASSEMBLER__ ++#define GPU_U(x) x ++#define GPU_UL(x) x ++#define GPU_ULL(x) x ++#else ++#define GPU_U(x) x##u ++#define GPU_UL(x) x##ul ++#define GPU_ULL(x) x##ull ++#endif /* __ASSEMBLER__ */ + -+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) -+{ -+ switch (counter_set) { -+ case KBASE_HWCNT_SET_PRIMARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; -+ break; -+ case KBASE_HWCNT_SET_SECONDARY: -+ if (is_csf) -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; -+ else -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; -+ break; -+ case KBASE_HWCNT_SET_TERTIARY: -+ if (is_csf) -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; -+ else -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; -+ break; -+ default: -+ WARN_ON(true); -+ } -+} ++/* Begin Register Offsets */ ++/* GPU control registers */ + -+static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) -+{ -+ switch (counter_set) { -+ case KBASE_HWCNT_SET_PRIMARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; -+ break; -+ case KBASE_HWCNT_SET_SECONDARY: -+ case KBASE_HWCNT_SET_TERTIARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; -+ break; -+ default: -+ WARN_ON(true); -+ } -+} ++#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ ++#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ ++#define MEM_FEATURES 0x010 
/* (RO) Memory system features */ ++#define MMU_FEATURES 0x014 /* (RO) MMU features */ ++#define AS_PRESENT 0x018 /* (RO) Address space slots present */ ++#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ ++#define GPU_IRQ_MASK 0x028 /* (RW) */ + -+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) -+{ -+ switch (counter_set) { -+ case KBASE_HWCNT_SET_PRIMARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; -+ break; -+ case KBASE_HWCNT_SET_SECONDARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; -+ break; -+ case KBASE_HWCNT_SET_TERTIARY: -+ if (is_csf) -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; -+ else -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; -+ break; -+ default: -+ WARN_ON(true); -+ } -+} ++#define GPU_COMMAND 0x030 /* (WO) */ ++#define GPU_STATUS 0x034 /* (RO) */ + -+static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) -+{ -+ switch (counter_set) { -+ case KBASE_HWCNT_SET_PRIMARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; -+ break; -+ case KBASE_HWCNT_SET_SECONDARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; -+ break; -+ case KBASE_HWCNT_SET_TERTIARY: -+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; -+ break; -+ default: -+ WARN_ON(true); -+ } -+} ++#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + -+/** -+ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata -+ * for the GPU. -+ * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. -+ * @is_csf: true for CSF GPU, otherwise false. -+ * @counter_set: The performance counter set to use. -+ * @metadata: Non-NULL pointer to where created metadata is stored -+ * on success. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, -+ const bool is_csf, -+ enum kbase_hwcnt_set counter_set, -+ const struct kbase_hwcnt_metadata **metadata) -+{ -+ struct kbase_hwcnt_description desc; -+ struct kbase_hwcnt_group_description group; -+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; -+ size_t non_sc_block_count; -+ size_t sc_block_count; ++#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ ++#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ ++#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + -+ WARN_ON(!gpu_info); -+ WARN_ON(!metadata); ++#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + -+ /* Calculate number of block instances that aren't shader cores */ -+ non_sc_block_count = 2 + gpu_info->l2_count; -+ /* Calculate number of block instances that are shader cores */ -+ sc_block_count = fls64(gpu_info->core_mask); ++#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ ++#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core ++ * supergroup are l2 coherent ++ */ + -+ /* -+ * A system can have up to 64 shader cores, but the 64-bit -+ * availability mask can't physically represent that many cores as well -+ * as the other hardware blocks. -+ * Error out if there are more blocks than our implementation can -+ * support. 
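/*
 * Editor's sketch (not part of the patch): the 64-bit counters above are exposed as
 * LO/HI pairs, so a coherent read has to guard against the low word wrapping between
 * the two accesses. The kbase_reg_read() helper is assumed from the driver.
 */
static inline u64 example_read_gpu_timestamp(struct kbase_device *kbdev)
{
	u32 hi1, hi2, lo;

	do {
		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
		lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_LO));
		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
	} while (hi1 != hi2); /* retry if the high word ticked over mid-read */

	return ((u64)hi1 << 32) | lo;
}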
-+ */ -+ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) -+ return -EINVAL; ++#define PWR_KEY 0x050 /* (WO) Power manager key register */ ++#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ ++#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ ++#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ ++#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ ++#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */ ++#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ ++#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ ++#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ ++#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ ++#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ ++#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + -+ /* One Front End block */ -+ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); -+ blks[0].inst_cnt = 1; -+ blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; -+ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ ++#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ ++#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ ++#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ ++#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + -+ /* One Tiler block */ -+ kbasep_get_tiler_block_type(&blks[1].type, counter_set); -+ blks[1].inst_cnt = 1; -+ blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; -+ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ ++#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ ++#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ ++#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + -+ /* l2_count memsys blks */ -+ kbasep_get_memsys_block_type(&blks[2].type, counter_set); -+ blks[2].inst_cnt = gpu_info->l2_count; -+ blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; -+ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + -+ /* -+ * There are as many shader cores in the system as there are bits set in -+ * the core mask. However, the dump buffer memory requirements need to -+ * take into account the fact that the core mask may be non-contiguous. -+ * -+ * For example, a system with a core mask of 0b1011 has the same dump -+ * buffer memory requirements as a system with 0b1111, but requires more -+ * memory than a system with 0b0111. However, core 2 of the system with -+ * 0b1011 doesn't physically exist, and the dump buffer memory that -+ * accounts for that core will never be written to when we do a counter -+ * dump. -+ * -+ * We find the core mask's last set bit to determine the memory -+ * requirements, and embed the core mask into the availability mask so -+ * we can determine later which shader cores physically exist. 
-+ */ -+ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); -+ blks[3].inst_cnt = sc_block_count; -+ blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; -+ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ ++#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ ++#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ ++#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ + -+ WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); ++#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ ++#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + -+ group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; -+ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; -+ group.blks = blks; ++#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ ++#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + -+ desc.grp_cnt = 1; -+ desc.grps = &group; -+ desc.clk_cnt = gpu_info->clk_cnt; ++#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ ++#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + -+ /* The JM, Tiler, and L2s are always available, and are before cores */ -+ desc.avail_mask = (1ull << non_sc_block_count) - 1; -+ /* Embed the core mask directly in the availability mask */ -+ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); ++#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ ++#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + -+ return kbase_hwcnt_metadata_create(&desc, metadata); -+} ++#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ ++#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + -+/** -+ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the -+ * GPU. -+ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. -+ * -+ * Return: Size of buffer the GPU needs to perform a counter dump. 
-+ */ -+static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) -+{ -+ WARN_ON(!gpu_info); ++#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ + -+ return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * -+ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; -+} ++#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ ++#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + -+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, -+ enum kbase_hwcnt_set counter_set, -+ const struct kbase_hwcnt_metadata **out_metadata, -+ size_t *out_dump_bytes) -+{ -+ int errcode; -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t dump_bytes; ++#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ ++#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + -+ if (!gpu_info || !out_metadata || !out_dump_bytes) -+ return -EINVAL; ++#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ ++#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + -+ /* -+ * For architectures where a max_config interface is available -+ * from the arbiter, the v5 dump bytes and the metadata v5 are -+ * based on the maximum possible allocation of the HW in the -+ * GPU cause it needs to be prepared for the worst case where -+ * all the available L2 cache and Shader cores are allocated. -+ */ -+ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); -+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata); -+ if (errcode) -+ return errcode; ++#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ ++#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + -+ /* -+ * The physical dump size should be half of dump abstraction size in -+ * metadata since physical HW uses 32-bit per value but metadata -+ * specifies 64-bit per value. 
-+ */ -+ WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes); ++#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ ++#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + -+ *out_metadata = metadata; -+ *out_dump_bytes = dump_bytes; ++#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ ++#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + -+ return 0; -+} ++#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ ++#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + -+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -+{ -+ if (!metadata) -+ return; ++#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ ++#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + -+ kbase_hwcnt_metadata_destroy(metadata); -+} ++#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ ++#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) ++#define ASN_HASH_COUNT 3 + -+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, -+ enum kbase_hwcnt_set counter_set, -+ const struct kbase_hwcnt_metadata **out_metadata) -+{ -+ int errcode; -+ const struct kbase_hwcnt_metadata *metadata; ++#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */ ++#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4) ++#define SYSC_ALLOC_COUNT 8 + -+ if (!gpu_info || !out_metadata) -+ return -EINVAL; ++#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ ++#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + -+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata); -+ if (errcode) -+ return errcode; ++#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ ++#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + -+ *out_metadata = metadata; ++#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ ++#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + -+ return 0; -+} ++#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ ++#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + -+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -+{ -+ if (!metadata) -+ return; ++#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ ++#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + -+ kbase_hwcnt_metadata_destroy(metadata); -+} ++#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */ ++#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */ + -+static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk) -+{ -+ bool is_shader = false; ++#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ ++#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ ++#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + -+ /* Warn on unknown group type */ -+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) -+ return false; ++/* Job control registers */ + -+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || -+ blk_type == 
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED) -+ is_shader = true; ++#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ + -+ return is_shader; -+} ++/* MMU control registers */ + -+static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type) -+{ -+ bool is_l2_cache = false; ++#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ ++#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ ++#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ ++#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ ++#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ ++#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ ++#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ ++#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ ++#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ ++#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ ++#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ ++#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ ++#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ ++#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ ++#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + -+ switch (grp_type) { -+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5: -+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) -+ is_l2_cache = true; -+ break; -+ default: -+ /* Warn on unknown group type */ -+ WARN_ON(true); -+ } ++/* MMU address space control registers */ ++#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ ++#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ ++#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ ++#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ ++#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ ++#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + -+ return is_l2_cache; -+} ++/* (RO) Secondary fault address for address space n, low word */ ++#define AS_FAULTEXTRA_LO 0x38 ++/* (RO) Secondary fault address for address space n, high word */ ++#define AS_FAULTEXTRA_HI 0x3C + -+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, -+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) -+{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; -+ const u64 *dump_src = src; -+ size_t src_offset = 0; -+ u64 core_mask = pm_core_mask; ++/* End Register Offsets */ + -+ /* Variables to deal with the current configuration */ -+ int l2_count = 0; ++#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + -+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) -+ return -EINVAL; ++/* ++ * MMU_IRQ_RAWSTAT register values. Values are valid also for ++ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
++ */ + -+ metadata = dst->metadata; ++#define MMU_PAGE_FAULT_FLAGS 16 + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); -+ const size_t ctr_cnt = -+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); -+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); -+ const bool is_shader_core = is_block_type_shader( -+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); -+ const bool is_l2_cache = is_block_type_l2_cache( -+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); -+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined( -+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); -+ bool hw_res_available = true; ++/* Macros returning a bitmask to retrieve page fault or bus error flags from ++ * MMU registers ++ */ ++#define MMU_PAGE_FAULT(n) (1UL << (n)) ++#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + -+ /* -+ * If l2 blocks is greater than the current allocated number of -+ * L2 slices, there is no hw allocated to that block. -+ */ -+ if (is_l2_cache) { -+ l2_count++; -+ if (l2_count > curr_config->num_l2_slices) -+ hw_res_available = false; -+ else -+ hw_res_available = true; -+ } -+ /* -+ * For the shader cores, the current shader_mask allocated is -+ * always a subgroup of the maximum shader_mask, so after -+ * jumping any L2 cache not available the available shader cores -+ * will always have a matching set of blk instances available to -+ * accumulate them. -+ */ -+ else -+ hw_res_available = true; ++/* ++ * Begin AARCH64 MMU TRANSTAB register values ++ */ ++#define MMU_HW_OUTA_BITS 40 ++#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + -+ /* -+ * Skip block if no values in the destination block are enabled. -+ */ -+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { -+ u64 *dst_blk = -+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ const u64 *src_blk = dump_src + src_offset; -+ bool blk_powered; ++/* ++ * Begin MMU STATUS register values ++ */ ++#define AS_STATUS_AS_ACTIVE 0x01 + -+ if (!is_shader_core) { -+ /* Under the current PM system, counters will -+ * only be enabled after all non shader core -+ * blocks are powered up. -+ */ -+ blk_powered = true; -+ } else { -+ /* Check the PM core mask to see if the shader -+ * core is powered up. -+ */ -+ blk_powered = core_mask & 1; -+ } ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + -+ if (blk_powered && !is_undefined && hw_res_available) { -+ /* Only powered and defined blocks have valid data. */ -+ if (accumulate) { -+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, -+ hdr_cnt, ctr_cnt); -+ } else { -+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, -+ (hdr_cnt + ctr_cnt)); -+ } -+ } else { -+ /* Even though the block might be undefined, the -+ * user has enabled counter collection for it. -+ * We should not propagate garbage data. 
-+ */ -+ if (accumulate) { -+ /* No-op to preserve existing values */ -+ } else { -+ /* src is garbage, so zero the dst */ -+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, -+ (hdr_cnt + ctr_cnt)); -+ } -+ } -+ } -+ -+ /* Just increase the src_offset if the HW is available */ -+ if (hw_res_available) -+ src_offset += (hdr_cnt + ctr_cnt); -+ if (is_shader_core) -+ core_mask = core_mask >> 1; -+ } -+ -+ return 0; -+} -+ -+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) -+{ -+ const struct kbase_hwcnt_metadata *metadata; -+ const u64 *dump_src = src; -+ size_t src_offset = 0; -+ size_t grp, blk, blk_inst; -+ -+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) -+ return -EINVAL; -+ -+ metadata = dst->metadata; -+ -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); -+ const size_t ctr_cnt = -+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); -+ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); -+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined( -+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ ++ (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 + -+ /* -+ * Skip block if no values in the destination block are enabled. -+ */ -+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { -+ u64 *dst_blk = -+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ const u64 *src_blk = dump_src + src_offset; ++#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ ++ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + -+ if (!is_undefined) { -+ if (accumulate) { -+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, -+ hdr_cnt, ctr_cnt); -+ } else { -+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, -+ (hdr_cnt + ctr_cnt)); -+ } -+ } else { -+ /* Even though the block might be undefined, the -+ * user has enabled counter collection for it. -+ * We should not propagate garbage data. 
-+ */ -+ if (accumulate) { -+ /* No-op to preserve existing values */ -+ } else { -+ /* src is garbage, so zero the dst */ -+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, -+ (hdr_cnt + ctr_cnt)); -+ } -+ } -+ } ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + -+ src_offset += (hdr_cnt + ctr_cnt); -+ } ++#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 ++#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) ++#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ ++ (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) + -+ return 0; -+} ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0) ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \ ++ ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \ ++ (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \ ++ PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) + -+/** -+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical -+ * block enable map to a -+ * block enable map -+ * abstraction. -+ * @phys: Physical 32-bit block enable map -+ * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction -+ * will be stored. -+ * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction -+ * will be stored. ++/* ++ * Begin MMU TRANSCFG register values + */ -+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi) -+{ -+ u64 dwords[2] = { 0, 0 }; -+ -+ size_t dword_idx; ++#define AS_TRANSCFG_ADRMODE_LEGACY 0 ++#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 ++#define AS_TRANSCFG_ADRMODE_IDENTITY 2 ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + -+ for (dword_idx = 0; dword_idx < 2; dword_idx++) { -+ const u16 packed = phys >> (16 * dword_idx); -+ u64 dword = 0; ++#define AS_TRANSCFG_ADRMODE_MASK 0xF + -+ size_t hword_bit; ++/* ++ * Begin TRANSCFG register values ++ */ ++#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + -+ for (hword_bit = 0; hword_bit < 16; hword_bit++) { -+ const size_t dword_bit = hword_bit * 4; -+ const u64 mask = (packed >> (hword_bit)) & 0x1; ++#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) ++#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) ++#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + -+ dword |= mask << (dword_bit + 0); -+ dword |= mask << (dword_bit + 1); -+ dword |= mask << (dword_bit + 2); -+ dword |= mask << (dword_bit + 3); -+ } -+ dwords[dword_idx] = dword; -+ } -+ *lo = dwords[0]; -+ *hi = dwords[1]; -+} ++/* ++ * Begin Command Values ++ */ + -+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, -+ const struct kbase_hwcnt_enable_map *src) -+{ -+ const struct kbase_hwcnt_metadata *metadata; -+ u64 fe_bm[EM_COUNT] = { 0 }; -+ u64 shader_bm[EM_COUNT] = { 0 }; -+ u64 tiler_bm[EM_COUNT] = { 0 }; -+ u64 mmu_l2_bm[EM_COUNT] = { 0 }; -+ size_t grp, blk, blk_inst; ++/* AS_COMMAND register commands */ ++#define AS_COMMAND_NOP 0x00 /* NOP Operation */ ++#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ ++#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ ++#define AS_COMMAND_UNLOCK 0x03 /* Issue a 
flush region command to all MMUs */ ++/* Flush all L2 caches then issue a flush region command to all MMUs */ ++#define AS_COMMAND_FLUSH_PT 0x04 ++/* Wait for memory accesses to complete, flush all the L1s cache then flush all ++ * L2 caches then issue a flush region command to all MMUs ++ */ ++#define AS_COMMAND_FLUSH_MEM 0x05 + -+ if (WARN_ON(!src) || WARN_ON(!dst)) -+ return; ++/* AS_LOCKADDR register */ ++#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0) ++#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \ ++ (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) ++#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \ ++ (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \ ++ AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) ++#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \ ++ (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \ ++ AS_LOCKADDR_LOCKADDR_SIZE_MASK)) ++#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12) ++#define AS_LOCKADDR_LOCKADDR_BASE_MASK \ ++ (GPU_ULL(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) ++#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \ ++ (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \ ++ AS_LOCKADDR_LOCKADDR_BASE_SHIFT) ++#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \ ++ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \ ++ (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \ ++ AS_LOCKADDR_LOCKADDR_BASE_MASK)) ++#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6) ++#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) ++#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \ ++ (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ ++ ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) + -+ metadata = src->metadata; ++/* GPU_STATUS values */ ++#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ ++#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ ++#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); -+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); -+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); ++/* PRFCNT_CONFIG register values */ ++#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ ++#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ ++#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + -+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { -+ const size_t map_stride = -+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); -+ size_t map_idx; ++/* The performance counters are disabled. */ ++#define PRFCNT_CONFIG_MODE_OFF 0 ++/* The performance counters are enabled, but are only written out when a ++ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. ++ */ ++#define PRFCNT_CONFIG_MODE_MANUAL 1 ++/* The performance counters are enabled, and are written out each time a tile ++ * finishes rendering. ++ */ ++#define PRFCNT_CONFIG_MODE_TILE 2 + -+ for (map_idx = 0; map_idx < map_stride; ++map_idx) { -+ if (WARN_ON(map_idx >= EM_COUNT)) -+ break; ++/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ ++/* Use GPU implementation-defined caching policy. 
*/ ++#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull ++/* The attribute set to force all resources to be cached. */ ++#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + -+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: -+ /* Nothing to do in this case. */ -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: -+ fe_bm[map_idx] |= blk_map[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: -+ tiler_bm[map_idx] |= blk_map[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: -+ shader_bm[map_idx] |= blk_map[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: -+ mmu_l2_bm[map_idx] |= blk_map[map_idx]; -+ break; -+ default: -+ WARN_ON(true); -+ } -+ } -+ } else { -+ WARN_ON(true); -+ } -+ } ++/* Use GPU implementation-defined caching policy. */ ++#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull ++/* The attribute set to force all resources to be cached. */ ++#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull ++/* There is no LPAE support for non-cacheable, since the memory type is always ++ * write-back. 
++ * Marking this setting as reserved for LPAE ++ */ ++#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + -+ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]); -+ dst->shader_bm = -+ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]); -+ dst->tiler_bm = -+ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]); -+ dst->mmu_l2_bm = -+ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); -+} ++/* L2_MMU_CONFIG register */ ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + -+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) -+{ -+ switch (src) { -+ case KBASE_HWCNT_SET_PRIMARY: -+ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; -+ break; -+ case KBASE_HWCNT_SET_SECONDARY: -+ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; -+ break; -+ case KBASE_HWCNT_SET_TERTIARY: -+ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; -+ break; -+ default: -+ WARN_ON(true); -+ } -+} ++/* End L2_MMU_CONFIG register */ + -+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, -+ const struct kbase_hwcnt_physical_enable_map *src) -+{ -+ const struct kbase_hwcnt_metadata *metadata; ++/* THREAD_* registers */ + -+ u64 fe_bm[EM_COUNT] = { 0 }; -+ u64 shader_bm[EM_COUNT] = { 0 }; -+ u64 tiler_bm[EM_COUNT] = { 0 }; -+ u64 mmu_l2_bm[EM_COUNT] = { 0 }; -+ size_t grp, blk, blk_inst; ++/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ ++#define IMPLEMENTATION_UNSPECIFIED 0 ++#define IMPLEMENTATION_SILICON 1 ++#define IMPLEMENTATION_FPGA 2 ++#define IMPLEMENTATION_MODEL 3 + -+ if (WARN_ON(!src) || WARN_ON(!dst)) -+ return; ++/* Default values when registers are not supported by the implemented hardware */ ++#define THREAD_MT_DEFAULT 256 ++#define THREAD_MWS_DEFAULT 256 ++#define THREAD_MBS_DEFAULT 256 ++#define THREAD_MR_DEFAULT 1024 ++#define THREAD_MTQ_DEFAULT 4 ++#define THREAD_MTGS_DEFAULT 10 + -+ metadata = dst->metadata; ++/* End THREAD_* registers */ + -+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]); -+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO], -+ &shader_bm[EM_HI]); -+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO], -+ &tiler_bm[EM_HI]); -+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], -+ &mmu_l2_bm[EM_HI]); ++/* SHADER_CONFIG register */ ++#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) ++#define SC_TLS_HASH_ENABLE (1ul << 17) ++#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) ++#define SC_VAR_ALGORITHM (1ul << 29) ++/* End SHADER_CONFIG register */ + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); -+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); -+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); ++/* TILER_CONFIG register */ ++#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) ++/* End TILER_CONFIG register */ + -+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { -+ const size_t map_stride = -+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); -+ size_t map_idx; ++/* L2_CONFIG register */ ++#define L2_CONFIG_SIZE_SHIFT 16 ++#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) 
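++
++/*
++ * Illustrative sketch, not part of the driver's register definitions: a raw
++ * L2_CONFIG read (assumed here to be held in a u32 variable l2_config) can
++ * have its SIZE field decoded with the SHIFT/MASK pair above, e.g.
++ *
++ *   u32 l2_size_field = (u32)((l2_config & L2_CONFIG_SIZE_MASK) >>
++ *                             L2_CONFIG_SIZE_SHIFT);
++ *
++ * The remaining L2_CONFIG fields below follow the same SHIFT/MASK pattern.
++ */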
++#define L2_CONFIG_HASH_SHIFT 24 ++#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) ++#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 ++#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) ++/* End L2_CONFIG register */ + -+ for (map_idx = 0; map_idx < map_stride; ++map_idx) { -+ if (WARN_ON(map_idx >= EM_COUNT)) -+ break; ++/* AMBA_FEATURES register */ ++#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0) ++#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT) ++#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \ ++ (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \ ++ AMBA_FEATURES_ACE_LITE_SHIFT) ++#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \ ++ (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \ ++ AMBA_FEATURES_ACE_LITE_MASK)) ++#define AMBA_FEATURES_ACE_SHIFT GPU_U(1) ++#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT) ++#define AMBA_FEATURES_ACE_GET(reg_val) \ ++ (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT) ++#define AMBA_FEATURES_ACE_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \ ++ (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK)) ++#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) ++#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \ ++ (GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) ++#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \ ++ (((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \ ++ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) ++#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ ++ (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ ++ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) ++#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) ++#define AMBA_FEATURES_INVALIDATE_HINT_MASK \ ++ (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) ++#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ ++ (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ ++ AMBA_FEATURES_INVALIDATE_HINT_SHIFT) ++#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ ++ (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ ++ AMBA_FEATURES_INVALIDATE_HINT_MASK)) + -+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: -+ /* Nothing to do in this case. 
*/ -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: -+ blk_map[map_idx] = fe_bm[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: -+ blk_map[map_idx] = tiler_bm[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: -+ blk_map[map_idx] = shader_bm[map_idx]; -+ break; -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: -+ blk_map[map_idx] = mmu_l2_bm[map_idx]; -+ break; -+ default: -+ WARN_ON(true); -+ } -+ } -+ } else { -+ WARN_ON(true); -+ } -+ } -+} ++/* AMBA_ENABLE register */ ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \ ++ (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \ ++ (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \ ++ AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \ ++ (((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \ ++ AMBA_ENABLE_COHERENCY_PROTOCOL_MASK)) ++/* AMBA_ENABLE_coherency_protocol values */ ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0 ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1 ++#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F ++/* End of AMBA_ENABLE_coherency_protocol values */ ++#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) ++#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \ ++ (GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) ++#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \ ++ (((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \ ++ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) ++#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ ++ (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ ++ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) ++#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) ++#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ ++ (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) ++#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ ++ (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ ++ AMBA_ENABLE_INVALIDATE_HINT_SHIFT) ++#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ ++ (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ ++ (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ ++ AMBA_ENABLE_INVALIDATE_HINT_MASK)) + -+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, -+ const struct kbase_hwcnt_enable_map *enable_map) -+{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; ++/* IDVS_GROUP register */ ++#define IDVS_GROUP_SIZE_SHIFT (16) ++#define IDVS_GROUP_MAX_SIZE (0x3F) + -+ if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) -+ return; ++/* SYSC_ALLOC read IDs */ ++#define SYSC_ALLOC_ID_R_OTHER 0x00 ++#define SYSC_ALLOC_ID_R_CSF 0x02 ++#define SYSC_ALLOC_ID_R_MMU 0x04 ++#define SYSC_ALLOC_ID_R_TILER_VERT 0x08 ++#define SYSC_ALLOC_ID_R_TILER_PTR 0x09 ++#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A ++#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B ++#define SYSC_ALLOC_ID_R_IC 0x10 ++#define SYSC_ALLOC_ID_R_ATTR 0x11 ++#define SYSC_ALLOC_ID_R_SCM 0x12 ++#define SYSC_ALLOC_ID_R_FSDC 0x13 ++#define SYSC_ALLOC_ID_R_VL 0x14 ++#define 
SYSC_ALLOC_ID_R_PLR 0x15 ++#define SYSC_ALLOC_ID_R_TEX 0x18 ++#define SYSC_ALLOC_ID_R_LSC 0x1c + -+ metadata = buf->metadata; ++/* SYSC_ALLOC write IDs */ ++#define SYSC_ALLOC_ID_W_OTHER 0x00 ++#define SYSC_ALLOC_ID_W_CSF 0x02 ++#define SYSC_ALLOC_ID_W_PCB 0x07 ++#define SYSC_ALLOC_ID_W_TILER_PTR 0x09 ++#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A ++#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B ++#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C ++#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D ++#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10 ++#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11 ++#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12 ++#define SYSC_ALLOC_ID_W_TIB_CRC 0x13 ++#define SYSC_ALLOC_ID_W_TIB_DS 0x14 ++#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15 ++#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16 ++#define SYSC_ALLOC_ID_W_LSC 0x1C + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); -+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); -+ const u64 *blk_map = -+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); ++/* SYSC_ALLOC values */ ++#define SYSC_ALLOC_L2_ALLOC 0x0 ++#define SYSC_ALLOC_NEVER_ALLOC 0x2 ++#define SYSC_ALLOC_ALWAYS_ALLOC 0x3 ++#define SYSC_ALLOC_PTL_ALLOC 0x4 ++#define SYSC_ALLOC_L2_PTL_ALLOC 0x5 + -+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { -+ const size_t map_stride = -+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); -+ u64 prfcnt_bm[EM_COUNT] = { 0 }; -+ u32 prfcnt_en = 0; -+ size_t map_idx; ++/* SYSC_ALLOC register */ ++#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0) ++#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \ ++ SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \ ++ (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \ ++ SYSC_ALLOC_R_SYSC_ALLOC0_MASK)) ++/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */ ++#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4) ++#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \ ++ SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \ ++ (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \ ++ SYSC_ALLOC_W_SYSC_ALLOC0_MASK)) ++/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */ ++#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8) ++#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \ ++ SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \ ++ (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \ ++ SYSC_ALLOC_R_SYSC_ALLOC1_MASK)) ++/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */ ++#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12) ++#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \ ++ SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \ ++ (((value) << 
SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \ ++ SYSC_ALLOC_W_SYSC_ALLOC1_MASK)) ++/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */ ++#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16) ++#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \ ++ SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \ ++ (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \ ++ SYSC_ALLOC_R_SYSC_ALLOC2_MASK)) ++/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */ ++#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20) ++#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \ ++ SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \ ++ (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \ ++ SYSC_ALLOC_W_SYSC_ALLOC2_MASK)) ++/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */ ++#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24) ++#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \ ++ SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) ++#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \ ++ (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \ ++ SYSC_ALLOC_R_SYSC_ALLOC3_MASK)) ++/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */ ++#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28) ++#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \ ++ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \ ++ SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) ++#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \ ++ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \ ++ (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \ ++ SYSC_ALLOC_W_SYSC_ALLOC3_MASK)) ++/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */ + -+ for (map_idx = 0; map_idx < map_stride; ++map_idx) { -+ if (WARN_ON(map_idx >= EM_COUNT)) -+ break; ++/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#undef GPU_IRQ_REG_ALL ++#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+ prfcnt_bm[map_idx] = blk_map[map_idx]; -+ } ++#endif /* _KBASE_GPU_REGMAP_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/Kbuild b/drivers/gpu/arm/bifrost/hwcnt/Kbuild +new file mode 100755 +index 000000000..c1a381b24 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/Kbuild +@@ -0,0 +1,37 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], -+ prfcnt_bm[EM_HI]); ++bifrost_kbase-y += \ ++ hwcnt/mali_kbase_hwcnt.o \ ++ hwcnt/mali_kbase_hwcnt_gpu.o \ ++ hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ ++ hwcnt/mali_kbase_hwcnt_types.o \ ++ hwcnt/mali_kbase_hwcnt_virtualizer.o \ ++ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o + -+ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; -+ } else { -+ WARN_ON(true); -+ } -+ } -+} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += \ ++ hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \ ++ hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o ++else ++ bifrost_kbase-y += \ ++ hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \ ++ hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o ++endif +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h new file mode 100644 -index 000000000..a49c31e52 +index 000000000..6cfa6f5ee --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h -@@ -0,0 +1,407 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h +@@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * @@ -189616,402 +191647,220 @@ index 000000000..a49c31e52 + * + */ + -+#ifndef _KBASE_HWCNT_GPU_H_ -+#define _KBASE_HWCNT_GPU_H_ ++/* ++ * Virtual interface for hardware counter backends. ++ */ ++ ++#ifndef _KBASE_HWCNT_BACKEND_H_ ++#define _KBASE_HWCNT_BACKEND_H_ + -+#include +#include + -+struct kbase_device; +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + -+/* Hardware counter version 5 definitions, V5 is the only supported version. */ -+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 -+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 -+#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 -+#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ -+ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) -+ -+/* FrontEnd block count in V5 GPU hardware counter. */ -+#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1 -+/* Tiler block count in V5 GPU hardware counter. */ -+#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1 -+ -+/* Index of the PRFCNT_EN header into a V5 counter block */ -+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 -+ -+/* Number of bytes for each counter value in hardware. */ -+#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32)) -+ -+/** -+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to -+ * identify metadata groups. -+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. -+ */ -+enum kbase_hwcnt_gpu_group_type { -+ KBASE_HWCNT_GPU_GROUP_TYPE_V5, -+}; -+ -+/** -+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, -+ * used to identify metadata blocks. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager -+ * or CSF HW). -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job -+ * manager or CSF HW). -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job -+ * manager or CSF HW). -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block -+ * (e.g. 
if a counter set that -+ * a block doesn't support is -+ * used). -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. -+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. -+ */ -+enum kbase_hwcnt_gpu_v5_block_type { -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, -+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, -+}; -+ -+/** -+ * enum kbase_hwcnt_set - GPU hardware counter sets -+ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters -+ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters -+ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters -+ * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters -+ */ -+enum kbase_hwcnt_set { -+ KBASE_HWCNT_SET_PRIMARY, -+ KBASE_HWCNT_SET_SECONDARY, -+ KBASE_HWCNT_SET_TERTIARY, -+ KBASE_HWCNT_SET_UNDEFINED = 255, -+}; -+ -+/** -+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map -+ * directly used by GPU. -+ * @fe_bm: Front end (JM/CSHW) counters selection bitmask. -+ * @shader_bm: Shader counters selection bitmask. -+ * @tiler_bm: Tiler counters selection bitmask. -+ * @mmu_l2_bm: MMU_L2 counters selection bitmask. -+ */ -+struct kbase_hwcnt_physical_enable_map { -+ u32 fe_bm; -+ u32 shader_bm; -+ u32 tiler_bm; -+ u32 mmu_l2_bm; -+}; -+ +/* -+ * Values for Hardware Counter SET_SELECT value. -+ * Directly passed to HW. ++ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to ++ * create an instance of a hardware counter ++ * backend. + */ -+enum kbase_hwcnt_physical_set { -+ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0, -+ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1, -+ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2, -+}; ++struct kbase_hwcnt_backend_info; + -+/** -+ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. -+ * @l2_count: L2 cache count. -+ * @core_mask: Shader core mask. May be sparse. -+ * @clk_cnt: Number of clock domains available. -+ * @prfcnt_values_per_block: Total entries (header + counters) of performance -+ * counter per block. ++/* ++ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter ++ * backend, used to perform dumps. + */ -+struct kbase_hwcnt_gpu_info { -+ size_t l2_count; -+ u64 core_mask; -+ u8 clk_cnt; -+ size_t prfcnt_values_per_block; -+}; ++struct kbase_hwcnt_backend; + -+/** -+ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the -+ * GPU. -+ * @num_l2_slices: Current number of L2 slices allocated to the GPU. 
-+ * @shader_present: Current shader present bitmap that is allocated to the GPU. -+ * -+ * For architectures with the max_config interface available from the Arbiter, -+ * the current resources allocated may change during runtime due to a -+ * re-partitioning (possible with partition manager). Thus, the HWC needs to be -+ * prepared to report any possible set of counters. For this reason the memory -+ * layout in the userspace is based on the maximum possible allocation. On the -+ * other hand, each partition has just the view of its currently allocated -+ * resources. Therefore, it is necessary to correctly map the dumped HWC values -+ * from the registers into this maximum memory layout so that it can be exposed -+ * to the userspace side correctly. ++/* ++ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter ++ * metadata that describes the layout ++ * of the counter data structures. ++ * @info: Non-NULL pointer to backend info. + * -+ * For L2 cache just the number is enough once the allocated ones will be -+ * accumulated on the first L2 slots available in the destination buffer. ++ * Multiple calls to this function with the same info are guaranteed to return ++ * the same metadata object each time. + * -+ * For the correct mapping of the shader cores it is necessary to jump all the -+ * L2 cache slots in the destination buffer that are not allocated. But, it is -+ * not necessary to add any logic to map the shader cores bitmap into the memory -+ * layout because the shader_present allocated will always be a subset of the -+ * maximum shader_present. It is possible because: -+ * 1 - Partitions are made of slices and they are always ordered from the ones -+ * with more shader cores to the ones with less. -+ * 2 - The shader cores in a slice are always contiguous. -+ * 3 - A partition can only have a contiguous set of slices allocated to it. -+ * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with -+ * 3 cores and 1 with 2 cores. The maximum possible shader_present would be: -+ * 0x0011|0111|0111|1111 -> note the order and that the shader cores are -+ * contiguous in any slice. -+ * Supposing that a partition takes the two slices in the middle, the current -+ * config shader_present for this partition would be: -+ * 0x0111|0111 -> note that this is a subset of the maximum above and the slices -+ * are contiguous. -+ * Therefore, by directly copying any subset of the maximum possible -+ * shader_present the mapping is already achieved. ++ * Return: Non-NULL pointer to immutable hardware counter metadata. + */ -+struct kbase_hwcnt_curr_config { -+ size_t num_l2_slices; -+ u64 shader_present; -+}; ++typedef const struct kbase_hwcnt_metadata * ++kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); + +/** -+ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined. ++ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. ++ * @info: Non-NULL pointer to backend info. ++ * @out_backend: Non-NULL pointer to where backend is stored on success. + * -+ * @grp_type: Hardware counter group type. -+ * @blk_type: Hardware counter block type. ++ * All uses of the created hardware counter backend must be externally ++ * synchronised. + * -+ * Return: true if the block type is undefined, else false. ++ * Return: 0 on success, else error code. 
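++ *
++ * Illustrative pairing with the matching term entry point (sketch only:
++ * iface is an assumed pointer to the struct kbase_hwcnt_backend_interface
++ * declared at the end of this header, and error handling is reduced to the
++ * success check):
++ *
++ *   struct kbase_hwcnt_backend *backend;
++ *
++ *   if (!iface->init(iface->info, &backend)) {
++ *           ... use the backend ...
++ *           iface->term(backend);
++ *   }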
+ */ -+static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, -+ const uint64_t blk_type) -+{ -+ /* Warn on unknown group type */ -+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) -+ return false; -+ -+ return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || -+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED); -+} ++typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend); + +/** -+ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the -+ * JM GPUs. -+ * @info: Non-NULL pointer to info struct. -+ * @counter_set: The performance counter set used. -+ * @out_metadata: Non-NULL pointer to where created metadata is stored on -+ * success. -+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump -+ * buffer is stored on success. -+ * -+ * Return: 0 on success, else error code. ++ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. ++ * @backend: Pointer to backend to be terminated. + */ -+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, -+ enum kbase_hwcnt_set counter_set, -+ const struct kbase_hwcnt_metadata **out_metadata, -+ size_t *out_dump_bytes); ++typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. ++ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend ++ * timestamp. ++ * @backend: Non-NULL pointer to backend. + * -+ * @metadata: Pointer to metadata to destroy. ++ * Return: Backend timestamp in nanoseconds. + */ -+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); ++typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the -+ * CSF GPUs. -+ * @info: Non-NULL pointer to info struct. -+ * @counter_set: The performance counter set used. -+ * @out_metadata: Non-NULL pointer to where created metadata is stored on -+ * success. ++ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the ++ * backend. ++ * @backend: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ * ++ * The enable_map must have been created using the interface's metadata. ++ * If the backend has already been enabled, an error is returned. ++ * ++ * May be called in an atomic context. + * + * Return: 0 on success, else error code. + */ -+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, -+ enum kbase_hwcnt_set counter_set, -+ const struct kbase_hwcnt_metadata **out_metadata); -+ -+/** -+ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter -+ * metadata. -+ * @metadata: Pointer to metadata to destroy. -+ */ -+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); ++typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map); + +/** -+ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw -+ * dump buffer in src into the dump buffer -+ * abstraction in dst. -+ * @dst: Non-NULL pointer to destination dump buffer. 
-+ * @src: Non-NULL pointer to source raw dump buffer, of same length -+ * as dump_buf_bytes in the metadata of destination dump -+ * buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * @pm_core_mask: PM state synchronized shaders core mask with the dump. -+ * @curr_config: Current allocated hardware resources to correctly map the -+ * source raw dump buffer to the destination dump buffer. -+ * @accumulate: True if counters in source should be accumulated into -+ * destination, rather than copied. ++ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping ++ * with the backend. ++ * @backend: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * -+ * The dst and dst_enable_map MUST have been created from the same metadata as -+ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get -+ * the length of src. ++ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be ++ * called in an atomic context with the spinlock documented by the specific ++ * backend interface held. + * + * Return: 0 on success, else error code. + */ -+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, -+ const u64 pm_core_mask, -+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); ++typedef int ++kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map); + +/** -+ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw -+ * dump buffer in src into the dump buffer -+ * abstraction in dst. -+ * @dst: Non-NULL pointer to destination dump buffer. -+ * @src: Non-NULL pointer to source raw dump buffer, of same length -+ * as dump_buf_bytes in the metadata of dst dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * @accumulate: True if counters in src should be accumulated into -+ * destination, rather than copied. -+ * -+ * The dst and dst_enable_map MUST have been created from the same metadata as -+ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get -+ * the length of src. ++ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with ++ * the backend. ++ * @backend: Non-NULL pointer to backend. + * -+ * Return: 0 on success, else error code. ++ * If the backend is already disabled, does nothing. ++ * Any undumped counter values since the last dump get will be lost. + */ -+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); ++typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block -+ * enable map abstraction to -+ * a physical block enable -+ * map. -+ * @lo: Low 64 bits of block enable map abstraction. -+ * @hi: High 64 bits of block enable map abstraction. ++ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped ++ * counters. ++ * @backend: Non-NULL pointer to backend. + * -+ * The abstraction uses 128 bits to enable 128 block values, whereas the -+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. -+ * Therefore, this conversion is lossy. ++ * If the backend is not enabled, returns an error. 
+ * -+ * Return: 32-bit physical block enable map. ++ * Return: 0 on success, else error code. + */ -+static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi) -+{ -+ u32 phys = 0; -+ u64 dwords[2] = { lo, hi }; -+ size_t dword_idx; -+ -+ for (dword_idx = 0; dword_idx < 2; dword_idx++) { -+ const u64 dword = dwords[dword_idx]; -+ u16 packed = 0; -+ -+ size_t hword_bit; -+ -+ for (hword_bit = 0; hword_bit < 16; hword_bit++) { -+ const size_t dword_bit = hword_bit * 4; -+ const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) | -+ ((dword >> (dword_bit + 1)) & 0x1) | -+ ((dword >> (dword_bit + 2)) & 0x1) | -+ ((dword >> (dword_bit + 3)) & 0x1); -+ packed |= (mask << hword_bit); -+ } -+ phys |= ((u32)packed) << (16 * dword_idx); -+ } -+ return phys; -+} ++typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction -+ * into a physical enable map. -+ * @dst: Non-NULL pointer to destination physical enable map. -+ * @src: Non-NULL pointer to source enable map abstraction. ++ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter ++ * dump. ++ * @backend: Non-NULL pointer to backend. ++ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was ++ * requested will be written out to on success. + * -+ * The src must have been created from a metadata returned from a call to -+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. ++ * If the backend is not enabled or another dump is already in progress, ++ * returns an error. + * -+ * This is a lossy conversion, as the enable map abstraction has one bit per -+ * individual counter block value, but the physical enable map uses 1 bit for -+ * every 4 counters, shared over all instances of a block. ++ * Return: 0 on success, else error code. + */ -+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, -+ const struct kbase_hwcnt_enable_map *src); ++typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns); + +/** -+ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical -+ * SET_SELECT value. ++ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested ++ * counter dump has completed. ++ * @backend: Non-NULL pointer to backend. + * -+ * @dst: Non-NULL pointer to destination physical SET_SELECT value. -+ * @src: Non-NULL pointer to source counter set selection. ++ * If the backend is not enabled, returns an error. ++ * ++ * Return: 0 on success, else error code. + */ -+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src); ++typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); + +/** -+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to -+ * an enable map abstraction. -+ * @dst: Non-NULL pointer to destination enable map abstraction. -+ * @src: Non-NULL pointer to source physical enable map. ++ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the ++ * counters dumped after the last dump ++ * request into the dump buffer. ++ * @backend: Non-NULL pointer to backend. ++ * @dump_buffer: Non-NULL pointer to destination dump buffer. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled values. ++ * @accumulate: True if counters should be accumulated into dump_buffer, rather ++ * than copied. 
+ * -+ * The dst must have been created from a metadata returned from a call to -+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. ++ * The resultant contents of the dump buffer are only well defined if a prior ++ * call to dump_wait returned successfully, and a new dump has not yet been ++ * requested by a call to dump_request. + * -+ * This is a lossy conversion, as the physical enable map can technically -+ * support counter blocks with 128 counters each, but no hardware actually uses -+ * more than 64, so the enable map abstraction has nowhere to store the enable -+ * information for the 64 non-existent counters. ++ * Return: 0 on success, else error code. + */ -+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, -+ const struct kbase_hwcnt_physical_enable_map *src); ++typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dump_buffer, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ bool accumulate); + +/** -+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter -+ * enable headers in a dump buffer to -+ * reflect the specified enable map. -+ * @buf: Non-NULL pointer to dump buffer to patch. -+ * @enable_map: Non-NULL pointer to enable map. -+ * -+ * The buf and enable_map must have been created from a metadata returned from -+ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. -+ * -+ * This function should be used before handing off a dump buffer over the -+ * kernel-user boundary, to ensure the header is accurate for the enable map -+ * used by the user. ++ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual ++ * interface. ++ * @info: Immutable info used to initialise an instance of the ++ * backend. ++ * @metadata: Function ptr to get the immutable hardware counter ++ * metadata. ++ * @init: Function ptr to initialise an instance of the backend. ++ * @term: Function ptr to terminate an instance of the backend. ++ * @timestamp_ns: Function ptr to get the current backend timestamp. ++ * @dump_enable: Function ptr to enable dumping. ++ * @dump_enable_nolock: Function ptr to enable dumping while the ++ * backend-specific spinlock is already held. ++ * @dump_disable: Function ptr to disable dumping. ++ * @dump_clear: Function ptr to clear counters. ++ * @dump_request: Function ptr to request a dump. ++ * @dump_wait: Function ptr to wait until dump to complete. ++ * @dump_get: Function ptr to copy or accumulate dump into a dump ++ * buffer. 
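++ *
++ * A dump is normally driven through this interface as a request, followed
++ * by a wait, followed by a get. Illustrative sketch only (iface, backend,
++ * buf and map are assumed to have been set up beforehand, and error
++ * handling is omitted):
++ *
++ *   u64 dump_time_ns;
++ *
++ *   iface->dump_request(backend, &dump_time_ns);
++ *   iface->dump_wait(backend);
++ *   iface->dump_get(backend, buf, map, false);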
+ */ -+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, -+ const struct kbase_hwcnt_enable_map *enable_map); ++struct kbase_hwcnt_backend_interface { ++ const struct kbase_hwcnt_backend_info *info; ++ kbase_hwcnt_backend_metadata_fn *metadata; ++ kbase_hwcnt_backend_init_fn *init; ++ kbase_hwcnt_backend_term_fn *term; ++ kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; ++ kbase_hwcnt_backend_dump_enable_fn *dump_enable; ++ kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; ++ kbase_hwcnt_backend_dump_disable_fn *dump_disable; ++ kbase_hwcnt_backend_dump_clear_fn *dump_clear; ++ kbase_hwcnt_backend_dump_request_fn *dump_request; ++ kbase_hwcnt_backend_dump_wait_fn *dump_wait; ++ kbase_hwcnt_backend_dump_get_fn *dump_get; ++}; + -+#endif /* _KBASE_HWCNT_GPU_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c ++#endif /* _KBASE_HWCNT_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c new file mode 100644 -index 000000000..0cf2f94cf +index 000000000..4a429a6cd --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c -@@ -0,0 +1,298 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +@@ -0,0 +1,1896 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -190029,1146 +191878,1891 @@ index 000000000..0cf2f94cf + * + */ + ++#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" + -+#include -+#include ++#include ++#include ++#include +#include ++#include ++#include ++#include ++#include + -+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, -+ const struct kbase_hwcnt_metadata *src_md) -+{ -+ struct kbase_hwcnt_description desc; -+ struct kbase_hwcnt_group_description group; -+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; -+ size_t prfcnt_values_per_block; -+ size_t blk; -+ int err; -+ struct kbase_hwcnt_metadata_narrow *metadata_narrow; ++#ifndef BASE_MAX_NR_CLOCKS_REGULATORS ++#define BASE_MAX_NR_CLOCKS_REGULATORS 4 ++#endif + -+ if (!dst_md_narrow || !src_md || !src_md->grp_metadata || -+ !src_md->grp_metadata[0].blk_metadata) -+ return -EINVAL; ++#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++/* Backend watch dog timer interval in milliseconds: 18 seconds. */ ++#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000) ++#else ++/* Backend watch dog timer interval in milliseconds: 1 second. */ ++#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) ++#endif /* IS_FPGA && !NO_MALI */ + -+ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block -+ * count in the metadata. -+ */ -+ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || -+ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) -+ return -EINVAL; ++/** ++ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. 
++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is ++ * an error. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and ++ * we are waiting for an ACK, this ACK could come from either PRFCNT_ACK, ++ * PROTMODE_ENTER_ACK, or if an error occurs. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been ++ * requested and we're waiting for an ACK - this ACK could come from either ++ * PRFCNT_ACK, or if an error occurs, PROTMODE_ENTER_ACK is not applied here ++ * since watchdog request can't be triggered in protected mode. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert ++ * immediately after receiving the ACK, so we know which index corresponds to ++ * the buffer we requested. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and ++ * now we have kicked off the worker. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now ++ * we have kicked off the worker to accumulate up to that insert and then copy ++ * the delta to the user buffer to prepare for dump_get(). ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully. ++ * ++ * Valid state transitions: ++ * IDLE -> REQUESTED (on user dump request) ++ * IDLE -> WATCHDOG_REQUESTED (on watchdog request) ++ * IDLE -> QUERYING_INSERT (on user dump request in protected mode) ++ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware) ++ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request) ++ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request) ++ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) ++ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) ++ * ACCUMULATING -> COMPLETED (on accumulation completion) ++ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode) ++ * COMPLETED -> REQUESTED (on user dump request) ++ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request) ++ * COMPLETED -> IDLE (on disable) ++ * ANY -> IDLE (on error) ++ */ ++enum kbase_hwcnt_backend_csf_dump_state { ++ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED, ++}; + -+ /* Get the values count in the first block. */ -+ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); ++/** ++ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend ++ * is disabled. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in ++ * progress, waiting for firmware acknowledgment. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged, ++ * enable is done. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in ++ * progress, waiting for firmware acknowledgment. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been ++ * acknowledged, waiting for dump workers to be finished. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An ++ * unrecoverable error happened, waiting for dump workers to be finished. 
++ * ++ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error ++ * happened, and dump workers have finished, waiting for reset. ++ * ++ * Valid state transitions: ++ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable) ++ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack) ++ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable) ++ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack) ++ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed) ++ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error) ++ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable ++ * error) ++ * UNRECOVERABLE_ERROR -> DISABLED (on before reset) ++ */ ++enum kbase_hwcnt_backend_csf_enable_state { ++ KBASE_HWCNT_BACKEND_CSF_DISABLED, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED, ++ KBASE_HWCNT_BACKEND_CSF_ENABLED, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED, ++ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR, ++}; + -+ /* check all blocks should have same values count. */ -+ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { -+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); -+ if (val_cnt != prfcnt_values_per_block) -+ return -EINVAL; -+ } ++/** ++ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance ++ * of a CSF hardware counter backend. ++ * @backend: Pointer to access CSF backend. ++ * @fw_in_protected_mode: True if FW is running in protected mode, else ++ * false. ++ * @unrecoverable_error_happened: True if an recoverable error happened, else ++ * false. ++ * @csf_if: CSF interface object pointer. ++ * @ring_buf_cnt: Dump buffer count in the ring buffer. ++ * @counter_set: The performance counter set to use. ++ * @metadata: Hardware counter metadata. ++ * @prfcnt_info: Performance counter information. ++ * @watchdog_if: Watchdog interface object pointer. ++ */ ++struct kbase_hwcnt_backend_csf_info { ++ struct kbase_hwcnt_backend_csf *backend; ++ bool fw_in_protected_mode; ++ bool unrecoverable_error_happened; ++ struct kbase_hwcnt_backend_csf_if *csf_if; ++ u32 ring_buf_cnt; ++ enum kbase_hwcnt_set counter_set; ++ const struct kbase_hwcnt_metadata *metadata; ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; ++ struct kbase_hwcnt_watchdog_interface *watchdog_if; ++}; + -+ /* Only support 64 and 128 entries per block. */ -+ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) -+ return -EINVAL; ++/** ++ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout ++ * information. ++ * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are ++ * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. ++ * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. ++ * @fe_cnt: Front end block count. ++ * @tiler_cnt: Tiler block count. ++ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. ++ * @shader_cnt: Shader Core block count. ++ * @fw_block_cnt: Total number of firmware counters blocks. ++ * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). ++ * @shader_avail_mask: Bitmap of all shader cores in the system. ++ * @enable_mask_offset: Offset in array elements of enable mask in each block ++ * starting from the beginning of block. 
++ * @headers_per_block: For any block, the number of counters designated as block's header. ++ * @counters_per_block: For any block, the number of counters designated as block's payload. ++ * @values_per_block: For any block, the number of counters in total (header + payload). ++ */ ++struct kbase_hwcnt_csf_physical_layout { ++ u8 hw_block_cnt; ++ u8 fe_cnt; ++ u8 tiler_cnt; ++ u8 mmu_l2_cnt; ++ u8 shader_cnt; ++ u8 fw_block_cnt; ++ u8 block_cnt; ++ u64 shader_avail_mask; ++ size_t enable_mask_offset; ++ size_t headers_per_block; ++ size_t counters_per_block; ++ size_t values_per_block; ++}; + -+ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL); -+ if (!metadata_narrow) -+ return -ENOMEM; ++/** ++ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend. ++ * @info: CSF Info used to create the backend. ++ * @dump_state: The dumping state of the backend. ++ * @enable_state: The CSF backend internal enabled state. ++ * @insert_index_to_accumulate: The insert index in the ring buffer which need ++ * to accumulate up to. ++ * @enable_state_waitq: Wait queue object used to notify the enable ++ * changing flag is done. ++ * @to_user_buf: HWC sample buffer for client user, size ++ * metadata.dump_buf_bytes. ++ * @accum_buf: HWC sample buffer used as an internal ++ * accumulator, size metadata.dump_buf_bytes. ++ * @old_sample_buf: HWC sample buffer to save the previous values ++ * for delta calculation, size ++ * prfcnt_info.dump_bytes. ++ * @watchdog_last_seen_insert_idx: The insert index which watchdog has last ++ * seen, to check any new firmware automatic ++ * samples generated during the watchdog ++ * period. ++ * @ring_buf: Opaque pointer for ring buffer object. ++ * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. ++ * @clk_enable_map: The enable map specifying enabled clock domains. ++ * @cycle_count_elapsed: Cycle count elapsed for a given sample period. ++ * @prev_cycle_count: Previous cycle count to calculate the cycle ++ * count for sample period. ++ * @phys_layout: Physical memory layout information of HWC ++ * sample buffer. ++ * @dump_completed: Completion signaled by the dump worker when ++ * it is completed accumulating up to the ++ * insert_index_to_accumulate. ++ * Should be initialized to the "complete" state. ++ * @user_requested: Flag to indicate a dump_request called from ++ * user. ++ * @hwc_dump_workq: Single threaded work queue for HWC workers ++ * execution. ++ * @hwc_dump_work: Worker to accumulate samples. ++ * @hwc_threshold_work: Worker for consuming available samples when ++ * threshold interrupt raised. ++ */ ++struct kbase_hwcnt_backend_csf { ++ struct kbase_hwcnt_backend_csf_info *info; ++ enum kbase_hwcnt_backend_csf_dump_state dump_state; ++ enum kbase_hwcnt_backend_csf_enable_state enable_state; ++ u32 insert_index_to_accumulate; ++ wait_queue_head_t enable_state_waitq; ++ u64 *to_user_buf; ++ u64 *accum_buf; ++ u32 *old_sample_buf; ++ u32 watchdog_last_seen_insert_idx; ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; ++ void *ring_buf_cpu_base; ++ u64 clk_enable_map; ++ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_hwcnt_csf_physical_layout phys_layout; ++ struct completion dump_completed; ++ bool user_requested; ++ struct workqueue_struct *hwc_dump_workq; ++ struct work_struct hwc_dump_work; ++ struct work_struct hwc_threshold_work; ++}; + -+ /* Narrow to 64 entries per block to keep API backward compatibility. 
*/ -+ prfcnt_values_per_block = 64; ++static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) ++{ ++ WARN_ON(!csf_info); ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ return (csf_info->backend != NULL); ++} + -+ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { -+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); -+ blks[blk] = (struct kbase_hwcnt_block_description){ -+ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), -+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), -+ .hdr_cnt = blk_hdr_cnt, -+ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, -+ }; ++/** ++ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count ++ * tracking. ++ * ++ * @backend_csf: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ */ ++static void ++kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ u64 clk_enable_map = enable_map->clk_enable_map; ++ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ size_t clk; ++ ++ memset(cycle_counts, 0, sizeof(cycle_counts)); ++ ++ /* Read cycle count from CSF interface for both clock domains. */ ++ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, ++ clk_enable_map); ++ ++ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) ++ { ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) ++ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; + } + -+ group = (struct kbase_hwcnt_group_description){ -+ .type = kbase_hwcnt_metadata_group_type(src_md, 0), -+ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT, -+ .blks = blks, -+ }; ++ /* Keep clk_enable_map for dump_request. */ ++ backend_csf->clk_enable_map = clk_enable_map; ++} + -+ desc = (struct kbase_hwcnt_description){ -+ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md), -+ .avail_mask = src_md->avail_mask, -+ .clk_cnt = src_md->clk_cnt, -+ .grps = &group, -+ }; ++static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ size_t clk; + -+ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata); -+ if (!err) { -+ /* Narrow down the buffer size to half as the narrowed metadata -+ * only supports 32-bit but the created metadata uses 64-bit for -+ * block entry. 
-+ */ -+ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; -+ *dst_md_narrow = metadata_narrow; -+ } else { -+ kfree(metadata_narrow); ++ memset(cycle_counts, 0, sizeof(cycle_counts)); ++ ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ++ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, ++ backend_csf->clk_enable_map); ++ ++ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) ++ { ++ if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) { ++ backend_csf->cycle_count_elapsed[clk] = ++ cycle_counts[clk] - backend_csf->prev_cycle_count[clk]; ++ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; ++ } + } ++} + -+ return err; ++/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ ++static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ ++ if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) ++ return 0; ++ ++ return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx); +} + -+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) ++/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to ++ * guarantee headers are ++ * enabled if any counter is ++ * required. ++ *@phys_enable_map: HWC physical enable map to be processed. ++ */ ++static void ++kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map) +{ -+ if (!md_narrow) -+ return; ++ WARN_ON(!phys_enable_map); + -+ kbase_hwcnt_metadata_destroy(md_narrow->metadata); -+ kfree(md_narrow); ++ /* Enable header if any counter is required from user, the header is ++ * controlled by bit 0 of the enable mask. ++ */ ++ if (phys_enable_map->fe_bm) ++ phys_enable_map->fe_bm |= 1; ++ ++ if (phys_enable_map->tiler_bm) ++ phys_enable_map->tiler_bm |= 1; ++ ++ if (phys_enable_map->mmu_l2_bm) ++ phys_enable_map->mmu_l2_bm |= 1; ++ ++ if (phys_enable_map->shader_bm) ++ phys_enable_map->shader_bm |= 1; +} + -+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf) ++static void kbasep_hwcnt_backend_csf_init_layout( ++ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, ++ struct kbase_hwcnt_csf_physical_layout *phys_layout) +{ -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; -+ u8 *buf; ++ size_t shader_core_cnt; ++ size_t values_per_block; ++ size_t fw_blocks_count; ++ size_t hw_blocks_count; + -+ if (!md_narrow || !dump_buf) -+ return -EINVAL; ++ WARN_ON(!prfcnt_info); ++ WARN_ON(!phys_layout); + -+ dump_buf_bytes = md_narrow->dump_buf_bytes; -+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; ++ shader_core_cnt = fls64(prfcnt_info->core_mask); ++ values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; ++ fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); ++ hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + -+ /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ -+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; ++ /* The number of hardware counters reported by the GPU matches the legacy guess-work we ++ * have done in the past ++ */ ++ WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + ++ KBASE_HWCNT_V5_TILER_BLOCK_COUNT + ++ prfcnt_info->l2_count + shader_core_cnt); + -+ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){ -+ .md_narrow = md_narrow, -+ .dump_buf = (u32 *)buf, -+ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes), ++ *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ ++ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, ++ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, ++ .mmu_l2_cnt = prfcnt_info->l2_count, ++ .shader_cnt = shader_core_cnt, ++ .fw_block_cnt = fw_blocks_count, ++ .hw_block_cnt = hw_blocks_count, ++ .block_cnt = fw_blocks_count + hw_blocks_count, ++ .shader_avail_mask = prfcnt_info->core_mask, ++ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, ++ .values_per_block = values_per_block, ++ .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, ++ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, + }; ++} + -+ return 0; ++static void ++kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; ++ ++ memset(backend_csf->to_user_buf, 0, user_buf_bytes); ++ memset(backend_csf->accum_buf, 0, user_buf_bytes); ++ memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); +} + -+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) ++static void ++kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf, ++ u32 *sample) +{ -+ if (!dump_buf_narrow) -+ return; ++ u32 block_idx; ++ const struct kbase_hwcnt_csf_physical_layout *phys_layout; ++ u32 *block_buf; + -+ kfree(dump_buf_narrow->dump_buf); -+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, -+ .dump_buf = NULL, -+ .clk_cnt_buf = NULL }; ++ phys_layout = &backend_csf->phys_layout; ++ ++ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { ++ block_buf = sample + block_idx * phys_layout->values_per_block; ++ block_buf[phys_layout->enable_mask_offset] = 0; ++ } +} + -+int kbase_hwcnt_dump_buffer_narrow_array_alloc( -+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, -+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) ++static void ++kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf) +{ -+ struct kbase_hwcnt_dump_buffer_narrow *buffers; -+ size_t buf_idx; -+ unsigned int order; -+ unsigned long addr; -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; -+ size_t total_dump_buf_size; ++ u32 idx; ++ u32 *sample; ++ char *cpu_dump_base; ++ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + -+ if (!md_narrow || !dump_bufs) -+ return -EINVAL; ++ cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; + -+ dump_buf_bytes = md_narrow->dump_buf_bytes; -+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; ++ for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { ++ sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; ++ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); ++ } ++} + -+ /* Allocate memory for the dump buffer struct array */ -+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); 
-+ if (!buffers) -+ return -ENOMEM; ++static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + -+ /* Allocate pages for the actual dump buffers, as they tend to be fairly -+ * large. ++ /* Copy the data into the sample and wait for the user to get it. */ ++ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); ++ ++ /* After copied data into user sample, clear the accumulator values to ++ * prepare for the next accumulator, such as the next request or ++ * threshold. + */ -+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); -+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); ++ memset(backend_csf->accum_buf, 0, user_buf_bytes); ++} + -+ if (!addr) { -+ kfree(buffers); -+ return -ENOMEM; -+ } ++static void kbasep_hwcnt_backend_csf_accumulate_sample( ++ const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, ++ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) ++{ ++ size_t block_idx; ++ const u32 *old_block = old_sample_buf; ++ const u32 *new_block = new_sample_buf; ++ u64 *acc_block = accum_buf; ++ const size_t values_per_block = phys_layout->values_per_block; + -+ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){ -+ .page_addr = addr, -+ .page_order = order, -+ .buf_cnt = n, -+ .bufs = buffers, -+ }; ++ /* Performance counter blocks for firmware are stored before blocks for hardware. ++ * We skip over the firmware's performance counter blocks (counters dumping is not ++ * supported for firmware blocks, only hardware ones). ++ */ ++ old_block += values_per_block * phys_layout->fw_block_cnt; ++ new_block += values_per_block * phys_layout->fw_block_cnt; + -+ total_dump_buf_size = dump_buf_bytes * n; -+ /* Set the buffer of each dump buf */ -+ for (buf_idx = 0; buf_idx < n; buf_idx++) { -+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx; -+ const size_t clk_cnt_buf_offset = -+ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx); ++ for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; ++ block_idx++) { ++ const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; ++ const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; + -+ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){ -+ .md_narrow = md_narrow, -+ .dump_buf = (u32 *)(addr + dump_buf_offset), -+ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset), -+ }; ++ if (new_enable_mask == 0) { ++ /* Hardware block was unavailable or we didn't turn on ++ * any counters. Do nothing. ++ */ ++ } else { ++ /* Hardware block was available and it had some counters ++ * enabled. We need to update the accumulation buffer. ++ */ ++ size_t ctr_idx; ++ ++ /* Unconditionally copy the headers. */ ++ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { ++ acc_block[ctr_idx] = new_block[ctr_idx]; ++ } ++ ++ /* Accumulate counter samples ++ * ++ * When accumulating samples we need to take into ++ * account whether the counter sampling method involves ++ * clearing counters back to zero after each sample is ++ * taken. ++ * ++ * The intention for CSF was that all HW should use ++ * counters which wrap to zero when their maximum value ++ * is reached. This, combined with non-clearing ++ * sampling, enables multiple concurrent users to ++ * request samples without interfering with each other. 
++ * ++ * However some early HW may not support wrapping ++ * counters, for these GPUs counters must be cleared on ++ * sample to avoid loss of data due to counters ++ * saturating at their maximum value. ++ */ ++ if (!clearing_samples) { ++ if (old_enable_mask == 0) { ++ /* Hardware block was previously ++ * unavailable. Accumulate the new ++ * counters only, as we know previous ++ * values are zeroes. ++ */ ++ for (ctr_idx = phys_layout->headers_per_block; ++ ctr_idx < values_per_block; ctr_idx++) { ++ acc_block[ctr_idx] += new_block[ctr_idx]; ++ } ++ } else { ++ /* Hardware block was previously ++ * available. Accumulate the delta ++ * between old and new counter values. ++ */ ++ for (ctr_idx = phys_layout->headers_per_block; ++ ctr_idx < values_per_block; ctr_idx++) { ++ acc_block[ctr_idx] += ++ new_block[ctr_idx] - old_block[ctr_idx]; ++ } ++ } ++ } else { ++ for (ctr_idx = phys_layout->headers_per_block; ++ ctr_idx < values_per_block; ctr_idx++) { ++ acc_block[ctr_idx] += new_block[ctr_idx]; ++ } ++ } ++ } ++ old_block += values_per_block; ++ new_block += values_per_block; ++ acc_block += values_per_block; + } + -+ return 0; ++ WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); ++ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); ++ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - ++ (values_per_block * phys_layout->fw_block_cnt)); ++ (void)dump_bytes; +} + -+void kbase_hwcnt_dump_buffer_narrow_array_free( -+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) ++static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf, ++ u32 extract_index_to_start, ++ u32 insert_index_to_stop) +{ -+ if (!dump_bufs) ++ u32 raw_idx; ++ unsigned long flags = 0UL; ++ u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; ++ const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; ++ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; ++ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; ++ u32 *old_sample_buf = backend_csf->old_sample_buf; ++ u32 *new_sample_buf = old_sample_buf; ++ ++ if (extract_index_to_start == insert_index_to_stop) ++ /* No samples to accumulate. Early out. */ + return; + -+ kfree(dump_bufs->bufs); -+ free_pages(dump_bufs->page_addr, dump_bufs->page_order); -+ memset(dump_bufs, 0, sizeof(*dump_bufs)); ++ /* Sync all the buffers to CPU side before read the data. */ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, extract_index_to_start, ++ insert_index_to_stop, true); ++ ++ /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ ++ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { ++ /* The logical "&" acts as a modulo operation since buf_count ++ * must be a power of two. ++ */ ++ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); ++ ++ new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++ ++ kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, ++ buf_dump_bytes, backend_csf->accum_buf, ++ old_sample_buf, new_sample_buf, ++ clearing_samples); ++ ++ old_sample_buf = new_sample_buf; ++ } ++ ++ /* Save the newest buffer as the old buffer for next time. */ ++ memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); ++ ++ /* Reset the prfcnt_en header on each sample before releasing them. 
*/ ++ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { ++ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); ++ u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++ ++ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); ++ } ++ ++ /* Sync zeroed buffers to avoid coherency issues on future use. */ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, extract_index_to_start, ++ insert_index_to_stop, false); ++ ++ /* After consuming all samples between extract_idx and insert_idx, ++ * set the raw extract index to insert_idx so that the sample buffers ++ * can be released back to the ring buffer pool. ++ */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx, ++ insert_index_to_stop); ++ /* Update the watchdog last seen index to check any new FW auto samples ++ * in next watchdog callback. ++ */ ++ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); +} + -+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, -+ const u64 *blk_em, size_t val_cnt) ++static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ struct kbase_hwcnt_backend_csf *backend_csf, ++ enum kbase_hwcnt_backend_csf_enable_state new_state) +{ -+ size_t val; ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + -+ for (val = 0; val < val_cnt; val++) { -+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); -+ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; ++ if (backend_csf->enable_state != new_state) { ++ backend_csf->enable_state = new_state; + -+ dst_blk[val] = val_enabled ? src_val : 0; ++ wake_up(&backend_csf->enable_state_waitq); + } +} + -+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) +{ -+ const struct kbase_hwcnt_metadata_narrow *metadata_narrow; -+ size_t grp; -+ size_t clk; ++ struct kbase_hwcnt_backend_csf_info *csf_info = info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ unsigned long flags = 0UL; + -+ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || -+ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || -+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || -+ WARN_ON(src->metadata->grp_cnt != 1) || -+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != -+ src->metadata->grp_metadata[0].blk_cnt) || -+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != -+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || -+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > -+ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) ++ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++ ++ if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) { ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; ++ } + -+ /* Don't use src metadata since src buffer is bigger than dst buffer. 
*/ -+ metadata_narrow = dst_narrow->md_narrow; ++ backend_csf = csf_info->backend; + -+ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { -+ size_t blk; -+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); ++ /* Only do watchdog request when all conditions are met: */ ++ if (/* 1. Backend is enabled. */ ++ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && ++ /* 2. FW is not in protected mode. */ ++ (!csf_info->fw_in_protected_mode) && ++ /* 3. dump state indicates no other dumping is in progress. */ ++ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || ++ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { ++ u32 extract_index = 0U; ++ u32 insert_index = 0U; + -+ for (blk = 0; blk < blk_cnt; blk++) { -+ size_t blk_inst; -+ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( -+ metadata_narrow, grp, blk); ++ /* Read the raw extract and insert indexes from the CSF interface. */ ++ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index); + -+ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { -+ /* The narrowed down buffer is only 32-bit. */ -+ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( -+ dst_narrow, grp, blk, blk_inst); -+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( -+ src, grp, blk, blk_inst); -+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( -+ dst_enable_map, grp, blk, blk_inst); -+ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( -+ metadata_narrow, grp, blk); -+ /* Align upwards to include padding bytes */ -+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( -+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / -+ KBASE_HWCNT_VALUE_BYTES)); ++ /* Do watchdog request if no new FW auto samples. */ ++ if (insert_index == backend_csf->watchdog_last_seen_insert_idx) { ++ /* Trigger the watchdog request. */ ++ csf_info->csf_if->dump_request(csf_info->csf_if->ctx); + -+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, -+ blk_em, val_cnt); -+ } ++ /* A watchdog dump is required, change the state to ++ * start the request process. ++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; + } + } + -+ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { -+ bool clk_enabled = -+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); -+ -+ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; ++ /* Must schedule another callback when in the transitional state because ++ * this function can be called for the first time before the performance ++ * counter enabled interrupt. ++ */ ++ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) || ++ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { ++ /* Reschedule the timer for next watchdog callback. */ ++ csf_info->watchdog_if->modify(csf_info->watchdog_if->timer, ++ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + } ++ ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h -new file mode 100644 -index 000000000..afd236d71 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h -@@ -0,0 +1,330 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. ++ * @work: Work structure. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * To accumulate all available samples in the ring buffer when a request has ++ * been done. + * + */ ++static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) ++{ ++ unsigned long flags = 0ULL; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ u32 insert_index_to_acc; ++ u32 extract_index = 0U; ++ u32 insert_index = 0U; + -+#ifndef _KBASE_HWCNT_GPU_NARROW_H_ -+#define _KBASE_HWCNT_GPU_NARROW_H_ ++ WARN_ON(!work); ++ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); + -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+#include ++ /* The backend was disabled or had an error while the worker was being ++ * launched. ++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return; ++ } + -+struct kbase_device; -+struct kbase_hwcnt_metadata; -+struct kbase_hwcnt_enable_map; -+struct kbase_hwcnt_dump_buffer; ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); + -+/** -+ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical -+ * layout of narrow dump buffers. -+ * For backward compatibility, the narrow -+ * metadata only supports 64 counters per -+ * block and 32-bit per block entry. -+ * @metadata: Non-NULL pointer to the metadata before narrow down to -+ * 32-bit per block entry, it has 64 counters per block and -+ * 64-bit per value. -+ * @dump_buf_bytes: The size in bytes after narrow 64-bit to 32-bit per block -+ * entry. -+ */ -+struct kbase_hwcnt_metadata_narrow { -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t dump_buf_bytes; -+}; ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; ++ insert_index_to_acc = backend_csf->insert_index_to_accumulate; + -+/** -+ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer. -+ * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to -+ * describe the layout of the narrow dump buffer. -+ * @dump_buf: Non-NULL pointer to an array of u32 values, the array size -+ * is md_narrow->dump_buf_bytes. -+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed -+ * for each clock domain. 
-+ */ -+struct kbase_hwcnt_dump_buffer_narrow { -+ const struct kbase_hwcnt_metadata_narrow *md_narrow; -+ u32 *dump_buf; -+ u64 *clk_cnt_buf; -+}; ++ /* Read the raw extract and insert indexes from the CSF interface. */ ++ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, ++ &insert_index); + -+/** -+ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump -+ * buffer array. -+ * @page_addr: Address of first allocated page. A single allocation is used for -+ * all narrow dump buffers in the array. -+ * @page_order: The allocation order of the pages, the order is on a logarithmic -+ * scale. -+ * @buf_cnt: The number of allocated dump buffers. -+ * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors. -+ */ -+struct kbase_hwcnt_dump_buffer_narrow_array { -+ unsigned long page_addr; -+ unsigned int page_order; -+ size_t buf_cnt; -+ struct kbase_hwcnt_dump_buffer_narrow *bufs; -+}; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + -+/** -+ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from -+ * narrow metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * -+ * Return: Number of hardware counter groups described by narrow metadata. -+ */ -+static inline size_t -+kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow) -+{ -+ return kbase_hwcnt_metadata_group_count(md_narrow->metadata); ++ /* Accumulate up to the insert we grabbed at the prfcnt request ++ * interrupt. ++ */ ++ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, ++ insert_index_to_acc); ++ ++ /* Copy to the user buffer so if a threshold interrupt fires ++ * between now and get(), the accumulations are untouched. ++ */ ++ kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); ++ ++ /* Dump done, set state back to COMPLETED for next request. */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); ++ ++ /* The backend was disabled or had an error while we were accumulating. ++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); ++ ++ /* Our work here is done - set the wait object and unblock waiters. */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; ++ complete_all(&backend_csf->dump_completed); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); +} + +/** -+ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group -+ * from narrow metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. ++ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker. + * -+ * Return: Type of the group grp. ++ * @work: Work structure. ++ * ++ * Called when a HWC threshold interrupt raised to consume all available samples ++ * in the ring buffer. 
+ */ -+static inline u64 -+kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ size_t grp) ++static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) +{ -+ return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp); ++ unsigned long flags = 0ULL; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ u32 extract_index = 0U; ++ u32 insert_index = 0U; ++ ++ WARN_ON(!work); ++ ++ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); ++ ++ /* Read the raw extract and insert indexes from the CSF interface. */ ++ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, ++ &insert_index); ++ ++ /* The backend was disabled or had an error while the worker was being ++ * launched. ++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ /* Early out if we are not in the IDLE state or COMPLETED state, as this ++ * means a concurrent dump is in progress and we don't want to ++ * interfere. ++ */ ++ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && ++ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ ++ /* Accumulate everything we possibly can. We grabbed the insert index ++ * immediately after we acquired the lock but before we checked whether ++ * a concurrent dump was triggered. This ensures that if a concurrent ++ * dump was triggered between releasing the lock and now, we know for a ++ * fact that our insert will not exceed the concurrent dump's ++ * insert_to_accumulate, so we don't risk accumulating too much data. ++ */ ++ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); ++ ++ /* No need to wake up anything since it is not a user dump request. */ +} + -+/** -+ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a -+ * group from narrow metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. -+ * -+ * Return: Number of blocks in group grp. -+ */ -+static inline size_t -+kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ size_t grp) ++static void ++kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info) +{ -+ return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); ++ u32 extract_index; ++ ++ WARN_ON(!csf_info); ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); ++ WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED); ++ WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); ++ ++ /* Save insert index now so that the dump worker only accumulates the ++ * HWC data associated with this request. Extract index is not stored ++ * as that needs to be checked when accumulating to prevent re-reading ++ * buffers that have already been read and returned to the GPU. 
++ */ ++ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, ++ &csf_info->backend->insert_index_to_accumulate); ++ csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; ++ ++ /* Submit the accumulator task into the work queue. */ ++ queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work); +} + -+/** -+ * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of -+ * instances of a block -+ * from narrow metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of instances of block blk in group grp. -+ */ -+static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( -+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) ++static void ++kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_backend_csf_if_enable *enable) +{ -+ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); ++ enum kbase_hwcnt_physical_set phys_counter_set; ++ struct kbase_hwcnt_physical_enable_map phys_enable_map; ++ ++ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ ++ /* process the enable_map to guarantee the block header is enabled which ++ * is needed for delta calculation. ++ */ ++ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); ++ ++ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set); ++ ++ /* Use processed enable_map to enable HWC in HW level. */ ++ enable->fe_bm = phys_enable_map.fe_bm; ++ enable->shader_bm = phys_enable_map.shader_bm; ++ enable->tiler_bm = phys_enable_map.tiler_bm; ++ enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; ++ enable->counter_set = phys_counter_set; ++ enable->clk_enable_map = enable_map->clk_enable_map; +} + -+/** -+ * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter -+ * headers from narrow -+ * metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of counter headers in each instance of block blk in group grp. 
-+ */ -+static inline size_t -+kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ size_t grp, size_t blk) ++/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ ++static int ++kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) +{ -+ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ struct kbase_hwcnt_backend_csf_if_enable enable; ++ int err; ++ ++ if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) ++ return -EINVAL; ++ ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ++ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); ++ ++ /* enable_state should be DISABLED before we transfer it to enabled */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) ++ return -EIO; ++ ++ err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer, ++ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, ++ kbasep_hwcnt_backend_watchdog_timer_cb, ++ backend_csf->info); ++ if (err) ++ return err; ++ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); ++ ++ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, &enable); ++ ++ kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); ++ ++ return 0; +} + -+/** -+ * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of -+ * counters from narrow -+ * metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of counters in each instance of block blk in group grp. -+ */ -+static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( -+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) ++/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ ++static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) +{ -+ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); ++ int errcode; ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return errcode; +} + -+/** -+ * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values -+ * from narrow metadata. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @grp: Index of the group in the narrow metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of headers plus counters in each instance of block blk -+ * in group grp. 
-+ */ -+static inline size_t -+kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ size_t grp, size_t blk) ++static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( ++ struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) +{ -+ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + -+ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ++ while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || ++ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags); ++ ++ wait_event(backend_csf->enable_state_waitq, ++ (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && ++ (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags); ++ } +} + -+/** -+ * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a -+ * narrowed block instance's -+ * dump buffer. -+ * @buf: Non-NULL pointer to narrow dump buffer. -+ * @grp: Index of the group in the narrow metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. -+ * -+ * Return: u32* to the dump buffer for the block instance. -+ */ -+static inline u32 * -+kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, -+ size_t grp, size_t blk, size_t blk_inst) ++/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ ++static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) +{ -+ return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + -+ buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + -+ (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * -+ blk_inst); -+} ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ bool do_disable = false; + -+/** -+ * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC -+ * entries per block truncated to -+ * 64 entries and block entry size -+ * narrowed down to 32-bit. -+ * -+ * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored -+ * on success. -+ * @src_md: Non-NULL pointer to the HWC metadata used as the source to -+ * create dst_md_narrow. -+ * -+ * For backward compatibility of the interface to user clients, a new metadata -+ * with entries per block truncated to 64 and block entry size narrowed down -+ * to 32-bit will be created for dst_md_narrow. -+ * The total entries per block in src_md must be 64 or 128, if it's other -+ * values, function returns error since it's not supported. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, -+ const struct kbase_hwcnt_metadata *src_md); ++ WARN_ON(!backend_csf); + -+/** -+ * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow -+ * metadata object. -+ * @md_narrow: Pointer to hardware counter narrow metadata. 
-+ */ -+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + -+/** -+ * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be -+ * initialised to undefined values, so must be used as a copy -+ * destination, or cleared before use. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, -+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf); ++ /* Make sure we wait until any previous enable or disable have completed ++ * before doing anything. ++ */ ++ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); + -+/** -+ * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. -+ * @dump_buf: Dump buffer to be freed. -+ * -+ * Can be safely called on an all-zeroed narrow dump buffer structure, or on an -+ * already freed narrow dump buffer. -+ */ -+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || ++ backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { ++ /* If we are already disabled or in an unrecoverable error ++ * state, there is nothing for us to do. ++ */ ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ return; ++ } + -+/** -+ * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow -+ * dump buffers. -+ * @md_narrow: Non-NULL pointer to narrow metadata. -+ * @n: Number of narrow dump buffers to allocate -+ * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array -+ * object to be initialised. -+ * -+ * A single zeroed contiguous page allocation will be used for all of the -+ * buffers inside the object, where: -+ * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_dump_buffer_narrow_array_alloc( -+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, -+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ /* Only disable if we were previously enabled - in all other ++ * cases the call to disable will have already been made. ++ */ ++ do_disable = true; ++ } + -+/** -+ * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer -+ * array. -+ * @dump_bufs: Narrow Dump buffer array to be freed. -+ * -+ * Can be safely called on an all-zeroed narrow dump buffer array structure, or -+ * on an already freed narrow dump buffer array. -+ */ -+void kbase_hwcnt_dump_buffer_narrow_array_free( -+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); + -+/** -+ * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block -+ * values from source to -+ * destination. 
-+ * @dst_blk: Non-NULL pointer to destination block obtained from a call to -+ * kbase_hwcnt_dump_buffer_narrow_block_instance. -+ * @src_blk: Non-NULL pointer to source block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_cnt: Number of values in the block. -+ * -+ * After the copy, any disabled values in destination will be zero, the enabled -+ * values in destination will be saturated at U32_MAX if the corresponding -+ * source value is bigger than U32_MAX, or copy the value from source if the -+ * corresponding source value is less than or equal to U32_MAX. -+ */ -+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, -+ const u64 *blk_em, size_t val_cnt); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + -+/** -+ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a -+ * narrow dump buffer. -+ * @dst_narrow: Non-NULL pointer to destination dump buffer. -+ * @src: Non-NULL pointer to source dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * -+ * After the operation, all non-enabled values (including padding bytes) will be -+ * zero. Slower than the non-strict variant. -+ * -+ * The enabled values in dst_narrow will be saturated at U32_MAX if the -+ * corresponding source value is bigger than U32_MAX, or copy the value from -+ * source if the corresponding source value is less than or equal to U32_MAX. -+ */ -+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); ++ /* Deregister the timer and block until any timer callback has completed. ++ * We've transitioned out of the ENABLED state so we can guarantee it ++ * won't reschedule itself. ++ */ ++ backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer); + -+#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c -new file mode 100644 -index 000000000..763eb315d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c -@@ -0,0 +1,511 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Block until any async work has completed. We have transitioned out of ++ * the ENABLED state so we can guarantee no new work will concurrently ++ * be submitted. 
++ */ ++ flush_workqueue(backend_csf->hwc_dump_workq); + -+#include "hwcnt/mali_kbase_hwcnt_types.h" ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + -+#include ++ if (do_disable) ++ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); + -+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, -+ const struct kbase_hwcnt_metadata **out_metadata) -+{ -+ char *buf; -+ struct kbase_hwcnt_metadata *metadata; -+ struct kbase_hwcnt_group_metadata *grp_mds; -+ size_t grp; -+ size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ -+ size_t dump_buf_count; /* Number of u64 values (inc padding) */ -+ size_t avail_mask_bits; /* Number of availability mask bits */ ++ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); + -+ size_t size; -+ size_t offset; ++ switch (backend_csf->enable_state) { ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); ++ break; ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); ++ break; ++ default: ++ WARN_ON(true); ++ break; ++ } + -+ if (!desc || !out_metadata) -+ return -EINVAL; ++ backend_csf->user_requested = false; ++ backend_csf->watchdog_last_seen_insert_idx = 0; + -+ /* The maximum number of clock domains is 64. */ -+ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) -+ return -EINVAL; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + -+ /* Calculate the bytes needed to tightly pack the metadata */ ++ /* After disable, zero the header of all buffers in the ring buffer back ++ * to 0 to prepare for the next enable. ++ */ ++ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); + -+ /* Top level metadata */ -+ size = 0; -+ size += sizeof(struct kbase_hwcnt_metadata); ++ /* Sync zeroed buffers to avoid coherency issues on future use. */ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, 0, ++ backend_csf->info->ring_buf_cnt, false); + -+ /* Group metadata */ -+ size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; ++ /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare ++ * for next enable. 
++	 */
++	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
++}
+
++/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
++static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
++						 u64 *dump_time_ns)
++{
++	unsigned long flags = 0UL;
++	struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
++	bool do_request = false;
++	bool watchdog_dumping = false;
+
++	if (!backend_csf)
++		return -EINVAL;
+
++	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
++	/* If we're transitioning to enabled there's nothing to accumulate, and
++	 * the user dump buffer is already zeroed. We can just short-circuit to
++	 * the DUMP_COMPLETED state.
++	 */
++	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
++		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
++		*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
++		kbasep_hwcnt_backend_csf_cc_update(backend_csf);
++		backend_csf->user_requested = true;
++		backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
++		return 0;
++	}
+
++	/* Otherwise, make sure we're already enabled. */
++	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
++		backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
++		return -EIO;
++	}
+
++	/* Make sure that this is either the first request since enable, that
++	 * the previous user dump has completed, or that a watchdog dump is in
++	 * progress, so that we never cut in partway through a user dump.
++	 * If a user request arrives while a watchdog dump is in progress, the
++	 * user request takes ownership of the watchdog sample by changing the
++	 * dump_state, so the interrupt for the watchdog request is processed
++	 * instead of being ignored.
++	 */
++	if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
++	    (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
++	    (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
++		/* HWC is disabled, another user dump is ongoing, or we are on
++		 * fault.
++		 */
++		backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
++		return -EIO;
++	}
+
++	/* Reset the completion so dump_wait() has something to wait on.
*/ ++ reinit_completion(&backend_csf->dump_completed); + -+ const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; -+ struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; ++ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) ++ watchdog_dumping = true; + -+ size_t group_enable_map_count = 0; -+ size_t group_dump_buffer_count = 0; -+ size_t group_avail_mask_bits = 0; ++ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && ++ !backend_csf->info->fw_in_protected_mode) { ++ /* Only do the request if we are fully enabled and not in ++ * protected mode. ++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; ++ do_request = true; ++ } else { ++ /* Skip the request and waiting for ack and go straight to ++ * checking the insert and kicking off the worker to do the dump ++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; ++ } + -+ /* Bump allocate this group's block metadata */ -+ struct kbase_hwcnt_block_metadata *blk_mds = -+ (struct kbase_hwcnt_block_metadata *)(buf + offset); -+ offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; ++ /* CSF firmware might enter protected mode now, but still call request. ++ * That is fine, as we changed state while holding the lock, so the ++ * protected mode enter function will query the insert and launch the ++ * dumping worker. ++ * At some point we will get the dump request ACK saying a dump is done, ++ * but we can ignore it if we are not in the REQUESTED state and process ++ * it in next round dumping worker. ++ */ + -+ /* Fill in each block in the group's information */ -+ for (blk = 0; blk < grp_desc->blk_cnt; blk++) { -+ const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; -+ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; -+ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; ++ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); ++ kbasep_hwcnt_backend_csf_cc_update(backend_csf); ++ backend_csf->user_requested = true; + -+ blk_md->type = blk_desc->type; -+ blk_md->inst_cnt = blk_desc->inst_cnt; -+ blk_md->hdr_cnt = blk_desc->hdr_cnt; -+ blk_md->ctr_cnt = blk_desc->ctr_cnt; -+ blk_md->enable_map_index = group_enable_map_count; -+ blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); -+ blk_md->dump_buf_index = group_dump_buffer_count; -+ blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( -+ n_values, -+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); -+ blk_md->avail_mask_index = group_avail_mask_bits; ++ if (do_request) { ++ /* If a watchdog dumping is in progress, don't need to do ++ * another request, just update the dump_state and take the ++ * ownership of the sample which watchdog requested. 
++ */ ++ if (!watchdog_dumping) ++ backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx); ++ } else ++ kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); + -+ group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; -+ group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; -+ group_avail_mask_bits += blk_md->inst_cnt; -+ } ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + -+ /* Fill in the group's information */ -+ grp_md->type = grp_desc->type; -+ grp_md->blk_cnt = grp_desc->blk_cnt; -+ grp_md->blk_metadata = blk_mds; -+ grp_md->enable_map_index = enable_map_count; -+ grp_md->dump_buf_index = dump_buf_count; -+ grp_md->avail_mask_index = avail_mask_bits; ++ /* Modify watchdog timer to delay the regular check time since ++ * just requested. ++ */ ++ backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer, ++ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + -+ enable_map_count += group_enable_map_count; -+ dump_buf_count += group_dump_buffer_count; -+ avail_mask_bits += group_avail_mask_bits; -+ } ++ return 0; ++} + -+ /* Fill in the top level metadata's information */ -+ metadata->grp_cnt = desc->grp_cnt; -+ metadata->grp_metadata = grp_mds; -+ metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; -+ metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; -+ metadata->avail_mask = desc->avail_mask; -+ metadata->clk_cnt = desc->clk_cnt; ++/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ ++static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) ++{ ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ int errcode; + -+ WARN_ON(size != offset); -+ /* Due to the block alignment, there should be exactly one enable map -+ * bit per 4 bytes in the dump buffer. ++ if (!backend_csf) ++ return -EINVAL; ++ ++ wait_for_completion(&backend_csf->dump_completed); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Make sure the last dump actually succeeded when user requested is ++ * set. + */ -+ WARN_ON(metadata->dump_buf_bytes != -+ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); ++ if (backend_csf->user_requested && ++ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || ++ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) ++ errcode = 0; ++ else ++ errcode = -EIO; + -+ *out_metadata = metadata; -+ return 0; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ ++ return errcode; +} + -+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) ++/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ ++static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) +{ -+ kfree(metadata); ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ int errcode; ++ u64 ts; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ /* Request a dump so we can clear all current counters. */ ++ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); ++ if (!errcode) ++ /* Wait for the manual dump or auto dump to be done and ++ * accumulator to be updated. 
++ */ ++ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); ++ ++ return errcode; +} + -+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_enable_map *enable_map) ++/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ ++static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ bool accumulate) +{ -+ u64 *enable_map_buf; ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; ++ int ret; ++ size_t clk; + -+ if (!metadata || !enable_map) ++ if (!backend_csf || !dst || !dst_enable_map || ++ (backend_csf->info->metadata != dst->metadata) || ++ (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + -+ if (metadata->enable_map_bytes > 0) { -+ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); -+ if (!enable_map_buf) -+ return -ENOMEM; -+ } else { -+ enable_map_buf = NULL; ++ /* Extract elapsed cycle count for each clock domain if enabled. */ ++ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) ++ { ++ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) ++ continue; ++ ++ /* Reset the counter to zero if accumulation is off. */ ++ if (!accumulate) ++ dst->clk_cnt_buf[clk] = 0; ++ dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk]; + } + -+ enable_map->metadata = metadata; -+ enable_map->hwcnt_enable_map = enable_map_buf; -+ return 0; ++ /* We just return the user buffer without checking the current state, ++ * as it is undefined to call this function without a prior succeeding ++ * one to dump_wait(). ++ */ ++ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); ++ ++ return ret; +} + -+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) ++/** ++ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend. ++ * @backend_csf: Pointer to CSF backend to destroy. ++ * ++ * Can be safely called on a backend in any state of partial construction. ++ * ++ */ ++static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) +{ -+ if (!enable_map) ++ if (!backend_csf) + return; + -+ kfree(enable_map->hwcnt_enable_map); -+ enable_map->hwcnt_enable_map = NULL; -+ enable_map->metadata = NULL; ++ destroy_workqueue(backend_csf->hwc_dump_workq); ++ ++ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf); ++ ++ kfree(backend_csf->accum_buf); ++ backend_csf->accum_buf = NULL; ++ ++ kfree(backend_csf->old_sample_buf); ++ backend_csf->old_sample_buf = NULL; ++ ++ kfree(backend_csf->to_user_buf); ++ backend_csf->to_user_buf = NULL; ++ ++ kfree(backend_csf); +} + -+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_dump_buffer *dump_buf) ++/** ++ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance. ++ * ++ * @csf_info: Non-NULL pointer to backend info. ++ * @out_backend: Non-NULL pointer to where backend is stored on success. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, ++ struct kbase_hwcnt_backend_csf **out_backend) +{ -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; -+ u8 *buf; ++ struct kbase_hwcnt_backend_csf *backend_csf = NULL; ++ int errcode = -ENOMEM; + -+ if (!metadata || !dump_buf) -+ return -EINVAL; ++ WARN_ON(!csf_info); ++ WARN_ON(!out_backend); + -+ dump_buf_bytes = metadata->dump_buf_bytes; -+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; ++ backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL); ++ if (!backend_csf) ++ goto alloc_error; + -+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */ -+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; ++ backend_csf->info = csf_info; ++ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); + -+ dump_buf->metadata = metadata; -+ dump_buf->dump_buf = (u64 *)buf; -+ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); ++ backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); ++ if (!backend_csf->accum_buf) ++ goto err_alloc_acc_buf; + -+ return 0; -+} ++ backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); ++ if (!backend_csf->old_sample_buf) ++ goto err_alloc_pre_sample_buf; + -+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) -+{ -+ if (!dump_buf) -+ return; ++ backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); ++ if (!backend_csf->to_user_buf) ++ goto err_alloc_user_sample_buf; + -+ kfree(dump_buf->dump_buf); -+ memset(dump_buf, 0, sizeof(*dump_buf)); ++ errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, ++ &backend_csf->ring_buf_cpu_base, ++ &backend_csf->ring_buf); ++ if (errcode) ++ goto err_ring_buf_alloc; ++ errcode = -ENOMEM; ++ ++ /* Zero all performance enable header to prepare for first enable. */ ++ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); ++ ++ /* Sync zeroed buffers to avoid coherency issues on use. */ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, 0, ++ backend_csf->info->ring_buf_cnt, false); ++ ++ init_completion(&backend_csf->dump_completed); ++ ++ init_waitqueue_head(&backend_csf->enable_state_waitq); ++ ++ /* Allocate a single threaded work queue for dump worker and threshold ++ * worker. 
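++	 * max_active is 1, so the dump work and the threshold work never
++	 * execute concurrently.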
++ */ ++ backend_csf->hwc_dump_workq = ++ alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!backend_csf->hwc_dump_workq) ++ goto err_alloc_workqueue; ++ ++ INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker); ++ INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker); ++ ++ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ backend_csf->user_requested = false; ++ backend_csf->watchdog_last_seen_insert_idx = 0; ++ ++ *out_backend = backend_csf; ++ return 0; ++ ++err_alloc_workqueue: ++ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf); ++err_ring_buf_alloc: ++ kfree(backend_csf->to_user_buf); ++ backend_csf->to_user_buf = NULL; ++err_alloc_user_sample_buf: ++ kfree(backend_csf->old_sample_buf); ++ backend_csf->old_sample_buf = NULL; ++err_alloc_pre_sample_buf: ++ kfree(backend_csf->accum_buf); ++ backend_csf->accum_buf = NULL; ++err_alloc_acc_buf: ++ kfree(backend_csf); ++alloc_error: ++ return errcode; +} + -+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, -+ struct kbase_hwcnt_dump_buffer_array *dump_bufs) ++/* CSF backend implementation of kbase_hwcnt_backend_init_fn */ ++static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend) +{ -+ struct kbase_hwcnt_dump_buffer *buffers; -+ size_t buf_idx; -+ unsigned int order; -+ unsigned long addr; -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf *backend_csf = NULL; ++ struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; ++ int errcode; ++ bool success = false; + -+ if (!metadata || !dump_bufs) ++ if (!info || !out_backend) + return -EINVAL; + -+ dump_buf_bytes = metadata->dump_buf_bytes; -+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; -+ -+ /* Allocate memory for the dump buffer struct array */ -+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); -+ if (!buffers) -+ return -ENOMEM; ++ /* Create the backend. */ ++ errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf); ++ if (errcode) ++ return errcode; + -+ /* Allocate pages for the actual dump buffers, as they tend to be fairly -+ * large. ++ /* If it was not created before, attach it to csf_info. ++ * Use spin lock to avoid concurrent initialization. + */ -+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); -+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ if (csf_info->backend == NULL) { ++ csf_info->backend = backend_csf; ++ *out_backend = (struct kbase_hwcnt_backend *)backend_csf; ++ success = true; ++ if (csf_info->unrecoverable_error_happened) ++ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; ++ } ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + -+ if (!addr) { -+ kfree(buffers); -+ return -ENOMEM; ++ /* Destroy the new created backend if the backend has already created ++ * before. In normal case, this won't happen if the client call init() ++ * function properly. 
++ */ ++ if (!success) { ++ kbasep_hwcnt_backend_csf_destroy(backend_csf); ++ return -EBUSY; + } + -+ dump_bufs->page_addr = addr; -+ dump_bufs->page_order = order; -+ dump_bufs->buf_cnt = n; -+ dump_bufs->bufs = buffers; ++ return 0; ++} + -+ /* Set the buffer of each dump buf */ -+ for (buf_idx = 0; buf_idx < n; buf_idx++) { -+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx; -+ const size_t clk_cnt_buf_offset = -+ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); ++/* CSF backend implementation of kbase_hwcnt_backend_term_fn */ ++static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) ++{ ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + -+ buffers[buf_idx].metadata = metadata; -+ buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); -+ buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); -+ } ++ if (!backend) ++ return; + -+ return 0; ++ kbasep_hwcnt_backend_csf_dump_disable(backend); ++ ++ /* Set the backend in csf_info to NULL so we won't handle any external ++ * notification anymore since we are terminating. ++ */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ backend_csf->info->backend = NULL; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); ++ ++ kbasep_hwcnt_backend_csf_destroy(backend_csf); +} + -+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) ++/** ++ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. ++ * @info: Pointer to info to destroy. ++ * ++ * Can be safely called on a backend info in any state of partial construction. ++ * ++ */ ++static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info) +{ -+ if (!dump_bufs) ++ if (!info) + return; + -+ kfree(dump_bufs->bufs); -+ free_pages(dump_bufs->page_addr, dump_bufs->page_order); -+ memset(dump_bufs, 0, sizeof(*dump_bufs)); ++ /* The backend should be destroyed before the info object destroy. */ ++ WARN_ON(info->backend != NULL); ++ ++ /* The metadata should be destroyed before the info object destroy. */ ++ WARN_ON(info->metadata != NULL); ++ ++ kfree(info); +} + -+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++/** ++ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info. ++ * ++ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure ++ * used to create backend interface. ++ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer. ++ * MUST be power of 2. ++ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create ++ * backend interface. ++ * @out_info: Non-NULL pointer to where info is stored on success. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int ++kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, ++ struct kbase_hwcnt_watchdog_interface *watchdog_if, ++ const struct kbase_hwcnt_backend_csf_info **out_info) +{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; ++ struct kbase_hwcnt_backend_csf_info *info = NULL; + -+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || -+ WARN_ON(dst->metadata != dst_enable_map->metadata)) -+ return; ++ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) || ++ WARN_ON(!is_power_of_2(ring_buf_cnt))) ++ return -EINVAL; + -+ metadata = dst->metadata; ++ info = kmalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return -ENOMEM; + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ *info = (struct kbase_hwcnt_backend_csf_info) + { -+ u64 *dst_blk; -+ size_t val_cnt; ++#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) ++ .counter_set = KBASE_HWCNT_SET_SECONDARY, ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ .counter_set = KBASE_HWCNT_SET_TERTIARY, ++#else ++ /* Default to primary */ ++ .counter_set = KBASE_HWCNT_SET_PRIMARY, ++#endif ++ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt, ++ .fw_in_protected_mode = false, .unrecoverable_error_happened = false, ++ .watchdog_if = watchdog_if, ++ }; ++ *out_info = info; + -+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) -+ continue; ++ return 0; ++} + -+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); ++/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */ ++static const struct kbase_hwcnt_metadata * ++kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) ++{ ++ if (!info) ++ return NULL; + -+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); -+ } ++ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata); + -+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); ++ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata; +} + -+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) ++static void ++kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) +{ -+ if (WARN_ON(!dst)) ++ bool do_disable = false; ++ ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); ++ ++ /* We are already in or transitioning to the unrecoverable error state. ++ * Early out. ++ */ ++ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || ++ (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER)) + return; + -+ memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); ++ /* If we are disabled, we know we have no pending workers, so skip the ++ * waiting state. ++ */ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); ++ return; ++ } + -+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); ++ /* Trigger a disable only if we are not already transitioning to ++ * disabled, we don't want to disable twice if an unrecoverable error ++ * happens while we are disabling. 
++ */ ++ do_disable = ++ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); ++ ++ /* Transition the dump to the IDLE state and unblock any waiters. The ++ * IDLE state signifies an error. ++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ ++ /* Trigger a disable only if we are not already transitioning to ++ * disabled, - we don't want to disable twice if an unrecoverable error ++ * happens while we are disabling. ++ */ ++ if (do_disable) ++ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); +} + -+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++static void ++kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) +{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; ++ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + -+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || -+ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ switch (backend_csf->enable_state) { ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED: ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: ++ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: ++ /* Already disabled or disabling, or in an unrecoverable error. ++ * Nothing to be done to handle the error. ++ */ ++ return; ++ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: ++ /* A seemingly recoverable error that occurs while we are ++ * transitioning to enabled is probably unrecoverable. ++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf); + return; ++ case KBASE_HWCNT_BACKEND_CSF_ENABLED: ++ /* Start transitioning to the disabled state. We can't wait for ++ * it as this recoverable error might be triggered from an ++ * interrupt. The wait will be done in the eventual call to ++ * disable(). ++ */ ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ /* Transition the dump to the IDLE state and unblock any ++ * waiters. The IDLE state signifies an error. 
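++		 * (dump_wait() only reports success for the COMPLETED and
++		 * WATCHDOG_REQUESTED states, so unblocked waiters see -EIO.)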
++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); + -+ metadata = dst->metadata; ++ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); ++ return; ++ } ++} + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ const u64 *blk_em = -+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); -+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); ++void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info = ++ (struct kbase_hwcnt_backend_csf_info *)iface->info; + -+ /* Align upwards to include padding bytes */ -+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( -+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ csf_info->fw_in_protected_mode = true; + -+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { -+ /* Block available, so only zero non-enabled values */ -+ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); -+ } else { -+ /* Block not available, so zero the entire thing */ -+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); -+ } -+ } ++ /* Call on_prfcnt_sample() to trigger collection of the protected mode ++ * entry auto-sample if there is currently a pending dump request. ++ */ ++ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); +} + -+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface) +{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; -+ size_t clk; ++ struct kbase_hwcnt_backend_csf_info *csf_info; + -+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || -+ WARN_ON(dst->metadata != src->metadata) || -+ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ csf_info->fw_in_protected_mode = false; ++} ++ ++void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface) ++{ ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++ csf_info->unrecoverable_error_happened = true; ++ /* Early out if the backend does not exist. 
*/ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; ++ } + -+ metadata = dst->metadata; ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ u64 *dst_blk; -+ const u64 *src_blk; -+ size_t val_cnt; ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++} + -+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) -+ continue; ++void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface) ++{ ++ unsigned long flags = 0UL; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; + -+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); -+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + -+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); ++ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++ csf_info->unrecoverable_error_happened = false; ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++ return; + } ++ backend_csf = csf_info->backend; + -+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) -+ { -+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) -+ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; ++ if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && ++ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { ++ /* Before a reset occurs, we must either have been disabled ++ * (else we lose data) or we should have encountered an ++ * unrecoverable error. Either way, we will have disabled the ++ * interface and waited for any workers that might have still ++ * been in flight. ++ * If not in these states, fire off one more disable to make ++ * sure everything is turned off before the power is pulled. ++ * We can't wait for this disable to complete, but it doesn't ++ * really matter, the power is being pulled. 
++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); ++ } ++ ++ /* A reset is the only way to exit the unrecoverable error state */ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); + } ++ ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + -+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface) +{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; -+ size_t clk; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; + -+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || -+ WARN_ON(dst->metadata != src->metadata) || -+ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; ++ backend_csf = csf_info->backend; + -+ metadata = dst->metadata; ++ /* Skip the dump_work if it's a watchdog request. */ ++ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; ++ return; ++ } + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ const u64 *src_blk = -+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); -+ const u64 *blk_em = -+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); -+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); -+ /* Align upwards to include padding bytes */ -+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( -+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); ++ /* If the current state is not REQUESTED, this HWC sample will be ++ * skipped and processed in next dump_request. ++ */ ++ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) ++ return; ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + -+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); -+ } ++ kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); ++} + -+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) -+ { -+ bool clk_enabled = -+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); ++void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; + -+ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; -+ } ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; ++ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ++ /* Submit the threshold work into the work queue to consume the ++ * available samples. 
++ */ ++ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); +} + -+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map) ++void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface) +{ -+ const struct kbase_hwcnt_metadata *metadata; -+ size_t grp, blk, blk_inst; -+ size_t clk; ++ struct kbase_hwcnt_backend_csf_info *csf_info; + -+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || -+ WARN_ON(dst->metadata != src->metadata) || -+ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + -+ metadata = dst->metadata; ++ /* Called when an overflow occurs. We treat this as a recoverable error, ++ * so we start transitioning to the disabled state. ++ * We could try and handle it while enabled, but in a real system we ++ * never expect an overflow to occur so there is no point implementing ++ * complex recovery code when we can just turn ourselves off instead for ++ * a while. ++ */ ++ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); ++} + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) -+ { -+ u64 *dst_blk; -+ const u64 *src_blk; -+ size_t hdr_cnt; -+ size_t ctr_cnt; ++void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; + -+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) -+ continue; ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + -+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); -+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); -+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; + -+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); ++ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ /* Unexpected, but we are already in the right state so just ++ * ignore it. ++ */ ++ } else { ++ /* Unexpected state change, assume everything is broken until ++ * we reset. 
++		 */
++		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
++	}
++}
+
++void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface)
++{
++	struct kbase_hwcnt_backend_csf_info *csf_info;
++	struct kbase_hwcnt_backend_csf *backend_csf;
+
++	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
++	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
++	/* Early out if the backend does not exist. */
++	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
++		return;
++	backend_csf = csf_info->backend;
+
++	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
++		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
++			backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
++	} else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
++		/* Unexpected, but we are already in the right state so just
++		 * ignore it.
++		 */
++	} else {
++		/* Unexpected state change, assume everything is broken until
++		 * we reset.
++		 */
++		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
++	}
++}
+
++int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface)
++{
++	struct kbase_hwcnt_backend_csf_info *csf_info;
++	struct kbase_hwcnt_gpu_info gpu_info;
+
++	if (!iface)
++		return -EINVAL;
+
++	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
++	WARN_ON(!csf_info->csf_if->get_prfcnt_info);
+
++	csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info);
+
++	/* The number of clock domains should not exceed the maximum number
++	 * of clock regulators.
++ */ ++ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) ++ return -EIO; ++ ++ gpu_info.l2_count = csf_info->prfcnt_info.l2_count; ++ gpu_info.core_mask = csf_info->prfcnt_info.core_mask; ++ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; ++ gpu_info.prfcnt_values_per_block = ++ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; ++ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set, ++ &csf_info->metadata); ++} ++ ++void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ if (!iface) ++ return; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ if (csf_info->metadata) { ++ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); ++ csf_info->metadata = NULL; + } +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h ++ ++int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, ++ struct kbase_hwcnt_watchdog_interface *watchdog_if, ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ int errcode; ++ const struct kbase_hwcnt_backend_csf_info *info = NULL; ++ ++ if (!iface || !csf_if || !watchdog_if) ++ return -EINVAL; ++ ++ /* The buffer count must be power of 2 */ ++ if (!is_power_of_2(ring_buf_cnt)) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info); ++ if (errcode) ++ return errcode; ++ ++ iface->info = (struct kbase_hwcnt_backend_info *)info; ++ iface->metadata = kbasep_hwcnt_backend_csf_metadata; ++ iface->init = kbasep_hwcnt_backend_csf_init; ++ iface->term = kbasep_hwcnt_backend_csf_term; ++ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns; ++ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable; ++ iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock; ++ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable; ++ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear; ++ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request; ++ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait; ++ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get; ++ ++ return 0; ++} ++ ++void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) ++{ ++ if (!iface) ++ return; ++ ++ kbasep_hwcnt_backend_csf_info_destroy( ++ (const struct kbase_hwcnt_backend_csf_info *)iface->info); ++ memset(iface, 0, sizeof(*iface)); ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h new file mode 100644 -index 000000000..5c5ada401 +index 000000000..9c5a5c996 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h -@@ -0,0 +1,1231 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +@@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -191187,1225 +193781,1303 @@ index 000000000..5c5ada401 + */ + +/* -+ * Hardware counter types. 
-+ * Contains structures for describing the physical layout of hardware counter -+ * dump buffers and enable maps within a system. -+ * -+ * Also contains helper functions for manipulation of these dump buffers and -+ * enable maps. -+ * -+ * Through use of these structures and functions, hardware counters can be -+ * enabled, copied, accumulated, and generally manipulated in a generic way, -+ * regardless of the physical counter dump layout. -+ * -+ * Terminology: -+ * -+ * Hardware Counter System: -+ * A collection of hardware counter groups, making a full hardware counter -+ * system. -+ * Hardware Counter Group: -+ * A group of Hardware Counter Blocks (e.g. a t62x might have more than one -+ * core group, so has one counter group per core group, where each group -+ * may have a different number and layout of counter blocks). -+ * Hardware Counter Block: -+ * A block of hardware counters (e.g. shader block, tiler block). -+ * Hardware Counter Block Instance: -+ * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have -+ * 4 shader block instances). -+ * -+ * Block Header: -+ * A header value inside a counter block. Headers don't count anything, -+ * so it is only valid to copy or zero them. Headers are always the first -+ * values in the block. -+ * Block Counter: -+ * A counter value inside a counter block. Counters can be zeroed, copied, -+ * or accumulated. Counters are always immediately after the headers in the -+ * block. -+ * Block Value: -+ * A catch-all term for block headers and block counters. -+ * -+ * Enable Map: -+ * An array of u64 bitfields, where each bit either enables exactly one -+ * block value, or is unused (padding). -+ * Dump Buffer: -+ * An array of u64 values, where each u64 corresponds either to one block -+ * value, or is unused (padding). -+ * Availability Mask: -+ * A bitfield, where each bit corresponds to whether a block instance is -+ * physically available (e.g. an MP3 GPU may have a sparse core mask of -+ * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the -+ * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this -+ * case, the availability mask might be 0b1011111 (the exact layout will -+ * depend on the specific hardware architecture), with the 3 extra early bits -+ * corresponding to other block instances in the hardware counter system). -+ * Metadata: -+ * Structure describing the physical layout of the enable map and dump buffers -+ * for a specific hardware counter system. -+ * -+ */ -+ -+#ifndef _KBASE_HWCNT_TYPES_H_ -+#define _KBASE_HWCNT_TYPES_H_ -+ -+#include -+#include -+#include -+#include -+#include -+ -+/* Number of bytes in each bitfield */ -+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) -+ -+/* Number of bits in each bitfield */ -+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) -+ -+/* Number of bytes for each counter value. -+ * Use 64-bit per counter in driver to avoid HW 32-bit register values -+ * overflow after a long time accumulation. ++ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF ++ * backend. + */ -+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64)) + -+/* Number of bits in an availability mask (i.e. 
max total number of block -+ * instances supported in a Hardware Counter System) -+ */ -+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) ++#ifndef _KBASE_HWCNT_BACKEND_CSF_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_H_ + -+/* Minimum alignment of each block of hardware counters */ -+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) ++#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" ++#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" ++#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" + +/** -+ * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. -+ * @value: The value to align upwards. -+ * @alignment: The alignment boundary. ++ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend ++ * interface. ++ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure ++ * used to create backend interface. ++ * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring ++ * buffer, MUST be power of 2. ++ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used ++ * to create backend interface. ++ * @iface: Non-NULL pointer to backend interface structure that is filled ++ * in on creation success. + * -+ * Return: Input value if already aligned to the specified boundary, or next -+ * (incrementing upwards) aligned value. ++ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. ++ * ++ * Return: 0 on success, else error code. + */ -+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ -+ (value + ((alignment - (value % alignment)) % alignment)) ++int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, ++ struct kbase_hwcnt_watchdog_interface *watchdog_if, ++ struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_block_description - Description of one or more identical, -+ * contiguous, Hardware Counter Blocks. -+ * @type: The arbitrary identifier used to identify the type of the block. -+ * @inst_cnt: The number of Instances of the block. -+ * @hdr_cnt: The number of 64-bit Block Headers in the block. -+ * @ctr_cnt: The number of 64-bit Block Counters in the block. ++ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF ++ * hardware counter backend. ++ * @iface: Non-NULL pointer to backend interface structure ++ * Return: 0 on success, else error code. + */ -+struct kbase_hwcnt_block_description { -+ u64 type; -+ size_t inst_cnt; -+ size_t hdr_cnt; -+ size_t ctr_cnt; -+}; ++int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_group_description - Description of one or more identical, -+ * contiguous Hardware Counter Groups. -+ * @type: The arbitrary identifier used to identify the type of the group. -+ * @blk_cnt: The number of types of Hardware Counter Block in the group. -+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, -+ * describing each type of Hardware Counter Block in the group. ++ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF ++ * hardware counter backend. ++ * @iface: Non-NULL pointer to backend interface structure. 
+ */ -+struct kbase_hwcnt_group_description { -+ u64 type; -+ size_t blk_cnt; -+ const struct kbase_hwcnt_block_description *blks; -+}; ++void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_description - Description of a Hardware Counter System. -+ * @grp_cnt: The number of Hardware Counter Groups. -+ * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, -+ * describing each Hardware Counter Group in the system. -+ * @avail_mask: Flat Availability Mask for all block instances in the system. -+ * @clk_cnt: The number of clock domains in the system. The maximum is 64. ++ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend ++ * interface. ++ * @iface: Pointer to interface to destroy. ++ * ++ * Can be safely called on an all-zeroed interface, or on an already destroyed ++ * interface. + */ -+struct kbase_hwcnt_description { -+ size_t grp_cnt; -+ const struct kbase_hwcnt_group_description *grps; -+ u64 avail_mask; -+ u8 clk_cnt; -+}; ++void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout -+ * of a block in a Hardware Counter System's -+ * Dump Buffers and Enable Maps. -+ * @type: The arbitrary identifier used to identify the type of the -+ * block. -+ * @inst_cnt: The number of Instances of the block. -+ * @hdr_cnt: The number of 64-bit Block Headers in the block. -+ * @ctr_cnt: The number of 64-bit Block Counters in the block. -+ * @enable_map_index: Index in u64s into the parent's Enable Map where the -+ * Enable Map bitfields of the Block Instances described by -+ * this metadata start. -+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the -+ * Block Instances described by this metadata. -+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the -+ * Dump Buffers of the Block Instances described by this -+ * metadata start. -+ * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the -+ * Block Instances described by this metadata. -+ * @avail_mask_index: Index in bits into the parent's Availability Mask where -+ * the Availability Masks of the Block Instances described -+ * by this metadata start. ++ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive ++ * notification that protected mode ++ * has been entered. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+struct kbase_hwcnt_block_metadata { -+ u64 type; -+ size_t inst_cnt; -+ size_t hdr_cnt; -+ size_t ctr_cnt; -+ size_t enable_map_index; -+ size_t enable_map_stride; -+ size_t dump_buf_index; -+ size_t dump_buf_stride; -+ size_t avail_mask_index; -+}; ++void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout -+ * of a group of blocks in a Hardware -+ * Counter System's Dump Buffers and Enable -+ * Maps. -+ * @type: The arbitrary identifier used to identify the type of the -+ * group. -+ * @blk_cnt: The number of types of Hardware Counter Block in the -+ * group. -+ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, -+ * describing the physical layout of each type of Hardware -+ * Counter Block in the group. -+ * @enable_map_index: Index in u64s into the parent's Enable Map where the -+ * Enable Maps of the blocks within the group described by -+ * this metadata start. 
-+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the -+ * Dump Buffers of the blocks within the group described by -+ * metadata start. -+ * @avail_mask_index: Index in bits into the parent's Availability Mask where -+ * the Availability Masks of the blocks within the group -+ * described by this metadata start. ++ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive ++ * notification that protected mode has ++ * been exited. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+struct kbase_hwcnt_group_metadata { -+ u64 type; -+ size_t blk_cnt; -+ const struct kbase_hwcnt_block_metadata *blk_metadata; -+ size_t enable_map_index; -+ size_t dump_buf_index; -+ size_t avail_mask_index; -+}; ++void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_metadata - Metadata describing the memory layout -+ * of Dump Buffers and Enable Maps within a -+ * Hardware Counter System. -+ * @grp_cnt: The number of Hardware Counter Groups. -+ * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, -+ * describing the physical layout of each Hardware Counter -+ * Group in the system. -+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. -+ * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. -+ * @avail_mask: The Availability Mask for the system. -+ * @clk_cnt: The number of clock domains in the system. ++ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function ++ * called when unrecoverable ++ * errors are detected. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ * ++ * This should be called on encountering errors that can only be recovered from ++ * with reset, or that may put HWC logic in state that could result in hang. For ++ * example, on bus error, or when FW becomes unresponsive. + */ -+struct kbase_hwcnt_metadata { -+ size_t grp_cnt; -+ const struct kbase_hwcnt_group_metadata *grp_metadata; -+ size_t enable_map_bytes; -+ size_t dump_buf_bytes; -+ u64 avail_mask; -+ u8 clk_cnt; -+}; ++void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 -+ * bitfields. -+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe -+ * the layout of the enable map. -+ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an -+ * array of u64 bitfields, each bit of which enables one hardware -+ * counter. -+ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle -+ * counter for a given clock domain. ++ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be ++ * called immediately before a ++ * reset. Takes us out of the ++ * unrecoverable error state, if we ++ * were in it. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+struct kbase_hwcnt_enable_map { -+ const struct kbase_hwcnt_metadata *metadata; -+ u64 *hwcnt_enable_map; -+ u64 clk_enable_map; -+}; ++void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. -+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe -+ * the layout of the Dump Buffer. -+ * @dump_buf: Non-NULL pointer to an array of u64 values, the array size is -+ * metadata->dump_buf_bytes. 
-+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed -+ * for each clock domain. ++ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample ++ * complete interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+struct kbase_hwcnt_dump_buffer { -+ const struct kbase_hwcnt_metadata *metadata; -+ u64 *dump_buf; -+ u64 *clk_cnt_buf; -+}; ++void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. -+ * @page_addr: Address of allocated pages. A single allocation is used for all -+ * Dump Buffers in the array. -+ * @page_order: The allocation order of the pages, the order is on a logarithmic -+ * scale. -+ * @buf_cnt: The number of allocated Dump Buffers. -+ * @bufs: Non-NULL pointer to the array of Dump Buffers. ++ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter ++ * buffer reach threshold ++ * interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+struct kbase_hwcnt_dump_buffer_array { -+ unsigned long page_addr; -+ unsigned int page_order; -+ size_t buf_cnt; -+ struct kbase_hwcnt_dump_buffer *bufs; -+}; ++void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object -+ * from a description. -+ * @desc: Non-NULL pointer to a hardware counter description. -+ * @metadata: Non-NULL pointer to where created metadata will be stored on -+ * success. -+ * -+ * Return: 0 on success, else error code. ++ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer ++ * overflow interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, -+ const struct kbase_hwcnt_metadata **metadata); ++void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. -+ * @metadata: Pointer to hardware counter metadata ++ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled ++ * interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); ++void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface); + +/** -+ * kbase_hwcnt_metadata_group_count() - Get the number of groups. -+ * @metadata: Non-NULL pointer to metadata. -+ * -+ * Return: Number of hardware counter groups described by metadata. ++ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter ++ * disabled interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. + */ -+static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) -+{ -+ if (WARN_ON(!metadata)) -+ return 0; -+ -+ return metadata->grp_cnt; -+} ++void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); + -+/** -+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. 
++#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +new file mode 100644 +index 000000000..382a3adaa +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +@@ -0,0 +1,302 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Return: Type of the group grp. -+ */ -+static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, -+ size_t grp) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) -+ return 0; -+ -+ return metadata->grp_metadata[grp].type; -+} -+ -+/** -+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: Number of blocks in group grp. + */ -+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, -+ size_t grp) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) -+ return 0; + -+ return metadata->grp_metadata[grp].blk_cnt; -+} -+ -+/** -+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Type of the block blk in group grp. ++/* ++ * Virtual interface for CSF hardware counter backend. + */ -+static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, -+ size_t grp, size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; + -+ return metadata->grp_metadata[grp].blk_metadata[blk].type; -+} ++#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_IF_H_ + -+/** -+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of -+ * a block. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of instances of block blk in group grp. 
-+ */ -+static inline size_t -+kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; ++#include + -+ return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt; -+} ++struct kbase_hwcnt_backend_csf_if_ctx; ++ ++struct kbase_hwcnt_backend_csf_if_ring_buf; + +/** -+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter -+ * headers. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of counter headers in each instance of block blk in group grp. ++ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection ++ * structure. ++ * @fe_bm: Front End counters selection bitmask. ++ * @shader_bm: Shader counters selection bitmask. ++ * @tiler_bm: Tiler counters selection bitmask. ++ * @mmu_l2_bm: MMU_L2 counters selection bitmask. ++ * @counter_set: The performance counter set to enable. ++ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle ++ * counter for a given clock domain. + */ -+static inline size_t -+kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; ++struct kbase_hwcnt_backend_csf_if_enable { ++ u32 fe_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 mmu_l2_bm; ++ u8 counter_set; ++ u64 clk_enable_map; ++}; + -+ return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt; -+} ++/** ++ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter ++ * information. ++ * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware ++ * counters are sub-divided into 4 classes: front-end, shader, tiler, and ++ * memory system (l2 cache + MMU). ++ * @prfcnt_fw_size: Total length in bytes of all the firmware counters data. ++ * @dump_bytes: Bytes of GPU memory required to perform a performance ++ * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size. ++ * @prfcnt_block_size: Bytes of each performance counter block. ++ * @l2_count: The MMU L2 cache count. ++ * @core_mask: Shader core mask. ++ * @clk_cnt: Clock domain count in the system. ++ * @clearing_samples: Indicates whether counters are cleared after each sample ++ * is taken. ++ */ ++struct kbase_hwcnt_backend_csf_if_prfcnt_info { ++ size_t prfcnt_hw_size; ++ size_t prfcnt_fw_size; ++ size_t dump_bytes; ++ size_t prfcnt_block_size; ++ size_t l2_count; ++ u64 core_mask; ++ u8 clk_cnt; ++ bool clearing_samples; ++}; + +/** -+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * -+ * Return: Number of counters in each instance of block blk in group grp. ++ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the ++ * backend spinlock is ++ * held. ++ * @ctx: Non-NULL pointer to a CSF context. 
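/*
 * Editor's note (illustration only, not part of the patch): the prfcnt_info
 * fields documented above are related by
 * dump_bytes = prfcnt_hw_size + prfcnt_fw_size, and dump_bytes is a whole
 * multiple of prfcnt_block_size (the firmware implementation later in this
 * patch WARNs otherwise), so the number of counter blocks in one sample can
 * be derived directly. The helper name is hypothetical.
 */
static inline size_t csf_prfcnt_blocks_per_sample_example(
	const struct kbase_hwcnt_backend_csf_if_prfcnt_info *info)
{
	/* Exact division, per the layout constraints described above. */
	return info->dump_bytes / info->prfcnt_block_size;
}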
+ */ -+static inline size_t -+kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; ++typedef void ++kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + -+ return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt; -+} ++/** ++ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. ++ * ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @flags: Pointer to the memory location that would store the previous ++ * interrupt state. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long *flags); + +/** -+ * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. ++ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. + * -+ * Return: enable map stride in each instance of block blk in group grp. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @flags: Previously stored interrupt state when Scheduler interrupt ++ * spinlock was acquired. + */ -+static inline size_t -+kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, -+ size_t grp, size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; ++typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long flags); + -+ return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride; -+} ++/** ++ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance ++ * counter information. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @prfcnt_info: Non-NULL pointer to struct where performance counter ++ * information should be stored. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); + +/** -+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer ++ * for CSF interface. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @buf_count: The buffer count in the ring buffer to be allocated, ++ * MUST be power of 2. ++ * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is ++ * stored when success. ++ * @ring_buf: Non-NULL pointer to where ring buffer is stored when success. + * -+ * Return: Number of headers plus counters in each instance of block blk -+ * in group grp. ++ * A ring buffer is needed by the CSF interface to do manual HWC sample and ++ * automatic HWC samples, the buffer count in the ring buffer MUST be power ++ * of 2 to meet the hardware requirement. ++ * ++ * Return: 0 on success, else error code. 
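/*
 * Editor's note (illustration only, not part of the patch): the power-of-two
 * buffer count is documented above as a hardware requirement; one visible
 * consequence in the firmware implementation later in this patch is that
 * ring positions can be wrapped with a mask of (buf_count - 1) rather than a
 * modulo. A minimal sketch, with a hypothetical helper name:
 */
static inline u32 csf_ring_buf_wrap_example(u32 raw_index, u32 buf_count)
{
	/* Equivalent to raw_index % buf_count only when buf_count is 2^n. */
	return raw_index & (buf_count - 1);
}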
+ */ -+static inline size_t -+kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; -+ -+ return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) + -+ kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); -+} ++typedef int ++kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u32 buf_count, void **cpu_dump_base, ++ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); + +/** -+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in -+ * the metadata. -+ * @md: Non-NULL pointer to metadata. -+ * @grp: size_t variable used as group iterator. -+ * @blk: size_t variable used as block iterator. -+ * @blk_inst: size_t variable used as block instance iterator. ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers ++ * memory. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @ring_buf: Non-NULL pointer to the ring buffer. ++ * @buf_index_first: The first buffer index in the ring buffer to be synced, ++ * inclusive. ++ * @buf_index_last: The last buffer index in the ring buffer to be synced, ++ * exclusive. ++ * @for_cpu: The direction of sync to be applied, set to true when CPU ++ * cache needs invalidating before reading the buffer, and set ++ * to false after CPU writes to flush these before this memory ++ * is overwritten by the GPU. + * -+ * Iteration order is group, then block, then block instance (i.e. linearly -+ * through memory). ++ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU ++ * are correctly observed. + */ -+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ -+ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ -+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ -+ for ((blk_inst) = 0; \ -+ (blk_inst) < \ -+ kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ -+ (blk_inst)++) ++typedef void ++kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ u32 buf_index_first, u32 buf_index_last, bool for_cpu); + +/** -+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail -+ * mask corresponding to the block. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for ++ * the CSF interface. + * -+ * Return: The bit index into the avail mask for the block. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. 
+ */ -+static inline size_t -+kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk) -+{ -+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || -+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) -+ return 0; ++typedef void ++kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); + -+ return metadata->grp_metadata[grp].avail_mask_index + -+ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; -+} ++/** ++ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current ++ * timestamp of the CSF ++ * interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * ++ * Return: CSF interface timestamp in nanoseconds. ++ */ ++typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** -+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is -+ * available. -+ * @metadata: Non-NULL pointer to metadata. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. ++ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware ++ * counter in CSF interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. ++ * @enable: Non-NULL pointer to the enable map of HWC. + * -+ * Return: true if the block instance is available, else false. ++ * Requires lock to be taken before calling. + */ -+static inline bool -+kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, -+ size_t blk, size_t blk_inst) -+{ -+ size_t bit; -+ u64 mask; ++typedef void ++kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ struct kbase_hwcnt_backend_csf_if_enable *enable); + -+ if (WARN_ON(!metadata)) -+ return false; ++/** ++ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter ++ * in CSF interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + -+ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst; -+ mask = 1ull << bit; ++/** ++ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. ++ * ++ * @ctx: Non-NULL pointer to the interface context. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + -+ return (metadata->avail_mask & mask) != 0; -+} ++/** ++ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and ++ * insert indexes of the ++ * ring buffer. ++ * ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @extract_index: Non-NULL pointer where current extract index to be saved. ++ * @insert_index: Non-NULL pointer where current insert index to be saved. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u32 *extract_index, u32 *insert_index); + +/** -+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. 
-+ * @metadata: Non-NULL pointer to metadata describing the system. -+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be -+ * initialised to all zeroes (i.e. all counters disabled). ++ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract ++ * index of the ring ++ * buffer. + * -+ * Return: 0 on success, else error code. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @extract_index: New extract index to be set. ++ * ++ * Requires lock to be taken before calling. + */ -+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_enable_map *enable_map); ++typedef void ++kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u32 extract_index); + +/** -+ * kbase_hwcnt_enable_map_free() - Free an enable map. -+ * @enable_map: Enable map to be freed. ++ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current ++ * GPU cycle count. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved, ++ * the array size should be at least as big as the number of ++ * clock domains returned by get_prfcnt_info interface. ++ * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock ++ * domain. + * -+ * Can be safely called on an all-zeroed enable map structure, or on an already -+ * freed enable map. ++ * Requires lock to be taken before calling. + */ -+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); ++typedef void ++kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u64 *cycle_counts, u64 clk_enable_map); + +/** -+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block -+ * instance's enable map. -+ * @map: Non-NULL pointer to enable map. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. ++ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual ++ * interface. ++ * @ctx: CSF interface context. ++ * @assert_lock_held: Function ptr to assert backend spinlock is held. ++ * @lock: Function ptr to acquire backend spinlock. ++ * @unlock: Function ptr to release backend spinlock. ++ * @get_prfcnt_info: Function ptr to get performance counter related ++ * information. ++ * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. ++ * @ring_buf_sync: Function ptr to sync ring buffer to CPU. ++ * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. ++ * @timestamp_ns: Function ptr to get the current CSF interface ++ * timestamp. ++ * @dump_enable: Function ptr to enable dumping. ++ * @dump_disable: Function ptr to disable dumping. ++ * @dump_request: Function ptr to request a dump. ++ * @get_indexes: Function ptr to get extract and insert indexes of the ++ * ring buffer. ++ * @set_extract_index: Function ptr to set extract index of ring buffer. ++ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count. 
++ */ ++struct kbase_hwcnt_backend_csf_if { ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx; ++ kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; ++ kbase_hwcnt_backend_csf_if_lock_fn *lock; ++ kbase_hwcnt_backend_csf_if_unlock_fn *unlock; ++ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; ++ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; ++ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; ++ kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; ++ kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; ++ kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; ++ kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; ++ kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; ++ kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; ++ kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; ++ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; ++}; ++ ++#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +new file mode 100644 +index 000000000..b11f3a4e5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +@@ -0,0 +1,787 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: u64* to the bitfield(s) used as the enable map for the -+ * block instance. + */ -+static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, -+ size_t grp, size_t blk, size_t blk_inst) -+{ -+ if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) -+ return NULL; + -+ if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) || -+ WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) || -+ WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) -+ return map->hwcnt_enable_map; ++/* ++ * CSF GPU HWC backend firmware interface APIs. 
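/*
 * Editor's note (illustration only, not part of the patch): a minimal sketch
 * of how a caller is expected to drive the vtable above while honouring the
 * "Requires lock to be taken before calling" notes on dump_enable(),
 * dump_request() and get_indexes(). The function name and the all-ones
 * bitmasks are hypothetical placeholders, not recommended settings.
 */
static inline void csf_if_dump_cycle_example(
	struct kbase_hwcnt_backend_csf_if *csf_if,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
	struct kbase_hwcnt_backend_csf_if_enable enable = {
		.fe_bm = U32_MAX,       /* front-end counters */
		.shader_bm = U32_MAX,   /* shader counters */
		.tiler_bm = U32_MAX,    /* tiler counters */
		.mmu_l2_bm = U32_MAX,   /* memory system counters */
		.counter_set = 0,       /* placeholder set selection */
		.clk_enable_map = 0x1,  /* first clock domain only */
	};
	unsigned long flags;
	u32 extract, insert;

	csf_if->lock(csf_if->ctx, &flags);
	csf_if->dump_enable(csf_if->ctx, ring_buf, &enable);
	csf_if->dump_request(csf_if->ctx);
	csf_if->get_indexes(csf_if->ctx, &extract, &insert);
	csf_if->unlock(csf_if->ctx, flags);
}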
++ */ + -+ return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + -+ map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + -+ (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); -+} ++#include ++#include ++#include ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" ++#include ++ ++#include "csf/mali_kbase_csf_firmware.h" ++#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" ++#include "mali_kbase_hwaccess_time.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++ ++#include ++#include "mali_kbase_ccswe.h" ++ ++ ++/* Ring buffer virtual address start at 4GB */ ++#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) + +/** -+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required -+ * to have at minimum one bit per value. -+ * @val_cnt: Number of values. -+ * -+ * Return: Number of required bitfields. ++ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface ++ * used to save the manual and ++ * auto HWC samples from ++ * firmware. ++ * @gpu_dump_base: Starting GPU base address of the ring buffer. ++ * @cpu_dump_base: Starting CPU address for the mapping. ++ * @buf_count: Buffer count in the ring buffer, MUST be power of 2. ++ * @as_nr: Address space number for the memory mapping. ++ * @phys: Physical memory allocation used by the mapping. ++ * @num_pages: Size of the mapping, in memory pages. + */ -+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) -+{ -+ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS; -+} ++struct kbase_hwcnt_backend_csf_if_fw_ring_buf { ++ u64 gpu_dump_base; ++ void *cpu_dump_base; ++ size_t buf_count; ++ u32 as_nr; ++ struct tagged_addr *phys; ++ size_t num_pages; ++}; + +/** -+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. -+ * @dst: Non-NULL pointer to enable map. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. ++ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF ++ * interface, used to communicate ++ * with firmware. ++ * @kbdev: KBase device. ++ * @buf_bytes: The size in bytes for each buffer in the ring buffer. ++ * @clk_cnt: The number of clock domains in the system. ++ * The maximum is 64. ++ * @clk_enable_map: Bitmask of enabled clocks ++ * @rate_listener: Clock rate listener callback state. ++ * @ccswe_shader_cores: Shader cores cycle count software estimator. 
+ */ -+static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, -+ size_t grp, size_t blk, size_t blk_inst) ++struct kbase_hwcnt_backend_csf_if_fw_ctx { ++ struct kbase_device *kbdev; ++ size_t buf_bytes; ++ u8 clk_cnt; ++ u64 clk_enable_map; ++ struct kbase_clk_rate_listener rate_listener; ++ struct kbase_ccswe ccswe_shader_cores; ++}; ++ ++static void ++kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ -+ size_t val_cnt; -+ size_t bitfld_cnt; -+ u64 *const block_enable_map = -+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; + -+ if (WARN_ON(!dst)) -+ return; ++ WARN_ON(!ctx); + -+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); -+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; + -+ memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); +} + -+/** -+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. -+ * @dst: Non-NULL pointer to enable map to zero. -+ */ -+static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst) ++static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long *flags) ++ __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) ++ ctx->kbdev->csf.scheduler.interrupt_lock) +{ -+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) -+ return; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; + -+ if (dst->hwcnt_enable_map != NULL) -+ memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes); ++ WARN_ON(!ctx); + -+ dst->clk_enable_map = 0; ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, flags); +} + -+/** -+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. -+ * @dst: Non-NULL pointer to enable map. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. 
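/*
 * Editor's note (illustration only, not part of the patch): buf_bytes above
 * is the size of one sample buffer, so the ring buffer mapping set up by the
 * allocation code later in this file needs roughly buf_bytes * buf_count
 * bytes, rounded up to whole pages. Hypothetical helper name:
 */
static inline size_t csf_ring_buf_pages_example(size_t buf_bytes, u32 buf_count)
{
	/* Mirrors the PFN_UP(fw_ctx->buf_bytes * buf_count) sizing below. */
	return PFN_UP(buf_bytes * buf_count);
}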
-+ */ -+static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, -+ size_t grp, size_t blk, size_t blk_inst) ++static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long flags) ++ __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) ++ ctx->kbdev->csf.scheduler.interrupt_lock) +{ -+ size_t val_cnt; -+ size_t bitfld_cnt; -+ u64 *const block_enable_map = -+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); -+ size_t bitfld_idx; -+ -+ if (WARN_ON(!dst)) -+ return; -+ -+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); -+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; + -+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { -+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); -+ u64 block_enable_map_mask = U64_MAX; ++ WARN_ON(!ctx); + -+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) -+ block_enable_map_mask = (1ull << remaining_values) - 1; ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; + -+ block_enable_map[bitfld_idx] = block_enable_map_mask; -+ } ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +/** -+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable -+ * map. -+ * @dst: Non-NULL pointer to enable map. ++ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback ++ * ++ * @rate_listener: Callback state ++ * @clk_index: Clock index ++ * @clk_rate_hz: Clock frequency(hz) + */ -+static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) ++static void ++kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener, ++ u32 clk_index, u32 clk_rate_hz) +{ -+ size_t grp, blk, blk_inst; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of( ++ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener); ++ u64 timestamp_ns; + -+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) ++ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) + return; + -+ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) -+ kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); -+ -+ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; ++ timestamp_ns = ktime_get_raw_ns(); ++ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz); +} + +/** -+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. -+ * @dst: Non-NULL pointer to destination enable map. -+ * @src: Non-NULL pointer to source enable map. ++ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking + * -+ * The dst and src MUST have been created from the same metadata. ++ * @fw_ctx: Non-NULL pointer to CSF firmware interface context. ++ * @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters. 
+ */ -+static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst, -+ const struct kbase_hwcnt_enable_map *src) ++static void ++kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, ++ u64 clk_enable_map) +{ -+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || -+ WARN_ON(dst->metadata != src->metadata)) -+ return; ++ struct kbase_device *kbdev = fw_ctx->kbdev; + -+ if (dst->hwcnt_enable_map != NULL) { -+ if (WARN_ON(!src->hwcnt_enable_map)) -+ return; ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ /* software estimation for non-top clock domains */ ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; ++ u32 cur_freq; ++ unsigned long flags; ++ u64 timestamp_ns; + -+ memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map, -+ dst->metadata->enable_map_bytes); ++ timestamp_ns = ktime_get_raw_ns(); ++ ++ spin_lock_irqsave(&rtm->lock, flags); ++ ++ cur_freq = (u32)clk_data->clock_val; ++ kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); ++ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq); ++ ++ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener); ++ ++ spin_unlock_irqrestore(&rtm->lock, flags); + } + -+ dst->clk_enable_map = src->clk_enable_map; ++ fw_ctx->clk_enable_map = clk_enable_map; +} + +/** -+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. -+ * @dst: Non-NULL pointer to destination enable map. -+ * @src: Non-NULL pointer to source enable map. ++ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking + * -+ * The dst and src MUST have been created from the same metadata. ++ * @fw_ctx: Non-NULL pointer to CSF firmware interface context. 
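/*
 * Editor's note (illustration only, not part of the patch): clk_enable_map
 * is a bitmask with one bit per clock domain, and the cycle-count
 * enable/disable helpers here key their behaviour off the shader-cores bit.
 * Assuming the bit index matches the clock domain index (as the
 * kbase_hwcnt_clk_enable_map_enabled() checks above suggest), a caller would
 * request shader-core cycle counting like this (hypothetical helper name):
 */
static inline u64 csf_request_shader_cycle_counts_example(u64 clk_enable_map)
{
	/* Set the bit for the shader-cores clock domain, leaving others as-is. */
	return clk_enable_map | (1ULL << KBASE_CLOCK_DOMAIN_SHADER_CORES);
}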
+ */ -+static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst, -+ const struct kbase_hwcnt_enable_map *src) ++static void ++kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +{ -+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || -+ WARN_ON(dst->metadata != src->metadata)) -+ return; ++ struct kbase_device *kbdev = fw_ctx->kbdev; ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ u64 clk_enable_map = fw_ctx->clk_enable_map; + -+ if (dst->hwcnt_enable_map != NULL) { -+ size_t i; -+ size_t const bitfld_count = -+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) ++ kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); ++} + -+ if (WARN_ON(!src->hwcnt_enable_map)) -+ return; ++static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) ++{ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+ for (i = 0; i < bitfld_count; i++) -+ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; ++ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ ++ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, ++ .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, ++ .prfcnt_hw_size = ++ KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, ++ .prfcnt_fw_size = ++ KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, ++ .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE, ++ .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE, ++ .clk_cnt = 1, ++ .clearing_samples = true, ++ }; ++ ++ fw_ctx->buf_bytes = prfcnt_info->dump_bytes; ++#else ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; ++ u32 prfcnt_size; ++ u32 prfcnt_hw_size; ++ u32 prfcnt_fw_size; ++ u32 prfcnt_block_size = ++ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!prfcnt_info); ++ ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ prfcnt_size = kbdev->csf.global_iface.prfcnt_size; ++ prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); ++ prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); ++ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; ++ ++ /* Read the block size if the GPU has the register PRFCNT_FEATURES ++ * which was introduced in architecture version 11.x.7. ++ */ ++ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= ++ GPU_ID2_PRODUCT_TTUX) { ++ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) ++ << 8; + } + -+ dst->clk_enable_map |= src->clk_enable_map; ++ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ ++ .prfcnt_hw_size = prfcnt_hw_size, ++ .prfcnt_fw_size = prfcnt_fw_size, ++ .dump_bytes = fw_ctx->buf_bytes, ++ .prfcnt_block_size = prfcnt_block_size, ++ .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, ++ .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, ++ .clk_cnt = fw_ctx->clk_cnt, ++ .clearing_samples = true, ++ }; ++ ++ /* Block size must be multiple of counter size. 
*/ ++ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); ++ /* Total size must be multiple of block size. */ ++ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0); ++#endif +} + -+/** -+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block -+ * instance are enabled. -+ * @enable_map: Non-NULL pointer to enable map. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. -+ * -+ * Return: true if any values in the block are enabled, else false. -+ */ -+static inline bool -+kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, -+ size_t blk, size_t blk_inst) ++static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, ++ struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) +{ -+ bool any_enabled = false; -+ size_t val_cnt; -+ size_t bitfld_cnt; -+ const u64 *const block_enable_map = -+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); -+ size_t bitfld_idx; ++ struct kbase_device *kbdev; ++ struct tagged_addr *phys; ++ struct page **page_list; ++ void *cpu_addr; ++ int ret; ++ int i; ++ size_t num_pages; ++ u64 flags; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; + -+ if (WARN_ON(!enable_map)) -+ return false; ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; + -+ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk); -+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { -+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); -+ u64 block_enable_map_mask = U64_MAX; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. 
++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) -+ block_enable_map_mask = (1ull << remaining_values) - 1; ++ WARN_ON(!ctx); ++ WARN_ON(!cpu_dump_base); ++ WARN_ON(!out_ring_buf); + -+ any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask); -+ } ++ kbdev = fw_ctx->kbdev; + -+ return any_enabled; ++ /* The buffer count must be power of 2 */ ++ if (!is_power_of_2(buf_count)) ++ return -EINVAL; ++ ++ /* alignment failure */ ++ if (gpu_va_base & (2048 - 1)) ++ return -EINVAL; ++ ++ fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); ++ if (!fw_ring_buf) ++ return -ENOMEM; ++ ++ num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ goto phys_alloc_error; ++ ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; ++ ++ /* Get physical page for the buffer */ ++ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, ++ phys, false, NULL); ++ if (ret != num_pages) ++ goto phys_mem_pool_alloc_error; ++ ++ /* Get the CPU virtual address */ ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); ++ ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; ++ ++ flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ ++ /* Update MMU table */ ++ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, ++ num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, ++ mmu_sync_info, NULL, false); ++ if (ret) ++ goto mmu_insert_failed; ++ ++ kfree(page_list); ++ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr; ++#else ++ fw_ring_buf->gpu_dump_base = gpu_va_base; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++ fw_ring_buf->cpu_dump_base = cpu_addr; ++ fw_ring_buf->phys = phys; ++ fw_ring_buf->num_pages = num_pages; ++ fw_ring_buf->buf_count = buf_count; ++ fw_ring_buf->as_nr = MCU_AS_NR; ++ ++ *cpu_dump_base = fw_ring_buf->cpu_dump_base; ++ *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; ++ ++ return 0; ++ ++mmu_insert_failed: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, ++ false, false); ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); ++phys_alloc_error: ++ kfree(fw_ring_buf); ++ return -ENOMEM; +} + -+/** -+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. -+ * @enable_map: Non-NULL pointer to enable map. -+ * -+ * Return: true if any values are enabled, else false. 
-+ */ -+static inline bool -+kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) ++static void ++kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ u32 buf_index_first, u32 buf_index_last, bool for_cpu) +{ -+ size_t grp, blk, blk_inst; -+ u64 clk_enable_map_mask; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ size_t i; ++ size_t pg_first; ++ size_t pg_last; ++ u64 start_address; ++ u64 stop_address; ++ u32 ring_buf_index_first; ++ u32 ring_buf_index_last; + -+ if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata)) -+ return false; ++ WARN_ON(!ctx); ++ WARN_ON(!ring_buf); + -+ clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* When using the dummy backend syncing the ring buffer is unnecessary as ++ * the ring buffer is only accessed by the CPU. It may also cause data loss ++ * due to cache invalidation so return early. ++ */ ++ return; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + -+ if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) -+ return true; ++ /* The index arguments for this function form an inclusive, exclusive ++ * range. ++ * However, when masking back to the available buffers we will make this ++ * inclusive at both ends so full flushes are not 0 -> 0. ++ */ ++ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); ++ ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); + -+ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) -+ { -+ if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) -+ return true; ++ /* The start address is the offset of the first buffer. */ ++ start_address = fw_ctx->buf_bytes * ring_buf_index_first; ++ pg_first = start_address >> PAGE_SHIFT; ++ ++ /* The stop address is the last byte in the final buffer. */ ++ stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; ++ pg_last = stop_address >> PAGE_SHIFT; ++ ++ /* Check whether the buffer range wraps. */ ++ if (start_address > stop_address) { ++ /* sync the first part to the end of ring buffer. */ ++ for (i = pg_first; i < fw_ring_buf->num_pages; i++) { ++ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ ++ if (for_cpu) { ++ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), ++ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } else { ++ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), ++ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } ++ } ++ ++ /* second part starts from page 0. */ ++ pg_first = 0; + } + -+ return false; ++ for (i = pg_first; i <= pg_last; i++) { ++ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ ++ if (for_cpu) { ++ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } else { ++ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } ++ } +} + -+/** -+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block -+ * instance is enabled. -+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_idx: Index of the value to check in the block instance. 
-+ * -+ * Return: true if the value was enabled, else false. -+ */ -+static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx) ++static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ -+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; -+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; -+ const u64 mask = 1ull << bit; -+ -+ return (bitfld[idx] & mask) != 0; ++ CSTD_UNUSED(ctx); ++ return ktime_get_raw_ns(); +} + -+/** -+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block -+ * instance. -+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_idx: Index of the value to enable in the block instance. -+ */ -+static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx) ++static void ++kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) +{ -+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; -+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; -+ const u64 mask = 1ull << bit; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+ bitfld[idx] |= mask; ++ if (!fw_ring_buf) ++ return; ++ ++ if (fw_ring_buf->phys) { ++ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; ++ ++ WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, ++ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, ++ fw_ring_buf->num_pages, fw_ring_buf->num_pages, ++ MCU_AS_NR, true)); ++ ++ vunmap(fw_ring_buf->cpu_dump_base); ++ ++ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); ++ ++ kfree(fw_ring_buf->phys); ++ ++ kfree(fw_ring_buf); ++ } +} + -+/** -+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block -+ * instance. -+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_idx: Index of the value to disable in the block instance. -+ */ -+static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx) ++static void ++kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ struct kbase_hwcnt_backend_csf_if_enable *enable) +{ -+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; -+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; -+ const u64 mask = 1ull << bit; ++ u32 prfcnt_config; ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + -+ bitfld[idx] &= ~mask; -+} ++ WARN_ON(!ctx); ++ WARN_ON(!ring_buf); ++ WARN_ON(!enable); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + -+/** -+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. -+ * @metadata: Non-NULL pointer to metadata describing the system. -+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. 
Will be -+ * initialised to undefined values, so must be used as a copy dest, -+ * or cleared before use. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_dump_buffer *dump_buf); ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; + -+/** -+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. -+ * @dump_buf: Dump buffer to be freed. -+ * -+ * Can be safely called on an all-zeroed dump buffer structure, or on an already -+ * freed dump buffer. -+ */ -+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); ++ /* Configure */ ++ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); ++ prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set); + -+/** -+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. -+ * @metadata: Non-NULL pointer to metadata describing the system. -+ * @n: Number of dump buffers to allocate -+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. -+ * -+ * A single zeroed contiguous page allocation will be used for all of the -+ * buffers inside the array, where: -+ * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, -+ struct kbase_hwcnt_dump_buffer_array *dump_bufs); ++ /* Configure the ring buffer base address */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, ++ fw_ring_buf->gpu_dump_base & U32_MAX); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, ++ fw_ring_buf->gpu_dump_base >> 32); + -+/** -+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. -+ * @dump_bufs: Dump buffer array to be freed. -+ * -+ * Can be safely called on an all-zeroed dump buffer array structure, or on an -+ * already freed dump buffer array. -+ */ -+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); ++ /* Set extract position to 0 */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); + -+/** -+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block -+ * instance's dump buffer. -+ * @buf: Non-NULL pointer to dump buffer. -+ * @grp: Index of the group in the metadata. -+ * @blk: Index of the block in the group. -+ * @blk_inst: Index of the block instance in the block. -+ * -+ * Return: u64* to the dump buffer for the block instance. 
-+ */ -+static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, -+ size_t grp, size_t blk, size_t blk_inst) ++ /* Configure the enable bitmap */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); ++ ++ /* Configure the HWC set and buffer size */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); ++ ++ kbdev->csf.hwcnt.enable_pending = true; ++ ++ /* Unmask the interrupts */ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); ++ ++ /* Enable the HWC */ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), ++ GLB_REQ_PRFCNT_ENABLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); ++ ++ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ -+ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) -+ return NULL; ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+ if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) || -+ WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) || -+ WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) -+ return buf->dump_buf; ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + -+ return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + -+ buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + -+ (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; ++ ++ /* Disable the HWC */ ++ kbdev->csf.hwcnt.enable_pending = true; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ /* mask the interrupts */ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); ++ ++ /* In case we have a previous request in flight when the disable ++ * happens. 
++ */ ++ kbdev->csf.hwcnt.request_pending = false; ++ ++ kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); +} + -+/** -+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. -+ * After the operation, all non-enabled values -+ * will be undefined. -+ * @dst: Non-NULL pointer to dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * -+ * The dst and dst_enable_map MUST have been created from the same metadata. -+ */ -+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); ++static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++{ ++ u32 glb_req; ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+/** -+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @val_cnt: Number of values in the block. -+ */ -+static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt) ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; ++ ++ /* Trigger dumping */ ++ kbdev->csf.hwcnt.request_pending = true; ++ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, ++ GLB_REQ_PRFCNT_SAMPLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u32 *extract_index, u32 *insert_index) +{ -+ if (WARN_ON(!dst_blk)) -+ return; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+ memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); ++ WARN_ON(!ctx); ++ WARN_ON(!extract_index); ++ WARN_ON(!insert_index); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface, ++ GLB_PRFCNT_EXTRACT); ++ *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface, ++ GLB_PRFCNT_INSERT); +} + -+/** -+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. -+ * After the operation, all values -+ * (including padding bytes) will be -+ * zero. -+ * Slower than the non-strict variant. -+ * @dst: Non-NULL pointer to dump buffer. -+ */ -+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst); ++static void ++kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u32 extract_idx) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + -+/** -+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in -+ * dst (including padding bytes and -+ * unavailable blocks). -+ * After the operation, all enabled -+ * values will be unchanged. -+ * @dst: Non-NULL pointer to dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * -+ * The dst and dst_enable_map MUST have been created from the same metadata. 
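[Editorial note, not part of the patch] The get_indexes()/set_extract_index() pair above is the firmware ring-buffer handshake: the firmware advances GLB_PRFCNT_INSERT as it writes samples, and the host releases consumed slots by advancing GLB_PRFCNT_EXTRACT. A minimal sketch of a hypothetical consumer is shown below; it assumes a power-of-two ring size and that the caller already holds the csf_if lock, and it merely stands in for the real consumer that lives elsewhere in the CSF backend.

/* Illustrative sketch only -- not part of the patch. */
static void example_drain_ring_buf(struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count)
{
	u32 extract, insert;

	/* The caller must hold the csf_if lock (see assert_lock_held above). */
	kbasep_hwcnt_backend_csf_if_fw_get_indexes(ctx, &extract, &insert);

	while (extract != insert) {
		/* The sample lives in slot (extract & (buf_count - 1)), assuming
		 * a power-of-two ring size; a real consumer would accumulate it
		 * here before moving on.
		 */
		extract++;
	}

	/* Hand the consumed slots back to the firmware. */
	kbasep_hwcnt_backend_csf_if_fw_set_extract_index(ctx, extract);
}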
-+ */ -+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + -+/** -+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled -+ * values in a block. -+ * After the operation, all -+ * enabled values will be -+ * unchanged. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_cnt: Number of values in the block. -+ */ -+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, -+ size_t val_cnt) ++ /* Set the raw extract index to release the buffer back to the ring ++ * buffer. ++ */ ++ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT, ++ extract_idx); ++} ++ ++static void ++kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ u64 *cycle_counts, u64 clk_enable_map) +{ -+ size_t val; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ u8 clk; ++ u64 timestamp_ns = ktime_get_raw_ns(); + -+ if (WARN_ON(!dst_blk)) -+ return; ++ WARN_ON(!ctx); ++ WARN_ON(!cycle_counts); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + -+ for (val = 0; val < val_cnt; val++) { -+ if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) -+ dst_blk[val] = 0; ++ for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { ++ if (!(clk_enable_map & (1ull << clk))) ++ continue; ++ ++ if (clk == KBASE_CLOCK_DOMAIN_TOP) { ++ /* Read cycle count for top clock domain. */ ++ kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk], ++ NULL, NULL); ++ } else { ++ /* Estimate cycle count for non-top clock domain. */ ++ cycle_counts[clk] = ++ kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns); ++ } + } +} + +/** -+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. -+ * After the operation, all non-enabled values -+ * will be undefined. -+ * @dst: Non-NULL pointer to dst dump buffer. -+ * @src: Non-NULL pointer to src dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. ++ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context. + * -+ * The dst, src, and dst_enable_map MUST have been created from the same -+ * metadata. -+ */ -+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); -+ -+/** -+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @src_blk: Non-NULL pointer to src block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @val_cnt: Number of values in the block. ++ * @fw_ctx: Pointer to context to destroy. + */ -+static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk, -+ size_t val_cnt) ++static void ++kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +{ -+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) ++ if (!fw_ctx) + return; + -+ /* Copy all the counters in the block instance. 
-+ * Values of non-enabled counters are undefined. -+ */ -+ memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); ++ kfree(fw_ctx); +} + +/** -+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to -+ * dst. -+ * After the operation, all non-enabled -+ * values (including padding bytes) will -+ * be zero. -+ * Slower than the non-strict variant. -+ * @dst: Non-NULL pointer to dst dump buffer. -+ * @src: Non-NULL pointer to src dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * -+ * The dst, src, and dst_enable_map MUST have been created from the same -+ * metadata. -+ */ -+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); -+ -+/** -+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values -+ * from src to dst. -+ * After the operation, all -+ * non-enabled values will be -+ * zero. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @src_blk: Non-NULL pointer to src block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @val_cnt: Number of values in the block. ++ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context. + * -+ * After the copy, any disabled values in dst will be zero. ++ * @kbdev: Non_NULL pointer to kbase device. ++ * @out_ctx: Non-NULL pointer to where info is stored on success. ++ * Return: 0 on success, else error code. + */ -+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk, -+ const u64 *blk_em, size_t val_cnt) ++static int ++kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) +{ -+ size_t val; ++ u8 clk; ++ int errcode = -ENOMEM; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + -+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) -+ return; ++ WARN_ON(!kbdev); ++ WARN_ON(!out_ctx); + -+ for (val = 0; val < val_cnt; val++) { -+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ goto error; + -+ dst_blk[val] = val_enabled ? src_blk[val] : 0; ++ ctx->kbdev = kbdev; ++ ++ /* Determine the number of available clock domains. */ ++ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { ++ if (kbdev->pm.clk_rtm.clks[clk] == NULL) ++ break; + } ++ ctx->clk_cnt = clk; ++ ++ ctx->clk_enable_map = 0; ++ kbase_ccswe_init(&ctx->ccswe_shader_cores); ++ ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change; ++ ++ *out_ctx = ctx; ++ ++ return 0; ++error: ++ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); ++ return errcode; +} + -+/** -+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and -+ * accumulate all enabled counters from -+ * src to dst. -+ * After the operation, all non-enabled -+ * values will be undefined. -+ * @dst: Non-NULL pointer to dst dump buffer. -+ * @src: Non-NULL pointer to src dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. -+ * -+ * The dst, src, and dst_enable_map MUST have been created from the same -+ * metadata. 
-+ */ -+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); ++void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw) ++{ ++ if (!if_fw) ++ return; + -+/** -+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and -+ * accumulate all block counters -+ * from src to dst. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @src_blk: Non-NULL pointer to src block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @hdr_cnt: Number of headers in the block. -+ * @ctr_cnt: Number of counters in the block. -+ */ -+static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk, -+ size_t hdr_cnt, size_t ctr_cnt) ++ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); ++ memset(if_fw, 0, sizeof(*if_fw)); ++} ++ ++int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_csf_if *if_fw) +{ -+ size_t ctr; ++ int errcode; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + -+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) -+ return; ++ if (!kbdev || !if_fw) ++ return -EINVAL; + -+ /* Copy all the headers in the block instance. -+ * Values of non-enabled headers are undefined. -+ */ -+ memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); ++ errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); ++ if (errcode) ++ return errcode; + -+ /* Accumulate all the counters in the block instance. -+ * Values of non-enabled counters are undefined. -+ */ -+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) -+ dst_blk[ctr] += src_blk[ctr]; -+} ++ if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; ++ if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; ++ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; ++ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; ++ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; ++ if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; ++ if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; ++ if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; ++ if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; ++ if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; ++ if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; ++ if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; ++ if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; ++ if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; ++ if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index; + -+/** -+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and -+ * accumulate all enabled counters -+ * from src to dst. -+ * After the operation, all -+ * non-enabled values (including -+ * padding bytes) will be zero. -+ * Slower than the non-strict -+ * variant. -+ * @dst: Non-NULL pointer to dst dump buffer. -+ * @src: Non-NULL pointer to src dump buffer. -+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. 
++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h +new file mode 100644 +index 000000000..71d150669 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h +@@ -0,0 +1,49 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * The dst, src, and dst_enable_map MUST have been created from the same -+ * metadata. + */ -+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, -+ const struct kbase_hwcnt_dump_buffer *src, -+ const struct kbase_hwcnt_enable_map *dst_enable_map); + -+/** -+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block -+ * headers and accumulate -+ * all block counters from -+ * src to dst. -+ * After the operation, all -+ * non-enabled values will -+ * be zero. -+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @src_blk: Non-NULL pointer to src block obtained from a call to -+ * kbase_hwcnt_dump_buffer_block_instance. -+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to -+ * kbase_hwcnt_enable_map_block_instance. -+ * @hdr_cnt: Number of headers in the block. -+ * @ctr_cnt: Number of counters in the block. ++/* ++ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW + */ -+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk, -+ const u64 *blk_em, -+ size_t hdr_cnt, size_t ctr_cnt) -+{ -+ size_t ctr; -+ -+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) -+ return; -+ -+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt); + -+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { -+ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr); ++#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ + -+ if (ctr_enabled) -+ dst_blk[ctr] += src_blk[ctr]; -+ else -+ dst_blk[ctr] = 0; -+ } -+} ++#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" + +/** -+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the -+ * metadata. -+ * @md: Non-NULL pointer to metadata. -+ * @clk: size_t variable used as clock iterator. ++ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface ++ * of hardware counter backend. ++ * @kbdev: Non-NULL pointer to Kbase device. ++ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on ++ * creation success. ++ * Return: 0 on success, else error code. 
+ */ -+#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) ++int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_csf_if *if_fw); + +/** -+ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled -+ * in clk_enable_map. -+ * @clk_enable_map: An enable map for clock domains. -+ * @index: Index of the enable map for clock domain. -+ * -+ * Return: true if the index of the clock domain is enabled, else false. ++ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of ++ * hardware counter backend. ++ * @if_fw: Pointer to a CSF interface to destroy. + */ -+static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index) -+{ -+ if (WARN_ON(index >= 64)) -+ return false; -+ if (clk_enable_map & (1ull << index)) -+ return true; -+ return false; -+} ++void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw); + -+#endif /* _KBASE_HWCNT_TYPES_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c ++#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c new file mode 100644 -index 000000000..d618764d3 +index 000000000..669701c29 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c -@@ -0,0 +1,744 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +@@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -192423,735 +195095,851 @@ index 000000000..d618764d3 + * + */ + -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -+#include "hwcnt/mali_kbase_hwcnt_accumulator.h" -+#include "hwcnt/mali_kbase_hwcnt_context.h" ++#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h" ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" ++#include "mali_kbase.h" ++#include "backend/gpu/mali_kbase_pm_ca.h" ++#include "mali_kbase_hwaccess_instr.h" ++#include "mali_kbase_hwaccess_time.h" ++#include "mali_kbase_ccswe.h" ++#include "backend/gpu/mali_kbase_model_linux.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + -+#include -+#include ++#include "backend/gpu/mali_kbase_pm_internal.h" + +/** -+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. -+ * @hctx: Hardware counter context being virtualized. -+ * @dump_threshold_ns: Minimum threshold period for dumps between different -+ * clients where a new accumulator dump will not be -+ * performed, and instead accumulated values will be used. -+ * If 0, rate limiting is disabled. -+ * @metadata: Hardware counter metadata. -+ * @lock: Lock acquired at all entrypoints, to protect mutable -+ * state. -+ * @client_count: Current number of virtualizer clients. -+ * @clients: List of virtualizer clients. -+ * @accum: Hardware counter accumulator. NULL if no clients. -+ * @scratch_map: Enable map used as scratch space during counter changes. -+ * @scratch_buf: Dump buffer used as scratch space during dumps. 
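[Editorial note, not part of the patch] For readers skimming the new header, a minimal usage sketch of the two entry points it declares follows; the real caller is the CSF hardware counter backend, and the error handling here is trimmed.

/* Illustrative sketch only -- not part of the patch. */
static int example_csf_if_fw_init(struct kbase_device *kbdev)
{
	struct kbase_hwcnt_backend_csf_if if_fw;
	int errcode = kbase_hwcnt_backend_csf_if_fw_create(kbdev, &if_fw);

	if (errcode)
		return errcode;

	/* ... drive the counters through the function pointers in if_fw ... */

	kbase_hwcnt_backend_csf_if_fw_destroy(&if_fw);
	return 0;
}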
-+ * @ts_last_dump_ns: End time of most recent dump across all clients. ++ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance ++ * of a JM hardware counter backend. ++ * @kbdev: KBase device. ++ * @counter_set: The performance counter set to use. ++ * @metadata: Hardware counter metadata. ++ * @dump_bytes: Bytes of GPU memory required to perform a ++ * hardware counter dump. ++ * @hwcnt_gpu_info: Hardware counter block information. + */ -+struct kbase_hwcnt_virtualizer { -+ struct kbase_hwcnt_context *hctx; -+ u64 dump_threshold_ns; ++struct kbase_hwcnt_backend_jm_info { ++ struct kbase_device *kbdev; ++ enum kbase_hwcnt_set counter_set; + const struct kbase_hwcnt_metadata *metadata; -+ struct mutex lock; -+ size_t client_count; -+ struct list_head clients; -+ struct kbase_hwcnt_accumulator *accum; -+ struct kbase_hwcnt_enable_map scratch_map; -+ struct kbase_hwcnt_dump_buffer scratch_buf; -+ u64 ts_last_dump_ns; ++ size_t dump_bytes; ++ struct kbase_hwcnt_gpu_info hwcnt_gpu_info; +}; + +/** -+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. -+ * @node: List node used for virtualizer client list. -+ * @hvirt: Hardware counter virtualizer. -+ * @enable_map: Enable map with client's current enabled counters. -+ * @accum_buf: Dump buffer with client's current accumulated counters. -+ * @has_accum: True if accum_buf contains any accumulated counters. -+ * @ts_start_ns: Counter collection start time of current dump. ++ * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout ++ * information. ++ * @fe_cnt: Front end block count. ++ * @tiler_cnt: Tiler block count. ++ * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. ++ * @shader_cnt: Shader Core block count. ++ * @block_cnt: Total block count (sum of all other block counts). ++ * @shader_avail_mask: Bitmap of all shader cores in the system. ++ * @enable_mask_offset: Offset in array elements of enable mask in each block ++ * starting from the beginning of block. ++ * @headers_per_block: Header size per block. ++ * @counters_per_block: Counters size per block. ++ * @values_per_block: Total size per block. + */ -+struct kbase_hwcnt_virtualizer_client { -+ struct list_head node; -+ struct kbase_hwcnt_virtualizer *hvirt; -+ struct kbase_hwcnt_enable_map enable_map; -+ struct kbase_hwcnt_dump_buffer accum_buf; -+ bool has_accum; -+ u64 ts_start_ns; ++struct kbase_hwcnt_jm_physical_layout { ++ u8 fe_cnt; ++ u8 tiler_cnt; ++ u8 mmu_l2_cnt; ++ u8 shader_cnt; ++ u8 block_cnt; ++ u64 shader_avail_mask; ++ size_t enable_mask_offset; ++ size_t headers_per_block; ++ size_t counters_per_block; ++ size_t values_per_block; +}; + -+const struct kbase_hwcnt_metadata * -+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt) -+{ -+ if (!hvirt) -+ return NULL; -+ -+ return hvirt->metadata; -+} -+ +/** -+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. -+ * @hvcli: Pointer to virtualizer client. -+ * -+ * Will safely free a client in any partial state of construction. ++ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. ++ * @info: Info used to create the backend. ++ * @kctx: KBase context used for GPU memory allocation and ++ * counter dumping. ++ * @gpu_dump_va: GPU hardware counter dump buffer virtual address. ++ * @cpu_dump_va: CPU mapping of gpu_dump_va. ++ * @vmap: Dump buffer vmap. ++ * @to_user_buf: HWC sample buffer for client user, size ++ * metadata.dump_buf_bytes. 
++ * @enabled: True if dumping has been enabled, else false. ++ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled ++ * dumping. ++ * @curr_config: Current allocated hardware resources to correctly map the ++ * source raw dump buffer to the destination dump buffer. ++ * @clk_enable_map: The enable map specifying enabled clock domains. ++ * @cycle_count_elapsed: ++ * Cycle count elapsed for a given sample period. ++ * The top clock cycle, index 0, is read directly from ++ * hardware, but the other clock domains need to be ++ * calculated with software estimation. ++ * @prev_cycle_count: Previous cycle count to calculate the cycle count for ++ * sample period. ++ * @rate_listener: Clock rate listener callback state. ++ * @ccswe_shader_cores: Shader cores cycle count software estimator. ++ * @phys_layout: Physical memory layout information of HWC sample buffer. + */ -+static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli) -+{ -+ if (!hvcli) -+ return; -+ -+ kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); -+ kbase_hwcnt_enable_map_free(&hvcli->enable_map); -+ kfree(hvcli); -+} ++struct kbase_hwcnt_backend_jm { ++ const struct kbase_hwcnt_backend_jm_info *info; ++ struct kbase_context *kctx; ++ u64 gpu_dump_va; ++ void *cpu_dump_va; ++ struct kbase_vmap_struct *vmap; ++ u64 *to_user_buf; ++ bool enabled; ++ u64 pm_core_mask; ++ struct kbase_hwcnt_curr_config curr_config; ++ u64 clk_enable_map; ++ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_clk_rate_listener rate_listener; ++ struct kbase_ccswe ccswe_shader_cores; ++ struct kbase_hwcnt_jm_physical_layout phys_layout; ++}; + +/** -+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer -+ * client. -+ * @metadata: Non-NULL pointer to counter metadata. -+ * @out_hvcli: Non-NULL pointer to where created client will be stored on -+ * success. ++ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used ++ * to create the hwcnt metadata. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @info: Non-NULL pointer to data structure to be filled in. ++ * ++ * The initialised info struct will only be valid for use while kbdev is valid. + * + * Return: 0 on success, else error code. 
+ */ -+static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_virtualizer_client **out_hvcli) ++static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, ++ struct kbase_hwcnt_gpu_info *info) +{ -+ int errcode; -+ struct kbase_hwcnt_virtualizer_client *hvcli = NULL; ++ size_t clk; + -+ WARN_ON(!metadata); -+ WARN_ON(!out_hvcli); ++ if (!kbdev || !info) ++ return -EINVAL; + -+ hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); -+ if (!hvcli) -+ return -ENOMEM; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; ++ info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; ++ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; ++#else /* CONFIG_MALI_BIFROST_NO_MALI */ ++ { ++ const struct base_gpu_props *props = &kbdev->gpu_props.props; ++ const size_t l2_count = props->l2_props.num_l2_slices; ++ const size_t core_mask = props->coherency_info.group[0].core_mask; + -+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); -+ if (errcode) -+ goto error; ++ info->l2_count = l2_count; ++ info->core_mask = core_mask; ++ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; ++ } ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + -+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); -+ if (errcode) -+ goto error; ++ /* Determine the number of available clock domains. */ ++ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { ++ if (kbdev->pm.clk_rtm.clks[clk] == NULL) ++ break; ++ } ++ info->clk_cnt = clk; + -+ *out_hvcli = hvcli; + return 0; -+error: -+ kbasep_hwcnt_virtualizer_client_free(hvcli); -+ return errcode; +} + -+/** -+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a -+ * client's accumulation buffer. -+ * @hvcli: Non-NULL pointer to virtualizer client. -+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
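[Editorial note, not part of the patch] To make the arithmetic in kbasep_hwcnt_backend_jm_init_layout() concrete, here is a worked example with assumed figures: an 8-core GPU, two L2 slices, 64 values per block, and the usual V5 constants of one front-end block, one tiler block, four header words and 4-byte hardware counters.

/*
 * Worked example -- every figure below is an assumption, not taken from the patch:
 *
 *   shader_cnt         = fls64(0xFF)                              =  8
 *   block_cnt          = 1 (FE) + 1 (tiler) + 2 (L2) + 8 (shader) = 12
 *   counters_per_block = 64 - 4                                   = 60
 *   raw sample size    = 12 blocks * 64 values * 4 bytes          = 3072 bytes
 */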
-+ */ -+static void -+kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli, -+ const struct kbase_hwcnt_dump_buffer *dump_buf) ++static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info, ++ struct kbase_hwcnt_jm_physical_layout *phys_layout) +{ -+ WARN_ON(!hvcli); -+ WARN_ON(!dump_buf); -+ lockdep_assert_held(&hvcli->hvirt->lock); ++ u8 shader_core_cnt; + -+ if (hvcli->has_accum) { -+ /* If already some accumulation, accumulate */ -+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); -+ } else { -+ /* If no accumulation, copy */ -+ kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); -+ } -+ hvcli->has_accum = true; ++ WARN_ON(!gpu_info); ++ WARN_ON(!phys_layout); ++ ++ shader_core_cnt = fls64(gpu_info->core_mask); ++ ++ *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ ++ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, ++ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, ++ .mmu_l2_cnt = gpu_info->l2_count, ++ .shader_cnt = shader_core_cnt, ++ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + ++ gpu_info->l2_count + shader_core_cnt, ++ .shader_avail_mask = gpu_info->core_mask, ++ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, ++ .values_per_block = gpu_info->prfcnt_values_per_block, ++ .counters_per_block = ++ gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, ++ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, ++ }; +} + -+/** -+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter -+ * accumulator after final client -+ * removal. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * -+ * Will safely terminate the accumulator in any partial state of initialisation. -+ */ -+static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt) ++static void ++kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm) +{ -+ WARN_ON(!hvirt); -+ lockdep_assert_held(&hvirt->lock); -+ WARN_ON(hvirt->client_count); ++ size_t block_idx; ++ const u32 *new_sample_buf = backend_jm->cpu_dump_va; ++ const u32 *new_block = new_sample_buf; ++ u64 *dst_buf = backend_jm->to_user_buf; ++ u64 *dst_block = dst_buf; ++ const size_t values_per_block = backend_jm->phys_layout.values_per_block; ++ const size_t dump_bytes = backend_jm->info->dump_bytes; + -+ kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); -+ kbase_hwcnt_enable_map_free(&hvirt->scratch_map); -+ kbase_hwcnt_accumulator_release(hvirt->accum); -+ hvirt->accum = NULL; ++ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) { ++ size_t ctr_idx; ++ ++ for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++) ++ dst_block[ctr_idx] = new_block[ctr_idx]; ++ ++ new_block += values_per_block; ++ dst_block += values_per_block; ++ } ++ ++ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); ++ WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); +} + +/** -+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter -+ * accumulator before first client -+ * addition. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback + * -+ * Return: 0 on success, else error code. 
++ * @rate_listener: Callback state ++ * @clk_index: Clock index ++ * @clk_rate_hz: Clock frequency(hz) + */ -+static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt) ++static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener, ++ u32 clk_index, u32 clk_rate_hz) +{ -+ int errcode; -+ -+ WARN_ON(!hvirt); -+ lockdep_assert_held(&hvirt->lock); -+ WARN_ON(hvirt->client_count); -+ WARN_ON(hvirt->accum); -+ -+ errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum); -+ if (errcode) -+ goto error; -+ -+ errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map); -+ if (errcode) -+ goto error; ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); ++ u64 timestamp_ns; + -+ errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf); -+ if (errcode) -+ goto error; ++ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) ++ return; + -+ return 0; -+error: -+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); -+ return errcode; ++ timestamp_ns = ktime_get_raw_ns(); ++ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); +} + +/** -+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the -+ * virtualizer. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @hvcli: Non-NULL pointer to the virtualizer client to add. -+ * @enable_map: Non-NULL pointer to client's initial enable map. ++ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking + * -+ * Return: 0 on success, else error code. ++ * @backend_jm: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. + */ -+static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_hwcnt_virtualizer_client *hvcli, -+ const struct kbase_hwcnt_enable_map *enable_map) ++static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ u64 timestamp_ns) +{ -+ int errcode = 0; -+ u64 ts_start_ns; -+ u64 ts_end_ns; ++ struct kbase_device *kbdev = backend_jm->kctx->kbdev; ++ u64 clk_enable_map = enable_map->clk_enable_map; ++ u64 cycle_count; + -+ WARN_ON(!hvirt); -+ WARN_ON(!hvcli); -+ WARN_ON(!enable_map); -+ lockdep_assert_held(&hvirt->lock); ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { ++ /* turn on the cycle counter */ ++ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); ++ /* Read cycle count for top clock domain. 
*/ ++ kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL); + -+ if (hvirt->client_count == 0) -+ /* First client added, so initialise the accumulator */ -+ errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); -+ if (errcode) -+ return errcode; ++ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count; ++ } + -+ hvirt->client_count += 1; ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ /* software estimation for non-top clock domains */ ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; ++ u32 cur_freq; ++ unsigned long flags; + -+ if (hvirt->client_count == 1) { -+ /* First client, so just pass the enable map onwards as is */ -+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, -+ &ts_start_ns, &ts_end_ns, NULL); -+ } else { -+ struct kbase_hwcnt_virtualizer_client *pos; ++ spin_lock_irqsave(&rtm->lock, flags); + -+ /* Make the scratch enable map the union of all enable maps */ -+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); -+ list_for_each_entry (pos, &hvirt->clients, node) -+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); ++ cur_freq = (u32)clk_data->clock_val; ++ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); ++ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq); + -+ /* Set the counters with the new union enable map */ -+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, -+ &ts_start_ns, &ts_end_ns, -+ &hvirt->scratch_buf); -+ /* Accumulate into only existing clients' accumulation bufs */ -+ if (!errcode) -+ list_for_each_entry (pos, &hvirt->clients, node) -+ kbasep_hwcnt_virtualizer_client_accumulate(pos, -+ &hvirt->scratch_buf); -+ } -+ if (errcode) -+ goto error; ++ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener); + -+ list_add(&hvcli->node, &hvirt->clients); -+ hvcli->hvirt = hvirt; -+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); -+ hvcli->has_accum = false; -+ hvcli->ts_start_ns = ts_end_ns; ++ spin_unlock_irqrestore(&rtm->lock, flags); + -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = ts_end_ns; ++ /* ccswe was reset. The estimated cycle is zero. */ ++ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; ++ } + -+ return 0; -+error: -+ hvirt->client_count -= 1; -+ if (hvirt->client_count == 0) -+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); -+ return errcode; ++ /* Keep clk_enable_map for dump_request. */ ++ backend_jm->clk_enable_map = clk_enable_map; +} + +/** -+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the -+ * virtualizer. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @hvcli: Non-NULL pointer to the virtualizer client to remove. ++ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking ++ * ++ * @backend_jm: Non-NULL pointer to backend. 
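[Editorial note, not part of the patch] kbase_ccswe_cycle_at() is not shown in this hunk; conceptually, the shader-core estimate it returns extrapolates from the last recorded frequency change, as sketched below. The ccswe bookkeeping and multiplication-overflow protection are omitted, and the example_ name is hypothetical.

/* Illustrative sketch only -- not part of the patch. */
static u64 example_estimate_cycles(u64 cycles_at_last_change, u32 freq_hz,
				   u64 last_change_ns, u64 now_ns)
{
	/* cycles(now) ~= cycles(last freq change) + freq * elapsed_time */
	return cycles_at_last_change +
	       div_u64((now_ns - last_change_ns) * freq_hz, NSEC_PER_SEC);
}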
+ */ -+static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_hwcnt_virtualizer_client *hvcli) ++static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm) +{ -+ int errcode = 0; -+ u64 ts_start_ns; -+ u64 ts_end_ns; ++ struct kbase_device *kbdev = backend_jm->kctx->kbdev; ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ u64 clk_enable_map = backend_jm->clk_enable_map; + -+ WARN_ON(!hvirt); -+ WARN_ON(!hvcli); -+ lockdep_assert_held(&hvirt->lock); ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { ++ /* turn off the cycle counter */ ++ kbase_pm_release_gpu_cycle_counter(kbdev); ++ } + -+ list_del(&hvcli->node); -+ hvirt->client_count -= 1; -+ -+ if (hvirt->client_count == 0) { -+ /* Last client removed, so terminate the accumulator */ -+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); -+ } else { -+ struct kbase_hwcnt_virtualizer_client *pos; -+ /* Make the scratch enable map the union of all enable maps */ -+ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); -+ list_for_each_entry (pos, &hvirt->clients, node) -+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); -+ /* Set the counters with the new union enable map */ -+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, -+ &ts_start_ns, &ts_end_ns, -+ &hvirt->scratch_buf); -+ /* Accumulate into remaining clients' accumulation bufs */ -+ if (!errcode) { -+ list_for_each_entry (pos, &hvirt->clients, node) -+ kbasep_hwcnt_virtualizer_client_accumulate(pos, -+ &hvirt->scratch_buf); -+ -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = ts_end_ns; -+ } -+ } -+ WARN_ON(errcode); -+} ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener); ++ } ++} + +/** -+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's -+ * currently enabled counters, -+ * and enable a new set of -+ * counters that will be used for -+ * subsequent dumps. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @hvcli: Non-NULL pointer to the virtualizer client. -+ * @enable_map: Non-NULL pointer to the new counter enable map for the client. -+ * Must have the same metadata as the virtualizer. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. ++ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with ++ * current config information. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @curr_config: Non-NULL pointer to return the current configuration of ++ * hardware allocated to the GPU. + * -+ * Return: 0 on success or error code. ++ * The current configuration information is used for architectures where the ++ * max_config interface is available from the Arbiter. In this case the current ++ * allocated hardware is not always the same, so the current config information ++ * is used to correctly map the current allocated resources to the memory layout ++ * that is copied to the user space. 
++ * ++ * Return: 0 on success, else error code. + */ -+static int kbasep_hwcnt_virtualizer_client_set_counters( -+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, -+ const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf) ++static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev, ++ struct kbase_hwcnt_curr_config *curr_config) ++{ ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) ++ return -EINVAL; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices; ++ curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present; ++ return 0; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ ++static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend) ++{ ++ (void)backend; ++ return ktime_get_raw_ns(); ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ ++static int ++kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; -+ struct kbase_hwcnt_virtualizer_client *pos; ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct kbase_hwcnt_physical_enable_map phys_enable_map; ++ enum kbase_hwcnt_physical_set phys_counter_set; ++ struct kbase_instr_hwcnt_enable enable; ++ u64 timestamp_ns; + -+ WARN_ON(!hvirt); -+ WARN_ON(!hvcli); -+ WARN_ON(!enable_map); -+ WARN_ON(!ts_start_ns); -+ WARN_ON(!ts_end_ns); -+ WARN_ON(enable_map->metadata != hvirt->metadata); -+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); -+ lockdep_assert_held(&hvirt->lock); ++ if (!backend_jm || !enable_map || backend_jm->enabled || ++ (enable_map->metadata != backend_jm->info->metadata)) ++ return -EINVAL; + -+ /* Make the scratch enable map the union of all enable maps */ -+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); -+ list_for_each_entry (pos, &hvirt->clients, node) -+ /* Ignore the enable map of the selected client */ -+ if (pos != hvcli) -+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); ++ kctx = backend_jm->kctx; ++ kbdev = backend_jm->kctx->kbdev; + -+ /* Set the counters with the new union enable map */ -+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, -+ ts_start_ns, ts_end_ns, &hvirt->scratch_buf); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ ++ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set); ++ ++ enable.fe_bm = phys_enable_map.fe_bm; ++ enable.shader_bm = phys_enable_map.shader_bm; ++ enable.tiler_bm = phys_enable_map.tiler_bm; ++ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; ++ enable.counter_set = phys_counter_set; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* The dummy model needs the CPU mapping. */ ++ enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va; ++#else ++ enable.dump_buffer = backend_jm->gpu_dump_va; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++ enable.dump_buffer_bytes = backend_jm->info->dump_bytes; ++ ++ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ++ ++ /* Update the current configuration information. 
*/ ++ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); + if (errcode) -+ return errcode; ++ goto error; + -+ /* Accumulate into all accumulation bufs except the selected client's */ -+ list_for_each_entry (pos, &hvirt->clients, node) -+ if (pos != hvcli) -+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); ++ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); ++ if (errcode) ++ goto error; + -+ /* Finally, write into the dump buf */ -+ if (dump_buf) { -+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; ++ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + -+ if (hvcli->has_accum) { -+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, -+ &hvcli->enable_map); -+ src = &hvcli->accum_buf; -+ } -+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); -+ } -+ hvcli->has_accum = false; ++ backend_jm->enabled = true; + -+ /* Update the selected client's enable map */ -+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); ++ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + -+ /* Fix up the timestamps */ -+ *ts_start_ns = hvcli->ts_start_ns; -+ hvcli->ts_start_ns = *ts_end_ns; ++ return 0; ++error: ++ return errcode; ++} + -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = *ts_end_ns; ++/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ ++static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ unsigned long flags; ++ int errcode; ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_device *kbdev; ++ ++ if (!backend_jm) ++ return -EINVAL; ++ ++ kbdev = backend_jm->kctx->kbdev; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + -+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf) ++/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ ++static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) +{ + int errcode; -+ struct kbase_hwcnt_virtualizer *hvirt; ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + -+ if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) ++ if (WARN_ON(!backend_jm) || !backend_jm->enabled) ++ return; ++ ++ kbasep_hwcnt_backend_jm_cc_disable(backend_jm); ++ ++ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); ++ WARN_ON(errcode); ++ ++ backend_jm->enabled = false; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ ++static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ ++ if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + -+ hvirt = hvcli->hvirt; ++ return kbase_instr_hwcnt_clear(backend_jm->kctx); ++} + -+ if ((enable_map->metadata != hvirt->metadata) || -+ (dump_buf && (dump_buf->metadata != hvirt->metadata))) ++/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ ++static int kbasep_hwcnt_backend_jm_dump_request(struct 
kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_device *kbdev; ++ const struct kbase_hwcnt_metadata *metadata; ++ u64 current_cycle_count; ++ size_t clk; ++ int ret; ++ ++ if (!backend_jm || !backend_jm->enabled || !dump_time_ns) + return -EINVAL; + -+ mutex_lock(&hvirt->lock); ++ kbdev = backend_jm->kctx->kbdev; ++ metadata = backend_jm->info->metadata; + -+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { -+ /* -+ * If there's only one client with no prior accumulation, we can -+ * completely skip the virtualize and just pass through the call -+ * to the accumulator, saving a fair few copies and -+ * accumulations. -+ */ -+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, -+ ts_start_ns, ts_end_ns, dump_buf); ++ /* Disable pre-emption, to make the timestamp as accurate as possible */ ++ preempt_disable(); ++ { ++ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ++ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + -+ if (!errcode) { -+ /* Update the selected client's enable map */ -+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) ++ { ++ if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk)) ++ continue; + -+ /* Fix up the timestamps */ -+ *ts_start_ns = hvcli->ts_start_ns; -+ hvcli->ts_start_ns = *ts_end_ns; ++ if (clk == KBASE_CLOCK_DOMAIN_TOP) { ++ /* Read cycle count for top clock domain. */ ++ kbase_backend_get_gpu_time_norequest(kbdev, ¤t_cycle_count, ++ NULL, NULL); ++ } else { ++ /* ++ * Estimate cycle count for non-top clock ++ * domain. ++ */ ++ current_cycle_count = kbase_ccswe_cycle_at( ++ &backend_jm->ccswe_shader_cores, *dump_time_ns); ++ } ++ backend_jm->cycle_count_elapsed[clk] = ++ current_cycle_count - backend_jm->prev_cycle_count[clk]; + -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = *ts_end_ns; ++ /* ++ * Keep the current cycle count for later calculation. ++ */ ++ backend_jm->prev_cycle_count[clk] = current_cycle_count; + } -+ } else { -+ /* Otherwise, do the full virtualize */ -+ errcode = kbasep_hwcnt_virtualizer_client_set_counters( -+ hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf); + } ++ preempt_enable(); + -+ mutex_unlock(&hvirt->lock); ++ return ret; ++} + -+ return errcode; ++/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ ++static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ ++ if (!backend_jm || !backend_jm->enabled) ++ return -EINVAL; ++ ++ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + -+/** -+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's -+ * currently enabled counters. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @hvcli: Non-NULL pointer to the virtualizer client. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * -+ * Return: 0 on success or error code. 
-+ */ -+static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_hwcnt_virtualizer_client *hvcli, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf) ++/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ ++static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ bool accumulate) +{ ++ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; ++ size_t clk; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ struct kbase_device *kbdev; ++ unsigned long flags; + int errcode; -+ struct kbase_hwcnt_virtualizer_client *pos; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + -+ WARN_ON(!hvirt); -+ WARN_ON(!hvcli); -+ WARN_ON(!ts_start_ns); -+ WARN_ON(!ts_end_ns); -+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); -+ lockdep_assert_held(&hvirt->lock); ++ if (!backend_jm || !dst || !dst_enable_map || ++ (backend_jm->info->metadata != dst->metadata) || ++ (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; + -+ /* Perform the dump */ -+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, -+ &hvirt->scratch_buf); -+ if (errcode) -+ return errcode; ++ /* Invalidate the kernel buffer before reading from it. */ ++ kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + -+ /* Accumulate into all accumulation bufs except the selected client's */ -+ list_for_each_entry (pos, &hvirt->clients, node) -+ if (pos != hvcli) -+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); ++ /* Dump sample to the internal 64-bit user buffer. */ ++ kbasep_hwcnt_backend_jm_dump_sample(backend_jm); + -+ /* Finally, write into the dump buf */ -+ if (dump_buf) { -+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; ++ /* Extract elapsed cycle count for each clock domain if enabled. */ ++ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) ++ { ++ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) ++ continue; + -+ if (hvcli->has_accum) { -+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, -+ &hvcli->enable_map); -+ src = &hvcli->accum_buf; -+ } -+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); ++ /* Reset the counter to zero if accumulation is off. */ ++ if (!accumulate) ++ dst->clk_cnt_buf[clk] = 0; ++ dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk]; + } -+ hvcli->has_accum = false; + -+ /* Fix up the timestamps */ -+ *ts_start_ns = hvcli->ts_start_ns; -+ hvcli->ts_start_ns = *ts_end_ns; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ kbdev = backend_jm->kctx->kbdev; + -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = *ts_end_ns; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ return errcode; ++ /* Update the current configuration information. 
*/ ++ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (errcode) ++ return errcode; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, ++ backend_jm->pm_core_mask, &backend_jm->curr_config, ++ accumulate); +} + +/** -+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the -+ * client's currently enabled counters -+ * if it hasn't been rate limited, -+ * otherwise return the client's most -+ * recent accumulation. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @hvcli: Non-NULL pointer to the virtualizer client. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. ++ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. ++ * @info: Non-NULL pointer to JM backend info. ++ * @kctx: Non-NULL pointer to kbase context. ++ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address ++ * is stored on success. + * -+ * Return: 0 on success or error code. ++ * Return: 0 on success, else error code. + */ -+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( -+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, -+ u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) ++static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info, ++ struct kbase_context *kctx, u64 *gpu_dump_va) +{ -+ bool rate_limited = true; -+ -+ WARN_ON(!hvirt); -+ WARN_ON(!hvcli); -+ WARN_ON(!ts_start_ns); -+ WARN_ON(!ts_end_ns); -+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); -+ lockdep_assert_held(&hvirt->lock); ++ struct kbase_va_region *reg; ++ u64 flags; ++ u64 nr_pages; + -+ if (hvirt->dump_threshold_ns == 0) { -+ /* Threshold == 0, so rate limiting disabled */ -+ rate_limited = false; -+ } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) { -+ /* Last dump was performed by this client, and dumps from an -+ * individual client are never rate limited -+ */ -+ rate_limited = false; -+ } else { -+ const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); -+ const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. 
++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ /* Dump period equals or exceeds the threshold */ -+ if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) -+ rate_limited = false; -+ } ++ WARN_ON(!info); ++ WARN_ON(!kctx); ++ WARN_ON(!gpu_dump_va); + -+ if (!rate_limited) -+ return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns, -+ dump_buf); ++ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING | ++ BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU; + -+ /* If we've gotten this far, the client must have something accumulated -+ * otherwise it is a logic error -+ */ -+ WARN_ON(!hvcli->has_accum); ++ nr_pages = PFN_UP(info->dump_bytes); + -+ if (dump_buf) -+ kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map); -+ hvcli->has_accum = false; ++ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info); + -+ *ts_start_ns = hvcli->ts_start_ns; -+ *ts_end_ns = hvirt->ts_last_dump_ns; -+ hvcli->ts_start_ns = hvirt->ts_last_dump_ns; ++ if (!reg) ++ return -ENOMEM; + + return 0; +} + -+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf) ++/** ++ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. ++ * @kctx: Non-NULL pointer to kbase context. ++ * @gpu_dump_va: GPU dump buffer virtual address. ++ */ ++static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va) ++{ ++ WARN_ON(!kctx); ++ if (gpu_dump_va) ++ kbase_mem_free(kctx, gpu_dump_va); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. ++ * @backend: Pointer to JM backend to destroy. ++ * ++ * Can be safely called on a backend in any state of partial construction. ++ */ ++static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend) ++{ ++ if (!backend) ++ return; ++ ++ if (backend->kctx) { ++ struct kbase_context *kctx = backend->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ ++ if (backend->cpu_dump_va) ++ kbase_phy_alloc_mapping_put(kctx, backend->vmap); ++ ++ if (backend->gpu_dump_va) ++ kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va); ++ ++ kbasep_js_release_privileged_ctx(kbdev, kctx); ++ kbase_destroy_context(kctx); ++ } ++ ++ kfree(backend->to_user_buf); ++ ++ kfree(backend); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_create() - Create a JM backend. ++ * @info: Non-NULL pointer to backend info. ++ * @out_backend: Non-NULL pointer to where backend is stored on success. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info, ++ struct kbase_hwcnt_backend_jm **out_backend) +{ + int errcode; -+ struct kbase_hwcnt_virtualizer *hvirt; ++ struct kbase_device *kbdev; ++ struct kbase_hwcnt_backend_jm *backend = NULL; + -+ if (!hvcli || !ts_start_ns || !ts_end_ns) -+ return -EINVAL; ++ WARN_ON(!info); ++ WARN_ON(!out_backend); + -+ hvirt = hvcli->hvirt; ++ kbdev = info->kbdev; + -+ if (dump_buf && (dump_buf->metadata != hvirt->metadata)) -+ return -EINVAL; ++ backend = kzalloc(sizeof(*backend), GFP_KERNEL); ++ if (!backend) ++ goto alloc_error; + -+ mutex_lock(&hvirt->lock); ++ backend->info = info; ++ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout); + -+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { -+ /* -+ * If there's only one client with no prior accumulation, we can -+ * completely skip the virtualize and just pass through the call -+ * to the accumulator, saving a fair few copies and -+ * accumulations. -+ */ -+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, -+ dump_buf); ++ backend->kctx = kbase_create_context(kbdev, true, ++ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); ++ if (!backend->kctx) ++ goto alloc_error; + -+ if (!errcode) { -+ /* Fix up the timestamps */ -+ *ts_start_ns = hvcli->ts_start_ns; -+ hvcli->ts_start_ns = *ts_end_ns; ++ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + -+ /* Store the most recent dump time for rate limiting */ -+ hvirt->ts_last_dump_ns = *ts_end_ns; -+ } -+ } else { -+ /* Otherwise, do the full virtualize */ -+ errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited( -+ hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); -+ } ++ errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va); ++ if (errcode) ++ goto error; + -+ mutex_unlock(&hvirt->lock); ++ backend->cpu_dump_va = ++ kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap); ++ if (!backend->cpu_dump_va || !backend->vmap) ++ goto alloc_error; + ++ backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL); ++ if (!backend->to_user_buf) ++ goto alloc_error; ++ ++ kbase_ccswe_init(&backend->ccswe_shader_cores); ++ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; ++ ++ *out_backend = backend; ++ return 0; ++ ++alloc_error: ++ errcode = -ENOMEM; ++error: ++ kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + -+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ struct kbase_hwcnt_virtualizer_client **out_hvcli) ++/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */ ++static const struct kbase_hwcnt_metadata * ++kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) ++{ ++ if (!info) ++ return NULL; ++ ++ return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_init_fn */ ++static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend) +{ + int errcode; -+ struct kbase_hwcnt_virtualizer_client *hvcli; ++ struct kbase_hwcnt_backend_jm *backend = NULL; + -+ if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata)) ++ if (!info || !out_backend) + return -EINVAL; + -+ errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli); ++ 
errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info, ++ &backend); + if (errcode) + return errcode; + -+ mutex_lock(&hvirt->lock); -+ -+ errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); -+ -+ mutex_unlock(&hvirt->lock); -+ -+ if (errcode) { -+ kbasep_hwcnt_virtualizer_client_free(hvcli); -+ return errcode; -+ } ++ *out_backend = (struct kbase_hwcnt_backend *)backend; + -+ *out_hvcli = hvcli; + return 0; +} + -+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli) ++/* JM backend implementation of kbase_hwcnt_backend_term_fn */ ++static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) +{ -+ if (!hvcli) ++ if (!backend) + return; + -+ mutex_lock(&hvcli->hvirt->lock); -+ -+ kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); ++ kbasep_hwcnt_backend_jm_dump_disable(backend); ++ kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); ++} + -+ mutex_unlock(&hvcli->hvirt->lock); ++/** ++ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. ++ * @info: Pointer to info to destroy. ++ * ++ * Can be safely called on a backend info in any state of partial construction. ++ */ ++static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info) ++{ ++ if (!info) ++ return; + -+ kbasep_hwcnt_virtualizer_client_free(hvcli); ++ kbase_hwcnt_jm_metadata_destroy(info->metadata); ++ kfree(info); +} + -+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, -+ struct kbase_hwcnt_virtualizer **out_hvirt) ++/** ++ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. ++ * @kbdev: Non_NULL pointer to kbase device. ++ * @out_info: Non-NULL pointer to where info is stored on success. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev, ++ const struct kbase_hwcnt_backend_jm_info **out_info) +{ -+ struct kbase_hwcnt_virtualizer *virt; -+ const struct kbase_hwcnt_metadata *metadata; ++ int errcode = -ENOMEM; ++ struct kbase_hwcnt_backend_jm_info *info = NULL; + -+ if (!hctx || !out_hvirt) -+ return -EINVAL; ++ WARN_ON(!kbdev); ++ WARN_ON(!out_info); + -+ metadata = kbase_hwcnt_context_metadata(hctx); -+ if (!metadata) -+ return -EINVAL; ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return errcode; + -+ virt = kzalloc(sizeof(*virt), GFP_KERNEL); -+ if (!virt) -+ return -ENOMEM; ++ info->kbdev = kbdev; + -+ virt->hctx = hctx; -+ virt->dump_threshold_ns = dump_threshold_ns; -+ virt->metadata = metadata; ++#ifdef CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY ++ info->counter_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ info->counter_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ info->counter_set = KBASE_HWCNT_SET_PRIMARY; ++#endif + -+ mutex_init(&virt->lock); -+ INIT_LIST_HEAD(&virt->clients); ++ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info); ++ if (errcode) ++ goto error; ++ ++ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set, ++ &info->metadata, &info->dump_bytes); ++ if (errcode) ++ goto error; ++ ++ *out_info = info; + -+ *out_hvirt = virt; + return 0; ++error: ++ kbasep_hwcnt_backend_jm_info_destroy(info); ++ return errcode; +} + -+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) ++int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_interface *iface) +{ -+ if (!hvirt) -+ return; ++ int errcode; ++ const struct kbase_hwcnt_backend_jm_info *info = NULL; + -+ /* Non-zero client count implies client leak */ -+ if (WARN_ON(hvirt->client_count != 0)) { -+ struct kbase_hwcnt_virtualizer_client *pos, *n; ++ if (!kbdev || !iface) ++ return -EINVAL; + -+ list_for_each_entry_safe (pos, n, &hvirt->clients, node) -+ kbase_hwcnt_virtualizer_client_destroy(pos); -+ } ++ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); + -+ WARN_ON(hvirt->client_count != 0); -+ WARN_ON(hvirt->accum); ++ if (errcode) ++ return errcode; + -+ kfree(hvirt); ++ iface->info = (struct kbase_hwcnt_backend_info *)info; ++ iface->metadata = kbasep_hwcnt_backend_jm_metadata; ++ iface->init = kbasep_hwcnt_backend_jm_init; ++ iface->term = kbasep_hwcnt_backend_jm_term; ++ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; ++ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; ++ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; ++ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; ++ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; ++ iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; ++ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; ++ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; ++ ++ return 0; +} + -+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, -+ struct work_struct *work) ++void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface) +{ -+ if (WARN_ON(!hvirt) || WARN_ON(!work)) -+ return false; ++ if (!iface) ++ return; + -+ return kbase_hwcnt_context_queue_work(hvirt->hctx, work); ++ kbasep_hwcnt_backend_jm_info_destroy( ++ (const struct kbase_hwcnt_backend_jm_info *)iface->info); ++ memset(iface, 0, sizeof(*iface)); +} -diff --git 
a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h new file mode 100644 -index 000000000..485ba7496 +index 000000000..4a6293c25 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h -@@ -0,0 +1,151 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h +@@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * @@ -193174,142 +195962,49 @@ index 000000000..485ba7496 + */ + +/* -+ * Hardware counter virtualizer API. -+ * -+ * Virtualizes a hardware counter context, so multiple clients can access -+ * a single hardware counter resource as though each was the exclusive user. ++ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM ++ * backend. + */ + -+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ -+#define _KBASE_HWCNT_VIRTUALIZER_H_ -+ -+#include -+#include -+ -+struct kbase_hwcnt_context; -+struct kbase_hwcnt_virtualizer; -+struct kbase_hwcnt_virtualizer_client; -+struct kbase_hwcnt_enable_map; -+struct kbase_hwcnt_dump_buffer; ++#ifndef _KBASE_HWCNT_BACKEND_JM_H_ ++#define _KBASE_HWCNT_BACKEND_JM_H_ + -+/** -+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. -+ * @hctx: Non-NULL pointer to the hardware counter context to -+ * virtualize. -+ * @dump_threshold_ns: Minimum threshold period for dumps between different -+ * clients where a new accumulator dump will not be -+ * performed, and instead accumulated values will be used. -+ * If 0, rate limiting will be disabled. -+ * @out_hvirt: Non-NULL pointer to where the pointer to the created -+ * virtualizer will be stored on success. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, -+ struct kbase_hwcnt_virtualizer **out_hvirt); ++#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" + -+/** -+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. -+ * @hvirt: Pointer to virtualizer to be terminated. -+ */ -+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt); ++struct kbase_device; + +/** -+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by -+ * the virtualizer, so related counter data -+ * structures can be created. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend ++ * interface. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @iface: Non-NULL pointer to backend interface structure that is filled in ++ * on creation success. + * -+ * Return: Non-NULL pointer to metadata, or NULL on error. -+ */ -+const struct kbase_hwcnt_metadata * -+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt); -+ -+/** -+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the -+ * same metadata as the virtualizer. -+ * @out_hvcli: Non-NULL pointer to where the pointer to the created client will -+ * be stored on success. ++ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, else error code. 
+ */ -+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ struct kbase_hwcnt_virtualizer_client **out_hvcli); -+ -+/** -+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. -+ * @hvcli: Pointer to the hardware counter client. -+ */ -+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli); -+ -+/** -+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's -+ * currently enabled counters, and -+ * enable a new set of counters -+ * that will be used for -+ * subsequent dumps. -+ * @hvcli: Non-NULL pointer to the virtualizer client. -+ * @enable_map: Non-NULL pointer to the new counter enable map for the client. -+ * Must have the same metadata as the virtualizer. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * -+ * Return: 0 on success or error code. -+ */ -+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, -+ const struct kbase_hwcnt_enable_map *enable_map, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf); -+ -+/** -+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's -+ * currently enabled counters. -+ * @hvcli: Non-NULL pointer to the virtualizer client. -+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will -+ * be written out to on success. -+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will -+ * be written out to on success. -+ * @dump_buf: Pointer to the buffer where the dump will be written out to on -+ * success. If non-NULL, must have the same metadata as the -+ * accumulator. If NULL, the dump will be discarded. -+ * -+ * Return: 0 on success or error code. -+ */ -+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, -+ u64 *ts_start_ns, u64 *ts_end_ns, -+ struct kbase_hwcnt_dump_buffer *dump_buf); ++int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_interface *iface); + +/** -+ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async -+ * work on a workqueue specialized for -+ * hardware counters. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @work: Non-NULL pointer to work to queue. -+ * -+ * Return: false if work was already on a queue, true otherwise. ++ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend ++ * interface. ++ * @iface: Pointer to interface to destroy. + * -+ * This is a convenience function that directly calls the underlying -+ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work. ++ * Can be safely called on an all-zeroed interface, or on an already destroyed ++ * interface. 
+ */ -+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, -+ struct work_struct *work); ++void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface); + -+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h ++#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c new file mode 100644 -index 000000000..501c0087b +index 000000000..564700b2d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h -@@ -0,0 +1,89 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c +@@ -0,0 +1,829 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. @@ -193330,243 +196025,820 @@ index 000000000..501c0087b + * + */ + -+/* -+ * Virtual interface for hardware counter watchdog. -+ */ ++#include + -+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_ -+#define _KBASE_HWCNT_WATCHDOG_IF_H_ ++#include ++#include + -+#include ++#include ++#include ++#include + -+/* -+ * Opaque structure of information used to create a watchdog timer interface. -+ */ -+struct kbase_hwcnt_watchdog_info; ++#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++/* Backend watch dog timer interval in milliseconds: 18 seconds. */ ++static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000; ++#else ++/* Backend watch dog timer interval in milliseconds: 1 second. */ ++static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000; ++#endif /* IS_FPGA && !NO_MALI */ + -+/** -+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function when watchdog timer is done -+ * -+ * @user_data: Pointer to the callback user data. ++/* ++ * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request. ++ * IDLE_BUFFER_EMPTY -> TIMER_DUMPING after ++ * hwcnt_backend_watchdog_timer_interval_ms ++ * milliseconds, if no dump_request has been ++ * called in the meantime. ++ * IDLE_BUFFER_FULL -> USER_DUMPING_BUFFER_FULL on dump_request. ++ * IDLE_BUFFER_FULL -> TIMER_DUMPING after ++ * hwcnt_backend_watchdog_timer_interval_ms ++ * milliseconds, if no dump_request has been ++ * called in the meantime. ++ * IDLE_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped ++ * counter values since the last dump_get. ++ * IDLE_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager ++ * backend dump_clear. ++ * IDLE_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager ++ * backend dump_clear. ++ * USER_DUMPING_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager ++ * backend dump_clear. ++ * USER_DUMPING_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager ++ * backend dump_clear. ++ * BUFFER_CLEARING -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend ++ * dump_clear completion. ++ * TIMER_DUMPING -> IDLE_BUFFER_FULL on timer's callback completion. ++ * TIMER_DUMPING -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback thread ++ * that there is no need for dumping the buffer ++ * anymore, and that the client will proceed ++ * clearing the buffer. 
++ * TIMER_DUMPING_USER_CLEAR -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user ++ * requested a dump_clear. ++ * TIMER_DUMPING -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client performs a ++ * dump request while the timer is dumping (the ++ * timer will perform the dump and (once ++ * completed) the client will retrieve the value ++ * from the buffer). ++ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the ++ * user reads the periodic dump buffer. ++ * Any -> ERROR if the job manager backend returns an error ++ * (of any kind). ++ * USER_DUMPING_BUFFER_EMPTY -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the ++ * periodic dump buffer and returns). ++ * USER_DUMPING_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with ++ * periodic dump buffer and returns). + */ -+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data); + -+/** -+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer -+ * -+ * @timer: Non-NULL pointer to a watchdog timer interface context -+ * @period_ms: Period in milliseconds of the watchdog timer -+ * @callback: Non-NULL pointer to a watchdog callback function -+ * @user_data: Pointer to the user data, used when watchdog timer callback is called ++/** enum backend_watchdog_state State used to synchronize timer callbacks with the main thread. ++ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls. ++ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is ++ * empty. ++ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full. ++ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback ++ * thread should just ignore and reschedule another callback in ++ * hwcnt_backend_watchdog_timer_interval_ms milliseconds. ++ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump. ++ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user ++ * requested a dump. ++ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a ++ * dump_clear. The timer has to complete the periodic dump ++ * and clear buffer (internal and job manager backend). ++ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic ++ * dump buffer is empty. ++ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump ++ * buffer is full. + * -+ * Return: 0 if the watchdog timer enabled successfully, error code otherwise. ++ * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is ++ * allowed to call the job manager backend layer. 
+ */ -+typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer, -+ u32 period_ms, -+ kbase_hwcnt_watchdog_callback_fn *callback, -+ void *user_data); ++enum backend_watchdog_state { ++ HWCNT_JM_WD_ERROR, ++ HWCNT_JM_WD_IDLE_BUFFER_EMPTY, ++ HWCNT_JM_WD_IDLE_BUFFER_FULL, ++ HWCNT_JM_WD_BUFFER_CLEARING, ++ HWCNT_JM_WD_TIMER_DUMPING, ++ HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, ++ HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR, ++ HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY, ++ HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL, ++}; + -+/** -+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer -+ * -+ * @timer: Non-NULL pointer to a watchdog timer interface context ++/** enum wd_init_state - State machine for initialization / termination of the backend resources + */ -+typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); ++enum wd_init_state { ++ HWCNT_JM_WD_INIT_START, ++ HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START, ++ HWCNT_JM_WD_INIT_ENABLE_MAP, ++ HWCNT_JM_WD_INIT_DUMP_BUFFER, ++ HWCNT_JM_WD_INIT_END ++}; + +/** -+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout -+ * -+ * @timer: Non-NULL pointer to a watchdog timer interface context -+ * @delay_ms: Watchdog timer expiration in milliseconds ++ * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an ++ * instance of the job manager watchdog backend. ++ * @jm_backend_iface: Hardware counter backend interface. This module extends ++ * this interface with a watchdog that performs regular ++ * dumps. The new interface this module provides complies ++ * with the old backend interface. ++ * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the ++ * hardware counter in case no reads are requested within ++ * a certain time, used to avoid hardware counter's buffer ++ * saturation. + */ -+typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer, -+ u32 delay_ms); ++struct kbase_hwcnt_backend_jm_watchdog_info { ++ struct kbase_hwcnt_backend_interface *jm_backend_iface; ++ struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface; ++}; + +/** -+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface. -+ * -+ * @timer: Immutable watchdog timer info -+ * @enable: Function ptr to enable watchdog -+ * @disable: Function ptr to disable watchdog -+ * @modify: Function ptr to modify watchdog ++ * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend. ++ * @info: Immutable information used to create the job manager watchdog backend. ++ * @jm_backend: Job manager's backend internal state. To be passed as argument during parent calls. ++ * @timeout_ms: Time period in milliseconds for hardware counters dumping. ++ * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are ++ * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, ++ * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL. ++ * @wd_enable_map: Watchdog backend internal buffer mask, initialized during dump_enable copying ++ * the enable_map passed as argument. ++ * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled ++ * during watchdog timer dumps. ++ * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls. ++ * @locked: Members protected from concurrent access by different threads. 
++ * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual ++ * exclusion). ++ * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the ++ * watchdog timer are both enabled. If false then both are disabled (or soon ++ * will be). Races between enable and disable have undefined behavior. ++ * @locked.state: State used to synchronize timer callbacks with the main thread. + */ -+struct kbase_hwcnt_watchdog_interface { -+ const struct kbase_hwcnt_watchdog_info *timer; -+ kbase_hwcnt_watchdog_enable_fn *enable; -+ kbase_hwcnt_watchdog_disable_fn *disable; -+ kbase_hwcnt_watchdog_modify_fn *modify; ++struct kbase_hwcnt_backend_jm_watchdog { ++ const struct kbase_hwcnt_backend_jm_watchdog_info *info; ++ struct kbase_hwcnt_backend *jm_backend; ++ u32 timeout_ms; ++ struct kbase_hwcnt_dump_buffer wd_dump_buffer; ++ struct kbase_hwcnt_enable_map wd_enable_map; ++ u64 wd_dump_timestamp; ++ struct completion watchdog_complete; ++ struct { ++ spinlock_t watchdog_lock; ++ bool is_enabled; ++ enum backend_watchdog_state state; ++ } locked; +}; + -+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c -new file mode 100644 -index 000000000..4caa832cd ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c -@@ -0,0 +1,157 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++/* timer's callback function */ ++static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend; ++ unsigned long flags; ++ bool wd_accumulate; + -+#include "mali_kbase.h" -+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" -+#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h" ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + -+#include -+#include ++ if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return; ++ } + -+/** -+ * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog -+ * interface. -+ * -+ * @workq: Single threaded work queue in which to execute callbacks. -+ * @dwork: Worker to execute callback function. 
-+ * @timer_enabled: True if watchdog timer enabled, otherwise false -+ * @callback: Watchdog callback function -+ * @user_data: Pointer to user data passed as argument to the callback -+ * function -+ */ -+struct kbase_hwcnt_watchdog_if_timer_info { -+ struct workqueue_struct *workq; -+ struct delayed_work dwork; -+ bool timer_enabled; -+ kbase_hwcnt_watchdog_callback_fn *callback; -+ void *user_data; -+}; ++ if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY || ++ wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) { ++ /*resetting the timer. Calling modify on a disabled timer enables it.*/ ++ wd_backend->info->dump_watchdog_iface->modify( ++ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms); ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return; ++ } ++ /*start performing the dump*/ + -+/** -+ * kbasep_hwcnt_watchdog_callback() - Watchdog callback -+ * -+ * @work: Work structure -+ * -+ * Function to be called in a work queue after watchdog timer has expired. ++ /* if there has been a previous timeout use accumulating dump_get() ++ * otherwise use non-accumulating to overwrite buffer ++ */ ++ wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL); ++ ++ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING; ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend, ++ &wd_backend->wd_dump_timestamp) || ++ wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) || ++ wd_backend->info->jm_backend_iface->dump_get( ++ wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map, ++ wd_accumulate)) { ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING && ++ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && ++ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); ++ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ /* Unblock user if it's waiting. */ ++ complete_all(&wd_backend->watchdog_complete); ++ return; ++ } ++ ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING && ++ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && ++ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); ++ ++ if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) { ++ /* If there is no user request/clear, transit to HWCNT_JM_WD_IDLE_BUFFER_FULL ++ * to indicate timer dump is done and the buffer is full. If state changed to ++ * HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or ++ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR then user will transit the state ++ * machine to next state. ++ */ ++ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL; ++ } ++ if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) { ++ /* reset the timer to schedule another callback. Calling modify on a ++ * disabled timer enables it. ++ */ ++ /*The spin lock needs to be held in case the client calls dump_enable*/ ++ wd_backend->info->dump_watchdog_iface->modify( ++ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms); ++ } ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ /* Unblock user if it's waiting. 
*/ ++ complete_all(&wd_backend->watchdog_complete); ++} ++ ++/* helper methods, info structure creation and destruction*/ ++ ++static struct kbase_hwcnt_backend_jm_watchdog_info * ++kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface, ++ struct kbase_hwcnt_watchdog_interface *watchdog_iface) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog_info *const info = ++ kmalloc(sizeof(*info), GFP_KERNEL); ++ ++ if (!info) ++ return NULL; ++ ++ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface, ++ .dump_watchdog_iface = ++ watchdog_iface }; ++ ++ return info; ++} ++ ++/****** kbase_hwcnt_backend_interface implementation *******/ ++ ++/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */ ++static const struct kbase_hwcnt_metadata * ++kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info) ++{ ++ const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info; ++ ++ if (WARN_ON(!info)) ++ return NULL; ++ ++ return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); ++} ++ ++static void ++kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend, ++ enum wd_init_state state) ++{ ++ if (!wd_backend) ++ return; ++ ++ WARN_ON(state > HWCNT_JM_WD_INIT_END); ++ ++ while (state-- > HWCNT_JM_WD_INIT_START) { ++ switch (state) { ++ case HWCNT_JM_WD_INIT_BACKEND: ++ wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend); ++ break; ++ case HWCNT_JM_WD_INIT_ENABLE_MAP: ++ kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map); ++ break; ++ case HWCNT_JM_WD_INIT_DUMP_BUFFER: ++ kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer); ++ break; ++ case HWCNT_JM_WD_INIT_END: ++ break; ++ } ++ } ++ ++ kfree(wd_backend); ++} ++ ++/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn ++ * Calling term does *not* destroy the interface + */ -+static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work) ++static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *const info = -+ container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work); ++ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = ++ (struct kbase_hwcnt_backend_jm_watchdog *)backend; + -+ if (info->callback) -+ info->callback(info->user_data); ++ if (!backend) ++ return; ++ ++ /* disable timer thread to avoid concurrent access to shared resources */ ++ wd_backend->info->dump_watchdog_iface->disable( ++ wd_backend->info->dump_watchdog_iface->timer); ++ ++ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END); +} + -+static int kbasep_hwcnt_watchdog_if_timer_enable( -+ const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms, -+ kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data) ++/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */ ++static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; ++ int errcode = 0; ++ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL; ++ struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info; ++ const struct kbase_hwcnt_backend_info *jm_info; ++ const struct kbase_hwcnt_metadata *metadata; ++ enum 
wd_init_state state = HWCNT_JM_WD_INIT_START; + -+ if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled)) ++ if (WARN_ON(!info) || WARN_ON(!out_backend)) + return -EINVAL; + -+ timer_info->callback = callback; -+ timer_info->user_data = user_data; ++ jm_info = wd_info->jm_backend_iface->info; ++ metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); + -+ queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms)); -+ timer_info->timer_enabled = true; ++ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL); ++ if (!wd_backend) { ++ *out_backend = NULL; ++ return -ENOMEM; ++ } ++ ++ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){ ++ .info = wd_info, ++ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms, ++ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false } ++ }; ++ ++ while (state < HWCNT_JM_WD_INIT_END && !errcode) { ++ switch (state) { ++ case HWCNT_JM_WD_INIT_BACKEND: ++ errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend); ++ break; ++ case HWCNT_JM_WD_INIT_ENABLE_MAP: ++ errcode = ++ kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map); ++ break; ++ case HWCNT_JM_WD_INIT_DUMP_BUFFER: ++ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, ++ &wd_backend->wd_dump_buffer); ++ break; ++ case HWCNT_JM_WD_INIT_END: ++ break; ++ } ++ if (!errcode) ++ state++; ++ } ++ ++ if (errcode) { ++ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state); ++ *out_backend = NULL; ++ return errcode; ++ } ++ ++ WARN_ON(state != HWCNT_JM_WD_INIT_END); + ++ spin_lock_init(&wd_backend->locked.watchdog_lock); ++ init_completion(&wd_backend->watchdog_complete); ++ ++ *out_backend = (struct kbase_hwcnt_backend *)wd_backend; + return 0; +} + -+static void -+kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer) ++/* Job manager watchdog backend, implementation of timestamp_ns */ ++static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + -+ if (WARN_ON(!timer)) ++ return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend); ++} ++ ++static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( ++ struct kbase_hwcnt_backend_jm_watchdog *wd_backend, ++ const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler) ++{ ++ int errcode = -EPERM; ++ unsigned long flags; ++ ++ if (WARN_ON(!wd_backend) || WARN_ON(!enable_map)) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ ++ /* If the backend is already enabled return an error */ ++ if (wd_backend->locked.is_enabled) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return -EPERM; ++ } ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ /*We copy the enable map into our watchdog backend copy, for future usage*/ ++ kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map); ++ ++ errcode = enabler(wd_backend->jm_backend, enable_map); ++ if (!errcode) { ++ /*Enable dump watchdog*/ ++ errcode = wd_backend->info->dump_watchdog_iface->enable( ++ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms, ++ kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend); ++ if (!errcode) { ++ 
spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ WARN_ON(wd_backend->locked.is_enabled); ++ wd_backend->locked.is_enabled = true; ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ } else ++ /*Reverting the job manager backend back to disabled*/ ++ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); ++ } ++ ++ return errcode; ++} ++ ++/* Job manager watchdog backend, implementation of dump_enable */ ++static int ++kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ ++ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( ++ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable); ++} ++ ++/* Job manager watchdog backend, implementation of dump_enable_nolock */ ++static int ++kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ ++ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( ++ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock); ++} ++ ++/* Job manager watchdog backend, implementation of dump_disable */ ++static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ unsigned long flags; ++ ++ if (WARN_ON(!backend)) + return; + -+ if (!timer_info->timer_enabled) ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ if (!wd_backend->locked.is_enabled) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return; ++ } + -+ cancel_delayed_work_sync(&timer_info->dwork); -+ timer_info->timer_enabled = false; ++ wd_backend->locked.is_enabled = false; ++ ++ /* Discard undumped counter values since the last dump_get. 
*/ ++ if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL) ++ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ wd_backend->info->dump_watchdog_iface->disable( ++ wd_backend->info->dump_watchdog_iface->timer); ++ ++ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); +} + -+static void -+kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer, -+ u32 const delay_ms) ++/* Job manager watchdog backend, implementation of dump_clear */ ++static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; ++ int errcode = -EPERM; ++ bool clear_wd_wait_completion = false; ++ unsigned long flags; ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + -+ if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled)) -+ return; ++ if (WARN_ON(!backend)) ++ return -EINVAL; + -+ mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms)); ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ if (!wd_backend->locked.is_enabled) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return -EPERM; ++ } ++ ++ switch (wd_backend->locked.state) { ++ case HWCNT_JM_WD_IDLE_BUFFER_FULL: ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: ++ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: ++ wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING; ++ errcode = 0; ++ break; ++ case HWCNT_JM_WD_TIMER_DUMPING: ++ /* The timer asked for a dump request, when complete, the job manager backend ++ * buffer will be zero ++ */ ++ clear_wd_wait_completion = true; ++ /* This thread will have to wait for the callback to terminate and then call a ++ * dump_clear on the job manager backend. We change the state to ++ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is ++ * no more need to dump the buffer (since we will clear it right after anyway). ++ * We set up a wait queue to synchronize with the callback. ++ */ ++ reinit_completion(&wd_backend->watchdog_complete); ++ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR; ++ errcode = 0; ++ break; ++ default: ++ errcode = -EPERM; ++ break; ++ } ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ if (!errcode) { ++ if (clear_wd_wait_completion) { ++ /* Waiting for the callback to finish */ ++ wait_for_completion(&wd_backend->watchdog_complete); ++ } ++ ++ /* Clearing job manager backend buffer */ ++ errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend); ++ ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ ++ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && ++ wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING && ++ wd_backend->locked.state != HWCNT_JM_WD_ERROR); ++ ++ WARN_ON(!wd_backend->locked.is_enabled); ++ ++ if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) { ++ /* Setting the internal buffer state to EMPTY */ ++ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; ++ /* Resetting the timer. Calling modify on a disabled timer ++ * enables it. 
++ */ ++ wd_backend->info->dump_watchdog_iface->modify( ++ wd_backend->info->dump_watchdog_iface->timer, ++ wd_backend->timeout_ms); ++ } else { ++ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ errcode = -EPERM; ++ } ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ } ++ ++ return errcode; +} + -+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if) ++/* Job manager watchdog backend, implementation of dump_request */ ++static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info; ++ bool call_dump_request = false; ++ int errcode = 0; ++ unsigned long flags; ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + -+ if (WARN_ON(!watchdog_if)) -+ return; ++ if (WARN_ON(!backend) || WARN_ON(!dump_time_ns)) ++ return -EINVAL; + -+ timer_info = (void *)watchdog_if->timer; ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + -+ if (WARN_ON(!timer_info)) -+ return; ++ if (!wd_backend->locked.is_enabled) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return -EPERM; ++ } + -+ destroy_workqueue(timer_info->workq); -+ kfree(timer_info); ++ switch (wd_backend->locked.state) { ++ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: ++ /* progressing the state to avoid callbacks running while calling the job manager ++ * backend ++ */ ++ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY; ++ call_dump_request = true; ++ break; ++ case HWCNT_JM_WD_IDLE_BUFFER_FULL: ++ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL; ++ call_dump_request = true; ++ break; ++ case HWCNT_JM_WD_TIMER_DUMPING: ++ /* Retrieve timing information from previous dump_request */ ++ *dump_time_ns = wd_backend->wd_dump_timestamp; ++ /* On the next client call (dump_wait) the thread will have to wait for the ++ * callback to finish the dumping. ++ * We set up a wait queue to synchronize with the callback. ++ */ ++ reinit_completion(&wd_backend->watchdog_complete); ++ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED; ++ break; ++ default: ++ errcode = -EPERM; ++ break; ++ } ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + -+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ -+ .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL -+ }; ++ if (call_dump_request) { ++ errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend, ++ dump_time_ns); ++ if (!errcode) { ++ /*resetting the timer. 
Calling modify on a disabled timer enables it*/ ++ wd_backend->info->dump_watchdog_iface->modify( ++ wd_backend->info->dump_watchdog_iface->timer, ++ wd_backend->timeout_ms); ++ } else { ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ WARN_ON(!wd_backend->locked.is_enabled); ++ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ } ++ } ++ ++ return errcode; +} + -+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if) ++/* Job manager watchdog backend, implementation of dump_wait */ ++static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend) +{ -+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info; ++ int errcode = -EPERM; ++ bool wait_for_auto_dump = false, wait_for_user_dump = false; ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ unsigned long flags; + -+ if (WARN_ON(!watchdog_if)) ++ if (WARN_ON(!backend)) + return -EINVAL; + -+ timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL); -+ if (!timer_info) -+ return -ENOMEM; ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ if (!wd_backend->locked.is_enabled) { ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ return -EPERM; ++ } + -+ *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false }; ++ switch (wd_backend->locked.state) { ++ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: ++ wait_for_auto_dump = true; ++ errcode = 0; ++ break; ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: ++ wait_for_user_dump = true; ++ errcode = 0; ++ break; ++ default: ++ errcode = -EPERM; ++ break; ++ } ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + -+ INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback); ++ if (wait_for_auto_dump) ++ wait_for_completion(&wd_backend->watchdog_complete); ++ else if (wait_for_user_dump) { ++ errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend); ++ if (errcode) { ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ WARN_ON(!wd_backend->locked.is_enabled); ++ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ } ++ } + -+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ -+ .timer = (void *)timer_info, -+ .enable = kbasep_hwcnt_watchdog_if_timer_enable, -+ .disable = kbasep_hwcnt_watchdog_if_timer_disable, -+ .modify = kbasep_hwcnt_watchdog_if_timer_modify, ++ return errcode; ++} ++ ++/* Job manager watchdog backend, implementation of dump_get */ ++static int kbasep_hwcnt_backend_jm_watchdog_dump_get( ++ struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer, ++ const struct kbase_hwcnt_enable_map *enable_map, bool accumulate) ++{ ++ bool call_dump_get = false; ++ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; ++ unsigned long flags; ++ int errcode = 0; ++ ++ if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map)) ++ return -EINVAL; ++ ++ /* The resultant contents of the dump buffer are only well defined if a prior ++ * call to dump_wait returned successfully, and a new dump has not yet been ++ * requested by a call to dump_request. 
++ */ ++ ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ ++ switch (wd_backend->locked.state) { ++ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: ++ /*we assume dump_wait has been called and completed successfully*/ ++ if (accumulate) ++ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer, ++ enable_map); ++ else ++ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer, ++ enable_map); ++ ++ /*use state to indicate the the buffer is now empty*/ ++ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; ++ break; ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: ++ /*accumulate or copy watchdog data to user buffer first so that dump_get can set ++ * the header correctly ++ */ ++ if (accumulate) ++ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer, ++ enable_map); ++ else ++ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer, ++ enable_map); ++ ++ /*accumulate backend data into user buffer on top of watchdog data*/ ++ accumulate = true; ++ call_dump_get = true; ++ break; ++ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: ++ call_dump_get = true; ++ break; ++ default: ++ errcode = -EPERM; ++ break; ++ } ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ ++ if (call_dump_get && !errcode) { ++ /*we just dump the job manager backend into the user buffer, following ++ *accumulate flag ++ */ ++ errcode = wd_backend->info->jm_backend_iface->dump_get( ++ wd_backend->jm_backend, dump_buffer, enable_map, accumulate); ++ ++ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); ++ ++ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY && ++ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL && ++ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED); ++ ++ if (!errcode) ++ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; ++ else ++ wd_backend->locked.state = HWCNT_JM_WD_ERROR; ++ ++ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); ++ } ++ ++ return errcode; ++} ++ ++/* exposed methods */ ++ ++int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface, ++ struct kbase_hwcnt_watchdog_interface *watchdog_iface, ++ struct kbase_hwcnt_backend_interface *out_iface) ++{ ++ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL; ++ ++ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface)) ++ return -EINVAL; ++ ++ info = kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface); ++ if (!info) ++ return -ENOMEM; ++ ++ /*linking the info table with the output iface, to allow the callbacks below to access the ++ *info object later on ++ */ ++ *out_iface = (struct kbase_hwcnt_backend_interface){ ++ .info = (void *)info, ++ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata, ++ .init = kbasep_hwcnt_backend_jm_watchdog_init, ++ .term = kbasep_hwcnt_backend_jm_watchdog_term, ++ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns, ++ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable, ++ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock, ++ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable, ++ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear, ++ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request, ++ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait, ++ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get + }; + -+ timer_info->workq = 
alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (timer_info->workq) -+ return 0; ++ /*registering watchdog backend module methods on the output interface*/ + -+ kfree(timer_info); -+ return -ENOMEM; ++ return 0; +} -diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h ++ ++void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface) ++{ ++ if (!iface || !iface->info) ++ return; ++ ++ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info); ++ ++ /*blanking the watchdog backend interface*/ ++ memset(iface, 0, sizeof(*iface)); ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h new file mode 100644 -index 000000000..a545ad3e3 +index 000000000..02a7952cc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h -@@ -0,0 +1,48 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h +@@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * @@ -193589,83 +196861,59 @@ index 000000000..a545ad3e3 + */ + +/* -+ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend ++ * Concrete implementation of mali_kbase_hwcnt_backend interface for job manager ++ * backend. This module functionally interleaves between the hardware counter ++ * (hwcnt_accumulator) module (the interface consumer) and the job manager ++ * backend module (hwcnt_backend_jm). This module provides buffering ++ * functionality for the dumping requests requested by the hwcnt_accumulator ++ * consumer. This module is NOT multi-thread safe. The programmer must ++ * ensure the exposed methods are called by at most one thread at any time. + */ + -+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ -+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ ++#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ ++#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ + -+struct kbase_hwcnt_watchdog_interface; ++#include ++#include + +/** -+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface of hardware counter backend. ++ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog ++ * backend interface. ++ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will ++ * extend. ++ * @watchdog_iface: Non-NULL pointer to an hardware counter watchdog interface. ++ * @out_iface: Non-NULL pointer to backend interface structure that is filled in ++ * on creation success. + * -+ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success ++ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, error otherwise. + */ -+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if); ++int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface, ++ struct kbase_hwcnt_watchdog_interface *watchdog_iface, ++ struct kbase_hwcnt_backend_interface *out_iface); + +/** -+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter -+ * backend. ++ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog ++ * backend interface. ++ * @iface: Pointer to interface to destroy. 
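++ *
++ * (Editor's note, illustrative only and not original ARM text: an interface
++ * previously filled in by
++ * kbase_hwcnt_backend_jm_watchdog_create(&jm_iface, &wd_iface, &out_iface)
++ * would be torn down here as
++ * kbase_hwcnt_backend_jm_watchdog_destroy(&out_iface); the variable names
++ * are hypothetical.)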
+ * -+ * @watchdog_if: Pointer to watchdog interface to destroy ++ * Can be safely called on an all-zeroed interface, or on an already destroyed ++ * interface. + */ -+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if); -+ -+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */ -diff --git a/drivers/gpu/arm/bifrost/ipa/Kbuild b/drivers/gpu/arm/bifrost/ipa/Kbuild -new file mode 100755 -index 000000000..0be664c47 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/Kbuild -@@ -0,0 +1,35 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+ -+bifrost_kbase-y += \ -+ ipa/mali_kbase_ipa_simple.o \ -+ ipa/mali_kbase_ipa.o -+ -+bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o ++void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface); + -+ifeq ($(MALI_USE_CSF),1) -+ bifrost_kbase-y += \ -+ ipa/backend/mali_kbase_ipa_counter_csf.o \ -+ ipa/backend/mali_kbase_ipa_counter_common_csf.o -+else -+ bifrost_kbase-y += \ -+ ipa/backend/mali_kbase_ipa_counter_jm.o \ -+ ipa/backend/mali_kbase_ipa_counter_common_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c ++#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c new file mode 100644 -index 000000000..60b061ef6 +index 000000000..34deb5d9e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c -@@ -0,0 +1,457 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c +@@ -0,0 +1,775 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -193683,978 +196931,770 @@ index 000000000..60b061ef6 + * + */ + -+#include "mali_kbase_ipa_counter_common_csf.h" -+#include "ipa/mali_kbase_ipa_debugfs.h" ++/* ++ * Implementation of hardware counter context and accumulator APIs. ++ */ + -+#define DEFAULT_SCALING_FACTOR 5 ++#include "hwcnt/mali_kbase_hwcnt_context.h" ++#include "hwcnt/mali_kbase_hwcnt_accumulator.h" ++#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" + -+/* If the value of GPU_ACTIVE is below this, use the simple model -+ * instead, to avoid extrapolating small amounts of counter data across -+ * large sample periods. 
++#include ++#include ++#include ++ ++/** ++ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. ++ * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. ++ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. ++ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are ++ * any enabled counters. + */ -+#define DEFAULT_MIN_SAMPLE_CYCLES 10000 ++enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED }; + -+/* Typical value for the sampling interval is expected to be less than 100ms, -+ * So 5 seconds is a reasonable upper limit for the time gap between the -+ * 2 samples. ++/** ++ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. ++ * @metadata: Pointer to immutable hwcnt metadata. ++ * @backend: Pointer to created counter backend. ++ * @state: The current state of the accumulator. ++ * - State transition from disabled->enabled or ++ * disabled->error requires state_lock. ++ * - State transition from enabled->disabled or ++ * enabled->error requires both accum_lock and ++ * state_lock. ++ * - Error state persists until next disable. ++ * @enable_map: The current set of enabled counters. ++ * - Must only be modified while holding both ++ * accum_lock and state_lock. ++ * - Can be read while holding either lock. ++ * - Must stay in sync with enable_map_any_enabled. ++ * @enable_map_any_enabled: True if any counters in the map are enabled, else ++ * false. If true, and state is ACCUM_STATE_ENABLED, ++ * then the counter backend will be enabled. ++ * - Must only be modified while holding both ++ * accum_lock and state_lock. ++ * - Can be read while holding either lock. ++ * - Must stay in sync with enable_map. ++ * @scratch_map: Scratch enable map, used as temporary enable map ++ * storage during dumps. ++ * - Must only be read or modified while holding ++ * accum_lock. ++ * @accum_buf: Accumulation buffer, where dumps will be accumulated ++ * into on transition to a disable state. ++ * - Must only be read or modified while holding ++ * accum_lock. ++ * @accumulated: True if the accumulation buffer has been accumulated ++ * into and not subsequently read from yet, else false. ++ * - Must only be read or modified while holding ++ * accum_lock. ++ * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent ++ * dump that was requested by the user. ++ * - Must only be read or modified while holding ++ * accum_lock. + */ -+#define MAX_SAMPLE_INTERVAL_MS ((s64)5000) ++struct kbase_hwcnt_accumulator { ++ const struct kbase_hwcnt_metadata *metadata; ++ struct kbase_hwcnt_backend *backend; ++ enum kbase_hwcnt_accum_state state; ++ struct kbase_hwcnt_enable_map enable_map; ++ bool enable_map_any_enabled; ++ struct kbase_hwcnt_enable_map scratch_map; ++ struct kbase_hwcnt_dump_buffer accum_buf; ++ bool accumulated; ++ u64 ts_last_dump_ns; ++}; + -+/* Maximum increment that is expected for a counter value during a sampling -+ * interval is derived assuming -+ * - max sampling interval of 1 second. -+ * - max GPU frequency of 2 GHz. -+ * - max number of cores as 32. -+ * - max increment of 4 in per core counter value at every clock cycle. -+ * -+ * So max increment = 2 * 10^9 * 32 * 4 = ~2^38. -+ * If a counter increases by an amount greater than this value, then an error -+ * will be returned and the simple power model will be used. ++/** ++ * struct kbase_hwcnt_context - Hardware counter context structure. 
++ * @iface: Pointer to hardware counter backend interface. ++ * @state_lock: Spinlock protecting state. ++ * @disable_count: Disable count of the context. Initialised to 1. ++ * Decremented when the accumulator is acquired, and incremented ++ * on release. Incremented on calls to ++ * kbase_hwcnt_context_disable[_atomic], and decremented on ++ * calls to kbase_hwcnt_context_enable. ++ * - Must only be read or modified while holding state_lock. ++ * @accum_lock: Mutex protecting accumulator. ++ * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or ++ * termination. Set to true before accumulator initialisation, ++ * and false after accumulator termination. ++ * - Must only be modified while holding both accum_lock and ++ * state_lock. ++ * - Can be read while holding either lock. ++ * @accum: Hardware counter accumulator structure. ++ * @wq: Centralized workqueue for users of hardware counters to ++ * submit async hardware counter related work. Never directly ++ * called, but it's expected that a lot of the functions in this ++ * API will end up called from the enqueued async work. + */ -+#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1) ++struct kbase_hwcnt_context { ++ const struct kbase_hwcnt_backend_interface *iface; ++ spinlock_t state_lock; ++ size_t disable_count; ++ struct mutex accum_lock; ++ bool accum_inited; ++ struct kbase_hwcnt_accumulator accum; ++ struct workqueue_struct *wq; ++}; + -+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) ++int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, ++ struct kbase_hwcnt_context **out_hctx) +{ -+ s64 rtn; ++ struct kbase_hwcnt_context *hctx = NULL; + -+ if (a > 0 && (S64_MAX - a) < b) -+ rtn = S64_MAX; -+ else if (a < 0 && (S64_MIN - a) > b) -+ rtn = S64_MIN; -+ else -+ rtn = a + b; ++ if (!iface || !out_hctx) ++ return -EINVAL; + -+ return rtn; -+} ++ hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); ++ if (!hctx) ++ goto err_alloc_hctx; + -+static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value) -+{ -+ /* Range: 0 < counter_value < 2^38 */ ++ hctx->iface = iface; ++ spin_lock_init(&hctx->state_lock); ++ hctx->disable_count = 1; ++ mutex_init(&hctx->accum_lock); ++ hctx->accum_inited = false; + -+ /* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */ -+ return counter_value * (s64)coeff; -+} ++ hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); ++ if (!hctx->wq) ++ goto err_alloc_workqueue; + -+/** -+ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control -+ * @model_data: Pointer to counter model data -+ * -+ * Register IPA counter model as a client of kbase_ipa_control, which -+ * provides an interface to retreive the accumulated value of hardware -+ * counters to calculate energy consumption. -+ * -+ * Return: 0 on success, or an error code. -+ */ -+static int -+kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) -+{ -+ struct kbase_device *kbdev = model_data->kbdev; -+ struct kbase_ipa_control_perf_counter *perf_counters; -+ u32 cnt_idx = 0; -+ int err; -+ size_t i; -+ -+ /* Value for GPU_ACTIVE counter also needs to be queried. It is required -+ * for the normalization of top-level and shader core counters. 
-+ */ -+ model_data->num_counters = 1 + model_data->num_top_level_cntrs + -+ model_data->num_shader_cores_cntrs; -+ -+ perf_counters = kcalloc(model_data->num_counters, -+ sizeof(*perf_counters), GFP_KERNEL); -+ -+ if (!perf_counters) { -+ dev_err(kbdev->dev, -+ "Failed to allocate memory for perf_counters array"); -+ return -ENOMEM; -+ } -+ -+ /* Fill in the description for GPU_ACTIVE counter which is always -+ * needed, as mentioned above, regardless of the energy model used -+ * by the CSF GPUs. -+ */ -+ perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW; -+ perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX; -+ perf_counters[cnt_idx].gpu_norm = false; -+ perf_counters[cnt_idx].scaling_factor = 1; -+ cnt_idx++; -+ -+ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { -+ const struct kbase_ipa_counter *counter = -+ &model_data->top_level_cntrs_def[i]; ++ *out_hctx = hctx; + -+ perf_counters[cnt_idx].type = counter->counter_block_type; -+ perf_counters[cnt_idx].idx = counter->counter_block_offset; -+ perf_counters[cnt_idx].gpu_norm = false; -+ perf_counters[cnt_idx].scaling_factor = 1; -+ cnt_idx++; -+ } ++ return 0; + -+ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { -+ const struct kbase_ipa_counter *counter = -+ &model_data->shader_cores_cntrs_def[i]; ++err_alloc_workqueue: ++ kfree(hctx); ++err_alloc_hctx: ++ return -ENOMEM; ++} + -+ perf_counters[cnt_idx].type = counter->counter_block_type; -+ perf_counters[cnt_idx].idx = counter->counter_block_offset; -+ perf_counters[cnt_idx].gpu_norm = false; -+ perf_counters[cnt_idx].scaling_factor = 1; -+ cnt_idx++; -+ } ++void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) ++{ ++ if (!hctx) ++ return; + -+ err = kbase_ipa_control_register(kbdev, perf_counters, -+ model_data->num_counters, -+ &model_data->ipa_control_client); -+ if (err) -+ dev_err(kbdev->dev, -+ "Failed to register IPA with kbase_ipa_control"); ++ /* Make sure we didn't leak the accumulator */ ++ WARN_ON(hctx->accum_inited); + -+ kfree(perf_counters); -+ return err; ++ /* We don't expect any work to be pending on this workqueue. ++ * Regardless, this will safely drain and complete the work. ++ */ ++ destroy_workqueue(hctx->wq); ++ kfree(hctx); +} + +/** -+ * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control. -+ * @model_data: Pointer to counter model data ++ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. ++ * @hctx: Non-NULL pointer to hardware counter context. + */ -+static void -+kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data) ++static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ -+ if (model_data->ipa_control_client) { -+ kbase_ipa_control_unregister(model_data->kbdev, -+ model_data->ipa_control_client); -+ model_data->ipa_control_client = NULL; -+ } ++ WARN_ON(!hctx); ++ WARN_ON(!hctx->accum_inited); ++ ++ kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); ++ kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); ++ kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); ++ hctx->iface->term(hctx->accum.backend); ++ memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + -+static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data, -+ const struct kbase_ipa_counter *const cnt_defs, -+ size_t num_counters, s32 *counter_coeffs, -+ u64 *counter_values, u32 active_cycles, u32 *coeffp) ++/** ++ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. ++ * @hctx: Non-NULL pointer to hardware counter context. 
++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ -+ u64 coeff = 0, coeff_mul = 0; -+ s64 total_energy = 0; -+ size_t i; ++ int errcode; + -+ /* Range for the 'counter_value' is [0, 2^38) -+ * Range for the 'coeff' is [-2^21, 2^21] -+ * So range for the 'group_energy' is [-2^59, 2^59) and range for the -+ * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e. -+ * [-2^63, 2^63). -+ */ -+ for (i = 0; i < num_counters; i++) { -+ s32 coeff = counter_coeffs[i]; -+ u64 counter_value = counter_values[i]; -+ s64 group_energy = kbase_ipa_group_energy(coeff, counter_value); ++ WARN_ON(!hctx); ++ WARN_ON(!hctx->accum_inited); + -+ if (counter_value > MAX_COUNTER_INCREMENT) { -+ dev_warn(model_data->kbdev->dev, -+ "Increment in counter %s more than expected", -+ cnt_defs[i].name); -+ return -ERANGE; -+ } ++ errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend); ++ if (errcode) ++ goto error; + -+ total_energy = -+ kbase_ipa_add_saturate(total_energy, group_energy); -+ } ++ hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); ++ hctx->accum.state = ACCUM_STATE_ERROR; + -+ /* Range: 0 <= coeff < 2^63 */ -+ if (total_energy >= 0) -+ coeff = total_energy; -+ else -+ dev_dbg(model_data->kbdev->dev, -+ "Energy value came negative as %lld", total_energy); ++ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map); ++ if (errcode) ++ goto error; + -+ /* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this -+ * can be constrained further: the value of counters that are being -+ * used for dynamic power estimation can only increment by about 128 -+ * maximum per clock cycle. This is because max number of shader -+ * cores is expected to be 32 (max number of L2 slices is expected to -+ * be 8) and some counters (per shader core) like SC_BEATS_RD_TEX_EXT & -+ * SC_EXEC_STARVE_ARITH can increment by 4 every clock cycle. -+ * Each "beat" is defined as 128 bits and each shader core can -+ * (currently) do 512 bits read and 512 bits write to/from the L2 -+ * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment -+ * [0, 4] per shader core per cycle. -+ * We can thus write the range of 'coeff' in terms of active_cycles: -+ * -+ * coeff = SUM(coeffN * counterN * num_cores_for_counterN) -+ * coeff <= SUM(coeffN * counterN) * max_cores -+ * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores -+ * (substitute max_counter = 2^2 * active_cycles) -+ * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores -+ * coeff <= 2^4 * 2^21 * 2^2 * active_cycles * 2^5 -+ * coeff <= 2^32 * active_cycles -+ * -+ * So after the division: 0 <= coeff <= 2^32 -+ */ -+ coeff = div_u64(coeff, active_cycles); ++ hctx->accum.enable_map_any_enabled = false; + -+ /* Not all models were derived at the same reference voltage. Voltage -+ * scaling is done by multiplying by V^2, so we need to *divide* by -+ * Vref^2 here. -+ * Range: 0 <= coeff <= 2^35 -+ */ -+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); -+ /* Range: 0 <= coeff <= 2^38 */ -+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf); ++ if (errcode) ++ goto error; + -+ /* Scale by user-specified integer factor. 
-+ * Range: 0 <= coeff_mul < 2^43 -+ */ -+ coeff_mul = coeff * model_data->scaling_factor; ++ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map); ++ if (errcode) ++ goto error; + -+ /* The power models have results with units -+ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this -+ * becomes fW/(Hz V^2), which are the units of coeff_mul. However, -+ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide -+ * by 1000. -+ * Range: 0 <= coeff_mul < 2^33 -+ */ -+ coeff_mul = div_u64(coeff_mul, 1000u); ++ hctx->accum.accumulated = false; + -+ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ -+ *coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16); ++ hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; ++ ++error: ++ kbasep_hwcnt_accumulator_term(hctx); ++ return errcode; +} + -+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++/** ++ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the ++ * disabled state, from the enabled or ++ * error states. ++ * @hctx: Non-NULL pointer to hardware counter context. ++ * @accumulate: True if we should accumulate before disabling, else false. ++ */ ++static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate) +{ -+ struct kbase_ipa_counter_model_data *model_data = -+ (struct kbase_ipa_counter_model_data *)model->model_data; -+ struct kbase_device *kbdev = model->kbdev; -+ s32 *counter_coeffs_p = model_data->counter_coeffs; -+ u64 *cnt_values_p = model_data->counter_values; -+ const u64 num_counters = model_data->num_counters; -+ u32 active_cycles; -+ ktime_t now, diff; -+ s64 diff_ms; -+ int ret; ++ int errcode = 0; ++ bool backend_enabled = false; ++ struct kbase_hwcnt_accumulator *accum; ++ unsigned long flags; ++ u64 dump_time_ns; + -+ lockdep_assert_held(&kbdev->ipa.lock); ++ WARN_ON(!hctx); ++ lockdep_assert_held(&hctx->accum_lock); ++ WARN_ON(!hctx->accum_inited); + -+ /* The last argument is supposed to be a pointer to the location that -+ * will store the time for which GPU has been in protected mode since -+ * last query. This can be passed as NULL as counter model itself will -+ * not be used when GPU enters protected mode, as IPA is supposed to -+ * switch to the simple power model. -+ */ -+ ret = kbase_ipa_control_query(kbdev, -+ model_data->ipa_control_client, -+ cnt_values_p, num_counters, NULL); -+ if (WARN_ON(ret)) -+ return ret; ++ accum = &hctx->accum; + -+ now = ktime_get_raw(); -+ diff = ktime_sub(now, kbdev->ipa.last_sample_time); -+ diff_ms = ktime_to_ms(diff); ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+ kbdev->ipa.last_sample_time = now; ++ WARN_ON(hctx->disable_count != 0); ++ WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + -+ /* The counter values cannot be relied upon if the sampling interval was -+ * too long. Typically this will happen when the polling is started -+ * after the temperature has risen above a certain trip point. After -+ * that regular calls every 25-100 ms interval are expected. -+ */ -+ if (diff_ms > MAX_SAMPLE_INTERVAL_MS) { -+ dev_dbg(kbdev->dev, -+ "Last sample was taken %lld milli seconds ago", -+ diff_ms); -+ return -EOVERFLOW; -+ } ++ if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled)) ++ backend_enabled = true; + -+ /* Range: 0 (GPU not used at all), to the max sampling interval, say -+ * 1 seconds, * max GPU frequency (GPU 100% utilized). 
-+ * 0 <= active_cycles <= 1 * ~2GHz -+ * 0 <= active_cycles < 2^31 -+ */ -+ if (*cnt_values_p > U32_MAX) { -+ dev_warn(kbdev->dev, -+ "Increment in GPU_ACTIVE counter more than expected"); -+ return -ERANGE; -+ } ++ if (!backend_enabled) ++ hctx->accum.state = ACCUM_STATE_DISABLED; + -+ active_cycles = (u32)*cnt_values_p; ++ spin_unlock_irqrestore(&hctx->state_lock, flags); + -+ /* If the value of the active_cycles is less than the threshold, then -+ * return an error so that IPA framework can approximate using the -+ * cached simple model results instead. This may be more accurate -+ * than extrapolating using a very small counter dump. -+ */ -+ if (active_cycles < (u32)max(model_data->min_sample_cycles, 0)) -+ return -ENODATA; ++ /* Early out if the backend is not already enabled */ ++ if (!backend_enabled) ++ return; + -+ /* Range: 1 <= active_cycles < 2^31 */ -+ active_cycles = max(1u, active_cycles); ++ if (!accumulate) ++ goto disable; + -+ cnt_values_p++; -+ ret = calculate_coeff(model_data, model_data->top_level_cntrs_def, -+ model_data->num_top_level_cntrs, -+ counter_coeffs_p, cnt_values_p, active_cycles, -+ &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); -+ if (ret) -+ return ret; ++ /* Try and accumulate before disabling */ ++ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); ++ if (errcode) ++ goto disable; + -+ cnt_values_p += model_data->num_top_level_cntrs; -+ counter_coeffs_p += model_data->num_top_level_cntrs; -+ ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def, -+ model_data->num_shader_cores_cntrs, -+ counter_coeffs_p, cnt_values_p, active_cycles, -+ &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ errcode = hctx->iface->dump_wait(accum->backend); ++ if (errcode) ++ goto disable; + -+ return ret; -+} ++ errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map, ++ accum->accumulated); ++ if (errcode) ++ goto disable; + -+void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model) -+{ -+ struct kbase_ipa_counter_model_data *model_data = -+ (struct kbase_ipa_counter_model_data *)model->model_data; -+ u64 *cnt_values_p = model_data->counter_values; -+ const u64 num_counters = model_data->num_counters; -+ int ret; ++ accum->accumulated = true; + -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++disable: ++ hctx->iface->dump_disable(accum->backend); + -+ ret = kbase_ipa_control_query(model->kbdev, -+ model_data->ipa_control_client, -+ cnt_values_p, num_counters, NULL); -+ WARN_ON(ret); ++ /* Regardless of any errors during the accumulate, put the accumulator ++ * in the disabled state. ++ */ ++ spin_lock_irqsave(&hctx->state_lock, flags); ++ ++ hctx->accum.state = ACCUM_STATE_DISABLED; ++ ++ spin_unlock_irqrestore(&hctx->state_lock, flags); +} + -+int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, -+ const struct kbase_ipa_counter *top_level_cntrs_def, -+ size_t num_top_level_cntrs, -+ const struct kbase_ipa_counter *shader_cores_cntrs_def, -+ size_t num_shader_cores_cntrs, -+ s32 reference_voltage) ++/** ++ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the ++ * enabled state, from the disabled state. ++ * @hctx: Non-NULL pointer to hardware counter context. 
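++ *
++ * (Editor's note, not original ARM text: in this file the transition is
++ * driven by the context-level pairing, roughly
++ *
++ *   kbase_hwcnt_context_disable(hctx);  // disable_count 0 -> 1, accumulates
++ *   kbase_hwcnt_context_enable(hctx);   // disable_count 1 -> 0, calls this
++ *
++ * with state_lock held around the call, as asserted in the body below.)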
++ */ ++static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) +{ -+ struct kbase_ipa_counter_model_data *model_data; -+ s32 *counter_coeffs_p; -+ int err = 0; -+ size_t i; ++ int errcode = 0; ++ struct kbase_hwcnt_accumulator *accum; + -+ if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def || -+ !num_top_level_cntrs || !num_shader_cores_cntrs) -+ return -EINVAL; ++ WARN_ON(!hctx); ++ lockdep_assert_held(&hctx->state_lock); ++ WARN_ON(!hctx->accum_inited); ++ WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); + -+ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); -+ if (!model_data) -+ return -ENOMEM; ++ accum = &hctx->accum; + -+ model_data->kbdev = model->kbdev; ++ /* The backend only needs enabling if any counters are enabled */ ++ if (accum->enable_map_any_enabled) ++ errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map); + -+ model_data->top_level_cntrs_def = top_level_cntrs_def; -+ model_data->num_top_level_cntrs = num_top_level_cntrs; ++ if (!errcode) ++ accum->state = ACCUM_STATE_ENABLED; ++ else ++ accum->state = ACCUM_STATE_ERROR; ++} + -+ model_data->shader_cores_cntrs_def = shader_cores_cntrs_def; -+ model_data->num_shader_cores_cntrs = num_shader_cores_cntrs; ++/** ++ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date ++ * values of enabled counters possible, and ++ * optionally update the set of enabled ++ * counters. ++ * @hctx: Non-NULL pointer to the hardware counter context ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. ++ * @new_map: Pointer to the new counter enable map. If non-NULL, must have ++ * the same metadata as the accumulator. If NULL, the set of ++ * enabled counters will be unchanged. ++ * ++ * Return: 0 on success, else error code. 
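++ *
++ * (Editor's sketch, not original ARM text: the exported wrappers further down
++ * reduce to this helper, e.g.
++ *
++ *   kbase_hwcnt_accumulator_set_counters(accum, &map, &t0, &t1, &buf);
++ *   kbase_hwcnt_accumulator_dump(accum, &t0, &t1, &buf);
++ *
++ * where "accum", "map", "t0", "t1" and "buf" are hypothetical caller
++ * variables; the first form installs a new enable map, the second passes
++ * new_map = NULL and leaves the enabled set unchanged.)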
++ */ ++static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns, ++ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf, ++ const struct kbase_hwcnt_enable_map *new_map) ++{ ++ int errcode = 0; ++ unsigned long flags; ++ enum kbase_hwcnt_accum_state state; ++ bool dump_requested = false; ++ bool dump_written = false; ++ bool cur_map_any_enabled; ++ struct kbase_hwcnt_enable_map *cur_map; ++ bool new_map_any_enabled = false; ++ u64 dump_time_ns = 0; ++ struct kbase_hwcnt_accumulator *accum; + -+ model->model_data = (void *)model_data; ++ WARN_ON(!hctx); ++ WARN_ON(!ts_start_ns); ++ WARN_ON(!ts_end_ns); ++ WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata)); ++ WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata)); ++ WARN_ON(!hctx->accum_inited); ++ lockdep_assert_held(&hctx->accum_lock); + -+ counter_coeffs_p = model_data->counter_coeffs; ++ accum = &hctx->accum; ++ cur_map = &accum->scratch_map; + -+ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { -+ const struct kbase_ipa_counter *counter = -+ &model_data->top_level_cntrs_def[i]; ++ /* Save out info about the current enable map */ ++ cur_map_any_enabled = accum->enable_map_any_enabled; ++ kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); + -+ *counter_coeffs_p = counter->coeff_default_value; ++ if (new_map) ++ new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map); + -+ err = kbase_ipa_model_add_param_s32( -+ model, counter->name, counter_coeffs_p, 1, false); -+ if (err) -+ goto exit; ++ /* ++ * We're holding accum_lock, so the accumulator state might transition ++ * from disabled to enabled during this function (as enabling is lock ++ * free), but it will never disable (as disabling needs to hold the ++ * accum_lock), nor will it ever transition from enabled to error (as ++ * an enable while we're already enabled is impossible). ++ * ++ * If we're already disabled, we'll only look at the accumulation buffer ++ * rather than do a real dump, so a concurrent enable does not affect ++ * us. ++ * ++ * If a concurrent enable fails, we might transition to the error ++ * state, but again, as we're only looking at the accumulation buffer, ++ * it's not an issue. ++ */ ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+ counter_coeffs_p++; ++ state = accum->state; ++ ++ /* ++ * Update the new map now, such that if an enable occurs during this ++ * dump then that enable will set the new map. If we're already enabled, ++ * then we'll do it ourselves after the dump. ++ */ ++ if (new_map) { ++ kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map); ++ accum->enable_map_any_enabled = new_map_any_enabled; + } + -+ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { -+ const struct kbase_ipa_counter *counter = -+ &model_data->shader_cores_cntrs_def[i]; ++ spin_unlock_irqrestore(&hctx->state_lock, flags); + -+ *counter_coeffs_p = counter->coeff_default_value; ++ /* Error state, so early out. No need to roll back any map updates */ ++ if (state == ACCUM_STATE_ERROR) ++ return -EIO; + -+ err = kbase_ipa_model_add_param_s32( -+ model, counter->name, counter_coeffs_p, 1, false); -+ if (err) -+ goto exit; ++ /* Initiate the dump if the backend is enabled. 
*/ ++ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { ++ if (dump_buf) { ++ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); ++ dump_requested = true; ++ } else { ++ dump_time_ns = hctx->iface->timestamp_ns(accum->backend); ++ errcode = hctx->iface->dump_clear(accum->backend); ++ } + -+ counter_coeffs_p++; ++ if (errcode) ++ goto error; ++ } else { ++ dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + } + -+ model_data->scaling_factor = DEFAULT_SCALING_FACTOR; -+ err = kbase_ipa_model_add_param_s32( -+ model, "scale", &model_data->scaling_factor, 1, false); -+ if (err) -+ goto exit; -+ -+ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; -+ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", -+ &model_data->min_sample_cycles, 1, -+ false); -+ if (err) -+ goto exit; ++ /* Copy any accumulation into the dest buffer */ ++ if (accum->accumulated && dump_buf) { ++ kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map); ++ dump_written = true; ++ } + -+ model_data->reference_voltage = reference_voltage; -+ err = kbase_ipa_model_add_param_s32(model, "reference_voltage", -+ &model_data->reference_voltage, 1, -+ false); -+ if (err) -+ goto exit; ++ /* Wait for any requested dumps to complete */ ++ if (dump_requested) { ++ WARN_ON(state != ACCUM_STATE_ENABLED); ++ errcode = hctx->iface->dump_wait(accum->backend); ++ if (errcode) ++ goto error; ++ } + -+ err = kbase_ipa_attach_ipa_control(model_data); ++ /* If we're enabled and there's a new enable map, change the enabled set ++ * as soon after the dump has completed as possible. ++ */ ++ if ((state == ACCUM_STATE_ENABLED) && new_map) { ++ /* Backend is only enabled if there were any enabled counters */ ++ if (cur_map_any_enabled) ++ hctx->iface->dump_disable(accum->backend); + -+exit: -+ if (err) { -+ kbase_ipa_model_param_free_all(model); -+ kfree(model_data); ++ /* (Re-)enable the backend if the new map has enabled counters. ++ * No need to acquire the spinlock, as concurrent enable while ++ * we're already enabled and holding accum_lock is impossible. ++ */ ++ if (new_map_any_enabled) { ++ errcode = hctx->iface->dump_enable(accum->backend, new_map); ++ if (errcode) ++ goto error; ++ } + } -+ return err; -+} + -+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model) -+{ -+ struct kbase_ipa_counter_model_data *model_data = -+ (struct kbase_ipa_counter_model_data *)model->model_data; ++ /* Copy, accumulate, or zero into the dest buffer to finish */ ++ if (dump_buf) { ++ /* If we dumped, copy or accumulate it into the destination */ ++ if (dump_requested) { ++ WARN_ON(state != ACCUM_STATE_ENABLED); ++ errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map, ++ dump_written); ++ if (errcode) ++ goto error; ++ dump_written = true; ++ } + -+ kbase_ipa_detach_ipa_control(model_data); -+ kfree(model_data); -+} -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h -new file mode 100644 -index 000000000..37d2efc59 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h -@@ -0,0 +1,159 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* If we've not written anything into the dump buffer so far, it ++ * means there was nothing to write. Zero any enabled counters. ++ */ ++ if (!dump_written) ++ kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); ++ } + -+#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_ -+#define _KBASE_IPA_COUNTER_COMMON_CSF_H_ ++ /* Write out timestamps */ ++ *ts_start_ns = accum->ts_last_dump_ns; ++ *ts_end_ns = dump_time_ns; + -+#include "mali_kbase.h" -+#include "csf/ipa_control/mali_kbase_csf_ipa_control.h" ++ accum->accumulated = false; ++ accum->ts_last_dump_ns = dump_time_ns; + -+/* Maximum number of HW counters used by the IPA counter model. */ -+#define KBASE_IPA_MAX_COUNTER_DEF_NUM 24 ++ return 0; ++error: ++ /* An error was only physically possible if the backend was enabled */ ++ WARN_ON(state != ACCUM_STATE_ENABLED); + -+struct kbase_ipa_counter_model_data; ++ /* Disable the backend, and transition to the error state */ ++ hctx->iface->dump_disable(accum->backend); ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+/** -+ * struct kbase_ipa_counter_model_data - IPA counter model context per device -+ * @kbdev: Pointer to kbase device -+ * @ipa_control_client: Handle returned on registering IPA counter model as a -+ * client of kbase_ipa_control. -+ * @top_level_cntrs_def: Array of description of HW counters used by the IPA -+ * counter model for top-level. -+ * @num_top_level_cntrs: Number of elements in @top_level_cntrs_def array. -+ * @shader_cores_cntrs_def: Array of description of HW counters used by the IPA -+ * counter model for shader cores. -+ * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array. -+ * @counter_coeffs: Buffer to store coefficient value used for HW counters -+ * @counter_values: Buffer to store the accumulated value of HW counters -+ * retreived from kbase_ipa_control. -+ * @num_counters: Number of counters queried from kbase_ipa_control. -+ * @reference_voltage: voltage, in mV, of the operating point used when -+ * deriving the power model coefficients. Range approx -+ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 -+ * @scaling_factor: User-specified power scaling factor. This is an -+ * integer, which is multiplied by the power coefficient -+ * just before OPP scaling. -+ * Range approx 0-32: 0 < scaling_factor < 2^5 -+ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of -+ * cycles the GPU was working) is less than -+ * min_sample_cycles, the counter model will return an -+ * error, causing the IPA framework to approximate using -+ * the cached simple model results instead. This may be -+ * more accurate than extrapolating using a very small -+ * counter dump. 
-+ */ -+struct kbase_ipa_counter_model_data { -+ struct kbase_device *kbdev; -+ void *ipa_control_client; -+ const struct kbase_ipa_counter *top_level_cntrs_def; -+ size_t num_top_level_cntrs; -+ const struct kbase_ipa_counter *shader_cores_cntrs_def; -+ size_t num_shader_cores_cntrs; -+ s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM]; -+ u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM]; -+ u64 num_counters; -+ s32 reference_voltage; -+ s32 scaling_factor; -+ s32 min_sample_cycles; -+}; ++ accum->state = ACCUM_STATE_ERROR; + -+/** -+ * struct kbase_ipa_counter - represents a single HW counter used by IPA model -+ * @name: Name of the HW counter used by IPA counter model -+ * for energy estimation. -+ * @coeff_default_value: Default value of coefficient for the counter. -+ * Coefficients are interpreted as fractions where the -+ * denominator is 1000000. -+ * @counter_block_offset: Index to the counter within the counter block of -+ * type @counter_block_type. -+ * @counter_block_type: Type of the counter block. -+ */ -+struct kbase_ipa_counter { -+ const char *name; -+ s32 coeff_default_value; -+ u32 counter_block_offset; -+ enum kbase_ipa_core_type counter_block_type; -+}; ++ spin_unlock_irqrestore(&hctx->state_lock, flags); + -+/** -+ * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters -+ * @model: pointer to instantiated model -+ * @coeffp: pointer to location where calculated power, in -+ * pW/(Hz V^2), is stored for top level and shader cores. -+ * -+ * This is a GPU-agnostic implementation of the get_dynamic_coeff() -+ * function of an IPA model. It relies on the model being populated -+ * with GPU-specific attributes at initialization time. -+ * -+ * Return: 0 on success, or an error code. -+ */ -+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); ++ return errcode; ++} + +/** -+ * kbase_ipa_counter_reset_data() - Reset the counters data used for dynamic -+ * power estimation -+ * @model: pointer to instantiated model -+ * -+ * Retrieve the accumulated value of HW counters from the kbase_ipa_control -+ * component, without doing any processing, which is effectively a reset as the -+ * next call to kbase_ipa_counter_dynamic_coeff() will see the increment in -+ * counter values from this point onwards. ++ * kbasep_hwcnt_context_disable() - Increment the disable count of the context. ++ * @hctx: Non-NULL pointer to hardware counter context. ++ * @accumulate: True if we should accumulate before disabling, else false. + */ -+void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model); ++static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate) ++{ ++ unsigned long flags; + -+/** -+ * kbase_ipa_counter_common_model_init() - initialize ipa power model -+ * @model: Pointer to the ipa power model to initialize -+ * @top_level_cntrs_def: Array corresponding to the HW counters used in the -+ * top level counter model, contains the counter index, -+ * default value of the coefficient. -+ * @num_top_level_cntrs: Number of elements in the array @top_level_cntrs_def -+ * @shader_cores_cntrs_def: Array corresponding to the HW counters used in the -+ * shader cores counter model, contains the counter index, -+ * default value of the coefficient. -+ * @num_shader_cores_cntrs: Number of elements in the array -+ * @shader_cores_cntrs_def. -+ * @reference_voltage: voltage, in mV, of the operating point used when -+ * deriving the power model coefficients. 
-+ * -+ * This function performs initialization steps common for ipa counter based -+ * model of all CSF GPUs. The set of counters and their respective weights -+ * could be different for each GPU. The tuple of counter index and weight -+ * is passed via @top_level_cntrs_def and @shader_cores_cntrs_def array. -+ * -+ * Return: 0 on success, error code otherwise -+ */ -+int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, -+ const struct kbase_ipa_counter *top_level_cntrs_def, -+ size_t num_top_level_cntrs, -+ const struct kbase_ipa_counter *shader_cores_cntrs_def, -+ size_t num_shader_cores_cntrs, -+ s32 reference_voltage); -+/** -+ * kbase_ipa_counter_common_model_term() - terminate ipa power model -+ * @model: ipa power model to terminate -+ * -+ * This function performs all necessary steps to terminate ipa power model -+ * including clean up of resources allocated to hold model data. -+ */ -+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model); ++ WARN_ON(!hctx); ++ lockdep_assert_held(&hctx->accum_lock); + -+#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */ -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c -new file mode 100644 -index 000000000..34515a934 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c -@@ -0,0 +1,355 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (!kbase_hwcnt_context_disable_atomic(hctx)) { ++ kbasep_hwcnt_accumulator_disable(hctx, accumulate); + -+#include "mali_kbase_ipa_counter_common_jm.h" -+#include "ipa/mali_kbase_ipa_debugfs.h" ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+#define DEFAULT_SCALING_FACTOR 5 ++ /* Atomic disable failed and we're holding the mutex, so current ++ * disable count must be 0. ++ */ ++ WARN_ON(hctx->disable_count != 0); ++ hctx->disable_count++; + -+/* If the value of GPU_ACTIVE is below this, use the simple model -+ * instead, to avoid extrapolating small amounts of counter data across -+ * large sample periods. -+ */ -+#define DEFAULT_MIN_SAMPLE_CYCLES 10000 ++ spin_unlock_irqrestore(&hctx->state_lock, flags); ++ } ++} + -+/** -+ * kbase_ipa_read_hwcnt() - read a counter value -+ * @model_data: pointer to model data -+ * @offset: offset, in bytes, into vinstr buffer -+ * -+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be -+ * incrementing every cycle over a ~100ms sample period at a high frequency, -+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27. 
-+ */ -+static inline u32 kbase_ipa_read_hwcnt( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ u32 offset) ++int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, ++ struct kbase_hwcnt_accumulator **accum) +{ -+ u8 *p = (u8 *)model_data->dump_buf.dump_buf; -+ u64 val = *(u64 *)&p[offset]; ++ int errcode = 0; ++ unsigned long flags; + -+ return (val > U32_MAX) ? U32_MAX : (u32)val; -+} ++ if (!hctx || !accum) ++ return -EINVAL; + -+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) -+{ -+ s64 rtn; ++ mutex_lock(&hctx->accum_lock); ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+ if (a > 0 && (S64_MAX - a) < b) -+ rtn = S64_MAX; -+ else if (a < 0 && (S64_MIN - a) > b) -+ rtn = S64_MIN; ++ if (!hctx->accum_inited) ++ /* Set accum initing now to prevent concurrent init */ ++ hctx->accum_inited = true; + else -+ rtn = a + b; ++ /* Already have an accum, or already being inited */ ++ errcode = -EBUSY; + -+ return rtn; -+} ++ spin_unlock_irqrestore(&hctx->state_lock, flags); ++ mutex_unlock(&hctx->accum_lock); + -+s64 kbase_ipa_sum_all_shader_cores( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter) -+{ -+ struct kbase_device *kbdev = model_data->kbdev; -+ u64 core_mask; -+ u32 base = 0; -+ s64 ret = 0; ++ if (errcode) ++ return errcode; + -+ core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; -+ while (core_mask != 0ull) { -+ if ((core_mask & 1ull) != 0ull) { -+ /* 0 < counter_value < 2^27 */ -+ u32 counter_value = kbase_ipa_read_hwcnt(model_data, -+ base + counter); ++ errcode = kbasep_hwcnt_accumulator_init(hctx); + -+ /* 0 < ret < 2^27 * max_num_cores = 2^32 */ -+ ret = kbase_ipa_add_saturate(ret, counter_value); -+ } -+ base += KBASE_IPA_NR_BYTES_PER_BLOCK; -+ core_mask >>= 1; ++ if (errcode) { ++ mutex_lock(&hctx->accum_lock); ++ spin_lock_irqsave(&hctx->state_lock, flags); ++ ++ hctx->accum_inited = false; ++ ++ spin_unlock_irqrestore(&hctx->state_lock, flags); ++ mutex_unlock(&hctx->accum_lock); ++ ++ return errcode; + } + -+ /* Range: -2^54 < ret * coeff < 2^54 */ -+ return ret * coeff; -+} ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+s64 kbase_ipa_sum_all_memsys_blocks( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter) -+{ -+ struct kbase_device *kbdev = model_data->kbdev; -+ const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; -+ u32 base = 0; -+ s64 ret = 0; -+ u32 i; ++ WARN_ON(hctx->disable_count == 0); ++ WARN_ON(hctx->accum.enable_map_any_enabled); + -+ for (i = 0; i < num_blocks; i++) { -+ /* 0 < counter_value < 2^27 */ -+ u32 counter_value = kbase_ipa_read_hwcnt(model_data, -+ base + counter); ++ /* Decrement the disable count to allow the accumulator to be accessible ++ * now that it's fully constructed. ++ */ ++ hctx->disable_count--; + -+ /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ -+ ret = kbase_ipa_add_saturate(ret, counter_value); -+ base += KBASE_IPA_NR_BYTES_PER_BLOCK; -+ } ++ /* ++ * Make sure the accumulator is initialised to the correct state. ++ * Regardless of initial state, counters don't need to be enabled via ++ * the backend, as the initial enable map has no enabled counters. ++ */ ++ hctx->accum.state = (hctx->disable_count == 0) ? 
ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED; + -+ /* Range: -2^51 < ret * coeff < 2^51 */ -+ return ret * coeff; -+} ++ spin_unlock_irqrestore(&hctx->state_lock, flags); + -+s64 kbase_ipa_single_counter( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter) -+{ -+ /* Range: 0 < counter_value < 2^27 */ -+ const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); ++ *accum = &hctx->accum; + -+ /* Range: -2^49 < ret < 2^49 */ -+ return counter_value * (s64) coeff; ++ return 0; +} + -+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) ++void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) +{ -+ int errcode; -+ struct kbase_device *kbdev = model_data->kbdev; -+ struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; -+ struct kbase_hwcnt_enable_map enable_map; -+ const struct kbase_hwcnt_metadata *metadata = -+ kbase_hwcnt_virtualizer_metadata(hvirt); ++ unsigned long flags; ++ struct kbase_hwcnt_context *hctx; + -+ if (!metadata) -+ return -1; ++ if (!accum) ++ return; + -+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); -+ if (errcode) { -+ dev_err(kbdev->dev, "Failed to allocate IPA enable map"); -+ return errcode; -+ } ++ hctx = container_of(accum, struct kbase_hwcnt_context, accum); + -+ kbase_hwcnt_enable_map_enable_all(&enable_map); ++ mutex_lock(&hctx->accum_lock); + -+ /* Disable cycle counter only. */ -+ enable_map.clk_enable_map = 0; ++ /* Double release is a programming error */ ++ WARN_ON(!hctx->accum_inited); + -+ errcode = kbase_hwcnt_virtualizer_client_create( -+ hvirt, &enable_map, &model_data->hvirt_cli); -+ kbase_hwcnt_enable_map_free(&enable_map); -+ if (errcode) { -+ dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); -+ model_data->hvirt_cli = NULL; -+ return errcode; -+ } ++ /* Disable the context to ensure the accumulator is inaccesible while ++ * we're destroying it. This performs the corresponding disable count ++ * increment to the decrement done during acquisition. ++ */ ++ kbasep_hwcnt_context_disable(hctx, false); + -+ errcode = kbase_hwcnt_dump_buffer_alloc( -+ metadata, &model_data->dump_buf); -+ if (errcode) { -+ dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); -+ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); -+ model_data->hvirt_cli = NULL; -+ return errcode; -+ } ++ mutex_unlock(&hctx->accum_lock); + -+ return 0; ++ kbasep_hwcnt_accumulator_term(hctx); ++ ++ mutex_lock(&hctx->accum_lock); ++ spin_lock_irqsave(&hctx->state_lock, flags); ++ ++ hctx->accum_inited = false; ++ ++ spin_unlock_irqrestore(&hctx->state_lock, flags); ++ mutex_unlock(&hctx->accum_lock); +} + -+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) ++void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) +{ -+ if (model_data->hvirt_cli) { -+ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); -+ kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); -+ model_data->hvirt_cli = NULL; -+ } ++ if (WARN_ON(!hctx)) ++ return; ++ ++ /* Try and atomically disable first, so we can avoid locking the mutex ++ * if we don't need to. 
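++ *
++ * (Editor's note, not original ARM text: the atomic path only succeeds when
++ * disable_count is already non-zero, in which case it just bumps the count;
++ * otherwise we fall back to taking accum_lock so that
++ * kbasep_hwcnt_context_disable() can accumulate and disable the backend.)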
++ */ ++ if (kbase_hwcnt_context_disable_atomic(hctx)) ++ return; ++ ++ mutex_lock(&hctx->accum_lock); ++ ++ kbasep_hwcnt_context_disable(hctx, true); ++ ++ mutex_unlock(&hctx->accum_lock); +} + -+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) +{ -+ struct kbase_ipa_model_vinstr_data *model_data = -+ (struct kbase_ipa_model_vinstr_data *)model->model_data; -+ s64 energy = 0; -+ size_t i; -+ u64 coeff = 0, coeff_mul = 0; -+ u64 start_ts_ns, end_ts_ns; -+ u32 active_cycles; -+ int err = 0; ++ unsigned long flags; ++ bool atomic_disabled = false; + -+ err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, -+ &start_ts_ns, &end_ts_ns, &model_data->dump_buf); -+ if (err) -+ goto err0; ++ if (WARN_ON(!hctx)) ++ return false; + -+ /* Range: 0 (GPU not used at all), to the max sampling interval, say -+ * 1s, * max GPU frequency (GPU 100% utilized). -+ * 0 <= active_cycles <= 1 * ~2GHz -+ * 0 <= active_cycles < 2^31 -+ */ -+ active_cycles = model_data->get_active_cycles(model_data); ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+ if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { -+ err = -ENODATA; -+ goto err0; ++ if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { ++ /* ++ * If disable count is non-zero, we can just bump the disable ++ * count. ++ * ++ * Otherwise, we can't disable in an atomic context. ++ */ ++ if (hctx->disable_count != 0) { ++ hctx->disable_count++; ++ atomic_disabled = true; ++ } + } + -+ /* Range: 1 <= active_cycles < 2^31 */ -+ active_cycles = max(1u, active_cycles); ++ spin_unlock_irqrestore(&hctx->state_lock, flags); + -+ /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around -+ * -2^57 < energy < 2^57 -+ */ -+ for (i = 0; i < model_data->groups_def_num; i++) { -+ const struct kbase_ipa_group *group = &model_data->groups_def[i]; -+ s32 coeff = model_data->group_values[i]; -+ s64 group_energy = group->op(model_data, coeff, -+ group->counter_block_offset); ++ return atomic_disabled; ++} + -+ energy = kbase_ipa_add_saturate(energy, group_energy); -+ } ++void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) ++{ ++ unsigned long flags; + -+ /* Range: 0 <= coeff < 2^57 */ -+ if (energy > 0) -+ coeff = energy; ++ if (WARN_ON(!hctx)) ++ return; + -+ /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this -+ * can be constrained further: Counter values can only be increased by -+ * a theoretical maximum of about 64k per clock cycle. Beyond this, -+ * we'd have to sample every 1ms to avoid them overflowing at the -+ * lowest clock frequency (say 100MHz). Therefore, we can write the -+ * range of 'coeff' in terms of active_cycles: -+ * -+ * coeff = SUM(coeffN * counterN * num_cores_for_counterN) -+ * coeff <= SUM(coeffN * counterN) * max_num_cores -+ * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores -+ * (substitute max_counter = 2^16 * active_cycles) -+ * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores -+ * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5 -+ * coeff <= 2^46 * active_cycles -+ * -+ * So after the division: 0 <= coeff <= 2^46 -+ */ -+ coeff = div_u64(coeff, active_cycles); ++ spin_lock_irqsave(&hctx->state_lock, flags); + -+ /* Not all models were derived at the same reference voltage. Voltage -+ * scaling is done by multiplying by V^2, so we need to *divide* by -+ * Vref^2 here. 
-+ * Range: 0 <= coeff <= 2^49 -+ */ -+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); -+ /* Range: 0 <= coeff <= 2^52 */ -+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ if (!WARN_ON(hctx->disable_count == 0)) { ++ if (hctx->disable_count == 1) ++ kbasep_hwcnt_accumulator_enable(hctx); + -+ /* Scale by user-specified integer factor. -+ * Range: 0 <= coeff_mul < 2^57 -+ */ -+ coeff_mul = coeff * model_data->scaling_factor; ++ hctx->disable_count--; ++ } + -+ /* The power models have results with units -+ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this -+ * becomes fW/(Hz V^2), which are the units of coeff_mul. However, -+ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide -+ * by 1000. -+ * Range: 0 <= coeff_mul < 2^47 -+ */ -+ coeff_mul = div_u64(coeff_mul, 1000u); ++ spin_unlock_irqrestore(&hctx->state_lock, flags); ++} + -+err0: -+ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ -+ *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); -+ return err; ++const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx) ++{ ++ if (!hctx) ++ return NULL; ++ ++ return hctx->iface->metadata(hctx->iface->info); +} + -+void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model) ++bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work) +{ -+ /* Currently not implemented */ -+ WARN_ON_ONCE(1); ++ if (WARN_ON(!hctx) || WARN_ON(!work)) ++ return false; ++ ++ return queue_work(hctx->wq, work); +} + -+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, -+ const struct kbase_ipa_group *ipa_groups_def, -+ size_t ipa_group_size, -+ kbase_ipa_get_active_cycles_callback get_active_cycles, -+ s32 reference_voltage) ++int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, ++ const struct kbase_hwcnt_enable_map *new_map, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf) +{ -+ int err = 0; -+ size_t i; -+ struct kbase_ipa_model_vinstr_data *model_data; ++ int errcode; ++ struct kbase_hwcnt_context *hctx; + -+ if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles) ++ if (!accum || !new_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + -+ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); -+ if (!model_data) -+ return -ENOMEM; ++ hctx = container_of(accum, struct kbase_hwcnt_context, accum); + -+ model_data->kbdev = model->kbdev; -+ model_data->groups_def = ipa_groups_def; -+ model_data->groups_def_num = ipa_group_size; -+ model_data->get_active_cycles = get_active_cycles; ++ if ((new_map->metadata != hctx->accum.metadata) || ++ (dump_buf && (dump_buf->metadata != hctx->accum.metadata))) ++ return -EINVAL; + -+ model->model_data = (void *) model_data; ++ mutex_lock(&hctx->accum_lock); + -+ for (i = 0; i < model_data->groups_def_num; ++i) { -+ const struct kbase_ipa_group *group = &model_data->groups_def[i]; ++ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + -+ model_data->group_values[i] = group->default_value; -+ err = kbase_ipa_model_add_param_s32(model, group->name, -+ &model_data->group_values[i], -+ 1, false); -+ if (err) -+ goto exit; -+ } ++ mutex_unlock(&hctx->accum_lock); + -+ model_data->scaling_factor = DEFAULT_SCALING_FACTOR; -+ err = kbase_ipa_model_add_param_s32(model, "scale", -+ &model_data->scaling_factor, -+ 1, false); -+ if (err) -+ goto exit; ++ return errcode; ++} + 
-+ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; -+ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", -+ &model_data->min_sample_cycles, -+ 1, false); -+ if (err) -+ goto exit; ++int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, ++ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ int errcode; ++ struct kbase_hwcnt_context *hctx; + -+ model_data->reference_voltage = reference_voltage; -+ err = kbase_ipa_model_add_param_s32(model, "reference_voltage", -+ &model_data->reference_voltage, -+ 1, false); -+ if (err) -+ goto exit; ++ if (!accum || !ts_start_ns || !ts_end_ns) ++ return -EINVAL; + -+ err = kbase_ipa_attach_vinstr(model_data); ++ hctx = container_of(accum, struct kbase_hwcnt_context, accum); + -+exit: -+ if (err) { -+ kbase_ipa_model_param_free_all(model); -+ kfree(model_data); -+ } -+ return err; ++ if (dump_buf && (dump_buf->metadata != hctx->accum.metadata)) ++ return -EINVAL; ++ ++ mutex_lock(&hctx->accum_lock); ++ ++ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); ++ ++ mutex_unlock(&hctx->accum_lock); ++ ++ return errcode; +} + -+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) ++u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) +{ -+ struct kbase_ipa_model_vinstr_data *model_data = -+ (struct kbase_ipa_model_vinstr_data *)model->model_data; ++ struct kbase_hwcnt_context *hctx; + -+ kbase_ipa_detach_vinstr(model_data); -+ kfree(model_data); ++ if (WARN_ON(!accum)) ++ return 0; ++ ++ hctx = container_of(accum, struct kbase_hwcnt_context, accum); ++ return hctx->iface->timestamp_ns(accum->backend); +} -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h new file mode 100644 -index 000000000..608961084 +index 000000000..069e02068 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h -@@ -0,0 +1,234 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h +@@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -194672,229 +197712,288 @@ index 000000000..608961084 + * + */ + -+#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_ -+#define _KBASE_IPA_COUNTER_COMMON_JM_H_ -+ -+#include "mali_kbase.h" -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+ -+/* Maximum number of IPA groups for an IPA model. */ -+#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 -+ -+/* Number of bytes per hardware counter in a vinstr_buffer. */ -+#define KBASE_IPA_NR_BYTES_PER_CNT (sizeof(u64)) -+ -+/* Number of hardware counters per block in a vinstr_buffer. */ -+#define KBASE_IPA_NR_CNT_PER_BLOCK 64 ++/* ++ * Hardware counter accumulator API. ++ */ + -+/* Number of bytes per block in a vinstr_buffer. 
*/ -+#define KBASE_IPA_NR_BYTES_PER_BLOCK \ -+ (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) ++#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ ++#define _KBASE_HWCNT_ACCUMULATOR_H_ + -+struct kbase_ipa_model_vinstr_data; ++#include + -+typedef u32 -+kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); ++struct kbase_hwcnt_context; ++struct kbase_hwcnt_accumulator; ++struct kbase_hwcnt_enable_map; ++struct kbase_hwcnt_dump_buffer; + +/** -+ * struct kbase_ipa_model_vinstr_data - IPA context per device -+ * @kbdev: pointer to kbase device -+ * @group_values: values of coefficients for IPA groups -+ * @groups_def: Array of IPA groups. -+ * @groups_def_num: Number of elements in the array of IPA groups. -+ * @get_active_cycles: Callback to return number of active cycles during -+ * counter sample period -+ * @hvirt_cli: hardware counter virtualizer client handle -+ * @dump_buf: buffer to dump hardware counters onto -+ * @reference_voltage: voltage, in mV, of the operating point used when -+ * deriving the power model coefficients. Range approx -+ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 -+ * @scaling_factor: User-specified power scaling factor. This is an -+ * integer, which is multiplied by the power coefficient -+ * just before OPP scaling. -+ * Range approx 0-32: 0 < scaling_factor < 2^5 -+ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of -+ * cycles the GPU was working) is less than -+ * min_sample_cycles, the counter model will return an -+ * error, causing the IPA framework to approximate using -+ * the cached simple model results instead. This may be -+ * more accurate than extrapolating using a very small -+ * counter dump. ++ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator ++ * for a hardware counter context. ++ * @hctx: Non-NULL pointer to a hardware counter context. ++ * @accum: Non-NULL pointer to where the pointer to the created accumulator ++ * will be stored on success. ++ * ++ * There can exist at most one instance of the hardware counter accumulator per ++ * context at a time. ++ * ++ * If multiple clients need access to the hardware counters at the same time, ++ * then an abstraction built on top of the single instance to the hardware ++ * counter accumulator is required. ++ * ++ * No counters will be enabled with the returned accumulator. A subsequent call ++ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. ++ * ++ * There are four components to a hardware counter dump: ++ * - A set of enabled counters ++ * - A start time ++ * - An end time ++ * - A dump buffer containing the accumulated counter values for all enabled ++ * counters between the start and end times. ++ * ++ * For each dump, it is guaranteed that all enabled counters were active for the ++ * entirety of the period between the start and end times. ++ * ++ * It is also guaranteed that the start time of dump "n" is always equal to the ++ * end time of dump "n - 1". ++ * ++ * For all dumps, the values of any counters that were not enabled is undefined. ++ * ++ * Return: 0 on success or error code. 
+ */ -+struct kbase_ipa_model_vinstr_data { -+ struct kbase_device *kbdev; -+ s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; -+ const struct kbase_ipa_group *groups_def; -+ size_t groups_def_num; -+ kbase_ipa_get_active_cycles_callback *get_active_cycles; -+ struct kbase_hwcnt_virtualizer_client *hvirt_cli; -+ struct kbase_hwcnt_dump_buffer dump_buf; -+ s32 reference_voltage; -+ s32 scaling_factor; -+ s32 min_sample_cycles; -+}; ++int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, ++ struct kbase_hwcnt_accumulator **accum); + +/** -+ * struct kbase_ipa_group - represents a single IPA group -+ * @name: name of the IPA group -+ * @default_value: default value of coefficient for IPA group. -+ * Coefficients are interpreted as fractions where the -+ * denominator is 1000000. -+ * @op: which operation to be performed on the counter values -+ * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group ++ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. ++ * @accum: Non-NULL pointer to the hardware counter accumulator. ++ * ++ * The accumulator must be released before the context the accumulator was ++ * created from is terminated. + */ -+struct kbase_ipa_group { -+ const char *name; -+ s32 default_value; -+ s64 (*op)( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, -+ u32 counter_block_offset); -+ u32 counter_block_offset; -+}; ++void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** -+ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores -+ * @model_data: pointer to model data -+ * @coeff: model coefficient. Unity is ~2^20, so range approx -+ * +/- 4.0: -2^22 < coeff < 2^22 -+ * @counter: offset in bytes of the counter used to calculate energy -+ * for IPA group ++ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently ++ * enabled counters, and enable a new ++ * set of counters that will be used ++ * for subsequent dumps. ++ * @accum: Non-NULL pointer to the hardware counter accumulator. ++ * @new_map: Non-NULL pointer to the new counter enable map. Must have the ++ * same metadata as the accumulator. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * Calculate energy estimation based on hardware counter `counter' -+ * across all shader cores. ++ * If this function fails for some unexpected reason (i.e. anything other than ++ * invalid args), then the accumulator will be put into the error state until ++ * the parent context is next disabled. + * -+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54 ++ * Return: 0 on success or error code. + */ -+s64 kbase_ipa_sum_all_shader_cores( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter); ++int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, ++ const struct kbase_hwcnt_enable_map *new_map, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf); + +/** -+ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks -+ * @model_data: pointer to model data -+ * @coeff: model coefficient. 
Unity is ~2^20, so range approx -+ * +/- 4.0: -2^22 < coeff < 2^22 -+ * @counter: offset in bytes of the counter used to calculate energy -+ * for IPA group ++ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled ++ * counters. ++ * @accum: Non-NULL pointer to the hardware counter accumulator. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * Calculate energy estimation based on hardware counter `counter' across all -+ * memory system blocks. ++ * If this function fails for some unexpected reason (i.e. anything other than ++ * invalid args), then the accumulator will be put into the error state until ++ * the parent context is next disabled. + * -+ * Return: Sum of counter values. Range: -2^51 < ret < 2^51 ++ * Return: 0 on success or error code. + */ -+s64 kbase_ipa_sum_all_memsys_blocks( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter); ++int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, ++ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf); + +/** -+ * kbase_ipa_single_counter() - sum a single counter -+ * @model_data: pointer to model data -+ * @coeff: model coefficient. Unity is ~2^20, so range approx -+ * +/- 4.0: -2^22 < coeff < 2^22 -+ * @counter: offset in bytes of the counter used to calculate energy -+ * for IPA group ++ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend ++ * timestamp. ++ * @accum: Non-NULL pointer to the hardware counter accumulator. + * -+ * Calculate energy estimation based on hardware counter `counter'. ++ * Return: Accumulator backend timestamp in nanoseconds. ++ */ ++u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum); ++ ++#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h +new file mode 100644 +index 000000000..89732a908 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h +@@ -0,0 +1,148 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: Counter value. Range: -2^49 < ret < 2^49 + */ -+s64 kbase_ipa_single_counter( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, u32 counter); ++ ++/* ++ * Hardware counter context API. 
++ */ ++ ++#ifndef _KBASE_HWCNT_CONTEXT_H_ ++#define _KBASE_HWCNT_CONTEXT_H_ ++ ++#include ++#include ++ ++struct kbase_hwcnt_backend_interface; ++struct kbase_hwcnt_context; + +/** -+ * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model. -+ * @model_data: pointer to model data ++ * kbase_hwcnt_context_init() - Initialise a hardware counter context. ++ * @iface: Non-NULL pointer to a hardware counter backend interface. ++ * @out_hctx: Non-NULL pointer to where the pointer to the created context will ++ * be stored on success. + * -+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer -+ * allows access to the hardware counters used to calculate -+ * energy consumption. ++ * On creation, the disable count of the context will be 0. ++ * A hardware counter accumulator can be acquired using a created context. + * -+ * Return: 0 on success, or an error code. ++ * Return: 0 on success, else error code. + */ -+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); ++int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, ++ struct kbase_hwcnt_context **out_hctx); + +/** -+ * kbase_ipa_detach_vinstr() - detach a vinstr_buffer from an IPA model. -+ * @model_data: pointer to model data ++ * kbase_hwcnt_context_term() - Terminate a hardware counter context. ++ * @hctx: Pointer to context to be terminated. ++ */ ++void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); ++ ++/** ++ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by ++ * the context, so related counter data ++ * structures can be created. ++ * @hctx: Non-NULL pointer to the hardware counter context. + * -+ * Detach a vinstr_buffer from an IPA model. ++ * Return: Non-NULL pointer to metadata, or NULL on error. + */ -+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); ++const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx); + +/** -+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters -+ * @model: pointer to instantiated model -+ * @coeffp: pointer to location where calculated power, in -+ * pW/(Hz V^2), is stored. ++ * kbase_hwcnt_context_disable() - Increment the disable count of the context. ++ * @hctx: Non-NULL pointer to the hardware counter context. + * -+ * This is a GPU-agnostic implementation of the get_dynamic_coeff() -+ * function of an IPA model. It relies on the model being populated -+ * with GPU-specific attributes at initialization time. ++ * If a call to this function increments the disable count from 0 to 1, and ++ * an accumulator has been acquired, then a counter dump will be performed ++ * before counters are disabled via the backend interface. + * -+ * Return: 0 on success, or an error code. ++ * Subsequent dumps via the accumulator while counters are disabled will first ++ * return the accumulated dump, then will return dumps with zeroed counters. ++ * ++ * After this function call returns, it is guaranteed that counters will not be ++ * enabled via the backend interface. + */ -+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); ++void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** -+ * kbase_ipa_vinstr_reset_data() - Reset the counters data used for dynamic -+ * power estimation -+ * @model: pointer to instantiated model ++ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the ++ * context if possible in an atomic ++ * context. 
++ * @hctx: Non-NULL pointer to the hardware counter context. + * -+ * Currently it is not implemented for JM GPUs. -+ * When implemented it is expected to retrieve the accumulated value of HW -+ * counters from the Vinstr component, without doing any processing, which is -+ * effectively a reset as the next call to kbase_ipa_counter_dynamic_coeff() -+ * will see the increment in counter values from this point onwards. ++ * This function will only succeed if hardware counters are effectively already ++ * disabled, i.e. there is no accumulator, the disable count is already ++ * non-zero, or the accumulator has no counters set. ++ * ++ * After this function call returns true, it is guaranteed that counters will ++ * not be enabled via the backend interface. ++ * ++ * Return: True if the disable count was incremented, else False. + */ -+void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model); ++bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** -+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model -+ * @model: ipa power model to initialize -+ * @ipa_groups_def: array of ipa groups which sets coefficients for -+ * the corresponding counters used in the ipa model -+ * @ipa_group_size: number of elements in the array @ipa_groups_def -+ * @get_active_cycles: callback to return the number of cycles the GPU was -+ * active during the counter sample period. -+ * @reference_voltage: voltage, in mV, of the operating point used when -+ * deriving the power model coefficients. ++ * kbase_hwcnt_context_enable() - Decrement the disable count of the context. ++ * @hctx: Non-NULL pointer to the hardware counter context. + * -+ * This initialization function performs initialization steps common -+ * for ipa models based on counter values. In each call, the model -+ * passes its specific coefficient values per ipa counter group via -+ * @ipa_groups_def array. ++ * If a call to this function decrements the disable count from 1 to 0, and ++ * an accumulator has been acquired, then counters will be re-enabled via the ++ * backend interface. + * -+ * Return: 0 on success, error code otherwise ++ * If an accumulator has been acquired and enabling counters fails for some ++ * reason, the accumulator will be placed into an error state. ++ * ++ * It is only valid to call this function one time for each prior returned call ++ * to kbase_hwcnt_context_disable. ++ * ++ * The spinlock documented in the backend interface that was passed in to ++ * kbase_hwcnt_context_init() must be held before calling this function. + */ -+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, -+ const struct kbase_ipa_group *ipa_groups_def, -+ size_t ipa_group_size, -+ kbase_ipa_get_active_cycles_callback *get_active_cycles, -+ s32 reference_voltage); ++void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +/** -+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model -+ * @model: ipa power model to terminate ++ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async ++ * work on a workqueue specialized for ++ * hardware counters. ++ * @hctx: Non-NULL pointer to the hardware counter context. ++ * @work: Non-NULL pointer to work to queue. + * -+ * This function performs all necessary steps to terminate ipa power model -+ * including clean up of resources allocated to hold model data. ++ * Return: false if work was already on a queue, true otherwise. 
++ * ++ * Performance counter related work is high priority, short running, and ++ * generally CPU locality is unimportant. There is no standard workqueue that ++ * can service this flavor of work. ++ * ++ * Rather than have each user of counters define their own workqueue, we have ++ * a centralized one in here that anybody using this hardware counter API ++ * should use. ++ * ++ * Before the context is destroyed, all work submitted must have been completed. ++ * Given that the work enqueued via this function is likely to be hardware ++ * counter related and will therefore use the context object, this is likely ++ * to be behavior that will occur naturally. ++ * ++ * Historical note: prior to this centralized workqueue, the system_highpri_wq ++ * was used. This was generally fine, except when a particularly long running, ++ * higher priority thread ended up scheduled on the enqueuing CPU core. Given ++ * that hardware counters requires tight integration with power management, ++ * this meant progress through the power management states could be stalled ++ * for however long that higher priority thread took. + */ -+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); ++bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work); + -+#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c ++#endif /* _KBASE_HWCNT_CONTEXT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c new file mode 100644 -index 000000000..21b4e5288 +index 000000000..74916dab0 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c -@@ -0,0 +1,280 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c +@@ -0,0 +1,738 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -194912,275 +198011,733 @@ index 000000000..21b4e5288 + * + */ + -+#include "mali_kbase_ipa_counter_common_csf.h" -+#include "mali_kbase.h" -+ -+/* MEMSYS counter block offsets */ -+#define L2_RD_MSG_IN_CU (13) -+#define L2_RD_MSG_IN (16) -+#define L2_WR_MSG_IN (18) -+#define L2_SNP_MSG_IN (20) -+#define L2_RD_MSG_OUT (22) -+#define L2_READ_LOOKUP (26) -+#define L2_EXT_READ_NOSNP (30) -+#define L2_EXT_WRITE_NOSNP_FULL (43) ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" + -+/* SC counter block offsets */ -+#define FRAG_STARVING (8) -+#define FRAG_PARTIAL_QUADS_RAST (10) -+#define FRAG_QUADS_EZS_UPDATE (13) -+#define FULL_QUAD_WARPS (21) -+#define EXEC_INSTR_FMA (27) -+#define EXEC_INSTR_CVT (28) -+#define EXEC_INSTR_SFU (29) -+#define EXEC_INSTR_MSG (30) -+#define TEX_FILT_NUM_OPS (39) -+#define LS_MEM_READ_SHORT (45) -+#define LS_MEM_WRITE_SHORT (47) -+#define VARY_SLOT_16 (51) -+#define BEATS_RD_LSC_EXT (57) -+#define BEATS_RD_TEX (58) -+#define BEATS_RD_TEX_EXT (59) -+#define FRAG_QUADS_COARSE (68) ++#include + -+/* Tiler counter block offsets */ -+#define IDVS_POS_SHAD_STALL (23) -+#define PREFETCH_STALL (25) -+#define VFETCH_POS_READ_WAIT (29) -+#define VFETCH_VERTEX_WAIT (30) -+#define PRIMASSY_STALL (32) -+#define IDVS_VAR_SHAD_STALL (38) -+#define ITER_STALL (40) -+#define PMGR_PTR_RD_STALL (48) ++/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements ++ */ ++enum enable_map_idx { ++ EM_LO, ++ EM_HI, ++ EM_COUNT, ++}; + -+#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ -+ { \ -+ .name = cnt_name, \ -+ .coeff_default_value = coeff, \ -+ .counter_block_offset = cnt_idx, \ -+ .counter_block_type = block_type, \ ++static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ if (is_csf) ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; ++ else ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ if (is_csf) ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; ++ else ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); + } ++} + -+#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ -+ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) ++static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); ++ } ++} + -+#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ -+ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER) ++static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ if (is_csf) ++ *dst = 
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; ++ else ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); ++ } ++} + -+#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ -+ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER) ++static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); ++ } ++} + -+/* Tables of description of HW counters used by IPA counter model. ++/** ++ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata ++ * for the GPU. ++ * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. ++ * @is_csf: true for CSF GPU, otherwise false. ++ * @counter_set: The performance counter set to use. ++ * @metadata: Non-NULL pointer to where created metadata is stored ++ * on success. + * -+ * These tables provide a description of each performance counter -+ * used by the top level counter model for energy estimation. ++ * Return: 0 on success, else error code. + */ -+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = { -+ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), -+ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), -+ -+ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), -+ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), -+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), -+ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), -+}; ++static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, ++ const bool is_csf, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **metadata) ++{ ++ struct kbase_hwcnt_description desc; ++ struct kbase_hwcnt_group_description group; ++ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; ++ size_t non_sc_block_count; ++ size_t sc_block_count; + -+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tgrx[] = { -+ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), -+ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), ++ WARN_ON(!gpu_info); ++ WARN_ON(!metadata); + -+ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), -+ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), -+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), -+ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), -+}; ++ /* Calculate number of block instances that aren't shader cores */ ++ non_sc_block_count = 2 + gpu_info->l2_count; ++ /* Calculate number of block instances that are shader cores */ ++ sc_block_count = fls64(gpu_info->core_mask); + -+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tvax[] = { -+ MEMSYS_COUNTER_DEF("l2_rd_msg_out", 491414, L2_RD_MSG_OUT), -+ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 408645, L2_WR_MSG_IN), ++ /* ++ * A system can have up to 64 shader cores, but the 64-bit ++ * availability mask can't physically represent that many cores as well ++ * as the other hardware blocks. 
++ * Error out if there are more blocks than our implementation can ++ * support. ++ */ ++ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) ++ return -EINVAL; + -+ TILER_COUNTER_DEF("iter_stall", 893324, ITER_STALL), -+ TILER_COUNTER_DEF("pmgr_ptr_rd_stall", -975117, PMGR_PTR_RD_STALL), -+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 22555, IDVS_POS_SHAD_STALL), -+}; ++ /* One Front End block */ ++ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); ++ blks[0].inst_cnt = 1; ++ blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + -+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { -+ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 800836, L2_RD_MSG_IN), -+ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 415579, L2_WR_MSG_IN), -+ MEMSYS_COUNTER_DEF("l2_read_lookup", -198124, L2_READ_LOOKUP), ++ /* One Tiler block */ ++ kbasep_get_tiler_block_type(&blks[1].type, counter_set); ++ blks[1].inst_cnt = 1; ++ blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + -+ TILER_COUNTER_DEF("idvs_pos_shad_stall", 117358, IDVS_POS_SHAD_STALL), -+ TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), -+}; ++ /* l2_count memsys blks */ ++ kbasep_get_memsys_block_type(&blks[2].type, counter_set); ++ blks[2].inst_cnt = gpu_info->l2_count; ++ blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + -+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { -+ TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), -+ TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), ++ /* ++ * There are as many shader cores in the system as there are bits set in ++ * the core mask. However, the dump buffer memory requirements need to ++ * take into account the fact that the core mask may be non-contiguous. ++ * ++ * For example, a system with a core mask of 0b1011 has the same dump ++ * buffer memory requirements as a system with 0b1111, but requires more ++ * memory than a system with 0b0111. However, core 2 of the system with ++ * 0b1011 doesn't physically exist, and the dump buffer memory that ++ * accounts for that core will never be written to when we do a counter ++ * dump. ++ * ++ * We find the core mask's last set bit to determine the memory ++ * requirements, and embed the core mask into the availability mask so ++ * we can determine later which shader cores physically exist. ++ */ ++ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); ++ blks[3].inst_cnt = sc_block_count; ++ blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + -+ MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), -+ MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), -+ MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), -+}; ++ WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + -+/* These tables provide a description of each performance counter -+ * used by the shader cores counter model for energy estimation. 
-+ */ -+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = { -+ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), -+ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), -+ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), -+ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), -+ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), -+ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), -+}; ++ group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; ++ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; ++ group.blks = blks; + -+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tgrx[] = { -+ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), -+ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), -+ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), -+ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), -+ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), -+ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), -+}; ++ desc.grp_cnt = 1; ++ desc.grps = &group; ++ desc.clk_cnt = gpu_info->clk_cnt; + -+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tvax[] = { -+ SC_COUNTER_DEF("tex_filt_num_operations", 142536, TEX_FILT_NUM_OPS), -+ SC_COUNTER_DEF("exec_instr_fma", 243497, EXEC_INSTR_FMA), -+ SC_COUNTER_DEF("exec_instr_msg", 1344410, EXEC_INSTR_MSG), -+ SC_COUNTER_DEF("vary_slot_16", -119612, VARY_SLOT_16), -+ SC_COUNTER_DEF("frag_partial_quads_rast", 676201, FRAG_PARTIAL_QUADS_RAST), -+ SC_COUNTER_DEF("frag_starving", 62421, FRAG_STARVING), -+}; ++ /* The JM, Tiler, and L2s are always available, and are before cores */ ++ desc.avail_mask = (1ull << non_sc_block_count) - 1; ++ /* Embed the core mask directly in the availability mask */ ++ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); + -+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { -+ SC_COUNTER_DEF("exec_instr_fma", 457012, EXEC_INSTR_FMA), -+ SC_COUNTER_DEF("tex_filt_num_operations", 441911, TEX_FILT_NUM_OPS), -+ SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT), -+ SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS), -+ SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT), -+ SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), -+}; ++ return kbase_hwcnt_metadata_create(&desc, metadata); ++} + -+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { -+ SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), -+ SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), -+ SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), -+ SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), -+ SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), -+ SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), -+ SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), -+ SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), -+}; ++/** ++ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the ++ * GPU. ++ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. ++ * ++ * Return: Size of buffer the GPU needs to perform a counter dump. 
++ */ ++static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) ++{ ++ WARN_ON(!gpu_info); + -+#define IPA_POWER_MODEL_OPS(gpu, init_token) \ -+ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ -+ .name = "mali-" #gpu "-power-model", \ -+ .init = kbase_ ## init_token ## _power_model_init, \ -+ .term = kbase_ipa_counter_common_model_term, \ -+ .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ -+ .reset_counter_data = kbase_ipa_counter_reset_data, \ -+ }; \ -+ KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) ++ return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * ++ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; ++} + -+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ -+ static int kbase_ ## gpu ## _power_model_init(\ -+ struct kbase_ipa_model *model) \ -+ { \ -+ BUILD_BUG_ON((1 + \ -+ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\ -+ ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \ -+ KBASE_IPA_MAX_COUNTER_DEF_NUM); \ -+ return kbase_ipa_counter_common_model_init(model, \ -+ ipa_top_level_cntrs_def_ ## gpu, \ -+ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \ -+ ipa_shader_core_cntrs_def_ ## gpu, \ -+ ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \ -+ (reference_voltage)); \ -+ } \ -+ IPA_POWER_MODEL_OPS(gpu, gpu) ++int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata, ++ size_t *out_dump_bytes) ++{ ++ int errcode; ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t dump_bytes; + ++ if (!gpu_info || !out_metadata || !out_dump_bytes) ++ return -EINVAL; + -+#define ALIAS_POWER_MODEL(gpu, as_gpu) \ -+ IPA_POWER_MODEL_OPS(gpu, as_gpu) ++ /* ++ * For architectures where a max_config interface is available ++ * from the arbiter, the v5 dump bytes and the metadata v5 are ++ * based on the maximum possible allocation of the HW in the ++ * GPU cause it needs to be prepared for the worst case where ++ * all the available L2 cache and Shader cores are allocated. ++ */ ++ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); ++ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata); ++ if (errcode) ++ return errcode; + -+/* Reference voltage value is 750 mV. */ -+STANDARD_POWER_MODEL(todx, 750); -+STANDARD_POWER_MODEL(tgrx, 750); -+STANDARD_POWER_MODEL(tvax, 750); -+STANDARD_POWER_MODEL(ttux, 750); -+/* Reference voltage value is 550 mV. */ -+STANDARD_POWER_MODEL(ttix, 550); ++ /* ++ * The physical dump size should be half of dump abstraction size in ++ * metadata since physical HW uses 32-bit per value but metadata ++ * specifies 64-bit per value. 
++ */ ++ WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes); + -+/* Assuming LODX is an alias of TODX for IPA */ -+ALIAS_POWER_MODEL(lodx, todx); ++ *out_metadata = metadata; ++ *out_dump_bytes = dump_bytes; + -+/* Assuming LTUX is an alias of TTUX for IPA */ -+ALIAS_POWER_MODEL(ltux, ttux); ++ return 0; ++} + -+/* Assuming LTUX is an alias of TTUX for IPA */ -+ALIAS_POWER_MODEL(ltix, ttix); ++void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) ++{ ++ if (!metadata) ++ return; + -+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { -+ &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, -+ &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, -+ &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, -+ &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, -+}; ++ kbase_hwcnt_metadata_destroy(metadata); ++} + -+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( -+ struct kbase_device *kbdev, const char *name) ++int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata) +{ -+ int i; ++ int errcode; ++ const struct kbase_hwcnt_metadata *metadata; + -+ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { -+ const struct kbase_ipa_model_ops *ops = -+ ipa_counter_model_ops[i]; ++ if (!gpu_info || !out_metadata) ++ return -EINVAL; + -+ if (!strcmp(ops->name, name)) -+ return ops; -+ } ++ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata); ++ if (errcode) ++ return errcode; + -+ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ *out_metadata = metadata; + -+ return NULL; ++ return 0; +} + -+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) ++void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ -+ const u32 prod_id = -+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ if (!metadata) ++ return; + -+ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { -+ case GPU_ID2_PRODUCT_TODX: -+ return "mali-todx-power-model"; -+ case GPU_ID2_PRODUCT_LODX: -+ return "mali-lodx-power-model"; -+ case GPU_ID2_PRODUCT_TGRX: -+ return "mali-tgrx-power-model"; -+ case GPU_ID2_PRODUCT_TVAX: -+ return "mali-tvax-power-model"; -+ case GPU_ID2_PRODUCT_TTUX: -+ return "mali-ttux-power-model"; -+ case GPU_ID2_PRODUCT_LTUX: -+ return "mali-ltux-power-model"; -+ case GPU_ID2_PRODUCT_TTIX: -+ return "mali-ttix-power-model"; -+ case GPU_ID2_PRODUCT_LTIX: -+ return "mali-ltix-power-model"; ++ kbase_hwcnt_metadata_destroy(metadata); ++} ++ ++static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk) ++{ ++ bool is_shader = false; ++ ++ /* Warn on unknown group type */ ++ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) ++ return false; ++ ++ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED) ++ is_shader = true; ++ ++ return is_shader; ++} ++ ++static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type) ++{ ++ bool is_l2_cache = false; ++ ++ switch (grp_type) { ++ case KBASE_HWCNT_GPU_GROUP_TYPE_V5: ++ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) ++ 
is_l2_cache = true; ++ break; + default: -+ return NULL; ++ /* Warn on unknown group type */ ++ WARN_ON(true); + } ++ ++ return is_l2_cache; +} -diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c ++ ++int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, ++ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ const u64 *dump_src = src; ++ size_t src_offset = 0; ++ u64 core_mask = pm_core_mask; ++ ++ /* Variables to deal with the current configuration */ ++ int l2_count = 0; ++ ++ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; ++ ++ metadata = dst->metadata; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); ++ const size_t ctr_cnt = ++ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); ++ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); ++ const bool is_shader_core = is_block_type_shader( ++ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); ++ const bool is_l2_cache = is_block_type_l2_cache( ++ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); ++ const bool is_undefined = kbase_hwcnt_is_block_type_undefined( ++ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); ++ bool hw_res_available = true; ++ ++ /* ++ * If l2 blocks is greater than the current allocated number of ++ * L2 slices, there is no hw allocated to that block. ++ */ ++ if (is_l2_cache) { ++ l2_count++; ++ if (l2_count > curr_config->num_l2_slices) ++ hw_res_available = false; ++ else ++ hw_res_available = true; ++ } ++ /* ++ * For the shader cores, the current shader_mask allocated is ++ * always a subgroup of the maximum shader_mask, so after ++ * jumping any L2 cache not available the available shader cores ++ * will always have a matching set of blk instances available to ++ * accumulate them. ++ */ ++ else ++ hw_res_available = true; ++ ++ /* ++ * Skip block if no values in the destination block are enabled. ++ */ ++ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { ++ u64 *dst_blk = ++ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ const u64 *src_blk = dump_src + src_offset; ++ bool blk_powered; ++ ++ if (!is_shader_core) { ++ /* Under the current PM system, counters will ++ * only be enabled after all non shader core ++ * blocks are powered up. ++ */ ++ blk_powered = true; ++ } else { ++ /* Check the PM core mask to see if the shader ++ * core is powered up. ++ */ ++ blk_powered = core_mask & 1; ++ } ++ ++ if (blk_powered && !is_undefined && hw_res_available) { ++ /* Only powered and defined blocks have valid data. */ ++ if (accumulate) { ++ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, ++ hdr_cnt, ctr_cnt); ++ } else { ++ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, ++ (hdr_cnt + ctr_cnt)); ++ } ++ } else { ++ /* Even though the block might be undefined, the ++ * user has enabled counter collection for it. ++ * We should not propagate garbage data. 
++ */ ++ if (accumulate) { ++ /* No-op to preserve existing values */ ++ } else { ++ /* src is garbage, so zero the dst */ ++ kbase_hwcnt_dump_buffer_block_zero(dst_blk, ++ (hdr_cnt + ctr_cnt)); ++ } ++ } ++ } ++ ++ /* Just increase the src_offset if the HW is available */ ++ if (hw_res_available) ++ src_offset += (hdr_cnt + ctr_cnt); ++ if (is_shader_core) ++ core_mask = core_mask >> 1; ++ } ++ ++ return 0; ++} ++ ++int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ const u64 *dump_src = src; ++ size_t src_offset = 0; ++ size_t grp, blk, blk_inst; ++ ++ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; ++ ++ metadata = dst->metadata; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); ++ const size_t ctr_cnt = ++ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); ++ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); ++ const bool is_undefined = kbase_hwcnt_is_block_type_undefined( ++ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); ++ ++ /* ++ * Skip block if no values in the destination block are enabled. ++ */ ++ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { ++ u64 *dst_blk = ++ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ const u64 *src_blk = dump_src + src_offset; ++ ++ if (!is_undefined) { ++ if (accumulate) { ++ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, ++ hdr_cnt, ctr_cnt); ++ } else { ++ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, ++ (hdr_cnt + ctr_cnt)); ++ } ++ } else { ++ /* Even though the block might be undefined, the ++ * user has enabled counter collection for it. ++ * We should not propagate garbage data. ++ */ ++ if (accumulate) { ++ /* No-op to preserve existing values */ ++ } else { ++ /* src is garbage, so zero the dst */ ++ kbase_hwcnt_dump_buffer_block_zero(dst_blk, ++ (hdr_cnt + ctr_cnt)); ++ } ++ } ++ } ++ ++ src_offset += (hdr_cnt + ctr_cnt); ++ } ++ ++ return 0; ++} ++ ++/** ++ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical ++ * block enable map to a ++ * block enable map ++ * abstraction. ++ * @phys: Physical 32-bit block enable map ++ * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction ++ * will be stored. ++ * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction ++ * will be stored. 
++ */ ++static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi) ++{ ++ u64 dwords[2] = { 0, 0 }; ++ ++ size_t dword_idx; ++ ++ for (dword_idx = 0; dword_idx < 2; dword_idx++) { ++ const u16 packed = phys >> (16 * dword_idx); ++ u64 dword = 0; ++ ++ size_t hword_bit; ++ ++ for (hword_bit = 0; hword_bit < 16; hword_bit++) { ++ const size_t dword_bit = hword_bit * 4; ++ const u64 mask = (packed >> (hword_bit)) & 0x1; ++ ++ dword |= mask << (dword_bit + 0); ++ dword |= mask << (dword_bit + 1); ++ dword |= mask << (dword_bit + 2); ++ dword |= mask << (dword_bit + 3); ++ } ++ dwords[dword_idx] = dword; ++ } ++ *lo = dwords[0]; ++ *hi = dwords[1]; ++} ++ ++void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, ++ const struct kbase_hwcnt_enable_map *src) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ u64 fe_bm[EM_COUNT] = { 0 }; ++ u64 shader_bm[EM_COUNT] = { 0 }; ++ u64 tiler_bm[EM_COUNT] = { 0 }; ++ u64 mmu_l2_bm[EM_COUNT] = { 0 }; ++ size_t grp, blk, blk_inst; ++ ++ if (WARN_ON(!src) || WARN_ON(!dst)) ++ return; ++ ++ metadata = src->metadata; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); ++ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); ++ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); ++ ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { ++ const size_t map_stride = ++ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); ++ size_t map_idx; ++ ++ for (map_idx = 0; map_idx < map_stride; ++map_idx) { ++ if (WARN_ON(map_idx >= EM_COUNT)) ++ break; ++ ++ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: ++ /* Nothing to do in this case. 
*/ ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: ++ fe_bm[map_idx] |= blk_map[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: ++ tiler_bm[map_idx] |= blk_map[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: ++ shader_bm[map_idx] |= blk_map[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: ++ mmu_l2_bm[map_idx] |= blk_map[map_idx]; ++ break; ++ default: ++ WARN_ON(true); ++ } ++ } ++ } else { ++ WARN_ON(true); ++ } ++ } ++ ++ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]); ++ dst->shader_bm = ++ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]); ++ dst->tiler_bm = ++ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]); ++ dst->mmu_l2_bm = ++ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); ++} ++ ++void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) ++{ ++ switch (src) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; ++ break; ++ default: ++ WARN_ON(true); ++ } ++} ++ ++void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, ++ const struct kbase_hwcnt_physical_enable_map *src) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ ++ u64 fe_bm[EM_COUNT] = { 0 }; ++ u64 shader_bm[EM_COUNT] = { 0 }; ++ u64 tiler_bm[EM_COUNT] = { 0 }; ++ u64 mmu_l2_bm[EM_COUNT] = { 0 }; ++ size_t grp, blk, blk_inst; ++ ++ if (WARN_ON(!src) || WARN_ON(!dst)) ++ return; ++ ++ metadata = dst->metadata; ++ ++ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]); ++ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO], ++ &shader_bm[EM_HI]); ++ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO], ++ &tiler_bm[EM_HI]); ++ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], ++ &mmu_l2_bm[EM_HI]); ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); ++ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); ++ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); ++ ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { ++ const size_t map_stride = ++ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); ++ size_t map_idx; ++ ++ for (map_idx = 0; map_idx < map_stride; ++map_idx) { ++ if (WARN_ON(map_idx >= EM_COUNT)) ++ break; ++ ++ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: ++ /* Nothing to do in this case. 
*/ ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: ++ blk_map[map_idx] = fe_bm[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: ++ blk_map[map_idx] = tiler_bm[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: ++ blk_map[map_idx] = shader_bm[map_idx]; ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: ++ blk_map[map_idx] = mmu_l2_bm[map_idx]; ++ break; ++ default: ++ WARN_ON(true); ++ } ++ } ++ } else { ++ WARN_ON(true); ++ } ++ } ++} ++ ++void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ ++ if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) ++ return; ++ ++ metadata = buf->metadata; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); ++ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); ++ const u64 *blk_map = ++ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); ++ ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { ++ const size_t map_stride = ++ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); ++ u64 prfcnt_bm[EM_COUNT] = { 0 }; ++ u32 prfcnt_en = 0; ++ size_t map_idx; ++ ++ for (map_idx = 0; map_idx < map_stride; ++map_idx) { ++ if (WARN_ON(map_idx >= EM_COUNT)) ++ break; ++ ++ prfcnt_bm[map_idx] = blk_map[map_idx]; ++ } ++ ++ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], ++ prfcnt_bm[EM_HI]); ++ ++ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; ++ } else { ++ WARN_ON(true); ++ } ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h new file mode 100644 -index 000000000..2092db042 +index 000000000..a49c31e52 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c -@@ -0,0 +1,561 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h +@@ -0,0 +1,407 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -195198,556 +198755,402 @@ index 000000000..2092db042 + * + */ + -+#include ++#ifndef _KBASE_HWCNT_GPU_H_ ++#define _KBASE_HWCNT_GPU_H_ + -+#include "mali_kbase_ipa_counter_common_jm.h" -+#include "mali_kbase.h" -+#include ++#include ++#include + -+/* Performance counter blocks base offsets */ -+#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) -+#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) ++struct kbase_device; ++struct kbase_hwcnt_metadata; ++struct kbase_hwcnt_enable_map; ++struct kbase_hwcnt_dump_buffer; + -+/* JM counter block offsets */ -+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) ++/* Hardware counter version 5 definitions, V5 is the only supported version. */ ++#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 ++#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 ++#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 ++#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ ++ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) + -+/* MEMSYS counter block offsets */ -+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) ++/* FrontEnd block count in V5 GPU hardware counter. */ ++#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1 ++/* Tiler block count in V5 GPU hardware counter. */ ++#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1 + -+/* SC counter block offsets */ -+#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) -+#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) -+#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) -+#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39) -+#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) -+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) -+#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) -+#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) ++/* Index of the PRFCNT_EN header into a V5 counter block */ ++#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 ++ ++/* Number of bytes for each counter value in hardware. */ ++#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32)) + +/** -+ * kbase_g7x_power_model_get_jm_counter() - get performance counter offset -+ * inside the Job Manager block -+ * @model_data: pointer to GPU model data. -+ * @counter_block_offset: offset in bytes of the performance counter inside -+ * the Job Manager block. -+ * -+ * Return: Block offset in bytes of the required performance counter. ++ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to ++ * identify metadata groups. ++ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ -+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, -+ u32 counter_block_offset) -+{ -+ return JM_BASE + counter_block_offset; -+} ++enum kbase_hwcnt_gpu_group_type { ++ KBASE_HWCNT_GPU_GROUP_TYPE_V5, ++}; + +/** -+ * kbase_g7x_power_model_get_memsys_counter() - get performance counter offset -+ * inside the Memory System block -+ * @model_data: pointer to GPU model data. -+ * @counter_block_offset: offset in bytes of the performance counter inside -+ * the (first) Memory System block. -+ * -+ * Return: Block offset in bytes of the required performance counter. ++ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, ++ * used to identify metadata blocks. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager ++ * or CSF HW). 
++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job ++ * manager or CSF HW). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job ++ * manager or CSF HW). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block ++ * (e.g. if a counter set that ++ * a block doesn't support is ++ * used). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. + */ -+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, -+ u32 counter_block_offset) -+{ -+ /* The base address of Memory System performance counters is always the same, although their number -+ * may vary based on the number of cores. For the moment it's ok to return a constant. -+ */ -+ return MEMSYS_BASE + counter_block_offset; -+} ++enum kbase_hwcnt_gpu_v5_block_type { ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, ++}; + +/** -+ * kbase_g7x_power_model_get_sc_counter() - get performance counter offset -+ * inside the Shader Cores block -+ * @model_data: pointer to GPU model data. -+ * @counter_block_offset: offset in bytes of the performance counter inside -+ * the (first) Shader Cores block. -+ * -+ * Return: Block offset in bytes of the required performance counter. ++ * enum kbase_hwcnt_set - GPU hardware counter sets ++ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters ++ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters ++ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters ++ * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters + */ -+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, -+ u32 counter_block_offset) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ const u32 sc_base = MEMSYS_BASE + -+ (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * -+ KBASE_IPA_NR_BYTES_PER_BLOCK); -+#else -+ const u32 sc_base = MEMSYS_BASE + -+ (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * -+ KBASE_IPA_NR_BYTES_PER_BLOCK); -+#endif -+ return sc_base + counter_block_offset; -+} ++enum kbase_hwcnt_set { ++ KBASE_HWCNT_SET_PRIMARY, ++ KBASE_HWCNT_SET_SECONDARY, ++ KBASE_HWCNT_SET_TERTIARY, ++ KBASE_HWCNT_SET_UNDEFINED = 255, ++}; + +/** -+ * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory -+ * System performance counter. -+ * @model_data: pointer to GPU model data. 
-+ * @coeff: default value of coefficient for IPA group. -+ * @counter_block_offset: offset in bytes of the counter inside the block it -+ * belongs to. -+ * -+ * Return: Energy estimation for a single Memory System performance counter. ++ * struct kbase_hwcnt_physical_enable_map - Representation of enable map ++ * directly used by GPU. ++ * @fe_bm: Front end (JM/CSHW) counters selection bitmask. ++ * @shader_bm: Shader counters selection bitmask. ++ * @tiler_bm: Tiler counters selection bitmask. ++ * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ -+static s64 kbase_g7x_sum_all_memsys_blocks( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, -+ u32 counter_block_offset) -+{ -+ u32 counter; ++struct kbase_hwcnt_physical_enable_map { ++ u32 fe_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 mmu_l2_bm; ++}; + -+ counter = kbase_g7x_power_model_get_memsys_counter(model_data, -+ counter_block_offset); -+ return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); -+} ++/* ++ * Values for Hardware Counter SET_SELECT value. ++ * Directly passed to HW. ++ */ ++enum kbase_hwcnt_physical_set { ++ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0, ++ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1, ++ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2, ++}; + +/** -+ * kbase_g7x_sum_all_shader_cores() - calculate energy for a Shader Cores -+ * performance counter for all cores. -+ * @model_data: pointer to GPU model data. -+ * @coeff: default value of coefficient for IPA group. -+ * @counter_block_offset: offset in bytes of the counter inside the block it -+ * belongs to. -+ * -+ * Return: Energy estimation for a Shader Cores performance counter for all -+ * cores. ++ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. ++ * @l2_count: L2 cache count. ++ * @core_mask: Shader core mask. May be sparse. ++ * @clk_cnt: Number of clock domains available. ++ * @prfcnt_values_per_block: Total entries (header + counters) of performance ++ * counter per block. + */ -+static s64 kbase_g7x_sum_all_shader_cores( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, -+ u32 counter_block_offset) -+{ -+ u32 counter; -+ -+ counter = kbase_g7x_power_model_get_sc_counter(model_data, -+ counter_block_offset); -+ return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); -+} ++struct kbase_hwcnt_gpu_info { ++ size_t l2_count; ++ u64 core_mask; ++ u8 clk_cnt; ++ size_t prfcnt_values_per_block; ++}; + +/** -+ * kbase_g7x_jm_single_counter() - calculate energy for a single Job Manager performance counter. -+ * @model_data: pointer to GPU model data. -+ * @coeff: default value of coefficient for IPA group. -+ * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. ++ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the ++ * GPU. ++ * @num_l2_slices: Current number of L2 slices allocated to the GPU. ++ * @shader_present: Current shader present bitmap that is allocated to the GPU. + * -+ * Return: Energy estimation for a single Job Manager performance counter. ++ * For architectures with the max_config interface available from the Arbiter, ++ * the current resources allocated may change during runtime due to a ++ * re-partitioning (possible with partition manager). Thus, the HWC needs to be ++ * prepared to report any possible set of counters. For this reason the memory ++ * layout in the userspace is based on the maximum possible allocation. 
On the ++ * other hand, each partition has just the view of its currently allocated ++ * resources. Therefore, it is necessary to correctly map the dumped HWC values ++ * from the registers into this maximum memory layout so that it can be exposed ++ * to the userspace side correctly. ++ * ++ * For L2 cache just the number is enough once the allocated ones will be ++ * accumulated on the first L2 slots available in the destination buffer. ++ * ++ * For the correct mapping of the shader cores it is necessary to jump all the ++ * L2 cache slots in the destination buffer that are not allocated. But, it is ++ * not necessary to add any logic to map the shader cores bitmap into the memory ++ * layout because the shader_present allocated will always be a subset of the ++ * maximum shader_present. It is possible because: ++ * 1 - Partitions are made of slices and they are always ordered from the ones ++ * with more shader cores to the ones with less. ++ * 2 - The shader cores in a slice are always contiguous. ++ * 3 - A partition can only have a contiguous set of slices allocated to it. ++ * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with ++ * 3 cores and 1 with 2 cores. The maximum possible shader_present would be: ++ * 0x0011|0111|0111|1111 -> note the order and that the shader cores are ++ * contiguous in any slice. ++ * Supposing that a partition takes the two slices in the middle, the current ++ * config shader_present for this partition would be: ++ * 0x0111|0111 -> note that this is a subset of the maximum above and the slices ++ * are contiguous. ++ * Therefore, by directly copying any subset of the maximum possible ++ * shader_present the mapping is already achieved. + */ -+static s64 kbase_g7x_jm_single_counter( -+ struct kbase_ipa_model_vinstr_data *model_data, -+ s32 coeff, -+ u32 counter_block_offset) -+{ -+ u32 counter; -+ -+ counter = kbase_g7x_power_model_get_jm_counter(model_data, -+ counter_block_offset); -+ return kbase_ipa_single_counter(model_data, coeff, counter); -+} ++struct kbase_hwcnt_curr_config { ++ size_t num_l2_slices; ++ u64 shader_present; ++}; + +/** -+ * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter -+ * @model_data: pointer to GPU model data. ++ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined. + * -+ * Return: the number of cycles the GPU was active during the counter sampling -+ * period. ++ * @grp_type: Hardware counter group type. ++ * @blk_type: Hardware counter block type. ++ * ++ * Return: true if the block type is undefined, else false. + */ -+static u32 kbase_g7x_get_active_cycles( -+ struct kbase_ipa_model_vinstr_data *model_data) ++static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, ++ const uint64_t blk_type) +{ -+ u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); ++ /* Warn on unknown group type */ ++ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) ++ return false; + -+ /* Counters are only 32-bit, so we can safely multiply by 1 then cast -+ * the 64-bit result back to a u32. -+ */ -+ return kbase_ipa_single_counter(model_data, 1, counter); ++ return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED); +} + -+/* Table of IPA group definitions. 
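The kbase_hwcnt_curr_config comment above relies on the currently allocated shader_present always being a plain subset of the maximum shader_present, so a direct copy of the bitmap already gives the correct mapping. A minimal C sketch of that property, reusing the example bitmaps from the comment (the helper name and the standalone check are illustrative only and are not part of the patch):

#include <linux/types.h>
#include <linux/bug.h>

/* Illustrative helper: true if every shader core in curr is also present in
 * the maximum possible shader_present.
 */
static inline bool curr_shader_present_is_subset(u64 curr, u64 max)
{
	return (curr & ~max) == 0;
}

static void curr_config_subset_example(void)
{
	/* Example from the comment above: 4 slices with 4, 3, 3 and 2 cores. */
	const u64 max_shader_present = 0x377f;  /* 0b0011_0111_0111_1111 */
	const u64 part_shader_present = 0x77;   /* 0b0111_0111, the two middle slices */

	/* Holds for any valid partition, so copying the subset bitmap directly
	 * into the maximum layout preserves the core positions.
	 */
	WARN_ON(!curr_shader_present_is_subset(part_shader_present, max_shader_present));
}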
++/** ++ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the ++ * JM GPUs. ++ * @info: Non-NULL pointer to info struct. ++ * @counter_set: The performance counter set used. ++ * @out_metadata: Non-NULL pointer to where created metadata is stored on ++ * success. ++ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump ++ * buffer is stored on success. + * -+ * For each IPA group, this table defines a function to access the given performance block counter (or counters, -+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. ++ * Return: 0 on success, else error code. + */ ++int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata, ++ size_t *out_dump_bytes); + -+static const struct kbase_ipa_group ipa_groups_def_g71[] = { -+ { -+ .name = "l2_access", -+ .default_value = 526300, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+ { -+ .name = "exec_instr_count", -+ .default_value = 301100, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_COUNT, -+ }, -+ { -+ .name = "tex_issue", -+ .default_value = 197400, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_COORD_ISSUE, -+ }, -+ { -+ .name = "tile_wb", -+ .default_value = -156400, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_BEATS_WR_TIB, -+ }, -+ { -+ .name = "gpu_active", -+ .default_value = 115800, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+}; ++/** ++ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. ++ * ++ * @metadata: Pointer to metadata to destroy. ++ */ ++void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + -+static const struct kbase_ipa_group ipa_groups_def_g72[] = { -+ { -+ .name = "l2_access", -+ .default_value = 393000, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+ { -+ .name = "exec_instr_count", -+ .default_value = 227000, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_COUNT, -+ }, -+ { -+ .name = "tex_issue", -+ .default_value = 181900, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_COORD_ISSUE, -+ }, -+ { -+ .name = "tile_wb", -+ .default_value = -120200, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_BEATS_WR_TIB, -+ }, -+ { -+ .name = "gpu_active", -+ .default_value = 133100, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+}; ++/** ++ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the ++ * CSF GPUs. ++ * @info: Non-NULL pointer to info struct. ++ * @counter_set: The performance counter set used. ++ * @out_metadata: Non-NULL pointer to where created metadata is stored on ++ * success. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata); + -+static const struct kbase_ipa_group ipa_groups_def_g76[] = { -+ { -+ .name = "gpu_active", -+ .default_value = 122000, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+ { -+ .name = "exec_instr_count", -+ .default_value = 488900, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_COUNT, -+ }, -+ { -+ .name = "vary_instr", -+ .default_value = 212100, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_VARY_INSTR, -+ }, -+ { -+ .name = "tex_tfch_num_operations", -+ .default_value = 288000, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, -+ }, -+ { -+ .name = "l2_access", -+ .default_value = 378100, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+}; ++/** ++ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter ++ * metadata. ++ * @metadata: Pointer to metadata to destroy. ++ */ ++void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + -+static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { -+ { -+ .name = "gpu_active", -+ .default_value = 224200, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+ { -+ .name = "exec_instr_count", -+ .default_value = 384700, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_COUNT, -+ }, -+ { -+ .name = "vary_instr", -+ .default_value = 271900, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_VARY_INSTR, -+ }, -+ { -+ .name = "tex_tfch_num_operations", -+ .default_value = 477700, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, -+ }, -+ { -+ .name = "l2_access", -+ .default_value = 551400, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+}; ++/** ++ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw ++ * dump buffer in src into the dump buffer ++ * abstraction in dst. ++ * @dst: Non-NULL pointer to destination dump buffer. ++ * @src: Non-NULL pointer to source raw dump buffer, of same length ++ * as dump_buf_bytes in the metadata of destination dump ++ * buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. ++ * @pm_core_mask: PM state synchronized shaders core mask with the dump. ++ * @curr_config: Current allocated hardware resources to correctly map the ++ * source raw dump buffer to the destination dump buffer. ++ * @accumulate: True if counters in source should be accumulated into ++ * destination, rather than copied. ++ * ++ * The dst and dst_enable_map MUST have been created from the same metadata as ++ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get ++ * the length of src. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ const u64 pm_core_mask, ++ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); + -+static const struct kbase_ipa_group ipa_groups_def_g51[] = { -+ { -+ .name = "gpu_active", -+ .default_value = 201400, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+ { -+ .name = "exec_instr_count", -+ .default_value = 392700, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_COUNT, -+ }, -+ { -+ .name = "vary_instr", -+ .default_value = 274000, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_VARY_INSTR, -+ }, -+ { -+ .name = "tex_tfch_num_operations", -+ .default_value = 528000, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, -+ }, -+ { -+ .name = "l2_access", -+ .default_value = 506400, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+}; ++/** ++ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw ++ * dump buffer in src into the dump buffer ++ * abstraction in dst. ++ * @dst: Non-NULL pointer to destination dump buffer. ++ * @src: Non-NULL pointer to source raw dump buffer, of same length ++ * as dump_buf_bytes in the metadata of dst dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. ++ * @accumulate: True if counters in src should be accumulated into ++ * destination, rather than copied. ++ * ++ * The dst and dst_enable_map MUST have been created from the same metadata as ++ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get ++ * the length of src. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); + -+static const struct kbase_ipa_group ipa_groups_def_g77[] = { -+ { -+ .name = "l2_access", -+ .default_value = 710800, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+ { -+ .name = "exec_instr_msg", -+ .default_value = 2375300, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_MSG, -+ }, -+ { -+ .name = "exec_instr_fma", -+ .default_value = 656100, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_FMA, -+ }, -+ { -+ .name = "tex_filt_num_operations", -+ .default_value = 318800, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, -+ }, -+ { -+ .name = "gpu_active", -+ .default_value = 172800, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+}; ++/** ++ * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block ++ * enable map abstraction to ++ * a physical block enable ++ * map. ++ * @lo: Low 64 bits of block enable map abstraction. ++ * @hi: High 64 bits of block enable map abstraction. ++ * ++ * The abstraction uses 128 bits to enable 128 block values, whereas the ++ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. ++ * Therefore, this conversion is lossy. ++ * ++ * Return: 32-bit physical block enable map. 
++ */ ++static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi) ++{ ++ u32 phys = 0; ++ u64 dwords[2] = { lo, hi }; ++ size_t dword_idx; + -+static const struct kbase_ipa_group ipa_groups_def_tbex[] = { -+ { -+ .name = "l2_access", -+ .default_value = 599800, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+ { -+ .name = "exec_instr_msg", -+ .default_value = 1830200, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_MSG, -+ }, -+ { -+ .name = "exec_instr_fma", -+ .default_value = 407300, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_FMA, -+ }, -+ { -+ .name = "tex_filt_num_operations", -+ .default_value = 224500, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, -+ }, -+ { -+ .name = "gpu_active", -+ .default_value = 153800, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+}; ++ for (dword_idx = 0; dword_idx < 2; dword_idx++) { ++ const u64 dword = dwords[dword_idx]; ++ u16 packed = 0; + -+static const struct kbase_ipa_group ipa_groups_def_tbax[] = { -+ { -+ .name = "l2_access", -+ .default_value = 599800, -+ .op = kbase_g7x_sum_all_memsys_blocks, -+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, -+ }, -+ { -+ .name = "exec_instr_msg", -+ .default_value = 1830200, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_MSG, -+ }, -+ { -+ .name = "exec_instr_fma", -+ .default_value = 407300, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_EXEC_INSTR_FMA, -+ }, -+ { -+ .name = "tex_filt_num_operations", -+ .default_value = 224500, -+ .op = kbase_g7x_sum_all_shader_cores, -+ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, -+ }, -+ { -+ .name = "gpu_active", -+ .default_value = 153800, -+ .op = kbase_g7x_jm_single_counter, -+ .counter_block_offset = JM_GPU_ACTIVE, -+ }, -+}; ++ size_t hword_bit; + -+#define IPA_POWER_MODEL_OPS(gpu, init_token) \ -+ static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ -+ .name = "mali-" #gpu "-power-model", \ -+ .init = kbase_##init_token##_power_model_init, \ -+ .term = kbase_ipa_vinstr_common_model_term, \ -+ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ -+ .reset_counter_data = kbase_ipa_vinstr_reset_data, \ ++ for (hword_bit = 0; hword_bit < 16; hword_bit++) { ++ const size_t dword_bit = hword_bit * 4; ++ const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) | ++ ((dword >> (dword_bit + 1)) & 0x1) | ++ ((dword >> (dword_bit + 2)) & 0x1) | ++ ((dword >> (dword_bit + 3)) & 0x1); ++ packed |= (mask << hword_bit); ++ } ++ phys |= ((u32)packed) << (16 * dword_idx); + } ++ return phys; ++} + -+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ -+ static int kbase_ ## gpu ## _power_model_init(\ -+ struct kbase_ipa_model *model) \ -+ { \ -+ BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ -+ KBASE_IPA_MAX_GROUP_DEF_NUM); \ -+ return kbase_ipa_vinstr_common_model_init(model, \ -+ ipa_groups_def_ ## gpu, \ -+ ARRAY_SIZE(ipa_groups_def_ ## gpu), \ -+ kbase_g7x_get_active_cycles, \ -+ (reference_voltage)); \ -+ } \ -+ IPA_POWER_MODEL_OPS(gpu, gpu) -+ -+#define ALIAS_POWER_MODEL(gpu, as_gpu) \ -+ IPA_POWER_MODEL_OPS(gpu, as_gpu) -+ -+STANDARD_POWER_MODEL(g71, 800); -+STANDARD_POWER_MODEL(g72, 800); -+STANDARD_POWER_MODEL(g76, 800); -+STANDARD_POWER_MODEL(g52_r1, 1000); -+STANDARD_POWER_MODEL(g51, 1000); -+STANDARD_POWER_MODEL(g77, 1000); 
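As a quick usage sketch of the conversion just defined: physical bit n covers abstract values [n*4, n*4+3], so enabling a single value still turns on the whole group of four when mapped back. The wrapper below and the expected constant are illustrative only, assuming mali_kbase_hwcnt_gpu.h is included so the static inline defined above is in scope:

#include <linux/types.h>
#include <linux/bug.h>

/* Illustrative check: enabling only block value 6 (bit 6 of the low 64-bit
 * word) selects physical bit 1, since value 6 lies in the range [4, 7].
 */
static void block_map_to_physical_example(void)
{
	const u32 phys = kbase_hwcnt_backend_gpu_block_map_to_physical(1ULL << 6, 0);

	WARN_ON(phys != 0x2);
}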
-+STANDARD_POWER_MODEL(tbex, 1000); -+STANDARD_POWER_MODEL(tbax, 1000); -+ -+/* g52 is an alias of g76 (TNOX) for IPA */ -+ALIAS_POWER_MODEL(g52, g76); -+/* tnax is an alias of g77 (TTRX) for IPA */ -+ALIAS_POWER_MODEL(tnax, g77); -+ -+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { -+ &kbase_g71_ipa_model_ops, -+ &kbase_g72_ipa_model_ops, -+ &kbase_g76_ipa_model_ops, -+ &kbase_g52_ipa_model_ops, -+ &kbase_g52_r1_ipa_model_ops, -+ &kbase_g51_ipa_model_ops, -+ &kbase_g77_ipa_model_ops, -+ &kbase_tnax_ipa_model_ops, -+ &kbase_tbex_ipa_model_ops, -+ &kbase_tbax_ipa_model_ops -+}; -+ -+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( -+ struct kbase_device *kbdev, const char *name) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { -+ const struct kbase_ipa_model_ops *ops = -+ ipa_counter_model_ops[i]; -+ -+ if (!strcmp(ops->name, name)) -+ return ops; -+ } ++/** ++ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction ++ * into a physical enable map. ++ * @dst: Non-NULL pointer to destination physical enable map. ++ * @src: Non-NULL pointer to source enable map abstraction. ++ * ++ * The src must have been created from a metadata returned from a call to ++ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. ++ * ++ * This is a lossy conversion, as the enable map abstraction has one bit per ++ * individual counter block value, but the physical enable map uses 1 bit for ++ * every 4 counters, shared over all instances of a block. ++ */ ++void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, ++ const struct kbase_hwcnt_enable_map *src); + -+ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++/** ++ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical ++ * SET_SELECT value. ++ * ++ * @dst: Non-NULL pointer to destination physical SET_SELECT value. ++ * @src: Non-NULL pointer to source counter set selection. ++ */ ++void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src); + -+ return NULL; -+} ++/** ++ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to ++ * an enable map abstraction. ++ * @dst: Non-NULL pointer to destination enable map abstraction. ++ * @src: Non-NULL pointer to source physical enable map. ++ * ++ * The dst must have been created from a metadata returned from a call to ++ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. ++ * ++ * This is a lossy conversion, as the physical enable map can technically ++ * support counter blocks with 128 counters each, but no hardware actually uses ++ * more than 64, so the enable map abstraction has nowhere to store the enable ++ * information for the 64 non-existent counters. ++ */ ++void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, ++ const struct kbase_hwcnt_physical_enable_map *src); + -+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) -+{ -+ const u32 prod_id = -+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++/** ++ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter ++ * enable headers in a dump buffer to ++ * reflect the specified enable map. ++ * @buf: Non-NULL pointer to dump buffer to patch. ++ * @enable_map: Non-NULL pointer to enable map. 
++ * ++ * The buf and enable_map must have been created from a metadata returned from ++ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. ++ * ++ * This function should be used before handing off a dump buffer over the ++ * kernel-user boundary, to ensure the header is accurate for the enable map ++ * used by the user. ++ */ ++void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, ++ const struct kbase_hwcnt_enable_map *enable_map); + -+ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { -+ case GPU_ID2_PRODUCT_TMIX: -+ return "mali-g71-power-model"; -+ case GPU_ID2_PRODUCT_THEX: -+ return "mali-g72-power-model"; -+ case GPU_ID2_PRODUCT_TNOX: -+ return "mali-g76-power-model"; -+ case GPU_ID2_PRODUCT_TSIX: -+ return "mali-g51-power-model"; -+ case GPU_ID2_PRODUCT_TGOX: -+ if ((gpu_id & GPU_ID2_VERSION_MAJOR) == -+ (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) -+ /* g52 aliased to g76 power-model's ops */ -+ return "mali-g52-power-model"; -+ else -+ return "mali-g52_r1-power-model"; -+ case GPU_ID2_PRODUCT_TNAX: -+ return "mali-tnax-power-model"; -+ case GPU_ID2_PRODUCT_TTRX: -+ return "mali-g77-power-model"; -+ case GPU_ID2_PRODUCT_TBEX: -+ return "mali-tbex-power-model"; -+ case GPU_ID2_PRODUCT_TBAX: -+ return "mali-tbax-power-model"; -+ default: -+ return NULL; -+ } -+} -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c ++#endif /* _KBASE_HWCNT_GPU_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c new file mode 100644 -index 000000000..b2e6bc459 +index 000000000..0cf2f94cf --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c -@@ -0,0 +1,778 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c +@@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -195765,1083 +199168,629 @@ index 000000000..b2e6bc459 + * + */ + -+#include -+#include -+#include -+#include "mali_kbase.h" -+#include "mali_kbase_ipa.h" -+#include "mali_kbase_ipa_debugfs.h" -+#include "mali_kbase_ipa_simple.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "backend/gpu/mali_kbase_devfreq.h" -+#include -+ -+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" -+ -+/* Polling by thermal governor starts when the temperature exceeds the certain -+ * trip point. In order to have meaningful value for the counters, when the -+ * polling starts and first call to kbase_get_real_power() is made, it is -+ * required to reset the counter values every now and then. -+ * It is reasonable to do the reset every second if no polling is being done, -+ * the counter model implementation also assumes max sampling interval of 1 sec. 
-+ */ -+#define RESET_INTERVAL_MS ((s64)1000) -+ -+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) -+{ -+ int err = 0; -+ -+ lockdep_assert_held(&model->kbdev->ipa.lock); -+ -+ if (model->ops->recalculate) { -+ err = model->ops->recalculate(model); -+ if (err) { -+ dev_err(model->kbdev->dev, -+ "recalculation of power model %s returned error %d\n", -+ model->ops->name, err); -+ } -+ } -+ -+ return err; -+} -+ -+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, -+ const char *name) -+{ -+ if (!strcmp(name, kbase_simple_ipa_model_ops.name)) -+ return &kbase_simple_ipa_model_ops; ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" ++#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" + -+ return kbase_ipa_counter_model_ops_find(kbdev, name); -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); ++#include ++#include ++#include + -+const char *kbase_ipa_model_name_from_id(u32 gpu_id) ++int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, ++ const struct kbase_hwcnt_metadata *src_md) +{ -+ const char *model_name = -+ kbase_ipa_counter_model_name_from_id(gpu_id); ++ struct kbase_hwcnt_description desc; ++ struct kbase_hwcnt_group_description group; ++ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; ++ size_t prfcnt_values_per_block; ++ size_t blk; ++ int err; ++ struct kbase_hwcnt_metadata_narrow *metadata_narrow; + -+ if (!model_name) -+ return KBASE_IPA_FALLBACK_MODEL_NAME; -+ else -+ return model_name; -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); ++ if (!dst_md_narrow || !src_md || !src_md->grp_metadata || ++ !src_md->grp_metadata[0].blk_metadata) ++ return -EINVAL; + -+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, -+ bool dt_required) -+{ -+ struct device_node *model_dt_node = NULL; -+ char compat_string[64]; ++ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block ++ * count in the metadata. ++ */ ++ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || ++ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) ++ return -EINVAL; + -+ if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name))) -+ return NULL; ++ /* Get the values count in the first block. */ ++ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); + -+ /* of_find_compatible_node() will call of_node_put() on the root node, -+ * so take a reference on it first. -+ */ -+ of_node_get(model->kbdev->dev->of_node); -+ model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, -+ NULL, compat_string); -+ if (!model_dt_node && !model->missing_dt_node_warning) { -+ if (dt_required) -+ dev_warn(model->kbdev->dev, -+ "Couldn't find power_model DT node matching \'%s\'\n", -+ compat_string); -+ model->missing_dt_node_warning = true; ++ /* check all blocks should have same values count. */ ++ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { ++ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); ++ if (val_cnt != prfcnt_values_per_block) ++ return -EINVAL; + } + -+ return model_dt_node; -+} ++ /* Only support 64 and 128 entries per block. 
*/ ++ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) ++ return -EINVAL; + -+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, -+ const char *name, s32 *addr, -+ size_t num_elems, bool dt_required) -+{ -+ int err = -EINVAL, i; -+ struct device_node *model_dt_node = get_model_dt_node(model, -+ dt_required); -+ char *origin; ++ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL); ++ if (!metadata_narrow) ++ return -ENOMEM; + -+ err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); -+ /* We're done with model_dt_node now, so drop the reference taken in -+ * get_model_dt_node()/of_find_compatible_node(). -+ */ -+ of_node_put(model_dt_node); ++ /* Narrow to 64 entries per block to keep API backward compatibility. */ ++ prfcnt_values_per_block = 64; + -+ if (err && dt_required) { -+ memset(addr, 0, sizeof(s32) * num_elems); -+ dev_warn(model->kbdev->dev, -+ "Error %d, no DT entry: %s.%s = %zu*[0]\n", -+ err, model->ops->name, name, num_elems); -+ origin = "zero"; -+ } else if (err && !dt_required) { -+ origin = "default"; -+ } else /* !err */ { -+ origin = "DT"; ++ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { ++ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); ++ blks[blk] = (struct kbase_hwcnt_block_description){ ++ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), ++ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), ++ .hdr_cnt = blk_hdr_cnt, ++ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, ++ }; + } + -+ /* Create a unique debugfs entry for each element */ -+ for (i = 0; i < num_elems; ++i) { -+ char elem_name[32]; -+ -+ if (num_elems == 1) { -+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) { -+ err = -ENOMEM; -+ goto exit; -+ } -+ } else { -+ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { -+ err = -ENOMEM; -+ goto exit; -+ } -+ } ++ group = (struct kbase_hwcnt_group_description){ ++ .type = kbase_hwcnt_metadata_group_type(src_md, 0), ++ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT, ++ .blks = blks, ++ }; + -+ dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", -+ model->ops->name, elem_name, addr[i], origin); ++ desc = (struct kbase_hwcnt_description){ ++ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md), ++ .avail_mask = src_md->avail_mask, ++ .clk_cnt = src_md->clk_cnt, ++ .grps = &group, ++ }; + -+ err = kbase_ipa_model_param_add(model, elem_name, -+ &addr[i], sizeof(s32), -+ PARAM_TYPE_S32); -+ if (err) -+ goto exit; ++ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata); ++ if (!err) { ++ /* Narrow down the buffer size to half as the narrowed metadata ++ * only supports 32-bit but the created metadata uses 64-bit for ++ * block entry. 
++ */ ++ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; ++ *dst_md_narrow = metadata_narrow; ++ } else { ++ kfree(metadata_narrow); + } -+exit: ++ + return err; +} + -+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, -+ const char *name, char *addr, -+ size_t size, bool dt_required) ++void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) +{ -+ int err; -+ struct device_node *model_dt_node = get_model_dt_node(model, -+ dt_required); -+ const char *string_prop_value = ""; -+ char *origin; ++ if (!md_narrow) ++ return; + -+ err = of_property_read_string(model_dt_node, name, -+ &string_prop_value); ++ kbase_hwcnt_metadata_destroy(md_narrow->metadata); ++ kfree(md_narrow); ++} + -+ /* We're done with model_dt_node now, so drop the reference taken in -+ * get_model_dt_node()/of_find_compatible_node(). -+ */ -+ of_node_put(model_dt_node); ++int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ struct kbase_hwcnt_dump_buffer_narrow *dump_buf) ++{ ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ u8 *buf; + -+ if (err && dt_required) { -+ strncpy(addr, "", size - 1); -+ dev_warn(model->kbdev->dev, -+ "Error %d, no DT entry: %s.%s = \'%s\'\n", -+ err, model->ops->name, name, addr); -+ err = 0; -+ origin = "zero"; -+ } else if (err && !dt_required) { -+ origin = "default"; -+ } else /* !err */ { -+ strncpy(addr, string_prop_value, size - 1); -+ origin = "DT"; -+ } ++ if (!md_narrow || !dump_buf) ++ return -EINVAL; + -+ addr[size - 1] = '\0'; ++ dump_buf_bytes = md_narrow->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; + -+ dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", -+ model->ops->name, name, string_prop_value, origin); ++ /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ ++ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; + -+ err = kbase_ipa_model_param_add(model, name, addr, size, -+ PARAM_TYPE_STRING); -+ return err; ++ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){ ++ .md_narrow = md_narrow, ++ .dump_buf = (u32 *)buf, ++ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes), ++ }; ++ ++ return 0; +} + -+void kbase_ipa_term_model(struct kbase_ipa_model *model) ++void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) +{ -+ if (!model) ++ if (!dump_buf_narrow) + return; + -+ lockdep_assert_held(&model->kbdev->ipa.lock); -+ -+ if (model->ops->term) -+ model->ops->term(model); -+ -+ kbase_ipa_model_param_free_all(model); -+ -+ kfree(model); ++ kfree(dump_buf_narrow->dump_buf); ++ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, ++ .dump_buf = NULL, ++ .clk_cnt_buf = NULL }; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_term_model); + -+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, -+ const struct kbase_ipa_model_ops *ops) ++int kbase_hwcnt_dump_buffer_narrow_array_alloc( ++ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, ++ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) +{ -+ struct kbase_ipa_model *model; -+ int err; ++ struct kbase_hwcnt_dump_buffer_narrow *buffers; ++ size_t buf_idx; ++ unsigned int order; ++ unsigned long addr; ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ size_t total_dump_buf_size; + -+ lockdep_assert_held(&kbdev->ipa.lock); ++ if (!md_narrow || !dump_bufs) ++ return -EINVAL; + -+ if (!ops || !ops->name) -+ return NULL; ++ dump_buf_bytes = md_narrow->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; + -+ model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); -+ if (!model) -+ return NULL; ++ /* Allocate memory for the dump buffer struct array */ ++ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); ++ if (!buffers) ++ return -ENOMEM; + -+ model->kbdev = kbdev; -+ model->ops = ops; -+ INIT_LIST_HEAD(&model->params); ++ /* Allocate pages for the actual dump buffers, as they tend to be fairly ++ * large. 
++ */ ++ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); ++ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + -+ err = model->ops->init(model); -+ if (err) { -+ dev_err(kbdev->dev, -+ "init of power model \'%s\' returned error %d\n", -+ ops->name, err); -+ kfree(model); -+ return NULL; ++ if (!addr) { ++ kfree(buffers); ++ return -ENOMEM; + } + -+ err = kbase_ipa_model_recalculate(model); -+ if (err) { -+ kbase_ipa_term_model(model); -+ return NULL; ++ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){ ++ .page_addr = addr, ++ .page_order = order, ++ .buf_cnt = n, ++ .bufs = buffers, ++ }; ++ ++ total_dump_buf_size = dump_buf_bytes * n; ++ /* Set the buffer of each dump buf */ ++ for (buf_idx = 0; buf_idx < n; buf_idx++) { ++ const size_t dump_buf_offset = dump_buf_bytes * buf_idx; ++ const size_t clk_cnt_buf_offset = ++ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx); ++ ++ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){ ++ .md_narrow = md_narrow, ++ .dump_buf = (u32 *)(addr + dump_buf_offset), ++ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset), ++ }; + } + -+ return model; ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_init_model); + -+static void kbase_ipa_term_locked(struct kbase_device *kbdev) ++void kbase_hwcnt_dump_buffer_narrow_array_free( ++ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) +{ -+ lockdep_assert_held(&kbdev->ipa.lock); -+ -+ /* Clean up the models */ -+ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) -+ kbase_ipa_term_model(kbdev->ipa.configured_model); -+ kbase_ipa_term_model(kbdev->ipa.fallback_model); ++ if (!dump_bufs) ++ return; + -+ kbdev->ipa.configured_model = NULL; -+ kbdev->ipa.fallback_model = NULL; ++ kfree(dump_bufs->bufs); ++ free_pages(dump_bufs->page_addr, dump_bufs->page_order); ++ memset(dump_bufs, 0, sizeof(*dump_bufs)); +} + -+int kbase_ipa_init(struct kbase_device *kbdev) ++void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, ++ const u64 *blk_em, size_t val_cnt) +{ ++ size_t val; + -+ const char *model_name; -+ const struct kbase_ipa_model_ops *ops; -+ struct kbase_ipa_model *default_model = NULL; -+ int err; -+ -+ mutex_init(&kbdev->ipa.lock); -+ /* -+ * Lock during init to avoid warnings from lockdep_assert_held (there -+ * shouldn't be any concurrent access yet). -+ */ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ /* The simple IPA model must *always* be present.*/ -+ ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); ++ for (val = 0; val < val_cnt; val++) { ++ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); ++ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; + -+ default_model = kbase_ipa_init_model(kbdev, ops); -+ if (!default_model) { -+ err = -EINVAL; -+ goto end; ++ dst_blk[val] = val_enabled ? 
src_val : 0; + } ++} + -+ kbdev->ipa.fallback_model = default_model; -+ err = of_property_read_string(kbdev->dev->of_node, -+ "ipa-model", -+ &model_name); -+ if (err) { -+ /* Attempt to load a match from GPU-ID */ -+ u32 gpu_id; ++void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) ++{ ++ const struct kbase_hwcnt_metadata_narrow *metadata_narrow; ++ size_t grp; ++ size_t clk; + -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ model_name = kbase_ipa_model_name_from_id(gpu_id); -+ dev_dbg(kbdev->dev, -+ "Inferring model from GPU ID 0x%x: \'%s\'\n", -+ gpu_id, model_name); -+ err = 0; -+ } else { -+ dev_dbg(kbdev->dev, -+ "Using ipa-model parameter from DT: \'%s\'\n", -+ model_name); -+ } ++ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || ++ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || ++ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || ++ WARN_ON(src->metadata->grp_cnt != 1) || ++ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != ++ src->metadata->grp_metadata[0].blk_cnt) || ++ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != ++ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || ++ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > ++ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) ++ return; + -+ if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { -+ ops = kbase_ipa_model_ops_find(kbdev, model_name); -+ kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); -+ if (!kbdev->ipa.configured_model) { -+ dev_warn(kbdev->dev, -+ "Failed to initialize ipa-model: \'%s\'\n" -+ "Falling back on default model\n", -+ model_name); -+ kbdev->ipa.configured_model = default_model; -+ } -+ } else { -+ kbdev->ipa.configured_model = default_model; -+ } ++ /* Don't use src metadata since src buffer is bigger than dst buffer. */ ++ metadata_narrow = dst_narrow->md_narrow; + -+ kbdev->ipa.last_sample_time = ktime_get_raw(); ++ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { ++ size_t blk; ++ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); + -+end: -+ if (err) -+ kbase_ipa_term_locked(kbdev); -+ else -+ dev_info(kbdev->dev, -+ "Using configured power model %s, and fallback %s\n", -+ kbdev->ipa.configured_model->ops->name, -+ kbdev->ipa.fallback_model->ops->name); ++ for (blk = 0; blk < blk_cnt; blk++) { ++ size_t blk_inst; ++ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( ++ metadata_narrow, grp, blk); + -+ mutex_unlock(&kbdev->ipa.lock); -+ return err; -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_init); ++ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { ++ /* The narrowed down buffer is only 32-bit. 
*/ ++ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( ++ dst_narrow, grp, blk, blk_inst); ++ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( ++ src, grp, blk, blk_inst); ++ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( ++ dst_enable_map, grp, blk, blk_inst); ++ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( ++ metadata_narrow, grp, blk); ++ /* Align upwards to include padding bytes */ ++ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( ++ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / ++ KBASE_HWCNT_VALUE_BYTES)); + -+void kbase_ipa_term(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->ipa.lock); -+ kbase_ipa_term_locked(kbdev); -+ mutex_unlock(&kbdev->ipa.lock); ++ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, ++ blk_em, val_cnt); ++ } ++ } ++ } + -+ mutex_destroy(&kbdev->ipa.lock); -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_term); ++ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { ++ bool clk_enabled = ++ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); + -+/** -+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP -+ * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range -+ * 0 < c < 2^26 to prevent overflow. -+ * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) -+ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) ++ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h +new file mode 100644 +index 000000000..afd236d71 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h +@@ -0,0 +1,330 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Keep a record of the approximate range of each value at every stage of the -+ * calculation, to ensure we don't overflow. This makes heavy use of the -+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual -+ * calculations in decimal for increased accuracy. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: Power consumption, in mW. 
Range: 0 < p < 2^13 (0W to ~8W) + */ -+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, -+ const u32 voltage) -+{ -+ /* Range: 2^8 < v2 < 2^16 m(V^2) */ -+ const u32 v2 = (voltage * voltage) / 1000; + -+ /* Range: 2^3 < f_MHz < 2^10 MHz */ -+ const u32 f_MHz = freq / 1000000; ++#ifndef _KBASE_HWCNT_GPU_NARROW_H_ ++#define _KBASE_HWCNT_GPU_NARROW_H_ + -+ /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ -+ const u32 v2f_big = v2 * f_MHz; ++#include "hwcnt/mali_kbase_hwcnt_types.h" ++#include + -+ /* Range: 2^1 < v2f < 2^16 MHz V^2 */ -+ const u32 v2f = v2f_big / 1000; ++struct kbase_device; ++struct kbase_hwcnt_metadata; ++struct kbase_hwcnt_enable_map; ++struct kbase_hwcnt_dump_buffer; + -+ /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. -+ * Must be < 2^42 to avoid overflowing the return value. -+ */ -+ const u64 v2fc = (u64) c * (u64) v2f; ++/** ++ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical ++ * layout of narrow dump buffers. ++ * For backward compatibility, the narrow ++ * metadata only supports 64 counters per ++ * block and 32-bit per block entry. ++ * @metadata: Non-NULL pointer to the metadata before narrow down to ++ * 32-bit per block entry, it has 64 counters per block and ++ * 64-bit per value. ++ * @dump_buf_bytes: The size in bytes after narrow 64-bit to 32-bit per block ++ * entry. ++ */ ++struct kbase_hwcnt_metadata_narrow { ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t dump_buf_bytes; ++}; + -+ /* Range: 0 < v2fc / 1000 < 2^13 mW */ -+ return div_u64(v2fc, 1000); -+} ++/** ++ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer. ++ * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to ++ * describe the layout of the narrow dump buffer. ++ * @dump_buf: Non-NULL pointer to an array of u32 values, the array size ++ * is md_narrow->dump_buf_bytes. ++ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed ++ * for each clock domain. ++ */ ++struct kbase_hwcnt_dump_buffer_narrow { ++ const struct kbase_hwcnt_metadata_narrow *md_narrow; ++ u32 *dump_buf; ++ u64 *clk_cnt_buf; ++}; + +/** -+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP -+ * @c: Static model coefficient, in uW/V^3. Should be in range -+ * 0 < c < 2^32 to prevent overflow. -+ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) ++ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump ++ * buffer array. ++ * @page_addr: Address of first allocated page. A single allocation is used for ++ * all narrow dump buffers in the array. ++ * @page_order: The allocation order of the pages, the order is on a logarithmic ++ * scale. ++ * @buf_cnt: The number of allocated dump buffers. ++ * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors. ++ */ ++struct kbase_hwcnt_dump_buffer_narrow_array { ++ unsigned long page_addr; ++ unsigned int page_order; ++ size_t buf_cnt; ++ struct kbase_hwcnt_dump_buffer_narrow *bufs; ++}; ++ ++/** ++ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from ++ * narrow metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. + * -+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) ++ * Return: Number of hardware counter groups described by narrow metadata. 
+ */ -+static u32 kbase_scale_static_power(const u32 c, const u32 voltage) ++static inline size_t ++kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow) +{ -+ /* Range: 2^8 < v2 < 2^16 m(V^2) */ -+ const u32 v2 = (voltage * voltage) / 1000; -+ -+ /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ -+ const u32 v3_big = v2 * voltage; -+ -+ /* Range: 2^7 < v3 < 2^19 m(V^3) */ -+ const u32 v3 = v3_big / 1000; -+ -+ /* -+ * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. -+ * The result should be < 2^52 to avoid overflowing the return value. -+ */ -+ const u64 v3c_big = (u64) c * (u64) v3; -+ -+ /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ -+ return div_u64(v3c_big, 1000000); ++ return kbase_hwcnt_metadata_group_count(md_narrow->metadata); +} + -+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) ++/** ++ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group ++ * from narrow metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * ++ * Return: Type of the group grp. ++ */ ++static inline u64 ++kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ size_t grp) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* Record the event of GPU entering protected mode. */ -+ kbdev->ipa_protection_mode_switched = true; ++ return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp); +} + -+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) ++/** ++ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a ++ * group from narrow metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * ++ * Return: Number of blocks in group grp. ++ */ ++static inline size_t ++kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ size_t grp) +{ -+ struct kbase_ipa_model *model; -+ unsigned long flags; -+ -+ lockdep_assert_held(&kbdev->ipa.lock); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (kbdev->ipa_protection_mode_switched || -+ kbdev->ipa.force_fallback_model) -+ model = kbdev->ipa.fallback_model; -+ else -+ model = kbdev->ipa.configured_model; -+ -+ /* -+ * Having taken cognizance of the fact that whether GPU earlier -+ * protected mode or not, the event can be now reset (if GPU is not -+ * currently in protected mode) so that configured model is used -+ * for the next sample. -+ */ -+ if (!kbdev->protected_mode) -+ kbdev->ipa_protection_mode_switched = false; -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return model; ++ return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); +} + -+static u32 get_static_power_locked(struct kbase_device *kbdev, -+ struct kbase_ipa_model *model, -+ unsigned long voltage) ++/** ++ * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of ++ * instances of a block ++ * from narrow metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * @blk: Index of the block in the group. ++ * ++ * Return: Number of instances of block blk in group grp. 
++ */ ++static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( ++ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) +{ -+ u32 power = 0; -+ int err; -+ u32 power_coeff; -+ -+ lockdep_assert_held(&model->kbdev->ipa.lock); -+ -+ if (!model->ops->get_static_coeff) -+ model = kbdev->ipa.fallback_model; -+ -+ if (model->ops->get_static_coeff) { -+ err = model->ops->get_static_coeff(model, &power_coeff); -+ if (!err) -+ power = kbase_scale_static_power(power_coeff, -+ (u32) voltage); -+ } -+ -+ return power; ++ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); +} + -+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE -+#if defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+static unsigned long kbase_get_static_power(struct devfreq *df, -+ unsigned long voltage) -+#else -+static unsigned long kbase_get_static_power(unsigned long voltage) -+#endif ++/** ++ * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter ++ * headers from narrow ++ * metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * @blk: Index of the block in the group. ++ * ++ * Return: Number of counter headers in each instance of block blk in group grp. ++ */ ++static inline size_t ++kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ size_t grp, size_t blk) +{ -+ struct kbase_ipa_model *model; -+ u32 power = 0; -+#if defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -+#else -+ struct kbase_device *kbdev = kbase_find_device(-1); -+#endif -+ -+ if (!kbdev) -+ return 0ul; -+ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ model = get_current_model(kbdev); -+ power = get_static_power_locked(kbdev, model, voltage); -+ -+ mutex_unlock(&kbdev->ipa.lock); -+ -+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) -+ kbase_release_device(kbdev); -+#endif -+ -+ return power; ++ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); +} -+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ + +/** -+ * opp_translate_freq_voltage() - Translate nominal OPP frequency from -+ * devicetree into the real frequency for -+ * top-level and shader cores. -+ * @kbdev: Device pointer -+ * @nominal_freq: Nominal frequency in Hz. -+ * @nominal_voltage: Nominal voltage, in mV. -+ * @freqs: Pointer to array of real frequency values. -+ * @volts: Pointer to array of voltages. ++ * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of ++ * counters from narrow ++ * metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * @blk: Index of the block in the group. + * -+ * If there are 2 clock domains, then top-level and shader cores can operate -+ * at different frequency and voltage level. The nominal frequency ("opp-hz") -+ * used by devfreq from the devicetree may not be same as the real frequency -+ * at which top-level and shader cores are operating, so a translation is -+ * needed. -+ * Nominal voltage shall always be same as the real voltage for top-level. ++ * Return: Number of counters in each instance of block blk in group grp. 
+ */ -+static void opp_translate_freq_voltage(struct kbase_device *kbdev, -+ unsigned long nominal_freq, -+ unsigned long nominal_voltage, -+ unsigned long *freqs, -+ unsigned long *volts) -+{ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* An arbitrary voltage and frequency value can be chosen for testing -+ * in no mali configuration which may not match with any OPP level. -+ */ -+ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq; -+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage; -+ -+ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq; -+ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage; -+#else -+ u64 core_mask; -+ unsigned int i; -+ -+ kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, -+ freqs, volts); -+ CSTD_UNUSED(core_mask); -+ -+ /* Convert micro volts to milli volts */ -+ for (i = 0; i < kbdev->nr_clocks; i++) -+ volts[i] /= 1000; -+ -+ if (kbdev->nr_clocks == 1) { -+ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = -+ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; -+ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = -+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; -+ } -+#endif -+} -+ -+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE -+#if defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+static unsigned long kbase_get_dynamic_power(struct devfreq *df, -+ unsigned long freq, -+ unsigned long voltage) -+#else -+static unsigned long kbase_get_dynamic_power(unsigned long freq, -+ unsigned long voltage) -+#endif -+{ -+ struct kbase_ipa_model *model; -+ unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ u32 power = 0; -+ int err = 0; -+#if defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -+#else -+ struct kbase_device *kbdev = kbase_find_device(-1); -+#endif -+ -+ if (!kbdev) -+ return 0ul; -+ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ model = kbdev->ipa.fallback_model; -+ -+ err = model->ops->get_dynamic_coeff(model, power_coeffs); -+ -+ if (!err) { -+ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); -+ -+ power = kbase_scale_dynamic_power( -+ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], -+ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], -+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); -+ -+ /* Here unlike kbase_get_real_power(), shader core frequency is -+ * used for the scaling as simple power model is used to obtain -+ * the value of dynamic coefficient (which is a fixed value -+ * retrieved from the device tree). 
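opp_translate_freq_voltage() above also normalises units (the translated voltages come back in uV and are divided down to mV) and, when only one clock domain exists, mirrors the top-level frequency and voltage into the shader-core slots. A small standalone sketch of that convention, using local enum names in place of the KBASE_IPA_BLOCK_TYPE_* constants:

	#include <stdio.h>

	enum { BLK_TOP_LEVEL, BLK_SHADER_CORES, BLK_NUM };

	/* Mirror of the single-clock-domain handling: volts[] arrive in uV and
	 * are converted to mV; with one clock the shader-core entries are just
	 * copies of the top-level ones.
	 */
	static void translate(unsigned int nr_clocks,
			      unsigned long *freqs, unsigned long *volts)
	{
		unsigned int i;

		for (i = 0; i < nr_clocks; i++)
			volts[i] /= 1000;               /* uV -> mV */

		if (nr_clocks == 1) {
			freqs[BLK_SHADER_CORES] = freqs[BLK_TOP_LEVEL];
			volts[BLK_SHADER_CORES] = volts[BLK_TOP_LEVEL];
		}
	}

	int main(void)
	{
		unsigned long freqs[BLK_NUM] = { 800000000, 0 };
		unsigned long volts[BLK_NUM] = { 800000, 0 };   /* 800000 uV */

		translate(1, freqs, volts);
		printf("shader cores: %lu Hz at %lu mV\n",
		       freqs[BLK_SHADER_CORES], volts[BLK_SHADER_CORES]);
		return 0;
	}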
-+ */ -+ power += kbase_scale_dynamic_power( -+ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], -+ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], -+ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); -+ } else -+ dev_err_ratelimited(kbdev->dev, -+ "Model %s returned error code %d\n", -+ model->ops->name, err); -+ -+ mutex_unlock(&kbdev->ipa.lock); -+ -+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) -+ kbase_release_device(kbdev); -+#endif -+ -+ return power; -+} -+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ -+ -+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, -+ unsigned long freq, -+ unsigned long voltage) -+{ -+ struct kbase_ipa_model *model; -+ unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; -+ struct kbasep_pm_metrics diff; -+ u64 total_time; -+ bool skip_utilization_scaling = false; -+ int err = 0; -+ -+ lockdep_assert_held(&kbdev->ipa.lock); -+ -+ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); -+ -+ model = get_current_model(kbdev); -+ -+ err = model->ops->get_dynamic_coeff(model, power_coeffs); -+ -+ /* If the counter model returns an error (e.g. switching back to -+ * protected mode and failing to read counters, or a counter sample -+ * with too few cycles), revert to the fallback model. -+ */ -+ if (err && model != kbdev->ipa.fallback_model) { -+ /* No meaningful scaling for GPU utilization can be done if -+ * the sampling interval was too long. This is equivalent to -+ * assuming GPU was busy throughout (similar to what is done -+ * during protected mode). -+ */ -+ if (err == -EOVERFLOW) -+ skip_utilization_scaling = true; -+ -+ model = kbdev->ipa.fallback_model; -+ err = model->ops->get_dynamic_coeff(model, power_coeffs); -+ } -+ -+ if (WARN_ON(err)) -+ return err; -+ -+ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); -+ -+ *power = kbase_scale_dynamic_power( -+ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], -+ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], -+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); -+ -+ if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) { -+ unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]; -+ -+ /* As per the HW team, the top-level frequency needs to be used -+ * for the scaling if the counter based model was used as -+ * counter values are normalized with the GPU_ACTIVE counter -+ * value, which increments at the rate of top-level frequency. -+ */ -+ if (model != kbdev->ipa.fallback_model) -+ freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; -+ -+ *power += kbase_scale_dynamic_power( -+ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], -+ freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); -+ } -+ -+ if (!skip_utilization_scaling) { -+ /* time_busy / total_time cannot be >1, so assigning the 64-bit -+ * result of div_u64 to *power cannot overflow. 
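The utilisation scaling applied just below in kbase_get_real_power_locked() weights the raw dynamic estimate by time_busy / (time_busy + time_idle), clamping the divisor to at least 1 so an empty sample cannot divide by zero. A standalone illustration with made-up metrics:

	#include <stdint.h>
	#include <stdio.h>

	/* Scale a raw power estimate by GPU utilisation; busy/total <= 1, so
	 * the result can never exceed the input, matching the comment above.
	 */
	static uint32_t scale_by_utilisation(uint32_t power_mw,
					     uint32_t time_busy, uint32_t time_idle)
	{
		uint64_t total = (uint64_t)time_busy + time_idle;

		if (total < 1)
			total = 1;      /* mirrors max(total_time, 1ull) */

		return (uint32_t)(((uint64_t)power_mw * time_busy) / total);
	}

	int main(void)
	{
		/* 1000 mW estimate, GPU busy 75% of the sample period -> 750 mW */
		printf("%u mW\n", scale_by_utilisation(1000, 75, 25));
		return 0;
	}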
-+ */ -+ total_time = diff.time_busy + (u64) diff.time_idle; -+ *power = div_u64(*power * (u64) diff.time_busy, -+ max(total_time, 1ull)); -+ } -+ -+ *power += get_static_power_locked(kbdev, model, -+ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); -+ -+ return err; -+} -+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); -+ -+int kbase_get_real_power(struct devfreq *df, u32 *power, -+ unsigned long freq, -+ unsigned long voltage) -+{ -+ int ret; -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ mutex_lock(&kbdev->ipa.lock); -+ ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); -+ mutex_unlock(&kbdev->ipa.lock); -+ -+ return ret; -+} -+KBASE_EXPORT_TEST_API(kbase_get_real_power); -+ -+struct devfreq_cooling_power kbase_ipa_power_model_ops = { -+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE -+ .get_static_power = &kbase_get_static_power, -+ .get_dynamic_power = &kbase_get_dynamic_power, -+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ -+#if defined(CONFIG_MALI_PWRSOFT_765) || \ -+ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE -+ .get_real_power = &kbase_get_real_power, -+#endif -+}; -+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); -+ -+void kbase_ipa_reset_data(struct kbase_device *kbdev) ++static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( ++ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) +{ -+ ktime_t now, diff; -+ s64 elapsed_time; -+ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ now = ktime_get_raw(); -+ diff = ktime_sub(now, kbdev->ipa.last_sample_time); -+ elapsed_time = ktime_to_ms(diff); -+ -+ if (elapsed_time > RESET_INTERVAL_MS) { -+ struct kbasep_pm_metrics diff; -+ struct kbase_ipa_model *model; -+ -+ kbase_pm_get_dvfs_metrics( -+ kbdev, &kbdev->ipa.last_metrics, &diff); -+ -+ model = get_current_model(kbdev); -+ if (model != kbdev->ipa.fallback_model) -+ model->ops->reset_counter_data(model); -+ -+ kbdev->ipa.last_sample_time = ktime_get_raw(); -+ } -+ -+ mutex_unlock(&kbdev->ipa.lock); ++ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); +} -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h -new file mode 100644 -index 000000000..c875ffb49 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h -@@ -0,0 +1,304 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_IPA_H_ -+#define _KBASE_IPA_H_ -+ -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+ -+struct devfreq; -+ -+/** -+ * enum kbase_ipa_block_type - Type of block for which power estimation is done. 
-+ * -+ * @KBASE_IPA_BLOCK_TYPE_USING_CLK_MALI: -+ * Blocks using clk_mali in dts. -+ * @KBASE_IPA_BLOCK_TYPE_TOP_LEVEL: Top-level block, that covers CSHW, -+ * MEMSYS, Tiler. -+ * @KBASE_IPA_BLOCK_TYPE_SHADER_CORES: All Shader cores. -+ * @KBASE_IPA_BLOCK_TYPE_FOR_CLK_GPU: Dummy for clk_gpu in dts. -+ * @KBASE_IPA_BLOCK_TYPE_NUM: Number of blocks. -+ */ -+enum kbase_ipa_block_type { -+ KBASE_IPA_BLOCK_TYPE_USING_CLK_MALI, -+ KBASE_IPA_BLOCK_TYPE_TOP_LEVEL, -+ KBASE_IPA_BLOCK_TYPE_SHADER_CORES, -+ KBASE_IPA_BLOCK_TYPE_FOR_CLK_GPU, -+ KBASE_IPA_BLOCK_TYPE_NUM -+}; -+ -+/** -+ * struct kbase_ipa_model - Object describing a particular IPA model. -+ * @kbdev: pointer to kbase device -+ * @model_data: opaque pointer to model specific data, accessed -+ * only by model specific methods. -+ * @ops: pointer to object containing model specific methods. -+ * @params: head of the list of debugfs params added for model -+ * @missing_dt_node_warning: flag to limit the matching power model DT not found -+ * warning to once. -+ */ -+struct kbase_ipa_model { -+ struct kbase_device *kbdev; -+ void *model_data; -+ const struct kbase_ipa_model_ops *ops; -+ struct list_head params; -+ bool missing_dt_node_warning; -+}; + +/** -+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter -+ * @model: pointer to IPA model -+ * @name: name of corresponding debugfs entry -+ * @addr: address where the value is stored -+ * @num_elems: number of elements (1 if not an array) -+ * @dt_required: if false, a corresponding devicetree entry is not required, -+ * and the current value will be used. If true, a warning is -+ * output and the data is zeroed ++ * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values ++ * from narrow metadata. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @grp: Index of the group in the narrow metadata. ++ * @blk: Index of the block in the group. + * -+ * Return: 0 on success, or an error code ++ * Return: Number of headers plus counters in each instance of block blk ++ * in group grp. + */ -+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, -+ const char *name, s32 *addr, -+ size_t num_elems, bool dt_required); ++static inline size_t ++kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ size_t grp, size_t blk) ++{ ++ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + ++ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); ++} + +/** -+ * kbase_ipa_model_add_param_string - Add a string model parameter -+ * @model: pointer to IPA model -+ * @name: name of corresponding debugfs entry -+ * @addr: address where the value is stored -+ * @size: size, in bytes, of the value storage (so the maximum string -+ * length is size - 1) -+ * @dt_required: if false, a corresponding devicetree entry is not required, -+ * and the current value will be used. If true, a warning is -+ * output and the data is zeroed ++ * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a ++ * narrowed block instance's ++ * dump buffer. ++ * @buf: Non-NULL pointer to narrow dump buffer. ++ * @grp: Index of the group in the narrow metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + * -+ * Return: 0 on success, or an error code ++ * Return: u32* to the dump buffer for the block instance. 
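The accessor defined just after this note resolves a (group, block, instance) triple into a dump-buffer pointer purely from precomputed indices and strides held in the metadata. A standalone sketch of that offset arithmetic with small, hypothetical index and stride values:

	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical, flattened view of the per-group / per-block metadata
	 * fields used by kbase_hwcnt_dump_buffer_narrow_block_instance().
	 */
	struct blk_md {
		size_t dump_buf_index;   /* offset of this block's first instance  */
		size_t dump_buf_stride;  /* values per instance, including padding */
	};

	struct grp_md {
		size_t dump_buf_index;   /* offset of this group within the buffer */
		struct blk_md *blks;
	};

	static size_t block_instance_offset(const struct grp_md *grp, size_t blk,
					    size_t blk_inst)
	{
		return grp->dump_buf_index + grp->blks[blk].dump_buf_index +
		       grp->blks[blk].dump_buf_stride * blk_inst;
	}

	int main(void)
	{
		/* Block 0: four instances of 64 values, so block 1 starts at 256. */
		struct blk_md blks[2] = { { 0, 64 }, { 256, 64 } };
		struct grp_md grp = { 0, blks };

		/* Third instance of block 1 starts 256 + 64 * 2 = 384 values in. */
		printf("offset = %zu\n", block_instance_offset(&grp, 1, 2));
		return 0;
	}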
+ */ -+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, -+ const char *name, char *addr, -+ size_t size, bool dt_required); -+ -+struct kbase_ipa_model_ops { -+ char *name; -+ /* The init, recalculate and term ops on the default model are always -+ * called. However, all the other models are only invoked if the model -+ * is selected in the device tree. Otherwise they are never -+ * initialized. Additional resources can be acquired by models in -+ * init(), however they must be terminated in the term(). -+ */ -+ int (*init)(struct kbase_ipa_model *model); -+ /* Called immediately after init(), or when a parameter is changed, so -+ * that any coefficients derived from model parameters can be -+ * recalculated -+ */ -+ int (*recalculate)(struct kbase_ipa_model *model); -+ void (*term)(struct kbase_ipa_model *model); -+ /* -+ * get_dynamic_coeff() - calculate dynamic power coefficient -+ * @model: pointer to model -+ * @coeffp: pointer to return value location -+ * -+ * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which -+ * is then scaled by the IPA framework according to the current OPP's -+ * frequency and voltage. -+ * -+ * Return: 0 on success, or an error code. -EOVERFLOW error code will -+ * indicate that sampling interval was too large and no meaningful -+ * scaling for GPU utiliation can be done. -+ */ -+ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); -+ /* -+ * get_static_coeff() - calculate static power coefficient -+ * @model: pointer to model -+ * @coeffp: pointer to return value location -+ * -+ * Calculate a static power coefficient, with units uW/(V^3), which is -+ * scaled by the IPA framework according to the current OPP's voltage. -+ * -+ * Return: 0 on success, or an error code. -+ */ -+ int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); -+ -+ /* -+ * reset_counter_data() - Reset the HW counter data used for calculating -+ * dynamic power coefficient -+ * @model: pointer to model -+ * -+ * This method is currently applicable only to the counter based model. -+ * The next call to get_dynamic_coeff() will have to calculate the -+ * dynamic power coefficient based on the HW counter data generated -+ * from this point onwards. -+ */ -+ void (*reset_counter_data)(struct kbase_ipa_model *model); -+}; ++static inline u32 * ++kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, ++ size_t grp, size_t blk, size_t blk_inst) ++{ ++ return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + ++ buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + ++ (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * ++ blk_inst); ++} + +/** -+ * kbase_ipa_init - Initialize the IPA feature -+ * @kbdev: pointer to kbase device ++ * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC ++ * entries per block truncated to ++ * 64 entries and block entry size ++ * narrowed down to 32-bit. + * -+ * simple IPA power model is initialized as a fallback model and if that -+ * initialization fails then IPA is not used. -+ * The device tree is read for the name of ipa model to be used, by using the -+ * property string "ipa-model". If that ipa model is supported then it is -+ * initialized but if the initialization fails then simple power model is used. ++ * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored ++ * on success. 
++ * @src_md: Non-NULL pointer to the HWC metadata used as the source to ++ * create dst_md_narrow. + * -+ * Return: 0 on success, negative -errno on error -+ */ -+int kbase_ipa_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_ipa_term - Terminate the IPA feature -+ * @kbdev: pointer to kbase device ++ * For backward compatibility of the interface to user clients, a new metadata ++ * with entries per block truncated to 64 and block entry size narrowed down ++ * to 32-bit will be created for dst_md_narrow. ++ * The total entries per block in src_md must be 64 or 128, if it's other ++ * values, function returns error since it's not supported. + * -+ * Both simple IPA power model and model retrieved from device tree are -+ * terminated. ++ * Return: 0 on success, else error code. + */ -+void kbase_ipa_term(struct kbase_device *kbdev); ++int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, ++ const struct kbase_hwcnt_metadata *src_md); + +/** -+ * kbase_ipa_model_recalculate - Recalculate the model coefficients -+ * @model: pointer to the IPA model object, already initialized -+ * -+ * It shall be called immediately after the model has been initialized -+ * or when the model parameter has changed, so that any coefficients -+ * derived from parameters can be recalculated. -+ * Its a wrapper for the module specific recalculate() method. -+ * -+ * Return: 0 on success, negative -errno on error ++ * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow ++ * metadata object. ++ * @md_narrow: Pointer to hardware counter narrow metadata. + */ -+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); ++void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); + +/** -+ * kbase_ipa_model_ops_find - Lookup an IPA model using its name -+ * @kbdev: pointer to kbase device -+ * @name: name of model to lookup ++ * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be ++ * initialised to undefined values, so must be used as a copy ++ * destination, or cleared before use. + * -+ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. ++ * Return: 0 on success, else error code. + */ -+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, -+ const char *name); ++int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, ++ struct kbase_hwcnt_dump_buffer_narrow *dump_buf); + +/** -+ * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name -+ * @kbdev: pointer to kbase device -+ * @name: name of counter model to lookup ++ * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. ++ * @dump_buf: Dump buffer to be freed. + * -+ * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup -+ * failed. ++ * Can be safely called on an all-zeroed narrow dump buffer structure, or on an ++ * already freed narrow dump buffer. 
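kbase_hwcnt_gpu_metadata_narrow_create(), declared above, only accepts source blocks with 64 or 128 values per block and always emits 64-entry, 32-bit blocks. A standalone sketch of the resulting per-instance size, with hypothetical header/counter splits; the constants here are illustrative stand-ins, not the driver's KBASE_HWCNT_* macros:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define NARROW_VALUES_PER_BLOCK 64u           /* truncated entry count */
	#define NARROW_VALUE_BYTES sizeof(uint32_t)   /* narrowed to 32-bit    */

	/* Returns the narrowed per-instance dump size in bytes, or 0 if the
	 * source layout is not one the narrowing interface supports (64 or 128
	 * values per block, as documented above).
	 */
	static size_t narrow_block_bytes(size_t hdr_cnt, size_t ctr_cnt)
	{
		size_t src_values = hdr_cnt + ctr_cnt;

		if (src_values != 64 && src_values != 128)
			return 0;

		return NARROW_VALUES_PER_BLOCK * NARROW_VALUE_BYTES;
	}

	int main(void)
	{
		/* A 4-header/124-counter block narrows from 128 64-bit values
		 * down to 64 32-bit values, i.e. 256 bytes per instance.
		 */
		printf("%zu bytes\n", narrow_block_bytes(4, 124));
		return 0;
	}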
+ */ -+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( -+ struct kbase_device *kbdev, const char *name); ++void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); + +/** -+ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID -+ * @gpu_id: GPU ID of GPU the model will be used for ++ * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow ++ * dump buffers. ++ * @md_narrow: Non-NULL pointer to narrow metadata. ++ * @n: Number of narrow dump buffers to allocate ++ * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array ++ * object to be initialised. + * -+ * Return: The name of the appropriate counter-based model, or the name of the -+ * fallback model if no counter model exists. -+ */ -+const char *kbase_ipa_model_name_from_id(u32 gpu_id); -+ -+/** -+ * kbase_ipa_counter_model_name_from_id - Find the best counter model for a -+ * given GPU ID -+ * @gpu_id: GPU ID of GPU the counter model will be used for ++ * A single zeroed contiguous page allocation will be used for all of the ++ * buffers inside the object, where: ++ * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes + * -+ * Return: The name of the appropriate counter-based model, or NULL if the -+ * no counter model exists. ++ * Return: 0 on success, else error code. + */ -+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); ++int kbase_hwcnt_dump_buffer_narrow_array_alloc( ++ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, ++ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); + +/** -+ * kbase_ipa_init_model - Initilaize the particular IPA model -+ * @kbdev: pointer to kbase device -+ * @ops: pointer to object containing model specific methods. -+ * -+ * Initialize the model corresponding to the @ops pointer passed. -+ * The init() method specified in @ops would be called. -+ * -+ * Return: pointer to kbase_ipa_model on success, NULL on error -+ */ -+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, -+ const struct kbase_ipa_model_ops *ops); -+/** -+ * kbase_ipa_term_model - Terminate the particular IPA model -+ * @model: pointer to the IPA model object, already initialized ++ * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer ++ * array. ++ * @dump_bufs: Narrow Dump buffer array to be freed. + * -+ * Terminate the model, using the term() method. -+ * Module specific parameters would be freed. ++ * Can be safely called on an all-zeroed narrow dump buffer array structure, or ++ * on an already freed narrow dump buffer array. + */ -+void kbase_ipa_term_model(struct kbase_ipa_model *model); ++void kbase_hwcnt_dump_buffer_narrow_array_free( ++ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); + +/** -+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into -+ * protected mode -+ * @kbdev: pointer to kbase device ++ * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block ++ * values from source to ++ * destination. ++ * @dst_blk: Non-NULL pointer to destination block obtained from a call to ++ * kbase_hwcnt_dump_buffer_narrow_block_instance. ++ * @src_blk: Non-NULL pointer to source block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_cnt: Number of values in the block. 
+ * -+ * Makes IPA aware of the GPU switching to protected mode. ++ * After the copy, any disabled values in destination will be zero, the enabled ++ * values in destination will be saturated at U32_MAX if the corresponding ++ * source value is bigger than U32_MAX, or copy the value from source if the ++ * corresponding source value is less than or equal to U32_MAX. + */ -+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); ++void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, ++ const u64 *blk_em, size_t val_cnt); + +/** -+ * kbase_get_real_power() - get the real power consumption of the GPU -+ * @df: dynamic voltage and frequency scaling information for the GPU. -+ * @power: where to store the power consumption, in mW. -+ * @freq: a frequency, in HZ. -+ * @voltage: a voltage, in mV. -+ * -+ * The returned value incorporates both static and dynamic power consumption. ++ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a ++ * narrow dump buffer. ++ * @dst_narrow: Non-NULL pointer to destination dump buffer. ++ * @src: Non-NULL pointer to source dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * Return: 0 on success, or an error code. -+ */ -+int kbase_get_real_power(struct devfreq *df, u32 *power, -+ unsigned long freq, -+ unsigned long voltage); -+ -+/* Called by kbase_get_real_power() to invoke the power models. -+ * Must be called with kbdev->ipa.lock held. -+ * This function is only exposed for use by unit tests. -+ */ -+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, -+ unsigned long freq, -+ unsigned long voltage); -+ -+extern struct devfreq_cooling_power kbase_ipa_power_model_ops; -+ -+/** -+ * kbase_ipa_reset_data() - Reset the data required for power estimation. -+ * @kbdev: Pointer to kbase device. ++ * After the operation, all non-enabled values (including padding bytes) will be ++ * zero. Slower than the non-strict variant. + * -+ * This function is called to ensure a meaningful baseline for -+ * kbase_get_real_power(), when thermal governor starts the polling, and -+ * that is achieved by updating the GPU utilization metrics and retrieving -+ * the accumulated value of HW counters. -+ * Basically this function collects all the data required for power estimation -+ * but does not process it. ++ * The enabled values in dst_narrow will be saturated at U32_MAX if the ++ * corresponding source value is bigger than U32_MAX, or copy the value from ++ * source if the corresponding source value is less than or equal to U32_MAX. 
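The strict narrow copy documented above applies three rules per value: honour the enable bitmap, saturate anything above U32_MAX, and zero whatever is disabled. A standalone sketch of that per-value rule, assuming one enable bit per counter value (the layout the metadata code later in this hunk sets up); it is not the driver's kbase_hwcnt_dump_buffer_block_copy_strict_narrow():

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Copy val_cnt 64-bit counters into 32-bit destinations: disabled values
	 * become zero, enabled values saturate at UINT32_MAX.
	 */
	static void copy_strict_narrow(uint32_t *dst, const uint64_t *src,
				       const uint64_t *enable_bits, size_t val_cnt)
	{
		size_t i;

		for (i = 0; i < val_cnt; i++) {
			int enabled = (enable_bits[i / 64] >> (i % 64)) & 1;
			uint64_t v = enabled ? src[i] : 0;

			dst[i] = (v > UINT32_MAX) ? UINT32_MAX : (uint32_t)v;
		}
	}

	int main(void)
	{
		uint64_t src[4] = { 5, 0x1FFFFFFFFULL, 7, 9 };
		uint64_t en[1] = { 0xBULL };   /* values 0, 1 and 3 enabled */
		uint32_t dst[4];
		size_t i;

		copy_strict_narrow(dst, src, en, 4);
		for (i = 0; i < 4; i++)
			printf("dst[%zu] = %u\n", i, dst[i]);
		/* Expected: 5, 4294967295 (saturated), 0 (disabled), 9 */
		return 0;
	}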
+ */ -+void kbase_ipa_reset_data(struct kbase_device *kbdev); -+ -+#else /* !(defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ -+ -+static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) -+{ } -+ -+#endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); + -+#endif -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c ++#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c new file mode 100644 -index 000000000..a0963bbb2 +index 000000000..763eb315d --- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c -@@ -0,0 +1,324 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c +@@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -196859,821 +199808,506 @@ index 000000000..a0963bbb2 + * + */ + -+#include -+#include -+#include -+ -+#include "mali_kbase.h" -+#include "mali_kbase_ipa.h" -+#include "mali_kbase_ipa_debugfs.h" -+ -+struct kbase_ipa_model_param { -+ char *name; -+ union { -+ void *voidp; -+ s32 *s32p; -+ char *str; -+ } addr; -+ size_t size; -+ enum kbase_ipa_model_param_type type; -+ struct kbase_ipa_model *model; -+ struct list_head link; -+}; -+ -+static int param_int_get(void *data, u64 *val) -+{ -+ struct kbase_ipa_model_param *param = data; -+ -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ *(s64 *) val = *param->addr.s32p; -+ mutex_unlock(¶m->model->kbdev->ipa.lock); ++#include "hwcnt/mali_kbase_hwcnt_types.h" + -+ return 0; -+} ++#include + -+static int param_int_set(void *data, u64 val) ++int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, ++ const struct kbase_hwcnt_metadata **out_metadata) +{ -+ struct kbase_ipa_model_param *param = data; -+ struct kbase_ipa_model *model = param->model; -+ s64 sval = (s64) val; -+ s32 old_val; -+ int err = 0; -+ -+ if (sval < S32_MIN || sval > S32_MAX) -+ return -ERANGE; -+ -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ old_val = *param->addr.s32p; -+ *param->addr.s32p = val; -+ err = kbase_ipa_model_recalculate(model); -+ if (err < 0) -+ *param->addr.s32p = old_val; -+ mutex_unlock(¶m->model->kbdev->ipa.lock); -+ -+ return err; -+} ++ char *buf; ++ struct kbase_hwcnt_metadata *metadata; ++ struct kbase_hwcnt_group_metadata *grp_mds; ++ size_t grp; ++ size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ ++ size_t dump_buf_count; /* Number of u64 values (inc padding) */ ++ size_t avail_mask_bits; /* Number of availability mask bits */ + -+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); ++ size_t size; ++ size_t offset; + -+static ssize_t param_string_get(struct file *file, char __user *user_buf, -+ size_t count, loff_t *ppos) -+{ -+ struct kbase_ipa_model_param *param = file->private_data; -+ ssize_t ret; -+ size_t len; ++ if (!desc || !out_metadata) ++ return 
-EINVAL; + -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ len = strnlen(param->addr.str, param->size - 1) + 1; -+ ret = simple_read_from_buffer(user_buf, count, ppos, -+ param->addr.str, len); -+ mutex_unlock(¶m->model->kbdev->ipa.lock); ++ /* The maximum number of clock domains is 64. */ ++ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) ++ return -EINVAL; + -+ return ret; -+} ++ /* Calculate the bytes needed to tightly pack the metadata */ + -+static ssize_t param_string_set(struct file *file, const char __user *user_buf, -+ size_t count, loff_t *ppos) -+{ -+ struct kbase_ipa_model_param *param = file->private_data; -+ struct kbase_ipa_model *model = param->model; -+ char *old_str = NULL; -+ ssize_t ret = count; -+ size_t buf_size; -+ int err; ++ /* Top level metadata */ ++ size = 0; ++ size += sizeof(struct kbase_hwcnt_metadata); + -+ mutex_lock(&model->kbdev->ipa.lock); ++ /* Group metadata */ ++ size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + -+ if (count > param->size) { -+ ret = -EINVAL; -+ goto end; ++ /* Block metadata */ ++ for (grp = 0; grp < desc->grp_cnt; grp++) { ++ size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt; + } + -+ old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL); -+ if (!old_str) { -+ ret = -ENOMEM; -+ goto end; -+ } ++ /* Single allocation for the entire metadata */ ++ buf = kmalloc(size, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; + -+ buf_size = min(param->size - 1, count); -+ if (copy_from_user(param->addr.str, user_buf, buf_size)) { -+ ret = -EFAULT; -+ goto end; -+ } ++ /* Use the allocated memory for the metadata and its members */ + -+ param->addr.str[buf_size] = '\0'; ++ /* Bump allocate the top level metadata */ ++ offset = 0; ++ metadata = (struct kbase_hwcnt_metadata *)(buf + offset); ++ offset += sizeof(struct kbase_hwcnt_metadata); + -+ err = kbase_ipa_model_recalculate(model); -+ if (err < 0) { -+ u32 string_len = strscpy(param->addr.str, old_str, param->size); ++ /* Bump allocate the group metadata */ ++ grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); ++ offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + -+ string_len += sizeof(char); -+ /* Make sure that the source string fit into the buffer. 
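kbase_hwcnt_metadata_create() above sizes the whole metadata tree first, makes one allocation, and then bump-allocates the top-level, group and block structures out of that single buffer (later cross-checking the final offset against the computed size). A standalone sketch of the same carve-one-allocation pattern with simplified, hypothetical structures:

	#include <stdio.h>
	#include <stdlib.h>

	struct blk { int id; };
	struct grp { size_t blk_cnt; struct blk *blks; };
	struct top { size_t grp_cnt; struct grp *grps; };

	/* One allocation holds the top-level struct, the group array and every
	 * per-group block array; pointers are carved out by walking an offset.
	 */
	static struct top *create(size_t grp_cnt, size_t blks_per_grp)
	{
		size_t size = sizeof(struct top) + grp_cnt * sizeof(struct grp) +
			      grp_cnt * blks_per_grp * sizeof(struct blk);
		char *buf = calloc(1, size);
		size_t offset = 0, g;
		struct top *t;

		if (!buf)
			return NULL;

		t = (struct top *)(buf + offset);
		offset += sizeof(struct top);

		t->grp_cnt = grp_cnt;
		t->grps = (struct grp *)(buf + offset);
		offset += grp_cnt * sizeof(struct grp);

		for (g = 0; g < grp_cnt; g++) {
			t->grps[g].blk_cnt = blks_per_grp;
			t->grps[g].blks = (struct blk *)(buf + offset);
			offset += blks_per_grp * sizeof(struct blk);
		}

		/* Mirrors the size/offset consistency check in the driver. */
		if (offset != size)
			fprintf(stderr, "size/offset mismatch\n");

		return t;   /* released with a single free(), like kfree(metadata) */
	}

	int main(void)
	{
		struct top *t = create(2, 3);

		printf("group 1, block 2 at %p\n", (void *)&t->grps[1].blks[2]);
		free(t);
		return 0;
	}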
*/ -+ KBASE_DEBUG_ASSERT(string_len <= param->size); -+ CSTD_UNUSED(string_len); ++ enable_map_count = 0; ++ dump_buf_count = 0; ++ avail_mask_bits = 0; + -+ ret = err; -+ } ++ for (grp = 0; grp < desc->grp_cnt; grp++) { ++ size_t blk; + -+end: -+ kfree(old_str); -+ mutex_unlock(&model->kbdev->ipa.lock); ++ const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; ++ struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + -+ return ret; -+} ++ size_t group_enable_map_count = 0; ++ size_t group_dump_buffer_count = 0; ++ size_t group_avail_mask_bits = 0; + -+static const struct file_operations fops_string = { -+ .owner = THIS_MODULE, -+ .read = param_string_get, -+ .write = param_string_set, -+ .open = simple_open, -+ .llseek = default_llseek, -+}; ++ /* Bump allocate this group's block metadata */ ++ struct kbase_hwcnt_block_metadata *blk_mds = ++ (struct kbase_hwcnt_block_metadata *)(buf + offset); ++ offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; + -+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, -+ void *addr, size_t size, -+ enum kbase_ipa_model_param_type type) -+{ -+ struct kbase_ipa_model_param *param; ++ /* Fill in each block in the group's information */ ++ for (blk = 0; blk < grp_desc->blk_cnt; blk++) { ++ const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; ++ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; ++ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; + -+ param = kzalloc(sizeof(*param), GFP_KERNEL); ++ blk_md->type = blk_desc->type; ++ blk_md->inst_cnt = blk_desc->inst_cnt; ++ blk_md->hdr_cnt = blk_desc->hdr_cnt; ++ blk_md->ctr_cnt = blk_desc->ctr_cnt; ++ blk_md->enable_map_index = group_enable_map_count; ++ blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); ++ blk_md->dump_buf_index = group_dump_buffer_count; ++ blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( ++ n_values, ++ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); ++ blk_md->avail_mask_index = group_avail_mask_bits; + -+ if (!param) -+ return -ENOMEM; ++ group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; ++ group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; ++ group_avail_mask_bits += blk_md->inst_cnt; ++ } + -+ /* 'name' is stack-allocated for array elements, so copy it into -+ * heap-allocated storage -+ */ -+ param->name = kstrdup(name, GFP_KERNEL); ++ /* Fill in the group's information */ ++ grp_md->type = grp_desc->type; ++ grp_md->blk_cnt = grp_desc->blk_cnt; ++ grp_md->blk_metadata = blk_mds; ++ grp_md->enable_map_index = enable_map_count; ++ grp_md->dump_buf_index = dump_buf_count; ++ grp_md->avail_mask_index = avail_mask_bits; + -+ if (!param->name) { -+ kfree(param); -+ return -ENOMEM; ++ enable_map_count += group_enable_map_count; ++ dump_buf_count += group_dump_buffer_count; ++ avail_mask_bits += group_avail_mask_bits; + } + -+ param->addr.voidp = addr; -+ param->size = size; -+ param->type = type; -+ param->model = model; ++ /* Fill in the top level metadata's information */ ++ metadata->grp_cnt = desc->grp_cnt; ++ metadata->grp_metadata = grp_mds; ++ metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; ++ metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; ++ metadata->avail_mask = desc->avail_mask; ++ metadata->clk_cnt = desc->clk_cnt; + -+ list_add(¶m->link, &model->params); ++ WARN_ON(size != offset); ++ /* Due to the block alignment, there should be exactly 
one enable map ++ * bit per 4 bytes in the dump buffer. ++ */ ++ WARN_ON(metadata->dump_buf_bytes != ++ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + ++ *out_metadata = metadata; + return 0; +} + -+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) ++void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ -+ struct kbase_ipa_model_param *param_p, *param_n; -+ -+ list_for_each_entry_safe(param_p, param_n, &model->params, link) { -+ list_del(¶m_p->link); -+ kfree(param_p->name); -+ kfree(param_p); -+ } ++ kfree(metadata); +} + -+static int force_fallback_model_get(void *data, u64 *val) ++int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_enable_map *enable_map) +{ -+ struct kbase_device *kbdev = data; -+ -+ mutex_lock(&kbdev->ipa.lock); -+ *val = kbdev->ipa.force_fallback_model; -+ mutex_unlock(&kbdev->ipa.lock); -+ -+ return 0; -+} ++ u64 *enable_map_buf; + -+static int force_fallback_model_set(void *data, u64 val) -+{ -+ struct kbase_device *kbdev = data; ++ if (!metadata || !enable_map) ++ return -EINVAL; + -+ mutex_lock(&kbdev->ipa.lock); -+ kbdev->ipa.force_fallback_model = (val ? true : false); -+ mutex_unlock(&kbdev->ipa.lock); ++ if (metadata->enable_map_bytes > 0) { ++ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); ++ if (!enable_map_buf) ++ return -ENOMEM; ++ } else { ++ enable_map_buf = NULL; ++ } + ++ enable_map->metadata = metadata; ++ enable_map->hwcnt_enable_map = enable_map_buf; + return 0; +} + -+DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, -+ force_fallback_model_get, -+ force_fallback_model_set, -+ "%llu\n"); -+ -+static int current_power_get(void *data, u64 *val) ++void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) +{ -+ struct kbase_device *kbdev = data; -+ struct devfreq *df = kbdev->devfreq; -+ u32 power; -+ -+ kbase_pm_context_active(kbdev); -+ /* The current model assumes that there's no more than one voltage -+ * regulator currently available in the system. -+ */ -+ kbase_get_real_power(df, &power, -+ kbdev->current_nominal_freq, -+ (kbdev->current_voltages[0] / 1000)); -+ kbase_pm_context_idle(kbdev); -+ -+ *val = power; ++ if (!enable_map) ++ return; + -+ return 0; ++ kfree(enable_map->hwcnt_enable_map); ++ enable_map->hwcnt_enable_map = NULL; ++ enable_map->metadata = NULL; +} -+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n"); + -+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) ++int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_dump_buffer *dump_buf) +{ -+ struct list_head *it; -+ struct dentry *dir; -+ -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ u8 *buf; + -+ dir = debugfs_create_dir(model->ops->name, -+ model->kbdev->mali_debugfs_directory); ++ if (!metadata || !dump_buf) ++ return -EINVAL; + -+ if (IS_ERR_OR_NULL(dir)) { -+ dev_err(model->kbdev->dev, -+ "Couldn't create mali debugfs %s directory", -+ model->ops->name); -+ return; -+ } ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + -+ list_for_each(it, &model->params) { -+ struct kbase_ipa_model_param *param = -+ list_entry(it, -+ struct kbase_ipa_model_param, -+ link); -+ const struct file_operations *fops = NULL; ++ /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ ++ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; + -+ switch (param->type) { -+ case PARAM_TYPE_S32: -+ fops = &fops_s32; -+ break; -+ case PARAM_TYPE_STRING: -+ fops = &fops_string; -+ break; -+ } ++ dump_buf->metadata = metadata; ++ dump_buf->dump_buf = (u64 *)buf; ++ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + -+ if (unlikely(!fops)) { -+ dev_err(model->kbdev->dev, -+ "Type not set for %s parameter %s\n", -+ model->ops->name, param->name); -+ } else { -+ debugfs_create_file(param->name, 0644, -+ dir, param, fops); -+ } -+ } ++ return 0; +} + -+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, -+ const char *name, s32 val) ++void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) +{ -+ struct kbase_ipa_model_param *param; -+ -+ mutex_lock(&model->kbdev->ipa.lock); -+ -+ list_for_each_entry(param, &model->params, link) { -+ if (!strcmp(param->name, name)) { -+ if (param->type == PARAM_TYPE_S32) { -+ *param->addr.s32p = val; -+ } else { -+ dev_err(model->kbdev->dev, -+ "Wrong type for %s parameter %s\n", -+ model->ops->name, param->name); -+ } -+ break; -+ } -+ } ++ if (!dump_buf) ++ return; + -+ mutex_unlock(&model->kbdev->ipa.lock); ++ kfree(dump_buf->dump_buf); ++ memset(dump_buf, 0, sizeof(*dump_buf)); +} -+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32); + -+void kbase_ipa_debugfs_init(struct kbase_device *kbdev) ++int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, ++ struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) -+ kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); -+ kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); ++ struct kbase_hwcnt_dump_buffer *buffers; ++ size_t buf_idx; ++ unsigned int order; ++ unsigned long addr; ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; + -+ debugfs_create_file("ipa_current_power", 0444, -+ kbdev->mali_debugfs_directory, kbdev, ¤t_power); -+ debugfs_create_file("ipa_force_fallback_model", 0644, -+ kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); ++ if (!metadata || !dump_bufs) ++ return -EINVAL; + -+ mutex_unlock(&kbdev->ipa.lock); -+} -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h -new file mode 100644 -index 000000000..f69036775 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h -@@ -0,0 +1,70 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
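kbase_hwcnt_dump_buffer_array_alloc() above places all n dump areas back to back in one page allocation and packs the per-buffer clock-counter arrays after them, so buffer i's dump area sits at page_addr + i * dump_buf_bytes and its clock counters at page_addr + n * dump_buf_bytes + i * clk_cnt_buf_bytes. A small standalone check of that layout arithmetic with hypothetical sizes:

	#include <stddef.h>
	#include <stdio.h>

	/* Offsets used by the array allocator: n dump areas first, then n clock
	 * counter areas, all inside one contiguous allocation.
	 */
	static size_t dump_offset(size_t i, size_t dump_bytes)
	{
		return i * dump_bytes;
	}

	static size_t clk_offset(size_t i, size_t n, size_t dump_bytes,
				 size_t clk_bytes)
	{
		return n * dump_bytes + i * clk_bytes;
	}

	int main(void)
	{
		const size_t n = 4, dump_bytes = 2048, clk_bytes = 16;
		size_t i;

		for (i = 0; i < n; i++)
			printf("buf %zu: dump at +%zu, clk counters at +%zu\n",
			       i, dump_offset(i, dump_bytes),
			       clk_offset(i, n, dump_bytes, clk_bytes));

		/* Total footprint before get_order() rounds it up to whole
		 * pages: n * (dump_bytes + clk_bytes) = 8256 bytes here.
		 */
		return 0;
	}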
-+ * -+ */ ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + -+#ifndef _KBASE_IPA_DEBUGFS_H_ -+#define _KBASE_IPA_DEBUGFS_H_ ++ /* Allocate memory for the dump buffer struct array */ ++ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); ++ if (!buffers) ++ return -ENOMEM; + -+enum kbase_ipa_model_param_type { -+ PARAM_TYPE_S32 = 1, -+ PARAM_TYPE_STRING, -+}; ++ /* Allocate pages for the actual dump buffers, as they tend to be fairly ++ * large. ++ */ ++ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); ++ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (!addr) { ++ kfree(buffers); ++ return -ENOMEM; ++ } + -+void kbase_ipa_debugfs_init(struct kbase_device *kbdev); -+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, -+ void *addr, size_t size, -+ enum kbase_ipa_model_param_type type); -+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); ++ dump_bufs->page_addr = addr; ++ dump_bufs->page_order = order; ++ dump_bufs->buf_cnt = n; ++ dump_bufs->bufs = buffers; + -+/** -+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter -+ * -+ * @model: pointer to IPA model -+ * @name: name of corresponding debugfs entry -+ * @val: new value of the parameter -+ * -+ * This function is only exposed for use by unit tests running in -+ * kernel space. Normally it is expected that parameter values will -+ * instead be set via debugfs. -+ */ -+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, -+ const char *name, s32 val); ++ /* Set the buffer of each dump buf */ ++ for (buf_idx = 0; buf_idx < n; buf_idx++) { ++ const size_t dump_buf_offset = dump_buf_bytes * buf_idx; ++ const size_t clk_cnt_buf_offset = ++ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); + -+#else /* CONFIG_DEBUG_FS */ ++ buffers[buf_idx].metadata = metadata; ++ buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); ++ buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); ++ } + -+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, -+ const char *name, void *addr, -+ size_t size, -+ enum kbase_ipa_model_param_type type) -+{ + return 0; +} + -+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) -+{ } -+ -+static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, -+ const char *name, s32 val) -+{ } -+#endif /* CONFIG_DEBUG_FS */ -+ -+#endif /* _KBASE_IPA_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c -new file mode 100644 -index 000000000..8557fe872 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c -@@ -0,0 +1,370 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+#include -+#endif -+#include -+#include -+#include ++void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) ++{ ++ if (!dump_bufs) ++ return; + -+#include "mali_kbase.h" -+#include "mali_kbase_defs.h" -+#include "mali_kbase_ipa_simple.h" -+#include "mali_kbase_ipa_debugfs.h" ++ kfree(dump_bufs->bufs); ++ free_pages(dump_bufs->page_addr, dump_bufs->page_order); ++ memset(dump_bufs, 0, sizeof(*dump_bufs)); ++} + -+#if MALI_USE_CSF ++void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; + -+/* This is used if the dynamic power for top-level is estimated separately -+ * through the counter model. To roughly match the contribution of top-level -+ * power in the total dynamic power, when calculated through counter model, -+ * this scalar is used for the dynamic coefficient specified in the device tree -+ * for simple power model. This value was provided by the HW team after -+ * taking all the power data collected and dividing top level power by shader -+ * core power and then averaging it across all samples. -+ */ -+#define TOP_LEVEL_DYN_COEFF_SCALER (3) ++ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+#endif /* MALI_USE_CSF */ ++ metadata = dst->metadata; + -+#if MALI_UNIT_TEST ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk; ++ size_t val_cnt; + -+static int dummy_temp; ++ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) ++ continue; + -+static int kbase_simple_power_model_get_dummy_temp( -+ struct thermal_zone_device *tz, -+ int *temp) -+{ -+ *temp = READ_ONCE(dummy_temp); -+ return 0; -+} ++ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + -+/* Intercept calls to the kernel function using a macro */ -+#ifdef thermal_zone_get_temp -+#undef thermal_zone_get_temp -+#endif -+#define thermal_zone_get_temp(tz, temp) \ -+ kbase_simple_power_model_get_dummy_temp(tz, temp) ++ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); ++ } + -+void kbase_simple_power_model_set_dummy_temp(int temp) -+{ -+ WRITE_ONCE(dummy_temp, temp); ++ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); +} -+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); -+ -+#endif /* MALI_UNIT_TEST */ -+ -+/* -+ * This model is primarily designed for the Juno platform. It may not be -+ * suitable for other platforms. The additional resources in this model -+ * should preferably be minimal, as this model is rarely used when a dynamic -+ * model is available. 
-+ */ -+ -+/** -+ * struct kbase_ipa_model_simple_data - IPA context per device -+ * @dynamic_coefficient: dynamic coefficient of the model -+ * @static_coefficient: static coefficient of the model -+ * @ts: Thermal scaling coefficients of the model -+ * @tz_name: Thermal zone name -+ * @gpu_tz: thermal zone device -+ * @poll_temperature_thread: Handle for temperature polling thread -+ * @current_temperature: Most recent value of polled temperature -+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms -+ */ -+ -+struct kbase_ipa_model_simple_data { -+ u32 dynamic_coefficient; -+ u32 static_coefficient; -+ s32 ts[4]; -+ char tz_name[THERMAL_NAME_LENGTH]; -+ struct thermal_zone_device *gpu_tz; -+ struct task_struct *poll_temperature_thread; -+ int current_temperature; -+ int temperature_poll_interval_ms; -+}; -+#define FALLBACK_STATIC_TEMPERATURE 55000 + -+/** -+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient -+ * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N -+ * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 -+ * -+ * Scale the temperature according to a cubic polynomial whose coefficients are -+ * provided in the device tree. The result is used to scale the static power -+ * coefficient, where 1000000 means no change. -+ * -+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000. -+ */ -+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) ++void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) +{ -+ /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ -+ const s64 t2 = div_s64((t * t), 1000); -+ -+ /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ -+ const s64 t3 = div_s64((t * t2), 1000); -+ -+ /* -+ * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in -+ * Deg^-N, so we need to multiply the last coefficient by 1000. -+ * Range: -2^63 < res_big < 2^63 -+ */ -+ const s64 res_big = ts[3] * t3 /* +/- 2^62 */ -+ + ts[2] * t2 /* +/- 2^55 */ -+ + ts[1] * t /* +/- 2^48 */ -+ + ts[0] * (s64)1000; /* +/- 2^41 */ ++ if (WARN_ON(!dst)) ++ return; + -+ /* Range: -2^60 < res_unclamped < 2^60 */ -+ s64 res_unclamped = div_s64(res_big, 1000); ++ memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + -+ /* Clamp to range of 0x to 10x the static power */ -+ return clamp(res_unclamped, (s64) 0, (s64) 10000000); ++ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); +} + -+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(), -+ * because we don't know if tz->lock is held in the same thread. So poll it in -+ * a separate thread to get around this. 
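calculate_temp_scaling_factor() above evaluates a cubic in the measured temperature, keeping everything in signed 64-bit fixed point and clamping the result to 0..10,000,000, where 1,000,000 means the static coefficient is left unchanged. A standalone sketch with hypothetical coefficients chosen so the scaling is exactly neutral at 50 degC and shrinks above it; these are illustrative values, not ones from a real device tree:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirror of the fixed-point cubic: ts[] are in Deg^-N, t is in mDeg C,
	 * and 1000000 means "no change" to the static coefficient.
	 */
	static uint32_t temp_scaling_factor(const int32_t ts[4], int64_t t)
	{
		int64_t t2 = (t * t) / 1000;
		int64_t t3 = (t * t2) / 1000;
		int64_t res = (ts[3] * t3 + ts[2] * t2 + ts[1] * t +
			       ts[0] * (int64_t)1000) / 1000;

		if (res < 0)
			res = 0;
		if (res > 10000000)
			res = 10000000;
		return (uint32_t)res;
	}

	int main(void)
	{
		/* Linear-only example: factor = 2000000 - 20000 * T(degC). */
		const int32_t ts[4] = { 2000000, -20000, 0, 0 };

		printf("40 degC -> %u\n", temp_scaling_factor(ts, 40000)); /* 1200000 */
		printf("50 degC -> %u\n", temp_scaling_factor(ts, 50000)); /* 1000000 */
		printf("60 degC -> %u\n", temp_scaling_factor(ts, 60000)); /*  800000 */
		return 0;
	}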
-+ */ -+static int poll_temperature(void *data) ++void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) +{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *) data; -+ int temp; ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; + -+ set_freezable(); ++ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+ while (!kthread_should_stop()) { -+ struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); ++ metadata = dst->metadata; + -+ if (tz) { -+ int ret; ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ const u64 *blk_em = ++ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); ++ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + -+ ret = thermal_zone_get_temp(tz, &temp); -+ if (ret) { -+ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", -+ ret); -+ temp = FALLBACK_STATIC_TEMPERATURE; -+ } ++ /* Align upwards to include padding bytes */ ++ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( ++ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); ++ ++ if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { ++ /* Block available, so only zero non-enabled values */ ++ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); + } else { -+ temp = FALLBACK_STATIC_TEMPERATURE; ++ /* Block not available, so zero the entire thing */ ++ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } -+ -+ WRITE_ONCE(model_data->current_temperature, temp); -+ -+ msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); -+ -+ try_to_freeze(); + } -+ -+ return 0; +} + -+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) +{ -+ u32 temp_scaling_factor; -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *) model->model_data; -+ u64 coeff_big; -+ int temp; ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ size_t clk; + -+ temp = READ_ONCE(model_data->current_temperature); ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || ++ WARN_ON(dst->metadata != src->metadata) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+ /* Range: 0 <= temp_scaling_factor < 2^24 */ -+ temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, -+ temp); ++ metadata = dst->metadata; + -+ /* -+ * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This -+ * means static_coefficient must be in range -+ * 0 <= static_coefficient < 2^28. 
-+ */ -+ coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; -+ *coeffp = div_u64(coeff_big, 1000000); ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk; ++ const u64 *src_blk; ++ size_t val_cnt; + -+ return 0; -+} ++ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) ++ continue; + -+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) -+{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *) model->model_data; ++ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); ++ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + -+#if MALI_USE_CSF -+ /* On CSF GPUs, the dynamic power for top-level and shader cores is -+ * estimated separately. Currently there is a single dynamic -+ * coefficient value provided in the device tree for simple model. -+ * As per the discussion with HW team the coefficient value needs to -+ * be scaled down for top-level to limit its contribution in the -+ * total dyanmic power. -+ */ -+ coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = -+ model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER; -+ coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = -+ model_data->dynamic_coefficient; -+#else -+ *coeffp = model_data->dynamic_coefficient; -+#endif ++ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); ++ } + -+ return 0; ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) ++ { ++ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; ++ } +} + -+static int add_params(struct kbase_ipa_model *model) ++void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) +{ -+ int err = 0; -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; -+ -+ err = kbase_ipa_model_add_param_s32(model, "static-coefficient", -+ (s32 *)&model_data->static_coefficient, 1, true); -+ if (err) -+ goto end; -+ -+ err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", -+ (s32 *)&model_data->dynamic_coefficient, 1, true); -+ if (err) -+ goto end; -+ -+ err = kbase_ipa_model_add_param_s32(model, "ts", -+ model_data->ts, 4, true); -+ if (err) -+ goto end; -+ -+ err = kbase_ipa_model_add_param_string(model, "thermal-zone", -+ model_data->tz_name, -+ sizeof(model_data->tz_name), true); -+ if (err) -+ goto end; -+ -+ model_data->temperature_poll_interval_ms = 200; -+ err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", -+ &model_data->temperature_poll_interval_ms, -+ 1, false); -+ -+end: -+ return err; -+} ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ size_t clk; + -+static int kbase_simple_power_model_init(struct kbase_ipa_model *model) -+{ -+ int err; -+ struct kbase_ipa_model_simple_data *model_data; ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || ++ WARN_ON(dst->metadata != src->metadata) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+ model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), -+ GFP_KERNEL); -+ if (!model_data) -+ return -ENOMEM; ++ metadata = dst->metadata; + -+ model->model_data = (void *) model_data; ++ 
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ const u64 *src_blk = ++ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); ++ const u64 *blk_em = ++ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); ++ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); ++ /* Align upwards to include padding bytes */ ++ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( ++ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); + -+ model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; -+ model_data->poll_temperature_thread = kthread_run(poll_temperature, -+ (void *) model_data, -+ "mali-simple-power-model-temp-poll"); -+ if (IS_ERR(model_data->poll_temperature_thread)) { -+ err = PTR_ERR(model_data->poll_temperature_thread); -+ kfree(model_data); -+ return err; ++ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); + } + -+ err = add_params(model); -+ if (err) { -+ kbase_ipa_model_param_free_all(model); -+ kthread_stop(model_data->poll_temperature_thread); -+ kfree(model_data); -+ } ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) ++ { ++ bool clk_enabled = ++ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); + -+ return err; ++ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; ++ } +} + -+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) ++void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) +{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; -+ struct thermal_zone_device *tz; ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ size_t clk; + -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || ++ WARN_ON(dst->metadata != src->metadata) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+ if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { -+ model_data->gpu_tz = NULL; -+ } else { -+ char tz_name[THERMAL_NAME_LENGTH]; -+ u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name)); ++ metadata = dst->metadata; + -+ string_len += sizeof(char); -+ /* Make sure that the source string fit into the buffer. */ -+ KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name)); -+ CSTD_UNUSED(string_len); ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk; ++ const u64 *src_blk; ++ size_t hdr_cnt; ++ size_t ctr_cnt; + -+ /* Release ipa.lock so that thermal_list_lock is not acquired -+ * with ipa.lock held, thereby avoid lock ordering violation -+ * lockdep warning. The warning comes as a chain of locks -+ * ipa.lock --> thermal_list_lock --> tz->lock gets formed -+ * on registering devfreq cooling device when probe method -+ * of mali platform driver is invoked. 
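kbase_hwcnt_dump_buffer_accumulate() above hands each enabled block to a per-block helper together with separate header and counter counts. A minimal standalone sketch of that split (illustrative names only, not the kbase helper itself): headers take the newest value, counters are summed.

#include <stddef.h>
#include <stdint.h>

static void block_accumulate_sketch(uint64_t *dst, const uint64_t *src,
                                    size_t hdr_cnt, size_t ctr_cnt)
{
        size_t i;

        /* Block headers carry no event counts, so the newest value wins. */
        for (i = 0; i < hdr_cnt; i++)
                dst[i] = src[i];

        /* Block counters accumulate across successive dumps. */
        for (i = hdr_cnt; i < hdr_cnt + ctr_cnt; i++)
                dst[i] += src[i];
}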
-+ */ -+ mutex_unlock(&model->kbdev->ipa.lock); -+ tz = thermal_zone_get_zone_by_name(tz_name); -+ mutex_lock(&model->kbdev->ipa.lock); ++ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) ++ continue; + -+ if (IS_ERR_OR_NULL(tz)) { -+ pr_warn_ratelimited( -+ "Error %d getting thermal zone \'%s\', not yet ready?\n", -+ PTR_ERR_OR_ZERO(tz), tz_name); -+ return -EPROBE_DEFER; -+ } ++ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); ++ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); ++ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + -+ /* Check if another thread raced against us & updated the -+ * thermal zone name string. Update the gpu_tz pointer only if -+ * the name string did not change whilst we retrieved the new -+ * thermal_zone_device pointer, otherwise model_data->tz_name & -+ * model_data->gpu_tz would become inconsistent with each other. -+ * The below check will succeed only for the thread which last -+ * updated the name string. -+ */ -+ if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0) -+ model_data->gpu_tz = tz; ++ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); + } + -+ return 0; ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) ++ { ++ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; ++ } +} + -+static void kbase_simple_power_model_term(struct kbase_ipa_model *model) ++void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) +{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; -+ -+ kthread_stop(model_data->poll_temperature_thread); -+ -+ kfree(model_data); -+} -+ -+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { -+ .name = "mali-simple-power-model", -+ .init = &kbase_simple_power_model_init, -+ .recalculate = &kbase_simple_power_model_recalculate, -+ .term = &kbase_simple_power_model_term, -+ .get_dynamic_coeff = &model_dynamic_coeff, -+ .get_static_coeff = &model_static_coeff, -+}; -+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); -diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h -new file mode 100644 -index 000000000..dd17786a5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
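The *_strict() variants above additionally pass the block's enable-map bitfield (blk_em) so that values whose enable bit is clear are zeroed rather than copied. A standalone sketch of the bit addressing this implies, with illustrative names:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* One enable bit per 64-bit block value, packed into u64 bitfields. */
static bool value_enabled(const uint64_t *enable_map, size_t value_index)
{
        return (enable_map[value_index / 64] >> (value_index % 64)) & 1;
}

/* Strict copy of a single value: disabled values are forced to zero. */
static void copy_value_strict(uint64_t *dst, const uint64_t *src,
                              const uint64_t *enable_map, size_t value_index)
{
        dst[value_index] = value_enabled(enable_map, value_index) ?
                           src[value_index] : 0;
}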
-+ * -+ */ -+ -+#ifndef _KBASE_IPA_SIMPLE_H_ -+#define _KBASE_IPA_SIMPLE_H_ ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ size_t clk; + -+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || ++ WARN_ON(dst->metadata != src->metadata) || ++ WARN_ON(dst->metadata != dst_enable_map->metadata)) ++ return; + -+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; ++ metadata = dst->metadata; + -+#if MALI_UNIT_TEST -+/** -+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value -+ * @temp: Temperature of the thermal zone, in millidegrees celsius. -+ * -+ * This is only intended for use in unit tests, to ensure that the temperature -+ * values used by the simple power model are predictable. Deterministic -+ * behavior is necessary to allow validation of the static power values -+ * computed by this model. -+ */ -+void kbase_simple_power_model_set_dummy_temp(int temp); -+#endif /* MALI_UNIT_TEST */ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) ++ { ++ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ const u64 *src_blk = ++ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); ++ const u64 *blk_em = ++ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); ++ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); ++ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); ++ /* Align upwards to include padding bytes */ ++ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS( ++ hdr_cnt + ctr_cnt, ++ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + -+#endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++ kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt, ++ ctr_cnt); ++ } + -+#endif /* _KBASE_IPA_SIMPLE_H_ */ -diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) ++ { ++ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; ++ else ++ dst->clk_cnt_buf[clk] = 0; ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h new file mode 100644 -index 000000000..debc3ad25 +index 000000000..5c5ada401 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h -@@ -0,0 +1,878 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h +@@ -0,0 +1,1231 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -197692,1922 +200326,1975 @@ index 000000000..debc3ad25 + */ + +/* -+ * Definitions (types, defines, etcs) specific to Job Manager Kbase. -+ * They are placed here to allow the hierarchy of header files to work. 
-+ */ -+ -+#ifndef _KBASE_JM_DEFS_H_ -+#define _KBASE_JM_DEFS_H_ -+ -+#include "mali_kbase_js_defs.h" -+ -+/* Dump Job slot trace on error (only active if KBASE_KTRACE_ENABLE != 0) */ -+#define KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR 1 -+ -+/* -+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" -+ * from the hardware. Note that the time is actually -+ * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and -+ * the GPU actually being reset to give other contexts time for their jobs -+ * to be soft-stopped and removed from the hardware before resetting. -+ */ -+#define ZAP_TIMEOUT 1000 -+ -+/* -+ * Prevent soft-stops from occurring in scheduling situations ++ * Hardware counter types. ++ * Contains structures for describing the physical layout of hardware counter ++ * dump buffers and enable maps within a system. + * -+ * This is not due to HW issues, but when scheduling is desired to be more -+ * predictable. ++ * Also contains helper functions for manipulation of these dump buffers and ++ * enable maps. + * -+ * Therefore, soft stop may still be disabled due to HW issues. ++ * Through use of these structures and functions, hardware counters can be ++ * enabled, copied, accumulated, and generally manipulated in a generic way, ++ * regardless of the physical counter dump layout. + * -+ * Soft stop will still be used for non-scheduling purposes e.g. when -+ * terminating a context. ++ * Terminology: + * -+ * if not in use, define this value to 0 instead of being undefined. -+ */ -+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 -+ -+/* -+ * Prevent hard-stops from occurring in scheduling situations ++ * Hardware Counter System: ++ * A collection of hardware counter groups, making a full hardware counter ++ * system. ++ * Hardware Counter Group: ++ * A group of Hardware Counter Blocks (e.g. a t62x might have more than one ++ * core group, so has one counter group per core group, where each group ++ * may have a different number and layout of counter blocks). ++ * Hardware Counter Block: ++ * A block of hardware counters (e.g. shader block, tiler block). ++ * Hardware Counter Block Instance: ++ * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have ++ * 4 shader block instances). + * -+ * This is not due to HW issues, but when scheduling is desired to be more -+ * predictable. ++ * Block Header: ++ * A header value inside a counter block. Headers don't count anything, ++ * so it is only valid to copy or zero them. Headers are always the first ++ * values in the block. ++ * Block Counter: ++ * A counter value inside a counter block. Counters can be zeroed, copied, ++ * or accumulated. Counters are always immediately after the headers in the ++ * block. ++ * Block Value: ++ * A catch-all term for block headers and block counters. + * -+ * Hard stop will still be used for non-scheduling purposes e.g. when -+ * terminating a context. ++ * Enable Map: ++ * An array of u64 bitfields, where each bit either enables exactly one ++ * block value, or is unused (padding). ++ * Dump Buffer: ++ * An array of u64 values, where each u64 corresponds either to one block ++ * value, or is unused (padding). ++ * Availability Mask: ++ * A bitfield, where each bit corresponds to whether a block instance is ++ * physically available (e.g. an MP3 GPU may have a sparse core mask of ++ * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the ++ * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. 
In this ++ * case, the availability mask might be 0b1011111 (the exact layout will ++ * depend on the specific hardware architecture), with the 3 extra early bits ++ * corresponding to other block instances in the hardware counter system). ++ * Metadata: ++ * Structure describing the physical layout of the enable map and dump buffers ++ * for a specific hardware counter system. + * -+ * if not in use, define this value to 0 instead of being undefined. -+ */ -+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 -+ -+/* Atom has been previously soft-stopped */ -+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) -+/* Atom has been previously retried to execute */ -+#define KBASE_KATOM_FLAGS_RERUN (1<<2) -+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps -+ * to disambiguate short-running job chains during soft/hard stopping of jobs + */ -+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) -+/* Atom has been previously hard-stopped. */ -+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) -+/* Atom has caused us to enter disjoint state */ -+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -+/* Atom blocked on cross-slot dependency */ -+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) -+/* Atom has fail dependency on cross-slot dependency */ -+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) -+/* Atom requires GPU to be in protected mode */ -+#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -+/* Atom has been stored in runnable_tree */ -+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) -+/* Atom is waiting for L2 caches to power up in order to enter protected mode */ -+#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + -+/* SW related flags about types of JS_COMMAND action -+ * NOTE: These must be masked off by JS_COMMAND_MASK -+ */ ++#ifndef _KBASE_HWCNT_TYPES_H_ ++#define _KBASE_HWCNT_TYPES_H_ + -+/* This command causes a disjoint event */ -+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 ++#include ++#include ++#include ++#include ++#include + -+/* Bitmask of all SW related flags */ -+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) ++/* Number of bytes in each bitfield */ ++#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + -+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) -+#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ -+ "Must update JS_COMMAND_SW_<..> bitmasks" -+#endif ++/* Number of bits in each bitfield */ ++#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + -+/* Soft-stop command that causes a Disjoint event. This of course isn't -+ * entirely masked off by JS_COMMAND_MASK ++/* Number of bytes for each counter value. ++ * Use 64-bit per counter in driver to avoid HW 32-bit register values ++ * overflow after a long time accumulation. + */ -+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ -+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) -+ -+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT -+ -+/* Serialize atoms within a slot (ie only one atom per job slot) */ -+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) -+/* Serialize atoms between slots (ie only one job slot running at any time) */ -+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) -+/* Reset the GPU after each atom completion */ -+#define KBASE_SERIALIZE_RESET (1 << 2) ++#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64)) + -+/** -+ * enum kbase_timeout_selector - The choice of which timeout to get scaled -+ * using the lowest GPU frequency. 
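The availability-mask example in the terminology above (a sparse MP3 core mask dumped with an MP4-sized layout) comes down to one bit test per block instance. A standalone sketch reusing the illustrative 0b1011111 mask; the bit positions are assumptions, since the comment notes the exact layout is hardware specific.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool block_instance_available(uint64_t avail_mask, unsigned int bit)
{
        return (avail_mask >> bit) & 1;
}

int main(void)
{
        uint64_t avail_mask = 0x5f; /* 0b1011111 from the comment above */
        unsigned int bit;

        for (bit = 0; bit < 7; bit++)
                printf("availability bit %u: %s\n", bit,
                       block_instance_available(avail_mask, bit) ?
                       "present" : "absent");
        return 0;
}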
-+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion -+ * of a MMU operation -+ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT -+ * to be updated on HW side so a Job Slot is -+ * considered free. -+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in -+ * the enum. ++/* Number of bits in an availability mask (i.e. max total number of block ++ * instances supported in a Hardware Counter System) + */ -+enum kbase_timeout_selector { -+ MMU_AS_INACTIVE_WAIT_TIMEOUT, -+ JM_DEFAULT_JS_FREE_TIMEOUT, ++#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + -+ /* Must be the last in the enum */ -+ KBASE_TIMEOUT_SELECTOR_COUNT -+}; ++/* Minimum alignment of each block of hardware counters */ ++#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + -+#if IS_ENABLED(CONFIG_DEBUG_FS) +/** -+ * struct base_job_fault_event - keeps track of the atom which faulted or which -+ * completed after the faulty atom but before the -+ * debug data for faulty atom was dumped. ++ * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. ++ * @value: The value to align upwards. ++ * @alignment: The alignment boundary. + * -+ * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for -+ * the atom which faulted. -+ * @katom: pointer to the atom for which job fault occurred or which -+ * completed after the faulty atom. -+ * @job_fault_work: work item, queued only for the faulty atom, which waits for -+ * the dumping to get completed and then does the bottom half -+ * of job done for the atoms which followed the faulty atom. -+ * @head: List head used to store the atom in the global list of -+ * faulty atoms or context specific list of atoms which got -+ * completed during the dump. -+ * @reg_offset: offset of the register to be dumped next, only applicable -+ * for the faulty atom. ++ * Return: Input value if already aligned to the specified boundary, or next ++ * (incrementing upwards) aligned value. + */ -+struct base_job_fault_event { ++#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ ++ (value + ((alignment - (value % alignment)) % alignment)) + -+ u32 event_code; -+ struct kbase_jd_atom *katom; -+ struct work_struct job_fault_work; -+ struct list_head head; -+ int reg_offset; ++/** ++ * struct kbase_hwcnt_block_description - Description of one or more identical, ++ * contiguous, Hardware Counter Blocks. ++ * @type: The arbitrary identifier used to identify the type of the block. ++ * @inst_cnt: The number of Instances of the block. ++ * @hdr_cnt: The number of 64-bit Block Headers in the block. ++ * @ctr_cnt: The number of 64-bit Block Counters in the block. ++ */ ++struct kbase_hwcnt_block_description { ++ u64 type; ++ size_t inst_cnt; ++ size_t hdr_cnt; ++ size_t ctr_cnt; +}; -+#endif + +/** -+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. -+ * @atom: pointer to the dependee atom. -+ * @dep_type: type of dependency on the dependee @atom, i.e. order or data -+ * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. ++ * struct kbase_hwcnt_group_description - Description of one or more identical, ++ * contiguous Hardware Counter Groups. ++ * @type: The arbitrary identifier used to identify the type of the group. ++ * @blk_cnt: The number of types of Hardware Counter Block in the group. 
++ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, ++ * describing each type of Hardware Counter Block in the group. + */ -+struct kbase_jd_atom_dependency { -+ struct kbase_jd_atom *atom; -+ u8 dep_type; ++struct kbase_hwcnt_group_description { ++ u64 type; ++ size_t blk_cnt; ++ const struct kbase_hwcnt_block_description *blks; +}; + +/** -+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the -+ * dependee atom. -+ * @dep: pointer to the dependency info structure. -+ * -+ * Return: readonly reference to dependee atom. ++ * struct kbase_hwcnt_description - Description of a Hardware Counter System. ++ * @grp_cnt: The number of Hardware Counter Groups. ++ * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, ++ * describing each Hardware Counter Group in the system. ++ * @avail_mask: Flat Availability Mask for all block instances in the system. ++ * @clk_cnt: The number of clock domains in the system. The maximum is 64. + */ -+static inline const struct kbase_jd_atom * -+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) -+{ -+ return (const struct kbase_jd_atom *)(dep->atom); -+} ++struct kbase_hwcnt_description { ++ size_t grp_cnt; ++ const struct kbase_hwcnt_group_description *grps; ++ u64 avail_mask; ++ u8 clk_cnt; ++}; + +/** -+ * kbase_jd_katom_dep_type - Retrieves the dependency type info -+ * -+ * @dep: pointer to the dependency info structure. -+ * -+ * Return: the type of dependency there is on the dependee atom. ++ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout ++ * of a block in a Hardware Counter System's ++ * Dump Buffers and Enable Maps. ++ * @type: The arbitrary identifier used to identify the type of the ++ * block. ++ * @inst_cnt: The number of Instances of the block. ++ * @hdr_cnt: The number of 64-bit Block Headers in the block. ++ * @ctr_cnt: The number of 64-bit Block Counters in the block. ++ * @enable_map_index: Index in u64s into the parent's Enable Map where the ++ * Enable Map bitfields of the Block Instances described by ++ * this metadata start. ++ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the ++ * Block Instances described by this metadata. ++ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the ++ * Dump Buffers of the Block Instances described by this ++ * metadata start. ++ * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the ++ * Block Instances described by this metadata. ++ * @avail_mask_index: Index in bits into the parent's Availability Mask where ++ * the Availability Masks of the Block Instances described ++ * by this metadata start. + */ -+static inline u8 kbase_jd_katom_dep_type( -+ const struct kbase_jd_atom_dependency *dep) -+{ -+ return dep->dep_type; -+} ++struct kbase_hwcnt_block_metadata { ++ u64 type; ++ size_t inst_cnt; ++ size_t hdr_cnt; ++ size_t ctr_cnt; ++ size_t enable_map_index; ++ size_t enable_map_stride; ++ size_t dump_buf_index; ++ size_t dump_buf_stride; ++ size_t avail_mask_index; ++}; + +/** -+ * kbase_jd_katom_dep_set - sets up the dependency info structure -+ * as per the values passed. -+ * @const_dep: pointer to the dependency info structure to be setup. -+ * @a: pointer to the dependee atom. -+ * @type: type of dependency there is on the dependee atom. ++ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout ++ * of a group of blocks in a Hardware ++ * Counter System's Dump Buffers and Enable ++ * Maps. 
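The description structs above are plain, caller-owned tables; a hedged sketch of how a back end might describe a made-up layout with one group and two block types (every number here is invented for illustration, and ARRAY_SIZE() assumes kernel context):

static const struct kbase_hwcnt_block_description example_blks[] = {
        { .type = 0, .inst_cnt = 1, .hdr_cnt = 4, .ctr_cnt = 60 },
        { .type = 1, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60 },
};

static const struct kbase_hwcnt_group_description example_grp = {
        .type = 0,
        .blk_cnt = ARRAY_SIZE(example_blks),
        .blks = example_blks,
};

static const struct kbase_hwcnt_description example_desc = {
        .grp_cnt = 1,
        .grps = &example_grp,
        .avail_mask = 0x1f, /* all five block instances physically present */
        .clk_cnt = 1,
};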
++ * @type: The arbitrary identifier used to identify the type of the ++ * group. ++ * @blk_cnt: The number of types of Hardware Counter Block in the ++ * group. ++ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, ++ * describing the physical layout of each type of Hardware ++ * Counter Block in the group. ++ * @enable_map_index: Index in u64s into the parent's Enable Map where the ++ * Enable Maps of the blocks within the group described by ++ * this metadata start. ++ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the ++ * Dump Buffers of the blocks within the group described by ++ * metadata start. ++ * @avail_mask_index: Index in bits into the parent's Availability Mask where ++ * the Availability Masks of the blocks within the group ++ * described by this metadata start. + */ -+static inline void kbase_jd_katom_dep_set( -+ const struct kbase_jd_atom_dependency *const_dep, -+ struct kbase_jd_atom *a, u8 type) -+{ -+ struct kbase_jd_atom_dependency *dep; -+ -+ dep = (struct kbase_jd_atom_dependency *)const_dep; -+ -+ dep->atom = a; -+ dep->dep_type = type; -+} ++struct kbase_hwcnt_group_metadata { ++ u64 type; ++ size_t blk_cnt; ++ const struct kbase_hwcnt_block_metadata *blk_metadata; ++ size_t enable_map_index; ++ size_t dump_buf_index; ++ size_t avail_mask_index; ++}; + +/** -+ * kbase_jd_katom_dep_clear - resets the dependency info structure -+ * -+ * @const_dep: pointer to the dependency info structure to be setup. ++ * struct kbase_hwcnt_metadata - Metadata describing the memory layout ++ * of Dump Buffers and Enable Maps within a ++ * Hardware Counter System. ++ * @grp_cnt: The number of Hardware Counter Groups. ++ * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, ++ * describing the physical layout of each Hardware Counter ++ * Group in the system. ++ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. ++ * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. ++ * @avail_mask: The Availability Mask for the system. ++ * @clk_cnt: The number of clock domains in the system. + */ -+static inline void kbase_jd_katom_dep_clear( -+ const struct kbase_jd_atom_dependency *const_dep) -+{ -+ struct kbase_jd_atom_dependency *dep; -+ -+ dep = (struct kbase_jd_atom_dependency *)const_dep; -+ -+ dep->atom = NULL; -+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID; -+} ++struct kbase_hwcnt_metadata { ++ size_t grp_cnt; ++ const struct kbase_hwcnt_group_metadata *grp_metadata; ++ size_t enable_map_bytes; ++ size_t dump_buf_bytes; ++ u64 avail_mask; ++ u8 clk_cnt; ++}; + +/** -+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it -+ * becomes runnable, with respect to job slot -+ * ringbuffer/fifo. -+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, -+ * which implies that either atom has not become -+ * runnable due to dependency or has completed -+ * the execution on GPU. -+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is -+ * blocked due to cross slot dependency, -+ * can't be submitted to GPU. -+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot -+ * fifo but is waiting for the completion of -+ * previously added atoms in current & other -+ * slots, as their protected mode requirements -+ * do not match with the current atom. 
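The index and stride fields documented above are what turn a flat u64 array into per-block views. A hedged sketch of the lookup they imply for one block instance (kernel context assumed; the kbase_hwcnt_dump_buffer_block_instance() calls seen earlier in this patch resolve to arithmetic of this shape, with both offsets expressed in u64s):

static u64 *block_instance_dump_buf_sketch(u64 *dump_buf,
                                           const struct kbase_hwcnt_group_metadata *grp_md,
                                           const struct kbase_hwcnt_block_metadata *blk_md,
                                           size_t blk_inst)
{
        /* Group base, then block base, then one stride per earlier instance. */
        return dump_buf + grp_md->dump_buf_index +
               blk_md->dump_buf_index +
               blk_md->dump_buf_stride * blk_inst;
}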
-+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo -+ * and is waiting for completion of protected -+ * mode transition, needed before the atom is -+ * submitted to GPU. -+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is -+ * waiting for the cores, which are needed to -+ * execute the job chain represented by the atom, -+ * to become available -+ * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to -+ * GPU. -+ * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted -+ * to GPU. -+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some -+ * failure, but only after the previously added -+ * atoms in fifo have completed or have also -+ * been returned to JS. ++ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 ++ * bitfields. ++ * @metadata: Non-NULL pointer to metadata used to identify, and to describe ++ * the layout of the enable map. ++ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an ++ * array of u64 bitfields, each bit of which enables one hardware ++ * counter. ++ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle ++ * counter for a given clock domain. + */ -+enum kbase_atom_gpu_rb_state { -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, -+ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, -+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, -+ KBASE_ATOM_GPU_RB_READY, -+ KBASE_ATOM_GPU_RB_SUBMITTED, -+ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 ++struct kbase_hwcnt_enable_map { ++ const struct kbase_hwcnt_metadata *metadata; ++ u64 *hwcnt_enable_map; ++ u64 clk_enable_map; +}; + +/** -+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to -+ * the preparation for GPU's entry into protected mode, -+ * becomes pertinent only after atom's state with respect -+ * to slot ringbuffer is -+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION -+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any -+ * atoms currently submitted to GPU and protected mode -+ * transition is not already in progress. -+ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to -+ * become disabled before entry into protected mode. -+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in -+ * preparation for the coherency change. L2 shall be -+ * powered down and GPU shall come out of fully -+ * coherent mode before entering protected mode. -+ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; -+ * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on -+ * so that coherency register contains correct value when -+ * GPU enters protected mode. -+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for -+ * BASE_HW_ISSUE_TGOX_R1_1234 check -+ * that L2 is powered up and switch GPU to protected mode. ++ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. ++ * @metadata: Non-NULL pointer to metadata used to identify, and to describe ++ * the layout of the Dump Buffer. ++ * @dump_buf: Non-NULL pointer to an array of u64 values, the array size is ++ * metadata->dump_buf_bytes. ++ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed ++ * for each clock domain. + */ -+enum kbase_atom_enter_protected_state { -+ /* -+ * NOTE: The integer value of this must match -+ * KBASE_ATOM_EXIT_PROTECTED_CHECK. 
-+ */ -+ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, -+ KBASE_ATOM_ENTER_PROTECTED_HWCNT, -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, -+ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, -+ KBASE_ATOM_ENTER_PROTECTED_FINISHED, ++struct kbase_hwcnt_dump_buffer { ++ const struct kbase_hwcnt_metadata *metadata; ++ u64 *dump_buf; ++ u64 *clk_cnt_buf; +}; + +/** -+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to -+ * the preparation for GPU's exit from protected mode, -+ * becomes pertinent only after atom's state with respect -+ * to slot ngbuffer is -+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION -+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any -+ * atoms currently submitted to GPU and protected mode -+ * transition is not already in progress. -+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in -+ * preparation for the reset, as exiting protected mode -+ * requires a reset. -+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from -+ * protected mode -+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to -+ * complete ++ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. ++ * @page_addr: Address of allocated pages. A single allocation is used for all ++ * Dump Buffers in the array. ++ * @page_order: The allocation order of the pages, the order is on a logarithmic ++ * scale. ++ * @buf_cnt: The number of allocated Dump Buffers. ++ * @bufs: Non-NULL pointer to the array of Dump Buffers. + */ -+enum kbase_atom_exit_protected_state { -+ /* -+ * NOTE: The integer value of this must match -+ * KBASE_ATOM_ENTER_PROTECTED_CHECK. -+ */ -+ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, -+ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, -+ KBASE_ATOM_EXIT_PROTECTED_RESET, -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, ++struct kbase_hwcnt_dump_buffer_array { ++ unsigned long page_addr; ++ unsigned int page_order; ++ size_t buf_cnt; ++ struct kbase_hwcnt_dump_buffer *bufs; +}; + +/** -+ * struct kbase_jd_atom - object representing the atom, containing the complete -+ * state and attributes of an atom. -+ * @work: work item for the bottom half processing of the atom, -+ * by JD or JS, after it got executed on GPU or the -+ * input fence got signaled -+ * @start_timestamp: time at which the atom was submitted to the GPU, by -+ * updating the JS_HEAD_NEXTn register. -+ * @udata: copy of the user data sent for the atom in -+ * base_jd_submit. -+ * @kctx: Pointer to the base context with which the atom is -+ * associated. -+ * @dep_head: Array of 2 list heads, pointing to the two list of -+ * atoms -+ * which are blocked due to dependency on this atom. -+ * @dep_item: Array of 2 list heads, used to store the atom in the -+ * list of other atoms depending on the same dependee -+ * atom. -+ * @dep: Array containing the dependency info for the 2 atoms -+ * on which the atom depends upon. -+ * @jd_item: List head used during job dispatch job_done -+ * processing - as dependencies may not be entirely -+ * resolved at this point, -+ * we need to use a separate list head. -+ * @in_jd_list: flag set to true if atom's @jd_item is currently on -+ * a list, prevents atom being processed twice. -+ * @jit_ids: Zero-terminated array of IDs of just-in-time memory -+ * allocations written to by the atom. When the atom -+ * completes, the value stored at the -+ * &struct_base_jit_alloc_info.heap_info_gpu_addr of -+ * each allocation is read in order to enforce an -+ * overall physical memory usage limit. 
-+ * @nr_extres: number of external resources referenced by the atom. -+ * @extres: Pointer to @nr_extres VA regions containing the external -+ * resource allocation and other information. -+ * @nr_extres external resources referenced by the atom. -+ * @device_nr: indicates the coregroup with which the atom is -+ * associated, when -+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. -+ * @jc: GPU address of the job-chain. -+ * @softjob_data: Copy of data read from the user space buffer that @jc -+ * points to. -+ * @fence: Stores either an input or output sync fence, -+ * depending on soft-job type -+ * @sync_waiter: Pointer to the sync fence waiter structure passed to -+ * the callback function on signaling of the input -+ * fence. -+ * @dma_fence: object containing pointers to both input & output -+ * fences and other related members used for explicit -+ * sync through soft jobs and for the implicit -+ * synchronization required on access to external -+ * resources. -+ * @dma_fence.fence_in: Points to the dma-buf input fence for this atom. -+ * The atom would complete only after the fence is -+ * signaled. -+ * @dma_fence.fence: Points to the dma-buf output fence for this atom. -+ * @dma_fence.fence_cb: The object that is passed at the time of adding the -+ * callback that gets invoked when @dma_fence.fence_in -+ * is signaled. -+ * @dma_fence.fence_cb_added: Flag to keep a track if the callback was successfully -+ * added for @dma_fence.fence_in, which is supposed to be -+ * invoked on the signaling of fence. -+ * @dma_fence.context: The dma-buf fence context number for this atom. A -+ * unique context number is allocated to each katom in -+ * the context on context creation. -+ * @dma_fence.seqno: The dma-buf fence sequence number for this atom. This -+ * is increased every time this katom uses dma-buf fence -+ * @event_code: Event code for the job chain represented by the atom, -+ * both HW and low-level SW events are represented by -+ * event codes. -+ * @core_req: bitmask of BASE_JD_REQ_* flags specifying either -+ * Hw or Sw requirements for the job chain represented -+ * by the atom. -+ * @ticks: Number of scheduling ticks for which atom has been -+ * running on the GPU. -+ * @sched_priority: Priority of the atom for Job scheduling, as per the -+ * KBASE_JS_ATOM_SCHED_PRIO_*. -+ * @completed: Wait queue to wait upon for the completion of atom. -+ * @status: Indicates at high level at what stage the atom is in, -+ * as per KBASE_JD_ATOM_STATE_*, that whether it is not -+ * in use or its queued in JD or given to JS or -+ * submitted to Hw or it completed the execution on Hw. -+ * @work_id: used for GPU tracepoints, its a snapshot of the -+ * 'work_id' counter in kbase_jd_context which is -+ * incremented on every call to base_jd_submit. -+ * @slot_nr: Job slot chosen for the atom. -+ * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the -+ * excat low level state of the atom. -+ * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely -+ * tracking atom's state after it has entered -+ * Job scheduler on becoming runnable. Atom -+ * could be blocked due to cross slot dependency -+ * or waiting for the shader cores to become available -+ * or waiting for protected mode transitions to -+ * complete. -+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU -+ * cache is needed for the atom and the shader cores -+ * used for atom have been kept on. 
-+ * @blocked: flag indicating that atom's resubmission to GPU is -+ * blocked till the work item is scheduled to return the -+ * atom to JS. -+ * @seq_nr: user-space sequence number, to order atoms in some -+ * temporal order -+ * @pre_dep: Pointer to atom that this atom has same-slot -+ * dependency on -+ * @post_dep: Pointer to atom that has same-slot dependency on -+ * this atom -+ * @x_pre_dep: Pointer to atom that this atom has cross-slot -+ * dependency on -+ * @x_post_dep: Pointer to atom that has cross-slot dependency on -+ * this atom -+ * @flush_id: The GPU's flush count recorded at the time of -+ * submission, -+ * used for the cache flush optimization -+ * @fault_event: Info for dumping the debug data on Job fault. -+ * @queue: List head used for 4 different purposes : -+ * Adds atom to the list of dma-buf fence waiting atoms. -+ * Adds atom to the list of atoms blocked due to cross -+ * slot dependency. -+ * Adds atom to the list of softjob atoms for which JIT -+ * allocation has been deferred -+ * Adds atom to the list of softjob atoms waiting for -+ * the signaling of fence. -+ * @jit_node: Used to keep track of all JIT free/alloc jobs in -+ * submission order -+ * @jit_blocked: Flag indicating that JIT allocation requested through -+ * softjob atom will be reattempted after the impending -+ * free of other active JIT allocations. -+ * @will_fail_event_code: If non-zero, this indicates that the atom will fail -+ * with the set event_code when the atom is processed. -+ * Used for special handling of atoms, which have a data -+ * dependency on the failed atoms. -+ * @protected_state: State of the atom, as per -+ * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, -+ * when transitioning into or out of protected mode. -+ * Atom will be either entering or exiting the -+ * protected mode. -+ * @protected_state.enter: entering the protected mode. -+ * @protected_state.exit: exiting the protected mode. -+ * @runnable_tree_node: The node added to context's job slot specific rb tree -+ * when the atom becomes runnable. -+ * @age: Age of atom relative to other atoms in the context, -+ * is snapshot of the age_count counter in kbase -+ * context. -+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. -+ * @renderpass_id:Renderpass identifier used to associate an atom that has -+ * BASE_JD_REQ_START_RENDERPASS set in its core requirements -+ * with an atom that has BASE_JD_REQ_END_RENDERPASS set. -+ * @jc_fragment: Set of GPU fragment job chains ++ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object ++ * from a description. ++ * @desc: Non-NULL pointer to a hardware counter description. ++ * @metadata: Non-NULL pointer to where created metadata will be stored on ++ * success. ++ * ++ * Return: 0 on success, else error code. 
+ */ -+struct kbase_jd_atom { -+ struct work_struct work; -+ ktime_t start_timestamp; -+ -+ struct base_jd_udata udata; -+ struct kbase_context *kctx; -+ -+ struct list_head dep_head[2]; -+ struct list_head dep_item[2]; -+ const struct kbase_jd_atom_dependency dep[2]; -+ struct list_head jd_item; -+ bool in_jd_list; -+ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ u8 jit_ids[2]; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+ u16 nr_extres; -+ struct kbase_va_region **extres; -+ -+ u32 device_nr; -+ u64 jc; -+ void *softjob_data; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ struct { -+ /* Use the functions/API defined in mali_kbase_fence.h to -+ * when working with this sub struct -+ */ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence_in; -+#else -+ struct dma_fence *fence_in; -+#endif -+#endif -+ /* This points to the dma-buf output fence for this atom. If -+ * this is NULL then there is no fence for this atom and the -+ * following fields related to dma_fence may have invalid data. -+ * -+ * The context and seqno fields contain the details for this -+ * fence. -+ * -+ * This fence is signaled when the katom is completed, -+ * regardless of the event_code of the katom (signal also on -+ * failure). -+ */ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ -+ /* This is the callback object that is registered for the fence_in. -+ * The callback is invoked when the fence_in is signaled. -+ */ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence_cb fence_cb; -+#else -+ struct dma_fence_cb fence_cb; -+#endif -+ bool fence_cb_added; -+ -+ unsigned int context; -+ atomic_t seqno; -+ } dma_fence; -+#endif /* CONFIG_SYNC_FILE */ -+ -+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy -+ * of some of the following members -+ */ -+ enum base_jd_event_code event_code; -+ base_jd_core_req core_req; -+ u8 jobslot; -+ u8 renderpass_id; -+ struct base_jd_fragment jc_fragment; -+ -+ u32 ticks; -+ int sched_priority; -+ -+ wait_queue_head_t completed; -+ enum kbase_jd_atom_state status; -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ int work_id; -+#endif -+ unsigned int slot_nr; -+ -+ u32 atom_flags; -+ -+ enum kbase_atom_gpu_rb_state gpu_rb_state; -+ -+ bool need_cache_flush_cores_retained; -+ -+ atomic_t blocked; -+ -+ u64 seq_nr; -+ -+ struct kbase_jd_atom *pre_dep; -+ struct kbase_jd_atom *post_dep; -+ -+ struct kbase_jd_atom *x_pre_dep; -+ struct kbase_jd_atom *x_post_dep; -+ -+ u32 flush_id; -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct base_job_fault_event fault_event; -+#endif -+ struct list_head queue; -+ -+ struct list_head jit_node; -+ bool jit_blocked; -+ -+ enum base_jd_event_code will_fail_event_code; -+ -+ union { -+ enum kbase_atom_enter_protected_state enter; -+ enum kbase_atom_exit_protected_state exit; -+ } protected_state; -+ -+ struct rb_node runnable_tree_node; ++int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, ++ const struct kbase_hwcnt_metadata **metadata); + -+ u32 age; -+}; ++/** ++ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. ++ * @metadata: Pointer to hardware counter metadata ++ */ ++void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + -+static inline bool kbase_jd_katom_is_protected( -+ const struct kbase_jd_atom *katom) ++/** ++ * kbase_hwcnt_metadata_group_count() - Get the number of groups. 
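kbase_hwcnt_metadata_create() and kbase_hwcnt_metadata_destroy(), declared just above, are the entry points that turn a description into the sized metadata used everywhere else. A hedged usage sketch (kernel context assumed; desc stands for any description such as the one sketched earlier, and the function name is illustrative):

static int example_metadata_setup(const struct kbase_hwcnt_description *desc)
{
        const struct kbase_hwcnt_metadata *md;
        int err;

        err = kbase_hwcnt_metadata_create(desc, &md);
        if (err)
                return err;

        /* The metadata reports how big each enable map and dump buffer is. */
        pr_info("enable map: %zu bytes, dump buffer: %zu bytes\n",
                md->enable_map_bytes, md->dump_buf_bytes);

        kbase_hwcnt_metadata_destroy(md);
        return 0;
}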
++ * @metadata: Non-NULL pointer to metadata. ++ * ++ * Return: Number of hardware counter groups described by metadata. ++ */ ++static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) +{ -+ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); ++ if (WARN_ON(!metadata)) ++ return 0; ++ ++ return metadata->grp_cnt; +} + +/** -+ * kbase_jd_atom_is_younger - query if one atom is younger by age than another -+ * -+ * @katom_a: the first atom -+ * @katom_b: the second atom ++ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. + * -+ * Return: true if the first atom is strictly younger than the second, -+ * false otherwise. ++ * Return: Type of the group grp. + */ -+static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a, -+ const struct kbase_jd_atom *katom_b) ++static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, ++ size_t grp) +{ -+ return ((s32)(katom_a->age - katom_b->age) < 0); ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) ++ return 0; ++ ++ return metadata->grp_metadata[grp].type; +} + +/** -+ * kbase_jd_atom_is_earlier - Check whether the first atom has been submitted -+ * earlier than the second one -+ * -+ * @katom_a: the first atom -+ * @katom_b: the second atom ++ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. + * -+ * Return: true if the first atom has been submitted earlier than the -+ * second atom. It is used to understand if an atom that is ready has been -+ * submitted earlier than the currently running atom, so that the currently -+ * running atom should be preempted to allow the ready atom to run. ++ * Return: Number of blocks in group grp. + */ -+static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a, -+ const struct kbase_jd_atom *katom_b) ++static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, ++ size_t grp) +{ -+ /* No seq_nr set? */ -+ if (!katom_a->seq_nr || !katom_b->seq_nr) -+ return false; ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) ++ return 0; + -+ /* Efficiently handle the unlikely case of wrapping. -+ * The following code assumes that the delta between the sequence number -+ * of the two atoms is less than INT64_MAX. -+ * In the extremely unlikely case where the delta is higher, the comparison -+ * defaults for no preemption. -+ * The code also assumes that the conversion from unsigned to signed types -+ * works because the signed integers are 2's complement. -+ */ -+ return (s64)(katom_a->seq_nr - katom_b->seq_nr) < 0; ++ return metadata->grp_metadata[grp].blk_cnt; +} + -+/* -+ * Theory of operations: -+ * -+ * Atom objects are statically allocated within the context structure. ++/** ++ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * -+ * Each atom is the head of two lists, one for the "left" set of dependencies, -+ * one for the "right" set. ++ * Return: Type of the block blk in group grp. 
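The accessors above already allow a simple walk over the counter hierarchy. A hedged sketch that totals the block types in a system using only kbase_hwcnt_metadata_group_count() and kbase_hwcnt_metadata_block_count() (the helper name is illustrative; the kbase_hwcnt_metadata_for_each_block() macro defined further down wraps the full three-level loop):

static size_t example_total_block_types(const struct kbase_hwcnt_metadata *md)
{
        size_t grp, total = 0;

        for (grp = 0; grp < kbase_hwcnt_metadata_group_count(md); grp++)
                total += kbase_hwcnt_metadata_block_count(md, grp);

        return total;
}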
+ */ ++static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, ++ size_t grp, size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; + -+#define KBASE_JD_DEP_QUEUE_SIZE 256 ++ return metadata->grp_metadata[grp].blk_metadata[blk].type; ++} + +/** -+ * enum kbase_jd_renderpass_state - State of a renderpass -+ * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to -+ * START. -+ * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. -+ * Can transition to PEND_OOM or COMPLETE. -+ * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much -+ * memory and has a soft-stop pending. Can transition to -+ * OOM or COMPLETE. -+ * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much -+ * memory and therefore switched to incremental -+ * rendering. The fragment job chain is forced to run. -+ * Can only transition to RETRY. -+ * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at -+ * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. -+ * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at -+ * tiling used too much memory again and has a -+ * soft-stop pending. Can transition to RETRY_OOM -+ * or COMPLETE. -+ * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at -+ * tiling used too much memory again. The fragment job -+ * chain is forced to run. Can only transition to RETRY. ++ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of ++ * a block. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * -+ * A state machine is used to control incremental rendering. ++ * Return: Number of instances of block blk in group grp. + */ -+enum kbase_jd_renderpass_state { -+ KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ -+ KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ -+ KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ -+ KBASE_JD_RP_OOM, /* OOM => RETRY */ -+ KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */ -+ KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */ -+ KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */ -+}; ++static inline size_t ++kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; ++ ++ return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt; ++} + +/** -+ * struct kbase_jd_renderpass - Data for a renderpass -+ * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then -+ * all other members are invalid. -+ * Both the job dispatcher context and hwaccess_lock must be -+ * locked to modify this so that it can be read with either -+ * (or both) locked. -+ * @start_katom: Address of the atom that is the start of a renderpass. -+ * Both the job dispatcher context and hwaccess_lock must be -+ * locked to modify this so that it can be read with either -+ * (or both) locked. -+ * @end_katom: Address of the atom that is the end of a renderpass, or NULL -+ * if that atom hasn't been added to the job scheduler yet. -+ * The job dispatcher context and hwaccess_lock must be -+ * locked to modify this so that it can be read with either -+ * (or both) locked. 
-+ * @oom_reg_list: A list of region structures which triggered out-of-memory. -+ * The hwaccess_lock must be locked to access this. ++ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter ++ * headers. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * -+ * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS -+ * are associated with an object of this type, which is created and maintained -+ * by kbase to keep track of each renderpass. ++ * Return: Number of counter headers in each instance of block blk in group grp. + */ -+struct kbase_jd_renderpass { -+ enum kbase_jd_renderpass_state state; -+ struct kbase_jd_atom *start_katom; -+ struct kbase_jd_atom *end_katom; -+ struct list_head oom_reg_list; -+}; ++static inline size_t ++kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; ++ ++ return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt; ++} + +/** -+ * struct kbase_jd_context - per context object encapsulating all the -+ * Job dispatcher related state. -+ * @lock: lock to serialize the updates made to the -+ * Job dispatcher state and kbase_jd_atom objects. -+ * @sched_info: Structure encapsulating all the Job scheduling -+ * info. -+ * @atoms: Array of the objects representing atoms, -+ * containing the complete state and attributes -+ * of an atom. -+ * @renderpasses: Array of renderpass state for incremental -+ * rendering, indexed by user-specified renderpass -+ * ID. -+ * @job_nr: Tracks the number of atoms being processed by the -+ * kbase. This includes atoms that are not tracked by -+ * scheduler: 'not ready to run' & 'dependency-only' -+ * jobs. -+ * @zero_jobs_wait: Waitq that reflects whether there are no jobs -+ * (including SW-only dependency jobs). This is set -+ * when no jobs are present on the ctx, and clear -+ * when there are jobs. -+ * This must be updated atomically with @job_nr. -+ * note: Job Dispatcher knows about more jobs than -+ * the Job Scheduler as it is unaware of jobs that -+ * are blocked on dependencies and SW-only dependency -+ * jobs. This waitq can be waited upon to find out -+ * when the context jobs are all done/cancelled -+ * (including those that might've been blocked -+ * on dependencies) - and so, whether it can be -+ * terminated. However, it should only be terminated -+ * once it is not present in the run-pool. -+ * Since the waitq is only set under @lock, -+ * the waiter should also briefly obtain and drop -+ * @lock to guarantee that the setter has completed -+ * its work on the kbase_context -+ * @job_done_wq: Workqueue to which the per atom work item is -+ * queued for bottom half processing when the -+ * atom completes -+ * execution on GPU or the input fence get signaled. -+ * @tb_lock: Lock to serialize the write access made to @tb to -+ * store the register access trace messages. -+ * @tb: Pointer to the Userspace accessible buffer storing -+ * the trace messages for register read/write -+ * accesses made by the Kbase. The buffer is filled -+ * in circular fashion. -+ * @tb_wrap_offset: Offset to the end location in the trace buffer, -+ * the write pointer is moved to the beginning on -+ * reaching this offset. 
-+ * @work_id: atomic variable used for GPU tracepoints, -+ * incremented on every call to base_jd_submit. -+ * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both -+ * allocate & free, in submission order, protected -+ * by kbase_jd_context.lock. -+ * @jit_pending_alloc: A list of just-in-time memory allocation -+ * soft-jobs which will be reattempted after the -+ * impending free of other active allocations. -+ * @max_priority: Max priority level allowed for this context. ++ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * ++ * Return: Number of counters in each instance of block blk in group grp. + */ -+struct kbase_jd_context { -+ struct mutex lock; -+ struct kbasep_js_kctx_info sched_info; -+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; -+ struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; -+ struct workqueue_struct *job_done_wq; -+ -+ wait_queue_head_t zero_jobs_wait; -+ spinlock_t tb_lock; -+ u32 *tb; -+ u32 job_nr; -+ size_t tb_wrap_offset; -+ -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ atomic_t work_id; -+#endif ++static inline size_t ++kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; + -+ struct list_head jit_atoms_head; -+ struct list_head jit_pending_alloc; -+ int max_priority; -+}; ++ return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt; ++} + +/** -+ * struct jsctx_queue - JS context atom queue -+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this -+ * job slot. -+ * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot -+ * dependencies. Atoms on this list will be moved to the -+ * runnable_tree when the blocking atom completes. ++ * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * -+ * hwaccess_lock must be held when accessing this structure. ++ * Return: enable map stride in each instance of block blk in group grp. + */ -+struct jsctx_queue { -+ struct rb_root runnable_tree; -+ struct list_head x_dep_head; -+}; ++static inline size_t ++kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, ++ size_t grp, size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; + -+/** -+ * struct kbase_as - Object representing an address space of GPU. -+ * @number: Index at which this address space structure is present -+ * in an array of address space structures embedded inside -+ * the &struct kbase_device. -+ * @pf_wq: Workqueue for processing work items related to -+ * Page fault and Bus fault handling. -+ * @work_pagefault: Work item for the Page fault handling. -+ * @work_busfault: Work item for the Bus fault handling. -+ * @pf_data: Data relating to Page fault. -+ * @bf_data: Data relating to Bus fault. -+ * @current_setup: Stores the MMU configuration for this address space. -+ * @is_unresponsive: Flag to indicate MMU is not responding. -+ * Set if a MMU command isn't completed within -+ * &kbase_device:mmu_as_inactive_wait_time_ms. 
-+ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. -+ */ -+struct kbase_as { -+ int number; -+ struct workqueue_struct *pf_wq; -+ struct work_struct work_pagefault; -+ struct work_struct work_busfault; -+ struct kbase_fault pf_data; -+ struct kbase_fault bf_data; -+ struct kbase_mmu_setup current_setup; -+ bool is_unresponsive; -+}; ++ return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride; ++} + -+#endif /* _KBASE_JM_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h -new file mode 100644 -index 000000000..53819caaf ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h -@@ -0,0 +1,1044 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbase_hwcnt_metadata_block_values_count() - Get the number of values. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * ++ * Return: Number of headers plus counters in each instance of block blk ++ * in group grp. + */ ++static inline size_t ++kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; + -+/* -+ * Job Scheduler Interface. -+ * These interfaces are Internal to KBase. -+ */ -+ -+#ifndef _KBASE_JM_JS_H_ -+#define _KBASE_JM_JS_H_ -+ -+#include "mali_kbase_js_ctx_attr.h" -+ -+#define JS_MAX_RUNNING_JOBS 8 ++ return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) + ++ kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); ++} + +/** -+ * kbasep_js_devdata_init - Initialize the Job Scheduler -+ * @kbdev: The kbase_device to operate on -+ * -+ * The struct kbasep_js_device_data sub-structure of kbdev must be zero -+ * initialized before passing to the kbasep_js_devdata_init() function. This is -+ * to give efficient error path code. ++ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in ++ * the metadata. ++ * @md: Non-NULL pointer to metadata. ++ * @grp: size_t variable used as group iterator. ++ * @blk: size_t variable used as block iterator. ++ * @blk_inst: size_t variable used as block instance iterator. + * -+ * Return: 0 on success, error code otherwise. ++ * Iteration order is group, then block, then block instance (i.e. linearly ++ * through memory). 
+ */ -+int kbasep_js_devdata_init(struct kbase_device * const kbdev); ++#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ ++ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ ++ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ ++ for ((blk_inst) = 0; \ ++ (blk_inst) < \ ++ kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ ++ (blk_inst)++) + +/** -+ * kbasep_js_devdata_halt - Halt the Job Scheduler. -+ * @kbdev: The kbase_device to operate on -+ * -+ * It is safe to call this on kbdev even if it the kbasep_js_device_data -+ * sub-structure was never initialized/failed initialization, to give efficient -+ * error-path code. -+ * -+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev -+ * must be zero initialized before passing to the kbasep_js_devdata_init() -+ * function. This is to give efficient error path code. -+ * -+ * It is a programming error to call this whilst there are still kbase_context -+ * structures registered with this scheduler. ++ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail ++ * mask corresponding to the block. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. + * ++ * Return: The bit index into the avail mask for the block. + */ -+void kbasep_js_devdata_halt(struct kbase_device *kbdev); ++static inline size_t ++kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk) ++{ ++ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || ++ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) ++ return 0; ++ ++ return metadata->grp_metadata[grp].avail_mask_index + ++ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; ++} + +/** -+ * kbasep_js_devdata_term - Terminate the Job Scheduler -+ * @kbdev: The kbase_device to operate on -+ * -+ * It is safe to call this on kbdev even if it the kbasep_js_device_data -+ * sub-structure was never initialized/failed initialization, to give efficient -+ * error-path code. -+ * -+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev -+ * must be zero initialized before passing to the kbasep_js_devdata_init() -+ * function. This is to give efficient error path code. ++ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is ++ * available. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + * -+ * It is a programming error to call this whilst there are still kbase_context -+ * structures registered with this scheduler. ++ * Return: true if the block instance is available, else false. + */ -+void kbasep_js_devdata_term(struct kbase_device *kbdev); ++static inline bool ++kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, ++ size_t blk, size_t blk_inst) ++{ ++ size_t bit; ++ u64 mask; ++ ++ if (WARN_ON(!metadata)) ++ return false; ++ ++ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst; ++ mask = 1ull << bit; ++ ++ return (metadata->avail_mask & mask) != 0; ++} + +/** -+ * kbasep_js_kctx_init - Initialize the Scheduling Component of a -+ * struct kbase_context on the Job Scheduler. 
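As an illustrative aside (not taken from the driver sources added by this patch), the iterator macro above composes with the block accessors like so; the helper name example_total_available_values() is invented for this sketch.

static size_t example_total_available_values(const struct kbase_hwcnt_metadata *md)
{
	size_t grp, blk, blk_inst;
	size_t total = 0;

	kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) {
		/* Skip block instances the hardware does not provide. */
		if (!kbase_hwcnt_metadata_block_instance_avail(md, grp, blk, blk_inst))
			continue;

		total += kbase_hwcnt_metadata_block_values_count(md, grp, blk);
	}

	return total;
}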
-+ * @kctx: The kbase_context to operate on -+ * -+ * This effectively registers a struct kbase_context with a Job Scheduler. -+ * -+ * It does not register any jobs owned by the struct kbase_context with -+ * the scheduler. Those must be separately registered by kbasep_js_add_job(). -+ * -+ * The struct kbase_context must be zero initialized before passing to the -+ * kbase_js_init() function. This is to give efficient error path code. ++ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. ++ * @metadata: Non-NULL pointer to metadata describing the system. ++ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be ++ * initialised to all zeroes (i.e. all counters disabled). + * -+ * Return: 0 on success, error code otherwise. ++ * Return: 0 on success, else error code. + */ -+int kbasep_js_kctx_init(struct kbase_context *const kctx); ++int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_enable_map *enable_map); + +/** -+ * kbasep_js_kctx_term - Terminate the Scheduling Component of a -+ * struct kbase_context on the Job Scheduler -+ * @kctx: The kbase_context to operate on -+ * -+ * This effectively de-registers a struct kbase_context from its Job Scheduler -+ * -+ * It is safe to call this on a struct kbase_context that has never had or -+ * failed initialization of its jctx.sched_info member, to give efficient -+ * error-path code. -+ * -+ * For this to work, the struct kbase_context must be zero intitialized before -+ * passing to the kbase_js_init() function. ++ * kbase_hwcnt_enable_map_free() - Free an enable map. ++ * @enable_map: Enable map to be freed. + * -+ * It is a Programming Error to call this whilst there are still jobs -+ * registered with this context. ++ * Can be safely called on an all-zeroed enable map structure, or on an already ++ * freed enable map. + */ -+void kbasep_js_kctx_term(struct kbase_context *kctx); ++void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + -+/* kbase_jsctx_slot_prio_blocked_set - Set a context as being blocked for a job -+ * slot at and below a given priority level -+ * @kctx: The kbase_context -+ * @js: The job slot -+ * @sched_prio: The priority levels that the context is blocked at for @js (all -+ * priority levels at this level and below will be blocked) -+ * -+ * To preserve ordering and dependencies of atoms on soft-stopping (both within -+ * an between priority levels), a context must be marked as blocked for that -+ * atom's job slot, for all priority levels at or below the atom's priority. -+ * -+ * This must only be called due to an atom that was pulled from the context, -+ * otherwise there will be no way of unblocking the context when the atom is -+ * completed/unpulled. ++/** ++ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block ++ * instance's enable map. ++ * @map: Non-NULL pointer to enable map. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + * -+ * Atoms of higher priority might still be able to be pulled from the context -+ * on @js. This helps with starting a high priority atom as soon as possible. ++ * Return: u64* to the bitfield(s) used as the enable map for the ++ * block instance. 
+ */ -+static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js, -+ int sched_prio) ++static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, ++ size_t grp, size_t blk, size_t blk_inst) +{ -+ struct kbase_jsctx_slot_tracking *slot_tracking = -+ &kctx->slot_tracking[js]; ++ if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) ++ return NULL; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ WARN(!slot_tracking->atoms_pulled_pri[sched_prio], -+ "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", -+ js, sched_prio); ++ if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) || ++ WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) || ++ WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) ++ return map->hwcnt_enable_map; + -+ slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio; -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_BLOCKED, kctx, -+ NULL, 0, js, (unsigned int)sched_prio); ++ return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + ++ map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + ++ (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); +} + -+/* kbase_jsctx_atoms_pulled - Return number of atoms pulled on a context -+ * @kctx: The kbase_context -+ * -+ * Having atoms pulled indicates the context is not idle. ++/** ++ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required ++ * to have at minimum one bit per value. ++ * @val_cnt: Number of values. + * -+ * Return: the number of atoms pulled on @kctx ++ * Return: Number of required bitfields. + */ -+static inline int kbase_jsctx_atoms_pulled(struct kbase_context *kctx) ++static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ -+ return atomic_read(&kctx->atoms_pulled_all_slots); ++ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS; +} + +/** -+ * kbasep_js_add_job - Add a job chain to the Job Scheduler, -+ * and take necessary actions to -+ * schedule the context/run the job. -+ * @kctx: The kbase_context to operate on -+ * @atom: Atom to add -+ * -+ * This atomically does the following: -+ * * Update the numbers of jobs information -+ * * Add the job to the run pool if necessary (part of init_job) -+ * -+ * Once this is done, then an appropriate action is taken: -+ * * If the ctx is scheduled, it attempts to start the next job (which might be -+ * this added job) -+ * * Otherwise, and if this is the first job on the context, it enqueues it on -+ * the Policy Queue -+ * -+ * The Policy's Queue can be updated by this in the following ways: -+ * * In the above case that this is the first job on the context -+ * * If the context is high priority and the context is not scheduled, then it -+ * could cause the Policy to schedule out a low-priority context, allowing -+ * this context to be scheduled in. -+ * -+ * If the context is already scheduled on the RunPool, then adding a job to it -+ * is guaranteed not to update the Policy Queue. And so, the caller is -+ * guaranteed to not need to try scheduling a context from the Run Pool - it -+ * can safely assert that the result is false. -+ * -+ * It is a programming error to have more than U32_MAX jobs in flight at a time. 
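Illustrative sketch (not part of the patched sources): locating the enable-map words that describe one block instance by combining the two helpers above; example_block_bitfields() is an invented name.

static size_t example_block_bitfields(const struct kbase_hwcnt_enable_map *map,
				      size_t grp, size_t blk, size_t blk_inst,
				      u64 **out_bitfld)
{
	const size_t val_cnt =
		kbase_hwcnt_metadata_block_values_count(map->metadata, grp, blk);

	*out_bitfld = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);

	/* One enable bit per value, rounded up to whole u64 words. */
	return kbase_hwcnt_bitfield_count(val_cnt);
}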
-+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * it must not hold hwaccess_lock (as this will be obtained internally) -+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used -+ * internally). -+ * -+ * Return: true indicates that the Policy Queue was updated, and so the -+ * caller will need to try scheduling a context onto the Run Pool, -+ * false indicates that no updates were made to the Policy Queue, -+ * so no further action is required from the caller. This is always returned -+ * when the context is currently scheduled. ++ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. ++ * @dst: Non-NULL pointer to enable map. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + */ -+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); ++static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, ++ size_t grp, size_t blk, size_t blk_inst) ++{ ++ size_t val_cnt; ++ size_t bitfld_cnt; ++ u64 *const block_enable_map = ++ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); ++ ++ if (WARN_ON(!dst)) ++ return; ++ ++ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); ++ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ ++ memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); ++} + +/** -+ * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, -+ * except for its 'retained state'. -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @atom: Atom to remove -+ * -+ * Completely removing a job requires several calls: -+ * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of -+ * the atom -+ * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler -+ * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the -+ * remaining state held as part of the job having been run. -+ * -+ * In the common case of atoms completing normally, this set of actions is more -+ * optimal for spinlock purposes than having kbasep_js_remove_job() handle all -+ * of the actions. -+ * -+ * In the case of canceling atoms, it is easier to call -+ * kbasep_js_remove_cancelled_job(), which handles all the necessary actions. -+ * -+ * It is a programming error to call this when: -+ * * a atom is not a job belonging to kctx. -+ * * a atom has already been removed from the Job Scheduler. -+ * * a atom is still in the runpool -+ * -+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use -+ * kbasep_js_remove_cancelled_job() instead. -+ * -+ * The following locking conditions are made on the caller: -+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * ++ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. ++ * @dst: Non-NULL pointer to enable map to zero. 
+ */ -+void kbasep_js_remove_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_jd_atom *atom); ++static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst) ++{ ++ if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) ++ return; ++ ++ if (dst->hwcnt_enable_map != NULL) ++ memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes); ++ ++ dst->clk_enable_map = 0; ++} + +/** -+ * kbasep_js_remove_cancelled_job - Completely remove a job chain from the -+ * Job Scheduler, in the case -+ * where the job chain was cancelled. -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @katom: Atom to remove -+ * -+ * This is a variant of kbasep_js_remove_job() that takes care of removing all -+ * of the retained state too. This is generally useful for cancelled atoms, -+ * which need not be handled in an optimal way. -+ * -+ * It is a programming error to call this when: -+ * * a atom is not a job belonging to kctx. -+ * * a atom has already been removed from the Job Scheduler. -+ * * a atom is still in the runpool: -+ * * it is not being killed with kbasep_jd_cancel() -+ * -+ * The following locking conditions are made on the caller: -+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * it must not hold the hwaccess_lock, (as this will be obtained -+ * internally) -+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be -+ * obtained internally) -+ * -+ * Return: true indicates that ctx attributes have changed and the caller -+ * should call kbase_js_sched_all() to try to run more jobs and -+ * false otherwise. ++ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. ++ * @dst: Non-NULL pointer to enable map. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + */ -+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, ++ size_t grp, size_t blk, size_t blk_inst) ++{ ++ size_t val_cnt; ++ size_t bitfld_cnt; ++ u64 *const block_enable_map = ++ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); ++ size_t bitfld_idx; ++ ++ if (WARN_ON(!dst)) ++ return; ++ ++ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); ++ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ ++ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { ++ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); ++ u64 block_enable_map_mask = U64_MAX; ++ ++ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) ++ block_enable_map_mask = (1ull << remaining_values) - 1; ++ ++ block_enable_map[bitfld_idx] = block_enable_map_mask; ++ } ++} + +/** -+ * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a -+ * context that was evicted from the -+ * policy queue or runpool. 
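Illustrative sketch (not part of the patched sources): restricting an enable map to a single block instance by clearing the whole map and then re-enabling one block; example_enable_single_block() is an invented name.

static void example_enable_single_block(struct kbase_hwcnt_enable_map *map,
					size_t grp, size_t blk, size_t blk_inst)
{
	kbase_hwcnt_enable_map_disable_all(map);
	kbase_hwcnt_enable_map_block_enable_all(map, grp, blk, blk_inst);
}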
-+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @has_pm_ref: tells whether to release Power Manager active reference -+ * -+ * This should be used whenever handing off a context that has been evicted -+ * from the policy queue or the runpool: -+ * * If the context is not dying and has jobs, it gets re-added to the policy -+ * queue -+ * * Otherwise, it is not added -+ * -+ * In addition, if the context is dying the jobs are killed asynchronously. -+ * -+ * In all cases, the Power Manager active reference is released -+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. -+ * has_pm_ref must be set to false whenever the context was not previously in -+ * the runpool and does not hold a Power Manager active refcount. Note that -+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an -+ * active refcount even though they weren't in the runpool. -+ * -+ * The following locking conditions are made on the caller: -+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be -+ * obtained internally) ++ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable ++ * map. ++ * @dst: Non-NULL pointer to enable map. + */ -+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, bool has_pm_ref); ++static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) ++{ ++ size_t grp, blk, blk_inst; ++ ++ if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) ++ return; ++ ++ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) ++ kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); ++ ++ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; ++} + +/** -+ * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, -+ * allowing it to be scheduled out. -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * -+ * When the refcount reaches zero and the context might be scheduled out -+ * (depending on whether the Scheduling Policy has deemed it so, or if it has -+ * run out of jobs). -+ * -+ * If the context does get scheduled out, then The following actions will be -+ * taken as part of deschduling a context: -+ * For the context being descheduled: -+ * * If the context is in the processing of dying (all the jobs are being -+ * removed from it), then descheduling also kills off any jobs remaining in the -+ * context. -+ * * If the context is not dying, and any jobs remain after descheduling the -+ * context then it is re-enqueued to the Policy's Queue. -+ * * Otherwise, the context is still known to the scheduler, but remains absent -+ * from the Policy Queue until a job is next added to it. -+ * * In all descheduling cases, the Power Manager active reference (obtained -+ * during kbasep_js_try_schedule_head_ctx()) is released -+ * (kbase_pm_context_idle()). -+ * -+ * Whilst the context is being descheduled, this also handles actions that -+ * cause more atoms to be run: -+ * * Attempt submitting atoms when the Context Attributes on the Runpool have -+ * changed. This is because the context being scheduled out could mean that -+ * there are more opportunities to run atoms. -+ * * Attempt submitting to a slot that was previously blocked due to affinity -+ * restrictions. 
This is usually only necessary when releasing a context -+ * happens as part of completing a previous job, but is harmless nonetheless. -+ * * Attempt scheduling in a new context (if one is available), and if -+ * necessary, running a job from that new context. -+ * -+ * Unlike retaining a context in the runpool, this function cannot be called -+ * from IRQ context. -+ * -+ * It is a programming error to call this on a kctx that is not currently -+ * scheduled, or that already has a zero refcount. -+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold the hwaccess_lock, because it will be used internally. -+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) -+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be -+ * obtained internally) ++ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. ++ * @dst: Non-NULL pointer to destination enable map. ++ * @src: Non-NULL pointer to source enable map. + * ++ * The dst and src MUST have been created from the same metadata. + */ -+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst, ++ const struct kbase_hwcnt_enable_map *src) ++{ ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || ++ WARN_ON(dst->metadata != src->metadata)) ++ return; ++ ++ if (dst->hwcnt_enable_map != NULL) { ++ if (WARN_ON(!src->hwcnt_enable_map)) ++ return; ++ ++ memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map, ++ dst->metadata->enable_map_bytes); ++ } ++ ++ dst->clk_enable_map = src->clk_enable_map; ++} + +/** -+ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of -+ * kbasep_js_runpool_release_ctx() that handles additional -+ * actions from completing an atom. -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * @katom_retained_state: Retained state from the atom -+ * -+ * This is usually called as part of completing an atom and releasing the -+ * refcount on the context held by the atom. -+ * -+ * Therefore, the extra actions carried out are part of handling actions queued -+ * on a completed atom, namely: -+ * * Releasing the atom's context attributes -+ * * Retrying the submission on a particular slot, because we couldn't submit -+ * on that slot from an IRQ handler. ++ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. ++ * @dst: Non-NULL pointer to destination enable map. ++ * @src: Non-NULL pointer to source enable map. + * -+ * The locking conditions of this function are the same as those for -+ * kbasep_js_runpool_release_ctx() ++ * The dst and src MUST have been created from the same metadata. 
+ */ -+void kbasep_js_runpool_release_ctx_and_katom_retained_state( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state); ++static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst, ++ const struct kbase_hwcnt_enable_map *src) ++{ ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || ++ WARN_ON(dst->metadata != src->metadata)) ++ return; ++ ++ if (dst->hwcnt_enable_map != NULL) { ++ size_t i; ++ size_t const bitfld_count = ++ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; ++ ++ if (WARN_ON(!src->hwcnt_enable_map)) ++ return; ++ ++ for (i = 0; i < bitfld_count; i++) ++ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; ++ } ++ ++ dst->clk_enable_map |= src->clk_enable_map; ++} + +/** -+ * kbasep_js_runpool_release_ctx_nolock - Variant of kbase_js_runpool_release_ctx() -+ * without locks -+ * @kbdev: KBase device -+ * @kctx: KBase context ++ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block ++ * instance are enabled. ++ * @enable_map: Non-NULL pointer to enable map. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + * -+ * Variant of kbase_js_runpool_release_ctx() that assumes that -+ * kbasep_js_device_data::runpool_mutex and -+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not -+ * attempt to schedule new contexts. ++ * Return: true if any values in the block are enabled, else false. + */ -+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++static inline bool ++kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, ++ size_t blk, size_t blk_inst) ++{ ++ bool any_enabled = false; ++ size_t val_cnt; ++ size_t bitfld_cnt; ++ const u64 *const block_enable_map = ++ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); ++ size_t bitfld_idx; ++ ++ if (WARN_ON(!enable_map)) ++ return false; ++ ++ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk); ++ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); ++ ++ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { ++ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); ++ u64 block_enable_map_mask = U64_MAX; ++ ++ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) ++ block_enable_map_mask = (1ull << remaining_values) - 1; ++ ++ any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask); ++ } ++ ++ return any_enabled; ++} + +/** -+ * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * -+ * This schedules a context in regardless of the context priority. -+ * If the runpool is full, a context will be forced out of the runpool and the -+ * function will wait for the new context to be scheduled in. -+ * The context will be kept scheduled in (and the corresponding address space -+ * reserved) until kbasep_js_release_privileged_ctx is called). -+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold the hwaccess_lock, because it will be used internally. 
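Illustrative sketch (not part of the patched sources): building the combined counter request of two clients, in the way a virtualizer-style consumer of these helpers might; example_combine_client_maps() is an invented name, and all three maps are assumed to have been created from the same metadata.

static void example_combine_client_maps(struct kbase_hwcnt_enable_map *combined,
					const struct kbase_hwcnt_enable_map *cli_a,
					const struct kbase_hwcnt_enable_map *cli_b)
{
	kbase_hwcnt_enable_map_copy(combined, cli_a);
	kbase_hwcnt_enable_map_union(combined, cli_b);
}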
-+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) -+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used -+ * internally). -+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will -+ * be used internally. ++ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. ++ * @enable_map: Non-NULL pointer to enable map. + * ++ * Return: true if any values are enabled, else false. + */ -+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++static inline bool ++kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ size_t grp, blk, blk_inst; ++ u64 clk_enable_map_mask; ++ ++ if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata)) ++ return false; ++ ++ clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1; ++ ++ if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) ++ return true; ++ ++ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) ++ { ++ if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) ++ return true; ++ } ++ ++ return false; ++} + +/** -+ * kbasep_js_release_privileged_ctx - Release a privileged context, -+ * allowing it to be scheduled out. -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * -+ * See kbasep_js_runpool_release_ctx for potential side effects. -+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold the hwaccess_lock, because it will be used internally. -+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) ++ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block ++ * instance is enabled. ++ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_idx: Index of the value to check in the block instance. + * ++ * Return: true if the value was enabled, else false. + */ -+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx) ++{ ++ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; ++ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; ++ const u64 mask = 1ull << bit; ++ ++ return (bitfld[idx] & mask) != 0; ++} + +/** -+ * kbase_js_try_run_jobs - Try to submit the next job on each slot -+ * @kbdev: KBase device -+ * -+ * The following locks may be used: -+ * * kbasep_js_device_data::runpool_mutex -+ * * hwaccess_lock ++ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block ++ * instance. ++ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_idx: Index of the value to enable in the block instance. 
+ */ -+void kbase_js_try_run_jobs(struct kbase_device *kbdev); ++static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx) ++{ ++ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; ++ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; ++ const u64 mask = 1ull << bit; ++ ++ bitfld[idx] |= mask; ++} + +/** -+ * kbasep_js_suspend - Suspend the job scheduler during a Power Management -+ * Suspend event. -+ * @kbdev: KBase device -+ * -+ * Causes all contexts to be removed from the runpool, and prevents any -+ * contexts from (re)entering the runpool. -+ * -+ * This does not handle suspending the one privileged context: the caller must -+ * instead do this by suspending the GPU HW Counter Instrumentation. -+ * -+ * This will eventually cause all Power Management active references held by -+ * contexts on the runpool to be released, without running any more atoms. -+ * -+ * The caller must then wait for all Power Management active refcount to become -+ * zero before completing the suspend. -+ * -+ * The emptying mechanism may take some time to complete, since it can wait for -+ * jobs to complete naturally instead of forcing them to end quickly. However, -+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this -+ * function is guaranteed to complete in a finite time. ++ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block ++ * instance. ++ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_idx: Index of the value to disable in the block instance. + */ -+void kbasep_js_suspend(struct kbase_device *kbdev); ++static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx) ++{ ++ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; ++ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; ++ const u64 mask = 1ull << bit; ++ ++ bitfld[idx] &= ~mask; ++} + +/** -+ * kbasep_js_resume - Resume the Job Scheduler after a Power Management -+ * Resume event. -+ * @kbdev: KBase device ++ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. ++ * @metadata: Non-NULL pointer to metadata describing the system. ++ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be ++ * initialised to undefined values, so must be used as a copy dest, ++ * or cleared before use. + * -+ * This restores the actions from kbasep_js_suspend(): -+ * * Schedules contexts back into the runpool -+ * * Resumes running atoms on the GPU ++ * Return: 0 on success, else error code. + */ -+void kbasep_js_resume(struct kbase_device *kbdev); ++int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_dump_buffer *dump_buf); + +/** -+ * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler. -+ * -+ * @kctx: Context pointer -+ * @katom: Pointer to the atom to submit -+ * -+ * The atom is enqueued on the context's ringbuffer. The caller must have -+ * ensured that all dependencies can be represented in the ringbuffer. -+ * -+ * Caller must hold jctx->lock ++ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. ++ * @dump_buf: Dump buffer to be freed. + * -+ * Return: true if the context requires to be enqueued, otherwise false. ++ * Can be safely called on an all-zeroed dump buffer structure, or on an already ++ * freed dump buffer. 
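Illustrative sketch (not part of the patched sources): enabling one specific value (header or counter) in one block instance; example_enable_one_counter() is an invented name.

static void example_enable_one_counter(struct kbase_hwcnt_enable_map *map,
				       size_t grp, size_t blk, size_t blk_inst,
				       size_t val_idx)
{
	u64 *bitfld = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);

	if (bitfld)
		kbase_hwcnt_enable_map_block_enable_value(bitfld, val_idx);
}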
+ */ -+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** -+ * kbase_js_pull - Pull an atom from a context in the job scheduler for -+ * execution. -+ * -+ * @kctx: Context to pull from -+ * @js: Job slot to pull from -+ * -+ * The atom will not be removed from the ringbuffer at this stage. ++ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. ++ * @metadata: Non-NULL pointer to metadata describing the system. ++ * @n: Number of dump buffers to allocate ++ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. + * -+ * The HW access lock must be held when calling this function. ++ * A single zeroed contiguous page allocation will be used for all of the ++ * buffers inside the array, where: ++ * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * -+ * Return: a pointer to an atom, or NULL if there are no atoms for this -+ * slot that can be currently run. ++ * Return: 0 on success, else error code. + */ -+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js); ++int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, ++ struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** -+ * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. -+ * -+ * @kctx: Context pointer -+ * @katom: Pointer to the atom to unpull -+ * -+ * An atom is 'unpulled' if execution is stopped but intended to be returned to -+ * later. The most common reason for this is that the atom has been -+ * soft-stopped. Another reason is if an end-of-renderpass atom completed -+ * but will need to be run again as part of the same renderpass. -+ * -+ * Note that if multiple atoms are to be 'unpulled', they must be returned in -+ * the reverse order to which they were originally pulled. It is a programming -+ * error to return atoms in any other order. -+ * -+ * The HW access lock must be held when calling this function. ++ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. ++ * @dump_bufs: Dump buffer array to be freed. + * ++ * Can be safely called on an all-zeroed dump buffer array structure, or on an ++ * already freed dump buffer array. + */ -+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); ++void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** -+ * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(), -+ * removing it from the job -+ * scheduler ringbuffer. -+ * @kctx: Context pointer -+ * @katom: Pointer to the atom to complete -+ * -+ * If the atom failed then all dependee atoms marked for failure propagation -+ * will also fail. ++ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block ++ * instance's dump buffer. ++ * @buf: Non-NULL pointer to dump buffer. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * @blk_inst: Index of the block instance in the block. + * -+ * Return: true if the context is now idle (no jobs pulled) false otherwise. ++ * Return: u64* to the dump buffer for the block instance. 
+ */ -+bool kbase_js_complete_atom_wq(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, ++ size_t grp, size_t blk, size_t blk_inst) ++{ ++ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) ++ return NULL; ++ ++ if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) || ++ WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) || ++ WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) ++ return buf->dump_buf; ++ ++ return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + ++ buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + ++ (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); ++} + +/** -+ * kbase_js_complete_atom - Complete an atom. -+ * -+ * @katom: Pointer to the atom to complete -+ * @end_timestamp: The time that the atom completed (may be NULL) -+ * -+ * Most of the work required to complete an atom will be performed by -+ * jd_done_worker(). -+ * -+ * The HW access lock must be held when calling this function. ++ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. ++ * After the operation, all non-enabled values ++ * will be undefined. ++ * @dst: Non-NULL pointer to dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * Return: a atom that has now been unblocked and can now be run, or NULL -+ * if none ++ * The dst and dst_enable_map MUST have been created from the same metadata. + */ -+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp); ++void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** -+ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot -+ * dependency -+ * @katom: Pointer to an atom in the slot ringbuffer -+ * -+ * A cross-slot dependency is ignored if necessary to unblock incremental -+ * rendering. If the atom at the start of a renderpass used too much memory -+ * and was soft-stopped then the atom at the end of a renderpass is submitted -+ * to hardware regardless of its dependency on the start-of-renderpass atom. -+ * This can happen multiple times for the same pair of atoms. -+ * -+ * Return: true to block the atom or false to allow it to be submitted to -+ * hardware. ++ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @val_cnt: Number of values in the block. + */ -+bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); ++static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt) ++{ ++ if (WARN_ON(!dst_blk)) ++ return; ++ ++ memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); ++} + +/** -+ * kbase_js_sched - Submit atoms from all available contexts. -+ * -+ * @kbdev: Device pointer -+ * @js_mask: Mask of job slots to submit to -+ * -+ * This will attempt to submit as many jobs as possible to the provided job -+ * slots. It will exit when either all job slots are full, or all contexts have -+ * been used. -+ * ++ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. ++ * After the operation, all values ++ * (including padding bytes) will be ++ * zero. ++ * Slower than the non-strict variant. ++ * @dst: Non-NULL pointer to dump buffer. 
+ */ -+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask); ++void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst); + +/** -+ * kbase_js_zap_context - Attempt to deschedule a context that is being -+ * destroyed -+ * @kctx: Context pointer -+ * -+ * This will attempt to remove a context from any internal job scheduler queues -+ * and perform any other actions to ensure a context will not be submitted -+ * from. ++ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in ++ * dst (including padding bytes and ++ * unavailable blocks). ++ * After the operation, all enabled ++ * values will be unchanged. ++ * @dst: Non-NULL pointer to dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * If the context is currently scheduled, then the caller must wait for all -+ * pending jobs to complete before taking any further action. ++ * The dst and dst_enable_map MUST have been created from the same metadata. + */ -+void kbase_js_zap_context(struct kbase_context *kctx); ++void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** -+ * kbase_js_is_atom_valid - Validate an atom -+ * -+ * @kbdev: Device pointer -+ * @katom: Atom to validate -+ * -+ * This will determine whether the atom can be scheduled onto the GPU. Atoms -+ * with invalid combinations of core requirements will be rejected. -+ * -+ * Return: true if atom is valid false otherwise. ++ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled ++ * values in a block. ++ * After the operation, all ++ * enabled values will be ++ * unchanged. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_cnt: Number of values in the block. + */ -+bool kbase_js_is_atom_valid(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, ++ size_t val_cnt) ++{ ++ size_t val; ++ ++ if (WARN_ON(!dst_blk)) ++ return; ++ ++ for (val = 0; val < val_cnt; val++) { ++ if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) ++ dst_blk[val] = 0; ++ } ++} + +/** -+ * kbase_js_set_timeouts - update all JS timeouts with user specified data -+ * -+ * @kbdev: Device pointer ++ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. ++ * After the operation, all non-enabled values ++ * will be undefined. ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is -+ * set to a positive number then that becomes the new value used, if a timeout -+ * is negative then the default is set. ++ * The dst, src, and dst_enable_map MUST have been created from the same ++ * metadata. 
+ */ -+void kbase_js_set_timeouts(struct kbase_device *kbdev); ++void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** -+ * kbase_js_set_ctx_priority - set the context priority -+ * -+ * @kctx: Context pointer -+ * @new_priority: New priority value for the Context -+ * -+ * The context priority is set to a new value and it is moved to the -+ * pullable/unpullable list as per the new priority. ++ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @src_blk: Non-NULL pointer to src block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @val_cnt: Number of values in the block. + */ -+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); ++static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk, ++ size_t val_cnt) ++{ ++ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) ++ return; ++ ++ /* Copy all the counters in the block instance. ++ * Values of non-enabled counters are undefined. ++ */ ++ memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); ++} + +/** -+ * kbase_js_update_ctx_priority - update the context priority -+ * -+ * @kctx: Context pointer ++ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to ++ * dst. ++ * After the operation, all non-enabled ++ * values (including padding bytes) will ++ * be zero. ++ * Slower than the non-strict variant. ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * The context priority gets updated as per the priority of atoms currently in -+ * use for that context, but only if system priority mode for context scheduling -+ * is being used. -+ */ -+void kbase_js_update_ctx_priority(struct kbase_context *kctx); -+ -+/* -+ * Helpers follow ++ * The dst, src, and dst_enable_map MUST have been created from the same ++ * metadata. + */ ++void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** -+ * kbasep_js_is_submit_allowed - Check that a context is allowed to submit -+ * jobs on this policy -+ * @js_devdata: KBase Job Scheduler Device Data -+ * @kctx: KBase context -+ * -+ * The purpose of this abstraction is to hide the underlying data size, -+ * and wrap up the long repeated line of code. -+ * -+ * As with any bool, never test the return value with true. -+ * -+ * The caller must hold hwaccess_lock. ++ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values ++ * from src to dst. ++ * After the operation, all ++ * non-enabled values will be ++ * zero. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @src_blk: Non-NULL pointer to src block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @val_cnt: Number of values in the block. + * -+ * Return: true if the context is allowed to submit jobs, false otherwise. ++ * After the copy, any disabled values in dst will be zero. 
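Illustrative sketch (not part of the patched sources): copying a dump into a per-client buffer under that client's enable map. The strict variant additionally zeroes everything the client did not enable, at some extra cost, which matters when stale or undefined values must not reach the consumer; example_export_dump() is an invented name.

static void example_export_dump(struct kbase_hwcnt_dump_buffer *client_buf,
				const struct kbase_hwcnt_dump_buffer *dump,
				const struct kbase_hwcnt_enable_map *client_map,
				bool strict)
{
	if (strict)
		kbase_hwcnt_dump_buffer_copy_strict(client_buf, dump, client_map);
	else
		kbase_hwcnt_dump_buffer_copy(client_buf, dump, client_map);
}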
+ */ -+static inline bool kbasep_js_is_submit_allowed( -+ struct kbasep_js_device_data *js_devdata, -+ struct kbase_context *kctx) ++static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk, ++ const u64 *blk_em, size_t val_cnt) +{ -+ u16 test_bit; -+ bool is_allowed; ++ size_t val; + -+ /* Ensure context really is scheduled in */ -+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), -+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, -+ kctx->as_nr, atomic_read(&kctx->flags))) -+ return false; ++ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) ++ return; + -+ test_bit = (u16) (1u << kctx->as_nr); ++ for (val = 0; val < val_cnt; val++) { ++ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); + -+ is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); -+ dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", -+ is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); -+ return is_allowed; ++ dst_blk[val] = val_enabled ? src_blk[val] : 0; ++ } +} + +/** -+ * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy -+ * @js_devdata: KBase Job Scheduler Device Data -+ * @kctx: KBase context -+ * -+ * The purpose of this abstraction is to hide the underlying data size, -+ * and wrap up the long repeated line of code. ++ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and ++ * accumulate all enabled counters from ++ * src to dst. ++ * After the operation, all non-enabled ++ * values will be undefined. ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * The caller must hold hwaccess_lock. ++ * The dst, src, and dst_enable_map MUST have been created from the same ++ * metadata. + */ -+static inline void kbasep_js_set_submit_allowed( -+ struct kbasep_js_device_data *js_devdata, -+ struct kbase_context *kctx) ++void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); ++ ++/** ++ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and ++ * accumulate all block counters ++ * from src to dst. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @src_blk: Non-NULL pointer to src block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @hdr_cnt: Number of headers in the block. ++ * @ctr_cnt: Number of counters in the block. ++ */ ++static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk, ++ size_t hdr_cnt, size_t ctr_cnt) +{ -+ u16 set_bit; ++ size_t ctr; + -+ /* Ensure context really is scheduled in */ -+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), -+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, -+ kctx->as_nr, atomic_read(&kctx->flags))) ++ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) + return; + -+ set_bit = (u16) (1u << kctx->as_nr); -+ -+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", -+ kctx, kctx->as_nr); ++ /* Copy all the headers in the block instance. ++ * Values of non-enabled headers are undefined. 
++ */ ++ memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + -+ js_devdata->runpool_irq.submit_allowed |= set_bit; ++ /* Accumulate all the counters in the block instance. ++ * Values of non-enabled counters are undefined. ++ */ ++ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) ++ dst_blk[ctr] += src_blk[ctr]; +} + +/** -+ * kbasep_js_clear_submit_allowed - Prevent a context from submitting more -+ * jobs on this policy -+ * @js_devdata: KBase Job Scheduler Device Data -+ * @kctx: KBase context -+ * -+ * The purpose of this abstraction is to hide the underlying data size, -+ * and wrap up the long repeated line of code. ++ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and ++ * accumulate all enabled counters ++ * from src to dst. ++ * After the operation, all ++ * non-enabled values (including ++ * padding bytes) will be zero. ++ * Slower than the non-strict ++ * variant. ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * -+ * The caller must hold hwaccess_lock. ++ * The dst, src, and dst_enable_map MUST have been created from the same ++ * metadata. + */ -+static inline void kbasep_js_clear_submit_allowed( -+ struct kbasep_js_device_data *js_devdata, -+ struct kbase_context *kctx) ++void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); ++ ++/** ++ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block ++ * headers and accumulate ++ * all block counters from ++ * src to dst. ++ * After the operation, all ++ * non-enabled values will ++ * be zero. ++ * @dst_blk: Non-NULL pointer to dst block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @src_blk: Non-NULL pointer to src block obtained from a call to ++ * kbase_hwcnt_dump_buffer_block_instance. ++ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to ++ * kbase_hwcnt_enable_map_block_instance. ++ * @hdr_cnt: Number of headers in the block. ++ * @ctr_cnt: Number of counters in the block. 
++ */ ++static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk, ++ const u64 *blk_em, ++ size_t hdr_cnt, size_t ctr_cnt) +{ -+ u16 clear_bit; -+ u16 clear_mask; ++ size_t ctr; + -+ /* Ensure context really is scheduled in */ -+ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), -+ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, -+ kctx->as_nr, atomic_read(&kctx->flags))) ++ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) + return; + -+ clear_bit = (u16) (1u << kctx->as_nr); -+ clear_mask = ~clear_bit; ++ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt); + -+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", -+ kctx, kctx->as_nr); ++ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { ++ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr); + -+ js_devdata->runpool_irq.submit_allowed &= clear_mask; ++ if (ctr_enabled) ++ dst_blk[ctr] += src_blk[ctr]; ++ else ++ dst_blk[ctr] = 0; ++ } +} + +/** -+ * kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid' -+ * atom retained state -+ * -+ * @retained_state: pointer where to create and initialize the state -+ * -+ * Create an initial 'invalid' atom retained state, that requires no -+ * atom-related work to be done on releasing with -+ * kbasep_js_runpool_release_ctx_and_katom_retained_state() ++ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the ++ * metadata. ++ * @md: Non-NULL pointer to metadata. ++ * @clk: size_t variable used as clock iterator. + */ -+static inline void kbasep_js_atom_retained_state_init_invalid( -+ struct kbasep_js_atom_retained_state *retained_state) -+{ -+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; -+ retained_state->core_req = -+ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; -+} ++#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) + +/** -+ * kbasep_js_atom_retained_state_copy() - Copy atom state -+ * @retained_state: where to copy -+ * @katom: where to copy from ++ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled ++ * in clk_enable_map. ++ * @clk_enable_map: An enable map for clock domains. ++ * @index: Index of the enable map for clock domain. + * -+ * Copy atom state that can be made available after kbase_jd_done_nolock() is called -+ * on that atom. ++ * Return: true if the index of the clock domain is enabled, else false. + */ -+static inline void kbasep_js_atom_retained_state_copy( -+ struct kbasep_js_atom_retained_state *retained_state, -+ const struct kbase_jd_atom *katom) ++static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index) +{ -+ retained_state->event_code = katom->event_code; -+ retained_state->core_req = katom->core_req; -+ retained_state->sched_priority = katom->sched_priority; -+ retained_state->device_nr = katom->device_nr; ++ if (WARN_ON(index >= 64)) ++ return false; ++ if (clk_enable_map & (1ull << index)) ++ return true; ++ return false; +} + -+/** -+ * kbasep_js_has_atom_finished - Determine whether an atom has finished -+ * (given its retained state), -+ * and so should be given back to -+ * userspace/removed from the system. 
++#endif /* _KBASE_HWCNT_TYPES_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c +new file mode 100644 +index 000000000..d618764d3 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c +@@ -0,0 +1,744 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @katom_retained_state: the retained state of the atom to check ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * -+ * Reasons for an atom not finishing include: -+ * * Being soft-stopped (and so, the atom should be resubmitted sometime later) -+ * * It is an end of renderpass atom that was run to consume the output of a -+ * start-of-renderpass atom that was soft-stopped because it used too much -+ * memory. In this case, it will have to be run again later. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: false if the atom has not finished, true otherwise. + */ -+static inline bool kbasep_js_has_atom_finished( -+ const struct kbasep_js_atom_retained_state *katom_retained_state) ++ ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#include "hwcnt/mali_kbase_hwcnt_accumulator.h" ++#include "hwcnt/mali_kbase_hwcnt_context.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" ++ ++#include ++#include ++ ++/** ++ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. ++ * @hctx: Hardware counter context being virtualized. ++ * @dump_threshold_ns: Minimum threshold period for dumps between different ++ * clients where a new accumulator dump will not be ++ * performed, and instead accumulated values will be used. ++ * If 0, rate limiting is disabled. ++ * @metadata: Hardware counter metadata. ++ * @lock: Lock acquired at all entrypoints, to protect mutable ++ * state. ++ * @client_count: Current number of virtualizer clients. ++ * @clients: List of virtualizer clients. ++ * @accum: Hardware counter accumulator. NULL if no clients. ++ * @scratch_map: Enable map used as scratch space during counter changes. ++ * @scratch_buf: Dump buffer used as scratch space during dumps. ++ * @ts_last_dump_ns: End time of most recent dump across all clients. ++ */ ++struct kbase_hwcnt_virtualizer { ++ struct kbase_hwcnt_context *hctx; ++ u64 dump_threshold_ns; ++ const struct kbase_hwcnt_metadata *metadata; ++ struct mutex lock; ++ size_t client_count; ++ struct list_head clients; ++ struct kbase_hwcnt_accumulator *accum; ++ struct kbase_hwcnt_enable_map scratch_map; ++ struct kbase_hwcnt_dump_buffer scratch_buf; ++ u64 ts_last_dump_ns; ++}; ++ ++/** ++ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. ++ * @node: List node used for virtualizer client list. ++ * @hvirt: Hardware counter virtualizer. ++ * @enable_map: Enable map with client's current enabled counters. 
++ * @accum_buf: Dump buffer with client's current accumulated counters. ++ * @has_accum: True if accum_buf contains any accumulated counters. ++ * @ts_start_ns: Counter collection start time of current dump. ++ */ ++struct kbase_hwcnt_virtualizer_client { ++ struct list_head node; ++ struct kbase_hwcnt_virtualizer *hvirt; ++ struct kbase_hwcnt_enable_map enable_map; ++ struct kbase_hwcnt_dump_buffer accum_buf; ++ bool has_accum; ++ u64 ts_start_ns; ++}; ++ ++const struct kbase_hwcnt_metadata * ++kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt) +{ -+ return (bool) (katom_retained_state->event_code != -+ BASE_JD_EVENT_STOPPED && -+ katom_retained_state->event_code != -+ BASE_JD_EVENT_REMOVED_FROM_NEXT && -+ katom_retained_state->event_code != -+ BASE_JD_EVENT_END_RP_DONE); ++ if (!hvirt) ++ return NULL; ++ ++ return hvirt->metadata; +} + +/** -+ * kbasep_js_atom_retained_state_is_valid - Determine whether a struct -+ * kbasep_js_atom_retained_state -+ * is valid -+ * @katom_retained_state: the atom's retained state to check -+ * -+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates -+ * that the code should just ignore it. ++ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. ++ * @hvcli: Pointer to virtualizer client. + * -+ * Return: false if the retained state is invalid, true otherwise. ++ * Will safely free a client in any partial state of construction. + */ -+static inline bool kbasep_js_atom_retained_state_is_valid( -+ const struct kbasep_js_atom_retained_state *katom_retained_state) ++static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli) +{ -+ return (bool) (katom_retained_state->core_req != -+ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); ++ if (!hvcli) ++ return; ++ ++ kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); ++ kbase_hwcnt_enable_map_free(&hvcli->enable_map); ++ kfree(hvcli); +} + +/** -+ * kbase_js_runpool_inc_context_count - Increment number of running contexts. -+ * @kbdev: KBase device -+ * @kctx: KBase context ++ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer ++ * client. ++ * @metadata: Non-NULL pointer to counter metadata. ++ * @out_hvcli: Non-NULL pointer to where created client will be stored on ++ * success. + * -+ * The following locking conditions are made on the caller: -+ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * The caller must hold the kbasep_js_device_data::runpool_mutex ++ * Return: 0 on success, else error code. 
+ */ -+static inline void kbase_js_runpool_inc_context_count( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; ++ int errcode; ++ struct kbase_hwcnt_virtualizer_client *hvcli = NULL; + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ WARN_ON(!metadata); ++ WARN_ON(!out_hvcli); + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); ++ hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); ++ if (!hvcli) ++ return -ENOMEM; + -+ /* Track total contexts */ -+ WARN_ON_ONCE(js_devdata->nr_all_contexts_running >= JS_MAX_RUNNING_JOBS); -+ ++(js_devdata->nr_all_contexts_running); ++ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); ++ if (errcode) ++ goto error; + -+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ /* Track contexts that can submit jobs */ -+ WARN_ON_ONCE(js_devdata->nr_user_contexts_running >= JS_MAX_RUNNING_JOBS); -+ ++(js_devdata->nr_user_contexts_running); ++ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); ++ if (errcode) ++ goto error; ++ ++ *out_hvcli = hvcli; ++ return 0; ++error: ++ kbasep_hwcnt_virtualizer_client_free(hvcli); ++ return errcode; ++} ++ ++/** ++ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a ++ * client's accumulation buffer. ++ * @hvcli: Non-NULL pointer to virtualizer client. ++ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. ++ */ ++static void ++kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli, ++ const struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ WARN_ON(!hvcli); ++ WARN_ON(!dump_buf); ++ lockdep_assert_held(&hvcli->hvirt->lock); ++ ++ if (hvcli->has_accum) { ++ /* If already some accumulation, accumulate */ ++ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); ++ } else { ++ /* If no accumulation, copy */ ++ kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } ++ hvcli->has_accum = true; +} + +/** -+ * kbase_js_runpool_dec_context_count - decrement number of running contexts. ++ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter ++ * accumulator after final client ++ * removal. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * The following locking conditions are made on the caller: -+ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * * The caller must hold the kbasep_js_device_data::runpool_mutex ++ * Will safely terminate the accumulator in any partial state of initialisation. 
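/*
 * Sketch of the copy-then-accumulate idiom used by
 * kbasep_hwcnt_virtualizer_client_accumulate() above, shown on plain u64
 * arrays so the state machine is explicit (illustrative only; the real code
 * operates on kbase_hwcnt_dump_buffer objects).  The first contribution must
 * be a copy, because the destination still holds stale values from the last
 * time it was handed back to the client; only later contributions add.
 */
static void contribute(u64 *dst, const u64 *src, size_t cnt, bool *has_accum)
{
	size_t i;

	for (i = 0; i < cnt; i++) {
		if (*has_accum)
			dst[i] += src[i];	/* subsequent dumps accumulate */
		else
			dst[i] = src[i];	/* first dump overwrites stale data */
	}

	*has_accum = true;
}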
+ */ -+static inline void kbase_js_runpool_dec_context_count( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; ++ WARN_ON(!hvirt); ++ lockdep_assert_held(&hvirt->lock); ++ WARN_ON(hvirt->client_count); + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); ++ kbase_hwcnt_enable_map_free(&hvirt->scratch_map); ++ kbase_hwcnt_accumulator_release(hvirt->accum); ++ hvirt->accum = NULL; ++} + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); ++/** ++ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter ++ * accumulator before first client ++ * addition. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt) ++{ ++ int errcode; + -+ /* Track total contexts */ -+ --(js_devdata->nr_all_contexts_running); -+ WARN_ON_ONCE(js_devdata->nr_all_contexts_running < 0); ++ WARN_ON(!hvirt); ++ lockdep_assert_held(&hvirt->lock); ++ WARN_ON(hvirt->client_count); ++ WARN_ON(hvirt->accum); + -+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ /* Track contexts that can submit jobs */ -+ --(js_devdata->nr_user_contexts_running); -+ WARN_ON_ONCE(js_devdata->nr_user_contexts_running < 0); -+ } ++ errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum); ++ if (errcode) ++ goto error; ++ ++ errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map); ++ if (errcode) ++ goto error; ++ ++ errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf); ++ if (errcode) ++ goto error; ++ ++ return 0; ++error: ++ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); ++ return errcode; +} + +/** -+ * kbase_js_sched_all - Submit atoms from all available contexts to all -+ * job slots. -+ * -+ * @kbdev: Device pointer ++ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the ++ * virtualizer. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @hvcli: Non-NULL pointer to the virtualizer client to add. ++ * @enable_map: Non-NULL pointer to client's initial enable map. + * -+ * This will attempt to submit as many jobs as possible. It will exit when -+ * either all job slots are full, or all contexts have been used. ++ * Return: 0 on success, else error code. 
+ */ -+static inline void kbase_js_sched_all(struct kbase_device *kbdev) ++static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_hwcnt_virtualizer_client *hvcli, ++ const struct kbase_hwcnt_enable_map *enable_map) +{ -+ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -+} ++ int errcode = 0; ++ u64 ts_start_ns; ++ u64 ts_end_ns; + -+extern const int -+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; ++ WARN_ON(!hvirt); ++ WARN_ON(!hvcli); ++ WARN_ON(!enable_map); ++ lockdep_assert_held(&hvirt->lock); + -+extern const base_jd_prio -+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ if (hvirt->client_count == 0) ++ /* First client added, so initialise the accumulator */ ++ errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); ++ if (errcode) ++ return errcode; ++ ++ hvirt->client_count += 1; ++ ++ if (hvirt->client_count == 1) { ++ /* First client, so just pass the enable map onwards as is */ ++ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, ++ &ts_start_ns, &ts_end_ns, NULL); ++ } else { ++ struct kbase_hwcnt_virtualizer_client *pos; ++ ++ /* Make the scratch enable map the union of all enable maps */ ++ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); ++ list_for_each_entry (pos, &hvirt->clients, node) ++ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); ++ ++ /* Set the counters with the new union enable map */ ++ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, ++ &ts_start_ns, &ts_end_ns, ++ &hvirt->scratch_buf); ++ /* Accumulate into only existing clients' accumulation bufs */ ++ if (!errcode) ++ list_for_each_entry (pos, &hvirt->clients, node) ++ kbasep_hwcnt_virtualizer_client_accumulate(pos, ++ &hvirt->scratch_buf); ++ } ++ if (errcode) ++ goto error; ++ ++ list_add(&hvcli->node, &hvirt->clients); ++ hvcli->hvirt = hvirt; ++ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); ++ hvcli->has_accum = false; ++ hvcli->ts_start_ns = ts_end_ns; ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = ts_end_ns; ++ ++ return 0; ++error: ++ hvirt->client_count -= 1; ++ if (hvirt->client_count == 0) ++ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); ++ return errcode; ++} + +/** -+ * kbasep_js_atom_prio_to_sched_prio - Convert atom priority (base_jd_prio) -+ * to relative ordering. -+ * @atom_prio: Priority ID to translate. -+ * -+ * Atom priority values for @ref base_jd_prio cannot be compared directly to -+ * find out which are higher or lower. -+ * -+ * This function will convert base_jd_prio values for successively lower -+ * priorities into a monotonically increasing sequence. That is, the lower the -+ * base_jd_prio priority, the higher the value produced by this function. This -+ * is in accordance with how the rest of the kernel treats priority. -+ * -+ * The mapping is 1:1 and the size of the valid input range is the same as the -+ * size of the valid output range, i.e. -+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS -+ * -+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions -+ * -+ * Return: On success: a value in the inclusive range -+ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: -+ * KBASE_JS_ATOM_SCHED_PRIO_INVALID ++ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the ++ * virtualizer. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. 
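/*
 * Conceptual sketch of the "union of all clients' enable maps" step in
 * kbasep_hwcnt_virtualizer_client_add() above.  kbase_hwcnt_enable_map_union()
 * is effectively a per-word bitwise OR over the enable bitfields, so the
 * hardware ends up collecting every counter that at least one client has
 * requested.  Shown here on raw u64 words with a made-up helper name; the
 * real helper also follows the metadata's block structure.
 */
static void enable_map_union_words(u64 *dst_words, const u64 *src_words, size_t word_cnt)
{
	size_t i;

	for (i = 0; i < word_cnt; i++)
		dst_words[i] |= src_words[i];	/* enabled if any client wants it */
}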
++ * @hvcli: Non-NULL pointer to the virtualizer client to remove. + */ -+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) ++static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_hwcnt_virtualizer_client *hvcli) +{ -+ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) -+ return KBASE_JS_ATOM_SCHED_PRIO_INVALID; ++ int errcode = 0; ++ u64 ts_start_ns; ++ u64 ts_end_ns; + -+ return kbasep_js_atom_priority_to_relative[atom_prio]; ++ WARN_ON(!hvirt); ++ WARN_ON(!hvcli); ++ lockdep_assert_held(&hvirt->lock); ++ ++ list_del(&hvcli->node); ++ hvirt->client_count -= 1; ++ ++ if (hvirt->client_count == 0) { ++ /* Last client removed, so terminate the accumulator */ ++ kbasep_hwcnt_virtualizer_accumulator_term(hvirt); ++ } else { ++ struct kbase_hwcnt_virtualizer_client *pos; ++ /* Make the scratch enable map the union of all enable maps */ ++ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); ++ list_for_each_entry (pos, &hvirt->clients, node) ++ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); ++ /* Set the counters with the new union enable map */ ++ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, ++ &ts_start_ns, &ts_end_ns, ++ &hvirt->scratch_buf); ++ /* Accumulate into remaining clients' accumulation bufs */ ++ if (!errcode) { ++ list_for_each_entry (pos, &hvirt->clients, node) ++ kbasep_hwcnt_virtualizer_client_accumulate(pos, ++ &hvirt->scratch_buf); ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = ts_end_ns; ++ } ++ } ++ WARN_ON(errcode); +} + +/** -+ * kbasep_js_sched_prio_to_atom_prio - Convert relative scheduler priority -+ * to atom priority (base_jd_prio). -+ * -+ * @kbdev: Device pointer -+ * @sched_prio: Relative scheduler priority to translate. -+ * -+ * This function will convert relative scheduler priority back into base_jd_prio -+ * values. It takes values which priorities are monotonically increasing -+ * and converts them to the corresponding base_jd_prio values. If an invalid number is -+ * passed in (i.e. not within the expected range) an error code is returned instead. -+ * -+ * The mapping is 1:1 and the size of the valid input range is the same as the -+ * size of the valid output range, i.e. -+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS ++ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's ++ * currently enabled counters, ++ * and enable a new set of ++ * counters that will be used for ++ * subsequent dumps. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @hvcli: Non-NULL pointer to the virtualizer client. ++ * @enable_map: Non-NULL pointer to the new counter enable map for the client. ++ * Must have the same metadata as the virtualizer. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * Return: On success: a value in the inclusive range -+ * 0..BASE_JD_NR_PRIO_LEVELS-1. On failure: BASE_JD_PRIO_INVALID. ++ * Return: 0 on success or error code. 
+ */ -+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(struct kbase_device *kbdev, -+ int sched_prio) ++static int kbasep_hwcnt_virtualizer_client_set_counters( ++ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, ++ const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf) +{ -+ if (likely(sched_prio >= 0 && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT)) -+ return kbasep_js_relative_priority_to_atom[sched_prio]; -+ /* Invalid priority value if reached here */ -+ dev_warn(kbdev->dev, "Unknown JS scheduling priority %d", sched_prio); -+ return BASE_JD_PRIO_INVALID; ++ int errcode; ++ struct kbase_hwcnt_virtualizer_client *pos; ++ ++ WARN_ON(!hvirt); ++ WARN_ON(!hvcli); ++ WARN_ON(!enable_map); ++ WARN_ON(!ts_start_ns); ++ WARN_ON(!ts_end_ns); ++ WARN_ON(enable_map->metadata != hvirt->metadata); ++ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); ++ lockdep_assert_held(&hvirt->lock); ++ ++ /* Make the scratch enable map the union of all enable maps */ ++ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); ++ list_for_each_entry (pos, &hvirt->clients, node) ++ /* Ignore the enable map of the selected client */ ++ if (pos != hvcli) ++ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); ++ ++ /* Set the counters with the new union enable map */ ++ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, ++ ts_start_ns, ts_end_ns, &hvirt->scratch_buf); ++ if (errcode) ++ return errcode; ++ ++ /* Accumulate into all accumulation bufs except the selected client's */ ++ list_for_each_entry (pos, &hvirt->clients, node) ++ if (pos != hvcli) ++ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); ++ ++ /* Finally, write into the dump buf */ ++ if (dump_buf) { ++ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; ++ ++ if (hvcli->has_accum) { ++ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, ++ &hvcli->enable_map); ++ src = &hvcli->accum_buf; ++ } ++ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); ++ } ++ hvcli->has_accum = false; ++ ++ /* Update the selected client's enable map */ ++ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); ++ ++ /* Fix up the timestamps */ ++ *ts_start_ns = hvcli->ts_start_ns; ++ hvcli->ts_start_ns = *ts_end_ns; ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = *ts_end_ns; ++ ++ return errcode; ++} ++ ++int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ int errcode; ++ struct kbase_hwcnt_virtualizer *hvirt; ++ ++ if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) ++ return -EINVAL; ++ ++ hvirt = hvcli->hvirt; ++ ++ if ((enable_map->metadata != hvirt->metadata) || ++ (dump_buf && (dump_buf->metadata != hvirt->metadata))) ++ return -EINVAL; ++ ++ mutex_lock(&hvirt->lock); ++ ++ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { ++ /* ++ * If there's only one client with no prior accumulation, we can ++ * completely skip the virtualize and just pass through the call ++ * to the accumulator, saving a fair few copies and ++ * accumulations. 
++ */ ++ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, ++ ts_start_ns, ts_end_ns, dump_buf); ++ ++ if (!errcode) { ++ /* Update the selected client's enable map */ ++ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); ++ ++ /* Fix up the timestamps */ ++ *ts_start_ns = hvcli->ts_start_ns; ++ hvcli->ts_start_ns = *ts_end_ns; ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = *ts_end_ns; ++ } ++ } else { ++ /* Otherwise, do the full virtualize */ ++ errcode = kbasep_hwcnt_virtualizer_client_set_counters( ++ hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf); ++ } ++ ++ mutex_unlock(&hvirt->lock); ++ ++ return errcode; +} + +/** -+ * kbase_js_priority_check - Check the priority requested -+ * -+ * @kbdev: Device pointer -+ * @priority: Requested priority -+ * -+ * This will determine whether the requested priority can be satisfied. ++ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's ++ * currently enabled counters. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @hvcli: Non-NULL pointer to the virtualizer client. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * Return: The same or lower priority than requested. ++ * Return: 0 on success or error code. + */ -+base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority); ++static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_hwcnt_virtualizer_client *hvcli, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ int errcode; ++ struct kbase_hwcnt_virtualizer_client *pos; ++ ++ WARN_ON(!hvirt); ++ WARN_ON(!hvcli); ++ WARN_ON(!ts_start_ns); ++ WARN_ON(!ts_end_ns); ++ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); ++ lockdep_assert_held(&hvirt->lock); ++ ++ /* Perform the dump */ ++ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, ++ &hvirt->scratch_buf); ++ if (errcode) ++ return errcode; ++ ++ /* Accumulate into all accumulation bufs except the selected client's */ ++ list_for_each_entry (pos, &hvirt->clients, node) ++ if (pos != hvcli) ++ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); ++ ++ /* Finally, write into the dump buf */ ++ if (dump_buf) { ++ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; ++ ++ if (hvcli->has_accum) { ++ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, ++ &hvcli->enable_map); ++ src = &hvcli->accum_buf; ++ } ++ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); ++ } ++ hvcli->has_accum = false; ++ ++ /* Fix up the timestamps */ ++ *ts_start_ns = hvcli->ts_start_ns; ++ hvcli->ts_start_ns = *ts_end_ns; ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = *ts_end_ns; ++ ++ return errcode; ++} + +/** -+ * kbase_js_atom_runs_before - determine if atoms for the same slot have an -+ * ordering relation -+ * @kbdev: kbase device -+ * @katom_a: the first atom -+ * @katom_b: the second atom. 
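/*
 * Sketch of the timestamp contract implemented above (the helper name and
 * the fixed sample count are made up for the example).  For a given client,
 * each dump's [ts_start_ns, ts_end_ns) window starts exactly where the
 * previous window ended, so successive samples tile the timeline with no
 * gaps or overlaps; "hvcli" and "buf" are assumed to have been created
 * against the same metadata.
 */
static int check_dump_windows_tile(struct kbase_hwcnt_virtualizer_client *hvcli,
				   struct kbase_hwcnt_dump_buffer *buf)
{
	u64 prev_end = 0;
	u64 start, end;
	int i, err;

	for (i = 0; i < 4; i++) {
		err = kbase_hwcnt_virtualizer_client_dump(hvcli, &start, &end, buf);
		if (err)
			return err;

		/* Each window begins where the previous one ended */
		WARN_ON(i > 0 && start != prev_end);
		prev_end = end;
	}

	return 0;
}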
-+ * @order_flags: combination of KBASE_ATOM_ORDERING_FLAG_<...> for the ordering -+ * relation -+ * -+ * This is for making consistent decisions about the ordering of atoms when we -+ * need to do pre-emption on a slot, which includes stopping existing atoms -+ * when a new atom is ready to run, and also which other atoms to remove from -+ * the slot when the atom in JSn_HEAD is being pre-empted. -+ * -+ * This only handles @katom_a and @katom_b being for the same job slot, as -+ * pre-emption only operates within a slot. -+ * -+ * Note: there is currently no use-case for this as a sorting comparison -+ * functions, hence only a boolean returned instead of int -1, 0, +1 return. If -+ * required in future, a modification to do so would be better than calling -+ * twice with katom_a and katom_b swapped. ++ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the ++ * client's currently enabled counters ++ * if it hasn't been rate limited, ++ * otherwise return the client's most ++ * recent accumulation. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @hvcli: Non-NULL pointer to the virtualizer client. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * Return: -+ * true if @katom_a should run before @katom_b, false otherwise. -+ * A false return value does not distinguish between "no ordering relation" and -+ * "@katom_a should run after @katom_b". ++ * Return: 0 on success or error code. 
+ */ -+bool kbase_js_atom_runs_before(struct kbase_device *kbdev, -+ const struct kbase_jd_atom *katom_a, -+ const struct kbase_jd_atom *katom_b, -+ const kbase_atom_ordering_flag_t order_flags); ++static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( ++ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, ++ u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ bool rate_limited = true; + -+#endif /* _KBASE_JM_JS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h ++ WARN_ON(!hvirt); ++ WARN_ON(!hvcli); ++ WARN_ON(!ts_start_ns); ++ WARN_ON(!ts_end_ns); ++ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); ++ lockdep_assert_held(&hvirt->lock); ++ ++ if (hvirt->dump_threshold_ns == 0) { ++ /* Threshold == 0, so rate limiting disabled */ ++ rate_limited = false; ++ } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) { ++ /* Last dump was performed by this client, and dumps from an ++ * individual client are never rate limited ++ */ ++ rate_limited = false; ++ } else { ++ const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); ++ const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns; ++ ++ /* Dump period equals or exceeds the threshold */ ++ if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) ++ rate_limited = false; ++ } ++ ++ if (!rate_limited) ++ return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns, ++ dump_buf); ++ ++ /* If we've gotten this far, the client must have something accumulated ++ * otherwise it is a logic error ++ */ ++ WARN_ON(!hvcli->has_accum); ++ ++ if (dump_buf) ++ kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map); ++ hvcli->has_accum = false; ++ ++ *ts_start_ns = hvcli->ts_start_ns; ++ *ts_end_ns = hvirt->ts_last_dump_ns; ++ hvcli->ts_start_ns = hvirt->ts_last_dump_ns; ++ ++ return 0; ++} ++ ++int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf) ++{ ++ int errcode; ++ struct kbase_hwcnt_virtualizer *hvirt; ++ ++ if (!hvcli || !ts_start_ns || !ts_end_ns) ++ return -EINVAL; ++ ++ hvirt = hvcli->hvirt; ++ ++ if (dump_buf && (dump_buf->metadata != hvirt->metadata)) ++ return -EINVAL; ++ ++ mutex_lock(&hvirt->lock); ++ ++ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { ++ /* ++ * If there's only one client with no prior accumulation, we can ++ * completely skip the virtualize and just pass through the call ++ * to the accumulator, saving a fair few copies and ++ * accumulations. 
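/*
 * The rate-limiting decision made by
 * kbasep_hwcnt_virtualizer_client_dump_rate_limited() above, extracted into
 * a pure predicate as a sketch (the helper name is made up).  A fresh
 * hardware dump is skipped only when rate limiting is enabled, some *other*
 * client performed the most recent dump, and that dump is still younger than
 * the threshold; in that case the caller is served from its accumulation
 * buffer instead.
 */
static bool hwcnt_dump_is_rate_limited(u64 dump_threshold_ns, u64 ts_last_dump_ns,
				       u64 client_ts_start_ns, u64 now_ns)
{
	if (dump_threshold_ns == 0)
		return false;			/* rate limiting disabled */

	if (ts_last_dump_ns == client_ts_start_ns)
		return false;			/* this client did the last dump */

	/* Skip the hardware dump only while the last one is still fresh */
	return (now_ns - ts_last_dump_ns) < dump_threshold_ns;
}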
++ */ ++ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, ++ dump_buf); ++ ++ if (!errcode) { ++ /* Fix up the timestamps */ ++ *ts_start_ns = hvcli->ts_start_ns; ++ hvcli->ts_start_ns = *ts_end_ns; ++ ++ /* Store the most recent dump time for rate limiting */ ++ hvirt->ts_last_dump_ns = *ts_end_ns; ++ } ++ } else { ++ /* Otherwise, do the full virtualize */ ++ errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited( ++ hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); ++ } ++ ++ mutex_unlock(&hvirt->lock); ++ ++ return errcode; ++} ++ ++int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_virtualizer_client **out_hvcli) ++{ ++ int errcode; ++ struct kbase_hwcnt_virtualizer_client *hvcli; ++ ++ if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata)) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli); ++ if (errcode) ++ return errcode; ++ ++ mutex_lock(&hvirt->lock); ++ ++ errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); ++ ++ mutex_unlock(&hvirt->lock); ++ ++ if (errcode) { ++ kbasep_hwcnt_virtualizer_client_free(hvcli); ++ return errcode; ++ } ++ ++ *out_hvcli = hvcli; ++ return 0; ++} ++ ++void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli) ++{ ++ if (!hvcli) ++ return; ++ ++ mutex_lock(&hvcli->hvirt->lock); ++ ++ kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); ++ ++ mutex_unlock(&hvcli->hvirt->lock); ++ ++ kbasep_hwcnt_virtualizer_client_free(hvcli); ++} ++ ++int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, ++ struct kbase_hwcnt_virtualizer **out_hvirt) ++{ ++ struct kbase_hwcnt_virtualizer *virt; ++ const struct kbase_hwcnt_metadata *metadata; ++ ++ if (!hctx || !out_hvirt) ++ return -EINVAL; ++ ++ metadata = kbase_hwcnt_context_metadata(hctx); ++ if (!metadata) ++ return -EINVAL; ++ ++ virt = kzalloc(sizeof(*virt), GFP_KERNEL); ++ if (!virt) ++ return -ENOMEM; ++ ++ virt->hctx = hctx; ++ virt->dump_threshold_ns = dump_threshold_ns; ++ virt->metadata = metadata; ++ ++ mutex_init(&virt->lock); ++ INIT_LIST_HEAD(&virt->clients); ++ ++ *out_hvirt = virt; ++ return 0; ++} ++ ++void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) ++{ ++ if (!hvirt) ++ return; ++ ++ /* Non-zero client count implies client leak */ ++ if (WARN_ON(hvirt->client_count != 0)) { ++ struct kbase_hwcnt_virtualizer_client *pos, *n; ++ ++ list_for_each_entry_safe (pos, n, &hvirt->clients, node) ++ kbase_hwcnt_virtualizer_client_destroy(pos); ++ } ++ ++ WARN_ON(hvirt->client_count != 0); ++ WARN_ON(hvirt->accum); ++ ++ kfree(hvirt); ++} ++ ++bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, ++ struct work_struct *work) ++{ ++ if (WARN_ON(!hvirt) || WARN_ON(!work)) ++ return false; ++ ++ return kbase_hwcnt_context_queue_work(hvirt->hctx, work); ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h new file mode 100644 -index 000000000..465cf7ec0 +index 000000000..485ba7496 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h -@@ -0,0 +1,447 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h +@@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2023 ARM Limited. 
All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -199625,442 +202312,241 @@ index 000000000..465cf7ec0 + * + */ + -+/** -+ * DOC: Job Scheduler Type Definitions ++/* ++ * Hardware counter virtualizer API. ++ * ++ * Virtualizes a hardware counter context, so multiple clients can access ++ * a single hardware counter resource as though each was the exclusive user. + */ + -+#ifndef _KBASE_JS_DEFS_H_ -+#define _KBASE_JS_DEFS_H_ ++#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ ++#define _KBASE_HWCNT_VIRTUALIZER_H_ + -+/* Forward decls */ -+struct kbase_device; -+struct kbase_jd_atom; ++#include ++#include + ++struct kbase_hwcnt_context; ++struct kbase_hwcnt_virtualizer; ++struct kbase_hwcnt_virtualizer_client; ++struct kbase_hwcnt_enable_map; ++struct kbase_hwcnt_dump_buffer; + -+typedef u32 kbase_context_flags; ++/** ++ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. ++ * @hctx: Non-NULL pointer to the hardware counter context to ++ * virtualize. ++ * @dump_threshold_ns: Minimum threshold period for dumps between different ++ * clients where a new accumulator dump will not be ++ * performed, and instead accumulated values will be used. ++ * If 0, rate limiting will be disabled. ++ * @out_hvirt: Non-NULL pointer to where the pointer to the created ++ * virtualizer will be stored on success. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, ++ struct kbase_hwcnt_virtualizer **out_hvirt); + -+/* -+ * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's -+ * jobs registered with the Job Scheduler ++/** ++ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. ++ * @hvirt: Pointer to virtualizer to be terminated. + */ -+typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt); + -+/* -+ * @brief Maximum number of jobs that can be submitted to a job slot whilst -+ * inside the IRQ handler. ++/** ++ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by ++ * the virtualizer, so related counter data ++ * structures can be created. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * -+ * This is important because GPU NULL jobs can complete whilst the IRQ handler -+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL -+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency. ++ * Return: Non-NULL pointer to metadata, or NULL on error. + */ -+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 ++const struct kbase_hwcnt_metadata * ++kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt); + +/** -+ * enum kbasep_js_ctx_attr - Context attributes -+ * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains -+ * Compute jobs. -+ * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains -+ * Non-Compute jobs. -+ * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context -+ * contains compute-job atoms that aren't -+ * restricted to a coherent group, -+ * and can run on all cores. 
-+ * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum -+ * -+ * Each context attribute can be thought of as a boolean value that caches some -+ * state information about either the runpool, or the context: -+ * - In the case of the runpool, it is a cache of "Do any contexts owned by -+ * the runpool have attribute X?" -+ * - In the case of a context, it is a cache of "Do any atoms owned by the -+ * context have attribute X?" -+ * -+ * The boolean value of the context attributes often affect scheduling -+ * decisions, such as affinities to use and job slots to use. -+ * -+ * To accomodate changes of state in the context, each attribute is refcounted -+ * in the context, and in the runpool for all running contexts. Specifically: -+ * - The runpool holds a refcount of how many contexts in the runpool have this -+ * attribute. -+ * - The context holds a refcount of how many atoms have this attribute. ++ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the ++ * same metadata as the virtualizer. ++ * @out_hvcli: Non-NULL pointer to where the pointer to the created client will ++ * be stored on success. + * -+ * KBASEP_JS_CTX_ATTR_COMPUTE: -+ * Attribute indicating a context that contains Compute jobs. That is, -+ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_virtualizer_client **out_hvcli); ++ ++/** ++ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. ++ * @hvcli: Pointer to the hardware counter client. ++ */ ++void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli); ++ ++/** ++ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's ++ * currently enabled counters, and ++ * enable a new set of counters ++ * that will be used for ++ * subsequent dumps. ++ * @hvcli: Non-NULL pointer to the virtualizer client. ++ * @enable_map: Non-NULL pointer to the new counter enable map for the client. ++ * Must have the same metadata as the virtualizer. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. ++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * @note A context can be both 'Compute' and 'Non Compute' if it contains -+ * both types of jobs. ++ * Return: 0 on success or error code. ++ */ ++int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf); ++ ++/** ++ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's ++ * currently enabled counters. ++ * @hvcli: Non-NULL pointer to the virtualizer client. ++ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will ++ * be written out to on success. ++ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will ++ * be written out to on success. 
++ * @dump_buf: Pointer to the buffer where the dump will be written out to on ++ * success. If non-NULL, must have the same metadata as the ++ * accumulator. If NULL, the dump will be discarded. + * -+ * KBASEP_JS_CTX_ATTR_NON_COMPUTE: -+ * Attribute indicating a context that contains Non-Compute jobs. That is, -+ * the context has some jobs that are \b not of type @ref -+ * BASE_JD_REQ_ONLY_COMPUTE. ++ * Return: 0 on success or error code. ++ */ ++int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, ++ u64 *ts_start_ns, u64 *ts_end_ns, ++ struct kbase_hwcnt_dump_buffer *dump_buf); ++ ++/** ++ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async ++ * work on a workqueue specialized for ++ * hardware counters. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @work: Non-NULL pointer to work to queue. + * -+ * @note A context can be both 'Compute' and 'Non Compute' if it contains -+ * both types of jobs. ++ * Return: false if work was already on a queue, true otherwise. + * -+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: -+ * Attribute indicating that a context contains compute-job atoms that -+ * aren't restricted to a coherent group, and can run on all cores. ++ * This is a convenience function that directly calls the underlying ++ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work. ++ */ ++bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, ++ struct work_struct *work); ++ ++#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h +new file mode 100644 +index 000000000..501c0087b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h +@@ -0,0 +1,89 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Specifically, this is when the atom's \a core_req satisfy: -+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 -+ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * -+ * Such atoms could be blocked from running if one of the coherent groups -+ * is being used by another job slot, so tracking this context attribute -+ * allows us to prevent such situations. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @note This doesn't take into account the 1-coregroup case, where all -+ * compute atoms would effectively be able to run on 'all cores', but -+ * contexts will still not always get marked with this attribute. Instead, -+ * it is the caller's responsibility to take into account the number of -+ * coregroups when interpreting this attribute. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @note Whilst Tiler atoms are normally combined with -+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without -+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy -+ * enough to handle anyway. 
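/*
 * End-to-end usage sketch for the virtualizer API declared in this header
 * (illustrative, not part of the patch).  It assumes an initialised
 * kbase_hwcnt_context is available, and that
 * kbase_hwcnt_enable_map_enable_all() from mali_kbase_hwcnt_types.h can be
 * used to request every counter; the function name, threshold and error
 * unwinding are example choices only.
 */
static int hwcnt_virtualizer_example(struct kbase_hwcnt_context *hctx)
{
	struct kbase_hwcnt_virtualizer *hvirt;
	struct kbase_hwcnt_virtualizer_client *hvcli;
	struct kbase_hwcnt_enable_map map;
	struct kbase_hwcnt_dump_buffer buf;
	const struct kbase_hwcnt_metadata *md;
	u64 ts_start_ns, ts_end_ns;
	int err;

	/* 200us minimum gap between dumps triggered by different clients */
	err = kbase_hwcnt_virtualizer_init(hctx, 200 * NSEC_PER_USEC, &hvirt);
	if (err)
		return err;

	/* Counter data structures must be created from the same metadata */
	md = kbase_hwcnt_virtualizer_metadata(hvirt);

	err = kbase_hwcnt_enable_map_alloc(md, &map);
	if (err)
		goto term;

	err = kbase_hwcnt_dump_buffer_alloc(md, &buf);
	if (err)
		goto free_map;

	kbase_hwcnt_enable_map_enable_all(&map);	/* assumed helper */

	err = kbase_hwcnt_virtualizer_client_create(hvirt, &map, &hvcli);
	if (err)
		goto free_buf;

	/* Collect one sample; counters cover [ts_start_ns, ts_end_ns) */
	err = kbase_hwcnt_virtualizer_client_dump(hvcli, &ts_start_ns, &ts_end_ns, &buf);

	kbase_hwcnt_virtualizer_client_destroy(hvcli);
free_buf:
	kbase_hwcnt_dump_buffer_free(&buf);
free_map:
	kbase_hwcnt_enable_map_free(&map);
term:
	kbase_hwcnt_virtualizer_term(hvirt);
	return err;
}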
++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ -+enum kbasep_js_ctx_attr { -+ KBASEP_JS_CTX_ATTR_COMPUTE, -+ KBASEP_JS_CTX_ATTR_NON_COMPUTE, -+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, -+ KBASEP_JS_CTX_ATTR_COUNT -+}; -+ -+enum { -+ /* -+ * Bit indicating that new atom should be started because this atom -+ * completed -+ */ -+ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), -+ /* -+ * Bit indicating that the atom was evicted from the JS_NEXT registers -+ */ -+ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) -+}; -+ -+/** -+ * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...> -+ * bits -+ */ -+typedef u32 kbasep_js_atom_done_code; + +/* -+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode ++ * Virtual interface for hardware counter watchdog. + */ -+enum { -+ /* -+ * In this mode, higher priority atoms will be scheduled first, -+ * regardless of the context they belong to. Newly-runnable higher -+ * priority atoms can preempt lower priority atoms currently running on -+ * the GPU, even if they belong to a different context. -+ */ -+ KBASE_JS_SYSTEM_PRIORITY_MODE = 0, + -+ /* -+ * In this mode, the highest-priority atom will be chosen from each -+ * context in turn using a round-robin algorithm, so priority only has -+ * an effect within the context an atom belongs to. Newly-runnable -+ * higher priority atoms can preempt the lower priority atoms currently -+ * running on the GPU, but only if they belong to the same context. -+ */ -+ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, ++#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_ ++#define _KBASE_HWCNT_WATCHDOG_IF_H_ + -+ /* Must be the last in the enum */ -+ KBASE_JS_PRIORITY_MODE_COUNT, -+}; ++#include + +/* -+ * Internal atom priority defines for kbase_jd_atom::sched_prio -+ */ -+enum { -+ KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, -+ KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, -+ KBASE_JS_ATOM_SCHED_PRIO_HIGH, -+ KBASE_JS_ATOM_SCHED_PRIO_MED, -+ KBASE_JS_ATOM_SCHED_PRIO_LOW, -+ KBASE_JS_ATOM_SCHED_PRIO_COUNT, -+}; -+ -+/* Invalid priority for kbase_jd_atom::sched_prio */ -+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 -+ -+/* Default priority in the case of contexts with no atoms, or being lenient -+ * about invalid priorities from userspace. -+ */ -+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED -+ -+/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits -+ * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels. -+ * -+ * Must be strictly larger than the number of bits to represent a bitmap of -+ * priorities, so that we can do calculations such as: -+ * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1 -+ * ...without causing undefined behavior due to a shift beyond the width of the -+ * type -+ * -+ * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth -+ * moving to DECLARE_BITMAP() ++ * Opaque structure of information used to create a watchdog timer interface. + */ -+typedef u8 kbase_js_prio_bitmap_t; -+ -+/* Ordering modification for kbase_js_atom_runs_before() */ -+typedef u32 kbase_atom_ordering_flag_t; ++struct kbase_hwcnt_watchdog_info; + -+/* Atoms of the same context and priority should have their ordering decided by -+ * their seq_nr instead of their age. 
++/** ++ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function when watchdog timer is done + * -+ * seq_nr is used as a more slowly changing variant of age - it increases once -+ * per group of related atoms, as determined by user-space. Hence, it can be -+ * used to limit re-ordering decisions (such as pre-emption) to only re-order -+ * between such groups, rather than re-order within those groups of atoms. ++ * @user_data: Pointer to the callback user data. + */ -+#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0) ++typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data); + +/** -+ * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure -+ * @runpool_irq: Sub-structure to collect together Job Scheduling data used in -+ * IRQ context. The hwaccess_lock must be held when accessing. -+ * @runpool_irq.submit_allowed: Bitvector indicating whether a currently -+ * scheduled context is allowed to submit jobs. -+ * When bit 'N' is set in this, it indicates whether -+ * the context bound to address space 'N' is -+ * allowed to submit jobs. -+ * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters: -+ * Each is large enough to hold a refcount of the number of contexts -+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS. -+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store -+ * the refcount. Hence, it's not worthwhile reducing this to -+ * bit-manipulation on u32s to save space (where in contrast, 4 bit -+ * sub-fields would be easy to do and would save space). -+ * Whilst this must not become negative, the sign bit is used for: -+ * - error detection in debug builds -+ * - Optimization: it is undefined for a signed int to overflow, and so -+ * the compiler can optimize for that never happening (thus, no masking -+ * is required on updating the variable) -+ * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector -+ * to aid affinity checking. -+ * Element 'n' bit 'i' indicates that slot 'n' -+ * is using core i (i.e. slot_affinity_refcount[n][i] > 0) -+ * @runpool_irq.slot_affinity_refcount: Array of fefcount for each core owned -+ * by each slot. Used to generate the slot_affinities array of bitvectors. -+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, -+ * because it is refcounted only when a job is definitely about to be -+ * submitted to a slot, and is de-refcounted immediately after a job -+ * finishes -+ * @schedule_sem: Scheduling semaphore. This must be held when calling -+ * kbase_jm_kick() -+ * @ctx_list_pullable: List of contexts that can currently be pulled from -+ * @ctx_list_unpullable: List of contexts that can not currently be pulled -+ * from, but have jobs currently running. 
-+ * @nr_user_contexts_running: Number of currently scheduled user contexts -+ * (excluding ones that are not submitting jobs) -+ * @nr_all_contexts_running: Number of currently scheduled contexts (including -+ * ones that are not submitting jobs) -+ * @js_reqs: Core Requirements to match up with base_js_atom's core_req memeber -+ * @note This is a write-once member, and so no locking is required to -+ * read -+ * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS -+ * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS -+ * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL -+ * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS -+ * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL -+ * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING -+ * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS -+ * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL -+ * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING -+ * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS -+ * @suspended_soft_jobs_list: List of suspended soft jobs -+ * @softstop_always: Support soft-stop on a single context -+ * @init_status:The initialized-flag is placed at the end, to avoid -+ * cache-pollution (we should only be using this during init/term paths). -+ * @note This is a write-once member, and so no locking is required to -+ * read -+ * @nr_contexts_pullable:Number of contexts that can currently be pulled from -+ * @nr_contexts_runnable:Number of contexts that can either be pulled from or -+ * arecurrently running -+ * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT -+ * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free. -+ * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts -+ * independently of the Run Pool. -+ * Of course, you don't need the Run Pool lock to access this. -+ * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool. ++ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer + * -+ * This encapsulates the current context of the Job Scheduler on a particular -+ * device. This context is global to the device, and is not tied to any -+ * particular struct kbase_context running on the device. ++ * @timer: Non-NULL pointer to a watchdog timer interface context ++ * @period_ms: Period in milliseconds of the watchdog timer ++ * @callback: Non-NULL pointer to a watchdog callback function ++ * @user_data: Pointer to the user data, used when watchdog timer callback is called + * -+ * nr_contexts_running and as_free are optimized for packing together (by making -+ * them smaller types than u32). The operations on them should rarely involve -+ * masking. The use of signed types for arithmetic indicates to the compiler -+ * that the value will not rollover (which would be undefined behavior), and so -+ * under the Total License model, it is free to make optimizations based on -+ * that (i.e. to remove masking). ++ * Return: 0 if the watchdog timer enabled successfully, error code otherwise. 
+ */ -+struct kbasep_js_device_data { -+ struct runpool_irq { -+ u16 submit_allowed; -+ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; -+ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; -+ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; -+ } runpool_irq; -+ struct semaphore schedule_sem; -+ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] -+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; -+ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] -+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; -+ s8 nr_user_contexts_running; -+ s8 nr_all_contexts_running; -+ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; -+ -+ u32 scheduling_period_ns; -+ u32 soft_stop_ticks; -+ u32 soft_stop_ticks_cl; -+ u32 hard_stop_ticks_ss; -+ u32 hard_stop_ticks_cl; -+ u32 hard_stop_ticks_dumping; -+ u32 gpu_reset_ticks_ss; -+ u32 gpu_reset_ticks_cl; -+ u32 gpu_reset_ticks_dumping; -+ u32 ctx_timeslice_ns; -+ -+ struct list_head suspended_soft_jobs_list; -+ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ bool softstop_always; -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ int init_status; -+ u32 nr_contexts_pullable; -+ atomic_t nr_contexts_runnable; -+ atomic_t soft_job_timeout_ms; -+ u32 js_free_wait_time_ms; -+ -+ struct mutex queue_mutex; -+ /* -+ * Run Pool mutex, for managing contexts within the runpool. -+ * Unless otherwise specified, you must hold this lock whilst accessing -+ * any members that follow -+ * -+ * In addition, this is used to access: -+ * * the kbasep_js_kctx_info::runpool substructure -+ */ -+ struct mutex runpool_mutex; -+}; ++typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer, ++ u32 period_ms, ++ kbase_hwcnt_watchdog_callback_fn *callback, ++ void *user_data); + +/** -+ * struct kbasep_js_kctx_info - KBase Context Job Scheduling information -+ * structure -+ * @ctx: Job Scheduler Context information sub-structure.Its members are -+ * accessed regardless of whether the context is: -+ * - In the Policy's Run Pool -+ * - In the Policy's Queue -+ * - Not queued nor in the Run Pool. -+ * You must obtain the @ctx.jsctx_mutex before accessing any other members -+ * of this substructure. -+ * You may not access any of its members from IRQ context. -+ * @ctx.jsctx_mutex: Job Scheduler Context lock -+ * @ctx.nr_jobs: Number of jobs ready to run - does \em not include -+ * the jobs waiting in the dispatcher, and dependency-only -+ * jobs. See kbase_jd_context::job_nr for such jobs -+ * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough -+ * to hold a refcount of the number of atoms on the context. -+ * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SHEDULED flag state -+ * changes. -+ * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on -+ * one list per job slot. -+ * @init_status: The initalized-flag is placed at the end, to avoid -+ * cache-pollution (we should only be using this during init/term paths) ++ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer + * -+ * This is a substructure in the struct kbase_context that encapsulates all the -+ * scheduling information. 
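/*
 * Sketch of how a counter backend might drive a watchdog through the
 * kbase_hwcnt_watchdog_interface defined at the end of this header
 * (illustrative only; "backend_watchdog_cb", "backend_arm_watchdog" and the
 * 100ms period are made up for the example).  The callback runs from the
 * watchdog's own context once the programmed period elapses without the
 * timer being modified or disabled.
 */
static void backend_watchdog_cb(void *user_data)
{
	/* e.g. trigger an internal dump so counters cannot silently overflow */
}

static int backend_arm_watchdog(const struct kbase_hwcnt_watchdog_interface *wd,
				void *backend)
{
	int err;

	/* Arm the timer: the callback fires once 100ms elapse unless the
	 * timer is modified or disabled first
	 */
	err = wd->enable(wd->timer, 100, backend_watchdog_cb, backend);
	if (err)
		return err;

	/* A subsequent dump can push the deadline out again... */
	wd->modify(wd->timer, 100);

	/* ...and the watchdog is disarmed when counters are released */
	wd->disable(wd->timer);

	return 0;
}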
++ * @timer: Non-NULL pointer to a watchdog timer interface context + */ -+struct kbasep_js_kctx_info { -+ struct kbase_jsctx { -+ struct mutex jsctx_mutex; -+ -+ u32 nr_jobs; -+ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; -+ wait_queue_head_t is_scheduled_wait; -+ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; -+ } ctx; -+ int init_status; -+}; ++typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); + +/** -+ * struct kbasep_js_atom_retained_state - Subset of atom state. -+ * @event_code: to determine whether the atom has finished -+ * @core_req: core requirements -+ * @sched_priority: priority -+ * @device_nr: Core group atom was executed on ++ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout + * -+ * Subset of atom state that can be available after kbase_jd_done_nolock() is called -+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), -+ * because the original atom could disappear. -+ */ -+struct kbasep_js_atom_retained_state { -+ /* Event code - to determine whether the atom has finished */ -+ enum base_jd_event_code event_code; -+ /* core requirements */ -+ base_jd_core_req core_req; -+ /* priority */ -+ int sched_priority; -+ /* Core group atom was executed on */ -+ u32 device_nr; -+ -+}; -+ -+/* -+ * Value signifying 'no retry on a slot required' for: -+ * - kbase_js_atom_retained_state::retry_submit_on_slot -+ * - kbase_jd_atom::retry_submit_on_slot -+ */ -+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) -+ -+/* -+ * base_jd_core_req value signifying 'invalid' for a -+ * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid() -+ */ -+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP -+ -+/* -+ * The JS timer resolution, in microseconds -+ * Any non-zero difference in time will be at least this size. ++ * @timer: Non-NULL pointer to a watchdog timer interface context ++ * @delay_ms: Watchdog timer expiration in milliseconds + */ -+#define KBASEP_JS_TICK_RESOLUTION_US 1 ++typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer, ++ u32 delay_ms); + +/** -+ * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's -+ * use of a job slot -+ * @blocked: bitmap of priorities that this slot is blocked at -+ * @atoms_pulled: counts of atoms that have been pulled from this slot, -+ * across all priority levels -+ * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per -+ * priority level ++ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface. + * -+ * Controls how a slot from the &struct kbase_context's jsctx_queue is managed, -+ * for example to ensure correct ordering of atoms when atoms of different -+ * priorities are unpulled. 
++ * @timer: Immutable watchdog timer info ++ * @enable: Function ptr to enable watchdog ++ * @disable: Function ptr to disable watchdog ++ * @modify: Function ptr to modify watchdog + */ -+struct kbase_jsctx_slot_tracking { -+ kbase_js_prio_bitmap_t blocked; -+ atomic_t atoms_pulled; -+ int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++struct kbase_hwcnt_watchdog_interface { ++ const struct kbase_hwcnt_watchdog_info *timer; ++ kbase_hwcnt_watchdog_enable_fn *enable; ++ kbase_hwcnt_watchdog_disable_fn *disable; ++ kbase_hwcnt_watchdog_modify_fn *modify; +}; + -+#endif /* _KBASE_JS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h ++#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c new file mode 100644 -index 000000000..11aedef80 +index 000000000..4caa832cd --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h -@@ -0,0 +1,185 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c +@@ -0,0 +1,157 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -200078,180 +202564,152 @@ index 000000000..11aedef80 + * + */ + -+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, -+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py -+ * For more information see base/tools/hwconfig_generator/README -+ */ ++#include "mali_kbase.h" ++#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" ++#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h" + -+#ifndef _BASE_HWCONFIG_FEATURES_H_ -+#define _BASE_HWCONFIG_FEATURES_H_ ++#include ++#include + -+enum base_hw_feature { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_TLS_HASHING, -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_ASN_HASH, -+ BASE_HW_FEATURE_GPU_SLEEP, -+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_PBHA_HWU, -+ BASE_HW_FEATURE_LARGE_PAGE_ALLOC, -+ BASE_HW_FEATURE_END ++/** ++ * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog ++ * interface. ++ * ++ * @workq: Single threaded work queue in which to execute callbacks. ++ * @dwork: Worker to execute callback function. 
++ * @timer_enabled: True if watchdog timer enabled, otherwise false ++ * @callback: Watchdog callback function ++ * @user_data: Pointer to user data passed as argument to the callback ++ * function ++ */ ++struct kbase_hwcnt_watchdog_if_timer_info { ++ struct workqueue_struct *workq; ++ struct delayed_work dwork; ++ bool timer_enabled; ++ kbase_hwcnt_watchdog_callback_fn *callback; ++ void *user_data; +}; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = { -+ BASE_HW_FEATURE_END -+}; ++/** ++ * kbasep_hwcnt_watchdog_callback() - Watchdog callback ++ * ++ * @work: Work structure ++ * ++ * Function to be called in a work queue after watchdog timer has expired. ++ */ ++static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *const info = ++ container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work); + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_END -+}; ++ if (info->callback) ++ info->callback(info->user_data); ++} + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_END -+}; ++static int kbasep_hwcnt_watchdog_if_timer_enable( ++ const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms, ++ kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_END -+}; ++ if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled)) ++ return -EINVAL; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_END -+}; ++ timer_info->callback = callback; ++ timer_info->user_data = user_data; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_TLS_HASHING, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_END -+}; ++ queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms)); ++ timer_info->timer_enabled = true; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { -+ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_TLS_HASHING, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_END -+}; ++ return 0; ++} + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ BASE_HW_FEATURE_END -+}; ++static void ++kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const 
timer) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ BASE_HW_FEATURE_END -+}; ++ if (WARN_ON(!timer)) ++ return; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ BASE_HW_FEATURE_END -+}; ++ if (!timer_info->timer_enabled) ++ return; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_IDVS_GROUP_SIZE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, -+ BASE_HW_FEATURE_END -+}; ++ cancel_delayed_work_sync(&timer_info->dwork); ++ timer_info->timer_enabled = false; ++} + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_END -+}; ++static void ++kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer, ++ u32 const delay_ms) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_END -+}; ++ if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled)) ++ return; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_END -+}; ++ mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms)); ++} + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_ASN_HASH, -+ BASE_HW_FEATURE_GPU_SLEEP, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_END -+}; ++void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *timer_info; + -+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_L2_CONFIG, -+ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, -+ BASE_HW_FEATURE_ASN_HASH, -+ BASE_HW_FEATURE_GPU_SLEEP, -+ BASE_HW_FEATURE_CORE_FEATURES, -+ BASE_HW_FEATURE_PBHA_HWU, -+ BASE_HW_FEATURE_END -+}; ++ if (WARN_ON(!watchdog_if)) ++ return; + ++ timer_info = (void *)watchdog_if->timer; + -+#endif /* _BASE_HWCONFIG_FEATURES_H_ */ -diff --git 
a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h ++ if (WARN_ON(!timer_info)) ++ return; ++ ++ destroy_workqueue(timer_info->workq); ++ kfree(timer_info); ++ ++ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ ++ .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL ++ }; ++} ++ ++int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if) ++{ ++ struct kbase_hwcnt_watchdog_if_timer_info *timer_info; ++ ++ if (WARN_ON(!watchdog_if)) ++ return -EINVAL; ++ ++ timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL); ++ if (!timer_info) ++ return -ENOMEM; ++ ++ *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false }; ++ ++ INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback); ++ ++ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ ++ .timer = (void *)timer_info, ++ .enable = kbasep_hwcnt_watchdog_if_timer_enable, ++ .disable = kbasep_hwcnt_watchdog_if_timer_disable, ++ .modify = kbasep_hwcnt_watchdog_if_timer_modify, ++ }; ++ ++ timer_info->workq = alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (timer_info->workq) ++ return 0; ++ ++ kfree(timer_info); ++ return -ENOMEM; ++} +diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h new file mode 100644 -index 000000000..0fbdec0bb +index 000000000..a545ad3e3 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h -@@ -0,0 +1,835 @@ ++++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h +@@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -200269,1703 +202727,1073 @@ index 000000000..0fbdec0bb + * + */ + -+/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, -+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py -+ * For more information see base/tools/hwconfig_generator/README ++/* ++ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend + */ + -+#ifndef _BASE_HWCONFIG_ISSUES_H_ -+#define _BASE_HWCONFIG_ISSUES_H_ ++#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ ++#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ + -+enum base_hw_issue { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_TNOX_1194, -+ BASE_HW_ISSUE_TGOX_R1_1234, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TSIX_1792, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_GPU2019_3212, -+ BASE_HW_ISSUE_TURSEHW_1997, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++struct kbase_hwcnt_watchdog_interface; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = { -+ BASE_HW_ISSUE_END -+}; ++/** ++ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface of hardware counter backend. ++ * ++ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success ++ * ++ * Return: 0 on success, error otherwise. ++ */ ++int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter ++ * backend. 
++ * ++ * @watchdog_if: Pointer to watchdog interface to destroy ++ */ ++void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/ipa/Kbuild b/drivers/gpu/arm/bifrost/ipa/Kbuild +new file mode 100755 +index 000000000..0be664c47 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/Kbuild +@@ -0,0 +1,35 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. 
++# ++# + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++bifrost_kbase-y += \ ++ ipa/mali_kbase_ipa_simple.o \ ++ ipa/mali_kbase_ipa.o + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ifeq ($(MALI_USE_CSF),1) ++ bifrost_kbase-y += \ ++ ipa/backend/mali_kbase_ipa_counter_csf.o \ ++ ipa/backend/mali_kbase_ipa_counter_common_csf.o ++else ++ bifrost_kbase-y += \ ++ ipa/backend/mali_kbase_ipa_counter_jm.o \ ++ ipa/backend/mali_kbase_ipa_counter_common_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c +new file mode 100644 +index 000000000..60b061ef6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c +@@ -0,0 +1,457 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++#include "mali_kbase_ipa_counter_common_csf.h" ++#include "ipa/mali_kbase_ipa_debugfs.h" + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++#define DEFAULT_SCALING_FACTOR 5 + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/* If the value of GPU_ACTIVE is below this, use the simple model ++ * instead, to avoid extrapolating small amounts of counter data across ++ * large sample periods. ++ */ ++#define DEFAULT_MIN_SAMPLE_CYCLES 10000 + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/* Typical value for the sampling interval is expected to be less than 100ms, ++ * So 5 seconds is a reasonable upper limit for the time gap between the ++ * 2 samples. ++ */ ++#define MAX_SAMPLE_INTERVAL_MS ((s64)5000) + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TSIX_1792, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/* Maximum increment that is expected for a counter value during a sampling ++ * interval is derived assuming ++ * - max sampling interval of 1 second. ++ * - max GPU frequency of 2 GHz. ++ * - max number of cores as 32. ++ * - max increment of 4 in per core counter value at every clock cycle. ++ * ++ * So max increment = 2 * 10^9 * 32 * 4 = ~2^38. ++ * If a counter increases by an amount greater than this value, then an error ++ * will be returned and the simple power model will be used. 
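++ * (For scale, and assuming the figures above: 2 * 10^9 * 32 * 4 is about
++ * 2.6 * 10^11, while 2^38 is about 2.7 * 10^11, so the limit below
++ * comfortably covers that worst case.)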
++ */ ++#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1) + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TSIX_1792, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) ++{ ++ s64 rtn; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ if (a > 0 && (S64_MAX - a) < b) ++ rtn = S64_MAX; ++ else if (a < 0 && (S64_MIN - a) > b) ++ rtn = S64_MIN; ++ else ++ rtn = a + b; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ return rtn; ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value) ++{ ++ /* Range: 0 < counter_value < 2^38 */ + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */ ++ return counter_value * (s64)coeff; ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control ++ * @model_data: Pointer to counter model data ++ * ++ * Register IPA counter model as a client of kbase_ipa_control, which ++ * provides an interface to retreive the accumulated value of hardware ++ * counters to calculate energy consumption. ++ * ++ * Return: 0 on success, or an error code. 
++ */ ++static int ++kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) ++{ ++ struct kbase_device *kbdev = model_data->kbdev; ++ struct kbase_ipa_control_perf_counter *perf_counters; ++ u32 cnt_idx = 0; ++ int err; ++ size_t i; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TNOX_1194, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Value for GPU_ACTIVE counter also needs to be queried. It is required ++ * for the normalization of top-level and shader core counters. ++ */ ++ model_data->num_counters = 1 + model_data->num_top_level_cntrs + ++ model_data->num_shader_cores_cntrs; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ perf_counters = kcalloc(model_data->num_counters, ++ sizeof(*perf_counters), GFP_KERNEL); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TNOX_1194, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ if (!perf_counters) { ++ dev_err(kbdev->dev, ++ "Failed to allocate memory for perf_counters array"); ++ return -ENOMEM; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TGOX_R1_1234, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Fill in the description for GPU_ACTIVE counter which is always ++ * needed, as mentioned above, regardless of the energy model used ++ * by the CSF GPUs. 
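++	 * The accumulated GPU_ACTIVE value is what
++	 * kbase_ipa_counter_dynamic_coeff() later reads back as 'active_cycles'
++	 * to normalize the other counters.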
++ */ ++ perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW; ++ perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->top_level_cntrs_def[i]; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ perf_counters[cnt_idx].type = counter->counter_block_type; ++ perf_counters[cnt_idx].idx = counter->counter_block_offset; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->shader_cores_cntrs_def[i]; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ perf_counters[cnt_idx].type = counter->counter_block_type; ++ perf_counters[cnt_idx].idx = counter->counter_block_offset; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ err = kbase_ipa_control_register(kbdev, perf_counters, ++ model_data->num_counters, ++ &model_data->ipa_control_client); ++ if (err) ++ dev_err(kbdev->dev, ++ "Failed to register IPA with kbase_ipa_control"); + -+__attribute__((unused)) static const enum base_hw_issue 
base_hw_issues_tNAx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ kfree(perf_counters); ++ return err; ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_3076, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_GPU2017_1336, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control. ++ * @model_data: Pointer to counter model data ++ */ ++static void ++kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data) ++{ ++ if (model_data->ipa_control_client) { ++ kbase_ipa_control_unregister(model_data->kbdev, ++ model_data->ipa_control_client); ++ model_data->ipa_control_client = NULL; ++ } ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data, ++ const struct kbase_ipa_counter *const cnt_defs, ++ size_t num_counters, s32 *counter_coeffs, ++ u64 *counter_values, u32 active_cycles, u32 *coeffp) ++{ ++ u64 coeff = 0, coeff_mul = 0; ++ s64 total_energy = 0; ++ size_t i; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range for the 'counter_value' is [0, 2^38) ++ * Range for the 'coeff' is [-2^21, 2^21] ++ * So range for the 'group_energy' is [-2^59, 2^59) and range for the ++ * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e. ++ * [-2^63, 2^63). 
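++	 * Because that sum can span nearly the full s64 range, the loop below
++	 * accumulates with kbase_ipa_add_saturate() so that an outlier sample
++	 * clamps at S64_MAX/S64_MIN instead of wrapping around.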
++ */ ++ for (i = 0; i < num_counters; i++) { ++ s32 coeff = counter_coeffs[i]; ++ u64 counter_value = counter_values[i]; ++ s64 group_energy = kbase_ipa_group_energy(coeff, counter_value); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ if (counter_value > MAX_COUNTER_INCREMENT) { ++ dev_warn(model_data->kbdev->dev, ++ "Increment in counter %s more than expected", ++ cnt_defs[i].name); ++ return -ERANGE; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ total_energy = ++ kbase_ipa_add_saturate(total_energy, group_energy); ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range: 0 <= coeff < 2^63 */ ++ if (total_energy >= 0) ++ coeff = total_energy; ++ else ++ dev_dbg(model_data->kbdev->dev, ++ "Energy value came negative as %lld", total_energy); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this ++ * can be constrained further: the value of counters that are being ++ * used for dynamic power estimation can only increment by about 128 ++ * maximum per clock cycle. This is because max number of shader ++ * cores is expected to be 32 (max number of L2 slices is expected to ++ * be 8) and some counters (per shader core) like SC_BEATS_RD_TEX_EXT & ++ * SC_EXEC_STARVE_ARITH can increment by 4 every clock cycle. ++ * Each "beat" is defined as 128 bits and each shader core can ++ * (currently) do 512 bits read and 512 bits write to/from the L2 ++ * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment ++ * [0, 4] per shader core per cycle. 
++ * We can thus write the range of 'coeff' in terms of active_cycles: ++ * ++ * coeff = SUM(coeffN * counterN * num_cores_for_counterN) ++ * coeff <= SUM(coeffN * counterN) * max_cores ++ * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores ++ * (substitute max_counter = 2^2 * active_cycles) ++ * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores ++ * coeff <= 2^4 * 2^21 * 2^2 * active_cycles * 2^5 ++ * coeff <= 2^32 * active_cycles ++ * ++ * So after the division: 0 <= coeff <= 2^32 ++ */ ++ coeff = div_u64(coeff, active_cycles); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TTRX_3485, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Not all models were derived at the same reference voltage. Voltage ++ * scaling is done by multiplying by V^2, so we need to *divide* by ++ * Vref^2 here. ++ * Range: 0 <= coeff <= 2^35 ++ */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ /* Range: 0 <= coeff <= 2^38 */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Scale by user-specified integer factor. ++ * Range: 0 <= coeff_mul < 2^43 ++ */ ++ coeff_mul = coeff * model_data->scaling_factor; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* The power models have results with units ++ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this ++ * becomes fW/(Hz V^2), which are the units of coeff_mul. However, ++ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide ++ * by 1000. 
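++	 * (Illustrative numbers only: a coeff_mul of 3,000,000 fW/(Hz V^2)
++	 * becomes 3,000 pW/(Hz V^2) after this division.)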
++ * Range: 0 <= coeff_mul < 2^33 ++ */ ++ coeff_mul = div_u64(coeff_mul, 1000u); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, -+ BASE_HW_ISSUE_TTRX_921, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ ++ *coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TTRX_3414, -+ BASE_HW_ISSUE_TTRX_3083, -+ BASE_HW_ISSUE_TTRX_3470, -+ BASE_HW_ISSUE_TTRX_3464, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ return 0; ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3212, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ struct kbase_device *kbdev = model->kbdev; ++ s32 *counter_coeffs_p = model_data->counter_coeffs; ++ u64 *cnt_values_p = model_data->counter_values; ++ const u64 num_counters = model_data->num_counters; ++ u32 active_cycles; ++ ktime_t now, diff; ++ s64 diff_ms; ++ int ret; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3212, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ lockdep_assert_held(&kbdev->ipa.lock); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* The last argument is supposed to be a pointer to the location that ++ * will store the time for which GPU has been in protected mode since ++ * last query. This can be passed as NULL as counter model itself will ++ * not be used when GPU enters protected mode, as IPA is supposed to ++ * switch to the simple power model. 
++ */ ++ ret = kbase_ipa_control_query(kbdev, ++ model_data->ipa_control_client, ++ cnt_values_p, num_counters, NULL); ++ if (WARN_ON(ret)) ++ return ret; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ now = ktime_get_raw(); ++ diff = ktime_sub(now, kbdev->ipa.last_sample_time); ++ diff_ms = ktime_to_ms(diff); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ kbdev->ipa.last_sample_time = now; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* The counter values cannot be relied upon if the sampling interval was ++ * too long. Typically this will happen when the polling is started ++ * after the temperature has risen above a certain trip point. After ++ * that regular calls every 25-100 ms interval are expected. ++ */ ++ if (diff_ms > MAX_SAMPLE_INTERVAL_MS) { ++ dev_dbg(kbdev->dev, ++ "Last sample was taken %lld milli seconds ago", ++ diff_ms); ++ return -EOVERFLOW; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TURSEHW_1997, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range: 0 (GPU not used at all), to the max sampling interval, say ++ * 1 seconds, * max GPU frequency (GPU 100% utilized). ++ * 0 <= active_cycles <= 1 * ~2GHz ++ * 0 <= active_cycles < 2^31 ++ */ ++ if (*cnt_values_p > U32_MAX) { ++ dev_warn(kbdev->dev, ++ "Increment in GPU_ACTIVE counter more than expected"); ++ return -ERANGE; ++ } + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TURSEHW_1997, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ active_cycles = (u32)*cnt_values_p; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* If the value of the active_cycles is less than the threshold, then ++ * return an error so that IPA framework can approximate using the ++ * cached simple model results instead. This may be more accurate ++ * than extrapolating using a very small counter dump. 
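++	 * With the default threshold of 10000 cycles this only requires the GPU
++	 * to have been active for a few tens of microseconds in the sample
++	 * period (roughly 20 us at a hypothetical 500 MHz clock).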
++ */ ++ if (active_cycles < (u32)max(model_data->min_sample_cycles, 0)) ++ return -ENODATA; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ /* Range: 1 <= active_cycles < 2^31 */ ++ active_cycles = max(1u, active_cycles); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ cnt_values_p++; ++ ret = calculate_coeff(model_data, model_data->top_level_cntrs_def, ++ model_data->num_top_level_cntrs, ++ counter_coeffs_p, cnt_values_p, active_cycles, ++ &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ if (ret) ++ return ret; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ cnt_values_p += model_data->num_top_level_cntrs; ++ counter_coeffs_p += model_data->num_top_level_cntrs; ++ ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def, ++ model_data->num_shader_cores_cntrs, ++ counter_coeffs_p, cnt_values_p, active_cycles, ++ &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_GPU2019_3878, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2019_3901, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ return ret; ++} + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ u64 *cnt_values_p = model_data->counter_values; ++ const u64 num_counters = model_data->num_counters; ++ int ret; + -+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { -+ BASE_HW_ISSUE_TSIX_2033, -+ BASE_HW_ISSUE_TTRX_1337, -+ BASE_HW_ISSUE_TURSEHW_2716, -+ BASE_HW_ISSUE_GPU2021PRO_290, -+ BASE_HW_ISSUE_TITANHW_2710, -+ BASE_HW_ISSUE_TITANHW_2679, -+ BASE_HW_ISSUE_GPU2022PRO_148, -+ BASE_HW_ISSUE_END -+}; ++ lockdep_assert_held(&model->kbdev->ipa.lock); + ++ ret = kbase_ipa_control_query(model->kbdev, ++ model_data->ipa_control_client, ++ cnt_values_p, num_counters, NULL); ++ WARN_ON(ret); ++} + -+#endif /* _BASE_HWCONFIG_ISSUES_H_ */ -diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h -new file mode 100644 -index 000000000..542e8f63f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase.h -@@ -0,0 +1,752 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_counter *top_level_cntrs_def, ++ size_t num_top_level_cntrs, ++ const struct kbase_ipa_counter *shader_cores_cntrs_def, ++ size_t num_shader_cores_cntrs, ++ s32 reference_voltage) ++{ ++ struct kbase_ipa_counter_model_data *model_data; ++ s32 *counter_coeffs_p; ++ int err = 0; ++ size_t i; + -+#ifndef _KBASE_H_ -+#define _KBASE_H_ ++ if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def || ++ !num_top_level_cntrs || !num_shader_cores_cntrs) ++ return -EINVAL; + -+#include ++ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); ++ if (!model_data) ++ return -ENOMEM; + -+#include ++ model_data->kbdev = model->kbdev; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include ++ model_data->top_level_cntrs_def = top_level_cntrs_def; ++ model_data->num_top_level_cntrs = num_top_level_cntrs; + -+#include -+#include ++ model_data->shader_cores_cntrs_def = shader_cores_cntrs_def; ++ model_data->num_shader_cores_cntrs = num_shader_cores_cntrs; + -+/* -+ * Include mali_kbase_defs.h first as this provides types needed by other local -+ * header files. 
-+ */ -+#include "mali_kbase_defs.h" ++ model->model_data = (void *)model_data; + -+#include "debug/mali_kbase_debug_ktrace.h" -+#include "context/mali_kbase_context.h" -+#include "mali_kbase_strings.h" -+#include "mali_kbase_mem_lowlevel.h" -+#include "mali_kbase_utility.h" -+#include "mali_kbase_mem.h" -+#include "mmu/mali_kbase_mmu.h" -+#include "mali_kbase_gpu_memory_debugfs.h" -+#include "mali_kbase_mem_profile_debugfs.h" -+#include "mali_kbase_gpuprops.h" -+#include -+#if !MALI_USE_CSF -+#include "mali_kbase_debug_job_fault.h" -+#include "mali_kbase_jd_debugfs.h" -+#include "mali_kbase_jm.h" -+#include "mali_kbase_js.h" -+#else /* !MALI_USE_CSF */ -+#include "csf/mali_kbase_debug_csf_fault.h" -+#endif /* MALI_USE_CSF */ ++ counter_coeffs_p = model_data->counter_coeffs; + -+#include "ipa/mali_kbase_ipa.h" ++ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->top_level_cntrs_def[i]; + -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+#include -+#endif ++ *counter_coeffs_p = counter->coeff_default_value; + -+#include "mali_linux_trace.h" ++ err = kbase_ipa_model_add_param_s32( ++ model, counter->name, counter_coeffs_p, 1, false); ++ if (err) ++ goto exit; + -+#if MALI_USE_CSF -+#include "csf/mali_kbase_csf.h" ++ counter_coeffs_p++; ++ } + -+/* Physical memory group ID for CSF user I/O. -+ */ -+#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT ++ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->shader_cores_cntrs_def[i]; + -+/* Physical memory group ID for CSF firmware. -+ */ -+#define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT -+#endif ++ *counter_coeffs_p = counter->coeff_default_value; + -+/* Physical memory group ID for a special page which can alias several regions. 
-+ */ -+#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT ++ err = kbase_ipa_model_add_param_s32( ++ model, counter->name, counter_coeffs_p, 1, false); ++ if (err) ++ goto exit; + -+/* -+ * Kernel-side Base (KBase) APIs -+ */ ++ counter_coeffs_p++; ++ } + -+struct kbase_device *kbase_device_alloc(void); -+/* -+ * note: configuration attributes member of kbdev needs to have -+ * been setup before calling kbase_device_init -+ */ ++ model_data->scaling_factor = DEFAULT_SCALING_FACTOR; ++ err = kbase_ipa_model_add_param_s32( ++ model, "scale", &model_data->scaling_factor, 1, false); ++ if (err) ++ goto exit; + -+int kbase_device_misc_init(struct kbase_device *kbdev); -+void kbase_device_misc_term(struct kbase_device *kbdev); ++ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; ++ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", ++ &model_data->min_sample_cycles, 1, ++ false); ++ if (err) ++ goto exit; + -+#if !MALI_USE_CSF -+void kbase_enable_quick_reset(struct kbase_device *kbdev); -+void kbase_disable_quick_reset(struct kbase_device *kbdev); -+bool kbase_is_quick_reset_enabled(struct kbase_device *kbdev); -+#endif ++ model_data->reference_voltage = reference_voltage; ++ err = kbase_ipa_model_add_param_s32(model, "reference_voltage", ++ &model_data->reference_voltage, 1, ++ false); ++ if (err) ++ goto exit; + -+void kbase_device_free(struct kbase_device *kbdev); -+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); ++ err = kbase_ipa_attach_ipa_control(model_data); + -+/* Needed for gator integration and for reporting vsync information */ -+struct kbase_device *kbase_find_device(int minor); -+void kbase_release_device(struct kbase_device *kbdev); ++exit: ++ if (err) { ++ kbase_ipa_model_param_free_all(model); ++ kfree(model_data); ++ } ++ return err; ++} + -+/** -+ * kbase_context_get_unmapped_area() - get an address range which is currently -+ * unmapped. -+ * @kctx: A kernel base context (which has its own GPU address space). -+ * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed -+ * as Mali GPU driver decides about the mapping). -+ * @len: Length of the address range. -+ * @pgoff: Page offset within the GPU address space of the kbase context. -+ * @flags: Flags for the allocation. ++void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ ++ kbase_ipa_detach_ipa_control(model_data); ++ kfree(model_data); ++} +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h +new file mode 100644 +index 000000000..37d2efc59 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h +@@ -0,0 +1,159 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Finds the unmapped address range which satisfies requirements specific to -+ * GPU and those provided by the call parameters. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * -+ * 1) Requirement for allocations greater than 2MB: -+ * - alignment offset is set to 2MB and the alignment mask to 2MB decremented -+ * by 1. 
++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * 2) Requirements imposed for the shader memory alignment: -+ * - alignment is decided by the number of GPU pc bits which can be read from -+ * GPU properties of the device associated with this kbase context; alignment -+ * offset is set to this value in bytes and the alignment mask to the offset -+ * decremented by 1. -+ * - allocations must not to be at 4GB boundaries. Such cases are indicated -+ * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase -+ * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * 3) Requirements imposed for tiler memory alignment, cases indicated by -+ * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase -+ * region): -+ * - alignment offset is set to the difference between the kbase region -+ * extension (converted from the original value in pages to bytes) and the kbase -+ * region initial_commit (also converted from the original value in pages to -+ * bytes); alignment mask is set to the kbase region extension in bytes and -+ * decremented by 1. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: if successful, address of the unmapped area aligned as required; -+ * error code (negative) in case of failure; + */ -+unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, -+ const unsigned long addr, const unsigned long len, -+ const unsigned long pgoff, const unsigned long flags); + ++#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_ ++#define _KBASE_IPA_COUNTER_COMMON_CSF_H_ + -+int assign_irqs(struct kbase_device *kbdev); -+ -+int kbase_sysfs_init(struct kbase_device *kbdev); -+void kbase_sysfs_term(struct kbase_device *kbdev); ++#include "mali_kbase.h" ++#include "csf/ipa_control/mali_kbase_csf_ipa_control.h" + ++/* Maximum number of HW counters used by the IPA counter model. */ ++#define KBASE_IPA_MAX_COUNTER_DEF_NUM 24 + -+int kbase_protected_mode_init(struct kbase_device *kbdev); -+void kbase_protected_mode_term(struct kbase_device *kbdev); ++struct kbase_ipa_counter_model_data; + +/** -+ * kbase_device_pm_init() - Performs power management initialization and -+ * Verifies device tree configurations. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Return: 0 if successful, otherwise a standard Linux error code ++ * struct kbase_ipa_counter_model_data - IPA counter model context per device ++ * @kbdev: Pointer to kbase device ++ * @ipa_control_client: Handle returned on registering IPA counter model as a ++ * client of kbase_ipa_control. ++ * @top_level_cntrs_def: Array of description of HW counters used by the IPA ++ * counter model for top-level. ++ * @num_top_level_cntrs: Number of elements in @top_level_cntrs_def array. ++ * @shader_cores_cntrs_def: Array of description of HW counters used by the IPA ++ * counter model for shader cores. 
++ * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array. ++ * @counter_coeffs: Buffer to store coefficient value used for HW counters ++ * @counter_values: Buffer to store the accumulated value of HW counters ++ * retreived from kbase_ipa_control. ++ * @num_counters: Number of counters queried from kbase_ipa_control. ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. Range approx ++ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 ++ * @scaling_factor: User-specified power scaling factor. This is an ++ * integer, which is multiplied by the power coefficient ++ * just before OPP scaling. ++ * Range approx 0-32: 0 < scaling_factor < 2^5 ++ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of ++ * cycles the GPU was working) is less than ++ * min_sample_cycles, the counter model will return an ++ * error, causing the IPA framework to approximate using ++ * the cached simple model results instead. This may be ++ * more accurate than extrapolating using a very small ++ * counter dump. + */ -+int kbase_device_pm_init(struct kbase_device *kbdev); ++struct kbase_ipa_counter_model_data { ++ struct kbase_device *kbdev; ++ void *ipa_control_client; ++ const struct kbase_ipa_counter *top_level_cntrs_def; ++ size_t num_top_level_cntrs; ++ const struct kbase_ipa_counter *shader_cores_cntrs_def; ++ size_t num_shader_cores_cntrs; ++ s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM]; ++ u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM]; ++ u64 num_counters; ++ s32 reference_voltage; ++ s32 scaling_factor; ++ s32 min_sample_cycles; ++}; + +/** -+ * kbase_device_pm_term() - Performs power management deinitialization and -+ * Free resources. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Clean up all the resources ++ * struct kbase_ipa_counter - represents a single HW counter used by IPA model ++ * @name: Name of the HW counter used by IPA counter model ++ * for energy estimation. ++ * @coeff_default_value: Default value of coefficient for the counter. ++ * Coefficients are interpreted as fractions where the ++ * denominator is 1000000. ++ * @counter_block_offset: Index to the counter within the counter block of ++ * type @counter_block_type. ++ * @counter_block_type: Type of the counter block. 
+ */ -+void kbase_device_pm_term(struct kbase_device *kbdev); -+ -+ -+int power_control_init(struct kbase_device *kbdev); -+void power_control_term(struct kbase_device *kbdev); -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+void kbase_device_debugfs_term(struct kbase_device *kbdev); -+int kbase_device_debugfs_init(struct kbase_device *kbdev); -+#else /* CONFIG_DEBUG_FS */ -+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) -+{ -+ return 0; -+} -+ -+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } -+#endif /* CONFIG_DEBUG_FS */ -+ -+int registers_map(struct kbase_device *kbdev); -+void registers_unmap(struct kbase_device *kbdev); -+ -+int kbase_device_coherency_init(struct kbase_device *kbdev); -+ -+ -+#if !MALI_USE_CSF -+int kbase_jd_init(struct kbase_context *kctx); -+void kbase_jd_exit(struct kbase_context *kctx); ++struct kbase_ipa_counter { ++ const char *name; ++ s32 coeff_default_value; ++ u32 counter_block_offset; ++ enum kbase_ipa_core_type counter_block_type; ++}; + +/** -+ * kbase_jd_submit - Submit atoms to the job dispatcher ++ * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters ++ * @model: pointer to instantiated model ++ * @coeffp: pointer to location where calculated power, in ++ * pW/(Hz V^2), is stored for top level and shader cores. + * -+ * @kctx: The kbase context to submit to -+ * @user_addr: The address in user space of the struct base_jd_atom array -+ * @nr_atoms: The number of atoms in the array -+ * @stride: sizeof(struct base_jd_atom) -+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) ++ * This is a GPU-agnostic implementation of the get_dynamic_coeff() ++ * function of an IPA model. It relies on the model being populated ++ * with GPU-specific attributes at initialization time. + * -+ * Return: 0 on success or error code ++ * Return: 0 on success, or an error code. + */ -+int kbase_jd_submit(struct kbase_context *kctx, -+ void __user *user_addr, u32 nr_atoms, u32 stride, -+ bool uk6_atom); ++int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + +/** -+ * kbase_jd_done_worker - Handle a job completion -+ * @data: a &struct work_struct -+ * -+ * This function requeues the job from the runpool (if it was soft-stopped or -+ * removed from NEXT registers). -+ * -+ * Removes it from the system if it finished/failed/was cancelled. -+ * -+ * Resolves dependencies to add dependent jobs to the context, potentially -+ * starting them if necessary (which may add more references to the context) -+ * -+ * Releases the reference to the context from the no-longer-running job. -+ * -+ * Handles retrying submission outside of IRQ context if it failed from within -+ * IRQ context. -+ */ -+void kbase_jd_done_worker(struct work_struct *data); -+ -+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, -+ kbasep_js_atom_done_code done_code); -+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -+void kbase_jd_zap_context(struct kbase_context *kctx); -+ -+/* -+ * kbase_jd_done_nolock - Perform the necessary handling of an atom that has completed -+ * the execution. -+ * -+ * @katom: Pointer to the atom that completed the execution -+ * @post_immediately: Flag indicating that completion event can be posted -+ * immediately for @katom and the other atoms depdendent -+ * on @katom which also completed execution. 
The flag is -+ * false only for the case where the function is called by -+ * kbase_jd_done_worker() on the completion of atom running -+ * on the GPU. -+ * -+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller -+ * is responsible for calling kbase_finish_soft_job *before* calling this function. ++ * kbase_ipa_counter_reset_data() - Reset the counters data used for dynamic ++ * power estimation ++ * @model: pointer to instantiated model + * -+ * The caller must hold the kbase_jd_context.lock. ++ * Retrieve the accumulated value of HW counters from the kbase_ipa_control ++ * component, without doing any processing, which is effectively a reset as the ++ * next call to kbase_ipa_counter_dynamic_coeff() will see the increment in ++ * counter values from this point onwards. + */ -+bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately); -+ -+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); -+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); ++void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model); + +/** -+ * kbase_job_done - Process completed jobs from job interrupt -+ * @kbdev: Pointer to the kbase device. -+ * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register ++ * kbase_ipa_counter_common_model_init() - initialize ipa power model ++ * @model: Pointer to the ipa power model to initialize ++ * @top_level_cntrs_def: Array corresponding to the HW counters used in the ++ * top level counter model, contains the counter index, ++ * default value of the coefficient. ++ * @num_top_level_cntrs: Number of elements in the array @top_level_cntrs_def ++ * @shader_cores_cntrs_def: Array corresponding to the HW counters used in the ++ * shader cores counter model, contains the counter index, ++ * default value of the coefficient. ++ * @num_shader_cores_cntrs: Number of elements in the array ++ * @shader_cores_cntrs_def. ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. + * -+ * This function processes the completed, or failed, jobs from the GPU job -+ * slots, for the bits set in the @done bitmask. ++ * This function performs initialization steps common for ipa counter based ++ * model of all CSF GPUs. The set of counters and their respective weights ++ * could be different for each GPU. The tuple of counter index and weight ++ * is passed via @top_level_cntrs_def and @shader_cores_cntrs_def array. + * -+ * The hwaccess_lock must be held when calling this function. ++ * Return: 0 on success, error code otherwise + */ -+void kbase_job_done(struct kbase_device *kbdev, u32 done); -+ ++int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_counter *top_level_cntrs_def, ++ size_t num_top_level_cntrs, ++ const struct kbase_ipa_counter *shader_cores_cntrs_def, ++ size_t num_shader_cores_cntrs, ++ s32 reference_voltage); +/** -+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms -+ * and soft stop them -+ * @kctx: Pointer to context to check. -+ * @katom: Pointer to priority atom. -+ * -+ * Atoms from @kctx on the same job slot as @katom, which have lower priority -+ * than @katom will be soft stopped and put back in the queue, so that atoms -+ * with higher priority can run. ++ * kbase_ipa_counter_common_model_term() - terminate ipa power model ++ * @model: ipa power model to terminate + * -+ * The hwaccess_lock must be held when calling this function. 
++ * This function performs all necessary steps to terminate ipa power model ++ * including clean up of resources allocated to hold model data. + */ -+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model); + -+/** -+ * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start -+ * of a renderpass. -+ * @kctx: Pointer to a kernel base context. -+ * @reg: Reference of a growable GPU memory region in the same context. -+ * Takes ownership of the reference if successful. ++#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */ +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c +new file mode 100644 +index 000000000..34515a934 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c +@@ -0,0 +1,355 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * Used to switch to incremental rendering if we have nearly run out of -+ * virtual address space in a growable memory region and the atom currently -+ * executing on a job slot is the tiler job chain at the start of a renderpass. ++ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. + * -+ * Return: 0 if successful, otherwise a negative error code. -+ */ -+int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, -+ struct kbase_va_region *reg); -+ -+/** -+ * kbase_job_slot_softstop - Soft-stop the specified job slot ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kbdev: The kbase device -+ * @js: The job slot to soft-stop -+ * @target_katom: The job that should be soft-stopped (or NULL for any job) -+ * Context: -+ * The job slot lock must be held when calling this function. -+ * The job slot must not already be in the process of being soft-stopped. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Where possible any job in the next register is evicted before the soft-stop. + */ -+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom); + -+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, -+ struct kbase_jd_atom *target_katom, u32 sw_flags); ++#include "mali_kbase_ipa_counter_common_jm.h" ++#include "ipa/mali_kbase_ipa_debugfs.h" + -+/** -+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode -+ * @kbdev: kbase device -+ * @action: the event which has occurred -+ * @core_reqs: core requirements of the atom -+ * @target_katom: the atom which is being affected -+ * -+ * For a certain soft-stop action, work out whether to enter disjoint -+ * state. 
-+ * -+ * This does not register multiple disjoint events if the atom has already -+ * started a disjoint period -+ * -+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware -+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted -+ * flow, perhaps on another context) -+ * -+ * kbase_job_check_leave_disjoint() should be used to end the disjoint -+ * state when the soft/hard-stop action is complete ++#define DEFAULT_SCALING_FACTOR 5 ++ ++/* If the value of GPU_ACTIVE is below this, use the simple model ++ * instead, to avoid extrapolating small amounts of counter data across ++ * large sample periods. + */ -+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, -+ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); ++#define DEFAULT_MIN_SAMPLE_CYCLES 10000 + +/** -+ * kbase_job_check_leave_disjoint - potentially leave disjoint state -+ * @kbdev: kbase device -+ * @target_katom: atom which is finishing ++ * kbase_ipa_read_hwcnt() - read a counter value ++ * @model_data: pointer to model data ++ * @offset: offset, in bytes, into vinstr buffer + * -+ * Work out whether to leave disjoint state when finishing an atom that was -+ * originated by kbase_job_check_enter_disjoint(). ++ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be ++ * incrementing every cycle over a ~100ms sample period at a high frequency, ++ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27. + */ -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom); ++static inline u32 kbase_ipa_read_hwcnt( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ u32 offset) ++{ ++ u8 *p = (u8 *)model_data->dump_buf.dump_buf; ++ u64 val = *(u64 *)&p[offset]; + -+#endif /* !MALI_USE_CSF */ ++ return (val > U32_MAX) ? U32_MAX : (u32)val; ++} + -+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); -+#if !MALI_USE_CSF -+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); -+#endif /* !MALI_USE_CSF */ -+int kbase_event_pending(struct kbase_context *ctx); -+int kbase_event_init(struct kbase_context *kctx); -+void kbase_event_close(struct kbase_context *kctx); -+void kbase_event_cleanup(struct kbase_context *kctx); -+void kbase_event_wakeup(struct kbase_context *kctx); ++static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) ++{ ++ s64 rtn; + -+/** -+ * kbasep_jit_alloc_validate() - Validate the JIT allocation info. -+ * -+ * @kctx: Pointer to the kbase context within which the JIT -+ * allocation is to be validated. -+ * @info: Pointer to struct @base_jit_alloc_info -+ * which is to be validated. -+ * Return: 0 if jit allocation is valid; negative error code otherwise -+ */ -+int kbasep_jit_alloc_validate(struct kbase_context *kctx, -+ struct base_jit_alloc_info *info); ++ if (a > 0 && (S64_MAX - a) < b) ++ rtn = S64_MAX; ++ else if (a < 0 && (S64_MIN - a) > b) ++ rtn = S64_MIN; ++ else ++ rtn = a + b; + -+/** -+ * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory -+ * allocations. -+ * -+ * @kctx: Pointer to the kbase context within which the just-in-time -+ * memory allocations are to be retried. -+ */ -+void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); ++ return rtn; ++} + -+/** -+ * kbase_free_user_buffer() - Free memory allocated for struct -+ * @kbase_debug_copy_buffer. -+ * -+ * @buffer: Pointer to the memory location allocated for the object -+ * of the type struct @kbase_debug_copy_buffer. 
-+ */ -+static inline void kbase_free_user_buffer( -+ struct kbase_debug_copy_buffer *buffer) ++s64 kbase_ipa_sum_all_shader_cores( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter) +{ -+ struct page **pages = buffer->extres_pages; -+ int nr_pages = buffer->nr_extres_pages; -+ -+ if (pages) { -+ int i; ++ struct kbase_device *kbdev = model_data->kbdev; ++ u64 core_mask; ++ u32 base = 0; ++ s64 ret = 0; + -+ for (i = 0; i < nr_pages; i++) { -+ struct page *pg = pages[i]; ++ core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; ++ while (core_mask != 0ull) { ++ if ((core_mask & 1ull) != 0ull) { ++ /* 0 < counter_value < 2^27 */ ++ u32 counter_value = kbase_ipa_read_hwcnt(model_data, ++ base + counter); + -+ if (pg) -+ put_page(pg); ++ /* 0 < ret < 2^27 * max_num_cores = 2^32 */ ++ ret = kbase_ipa_add_saturate(ret, counter_value); + } -+ kfree(pages); ++ base += KBASE_IPA_NR_BYTES_PER_BLOCK; ++ core_mask >>= 1; + } -+} + -+#if !MALI_USE_CSF -+int kbase_process_soft_job(struct kbase_jd_atom *katom); -+int kbase_prepare_soft_job(struct kbase_jd_atom *katom); -+void kbase_finish_soft_job(struct kbase_jd_atom *katom); -+void kbase_cancel_soft_job(struct kbase_jd_atom *katom); -+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); -+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); -+#endif -+int kbase_soft_event_update(struct kbase_context *kctx, -+ u64 event, -+ unsigned char new_status); ++ /* Range: -2^54 < ret * coeff < 2^54 */ ++ return ret * coeff; ++} + -+void kbasep_soft_job_timeout_worker(struct timer_list *timer); -+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); -+#endif /* !MALI_USE_CSF */ ++s64 kbase_ipa_sum_all_memsys_blocks( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter) ++{ ++ struct kbase_device *kbdev = model_data->kbdev; ++ const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; ++ u32 base = 0; ++ s64 ret = 0; ++ u32 i; + -+void kbasep_as_do_poke(struct work_struct *work); ++ for (i = 0; i < num_blocks; i++) { ++ /* 0 < counter_value < 2^27 */ ++ u32 counter_value = kbase_ipa_read_hwcnt(model_data, ++ base + counter); + -+/** -+ * kbase_pm_is_suspending - Check whether a system suspend is in progress, -+ * or has already been suspended -+ * -+ * @kbdev: The kbase device structure for the device -+ * -+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or -+ * a dmb was executed recently (to ensure the value is most -+ * up-to-date). However, without a lock the value could change afterwards. 
-+ * -+ * Return: -+ * * false if a suspend is not in progress -+ * * !=false otherwise -+ */ -+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) -+{ -+ return kbdev->pm.suspending; -+} ++ /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ ++ ret = kbase_ipa_add_saturate(ret, counter_value); ++ base += KBASE_IPA_NR_BYTES_PER_BLOCK; ++ } + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+/* -+ * Check whether a gpu lost is in progress -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Indicates whether a gpu lost has been received and jobs are no longer -+ * being scheduled -+ * -+ * Return: false if gpu is lost -+ * Return: != false otherwise -+ */ -+static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) -+{ -+ return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true); ++ /* Range: -2^51 < ret * coeff < 2^51 */ ++ return ret * coeff; +} + -+/* -+ * Set or clear gpu lost state -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @gpu_lost: true to activate GPU lost state, FALSE is deactive it -+ * -+ * Puts power management code into gpu lost state or takes it out of the -+ * state. Once in gpu lost state new GPU jobs will no longer be -+ * scheduled. -+ */ -+static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, -+ bool gpu_lost) ++s64 kbase_ipa_single_counter( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter) +{ -+ const int new_val = (gpu_lost ? 1 : 0); -+ const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val); ++ /* Range: 0 < counter_value < 2^27 */ ++ const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); + -+ if (new_val != cur_val) -+ KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, new_val); ++ /* Range: -2^49 < ret < 2^49 */ ++ return counter_value * (s64) coeff; +} -+#endif + -+/** -+ * kbase_pm_is_active - Determine whether the GPU is active -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This takes into account whether there is an active context reference. -+ * -+ * Return: true if the GPU is active, false otherwise -+ */ -+static inline bool kbase_pm_is_active(struct kbase_device *kbdev) ++int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) +{ -+ return kbdev->pm.active_count > 0; -+} -+ -+/** -+ * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can -+ * run as using the device tree, and save this -+ * within kbdev. -+ * @kbdev: Pointer to kbase device. -+ * -+ * This function could be called from kbase_clk_rate_trace_manager_init, -+ * but is left separate as it can be called as soon as -+ * dev_pm_opp_of_add_table() has been called to initialize the OPP table, -+ * which occurs in power_control_init(). -+ * -+ * Return: 0 in any case. -+ */ -+int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); ++ int errcode; ++ struct kbase_device *kbdev = model_data->kbdev; ++ struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; ++ struct kbase_hwcnt_enable_map enable_map; ++ const struct kbase_hwcnt_metadata *metadata = ++ kbase_hwcnt_virtualizer_metadata(hvirt); + -+/** -+ * kbase_pm_metrics_start - Start the utilization metrics timer -+ * @kbdev: Pointer to the kbase device for which to start the utilization -+ * metrics calculation thread. -+ * -+ * Start the timer that drives the metrics calculation, runs the custom DVFS. 
-+ */ -+void kbase_pm_metrics_start(struct kbase_device *kbdev); ++ if (!metadata) ++ return -1; + -+/** -+ * kbase_pm_metrics_stop - Stop the utilization metrics timer -+ * @kbdev: Pointer to the kbase device for which to stop the utilization -+ * metrics calculation thread. -+ * -+ * Stop the timer that drives the metrics calculation, runs the custom DVFS. -+ */ -+void kbase_pm_metrics_stop(struct kbase_device *kbdev); ++ errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); ++ if (errcode) { ++ dev_err(kbdev->dev, "Failed to allocate IPA enable map"); ++ return errcode; ++ } + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+/** -+ * kbase_pm_handle_runtime_suspend - Handle the runtime suspend of GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function is called from the runtime suspend callback function for -+ * saving the HW state and powering down GPU, if GPU was in sleep state mode. -+ * It does the following steps -+ * - Powers up the L2 cache and re-activates the MCU. -+ * - Suspend the CSGs -+ * - Halts the MCU -+ * - Powers down the L2 cache. -+ * - Invokes the power_off callback to power down the GPU. -+ * -+ * Return: 0 if the GPU was already powered down or no error was encountered -+ * in the power down, otherwise an error code. -+ */ -+int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev); ++ kbase_hwcnt_enable_map_enable_all(&enable_map); + -+/** -+ * kbase_pm_force_mcu_wakeup_after_sleep - Force the wake up of MCU from sleep -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function forces the wake up of MCU from sleep state and wait for -+ * MCU to become active. -+ * It usually gets called from the runtime suspend callback function. -+ * It also gets called from the GPU reset handler or at the time of system -+ * suspend or when User tries to terminate/suspend the on-slot group. -+ * -+ * Note: @gpu_wakeup_override flag that forces the reactivation of MCU is -+ * set by this function and it is the caller's responsibility to -+ * clear the flag. -+ * -+ * Return: 0 if the wake up was successful. -+ */ -+int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev); -+#endif ++ /* Disable cycle counter only. */ ++ enable_map.clk_enable_map = 0; + -+#if !MALI_USE_CSF -+/** -+ * kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in -+ * base_jd_atom::atom_number -+ * @kctx: KBase context pointer -+ * @katom: Atome for which to return ID -+ * -+ * Return: the atom's ID. 
-+ */ -+static inline int kbase_jd_atom_id(struct kbase_context *kctx, -+ const struct kbase_jd_atom *katom) -+{ -+ int result; ++ errcode = kbase_hwcnt_virtualizer_client_create( ++ hvirt, &enable_map, &model_data->hvirt_cli); ++ kbase_hwcnt_enable_map_free(&enable_map); ++ if (errcode) { ++ dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); ++ model_data->hvirt_cli = NULL; ++ return errcode; ++ } + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->kctx == kctx); ++ errcode = kbase_hwcnt_dump_buffer_alloc( ++ metadata, &model_data->dump_buf); ++ if (errcode) { ++ dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); ++ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); ++ model_data->hvirt_cli = NULL; ++ return errcode; ++ } + -+ result = katom - &kctx->jctx.atoms[0]; -+ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); -+ return result; ++ return 0; +} + -+/** -+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID -+ * @kctx: Context pointer -+ * @id: ID of atom to retrieve -+ * -+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID -+ */ -+static inline struct kbase_jd_atom *kbase_jd_atom_from_id( -+ struct kbase_context *kctx, int id) ++void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) +{ -+ return &kctx->jctx.atoms[id]; ++ if (model_data->hvirt_cli) { ++ kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); ++ kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); ++ model_data->hvirt_cli = NULL; ++ } +} -+#endif /* !MALI_USE_CSF */ -+ -+/** -+ * kbase_disjoint_init - Initialize the disjoint state -+ * -+ * @kbdev: The kbase device -+ * -+ * The disjoint event count and state are both set to zero. -+ * -+ * Disjoint functions usage: -+ * -+ * The disjoint event count should be incremented whenever a disjoint event occurs. -+ * -+ * There are several cases which are regarded as disjoint behavior. Rather than just increment -+ * the counter during disjoint events we also increment the counter when jobs may be affected -+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. -+ * -+ * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases -+ * the count of disjoint events. -+ * -+ * The disjoint state is then used to increase the count of disjoint events during job submission -+ * and job completion. Any atom submitted or completed while the disjoint state is greater than -+ * zero is regarded as a disjoint event. -+ * -+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped -+ * and during context creation. -+ * -+ * Return: 0 on success and non-zero value on failure. 
-+ */ -+void kbase_disjoint_init(struct kbase_device *kbdev); + -+/** -+ * kbase_disjoint_event - Increase the count of disjoint events -+ * called when a disjoint event has happened -+ * -+ * @kbdev: The kbase device -+ */ -+void kbase_disjoint_event(struct kbase_device *kbdev); ++int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++{ ++ struct kbase_ipa_model_vinstr_data *model_data = ++ (struct kbase_ipa_model_vinstr_data *)model->model_data; ++ s64 energy = 0; ++ size_t i; ++ u64 coeff = 0, coeff_mul = 0; ++ u64 start_ts_ns, end_ts_ns; ++ u32 active_cycles; ++ int err = 0; + -+/** -+ * kbase_disjoint_event_potential - Increase the count of disjoint events -+ * only if the GPU is in a disjoint state -+ * -+ * @kbdev: The kbase device -+ * -+ * This should be called when something happens which could be disjoint if the GPU -+ * is in a disjoint state. The state refcount keeps track of this. -+ */ -+void kbase_disjoint_event_potential(struct kbase_device *kbdev); ++ err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, ++ &start_ts_ns, &end_ts_ns, &model_data->dump_buf); ++ if (err) ++ goto err0; + -+/** -+ * kbase_disjoint_event_get - Returns the count of disjoint events -+ * -+ * @kbdev: The kbase device -+ * Return: the count of disjoint events -+ */ -+u32 kbase_disjoint_event_get(struct kbase_device *kbdev); ++ /* Range: 0 (GPU not used at all), to the max sampling interval, say ++ * 1s, * max GPU frequency (GPU 100% utilized). ++ * 0 <= active_cycles <= 1 * ~2GHz ++ * 0 <= active_cycles < 2^31 ++ */ ++ active_cycles = model_data->get_active_cycles(model_data); + -+/** -+ * kbase_disjoint_state_up - Increment the refcount state indicating that -+ * the GPU is in a disjoint state. -+ * -+ * @kbdev: The kbase device -+ * -+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) -+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down -+ * should be called -+ */ -+void kbase_disjoint_state_up(struct kbase_device *kbdev); ++ if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { ++ err = -ENODATA; ++ goto err0; ++ } + -+/** -+ * kbase_disjoint_state_down - Decrement the refcount state -+ * -+ * @kbdev: The kbase device -+ * -+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) -+ * -+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over -+ */ -+void kbase_disjoint_state_down(struct kbase_device *kbdev); ++ /* Range: 1 <= active_cycles < 2^31 */ ++ active_cycles = max(1u, active_cycles); + -+/** -+ * kbase_device_pcm_dev_init() - Initialize the priority control manager device -+ * -+ * @kbdev: Pointer to the structure for the kbase device -+ * -+ * Pointer to the priority control manager device is retrieved from the device -+ * tree and a reference is taken on the module implementing the callbacks for -+ * priority control manager operations. -+ * -+ * Return: 0 if successful, or an error code on failure -+ */ -+int kbase_device_pcm_dev_init(struct kbase_device *const kbdev); ++ /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around ++ * -2^57 < energy < 2^57 ++ */ ++ for (i = 0; i < model_data->groups_def_num; i++) { ++ const struct kbase_ipa_group *group = &model_data->groups_def[i]; ++ s32 coeff = model_data->group_values[i]; ++ s64 group_energy = group->op(model_data, coeff, ++ group->counter_block_offset); + -+/** -+ * kbase_device_pcm_dev_term() - Performs priority control manager device -+ * deinitialization. 
-+ * -+ * @kbdev: Pointer to the structure for the kbase device -+ * -+ * Reference is released on the module implementing the callbacks for priority -+ * control manager operations. -+ */ -+void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); ++ energy = kbase_ipa_add_saturate(energy, group_energy); ++ } + -+/** -+ * KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped -+ * and the number of contexts is >= this value it is reported as a disjoint event -+ */ -+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 ++ /* Range: 0 <= coeff < 2^57 */ ++ if (energy > 0) ++ coeff = energy; + -+#if !defined(UINT64_MAX) -+ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) -+#endif ++ /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this ++ * can be constrained further: Counter values can only be increased by ++ * a theoretical maximum of about 64k per clock cycle. Beyond this, ++ * we'd have to sample every 1ms to avoid them overflowing at the ++ * lowest clock frequency (say 100MHz). Therefore, we can write the ++ * range of 'coeff' in terms of active_cycles: ++ * ++ * coeff = SUM(coeffN * counterN * num_cores_for_counterN) ++ * coeff <= SUM(coeffN * counterN) * max_num_cores ++ * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores ++ * (substitute max_counter = 2^16 * active_cycles) ++ * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores ++ * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5 ++ * coeff <= 2^46 * active_cycles ++ * ++ * So after the division: 0 <= coeff <= 2^46 ++ */ ++ coeff = div_u64(coeff, active_cycles); + -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c -new file mode 100644 -index 000000000..10dbeee02 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c -@@ -0,0 +1,109 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Not all models were derived at the same reference voltage. Voltage ++ * scaling is done by multiplying by V^2, so we need to *divide* by ++ * Vref^2 here. ++ * Range: 0 <= coeff <= 2^49 ++ */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ /* Range: 0 <= coeff <= 2^52 */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + -+#include ++ /* Scale by user-specified integer factor. ++ * Range: 0 <= coeff_mul < 2^57 ++ */ ++ coeff_mul = coeff * model_data->scaling_factor; + -+#include -+#include -+#include ++ /* The power models have results with units ++ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this ++ * becomes fW/(Hz V^2), which are the units of coeff_mul. 
However, ++ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide ++ * by 1000. ++ * Range: 0 <= coeff_mul < 2^47 ++ */ ++ coeff_mul = div_u64(coeff_mul, 1000u); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#ifdef CONFIG_MALI_BIFROST_DEBUG ++err0: ++ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ ++ *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); ++ return err; ++} + -+static int kbase_as_fault_read(struct seq_file *sfile, void *data) ++void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model) +{ -+ uintptr_t as_no = (uintptr_t) sfile->private; ++ /* Currently not implemented */ ++ WARN_ON_ONCE(1); ++} + -+ struct list_head *entry; -+ const struct list_head *kbdev_list; -+ struct kbase_device *kbdev = NULL; ++int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_group *ipa_groups_def, ++ size_t ipa_group_size, ++ kbase_ipa_get_active_cycles_callback get_active_cycles, ++ s32 reference_voltage) ++{ ++ int err = 0; ++ size_t i; ++ struct kbase_ipa_model_vinstr_data *model_data; + -+ kbdev_list = kbase_device_get_list(); ++ if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles) ++ return -EINVAL; + -+ list_for_each(entry, kbdev_list) { -+ kbdev = list_entry(entry, struct kbase_device, entry); ++ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); ++ if (!model_data) ++ return -ENOMEM; + -+ if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { ++ model_data->kbdev = model->kbdev; ++ model_data->groups_def = ipa_groups_def; ++ model_data->groups_def_num = ipa_group_size; ++ model_data->get_active_cycles = get_active_cycles; + -+ /* don't show this one again until another fault occors */ -+ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); ++ model->model_data = (void *) model_data; + -+ /* output the last page fault addr */ -+ seq_printf(sfile, "%llu\n", -+ (u64) kbdev->as[as_no].pf_data.addr); -+ } ++ for (i = 0; i < model_data->groups_def_num; ++i) { ++ const struct kbase_ipa_group *group = &model_data->groups_def[i]; + ++ model_data->group_values[i] = group->default_value; ++ err = kbase_ipa_model_add_param_s32(model, group->name, ++ &model_data->group_values[i], ++ 1, false); ++ if (err) ++ goto exit; + } + -+ kbase_device_put_list(kbdev_list); ++ model_data->scaling_factor = DEFAULT_SCALING_FACTOR; ++ err = kbase_ipa_model_add_param_s32(model, "scale", ++ &model_data->scaling_factor, ++ 1, false); ++ if (err) ++ goto exit; + -+ return 0; -+} ++ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; ++ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", ++ &model_data->min_sample_cycles, ++ 1, false); ++ if (err) ++ goto exit; + -+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbase_as_fault_read, in->i_private); -+} ++ model_data->reference_voltage = reference_voltage; ++ err = kbase_ipa_model_add_param_s32(model, "reference_voltage", ++ &model_data->reference_voltage, ++ 1, false); ++ if (err) ++ goto exit; + -+static const struct file_operations as_fault_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_as_fault_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ err = kbase_ipa_attach_vinstr(model_data); + -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+#endif /* CONFIG_DEBUG_FS */ ++exit: ++ if (err) { ++ kbase_ipa_model_param_free_all(model); ++ kfree(model_data); ++ } ++ return err; ++} + -+/* -+ * Initialize debugfs entry for each address space -+ */ -+void 
kbase_as_fault_debugfs_init(struct kbase_device *kbdev) ++void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) +{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ uint i; -+ char as_name[64]; -+ struct dentry *debugfs_directory; -+ -+ kbdev->debugfs_as_read_bitmap = 0ULL; -+ -+ KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); -+ KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); -+ -+ debugfs_directory = debugfs_create_dir("address_spaces", -+ kbdev->mali_debugfs_directory); -+ -+ if (IS_ERR_OR_NULL(debugfs_directory)) { -+ dev_warn(kbdev->dev, -+ "unable to create address_spaces debugfs directory"); -+ } else { -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) -+ debugfs_create_file(as_name, 0444, debugfs_directory, -+ (void *)(uintptr_t)i, &as_fault_fops); -+ } -+ } ++ struct kbase_ipa_model_vinstr_data *model_data = ++ (struct kbase_ipa_model_vinstr_data *)model->model_data; + -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+#endif /* CONFIG_DEBUG_FS */ ++ kbase_ipa_detach_vinstr(model_data); ++ kfree(model_data); +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h new file mode 100644 -index 000000000..59bbcf6f4 +index 000000000..608961084 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h -@@ -0,0 +1,48 @@ ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h +@@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2016, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -201983,80 +203811,229 @@ index 000000000..59bbcf6f4 + * + */ + -+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H -+#define _KBASE_AS_FAULT_DEBUG_FS_H ++#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_ ++#define _KBASE_IPA_COUNTER_COMMON_JM_H_ + -+/** -+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults -+ * -+ * @kbdev: Pointer to kbase_device -+ */ -+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); ++#include "mali_kbase.h" ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" + -+/** -+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs -+ * -+ * @kbdev: Pointer to kbase_device -+ * @as_no: The address space the fault occurred on -+ */ -+static inline void -+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) -+{ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); -+#endif /* CONFIG_DEBUG_FS */ -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+} ++/* Maximum number of IPA groups for an IPA model. 
*/ ++#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 + -+#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_bits.h b/drivers/gpu/arm/bifrost/mali_kbase_bits.h -new file mode 100644 -index 000000000..a085fd86c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_bits.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++/* Number of bytes per hardware counter in a vinstr_buffer. */ ++#define KBASE_IPA_NR_BYTES_PER_CNT (sizeof(u64)) ++ ++/* Number of hardware counters per block in a vinstr_buffer. */ ++#define KBASE_IPA_NR_CNT_PER_BLOCK 64 ++ ++/* Number of bytes per block in a vinstr_buffer. */ ++#define KBASE_IPA_NR_BYTES_PER_BLOCK \ ++ (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) ++ ++struct kbase_ipa_model_vinstr_data; ++ ++typedef u32 ++kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); ++ ++/** ++ * struct kbase_ipa_model_vinstr_data - IPA context per device ++ * @kbdev: pointer to kbase device ++ * @group_values: values of coefficients for IPA groups ++ * @groups_def: Array of IPA groups. ++ * @groups_def_num: Number of elements in the array of IPA groups. ++ * @get_active_cycles: Callback to return number of active cycles during ++ * counter sample period ++ * @hvirt_cli: hardware counter virtualizer client handle ++ * @dump_buf: buffer to dump hardware counters onto ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. Range approx ++ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 ++ * @scaling_factor: User-specified power scaling factor. This is an ++ * integer, which is multiplied by the power coefficient ++ * just before OPP scaling. ++ * Range approx 0-32: 0 < scaling_factor < 2^5 ++ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of ++ * cycles the GPU was working) is less than ++ * min_sample_cycles, the counter model will return an ++ * error, causing the IPA framework to approximate using ++ * the cached simple model results instead. This may be ++ * more accurate than extrapolating using a very small ++ * counter dump. ++ */ ++struct kbase_ipa_model_vinstr_data { ++ struct kbase_device *kbdev; ++ s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; ++ const struct kbase_ipa_group *groups_def; ++ size_t groups_def_num; ++ kbase_ipa_get_active_cycles_callback *get_active_cycles; ++ struct kbase_hwcnt_virtualizer_client *hvirt_cli; ++ struct kbase_hwcnt_dump_buffer dump_buf; ++ s32 reference_voltage; ++ s32 scaling_factor; ++ s32 min_sample_cycles; ++}; ++ ++/** ++ * struct kbase_ipa_group - represents a single IPA group ++ * @name: name of the IPA group ++ * @default_value: default value of coefficient for IPA group. ++ * Coefficients are interpreted as fractions where the ++ * denominator is 1000000. ++ * @op: which operation to be performed on the counter values ++ * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group ++ */ ++struct kbase_ipa_group { ++ const char *name; ++ s32 default_value; ++ s64 (*op)( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, ++ u32 counter_block_offset); ++ u32 counter_block_offset; ++}; ++ ++/** ++ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores ++ * @model_data: pointer to model data ++ * @coeff: model coefficient. 
Unity is ~2^20, so range approx ++ * +/- 4.0: -2^22 < coeff < 2^22 ++ * @counter: offset in bytes of the counter used to calculate energy ++ * for IPA group + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Calculate energy estimation based on hardware counter `counter' ++ * across all shader cores. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Return: Sum of counter values. Range: -2^54 < ret < 2^54 ++ */ ++s64 kbase_ipa_sum_all_shader_cores( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter); ++ ++/** ++ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks ++ * @model_data: pointer to model data ++ * @coeff: model coefficient. Unity is ~2^20, so range approx ++ * +/- 4.0: -2^22 < coeff < 2^22 ++ * @counter: offset in bytes of the counter used to calculate energy ++ * for IPA group + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Calculate energy estimation based on hardware counter `counter' across all ++ * memory system blocks. + * ++ * Return: Sum of counter values. Range: -2^51 < ret < 2^51 + */ ++s64 kbase_ipa_sum_all_memsys_blocks( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter); + -+#ifndef _KBASE_BITS_H_ -+#define _KBASE_BITS_H_ ++/** ++ * kbase_ipa_single_counter() - sum a single counter ++ * @model_data: pointer to model data ++ * @coeff: model coefficient. Unity is ~2^20, so range approx ++ * +/- 4.0: -2^22 < coeff < 2^22 ++ * @counter: offset in bytes of the counter used to calculate energy ++ * for IPA group ++ * ++ * Calculate energy estimation based on hardware counter `counter'. ++ * ++ * Return: Counter value. Range: -2^49 < ret < 2^49 ++ */ ++s64 kbase_ipa_single_counter( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, u32 counter); + -+#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) -+#include -+#else -+#include -+#endif ++/** ++ * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model. ++ * @model_data: pointer to model data ++ * ++ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer ++ * allows access to the hardware counters used to calculate ++ * energy consumption. ++ * ++ * Return: 0 on success, or an error code. ++ */ ++int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + -+#endif /* _KBASE_BITS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c ++/** ++ * kbase_ipa_detach_vinstr() - detach a vinstr_buffer from an IPA model. ++ * @model_data: pointer to model data ++ * ++ * Detach a vinstr_buffer from an IPA model. ++ */ ++void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); ++ ++/** ++ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters ++ * @model: pointer to instantiated model ++ * @coeffp: pointer to location where calculated power, in ++ * pW/(Hz V^2), is stored. 
++ * ++ * This is a GPU-agnostic implementation of the get_dynamic_coeff() ++ * function of an IPA model. It relies on the model being populated ++ * with GPU-specific attributes at initialization time. ++ * ++ * Return: 0 on success, or an error code. ++ */ ++int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); ++ ++/** ++ * kbase_ipa_vinstr_reset_data() - Reset the counters data used for dynamic ++ * power estimation ++ * @model: pointer to instantiated model ++ * ++ * Currently it is not implemented for JM GPUs. ++ * When implemented it is expected to retrieve the accumulated value of HW ++ * counters from the Vinstr component, without doing any processing, which is ++ * effectively a reset as the next call to kbase_ipa_counter_dynamic_coeff() ++ * will see the increment in counter values from this point onwards. ++ */ ++void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model); ++ ++/** ++ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model ++ * @model: ipa power model to initialize ++ * @ipa_groups_def: array of ipa groups which sets coefficients for ++ * the corresponding counters used in the ipa model ++ * @ipa_group_size: number of elements in the array @ipa_groups_def ++ * @get_active_cycles: callback to return the number of cycles the GPU was ++ * active during the counter sample period. ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. ++ * ++ * This initialization function performs initialization steps common ++ * for ipa models based on counter values. In each call, the model ++ * passes its specific coefficient values per ipa counter group via ++ * @ipa_groups_def array. ++ * ++ * Return: 0 on success, error code otherwise ++ */ ++int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_group *ipa_groups_def, ++ size_t ipa_group_size, ++ kbase_ipa_get_active_cycles_callback *get_active_cycles, ++ s32 reference_voltage); ++ ++/** ++ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model ++ * @model: ipa power model to terminate ++ * ++ * This function performs all necessary steps to terminate ipa power model ++ * including clean up of resources allocated to hold model data. ++ */ ++void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); ++ ++#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c new file mode 100644 -index 000000000..4675025ba +index 000000000..21b4e5288 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c -@@ -0,0 +1,64 @@ ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +@@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -202074,59 +204051,275 @@ index 000000000..4675025ba + * + */ + -+/* -+ * Cache Policy API. 
-+ */ ++#include "mali_kbase_ipa_counter_common_csf.h" ++#include "mali_kbase.h" + -+#include "mali_kbase_cache_policy.h" ++/* MEMSYS counter block offsets */ ++#define L2_RD_MSG_IN_CU (13) ++#define L2_RD_MSG_IN (16) ++#define L2_WR_MSG_IN (18) ++#define L2_SNP_MSG_IN (20) ++#define L2_RD_MSG_OUT (22) ++#define L2_READ_LOOKUP (26) ++#define L2_EXT_READ_NOSNP (30) ++#define L2_EXT_WRITE_NOSNP_FULL (43) + -+/* -+ * The output flags should be a combination of the following values: -+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled -+ * KBASE_REG_GPU_CACHED: GPU cache should be enabled ++/* SC counter block offsets */ ++#define FRAG_STARVING (8) ++#define FRAG_PARTIAL_QUADS_RAST (10) ++#define FRAG_QUADS_EZS_UPDATE (13) ++#define FULL_QUAD_WARPS (21) ++#define EXEC_INSTR_FMA (27) ++#define EXEC_INSTR_CVT (28) ++#define EXEC_INSTR_SFU (29) ++#define EXEC_INSTR_MSG (30) ++#define TEX_FILT_NUM_OPS (39) ++#define LS_MEM_READ_SHORT (45) ++#define LS_MEM_WRITE_SHORT (47) ++#define VARY_SLOT_16 (51) ++#define BEATS_RD_LSC_EXT (57) ++#define BEATS_RD_TEX (58) ++#define BEATS_RD_TEX_EXT (59) ++#define FRAG_QUADS_COARSE (68) ++ ++/* Tiler counter block offsets */ ++#define IDVS_POS_SHAD_STALL (23) ++#define PREFETCH_STALL (25) ++#define VFETCH_POS_READ_WAIT (29) ++#define VFETCH_VERTEX_WAIT (30) ++#define PRIMASSY_STALL (32) ++#define IDVS_VAR_SHAD_STALL (38) ++#define ITER_STALL (40) ++#define PMGR_PTR_RD_STALL (48) ++ ++#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ ++ { \ ++ .name = cnt_name, \ ++ .coeff_default_value = coeff, \ ++ .counter_block_offset = cnt_idx, \ ++ .counter_block_type = block_type, \ ++ } ++ ++#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) ++ ++#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER) ++ ++#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER) ++ ++/* Tables of description of HW counters used by IPA counter model. + * -+ * NOTE: Some components within the GPU might only be able to access memory -+ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for -+ * more details. ++ * These tables provide a description of each performance counter ++ * used by the top level counter model for energy estimation. 
+ */ -+u32 kbase_cache_enabled(u32 flags, u32 nr_pages) -+{ -+ u32 cache_flags = 0; ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = { ++ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), + -+ CSTD_UNUSED(nr_pages); ++ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), ++ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), ++ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), ++ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), ++}; + -+ if (!(flags & BASE_MEM_UNCACHED_GPU)) -+ cache_flags |= KBASE_REG_GPU_CACHED; ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tgrx[] = { ++ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), + -+ if (flags & BASE_MEM_CACHED_CPU) -+ cache_flags |= KBASE_REG_CPU_CACHED; ++ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), ++ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), ++ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), ++ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), ++}; + -+ return cache_flags; -+} ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_tvax[] = { ++ MEMSYS_COUNTER_DEF("l2_rd_msg_out", 491414, L2_RD_MSG_OUT), ++ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 408645, L2_WR_MSG_IN), + -+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, -+ enum dma_data_direction dir) ++ TILER_COUNTER_DEF("iter_stall", 893324, ITER_STALL), ++ TILER_COUNTER_DEF("pmgr_ptr_rd_stall", -975117, PMGR_PTR_RD_STALL), ++ TILER_COUNTER_DEF("idvs_pos_shad_stall", 22555, IDVS_POS_SHAD_STALL), ++}; ++ ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { ++ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 800836, L2_RD_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_wr_msg_in", 415579, L2_WR_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_read_lookup", -198124, L2_READ_LOOKUP), ++ ++ TILER_COUNTER_DEF("idvs_pos_shad_stall", 117358, IDVS_POS_SHAD_STALL), ++ TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), ++}; ++ ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { ++ TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), ++ TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), ++ ++ MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), ++ MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), ++}; ++ ++/* These tables provide a description of each performance counter ++ * used by the shader cores counter model for energy estimation. 
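++ *
++ * The shader-core counters are kept separate from the top-level (memory
++ * system and tiler) counters above because, as noted later in this patch for
++ * opp_translate_freq_voltage() and kbase_get_real_power_locked(), the two
++ * groups can be scaled with different frequency/voltage operating points on
++ * GPUs with two clock domains.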
++ */ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = { ++ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), ++ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), ++ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), ++ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), ++ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), ++}; ++ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tgrx[] = { ++ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), ++ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), ++ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), ++ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), ++ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), ++}; ++ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_tvax[] = { ++ SC_COUNTER_DEF("tex_filt_num_operations", 142536, TEX_FILT_NUM_OPS), ++ SC_COUNTER_DEF("exec_instr_fma", 243497, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("exec_instr_msg", 1344410, EXEC_INSTR_MSG), ++ SC_COUNTER_DEF("vary_slot_16", -119612, VARY_SLOT_16), ++ SC_COUNTER_DEF("frag_partial_quads_rast", 676201, FRAG_PARTIAL_QUADS_RAST), ++ SC_COUNTER_DEF("frag_starving", 62421, FRAG_STARVING), ++}; ++ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { ++ SC_COUNTER_DEF("exec_instr_fma", 457012, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("tex_filt_num_operations", 441911, TEX_FILT_NUM_OPS), ++ SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT), ++ SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS), ++ SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT), ++ SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), ++}; ++ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { ++ SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), ++ SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), ++ SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), ++ SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), ++ SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), ++ SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), ++ SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), ++}; ++ ++#define IPA_POWER_MODEL_OPS(gpu, init_token) \ ++ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ ++ .name = "mali-" #gpu "-power-model", \ ++ .init = kbase_ ## init_token ## _power_model_init, \ ++ .term = kbase_ipa_counter_common_model_term, \ ++ .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ ++ .reset_counter_data = kbase_ipa_counter_reset_data, \ ++ }; \ ++ KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) ++ ++#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ ++ static int kbase_ ## gpu ## _power_model_init(\ ++ struct kbase_ipa_model *model) \ ++ { \ ++ BUILD_BUG_ON((1 + \ ++ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\ ++ ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \ ++ KBASE_IPA_MAX_COUNTER_DEF_NUM); \ ++ return kbase_ipa_counter_common_model_init(model, \ ++ ipa_top_level_cntrs_def_ ## gpu, \ ++ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \ ++ ipa_shader_core_cntrs_def_ ## gpu, \ ++ ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \ ++ 
(reference_voltage)); \ ++ } \ ++ IPA_POWER_MODEL_OPS(gpu, gpu) ++ ++ ++#define ALIAS_POWER_MODEL(gpu, as_gpu) \ ++ IPA_POWER_MODEL_OPS(gpu, as_gpu) ++ ++/* Reference voltage value is 750 mV. */ ++STANDARD_POWER_MODEL(todx, 750); ++STANDARD_POWER_MODEL(tgrx, 750); ++STANDARD_POWER_MODEL(tvax, 750); ++STANDARD_POWER_MODEL(ttux, 750); ++/* Reference voltage value is 550 mV. */ ++STANDARD_POWER_MODEL(ttix, 550); ++ ++/* Assuming LODX is an alias of TODX for IPA */ ++ALIAS_POWER_MODEL(lodx, todx); ++ ++/* Assuming LTUX is an alias of TTUX for IPA */ ++ALIAS_POWER_MODEL(ltux, ttux); ++ ++/* Assuming LTUX is an alias of TTUX for IPA */ ++ALIAS_POWER_MODEL(ltix, ttix); ++ ++static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { ++ &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, ++ &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, ++ &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, ++ &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, ++}; ++ ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name) +{ -+ dma_sync_single_for_device(kbdev->dev, handle, size, dir); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { ++ const struct kbase_ipa_model_ops *ops = ++ ipa_counter_model_ops[i]; ++ ++ if (!strcmp(ops->name, name)) ++ return ops; ++ } ++ ++ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ ++ return NULL; +} -+KBASE_EXPORT_TEST_API(kbase_sync_single_for_device); + -+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, -+ enum dma_data_direction dir) ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +{ -+ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); ++ const u32 prod_id = ++ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { ++ case GPU_ID2_PRODUCT_TODX: ++ return "mali-todx-power-model"; ++ case GPU_ID2_PRODUCT_LODX: ++ return "mali-lodx-power-model"; ++ case GPU_ID2_PRODUCT_TGRX: ++ return "mali-tgrx-power-model"; ++ case GPU_ID2_PRODUCT_TVAX: ++ return "mali-tvax-power-model"; ++ case GPU_ID2_PRODUCT_TTUX: ++ return "mali-ttux-power-model"; ++ case GPU_ID2_PRODUCT_LTUX: ++ return "mali-ltux-power-model"; ++ case GPU_ID2_PRODUCT_TTIX: ++ return "mali-ttix-power-model"; ++ case GPU_ID2_PRODUCT_LTIX: ++ return "mali-ltix-power-model"; ++ default: ++ return NULL; ++ } +} -+KBASE_EXPORT_TEST_API(kbase_sync_single_for_cpu); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h +diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c new file mode 100644 -index 000000000..598d24538 +index 000000000..2092db042 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h -@@ -0,0 +1,47 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +@@ -0,0 +1,561 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -202144,118 +204337,556 @@ index 000000000..598d24538 + * + */ + -+/* -+ * Cache Policy API. -+ */ -+ -+#ifndef _KBASE_CACHE_POLICY_H_ -+#define _KBASE_CACHE_POLICY_H_ ++#include + ++#include "mali_kbase_ipa_counter_common_jm.h" +#include "mali_kbase.h" -+#include ++#include ++ ++/* Performance counter blocks base offsets */ ++#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) ++#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) ++ ++/* JM counter block offsets */ ++#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) ++ ++/* MEMSYS counter block offsets */ ++#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) ++ ++/* SC counter block offsets */ ++#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) ++#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) ++#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) ++#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39) ++#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) ++#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) ++#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) ++#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) + +/** -+ * kbase_cache_enabled - Choose the cache policy for a specific region -+ * @flags: flags describing attributes of the region -+ * @nr_pages: total number of pages (backed or not) for the region -+ * -+ * Tells whether the CPU and GPU caches should be enabled or not for a specific -+ * region. -+ * This function can be modified to customize the cache policy depending on the -+ * flags and size of the region. ++ * kbase_g7x_power_model_get_jm_counter() - get performance counter offset ++ * inside the Job Manager block ++ * @model_data: pointer to GPU model data. ++ * @counter_block_offset: offset in bytes of the performance counter inside ++ * the Job Manager block. + * -+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED -+ * depending on the cache policy ++ * Return: Block offset in bytes of the required performance counter. + */ -+u32 kbase_cache_enabled(u32 flags, u32 nr_pages); ++static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, ++ u32 counter_block_offset) ++{ ++ return JM_BASE + counter_block_offset; ++} + -+#endif /* _KBASE_CACHE_POLICY_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h -new file mode 100644 -index 000000000..a92569d31 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h -@@ -0,0 +1,70 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbase_g7x_power_model_get_memsys_counter() - get performance counter offset ++ * inside the Memory System block ++ * @model_data: pointer to GPU model data. ++ * @counter_block_offset: offset in bytes of the performance counter inside ++ * the (first) Memory System block. + * ++ * Return: Block offset in bytes of the required performance counter. + */ ++static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, ++ u32 counter_block_offset) ++{ ++ /* The base address of Memory System performance counters is always the same, although their number ++ * may vary based on the number of cores. For the moment it's ok to return a constant. ++ */ ++ return MEMSYS_BASE + counter_block_offset; ++} + +/** -+ * DOC: Driver Capability Queries. ++ * kbase_g7x_power_model_get_sc_counter() - get performance counter offset ++ * inside the Shader Cores block ++ * @model_data: pointer to GPU model data. ++ * @counter_block_offset: offset in bytes of the performance counter inside ++ * the (first) Shader Cores block. ++ * ++ * Return: Block offset in bytes of the required performance counter. + */ ++static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, ++ u32 counter_block_offset) ++{ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ const u32 sc_base = MEMSYS_BASE + ++ (KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * ++ KBASE_IPA_NR_BYTES_PER_BLOCK); ++#else ++ const u32 sc_base = MEMSYS_BASE + ++ (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * ++ KBASE_IPA_NR_BYTES_PER_BLOCK); ++#endif ++ return sc_base + counter_block_offset; ++} + -+#ifndef _KBASE_CAPS_H_ -+#define _KBASE_CAPS_H_ ++/** ++ * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory ++ * System performance counter. ++ * @model_data: pointer to GPU model data. ++ * @coeff: default value of coefficient for IPA group. ++ * @counter_block_offset: offset in bytes of the counter inside the block it ++ * belongs to. ++ * ++ * Return: Energy estimation for a single Memory System performance counter. ++ */ ++static s64 kbase_g7x_sum_all_memsys_blocks( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, ++ u32 counter_block_offset) ++{ ++ u32 counter; + -+#include ++ counter = kbase_g7x_power_model_get_memsys_counter(model_data, ++ counter_block_offset); ++ return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); ++} + +/** -+ * enum mali_kbase_cap - Enumeration for kbase capability ++ * kbase_g7x_sum_all_shader_cores() - calculate energy for a Shader Cores ++ * performance counter for all cores. ++ * @model_data: pointer to GPU model data. ++ * @coeff: default value of coefficient for IPA group. ++ * @counter_block_offset: offset in bytes of the counter inside the block it ++ * belongs to. + * -+ * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor -+ * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit -+ * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault -+ * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory -+ * @MALI_KBASE_NUM_CAPS: Delimiter ++ * Return: Energy estimation for a Shader Cores performance counter for all ++ * cores. 
+ */ -+enum mali_kbase_cap { -+ MALI_KBASE_CAP_SYSTEM_MONITOR = 0, -+ MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, -+ MALI_KBASE_CAP_MEM_GROW_ON_GPF, -+ MALI_KBASE_CAP_MEM_PROTECTED, -+ MALI_KBASE_NUM_CAPS -+}; ++static s64 kbase_g7x_sum_all_shader_cores( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, ++ u32 counter_block_offset) ++{ ++ u32 counter; + -+extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap); ++ counter = kbase_g7x_power_model_get_sc_counter(model_data, ++ counter_block_offset); ++ return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); ++} + -+static inline bool mali_kbase_supports_system_monitor(unsigned long api_version) ++/** ++ * kbase_g7x_jm_single_counter() - calculate energy for a single Job Manager performance counter. ++ * @model_data: pointer to GPU model data. ++ * @coeff: default value of coefficient for IPA group. ++ * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. ++ * ++ * Return: Energy estimation for a single Job Manager performance counter. ++ */ ++static s64 kbase_g7x_jm_single_counter( ++ struct kbase_ipa_model_vinstr_data *model_data, ++ s32 coeff, ++ u32 counter_block_offset) +{ -+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR); ++ u32 counter; ++ ++ counter = kbase_g7x_power_model_get_jm_counter(model_data, ++ counter_block_offset); ++ return kbase_ipa_single_counter(model_data, coeff, counter); +} + -+static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version) ++/** ++ * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter ++ * @model_data: pointer to GPU model data. ++ * ++ * Return: the number of cycles the GPU was active during the counter sampling ++ * period. ++ */ ++static u32 kbase_g7x_get_active_cycles( ++ struct kbase_ipa_model_vinstr_data *model_data) +{ -+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); ++ u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); ++ ++ /* Counters are only 32-bit, so we can safely multiply by 1 then cast ++ * the 64-bit result back to a u32. ++ */ ++ return kbase_ipa_single_counter(model_data, 1, counter); +} + -+static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) ++/* Table of IPA group definitions. ++ * ++ * For each IPA group, this table defines a function to access the given performance block counter (or counters, ++ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
++ */ ++ ++static const struct kbase_ipa_group ipa_groups_def_g71[] = { ++ { ++ .name = "l2_access", ++ .default_value = 526300, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_count", ++ .default_value = 301100, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_COUNT, ++ }, ++ { ++ .name = "tex_issue", ++ .default_value = 197400, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_COORD_ISSUE, ++ }, ++ { ++ .name = "tile_wb", ++ .default_value = -156400, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_BEATS_WR_TIB, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 115800, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_g72[] = { ++ { ++ .name = "l2_access", ++ .default_value = 393000, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_count", ++ .default_value = 227000, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_COUNT, ++ }, ++ { ++ .name = "tex_issue", ++ .default_value = 181900, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_COORD_ISSUE, ++ }, ++ { ++ .name = "tile_wb", ++ .default_value = -120200, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_BEATS_WR_TIB, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 133100, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_g76[] = { ++ { ++ .name = "gpu_active", ++ .default_value = 122000, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++ { ++ .name = "exec_instr_count", ++ .default_value = 488900, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_COUNT, ++ }, ++ { ++ .name = "vary_instr", ++ .default_value = 212100, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_VARY_INSTR, ++ }, ++ { ++ .name = "tex_tfch_num_operations", ++ .default_value = 288000, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, ++ }, ++ { ++ .name = "l2_access", ++ .default_value = 378100, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { ++ { ++ .name = "gpu_active", ++ .default_value = 224200, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++ { ++ .name = "exec_instr_count", ++ .default_value = 384700, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_COUNT, ++ }, ++ { ++ .name = "vary_instr", ++ .default_value = 271900, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_VARY_INSTR, ++ }, ++ { ++ .name = "tex_tfch_num_operations", ++ .default_value = 477700, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, ++ }, ++ { ++ .name = "l2_access", ++ .default_value = 551400, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_g51[] = { ++ { ++ .name = "gpu_active", ++ .default_value = 201400, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = 
JM_GPU_ACTIVE, ++ }, ++ { ++ .name = "exec_instr_count", ++ .default_value = 392700, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_COUNT, ++ }, ++ { ++ .name = "vary_instr", ++ .default_value = 274000, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_VARY_INSTR, ++ }, ++ { ++ .name = "tex_tfch_num_operations", ++ .default_value = 528000, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, ++ }, ++ { ++ .name = "l2_access", ++ .default_value = 506400, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_g77[] = { ++ { ++ .name = "l2_access", ++ .default_value = 710800, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_msg", ++ .default_value = 2375300, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_MSG, ++ }, ++ { ++ .name = "exec_instr_fma", ++ .default_value = 656100, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_FMA, ++ }, ++ { ++ .name = "tex_filt_num_operations", ++ .default_value = 318800, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 172800, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_tbex[] = { ++ { ++ .name = "l2_access", ++ .default_value = 599800, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_msg", ++ .default_value = 1830200, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_MSG, ++ }, ++ { ++ .name = "exec_instr_fma", ++ .default_value = 407300, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_FMA, ++ }, ++ { ++ .name = "tex_filt_num_operations", ++ .default_value = 224500, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 153800, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ ++static const struct kbase_ipa_group ipa_groups_def_tbax[] = { ++ { ++ .name = "l2_access", ++ .default_value = 599800, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_msg", ++ .default_value = 1830200, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_MSG, ++ }, ++ { ++ .name = "exec_instr_fma", ++ .default_value = 407300, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_FMA, ++ }, ++ { ++ .name = "tex_filt_num_operations", ++ .default_value = 224500, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 153800, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ ++#define IPA_POWER_MODEL_OPS(gpu, init_token) \ ++ static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \ ++ .name = "mali-" #gpu "-power-model", \ ++ .init = kbase_##init_token##_power_model_init, \ ++ .term = kbase_ipa_vinstr_common_model_term, \ ++ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ ++ 
.reset_counter_data = kbase_ipa_vinstr_reset_data, \ ++ } ++ ++#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ ++ static int kbase_ ## gpu ## _power_model_init(\ ++ struct kbase_ipa_model *model) \ ++ { \ ++ BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ ++ KBASE_IPA_MAX_GROUP_DEF_NUM); \ ++ return kbase_ipa_vinstr_common_model_init(model, \ ++ ipa_groups_def_ ## gpu, \ ++ ARRAY_SIZE(ipa_groups_def_ ## gpu), \ ++ kbase_g7x_get_active_cycles, \ ++ (reference_voltage)); \ ++ } \ ++ IPA_POWER_MODEL_OPS(gpu, gpu) ++ ++#define ALIAS_POWER_MODEL(gpu, as_gpu) \ ++ IPA_POWER_MODEL_OPS(gpu, as_gpu) ++ ++STANDARD_POWER_MODEL(g71, 800); ++STANDARD_POWER_MODEL(g72, 800); ++STANDARD_POWER_MODEL(g76, 800); ++STANDARD_POWER_MODEL(g52_r1, 1000); ++STANDARD_POWER_MODEL(g51, 1000); ++STANDARD_POWER_MODEL(g77, 1000); ++STANDARD_POWER_MODEL(tbex, 1000); ++STANDARD_POWER_MODEL(tbax, 1000); ++ ++/* g52 is an alias of g76 (TNOX) for IPA */ ++ALIAS_POWER_MODEL(g52, g76); ++/* tnax is an alias of g77 (TTRX) for IPA */ ++ALIAS_POWER_MODEL(tnax, g77); ++ ++static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { ++ &kbase_g71_ipa_model_ops, ++ &kbase_g72_ipa_model_ops, ++ &kbase_g76_ipa_model_ops, ++ &kbase_g52_ipa_model_ops, ++ &kbase_g52_r1_ipa_model_ops, ++ &kbase_g51_ipa_model_ops, ++ &kbase_g77_ipa_model_ops, ++ &kbase_tnax_ipa_model_ops, ++ &kbase_tbex_ipa_model_ops, ++ &kbase_tbax_ipa_model_ops ++}; ++ ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name) +{ -+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { ++ const struct kbase_ipa_model_ops *ops = ++ ipa_counter_model_ops[i]; ++ ++ if (!strcmp(ops->name, name)) ++ return ops; ++ } ++ ++ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ ++ return NULL; +} + -+static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +{ -+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); -+} ++ const u32 prod_id = ++ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; + -+#endif /* __KBASE_CAPS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c ++ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { ++ case GPU_ID2_PRODUCT_TMIX: ++ return "mali-g71-power-model"; ++ case GPU_ID2_PRODUCT_THEX: ++ return "mali-g72-power-model"; ++ case GPU_ID2_PRODUCT_TNOX: ++ return "mali-g76-power-model"; ++ case GPU_ID2_PRODUCT_TSIX: ++ return "mali-g51-power-model"; ++ case GPU_ID2_PRODUCT_TGOX: ++ if ((gpu_id & GPU_ID2_VERSION_MAJOR) == ++ (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) ++ /* g52 aliased to g76 power-model's ops */ ++ return "mali-g52-power-model"; ++ else ++ return "mali-g52_r1-power-model"; ++ case GPU_ID2_PRODUCT_TNAX: ++ return "mali-tnax-power-model"; ++ case GPU_ID2_PRODUCT_TTRX: ++ return "mali-g77-power-model"; ++ case GPU_ID2_PRODUCT_TBEX: ++ return "mali-tbex-power-model"; ++ case GPU_ID2_PRODUCT_TBAX: ++ return "mali-tbax-power-model"; ++ default: ++ return NULL; ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c new file mode 100644 -index 000000000..6a1e7e4c5 +index 000000000..b2e6bc459 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c -@@ -0,0 +1,100 @@ ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +@@ 
-0,0 +1,778 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -202273,306 +204904,773 @@ index 000000000..6a1e7e4c5 + * + */ + -+#include "mali_kbase_ccswe.h" -+#include "mali_kbase_linux.h" ++#include ++#include ++#include ++#include "mali_kbase.h" ++#include "mali_kbase_ipa.h" ++#include "mali_kbase_ipa_debugfs.h" ++#include "mali_kbase_ipa_simple.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "backend/gpu/mali_kbase_devfreq.h" ++#include + -+#include -+#include ++#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" + -+static u64 kbasep_ccswe_cycle_at_no_lock( -+ struct kbase_ccswe *self, u64 timestamp_ns) ++/* Polling by thermal governor starts when the temperature exceeds the certain ++ * trip point. In order to have meaningful value for the counters, when the ++ * polling starts and first call to kbase_get_real_power() is made, it is ++ * required to reset the counter values every now and then. ++ * It is reasonable to do the reset every second if no polling is being done, ++ * the counter model implementation also assumes max sampling interval of 1 sec. ++ */ ++#define RESET_INTERVAL_MS ((s64)1000) ++ ++int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) +{ -+ s64 diff_s, diff_ns; -+ u32 gpu_freq; ++ int err = 0; + -+ lockdep_assert_held(&self->access); ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+ diff_ns = timestamp_ns - self->timestamp_ns; -+ gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq; ++ if (model->ops->recalculate) { ++ err = model->ops->recalculate(model); ++ if (err) { ++ dev_err(model->kbdev->dev, ++ "recalculation of power model %s returned error %d\n", ++ model->ops->name, err); ++ } ++ } + -+ diff_s = div_s64(diff_ns, NSEC_PER_SEC); -+ diff_ns -= diff_s * NSEC_PER_SEC; ++ return err; ++} + -+ return self->cycles_elapsed + diff_s * gpu_freq -+ + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); ++const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, ++ const char *name) ++{ ++ if (!strcmp(name, kbase_simple_ipa_model_ops.name)) ++ return &kbase_simple_ipa_model_ops; ++ ++ return kbase_ipa_counter_model_ops_find(kbdev, name); +} ++KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); + -+void kbase_ccswe_init(struct kbase_ccswe *self) ++const char *kbase_ipa_model_name_from_id(u32 gpu_id) +{ -+ memset(self, 0, sizeof(*self)); ++ const char *model_name = ++ kbase_ipa_counter_model_name_from_id(gpu_id); + -+ spin_lock_init(&self->access); ++ if (!model_name) ++ return KBASE_IPA_FALLBACK_MODEL_NAME; ++ else ++ return model_name; +} ++KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); + -+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) ++static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, ++ bool dt_required) +{ -+ unsigned long flags; -+ u64 result; ++ struct device_node *model_dt_node = NULL; ++ char compat_string[64]; + -+ spin_lock_irqsave(&self->access, flags); -+ result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); -+ spin_unlock_irqrestore(&self->access, flags); ++ if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name))) ++ return NULL; + -+ return result; ++ /* of_find_compatible_node() will 
call of_node_put() on the root node, ++ * so take a reference on it first. ++ */ ++ of_node_get(model->kbdev->dev->of_node); ++ model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, ++ NULL, compat_string); ++ if (!model_dt_node && !model->missing_dt_node_warning) { ++ if (dt_required) ++ dev_warn(model->kbdev->dev, ++ "Couldn't find power_model DT node matching \'%s\'\n", ++ compat_string); ++ model->missing_dt_node_warning = true; ++ } ++ ++ return model_dt_node; +} + -+void kbase_ccswe_freq_change( -+ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) ++int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, ++ const char *name, s32 *addr, ++ size_t num_elems, bool dt_required) +{ -+ unsigned long flags; ++ int err = -EINVAL, i; ++ struct device_node *model_dt_node = get_model_dt_node(model, ++ dt_required); ++ char *origin; + -+ spin_lock_irqsave(&self->access, flags); ++ err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); ++ /* We're done with model_dt_node now, so drop the reference taken in ++ * get_model_dt_node()/of_find_compatible_node(). ++ */ ++ of_node_put(model_dt_node); + -+ /* The time must go only forward. */ -+ if (WARN_ON(timestamp_ns < self->timestamp_ns)) -+ goto exit; ++ if (err && dt_required) { ++ memset(addr, 0, sizeof(s32) * num_elems); ++ dev_warn(model->kbdev->dev, ++ "Error %d, no DT entry: %s.%s = %zu*[0]\n", ++ err, model->ops->name, name, num_elems); ++ origin = "zero"; ++ } else if (err && !dt_required) { ++ origin = "default"; ++ } else /* !err */ { ++ origin = "DT"; ++ } + -+ /* If this is the first frequency change, cycles_elapsed is zero. */ -+ if (self->timestamp_ns) -+ self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( -+ self, timestamp_ns); ++ /* Create a unique debugfs entry for each element */ ++ for (i = 0; i < num_elems; ++i) { ++ char elem_name[32]; + -+ self->timestamp_ns = timestamp_ns; -+ self->prev_gpu_freq = self->gpu_freq; -+ self->gpu_freq = gpu_freq; ++ if (num_elems == 1) { ++ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) { ++ err = -ENOMEM; ++ goto exit; ++ } ++ } else { ++ if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { ++ err = -ENOMEM; ++ goto exit; ++ } ++ } ++ ++ dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", ++ model->ops->name, elem_name, addr[i], origin); ++ ++ err = kbase_ipa_model_param_add(model, elem_name, ++ &addr[i], sizeof(s32), ++ PARAM_TYPE_S32); ++ if (err) ++ goto exit; ++ } +exit: -+ spin_unlock_irqrestore(&self->access, flags); ++ return err; +} + -+void kbase_ccswe_reset(struct kbase_ccswe *self) ++int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, ++ const char *name, char *addr, ++ size_t size, bool dt_required) +{ -+ unsigned long flags; ++ int err; ++ struct device_node *model_dt_node = get_model_dt_node(model, ++ dt_required); ++ const char *string_prop_value = ""; ++ char *origin; + -+ spin_lock_irqsave(&self->access, flags); ++ err = of_property_read_string(model_dt_node, name, ++ &string_prop_value); + -+ self->timestamp_ns = 0; -+ self->cycles_elapsed = 0; -+ self->gpu_freq = 0; -+ self->prev_gpu_freq = 0; ++ /* We're done with model_dt_node now, so drop the reference taken in ++ * get_model_dt_node()/of_find_compatible_node(). 
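++	 *
++	 * For reference, the devicetree node this helper family reads looks
++	 * roughly like the illustrative sketch below (node and property
++	 * contents are placeholders; only the compatible string follows the
++	 * "arm,<model name>" rule used by get_model_dt_node() above):
++	 *
++	 *   power_model {
++	 *           compatible = "arm,mali-simple-power-model";
++	 *           ...
++	 *   };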
++ */ ++ of_node_put(model_dt_node); + -+ spin_unlock_irqrestore(&self->access, flags); ++ if (err && dt_required) { ++ strncpy(addr, "", size - 1); ++ dev_warn(model->kbdev->dev, ++ "Error %d, no DT entry: %s.%s = \'%s\'\n", ++ err, model->ops->name, name, addr); ++ err = 0; ++ origin = "zero"; ++ } else if (err && !dt_required) { ++ origin = "default"; ++ } else /* !err */ { ++ strncpy(addr, string_prop_value, size - 1); ++ origin = "DT"; ++ } ++ ++ addr[size - 1] = '\0'; ++ ++ dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", ++ model->ops->name, name, string_prop_value, origin); ++ ++ err = kbase_ipa_model_param_add(model, name, addr, size, ++ PARAM_TYPE_STRING); ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h -new file mode 100644 -index 000000000..f7fcf7780 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h -@@ -0,0 +1,95 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#ifndef _KBASE_CCSWE_H_ -+#define _KBASE_CCSWE_H_ ++void kbase_ipa_term_model(struct kbase_ipa_model *model) ++{ ++ if (!model) ++ return; + -+#include ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+/** -+ * struct kbase_ccswe - Cycle count software estimator. -+ * -+ * @access: Spinlock protecting this structure access. -+ * @timestamp_ns: Timestamp(ns) when the last frequency change -+ * occurred. -+ * @cycles_elapsed: Number of cycles elapsed before the last frequency -+ * change -+ * @gpu_freq: Current GPU frequency(Hz) value. -+ * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency -+ * change. -+ */ -+struct kbase_ccswe { -+ spinlock_t access; -+ u64 timestamp_ns; -+ u64 cycles_elapsed; -+ u32 gpu_freq; -+ u32 prev_gpu_freq; -+}; ++ if (model->ops->term) ++ model->ops->term(model); + -+/** -+ * kbase_ccswe_init() - initialize the cycle count estimator. -+ * -+ * @self: Cycles count software estimator instance. -+ */ -+void kbase_ccswe_init(struct kbase_ccswe *self); ++ kbase_ipa_model_param_free_all(model); + -+/** -+ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp. -+ * -+ * @self: Cycles count software estimator instance. -+ * @timestamp_ns: The timestamp(ns) for cycle count estimation. -+ * -+ * The timestamp must be bigger than the timestamp of the penultimate -+ * frequency change. If only one frequency change occurred, the -+ * timestamp must be bigger than the timestamp of the frequency change. -+ * This is to allow the following code to be executed w/o synchronization. -+ * If lines below executed atomically, it is safe to assume that only -+ * one frequency change may happen in between. 
-+ * -+ * u64 ts = ktime_get_raw_ns(); -+ * u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts) -+ * -+ * Return: estimated value of cycle count at a given time. -+ */ -+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); ++ kfree(model); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_term_model); ++ ++struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, ++ const struct kbase_ipa_model_ops *ops) ++{ ++ struct kbase_ipa_model *model; ++ int err; ++ ++ lockdep_assert_held(&kbdev->ipa.lock); ++ ++ if (!ops || !ops->name) ++ return NULL; ++ ++ model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); ++ if (!model) ++ return NULL; ++ ++ model->kbdev = kbdev; ++ model->ops = ops; ++ INIT_LIST_HEAD(&model->params); ++ ++ err = model->ops->init(model); ++ if (err) { ++ dev_err(kbdev->dev, ++ "init of power model \'%s\' returned error %d\n", ++ ops->name, err); ++ kfree(model); ++ return NULL; ++ } ++ ++ err = kbase_ipa_model_recalculate(model); ++ if (err) { ++ kbase_ipa_term_model(model); ++ return NULL; ++ } ++ ++ return model; ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_init_model); ++ ++static void kbase_ipa_term_locked(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->ipa.lock); ++ ++ /* Clean up the models */ ++ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) ++ kbase_ipa_term_model(kbdev->ipa.configured_model); ++ kbase_ipa_term_model(kbdev->ipa.fallback_model); ++ ++ kbdev->ipa.configured_model = NULL; ++ kbdev->ipa.fallback_model = NULL; ++} ++ ++int kbase_ipa_init(struct kbase_device *kbdev) ++{ ++ ++ const char *model_name; ++ const struct kbase_ipa_model_ops *ops; ++ struct kbase_ipa_model *default_model = NULL; ++ int err; ++ ++ mutex_init(&kbdev->ipa.lock); ++ /* ++ * Lock during init to avoid warnings from lockdep_assert_held (there ++ * shouldn't be any concurrent access yet). 
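++	 * Specifically, kbase_ipa_init_model() and
++	 * kbase_ipa_model_recalculate() above both assert that ipa.lock is
++	 * held via lockdep_assert_held(), so the lock is taken here even
++	 * though nothing else can touch this kbdev yet.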
++ */ ++ mutex_lock(&kbdev->ipa.lock); ++ ++ /* The simple IPA model must *always* be present.*/ ++ ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); ++ ++ default_model = kbase_ipa_init_model(kbdev, ops); ++ if (!default_model) { ++ err = -EINVAL; ++ goto end; ++ } ++ ++ kbdev->ipa.fallback_model = default_model; ++ err = of_property_read_string(kbdev->dev->of_node, ++ "ipa-model", ++ &model_name); ++ if (err) { ++ /* Attempt to load a match from GPU-ID */ ++ u32 gpu_id; ++ ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ model_name = kbase_ipa_model_name_from_id(gpu_id); ++ dev_dbg(kbdev->dev, ++ "Inferring model from GPU ID 0x%x: \'%s\'\n", ++ gpu_id, model_name); ++ err = 0; ++ } else { ++ dev_dbg(kbdev->dev, ++ "Using ipa-model parameter from DT: \'%s\'\n", ++ model_name); ++ } ++ ++ if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { ++ ops = kbase_ipa_model_ops_find(kbdev, model_name); ++ kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); ++ if (!kbdev->ipa.configured_model) { ++ dev_warn(kbdev->dev, ++ "Failed to initialize ipa-model: \'%s\'\n" ++ "Falling back on default model\n", ++ model_name); ++ kbdev->ipa.configured_model = default_model; ++ } ++ } else { ++ kbdev->ipa.configured_model = default_model; ++ } ++ ++ kbdev->ipa.last_sample_time = ktime_get_raw(); ++ ++end: ++ if (err) ++ kbase_ipa_term_locked(kbdev); ++ else ++ dev_info(kbdev->dev, ++ "Using configured power model %s, and fallback %s\n", ++ kbdev->ipa.configured_model->ops->name, ++ kbdev->ipa.fallback_model->ops->name); ++ ++ mutex_unlock(&kbdev->ipa.lock); ++ return err; ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_init); ++ ++void kbase_ipa_term(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->ipa.lock); ++ kbase_ipa_term_locked(kbdev); ++ mutex_unlock(&kbdev->ipa.lock); ++ ++ mutex_destroy(&kbdev->ipa.lock); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_term); + +/** -+ * kbase_ccswe_freq_change() - update GPU frequency. ++ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP ++ * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range ++ * 0 < c < 2^26 to prevent overflow. ++ * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) ++ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * -+ * @self: Cycles count software estimator instance. -+ * @timestamp_ns: Timestamp(ns) when frequency change occurred. -+ * @gpu_freq: New GPU frequency value. ++ * Keep a record of the approximate range of each value at every stage of the ++ * calculation, to ensure we don't overflow. This makes heavy use of the ++ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual ++ * calculations in decimal for increased accuracy. + * -+ * The timestamp must be bigger than the timestamp of the previous -+ * frequency change. The function is to be called at the frequency -+ * change moment (not later). ++ * Return: Power consumption, in mW. 
Range: 0 < p < 2^13 (0W to ~8W) + */ -+void kbase_ccswe_freq_change( -+ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); ++static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, ++ const u32 voltage) ++{ ++ /* Range: 2^8 < v2 < 2^16 m(V^2) */ ++ const u32 v2 = (voltage * voltage) / 1000; ++ ++ /* Range: 2^3 < f_MHz < 2^10 MHz */ ++ const u32 f_MHz = freq / 1000000; ++ ++ /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ ++ const u32 v2f_big = v2 * f_MHz; ++ ++ /* Range: 2^1 < v2f < 2^16 MHz V^2 */ ++ const u32 v2f = v2f_big / 1000; ++ ++ /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. ++ * Must be < 2^42 to avoid overflowing the return value. ++ */ ++ const u64 v2fc = (u64) c * (u64) v2f; ++ ++ /* Range: 0 < v2fc / 1000 < 2^13 mW */ ++ return div_u64(v2fc, 1000); ++} + +/** -+ * kbase_ccswe_reset() - reset estimator state ++ * kbase_scale_static_power() - Scale a static power coefficient to an OPP ++ * @c: Static model coefficient, in uW/V^3. Should be in range ++ * 0 < c < 2^32 to prevent overflow. ++ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * -+ * @self: Cycles count software estimator instance. ++ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) + */ -+void kbase_ccswe_reset(struct kbase_ccswe *self); ++static u32 kbase_scale_static_power(const u32 c, const u32 voltage) ++{ ++ /* Range: 2^8 < v2 < 2^16 m(V^2) */ ++ const u32 v2 = (voltage * voltage) / 1000; + -+#endif /* _KBASE_CCSWE_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.c b/drivers/gpu/arm/bifrost/mali_kbase_config.c -new file mode 100644 -index 000000000..37dbca129 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.c -@@ -0,0 +1,104 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2011-2015, 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ ++ const u32 v3_big = v2 * voltage; + -+#include -+#include -+#include ++ /* Range: 2^7 < v3 < 2^19 m(V^3) */ ++ const u32 v3 = v3_big / 1000; + -+int kbasep_platform_device_init(struct kbase_device *kbdev) -+{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ /* ++ * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. ++ * The result should be < 2^52 to avoid overflowing the return value. 
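++	 *
++	 * Worked example with made-up inputs: for c = 1000000 uW/V^3 and
++	 * voltage = 850 mV, v2 = (850 * 850) / 1000 = 722,
++	 * v3 = (722 * 850) / 1000 = 613, so v3c_big = 613000000 and the
++	 * function returns 613 mW, matching c * (0.85 V)^3 = ~614 mW up to
++	 * the integer truncation applied at each step.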
++ */ ++ const u64 v3c_big = (u64) c * (u64) v3; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_init_func) -+ return platform_funcs_p->platform_init_func(kbdev); ++ /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ ++ return div_u64(v3c_big, 1000000); ++} + -+ return 0; ++void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* Record the event of GPU entering protected mode. */ ++ kbdev->ipa_protection_mode_switched = true; +} + -+void kbasep_platform_device_term(struct kbase_device *kbdev) ++static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ struct kbase_ipa_model *model; ++ unsigned long flags; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_term_func) -+ platform_funcs_p->platform_term_func(kbdev); ++ lockdep_assert_held(&kbdev->ipa.lock); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if (kbdev->ipa_protection_mode_switched || ++ kbdev->ipa.force_fallback_model) ++ model = kbdev->ipa.fallback_model; ++ else ++ model = kbdev->ipa.configured_model; ++ ++ /* ++ * Having taken cognizance of the fact that whether GPU earlier ++ * protected mode or not, the event can be now reset (if GPU is not ++ * currently in protected mode) so that configured model is used ++ * for the next sample. ++ */ ++ if (!kbdev->protected_mode) ++ kbdev->ipa_protection_mode_switched = false; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return model; +} + -+int kbasep_platform_device_late_init(struct kbase_device *kbdev) ++static u32 get_static_power_locked(struct kbase_device *kbdev, ++ struct kbase_ipa_model *model, ++ unsigned long voltage) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ u32 power = 0; ++ int err; ++ u32 power_coeff; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_late_init_func) -+ platform_funcs_p->platform_late_init_func(kbdev); ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+ return 0; ++ if (!model->ops->get_static_coeff) ++ model = kbdev->ipa.fallback_model; ++ ++ if (model->ops->get_static_coeff) { ++ err = model->ops->get_static_coeff(model, &power_coeff); ++ if (!err) ++ power = kbase_scale_static_power(power_coeff, ++ (u32) voltage); ++ } ++ ++ return power; +} + -+void kbasep_platform_device_late_term(struct kbase_device *kbdev) ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++static unsigned long kbase_get_static_power(struct devfreq *df, ++ unsigned long voltage) ++#else ++static unsigned long kbase_get_static_power(unsigned long voltage) ++#endif +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ struct kbase_ipa_model *model; ++ u32 power = 0; ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); ++#else ++ struct kbase_device *kbdev = kbase_find_device(-1); ++#endif + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_late_term_func) -+ platform_funcs_p->platform_late_term_func(kbdev); ++ if (!kbdev) ++ return 0ul; ++ ++ mutex_lock(&kbdev->ipa.lock); 
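++
++	/* get_current_model() above returns the configured counter-based
++	 * model unless a protected-mode entry was recorded or
++	 * force_fallback_model is set, in which case this reading uses the
++	 * simple fallback model instead.
++	 */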
++ ++ model = get_current_model(kbdev); ++ power = get_static_power_locked(kbdev, model, voltage); ++ ++ mutex_unlock(&kbdev->ipa.lock); ++ ++#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) ++ kbase_release_device(kbdev); ++#endif ++ ++ return power; +} ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ + -+#if !MALI_USE_CSF -+int kbasep_platform_context_init(struct kbase_context *kctx) ++/** ++ * opp_translate_freq_voltage() - Translate nominal OPP frequency from ++ * devicetree into the real frequency for ++ * top-level and shader cores. ++ * @kbdev: Device pointer ++ * @nominal_freq: Nominal frequency in Hz. ++ * @nominal_voltage: Nominal voltage, in mV. ++ * @freqs: Pointer to array of real frequency values. ++ * @volts: Pointer to array of voltages. ++ * ++ * If there are 2 clock domains, then top-level and shader cores can operate ++ * at different frequency and voltage level. The nominal frequency ("opp-hz") ++ * used by devfreq from the devicetree may not be same as the real frequency ++ * at which top-level and shader cores are operating, so a translation is ++ * needed. ++ * Nominal voltage shall always be same as the real voltage for top-level. ++ */ ++static void opp_translate_freq_voltage(struct kbase_device *kbdev, ++ unsigned long nominal_freq, ++ unsigned long nominal_voltage, ++ unsigned long *freqs, ++ unsigned long *volts) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* An arbitrary voltage and frequency value can be chosen for testing ++ * in no mali configuration which may not match with any OPP level. ++ */ ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq; ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_handler_context_init_func) -+ return platform_funcs_p->platform_handler_context_init_func(kctx); ++ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq; ++ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage; ++#else ++ u64 core_mask; ++ unsigned int i; + -+ return 0; ++ kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, ++ freqs, volts); ++ CSTD_UNUSED(core_mask); ++ ++ /* Convert micro volts to milli volts */ ++ for (i = 0; i < kbdev->nr_clocks; i++) ++ volts[i] /= 1000; ++ ++ if (kbdev->nr_clocks == 1) { ++ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ } ++#endif +} + -+void kbasep_platform_context_term(struct kbase_context *kctx) ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++static unsigned long kbase_get_dynamic_power(struct devfreq *df, ++ unsigned long freq, ++ unsigned long voltage) ++#else ++static unsigned long kbase_get_dynamic_power(unsigned long freq, ++ unsigned long voltage) ++#endif +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ struct kbase_ipa_model *model; ++ unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power = 0; ++ int err = 0; ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); ++#else ++ struct 
kbase_device *kbdev = kbase_find_device(-1); ++#endif + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_handler_context_term_func) -+ platform_funcs_p->platform_handler_context_term_func(kctx); ++ if (!kbdev) ++ return 0ul; ++ ++ mutex_lock(&kbdev->ipa.lock); ++ ++ model = kbdev->ipa.fallback_model; ++ ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); ++ ++ if (!err) { ++ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); ++ ++ power = kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ ++ /* Here unlike kbase_get_real_power(), shader core frequency is ++ * used for the scaling as simple power model is used to obtain ++ * the value of dynamic coefficient (which is a fixed value ++ * retrieved from the device tree). ++ */ ++ power += kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ } else ++ dev_err_ratelimited(kbdev->dev, ++ "Model %s returned error code %d\n", ++ model->ops->name, err); ++ ++ mutex_unlock(&kbdev->ipa.lock); ++ ++#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) ++ kbase_release_device(kbdev); ++#endif ++ ++ return power; +} ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ + -+void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom) ++int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, ++ unsigned long freq, ++ unsigned long voltage) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ struct kbase_ipa_model *model; ++ unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ struct kbasep_pm_metrics diff; ++ u64 total_time; ++ bool skip_utilization_scaling = false; ++ int err = 0; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_submit_func) -+ platform_funcs_p->platform_handler_atom_submit_func(katom); ++ lockdep_assert_held(&kbdev->ipa.lock); ++ ++ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); ++ ++ model = get_current_model(kbdev); ++ ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); ++ ++ /* If the counter model returns an error (e.g. switching back to ++ * protected mode and failing to read counters, or a counter sample ++ * with too few cycles), revert to the fallback model. ++ */ ++ if (err && model != kbdev->ipa.fallback_model) { ++ /* No meaningful scaling for GPU utilization can be done if ++ * the sampling interval was too long. This is equivalent to ++ * assuming GPU was busy throughout (similar to what is done ++ * during protected mode). 
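++		 * The scaling being skipped is the step further below that
++		 * multiplies the dynamic power by
++		 * time_busy / (time_busy + time_idle) from the DVFS metrics
++		 * snapshot taken by kbase_pm_get_dvfs_metrics() at the top of
++		 * this function, i.e. skipping it assumes 100% utilisation.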
++ */ ++ if (err == -EOVERFLOW) ++ skip_utilization_scaling = true; ++ ++ model = kbdev->ipa.fallback_model; ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); ++ } ++ ++ if (WARN_ON(err)) ++ return err; ++ ++ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); ++ ++ *power = kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ ++ if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) { ++ unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]; ++ ++ /* As per the HW team, the top-level frequency needs to be used ++ * for the scaling if the counter based model was used as ++ * counter values are normalized with the GPU_ACTIVE counter ++ * value, which increments at the rate of top-level frequency. ++ */ ++ if (model != kbdev->ipa.fallback_model) ++ freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ ++ *power += kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ } ++ ++ if (!skip_utilization_scaling) { ++ /* time_busy / total_time cannot be >1, so assigning the 64-bit ++ * result of div_u64 to *power cannot overflow. ++ */ ++ total_time = diff.time_busy + (u64) diff.time_idle; ++ *power = div_u64(*power * (u64) diff.time_busy, ++ max(total_time, 1ull)); ++ } ++ ++ *power += get_static_power_locked(kbdev, model, ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ ++ return err; +} ++KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); + -+void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom) ++int kbase_get_real_power(struct devfreq *df, u32 *power, ++ unsigned long freq, ++ unsigned long voltage) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ int ret; ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_complete_func) -+ platform_funcs_p->platform_handler_atom_complete_func(katom); ++ if (!kbdev) ++ return -ENODEV; ++ ++ mutex_lock(&kbdev->ipa.lock); ++ ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); ++ mutex_unlock(&kbdev->ipa.lock); ++ ++ return ret; +} ++KBASE_EXPORT_TEST_API(kbase_get_real_power); ++ ++struct devfreq_cooling_power kbase_ipa_power_model_ops = { ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE ++ .get_static_power = &kbase_get_static_power, ++ .get_dynamic_power = &kbase_get_dynamic_power, ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ .get_real_power = &kbase_get_real_power, +#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h ++}; ++KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); ++ ++void kbase_ipa_reset_data(struct kbase_device *kbdev) ++{ ++ ktime_t now, diff; ++ s64 elapsed_time; ++ ++ mutex_lock(&kbdev->ipa.lock); ++ ++ now = ktime_get_raw(); ++ diff = ktime_sub(now, kbdev->ipa.last_sample_time); ++ elapsed_time = ktime_to_ms(diff); ++ ++ if (elapsed_time > RESET_INTERVAL_MS) { ++ struct kbasep_pm_metrics diff; ++ struct kbase_ipa_model *model; ++ ++ kbase_pm_get_dvfs_metrics( ++ kbdev, &kbdev->ipa.last_metrics, &diff); ++ ++ model = get_current_model(kbdev); ++ if (model != kbdev->ipa.fallback_model) ++ model->ops->reset_counter_data(model); ++ ++ kbdev->ipa.last_sample_time = ktime_get_raw(); 
++ } ++ ++ mutex_unlock(&kbdev->ipa.lock); ++} +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h new file mode 100644 -index 000000000..ecfdb2876 +index 000000000..c875ffb49 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h -@@ -0,0 +1,583 @@ ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h +@@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -202590,578 +205688,299 @@ index 000000000..ecfdb2876 + * + */ + -+/** -+ * DOC: Configuration API and Attributes for KBase -+ */ -+ -+#ifndef _KBASE_CONFIG_H_ -+#define _KBASE_CONFIG_H_ -+ -+#include -+#include -+#include -+#include -+ -+/* Forward declaration of struct kbase_device */ -+struct kbase_device; ++#ifndef _KBASE_IPA_H_ ++#define _KBASE_IPA_H_ + -+#if !MALI_USE_CSF -+/* Forward declaration of struct kbase_context */ -+struct kbase_context; ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + -+/* Forward declaration of struct kbase_atom */ -+struct kbase_jd_atom; -+#endif ++struct devfreq; + +/** -+ * struct kbase_platform_funcs_conf - Specifies platform integration function -+ * pointers for DDK events such as device init and term. ++ * enum kbase_ipa_block_type - Type of block for which power estimation is done. + * -+ * Specifies the functions pointers for platform specific initialization and -+ * termination as well as other events. By default no functions are required. -+ * No additional platform specific control is necessary. ++ * @KBASE_IPA_BLOCK_TYPE_USING_CLK_MALI: ++ * Blocks using clk_mali in dts. ++ * @KBASE_IPA_BLOCK_TYPE_TOP_LEVEL: Top-level block, that covers CSHW, ++ * MEMSYS, Tiler. ++ * @KBASE_IPA_BLOCK_TYPE_SHADER_CORES: All Shader cores. ++ * @KBASE_IPA_BLOCK_TYPE_FOR_CLK_GPU: Dummy for clk_gpu in dts. ++ * @KBASE_IPA_BLOCK_TYPE_NUM: Number of blocks. + */ -+struct kbase_platform_funcs_conf { -+ /** -+ * @platform_init_func: platform specific init function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Returns 0 on success, negative error code otherwise. -+ * -+ * Function pointer for platform specific initialization or NULL if no -+ * initialization function is required. At the point this the GPU is -+ * not active and its power and clocks are in unknown (platform specific -+ * state) as kbase doesn't yet have control of power and clocks. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly initialized) in here. -+ */ -+ int (*platform_init_func)(struct kbase_device *kbdev); -+ /** -+ * @platform_term_func: platform specific termination function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Function pointer for platform specific termination or NULL if no -+ * termination function is required. At the point this the GPU will be -+ * idle but still powered and clocked. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly terminated) in here. 
-+ */ -+ void (*platform_term_func)(struct kbase_device *kbdev); -+ -+ /** -+ * @platform_late_init_func: platform specific late init function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Function pointer to inform that the kbase driver initialization completed -+ * or NULL if no such function is required. At this point the GPU driver will be -+ * fully initialized. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly terminated) in here. -+ */ -+ int (*platform_late_init_func)(struct kbase_device *kbdev); -+ -+ /** -+ * @platform_late_term_func: platform specific late termination function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Function pointer for platform specific termination or NULL if no -+ * termination function is required. At this point the GPU driver will complete -+ * termination process -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly terminated) in here. -+ */ -+ void (*platform_late_term_func)(struct kbase_device *kbdev); ++enum kbase_ipa_block_type { ++ KBASE_IPA_BLOCK_TYPE_USING_CLK_MALI, ++ KBASE_IPA_BLOCK_TYPE_TOP_LEVEL, ++ KBASE_IPA_BLOCK_TYPE_SHADER_CORES, ++ KBASE_IPA_BLOCK_TYPE_FOR_CLK_GPU, ++ KBASE_IPA_BLOCK_TYPE_NUM ++}; + -+#if !MALI_USE_CSF -+ /** -+ * @platform_handler_context_init_func: platform specific handler for -+ * when a new kbase_context is created. -+ * @kctx - kbase_context pointer -+ * -+ * Returns 0 on success, negative error code otherwise. -+ * -+ * Function pointer for platform specific initialization of a kernel -+ * context or NULL if not required. Called at the last stage of kernel -+ * context initialization. -+ */ -+ int (*platform_handler_context_init_func)(struct kbase_context *kctx); -+ /** -+ * @platform_handler_context_term_func: platform specific handler for -+ * when a kbase_context is terminated. -+ * @kctx - kbase_context pointer -+ * -+ * Function pointer for platform specific termination of a kernel -+ * context or NULL if not required. Called at the first stage of kernel -+ * context termination. -+ */ -+ void (*platform_handler_context_term_func)(struct kbase_context *kctx); -+ /** -+ * @platform_handler_atom_submit_func: platform specific handler for -+ * when a kbase_jd_atom is submitted. -+ * @katom - kbase_jd_atom pointer -+ * -+ * Function pointer for platform specific handling at the point when an -+ * atom is submitted to the GPU or set to NULL if not required. The -+ * function cannot assume that it is running in a process context. -+ * -+ * Context: The caller must hold the hwaccess_lock. Function must be -+ * runnable in an interrupt context. -+ */ -+ void (*platform_handler_atom_submit_func)(struct kbase_jd_atom *katom); -+ /** -+ * @platform_handler_atom_complete_func: platform specific handler for -+ * when a kbase_jd_atom completes. -+ * @katom - kbase_jd_atom pointer -+ * -+ * Function pointer for platform specific handling at the point when an -+ * atom stops running on the GPU or set to NULL if not required. The -+ * function cannot assume that it is running in a process context. -+ * -+ * Context: The caller must hold the hwaccess_lock. Function must be -+ * runnable in an interrupt context. -+ */ -+ void (*platform_handler_atom_complete_func)( -+ struct kbase_jd_atom *katom); -+#endif ++/** ++ * struct kbase_ipa_model - Object describing a particular IPA model. 
++ * @kbdev: pointer to kbase device ++ * @model_data: opaque pointer to model specific data, accessed ++ * only by model specific methods. ++ * @ops: pointer to object containing model specific methods. ++ * @params: head of the list of debugfs params added for model ++ * @missing_dt_node_warning: flag to limit the matching power model DT not found ++ * warning to once. ++ */ ++struct kbase_ipa_model { ++ struct kbase_device *kbdev; ++ void *model_data; ++ const struct kbase_ipa_model_ops *ops; ++ struct list_head params; ++ bool missing_dt_node_warning; +}; + -+/* -+ * @brief Specifies the callbacks for power management ++/** ++ * kbase_ipa_model_add_param_s32 - Add an integer model parameter ++ * @model: pointer to IPA model ++ * @name: name of corresponding debugfs entry ++ * @addr: address where the value is stored ++ * @num_elems: number of elements (1 if not an array) ++ * @dt_required: if false, a corresponding devicetree entry is not required, ++ * and the current value will be used. If true, a warning is ++ * output and the data is zeroed + * -+ * By default no callbacks will be made and the GPU must not be powered off. ++ * Return: 0 on success, or an error code + */ -+struct kbase_pm_callback_conf { -+ /** Callback for when the GPU is idle and the power to it can be switched off. -+ * -+ * The system integrator can decide whether to either do nothing, just switch off -+ * the clocks to the GPU, or to completely power down the GPU. -+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the -+ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). -+ * -+ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used -+ * then this callback should power off the GPU (or switch off the clocks -+ * to GPU) immediately. If @power_runtime_gpu_idle_callback is not used, -+ * then this callback can set the autosuspend timeout (if desired) and -+ * let the GPU be powered down later. -+ */ -+ void (*power_off_callback)(struct kbase_device *kbdev); -+ -+ /** Callback for when the GPU is about to become active and power must be supplied. -+ * -+ * This function must not return until the GPU is powered and clocked sufficiently for register access to -+ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. -+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0. -+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the -+ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). -+ * -+ * The return value of the first call to this function is ignored. -+ * -+ * @return 1 if the GPU state may have been lost, 0 otherwise. -+ */ -+ int (*power_on_callback)(struct kbase_device *kbdev); -+ -+ /** Callback for when the system is requesting a suspend and GPU power -+ * must be switched off. -+ * -+ * Note that if this callback is present, then this may be called -+ * without a preceding call to power_off_callback. Therefore this -+ * callback must be able to take any action that might otherwise happen -+ * in power_off_callback. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed and modified in here. 
It is the platform \em -+ * callbacks responsibility to initialize and terminate this pointer if -+ * used (see @ref kbase_platform_funcs_conf). -+ */ -+ void (*power_suspend_callback)(struct kbase_device *kbdev); -+ -+ /** Callback for when the system is resuming from a suspend and GPU -+ * power must be switched on. -+ * -+ * Note that if this callback is present, then this may be called -+ * without a following call to power_on_callback. Therefore this -+ * callback must be able to take any action that might otherwise happen -+ * in power_on_callback. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed and modified in here. It is the platform \em -+ * callbacks responsibility to initialize and terminate this pointer if -+ * used (see @ref kbase_platform_funcs_conf). -+ */ -+ void (*power_resume_callback)(struct kbase_device *kbdev); -+ -+ /** Callback for handling runtime power management initialization. -+ * -+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback -+ * will become active from calls made to the OS from within this function. -+ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ * -+ * @return 0 on success, else int error code. -+ */ -+ int (*power_runtime_init_callback)(struct kbase_device *kbdev); -+ -+ /** Callback for handling runtime power management termination. -+ * -+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback -+ * should no longer be called by the OS on completion of this function. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ */ -+ void (*power_runtime_term_callback)(struct kbase_device *kbdev); ++int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, ++ const char *name, s32 *addr, ++ size_t num_elems, bool dt_required); + -+ /** Callback for runtime power-off power management callback -+ * -+ * For linux this callback will be called by the kernel runtime_suspend callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ */ -+ void (*power_runtime_off_callback)(struct kbase_device *kbdev); ++/** ++ * kbase_ipa_model_add_param_string - Add a string model parameter ++ * @model: pointer to IPA model ++ * @name: name of corresponding debugfs entry ++ * @addr: address where the value is stored ++ * @size: size, in bytes, of the value storage (so the maximum string ++ * length is size - 1) ++ * @dt_required: if false, a corresponding devicetree entry is not required, ++ * and the current value will be used. If true, a warning is ++ * output and the data is zeroed ++ * ++ * Return: 0 on success, or an error code ++ */ ++int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, ++ const char *name, char *addr, ++ size_t size, bool dt_required); + -+ /** Callback for runtime power-on power management callback -+ * -+ * For linux this callback will be called by the kernel runtime_resume callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ * -+ * @return 0 on success, else OS error code. ++struct kbase_ipa_model_ops { ++ char *name; ++ /* The init, recalculate and term ops on the default model are always ++ * called. However, all the other models are only invoked if the model ++ * is selected in the device tree. 
Otherwise they are never ++ * initialized. Additional resources can be acquired by models in ++ * init(), however they must be terminated in the term(). + */ -+ int (*power_runtime_on_callback)(struct kbase_device *kbdev); -+ -+ /* -+ * Optional callback for checking if GPU can be suspended when idle -+ * -+ * This callback will be called by the runtime power management core -+ * when the reference count goes to 0 to provide notification that the -+ * GPU now seems idle. -+ * -+ * If this callback finds that the GPU can't be powered off, or handles -+ * suspend by powering off directly or queueing up a power off, a -+ * non-zero value must be returned to prevent the runtime PM core from -+ * also triggering a suspend. -+ * -+ * Returning 0 will cause the runtime PM core to conduct a regular -+ * autosuspend. -+ * -+ * This callback is optional and if not provided regular autosuspend -+ * will be triggered. -+ * -+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use -+ * this feature. -+ * -+ * Return 0 if GPU can be suspended, positive value if it can not be -+ * suspeneded by runtime PM, else OS error code ++ int (*init)(struct kbase_ipa_model *model); ++ /* Called immediately after init(), or when a parameter is changed, so ++ * that any coefficients derived from model parameters can be ++ * recalculated + */ -+ int (*power_runtime_idle_callback)(struct kbase_device *kbdev); -+ ++ int (*recalculate)(struct kbase_ipa_model *model); ++ void (*term)(struct kbase_ipa_model *model); + /* -+ * Optional callback for software reset ++ * get_dynamic_coeff() - calculate dynamic power coefficient ++ * @model: pointer to model ++ * @coeffp: pointer to return value location + * -+ * This callback will be called by the power management core to trigger -+ * a GPU soft reset. ++ * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which ++ * is then scaled by the IPA framework according to the current OPP's ++ * frequency and voltage. + * -+ * Return 0 if the soft reset was successful and the RESET_COMPLETED -+ * interrupt will be raised, or a positive value if the interrupt won't -+ * be raised. On error, return the corresponding OS error code. ++ * Return: 0 on success, or an error code. -EOVERFLOW error code will ++ * indicate that sampling interval was too large and no meaningful ++ * scaling for GPU utiliation can be done. + */ -+ int (*soft_reset_callback)(struct kbase_device *kbdev); -+ ++ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); + /* -+ * Optional callback invoked after GPU becomes idle, not supported on -+ * JM GPUs. -+ * -+ * This callback will be invoked by the Kbase when GPU becomes idle. -+ * For JM GPUs or when runtime PM is disabled, Kbase will not invoke -+ * this callback and @power_off_callback will be invoked directly. -+ * -+ * This callback is supposed to decrement the runtime PM core reference -+ * count to zero and trigger the auto-suspend timer, which implies that -+ * @power_off_callback shouldn't initiate the runtime suspend. ++ * get_static_coeff() - calculate static power coefficient ++ * @model: pointer to model ++ * @coeffp: pointer to return value location + * -+ * GPU registers still remain accessible until @power_off_callback gets -+ * invoked later on the expiry of auto-suspend timer. ++ * Calculate a static power coefficient, with units uW/(V^3), which is ++ * scaled by the IPA framework according to the current OPP's voltage. 
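To make the cube-law above concrete, here is a minimal userspace sketch of the scaling that the framework applies to a static coefficient expressed in uW/(V^3). The function name, the use of floating point and the millivolt argument are assumptions chosen for readability; the driver itself works on u32 values in fixed point.

#include <stdint.h>
#include <stdio.h>

/* Illustration only: scale a static power coefficient (uW/V^3) by the cube
 * of the operating-point voltage, which is supplied here in millivolts.
 */
static double example_static_power_uw(uint32_t coeff_uw_per_v3,
				      uint32_t voltage_mv)
{
	double v = voltage_mv / 1000.0;	/* mV -> V */

	return (double)coeff_uw_per_v3 * v * v * v;
}

int main(void)
{
	/* A 20000 uW/V^3 coefficient at 0.95 V comes out at roughly 17.1 mW. */
	printf("%.1f uW\n", example_static_power_uw(20000, 950));
	return 0;
}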
+ * -+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use -+ * this feature. ++ * Return: 0 on success, or an error code. + */ -+ void (*power_runtime_gpu_idle_callback)(struct kbase_device *kbdev); ++ int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); + + /* -+ * Optional callback invoked to change the runtime PM core state to -+ * active. -+ * -+ * This callback will be invoked by Kbase when GPU needs to be -+ * reactivated, but only if @power_runtime_gpu_idle_callback was invoked -+ * previously. So both @power_runtime_gpu_idle_callback and this -+ * callback needs to be implemented at the same time. -+ * -+ * Kbase will invoke @power_on_callback first before invoking this -+ * callback if the GPU was powered down previously, otherwise directly. -+ * -+ * This callback is supposed to increment the runtime PM core reference -+ * count to 1, which implies that @power_on_callback shouldn't initiate -+ * the runtime resume. The runtime resume may not happen synchronously -+ * to avoid a potential deadlock due to the runtime suspend happening -+ * simultaneously from some other thread. ++ * reset_counter_data() - Reset the HW counter data used for calculating ++ * dynamic power coefficient ++ * @model: pointer to model + * -+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use -+ * this feature. ++ * This method is currently applicable only to the counter based model. ++ * The next call to get_dynamic_coeff() will have to calculate the ++ * dynamic power coefficient based on the HW counter data generated ++ * from this point onwards. + */ -+ void (*power_runtime_gpu_active_callback)(struct kbase_device *kbdev); ++ void (*reset_counter_data)(struct kbase_ipa_model *model); +}; + -+/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier. ++/** ++ * kbase_ipa_init - Initialize the IPA feature ++ * @kbdev: pointer to kbase device + * -+ * Pointer to this structure is supposed to be passed to the gpu clock rate -+ * change notifier function. This structure is deliberately aligned with the -+ * common clock framework notification structure 'struct clk_notifier_data' -+ * and such alignment should be maintained. ++ * simple IPA power model is initialized as a fallback model and if that ++ * initialization fails then IPA is not used. ++ * The device tree is read for the name of ipa model to be used, by using the ++ * property string "ipa-model". If that ipa model is supported then it is ++ * initialized but if the initialization fails then simple power model is used. + * -+ * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered. -+ * @old_rate: Previous rate of this GPU clock in Hz. -+ * @new_rate: New rate of this GPU clock in Hz. ++ * Return: 0 on success, negative -errno on error + */ -+struct kbase_gpu_clk_notifier_data { -+ void *gpu_clk_handle; -+ unsigned long old_rate; -+ unsigned long new_rate; -+}; ++int kbase_ipa_init(struct kbase_device *kbdev); + +/** -+ * struct kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace -+ * operations. ++ * kbase_ipa_term - Terminate the IPA feature ++ * @kbdev: pointer to kbase device + * -+ * Specifies the functions pointers for platform specific GPU clock rate trace -+ * operations. By default no functions are required. 
-+ */ -+struct kbase_clk_rate_trace_op_conf { -+ /** -+ * @enumerate_gpu_clk: Enumerate a GPU clock on the given index -+ * @kbdev - kbase_device pointer -+ * @index - GPU clock index -+ * -+ * Returns a handle unique to the given GPU clock, or NULL if the clock -+ * array has been exhausted at the given index value. -+ * -+ * Kbase will use this function pointer to enumerate the existence of a -+ * GPU clock on the given index. -+ */ -+ void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, -+ unsigned int index); -+ -+ /** -+ * @get_gpu_clk_rate: Get the current rate for an enumerated clock. -+ * @kbdev - kbase_device pointer -+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock -+ * -+ * Returns current rate of the GPU clock in unit of Hz. -+ */ -+ unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, -+ void *gpu_clk_handle); -+ -+ /** -+ * @gpu_clk_notifier_register: Register a clock rate change notifier. -+ * @kbdev - kbase_device pointer -+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock -+ * @nb - notifier block containing the callback function -+ * pointer -+ * -+ * Returns 0 on success, negative error code otherwise. -+ * -+ * This function pointer is used to register a callback function that -+ * is supposed to be invoked whenever the rate of clock corresponding -+ * to @gpu_clk_handle changes. -+ * @nb contains the pointer to callback function. -+ * The callback function expects the pointer of type -+ * 'struct kbase_gpu_clk_notifier_data' as the third argument. -+ */ -+ int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb); -+ -+ /** -+ * @gpu_clk_notifier_unregister: Unregister clock rate change notifier -+ * @kbdev - kbase_device pointer -+ * @gpu_clk_handle - Handle unique to the enumerated GPU clock -+ * @nb - notifier block containing the callback function -+ * pointer -+ * -+ * This function pointer is used to unregister a callback function that -+ * was previously registered to get notified of the change in rate -+ * of clock corresponding to @gpu_clk_handle. -+ */ -+ void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb); -+}; -+ -+#if IS_ENABLED(CONFIG_OF) -+struct kbase_platform_config { -+}; -+#else -+ -+/* -+ * @brief Specifies start and end of I/O memory region. -+ */ -+struct kbase_io_memory_region { -+ u64 start; -+ u64 end; -+}; -+ -+/* -+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. ++ * Both simple IPA power model and model retrieved from device tree are ++ * terminated. + */ -+struct kbase_io_resources { -+ u32 job_irq_number; -+ u32 mmu_irq_number; -+ u32 gpu_irq_number; -+ struct kbase_io_memory_region io_memory_region; -+}; -+ -+struct kbase_platform_config { -+ const struct kbase_io_resources *io_resources; -+}; -+ -+#endif /* CONFIG_OF */ ++void kbase_ipa_term(struct kbase_device *kbdev); + +/** -+ * kbase_get_platform_config - Gets the pointer to platform config. ++ * kbase_ipa_model_recalculate - Recalculate the model coefficients ++ * @model: pointer to the IPA model object, already initialized + * -+ * Return: Pointer to the platform config ++ * It shall be called immediately after the model has been initialized ++ * or when the model parameter has changed, so that any coefficients ++ * derived from parameters can be recalculated. ++ * Its a wrapper for the module specific recalculate() method. 
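The selection order spelled out in the kbase_ipa_init() comment above (the simple fallback model first, then the model named by the devicetree "ipa-model" property, reverting to the fallback if it fails) can be sketched in a few lines. Everything below is an editorial illustration with invented names, not the driver's code.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Invented stand-ins for the driver's model table and init hooks. */
struct example_model { const char *name; int (*init)(void); };

static int simple_init(void)  { return 0; }	/* fallback always works here */
static int counter_init(void) { return -1; }	/* pretend this one fails */

static const struct example_model example_models[] = {
	{ "simple",  simple_init  },
	{ "counter", counter_init },
};

/* Mirror of the documented flow: no fallback means no IPA at all; a named
 * model that fails to initialise falls back to the simple model.
 */
static const struct example_model *example_select(const char *dt_name)
{
	const struct example_model *fallback = &example_models[0];
	const struct example_model *chosen = fallback;
	size_t i;

	if (fallback->init())
		return NULL;

	for (i = 1; i < sizeof(example_models) / sizeof(example_models[0]); i++)
		if (dt_name && !strcmp(example_models[i].name, dt_name))
			chosen = &example_models[i];

	if (chosen != fallback && chosen->init())
		chosen = fallback;

	return chosen;
}

int main(void)
{
	/* The "counter" model fails to initialise, so this prints "simple". */
	printf("selected: %s\n", example_select("counter")->name);
	return 0;
}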
++ * ++ * Return: 0 on success, negative -errno on error + */ -+struct kbase_platform_config *kbase_get_platform_config(void); ++int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); + +/** -+ * kbasep_platform_device_init: - Platform specific call to initialize hardware -+ * @kbdev: kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can initialize any hardware and context state that -+ * is required for the GPU block to function. ++ * kbase_ipa_model_ops_find - Lookup an IPA model using its name ++ * @kbdev: pointer to kbase device ++ * @name: name of model to lookup + * -+ * Return: 0 if no errors have been found in the config. -+ * Negative error code otherwise. ++ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. + */ -+int kbasep_platform_device_init(struct kbase_device *kbdev); ++const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, ++ const char *name); + +/** -+ * kbasep_platform_device_term - Platform specific call to terminate hardware -+ * @kbdev: Kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can destroy any platform specific context state and -+ * shut down any hardware functionality that are outside of the Power Management -+ * callbacks. ++ * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name ++ * @kbdev: pointer to kbase device ++ * @name: name of counter model to lookup + * ++ * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup ++ * failed. + */ -+void kbasep_platform_device_term(struct kbase_device *kbdev); ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name); + +/** -+ * kbasep_platform_device_late_init: - Platform specific call to finish hardware -+ * initialization -+ * @kbdev: kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can initialize any hardware and context state that -+ * is required for the GPU block to function. ++ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID ++ * @gpu_id: GPU ID of GPU the model will be used for + * -+ * Return: 0 if no errors have been found in the config. -+ * Negative error code otherwise. ++ * Return: The name of the appropriate counter-based model, or the name of the ++ * fallback model if no counter model exists. + */ -+int kbasep_platform_device_late_init(struct kbase_device *kbdev); ++const char *kbase_ipa_model_name_from_id(u32 gpu_id); + +/** -+ * kbasep_platform_device_late_term - Platform specific call to finish hardware -+ * termination -+ * @kbdev: Kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can destroy any platform specific context state and -+ * shut down any hardware functionality that are outside of the Power Management -+ * callbacks. ++ * kbase_ipa_counter_model_name_from_id - Find the best counter model for a ++ * given GPU ID ++ * @gpu_id: GPU ID of GPU the counter model will be used for + * ++ * Return: The name of the appropriate counter-based model, or NULL if the ++ * no counter model exists. 
+ */ -+void kbasep_platform_device_late_term(struct kbase_device *kbdev); ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); + -+#if !MALI_USE_CSF +/** -+ * kbasep_platform_context_init - Platform specific callback when a kernel -+ * context is created -+ * @kctx: kbase_context pointer ++ * kbase_ipa_init_model - Initilaize the particular IPA model ++ * @kbdev: pointer to kbase device ++ * @ops: pointer to object containing model specific methods. + * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can initialize any per kernel context structures -+ * that are required for the GPU block to function. ++ * Initialize the model corresponding to the @ops pointer passed. ++ * The init() method specified in @ops would be called. + * -+ * Return: 0 if no errors were encountered. Negative error code otherwise. ++ * Return: pointer to kbase_ipa_model on success, NULL on error + */ -+int kbasep_platform_context_init(struct kbase_context *kctx); -+ ++struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, ++ const struct kbase_ipa_model_ops *ops); +/** -+ * kbasep_platform_context_term - Platform specific callback when a kernel -+ * context is terminated -+ * @kctx: kbase_context pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine should terminate any per kernel context structures -+ * created as part of &kbasep_platform_context_init. ++ * kbase_ipa_term_model - Terminate the particular IPA model ++ * @model: pointer to the IPA model object, already initialized + * ++ * Terminate the model, using the term() method. ++ * Module specific parameters would be freed. + */ -+void kbasep_platform_context_term(struct kbase_context *kctx); ++void kbase_ipa_term_model(struct kbase_ipa_model *model); + +/** -+ * kbasep_platform_event_atom_submit - Platform specific callback when an atom -+ * is submitted to the GPU -+ * @katom: kbase_jd_atom pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine should not assume that it is in a process context. ++ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into ++ * protected mode ++ * @kbdev: pointer to kbase device + * -+ * Return: 0 if no errors were encountered. Negative error code otherwise. ++ * Makes IPA aware of the GPU switching to protected mode. + */ -+void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom); ++void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); + +/** -+ * kbasep_platform_event_atom_complete - Platform specific callback when an atom -+ * has stopped running on the GPU -+ * @katom: kbase_jd_atom pointer ++ * kbase_get_real_power() - get the real power consumption of the GPU ++ * @df: dynamic voltage and frequency scaling information for the GPU. ++ * @power: where to store the power consumption, in mW. ++ * @freq: a frequency, in HZ. ++ * @voltage: a voltage, in mV. + * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine should not assume that it is in a process context. ++ * The returned value incorporates both static and dynamic power consumption. + * ++ * Return: 0 on success, or an error code. 
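As a back-of-the-envelope check on the description above: the dynamic part scales as coefficient * frequency * voltage^2 (coefficient in pW/(Hz*V^2)), is derated by the busy/total utilisation ratio, and the static part is added on top. The sketch below restates that arithmetic in floating point with invented names; the driver performs it in u32/u64 fixed point with the per-block split shown earlier.

#include <stdio.h>

/* Illustration only: combine dynamic and static power the way the comments
 * above describe, using plain doubles instead of the driver's fixed point.
 */
static double example_real_power_mw(double dyn_coeff_pw, double freq_hz,
				    double volt_v, double busy_time,
				    double total_time, double static_mw)
{
	double dyn_mw = dyn_coeff_pw * freq_hz * volt_v * volt_v / 1e9; /* pW -> mW */
	double util = total_time > 0.0 ? busy_time / total_time : 1.0;

	return dyn_mw * util + static_mw;
}

int main(void)
{
	/* 2500 pW/(Hz*V^2) at 600 MHz and 0.9 V is ~1215 mW when fully busy;
	 * at 40% utilisation plus 15 mW of static power this prints 501.0 mW.
	 */
	printf("%.1f mW\n",
	       example_real_power_mw(2500.0, 600e6, 0.9, 40.0, 100.0, 15.0));
	return 0;
}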
+ */ -+void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom); -+#endif ++int kbase_get_real_power(struct devfreq *df, u32 *power, ++ unsigned long freq, ++ unsigned long voltage); + -+#ifndef CONFIG_OF -+/** -+ * kbase_platform_register - Register a platform device for the GPU -+ * This can be used to register a platform device on systems where device tree -+ * is not enabled and the platform initialisation code in the kernel doesn't -+ * create the GPU device. Where possible device tree should be used instead. -+ * -+ * Return: 0 for success, any other fail causes module initialisation to fail ++/* Called by kbase_get_real_power() to invoke the power models. ++ * Must be called with kbdev->ipa.lock held. ++ * This function is only exposed for use by unit tests. + */ -+int kbase_platform_register(void); ++int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, ++ unsigned long freq, ++ unsigned long voltage); ++ ++extern struct devfreq_cooling_power kbase_ipa_power_model_ops; + +/** -+ * kbase_platform_unregister - Unregister a fake platform device ++ * kbase_ipa_reset_data() - Reset the data required for power estimation. ++ * @kbdev: Pointer to kbase device. + * -+ * Unregister the platform device created with kbase_platform_register() ++ * This function is called to ensure a meaningful baseline for ++ * kbase_get_real_power(), when thermal governor starts the polling, and ++ * that is achieved by updating the GPU utilization metrics and retrieving ++ * the accumulated value of HW counters. ++ * Basically this function collects all the data required for power estimation ++ * but does not process it. + */ -+void kbase_platform_unregister(void); -+#endif ++void kbase_ipa_reset_data(struct kbase_device *kbdev); + -+#endif /* _KBASE_CONFIG_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h ++#else /* !(defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++ ++static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) ++{ } ++ ++#endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c new file mode 100644 -index 000000000..14493a77e +index 000000000..a0963bbb2 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h -@@ -0,0 +1,278 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c +@@ -0,0 +1,324 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -203179,273 +205998,319 @@ index 000000000..14493a77e + * + */ + -+/** -+ * DOC: Default values for configuration settings -+ * -+ */ ++#include ++#include ++#include + -+#ifndef _KBASE_CONFIG_DEFAULTS_H_ -+#define _KBASE_CONFIG_DEFAULTS_H_ ++#include "mali_kbase.h" ++#include "mali_kbase_ipa.h" ++#include "mali_kbase_ipa_debugfs.h" + -+/* Include mandatory definitions per platform */ -+#include ++struct kbase_ipa_model_param { ++ char *name; ++ union { ++ void *voidp; ++ s32 *s32p; ++ char *str; ++ } addr; ++ size_t size; ++ enum kbase_ipa_model_param_type type; ++ struct kbase_ipa_model *model; ++ struct list_head link; ++}; + -+enum { -+ /* Use unrestricted Address ID width on the AXI bus. */ -+ KBASE_AID_32 = 0x0, ++static int param_int_get(void *data, u64 *val) ++{ ++ struct kbase_ipa_model_param *param = data; + -+ /* Restrict GPU to a half of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_16 = 0x3, ++ mutex_lock(¶m->model->kbdev->ipa.lock); ++ *(s64 *) val = *param->addr.s32p; ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ /* Restrict GPU to a quarter of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_8 = 0x2, ++ return 0; ++} + -+ /* Restrict GPU to an eighth of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_4 = 0x1 -+}; ++static int param_int_set(void *data, u64 val) ++{ ++ struct kbase_ipa_model_param *param = data; ++ struct kbase_ipa_model *model = param->model; ++ s64 sval = (s64) val; ++ s32 old_val; ++ int err = 0; + -+enum { -+ /* Use unrestricted Address ID width on the AXI bus. -+ * Restricting ID width will reduce performance & bus load due to GPU. -+ */ -+ KBASE_3BIT_AID_32 = 0x0, ++ if (sval < S32_MIN || sval > S32_MAX) ++ return -ERANGE; + -+ /* Restrict GPU to 7/8 of maximum Address ID count. */ -+ KBASE_3BIT_AID_28 = 0x1, ++ mutex_lock(¶m->model->kbdev->ipa.lock); ++ old_val = *param->addr.s32p; ++ *param->addr.s32p = val; ++ err = kbase_ipa_model_recalculate(model); ++ if (err < 0) ++ *param->addr.s32p = old_val; ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ /* Restrict GPU to 3/4 of maximum Address ID count. */ -+ KBASE_3BIT_AID_24 = 0x2, ++ return err; ++} + -+ /* Restrict GPU to 5/8 of maximum Address ID count. */ -+ KBASE_3BIT_AID_20 = 0x3, ++DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); + -+ /* Restrict GPU to 1/2 of maximum Address ID count. */ -+ KBASE_3BIT_AID_16 = 0x4, ++static ssize_t param_string_get(struct file *file, char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ struct kbase_ipa_model_param *param = file->private_data; ++ ssize_t ret; ++ size_t len; + -+ /* Restrict GPU to 3/8 of maximum Address ID count. */ -+ KBASE_3BIT_AID_12 = 0x5, ++ mutex_lock(¶m->model->kbdev->ipa.lock); ++ len = strnlen(param->addr.str, param->size - 1) + 1; ++ ret = simple_read_from_buffer(user_buf, count, ppos, ++ param->addr.str, len); ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ /* Restrict GPU to 1/4 of maximum Address ID count. */ -+ KBASE_3BIT_AID_8 = 0x6, ++ return ret; ++} + -+ /* Restrict GPU to 1/8 of maximum Address ID count. 
*/ -+ KBASE_3BIT_AID_4 = 0x7 ++static ssize_t param_string_set(struct file *file, const char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ struct kbase_ipa_model_param *param = file->private_data; ++ struct kbase_ipa_model *model = param->model; ++ char *old_str = NULL; ++ ssize_t ret = count; ++ size_t buf_size; ++ int err; ++ ++ mutex_lock(&model->kbdev->ipa.lock); ++ ++ if (count > param->size) { ++ ret = -EINVAL; ++ goto end; ++ } ++ ++ old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL); ++ if (!old_str) { ++ ret = -ENOMEM; ++ goto end; ++ } ++ ++ buf_size = min(param->size - 1, count); ++ if (copy_from_user(param->addr.str, user_buf, buf_size)) { ++ ret = -EFAULT; ++ goto end; ++ } ++ ++ param->addr.str[buf_size] = '\0'; ++ ++ err = kbase_ipa_model_recalculate(model); ++ if (err < 0) { ++ u32 string_len = strscpy(param->addr.str, old_str, param->size); ++ ++ string_len += sizeof(char); ++ /* Make sure that the source string fit into the buffer. */ ++ KBASE_DEBUG_ASSERT(string_len <= param->size); ++ CSTD_UNUSED(string_len); ++ ++ ret = err; ++ } ++ ++end: ++ kfree(old_str); ++ mutex_unlock(&model->kbdev->ipa.lock); ++ ++ return ret; ++} ++ ++static const struct file_operations fops_string = { ++ .owner = THIS_MODULE, ++ .read = param_string_get, ++ .write = param_string_set, ++ .open = simple_open, ++ .llseek = default_llseek, +}; + -+#if MALI_USE_CSF -+/* -+ * Default value for the TIMER register of the IPA Control interface, -+ * expressed in milliseconds. -+ * -+ * The chosen value is a trade off between two requirements: the IPA Control -+ * interface should sample counters with a resolution in the order of -+ * milliseconds, while keeping GPU overhead as limited as possible. -+ */ -+#define IPA_CONTROL_TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ -+#endif /* MALI_USE_CSF */ ++int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, ++ void *addr, size_t size, ++ enum kbase_ipa_model_param_type type) ++{ ++ struct kbase_ipa_model_param *param; + -+/* Default period for DVFS sampling (can be overridden by platform header) */ -+#ifndef DEFAULT_PM_DVFS_PERIOD -+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ -+#endif ++ param = kzalloc(sizeof(*param), GFP_KERNEL); + -+/* Power Management poweroff tick granuality. This is in nanoseconds to -+ * allow HR timer support (can be overridden by platform header). -+ * -+ * On each scheduling tick, the power manager core may decide to: -+ * -# Power off one or more shader cores -+ * -# Power off the entire GPU -+ */ -+#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS -+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ -+#endif ++ if (!param) ++ return -ENOMEM; + -+/* Power Manager number of ticks before shader cores are powered off -+ * (can be overridden by platform header). -+ */ -+#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER -+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ -+#endif ++ /* 'name' is stack-allocated for array elements, so copy it into ++ * heap-allocated storage ++ */ ++ param->name = kstrdup(name, GFP_KERNEL); + -+/* Default scheduling tick granuality (can be overridden by platform header) */ -+#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS -+#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ -+#endif ++ if (!param->name) { ++ kfree(param); ++ return -ENOMEM; ++ } + -+/* Default minimum number of scheduling ticks before jobs are soft-stopped. 
-+ * -+ * This defines the time-slice for a job (which may be different from that of a -+ * context) -+ */ -+#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ ++ param->addr.voidp = addr; ++ param->size = size; ++ param->type = type; ++ param->model = model; + -+/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */ -+#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ ++ list_add(¶m->link, &model->params); + -+/* Default minimum number of scheduling ticks before jobs are hard-stopped */ -+#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ ++ return 0; ++} + -+/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */ -+#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ ++void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) ++{ ++ struct kbase_ipa_model_param *param_p, *param_n; + -+/* Default minimum number of scheduling ticks before jobs are hard-stopped -+ * during dumping -+ */ -+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ ++ list_for_each_entry_safe(param_p, param_n, &model->params, link) { ++ list_del(¶m_p->link); ++ kfree(param_p->name); ++ kfree(param_p); ++ } ++} + -+/* Default timeout for some software jobs, after which the software event wait -+ * jobs will be cancelled. -+ */ -+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ ++static int force_fallback_model_get(void *data, u64 *val) ++{ ++ struct kbase_device *kbdev = data; + -+/* Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" job -+ */ -+#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ ++ mutex_lock(&kbdev->ipa.lock); ++ *val = kbdev->ipa.force_fallback_model; ++ mutex_unlock(&kbdev->ipa.lock); + -+/* Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" CL job. -+ */ -+#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ ++ return 0; ++} + -+/* Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" job during dumping. -+ */ -+#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ ++static int force_fallback_model_set(void *data, u64 val) ++{ ++ struct kbase_device *kbdev = data; + -+/* Nominal reference frequency that was used to obtain all following -+ * <...>_TIMEOUT_CYCLES macros, in kHz. -+ * -+ * Timeouts are scaled based on the relation between this value and the lowest -+ * GPU clock frequency. -+ */ -+#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000) ++ mutex_lock(&kbdev->ipa.lock); ++ kbdev->ipa.force_fallback_model = (val ? true : false); ++ mutex_unlock(&kbdev->ipa.lock); + -+#if MALI_USE_CSF -+/* Waiting timeout for status change acknowledgment, in clock cycles. -+ * -+ * This is also the default timeout to be used when an invalid timeout -+ * selector is used to retrieve the timeout on CSF GPUs. -+ * -+ * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based -+ * on scaling from a 50MHz GPU system. -+ */ -+#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull) ++ return 0; ++} + -+/* Timeout in clock cycles for GPU Power Management to reach the desired -+ * Shader, L2 and MCU state. -+ * -+ * Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system. -+ */ -+#define CSF_PM_TIMEOUT_CYCLES (250000000) ++DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, ++ force_fallback_model_get, ++ force_fallback_model_set, ++ "%llu\n"); + -+/* Waiting timeout in clock cycles for GPU reset to complete. 
-+ * -+ * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system -+ */ -+#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) ++static int current_power_get(void *data, u64 *val) ++{ ++ struct kbase_device *kbdev = data; ++ struct devfreq *df = kbdev->devfreq; ++ u32 power; + -+/* Waiting timeout in clock cycles for all active CSGs to be suspended. -+ * -+ * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. -+ */ -+#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) ++ kbase_pm_context_active(kbdev); ++ /* The current model assumes that there's no more than one voltage ++ * regulator currently available in the system. ++ */ ++ kbase_get_real_power(df, &power, ++ kbdev->current_nominal_freq, ++ (kbdev->current_voltages[0] / 1000)); ++ kbase_pm_context_idle(kbdev); + -+/* Waiting timeout in clock cycles for GPU firmware to boot. -+ * -+ * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system. -+ */ -+#define CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES (25000000) ++ *val = power; + -+/* Waiting timeout for a ping request to be acknowledged, in clock cycles. -+ * -+ * Based on 6000ms timeout at 100MHz, scaled from a 50MHz GPU system. -+ */ -+#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) ++ return 0; ++} ++DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n"); + -+#else /* MALI_USE_CSF */ ++static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) ++{ ++ struct list_head *it; ++ struct dentry *dir; + -+/* A default timeout in clock cycles to be used when an invalid timeout -+ * selector is used to retrieve the timeout, on JM GPUs. -+ */ -+#define JM_DEFAULT_TIMEOUT_CYCLES (150000000) ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+/* Default number of milliseconds given for other jobs on the GPU to be -+ * soft-stopped when the GPU needs to be reset. -+ */ -+#define JM_DEFAULT_RESET_TIMEOUT_MS (1) /* 1 ms */ ++ dir = debugfs_create_dir(model->ops->name, ++ model->kbdev->mali_debugfs_directory); + -+/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT -+ * is updated on HW side so a Job Slot is considered free. -+ * This timeout will only take effect on GPUs with low value for the minimum -+ * GPU clock frequency (<= 100MHz). -+ * -+ * Based on 1ms timeout at 100MHz. Will default to 0ms on GPUs with higher -+ * value for minimum GPU clock frequency. -+ */ -+#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) ++ if (IS_ERR_OR_NULL(dir)) { ++ dev_err(model->kbdev->dev, ++ "Couldn't create mali debugfs %s directory", ++ model->ops->name); ++ return; ++ } + -+#endif /* MALI_USE_CSF */ ++ list_for_each(it, &model->params) { ++ struct kbase_ipa_model_param *param = ++ list_entry(it, ++ struct kbase_ipa_model_param, ++ link); ++ const struct file_operations *fops = NULL; + -+/* Default timeslice that a context is scheduled in for, in nanoseconds. -+ * -+ * When a context has used up this amount of time across its jobs, it is -+ * scheduled out to let another run. -+ * -+ * @note the resolution is nanoseconds (ns) here, because that's the format -+ * often used by the OS. -+ */ -+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ ++ switch (param->type) { ++ case PARAM_TYPE_S32: ++ fops = &fops_s32; ++ break; ++ case PARAM_TYPE_STRING: ++ fops = &fops_string; ++ break; ++ } + -+/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms -+ * this isn't available, so we simply define a dummy value here. 
If devfreq -+ * is enabled the value will be read from there, otherwise this should be -+ * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. -+ */ -+#define DEFAULT_GPU_FREQ_KHZ_MAX (5000) ++ if (unlikely(!fops)) { ++ dev_err(model->kbdev->dev, ++ "Type not set for %s parameter %s\n", ++ model->ops->name, param->name); ++ } else { ++ debugfs_create_file(param->name, 0644, ++ dir, param, fops); ++ } ++ } ++} + -+/* Default timeout for task execution on an endpoint -+ * -+ * Number of GPU clock cycles before the driver terminates a task that is -+ * making no forward progress on an endpoint (e.g. shader core). -+ * Value chosen is equivalent to the time after which a job is hard stopped -+ * which is 5 seconds (assuming the GPU is usually clocked at ~500 MHZ). -+ */ -+#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) ++void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, ++ const char *name, s32 val) ++{ ++ struct kbase_ipa_model_param *param; + -+/* Default threshold at which to switch to incremental rendering -+ * -+ * Fraction of the maximum size of an allocation that grows on GPU page fault -+ * that can be used up before the driver switches to incremental rendering, -+ * in 256ths. 0 means disable incremental rendering. -+ */ -+#define DEFAULT_IR_THRESHOLD (192) ++ mutex_lock(&model->kbdev->ipa.lock); + -+/* Waiting time in clock cycles for the completion of a MMU operation. -+ * -+ * Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush. -+ * -+ * As a pessimistic value, 50M GPU cycles ( > 30 times bigger ) is chosen. -+ * It corresponds to 0.5s in GPU @ 100Mhz. -+ */ -+#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) -+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c ++ list_for_each_entry(param, &model->params, link) { ++ if (!strcmp(param->name, name)) { ++ if (param->type == PARAM_TYPE_S32) { ++ *param->addr.s32p = val; ++ } else { ++ dev_err(model->kbdev->dev, ++ "Wrong type for %s parameter %s\n", ++ model->ops->name, param->name); ++ } ++ break; ++ } ++ } ++ ++ mutex_unlock(&model->kbdev->ipa.lock); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32); ++ ++void kbase_ipa_debugfs_init(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->ipa.lock); ++ ++ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) ++ kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); ++ kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); ++ ++ debugfs_create_file("ipa_current_power", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ¤t_power); ++ debugfs_create_file("ipa_force_fallback_model", 0644, ++ kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); ++ ++ mutex_unlock(&kbdev->ipa.lock); ++} +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h new file mode 100644 -index 000000000..68e4305af +index 000000000..f69036775 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c -@@ -0,0 +1,5856 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h +@@ -0,0 +1,70 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -203463,5851 +206328,6263 @@ index 000000000..68e4305af + * + */ + -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+#include -+#include -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+#include -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -+#include "backend/gpu/mali_kbase_model_linux.h" -+#include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" -+#include "mali_kbase_mem.h" -+#include "mali_kbase_mem_pool_debugfs.h" -+#include "mali_kbase_mem_pool_group.h" -+#include "mali_kbase_debugfs_helper.h" -+#include "mali_kbase_regs_history_debugfs.h" -+#include -+#include -+#if !MALI_USE_CSF -+#include -+#endif /* !MALI_USE_CSF */ -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+#include -+#endif -+#include -+#include -+#if !MALI_USE_CSF -+#include "mali_kbase_kinstr_jm.h" -+#endif -+#include "hwcnt/mali_kbase_hwcnt_context.h" -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -+#include "mali_kbase_kinstr_prfcnt.h" -+#include "mali_kbase_vinstr.h" -+#if MALI_USE_CSF -+#include "csf/mali_kbase_csf_firmware.h" -+#include "csf/mali_kbase_csf_tiler_heap.h" -+#include "csf/mali_kbase_csf_csg_debugfs.h" -+#include "csf/mali_kbase_csf_cpu_queue_debugfs.h" -+#include "csf/mali_kbase_csf_event.h" -+#endif -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include "arbiter/mali_kbase_arbiter_pm.h" -+#endif ++#ifndef _KBASE_IPA_DEBUGFS_H_ ++#define _KBASE_IPA_DEBUGFS_H_ + -+#include "mali_kbase_cs_experimental.h" ++enum kbase_ipa_model_param_type { ++ PARAM_TYPE_S32 = 1, ++ PARAM_TYPE_STRING, ++}; + -+#ifdef CONFIG_MALI_CINSTR_GWT -+#include "mali_kbase_gwt.h" -+#endif -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "mali_kbase_dvfs_debugfs.h" +#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include "mali_kbase_pbha_debugfs.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include /* is_compat_task/in_compat_syscall */ -+#include -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include -+#endif /* CONFIG_SYNC_FILE */ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" + -+/** -+ * KBASE_API_VERSION - KBase API Version -+ * @major: Kernel major version -+ * @minor: Kernel minor version -+ */ -+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ -+ (((minor) & 0xFFF) << 8) | \ -+ ((0 & 0xFF) << 0)) ++void kbase_ipa_debugfs_init(struct kbase_device *kbdev); ++int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, ++ void *addr, size_t size, ++ enum kbase_ipa_model_param_type type); ++void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); + +/** -+ * struct mali_kbase_capability_def - kbase capabilities table ++ * kbase_ipa_model_param_set_s32 - Set an integer model parameter + * -+ * @required_major: required major -+ * @required_minor: required minor -+ */ -+struct mali_kbase_capability_def { -+ u16 required_major; -+ u16 required_minor; -+}; -+ -+/* -+ * This must be kept in-sync with mali_kbase_cap ++ * @model: pointer to IPA model ++ * @name: name of 
corresponding debugfs entry ++ * @val: new value of the parameter + * -+ * TODO: The alternative approach would be to embed the cap enum values -+ * in the table. Less efficient but potentially safer. ++ * This function is only exposed for use by unit tests running in ++ * kernel space. Normally it is expected that parameter values will ++ * instead be set via debugfs. + */ -+static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { -+#if MALI_USE_CSF -+ { 1, 0 }, /* SYSTEM_MONITOR */ -+ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ -+ { 1, 0 }, /* MEM_GROW_ON_GPF */ -+ { 1, 0 } /* MEM_PROTECTED */ -+#else -+ { 11, 15 }, /* SYSTEM_MONITOR */ -+ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ -+ { 11, 2 }, /* MEM_GROW_ON_GPF */ -+ { 11, 2 } /* MEM_PROTECTED */ -+#endif -+}; ++void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, ++ const char *name, s32 val); + -+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) -+/* Mutex to synchronize the probe of multiple kbase instances */ -+static struct mutex kbase_probe_mutex; -+#endif ++#else /* CONFIG_DEBUG_FS */ + -+/** -+ * mali_kbase_supports_cap - Query whether a kbase capability is supported -+ * -+ * @api_version: API version to convert -+ * @cap: Capability to query for - see mali_kbase_caps.h -+ * -+ * Return: true if the capability is supported -+ */ -+bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) ++static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, ++ const char *name, void *addr, ++ size_t size, ++ enum kbase_ipa_model_param_type type) +{ -+ bool supported = false; -+ unsigned long required_ver; -+ -+ struct mali_kbase_capability_def const *cap_def; -+ -+ if (WARN_ON(cap < 0)) -+ return false; -+ -+ if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) -+ return false; -+ -+ cap_def = &kbase_caps_table[(int)cap]; -+ required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); -+ supported = (api_version >= required_ver); -+ -+ return supported; ++ return 0; +} + -+/** -+ * kbase_file_new - Create an object representing a device file -+ * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. -+ * @filp: Pointer to the struct file corresponding to device file -+ * /dev/malixx instance, passed to the file's open method. -+ * -+ * In its initial state, the device file has no context (i.e. no GPU -+ * address space) and no API version number. Both must be assigned before -+ * kbase_file_get_kctx_if_setup_complete() can be used successfully. -+ * -+ * Return: Address of an object representing a simulated device file, or NULL -+ * on failure. 
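
Because the !CONFIG_DEBUG_FS branch of this header turns the parameter API into inline no-ops, model code can register its tunables unconditionally. A minimal sketch under that assumption; the parameter name and the example_model_data layout are invented for illustration and are not part of the patch:

struct example_model_data {
        s32 example_coefficient;
};

static int example_register_params(struct kbase_ipa_model *model)
{
        struct example_model_data *data = model->model_data;

        /* With CONFIG_DEBUG_FS=n this collapses to the inline stub that
         * simply returns 0, so no #ifdef is needed at the call site.
         */
        return kbase_ipa_model_param_add(model, "example-coefficient",
                                         &data->example_coefficient,
                                         sizeof(data->example_coefficient),
                                         PARAM_TYPE_S32);
}
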
-+ */ -+static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, -+ struct file *const filp) -+{ -+ struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL); ++static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) ++{ } + -+ if (kfile) { -+ kfile->kbdev = kbdev; -+ kfile->filp = filp; -+ kfile->kctx = NULL; -+ kfile->api_version = 0; -+ atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); -+ } -+ return kfile; -+} ++static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, ++ const char *name, s32 val) ++{ } ++#endif /* CONFIG_DEBUG_FS */ + -+/** -+ * kbase_file_set_api_version - Set the application programmer interface version ++#endif /* _KBASE_IPA_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +new file mode 100644 +index 000000000..8557fe872 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +@@ -0,0 +1,370 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @kfile: A device file created by kbase_file_new() -+ * @major: Major version number (must not exceed 12 bits) -+ * @minor: Major version number (must not exceed 12 bits) ++ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. + * -+ * An application programmer interface (API) version must be specified -+ * before calling kbase_file_create_kctx(), otherwise an error is returned. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * If a version number was already set for the given @kfile (or is in the -+ * process of being set by another thread) then an error is returned. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 if successful, otherwise a negative error code. + */ -+static int kbase_file_set_api_version(struct kbase_file *const kfile, -+ u16 const major, u16 const minor) -+{ -+ if (WARN_ON(!kfile)) -+ return -EINVAL; + -+ /* setup pending, try to signal that we'll do the setup, -+ * if setup was already in progress, err this call -+ */ -+ if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, -+ KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN) -+ return -EPERM; ++#include ++#include ++#include ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++#include ++#endif ++#include ++#include ++#include + -+ /* save the proposed version number for later use */ -+ kfile->api_version = KBASE_API_VERSION(major, minor); ++#include "mali_kbase.h" ++#include "mali_kbase_defs.h" ++#include "mali_kbase_ipa_simple.h" ++#include "mali_kbase_ipa_debugfs.h" + -+ atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX); -+ return 0; -+} ++#if MALI_USE_CSF + -+/** -+ * kbase_file_get_api_version - Get the application programmer interface version -+ * -+ * @kfile: A device file created by kbase_file_new() -+ * -+ * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has -+ * been set. 
++/* This is used if the dynamic power for top-level is estimated separately ++ * through the counter model. To roughly match the contribution of top-level ++ * power in the total dynamic power, when calculated through counter model, ++ * this scalar is used for the dynamic coefficient specified in the device tree ++ * for simple power model. This value was provided by the HW team after ++ * taking all the power data collected and dividing top level power by shader ++ * core power and then averaging it across all samples. + */ -+static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) ++#define TOP_LEVEL_DYN_COEFF_SCALER (3) ++ ++#endif /* MALI_USE_CSF */ ++ ++#if MALI_UNIT_TEST ++ ++static int dummy_temp; ++ ++static int kbase_simple_power_model_get_dummy_temp( ++ struct thermal_zone_device *tz, ++ int *temp) +{ -+ if (WARN_ON(!kfile)) -+ return 0; ++ *temp = READ_ONCE(dummy_temp); ++ return 0; ++} + -+ if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX) -+ return 0; ++/* Intercept calls to the kernel function using a macro */ ++#ifdef thermal_zone_get_temp ++#undef thermal_zone_get_temp ++#endif ++#define thermal_zone_get_temp(tz, temp) \ ++ kbase_simple_power_model_get_dummy_temp(tz, temp) + -+ return kfile->api_version; ++void kbase_simple_power_model_set_dummy_temp(int temp) ++{ ++ WRITE_ONCE(dummy_temp, temp); +} ++KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); + -+/** -+ * kbase_file_create_kctx - Create a kernel base context -+ * -+ * @kfile: A device file created by kbase_file_new() -+ * @flags: Flags to set, which can be any combination of -+ * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. -+ * -+ * This creates a new context for the GPU platform device instance that was -+ * specified when kbase_file_new() was called. Each context has its own GPU -+ * address space. If a context was already created for the given @kfile (or is -+ * in the process of being created for it by another thread) then an error is -+ * returned. -+ * -+ * An API version number must have been set by kbase_file_set_api_version() -+ * before calling this function, otherwise an error is returned. -+ * -+ * Return: 0 if a new context was created, otherwise a negative error code. ++#endif /* MALI_UNIT_TEST */ ++ ++/* ++ * This model is primarily designed for the Juno platform. It may not be ++ * suitable for other platforms. The additional resources in this model ++ * should preferably be minimal, as this model is rarely used when a dynamic ++ * model is available. + */ -+static int kbase_file_create_kctx(struct kbase_file *kfile, -+ base_context_create_flags flags); + +/** -+ * kbase_file_get_kctx_if_setup_complete - Get a kernel base context -+ * pointer from a device file -+ * -+ * @kfile: A device file created by kbase_file_new() -+ * -+ * This function returns NULL if no context has been created for the given @kfile. -+ * This makes it safe to use in circumstances where the order of initialization -+ * cannot be enforced, but only if the caller checks the return value. -+ * -+ * Return: Address of the kernel base context associated with the @kfile, or -+ * NULL if no context exists. 
++ * struct kbase_ipa_model_simple_data - IPA context per device ++ * @dynamic_coefficient: dynamic coefficient of the model ++ * @static_coefficient: static coefficient of the model ++ * @ts: Thermal scaling coefficients of the model ++ * @tz_name: Thermal zone name ++ * @gpu_tz: thermal zone device ++ * @poll_temperature_thread: Handle for temperature polling thread ++ * @current_temperature: Most recent value of polled temperature ++ * @temperature_poll_interval_ms: How often temperature should be checked, in ms + */ -+static struct kbase_context *kbase_file_get_kctx_if_setup_complete( -+ struct kbase_file *const kfile) -+{ -+ if (WARN_ON(!kfile) || -+ atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || -+ WARN_ON(!kfile->kctx)) -+ return NULL; + -+ return kfile->kctx; -+} ++struct kbase_ipa_model_simple_data { ++ u32 dynamic_coefficient; ++ u32 static_coefficient; ++ s32 ts[4]; ++ char tz_name[THERMAL_NAME_LENGTH]; ++ struct thermal_zone_device *gpu_tz; ++ struct task_struct *poll_temperature_thread; ++ int current_temperature; ++ int temperature_poll_interval_ms; ++}; ++#define FALLBACK_STATIC_TEMPERATURE 55000 + +/** -+ * kbase_file_delete - Destroy an object representing a device file ++ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient ++ * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N ++ * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 + * -+ * @kfile: A device file created by kbase_file_new() ++ * Scale the temperature according to a cubic polynomial whose coefficients are ++ * provided in the device tree. The result is used to scale the static power ++ * coefficient, where 1000000 means no change. + * -+ * If any context was created for the @kfile then it is destroyed. ++ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000. + */ -+static void kbase_file_delete(struct kbase_file *const kfile) ++static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) +{ -+ struct kbase_device *kbdev = NULL; -+ -+ if (WARN_ON(!kfile)) -+ return; -+ -+ kfile->filp->private_data = NULL; -+ kbdev = kfile->kbdev; -+ -+ if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { -+ struct kbase_context *kctx = kfile->kctx; -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ kbasep_mem_profile_debugfs_remove(kctx); -+#endif -+ kbase_context_debugfs_term(kctx); ++ /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ ++ const s64 t2 = div_s64((t * t), 1000); + -+ kbase_destroy_context(kctx); ++ /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ ++ const s64 t3 = div_s64((t * t2), 1000); + -+ dev_dbg(kbdev->dev, "deleted base context\n"); -+ } ++ /* ++ * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in ++ * Deg^-N, so we need to multiply the last coefficient by 1000. ++ * Range: -2^63 < res_big < 2^63 ++ */ ++ const s64 res_big = ts[3] * t3 /* +/- 2^62 */ ++ + ts[2] * t2 /* +/- 2^55 */ ++ + ts[1] * t /* +/- 2^48 */ ++ + ts[0] * (s64)1000; /* +/- 2^41 */ + -+ kbase_release_device(kbdev); ++ /* Range: -2^60 < res_unclamped < 2^60 */ ++ s64 res_unclamped = div_s64(res_big, 1000); + -+ kfree(kfile); ++ /* Clamp to range of 0x to 10x the static power */ ++ return clamp(res_unclamped, (s64) 0, (s64) 10000000); +} + -+static int kbase_api_handshake(struct kbase_file *kfile, -+ struct kbase_ioctl_version_check *version) ++/* We can't call thermal_zone_get_temp() directly in model_static_coeff(), ++ * because we don't know if tz->lock is held in the same thread. So poll it in ++ * a separate thread to get around this. 
++ */ ++static int poll_temperature(void *data) +{ -+ int err = 0; ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *) data; ++ int temp; + -+ switch (version->major) { -+ case BASE_UK_VERSION_MAJOR: -+ /* set minor to be the lowest common */ -+ version->minor = min_t(int, BASE_UK_VERSION_MINOR, -+ (int)version->minor); -+ break; -+ default: -+ /* We return our actual version regardless if it -+ * matches the version returned by userspace - -+ * userspace can bail if it can't handle this -+ * version -+ */ -+ version->major = BASE_UK_VERSION_MAJOR; -+ version->minor = BASE_UK_VERSION_MINOR; -+ break; -+ } ++ set_freezable(); + -+ /* save the proposed version number for later use */ -+ err = kbase_file_set_api_version(kfile, version->major, version->minor); -+ if (unlikely(err)) -+ return err; ++ while (!kthread_should_stop()) { ++ struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); + -+ /* For backward compatibility, we may need to create the context before -+ * the flags have been set. Originally it was created on file open -+ * (with job submission disabled) but we don't support that usage. -+ */ -+ if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) -+ err = kbase_file_create_kctx(kfile, -+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); ++ if (tz) { ++ int ret; + -+ return err; -+} ++ ret = thermal_zone_get_temp(tz, &temp); ++ if (ret) { ++ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", ++ ret); ++ temp = FALLBACK_STATIC_TEMPERATURE; ++ } ++ } else { ++ temp = FALLBACK_STATIC_TEMPERATURE; ++ } + -+static int kbase_api_handshake_dummy(struct kbase_file *kfile, -+ struct kbase_ioctl_version_check *version) -+{ -+ return -EPERM; -+} ++ WRITE_ONCE(model_data->current_temperature, temp); + -+static int kbase_api_kinstr_prfcnt_enum_info( -+ struct kbase_file *kfile, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info *prfcnt_enum_info) -+{ -+ return kbase_kinstr_prfcnt_enum_info(kfile->kbdev->kinstr_prfcnt_ctx, -+ prfcnt_enum_info); -+} ++ msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); + -+static int kbase_api_kinstr_prfcnt_setup( -+ struct kbase_file *kfile, -+ union kbase_ioctl_kinstr_prfcnt_setup *prfcnt_setup) -+{ -+ return kbase_kinstr_prfcnt_setup(kfile->kbdev->kinstr_prfcnt_ctx, -+ prfcnt_setup); -+} ++ try_to_freeze(); ++ } + -+static struct kbase_device *to_kbase_device(struct device *dev) -+{ -+ return dev_get_drvdata(dev); ++ return 0; +} + -+int assign_irqs(struct kbase_device *kbdev) ++static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ -+ static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" }; ++ u32 temp_scaling_factor; ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *) model->model_data; ++ u64 coeff_big; ++ int temp; + -+#if IS_ENABLED(CONFIG_OF) -+ static const char *const irq_names[] = { "job", "mmu", "gpu" }; -+#endif ++ temp = READ_ONCE(model_data->current_temperature); + -+ struct platform_device *pdev; -+ int i; ++ /* Range: 0 <= temp_scaling_factor < 2^24 */ ++ temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, ++ temp); + -+ if (!kbdev) -+ return -ENODEV; ++ /* ++ * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This ++ * means static_coefficient must be in range ++ * 0 <= static_coefficient < 2^28. 
++ */ ++ coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; ++ *coeffp = div_u64(coeff_big, 1000000); + -+ pdev = to_platform_device(kbdev->dev); ++ return 0; ++} + -+ for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) { -+ int irq; ++static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++{ ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *) model->model_data; + -+#if IS_ENABLED(CONFIG_OF) -+ /* We recommend using Upper case for the irq names in dts, but if -+ * there are devices in the world using Lower case then we should -+ * avoid breaking support for them. So try using names in Upper case -+ * first then try using Lower case names. If both attempts fail then -+ * we assume there is no IRQ resource specified for the GPU. -+ */ -+ irq = platform_get_irq_byname(pdev, irq_names_caps[i]); -+ if (irq < 0) -+ irq = platform_get_irq_byname(pdev, irq_names[i]); ++#if MALI_USE_CSF ++ /* On CSF GPUs, the dynamic power for top-level and shader cores is ++ * estimated separately. Currently there is a single dynamic ++ * coefficient value provided in the device tree for simple model. ++ * As per the discussion with HW team the coefficient value needs to ++ * be scaled down for top-level to limit its contribution in the ++ * total dyanmic power. ++ */ ++ coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = ++ model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER; ++ coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ model_data->dynamic_coefficient; +#else -+ irq = platform_get_irq(pdev, i); -+#endif /* CONFIG_OF */ -+ -+ if (irq < 0) { -+ dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]); -+ return irq; -+ } -+ -+ kbdev->irqs[i].irq = irq; -+ kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq)); -+ } ++ *coeffp = model_data->dynamic_coefficient; ++#endif + + return 0; +} + -+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ -+struct kbase_device *kbase_find_device(int minor) ++static int add_params(struct kbase_ipa_model *model) +{ -+ struct kbase_device *kbdev = NULL; -+ struct list_head *entry; -+ const struct list_head *dev_list = kbase_device_get_list(); ++ int err = 0; ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; + -+ list_for_each(entry, dev_list) { -+ struct kbase_device *tmp; ++ err = kbase_ipa_model_add_param_s32(model, "static-coefficient", ++ (s32 *)&model_data->static_coefficient, 1, true); ++ if (err) ++ goto end; + -+ tmp = list_entry(entry, struct kbase_device, entry); -+ if (tmp->mdev.minor == minor || minor == -1) { -+ kbdev = tmp; -+ get_device(kbdev->dev); -+ break; -+ } -+ } -+ kbase_device_put_list(dev_list); ++ err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", ++ (s32 *)&model_data->dynamic_coefficient, 1, true); ++ if (err) ++ goto end; + -+ return kbdev; -+} -+EXPORT_SYMBOL(kbase_find_device); ++ err = kbase_ipa_model_add_param_s32(model, "ts", ++ model_data->ts, 4, true); ++ if (err) ++ goto end; + -+void kbase_release_device(struct kbase_device *kbdev) -+{ -+ put_device(kbdev->dev); ++ err = kbase_ipa_model_add_param_string(model, "thermal-zone", ++ model_data->tz_name, ++ sizeof(model_data->tz_name), true); ++ if (err) ++ goto end; ++ ++ model_data->temperature_poll_interval_ms = 200; ++ err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", ++ &model_data->temperature_poll_interval_ms, ++ 1, false); ++ 
++end: ++ return err; +} -+EXPORT_SYMBOL(kbase_release_device); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) ++static int kbase_simple_power_model_init(struct kbase_ipa_model *model) +{ -+ struct kbase_context *kctx = f->private_data; + int err; -+ bool value; ++ struct kbase_ipa_model_simple_data *model_data; + -+ err = kstrtobool_from_user(ubuf, size, &value); -+ if (err) ++ model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), ++ GFP_KERNEL); ++ if (!model_data) ++ return -ENOMEM; ++ ++ model->model_data = (void *) model_data; ++ ++ model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; ++ model_data->poll_temperature_thread = kthread_run(poll_temperature, ++ (void *) model_data, ++ "mali-simple-power-model-temp-poll"); ++ if (IS_ERR(model_data->poll_temperature_thread)) { ++ err = PTR_ERR(model_data->poll_temperature_thread); ++ kfree(model_data); + return err; ++ } + -+ if (value) -+ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); -+ else -+ kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); ++ err = add_params(model); ++ if (err) { ++ kbase_ipa_model_param_free_all(model); ++ kthread_stop(model_data->poll_temperature_thread); ++ kfree(model_data); ++ } + -+ return size; ++ return err; +} + -+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) ++static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) +{ -+ struct kbase_context *kctx = f->private_data; -+ char buf[32]; -+ int count; -+ bool value; -+ -+ value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; ++ struct thermal_zone_device *tz; + -+ count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N"); ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+ return simple_read_from_buffer(ubuf, size, off, buf, count); -+} ++ if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { ++ model_data->gpu_tz = NULL; ++ } else { ++ char tz_name[THERMAL_NAME_LENGTH]; ++ u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name)); + -+static const struct file_operations kbase_infinite_cache_fops = { -+ .owner = THIS_MODULE, -+ .open = simple_open, -+ .write = write_ctx_infinite_cache, -+ .read = read_ctx_infinite_cache, -+}; ++ string_len += sizeof(char); ++ /* Make sure that the source string fit into the buffer. */ ++ KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name)); ++ CSTD_UNUSED(string_len); + -+static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, -+ size_t size, loff_t *off) -+{ -+ struct kbase_context *kctx = f->private_data; -+ int err; -+ bool value; ++ /* Release ipa.lock so that thermal_list_lock is not acquired ++ * with ipa.lock held, thereby avoid lock ordering violation ++ * lockdep warning. The warning comes as a chain of locks ++ * ipa.lock --> thermal_list_lock --> tz->lock gets formed ++ * on registering devfreq cooling device when probe method ++ * of mali platform driver is invoked. 
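
Taken together, the simple model's static contribution works out to static_coefficient * scale(T) / 1000000, where scale(T) is the clamped cubic produced by calculate_temp_scaling_factor(). A self-contained sketch with invented numbers (the ts[] values and the 2427 coefficient are assumptions, not values from this patch):

static u32 example_scaled_static_coeff(void)
{
        const s32 ts[4] = { 20000, 2000, -20, 2 }; /* assumed DT tuning */
        const u32 static_coefficient = 2427;       /* assumed coefficient */
        const s64 t = 60000;                       /* 60 degC in millidegrees */

        const s64 t2 = div_s64(t * t, 1000);
        const s64 t3 = div_s64(t * t2, 1000);
        s64 scale = div_s64(ts[3] * t3 + ts[2] * t2 + ts[1] * t +
                            ts[0] * (s64)1000, 1000);

        /* 1000000 means "no change"; with these inputs scale works out to
         * 500000, i.e. the static coefficient is roughly halved at 60 degC.
         */
        scale = clamp(scale, (s64)0, (s64)10000000);

        return div_u64((u64)static_coefficient * (u64)scale, 1000000);
}
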
++ */ ++ mutex_unlock(&model->kbdev->ipa.lock); ++ tz = thermal_zone_get_zone_by_name(tz_name); ++ mutex_lock(&model->kbdev->ipa.lock); + -+ err = kstrtobool_from_user(ubuf, size, &value); -+ if (err) -+ return err; ++ if (IS_ERR_OR_NULL(tz)) { ++ pr_warn_ratelimited( ++ "Error %d getting thermal zone \'%s\', not yet ready?\n", ++ PTR_ERR_OR_ZERO(tz), tz_name); ++ return -EPROBE_DEFER; ++ } + -+ if (value) { -+#if defined(CONFIG_64BIT) -+ /* 32-bit clients cannot force SAME_VA */ -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ return -EINVAL; -+ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -+#else /* defined(CONFIG_64BIT) */ -+ /* 32-bit clients cannot force SAME_VA */ -+ return -EINVAL; -+#endif /* defined(CONFIG_64BIT) */ -+ } else { -+ kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA); ++ /* Check if another thread raced against us & updated the ++ * thermal zone name string. Update the gpu_tz pointer only if ++ * the name string did not change whilst we retrieved the new ++ * thermal_zone_device pointer, otherwise model_data->tz_name & ++ * model_data->gpu_tz would become inconsistent with each other. ++ * The below check will succeed only for the thread which last ++ * updated the name string. ++ */ ++ if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0) ++ model_data->gpu_tz = tz; + } + -+ return size; ++ return 0; +} + -+static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, -+ size_t size, loff_t *off) ++static void kbase_simple_power_model_term(struct kbase_ipa_model *model) +{ -+ struct kbase_context *kctx = f->private_data; -+ char buf[32]; -+ int count; -+ bool value; -+ -+ value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA); ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; + -+ count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); ++ kthread_stop(model_data->poll_temperature_thread); + -+ return simple_read_from_buffer(ubuf, size, off, buf, count); ++ kfree(model_data); +} + -+static const struct file_operations kbase_force_same_va_fops = { -+ .owner = THIS_MODULE, -+ .open = simple_open, -+ .write = write_ctx_force_same_va, -+ .read = read_ctx_force_same_va, ++struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { ++ .name = "mali-simple-power-model", ++ .init = &kbase_simple_power_model_init, ++ .recalculate = &kbase_simple_power_model_recalculate, ++ .term = &kbase_simple_power_model_term, ++ .get_dynamic_coeff = &model_dynamic_coeff, ++ .get_static_coeff = &model_static_coeff, +}; -+#endif /* CONFIG_DEBUG_FS */ -+ -+static int kbase_file_create_kctx(struct kbase_file *const kfile, -+ base_context_create_flags const flags) -+{ -+ struct kbase_device *kbdev = NULL; -+ struct kbase_context *kctx = NULL; -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ char kctx_name[64]; -+#endif -+ -+ if (WARN_ON(!kfile)) -+ return -EINVAL; -+ -+ /* setup pending, try to signal that we'll do the setup, -+ * if setup was already in progress, err this call -+ */ -+ if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, -+ KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX) -+ return -EPERM; ++KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); +diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h +new file mode 100644 +index 000000000..dd17786a5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kbdev = kfile->kbdev; ++#ifndef _KBASE_IPA_SIMPLE_H_ ++#define _KBASE_IPA_SIMPLE_H_ + -+ kctx = kbase_create_context(kbdev, in_compat_syscall(), -+ flags, kfile->api_version, kfile->filp); ++#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + -+ /* if bad flags, will stay stuck in setup mode */ -+ if (!kctx) -+ return -ENOMEM; ++extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; + -+ if (kbdev->infinite_cache_active_default) -+ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); ++#if MALI_UNIT_TEST ++/** ++ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value ++ * @temp: Temperature of the thermal zone, in millidegrees celsius. ++ * ++ * This is only intended for use in unit tests, to ensure that the temperature ++ * values used by the simple power model are predictable. Deterministic ++ * behavior is necessary to allow validation of the static power values ++ * computed by this model. 
++ */ ++void kbase_simple_power_model_set_dummy_temp(int temp); ++#endif /* MALI_UNIT_TEST */ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id))) -+ return -ENOMEM; ++#endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + -+ mutex_init(&kctx->mem_profile_lock); ++#endif /* _KBASE_IPA_SIMPLE_H_ */ +diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +new file mode 100644 +index 000000000..debc3ad25 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +@@ -0,0 +1,878 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kctx->kctx_dentry = debugfs_create_dir(kctx_name, -+ kbdev->debugfs_ctx_directory); ++/* ++ * Definitions (types, defines, etcs) specific to Job Manager Kbase. ++ * They are placed here to allow the hierarchy of header files to work. ++ */ + -+ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { -+ /* we don't treat this as a fail - just warn about it */ -+ dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); -+ } else { -+ debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, -+ kctx, &kbase_infinite_cache_fops); -+ debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, -+ kctx, &kbase_force_same_va_fops); ++#ifndef _KBASE_JM_DEFS_H_ ++#define _KBASE_JM_DEFS_H_ + -+ kbase_context_debugfs_init(kctx); -+ } -+#endif /* CONFIG_DEBUG_FS */ ++#include "mali_kbase_js_defs.h" + -+ dev_dbg(kbdev->dev, "created base context\n"); ++/* Dump Job slot trace on error (only active if KBASE_KTRACE_ENABLE != 0) */ ++#define KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR 1 + -+ kfile->kctx = kctx; -+ atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE); ++/* ++ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" ++ * from the hardware. Note that the time is actually ++ * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and ++ * the GPU actually being reset to give other contexts time for their jobs ++ * to be soft-stopped and removed from the hardware before resetting. ++ */ ++#define ZAP_TIMEOUT 1000 + -+ return 0; -+} -+ -+static int kbase_open(struct inode *inode, struct file *filp) -+{ -+ struct kbase_device *kbdev = NULL; -+ struct kbase_file *kfile; -+ int ret = 0; ++/* ++ * Prevent soft-stops from occurring in scheduling situations ++ * ++ * This is not due to HW issues, but when scheduling is desired to be more ++ * predictable. ++ * ++ * Therefore, soft stop may still be disabled due to HW issues. ++ * ++ * Soft stop will still be used for non-scheduling purposes e.g. when ++ * terminating a context. 
++ * ++ * if not in use, define this value to 0 instead of being undefined. ++ */ ++#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + -+ kbdev = kbase_find_device(iminor(inode)); ++/* ++ * Prevent hard-stops from occurring in scheduling situations ++ * ++ * This is not due to HW issues, but when scheduling is desired to be more ++ * predictable. ++ * ++ * Hard stop will still be used for non-scheduling purposes e.g. when ++ * terminating a context. ++ * ++ * if not in use, define this value to 0 instead of being undefined. ++ */ ++#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + -+ if (!kbdev) -+ return -ENODEV; ++/* Atom has been previously soft-stopped */ ++#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) ++/* Atom has been previously retried to execute */ ++#define KBASE_KATOM_FLAGS_RERUN (1<<2) ++/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps ++ * to disambiguate short-running job chains during soft/hard stopping of jobs ++ */ ++#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) ++/* Atom has been previously hard-stopped. */ ++#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) ++/* Atom has caused us to enter disjoint state */ ++#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) ++/* Atom blocked on cross-slot dependency */ ++#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) ++/* Atom has fail dependency on cross-slot dependency */ ++#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) ++/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ ++#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) ++/* Atom requires GPU to be in protected mode */ ++#define KBASE_KATOM_FLAG_PROTECTED (1<<11) ++/* Atom has been stored in runnable_tree */ ++#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) ++/* Atom is waiting for L2 caches to power up in order to enter protected mode */ ++#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+ /* Set address space operations for page migration */ -+ kbase_mem_migrate_set_address_space_ops(kbdev, filp); -+#endif ++/* SW related flags about types of JS_COMMAND action ++ * NOTE: These must be masked off by JS_COMMAND_MASK ++ */ + -+ /* Device-wide firmware load is moved here from probing to comply with -+ * Android GKI vendor guideline. -+ */ -+ ret = kbase_device_firmware_init_once(kbdev); -+ if (ret) -+ goto out; ++/* This command causes a disjoint event */ ++#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + -+ kfile = kbase_file_new(kbdev, filp); -+ if (!kfile) { -+ ret = -ENOMEM; -+ goto out; -+ } ++/* Bitmask of all SW related flags */ ++#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + -+ filp->private_data = kfile; -+ filp->f_mode |= FMODE_UNSIGNED_OFFSET; ++#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) ++#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ ++ "Must update JS_COMMAND_SW_<..> bitmasks" ++#endif + -+ return 0; ++/* Soft-stop command that causes a Disjoint event. 
This of course isn't ++ * entirely masked off by JS_COMMAND_MASK ++ */ ++#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ ++ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + -+out: -+ kbase_release_device(kbdev); -+ return ret; -+} ++#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + -+static int kbase_release(struct inode *inode, struct file *filp) -+{ -+ struct kbase_file *const kfile = filp->private_data; ++/* Serialize atoms within a slot (ie only one atom per job slot) */ ++#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) ++/* Serialize atoms between slots (ie only one job slot running at any time) */ ++#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) ++/* Reset the GPU after each atom completion */ ++#define KBASE_SERIALIZE_RESET (1 << 2) + -+ kbase_file_delete(kfile); -+ return 0; -+} ++/** ++ * enum kbase_timeout_selector - The choice of which timeout to get scaled ++ * using the lowest GPU frequency. ++ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion ++ * of a MMU operation ++ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT ++ * to be updated on HW side so a Job Slot is ++ * considered free. ++ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in ++ * the enum. ++ */ ++enum kbase_timeout_selector { ++ MMU_AS_INACTIVE_WAIT_TIMEOUT, ++ JM_DEFAULT_JS_FREE_TIMEOUT, + -+static int kbase_api_set_flags(struct kbase_file *kfile, -+ struct kbase_ioctl_set_flags *flags) -+{ -+ int err = 0; -+ unsigned long const api_version = kbase_file_get_api_version(kfile); -+ struct kbase_context *kctx = NULL; ++ /* Must be the last in the enum */ ++ KBASE_TIMEOUT_SELECTOR_COUNT ++}; + -+ /* Validate flags */ -+ if (flags->create_flags != -+ (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) -+ return -EINVAL; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * struct base_job_fault_event - keeps track of the atom which faulted or which ++ * completed after the faulty atom but before the ++ * debug data for faulty atom was dumped. ++ * ++ * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for ++ * the atom which faulted. ++ * @katom: pointer to the atom for which job fault occurred or which ++ * completed after the faulty atom. ++ * @job_fault_work: work item, queued only for the faulty atom, which waits for ++ * the dumping to get completed and then does the bottom half ++ * of job done for the atoms which followed the faulty atom. ++ * @head: List head used to store the atom in the global list of ++ * faulty atoms or context specific list of atoms which got ++ * completed during the dump. ++ * @reg_offset: offset of the register to be dumped next, only applicable ++ * for the faulty atom. ++ */ ++struct base_job_fault_event { + -+ /* For backward compatibility, the context may have been created before -+ * the flags were set. -+ */ -+ if (mali_kbase_supports_system_monitor(api_version)) { -+ err = kbase_file_create_kctx(kfile, flags->create_flags); -+ } else { -+#if !MALI_USE_CSF -+ struct kbasep_js_kctx_info *js_kctx_info = NULL; -+ unsigned long irq_flags = 0; ++ u32 event_code; ++ struct kbase_jd_atom *katom; ++ struct work_struct job_fault_work; ++ struct list_head head; ++ int reg_offset; ++}; +#endif + -+ /* If setup is incomplete (e.g. because the API version -+ * wasn't set) then we have to give up. 
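
JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT deliberately carries a bit that lies outside JS_COMMAND_MASK: the SW bit is consumed by the driver and stripped before the command reaches the hardware. A minimal sketch of that masking idiom; example_note_disjoint() and example_write_js_command() are hypothetical stand-ins, not driver functions:

static void example_note_disjoint(void)
{
        /* placeholder for the driver's disjoint-event accounting */
}

static void example_write_js_command(u32 hw_cmd)
{
        /* placeholder for the JS_COMMAND_NEXT register write */
        (void)hw_cmd;
}

static void example_issue_stop(u32 action)
{
        if (action & JS_COMMAND_SW_CAUSES_DISJOINT)
                example_note_disjoint();

        /* Only bits covered by JS_COMMAND_MASK ever reach the hardware. */
        example_write_js_command(action & JS_COMMAND_MASK);
}

For instance, example_issue_stop(JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT) would record a disjoint event and then issue a plain soft-stop to the job slot.
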
-+ */ -+ kctx = kbase_file_get_kctx_if_setup_complete(kfile); -+ if (unlikely(!kctx)) -+ return -EPERM; -+ -+#if MALI_USE_CSF -+ /* On CSF GPUs Job Manager interface isn't used to submit jobs -+ * (there are no job slots). So the legacy job manager path to -+ * submit jobs needs to remain disabled for CSF GPUs. -+ */ -+#else -+ js_kctx_info = &kctx->jctx.sched_info; -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); -+ /* Translate the flags */ -+ if ((flags->create_flags & -+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) -+ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); -+ -+ -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+#endif -+ } ++/** ++ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. ++ * @atom: pointer to the dependee atom. ++ * @dep_type: type of dependency on the dependee @atom, i.e. order or data ++ * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. ++ */ ++struct kbase_jd_atom_dependency { ++ struct kbase_jd_atom *atom; ++ u8 dep_type; ++}; + -+ return err; ++/** ++ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the ++ * dependee atom. ++ * @dep: pointer to the dependency info structure. ++ * ++ * Return: readonly reference to dependee atom. ++ */ ++static inline const struct kbase_jd_atom * ++kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) ++{ ++ return (const struct kbase_jd_atom *)(dep->atom); +} + -+#if !MALI_USE_CSF -+static int kbase_api_job_submit(struct kbase_context *kctx, -+ struct kbase_ioctl_job_submit *submit) ++/** ++ * kbase_jd_katom_dep_type - Retrieves the dependency type info ++ * ++ * @dep: pointer to the dependency info structure. ++ * ++ * Return: the type of dependency there is on the dependee atom. ++ */ ++static inline u8 kbase_jd_katom_dep_type( ++ const struct kbase_jd_atom_dependency *dep) +{ -+ return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), -+ submit->nr_atoms, -+ submit->stride, false); ++ return dep->dep_type; +} -+#endif /* !MALI_USE_CSF */ + -+static int kbase_api_get_gpuprops(struct kbase_file *kfile, -+ struct kbase_ioctl_get_gpuprops *get_props) ++/** ++ * kbase_jd_katom_dep_set - sets up the dependency info structure ++ * as per the values passed. ++ * @const_dep: pointer to the dependency info structure to be setup. ++ * @a: pointer to the dependee atom. ++ * @type: type of dependency there is on the dependee atom. 
++ */ ++static inline void kbase_jd_katom_dep_set( ++ const struct kbase_jd_atom_dependency *const_dep, ++ struct kbase_jd_atom *a, u8 type) +{ -+ struct kbase_gpu_props *kprops = &kfile->kbdev->gpu_props; -+ int err; -+ -+ if (get_props->flags != 0) { -+ dev_err(kfile->kbdev->dev, "Unsupported flags to get_gpuprops"); -+ return -EINVAL; -+ } ++ struct kbase_jd_atom_dependency *dep; + -+ if (get_props->size == 0) -+ return kprops->prop_buffer_size; -+ if (get_props->size < kprops->prop_buffer_size) -+ return -EINVAL; ++ dep = (struct kbase_jd_atom_dependency *)const_dep; + -+ err = copy_to_user(u64_to_user_ptr(get_props->buffer), -+ kprops->prop_buffer, -+ kprops->prop_buffer_size); -+ if (err) -+ return -EFAULT; -+ return kprops->prop_buffer_size; ++ dep->atom = a; ++ dep->dep_type = type; +} + -+#if !MALI_USE_CSF -+static int kbase_api_post_term(struct kbase_context *kctx) ++/** ++ * kbase_jd_katom_dep_clear - resets the dependency info structure ++ * ++ * @const_dep: pointer to the dependency info structure to be setup. ++ */ ++static inline void kbase_jd_katom_dep_clear( ++ const struct kbase_jd_atom_dependency *const_dep) +{ -+ kbase_event_close(kctx); -+ return 0; -+} -+#endif /* !MALI_USE_CSF */ ++ struct kbase_jd_atom_dependency *dep; + -+#if MALI_USE_CSF -+static int kbase_api_mem_alloc_ex(struct kbase_context *kctx, -+ union kbase_ioctl_mem_alloc_ex *alloc_ex) -+{ -+ struct kbase_va_region *reg; -+ u64 flags = alloc_ex->in.flags; -+ u64 gpu_va; ++ dep = (struct kbase_jd_atom_dependency *)const_dep; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ dep->atom = NULL; ++ dep->dep_type = BASE_JD_DEP_TYPE_INVALID; ++} + -+ bool gpu_executable = (flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx); -+ bool fixed_or_fixable = (flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)); ++/** ++ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it ++ * becomes runnable, with respect to job slot ++ * ringbuffer/fifo. ++ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, ++ * which implies that either atom has not become ++ * runnable due to dependency or has completed ++ * the execution on GPU. ++ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is ++ * blocked due to cross slot dependency, ++ * can't be submitted to GPU. ++ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot ++ * fifo but is waiting for the completion of ++ * previously added atoms in current & other ++ * slots, as their protected mode requirements ++ * do not match with the current atom. ++ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo ++ * and is waiting for completion of protected ++ * mode transition, needed before the atom is ++ * submitted to GPU. ++ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is ++ * waiting for the cores, which are needed to ++ * execute the job chain represented by the atom, ++ * to become available ++ * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to ++ * GPU. ++ * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted ++ * to GPU. ++ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some ++ * failure, but only after the previously added ++ * atoms in fifo have completed or have also ++ * been returned to JS. 
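
The kbase_jd_katom_dep_* accessors above are the intended way to inspect an atom's two dependency slots. A minimal illustrative helper under that assumption; it is not part of the driver:

static unsigned int example_count_live_deps(const struct kbase_jd_atom *katom)
{
        unsigned int i, live = 0;

        for (i = 0; i < 2; i++) {
                const struct kbase_jd_atom_dependency *dep = &katom->dep[i];

                /* A cleared slot has a NULL atom and
                 * BASE_JD_DEP_TYPE_INVALID as its type.
                 */
                if (kbase_jd_katom_dep_atom(dep) &&
                    kbase_jd_katom_dep_type(dep) != BASE_JD_DEP_TYPE_INVALID)
                        live++;
        }

        return live;
}
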
++ */ ++enum kbase_atom_gpu_rb_state { ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, ++ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, ++ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, ++ KBASE_ATOM_GPU_RB_READY, ++ KBASE_ATOM_GPU_RB_SUBMITTED, ++ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 ++}; + -+ if (!kbase_mem_allow_alloc(kctx)) -+ return -EINVAL; ++/** ++ * enum kbase_atom_enter_protected_state - The state of an atom with respect to ++ * the preparation for GPU's entry into protected mode, ++ * becomes pertinent only after atom's state with respect ++ * to slot ringbuffer is ++ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION ++ * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any ++ * atoms currently submitted to GPU and protected mode ++ * transition is not already in progress. ++ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to ++ * become disabled before entry into protected mode. ++ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in ++ * preparation for the coherency change. L2 shall be ++ * powered down and GPU shall come out of fully ++ * coherent mode before entering protected mode. ++ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; ++ * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on ++ * so that coherency register contains correct value when ++ * GPU enters protected mode. ++ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for ++ * BASE_HW_ISSUE_TGOX_R1_1234 check ++ * that L2 is powered up and switch GPU to protected mode. ++ */ ++enum kbase_atom_enter_protected_state { ++ /* ++ * NOTE: The integer value of this must match ++ * KBASE_ATOM_EXIT_PROTECTED_CHECK. ++ */ ++ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, ++ KBASE_ATOM_ENTER_PROTECTED_HWCNT, ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, ++ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, ++ KBASE_ATOM_ENTER_PROTECTED_FINISHED, ++}; + -+ /* The driver counts the number of FIXABLE and FIXED allocations because -+ * they're not supposed to happen at the same time. However, that is not -+ * a security concern: nothing bad happens if the two types of allocations -+ * are made at the same time. The only reason why the driver is guarding -+ * against them is because there's no client use case that is supposed -+ * to need both of them at the same time, and the driver wants to help -+ * the user space catch some obvious mistake. -+ * -+ * The driver is able to switch from FIXABLE allocations to FIXED and -+ * vice versa, if all the allocations of one kind are freed before trying -+ * to create allocations of a different kind. ++/** ++ * enum kbase_atom_exit_protected_state - The state of an atom with respect to ++ * the preparation for GPU's exit from protected mode, ++ * becomes pertinent only after atom's state with respect ++ * to slot ngbuffer is ++ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION ++ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any ++ * atoms currently submitted to GPU and protected mode ++ * transition is not already in progress. ++ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in ++ * preparation for the reset, as exiting protected mode ++ * requires a reset. 
++ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from ++ * protected mode ++ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to ++ * complete ++ */ ++enum kbase_atom_exit_protected_state { ++ /* ++ * NOTE: The integer value of this must match ++ * KBASE_ATOM_ENTER_PROTECTED_CHECK. + */ -+ if ((flags & BASE_MEM_FIXED) && (atomic64_read(&kctx->num_fixable_allocs) > 0)) -+ return -EINVAL; ++ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, ++ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, ++ KBASE_ATOM_EXIT_PROTECTED_RESET, ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, ++}; + -+ if ((flags & BASE_MEM_FIXABLE) && (atomic64_read(&kctx->num_fixed_allocs) > 0)) -+ return -EINVAL; ++/** ++ * struct kbase_jd_atom - object representing the atom, containing the complete ++ * state and attributes of an atom. ++ * @work: work item for the bottom half processing of the atom, ++ * by JD or JS, after it got executed on GPU or the ++ * input fence got signaled ++ * @start_timestamp: time at which the atom was submitted to the GPU, by ++ * updating the JS_HEAD_NEXTn register. ++ * @udata: copy of the user data sent for the atom in ++ * base_jd_submit. ++ * @kctx: Pointer to the base context with which the atom is ++ * associated. ++ * @dep_head: Array of 2 list heads, pointing to the two list of ++ * atoms ++ * which are blocked due to dependency on this atom. ++ * @dep_item: Array of 2 list heads, used to store the atom in the ++ * list of other atoms depending on the same dependee ++ * atom. ++ * @dep: Array containing the dependency info for the 2 atoms ++ * on which the atom depends upon. ++ * @jd_item: List head used during job dispatch job_done ++ * processing - as dependencies may not be entirely ++ * resolved at this point, ++ * we need to use a separate list head. ++ * @in_jd_list: flag set to true if atom's @jd_item is currently on ++ * a list, prevents atom being processed twice. ++ * @jit_ids: Zero-terminated array of IDs of just-in-time memory ++ * allocations written to by the atom. When the atom ++ * completes, the value stored at the ++ * &struct_base_jit_alloc_info.heap_info_gpu_addr of ++ * each allocation is read in order to enforce an ++ * overall physical memory usage limit. ++ * @nr_extres: number of external resources referenced by the atom. ++ * @extres: Pointer to @nr_extres VA regions containing the external ++ * resource allocation and other information. ++ * @nr_extres external resources referenced by the atom. ++ * @device_nr: indicates the coregroup with which the atom is ++ * associated, when ++ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. ++ * @jc: GPU address of the job-chain. ++ * @softjob_data: Copy of data read from the user space buffer that @jc ++ * points to. ++ * @fence: Stores either an input or output sync fence, ++ * depending on soft-job type ++ * @sync_waiter: Pointer to the sync fence waiter structure passed to ++ * the callback function on signaling of the input ++ * fence. ++ * @dma_fence: object containing pointers to both input & output ++ * fences and other related members used for explicit ++ * sync through soft jobs and for the implicit ++ * synchronization required on access to external ++ * resources. ++ * @dma_fence.fence_in: Points to the dma-buf input fence for this atom. ++ * The atom would complete only after the fence is ++ * signaled. ++ * @dma_fence.fence: Points to the dma-buf output fence for this atom. 
++ * @dma_fence.fence_cb: The object that is passed at the time of adding the ++ * callback that gets invoked when @dma_fence.fence_in ++ * is signaled. ++ * @dma_fence.fence_cb_added: Flag to keep a track if the callback was successfully ++ * added for @dma_fence.fence_in, which is supposed to be ++ * invoked on the signaling of fence. ++ * @dma_fence.context: The dma-buf fence context number for this atom. A ++ * unique context number is allocated to each katom in ++ * the context on context creation. ++ * @dma_fence.seqno: The dma-buf fence sequence number for this atom. This ++ * is increased every time this katom uses dma-buf fence ++ * @event_code: Event code for the job chain represented by the atom, ++ * both HW and low-level SW events are represented by ++ * event codes. ++ * @core_req: bitmask of BASE_JD_REQ_* flags specifying either ++ * Hw or Sw requirements for the job chain represented ++ * by the atom. ++ * @ticks: Number of scheduling ticks for which atom has been ++ * running on the GPU. ++ * @sched_priority: Priority of the atom for Job scheduling, as per the ++ * KBASE_JS_ATOM_SCHED_PRIO_*. ++ * @completed: Wait queue to wait upon for the completion of atom. ++ * @status: Indicates at high level at what stage the atom is in, ++ * as per KBASE_JD_ATOM_STATE_*, that whether it is not ++ * in use or its queued in JD or given to JS or ++ * submitted to Hw or it completed the execution on Hw. ++ * @work_id: used for GPU tracepoints, its a snapshot of the ++ * 'work_id' counter in kbase_jd_context which is ++ * incremented on every call to base_jd_submit. ++ * @slot_nr: Job slot chosen for the atom. ++ * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the ++ * excat low level state of the atom. ++ * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely ++ * tracking atom's state after it has entered ++ * Job scheduler on becoming runnable. Atom ++ * could be blocked due to cross slot dependency ++ * or waiting for the shader cores to become available ++ * or waiting for protected mode transitions to ++ * complete. ++ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU ++ * cache is needed for the atom and the shader cores ++ * used for atom have been kept on. ++ * @blocked: flag indicating that atom's resubmission to GPU is ++ * blocked till the work item is scheduled to return the ++ * atom to JS. ++ * @seq_nr: user-space sequence number, to order atoms in some ++ * temporal order ++ * @pre_dep: Pointer to atom that this atom has same-slot ++ * dependency on ++ * @post_dep: Pointer to atom that has same-slot dependency on ++ * this atom ++ * @x_pre_dep: Pointer to atom that this atom has cross-slot ++ * dependency on ++ * @x_post_dep: Pointer to atom that has cross-slot dependency on ++ * this atom ++ * @flush_id: The GPU's flush count recorded at the time of ++ * submission, ++ * used for the cache flush optimization ++ * @fault_event: Info for dumping the debug data on Job fault. ++ * @queue: List head used for 4 different purposes : ++ * Adds atom to the list of dma-buf fence waiting atoms. ++ * Adds atom to the list of atoms blocked due to cross ++ * slot dependency. ++ * Adds atom to the list of softjob atoms for which JIT ++ * allocation has been deferred ++ * Adds atom to the list of softjob atoms waiting for ++ * the signaling of fence. 
++ * @jit_node: Used to keep track of all JIT free/alloc jobs in ++ * submission order ++ * @jit_blocked: Flag indicating that JIT allocation requested through ++ * softjob atom will be reattempted after the impending ++ * free of other active JIT allocations. ++ * @will_fail_event_code: If non-zero, this indicates that the atom will fail ++ * with the set event_code when the atom is processed. ++ * Used for special handling of atoms, which have a data ++ * dependency on the failed atoms. ++ * @protected_state: State of the atom, as per ++ * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, ++ * when transitioning into or out of protected mode. ++ * Atom will be either entering or exiting the ++ * protected mode. ++ * @protected_state.enter: entering the protected mode. ++ * @protected_state.exit: exiting the protected mode. ++ * @runnable_tree_node: The node added to context's job slot specific rb tree ++ * when the atom becomes runnable. ++ * @age: Age of atom relative to other atoms in the context, ++ * is snapshot of the age_count counter in kbase ++ * context. ++ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. ++ * @renderpass_id:Renderpass identifier used to associate an atom that has ++ * BASE_JD_REQ_START_RENDERPASS set in its core requirements ++ * with an atom that has BASE_JD_REQ_END_RENDERPASS set. ++ * @jc_fragment: Set of GPU fragment job chains ++ */ ++struct kbase_jd_atom { ++ struct work_struct work; ++ ktime_t start_timestamp; + -+ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) -+ return -ENOMEM; ++ struct base_jd_udata udata; ++ struct kbase_context *kctx; + -+ /* The fixed_address parameter must be either a non-zero, page-aligned -+ * value for FIXED allocations or zero for any other kind of allocation. -+ */ -+ if (flags & BASE_MEM_FIXED) { -+ u64 aligned_fixed_address = alloc_ex->in.fixed_address & PAGE_MASK; ++ struct list_head dep_head[2]; ++ struct list_head dep_item[2]; ++ const struct kbase_jd_atom_dependency dep[2]; ++ struct list_head jd_item; ++ bool in_jd_list; + -+ if ((aligned_fixed_address == 0) || -+ (aligned_fixed_address != alloc_ex->in.fixed_address)) -+ return -EINVAL; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ u8 jit_ids[2]; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ gpu_va = aligned_fixed_address; -+ } else if (alloc_ex->in.fixed_address != 0) { -+ return -EINVAL; -+ } ++ u16 nr_extres; ++ struct kbase_va_region **extres; + -+ /* For 64-bit clients, force SAME_VA up to 2^(47)-1. -+ * For 32-bit clients, force SAME_VA up to 2^(32)-1. -+ * -+ * In both cases, the executable and fixed/fixable zones, and -+ * the executable+fixed/fixable zone, are all above this range. -+ */ -+ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && -+ kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { -+ if (!gpu_executable && !fixed_or_fixable) -+ flags |= BASE_MEM_SAME_VA; -+ } ++ u32 device_nr; ++ u64 jc; ++ void *softjob_data; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ struct { ++ /* Use the functions/API defined in mali_kbase_fence.h to ++ * when working with this sub struct ++ */ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence_in; ++#else ++ struct dma_fence *fence_in; ++#endif ++#endif ++ /* This points to the dma-buf output fence for this atom. If ++ * this is NULL then there is no fence for this atom and the ++ * following fields related to dma_fence may have invalid data. ++ * ++ * The context and seqno fields contain the details for this ++ * fence. 
++ * ++ * This fence is signaled when the katom is completed, ++ * regardless of the event_code of the katom (signal also on ++ * failure). ++ */ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ /* If CSF event memory allocation, need to force certain flags. -+ * SAME_VA - GPU address needs to be used as a CPU address, explicit -+ * mmap has to be avoided. -+ * CACHED_CPU - Frequent access to the event memory by CPU. -+ * COHERENT_SYSTEM - No explicit cache maintenance around the access -+ * to event memory so need to leverage the coherency support. -+ */ -+ if (flags & BASE_MEM_CSF_EVENT) { -+ /* We cannot honor this request */ -+ if (gpu_executable || fixed_or_fixable) -+ return -ENOMEM; ++ /* This is the callback object that is registered for the fence_in. ++ * The callback is invoked when the fence_in is signaled. ++ */ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence_cb fence_cb; ++#else ++ struct dma_fence_cb fence_cb; ++#endif ++ bool fence_cb_added; + -+ flags |= (BASE_MEM_SAME_VA | -+ BASE_MEM_CACHED_CPU | -+ BASE_MEM_COHERENT_SYSTEM); -+ } ++ unsigned int context; ++ atomic_t seqno; ++ } dma_fence; ++#endif /* CONFIG_SYNC_FILE */ + -+ reg = kbase_mem_alloc(kctx, alloc_ex->in.va_pages, alloc_ex->in.commit_pages, -+ alloc_ex->in.extension, &flags, &gpu_va, mmu_sync_info); ++ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy ++ * of some of the following members ++ */ ++ enum base_jd_event_code event_code; ++ base_jd_core_req core_req; ++ u8 jobslot; ++ u8 renderpass_id; ++ struct base_jd_fragment jc_fragment; + -+ if (!reg) -+ return -ENOMEM; ++ u32 ticks; ++ int sched_priority; + -+ alloc_ex->out.flags = flags; -+ alloc_ex->out.gpu_va = gpu_va; ++ wait_queue_head_t completed; ++ enum kbase_jd_atom_state status; ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ int work_id; ++#endif ++ unsigned int slot_nr; + -+ return 0; -+} ++ u32 atom_flags; + -+static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) -+{ -+ int ret; -+ union kbase_ioctl_mem_alloc_ex mem_alloc_ex = { { 0 } }; ++ enum kbase_atom_gpu_rb_state gpu_rb_state; + -+ mem_alloc_ex.in.va_pages = alloc->in.va_pages; -+ mem_alloc_ex.in.commit_pages = alloc->in.commit_pages; -+ mem_alloc_ex.in.extension = alloc->in.extension; -+ mem_alloc_ex.in.flags = alloc->in.flags; -+ mem_alloc_ex.in.fixed_address = 0; ++ bool need_cache_flush_cores_retained; + -+ ret = kbase_api_mem_alloc_ex(kctx, &mem_alloc_ex); ++ atomic_t blocked; + -+ alloc->out.flags = mem_alloc_ex.out.flags; -+ alloc->out.gpu_va = mem_alloc_ex.out.gpu_va; ++ u64 seq_nr; + -+ return ret; -+} -+#else -+static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) -+{ -+ struct kbase_va_region *reg; -+ u64 flags = alloc->in.flags; -+ u64 gpu_va; ++ struct kbase_jd_atom *pre_dep; ++ struct kbase_jd_atom *post_dep; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ struct kbase_jd_atom *x_pre_dep; ++ struct kbase_jd_atom *x_post_dep; + -+ if (!kbase_mem_allow_alloc(kctx)) -+ return -EINVAL; ++ u32 flush_id; + -+ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) -+ return -ENOMEM; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct base_job_fault_event fault_event; ++#endif ++ struct list_head queue; + -+ /* Force SAME_VA if a 64-bit client. 
-+ * The only exception is GPU-executable memory if an EXEC_VA zone -+ * has been initialized. In that case, GPU-executable memory may -+ * or may not be SAME_VA. -+ */ -+ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { -+ if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) -+ flags |= BASE_MEM_SAME_VA; -+ } ++ struct list_head jit_node; ++ bool jit_blocked; + -+ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extension, -+ &flags, &gpu_va, mmu_sync_info); ++ enum base_jd_event_code will_fail_event_code; + -+ if (!reg) -+ return -ENOMEM; ++ union { ++ enum kbase_atom_enter_protected_state enter; ++ enum kbase_atom_exit_protected_state exit; ++ } protected_state; + -+ alloc->out.flags = flags; -+ alloc->out.gpu_va = gpu_va; ++ struct rb_node runnable_tree_node; + -+ return 0; -+} -+#endif /* MALI_USE_CSF */ ++ u32 age; ++}; + -+static int kbase_api_mem_query(struct kbase_context *kctx, -+ union kbase_ioctl_mem_query *query) ++static inline bool kbase_jd_katom_is_protected( ++ const struct kbase_jd_atom *katom) +{ -+ return kbase_mem_query(kctx, query->in.gpu_addr, -+ query->in.query, &query->out.value); ++ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); +} + -+static int kbase_api_mem_free(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_free *free) ++/** ++ * kbase_jd_atom_is_younger - query if one atom is younger by age than another ++ * ++ * @katom_a: the first atom ++ * @katom_b: the second atom ++ * ++ * Return: true if the first atom is strictly younger than the second, ++ * false otherwise. ++ */ ++static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a, ++ const struct kbase_jd_atom *katom_b) +{ -+ return kbase_mem_free(kctx, free->gpu_addr); ++ return ((s32)(katom_a->age - katom_b->age) < 0); +} + -+#if !MALI_USE_CSF -+static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, -+ union kbase_kinstr_jm_fd *arg) ++/** ++ * kbase_jd_atom_is_earlier - Check whether the first atom has been submitted ++ * earlier than the second one ++ * ++ * @katom_a: the first atom ++ * @katom_b: the second atom ++ * ++ * Return: true if the first atom has been submitted earlier than the ++ * second atom. It is used to understand if an atom that is ready has been ++ * submitted earlier than the currently running atom, so that the currently ++ * running atom should be preempted to allow the ready atom to run. ++ */ ++static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a, ++ const struct kbase_jd_atom *katom_b) +{ -+ return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); -+} -+#endif ++ /* No seq_nr set? */ ++ if (!katom_a->seq_nr || !katom_b->seq_nr) ++ return false; + -+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, -+ struct kbase_ioctl_hwcnt_reader_setup *setup) -+{ -+ return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); ++ /* Efficiently handle the unlikely case of wrapping. ++ * The following code assumes that the delta between the sequence number ++ * of the two atoms is less than INT64_MAX. ++ * In the extremely unlikely case where the delta is higher, the comparison ++ * defaults for no preemption. ++ * The code also assumes that the conversion from unsigned to signed types ++ * works because the signed integers are 2's complement. 
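++ *
++ * Illustrative example (hypothetical values, not taken from the driver):
++ * with katom_a->seq_nr == U64_MAX (assigned just before the counter wraps)
++ * and katom_b->seq_nr == 1 (assigned just after the wrap), the unsigned
++ * difference is U64_MAX - 1, which reinterpreted as s64 is -2, so katom_a
++ * is still reported as earlier. The comparison therefore stays correct
++ * across the wrap, provided the two sequence numbers are within INT64_MAX
++ * of each other.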
++ */ ++ return (s64)(katom_a->seq_nr - katom_b->seq_nr) < 0; +} + -+static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, -+ union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) -+{ -+ u32 flags = timeinfo->in.request_flags; -+ struct timespec64 ts = { 0 }; -+ u64 timestamp = 0; -+ u64 cycle_cnt = 0; -+ -+ kbase_pm_context_active(kctx->kbdev); -+ -+ kbase_backend_get_gpu_time(kctx->kbdev, -+ (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL, -+ (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? ×tamp : NULL, -+ (flags & BASE_TIMEINFO_MONOTONIC_FLAG) ? &ts : NULL); -+ -+ if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) -+ timeinfo->out.timestamp = timestamp; -+ -+ if (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) -+ timeinfo->out.cycle_counter = cycle_cnt; ++/* ++ * Theory of operations: ++ * ++ * Atom objects are statically allocated within the context structure. ++ * ++ * Each atom is the head of two lists, one for the "left" set of dependencies, ++ * one for the "right" set. ++ */ + -+ if (flags & BASE_TIMEINFO_MONOTONIC_FLAG) { -+ timeinfo->out.sec = ts.tv_sec; -+ timeinfo->out.nsec = ts.tv_nsec; -+ } ++#define KBASE_JD_DEP_QUEUE_SIZE 256 + -+ kbase_pm_context_idle(kctx->kbdev); ++/** ++ * enum kbase_jd_renderpass_state - State of a renderpass ++ * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to ++ * START. ++ * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. ++ * Can transition to PEND_OOM or COMPLETE. ++ * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much ++ * memory and has a soft-stop pending. Can transition to ++ * OOM or COMPLETE. ++ * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much ++ * memory and therefore switched to incremental ++ * rendering. The fragment job chain is forced to run. ++ * Can only transition to RETRY. ++ * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at ++ * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. ++ * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at ++ * tiling used too much memory again and has a ++ * soft-stop pending. Can transition to RETRY_OOM ++ * or COMPLETE. ++ * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at ++ * tiling used too much memory again. The fragment job ++ * chain is forced to run. Can only transition to RETRY. ++ * ++ * A state machine is used to control incremental rendering. ++ */ ++enum kbase_jd_renderpass_state { ++ KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ ++ KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ ++ KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ ++ KBASE_JD_RP_OOM, /* OOM => RETRY */ ++ KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */ ++ KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */ ++ KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */ ++}; + -+ return 0; -+} ++/** ++ * struct kbase_jd_renderpass - Data for a renderpass ++ * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then ++ * all other members are invalid. ++ * Both the job dispatcher context and hwaccess_lock must be ++ * locked to modify this so that it can be read with either ++ * (or both) locked. ++ * @start_katom: Address of the atom that is the start of a renderpass. ++ * Both the job dispatcher context and hwaccess_lock must be ++ * locked to modify this so that it can be read with either ++ * (or both) locked. 
++ * @end_katom: Address of the atom that is the end of a renderpass, or NULL ++ * if that atom hasn't been added to the job scheduler yet. ++ * The job dispatcher context and hwaccess_lock must be ++ * locked to modify this so that it can be read with either ++ * (or both) locked. ++ * @oom_reg_list: A list of region structures which triggered out-of-memory. ++ * The hwaccess_lock must be locked to access this. ++ * ++ * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS ++ * are associated with an object of this type, which is created and maintained ++ * by kbase to keep track of each renderpass. ++ */ ++struct kbase_jd_renderpass { ++ enum kbase_jd_renderpass_state state; ++ struct kbase_jd_atom *start_katom; ++ struct kbase_jd_atom *end_katom; ++ struct list_head oom_reg_list; ++}; + -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+static int kbase_api_hwcnt_set(struct kbase_context *kctx, -+ struct kbase_ioctl_hwcnt_values *values) -+{ -+ return gpu_model_set_dummy_prfcnt_user_sample(u64_to_user_ptr(values->data), values->size); -+} -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++/** ++ * struct kbase_jd_context - per context object encapsulating all the ++ * Job dispatcher related state. ++ * @lock: lock to serialize the updates made to the ++ * Job dispatcher state and kbase_jd_atom objects. ++ * @sched_info: Structure encapsulating all the Job scheduling ++ * info. ++ * @atoms: Array of the objects representing atoms, ++ * containing the complete state and attributes ++ * of an atom. ++ * @renderpasses: Array of renderpass state for incremental ++ * rendering, indexed by user-specified renderpass ++ * ID. ++ * @job_nr: Tracks the number of atoms being processed by the ++ * kbase. This includes atoms that are not tracked by ++ * scheduler: 'not ready to run' & 'dependency-only' ++ * jobs. ++ * @zero_jobs_wait: Waitq that reflects whether there are no jobs ++ * (including SW-only dependency jobs). This is set ++ * when no jobs are present on the ctx, and clear ++ * when there are jobs. ++ * This must be updated atomically with @job_nr. ++ * note: Job Dispatcher knows about more jobs than ++ * the Job Scheduler as it is unaware of jobs that ++ * are blocked on dependencies and SW-only dependency ++ * jobs. This waitq can be waited upon to find out ++ * when the context jobs are all done/cancelled ++ * (including those that might've been blocked ++ * on dependencies) - and so, whether it can be ++ * terminated. However, it should only be terminated ++ * once it is not present in the run-pool. ++ * Since the waitq is only set under @lock, ++ * the waiter should also briefly obtain and drop ++ * @lock to guarantee that the setter has completed ++ * its work on the kbase_context ++ * @job_done_wq: Workqueue to which the per atom work item is ++ * queued for bottom half processing when the ++ * atom completes ++ * execution on GPU or the input fence get signaled. ++ * @tb_lock: Lock to serialize the write access made to @tb to ++ * store the register access trace messages. ++ * @tb: Pointer to the Userspace accessible buffer storing ++ * the trace messages for register read/write ++ * accesses made by the Kbase. The buffer is filled ++ * in circular fashion. ++ * @tb_wrap_offset: Offset to the end location in the trace buffer, ++ * the write pointer is moved to the beginning on ++ * reaching this offset. ++ * @work_id: atomic variable used for GPU tracepoints, ++ * incremented on every call to base_jd_submit. 
++ * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both ++ * allocate & free, in submission order, protected ++ * by kbase_jd_context.lock. ++ * @jit_pending_alloc: A list of just-in-time memory allocation ++ * soft-jobs which will be reattempted after the ++ * impending free of other active allocations. ++ * @max_priority: Max priority level allowed for this context. ++ */ ++struct kbase_jd_context { ++ struct mutex lock; ++ struct kbasep_js_kctx_info sched_info; ++ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; ++ struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; ++ struct workqueue_struct *job_done_wq; + -+static int kbase_api_disjoint_query(struct kbase_context *kctx, -+ struct kbase_ioctl_disjoint_query *query) -+{ -+ query->counter = kbase_disjoint_event_get(kctx->kbdev); ++ wait_queue_head_t zero_jobs_wait; ++ spinlock_t tb_lock; ++ u32 *tb; ++ u32 job_nr; ++ size_t tb_wrap_offset; + -+ return 0; -+} ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ atomic_t work_id; ++#endif + -+static int kbase_api_get_ddk_version(struct kbase_context *kctx, -+ struct kbase_ioctl_get_ddk_version *version) -+{ -+ int ret; -+ int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); ++ struct list_head jit_atoms_head; ++ struct list_head jit_pending_alloc; ++ int max_priority; ++}; + -+ if (version->version_buffer == 0) -+ return len; ++/** ++ * struct jsctx_queue - JS context atom queue ++ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this ++ * job slot. ++ * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot ++ * dependencies. Atoms on this list will be moved to the ++ * runnable_tree when the blocking atom completes. ++ * ++ * hwaccess_lock must be held when accessing this structure. ++ */ ++struct jsctx_queue { ++ struct rb_root runnable_tree; ++ struct list_head x_dep_head; ++}; + -+ if (version->size < len) -+ return -EOVERFLOW; ++/** ++ * struct kbase_as - Object representing an address space of GPU. ++ * @number: Index at which this address space structure is present ++ * in an array of address space structures embedded inside ++ * the &struct kbase_device. ++ * @pf_wq: Workqueue for processing work items related to ++ * Page fault and Bus fault handling. ++ * @work_pagefault: Work item for the Page fault handling. ++ * @work_busfault: Work item for the Bus fault handling. ++ * @pf_data: Data relating to Page fault. ++ * @bf_data: Data relating to Bus fault. ++ * @current_setup: Stores the MMU configuration for this address space. ++ * @is_unresponsive: Flag to indicate MMU is not responding. ++ * Set if a MMU command isn't completed within ++ * &kbase_device:mmu_as_inactive_wait_time_ms. ++ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. 
++ */ ++struct kbase_as { ++ int number; ++ struct workqueue_struct *pf_wq; ++ struct work_struct work_pagefault; ++ struct work_struct work_busfault; ++ struct kbase_fault pf_data; ++ struct kbase_fault bf_data; ++ struct kbase_mmu_setup current_setup; ++ bool is_unresponsive; ++}; + -+ ret = copy_to_user(u64_to_user_ptr(version->version_buffer), -+ KERNEL_SIDE_DDK_VERSION_STRING, -+ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); ++#endif /* _KBASE_JM_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +new file mode 100644 +index 000000000..53819caaf +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +@@ -0,0 +1,1044 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (ret) -+ return -EFAULT; ++/* ++ * Job Scheduler Interface. ++ * These interfaces are Internal to KBase. ++ */ + -+ return len; -+} ++#ifndef _KBASE_JM_JS_H_ ++#define _KBASE_JM_JS_H_ + -+static int kbase_api_mem_jit_init(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_jit_init *jit_init) -+{ -+ int i; ++#include "mali_kbase_js_ctx_attr.h" + -+ for (i = 0; i < sizeof(jit_init->padding); i++) { -+ /* Ensure all padding bytes are 0 for potential future -+ * extension -+ */ -+ if (jit_init->padding[i]) -+ return -EINVAL; -+ } ++#define JS_MAX_RUNNING_JOBS 8 + -+ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, -+ jit_init->max_allocations, jit_init->trim_level, -+ jit_init->group_id, jit_init->phys_pages); -+} ++/** ++ * kbasep_js_devdata_init - Initialize the Job Scheduler ++ * @kbdev: The kbase_device to operate on ++ * ++ * The struct kbasep_js_device_data sub-structure of kbdev must be zero ++ * initialized before passing to the kbasep_js_devdata_init() function. This is ++ * to give efficient error path code. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++int kbasep_js_devdata_init(struct kbase_device * const kbdev); + -+static int kbase_api_mem_exec_init(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_exec_init *exec_init) -+{ -+ return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); -+} ++/** ++ * kbasep_js_devdata_halt - Halt the Job Scheduler. ++ * @kbdev: The kbase_device to operate on ++ * ++ * It is safe to call this on kbdev even if it the kbasep_js_device_data ++ * sub-structure was never initialized/failed initialization, to give efficient ++ * error-path code. ++ * ++ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev ++ * must be zero initialized before passing to the kbasep_js_devdata_init() ++ * function. This is to give efficient error path code. 
++ *
++ * It is a programming error to call this whilst there are still kbase_context
++ * structures registered with this scheduler.
++ *
++ */
++void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
++/**
++ * kbasep_js_devdata_term - Terminate the Job Scheduler
++ * @kbdev: The kbase_device to operate on
++ *
++ * It is safe to call this on kbdev even if the kbasep_js_device_data
++ * sub-structure was never initialized/failed initialization, to give efficient
++ * error-path code.
++ *
++ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
++ * must be zero initialized before passing to the kbasep_js_devdata_init()
++ * function. This is to give efficient error path code.
++ *
++ * It is a programming error to call this whilst there are still kbase_context
++ * structures registered with this scheduler.
++ */
++void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
++/**
++ * kbasep_js_kctx_init - Initialize the Scheduling Component of a
++ * struct kbase_context on the Job Scheduler.
++ * @kctx: The kbase_context to operate on
++ *
++ * This effectively registers a struct kbase_context with a Job Scheduler.
++ *
++ * It does not register any jobs owned by the struct kbase_context with
++ * the scheduler. Those must be separately registered by kbasep_js_add_job().
++ *
++ * The struct kbase_context must be zero initialized before passing to the
++ * kbase_js_init() function. This is to give efficient error path code.
++ *
++ * Return: 0 on success, error code otherwise.
++ */
++int kbasep_js_kctx_init(struct kbase_context *const kctx);
+
++/**
++ * kbasep_js_kctx_term - Terminate the Scheduling Component of a
++ * struct kbase_context on the Job Scheduler
++ * @kctx: The kbase_context to operate on
++ *
++ * This effectively de-registers a struct kbase_context from its Job Scheduler.
++ *
++ * It is safe to call this on a struct kbase_context that has never had or
++ * failed initialization of its jctx.sched_info member, to give efficient
++ * error-path code.
++ *
++ * For this to work, the struct kbase_context must be zero initialized before
++ * passing to the kbase_js_init() function.
++ *
++ * It is a programming error to call this whilst there are still jobs
++ * registered with this context.
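++ *
++ * A minimal sketch of the expected init/term pairing (illustrative only;
++ * error handling and the surrounding context setup/teardown code are
++ * elided):
++ *
++ *   // during context creation, after zero-initializing the kbase_context
++ *   if (kbasep_js_kctx_init(kctx))
++ *           goto fail;  // kbasep_js_kctx_term(kctx) is still safe here
++ *
++ *   // during context destruction, once no jobs remain registered
++ *   kbasep_js_kctx_term(kctx);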
++ */
++void kbasep_js_kctx_term(struct kbase_context *kctx);
+
++/* kbase_jsctx_slot_prio_blocked_set - Set a context as being blocked for a job
++ * slot at and below a given priority level
++ * @kctx: The kbase_context
++ * @js: The job slot
++ * @sched_prio: The priority level that the context is blocked at for @js (all
++ * priority levels at this level and below will be blocked)
++ *
++ * To preserve ordering and dependencies of atoms on soft-stopping (both within
++ * and between priority levels), a context must be marked as blocked for that
++ * atom's job slot, for all priority levels at or below the atom's priority.
++ *
++ * This must only be called due to an atom that was pulled from the context,
++ * otherwise there will be no way of unblocking the context when the atom is
++ * completed/unpulled.
++ *
++ * Atoms of higher priority might still be able to be pulled from the context
++ * on @js. This helps with starting a high priority atom as soon as possible.
++ */
++static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js,
++						     int sched_prio)
+{
++	struct kbase_jsctx_slot_tracking *slot_tracking =
++		&kctx->slot_tracking[js];
+
++	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
++	WARN(!slot_tracking->atoms_pulled_pri[sched_prio],
++	     "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
++	     js, sched_prio);
+
++	slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio;
++	KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_BLOCKED, kctx,
++				      NULL, 0, js, (unsigned int)sched_prio);
+}
+
++/* kbase_jsctx_atoms_pulled - Return number of atoms pulled on a context
++ * @kctx: The kbase_context
++ *
++ * Having atoms pulled indicates the context is not idle.
++ *
++ * Return: the number of atoms pulled on @kctx
++ */
++static inline int kbase_jsctx_atoms_pulled(struct kbase_context *kctx)
+{
++	return atomic_read(&kctx->atoms_pulled_all_slots);
+}
+
++/**
++ * kbasep_js_add_job - Add a job chain to the Job Scheduler,
++ * and take necessary actions to
++ * schedule the context/run the job.
++ * @kctx: The kbase_context to operate on
++ * @atom: Atom to add
++ *
++ * This atomically does the following:
++ * * Update the numbers of jobs information
++ * * Add the job to the run pool if necessary (part of init_job)
++ *
++ * Once this is done, then an appropriate action is taken:
++ * * If the ctx is scheduled, it attempts to start the next job (which might be
++ * this added job)
++ * * Otherwise, and if this is the first job on the context, it enqueues it on
++ * the Policy Queue
++ *
++ * The Policy's Queue can be updated by this in the following ways:
++ * * In the above case that this is the first job on the context
++ * * If the context is high priority and the context is not scheduled, then it
++ * could cause the Policy to schedule out a low-priority context, allowing
++ * this context to be scheduled in.
++ *
++ * If the context is already scheduled on the RunPool, then adding a job to it
++ * is guaranteed not to update the Policy Queue. And so, the caller is
++ * guaranteed to not need to try scheduling a context from the Run Pool - it
++ * can safely assert that the result is false.
++ *
++ * It is a programming error to have more than U32_MAX jobs in flight at a time.
++ *
++ * The following locking conditions are made on the caller:
++ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * * it must not hold hwaccess_lock (as this will be obtained internally)
++ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
++ * obtained internally)
++ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used
++ * internally).
++ *
++ * Return: true indicates that the Policy Queue was updated, and so the
++ * caller will need to try scheduling a context onto the Run Pool;
++ * false indicates that no updates were made to the Policy Queue,
++ * so no further action is required from the caller. This is always returned
++ * when the context is currently scheduled.
++ */
++bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
++/**
++ * kbasep_js_remove_job - Remove a job chain from the Job Scheduler,
++ * except for its 'retained state'.
++ * @kbdev: The kbase_device to operate on
++ * @kctx: The kbase_context to operate on
++ * @atom: Atom to remove
++ *
++ * Completely removing a job requires several calls:
++ * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
++ * the atom
++ * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
++ * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
++ * remaining state held as part of the job having been run.
++ *
++ * In the common case of atoms completing normally, this set of actions is more
++ * optimal for spinlock purposes than having kbasep_js_remove_job() handle all
++ * of the actions.
++ *
++ * In the case of cancelling atoms, it is easier to call
++ * kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
++ *
++ * It is a programming error to call this when:
++ * * an atom is not a job belonging to kctx.
++ * * an atom has already been removed from the Job Scheduler.
++ * * an atom is still in the runpool
++ *
++ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
++ * kbasep_js_remove_cancelled_job() instead.
++ *
++ * The following locking conditions are made on the caller:
++ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
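++ *
++ * A rough sketch of the normal-completion sequence listed above
++ * (illustrative only; locking, error handling and the real call sites in
++ * the driver are elided, kbasep_js_atom_retained_state_copy() is the
++ * retained-state copy helper declared later in this header, and "katom" is
++ * a placeholder for the completed atom):
++ *
++ *   struct kbasep_js_atom_retained_state retained;
++ *
++ *   kbasep_js_atom_retained_state_copy(&retained, katom);
++ *   kbasep_js_remove_job(kbdev, kctx, katom);
++ *   kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
++ *                                                          &retained);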
++ *
++ */
+void kbasep_js_remove_job(struct kbase_device *kbdev,
++		struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
++/**
++ * kbasep_js_remove_cancelled_job - Completely remove a job chain from the
++ * Job Scheduler, in the case
++ * where the job chain was cancelled.
++ * @kbdev: The kbase_device to operate on
++ * @kctx: The kbase_context to operate on
++ * @katom: Atom to remove
++ *
++ * This is a variant of kbasep_js_remove_job() that takes care of removing all
++ * of the retained state too. This is generally useful for cancelled atoms,
++ * which need not be handled in an optimal way.
++ *
++ * It is a programming error to call this when:
++ * * an atom is not a job belonging to kctx.
++ * * an atom has already been removed from the Job Scheduler.
++ * * an atom is still in the runpool:
++ * * it is not being killed with kbasep_jd_cancel()
++ *
++ * The following locking conditions are made on the caller:
++ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * * it must not hold the hwaccess_lock (as this will be obtained
++ * internally)
++ * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be
++ * obtained internally)
++ *
++ * Return: true indicates that ctx attributes have changed and the caller
++ * should call kbase_js_sched_all() to try to run more jobs and
++ * false otherwise.
++ */
++bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
++				struct kbase_context *kctx,
++				struct kbase_jd_atom *katom);
+
++/**
++ * kbasep_js_runpool_requeue_or_kill_ctx - Handle the requeuing/killing of a
++ * context that was evicted from the
++ * policy queue or runpool.
++ * @kbdev: The kbase_device to operate on
++ * @kctx: The kbase_context to operate on
++ * @has_pm_ref: tells whether to release the Power Manager active reference
++ *
++ * This should be used whenever handing off a context that has been evicted
++ * from the policy queue or the runpool:
++ * * If the context is not dying and has jobs, it gets re-added to the policy
++ * queue
++ * * Otherwise, it is not added
++ *
++ * In addition, if the context is dying the jobs are killed asynchronously.
++ *
++ * In all cases, the Power Manager active reference is released
++ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.
++ * has_pm_ref must be set to false whenever the context was not previously in
++ * the runpool and does not hold a Power Manager active refcount. Note that
++ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
++ * active refcount even though they weren't in the runpool.
++ *
++ * The following locking conditions are made on the caller:
++ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
++ * obtained internally)
++ */
++void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
++		struct kbase_context *kctx, bool has_pm_ref);
+
++/**
++ * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy,
++ * allowing it to be scheduled out.
++ * @kbdev: The kbase_device to operate on
++ * @kctx: The kbase_context to operate on
++ *
++ * When the refcount reaches zero, the context might be scheduled out
++ * (depending on whether the Scheduling Policy has deemed it so, or if it has
++ * run out of jobs).
++ *
++ * If the context does get scheduled out, then the following actions will be
++ * taken as part of descheduling the context:
++ * For the context being descheduled:
++ * * If the context is in the process of dying (all the jobs are being
++ * removed from it), then descheduling also kills off any jobs remaining in the
++ * context.
++ * * If the context is not dying, and any jobs remain after descheduling the
++ * context then it is re-enqueued to the Policy's Queue.
++ * * Otherwise, the context is still known to the scheduler, but remains absent
++ * from the Policy Queue until a job is next added to it.
++ * * In all descheduling cases, the Power Manager active reference (obtained
++ * during kbasep_js_try_schedule_head_ctx()) is released
++ * (kbase_pm_context_idle()).
++ *
++ * Whilst the context is being descheduled, this also handles actions that
++ * cause more atoms to be run:
++ * * Attempt submitting atoms when the Context Attributes on the Runpool have
++ * changed. This is because the context being scheduled out could mean that
++ * there are more opportunities to run atoms.
++ * * Attempt submitting to a slot that was previously blocked due to affinity
++ * restrictions. This is usually only necessary when releasing a context
++ * happens as part of completing a previous job, but is harmless nonetheless.
++ * * Attempt scheduling in a new context (if one is available), and if
++ * necessary, running a job from that new context.
++ *
++ * Unlike retaining a context in the runpool, this function cannot be called
++ * from IRQ context.
++ *
++ * It is a programming error to call this on a kctx that is not currently
++ * scheduled, or that already has a zero refcount.
++ *
++ * The following locking conditions are made on the caller:
++ * * it must not hold the hwaccess_lock, because it will be used internally.
++ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
++ * obtained internally)
++ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
++ * obtained internally)
++ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
++ * obtained internally)
++ *
++ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
++		struct kbase_context *kctx);
+
++/**
++ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
++ * kbasep_js_runpool_release_ctx() that handles additional
++ * actions from completing an atom.
++ *
++ * @kbdev: KBase device
++ * @kctx: KBase context
++ * @katom_retained_state: Retained state from the atom
++ *
++ * This is usually called as part of completing an atom and releasing the
++ * refcount on the context held by the atom.
++ *
++ * Therefore, the extra actions carried out are part of handling actions queued
++ * on a completed atom, namely:
++ * * Releasing the atom's context attributes
++ * * Retrying the submission on a particular slot, because we couldn't submit
++ * on that slot from an IRQ handler.
++ * ++ * The locking conditions of this function are the same as those for ++ * kbasep_js_runpool_release_ctx() ++ */ ++void kbasep_js_runpool_release_ctx_and_katom_retained_state( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state); + -+ err = copy_from_user(ai, -+ u64_to_user_ptr(alias->in.aliasing_info), -+ sizeof(*ai) * alias->in.nents); -+ if (err) { -+ vfree(ai); -+ return -EFAULT; -+ } ++/** ++ * kbasep_js_runpool_release_ctx_nolock - Variant of kbase_js_runpool_release_ctx() ++ * without locks ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * Variant of kbase_js_runpool_release_ctx() that assumes that ++ * kbasep_js_device_data::runpool_mutex and ++ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not ++ * attempt to schedule new contexts. ++ */ ++void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ flags = alias->in.flags; -+ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) { -+ vfree(ai); -+ return -EINVAL; -+ } ++/** ++ * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * This schedules a context in regardless of the context priority. ++ * If the runpool is full, a context will be forced out of the runpool and the ++ * function will wait for the new context to be scheduled in. ++ * The context will be kept scheduled in (and the corresponding address space ++ * reserved) until kbasep_js_release_privileged_ctx is called). ++ * ++ * The following locking conditions are made on the caller: ++ * * it must not hold the hwaccess_lock, because it will be used internally. ++ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be ++ * obtained internally) ++ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be ++ * obtained internally) ++ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used ++ * internally). ++ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will ++ * be used internally. ++ * ++ */ ++void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ alias->out.gpu_va = kbase_mem_alias(kctx, &flags, -+ alias->in.stride, alias->in.nents, -+ ai, &alias->out.va_pages); ++/** ++ * kbasep_js_release_privileged_ctx - Release a privileged context, ++ * allowing it to be scheduled out. ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * See kbasep_js_runpool_release_ctx for potential side effects. ++ * ++ * The following locking conditions are made on the caller: ++ * * it must not hold the hwaccess_lock, because it will be used internally. ++ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
++ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be ++ * obtained internally) ++ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be ++ * obtained internally) ++ * ++ */ ++void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ alias->out.flags = flags; ++/** ++ * kbase_js_try_run_jobs - Try to submit the next job on each slot ++ * @kbdev: KBase device ++ * ++ * The following locks may be used: ++ * * kbasep_js_device_data::runpool_mutex ++ * * hwaccess_lock ++ */ ++void kbase_js_try_run_jobs(struct kbase_device *kbdev); + -+ vfree(ai); ++/** ++ * kbasep_js_suspend - Suspend the job scheduler during a Power Management ++ * Suspend event. ++ * @kbdev: KBase device ++ * ++ * Causes all contexts to be removed from the runpool, and prevents any ++ * contexts from (re)entering the runpool. ++ * ++ * This does not handle suspending the one privileged context: the caller must ++ * instead do this by suspending the GPU HW Counter Instrumentation. ++ * ++ * This will eventually cause all Power Management active references held by ++ * contexts on the runpool to be released, without running any more atoms. ++ * ++ * The caller must then wait for all Power Management active refcount to become ++ * zero before completing the suspend. ++ * ++ * The emptying mechanism may take some time to complete, since it can wait for ++ * jobs to complete naturally instead of forcing them to end quickly. However, ++ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this ++ * function is guaranteed to complete in a finite time. ++ */ ++void kbasep_js_suspend(struct kbase_device *kbdev); + -+ if (alias->out.gpu_va == 0) -+ return -ENOMEM; ++/** ++ * kbasep_js_resume - Resume the Job Scheduler after a Power Management ++ * Resume event. ++ * @kbdev: KBase device ++ * ++ * This restores the actions from kbasep_js_suspend(): ++ * * Schedules contexts back into the runpool ++ * * Resumes running atoms on the GPU ++ */ ++void kbasep_js_resume(struct kbase_device *kbdev); + -+ return 0; -+} ++/** ++ * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler. ++ * ++ * @kctx: Context pointer ++ * @katom: Pointer to the atom to submit ++ * ++ * The atom is enqueued on the context's ringbuffer. The caller must have ++ * ensured that all dependencies can be represented in the ringbuffer. ++ * ++ * Caller must hold jctx->lock ++ * ++ * Return: true if the context requires to be enqueued, otherwise false. ++ */ ++bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + -+static int kbase_api_mem_import(struct kbase_context *kctx, -+ union kbase_ioctl_mem_import *import) -+{ -+ int ret; -+ u64 flags = import->in.flags; ++/** ++ * kbase_js_pull - Pull an atom from a context in the job scheduler for ++ * execution. ++ * ++ * @kctx: Context to pull from ++ * @js: Job slot to pull from ++ * ++ * The atom will not be removed from the ringbuffer at this stage. ++ * ++ * The HW access lock must be held when calling this function. ++ * ++ * Return: a pointer to an atom, or NULL if there are no atoms for this ++ * slot that can be currently run. ++ */ ++struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js); + -+ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) -+ return -ENOMEM; ++/** ++ * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. 
++ * ++ * @kctx: Context pointer ++ * @katom: Pointer to the atom to unpull ++ * ++ * An atom is 'unpulled' if execution is stopped but intended to be returned to ++ * later. The most common reason for this is that the atom has been ++ * soft-stopped. Another reason is if an end-of-renderpass atom completed ++ * but will need to be run again as part of the same renderpass. ++ * ++ * Note that if multiple atoms are to be 'unpulled', they must be returned in ++ * the reverse order to which they were originally pulled. It is a programming ++ * error to return atoms in any other order. ++ * ++ * The HW access lock must be held when calling this function. ++ * ++ */ ++void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + -+ ret = kbase_mem_import(kctx, -+ import->in.type, -+ u64_to_user_ptr(import->in.phandle), -+ import->in.padding, -+ &import->out.gpu_va, -+ &import->out.va_pages, -+ &flags); ++/** ++ * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(), ++ * removing it from the job ++ * scheduler ringbuffer. ++ * @kctx: Context pointer ++ * @katom: Pointer to the atom to complete ++ * ++ * If the atom failed then all dependee atoms marked for failure propagation ++ * will also fail. ++ * ++ * Return: true if the context is now idle (no jobs pulled) false otherwise. ++ */ ++bool kbase_js_complete_atom_wq(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + -+ import->out.flags = flags; ++/** ++ * kbase_js_complete_atom - Complete an atom. ++ * ++ * @katom: Pointer to the atom to complete ++ * @end_timestamp: The time that the atom completed (may be NULL) ++ * ++ * Most of the work required to complete an atom will be performed by ++ * jd_done_worker(). ++ * ++ * The HW access lock must be held when calling this function. ++ * ++ * Return: a atom that has now been unblocked and can now be run, or NULL ++ * if none ++ */ ++struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp); + -+ return ret; -+} ++/** ++ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot ++ * dependency ++ * @katom: Pointer to an atom in the slot ringbuffer ++ * ++ * A cross-slot dependency is ignored if necessary to unblock incremental ++ * rendering. If the atom at the start of a renderpass used too much memory ++ * and was soft-stopped then the atom at the end of a renderpass is submitted ++ * to hardware regardless of its dependency on the start-of-renderpass atom. ++ * This can happen multiple times for the same pair of atoms. ++ * ++ * Return: true to block the atom or false to allow it to be submitted to ++ * hardware. ++ */ ++bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); + -+static int kbase_api_mem_flags_change(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_flags_change *change) -+{ -+ if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY) -+ return -ENOMEM; ++/** ++ * kbase_js_sched - Submit atoms from all available contexts. ++ * ++ * @kbdev: Device pointer ++ * @js_mask: Mask of job slots to submit to ++ * ++ * This will attempt to submit as many jobs as possible to the provided job ++ * slots. It will exit when either all job slots are full, or all contexts have ++ * been used. 
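++ *
++ * For example (illustrative only), passing a js_mask of
++ * (1u << 0) | (1u << 1) restricts the submission attempt to job slots 0 and
++ * 1, while a mask with a bit set for every job slot attempts submission on
++ * all of them.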
++ * ++ */ ++void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask); + -+ return kbase_mem_flags_change(kctx, change->gpu_va, -+ change->flags, change->mask); -+} ++/** ++ * kbase_js_zap_context - Attempt to deschedule a context that is being ++ * destroyed ++ * @kctx: Context pointer ++ * ++ * This will attempt to remove a context from any internal job scheduler queues ++ * and perform any other actions to ensure a context will not be submitted ++ * from. ++ * ++ * If the context is currently scheduled, then the caller must wait for all ++ * pending jobs to complete before taking any further action. ++ */ ++void kbase_js_zap_context(struct kbase_context *kctx); + -+static int kbase_api_stream_create(struct kbase_context *kctx, -+ struct kbase_ioctl_stream_create *stream) -+{ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ int fd, ret; ++/** ++ * kbase_js_is_atom_valid - Validate an atom ++ * ++ * @kbdev: Device pointer ++ * @katom: Atom to validate ++ * ++ * This will determine whether the atom can be scheduled onto the GPU. Atoms ++ * with invalid combinations of core requirements will be rejected. ++ * ++ * Return: true if atom is valid false otherwise. ++ */ ++bool kbase_js_is_atom_valid(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ /* Name must be NULL-terminated and padded with NULLs, so check last -+ * character is NULL -+ */ -+ if (stream->name[sizeof(stream->name)-1] != 0) -+ return -EINVAL; ++/** ++ * kbase_js_set_timeouts - update all JS timeouts with user specified data ++ * ++ * @kbdev: Device pointer ++ * ++ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is ++ * set to a positive number then that becomes the new value used, if a timeout ++ * is negative then the default is set. ++ */ ++void kbase_js_set_timeouts(struct kbase_device *kbdev); + -+ ret = kbase_sync_fence_stream_create(stream->name, &fd); ++/** ++ * kbase_js_set_ctx_priority - set the context priority ++ * ++ * @kctx: Context pointer ++ * @new_priority: New priority value for the Context ++ * ++ * The context priority is set to a new value and it is moved to the ++ * pullable/unpullable list as per the new priority. ++ */ ++void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + -+ if (ret) -+ return ret; -+ return fd; -+#else -+ return -ENOENT; -+#endif -+} ++/** ++ * kbase_js_update_ctx_priority - update the context priority ++ * ++ * @kctx: Context pointer ++ * ++ * The context priority gets updated as per the priority of atoms currently in ++ * use for that context, but only if system priority mode for context scheduling ++ * is being used. ++ */ ++void kbase_js_update_ctx_priority(struct kbase_context *kctx); + -+static int kbase_api_fence_validate(struct kbase_context *kctx, -+ struct kbase_ioctl_fence_validate *validate) -+{ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ return kbase_sync_fence_validate(validate->fd); -+#else -+ return -ENOENT; -+#endif -+} ++/* ++ * Helpers follow ++ */ + -+static int kbase_api_mem_profile_add(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_profile_add *data) ++/** ++ * kbasep_js_is_submit_allowed - Check that a context is allowed to submit ++ * jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context ++ * ++ * The purpose of this abstraction is to hide the underlying data size, ++ * and wrap up the long repeated line of code. ++ * ++ * As with any bool, never test the return value with true. ++ * ++ * The caller must hold hwaccess_lock. 
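++ *
++ * The check itself is a per-address-space bit test, as the body below shows:
++ * a context assigned address space N corresponds to bit (1u << N) of
++ * runpool_irq.submit_allowed, so, as an illustrative example, a context on
++ * address space 3 may submit only while bit 0x8 of that word is set.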
++ * ++ * Return: true if the context is allowed to submit jobs, false otherwise. ++ */ ++static inline bool kbasep_js_is_submit_allowed( ++ struct kbasep_js_device_data *js_devdata, ++ struct kbase_context *kctx) +{ -+ char *buf; -+ int err; -+ -+ if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { -+ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big"); -+ return -EINVAL; -+ } -+ -+ if (!data->len) { -+ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer size is 0"); -+ /* Should return -EINVAL, but returning -ENOMEM for backwards compat */ -+ return -ENOMEM; -+ } -+ -+ buf = kmalloc(data->len, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ -+ err = copy_from_user(buf, u64_to_user_ptr(data->buffer), -+ data->len); -+ if (err) { -+ kfree(buf); -+ return -EFAULT; -+ } ++ u16 test_bit; ++ bool is_allowed; + -+ return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); -+} ++ /* Ensure context really is scheduled in */ ++ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), ++ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, ++ kctx->as_nr, atomic_read(&kctx->flags))) ++ return false; + -+#if !MALI_USE_CSF -+static int kbase_api_soft_event_update(struct kbase_context *kctx, -+ struct kbase_ioctl_soft_event_update *update) -+{ -+ if (update->flags != 0) -+ return -EINVAL; ++ test_bit = (u16) (1u << kctx->as_nr); + -+ return kbase_soft_event_update(kctx, update->event, update->new_status); ++ is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); ++ dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", ++ is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); ++ return is_allowed; +} -+#endif /* !MALI_USE_CSF */ + -+static int kbase_api_sticky_resource_map(struct kbase_context *kctx, -+ struct kbase_ioctl_sticky_resource_map *map) ++/** ++ * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context ++ * ++ * The purpose of this abstraction is to hide the underlying data size, ++ * and wrap up the long repeated line of code. ++ * ++ * The caller must hold hwaccess_lock. 
++ */ ++static inline void kbasep_js_set_submit_allowed( ++ struct kbasep_js_device_data *js_devdata, ++ struct kbase_context *kctx) +{ -+ int ret; -+ u64 i; -+ u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; -+ -+ if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) -+ return -EOVERFLOW; -+ -+ ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), -+ sizeof(u64) * map->count); -+ -+ if (ret != 0) -+ return -EFAULT; -+ -+ kbase_gpu_vm_lock(kctx); ++ u16 set_bit; + -+ for (i = 0; i < map->count; i++) { -+ if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { -+ /* Invalid resource */ -+ ret = -EINVAL; -+ break; -+ } -+ } ++ /* Ensure context really is scheduled in */ ++ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), ++ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, ++ kctx->as_nr, atomic_read(&kctx->flags))) ++ return; + -+ if (ret != 0) { -+ while (i > 0) { -+ i--; -+ kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i]); -+ } -+ } ++ set_bit = (u16) (1u << kctx->as_nr); + -+ kbase_gpu_vm_unlock(kctx); ++ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", ++ kctx, kctx->as_nr); + -+ return ret; ++ js_devdata->runpool_irq.submit_allowed |= set_bit; +} + -+static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, -+ struct kbase_ioctl_sticky_resource_unmap *unmap) ++/** ++ * kbasep_js_clear_submit_allowed - Prevent a context from submitting more ++ * jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context ++ * ++ * The purpose of this abstraction is to hide the underlying data size, ++ * and wrap up the long repeated line of code. ++ * ++ * The caller must hold hwaccess_lock. ++ */ ++static inline void kbasep_js_clear_submit_allowed( ++ struct kbasep_js_device_data *js_devdata, ++ struct kbase_context *kctx) +{ -+ int ret; -+ u64 i; -+ u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; -+ -+ if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) -+ return -EOVERFLOW; -+ -+ ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), -+ sizeof(u64) * unmap->count); -+ -+ if (ret != 0) -+ return -EFAULT; ++ u16 clear_bit; ++ u16 clear_mask; + -+ kbase_gpu_vm_lock(kctx); ++ /* Ensure context really is scheduled in */ ++ if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED), ++ "%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx, ++ kctx->as_nr, atomic_read(&kctx->flags))) ++ return; + -+ for (i = 0; i < unmap->count; i++) { -+ if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { -+ /* Invalid resource, but we keep going anyway */ -+ ret = -EINVAL; -+ } -+ } ++ clear_bit = (u16) (1u << kctx->as_nr); ++ clear_mask = ~clear_bit; + -+ kbase_gpu_vm_unlock(kctx); ++ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", ++ kctx, kctx->as_nr); + -+ return ret; ++ js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + -+#if MALI_UNIT_TEST -+ -+static int kbase_api_tlstream_stats(struct kbase_context *kctx, -+ struct kbase_ioctl_tlstream_stats *stats) ++/** ++ * kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid' ++ * atom retained state ++ * ++ * @retained_state: pointer where to create and initialize the state ++ * ++ * Create an initial 'invalid' atom retained state, that requires no ++ * atom-related work to be done on releasing with ++ * kbasep_js_runpool_release_ctx_and_katom_retained_state() ++ */ ++static inline void 
kbasep_js_atom_retained_state_init_invalid( ++ struct kbasep_js_atom_retained_state *retained_state) +{ -+ kbase_timeline_stats(kctx->kbdev->timeline, -+ &stats->bytes_collected, -+ &stats->bytes_generated); -+ -+ return 0; -+} -+#endif /* MALI_UNIT_TEST */ -+ -+#if MALI_USE_CSF -+static int kbasep_cs_event_signal(struct kbase_context *kctx) -+{ -+ kbase_csf_event_signal_notify_gpu(kctx); -+ return 0; -+} -+ -+static int kbasep_cs_queue_register(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register *reg) -+{ -+ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; -+ -+ return kbase_csf_queue_register(kctx, reg); -+} -+ -+static int kbasep_cs_queue_register_ex(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_register_ex *reg) -+{ -+ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; -+ -+ return kbase_csf_queue_register_ex(kctx, reg); -+} -+ -+static int kbasep_cs_queue_terminate(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_terminate *term) -+{ -+ kbase_csf_queue_terminate(kctx, term); -+ -+ return 0; -+} -+ -+static int kbasep_cs_queue_bind(struct kbase_context *kctx, -+ union kbase_ioctl_cs_queue_bind *bind) -+{ -+ return kbase_csf_queue_bind(kctx, bind); -+} -+ -+static int kbasep_cs_queue_kick(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_kick *kick) -+{ -+ return kbase_csf_queue_kick(kctx, kick); -+} -+ -+static int kbasep_cs_queue_group_create_1_6( -+ struct kbase_context *kctx, -+ union kbase_ioctl_cs_queue_group_create_1_6 *create) -+{ -+ union kbase_ioctl_cs_queue_group_create -+ new_create = { .in = { -+ .tiler_mask = create->in.tiler_mask, -+ .fragment_mask = -+ create->in.fragment_mask, -+ .compute_mask = create->in.compute_mask, -+ .cs_min = create->in.cs_min, -+ .priority = create->in.priority, -+ .tiler_max = create->in.tiler_max, -+ .fragment_max = create->in.fragment_max, -+ .compute_max = create->in.compute_max, -+ } }; -+ -+ int ret = kbase_csf_queue_group_create(kctx, &new_create); -+ -+ create->out.group_handle = new_create.out.group_handle; -+ create->out.group_uid = new_create.out.group_uid; -+ -+ return ret; -+} -+static int kbasep_cs_queue_group_create(struct kbase_context *kctx, -+ union kbase_ioctl_cs_queue_group_create *create) -+{ -+ return kbase_csf_queue_group_create(kctx, create); -+} -+ -+static int kbasep_cs_queue_group_terminate(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_queue_group_term *term) -+{ -+ kbase_csf_queue_group_terminate(kctx, term->group_handle); -+ -+ return 0; -+} -+ -+static int kbasep_kcpu_queue_new(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_new *new) -+{ -+ return kbase_csf_kcpu_queue_new(kctx, new); -+} -+ -+static int kbasep_kcpu_queue_delete(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_delete *delete) -+{ -+ return kbase_csf_kcpu_queue_delete(kctx, delete); -+} -+ -+static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, -+ struct kbase_ioctl_kcpu_queue_enqueue *enqueue) -+{ -+ return kbase_csf_kcpu_queue_enqueue(kctx, enqueue); -+} -+ -+static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, -+ union kbase_ioctl_cs_tiler_heap_init *heap_init) -+{ -+ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) -+ return -EINVAL; -+ -+ kctx->jit_group_id = heap_init->in.group_id; -+ -+ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, -+ heap_init->in.initial_chunks, heap_init->in.max_chunks, -+ heap_init->in.target_in_flight, heap_init->in.buf_desc_va, -+ &heap_init->out.gpu_heap_va, -+ &heap_init->out.first_chunk_va); -+} 
-+ -+static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, -+ union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) -+{ -+ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) -+ return -EINVAL; -+ -+ kctx->jit_group_id = heap_init->in.group_id; -+ -+ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, -+ heap_init->in.initial_chunks, heap_init->in.max_chunks, -+ heap_init->in.target_in_flight, 0, -+ &heap_init->out.gpu_heap_va, -+ &heap_init->out.first_chunk_va); -+} -+ -+static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_tiler_heap_term *heap_term) -+{ -+ return kbase_csf_tiler_heap_term(kctx, heap_term->gpu_heap_va); -+} -+ -+static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, -+ union kbase_ioctl_cs_get_glb_iface *param) -+{ -+ struct basep_cs_stream_control *stream_data = NULL; -+ struct basep_cs_group_control *group_data = NULL; -+ void __user *user_groups, *user_streams; -+ int err = 0; -+ u32 const max_group_num = param->in.max_group_num; -+ u32 const max_total_stream_num = param->in.max_total_stream_num; -+ -+ if (max_group_num > MAX_SUPPORTED_CSGS) -+ return -EINVAL; -+ -+ if (max_total_stream_num > -+ MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) -+ return -EINVAL; -+ -+ user_groups = u64_to_user_ptr(param->in.groups_ptr); -+ user_streams = u64_to_user_ptr(param->in.streams_ptr); -+ -+ if (max_group_num > 0) { -+ if (!user_groups) -+ err = -EINVAL; -+ else { -+ group_data = kcalloc(max_group_num, -+ sizeof(*group_data), GFP_KERNEL); -+ if (!group_data) -+ err = -ENOMEM; -+ } -+ } -+ -+ if (max_total_stream_num > 0) { -+ if (!user_streams) -+ err = -EINVAL; -+ else { -+ stream_data = kcalloc(max_total_stream_num, -+ sizeof(*stream_data), GFP_KERNEL); -+ if (!stream_data) -+ err = -ENOMEM; -+ } -+ } -+ -+ if (!err) { -+ param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( -+ kctx->kbdev, group_data, max_group_num, stream_data, -+ max_total_stream_num, ¶m->out.glb_version, -+ ¶m->out.features, ¶m->out.group_num, -+ ¶m->out.prfcnt_size, ¶m->out.instr_features); -+ -+ if (copy_to_user(user_groups, group_data, -+ MIN(max_group_num, param->out.group_num) * -+ sizeof(*group_data))) -+ err = -EFAULT; -+ } -+ -+ if (!err) -+ if (copy_to_user(user_streams, stream_data, -+ MIN(max_total_stream_num, param->out.total_stream_num) * -+ sizeof(*stream_data))) -+ err = -EFAULT; -+ -+ kfree(group_data); -+ kfree(stream_data); -+ return err; -+} -+ -+static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, -+ struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) -+{ -+ return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer, -+ cpu_queue_info->size); ++ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; ++ retained_state->core_req = ++ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; +} + -+static int kbase_ioctl_read_user_page(struct kbase_context *kctx, -+ union kbase_ioctl_read_user_page *user_page) ++/** ++ * kbasep_js_atom_retained_state_copy() - Copy atom state ++ * @retained_state: where to copy ++ * @katom: where to copy from ++ * ++ * Copy atom state that can be made available after kbase_jd_done_nolock() is called ++ * on that atom. 
++ */ ++static inline void kbasep_js_atom_retained_state_copy( ++ struct kbasep_js_atom_retained_state *retained_state, ++ const struct kbase_jd_atom *katom) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned long flags; -+ -+ /* As of now, only LATEST_FLUSH is supported */ -+ if (unlikely(user_page->in.offset != LATEST_FLUSH)) -+ return -EINVAL; -+ -+ /* Validating padding that must be zero */ -+ if (unlikely(user_page->in.padding != 0)) -+ return -EINVAL; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (!kbdev->pm.backend.gpu_powered) -+ user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; -+ else -+ user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH)); -+ user_page->out.val_hi = 0; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return 0; ++ retained_state->event_code = katom->event_code; ++ retained_state->core_req = katom->core_req; ++ retained_state->sched_priority = katom->sched_priority; ++ retained_state->device_nr = katom->device_nr; +} -+#endif /* MALI_USE_CSF */ + -+static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, -+ struct kbase_ioctl_context_priority_check *priority_check) ++/** ++ * kbasep_js_has_atom_finished - Determine whether an atom has finished ++ * (given its retained state), ++ * and so should be given back to ++ * userspace/removed from the system. ++ * ++ * @katom_retained_state: the retained state of the atom to check ++ * ++ * Reasons for an atom not finishing include: ++ * * Being soft-stopped (and so, the atom should be resubmitted sometime later) ++ * * It is an end of renderpass atom that was run to consume the output of a ++ * start-of-renderpass atom that was soft-stopped because it used too much ++ * memory. In this case, it will have to be run again later. ++ * ++ * Return: false if the atom has not finished, true otherwise. 
++ */ ++static inline bool kbasep_js_has_atom_finished( ++ const struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+#if MALI_USE_CSF -+ priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority); -+#else -+ base_jd_prio req_priority = (base_jd_prio)priority_check->priority; -+ -+ priority_check->priority = (u8)kbase_js_priority_check(kctx->kbdev, req_priority); -+#endif -+ return 0; ++ return (bool) (katom_retained_state->event_code != ++ BASE_JD_EVENT_STOPPED && ++ katom_retained_state->event_code != ++ BASE_JD_EVENT_REMOVED_FROM_NEXT && ++ katom_retained_state->event_code != ++ BASE_JD_EVENT_END_RP_DONE); +} + -+#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ -+ do { \ -+ int ret; \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ -+ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ -+ ret = function(arg); \ -+ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ -+ #function); \ -+ return ret; \ -+ } while (0) -+ -+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ -+ do { \ -+ type param; \ -+ int ret, err; \ -+ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ err = copy_from_user(¶m, uarg, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ ret = function(arg, ¶m); \ -+ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ -+ #function); \ -+ return ret; \ -+ } while (0) -+ -+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ -+ do { \ -+ type param; \ -+ int ret, err; \ -+ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ memset(¶m, 0, sizeof(param)); \ -+ ret = function(arg, ¶m); \ -+ err = copy_to_user(uarg, ¶m, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ -+ #function); \ -+ return ret; \ -+ } while (0) -+ -+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ -+ do { \ -+ type param; \ -+ int ret, err; \ -+ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ err = copy_from_user(¶m, uarg, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ ret = function(arg, ¶m); \ -+ err = copy_to_user(uarg, ¶m, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ -+ #function); \ -+ return ret; \ -+ } while (0) -+ -+static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, -+ struct kbase_ioctl_set_limited_core_count *set_limited_core_count) ++/** ++ * kbasep_js_atom_retained_state_is_valid - Determine whether a struct ++ * kbasep_js_atom_retained_state ++ * is valid ++ * @katom_retained_state: the atom's retained state to check ++ * ++ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates ++ * that the code should just ignore it. ++ * ++ * Return: false if the retained state is invalid, true otherwise. 
++ */ ++static inline bool kbasep_js_atom_retained_state_is_valid( ++ const struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+ const u64 shader_core_mask = -+ kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); -+ const u64 limited_core_mask = -+ ((u64)1 << (set_limited_core_count->max_core_count)) - 1; -+ -+ if ((shader_core_mask & limited_core_mask) == 0) { -+ /* At least one shader core must be available after applying the mask */ -+ return -EINVAL; -+ } -+ -+ kctx->limited_core_mask = limited_core_mask; -+ return 0; ++ return (bool) (katom_retained_state->core_req != ++ KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + -+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ++/** ++ * kbase_js_runpool_inc_context_count - Increment number of running contexts. ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * The following locking conditions are made on the caller: ++ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * * The caller must hold the kbasep_js_device_data::runpool_mutex ++ */ ++static inline void kbase_js_runpool_inc_context_count( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *kctx = NULL; -+ struct kbase_device *kbdev = kfile->kbdev; -+ void __user *uarg = (void __user *)arg; -+ -+ /* Only these ioctls are available until setup is complete */ -+ switch (cmd) { -+ case KBASE_IOCTL_VERSION_CHECK: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, -+ kbase_api_handshake, -+ struct kbase_ioctl_version_check, -+ kfile); -+ break; -+ -+ case KBASE_IOCTL_VERSION_CHECK_RESERVED: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK_RESERVED, -+ kbase_api_handshake_dummy, -+ struct kbase_ioctl_version_check, -+ kfile); -+ break; -+ -+ case KBASE_IOCTL_SET_FLAGS: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, -+ kbase_api_set_flags, -+ struct kbase_ioctl_set_flags, -+ kfile); -+ break; -+ -+ case KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO: -+ KBASE_HANDLE_IOCTL_INOUT( -+ KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, -+ kbase_api_kinstr_prfcnt_enum_info, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info, kfile); -+ break; -+ -+ case KBASE_IOCTL_KINSTR_PRFCNT_SETUP: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_PRFCNT_SETUP, -+ kbase_api_kinstr_prfcnt_setup, -+ union kbase_ioctl_kinstr_prfcnt_setup, -+ kfile); -+ break; -+ case KBASE_IOCTL_GET_GPUPROPS: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops, -+ struct kbase_ioctl_get_gpuprops, kfile); -+ break; -+ } -+ -+ kctx = kbase_file_get_kctx_if_setup_complete(kfile); -+ if (unlikely(!kctx)) -+ return -EPERM; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ /* Normal ioctls */ -+ switch (cmd) { -+#if !MALI_USE_CSF -+ case KBASE_IOCTL_JOB_SUBMIT: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, -+ kbase_api_job_submit, -+ struct kbase_ioctl_job_submit, -+ kctx); -+ break; -+#endif /* !MALI_USE_CSF */ -+#if !MALI_USE_CSF -+ case KBASE_IOCTL_POST_TERM: -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, -+ kbase_api_post_term, -+ kctx); -+ break; -+#endif /* !MALI_USE_CSF */ -+ case KBASE_IOCTL_MEM_ALLOC: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, -+ kbase_api_mem_alloc, -+ union kbase_ioctl_mem_alloc, -+ kctx); -+ break; -+#if MALI_USE_CSF -+ case KBASE_IOCTL_MEM_ALLOC_EX: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC_EX, kbase_api_mem_alloc_ex, -+ union kbase_ioctl_mem_alloc_ex, kctx); -+ 
break; -+#endif -+ case KBASE_IOCTL_MEM_QUERY: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, -+ kbase_api_mem_query, -+ union kbase_ioctl_mem_query, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_FREE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, -+ kbase_api_mem_free, -+ struct kbase_ioctl_mem_free, -+ kctx); -+ break; -+ case KBASE_IOCTL_DISJOINT_QUERY: -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, -+ kbase_api_disjoint_query, -+ struct kbase_ioctl_disjoint_query, -+ kctx); -+ break; -+ case KBASE_IOCTL_GET_DDK_VERSION: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, -+ kbase_api_get_ddk_version, -+ struct kbase_ioctl_get_ddk_version, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_JIT_INIT: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, -+ kbase_api_mem_jit_init, -+ struct kbase_ioctl_mem_jit_init, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_EXEC_INIT: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, -+ kbase_api_mem_exec_init, -+ struct kbase_ioctl_mem_exec_init, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_SYNC: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, -+ kbase_api_mem_sync, -+ struct kbase_ioctl_mem_sync, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, -+ kbase_api_mem_find_cpu_offset, -+ union kbase_ioctl_mem_find_cpu_offset, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, -+ kbase_api_mem_find_gpu_start_and_offset, -+ union kbase_ioctl_mem_find_gpu_start_and_offset, -+ kctx); -+ break; -+ case KBASE_IOCTL_GET_CONTEXT_ID: -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, -+ kbase_api_get_context_id, -+ struct kbase_ioctl_get_context_id, -+ kctx); -+ break; -+ case KBASE_IOCTL_TLSTREAM_ACQUIRE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, -+ kbase_api_tlstream_acquire, -+ struct kbase_ioctl_tlstream_acquire, -+ kctx); -+ break; -+ case KBASE_IOCTL_TLSTREAM_FLUSH: -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, -+ kbase_api_tlstream_flush, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_COMMIT: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, -+ kbase_api_mem_commit, -+ struct kbase_ioctl_mem_commit, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_ALIAS: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, -+ kbase_api_mem_alias, -+ union kbase_ioctl_mem_alias, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_IMPORT: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, -+ kbase_api_mem_import, -+ union kbase_ioctl_mem_import, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_FLAGS_CHANGE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, -+ kbase_api_mem_flags_change, -+ struct kbase_ioctl_mem_flags_change, -+ kctx); -+ break; -+ case KBASE_IOCTL_STREAM_CREATE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, -+ kbase_api_stream_create, -+ struct kbase_ioctl_stream_create, -+ kctx); -+ break; -+ case KBASE_IOCTL_FENCE_VALIDATE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, -+ kbase_api_fence_validate, -+ struct kbase_ioctl_fence_validate, -+ kctx); -+ break; -+ case KBASE_IOCTL_MEM_PROFILE_ADD: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, -+ kbase_api_mem_profile_add, -+ struct kbase_ioctl_mem_profile_add, -+ kctx); -+ break; ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+#if !MALI_USE_CSF -+ case KBASE_IOCTL_SOFT_EVENT_UPDATE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, -+ kbase_api_soft_event_update, -+ struct 
kbase_ioctl_soft_event_update, -+ kctx); -+ break; -+#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+ case KBASE_IOCTL_STICKY_RESOURCE_MAP: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, -+ kbase_api_sticky_resource_map, -+ struct kbase_ioctl_sticky_resource_map, -+ kctx); -+ break; -+ case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, -+ kbase_api_sticky_resource_unmap, -+ struct kbase_ioctl_sticky_resource_unmap, -+ kctx); -+ break; ++ /* Track total contexts */ ++ WARN_ON_ONCE(js_devdata->nr_all_contexts_running >= JS_MAX_RUNNING_JOBS); ++ ++(js_devdata->nr_all_contexts_running); + -+ /* Instrumentation. */ -+#if !MALI_USE_CSF -+ case KBASE_IOCTL_KINSTR_JM_FD: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, -+ kbase_api_kinstr_jm_fd, -+ union kbase_kinstr_jm_fd, -+ kctx); -+ break; -+#endif -+ case KBASE_IOCTL_HWCNT_READER_SETUP: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, -+ kbase_api_hwcnt_reader_setup, -+ struct kbase_ioctl_hwcnt_reader_setup, -+ kctx); -+ break; -+ case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, -+ kbase_api_get_cpu_gpu_timeinfo, -+ union kbase_ioctl_get_cpu_gpu_timeinfo, -+ kctx); -+ break; -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ case KBASE_IOCTL_HWCNT_SET: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, -+ kbase_api_hwcnt_set, -+ struct kbase_ioctl_hwcnt_values, -+ kctx); -+ break; -+#endif /* CONFIG_MALI_BIFROST_NO_MALI */ -+#ifdef CONFIG_MALI_CINSTR_GWT -+ case KBASE_IOCTL_CINSTR_GWT_START: -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, -+ kbase_gpu_gwt_start, -+ kctx); -+ break; -+ case KBASE_IOCTL_CINSTR_GWT_STOP: -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, -+ kbase_gpu_gwt_stop, -+ kctx); -+ break; -+ case KBASE_IOCTL_CINSTR_GWT_DUMP: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, -+ kbase_gpu_gwt_dump, -+ union kbase_ioctl_cinstr_gwt_dump, -+ kctx); -+ break; -+#endif -+#if MALI_USE_CSF -+ case KBASE_IOCTL_CS_EVENT_SIGNAL: -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, -+ kbasep_cs_event_signal, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_REGISTER: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, -+ kbasep_cs_queue_register, -+ struct kbase_ioctl_cs_queue_register, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_REGISTER_EX: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, -+ kbasep_cs_queue_register_ex, -+ struct kbase_ioctl_cs_queue_register_ex, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_TERMINATE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, -+ kbasep_cs_queue_terminate, -+ struct kbase_ioctl_cs_queue_terminate, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_BIND: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, -+ kbasep_cs_queue_bind, -+ union kbase_ioctl_cs_queue_bind, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_KICK: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, -+ kbasep_cs_queue_kick, -+ struct kbase_ioctl_cs_queue_kick, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6: -+ KBASE_HANDLE_IOCTL_INOUT( -+ KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6, -+ kbasep_cs_queue_group_create_1_6, -+ union kbase_ioctl_cs_queue_group_create_1_6, kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, -+ kbasep_cs_queue_group_create, -+ union kbase_ioctl_cs_queue_group_create, 
-+ kctx); -+ break; -+ case KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE, -+ kbasep_cs_queue_group_terminate, -+ struct kbase_ioctl_cs_queue_group_term, -+ kctx); -+ break; -+ case KBASE_IOCTL_KCPU_QUEUE_CREATE: -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, -+ kbasep_kcpu_queue_new, -+ struct kbase_ioctl_kcpu_queue_new, -+ kctx); -+ break; -+ case KBASE_IOCTL_KCPU_QUEUE_DELETE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, -+ kbasep_kcpu_queue_delete, -+ struct kbase_ioctl_kcpu_queue_delete, -+ kctx); -+ break; -+ case KBASE_IOCTL_KCPU_QUEUE_ENQUEUE: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, -+ kbasep_kcpu_queue_enqueue, -+ struct kbase_ioctl_kcpu_queue_enqueue, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_TILER_HEAP_INIT: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, -+ kbasep_cs_tiler_heap_init, -+ union kbase_ioctl_cs_tiler_heap_init, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13, -+ kbasep_cs_tiler_heap_init_1_13, -+ union kbase_ioctl_cs_tiler_heap_init_1_13, kctx); -+ break; -+ case KBASE_IOCTL_CS_TILER_HEAP_TERM: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, -+ kbasep_cs_tiler_heap_term, -+ struct kbase_ioctl_cs_tiler_heap_term, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_GET_GLB_IFACE: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, -+ kbase_ioctl_cs_get_glb_iface, -+ union kbase_ioctl_cs_get_glb_iface, -+ kctx); -+ break; -+ case KBASE_IOCTL_CS_CPU_QUEUE_DUMP: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, -+ kbasep_ioctl_cs_cpu_queue_dump, -+ struct kbase_ioctl_cs_cpu_queue_info, -+ kctx); -+ break; -+ /* This IOCTL will be kept for backward compatibility */ -+ case KBASE_IOCTL_READ_USER_PAGE: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page, -+ union kbase_ioctl_read_user_page, kctx); -+ break; -+#endif /* MALI_USE_CSF */ -+#if MALI_UNIT_TEST -+ case KBASE_IOCTL_TLSTREAM_STATS: -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, -+ kbase_api_tlstream_stats, -+ struct kbase_ioctl_tlstream_stats, -+ kctx); -+ break; -+#endif /* MALI_UNIT_TEST */ -+ case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK: -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK, -+ kbasep_ioctl_context_priority_check, -+ struct kbase_ioctl_context_priority_check, -+ kctx); -+ break; -+ case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, -+ kbasep_ioctl_set_limited_core_count, -+ struct kbase_ioctl_set_limited_core_count, -+ kctx); -+ break; ++ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ /* Track contexts that can submit jobs */ ++ WARN_ON_ONCE(js_devdata->nr_user_contexts_running >= JS_MAX_RUNNING_JOBS); ++ ++(js_devdata->nr_user_contexts_running); + } -+ -+ dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); -+ -+ return -ENOIOCTLCMD; +} + -+#if MALI_USE_CSF -+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) -+{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *const kctx = -+ kbase_file_get_kctx_if_setup_complete(kfile); -+ struct base_csf_notification event_data = { -+ .type = BASE_CSF_NOTIFICATION_EVENT }; -+ const size_t data_size = sizeof(event_data); -+ bool read_event = false, read_error = false; -+ -+ if (unlikely(!kctx)) -+ return -EPERM; -+ -+ if (count < data_size) -+ return -ENOBUFS; -+ -+ if 
(atomic_read(&kctx->event_count)) -+ read_event = true; -+ else -+ read_error = kbase_csf_event_read_error(kctx, &event_data); -+ -+ if (!read_event && !read_error) { -+ bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, -+ &event_data); -+ /* This condition is not treated as an error. -+ * It is possible that event handling thread was woken up due -+ * to a fault/error that occurred for a queue group, but before -+ * the corresponding fault data was read by the thread the -+ * queue group was already terminated by the userspace. -+ */ -+ if (!dump) -+ dev_dbg(kctx->kbdev->dev, -+ "Neither event nor error signaled"); -+ } -+ -+ if (copy_to_user(buf, &event_data, data_size) != 0) { -+ dev_warn(kctx->kbdev->dev, -+ "Failed to copy data\n"); -+ return -EFAULT; -+ } -+ -+ if (read_event) -+ atomic_set(&kctx->event_count, 0); -+ -+ return data_size; -+} -+#else /* MALI_USE_CSF */ -+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) ++/** ++ * kbase_js_runpool_dec_context_count - decrement number of running contexts. ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * The following locking conditions are made on the caller: ++ * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * * The caller must hold the kbasep_js_device_data::runpool_mutex ++ */ ++static inline void kbase_js_runpool_dec_context_count( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *const kctx = -+ kbase_file_get_kctx_if_setup_complete(kfile); -+ struct base_jd_event_v2 uevent; -+ int out_count = 0; -+ -+ if (unlikely(!kctx)) -+ return -EPERM; -+ -+ if (count < sizeof(uevent)) -+ return -ENOBUFS; -+ -+ memset(&uevent, 0, sizeof(uevent)); -+ -+ do { -+ while (kbase_event_dequeue(kctx, &uevent)) { -+ if (out_count > 0) -+ goto out; -+ -+ if (filp->f_flags & O_NONBLOCK) -+ return -EAGAIN; -+ -+ if (wait_event_interruptible(kctx->event_queue, -+ kbase_event_pending(kctx)) != 0) -+ return -ERESTARTSYS; -+ } -+ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { -+ if (out_count == 0) -+ return -EPIPE; -+ goto out; -+ } -+ -+ if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) -+ return -EFAULT; -+ -+ buf += sizeof(uevent); -+ out_count++; -+ count -= sizeof(uevent); -+ } while (count >= sizeof(uevent)); ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ out: -+ return out_count * sizeof(uevent); -+} -+#endif /* MALI_USE_CSF */ ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+static __poll_t kbase_poll(struct file *filp, poll_table *wait) -+{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *const kctx = -+ kbase_file_get_kctx_if_setup_complete(kfile); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+ if (unlikely(!kctx)) { -+#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) -+ return POLLERR; -+#else -+ return EPOLLERR; -+#endif -+ } ++ /* Track total contexts */ ++ --(js_devdata->nr_all_contexts_running); ++ WARN_ON_ONCE(js_devdata->nr_all_contexts_running < 0); + -+ poll_wait(filp, &kctx->event_queue, wait); -+ if (kbase_event_pending(kctx)) { -+#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) -+ return POLLIN | POLLRDNORM; -+#else -+ return EPOLLIN | EPOLLRDNORM; -+#endif ++ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ /* Track contexts that can submit jobs */ ++ 
--(js_devdata->nr_user_contexts_running); ++ WARN_ON_ONCE(js_devdata->nr_user_contexts_running < 0); + } -+ -+ return 0; -+} -+ -+void kbase_event_wakeup(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx); -+ dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", -+ (void *)kctx); -+ wake_up_interruptible(&kctx->event_queue); -+} -+ -+KBASE_EXPORT_TEST_API(kbase_event_wakeup); -+ -+#if MALI_USE_CSF -+int kbase_event_pending(struct kbase_context *ctx) -+{ -+ KBASE_DEBUG_ASSERT(ctx); -+ -+ if (unlikely(!ctx)) -+ return -EPERM; -+ -+ return (atomic_read(&ctx->event_count) != 0) || -+ kbase_csf_event_error_pending(ctx) || -+ kbase_csf_cpu_queue_dump_needed(ctx); -+} -+#else -+int kbase_event_pending(struct kbase_context *ctx) -+{ -+ KBASE_DEBUG_ASSERT(ctx); -+ -+ if (unlikely(!ctx)) -+ return -EPERM; -+ -+ return (atomic_read(&ctx->event_count) != 0) || -+ (atomic_read(&ctx->event_closed) != 0); +} -+#endif -+ -+KBASE_EXPORT_TEST_API(kbase_event_pending); + -+static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) ++/** ++ * kbase_js_sched_all - Submit atoms from all available contexts to all ++ * job slots. ++ * ++ * @kbdev: Device pointer ++ * ++ * This will attempt to submit as many jobs as possible. It will exit when ++ * either all job slots are full, or all contexts have been used. ++ */ ++static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *const kctx = -+ kbase_file_get_kctx_if_setup_complete(kfile); -+ -+ if (unlikely(!kctx)) -+ return -EPERM; -+ -+ return kbase_context_mmap(kctx, vma); ++ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + -+static int kbase_check_flags(int flags) -+{ -+ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always -+ * closes the file descriptor in a child process. -+ */ -+ if (0 == (flags & O_CLOEXEC)) -+ return -EINVAL; ++extern const int ++kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + -+ return 0; -+} ++extern const base_jd_prio ++kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + -+static unsigned long kbase_get_unmapped_area(struct file *const filp, -+ const unsigned long addr, const unsigned long len, -+ const unsigned long pgoff, const unsigned long flags) ++/** ++ * kbasep_js_atom_prio_to_sched_prio - Convert atom priority (base_jd_prio) ++ * to relative ordering. ++ * @atom_prio: Priority ID to translate. ++ * ++ * Atom priority values for @ref base_jd_prio cannot be compared directly to ++ * find out which are higher or lower. ++ * ++ * This function will convert base_jd_prio values for successively lower ++ * priorities into a monotonically increasing sequence. That is, the lower the ++ * base_jd_prio priority, the higher the value produced by this function. This ++ * is in accordance with how the rest of the kernel treats priority. ++ * ++ * The mapping is 1:1 and the size of the valid input range is the same as the ++ * size of the valid output range, i.e. ++ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS ++ * ++ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions ++ * ++ * Return: On success: a value in the inclusive range ++ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. 
On failure: ++ * KBASE_JS_ATOM_SCHED_PRIO_INVALID ++ */ ++static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ -+ struct kbase_file *const kfile = filp->private_data; -+ struct kbase_context *const kctx = -+ kbase_file_get_kctx_if_setup_complete(kfile); -+ -+ if (unlikely(!kctx)) -+ return -EPERM; ++ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) ++ return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + -+ return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); ++ return kbasep_js_atom_priority_to_relative[atom_prio]; +} + -+static const struct file_operations kbase_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_open, -+ .release = kbase_release, -+ .read = kbase_read, -+ .poll = kbase_poll, -+ .unlocked_ioctl = kbase_ioctl, -+ .compat_ioctl = kbase_ioctl, -+ .mmap = kbase_mmap, -+ .check_flags = kbase_check_flags, -+ .get_unmapped_area = kbase_get_unmapped_area, -+}; -+ +/** -+ * power_policy_show - Show callback for the power_policy sysfs file. ++ * kbasep_js_sched_prio_to_atom_prio - Convert relative scheduler priority ++ * to atom priority (base_jd_prio). + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * @kbdev: Device pointer ++ * @sched_prio: Relative scheduler priority to translate. + * -+ * This function is called to get the contents of the power_policy sysfs -+ * file. This is a list of the available policies with the currently active one -+ * surrounded by square brackets. ++ * This function will convert relative scheduler priority back into base_jd_prio ++ * values. It takes values which priorities are monotonically increasing ++ * and converts them to the corresponding base_jd_prio values. If an invalid number is ++ * passed in (i.e. not within the expected range) an error code is returned instead. + * -+ * Return: The number of bytes output to @buf. ++ * The mapping is 1:1 and the size of the valid input range is the same as the ++ * size of the valid output range, i.e. ++ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS ++ * ++ * Return: On success: a value in the inclusive range ++ * 0..BASE_JD_NR_PRIO_LEVELS-1. On failure: BASE_JD_PRIO_INVALID. 
+ */ -+static ssize_t power_policy_show(struct device *dev, struct device_attribute *attr, char *const buf) ++static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(struct kbase_device *kbdev, ++ int sched_prio) +{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_policy *current_policy; -+ const struct kbase_pm_policy *const *policy_list; -+ int policy_count; -+ int i; -+ ssize_t ret = 0; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ current_policy = kbase_pm_get_policy(kbdev); -+ -+ policy_count = kbase_pm_list_policies(kbdev, &policy_list); -+ -+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { -+ if (policy_list[i] == current_policy) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); -+ else -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); -+ } -+ -+ if (ret < PAGE_SIZE - 1) { -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); -+ } else { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; ++ if (likely(sched_prio >= 0 && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT)) ++ return kbasep_js_relative_priority_to_atom[sched_prio]; ++ /* Invalid priority value if reached here */ ++ dev_warn(kbdev->dev, "Unknown JS scheduling priority %d", sched_prio); ++ return BASE_JD_PRIO_INVALID; +} + +/** -+ * power_policy_store - Store callback for the power_policy sysfs file. ++ * kbase_js_priority_check - Check the priority requested + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file ++ * @kbdev: Device pointer ++ * @priority: Requested priority + * -+ * This function is called when the power_policy sysfs file is written to. -+ * It matches the requested policy against the available policies and if a -+ * matching policy is found calls kbase_pm_set_policy() to change the -+ * policy. ++ * This will determine whether the requested priority can be satisfied. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Return: The same or lower priority than requested. + */ -+static ssize_t power_policy_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_policy *new_policy = NULL; -+ const struct kbase_pm_policy *const *policy_list; -+ int policy_count; -+ int i; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ policy_count = kbase_pm_list_policies(kbdev, &policy_list); -+ -+ for (i = 0; i < policy_count; i++) { -+ if (sysfs_streq(policy_list[i]->name, buf)) { -+ new_policy = policy_list[i]; -+ break; -+ } -+ } -+ -+ if (!new_policy) { -+ dev_err(dev, "power_policy: policy not found\n"); -+ return -EINVAL; -+ } -+ -+ kbase_pm_set_policy(kbdev, new_policy); -+ -+ return count; -+} ++base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority); + -+/* -+ * The sysfs file power_policy. ++/** ++ * kbase_js_atom_runs_before - determine if atoms for the same slot have an ++ * ordering relation ++ * @kbdev: kbase device ++ * @katom_a: the first atom ++ * @katom_b: the second atom. 
++ * @order_flags: combination of KBASE_ATOM_ORDERING_FLAG_<...> for the ordering ++ * relation + * -+ * This is used for obtaining information about the available policies, -+ * determining which policy is currently active, and changing the active -+ * policy. ++ * This is for making consistent decisions about the ordering of atoms when we ++ * need to do pre-emption on a slot, which includes stopping existing atoms ++ * when a new atom is ready to run, and also which other atoms to remove from ++ * the slot when the atom in JSn_HEAD is being pre-empted. ++ * ++ * This only handles @katom_a and @katom_b being for the same job slot, as ++ * pre-emption only operates within a slot. ++ * ++ * Note: there is currently no use-case for this as a sorting comparison ++ * functions, hence only a boolean returned instead of int -1, 0, +1 return. If ++ * required in future, a modification to do so would be better than calling ++ * twice with katom_a and katom_b swapped. ++ * ++ * Return: ++ * true if @katom_a should run before @katom_b, false otherwise. ++ * A false return value does not distinguish between "no ordering relation" and ++ * "@katom_a should run after @katom_b". + */ -+static DEVICE_ATTR_RW(power_policy); ++bool kbase_js_atom_runs_before(struct kbase_device *kbdev, ++ const struct kbase_jd_atom *katom_a, ++ const struct kbase_jd_atom *katom_b, ++ const kbase_atom_ordering_flag_t order_flags); + ++#endif /* _KBASE_JM_JS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h +new file mode 100644 +index 000000000..465cf7ec0 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h +@@ -0,0 +1,447 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * core_mask_show - Show callback for the core_mask sysfs file. + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * -+ * This function is called to get the contents of the core_mask sysfs file. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: The number of bytes output to @buf. 
+ */ -+static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ ssize_t ret = 0; + -+ kbdev = to_kbase_device(dev); ++/** ++ * DOC: Job Scheduler Type Definitions ++ */ + -+ if (!kbdev) -+ return -ENODEV; ++#ifndef _KBASE_JS_DEFS_H_ ++#define _KBASE_JS_DEFS_H_ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/* Forward decls */ ++struct kbase_device; ++struct kbase_jd_atom; + -+#if MALI_USE_CSF -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current debug core mask : 0x%llX\n", -+ kbdev->pm.debug_core_mask); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current desired core mask : 0x%llX\n", -+ kbase_pm_ca_get_core_mask(kbdev)); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current in use core mask : 0x%llX\n", -+ kbdev->pm.backend.shaders_avail); -+#else -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS0) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[0]); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS1) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[1]); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS2) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[2]); -+#endif /* MALI_USE_CSF */ + -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Available core mask : 0x%llX\n", -+ kbdev->gpu_props.props.raw_props.shader_present); ++typedef u32 kbase_context_flags; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/* ++ * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's ++ * jobs registered with the Job Scheduler ++ */ ++typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ return ret; -+} ++/* ++ * @brief Maximum number of jobs that can be submitted to a job slot whilst ++ * inside the IRQ handler. ++ * ++ * This is important because GPU NULL jobs can complete whilst the IRQ handler ++ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL ++ * jobs to be submitted inside the IRQ handler, which increases IRQ latency. ++ */ ++#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + +/** -+ * core_mask_store - Store callback for the core_mask sysfs file. ++ * enum kbasep_js_ctx_attr - Context attributes ++ * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains ++ * Compute jobs. ++ * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains ++ * Non-Compute jobs. ++ * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context ++ * contains compute-job atoms that aren't ++ * restricted to a coherent group, ++ * and can run on all cores. ++ * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file ++ * Each context attribute can be thought of as a boolean value that caches some ++ * state information about either the runpool, or the context: ++ * - In the case of the runpool, it is a cache of "Do any contexts owned by ++ * the runpool have attribute X?" ++ * - In the case of a context, it is a cache of "Do any atoms owned by the ++ * context have attribute X?" + * -+ * This function is called when the core_mask sysfs file is written to. 
++ * The boolean value of the context attributes often affects scheduling
++ * decisions, such as affinities to use and job slots to use.
++ *
++ * To accommodate changes of state in the context, each attribute is refcounted
++ * in the context, and in the runpool for all running contexts. Specifically:
++ * - The runpool holds a refcount of how many contexts in the runpool have this
++ * attribute.
++ * - The context holds a refcount of how many atoms have this attribute.
++ *
++ * KBASEP_JS_CTX_ATTR_COMPUTE:
++ * Attribute indicating a context that contains Compute jobs. That is,
++ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
++ *
++ * @note A context can be both 'Compute' and 'Non Compute' if it contains
++ * both types of jobs.
++ *
++ * KBASEP_JS_CTX_ATTR_NON_COMPUTE:
++ * Attribute indicating a context that contains Non-Compute jobs. That is,
++ * the context has some jobs that are \b not of type @ref
++ * BASE_JD_REQ_ONLY_COMPUTE.
++ *
++ * @note A context can be both 'Compute' and 'Non Compute' if it contains
++ * both types of jobs.
++ *
++ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES:
++ * Attribute indicating that a context contains compute-job atoms that
++ * aren't restricted to a coherent group, and can run on all cores.
++ *
++ * Specifically, this is when the atom's \a core_req satisfies:
++ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
++ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
++ *
++ * Such atoms could be blocked from running if one of the coherent groups
++ * is being used by another job slot, so tracking this context attribute
++ * allows us to prevent such situations.
++ *
++ * @note This doesn't take into account the 1-coregroup case, where all
++ * compute atoms would effectively be able to run on 'all cores', but
++ * contexts will still not always get marked with this attribute. Instead,
++ * it is the caller's responsibility to take into account the number of
++ * coregroups when interpreting this attribute.
++ *
++ * @note Whilst Tiler atoms are normally combined with
++ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
++ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
++ * enough to handle anyway.
+ *
-+ * Return: @count if the function succeeded. An error code on failure.
+ */ -+static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+#if MALI_USE_CSF -+ u64 new_core_mask; -+#else -+ u64 new_core_mask[3]; -+ u64 group0_core_mask; -+ int i; -+#endif /* MALI_USE_CSF */ -+ -+ int items; -+ ssize_t err = count; -+ unsigned long flags; -+ u64 shader_present; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+#if MALI_USE_CSF -+ items = sscanf(buf, "%llx", &new_core_mask); -+ -+ if (items != 1) { -+ dev_err(kbdev->dev, -+ "Couldn't process core mask write operation.\n" -+ "Use format \n"); -+ err = -EINVAL; -+ goto end; -+ } -+#else -+ items = sscanf(buf, "%llx %llx %llx", -+ &new_core_mask[0], &new_core_mask[1], -+ &new_core_mask[2]); -+ -+ if (items != 1 && items != 3) { -+ dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" -+ "Use format \n" -+ "or \n"); -+ err = -EINVAL; -+ goto end; -+ } -+ -+ if (items == 1) -+ new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; -+#endif -+ -+ mutex_lock(&kbdev->pm.lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++enum kbasep_js_ctx_attr { ++ KBASEP_JS_CTX_ATTR_COMPUTE, ++ KBASEP_JS_CTX_ATTR_NON_COMPUTE, ++ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, ++ KBASEP_JS_CTX_ATTR_COUNT ++}; + -+ shader_present = kbdev->gpu_props.props.raw_props.shader_present; ++enum { ++ /* ++ * Bit indicating that new atom should be started because this atom ++ * completed ++ */ ++ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), ++ /* ++ * Bit indicating that the atom was evicted from the JS_NEXT registers ++ */ ++ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) ++}; + -+#if MALI_USE_CSF -+ if ((new_core_mask & shader_present) != new_core_mask) { -+ dev_err(dev, -+ "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", -+ new_core_mask, shader_present); -+ err = -EINVAL; -+ goto unlock; ++/** ++ * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...> ++ * bits ++ */ ++typedef u32 kbasep_js_atom_done_code; + -+ } else if (!(new_core_mask & shader_present & -+ kbdev->pm.backend.ca_cores_enabled)) { -+ dev_err(dev, -+ "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", -+ new_core_mask, -+ kbdev->gpu_props.props.raw_props.shader_present, -+ kbdev->pm.backend.ca_cores_enabled); -+ err = -EINVAL; -+ goto unlock; -+ } ++/* ++ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode ++ */ ++enum { ++ /* ++ * In this mode, higher priority atoms will be scheduled first, ++ * regardless of the context they belong to. Newly-runnable higher ++ * priority atoms can preempt lower priority atoms currently running on ++ * the GPU, even if they belong to a different context. ++ */ ++ KBASE_JS_SYSTEM_PRIORITY_MODE = 0, + -+ if (kbdev->pm.debug_core_mask != new_core_mask) -+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask); -+#else -+ group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; ++ /* ++ * In this mode, the highest-priority atom will be chosen from each ++ * context in turn using a round-robin algorithm, so priority only has ++ * an effect within the context an atom belongs to. Newly-runnable ++ * higher priority atoms can preempt the lower priority atoms currently ++ * running on the GPU, but only if they belong to the same context. 
++ */ ++ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, + -+ for (i = 0; i < 3; ++i) { -+ if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { -+ dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", -+ new_core_mask[i], i, shader_present); -+ err = -EINVAL; -+ goto unlock; ++ /* Must be the last in the enum */ ++ KBASE_JS_PRIORITY_MODE_COUNT, ++}; + -+ } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { -+ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", -+ new_core_mask[i], i, -+ kbdev->gpu_props.props.raw_props.shader_present, -+ kbdev->pm.backend.ca_cores_enabled); -+ err = -EINVAL; -+ goto unlock; ++/* ++ * Internal atom priority defines for kbase_jd_atom::sched_prio ++ */ ++enum { ++ KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, ++ KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH, ++ KBASE_JS_ATOM_SCHED_PRIO_MED, ++ KBASE_JS_ATOM_SCHED_PRIO_LOW, ++ KBASE_JS_ATOM_SCHED_PRIO_COUNT, ++}; + -+ } else if (!(new_core_mask[i] & group0_core_mask)) { -+ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", -+ new_core_mask[i], i, group0_core_mask); -+ err = -EINVAL; -+ goto unlock; -+ } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { -+ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", -+ new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); -+ err = -EINVAL; -+ goto unlock; -+ } -+ } ++/* Invalid priority for kbase_jd_atom::sched_prio */ ++#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 + -+ if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || -+ kbdev->pm.debug_core_mask[1] != -+ new_core_mask[1] || -+ kbdev->pm.debug_core_mask[2] != -+ new_core_mask[2]) { ++/* Default priority in the case of contexts with no atoms, or being lenient ++ * about invalid priorities from userspace. ++ */ ++#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + -+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], -+ new_core_mask[1], new_core_mask[2]); -+ } -+#endif /* MALI_USE_CSF */ ++/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits ++ * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels. ++ * ++ * Must be strictly larger than the number of bits to represent a bitmap of ++ * priorities, so that we can do calculations such as: ++ * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1 ++ * ...without causing undefined behavior due to a shift beyond the width of the ++ * type ++ * ++ * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth ++ * moving to DECLARE_BITMAP() ++ */ ++typedef u8 kbase_js_prio_bitmap_t; + -+unlock: -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->pm.lock); -+end: -+ return err; -+} ++/* Ordering modification for kbase_js_atom_runs_before() */ ++typedef u32 kbase_atom_ordering_flag_t; + -+/* -+ * The sysfs file core_mask. ++/* Atoms of the same context and priority should have their ordering decided by ++ * their seq_nr instead of their age. + * -+ * This is used to restrict shader core availability for debugging purposes. -+ * Reading it will show the current core mask and the mask of cores available. -+ * Writing to it will set the current core mask. 
++ * seq_nr is used as a more slowly changing variant of age - it increases once ++ * per group of related atoms, as determined by user-space. Hence, it can be ++ * used to limit re-ordering decisions (such as pre-emption) to only re-order ++ * between such groups, rather than re-order within those groups of atoms. + */ -+static DEVICE_ATTR_RW(core_mask); ++#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0) + -+#if !MALI_USE_CSF +/** -+ * soft_job_timeout_store - Store callback for the soft_job_timeout sysfs -+ * file. -+ * -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes to write to the sysfs file. -+ * -+ * This allows setting the timeout for software jobs. Waiting soft event wait -+ * jobs will be cancelled after this period expires, while soft fence wait jobs -+ * will print debug information if the fence debug feature is enabled. ++ * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure ++ * @runpool_irq: Sub-structure to collect together Job Scheduling data used in ++ * IRQ context. The hwaccess_lock must be held when accessing. ++ * @runpool_irq.submit_allowed: Bitvector indicating whether a currently ++ * scheduled context is allowed to submit jobs. ++ * When bit 'N' is set in this, it indicates whether ++ * the context bound to address space 'N' is ++ * allowed to submit jobs. ++ * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters: ++ * Each is large enough to hold a refcount of the number of contexts ++ * that can fit into the runpool. This is currently BASE_MAX_NR_AS. ++ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store ++ * the refcount. Hence, it's not worthwhile reducing this to ++ * bit-manipulation on u32s to save space (where in contrast, 4 bit ++ * sub-fields would be easy to do and would save space). ++ * Whilst this must not become negative, the sign bit is used for: ++ * - error detection in debug builds ++ * - Optimization: it is undefined for a signed int to overflow, and so ++ * the compiler can optimize for that never happening (thus, no masking ++ * is required on updating the variable) ++ * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector ++ * to aid affinity checking. ++ * Element 'n' bit 'i' indicates that slot 'n' ++ * is using core i (i.e. slot_affinity_refcount[n][i] > 0) ++ * @runpool_irq.slot_affinity_refcount: Array of fefcount for each core owned ++ * by each slot. Used to generate the slot_affinities array of bitvectors. ++ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, ++ * because it is refcounted only when a job is definitely about to be ++ * submitted to a slot, and is de-refcounted immediately after a job ++ * finishes ++ * @schedule_sem: Scheduling semaphore. This must be held when calling ++ * kbase_jm_kick() ++ * @ctx_list_pullable: List of contexts that can currently be pulled from ++ * @ctx_list_unpullable: List of contexts that can not currently be pulled ++ * from, but have jobs currently running. 
++ * @nr_user_contexts_running: Number of currently scheduled user contexts
++ * (excluding ones that are not submitting jobs)
++ * @nr_all_contexts_running: Number of currently scheduled contexts (including
++ * ones that are not submitting jobs)
++ * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
++ * @note This is a write-once member, and so no locking is required to
++ * read
++ * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
++ * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
++ * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
++ * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS
++ * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL
++ * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING
++ * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS
++ * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL
++ * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING
++ * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS
++ * @suspended_soft_jobs_list: List of suspended soft jobs
++ * @softstop_always: Support soft-stop on a single context
++ * @init_status: The initialized-flag is placed at the end, to avoid
++ * cache-pollution (we should only be using this during init/term paths).
++ * @note This is a write-once member, and so no locking is required to
++ * read
++ * @nr_contexts_pullable: Number of contexts that can currently be pulled from
++ * @nr_contexts_runnable: Number of contexts that can either be pulled from or
++ * are currently running
++ * @soft_job_timeout_ms: Value for JS_SOFT_JOB_TIMEOUT
++ * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
++ * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
++ * independently of the Run Pool.
++ * Of course, you don't need the Run Pool lock to access this.
++ * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
++ *
++ * This encapsulates the current context of the Job Scheduler on a particular
++ * device. This context is global to the device, and is not tied to any
++ * particular struct kbase_context running on the device.
++ *
++ * nr_contexts_running and as_free are optimized for packing together (by making
++ * them smaller types than u32). The operations on them should rarely involve
++ * masking. The use of signed types for arithmetic indicates to the compiler
++ * that the value will not rollover (which would be undefined behavior), and so
++ * under the Total License model, it is free to make optimizations based on
++ * that (i.e. to remove masking).
+ */ -+static ssize_t soft_job_timeout_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int soft_job_timeout_ms; ++struct kbasep_js_device_data { ++ struct runpool_irq { ++ u16 submit_allowed; ++ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; ++ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; ++ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; ++ } runpool_irq; ++ struct semaphore schedule_sem; ++ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ s8 nr_user_contexts_running; ++ s8 nr_all_contexts_running; ++ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ u32 scheduling_period_ns; ++ u32 soft_stop_ticks; ++ u32 soft_stop_ticks_cl; ++ u32 hard_stop_ticks_ss; ++ u32 hard_stop_ticks_cl; ++ u32 hard_stop_ticks_dumping; ++ u32 gpu_reset_ticks_ss; ++ u32 gpu_reset_ticks_cl; ++ u32 gpu_reset_ticks_dumping; ++ u32 ctx_timeslice_ns; + -+ if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || -+ (soft_job_timeout_ms <= 0)) -+ return -EINVAL; ++ struct list_head suspended_soft_jobs_list; + -+ atomic_set(&kbdev->js_data.soft_job_timeout_ms, -+ soft_job_timeout_ms); ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ bool softstop_always; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ int init_status; ++ u32 nr_contexts_pullable; ++ atomic_t nr_contexts_runnable; ++ atomic_t soft_job_timeout_ms; ++ u32 js_free_wait_time_ms; + -+ return count; -+} ++ struct mutex queue_mutex; ++ /* ++ * Run Pool mutex, for managing contexts within the runpool. ++ * Unless otherwise specified, you must hold this lock whilst accessing ++ * any members that follow ++ * ++ * In addition, this is used to access: ++ * * the kbasep_js_kctx_info::runpool substructure ++ */ ++ struct mutex runpool_mutex; ++}; + +/** -+ * soft_job_timeout_show - Show callback for the soft_job_timeout sysfs -+ * file. -+ * -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer for the sysfs file contents. -+ * -+ * This will return the timeout for the software jobs. ++ * struct kbasep_js_kctx_info - KBase Context Job Scheduling information ++ * structure ++ * @ctx: Job Scheduler Context information sub-structure.Its members are ++ * accessed regardless of whether the context is: ++ * - In the Policy's Run Pool ++ * - In the Policy's Queue ++ * - Not queued nor in the Run Pool. ++ * You must obtain the @ctx.jsctx_mutex before accessing any other members ++ * of this substructure. ++ * You may not access any of its members from IRQ context. ++ * @ctx.jsctx_mutex: Job Scheduler Context lock ++ * @ctx.nr_jobs: Number of jobs ready to run - does \em not include ++ * the jobs waiting in the dispatcher, and dependency-only ++ * jobs. See kbase_jd_context::job_nr for such jobs ++ * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough ++ * to hold a refcount of the number of atoms on the context. ++ * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SHEDULED flag state ++ * changes. ++ * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on ++ * one list per job slot. ++ * @init_status: The initalized-flag is placed at the end, to avoid ++ * cache-pollution (we should only be using this during init/term paths) + * -+ * Return: The number of bytes output to buf. 
++ * This is a substructure in the struct kbase_context that encapsulates all the ++ * scheduling information. + */ -+static ssize_t soft_job_timeout_show(struct device *dev, -+ struct device_attribute *attr, -+ char * const buf) -+{ -+ struct kbase_device *kbdev; ++struct kbasep_js_kctx_info { ++ struct kbase_jsctx { ++ struct mutex jsctx_mutex; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ u32 nr_jobs; ++ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; ++ wait_queue_head_t is_scheduled_wait; ++ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; ++ } ctx; ++ int init_status; ++}; + -+ return scnprintf(buf, PAGE_SIZE, "%i\n", -+ atomic_read(&kbdev->js_data.soft_job_timeout_ms)); -+} ++/** ++ * struct kbasep_js_atom_retained_state - Subset of atom state. ++ * @event_code: to determine whether the atom has finished ++ * @core_req: core requirements ++ * @sched_priority: priority ++ * @device_nr: Core group atom was executed on ++ * ++ * Subset of atom state that can be available after kbase_jd_done_nolock() is called ++ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), ++ * because the original atom could disappear. ++ */ ++struct kbasep_js_atom_retained_state { ++ /* Event code - to determine whether the atom has finished */ ++ enum base_jd_event_code event_code; ++ /* core requirements */ ++ base_jd_core_req core_req; ++ /* priority */ ++ int sched_priority; ++ /* Core group atom was executed on */ ++ u32 device_nr; + -+static DEVICE_ATTR_RW(soft_job_timeout); ++}; + -+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, -+ int default_ticks, u32 old_ticks) -+{ -+ if (timeout_ms > 0) { -+ u64 ticks = timeout_ms * 1000000ULL; ++/* ++ * Value signifying 'no retry on a slot required' for: ++ * - kbase_js_atom_retained_state::retry_submit_on_slot ++ * - kbase_jd_atom::retry_submit_on_slot ++ */ ++#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + -+ do_div(ticks, kbdev->js_data.scheduling_period_ns); -+ if (!ticks) -+ return 1; -+ return ticks; -+ } else if (timeout_ms < 0) { -+ return default_ticks; -+ } else { -+ return old_ticks; -+ } -+} ++/* ++ * base_jd_core_req value signifying 'invalid' for a ++ * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid() ++ */ ++#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP ++ ++/* ++ * The JS timer resolution, in microseconds ++ * Any non-zero difference in time will be at least this size. ++ */ ++#define KBASEP_JS_TICK_RESOLUTION_US 1 + +/** -+ * js_timeouts_store - Store callback for the js_timeouts sysfs file. ++ * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's ++ * use of a job slot ++ * @blocked: bitmap of priorities that this slot is blocked at ++ * @atoms_pulled: counts of atoms that have been pulled from this slot, ++ * across all priority levels ++ * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per ++ * priority level + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file ++ * Controls how a slot from the &struct kbase_context's jsctx_queue is managed, ++ * for example to ensure correct ordering of atoms when atoms of different ++ * priorities are unpulled. 
++ */ ++struct kbase_jsctx_slot_tracking { ++ kbase_js_prio_bitmap_t blocked; ++ atomic_t atoms_pulled; ++ int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++}; ++ ++#endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +new file mode 100644 +index 000000000..11aedef80 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +@@ -0,0 +1,185 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This function is called to get the contents of the js_timeouts sysfs -+ * file. This file contains five values separated by whitespace. The values -+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, -+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING -+ * configuration values (in that order), with the difference that the js_timeout -+ * values are expressed in MILLISECONDS. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * -+ * The js_timeouts sysfile file allows the current values in -+ * use by the job scheduler to get override. Note that a value needs to -+ * be other than 0 for it to override the current job scheduler value. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: @count if the function succeeded. An error code on failure. + */ -+static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int items; -+ long js_soft_stop_ms; -+ long js_soft_stop_ms_cl; -+ long js_hard_stop_ms_ss; -+ long js_hard_stop_ms_cl; -+ long js_hard_stop_ms_dumping; -+ long js_reset_ms_ss; -+ long js_reset_ms_cl; -+ long js_reset_ms_dumping; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, ++ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py ++ * For more information see base/tools/hwconfig_generator/README ++ */ + -+ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", -+ &js_soft_stop_ms, &js_soft_stop_ms_cl, -+ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, -+ &js_hard_stop_ms_dumping, &js_reset_ms_ss, -+ &js_reset_ms_cl, &js_reset_ms_dumping); ++#ifndef _BASE_HWCONFIG_FEATURES_H_ ++#define _BASE_HWCONFIG_FEATURES_H_ + -+ if (items == 8) { -+ struct kbasep_js_device_data *js_data = &kbdev->js_data; -+ unsigned long flags; ++enum base_hw_feature { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_TLS_HASHING, ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_ASN_HASH, ++ BASE_HW_FEATURE_GPU_SLEEP, ++ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_PBHA_HWU, ++ BASE_HW_FEATURE_LARGE_PAGE_ALLOC, ++ BASE_HW_FEATURE_END ++}; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = { ++ BASE_HW_FEATURE_END ++}; + -+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ -+ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ -+ default, js_data->ticks_name); \ -+ dev_dbg(kbdev->dev, "Overriding " #ticks_name \ -+ " with %lu ticks (%lu ms)\n", \ -+ (unsigned long)js_data->ticks_name, \ -+ ms_name); \ -+ } while (0) ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_END ++}; + -+ UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, -+ DEFAULT_JS_SOFT_STOP_TICKS); -+ UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, -+ DEFAULT_JS_SOFT_STOP_TICKS_CL); -+ UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, -+ DEFAULT_JS_HARD_STOP_TICKS_SS); -+ UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, -+ DEFAULT_JS_HARD_STOP_TICKS_CL); -+ UPDATE_TIMEOUT(hard_stop_ticks_dumping, -+ js_hard_stop_ms_dumping, -+ DEFAULT_JS_HARD_STOP_TICKS_DUMPING); -+ UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, -+ DEFAULT_JS_RESET_TICKS_SS); -+ UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, -+ DEFAULT_JS_RESET_TICKS_CL); -+ UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, -+ DEFAULT_JS_RESET_TICKS_DUMPING); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ kbase_js_set_timeouts(kbdev); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ return count; -+ } ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_TLS_HASHING, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_END ++}; + -+ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" -+ "Use format \n" -+ "Write 0 for no change, -1 to restore default timeout\n"); -+ return -EINVAL; -+} ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = { ++ BASE_HW_FEATURE_THREAD_GROUP_SPLIT, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_TLS_HASHING, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_END ++}; + -+static unsigned long get_js_timeout_in_ms( -+ u32 scheduling_period_ns, -+ u32 ticks) -+{ -+ u64 ms = (u64)ticks * scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ BASE_HW_FEATURE_END ++}; + -+ do_div(ms, 1000000UL); -+ return ms; -+} ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ BASE_HW_FEATURE_END ++}; + -+/** -+ * js_timeouts_show - Show callback for the js_timeouts sysfs file. -+ * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents -+ * -+ * This function is called to get the contents of the js_timeouts sysfs -+ * file. It returns the last set values written to the js_timeouts sysfs file. -+ * If the file didn't get written yet, the values will be current setting in -+ * use. -+ * -+ * Return: The number of bytes output to @buf. 
-+ */ -+static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ unsigned long js_soft_stop_ms; -+ unsigned long js_soft_stop_ms_cl; -+ unsigned long js_hard_stop_ms_ss; -+ unsigned long js_hard_stop_ms_cl; -+ unsigned long js_hard_stop_ms_dumping; -+ unsigned long js_reset_ms_ss; -+ unsigned long js_reset_ms_cl; -+ unsigned long js_reset_ms_dumping; -+ u32 scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ BASE_HW_FEATURE_END ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ scheduling_period_ns = kbdev->js_data.scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, ++ BASE_HW_FEATURE_END ++}; + -+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ -+ scheduling_period_ns, \ -+ kbdev->js_data.name) ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_END ++}; + -+ js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); -+ js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); -+ js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); -+ js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); -+ js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); -+ js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); -+ js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); -+ js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_END ++}; + -+#undef GET_TIMEOUT ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_END ++}; + -+ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", -+ js_soft_stop_ms, js_soft_stop_ms_cl, -+ js_hard_stop_ms_ss, js_hard_stop_ms_cl, -+ js_hard_stop_ms_dumping, js_reset_ms_ss, -+ js_reset_ms_cl, js_reset_ms_dumping); ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = { ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_ASN_HASH, ++ BASE_HW_FEATURE_GPU_SLEEP, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_END ++}; + -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } ++__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { ++ 
BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_CLEAN_ONLY_SAFE, ++ BASE_HW_FEATURE_ASN_HASH, ++ BASE_HW_FEATURE_GPU_SLEEP, ++ BASE_HW_FEATURE_CORE_FEATURES, ++ BASE_HW_FEATURE_PBHA_HWU, ++ BASE_HW_FEATURE_END ++}; + -+ return ret; -+} + ++#endif /* _BASE_HWCONFIG_FEATURES_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +new file mode 100644 +index 000000000..0fbdec0bb +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +@@ -0,0 +1,835 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * The sysfs file js_timeouts. + * -+ * This is used to override the current job scheduler values for -+ * JS_STOP_STOP_TICKS_SS -+ * JS_STOP_STOP_TICKS_CL -+ * JS_HARD_STOP_TICKS_SS -+ * JS_HARD_STOP_TICKS_CL -+ * JS_HARD_STOP_TICKS_DUMPING -+ * JS_RESET_TICKS_SS -+ * JS_RESET_TICKS_CL -+ * JS_RESET_TICKS_DUMPING. -+ */ -+static DEVICE_ATTR_RW(js_timeouts); -+ -+static u32 get_new_js_timeout( -+ u32 old_period, -+ u32 old_ticks, -+ u32 new_scheduling_period_ns) -+{ -+ u64 ticks = (u64)old_period * (u64)old_ticks; -+ -+ do_div(ticks, new_scheduling_period_ns); -+ return ticks?ticks:1; -+} -+ -+/** -+ * js_scheduling_period_store - Store callback for the js_scheduling_period sysfs -+ * file -+ * @dev: The device the sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file ++ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * -+ * This function is called when the js_scheduling_period sysfs file is written -+ * to. It checks the data written, and if valid updates the js_scheduling_period -+ * value ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: @count if the function succeeded. An error code on failure. + */ -+static ssize_t js_scheduling_period_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ unsigned int js_scheduling_period; -+ u32 new_scheduling_period_ns; -+ u32 old_period; -+ struct kbasep_js_device_data *js_data; -+ unsigned long flags; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; + -+ js_data = &kbdev->js_data; ++/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, ++ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py ++ * For more information see base/tools/hwconfig_generator/README ++ */ + -+ ret = kstrtouint(buf, 0, &js_scheduling_period); -+ if (ret || !js_scheduling_period) { -+ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++#ifndef _BASE_HWCONFIG_ISSUES_H_ ++#define _BASE_HWCONFIG_ISSUES_H_ + -+ new_scheduling_period_ns = js_scheduling_period * 1000000; ++enum base_hw_issue { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_TNOX_1194, ++ BASE_HW_ISSUE_TGOX_R1_1234, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TSIX_1792, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_GPU2019_3212, ++ BASE_HW_ISSUE_TURSEHW_1997, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ /* Update scheduling timeouts */ -+ mutex_lock(&js_data->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = { ++ BASE_HW_ISSUE_END ++}; + -+ /* If no contexts have been scheduled since js_timeouts was last written -+ * to, the new timeouts might not have been latched yet. So check if an -+ * update is pending and use the new values if necessary. -+ */ ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ /* Use previous 'new' scheduling period as a base if present. 
*/ -+ old_period = js_data->scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+#define SET_TIMEOUT(name) \ -+ (js_data->name = get_new_js_timeout(\ -+ old_period, \ -+ kbdev->js_data.name, \ -+ new_scheduling_period_ns)) ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ SET_TIMEOUT(soft_stop_ticks); -+ SET_TIMEOUT(soft_stop_ticks_cl); -+ SET_TIMEOUT(hard_stop_ticks_ss); -+ SET_TIMEOUT(hard_stop_ticks_cl); -+ SET_TIMEOUT(hard_stop_ticks_dumping); -+ SET_TIMEOUT(gpu_reset_ticks_ss); -+ SET_TIMEOUT(gpu_reset_ticks_cl); -+ SET_TIMEOUT(gpu_reset_ticks_dumping); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+#undef SET_TIMEOUT ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ js_data->scheduling_period_ns = new_scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbase_js_set_timeouts(kbdev); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ 
BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_data->runpool_mutex); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", -+ js_scheduling_period); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ return count; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TSIX_1792, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/** -+ * js_scheduling_period_show - Show callback for the js_scheduling_period sysfs -+ * entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current period used for the JS scheduling -+ * period. -+ * -+ * Return: The number of bytes output to @buf. 
-+ */ -+static ssize_t js_scheduling_period_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ u32 period; -+ ssize_t ret; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TSIX_1792, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ period = kbdev->js_data.scheduling_period_ns; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", -+ period / 1000000); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ return ret; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+static DEVICE_ATTR_RW(js_scheduling_period); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TNOX_1194, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+static ssize_t js_softstop_always_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int softstop_always; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ 
BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TNOX_1194, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ ret = kstrtoint(buf, 0, &softstop_always); -+ if (ret || ((softstop_always != 0) && (softstop_always != 1))) { -+ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TGOX_R1_1234, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev->js_data.softstop_always = (bool) softstop_always; -+ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", -+ (kbdev->js_data.softstop_always) ? -+ "Enabled" : "Disabled"); -+ return count; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+static ssize_t js_softstop_always_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ 
BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ return ret; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/* -+ * By default, soft-stops are disabled when only a single context is present. -+ * The ability to enable soft-stop when only a single context is present can be -+ * used for debug and unit-testing purposes. -+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) -+ */ -+static DEVICE_ATTR_RW(js_softstop_always); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+#endif /* !MALI_USE_CSF */ ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_3076, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_GPU2017_1336, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+typedef void kbasep_debug_command_func(struct kbase_device *); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+enum kbasep_debug_command_code { -+ KBASEP_DEBUG_COMMAND_DUMPTRACE, ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ /* This must be the last enum */ -+ KBASEP_DEBUG_COMMAND_COUNT ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ 
BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END +}; + -+struct kbasep_debug_command { -+ char *str; -+ kbasep_debug_command_func *func; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END +}; + -+static void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) -+{ -+ KBASE_KTRACE_DUMP(kbdev); -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/* Debug commands supported by the driver */ -+static const struct kbasep_debug_command debug_commands[] = { -+ { -+ .str = "dumptrace", -+ .func = &kbasep_ktrace_dump_wrapper, -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END +}; + -+/** -+ * debug_command_show - Show callback for the debug_command sysfs file. -+ * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents -+ * -+ * This function is called to get the contents of the debug_command sysfs -+ * file. This is a list of the available debug commands, separated by newlines. -+ * -+ * Return: The number of bytes output to @buf. 
-+ */ -+static ssize_t debug_command_show(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ int i; -+ ssize_t ret = 0; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ return ret; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3212, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/** -+ * debug_command_store - Store callback for the debug_command sysfs file. -+ * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file -+ * -+ * This function is called when the debug_command sysfs file is written to. 
-+ * It matches the requested command against the available commands, and if -+ * a matching command is found calls the associated function from -+ * @debug_commands to issue the command. -+ * -+ * Return: @count if the function succeeded. An error code on failure. -+ */ -+static ssize_t debug_command_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int i; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3212, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { -+ if (sysfs_streq(debug_commands[i].str, buf)) { -+ debug_commands[i].func(kbdev); -+ return count; -+ } -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ /* Debug Command not found */ -+ dev_err(dev, "debug_command: command not known\n"); -+ return -EINVAL; -+} ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/* The sysfs file debug_command. -+ * -+ * This is used to issue general debug commands to the device driver. -+ * Reading it will produce a list of debug commands, separated by newlines. -+ * Writing to it with one of those commands will issue said command. -+ */ -+static DEVICE_ATTR_RW(debug_command); -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TURSEHW_1997, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+/** -+ * gpuinfo_show - Show callback for the gpuinfo sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get a description of the present Mali -+ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the -+ * number of cores, the hardware version and the raw product id. 
For -+ * example -+ * -+ * Mali-T60x MP4 r0p0 0x6956 -+ * -+ * Return: The number of bytes output to @buf. -+ */ -+static ssize_t gpuinfo_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ static const struct gpu_product_id_name { -+ unsigned int id; -+ char *name; -+ } gpu_product_id_names[] = { -+ { .id = GPU_ID2_PRODUCT_TMIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G71" }, -+ { .id = GPU_ID2_PRODUCT_THEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G72" }, -+ { .id = GPU_ID2_PRODUCT_TSIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G51" }, -+ { .id = GPU_ID2_PRODUCT_TNOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G76" }, -+ { .id = GPU_ID2_PRODUCT_TDVX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G31" }, -+ { .id = GPU_ID2_PRODUCT_TGOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G52" }, -+ { .id = GPU_ID2_PRODUCT_TTRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G77" }, -+ { .id = GPU_ID2_PRODUCT_TBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G78" }, -+ { .id = GPU_ID2_PRODUCT_TBAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G78AE" }, -+ { .id = GPU_ID2_PRODUCT_LBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G68" }, -+ { .id = GPU_ID2_PRODUCT_TNAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G57" }, -+ { .id = GPU_ID2_PRODUCT_TODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G710" }, -+ { .id = GPU_ID2_PRODUCT_LODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G610" }, -+ { .id = GPU_ID2_PRODUCT_TGRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G510" }, -+ { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G310" }, -+ { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-TTIX" }, -+ { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-LTIX" }, -+ }; -+ const char *product_name = "(Unknown Mali GPU)"; -+ struct kbase_device *kbdev; -+ u32 gpu_id; -+ unsigned int product_id, product_id_mask; -+ unsigned int i; -+ struct kbase_gpu_props *gpu_props; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TURSEHW_1997, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ gpu_props = &kbdev->gpu_props; -+ gpu_id = gpu_props->props.raw_props.gpu_id; -+ product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ 
BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { -+ const struct gpu_product_id_name *p = &gpu_product_id_names[i]; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if ((p->id & product_id_mask) == -+ (product_id & product_id_mask)) { -+ product_name = p->name; -+ break; -+ } -+ } ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+#if MALI_USE_CSF -+ if ((product_id & product_id_mask) == -+ ((GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { -+ const bool rt_supported = -+ GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); -+ const u8 nr_cores = gpu_props->num_cores; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3878, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2019_3901, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ /* Mali-G715-Immortalis if 10 < number of cores with ray tracing supproted. -+ * Mali-G715 if 10 < number of cores without ray tracing supported. -+ * Mali-G715 if 7 <= number of cores <= 10 regardless ray tracing. -+ * Mali-G615 if number of cores < 7. 
-+ */ -+ if ((nr_cores > 10) && rt_supported) -+ product_name = "Mali-G715-Immortalis"; -+ else if (nr_cores >= 7) -+ product_name = "Mali-G715"; ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ if (nr_cores < 7) { -+ dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be G615", nr_cores); -+ product_name = "Mali-G615"; -+ } else -+ dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, -+ nr_cores); -+ } -+#endif /* MALI_USE_CSF */ ++__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { ++ BASE_HW_ISSUE_TSIX_2033, ++ BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TURSEHW_2716, ++ BASE_HW_ISSUE_GPU2021PRO_290, ++ BASE_HW_ISSUE_TITANHW_2710, ++ BASE_HW_ISSUE_TITANHW_2679, ++ BASE_HW_ISSUE_GPU2022PRO_148, ++ BASE_HW_ISSUE_END ++}; + -+ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, -+ kbdev->gpu_props.num_cores, -+ (gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID_VERSION_MINOR) >> KBASE_GPU_ID_VERSION_MINOR_SHIFT, -+ product_id); -+} -+static DEVICE_ATTR_RO(gpuinfo); + -+/** -+ * dvfs_period_store - Store callback for the dvfs_period sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++#endif /* _BASE_HWCONFIG_ISSUES_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h +new file mode 100644 +index 000000000..542e8f63f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase.h +@@ -0,0 +1,752 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This function is called when the dvfs_period sysfs file is written to. It -+ * checks the data written, and if valid updates the DVFS period variable, ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: @count if the function succeeded. An error code on failure. 
+ */ -+static ssize_t dvfs_period_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int dvfs_period; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++#ifndef _KBASE_H_ ++#define _KBASE_H_ + -+ ret = kstrtoint(buf, 0, &dvfs_period); -+ if (ret || dvfs_period <= 0) { -+ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++#include + -+ kbdev->pm.dvfs_period = dvfs_period; -+ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); ++#include + -+ return count; -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * dvfs_period_show - Show callback for the dvfs_period sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current period used for the DVFS sample -+ * timer. -+ * -+ * Return: The number of bytes output to @buf. -+ */ -+static ssize_t dvfs_period_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++#include ++#include + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++/* ++ * Include mali_kbase_defs.h first as this provides types needed by other local ++ * header files. ++ */ ++#include "mali_kbase_defs.h" + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); ++#include "debug/mali_kbase_debug_ktrace.h" ++#include "context/mali_kbase_context.h" ++#include "mali_kbase_strings.h" ++#include "mali_kbase_mem_lowlevel.h" ++#include "mali_kbase_utility.h" ++#include "mali_kbase_mem.h" ++#include "mmu/mali_kbase_mmu.h" ++#include "mali_kbase_gpu_memory_debugfs.h" ++#include "mali_kbase_mem_profile_debugfs.h" ++#include "mali_kbase_gpuprops.h" ++#include ++#if !MALI_USE_CSF ++#include "mali_kbase_debug_job_fault.h" ++#include "mali_kbase_jd_debugfs.h" ++#include "mali_kbase_jm.h" ++#include "mali_kbase_js.h" ++#else /* !MALI_USE_CSF */ ++#include "csf/mali_kbase_debug_csf_fault.h" ++#endif /* MALI_USE_CSF */ + -+ return ret; -+} ++#include "ipa/mali_kbase_ipa.h" + -+static DEVICE_ATTR_RW(dvfs_period); ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++#include ++#endif + -+int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) -+{ -+ /* Uses default reference frequency defined in below macro */ -+ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; ++#include "mali_linux_trace.h" + -+ /* Only check lowest frequency in cases when OPPs are used and -+ * present in the device tree. -+ */ -+#ifdef CONFIG_PM_OPP -+ struct dev_pm_opp *opp_ptr; -+ unsigned long found_freq = 0; ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf.h" + -+ /* find lowest frequency OPP */ -+ opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); -+ if (IS_ERR(opp_ptr)) { -+ dev_err(kbdev->dev, "No OPPs found in device tree! Scaling timeouts using %llu kHz", -+ (unsigned long long)lowest_freq_khz); -+ } else { -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+ dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ -+#endif -+ /* convert found frequency to KHz */ -+ found_freq /= 1000; ++/* Physical memory group ID for CSF user I/O. 
++ */ ++#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT + -+ /* If lowest frequency in OPP table is still higher -+ * than the reference, then keep the reference frequency -+ * as the one to use for scaling . -+ */ -+ if (found_freq < lowest_freq_khz) -+ lowest_freq_khz = found_freq; -+ } -+#else -+ dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); ++/* Physical memory group ID for CSF firmware. ++ */ ++#define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT +#endif + -+ kbdev->lowest_gpu_freq_khz = lowest_freq_khz; -+ dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); -+ return 0; -+} -+ -+/** -+ * pm_poweroff_store - Store callback for the pm_poweroff sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file -+ * -+ * This function is called when the pm_poweroff sysfs file is written to. -+ * -+ * This file contains three values separated by whitespace. The values -+ * are gpu_poweroff_time (the period of the poweroff timer, in ns), -+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle -+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer -+ * ticks before the GPU is powered off), in that order. -+ * -+ * Return: @count if the function succeeded. An error code on failure. ++/* Physical memory group ID for a special page which can alias several regions. + */ -+static ssize_t pm_poweroff_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ struct kbasep_pm_tick_timer_state *stt; -+ int items; -+ u64 gpu_poweroff_time; -+ unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; -+ unsigned long flags; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT + -+ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, -+ &poweroff_shader_ticks, -+ &poweroff_gpu_ticks); -+ if (items != 3) { -+ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++/* ++ * Kernel-side Base (KBase) APIs ++ */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++struct kbase_device *kbase_device_alloc(void); ++/* ++ * note: configuration attributes member of kbdev needs to have ++ * been setup before calling kbase_device_init ++ */ + -+ stt = &kbdev->pm.backend.shader_tick_timer; -+ stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); -+ stt->default_ticks = poweroff_shader_ticks; -+ stt->configured_ticks = stt->default_ticks; ++int kbase_device_misc_init(struct kbase_device *kbdev); ++void kbase_device_misc_term(struct kbase_device *kbdev); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#if !MALI_USE_CSF ++void kbase_enable_quick_reset(struct kbase_device *kbdev); ++void kbase_disable_quick_reset(struct kbase_device *kbdev); ++bool kbase_is_quick_reset_enabled(struct kbase_device *kbdev); ++#endif + -+ if (poweroff_gpu_ticks != 0) -+ dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); ++void kbase_device_free(struct kbase_device *kbdev); ++int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); + -+ return count; -+} ++/* Needed for gator integration and for reporting vsync information */ ++struct kbase_device *kbase_find_device(int minor); ++void 
kbase_release_device(struct kbase_device *kbdev); + +/** -+ * pm_poweroff_show - Show callback for the pm_poweroff sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ * kbase_context_get_unmapped_area() - get an address range which is currently ++ * unmapped. ++ * @kctx: A kernel base context (which has its own GPU address space). ++ * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed ++ * as Mali GPU driver decides about the mapping). ++ * @len: Length of the address range. ++ * @pgoff: Page offset within the GPU address space of the kbase context. ++ * @flags: Flags for the allocation. + * -+ * This function is called to get the current period used for the DVFS sample -+ * timer. ++ * Finds the unmapped address range which satisfies requirements specific to ++ * GPU and those provided by the call parameters. + * -+ * Return: The number of bytes output to @buf. ++ * 1) Requirement for allocations greater than 2MB: ++ * - alignment offset is set to 2MB and the alignment mask to 2MB decremented ++ * by 1. ++ * ++ * 2) Requirements imposed for the shader memory alignment: ++ * - alignment is decided by the number of GPU pc bits which can be read from ++ * GPU properties of the device associated with this kbase context; alignment ++ * offset is set to this value in bytes and the alignment mask to the offset ++ * decremented by 1. ++ * - allocations must not to be at 4GB boundaries. Such cases are indicated ++ * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase ++ * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB. ++ * ++ * 3) Requirements imposed for tiler memory alignment, cases indicated by ++ * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase ++ * region): ++ * - alignment offset is set to the difference between the kbase region ++ * extension (converted from the original value in pages to bytes) and the kbase ++ * region initial_commit (also converted from the original value in pages to ++ * bytes); alignment mask is set to the kbase region extension in bytes and ++ * decremented by 1. ++ * ++ * Return: if successful, address of the unmapped area aligned as required; ++ * error code (negative) in case of failure; + */ -+static ssize_t pm_poweroff_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ struct kbasep_pm_tick_timer_state *stt; -+ ssize_t ret; -+ unsigned long flags; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, ++ const unsigned long addr, const unsigned long len, ++ const unsigned long pgoff, const unsigned long flags); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ stt = &kbdev->pm.backend.shader_tick_timer; -+ ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", -+ ktime_to_ns(stt->configured_interval), -+ stt->default_ticks); ++int assign_irqs(struct kbase_device *kbdev); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++int kbase_sysfs_init(struct kbase_device *kbdev); ++void kbase_sysfs_term(struct kbase_device *kbdev); + -+ return ret; -+} + -+static DEVICE_ATTR_RW(pm_poweroff); ++int kbase_protected_mode_init(struct kbase_device *kbdev); ++void kbase_protected_mode_term(struct kbase_device *kbdev); + +/** -+ * reset_timeout_store - Store callback for the reset_timeout sysfs file. 
-+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file -+ * -+ * This function is called when the reset_timeout sysfs file is written to. It -+ * checks the data written, and if valid updates the reset timeout. ++ * kbase_device_pm_init() - Performs power management initialization and ++ * Verifies device tree configurations. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Return: 0 if successful, otherwise a standard Linux error code + */ -+static ssize_t reset_timeout_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int reset_timeout; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = kstrtoint(buf, 0, &reset_timeout); -+ if (ret || reset_timeout <= 0) { -+ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } -+ -+ kbdev->reset_timeout_ms = reset_timeout; -+ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); -+ -+ return count; -+} ++int kbase_device_pm_init(struct kbase_device *kbdev); + +/** -+ * reset_timeout_show - Show callback for the reset_timeout sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current reset timeout. ++ * kbase_device_pm_term() - Performs power management deinitialization and ++ * Free resources. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: The number of bytes output to @buf. 
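-+ *
-+ * Example (editor's illustration only; the sysfs path is platform
-+ * dependent):
-+ *
-+ *   cat /sys/devices/platform/fde60000.gpu/reset_timeout
-+ *   echo 800 > /sys/devices/platform/fde60000.gpu/reset_timeout
-+ *
-+ * The reset timeout is expressed in milliseconds and must be a positive
-+ * integer.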
++ * Clean up all the resources + */ -+static ssize_t reset_timeout_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); ++void kbase_device_pm_term(struct kbase_device *kbdev); + -+ return ret; -+} + -+static DEVICE_ATTR_RW(reset_timeout); ++int power_control_init(struct kbase_device *kbdev); ++void power_control_term(struct kbase_device *kbdev); + -+static ssize_t mem_pool_size_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++void kbase_device_debugfs_term(struct kbase_device *kbdev); ++int kbase_device_debugfs_init(struct kbase_device *kbdev); ++#else /* CONFIG_DEBUG_FS */ ++static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_size); ++ return 0; +} + -+static ssize_t mem_pool_size_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ int err; -+ -+ if (!kbdev) -+ return -ENODEV; ++static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } ++#endif /* CONFIG_DEBUG_FS */ + -+ err = kbase_debugfs_helper_set_attr_from_string(buf, -+ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_trim); ++int registers_map(struct kbase_device *kbdev); ++void registers_unmap(struct kbase_device *kbdev); + -+ return err ? err : count; -+} ++int kbase_device_coherency_init(struct kbase_device *kbdev); + -+static DEVICE_ATTR_RW(mem_pool_size); + -+static ssize_t mem_pool_max_size_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); ++#if !MALI_USE_CSF ++int kbase_jd_init(struct kbase_context *kctx); ++void kbase_jd_exit(struct kbase_context *kctx); + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * kbase_jd_submit - Submit atoms to the job dispatcher ++ * ++ * @kctx: The kbase context to submit to ++ * @user_addr: The address in user space of the struct base_jd_atom array ++ * @nr_atoms: The number of atoms in the array ++ * @stride: sizeof(struct base_jd_atom) ++ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) ++ * ++ * Return: 0 on success or error code ++ */ ++int kbase_jd_submit(struct kbase_context *kctx, ++ void __user *user_addr, u32 nr_atoms, u32 stride, ++ bool uk6_atom); + -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_max_size); -+} ++/** ++ * kbase_jd_done_worker - Handle a job completion ++ * @data: a &struct work_struct ++ * ++ * This function requeues the job from the runpool (if it was soft-stopped or ++ * removed from NEXT registers). ++ * ++ * Removes it from the system if it finished/failed/was cancelled. ++ * ++ * Resolves dependencies to add dependent jobs to the context, potentially ++ * starting them if necessary (which may add more references to the context) ++ * ++ * Releases the reference to the context from the no-longer-running job. 
++ * ++ * Handles retrying submission outside of IRQ context if it failed from within ++ * IRQ context. ++ */ ++void kbase_jd_done_worker(struct work_struct *data); + -+static ssize_t mem_pool_max_size_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ int err; ++void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, ++ kbasep_js_atom_done_code done_code); ++void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); ++void kbase_jd_zap_context(struct kbase_context *kctx); + -+ if (!kbdev) -+ return -ENODEV; ++/* ++ * kbase_jd_done_nolock - Perform the necessary handling of an atom that has completed ++ * the execution. ++ * ++ * @katom: Pointer to the atom that completed the execution ++ * @post_immediately: Flag indicating that completion event can be posted ++ * immediately for @katom and the other atoms depdendent ++ * on @katom which also completed execution. The flag is ++ * false only for the case where the function is called by ++ * kbase_jd_done_worker() on the completion of atom running ++ * on the GPU. ++ * ++ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller ++ * is responsible for calling kbase_finish_soft_job *before* calling this function. ++ * ++ * The caller must hold the kbase_jd_context.lock. ++ */ ++bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately); + -+ err = kbase_debugfs_helper_set_attr_from_string(buf, -+ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_set_max_size); ++void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); ++void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); + -+ return err ? err : count; -+} ++/** ++ * kbase_job_done - Process completed jobs from job interrupt ++ * @kbdev: Pointer to the kbase device. ++ * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register ++ * ++ * This function processes the completed, or failed, jobs from the GPU job ++ * slots, for the bits set in the @done bitmask. ++ * ++ * The hwaccess_lock must be held when calling this function. ++ */ ++void kbase_job_done(struct kbase_device *kbdev, u32 done); + -+static DEVICE_ATTR_RW(mem_pool_max_size); ++/** ++ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms ++ * and soft stop them ++ * @kctx: Pointer to context to check. ++ * @katom: Pointer to priority atom. ++ * ++ * Atoms from @kctx on the same job slot as @katom, which have lower priority ++ * than @katom will be soft stopped and put back in the queue, so that atoms ++ * with higher priority can run. ++ * ++ * The hwaccess_lock must be held when calling this function. ++ */ ++void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + +/** -+ * lp_mem_pool_size_show - Show size of the large memory pages pool. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the pool size. ++ * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start ++ * of a renderpass. ++ * @kctx: Pointer to a kernel base context. ++ * @reg: Reference of a growable GPU memory region in the same context. ++ * Takes ownership of the reference if successful. + * -+ * This function is called to get the number of large memory pages which currently populate the kbdev pool. 
++ * Used to switch to incremental rendering if we have nearly run out of ++ * virtual address space in a growable memory region and the atom currently ++ * executing on a job slot is the tiler job chain at the start of a renderpass. + * -+ * Return: The number of bytes output to @buf. ++ * Return: 0 if successful, otherwise a negative error code. + */ -+static ssize_t lp_mem_pool_size_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); ++int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, ++ struct kbase_va_region *reg); + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * kbase_job_slot_softstop - Soft-stop the specified job slot ++ * ++ * @kbdev: The kbase device ++ * @js: The job slot to soft-stop ++ * @target_katom: The job that should be soft-stopped (or NULL for any job) ++ * Context: ++ * The job slot lock must be held when calling this function. ++ * The job slot must not already be in the process of being soft-stopped. ++ * ++ * Where possible any job in the next register is evicted before the soft-stop. ++ */ ++void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom); + -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_size); -+} ++void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, ++ struct kbase_jd_atom *target_katom, u32 sw_flags); + +/** -+ * lp_mem_pool_size_store - Set size of the large memory pages pool. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. ++ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode ++ * @kbdev: kbase device ++ * @action: the event which has occurred ++ * @core_reqs: core requirements of the atom ++ * @target_katom: the atom which is being affected + * -+ * This function is called to set the number of large memory pages which should populate the kbdev pool. -+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool. ++ * For a certain soft-stop action, work out whether to enter disjoint ++ * state. + * -+ * Return: @count if the function succeeded. An error code on failure. 
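-+ *
-+ * Example (editor's illustration only; the sysfs path is platform
-+ * dependent and the accepted value syntax is whatever
-+ * kbase_debugfs_helper_set_attr_from_string() parses):
-+ *
-+ *   echo 4 > /sys/devices/platform/fde60000.gpu/lp_mem_pool_size
-+ *   cat /sys/devices/platform/fde60000.gpu/lp_mem_pool_size
-+ *
-+ * Writing a smaller value trims 2MiB pages out of the pool; a larger
-+ * value causes new pages to be created and added.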
++ * This does not register multiple disjoint events if the atom has already ++ * started a disjoint period ++ * ++ * @core_reqs can be supplied as 0 if the atom had not started on the hardware ++ * (and so a 'real' soft/hard-stop was not required, but it still interrupted ++ * flow, perhaps on another context) ++ * ++ * kbase_job_check_leave_disjoint() should be used to end the disjoint ++ * state when the soft/hard-stop action is complete + */ -+static ssize_t lp_mem_pool_size_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ int err; -+ -+ if (!kbdev) -+ return -ENODEV; ++void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, ++ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); + -+ err = kbase_debugfs_helper_set_attr_from_string(buf, -+ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_trim); ++/** ++ * kbase_job_check_leave_disjoint - potentially leave disjoint state ++ * @kbdev: kbase device ++ * @target_katom: atom which is finishing ++ * ++ * Work out whether to leave disjoint state when finishing an atom that was ++ * originated by kbase_job_check_enter_disjoint(). ++ */ ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom); + -+ return err ? err : count; -+} ++#endif /* !MALI_USE_CSF */ + -+static DEVICE_ATTR_RW(lp_mem_pool_size); ++void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); ++#if !MALI_USE_CSF ++int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); ++#endif /* !MALI_USE_CSF */ ++int kbase_event_pending(struct kbase_context *ctx); ++int kbase_event_init(struct kbase_context *kctx); ++void kbase_event_close(struct kbase_context *kctx); ++void kbase_event_cleanup(struct kbase_context *kctx); ++void kbase_event_wakeup(struct kbase_context *kctx); + +/** -+ * lp_mem_pool_max_size_show - Show maximum size of the large memory pages pool. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the pool size. -+ * -+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain. ++ * kbasep_jit_alloc_validate() - Validate the JIT allocation info. + * -+ * Return: The number of bytes output to @buf. ++ * @kctx: Pointer to the kbase context within which the JIT ++ * allocation is to be validated. ++ * @info: Pointer to struct @base_jit_alloc_info ++ * which is to be validated. ++ * Return: 0 if jit allocation is valid; negative error code otherwise + */ -+static ssize_t lp_mem_pool_max_size_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_max_size); -+} ++int kbasep_jit_alloc_validate(struct kbase_context *kctx, ++ struct base_jit_alloc_info *info); + +/** -+ * lp_mem_pool_max_size_store - Set maximum size of the large memory pages pool. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. 
++ * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory ++ * allocations. + * -+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain. ++ * @kctx: Pointer to the kbase context within which the just-in-time ++ * memory allocations are to be retried. ++ */ ++void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); ++ ++/** ++ * kbase_free_user_buffer() - Free memory allocated for struct ++ * @kbase_debug_copy_buffer. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * @buffer: Pointer to the memory location allocated for the object ++ * of the type struct @kbase_debug_copy_buffer. + */ -+static ssize_t lp_mem_pool_max_size_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) ++static inline void kbase_free_user_buffer( ++ struct kbase_debug_copy_buffer *buffer) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ int err; ++ struct page **pages = buffer->extres_pages; ++ int nr_pages = buffer->nr_extres_pages; + -+ if (!kbdev) -+ return -ENODEV; ++ if (pages) { ++ int i; + -+ err = kbase_debugfs_helper_set_attr_from_string(buf, -+ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_set_max_size); ++ for (i = 0; i < nr_pages; i++) { ++ struct page *pg = pages[i]; + -+ return err ? err : count; ++ if (pg) ++ put_page(pg); ++ } ++ kfree(pages); ++ } +} + -+static DEVICE_ATTR_RW(lp_mem_pool_max_size); ++#if !MALI_USE_CSF ++int kbase_process_soft_job(struct kbase_jd_atom *katom); ++int kbase_prepare_soft_job(struct kbase_jd_atom *katom); ++void kbase_finish_soft_job(struct kbase_jd_atom *katom); ++void kbase_cancel_soft_job(struct kbase_jd_atom *katom); ++void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); ++void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); ++#endif ++int kbase_soft_event_update(struct kbase_context *kctx, ++ u64 event, ++ unsigned char new_status); ++ ++void kbasep_soft_job_timeout_worker(struct timer_list *timer); ++void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); ++#endif /* !MALI_USE_CSF */ ++ ++void kbasep_as_do_poke(struct work_struct *work); + +/** -+ * show_simplified_mem_pool_max_size - Show the maximum size for the memory -+ * pool 0 of small (4KiB) pages. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the max size. ++ * kbase_pm_is_suspending - Check whether a system suspend is in progress, ++ * or has already been suspended + * -+ * This function is called to get the maximum size for the memory pool 0 of -+ * small (4KiB) pages. It is assumed that the maximum size value is same for -+ * all the pools. ++ * @kbdev: The kbase device structure for the device + * -+ * Return: The number of bytes output to @buf. ++ * The caller should ensure that either kbdev->pm.active_count_lock is held, or ++ * a dmb was executed recently (to ensure the value is most ++ * up-to-date). However, without a lock the value could change afterwards. 
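++ *
++ * Example (editor's sketch only; example_wq and example_work are
++ * hypothetical names, not objects defined by this driver):
++ *
++ *   if (!kbase_pm_is_suspending(kbdev))
++ *       queue_work(example_wq, &example_work);
++ *
++ * A caller that needs the answer to stay valid should hold
++ * kbdev->pm.active_count_lock around both the check and the work it
++ * guards.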
++ * ++ * Return: ++ * * false if a suspend is not in progress ++ * * !=false otherwise + */ -+static ssize_t show_simplified_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size); ++ return kbdev->pm.suspending; +} + -+/** -+ * set_simplified_mem_pool_max_size - Set the same maximum size for all the -+ * memory pools of small (4KiB) pages. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++/* ++ * Check whether a gpu lost is in progress + * -+ * This function is called to set the same maximum size for all the memory -+ * pools of small (4KiB) pages. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: The number of bytes output to @buf. ++ * Indicates whether a gpu lost has been received and jobs are no longer ++ * being scheduled ++ * ++ * Return: false if gpu is lost ++ * Return: != false otherwise + */ -+static ssize_t set_simplified_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) ++static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ unsigned long new_size; -+ int gid; -+ int err; -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ err = kstrtoul(buf, 0, &new_size); -+ if (err) -+ return -EINVAL; -+ -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) -+ kbase_mem_pool_debugfs_set_max_size( -+ kbdev->mem_pools.small, gid, (size_t)new_size); -+ -+ return count; ++ return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true); +} + -+static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, -+ set_simplified_mem_pool_max_size); -+ -+/** -+ * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory -+ * pool 0 of large (2MiB) pages. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the total current pool size. ++/* ++ * Set or clear gpu lost state + * -+ * This function is called to get the maximum size for the memory pool 0 of -+ * large (2MiB) pages. It is assumed that the maximum size value is same for -+ * all the pools. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @gpu_lost: true to activate GPU lost state, FALSE is deactive it + * -+ * Return: The number of bytes output to @buf. ++ * Puts power management code into gpu lost state or takes it out of the ++ * state. Once in gpu lost state new GPU jobs will no longer be ++ * scheduled. + */ -+static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, ++ bool gpu_lost) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; ++ const int new_val = (gpu_lost ? 
1 : 0); ++ const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val); + -+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, -+ kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size); ++ if (new_val != cur_val) ++ KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, new_val); +} ++#endif + +/** -+ * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the -+ * memory pools of large (2MiB) pages. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * kbase_pm_is_active - Determine whether the GPU is active + * -+ * This function is called to set the same maximum size for all the memory -+ * pools of large (2MiB) pages. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: The number of bytes output to @buf. ++ * This takes into account whether there is an active context reference. ++ * ++ * Return: true if the GPU is active, false otherwise + */ -+static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) ++static inline bool kbase_pm_is_active(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = to_kbase_device(dev); -+ unsigned long new_size; -+ int gid; -+ int err; -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ err = kstrtoul(buf, 0, &new_size); -+ if (err) -+ return -EINVAL; -+ -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) -+ kbase_mem_pool_debugfs_set_max_size( -+ kbdev->mem_pools.large, gid, (size_t)new_size); -+ -+ return count; -+} -+ -+static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size, -+ set_simplified_lp_mem_pool_max_size); ++ return kbdev->pm.active_count > 0; ++} + +/** -+ * show_simplified_ctx_default_max_size - Show the default maximum size for the -+ * memory pool 0 of small (4KiB) pages. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the pool size. ++ * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can ++ * run as using the device tree, and save this ++ * within kbdev. ++ * @kbdev: Pointer to kbase device. + * -+ * This function is called to get the default ctx maximum size for the memory -+ * pool 0 of small (4KiB) pages. It is assumed that maximum size value is same -+ * for all the pools. The maximum size for the pool of large (2MiB) pages will -+ * be same as max size of the pool of small (4KiB) pages in terms of bytes. ++ * This function could be called from kbase_clk_rate_trace_manager_init, ++ * but is left separate as it can be called as soon as ++ * dev_pm_opp_of_add_table() has been called to initialize the OPP table, ++ * which occurs in power_control_init(). + * -+ * Return: The number of bytes output to @buf. ++ * Return: 0 in any case. 
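++ *
++ * Example of the kind of OPP entry this function looks for (editor's
++ * illustration; the node name and frequency are made up, not taken from
++ * a real device tree):
++ *
++ *   opp-200000000 {
++ *       opp-hz = /bits/ 64 <200000000>;
++ *   };
++ *
++ * The lowest opp-hz found is converted to kHz and stored in
++ * kbdev->lowest_gpu_freq_khz; if no OPP table is present, a default
++ * reference frequency is used for scaling timeouts instead.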
+ */ -+static ssize_t show_simplified_ctx_default_max_size(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); -+ size_t max_size; -+ -+ if (!kbdev) -+ return -ENODEV; ++int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); + -+ max_size = kbase_mem_pool_config_debugfs_max_size( -+ kbdev->mem_pool_defaults.small, 0); ++/** ++ * kbase_pm_metrics_start - Start the utilization metrics timer ++ * @kbdev: Pointer to the kbase device for which to start the utilization ++ * metrics calculation thread. ++ * ++ * Start the timer that drives the metrics calculation, runs the custom DVFS. ++ */ ++void kbase_pm_metrics_start(struct kbase_device *kbdev); + -+ return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size); -+} ++/** ++ * kbase_pm_metrics_stop - Stop the utilization metrics timer ++ * @kbdev: Pointer to the kbase device for which to stop the utilization ++ * metrics calculation thread. ++ * ++ * Stop the timer that drives the metrics calculation, runs the custom DVFS. ++ */ ++void kbase_pm_metrics_stop(struct kbase_device *kbdev); + ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) +/** -+ * set_simplified_ctx_default_max_size - Set the same default maximum size for -+ * all the pools created for new -+ * contexts. This covers the pool of -+ * large pages as well and its max size -+ * will be same as max size of the pool -+ * of small pages in terms of bytes. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. ++ * kbase_pm_handle_runtime_suspend - Handle the runtime suspend of GPU + * -+ * This function is called to set the same maximum size for all pools created -+ * for new contexts. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * This function is called from the runtime suspend callback function for ++ * saving the HW state and powering down GPU, if GPU was in sleep state mode. ++ * It does the following steps ++ * - Powers up the L2 cache and re-activates the MCU. ++ * - Suspend the CSGs ++ * - Halts the MCU ++ * - Powers down the L2 cache. ++ * - Invokes the power_off callback to power down the GPU. ++ * ++ * Return: 0 if the GPU was already powered down or no error was encountered ++ * in the power down, otherwise an error code. + */ -+static ssize_t set_simplified_ctx_default_max_size(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ unsigned long new_size; -+ int err; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ err = kstrtoul(buf, 0, &new_size); -+ if (err) -+ return -EINVAL; -+ -+ kbase_mem_pool_group_config_set_max_size( -+ &kbdev->mem_pool_defaults, (size_t)new_size); -+ -+ return count; -+} ++int kbase_pm_handle_runtime_suspend(struct kbase_device *kbdev); + -+static DEVICE_ATTR(ctx_default_max_size, 0600, -+ show_simplified_ctx_default_max_size, -+ set_simplified_ctx_default_max_size); ++/** ++ * kbase_pm_force_mcu_wakeup_after_sleep - Force the wake up of MCU from sleep ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function forces the wake up of MCU from sleep state and wait for ++ * MCU to become active. ++ * It usually gets called from the runtime suspend callback function. 
++ * It also gets called from the GPU reset handler or at the time of system ++ * suspend or when User tries to terminate/suspend the on-slot group. ++ * ++ * Note: @gpu_wakeup_override flag that forces the reactivation of MCU is ++ * set by this function and it is the caller's responsibility to ++ * clear the flag. ++ * ++ * Return: 0 if the wake up was successful. ++ */ ++int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev); ++#endif + +#if !MALI_USE_CSF +/** -+ * js_ctx_scheduling_mode_show - Show callback for js_ctx_scheduling_mode sysfs -+ * entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the context scheduling mode information. -+ * -+ * This function is called to get the context scheduling mode being used by JS. ++ * kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in ++ * base_jd_atom::atom_number ++ * @kctx: KBase context pointer ++ * @katom: Atome for which to return ID + * -+ * Return: The number of bytes output to @buf. ++ * Return: the atom's ID. + */ -+static ssize_t js_ctx_scheduling_mode_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++static inline int kbase_jd_atom_id(struct kbase_context *kctx, ++ const struct kbase_jd_atom *katom) +{ -+ struct kbase_device *kbdev; ++ int result; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->kctx == kctx); + -+ return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); ++ result = katom - &kctx->jctx.atoms[0]; ++ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); ++ return result; +} + +/** -+ * js_ctx_scheduling_mode_store - Set callback for js_ctx_scheduling_mode sysfs -+ * entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. -+ * -+ * This function is called when the js_ctx_scheduling_mode sysfs file is written -+ * to. It checks the data written, and if valid updates the ctx scheduling mode -+ * being by JS. ++ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID ++ * @kctx: Context pointer ++ * @id: ID of atom to retrieve + * -+ * Return: @count if the function succeeded. An error code on failure. 
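-+ *
-+ * Example (editor's illustration only; the sysfs path is platform
-+ * dependent):
-+ *
-+ *   echo 1 > /sys/devices/platform/fde60000.gpu/js_ctx_scheduling_mode
-+ *
-+ * Valid values are 0 to KBASE_JS_PRIORITY_MODE_COUNT - 1; anything else
-+ * is rejected with -EINVAL.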
++ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID + */ -+static ssize_t js_ctx_scheduling_mode_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) ++static inline struct kbase_jd_atom *kbase_jd_atom_from_id( ++ struct kbase_context *kctx, int id) +{ -+ struct kbase_context *kctx; -+ u32 new_js_ctx_scheduling_mode; -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ int ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); -+ if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { -+ dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" -+ " write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } -+ -+ if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) -+ return count; -+ -+ mutex_lock(&kbdev->kctx_list_lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ /* Update the context priority mode */ -+ kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; -+ -+ /* Adjust priority of all the contexts as per the new mode */ -+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) -+ kbase_js_update_ctx_priority(kctx); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->kctx_list_lock); -+ -+ dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); -+ -+ return count; ++ return &kctx->jctx.atoms[id]; +} -+ -+static DEVICE_ATTR_RW(js_ctx_scheduling_mode); -+ -+/* Number of entries in serialize_jobs_settings[] */ -+#define NR_SERIALIZE_JOBS_SETTINGS 5 -+/* Maximum string length in serialize_jobs_settings[].name */ -+#define MAX_SERIALIZE_JOBS_NAME_LEN 16 -+ -+static struct -+{ -+ char *name; -+ u8 setting; -+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { -+ {"none", 0}, -+ {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, -+ {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, -+ {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, -+ {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | -+ KBASE_SERIALIZE_RESET} -+}; ++#endif /* !MALI_USE_CSF */ + +/** -+ * update_serialize_jobs_setting - Update the serialization setting for the -+ * submission of GPU jobs. ++ * kbase_disjoint_init - Initialize the disjoint state + * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. -+ * @buf: Buffer containing the value written to the sysfs/debugfs file. -+ * @count: The number of bytes to write to the sysfs/debugfs file. ++ * @kbdev: The kbase device + * -+ * This function is called when the serialize_jobs sysfs/debugfs file is -+ * written to. It matches the requested setting against the available settings -+ * and if a matching setting is found updates kbdev->serialize_jobs. ++ * The disjoint event count and state are both set to zero. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Disjoint functions usage: ++ * ++ * The disjoint event count should be incremented whenever a disjoint event occurs. ++ * ++ * There are several cases which are regarded as disjoint behavior. Rather than just increment ++ * the counter during disjoint events we also increment the counter when jobs may be affected ++ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. ++ * ++ * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases ++ * the count of disjoint events. 
++ * ++ * The disjoint state is then used to increase the count of disjoint events during job submission ++ * and job completion. Any atom submitted or completed while the disjoint state is greater than ++ * zero is regarded as a disjoint event. ++ * ++ * The disjoint event counter is also incremented immediately whenever a job is soft stopped ++ * and during context creation. ++ * ++ * Return: 0 on success and non-zero value on failure. + */ -+static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, -+ const char *buf, size_t count) -+{ -+ int i; -+ bool valid = false; -+ -+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { -+ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { -+ kbdev->serialize_jobs = -+ serialize_jobs_settings[i].setting; -+ valid = true; -+ break; -+ } -+ } -+ -+ if (!valid) { -+ dev_err(kbdev->dev, "serialize_jobs: invalid setting"); -+ return -EINVAL; -+ } -+ -+ return count; -+} ++void kbase_disjoint_init(struct kbase_device *kbdev); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) +/** -+ * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs -+ * debugfs file -+ * @sfile: seq_file pointer -+ * @data: Private callback data -+ * -+ * This function is called to get the contents of the serialize_jobs debugfs -+ * file. This is a list of the available settings with the currently active one -+ * surrounded by square brackets. ++ * kbase_disjoint_event - Increase the count of disjoint events ++ * called when a disjoint event has happened + * -+ * Return: 0 on success, or an error code on error ++ * @kbdev: The kbase device + */ -+static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, -+ void *data) -+{ -+ struct kbase_device *kbdev = sfile->private; -+ int i; -+ -+ CSTD_UNUSED(data); -+ -+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { -+ if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) -+ seq_printf(sfile, "[%s] ", -+ serialize_jobs_settings[i].name); -+ else -+ seq_printf(sfile, "%s ", -+ serialize_jobs_settings[i].name); -+ } -+ -+ seq_puts(sfile, "\n"); -+ -+ return 0; -+} ++void kbase_disjoint_event(struct kbase_device *kbdev); + +/** -+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs -+ * debugfs file. -+ * @file: File pointer -+ * @ubuf: User buffer containing data to store -+ * @count: Number of bytes in user buffer -+ * @ppos: File position ++ * kbase_disjoint_event_potential - Increase the count of disjoint events ++ * only if the GPU is in a disjoint state + * -+ * This function is called when the serialize_jobs debugfs file is written to. -+ * It matches the requested setting against the available settings and if a -+ * matching setting is found updates kbdev->serialize_jobs. ++ * @kbdev: The kbase device + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * This should be called when something happens which could be disjoint if the GPU ++ * is in a disjoint state. The state refcount keeps track of this. 
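++ *
++ * Typical calling pattern (editor's sketch; do_gpu_reset() and
++ * submit_next_job() are placeholder names, not functions provided by
++ * this driver):
++ *
++ *   In the reset path, bracket the disruptive work:
++ *     kbase_disjoint_state_up(kbdev);
++ *     do_gpu_reset(kbdev);
++ *     kbase_disjoint_state_down(kbdev);
++ *
++ *   In the submission/completion path, record a possible disjoint event:
++ *     kbase_disjoint_event_potential(kbdev);
++ *     submit_next_job(kbdev, katom);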
+ */ -+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) -+{ -+ struct seq_file *s = file->private_data; -+ struct kbase_device *kbdev = s->private; -+ char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; -+ -+ CSTD_UNUSED(ppos); -+ -+ count = min_t(size_t, sizeof(buf) - 1, count); -+ if (copy_from_user(buf, ubuf, count)) -+ return -EFAULT; -+ -+ buf[count] = 0; -+ -+ return update_serialize_jobs_setting(kbdev, buf, count); -+} ++void kbase_disjoint_event_potential(struct kbase_device *kbdev); + +/** -+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs -+ * debugfs file -+ * @in: inode pointer -+ * @file: file pointer ++ * kbase_disjoint_event_get - Returns the count of disjoint events + * -+ * Return: Zero on success, error code on failure ++ * @kbdev: The kbase device ++ * Return: the count of disjoint events + */ -+static int kbasep_serialize_jobs_debugfs_open(struct inode *in, -+ struct file *file) -+{ -+ return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, -+ in->i_private); -+} -+ -+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_serialize_jobs_debugfs_open, -+ .read = seq_read, -+ .write = kbasep_serialize_jobs_debugfs_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++u32 kbase_disjoint_event_get(struct kbase_device *kbdev); + -+#endif /* CONFIG_DEBUG_FS */ ++/** ++ * kbase_disjoint_state_up - Increment the refcount state indicating that ++ * the GPU is in a disjoint state. ++ * ++ * @kbdev: The kbase device ++ * ++ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) ++ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down ++ * should be called ++ */ ++void kbase_disjoint_state_up(struct kbase_device *kbdev); + +/** -+ * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. ++ * kbase_disjoint_state_down - Decrement the refcount state + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * @kbdev: The kbase device + * -+ * This function is called to get the contents of the serialize_jobs sysfs -+ * file. This is a list of the available settings with the currently active -+ * one surrounded by square brackets. ++ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) + * -+ * Return: The number of bytes output to @buf. ++ * Called after @ref kbase_disjoint_state_up once the disjoint state is over + */ -+static ssize_t show_serialize_jobs_sysfs(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); -+ ssize_t ret = 0; -+ int i; -+ -+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { -+ if (kbdev->serialize_jobs == -+ serialize_jobs_settings[i].setting) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", -+ serialize_jobs_settings[i].name); -+ else -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", -+ serialize_jobs_settings[i].name); -+ } -+ -+ if (ret < PAGE_SIZE - 1) { -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); -+ } else { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; -+} ++void kbase_disjoint_state_down(struct kbase_device *kbdev); + +/** -+ * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. 
++ * kbase_device_pcm_dev_init() - Initialize the priority control manager device + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file ++ * @kbdev: Pointer to the structure for the kbase device + * -+ * This function is called when the serialize_jobs sysfs file is written to. -+ * It matches the requested setting against the available settings and if a -+ * matching setting is found updates kbdev->serialize_jobs. ++ * Pointer to the priority control manager device is retrieved from the device ++ * tree and a reference is taken on the module implementing the callbacks for ++ * priority control manager operations. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Return: 0 if successful, or an error code on failure + */ -+static ssize_t store_serialize_jobs_sysfs(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); -+} -+ -+static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, -+ store_serialize_jobs_sysfs); -+#endif /* !MALI_USE_CSF */ ++int kbase_device_pcm_dev_init(struct kbase_device *const kbdev); + -+static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) -+{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ protected_mode_hwcnt_disable_work); -+ spinlock_t *backend_lock; -+ unsigned long flags; ++/** ++ * kbase_device_pcm_dev_term() - Performs priority control manager device ++ * deinitialization. ++ * ++ * @kbdev: Pointer to the structure for the kbase device ++ * ++ * Reference is released on the module implementing the callbacks for priority ++ * control manager operations. ++ */ ++void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); + -+ bool do_disable; ++/** ++ * KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped ++ * and the number of contexts is >= this value it is reported as a disjoint event ++ */ ++#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 + -+#if MALI_USE_CSF -+ backend_lock = &kbdev->csf.scheduler.interrupt_lock; -+#else -+ backend_lock = &kbdev->hwaccess_lock; ++#if !defined(UINT64_MAX) ++ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + -+ spin_lock_irqsave(backend_lock, flags); -+ do_disable = !kbdev->protected_mode_hwcnt_desired && -+ !kbdev->protected_mode_hwcnt_disabled; -+ spin_unlock_irqrestore(backend_lock, flags); -+ -+ if (!do_disable) -+ return; -+ -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); -+ -+ spin_lock_irqsave(backend_lock, flags); -+ do_disable = !kbdev->protected_mode_hwcnt_desired && -+ !kbdev->protected_mode_hwcnt_disabled; -+ -+ if (do_disable) { -+ /* Protected mode state did not change while we were doing the -+ * disable, so commit the work we just performed and continue -+ * the state machine. -+ */ -+ kbdev->protected_mode_hwcnt_disabled = true; -+#if !MALI_USE_CSF -+ kbase_backend_slot_update(kbdev); -+#endif /* !MALI_USE_CSF */ -+ } else { -+ /* Protected mode state was updated while we were doing the -+ * disable, so we need to undo the disable we just performed. 
-+ */ -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ } ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +new file mode 100644 +index 000000000..10dbeee02 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +@@ -0,0 +1,109 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ spin_unlock_irqrestore(backend_lock, flags); -+} ++#include + -+#ifndef PLATFORM_PROTECTED_CALLBACKS -+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) -+{ -+ struct kbase_device *kbdev = pdev->data; ++#include ++#include ++#include + -+ return kbase_pm_protected_mode_enable(kbdev); -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#ifdef CONFIG_MALI_BIFROST_DEBUG + -+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) ++static int kbase_as_fault_read(struct seq_file *sfile, void *data) +{ -+ struct kbase_device *kbdev = pdev->data; -+ -+ return kbase_pm_protected_mode_disable(kbdev); -+} ++ uintptr_t as_no = (uintptr_t) sfile->private; + -+static const struct protected_mode_ops kbasep_native_protected_ops = { -+ .protected_mode_enable = kbasep_protected_mode_enable, -+ .protected_mode_disable = kbasep_protected_mode_disable -+}; ++ struct list_head *entry; ++ const struct list_head *kbdev_list; ++ struct kbase_device *kbdev = NULL; + -+#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) -+#endif /* PLATFORM_PROTECTED_CALLBACKS */ ++ kbdev_list = kbase_device_get_list(); + -+int kbase_protected_mode_init(struct kbase_device *kbdev) -+{ -+ /* Use native protected ops */ -+ kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), -+ GFP_KERNEL); -+ if (!kbdev->protected_dev) -+ return -ENOMEM; -+ kbdev->protected_dev->data = kbdev; -+ kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS; -+ INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, -+ kbasep_protected_mode_hwcnt_disable_worker); -+ kbdev->protected_mode_hwcnt_desired = true; -+ kbdev->protected_mode_hwcnt_disabled = false; -+ return 0; -+} ++ list_for_each(entry, kbdev_list) { ++ kbdev = list_entry(entry, struct kbase_device, entry); + -+void kbase_protected_mode_term(struct kbase_device *kbdev) -+{ -+ cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); -+ kfree(kbdev->protected_dev); -+} ++ if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+static int kbase_common_reg_map(struct kbase_device *kbdev) -+{ -+ return 0; -+} -+static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -+{ -+} -+#else /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -+static int kbase_common_reg_map(struct kbase_device *kbdev) -+{ -+ int 
err = 0; ++ /* don't show this one again until another fault occors */ ++ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); + -+ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { -+ dev_err(kbdev->dev, "Register window unavailable\n"); -+ err = -EIO; -+ goto out_region; -+ } ++ /* output the last page fault addr */ ++ seq_printf(sfile, "%llu\n", ++ (u64) kbdev->as[as_no].pf_data.addr); ++ } + -+ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); -+ if (!kbdev->reg) { -+ dev_err(kbdev->dev, "Can't remap register window\n"); -+ err = -EINVAL; -+ goto out_ioremap; + } + -+ return err; ++ kbase_device_put_list(kbdev_list); + -+out_ioremap: -+ release_mem_region(kbdev->reg_start, kbdev->reg_size); -+out_region: -+ return err; ++ return 0; +} + -+static void kbase_common_reg_unmap(struct kbase_device * const kbdev) ++static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) +{ -+ if (kbdev->reg) { -+ iounmap(kbdev->reg); -+ release_mem_region(kbdev->reg_start, kbdev->reg_size); -+ kbdev->reg = NULL; -+ kbdev->reg_start = 0; -+ kbdev->reg_size = 0; -+ } ++ return single_open(file, kbase_as_fault_read, in->i_private); +} -+#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -+ -+int registers_map(struct kbase_device * const kbdev) -+{ -+ /* the first memory resource is the physical address of the GPU -+ * registers. -+ */ -+ struct platform_device *pdev = to_platform_device(kbdev->dev); -+ struct resource *reg_res; -+ int err; -+ -+ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ if (!reg_res) { -+ dev_err(kbdev->dev, "Invalid register resource\n"); -+ return -ENOENT; -+ } -+ -+ kbdev->reg_start = reg_res->start; -+ kbdev->reg_size = resource_size(reg_res); -+ -+#if MALI_USE_CSF -+ if (kbdev->reg_size < -+ (CSF_HW_DOORBELL_PAGE_OFFSET + -+ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { -+ dev_err(kbdev->dev, "Insufficient register space, will override to the required size\n"); -+ kbdev->reg_size = CSF_HW_DOORBELL_PAGE_OFFSET + -+ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; -+ } -+#endif + -+ err = kbase_common_reg_map(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Failed to map registers\n"); -+ return err; -+ } ++static const struct file_operations as_fault_fops = { ++ .owner = THIS_MODULE, ++ .open = kbase_as_fault_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ return 0; -+} ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++#endif /* CONFIG_DEBUG_FS */ + -+void registers_unmap(struct kbase_device *kbdev) ++/* ++ * Initialize debugfs entry for each address space ++ */ ++void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) +{ -+ kbase_common_reg_unmap(kbdev); -+} -+ -+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ uint i; ++ char as_name[64]; ++ struct dentry *debugfs_directory; + -+static bool kbase_is_pm_enabled(const struct device_node *gpu_node) -+{ -+ const struct device_node *power_model_node; -+ const void *cooling_cells_node; -+ const void *operating_point_node; -+ bool is_pm_enable = false; ++ kbdev->debugfs_as_read_bitmap = 0ULL; + -+ power_model_node = of_get_child_by_name(gpu_node, -+ "power_model"); -+ if (power_model_node) -+ is_pm_enable = true; ++ KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); ++ KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); + -+ cooling_cells_node = of_get_property(gpu_node, -+ "#cooling-cells", NULL); -+ if 
(cooling_cells_node) -+ is_pm_enable = true; ++ debugfs_directory = debugfs_create_dir("address_spaces", ++ kbdev->mali_debugfs_directory); + -+ operating_point_node = of_get_property(gpu_node, -+ "operating-points", NULL); -+ if (operating_point_node) -+ is_pm_enable = true; ++ if (IS_ERR_OR_NULL(debugfs_directory)) { ++ dev_warn(kbdev->dev, ++ "unable to create address_spaces debugfs directory"); ++ } else { ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) ++ debugfs_create_file(as_name, 0444, debugfs_directory, ++ (void *)(uintptr_t)i, &as_fault_fops); ++ } ++ } + -+ return is_pm_enable; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++#endif /* CONFIG_DEBUG_FS */ +} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h +new file mode 100644 +index 000000000..59bbcf6f4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h +@@ -0,0 +1,48 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2016, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+static bool kbase_is_pv_enabled(const struct device_node *gpu_node) -+{ -+ const void *arbiter_if_node; -+ -+ arbiter_if_node = of_get_property(gpu_node, -+ "arbiter_if", NULL); ++#ifndef _KBASE_AS_FAULT_DEBUG_FS_H ++#define _KBASE_AS_FAULT_DEBUG_FS_H + -+ return arbiter_if_node ? 
true : false; -+} ++/** ++ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults ++ * ++ * @kbdev: Pointer to kbase_device ++ */ ++void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + -+static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) ++/** ++ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs ++ * ++ * @kbdev: Pointer to kbase_device ++ * @as_no: The address space the fault occurred on ++ */ ++static inline void ++kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +{ -+ const void *coherency_dts; -+ u32 coherency; -+ -+ coherency_dts = of_get_property(gpu_node, -+ "system-coherency", -+ NULL); -+ if (coherency_dts) { -+ coherency = be32_to_cpup(coherency_dts); -+ if (coherency == COHERENCY_ACE) -+ return true; -+ } -+ return false; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); ++#endif /* CONFIG_DEBUG_FS */ ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ +} + -+#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ -+ -+int kbase_device_pm_init(struct kbase_device *kbdev) -+{ -+ int err = 0; -+ -+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) -+ -+ u32 gpu_id; -+ u32 product_id; -+ u32 gpu_model_id; -+ -+ if (kbase_is_pv_enabled(kbdev->dev->of_node)) { -+ dev_info(kbdev->dev, "Arbitration interface enabled\n"); -+ if (kbase_is_pm_enabled(kbdev->dev->of_node)) { -+ /* Arbitration AND power management invalid */ -+ dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); -+ return -EPERM; -+ } -+ if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { -+ /* Arbitration AND full coherency invalid */ -+ dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); -+ return -EPERM; -+ } -+ err = kbase_arbiter_pm_early_init(kbdev); -+ if (err == 0) { -+ /* Check if Arbitration is running on -+ * supported GPU platform -+ */ -+ kbase_pm_register_access_enable(kbdev); -+ gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); -+ kbase_pm_register_access_disable(kbdev); -+ product_id = -+ KBASE_UBFX32(gpu_id, KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); -+ gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); ++#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_bits.h b/drivers/gpu/arm/bifrost/mali_kbase_bits.h +new file mode 100644 +index 000000000..a085fd86c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_bits.h +@@ -0,0 +1,31 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ if (gpu_model_id != GPU_ID2_PRODUCT_TGOX -+ && gpu_model_id != GPU_ID2_PRODUCT_TNOX -+ && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { -+ kbase_arbiter_pm_early_term(kbdev); -+ dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); -+ return -EPERM; -+ } -+ } -+ } else { -+ kbdev->arb.arb_if = NULL; -+ kbdev->arb.arb_dev = NULL; -+ err = power_control_init(kbdev); -+ } -+#else -+ err = power_control_init(kbdev); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ -+ return err; -+} ++#ifndef _KBASE_BITS_H_ ++#define _KBASE_BITS_H_ + -+void kbase_device_pm_term(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#if IS_ENABLED(CONFIG_OF) -+ if (kbase_is_pv_enabled(kbdev->dev->of_node)) -+ kbase_arbiter_pm_early_term(kbdev); -+ else -+ power_control_term(kbdev); -+#endif /* CONFIG_OF */ ++#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) ++#include +#else -+ power_control_term(kbdev); ++#include +#endif -+} -+ -+int power_control_init(struct kbase_device *kbdev) -+{ -+#ifndef CONFIG_OF -+ /* Power control initialization requires at least the capability to get -+ * regulators and clocks from the device tree, as well as parsing -+ * arrays of unsigned integer values. -+ * -+ * The whole initialization process shall simply be skipped if the -+ * minimum capability is not available. -+ */ -+ return 0; -+#else -+ struct platform_device *pdev; -+ int err = 0; -+ unsigned int i; -+#if defined(CONFIG_REGULATOR) -+ static const char * const regulator_names[] = { -+ "mali", "mem" -+ }; -+#endif /* CONFIG_REGULATOR */ -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ pdev = to_platform_device(kbdev->dev); + -+#if defined(CONFIG_REGULATOR) -+ /* Since the error code EPROBE_DEFER causes the entire probing -+ * procedure to be restarted from scratch at a later time, -+ * all regulators will be released before returning. -+ * -+ * Any other error is ignored and the driver will continue -+ * operating with a partial initialization of regulators. -+ */ -+ for (i = 0; i < ARRAY_SIZE(regulator_names); i++) { -+ kbdev->regulators[i] = regulator_get_optional(kbdev->dev, -+ regulator_names[i]); -+ if (IS_ERR(kbdev->regulators[i])) { -+ err = PTR_ERR(kbdev->regulators[i]); -+ kbdev->regulators[i] = NULL; -+ break; -+ } -+ } -+ if (err == -EPROBE_DEFER) { -+ while (i > 0) -+ regulator_put(kbdev->regulators[--i]); -+ return err; -+ } ++#endif /* _KBASE_BITS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c +new file mode 100644 +index 000000000..4675025ba +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c +@@ -0,0 +1,64 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ kbdev->nr_regulators = i; -+ dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators); -+#endif ++/* ++ * Cache Policy API. ++ */ + -+ /* Having more clocks than regulators is acceptable, while the -+ * opposite shall not happen. -+ * -+ * Since the error code EPROBE_DEFER causes the entire probing -+ * procedure to be restarted from scratch at a later time, -+ * all clocks and regulators will be released before returning. -+ * -+ * Any other error is ignored and the driver will continue -+ * operating with a partial initialization of clocks. -+ */ -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i); -+ if (IS_ERR(kbdev->clocks[i])) { -+ err = PTR_ERR(kbdev->clocks[i]); -+ kbdev->clocks[i] = NULL; -+ break; -+ } ++#include "mali_kbase_cache_policy.h" + -+ err = clk_prepare(kbdev->clocks[i]); -+ if (err) { -+ dev_err(kbdev->dev, -+ "Failed to prepare and enable clock (%d)\n", -+ err); -+ clk_put(kbdev->clocks[i]); -+ break; -+ } -+ } -+ if (err == -EPROBE_DEFER) { -+ while (i > 0) { -+ clk_disable_unprepare(kbdev->clocks[--i]); -+ clk_put(kbdev->clocks[i]); -+ } -+ goto clocks_probe_defer; -+ } ++/* ++ * The output flags should be a combination of the following values: ++ * KBASE_REG_CPU_CACHED: CPU cache should be enabled ++ * KBASE_REG_GPU_CACHED: GPU cache should be enabled ++ * ++ * NOTE: Some components within the GPU might only be able to access memory ++ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for ++ * more details. ++ */ ++u32 kbase_cache_enabled(u32 flags, u32 nr_pages) ++{ ++ u32 cache_flags = 0; + -+ kbdev->nr_clocks = i; -+ dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks); ++ CSTD_UNUSED(nr_pages); + -+ /* Any error in parsing the OPP table from the device file -+ * shall be ignored. The fact that the table may be absent or wrong -+ * on the device tree of the platform shouldn't prevent the driver -+ * from completing its initialization. 
-+ */ -+#if defined(CONFIG_PM_OPP) -+#ifdef CONFIG_ARCH_ROCKCHIP -+ err = kbase_platform_rk_init_opp_table(kbdev); -+ if (err) -+ dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err); -+#else -+ err = dev_pm_opp_of_add_table(kbdev->dev); -+ CSTD_UNUSED(err); -+#endif -+#endif /* CONFIG_PM_OPP */ -+ return 0; ++ if (!(flags & BASE_MEM_UNCACHED_GPU)) ++ cache_flags |= KBASE_REG_GPU_CACHED; + -+#if defined(CONFIG_PM_OPP) && \ -+ ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR)) -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ if (kbdev->clocks[i]) { -+ if (__clk_is_enabled(kbdev->clocks[i])) -+ clk_disable_unprepare(kbdev->clocks[i]); -+ clk_put(kbdev->clocks[i]); -+ kbdev->clocks[i] = NULL; -+ } else -+ break; -+ } -+#endif ++ if (flags & BASE_MEM_CACHED_CPU) ++ cache_flags |= KBASE_REG_CPU_CACHED; + -+clocks_probe_defer: -+#if defined(CONFIG_REGULATOR) -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) -+ regulator_put(kbdev->regulators[i]); -+#endif -+ return err; -+#endif /* CONFIG_OF */ ++ return cache_flags; +} + -+void power_control_term(struct kbase_device *kbdev) ++void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, ++ enum dma_data_direction dir) +{ -+ unsigned int i; -+ -+#if defined(CONFIG_PM_OPP) -+#ifdef CONFIG_ARCH_ROCKCHIP -+ kbase_platform_rk_uninit_opp_table(kbdev); -+#else -+ dev_pm_opp_of_remove_table(kbdev->dev); -+#endif -+#endif /* CONFIG_PM_OPP */ -+ -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ if (kbdev->clocks[i]) { -+ clk_unprepare(kbdev->clocks[i]); -+ clk_put(kbdev->clocks[i]); -+ kbdev->clocks[i] = NULL; -+ } else -+ break; -+ } -+ -+#if defined(CONFIG_OF) && defined(CONFIG_REGULATOR) -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ if (kbdev->regulators[i]) { -+ regulator_put(kbdev->regulators[i]); -+ kbdev->regulators[i] = NULL; -+ } -+ } -+#endif ++ dma_sync_single_for_device(kbdev->dev, handle, size, dir); +} ++KBASE_EXPORT_TEST_API(kbase_sync_single_for_device); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ -+static void trigger_reset(struct kbase_device *kbdev) ++void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, ++ enum dma_data_direction dir) +{ -+ kbase_pm_context_active(kbdev); -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); -+ kbase_pm_context_idle(kbdev); ++ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); +} -+ -+#define MAKE_QUIRK_ACCESSORS(type) \ -+static int type##_quirks_set(void *data, u64 val) \ -+{ \ -+ struct kbase_device *kbdev; \ -+ kbdev = (struct kbase_device *)data; \ -+ kbdev->hw_quirks_##type = (u32)val; \ -+ trigger_reset(kbdev); \ -+ return 0; \ -+} \ -+\ -+static int type##_quirks_get(void *data, u64 *val) \ -+{ \ -+ struct kbase_device *kbdev; \ -+ kbdev = (struct kbase_device *)data; \ -+ *val = kbdev->hw_quirks_##type; \ -+ return 0; \ -+} \ -+DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, \ -+ type##_quirks_set, "%llu\n") -+ -+MAKE_QUIRK_ACCESSORS(sc); -+MAKE_QUIRK_ACCESSORS(tiler); -+MAKE_QUIRK_ACCESSORS(mmu); -+MAKE_QUIRK_ACCESSORS(gpu); -+ -+/** -+ * kbase_device_debugfs_reset_write() - Reset the GPU ++KBASE_EXPORT_TEST_API(kbase_sync_single_for_cpu); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h +new file mode 100644 +index 000000000..598d24538 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h +@@ -0,0 +1,47 @@ ++/* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @data: Pointer to the Kbase device. -+ * @wait_for_reset: Value written to the file. ++ * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. + * -+ * This function will perform the GPU reset, and if the value written to -+ * the file is 1 it will also wait for the reset to complete. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 in case of no error otherwise a negative value. + */ -+static int kbase_device_debugfs_reset_write(void *data, u64 wait_for_reset) -+{ -+ struct kbase_device *kbdev = data; -+ -+ trigger_reset(kbdev); + -+ if (wait_for_reset == 1) -+ return kbase_reset_gpu_wait(kbdev); ++/* ++ * Cache Policy API. ++ */ + -+ return 0; -+} ++#ifndef _KBASE_CACHE_POLICY_H_ ++#define _KBASE_CACHE_POLICY_H_ + -+DEFINE_DEBUGFS_ATTRIBUTE(fops_trigger_reset, NULL, &kbase_device_debugfs_reset_write, "%llu\n"); ++#include "mali_kbase.h" ++#include + +/** -+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read -+ * @file: File object to read is for -+ * @buf: User buffer to populate with data -+ * @len: Length of user buffer -+ * @ppos: Offset within file object ++ * kbase_cache_enabled - Choose the cache policy for a specific region ++ * @flags: flags describing attributes of the region ++ * @nr_pages: total number of pages (backed or not) for the region + * -+ * Retrieves the current status of protected debug mode -+ * (0 = disabled, 1 = enabled) ++ * Tells whether the CPU and GPU caches should be enabled or not for a specific ++ * region. ++ * This function can be modified to customize the cache policy depending on the ++ * flags and size of the region. + * -+ * Return: Number of bytes added to user buffer ++ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED ++ * depending on the cache policy + */ -+static ssize_t debugfs_protected_debug_mode_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)file->private_data; -+ u32 gpu_status; -+ ssize_t ret_val; ++u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + -+ kbase_pm_context_active(kbdev); -+ gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); -+ kbase_pm_context_idle(kbdev); ++#endif /* _KBASE_CACHE_POLICY_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h +new file mode 100644 +index 000000000..a92569d31 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h +@@ -0,0 +1,70 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (gpu_status & GPU_DBGEN) -+ ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); -+ else -+ ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); ++/** ++ * DOC: Driver Capability Queries. ++ */ + -+ return ret_val; -+} ++#ifndef _KBASE_CAPS_H_ ++#define _KBASE_CAPS_H_ + -+/* -+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops ++#include ++ ++/** ++ * enum mali_kbase_cap - Enumeration for kbase capability + * -+ * Contains the file operations for the "protected_debug_mode" debugfs file ++ * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor ++ * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit ++ * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault ++ * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory ++ * @MALI_KBASE_NUM_CAPS: Delimiter + */ -+static const struct file_operations fops_protected_debug_mode = { -+ .owner = THIS_MODULE, -+ .open = simple_open, -+ .read = debugfs_protected_debug_mode_read, -+ .llseek = default_llseek, ++enum mali_kbase_cap { ++ MALI_KBASE_CAP_SYSTEM_MONITOR = 0, ++ MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, ++ MALI_KBASE_CAP_MEM_GROW_ON_GPF, ++ MALI_KBASE_CAP_MEM_PROTECTED, ++ MALI_KBASE_NUM_CAPS +}; + -+static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, -+ void *data) ++extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap); ++ ++static inline bool mali_kbase_supports_system_monitor(unsigned long api_version) +{ -+ CSTD_UNUSED(data); -+ return kbase_debugfs_helper_seq_read(sfile, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_config_debugfs_max_size); ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR); +} + -+static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) ++static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version) +{ -+ int err = 0; -+ -+ CSTD_UNUSED(ppos); -+ err = kbase_debugfs_helper_seq_write(file, ubuf, count, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_config_debugfs_set_max_size); -+ -+ return err ? 
err : count; ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); +} + -+static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, -+ struct file *file) ++static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) +{ -+ return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, -+ in->i_private); ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); +} + -+static const struct file_operations -+ kbase_device_debugfs_mem_pool_max_size_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_device_debugfs_mem_pool_max_size_open, -+ .read = seq_read, -+ .write = kbase_device_debugfs_mem_pool_max_size_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+/** -+ * debugfs_ctx_defaults_init - Create the default configuration of new contexts in debugfs -+ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver. -+ * Return: A pointer to the last dentry that it tried to create, whether successful or not. -+ * Could be NULL or encode another error value. -+ */ -+static struct dentry *debugfs_ctx_defaults_init(struct kbase_device *const kbdev) ++static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) +{ -+ /* prevent unprivileged use of debug file system -+ * in old kernel version -+ */ -+ const mode_t mode = 0644; -+ struct dentry *dentry = debugfs_create_dir("defaults", kbdev->debugfs_ctx_directory); -+ struct dentry *debugfs_ctx_defaults_directory = dentry; -+ -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); -+ return dentry; -+ } -+ -+ debugfs_create_bool("infinite_cache", mode, -+ debugfs_ctx_defaults_directory, -+ &kbdev->infinite_cache_active_default); -+ -+ dentry = debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory, -+ &kbdev->mem_pool_defaults.small, -+ &kbase_device_debugfs_mem_pool_max_size_fops); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create mem_pool_max_size debugfs entry\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory, -+ &kbdev->mem_pool_defaults.large, -+ &kbase_device_debugfs_mem_pool_max_size_fops); -+ if (IS_ERR_OR_NULL(dentry)) -+ dev_err(kbdev->dev, "Unable to create lp_mem_pool_max_size debugfs entry\n"); -+ -+ return dentry; ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); +} + -+/** -+ * init_debugfs - Create device-wide debugfs directories and files for the Mali driver -+ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver. -+ * Return: A pointer to the last dentry that it tried to create, whether successful or not. -+ * Could be NULL or encode another error value. ++#endif /* __KBASE_CAPS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c +new file mode 100644 +index 000000000..6a1e7e4c5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c +@@ -0,0 +1,100 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ -+static struct dentry *init_debugfs(struct kbase_device *kbdev) -+{ -+ struct dentry *dentry = debugfs_create_dir(kbdev->devname, NULL); -+ -+ kbdev->mali_debugfs_directory = dentry; -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, -+ "Couldn't create mali debugfs directory: %s\n", -+ kbdev->devname); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_dir("ctx", kbdev->mali_debugfs_directory); -+ kbdev->debugfs_ctx_directory = dentry; -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_dir("instrumentation", kbdev->mali_debugfs_directory); -+ kbdev->debugfs_instr_directory = dentry; -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n"); -+ return dentry; -+ } -+ -+ kbasep_regs_history_debugfs_init(kbdev); -+ -+#if MALI_USE_CSF -+ kbase_debug_csf_fault_debugfs_init(kbdev); -+#else /* MALI_USE_CSF */ -+ kbase_debug_job_fault_debugfs_init(kbdev); -+#endif /* !MALI_USE_CSF */ -+ -+ kbasep_gpu_memory_debugfs_init(kbdev); -+ kbase_as_fault_debugfs_init(kbdev); -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+ kbase_instr_backend_debugfs_init(kbdev); -+#endif -+ kbase_pbha_debugfs_init(kbdev); -+ -+ /* fops_* variables created by invocations of macro -+ * MAKE_QUIRK_ACCESSORS() above. 
-+ */ -+ dentry = debugfs_create_file("quirks_sc", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_sc_quirks); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create quirks_sc debugfs entry\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_file("quirks_tiler", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_tiler_quirks); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create quirks_tiler debugfs entry\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_file("quirks_mmu", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_mmu_quirks); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create quirks_mmu debugfs entry\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, -+ kbdev, &fops_gpu_quirks); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create quirks_gpu debugfs entry\n"); -+ return dentry; -+ } -+ -+ dentry = debugfs_ctx_defaults_init(kbdev); -+ if (IS_ERR_OR_NULL(dentry)) -+ return dentry; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { -+ dentry = debugfs_create_file("protected_debug_mode", 0444, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_protected_debug_mode); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create protected_debug_mode debugfs entry\n"); -+ return dentry; -+ } -+ } ++#include "mali_kbase_ccswe.h" ++#include "mali_kbase_linux.h" + -+ dentry = debugfs_create_file("reset", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_trigger_reset); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create reset debugfs entry\n"); -+ return dentry; -+ } ++#include ++#include + -+ kbase_ktrace_debugfs_init(kbdev); ++static u64 kbasep_ccswe_cycle_at_no_lock( ++ struct kbase_ccswe *self, u64 timestamp_ns) ++{ ++ s64 diff_s, diff_ns; ++ u32 gpu_freq; + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ if (kbdev->devfreq && kbdev->devfreq_cooling) -+ kbase_ipa_debugfs_init(kbdev); -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ lockdep_assert_held(&self->access); + -+#if !MALI_USE_CSF -+ dentry = debugfs_create_file("serialize_jobs", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_serialize_jobs_debugfs_fops); -+ if (IS_ERR_OR_NULL(dentry)) { -+ dev_err(kbdev->dev, "Unable to create serialize_jobs debugfs entry\n"); -+ return dentry; -+ } -+ kbase_timeline_io_debugfs_init(kbdev); -+#endif -+ kbase_dvfs_status_debugfs_init(kbdev); ++ diff_ns = timestamp_ns - self->timestamp_ns; ++ gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq; + ++ diff_s = div_s64(diff_ns, NSEC_PER_SEC); ++ diff_ns -= diff_s * NSEC_PER_SEC; + -+ return dentry; ++ return self->cycles_elapsed + diff_s * gpu_freq ++ + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); +} + -+int kbase_device_debugfs_init(struct kbase_device *kbdev) ++void kbase_ccswe_init(struct kbase_ccswe *self) +{ -+ struct dentry *dentry = init_debugfs(kbdev); -+ -+ if (IS_ERR_OR_NULL(dentry)) { -+ debugfs_remove_recursive(kbdev->mali_debugfs_directory); -+ return IS_ERR(dentry) ? 
PTR_ERR(dentry) : -ENOMEM; -+ } -+ return 0; -+} ++ memset(self, 0, sizeof(*self)); + -+void kbase_device_debugfs_term(struct kbase_device *kbdev) -+{ -+ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++ spin_lock_init(&self->access); +} -+#endif /* CONFIG_DEBUG_FS */ + -+int kbase_device_coherency_init(struct kbase_device *kbdev) ++u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) +{ -+#if IS_ENABLED(CONFIG_OF) -+ u32 supported_coherency_bitmap = -+ kbdev->gpu_props.props.raw_props.coherency_mode; -+ const void *coherency_override_dts; -+ bool dma_coherent; -+ u32 override_coherency, gpu_id; -+ unsigned int prod_id; -+ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ gpu_id &= GPU_ID_VERSION_PRODUCT_ID; -+ prod_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ unsigned long flags; ++ u64 result; + -+ /* Only for tMIx : -+ * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly -+ * documented for tMIx so force correct value here. -+ */ -+ if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == -+ GPU_ID2_PRODUCT_TMIX) -+ if (supported_coherency_bitmap == -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) -+ supported_coherency_bitmap |= -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); ++ spin_lock_irqsave(&self->access, flags); ++ result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); ++ spin_unlock_irqrestore(&self->access, flags); + -+#endif /* CONFIG_OF */ ++ return result; ++} + -+ kbdev->system_coherency = COHERENCY_NONE; ++void kbase_ccswe_freq_change( ++ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) ++{ ++ unsigned long flags; + -+ /* device tree may override the coherency */ -+#if IS_ENABLED(CONFIG_OF) -+ /* treat "dma-coherency" as a synonym for ACE-lite */ -+ dma_coherent = of_dma_is_coherent(kbdev->dev->of_node); -+ coherency_override_dts = of_get_property(kbdev->dev->of_node, -+ "system-coherency", -+ NULL); -+ if (coherency_override_dts || dma_coherent) { -+ if (coherency_override_dts) { -+ override_coherency = be32_to_cpup(coherency_override_dts); -+ if (dma_coherent && override_coherency != COHERENCY_ACE_LITE) { -+ dev_err(kbdev->dev, -+ "system-coherency needs to be 0 when dma-coherent is set\n"); -+ return -EINVAL; -+ } -+ } else { -+ /* dma-coherent set and system-coherency not specified */ -+ override_coherency = COHERENCY_ACE_LITE; -+ } ++ spin_lock_irqsave(&self->access, flags); + -+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* ACE coherency mode is not supported by Driver on CSF GPUs. -+ * Return an error to signal the invalid device tree configuration. -+ */ -+ if (override_coherency == COHERENCY_ACE) { -+ dev_err(kbdev->dev, -+ "ACE coherency not supported, wrong DT configuration"); -+ return -EINVAL; -+ } -+#endif ++ /* The time must go only forward. */ ++ if (WARN_ON(timestamp_ns < self->timestamp_ns)) ++ goto exit; + -+ if ((override_coherency <= COHERENCY_NONE) && -+ (supported_coherency_bitmap & -+ COHERENCY_FEATURE_BIT(override_coherency))) { ++ /* If this is the first frequency change, cycles_elapsed is zero. 
*/ ++ if (self->timestamp_ns) ++ self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( ++ self, timestamp_ns); + -+ kbdev->system_coherency = override_coherency; ++ self->timestamp_ns = timestamp_ns; ++ self->prev_gpu_freq = self->gpu_freq; ++ self->gpu_freq = gpu_freq; ++exit: ++ spin_unlock_irqrestore(&self->access, flags); ++} + -+ dev_info(kbdev->dev, -+ "Using coherency mode %u set from dtb", -+ override_coherency); -+ } else -+ dev_warn(kbdev->dev, -+ "Ignoring unsupported coherency mode %u set from dtb", -+ override_coherency); -+ } ++void kbase_ccswe_reset(struct kbase_ccswe *self) ++{ ++ unsigned long flags; + -+#endif /* CONFIG_OF */ ++ spin_lock_irqsave(&self->access, flags); + -+ kbdev->gpu_props.props.raw_props.coherency_mode = -+ kbdev->system_coherency; ++ self->timestamp_ns = 0; ++ self->cycles_elapsed = 0; ++ self->gpu_freq = 0; ++ self->prev_gpu_freq = 0; + -+ return 0; ++ spin_unlock_irqrestore(&self->access, flags); +} -+ -+ -+#if MALI_USE_CSF -+/** -+ * csg_scheduling_period_store - Store callback for the csg_scheduling_period -+ * sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h +new file mode 100644 +index 000000000..f7fcf7780 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h +@@ -0,0 +1,95 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This function is called when the csg_scheduling_period sysfs file is written -+ * to. It checks the data written, and if valid updates the reset timeout. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: @count if the function succeeded. An error code on failure. + */ -+static ssize_t csg_scheduling_period_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ unsigned int csg_scheduling_period; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++#ifndef _KBASE_CCSWE_H_ ++#define _KBASE_CCSWE_H_ + -+ ret = kstrtouint(buf, 0, &csg_scheduling_period); -+ if (ret || csg_scheduling_period == 0) { -+ dev_err(kbdev->dev, -+ "Couldn't process csg_scheduling_period write operation.\n" -+ "Use format 'csg_scheduling_period_ms', and csg_scheduling_period_ms > 0\n"); -+ return -EINVAL; -+ } ++#include + -+ kbase_csf_scheduler_lock(kbdev); -+ kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period; -+ dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", -+ csg_scheduling_period); -+ kbase_csf_scheduler_unlock(kbdev); ++/** ++ * struct kbase_ccswe - Cycle count software estimator. 
++ * ++ * @access: Spinlock protecting this structure access. ++ * @timestamp_ns: Timestamp(ns) when the last frequency change ++ * occurred. ++ * @cycles_elapsed: Number of cycles elapsed before the last frequency ++ * change ++ * @gpu_freq: Current GPU frequency(Hz) value. ++ * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency ++ * change. ++ */ ++struct kbase_ccswe { ++ spinlock_t access; ++ u64 timestamp_ns; ++ u64 cycles_elapsed; ++ u32 gpu_freq; ++ u32 prev_gpu_freq; ++}; + -+ return count; -+} ++/** ++ * kbase_ccswe_init() - initialize the cycle count estimator. ++ * ++ * @self: Cycles count software estimator instance. ++ */ ++void kbase_ccswe_init(struct kbase_ccswe *self); + +/** -+ * csg_scheduling_period_show - Show callback for the csg_scheduling_period -+ * sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp. + * -+ * This function is called to get the current reset timeout. ++ * @self: Cycles count software estimator instance. ++ * @timestamp_ns: The timestamp(ns) for cycle count estimation. + * -+ * Return: The number of bytes output to @buf. ++ * The timestamp must be bigger than the timestamp of the penultimate ++ * frequency change. If only one frequency change occurred, the ++ * timestamp must be bigger than the timestamp of the frequency change. ++ * This is to allow the following code to be executed w/o synchronization. ++ * If lines below executed atomically, it is safe to assume that only ++ * one frequency change may happen in between. ++ * ++ * u64 ts = ktime_get_raw_ns(); ++ * u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts) ++ * ++ * Return: estimated value of cycle count at a given time. + */ -+static ssize_t csg_scheduling_period_show(struct device *dev, -+ struct device_attribute *attr, -+ char *const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", -+ kbdev->csf.scheduler.csg_scheduling_period_ms); -+ -+ return ret; -+} ++u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); + -+static DEVICE_ATTR_RW(csg_scheduling_period); ++/** ++ * kbase_ccswe_freq_change() - update GPU frequency. ++ * ++ * @self: Cycles count software estimator instance. ++ * @timestamp_ns: Timestamp(ns) when frequency change occurred. ++ * @gpu_freq: New GPU frequency value. ++ * ++ * The timestamp must be bigger than the timestamp of the previous ++ * frequency change. The function is to be called at the frequency ++ * change moment (not later). ++ */ ++void kbase_ccswe_freq_change( ++ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); + +/** -+ * fw_timeout_store - Store callback for the fw_timeout sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * kbase_ccswe_reset() - reset estimator state + * -+ * This function is called when the fw_timeout sysfs file is written to. It -+ * checks the data written, and if valid updates the reset timeout. ++ * @self: Cycles count software estimator instance. 
++ */ ++void kbase_ccswe_reset(struct kbase_ccswe *self); ++ ++#endif /* _KBASE_CCSWE_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.c b/drivers/gpu/arm/bifrost/mali_kbase_config.c +new file mode 100644 +index 000000000..37dbca129 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_config.c +@@ -0,0 +1,104 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2015, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: @count if the function succeeded. An error code on failure. + */ -+static ssize_t fw_timeout_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, -+ size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ unsigned int fw_timeout; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++#include ++#include ++#include + -+ ret = kstrtouint(buf, 0, &fw_timeout); -+ if (ret || fw_timeout == 0) { -+ dev_err(kbdev->dev, -+ "Couldn't process fw_timeout write operation.\n" -+ "Use format 'fw_timeout_ms', and fw_timeout_ms > 0\n" -+ "Default fw_timeout: %u", -+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)); -+ return -EINVAL; -+ } ++int kbasep_platform_device_init(struct kbase_device *kbdev) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ kbase_csf_scheduler_lock(kbdev); -+ kbdev->csf.fw_timeout_ms = fw_timeout; -+ kbase_csf_scheduler_unlock(kbdev); -+ dev_dbg(kbdev->dev, "Firmware timeout: %ums\n", fw_timeout); ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_init_func) ++ return platform_funcs_p->platform_init_func(kbdev); + -+ return count; ++ return 0; +} + -+/** -+ * fw_timeout_show - Show callback for the firmware timeout sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current reset timeout. -+ * -+ * Return: The number of bytes output to @buf. 
-+ */ -+static ssize_t fw_timeout_show(struct device *dev, -+ struct device_attribute *attr, char *const buf) ++void kbasep_platform_device_term(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_term_func) ++ platform_funcs_p->platform_term_func(kbdev); ++} + -+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.fw_timeout_ms); ++int kbasep_platform_device_late_init(struct kbase_device *kbdev) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ return ret; -+} ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_late_init_func) ++ platform_funcs_p->platform_late_init_func(kbdev); + -+static DEVICE_ATTR_RW(fw_timeout); ++ return 0; ++} + -+/** -+ * idle_hysteresis_time_store - Store callback for CSF idle_hysteresis_time -+ * sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file -+ * -+ * This function is called when the idle_hysteresis_time sysfs file is -+ * written to. -+ * -+ * This file contains values of the idle hysteresis duration. -+ * -+ * Return: @count if the function succeeded. An error code on failure. -+ */ -+static ssize_t idle_hysteresis_time_store(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) ++void kbasep_platform_device_late_term(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ u32 dur = 0; ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_late_term_func) ++ platform_funcs_p->platform_late_term_func(kbdev); ++} + -+ if (kstrtou32(buf, 0, &dur)) { -+ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++#if !MALI_USE_CSF ++int kbasep_platform_context_init(struct kbase_context *kctx) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_context_init_func) ++ return platform_funcs_p->platform_handler_context_init_func(kctx); + -+ return count; ++ return 0; +} + -+/** -+ * idle_hysteresis_time_show - Show callback for CSF idle_hysteresis_time -+ * sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current idle hysteresis duration in ms. -+ * -+ * Return: The number of bytes output to @buf. 
-+ */ -+static ssize_t idle_hysteresis_time_show(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++void kbasep_platform_context_term(struct kbase_context *kctx) +{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ u32 dur; ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_context_term_func) ++ platform_funcs_p->platform_handler_context_term_func(kctx); ++} + -+ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); -+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); ++void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ return ret; ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_submit_func) ++ platform_funcs_p->platform_handler_atom_submit_func(katom); +} + -+static DEVICE_ATTR_RW(idle_hysteresis_time); ++void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+/** -+ * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_complete_func) ++ platform_funcs_p->platform_handler_atom_complete_func(katom); ++} ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h +new file mode 100644 +index 000000000..ecfdb2876 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h +@@ -0,0 +1,583 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer for the sysfs file contents ++ * (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved. + * -+ * Get the internally recorded MCU shader Core power-off (nominal) timeout value. -+ * The unit of the value is in micro-seconds. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: The number of bytes output to @buf if the -+ * function succeeded. A Negative value on failure. + */ -+static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_attribute *attr, -+ char *const buf) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ u32 pwroff; -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); -+ return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); -+} + +/** -+ * mcu_shader_pwroff_timeout_store - Set the MCU shader core power-off time value. 
-+ * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes to write to the sysfs file -+ * -+ * The duration value (unit: micro-seconds) for configuring MCU Shader Core -+ * timer, when the shader cores' power transitions are delegated to the -+ * MCU (normal operational mode) -+ * -+ * Return: @count if the function succeeded. An error code on failure. ++ * DOC: Configuration API and Attributes for KBase + */ -+static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ u32 dur; + -+ if (!kbdev) -+ return -ENODEV; ++#ifndef _KBASE_CONFIG_H_ ++#define _KBASE_CONFIG_H_ + -+ if (kstrtouint(buf, 0, &dur)) -+ return -EINVAL; ++#include ++#include ++#include ++#include + -+ kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); ++/* Forward declaration of struct kbase_device */ ++struct kbase_device; + -+ return count; -+} -+ -+static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); -+ -+#endif /* MALI_USE_CSF */ -+ -+static struct attribute *kbase_scheduling_attrs[] = { +#if !MALI_USE_CSF -+ &dev_attr_serialize_jobs.attr, -+#endif /* !MALI_USE_CSF */ -+ NULL -+}; ++/* Forward declaration of struct kbase_context */ ++struct kbase_context; + -+static struct attribute *kbase_attrs[] = { -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ &dev_attr_debug_command.attr, -+#if !MALI_USE_CSF -+ &dev_attr_js_softstop_always.attr, -+#endif /* !MALI_USE_CSF */ ++/* Forward declaration of struct kbase_atom */ ++struct kbase_jd_atom; +#endif -+#if !MALI_USE_CSF -+ &dev_attr_js_timeouts.attr, -+ &dev_attr_soft_job_timeout.attr, -+#endif /* !MALI_USE_CSF */ -+ &dev_attr_gpuinfo.attr, -+ &dev_attr_dvfs_period.attr, -+ &dev_attr_pm_poweroff.attr, -+ &dev_attr_reset_timeout.attr, -+#if !MALI_USE_CSF -+ &dev_attr_js_scheduling_period.attr, -+#else -+ &dev_attr_csg_scheduling_period.attr, -+ &dev_attr_fw_timeout.attr, -+ &dev_attr_idle_hysteresis_time.attr, -+ &dev_attr_mcu_shader_pwroff_timeout.attr, -+#endif /* !MALI_USE_CSF */ -+ &dev_attr_power_policy.attr, -+ &dev_attr_core_mask.attr, -+ &dev_attr_mem_pool_size.attr, -+ &dev_attr_mem_pool_max_size.attr, -+ &dev_attr_lp_mem_pool_size.attr, -+ &dev_attr_lp_mem_pool_max_size.attr, -+#if !MALI_USE_CSF -+ &dev_attr_js_ctx_scheduling_mode.attr, -+#endif /* !MALI_USE_CSF */ -+ NULL -+}; + -+static struct attribute *kbase_mempool_attrs[] = { -+ &dev_attr_max_size.attr, -+ &dev_attr_lp_max_size.attr, -+ &dev_attr_ctx_default_max_size.attr, -+ NULL -+}; ++/** ++ * struct kbase_platform_funcs_conf - Specifies platform integration function ++ * pointers for DDK events such as device init and term. ++ * ++ * Specifies the functions pointers for platform specific initialization and ++ * termination as well as other events. By default no functions are required. ++ * No additional platform specific control is necessary. ++ */ ++struct kbase_platform_funcs_conf { ++ /** ++ * @platform_init_func: platform specific init function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Returns 0 on success, negative error code otherwise. ++ * ++ * Function pointer for platform specific initialization or NULL if no ++ * initialization function is required. At the point this the GPU is ++ * not active and its power and clocks are in unknown (platform specific ++ * state) as kbase doesn't yet have control of power and clocks. 
++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly initialized) in here. ++ */ ++ int (*platform_init_func)(struct kbase_device *kbdev); ++ /** ++ * @platform_term_func: platform specific termination function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer for platform specific termination or NULL if no ++ * termination function is required. At the point this the GPU will be ++ * idle but still powered and clocked. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ void (*platform_term_func)(struct kbase_device *kbdev); + -+#define SYSFS_SCHEDULING_GROUP "scheduling" -+static const struct attribute_group kbase_scheduling_attr_group = { -+ .name = SYSFS_SCHEDULING_GROUP, -+ .attrs = kbase_scheduling_attrs, -+}; ++ /** ++ * @platform_late_init_func: platform specific late init function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer to inform that the kbase driver initialization completed ++ * or NULL if no such function is required. At this point the GPU driver will be ++ * fully initialized. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ int (*platform_late_init_func)(struct kbase_device *kbdev); + -+#define SYSFS_MEMPOOL_GROUP "mempool" -+static const struct attribute_group kbase_mempool_attr_group = { -+ .name = SYSFS_MEMPOOL_GROUP, -+ .attrs = kbase_mempool_attrs, -+}; ++ /** ++ * @platform_late_term_func: platform specific late termination function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer for platform specific termination or NULL if no ++ * termination function is required. At this point the GPU driver will complete ++ * termination process ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ void (*platform_late_term_func)(struct kbase_device *kbdev); + -+static const struct attribute_group kbase_attr_group = { -+ .attrs = kbase_attrs, ++#if !MALI_USE_CSF ++ /** ++ * @platform_handler_context_init_func: platform specific handler for ++ * when a new kbase_context is created. ++ * @kctx - kbase_context pointer ++ * ++ * Returns 0 on success, negative error code otherwise. ++ * ++ * Function pointer for platform specific initialization of a kernel ++ * context or NULL if not required. Called at the last stage of kernel ++ * context initialization. ++ */ ++ int (*platform_handler_context_init_func)(struct kbase_context *kctx); ++ /** ++ * @platform_handler_context_term_func: platform specific handler for ++ * when a kbase_context is terminated. ++ * @kctx - kbase_context pointer ++ * ++ * Function pointer for platform specific termination of a kernel ++ * context or NULL if not required. Called at the first stage of kernel ++ * context termination. ++ */ ++ void (*platform_handler_context_term_func)(struct kbase_context *kctx); ++ /** ++ * @platform_handler_atom_submit_func: platform specific handler for ++ * when a kbase_jd_atom is submitted. ++ * @katom - kbase_jd_atom pointer ++ * ++ * Function pointer for platform specific handling at the point when an ++ * atom is submitted to the GPU or set to NULL if not required. The ++ * function cannot assume that it is running in a process context. ++ * ++ * Context: The caller must hold the hwaccess_lock. 
Function must be ++ * runnable in an interrupt context. ++ */ ++ void (*platform_handler_atom_submit_func)(struct kbase_jd_atom *katom); ++ /** ++ * @platform_handler_atom_complete_func: platform specific handler for ++ * when a kbase_jd_atom completes. ++ * @katom - kbase_jd_atom pointer ++ * ++ * Function pointer for platform specific handling at the point when an ++ * atom stops running on the GPU or set to NULL if not required. The ++ * function cannot assume that it is running in a process context. ++ * ++ * Context: The caller must hold the hwaccess_lock. Function must be ++ * runnable in an interrupt context. ++ */ ++ void (*platform_handler_atom_complete_func)( ++ struct kbase_jd_atom *katom); ++#endif +}; + -+int kbase_sysfs_init(struct kbase_device *kbdev) -+{ -+ int err = 0; ++/* ++ * @brief Specifies the callbacks for power management ++ * ++ * By default no callbacks will be made and the GPU must not be powered off. ++ */ ++struct kbase_pm_callback_conf { ++ /** Callback for when the GPU is idle and the power to it can be switched off. ++ * ++ * The system integrator can decide whether to either do nothing, just switch off ++ * the clocks to the GPU, or to completely power down the GPU. ++ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the ++ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). ++ * ++ * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used ++ * then this callback should power off the GPU (or switch off the clocks ++ * to GPU) immediately. If @power_runtime_gpu_idle_callback is not used, ++ * then this callback can set the autosuspend timeout (if desired) and ++ * let the GPU be powered down later. ++ */ ++ void (*power_off_callback)(struct kbase_device *kbdev); + -+ kbdev->mdev.minor = MISC_DYNAMIC_MINOR; -+ kbdev->mdev.name = kbdev->devname; -+ kbdev->mdev.fops = &kbase_fops; -+ kbdev->mdev.parent = get_device(kbdev->dev); -+ kbdev->mdev.mode = 0666; ++ /** Callback for when the GPU is about to become active and power must be supplied. ++ * ++ * This function must not return until the GPU is powered and clocked sufficiently for register access to ++ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. ++ * If the GPU state has been lost then this function must return 1, otherwise it should return 0. ++ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the ++ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). ++ * ++ * The return value of the first call to this function is ignored. ++ * ++ * @return 1 if the GPU state may have been lost, 0 otherwise. ++ */ ++ int (*power_on_callback)(struct kbase_device *kbdev); + -+ err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); -+ if (err) -+ return err; ++ /** Callback for when the system is requesting a suspend and GPU power ++ * must be switched off. ++ * ++ * Note that if this callback is present, then this may be called ++ * without a preceding call to power_off_callback. Therefore this ++ * callback must be able to take any action that might otherwise happen ++ * in power_off_callback. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed and modified in here. 
It is the platform \em ++ * callbacks responsibility to initialize and terminate this pointer if ++ * used (see @ref kbase_platform_funcs_conf). ++ */ ++ void (*power_suspend_callback)(struct kbase_device *kbdev); + -+ err = sysfs_create_group(&kbdev->dev->kobj, -+ &kbase_scheduling_attr_group); -+ if (err) { -+ dev_err(kbdev->dev, "Creation of %s sysfs group failed", -+ SYSFS_SCHEDULING_GROUP); -+ sysfs_remove_group(&kbdev->dev->kobj, -+ &kbase_attr_group); -+ return err; -+ } ++ /** Callback for when the system is resuming from a suspend and GPU ++ * power must be switched on. ++ * ++ * Note that if this callback is present, then this may be called ++ * without a following call to power_on_callback. Therefore this ++ * callback must be able to take any action that might otherwise happen ++ * in power_on_callback. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed and modified in here. It is the platform \em ++ * callbacks responsibility to initialize and terminate this pointer if ++ * used (see @ref kbase_platform_funcs_conf). ++ */ ++ void (*power_resume_callback)(struct kbase_device *kbdev); + -+ err = sysfs_create_group(&kbdev->dev->kobj, -+ &kbase_mempool_attr_group); -+ if (err) { -+ dev_err(kbdev->dev, "Creation of %s sysfs group failed", -+ SYSFS_MEMPOOL_GROUP); -+ sysfs_remove_group(&kbdev->dev->kobj, -+ &kbase_scheduling_attr_group); -+ sysfs_remove_group(&kbdev->dev->kobj, -+ &kbase_attr_group); -+ } ++ /** Callback for handling runtime power management initialization. ++ * ++ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback ++ * will become active from calls made to the OS from within this function. ++ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ * ++ * @return 0 on success, else int error code. ++ */ ++ int (*power_runtime_init_callback)(struct kbase_device *kbdev); + -+ return err; -+} ++ /** Callback for handling runtime power management termination. ++ * ++ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback ++ * should no longer be called by the OS on completion of this function. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ */ ++ void (*power_runtime_term_callback)(struct kbase_device *kbdev); + -+void kbase_sysfs_term(struct kbase_device *kbdev) -+{ -+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group); -+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); -+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); -+ put_device(kbdev->dev); -+} ++ /** Callback for runtime power-off power management callback ++ * ++ * For linux this callback will be called by the kernel runtime_suspend callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ */ ++ void (*power_runtime_off_callback)(struct kbase_device *kbdev); + -+static int kbase_platform_device_remove(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); ++ /** Callback for runtime power-on power management callback ++ * ++ * For linux this callback will be called by the kernel runtime_resume callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ * ++ * @return 0 on success, else OS error code. 
++ */ ++ int (*power_runtime_on_callback)(struct kbase_device *kbdev); + -+ if (!kbdev) -+ return -ENODEV; ++ /* ++ * Optional callback for checking if GPU can be suspended when idle ++ * ++ * This callback will be called by the runtime power management core ++ * when the reference count goes to 0 to provide notification that the ++ * GPU now seems idle. ++ * ++ * If this callback finds that the GPU can't be powered off, or handles ++ * suspend by powering off directly or queueing up a power off, a ++ * non-zero value must be returned to prevent the runtime PM core from ++ * also triggering a suspend. ++ * ++ * Returning 0 will cause the runtime PM core to conduct a regular ++ * autosuspend. ++ * ++ * This callback is optional and if not provided regular autosuspend ++ * will be triggered. ++ * ++ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use ++ * this feature. ++ * ++ * Return 0 if GPU can be suspended, positive value if it can not be ++ * suspeneded by runtime PM, else OS error code ++ */ ++ int (*power_runtime_idle_callback)(struct kbase_device *kbdev); + -+ kbase_device_term(kbdev); -+ dev_set_drvdata(kbdev->dev, NULL); -+ kbase_device_free(kbdev); ++ /* ++ * Optional callback for software reset ++ * ++ * This callback will be called by the power management core to trigger ++ * a GPU soft reset. ++ * ++ * Return 0 if the soft reset was successful and the RESET_COMPLETED ++ * interrupt will be raised, or a positive value if the interrupt won't ++ * be raised. On error, return the corresponding OS error code. ++ */ ++ int (*soft_reset_callback)(struct kbase_device *kbdev); + -+ return 0; -+} ++ /* ++ * Optional callback invoked after GPU becomes idle, not supported on ++ * JM GPUs. ++ * ++ * This callback will be invoked by the Kbase when GPU becomes idle. ++ * For JM GPUs or when runtime PM is disabled, Kbase will not invoke ++ * this callback and @power_off_callback will be invoked directly. ++ * ++ * This callback is supposed to decrement the runtime PM core reference ++ * count to zero and trigger the auto-suspend timer, which implies that ++ * @power_off_callback shouldn't initiate the runtime suspend. ++ * ++ * GPU registers still remain accessible until @power_off_callback gets ++ * invoked later on the expiry of auto-suspend timer. ++ * ++ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use ++ * this feature. ++ */ ++ void (*power_runtime_gpu_idle_callback)(struct kbase_device *kbdev); + -+void kbase_backend_devfreq_term(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ if (kbdev->devfreq) -+ kbase_devfreq_term(kbdev); -+#endif -+} ++ /* ++ * Optional callback invoked to change the runtime PM core state to ++ * active. ++ * ++ * This callback will be invoked by Kbase when GPU needs to be ++ * reactivated, but only if @power_runtime_gpu_idle_callback was invoked ++ * previously. So both @power_runtime_gpu_idle_callback and this ++ * callback needs to be implemented at the same time. ++ * ++ * Kbase will invoke @power_on_callback first before invoking this ++ * callback if the GPU was powered down previously, otherwise directly. ++ * ++ * This callback is supposed to increment the runtime PM core reference ++ * count to 1, which implies that @power_on_callback shouldn't initiate ++ * the runtime resume. The runtime resume may not happen synchronously ++ * to avoid a potential deadlock due to the runtime suspend happening ++ * simultaneously from some other thread. 
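As a minimal sketch of how the power_on_callback/power_off_callback and power_runtime_* hooks documented here could be wired to the Linux runtime PM core (the example_* names and the 100 ms autosuspend delay are assumptions for illustration, not taken from this patch):

#include <linux/pm_runtime.h>

static void example_power_off(struct kbase_device *kbdev)
{
	/* GPU is idle: drop the runtime PM reference and let autosuspend run. */
	pm_runtime_mark_last_busy(kbdev->dev);
	pm_runtime_put_autosuspend(kbdev->dev);
}

static int example_power_on(struct kbase_device *kbdev)
{
	(void)pm_runtime_get_sync(kbdev->dev);
	/* Conservatively report that GPU state may have been lost. */
	return 1;
}

static int example_power_runtime_init(struct kbase_device *kbdev)
{
	pm_runtime_set_autosuspend_delay(kbdev->dev, 100); /* assumed delay */
	pm_runtime_use_autosuspend(kbdev->dev);
	pm_runtime_enable(kbdev->dev);
	return 0;
}

static void example_power_runtime_term(struct kbase_device *kbdev)
{
	pm_runtime_disable(kbdev->dev);
}

static struct kbase_pm_callback_conf example_pm_callbacks = {
	.power_off_callback = example_power_off,
	.power_on_callback = example_power_on,
	.power_runtime_init_callback = example_power_runtime_init,
	.power_runtime_term_callback = example_power_runtime_term,
	/* Remaining callbacks left NULL: no callback is made for them. */
};

Whether deferring entirely to runtime PM like this is sufficient depends on the platform; it is only one plausible arrangement consistent with the documentation above.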
++ * ++ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use ++ * this feature. ++ */ ++ void (*power_runtime_gpu_active_callback)(struct kbase_device *kbdev); ++}; + -+int kbase_backend_devfreq_init(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ /* Devfreq uses hardware counters, so must be initialized after it. */ -+ int err = kbase_devfreq_init(kbdev); ++/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier. ++ * ++ * Pointer to this structure is supposed to be passed to the gpu clock rate ++ * change notifier function. This structure is deliberately aligned with the ++ * common clock framework notification structure 'struct clk_notifier_data' ++ * and such alignment should be maintained. ++ * ++ * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered. ++ * @old_rate: Previous rate of this GPU clock in Hz. ++ * @new_rate: New rate of this GPU clock in Hz. ++ */ ++struct kbase_gpu_clk_notifier_data { ++ void *gpu_clk_handle; ++ unsigned long old_rate; ++ unsigned long new_rate; ++}; + -+ if (err) -+ dev_err(kbdev->dev, "Continuing without devfreq\n"); -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -+ return 0; -+} ++/** ++ * struct kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace ++ * operations. ++ * ++ * Specifies the functions pointers for platform specific GPU clock rate trace ++ * operations. By default no functions are required. ++ */ ++struct kbase_clk_rate_trace_op_conf { ++ /** ++ * @enumerate_gpu_clk: Enumerate a GPU clock on the given index ++ * @kbdev - kbase_device pointer ++ * @index - GPU clock index ++ * ++ * Returns a handle unique to the given GPU clock, or NULL if the clock ++ * array has been exhausted at the given index value. ++ * ++ * Kbase will use this function pointer to enumerate the existence of a ++ * GPU clock on the given index. ++ */ ++ void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, ++ unsigned int index); + -+static int kbase_platform_device_probe(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev; -+ int err = 0; ++ /** ++ * @get_gpu_clk_rate: Get the current rate for an enumerated clock. ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * ++ * Returns current rate of the GPU clock in unit of Hz. ++ */ ++ unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, ++ void *gpu_clk_handle); + -+ mali_kbase_print_cs_experimental(); ++ /** ++ * @gpu_clk_notifier_register: Register a clock rate change notifier. ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function ++ * pointer ++ * ++ * Returns 0 on success, negative error code otherwise. ++ * ++ * This function pointer is used to register a callback function that ++ * is supposed to be invoked whenever the rate of clock corresponding ++ * to @gpu_clk_handle changes. ++ * @nb contains the pointer to callback function. ++ * The callback function expects the pointer of type ++ * 'struct kbase_gpu_clk_notifier_data' as the third argument. 
++ */ ++ int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb); + -+ kbdev = kbase_device_alloc(); -+ if (!kbdev) { -+ dev_err(&pdev->dev, "Allocate device failed\n"); -+ return -ENOMEM; -+ } ++ /** ++ * @gpu_clk_notifier_unregister: Unregister clock rate change notifier ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function ++ * pointer ++ * ++ * This function pointer is used to unregister a callback function that ++ * was previously registered to get notified of the change in rate ++ * of clock corresponding to @gpu_clk_handle. ++ */ ++ void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb); ++}; + -+ kbdev->dev = &pdev->dev; ++#if IS_ENABLED(CONFIG_OF) ++struct kbase_platform_config { ++}; ++#else + -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+ kbdev->token = -EPERM; -+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ ++/* ++ * @brief Specifies start and end of I/O memory region. ++ */ ++struct kbase_io_memory_region { ++ u64 start; ++ u64 end; ++}; + -+ dev_set_drvdata(kbdev->dev, kbdev); -+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) -+ mutex_lock(&kbase_probe_mutex); -+#endif -+ err = kbase_device_init(kbdev); ++/* ++ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. ++ */ ++struct kbase_io_resources { ++ u32 job_irq_number; ++ u32 mmu_irq_number; ++ u32 gpu_irq_number; ++ struct kbase_io_memory_region io_memory_region; ++}; + -+ if (err) { -+ if (err == -EPROBE_DEFER) -+ dev_info(kbdev->dev, -+ "Device initialization Deferred\n"); -+ else -+ dev_err(kbdev->dev, "Device initialization failed\n"); ++struct kbase_platform_config { ++ const struct kbase_io_resources *io_resources; ++}; + -+ dev_set_drvdata(kbdev->dev, NULL); -+ kbase_device_free(kbdev); -+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) -+ mutex_unlock(&kbase_probe_mutex); -+#endif -+ } else { -+ dev_info(kbdev->dev, -+ "Probed as %s\n", dev_name(kbdev->mdev.this_device)); -+ kbase_increment_device_id(); -+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) -+ mutex_unlock(&kbase_probe_mutex); -+#endif -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ mutex_lock(&kbdev->pm.lock); -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); -+ mutex_unlock(&kbdev->pm.lock); -+#endif -+ } ++#endif /* CONFIG_OF */ + -+ return err; -+} ++/** ++ * kbase_get_platform_config - Gets the pointer to platform config. ++ * ++ * Return: Pointer to the platform config ++ */ ++struct kbase_platform_config *kbase_get_platform_config(void); + -+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ++/** ++ * kbasep_platform_device_init: - Platform specific call to initialize hardware ++ * @kbdev: kbase device pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any hardware and context state that ++ * is required for the GPU block to function. ++ * ++ * Return: 0 if no errors have been found in the config. ++ * Negative error code otherwise. ++ */ ++int kbasep_platform_device_init(struct kbase_device *kbdev); + +/** -+ * kbase_device_suspend - Suspend callback from the OS. 
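The kbase_clk_rate_trace_op_conf operations documented above map naturally onto the common clock framework, whose struct clk_notifier_data the header deliberately mirrors. A minimal single-clock sketch follows, assuming a hypothetical platform helper platform_get_gpu_clk() that returns the GPU's struct clk; it relies only on <linux/clk.h>:

static void *example_enumerate_gpu_clk(struct kbase_device *kbdev, unsigned int index)
{
	/* Hypothetical single-clock platform: index 0 is the only GPU clock. */
	return (index == 0) ? platform_get_gpu_clk(kbdev) : NULL;
}

static unsigned long example_get_gpu_clk_rate(struct kbase_device *kbdev, void *gpu_clk_handle)
{
	return clk_get_rate(gpu_clk_handle);
}

static int example_gpu_clk_notifier_register(struct kbase_device *kbdev,
					     void *gpu_clk_handle, struct notifier_block *nb)
{
	/* The callback receives a struct clk_notifier_data, which is aligned
	 * with struct kbase_gpu_clk_notifier_data as noted above.
	 */
	return clk_notifier_register(gpu_clk_handle, nb);
}

static void example_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
						void *gpu_clk_handle, struct notifier_block *nb)
{
	clk_notifier_unregister(gpu_clk_handle, nb);
}

static struct kbase_clk_rate_trace_op_conf example_clk_rate_trace_ops = {
	.enumerate_gpu_clk = example_enumerate_gpu_clk,
	.get_gpu_clk_rate = example_get_gpu_clk_rate,
	.gpu_clk_notifier_register = example_gpu_clk_notifier_register,
	.gpu_clk_notifier_unregister = example_gpu_clk_notifier_unregister,
};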
++ * kbasep_platform_device_term - Platform specific call to terminate hardware ++ * @kbdev: Kbase device pointer + * -+ * @dev: The device to suspend ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can destroy any platform specific context state and ++ * shut down any hardware functionality that are outside of the Power Management ++ * callbacks. + * -+ * This is called by Linux when the device should suspend. ++ */ ++void kbasep_platform_device_term(struct kbase_device *kbdev); ++ ++/** ++ * kbasep_platform_device_late_init: - Platform specific call to finish hardware ++ * initialization ++ * @kbdev: kbase device pointer + * -+ * Return: A standard Linux error code on failure, 0 otherwise. ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any hardware and context state that ++ * is required for the GPU block to function. ++ * ++ * Return: 0 if no errors have been found in the config. ++ * Negative error code otherwise. + */ -+static int kbase_device_suspend(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); ++int kbasep_platform_device_late_init(struct kbase_device *kbdev); + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * kbasep_platform_device_late_term - Platform specific call to finish hardware ++ * termination ++ * @kbdev: Kbase device pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can destroy any platform specific context state and ++ * shut down any hardware functionality that are outside of the Power Management ++ * callbacks. ++ * ++ */ ++void kbasep_platform_device_late_term(struct kbase_device *kbdev); + -+ if (kbase_pm_suspend(kbdev)) { -+ dev_warn(kbdev->dev, "Abort suspend as GPU suspension failed"); -+ return -EBUSY; -+ } ++#if !MALI_USE_CSF ++/** ++ * kbasep_platform_context_init - Platform specific callback when a kernel ++ * context is created ++ * @kctx: kbase_context pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any per kernel context structures ++ * that are required for the GPU block to function. ++ * ++ * Return: 0 if no errors were encountered. Negative error code otherwise. ++ */ ++int kbasep_platform_context_init(struct kbase_context *kctx); + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ kbase_pm_metrics_stop(kbdev); -+#endif ++/** ++ * kbasep_platform_context_term - Platform specific callback when a kernel ++ * context is terminated ++ * @kctx: kbase_context pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should terminate any per kernel context structures ++ * created as part of &kbasep_platform_context_init. ++ * ++ */ ++void kbasep_platform_context_term(struct kbase_context *kctx); + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ dev_dbg(dev, "Callback %s\n", __func__); -+ if (kbdev->devfreq) { -+ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); -+ flush_workqueue(kbdev->devfreq_queue.workq); -+ } -+#endif ++/** ++ * kbasep_platform_event_atom_submit - Platform specific callback when an atom ++ * is submitted to the GPU ++ * @katom: kbase_jd_atom pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should not assume that it is in a process context. ++ * ++ * Return: 0 if no errors were encountered. 
Negative error code otherwise. ++ */ ++void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom); + -+#ifdef CONFIG_ARCH_ROCKCHIP -+ kbase_platform_rk_enable_regulator(kbdev); ++/** ++ * kbasep_platform_event_atom_complete - Platform specific callback when an atom ++ * has stopped running on the GPU ++ * @katom: kbase_jd_atom pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should not assume that it is in a process context. ++ * ++ */ ++void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom); +#endif + -+#ifdef KBASE_PM_RUNTIME -+ if (kbdev->is_runtime_resumed) { -+ if (kbdev->pm.backend.callback_power_runtime_off) -+ kbdev->pm.backend.callback_power_runtime_off(kbdev); -+ } -+#endif /* KBASE_PM_RUNTIME */ -+ -+ return 0; -+} ++#ifndef CONFIG_OF ++/** ++ * kbase_platform_register - Register a platform device for the GPU ++ * This can be used to register a platform device on systems where device tree ++ * is not enabled and the platform initialisation code in the kernel doesn't ++ * create the GPU device. Where possible device tree should be used instead. ++ * ++ * Return: 0 for success, any other fail causes module initialisation to fail ++ */ ++int kbase_platform_register(void); + +/** -+ * kbase_device_resume - Resume callback from the OS. ++ * kbase_platform_unregister - Unregister a fake platform device + * -+ * @dev: The device to resume ++ * Unregister the platform device created with kbase_platform_register() ++ */ ++void kbase_platform_unregister(void); ++#endif ++ ++#endif /* _KBASE_CONFIG_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +new file mode 100644 +index 000000000..14493a77e +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +@@ -0,0 +1,278 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This is called by Linux when the device should resume from suspension. ++ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
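The kbasep_platform_device_init()/kbasep_platform_device_term() wrappers above call the platform-defined routines supplied through the kbase_platform_funcs_conf hooks described earlier in this header. As a minimal sketch of such a hook pair that owns the kbase_device::platform_context pointer (struct example_platform_data is invented purely for illustration):

#include <linux/slab.h>

struct example_platform_data {
	int example_state; /* placeholder for whatever the platform needs */
};

static int example_platform_init(struct kbase_device *kbdev)
{
	struct example_platform_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

	if (!data)
		return -ENOMEM;
	kbdev->platform_context = data;
	return 0;
}

static void example_platform_term(struct kbase_device *kbdev)
{
	kfree(kbdev->platform_context);
	kbdev->platform_context = NULL;
}

static struct kbase_platform_funcs_conf example_platform_funcs = {
	.platform_init_func = example_platform_init,
	.platform_term_func = example_platform_term,
	/* Remaining hooks left NULL: no platform-specific handling needed. */
};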
+ * -+ * Return: A standard Linux error code + */ -+static int kbase_device_resume(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * DOC: Default values for configuration settings ++ * ++ */ + -+#ifdef KBASE_PM_RUNTIME -+ if (kbdev->is_runtime_resumed) { -+ if (kbdev->pm.backend.callback_power_runtime_on) -+ kbdev->pm.backend.callback_power_runtime_on(kbdev); -+ } -+#endif /* KBASE_PM_RUNTIME */ ++#ifndef _KBASE_CONFIG_DEFAULTS_H_ ++#define _KBASE_CONFIG_DEFAULTS_H_ + -+ kbase_pm_resume(kbdev); ++/* Include mandatory definitions per platform */ ++#include + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ kbase_pm_metrics_start(kbdev); -+#endif ++enum { ++ /* Use unrestricted Address ID width on the AXI bus. */ ++ KBASE_AID_32 = 0x0, + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ dev_dbg(dev, "Callback %s\n", __func__); -+ if (kbdev->devfreq) -+ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); -+#endif ++ /* Restrict GPU to a half of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_16 = 0x3, + -+#if !MALI_USE_CSF -+ kbase_enable_quick_reset(kbdev); -+#endif ++ /* Restrict GPU to a quarter of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_8 = 0x2, + -+ return 0; -+} ++ /* Restrict GPU to an eighth of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_4 = 0x1 ++}; + -+/** -+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS. -+ * -+ * @dev: The device to suspend -+ * -+ * This is called by Linux when the device should prepare for a condition in -+ * which it will not be able to communicate with the CPU(s) and RAM due to -+ * power management. -+ * -+ * Return: A standard Linux error code -+ */ -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_suspend(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); -+ int ret = 0; ++enum { ++ /* Use unrestricted Address ID width on the AXI bus. ++ * Restricting ID width will reduce performance & bus load due to GPU. ++ */ ++ KBASE_3BIT_AID_32 = 0x0, + -+ if (!kbdev) -+ return -ENODEV; ++ /* Restrict GPU to 7/8 of maximum Address ID count. */ ++ KBASE_3BIT_AID_28 = 0x1, + -+ dev_dbg(dev, "Callback %s\n", __func__); -+ KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_SUSPEND_CALLBACK, NULL, 0); ++ /* Restrict GPU to 3/4 of maximum Address ID count. */ ++ KBASE_3BIT_AID_24 = 0x2, ++ ++ /* Restrict GPU to 5/8 of maximum Address ID count. */ ++ KBASE_3BIT_AID_20 = 0x3, ++ ++ /* Restrict GPU to 1/2 of maximum Address ID count. */ ++ KBASE_3BIT_AID_16 = 0x4, ++ ++ /* Restrict GPU to 3/8 of maximum Address ID count. */ ++ KBASE_3BIT_AID_12 = 0x5, ++ ++ /* Restrict GPU to 1/4 of maximum Address ID count. */ ++ KBASE_3BIT_AID_8 = 0x6, ++ ++ /* Restrict GPU to 1/8 of maximum Address ID count. */ ++ KBASE_3BIT_AID_4 = 0x7 ++}; + +#if MALI_USE_CSF -+ ret = kbase_pm_handle_runtime_suspend(kbdev); -+ if (ret) -+ return ret; ++/* ++ * Default value for the TIMER register of the IPA Control interface, ++ * expressed in milliseconds. ++ * ++ * The chosen value is a trade off between two requirements: the IPA Control ++ * interface should sample counters with a resolution in the order of ++ * milliseconds, while keeping GPU overhead as limited as possible. 
++ */ ++#define IPA_CONTROL_TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ ++#endif /* MALI_USE_CSF */ ++ ++/* Default period for DVFS sampling (can be overridden by platform header) */ ++#ifndef DEFAULT_PM_DVFS_PERIOD ++#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ +#endif + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ kbase_pm_metrics_stop(kbdev); ++/* Power Management poweroff tick granuality. This is in nanoseconds to ++ * allow HR timer support (can be overridden by platform header). ++ * ++ * On each scheduling tick, the power manager core may decide to: ++ * -# Power off one or more shader cores ++ * -# Power off the entire GPU ++ */ ++#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS ++#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ +#endif + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ if (kbdev->devfreq) -+ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); ++/* Power Manager number of ticks before shader cores are powered off ++ * (can be overridden by platform header). ++ */ ++#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER ++#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ +#endif + -+ if (kbdev->pm.backend.callback_power_runtime_off) { -+ kbdev->pm.backend.callback_power_runtime_off(kbdev); -+ kbdev->is_runtime_resumed = false; -+ dev_dbg(dev, "runtime suspend\n"); -+ } -+ return ret; -+} -+#endif /* KBASE_PM_RUNTIME */ ++/* Default scheduling tick granuality (can be overridden by platform header) */ ++#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS ++#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ ++#endif + -+/** -+ * kbase_device_runtime_resume - Runtime resume callback from the OS. -+ * -+ * @dev: The device to suspend -+ * -+ * This is called by Linux when the device should go into a fully active state. ++/* Default minimum number of scheduling ticks before jobs are soft-stopped. + * -+ * Return: A standard Linux error code ++ * This defines the time-slice for a job (which may be different from that of a ++ * context) + */ ++#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ + -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_resume(struct device *dev) -+{ -+ int ret = 0; -+ struct kbase_device *kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; ++/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */ ++#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ + -+ dev_dbg(dev, "Callback %s\n", __func__); -+ // KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_RESUME_CALLBACK, NULL, 0); -+ if (kbdev->pm.backend.callback_power_runtime_on) { -+ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); -+ kbdev->is_runtime_resumed = true; -+ dev_dbg(dev, "runtime resume\n"); -+ } ++/* Default minimum number of scheduling ticks before jobs are hard-stopped */ ++#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+ kbase_pm_metrics_start(kbdev); -+#endif ++/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */ ++#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ if (kbdev->devfreq) -+ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); -+#endif ++/* Default minimum number of scheduling ticks before jobs are hard-stopped ++ * during dumping ++ */ ++#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ + -+ return ret; -+} -+#endif /* KBASE_PM_RUNTIME */ ++/* Default timeout for some software jobs, after which the software event wait ++ * jobs will be cancelled. 
++ */ ++#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ + ++/* Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" job ++ */ ++#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_device_runtime_idle - Runtime idle callback from the OS. -+ * @dev: The device to suspend -+ * -+ * This is called by Linux when the device appears to be inactive and it might -+ * be placed into a low power state. -+ * -+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, -+ * otherwise a standard Linux error code ++/* Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" CL job. + */ -+static int kbase_device_runtime_idle(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); ++#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ + -+ if (!kbdev) -+ return -ENODEV; ++/* Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" job during dumping. ++ */ ++#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ + -+ dev_dbg(dev, "Callback %s\n", __func__); -+ /* Use platform specific implementation if it exists. */ -+ if (kbdev->pm.backend.callback_power_runtime_idle) -+ return kbdev->pm.backend.callback_power_runtime_idle(kbdev); ++/* Nominal reference frequency that was used to obtain all following ++ * <...>_TIMEOUT_CYCLES macros, in kHz. ++ * ++ * Timeouts are scaled based on the relation between this value and the lowest ++ * GPU clock frequency. ++ */ ++#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000) + -+ /* Just need to update the device's last busy mark. Kernel will respect -+ * the autosuspend delay and so won't suspend the device immediately. -+ */ -+ pm_runtime_mark_last_busy(kbdev->dev); -+ return 0; -+} -+#endif /* KBASE_PM_RUNTIME */ ++#if MALI_USE_CSF ++/* Waiting timeout for status change acknowledgment, in clock cycles. ++ * ++ * This is also the default timeout to be used when an invalid timeout ++ * selector is used to retrieve the timeout on CSF GPUs. ++ * ++ * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based ++ * on scaling from a 50MHz GPU system. ++ */ ++#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull) + -+/* The power management operations for the platform driver. ++/* Timeout in clock cycles for GPU Power Management to reach the desired ++ * Shader, L2 and MCU state. ++ * ++ * Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system. + */ -+static const struct dev_pm_ops kbase_pm_ops = { -+ .suspend = kbase_device_suspend, -+ .resume = kbase_device_resume, -+#ifdef KBASE_PM_RUNTIME -+ .runtime_suspend = kbase_device_runtime_suspend, -+ .runtime_resume = kbase_device_runtime_resume, -+ .runtime_idle = kbase_device_runtime_idle, -+#endif /* KBASE_PM_RUNTIME */ -+}; ++#define CSF_PM_TIMEOUT_CYCLES (250000000) + -+#if IS_ENABLED(CONFIG_OF) -+static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" }, -+ { .compatible = "arm,mali-midgard" }, -+ { .compatible = "arm,mali-bifrost" }, -+ { .compatible = "arm,mali-valhall" }, -+ { /* sentinel */ } }; -+MODULE_DEVICE_TABLE(of, kbase_dt_ids); -+#endif ++/* Waiting timeout in clock cycles for GPU reset to complete. 
++ * ++ * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system ++ */ ++#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) + -+static struct platform_driver kbase_platform_driver = { -+ .probe = kbase_platform_device_probe, -+ .remove = kbase_platform_device_remove, -+ .driver = { -+ .name = kbase_drv_name, -+ .pm = &kbase_pm_ops, -+ .of_match_table = of_match_ptr(kbase_dt_ids), -+ .probe_type = PROBE_PREFER_ASYNCHRONOUS, -+ }, -+}; ++/* Waiting timeout in clock cycles for all active CSGs to be suspended. ++ * ++ * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. ++ */ ++#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) + -+#if (KERNEL_VERSION(5, 3, 0) > LINUX_VERSION_CODE) && IS_ENABLED(CONFIG_OF) -+module_platform_driver(kbase_platform_driver); -+#else -+static int __init kbase_driver_init(void) -+{ -+ int ret; ++/* Waiting timeout in clock cycles for GPU firmware to boot. ++ * ++ * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system. ++ */ ++#define CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES (25000000) + -+#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) -+ mutex_init(&kbase_probe_mutex); -+#endif ++/* Waiting timeout for a ping request to be acknowledged, in clock cycles. ++ * ++ * Based on 6000ms timeout at 100MHz, scaled from a 50MHz GPU system. ++ */ ++#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) + -+#ifndef CONFIG_OF -+ ret = kbase_platform_register(); -+ if (ret) -+ return ret; -+#endif -+ ret = platform_driver_register(&kbase_platform_driver); -+#ifndef CONFIG_OF -+ if (ret) { -+ kbase_platform_unregister(); -+ return ret; -+ } -+#endif ++#else /* MALI_USE_CSF */ + -+ return ret; -+} ++/* A default timeout in clock cycles to be used when an invalid timeout ++ * selector is used to retrieve the timeout, on JM GPUs. ++ */ ++#define JM_DEFAULT_TIMEOUT_CYCLES (150000000) + -+static void __exit kbase_driver_exit(void) -+{ -+ platform_driver_unregister(&kbase_platform_driver); -+#ifndef CONFIG_OF -+ kbase_platform_unregister(); -+#endif -+} ++/* Default number of milliseconds given for other jobs on the GPU to be ++ * soft-stopped when the GPU needs to be reset. ++ */ ++#define JM_DEFAULT_RESET_TIMEOUT_MS (1) /* 1 ms */ + -+module_init(kbase_driver_init); -+module_exit(kbase_driver_exit); -+#endif -+MODULE_LICENSE("GPL"); -+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ -+ __stringify(BASE_UK_VERSION_MAJOR) "." \ -+ __stringify(BASE_UK_VERSION_MINOR) ")"); -+MODULE_SOFTDEP("pre: memory_group_manager"); -+MODULE_INFO(import_ns, "DMA_BUF"); ++/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT ++ * is updated on HW side so a Job Slot is considered free. ++ * This timeout will only take effect on GPUs with low value for the minimum ++ * GPU clock frequency (<= 100MHz). ++ * ++ * Based on 1ms timeout at 100MHz. Will default to 0ms on GPUs with higher ++ * value for minimum GPU clock frequency. ++ */ ++#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) + -+#define CREATE_TRACE_POINTS -+/* Create the trace points (otherwise we just get code to call a tracepoint) */ -+#include "mali_linux_trace.h" ++#endif /* MALI_USE_CSF */ + -+#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); ++/* Default timeslice that a context is scheduled in for, in nanoseconds. 
++ * ++ * When a context has used up this amount of time across its jobs, it is ++ * scheduled out to let another run. ++ * ++ * @note the resolution is nanoseconds (ns) here, because that's the format ++ * often used by the OS. ++ */ ++#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ + -+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) -+{ -+ trace_mali_pm_status(dev_id, event, value); -+} ++/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms ++ * this isn't available, so we simply define a dummy value here. If devfreq ++ * is enabled the value will be read from there, otherwise this should be ++ * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. ++ */ ++#define DEFAULT_GPU_FREQ_KHZ_MAX (5000) + -+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) -+{ -+ trace_mali_job_slots_event(dev_id, event, -+ (kctx != NULL ? kctx->tgid : 0), -+ (kctx != NULL ? kctx->pid : 0), -+ atom_id); -+} ++/* Default timeout for task execution on an endpoint ++ * ++ * Number of GPU clock cycles before the driver terminates a task that is ++ * making no forward progress on an endpoint (e.g. shader core). ++ * Value chosen is equivalent to the time after which a job is hard stopped ++ * which is 5 seconds (assuming the GPU is usually clocked at ~500 MHZ). ++ */ ++#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) + -+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) -+{ -+ trace_mali_page_fault_insert_pages(dev_id, event, value); -+} ++/* Default threshold at which to switch to incremental rendering ++ * ++ * Fraction of the maximum size of an allocation that grows on GPU page fault ++ * that can be used up before the driver switches to incremental rendering, ++ * in 256ths. 0 means disable incremental rendering. ++ */ ++#define DEFAULT_IR_THRESHOLD (192) + -+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event) -+{ -+ trace_mali_total_alloc_pages_change(dev_id, event); -+} -+#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h ++/* Waiting time in clock cycles for the completion of a MMU operation. ++ * ++ * Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush. ++ * ++ * As a pessimistic value, 50M GPU cycles ( > 30 times bigger ) is chosen. ++ * It corresponds to 0.5s in GPU @ 100Mhz. ++ */ ++#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) ++#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c new file mode 100644 -index 000000000..7e885ca46 +index 000000000..68e4305af --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h -@@ -0,0 +1,40 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +@@ -0,0 +1,5856 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
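The <...>_TIMEOUT_CYCLES defaults above are all expressed in GPU cycles at the nominal DEFAULT_REF_TIMEOUT_FREQ_KHZ (100 MHz) reference, so converting back to wall-clock time is a single division. A small sketch of that conversion, using div64_u64() from <linux/math64.h>:

#include <linux/math64.h>

/* ms = cycles / freq_khz, since freq_khz * 1000 cycles elapse per second. */
static u64 example_timeout_cycles_to_ms(u64 timeout_cycles, u64 freq_khz)
{
	return div64_u64(timeout_cycles, freq_khz);
}

/*
 * For instance, CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000) at 100000 kHz gives
 * 75000 ms, and CSF_PM_TIMEOUT_CYCLES (250000000) gives 2500 ms, matching the
 * comments above.
 */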
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -209325,6044 +212602,5851 @@ index 000000000..7e885ca46 + * + */ + -+#ifndef _KBASE_CS_EXPERIMENTAL_H_ -+#define _KBASE_CS_EXPERIMENTAL_H_ ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++#include ++#include ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++#include ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++#include "backend/gpu/mali_kbase_model_linux.h" ++#include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_mem_pool_debugfs.h" ++#include "mali_kbase_mem_pool_group.h" ++#include "mali_kbase_debugfs_helper.h" ++#include "mali_kbase_regs_history_debugfs.h" ++#include ++#include ++#if !MALI_USE_CSF ++#include ++#endif /* !MALI_USE_CSF */ ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++#include ++#endif ++#include ++#include ++#if !MALI_USE_CSF ++#include "mali_kbase_kinstr_jm.h" ++#endif ++#include "hwcnt/mali_kbase_hwcnt_context.h" ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#include "mali_kbase_kinstr_prfcnt.h" ++#include "mali_kbase_vinstr.h" ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_firmware.h" ++#include "csf/mali_kbase_csf_tiler_heap.h" ++#include "csf/mali_kbase_csf_csg_debugfs.h" ++#include "csf/mali_kbase_csf_cpu_queue_debugfs.h" ++#include "csf/mali_kbase_csf_event.h" ++#endif ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include "arbiter/mali_kbase_arbiter_pm.h" ++#endif ++ ++#include "mali_kbase_cs_experimental.h" ++ ++#ifdef CONFIG_MALI_CINSTR_GWT ++#include "mali_kbase_gwt.h" ++#endif ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "mali_kbase_dvfs_debugfs.h" ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "mali_kbase_pbha_debugfs.h" ++#endif + ++#include ++#include ++#include +#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* is_compat_task/in_compat_syscall */ ++#include ++#include ++#include ++#include ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include ++#endif /* CONFIG_SYNC_FILE */ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" + +/** -+ * mali_kbase_print_cs_experimental() - Print a string for every Core Services -+ * experimental feature that is enabled ++ * KBASE_API_VERSION - KBase API Version ++ * @major: Kernel major version ++ * @minor: Kernel minor version + */ -+static inline void mali_kbase_print_cs_experimental(void) -+{ -+#if MALI_INCREMENTAL_RENDERING_JM -+ pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled"); -+#endif /* MALI_INCREMENTAL_RENDERING_JM */ -+} -+ -+#endif /* _KBASE_CS_EXPERIMENTAL_H_ */ ++#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ ++ (((minor) & 0xFFF) << 8) | \ ++ ((0 & 0xFF) << 0)) + ++/** ++ * struct mali_kbase_capability_def - kbase capabilities table ++ * ++ * @required_major: required major ++ * @required_minor: required minor ++ */ ++struct mali_kbase_capability_def { ++ u16 required_major; ++ u16 required_minor; ++}; + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c -new file mode 100644 -index 
000000000..dc6feb95a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c -@@ -0,0 +1,433 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* ++ * This must be kept in-sync with mali_kbase_cap + * -+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * TODO: The alternative approach would be to embed the cap enum values ++ * in the table. Less efficient but potentially safer. + */ -+ -+#include -+#include -+#include "mali_kbase_ctx_sched.h" -+#include "tl/mali_kbase_tracepoints.h" ++static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { +#if MALI_USE_CSF -+#include "mali_kbase_reset_gpu.h" ++ { 1, 0 }, /* SYSTEM_MONITOR */ ++ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ ++ { 1, 0 }, /* MEM_GROW_ON_GPF */ ++ { 1, 0 } /* MEM_PROTECTED */ +#else -+#include ++ { 11, 15 }, /* SYSTEM_MONITOR */ ++ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ ++ { 11, 2 }, /* MEM_GROW_ON_GPF */ ++ { 11, 2 } /* MEM_PROTECTED */ +#endif ++}; + -+/* Helper for ktrace */ -+#if KBASE_KTRACE_ENABLE -+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) ++/* Mutex to synchronize the probe of multiple kbase instances */ ++static struct mutex kbase_probe_mutex; ++#endif ++ ++/** ++ * mali_kbase_supports_cap - Query whether a kbase capability is supported ++ * ++ * @api_version: API version to convert ++ * @cap: Capability to query for - see mali_kbase_caps.h ++ * ++ * Return: true if the capability is supported ++ */ ++bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap) +{ -+ return atomic_read(&kctx->refcount); ++ bool supported = false; ++ unsigned long required_ver; ++ ++ struct mali_kbase_capability_def const *cap_def; ++ ++ if (WARN_ON(cap < 0)) ++ return false; ++ ++ if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) ++ return false; ++ ++ cap_def = &kbase_caps_table[(int)cap]; ++ required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); ++ supported = (api_version >= required_ver); ++ ++ return supported; +} -+#else /* KBASE_KTRACE_ENABLE */ -+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++ ++/** ++ * kbase_file_new - Create an object representing a device file ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * @filp: Pointer to the struct file corresponding to device file ++ * /dev/malixx instance, passed to the file's open method. ++ * ++ * In its initial state, the device file has no context (i.e. no GPU ++ * address space) and no API version number. Both must be assigned before ++ * kbase_file_get_kctx_if_setup_complete() can be used successfully. ++ * ++ * Return: Address of an object representing a simulated device file, or NULL ++ * on failure. 
++ */ ++static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, ++ struct file *const filp) +{ -+ CSTD_UNUSED(kctx); -+ return 0; ++ struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL); ++ ++ if (kfile) { ++ kfile->kbdev = kbdev; ++ kfile->filp = filp; ++ kfile->kctx = NULL; ++ kfile->api_version = 0; ++ atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); ++ } ++ return kfile; +} -+#endif /* KBASE_KTRACE_ENABLE */ + -+int kbase_ctx_sched_init(struct kbase_device *kbdev) ++/** ++ * kbase_file_set_api_version - Set the application programmer interface version ++ * ++ * @kfile: A device file created by kbase_file_new() ++ * @major: Major version number (must not exceed 12 bits) ++ * @minor: Major version number (must not exceed 12 bits) ++ * ++ * An application programmer interface (API) version must be specified ++ * before calling kbase_file_create_kctx(), otherwise an error is returned. ++ * ++ * If a version number was already set for the given @kfile (or is in the ++ * process of being set by another thread) then an error is returned. ++ * ++ * Return: 0 if successful, otherwise a negative error code. ++ */ ++static int kbase_file_set_api_version(struct kbase_file *const kfile, ++ u16 const major, u16 const minor) +{ -+ int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; ++ if (WARN_ON(!kfile)) ++ return -EINVAL; + -+ /* These two must be recalculated if nr_hw_address_spaces changes -+ * (e.g. for HW workarounds) ++ /* setup pending, try to signal that we'll do the setup, ++ * if setup was already in progress, err this call + */ -+ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; -+ kbdev->as_free = as_present; /* All ASs initially free */ ++ if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, ++ KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN) ++ return -EPERM; + -+ memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); ++ /* save the proposed version number for later use */ ++ kfile->api_version = KBASE_API_VERSION(major, minor); + ++ atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX); + return 0; +} + -+void kbase_ctx_sched_term(struct kbase_device *kbdev) ++/** ++ * kbase_file_get_api_version - Get the application programmer interface version ++ * ++ * @kfile: A device file created by kbase_file_new() ++ * ++ * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has ++ * been set. ++ */ ++static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) +{ -+ s8 i; ++ if (WARN_ON(!kfile)) ++ return 0; + -+ /* Sanity checks */ -+ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { -+ WARN_ON(kbdev->as_to_kctx[i] != NULL); -+ WARN_ON(!(kbdev->as_free & (1u << i))); -+ } -+} ++ if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX) ++ return 0; + -+void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) -+{ -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ atomic_set(&kctx->refcount, 0); ++ return kfile->api_version; +} + -+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space ++/** ++ * kbase_file_create_kctx - Create a kernel base context + * -+ * @kbdev: The context for which to find a free address space ++ * @kfile: A device file created by kbase_file_new() ++ * @flags: Flags to set, which can be any combination of ++ * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. + * -+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID ++ * This creates a new context for the GPU platform device instance that was ++ * specified when kbase_file_new() was called. 
Each context has its own GPU ++ * address space. If a context was already created for the given @kfile (or is ++ * in the process of being created for it by another thread) then an error is ++ * returned. + * -+ * This function returns an address space available for use. It would prefer -+ * returning an AS that has been previously assigned to the context to -+ * avoid having to reprogram the MMU. ++ * An API version number must have been set by kbase_file_set_api_version() ++ * before calling this function, otherwise an error is returned. ++ * ++ * Return: 0 if a new context was created, otherwise a negative error code. + */ -+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ int free_as; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* First check if the previously assigned AS is available */ -+ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && -+ (kbdev->as_free & (1u << kctx->as_nr))) -+ return kctx->as_nr; ++static int kbase_file_create_kctx(struct kbase_file *kfile, ++ base_context_create_flags flags); + -+ /* The previously assigned AS was taken, we'll be returning any free -+ * AS at this point. -+ */ -+ free_as = ffs(kbdev->as_free) - 1; -+ if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) -+ return free_as; ++/** ++ * kbase_file_get_kctx_if_setup_complete - Get a kernel base context ++ * pointer from a device file ++ * ++ * @kfile: A device file created by kbase_file_new() ++ * ++ * This function returns NULL if no context has been created for the given @kfile. ++ * This makes it safe to use in circumstances where the order of initialization ++ * cannot be enforced, but only if the caller checks the return value. ++ * ++ * Return: Address of the kernel base context associated with the @kfile, or ++ * NULL if no context exists. ++ */ ++static struct kbase_context *kbase_file_get_kctx_if_setup_complete( ++ struct kbase_file *const kfile) ++{ ++ if (WARN_ON(!kfile) || ++ atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || ++ WARN_ON(!kfile->kctx)) ++ return NULL; + -+ return KBASEP_AS_NR_INVALID; ++ return kfile->kctx; +} + -+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) ++/** ++ * kbase_file_delete - Destroy an object representing a device file ++ * ++ * @kfile: A device file created by kbase_file_new() ++ * ++ * If any context was created for the @kfile then it is destroyed. ++ */ ++static void kbase_file_delete(struct kbase_file *const kfile) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_device *kbdev = NULL; + -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (WARN_ON(!kfile)) ++ return; + -+ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ kfile->filp->private_data = NULL; ++ kbdev = kfile->kbdev; + -+ if (atomic_inc_return(&kctx->refcount) == 1) { -+ int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); ++ if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { ++ struct kbase_context *kctx = kfile->kctx; + -+ if (free_as >= 0) { -+ kbdev->as_free &= ~(1u << free_as); -+ /* Only program the MMU if the context has not been -+ * assigned the same address space before. 
-+ */ -+ if (free_as != kctx->as_nr) { -+ struct kbase_context *const prev_kctx = -+ kbdev->as_to_kctx[free_as]; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ kbasep_mem_profile_debugfs_remove(kctx); ++#endif ++ kbase_context_debugfs_term(kctx); + -+ if (prev_kctx) { -+ WARN_ON(atomic_read(&prev_kctx->refcount) != 0); -+ kbase_mmu_disable(prev_kctx); -+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( -+ kbdev, prev_kctx->id); -+ prev_kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } -+ kctx->as_nr = free_as; -+ kbdev->as_to_kctx[free_as] = kctx; -+ KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( -+ kbdev, kctx->id, free_as); -+ kbase_mmu_update(kbdev, &kctx->mmu, -+ kctx->as_nr); -+ } -+ } else { -+ atomic_dec(&kctx->refcount); ++ kbase_destroy_context(kctx); + -+ /* Failed to find an available address space, we must -+ * be returning an error at this point. -+ */ -+ WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ } ++ dev_dbg(kbdev->dev, "deleted base context\n"); + } + -+ return kctx->as_nr; ++ kbase_release_device(kbdev); ++ ++ kfree(kfile); +} + -+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) ++static int kbase_api_handshake(struct kbase_file *kfile, ++ struct kbase_ioctl_version_check *version) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; ++ int err = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+#if MALI_USE_CSF -+ /* We expect the context to be active when this function is called, -+ * except for the case where a page fault is reported for it during -+ * the GPU reset sequence, in which case we can expect the refcount -+ * to be 0. ++ switch (version->major) { ++ case BASE_UK_VERSION_MAJOR: ++ /* set minor to be the lowest common */ ++ version->minor = min_t(int, BASE_UK_VERSION_MINOR, ++ (int)version->minor); ++ break; ++ default: ++ /* We return our actual version regardless if it ++ * matches the version returned by userspace - ++ * userspace can bail if it can't handle this ++ * version ++ */ ++ version->major = BASE_UK_VERSION_MAJOR; ++ version->minor = BASE_UK_VERSION_MINOR; ++ break; ++ } ++ ++ /* save the proposed version number for later use */ ++ err = kbase_file_set_api_version(kfile, version->major, version->minor); ++ if (unlikely(err)) ++ return err; ++ ++ /* For backward compatibility, we may need to create the context before ++ * the flags have been set. Originally it was created on file open ++ * (with job submission disabled) but we don't support that usage. 
+ */ -+ WARN_ON(!atomic_read(&kctx->refcount) && !kbase_reset_gpu_is_active(kbdev)); -+#else -+ /* We expect the context to be active (and thus refcount should be non-zero) -+ * when this function is called -+ */ -+ WARN_ON(!atomic_read(&kctx->refcount)); -+#endif -+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) -+ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); -+ else -+ WARN(true, "Invalid as_nr(%d)", kctx->as_nr); ++ if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) ++ err = kbase_file_create_kctx(kfile, ++ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); + -+ atomic_inc(&kctx->refcount); ++ return err; +} + -+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) ++static int kbase_api_handshake_dummy(struct kbase_file *kfile, ++ struct kbase_ioctl_version_check *version) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ int new_ref_count; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ new_ref_count = atomic_dec_return(&kctx->refcount); -+ if (new_ref_count == 0) { -+ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) { -+ kbdev->as_free |= (1u << kctx->as_nr); -+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { -+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); -+ kbdev->as_to_kctx[kctx->as_nr] = NULL; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); -+#if !MALI_USE_CSF -+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx); -+#endif -+ } -+ } -+ } -+ -+ KBASE_KTRACE_ADD(kbdev, SCHED_RELEASE_CTX, kctx, new_ref_count); ++ return -EPERM; +} + -+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) ++static int kbase_api_kinstr_prfcnt_enum_info( ++ struct kbase_file *kfile, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info *prfcnt_enum_info) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ unsigned long flags; ++ return kbase_kinstr_prfcnt_enum_info(kfile->kbdev->kinstr_prfcnt_ctx, ++ prfcnt_enum_info); ++} + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++static int kbase_api_kinstr_prfcnt_setup( ++ struct kbase_file *kfile, ++ union kbase_ioctl_kinstr_prfcnt_setup *prfcnt_setup) ++{ ++ return kbase_kinstr_prfcnt_setup(kfile->kbdev->kinstr_prfcnt_ctx, ++ prfcnt_setup); ++} + -+ WARN_ON(atomic_read(&kctx->refcount) != 0); ++static struct kbase_device *to_kbase_device(struct device *dev) ++{ ++ return dev_get_drvdata(dev); ++} + -+ if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) { -+ if (kbdev->pm.backend.gpu_powered) -+ kbase_mmu_disable(kctx); ++int assign_irqs(struct kbase_device *kbdev) ++{ ++ static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" }; + -+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); -+ kbdev->as_to_kctx[kctx->as_nr] = NULL; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } ++#if IS_ENABLED(CONFIG_OF) ++ static const char *const irq_names[] = { "job", "mmu", "gpu" }; ++#endif + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+} ++ struct platform_device *pdev; ++ int i; + -+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) -+{ -+ s8 i; ++ if (!kbdev) ++ return -ENODEV; + -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ pdev = to_platform_device(kbdev->dev); + -+ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) { ++ int irq; + -+ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { -+ struct 
kbase_context *kctx; ++#if IS_ENABLED(CONFIG_OF) ++ /* We recommend using Upper case for the irq names in dts, but if ++ * there are devices in the world using Lower case then we should ++ * avoid breaking support for them. So try using names in Upper case ++ * first then try using Lower case names. If both attempts fail then ++ * we assume there is no IRQ resource specified for the GPU. ++ */ ++ irq = platform_get_irq_byname(pdev, irq_names_caps[i]); ++ if (irq < 0) ++ irq = platform_get_irq_byname(pdev, irq_names[i]); ++#else ++ irq = platform_get_irq(pdev, i); ++#endif /* CONFIG_OF */ + -+ kbdev->as[i].is_unresponsive = false; -+#if MALI_USE_CSF -+ if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { -+ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, -+ MCU_AS_NR); -+ continue; ++ if (irq < 0) { ++ dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]); ++ return irq; + } -+#endif -+ kctx = kbdev->as_to_kctx[i]; -+ if (kctx) { -+ if (atomic_read(&kctx->refcount)) { -+ WARN_ON(kctx->as_nr != i); + -+ kbase_mmu_update(kbdev, &kctx->mmu, -+ kctx->as_nr); -+ kbase_ctx_flag_clear(kctx, -+ KCTX_AS_DISABLED_ON_FAULT); -+ } else { -+ /* This context might have been assigned an -+ * AS before, clear it. -+ */ -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID) { -+ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( -+ kbdev, kctx->id); -+ kbdev->as_to_kctx[kctx->as_nr] = NULL; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } -+ } -+ } else { -+ kbase_mmu_disable_as(kbdev, i); -+ } ++ kbdev->irqs[i].irq = irq; ++ kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq)); + } ++ ++ return 0; +} + -+struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( -+ struct kbase_device *kbdev, size_t as_nr) ++/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ ++struct kbase_device *kbase_find_device(int minor) +{ -+ unsigned long flags; -+ struct kbase_context *found_kctx = NULL; ++ struct kbase_device *kbdev = NULL; ++ struct list_head *entry; ++ const struct list_head *dev_list = kbase_device_get_list(); + -+ if (WARN_ON(kbdev == NULL)) -+ return NULL; ++ list_for_each(entry, dev_list) { ++ struct kbase_device *tmp; + -+ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) -+ return NULL; ++ tmp = list_entry(entry, struct kbase_device, entry); ++ if (tmp->mdev.minor == minor || minor == -1) { ++ kbdev = tmp; ++ get_device(kbdev->dev); ++ break; ++ } ++ } ++ kbase_device_put_list(dev_list); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ return kbdev; ++} ++EXPORT_SYMBOL(kbase_find_device); + -+ found_kctx = kbdev->as_to_kctx[as_nr]; ++void kbase_release_device(struct kbase_device *kbdev) ++{ ++ put_device(kbdev->dev); ++} ++EXPORT_SYMBOL(kbase_release_device); + -+ if (!WARN_ON(found_kctx == NULL)) -+ kbase_ctx_sched_retain_ctx_refcount(found_kctx); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) ++{ ++ struct kbase_context *kctx = f->private_data; ++ int err; ++ bool value; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ err = kstrtobool_from_user(ubuf, size, &value); ++ if (err) ++ return err; + -+ return found_kctx; ++ if (value) ++ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); ++ else ++ kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); ++ ++ return size; +} + -+struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, -+ size_t as_nr) ++static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, 
loff_t *off) +{ -+ unsigned long flags; -+ struct kbase_context *found_kctx; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ struct kbase_context *kctx = f->private_data; ++ char buf[32]; ++ int count; ++ bool value; + -+ found_kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N"); + -+ return found_kctx; ++ return simple_read_from_buffer(ubuf, size, off, buf, count); +} + -+struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( -+ struct kbase_device *kbdev, size_t as_nr) ++static const struct file_operations kbase_infinite_cache_fops = { ++ .owner = THIS_MODULE, ++ .open = simple_open, ++ .write = write_ctx_infinite_cache, ++ .read = read_ctx_infinite_cache, ++}; ++ ++static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, ++ size_t size, loff_t *off) +{ -+ struct kbase_context *found_kctx; ++ struct kbase_context *kctx = f->private_data; ++ int err; ++ bool value; + -+ if (WARN_ON(kbdev == NULL)) -+ return NULL; ++ err = kstrtobool_from_user(ubuf, size, &value); ++ if (err) ++ return err; + -+ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) -+ return NULL; ++ if (value) { ++#if defined(CONFIG_64BIT) ++ /* 32-bit clients cannot force SAME_VA */ ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ return -EINVAL; ++ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); ++#else /* defined(CONFIG_64BIT) */ ++ /* 32-bit clients cannot force SAME_VA */ ++ return -EINVAL; ++#endif /* defined(CONFIG_64BIT) */ ++ } else { ++ kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA); ++ } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ return size; ++} + -+ found_kctx = kbdev->as_to_kctx[as_nr]; ++static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, ++ size_t size, loff_t *off) ++{ ++ struct kbase_context *kctx = f->private_data; ++ char buf[32]; ++ int count; ++ bool value; + -+ if (found_kctx) { -+ if (atomic_read(&found_kctx->refcount) <= 0) -+ found_kctx = NULL; -+ } ++ value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA); + -+ return found_kctx; ++ count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); ++ ++ return simple_read_from_buffer(ubuf, size, off, buf, count); +} + -+bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) -+{ -+ bool result = false; ++static const struct file_operations kbase_force_same_va_fops = { ++ .owner = THIS_MODULE, ++ .open = simple_open, ++ .write = write_ctx_force_same_va, ++ .read = read_ctx_force_same_va, ++}; ++#endif /* CONFIG_DEBUG_FS */ + -+ if (WARN_ON(kctx == NULL)) -+ return result; ++static int kbase_file_create_kctx(struct kbase_file *const kfile, ++ base_context_create_flags const flags) ++{ ++ struct kbase_device *kbdev = NULL; ++ struct kbase_context *kctx = NULL; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ char kctx_name[64]; ++#endif + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ if (WARN_ON(!kfile)) ++ return -EINVAL; + -+ if (atomic_read(&kctx->refcount) > 0) { -+ KBASE_DEBUG_ASSERT(kctx->as_nr >= 0); ++ /* setup pending, try to signal that we'll do the setup, ++ * if setup was already in progress, err this call ++ */ ++ if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, ++ KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX) ++ return -EPERM; + -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, -+ kbase_ktrace_get_ctx_refcnt(kctx)); -+ result = true; -+ } ++ kbdev = kfile->kbdev; + -+ return result; -+} ++ kctx = kbase_create_context(kbdev, in_compat_syscall(), ++ flags, kfile->api_version, kfile->filp); + -+bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx) -+{ -+ unsigned long flags; -+ bool result = false; ++ /* if bad flags, will stay stuck in setup mode */ ++ if (!kctx) ++ return -ENOMEM; + -+ if (WARN_ON(kctx == NULL)) -+ return result; ++ if (kbdev->infinite_cache_active_default) ++ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + -+ if (WARN_ON(kctx->kbdev == NULL)) -+ return result; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id))) ++ return -ENOMEM; + -+ mutex_lock(&kctx->kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); -+ result = kbase_ctx_sched_inc_refcount_nolock(kctx); -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kctx->kbdev->mmu_hw_mutex); ++ mutex_init(&kctx->mem_profile_lock); + -+ return result; -+} ++ kctx->kctx_dentry = debugfs_create_dir(kctx_name, ++ kbdev->debugfs_ctx_directory); + -+void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) -+{ -+ unsigned long flags; ++ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { ++ /* we don't treat this as a fail - just warn about it */ ++ dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); ++ } else { ++ debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, ++ kctx, &kbase_infinite_cache_fops); ++ debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, ++ kctx, &kbase_force_same_va_fops); + -+ if (WARN_ON(!kctx)) -+ return; ++ kbase_context_debugfs_init(kctx); ++ } ++#endif /* CONFIG_DEBUG_FS */ + -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ dev_dbg(kbdev->dev, "created base context\n"); + -+ if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && -+ !WARN_ON(atomic_read(&kctx->refcount) <= 0)) -+ kbase_ctx_sched_release_ctx(kctx); ++ kfile->kctx = kctx; ++ atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE); + -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ return 0; +} + -+#if MALI_USE_CSF -+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) ++static int 
kbase_open(struct inode *inode, struct file *filp) +{ -+ struct kbase_device *kbdev; -+ bool added_ref = false; -+ unsigned long flags; ++ struct kbase_device *kbdev = NULL; ++ struct kbase_file *kfile; ++ int ret = 0; + -+ if (WARN_ON(kctx == NULL)) -+ return added_ref; ++ kbdev = kbase_find_device(iminor(inode)); + -+ kbdev = kctx->kbdev; ++ if (!kbdev) ++ return -ENODEV; + -+ if (WARN_ON(kbdev == NULL)) -+ return added_ref; ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++ /* Set address space operations for page migration */ ++ kbase_mem_migrate_set_address_space_ops(kbdev, filp); ++#endif + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* Device-wide firmware load is moved here from probing to comply with ++ * Android GKI vendor guideline. ++ */ ++ ret = kbase_device_firmware_init_once(kbdev); ++ if (ret) ++ goto out; + -+ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && -+ (kctx == kbdev->as_to_kctx[kctx->as_nr])) { -+ atomic_inc(&kctx->refcount); ++ kfile = kbase_file_new(kbdev, filp); ++ if (!kfile) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ if (kbdev->as_free & (1u << kctx->as_nr)) -+ kbdev->as_free &= ~(1u << kctx->as_nr); ++ filp->private_data = kfile; ++ filp->f_mode |= FMODE_UNSIGNED_OFFSET; + -+ KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, -+ kbase_ktrace_get_ctx_refcnt(kctx)); -+ added_ref = true; -+ } ++ return 0; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++out: ++ kbase_release_device(kbdev); ++ return ret; ++} + -+ return added_ref; ++static int kbase_release(struct inode *inode, struct file *filp) ++{ ++ struct kbase_file *const kfile = filp->private_data; ++ ++ kbase_file_delete(kfile); ++ return 0; +} -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h -new file mode 100644 -index 000000000..5a8d17547 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h -@@ -0,0 +1,247 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_CTX_SCHED_H_ -+#define _KBASE_CTX_SCHED_H_ + -+#include ++static int kbase_api_set_flags(struct kbase_file *kfile, ++ struct kbase_ioctl_set_flags *flags) ++{ ++ int err = 0; ++ unsigned long const api_version = kbase_file_get_api_version(kfile); ++ struct kbase_context *kctx = NULL; + -+/** -+ * DOC: The Context Scheduler manages address space assignment and reference -+ * counting to kbase_context. The interface has been designed to minimise -+ * interactions between the Job Scheduler and Power Management/MMU to support -+ * the existing Job Scheduler interface. 
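Editor's note: the atomic_cmpxchg() guard at the top of kbase_file_create_kctx() above is what turns context setup into a one-shot transition (NEED_CTX -> CTX_IN_PROGRESS -> COMPLETE): only the first caller wins the exchange, and any later or concurrent caller fails with -EPERM. Below is a minimal userspace illustration of that same guard using C11 atomics; the state names and functions are invented for the sketch and are not kbase code.

#include <stdatomic.h>
#include <stdio.h>

enum setup_state { NEED_SETUP, SETUP_IN_PROGRESS, SETUP_COMPLETE };

static _Atomic int state = NEED_SETUP;

/* Returns 0 for the single caller allowed to perform setup, -1 otherwise. */
static int try_begin_setup(void)
{
    int expected = NEED_SETUP;

    /* Analogous to atomic_cmpxchg(&kfile->setup_state, NEED_CTX, CTX_IN_PROGRESS) */
    if (!atomic_compare_exchange_strong(&state, &expected, SETUP_IN_PROGRESS))
        return -1;  /* someone else already started (or finished) setup */
    return 0;
}

int main(void)
{
    printf("first attempt:  %d\n", try_begin_setup());  /* 0  */
    printf("second attempt: %d\n", try_begin_setup());  /* -1 */
    atomic_store(&state, SETUP_COMPLETE);               /* setup finished */
    return 0;
}

In the driver the state only reaches KBASE_FILE_COMPLETE after the new context has been published in kfile->kctx, so readers that see COMPLETE also see a valid context pointer.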
-+ * -+ * The initial implementation of the Context Scheduler does not schedule -+ * contexts. Instead it relies on the Job Scheduler to make decisions of -+ * when to schedule/evict contexts if address spaces are starved. In the -+ * future, once an interface between the CS and JS has been devised to -+ * provide enough information about how each context is consuming GPU resources, -+ * those decisions can be made in the CS itself, thereby reducing duplicated -+ * code. -+ */ ++ /* Validate flags */ ++ if (flags->create_flags != ++ (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) ++ return -EINVAL; + -+/** -+ * kbase_ctx_sched_init() - Initialise the context scheduler -+ * @kbdev: The device for which the context scheduler needs to be initialised -+ * -+ * This must be called during device initialisation. The number of hardware -+ * address spaces must already be established before calling this function. -+ * -+ * Return: 0 for success, otherwise failure -+ */ -+int kbase_ctx_sched_init(struct kbase_device *kbdev); ++ /* For backward compatibility, the context may have been created before ++ * the flags were set. ++ */ ++ if (mali_kbase_supports_system_monitor(api_version)) { ++ err = kbase_file_create_kctx(kfile, flags->create_flags); ++ } else { ++#if !MALI_USE_CSF ++ struct kbasep_js_kctx_info *js_kctx_info = NULL; ++ unsigned long irq_flags = 0; ++#endif + -+/** -+ * kbase_ctx_sched_term - Terminate the context scheduler -+ * @kbdev: The device for which the context scheduler needs to be terminated -+ * -+ * This must be called during device termination after all contexts have been -+ * destroyed. -+ */ -+void kbase_ctx_sched_term(struct kbase_device *kbdev); ++ /* If setup is incomplete (e.g. because the API version ++ * wasn't set) then we have to give up. ++ */ ++ kctx = kbase_file_get_kctx_if_setup_complete(kfile); ++ if (unlikely(!kctx)) ++ return -EPERM; + -+/** -+ * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling -+ * @kctx: The context to initialize -+ * -+ * This must be called during context initialization before any other context -+ * scheduling functions are called on @kctx -+ */ -+void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); ++#if MALI_USE_CSF ++ /* On CSF GPUs Job Manager interface isn't used to submit jobs ++ * (there are no job slots). So the legacy job manager path to ++ * submit jobs needs to remain disabled for CSF GPUs. ++ */ ++#else ++ js_kctx_info = &kctx->jctx.sched_info; ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++ /* Translate the flags */ ++ if ((flags->create_flags & ++ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) ++ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + -+/** -+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context -+ * @kctx: The context to which to retain a reference -+ * -+ * This function should be called whenever an address space should be assigned -+ * to a context and programmed onto the MMU. It should typically be called -+ * when jobs are ready to be submitted to the GPU. -+ * -+ * It can be called as many times as necessary. The address space will be -+ * assigned to the context for as long as there is a reference to said context. -+ * -+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be -+ * held whilst calling this function. 
-+ * -+ * Return: The address space that the context has been assigned to or -+ * KBASEP_AS_NR_INVALID if no address space was available. -+ */ -+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); + -+/** -+ * kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the @ref kbase_context -+ * @kctx: The context to which to retain a reference -+ * -+ * This function only retains a reference to the context. It must be called -+ * only when the context already has a reference. -+ * -+ * This is typically called inside an atomic session where we know the context -+ * is already scheduled in but want to take an extra reference to ensure that -+ * it doesn't get descheduled. -+ * -+ * The kbase_device::hwaccess_lock must be held whilst calling this function -+ */ -+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++#endif ++ } + -+/** -+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context -+ * @kctx: The context from which to release a reference -+ * -+ * This function should be called whenever an address space could be unassigned -+ * from a context. When there are no more references to said context, the -+ * address space previously assigned to this context shall be reassigned to -+ * other contexts as needed. -+ * -+ * The kbase_device::hwaccess_lock must be held whilst calling this function -+ */ -+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); ++ return err; ++} + -+/** -+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space -+ * @kctx: The context to be removed -+ * -+ * This function should be called when a context is being destroyed. The -+ * context must no longer have any reference. If it has been assigned an -+ * address space before then the AS will be unprogrammed. -+ */ -+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); ++#if !MALI_USE_CSF ++static int kbase_api_job_submit(struct kbase_context *kctx, ++ struct kbase_ioctl_job_submit *submit) ++{ ++ return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), ++ submit->nr_atoms, ++ submit->stride, false); ++} ++#endif /* !MALI_USE_CSF */ + -+/** -+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces -+ * @kbdev: The device for which address spaces to be reprogrammed -+ * -+ * This function shall reprogram all address spaces previously assigned to -+ * contexts. It can be used after the GPU is reset. -+ * -+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be -+ * held whilst calling this function. -+ */ -+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); ++static int kbase_api_get_gpuprops(struct kbase_file *kfile, ++ struct kbase_ioctl_get_gpuprops *get_props) ++{ ++ struct kbase_gpu_props *kprops = &kfile->kbdev->gpu_props; ++ int err; + -+/** -+ * kbase_ctx_sched_as_to_ctx_refcount - Lookup a context based on its current -+ * address space and ensure that is stays scheduled in -+ * @kbdev: The device for which the returned context must belong -+ * @as_nr: address space assigned to the context of interest -+ * -+ * The context is refcounted as being busy to prevent it from scheduling -+ * out. It must be released with kbase_ctx_sched_release_ctx() when it is no -+ * longer required to stay scheduled in. -+ * -+ * This function can safely be called from IRQ context. 
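Editor's note: kbase_api_get_gpuprops() above follows the usual two-call size negotiation: a request with size == 0 returns the required buffer size, a buffer smaller than that is rejected with -EINVAL, and a large-enough buffer receives the property blob. The sketch below demonstrates both sides of that contract with a self-contained stand-in for the ioctl handler; the fake property blob and the query_gpuprops() helper are invented for the example, only the protocol mirrors the handler above.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for the GET_GPUPROPS path: size == 0 reports the required size,
 * a short buffer is rejected, otherwise the blob is copied out.  The blob
 * here is fake; in the driver it is kbdev->gpu_props.prop_buffer.
 */
static const uint8_t fake_props[] = { 0xde, 0xad, 0xbe, 0xef };

static int query_gpuprops(void *buffer, uint32_t size)
{
    if (size == 0)
        return (int)sizeof(fake_props);
    if (size < sizeof(fake_props))
        return -EINVAL;
    memcpy(buffer, fake_props, sizeof(fake_props));
    return (int)sizeof(fake_props);
}

int main(void)
{
    int len = query_gpuprops(NULL, 0);   /* first call: learn the size */
    void *buf = malloc(len > 0 ? (size_t)len : 1);

    if (buf && query_gpuprops(buf, (uint32_t)len) == len)  /* second call: fetch */
        printf("got %d property bytes\n", len);
    free(buf);
    return 0;
}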
-+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold the kbase_device::hwaccess_lock, because it will be used -+ * internally. -+ * -+ * Return: a valid struct kbase_context on success, which has been refcounted -+ * as being busy or return NULL on failure, indicating that no context was found -+ * in as_nr. -+ */ -+struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( -+ struct kbase_device *kbdev, size_t as_nr); ++ if (get_props->flags != 0) { ++ dev_err(kfile->kbdev->dev, "Unsupported flags to get_gpuprops"); ++ return -EINVAL; ++ } + -+/** -+ * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address -+ * space -+ * @kbdev: The device for which the returned context must belong -+ * @as_nr: address space assigned to the context of interest -+ * -+ * Return: a valid struct kbase_context on success or NULL on failure, -+ * indicating that no context was found in as_nr. -+ */ -+struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, -+ size_t as_nr); ++ if (get_props->size == 0) ++ return kprops->prop_buffer_size; ++ if (get_props->size < kprops->prop_buffer_size) ++ return -EINVAL; + -+/** -+ * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current -+ * address space. -+ * @kbdev: The device for which the returned context must belong -+ * @as_nr: address space assigned to the context of interest -+ * -+ * The following lock must be held by the caller: -+ * * kbase_device::hwaccess_lock -+ * -+ * Return: a valid struct kbase_context on success or NULL on failure, -+ * indicating that no context was found in as_nr. -+ */ -+struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( -+ struct kbase_device *kbdev, size_t as_nr); ++ err = copy_to_user(u64_to_user_ptr(get_props->buffer), ++ kprops->prop_buffer, ++ kprops->prop_buffer_size); ++ if (err) ++ return -EFAULT; ++ return kprops->prop_buffer_size; ++} + -+/** -+ * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, -+ * preventing it from being scheduled out. -+ * @kctx: Context to be refcounted -+ * -+ * The following locks must be held by the caller: -+ * &kbase_device.mmu_hw_mutex -+ * &kbase_device.hwaccess_lock -+ * -+ * Return: true if refcount succeeded, and the context will not be scheduled -+ * out, false if the refcount failed (because the context is being/has been -+ * scheduled out). -+ */ -+bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx); ++#if !MALI_USE_CSF ++static int kbase_api_post_term(struct kbase_context *kctx) ++{ ++ kbase_event_close(kctx); ++ return 0; ++} ++#endif /* !MALI_USE_CSF */ + -+/** -+ * kbase_ctx_sched_inc_refcount - Refcount a context as being busy, preventing -+ * it from being scheduled out. -+ * @kctx: Context to be refcounted -+ * -+ * The following locking conditions are made on the caller: -+ * * it must not hold kbase_device::mmu_hw_mutex and -+ * kbase_device::hwaccess_lock, because they will be used internally. -+ * -+ * Return: true if refcount succeeded, and the context will not be scheduled -+ * out, false if the refcount failed (because the context is being/has been -+ * scheduled out). 
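Editor's note: the kbase_ctx_sched_as_to_ctx_refcount() kerneldoc above describes the standard way a fault path pins the context that currently owns an address space. A hedged kernel-style sketch of that call pattern follows; handle_fault_for_as() is an invented name, and the sketch assumes the kbase headers added elsewhere in this patch.

/* Sketch only: pin the context bound to a faulting AS, act on it, then drop
 * the reference so the AS can be re-assigned by the scheduler.
 */
static void handle_fault_for_as(struct kbase_device *kbdev, unsigned int as_nr)
{
    struct kbase_context *kctx;

    /* Safe from IRQ context; takes hwaccess_lock internally and refcounts
     * the context so it cannot be scheduled out underneath us.
     */
    kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr);
    if (!kctx)
        return;  /* no context bound to this AS */

    dev_warn(kbdev->dev, "fault in AS %u (ctx %d_%d)\n",
             as_nr, kctx->tgid, kctx->id);

    /* Drop the reference; this helper acquires hwaccess_lock itself. */
    kbase_ctx_sched_release_ctx_lock(kctx);
}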
-+ */ -+bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); ++#if MALI_USE_CSF ++static int kbase_api_mem_alloc_ex(struct kbase_context *kctx, ++ union kbase_ioctl_mem_alloc_ex *alloc_ex) ++{ ++ struct kbase_va_region *reg; ++ u64 flags = alloc_ex->in.flags; ++ u64 gpu_va; + -+/** -+ * kbase_ctx_sched_release_ctx_lock - Release a reference count of a context -+ * @kctx: Context for which refcount should be decreased -+ * -+ * Effectivelly, this is a wrapper for kbase_ctx_sched_release_ctx, but -+ * kbase_device::hwaccess_lock is required NOT to be locked. -+ */ -+void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+#if MALI_USE_CSF -+/** -+ * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU -+ * address space slot assigned to it. -+ * -+ * @kctx: Context to be refcounted -+ * -+ * This function takes a reference on the context if it has a GPU address space -+ * slot assigned to it. The address space slot will not be available for -+ * re-assignment until the reference is released. -+ * -+ * Return: true if refcount succeeded and the address space slot will not be -+ * reassigned, false if the refcount failed (because the address space slot -+ * was not assigned). -+ */ -+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); -+#endif ++ bool gpu_executable = (flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx); ++ bool fixed_or_fixable = (flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)); + -+#endif /* _KBASE_CTX_SCHED_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.c b/drivers/gpu/arm/bifrost/mali_kbase_debug.c -new file mode 100644 -index 000000000..6d3b1093b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.c -@@ -0,0 +1,41 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2012-2014, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (!kbase_mem_allow_alloc(kctx)) ++ return -EINVAL; + -+#include ++ /* The driver counts the number of FIXABLE and FIXED allocations because ++ * they're not supposed to happen at the same time. However, that is not ++ * a security concern: nothing bad happens if the two types of allocations ++ * are made at the same time. The only reason why the driver is guarding ++ * against them is because there's no client use case that is supposed ++ * to need both of them at the same time, and the driver wants to help ++ * the user space catch some obvious mistake. 
++ * ++ * The driver is able to switch from FIXABLE allocations to FIXED and ++ * vice versa, if all the allocations of one kind are freed before trying ++ * to create allocations of a different kind. ++ */ ++ if ((flags & BASE_MEM_FIXED) && (atomic64_read(&kctx->num_fixable_allocs) > 0)) ++ return -EINVAL; + -+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { -+ NULL, -+ NULL -+}; ++ if ((flags & BASE_MEM_FIXABLE) && (atomic64_read(&kctx->num_fixed_allocs) > 0)) ++ return -EINVAL; + -+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) -+{ -+ kbasep_debug_assert_registered_cb.func = func; -+ kbasep_debug_assert_registered_cb.param = param; -+} ++ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) ++ return -ENOMEM; + -+void kbasep_debug_assert_call_hook(void) -+{ -+ if (kbasep_debug_assert_registered_cb.func != NULL) -+ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); -+} -+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); ++ /* The fixed_address parameter must be either a non-zero, page-aligned ++ * value for FIXED allocations or zero for any other kind of allocation. ++ */ ++ if (flags & BASE_MEM_FIXED) { ++ u64 aligned_fixed_address = alloc_ex->in.fixed_address & PAGE_MASK; + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.h b/drivers/gpu/arm/bifrost/mali_kbase_debug.h -new file mode 100644 -index 000000000..9d6ff73aa ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.h -@@ -0,0 +1,166 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2015, 2017, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if ((aligned_fixed_address == 0) || ++ (aligned_fixed_address != alloc_ex->in.fixed_address)) ++ return -EINVAL; + -+#ifndef _KBASE_DEBUG_H -+#define _KBASE_DEBUG_H ++ gpu_va = aligned_fixed_address; ++ } else if (alloc_ex->in.fixed_address != 0) { ++ return -EINVAL; ++ } + -+#include ++ /* For 64-bit clients, force SAME_VA up to 2^(47)-1. ++ * For 32-bit clients, force SAME_VA up to 2^(32)-1. ++ * ++ * In both cases, the executable and fixed/fixable zones, and ++ * the executable+fixed/fixable zone, are all above this range. ++ */ ++ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && ++ kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { ++ if (!gpu_executable && !fixed_or_fixable) ++ flags |= BASE_MEM_SAME_VA; ++ } + -+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ -+#define KBASE_DEBUG_SKIP_TRACE 0 ++ /* If CSF event memory allocation, need to force certain flags. ++ * SAME_VA - GPU address needs to be used as a CPU address, explicit ++ * mmap has to be avoided. ++ * CACHED_CPU - Frequent access to the event memory by CPU. 
++ * COHERENT_SYSTEM - No explicit cache maintenance around the access ++ * to event memory so need to leverage the coherency support. ++ */ ++ if (flags & BASE_MEM_CSF_EVENT) { ++ /* We cannot honor this request */ ++ if (gpu_executable || fixed_or_fixable) ++ return -ENOMEM; + -+/** @brief If different from 0, the trace will only contain the file and line. */ -+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 ++ flags |= (BASE_MEM_SAME_VA | ++ BASE_MEM_CACHED_CPU | ++ BASE_MEM_COHERENT_SYSTEM); ++ } + -+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ -+#ifndef KBASE_DEBUG_DISABLE_ASSERTS -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define KBASE_DEBUG_DISABLE_ASSERTS 0 -+#else -+#define KBASE_DEBUG_DISABLE_ASSERTS 1 -+#endif -+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++ reg = kbase_mem_alloc(kctx, alloc_ex->in.va_pages, alloc_ex->in.commit_pages, ++ alloc_ex->in.extension, &flags, &gpu_va, mmu_sync_info); + -+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ -+typedef void kbase_debug_assert_hook(void *); ++ if (!reg) ++ return -ENOMEM; + -+struct kbasep_debug_assert_cb { -+ kbase_debug_assert_hook *func; -+ void *param; -+}; ++ alloc_ex->out.flags = flags; ++ alloc_ex->out.gpu_va = gpu_va; + -+/** -+ * KBASEP_DEBUG_PRINT_TRACE - Private macro containing the format of the trace -+ * to display before every message @sa KBASE_DEBUG_SKIP_TRACE, -+ * KBASE_DEBUG_SKIP_FUNCTION_NAME -+ */ -+#if !KBASE_DEBUG_SKIP_TRACE -+#define KBASEP_DEBUG_PRINT_TRACE \ -+ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) -+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME -+#define KBASEP_DEBUG_PRINT_FUNCTION __func__ -+#else -+#define KBASEP_DEBUG_PRINT_FUNCTION "" -+#endif -+#else -+#define KBASEP_DEBUG_PRINT_TRACE "" -+#endif ++ return 0; ++} + -+/** -+ * KBASEP_DEBUG_ASSERT_OUT() - (Private) system printing -+ * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. -+ * @trace: location in the code from where the message is printed -+ * @function: function from where the message is printed -+ * @...: Format string followed by format arguments. -+ * -+ * @note function parameter cannot be concatenated with other strings -+ */ -+/* Select the correct system output function*/ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ -+ do { \ -+ pr_err("Mali: %s function:%s ", trace, function); \ -+ pr_err(__VA_ARGS__); \ -+ pr_err("\n"); \ -+ } while (false) -+#else -+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) 
CSTD_NOP() -+#endif ++static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) ++{ ++ int ret; ++ union kbase_ioctl_mem_alloc_ex mem_alloc_ex = { { 0 } }; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() -+#else -+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() -+#endif ++ mem_alloc_ex.in.va_pages = alloc->in.va_pages; ++ mem_alloc_ex.in.commit_pages = alloc->in.commit_pages; ++ mem_alloc_ex.in.extension = alloc->in.extension; ++ mem_alloc_ex.in.flags = alloc->in.flags; ++ mem_alloc_ex.in.fixed_address = 0; + -+/** -+ * KBASE_DEBUG_ASSERT(expr) - Calls @ref KBASE_PRINT_ASSERT and prints the -+ * expression @a expr if @a expr is false -+ * @expr: Boolean expression -+ * -+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 -+ * -+ */ -+#define KBASE_DEBUG_ASSERT(expr) \ -+ KBASE_DEBUG_ASSERT_MSG(expr, #expr) ++ ret = kbase_api_mem_alloc_ex(kctx, &mem_alloc_ex); + -+#if KBASE_DEBUG_DISABLE_ASSERTS -+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() -+#else -+/** -+ * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the -+ * given message if @a expr is false -+ * @expr: Boolean expression -+ * @...: Message to display when @a expr is false, as a format string followed -+ * by format arguments. -+ * -+ * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1 -+ */ -+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ -+ do { \ -+ if (!(expr)) { \ -+ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ -+ KBASE_CALL_ASSERT_HOOK();\ -+ BUG();\ -+ } \ -+ } while (false) -+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++ alloc->out.flags = mem_alloc_ex.out.flags; ++ alloc->out.gpu_va = mem_alloc_ex.out.gpu_va; + -+/** -+ * KBASE_DEBUG_CODE() - Executes the code inside the macro only in debug mode -+ * @X: Code to compile only in debug mode. -+ */ -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+#define KBASE_DEBUG_CODE(X) X ++ return ret; ++} +#else -+#define KBASE_DEBUG_CODE(X) CSTD_NOP() -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ -+/** @} */ ++static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) ++{ ++ struct kbase_va_region *reg; ++ u64 flags = alloc->in.flags; ++ u64 gpu_va; + -+/** -+ * kbase_debug_assert_register_hook - Register a function to call on ASSERT -+ * @func: the function to call when an assert is triggered. -+ * @param: the parameter to pass to \a func when calling it -+ * -+ * Such functions will \b only be called during Debug mode, and for debugging -+ * features \b only. Do not rely on them to be called in general use. -+ * -+ * To disable the hook, supply NULL to \a func. -+ * -+ * @note This function is not thread-safe, and should only be used to -+ * register/deregister once in the module's lifetime. -+ * -+ */ -+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+/** -+ * kbasep_debug_assert_call_hook - Call a debug assert hook previously -+ * registered with kbase_debug_assert_register_hook() -+ * -+ * @note This function is not thread-safe with respect to multiple threads -+ * registering functions and parameters with -+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the -+ * responsibility of the registered hook. 
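Editor's note: the assert-hook API documented above takes a void (*)(void *) callback plus an opaque parameter, is only invoked in CONFIG_MALI_BIFROST_DEBUG builds before BUG(), and is disabled again by registering NULL. A hedged sketch of how a platform integration might use it follows; my_assert_hook(), my_probe() and my_remove() are invented names, only the kbase_debug_assert_register_hook() calls come from this patch.

/* Sketch: dump some platform state when a KBASE_DEBUG_ASSERT() fires.
 * Assumes the mali_kbase_debug.h header added by this patch.
 */
static void my_assert_hook(void *param)
{
    struct kbase_device *kbdev = param;

    dev_err(kbdev->dev, "kbase assert fired, dumping platform state\n");
    /* ... platform-specific debug dump ... */
}

static void my_probe(struct kbase_device *kbdev)
{
    kbase_debug_assert_register_hook(my_assert_hook, kbdev);
}

static void my_remove(struct kbase_device *kbdev)
{
    /* Passing NULL disables the hook again. */
    kbase_debug_assert_register_hook(NULL, NULL);
}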
-+ */ -+void kbasep_debug_assert_call_hook(void); ++ if (!kbase_mem_allow_alloc(kctx)) ++ return -EINVAL; + -+#endif /* _KBASE_DEBUG_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c -new file mode 100644 -index 000000000..d6518b476 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c -@@ -0,0 +1,573 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) ++ return -ENOMEM; + -+#include -+#include -+#include ++ /* Force SAME_VA if a 64-bit client. ++ * The only exception is GPU-executable memory if an EXEC_VA zone ++ * has been initialized. In that case, GPU-executable memory may ++ * or may not be SAME_VA. ++ */ ++ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { ++ if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) ++ flags |= BASE_MEM_SAME_VA; ++ } + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extension, ++ &flags, &gpu_va, mmu_sync_info); + -+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) -+{ -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ unsigned long flags; -+ bool ret; ++ if (!reg) ++ return -ENOMEM; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ ret = !list_empty(event_list); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ alloc->out.flags = flags; ++ alloc->out.gpu_va = gpu_va; + -+ return ret; ++ return 0; +} ++#endif /* MALI_USE_CSF */ + -+static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) ++static int kbase_api_mem_query(struct kbase_context *kctx, ++ union kbase_ioctl_mem_query *query) +{ -+ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; -+ struct base_job_fault_event *event; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); -+ list_for_each_entry(event, event_list, head) { -+ if (event->katom->kctx == kctx) { -+ list_del(&event->head); -+ spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); -+ -+ wake_up(&kctx->kbdev->job_fault_resume_wq); -+ flush_work(&event->job_fault_work); -+ -+ /* job_fault_event_list can only have a single atom for -+ * each context. 
-+ */ -+ return; -+ } -+ } -+ spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); ++ return kbase_mem_query(kctx, query->in.gpu_addr, ++ query->in.query, &query->out.value); +} + -+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) ++static int kbase_api_mem_free(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_free *free) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; -+ struct base_job_fault_event *event; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ if (list_empty(event_list)) { -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ return true; -+ } -+ list_for_each_entry(event, event_list, head) { -+ if (event->katom->kctx == kctx) { -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, -+ flags); -+ return false; -+ } -+ } -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ return true; ++ return kbase_mem_free(kctx, free->gpu_addr); +} + -+static int wait_for_job_fault(struct kbase_device *kbdev) ++#if !MALI_USE_CSF ++static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, ++ union kbase_kinstr_jm_fd *arg) +{ -+#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE -+ int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, -+ kbase_is_job_fault_event_pending(kbdev), -+ msecs_to_jiffies(2000)); -+ if (ret == 0) -+ return -EAGAIN; -+ else if (ret > 0) -+ return 0; -+ else -+ return ret; -+#else -+ return wait_event_interruptible(kbdev->job_fault_wq, -+ kbase_is_job_fault_event_pending(kbdev)); ++ return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); ++} +#endif ++ ++static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, ++ struct kbase_ioctl_hwcnt_reader_setup *setup) ++{ ++ return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); +} + -+/* wait until the fault happen and copy the event */ -+static int kbase_job_fault_event_wait(struct kbase_device *kbdev, -+ struct base_job_fault_event *event) ++static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, ++ union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) +{ -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ struct base_job_fault_event *event_in; -+ unsigned long flags; ++ u32 flags = timeinfo->in.request_flags; ++ struct timespec64 ts = { 0 }; ++ u64 timestamp = 0; ++ u64 cycle_cnt = 0; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ while (list_empty(event_list)) { -+ int err; ++ kbase_pm_context_active(kctx->kbdev); + -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ kbase_backend_get_gpu_time(kctx->kbdev, ++ (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL, ++ (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? ×tamp : NULL, ++ (flags & BASE_TIMEINFO_MONOTONIC_FLAG) ? 
&ts : NULL); + -+ err = wait_for_job_fault(kbdev); -+ if (err) -+ return err; ++ if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ++ timeinfo->out.timestamp = timestamp; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ } ++ if (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ++ timeinfo->out.cycle_counter = cycle_cnt; + -+ event_in = list_entry(event_list->next, -+ struct base_job_fault_event, head); -+ event->event_code = event_in->event_code; -+ event->katom = event_in->katom; ++ if (flags & BASE_TIMEINFO_MONOTONIC_FLAG) { ++ timeinfo->out.sec = ts.tv_sec; ++ timeinfo->out.nsec = ts.tv_nsec; ++ } + -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ kbase_pm_context_idle(kctx->kbdev); + + return 0; -+ +} + -+/* remove the event from the queue */ -+static struct base_job_fault_event *kbase_job_fault_event_dequeue( -+ struct kbase_device *kbdev, struct list_head *event_list) ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++static int kbase_api_hwcnt_set(struct kbase_context *kctx, ++ struct kbase_ioctl_hwcnt_values *values) +{ -+ struct base_job_fault_event *event; -+ -+ event = list_entry(event_list->next, -+ struct base_job_fault_event, head); -+ list_del(event_list->next); -+ -+ return event; -+ ++ return gpu_model_set_dummy_prfcnt_user_sample(u64_to_user_ptr(values->data), values->size); +} ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ + -+/* Remove all the following atoms after the failed atom in the same context -+ * Call the postponed bottom half of job done. -+ * Then, this context could be rescheduled. -+ */ -+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) ++static int kbase_api_disjoint_query(struct kbase_context *kctx, ++ struct kbase_ioctl_disjoint_query *query) +{ -+ struct list_head *event_list = &kctx->job_fault_resume_event_list; -+ -+ while (!list_empty(event_list)) { -+ struct base_job_fault_event *event; -+ -+ event = kbase_job_fault_event_dequeue(kctx->kbdev, -+ &kctx->job_fault_resume_event_list); -+ kbase_jd_done_worker(&event->katom->work); -+ } ++ query->counter = kbase_disjoint_event_get(kctx->kbdev); + ++ return 0; +} + -+static void kbase_job_fault_resume_worker(struct work_struct *data) ++static int kbase_api_get_ddk_version(struct kbase_context *kctx, ++ struct kbase_ioctl_get_ddk_version *version) +{ -+ struct base_job_fault_event *event = container_of(data, -+ struct base_job_fault_event, job_fault_work); -+ struct kbase_context *kctx; -+ struct kbase_jd_atom *katom; -+ -+ katom = event->katom; -+ kctx = katom->kctx; ++ int ret; ++ int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + -+ dev_info(kctx->kbdev->dev, "Job dumping wait\n"); ++ if (version->version_buffer == 0) ++ return len; + -+ /* When it was waked up, it need to check if queue is empty or the -+ * failed atom belongs to different context. If yes, wake up. Both -+ * of them mean the failed job has been dumped. Please note, it -+ * should never happen that the job_fault_event_list has the two -+ * atoms belong to the same context. -+ */ -+ wait_event(kctx->kbdev->job_fault_resume_wq, -+ kbase_ctx_has_no_event_pending(kctx)); ++ if (version->size < len) ++ return -EOVERFLOW; + -+ atomic_set(&kctx->job_fault_count, 0); -+ kbase_jd_done_worker(&katom->work); ++ ret = copy_to_user(u64_to_user_ptr(version->version_buffer), ++ KERNEL_SIDE_DDK_VERSION_STRING, ++ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + -+ /* In case the following atoms were scheduled during failed job dump -+ * the job_done_worker was held. 
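Editor's note: kbase_api_get_cpu_gpu_timeinfo() above only fills the output fields whose BASE_TIMEINFO_*_FLAG bit was set in request_flags, so callers must treat unrequested fields as undefined. A small self-contained illustration of that contract follows; the flag values, struct layout and numbers are stand-ins, not the real uAPI.

#include <stdint.h>
#include <stdio.h>

#define TIMEINFO_MONOTONIC_FLAG      (1u << 0)
#define TIMEINFO_TIMESTAMP_FLAG      (1u << 1)
#define TIMEINFO_CYCLE_COUNTER_FLAG  (1u << 2)

struct timeinfo_out {
    uint64_t timestamp;
    uint64_t cycle_counter;
    uint64_t sec;
    uint32_t nsec;
};

/* Mirrors the handler: each output is written only when requested. */
static void fill_timeinfo(uint32_t flags, struct timeinfo_out *out)
{
    if (flags & TIMEINFO_TIMESTAMP_FLAG)
        out->timestamp = 123456789u;      /* fake GPU timestamp */
    if (flags & TIMEINFO_CYCLE_COUNTER_FLAG)
        out->cycle_counter = 42u;         /* fake cycle counter */
    if (flags & TIMEINFO_MONOTONIC_FLAG) {
        out->sec = 100u;                  /* fake monotonic time */
        out->nsec = 500u;
    }
}

int main(void)
{
    struct timeinfo_out out = { 0 };

    fill_timeinfo(TIMEINFO_TIMESTAMP_FLAG | TIMEINFO_CYCLE_COUNTER_FLAG, &out);
    printf("ts=%llu cycles=%llu\n",
           (unsigned long long)out.timestamp,
           (unsigned long long)out.cycle_counter);
    /* out.sec / out.nsec were not requested and must not be relied upon */
    return 0;
}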
We need to rerun it after the dump -+ * was finished -+ */ -+ kbase_job_fault_resume_event_cleanup(kctx); ++ if (ret) ++ return -EFAULT; + -+ dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); ++ return len; +} + -+static struct base_job_fault_event *kbase_job_fault_event_queue( -+ struct list_head *event_list, -+ struct kbase_jd_atom *atom, -+ u32 completion_code) ++static int kbase_api_mem_jit_init(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_jit_init *jit_init) +{ -+ struct base_job_fault_event *event; -+ -+ event = &atom->fault_event; -+ -+ event->katom = atom; -+ event->event_code = completion_code; -+ -+ list_add_tail(&event->head, event_list); ++ int i; + -+ return event; ++ for (i = 0; i < sizeof(jit_init->padding); i++) { ++ /* Ensure all padding bytes are 0 for potential future ++ * extension ++ */ ++ if (jit_init->padding[i]) ++ return -EINVAL; ++ } + ++ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, ++ jit_init->max_allocations, jit_init->trim_level, ++ jit_init->group_id, jit_init->phys_pages); +} + -+static void kbase_job_fault_event_post(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, u32 completion_code) ++static int kbase_api_mem_exec_init(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_exec_init *exec_init) +{ -+ struct base_job_fault_event *event; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, -+ katom, completion_code); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ -+ wake_up_interruptible(&kbdev->job_fault_wq); -+ -+ INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); -+ queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); ++ return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); ++} + -+ dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", -+ katom->kctx->tgid, katom->kctx->id); ++static int kbase_api_mem_sync(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_sync *sync) ++{ ++ struct basep_syncset sset = { ++ .mem_handle.basep.handle = sync->handle, ++ .user_addr = sync->user_addr, ++ .size = sync->size, ++ .type = sync->type ++ }; + ++ return kbase_sync_now(kctx, &sset); +} + -+/* -+ * This function will process the job fault -+ * Get the register copy -+ * Send the failed job dump event -+ * Create a Wait queue to wait until the job dump finish -+ */ -+ -+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, -+ u32 completion_code) ++static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, ++ union kbase_ioctl_mem_find_cpu_offset *find) +{ -+ struct kbase_context *kctx = katom->kctx; -+ -+ /* Check if dumping is in the process -+ * only one atom of each context can be dumped at the same time -+ * If the atom belongs to different context, it can be dumped -+ */ -+ if (atomic_read(&kctx->job_fault_count) > 0) { -+ kbase_job_fault_event_queue( -+ &kctx->job_fault_resume_event_list, -+ katom, completion_code); -+ dev_info(kctx->kbdev->dev, "queue:%d\n", -+ kbase_jd_atom_id(kctx, katom)); -+ return true; -+ } ++ return kbasep_find_enclosing_cpu_mapping_offset( ++ kctx, ++ find->in.cpu_addr, ++ find->in.size, ++ &find->out.offset); ++} + -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) -+ return false; ++static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx, ++ union kbase_ioctl_mem_find_gpu_start_and_offset *find) ++{ ++ return kbasep_find_enclosing_gpu_mapping_start_and_offset( ++ 
kctx, ++ find->in.gpu_addr, ++ find->in.size, ++ &find->out.start, ++ &find->out.offset); ++} + -+ if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) { ++static int kbase_api_get_context_id(struct kbase_context *kctx, ++ struct kbase_ioctl_get_context_id *info) ++{ ++ info->id = kctx->id; + -+ if (completion_code != BASE_JD_EVENT_DONE) { ++ return 0; ++} + -+ if (kbase_job_fault_get_reg_snapshot(kctx) == false) { -+ dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); -+ return false; -+ } ++static int kbase_api_tlstream_acquire(struct kbase_context *kctx, ++ struct kbase_ioctl_tlstream_acquire *acquire) ++{ ++ return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags); ++} + -+ kbase_job_fault_event_post(kctx->kbdev, katom, -+ completion_code); -+ atomic_inc(&kctx->job_fault_count); -+ dev_info(kctx->kbdev->dev, "post:%d\n", -+ kbase_jd_atom_id(kctx, katom)); -+ return true; ++static int kbase_api_tlstream_flush(struct kbase_context *kctx) ++{ ++ kbase_timeline_streams_flush(kctx->kbdev->timeline); + -+ } -+ } -+ return false; ++ return 0; ++} + ++static int kbase_api_mem_commit(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_commit *commit) ++{ ++ return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); +} + -+static int debug_job_fault_show(struct seq_file *m, void *v) ++static int kbase_api_mem_alias(struct kbase_context *kctx, ++ union kbase_ioctl_mem_alias *alias) +{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event = (struct base_job_fault_event *)v; -+ struct kbase_context *kctx = event->katom->kctx; -+ int i; ++ struct base_mem_aliasing_info *ai; ++ u64 flags; ++ int err; + -+ dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", -+ kctx->tgid, kctx->id, event->reg_offset); ++ if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) ++ return -EINVAL; + -+ if (kctx->reg_dump == NULL) { -+ dev_warn(kbdev->dev, "reg dump is NULL"); -+ return -1; ++ ai = vmalloc(sizeof(*ai) * alias->in.nents); ++ if (!ai) ++ return -ENOMEM; ++ ++ err = copy_from_user(ai, ++ u64_to_user_ptr(alias->in.aliasing_info), ++ sizeof(*ai) * alias->in.nents); ++ if (err) { ++ vfree(ai); ++ return -EFAULT; + } + -+ if (kctx->reg_dump[event->reg_offset] == -+ REGISTER_DUMP_TERMINATION_FLAG) { -+ /* Return the error here to stop the read. And the -+ * following next() will not be called. 
The stop can -+ * get the real event resource and release it -+ */ -+ return -1; ++ flags = alias->in.flags; ++ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) { ++ vfree(ai); ++ return -EINVAL; + } + -+ if (event->reg_offset == 0) -+ seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); ++ alias->out.gpu_va = kbase_mem_alias(kctx, &flags, ++ alias->in.stride, alias->in.nents, ++ ai, &alias->out.va_pages); + -+ for (i = 0; i < 50; i++) { -+ if (kctx->reg_dump[event->reg_offset] == -+ REGISTER_DUMP_TERMINATION_FLAG) { -+ break; -+ } -+ seq_printf(m, "%08x: %08x\n", -+ kctx->reg_dump[event->reg_offset], -+ kctx->reg_dump[1+event->reg_offset]); -+ event->reg_offset += 2; ++ alias->out.flags = flags; + -+ } ++ vfree(ai); + ++ if (alias->out.gpu_va == 0) ++ return -ENOMEM; + + return 0; +} -+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) ++ ++static int kbase_api_mem_import(struct kbase_context *kctx, ++ union kbase_ioctl_mem_import *import) +{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event = (struct base_job_fault_event *)v; ++ int ret; ++ u64 flags = import->in.flags; + -+ dev_info(kbdev->dev, "debug job fault seq next:%d, %d", -+ event->reg_offset, (int)*pos); ++ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) ++ return -ENOMEM; + -+ return event; ++ ret = kbase_mem_import(kctx, ++ import->in.type, ++ u64_to_user_ptr(import->in.phandle), ++ import->in.padding, ++ &import->out.gpu_va, ++ &import->out.va_pages, ++ &flags); ++ ++ import->out.flags = flags; ++ ++ return ret; +} + -+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) ++static int kbase_api_mem_flags_change(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_flags_change *change) +{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event; ++ if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY) ++ return -ENOMEM; + -+ dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); ++ return kbase_mem_flags_change(kctx, change->gpu_va, ++ change->flags, change->mask); ++} + -+ /* The condition is trick here. It needs make sure the -+ * fault hasn't happened and the dumping hasn't been started, -+ * or the dumping has finished ++static int kbase_api_stream_create(struct kbase_context *kctx, ++ struct kbase_ioctl_stream_create *stream) ++{ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ int fd, ret; ++ ++ /* Name must be NULL-terminated and padded with NULLs, so check last ++ * character is NULL + */ -+ if (*pos == 0) { -+ event = kmalloc(sizeof(*event), GFP_KERNEL); -+ if (!event) -+ return NULL; -+ event->reg_offset = 0; -+ if (kbase_job_fault_event_wait(kbdev, event)) { -+ kfree(event); -+ return NULL; -+ } ++ if (stream->name[sizeof(stream->name)-1] != 0) ++ return -EINVAL; + -+ /* The cache flush workaround is called in bottom half of -+ * job done but we delayed it. Now we should clean cache -+ * earlier. Then the GPU memory dump should be correct. 
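Editor's note: debug_job_fault_show() above emits the register snapshot as (register address, value) pairs, one "%08x: %08x" line each, and stops when it meets REGISTER_DUMP_TERMINATION_FLAG in the address slot. The self-contained sketch below walks the same in-memory layout from the consumer side; the sample register offsets and values are made up.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFFu

/* Walk a dump laid out as address/value pairs, terminated by the sentinel
 * in the address slot, exactly as debug_job_fault_show() prints it.
 */
static void print_reg_dump(const uint32_t *dump)
{
    for (size_t i = 0; dump[i] != REGISTER_DUMP_TERMINATION_FLAG; i += 2)
        printf("%08" PRIx32 ": %08" PRIx32 "\n", dump[i], dump[i + 1]);
}

int main(void)
{
    /* Made-up sample: two register/value pairs, then the termination flag. */
    const uint32_t sample[] = {
        0x00000000, 0x60000000,
        0x00000020, 0x00000001,
        REGISTER_DUMP_TERMINATION_FLAG,
    };

    print_reg_dump(sample);
    return 0;
}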
-+ */ -+ kbase_backend_cache_clean(kbdev, event->katom); -+ } else -+ return NULL; ++ ret = kbase_sync_fence_stream_create(stream->name, &fd); + -+ return event; ++ if (ret) ++ return ret; ++ return fd; ++#else ++ return -ENOENT; ++#endif +} + -+static void debug_job_fault_stop(struct seq_file *m, void *v) ++static int kbase_api_fence_validate(struct kbase_context *kctx, ++ struct kbase_ioctl_fence_validate *validate) +{ -+ struct kbase_device *kbdev = m->private; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ return kbase_sync_fence_validate(validate->fd); ++#else ++ return -ENOENT; ++#endif ++} + -+ /* here we wake up the kbase_jd_done_worker after stop, it needs -+ * get the memory dump before the register dump in debug daemon, -+ * otherwise, the memory dump may be incorrect. -+ */ ++static int kbase_api_mem_profile_add(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_profile_add *data) ++{ ++ char *buf; ++ int err; + -+ if (v != NULL) { -+ kfree(v); -+ dev_info(kbdev->dev, "debug job fault seq stop stage 1"); ++ if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { ++ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big"); ++ return -EINVAL; ++ } + -+ } else { -+ unsigned long flags; ++ if (!data->len) { ++ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer size is 0"); ++ /* Should return -EINVAL, but returning -ENOMEM for backwards compat */ ++ return -ENOMEM; ++ } + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ if (!list_empty(&kbdev->job_fault_event_list)) { -+ kbase_job_fault_event_dequeue(kbdev, -+ &kbdev->job_fault_event_list); -+ wake_up(&kbdev->job_fault_resume_wq); -+ } -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ dev_info(kbdev->dev, "debug job fault seq stop stage 2"); ++ buf = kmalloc(data->len, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ err = copy_from_user(buf, u64_to_user_ptr(data->buffer), ++ data->len); ++ if (err) { ++ kfree(buf); ++ return -EFAULT; + } + ++ return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); +} + -+static const struct seq_operations ops = { -+ .start = debug_job_fault_start, -+ .next = debug_job_fault_next, -+ .stop = debug_job_fault_stop, -+ .show = debug_job_fault_show, -+}; ++#if !MALI_USE_CSF ++static int kbase_api_soft_event_update(struct kbase_context *kctx, ++ struct kbase_ioctl_soft_event_update *update) ++{ ++ if (update->flags != 0) ++ return -EINVAL; + -+static int debug_job_fault_open(struct inode *in, struct file *file) ++ return kbase_soft_event_update(kctx, update->event, update->new_status); ++} ++#endif /* !MALI_USE_CSF */ ++ ++static int kbase_api_sticky_resource_map(struct kbase_context *kctx, ++ struct kbase_ioctl_sticky_resource_map *map) +{ -+ struct kbase_device *kbdev = in->i_private; ++ int ret; ++ u64 i; ++ u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; + -+ if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) { -+ dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed"); -+ return -EBUSY; -+ } ++ if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) ++ return -EOVERFLOW; + -+ seq_open(file, &ops); ++ ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), ++ sizeof(u64) * map->count); + -+ ((struct seq_file *)file->private_data)->private = kbdev; -+ dev_info(kbdev->dev, "debug job fault seq open"); ++ if (ret != 0) ++ return -EFAULT; + ++ kbase_gpu_vm_lock(kctx); + -+ return 0; ++ for (i = 0; i < map->count; i++) { ++ if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { ++ /* Invalid resource */ ++ ret = -EINVAL; ++ break; ++ } ++ } ++ 
++ if (ret != 0) { ++ while (i > 0) { ++ i--; ++ kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i]); ++ } ++ } + ++ kbase_gpu_vm_unlock(kctx); ++ ++ return ret; +} + -+static int debug_job_fault_release(struct inode *in, struct file *file) ++static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, ++ struct kbase_ioctl_sticky_resource_unmap *unmap) +{ -+ struct kbase_device *kbdev = in->i_private; -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ unsigned long flags; ++ int ret; ++ u64 i; ++ u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; + -+ seq_release(in, file); ++ if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) ++ return -EOVERFLOW; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), ++ sizeof(u64) * unmap->count); + -+ /* Disable job fault dumping. This will let kbase run jobs as normal, -+ * without blocking waiting for a job_fault client to read failed jobs. -+ * -+ * After this a new client may open the file, and may re-enable job -+ * fault dumping, but the job_fault_event_lock we hold here will block -+ * that from interfering until after we've completed the cleanup. -+ */ -+ atomic_dec(&kbdev->job_fault_debug); ++ if (ret != 0) ++ return -EFAULT; + -+ /* Clean the unprocessed job fault. After that, all the suspended -+ * contexts could be rescheduled. Remove all the failed atoms that -+ * belong to different contexts Resume all the contexts that were -+ * suspend due to failed job. -+ */ -+ while (!list_empty(event_list)) { -+ kbase_job_fault_event_dequeue(kbdev, event_list); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ wake_up(&kbdev->job_fault_resume_wq); -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ } ++ kbase_gpu_vm_lock(kctx); + -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ for (i = 0; i < unmap->count; i++) { ++ if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { ++ /* Invalid resource, but we keep going anyway */ ++ ret = -EINVAL; ++ } ++ } + -+ dev_info(kbdev->dev, "debug job fault seq close"); ++ kbase_gpu_vm_unlock(kctx); + -+ return 0; ++ return ret; +} + -+static const struct file_operations kbasep_debug_job_fault_fops = { -+ .owner = THIS_MODULE, -+ .open = debug_job_fault_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = debug_job_fault_release, -+}; ++#if MALI_UNIT_TEST + -+/* -+ * Initialize debugfs entry for job fault dump -+ */ -+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) ++static int kbase_api_tlstream_stats(struct kbase_context *kctx, ++ struct kbase_ioctl_tlstream_stats *stats) +{ -+ debugfs_create_file("job_fault", 0400, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_debug_job_fault_fops); ++ kbase_timeline_stats(kctx->kbdev->timeline, ++ &stats->bytes_collected, ++ &stats->bytes_generated); ++ ++ return 0; +} ++#endif /* MALI_UNIT_TEST */ + ++#if MALI_USE_CSF ++static int kbasep_cs_event_signal(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal_notify_gpu(kctx); ++ return 0; ++} + -+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++static int kbasep_cs_queue_register(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg) +{ ++ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; + -+ INIT_LIST_HEAD(&kbdev->job_fault_event_list); ++ return kbase_csf_queue_register(kctx, reg); ++} + -+ init_waitqueue_head(&(kbdev->job_fault_wq)); -+ init_waitqueue_head(&(kbdev->job_fault_resume_wq)); -+ 
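Editor's note: kbase_api_sticky_resource_map() above uses the classic all-or-nothing idiom: acquire the user-supplied resources in order and, if entry i fails, release entries i-1 down to 0 before returning the error. The self-contained sketch below shows the same idiom with invented acquire/release stubs in place of kbase_sticky_resource_acquire()/kbase_sticky_resource_release_force().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stubs: pretend an address of 0 is an invalid resource. */
static bool acquire(uint64_t addr) { return addr != 0; }
static void release(uint64_t addr) { printf("released %#llx\n", (unsigned long long)addr); }

/* Acquire every address or none: on the first failure, roll back in reverse. */
static int map_all(const uint64_t *addrs, size_t count)
{
    size_t i;

    for (i = 0; i < count; i++) {
        if (!acquire(addrs[i])) {
            while (i > 0) {
                i--;
                release(addrs[i]);
            }
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    const uint64_t ok[]  = { 0x1000, 0x2000 };
    const uint64_t bad[] = { 0x1000, 0, 0x3000 };  /* middle entry fails */

    printf("ok:  %d\n", map_all(ok, 2));   /* 0 */
    printf("bad: %d\n", map_all(bad, 3));  /* -1, after rolling back 0x1000 */
    return 0;
}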
spin_lock_init(&kbdev->job_fault_event_lock); ++static int kbasep_cs_queue_register_ex(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register_ex *reg) ++{ ++ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; + -+ kbdev->job_fault_resume_workq = alloc_workqueue( -+ "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); -+ if (!kbdev->job_fault_resume_workq) -+ return -ENOMEM; ++ return kbase_csf_queue_register_ex(kctx, reg); ++} + -+ atomic_set(&kbdev->job_fault_debug, 0); ++static int kbasep_cs_queue_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_terminate *term) ++{ ++ kbase_csf_queue_terminate(kctx, term); + + return 0; +} + -+/* -+ * Release the relevant resource per device -+ */ -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++static int kbasep_cs_queue_bind(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_bind *bind) +{ -+ destroy_workqueue(kbdev->job_fault_resume_workq); ++ return kbase_csf_queue_bind(kctx, bind); +} + ++static int kbasep_cs_queue_kick(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_kick *kick) ++{ ++ return kbase_csf_queue_kick(kctx, kick); ++} + -+/* -+ * Initialize the relevant data structure per context -+ */ -+int kbase_debug_job_fault_context_init(struct kbase_context *kctx) ++static int kbasep_cs_queue_group_create_1_6( ++ struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_group_create_1_6 *create) +{ ++ union kbase_ioctl_cs_queue_group_create ++ new_create = { .in = { ++ .tiler_mask = create->in.tiler_mask, ++ .fragment_mask = ++ create->in.fragment_mask, ++ .compute_mask = create->in.compute_mask, ++ .cs_min = create->in.cs_min, ++ .priority = create->in.priority, ++ .tiler_max = create->in.tiler_max, ++ .fragment_max = create->in.fragment_max, ++ .compute_max = create->in.compute_max, ++ } }; + -+ /* We need allocate double size register range -+ * Because this memory will keep the register address and value -+ */ -+ kctx->reg_dump = vmalloc(0x4000 * 2); -+ if (kctx->reg_dump != NULL) { -+ if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == -+ false) { -+ vfree(kctx->reg_dump); -+ kctx->reg_dump = NULL; -+ } -+ INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); -+ atomic_set(&kctx->job_fault_count, 0); -+ } ++ int ret = kbase_csf_queue_group_create(kctx, &new_create); + -+ return 0; -+} ++ create->out.group_handle = new_create.out.group_handle; ++ create->out.group_uid = new_create.out.group_uid; + -+/* -+ * release the relevant resource per context -+ */ -+void kbase_debug_job_fault_context_term(struct kbase_context *kctx) ++ return ret; ++} ++static int kbasep_cs_queue_group_create(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_group_create *create) +{ -+ vfree(kctx->reg_dump); ++ return kbase_csf_queue_group_create(kctx, create); +} + -+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) ++static int kbasep_cs_queue_group_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_group_term *term) +{ -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); -+ -+ /* Return early if the job fault part of the kbase_device is not -+ * initialized yet. An error can happen during the device probe after -+ * the privileged Kbase context was created for the HW counter dumping -+ * but before the job fault part is initialized. 
-+ */ -+ if (!kctx->kbdev->job_fault_resume_workq) -+ return; ++ kbase_csf_queue_group_terminate(kctx, term->group_handle); + -+ kbase_ctx_remove_pending_event(kctx); ++ return 0; +} + -+#else /* CONFIG_DEBUG_FS */ -+ -+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++static int kbasep_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *new) +{ -+ return 0; ++ return kbase_csf_kcpu_queue_new(kctx, new); +} + -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++static int kbasep_kcpu_queue_delete(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *delete) +{ ++ return kbase_csf_kcpu_queue_delete(kctx, delete); +} + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h -new file mode 100644 -index 000000000..059d9c455 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h -@@ -0,0 +1,121 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enqueue) ++{ ++ return kbase_csf_kcpu_queue_enqueue(kctx, enqueue); ++} + -+#ifndef _KBASE_DEBUG_JOB_FAULT_H -+#define _KBASE_DEBUG_JOB_FAULT_H ++static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, ++ union kbase_ioctl_cs_tiler_heap_init *heap_init) ++{ ++ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ++ return -EINVAL; + -+#include -+#include ++ kctx->jit_group_id = heap_init->in.group_id; + -+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF ++ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, ++ heap_init->in.initial_chunks, heap_init->in.max_chunks, ++ heap_init->in.target_in_flight, heap_init->in.buf_desc_va, ++ &heap_init->out.gpu_heap_va, ++ &heap_init->out.first_chunk_va); ++} + -+/** -+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue -+ * per device and initialize the required lists. -+ * @kbdev: Device pointer -+ * -+ * Return: Zero on success or a negative error code. 
-+ */ -+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); ++static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, ++ union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) ++{ ++ if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ++ return -EINVAL; + -+/** -+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); ++ kctx->jit_group_id = heap_init->in.group_id; + -+/** -+ * kbase_debug_job_fault_dev_term - Clean up resources created in -+ * kbase_debug_job_fault_dev_init. -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); ++ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, ++ heap_init->in.initial_chunks, heap_init->in.max_chunks, ++ heap_init->in.target_in_flight, 0, ++ &heap_init->out.gpu_heap_va, ++ &heap_init->out.first_chunk_va); ++} + -+/** -+ * kbase_debug_job_fault_context_init - Initialize the relevant -+ * data structure per context -+ * @kctx: KBase context pointer -+ * Return: 0 on success -+ */ -+int kbase_debug_job_fault_context_init(struct kbase_context *kctx); ++static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_tiler_heap_term *heap_term) ++{ ++ return kbase_csf_tiler_heap_term(kctx, heap_term->gpu_heap_va); ++} + -+/** -+ * kbase_debug_job_fault_context_term - Release the relevant -+ * resource per context -+ * @kctx: KBase context pointer -+ */ -+void kbase_debug_job_fault_context_term(struct kbase_context *kctx); ++static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, ++ union kbase_ioctl_cs_get_glb_iface *param) ++{ ++ struct basep_cs_stream_control *stream_data = NULL; ++ struct basep_cs_group_control *group_data = NULL; ++ void __user *user_groups, *user_streams; ++ int err = 0; ++ u32 const max_group_num = param->in.max_group_num; ++ u32 const max_total_stream_num = param->in.max_total_stream_num; + -+/** -+ * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault -+ * dumping on context termination. -+ * -+ * @kctx: KBase context pointer -+ * -+ * This function is called during context termination to unblock the atom for -+ * which the job fault occurred and also the atoms following it. This is needed -+ * otherwise the wait for zero jobs could timeout (leading to an assertion -+ * failure, kernel panic in debug builds) in the pathological case where -+ * although the thread/daemon capturing the job fault events is running, -+ * but for some reasons has stopped consuming the events. -+ */ -+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); ++ if (max_group_num > MAX_SUPPORTED_CSGS) ++ return -EINVAL; + -+/** -+ * kbase_debug_job_fault_process - Process the failed job. 
-+ * -+ * @katom: The failed atom pointer -+ * @completion_code: the job status -+ * -+ * It will send a event and wake up the job fault waiting queue -+ * Then create a work queue to wait for job dump finish -+ * This function should be called in the interrupt handler and before -+ * jd_done that make sure the jd_done_worker will be delayed until the -+ * job dump finish -+ * -+ * Return: true if dump is going on -+ */ -+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, -+ u32 completion_code); ++ if (max_total_stream_num > ++ MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) ++ return -EINVAL; + -+/** -+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers -+ * address during the job fault process, the relevant registers will -+ * be saved when a job fault happen -+ * @kctx: KBase context pointer -+ * @reg_range: Maximum register address space -+ * -+ * Return: true if initializing successfully -+ */ -+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, -+ int reg_range); ++ user_groups = u64_to_user_ptr(param->in.groups_ptr); ++ user_streams = u64_to_user_ptr(param->in.streams_ptr); + -+/** -+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for -+ * failed job dump -+ * -+ * @kctx: KBase context pointer -+ * -+ * Return: true if getting registers successfully -+ */ -+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); ++ if (max_group_num > 0) { ++ if (!user_groups) ++ err = -EINVAL; ++ else { ++ group_data = kcalloc(max_group_num, ++ sizeof(*group_data), GFP_KERNEL); ++ if (!group_data) ++ err = -ENOMEM; ++ } ++ } + -+#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c -new file mode 100644 -index 000000000..418bb1908 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c -@@ -0,0 +1,138 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if (max_total_stream_num > 0) { ++ if (!user_streams) ++ err = -EINVAL; ++ else { ++ stream_data = kcalloc(max_total_stream_num, ++ sizeof(*stream_data), GFP_KERNEL); ++ if (!stream_data) ++ err = -ENOMEM; ++ } ++ } + -+/* -+ * Debugfs interface to dump information about GPU allocations in kctx -+ */ ++ if (!err) { ++ param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( ++ kctx->kbdev, group_data, max_group_num, stream_data, ++ max_total_stream_num, ¶m->out.glb_version, ++ ¶m->out.features, ¶m->out.group_num, ++ ¶m->out.prfcnt_size, ¶m->out.instr_features); + -+#include "mali_kbase_debug_mem_allocs.h" -+#include "mali_kbase.h" ++ if (copy_to_user(user_groups, group_data, ++ MIN(max_group_num, param->out.group_num) * ++ sizeof(*group_data))) ++ err = -EFAULT; ++ } + -+#include -+#include -+#include ++ if (!err) ++ if (copy_to_user(user_streams, stream_data, ++ MIN(max_total_stream_num, param->out.total_stream_num) * ++ sizeof(*stream_data))) ++ err = -EFAULT; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ kfree(group_data); ++ kfree(stream_data); ++ return err; ++} + -+/** -+ * debug_zone_mem_allocs_show - Show information from specific rbtree -+ * @zone: Name of GPU virtual memory zone -+ * @rbtree: Pointer to the root of the rbtree associated with @zone -+ * @sfile: The debugfs entry -+ * -+ * This function is called to show information about all the GPU allocations of a -+ * a particular zone within GPU virtual memory space of a context. -+ * The information like the start virtual address and size (in bytes) is shown for -+ * every GPU allocation mapped in the zone. -+ */ -+static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) ++static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) +{ -+ struct rb_node *p; -+ struct kbase_va_region *reg; -+ const char *type_names[5] = { -+ "Native", -+ "Imported UMM", -+ "Imported user buf", -+ "Alias", -+ "Raw" -+ }; -+ -+#define MEM_ALLOCS_HEADER \ -+ " VA, VA size, Commit size, Flags, Mem type\n" -+ seq_printf(sfile, "Zone name: %s\n:", zone); -+ seq_printf(sfile, MEM_ALLOCS_HEADER); -+ for (p = rb_first(rbtree); p; p = rb_next(p)) { -+ reg = rb_entry(p, struct kbase_va_region, rblink); -+ if (!(reg->flags & KBASE_REG_FREE)) { -+ seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n", -+ reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, -+ kbase_reg_current_backed_size(reg) << PAGE_SHIFT, -+ reg->flags, type_names[reg->gpu_alloc->type]); -+ } -+ } ++ return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer, ++ cpu_queue_info->size); +} + -+/** -+ * debug_ctx_mem_allocs_show - Show information about GPU allocations in a kctx -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry -+ * -+ * Return: -+ * 0 if successfully prints data in debugfs entry file -+ * -1 if it encountered an error -+ */ -+static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) ++static int kbase_ioctl_read_user_page(struct kbase_context *kctx, ++ union kbase_ioctl_read_user_page *user_page) +{ -+ struct kbase_context *const kctx = sfile->private; ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned long flags; + -+ kbase_gpu_vm_lock(kctx); ++ /* As of now, only LATEST_FLUSH is supported */ ++ if (unlikely(user_page->in.offset != LATEST_FLUSH)) ++ return -EINVAL; + -+ debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); -+ debug_zone_mem_allocs_show("CUSTOM_VA:", 
&kctx->reg_rbtree_custom, sfile); -+ debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); ++ /* Validating padding that must be zero */ ++ if (unlikely(user_page->in.padding != 0)) ++ return -EINVAL; + -+#if MALI_USE_CSF -+ debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); -+ debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); -+#endif /* MALI_USE_CSF */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!kbdev->pm.backend.gpu_powered) ++ user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; ++ else ++ user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH)); ++ user_page->out.val_hi = 0; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ kbase_gpu_vm_unlock(kctx); + return 0; +} ++#endif /* MALI_USE_CSF */ + -+/* -+ * File operations related to debugfs entry for mem_zones -+ */ -+static int debug_mem_allocs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, debug_ctx_mem_allocs_show, in->i_private); -+} -+ -+static const struct file_operations kbase_debug_mem_allocs_fops = { -+ .owner = THIS_MODULE, -+ .open = debug_mem_allocs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+/* -+ * Initialize debugfs entry for mem_allocs -+ */ -+void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) ++static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, ++ struct kbase_ioctl_context_priority_check *priority_check) +{ -+ /* Caller already ensures this, but we keep the pattern for -+ * maintenance safety. -+ */ -+ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; -+ -+ debugfs_create_file("mem_allocs", 0400, kctx->kctx_dentry, kctx, -+ &kbase_debug_mem_allocs_fops); -+} ++#if MALI_USE_CSF ++ priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority); +#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) -+{ -+} -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h -new file mode 100644 -index 000000000..8cf69c2cb ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KBASE_DEBUG_MEM_ALLOCS_H -+#define _KBASE_DEBUG_MEM_ALLOCS_H -+ -+#include -+ -+/** -+ * kbase_debug_mem_allocs_init() - Initialize the mem_allocs debugfs file -+ * @kctx: Pointer to kernel base context -+ * -+ * This function creates a "mem_allocs" file for a context to show infor about the -+ * GPU allocations created for that context. 
-+ * -+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the -+ * parent directory. -+ */ -+void kbase_debug_mem_allocs_init(struct kbase_context *kctx); ++ base_jd_prio req_priority = (base_jd_prio)priority_check->priority; + ++ priority_check->priority = (u8)kbase_js_priority_check(kctx->kbdev, req_priority); +#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c -new file mode 100644 -index 000000000..ce87a0070 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c -@@ -0,0 +1,410 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/* -+ * Debugfs interface to dump the memory visible to the GPU -+ */ -+ -+#include "mali_kbase_debug_mem_view.h" -+#include "mali_kbase.h" -+ -+#include -+#include -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ -+#define SHOW_GPU_MEM_DATA(type, format) \ -+{ \ -+ unsigned int i, j; \ -+ const type *ptr = (type *)cpu_addr; \ -+ const unsigned int col_width = sizeof(type); \ -+ const unsigned int row_width = (col_width == sizeof(u64)) ? 
32 : 16; \ -+ const unsigned int num_cols = row_width / col_width; \ -+ for (i = 0; i < PAGE_SIZE; i += row_width) { \ -+ seq_printf(m, "%016llx:", gpu_addr + i); \ -+ for (j = 0; j < num_cols; j++) \ -+ seq_printf(m, format, ptr[j]); \ -+ ptr += num_cols; \ -+ seq_putc(m, '\n'); \ -+ } \ ++ return 0; +} + -+struct debug_mem_mapping { -+ struct list_head node; -+ -+ struct kbase_mem_phy_alloc *alloc; -+ unsigned long flags; ++#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ ++ do { \ ++ int ret; \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ ret = function(arg); \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ ++ } while (0) + -+ u64 start_pfn; -+ size_t nr_pages; -+}; ++#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(¶m, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ ret = function(arg, ¶m); \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ ++ } while (0) + -+struct debug_mem_data { -+ struct list_head mapping_list; -+ struct kbase_context *kctx; -+ unsigned int column_width; -+}; ++#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ memset(¶m, 0, sizeof(param)); \ ++ ret = function(arg, ¶m); \ ++ err = copy_to_user(uarg, ¶m, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ ++ } while (0) + -+struct debug_mem_seq_off { -+ struct list_head *lh; -+ size_t offset; -+}; ++#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(¶m, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ ret = function(arg, ¶m); \ ++ err = copy_to_user(uarg, ¶m, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ ++ } while (0) + -+static void *debug_mem_start(struct seq_file *m, loff_t *_pos) ++static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, ++ struct kbase_ioctl_set_limited_core_count *set_limited_core_count) +{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data; -+ struct debug_mem_mapping *map; -+ loff_t pos = *_pos; ++ const u64 shader_core_mask = ++ kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); ++ const u64 limited_core_mask = ++ ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + -+ list_for_each_entry(map, &mem_data->mapping_list, node) { -+ if (pos >= map->nr_pages) { -+ pos -= map->nr_pages; -+ } else { -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return NULL; -+ data->lh = &map->node; -+ data->offset = pos; -+ return data; -+ } ++ if ((shader_core_mask & limited_core_mask) == 0) { ++ /* At least one shader core must be 
available after applying the mask */ ++ return -EINVAL; + } + -+ /* Beyond the end */ -+ return NULL; ++ kctx->limited_core_mask = limited_core_mask; ++ return 0; +} + -+static void debug_mem_stop(struct seq_file *m, void *v) ++static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ -+ kfree(v); -+} ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *kctx = NULL; ++ struct kbase_device *kbdev = kfile->kbdev; ++ void __user *uarg = (void __user *)arg; + -+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data = v; -+ struct debug_mem_mapping *map; ++ /* Only these ioctls are available until setup is complete */ ++ switch (cmd) { ++ case KBASE_IOCTL_VERSION_CHECK: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, ++ kbase_api_handshake, ++ struct kbase_ioctl_version_check, ++ kfile); ++ break; + -+ map = list_entry(data->lh, struct debug_mem_mapping, node); ++ case KBASE_IOCTL_VERSION_CHECK_RESERVED: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK_RESERVED, ++ kbase_api_handshake_dummy, ++ struct kbase_ioctl_version_check, ++ kfile); ++ break; + -+ if (data->offset < map->nr_pages - 1) { -+ data->offset++; -+ ++*pos; -+ return data; -+ } ++ case KBASE_IOCTL_SET_FLAGS: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, ++ kbase_api_set_flags, ++ struct kbase_ioctl_set_flags, ++ kfile); ++ break; + -+ if (list_is_last(data->lh, &mem_data->mapping_list)) { -+ kfree(data); -+ return NULL; -+ } ++ case KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO: ++ KBASE_HANDLE_IOCTL_INOUT( ++ KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO, ++ kbase_api_kinstr_prfcnt_enum_info, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info, kfile); ++ break; + -+ data->lh = data->lh->next; -+ data->offset = 0; -+ ++*pos; ++ case KBASE_IOCTL_KINSTR_PRFCNT_SETUP: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_PRFCNT_SETUP, ++ kbase_api_kinstr_prfcnt_setup, ++ union kbase_ioctl_kinstr_prfcnt_setup, ++ kfile); ++ break; ++ case KBASE_IOCTL_GET_GPUPROPS: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops, ++ struct kbase_ioctl_get_gpuprops, kfile); ++ break; ++ } + -+ return data; -+} ++ kctx = kbase_file_get_kctx_if_setup_complete(kfile); ++ if (unlikely(!kctx)) ++ return -EPERM; + -+static int debug_mem_show(struct seq_file *m, void *v) -+{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data = v; -+ struct debug_mem_mapping *map; -+ unsigned long long gpu_addr; -+ struct page *page; -+ void *cpu_addr; -+ pgprot_t prot = PAGE_KERNEL; -+ -+ map = list_entry(data->lh, struct debug_mem_mapping, node); -+ -+ kbase_gpu_vm_lock(mem_data->kctx); -+ -+ if (data->offset >= map->alloc->nents) { -+ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + -+ data->offset) << PAGE_SHIFT); -+ goto out; -+ } -+ -+ if (!(map->flags & KBASE_REG_CPU_CACHED)) -+ prot = pgprot_writecombine(prot); ++ /* Normal ioctls */ ++ switch (cmd) { ++#if !MALI_USE_CSF ++ case KBASE_IOCTL_JOB_SUBMIT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, ++ kbase_api_job_submit, ++ struct kbase_ioctl_job_submit, ++ kctx); ++ break; ++#endif /* !MALI_USE_CSF */ ++#if !MALI_USE_CSF ++ case KBASE_IOCTL_POST_TERM: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, ++ kbase_api_post_term, ++ kctx); ++ break; ++#endif /* !MALI_USE_CSF */ ++ case KBASE_IOCTL_MEM_ALLOC: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, ++ kbase_api_mem_alloc, ++ union kbase_ioctl_mem_alloc, ++ 
kctx); ++ break; ++#if MALI_USE_CSF ++ case KBASE_IOCTL_MEM_ALLOC_EX: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC_EX, kbase_api_mem_alloc_ex, ++ union kbase_ioctl_mem_alloc_ex, kctx); ++ break; ++#endif ++ case KBASE_IOCTL_MEM_QUERY: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, ++ kbase_api_mem_query, ++ union kbase_ioctl_mem_query, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_FREE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, ++ kbase_api_mem_free, ++ struct kbase_ioctl_mem_free, ++ kctx); ++ break; ++ case KBASE_IOCTL_DISJOINT_QUERY: ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, ++ kbase_api_disjoint_query, ++ struct kbase_ioctl_disjoint_query, ++ kctx); ++ break; ++ case KBASE_IOCTL_GET_DDK_VERSION: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, ++ kbase_api_get_ddk_version, ++ struct kbase_ioctl_get_ddk_version, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_JIT_INIT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, ++ kbase_api_mem_jit_init, ++ struct kbase_ioctl_mem_jit_init, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_EXEC_INIT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, ++ kbase_api_mem_exec_init, ++ struct kbase_ioctl_mem_exec_init, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_SYNC: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, ++ kbase_api_mem_sync, ++ struct kbase_ioctl_mem_sync, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, ++ kbase_api_mem_find_cpu_offset, ++ union kbase_ioctl_mem_find_cpu_offset, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, ++ kbase_api_mem_find_gpu_start_and_offset, ++ union kbase_ioctl_mem_find_gpu_start_and_offset, ++ kctx); ++ break; ++ case KBASE_IOCTL_GET_CONTEXT_ID: ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, ++ kbase_api_get_context_id, ++ struct kbase_ioctl_get_context_id, ++ kctx); ++ break; ++ case KBASE_IOCTL_TLSTREAM_ACQUIRE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, ++ kbase_api_tlstream_acquire, ++ struct kbase_ioctl_tlstream_acquire, ++ kctx); ++ break; ++ case KBASE_IOCTL_TLSTREAM_FLUSH: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, ++ kbase_api_tlstream_flush, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_COMMIT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, ++ kbase_api_mem_commit, ++ struct kbase_ioctl_mem_commit, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_ALIAS: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, ++ kbase_api_mem_alias, ++ union kbase_ioctl_mem_alias, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_IMPORT: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, ++ kbase_api_mem_import, ++ union kbase_ioctl_mem_import, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_FLAGS_CHANGE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, ++ kbase_api_mem_flags_change, ++ struct kbase_ioctl_mem_flags_change, ++ kctx); ++ break; ++ case KBASE_IOCTL_STREAM_CREATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, ++ kbase_api_stream_create, ++ struct kbase_ioctl_stream_create, ++ kctx); ++ break; ++ case KBASE_IOCTL_FENCE_VALIDATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, ++ kbase_api_fence_validate, ++ struct kbase_ioctl_fence_validate, ++ kctx); ++ break; ++ case KBASE_IOCTL_MEM_PROFILE_ADD: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, ++ kbase_api_mem_profile_add, ++ struct kbase_ioctl_mem_profile_add, ++ kctx); ++ break; + -+ page = as_page(map->alloc->pages[data->offset]); -+ 
cpu_addr = vmap(&page, 1, VM_MAP, prot); -+ if (!cpu_addr) -+ goto out; ++#if !MALI_USE_CSF ++ case KBASE_IOCTL_SOFT_EVENT_UPDATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, ++ kbase_api_soft_event_update, ++ struct kbase_ioctl_soft_event_update, ++ kctx); ++ break; ++#endif /* !MALI_USE_CSF */ + -+ gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT; ++ case KBASE_IOCTL_STICKY_RESOURCE_MAP: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, ++ kbase_api_sticky_resource_map, ++ struct kbase_ioctl_sticky_resource_map, ++ kctx); ++ break; ++ case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, ++ kbase_api_sticky_resource_unmap, ++ struct kbase_ioctl_sticky_resource_unmap, ++ kctx); ++ break; + -+ /* Cases for 4 supported values of column_width for showing -+ * the GPU memory contents. -+ */ -+ switch (mem_data->column_width) { -+ case 1: -+ SHOW_GPU_MEM_DATA(u8, " %02hhx"); ++ /* Instrumentation. */ ++#if !MALI_USE_CSF ++ case KBASE_IOCTL_KINSTR_JM_FD: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, ++ kbase_api_kinstr_jm_fd, ++ union kbase_kinstr_jm_fd, ++ kctx); + break; -+ case 2: -+ SHOW_GPU_MEM_DATA(u16, " %04hx"); ++#endif ++ case KBASE_IOCTL_HWCNT_READER_SETUP: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, ++ kbase_api_hwcnt_reader_setup, ++ struct kbase_ioctl_hwcnt_reader_setup, ++ kctx); + break; -+ case 4: -+ SHOW_GPU_MEM_DATA(u32, " %08x"); ++ case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, ++ kbase_api_get_cpu_gpu_timeinfo, ++ union kbase_ioctl_get_cpu_gpu_timeinfo, ++ kctx); + break; -+ case 8: -+ SHOW_GPU_MEM_DATA(u64, " %016llx"); ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ case KBASE_IOCTL_HWCNT_SET: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, ++ kbase_api_hwcnt_set, ++ struct kbase_ioctl_hwcnt_values, ++ kctx); ++ break; ++#endif /* CONFIG_MALI_BIFROST_NO_MALI */ ++#ifdef CONFIG_MALI_CINSTR_GWT ++ case KBASE_IOCTL_CINSTR_GWT_START: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, ++ kbase_gpu_gwt_start, ++ kctx); ++ break; ++ case KBASE_IOCTL_CINSTR_GWT_STOP: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, ++ kbase_gpu_gwt_stop, ++ kctx); ++ break; ++ case KBASE_IOCTL_CINSTR_GWT_DUMP: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, ++ kbase_gpu_gwt_dump, ++ union kbase_ioctl_cinstr_gwt_dump, ++ kctx); ++ break; ++#endif ++#if MALI_USE_CSF ++ case KBASE_IOCTL_CS_EVENT_SIGNAL: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, ++ kbasep_cs_event_signal, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_REGISTER: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, ++ kbasep_cs_queue_register, ++ struct kbase_ioctl_cs_queue_register, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_REGISTER_EX: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, ++ kbasep_cs_queue_register_ex, ++ struct kbase_ioctl_cs_queue_register_ex, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_TERMINATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, ++ kbasep_cs_queue_terminate, ++ struct kbase_ioctl_cs_queue_terminate, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_BIND: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, ++ kbasep_cs_queue_bind, ++ union kbase_ioctl_cs_queue_bind, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_KICK: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, ++ kbasep_cs_queue_kick, ++ struct kbase_ioctl_cs_queue_kick, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6: 
++ KBASE_HANDLE_IOCTL_INOUT( ++ KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6, ++ kbasep_cs_queue_group_create_1_6, ++ union kbase_ioctl_cs_queue_group_create_1_6, kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, ++ kbasep_cs_queue_group_create, ++ union kbase_ioctl_cs_queue_group_create, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE, ++ kbasep_cs_queue_group_terminate, ++ struct kbase_ioctl_cs_queue_group_term, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_CREATE: ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, ++ kbasep_kcpu_queue_new, ++ struct kbase_ioctl_kcpu_queue_new, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_DELETE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, ++ kbasep_kcpu_queue_delete, ++ struct kbase_ioctl_kcpu_queue_delete, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_ENQUEUE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, ++ kbasep_kcpu_queue_enqueue, ++ struct kbase_ioctl_kcpu_queue_enqueue, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_TILER_HEAP_INIT: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, ++ kbasep_cs_tiler_heap_init, ++ union kbase_ioctl_cs_tiler_heap_init, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13, ++ kbasep_cs_tiler_heap_init_1_13, ++ union kbase_ioctl_cs_tiler_heap_init_1_13, kctx); ++ break; ++ case KBASE_IOCTL_CS_TILER_HEAP_TERM: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, ++ kbasep_cs_tiler_heap_term, ++ struct kbase_ioctl_cs_tiler_heap_term, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_GET_GLB_IFACE: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, ++ kbase_ioctl_cs_get_glb_iface, ++ union kbase_ioctl_cs_get_glb_iface, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_CPU_QUEUE_DUMP: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, ++ kbasep_ioctl_cs_cpu_queue_dump, ++ struct kbase_ioctl_cs_cpu_queue_info, ++ kctx); ++ break; ++ /* This IOCTL will be kept for backward compatibility */ ++ case KBASE_IOCTL_READ_USER_PAGE: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page, ++ union kbase_ioctl_read_user_page, kctx); ++ break; ++#endif /* MALI_USE_CSF */ ++#if MALI_UNIT_TEST ++ case KBASE_IOCTL_TLSTREAM_STATS: ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, ++ kbase_api_tlstream_stats, ++ struct kbase_ioctl_tlstream_stats, ++ kctx); ++ break; ++#endif /* MALI_UNIT_TEST */ ++ case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK, ++ kbasep_ioctl_context_priority_check, ++ struct kbase_ioctl_context_priority_check, ++ kctx); ++ break; ++ case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, ++ kbasep_ioctl_set_limited_core_count, ++ struct kbase_ioctl_set_limited_core_count, ++ kctx); + break; -+ default: -+ dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width"); + } + -+ vunmap(cpu_addr); -+ -+ seq_putc(m, '\n'); ++ dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); + -+out: -+ kbase_gpu_vm_unlock(mem_data->kctx); -+ return 0; ++ return -ENOIOCTLCMD; +} + -+static const struct seq_operations ops = { -+ .start = debug_mem_start, -+ .next = debug_mem_next, -+ .stop = debug_mem_stop, -+ .show = debug_mem_show, -+}; -+ -+static int debug_mem_zone_open(struct rb_root *rbtree, -+ struct 
debug_mem_data *mem_data) ++#if MALI_USE_CSF ++static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ -+ int ret = 0; -+ struct rb_node *p; -+ struct kbase_va_region *reg; -+ struct debug_mem_mapping *mapping; ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); ++ struct base_csf_notification event_data = { ++ .type = BASE_CSF_NOTIFICATION_EVENT }; ++ const size_t data_size = sizeof(event_data); ++ bool read_event = false, read_error = false; + -+ for (p = rb_first(rbtree); p; p = rb_next(p)) { -+ reg = rb_entry(p, struct kbase_va_region, rblink); ++ if (unlikely(!kctx)) ++ return -EPERM; + -+ if (reg->gpu_alloc == NULL) -+ /* Empty region - ignore */ -+ continue; ++ if (count < data_size) ++ return -ENOBUFS; + -+ if (reg->flags & KBASE_REG_PROTECTED) { -+ /* CPU access to protected memory is forbidden - so -+ * skip this GPU virtual region. -+ */ -+ continue; -+ } ++ if (atomic_read(&kctx->event_count)) ++ read_event = true; ++ else ++ read_error = kbase_csf_event_read_error(kctx, &event_data); + -+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); -+ if (!mapping) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ if (!read_event && !read_error) { ++ bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, ++ &event_data); ++ /* This condition is not treated as an error. ++ * It is possible that event handling thread was woken up due ++ * to a fault/error that occurred for a queue group, but before ++ * the corresponding fault data was read by the thread the ++ * queue group was already terminated by the userspace. ++ */ ++ if (!dump) ++ dev_dbg(kctx->kbdev->dev, ++ "Neither event nor error signaled"); ++ } + -+ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); -+ mapping->start_pfn = reg->start_pfn; -+ mapping->nr_pages = reg->nr_pages; -+ mapping->flags = reg->flags; -+ list_add_tail(&mapping->node, &mem_data->mapping_list); ++ if (copy_to_user(buf, &event_data, data_size) != 0) { ++ dev_warn(kctx->kbdev->dev, ++ "Failed to copy data\n"); ++ return -EFAULT; + } + -+out: -+ return ret; -+} ++ if (read_event) ++ atomic_set(&kctx->event_count, 0); + -+static int debug_mem_open(struct inode *i, struct file *file) ++ return data_size; ++} ++#else /* MALI_USE_CSF */ ++static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ -+ struct kbase_context *const kctx = i->i_private; -+ struct debug_mem_data *mem_data; -+ int ret; -+ -+ if (get_file_rcu(kctx->filp) == 0) -+ return -ENOENT; ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); ++ struct base_jd_event_v2 uevent; ++ int out_count = 0; + -+ /* Check if file was opened in write mode. GPU memory contents -+ * are returned only when the file is not opened in write mode. 
-+ */ -+ if (file->f_mode & FMODE_WRITE) { -+ file->private_data = kctx; -+ return 0; -+ } ++ if (unlikely(!kctx)) ++ return -EPERM; + -+ ret = seq_open(file, &ops); -+ if (ret) -+ goto open_fail; ++ if (count < sizeof(uevent)) ++ return -ENOBUFS; + -+ mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); -+ if (!mem_data) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ memset(&uevent, 0, sizeof(uevent)); + -+ mem_data->kctx = kctx; ++ do { ++ while (kbase_event_dequeue(kctx, &uevent)) { ++ if (out_count > 0) ++ goto out; + -+ INIT_LIST_HEAD(&mem_data->mapping_list); ++ if (filp->f_flags & O_NONBLOCK) ++ return -EAGAIN; + -+ kbase_gpu_vm_lock(kctx); ++ if (wait_event_interruptible(kctx->event_queue, ++ kbase_event_pending(kctx)) != 0) ++ return -ERESTARTSYS; ++ } ++ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { ++ if (out_count == 0) ++ return -EPIPE; ++ goto out; ++ } + -+ mem_data->column_width = kctx->mem_view_column_width; ++ if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) ++ return -EFAULT; + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); -+ if (ret != 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++ buf += sizeof(uevent); ++ out_count++; ++ count -= sizeof(uevent); ++ } while (count >= sizeof(uevent)); + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); -+ if (ret != 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++ out: ++ return out_count * sizeof(uevent); ++} ++#endif /* MALI_USE_CSF */ + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); -+ if (ret != 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++static __poll_t kbase_poll(struct file *filp, poll_table *wait) ++{ ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); + -+#if MALI_USE_CSF -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data); -+ if (ret != 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; ++ if (unlikely(!kctx)) { ++#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) ++ return POLLERR; ++#else ++ return EPOLLERR; ++#endif + } + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data); -+ if (ret != 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++ poll_wait(filp, &kctx->event_queue, wait); ++ if (kbase_event_pending(kctx)) { ++#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) ++ return POLLIN | POLLRDNORM; ++#else ++ return EPOLLIN | EPOLLRDNORM; +#endif ++ } + -+ kbase_gpu_vm_unlock(kctx); ++ return 0; ++} + -+ ((struct seq_file *)file->private_data)->private = mem_data; ++void kbase_event_wakeup(struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(kctx); ++ dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", ++ (void *)kctx); ++ wake_up_interruptible(&kctx->event_queue); ++} + -+ return 0; ++KBASE_EXPORT_TEST_API(kbase_event_wakeup); + -+out: -+ if (mem_data) { -+ while (!list_empty(&mem_data->mapping_list)) { -+ struct debug_mem_mapping *mapping; ++#if MALI_USE_CSF ++int kbase_event_pending(struct kbase_context *ctx) ++{ ++ KBASE_DEBUG_ASSERT(ctx); + -+ mapping = list_first_entry(&mem_data->mapping_list, -+ struct debug_mem_mapping, node); -+ kbase_mem_phy_alloc_put(mapping->alloc); -+ list_del(&mapping->node); -+ kfree(mapping); -+ } -+ kfree(mem_data); -+ } -+ seq_release(i, file); -+open_fail: -+ fput(kctx->filp); ++ if (unlikely(!ctx)) ++ return -EPERM; + -+ return ret; ++ return (atomic_read(&ctx->event_count) != 0) || ++ kbase_csf_event_error_pending(ctx) || ++ kbase_csf_cpu_queue_dump_needed(ctx); +} -+ 
-+static int debug_mem_release(struct inode *inode, struct file *file) ++#else ++int kbase_event_pending(struct kbase_context *ctx) +{ -+ struct kbase_context *const kctx = inode->i_private; ++ KBASE_DEBUG_ASSERT(ctx); + -+ /* If the file wasn't opened in write mode, then release the -+ * memory allocated to show the GPU memory contents. -+ */ -+ if (!(file->f_mode & FMODE_WRITE)) { -+ struct seq_file *sfile = file->private_data; -+ struct debug_mem_data *mem_data = sfile->private; -+ struct debug_mem_mapping *mapping; ++ if (unlikely(!ctx)) ++ return -EPERM; + -+ seq_release(inode, file); ++ return (atomic_read(&ctx->event_count) != 0) || ++ (atomic_read(&ctx->event_closed) != 0); ++} ++#endif + -+ while (!list_empty(&mem_data->mapping_list)) { -+ mapping = list_first_entry(&mem_data->mapping_list, -+ struct debug_mem_mapping, node); -+ kbase_mem_phy_alloc_put(mapping->alloc); -+ list_del(&mapping->node); -+ kfree(mapping); -+ } ++KBASE_EXPORT_TEST_API(kbase_event_pending); + -+ kfree(mem_data); -+ } ++static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) ++{ ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); + -+ fput(kctx->filp); ++ if (unlikely(!kctx)) ++ return -EPERM; + -+ return 0; ++ return kbase_context_mmap(kctx, vma); +} + -+static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, -+ size_t count, loff_t *ppos) ++static int kbase_check_flags(int flags) +{ -+ struct kbase_context *const kctx = file->private_data; -+ unsigned int column_width = 0; -+ int ret = 0; -+ -+ CSTD_UNUSED(ppos); ++ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always ++ * closes the file descriptor in a child process. ++ */ ++ if (0 == (flags & O_CLOEXEC)) ++ return -EINVAL; + -+ ret = kstrtouint_from_user(ubuf, count, 0, &column_width); ++ return 0; ++} + -+ if (ret) -+ return ret; -+ if (!is_power_of_2(column_width)) { -+ dev_dbg(kctx->kbdev->dev, -+ "Column width %u not a multiple of power of 2", column_width); -+ return -EINVAL; -+ } -+ if (column_width > 8) { -+ dev_dbg(kctx->kbdev->dev, -+ "Column width %u greater than 8 not supported", column_width); -+ return -EINVAL; -+ } ++static unsigned long kbase_get_unmapped_area(struct file *const filp, ++ const unsigned long addr, const unsigned long len, ++ const unsigned long pgoff, const unsigned long flags) ++{ ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); + -+ kbase_gpu_vm_lock(kctx); -+ kctx->mem_view_column_width = column_width; -+ kbase_gpu_vm_unlock(kctx); ++ if (unlikely(!kctx)) ++ return -EPERM; + -+ return count; ++ return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); +} + -+static const struct file_operations kbase_debug_mem_view_fops = { ++static const struct file_operations kbase_fops = { + .owner = THIS_MODULE, -+ .open = debug_mem_open, -+ .release = debug_mem_release, -+ .read = seq_read, -+ .write = debug_mem_write, -+ .llseek = seq_lseek ++ .open = kbase_open, ++ .release = kbase_release, ++ .read = kbase_read, ++ .poll = kbase_poll, ++ .unlocked_ioctl = kbase_ioctl, ++ .compat_ioctl = kbase_ioctl, ++ .mmap = kbase_mmap, ++ .check_flags = kbase_check_flags, ++ .get_unmapped_area = kbase_get_unmapped_area, +}; + -+void kbase_debug_mem_view_init(struct kbase_context *const kctx) -+{ -+ /* Caller already ensures this, but we keep the pattern for -+ * maintenance safety. 
-+ */ -+ if (WARN_ON(!kctx) || -+ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; -+ -+ /* Default column width is 4 */ -+ kctx->mem_view_column_width = sizeof(u32); -+ -+ debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, -+ &kbase_debug_mem_view_fops); -+} -+ -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h -new file mode 100644 -index 000000000..cb8050d9b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2013-2015, 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++/** ++ * power_policy_show - Show callback for the power_policy sysfs file. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called to get the contents of the power_policy sysfs ++ * file. This is a list of the available policies with the currently active one ++ * surrounded by square brackets. + * ++ * Return: The number of bytes output to @buf. + */ ++static ssize_t power_policy_show(struct device *dev, struct device_attribute *attr, char *const buf) ++{ ++ struct kbase_device *kbdev; ++ const struct kbase_pm_policy *current_policy; ++ const struct kbase_pm_policy *const *policy_list; ++ int policy_count; ++ int i; ++ ssize_t ret = 0; + -+#ifndef _KBASE_DEBUG_MEM_VIEW_H -+#define _KBASE_DEBUG_MEM_VIEW_H -+ -+#include ++ kbdev = to_kbase_device(dev); + -+/** -+ * kbase_debug_mem_view_init - Initialize the mem_view debugfs file -+ * @kctx: Pointer to kernel base context -+ * -+ * This function creates a "mem_view" file which can be used to get a view of -+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables). -+ * -+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the -+ * parent directory. -+ */ -+void kbase_debug_mem_view_init(struct kbase_context *kctx); ++ if (!kbdev) ++ return -ENODEV; + -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c -new file mode 100644 -index 000000000..1f8db32aa ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c -@@ -0,0 +1,116 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ current_policy = kbase_pm_get_policy(kbdev); + -+/* -+ * Debugfs interface to dump information about GPU_VA memory zones -+ */ ++ policy_count = kbase_pm_list_policies(kbdev, &policy_list); + -+#include "mali_kbase_debug_mem_zones.h" -+#include "mali_kbase.h" ++ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { ++ if (policy_list[i] == current_policy) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); ++ } + -+#include -+#include ++ if (ret < PAGE_SIZE - 1) { ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); ++ } else { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ return ret; ++} + +/** -+ * debug_mem_zones_show - Show information about GPU_VA memory zones -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry ++ * power_policy_store - Store callback for the power_policy sysfs file. + * -+ * This function is called to get the contents of the @c mem_zones debugfs file. -+ * This lists the start address and size (in pages) of each initialized memory -+ * zone within GPU_VA memory. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * -+ * Return: -+ * 0 if successfully prints data in debugfs entry file -+ * -1 if it encountered an error ++ * This function is called when the power_policy sysfs file is written to. ++ * It matches the requested policy against the available policies and if a ++ * matching policy is found calls kbase_pm_set_policy() to change the ++ * policy. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
+ */ -+static int debug_mem_zones_show(struct seq_file *sfile, void *data) ++static ssize_t power_policy_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ struct kbase_context *const kctx = sfile->private; -+ size_t i; ++ struct kbase_device *kbdev; ++ const struct kbase_pm_policy *new_policy = NULL; ++ const struct kbase_pm_policy *const *policy_list; ++ int policy_count; ++ int i; + -+ const char *zone_names[KBASE_REG_ZONE_MAX] = { -+ "SAME_VA", -+ "CUSTOM_VA", -+ "EXEC_VA" -+#if MALI_USE_CSF -+ , -+ "MCU_SHARED_VA", -+ "EXEC_FIXED_VA", -+ "FIXED_VA" -+#endif -+ }; ++ kbdev = to_kbase_device(dev); + -+ kbase_gpu_vm_lock(kctx); ++ if (!kbdev) ++ return -ENODEV; + -+ for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { -+ struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; ++ policy_count = kbase_pm_list_policies(kbdev, &policy_list); + -+ if (reg_zone->base_pfn) { -+ seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, -+ reg_zone->base_pfn, reg_zone->va_size_pages); ++ for (i = 0; i < policy_count; i++) { ++ if (sysfs_streq(policy_list[i]->name, buf)) { ++ new_policy = policy_list[i]; ++ break; + } + } + -+ kbase_gpu_vm_unlock(kctx); -+ return 0; -+} ++ if (!new_policy) { ++ dev_err(dev, "power_policy: policy not found\n"); ++ return -EINVAL; ++ } + -+/* -+ * File operations related to debugfs entry for mem_zones -+ */ -+static int debug_mem_zones_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, debug_mem_zones_show, in->i_private); -+} ++ kbase_pm_set_policy(kbdev, new_policy); + -+static const struct file_operations kbase_debug_mem_zones_fops = { -+ .owner = THIS_MODULE, -+ .open = debug_mem_zones_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ return count; ++} + +/* -+ * Initialize debugfs entry for mem_zones ++ * The sysfs file power_policy. ++ * ++ * This is used for obtaining information about the available policies, ++ * determining which policy is currently active, and changing the active ++ * policy. + */ -+void kbase_debug_mem_zones_init(struct kbase_context *const kctx) -+{ -+ /* Caller already ensures this, but we keep the pattern for -+ * maintenance safety. -+ */ -+ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) -+ return; ++static DEVICE_ATTR_RW(power_policy); + -+ debugfs_create_file("mem_zones", 0400, kctx->kctx_dentry, kctx, -+ &kbase_debug_mem_zones_fops); -+} -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbase_debug_mem_zones_init(struct kbase_context *const kctx) -+{ -+} -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h -new file mode 100644 -index 000000000..acf349b60 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* ++ * core_mask_show - Show callback for the core_mask sysfs file. + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called to get the contents of the core_mask sysfs file. + * ++ * Return: The number of bytes output to @buf. + */ ++static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ ssize_t ret = 0; + -+#ifndef _KBASE_DEBUG_MEM_ZONES_H -+#define _KBASE_DEBUG_MEM_ZONES_H ++ kbdev = to_kbase_device(dev); + -+#include ++ if (!kbdev) ++ return -ENODEV; + -+/** -+ * kbase_debug_mem_zones_init() - Initialize the mem_zones sysfs file -+ * @kctx: Pointer to kernel base context -+ * -+ * This function creates a "mem_zones" file which can be used to determine the -+ * address ranges of GPU memory zones, in the GPU Virtual-Address space. -+ * -+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the -+ * parent directory. -+ */ -+void kbase_debug_mem_zones_init(struct kbase_context *kctx); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c -new file mode 100644 -index 000000000..c846491e7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c -@@ -0,0 +1,248 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++#if MALI_USE_CSF ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current debug core mask : 0x%llX\n", ++ kbdev->pm.debug_core_mask); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current desired core mask : 0x%llX\n", ++ kbase_pm_ca_get_core_mask(kbdev)); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current in use core mask : 0x%llX\n", ++ kbdev->pm.backend.shaders_avail); ++#else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS0) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[0]); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS1) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[1]); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS2) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[2]); ++#endif /* MALI_USE_CSF */ + -+#include -+#include -+#include -+#include ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Available core mask : 0x%llX\n", ++ kbdev->gpu_props.props.raw_props.shader_present); + -+#include "mali_kbase_debugfs_helper.h" ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/* Arbitrary maximum size to prevent user space allocating too much kernel -+ * memory -+ */ -+#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u) ++ return ret; ++} + +/** -+ * set_attr_from_string - Parse a string to set elements of an array ++ * core_mask_store - Store callback for the core_mask sysfs file. + * -+ * @buf: Input string to parse. Must be nul-terminated! -+ * @array: Address of an object that can be accessed like an array. -+ * @nelems: Number of elements in the array. -+ * @set_attr_fn: Function to be called back for each array element. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * -+ * This is the core of the implementation of -+ * kbase_debugfs_helper_set_attr_from_string. The only difference between the -+ * two functions is that this one requires the input string to be writable. ++ * This function is called when the core_mask sysfs file is written to. + * -+ * Return: 0 if success, negative error code otherwise. ++ * Return: @count if the function succeeded. An error code on failure. + */ -+static int -+set_attr_from_string(char *const buf, void *const array, size_t const nelems, -+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) ++static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ size_t index, err = 0; -+ char *ptr = buf; -+ -+ for (index = 0; index < nelems && *ptr; ++index) { -+ unsigned long new_size; -+ size_t len; -+ char sep; ++ struct kbase_device *kbdev; ++#if MALI_USE_CSF ++ u64 new_core_mask; ++#else ++ u64 new_core_mask[3]; ++ u64 group0_core_mask; ++ int i; ++#endif /* MALI_USE_CSF */ + -+ /* Drop leading spaces */ -+ while (*ptr == ' ') -+ ptr++; ++ int items; ++ ssize_t err = count; ++ unsigned long flags; ++ u64 shader_present; + -+ len = strcspn(ptr, "\n "); -+ if (len == 0) { -+ /* No more values (allow this) */ -+ break; -+ } ++ kbdev = to_kbase_device(dev); + -+ /* Substitute a nul terminator for a space character -+ * to make the substring valid for kstrtoul. 
-+ */ -+ sep = ptr[len]; -+ if (sep == ' ') -+ ptr[len++] = '\0'; ++ if (!kbdev) ++ return -ENODEV; + -+ err = kstrtoul(ptr, 0, &new_size); -+ if (err) -+ break; ++#if MALI_USE_CSF ++ items = sscanf(buf, "%llx", &new_core_mask); + -+ /* Skip the substring (including any premature nul terminator) -+ */ -+ ptr += len; ++ if (items != 1) { ++ dev_err(kbdev->dev, ++ "Couldn't process core mask write operation.\n" ++ "Use format \n"); ++ err = -EINVAL; ++ goto end; ++ } ++#else ++ items = sscanf(buf, "%llx %llx %llx", ++ &new_core_mask[0], &new_core_mask[1], ++ &new_core_mask[2]); + -+ set_attr_fn(array, index, new_size); ++ if (items != 1 && items != 3) { ++ dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" ++ "Use format \n" ++ "or \n"); ++ err = -EINVAL; ++ goto end; + } + -+ return err; -+} ++ if (items == 1) ++ new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; ++#endif + -+int kbase_debugfs_string_validator(char *const buf) -+{ -+ int err = 0; -+ char *ptr = buf; ++ mutex_lock(&kbdev->pm.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ while (*ptr) { -+ unsigned long test_number; -+ size_t len; ++ shader_present = kbdev->gpu_props.props.raw_props.shader_present; + -+ /* Drop leading spaces */ -+ while (*ptr == ' ') -+ ptr++; ++#if MALI_USE_CSF ++ if ((new_core_mask & shader_present) != new_core_mask) { ++ dev_err(dev, ++ "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", ++ new_core_mask, shader_present); ++ err = -EINVAL; ++ goto unlock; + -+ /* Strings passed into the validator will be NULL terminated -+ * by nature, so here strcspn only needs to delimit by -+ * newlines, spaces and NULL terminator (delimited natively). -+ */ -+ len = strcspn(ptr, "\n "); -+ if (len == 0) { -+ /* No more values (allow this) */ -+ break; -+ } ++ } else if (!(new_core_mask & shader_present & ++ kbdev->pm.backend.ca_cores_enabled)) { ++ dev_err(dev, ++ "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", ++ new_core_mask, ++ kbdev->gpu_props.props.raw_props.shader_present, ++ kbdev->pm.backend.ca_cores_enabled); ++ err = -EINVAL; ++ goto unlock; ++ } + -+ /* Substitute a nul terminator for a space character to make -+ * the substring valid for kstrtoul, and then replace it back. -+ */ -+ if (ptr[len] == ' ') { -+ ptr[len] = '\0'; -+ err = kstrtoul(ptr, 0, &test_number); -+ ptr[len] = ' '; ++ if (kbdev->pm.debug_core_mask != new_core_mask) ++ kbase_pm_set_debug_core_mask(kbdev, new_core_mask); ++#else ++ group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + -+ /* len should only be incremented if there is a valid -+ * number to follow - otherwise this will skip over -+ * the NULL terminator in cases with no ending newline -+ */ -+ len++; -+ } else { -+ /* This would occur at the last element before a space -+ * or a NULL terminator. 
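core_mask_store() in this hunk accepts one hexadecimal mask (CSF builds) or three per-job-slot masks (job manager builds) via sscanf and rejects values that name non-existent cores or leave no usable core after core-availability filtering. The stand-alone sketch below reproduces those two checks with plain integers so the rule is easy to test in isolation; the example masks are arbitrary.

/* Sketch of the validation performed by core_mask_store(), reduced to
 * plain integers so it can be compiled and tested on its own.
 */
#include <stdint.h>
#include <stdio.h>

static int core_mask_is_valid(uint64_t new_mask, uint64_t shader_present,
                              uint64_t ca_enabled)
{
    /* Reject bits that do not correspond to physically present cores. */
    if ((new_mask & shader_present) != new_mask)
        return 0;
    /* Reject masks that leave no usable core after core-availability
     * filtering, since the GPU would have nothing left to run on.
     */
    if (!(new_mask & shader_present & ca_enabled))
        return 0;
    return 1;
}

int main(void)
{
    /* Example: a 4-core GPU (present mask 0xf) with all cores available. */
    printf("%d\n", core_mask_is_valid(0x3, 0xf, 0xf));  /* 1: subset of present cores */
    printf("%d\n", core_mask_is_valid(0x10, 0xf, 0xf)); /* 0: core 4 does not exist */
    return 0;
}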
-+ */ -+ err = kstrtoul(ptr, 0, &test_number); -+ } ++ for (i = 0; i < 3; ++i) { ++ if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { ++ dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", ++ new_core_mask[i], i, shader_present); ++ err = -EINVAL; ++ goto unlock; + -+ if (err) -+ break; -+ /* Skip the substring (including any premature nul terminator) -+ */ -+ ptr += len; -+ } -+ return err; -+} ++ } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { ++ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", ++ new_core_mask[i], i, ++ kbdev->gpu_props.props.raw_props.shader_present, ++ kbdev->pm.backend.ca_cores_enabled); ++ err = -EINVAL; ++ goto unlock; + -+int kbase_debugfs_helper_set_attr_from_string( -+ const char *const buf, void *const array, size_t const nelems, -+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) -+{ -+ char *const wbuf = kstrdup(buf, GFP_KERNEL); -+ int err = 0; ++ } else if (!(new_core_mask[i] & group0_core_mask)) { ++ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", ++ new_core_mask[i], i, group0_core_mask); ++ err = -EINVAL; ++ goto unlock; ++ } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { ++ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", ++ new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); ++ err = -EINVAL; ++ goto unlock; ++ } ++ } + -+ if (!wbuf) -+ return -ENOMEM; ++ if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || ++ kbdev->pm.debug_core_mask[1] != ++ new_core_mask[1] || ++ kbdev->pm.debug_core_mask[2] != ++ new_core_mask[2]) { + -+ /* validate string before actually writing values */ -+ err = kbase_debugfs_string_validator(wbuf); -+ if (err) { -+ kfree(wbuf); -+ return err; ++ kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], ++ new_core_mask[1], new_core_mask[2]); + } ++#endif /* MALI_USE_CSF */ + -+ err = set_attr_from_string(wbuf, array, nelems, -+ set_attr_fn); -+ -+ kfree(wbuf); ++unlock: ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->pm.lock); ++end: + return err; +} + -+ssize_t kbase_debugfs_helper_get_attr_to_string( -+ char *const buf, size_t const size, void *const array, -+ size_t const nelems, -+ kbase_debugfs_helper_get_attr_fn * const get_attr_fn) ++/* ++ * The sysfs file core_mask. ++ * ++ * This is used to restrict shader core availability for debugging purposes. ++ * Reading it will show the current core mask and the mask of cores available. ++ * Writing to it will set the current core mask. ++ */ ++static DEVICE_ATTR_RW(core_mask); ++ ++#if !MALI_USE_CSF ++/** ++ * soft_job_timeout_store - Store callback for the soft_job_timeout sysfs ++ * file. ++ * ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes to write to the sysfs file. ++ * ++ * This allows setting the timeout for software jobs. Waiting soft event wait ++ * jobs will be cancelled after this period expires, while soft fence wait jobs ++ * will print debug information if the fence debug feature is enabled. ++ * ++ * This is expressed in milliseconds. ++ * ++ * Return: count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t soft_job_timeout_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) +{ -+ ssize_t total = 0; -+ size_t index; ++ struct kbase_device *kbdev; ++ int soft_job_timeout_ms; + -+ for (index = 0; index < nelems; ++index) { -+ const char *postfix = " "; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ if (index == (nelems-1)) -+ postfix = "\n"; ++ if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || ++ (soft_job_timeout_ms <= 0)) ++ return -EINVAL; + -+ total += scnprintf(buf + total, size - total, "%zu%s", -+ get_attr_fn(array, index), postfix); -+ } ++ atomic_set(&kbdev->js_data.soft_job_timeout_ms, ++ soft_job_timeout_ms); + -+ return total; ++ return count; +} + -+int kbase_debugfs_helper_seq_write( -+ struct file *const file, const char __user *const ubuf, -+ size_t const count, size_t const nelems, -+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) ++/** ++ * soft_job_timeout_show - Show callback for the soft_job_timeout sysfs ++ * file. ++ * ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer for the sysfs file contents. ++ * ++ * This will return the timeout for the software jobs. ++ * ++ * Return: The number of bytes output to buf. ++ */ ++static ssize_t soft_job_timeout_show(struct device *dev, ++ struct device_attribute *attr, ++ char * const buf) +{ -+ const struct seq_file *const sfile = file->private_data; -+ void *const array = sfile->private; -+ int err = 0; -+ char *buf; ++ struct kbase_device *kbdev; + -+ if (WARN_ON(!array)) -+ return -EINVAL; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE)) -+ return -EINVAL; ++ return scnprintf(buf, PAGE_SIZE, "%i\n", ++ atomic_read(&kbdev->js_data.soft_job_timeout_ms)); ++} + -+ buf = kmalloc(count + 1, GFP_KERNEL); -+ if (buf == NULL) -+ return -ENOMEM; ++static DEVICE_ATTR_RW(soft_job_timeout); + -+ if (copy_from_user(buf, ubuf, count)) { -+ kfree(buf); -+ return -EFAULT; ++static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, ++ int default_ticks, u32 old_ticks) ++{ ++ if (timeout_ms > 0) { ++ u64 ticks = timeout_ms * 1000000ULL; ++ ++ do_div(ticks, kbdev->js_data.scheduling_period_ns); ++ if (!ticks) ++ return 1; ++ return ticks; ++ } else if (timeout_ms < 0) { ++ return default_ticks; ++ } else { ++ return old_ticks; + } ++} + -+ buf[count] = '\0'; ++/** ++ * js_timeouts_store - Store callback for the js_timeouts sysfs file. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file ++ * ++ * This function is called to get the contents of the js_timeouts sysfs ++ * file. This file contains five values separated by whitespace. The values ++ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, ++ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING ++ * configuration values (in that order), with the difference that the js_timeout ++ * values are expressed in MILLISECONDS. ++ * ++ * The js_timeouts sysfile file allows the current values in ++ * use by the job scheduler to get override. Note that a value needs to ++ * be other than 0 for it to override the current job scheduler value. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
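timeout_ms_to_ticks() above converts a millisecond value into scheduler ticks by dividing by the scheduling period in nanoseconds and clamping the result to at least one tick; a negative input selects the compiled-in default and zero keeps the previous value. The following stand-alone check mirrors that arithmetic; the 100 ms scheduling period and the sample inputs are assumed example values.

/* Stand-alone check of the ms -> scheduler-tick conversion used by
 * timeout_ms_to_ticks(). The 100 ms scheduling period is an assumed
 * example value, not something mandated by the patch.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t ms_to_ticks(int64_t timeout_ms, uint32_t period_ns,
                            uint32_t default_ticks, uint32_t old_ticks)
{
    if (timeout_ms > 0) {
        uint64_t ticks = (uint64_t)timeout_ms * 1000000ULL / period_ns;

        return ticks ? (uint32_t)ticks : 1; /* never round down to zero */
    }
    if (timeout_ms < 0)
        return default_ticks; /* -1 restores the default */
    return old_ticks;         /* 0 means "no change" */
}

int main(void)
{
    const uint32_t period_ns = 100u * 1000u * 1000u; /* assumed 100 ms period */

    printf("%u\n", ms_to_ticks(3000, period_ns, 5, 7)); /* 3000 ms -> 30 ticks */
    printf("%u\n", ms_to_ticks(50, period_ns, 5, 7));   /* rounds up to 1 tick */
    printf("%u\n", ms_to_ticks(-1, period_ns, 5, 7));   /* default: 5 */
    printf("%u\n", ms_to_ticks(0, period_ns, 5, 7));    /* unchanged: 7 */
    return 0;
}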
++ */ ++static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int items; ++ long js_soft_stop_ms; ++ long js_soft_stop_ms_cl; ++ long js_hard_stop_ms_ss; ++ long js_hard_stop_ms_cl; ++ long js_hard_stop_ms_dumping; ++ long js_reset_ms_ss; ++ long js_reset_ms_cl; ++ long js_reset_ms_dumping; + -+ /* validate string before actually writing values */ -+ err = kbase_debugfs_string_validator(buf); -+ if (err) { -+ kfree(buf); -+ return err; -+ } ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ err = set_attr_from_string(buf, -+ array, nelems, set_attr_fn); -+ kfree(buf); ++ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", ++ &js_soft_stop_ms, &js_soft_stop_ms_cl, ++ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, ++ &js_hard_stop_ms_dumping, &js_reset_ms_ss, ++ &js_reset_ms_cl, &js_reset_ms_dumping); + -+ return err; -+} ++ if (items == 8) { ++ struct kbasep_js_device_data *js_data = &kbdev->js_data; ++ unsigned long flags; + -+int kbase_debugfs_helper_seq_read( -+ struct seq_file * const sfile, size_t const nelems, -+ kbase_debugfs_helper_get_attr_fn * const get_attr_fn) -+{ -+ void *const array = sfile->private; -+ size_t index; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (WARN_ON(!array)) -+ return -EINVAL; ++#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ ++ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ ++ default, js_data->ticks_name); \ ++ dev_dbg(kbdev->dev, "Overriding " #ticks_name \ ++ " with %lu ticks (%lu ms)\n", \ ++ (unsigned long)js_data->ticks_name, \ ++ ms_name); \ ++ } while (0) + -+ for (index = 0; index < nelems; ++index) { -+ const char *postfix = " "; ++ UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, ++ DEFAULT_JS_SOFT_STOP_TICKS); ++ UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, ++ DEFAULT_JS_SOFT_STOP_TICKS_CL); ++ UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, ++ DEFAULT_JS_HARD_STOP_TICKS_SS); ++ UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, ++ DEFAULT_JS_HARD_STOP_TICKS_CL); ++ UPDATE_TIMEOUT(hard_stop_ticks_dumping, ++ js_hard_stop_ms_dumping, ++ DEFAULT_JS_HARD_STOP_TICKS_DUMPING); ++ UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, ++ DEFAULT_JS_RESET_TICKS_SS); ++ UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, ++ DEFAULT_JS_RESET_TICKS_CL); ++ UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, ++ DEFAULT_JS_RESET_TICKS_DUMPING); + -+ if (index == (nelems-1)) -+ postfix = "\n"; ++ kbase_js_set_timeouts(kbdev); + -+ seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return count; + } -+ return 0; ++ ++ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" ++ "Use format \n" ++ "Write 0 for no change, -1 to restore default timeout\n"); ++ return -EINVAL; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h -new file mode 100644 -index 000000000..cbb24d6e0 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h -@@ -0,0 +1,162 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+#ifndef _KBASE_DEBUGFS_HELPER_H_ -+#define _KBASE_DEBUGFS_HELPER_H_ ++static unsigned long get_js_timeout_in_ms( ++ u32 scheduling_period_ns, ++ u32 ticks) ++{ ++ u64 ms = (u64)ticks * scheduling_period_ns; + -+/** -+ * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an -+ * attribute value from an array -+ * -+ * @array: Address of an object that can be accessed like an array. -+ * @index: An element index. The valid range depends on the use-case. -+ * @value: Attribute value to be set. -+ */ -+typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, -+ size_t value); ++ do_div(ms, 1000000UL); ++ return ms; ++} + +/** -+ * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an -+ * array -+ * -+ * @buf: Input string to parse. Must be nul-terminated! -+ * @array: Address of an object that can be accessed like an array. -+ * @nelems: Number of elements in the array. -+ * @set_attr_fn: Function to be called back for each array element. ++ * js_timeouts_show - Show callback for the js_timeouts sysfs file. + * -+ * The given function is called once for each attribute value found in the -+ * input string. It is not an error if the string specifies fewer attribute -+ * values than the specified number of array elements. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * The number base of each attribute value is detected automatically -+ * according to the standard rules (e.g. prefix "0x" for hexadecimal). -+ * Attribute values are separated by one or more space characters. -+ * Additional leading and trailing spaces are ignored. ++ * This function is called to get the contents of the js_timeouts sysfs ++ * file. It returns the last set values written to the js_timeouts sysfs file. ++ * If the file didn't get written yet, the values will be current setting in ++ * use. + * -+ * Return: 0 if success, negative error code otherwise. ++ * Return: The number of bytes output to @buf. 
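js_timeouts_show() prints the eight limits as whitespace-separated millisecond values on a single line, in the order soft stop, soft stop (CL), hard stop (SS, CL, dumping) and reset (SS, CL, dumping). The short parser below shows how such a line can be consumed from user space; the sample string is made up for illustration.

/* Parse one line in the format produced by js_timeouts_show(): eight
 * whitespace-separated millisecond values. The sample string is
 * illustrative only.
 */
#include <stdio.h>

int main(void)
{
    const char *sample = "3000 3000 5000 5000 15000 15000 15000 15000\n";
    long soft_ss, soft_cl, hard_ss, hard_cl, hard_dump, reset_ss, reset_cl, reset_dump;

    if (sscanf(sample, "%ld %ld %ld %ld %ld %ld %ld %ld",
               &soft_ss, &soft_cl, &hard_ss, &hard_cl,
               &hard_dump, &reset_ss, &reset_cl, &reset_dump) != 8) {
        fprintf(stderr, "unexpected js_timeouts format\n");
        return 1;
    }
    printf("soft stop %ld ms, hard stop (SS) %ld ms, reset (SS) %ld ms\n",
           soft_ss, hard_ss, reset_ss);
    return 0;
}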
+ */ -+int kbase_debugfs_helper_set_attr_from_string( -+ const char *buf, void *array, size_t nelems, -+ kbase_debugfs_helper_set_attr_fn *set_attr_fn); ++static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ unsigned long js_soft_stop_ms; ++ unsigned long js_soft_stop_ms_cl; ++ unsigned long js_hard_stop_ms_ss; ++ unsigned long js_hard_stop_ms_cl; ++ unsigned long js_hard_stop_ms_dumping; ++ unsigned long js_reset_ms_ss; ++ unsigned long js_reset_ms_cl; ++ unsigned long js_reset_ms_dumping; ++ u32 scheduling_period_ns; + -+/** -+ * kbase_debugfs_string_validator - Validate a string to be written to a -+ * debugfs file for any incorrect formats -+ * or wrong values. -+ * -+ * @buf: Null-terminated string to validate. -+ * -+ * This function is to be used before any writes to debugfs values are done -+ * such that any strings with erroneous values (such as octal 09 or -+ * hexadecimal 0xGH are fully ignored) - without this validation, any correct -+ * values before the first incorrect one will still be entered into the -+ * debugfs file. This essentially iterates the values through kstrtoul to see -+ * if it is valid. -+ * -+ * It is largely similar to set_attr_from_string to iterate through the values -+ * of the input string. This function also requires the input string to be -+ * writable. -+ * -+ * Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if -+ * any value in the string was wrong or with an incorrect format. -+ */ -+int kbase_debugfs_string_validator(char *const buf); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/** -+ * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an -+ * attribute value from an array -+ * -+ * @array: Address of an object that can be accessed like an array. -+ * @index: An element index. The valid range depends on the use-case. -+ * -+ * Return: Value of attribute. -+ */ -+typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index); ++ scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + -+/** -+ * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string -+ * from elements in an array -+ * -+ * @buf: Buffer in which to store the formatted output string. -+ * @size: The size of the buffer, in bytes. -+ * @array: Address of an object that can be accessed like an array. -+ * @nelems: Number of elements in the array. -+ * @get_attr_fn: Function to be called back for each array element. -+ * -+ * The given function is called once for each array element to get the -+ * value of the attribute to be inspected. The attribute values are -+ * written to the buffer as a formatted string of decimal numbers -+ * separated by spaces and terminated by a linefeed. -+ * -+ * Return: Number of characters written excluding the nul terminator. -+ */ -+ssize_t kbase_debugfs_helper_get_attr_to_string( -+ char *buf, size_t size, void *array, size_t nelems, -+ kbase_debugfs_helper_get_attr_fn *get_attr_fn); ++#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ ++ scheduling_period_ns, \ ++ kbdev->js_data.name) + -+/** -+ * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an -+ * array -+ * -+ * @sfile: A virtual file previously opened by calling single_open. -+ * @nelems: Number of elements in the array. -+ * @get_attr_fn: Function to be called back for each array element. 
-+ * -+ * The virtual file must have been opened by calling single_open and passing -+ * the address of an object that can be accessed like an array. -+ * -+ * The given function is called once for each array element to get the -+ * value of the attribute to be inspected. The attribute values are -+ * written to the buffer as a formatted string of decimal numbers -+ * separated by spaces and terminated by a linefeed. -+ * -+ * Return: 0 if success, negative error code otherwise. -+ */ -+int kbase_debugfs_helper_seq_read( -+ struct seq_file *sfile, size_t nelems, -+ kbase_debugfs_helper_get_attr_fn *get_attr_fn); ++ js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); ++ js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); ++ js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); ++ js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); ++ js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); ++ js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); ++ js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); ++ js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); + -+/** -+ * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an -+ * array -+ * -+ * @file: A virtual file previously opened by calling single_open. -+ * @ubuf: Source address in user space. -+ * @count: Number of bytes written to the virtual file. -+ * @nelems: Number of elements in the array. -+ * @set_attr_fn: Function to be called back for each array element. -+ * -+ * The virtual file must have been opened by calling single_open and passing -+ * the address of an object that can be accessed like an array. -+ * -+ * The given function is called once for each attribute value found in the -+ * data written to the virtual file. For further details, refer to the -+ * description of set_attr_from_string. -+ * -+ * Return: 0 if success, negative error code otherwise. -+ */ -+int kbase_debugfs_helper_seq_write(struct file *file, -+ const char __user *ubuf, size_t count, -+ size_t nelems, -+ kbase_debugfs_helper_set_attr_fn *set_attr_fn); ++#undef GET_TIMEOUT + -+#endif /*_KBASE_DEBUGFS_HELPER_H_ */ ++ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", ++ js_soft_stop_ms, js_soft_stop_ms_cl, ++ js_hard_stop_ms_ss, js_hard_stop_ms_cl, ++ js_hard_stop_ms_dumping, js_reset_ms_ss, ++ js_reset_ms_cl, js_reset_ms_dumping); ++ ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } ++ ++ return ret; ++} + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h -new file mode 100644 -index 000000000..809e73000 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h -@@ -0,0 +1,2105 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* ++ * The sysfs file js_timeouts. + * -+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * This is used to override the current job scheduler values for ++ * JS_STOP_STOP_TICKS_SS ++ * JS_STOP_STOP_TICKS_CL ++ * JS_HARD_STOP_TICKS_SS ++ * JS_HARD_STOP_TICKS_CL ++ * JS_HARD_STOP_TICKS_DUMPING ++ * JS_RESET_TICKS_SS ++ * JS_RESET_TICKS_CL ++ * JS_RESET_TICKS_DUMPING. + */ ++static DEVICE_ATTR_RW(js_timeouts); ++ ++static u32 get_new_js_timeout( ++ u32 old_period, ++ u32 old_ticks, ++ u32 new_scheduling_period_ns) ++{ ++ u64 ticks = (u64)old_period * (u64)old_ticks; ++ ++ do_div(ticks, new_scheduling_period_ns); ++ return ticks?ticks:1; ++} + +/** -+ * DOC: Defintions (types, defines, etcs) common to Kbase. They are placed here -+ * to allow the hierarchy of header files to work. ++ * js_scheduling_period_store - Store callback for the js_scheduling_period sysfs ++ * file ++ * @dev: The device the sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file ++ * ++ * This function is called when the js_scheduling_period sysfs file is written ++ * to. It checks the data written, and if valid updates the js_scheduling_period ++ * value ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ ++static ssize_t js_scheduling_period_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int js_scheduling_period; ++ u32 new_scheduling_period_ns; ++ u32 old_period; ++ struct kbasep_js_device_data *js_data; ++ unsigned long flags; + -+#ifndef _KBASE_DEFS_H_ -+#define _KBASE_DEFS_H_ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ js_data = &kbdev->js_data; + -+#if MALI_USE_CSF -+#include -+#else -+#include -+#include -+#endif ++ ret = kstrtouint(buf, 0, &js_scheduling_period); ++ if (ret || !js_scheduling_period) { ++ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+#include ++ new_scheduling_period_ns = js_scheduling_period * 1000000; + -+#include -+#include -+#include -+#include -+#include ++ /* Update scheduling timeouts */ ++ mutex_lock(&js_data->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ++ /* If no contexts have been scheduled since js_timeouts was last written ++ * to, the new timeouts might not have been latched yet. So check if an ++ * update is pending and use the new values if necessary. ++ */ + -+#include "mali_kbase_fence_defs.h" ++ /* Use previous 'new' scheduling period as a base if present. 
*/ ++ old_period = js_data->scheduling_period_ns; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+#include -+#endif /* CONFIG_DEBUG_FS */ ++#define SET_TIMEOUT(name) \ ++ (js_data->name = get_new_js_timeout(\ ++ old_period, \ ++ kbdev->js_data.name, \ ++ new_scheduling_period_ns)) + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+#include -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ SET_TIMEOUT(soft_stop_ticks); ++ SET_TIMEOUT(soft_stop_ticks_cl); ++ SET_TIMEOUT(hard_stop_ticks_ss); ++ SET_TIMEOUT(hard_stop_ticks_cl); ++ SET_TIMEOUT(hard_stop_ticks_dumping); ++ SET_TIMEOUT(gpu_reset_ticks_ss); ++ SET_TIMEOUT(gpu_reset_ticks_cl); ++ SET_TIMEOUT(gpu_reset_ticks_dumping); + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+#include -+#endif ++#undef SET_TIMEOUT + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ js_data->scheduling_period_ns = new_scheduling_period_ns; + -+#include -+#include -+#include -+#include ++ kbase_js_set_timeouts(kbdev); + -+#include "debug/mali_kbase_debug_ktrace_defs.h" ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_data->runpool_mutex); + -+/** Number of milliseconds before we time out on a GPU soft/hard reset */ -+#define RESET_TIMEOUT 500 ++ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", ++ js_scheduling_period); + -+/** -+ * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware. -+ * -+ * You can optimize this down if your target devices will only ever support a -+ * small number of job slots. -+ */ -+#define BASE_JM_MAX_NR_SLOTS 3 ++ return count; ++} + +/** -+ * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware. ++ * js_scheduling_period_show - Show callback for the js_scheduling_period sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. + * -+ * You can optimize this down if your target devices will only ever support a -+ * small number of Address Spaces ++ * This function is called to get the current period used for the JS scheduling ++ * period. ++ * ++ * Return: The number of bytes output to @buf. 
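When js_scheduling_period is changed, each tick-based limit is rescaled through get_new_js_timeout() so that the wall-clock timeout stays approximately constant: new_ticks = old_period * old_ticks / new_period, never less than one tick. The numeric check below uses assumed 100 ms and 50 ms periods to illustrate the rounding behaviour.

/* Numeric check of the rescaling done when js_scheduling_period changes:
 * the wall-clock timeout (ticks * period) is preserved as closely as the
 * integer division allows, with a floor of one tick.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t rescale_ticks(uint32_t old_period_ns, uint32_t old_ticks,
                              uint32_t new_period_ns)
{
    uint64_t ticks = (uint64_t)old_period_ns * old_ticks / new_period_ns;

    return ticks ? (uint32_t)ticks : 1;
}

int main(void)
{
    /* 30 ticks of a 100 ms period (3 s) become 60 ticks of a 50 ms period. */
    printf("%u\n", rescale_ticks(100000000u, 30, 50000000u));
    /* A much longer new period still leaves at least one tick. */
    printf("%u\n", rescale_ticks(100000000u, 1, 1000000000u));
    return 0;
}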
+ */ -+#define BASE_MAX_NR_AS 16 ++static ssize_t js_scheduling_period_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ u32 period; ++ ssize_t ret; + -+/* mmu */ -+#define MIDGARD_MMU_LEVEL(x) (x) ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) ++ period = kbdev->js_data.scheduling_period_ns; + -+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ++ period / 1000000); + -+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) ++ return ret; ++} + -+/** setting in kbase_context::as_nr that indicates it's invalid */ -+#define KBASEP_AS_NR_INVALID (-1) ++static DEVICE_ATTR_RW(js_scheduling_period); + -+/** -+ * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region, -+ * as a logarithm -+ */ -+#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */ + -+/** -+ * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones -+ */ -+#if MALI_USE_CSF -+#define KBASE_REG_ZONE_MAX 6ul -+#else -+#define KBASE_REG_ZONE_MAX 4ul -+#endif ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++static ssize_t js_softstop_always_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ int softstop_always; + -+#include "mali_kbase_hwaccess_defs.h" ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/* Maximum number of pages of memory that require a permanent mapping, per -+ * kbase_context -+ */ -+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT) -+/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer -+ * clients, to reduce undesired system load. -+ * If a virtualizer client requests a dump within this threshold period after -+ * some other client has performed a dump, a new dump won't be performed and -+ * the accumulated counter values for that client will be returned instead. -+ */ -+#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) ++ ret = kstrtoint(buf, 0, &softstop_always); ++ if (ret || ((softstop_always != 0) && (softstop_always != 1))) { ++ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+#if MALI_USE_CSF -+/* The buffer count of CSF hwcnt backend ring buffer, which is used when CSF -+ * hwcnt backend allocate the ring buffer to communicate with CSF firmware for -+ * HWC dump samples. -+ * To meet the hardware requirement, this number MUST be power of 2, otherwise, -+ * CSF hwcnt backend creation will be failed. -+ */ -+#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128) -+#endif ++ kbdev->js_data.softstop_always = (bool) softstop_always; ++ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", ++ (kbdev->js_data.softstop_always) ? ++ "Enabled" : "Disabled"); ++ return count; ++} + -+/* Maximum number of clock/regulator pairs that may be referenced by -+ * the device node. -+ * This is dependent on support for of_property_read_u64_array() in the -+ * kernel. -+ * While, the number of clocks could be more than regulators, -+ * as mentioned in power_control_init(). 
-+ */ -+#define BASE_MAX_NR_CLOCKS_REGULATORS (4) ++static ssize_t js_softstop_always_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+/* Forward declarations */ -+struct kbase_context; -+struct kbase_device; -+struct kbase_as; -+struct kbase_mmu_setup; -+struct kbase_kinstr_jm; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/** -+ * struct kbase_io_access - holds information about 1 register access -+ * -+ * @addr: first bit indicates r/w (r=0, w=1) -+ * @value: value written or read -+ */ -+struct kbase_io_access { -+ uintptr_t addr; -+ u32 value; -+}; ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); + -+/** -+ * struct kbase_io_history - keeps track of all recent register accesses -+ * -+ * @enabled: true if register accesses are recorded, false otherwise -+ * @lock: spinlock protecting kbase_io_access array -+ * @count: number of registers read/written -+ * @size: number of elements in kbase_io_access array -+ * @buf: array of kbase_io_access -+ */ -+struct kbase_io_history { -+ bool enabled; ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+ spinlock_t lock; -+ size_t count; -+ u16 size; -+ struct kbase_io_access *buf; -+}; ++ return ret; ++} + -+/** -+ * struct kbase_debug_copy_buffer - information about the buffer to be copied. -+ * -+ * @size: size of the buffer in bytes -+ * @pages: pointer to an array of pointers to the pages which contain -+ * the buffer -+ * @is_vmalloc: true if @pages was allocated with vzalloc. false if @pages was -+ * allocated with kcalloc -+ * @nr_pages: number of pages -+ * @offset: offset into the pages -+ * @gpu_alloc: pointer to physical memory allocated by the GPU -+ * @extres_pages: array of pointers to the pages containing external resources -+ * for this buffer -+ * @nr_extres_pages: number of pages in @extres_pages ++/* ++ * By default, soft-stops are disabled when only a single context is present. ++ * The ability to enable soft-stop when only a single context is present can be ++ * used for debug and unit-testing purposes. ++ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) + */ -+struct kbase_debug_copy_buffer { -+ size_t size; -+ struct page **pages; -+ bool is_vmalloc; -+ int nr_pages; -+ size_t offset; -+ struct kbase_mem_phy_alloc *gpu_alloc; ++static DEVICE_ATTR_RW(js_softstop_always); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++#endif /* !MALI_USE_CSF */ + -+ struct page **extres_pages; -+ int nr_extres_pages; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++typedef void kbasep_debug_command_func(struct kbase_device *); ++ ++enum kbasep_debug_command_code { ++ KBASEP_DEBUG_COMMAND_DUMPTRACE, ++ ++ /* This must be the last enum */ ++ KBASEP_DEBUG_COMMAND_COUNT +}; + -+struct kbase_device_info { -+ u32 features; ++struct kbasep_debug_command { ++ char *str; ++ kbasep_debug_command_func *func; +}; + -+struct kbase_mmu_setup { -+ u64 transtab; -+ u64 memattr; -+ u64 transcfg; ++static void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) ++{ ++ KBASE_KTRACE_DUMP(kbdev); ++} ++ ++/* Debug commands supported by the driver */ ++static const struct kbasep_debug_command debug_commands[] = { ++ { ++ .str = "dumptrace", ++ .func = &kbasep_ktrace_dump_wrapper, ++ } +}; + +/** -+ * struct kbase_fault - object containing data relating to a page or bus fault. -+ * @addr: Records the faulting address. 
-+ * @extra_addr: Records the secondary fault address. -+ * @status: Records the fault status as reported by Hw. -+ * @protected_mode: Flag indicating whether the fault occurred in protected mode -+ * or not. -+ */ -+struct kbase_fault { -+ u64 addr; -+ u64 extra_addr; -+ u32 status; -+ bool protected_mode; -+}; -+ -+/** Maximum number of memory pages that should be allocated for the array -+ * of pointers to free PGDs. ++ * debug_command_show - Show callback for the debug_command sysfs file. + * -+ * This number has been pre-calculated to deal with the maximum allocation -+ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE. -+ * This is supposed to be enough for almost the entirety of MMU operations. -+ * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down -+ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE -+ * bytes. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes. ++ * This function is called to get the contents of the debug_command sysfs ++ * file. This is a list of the available debug commands, separated by newlines. ++ * ++ * Return: The number of bytes output to @buf. + */ -+#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9) ++static ssize_t debug_command_show(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ int i; ++ ssize_t ret = 0; + -+/* Maximum number of pointers to free PGDs */ -+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS) ++ kbdev = to_kbase_device(dev); + -+/** -+ * struct kbase_mmu_table - object representing a set of GPU page tables -+ * @mmu_lock: Lock to serialize the accesses made to multi level GPU -+ * page tables -+ * @pgd: Physical address of the page allocated for the top -+ * level page table of the context, this is used for -+ * MMU HW programming as the address translation will -+ * start from the top level page table. -+ * @group_id: A memory group ID to be passed to a platform-specific -+ * memory group manager. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @kctx: If this set of MMU tables belongs to a context then -+ * this is a back-reference to the context, otherwise -+ * it is NULL. -+ * @scratch_mem: Scratch memory used for MMU operations, which are -+ * serialized by the @mmu_lock. -+ */ -+struct kbase_mmu_table { -+ struct mutex mmu_lock; -+ phys_addr_t pgd; -+ u8 group_id; -+ struct kbase_context *kctx; -+ union { -+ /** -+ * @teardown_pages: Scratch memory used for backup copies of whole -+ * PGD pages when tearing down levels upon -+ * termination of the MMU table. -+ */ -+ struct { -+ /** -+ * @levels: Array of PGD pages, large enough to copy one PGD -+ * for each level of the MMU table. -+ */ -+ u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; -+ } teardown_pages; -+ /** -+ * @free_pgds: Scratch memory user for insertion, update and teardown -+ * operations to store a temporary list of PGDs to be freed -+ * at the end of the operation. -+ */ -+ struct { -+ /** @pgds: Array of pointers to PGDs to free. */ -+ struct page *pgds[MAX_FREE_PGDS]; -+ /** @head_index: Index of first free element in the PGDs array. 
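The debug_command file in this hunk is backed by a small table of name/handler pairs (debug_commands[]); reading lists the command names and writing a name runs the matching handler, with -EINVAL returned for unknown commands. The sketch below shows the same dispatch pattern in isolation; strcmp() stands in for the kernel's sysfs_streq() and the command set here is an example only.

/* Minimal stand-alone illustration of the command-table dispatch used by
 * the debug_command sysfs file: commands are matched by name and the
 * associated handler is invoked.
 */
#include <stdio.h>
#include <string.h>

struct command {
    const char *str;
    void (*func)(void);
};

static void dump_trace(void) { puts("dumping trace"); }

static const struct command commands[] = {
    { .str = "dumptrace", .func = dump_trace },
};

static int run_command(const char *name)
{
    size_t i;

    for (i = 0; i < sizeof(commands) / sizeof(commands[0]); i++) {
        if (strcmp(commands[i].str, name) == 0) {
            commands[i].func();
            return 0;
        }
    }
    return -1; /* mirrors the -EINVAL "command not known" path */
}

int main(void)
{
    return run_command("dumptrace");
}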
*/ -+ size_t head_index; -+ } free_pgds; -+ } scratch_mem; -+}; ++ if (!kbdev) ++ return -ENODEV; ++ ++ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); ++ ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } ++ ++ return ret; ++} + +/** -+ * struct kbase_reg_zone - Information about GPU memory region zones -+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of -+ * the Zone -+ * @va_size_pages: Size of the Zone in pages ++ * debug_command_store - Store callback for the debug_command sysfs file. + * -+ * Track information about a zone KBASE_REG_ZONE() and related macros. -+ * In future, this could also store the &rb_root that are currently in -+ * &kbase_context and &kbase_csf_device. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the debug_command sysfs file is written to. ++ * It matches the requested command against the available commands, and if ++ * a matching command is found calls the associated function from ++ * @debug_commands to issue the command. ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+struct kbase_reg_zone { -+ u64 base_pfn; -+ u64 va_size_pages; -+}; ++static ssize_t debug_command_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int i; + -+#if MALI_USE_CSF -+#include "csf/mali_kbase_csf_defs.h" -+#else -+#include "jm/mali_kbase_jm_defs.h" -+#endif ++ kbdev = to_kbase_device(dev); + -+#include "mali_kbase_hwaccess_time.h" ++ if (!kbdev) ++ return -ENODEV; + -+static inline int kbase_as_has_bus_fault(struct kbase_as *as, -+ struct kbase_fault *fault) -+{ -+ return (fault == &as->bf_data); -+} ++ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { ++ if (sysfs_streq(debug_commands[i].str, buf)) { ++ debug_commands[i].func(kbdev); ++ return count; ++ } ++ } + -+static inline int kbase_as_has_page_fault(struct kbase_as *as, -+ struct kbase_fault *fault) -+{ -+ return (fault == &as->pf_data); ++ /* Debug Command not found */ ++ dev_err(dev, "debug_command: command not known\n"); ++ return -EINVAL; +} + -+/** -+ * struct kbasep_mem_device - Data stored per device for memory allocation ++/* The sysfs file debug_command. + * -+ * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is -+ * allocated/freed. -+ * @ir_threshold: Fraction of the maximum size of an allocation that grows -+ * on GPU page fault that can be used before the driver -+ * switches to incremental rendering, in 1/256ths. -+ * 0 means disabled. ++ * This is used to issue general debug commands to the device driver. ++ * Reading it will produce a list of debug commands, separated by newlines. ++ * Writing to it with one of those commands will issue said command. + */ -+struct kbasep_mem_device { -+ atomic_t used_pages; -+ atomic_t ir_threshold; -+}; -+ -+struct kbase_clk_rate_listener; ++static DEVICE_ATTR_RW(debug_command); ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + +/** -+ * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback ++ * gpuinfo_show - Show callback for the gpuinfo sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. 
++ * @buf: The output buffer to receive the GPU information. + * -+ * @listener: Clock frequency change listener. -+ * @clk_index: Index of the clock for which the change has occurred. -+ * @clk_rate_hz: Clock frequency(Hz). ++ * This function is called to get a description of the present Mali ++ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the ++ * number of cores, the hardware version and the raw product id. For ++ * example + * -+ * A callback to call when clock rate changes. The function must not -+ * sleep. No clock rate manager functions must be called from here, as -+ * its lock is taken. -+ */ -+typedef void -+kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, -+ u32 clk_index, u32 clk_rate_hz); -+ -+/** -+ * struct kbase_clk_rate_listener - Clock frequency listener ++ * Mali-T60x MP4 r0p0 0x6956 + * -+ * @node: List node. -+ * @notify: Callback to be called when GPU frequency changes. ++ * Return: The number of bytes output to @buf. + */ -+struct kbase_clk_rate_listener { -+ struct list_head node; -+ kbase_clk_rate_listener_on_change_t *notify; -+}; ++static ssize_t gpuinfo_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ static const struct gpu_product_id_name { ++ unsigned int id; ++ char *name; ++ } gpu_product_id_names[] = { ++ { .id = GPU_ID2_PRODUCT_TMIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G71" }, ++ { .id = GPU_ID2_PRODUCT_THEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G72" }, ++ { .id = GPU_ID2_PRODUCT_TSIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G51" }, ++ { .id = GPU_ID2_PRODUCT_TNOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G76" }, ++ { .id = GPU_ID2_PRODUCT_TDVX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G31" }, ++ { .id = GPU_ID2_PRODUCT_TGOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G52" }, ++ { .id = GPU_ID2_PRODUCT_TTRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G77" }, ++ { .id = GPU_ID2_PRODUCT_TBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G78" }, ++ { .id = GPU_ID2_PRODUCT_TBAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G78AE" }, ++ { .id = GPU_ID2_PRODUCT_LBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G68" }, ++ { .id = GPU_ID2_PRODUCT_TNAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G57" }, ++ { .id = GPU_ID2_PRODUCT_TODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G710" }, ++ { .id = GPU_ID2_PRODUCT_LODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G610" }, ++ { .id = GPU_ID2_PRODUCT_TGRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G510" }, ++ { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G310" }, ++ { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-TTIX" }, ++ { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-LTIX" }, ++ }; ++ const char *product_name = "(Unknown Mali GPU)"; ++ struct kbase_device *kbdev; ++ u32 gpu_id; ++ unsigned int product_id, product_id_mask; ++ unsigned int i; ++ struct kbase_gpu_props *gpu_props; + -+/** -+ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock -+ * rate trace manager. -+ * -+ * @gpu_idle: Tracks the idle state of GPU. -+ * @clks: Array of pointer to structures storing data for every -+ * enumerated GPU clock. 
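gpuinfo_show() in this hunk derives the product id by shifting the raw GPU_ID register value and masks off the version fields before comparing against its product table, then reports the name, core count and rMAJORpMINOR revision. The toy decoder below assumes the usual Mali GPU_ID layout (product id in bits 31:16, major version in bits 15:12, minor version in bits 11:4) and an invented register value; the driver itself takes these definitions from its register headers rather than from hard-coded shifts.

/* Toy decoder for the GPU_ID value used by gpuinfo_show(). The bit
 * positions are assumptions based on the usual Mali GPU_ID layout.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint32_t gpu_id = 0x74021000; /* made-up example value */
    unsigned int product_id = gpu_id >> 16;
    unsigned int version_major = (gpu_id >> 12) & 0xf;
    unsigned int version_minor = (gpu_id >> 4) & 0xff;

    printf("product 0x%04X r%up%u\n", product_id, version_major, version_minor);
    return 0;
}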
-+ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace -+ * operations. -+ * @listeners: List of listener attached. -+ * @lock: Lock to serialize the actions of GPU clock rate trace -+ * manager. -+ */ -+struct kbase_clk_rate_trace_manager { -+ bool gpu_idle; -+ struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops; -+ struct list_head listeners; -+ spinlock_t lock; -+}; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/** -+ * struct kbase_pm_device_data - Data stored per device for power management. -+ * @lock: The lock protecting Power Management structures accessed -+ * outside of IRQ. -+ * This lock must also be held whenever the GPU is being -+ * powered on or off. -+ * @active_count: The reference count of active contexts on this device. -+ * Note that some code paths keep shaders/the tiler -+ * powered whilst this is 0. -+ * Use kbase_pm_is_active() instead to check for such cases. -+ * @suspending: Flag indicating suspending/suspended -+ * @runtime_active: Flag to track if the GPU is in runtime suspended or active -+ * state. This ensures that runtime_put and runtime_get -+ * functions are called in pairs. For example if runtime_get -+ * has already been called from the power_on callback, then -+ * the call to it from runtime_gpu_active callback can be -+ * skipped. -+ * @gpu_lost: Flag indicating gpu lost -+ * This structure contains data for the power management framework. -+ * There is one instance of this structure per device in the system. -+ * @zero_active_count_wait: Wait queue set when active_count == 0 -+ * @resume_wait: system resume of GPU device. -+ * @debug_core_mask: Bit masks identifying the available shader cores that are -+ * specified via sysfs. One mask per job slot. -+ * @debug_core_mask_all: Bit masks identifying the available shader cores that -+ * are specified via sysfs. -+ * @callback_power_runtime_init: Callback for initializing the runtime power -+ * management. Return 0 on success, else error code -+ * @callback_power_runtime_term: Callback for terminating the runtime power -+ * management. -+ * @dvfs_period: Time in milliseconds between each dvfs sample -+ * @backend: KBase PM backend data -+ * @arb_vm_state: The state of the arbiter VM machine -+ * @gpu_users_waiting: Used by virtualization to notify the arbiter that there -+ * are users waiting for the GPU so that it can request -+ * and resume the driver. -+ * @clk_rtm: The state of the GPU clock rate trace manager -+ */ -+struct kbase_pm_device_data { -+ struct mutex lock; -+ int active_count; -+ bool suspending; -+#if MALI_USE_CSF -+ bool runtime_active; -+#endif -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ atomic_t gpu_lost; -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ wait_queue_head_t zero_active_count_wait; -+ wait_queue_head_t resume_wait; ++ gpu_props = &kbdev->gpu_props; ++ gpu_id = gpu_props->props.raw_props.gpu_id; ++ product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { ++ const struct gpu_product_id_name *p = &gpu_product_id_names[i]; ++ ++ if ((p->id & product_id_mask) == ++ (product_id & product_id_mask)) { ++ product_name = p->name; ++ break; ++ } ++ } + +#if MALI_USE_CSF -+ u64 debug_core_mask; -+#else -+ /* One mask per job slot. 
*/ -+ u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; -+ u64 debug_core_mask_all; ++ if ((product_id & product_id_mask) == ++ ((GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { ++ const bool rt_supported = ++ GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); ++ const u8 nr_cores = gpu_props->num_cores; ++ ++ /* Mali-G715-Immortalis if 10 < number of cores with ray tracing supproted. ++ * Mali-G715 if 10 < number of cores without ray tracing supported. ++ * Mali-G715 if 7 <= number of cores <= 10 regardless ray tracing. ++ * Mali-G615 if number of cores < 7. ++ */ ++ if ((nr_cores > 10) && rt_supported) ++ product_name = "Mali-G715-Immortalis"; ++ else if (nr_cores >= 7) ++ product_name = "Mali-G715"; ++ ++ if (nr_cores < 7) { ++ dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be G615", nr_cores); ++ product_name = "Mali-G615"; ++ } else ++ dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, ++ nr_cores); ++ } +#endif /* MALI_USE_CSF */ + -+ int (*callback_power_runtime_init)(struct kbase_device *kbdev); -+ void (*callback_power_runtime_term)(struct kbase_device *kbdev); -+ u32 dvfs_period; -+ struct kbase_pm_backend_data backend; -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ struct kbase_arbiter_vm_state *arb_vm_state; -+ atomic_t gpu_users_waiting; -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ struct kbase_clk_rate_trace_manager clk_rtm; -+}; ++ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, ++ kbdev->gpu_props.num_cores, ++ (gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID_VERSION_MINOR) >> KBASE_GPU_ID_VERSION_MINOR_SHIFT, ++ product_id); ++} ++static DEVICE_ATTR_RO(gpuinfo); + +/** -+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev -+ * @kbdev: Kbase device where memory is used -+ * @cur_size: Number of free pages currently in the pool (may exceed -+ * @max_size in some corner cases) -+ * @max_size: Maximum number of free pages in the pool -+ * @order: order = 0 refers to a pool of 4 KB pages -+ * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) -+ * @group_id: A memory group ID to be passed to a platform-specific -+ * memory group manager, if present. Immutable. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @pool_lock: Lock protecting the pool - must be held when modifying -+ * @cur_size and @page_list -+ * @page_list: List of free pages in the pool -+ * @reclaim: Shrinker for kernel reclaim of free pages -+ * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation. -+ * This is used to avoid race condition between pool termination -+ * and page isolation for page migration. -+ * @next_pool: Pointer to next pool where pages can be allocated when this -+ * pool is empty. Pages will spill over to the next pool when -+ * this pool is full. Can be NULL if there is no next pool. -+ * @dying: true if the pool is being terminated, and any ongoing -+ * operations should be abandoned -+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from -+ * this pool, eg during a grow operation ++ * dvfs_period_store - Store callback for the dvfs_period sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the dvfs_period sysfs file is written to. 
It ++ * checks the data written, and if valid updates the DVFS period variable, ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+struct kbase_mem_pool { ++static ssize_t dvfs_period_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ + struct kbase_device *kbdev; -+ size_t cur_size; -+ size_t max_size; -+ u8 order; -+ u8 group_id; -+ spinlock_t pool_lock; -+ struct list_head page_list; -+ struct shrinker reclaim; -+ atomic_t isolation_in_progress_cnt; ++ int ret; ++ int dvfs_period; + -+ struct kbase_mem_pool *next_pool; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ bool dying; -+ bool dont_reclaim; -+}; ++ ret = kstrtoint(buf, 0, &dvfs_period); ++ if (ret || dvfs_period <= 0) { ++ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+/** -+ * struct kbase_mem_pool_group - a complete set of physical memory pools. -+ * -+ * @small: Array of objects containing the state for pools of 4 KiB size -+ * physical pages. -+ * @large: Array of objects containing the state for pools of 2 MiB size -+ * physical pages. -+ * -+ * Memory pools are used to allow efficient reallocation of previously-freed -+ * physical pages. A pair of memory pools is initialized for each physical -+ * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays -+ * should be indexed by physical memory group ID, the meaning of which is -+ * defined by the systems integrator. -+ */ -+struct kbase_mem_pool_group { -+ struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; -+ struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS]; -+}; ++ kbdev->pm.dvfs_period = dvfs_period; ++ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); + -+/** -+ * struct kbase_mem_pool_config - Initial configuration for a physical memory -+ * pool -+ * -+ * @max_size: Maximum number of free pages that the pool can hold. -+ */ -+struct kbase_mem_pool_config { -+ size_t max_size; -+}; ++ return count; ++} + +/** -+ * struct kbase_mem_pool_group_config - Initial configuration for a complete -+ * set of physical memory pools ++ * dvfs_period_show - Show callback for the dvfs_period sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. + * -+ * @small: Array of initial configuration for pools of 4 KiB pages. -+ * @large: Array of initial configuration for pools of 2 MiB pages. ++ * This function is called to get the current period used for the DVFS sample ++ * timer. + * -+ * This array should be indexed by physical memory group ID, the meaning -+ * of which is defined by the systems integrator. ++ * Return: The number of bytes output to @buf. + */ -+struct kbase_mem_pool_group_config { -+ struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; -+ struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS]; -+}; ++static ssize_t dvfs_period_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+/** -+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP -+ * frequency, real frequencies and core mask -+ * @real_freqs: Real GPU frequencies. -+ * @opp_volts: OPP voltages. 
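The product-naming logic added to gpuinfo_show() earlier in this hunk amounts to three core-count bands. The standalone C sketch below is illustrative only (it is not part of the patch) and simply restates those rules: more than 10 cores reports Mali-G715-Immortalis when ray tracing is supported and Mali-G715 otherwise, 7 to 10 cores reports Mali-G715, and fewer than 7 cores reports Mali-G615. The helper name and the sample core counts are invented for the example.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative restatement of the TTUX naming rules used by gpuinfo_show();
 * not part of the kernel patch.
 */
static const char *ttux_product_name(unsigned int nr_cores, bool rt_supported)
{
    if (nr_cores > 10)
        return rt_supported ? "Mali-G715-Immortalis" : "Mali-G715";
    if (nr_cores >= 7)
        return "Mali-G715";
    return "Mali-G615";
}

int main(void)
{
    printf("%s\n", ttux_product_name(16, true));  /* Mali-G715-Immortalis */
    printf("%s\n", ttux_product_name(6, false));  /* Mali-G615 */
    return 0;
}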
-+ * @opp_freq: Nominal OPP frequency -+ * @core_mask: Shader core mask -+ */ -+struct kbase_devfreq_opp { -+ u64 opp_freq; -+ u64 core_mask; -+ u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; -+}; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/* MMU mode flags */ -+#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); ++ ++ return ret; ++} ++ ++static DEVICE_ATTR_RW(dvfs_period); ++ ++int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) ++{ ++ /* Uses default reference frequency defined in below macro */ ++ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; ++ ++ /* Only check lowest frequency in cases when OPPs are used and ++ * present in the device tree. ++ */ ++#ifdef CONFIG_PM_OPP ++ struct dev_pm_opp *opp_ptr; ++ unsigned long found_freq = 0; ++ ++ /* find lowest frequency OPP */ ++ opp_ptr = dev_pm_opp_find_freq_ceil(kbdev->dev, &found_freq); ++ if (IS_ERR(opp_ptr)) { ++ dev_err(kbdev->dev, "No OPPs found in device tree! Scaling timeouts using %llu kHz", ++ (unsigned long long)lowest_freq_khz); ++ } else { ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++ dev_pm_opp_put(opp_ptr); /* decrease OPP refcount */ ++#endif ++ /* convert found frequency to KHz */ ++ found_freq /= 1000; ++ ++ /* If lowest frequency in OPP table is still higher ++ * than the reference, then keep the reference frequency ++ * as the one to use for scaling . ++ */ ++ if (found_freq < lowest_freq_khz) ++ lowest_freq_khz = found_freq; ++ } ++#else ++ dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT"); ++#endif ++ ++ kbdev->lowest_gpu_freq_khz = lowest_freq_khz; ++ dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); ++ return 0; ++} + +/** -+ * struct kbase_mmu_mode - object containing pointer to methods invoked for -+ * programming the MMU, as per the MMU mode supported -+ * by Hw. -+ * @update: enable & setup/configure one of the GPU address space. -+ * @get_as_setup: retrieve the configuration of one of the GPU address space. -+ * @disable_as: disable one of the GPU address space. -+ * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. -+ * @ate_is_valid: check if the pte is a valid address translation entry -+ * encoding the physical address of the actual mapped page. -+ * @pte_is_valid: check if the pte is a valid entry encoding the physical -+ * address of the next lower level page table. -+ * @entry_set_ate: program the pte to be a valid address translation entry to -+ * encode the physical address of the actual page being mapped. -+ * @entry_set_pte: program the pte to be a valid entry to encode the physical -+ * address of the next lower level page table and also update -+ * the number of valid entries. -+ * @entries_invalidate: clear out or invalidate a range of ptes. -+ * @get_num_valid_entries: returns the number of valid entries for a specific pgd. -+ * @set_num_valid_entries: sets the number of valid entries for a specific pgd -+ * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. ++ * pm_poweroff_store - Store callback for the pm_poweroff sysfs file. 
++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the pm_poweroff sysfs file is written to. ++ * ++ * This file contains three values separated by whitespace. The values ++ * are gpu_poweroff_time (the period of the poweroff timer, in ns), ++ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle ++ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer ++ * ticks before the GPU is powered off), in that order. ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+struct kbase_mmu_mode { -+ void (*update)(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, -+ int as_nr); -+ void (*get_as_setup)(struct kbase_mmu_table *mmut, -+ struct kbase_mmu_setup * const setup); -+ void (*disable_as)(struct kbase_device *kbdev, int as_nr); -+ phys_addr_t (*pte_to_phy_addr)(u64 entry); -+ int (*ate_is_valid)(u64 ate, int level); -+ int (*pte_is_valid)(u64 pte, int level); -+ void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, -+ unsigned long flags, int level); -+ void (*entry_set_pte)(u64 *entry, phys_addr_t phy); -+ void (*entries_invalidate)(u64 *entry, u32 count); -+ unsigned int (*get_num_valid_entries)(u64 *pgd); -+ void (*set_num_valid_entries)(u64 *pgd, -+ unsigned int num_of_valid_entries); ++static ssize_t pm_poweroff_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ struct kbasep_pm_tick_timer_state *stt; ++ int items; ++ u64 gpu_poweroff_time; ++ unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; + unsigned long flags; -+}; + -+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#define DEVNAME_SIZE 16 ++ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, ++ &poweroff_shader_ticks, ++ &poweroff_gpu_ticks); ++ if (items != 3) { ++ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+/** -+ * enum kbase_devfreq_work_type - The type of work to perform in the devfreq -+ * suspend/resume worker. -+ * @DEVFREQ_WORK_NONE: Initilisation state. -+ * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device(). -+ * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device(). -+ */ -+enum kbase_devfreq_work_type { -+ DEVFREQ_WORK_NONE, -+ DEVFREQ_WORK_SUSPEND, -+ DEVFREQ_WORK_RESUME -+}; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/** -+ * struct kbase_devfreq_queue_info - Object representing an instance for managing -+ * the queued devfreq suspend/resume works. -+ * @workq: Workqueue for devfreq suspend/resume requests -+ * @work: Work item for devfreq suspend & resume -+ * @req_type: Requested work type to be performed by the devfreq -+ * suspend/resume worker -+ * @acted_type: Work type has been acted on by the worker, i.e. 
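As a usage sketch for the dvfs_period attribute added above (not part of the patch), the value can be set and read back from user space with ordinary file I/O. The sysfs path is an assumption: kbase attributes are created on the GPU device, which is commonly reachable as /sys/class/misc/mali0/device, but the exact location depends on the platform.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed attribute location; adjust for the actual device. */
#define DVFS_PERIOD_ATTR "/sys/class/misc/mali0/device/dvfs_period"

int main(void)
{
    const char *new_period_ms = "200";
    char readback[32] = { 0 };
    int fd = open(DVFS_PERIOD_ATTR, O_RDWR);

    if (fd < 0) {
        perror("open dvfs_period");
        return 1;
    }

    /* dvfs_period_store() rejects non-numeric or non-positive values with
     * -EINVAL, so check the result of the write.
     */
    if (write(fd, new_period_ms, strlen(new_period_ms)) < 0)
        perror("write dvfs_period");

    /* Read back the value reported by dvfs_period_show(). */
    if (pread(fd, readback, sizeof(readback) - 1, 0) > 0)
        printf("dvfs_period is now %s", readback);

    close(fd);
    return 0;
}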
the -+ * internal recorded state of the suspend/resume -+ */ -+struct kbase_devfreq_queue_info { -+ struct workqueue_struct *workq; -+ struct work_struct work; -+ enum kbase_devfreq_work_type req_type; -+ enum kbase_devfreq_work_type acted_type; -+}; ++ stt = &kbdev->pm.backend.shader_tick_timer; ++ stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); ++ stt->default_ticks = poweroff_shader_ticks; ++ stt->configured_ticks = stt->default_ticks; + -+/** -+ * struct kbase_process - Representing an object of a kbase process instantiated -+ * when the first kbase context is created under it. -+ * @tgid: Thread group ID. -+ * @total_gpu_pages: Total gpu pages allocated across all the contexts -+ * of this process, it accounts for both native allocations -+ * and dma_buf imported allocations. -+ * @kctx_list: List of kbase contexts created for the process. -+ * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree -+ * based on key tgid, kprcs_node is the node link to -+ * &struct_kbase_device.process_root. -+ * @dma_buf_root: RB tree of the dma-buf imported allocations, imported -+ * across all the contexts created for this process. -+ * Used to ensure that pages of allocation are accounted -+ * only once for the process, even if the allocation gets -+ * imported multiple times for the process. -+ */ -+struct kbase_process { -+ pid_t tgid; -+ size_t total_gpu_pages; -+ struct list_head kctx_list; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ struct rb_node kprcs_node; -+ struct rb_root dma_buf_root; -+}; ++ if (poweroff_gpu_ticks != 0) ++ dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); ++ ++ return count; ++} + +/** -+ * struct kbase_mem_migrate - Object representing an instance for managing -+ * page migration. ++ * pm_poweroff_show - Show callback for the pm_poweroff sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. + * -+ * @free_pages_list: List of deferred pages to free. Mostly used when page migration -+ * is enabled. Pages in memory pool that require migrating -+ * will be freed instead. However page cannot be freed -+ * right away as Linux will need to release the page lock. -+ * Therefore page will be added to this list and freed later. -+ * @free_pages_lock: This lock should be held when adding or removing pages -+ * from @free_pages_list. -+ * @free_pages_workq: Work queue to process the work items queued to free -+ * pages in @free_pages_list. -+ * @free_pages_work: Work item to free pages in @free_pages_list. -+ * @inode: Pointer to inode whose address space operations are used -+ * for page migration purposes. ++ * This function is called to get the current period used for the DVFS sample ++ * timer. ++ * ++ * Return: The number of bytes output to @buf. + */ -+struct kbase_mem_migrate { -+ struct list_head free_pages_list; -+ spinlock_t free_pages_lock; -+ struct workqueue_struct *free_pages_workq; -+ struct work_struct free_pages_work; -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+ struct inode *inode; -+#endif -+}; ++static ssize_t pm_poweroff_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ struct kbasep_pm_tick_timer_state *stt; ++ ssize_t ret; ++ unsigned long flags; + -+/** -+ * struct kbase_device - Object representing an instance of GPU platform device, -+ * allocated from the probe method of mali driver. 
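The lowest-frequency selection in kbase_pm_lowest_gpu_freq_init() above reduces to: take the lowest OPP from the device tree, convert it from Hz to kHz, and keep the built-in reference frequency instead if that OPP turns out to be higher. The standalone sketch below restates that decision with made-up frequencies and a stand-in constant for DEFAULT_REF_TIMEOUT_FREQ_KHZ; it is illustrative only.

#include <stddef.h>
#include <stdio.h>

/* Stand-in for DEFAULT_REF_TIMEOUT_FREQ_KHZ; the value is illustrative. */
#define REF_TIMEOUT_FREQ_KHZ 100000ULL

/* Pick the frequency used for timeout scaling: the lowest OPP (given in Hz,
 * as dev_pm_opp_find_freq_ceil() reports it) converted to kHz, unless it is
 * higher than the reference, in which case the reference is kept.
 */
static unsigned long long lowest_scaling_freq_khz(const unsigned long *opps_hz,
                                                  size_t nr_opps)
{
    unsigned long long lowest_khz = REF_TIMEOUT_FREQ_KHZ;
    size_t i;

    for (i = 0; i < nr_opps; i++) {
        unsigned long long khz = opps_hz[i] / 1000;

        if (khz < lowest_khz)
            lowest_khz = khz;
    }
    return lowest_khz;
}

int main(void)
{
    const unsigned long opps_hz[] = { 200000000UL, 400000000UL, 800000000UL };

    printf("%llu kHz\n", lowest_scaling_freq_khz(opps_hz, 3));
    return 0;
}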
-+ * @hw_quirks_sc: Configuration to be used for the shader cores as per -+ * the HW issues present in the GPU. -+ * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW -+ * issues present in the GPU. -+ * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW -+ * issues present in the GPU. -+ * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU -+ * subsystems as per the HW issues present in the GPU. -+ * @entry: Links the device instance to the global list of GPU -+ * devices. The list would have as many entries as there -+ * are GPU device instances. -+ * @dev: Pointer to the kernel's generic/base representation -+ * of the GPU platform device. -+ * @mdev: Pointer to the miscellaneous device registered to -+ * provide Userspace access to kernel driver through the -+ * device file /dev/malixx. -+ * @reg_start: Base address of the region in physical address space -+ * where GPU registers have been mapped. -+ * @reg_size: Size of the region containing GPU registers -+ * @reg: Kernel virtual address of the region containing GPU -+ * registers, using which Driver will access the registers. -+ * @irqs: Array containing IRQ resource info for 3 types of -+ * interrupts : Job scheduling, MMU & GPU events (like -+ * power management, cache etc.) -+ * @irqs.irq: irq number -+ * @irqs.flags: irq flags -+ * @clocks: Pointer to the input clock resources referenced by -+ * the GPU device node. -+ * @scmi_clk: Pointer to the input scmi clock resources -+ * @nr_clocks: Number of clocks set in the clocks array. -+ * @regulators: Pointer to the structs corresponding to the -+ * regulators referenced by the GPU device node. -+ * @nr_regulators: Number of regulators set in the regulators array. -+ * @opp_table: Pointer to the device OPP structure maintaining the -+ * link to OPPs attached to a device. This is obtained -+ * after setting regulator names for the device. -+ * @token: Integer replacement for opp_table in kernel versions -+ * 6 and greater. Value is a token id number when 0 or greater, -+ * and a linux errno when negative. Must be initialised -+ * to an non-zero value as 0 is valid token id. -+ * @devname: string containing the name used for GPU device instance, -+ * miscellaneous device is registered using the same name. -+ * @id: Unique identifier for the device, indicates the number of -+ * devices which have been created so far. -+ * @model: Pointer, valid only when Driver is compiled to not access -+ * the real GPU Hw, to the dummy model which tries to mimic -+ * to some extent the state & behavior of GPU Hw in response -+ * to the register accesses made by the Driver. -+ * @irq_slab: slab cache for allocating the work items queued when -+ * model mimics raising of IRQ to cause an interrupt on CPU. -+ * @irq_workq: workqueue for processing the irq work items. -+ * @serving_job_irq: function to execute work items queued when model mimics -+ * the raising of JS irq, mimics the interrupt handler -+ * processing JS interrupts. -+ * @serving_gpu_irq: function to execute work items queued when model mimics -+ * the raising of GPU irq, mimics the interrupt handler -+ * processing GPU interrupts. -+ * @serving_mmu_irq: function to execute work items queued when model mimics -+ * the raising of MMU irq, mimics the interrupt handler -+ * processing MMU interrupts. -+ * @reg_op_lock: lock used by model to serialize the handling of register -+ * accesses made by the driver. 
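To make the three-value pm_poweroff format described above concrete, here is a user-space sketch (not part of the patch) that writes a 500000 ns timer period and two shader ticks, leaving the no-longer-supported third field at 0 so that pm_poweroff_store() does not warn. The sysfs path is an assumption, as before.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed attribute location; adjust for the actual device. */
#define PM_POWEROFF_ATTR "/sys/class/misc/mali0/device/pm_poweroff"

int main(void)
{
    /* <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>
     * pm_poweroff_store() parses exactly three values; the third one is
     * only kept for compatibility and should be written as 0.
     */
    const char *settings = "500000 2 0";
    int fd = open(PM_POWEROFF_ATTR, O_WRONLY);

    if (fd < 0) {
        perror("open pm_poweroff");
        return 1;
    }
    if (write(fd, settings, strlen(settings)) < 0)
        perror("write pm_poweroff");
    close(fd);
    return 0;
}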
-+ * @pm: Per device object for storing data for power management -+ * framework. -+ * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open. -+ * @csf: CSF object for the GPU device. -+ * @js_data: Per device object encapsulating the current context of -+ * Job Scheduler, which is global to the device and is not -+ * tied to any particular struct kbase_context running on -+ * the device -+ * @mem_pools: Global pools of free physical memory pages which can -+ * be used by all the contexts. -+ * @memdev: keeps track of the in use physical pages allocated by -+ * the Driver. -+ * @mmu_mode: Pointer to the object containing methods for programming -+ * the MMU, depending on the type of MMU supported by Hw. -+ * @mgm_dev: Pointer to the memory group manager device attached -+ * to the GPU device. This points to an internal memory -+ * group manager if no platform-specific memory group -+ * manager was retrieved through device tree. -+ * @as: Array of objects representing address spaces of GPU. -+ * @as_free: Bitpattern of free/available GPU address spaces. -+ * @as_to_kctx: Array of pointers to struct kbase_context, having -+ * GPU adrress spaces assigned to them. -+ * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask -+ * register used in the handling of Bus & Page faults. -+ * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are -+ * supported and used where possible. -+ * @gpu_props: Object containing complete information about the -+ * configuration/properties of GPU HW device in use. -+ * @hw_issues_mask: List of SW workarounds for HW issues -+ * @hw_features_mask: List of available HW features. -+ * @disjoint_event: struct for keeping track of the disjoint information, -+ * that whether the GPU is in a disjoint state and the -+ * number of disjoint events that have occurred on GPU. -+ * @disjoint_event.count: disjoint event count -+ * @disjoint_event.state: disjoint event state -+ * @nr_hw_address_spaces: Number of address spaces actually available in the -+ * GPU, remains constant after driver initialisation. -+ * @nr_user_address_spaces: Number of address spaces available to user contexts -+ * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance -+ * counters. -+ * @hwcnt: Structure used for instrumentation and HW counters -+ * dumping -+ * @hwcnt.lock: The lock should be used when accessing any of the -+ * following members -+ * @hwcnt.kctx: kbase context -+ * @hwcnt.addr: HW counter address -+ * @hwcnt.addr_bytes: HW counter size in bytes -+ * @hwcnt.backend: Kbase instrumentation backend -+ * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference -+ * pointer by hwcnt_gpu_iface, which wraps this implementation in -+ * order to extend it with periodic dumping functionality. -+ * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. -+ * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to -+ * perform periodic dumps in order to prevent hardware counter value -+ * overflow or saturation. -+ * @hwcnt_gpu_ctx: Context for GPU hardware counter access. -+ * @hwaccess_lock must be held when calling -+ * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. -+ * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. -+ * @vinstr_ctx: vinstr context created per device. -+ * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device. -+ * @timeline_flags: Bitmask defining which sets of timeline tracepoints -+ * are enabled. 
If zero, there is no timeline client and -+ * therefore timeline is disabled. -+ * @timeline: Timeline context created per device. -+ * @ktrace: kbase device's ktrace -+ * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to -+ * complete for the GPU jobs before proceeding with the -+ * GPU reset. -+ * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used -+ * to calculate suitable timeouts for wait operations. -+ * @backend_time: Kbase backend time related attributes. -+ * @cache_clean_in_progress: Set when a cache clean has been started, and -+ * cleared when it has finished. This prevents multiple -+ * cache cleans being done simultaneously. -+ * @cache_clean_queued: Pended cache clean operations invoked while another is -+ * in progress. If this is not 0, another cache clean needs -+ * to be triggered immediately after completion of the -+ * current one. -+ * @cache_clean_wait: Signalled when a cache clean has finished. -+ * @platform_context: Platform specific private data to be accessed by -+ * platform specific config files only. -+ * @kctx_list: List of kbase_contexts created for the device, -+ * including any contexts that might be created for -+ * hardware counters. -+ * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. -+ * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed -+ * to devfreq_add_device() to add devfreq feature to Mali -+ * GPU device. -+ * @devfreq: Pointer to devfreq structure for Mali GPU device, -+ * returned on the call to devfreq_add_device(). -+ * @current_freqs: The real frequencies, corresponding to -+ * @current_nominal_freq, at which the Mali GPU device -+ * is currently operating, as retrieved from -+ * @devfreq_table in the target callback of -+ * @devfreq_profile. -+ * @current_nominal_freq: The nominal frequency currently used for the Mali GPU -+ * device as retrieved through devfreq_recommended_opp() -+ * using the freq value passed as an argument to target -+ * callback of @devfreq_profile -+ * @current_voltages: The voltages corresponding to @current_nominal_freq, -+ * as retrieved from @devfreq_table in the target -+ * callback of @devfreq_profile. -+ * @current_core_mask: bitmask of shader cores that are currently desired & -+ * enabled, corresponding to @current_nominal_freq as -+ * retrieved from @devfreq_table in the target callback -+ * of @devfreq_profile. -+ * @devfreq_table: Pointer to the lookup table for converting between -+ * nominal OPP (operating performance point) frequency, -+ * and real frequency and core mask. This table is -+ * constructed according to operating-points-v2-mali -+ * table in devicetree. -+ * @num_opps: Number of operating performance points available for the Mali -+ * GPU device. -+ * @last_devfreq_metrics: last PM metrics -+ * @devfreq_queue: Per device object for storing data that manages devfreq -+ * suspend & resume request queue and the related items. -+ * @devfreq_cooling: Pointer returned on registering devfreq cooling device -+ * corresponding to @devfreq. -+ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected -+ * mode. It is a sticky flag which is cleared by IPA -+ * once it has made use of information that GPU had -+ * previously entered protected mode. -+ * @ipa: Top level structure for IPA, containing pointers to both -+ * configured & fallback models. 
-+ * @ipa.lock: Access to this struct must be with ipa.lock held -+ * @ipa.configured_model: ipa model to use -+ * @ipa.fallback_model: ipa fallback model -+ * @ipa.last_metrics: Values of the PM utilization metrics from last time -+ * the power model was invoked. The utilization is -+ * calculated as the difference between last_metrics -+ * and the current values. -+ * @ipa.force_fallback_model: true if use of fallback model has been forced by -+ * the User -+ * @ipa.last_sample_time: Records the time when counters, used for dynamic -+ * energy estimation, were last sampled. -+ * @previous_frequency: Previous frequency of GPU clock used for -+ * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is -+ * restored when L2 is powered on. -+ * @job_fault_debug: Flag to control the dumping of debug data for job faults, -+ * set when the 'job_fault' debugfs file is opened. -+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver -+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing -+ * a sub-directory for every context. -+ * @debugfs_instr_directory: Instrumentation debugfs directory -+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault -+ * has occurred. -+ * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the -+ * occurrence of a job fault. -+ * @job_fault_resume_wq: Waitqueue on which every context with a faulty job wait -+ * for the job fault dumping to complete before they can -+ * do bottom half of job done for the atoms which followed -+ * the faulty atom. -+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty -+ * atoms, whereby the work item function waits for the dumping -+ * to get completed. -+ * @job_fault_event_list: List of atoms, each belonging to a different context, which -+ * generated a job fault. -+ * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list -+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs -+ * file "read_register". -+ * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be -+ * read through debugfs file "read_register". -+ * @ctx_num: Total number of contexts created for the device. -+ * @io_history: Pointer to an object keeping a track of all recent -+ * register accesses. The history of register accesses -+ * can be read through "regs_history" debugfs file. -+ * @hwaccess: Contains a pointer to active kbase context and GPU -+ * backend specific data for HW access layer. -+ * @faults_pending: Count of page/bus faults waiting for bottom half processing -+ * via workqueues. -+ * @mmu_hw_operation_in_progress: Set before sending the MMU command and is -+ * cleared after the command is complete. Whilst this -+ * flag is set, the write to L2_PWROFF register will be -+ * skipped which is needed to workaround the HW issue -+ * GPU2019-3878. PM state machine is invoked after -+ * clearing this flag and @hwaccess_lock is used to -+ * serialize the access. -+ * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction -+ * and cleared after the transaction completes. PM L2 state is -+ * prevented from entering powering up/down transitions when the -+ * flag is set, @hwaccess_lock is used to serialize the access. -+ * @poweroff_pending: Set when power off operation for GPU is started, reset when -+ * power on for GPU is started. 
-+ * @infinite_cache_active_default: Set to enable using infinite cache for all the -+ * allocations of a new context. -+ * @mem_pool_defaults: Default configuration for the group of memory pools -+ * created for a new context. -+ * @current_gpu_coherency_mode: coherency mode in use, which can be different -+ * from @system_coherency, when using protected mode. -+ * @system_coherency: coherency mode as retrieved from the device tree. -+ * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. -+ * @snoop_enable_smc: SMC function ID to call into Trusted firmware to -+ * enable cache snooping. Value of 0 indicates that it -+ * is not used. -+ * @snoop_disable_smc: SMC function ID to call disable cache snooping. -+ * @protected_ops: Pointer to the methods for switching in or out of the -+ * protected mode, as per the @protected_dev being used. -+ * @protected_dev: Pointer to the protected mode switcher device attached -+ * to the GPU device retrieved through device tree if -+ * GPU do not support protected mode switching natively. -+ * @protected_mode: set to TRUE when GPU is put into protected mode -+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or -+ * out of protected mode. -+ * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be -+ * enabled. Counters must be disabled before transition -+ * into protected mode. -+ * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not -+ * enabled. -+ * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware -+ * counters, used if atomic disable is not possible. -+ * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of -+ * IRQ + bottom half is being done, to prevent the writes -+ * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. -+ * @inited_subsys: Bitmap of inited sub systems at the time of device probe. -+ * Used during device remove or for handling error in probe. -+ * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize -+ * the updates made to Job dispatcher + scheduler states. -+ * @mmu_hw_mutex: Protects access to MMU operations and address space -+ * related state. -+ * @serialize_jobs: Currently used mode for serialization of jobs, both -+ * intra & inter slots serialization is supported. -+ * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken -+ * when GWT is enabled. Used to restore the original value -+ * on disabling of GWT. -+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by -+ * Job Scheduler -+ * @l2_size_override: Used to set L2 cache size via device tree blob -+ * @l2_hash_override: Used to set L2 cache hash via device tree blob -+ * @l2_hash_values_override: true if @l2_hash_values is valid. -+ * @l2_hash_values: Used to set L2 asn_hash via device tree blob -+ * @sysc_alloc: Array containing values to be programmed into -+ * SYSC_ALLOC[0..7] GPU registers on L2 cache -+ * power down. These come from either DTB or -+ * via DebugFS (if it is available in kernel). -+ * @process_root: rb_tree root node for maintaining a rb_tree of -+ * kbase_process based on key tgid(thread group ID). -+ * @dma_buf_root: rb_tree root node for maintaining a rb_tree of -+ * &struct kbase_dma_buf based on key dma_buf. -+ * We maintain a rb_tree of dma_buf mappings under -+ * kbase_device and kbase_process, one indicates a -+ * mapping and gpu memory usage at device level and -+ * other one at process level. 
-+ * @total_gpu_pages: Total GPU pages used for the complete GPU device. -+ * @dma_buf_lock: This mutex should be held while accounting for -+ * @total_gpu_pages from imported dma buffers. -+ * @gpu_mem_usage_lock: This spinlock should be held while accounting -+ * @total_gpu_pages for both native and dma-buf imported -+ * allocations. -+ * @dummy_job_wa: struct for dummy job execution workaround for the -+ * GPU hang issue -+ * @dummy_job_wa.ctx: dummy job workaround context -+ * @dummy_job_wa.jc: dummy job workaround job -+ * @dummy_job_wa.slot: dummy job workaround slot -+ * @dummy_job_wa.flags: dummy job workaround flags -+ * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has -+ * been loaded. Protected by @fw_load_lock. -+ * @arb: Pointer to the arbiter device -+ * @pcm_dev: The priority control manager device. -+ * @oom_notifier_block: notifier_block containing kernel-registered out-of- -+ * memory handler. -+ * @mem_migrate: Per device object for managing page migration. -+ * @live_fence_metadata: Count of live fence metadata structures created by -+ * KCPU queue. These structures may outlive kbase module -+ * itself. Therefore, in such a case, a warning should be -+ * be produced. -+ * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of -+ * a MMU operation -+ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. -+ */ -+struct kbase_device { -+ u32 hw_quirks_sc; -+ u32 hw_quirks_tiler; -+ u32 hw_quirks_mmu; -+ u32 hw_quirks_gpu; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ struct list_head entry; -+ struct device *dev; -+ struct miscdevice mdev; -+ u64 reg_start; -+ size_t reg_size; -+ void __iomem *reg; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ struct { -+ int irq; -+ int flags; -+ } irqs[3]; ++ stt = &kbdev->pm.backend.shader_tick_timer; ++ ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", ++ ktime_to_ns(stt->configured_interval), ++ stt->default_ticks); + -+ struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ unsigned int nr_clocks; -+#if IS_ENABLED(CONFIG_REGULATOR) -+ struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ unsigned int nr_regulators; -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+ int token; -+#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) -+ struct opp_table *opp_table; -+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ -+#endif /* CONFIG_REGULATOR */ -+ char devname[DEVNAME_SIZE]; -+ u32 id; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+#if !IS_ENABLED(CONFIG_MALI_REAL_HW) -+ void *model; -+ struct kmem_cache *irq_slab; -+ struct workqueue_struct *irq_workq; -+ atomic_t serving_job_irq; -+ atomic_t serving_gpu_irq; -+ atomic_t serving_mmu_irq; -+ spinlock_t reg_op_lock; -+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ -+ struct kbase_pm_device_data pm; ++ return ret; ++} + -+ struct kbase_mem_pool_group mem_pools; -+ struct kbasep_mem_device memdev; -+ struct kbase_mmu_mode const *mmu_mode; ++static DEVICE_ATTR_RW(pm_poweroff); + -+ struct memory_group_manager_device *mgm_dev; ++/** ++ * reset_timeout_store - Store callback for the reset_timeout sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the reset_timeout sysfs file is written to. 
It ++ * checks the data written, and if valid updates the reset timeout. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t reset_timeout_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ int reset_timeout; + -+ struct kbase_as as[BASE_MAX_NR_AS]; -+ u16 as_free; -+ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ spinlock_t mmu_mask_change; ++ ret = kstrtoint(buf, 0, &reset_timeout); ++ if (ret || reset_timeout <= 0) { ++ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+ bool pagesize_2mb; ++ kbdev->reset_timeout_ms = reset_timeout; ++ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + -+ struct kbase_gpu_props gpu_props; ++ return count; ++} + -+ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; -+ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; ++/** ++ * reset_timeout_show - Show callback for the reset_timeout sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current reset timeout. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t reset_timeout_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+ struct { -+ atomic_t count; -+ atomic_t state; -+ } disjoint_event; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ s8 nr_hw_address_spaces; -+ s8 nr_user_address_spaces; ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); + -+ /** -+ * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to -+ * restore to L2_CONFIG upon GPU reset. 
-+ */ -+ u8 pbha_propagate_bits; ++ return ret; ++} + -+#if MALI_USE_CSF -+ struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; -+#else -+ struct kbase_hwcnt { -+ spinlock_t lock; ++static DEVICE_ATTR_RW(reset_timeout); + -+ struct kbase_context *kctx; -+ u64 addr; -+ u64 addr_bytes; ++static ssize_t mem_pool_size_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+ struct kbase_instr_backend backend; -+ } hwcnt; ++ if (!kbdev) ++ return -ENODEV; + -+ struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend; -+#endif ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_size); ++} + -+ struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; -+ struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer; ++static ssize_t mem_pool_size_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ int err; + -+ struct kbase_hwcnt_context *hwcnt_gpu_ctx; -+ struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; -+ struct kbase_vinstr_context *vinstr_ctx; -+ struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx; ++ if (!kbdev) ++ return -ENODEV; + -+ atomic_t timeline_flags; -+ struct kbase_timeline *timeline; ++ err = kbase_debugfs_helper_set_attr_from_string(buf, ++ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_trim); + -+#if KBASE_KTRACE_TARGET_RBUF -+ struct kbase_ktrace ktrace; -+#endif -+ u32 reset_timeout_ms; ++ return err ? err : count; ++} + -+ u64 lowest_gpu_freq_khz; ++static DEVICE_ATTR_RW(mem_pool_size); + -+#if MALI_USE_CSF -+ struct kbase_backend_time backend_time; -+#endif ++static ssize_t mem_pool_max_size_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+ bool cache_clean_in_progress; -+ u32 cache_clean_queued; -+ wait_queue_head_t cache_clean_wait; ++ if (!kbdev) ++ return -ENODEV; + -+ void *platform_context; ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_max_size); ++} + -+ struct list_head kctx_list; -+ struct mutex kctx_list_lock; ++static ssize_t mem_pool_max_size_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ int err; + -+ struct rockchip_opp_info opp_info; -+ bool is_runtime_resumed; -+ unsigned long current_nominal_freq; -+ struct monitor_dev_info *mdev_info; -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ struct devfreq_dev_profile devfreq_profile; -+ struct devfreq *devfreq; -+ unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ u64 current_core_mask; -+ struct kbase_devfreq_opp *devfreq_table; -+ int num_opps; -+ struct kbasep_pm_metrics last_devfreq_metrics; -+ struct ipa_power_model_data *model_data; -+ struct kbase_devfreq_queue_info devfreq_queue; ++ if (!kbdev) ++ return -ENODEV; + -+#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) -+ struct devfreq_cooling_power dfc_power; -+ struct thermal_cooling_device *devfreq_cooling; -+ bool ipa_protection_mode_switched; -+ struct { -+ /* Access to this struct must be with ipa.lock held */ -+ struct mutex lock; -+ struct kbase_ipa_model 
*configured_model; -+ struct kbase_ipa_model *fallback_model; ++ err = kbase_debugfs_helper_set_attr_from_string(buf, ++ kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_set_max_size); + -+ /* Values of the PM utilization metrics from last time the -+ * power model was invoked. The utilization is calculated as -+ * the difference between last_metrics and the current values. -+ */ -+ struct kbasep_pm_metrics last_metrics; ++ return err ? err : count; ++} + -+ /* true if use of fallback model has been forced by the User */ -+ bool force_fallback_model; -+ /* Records the time when counters, used for dynamic energy -+ * estimation, were last sampled. -+ */ -+ ktime_t last_sample_time; -+ } ipa; -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -+ unsigned long previous_frequency; ++static DEVICE_ATTR_RW(mem_pool_max_size); + -+#if !MALI_USE_CSF -+ atomic_t job_fault_debug; -+#endif /* !MALI_USE_CSF */ ++/** ++ * lp_mem_pool_size_show - Show size of the large memory pages pool. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the pool size. ++ * ++ * This function is called to get the number of large memory pages which currently populate the kbdev pool. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t lp_mem_pool_size_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct dentry *mali_debugfs_directory; -+ struct dentry *debugfs_ctx_directory; -+ struct dentry *debugfs_instr_directory; ++ if (!kbdev) ++ return -ENODEV; + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ u64 debugfs_as_read_bitmap; -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_size); ++} + -+#if !MALI_USE_CSF -+ wait_queue_head_t job_fault_wq; -+ wait_queue_head_t job_fault_resume_wq; -+ struct workqueue_struct *job_fault_resume_workq; -+ struct list_head job_fault_event_list; -+ spinlock_t job_fault_event_lock; -+#endif /* !MALI_USE_CSF */ ++/** ++ * lp_mem_pool_size_store - Set size of the large memory pages pool. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called to set the number of large memory pages which should populate the kbdev pool. ++ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t lp_mem_pool_size_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ int err; + -+#if !MALI_CUSTOMER_RELEASE -+ struct { -+ u32 reg_offset; -+ } regs_dump_debugfs_data; -+#endif /* !MALI_CUSTOMER_RELEASE */ -+#endif /* CONFIG_DEBUG_FS */ ++ if (!kbdev) ++ return -ENODEV; + -+ atomic_t ctx_num; ++ err = kbase_debugfs_helper_set_attr_from_string(buf, ++ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_trim); + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ struct kbase_io_history io_history; -+#endif /* CONFIG_DEBUG_FS */ ++ return err ? 
err : count; ++} + -+ struct kbase_hwaccess_data hwaccess; ++static DEVICE_ATTR_RW(lp_mem_pool_size); + -+ atomic_t faults_pending; ++/** ++ * lp_mem_pool_max_size_show - Show maximum size of the large memory pages pool. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the pool size. ++ * ++ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t lp_mem_pool_max_size_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+#if MALI_USE_CSF -+ bool mmu_hw_operation_in_progress; -+#endif -+ bool mmu_page_migrate_in_progress; -+ bool poweroff_pending; ++ if (!kbdev) ++ return -ENODEV; + -+ bool infinite_cache_active_default; ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_max_size); ++} + -+ struct kbase_mem_pool_group_config mem_pool_defaults; ++/** ++ * lp_mem_pool_max_size_store - Set maximum size of the large memory pages pool. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t lp_mem_pool_max_size_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ int err; + -+ u32 current_gpu_coherency_mode; -+ u32 system_coherency; ++ if (!kbdev) ++ return -ENODEV; + -+ bool cci_snoop_enabled; ++ err = kbase_debugfs_helper_set_attr_from_string(buf, ++ kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_set_max_size); + -+ u32 snoop_enable_smc; -+ u32 snoop_disable_smc; ++ return err ? err : count; ++} + -+ const struct protected_mode_ops *protected_ops; ++static DEVICE_ATTR_RW(lp_mem_pool_max_size); + -+ struct protected_mode_device *protected_dev; ++/** ++ * show_simplified_mem_pool_max_size - Show the maximum size for the memory ++ * pool 0 of small (4KiB) pages. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the max size. ++ * ++ * This function is called to get the maximum size for the memory pool 0 of ++ * small (4KiB) pages. It is assumed that the maximum size value is same for ++ * all the pools. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_simplified_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+ bool protected_mode; ++ if (!kbdev) ++ return -ENODEV; + -+ bool protected_mode_transition; ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size); ++} + -+ bool protected_mode_hwcnt_desired; ++/** ++ * set_simplified_mem_pool_max_size - Set the same maximum size for all the ++ * memory pools of small (4KiB) pages. 
++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called to set the same maximum size for all the memory ++ * pools of small (4KiB) pages. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t set_simplified_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ unsigned long new_size; ++ int gid; ++ int err; + -+ bool protected_mode_hwcnt_disabled; ++ if (!kbdev) ++ return -ENODEV; + -+ struct work_struct protected_mode_hwcnt_disable_work; ++ err = kstrtoul(buf, 0, &new_size); ++ if (err) ++ return -EINVAL; + ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) ++ kbase_mem_pool_debugfs_set_max_size( ++ kbdev->mem_pools.small, gid, (size_t)new_size); + -+ bool irq_reset_flush; ++ return count; ++} + -+ u32 inited_subsys; ++static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, ++ set_simplified_mem_pool_max_size); + -+ spinlock_t hwaccess_lock; ++/** ++ * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory ++ * pool 0 of large (2MiB) pages. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the total current pool size. ++ * ++ * This function is called to get the maximum size for the memory pool 0 of ++ * large (2MiB) pages. It is assumed that the maximum size value is same for ++ * all the pools. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); + -+ struct mutex mmu_hw_mutex; ++ if (!kbdev) ++ return -ENODEV; + -+ u8 l2_size_override; -+ u8 l2_hash_override; -+ bool l2_hash_values_override; -+ u32 l2_hash_values[ASN_HASH_COUNT]; ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size); ++} + -+ u32 sysc_alloc[SYSC_ALLOC_COUNT]; ++/** ++ * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the ++ * memory pools of large (2MiB) pages. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called to set the same maximum size for all the memory ++ * pools of large (2MiB) pages. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ unsigned long new_size; ++ int gid; ++ int err; + -+ struct mutex fw_load_lock; -+#if MALI_USE_CSF -+ /* CSF object for the GPU device. */ -+ struct kbase_csf_device csf; -+#else -+ struct kbasep_js_device_data js_data; ++ if (!kbdev) ++ return -ENODEV; + -+ /* See KBASE_JS_*_PRIORITY_MODE for details. 
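The simplified max_size attribute above applies a single value to the small-page pool of every memory group, and lp_max_size does the same for the large-page pools, mirroring the per-group layout of kbase_mem_pool_group_config shown earlier in this file. The sketch below is an illustrative model only: the group count is a stand-in for MEMORY_GROUP_MANAGER_NR_GROUPS and the sizes are arbitrary numbers of pool pages.

#include <stddef.h>
#include <stdio.h>

#define NR_GROUPS 16 /* stand-in for MEMORY_GROUP_MANAGER_NR_GROUPS */

/* Simplified model: one small-page pool and one large-page pool per memory
 * group, each with its own maximum size (counted in pool pages).
 */
struct pool_config { size_t max_size; };

struct pool_group_config {
    struct pool_config small[NR_GROUPS];
    struct pool_config large[NR_GROUPS];
};

/* Mirrors what the simplified store callbacks do: apply one value to the
 * corresponding pool of every group.
 */
static void set_all_max_sizes(struct pool_config *pools, size_t nr, size_t max)
{
    size_t i;

    for (i = 0; i < nr; i++)
        pools[i].max_size = max;
}

int main(void)
{
    struct pool_group_config cfg;

    set_all_max_sizes(cfg.small, NR_GROUPS, 16384); /* small (4KiB) pages */
    set_all_max_sizes(cfg.large, NR_GROUPS, 64);    /* large (2MiB) pages */
    printf("group 0: small max %zu, large max %zu\n",
           cfg.small[0].max_size, cfg.large[0].max_size);
    return 0;
}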
*/
-+ u32 js_ctx_scheduling_mode;
++ err = kstrtoul(buf, 0, &new_size);
++ if (err)
++ return -EINVAL;
+
-+ /* See KBASE_SERIALIZE_* for details */
-+ u8 serialize_jobs;
++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid)
++ kbase_mem_pool_debugfs_set_max_size(
++ kbdev->mem_pools.large, gid, (size_t)new_size);
+
-+#ifdef CONFIG_MALI_CINSTR_GWT
-+ u8 backup_serialize_jobs;
-+#endif /* CONFIG_MALI_CINSTR_GWT */
++ return count;
++}
+
-+#endif /* MALI_USE_CSF */
++static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size,
++ set_simplified_lp_mem_pool_max_size);
+
-+ struct rb_root process_root;
-+ struct rb_root dma_buf_root;
++/**
++ * show_simplified_ctx_default_max_size - Show the default maximum size for the
++ * memory pool 0 of small (4KiB) pages.
++ * @dev: The device this sysfs file is for.
++ * @attr: The attributes of the sysfs file.
++ * @buf: The output buffer to receive the pool size.
++ *
++ * This function is called to get the default ctx maximum size for the memory
++ * pool 0 of small (4KiB) pages. It is assumed that the maximum size value is
++ * the same for all the pools. The maximum size for the pool of large (2MiB)
++ * pages will be the same as the max size of the pool of small (4KiB) pages in
++ * terms of bytes.
++ *
++ * Return: The number of bytes output to @buf.
++ */
++static ssize_t show_simplified_ctx_default_max_size(struct device *dev,
++ struct device_attribute *attr, char * const buf)
++{
++ struct kbase_device *kbdev = to_kbase_device(dev);
++ size_t max_size;
+
-+ size_t total_gpu_pages;
-+ struct mutex dma_buf_lock;
-+ spinlock_t gpu_mem_usage_lock;
++ if (!kbdev)
++ return -ENODEV;
+
-+ struct {
-+ struct kbase_context *ctx;
-+ u64 jc;
-+ int slot;
-+ u64 flags;
-+ } dummy_job_wa;
-+ bool dummy_job_wa_loaded;
++ max_size = kbase_mem_pool_config_debugfs_max_size(
++ kbdev->mem_pool_defaults.small, 0);
+
-+#ifdef CONFIG_MALI_ARBITER_SUPPORT
-+ struct kbase_arbiter_device arb;
-+#endif
-+ /* Priority Control Manager device */
-+ struct priority_control_manager_device *pcm_dev;
++ return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size);
++}
+
-+ struct notifier_block oom_notifier_block;
++/**
++ * set_simplified_ctx_default_max_size - Set the same default maximum size for
++ * all the pools created for new
++ * contexts. This covers the pool of
++ * large pages as well and its max size
++ * will be same as max size of the pool
++ * of small pages in terms of bytes.
++ * @dev: The device this sysfs file is for.
++ * @attr: The attributes of the sysfs file.
++ * @buf: The value written to the sysfs file.
++ * @count: The number of bytes written to the sysfs file.
++ *
++ * This function is called to set the same maximum size for all pools created
++ * for new contexts.
++ *
++ * Return: @count if the function succeeded. An error code on failure.
++ */
++static ssize_t set_simplified_ctx_default_max_size(struct device *dev,
++ struct device_attribute *attr, const char *buf, size_t count)
++{
++ struct kbase_device *kbdev;
++ unsigned long new_size;
++ int err;
+
-+#if !MALI_USE_CSF
-+ spinlock_t quick_reset_lock;
-+ bool quick_reset_enabled;
-+ /*
-+ * After quick_reset_mode is entered (quick_reset_enabled is true), this
-+ * counts the atoms that have entered the
-+ * KBASE_JD_ATOM_STATE_HW_COMPLETED state.
-+ *
-+ * Once num_of_atoms_hw_completed reaches a certain value,
-+ * quick_reset_mode is exited. See the references to
-+ * num_of_atoms_hw_completed in kbase_js_complete_atom().
-+ */ -+ u32 num_of_atoms_hw_completed; -+#endif ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ struct kbase_mem_migrate mem_migrate; ++ err = kstrtoul(buf, 0, &new_size); ++ if (err) ++ return -EINVAL; + -+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) -+ atomic_t live_fence_metadata; -+#endif -+ u32 mmu_as_inactive_wait_time_ms; -+ struct kmem_cache *va_region_slab; -+}; ++ kbase_mem_pool_group_config_set_max_size( ++ &kbdev->mem_pool_defaults, (size_t)new_size); ++ ++ return count; ++} ++ ++static DEVICE_ATTR(ctx_default_max_size, 0600, ++ show_simplified_ctx_default_max_size, ++ set_simplified_ctx_default_max_size); + ++#if !MALI_USE_CSF +/** -+ * enum kbase_file_state - Initialization state of a file opened by @kbase_open ++ * js_ctx_scheduling_mode_show - Show callback for js_ctx_scheduling_mode sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the context scheduling mode information. + * -+ * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version. -+ * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in -+ * progress and other setup calls shall be -+ * rejected. -+ * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has -+ * completed, awaiting context creation flags. -+ * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress -+ * and other setup calls shall be rejected. -+ * @KBASE_FILE_COMPLETE: Indicates if the setup for context has -+ * completed, i.e. flags have been set for the -+ * context. ++ * This function is called to get the context scheduling mode being used by JS. + * -+ * The driver allows only limited interaction with user-space until setup -+ * is complete. ++ * Return: The number of bytes output to @buf. + */ -+enum kbase_file_state { -+ KBASE_FILE_NEED_VSN, -+ KBASE_FILE_VSN_IN_PROGRESS, -+ KBASE_FILE_NEED_CTX, -+ KBASE_FILE_CTX_IN_PROGRESS, -+ KBASE_FILE_COMPLETE -+}; ++static ssize_t js_ctx_scheduling_mode_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); ++} + +/** -+ * struct kbase_file - Object representing a file opened by @kbase_open ++ * js_ctx_scheduling_mode_store - Set callback for js_ctx_scheduling_mode sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. + * -+ * @kbdev: Object representing an instance of GPU platform device, -+ * allocated from the probe method of the Mali driver. -+ * @filp: Pointer to the struct file corresponding to device file -+ * /dev/malixx instance, passed to the file's open method. -+ * @kctx: Object representing an entity, among which GPU is -+ * scheduled and which gets its own GPU address space. -+ * Invalid until @setup_state is KBASE_FILE_COMPLETE. -+ * @api_version: Contains the version number for User/kernel interface, -+ * used for compatibility check. Invalid until -+ * @setup_state is KBASE_FILE_NEED_CTX. -+ * @setup_state: Initialization state of the file. Values come from -+ * the kbase_file_state enumeration. ++ * This function is called when the js_ctx_scheduling_mode sysfs file is written ++ * to. 
It checks the data written, and if valid updates the ctx scheduling mode ++ * being by JS. ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+struct kbase_file { -+ struct kbase_device *kbdev; -+ struct file *filp; ++static ssize_t js_ctx_scheduling_mode_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ + struct kbase_context *kctx; -+ unsigned long api_version; -+ atomic_t setup_state; ++ u32 new_js_ctx_scheduling_mode; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ int ret; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); ++ if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { ++ dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" ++ " write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } ++ ++ if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) ++ return count; ++ ++ mutex_lock(&kbdev->kctx_list_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ /* Update the context priority mode */ ++ kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; ++ ++ /* Adjust priority of all the contexts as per the new mode */ ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) ++ kbase_js_update_ctx_priority(kctx); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->kctx_list_lock); ++ ++ dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); ++ ++ return count; ++} ++ ++static DEVICE_ATTR_RW(js_ctx_scheduling_mode); ++ ++/* Number of entries in serialize_jobs_settings[] */ ++#define NR_SERIALIZE_JOBS_SETTINGS 5 ++/* Maximum string length in serialize_jobs_settings[].name */ ++#define MAX_SERIALIZE_JOBS_NAME_LEN 16 ++ ++static struct ++{ ++ char *name; ++ u8 setting; ++} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { ++ {"none", 0}, ++ {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, ++ {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, ++ {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, ++ {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | ++ KBASE_SERIALIZE_RESET} +}; -+#if MALI_JIT_PRESSURE_LIMIT_BASE ++ +/** -+ * enum kbase_context_flags - Flags for kbase contexts -+ * -+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit -+ * process on a 64-bit kernel. -+ * -+ * @KCTX_RUNNABLE_REF: Set when context is counted in -+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. -+ * -+ * @KCTX_ACTIVE: Set when the context is active. ++ * update_serialize_jobs_setting - Update the serialization setting for the ++ * submission of GPU jobs. + * -+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this -+ * context. ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * @buf: Buffer containing the value written to the sysfs/debugfs file. ++ * @count: The number of bytes to write to the sysfs/debugfs file. + * -+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been -+ * initialized. ++ * This function is called when the serialize_jobs sysfs/debugfs file is ++ * written to. It matches the requested setting against the available settings ++ * and if a matching setting is found updates kbdev->serialize_jobs. + * -+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new -+ * allocations. 
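
/*
 * Illustrative user-space sketch, not part of this patch: selecting a context
 * scheduling mode through the js_ctx_scheduling_mode attribute shown above.
 * The sysfs path and the value 1 are assumptions; the store callback rejects
 * anything at or above KBASE_JS_PRIORITY_MODE_COUNT with -EINVAL and applies
 * a valid mode to every context on the device.
 */
#include <stdio.h>

#define JS_MODE_ATTR "/sys/class/misc/mali0/device/js_ctx_scheduling_mode"

int main(void)
{
    unsigned int mode = 0;
    FILE *f = fopen(JS_MODE_ATTR, "r+");

    if (!f) {
        perror(JS_MODE_ATTR);
        return 1;
    }
    fprintf(f, "1\n");          /* request scheduling mode 1 */
    fflush(f);
    rewind(f);                  /* re-read the attribute via the show callback */
    if (fscanf(f, "%u", &mode) == 1)
        printf("active js_ctx_scheduling_mode: %u\n", mode);
    fclose(f);
    return 0;
}
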
Existing allocations will not change. ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, ++ const char *buf, size_t count) ++{ ++ int i; ++ bool valid = false; ++ ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { ++ kbdev->serialize_jobs = ++ serialize_jobs_settings[i].setting; ++ valid = true; ++ break; ++ } ++ } ++ ++ if (!valid) { ++ dev_err(kbdev->dev, "serialize_jobs: invalid setting"); ++ return -EINVAL; ++ } ++ ++ return count; ++} ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs ++ * debugfs file ++ * @sfile: seq_file pointer ++ * @data: Private callback data + * -+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. ++ * This function is called to get the contents of the serialize_jobs debugfs ++ * file. This is a list of the available settings with the currently active one ++ * surrounded by square brackets. + * -+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept -+ * scheduled in. ++ * Return: 0 on success, or an error code on error ++ */ ++static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, ++ void *data) ++{ ++ struct kbase_device *kbdev = sfile->private; ++ int i; ++ ++ CSTD_UNUSED(data); ++ ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) ++ seq_printf(sfile, "[%s] ", ++ serialize_jobs_settings[i].name); ++ else ++ seq_printf(sfile, "%s ", ++ serialize_jobs_settings[i].name); ++ } ++ ++ seq_puts(sfile, "\n"); ++ ++ return 0; ++} ++ ++/** ++ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs ++ * debugfs file. ++ * @file: File pointer ++ * @ubuf: User buffer containing data to store ++ * @count: Number of bytes in user buffer ++ * @ppos: File position + * -+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. -+ * This is only ever updated whilst the jsctx_mutex is held. ++ * This function is called when the serialize_jobs debugfs file is written to. ++ * It matches the requested setting against the available settings and if a ++ * matching setting is found updates kbdev->serialize_jobs. + * -+ * @KCTX_DYING: Set when the context process is in the process of being evicted. ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) ++{ ++ struct seq_file *s = file->private_data; ++ struct kbase_device *kbdev = s->private; ++ char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; ++ ++ CSTD_UNUSED(ppos); ++ ++ count = min_t(size_t, sizeof(buf) - 1, count); ++ if (copy_from_user(buf, ubuf, count)) ++ return -EFAULT; ++ ++ buf[count] = 0; ++ ++ return update_serialize_jobs_setting(kbdev, buf, count); ++} ++ ++/** ++ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs ++ * debugfs file ++ * @in: inode pointer ++ * @file: file pointer + * -+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory -+ * allocations. For 64-bit clients it is enabled by default, and disabled by -+ * default on 32-bit clients. 
Being able to clear this flag is only used for -+ * testing purposes of the custom zone allocation on 64-bit user-space builds, -+ * where we also require more control than is available through e.g. the JIT -+ * allocation mechanism. However, the 64-bit user-space client must still -+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT ++ * Return: Zero on success, error code on failure ++ */ ++static int kbasep_serialize_jobs_debugfs_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, ++ in->i_private); ++} ++ ++static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_serialize_jobs_debugfs_open, ++ .read = seq_read, ++ .write = kbasep_serialize_jobs_debugfs_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++#endif /* CONFIG_DEBUG_FS */ ++ ++/** ++ * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. + * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled -+ * from it for job slot 0. This is reset when the context first goes active or -+ * is re-activated on that slot. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled -+ * from it for job slot 1. This is reset when the context first goes active or -+ * is re-activated on that slot. ++ * This function is called to get the contents of the serialize_jobs sysfs ++ * file. This is a list of the available settings with the currently active ++ * one surrounded by square brackets. + * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled -+ * from it for job slot 2. This is reset when the context first goes active or -+ * is re-activated on that slot. ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_serialize_jobs_sysfs(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ssize_t ret = 0; ++ int i; ++ ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (kbdev->serialize_jobs == ++ serialize_jobs_settings[i].setting) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", ++ serialize_jobs_settings[i].name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", ++ serialize_jobs_settings[i].name); ++ } ++ ++ if (ret < PAGE_SIZE - 1) { ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); ++ } else { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } ++ ++ return ret; ++} ++ ++/** ++ * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. + * -+ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for -+ * the context due to unhandled page(or bus) fault. It is cleared when the -+ * refcount for the context drops to 0 or on when the address spaces are -+ * re-enabled on GPU reset or power cycle. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * -+ * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual -+ * address page limit, so we must take care to not exceed the physical limit ++ * This function is called when the serialize_jobs sysfs file is written to. 
++ * It matches the requested setting against the available settings and if a ++ * matching setting is found updates kbdev->serialize_jobs. + * -+ * All members need to be separate bits. This enum is intended for use in a -+ * bitmask where multiple values get OR-ed together. ++ * Return: @count if the function succeeded. An error code on failure. + */ -+enum kbase_context_flags { -+ KCTX_COMPAT = 1U << 0, -+ KCTX_RUNNABLE_REF = 1U << 1, -+ KCTX_ACTIVE = 1U << 2, -+ KCTX_PULLED = 1U << 3, -+ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, -+ KCTX_INFINITE_CACHE = 1U << 5, -+ KCTX_SUBMIT_DISABLED = 1U << 6, -+ KCTX_PRIVILEGED = 1U << 7, -+ KCTX_SCHEDULED = 1U << 8, -+ KCTX_DYING = 1U << 9, -+ KCTX_FORCE_SAME_VA = 1U << 11, -+ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, -+ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, -+ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, -+ KCTX_AS_DISABLED_ON_FAULT = 1U << 15, -+ KCTX_JPL_ENABLED = 1U << 16, -+}; -+#else -+/** -+ * enum kbase_context_flags - Flags for kbase contexts -+ * -+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit -+ * process on a 64-bit kernel. -+ * -+ * @KCTX_RUNNABLE_REF: Set when context is counted in -+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. -+ * -+ * @KCTX_ACTIVE: Set when the context is active. -+ * -+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this -+ * context. -+ * -+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been -+ * initialized. -+ * -+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new -+ * allocations. Existing allocations will not change. -+ * -+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. -+ * -+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept -+ * scheduled in. -+ * -+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. -+ * This is only ever updated whilst the jsctx_mutex is held. -+ * -+ * @KCTX_DYING: Set when the context process is in the process of being evicted. -+ * -+ * -+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory -+ * allocations. For 64-bit clients it is enabled by default, and disabled by -+ * default on 32-bit clients. Being able to clear this flag is only used for -+ * testing purposes of the custom zone allocation on 64-bit user-space builds, -+ * where we also require more control than is available through e.g. the JIT -+ * allocation mechanism. However, the 64-bit user-space client must still -+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT -+ * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled -+ * from it for job slot 0. This is reset when the context first goes active or -+ * is re-activated on that slot. -+ * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled -+ * from it for job slot 1. This is reset when the context first goes active or -+ * is re-activated on that slot. -+ * -+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled -+ * from it for job slot 2. This is reset when the context first goes active or -+ * is re-activated on that slot. -+ * -+ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for -+ * the context due to unhandled page(or bus) fault. It is cleared when the -+ * refcount for the context drops to 0 or on when the address spaces are -+ * re-enabled on GPU reset or power cycle. -+ * -+ * All members need to be separate bits. 
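
/*
 * Illustrative user-space sketch, not part of this patch: querying and
 * changing the serialize_jobs setting exposed above. The show callbacks list
 * every available setting with the active one in square brackets, and the
 * store side matches the written string against the table with sysfs_streq(),
 * so any of "none", "intra-slot", "inter-slot", "full" or "full-reset" can be
 * written. The sysfs path is an assumption for the target platform.
 */
#include <stdio.h>

#define SERIALIZE_JOBS_ATTR "/sys/class/misc/mali0/device/serialize_jobs"

int main(void)
{
    char settings[128] = "";
    FILE *f = fopen(SERIALIZE_JOBS_ATTR, "r");

    if (f) {
        if (fgets(settings, sizeof(settings), f))
            printf("available settings: %s", settings);
        fclose(f);
    }

    f = fopen(SERIALIZE_JOBS_ATTR, "w");
    if (!f) {
        perror(SERIALIZE_JOBS_ATTR);
        return 1;
    }
    fputs("full\n", f);         /* serialize both within and across job slots */
    return fclose(f) ? 1 : 0;
}
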
This enum is intended for use in a -+ * bitmask where multiple values get OR-ed together. -+ */ -+enum kbase_context_flags { -+ KCTX_COMPAT = 1U << 0, -+ KCTX_RUNNABLE_REF = 1U << 1, -+ KCTX_ACTIVE = 1U << 2, -+ KCTX_PULLED = 1U << 3, -+ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, -+ KCTX_INFINITE_CACHE = 1U << 5, -+ KCTX_SUBMIT_DISABLED = 1U << 6, -+ KCTX_PRIVILEGED = 1U << 7, -+ KCTX_SCHEDULED = 1U << 8, -+ KCTX_DYING = 1U << 9, -+ KCTX_FORCE_SAME_VA = 1U << 11, -+ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, -+ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, -+ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, -+ KCTX_AS_DISABLED_ON_FAULT = 1U << 15, -+}; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+struct kbase_sub_alloc { -+ struct list_head link; -+ struct page *page; -+ DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); -+}; -+ -+/** -+ * struct kbase_context - Kernel base context -+ * -+ * @filp: Pointer to the struct file corresponding to device file -+ * /dev/malixx instance, passed to the file's open method. -+ * @kbdev: Pointer to the Kbase device for which the context is created. -+ * @kctx_list_link: Node into Kbase device list of contexts. -+ * @mmu: Structure holding details of the MMU tables for this -+ * context -+ * @id: Unique identifier for the context, indicates the number of -+ * contexts which have been created for the device so far. -+ * @api_version: contains the version number for User/kernel interface, -+ * used for compatibility check. -+ * @event_list: list of posted events about completed atoms, to be sent to -+ * event handling thread of Userpsace. -+ * @event_coalesce_list: list containing events corresponding to successive atoms -+ * which have requested deferred delivery of the completion -+ * events to Userspace. -+ * @event_mutex: Lock to protect the concurrent access to @event_list & -+ * @event_mutex. -+ * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver -+ * should stop posting events and also inform event handling -+ * thread that context termination is in progress. -+ * @event_workq: Workqueue for processing work items corresponding to atoms -+ * that do not return an event to userspace. -+ * @event_count: Count of the posted events to be consumed by Userspace. -+ * @event_coalesce_count: Count of the events present in @event_coalesce_list. -+ * @flags: bitmap of enums from kbase_context_flags, indicating the -+ * state & attributes for the context. -+ * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, -+ * which can alias number of memory regions. The page is -+ * represent a region where it is mapped with a write-alloc -+ * cache setup, typically used when the write result of the -+ * GPU isn't needed, but the GPU must write anyway. -+ * @mem_partials_lock: Lock for protecting the operations done on the elements -+ * added to @mem_partials list. -+ * @mem_partials: List head for the list of large pages, 2MB in size, which -+ * have been split into 4 KB pages and are used partially -+ * for the allocations >= 2 MB in size. -+ * @reg_lock: Lock used for GPU virtual address space management operations, -+ * like adding/freeing a memory region in the address space. -+ * Can be converted to a rwlock ?. -+ * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA -+ * zone of the GPU virtual address space. Used for allocations -+ * having the same value for GPU & CPU virtual address. -+ * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA -+ * zone of the GPU virtual address space. 
-+ * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA -+ * zone of the GPU virtual address space. Used for GPU-executable -+ * allocations which don't need the SAME_VA property. -+ * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the -+ * EXEC_FIXED_VA zone of the GPU virtual address space. Used for -+ * GPU-executable allocations with FIXED/FIXABLE GPU virtual -+ * addresses. -+ * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone -+ * of the GPU virtual address space. Used for allocations with -+ * FIXED/FIXABLE GPU virtual addresses. -+ * @num_fixable_allocs: A count for the number of memory allocations with the -+ * BASE_MEM_FIXABLE property. -+ * @num_fixed_allocs: A count for the number of memory allocations with the -+ * BASE_MEM_FIXED property. -+ * @reg_zone: Zone information for the reg_rbtree_<...> members. -+ * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for -+ * SAME_VA allocations to defer the reservation of memory region -+ * (from the GPU virtual address space) from base_mem_alloc -+ * ioctl to mmap system call. This helps returning unique -+ * handles, disguised as GPU VA, to Userspace from base_mem_alloc -+ * and later retrieving the pointer to memory region structure -+ * in the mmap handler. -+ * @pending_regions: Array containing pointers to memory region structures, -+ * used in conjunction with @cookies bitmask mainly for -+ * providing a mechansim to have the same value for CPU & -+ * GPU virtual address. -+ * @event_queue: Wait queue used for blocking the thread, which consumes -+ * the base_jd_event corresponding to an atom, when there -+ * are no more posted events. -+ * @tgid: Thread group ID of the process whose thread created -+ * the context (by calling KBASE_IOCTL_VERSION_CHECK or -+ * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). -+ * This is usually, but not necessarily, the same as the -+ * process whose thread opened the device file -+ * /dev/malixx instance. -+ * @pid: ID of the thread, corresponding to process @tgid, -+ * which actually created the context. This is usually, -+ * but not necessarily, the same as the thread which -+ * opened the device file /dev/malixx instance. -+ * @csf: kbase csf context -+ * @jctx: object encapsulating all the Job dispatcher related state, -+ * including the array of atoms. -+ * @used_pages: Keeps a track of the number of 4KB physical pages in use -+ * for the context. -+ * @nonmapped_pages: Updated in the same way as @used_pages, except for the case -+ * when special tracking page is freed by userspace where it -+ * is reset to 0. -+ * @permanent_mapped_pages: Usage count of permanently mapped memory -+ * @mem_pools: Context-specific pools of free physical memory pages. -+ * @reclaim: Shrinker object registered with the kernel containing -+ * the pointer to callback function which is invoked under -+ * low memory conditions. In the callback function Driver -+ * frees up the memory for allocations marked as -+ * evictable/reclaimable. -+ * @evict_list: List head for the list containing the allocations which -+ * can be evicted or freed up in the shrinker callback. -+ * @evict_nents: Total number of pages allocated by the allocations within -+ * @evict_list (atomic). 
-+ * @waiting_soft_jobs: List head for the list containing softjob atoms, which -+ * are either waiting for the event set operation, or waiting -+ * for the signaling of input fence or waiting for the GPU -+ * device to powered on so as to dump the CPU/GPU timestamps. -+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent -+ * accesses. -+ * @dma_fence: Object containing list head for the list of dma-buf fence -+ * waiting atoms and the waitqueue to process the work item -+ * queued for the atoms blocked on the signaling of dma-buf -+ * fences. -+ * @dma_fence.waiting_resource: list head for the list of dma-buf fence -+ * @dma_fence.wq: waitqueue to process the work item queued -+ * @as_nr: id of the address space being used for the scheduled in -+ * context. This is effectively part of the Run Pool, because -+ * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst -+ * the context is scheduled in. The hwaccess_lock must be held -+ * whilst accessing this. -+ * If the context relating to this value of as_nr is required, -+ * then the context must be retained to ensure that it doesn't -+ * disappear whilst it is being used. Alternatively, hwaccess_lock -+ * can be held to ensure the context doesn't disappear (but this -+ * has restrictions on what other locks can be taken simutaneously). -+ * @refcount: Keeps track of the number of users of this context. A user -+ * can be a job that is available for execution, instrumentation -+ * needing to 'pin' a context for counter collection, etc. -+ * If the refcount reaches 0 then this context is considered -+ * inactive and the previously programmed AS might be cleared -+ * at any point. -+ * Generally the reference count is incremented when the context -+ * is scheduled in and an atom is pulled from the context's per -+ * slot runnable tree in JM GPU or GPU command queue -+ * group is programmed on CSG slot in CSF GPU. -+ * @process_mm: Pointer to the memory descriptor of the process which -+ * created the context. Used for accounting the physical -+ * pages used for GPU allocations, done for the context, -+ * to the memory consumed by the process. A reference is taken -+ * on this descriptor for the Userspace created contexts so that -+ * Kbase can safely access it to update the memory usage counters. -+ * The reference is dropped on context termination. -+ * @gpu_va_end: End address of the GPU va space (in 4KB page units) -+ * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all -+ * tiler heaps of the kbase context. -+ * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the -+ * kbase context. -+ * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the -+ * kbase context. -+ * @jit_va: Indicates if a JIT_VA zone has been created. -+ * @mem_profile_data: Buffer containing the profiling information provided by -+ * Userspace, can be read through the mem_profile debugfs file. -+ * @mem_profile_size: Size of the @mem_profile_data. -+ * @mem_profile_lock: Lock to serialize the operations related to mem_profile -+ * debugfs file. -+ * @kctx_dentry: Pointer to the debugfs directory created for every context, -+ * inside kbase_device::debugfs_ctx_directory, containing -+ * context specific files. -+ * @reg_dump: Buffer containing a register offset & value pair, used -+ * for dumping job fault debug info. -+ * @job_fault_count: Indicates that a job fault occurred for the context and -+ * dumping of its debug info is in progress. 
-+ * @job_fault_resume_event_list: List containing atoms completed after the faulty -+ * atom but before the debug data for faulty atom was dumped. -+ * @mem_view_column_width: Controls the number of bytes shown in every column of the -+ * output of "mem_view" debugfs file. -+ * @jsctx_queue: Per slot & priority arrays of object containing the root -+ * of RB-tree holding currently runnable atoms on the job slot -+ * and the head item of the linked list of atoms blocked on -+ * cross-slot dependencies. -+ * @slot_tracking: Tracking and control of this context's use of all job -+ * slots -+ * @atoms_pulled_all_slots: Total number of atoms currently pulled from the -+ * context, across all slots. -+ * @slots_pullable: Bitmask of slots, indicating the slots for which the -+ * context has pullable atoms in the runnable tree. -+ * @work: Work structure used for deferred ASID assignment. -+ * @completed_jobs: List containing completed atoms for which base_jd_event is -+ * to be posted. -+ * @work_count: Number of work items, corresponding to atoms, currently -+ * pending on job_done workqueue of @jctx. -+ * @soft_job_timeout: Timer object used for failing/cancelling the waiting -+ * soft-jobs which have been blocked for more than the -+ * timeout value used for the soft-jobs -+ * @jit_alloc: Array of 256 pointers to GPU memory regions, used for -+ * just-in-time memory allocations. -+ * @jit_max_allocations: Maximum allowed number of in-flight -+ * just-in-time memory allocations. -+ * @jit_current_allocations: Current number of in-flight just-in-time -+ * memory allocations. -+ * @jit_current_allocations_per_bin: Current number of in-flight just-in-time -+ * memory allocations per bin. -+ * @jit_group_id: A memory group ID to be passed to a platform-specific -+ * memory group manager. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @jit_phys_pages_limit: Limit of physical pages to apply across all -+ * just-in-time memory allocations, applied to -+ * @jit_current_phys_pressure. -+ * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is -+ * the sum of the worst case estimate of pages that -+ * could be used (i.e. the -+ * &struct_kbase_va_region.nr_pages for all in-use -+ * just-in-time memory regions that have not yet had -+ * a usage report) and the actual number of pages -+ * that were used (i.e. the -+ * &struct_kbase_va_region.used_pages for regions -+ * that have had a usage report). -+ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being -+ * now allocated for just-in-time memory -+ * allocations of a context (across all the -+ * threads). This is supposed to be updated -+ * with @reg_lock held before allocating -+ * the backing pages. This helps ensure that -+ * total physical memory usage for just in -+ * time memory allocation remains within the -+ * @jit_phys_pages_limit in multi-threaded -+ * scenarios. -+ * @jit_active_head: List containing the just-in-time memory allocations -+ * which are in use. -+ * @jit_pool_head: List containing the just-in-time memory allocations -+ * which have been freed up by userspace and so not being -+ * used by them. -+ * Driver caches them to quickly fulfill requests for new -+ * JIT allocations. They are released in case of memory -+ * pressure as they are put on the @evict_list when they -+ * are freed up by userspace. 
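
/*
 * Worked sketch, not part of this patch, of the JIT physical page "pressure"
 * accounting described above: allocations that have not yet produced a usage
 * report contribute their worst-case nr_pages, reported ones contribute their
 * used_pages, and a new request is only allowed while the pressure plus the
 * pages currently being allocated stays within jit_phys_pages_limit. All
 * structure names and numbers here are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct jit_region {
    uint64_t nr_pages;      /* worst-case backing size, in pages */
    uint64_t used_pages;    /* pages reported as actually used   */
    bool has_usage_report;
};

static uint64_t jit_pressure(const struct jit_region *r, unsigned int n)
{
    uint64_t pressure = 0;
    unsigned int i;

    for (i = 0; i < n; i++)
        pressure += r[i].has_usage_report ? r[i].used_pages : r[i].nr_pages;
    return pressure;
}

int main(void)
{
    const struct jit_region active[] = {
        { .nr_pages = 256, .used_pages = 64, .has_usage_report = true },
        { .nr_pages = 512, .used_pages = 0,  .has_usage_report = false },
    };
    const uint64_t limit = 1024;     /* stands in for jit_phys_pages_limit */
    const uint64_t in_flight = 128;  /* jit_phys_pages_to_be_allocated     */
    const uint64_t request = 200;    /* pages wanted by the new allocation */
    uint64_t pressure = jit_pressure(active, 2);

    /* 64 + 512 = 576 pages of pressure; 576 + 128 + 200 = 904 <= 1024. */
    printf("pressure=%llu -> request %s\n", (unsigned long long)pressure,
           pressure + in_flight + request <= limit ? "allowed" : "denied");
    return 0;
}
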
-+ * @jit_destroy_head: List containing the just-in-time memory allocations -+ * which were moved to it from @jit_pool_head, in the -+ * shrinker callback, after freeing their backing -+ * physical pages. -+ * @jit_evict_lock: Lock used for operations done on just-in-time memory -+ * allocations and also for accessing @evict_list. -+ * @jit_work: Work item queued to defer the freeing of a memory -+ * region when a just-in-time memory allocation is moved -+ * to @jit_destroy_head. -+ * @ext_res_meta_head: A list of sticky external resources which were requested to -+ * be mapped on GPU side, through a softjob atom of type -+ * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. -+ * @age_count: Counter incremented on every call to jd_submit_atom, -+ * atom is assigned the snapshot of this counter, which -+ * is used to determine the atom's age when it is added to -+ * the runnable RB-tree. -+ * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) -+ * @kprcs: Reference to @struct kbase_process that the current -+ * kbase_context belongs to. -+ * @kprcs_link: List link for the list of kbase context maintained -+ * under kbase_process. -+ * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by -+ * kbase_context.reg_lock. -+ * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. -+ * @gwt_current_list: A list of addresses for which GPU has generated write faults, -+ * after the last snapshot of it was sent to userspace. -+ * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. -+ * @priority: Indicates the context priority. Used along with @atoms_count -+ * for context scheduling, protected by hwaccess_lock. -+ * @atoms_count: Number of GPU atoms currently in use, per priority -+ * @create_flags: Flags used in context creation. -+ * @kinstr_jm: Kernel job manager instrumentation context handle -+ * @tl_kctx_list_node: List item into the device timeline's list of -+ * contexts, for timeline summarization. -+ * @limited_core_mask: The mask that is applied to the affinity in case of atoms -+ * marked with BASE_JD_REQ_LIMITED_CORE_MASK. -+ * @platform_data: Pointer to platform specific per-context data. -+ * @task: Pointer to the task structure of the main thread of the process -+ * that created the Kbase context. It would be set only for the -+ * contexts created by the Userspace and not for the contexts -+ * created internally by the Kbase. -+ * -+ * A kernel base context is an entity among which the GPU is scheduled. -+ * Each context has its own GPU address space. -+ * Up to one context can be created for each client that opens the device file -+ * /dev/malixx. Context creation is deferred until a special ioctl() system call -+ * is made on the device file. 
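
/*
 * Illustrative user-space sketch, not part of this patch, of the deferred
 * context creation described above: the client opens the device node, agrees
 * an interface version with KBASE_IOCTL_VERSION_CHECK and then creates its
 * context with KBASE_IOCTL_SET_FLAGS. The device node name, the header name
 * and the zero-initialised arguments are assumptions; a real client fills in
 * the UAPI version it was built against and its context creation flags.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "mali_kbase_ioctl.h"   /* kbase UAPI header shipped with the driver */

int main(void)
{
    struct kbase_ioctl_version_check vc = { 0 };    /* client's UAPI version */
    struct kbase_ioctl_set_flags flags = { .create_flags = 0 };
    int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);

    if (fd < 0) {
        perror("/dev/mali0");
        return 1;
    }
    /* Step 1: version handshake (KBASE_FILE_NEED_VSN -> KBASE_FILE_NEED_CTX). */
    if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) == 0)
        printf("kbase interface %u.%u\n", (unsigned)vc.major, (unsigned)vc.minor);
    /* Step 2: context creation (KBASE_FILE_NEED_CTX -> KBASE_FILE_COMPLETE). */
    if (ioctl(fd, KBASE_IOCTL_SET_FLAGS, &flags) != 0)
        perror("KBASE_IOCTL_SET_FLAGS");
    close(fd);
    return 0;
}
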
-+ */ -+struct kbase_context { -+ struct file *filp; -+ struct kbase_device *kbdev; -+ struct list_head kctx_list_link; -+ struct kbase_mmu_table mmu; -+ -+ u32 id; -+ unsigned long api_version; -+ struct list_head event_list; -+ struct list_head event_coalesce_list; -+ struct mutex event_mutex; -+#if !MALI_USE_CSF -+ atomic_t event_closed; -+#endif -+ struct workqueue_struct *event_workq; -+ atomic_t event_count; -+ int event_coalesce_count; -+ -+ atomic_t flags; -+ -+ struct tagged_addr aliasing_sink_page; ++static ssize_t store_serialize_jobs_sysfs(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); ++} + -+ spinlock_t mem_partials_lock; -+ struct list_head mem_partials; ++static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, ++ store_serialize_jobs_sysfs); ++#endif /* !MALI_USE_CSF */ + -+ struct mutex reg_lock; ++static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ protected_mode_hwcnt_disable_work); ++ spinlock_t *backend_lock; ++ unsigned long flags; + -+ struct rb_root reg_rbtree_same; -+ struct rb_root reg_rbtree_custom; -+ struct rb_root reg_rbtree_exec; -+#if MALI_USE_CSF -+ struct rb_root reg_rbtree_exec_fixed; -+ struct rb_root reg_rbtree_fixed; -+ atomic64_t num_fixable_allocs; -+ atomic64_t num_fixed_allocs; -+#endif -+ struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; ++ bool do_disable; + +#if MALI_USE_CSF -+ struct kbase_csf_context csf; ++ backend_lock = &kbdev->csf.scheduler.interrupt_lock; +#else -+ struct kbase_jd_context jctx; -+ struct jsctx_queue jsctx_queue -+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; -+ struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS]; -+ atomic_t atoms_pulled_all_slots; ++ backend_lock = &kbdev->hwaccess_lock; ++#endif + -+ struct list_head completed_jobs; -+ atomic_t work_count; -+ struct timer_list soft_job_timeout; ++ spin_lock_irqsave(backend_lock, flags); ++ do_disable = !kbdev->protected_mode_hwcnt_desired && ++ !kbdev->protected_mode_hwcnt_disabled; ++ spin_unlock_irqrestore(backend_lock, flags); + -+ int priority; -+ s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; -+ u32 slots_pullable; -+ u32 age_count; -+#endif /* MALI_USE_CSF */ ++ if (!do_disable) ++ return; + -+ DECLARE_BITMAP(cookies, BITS_PER_LONG); -+ struct kbase_va_region *pending_regions[BITS_PER_LONG]; ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + -+ wait_queue_head_t event_queue; -+ pid_t tgid; -+ pid_t pid; -+ atomic_t used_pages; -+ atomic_t nonmapped_pages; -+ atomic_t permanent_mapped_pages; ++ spin_lock_irqsave(backend_lock, flags); ++ do_disable = !kbdev->protected_mode_hwcnt_desired && ++ !kbdev->protected_mode_hwcnt_disabled; + -+ struct kbase_mem_pool_group mem_pools; ++ if (do_disable) { ++ /* Protected mode state did not change while we were doing the ++ * disable, so commit the work we just performed and continue ++ * the state machine. ++ */ ++ kbdev->protected_mode_hwcnt_disabled = true; ++#if !MALI_USE_CSF ++ kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ ++ } else { ++ /* Protected mode state was updated while we were doing the ++ * disable, so we need to undo the disable we just performed. 
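
/*
 * Standalone sketch, not part of this patch, of the check / act / re-check
 * pattern used by the worker above: the expensive counter disable runs
 * without the lock held, so the worker re-reads the desired state afterwards
 * and either commits the result or undoes it. The names are illustrative and
 * a pthread mutex stands in for the backend spinlock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool hwcnt_desired;      /* flipped by the rest of the driver */
static bool hwcnt_disabled;

static void slow_disable(void) { /* stands in for kbase_hwcnt_context_disable() */ }
static void slow_enable(void)  { /* stands in for kbase_hwcnt_context_enable()  */ }

static void disable_worker(void)
{
    bool do_disable;

    pthread_mutex_lock(&lock);
    do_disable = !hwcnt_desired && !hwcnt_disabled;
    pthread_mutex_unlock(&lock);

    if (!do_disable)
        return;

    slow_disable();                     /* expensive work, lock dropped */

    pthread_mutex_lock(&lock);
    if (!hwcnt_desired && !hwcnt_disabled)
        hwcnt_disabled = true;          /* nothing changed: commit */
    else
        slow_enable();                  /* state changed meanwhile: undo */
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    disable_worker();
    printf("hwcnt_disabled=%d\n", hwcnt_disabled);
    return 0;
}
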
++ */ ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ } + -+ struct shrinker reclaim; -+ struct list_head evict_list; -+ atomic_t evict_nents; ++ spin_unlock_irqrestore(backend_lock, flags); ++} + -+ struct list_head waiting_soft_jobs; -+ spinlock_t waiting_soft_jobs_lock; ++#ifndef PLATFORM_PROTECTED_CALLBACKS ++static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) ++{ ++ struct kbase_device *kbdev = pdev->data; + -+ int as_nr; ++ return kbase_pm_protected_mode_enable(kbdev); ++} + -+ atomic_t refcount; ++static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) ++{ ++ struct kbase_device *kbdev = pdev->data; + -+ struct mm_struct *process_mm; -+ u64 gpu_va_end; -+#if MALI_USE_CSF -+ u32 running_total_tiler_heap_nr_chunks; -+ u64 running_total_tiler_heap_memory; -+ u64 peak_total_tiler_heap_memory; -+#endif -+ bool jit_va; ++ return kbase_pm_protected_mode_disable(kbdev); ++} + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ char *mem_profile_data; -+ size_t mem_profile_size; -+ struct mutex mem_profile_lock; -+ struct dentry *kctx_dentry; ++static const struct protected_mode_ops kbasep_native_protected_ops = { ++ .protected_mode_enable = kbasep_protected_mode_enable, ++ .protected_mode_disable = kbasep_protected_mode_disable ++}; + -+ unsigned int *reg_dump; -+ atomic_t job_fault_count; -+ struct list_head job_fault_resume_event_list; -+ unsigned int mem_view_column_width; ++#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) ++#endif /* PLATFORM_PROTECTED_CALLBACKS */ + -+#endif /* CONFIG_DEBUG_FS */ -+ struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; -+ u8 jit_max_allocations; -+ u8 jit_current_allocations; -+ u8 jit_current_allocations_per_bin[256]; -+ u8 jit_group_id; -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ u64 jit_phys_pages_limit; -+ u64 jit_current_phys_pressure; -+ u64 jit_phys_pages_to_be_allocated; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ struct list_head jit_active_head; -+ struct list_head jit_pool_head; -+ struct list_head jit_destroy_head; -+ struct mutex jit_evict_lock; -+ struct work_struct jit_work; ++int kbase_protected_mode_init(struct kbase_device *kbdev) ++{ ++ /* Use native protected ops */ ++ kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), ++ GFP_KERNEL); ++ if (!kbdev->protected_dev) ++ return -ENOMEM; ++ kbdev->protected_dev->data = kbdev; ++ kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS; ++ INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, ++ kbasep_protected_mode_hwcnt_disable_worker); ++ kbdev->protected_mode_hwcnt_desired = true; ++ kbdev->protected_mode_hwcnt_disabled = false; ++ return 0; ++} + -+ struct list_head ext_res_meta_head; ++void kbase_protected_mode_term(struct kbase_device *kbdev) ++{ ++ cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); ++ kfree(kbdev->protected_dev); ++} + -+ u8 trim_level; ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++static int kbase_common_reg_map(struct kbase_device *kbdev) ++{ ++ return 0; ++} ++static void kbase_common_reg_unmap(struct kbase_device * const kbdev) ++{ ++} ++#else /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++static int kbase_common_reg_map(struct kbase_device *kbdev) ++{ ++ int err = 0; + -+ struct kbase_process *kprcs; -+ struct list_head kprcs_link; ++ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { ++ dev_err(kbdev->dev, "Register window unavailable\n"); ++ err = -EIO; ++ goto out_region; ++ } + -+#ifdef CONFIG_MALI_CINSTR_GWT -+ bool gwt_enabled; -+ bool 
gwt_was_enabled; -+ struct list_head gwt_current_list; -+ struct list_head gwt_snapshot_list; -+#endif ++ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); ++ if (!kbdev->reg) { ++ dev_err(kbdev->dev, "Can't remap register window\n"); ++ err = -EINVAL; ++ goto out_ioremap; ++ } + -+ base_context_create_flags create_flags; ++ return err; + -+#if !MALI_USE_CSF -+ struct kbase_kinstr_jm *kinstr_jm; -+#endif -+ struct list_head tl_kctx_list_node; ++out_ioremap: ++ release_mem_region(kbdev->reg_start, kbdev->reg_size); ++out_region: ++ return err; ++} + -+ u64 limited_core_mask; ++static void kbase_common_reg_unmap(struct kbase_device * const kbdev) ++{ ++ if (kbdev->reg) { ++ iounmap(kbdev->reg); ++ release_mem_region(kbdev->reg_start, kbdev->reg_size); ++ kbdev->reg = NULL; ++ kbdev->reg_start = 0; ++ kbdev->reg_size = 0; ++ } ++} ++#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+#if !MALI_USE_CSF -+ void *platform_data; -+#endif ++int registers_map(struct kbase_device * const kbdev) ++{ ++ /* the first memory resource is the physical address of the GPU ++ * registers. ++ */ ++ struct platform_device *pdev = to_platform_device(kbdev->dev); ++ struct resource *reg_res; ++ int err; + -+ struct task_struct *task; -+}; ++ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!reg_res) { ++ dev_err(kbdev->dev, "Invalid register resource\n"); ++ return -ENOENT; ++ } + -+#ifdef CONFIG_MALI_CINSTR_GWT -+/** -+ * struct kbasep_gwt_list_element - Structure used to collect GPU -+ * write faults. -+ * @link: List head for adding write faults. -+ * @region: Details of the region where we have the -+ * faulting page address. -+ * @page_addr: Page address where GPU write fault occurred. -+ * @num_pages: The number of pages modified. -+ * -+ * Using this structure all GPU write faults are stored in a list. -+ */ -+struct kbasep_gwt_list_element { -+ struct list_head link; -+ struct kbase_va_region *region; -+ u64 page_addr; -+ u64 num_pages; -+}; ++ kbdev->reg_start = reg_res->start; ++ kbdev->reg_size = resource_size(reg_res); + ++#if MALI_USE_CSF ++ if (kbdev->reg_size < ++ (CSF_HW_DOORBELL_PAGE_OFFSET + ++ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { ++ dev_err(kbdev->dev, "Insufficient register space, will override to the required size\n"); ++ kbdev->reg_size = CSF_HW_DOORBELL_PAGE_OFFSET + ++ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; ++ } +#endif + -+/** -+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource -+ * to a @kbase_context. -+ * @ext_res_node: List head for adding the metadata to a -+ * @kbase_context. -+ * @reg: External resource information, containing -+ * the corresponding VA region -+ * @ref: Reference count. -+ * -+ * External resources can be mapped into multiple contexts as well as the same -+ * context multiple times. -+ * As kbase_va_region is refcounted, we guarantee that it will be available -+ * for the duration of the external resource, meaning it is sufficient to use -+ * it to rederive any additional data, like the GPU address. -+ * This metadata structure binds a single external resource to a single -+ * context, ensuring that per context mapping is tracked separately so it can -+ * be overridden when needed and abuses by the application (freeing the resource -+ * multiple times) don't effect the refcount of the physical allocation. 
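
/*
 * Minimal sketch, not part of this patch, of the per-context external
 * resource bookkeeping described above: the first map of a resource in a
 * context creates a metadata entry, repeated maps only bump its reference
 * count, and the entry is released when the last unmap drops the count to
 * zero. Names are illustrative; the driver keys the metadata on a refcounted
 * kbase_va_region rather than a raw address.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ext_res_meta {
    uint64_t gpu_addr;          /* stands in for the region binding */
    uint32_t ref;
    struct ext_res_meta *next;
};

static struct ext_res_meta *meta_head;

static struct ext_res_meta *ext_res_map(uint64_t gpu_addr)
{
    struct ext_res_meta *m;

    for (m = meta_head; m; m = m->next) {
        if (m->gpu_addr == gpu_addr) {
            m->ref++;           /* already mapped in this context */
            return m;
        }
    }
    m = calloc(1, sizeof(*m));
    if (!m)
        return NULL;
    m->gpu_addr = gpu_addr;
    m->ref = 1;
    m->next = meta_head;
    meta_head = m;
    return m;
}

static void ext_res_unmap(struct ext_res_meta *m)
{
    struct ext_res_meta **p;

    if (--m->ref)
        return;                 /* still mapped by this context */
    for (p = &meta_head; *p; p = &(*p)->next) {
        if (*p == m) {
            *p = m->next;       /* last reference: unlink and free */
            free(m);
            return;
        }
    }
}

int main(void)
{
    struct ext_res_meta *a = ext_res_map(0x1000);
    struct ext_res_meta *b = ext_res_map(0x1000);   /* same resource again */

    printf("refs after two maps: %u\n", a->ref);    /* prints 2 */
    ext_res_unmap(b);
    ext_res_unmap(a);                               /* entry freed here */
    printf("metadata list empty: %d\n", meta_head == NULL);
    return 0;
}
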
-+ */ -+struct kbase_ctx_ext_res_meta { -+ struct list_head ext_res_node; -+ struct kbase_va_region *reg; -+ u32 ref; -+}; -+ -+enum kbase_reg_access_type { -+ REG_READ, -+ REG_WRITE -+}; -+ -+enum kbase_share_attr_bits { -+ /* (1ULL << 8) bit is reserved */ -+ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ -+ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ -+}; -+ -+/** -+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. -+ * @kbdev: kbase device -+ * -+ * Return: true if the device access are coherent, false if not. -+ */ -+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) -+{ -+ if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || -+ (kbdev->system_coherency == COHERENCY_ACE)) -+ return true; ++ err = kbase_common_reg_map(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Failed to map registers\n"); ++ return err; ++ } + -+ return false; ++ return 0; +} + -+/** -+ * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock -+ * region, as a logarithm -+ * -+ * @gpu_props: GPU properties -+ * -+ * Return: the minimum size of the MMU lock region as dictated by the corresponding -+ * arch spec. -+ */ -+static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) ++void registers_unmap(struct kbase_device *kbdev) +{ -+ if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= -+ GPU_ID2_MODEL_MAKE(12, 0)) -+ return 12; /* 4 kB */ -+ -+ return 15; /* 32 kB */ ++ kbase_common_reg_unmap(kbdev); +} + -+/* Conversion helpers for setting up high resolution timers */ -+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) -+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + -+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ -+#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 -+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ -+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 -+/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ -+#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 -+#endif /* _KBASE_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c -new file mode 100644 -index 000000000..7d6e47558 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c -@@ -0,0 +1,80 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++static bool kbase_is_pm_enabled(const struct device_node *gpu_node) ++{ ++ const struct device_node *power_model_node; ++ const void *cooling_cells_node; ++ const void *operating_point_node; ++ bool is_pm_enable = false; + -+/* -+ * Base kernel disjoint events helper functions -+ */ ++ power_model_node = of_get_child_by_name(gpu_node, ++ "power_model"); ++ if (power_model_node) ++ is_pm_enable = true; + -+#include ++ cooling_cells_node = of_get_property(gpu_node, ++ "#cooling-cells", NULL); ++ if (cooling_cells_node) ++ is_pm_enable = true; + -+void kbase_disjoint_init(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ operating_point_node = of_get_property(gpu_node, ++ "operating-points", NULL); ++ if (operating_point_node) ++ is_pm_enable = true; + -+ atomic_set(&kbdev->disjoint_event.count, 0); -+ atomic_set(&kbdev->disjoint_event.state, 0); ++ return is_pm_enable; +} + -+/* increment the disjoint event count */ -+void kbase_disjoint_event(struct kbase_device *kbdev) ++static bool kbase_is_pv_enabled(const struct device_node *gpu_node) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ const void *arbiter_if_node; + -+ atomic_inc(&kbdev->disjoint_event.count); ++ arbiter_if_node = of_get_property(gpu_node, ++ "arbiter_if", NULL); ++ ++ return arbiter_if_node ? true : false; +} + -+/* increment the state and the event counter */ -+void kbase_disjoint_state_up(struct kbase_device *kbdev) ++static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ atomic_inc(&kbdev->disjoint_event.state); ++ const void *coherency_dts; ++ u32 coherency; + -+ kbase_disjoint_event(kbdev); ++ coherency_dts = of_get_property(gpu_node, ++ "system-coherency", ++ NULL); ++ if (coherency_dts) { ++ coherency = be32_to_cpup(coherency_dts); ++ if (coherency == COHERENCY_ACE) ++ return true; ++ } ++ return false; +} + -+/* decrement the state */ -+void kbase_disjoint_state_down(struct kbase_device *kbdev) ++#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ ++ ++int kbase_device_pm_init(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); ++ int err = 0; + -+ kbase_disjoint_event(kbdev); ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + -+ atomic_dec(&kbdev->disjoint_event.state); -+} ++ u32 gpu_id; ++ u32 product_id; ++ u32 gpu_model_id; + -+/* increments the count only if the state is > 0 */ -+void kbase_disjoint_event_potential(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ if (kbase_is_pv_enabled(kbdev->dev->of_node)) { ++ dev_info(kbdev->dev, "Arbitration interface enabled\n"); ++ if (kbase_is_pm_enabled(kbdev->dev->of_node)) { ++ /* Arbitration AND power management invalid */ ++ dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); ++ return -EPERM; ++ } ++ if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { ++ /* Arbitration AND full coherency invalid */ ++ dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); ++ return -EPERM; ++ } ++ err = kbase_arbiter_pm_early_init(kbdev); ++ if (err == 0) { ++ /* Check if Arbitration is running on ++ * supported GPU platform ++ */ ++ kbase_pm_register_access_enable(kbdev); ++ gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); ++ kbase_pm_register_access_disable(kbdev); ++ product_id = ++ KBASE_UBFX32(gpu_id, KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); ++ gpu_model_id = 
GPU_ID2_MODEL_MATCH_VALUE(product_id); + -+ if (atomic_read(&kbdev->disjoint_event.state)) -+ kbase_disjoint_event(kbdev); ++ if (gpu_model_id != GPU_ID2_PRODUCT_TGOX ++ && gpu_model_id != GPU_ID2_PRODUCT_TNOX ++ && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { ++ kbase_arbiter_pm_early_term(kbdev); ++ dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); ++ return -EPERM; ++ } ++ } ++ } else { ++ kbdev->arb.arb_if = NULL; ++ kbdev->arb.arb_dev = NULL; ++ err = power_control_init(kbdev); ++ } ++#else ++ err = power_control_init(kbdev); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ ++ return err; +} + -+u32 kbase_disjoint_event_get(struct kbase_device *kbdev) ++void kbase_device_pm_term(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ return atomic_read(&kbdev->disjoint_event.count); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#if IS_ENABLED(CONFIG_OF) ++ if (kbase_is_pv_enabled(kbdev->dev->of_node)) ++ kbase_arbiter_pm_early_term(kbdev); ++ else ++ power_control_term(kbdev); ++#endif /* CONFIG_OF */ ++#else ++ power_control_term(kbdev); ++#endif +} -+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c -new file mode 100644 -index 000000000..4e2713511 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c -@@ -0,0 +1,452 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+/* -+ * Implementation of the dummy job execution workaround for the GPU hang issue. -+ */ ++int power_control_init(struct kbase_device *kbdev) ++{ ++#ifndef CONFIG_OF ++ /* Power control initialization requires at least the capability to get ++ * regulators and clocks from the device tree, as well as parsing ++ * arrays of unsigned integer values. ++ * ++ * The whole initialization process shall simply be skipped if the ++ * minimum capability is not available. ++ */ ++ return 0; ++#else ++ struct platform_device *pdev; ++ int err = 0; ++ unsigned int i; ++#if defined(CONFIG_REGULATOR) ++ static const char * const regulator_names[] = { ++ "mali", "mem" ++ }; ++#endif /* CONFIG_REGULATOR */ + -+#include -+#include -+#include ++ if (!kbdev) ++ return -ENODEV; + -+#include -+#include ++ pdev = to_platform_device(kbdev->dev); + -+#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa" ++#if defined(CONFIG_REGULATOR) ++ /* Since the error code EPROBE_DEFER causes the entire probing ++ * procedure to be restarted from scratch at a later time, ++ * all regulators will be released before returning. ++ * ++ * Any other error is ignored and the driver will continue ++ * operating with a partial initialization of regulators. 
++ */ ++ for (i = 0; i < ARRAY_SIZE(regulator_names); i++) { ++ kbdev->regulators[i] = regulator_get_optional(kbdev->dev, ++ regulator_names[i]); ++ if (IS_ERR(kbdev->regulators[i])) { ++ err = PTR_ERR(kbdev->regulators[i]); ++ kbdev->regulators[i] = NULL; ++ break; ++ } ++ } ++ if (err == -EPROBE_DEFER) { ++ while (i > 0) ++ regulator_put(kbdev->regulators[--i]); ++ return err; ++ } + -+struct wa_header { -+ u16 signature; -+ u16 version; -+ u32 info_offset; -+} __packed; ++ kbdev->nr_regulators = i; ++ dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators); ++#endif + -+struct wa_v2_info { -+ u64 jc; -+ u32 js; -+ u32 blob_offset; -+ u64 flags; -+} __packed; ++ /* Having more clocks than regulators is acceptable, while the ++ * opposite shall not happen. ++ * ++ * Since the error code EPROBE_DEFER causes the entire probing ++ * procedure to be restarted from scratch at a later time, ++ * all clocks and regulators will be released before returning. ++ * ++ * Any other error is ignored and the driver will continue ++ * operating with a partial initialization of clocks. ++ */ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i); ++ if (IS_ERR(kbdev->clocks[i])) { ++ err = PTR_ERR(kbdev->clocks[i]); ++ kbdev->clocks[i] = NULL; ++ break; ++ } + -+struct wa_blob { -+ u64 base; -+ u32 size; -+ u32 map_flags; -+ u32 payload_offset; -+ u32 blob_offset; -+} __packed; ++ err = clk_prepare(kbdev->clocks[i]); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to prepare and enable clock (%d)\n", ++ err); ++ clk_put(kbdev->clocks[i]); ++ break; ++ } ++ } ++ if (err == -EPROBE_DEFER) { ++ while (i > 0) { ++ clk_disable_unprepare(kbdev->clocks[--i]); ++ clk_put(kbdev->clocks[i]); ++ } ++ goto clocks_probe_defer; ++ } + -+static bool in_range_offset(const u8 *base, const u8 *end, off_t off, size_t sz) -+{ -+ return !(end - base - off < sz); -+} ++ kbdev->nr_clocks = i; ++ dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks); + -+static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) -+{ -+ int loop; -+ const int timeout = 100; -+ u32 val; ++ /* Any error in parsing the OPP table from the device file ++ * shall be ignored. The fact that the table may be absent or wrong ++ * on the device tree of the platform shouldn't prevent the driver ++ * from completing its initialization. 
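
/*
 * Standalone sketch, not part of this patch, of the acquisition policy used
 * above for optional regulators and clocks: resources are taken in order
 * until the first failure, a probe-defer error releases everything obtained
 * so far and is propagated, and any other failure simply leaves the driver
 * running with a partial set. All names and the error value are illustrative
 * (-517 mirrors the kernel's -EPROBE_DEFER).
 */
#include <stdio.h>

#define MAX_RES   4
#define ERR_DEFER (-517)

/* Pretend getter: slots 0 and 1 succeed, slot 2 asks to be deferred. */
static int get_resource(int idx)
{
    if (idx == 2)
        return ERR_DEFER;
    return idx < 3 ? 0 : -1;
}

static void put_resource(int idx) { (void)idx; }

static int acquire_all(int *nr_acquired)
{
    int i, err = 0;

    for (i = 0; i < MAX_RES; i++) {
        err = get_resource(i);
        if (err)
            break;              /* stop at the first failure */
    }

    if (err == ERR_DEFER) {
        while (i > 0)
            put_resource(--i);  /* release everything and retry later */
        *nr_acquired = 0;
        return err;
    }

    *nr_acquired = i;           /* partial initialization is accepted */
    return 0;
}

int main(void)
{
    int nr = 0;
    int err = acquire_all(&nr);

    printf("err=%d, resources held=%d\n", err, nr);
    return 0;
}
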
++ */ ++#if defined(CONFIG_PM_OPP) ++#ifdef CONFIG_ARCH_ROCKCHIP ++ err = kbase_platform_rk_init_opp_table(kbdev); ++ if (err) ++ dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err); ++#else ++ err = dev_pm_opp_of_add_table(kbdev->dev); ++ CSTD_UNUSED(err); ++#endif ++#endif /* CONFIG_PM_OPP */ ++ return 0; + -+ for (loop = 0; loop < timeout; loop++) { -+ val = kbase_reg_read(kbdev, offset); -+ if (val & bits) ++#if defined(CONFIG_PM_OPP) && \ ++ ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR)) ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (kbdev->clocks[i]) { ++ if (__clk_is_enabled(kbdev->clocks[i])) ++ clk_disable_unprepare(kbdev->clocks[i]); ++ clk_put(kbdev->clocks[i]); ++ kbdev->clocks[i] = NULL; ++ } else + break; -+ udelay(10); -+ } -+ -+ if (loop == timeout) { -+ dev_err(kbdev->dev, -+ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", -+ (unsigned long)offset, (unsigned long)bits, -+ (unsigned long)val); + } ++#endif + -+ return (val & bits); ++clocks_probe_defer: ++#if defined(CONFIG_REGULATOR) ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) ++ regulator_put(kbdev->regulators[i]); ++#endif ++ return err; ++#endif /* CONFIG_OF */ +} + -+static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) ++void power_control_term(struct kbase_device *kbdev) +{ -+ int loop; -+ const int timeout = 100; -+ u32 val; -+ u32 target = 0; ++ unsigned int i; + -+ if (set) -+ target = bits; ++#if defined(CONFIG_PM_OPP) ++#ifdef CONFIG_ARCH_ROCKCHIP ++ kbase_platform_rk_uninit_opp_table(kbdev); ++#else ++ dev_pm_opp_of_remove_table(kbdev->dev); ++#endif ++#endif /* CONFIG_PM_OPP */ + -+ for (loop = 0; loop < timeout; loop++) { -+ val = kbase_reg_read(kbdev, (offset)); -+ if ((val & bits) == target) ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (kbdev->clocks[i]) { ++ clk_unprepare(kbdev->clocks[i]); ++ clk_put(kbdev->clocks[i]); ++ kbdev->clocks[i] = NULL; ++ } else + break; -+ -+ udelay(10); + } + -+ if (loop == timeout) { -+ dev_err(kbdev->dev, -+ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", -+ (unsigned long)offset, (unsigned long)bits, -+ (unsigned long)val); -+ return -ETIMEDOUT; ++#if defined(CONFIG_OF) && defined(CONFIG_REGULATOR) ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (kbdev->regulators[i]) { ++ regulator_put(kbdev->regulators[i]); ++ kbdev->regulators[i] = NULL; ++ } + } -+ -+ return 0; ++#endif +} + -+static inline int run_job(struct kbase_device *kbdev, int as, int slot, -+ u64 cores, u64 jc) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++static void trigger_reset(struct kbase_device *kbdev) +{ -+ u32 done; ++ kbase_pm_context_active(kbdev); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ kbase_pm_context_idle(kbdev); ++} + -+ /* setup job */ -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), -+ jc & U32_MAX); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), -+ jc >> 32); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), -+ cores & U32_MAX); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), -+ cores >> 32); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), -+ JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); ++#define MAKE_QUIRK_ACCESSORS(type) \ ++static int type##_quirks_set(void *data, u64 val) \ ++{ \ ++ struct kbase_device *kbdev; \ ++ kbdev = (struct kbase_device *)data; \ ++ kbdev->hw_quirks_##type = (u32)val; \ ++ 
trigger_reset(kbdev); \ ++ return 0; \ ++} \ ++\ ++static int type##_quirks_get(void *data, u64 *val) \ ++{ \ ++ struct kbase_device *kbdev; \ ++ kbdev = (struct kbase_device *)data; \ ++ *val = kbdev->hw_quirks_##type; \ ++ return 0; \ ++} \ ++DEFINE_DEBUGFS_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get, \ ++ type##_quirks_set, "%llu\n") + -+ /* go */ -+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), -+ JS_COMMAND_START); ++MAKE_QUIRK_ACCESSORS(sc); ++MAKE_QUIRK_ACCESSORS(tiler); ++MAKE_QUIRK_ACCESSORS(mmu); ++MAKE_QUIRK_ACCESSORS(gpu); + -+ /* wait for the slot to finish (done, error) */ -+ done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), -+ (1ul << (16+slot)) | (1ul << slot)); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); ++/** ++ * kbase_device_debugfs_reset_write() - Reset the GPU ++ * ++ * @data: Pointer to the Kbase device. ++ * @wait_for_reset: Value written to the file. ++ * ++ * This function will perform the GPU reset, and if the value written to ++ * the file is 1 it will also wait for the reset to complete. ++ * ++ * Return: 0 in case of no error otherwise a negative value. ++ */ ++static int kbase_device_debugfs_reset_write(void *data, u64 wait_for_reset) ++{ ++ struct kbase_device *kbdev = data; + -+ if (done != (1ul << slot)) { -+ dev_err(kbdev->dev, -+ "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", -+ slot, (unsigned long long)cores, -+ (unsigned long)done); -+ dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", -+ kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); ++ trigger_reset(kbdev); + -+ return -EFAULT; -+ } else { -+ return 0; -+ } ++ if (wait_for_reset == 1) ++ return kbase_reset_gpu_wait(kbdev); ++ ++ return 0; +} + -+/* To be called after power up & MMU init, but before everything else */ -+int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) ++DEFINE_DEBUGFS_ATTRIBUTE(fops_trigger_reset, NULL, &kbase_device_debugfs_reset_write, "%llu\n"); ++ ++/** ++ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read ++ * @file: File object to read is for ++ * @buf: User buffer to populate with data ++ * @len: Length of user buffer ++ * @ppos: Offset within file object ++ * ++ * Retrieves the current status of protected debug mode ++ * (0 = disabled, 1 = enabled) ++ * ++ * Return: Number of bytes added to user buffer ++ */ ++static ssize_t debugfs_protected_debug_mode_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) +{ -+ int as; -+ int slot; -+ u64 jc; -+ int failed = 0; -+ int runs = 0; -+ u32 old_gpu_mask; -+ u32 old_job_mask; ++ struct kbase_device *kbdev = (struct kbase_device *)file->private_data; ++ u32 gpu_status; ++ ssize_t ret_val; + -+ if (!kbdev) -+ return -EFAULT; ++ kbase_pm_context_active(kbdev); ++ gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); ++ kbase_pm_context_idle(kbdev); + -+ if (!kbdev->dummy_job_wa.ctx) -+ return -EFAULT; ++ if (gpu_status & GPU_DBGEN) ++ ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); ++ else ++ ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); + -+ as = kbdev->dummy_job_wa.ctx->as_nr; -+ slot = kbdev->dummy_job_wa.slot; -+ jc = kbdev->dummy_job_wa.jc; ++ return ret_val; ++} + -+ /* mask off all but MMU IRQs */ -+ old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); -+ old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); 
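/*
 * A minimal, hand-written sketch of the accessor pattern that the
 * MAKE_QUIRK_ACCESSORS() macro above generates, shown for one hypothetical
 * u32 field.  The names "demo_device", "hw_quirks_demo" and "quirks_demo"
 * are invented for illustration and are not part of the Mali driver.
 */
#include <linux/debugfs.h>
#include <linux/types.h>

struct demo_device {
	struct dentry *debugfs_dir;
	u32 hw_quirks_demo;
};

static int demo_quirks_set(void *data, u64 val)
{
	struct demo_device *ddev = data;

	/* The real driver also triggers a GPU reset so the new value takes effect. */
	ddev->hw_quirks_demo = (u32)val;
	return 0;
}

static int demo_quirks_get(void *data, u64 *val)
{
	struct demo_device *ddev = data;

	*val = ddev->hw_quirks_demo;
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_demo_quirks, demo_quirks_get, demo_quirks_set, "%llu\n");

static void demo_quirks_debugfs_init(struct demo_device *ddev)
{
	/* 0644 mirrors the mode used for the quirks_* files created below. */
	debugfs_create_file("quirks_demo", 0644, ddev->debugfs_dir, ddev,
			    &fops_demo_quirks);
}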
++/* ++ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops ++ * ++ * Contains the file operations for the "protected_debug_mode" debugfs file ++ */ ++static const struct file_operations fops_protected_debug_mode = { ++ .owner = THIS_MODULE, ++ .open = simple_open, ++ .read = debugfs_protected_debug_mode_read, ++ .llseek = default_llseek, ++}; + -+ /* power up requested cores */ -+ kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); -+ kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); ++static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, ++ void *data) ++{ ++ CSTD_UNUSED(data); ++ return kbase_debugfs_helper_seq_read(sfile, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_config_debugfs_max_size); ++} + -+ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { -+ /* wait for power-ups */ -+ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); -+ if (cores >> 32) -+ wait(kbdev, SHADER_READY_HI, (cores >> 32), true); -+ } ++static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) ++{ ++ int err = 0; + -+ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { -+ int i; ++ CSTD_UNUSED(ppos); ++ err = kbase_debugfs_helper_seq_write(file, ubuf, count, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_config_debugfs_set_max_size); + -+ /* do for each requested core */ -+ for (i = 0; i < sizeof(cores) * 8; i++) { -+ u64 affinity; ++ return err ? err : count; ++} + -+ affinity = 1ull << i; ++static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, ++ in->i_private); ++} + -+ if (!(cores & affinity)) -+ continue; ++static const struct file_operations ++ kbase_device_debugfs_mem_pool_max_size_fops = { ++ .owner = THIS_MODULE, ++ .open = kbase_device_debugfs_mem_pool_max_size_open, ++ .read = seq_read, ++ .write = kbase_device_debugfs_mem_pool_max_size_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ if (run_job(kbdev, as, slot, affinity, jc)) -+ failed++; -+ runs++; -+ } ++/** ++ * debugfs_ctx_defaults_init - Create the default configuration of new contexts in debugfs ++ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver. ++ * Return: A pointer to the last dentry that it tried to create, whether successful or not. ++ * Could be NULL or encode another error value. 
++ */ ++static struct dentry *debugfs_ctx_defaults_init(struct kbase_device *const kbdev) ++{ ++ /* prevent unprivileged use of debug file system ++ * in old kernel version ++ */ ++ const mode_t mode = 0644; ++ struct dentry *dentry = debugfs_create_dir("defaults", kbdev->debugfs_ctx_directory); ++ struct dentry *debugfs_ctx_defaults_directory = dentry; + -+ } else { -+ if (run_job(kbdev, as, slot, cores, jc)) -+ failed++; -+ runs++; ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); ++ return dentry; + } + -+ if (kbdev->dummy_job_wa.flags & -+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { -+ /* power off shader cores (to reduce any dynamic leakage) */ -+ kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX)); -+ kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); ++ debugfs_create_bool("infinite_cache", mode, ++ debugfs_ctx_defaults_directory, ++ &kbdev->infinite_cache_active_default); + -+ /* wait for power off complete */ -+ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); -+ wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); -+ if (cores >> 32) { -+ wait(kbdev, SHADER_READY_HI, (cores >> 32), false); -+ wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); -+ } -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); ++ dentry = debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory, ++ &kbdev->mem_pool_defaults.small, ++ &kbase_device_debugfs_mem_pool_max_size_fops); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create mem_pool_max_size debugfs entry\n"); ++ return dentry; + } + -+ /* restore IRQ masks */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); -+ -+ if (failed) -+ dev_err(kbdev->dev, -+ "WA complete with %d failures out of %d runs\n", failed, -+ runs); ++ dentry = debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory, ++ &kbdev->mem_pool_defaults.large, ++ &kbase_device_debugfs_mem_pool_max_size_fops); ++ if (IS_ERR_OR_NULL(dentry)) ++ dev_err(kbdev->dev, "Unable to create lp_mem_pool_max_size debugfs entry\n"); + -+ return failed ? -EFAULT : 0; ++ return dentry; +} + -+static ssize_t dummy_job_wa_info_show(struct device * const dev, -+ struct device_attribute * const attr, char * const buf) ++/** ++ * init_debugfs - Create device-wide debugfs directories and files for the Mali driver ++ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver. ++ * Return: A pointer to the last dentry that it tried to create, whether successful or not. ++ * Could be NULL or encode another error value. 
++ */ ++static struct dentry *init_debugfs(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = dev_get_drvdata(dev); -+ int err; -+ -+ if (!kbdev || !kbdev->dummy_job_wa.ctx) -+ return -ENODEV; -+ -+ err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", -+ kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); ++ struct dentry *dentry = debugfs_create_dir(kbdev->devname, NULL); + -+ return err; -+} ++ kbdev->mali_debugfs_directory = dentry; ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, ++ "Couldn't create mali debugfs directory: %s\n", ++ kbdev->devname); ++ return dentry; ++ } + -+static DEVICE_ATTR_RO(dummy_job_wa_info); ++ dentry = debugfs_create_dir("ctx", kbdev->mali_debugfs_directory); ++ kbdev->debugfs_ctx_directory = dentry; ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); ++ return dentry; ++ } + -+static bool wa_blob_load_needed(struct kbase_device *kbdev) -+{ -+ if (of_machine_is_compatible("arm,juno")) -+ return false; ++ dentry = debugfs_create_dir("instrumentation", kbdev->mali_debugfs_directory); ++ kbdev->debugfs_instr_directory = dentry; ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n"); ++ return dentry; ++ } + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485)) -+ return true; ++ kbasep_regs_history_debugfs_init(kbdev); + -+ return false; -+} ++#if MALI_USE_CSF ++ kbase_debug_csf_fault_debugfs_init(kbdev); ++#else /* MALI_USE_CSF */ ++ kbase_debug_job_fault_debugfs_init(kbdev); ++#endif /* !MALI_USE_CSF */ + -+int kbase_dummy_job_wa_load(struct kbase_device *kbdev) -+{ -+ const struct firmware *firmware; -+ static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME; -+ const u32 signature = 0x4157; -+ const u32 version = 2; -+ const u8 *fw_end; -+ const u8 *fw; -+ const struct wa_header *header; -+ const struct wa_v2_info *v2_info; -+ u32 blob_offset; -+ int err; -+ struct kbase_context *kctx; ++ kbasep_gpu_memory_debugfs_init(kbdev); ++ kbase_as_fault_debugfs_init(kbdev); ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ kbase_instr_backend_debugfs_init(kbdev); ++#endif ++ kbase_pbha_debugfs_init(kbdev); + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. ++ /* fops_* variables created by invocations of macro ++ * MAKE_QUIRK_ACCESSORS() above. + */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ -+ lockdep_assert_held(&kbdev->fw_load_lock); -+ -+ if (!wa_blob_load_needed(kbdev)) -+ return 0; -+ -+ /* load the wa */ -+ err = request_firmware(&firmware, wa_name, kbdev->dev); -+ -+ if (err) { -+ dev_err(kbdev->dev, "WA blob missing. 
Please refer to the Arm Mali DDK Valhall Release Notes, " -+ "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); -+ return -ENODEV; ++ dentry = debugfs_create_file("quirks_sc", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_sc_quirks); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create quirks_sc debugfs entry\n"); ++ return dentry; + } + -+ kctx = kbase_create_context(kbdev, true, -+ BASE_CONTEXT_CREATE_FLAG_NONE, 0, -+ NULL); -+ -+ if (!kctx) { -+ dev_err(kbdev->dev, "Failed to create WA context\n"); -+ goto no_ctx; ++ dentry = debugfs_create_file("quirks_tiler", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_tiler_quirks); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create quirks_tiler debugfs entry\n"); ++ return dentry; + } + -+ fw = firmware->data; -+ fw_end = fw + firmware->size; -+ -+ dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", -+ firmware->size); ++ dentry = debugfs_create_file("quirks_mmu", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_mmu_quirks); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create quirks_mmu debugfs entry\n"); ++ return dentry; ++ } + -+ if (!in_range_offset(fw, fw_end, 0, sizeof(*header))) { -+ dev_err(kbdev->dev, "WA too small\n"); -+ goto bad_fw; ++ dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, ++ kbdev, &fops_gpu_quirks); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create quirks_gpu debugfs entry\n"); ++ return dentry; + } + -+ header = (const struct wa_header *)(fw + 0); ++ dentry = debugfs_ctx_defaults_init(kbdev); ++ if (IS_ERR_OR_NULL(dentry)) ++ return dentry; + -+ if (header->signature != signature) { -+ dev_err(kbdev->dev, "WA signature failure: 0x%lx\n", -+ (unsigned long)header->signature); -+ goto bad_fw; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { ++ dentry = debugfs_create_file("protected_debug_mode", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_protected_debug_mode); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create protected_debug_mode debugfs entry\n"); ++ return dentry; ++ } + } + -+ if (header->version != version) { -+ dev_err(kbdev->dev, "WA version 0x%lx not supported\n", -+ (unsigned long)header->version); -+ goto bad_fw; ++ dentry = debugfs_create_file("reset", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_trigger_reset); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create reset debugfs entry\n"); ++ return dentry; + } + -+ if (!in_range_offset(fw, fw_end, header->info_offset, sizeof(*v2_info))) { -+ dev_err(kbdev->dev, "WA info offset out of bounds\n"); -+ goto bad_fw; -+ } ++ kbase_ktrace_debugfs_init(kbdev); + -+ v2_info = (const struct wa_v2_info *)(fw + header->info_offset); ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ if (kbdev->devfreq && kbdev->devfreq_cooling) ++ kbase_ipa_debugfs_init(kbdev); ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+ if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) { -+ dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n", -+ (unsigned long long)v2_info->flags); -+ goto bad_fw; ++#if !MALI_USE_CSF ++ dentry = debugfs_create_file("serialize_jobs", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_serialize_jobs_debugfs_fops); ++ if (IS_ERR_OR_NULL(dentry)) { ++ dev_err(kbdev->dev, "Unable to create serialize_jobs debugfs 
entry\n"); ++ return dentry; + } ++ kbase_timeline_io_debugfs_init(kbdev); ++#endif ++ kbase_dvfs_status_debugfs_init(kbdev); + -+ kbdev->dummy_job_wa.slot = v2_info->js; -+ kbdev->dummy_job_wa.jc = v2_info->jc; -+ kbdev->dummy_job_wa.flags = v2_info->flags; + -+ blob_offset = v2_info->blob_offset; ++ return dentry; ++} + -+ while (blob_offset) { -+ const struct wa_blob *blob; -+ size_t nr_pages; -+ u64 flags; -+ u64 gpu_va; -+ struct kbase_va_region *va_region; ++int kbase_device_debugfs_init(struct kbase_device *kbdev) ++{ ++ struct dentry *dentry = init_debugfs(kbdev); + -+ if (!in_range_offset(fw, fw_end, blob_offset, sizeof(*blob))) { -+ dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", -+ (unsigned long)blob_offset); -+ goto bad_fw; -+ } ++ if (IS_ERR_OR_NULL(dentry)) { ++ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++ return IS_ERR(dentry) ? PTR_ERR(dentry) : -ENOMEM; ++ } ++ return 0; ++} + -+ blob = (const struct wa_blob *)(fw + blob_offset); -+ if (!in_range_offset(fw, fw_end, blob->payload_offset, blob->size)) { -+ dev_err(kbdev->dev, "Payload out-of-bounds\n"); -+ goto bad_fw; -+ } ++void kbase_device_debugfs_term(struct kbase_device *kbdev) ++{ ++ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++} ++#endif /* CONFIG_DEBUG_FS */ + -+ gpu_va = blob->base; -+ if (PAGE_ALIGN(gpu_va) != gpu_va) { -+ dev_err(kbdev->dev, "blob not page aligned\n"); -+ goto bad_fw; -+ } -+ nr_pages = PFN_UP(blob->size); -+ flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; ++int kbase_device_coherency_init(struct kbase_device *kbdev) ++{ ++#if IS_ENABLED(CONFIG_OF) ++ u32 supported_coherency_bitmap = ++ kbdev->gpu_props.props.raw_props.coherency_mode; ++ const void *coherency_override_dts; ++ bool dma_coherent; ++ u32 override_coherency, gpu_id; ++ unsigned int prod_id; + -+ va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, -+ &gpu_va, mmu_sync_info); ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ gpu_id &= GPU_ID_VERSION_PRODUCT_ID; ++ prod_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT; + -+ if (!va_region) { -+ dev_err(kbdev->dev, "Failed to allocate for blob\n"); -+ } else { -+ struct kbase_vmap_struct vmap = { 0 }; -+ const u8 *payload; -+ void *dst; ++ /* Only for tMIx : ++ * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly ++ * documented for tMIx so force correct value here. 
++ */ ++ if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == ++ GPU_ID2_PRODUCT_TMIX) ++ if (supported_coherency_bitmap == ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) ++ supported_coherency_bitmap |= ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + -+ /* copy the payload, */ -+ payload = fw + blob->payload_offset; ++#endif /* CONFIG_OF */ + -+ dst = kbase_vmap(kctx, -+ va_region->start_pfn << PAGE_SHIFT, -+ nr_pages << PAGE_SHIFT, &vmap); ++ kbdev->system_coherency = COHERENCY_NONE; + -+ if (dst) { -+ memcpy(dst, payload, blob->size); -+ kbase_vunmap(kctx, &vmap); -+ } else { ++ /* device tree may override the coherency */ ++#if IS_ENABLED(CONFIG_OF) ++ /* treat "dma-coherency" as a synonym for ACE-lite */ ++ dma_coherent = of_dma_is_coherent(kbdev->dev->of_node); ++ coherency_override_dts = of_get_property(kbdev->dev->of_node, ++ "system-coherency", ++ NULL); ++ if (coherency_override_dts || dma_coherent) { ++ if (coherency_override_dts) { ++ override_coherency = be32_to_cpup(coherency_override_dts); ++ if (dma_coherent && override_coherency != COHERENCY_ACE_LITE) { + dev_err(kbdev->dev, -+ "Failed to copy payload\n"); ++ "system-coherency needs to be 0 when dma-coherent is set\n"); ++ return -EINVAL; + } ++ } else { ++ /* dma-coherent set and system-coherency not specified */ ++ override_coherency = COHERENCY_ACE_LITE; ++ } + ++#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* ACE coherency mode is not supported by Driver on CSF GPUs. ++ * Return an error to signal the invalid device tree configuration. ++ */ ++ if (override_coherency == COHERENCY_ACE) { ++ dev_err(kbdev->dev, ++ "ACE coherency not supported, wrong DT configuration"); ++ return -EINVAL; + } -+ blob_offset = blob->blob_offset; /* follow chain */ -+ } ++#endif + -+ release_firmware(firmware); ++ if ((override_coherency <= COHERENCY_NONE) && ++ (supported_coherency_bitmap & ++ COHERENCY_FEATURE_BIT(override_coherency))) { + -+ kbasep_js_schedule_privileged_ctx(kbdev, kctx); ++ kbdev->system_coherency = override_coherency; + -+ kbdev->dummy_job_wa.ctx = kctx; ++ dev_info(kbdev->dev, ++ "Using coherency mode %u set from dtb", ++ override_coherency); ++ } else ++ dev_warn(kbdev->dev, ++ "Ignoring unsupported coherency mode %u set from dtb", ++ override_coherency); ++ } + -+ err = sysfs_create_file(&kbdev->dev->kobj, -+ &dev_attr_dummy_job_wa_info.attr); -+ if (err) -+ dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); ++#endif /* CONFIG_OF */ + -+ return 0; ++ kbdev->gpu_props.props.raw_props.coherency_mode = ++ kbdev->system_coherency; + -+bad_fw: -+ kbase_destroy_context(kctx); -+no_ctx: -+ release_firmware(firmware); -+ return -EFAULT; ++ return 0; +} + -+void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) ++ ++#if MALI_USE_CSF ++/** ++ * csg_scheduling_period_store - Store callback for the csg_scheduling_period ++ * sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the csg_scheduling_period sysfs file is written ++ * to. It checks the data written, and if valid updates the reset timeout. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t csg_scheduling_period_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) +{ -+ struct kbase_context *wa_ctx; ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int csg_scheduling_period; + -+ /* return if the dummy job has not been loaded */ -+ if (kbdev->dummy_job_wa_loaded == false) -+ return; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ /* Can be safely called even if the file wasn't created on probe */ -+ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); ++ ret = kstrtouint(buf, 0, &csg_scheduling_period); ++ if (ret || csg_scheduling_period == 0) { ++ dev_err(kbdev->dev, ++ "Couldn't process csg_scheduling_period write operation.\n" ++ "Use format 'csg_scheduling_period_ms', and csg_scheduling_period_ms > 0\n"); ++ return -EINVAL; ++ } + -+ wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); -+ WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); -+ /* make this write visible before we tear down the ctx */ -+ smp_mb(); ++ kbase_csf_scheduler_lock(kbdev); ++ kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period; ++ dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", ++ csg_scheduling_period); ++ kbase_csf_scheduler_unlock(kbdev); + -+ if (wa_ctx) { -+ kbasep_js_release_privileged_ctx(kbdev, wa_ctx); -+ kbase_destroy_context(wa_ctx); -+ } ++ return count; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h -new file mode 100644 -index 000000000..8713ba1ea ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h -@@ -0,0 +1,73 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * csg_scheduling_period_show - Show callback for the csg_scheduling_period ++ * sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called to get the current reset timeout. + * ++ * Return: The number of bytes output to @buf. 
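 *
 * Note (a sketch of the generic DEVICE_ATTR_RW convention, not taken from the
 * Mali sources): the DEVICE_ATTR_RW(csg_scheduling_period) declaration below
 * requires the callbacks to be named csg_scheduling_period_show() and
 * csg_scheduling_period_store(), and expands to roughly
 *
 *   static struct device_attribute dev_attr_csg_scheduling_period =
 *           __ATTR(csg_scheduling_period, 0644,
 *                  csg_scheduling_period_show, csg_scheduling_period_store);
 *
 * which is what allows &dev_attr_csg_scheduling_period.attr to be listed in
 * the kbase_attrs[] table further down.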
+ */ ++static ssize_t csg_scheduling_period_show(struct device *dev, ++ struct device_attribute *attr, ++ char *const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+#ifndef _KBASE_DUMMY_JOB_WORKAROUND_ -+#define _KBASE_DUMMY_JOB_WORKAROUND_ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#define KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE (1ull << 0) -+#define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) -+#define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", ++ kbdev->csf.scheduler.csg_scheduling_period_ms); + -+#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ -+ KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ -+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) ++ return ret; ++} + -+#if MALI_USE_CSF ++static DEVICE_ATTR_RW(csg_scheduling_period); + -+static inline int kbase_dummy_job_wa_load(struct kbase_device *kbdev) ++/** ++ * fw_timeout_store - Store callback for the fw_timeout sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the fw_timeout sysfs file is written to. It ++ * checks the data written, and if valid updates the reset timeout. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t fw_timeout_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, ++ size_t count) +{ -+ CSTD_UNUSED(kbdev); -+ return 0; -+} ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int fw_timeout; + -+static inline void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, -+ u64 cores) -+{ -+ CSTD_UNUSED(kbdev); -+ CSTD_UNUSED(cores); -+ return 0; ++ ret = kstrtouint(buf, 0, &fw_timeout); ++ if (ret || fw_timeout == 0) { ++ dev_err(kbdev->dev, ++ "Couldn't process fw_timeout write operation.\n" ++ "Use format 'fw_timeout_ms', and fw_timeout_ms > 0\n" ++ "Default fw_timeout: %u", ++ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)); ++ return -EINVAL; ++ } ++ ++ kbase_csf_scheduler_lock(kbdev); ++ kbdev->csf.fw_timeout_ms = fw_timeout; ++ kbase_csf_scheduler_unlock(kbdev); ++ dev_dbg(kbdev->dev, "Firmware timeout: %ums\n", fw_timeout); ++ ++ return count; +} + -+static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) ++/** ++ * fw_timeout_show - Show callback for the firmware timeout sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current reset timeout. ++ * ++ * Return: The number of bytes output to @buf. 
++ */ ++static ssize_t fw_timeout_show(struct device *dev, ++ struct device_attribute *attr, char *const buf) +{ -+ CSTD_UNUSED(kbdev); -+ return false; -+} ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+#else ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+int kbase_dummy_job_wa_load(struct kbase_device *kbdev); -+void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); -+int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.fw_timeout_ms); + -+static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) -+{ -+ return (kbdev->dummy_job_wa.ctx != NULL); ++ return ret; +} + -+#endif /* MALI_USE_CSF */ ++static DEVICE_ATTR_RW(fw_timeout); + -+#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c -new file mode 100644 -index 000000000..e4cb71632 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c -@@ -0,0 +1,94 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++/** ++ * idle_hysteresis_time_store - Store callback for CSF idle_hysteresis_time ++ * sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * This function is called when the idle_hysteresis_time sysfs file is ++ * written to. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This file contains values of the idle hysteresis duration. + * ++ * Return: @count if the function succeeded. An error code on failure. + */ ++static ssize_t idle_hysteresis_time_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ u32 dur = 0; + -+#include "mali_kbase_dvfs_debugfs.h" -+#include -+#include ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (kstrtou32(buf, 0, &dur)) { ++ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } ++ ++ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); ++ ++ return count; ++} + +/** -+ * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info ++ * idle_hysteresis_time_show - Show callback for CSF idle_hysteresis_time ++ * sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. 
+ * -+ * @file: The seq_file for printing to -+ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * This function is called to get the current idle hysteresis duration in ms. + * -+ * Return: Negative error code or 0 on success. ++ * Return: The number of bytes output to @buf. + */ -+static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data) ++static ssize_t idle_hysteresis_time_show(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ struct kbase_device *kbdev = file->private; ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ u32 dur; + -+#if MALI_USE_CSF -+ seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n", -+ kbdev->pm.backend.metrics.values.time_busy, -+ kbdev->pm.backend.metrics.values.time_idle, -+ kbdev->pm.backend.metrics.values.time_in_protm); -+#else -+ seq_printf(file, "busy_time: %u idle_time: %u\n", -+ kbdev->pm.backend.metrics.values.time_busy, -+ kbdev->pm.backend.metrics.values.time_idle); -+#endif ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ return 0; -+} ++ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); + -+static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, -+ struct file *file) -+{ -+ return single_open(file, kbasep_dvfs_utilization_debugfs_show, -+ in->i_private); ++ return ret; +} + -+static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { -+ .open = kbasep_dvfs_utilization_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++static DEVICE_ATTR_RW(idle_hysteresis_time); + -+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) ++/** ++ * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. ++ * ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Get the internally recorded MCU shader Core power-off (nominal) timeout value. ++ * The unit of the value is in micro-seconds. ++ * ++ * Return: The number of bytes output to @buf if the ++ * function succeeded. A Negative value on failure. ++ */ ++static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_attribute *attr, ++ char *const buf) +{ -+ struct dentry *file; -+ const mode_t mode = 0444; -+ -+ if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) -+ return; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ u32 pwroff; + -+ file = debugfs_create_file("dvfs_utilization", mode, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_dvfs_utilization_debugfs_fops); ++ if (!kbdev) ++ return -ENODEV; + -+ if (IS_ERR_OR_NULL(file)) { -+ dev_warn(kbdev->dev, -+ "Unable to create dvfs debugfs entry"); -+ } ++ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); ++ return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); +} + -+#else -+/* -+ * Stub functions for when debugfs is disabled ++/** ++ * mcu_shader_pwroff_timeout_store - Set the MCU shader core power-off time value. 
++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file ++ * ++ * The duration value (unit: micro-seconds) for configuring MCU Shader Core ++ * timer, when the shader cores' power transitions are delegated to the ++ * MCU (normal operational mode) ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) ++static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) +{ -+} ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ u32 dur; + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h -new file mode 100644 -index 000000000..8334db7cc ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (!kbdev) ++ return -ENODEV; + -+#ifndef _KBASE_DVFS_DEBUGFS_H_ -+#define _KBASE_DVFS_DEBUGFS_H_ ++ if (kstrtouint(buf, 0, &dur)) ++ return -EINVAL; + -+/* Forward declaration */ -+struct kbase_device; ++ kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + -+/** -+ * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries -+ * -+ * @kbdev: Pointer to the GPU device for which to create the debugfs entry -+ */ -+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev); ++ return count; ++} + -+#endif /* _KBASE_DVFS_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_event.c b/drivers/gpu/arm/bifrost/mali_kbase_event.c -new file mode 100644 -index 000000000..910c51170 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_event.c -@@ -0,0 +1,272 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2016, 2018-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); + -+#include -+#include -+#include -+#include ++#endif /* MALI_USE_CSF */ + -+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++static struct attribute *kbase_scheduling_attrs[] = { ++#if !MALI_USE_CSF ++ &dev_attr_serialize_jobs.attr, ++#endif /* !MALI_USE_CSF */ ++ NULL ++}; ++ ++static struct attribute *kbase_attrs[] = { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ &dev_attr_debug_command.attr, ++#if !MALI_USE_CSF ++ &dev_attr_js_softstop_always.attr, ++#endif /* !MALI_USE_CSF */ ++#endif ++#if !MALI_USE_CSF ++ &dev_attr_js_timeouts.attr, ++ &dev_attr_soft_job_timeout.attr, ++#endif /* !MALI_USE_CSF */ ++ &dev_attr_gpuinfo.attr, ++ &dev_attr_dvfs_period.attr, ++ &dev_attr_pm_poweroff.attr, ++ &dev_attr_reset_timeout.attr, ++#if !MALI_USE_CSF ++ &dev_attr_js_scheduling_period.attr, ++#else ++ &dev_attr_csg_scheduling_period.attr, ++ &dev_attr_fw_timeout.attr, ++ &dev_attr_idle_hysteresis_time.attr, ++ &dev_attr_mcu_shader_pwroff_timeout.attr, ++#endif /* !MALI_USE_CSF */ ++ &dev_attr_power_policy.attr, ++ &dev_attr_core_mask.attr, ++ &dev_attr_mem_pool_size.attr, ++ &dev_attr_mem_pool_max_size.attr, ++ &dev_attr_lp_mem_pool_size.attr, ++ &dev_attr_lp_mem_pool_max_size.attr, ++#if !MALI_USE_CSF ++ &dev_attr_js_ctx_scheduling_mode.attr, ++#endif /* !MALI_USE_CSF */ ++ NULL ++}; ++ ++static struct attribute *kbase_mempool_attrs[] = { ++ &dev_attr_max_size.attr, ++ &dev_attr_lp_max_size.attr, ++ &dev_attr_ctx_default_max_size.attr, ++ NULL ++}; ++ ++#define SYSFS_SCHEDULING_GROUP "scheduling" ++static const struct attribute_group kbase_scheduling_attr_group = { ++ .name = SYSFS_SCHEDULING_GROUP, ++ .attrs = kbase_scheduling_attrs, ++}; ++ ++#define SYSFS_MEMPOOL_GROUP "mempool" ++static const struct attribute_group kbase_mempool_attr_group = { ++ .name = SYSFS_MEMPOOL_GROUP, ++ .attrs = kbase_mempool_attrs, ++}; ++ ++static const struct attribute_group kbase_attr_group = { ++ .attrs = kbase_attrs, ++}; ++ ++int kbase_sysfs_init(struct kbase_device *kbdev) +{ -+ struct base_jd_udata data; -+ struct kbase_device *kbdev; ++ int err = 0; + -+ lockdep_assert_held(&kctx->jctx.lock); ++ kbdev->mdev.minor = MISC_DYNAMIC_MINOR; ++ kbdev->mdev.name = kbdev->devname; ++ kbdev->mdev.fops = &kbase_fops; ++ kbdev->mdev.parent = get_device(kbdev->dev); ++ kbdev->mdev.mode = 0666; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); -+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); ++ err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); ++ if (err) ++ return err; + -+ kbdev = kctx->kbdev; -+ data = katom->udata; ++ err = sysfs_create_group(&kbdev->dev->kobj, ++ &kbase_scheduling_attr_group); ++ if (err) { ++ dev_err(kbdev->dev, "Creation of %s sysfs group failed", ++ SYSFS_SCHEDULING_GROUP); ++ sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_attr_group); ++ return err; ++ } + -+ KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx); -+ KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); ++ err = sysfs_create_group(&kbdev->dev->kobj, ++ &kbase_mempool_attr_group); ++ if (err) { ++ dev_err(kbdev->dev, "Creation of %s sysfs group failed", ++ SYSFS_MEMPOOL_GROUP); ++ sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_scheduling_attr_group); ++ 
sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_attr_group); ++ } + -+ katom->status = KBASE_JD_ATOM_STATE_UNUSED; -+ dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); -+ wake_up(&katom->completed); ++ return err; ++} + -+ return data; ++void kbase_sysfs_term(struct kbase_device *kbdev) ++{ ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group); ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); ++ put_device(kbdev->dev); +} + -+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) ++static int kbase_platform_device_remove(struct platform_device *pdev) +{ -+ struct kbase_jd_atom *atom; ++ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + -+ KBASE_DEBUG_ASSERT(ctx); ++ if (!kbdev) ++ return -ENODEV; + -+ mutex_lock(&ctx->event_mutex); ++ kbase_device_term(kbdev); ++ dev_set_drvdata(kbdev->dev, NULL); ++ kbase_device_free(kbdev); + -+ if (list_empty(&ctx->event_list)) { -+ if (!atomic_read(&ctx->event_closed)) { -+ mutex_unlock(&ctx->event_mutex); -+ return -1; -+ } ++ return 0; ++} + -+ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ -+ mutex_unlock(&ctx->event_mutex); -+ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; -+ memset(&uevent->udata, 0, sizeof(uevent->udata)); -+ dev_dbg(ctx->kbdev->dev, -+ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", -+ BASE_JD_EVENT_DRV_TERMINATED); -+ return 0; -+ } ++void kbase_backend_devfreq_term(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ if (kbdev->devfreq) ++ kbase_devfreq_term(kbdev); ++#endif ++} + -+ /* normal event processing */ -+ atomic_dec(&ctx->event_count); -+ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); -+ list_del(ctx->event_list.next); ++int kbase_backend_devfreq_init(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ /* Devfreq uses hardware counters, so must be initialized after it. 
*/ ++ int err = kbase_devfreq_init(kbdev); + -+ mutex_unlock(&ctx->event_mutex); ++ if (err) ++ dev_err(kbdev->dev, "Continuing without devfreq\n"); ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ return 0; ++} + -+ dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); -+ uevent->event_code = atom->event_code; ++static int kbase_platform_device_probe(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev; ++ int err = 0; + -+ uevent->atom_number = (atom - ctx->jctx.atoms); ++ mali_kbase_print_cs_experimental(); + -+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_free_external_resources(atom); ++ kbdev = kbase_device_alloc(); ++ if (!kbdev) { ++ dev_err(&pdev->dev, "Allocate device failed\n"); ++ return -ENOMEM; ++ } + -+ mutex_lock(&ctx->jctx.lock); -+ uevent->udata = kbase_event_process(ctx, atom); -+ mutex_unlock(&ctx->jctx.lock); ++ kbdev->dev = &pdev->dev; + -+ return 0; ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++ kbdev->token = -EPERM; ++#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ ++ ++ dev_set_drvdata(kbdev->dev, kbdev); ++#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) ++ mutex_lock(&kbase_probe_mutex); ++#endif ++ err = kbase_device_init(kbdev); ++ ++ if (err) { ++ if (err == -EPROBE_DEFER) ++ dev_info(kbdev->dev, ++ "Device initialization Deferred\n"); ++ else ++ dev_err(kbdev->dev, "Device initialization failed\n"); ++ ++ dev_set_drvdata(kbdev->dev, NULL); ++ kbase_device_free(kbdev); ++#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) ++ mutex_unlock(&kbase_probe_mutex); ++#endif ++ } else { ++ dev_info(kbdev->dev, ++ "Probed as %s\n", dev_name(kbdev->mdev.this_device)); ++ kbase_increment_device_id(); ++#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) ++ mutex_unlock(&kbase_probe_mutex); ++#endif ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ mutex_lock(&kbdev->pm.lock); ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); ++ mutex_unlock(&kbdev->pm.lock); ++#endif ++ } ++ ++ return err; +} + -+KBASE_EXPORT_TEST_API(kbase_event_dequeue); ++#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE + +/** -+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not -+ * return an event but do have external -+ * resources -+ * @data: Work structure ++ * kbase_device_suspend - Suspend callback from the OS. ++ * ++ * @dev: The device to suspend ++ * ++ * This is called by Linux when the device should suspend. ++ * ++ * Return: A standard Linux error code on failure, 0 otherwise. 
+ */ -+static void kbase_event_process_noreport_worker(struct work_struct *data) ++static int kbase_device_suspend(struct device *dev) +{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = to_kbase_device(dev); + -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_free_external_resources(katom); ++ if (!kbdev) ++ return -ENODEV; + -+ mutex_lock(&kctx->jctx.lock); -+ kbase_event_process(kctx, katom); -+ mutex_unlock(&kctx->jctx.lock); ++ if (kbase_pm_suspend(kbdev)) { ++ dev_warn(kbdev->dev, "Abort suspend as GPU suspension failed"); ++ return -EBUSY; ++ } ++ ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ kbase_pm_metrics_stop(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ dev_dbg(dev, "Callback %s\n", __func__); ++ if (kbdev->devfreq) { ++ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); ++ flush_workqueue(kbdev->devfreq_queue.workq); ++ } ++#endif ++ ++#ifdef CONFIG_ARCH_ROCKCHIP ++ kbase_platform_rk_enable_regulator(kbdev); ++#endif ++ ++#ifdef KBASE_PM_RUNTIME ++ if (kbdev->is_runtime_resumed) { ++ if (kbdev->pm.backend.callback_power_runtime_off) ++ kbdev->pm.backend.callback_power_runtime_off(kbdev); ++ } ++#endif /* KBASE_PM_RUNTIME */ ++ ++ return 0; +} + +/** -+ * kbase_event_process_noreport - Process atoms that do not return an event -+ * @kctx: Context pointer -+ * @katom: Atom to be processed ++ * kbase_device_resume - Resume callback from the OS. + * -+ * Atoms that do not have external resources will be processed immediately. -+ * Atoms that do have external resources will be processed on a workqueue, in -+ * order to avoid locking issues. ++ * @dev: The device to resume ++ * ++ * This is called by Linux when the device should resume from suspension. ++ * ++ * Return: A standard Linux error code + */ -+static void kbase_event_process_noreport(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) ++static int kbase_device_resume(struct device *dev) +{ -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { -+ INIT_WORK(&katom->work, kbase_event_process_noreport_worker); -+ queue_work(kctx->event_workq, &katom->work); -+ } else { -+ kbase_event_process(kctx, katom); ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++#ifdef KBASE_PM_RUNTIME ++ if (kbdev->is_runtime_resumed) { ++ if (kbdev->pm.backend.callback_power_runtime_on) ++ kbdev->pm.backend.callback_power_runtime_on(kbdev); + } ++#endif /* KBASE_PM_RUNTIME */ ++ ++ kbase_pm_resume(kbdev); ++ ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ kbase_pm_metrics_start(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ dev_dbg(dev, "Callback %s\n", __func__); ++ if (kbdev->devfreq) ++ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); ++#endif ++ ++#if !MALI_USE_CSF ++ kbase_enable_quick_reset(kbdev); ++#endif ++ ++ return 0; +} + +/** -+ * kbase_event_coalesce - Move pending events to the main event list -+ * @kctx: Context pointer ++ * kbase_device_runtime_suspend - Runtime suspend callback from the OS. + * -+ * kctx->event_list and kctx->event_coalesce_count must be protected -+ * by a lock unless this is the last thread using them -+ * (and we're about to terminate the lock). 
++ * @dev: The device to suspend + * -+ * Return: The number of pending events moved to the main event list ++ * This is called by Linux when the device should prepare for a condition in ++ * which it will not be able to communicate with the CPU(s) and RAM due to ++ * power management. ++ * ++ * Return: A standard Linux error code + */ -+static int kbase_event_coalesce(struct kbase_context *kctx) ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_suspend(struct device *dev) +{ -+ const int event_count = kctx->event_coalesce_count; ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ int ret = 0; + -+ /* Join the list of pending events onto the tail of the main list -+ * and reset it -+ */ -+ list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); -+ kctx->event_coalesce_count = 0; ++ if (!kbdev) ++ return -ENODEV; + -+ /* Return the number of events moved */ -+ return event_count; -+} ++ dev_dbg(dev, "Callback %s\n", __func__); ++ KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_SUSPEND_CALLBACK, NULL, 0); + -+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) -+{ -+ struct kbase_device *kbdev = ctx->kbdev; ++#if MALI_USE_CSF ++ ret = kbase_pm_handle_runtime_suspend(kbdev); ++ if (ret) ++ return ret; ++#endif + -+ dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ kbase_pm_metrics_stop(kbdev); ++#endif + -+ if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { -+ dev_warn(kbdev->dev, -+ "%s: Atom %d (%pK) not completed (status %d)\n", -+ __func__, -+ kbase_jd_atom_id(atom->kctx, atom), -+ atom->kctx, -+ atom->status); -+ return; -+ } ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ if (kbdev->devfreq) ++ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); ++#endif + -+ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { -+ if (atom->event_code == BASE_JD_EVENT_DONE) { -+ dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); -+ kbase_event_process_noreport(ctx, atom); -+ return; -+ } ++ if (kbdev->pm.backend.callback_power_runtime_off) { ++ kbdev->pm.backend.callback_power_runtime_off(kbdev); ++ kbdev->is_runtime_resumed = false; ++ dev_dbg(dev, "runtime suspend\n"); + } ++ return ret; ++} ++#endif /* KBASE_PM_RUNTIME */ + -+ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { -+ dev_dbg(kbdev->dev, "Suppressing event (never)\n"); -+ kbase_event_process_noreport(ctx, atom); -+ return; -+ } -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); -+ if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { -+ /* Don't report the event until other event(s) have completed */ -+ dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); -+ mutex_lock(&ctx->event_mutex); -+ list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); -+ ++ctx->event_coalesce_count; -+ mutex_unlock(&ctx->event_mutex); -+ } else { -+ /* Report the event and any pending events now */ -+ int event_count = 1; ++/** ++ * kbase_device_runtime_resume - Runtime resume callback from the OS. ++ * ++ * @dev: The device to suspend ++ * ++ * This is called by Linux when the device should go into a fully active state. 
++ * ++ * Return: A standard Linux error code ++ */ + -+ mutex_lock(&ctx->event_mutex); -+ event_count += kbase_event_coalesce(ctx); -+ list_add_tail(&atom->dep_item[0], &ctx->event_list); -+ atomic_add(event_count, &ctx->event_count); -+ mutex_unlock(&ctx->event_mutex); -+ dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_resume(struct device *dev) ++{ ++ int ret = 0; ++ struct kbase_device *kbdev = to_kbase_device(dev); + -+ kbase_event_wakeup(ctx); ++ if (!kbdev) ++ return -ENODEV; + -+ /* Post-completion latency */ -+ trace_sysgraph(SGR_POST, ctx->id, -+ kbase_jd_atom_id(ctx, atom)); ++ dev_dbg(dev, "Callback %s\n", __func__); ++ // KBASE_KTRACE_ADD(kbdev, PM_RUNTIME_RESUME_CALLBACK, NULL, 0); ++ if (kbdev->pm.backend.callback_power_runtime_on) { ++ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); ++ kbdev->is_runtime_resumed = true; ++ dev_dbg(dev, "runtime resume\n"); + } -+} -+KBASE_EXPORT_TEST_API(kbase_event_post); + -+void kbase_event_close(struct kbase_context *kctx) -+{ -+ mutex_lock(&kctx->event_mutex); -+ atomic_set(&kctx->event_closed, true); -+ mutex_unlock(&kctx->event_mutex); -+ kbase_event_wakeup(kctx); ++#ifdef CONFIG_MALI_BIFROST_DVFS ++ kbase_pm_metrics_start(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ if (kbdev->devfreq) ++ kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); ++#endif ++ ++ return ret; +} ++#endif /* KBASE_PM_RUNTIME */ + -+int kbase_event_init(struct kbase_context *kctx) ++ ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_device_runtime_idle - Runtime idle callback from the OS. ++ * @dev: The device to suspend ++ * ++ * This is called by Linux when the device appears to be inactive and it might ++ * be placed into a low power state. ++ * ++ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, ++ * otherwise a standard Linux error code ++ */ ++static int kbase_device_runtime_idle(struct device *dev) +{ -+ KBASE_DEBUG_ASSERT(kctx); ++ struct kbase_device *kbdev = to_kbase_device(dev); + -+ INIT_LIST_HEAD(&kctx->event_list); -+ INIT_LIST_HEAD(&kctx->event_coalesce_list); -+ mutex_init(&kctx->event_mutex); -+ kctx->event_coalesce_count = 0; -+ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); ++ if (!kbdev) ++ return -ENODEV; + -+ if (kctx->event_workq == NULL) -+ return -EINVAL; ++ dev_dbg(dev, "Callback %s\n", __func__); ++ /* Use platform specific implementation if it exists. */ ++ if (kbdev->pm.backend.callback_power_runtime_idle) ++ return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + ++ /* Just need to update the device's last busy mark. Kernel will respect ++ * the autosuspend delay and so won't suspend the device immediately. ++ */ ++ pm_runtime_mark_last_busy(kbdev->dev); + return 0; +} ++#endif /* KBASE_PM_RUNTIME */ + -+KBASE_EXPORT_TEST_API(kbase_event_init); ++/* The power management operations for the platform driver. 
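 *
 * A sketch only (using the helper macros from <linux/pm.h>; the table below
 * assigns its members explicitly instead, and "example_pm_ops" is a
 * placeholder name): a similar table could be written as
 *
 *   static const struct dev_pm_ops example_pm_ops = {
 *           SET_SYSTEM_SLEEP_PM_OPS(kbase_device_suspend, kbase_device_resume)
 *           SET_RUNTIME_PM_OPS(kbase_device_runtime_suspend,
 *                              kbase_device_runtime_resume,
 *                              kbase_device_runtime_idle)
 *   };
 *
 * The explicit form keeps the runtime callbacks guarded by KBASE_PM_RUNTIME,
 * whereas the macros compile away only when the kernel's PM options are off.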
++ */ ++static const struct dev_pm_ops kbase_pm_ops = { ++ .suspend = kbase_device_suspend, ++ .resume = kbase_device_resume, ++#ifdef KBASE_PM_RUNTIME ++ .runtime_suspend = kbase_device_runtime_suspend, ++ .runtime_resume = kbase_device_runtime_resume, ++ .runtime_idle = kbase_device_runtime_idle, ++#endif /* KBASE_PM_RUNTIME */ ++}; + -+void kbase_event_cleanup(struct kbase_context *kctx) ++#if IS_ENABLED(CONFIG_OF) ++static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" }, ++ { .compatible = "arm,mali-midgard" }, ++ { .compatible = "arm,mali-bifrost" }, ++ { .compatible = "arm,mali-valhall" }, ++ { /* sentinel */ } }; ++MODULE_DEVICE_TABLE(of, kbase_dt_ids); ++#endif ++ ++static struct platform_driver kbase_platform_driver = { ++ .probe = kbase_platform_device_probe, ++ .remove = kbase_platform_device_remove, ++ .driver = { ++ .name = kbase_drv_name, ++ .pm = &kbase_pm_ops, ++ .of_match_table = of_match_ptr(kbase_dt_ids), ++ .probe_type = PROBE_PREFER_ASYNCHRONOUS, ++ }, ++}; ++ ++#if (KERNEL_VERSION(5, 3, 0) > LINUX_VERSION_CODE) && IS_ENABLED(CONFIG_OF) ++module_platform_driver(kbase_platform_driver); ++#else ++static int __init kbase_driver_init(void) +{ -+ int event_count; ++ int ret; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(kctx->event_workq); ++#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) ++ mutex_init(&kbase_probe_mutex); ++#endif + -+ flush_workqueue(kctx->event_workq); -+ destroy_workqueue(kctx->event_workq); ++#ifndef CONFIG_OF ++ ret = kbase_platform_register(); ++ if (ret) ++ return ret; ++#endif ++ ret = platform_driver_register(&kbase_platform_driver); ++#ifndef CONFIG_OF ++ if (ret) { ++ kbase_platform_unregister(); ++ return ret; ++ } ++#endif + -+ /* We use kbase_event_dequeue to remove the remaining events as that -+ * deals with all the cleanup needed for the atoms. -+ * -+ * Note: use of kctx->event_list without a lock is safe because this must be the last -+ * thread using it (because we're about to terminate the lock) -+ */ -+ event_count = kbase_event_coalesce(kctx); -+ atomic_add(event_count, &kctx->event_count); ++ return ret; ++} + -+ while (!list_empty(&kctx->event_list)) { -+ struct base_jd_event_v2 event; ++static void __exit kbase_driver_exit(void) ++{ ++ platform_driver_unregister(&kbase_platform_driver); ++#ifndef CONFIG_OF ++ kbase_platform_unregister(); ++#endif ++} + -+ kbase_event_dequeue(kctx, &event); -+ } ++module_init(kbase_driver_init); ++module_exit(kbase_driver_exit); ++#endif ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ ++ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ ++ __stringify(BASE_UK_VERSION_MINOR) ")"); ++MODULE_SOFTDEP("pre: memory_group_manager"); ++MODULE_INFO(import_ns, "DMA_BUF"); ++ ++#define CREATE_TRACE_POINTS ++/* Create the trace points (otherwise we just get code to call a tracepoint) */ ++#include "mali_linux_trace.h" ++ ++#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); ++ ++void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) ++{ ++ trace_mali_pm_status(dev_id, event, value); +} + -+KBASE_EXPORT_TEST_API(kbase_event_cleanup); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c ++void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) ++{ ++ trace_mali_job_slots_event(dev_id, event, ++ (kctx != NULL ? kctx->tgid : 0), ++ (kctx != NULL ? kctx->pid : 0), ++ atom_id); ++} ++ ++void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) ++{ ++ trace_mali_page_fault_insert_pages(dev_id, event, value); ++} ++ ++void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event) ++{ ++ trace_mali_total_alloc_pages_change(dev_id, event); ++} ++#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h new file mode 100644 -index 000000000..b16b27659 +index 000000000..7e885ca46 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c -@@ -0,0 +1,61 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h +@@ -0,0 +1,40 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -215380,56 +218464,35 @@ index 000000000..b16b27659 + * + */ + -+#include -+#include -+#include -+#include -+#include ++#ifndef _KBASE_CS_EXPERIMENTAL_H_ ++#define _KBASE_CS_EXPERIMENTAL_H_ + -+/* Spin lock protecting all Mali fences as fence->lock. 
*/ -+static DEFINE_SPINLOCK(kbase_fence_lock); ++#include + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+struct fence * -+kbase_fence_out_new(struct kbase_jd_atom *katom) -+#else -+struct dma_fence * -+kbase_fence_out_new(struct kbase_jd_atom *katom) -+#endif ++/** ++ * mali_kbase_print_cs_experimental() - Print a string for every Core Services ++ * experimental feature that is enabled ++ */ ++static inline void mali_kbase_print_cs_experimental(void) +{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ -+ WARN_ON(katom->dma_fence.fence); -+ -+ fence = kzalloc(sizeof(*fence), GFP_KERNEL); -+ if (!fence) -+ return NULL; -+ -+ dma_fence_init(fence, -+ &kbase_fence_ops, -+ &kbase_fence_lock, -+ katom->dma_fence.context, -+ atomic_inc_return(&katom->dma_fence.seqno)); ++#if MALI_INCREMENTAL_RENDERING_JM ++ pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled"); ++#endif /* MALI_INCREMENTAL_RENDERING_JM */ ++} + -+ katom->dma_fence.fence = fence; ++#endif /* _KBASE_CS_EXPERIMENTAL_H_ */ + -+ return fence; -+} + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c new file mode 100644 -index 000000000..f4507ac43 +index 000000000..dc6feb95a --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h -@@ -0,0 +1,259 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +@@ -0,0 +1,433 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -215447,440 +218510,428 @@ index 000000000..f4507ac43 + * + */ + -+#ifndef _KBASE_FENCE_H_ -+#define _KBASE_FENCE_H_ -+ -+/* -+ * mali_kbase_fence.[hc] has fence code used only by -+ * - CONFIG_SYNC_FILE - explicit fences -+ */ -+ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ -+#include -+#include "mali_kbase_fence_defs.h" -+#include "mali_kbase.h" -+#include "mali_kbase_refcount_defs.h" -+ ++#include ++#include ++#include "mali_kbase_ctx_sched.h" ++#include "tl/mali_kbase_tracepoints.h" +#if MALI_USE_CSF -+/* Maximum number of characters in DMA fence timeline name. */ -+#define MAX_TIMELINE_NAME (32) -+ -+/** -+ * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing -+ * information about KCPU queue. One instance per KCPU -+ * queue. -+ * -+ * @refcount: Atomic value to keep track of number of references to an instance. -+ * An instance can outlive the KCPU queue itself. -+ * @kbdev: Pointer to Kbase device. -+ * @kctx_id: Kbase context ID. -+ * @timeline_name: String of timeline name for associated fence object. -+ */ -+struct kbase_kcpu_dma_fence_meta { -+ kbase_refcount_t refcount; -+ struct kbase_device *kbdev; -+ int kctx_id; -+ char timeline_name[MAX_TIMELINE_NAME]; -+}; -+ -+/** -+ * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a -+ * reference to metadata containing more informaiton about it. -+ * -+ * @base: Fence object itself. -+ * @metadata: Pointer to metadata structure. 
-+ */ -+struct kbase_kcpu_dma_fence { -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence base; ++#include "mali_kbase_reset_gpu.h" +#else -+ struct dma_fence base; -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ -+ struct kbase_kcpu_dma_fence_meta *metadata; -+}; ++#include +#endif + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+extern const struct fence_ops kbase_fence_ops; -+#else -+extern const struct dma_fence_ops kbase_fence_ops; -+#endif ++/* Helper for ktrace */ ++#if KBASE_KTRACE_ENABLE ++static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++{ ++ return atomic_read(&kctx->refcount); ++} ++#else /* KBASE_KTRACE_ENABLE */ ++static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++{ ++ CSTD_UNUSED(kctx); ++ return 0; ++} ++#endif /* KBASE_KTRACE_ENABLE */ + -+/** -+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom -+ * @katom: Atom to create an output fence for -+ * -+ * Return: A new fence object on success, NULL on failure. -+ */ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); -+#else -+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); -+#endif ++int kbase_ctx_sched_init(struct kbase_device *kbdev) ++{ ++ int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_fence_in_set() - Assign input fence to atom -+ * @katom: Atom to assign input fence to -+ * @fence: Input fence to assign to atom -+ * -+ * This function will take ownership of one fence reference! -+ */ -+#define kbase_fence_fence_in_set(katom, fence) \ -+ do { \ -+ WARN_ON((katom)->dma_fence.fence_in); \ -+ (katom)->dma_fence.fence_in = fence; \ -+ } while (0) -+#endif ++ /* These two must be recalculated if nr_hw_address_spaces changes ++ * (e.g. 
for HW workarounds) ++ */ ++ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; ++ kbdev->as_free = as_present; /* All ASs initially free */ + ++ memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); + -+#if !MALI_USE_CSF -+/** -+ * kbase_fence_out_remove() - Removes the output fence from atom -+ * @katom: Atom to remove output fence for -+ * -+ * This will also release the reference to this fence which the atom keeps -+ */ -+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) -+{ -+ if (katom->dma_fence.fence) { -+ dma_fence_put(katom->dma_fence.fence); -+ katom->dma_fence.fence = NULL; -+ } ++ return 0; +} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_in_remove() - Removes the input fence from atom -+ * @katom: Atom to remove input fence for -+ * -+ * This will also release the reference to this fence which the atom keeps -+ */ -+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) ++void kbase_ctx_sched_term(struct kbase_device *kbdev) +{ -+ if (katom->dma_fence.fence_in) { -+ dma_fence_put(katom->dma_fence.fence_in); -+ katom->dma_fence.fence_in = NULL; ++ s8 i; ++ ++ /* Sanity checks */ ++ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { ++ WARN_ON(kbdev->as_to_kctx[i] != NULL); ++ WARN_ON(!(kbdev->as_free & (1u << i))); + } +} -+#endif + -+/** -+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us -+ * @katom: Atom to check output fence for -+ * -+ * Return: true if fence exists and is valid, otherwise false -+ */ -+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) ++void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ -+ return katom->dma_fence.fence && -+ katom->dma_fence.fence->ops == &kbase_fence_ops; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ atomic_set(&kctx->refcount, 0); +} + -+/** -+ * kbase_fence_out_signal() - Signal output fence of atom -+ * @katom: Atom to signal output fence for -+ * @status: Status to signal with (0 for success, < 0 for error) ++/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space + * -+ * Return: 0 on success, < 0 on error ++ * @kbdev: The context for which to find a free address space ++ * ++ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID ++ * ++ * This function returns an address space available for use. It would prefer ++ * returning an AS that has been previously assigned to the context to ++ * avoid having to reprogram the MMU. + */ -+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, -+ int status) ++static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) +{ -+ if (status) { -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) -+ fence_set_error(katom->dma_fence.fence, status); -+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) -+ dma_fence_set_error(katom->dma_fence.fence, status); -+#else -+ katom->dma_fence.fence->status = status; -+#endif -+ } -+ return dma_fence_signal(katom->dma_fence.fence); ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int free_as; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* First check if the previously assigned AS is available */ ++ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && ++ (kbdev->as_free & (1u << kctx->as_nr))) ++ return kctx->as_nr; ++ ++ /* The previously assigned AS was taken, we'll be returning any free ++ * AS at this point. 
++ */ ++ free_as = ffs(kbdev->as_free) - 1; ++ if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) ++ return free_as; ++ ++ return KBASEP_AS_NR_INVALID; +} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_in_get() - Retrieve input fence for atom. -+ * @katom: Atom to get input fence from -+ * -+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it -+ * -+ * Return: The fence, or NULL if there is no input fence for atom -+ */ -+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) -+#endif ++int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; + -+/** -+ * kbase_fence_out_get() - Retrieve output fence for atom. -+ * @katom: Atom to get output fence from -+ * -+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it -+ * -+ * Return: The fence, or NULL if there is no output fence for atom -+ */ -+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#endif /* !MALI_USE_CSF */ ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+/** -+ * kbase_fence_get() - Retrieve fence for a KCPUQ fence command. -+ * @fence_info: KCPUQ fence command -+ * -+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it -+ * -+ * Return: The fence, or NULL if there is no fence for KCPUQ fence command -+ */ -+#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) ++ if (atomic_inc_return(&kctx->refcount) == 1) { ++ int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); ++ ++ if (free_as >= 0) { ++ kbdev->as_free &= ~(1u << free_as); ++ /* Only program the MMU if the context has not been ++ * assigned the same address space before. ++ */ ++ if (free_as != kctx->as_nr) { ++ struct kbase_context *const prev_kctx = ++ kbdev->as_to_kctx[free_as]; ++ ++ if (prev_kctx) { ++ WARN_ON(atomic_read(&prev_kctx->refcount) != 0); ++ kbase_mmu_disable(prev_kctx); ++ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( ++ kbdev, prev_kctx->id); ++ prev_kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } ++ kctx->as_nr = free_as; ++ kbdev->as_to_kctx[free_as] = kctx; ++ KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( ++ kbdev, kctx->id, free_as); ++ kbase_mmu_update(kbdev, &kctx->mmu, ++ kctx->as_nr); ++ } ++ } else { ++ atomic_dec(&kctx->refcount); ++ ++ /* Failed to find an available address space, we must ++ * be returning an error at this point. ++ */ ++ WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ } ++ } + ++ return kctx->as_nr; ++} ++ ++void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); +#if MALI_USE_CSF -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) ++ /* We expect the context to be active when this function is called, ++ * except for the case where a page fault is reported for it during ++ * the GPU reset sequence, in which case we can expect the refcount ++ * to be 0. 
++ */ ++ WARN_ON(!atomic_read(&kctx->refcount) && !kbase_reset_gpu_is_active(kbdev)); +#else -+static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) ++ /* We expect the context to be active (and thus refcount should be non-zero) ++ * when this function is called ++ */ ++ WARN_ON(!atomic_read(&kctx->refcount)); +#endif -+{ -+ if (fence->ops == &kbase_fence_ops) -+ return (struct kbase_kcpu_dma_fence *)fence; ++ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) ++ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); ++ else ++ WARN(true, "Invalid as_nr(%d)", kctx->as_nr); + -+ return NULL; ++ atomic_inc(&kctx->refcount); +} + -+static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) ++void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) +{ -+ if (kbase_refcount_dec_and_test(&metadata->refcount)) { -+ atomic_dec(&metadata->kbdev->live_fence_metadata); -+ kfree(metadata); -+ } -+} ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int new_ref_count; + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+static inline void kbase_kcpu_dma_fence_put(struct fence *fence) -+#else -+static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ new_ref_count = atomic_dec_return(&kctx->refcount); ++ if (new_ref_count == 0) { ++ if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) { ++ kbdev->as_free |= (1u << kctx->as_nr); ++ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { ++ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); ++ kbdev->as_to_kctx[kctx->as_nr] = NULL; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); ++#if !MALI_USE_CSF ++ kbase_backend_slot_kctx_purge_locked(kbdev, kctx); +#endif -+{ -+ struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); ++ } ++ } ++ } + -+ if (kcpu_fence) -+ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); ++ KBASE_KTRACE_ADD(kbdev, SCHED_RELEASE_CTX, kctx, new_ref_count); +} -+#endif /* MALI_USE_CSF */ + -+/** -+ * kbase_fence_put() - Releases a reference to a fence -+ * @fence: Fence to release reference for. -+ */ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+static inline void kbase_fence_put(struct fence *fence) -+#else -+static inline void kbase_fence_put(struct dma_fence *fence) -+#endif ++void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) +{ -+ dma_fence_put(fence); -+} ++ struct kbase_device *const kbdev = kctx->kbdev; ++ unsigned long flags; + -+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+#endif /* _KBASE_FENCE_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h -new file mode 100644 -index 000000000..7a150bdf2 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ WARN_ON(atomic_read(&kctx->refcount) != 0); + -+#ifndef _KBASE_FENCE_DEFS_H_ -+#define _KBASE_FENCE_DEFS_H_ ++ if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) { ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_mmu_disable(kctx); + -+/* -+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) -+ * This file hides the compatibility issues with this for the rest the driver -+ */ ++ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); ++ kbdev->as_to_kctx[kctx->as_nr] = NULL; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } + -+#include ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++} + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) ++{ ++ s8 i; + -+#include ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#define dma_fence_context_alloc(a) fence_context_alloc(a) -+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) -+#define dma_fence_get(a) fence_get(a) -+#define dma_fence_put(a) fence_put(a) -+#define dma_fence_signal(a) fence_signal(a) -+#define dma_fence_is_signaled(a) fence_is_signaled(a) -+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) -+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) -+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) -+#else -+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) ++ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { ++ struct kbase_context *kctx; ++ ++ kbdev->as[i].is_unresponsive = false; ++#if MALI_USE_CSF ++ if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { ++ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, ++ MCU_AS_NR); ++ continue; ++ } +#endif ++ kctx = kbdev->as_to_kctx[i]; ++ if (kctx) { ++ if (atomic_read(&kctx->refcount)) { ++ WARN_ON(kctx->as_nr != i); + -+#else ++ kbase_mmu_update(kbdev, &kctx->mmu, ++ kctx->as_nr); ++ kbase_ctx_flag_clear(kctx, ++ KCTX_AS_DISABLED_ON_FAULT); ++ } else { ++ /* This context might have been assigned an ++ * AS before, clear it. ++ */ ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) { ++ KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( ++ kbdev, kctx->id); ++ kbdev->as_to_kctx[kctx->as_nr] = NULL; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } ++ } ++ } else { ++ kbase_mmu_disable_as(kbdev, i); ++ } ++ } ++} + -+#include ++struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( ++ struct kbase_device *kbdev, size_t as_nr) ++{ ++ unsigned long flags; ++ struct kbase_context *found_kctx = NULL; + -+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? 
\ -+ (a)->status ?: 1 \ -+ : 0) -+#endif ++ if (WARN_ON(kbdev == NULL)) ++ return NULL; + -+#endif /* < 4.10.0 */ ++ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) ++ return NULL; + -+#endif /* _KBASE_FENCE_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c -new file mode 100644 -index 000000000..25b4c9c03 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c -@@ -0,0 +1,111 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+#include -+#include -+#include -+#include ++ found_kctx = kbdev->as_to_kctx[as_nr]; + -+static const char * -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+kbase_fence_get_driver_name(struct fence *fence) -+#else -+kbase_fence_get_driver_name(struct dma_fence *fence) -+#endif ++ if (!WARN_ON(found_kctx == NULL)) ++ kbase_ctx_sched_retain_ctx_refcount(found_kctx); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return found_kctx; ++} ++ ++struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, ++ size_t as_nr) +{ -+ return kbase_drv_name; ++ unsigned long flags; ++ struct kbase_context *found_kctx; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ found_kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return found_kctx; +} + -+static const char * -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+kbase_fence_get_timeline_name(struct fence *fence) -+#else -+kbase_fence_get_timeline_name(struct dma_fence *fence) -+#endif ++struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( ++ struct kbase_device *kbdev, size_t as_nr) +{ -+#if MALI_USE_CSF -+ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; ++ struct kbase_context *found_kctx; + -+ return kcpu_fence->metadata->timeline_name; -+#else -+ return kbase_timeline_name; -+#endif /* MALI_USE_CSF */ ++ if (WARN_ON(kbdev == NULL)) ++ return NULL; ++ ++ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) ++ return NULL; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ found_kctx = kbdev->as_to_kctx[as_nr]; ++ ++ if (found_kctx) { ++ if (atomic_read(&found_kctx->refcount) <= 0) ++ found_kctx = NULL; ++ } ++ ++ return found_kctx; +} + -+static bool -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+kbase_fence_enable_signaling(struct fence *fence) -+#else -+kbase_fence_enable_signaling(struct dma_fence *fence) -+#endif ++bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) +{ -+ return true; ++ bool result = false; ++ ++ if (WARN_ON(kctx == NULL)) ++ return result; ++ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ if 
(atomic_read(&kctx->refcount) > 0) { ++ KBASE_DEBUG_ASSERT(kctx->as_nr >= 0); ++ ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, ++ kbase_ktrace_get_ctx_refcnt(kctx)); ++ result = true; ++ } ++ ++ return result; +} + -+static void -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+kbase_fence_fence_value_str(struct fence *fence, char *str, int size) -+#else -+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) -+#endif ++bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx) +{ -+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) -+ const char *format = "%u"; -+#else -+ const char *format = "%llu"; -+#endif -+ if (unlikely(!scnprintf(str, size, format, fence->seqno))) -+ pr_err("Fail to encode fence seqno to string"); ++ unsigned long flags; ++ bool result = false; ++ ++ if (WARN_ON(kctx == NULL)) ++ return result; ++ ++ if (WARN_ON(kctx->kbdev == NULL)) ++ return result; ++ ++ mutex_lock(&kctx->kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ result = kbase_ctx_sched_inc_refcount_nolock(kctx); ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->kbdev->mmu_hw_mutex); ++ ++ return result; +} + -+#if MALI_USE_CSF -+static void -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+kbase_fence_release(struct fence *fence) -+#else -+kbase_fence_release(struct dma_fence *fence) -+#endif ++void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) +{ -+ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; ++ unsigned long flags; + -+ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); -+ kfree(kcpu_fence); ++ if (WARN_ON(!kctx)) ++ return; ++ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ ++ if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && ++ !WARN_ON(atomic_read(&kctx->refcount) <= 0)) ++ kbase_ctx_sched_release_ctx(kctx); ++ ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} -+#endif + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -+const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, -+#else -+extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -+const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, -+#endif -+ .get_driver_name = kbase_fence_get_driver_name, -+ .get_timeline_name = kbase_fence_get_timeline_name, -+ .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF -+ .fence_value_str = kbase_fence_fence_value_str, -+ .release = kbase_fence_release -+#else -+ .fence_value_str = kbase_fence_fence_value_str ++bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; ++ bool added_ref = false; ++ unsigned long flags; ++ ++ if (WARN_ON(kctx == NULL)) ++ return added_ref; ++ ++ kbdev = kctx->kbdev; ++ ++ if (WARN_ON(kbdev == NULL)) ++ return added_ref; ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && ++ (kctx == kbdev->as_to_kctx[kctx->as_nr])) { ++ atomic_inc(&kctx->refcount); ++ ++ if (kbdev->as_free & (1u << kctx->as_nr)) ++ kbdev->as_free &= ~(1u << kctx->as_nr); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, ++ kbase_ktrace_get_ctx_refcnt(kctx)); ++ added_ref = true; ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ 
mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ return added_ref; ++} +#endif -+}; -+KBASE_EXPORT_TEST_API(kbase_fence_ops); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator.h b/drivers/gpu/arm/bifrost/mali_kbase_gator.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h new file mode 100644 -index 000000000..dd7df8746 +index 000000000..5a8d17547 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator.h -@@ -0,0 +1,52 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +@@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -215898,47 +218949,242 @@ index 000000000..dd7df8746 + * + */ + -+/* NB taken from gator */ -+/* -+ * List of possible actions to be controlled by DS-5 Streamline. -+ * The following numbers are used by gator to control the frame buffer dumping -+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because -+ * they are unknown inside gator. ++#ifndef _KBASE_CTX_SCHED_H_ ++#define _KBASE_CTX_SCHED_H_ ++ ++#include ++ ++/** ++ * DOC: The Context Scheduler manages address space assignment and reference ++ * counting to kbase_context. The interface has been designed to minimise ++ * interactions between the Job Scheduler and Power Management/MMU to support ++ * the existing Job Scheduler interface. ++ * ++ * The initial implementation of the Context Scheduler does not schedule ++ * contexts. Instead it relies on the Job Scheduler to make decisions of ++ * when to schedule/evict contexts if address spaces are starved. In the ++ * future, once an interface between the CS and JS has been devised to ++ * provide enough information about how each context is consuming GPU resources, ++ * those decisions can be made in the CS itself, thereby reducing duplicated ++ * code. + */ + -+#ifndef _KBASE_GATOR_H_ -+#define _KBASE_GATOR_H_ ++/** ++ * kbase_ctx_sched_init() - Initialise the context scheduler ++ * @kbdev: The device for which the context scheduler needs to be initialised ++ * ++ * This must be called during device initialisation. The number of hardware ++ * address spaces must already be established before calling this function. ++ * ++ * Return: 0 for success, otherwise failure ++ */ ++int kbase_ctx_sched_init(struct kbase_device *kbdev); + -+#include ++/** ++ * kbase_ctx_sched_term - Terminate the context scheduler ++ * @kbdev: The device for which the context scheduler needs to be terminated ++ * ++ * This must be called during device termination after all contexts have been ++ * destroyed. 
++ */ ++void kbase_ctx_sched_term(struct kbase_device *kbdev); + -+#define GATOR_JOB_SLOT_START 1 -+#define GATOR_JOB_SLOT_STOP 2 -+#define GATOR_JOB_SLOT_SOFT_STOPPED 3 ++/** ++ * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling ++ * @kctx: The context to initialize ++ * ++ * This must be called during context initialization before any other context ++ * scheduling functions are called on @kctx ++ */ ++void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + -+#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT ++/** ++ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context ++ * @kctx: The context to which to retain a reference ++ * ++ * This function should be called whenever an address space should be assigned ++ * to a context and programmed onto the MMU. It should typically be called ++ * when jobs are ready to be submitted to the GPU. ++ * ++ * It can be called as many times as necessary. The address space will be ++ * assigned to the context for as long as there is a reference to said context. ++ * ++ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be ++ * held whilst calling this function. ++ * ++ * Return: The address space that the context has been assigned to or ++ * KBASEP_AS_NR_INVALID if no address space was available. ++ */ ++int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); + -+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) ++/** ++ * kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the @ref kbase_context ++ * @kctx: The context to which to retain a reference ++ * ++ * This function only retains a reference to the context. It must be called ++ * only when the context already has a reference. ++ * ++ * This is typically called inside an atomic session where we know the context ++ * is already scheduled in but want to take an extra reference to ensure that ++ * it doesn't get descheduled. ++ * ++ * The kbase_device::hwaccess_lock must be held whilst calling this function ++ */ ++void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); + -+struct kbase_context; ++/** ++ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context ++ * @kctx: The context from which to release a reference ++ * ++ * This function should be called whenever an address space could be unassigned ++ * from a context. When there are no more references to said context, the ++ * address space previously assigned to this context shall be reassigned to ++ * other contexts as needed. ++ * ++ * The kbase_device::hwaccess_lock must be held whilst calling this function ++ */ ++void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); + -+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); -+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); -+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); -+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event); ++/** ++ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space ++ * @kctx: The context to be removed ++ * ++ * This function should be called when a context is being destroyed. The ++ * context must no longer have any reference. If it has been assigned an ++ * address space before then the AS will be unprogrammed. 
++ */ ++void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); + -+#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ ++/** ++ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces ++ * @kbdev: The device for which address spaces to be reprogrammed ++ * ++ * This function shall reprogram all address spaces previously assigned to ++ * contexts. It can be used after the GPU is reset. ++ * ++ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be ++ * held whilst calling this function. ++ */ ++void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); + -+#endif /* _KBASE_GATOR_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c ++/** ++ * kbase_ctx_sched_as_to_ctx_refcount - Lookup a context based on its current ++ * address space and ensure that is stays scheduled in ++ * @kbdev: The device for which the returned context must belong ++ * @as_nr: address space assigned to the context of interest ++ * ++ * The context is refcounted as being busy to prevent it from scheduling ++ * out. It must be released with kbase_ctx_sched_release_ctx() when it is no ++ * longer required to stay scheduled in. ++ * ++ * This function can safely be called from IRQ context. ++ * ++ * The following locking conditions are made on the caller: ++ * * it must not hold the kbase_device::hwaccess_lock, because it will be used ++ * internally. ++ * ++ * Return: a valid struct kbase_context on success, which has been refcounted ++ * as being busy or return NULL on failure, indicating that no context was found ++ * in as_nr. ++ */ ++struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( ++ struct kbase_device *kbdev, size_t as_nr); ++ ++/** ++ * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address ++ * space ++ * @kbdev: The device for which the returned context must belong ++ * @as_nr: address space assigned to the context of interest ++ * ++ * Return: a valid struct kbase_context on success or NULL on failure, ++ * indicating that no context was found in as_nr. ++ */ ++struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, ++ size_t as_nr); ++ ++/** ++ * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current ++ * address space. ++ * @kbdev: The device for which the returned context must belong ++ * @as_nr: address space assigned to the context of interest ++ * ++ * The following lock must be held by the caller: ++ * * kbase_device::hwaccess_lock ++ * ++ * Return: a valid struct kbase_context on success or NULL on failure, ++ * indicating that no context was found in as_nr. ++ */ ++struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( ++ struct kbase_device *kbdev, size_t as_nr); ++ ++/** ++ * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, ++ * preventing it from being scheduled out. ++ * @kctx: Context to be refcounted ++ * ++ * The following locks must be held by the caller: ++ * &kbase_device.mmu_hw_mutex ++ * &kbase_device.hwaccess_lock ++ * ++ * Return: true if refcount succeeded, and the context will not be scheduled ++ * out, false if the refcount failed (because the context is being/has been ++ * scheduled out). ++ */ ++bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx); ++ ++/** ++ * kbase_ctx_sched_inc_refcount - Refcount a context as being busy, preventing ++ * it from being scheduled out. 
++ * @kctx: Context to be refcounted ++ * ++ * The following locking conditions are made on the caller: ++ * * it must not hold kbase_device::mmu_hw_mutex and ++ * kbase_device::hwaccess_lock, because they will be used internally. ++ * ++ * Return: true if refcount succeeded, and the context will not be scheduled ++ * out, false if the refcount failed (because the context is being/has been ++ * scheduled out). ++ */ ++bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); ++ ++/** ++ * kbase_ctx_sched_release_ctx_lock - Release a reference count of a context ++ * @kctx: Context for which refcount should be decreased ++ * ++ * Effectivelly, this is a wrapper for kbase_ctx_sched_release_ctx, but ++ * kbase_device::hwaccess_lock is required NOT to be locked. ++ */ ++void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); ++ ++#if MALI_USE_CSF ++/** ++ * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU ++ * address space slot assigned to it. ++ * ++ * @kctx: Context to be refcounted ++ * ++ * This function takes a reference on the context if it has a GPU address space ++ * slot assigned to it. The address space slot will not be available for ++ * re-assignment until the reference is released. ++ * ++ * Return: true if refcount succeeded and the address space slot will not be ++ * reassigned, false if the refcount failed (because the address space slot ++ * was not assigned). ++ */ ++bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); ++#endif ++ ++#endif /* _KBASE_CTX_SCHED_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.c b/drivers/gpu/arm/bifrost/mali_kbase_debug.c new file mode 100644 -index 000000000..bf5f259a0 +index 000000000..6d3b1093b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c -@@ -0,0 +1,112 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.c +@@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -215957,106 +219203,35 @@ index 000000000..bf5f259a0 + */ + +#include -+#include + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+/** -+ * kbasep_gpu_memory_seq_show - Show callback for the @c gpu_memory debugfs file -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry -+ * -+ * This function is called to get the contents of the @c gpu_memory debugfs -+ * file. This is a report of current gpu memory usage. 
-+ * -+ * Return: -+ * * 0 if successfully prints data in debugfs entry file -+ * * -1 if it encountered an error -+ */ ++static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { ++ NULL, ++ NULL ++}; + -+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) ++void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ -+ struct list_head *entry; -+ const struct list_head *kbdev_list; -+ -+ kbdev_list = kbase_device_get_list(); -+ list_for_each(entry, kbdev_list) { -+ struct kbase_device *kbdev = NULL; -+ struct kbase_context *kctx; -+ -+ kbdev = list_entry(entry, struct kbase_device, entry); -+ /* output the total memory usage and cap for this device */ -+ seq_printf(sfile, " \n"); -+ seq_printf(sfile, "%-16s %10u\n", -+ kbdev->devname, -+ atomic_read(&(kbdev->memdev.used_pages))); -+ mutex_lock(&kbdev->kctx_list_lock); -+ seq_printf(sfile, " \n"); -+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { -+ struct pid *pid_struct; -+ struct task_struct *task; -+ -+ rcu_read_lock(); -+ pid_struct = find_get_pid(kctx->tgid); -+ task = pid_task(pid_struct, PIDTYPE_PID); -+ /* output the memory usage and cap for each kctx -+ * opened on this device -+ */ -+ seq_printf(sfile, " %s-0x%pK %-20s %-10d %10u\n", -+ "kctx", -+ kctx, -+ task ? task->comm : "[null comm]", -+ kctx->tgid, -+ atomic_read(&(kctx->used_pages))); -+ put_pid(pid_struct); -+ rcu_read_unlock(); -+ } -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } -+ kbase_device_put_list(kbdev_list); -+ return 0; ++ kbasep_debug_assert_registered_cb.func = func; ++ kbasep_debug_assert_registered_cb.param = param; +} + -+/* -+ * File operations related to debugfs entry for gpu_memory -+ */ -+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) ++void kbasep_debug_assert_call_hook(void) +{ -+ return single_open(file, kbasep_gpu_memory_seq_show, NULL); ++ if (kbasep_debug_assert_registered_cb.func != NULL) ++ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} ++KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); + -+static const struct file_operations kbasep_gpu_memory_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_gpu_memory_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+/* -+ * Initialize debugfs entry for gpu_memory -+ */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) -+{ -+ debugfs_create_file("gpu_memory", 0444, -+ kbdev->mali_debugfs_directory, NULL, -+ &kbasep_gpu_memory_debugfs_fops); -+} -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) {} -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.h b/drivers/gpu/arm/bifrost/mali_kbase_debug.h new file mode 100644 -index 000000000..6d5423f37 +index 000000000..9d6ff73aa --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h -@@ -0,0 +1,50 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.h +@@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2017, 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -216074,45 +219249,161 @@ index 000000000..6d5423f37 + * + */ + ++#ifndef _KBASE_DEBUG_H ++#define _KBASE_DEBUG_H ++ ++#include ++ ++/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ ++#define KBASE_DEBUG_SKIP_TRACE 0 ++ ++/** @brief If different from 0, the trace will only contain the file and line. */ ++#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 ++ ++/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ ++#ifndef KBASE_DEBUG_DISABLE_ASSERTS ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define KBASE_DEBUG_DISABLE_ASSERTS 0 ++#else ++#define KBASE_DEBUG_DISABLE_ASSERTS 1 ++#endif ++#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++ ++/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ ++typedef void kbase_debug_assert_hook(void *); ++ ++struct kbasep_debug_assert_cb { ++ kbase_debug_assert_hook *func; ++ void *param; ++}; ++ ++/** ++ * KBASEP_DEBUG_PRINT_TRACE - Private macro containing the format of the trace ++ * to display before every message @sa KBASE_DEBUG_SKIP_TRACE, ++ * KBASE_DEBUG_SKIP_FUNCTION_NAME ++ */ ++#if !KBASE_DEBUG_SKIP_TRACE ++#define KBASEP_DEBUG_PRINT_TRACE \ ++ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) ++#if !KBASE_DEBUG_SKIP_FUNCTION_NAME ++#define KBASEP_DEBUG_PRINT_FUNCTION __func__ ++#else ++#define KBASEP_DEBUG_PRINT_FUNCTION "" ++#endif ++#else ++#define KBASEP_DEBUG_PRINT_TRACE "" ++#endif ++ +/** -+ * DOC: Header file for gpu_memory entry in debugfs ++ * KBASEP_DEBUG_ASSERT_OUT() - (Private) system printing ++ * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. ++ * @trace: location in the code from where the message is printed ++ * @function: function from where the message is printed ++ * @...: Format string followed by format arguments. + * ++ * @note function parameter cannot be concatenated with other strings + */ ++/* Select the correct system output function*/ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ ++ do { \ ++ pr_err("Mali: %s function:%s ", trace, function); \ ++ pr_err(__VA_ARGS__); \ ++ pr_err("\n"); \ ++ } while (false) ++#else ++#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() ++#endif + -+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H -+#define _KBASE_GPU_MEMORY_DEBUGFS_H ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() ++#else ++#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() ++#endif + -+#include -+#include ++/** ++ * KBASE_DEBUG_ASSERT(expr) - Calls @ref KBASE_PRINT_ASSERT and prints the ++ * expression @a expr if @a expr is false ++ * @expr: Boolean expression ++ * ++ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 ++ * ++ */ ++#define KBASE_DEBUG_ASSERT(expr) \ ++ KBASE_DEBUG_ASSERT_MSG(expr, #expr) + -+/* kbase_io_history_add - add new entry to the register access history ++#if KBASE_DEBUG_DISABLE_ASSERTS ++#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() ++#else ++/** ++ * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the ++ * given message if @a expr is false ++ * @expr: Boolean expression ++ * @...: Message to display when @a expr is false, as a format string followed ++ * by format arguments. 
+ * -+ * @h: Pointer to the history data structure -+ * @addr: Register address -+ * @value: The value that is either read from or written to the register -+ * @write: 1 if it's a register write, 0 if it's a read ++ * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + */ -+void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, -+ u32 value, u8 write); ++#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ ++ do { \ ++ if (!(expr)) { \ ++ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ ++ KBASE_CALL_ASSERT_HOOK();\ ++ BUG();\ ++ } \ ++ } while (false) ++#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** -+ * kbasep_gpu_memory_debugfs_init - Initialize gpu_memory debugfs entry ++ * KBASE_DEBUG_CODE() - Executes the code inside the macro only in debug mode ++ * @X: Code to compile only in debug mode. ++ */ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++#define KBASE_DEBUG_CODE(X) X ++#else ++#define KBASE_DEBUG_CODE(X) CSTD_NOP() ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ ++/** @} */ ++ ++/** ++ * kbase_debug_assert_register_hook - Register a function to call on ASSERT ++ * @func: the function to call when an assert is triggered. ++ * @param: the parameter to pass to \a func when calling it ++ * ++ * Such functions will \b only be called during Debug mode, and for debugging ++ * features \b only. Do not rely on them to be called in general use. ++ * ++ * To disable the hook, supply NULL to \a func. ++ * ++ * @note This function is not thread-safe, and should only be used to ++ * register/deregister once in the module's lifetime. + * -+ * @kbdev: Device pointer + */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); ++void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + -+#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c ++/** ++ * kbasep_debug_assert_call_hook - Call a debug assert hook previously ++ * registered with kbase_debug_assert_register_hook() ++ * ++ * @note This function is not thread-safe with respect to multiple threads ++ * registering functions and parameters with ++ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the ++ * responsibility of the registered hook. ++ */ ++void kbasep_debug_assert_call_hook(void); ++ ++#endif /* _KBASE_DEBUG_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c new file mode 100644 -index 000000000..7a7d17ea5 +index 000000000..d6518b476 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c -@@ -0,0 +1,888 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c +@@ -0,0 +1,573 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -216130,883 +219421,568 @@ index 000000000..7a7d17ea5 + * + */ + -+/* -+ * Base kernel property query APIs -+ */ -+ +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include + ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+static void kbase_gpuprops_construct_coherent_groups( -+ struct base_gpu_props * const props) ++static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) +{ -+ struct mali_base_gpu_coherent_group *current_group; -+ u64 group_present; -+ u64 group_mask; -+ u64 first_set, first_set_prev; -+ u32 num_groups = 0; -+ -+ KBASE_DEBUG_ASSERT(props != NULL); ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ unsigned long flags; ++ bool ret; + -+ props->coherency_info.coherency = props->raw_props.mem_features; -+ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ ret = !list_empty(event_list); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { -+ /* Group is l2 coherent */ -+ group_present = props->raw_props.l2_present; -+ } else { -+ /* Group is l1 coherent */ -+ group_present = props->raw_props.shader_present; -+ } ++ return ret; ++} + -+ /* -+ * The coherent group mask can be computed from the l2 present -+ * register. -+ * -+ * For the coherent group n: -+ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) -+ * where first_set is group_present with only its nth set-bit kept -+ * (i.e. the position from where a new group starts). -+ * -+ * For instance if the groups are l2 coherent and l2_present=0x0..01111: -+ * The first mask is: -+ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) -+ * = (0x0..010 - 1) & ~(0x0..01 - 1) -+ * = 0x0..00f -+ * The second mask is: -+ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) -+ * = (0x0..100 - 1) & ~(0x0..010 - 1) -+ * = 0x0..0f0 -+ * And so on until all the bits from group_present have been cleared -+ * (i.e. there is no group left). -+ */ ++static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) ++{ ++ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; ++ struct base_job_fault_event *event; ++ unsigned long flags; + -+ current_group = props->coherency_info.group; -+ first_set = group_present & ~(group_present - 1); ++ spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); ++ list_for_each_entry(event, event_list, head) { ++ if (event->katom->kctx == kctx) { ++ list_del(&event->head); ++ spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); + -+ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { -+ group_present -= first_set; /* Clear the current group bit */ -+ first_set_prev = first_set; ++ wake_up(&kctx->kbdev->job_fault_resume_wq); ++ flush_work(&event->job_fault_work); + -+ first_set = group_present & ~(group_present - 1); -+ group_mask = (first_set - 1) & ~(first_set_prev - 1); -+ -+ /* Populate the coherent_group structure for each group */ -+ current_group->core_mask = group_mask & props->raw_props.shader_present; -+ current_group->num_cores = hweight64(current_group->core_mask); -+ -+ num_groups++; -+ current_group++; ++ /* job_fault_event_list can only have a single atom for ++ * each context. 
++ */ ++ return; ++ } + } -+ -+ if (group_present != 0) -+ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); -+ -+ props->coherency_info.num_groups = num_groups; ++ spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); +} + -+/** -+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources -+ * @kbdev: The &struct kbase_device structure for the device -+ * @curr_config: The &struct curr_config_props structure to receive the result -+ * -+ * Fill the &struct curr_config_props structure with values from the GPU -+ * configuration registers. -+ * -+ * Return: Zero on success, Linux error code on failure -+ */ -+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, -+ struct curr_config_props * const curr_config) ++static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) +{ -+ struct kbase_current_config_regdump curr_config_regdump; -+ int err; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; ++ struct base_job_fault_event *event; ++ unsigned long flags; + -+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) -+ return -EINVAL; ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ if (list_empty(event_list)) { ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ return true; ++ } ++ list_for_each_entry(event, event_list, head) { ++ if (event->katom->kctx == kctx) { ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, ++ flags); ++ return false; ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ return true; ++} + -+ /* If update not needed just return. */ -+ if (!curr_config->update_needed) ++static int wait_for_job_fault(struct kbase_device *kbdev) ++{ ++#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE ++ int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, ++ kbase_is_job_fault_event_pending(kbdev), ++ msecs_to_jiffies(2000)); ++ if (ret == 0) ++ return -EAGAIN; ++ else if (ret > 0) + return 0; ++ else ++ return ret; ++#else ++ return wait_event_interruptible(kbdev->job_fault_wq, ++ kbase_is_job_fault_event_pending(kbdev)); ++#endif ++} + -+ /* Dump relevant registers */ -+ err = kbase_backend_gpuprops_get_curr_config(kbdev, -+ &curr_config_regdump); -+ if (err) -+ return err; ++/* wait until the fault happen and copy the event */ ++static int kbase_job_fault_event_wait(struct kbase_device *kbdev, ++ struct base_job_fault_event *event) ++{ ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ struct base_job_fault_event *event_in; ++ unsigned long flags; + -+ curr_config->l2_slices = -+ KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ while (list_empty(event_list)) { ++ int err; + -+ curr_config->l2_present = -+ ((u64) curr_config_regdump.l2_present_hi << 32) + -+ curr_config_regdump.l2_present_lo; ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ curr_config->shader_present = -+ ((u64) curr_config_regdump.shader_present_hi << 32) + -+ curr_config_regdump.shader_present_lo; ++ err = wait_for_job_fault(kbdev); ++ if (err) ++ return err; + -+ curr_config->num_cores = hweight64(curr_config->shader_present); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ } + -+ curr_config->update_needed = false; ++ event_in = list_entry(event_list->next, ++ struct base_job_fault_event, head); ++ event->event_code = event_in->event_code; ++ event->katom = event_in->katom; 
++ ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return 0; ++ +} + -+/** -+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Requests the current configuration to be updated next time the -+ * kbase_gpuprops_get_curr_config_props() is called. -+ * -+ * Return: Zero on success, Linux error code on failure -+ */ -+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) ++/* remove the event from the queue */ ++static struct base_job_fault_event *kbase_job_fault_event_dequeue( ++ struct kbase_device *kbdev, struct list_head *event_list) +{ -+ if (WARN_ON(!kbdev)) -+ return -EINVAL; ++ struct base_job_fault_event *event; ++ ++ event = list_entry(event_list->next, ++ struct base_job_fault_event, head); ++ list_del(event_list->next); ++ ++ return event; + -+ kbdev->gpu_props.curr_config.update_needed = true; -+ return 0; +} + -+/** -+ * kbase_gpuprops_get_props - Get the GPU configuration -+ * @gpu_props: The &struct base_gpu_props structure -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Fill the &struct base_gpu_props structure with values from the GPU -+ * configuration registers. Only the raw properties are filled in this function. -+ * -+ * Return: Zero on success, Linux error code on failure ++/* Remove all the following atoms after the failed atom in the same context ++ * Call the postponed bottom half of job done. ++ * Then, this context could be rescheduled. + */ -+static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, -+ struct kbase_device *kbdev) ++static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) +{ -+ struct kbase_gpuprops_regdump regdump; -+ int i; -+ int err; ++ struct list_head *event_list = &kctx->job_fault_resume_event_list; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(gpu_props != NULL); ++ while (!list_empty(event_list)) { ++ struct base_job_fault_event *event; + -+ /* Dump relevant registers */ -+ err = kbase_backend_gpuprops_get(kbdev, ®dump); -+ if (err) -+ return err; ++ event = kbase_job_fault_event_dequeue(kctx->kbdev, ++ &kctx->job_fault_resume_event_list); ++ kbase_jd_done_worker(&event->katom->work); ++ } + -+ gpu_props->raw_props.gpu_id = regdump.gpu_id; -+ gpu_props->raw_props.tiler_features = regdump.tiler_features; -+ gpu_props->raw_props.mem_features = regdump.mem_features; -+ gpu_props->raw_props.mmu_features = regdump.mmu_features; -+ gpu_props->raw_props.l2_features = regdump.l2_features; ++} + -+ gpu_props->raw_props.as_present = regdump.as_present; -+ gpu_props->raw_props.js_present = regdump.js_present; -+ gpu_props->raw_props.shader_present = -+ ((u64) regdump.shader_present_hi << 32) + -+ regdump.shader_present_lo; -+ gpu_props->raw_props.tiler_present = -+ ((u64) regdump.tiler_present_hi << 32) + -+ regdump.tiler_present_lo; -+ gpu_props->raw_props.l2_present = -+ ((u64) regdump.l2_present_hi << 32) + -+ regdump.l2_present_lo; -+ gpu_props->raw_props.stack_present = -+ ((u64) regdump.stack_present_hi << 32) + -+ regdump.stack_present_lo; ++static void kbase_job_fault_resume_worker(struct work_struct *data) ++{ ++ struct base_job_fault_event *event = container_of(data, ++ struct base_job_fault_event, job_fault_work); ++ struct kbase_context *kctx; ++ struct kbase_jd_atom *katom; + -+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) -+ gpu_props->raw_props.js_features[i] = regdump.js_features[i]; ++ katom = event->katom; ++ kctx = katom->kctx; 
+ -+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; ++ dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + -+ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; -+ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; -+ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; -+ gpu_props->raw_props.thread_features = regdump.thread_features; -+ gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; ++ /* When it was waked up, it need to check if queue is empty or the ++ * failed atom belongs to different context. If yes, wake up. Both ++ * of them mean the failed job has been dumped. Please note, it ++ * should never happen that the job_fault_event_list has the two ++ * atoms belong to the same context. ++ */ ++ wait_event(kctx->kbdev->job_fault_resume_wq, ++ kbase_ctx_has_no_event_pending(kctx)); + -+ gpu_props->raw_props.gpu_features = -+ ((u64) regdump.gpu_features_hi << 32) + -+ regdump.gpu_features_lo; ++ atomic_set(&kctx->job_fault_count, 0); ++ kbase_jd_done_worker(&katom->work); + -+ return 0; -+} ++ /* In case the following atoms were scheduled during failed job dump ++ * the job_done_worker was held. We need to rerun it after the dump ++ * was finished ++ */ ++ kbase_job_fault_resume_event_cleanup(kctx); + -+void kbase_gpuprops_update_core_props_gpu_id( -+ struct base_gpu_props * const gpu_props) -+{ -+ gpu_props->core_props.version_status = -+ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); -+ gpu_props->core_props.minor_revision = -+ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); -+ gpu_props->core_props.major_revision = -+ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); -+ gpu_props->core_props.product_id = -+ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); ++ dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); +} + -+/** -+ * kbase_gpuprops_update_max_config_props - Updates the max config properties in -+ * the base_gpu_props. -+ * @base_props: The &struct base_gpu_props structure -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Updates the &struct base_gpu_props structure with the max config properties. -+ */ -+static void kbase_gpuprops_update_max_config_props( -+ struct base_gpu_props * const base_props, struct kbase_device *kbdev) ++static struct base_job_fault_event *kbase_job_fault_event_queue( ++ struct list_head *event_list, ++ struct kbase_jd_atom *atom, ++ u32 completion_code) +{ -+ int l2_n = 0; ++ struct base_job_fault_event *event; + -+ if (WARN_ON(!kbdev) || WARN_ON(!base_props)) -+ return; ++ event = &atom->fault_event; + -+ /* return if the max_config is not set during arbif initialization */ -+ if (kbdev->gpu_props.max_config.core_mask == 0) -+ return; ++ event->katom = atom; ++ event->event_code = completion_code; ++ ++ list_add_tail(&event->head, event_list); ++ ++ return event; + -+ /* -+ * Set the base_props with the maximum config values to ensure that the -+ * user space will always be based on the maximum resources available. -+ */ -+ base_props->l2_props.num_l2_slices = -+ kbdev->gpu_props.max_config.l2_slices; -+ base_props->raw_props.shader_present = -+ kbdev->gpu_props.max_config.core_mask; -+ /* -+ * Update l2_present in the raw data to be consistent with the -+ * max_config.l2_slices number. 
-+ */ -+ base_props->raw_props.l2_present = 0; -+ for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { -+ base_props->raw_props.l2_present <<= 1; -+ base_props->raw_props.l2_present |= 0x1; -+ } -+ /* -+ * Update the coherency_info data using just one core group. For -+ * architectures where the max_config is provided by the arbiter it is -+ * not necessary to split the shader core groups in different coherent -+ * groups. -+ */ -+ base_props->coherency_info.coherency = -+ base_props->raw_props.mem_features; -+ base_props->coherency_info.num_core_groups = 1; -+ base_props->coherency_info.num_groups = 1; -+ base_props->coherency_info.group[0].core_mask = -+ kbdev->gpu_props.max_config.core_mask; -+ base_props->coherency_info.group[0].num_cores = -+ hweight32(kbdev->gpu_props.max_config.core_mask); +} + -+/** -+ * kbase_gpuprops_calculate_props - Calculate the derived properties -+ * @gpu_props: The &struct base_gpu_props structure -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Fill the &struct base_gpu_props structure with values derived from the GPU -+ * configuration registers -+ */ -+static void kbase_gpuprops_calculate_props( -+ struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) ++static void kbase_job_fault_event_post(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, u32 completion_code) +{ -+ int i; ++ struct base_job_fault_event *event; ++ unsigned long flags; + -+ /* Populate the base_gpu_props structure */ -+ kbase_gpuprops_update_core_props_gpu_id(gpu_props); -+ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; -+#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE -+ gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; -+#else -+ gpu_props->core_props.gpu_available_memory_size = -+ totalram_pages() << PAGE_SHIFT; -+#endif ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, ++ katom, completion_code); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; ++ wake_up_interruptible(&kbdev->job_fault_wq); + -+ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); -+ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); ++ INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); ++ queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); + -+ /* Field with number of l2 slices is added to MEM_FEATURES register -+ * since t76x. Below code assumes that for older GPU reserved bits will -+ * be read as zero. 
-+ */ -+ gpu_props->l2_props.num_l2_slices = -+ KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; ++ dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", ++ katom->kctx->tgid, katom->kctx->id); + -+ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); -+ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); ++} + -+ if (gpu_props->raw_props.thread_max_threads == 0) -+ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; -+ else -+ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; ++/* ++ * This function will process the job fault ++ * Get the register copy ++ * Send the failed job dump event ++ * Create a Wait queue to wait until the job dump finish ++ */ + -+ if (gpu_props->raw_props.thread_max_workgroup_size == 0) -+ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; -+ else -+ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; ++bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, ++ u32 completion_code) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ if (gpu_props->raw_props.thread_max_barrier_size == 0) -+ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; -+ else -+ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; ++ /* Check if dumping is in the process ++ * only one atom of each context can be dumped at the same time ++ * If the atom belongs to different context, it can be dumped ++ */ ++ if (atomic_read(&kctx->job_fault_count) > 0) { ++ kbase_job_fault_event_queue( ++ &kctx->job_fault_resume_event_list, ++ katom, completion_code); ++ dev_info(kctx->kbdev->dev, "queue:%d\n", ++ kbase_jd_atom_id(kctx, katom)); ++ return true; ++ } + -+ if (gpu_props->raw_props.thread_tls_alloc == 0) -+ gpu_props->thread_props.tls_alloc = -+ gpu_props->thread_props.max_threads; -+ else -+ gpu_props->thread_props.tls_alloc = -+ gpu_props->raw_props.thread_tls_alloc; ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) ++ return false; + -+#if MALI_USE_CSF -+ gpu_props->thread_props.max_registers = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); -+ gpu_props->thread_props.impl_tech = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); -+ gpu_props->thread_props.max_task_queue = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); -+ gpu_props->thread_props.max_thread_group_split = 0; -+#else -+ gpu_props->thread_props.max_registers = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); -+ gpu_props->thread_props.max_task_queue = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); -+ gpu_props->thread_props.max_thread_group_split = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); -+ gpu_props->thread_props.impl_tech = -+ KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); -+#endif ++ if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) { + -+ /* If values are not specified, then use defaults */ -+ if (gpu_props->thread_props.max_registers == 0) { -+ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; -+ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; -+ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; -+ } ++ if (completion_code != BASE_JD_EVENT_DONE) { + -+ /* -+ * If the maximum resources allocated information is available it is -+ * necessary to update the base_gpu_props with the max_config info to -+ * the 
userspace. This is applicable to systems that receive this -+ * information from the arbiter. -+ */ -+ if (kbdev->gpu_props.max_config.core_mask) -+ /* Update the max config properties in the base_gpu_props */ -+ kbase_gpuprops_update_max_config_props(gpu_props, -+ kbdev); -+ else -+ /* Initialize the coherent_group structure for each group */ -+ kbase_gpuprops_construct_coherent_groups(gpu_props); -+} ++ if (kbase_job_fault_get_reg_snapshot(kctx) == false) { ++ dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); ++ return false; ++ } + -+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, -+ const struct max_config_props *max_config) -+{ -+ if (WARN_ON(!kbdev) || WARN_ON(!max_config)) -+ return; ++ kbase_job_fault_event_post(kctx->kbdev, katom, ++ completion_code); ++ atomic_inc(&kctx->job_fault_count); ++ dev_info(kctx->kbdev->dev, "post:%d\n", ++ kbase_jd_atom_id(kctx, katom)); ++ return true; ++ ++ } ++ } ++ return false; + -+ kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; -+ kbdev->gpu_props.max_config.core_mask = max_config->core_mask; +} + -+void kbase_gpuprops_set(struct kbase_device *kbdev) ++static int debug_job_fault_show(struct seq_file *m, void *v) +{ -+ struct kbase_gpu_props *gpu_props; -+ struct gpu_raw_gpu_props *raw; ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event = (struct base_job_fault_event *)v; ++ struct kbase_context *kctx = event->katom->kctx; ++ int i; + -+ if (WARN_ON(!kbdev)) -+ return; -+ gpu_props = &kbdev->gpu_props; -+ raw = &gpu_props->props.raw_props; ++ dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", ++ kctx->tgid, kctx->id, event->reg_offset); + -+ /* Initialize the base_gpu_props structure from the hardware */ -+ kbase_gpuprops_get_props(&gpu_props->props, kbdev); ++ if (kctx->reg_dump == NULL) { ++ dev_warn(kbdev->dev, "reg dump is NULL"); ++ return -1; ++ } + -+ /* Populate the derived properties */ -+ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); ++ if (kctx->reg_dump[event->reg_offset] == ++ REGISTER_DUMP_TERMINATION_FLAG) { ++ /* Return the error here to stop the read. And the ++ * following next() will not be called. The stop can ++ * get the real event resource and release it ++ */ ++ return -1; ++ } + -+ /* Populate kbase-only fields */ -+ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); -+ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); ++ if (event->reg_offset == 0) ++ seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); + -+ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); ++ for (i = 0; i < 50; i++) { ++ if (kctx->reg_dump[event->reg_offset] == ++ REGISTER_DUMP_TERMINATION_FLAG) { ++ break; ++ } ++ seq_printf(m, "%08x: %08x\n", ++ kctx->reg_dump[event->reg_offset], ++ kctx->reg_dump[1+event->reg_offset]); ++ event->reg_offset += 2; + -+ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); -+ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); ++ } + -+ gpu_props->num_cores = hweight64(raw->shader_present); -+ gpu_props->num_core_groups = -+ gpu_props->props.coherency_info.num_core_groups; -+ gpu_props->num_address_spaces = hweight32(raw->as_present); -+ gpu_props->num_job_slots = hweight32(raw->js_present); + -+ /* -+ * Current configuration is used on HW interactions so that the maximum -+ * config is just used for user space avoiding interactions with parts -+ * of the hardware that might not be allocated to the kbase instance at -+ * that moment. 
-+ */ -+ kbase_gpuprops_req_curr_config_update(kbdev); -+ kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); ++ return 0; +} -+ -+int kbase_gpuprops_set_features(struct kbase_device *kbdev) ++static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) +{ -+ struct base_gpu_props *gpu_props; -+ struct kbase_gpuprops_regdump regdump; -+ int err; ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event = (struct base_job_fault_event *)v; + -+ gpu_props = &kbdev->gpu_props.props; ++ dev_info(kbdev->dev, "debug job fault seq next:%d, %d", ++ event->reg_offset, (int)*pos); + -+ /* Dump relevant registers */ -+ err = kbase_backend_gpuprops_get_features(kbdev, ®dump); -+ if (err) -+ return err; ++ return event; ++} + -+ /* -+ * Copy the raw value from the register, later this will get turned -+ * into the selected coherency mode. -+ * Additionally, add non-coherent mode, as this is always supported. -+ */ -+ gpu_props->raw_props.coherency_mode = regdump.coherency_features | -+ COHERENCY_FEATURE_BIT(COHERENCY_NONE); ++static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) ++{ ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event; + -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) -+ gpu_props->thread_props.max_thread_group_split = 0; ++ dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); + -+ /* -+ * The CORE_FEATURES register has different meanings depending on GPU. -+ * On tGOx, bits[3:0] encode num_exec_engines. -+ * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed, -+ * instead. -+ * GPUs like tTIx have additional fields like LSC_SIZE that are -+ * otherwise reserved/RAZ on older GPUs. ++ /* The condition is trick here. It needs make sure the ++ * fault hasn't happened and the dumping hasn't been started, ++ * or the dumping has finished + */ -+ gpu_props->raw_props.core_features = regdump.core_features; ++ if (*pos == 0) { ++ event = kmalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return NULL; ++ event->reg_offset = 0; ++ if (kbase_job_fault_event_wait(kbdev, event)) { ++ kfree(event); ++ return NULL; ++ } + -+#if !MALI_USE_CSF -+ gpu_props->core_props.num_exec_engines = -+ KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); -+#endif ++ /* The cache flush workaround is called in bottom half of ++ * job done but we delayed it. Now we should clean cache ++ * earlier. Then the GPU memory dump should be correct. ++ */ ++ kbase_backend_cache_clean(kbdev, event->katom); ++ } else ++ return NULL; + -+ return err; ++ return event; +} + -+/* -+ * Module parameters to allow the L2 size and hash configuration to be -+ * overridden. -+ * -+ * These parameters must be set on insmod to take effect, and are not visible -+ * in sysfs. -+ */ -+static u8 override_l2_size; -+module_param(override_l2_size, byte, 0000); -+MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); -+ -+static u8 override_l2_hash; -+module_param(override_l2_hash, byte, 0000); -+MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); -+ -+static u32 l2_hash_values[ASN_HASH_COUNT] = { -+ 0, -+}; -+static unsigned int num_override_l2_hash_values; -+module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); -+MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); -+ -+/* Definitions for range of supported user defined hash functions for GPUs -+ * that support L2_CONFIG and not ASN_HASH features. 
Supported hash function -+ * range from 0b1000-0b1111 inclusive. Selection of any other values will -+ * lead to undefined behavior. -+ */ -+#define USER_DEFINED_HASH_LO ((u8)0x08) -+#define USER_DEFINED_HASH_HI ((u8)0x0F) -+ -+enum l2_config_override_result { -+ L2_CONFIG_OVERRIDE_FAIL = -1, -+ L2_CONFIG_OVERRIDE_NONE, -+ L2_CONFIG_OVERRIDE_OK, -+}; -+ -+/** -+ * kbase_read_l2_config_from_dt - Read L2 configuration -+ * @kbdev: The kbase device for which to get the L2 configuration. -+ * -+ * Check for L2 configuration overrides in module parameters and device tree. -+ * Override values in module parameters take priority over override values in -+ * device tree. -+ * -+ * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly -+ * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. -+ * L2_CONFIG_OVERRIDE_FAIL otherwise. -+ */ -+static enum l2_config_override_result -+kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) ++static void debug_job_fault_stop(struct seq_file *m, void *v) +{ -+ struct device_node *np = kbdev->dev->of_node; -+ -+ if (!np) -+ return L2_CONFIG_OVERRIDE_NONE; -+ -+ if (override_l2_size) -+ kbdev->l2_size_override = override_l2_size; -+ else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) -+ kbdev->l2_size_override = 0; ++ struct kbase_device *kbdev = m->private; + -+ /* Check overriding value is supported, if not will result in -+ * undefined behavior. ++ /* here we wake up the kbase_jd_done_worker after stop, it needs ++ * get the memory dump before the register dump in debug daemon, ++ * otherwise, the memory dump may be incorrect. + */ -+ if (override_l2_hash >= USER_DEFINED_HASH_LO && -+ override_l2_hash <= USER_DEFINED_HASH_HI) -+ kbdev->l2_hash_override = override_l2_hash; -+ else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) -+ kbdev->l2_hash_override = 0; -+ -+ kbdev->l2_hash_values_override = false; -+ if (num_override_l2_hash_values) { -+ unsigned int i; -+ -+ kbdev->l2_hash_values_override = true; -+ for (i = 0; i < num_override_l2_hash_values; i++) -+ kbdev->l2_hash_values[i] = l2_hash_values[i]; -+ } else if (!of_property_read_u32_array(np, "l2-hash-values", -+ kbdev->l2_hash_values, -+ ASN_HASH_COUNT)) -+ kbdev->l2_hash_values_override = true; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && -+ (kbdev->l2_hash_override)) { -+ dev_err(kbdev->dev, "l2-hash not supported\n"); -+ return L2_CONFIG_OVERRIDE_FAIL; -+ } ++ if (v != NULL) { ++ kfree(v); ++ dev_info(kbdev->dev, "debug job fault seq stop stage 1"); + -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && -+ (kbdev->l2_hash_values_override)) { -+ dev_err(kbdev->dev, "l2-hash-values not supported\n"); -+ return L2_CONFIG_OVERRIDE_FAIL; -+ } ++ } else { ++ unsigned long flags; + -+ if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) { -+ dev_err(kbdev->dev, -+ "both l2-hash & l2-hash-values not supported\n"); -+ return L2_CONFIG_OVERRIDE_FAIL; ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ if (!list_empty(&kbdev->job_fault_event_list)) { ++ kbase_job_fault_event_dequeue(kbdev, ++ &kbdev->job_fault_event_list); ++ wake_up(&kbdev->job_fault_resume_wq); ++ } ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ dev_info(kbdev->dev, "debug job fault seq stop stage 2"); + } + -+ if (kbdev->l2_size_override || kbdev->l2_hash_override || -+ kbdev->l2_hash_values_override) -+ return L2_CONFIG_OVERRIDE_OK; -+ -+ return L2_CONFIG_OVERRIDE_NONE; +} + -+int 
kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) -+{ -+ int err = 0; ++static const struct seq_operations ops = { ++ .start = debug_job_fault_start, ++ .next = debug_job_fault_next, ++ .stop = debug_job_fault_stop, ++ .show = debug_job_fault_show, ++}; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { -+ struct kbase_gpuprops_regdump regdump; -+ struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; ++static int debug_job_fault_open(struct inode *in, struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; + -+ /* Check for L2 cache size & hash overrides */ -+ switch (kbase_read_l2_config_from_dt(kbdev)) { -+ case L2_CONFIG_OVERRIDE_FAIL: -+ err = -EIO; -+ goto exit; -+ case L2_CONFIG_OVERRIDE_NONE: -+ goto exit; -+ default: -+ break; -+ } ++ if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) { ++ dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed"); ++ return -EBUSY; ++ } + -+ /* pm.active_count is expected to be 1 here, which is set in -+ * kbase_hwaccess_pm_powerup(). -+ */ -+ WARN_ON(kbdev->pm.active_count != 1); -+ /* The new settings for L2 cache can only be applied when it is -+ * off, so first do the power down. -+ */ -+ kbase_pm_context_idle(kbdev); -+ kbase_pm_wait_for_desired_state(kbdev); ++ seq_open(file, &ops); + -+ /* Need L2 to get powered to reflect to L2_FEATURES */ -+ kbase_pm_context_active(kbdev); ++ ((struct seq_file *)file->private_data)->private = kbdev; ++ dev_info(kbdev->dev, "debug job fault seq open"); + -+ /* Wait for the completion of L2 power transition */ -+ kbase_pm_wait_for_l2_powered(kbdev); + -+ /* Dump L2_FEATURES register */ -+ err = kbase_backend_gpuprops_get_l2_features(kbdev, ®dump); -+ if (err) -+ goto exit; ++ return 0; + -+ dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", -+ regdump.l2_features); -+ dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", -+ regdump.l2_config); ++} + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { -+ int idx; -+ const bool asn_he = regdump.l2_config & -+ L2_CONFIG_ASN_HASH_ENABLE_MASK; -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ if (!asn_he && kbdev->l2_hash_values_override) -+ dev_err(kbdev->dev, -+ "Failed to use requested ASN_HASH, fallback to default"); -+#endif -+ for (idx = 0; idx < ASN_HASH_COUNT; idx++) -+ dev_info(kbdev->dev, -+ "%s ASN_HASH[%d] is [0x%08x]\n", -+ asn_he ? 
"Overridden" : "Default", idx, -+ regdump.l2_asn_hash[idx]); -+ } ++static int debug_job_fault_release(struct inode *in, struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ unsigned long flags; + -+ /* Update gpuprops with reflected L2_FEATURES */ -+ gpu_props->raw_props.l2_features = regdump.l2_features; -+ gpu_props->l2_props.log2_cache_size = -+ KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); -+ } ++ seq_release(in, file); + -+exit: -+ return err; -+} ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + -+static struct { -+ u32 type; -+ size_t offset; -+ int size; -+} gpu_property_mapping[] = { -+#define PROP(name, member) \ -+ {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ -+ sizeof(((struct base_gpu_props *)0)->member)} -+ PROP(PRODUCT_ID, core_props.product_id), -+ PROP(VERSION_STATUS, core_props.version_status), -+ PROP(MINOR_REVISION, core_props.minor_revision), -+ PROP(MAJOR_REVISION, core_props.major_revision), -+ PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), -+ PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), -+ PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), -+ PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), -+ PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), -+ PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), -+ PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), ++ /* Disable job fault dumping. This will let kbase run jobs as normal, ++ * without blocking waiting for a job_fault client to read failed jobs. ++ * ++ * After this a new client may open the file, and may re-enable job ++ * fault dumping, but the job_fault_event_lock we hold here will block ++ * that from interfering until after we've completed the cleanup. ++ */ ++ atomic_dec(&kbdev->job_fault_debug); + -+#if MALI_USE_CSF -+#define BACKWARDS_COMPAT_PROP(name, type) \ -+ { \ -+ KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ ++ /* Clean the unprocessed job fault. After that, all the suspended ++ * contexts could be rescheduled. Remove all the failed atoms that ++ * belong to different contexts Resume all the contexts that were ++ * suspend due to failed job. 
++ */ ++ while (!list_empty(event_list)) { ++ kbase_job_fault_event_dequeue(kbdev, event_list); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ wake_up(&kbdev->job_fault_resume_wq); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + } -+ BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8), -+#else -+ PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), -+#endif + -+ PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), -+ PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), -+ PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), -+ -+ PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), -+ PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ PROP(MAX_THREADS, thread_props.max_threads), -+ PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), -+ PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), -+ PROP(MAX_REGISTERS, thread_props.max_registers), -+ PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), -+ PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), -+ PROP(IMPL_TECH, thread_props.impl_tech), -+ PROP(TLS_ALLOC, thread_props.tls_alloc), ++ dev_info(kbdev->dev, "debug job fault seq close"); + -+ PROP(RAW_SHADER_PRESENT, raw_props.shader_present), -+ PROP(RAW_TILER_PRESENT, raw_props.tiler_present), -+ PROP(RAW_L2_PRESENT, raw_props.l2_present), -+ PROP(RAW_STACK_PRESENT, raw_props.stack_present), -+ PROP(RAW_L2_FEATURES, raw_props.l2_features), -+ PROP(RAW_CORE_FEATURES, raw_props.core_features), -+ PROP(RAW_MEM_FEATURES, raw_props.mem_features), -+ PROP(RAW_MMU_FEATURES, raw_props.mmu_features), -+ PROP(RAW_AS_PRESENT, raw_props.as_present), -+ PROP(RAW_JS_PRESENT, raw_props.js_present), -+ PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), -+ PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), -+ PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), -+ PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), -+ PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), -+ PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), -+ PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), -+ PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), -+ PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), -+ PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), -+ PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), -+ PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), -+ PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), -+ PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), -+ PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), -+ PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), -+ PROP(RAW_TILER_FEATURES, raw_props.tiler_features), -+ PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), -+ PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), -+ PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), -+ PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), -+ PROP(RAW_GPU_ID, raw_props.gpu_id), -+ PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), -+ PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size), -+ PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), -+ PROP(RAW_THREAD_FEATURES, raw_props.thread_features), -+ PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), -+ PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), -+ PROP(RAW_GPU_FEATURES, raw_props.gpu_features), -+ PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), -+ PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), -+ PROP(COHERENCY_COHERENCY, 
coherency_info.coherency), -+ PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), -+ PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), -+ PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), -+ PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), -+ PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), -+ PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), -+ PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), -+ PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), -+ PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), -+ PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), -+ PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), -+ PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), -+ PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), -+ PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), -+ PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), -+ PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), ++ return 0; ++} + -+#undef PROP ++static const struct file_operations kbasep_debug_job_fault_fops = { ++ .owner = THIS_MODULE, ++ .open = debug_job_fault_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = debug_job_fault_release, +}; + -+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) ++/* ++ * Initialize debugfs entry for job fault dump ++ */ ++void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_gpu_props *kprops = &kbdev->gpu_props; -+ struct base_gpu_props *props = &kprops->props; -+ u32 count = ARRAY_SIZE(gpu_property_mapping); -+ u32 i; -+ u32 size = 0; -+ u8 *p; ++ debugfs_create_file("job_fault", 0400, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_debug_job_fault_fops); ++} + -+ for (i = 0; i < count; i++) { -+ /* 4 bytes for the ID, and the size of the property */ -+ size += 4 + gpu_property_mapping[i].size; -+ } + -+ kprops->prop_buffer_size = size; -+ kprops->prop_buffer = kzalloc(size, GFP_KERNEL); ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++{ + -+ if (!kprops->prop_buffer) { -+ kprops->prop_buffer_size = 0; -+ return -ENOMEM; -+ } ++ INIT_LIST_HEAD(&kbdev->job_fault_event_list); + -+ p = kprops->prop_buffer; ++ init_waitqueue_head(&(kbdev->job_fault_wq)); ++ init_waitqueue_head(&(kbdev->job_fault_resume_wq)); ++ spin_lock_init(&kbdev->job_fault_event_lock); + -+#define WRITE_U8(v) (*p++ = (v) & 0xFF) -+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) -+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) -+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) ++ kbdev->job_fault_resume_workq = alloc_workqueue( ++ "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); ++ if (!kbdev->job_fault_resume_workq) ++ return -ENOMEM; + -+ for (i = 0; i < count; i++) { -+ u32 type = gpu_property_mapping[i].type; -+ u8 type_size; -+ const size_t offset = gpu_property_mapping[i].offset; -+ const u64 dummy_backwards_compat_value = (u64)0; -+ const void *field; ++ atomic_set(&kbdev->job_fault_debug, 0); + -+ if (likely(offset < sizeof(struct base_gpu_props))) -+ field = ((const u8 *)props) + offset; -+ else -+ field = &dummy_backwards_compat_value; ++ return 0; ++} + -+ switch (gpu_property_mapping[i].size) { -+ case 1: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U8; -+ break; -+ case 2: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U16; -+ break; -+ case 4: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U32; -+ break; -+ 
case 8: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U64; -+ break; -+ default: -+ dev_err(kbdev->dev, -+ "Invalid gpu_property_mapping type=%d size=%d", -+ type, gpu_property_mapping[i].size); -+ return -EINVAL; -+ } ++/* ++ * Release the relevant resource per device ++ */ ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++{ ++ destroy_workqueue(kbdev->job_fault_resume_workq); ++} + -+ WRITE_U32((type<<2) | type_size); + -+ switch (type_size) { -+ case KBASE_GPUPROP_VALUE_SIZE_U8: -+ WRITE_U8(*((const u8 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U16: -+ WRITE_U16(*((const u16 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U32: -+ WRITE_U32(*((const u32 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U64: -+ WRITE_U64(*((const u64 *)field)); -+ break; -+ default: /* Cannot be reached */ -+ WARN_ON(1); -+ return -EINVAL; ++/* ++ * Initialize the relevant data structure per context ++ */ ++int kbase_debug_job_fault_context_init(struct kbase_context *kctx) ++{ ++ ++ /* We need allocate double size register range ++ * Because this memory will keep the register address and value ++ */ ++ kctx->reg_dump = vmalloc(0x4000 * 2); ++ if (kctx->reg_dump != NULL) { ++ if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == ++ false) { ++ vfree(kctx->reg_dump); ++ kctx->reg_dump = NULL; + } ++ INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); ++ atomic_set(&kctx->job_fault_count, 0); + } + + return 0; +} + -+void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) ++/* ++ * release the relevant resource per context ++ */ ++void kbase_debug_job_fault_context_term(struct kbase_context *kctx) +{ -+ kfree(kbdev->gpu_props.prop_buffer); ++ vfree(kctx->reg_dump); +} + -+int kbase_device_populate_max_freq(struct kbase_device *kbdev) ++void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) +{ -+ struct mali_base_gpu_core_props *core_props; ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + -+ /* obtain max configured gpu frequency, if devfreq is enabled then -+ * this will be overridden by the highest operating point found ++ /* Return early if the job fault part of the kbase_device is not ++ * initialized yet. An error can happen during the device probe after ++ * the privileged Kbase context was created for the HW counter dumping ++ * but before the job fault part is initialized. + */ -+ core_props = &(kbdev->gpu_props.props.core_props); -+#ifdef GPU_FREQ_KHZ_MAX -+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -+#else -+ core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; -+#endif ++ if (!kctx->kbdev->job_fault_resume_workq) ++ return; ++ ++ kbase_ctx_remove_pending_event(kctx); ++} + ++#else /* CONFIG_DEBUG_FS */ ++ ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++{ + return 0; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h ++ ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h new file mode 100644 -index 000000000..f0a97312c +index 000000000..059d9c455 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h -@@ -0,0 +1,154 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h +@@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2015, 2017, 2019-2022 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -217024,149 +220000,116 @@ index 000000000..f0a97312c + * + */ + -+/** -+ * DOC: Base kernel property query APIs -+ */ -+ -+#ifndef _KBASE_GPUPROPS_H_ -+#define _KBASE_GPUPROPS_H_ -+ -+#include "mali_kbase_gpuprops_types.h" ++#ifndef _KBASE_DEBUG_JOB_FAULT_H ++#define _KBASE_DEBUG_JOB_FAULT_H + -+/* Forward definition - see mali_kbase.h */ -+struct kbase_device; ++#include ++#include + -+/** -+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. -+ * @value: The value from which to extract bits. -+ * @offset: The first bit to extract (0 being the LSB). -+ * @size: The number of bits to extract. -+ * -+ * Context: @offset + @size <= 32. -+ * -+ * Return: Bits [@offset, @offset + @size) from @value. -+ */ -+/* from mali_cdsb.h */ -+#define KBASE_UBFX32(value, offset, size) \ -+ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) ++#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF + +/** -+ * kbase_gpuprops_set - Set up Kbase GPU properties. -+ * @kbdev: The struct kbase_device structure for the device ++ * kbase_debug_job_fault_dev_init - Create the fault event wait queue ++ * per device and initialize the required lists. ++ * @kbdev: Device pointer + * -+ * Set up Kbase GPU properties with information from the GPU registers ++ * Return: Zero on success or a negative error code. + */ -+void kbase_gpuprops_set(struct kbase_device *kbdev); ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** -+ * kbase_gpuprops_set_features - Set up Kbase GPU properties -+ * @kbdev: Device pointer -+ * -+ * This function sets up GPU properties that are dependent on the hardware -+ * features bitmask. This function must be preceeded by a call to -+ * kbase_hw_set_features_mask(). -+ * -+ * Return: Zero on success, Linux error code on failure ++ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs ++ * @kbdev: Device pointer + */ -+int kbase_gpuprops_set_features(struct kbase_device *kbdev); ++void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** -+ * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES -+ * @kbdev: Device pointer -+ * -+ * This function updates l2_features and the log2 cache size. -+ * The function expects GPU to be powered up and value of pm.active_count -+ * to be 1. -+ * -+ * Return: Zero on success, Linux error code for failure ++ * kbase_debug_job_fault_dev_term - Clean up resources created in ++ * kbase_debug_job_fault_dev_init. ++ * @kbdev: Device pointer + */ -+int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + +/** -+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer -+ * @kbdev: The kbase device -+ * -+ * Fills prop_buffer with the GPU properties for user space to read. -+ * -+ * Return: MALI_ERROR_NONE on success. Any other value indicates failure. ++ * kbase_debug_job_fault_context_init - Initialize the relevant ++ * data structure per context ++ * @kctx: KBase context pointer ++ * Return: 0 on success + */ -+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); ++int kbase_debug_job_fault_context_init(struct kbase_context *kctx); + +/** -+ * kbase_gpuprops_free_user_buffer - Free the GPU properties buffer. 
-+ * @kbdev: kbase device pointer -+ * -+ * Free the GPU properties buffer allocated from -+ * kbase_gpuprops_populate_user_buffer. ++ * kbase_debug_job_fault_context_term - Release the relevant ++ * resource per context ++ * @kctx: KBase context pointer + */ -+void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); ++void kbase_debug_job_fault_context_term(struct kbase_context *kctx); + +/** -+ * kbase_device_populate_max_freq - Populate max gpu frequency. -+ * @kbdev: kbase device pointer ++ * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault ++ * dumping on context termination. + * -+ * Populate the maximum gpu frequency to be used when devfreq is disabled. ++ * @kctx: KBase context pointer + * -+ * Return: 0 on success and non-zero value on failure. ++ * This function is called during context termination to unblock the atom for ++ * which the job fault occurred and also the atoms following it. This is needed ++ * otherwise the wait for zero jobs could timeout (leading to an assertion ++ * failure, kernel panic in debug builds) in the pathological case where ++ * although the thread/daemon capturing the job fault events is running, ++ * but for some reasons has stopped consuming the events. + */ -+int kbase_device_populate_max_freq(struct kbase_device *kbdev); ++void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); + +/** -+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value -+ * @gpu_props: the &base_gpu_props structure ++ * kbase_debug_job_fault_process - Process the failed job. + * -+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into -+ * separate fields (version_status, minor_revision, major_revision, product_id) -+ * stored in base_gpu_props::core_props. -+ */ -+void kbase_gpuprops_update_core_props_gpu_id( -+ struct base_gpu_props * const gpu_props); -+ -+/** -+ * kbase_gpuprops_set_max_config - Set the max config information -+ * @kbdev: Device pointer -+ * @max_config: Maximum configuration data to be updated ++ * @katom: The failed atom pointer ++ * @completion_code: the job status + * -+ * This function sets max_config in the kbase_gpu_props. ++ * It will send a event and wake up the job fault waiting queue ++ * Then create a work queue to wait for job dump finish ++ * This function should be called in the interrupt handler and before ++ * jd_done that make sure the jd_done_worker will be delayed until the ++ * job dump finish ++ * ++ * Return: true if dump is going on + */ -+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, -+ const struct max_config_props *max_config); ++bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, ++ u32 completion_code); + +/** -+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources -+ * @kbdev: The &struct kbase_device structure for the device -+ * @curr_config: The &struct curr_config_props structure to receive the result -+ * -+ * Fill the &struct curr_config_props structure with values from the GPU -+ * configuration registers. 
++ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers ++ * address during the job fault process, the relevant registers will ++ * be saved when a job fault happen ++ * @kctx: KBase context pointer ++ * @reg_range: Maximum register address space + * -+ * Return: Zero on success, Linux error code on failure ++ * Return: true if initializing successfully + */ -+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, -+ struct curr_config_props * const curr_config); ++bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, ++ int reg_range); + +/** -+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update -+ * @kbdev: The &struct kbase_device structure for the device ++ * kbase_job_fault_get_reg_snapshot - Read the interested registers for ++ * failed job dump + * -+ * Requests the current configuration to be updated next time the -+ * kbase_gpuprops_get_curr_config_props() is called. ++ * @kctx: KBase context pointer + * -+ * Return: Zero on success, Linux error code on failure ++ * Return: true if getting registers successfully + */ -+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); ++bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); + -+#endif /* _KBASE_GPUPROPS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h ++#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c new file mode 100644 -index 000000000..45cb603fa +index 000000000..418bb1908 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h -@@ -0,0 +1,170 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c +@@ -0,0 +1,138 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -217184,165 +220127,178 @@ index 000000000..45cb603fa + * + */ + -+/** -+ * DOC: Base kernel property query APIs ++/* ++ * Debugfs interface to dump information about GPU allocations in kctx + */ + -+#ifndef _KBASE_GPUPROPS_TYPES_H_ -+#define _KBASE_GPUPROPS_TYPES_H_ -+ -+#include ++#include "mali_kbase_debug_mem_allocs.h" ++#include "mali_kbase.h" + -+#define KBASE_GPU_SPEED_MHZ 123 -+#define KBASE_GPU_PC_SIZE_LOG2 24U ++#include ++#include ++#include + -+struct kbase_gpuprops_regdump { -+ u32 gpu_id; -+ u32 l2_features; -+ u32 l2_config; -+ u32 l2_asn_hash[ASN_HASH_COUNT]; -+ u32 core_features; -+ u32 tiler_features; -+ u32 mem_features; -+ u32 mmu_features; -+ u32 as_present; -+ u32 js_present; -+ u32 thread_max_threads; -+ u32 thread_max_workgroup_size; -+ u32 thread_max_barrier_size; -+ u32 thread_features; -+ u32 thread_tls_alloc; -+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; -+ u32 js_features[GPU_MAX_JOB_SLOTS]; -+ u32 shader_present_lo; -+ u32 shader_present_hi; -+ u32 tiler_present_lo; -+ u32 tiler_present_hi; -+ u32 l2_present_lo; -+ u32 l2_present_hi; -+ u32 stack_present_lo; -+ u32 stack_present_hi; -+ u32 coherency_features; -+ u32 gpu_features_lo; -+ u32 gpu_features_hi; -+}; ++#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** -+ * struct kbase_current_config_regdump - Register dump for current resources -+ * allocated to the GPU. -+ * @mem_features: Memory system features. Contains information about the -+ * features of the memory system. Used here to get the L2 slice -+ * count. -+ * @shader_present_lo: Shader core present bitmap. Low word. -+ * @shader_present_hi: Shader core present bitmap. High word. -+ * @l2_present_lo: L2 cache present bitmap. Low word. -+ * @l2_present_hi: L2 cache present bitmap. High word. ++ * debug_zone_mem_allocs_show - Show information from specific rbtree ++ * @zone: Name of GPU virtual memory zone ++ * @rbtree: Pointer to the root of the rbtree associated with @zone ++ * @sfile: The debugfs entry + * -+ * Register dump structure used to store the resgisters data realated to the -+ * current resources allocated to the GPU. ++ * This function is called to show information about all the GPU allocations of a ++ * a particular zone within GPU virtual memory space of a context. ++ * The information like the start virtual address and size (in bytes) is shown for ++ * every GPU allocation mapped in the zone. 
+ */ -+struct kbase_current_config_regdump { -+ u32 mem_features; -+ u32 shader_present_lo; -+ u32 shader_present_hi; -+ u32 l2_present_lo; -+ u32 l2_present_hi; -+}; -+ -+struct kbase_gpu_cache_props { -+ u8 associativity; -+ u8 external_bus_width; -+}; -+ -+struct kbase_gpu_mem_props { -+ u8 core_group; -+}; ++static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) ++{ ++ struct rb_node *p; ++ struct kbase_va_region *reg; ++ const char *type_names[5] = { ++ "Native", ++ "Imported UMM", ++ "Imported user buf", ++ "Alias", ++ "Raw" ++ }; + -+struct kbase_gpu_mmu_props { -+ u8 va_bits; -+ u8 pa_bits; -+}; ++#define MEM_ALLOCS_HEADER \ ++ " VA, VA size, Commit size, Flags, Mem type\n" ++ seq_printf(sfile, "Zone name: %s\n:", zone); ++ seq_printf(sfile, MEM_ALLOCS_HEADER); ++ for (p = rb_first(rbtree); p; p = rb_next(p)) { ++ reg = rb_entry(p, struct kbase_va_region, rblink); ++ if (!(reg->flags & KBASE_REG_FREE)) { ++ seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n", ++ reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, ++ kbase_reg_current_backed_size(reg) << PAGE_SHIFT, ++ reg->flags, type_names[reg->gpu_alloc->type]); ++ } ++ } ++} + +/** -+ * struct max_config_props - Properties based on the maximum resources -+ * available. -+ * @l2_slices: Maximum number of L2 slices that can be assinged to the GPU -+ * during runtime. -+ * @padding: Padding to a multiple of 64 bits. -+ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during -+ * runtime. ++ * debug_ctx_mem_allocs_show - Show information about GPU allocations in a kctx ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * -+ * Properties based on the maximum resources available (not necessarly -+ * allocated at that moment). Used to provide the maximum configuration to the -+ * userspace allowing the applications to allocate enough resources in case the -+ * real allocated resources change. ++ * Return: ++ * 0 if successfully prints data in debugfs entry file ++ * -1 if it encountered an error + */ -+struct max_config_props { -+ u8 l2_slices; -+ u8 padding[3]; -+ u32 core_mask; -+}; ++static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) ++{ ++ struct kbase_context *const kctx = sfile->private; + -+/** -+ * struct curr_config_props - Properties based on the current resources -+ * allocated to the GPU. -+ * @l2_present: Current L2 present bitmap that is allocated to the GPU. -+ * @shader_present: Current shader present bitmap that is allocated to the GPU. -+ * @num_cores: Current number of shader cores allocated to the GPU. -+ * @l2_slices: Current number of L2 slices allocated to the GPU. -+ * @update_needed: Defines if it is necessary to re-read the registers to -+ * update the current allocated resources. -+ * @padding: Padding to a multiple of 64 bits. -+ * -+ * Properties based on the current resource available. Used for operations with -+ * hardware interactions to avoid using userspace data that can be based on -+ * the maximum resource available. 
-+ */ -+struct curr_config_props { -+ u64 l2_present; -+ u64 shader_present; -+ u16 num_cores; -+ u8 l2_slices; -+ bool update_needed; -+ u8 padding[4]; -+}; ++ kbase_gpu_vm_lock(kctx); + -+struct kbase_gpu_props { -+ /* kernel-only properties */ -+ u8 num_cores; -+ u8 num_core_groups; -+ u8 num_address_spaces; -+ u8 num_job_slots; ++ debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); ++ debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile); ++ debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); + -+ struct kbase_gpu_cache_props l2_props; ++#if MALI_USE_CSF ++ debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); ++ debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); ++#endif /* MALI_USE_CSF */ + -+ struct kbase_gpu_mem_props mem; -+ struct kbase_gpu_mmu_props mmu; ++ kbase_gpu_vm_unlock(kctx); ++ return 0; ++} + -+ /* Properties based on the current resource available */ -+ struct curr_config_props curr_config; ++/* ++ * File operations related to debugfs entry for mem_zones ++ */ ++static int debug_mem_allocs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, debug_ctx_mem_allocs_show, in->i_private); ++} + -+ /* Properties based on the maximum resource available */ -+ struct max_config_props max_config; ++static const struct file_operations kbase_debug_mem_allocs_fops = { ++ .owner = THIS_MODULE, ++ .open = debug_mem_allocs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ /* Properties shared with userspace */ -+ struct base_gpu_props props; ++/* ++ * Initialize debugfs entry for mem_allocs ++ */ ++void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) ++{ ++ /* Caller already ensures this, but we keep the pattern for ++ * maintenance safety. ++ */ ++ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+ u32 prop_buffer_size; -+ void *prop_buffer; -+}; ++ debugfs_create_file("mem_allocs", 0400, kctx->kctx_dentry, kctx, ++ &kbase_debug_mem_allocs_fops); ++} ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) ++{ ++} ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h +new file mode 100644 +index 000000000..8cf69c2cb +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+#endif /* _KBASE_GPUPROPS_TYPES_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c ++#ifndef _KBASE_DEBUG_MEM_ALLOCS_H ++#define _KBASE_DEBUG_MEM_ALLOCS_H ++ ++#include ++ ++/** ++ * kbase_debug_mem_allocs_init() - Initialize the mem_allocs debugfs file ++ * @kctx: Pointer to kernel base context ++ * ++ * This function creates a "mem_allocs" file for a context to show infor about the ++ * GPU allocations created for that context. ++ * ++ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the ++ * parent directory. ++ */ ++void kbase_debug_mem_allocs_init(struct kbase_context *kctx); ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c new file mode 100644 -index 000000000..0eba889e5 +index 000000000..ce87a0070 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c -@@ -0,0 +1,275 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +@@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -217360,270 +220316,405 @@ index 000000000..0eba889e5 + * + */ + -+#include "mali_kbase_gwt.h" -+#include ++/* ++ * Debugfs interface to dump the memory visible to the GPU ++ */ + -+static inline void kbase_gpu_gwt_setup_page_permission( -+ struct kbase_context *kctx, -+ unsigned long flag, -+ struct rb_node *node) -+{ -+ struct rb_node *rbnode = node; ++#include "mali_kbase_debug_mem_view.h" ++#include "mali_kbase.h" + -+ while (rbnode) { -+ struct kbase_va_region *reg; -+ int err = 0; ++#include ++#include + -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) && -+ (reg->flags & KBASE_REG_GPU_WR)) { -+ err = kbase_mmu_update_pages(kctx, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ reg->gpu_alloc->nents, -+ reg->flags & flag, -+ reg->gpu_alloc->group_id); -+ if (err) -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n"); -+ } ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+ rbnode = rb_next(rbnode); -+ } ++#define SHOW_GPU_MEM_DATA(type, format) \ ++{ \ ++ unsigned int i, j; \ ++ const type *ptr = (type *)cpu_addr; \ ++ const unsigned int col_width = sizeof(type); \ ++ const unsigned int row_width = (col_width == sizeof(u64)) ? 
32 : 16; \ ++ const unsigned int num_cols = row_width / col_width; \ ++ for (i = 0; i < PAGE_SIZE; i += row_width) { \ ++ seq_printf(m, "%016llx:", gpu_addr + i); \ ++ for (j = 0; j < num_cols; j++) \ ++ seq_printf(m, format, ptr[j]); \ ++ ptr += num_cols; \ ++ seq_putc(m, '\n'); \ ++ } \ +} + -+static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, -+ unsigned long flag) ++struct debug_mem_mapping { ++ struct list_head node; ++ ++ struct kbase_mem_phy_alloc *alloc; ++ unsigned long flags; ++ ++ u64 start_pfn; ++ size_t nr_pages; ++}; ++ ++struct debug_mem_data { ++ struct list_head mapping_list; ++ struct kbase_context *kctx; ++ unsigned int column_width; ++}; ++ ++struct debug_mem_seq_off { ++ struct list_head *lh; ++ size_t offset; ++}; ++ ++static void *debug_mem_start(struct seq_file *m, loff_t *_pos) +{ -+ kbase_gpu_gwt_setup_page_permission(kctx, flag, -+ rb_first(&(kctx->reg_rbtree_same))); -+ kbase_gpu_gwt_setup_page_permission(kctx, flag, -+ rb_first(&(kctx->reg_rbtree_custom))); ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data; ++ struct debug_mem_mapping *map; ++ loff_t pos = *_pos; ++ ++ list_for_each_entry(map, &mem_data->mapping_list, node) { ++ if (pos >= map->nr_pages) { ++ pos -= map->nr_pages; ++ } else { ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return NULL; ++ data->lh = &map->node; ++ data->offset = pos; ++ return data; ++ } ++ } ++ ++ /* Beyond the end */ ++ return NULL; +} + ++static void debug_mem_stop(struct seq_file *m, void *v) ++{ ++ kfree(v); ++} + -+int kbase_gpu_gwt_start(struct kbase_context *kctx) ++static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) +{ -+ kbase_gpu_vm_lock(kctx); -+ if (kctx->gwt_enabled) { -+ kbase_gpu_vm_unlock(kctx); -+ return -EBUSY; -+ } ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data = v; ++ struct debug_mem_mapping *map; + -+ INIT_LIST_HEAD(&kctx->gwt_current_list); -+ INIT_LIST_HEAD(&kctx->gwt_snapshot_list); ++ map = list_entry(data->lh, struct debug_mem_mapping, node); + -+#if !MALI_USE_CSF -+ /* If GWT is enabled using new vector dumping format -+ * from user space, back up status of the job serialization flag and -+ * use full serialisation of jobs for dumping. -+ * Status will be restored on end of dumping in gwt_stop. -+ */ -+ kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs; -+ kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | -+ KBASE_SERIALIZE_INTER_SLOT; ++ if (data->offset < map->nr_pages - 1) { ++ data->offset++; ++ ++*pos; ++ return data; ++ } + -+#endif -+ /* Mark gwt enabled before making pages read only in case a -+ * write page fault is triggered while we're still in this loop. -+ * (kbase_gpu_vm_lock() doesn't prevent this!) 
-+ */ -+ kctx->gwt_enabled = true; -+ kctx->gwt_was_enabled = true; ++ if (list_is_last(data->lh, &mem_data->mapping_list)) { ++ kfree(data); ++ return NULL; ++ } + -+ kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); ++ data->lh = data->lh->next; ++ data->offset = 0; ++ ++*pos; + -+ kbase_gpu_vm_unlock(kctx); -+ return 0; ++ return data; +} + -+int kbase_gpu_gwt_stop(struct kbase_context *kctx) ++static int debug_mem_show(struct seq_file *m, void *v) +{ -+ struct kbasep_gwt_list_element *pos, *n; ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data = v; ++ struct debug_mem_mapping *map; ++ unsigned long long gpu_addr; ++ struct page *page; ++ void *cpu_addr; ++ pgprot_t prot = PAGE_KERNEL; + -+ kbase_gpu_vm_lock(kctx); -+ if (!kctx->gwt_enabled) { -+ kbase_gpu_vm_unlock(kctx); -+ return -EINVAL; -+ } ++ map = list_entry(data->lh, struct debug_mem_mapping, node); + -+ list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) { -+ list_del(&pos->link); -+ kfree(pos); -+ } ++ kbase_gpu_vm_lock(mem_data->kctx); + -+ list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) { -+ list_del(&pos->link); -+ kfree(pos); ++ if (data->offset >= map->alloc->nents) { ++ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + ++ data->offset) << PAGE_SHIFT); ++ goto out; + } + -+#if !MALI_USE_CSF -+ kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; -+#endif ++ if (!(map->flags & KBASE_REG_CPU_CACHED)) ++ prot = pgprot_writecombine(prot); + -+ kbase_gpu_gwt_setup_pages(kctx, ~0UL); ++ page = as_page(map->alloc->pages[data->offset]); ++ cpu_addr = vmap(&page, 1, VM_MAP, prot); ++ if (!cpu_addr) ++ goto out; + -+ kctx->gwt_enabled = false; -+ kbase_gpu_vm_unlock(kctx); -+ return 0; -+} ++ gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT; + -+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) -+static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b) -+#else -+static int list_cmp_function(void *priv, struct list_head *a, -+ struct list_head *b) -+#endif -+{ -+ const struct kbasep_gwt_list_element *elementA = -+ container_of(a, struct kbasep_gwt_list_element, link); -+ const struct kbasep_gwt_list_element *elementB = -+ container_of(b, struct kbasep_gwt_list_element, link); ++ /* Cases for 4 supported values of column_width for showing ++ * the GPU memory contents. 
++ */ ++ switch (mem_data->column_width) { ++ case 1: ++ SHOW_GPU_MEM_DATA(u8, " %02hhx"); ++ break; ++ case 2: ++ SHOW_GPU_MEM_DATA(u16, " %04hx"); ++ break; ++ case 4: ++ SHOW_GPU_MEM_DATA(u32, " %08x"); ++ break; ++ case 8: ++ SHOW_GPU_MEM_DATA(u64, " %016llx"); ++ break; ++ default: ++ dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width"); ++ } + -+ CSTD_UNUSED(priv); ++ vunmap(cpu_addr); + -+ if (elementA->page_addr > elementB->page_addr) -+ return 1; -+ return -1; ++ seq_putc(m, '\n'); ++ ++out: ++ kbase_gpu_vm_unlock(mem_data->kctx); ++ return 0; +} + -+static void kbase_gpu_gwt_collate(struct kbase_context *kctx, -+ struct list_head *snapshot_list) ++static const struct seq_operations ops = { ++ .start = debug_mem_start, ++ .next = debug_mem_next, ++ .stop = debug_mem_stop, ++ .show = debug_mem_show, ++}; ++ ++static int debug_mem_zone_open(struct rb_root *rbtree, ++ struct debug_mem_data *mem_data) +{ -+ struct kbasep_gwt_list_element *pos, *n; -+ struct kbasep_gwt_list_element *collated = NULL; ++ int ret = 0; ++ struct rb_node *p; ++ struct kbase_va_region *reg; ++ struct debug_mem_mapping *mapping; + -+ /* Sort the list */ -+ list_sort(NULL, snapshot_list, list_cmp_function); ++ for (p = rb_first(rbtree); p; p = rb_next(p)) { ++ reg = rb_entry(p, struct kbase_va_region, rblink); + -+ /* Combine contiguous areas. */ -+ list_for_each_entry_safe(pos, n, snapshot_list, link) { -+ if (collated == NULL || collated->region != -+ pos->region || -+ (collated->page_addr + -+ (collated->num_pages * PAGE_SIZE)) != -+ pos->page_addr) { -+ /* This is the first time through, a new region or -+ * is not contiguous - start collating to this element ++ if (reg->gpu_alloc == NULL) ++ /* Empty region - ignore */ ++ continue; ++ ++ if (reg->flags & KBASE_REG_PROTECTED) { ++ /* CPU access to protected memory is forbidden - so ++ * skip this GPU virtual region. + */ -+ collated = pos; -+ } else { -+ /* contiguous so merge */ -+ collated->num_pages += pos->num_pages; -+ /* remove element from list */ -+ list_del(&pos->link); -+ kfree(pos); ++ continue; ++ } ++ ++ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); ++ if (!mapping) { ++ ret = -ENOMEM; ++ goto out; + } ++ ++ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ mapping->start_pfn = reg->start_pfn; ++ mapping->nr_pages = reg->nr_pages; ++ mapping->flags = reg->flags; ++ list_add_tail(&mapping->node, &mem_data->mapping_list); + } ++ ++out: ++ return ret; +} + -+int kbase_gpu_gwt_dump(struct kbase_context *kctx, -+ union kbase_ioctl_cinstr_gwt_dump *gwt_dump) ++static int debug_mem_open(struct inode *i, struct file *file) +{ -+ const u32 ubuf_size = gwt_dump->in.len; -+ u32 ubuf_count = 0; -+ __user void *user_addr = (__user void *) -+ (uintptr_t)gwt_dump->in.addr_buffer; -+ __user void *user_sizes = (__user void *) -+ (uintptr_t)gwt_dump->in.size_buffer; ++ struct kbase_context *const kctx = i->i_private; ++ struct debug_mem_data *mem_data; ++ int ret; ++ ++ if (get_file_rcu(kctx->filp) == 0) ++ return -ENOENT; ++ ++ /* Check if file was opened in write mode. GPU memory contents ++ * are returned only when the file is not opened in write mode. 
++ */ ++ if (file->f_mode & FMODE_WRITE) { ++ file->private_data = kctx; ++ return 0; ++ } ++ ++ ret = seq_open(file, &ops); ++ if (ret) ++ goto open_fail; ++ ++ mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); ++ if (!mem_data) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ mem_data->kctx = kctx; ++ ++ INIT_LIST_HEAD(&mem_data->mapping_list); + + kbase_gpu_vm_lock(kctx); + -+ if (!kctx->gwt_enabled) { ++ mem_data->column_width = kctx->mem_view_column_width; ++ ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); ++ if (ret != 0) { + kbase_gpu_vm_unlock(kctx); -+ /* gwt_dump shouldn't be called when gwt is disabled */ -+ return -EPERM; ++ goto out; + } + -+ if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer -+ || !gwt_dump->in.size_buffer) { ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); ++ if (ret != 0) { + kbase_gpu_vm_unlock(kctx); -+ /* We don't have any valid user space buffer to copy the -+ * write modified addresses. -+ */ -+ return -EINVAL; ++ goto out; + } + -+ if (list_empty(&kctx->gwt_snapshot_list) && -+ !list_empty(&kctx->gwt_current_list)) { -+ -+ list_replace_init(&kctx->gwt_current_list, -+ &kctx->gwt_snapshot_list); ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); ++ if (ret != 0) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; ++ } + -+ /* We have collected all write faults so far -+ * and they will be passed on to user space. -+ * Reset the page flags state to allow collection of -+ * further write faults. -+ */ -+ kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); ++#if MALI_USE_CSF ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data); ++ if (ret != 0) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; ++ } + -+ /* Sort and combine consecutive pages in the dump list*/ -+ kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list); ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data); ++ if (ret != 0) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; + } ++#endif + -+ while ((!list_empty(&kctx->gwt_snapshot_list))) { -+ u64 addr_buffer[32]; -+ u64 num_page_buffer[32]; -+ u32 count = 0; -+ int err; -+ struct kbasep_gwt_list_element *dump_info, *n; ++ kbase_gpu_vm_unlock(kctx); + -+ list_for_each_entry_safe(dump_info, n, -+ &kctx->gwt_snapshot_list, link) { -+ addr_buffer[count] = dump_info->page_addr; -+ num_page_buffer[count] = dump_info->num_pages; -+ count++; -+ list_del(&dump_info->link); -+ kfree(dump_info); -+ if (ARRAY_SIZE(addr_buffer) == count || -+ ubuf_size == (ubuf_count + count)) -+ break; ++ ((struct seq_file *)file->private_data)->private = mem_data; ++ ++ return 0; ++ ++out: ++ if (mem_data) { ++ while (!list_empty(&mem_data->mapping_list)) { ++ struct debug_mem_mapping *mapping; ++ ++ mapping = list_first_entry(&mem_data->mapping_list, ++ struct debug_mem_mapping, node); ++ kbase_mem_phy_alloc_put(mapping->alloc); ++ list_del(&mapping->node); ++ kfree(mapping); + } ++ kfree(mem_data); ++ } ++ seq_release(i, file); ++open_fail: ++ fput(kctx->filp); + -+ if (count) { -+ err = copy_to_user((user_addr + -+ (ubuf_count * sizeof(u64))), -+ (void *)addr_buffer, -+ count * sizeof(u64)); -+ if (err) { -+ dev_err(kctx->kbdev->dev, "Copy to user failure\n"); -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+ } -+ err = copy_to_user((user_sizes + -+ (ubuf_count * sizeof(u64))), -+ (void *)num_page_buffer, -+ count * sizeof(u64)); -+ if (err) { -+ dev_err(kctx->kbdev->dev, "Copy to user failure\n"); -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+ } ++ return ret; ++} + -+ ubuf_count += count; ++static int 
debug_mem_release(struct inode *inode, struct file *file) ++{ ++ struct kbase_context *const kctx = inode->i_private; ++ ++ /* If the file wasn't opened in write mode, then release the ++ * memory allocated to show the GPU memory contents. ++ */ ++ if (!(file->f_mode & FMODE_WRITE)) { ++ struct seq_file *sfile = file->private_data; ++ struct debug_mem_data *mem_data = sfile->private; ++ struct debug_mem_mapping *mapping; ++ ++ seq_release(inode, file); ++ ++ while (!list_empty(&mem_data->mapping_list)) { ++ mapping = list_first_entry(&mem_data->mapping_list, ++ struct debug_mem_mapping, node); ++ kbase_mem_phy_alloc_put(mapping->alloc); ++ list_del(&mapping->node); ++ kfree(mapping); + } + -+ if (ubuf_count == ubuf_size) -+ break; ++ kfree(mem_data); + } + -+ if (!list_empty(&kctx->gwt_snapshot_list)) -+ gwt_dump->out.more_data_available = 1; -+ else -+ gwt_dump->out.more_data_available = 0; ++ fput(kctx->filp); + -+ gwt_dump->out.no_of_addr_collected = ubuf_count; -+ kbase_gpu_vm_unlock(kctx); + return 0; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h ++ ++static ssize_t debug_mem_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ struct kbase_context *const kctx = file->private_data; ++ unsigned int column_width = 0; ++ int ret = 0; ++ ++ CSTD_UNUSED(ppos); ++ ++ ret = kstrtouint_from_user(ubuf, count, 0, &column_width); ++ ++ if (ret) ++ return ret; ++ if (!is_power_of_2(column_width)) { ++ dev_dbg(kctx->kbdev->dev, ++ "Column width %u not a multiple of power of 2", column_width); ++ return -EINVAL; ++ } ++ if (column_width > 8) { ++ dev_dbg(kctx->kbdev->dev, ++ "Column width %u greater than 8 not supported", column_width); ++ return -EINVAL; ++ } ++ ++ kbase_gpu_vm_lock(kctx); ++ kctx->mem_view_column_width = column_width; ++ kbase_gpu_vm_unlock(kctx); ++ ++ return count; ++} ++ ++static const struct file_operations kbase_debug_mem_view_fops = { ++ .owner = THIS_MODULE, ++ .open = debug_mem_open, ++ .release = debug_mem_release, ++ .read = seq_read, ++ .write = debug_mem_write, ++ .llseek = seq_lseek ++}; ++ ++void kbase_debug_mem_view_init(struct kbase_context *const kctx) ++{ ++ /* Caller already ensures this, but we keep the pattern for ++ * maintenance safety. ++ */ ++ if (WARN_ON(!kctx) || ++ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ /* Default column width is 4 */ ++ kctx->mem_view_column_width = sizeof(u32); ++ ++ debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, ++ &kbase_debug_mem_view_fops); ++} ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h new file mode 100644 -index 000000000..9fdd68d62 +index 000000000..cb8050d9b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h -@@ -0,0 +1,54 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h +@@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2015, 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -217641,49 +220732,34 @@ index 000000000..9fdd68d62 + * + */ + -+#if !defined(_KBASE_GWT_H) -+#define _KBASE_GWT_H ++#ifndef _KBASE_DEBUG_MEM_VIEW_H ++#define _KBASE_DEBUG_MEM_VIEW_H + +#include -+#include + +/** -+ * kbase_gpu_gwt_start - Start the GPU write tracking -+ * @kctx: Pointer to kernel context ++ * kbase_debug_mem_view_init - Initialize the mem_view debugfs file ++ * @kctx: Pointer to kernel base context + * -+ * Return: 0 on success, error on failure. ++ * This function creates a "mem_view" file which can be used to get a view of ++ * the context's memory as the GPU sees it (i.e. using the GPU's page tables). ++ * ++ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the ++ * parent directory. + */ -+int kbase_gpu_gwt_start(struct kbase_context *kctx); ++void kbase_debug_mem_view_init(struct kbase_context *kctx); + -+/** -+ * kbase_gpu_gwt_stop - Stop the GPU write tracking -+ * @kctx: Pointer to kernel context -+ * -+ * Return: 0 on success, error on failure. -+ */ -+int kbase_gpu_gwt_stop(struct kbase_context *kctx); -+ -+/** -+ * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space. -+ * @kctx: Pointer to kernel context -+ * @gwt_dump: User space data to be passed. -+ * -+ * Return: 0 on success, error on failure. -+ */ -+int kbase_gpu_gwt_dump(struct kbase_context *kctx, -+ union kbase_ioctl_cinstr_gwt_dump *gwt_dump); -+ -+#endif /* _KBASE_GWT_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c new file mode 100644 -index 000000000..b07327a55 +index 000000000..1f8db32aa --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c -@@ -0,0 +1,437 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.c +@@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -217702,431 +220778,110 @@ index 000000000..b07327a55 + */ + +/* -+ * Run-time work-arounds helpers ++ * Debugfs interface to dump information about GPU_VA memory zones + */ + -+#include -+#include -+#include "gpu/mali_kbase_gpu_regmap.h" ++#include "mali_kbase_debug_mem_zones.h" +#include "mali_kbase.h" -+#include "mali_kbase_hw.h" -+ -+void kbase_hw_set_features_mask(struct kbase_device *kbdev) -+{ -+ const enum base_hw_feature *features; -+ u32 gpu_id; -+ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ -+ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { -+ case GPU_ID2_PRODUCT_TMIX: -+ features = base_hw_features_tMIx; -+ break; -+ case GPU_ID2_PRODUCT_THEX: -+ features = base_hw_features_tHEx; -+ break; -+ case GPU_ID2_PRODUCT_TSIX: -+ features = base_hw_features_tSIx; -+ break; -+ case GPU_ID2_PRODUCT_TDVX: -+ features = base_hw_features_tDVx; -+ break; -+ case GPU_ID2_PRODUCT_TNOX: -+ features = base_hw_features_tNOx; -+ break; -+ case GPU_ID2_PRODUCT_TGOX: -+ features = base_hw_features_tGOx; -+ break; -+ case GPU_ID2_PRODUCT_TTRX: -+ features = base_hw_features_tTRx; -+ break; -+ case GPU_ID2_PRODUCT_TNAX: -+ features = base_hw_features_tNAx; -+ break; -+ case GPU_ID2_PRODUCT_LBEX: -+ case GPU_ID2_PRODUCT_TBEX: -+ features = base_hw_features_tBEx; -+ break; -+ case GPU_ID2_PRODUCT_TBAX: -+ features = base_hw_features_tBAx; -+ break; -+ case GPU_ID2_PRODUCT_TODX: -+ case GPU_ID2_PRODUCT_LODX: -+ features = base_hw_features_tODx; -+ break; -+ case GPU_ID2_PRODUCT_TGRX: -+ features = base_hw_features_tGRx; -+ break; -+ case GPU_ID2_PRODUCT_TVAX: -+ features = base_hw_features_tVAx; -+ break; -+ case GPU_ID2_PRODUCT_TTUX: -+ case GPU_ID2_PRODUCT_LTUX: -+ features = base_hw_features_tTUx; -+ break; -+ case GPU_ID2_PRODUCT_TTIX: -+ case GPU_ID2_PRODUCT_LTIX: -+ features = base_hw_features_tTIx; -+ break; -+ default: -+ features = base_hw_features_generic; -+ break; -+ } + -+ for (; *features != BASE_HW_FEATURE_END; features++) -+ set_bit(*features, &kbdev->hw_features_mask[0]); ++#include ++#include + -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+ /* When dumping is enabled, need to disable flush reduction optimization -+ * for GPUs on which it is safe to have only cache clean operation at -+ * the end of job chain. -+ * This is required to make vector dump work. There is some discrepancy -+ * in the implementation of flush reduction optimization due to -+ * unclear or ambiguous ARCH spec. -+ */ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) -+ clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, -+ &kbdev->hw_features_mask[0]); -+#endif -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** -+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID -+ * @kbdev: Device pointer -+ * -+ * Return: pointer to an array of hardware issues, terminated by -+ * BASE_HW_ISSUE_END. ++ * debug_mem_zones_show - Show information about GPU_VA memory zones ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * -+ * In debugging versions of the driver, unknown versions of a known GPU will -+ * be treated as the most recent known version not later than the actual -+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced -+ * with the most recent known version. ++ * This function is called to get the contents of the @c mem_zones debugfs file. 
++ * This lists the start address and size (in pages) of each initialized memory ++ * zone within GPU_VA memory. + * -+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() -+ * before calling this function. ++ * Return: ++ * 0 if successfully prints data in debugfs entry file ++ * -1 if it encountered an error + */ -+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( -+ struct kbase_device *kbdev) ++static int debug_mem_zones_show(struct seq_file *sfile, void *data) +{ -+ const enum base_hw_issue *issues = NULL; ++ struct kbase_context *const kctx = sfile->private; ++ size_t i; + -+ struct base_hw_product { -+ u32 product_model; -+ struct { -+ u32 version; -+ const enum base_hw_issue *issues; -+ } map[7]; ++ const char *zone_names[KBASE_REG_ZONE_MAX] = { ++ "SAME_VA", ++ "CUSTOM_VA", ++ "EXEC_VA" ++#if MALI_USE_CSF ++ , ++ "MCU_SHARED_VA", ++ "EXEC_FIXED_VA", ++ "FIXED_VA" ++#endif + }; + -+ static const struct base_hw_product base_hw_products[] = { -+ { GPU_ID2_PRODUCT_TMIX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, -+ { U32_MAX /* sentinel value */, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_THEX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, -+ { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TSIX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TDVX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TNOX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TGOX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TTRX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TNAX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_LBEX, -+ { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, -+ { U32_MAX, NULL } } }, -+ -+ { 
GPU_ID2_PRODUCT_TBEX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TBAX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TODX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_LODX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TGRX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TVAX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TTUX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, -+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, -+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, -+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_LTUX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, -+ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, -+ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, -+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_TTIX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ { GPU_ID2_PRODUCT_LTIX, -+ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, -+ { U32_MAX, NULL } } }, -+ -+ }; ++ kbase_gpu_vm_lock(kctx); + -+ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; -+ const struct base_hw_product *product = NULL; -+ size_t p; ++ for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { ++ struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; + -+ /* Stop when we reach the end of the products array. */ -+ for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { -+ if (product_model == base_hw_products[p].product_model) { -+ product = &base_hw_products[p]; -+ break; ++ if (reg_zone->base_pfn) { ++ seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, ++ reg_zone->base_pfn, reg_zone->va_size_pages); + } + } + -+ if (product != NULL) { -+ /* Found a matching product. */ -+ const u32 version = gpu_id & GPU_ID2_VERSION; -+ u32 fallback_version = 0; -+ const enum base_hw_issue *fallback_issues = NULL; -+ size_t v; -+ -+ /* Stop when we reach the end of the map. */ -+ for (v = 0; product->map[v].version != U32_MAX; ++v) { -+ -+ if (version == product->map[v].version) { -+ /* Exact match so stop. */ -+ issues = product->map[v].issues; -+ break; -+ } -+ -+ /* Check whether this is a candidate for most recent -+ * known version not later than the actual version. 
-+ */ -+ if ((version > product->map[v].version) && -+ (product->map[v].version >= fallback_version)) { -+#if MALI_CUSTOMER_RELEASE -+ /* Match on version's major and minor fields */ -+ if (((version ^ product->map[v].version) >> -+ GPU_ID2_VERSION_MINOR_SHIFT) == 0) -+#endif -+ { -+ fallback_version = product->map[v].version; -+ fallback_issues = product->map[v].issues; -+ } -+ } -+ } -+ -+ if ((issues == NULL) && (fallback_issues != NULL)) { -+ /* Fall back to the issue set of the most recent known -+ * version not later than the actual version. -+ */ -+ issues = fallback_issues; -+ -+ dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n", -+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT); -+ dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n", -+ (fallback_version & GPU_ID2_VERSION_MAJOR) >> -+ GPU_ID2_VERSION_MAJOR_SHIFT, -+ (fallback_version & GPU_ID2_VERSION_MINOR) >> -+ GPU_ID2_VERSION_MINOR_SHIFT, -+ (fallback_version & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT); -+ dev_notice(kbdev->dev, -+ "Execution proceeding normally with fallback match\n"); -+ -+ gpu_id &= ~GPU_ID2_VERSION; -+ gpu_id |= fallback_version; -+ kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; -+ -+ kbase_gpuprops_update_core_props_gpu_id( -+ &kbdev->gpu_props.props); -+ } -+ } -+ return issues; ++ kbase_gpu_vm_unlock(kctx); ++ return 0; +} + -+int kbase_hw_set_issues_mask(struct kbase_device *kbdev) ++/* ++ * File operations related to debugfs entry for mem_zones ++ */ ++static int debug_mem_zones_open(struct inode *in, struct file *file) +{ -+ const enum base_hw_issue *issues; -+ u32 gpu_id; -+ u32 impl_tech; -+ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; -+ -+ if (impl_tech != IMPLEMENTATION_MODEL) { -+ issues = kbase_hw_get_issues_for_new_id(kbdev); -+ if (issues == NULL) { -+ dev_err(kbdev->dev, -+ "HW product - Unknown GPU ID %x", gpu_id); -+ return -EINVAL; -+ } -+ -+#if !MALI_CUSTOMER_RELEASE -+ /* The GPU ID might have been replaced with the last -+ * known version of the same GPU. 
-+ */ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+#endif -+ } else { -+ /* Software model */ -+ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { -+ case GPU_ID2_PRODUCT_TMIX: -+ issues = base_hw_issues_model_tMIx; -+ break; -+ case GPU_ID2_PRODUCT_THEX: -+ issues = base_hw_issues_model_tHEx; -+ break; -+ case GPU_ID2_PRODUCT_TSIX: -+ issues = base_hw_issues_model_tSIx; -+ break; -+ case GPU_ID2_PRODUCT_TDVX: -+ issues = base_hw_issues_model_tDVx; -+ break; -+ case GPU_ID2_PRODUCT_TNOX: -+ issues = base_hw_issues_model_tNOx; -+ break; -+ case GPU_ID2_PRODUCT_TGOX: -+ issues = base_hw_issues_model_tGOx; -+ break; -+ case GPU_ID2_PRODUCT_TTRX: -+ issues = base_hw_issues_model_tTRx; -+ break; -+ case GPU_ID2_PRODUCT_TNAX: -+ issues = base_hw_issues_model_tNAx; -+ break; -+ case GPU_ID2_PRODUCT_LBEX: -+ case GPU_ID2_PRODUCT_TBEX: -+ issues = base_hw_issues_model_tBEx; -+ break; -+ case GPU_ID2_PRODUCT_TBAX: -+ issues = base_hw_issues_model_tBAx; -+ break; -+ case GPU_ID2_PRODUCT_TODX: -+ case GPU_ID2_PRODUCT_LODX: -+ issues = base_hw_issues_model_tODx; -+ break; -+ case GPU_ID2_PRODUCT_TGRX: -+ issues = base_hw_issues_model_tGRx; -+ break; -+ case GPU_ID2_PRODUCT_TVAX: -+ issues = base_hw_issues_model_tVAx; -+ break; -+ case GPU_ID2_PRODUCT_TTUX: -+ case GPU_ID2_PRODUCT_LTUX: -+ issues = base_hw_issues_model_tTUx; -+ break; -+ case GPU_ID2_PRODUCT_TTIX: -+ case GPU_ID2_PRODUCT_LTIX: -+ issues = base_hw_issues_model_tTIx; -+ break; -+ default: -+ dev_err(kbdev->dev, -+ "HW issues - Unknown GPU ID %x", gpu_id); -+ return -EINVAL; -+ } -+ } ++ return single_open(file, debug_mem_zones_show, in->i_private); ++} + -+ dev_info(kbdev->dev, -+ "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", -+ (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> -+ GPU_ID2_PRODUCT_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_MAJOR) >> -+ GPU_ID2_ARCH_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_MINOR) >> -+ GPU_ID2_ARCH_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_REV) >> -+ GPU_ID2_ARCH_REV_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> -+ GPU_ID2_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MINOR) >> -+ GPU_ID2_VERSION_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT); ++static const struct file_operations kbase_debug_mem_zones_fops = { ++ .owner = THIS_MODULE, ++ .open = debug_mem_zones_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ for (; *issues != BASE_HW_ISSUE_END; issues++) -+ set_bit(*issues, &kbdev->hw_issues_mask[0]); ++/* ++ * Initialize debugfs entry for mem_zones ++ */ ++void kbase_debug_mem_zones_init(struct kbase_context *const kctx) ++{ ++ /* Caller already ensures this, but we keep the pattern for ++ * maintenance safety. 
++ */ ++ if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+ return 0; ++ debugfs_create_file("mem_zones", 0400, kctx->kctx_dentry, kctx, ++ &kbase_debug_mem_zones_fops); +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_debug_mem_zones_init(struct kbase_context *const kctx) ++{ ++} ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h new file mode 100644 -index 000000000..ddcddaaa4 +index 000000000..acf349b60 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h -@@ -0,0 +1,71 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_zones.h +@@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -218144,66 +220899,34 @@ index 000000000..ddcddaaa4 + * + */ + -+/** -+ * DOC: Run-time work-arounds helpers -+ */ -+ -+#ifndef _KBASE_HW_H_ -+#define _KBASE_HW_H_ -+ -+#include "mali_kbase_defs.h" -+ -+/** -+ * kbase_hw_has_issue - Tell whether a work-around should be enabled -+ * @kbdev: Device pointer -+ * @issue: issue to be checked -+ */ -+#define kbase_hw_has_issue(kbdev, issue)\ -+ test_bit(issue, &(kbdev)->hw_issues_mask[0]) ++#ifndef _KBASE_DEBUG_MEM_ZONES_H ++#define _KBASE_DEBUG_MEM_ZONES_H + -+/** -+ * kbase_hw_has_feature - Tell whether a feature is supported -+ * @kbdev: Device pointer -+ * @feature: feature to be checked -+ */ -+#define kbase_hw_has_feature(kbdev, feature)\ -+ test_bit(feature, &(kbdev)->hw_features_mask[0]) ++#include + +/** -+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID -+ * @kbdev: Device pointer -+ * -+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. -+ * -+ * The GPU ID is read from the @kbdev. ++ * kbase_debug_mem_zones_init() - Initialize the mem_zones sysfs file ++ * @kctx: Pointer to kernel base context + * -+ * In debugging versions of the driver, unknown versions of a known GPU with a -+ * new-format ID will be treated as the most recent known version not later -+ * than the actual version. In such circumstances, the GPU ID in @kbdev will -+ * also be replaced with the most recent known version. ++ * This function creates a "mem_zones" file which can be used to determine the ++ * address ranges of GPU memory zones, in the GPU Virtual-Address space. + * -+ * Note: The GPU configuration must have been read by -+ * kbase_gpuprops_get_props() before calling this function. -+ */ -+int kbase_hw_set_issues_mask(struct kbase_device *kbdev); -+ -+/** -+ * kbase_hw_set_features_mask - Set the features mask depending on the GPU ID -+ * @kbdev: Device pointer ++ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the ++ * parent directory. 
+ */ -+void kbase_hw_set_features_mask(struct kbase_device *kbdev); ++void kbase_debug_mem_zones_init(struct kbase_context *kctx); + -+#endif /* _KBASE_HW_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c new file mode 100644 -index 000000000..0da4eb258 +index 000000000..c846491e7 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c +@@ -0,0 +1,248 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2015, 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -218221,188 +220944,243 @@ index 000000000..0da4eb258 + * + */ + -+/* -+ * HW access backend common APIs -+ */ ++#include ++#include ++#include ++#include + -+#ifndef _KBASE_HWACCESS_BACKEND_H_ -+#define _KBASE_HWACCESS_BACKEND_H_ ++#include "mali_kbase_debugfs_helper.h" + -+/** -+ * kbase_backend_devfreq_init - Perform backend devfreq related initialization. -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, or an error code on failure. ++/* Arbitrary maximum size to prevent user space allocating too much kernel ++ * memory + */ -+int kbase_backend_devfreq_init(struct kbase_device *kbdev); ++#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u) + +/** -+ * kbase_backend_devfreq_term - Perform backend-devfreq termination. -+ * @kbdev: Device pointer -+ */ -+void kbase_backend_devfreq_term(struct kbase_device *kbdev); -+ -+#endif /* _KBASE_HWACCESS_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h -new file mode 100644 -index 000000000..62a6ec51b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h -@@ -0,0 +1,50 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * set_attr_from_string - Parse a string to set elements of an array + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @buf: Input string to parse. Must be nul-terminated! ++ * @array: Address of an object that can be accessed like an array. ++ * @nelems: Number of elements in the array. ++ * @set_attr_fn: Function to be called back for each array element. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This is the core of the implementation of ++ * kbase_debugfs_helper_set_attr_from_string. 
The only difference between the ++ * two functions is that this one requires the input string to be writable. + * ++ * Return: 0 if success, negative error code otherwise. + */ ++static int ++set_attr_from_string(char *const buf, void *const array, size_t const nelems, ++ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) ++{ ++ size_t index, err = 0; ++ char *ptr = buf; + -+/** -+ * DOC: HW access common definitions -+ */ ++ for (index = 0; index < nelems && *ptr; ++index) { ++ unsigned long new_size; ++ size_t len; ++ char sep; + -+#ifndef _KBASE_HWACCESS_DEFS_H_ -+#define _KBASE_HWACCESS_DEFS_H_ ++ /* Drop leading spaces */ ++ while (*ptr == ' ') ++ ptr++; + -+#include ++ len = strcspn(ptr, "\n "); ++ if (len == 0) { ++ /* No more values (allow this) */ ++ break; ++ } + -+/** -+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific -+ * data for the HW access layer. -+ * hwaccess_lock (a spinlock) must be held when -+ * accessing this structure. -+ * @active_kctx: pointer to active kbase context which last submitted an -+ * atom to GPU and while the context is active it can -+ * submit new atoms to GPU from the irq context also, without -+ * going through the bottom half of job completion path. -+ * @backend: GPU backend specific data for HW access layer -+ */ -+struct kbase_hwaccess_data { -+#if !MALI_USE_CSF -+ struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; -+#endif ++ /* Substitute a nul terminator for a space character ++ * to make the substring valid for kstrtoul. ++ */ ++ sep = ptr[len]; ++ if (sep == ' ') ++ ptr[len++] = '\0'; + -+ struct kbase_backend_data backend; -+}; ++ err = kstrtoul(ptr, 0, &new_size); ++ if (err) ++ break; + -+#endif /* _KBASE_HWACCESS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h -new file mode 100644 -index 000000000..f537b7f0a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h -@@ -0,0 +1,88 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Skip the substring (including any premature nul terminator) ++ */ ++ ptr += len; + -+/** -+ * DOC: Base kernel property query backend APIs -+ */ ++ set_attr_fn(array, index, new_size); ++ } + -+#ifndef _KBASE_HWACCESS_GPUPROPS_H_ -+#define _KBASE_HWACCESS_GPUPROPS_H_ ++ return err; ++} + -+/** -+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from -+ * GPU -+ * @kbdev: Device pointer -+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure -+ * -+ * The caller should ensure that GPU remains powered-on during this function. 
-+ * -+ * Return: Zero for succeess or a Linux error code -+ */ -+int kbase_backend_gpuprops_get(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump); ++int kbase_debugfs_string_validator(char *const buf) ++{ ++ int err = 0; ++ char *ptr = buf; + -+/** -+ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with -+ * relevant GPU properties read from -+ * the GPU registers. -+ * @kbdev: Device pointer. -+ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump -+ * structure. -+ * -+ * The caller should ensure that GPU remains powered-on during this function and -+ * the caller must ensure this function returns success before using the values -+ * returned in the curr_config_regdump in any part of the kernel. -+ * -+ * Return: Zero for succeess or a Linux error code -+ */ -+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, -+ struct kbase_current_config_regdump *curr_config_regdump); ++ while (*ptr) { ++ unsigned long test_number; ++ size_t len; + -+/** -+ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read -+ * from GPU -+ * @kbdev: Device pointer -+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure -+ * -+ * This function reads GPU properties that are dependent on the hardware -+ * features bitmask. It will power-on the GPU if required. -+ * -+ * Return: Zero for succeess or a Linux error code -+ */ -+int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump); ++ /* Drop leading spaces */ ++ while (*ptr == ' ') ++ ptr++; + -+/** -+ * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read -+ * from GPU -+ * @kbdev: Device pointer -+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure -+ * -+ * This function reads L2_FEATURES register that is dependent on the hardware -+ * features bitmask. It will power-on the GPU if required. -+ * -+ * Return: Zero on success, Linux error code on failure -+ */ -+int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump); ++ /* Strings passed into the validator will be NULL terminated ++ * by nature, so here strcspn only needs to delimit by ++ * newlines, spaces and NULL terminator (delimited natively). ++ */ ++ len = strcspn(ptr, "\n "); ++ if (len == 0) { ++ /* No more values (allow this) */ ++ break; ++ } + ++ /* Substitute a nul terminator for a space character to make ++ * the substring valid for kstrtoul, and then replace it back. ++ */ ++ if (ptr[len] == ' ') { ++ ptr[len] = '\0'; ++ err = kstrtoul(ptr, 0, &test_number); ++ ptr[len] = ' '; + -+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h ++ /* len should only be incremented if there is a valid ++ * number to follow - otherwise this will skip over ++ * the NULL terminator in cases with no ending newline ++ */ ++ len++; ++ } else { ++ /* This would occur at the last element before a space ++ * or a NULL terminator. 
++ */ ++ err = kstrtoul(ptr, 0, &test_number); ++ } ++ ++ if (err) ++ break; ++ /* Skip the substring (including any premature nul terminator) ++ */ ++ ptr += len; ++ } ++ return err; ++} ++ ++int kbase_debugfs_helper_set_attr_from_string( ++ const char *const buf, void *const array, size_t const nelems, ++ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) ++{ ++ char *const wbuf = kstrdup(buf, GFP_KERNEL); ++ int err = 0; ++ ++ if (!wbuf) ++ return -ENOMEM; ++ ++ /* validate string before actually writing values */ ++ err = kbase_debugfs_string_validator(wbuf); ++ if (err) { ++ kfree(wbuf); ++ return err; ++ } ++ ++ err = set_attr_from_string(wbuf, array, nelems, ++ set_attr_fn); ++ ++ kfree(wbuf); ++ return err; ++} ++ ++ssize_t kbase_debugfs_helper_get_attr_to_string( ++ char *const buf, size_t const size, void *const array, ++ size_t const nelems, ++ kbase_debugfs_helper_get_attr_fn * const get_attr_fn) ++{ ++ ssize_t total = 0; ++ size_t index; ++ ++ for (index = 0; index < nelems; ++index) { ++ const char *postfix = " "; ++ ++ if (index == (nelems-1)) ++ postfix = "\n"; ++ ++ total += scnprintf(buf + total, size - total, "%zu%s", ++ get_attr_fn(array, index), postfix); ++ } ++ ++ return total; ++} ++ ++int kbase_debugfs_helper_seq_write( ++ struct file *const file, const char __user *const ubuf, ++ size_t const count, size_t const nelems, ++ kbase_debugfs_helper_set_attr_fn * const set_attr_fn) ++{ ++ const struct seq_file *const sfile = file->private_data; ++ void *const array = sfile->private; ++ int err = 0; ++ char *buf; ++ ++ if (WARN_ON(!array)) ++ return -EINVAL; ++ ++ if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE)) ++ return -EINVAL; ++ ++ buf = kmalloc(count + 1, GFP_KERNEL); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(buf, ubuf, count)) { ++ kfree(buf); ++ return -EFAULT; ++ } ++ ++ buf[count] = '\0'; ++ ++ /* validate string before actually writing values */ ++ err = kbase_debugfs_string_validator(buf); ++ if (err) { ++ kfree(buf); ++ return err; ++ } ++ ++ err = set_attr_from_string(buf, ++ array, nelems, set_attr_fn); ++ kfree(buf); ++ ++ return err; ++} ++ ++int kbase_debugfs_helper_seq_read( ++ struct seq_file * const sfile, size_t const nelems, ++ kbase_debugfs_helper_get_attr_fn * const get_attr_fn) ++{ ++ void *const array = sfile->private; ++ size_t index; ++ ++ if (WARN_ON(!array)) ++ return -EINVAL; ++ ++ for (index = 0; index < nelems; ++index) { ++ const char *postfix = " "; ++ ++ if (index == (nelems-1)) ++ postfix = "\n"; ++ ++ seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); ++ } ++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h new file mode 100644 -index 000000000..37663101a +index 000000000..cbb24d6e0 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h -@@ -0,0 +1,170 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h +@@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2015, 2017-2018, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -218420,165 +221198,157 @@ index 000000000..37663101a + * + */ + -+/* -+ * HW Access instrumentation common APIs -+ */ -+ -+#ifndef _KBASE_HWACCESS_INSTR_H_ -+#define _KBASE_HWACCESS_INSTR_H_ -+ -+#include ++#ifndef _KBASE_DEBUGFS_HELPER_H_ ++#define _KBASE_DEBUGFS_HELPER_H_ + +/** -+ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. -+ * @dump_buffer: GPU address to write counters to. -+ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. -+ * @fe_bm: counters selection bitmask (Front End). -+ * @shader_bm: counters selection bitmask (Shader). -+ * @tiler_bm: counters selection bitmask (Tiler). -+ * @mmu_l2_bm: counters selection bitmask (MMU_L2). -+ * @counter_set: the performance counter set to use. ++ * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an ++ * attribute value from an array ++ * ++ * @array: Address of an object that can be accessed like an array. ++ * @index: An element index. The valid range depends on the use-case. ++ * @value: Attribute value to be set. + */ -+struct kbase_instr_hwcnt_enable { -+ u64 dump_buffer; -+ u64 dump_buffer_bytes; -+ u32 fe_bm; -+ u32 shader_bm; -+ u32 tiler_bm; -+ u32 mmu_l2_bm; -+ u8 counter_set; -+}; ++typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, ++ size_t value); + +/** -+ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection -+ * @kbdev: Kbase device -+ * @kctx: Kbase context -+ * @enable: HW counter setup parameters ++ * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an ++ * array + * -+ * Context: might sleep, waiting for reset to complete ++ * @buf: Input string to parse. Must be nul-terminated! ++ * @array: Address of an object that can be accessed like an array. ++ * @nelems: Number of elements in the array. ++ * @set_attr_fn: Function to be called back for each array element. + * -+ * Return: 0 on success -+ */ -+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_instr_hwcnt_enable *enable); -+ -+/** -+ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection -+ * @kctx: Kbase context ++ * The given function is called once for each attribute value found in the ++ * input string. It is not an error if the string specifies fewer attribute ++ * values than the specified number of array elements. + * -+ * Context: might sleep, waiting for an ongoing dump to complete ++ * The number base of each attribute value is detected automatically ++ * according to the standard rules (e.g. prefix "0x" for hexadecimal). ++ * Attribute values are separated by one or more space characters. ++ * Additional leading and trailing spaces are ignored. + * -+ * Return: 0 on success ++ * Return: 0 if success, negative error code otherwise. + */ -+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); ++int kbase_debugfs_helper_set_attr_from_string( ++ const char *buf, void *array, size_t nelems, ++ kbase_debugfs_helper_set_attr_fn *set_attr_fn); + +/** -+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU -+ * @kctx: Kbase context ++ * kbase_debugfs_string_validator - Validate a string to be written to a ++ * debugfs file for any incorrect formats ++ * or wrong values. 
+ * -+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, -+ * of call kbase_instr_hwcnt_wait_for_dump(). ++ * @buf: Null-terminated string to validate. + * -+ * Return: 0 on success -+ */ -+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); -+ -+/** -+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has -+ * completed. -+ * @kctx: Kbase context ++ * This function is to be used before any writes to debugfs values are done ++ * such that any strings with erroneous values (such as octal 09 or ++ * hexadecimal 0xGH are fully ignored) - without this validation, any correct ++ * values before the first incorrect one will still be entered into the ++ * debugfs file. This essentially iterates the values through kstrtoul to see ++ * if it is valid. + * -+ * Context: will sleep, waiting for dump to complete ++ * It is largely similar to set_attr_from_string to iterate through the values ++ * of the input string. This function also requires the input string to be ++ * writable. + * -+ * Return: 0 on success ++ * Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if ++ * any value in the string was wrong or with an incorrect format. + */ -+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); ++int kbase_debugfs_string_validator(char *const buf); + +/** -+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has -+ * completed -+ * @kctx: Kbase context -+ * @success: Set to true if successful ++ * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an ++ * attribute value from an array + * -+ * Context: does not sleep. ++ * @array: Address of an object that can be accessed like an array. ++ * @index: An element index. The valid range depends on the use-case. + * -+ * Return: true if the dump is complete ++ * Return: Value of attribute. + */ -+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, -+ bool * const success); ++typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index); + +/** -+ * kbase_instr_hwcnt_clear() - Clear HW counters -+ * @kctx: Kbase context ++ * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string ++ * from elements in an array + * -+ * Context: might sleep, waiting for reset to complete ++ * @buf: Buffer in which to store the formatted output string. ++ * @size: The size of the buffer, in bytes. ++ * @array: Address of an object that can be accessed like an array. ++ * @nelems: Number of elements in the array. ++ * @get_attr_fn: Function to be called back for each array element. + * -+ * Return: 0 on success ++ * The given function is called once for each array element to get the ++ * value of the attribute to be inspected. The attribute values are ++ * written to the buffer as a formatted string of decimal numbers ++ * separated by spaces and terminated by a linefeed. ++ * ++ * Return: Number of characters written excluding the nul terminator. + */ -+int kbase_instr_hwcnt_clear(struct kbase_context *kctx); ++ssize_t kbase_debugfs_helper_get_attr_to_string( ++ char *buf, size_t size, void *array, size_t nelems, ++ kbase_debugfs_helper_get_attr_fn *get_attr_fn); + +/** -+ * kbase_instr_backend_init() - Initialise the instrumentation backend -+ * @kbdev: Kbase device ++ * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an ++ * array + * -+ * This function should be called during driver initialization. ++ * @sfile: A virtual file previously opened by calling single_open. 
++ * @nelems: Number of elements in the array. ++ * @get_attr_fn: Function to be called back for each array element. + * -+ * Return: 0 on success -+ */ -+int kbase_instr_backend_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_instr_backend_term() - Terminate the instrumentation backend -+ * @kbdev: Kbase device ++ * The virtual file must have been opened by calling single_open and passing ++ * the address of an object that can be accessed like an array. + * -+ * This function should be called during driver termination. -+ */ -+void kbase_instr_backend_term(struct kbase_device *kbdev); -+ -+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS -+/** -+ * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the -+ * hardware counter set. -+ * @kbdev: kbase device ++ * The given function is called once for each array element to get the ++ * value of the attribute to be inspected. The attribute values are ++ * written to the buffer as a formatted string of decimal numbers ++ * separated by spaces and terminated by a linefeed. ++ * ++ * Return: 0 if success, negative error code otherwise. + */ -+void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); -+#endif ++int kbase_debugfs_helper_seq_read( ++ struct seq_file *sfile, size_t nelems, ++ kbase_debugfs_helper_get_attr_fn *get_attr_fn); + +/** -+ * kbase_instr_hwcnt_on_unrecoverable_error() - JM HWC instr backend function -+ * called when unrecoverable errors -+ * are detected. -+ * @kbdev: Kbase device ++ * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an ++ * array + * -+ * This should be called on encountering errors that can only be recovered from -+ * with reset, or that may put HWC logic in state that could result in hang. For -+ * example, when HW becomes unresponsive. ++ * @file: A virtual file previously opened by calling single_open. ++ * @ubuf: Source address in user space. ++ * @count: Number of bytes written to the virtual file. ++ * @nelems: Number of elements in the array. ++ * @set_attr_fn: Function to be called back for each array element. + * -+ * Caller requires kbdev->hwaccess_lock held. ++ * The virtual file must have been opened by calling single_open and passing ++ * the address of an object that can be accessed like an array. ++ * ++ * The given function is called once for each attribute value found in the ++ * data written to the virtual file. For further details, refer to the ++ * description of set_attr_from_string. ++ * ++ * Return: 0 if success, negative error code otherwise. + */ -+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev); ++int kbase_debugfs_helper_seq_write(struct file *file, ++ const char __user *ubuf, size_t count, ++ size_t nelems, ++ kbase_debugfs_helper_set_attr_fn *set_attr_fn); + -+/** -+ * kbase_instr_hwcnt_on_before_reset() - JM HWC instr backend function to be -+ * called immediately before a reset. -+ * Takes us out of the unrecoverable -+ * error state, if we were in it. 
-+ * @kbdev: Kbase device -+ */ -+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev); ++#endif /*_KBASE_DEBUGFS_HELPER_H_ */ + -+#endif /* _KBASE_HWACCESS_INSTR_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h new file mode 100644 -index 000000000..ca77c192d +index 000000000..809e73000 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h -@@ -0,0 +1,318 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h +@@ -0,0 +1,2105 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -218596,2346 +221366,2723 @@ index 000000000..ca77c192d + * + */ + -+/* -+ * HW access job manager common APIs ++/** ++ * DOC: Defintions (types, defines, etcs) common to Kbase. They are placed here ++ * to allow the hierarchy of header files to work. + */ + -+#ifndef _KBASE_HWACCESS_JM_H_ -+#define _KBASE_HWACCESS_JM_H_ ++#ifndef _KBASE_DEFS_H_ ++#define _KBASE_DEFS_H_ + -+/** -+ * kbase_backend_run_atom() - Run an atom on the GPU -+ * @kbdev: Device pointer -+ * @katom: Atom to run -+ * -+ * Caller must hold the HW access lock -+ */ -+void kbase_backend_run_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbase_backend_slot_update - Update state based on slot ringbuffers -+ * -+ * @kbdev: Device pointer -+ * -+ * Inspect the jobs in the slot ringbuffers and update state. -+ * -+ * This will cause jobs to be submitted to hardware if they are unblocked -+ */ -+void kbase_backend_slot_update(struct kbase_device *kbdev); ++#if MALI_USE_CSF ++#include ++#else ++#include ++#include ++#endif + -+/** -+ * kbase_backend_find_and_release_free_address_space() - Release a free AS -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * This function can evict an idle context from the runpool, freeing up the -+ * address space it was using. -+ * -+ * The address space is marked as in use. The caller must either assign a -+ * context using kbase_gpu_use_ctx(), or release it using -+ * kbase_ctx_sched_release() -+ * -+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none -+ * available -+ */ -+int kbase_backend_find_and_release_free_address_space( -+ struct kbase_device *kbdev, struct kbase_context *kctx); ++#include + -+/** -+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the -+ * provided address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer. May be NULL -+ * @as_nr: Free address space to use -+ * -+ * kbase_gpu_next_job() will pull atoms from the active context. -+ * -+ * Return: true if successful, false if ASID not assigned. -+ */ -+bool kbase_backend_use_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int as_nr); ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbase_backend_use_ctx_sched() - Activate a context. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * @js: Job slot to activate context on -+ * -+ * kbase_gpu_next_job() will pull atoms from the active context. 
-+ * -+ * The context must already be scheduled and assigned to an address space. If -+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used -+ * instead. -+ * -+ * Caller must hold hwaccess_lock -+ * -+ * Return: true if context is now active, false otherwise (ie if context does -+ * not have an address space assigned) -+ */ -+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js); + -+/** -+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will -+ * de-assign the assigned address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock -+ */ -+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++#include "mali_kbase_fence_defs.h" + -+/** -+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will -+ * de-assign the assigned address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * Caller must hold kbase_device->mmu_hw_mutex -+ * -+ * This function must perform any operations that could not be performed in IRQ -+ * context by kbase_backend_release_ctx_irq(). -+ */ -+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include ++#endif /* CONFIG_DEBUG_FS */ + -+/** -+ * kbase_backend_cache_clean - Perform a cache clean if the given atom requires -+ * one -+ * @kbdev: Device pointer -+ * @katom: Pointer to the failed atom -+ * -+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This -+ * function performs a clean if it is required by @katom. -+ */ -+void kbase_backend_cache_clean(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++#include ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ + ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++#include ++#endif + -+/** -+ * kbase_backend_complete_wq() - Perform backend-specific actions required on -+ * completing an atom. -+ * @kbdev: Device pointer -+ * @katom: Pointer to the atom to complete -+ * -+ * This function should only be called from kbase_jd_done_worker() or -+ * js_return_worker(). -+ * -+ * Return: true if atom has completed, false if atom should be re-submitted -+ */ -+void kbase_backend_complete_wq(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+#if !MALI_USE_CSF -+/** -+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions -+ * required on completing an atom, after -+ * any scheduling has taken place. -+ * @kbdev: Device pointer -+ * @core_req: Core requirements of atom -+ * -+ * This function should only be called from kbase_jd_done_worker() or -+ * js_return_worker(). -+ */ -+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, -+ base_jd_core_req core_req); -+#endif /* !MALI_USE_CSF */ ++#include ++#include ++#include ++#include + -+/** -+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU -+ * and remove any others from the ringbuffers. 
-+ * @kbdev: Device pointer -+ * @end_timestamp: Timestamp of reset -+ */ -+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); ++#include "debug/mali_kbase_debug_ktrace_defs.h" + -+/** -+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot -+ * @js -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return: Atom currently at the head of slot @js, or NULL -+ */ -+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js); ++/** Number of milliseconds before we time out on a GPU soft/hard reset */ ++#define RESET_TIMEOUT 500 + +/** -+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a -+ * slot. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect ++ * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware. + * -+ * Return: Number of atoms currently on slot ++ * You can optimize this down if your target devices will only ever support a ++ * small number of job slots. + */ -+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js); ++#define BASE_JM_MAX_NR_SLOTS 3 + +/** -+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot -+ * that are currently on the GPU. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect ++ * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware. + * -+ * Return: Number of atoms currently on slot @js that are currently on the GPU. ++ * You can optimize this down if your target devices will only ever support a ++ * small number of Address Spaces + */ -+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js); ++#define BASE_MAX_NR_AS 16 + -+/** -+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs -+ * has changed. -+ * @kbdev: Device pointer -+ * -+ * Perform any required backend-specific actions (eg starting/stopping -+ * scheduling timers). -+ */ -+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); ++/* mmu */ ++#define MIDGARD_MMU_LEVEL(x) (x) + -+/** -+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. -+ * @kbdev: Device pointer -+ * -+ * Perform any required backend-specific actions (eg updating timeouts of -+ * currently running atoms). -+ */ -+void kbase_backend_timeouts_changed(struct kbase_device *kbdev); ++#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) + -+/** -+ * kbase_backend_slot_free() - Return the number of jobs that can be currently -+ * submitted to slot @js. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return: Number of jobs that can be submitted. -+ */ -+int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); ++#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) + -+/** -+ * kbase_job_check_leave_disjoint - potentially leave disjoint state -+ * @kbdev: kbase device -+ * @target_katom: atom which is finishing -+ * -+ * Work out whether to leave disjoint state when finishing an atom that was -+ * originated by kbase_job_check_enter_disjoint(). 
-+ */ -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom); ++#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) + -+/** -+ * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are -+ * currently running on GPU from a context -+ * @kctx: Context pointer -+ * -+ * This is used in response to a page fault to remove all jobs from the faulting -+ * context from the hardware. -+ * -+ * Caller must hold hwaccess_lock. -+ */ -+void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx); ++/** setting in kbase_context::as_nr that indicates it's invalid */ ++#define KBASEP_AS_NR_INVALID (-1) + +/** -+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and -+ * to be descheduled. -+ * @kctx: Context pointer -+ * -+ * This should be called following kbase_js_zap_context(), to ensure the context -+ * can be safely destroyed. ++ * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region, ++ * as a logarithm + */ -+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); ++#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */ + +/** -+ * kbase_backend_get_current_flush_id - Return the current flush ID -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: the current flush ID to be recorded for each job chain ++ * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones + */ -+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); ++#if MALI_USE_CSF ++#define KBASE_REG_ZONE_MAX 6ul ++#else ++#define KBASE_REG_ZONE_MAX 4ul ++#endif + -+/** -+ * kbase_job_slot_hardstop - Hard-stop the specified job slot -+ * @kctx: The kbase context that contains the job(s) that should -+ * be hard-stopped -+ * @js: The job slot to hard-stop -+ * @target_katom: The job that should be hard-stopped (or NULL for all -+ * jobs from the context) -+ * Context: -+ * The job slot lock must be held when calling this function. -+ */ -+void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, -+ struct kbase_jd_atom *target_katom); ++#include "mali_kbase_hwaccess_defs.h" + -+/** -+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms -+ * currently on the GPU -+ * @kbdev: Device pointer -+ * -+ * Return: true if there are any atoms on the GPU, false otherwise ++/* Maximum number of pages of memory that require a permanent mapping, per ++ * kbase_context + */ -+bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); -+ -+/** -+ * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked -+ * kctx -+ * -+ * @kbdev: Device pointer -+ * @kctx: The kbase context that needs to be purged from slot_rb[] -+ * -+ * For JM GPUs, the L1 read only caches may need a start_flush invalidation, -+ * potentially on all slots (even if the kctx was only using a single slot), -+ * following a context termination or address-space ID recycle. This function -+ * performs a clean-up purge on the given kctx which if it has been tracked by -+ * slot_rb[] objects. -+ * -+ * Caller must hold kbase_device->hwaccess_lock. ++#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT) ++/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer ++ * clients, to reduce undesired system load. 
++ * If a virtualizer client requests a dump within this threshold period after ++ * some other client has performed a dump, a new dump won't be performed and ++ * the accumulated counter values for that client will be returned instead. + */ -+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx); ++#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) + -+#endif /* _KBASE_HWACCESS_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h -new file mode 100644 -index 000000000..effb2ffeb ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h -@@ -0,0 +1,229 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++#if MALI_USE_CSF ++/* The buffer count of CSF hwcnt backend ring buffer, which is used when CSF ++ * hwcnt backend allocate the ring buffer to communicate with CSF firmware for ++ * HWC dump samples. ++ * To meet the hardware requirement, this number MUST be power of 2, otherwise, ++ * CSF hwcnt backend creation will be failed. + */ ++#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128) ++#endif + -+/** -+ * DOC: HW access power manager common APIs ++/* Maximum number of clock/regulator pairs that may be referenced by ++ * the device node. ++ * This is dependent on support for of_property_read_u64_array() in the ++ * kernel. ++ * While, the number of clocks could be more than regulators, ++ * as mentioned in power_control_init(). + */ ++#define BASE_MAX_NR_CLOCKS_REGULATORS (4) + -+#ifndef _KBASE_HWACCESS_PM_H_ -+#define _KBASE_HWACCESS_PM_H_ -+ -+#include -+#include -+ -+#include -+ -+/* Forward definition - see mali_kbase.h */ ++/* Forward declarations */ ++struct kbase_context; +struct kbase_device; -+ -+/* Functions common to all HW access backends */ ++struct kbase_as; ++struct kbase_mmu_setup; ++struct kbase_kinstr_jm; + +/** -+ * kbase_hwaccess_pm_init - Initialize the power management framework. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Must be called before any other power management function ++ * struct kbase_io_access - holds information about 1 register access + * -+ * Return: 0 if the power management framework was successfully initialized. ++ * @addr: first bit indicates r/w (r=0, w=1) ++ * @value: value written or read + */ -+int kbase_hwaccess_pm_init(struct kbase_device *kbdev); ++struct kbase_io_access { ++ uintptr_t addr; ++ u32 value; ++}; + +/** -+ * kbase_hwaccess_pm_term - Terminate the power management framework. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * struct kbase_io_history - keeps track of all recent register accesses + * -+ * No power management functions may be called after this ++ * @enabled: true if register accesses are recorded, false otherwise ++ * @lock: spinlock protecting kbase_io_access array ++ * @count: number of registers read/written ++ * @size: number of elements in kbase_io_access array ++ * @buf: array of kbase_io_access + */ -+void kbase_hwaccess_pm_term(struct kbase_device *kbdev); ++struct kbase_io_history { ++ bool enabled; ++ ++ spinlock_t lock; ++ size_t count; ++ u16 size; ++ struct kbase_io_access *buf; ++}; + +/** -+ * kbase_hwaccess_pm_powerup - Power up the GPU. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @flags: Flags to pass on to kbase_pm_init_hw -+ * -+ * Power up GPU after all modules have been initialized and interrupt handlers -+ * installed. ++ * struct kbase_debug_copy_buffer - information about the buffer to be copied. + * -+ * Return: 0 if powerup was successful. ++ * @size: size of the buffer in bytes ++ * @pages: pointer to an array of pointers to the pages which contain ++ * the buffer ++ * @is_vmalloc: true if @pages was allocated with vzalloc. false if @pages was ++ * allocated with kcalloc ++ * @nr_pages: number of pages ++ * @offset: offset into the pages ++ * @gpu_alloc: pointer to physical memory allocated by the GPU ++ * @extres_pages: array of pointers to the pages containing external resources ++ * for this buffer ++ * @nr_extres_pages: number of pages in @extres_pages + */ -+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, -+ unsigned int flags); ++struct kbase_debug_copy_buffer { ++ size_t size; ++ struct page **pages; ++ bool is_vmalloc; ++ int nr_pages; ++ size_t offset; ++ struct kbase_mem_phy_alloc *gpu_alloc; ++ ++ struct page **extres_pages; ++ int nr_extres_pages; ++}; ++ ++struct kbase_device_info { ++ u32 features; ++}; ++ ++struct kbase_mmu_setup { ++ u64 transtab; ++ u64 memattr; ++ u64 transcfg; ++}; + +/** -+ * kbase_hwaccess_pm_halt - Halt the power management framework. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Should ensure that no new interrupts are generated, but allow any currently -+ * running interrupt handlers to complete successfully. The GPU is forced off by -+ * the time this function returns, regardless of whether or not the active power -+ * policy asks for the GPU to be powered off. ++ * struct kbase_fault - object containing data relating to a page or bus fault. ++ * @addr: Records the faulting address. ++ * @extra_addr: Records the secondary fault address. ++ * @status: Records the fault status as reported by Hw. ++ * @protected_mode: Flag indicating whether the fault occurred in protected mode ++ * or not. + */ -+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); ++struct kbase_fault { ++ u64 addr; ++ u64 extra_addr; ++ u32 status; ++ bool protected_mode; ++}; + -+/** -+ * kbase_hwaccess_pm_suspend - Perform any backend-specific actions to suspend the GPU ++/** Maximum number of memory pages that should be allocated for the array ++ * of pointers to free PGDs. + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * This number has been pre-calculated to deal with the maximum allocation ++ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE. 
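Given the fields documented for kbase_io_history, dumping the most recent register accesses amounts to walking the ring buffer under @lock. A sketch, assuming entries are filled sequentially and wrap at @size (the traversal order is an assumption; the field usage follows the descriptions above):

static void example_io_history_print(struct kbase_io_history *h)
{
	unsigned long flags;
	size_t i, nr;

	spin_lock_irqsave(&h->lock, flags);
	/* Only the last min(count, size) accesses are still in the buffer. */
	nr = h->count < h->size ? h->count : h->size;
	for (i = 0; i < nr; i++) {
		struct kbase_io_access *io =
			&h->buf[(h->count - nr + i) % h->size];

		/* Bit 0 of addr encodes the direction: r = 0, w = 1. */
		pr_info("%zu: %c 0x%lx = 0x%08x\n", i,
			(io->addr & 1) ? 'w' : 'r',
			(unsigned long)(io->addr & ~(uintptr_t)1), io->value);
	}
	spin_unlock_irqrestore(&h->lock, flags);
}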
++ * This is supposed to be enough for almost the entirety of MMU operations. ++ * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down ++ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE ++ * bytes. + * -+ * Return: 0 if suspend was successful. ++ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes. + */ -+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); ++#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9) + -+/** -+ * kbase_hwaccess_pm_resume - Perform any backend-specific actions to resume the GPU -+ * from a suspend -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); ++/* Maximum number of pointers to free PGDs */ ++#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS) + +/** -+ * kbase_hwaccess_pm_gpu_active - Perform any required actions for activating the GPU. -+ * Called when the first context goes active. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * struct kbase_mmu_table - object representing a set of GPU page tables ++ * @mmu_lock: Lock to serialize the accesses made to multi level GPU ++ * page tables ++ * @pgd: Physical address of the page allocated for the top ++ * level page table of the context, this is used for ++ * MMU HW programming as the address translation will ++ * start from the top level page table. ++ * @group_id: A memory group ID to be passed to a platform-specific ++ * memory group manager. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @kctx: If this set of MMU tables belongs to a context then ++ * this is a back-reference to the context, otherwise ++ * it is NULL. ++ * @scratch_mem: Scratch memory used for MMU operations, which are ++ * serialized by the @mmu_lock. + */ -+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); ++struct kbase_mmu_table { ++ struct mutex mmu_lock; ++ phys_addr_t pgd; ++ u8 group_id; ++ struct kbase_context *kctx; ++ union { ++ /** ++ * @teardown_pages: Scratch memory used for backup copies of whole ++ * PGD pages when tearing down levels upon ++ * termination of the MMU table. ++ */ ++ struct { ++ /** ++ * @levels: Array of PGD pages, large enough to copy one PGD ++ * for each level of the MMU table. ++ */ ++ u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; ++ } teardown_pages; ++ /** ++ * @free_pgds: Scratch memory user for insertion, update and teardown ++ * operations to store a temporary list of PGDs to be freed ++ * at the end of the operation. ++ */ ++ struct { ++ /** @pgds: Array of pointers to PGDs to free. */ ++ struct page *pgds[MAX_FREE_PGDS]; ++ /** @head_index: Index of first free element in the PGDs array. */ ++ size_t head_index; ++ } free_pgds; ++ } scratch_mem; ++}; + +/** -+ * kbase_hwaccess_pm_gpu_idle - Perform any required actions for idling the GPU. -+ * Called when the last context goes idle. ++ * struct kbase_reg_zone - Information about GPU memory region zones ++ * @base_pfn: Page Frame Number in GPU virtual address space for the start of ++ * the Zone ++ * @va_size_pages: Size of the Zone in pages + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * Track information about a zone KBASE_REG_ZONE() and related macros. ++ * In future, this could also store the &rb_root that are currently in ++ * &kbase_context and &kbase_csf_device. 
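To put the sizing above in concrete terms: on a typical 64-bit build with 4 KiB pages, PAGE_SIZE / sizeof(struct page *) is 4096 / 8 = 512 pointers per page, so MAX_FREE_PGDS comes to 512 * 9 = 4608 deferred PGD pointers, while the teardown_pages scratch area holds MIDGARD_MMU_BOTTOMLEVEL (= 3) backup PGD pages of PAGE_SIZE / sizeof(u64) = 512 entries each.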
+ */ -+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); ++struct kbase_reg_zone { ++ u64 base_pfn; ++ u64 va_size_pages; ++}; + +#if MALI_USE_CSF -+/** -+ * kbase_pm_set_debug_core_mask - Set the debug core mask. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @new_core_mask: The core mask to use -+ * -+ * This determines which cores the power manager is allowed to use. -+ */ -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, -+ u64 new_core_mask); ++#include "csf/mali_kbase_csf_defs.h" +#else ++#include "jm/mali_kbase_jm_defs.h" ++#endif ++ ++#include "mali_kbase_hwaccess_time.h" ++ ++static inline int kbase_as_has_bus_fault(struct kbase_as *as, ++ struct kbase_fault *fault) ++{ ++ return (fault == &as->bf_data); ++} ++ ++static inline int kbase_as_has_page_fault(struct kbase_as *as, ++ struct kbase_fault *fault) ++{ ++ return (fault == &as->pf_data); ++} ++ +/** -+ * kbase_pm_set_debug_core_mask - Set the debug core mask. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @new_core_mask_js0: The core mask to use for job slot 0 -+ * @new_core_mask_js1: The core mask to use for job slot 1 -+ * @new_core_mask_js2: The core mask to use for job slot 2 ++ * struct kbasep_mem_device - Data stored per device for memory allocation + * -+ * This determines which cores the power manager is allowed to use. ++ * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is ++ * allocated/freed. ++ * @ir_threshold: Fraction of the maximum size of an allocation that grows ++ * on GPU page fault that can be used before the driver ++ * switches to incremental rendering, in 1/256ths. ++ * 0 means disabled. + */ -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, -+ u64 new_core_mask_js0, u64 new_core_mask_js1, -+ u64 new_core_mask_js2); -+#endif /* MALI_USE_CSF */ ++struct kbasep_mem_device { ++ atomic_t used_pages; ++ atomic_t ir_threshold; ++}; ++ ++struct kbase_clk_rate_listener; + +/** -+ * kbase_pm_ca_get_policy - Get the current policy. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback + * -+ * Returns the policy that is currently active. ++ * @listener: Clock frequency change listener. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). + * -+ * Return: The current policy ++ * A callback to call when clock rate changes. The function must not ++ * sleep. No clock rate manager functions must be called from here, as ++ * its lock is taken. + */ -+const struct kbase_pm_ca_policy -+*kbase_pm_ca_get_policy(struct kbase_device *kbdev); ++typedef void ++kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, ++ u32 clk_index, u32 clk_rate_hz); + +/** -+ * kbase_pm_ca_set_policy - Change the policy to the one specified. ++ * struct kbase_clk_rate_listener - Clock frequency listener + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @policy: The policy to change to (valid pointer returned from -+ * @ref kbase_pm_ca_list_policies) ++ * @node: List node. ++ * @notify: Callback to be called when GPU frequency changes. 
+ */ -+void kbase_pm_ca_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_ca_policy *policy); ++struct kbase_clk_rate_listener { ++ struct list_head node; ++ kbase_clk_rate_listener_on_change_t *notify; ++}; + +/** -+ * kbase_pm_ca_list_policies - Retrieve a static list of the available policies. -+ * -+ * @policies: An array pointer to take the list of policies. This may be NULL. -+ * The contents of this array must not be modified. ++ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock ++ * rate trace manager. + * -+ * Return: The number of policies ++ * @gpu_idle: Tracks the idle state of GPU. ++ * @clks: Array of pointer to structures storing data for every ++ * enumerated GPU clock. ++ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace ++ * operations. ++ * @listeners: List of listener attached. ++ * @lock: Lock to serialize the actions of GPU clock rate trace ++ * manager. + */ -+int -+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); ++struct kbase_clk_rate_trace_manager { ++ bool gpu_idle; ++ struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops; ++ struct list_head listeners; ++ spinlock_t lock; ++}; + +/** -+ * kbase_pm_get_policy - Get the current policy. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Returns the policy that is currently active. -+ * -+ * Return: The current policy ++ * struct kbase_pm_device_data - Data stored per device for power management. ++ * @lock: The lock protecting Power Management structures accessed ++ * outside of IRQ. ++ * This lock must also be held whenever the GPU is being ++ * powered on or off. ++ * @active_count: The reference count of active contexts on this device. ++ * Note that some code paths keep shaders/the tiler ++ * powered whilst this is 0. ++ * Use kbase_pm_is_active() instead to check for such cases. ++ * @suspending: Flag indicating suspending/suspended ++ * @runtime_active: Flag to track if the GPU is in runtime suspended or active ++ * state. This ensures that runtime_put and runtime_get ++ * functions are called in pairs. For example if runtime_get ++ * has already been called from the power_on callback, then ++ * the call to it from runtime_gpu_active callback can be ++ * skipped. ++ * @gpu_lost: Flag indicating gpu lost ++ * This structure contains data for the power management framework. ++ * There is one instance of this structure per device in the system. ++ * @zero_active_count_wait: Wait queue set when active_count == 0 ++ * @resume_wait: system resume of GPU device. ++ * @debug_core_mask: Bit masks identifying the available shader cores that are ++ * specified via sysfs. One mask per job slot. ++ * @debug_core_mask_all: Bit masks identifying the available shader cores that ++ * are specified via sysfs. ++ * @callback_power_runtime_init: Callback for initializing the runtime power ++ * management. Return 0 on success, else error code ++ * @callback_power_runtime_term: Callback for terminating the runtime power ++ * management. ++ * @dvfs_period: Time in milliseconds between each dvfs sample ++ * @backend: KBase PM backend data ++ * @arb_vm_state: The state of the arbiter VM machine ++ * @gpu_users_waiting: Used by virtualization to notify the arbiter that there ++ * are users waiting for the GPU so that it can request ++ * and resume the driver. 
++ * @clk_rtm: The state of the GPU clock rate trace manager + */ -+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); ++struct kbase_pm_device_data { ++ struct mutex lock; ++ int active_count; ++ bool suspending; ++#if MALI_USE_CSF ++ bool runtime_active; ++#endif ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_t gpu_lost; ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ wait_queue_head_t zero_active_count_wait; ++ wait_queue_head_t resume_wait; ++ ++#if MALI_USE_CSF ++ u64 debug_core_mask; ++#else ++ /* One mask per job slot. */ ++ u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; ++ u64 debug_core_mask_all; ++#endif /* MALI_USE_CSF */ ++ ++ int (*callback_power_runtime_init)(struct kbase_device *kbdev); ++ void (*callback_power_runtime_term)(struct kbase_device *kbdev); ++ u32 dvfs_period; ++ struct kbase_pm_backend_data backend; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ struct kbase_arbiter_vm_state *arb_vm_state; ++ atomic_t gpu_users_waiting; ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ struct kbase_clk_rate_trace_manager clk_rtm; ++}; + +/** -+ * kbase_pm_set_policy - Change the policy to the one specified. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @policy: The policy to change to (valid pointer returned from -+ * @ref kbase_pm_list_policies) ++ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev ++ * @kbdev: Kbase device where memory is used ++ * @cur_size: Number of free pages currently in the pool (may exceed ++ * @max_size in some corner cases) ++ * @max_size: Maximum number of free pages in the pool ++ * @order: order = 0 refers to a pool of 4 KB pages ++ * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) ++ * @group_id: A memory group ID to be passed to a platform-specific ++ * memory group manager, if present. Immutable. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @pool_lock: Lock protecting the pool - must be held when modifying ++ * @cur_size and @page_list ++ * @page_list: List of free pages in the pool ++ * @reclaim: Shrinker for kernel reclaim of free pages ++ * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation. ++ * This is used to avoid race condition between pool termination ++ * and page isolation for page migration. ++ * @next_pool: Pointer to next pool where pages can be allocated when this ++ * pool is empty. Pages will spill over to the next pool when ++ * this pool is full. Can be NULL if there is no next pool. ++ * @dying: true if the pool is being terminated, and any ongoing ++ * operations should be abandoned ++ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from ++ * this pool, eg during a grow operation + */ -+void kbase_pm_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_policy *policy); ++struct kbase_mem_pool { ++ struct kbase_device *kbdev; ++ size_t cur_size; ++ size_t max_size; ++ u8 order; ++ u8 group_id; ++ spinlock_t pool_lock; ++ struct list_head page_list; ++ struct shrinker reclaim; ++ atomic_t isolation_in_progress_cnt; ++ ++ struct kbase_mem_pool *next_pool; ++ ++ bool dying; ++ bool dont_reclaim; ++}; + +/** -+ * kbase_pm_list_policies - Retrieve a static list of the available policies. ++ * struct kbase_mem_pool_group - a complete set of physical memory pools. + * -+ * @kbdev: The kbase device structure for the device. -+ * @list: An array pointer to take the list of policies. This may be NULL. -+ * The contents of this array must not be modified. 
++ * @small: Array of objects containing the state for pools of 4 KiB size ++ * physical pages. ++ * @large: Array of objects containing the state for pools of 2 MiB size ++ * physical pages. + * -+ * Return: The number of policies ++ * Memory pools are used to allow efficient reallocation of previously-freed ++ * physical pages. A pair of memory pools is initialized for each physical ++ * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays ++ * should be indexed by physical memory group ID, the meaning of which is ++ * defined by the systems integrator. + */ -+int kbase_pm_list_policies(struct kbase_device *kbdev, -+ const struct kbase_pm_policy * const **list); ++struct kbase_mem_pool_group { ++ struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; ++ struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS]; ++}; + +/** -+ * kbase_pm_protected_mode_enable() - Enable protected mode -+ * -+ * @kbdev: Address of the instance of a GPU platform device. ++ * struct kbase_mem_pool_config - Initial configuration for a physical memory ++ * pool + * -+ * Return: Zero on success or an error code ++ * @max_size: Maximum number of free pages that the pool can hold. + */ -+int kbase_pm_protected_mode_enable(struct kbase_device *kbdev); ++struct kbase_mem_pool_config { ++ size_t max_size; ++}; + +/** -+ * kbase_pm_protected_mode_disable() - Disable protected mode ++ * struct kbase_mem_pool_group_config - Initial configuration for a complete ++ * set of physical memory pools + * -+ * @kbdev: Address of the instance of a GPU platform device. ++ * @small: Array of initial configuration for pools of 4 KiB pages. ++ * @large: Array of initial configuration for pools of 2 MiB pages. + * -+ * Return: Zero on success or an error code ++ * This array should be indexed by physical memory group ID, the meaning ++ * of which is defined by the systems integrator. + */ -+int kbase_pm_protected_mode_disable(struct kbase_device *kbdev); ++struct kbase_mem_pool_group_config { ++ struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; ++ struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS]; ++}; + -+#endif /* _KBASE_HWACCESS_PM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h -new file mode 100644 -index 000000000..ac2a26d28 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h -@@ -0,0 +1,126 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++/** ++ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP ++ * frequency, real frequencies and core mask ++ * @real_freqs: Real GPU frequencies. ++ * @opp_volts: OPP voltages. 
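Because a group config is nothing more than per-group max_size limits for the 4 KiB and 2 MiB pools, populating one is a simple loop. A sketch with arbitrary example limits:

static void example_fill_pool_config(struct kbase_mem_pool_group_config *cfg)
{
	size_t gid;

	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; gid++) {
		/* Keep at most 512 free 4 KiB pages per group... */
		cfg->small[gid].max_size = 512;
		/* ...and at most 8 free 2 MiB pages per group. */
		cfg->large[gid].max_size = 8;
	}
}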
++ * @opp_freq: Nominal OPP frequency ++ * @core_mask: Shader core mask + */ ++struct kbase_devfreq_opp { ++ u64 opp_freq; ++ u64 core_mask; ++ u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++}; + -+#ifndef _KBASE_BACKEND_TIME_H_ -+#define _KBASE_BACKEND_TIME_H_ ++/* MMU mode flags */ ++#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ + -+#if MALI_USE_CSF +/** -+ * struct kbase_backend_time - System timestamp attributes. -+ * -+ * @multiplier: Numerator of the converter's fraction. -+ * @divisor: Denominator of the converter's fraction. -+ * @offset: Converter's offset term. -+ * -+ * According to Generic timer spec, system timer: -+ * - Increments at a fixed frequency -+ * - Starts operating from zero -+ * -+ * Hence CPU time is a linear function of System Time. -+ * -+ * CPU_ts = alpha * SYS_ts + beta -+ * -+ * Where -+ * - alpha = 10^9/SYS_ts_freq -+ * - beta is calculated by two timer samples taken at the same time: -+ * beta = CPU_ts_s - SYS_ts_s * alpha -+ * -+ * Since alpha is a rational number, we minimizing possible -+ * rounding error by simplifying the ratio. Thus alpha is stored -+ * as a simple `multiplier / divisor` ratio. -+ * ++ * struct kbase_mmu_mode - object containing pointer to methods invoked for ++ * programming the MMU, as per the MMU mode supported ++ * by Hw. ++ * @update: enable & setup/configure one of the GPU address space. ++ * @get_as_setup: retrieve the configuration of one of the GPU address space. ++ * @disable_as: disable one of the GPU address space. ++ * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. ++ * @ate_is_valid: check if the pte is a valid address translation entry ++ * encoding the physical address of the actual mapped page. ++ * @pte_is_valid: check if the pte is a valid entry encoding the physical ++ * address of the next lower level page table. ++ * @entry_set_ate: program the pte to be a valid address translation entry to ++ * encode the physical address of the actual page being mapped. ++ * @entry_set_pte: program the pte to be a valid entry to encode the physical ++ * address of the next lower level page table and also update ++ * the number of valid entries. ++ * @entries_invalidate: clear out or invalidate a range of ptes. ++ * @get_num_valid_entries: returns the number of valid entries for a specific pgd. ++ * @set_num_valid_entries: sets the number of valid entries for a specific pgd ++ * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. + */ -+struct kbase_backend_time { -+ u64 multiplier; -+ u64 divisor; -+ s64 offset; ++struct kbase_mmu_mode { ++ void (*update)(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, ++ int as_nr); ++ void (*get_as_setup)(struct kbase_mmu_table *mmut, ++ struct kbase_mmu_setup * const setup); ++ void (*disable_as)(struct kbase_device *kbdev, int as_nr); ++ phys_addr_t (*pte_to_phy_addr)(u64 entry); ++ int (*ate_is_valid)(u64 ate, int level); ++ int (*pte_is_valid)(u64 pte, int level); ++ void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, ++ unsigned long flags, int level); ++ void (*entry_set_pte)(u64 *entry, phys_addr_t phy); ++ void (*entries_invalidate)(u64 *entry, u32 count); ++ unsigned int (*get_num_valid_entries)(u64 *pgd); ++ void (*set_num_valid_entries)(u64 *pgd, ++ unsigned int num_of_valid_entries); ++ unsigned long flags; +}; + -+/** -+ * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. 
-+ * -+ * @kbdev: Kbase device pointer -+ * @gpu_ts: System timestamp value to converter. -+ * -+ * Return: The CPU timestamp. -+ */ -+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts); -+#endif ++struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); ++ ++#define DEVNAME_SIZE 16 + +/** -+ * kbase_backend_get_gpu_time() - Get current GPU time -+ * @kbdev: Device pointer -+ * @cycle_counter: Pointer to u64 to store cycle counter in. -+ * @system_time: Pointer to u64 to store system time in -+ * @ts: Pointer to struct timespec to store current monotonic -+ * time in ++ * enum kbase_devfreq_work_type - The type of work to perform in the devfreq ++ * suspend/resume worker. ++ * @DEVFREQ_WORK_NONE: Initilisation state. ++ * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device(). ++ * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device(). + */ -+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, -+ u64 *system_time, struct timespec64 *ts); ++enum kbase_devfreq_work_type { ++ DEVFREQ_WORK_NONE, ++ DEVFREQ_WORK_SUSPEND, ++ DEVFREQ_WORK_RESUME ++}; + +/** -+ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without -+ * request/release cycle counter -+ * @kbdev: Device pointer -+ * @cycle_counter: Pointer to u64 to store cycle counter in -+ * @system_time: Pointer to u64 to store system time in -+ * @ts: Pointer to struct timespec to store current monotonic -+ * time in ++ * struct kbase_devfreq_queue_info - Object representing an instance for managing ++ * the queued devfreq suspend/resume works. ++ * @workq: Workqueue for devfreq suspend/resume requests ++ * @work: Work item for devfreq suspend & resume ++ * @req_type: Requested work type to be performed by the devfreq ++ * suspend/resume worker ++ * @acted_type: Work type has been acted on by the worker, i.e. the ++ * internal recorded state of the suspend/resume + */ -+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, -+ u64 *cycle_counter, -+ u64 *system_time, -+ struct timespec64 *ts); ++struct kbase_devfreq_queue_info { ++ struct workqueue_struct *workq; ++ struct work_struct work; ++ enum kbase_devfreq_work_type req_type; ++ enum kbase_devfreq_work_type acted_type; ++}; ++ +/** -+ * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled -+ * GPU frequency, using a choice from -+ * kbase_timeout_selector. -+ * -+ * @kbdev: KBase device pointer. -+ * @selector: Value from kbase_scaled_timeout_selector enum. -+ * -+ * Return: Timeout in milliseconds, as an unsigned integer. ++ * struct kbase_process - Representing an object of a kbase process instantiated ++ * when the first kbase context is created under it. ++ * @tgid: Thread group ID. ++ * @total_gpu_pages: Total gpu pages allocated across all the contexts ++ * of this process, it accounts for both native allocations ++ * and dma_buf imported allocations. ++ * @kctx_list: List of kbase contexts created for the process. ++ * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree ++ * based on key tgid, kprcs_node is the node link to ++ * &struct_kbase_device.process_root. ++ * @dma_buf_root: RB tree of the dma-buf imported allocations, imported ++ * across all the contexts created for this process. ++ * Used to ensure that pages of allocation are accounted ++ * only once for the process, even if the allocation gets ++ * imported multiple times for the process. 
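The CPU_ts = alpha * SYS_ts + beta relation documented for struct kbase_backend_time reduces to one line of fixed-point arithmetic once alpha is stored as multiplier/divisor. A sketch of just that arithmetic (the real helper may guard against multiplication overflow differently):

#include <linux/math64.h>

static u64 example_gpu_ts_to_cpu_ts(const struct kbase_backend_time *t,
				    u64 gpu_ts)
{
	/* CPU_ts = SYS_ts * (multiplier / divisor) + offset */
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}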
+ */ -+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, -+ enum kbase_timeout_selector selector); ++struct kbase_process { ++ pid_t tgid; ++ size_t total_gpu_pages; ++ struct list_head kctx_list; ++ ++ struct rb_node kprcs_node; ++ struct rb_root dma_buf_root; ++}; + +/** -+ * kbase_backend_get_cycle_cnt - Reads the GPU cycle counter -+ * -+ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * struct kbase_mem_migrate - Object representing an instance for managing ++ * page migration. + * -+ * Return: Snapshot of the GPU cycle count register. ++ * @free_pages_list: List of deferred pages to free. Mostly used when page migration ++ * is enabled. Pages in memory pool that require migrating ++ * will be freed instead. However page cannot be freed ++ * right away as Linux will need to release the page lock. ++ * Therefore page will be added to this list and freed later. ++ * @free_pages_lock: This lock should be held when adding or removing pages ++ * from @free_pages_list. ++ * @free_pages_workq: Work queue to process the work items queued to free ++ * pages in @free_pages_list. ++ * @free_pages_work: Work item to free pages in @free_pages_list. ++ * @inode: Pointer to inode whose address space operations are used ++ * for page migration purposes. + */ -+u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev); ++struct kbase_mem_migrate { ++ struct list_head free_pages_list; ++ spinlock_t free_pages_lock; ++ struct workqueue_struct *free_pages_workq; ++ struct work_struct free_pages_work; ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++ struct inode *inode; ++#endif ++}; + +/** -+ * kbase_backend_time_init() - Initialize system timestamp converter. -+ * -+ * @kbdev: Kbase device pointer -+ * -+ * This function should only be called after GPU is powered-up and -+ * L2 cached power-up has been initiated. -+ * -+ * Return: Zero on success, error code otherwise. ++ * struct kbase_device - Object representing an instance of GPU platform device, ++ * allocated from the probe method of mali driver. ++ * @hw_quirks_sc: Configuration to be used for the shader cores as per ++ * the HW issues present in the GPU. ++ * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW ++ * issues present in the GPU. ++ * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW ++ * issues present in the GPU. ++ * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU ++ * subsystems as per the HW issues present in the GPU. ++ * @entry: Links the device instance to the global list of GPU ++ * devices. The list would have as many entries as there ++ * are GPU device instances. ++ * @dev: Pointer to the kernel's generic/base representation ++ * of the GPU platform device. ++ * @mdev: Pointer to the miscellaneous device registered to ++ * provide Userspace access to kernel driver through the ++ * device file /dev/malixx. ++ * @reg_start: Base address of the region in physical address space ++ * where GPU registers have been mapped. ++ * @reg_size: Size of the region containing GPU registers ++ * @reg: Kernel virtual address of the region containing GPU ++ * registers, using which Driver will access the registers. ++ * @irqs: Array containing IRQ resource info for 3 types of ++ * interrupts : Job scheduling, MMU & GPU events (like ++ * power management, cache etc.) ++ * @irqs.irq: irq number ++ * @irqs.flags: irq flags ++ * @clocks: Pointer to the input clock resources referenced by ++ * the GPU device node. 
++ * @scmi_clk: Pointer to the input scmi clock resources ++ * @nr_clocks: Number of clocks set in the clocks array. ++ * @regulators: Pointer to the structs corresponding to the ++ * regulators referenced by the GPU device node. ++ * @nr_regulators: Number of regulators set in the regulators array. ++ * @opp_table: Pointer to the device OPP structure maintaining the ++ * link to OPPs attached to a device. This is obtained ++ * after setting regulator names for the device. ++ * @token: Integer replacement for opp_table in kernel versions ++ * 6 and greater. Value is a token id number when 0 or greater, ++ * and a linux errno when negative. Must be initialised ++ * to an non-zero value as 0 is valid token id. ++ * @devname: string containing the name used for GPU device instance, ++ * miscellaneous device is registered using the same name. ++ * @id: Unique identifier for the device, indicates the number of ++ * devices which have been created so far. ++ * @model: Pointer, valid only when Driver is compiled to not access ++ * the real GPU Hw, to the dummy model which tries to mimic ++ * to some extent the state & behavior of GPU Hw in response ++ * to the register accesses made by the Driver. ++ * @irq_slab: slab cache for allocating the work items queued when ++ * model mimics raising of IRQ to cause an interrupt on CPU. ++ * @irq_workq: workqueue for processing the irq work items. ++ * @serving_job_irq: function to execute work items queued when model mimics ++ * the raising of JS irq, mimics the interrupt handler ++ * processing JS interrupts. ++ * @serving_gpu_irq: function to execute work items queued when model mimics ++ * the raising of GPU irq, mimics the interrupt handler ++ * processing GPU interrupts. ++ * @serving_mmu_irq: function to execute work items queued when model mimics ++ * the raising of MMU irq, mimics the interrupt handler ++ * processing MMU interrupts. ++ * @reg_op_lock: lock used by model to serialize the handling of register ++ * accesses made by the driver. ++ * @pm: Per device object for storing data for power management ++ * framework. ++ * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open. ++ * @csf: CSF object for the GPU device. ++ * @js_data: Per device object encapsulating the current context of ++ * Job Scheduler, which is global to the device and is not ++ * tied to any particular struct kbase_context running on ++ * the device ++ * @mem_pools: Global pools of free physical memory pages which can ++ * be used by all the contexts. ++ * @memdev: keeps track of the in use physical pages allocated by ++ * the Driver. ++ * @mmu_mode: Pointer to the object containing methods for programming ++ * the MMU, depending on the type of MMU supported by Hw. ++ * @mgm_dev: Pointer to the memory group manager device attached ++ * to the GPU device. This points to an internal memory ++ * group manager if no platform-specific memory group ++ * manager was retrieved through device tree. ++ * @as: Array of objects representing address spaces of GPU. ++ * @as_free: Bitpattern of free/available GPU address spaces. ++ * @as_to_kctx: Array of pointers to struct kbase_context, having ++ * GPU adrress spaces assigned to them. ++ * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask ++ * register used in the handling of Bus & Page faults. ++ * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are ++ * supported and used where possible. 
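The @reg_start/@reg_size/@reg triplet described above is a single mapped MMIO window, so a raw register read is just a bounds check plus readl() on the mapped base. A sketch only; the driver's own register accessors are not shown here and the helper name is made up:

#include <linux/io.h>

static u32 example_read_gpu_reg(struct kbase_device *kbdev, u32 offset)
{
	if (WARN_ON(offset >= kbdev->reg_size))
		return 0;

	return readl(kbdev->reg + offset);
}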
++ * @gpu_props: Object containing complete information about the ++ * configuration/properties of GPU HW device in use. ++ * @hw_issues_mask: List of SW workarounds for HW issues ++ * @hw_features_mask: List of available HW features. ++ * @disjoint_event: struct for keeping track of the disjoint information, ++ * that whether the GPU is in a disjoint state and the ++ * number of disjoint events that have occurred on GPU. ++ * @disjoint_event.count: disjoint event count ++ * @disjoint_event.state: disjoint event state ++ * @nr_hw_address_spaces: Number of address spaces actually available in the ++ * GPU, remains constant after driver initialisation. ++ * @nr_user_address_spaces: Number of address spaces available to user contexts ++ * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance ++ * counters. ++ * @hwcnt: Structure used for instrumentation and HW counters ++ * dumping ++ * @hwcnt.lock: The lock should be used when accessing any of the ++ * following members ++ * @hwcnt.kctx: kbase context ++ * @hwcnt.addr: HW counter address ++ * @hwcnt.addr_bytes: HW counter size in bytes ++ * @hwcnt.backend: Kbase instrumentation backend ++ * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference ++ * pointer by hwcnt_gpu_iface, which wraps this implementation in ++ * order to extend it with periodic dumping functionality. ++ * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. ++ * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to ++ * perform periodic dumps in order to prevent hardware counter value ++ * overflow or saturation. ++ * @hwcnt_gpu_ctx: Context for GPU hardware counter access. ++ * @hwaccess_lock must be held when calling ++ * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. ++ * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. ++ * @vinstr_ctx: vinstr context created per device. ++ * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device. ++ * @timeline_flags: Bitmask defining which sets of timeline tracepoints ++ * are enabled. If zero, there is no timeline client and ++ * therefore timeline is disabled. ++ * @timeline: Timeline context created per device. ++ * @ktrace: kbase device's ktrace ++ * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to ++ * complete for the GPU jobs before proceeding with the ++ * GPU reset. ++ * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used ++ * to calculate suitable timeouts for wait operations. ++ * @backend_time: Kbase backend time related attributes. ++ * @cache_clean_in_progress: Set when a cache clean has been started, and ++ * cleared when it has finished. This prevents multiple ++ * cache cleans being done simultaneously. ++ * @cache_clean_queued: Pended cache clean operations invoked while another is ++ * in progress. If this is not 0, another cache clean needs ++ * to be triggered immediately after completion of the ++ * current one. ++ * @cache_clean_wait: Signalled when a cache clean has finished. ++ * @platform_context: Platform specific private data to be accessed by ++ * platform specific config files only. ++ * @kctx_list: List of kbase_contexts created for the device, ++ * including any contexts that might be created for ++ * hardware counters. ++ * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. 
++ * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed ++ * to devfreq_add_device() to add devfreq feature to Mali ++ * GPU device. ++ * @devfreq: Pointer to devfreq structure for Mali GPU device, ++ * returned on the call to devfreq_add_device(). ++ * @current_freqs: The real frequencies, corresponding to ++ * @current_nominal_freq, at which the Mali GPU device ++ * is currently operating, as retrieved from ++ * @devfreq_table in the target callback of ++ * @devfreq_profile. ++ * @current_nominal_freq: The nominal frequency currently used for the Mali GPU ++ * device as retrieved through devfreq_recommended_opp() ++ * using the freq value passed as an argument to target ++ * callback of @devfreq_profile ++ * @current_voltages: The voltages corresponding to @current_nominal_freq, ++ * as retrieved from @devfreq_table in the target ++ * callback of @devfreq_profile. ++ * @current_core_mask: bitmask of shader cores that are currently desired & ++ * enabled, corresponding to @current_nominal_freq as ++ * retrieved from @devfreq_table in the target callback ++ * of @devfreq_profile. ++ * @devfreq_table: Pointer to the lookup table for converting between ++ * nominal OPP (operating performance point) frequency, ++ * and real frequency and core mask. This table is ++ * constructed according to operating-points-v2-mali ++ * table in devicetree. ++ * @num_opps: Number of operating performance points available for the Mali ++ * GPU device. ++ * @last_devfreq_metrics: last PM metrics ++ * @devfreq_queue: Per device object for storing data that manages devfreq ++ * suspend & resume request queue and the related items. ++ * @devfreq_cooling: Pointer returned on registering devfreq cooling device ++ * corresponding to @devfreq. ++ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected ++ * mode. It is a sticky flag which is cleared by IPA ++ * once it has made use of information that GPU had ++ * previously entered protected mode. ++ * @ipa: Top level structure for IPA, containing pointers to both ++ * configured & fallback models. ++ * @ipa.lock: Access to this struct must be with ipa.lock held ++ * @ipa.configured_model: ipa model to use ++ * @ipa.fallback_model: ipa fallback model ++ * @ipa.last_metrics: Values of the PM utilization metrics from last time ++ * the power model was invoked. The utilization is ++ * calculated as the difference between last_metrics ++ * and the current values. ++ * @ipa.force_fallback_model: true if use of fallback model has been forced by ++ * the User ++ * @ipa.last_sample_time: Records the time when counters, used for dynamic ++ * energy estimation, were last sampled. ++ * @previous_frequency: Previous frequency of GPU clock used for ++ * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is ++ * restored when L2 is powered on. ++ * @job_fault_debug: Flag to control the dumping of debug data for job faults, ++ * set when the 'job_fault' debugfs file is opened. ++ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver ++ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing ++ * a sub-directory for every context. ++ * @debugfs_instr_directory: Instrumentation debugfs directory ++ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault ++ * has occurred. ++ * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the ++ * occurrence of a job fault. 
++ * @job_fault_resume_wq: Waitqueue on which every context with a faulty job wait ++ * for the job fault dumping to complete before they can ++ * do bottom half of job done for the atoms which followed ++ * the faulty atom. ++ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty ++ * atoms, whereby the work item function waits for the dumping ++ * to get completed. ++ * @job_fault_event_list: List of atoms, each belonging to a different context, which ++ * generated a job fault. ++ * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list ++ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs ++ * file "read_register". ++ * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be ++ * read through debugfs file "read_register". ++ * @ctx_num: Total number of contexts created for the device. ++ * @io_history: Pointer to an object keeping a track of all recent ++ * register accesses. The history of register accesses ++ * can be read through "regs_history" debugfs file. ++ * @hwaccess: Contains a pointer to active kbase context and GPU ++ * backend specific data for HW access layer. ++ * @faults_pending: Count of page/bus faults waiting for bottom half processing ++ * via workqueues. ++ * @mmu_hw_operation_in_progress: Set before sending the MMU command and is ++ * cleared after the command is complete. Whilst this ++ * flag is set, the write to L2_PWROFF register will be ++ * skipped which is needed to workaround the HW issue ++ * GPU2019-3878. PM state machine is invoked after ++ * clearing this flag and @hwaccess_lock is used to ++ * serialize the access. ++ * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction ++ * and cleared after the transaction completes. PM L2 state is ++ * prevented from entering powering up/down transitions when the ++ * flag is set, @hwaccess_lock is used to serialize the access. ++ * @poweroff_pending: Set when power off operation for GPU is started, reset when ++ * power on for GPU is started. ++ * @infinite_cache_active_default: Set to enable using infinite cache for all the ++ * allocations of a new context. ++ * @mem_pool_defaults: Default configuration for the group of memory pools ++ * created for a new context. ++ * @current_gpu_coherency_mode: coherency mode in use, which can be different ++ * from @system_coherency, when using protected mode. ++ * @system_coherency: coherency mode as retrieved from the device tree. ++ * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. ++ * @snoop_enable_smc: SMC function ID to call into Trusted firmware to ++ * enable cache snooping. Value of 0 indicates that it ++ * is not used. ++ * @snoop_disable_smc: SMC function ID to call disable cache snooping. ++ * @protected_ops: Pointer to the methods for switching in or out of the ++ * protected mode, as per the @protected_dev being used. ++ * @protected_dev: Pointer to the protected mode switcher device attached ++ * to the GPU device retrieved through device tree if ++ * GPU do not support protected mode switching natively. ++ * @protected_mode: set to TRUE when GPU is put into protected mode ++ * @protected_mode_transition: set to TRUE when GPU is transitioning into or ++ * out of protected mode. ++ * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be ++ * enabled. Counters must be disabled before transition ++ * into protected mode. 
++ * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not ++ * enabled. ++ * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware ++ * counters, used if atomic disable is not possible. ++ * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of ++ * IRQ + bottom half is being done, to prevent the writes ++ * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. ++ * @inited_subsys: Bitmap of inited sub systems at the time of device probe. ++ * Used during device remove or for handling error in probe. ++ * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize ++ * the updates made to Job dispatcher + scheduler states. ++ * @mmu_hw_mutex: Protects access to MMU operations and address space ++ * related state. ++ * @serialize_jobs: Currently used mode for serialization of jobs, both ++ * intra & inter slots serialization is supported. ++ * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken ++ * when GWT is enabled. Used to restore the original value ++ * on disabling of GWT. ++ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by ++ * Job Scheduler ++ * @l2_size_override: Used to set L2 cache size via device tree blob ++ * @l2_hash_override: Used to set L2 cache hash via device tree blob ++ * @l2_hash_values_override: true if @l2_hash_values is valid. ++ * @l2_hash_values: Used to set L2 asn_hash via device tree blob ++ * @sysc_alloc: Array containing values to be programmed into ++ * SYSC_ALLOC[0..7] GPU registers on L2 cache ++ * power down. These come from either DTB or ++ * via DebugFS (if it is available in kernel). ++ * @process_root: rb_tree root node for maintaining a rb_tree of ++ * kbase_process based on key tgid(thread group ID). ++ * @dma_buf_root: rb_tree root node for maintaining a rb_tree of ++ * &struct kbase_dma_buf based on key dma_buf. ++ * We maintain a rb_tree of dma_buf mappings under ++ * kbase_device and kbase_process, one indicates a ++ * mapping and gpu memory usage at device level and ++ * other one at process level. ++ * @total_gpu_pages: Total GPU pages used for the complete GPU device. ++ * @dma_buf_lock: This mutex should be held while accounting for ++ * @total_gpu_pages from imported dma buffers. ++ * @gpu_mem_usage_lock: This spinlock should be held while accounting ++ * @total_gpu_pages for both native and dma-buf imported ++ * allocations. ++ * @dummy_job_wa: struct for dummy job execution workaround for the ++ * GPU hang issue ++ * @dummy_job_wa.ctx: dummy job workaround context ++ * @dummy_job_wa.jc: dummy job workaround job ++ * @dummy_job_wa.slot: dummy job workaround slot ++ * @dummy_job_wa.flags: dummy job workaround flags ++ * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has ++ * been loaded. Protected by @fw_load_lock. ++ * @arb: Pointer to the arbiter device ++ * @pcm_dev: The priority control manager device. ++ * @oom_notifier_block: notifier_block containing kernel-registered out-of- ++ * memory handler. ++ * @mem_migrate: Per device object for managing page migration. ++ * @live_fence_metadata: Count of live fence metadata structures created by ++ * KCPU queue. These structures may outlive kbase module ++ * itself. Therefore, in such a case, a warning should be ++ * be produced. ++ * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of ++ * a MMU operation ++ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. 
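The devfreq fields documented above form a translation step: devfreq picks a nominal OPP frequency, and @devfreq_table resolves it into the real clock rates and the shader core mask that are then cached in @current_freqs and @current_core_mask. A minimal, self-contained sketch of that lookup follows; the table layout and the example_* names are simplified stand-ins for illustration, not the driver's kbase_devfreq_opp definition. In the driver the table itself is built from the operating-points-v2-mali node in the devicetree, as noted in the @devfreq_table description.

/* Illustrative sketch only -- simplified stand-in types, not driver code. */
#include <stdint.h>
#include <stdio.h>

#define EX_MAX_CLOCKS 2 /* assumption: two clock/regulator pairs */

struct example_devfreq_opp {
	uint64_t opp_freq;                    /* nominal frequency exposed to devfreq */
	uint64_t real_freqs[EX_MAX_CLOCKS];   /* real clock rates to program */
	uint64_t core_mask;                   /* shader cores to keep enabled */
};

/* Resolve a nominal OPP chosen by devfreq into real rates and a core mask,
 * in the spirit of the @devfreq_table lookup described above.
 */
static const struct example_devfreq_opp *
example_lookup_opp(const struct example_devfreq_opp *table, int num_opps,
		   uint64_t nominal_freq)
{
	for (int i = 0; i < num_opps; i++)
		if (table[i].opp_freq == nominal_freq)
			return &table[i];
	return NULL; /* caller would fall back to the nominal frequency itself */
}

int main(void)
{
	const struct example_devfreq_opp table[] = {
		{ 200000000, { 200000000, 200000000 }, 0x3 },
		{ 400000000, { 400000000, 400000000 }, 0xf },
	};
	const struct example_devfreq_opp *opp =
		example_lookup_opp(table, 2, 400000000);

	if (opp)
		printf("core_mask=0x%llx real_freq0=%llu\n",
		       (unsigned long long)opp->core_mask,
		       (unsigned long long)opp->real_freqs[0]);
	return 0;
}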
+ */ -+int kbase_backend_time_init(struct kbase_device *kbdev); ++struct kbase_device { ++ u32 hw_quirks_sc; ++ u32 hw_quirks_tiler; ++ u32 hw_quirks_mmu; ++ u32 hw_quirks_gpu; + -+#endif /* _KBASE_BACKEND_TIME_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c -new file mode 100644 -index 000000000..f44426a73 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c -@@ -0,0 +1,1660 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ struct list_head entry; ++ struct device *dev; ++ struct miscdevice mdev; ++ u64 reg_start; ++ size_t reg_size; ++ void __iomem *reg; + -+#include -+#if IS_ENABLED(CONFIG_COMPAT) -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+#include ++ struct { ++ int irq; ++ int flags; ++ } irqs[3]; ++ ++ struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ unsigned int nr_clocks; ++#if IS_ENABLED(CONFIG_REGULATOR) ++ struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ unsigned int nr_regulators; ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++ int token; ++#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) ++ struct opp_table *opp_table; ++#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ ++#endif /* CONFIG_REGULATOR */ ++ char devname[DEVNAME_SIZE]; ++ u32 id; ++ ++#if !IS_ENABLED(CONFIG_MALI_REAL_HW) ++ void *model; ++ struct kmem_cache *irq_slab; ++ struct workqueue_struct *irq_workq; ++ atomic_t serving_job_irq; ++ atomic_t serving_gpu_irq; ++ atomic_t serving_mmu_irq; ++ spinlock_t reg_op_lock; ++#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ ++ struct kbase_pm_device_data pm; ++ ++ struct kbase_mem_pool_group mem_pools; ++ struct kbasep_mem_device memdev; ++ struct kbase_mmu_mode const *mmu_mode; ++ ++ struct memory_group_manager_device *mgm_dev; ++ ++ struct kbase_as as[BASE_MAX_NR_AS]; ++ u16 as_free; ++ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; ++ ++ spinlock_t mmu_mask_change; ++ ++ bool pagesize_2mb; ++ ++ struct kbase_gpu_props gpu_props; ++ ++ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; ++ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; ++ ++ struct { ++ atomic_t count; ++ atomic_t state; ++ } disjoint_event; ++ ++ s8 nr_hw_address_spaces; ++ s8 nr_user_address_spaces; ++ ++ /** ++ * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to ++ * restore to L2_CONFIG upon GPU reset. 
++ */ ++ u8 pbha_propagate_bits; ++ ++#if MALI_USE_CSF ++ struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; +#else -+#include -+#endif ++ struct kbase_hwcnt { ++ spinlock_t lock; + -+#include -+#include -+#include -+#include -+#include ++ struct kbase_context *kctx; ++ u64 addr; ++ u64 addr_bytes; + -+#include ++ struct kbase_instr_backend backend; ++ } hwcnt; + -+#include ++ struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend; ++#endif + -+/* Return whether katom will run on the GPU or not. Currently only soft jobs and -+ * dependency-only atoms do not run on the GPU -+ */ -+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ -+ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ -+ BASE_JD_REQ_DEP))) ++ struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; ++ struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer; + -+/* -+ * This is the kernel side of the API. Only entry points are: -+ * - kbase_jd_submit(): Called from userspace to submit a single bag -+ * - kbase_jd_done(): Called from interrupt context to track the -+ * completion of a job. -+ * Callouts: -+ * - to the job manager (enqueue a job) -+ * - to the event subsystem (signals the completion/failure of bag/job-chains). -+ */ ++ struct kbase_hwcnt_context *hwcnt_gpu_ctx; ++ struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; ++ struct kbase_vinstr_context *vinstr_ctx; ++ struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx; + -+static void __user * -+get_compat_pointer(struct kbase_context *kctx, const u64 p) -+{ -+#if IS_ENABLED(CONFIG_COMPAT) -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ return compat_ptr(p); ++ atomic_t timeline_flags; ++ struct kbase_timeline *timeline; ++ ++#if KBASE_KTRACE_TARGET_RBUF ++ struct kbase_ktrace ktrace; +#endif -+ return u64_to_user_ptr(p); -+} ++ u32 reset_timeout_ms; + -+/* Mark an atom as complete, and trace it in kinstr_jm */ -+static void jd_mark_atom_complete(struct kbase_jd_atom *katom) -+{ -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ kbase_kinstr_jm_atom_complete(katom); -+ dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", -+ (void *)katom); -+ KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE(katom->kctx->kbdev, katom); -+} ++ u64 lowest_gpu_freq_khz; + -+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs -+ * -+ * Returns whether the JS needs a reschedule. 
-+ * -+ * Note that the caller must also check the atom status and -+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call kbase_jd_done_nolock -+ */ -+static bool jd_run_atom(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; ++#if MALI_USE_CSF ++ struct kbase_backend_time backend_time; ++#endif + -+ dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", -+ (void *)katom, (void *)kctx); ++ bool cache_clean_in_progress; ++ u32 cache_clean_queued; ++ wait_queue_head_t cache_clean_wait; + -+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); ++ void *platform_context; + -+ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { -+ /* Dependency only atom */ -+ trace_sysgraph(SGR_SUBMIT, kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom)); -+ jd_mark_atom_complete(katom); -+ return false; -+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { -+ /* Soft-job */ -+ if (katom->will_fail_event_code) { -+ kbase_finish_soft_job(katom); -+ jd_mark_atom_complete(katom); -+ return false; -+ } -+ if (kbase_process_soft_job(katom) == 0) { -+ kbase_finish_soft_job(katom); -+ jd_mark_atom_complete(katom); -+ } -+ return false; -+ } ++ struct list_head kctx_list; ++ struct mutex kctx_list_lock; + -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); -+ /* Queue an action about whether we should try scheduling a context */ -+ return kbasep_js_add_job(kctx, katom); -+} ++ struct rockchip_opp_info opp_info; ++ bool is_runtime_resumed; ++ unsigned long current_nominal_freq; ++ struct monitor_dev_info *mdev_info; ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ struct devfreq_dev_profile devfreq_profile; ++ struct devfreq *devfreq; ++ unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u64 current_core_mask; ++ struct kbase_devfreq_opp *devfreq_table; ++ int num_opps; ++ struct kbasep_pm_metrics last_devfreq_metrics; ++ struct ipa_power_model_data *model_data; ++ struct kbase_devfreq_queue_info devfreq_queue; + -+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) -+{ -+ struct kbase_device *kbdev; ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++ struct devfreq_cooling_power dfc_power; ++ struct thermal_cooling_device *devfreq_cooling; ++ bool ipa_protection_mode_switched; ++ struct { ++ /* Access to this struct must be with ipa.lock held */ ++ struct mutex lock; ++ struct kbase_ipa_model *configured_model; ++ struct kbase_ipa_model *fallback_model; + -+ KBASE_DEBUG_ASSERT(katom); -+ kbdev = katom->kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev); ++ /* Values of the PM utilization metrics from last time the ++ * power model was invoked. The utilization is calculated as ++ * the difference between last_metrics and the current values. ++ */ ++ struct kbasep_pm_metrics last_metrics; + -+ /* Check whether the atom's other dependencies were already met. If -+ * katom is a GPU atom then the job scheduler may be able to represent -+ * the dependencies, hence we may attempt to submit it before they are -+ * met. Other atoms must have had both dependencies resolved. 
-+ */ -+ if (IS_GPU_ATOM(katom) || -+ (!kbase_jd_katom_dep_atom(&katom->dep[0]) && -+ !kbase_jd_katom_dep_atom(&katom->dep[1]))) { -+ /* katom dep complete, attempt to run it */ -+ bool resched = false; ++ /* true if use of fallback model has been forced by the User */ ++ bool force_fallback_model; ++ /* Records the time when counters, used for dynamic energy ++ * estimation, were last sampled. ++ */ ++ ktime_t last_sample_time; ++ } ipa; ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++ unsigned long previous_frequency; + -+ KBASE_TLSTREAM_TL_RUN_ATOM_START( -+ katom->kctx->kbdev, katom, -+ kbase_jd_atom_id(katom->kctx, katom)); -+ resched = jd_run_atom(katom); -+ KBASE_TLSTREAM_TL_RUN_ATOM_END(katom->kctx->kbdev, katom, -+ kbase_jd_atom_id(katom->kctx, -+ katom)); ++#if !MALI_USE_CSF ++ atomic_t job_fault_debug; ++#endif /* !MALI_USE_CSF */ + -+ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { -+ /* The atom has already finished */ -+ resched |= kbase_jd_done_nolock(katom, true); -+ } ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct dentry *mali_debugfs_directory; ++ struct dentry *debugfs_ctx_directory; ++ struct dentry *debugfs_instr_directory; + -+ if (resched) -+ kbase_js_sched_all(kbdev); -+ } -+} ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ u64 debugfs_as_read_bitmap; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ + -+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) -+{ -+} ++#if !MALI_USE_CSF ++ wait_queue_head_t job_fault_wq; ++ wait_queue_head_t job_fault_resume_wq; ++ struct workqueue_struct *job_fault_resume_workq; ++ struct list_head job_fault_event_list; ++ spinlock_t job_fault_event_lock; ++#endif /* !MALI_USE_CSF */ + -+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) -+{ -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); ++#if !MALI_CUSTOMER_RELEASE ++ struct { ++ u32 reg_offset; ++ } regs_dump_debugfs_data; ++#endif /* !MALI_CUSTOMER_RELEASE */ ++#endif /* CONFIG_DEBUG_FS */ + -+ kbase_gpu_vm_lock(katom->kctx); -+ /* only roll back if extres is non-NULL */ -+ if (katom->extres) { -+ u32 res_no; ++ atomic_t ctx_num; + -+ res_no = katom->nr_extres; -+ while (res_no-- > 0) { -+ kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); -+ } -+ kfree(katom->extres); -+ katom->extres = NULL; -+ } -+ kbase_gpu_vm_unlock(katom->kctx); -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_io_history io_history; ++#endif /* CONFIG_DEBUG_FS */ + -+/* -+ * Set up external resources needed by this job. -+ * -+ * jctx.lock must be held when this is called. 
-+ */ ++ struct kbase_hwaccess_data hwaccess; + -+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) -+{ -+ int err = -EINVAL; -+ u32 res_no; -+ struct base_external_resource *input_extres; ++ atomic_t faults_pending; + -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); ++#if MALI_USE_CSF ++ bool mmu_hw_operation_in_progress; ++#endif ++ bool mmu_page_migrate_in_progress; ++ bool poweroff_pending; + -+ /* no resources encoded, early out */ -+ if (!katom->nr_extres) -+ return -EINVAL; ++ bool infinite_cache_active_default; + -+ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); -+ if (!katom->extres) -+ return -ENOMEM; ++ struct kbase_mem_pool_group_config mem_pool_defaults; + -+ input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); -+ if (!input_extres) { -+ err = -ENOMEM; -+ goto failed_input_alloc; -+ } ++ u32 current_gpu_coherency_mode; ++ u32 system_coherency; + -+ if (copy_from_user(input_extres, -+ get_compat_pointer(katom->kctx, user_atom->extres_list), -+ sizeof(*input_extres) * katom->nr_extres) != 0) { -+ err = -EINVAL; -+ goto failed_input_copy; -+ } ++ bool cci_snoop_enabled; + -+ /* Take the processes mmap lock */ -+ down_read(kbase_mem_get_process_mmap_lock()); ++ u32 snoop_enable_smc; ++ u32 snoop_disable_smc; + -+ /* need to keep the GPU VM locked while we set up UMM buffers */ -+ kbase_gpu_vm_lock(katom->kctx); -+ for (res_no = 0; res_no < katom->nr_extres; res_no++) { -+ struct base_external_resource *user_res = &input_extres[res_no]; -+ struct kbase_va_region *reg; ++ const struct protected_mode_ops *protected_ops; + -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); -+ /* did we find a matching region object? */ -+ if (unlikely(kbase_is_region_invalid_or_free(reg))) { -+ /* roll back */ -+ goto failed_loop; -+ } ++ struct protected_mode_device *protected_dev; + -+ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && -+ (reg->flags & KBASE_REG_PROTECTED)) { -+ katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; -+ } ++ bool protected_mode; + -+ err = kbase_map_external_resource(katom->kctx, reg, current->mm); -+ if (err) -+ goto failed_loop; ++ bool protected_mode_transition; + -+ katom->extres[res_no] = reg; -+ } -+ /* successfully parsed the extres array */ -+ /* drop the vm lock now */ -+ kbase_gpu_vm_unlock(katom->kctx); ++ bool protected_mode_hwcnt_desired; + -+ /* Release the processes mmap lock */ -+ up_read(kbase_mem_get_process_mmap_lock()); ++ bool protected_mode_hwcnt_disabled; + -+ /* Free the buffer holding data from userspace */ -+ kfree(input_extres); ++ struct work_struct protected_mode_hwcnt_disable_work; + -+ /* all done OK */ -+ return 0; + -+/* error handling section */ -+failed_loop: -+ /* undo the loop work. 
We are guaranteed to have access to the VA region -+ * as we hold a reference to it until it's unmapped -+ */ -+ while (res_no-- > 0) { -+ struct kbase_va_region *reg = katom->extres[res_no]; ++ bool irq_reset_flush; + -+ kbase_unmap_external_resource(katom->kctx, reg); -+ } -+ kbase_gpu_vm_unlock(katom->kctx); ++ u32 inited_subsys; + -+ /* Release the processes mmap lock */ -+ up_read(kbase_mem_get_process_mmap_lock()); ++ spinlock_t hwaccess_lock; + -+failed_input_copy: -+ kfree(input_extres); -+failed_input_alloc: -+ kfree(katom->extres); -+ katom->extres = NULL; -+ return err; -+} ++ struct mutex mmu_hw_mutex; + -+static inline void jd_resolve_dep(struct list_head *out_list, -+ struct kbase_jd_atom *katom, -+ u8 d, bool ctx_is_dying) -+{ -+ u8 other_d = !d; ++ u8 l2_size_override; ++ u8 l2_hash_override; ++ bool l2_hash_values_override; ++ u32 l2_hash_values[ASN_HASH_COUNT]; + -+ while (!list_empty(&katom->dep_head[d])) { -+ struct kbase_jd_atom *dep_atom; -+ struct kbase_jd_atom *other_dep_atom; -+ u8 dep_type; ++ u32 sysc_alloc[SYSC_ALLOC_COUNT]; + -+ dep_atom = list_entry(katom->dep_head[d].next, -+ struct kbase_jd_atom, dep_item[d]); -+ list_del(katom->dep_head[d].next); ++ struct mutex fw_load_lock; ++#if MALI_USE_CSF ++ /* CSF object for the GPU device. */ ++ struct kbase_csf_device csf; ++#else ++ struct kbasep_js_device_data js_data; + -+ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); -+ kbase_jd_katom_dep_clear(&dep_atom->dep[d]); ++ /* See KBASE_JS_*_PRIORITY_MODE for details. */ ++ u32 js_ctx_scheduling_mode; + -+ if (katom->event_code != BASE_JD_EVENT_DONE && -+ (dep_type != BASE_JD_DEP_TYPE_ORDER)) { -+ dep_atom->event_code = katom->event_code; -+ KBASE_DEBUG_ASSERT(dep_atom->status != -+ KBASE_JD_ATOM_STATE_UNUSED); ++ /* See KBASE_SERIALIZE_* for details */ ++ u8 serialize_jobs; + -+ dep_atom->will_fail_event_code = dep_atom->event_code; -+ } -+ other_dep_atom = (struct kbase_jd_atom *) -+ kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); ++#ifdef CONFIG_MALI_CINSTR_GWT ++ u8 backup_serialize_jobs; ++#endif /* CONFIG_MALI_CINSTR_GWT */ + -+ if (!dep_atom->in_jd_list && (!other_dep_atom || -+ (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && -+ !dep_atom->will_fail_event_code && -+ !other_dep_atom->will_fail_event_code))) { -+ dep_atom->in_jd_list = true; -+ list_add_tail(&dep_atom->jd_item, out_list); -+ } -+ } -+} ++#endif /* MALI_USE_CSF */ + -+/** -+ * is_dep_valid - Validate that a dependency is valid for early dependency -+ * submission -+ * @katom: Dependency atom to validate -+ * -+ * A dependency is valid if any of the following are true : -+ * - It does not exist (a non-existent dependency does not block submission) -+ * - It is in the job scheduler -+ * - It has completed, does not have a failure event code, and has not been -+ * marked to fail in the future -+ * -+ * Return: true if valid, false otherwise -+ */ -+static bool is_dep_valid(struct kbase_jd_atom *katom) -+{ -+ /* If there's no dependency then this is 'valid' from the perspective of -+ * early dependency submission -+ */ -+ if (!katom) -+ return true; ++ struct rb_root process_root; ++ struct rb_root dma_buf_root; + -+ /* Dependency must have reached the job scheduler */ -+ if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) -+ return false; ++ size_t total_gpu_pages; ++ struct mutex dma_buf_lock; ++ spinlock_t gpu_mem_usage_lock; + -+ /* If dependency has completed and has failed or will fail then it is -+ * not valid -+ */ -+ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && -+ 
(katom->event_code != BASE_JD_EVENT_DONE || -+ katom->will_fail_event_code)) -+ return false; ++ struct { ++ struct kbase_context *ctx; ++ u64 jc; ++ int slot; ++ u64 flags; ++ } dummy_job_wa; ++ bool dummy_job_wa_loaded; + -+ return true; -+} ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ struct kbase_arbiter_device arb; ++#endif ++ /* Priority Control Manager device */ ++ struct priority_control_manager_device *pcm_dev; + -+static void jd_try_submitting_deps(struct list_head *out_list, -+ struct kbase_jd_atom *node) -+{ -+ int i; ++ struct notifier_block oom_notifier_block; + -+ for (i = 0; i < 2; i++) { -+ struct list_head *pos; ++#if !MALI_USE_CSF ++ spinlock_t quick_reset_lock; ++ bool quick_reset_enabled; ++ /* ++ * 进入 quck_reset_mode åŽ (quick_reset_enabled 为 true), ++ * 对已ç»è¿›å…¥ KBASE_JD_ATOM_STATE_HW_COMPLETED 状æ€çš„ atom 的计数. ++ * ++ * è‹¥ num_of_atoms_hw_completed 达到一定值, 将退出 quck_reset_mode. ++ * è§ kbase_js_complete_atom() 对 num_of_atoms_hw_completed 的引用. ++ */ ++ u32 num_of_atoms_hw_completed; ++#endif + -+ list_for_each(pos, &node->dep_head[i]) { -+ struct kbase_jd_atom *dep_atom = list_entry(pos, -+ struct kbase_jd_atom, dep_item[i]); ++ struct kbase_mem_migrate mem_migrate; + -+ if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { -+ /*Check if atom deps look sane*/ -+ bool dep0_valid = is_dep_valid( -+ dep_atom->dep[0].atom); -+ bool dep1_valid = is_dep_valid( -+ dep_atom->dep[1].atom); ++#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) ++ atomic_t live_fence_metadata; ++#endif ++ u32 mmu_as_inactive_wait_time_ms; ++ struct kmem_cache *va_region_slab; ++}; + -+ if (dep0_valid && dep1_valid) { -+ dep_atom->in_jd_list = true; -+ list_add(&dep_atom->jd_item, out_list); -+ } -+ } -+ } -+ } -+} ++/** ++ * enum kbase_file_state - Initialization state of a file opened by @kbase_open ++ * ++ * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version. ++ * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in ++ * progress and other setup calls shall be ++ * rejected. ++ * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has ++ * completed, awaiting context creation flags. ++ * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress ++ * and other setup calls shall be rejected. ++ * @KBASE_FILE_COMPLETE: Indicates if the setup for context has ++ * completed, i.e. flags have been set for the ++ * context. ++ * ++ * The driver allows only limited interaction with user-space until setup ++ * is complete. ++ */ ++enum kbase_file_state { ++ KBASE_FILE_NEED_VSN, ++ KBASE_FILE_VSN_IN_PROGRESS, ++ KBASE_FILE_NEED_CTX, ++ KBASE_FILE_CTX_IN_PROGRESS, ++ KBASE_FILE_COMPLETE ++}; + ++/** ++ * struct kbase_file - Object representing a file opened by @kbase_open ++ * ++ * @kbdev: Object representing an instance of GPU platform device, ++ * allocated from the probe method of the Mali driver. ++ * @filp: Pointer to the struct file corresponding to device file ++ * /dev/malixx instance, passed to the file's open method. ++ * @kctx: Object representing an entity, among which GPU is ++ * scheduled and which gets its own GPU address space. ++ * Invalid until @setup_state is KBASE_FILE_COMPLETE. ++ * @api_version: Contains the version number for User/kernel interface, ++ * used for compatibility check. Invalid until ++ * @setup_state is KBASE_FILE_NEED_CTX. ++ * @setup_state: Initialization state of the file. Values come from ++ * the kbase_file_state enumeration. 
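The @setup_state field documented above implements a small handshake state machine: a file starts in KBASE_FILE_NEED_VSN, moves through the *_IN_PROGRESS states while a setup ioctl is running, and only reaches KBASE_FILE_COMPLETE once the context flags have been set. The sketch below models that progression in plain C with C11 atomics; the example_* names and compare-and-swap transitions are illustrative assumptions, not the driver's code, but they show why a setup call arriving in the wrong state, or while another transition is in progress, is rejected.

/* Illustrative model of the kbase_file setup handshake -- not driver code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum example_file_state {
	EX_FILE_NEED_VSN,        /* waiting for the API version handshake */
	EX_FILE_VSN_IN_PROGRESS, /* version check running, reject other setup calls */
	EX_FILE_NEED_CTX,        /* version agreed, waiting for context flags */
	EX_FILE_CTX_IN_PROGRESS, /* context creation running */
	EX_FILE_COMPLETE         /* context ready, full interface available */
};

struct example_file {
	atomic_int setup_state;
};

/* Attempt the "set version" step: only legal from NEED_VSN. */
static bool example_set_api_version(struct example_file *f)
{
	int expected = EX_FILE_NEED_VSN;

	if (!atomic_compare_exchange_strong(&f->setup_state, &expected,
					    EX_FILE_VSN_IN_PROGRESS))
		return false; /* wrong state, or another caller got in first */

	/* ... version negotiation would happen here ... */
	atomic_store(&f->setup_state, EX_FILE_NEED_CTX);
	return true;
}

/* Attempt the "create context" step: only legal from NEED_CTX. */
static bool example_create_context(struct example_file *f)
{
	int expected = EX_FILE_NEED_CTX;

	if (!atomic_compare_exchange_strong(&f->setup_state, &expected,
					    EX_FILE_CTX_IN_PROGRESS))
		return false;

	/* ... context allocation would happen here ... */
	atomic_store(&f->setup_state, EX_FILE_COMPLETE);
	return true;
}

int main(void)
{
	struct example_file f = { .setup_state = EX_FILE_NEED_VSN };

	printf("create before version: %d\n", example_create_context(&f)); /* 0 */
	printf("set version:           %d\n", example_set_api_version(&f)); /* 1 */
	printf("create context:        %d\n", example_create_context(&f));  /* 1 */
	return 0;
}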
++ */ ++struct kbase_file { ++ struct kbase_device *kbdev; ++ struct file *filp; ++ struct kbase_context *kctx; ++ unsigned long api_version; ++ atomic_t setup_state; ++}; +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** -+ * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. ++ * enum kbase_context_flags - Flags for kbase contexts + * -+ * @katom: An atom that has just finished. ++ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit ++ * process on a 64-bit kernel. + * -+ * Read back actual just-in-time memory region usage from atoms that provide -+ * this information, and update the current physical page pressure. ++ * @KCTX_RUNNABLE_REF: Set when context is counted in ++ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * -+ * The caller must hold the kbase_jd_context.lock. ++ * @KCTX_ACTIVE: Set when the context is active. ++ * ++ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this ++ * context. ++ * ++ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been ++ * initialized. ++ * ++ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new ++ * allocations. Existing allocations will not change. ++ * ++ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. ++ * ++ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept ++ * scheduled in. ++ * ++ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. ++ * This is only ever updated whilst the jsctx_mutex is held. ++ * ++ * @KCTX_DYING: Set when the context process is in the process of being evicted. ++ * ++ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory ++ * allocations. For 64-bit clients it is enabled by default, and disabled by ++ * default on 32-bit clients. Being able to clear this flag is only used for ++ * testing purposes of the custom zone allocation on 64-bit user-space builds, ++ * where we also require more control than is available through e.g. the JIT ++ * allocation mechanism. However, the 64-bit user-space client must still ++ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled ++ * from it for job slot 0. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled ++ * from it for job slot 1. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled ++ * from it for job slot 2. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for ++ * the context due to unhandled page(or bus) fault. It is cleared when the ++ * refcount for the context drops to 0 or on when the address spaces are ++ * re-enabled on GPU reset or power cycle. ++ * ++ * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual ++ * address page limit, so we must take care to not exceed the physical limit ++ * ++ * All members need to be separate bits. This enum is intended for use in a ++ * bitmask where multiple values get OR-ed together. 
+ */ -+static void jd_update_jit_usage(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_va_region *reg; -+ struct kbase_vmap_struct mapping; -+ u64 *ptr; -+ u64 used_pages; -+ unsigned int idx; ++enum kbase_context_flags { ++ KCTX_COMPAT = 1U << 0, ++ KCTX_RUNNABLE_REF = 1U << 1, ++ KCTX_ACTIVE = 1U << 2, ++ KCTX_PULLED = 1U << 3, ++ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, ++ KCTX_INFINITE_CACHE = 1U << 5, ++ KCTX_SUBMIT_DISABLED = 1U << 6, ++ KCTX_PRIVILEGED = 1U << 7, ++ KCTX_SCHEDULED = 1U << 8, ++ KCTX_DYING = 1U << 9, ++ KCTX_FORCE_SAME_VA = 1U << 11, ++ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, ++ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, ++ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, ++ KCTX_AS_DISABLED_ON_FAULT = 1U << 15, ++ KCTX_JPL_ENABLED = 1U << 16, ++}; ++#else ++/** ++ * enum kbase_context_flags - Flags for kbase contexts ++ * ++ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit ++ * process on a 64-bit kernel. ++ * ++ * @KCTX_RUNNABLE_REF: Set when context is counted in ++ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. ++ * ++ * @KCTX_ACTIVE: Set when the context is active. ++ * ++ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this ++ * context. ++ * ++ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been ++ * initialized. ++ * ++ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new ++ * allocations. Existing allocations will not change. ++ * ++ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. ++ * ++ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept ++ * scheduled in. ++ * ++ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. ++ * This is only ever updated whilst the jsctx_mutex is held. ++ * ++ * @KCTX_DYING: Set when the context process is in the process of being evicted. ++ * ++ * ++ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory ++ * allocations. For 64-bit clients it is enabled by default, and disabled by ++ * default on 32-bit clients. Being able to clear this flag is only used for ++ * testing purposes of the custom zone allocation on 64-bit user-space builds, ++ * where we also require more control than is available through e.g. the JIT ++ * allocation mechanism. However, the 64-bit user-space client must still ++ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled ++ * from it for job slot 0. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled ++ * from it for job slot 1. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled ++ * from it for job slot 2. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for ++ * the context due to unhandled page(or bus) fault. It is cleared when the ++ * refcount for the context drops to 0 or on when the address spaces are ++ * re-enabled on GPU reset or power cycle. ++ * ++ * All members need to be separate bits. This enum is intended for use in a ++ * bitmask where multiple values get OR-ed together. 
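Since each flag above occupies a distinct bit, all of them fit in one atomic word and can be set, cleared and tested independently; elsewhere in this patch the driver reads them through helpers such as kbase_ctx_flag(kctx, KCTX_DYING). The snippet below is a self-contained sketch of that bitmask pattern with illustrative example_* names, not the driver's accessors.

/* Minimal sketch of the separate-bit flag pattern -- not the driver's helpers. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum example_ctx_flags {
	EX_KCTX_COMPAT          = 1u << 0,
	EX_KCTX_ACTIVE          = 1u << 2,
	EX_KCTX_SUBMIT_DISABLED = 1u << 6,
	EX_KCTX_DYING           = 1u << 9,
};

struct example_ctx {
	atomic_uint flags; /* all flags OR-ed into one word */
};

static void example_ctx_flag_set(struct example_ctx *c, unsigned int flag)
{
	atomic_fetch_or(&c->flags, flag);
}

static void example_ctx_flag_clear(struct example_ctx *c, unsigned int flag)
{
	atomic_fetch_and(&c->flags, ~flag);
}

static bool example_ctx_flag(struct example_ctx *c, unsigned int flag)
{
	return (atomic_load(&c->flags) & flag) != 0;
}

int main(void)
{
	struct example_ctx c = { .flags = 0 };

	example_ctx_flag_set(&c, EX_KCTX_ACTIVE);
	example_ctx_flag_set(&c, EX_KCTX_SUBMIT_DISABLED);
	example_ctx_flag_clear(&c, EX_KCTX_SUBMIT_DISABLED);

	printf("active=%d dying=%d\n",
	       example_ctx_flag(&c, EX_KCTX_ACTIVE),
	       example_ctx_flag(&c, EX_KCTX_DYING)); /* active=1 dying=0 */
	return 0;
}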
++ */ ++enum kbase_context_flags { ++ KCTX_COMPAT = 1U << 0, ++ KCTX_RUNNABLE_REF = 1U << 1, ++ KCTX_ACTIVE = 1U << 2, ++ KCTX_PULLED = 1U << 3, ++ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, ++ KCTX_INFINITE_CACHE = 1U << 5, ++ KCTX_SUBMIT_DISABLED = 1U << 6, ++ KCTX_PRIVILEGED = 1U << 7, ++ KCTX_SCHEDULED = 1U << 8, ++ KCTX_DYING = 1U << 9, ++ KCTX_FORCE_SAME_VA = 1U << 11, ++ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, ++ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, ++ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, ++ KCTX_AS_DISABLED_ON_FAULT = 1U << 15, ++}; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ lockdep_assert_held(&kctx->jctx.lock); ++struct kbase_sub_alloc { ++ struct list_head link; ++ struct page *page; ++ DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); ++}; + -+ /* If this atom wrote to JIT memory, find out how much it has written -+ * and update the usage information in the region. -+ */ -+ for (idx = 0; -+ idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; -+ idx++) { -+ enum heap_pointer { LOW = 0, HIGH, COUNT }; -+ size_t size_to_read; -+ u64 read_val; ++/** ++ * struct kbase_context - Kernel base context ++ * ++ * @filp: Pointer to the struct file corresponding to device file ++ * /dev/malixx instance, passed to the file's open method. ++ * @kbdev: Pointer to the Kbase device for which the context is created. ++ * @kctx_list_link: Node into Kbase device list of contexts. ++ * @mmu: Structure holding details of the MMU tables for this ++ * context ++ * @id: Unique identifier for the context, indicates the number of ++ * contexts which have been created for the device so far. ++ * @api_version: contains the version number for User/kernel interface, ++ * used for compatibility check. ++ * @event_list: list of posted events about completed atoms, to be sent to ++ * event handling thread of Userpsace. ++ * @event_coalesce_list: list containing events corresponding to successive atoms ++ * which have requested deferred delivery of the completion ++ * events to Userspace. ++ * @event_mutex: Lock to protect the concurrent access to @event_list & ++ * @event_mutex. ++ * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver ++ * should stop posting events and also inform event handling ++ * thread that context termination is in progress. ++ * @event_workq: Workqueue for processing work items corresponding to atoms ++ * that do not return an event to userspace. ++ * @event_count: Count of the posted events to be consumed by Userspace. ++ * @event_coalesce_count: Count of the events present in @event_coalesce_list. ++ * @flags: bitmap of enums from kbase_context_flags, indicating the ++ * state & attributes for the context. ++ * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, ++ * which can alias number of memory regions. The page is ++ * represent a region where it is mapped with a write-alloc ++ * cache setup, typically used when the write result of the ++ * GPU isn't needed, but the GPU must write anyway. ++ * @mem_partials_lock: Lock for protecting the operations done on the elements ++ * added to @mem_partials list. ++ * @mem_partials: List head for the list of large pages, 2MB in size, which ++ * have been split into 4 KB pages and are used partially ++ * for the allocations >= 2 MB in size. ++ * @reg_lock: Lock used for GPU virtual address space management operations, ++ * like adding/freeing a memory region in the address space. ++ * Can be converted to a rwlock ?. 
++ * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA ++ * zone of the GPU virtual address space. Used for allocations ++ * having the same value for GPU & CPU virtual address. ++ * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA ++ * zone of the GPU virtual address space. ++ * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA ++ * zone of the GPU virtual address space. Used for GPU-executable ++ * allocations which don't need the SAME_VA property. ++ * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the ++ * EXEC_FIXED_VA zone of the GPU virtual address space. Used for ++ * GPU-executable allocations with FIXED/FIXABLE GPU virtual ++ * addresses. ++ * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone ++ * of the GPU virtual address space. Used for allocations with ++ * FIXED/FIXABLE GPU virtual addresses. ++ * @num_fixable_allocs: A count for the number of memory allocations with the ++ * BASE_MEM_FIXABLE property. ++ * @num_fixed_allocs: A count for the number of memory allocations with the ++ * BASE_MEM_FIXED property. ++ * @reg_zone: Zone information for the reg_rbtree_<...> members. ++ * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for ++ * SAME_VA allocations to defer the reservation of memory region ++ * (from the GPU virtual address space) from base_mem_alloc ++ * ioctl to mmap system call. This helps returning unique ++ * handles, disguised as GPU VA, to Userspace from base_mem_alloc ++ * and later retrieving the pointer to memory region structure ++ * in the mmap handler. ++ * @pending_regions: Array containing pointers to memory region structures, ++ * used in conjunction with @cookies bitmask mainly for ++ * providing a mechansim to have the same value for CPU & ++ * GPU virtual address. ++ * @event_queue: Wait queue used for blocking the thread, which consumes ++ * the base_jd_event corresponding to an atom, when there ++ * are no more posted events. ++ * @tgid: Thread group ID of the process whose thread created ++ * the context (by calling KBASE_IOCTL_VERSION_CHECK or ++ * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). ++ * This is usually, but not necessarily, the same as the ++ * process whose thread opened the device file ++ * /dev/malixx instance. ++ * @pid: ID of the thread, corresponding to process @tgid, ++ * which actually created the context. This is usually, ++ * but not necessarily, the same as the thread which ++ * opened the device file /dev/malixx instance. ++ * @csf: kbase csf context ++ * @jctx: object encapsulating all the Job dispatcher related state, ++ * including the array of atoms. ++ * @used_pages: Keeps a track of the number of 4KB physical pages in use ++ * for the context. ++ * @nonmapped_pages: Updated in the same way as @used_pages, except for the case ++ * when special tracking page is freed by userspace where it ++ * is reset to 0. ++ * @permanent_mapped_pages: Usage count of permanently mapped memory ++ * @mem_pools: Context-specific pools of free physical memory pages. ++ * @reclaim: Shrinker object registered with the kernel containing ++ * the pointer to callback function which is invoked under ++ * low memory conditions. In the callback function Driver ++ * frees up the memory for allocations marked as ++ * evictable/reclaimable. ++ * @evict_list: List head for the list containing the allocations which ++ * can be evicted or freed up in the shrinker callback. 
++ * @evict_nents: Total number of pages allocated by the allocations within ++ * @evict_list (atomic). ++ * @waiting_soft_jobs: List head for the list containing softjob atoms, which ++ * are either waiting for the event set operation, or waiting ++ * for the signaling of input fence or waiting for the GPU ++ * device to powered on so as to dump the CPU/GPU timestamps. ++ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent ++ * accesses. ++ * @dma_fence: Object containing list head for the list of dma-buf fence ++ * waiting atoms and the waitqueue to process the work item ++ * queued for the atoms blocked on the signaling of dma-buf ++ * fences. ++ * @dma_fence.waiting_resource: list head for the list of dma-buf fence ++ * @dma_fence.wq: waitqueue to process the work item queued ++ * @as_nr: id of the address space being used for the scheduled in ++ * context. This is effectively part of the Run Pool, because ++ * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst ++ * the context is scheduled in. The hwaccess_lock must be held ++ * whilst accessing this. ++ * If the context relating to this value of as_nr is required, ++ * then the context must be retained to ensure that it doesn't ++ * disappear whilst it is being used. Alternatively, hwaccess_lock ++ * can be held to ensure the context doesn't disappear (but this ++ * has restrictions on what other locks can be taken simutaneously). ++ * @refcount: Keeps track of the number of users of this context. A user ++ * can be a job that is available for execution, instrumentation ++ * needing to 'pin' a context for counter collection, etc. ++ * If the refcount reaches 0 then this context is considered ++ * inactive and the previously programmed AS might be cleared ++ * at any point. ++ * Generally the reference count is incremented when the context ++ * is scheduled in and an atom is pulled from the context's per ++ * slot runnable tree in JM GPU or GPU command queue ++ * group is programmed on CSG slot in CSF GPU. ++ * @process_mm: Pointer to the memory descriptor of the process which ++ * created the context. Used for accounting the physical ++ * pages used for GPU allocations, done for the context, ++ * to the memory consumed by the process. A reference is taken ++ * on this descriptor for the Userspace created contexts so that ++ * Kbase can safely access it to update the memory usage counters. ++ * The reference is dropped on context termination. ++ * @gpu_va_end: End address of the GPU va space (in 4KB page units) ++ * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all ++ * tiler heaps of the kbase context. ++ * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the ++ * kbase context. ++ * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the ++ * kbase context. ++ * @jit_va: Indicates if a JIT_VA zone has been created. ++ * @mem_profile_data: Buffer containing the profiling information provided by ++ * Userspace, can be read through the mem_profile debugfs file. ++ * @mem_profile_size: Size of the @mem_profile_data. ++ * @mem_profile_lock: Lock to serialize the operations related to mem_profile ++ * debugfs file. ++ * @kctx_dentry: Pointer to the debugfs directory created for every context, ++ * inside kbase_device::debugfs_ctx_directory, containing ++ * context specific files. ++ * @reg_dump: Buffer containing a register offset & value pair, used ++ * for dumping job fault debug info. 
++ * @job_fault_count: Indicates that a job fault occurred for the context and ++ * dumping of its debug info is in progress. ++ * @job_fault_resume_event_list: List containing atoms completed after the faulty ++ * atom but before the debug data for faulty atom was dumped. ++ * @mem_view_column_width: Controls the number of bytes shown in every column of the ++ * output of "mem_view" debugfs file. ++ * @jsctx_queue: Per slot & priority arrays of object containing the root ++ * of RB-tree holding currently runnable atoms on the job slot ++ * and the head item of the linked list of atoms blocked on ++ * cross-slot dependencies. ++ * @slot_tracking: Tracking and control of this context's use of all job ++ * slots ++ * @atoms_pulled_all_slots: Total number of atoms currently pulled from the ++ * context, across all slots. ++ * @slots_pullable: Bitmask of slots, indicating the slots for which the ++ * context has pullable atoms in the runnable tree. ++ * @work: Work structure used for deferred ASID assignment. ++ * @completed_jobs: List containing completed atoms for which base_jd_event is ++ * to be posted. ++ * @work_count: Number of work items, corresponding to atoms, currently ++ * pending on job_done workqueue of @jctx. ++ * @soft_job_timeout: Timer object used for failing/cancelling the waiting ++ * soft-jobs which have been blocked for more than the ++ * timeout value used for the soft-jobs ++ * @jit_alloc: Array of 256 pointers to GPU memory regions, used for ++ * just-in-time memory allocations. ++ * @jit_max_allocations: Maximum allowed number of in-flight ++ * just-in-time memory allocations. ++ * @jit_current_allocations: Current number of in-flight just-in-time ++ * memory allocations. ++ * @jit_current_allocations_per_bin: Current number of in-flight just-in-time ++ * memory allocations per bin. ++ * @jit_group_id: A memory group ID to be passed to a platform-specific ++ * memory group manager. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @jit_phys_pages_limit: Limit of physical pages to apply across all ++ * just-in-time memory allocations, applied to ++ * @jit_current_phys_pressure. ++ * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is ++ * the sum of the worst case estimate of pages that ++ * could be used (i.e. the ++ * &struct_kbase_va_region.nr_pages for all in-use ++ * just-in-time memory regions that have not yet had ++ * a usage report) and the actual number of pages ++ * that were used (i.e. the ++ * &struct_kbase_va_region.used_pages for regions ++ * that have had a usage report). ++ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being ++ * now allocated for just-in-time memory ++ * allocations of a context (across all the ++ * threads). This is supposed to be updated ++ * with @reg_lock held before allocating ++ * the backing pages. This helps ensure that ++ * total physical memory usage for just in ++ * time memory allocation remains within the ++ * @jit_phys_pages_limit in multi-threaded ++ * scenarios. ++ * @jit_active_head: List containing the just-in-time memory allocations ++ * which are in use. ++ * @jit_pool_head: List containing the just-in-time memory allocations ++ * which have been freed up by userspace and so not being ++ * used by them. ++ * Driver caches them to quickly fulfill requests for new ++ * JIT allocations. They are released in case of memory ++ * pressure as they are put on the @evict_list when they ++ * are freed up by userspace. 
++ * @jit_destroy_head: List containing the just-in-time memory allocations ++ * which were moved to it from @jit_pool_head, in the ++ * shrinker callback, after freeing their backing ++ * physical pages. ++ * @jit_evict_lock: Lock used for operations done on just-in-time memory ++ * allocations and also for accessing @evict_list. ++ * @jit_work: Work item queued to defer the freeing of a memory ++ * region when a just-in-time memory allocation is moved ++ * to @jit_destroy_head. ++ * @ext_res_meta_head: A list of sticky external resources which were requested to ++ * be mapped on GPU side, through a softjob atom of type ++ * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. ++ * @age_count: Counter incremented on every call to jd_submit_atom, ++ * atom is assigned the snapshot of this counter, which ++ * is used to determine the atom's age when it is added to ++ * the runnable RB-tree. ++ * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) ++ * @kprcs: Reference to @struct kbase_process that the current ++ * kbase_context belongs to. ++ * @kprcs_link: List link for the list of kbase context maintained ++ * under kbase_process. ++ * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by ++ * kbase_context.reg_lock. ++ * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. ++ * @gwt_current_list: A list of addresses for which GPU has generated write faults, ++ * after the last snapshot of it was sent to userspace. ++ * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. ++ * @priority: Indicates the context priority. Used along with @atoms_count ++ * for context scheduling, protected by hwaccess_lock. ++ * @atoms_count: Number of GPU atoms currently in use, per priority ++ * @create_flags: Flags used in context creation. ++ * @kinstr_jm: Kernel job manager instrumentation context handle ++ * @tl_kctx_list_node: List item into the device timeline's list of ++ * contexts, for timeline summarization. ++ * @limited_core_mask: The mask that is applied to the affinity in case of atoms ++ * marked with BASE_JD_REQ_LIMITED_CORE_MASK. ++ * @platform_data: Pointer to platform specific per-context data. ++ * @task: Pointer to the task structure of the main thread of the process ++ * that created the Kbase context. It would be set only for the ++ * contexts created by the Userspace and not for the contexts ++ * created internally by the Kbase. ++ * ++ * A kernel base context is an entity among which the GPU is scheduled. ++ * Each context has its own GPU address space. ++ * Up to one context can be created for each client that opens the device file ++ * /dev/malixx. Context creation is deferred until a special ioctl() system call ++ * is made on the device file. 
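The JIT limit fields above reduce to a simple admission check: the current physical-page pressure (worst-case @nr_pages for regions without a usage report, plus reported @used_pages for regions with one), plus any pages other threads are already in the middle of allocating, must stay within @jit_phys_pages_limit. The following sketch works that arithmetic through with illustrative example_* types and a plain array in place of the driver's region lists; it is not the driver's implementation.

/* Simplified illustration of the JIT physical-page pressure check. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct example_jit_region {
	uint64_t nr_pages;   /* worst-case size of the region (VA pages) */
	uint64_t used_pages; /* pages reported as actually used */
	bool     has_usage_report;
};

/* Pressure = worst case for regions without a usage report,
 * plus the reported usage for regions that have one.
 */
static uint64_t example_phys_pressure(const struct example_jit_region *r, int n)
{
	uint64_t pressure = 0;

	for (int i = 0; i < n; i++)
		pressure += r[i].has_usage_report ? r[i].used_pages : r[i].nr_pages;
	return pressure;
}

/* Admission check for a new JIT backing allocation: current pressure plus
 * pages already being allocated by other threads must stay under the limit.
 */
static bool example_jit_allocation_allowed(uint64_t pressure,
					   uint64_t pages_to_be_allocated,
					   uint64_t new_pages, uint64_t limit)
{
	return pressure + pages_to_be_allocated + new_pages <= limit;
}

int main(void)
{
	const struct example_jit_region regions[] = {
		{ .nr_pages = 256, .used_pages = 0,  .has_usage_report = false },
		{ .nr_pages = 512, .used_pages = 64, .has_usage_report = true  },
	};
	uint64_t pressure = example_phys_pressure(regions, 2); /* 256 + 64 = 320 */

	printf("pressure=%llu allowed=%d\n", (unsigned long long)pressure,
	       example_jit_allocation_allowed(pressure, 0, 128, 1024)); /* 1 */
	return 0;
}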
++ */ ++struct kbase_context { ++ struct file *filp; ++ struct kbase_device *kbdev; ++ struct list_head kctx_list_link; ++ struct kbase_mmu_table mmu; + -+ reg = kctx->jit_alloc[katom->jit_ids[idx]]; ++ u32 id; ++ unsigned long api_version; ++ struct list_head event_list; ++ struct list_head event_coalesce_list; ++ struct mutex event_mutex; ++#if !MALI_USE_CSF ++ atomic_t event_closed; ++#endif ++ struct workqueue_struct *event_workq; ++ atomic_t event_count; ++ int event_coalesce_count; + -+ if (!reg) { -+ dev_warn(kctx->kbdev->dev, -+ "%s: JIT id[%u]=%u has no region\n", -+ __func__, idx, katom->jit_ids[idx]); -+ continue; -+ } ++ atomic_t flags; + -+ if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { -+ dev_warn(kctx->kbdev->dev, -+ "%s: JIT id[%u]=%u has failed to allocate a region\n", -+ __func__, idx, katom->jit_ids[idx]); -+ continue; -+ } ++ struct tagged_addr aliasing_sink_page; + -+ if (!reg->heap_info_gpu_addr) -+ continue; ++ spinlock_t mem_partials_lock; ++ struct list_head mem_partials; + -+ size_to_read = sizeof(*ptr); -+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) -+ size_to_read = sizeof(u32); -+ else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) -+ size_to_read = sizeof(u64[COUNT]); ++ struct mutex reg_lock; + -+ ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read, -+ KBASE_REG_CPU_RD, &mapping); ++ struct rb_root reg_rbtree_same; ++ struct rb_root reg_rbtree_custom; ++ struct rb_root reg_rbtree_exec; ++#if MALI_USE_CSF ++ struct rb_root reg_rbtree_exec_fixed; ++ struct rb_root reg_rbtree_fixed; ++ atomic64_t num_fixable_allocs; ++ atomic64_t num_fixed_allocs; ++#endif ++ struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; + -+ if (!ptr) { -+ dev_warn(kctx->kbdev->dev, -+ "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", -+ __func__, idx, katom->jit_ids[idx], -+ reg->start_pfn << PAGE_SHIFT, -+ reg->heap_info_gpu_addr); -+ continue; -+ } ++#if MALI_USE_CSF ++ struct kbase_csf_context csf; ++#else ++ struct kbase_jd_context jctx; ++ struct jsctx_queue jsctx_queue ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; ++ struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS]; ++ atomic_t atoms_pulled_all_slots; + -+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) { -+ read_val = READ_ONCE(*(u32 *)ptr); -+ used_pages = PFN_UP(read_val); -+ } else { -+ u64 addr_end; ++ struct list_head completed_jobs; ++ atomic_t work_count; ++ struct timer_list soft_job_timeout; + -+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { -+ const unsigned long extension_bytes = -+ reg->extension << PAGE_SHIFT; -+ const u64 low_ptr = ptr[LOW]; -+ const u64 high_ptr = ptr[HIGH]; ++ int priority; ++ s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ u32 slots_pullable; ++ u32 age_count; ++#endif /* MALI_USE_CSF */ + -+ /* As either the low or high pointer could -+ * consume their partition and move onto the -+ * next chunk, we need to account for both. -+ * In the case where nothing has been allocated -+ * from the high pointer the whole chunk could -+ * be backed unnecessarily - but the granularity -+ * is the chunk size anyway and any non-zero -+ * offset of low pointer from the start of the -+ * chunk would result in the whole chunk being -+ * backed. 
-+ */ -+ read_val = max(high_ptr, low_ptr); ++ DECLARE_BITMAP(cookies, BITS_PER_LONG); ++ struct kbase_va_region *pending_regions[BITS_PER_LONG]; + -+ /* kbase_check_alloc_sizes() already satisfies -+ * this, but here to avoid future maintenance -+ * hazards -+ */ -+ WARN_ON(!is_power_of_2(extension_bytes)); -+ addr_end = ALIGN(read_val, extension_bytes); -+ } else { -+ addr_end = read_val = READ_ONCE(*ptr); -+ } ++ wait_queue_head_t event_queue; ++ pid_t tgid; ++ pid_t pid; ++ atomic_t used_pages; ++ atomic_t nonmapped_pages; ++ atomic_t permanent_mapped_pages; + -+ if (addr_end >= (reg->start_pfn << PAGE_SHIFT)) -+ used_pages = PFN_UP(addr_end) - reg->start_pfn; -+ else -+ used_pages = reg->used_pages; -+ } ++ struct kbase_mem_pool_group mem_pools; + -+ trace_mali_jit_report(katom, reg, idx, read_val, used_pages); -+ kbase_trace_jit_report_gpu_mem(kctx, reg, 0u); ++ struct shrinker reclaim; ++ struct list_head evict_list; ++ atomic_t evict_nents; + -+ /* We can never have used more pages than the VA size of the -+ * region -+ */ -+ if (used_pages > reg->nr_pages) { -+ dev_warn(kctx->kbdev->dev, -+ "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", -+ __func__, idx, katom->jit_ids[idx], -+ reg->start_pfn << PAGE_SHIFT, -+ used_pages, reg->nr_pages, read_val, -+ (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? -+ "size" : "addr", -+ (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? -+ " with align" : ""); -+ used_pages = reg->nr_pages; -+ } -+ /* Note: one real use case has an atom correctly reporting 0 -+ * pages in use. This happens in normal use-cases but may only -+ * happen for a few of the application's frames. -+ */ ++ struct list_head waiting_soft_jobs; ++ spinlock_t waiting_soft_jobs_lock; + -+ kbase_vunmap(kctx, &mapping); ++ int as_nr; + -+ kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u); -+ } ++ atomic_t refcount; + -+ kbase_jit_retry_pending_alloc(kctx); -+} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ struct mm_struct *process_mm; ++ u64 gpu_va_end; ++#if MALI_USE_CSF ++ u32 running_total_tiler_heap_nr_chunks; ++ u64 running_total_tiler_heap_memory; ++ u64 peak_total_tiler_heap_memory; ++#endif ++ bool jit_va; + -+bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct list_head completed_jobs; -+ struct list_head runnable_jobs; -+ bool need_to_try_schedule_context = false; -+ int i; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ char *mem_profile_data; ++ size_t mem_profile_size; ++ struct mutex mem_profile_lock; ++ struct dentry *kctx_dentry; + -+ lockdep_assert_held(&kctx->jctx.lock); ++ unsigned int *reg_dump; ++ atomic_t job_fault_count; ++ struct list_head job_fault_resume_event_list; ++ unsigned int mem_view_column_width; + -+ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom); ++#endif /* CONFIG_DEBUG_FS */ ++ struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; ++ u8 jit_max_allocations; ++ u8 jit_current_allocations; ++ u8 jit_current_allocations_per_bin[256]; ++ u8 jit_group_id; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ u64 jit_phys_pages_limit; ++ u64 jit_current_phys_pressure; ++ u64 jit_phys_pages_to_be_allocated; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ struct list_head jit_active_head; ++ struct list_head jit_pool_head; ++ struct list_head jit_destroy_head; ++ struct mutex jit_evict_lock; ++ struct work_struct jit_work; + -+ INIT_LIST_HEAD(&completed_jobs); -+ INIT_LIST_HEAD(&runnable_jobs); ++ struct list_head ext_res_meta_head; + -+ 
KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); ++ u8 trim_level; + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) -+ jd_update_jit_usage(katom); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ struct kbase_process *kprcs; ++ struct list_head kprcs_link; + -+ /* This is needed in case an atom is failed due to being invalid, this -+ * can happen *before* the jobs that the atom depends on have completed -+ */ -+ for (i = 0; i < 2; i++) { -+ if (kbase_jd_katom_dep_atom(&katom->dep[i])) { -+ list_del(&katom->dep_item[i]); -+ kbase_jd_katom_dep_clear(&katom->dep[i]); -+ } -+ } ++#ifdef CONFIG_MALI_CINSTR_GWT ++ bool gwt_enabled; ++ bool gwt_was_enabled; ++ struct list_head gwt_current_list; ++ struct list_head gwt_snapshot_list; ++#endif + -+ jd_mark_atom_complete(katom); ++ base_context_create_flags create_flags; + -+ list_add_tail(&katom->jd_item, &completed_jobs); ++#if !MALI_USE_CSF ++ struct kbase_kinstr_jm *kinstr_jm; ++#endif ++ struct list_head tl_kctx_list_node; + -+ while (!list_empty(&completed_jobs)) { -+ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); -+ list_del(completed_jobs.prev); -+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); ++ u64 limited_core_mask; + -+ for (i = 0; i < 2; i++) -+ jd_resolve_dep(&runnable_jobs, katom, i, -+ kbase_ctx_flag(kctx, KCTX_DYING)); ++#if !MALI_USE_CSF ++ void *platform_data; ++#endif + -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_post_external_resources(katom); ++ struct task_struct *task; ++}; + -+ while (!list_empty(&runnable_jobs)) { -+ struct kbase_jd_atom *node; ++#ifdef CONFIG_MALI_CINSTR_GWT ++/** ++ * struct kbasep_gwt_list_element - Structure used to collect GPU ++ * write faults. ++ * @link: List head for adding write faults. ++ * @region: Details of the region where we have the ++ * faulting page address. ++ * @page_addr: Page address where GPU write fault occurred. ++ * @num_pages: The number of pages modified. ++ * ++ * Using this structure all GPU write faults are stored in a list. ++ */ ++struct kbasep_gwt_list_element { ++ struct list_head link; ++ struct kbase_va_region *region; ++ u64 page_addr; ++ u64 num_pages; ++}; + -+ node = list_entry(runnable_jobs.next, -+ struct kbase_jd_atom, jd_item); -+ list_del(runnable_jobs.next); -+ node->in_jd_list = false; ++#endif + -+ dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", -+ node, node->status); ++/** ++ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource ++ * to a @kbase_context. ++ * @ext_res_node: List head for adding the metadata to a ++ * @kbase_context. ++ * @reg: External resource information, containing ++ * the corresponding VA region ++ * @ref: Reference count. ++ * ++ * External resources can be mapped into multiple contexts as well as the same ++ * context multiple times. ++ * As kbase_va_region is refcounted, we guarantee that it will be available ++ * for the duration of the external resource, meaning it is sufficient to use ++ * it to rederive any additional data, like the GPU address. ++ * This metadata structure binds a single external resource to a single ++ * context, ensuring that per context mapping is tracked separately so it can ++ * be overridden when needed and abuses by the application (freeing the resource ++ * multiple times) don't effect the refcount of the physical allocation. 
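The metadata described above is essentially a per-context refcount on an external resource: mapping the same resource again bumps the count, and only the final unmap releases the binding, so repeated frees from the application cannot underflow the mapping. A self-contained sketch of that idea follows, using illustrative example_* names and a fixed-size array rather than the driver's metadata list and kbase_va_region reference.

/* Sketch of per-context external-resource refcounting -- not driver code. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EX_MAX_RES 8

struct example_ext_res_meta {
	uint64_t gpu_addr; /* stands in for the mapped region */
	uint32_t ref;      /* how many times this context mapped it */
};

struct example_ctx {
	struct example_ext_res_meta res[EX_MAX_RES];
};

/* Map: reuse the existing metadata for this resource if present,
 * otherwise claim a free slot with ref = 1.
 */
static struct example_ext_res_meta *
example_map_ext_res(struct example_ctx *c, uint64_t gpu_addr)
{
	struct example_ext_res_meta *free_slot = NULL;

	for (int i = 0; i < EX_MAX_RES; i++) {
		if (c->res[i].ref && c->res[i].gpu_addr == gpu_addr) {
			c->res[i].ref++;
			return &c->res[i];
		}
		if (!c->res[i].ref && !free_slot)
			free_slot = &c->res[i];
	}
	if (!free_slot)
		return NULL;
	free_slot->gpu_addr = gpu_addr;
	free_slot->ref = 1;
	return free_slot;
}

/* Unmap: only the last reference releases the slot, so a double "free"
 * from the application cannot disturb the underlying allocation.
 */
static void example_unmap_ext_res(struct example_ctx *c, uint64_t gpu_addr)
{
	for (int i = 0; i < EX_MAX_RES; i++) {
		if (c->res[i].ref && c->res[i].gpu_addr == gpu_addr) {
			c->res[i].ref--;
			return;
		}
	}
}

int main(void)
{
	struct example_ctx c;

	memset(&c, 0, sizeof(c));
	example_map_ext_res(&c, 0x1000);
	example_map_ext_res(&c, 0x1000);  /* same resource mapped twice */
	example_unmap_ext_res(&c, 0x1000);
	printf("still mapped: %u\n", (unsigned)c.res[0].ref); /* 1 */
	return 0;
}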
++ */ ++struct kbase_ctx_ext_res_meta { ++ struct list_head ext_res_node; ++ struct kbase_va_region *reg; ++ u32 ref; ++}; + -+ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); -+ if (node->status == KBASE_JD_ATOM_STATE_IN_JS) -+ continue; ++enum kbase_reg_access_type { ++ REG_READ, ++ REG_WRITE ++}; + -+ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ KBASE_TLSTREAM_TL_RUN_ATOM_START( -+ kctx->kbdev, node, -+ kbase_jd_atom_id(kctx, node)); -+ need_to_try_schedule_context |= jd_run_atom(node); -+ KBASE_TLSTREAM_TL_RUN_ATOM_END( -+ kctx->kbdev, node, -+ kbase_jd_atom_id(kctx, node)); -+ } else { -+ node->event_code = katom->event_code; ++enum kbase_share_attr_bits { ++ /* (1ULL << 8) bit is reserved */ ++ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ ++ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ ++}; + -+ if (node->core_req & -+ BASE_JD_REQ_SOFT_JOB) { -+ WARN_ON(!list_empty(&node->queue)); -+ kbase_finish_soft_job(node); -+ } -+ node->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ } ++/** ++ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. ++ * @kbdev: kbase device ++ * ++ * Return: true if the device access are coherent, false if not. ++ */ ++static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) ++{ ++ if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || ++ (kbdev->system_coherency == COHERENCY_ACE)) ++ return true; + -+ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { -+ list_add_tail(&node->jd_item, &completed_jobs); -+ } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && -+ !node->will_fail_event_code) { -+ /* Node successfully submitted, try submitting -+ * dependencies as they may now be representable -+ * in JS -+ */ -+ jd_try_submitting_deps(&runnable_jobs, node); -+ } -+ } ++ return false; ++} + -+ /* Register a completed job as a disjoint event when the GPU -+ * is in a disjoint state (ie. being reset). -+ */ -+ kbase_disjoint_event_potential(kctx->kbdev); -+ if (post_immediately && list_empty(&kctx->completed_jobs)) -+ kbase_event_post(kctx, katom); -+ else -+ list_add_tail(&katom->jd_item, &kctx->completed_jobs); ++/** ++ * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock ++ * region, as a logarithm ++ * ++ * @gpu_props: GPU properties ++ * ++ * Return: the minimum size of the MMU lock region as dictated by the corresponding ++ * arch spec. ++ */ ++static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props) ++{ ++ if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >= ++ GPU_ID2_MODEL_MAKE(12, 0)) ++ return 12; /* 4 kB */ + -+ /* Decrement and check the TOTAL number of jobs. This includes -+ * those not tracked by the scheduler: 'not ready to run' and -+ * 'dependency-only' jobs. 
-+ */ -+ if (--kctx->jctx.job_nr == 0) -+ /* All events are safely queued now, and we can signal -+ * any waiter that we've got no more jobs (so we can be -+ * safely terminated) -+ */ -+ wake_up(&kctx->jctx.zero_jobs_wait); -+ } -+ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END(kctx->kbdev, katom); -+ return need_to_try_schedule_context; ++ return 15; /* 32 kB */ +} + -+KBASE_EXPORT_TEST_API(kbase_jd_done_nolock); ++/* Conversion helpers for setting up high resolution timers */ ++#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) ++#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+enum { -+ CORE_REQ_DEP_ONLY, -+ CORE_REQ_SOFT, -+ CORE_REQ_COMPUTE, -+ CORE_REQ_FRAGMENT, -+ CORE_REQ_VERTEX, -+ CORE_REQ_TILER, -+ CORE_REQ_FRAGMENT_VERTEX, -+ CORE_REQ_FRAGMENT_VERTEX_TILER, -+ CORE_REQ_FRAGMENT_TILER, -+ CORE_REQ_VERTEX_TILER, -+ CORE_REQ_UNKNOWN -+}; -+static const char * const core_req_strings[] = { -+ "Dependency Only Job", -+ "Soft Job", -+ "Compute Shader Job", -+ "Fragment Shader Job", -+ "Vertex/Geometry Shader Job", -+ "Tiler Job", -+ "Fragment Shader + Vertex/Geometry Shader Job", -+ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", -+ "Fragment Shader + Tiler Job", -+ "Vertex/Geometry Shader Job + Tiler Job", -+ "Unknown Job" -+}; -+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) -+{ -+ if (core_req & BASE_JD_REQ_SOFT_JOB) -+ return core_req_strings[CORE_REQ_SOFT]; -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ return core_req_strings[CORE_REQ_COMPUTE]; -+ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { -+ case BASE_JD_REQ_DEP: -+ return core_req_strings[CORE_REQ_DEP_ONLY]; -+ case BASE_JD_REQ_FS: -+ return core_req_strings[CORE_REQ_FRAGMENT]; -+ case BASE_JD_REQ_CS: -+ return core_req_strings[CORE_REQ_VERTEX]; -+ case BASE_JD_REQ_T: -+ return core_req_strings[CORE_REQ_TILER]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): -+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_FRAGMENT_TILER]; -+ case (BASE_JD_REQ_CS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_VERTEX_TILER]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; -+ } -+ return core_req_strings[CORE_REQ_UNKNOWN]; -+} -+#endif ++/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ ++#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 ++/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ ++#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 ++/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */ ++#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000 ++#endif /* _KBASE_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c +new file mode 100644 +index 000000000..7d6e47558 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c +@@ -0,0 +1,80 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014, 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+/* Trace an atom submission. */ -+static void jd_trace_atom_submit(struct kbase_context *const kctx, -+ struct kbase_jd_atom *const katom, -+ int *priority) ++/* ++ * Base kernel disjoint events helper functions ++ */ ++ ++#include ++ ++void kbase_disjoint_init(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); -+ KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); -+ if (priority) -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); -+ kbase_kinstr_jm_atom_queue(katom); ++ atomic_set(&kbdev->disjoint_event.count, 0); ++ atomic_set(&kbdev->disjoint_event.state, 0); +} + -+static bool jd_submit_atom(struct kbase_context *const kctx, -+ const struct base_jd_atom *const user_atom, -+ const struct base_jd_fragment *const user_jc_incr, -+ struct kbase_jd_atom *const katom) ++/* increment the disjoint event count */ ++void kbase_disjoint_event(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ int queued = 0; -+ int i; -+ int sched_prio; -+ bool will_fail = false; -+ unsigned long flags; -+ enum kbase_jd_atom_state status; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); ++ atomic_inc(&kbdev->disjoint_event.count); ++} + -+ /* Update the TOTAL number of jobs. This includes those not tracked by -+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
-+ */ -+ jctx->job_nr++; ++/* increment the state and the event counter */ ++void kbase_disjoint_state_up(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE -+ katom->start_timestamp.tv64 = 0; -+#else -+ katom->start_timestamp = 0; -+#endif -+ katom->udata = user_atom->udata; -+ katom->kctx = kctx; -+ katom->nr_extres = user_atom->nr_extres; -+ katom->extres = NULL; -+ katom->device_nr = user_atom->device_nr; -+ katom->jc = user_atom->jc; -+ katom->core_req = user_atom->core_req; -+ katom->jobslot = user_atom->jobslot; -+ katom->seq_nr = user_atom->seq_nr; -+ katom->atom_flags = 0; -+ katom->need_cache_flush_cores_retained = 0; -+ katom->pre_dep = NULL; -+ katom->post_dep = NULL; -+ katom->x_pre_dep = NULL; -+ katom->x_post_dep = NULL; -+ katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; -+ katom->softjob_data = NULL; ++ atomic_inc(&kbdev->disjoint_event.state); + -+ trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); ++ kbase_disjoint_event(kbdev); ++} + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ /* Older API version atoms might have random values where jit_id now -+ * lives, but we must maintain backwards compatibility - handle the -+ * issue. -+ */ -+ if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { -+ katom->jit_ids[0] = 0; -+ katom->jit_ids[1] = 0; -+ } else { -+ katom->jit_ids[0] = user_atom->jit_id[0]; -+ katom->jit_ids[1] = user_atom->jit_id[1]; -+ } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++/* decrement the state */ ++void kbase_disjoint_state_down(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); + -+ katom->renderpass_id = user_atom->renderpass_id; ++ kbase_disjoint_event(kbdev); + -+ /* Implicitly sets katom->protected_state.enter as well. */ -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ atomic_dec(&kbdev->disjoint_event.state); ++} + -+ katom->age = kctx->age_count++; ++/* increments the count only if the state is > 0 */ ++void kbase_disjoint_event_potential(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ INIT_LIST_HEAD(&katom->queue); -+ INIT_LIST_HEAD(&katom->jd_item); ++ if (atomic_read(&kbdev->disjoint_event.state)) ++ kbase_disjoint_event(kbdev); ++} + -+ /* Don't do anything if there is a mess up with dependencies. -+ * This is done in a separate cycle to check both the dependencies at ones, otherwise -+ * it will be extra complexity to deal with 1st dependency ( just added to the list ) -+ * if only the 2nd one has invalid config. 
-+ */ -+ for (i = 0; i < 2; i++) { -+ int dep_atom_number = user_atom->pre_dep[i].atom_id; -+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; ++u32 kbase_disjoint_event_get(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (dep_atom_number) { -+ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && -+ dep_atom_type != BASE_JD_DEP_TYPE_DATA) { -+ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ dev_dbg(kbdev->dev, -+ "Atom %pK status to completed\n", -+ (void *)katom); ++ return atomic_read(&kbdev->disjoint_event.count); ++} ++KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c +new file mode 100644 +index 000000000..4e2713511 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c +@@ -0,0 +1,452 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Wrong dependency setup. Atom will be sent -+ * back to user space. Do not record any -+ * dependencies. -+ */ -+ jd_trace_atom_submit(kctx, katom, NULL); -+ return kbase_jd_done_nolock(katom, true); -+ } -+ } -+ } ++/* ++ * Implementation of the dummy job execution workaround for the GPU hang issue. 
++ */ + -+ /* Add dependencies */ -+ for (i = 0; i < 2; i++) { -+ int dep_atom_number = user_atom->pre_dep[i].atom_id; -+ base_jd_dep_type dep_atom_type; -+ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; ++#include ++#include ++#include + -+ dep_atom_type = user_atom->pre_dep[i].dependency_type; -+ kbase_jd_katom_dep_clear(&katom->dep[i]); ++#include ++#include + -+ if (!dep_atom_number) -+ continue; ++#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa" + -+ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || -+ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++struct wa_header { ++ u16 signature; ++ u16 version; ++ u32 info_offset; ++} __packed; + -+ if (dep_atom->event_code == BASE_JD_EVENT_DONE) -+ continue; -+ /* don't stop this atom if it has an order dependency -+ * only to the failed one, try to submit it through -+ * the normal path -+ */ -+ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && -+ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { -+ continue; -+ } ++struct wa_v2_info { ++ u64 jc; ++ u32 js; ++ u32 blob_offset; ++ u64 flags; ++} __packed; + -+ /* Atom has completed, propagate the error code if any */ -+ katom->event_code = dep_atom->event_code; -+ katom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", -+ (void *)katom); ++struct wa_blob { ++ u64 base; ++ u32 size; ++ u32 map_flags; ++ u32 payload_offset; ++ u32 blob_offset; ++} __packed; + -+ /* This atom will be sent back to user space. -+ * Do not record any dependencies. -+ */ -+ jd_trace_atom_submit(kctx, katom, NULL); ++static bool in_range_offset(const u8 *base, const u8 *end, off_t off, size_t sz) ++{ ++ return !(end - base - off < sz); ++} + -+ will_fail = true; ++static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) ++{ ++ int loop; ++ const int timeout = 100; ++ u32 val; + -+ } else { -+ /* Atom is in progress, add this atom to the list */ -+ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); -+ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); -+ queued = 1; -+ } ++ for (loop = 0; loop < timeout; loop++) { ++ val = kbase_reg_read(kbdev, offset); ++ if (val & bits) ++ break; ++ udelay(10); + } + -+ if (will_fail) { -+ if (!queued) { -+ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { -+ /* This softjob has failed due to a previous -+ * dependency, however we should still run the -+ * prepare & finish functions -+ */ -+ int err = kbase_prepare_soft_job(katom); -+ -+ if (err >= 0) -+ kbase_finish_soft_job(katom); -+ } -+ return kbase_jd_done_nolock(katom, true); -+ } -+ -+ katom->will_fail_event_code = katom->event_code; ++ if (loop == timeout) { ++ dev_err(kbdev->dev, ++ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", ++ (unsigned long)offset, (unsigned long)bits, ++ (unsigned long)val); + } + -+ /* These must occur after the above loop to ensure that an atom -+ * that depends on a previous atom with the same number behaves -+ * as expected -+ */ -+ katom->event_code = BASE_JD_EVENT_DONE; -+ katom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); ++ return (val & bits); ++} + -+ /* For invalid priority, be most lenient and choose the default */ -+ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); -+ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) -+ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; ++static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) ++{ ++ int loop; ++ const int timeout = 
100; ++ u32 val; ++ u32 target = 0; + -+ /* Cap the priority to jctx.max_priority */ -+ katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? -+ kctx->jctx.max_priority : sched_prio; ++ if (set) ++ target = bits; + -+ /* Create a new atom. */ -+ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); ++ for (loop = 0; loop < timeout; loop++) { ++ val = kbase_reg_read(kbdev, (offset)); ++ if ((val & bits) == target) ++ break; + -+#if !MALI_INCREMENTAL_RENDERING_JM -+ /* Reject atoms for incremental rendering if not supported */ -+ if (katom->core_req & -+ (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { -+ dev_err(kctx->kbdev->dev, -+ "Rejecting atom with unsupported core_req 0x%x\n", -+ katom->core_req); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); ++ udelay(10); + } -+#endif /* !MALI_INCREMENTAL_RENDERING_JM */ + -+ if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { -+ WARN_ON(katom->jc != 0); -+ katom->jc_fragment = *user_jc_incr; -+ } else if (!katom->jc && -+ (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { -+ /* Reject atoms with job chain = NULL, as these cause issues -+ * with soft-stop -+ */ -+ dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); ++ if (loop == timeout) { ++ dev_err(kbdev->dev, ++ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", ++ (unsigned long)offset, (unsigned long)bits, ++ (unsigned long)val); ++ return -ETIMEDOUT; + } + -+ /* Reject atoms with an invalid device_nr */ -+ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && -+ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { -+ dev_err(kctx->kbdev->dev, -+ "Rejecting atom with invalid device_nr %d\n", -+ katom->device_nr); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } ++ return 0; ++} + -+ /* Reject atoms with invalid core requirements */ -+ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && -+ (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { -+ dev_err(kctx->kbdev->dev, -+ "Rejecting atom with invalid core requirements\n"); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; -+ return kbase_jd_done_nolock(katom, true); -+ } ++static inline int run_job(struct kbase_device *kbdev, int as, int slot, ++ u64 cores, u64 jc) ++{ ++ u32 done; + -+ /* Reject soft-job atom of certain types from accessing external resources */ -+ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && -+ (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || -+ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || -+ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { -+ dev_err(kctx->kbdev->dev, -+ "Rejecting soft-job atom accessing external resources\n"); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } ++ /* setup job */ ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), ++ jc & U32_MAX); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), ++ jc >> 32); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), ++ cores & U32_MAX); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), ++ cores >> 32); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), ++ JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); + -+ 
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { -+ /* handle what we need to do to access the external resources */ -+ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { -+ /* setup failed (no access, bad resource, unknown resource types, etc.) */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } -+ } ++ /* go */ ++ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), ++ JS_COMMAND_START); + -+#if !MALI_JIT_PRESSURE_LIMIT_BASE -+ if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && -+ (user_atom->jit_id[0] || user_atom->jit_id[1])) { -+ /* JIT pressure limit is disabled, but we are receiving non-0 -+ * JIT IDs - atom is invalid. -+ */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ /* wait for the slot to finish (done, error) */ ++ done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), ++ (1ul << (16+slot)) | (1ul << slot)); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); + -+ /* Validate the atom. Function will return error if the atom is -+ * malformed. -+ * -+ * Soft-jobs never enter the job scheduler but have their own initialize method. -+ * -+ * If either fail then we immediately complete the atom with an error. -+ */ -+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { -+ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } ++ if (done != (1ul << slot)) { ++ dev_err(kbdev->dev, ++ "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", ++ slot, (unsigned long long)cores, ++ (unsigned long)done); ++ dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", ++ kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); ++ ++ return -EFAULT; + } else { -+ /* Soft-job */ -+ if (kbase_prepare_soft_job(katom) != 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return kbase_jd_done_nolock(katom, true); -+ } ++ return 0; + } ++} + -+#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -+ katom->work_id = atomic_inc_return(&jctx->work_id); -+ trace_gpu_job_enqueue(kctx->id, katom->work_id, -+ kbasep_map_core_reqs_to_string(katom->core_req)); -+#endif ++/* To be called after power up & MMU init, but before everything else */ ++int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) ++{ ++ int as; ++ int slot; ++ u64 jc; ++ int failed = 0; ++ int runs = 0; ++ u32 old_gpu_mask; ++ u32 old_job_mask; + -+ if (queued && !IS_GPU_ATOM(katom)) -+ return false; ++ if (!kbdev) ++ return -EFAULT; + -+ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { -+ if (kbase_process_soft_job(katom) == 0) { -+ kbase_finish_soft_job(katom); -+ return kbase_jd_done_nolock(katom, true); -+ } -+ return false; -+ } ++ if (!kbdev->dummy_job_wa.ctx) ++ return -EFAULT; + -+ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { -+ bool need_to_try_schedule_context; ++ as = kbdev->dummy_job_wa.ctx->as_nr; ++ slot = kbdev->dummy_job_wa.slot; ++ jc = kbdev->dummy_job_wa.jc; + -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", -+ (void *)katom); ++ /* mask off all but MMU IRQs */ ++ old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + -+ 
need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); -+ /* If job was cancelled then resolve immediately */ -+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) -+ return need_to_try_schedule_context; ++ /* power up requested cores */ ++ kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); ++ kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); + -+ /* Synchronize with backend reset */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ status = katom->status; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { -+ dev_dbg(kctx->kbdev->dev, -+ "Atom %d cancelled on HW\n", -+ kbase_jd_atom_id(katom->kctx, katom)); -+ return need_to_try_schedule_context; -+ } ++ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { ++ /* wait for power-ups */ ++ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); ++ if (cores >> 32) ++ wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + } + -+ /* This is a pure dependency. Resolve it immediately */ -+ return kbase_jd_done_nolock(katom, true); -+} -+ -+int kbase_jd_submit(struct kbase_context *kctx, -+ void __user *user_addr, u32 nr_atoms, u32 stride, -+ bool uk6_atom) -+{ -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ int err = 0; -+ int i; -+ bool need_to_try_schedule_context = false; -+ struct kbase_device *kbdev; -+ u32 latest_flush; ++ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { ++ int i; + -+ bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || -+ stride == offsetof(struct base_jd_atom_v2, renderpass_id)); ++ /* do for each requested core */ ++ for (i = 0; i < sizeof(cores) * 8; i++) { ++ u64 affinity; + -+ /* -+ * kbase_jd_submit isn't expected to fail and so all errors with the -+ * jobs are reported by immediately failing them (through event system) -+ */ -+ kbdev = kctx->kbdev; ++ affinity = 1ull << i; + -+ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); -+ return -EINVAL; -+ } ++ if (!(cores & affinity)) ++ continue; + -+ if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && -+ stride != sizeof(struct base_jd_atom_v2) && -+ stride != offsetof(struct base_jd_atom, renderpass_id) && -+ stride != sizeof(struct base_jd_atom)) { -+ dev_err(kbdev->dev, -+ "Stride %u passed to job_submit isn't supported by the kernel\n", -+ stride); -+ return -EINVAL; -+ } ++ if (run_job(kbdev, as, slot, affinity, jc)) ++ failed++; ++ runs++; ++ } + -+ if (nr_atoms > BASE_JD_ATOM_COUNT) { -+ dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d", -+ nr_atoms, kctx->tgid, kctx->id); -+ return -EINVAL; ++ } else { ++ if (run_job(kbdev, as, slot, cores, jc)) ++ failed++; ++ runs++; + } + -+ /* All atoms submitted in this call have the same flush ID */ -+ latest_flush = kbase_backend_get_current_flush_id(kbdev); -+ -+ for (i = 0; i < nr_atoms; i++) { -+ struct base_jd_atom user_atom = { -+ .seq_nr = 0, -+ }; -+ struct base_jd_fragment user_jc_incr; -+ struct kbase_jd_atom *katom; -+ -+ if (unlikely(jd_atom_is_v2)) { -+ if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { -+ dev_dbg(kbdev->dev, -+ "Invalid atom address %pK passed to job_submit\n", -+ user_addr); -+ err = -EFAULT; -+ break; -+ } ++ if (kbdev->dummy_job_wa.flags & ++ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { ++ /* power off shader cores (to reduce any dynamic leakage) */ ++ kbase_reg_write(kbdev, 
SHADER_PWROFF_LO, (cores & U32_MAX)); ++ kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); + -+ /* no seq_nr in v2 */ -+ user_atom.seq_nr = 0; -+ } else { -+ if (copy_from_user(&user_atom, user_addr, stride) != 0) { -+ dev_dbg(kbdev->dev, -+ "Invalid atom address %pK passed to job_submit\n", -+ user_addr); -+ err = -EFAULT; -+ break; -+ } ++ /* wait for power off complete */ ++ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); ++ wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); ++ if (cores >> 32) { ++ wait(kbdev, SHADER_READY_HI, (cores >> 32), false); ++ wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); + } ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); ++ } + -+ if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { -+ dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); -+ user_atom.renderpass_id = 0; -+ } else { -+ /* Ensure all padding bytes are 0 for potential future -+ * extension -+ */ -+ size_t j; -+ -+ dev_dbg(kbdev->dev, "Renderpass ID is %d\n", -+ user_atom.renderpass_id); -+ for (j = 0; j < sizeof(user_atom.padding); j++) { -+ if (user_atom.padding[j]) { -+ dev_err(kbdev->dev, -+ "Bad padding byte %zu: %d\n", -+ j, user_atom.padding[j]); -+ err = -EINVAL; -+ break; -+ } -+ } -+ if (err) -+ break; -+ } ++ /* restore IRQ masks */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); + -+ /* In this case 'jc' is the CPU address of a struct -+ * instead of a GPU address of a job chain. -+ */ -+ if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { -+ if (copy_from_user(&user_jc_incr, -+ u64_to_user_ptr(user_atom.jc), -+ sizeof(user_jc_incr))) { -+ dev_err(kbdev->dev, -+ "Invalid jc address 0x%llx passed to job_submit\n", -+ user_atom.jc); -+ err = -EFAULT; -+ break; -+ } -+ dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); -+ user_atom.jc = 0; -+ } ++ if (failed) ++ dev_err(kbdev->dev, ++ "WA complete with %d failures out of %d runs\n", failed, ++ runs); + -+ user_addr = (void __user *)((uintptr_t) user_addr + stride); ++ return failed ? -EFAULT : 0; ++} + -+ mutex_lock(&jctx->lock); -+#ifndef compiletime_assert -+#define compiletime_assert_defined -+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ -+while (false) -+#endif -+ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == -+ BASE_JD_ATOM_COUNT, -+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); -+ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == -+ sizeof(user_atom.atom_number), -+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); -+#ifdef compiletime_assert_defined -+#undef compiletime_assert -+#undef compiletime_assert_defined -+#endif -+ katom = &jctx->atoms[user_atom.atom_number]; ++static ssize_t dummy_job_wa_info_show(struct device * const dev, ++ struct device_attribute * const attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = dev_get_drvdata(dev); ++ int err; + -+ /* Record the flush ID for the cache flush optimisation */ -+ katom->flush_id = latest_flush; ++ if (!kbdev || !kbdev->dummy_job_wa.ctx) ++ return -ENODEV; + -+ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { -+ /* Atom number is already in use, wait for the atom to -+ * complete -+ */ -+ mutex_unlock(&jctx->lock); ++ err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", ++ kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); + -+ /* This thread will wait for the atom to complete. 
Due -+ * to thread scheduling we are not sure that the other -+ * thread that owns the atom will also schedule the -+ * context, so we force the scheduler to be active and -+ * hence eventually schedule this context at some point -+ * later. -+ */ -+ kbase_js_sched_all(kbdev); ++ return err; ++} + -+ if (wait_event_killable(katom->completed, -+ katom->status == -+ KBASE_JD_ATOM_STATE_UNUSED) != 0) { -+ /* We're being killed so the result code -+ * doesn't really matter -+ */ -+ return 0; -+ } -+ mutex_lock(&jctx->lock); -+ } -+ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom); -+ need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, -+ &user_jc_incr, katom); -+ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom); -+ /* Register a completed job as a disjoint event when the GPU is in a disjoint state -+ * (ie. being reset). -+ */ -+ kbase_disjoint_event_potential(kbdev); ++static DEVICE_ATTR_RO(dummy_job_wa_info); + -+ mutex_unlock(&jctx->lock); -+ if (fatal_signal_pending(current)) { -+ dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", -+ kctx->tgid, kctx->id); -+ /* We're being killed so the result code doesn't really matter */ -+ return 0; -+ } -+ } ++static bool wa_blob_load_needed(struct kbase_device *kbdev) ++{ ++ if (of_machine_is_compatible("arm,juno")) ++ return false; + -+ if (need_to_try_schedule_context) -+ kbase_js_sched_all(kbdev); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485)) ++ return true; + -+ return err; ++ return false; +} + -+KBASE_EXPORT_TEST_API(kbase_jd_submit); -+ -+void kbase_jd_done_worker(struct work_struct *data) ++int kbase_dummy_job_wa_load(struct kbase_device *kbdev) +{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); -+ struct kbase_jd_context *jctx; ++ const struct firmware *firmware; ++ static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME; ++ const u32 signature = 0x4157; ++ const u32 version = 2; ++ const u8 *fw_end; ++ const u8 *fw; ++ const struct wa_header *header; ++ const struct wa_v2_info *v2_info; ++ u32 blob_offset; ++ int err; + struct kbase_context *kctx; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ u64 cache_jc = katom->jc; -+ struct kbasep_js_atom_retained_state katom_retained_state; -+ bool context_idle; -+ base_jd_core_req core_req = katom->core_req; + -+ /* Soft jobs should never reach this function */ -+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ kctx = katom->kctx; -+ jctx = &kctx->jctx; -+ kbdev = kctx->kbdev; -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++ lockdep_assert_held(&kbdev->fw_load_lock); + -+ dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", -+ (void *)katom, (void *)kctx); ++ if (!wa_blob_load_needed(kbdev)) ++ return 0; + -+ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); ++ /* load the wa */ ++ err = request_firmware(&firmware, wa_name, kbdev->dev); + -+ kbase_backend_complete_wq(kbdev, katom); ++ if (err) { ++ dev_err(kbdev->dev, "WA blob missing. 
Please refer to the Arm Mali DDK Valhall Release Notes, " ++ "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); ++ return -ENODEV; ++ } + -+ /* -+ * Begin transaction on JD context and JS context -+ */ -+ mutex_lock(&jctx->lock); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE); -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ kctx = kbase_create_context(kbdev, true, ++ BASE_CONTEXT_CREATE_FLAG_NONE, 0, ++ NULL); + -+ /* This worker only gets called on contexts that are scheduled *in*. This is -+ * because it only happens in response to an IRQ from a job that was -+ * running. -+ */ -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ if (!kctx) { ++ dev_err(kbdev->dev, "Failed to create WA context\n"); ++ goto no_ctx; ++ } + -+ if (katom->event_code == BASE_JD_EVENT_STOPPED) { -+ unsigned long flags; ++ fw = firmware->data; ++ fw_end = fw + firmware->size; + -+ dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", -+ (void *)katom); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++ dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", ++ firmware->size); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!in_range_offset(fw, fw_end, 0, sizeof(*header))) { ++ dev_err(kbdev->dev, "WA too small\n"); ++ goto bad_fw; ++ } + -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", -+ (void *)katom); -+ kbase_js_unpull(kctx, katom); ++ header = (const struct wa_header *)(fw + 0); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&jctx->lock); ++ if (header->signature != signature) { ++ dev_err(kbdev->dev, "WA signature failure: 0x%lx\n", ++ (unsigned long)header->signature); ++ goto bad_fw; ++ } + -+ return; ++ if (header->version != version) { ++ dev_err(kbdev->dev, "WA version 0x%lx not supported\n", ++ (unsigned long)header->version); ++ goto bad_fw; + } + -+ if ((katom->event_code != BASE_JD_EVENT_DONE) && -+ (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) { -+ if (!kbase_is_quick_reset_enabled(kbdev)) -+ dev_err(kbdev->dev, -+ "t6xx: GPU fault 0x%02lx from job slot %d\n", -+ (unsigned long)katom->event_code, -+ katom->slot_nr); ++ if (!in_range_offset(fw, fw_end, header->info_offset, sizeof(*v2_info))) { ++ dev_err(kbdev->dev, "WA info offset out of bounds\n"); ++ goto bad_fw; + } + -+ /* Retain state before the katom disappears */ -+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); ++ v2_info = (const struct wa_v2_info *)(fw + header->info_offset); + -+ context_idle = kbase_js_complete_atom_wq(kctx, katom); ++ if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) { ++ dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n", ++ (unsigned long long)v2_info->flags); ++ goto bad_fw; ++ } + -+ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); ++ kbdev->dummy_job_wa.slot = v2_info->js; ++ kbdev->dummy_job_wa.jc = v2_info->jc; ++ kbdev->dummy_job_wa.flags = v2_info->flags; + -+ kbasep_js_remove_job(kbdev, kctx, katom); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ /* kbase_jd_done_nolock() requires the jsctx_mutex lock to be dropped */ -+ kbase_jd_done_nolock(katom, false); ++ blob_offset = v2_info->blob_offset; + -+ /* katom may have been freed now, do not use! 
*/ ++ while (blob_offset) { ++ const struct wa_blob *blob; ++ size_t nr_pages; ++ u64 flags; ++ u64 gpu_va; ++ struct kbase_va_region *va_region; + -+ if (context_idle) { -+ unsigned long flags; ++ if (!in_range_offset(fw, fw_end, blob_offset, sizeof(*blob))) { ++ dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", ++ (unsigned long)blob_offset); ++ goto bad_fw; ++ } + -+ context_idle = false; -+ mutex_lock(&js_devdata->queue_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ blob = (const struct wa_blob *)(fw + blob_offset); ++ if (!in_range_offset(fw, fw_end, blob->payload_offset, blob->size)) { ++ dev_err(kbdev->dev, "Payload out-of-bounds\n"); ++ goto bad_fw; ++ } + -+ /* If kbase_sched() has scheduled this context back in then -+ * KCTX_ACTIVE will have been set after we marked it as -+ * inactive, and another pm reference will have been taken, so -+ * drop our reference. But do not call kbase_jm_idle_ctx(), as -+ * the context is active and fast-starting is allowed. -+ * -+ * If an atom has been fast-started then -+ * kbase_jsctx_atoms_pulled(kctx) will return non-zero but -+ * KCTX_ACTIVE will still be false (as the previous pm -+ * reference has been inherited). Do NOT drop our reference, as -+ * it has been re-used, and leave the context as active. -+ * -+ * If no new atoms have been started then KCTX_ACTIVE will -+ * still be false and kbase_jsctx_atoms_pulled(kctx) will -+ * return zero, so drop the reference and call -+ * kbase_jm_idle_ctx(). -+ * -+ * As the checks are done under both the queue_mutex and -+ * hwaccess_lock is should be impossible for this to race -+ * with the scheduler code. -+ */ -+ if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || -+ !kbase_jsctx_atoms_pulled(kctx)) { -+ /* Calling kbase_jm_idle_ctx() here will ensure that -+ * atoms are not fast-started when we drop the -+ * hwaccess_lock. This is not performed if -+ * KCTX_ACTIVE is set as in that case another pm -+ * reference has been taken and a fast-start would be -+ * valid. 
-+ */ -+ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) -+ kbase_jm_idle_ctx(kbdev, kctx); -+ context_idle = true; -+ } else { -+ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); ++ gpu_va = blob->base; ++ if (PAGE_ALIGN(gpu_va) != gpu_va) { ++ dev_err(kbdev->dev, "blob not page aligned\n"); ++ goto bad_fw; + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->queue_mutex); -+ } ++ nr_pages = PFN_UP(blob->size); ++ flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; + -+ /* -+ * Transaction complete -+ */ -+ mutex_unlock(&jctx->lock); ++ va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, ++ &gpu_va, mmu_sync_info); + -+ /* Job is now no longer running, so can now safely release the context -+ * reference, and handle any actions that were logged against the -+ * atom's retained state -+ */ ++ if (!va_region) { ++ dev_err(kbdev->dev, "Failed to allocate for blob\n"); ++ } else { ++ struct kbase_vmap_struct vmap = { 0 }; ++ const u8 *payload; ++ void *dst; + -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); ++ /* copy the payload, */ ++ payload = fw + blob->payload_offset; + -+ kbase_js_sched_all(kbdev); ++ dst = kbase_vmap(kctx, ++ va_region->start_pfn << PAGE_SHIFT, ++ nr_pages << PAGE_SHIFT, &vmap); + -+ if (!atomic_dec_return(&kctx->work_count)) { -+ /* If worker now idle then post all events that kbase_jd_done_nolock() -+ * has queued -+ */ -+ mutex_lock(&jctx->lock); -+ while (!list_empty(&kctx->completed_jobs)) { -+ struct kbase_jd_atom *atom = list_entry( -+ kctx->completed_jobs.next, -+ struct kbase_jd_atom, jd_item); -+ list_del(kctx->completed_jobs.next); ++ if (dst) { ++ memcpy(dst, payload, blob->size); ++ kbase_vunmap(kctx, &vmap); ++ } else { ++ dev_err(kbdev->dev, ++ "Failed to copy payload\n"); ++ } + -+ kbase_event_post(kctx, atom); + } -+ mutex_unlock(&jctx->lock); ++ blob_offset = blob->blob_offset; /* follow chain */ + } + -+ kbase_backend_complete_wq_post_sched(kbdev, core_req); -+ -+ if (context_idle) -+ kbase_pm_context_idle(kbdev); -+ -+ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); -+ -+ dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", -+ (void *)katom, (void *)kctx); -+} ++ release_firmware(firmware); + -+/** -+ * jd_cancel_worker - Work queue job cancel function. -+ * @data: a &struct work_struct -+ * -+ * Only called as part of 'Zapping' a context (which occurs on termination). -+ * Operates serially with the kbase_jd_done_worker() on the work queue. -+ * -+ * This can only be called on contexts that aren't scheduled. -+ * -+ * We don't need to release most of the resources that would occur on -+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be -+ * running (by virtue of only being called on contexts that aren't -+ * scheduled). 
-+ */ -+static void jd_cancel_worker(struct work_struct *data) -+{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); -+ struct kbase_jd_context *jctx; -+ struct kbase_context *kctx; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool need_to_try_schedule_context; -+ bool attr_state_changed; -+ struct kbase_device *kbdev; -+ CSTD_UNUSED(need_to_try_schedule_context); ++ kbasep_js_schedule_privileged_ctx(kbdev, kctx); + -+ /* Soft jobs should never reach this function */ -+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); ++ kbdev->dummy_job_wa.ctx = kctx; + -+ kctx = katom->kctx; -+ kbdev = kctx->kbdev; -+ jctx = &kctx->jctx; -+ js_kctx_info = &kctx->jctx.sched_info; ++ err = sysfs_create_file(&kbdev->dev->kobj, ++ &dev_attr_dummy_job_wa_info.attr); ++ if (err) ++ dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); + -+ KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); ++ return 0; + -+ /* This only gets called on contexts that are scheduled out. Hence, we must -+ * make sure we don't de-ref the number of running jobs (there aren't -+ * any), nor must we try to schedule out the context (it's already -+ * scheduled out). -+ */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++bad_fw: ++ kbase_destroy_context(kctx); ++no_ctx: ++ release_firmware(firmware); ++ return -EFAULT; ++} + -+ /* Scheduler: Remove the job from the system */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) ++{ ++ struct kbase_context *wa_ctx; + -+ mutex_lock(&jctx->lock); ++ /* return if the dummy job has not been loaded */ ++ if (kbdev->dummy_job_wa_loaded == false) ++ return; + -+ need_to_try_schedule_context = kbase_jd_done_nolock(katom, true); -+ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to -+ * schedule the context. There's also no need for the jsctx_mutex to have been taken -+ * around this too. -+ */ -+ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); ++ /* Can be safely called even if the file wasn't created on probe */ ++ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); + -+ /* katom may have been freed now, do not use! 
*/ -+ mutex_unlock(&jctx->lock); ++ wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); ++ WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); ++ /* make this write visible before we tear down the ctx */ ++ smp_mb(); + -+ if (attr_state_changed) -+ kbase_js_sched_all(kbdev); ++ if (wa_ctx) { ++ kbasep_js_release_privileged_ctx(kbdev, wa_ctx); ++ kbase_destroy_context(wa_ctx); ++ } +} -+ -+/** -+ * kbase_jd_done - Complete a job that has been removed from the Hardware -+ * @katom: atom which has been completed -+ * @slot_nr: slot the atom was on -+ * @end_timestamp: completion time -+ * @done_code: completion code +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h +new file mode 100644 +index 000000000..8713ba1ea +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h +@@ -0,0 +1,73 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This must be used whenever a job has been removed from the Hardware, e.g.: -+ * An IRQ indicates that the job finished (for both error and 'done' codes), or -+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * -+ * Some work is carried out immediately, and the rest is deferred onto a -+ * workqueue ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Context: -+ * This can be called safely from atomic context. 
-+ * The caller must hold kbdev->hwaccess_lock + */ -+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, -+ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) -+{ -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ -+ KBASE_DEBUG_ASSERT(katom); -+ kctx = katom->kctx; -+ KBASE_DEBUG_ASSERT(kctx); -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev); -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) -+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ -+ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + -+ kbase_job_check_leave_disjoint(kbdev, katom); -+ -+ katom->slot_nr = slot_nr; -+ -+ atomic_inc(&kctx->work_count); ++#ifndef _KBASE_DUMMY_JOB_WORKAROUND_ ++#define _KBASE_DUMMY_JOB_WORKAROUND_ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ /* a failed job happened and is waiting for dumping*/ -+ if (!katom->will_fail_event_code && -+ kbase_debug_job_fault_process(katom, katom->event_code)) -+ return; -+#endif ++#define KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE (1ull << 0) ++#define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) ++#define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) + -+ WARN_ON(work_pending(&katom->work)); -+ INIT_WORK(&katom->work, kbase_jd_done_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+} ++#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ ++ KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ ++ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) + -+KBASE_EXPORT_TEST_API(kbase_jd_done); ++#if MALI_USE_CSF + -+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) ++static inline int kbase_dummy_job_wa_load(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); -+ kctx = katom->kctx; -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); -+ KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); -+ -+ /* This should only be done from a context that is not scheduled */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ -+ WARN_ON(work_pending(&katom->work)); -+ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ -+ INIT_WORK(&katom->work, jd_cancel_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ CSTD_UNUSED(kbdev); ++ return 0; +} + -+ -+void kbase_jd_zap_context(struct kbase_context *kctx) ++static inline void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) +{ -+ struct kbase_jd_atom *katom; -+ struct list_head *entry, *tmp; -+ struct kbase_device *kbdev; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ -+ kbdev = kctx->kbdev; -+ -+ KBASE_KTRACE_ADD_JM(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); -+ -+ kbase_js_zap_context(kctx); -+ -+ mutex_lock(&kctx->jctx.lock); -+ -+ /* -+ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are -+ * queued outside the job scheduler. 
-+ */ -+ -+ del_timer_sync(&kctx->soft_job_timeout); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ katom = list_entry(entry, struct kbase_jd_atom, queue); -+ kbase_cancel_soft_job(katom); -+ } -+ -+ mutex_unlock(&kctx->jctx.lock); -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ kbase_debug_job_fault_kctx_unblock(kctx); -+#endif -+ -+ kbase_jm_wait_for_zero_jobs(kctx); ++ CSTD_UNUSED(kbdev); +} + -+KBASE_EXPORT_TEST_API(kbase_jd_zap_context); -+ -+int kbase_jd_init(struct kbase_context *kctx) ++static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, ++ u64 cores) +{ -+ int i; -+ int mali_err = 0; -+ struct priority_control_manager_device *pcm_device = NULL; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ pcm_device = kctx->kbdev->pcm_dev; -+ kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; -+ -+ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", -+ WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (kctx->jctx.job_done_wq == NULL) { -+ mali_err = -ENOMEM; -+ goto out1; -+ } -+ -+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { -+ init_waitqueue_head(&kctx->jctx.atoms[i].completed); -+ -+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); -+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); -+ -+ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ -+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; -+ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; -+ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ kctx->jctx.atoms[i].dma_fence.context = -+ dma_fence_context_alloc(1); -+ atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); -+#endif -+ } -+ -+ for (i = 0; i < BASE_JD_RP_COUNT; i++) -+ kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; -+ -+ mutex_init(&kctx->jctx.lock); -+ -+ init_waitqueue_head(&kctx->jctx.zero_jobs_wait); -+ -+ spin_lock_init(&kctx->jctx.tb_lock); -+ -+ kctx->jctx.job_nr = 0; -+ INIT_LIST_HEAD(&kctx->completed_jobs); -+ atomic_set(&kctx->work_count, 0); -+ -+ /* Check if there are platform rules for maximum priority */ -+ if (pcm_device) -+ kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check( -+ pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); -+ ++ CSTD_UNUSED(kbdev); ++ CSTD_UNUSED(cores); + return 0; ++} + -+ out1: -+ return mali_err; ++static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++ return false; +} + -+KBASE_EXPORT_TEST_API(kbase_jd_init); ++#else + -+void kbase_jd_exit(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx); ++int kbase_dummy_job_wa_load(struct kbase_device *kbdev); ++void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); ++int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); + -+ /* Work queue is emptied by this */ -+ destroy_workqueue(kctx->jctx.job_done_wq); ++static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) ++{ ++ return (kbdev->dummy_job_wa.ctx != NULL); +} + -+KBASE_EXPORT_TEST_API(kbase_jd_exit); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c ++#endif /* MALI_USE_CSF */ ++ ++#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c new file mode 100644 -index 000000000..6196c0985 +index 000000000..e4cb71632 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c -@@ -0,0 +1,199 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c +@@ -0,0 +1,94 @@ +// 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -220953,194 +224100,89 @@ index 000000000..6196c0985 + * + */ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ -+#include ++#include "mali_kbase_dvfs_debugfs.h" +#include -+#include -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include -+#endif -+#include -+ -+struct kbase_jd_debugfs_depinfo { -+ u8 id; -+ char type; -+}; -+ -+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, -+ struct seq_file *sfile) -+{ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ struct kbase_sync_fence_info info; -+ int res; -+ -+ switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ res = kbase_sync_fence_out_info_get(atom, &info); -+ if (res == 0) -+ seq_printf(sfile, "Sa([%pK]%d) ", -+ info.fence, info.status); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ res = kbase_sync_fence_in_info_get(atom, &info); -+ if (res == 0) -+ seq_printf(sfile, "Wa([%pK]%d) ", -+ info.fence, info.status); -+ break; -+ default: -+ break; -+ } -+#endif /* CONFIG_SYNC_FILE */ -+} -+ -+static void kbasep_jd_debugfs_atom_deps( -+ struct kbase_jd_debugfs_depinfo *deps, -+ struct kbase_jd_atom *atom) -+{ -+ struct kbase_context *kctx = atom->kctx; -+ int i; ++#include + -+ for (i = 0; i < 2; i++) { -+ deps[i].id = (unsigned int)(atom->dep[i].atom ? -+ kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+ switch (atom->dep[i].dep_type) { -+ case BASE_JD_DEP_TYPE_INVALID: -+ deps[i].type = ' '; -+ break; -+ case BASE_JD_DEP_TYPE_DATA: -+ deps[i].type = 'D'; -+ break; -+ case BASE_JD_DEP_TYPE_ORDER: -+ deps[i].type = '>'; -+ break; -+ default: -+ deps[i].type = '?'; -+ break; -+ } -+ } -+} +/** -+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry ++ * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info + * -+ * This function is called to get the contents of the JD atoms debugfs file. -+ * This is a report of all atoms managed by kbase_jd_context.atoms ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context + * -+ * Return: 0 if successfully prints data in debugfs entry file, failure -+ * otherwise ++ * Return: Negative error code or 0 on success. 
+ */ -+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) ++static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data) +{ -+ struct kbase_context *kctx = sfile->private; -+ struct kbase_jd_atom *atoms; -+ unsigned long irq_flags; -+ int i; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ /* Print version */ -+ seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); -+ -+ /* Print U/K API version */ -+ seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, -+ BASE_UK_VERSION_MINOR); -+ -+ /* Print table heading */ -+ seq_puts(sfile, " ID, Core req, St, Predeps, Start time, Additional info...\n"); -+ -+ atoms = kctx->jctx.atoms; -+ /* General atom states */ -+ mutex_lock(&kctx->jctx.lock); -+ /* JS-related states */ -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); -+ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { -+ struct kbase_jd_atom *atom = &atoms[i]; -+ s64 start_timestamp = 0; -+ struct kbase_jd_debugfs_depinfo deps[2]; -+ -+ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) -+ continue; -+ -+ /* start_timestamp is cleared as soon as the atom leaves UNUSED state -+ * and set before a job is submitted to the h/w, a non-zero value means -+ * it is valid -+ */ -+ if (ktime_to_ns(atom->start_timestamp)) -+ start_timestamp = -+ ktime_to_ns(ktime_sub(ktime_get_raw(), atom->start_timestamp)); -+ -+ kbasep_jd_debugfs_atom_deps(deps, atom); -+ -+ seq_printf(sfile, -+ "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", -+ i, atom->core_req, atom->status, -+ deps[0].type, deps[0].id, -+ deps[1].type, deps[1].id, -+ start_timestamp); -+ -+ -+ kbase_jd_debugfs_fence_info(atom, sfile); ++ struct kbase_device *kbdev = file->private; + -+ seq_puts(sfile, "\n"); -+ } -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&kctx->jctx.lock); ++#if MALI_USE_CSF ++ seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n", ++ kbdev->pm.backend.metrics.values.time_busy, ++ kbdev->pm.backend.metrics.values.time_idle, ++ kbdev->pm.backend.metrics.values.time_in_protm); ++#else ++ seq_printf(file, "busy_time: %u idle_time: %u\n", ++ kbdev->pm.backend.metrics.values.time_busy, ++ kbdev->pm.backend.metrics.values.time_idle); ++#endif + + return 0; +} + -+ -+/** -+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file -+ * @in: &struct inode pointer -+ * @file: &struct file pointer -+ * -+ * Return: file descriptor -+ */ -+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) ++static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, ++ struct file *file) +{ -+ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); ++ return single_open(file, kbasep_dvfs_utilization_debugfs_show, ++ in->i_private); +} + -+static const struct file_operations kbasep_jd_debugfs_atoms_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_jd_debugfs_atoms_open, ++static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { ++ .open = kbasep_dvfs_utilization_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + -+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) +{ ++ struct dentry *file; + const mode_t mode = 0444; + -+ /* Caller already ensures this, but we keep the pattern for -+ * maintenance safety. 
-+ */ -+ if (WARN_ON(!kctx) || -+ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return; + -+ /* Expose all atoms */ -+ debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, -+ &kbasep_jd_debugfs_atoms_fops); ++ file = debugfs_create_file("dvfs_utilization", mode, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_dvfs_utilization_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kbdev->dev, ++ "Unable to create dvfs debugfs entry"); ++ } ++} + ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) ++{ +} + +#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h new file mode 100644 -index 000000000..8e6140c43 +index 000000000..8334db7cc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h -@@ -0,0 +1,43 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h +@@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -221158,38 +224200,30 @@ index 000000000..8e6140c43 + * + */ + -+/** -+ * DOC: Header file for job dispatcher-related entries in debugfs -+ */ -+ -+#ifndef _KBASE_JD_DEBUGFS_H -+#define _KBASE_JD_DEBUGFS_H -+ -+#include -+ -+#define MALI_JD_DEBUGFS_VERSION 3 ++#ifndef _KBASE_DVFS_DEBUGFS_H_ ++#define _KBASE_DVFS_DEBUGFS_H_ + -+/* Forward declarations */ -+struct kbase_context; ++/* Forward declaration */ ++struct kbase_device; + +/** -+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system ++ * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries + * -+ * @kctx: Pointer to kbase_context ++ * @kbdev: Pointer to the GPU device for which to create the debugfs entry + */ -+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev); + -+#endif /*_KBASE_JD_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c ++#endif /* _KBASE_DVFS_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_event.c b/drivers/gpu/arm/bifrost/mali_kbase_event.c new file mode 100644 -index 000000000..1ac5cd3ea +index 000000000..910c51170 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c -@@ -0,0 +1,153 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_event.c +@@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -221207,148 +224241,267 @@ index 000000000..1ac5cd3ea + * + */ + -+/* -+ * HW access job manager common APIs -+ */ -+ +#include -+#include "mali_kbase_hwaccess_jm.h" -+#include "mali_kbase_jm.h" ++#include ++#include ++#include + -+#if !MALI_USE_CSF -+/** -+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot -+ * @js on the active context. -+ * @kbdev: Device pointer -+ * @js: Job slot to run on -+ * @nr_jobs_to_submit: Number of jobs to attempt to submit -+ * -+ * Return: true if slot can still be submitted on, false if slot is now full. -+ */ -+static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit) ++static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ struct kbase_context *kctx; -+ int i; ++ struct base_jd_udata data; ++ struct kbase_device *kbdev; + -+ kctx = kbdev->hwaccess.active_kctx[js]; -+ dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n", -+ nr_jobs_to_submit, (void *)kctx, js); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ if (!kctx) -+ return true; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); ++ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + -+ for (i = 0; i < nr_jobs_to_submit; i++) { -+ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); ++ kbdev = kctx->kbdev; ++ data = katom->udata; + -+ if (!katom) -+ return true; /* Context has no jobs on this slot */ ++ KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx); ++ KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); + -+ kbase_backend_run_atom(kbdev, katom); -+ } ++ katom->status = KBASE_JD_ATOM_STATE_UNUSED; ++ dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); ++ wake_up(&katom->completed); + -+ dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js); -+ return false; ++ return data; +} + -+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) ++int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) +{ -+ u32 ret_mask = 0; ++ struct kbase_jd_atom *atom; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); ++ KBASE_DEBUG_ASSERT(ctx); + -+ while (js_mask) { -+ unsigned int js = ffs(js_mask) - 1; -+ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); ++ mutex_lock(&ctx->event_mutex); + -+ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) -+ ret_mask |= (1 << js); ++ if (list_empty(&ctx->event_list)) { ++ if (!atomic_read(&ctx->event_closed)) { ++ mutex_unlock(&ctx->event_mutex); ++ return -1; ++ } + -+ js_mask &= ~(1 << js); ++ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ ++ mutex_unlock(&ctx->event_mutex); ++ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; ++ memset(&uevent->udata, 0, sizeof(uevent->udata)); ++ dev_dbg(ctx->kbdev->dev, ++ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", ++ BASE_JD_EVENT_DRV_TERMINATED); ++ return 0; + } + -+ dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); -+ return ret_mask; ++ /* normal event processing */ ++ atomic_dec(&ctx->event_count); ++ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); ++ list_del(ctx->event_list.next); ++ ++ mutex_unlock(&ctx->event_mutex); ++ ++ dev_dbg(ctx->kbdev->dev, "event dequeuing 
%pK\n", (void *)atom); ++ uevent->event_code = atom->event_code; ++ ++ uevent->atom_number = (atom - ctx->jctx.atoms); ++ ++ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_free_external_resources(atom); ++ ++ mutex_lock(&ctx->jctx.lock); ++ uevent->udata = kbase_event_process(ctx, atom); ++ mutex_unlock(&ctx->jctx.lock); ++ ++ return 0; +} + -+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) ++KBASE_EXPORT_TEST_API(kbase_event_dequeue); ++ ++/** ++ * kbase_event_process_noreport_worker - Worker for processing atoms that do not ++ * return an event but do have external ++ * resources ++ * @data: Work structure ++ */ ++static void kbase_event_process_noreport_worker(struct work_struct *data) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_free_external_resources(katom); + -+ if (!down_trylock(&js_devdata->schedule_sem)) { -+ kbase_jm_kick(kbdev, js_mask); -+ up(&js_devdata->schedule_sem); ++ mutex_lock(&kctx->jctx.lock); ++ kbase_event_process(kctx, katom); ++ mutex_unlock(&kctx->jctx.lock); ++} ++ ++/** ++ * kbase_event_process_noreport - Process atoms that do not return an event ++ * @kctx: Context pointer ++ * @katom: Atom to be processed ++ * ++ * Atoms that do not have external resources will be processed immediately. ++ * Atoms that do have external resources will be processed on a workqueue, in ++ * order to avoid locking issues. ++ */ ++static void kbase_event_process_noreport(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) ++{ ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++ INIT_WORK(&katom->work, kbase_event_process_noreport_worker); ++ queue_work(kctx->event_workq, &katom->work); ++ } else { ++ kbase_event_process(kctx, katom); + } +} + -+void kbase_jm_try_kick_all(struct kbase_device *kbdev) ++/** ++ * kbase_event_coalesce - Move pending events to the main event list ++ * @kctx: Context pointer ++ * ++ * kctx->event_list and kctx->event_coalesce_count must be protected ++ * by a lock unless this is the last thread using them ++ * (and we're about to terminate the lock). 
++ * ++ * Return: The number of pending events moved to the main event list ++ */ ++static int kbase_event_coalesce(struct kbase_context *kctx) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ const int event_count = kctx->event_coalesce_count; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Join the list of pending events onto the tail of the main list ++ * and reset it ++ */ ++ list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); ++ kctx->event_coalesce_count = 0; + -+ if (!down_trylock(&js_devdata->schedule_sem)) { -+ kbase_jm_kick_all(kbdev); -+ up(&js_devdata->schedule_sem); -+ } ++ /* Return the number of events moved */ ++ return event_count; +} + -+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) ++void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +{ -+ unsigned int js; ++ struct kbase_device *kbdev = ctx->kbdev; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); + -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ if (kbdev->hwaccess.active_kctx[js] == kctx) { -+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx, -+ js); -+ kbdev->hwaccess.active_kctx[js] = NULL; ++ if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { ++ dev_warn(kbdev->dev, ++ "%s: Atom %d (%pK) not completed (status %d)\n", ++ __func__, ++ kbase_jd_atom_id(atom->kctx, atom), ++ atom->kctx, ++ atom->status); ++ return; ++ } ++ ++ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { ++ if (atom->event_code == BASE_JD_EVENT_DONE) { ++ dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); ++ kbase_event_process_noreport(ctx, atom); ++ return; + } + } -+} + -+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { ++ dev_dbg(kbdev->dev, "Suppressing event (never)\n"); ++ kbase_event_process_noreport(ctx, atom); ++ return; ++ } ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); ++ if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { ++ /* Don't report the event until other event(s) have completed */ ++ dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); ++ mutex_lock(&ctx->event_mutex); ++ list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); ++ ++ctx->event_coalesce_count; ++ mutex_unlock(&ctx->event_mutex); ++ } else { ++ /* Report the event and any pending events now */ ++ int event_count = 1; + -+ dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", -+ (void *)katom, katom->event_code); ++ mutex_lock(&ctx->event_mutex); ++ event_count += kbase_event_coalesce(ctx); ++ list_add_tail(&atom->dep_item[0], &ctx->event_list); ++ atomic_add(event_count, &ctx->event_count); ++ mutex_unlock(&ctx->event_mutex); ++ dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_RETURN_ATOM_TO_JS, katom->kctx, katom, -+ katom->jc, katom->event_code); ++ kbase_event_wakeup(ctx); + -+ if (katom->event_code != BASE_JD_EVENT_STOPPED && -+ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { -+ return kbase_js_complete_atom(katom, NULL); ++ /* Post-completion latency */ ++ trace_sysgraph(SGR_POST, ctx->id, ++ kbase_jd_atom_id(ctx, atom)); + } ++} ++KBASE_EXPORT_TEST_API(kbase_event_post); + -+ kbase_js_unpull(katom->kctx, katom); ++void kbase_event_close(struct kbase_context *kctx) ++{ ++ 
mutex_lock(&kctx->event_mutex); ++ atomic_set(&kctx->event_closed, true); ++ mutex_unlock(&kctx->event_mutex); ++ kbase_event_wakeup(kctx); ++} + -+ return NULL; ++int kbase_event_init(struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(kctx); ++ ++ INIT_LIST_HEAD(&kctx->event_list); ++ INIT_LIST_HEAD(&kctx->event_coalesce_list); ++ mutex_init(&kctx->event_mutex); ++ kctx->event_coalesce_count = 0; ++ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); ++ ++ if (kctx->event_workq == NULL) ++ return -EINVAL; ++ ++ return 0; +} + -+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, ktime_t *end_timestamp) ++KBASE_EXPORT_TEST_API(kbase_event_init); ++ ++void kbase_event_cleanup(struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ int event_count; + -+ return kbase_js_complete_atom(katom, end_timestamp); ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(kctx->event_workq); ++ ++ flush_workqueue(kctx->event_workq); ++ destroy_workqueue(kctx->event_workq); ++ ++ /* We use kbase_event_dequeue to remove the remaining events as that ++ * deals with all the cleanup needed for the atoms. ++ * ++ * Note: use of kctx->event_list without a lock is safe because this must be the last ++ * thread using it (because we're about to terminate the lock) ++ */ ++ event_count = kbase_event_coalesce(kctx); ++ atomic_add(event_count, &kctx->event_count); ++ ++ while (!list_empty(&kctx->event_list)) { ++ struct base_jd_event_v2 event; ++ ++ kbase_event_dequeue(kctx, &event); ++ } +} -+#endif /* !MALI_USE_CSF */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_jm.h ++ ++KBASE_EXPORT_TEST_API(kbase_event_cleanup); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c new file mode 100644 -index 000000000..eeafcb6b1 +index 000000000..b16b27659 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.h -@@ -0,0 +1,117 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c +@@ -0,0 +1,61 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2013-2014, 2016, 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -221366,112 +224519,56 @@ index 000000000..eeafcb6b1 + * + */ + -+/* -+ * Job manager common APIs -+ */ -+ -+#ifndef _KBASE_JM_H_ -+#define _KBASE_JM_H_ ++#include ++#include ++#include ++#include ++#include + -+#if !MALI_USE_CSF -+/** -+ * kbase_jm_kick() - Indicate that there are jobs ready to run. -+ * @kbdev: Device pointer -+ * @js_mask: Mask of the job slots that can be pulled from. -+ * -+ * Caller must hold the hwaccess_lock and schedule_sem semaphore -+ * -+ * Return: Mask of the job slots that can still be submitted to. -+ */ -+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); ++/* Spin lock protecting all Mali fences as fence->lock. */ ++static DEFINE_SPINLOCK(kbase_fence_lock); + -+/** -+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job -+ * slots. -+ * @kbdev: Device pointer -+ * -+ * Caller must hold the hwaccess_lock and schedule_sem semaphore -+ * -+ * Return: Mask of the job slots that can still be submitted to. 
-+ */ -+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++struct fence * ++kbase_fence_out_new(struct kbase_jd_atom *katom) ++#else ++struct dma_fence * ++kbase_fence_out_new(struct kbase_jd_atom *katom) ++#endif +{ -+ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -+} ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+/** -+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick -+ * @kbdev: Device pointer -+ * @js_mask: Mask of the job slots that can be pulled from -+ * Context: Caller must hold hwaccess_lock -+ * -+ * If schedule_sem can be immediately obtained then this function will call -+ * kbase_jm_kick() otherwise it will do nothing. -+ */ -+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); ++ WARN_ON(katom->dma_fence.fence); + -+/** -+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all -+ * @kbdev: Device pointer -+ * Context: Caller must hold hwaccess_lock -+ * -+ * If schedule_sem can be immediately obtained then this function will call -+ * kbase_jm_kick_all() otherwise it will do nothing. -+ */ -+void kbase_jm_try_kick_all(struct kbase_device *kbdev); -+#endif /* !MALI_USE_CSF */ ++ fence = kzalloc(sizeof(*fence), GFP_KERNEL); ++ if (!fence) ++ return NULL; + -+#if !MALI_USE_CSF -+/** -+ * kbase_jm_idle_ctx() - Mark a context as idle. -+ * @kbdev: Device pointer -+ * @kctx: Context to mark as idle -+ * -+ * No more atoms will be pulled from this context until it is marked as active -+ * by kbase_js_use_ctx(). -+ * -+ * The context should have no atoms currently pulled from it -+ * (kbase_jsctx_atoms_pulled(kctx) == 0). -+ * -+ * Caller must hold the hwaccess_lock -+ */ -+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++ dma_fence_init(fence, ++ &kbase_fence_ops, ++ &kbase_fence_lock, ++ katom->dma_fence.context, ++ atomic_inc_return(&katom->dma_fence.seqno)); + -+/** -+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has -+ * been soft-stopped or will fail due to a -+ * dependency -+ * @kbdev: Device pointer -+ * @katom: Atom that has been stopped or will be failed -+ * -+ * Return: Atom that has now been unblocked and can now be run, or NULL if none -+ */ -+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++ katom->dma_fence.fence = fence; + -+/** -+ * kbase_jm_complete() - Complete an atom -+ * @kbdev: Device pointer -+ * @katom: Atom that has completed -+ * @end_timestamp: Timestamp of atom completion -+ * -+ * Return: Atom that has now been unblocked and can now be run, or NULL if none -+ */ -+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, ktime_t *end_timestamp); -+#endif /* !MALI_USE_CSF */ ++ return fence; ++} + -+#endif /* _KBASE_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h new file mode 100644 -index 000000000..8ce09212a +index 000000000..f4507ac43 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c -@@ -0,0 +1,4007 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h +@@ -0,0 +1,259 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 
2011-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -221489,4501 +224586,5495 @@ index 000000000..8ce09212a + * + */ + -+/* -+ * Job Scheduler Implementation -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "mali_kbase_jm.h" -+#include "mali_kbase_hwaccess_jm.h" -+#include -+#include ++#ifndef _KBASE_FENCE_H_ ++#define _KBASE_FENCE_H_ + +/* -+ * Private types ++ * mali_kbase_fence.[hc] has fence code used only by ++ * - CONFIG_SYNC_FILE - explicit fences + */ + -+/* Bitpattern indicating the result of releasing a context */ -+enum { -+ /* The context was descheduled - caller should try scheduling in a new -+ * one to keep the runpool full -+ */ -+ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), -+ /* Ctx attributes were changed - caller should try scheduling all -+ * contexts -+ */ -+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) -+}; ++#if IS_ENABLED(CONFIG_SYNC_FILE) + -+typedef u32 kbasep_js_release_result; ++#include ++#include "mali_kbase_fence_defs.h" ++#include "mali_kbase.h" ++#include "mali_kbase_refcount_defs.h" + -+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { -+ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ -+ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ -+ KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ -+ KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* BASE_JD_PRIO_REALTIME */ -+}; ++#if MALI_USE_CSF ++/* Maximum number of characters in DMA fence timeline name. */ ++#define MAX_TIMELINE_NAME (32) + -+const base_jd_prio -+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { -+ BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ -+ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ -+ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ -+ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ ++/** ++ * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing ++ * information about KCPU queue. One instance per KCPU ++ * queue. ++ * ++ * @refcount: Atomic value to keep track of number of references to an instance. ++ * An instance can outlive the KCPU queue itself. ++ * @kbdev: Pointer to Kbase device. ++ * @kctx_id: Kbase context ID. ++ * @timeline_name: String of timeline name for associated fence object. ++ */ ++struct kbase_kcpu_dma_fence_meta { ++ kbase_refcount_t refcount; ++ struct kbase_device *kbdev; ++ int kctx_id; ++ char timeline_name[MAX_TIMELINE_NAME]; +}; + -+ -+/* -+ * Private function prototypes ++/** ++ * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a ++ * reference to metadata containing more informaiton about it. ++ * ++ * @base: Fence object itself. ++ * @metadata: Pointer to metadata structure. 
+ */ -+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( -+ struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state); -+ -+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -+ -+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, -+ kbasep_js_ctx_job_cb *callback); -+ -+/* Helper for ktrace */ -+#if KBASE_KTRACE_ENABLE -+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) -+{ -+ return atomic_read(&kctx->refcount); -+} -+#else /* KBASE_KTRACE_ENABLE */ -+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) -+{ -+ CSTD_UNUSED(kctx); -+ return 0; -+} -+#endif /* KBASE_KTRACE_ENABLE */ ++struct kbase_kcpu_dma_fence { ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence base; ++#else ++ struct dma_fence base; ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++ struct kbase_kcpu_dma_fence_meta *metadata; ++}; ++#endif + -+/* -+ * Private functions -+ */ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++extern const struct fence_ops kbase_fence_ops; ++#else ++extern const struct dma_fence_ops kbase_fence_ops; ++#endif + +/** -+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements -+ * @features: JSn_FEATURE register value -+ * -+ * Given a JSn_FEATURE register value returns the core requirements that match ++ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom ++ * @katom: Atom to create an output fence for + * -+ * Return: Core requirement bit mask ++ * Return: A new fence object on success, NULL on failure. + */ -+static base_jd_core_req core_reqs_from_jsn_features(u16 features) -+{ -+ base_jd_core_req core_req = 0u; -+ -+ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) -+ core_req |= BASE_JD_REQ_V; -+ -+ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) -+ core_req |= BASE_JD_REQ_CF; -+ -+ if ((features & JS_FEATURE_COMPUTE_JOB) != 0) -+ core_req |= BASE_JD_REQ_CS; -+ -+ if ((features & JS_FEATURE_TILER_JOB) != 0) -+ core_req |= BASE_JD_REQ_T; -+ -+ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) -+ core_req |= BASE_JD_REQ_FS; -+ -+ return core_req; -+} -+ -+static void kbase_js_sync_timers(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&kbdev->js_data.runpool_mutex); -+} ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); ++#else ++struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); ++#endif + ++#if IS_ENABLED(CONFIG_SYNC_FILE) +/** -+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * @prio: Priority to check. -+ * -+ * Return true if there are no atoms to pull. There may be running atoms in the -+ * ring buffer even if there are no atoms to pull. It is also possible for the -+ * ring buffer to be full (with running atoms) when this functions returns -+ * true. ++ * kbase_fence_fence_in_set() - Assign input fence to atom ++ * @katom: Atom to assign input fence to ++ * @fence: Input fence to assign to atom + * -+ * Return: true if there are no atoms to pull, false otherwise. ++ * This function will take ownership of one fence reference! 
+ */ -+static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio) -+{ -+ bool none_to_pull; -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); -+ -+ dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio, -+ none_to_pull ? "not " : "", kctx); ++#define kbase_fence_fence_in_set(katom, fence) \ ++ do { \ ++ WARN_ON((katom)->dma_fence.fence_in); \ ++ (katom)->dma_fence.fence_in = fence; \ ++ } while (0) ++#endif + -+ return none_to_pull; -+} + ++#if !MALI_USE_CSF +/** -+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no -+ * pullable atoms -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * -+ * Caller must hold hwaccess_lock ++ * kbase_fence_out_remove() - Removes the output fence from atom ++ * @katom: Atom to remove output fence for + * -+ * Return: true if the ring buffers for all priorities have no pullable atoms, -+ * false otherwise. ++ * This will also release the reference to this fence which the atom keeps + */ -+static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js) ++static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) +{ -+ int prio; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; -+ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) -+ return false; ++ if (katom->dma_fence.fence) { ++ dma_fence_put(katom->dma_fence.fence); ++ katom->dma_fence.fence = NULL; + } -+ -+ return true; +} + ++#if IS_ENABLED(CONFIG_SYNC_FILE) +/** -+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. -+ * @kctx: Pointer to kbase context with the queue. -+ * @js: Job slot id to iterate. -+ * @prio: Priority id to iterate. -+ * @callback: Function pointer to callback. -+ * -+ * Iterate over a queue and invoke @callback for each entry in the queue, and -+ * remove the entry from the queue. -+ * -+ * If entries are added to the queue while this is running those entries may, or -+ * may not be covered. To ensure that all entries in the buffer have been -+ * enumerated when this function returns jsctx->lock must be held when calling -+ * this function. ++ * kbase_fence_in_remove() - Removes the input fence from atom ++ * @katom: Atom to remove input fence for + * -+ * The HW access lock must always be held when calling this function. ++ * This will also release the reference to this fence which the atom keeps + */ -+static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio, -+ kbasep_js_ctx_job_cb *callback) ++static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) +{ -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { -+ struct rb_node *node = rb_first(&queue->runnable_tree); -+ struct kbase_jd_atom *entry = rb_entry(node, -+ struct kbase_jd_atom, runnable_tree_node); -+ -+ rb_erase(node, &queue->runnable_tree); -+ callback(kctx->kbdev, entry); -+ -+ /* Runnable end-of-renderpass atoms can also be in the linked -+ * list of atoms blocked on cross-slot dependencies. Remove them -+ * to avoid calling the callback twice. 
-+ */ -+ if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { -+ WARN_ON(!(entry->core_req & -+ BASE_JD_REQ_END_RENDERPASS)); -+ dev_dbg(kctx->kbdev->dev, -+ "Del runnable atom %pK from X_DEP list\n", -+ (void *)entry); -+ -+ list_del(&entry->queue); -+ entry->atom_flags &= -+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ } -+ } -+ -+ while (!list_empty(&queue->x_dep_head)) { -+ struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, -+ struct kbase_jd_atom, queue); -+ -+ WARN_ON(!(entry->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); -+ dev_dbg(kctx->kbdev->dev, -+ "Del blocked atom %pK from X_DEP list\n", -+ (void *)entry); -+ -+ list_del(queue->x_dep_head.next); -+ entry->atom_flags &= -+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ -+ callback(kctx->kbdev, entry); ++ if (katom->dma_fence.fence_in) { ++ dma_fence_put(katom->dma_fence.fence_in); ++ katom->dma_fence.fence_in = NULL; + } +} ++#endif + +/** -+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue -+ * @kctx: Pointer to kbase context with queue. -+ * @js: Job slot id to iterate. -+ * @callback: Function pointer to callback. ++ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us ++ * @katom: Atom to check output fence for + * -+ * Iterate over all the different priorities, and for each call -+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback -+ * for each entry, and remove the entry from the queue. ++ * Return: true if fence exists and is valid, otherwise false + */ -+static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js, -+ kbasep_js_ctx_job_cb *callback) ++static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) +{ -+ int prio; -+ -+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; -+ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) -+ jsctx_queue_foreach_prio(kctx, js, prio, callback); ++ return katom->dma_fence.fence && ++ katom->dma_fence.fence->ops == &kbase_fence_ops; +} + +/** -+ * jsctx_rb_peek_prio(): - Check buffer and get next atom -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * @prio: Priority id to check. -+ * -+ * Check the ring buffer for the specified @js and @prio and return a pointer to -+ * the next atom, unless the ring buffer is empty. ++ * kbase_fence_out_signal() - Signal output fence of atom ++ * @katom: Atom to signal output fence for ++ * @status: Status to signal with (0 for success, < 0 for error) + * -+ * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
++ * Return: 0 on success, < 0 on error + */ -+static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js, -+ int prio) ++static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, ++ int status) +{ -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; -+ struct rb_node *node; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n", -+ (void *)kctx, prio, js); -+ -+ node = rb_first(&rb->runnable_tree); -+ if (!node) { -+ dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); -+ return NULL; ++ if (status) { ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) ++ fence_set_error(katom->dma_fence.fence, status); ++#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) ++ dma_fence_set_error(katom->dma_fence.fence, status); ++#else ++ katom->dma_fence.fence->status = status; ++#endif + } -+ -+ return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); ++ return dma_fence_signal(katom->dma_fence.fence); +} + ++#if IS_ENABLED(CONFIG_SYNC_FILE) +/** -+ * jsctx_rb_peek(): - Check all priority buffers and get next atom -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * -+ * Check the ring buffers for all priorities, starting from -+ * KBASE_JS_ATOM_SCHED_PRIO_REALTIME, for the specified @js and @prio and return a -+ * pointer to the next atom, unless all the priority's ring buffers are empty. ++ * kbase_fence_in_get() - Retrieve input fence for atom. ++ * @katom: Atom to get input fence from + * -+ * Caller must hold the hwaccess_lock. ++ * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * -+ * Return: Pointer to next atom in buffer, or NULL if there is no atom. ++ * Return: The fence, or NULL if there is no input fence for atom + */ -+static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js) -+{ -+ int prio; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; -+ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ struct kbase_jd_atom *katom; -+ -+ katom = jsctx_rb_peek_prio(kctx, js, prio); -+ if (katom) -+ return katom; -+ } -+ -+ return NULL; -+} ++#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) ++#endif + +/** -+ * jsctx_rb_pull(): - Mark atom in list as running -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @katom: Pointer to katom to pull. ++ * kbase_fence_out_get() - Retrieve output fence for atom. ++ * @katom: Atom to get output fence from + * -+ * Mark an atom previously obtained from jsctx_rb_peek() as running. ++ * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * -+ * @katom must currently be at the head of the ring buffer. ++ * Return: The fence, or NULL if there is no output fence for atom + */ -+static inline void -+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ int prio = katom->sched_priority; -+ unsigned int js = katom->slot_nr; -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; ++#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++#endif /* !MALI_USE_CSF */ + -+ dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", -+ (void *)katom, (void *)kctx); ++/** ++ * kbase_fence_get() - Retrieve fence for a KCPUQ fence command. 
++ * @fence_info: KCPUQ fence command ++ * ++ * A ref will be taken for the fence, so use @kbase_fence_put() to release it ++ * ++ * Return: The fence, or NULL if there is no fence for KCPUQ fence command ++ */ ++#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) + -+ /* Atoms must be pulled in the correct order. */ -+ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); ++#if MALI_USE_CSF ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) ++#else ++static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) ++#endif ++{ ++ if (fence->ops == &kbase_fence_ops) ++ return (struct kbase_kcpu_dma_fence *)fence; + -+ rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); ++ return NULL; +} + -+static void -+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ int prio = katom->sched_priority; -+ unsigned int js = katom->slot_nr; -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; -+ struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom, -+ (void *)kctx, js); -+ -+ while (*new) { -+ struct kbase_jd_atom *entry = container_of(*new, -+ struct kbase_jd_atom, runnable_tree_node); -+ -+ parent = *new; -+ if (kbase_jd_atom_is_younger(katom, entry)) -+ new = &((*new)->rb_left); -+ else -+ new = &((*new)->rb_right); ++ if (kbase_refcount_dec_and_test(&metadata->refcount)) { ++ atomic_dec(&metadata->kbdev->live_fence_metadata); ++ kfree(metadata); + } ++} + -+ /* Add new node and rebalance tree. */ -+ rb_link_node(&katom->runnable_tree_node, parent, new); -+ rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static inline void kbase_kcpu_dma_fence_put(struct fence *fence) ++#else ++static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) ++#endif ++{ ++ struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY); ++ if (kcpu_fence) ++ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} ++#endif /* MALI_USE_CSF */ + +/** -+ * jsctx_rb_unpull(): - Undo marking of atom in list as running -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @katom: Pointer to katom to unpull. -+ * -+ * Undo jsctx_rb_pull() and put @katom back in the queue. -+ * -+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were -+ * pulled. ++ * kbase_fence_put() - Releases a reference to a fence ++ * @fence: Fence to release reference for. 
+ */ -+static inline void -+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static inline void kbase_fence_put(struct fence *fence) ++#else ++static inline void kbase_fence_put(struct dma_fence *fence) ++#endif +{ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ KBASE_KTRACE_ADD_JM(kctx->kbdev, JS_UNPULL_JOB, kctx, katom, katom->jc, -+ 0u); -+ -+ jsctx_tree_add(kctx, katom); ++ dma_fence_put(fence); +} + -+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled); -+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js); -+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js); -+ -+typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, -+ const struct kbase_jd_atom *); -+ -+bool kbase_js_atom_runs_before(struct kbase_device *kbdev, -+ const struct kbase_jd_atom *katom_a, -+ const struct kbase_jd_atom *katom_b, -+ const kbase_atom_ordering_flag_t order_flags) -+{ -+ struct kbase_context *kctx_a = katom_a->kctx; -+ struct kbase_context *kctx_b = katom_b->kctx; -+ katom_ordering_func *samectxatomprio_ordering_func = -+ kbase_jd_atom_is_younger; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (order_flags & KBASE_ATOM_ORDERING_FLAG_SEQNR) -+ samectxatomprio_ordering_func = kbase_jd_atom_is_earlier; -+ -+ /* It only makes sense to make this test for atoms on the same slot */ -+ WARN_ON(katom_a->slot_nr != katom_b->slot_nr); -+ -+ if (kbdev->js_ctx_scheduling_mode == -+ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) { -+ /* In local priority mode, querying either way around for "a -+ * should run before b" and "b should run before a" should -+ * always be false when they're from different contexts -+ */ -+ if (kctx_a != kctx_b) -+ return false; -+ } else { -+ /* In system priority mode, ordering is done first strictly by -+ * context priority, even when katom_b might be lower priority -+ * than katom_a. This is due to scheduling of contexts in order -+ * of highest priority first, regardless of whether the atoms -+ * for a particular slot from such contexts have the highest -+ * priority or not. -+ */ -+ if (kctx_a != kctx_b) { -+ if (kctx_a->priority < kctx_b->priority) -+ return true; -+ if (kctx_a->priority > kctx_b->priority) -+ return false; -+ } -+ } ++#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ + -+ /* For same contexts/contexts with the same context priority (in system -+ * priority mode), ordering is next done by atom priority -+ */ -+ if (katom_a->sched_priority < katom_b->sched_priority) -+ return true; -+ if (katom_a->sched_priority > katom_b->sched_priority) -+ return false; -+ /* For atoms of same priority on the same kctx, they are -+ * ordered by seq_nr/age (dependent on caller) -+ */ -+ if (kctx_a == kctx_b && samectxatomprio_ordering_func(katom_a, katom_b)) -+ return true; ++#endif /* _KBASE_FENCE_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h +new file mode 100644 +index 000000000..7a150bdf2 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h +@@ -0,0 +1,63 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ return false; -+} ++#ifndef _KBASE_FENCE_DEFS_H_ ++#define _KBASE_FENCE_DEFS_H_ + +/* -+ * Functions private to KBase ('Protected' functions) ++ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) ++ * This file hides the compatibility issues with this for the rest the driver + */ -+int kbasep_js_devdata_init(struct kbase_device * const kbdev) -+{ -+ struct kbasep_js_device_data *jsdd; -+ int i, j; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#include + -+ jsdd = &kbdev->js_data; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ /* Soft-stop will be disabled on a single context by default unless -+ * softstop_always is set -+ */ -+ jsdd->softstop_always = false; -+#endif /* CONFIG_MALI_BIFROST_DEBUG */ -+ jsdd->nr_all_contexts_running = 0; -+ jsdd->nr_user_contexts_running = 0; -+ jsdd->nr_contexts_pullable = 0; -+ atomic_set(&jsdd->nr_contexts_runnable, 0); -+ /* No ctx allowed to submit */ -+ jsdd->runpool_irq.submit_allowed = 0u; -+ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, -+ sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); -+ memset(jsdd->runpool_irq.slot_affinities, 0, -+ sizeof(jsdd->runpool_irq.slot_affinities)); -+ memset(jsdd->runpool_irq.slot_affinity_refcount, 0, -+ sizeof(jsdd->runpool_irq.slot_affinity_refcount)); -+ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); ++#include + -+ /* Config attributes */ -+ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; -+ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; -+ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; -+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; -+ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; -+ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; -+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; -+ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; ++#define dma_fence_context_alloc(a) fence_context_alloc(a) ++#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) ++#define dma_fence_get(a) fence_get(a) ++#define dma_fence_put(a) fence_put(a) ++#define dma_fence_signal(a) fence_signal(a) ++#define dma_fence_is_signaled(a) fence_is_signaled(a) ++#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) ++#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + -+ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; -+ jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; -+ atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); -+ jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT); ++#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) ++#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) ++#else ++#define dma_fence_get_status(a) (fence_is_signaled(a) ? 
(a)->status ?: 1 : 0) ++#endif + -+ dev_dbg(kbdev->dev, "JS Config Attribs: "); -+ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", -+ jsdd->scheduling_period_ns); -+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", -+ jsdd->soft_stop_ticks); -+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", -+ jsdd->soft_stop_ticks_cl); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", -+ jsdd->hard_stop_ticks_ss); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", -+ jsdd->hard_stop_ticks_cl); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", -+ jsdd->hard_stop_ticks_dumping); -+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", -+ jsdd->gpu_reset_ticks_ss); -+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", -+ jsdd->gpu_reset_ticks_cl); -+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", -+ jsdd->gpu_reset_ticks_dumping); -+ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", -+ jsdd->ctx_timeslice_ns); -+ dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", -+ atomic_read(&jsdd->soft_job_timeout_ms)); -+ dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms); ++#else + -+ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && -+ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && -+ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && -+ jsdd->hard_stop_ticks_dumping < -+ jsdd->gpu_reset_ticks_dumping)) { -+ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); -+ return -EINVAL; -+ } ++#include + -+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS -+ dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", -+ jsdd->soft_stop_ticks, -+ jsdd->scheduling_period_ns); -+#endif -+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", -+ jsdd->hard_stop_ticks_ss, -+ jsdd->hard_stop_ticks_dumping, -+ jsdd->scheduling_period_ns); -+#endif -+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ ++ (a)->status ?: 1 \ ++ : 0) +#endif + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) -+ jsdd->js_reqs[i] = core_reqs_from_jsn_features( -+ kbdev->gpu_props.props.raw_props.js_features[i]); -+ -+ /* On error, we could continue on: providing none of the below resources -+ * rely on the ones above -+ */ -+ -+ mutex_init(&jsdd->runpool_mutex); -+ mutex_init(&jsdd->queue_mutex); -+ sema_init(&jsdd->schedule_sem, 1); ++#endif /* < 4.10.0 */ + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { -+ for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { -+ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); -+ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); -+ } -+ } ++#endif /* _KBASE_FENCE_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +new file mode 100644 +index 000000000..25b4c9c03 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +@@ -0,0 +1,111 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ return 0; -+} ++#include ++#include ++#include ++#include + -+void kbasep_js_devdata_halt(struct kbase_device *kbdev) ++static const char * ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_get_driver_name(struct fence *fence) ++#else ++kbase_fence_get_driver_name(struct dma_fence *fence) ++#endif +{ -+ CSTD_UNUSED(kbdev); ++ return kbase_drv_name; +} + -+void kbasep_js_devdata_term(struct kbase_device *kbdev) ++static const char * ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_get_timeline_name(struct fence *fence) ++#else ++kbase_fence_get_timeline_name(struct dma_fence *fence) ++#endif +{ -+ struct kbasep_js_device_data *js_devdata; -+ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; -+ CSTD_UNUSED(js_devdata); -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ js_devdata = &kbdev->js_data; ++#if MALI_USE_CSF ++ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + -+ /* The caller must de-register all contexts before calling this -+ */ -+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); -+ KBASE_DEBUG_ASSERT(memcmp( -+ js_devdata->runpool_irq.ctx_attr_ref_count, -+ zero_ctx_attr_ref_count, -+ sizeof(zero_ctx_attr_ref_count)) == 0); -+ CSTD_UNUSED(zero_ctx_attr_ref_count); ++ return kcpu_fence->metadata->timeline_name; ++#else ++ return kbase_timeline_name; ++#endif /* MALI_USE_CSF */ +} + -+int kbasep_js_kctx_init(struct kbase_context *const kctx) ++static bool ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_enable_signaling(struct fence *fence) ++#else ++kbase_fence_enable_signaling(struct dma_fence *fence) ++#endif +{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ int i, j; -+ CSTD_UNUSED(js_kctx_info); ++ return true; ++} + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++static void ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_fence_value_str(struct fence *fence, char *str, int size) ++#else ++kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) ++#endif ++{ ++#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) ++ const char *format = "%u"; ++#else ++ const char *format = "%llu"; ++#endif ++ if (unlikely(!scnprintf(str, size, format, fence->seqno))) ++ pr_err("Fail to encode fence seqno to string"); ++} + -+ kbase_ctx_sched_init_ctx(kctx); ++#if MALI_USE_CSF ++static void ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_release(struct fence *fence) ++#else ++kbase_fence_release(struct dma_fence *fence) ++#endif ++{ ++ struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + -+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) -+ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); ++ kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); ++ kfree(kcpu_fence); ++} ++#endif + -+ 
js_kctx_info = &kctx->jctx.sched_info; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ ++const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, ++#else ++extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ ++const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, ++#endif ++ .get_driver_name = kbase_fence_get_driver_name, ++ .get_timeline_name = kbase_fence_get_timeline_name, ++ .enable_signaling = kbase_fence_enable_signaling, ++#if MALI_USE_CSF ++ .fence_value_str = kbase_fence_fence_value_str, ++ .release = kbase_fence_release ++#else ++ .fence_value_str = kbase_fence_fence_value_str ++#endif ++}; ++KBASE_EXPORT_TEST_API(kbase_fence_ops); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator.h b/drivers/gpu/arm/bifrost/mali_kbase_gator.h +new file mode 100644 +index 000000000..dd7df8746 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gator.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kctx->slots_pullable = 0; -+ js_kctx_info->ctx.nr_jobs = 0; -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); -+ kbase_ctx_flag_clear(kctx, KCTX_DYING); -+ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, -+ sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); ++/* NB taken from gator */ ++/* ++ * List of possible actions to be controlled by DS-5 Streamline. ++ * The following numbers are used by gator to control the frame buffer dumping ++ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because ++ * they are unknown inside gator. 
++ */ + -+ /* Initially, the context is disabled from submission until the create -+ * flags are set -+ */ -+ kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); ++#ifndef _KBASE_GATOR_H_ ++#define _KBASE_GATOR_H_ + -+ /* On error, we could continue on: providing none of the below resources -+ * rely on the ones above -+ */ -+ mutex_init(&js_kctx_info->ctx.jsctx_mutex); ++#include + -+ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); ++#define GATOR_JOB_SLOT_START 1 ++#define GATOR_JOB_SLOT_STOP 2 ++#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + -+ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { -+ for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { -+ INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); -+ kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; -+ } -+ } ++#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT + -+ return 0; -+} ++#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) + -+void kbasep_js_kctx_term(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned int js; -+ bool update_ctx_count = false; -+ unsigned long flags; -+ CSTD_UNUSED(js_kctx_info); ++struct kbase_context; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); ++void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); ++void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); ++void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event); + -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ + -+ js_kctx_info = &kctx->jctx.sched_info; ++#endif /* _KBASE_GATOR_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c +new file mode 100644 +index 000000000..bf5f259a0 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c +@@ -0,0 +1,112 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* The caller must de-register all jobs before calling this */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); ++#include ++#include + -+ mutex_lock(&kbdev->js_data.queue_mutex); -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbasep_gpu_memory_seq_show - Show callback for the @c gpu_memory debugfs file ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry ++ * ++ * This function is called to get the contents of the @c gpu_memory debugfs ++ * file. 
This is a report of current gpu memory usage. ++ * ++ * Return: ++ * * 0 if successfully prints data in debugfs entry file ++ * * -1 if it encountered an error ++ */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) ++{ ++ struct list_head *entry; ++ const struct list_head *kbdev_list; + -+ if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { -+ WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ update_ctx_count = true; -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ } ++ kbdev_list = kbase_device_get_list(); ++ list_for_each(entry, kbdev_list) { ++ struct kbase_device *kbdev = NULL; ++ struct kbase_context *kctx; + -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kbdev->js_data.queue_mutex); ++ kbdev = list_entry(entry, struct kbase_device, entry); ++ /* output the total memory usage and cap for this device */ ++ seq_printf(sfile, " \n"); ++ seq_printf(sfile, "%-16s %10u\n", ++ kbdev->devname, ++ atomic_read(&(kbdev->memdev.used_pages))); ++ mutex_lock(&kbdev->kctx_list_lock); ++ seq_printf(sfile, " \n"); ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ struct pid *pid_struct; ++ struct task_struct *task; + -+ if (update_ctx_count) { -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&kbdev->js_data.runpool_mutex); ++ rcu_read_lock(); ++ pid_struct = find_get_pid(kctx->tgid); ++ task = pid_task(pid_struct, PIDTYPE_PID); ++ /* output the memory usage and cap for each kctx ++ * opened on this device ++ */ ++ seq_printf(sfile, " %s-0x%pK %-20s %-10d %10u\n", ++ "kctx", ++ kctx, ++ task ? 
task->comm : "[null comm]", ++ kctx->tgid, ++ atomic_read(&(kctx->used_pages))); ++ put_pid(pid_struct); ++ rcu_read_unlock(); ++ } ++ mutex_unlock(&kbdev->kctx_list_lock); + } -+ -+ kbase_ctx_sched_remove_ctx(kctx); ++ kbase_device_put_list(kbdev_list); ++ return 0; +} + +/* -+ * Priority blocking management functions ++ * File operations related to debugfs entry for gpu_memory + */ -+ -+/* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */ -+static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, -+ int sched_prio) ++static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) +{ -+ struct kbase_jsctx_slot_tracking *slot_tracking = -+ &kctx->slot_tracking[js]; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ slot_tracking->blocked &= ~(((kbase_js_prio_bitmap_t)1) << sched_prio); -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_UNBLOCKED, kctx, -+ NULL, 0, js, (unsigned int)sched_prio); ++ return single_open(file, kbasep_gpu_memory_seq_show, NULL); +} + -+static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) -+{ -+ return atomic_read(&kctx->slot_tracking[js].atoms_pulled); -+} ++static const struct file_operations kbasep_gpu_memory_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_gpu_memory_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + +/* -+ * A priority level on a slot is blocked when: -+ * - that priority level is blocked -+ * - or, any higher priority level is blocked ++ * Initialize debugfs entry for gpu_memory + */ -+static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, -+ int sched_prio) ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_jsctx_slot_tracking *slot_tracking = -+ &kctx->slot_tracking[js]; -+ kbase_js_prio_bitmap_t prio_bit, higher_prios_mask; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ /* done in two separate shifts to prevent future undefined behavior -+ * should the number of priority levels == (bit width of the type) -+ */ -+ prio_bit = (((kbase_js_prio_bitmap_t)1) << sched_prio); -+ /* all bits of sched_prio or higher, with sched_prio = 0 being the -+ * highest priority -+ */ -+ higher_prios_mask = (prio_bit << 1) - 1u; -+ return (slot_tracking->blocked & higher_prios_mask) != 0u; ++ debugfs_create_file("gpu_memory", 0444, ++ kbdev->mali_debugfs_directory, NULL, ++ &kbasep_gpu_memory_debugfs_fops); +} -+ -+/** -+ * kbase_jsctx_slot_atom_pulled_inc - Increase counts of atoms that have being -+ * pulled for a slot from a ctx, based on -+ * this atom -+ * @kctx: kbase context -+ * @katom: atom pulled ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) {} ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h +new file mode 100644 +index 000000000..6d5423f37 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h +@@ -0,0 +1,50 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Manages counts of atoms pulled (including per-priority-level counts), for -+ * later determining when a ctx can become unblocked on a slot. ++ * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved. 
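/*
 * Illustrative sketch, not part of the patch: the mask arithmetic used by
 * kbase_jsctx_slot_prio_is_blocked() above. A priority level counts as
 * blocked when its own bit or any higher-priority (numerically lower) bit is
 * set, and the "this level and above" mask is built as (prio_bit << 1) - 1.
 * The 16-bit bitmap type below is an assumption made just for this demo.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t prio_bitmap_t;

static bool prio_is_blocked(prio_bitmap_t blocked, int sched_prio)
{
	prio_bitmap_t prio_bit = (prio_bitmap_t)1 << sched_prio;
	/* bits [0 .. sched_prio]: this level plus every higher level */
	prio_bitmap_t higher_prios_mask = (prio_bit << 1) - 1u;

	return (blocked & higher_prios_mask) != 0u;
}

int main(void)
{
	prio_bitmap_t blocked = 1u << 1;	/* only priority level 1 is blocked */

	printf("%d %d %d\n",
	       prio_is_blocked(blocked, 0),	/* 0: higher than the blocked level */
	       prio_is_blocked(blocked, 1),	/* 1: blocked directly */
	       prio_is_blocked(blocked, 2));	/* 1: blocked because level 1 is */
	return 0;
}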
+ * -+ * Once a slot has been blocked at @katom's priority level, it should not be -+ * pulled from, hence this function should not be called in that case. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * The return value is to aid tracking of when @kctx becomes runnable. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: new total count of atoms pulled from all slots on @kctx + */ -+static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, -+ const struct kbase_jd_atom *katom) -+{ -+ unsigned int js = katom->slot_nr; -+ int sched_prio = katom->sched_priority; -+ struct kbase_jsctx_slot_tracking *slot_tracking = -+ &kctx->slot_tracking[js]; -+ int nr_atoms_pulled; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++/** ++ * DOC: Header file for gpu_memory entry in debugfs ++ * ++ */ + -+ WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio), -+ "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher", -+ js, sched_prio); ++#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H ++#define _KBASE_GPU_MEMORY_DEBUGFS_H + -+ nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots); -+ atomic_inc(&slot_tracking->atoms_pulled); -+ slot_tracking->atoms_pulled_pri[sched_prio]++; ++#include ++#include + -+ return nr_atoms_pulled; -+} ++/* kbase_io_history_add - add new entry to the register access history ++ * ++ * @h: Pointer to the history data structure ++ * @addr: Register address ++ * @value: The value that is either read from or written to the register ++ * @write: 1 if it's a register write, 0 if it's a read ++ */ ++void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, ++ u32 value, u8 write); + +/** -+ * kbase_jsctx_slot_atom_pulled_dec- Decrease counts of atoms that have being -+ * pulled for a slot from a ctx, and -+ * re-evaluate whether a context is blocked -+ * on this slot -+ * @kctx: kbase context -+ * @katom: atom that has just been removed from a job slot ++ * kbasep_gpu_memory_debugfs_init - Initialize gpu_memory debugfs entry + * -+ * @kctx can become unblocked on a slot for a priority level when it no longer -+ * has any pulled atoms at that priority level on that slot, and all higher -+ * (numerically lower) priority levels are also unblocked @kctx on that -+ * slot. The latter condition is to retain priority ordering within @kctx. ++ * @kbdev: Device pointer ++ */ ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); ++ ++#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +new file mode 100644 +index 000000000..7a7d17ea5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +@@ -0,0 +1,888 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if the slot was previously blocked but has now become unblocked -+ * at @katom's priority level, false otherwise. + */ -+static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, -+ const struct kbase_jd_atom *katom) -+{ -+ unsigned int js = katom->slot_nr; -+ int sched_prio = katom->sched_priority; -+ int atoms_pulled_pri; -+ struct kbase_jsctx_slot_tracking *slot_tracking = -+ &kctx->slot_tracking[js]; -+ bool slot_prio_became_unblocked = false; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++/* ++ * Base kernel property query APIs ++ */ + -+ atomic_dec(&kctx->atoms_pulled_all_slots); -+ atomic_dec(&slot_tracking->atoms_pulled); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ atoms_pulled_pri = --(slot_tracking->atoms_pulled_pri[sched_prio]); + -+ /* We can safely clear this priority level's blocked status even if -+ * higher priority levels are still blocked: a subsequent query to -+ * kbase_jsctx_slot_prio_is_blocked() will still return true -+ */ -+ if (!atoms_pulled_pri && -+ kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) { -+ kbase_jsctx_slot_prio_blocked_clear(kctx, js, sched_prio); ++static void kbase_gpuprops_construct_coherent_groups( ++ struct base_gpu_props * const props) ++{ ++ struct mali_base_gpu_coherent_group *current_group; ++ u64 group_present; ++ u64 group_mask; ++ u64 first_set, first_set_prev; ++ u32 num_groups = 0; + -+ if (!kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) -+ slot_prio_became_unblocked = true; -+ } ++ KBASE_DEBUG_ASSERT(props != NULL); + -+ if (slot_prio_became_unblocked) -+ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, -+ JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED, -+ kctx, katom, katom->jc, js, -+ (unsigned int)sched_prio); ++ props->coherency_info.coherency = props->raw_props.mem_features; ++ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + -+ return slot_prio_became_unblocked; -+} ++ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { ++ /* Group is l2 coherent */ ++ group_present = props->raw_props.l2_present; ++ } else { ++ /* Group is l1 coherent */ ++ group_present = props->raw_props.shader_present; ++ } + -+/** -+ * kbase_js_ctx_list_add_pullable_nolock - Variant of -+ * kbase_jd_ctx_list_add_pullable() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * Caller must hold hwaccess_lock -+ * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() -+ */ -+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js) -+{ -+ bool ret = false; ++ /* ++ * The coherent group mask can be computed from the l2 present ++ 
* register. ++ * ++ * For the coherent group n: ++ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) ++ * where first_set is group_present with only its nth set-bit kept ++ * (i.e. the position from where a new group starts). ++ * ++ * For instance if the groups are l2 coherent and l2_present=0x0..01111: ++ * The first mask is: ++ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) ++ * = (0x0..010 - 1) & ~(0x0..01 - 1) ++ * = 0x0..00f ++ * The second mask is: ++ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) ++ * = (0x0..100 - 1) & ~(0x0..010 - 1) ++ * = 0x0..0f0 ++ * And so on until all the bits from group_present have been cleared ++ * (i.e. there is no group left). ++ */ + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js); ++ current_group = props->coherency_info.group; ++ first_set = group_present & ~(group_present - 1); + -+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { ++ group_present -= first_set; /* Clear the current group bit */ ++ first_set_prev = first_set; + -+ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); ++ first_set = group_present & ~(group_present - 1); ++ group_mask = (first_set - 1) & ~(first_set_prev - 1); + -+ if (!kctx->slots_pullable) { -+ kbdev->js_data.nr_contexts_pullable++; -+ ret = true; -+ if (!kbase_jsctx_atoms_pulled(kctx)) { -+ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); -+ atomic_inc(&kbdev->js_data.nr_contexts_runnable); -+ } ++ /* Populate the coherent_group structure for each group */ ++ current_group->core_mask = group_mask & props->raw_props.shader_present; ++ current_group->num_cores = hweight64(current_group->core_mask); ++ ++ num_groups++; ++ current_group++; + } -+ kctx->slots_pullable |= (1 << js); + -+ return ret; ++ if (group_present != 0) ++ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); ++ ++ props->coherency_info.num_groups = num_groups; +} + +/** -+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of -+ * kbase_js_ctx_list_add_pullable_head() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use ++ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources ++ * @kbdev: The &struct kbase_device structure for the device ++ * @curr_config: The &struct curr_config_props structure to receive the result + * -+ * Caller must hold hwaccess_lock ++ * Fill the &struct curr_config_props structure with values from the GPU ++ * configuration registers. 
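/*
 * Illustrative sketch, not part of the patch: the group-mask recurrence that
 * kbase_gpuprops_construct_coherent_groups() above implements, run standalone
 * with made-up register values (l2_present = 0x11 -> slices at bits 0 and 4,
 * shader_present = 0xff -> eight cores). It prints 0xf and 0xf0, i.e. cores
 * 0-3 in the first coherent group and cores 4-7 in the second.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t group_present = 0x11;	/* stand-in for raw_props.l2_present */
	uint64_t shader_present = 0xff;	/* stand-in for raw_props.shader_present */
	uint64_t first_set, first_set_prev, group_mask;

	first_set = group_present & ~(group_present - 1);	/* lowest set bit */

	while (group_present != 0) {
		group_present -= first_set;	/* clear the current group bit */
		first_set_prev = first_set;

		first_set = group_present & ~(group_present - 1);
		/* group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) */
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		printf("core_mask = 0x%llx\n",
		       (unsigned long long)(group_mask & shader_present));
	}
	return 0;
}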
+ * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Return: Zero on success, Linux error code on failure + */ -+static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js) ++int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, ++ struct curr_config_props * const curr_config) +{ -+ bool ret = false; ++ struct kbase_current_config_regdump curr_config_regdump; ++ int err; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js); ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) ++ return -EINVAL; + -+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ /* If update not needed just return. */ ++ if (!curr_config->update_needed) ++ return 0; + -+ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); ++ /* Dump relevant registers */ ++ err = kbase_backend_gpuprops_get_curr_config(kbdev, ++ &curr_config_regdump); ++ if (err) ++ return err; + -+ if (!kctx->slots_pullable) { -+ kbdev->js_data.nr_contexts_pullable++; -+ ret = true; -+ if (!kbase_jsctx_atoms_pulled(kctx)) { -+ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); -+ atomic_inc(&kbdev->js_data.nr_contexts_runnable); -+ } -+ } -+ kctx->slots_pullable |= (1 << js); ++ curr_config->l2_slices = ++ KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; + -+ return ret; -+} ++ curr_config->l2_present = ++ ((u64) curr_config_regdump.l2_present_hi << 32) + ++ curr_config_regdump.l2_present_lo; + -+/** -+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the -+ * per-slot pullable context queue -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * If the context is on either the pullable or unpullable queues, then it is -+ * removed before being added to the head. -+ * -+ * This function should be used when a context has been scheduled, but no jobs -+ * can currently be pulled from it. -+ * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() -+ */ -+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js) -+{ -+ bool ret; -+ unsigned long flags; ++ curr_config->shader_present = ++ ((u64) curr_config_regdump.shader_present_hi << 32) + ++ curr_config_regdump.shader_present_lo; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ curr_config->num_cores = hweight64(curr_config->shader_present); + -+ return ret; ++ curr_config->update_needed = false; ++ ++ return 0; +} + +/** -+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the -+ * per-slot unpullable context queue -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * The context must already be on the per-slot pullable queue. It will be -+ * removed from the pullable queue before being added to the unpullable queue. -+ * -+ * This function should be used when a context has been pulled from, and there -+ * are no jobs remaining on the specified slot. 
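/*
 * Illustrative sketch, not part of the patch: the two derivations used by
 * kbase_gpuprops_get_curr_config_props() above. The L2 slice count is the
 * 4-bit field at bits [11:8] of MEM_FEATURES plus one, and the core count is
 * the population count of shader_present (what the kernel's hweight64()
 * returns; __builtin_popcountll is used here as a userspace stand-in). The
 * register values are made up for the demo.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mem_features = 0x00000100;	/* field value 1 -> 2 L2 slices */
	uint64_t shader_present = 0xff;		/* cores 0-7 present */

	unsigned int l2_slices = ((mem_features >> 8) & 0xf) + 1;
	unsigned int num_cores = (unsigned int)__builtin_popcountll(shader_present);

	printf("l2_slices=%u num_cores=%u\n", l2_slices, num_cores);	/* 2 and 8 */
	return 0;
}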
++ * kbase_gpuprops_req_curr_config_update - Request Current Config Update ++ * @kbdev: The &struct kbase_device structure for the device + * -+ * Caller must hold hwaccess_lock ++ * Requests the current configuration to be updated next time the ++ * kbase_gpuprops_get_curr_config_props() is called. + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Return: Zero on success, Linux error code on failure + */ -+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, unsigned int js) ++int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); -+ -+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); -+ -+ if (kctx->slots_pullable == (1 << js)) { -+ kbdev->js_data.nr_contexts_pullable--; -+ ret = true; -+ if (!kbase_jsctx_atoms_pulled(kctx)) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ } -+ } -+ kctx->slots_pullable &= ~(1 << js); ++ if (WARN_ON(!kbdev)) ++ return -EINVAL; + -+ return ret; ++ kbdev->gpu_props.curr_config.update_needed = true; ++ return 0; +} + +/** -+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable -+ * or unpullable context queues -+ * @kbdev: Device pointer -+ * @kctx: Context to remove from queue -+ * @js: Job slot to use -+ * -+ * The context must already be on one of the queues. -+ * -+ * This function should be used when a context has no jobs on the GPU, and no -+ * jobs remaining for the specified slot. ++ * kbase_gpuprops_get_props - Get the GPU configuration ++ * @gpu_props: The &struct base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device + * -+ * Caller must hold hwaccess_lock ++ * Fill the &struct base_gpu_props structure with values from the GPU ++ * configuration registers. Only the raw properties are filled in this function. 
+ * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Return: Zero on success, Linux error code on failure + */ -+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js) ++static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, ++ struct kbase_device *kbdev) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct kbase_gpuprops_regdump regdump; ++ int i; ++ int err; + -+ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(gpu_props != NULL); + -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ /* Dump relevant registers */ ++ err = kbase_backend_gpuprops_get(kbdev, ®dump); ++ if (err) ++ return err; + -+ if (kctx->slots_pullable == (1 << js)) { -+ kbdev->js_data.nr_contexts_pullable--; -+ ret = true; -+ if (!kbase_jsctx_atoms_pulled(kctx)) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ } -+ } -+ kctx->slots_pullable &= ~(1 << js); ++ gpu_props->raw_props.gpu_id = regdump.gpu_id; ++ gpu_props->raw_props.tiler_features = regdump.tiler_features; ++ gpu_props->raw_props.mem_features = regdump.mem_features; ++ gpu_props->raw_props.mmu_features = regdump.mmu_features; ++ gpu_props->raw_props.l2_features = regdump.l2_features; + -+ return ret; -+} ++ gpu_props->raw_props.as_present = regdump.as_present; ++ gpu_props->raw_props.js_present = regdump.js_present; ++ gpu_props->raw_props.shader_present = ++ ((u64) regdump.shader_present_hi << 32) + ++ regdump.shader_present_lo; ++ gpu_props->raw_props.tiler_present = ++ ((u64) regdump.tiler_present_hi << 32) + ++ regdump.tiler_present_lo; ++ gpu_props->raw_props.l2_present = ++ ((u64) regdump.l2_present_hi << 32) + ++ regdump.l2_present_lo; ++ gpu_props->raw_props.stack_present = ++ ((u64) regdump.stack_present_hi << 32) + ++ regdump.stack_present_lo; + -+/** -+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @js: Job slot to use -+ * -+ * Caller must hold hwaccess_lock -+ * -+ * Return: Context to use for specified slot. 
-+ * NULL if no contexts present for specified slot -+ */ -+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev, -+ unsigned int js) -+{ -+ struct kbase_context *kctx; -+ int i; ++ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) ++ gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + -+ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { -+ if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) -+ continue; ++ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; ++ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; ++ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; ++ gpu_props->raw_props.thread_features = regdump.thread_features; ++ gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + -+ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, -+ struct kbase_context, -+ jctx.sched_info.ctx.ctx_list_entry[js]); ++ gpu_props->raw_props.gpu_features = ++ ((u64) regdump.gpu_features_hi << 32) + ++ regdump.gpu_features_lo; + -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, -+ js); -+ return kctx; -+ } -+ return NULL; ++ return 0; +} + -+/** -+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable -+ * queue. -+ * @kbdev: Device pointer -+ * @js: Job slot to use -+ * -+ * Return: Context to use for specified slot. -+ * NULL if no contexts present for specified slot -+ */ -+static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js) ++void kbase_gpuprops_update_core_props_gpu_id( ++ struct base_gpu_props * const gpu_props) +{ -+ struct kbase_context *kctx; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return kctx; ++ gpu_props->core_props.version_status = ++ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); ++ gpu_props->core_props.minor_revision = ++ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); ++ gpu_props->core_props.major_revision = ++ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); ++ gpu_props->core_props.product_id = ++ KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); +} + +/** -+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the -+ * specified slot -+ * @kctx: Context pointer -+ * @js: Job slot to use -+ * @is_scheduled: true if the context is currently scheduled -+ * -+ * Caller must hold hwaccess_lock ++ * kbase_gpuprops_update_max_config_props - Updates the max config properties in ++ * the base_gpu_props. ++ * @base_props: The &struct base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device + * -+ * Return: true if context can be pulled from on specified slot -+ * false otherwise ++ * Updates the &struct base_gpu_props structure with the max config properties. 
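/*
 * Illustrative sketch, not part of the patch. KBASE_UBFX32() is defined
 * elsewhere in the driver; it is assumed here to extract `size` bits starting
 * at bit `offset` (size < 32 in this demo). Under that assumption, the GPU_ID
 * decode performed by kbase_gpuprops_update_core_props_gpu_id() above splits
 * the register into version_status [3:0], minor_revision [11:4],
 * major_revision [15:12] and product_id [31:16]. The gpu_id value is made up.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t ubfx32(uint32_t value, unsigned int offset, unsigned int size)
{
	return (value >> offset) & ((1u << size) - 1u);
}

int main(void)
{
	uint32_t gpu_id = 0x74932001;	/* example only */

	printf("product_id=0x%x major=%u minor=%u status=%u\n",
	       ubfx32(gpu_id, 16, 16),	/* bits [31:16] */
	       ubfx32(gpu_id, 12, 4),	/* bits [15:12] */
	       ubfx32(gpu_id, 4, 8),	/* bits [11:4]  */
	       ubfx32(gpu_id, 0, 4));	/* bits [3:0]   */
	return 0;
}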
+ */ -+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled) ++static void kbase_gpuprops_update_max_config_props( ++ struct base_gpu_props * const base_props, struct kbase_device *kbdev) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_jd_atom *katom; -+ struct kbase_device *kbdev = kctx->kbdev; ++ int l2_n = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (WARN_ON(!kbdev) || WARN_ON(!base_props)) ++ return; + -+ js_devdata = &kbdev->js_data; ++ /* return if the max_config is not set during arbif initialization */ ++ if (kbdev->gpu_props.max_config.core_mask == 0) ++ return; + -+ if (is_scheduled) { -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { -+ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", -+ (void *)kctx); -+ return false; -+ } -+ } -+ katom = jsctx_rb_peek(kctx, js); -+ if (!katom) { -+ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); -+ return false; /* No pullable atoms */ -+ } -+ if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { -+ KBASE_KTRACE_ADD_JM_SLOT_INFO( -+ kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, -+ katom->jc, js, (unsigned int)katom->sched_priority); -+ dev_dbg(kbdev->dev, -+ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", -+ (void *)kctx, katom->sched_priority, js); -+ return false; -+ } -+ if (atomic_read(&katom->blocked)) { -+ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", -+ (void *)katom); -+ return false; /* next atom blocked */ -+ } -+ if (kbase_js_atom_blocked_on_x_dep(katom)) { -+ if (katom->x_pre_dep->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || -+ katom->x_pre_dep->will_fail_event_code) { -+ dev_dbg(kbdev->dev, -+ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", -+ (void *)katom->x_pre_dep); -+ return false; -+ } -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && -+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { -+ dev_dbg(kbdev->dev, -+ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", -+ (void *)katom, js); -+ return false; -+ } ++ /* ++ * Set the base_props with the maximum config values to ensure that the ++ * user space will always be based on the maximum resources available. ++ */ ++ base_props->l2_props.num_l2_slices = ++ kbdev->gpu_props.max_config.l2_slices; ++ base_props->raw_props.shader_present = ++ kbdev->gpu_props.max_config.core_mask; ++ /* ++ * Update l2_present in the raw data to be consistent with the ++ * max_config.l2_slices number. ++ */ ++ base_props->raw_props.l2_present = 0; ++ for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { ++ base_props->raw_props.l2_present <<= 1; ++ base_props->raw_props.l2_present |= 0x1; + } -+ -+ dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom, -+ (void *)kctx, js); -+ -+ return true; ++ /* ++ * Update the coherency_info data using just one core group. For ++ * architectures where the max_config is provided by the arbiter it is ++ * not necessary to split the shader core groups in different coherent ++ * groups. 
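/*
 * Illustrative sketch, not part of the patch: the shift/OR loop in
 * kbase_gpuprops_update_max_config_props() above just rebuilds l2_present as
 * a mask with num_l2_slices low bits set, i.e. ((1ull << n) - 1) for n < 64.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t l2_present_from_slices(unsigned int n)
{
	uint64_t mask = 0;
	unsigned int i;

	for (i = 0; i < n; i++) {
		mask <<= 1;
		mask |= 0x1;
	}
	return mask;
}

int main(void)
{
	assert(l2_present_from_slices(1) == 0x1);
	assert(l2_present_from_slices(4) == 0xf);
	assert(l2_present_from_slices(4) == ((1ull << 4) - 1));
	return 0;
}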
++ */ ++ base_props->coherency_info.coherency = ++ base_props->raw_props.mem_features; ++ base_props->coherency_info.num_core_groups = 1; ++ base_props->coherency_info.num_groups = 1; ++ base_props->coherency_info.group[0].core_mask = ++ kbdev->gpu_props.max_config.core_mask; ++ base_props->coherency_info.group[0].num_cores = ++ hweight32(kbdev->gpu_props.max_config.core_mask); +} + -+static bool kbase_js_dep_validate(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) ++/** ++ * kbase_gpuprops_calculate_props - Calculate the derived properties ++ * @gpu_props: The &struct base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Fill the &struct base_gpu_props structure with values derived from the GPU ++ * configuration registers ++ */ ++static void kbase_gpuprops_calculate_props( ++ struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool ret = true; -+ bool has_dep = false, has_x_dep = false; -+ unsigned int js = kbase_js_get_slot(kbdev, katom); -+ int prio = katom->sched_priority; + int i; + -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; -+ -+ if (dep_atom) { -+ unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); -+ int dep_prio = dep_atom->sched_priority; -+ -+ dev_dbg(kbdev->dev, -+ "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", -+ i, (void *)katom, js, (void *)dep_atom, dep_js); -+ -+ /* Dependent atom must already have been submitted */ -+ if (!(dep_atom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { -+ dev_dbg(kbdev->dev, -+ "Blocker not submitted yet\n"); -+ ret = false; -+ break; -+ } -+ -+ /* Dependencies with different priorities can't -+ * be represented in the ringbuffer -+ */ -+ if (prio != dep_prio) { -+ dev_dbg(kbdev->dev, -+ "Different atom priorities\n"); -+ ret = false; -+ break; -+ } -+ -+ if (js == dep_js) { -+ /* Only one same-slot dependency can be -+ * represented in the ringbuffer -+ */ -+ if (has_dep) { -+ dev_dbg(kbdev->dev, -+ "Too many same-slot deps\n"); -+ ret = false; -+ break; -+ } -+ /* Each dependee atom can only have one -+ * same-slot dependency -+ */ -+ if (dep_atom->post_dep) { -+ dev_dbg(kbdev->dev, -+ "Too many same-slot successors\n"); -+ ret = false; -+ break; -+ } -+ has_dep = true; -+ } else { -+ /* Only one cross-slot dependency can be -+ * represented in the ringbuffer -+ */ -+ if (has_x_dep) { -+ dev_dbg(kbdev->dev, -+ "Too many cross-slot deps\n"); -+ ret = false; -+ break; -+ } -+ /* Each dependee atom can only have one -+ * cross-slot dependency -+ */ -+ if (dep_atom->x_post_dep) { -+ dev_dbg(kbdev->dev, -+ "Too many cross-slot successors\n"); -+ ret = false; -+ break; -+ } -+ /* The dependee atom can not already be in the -+ * HW access ringbuffer -+ */ -+ if (dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ dev_dbg(kbdev->dev, -+ "Blocker already in ringbuffer (state:%d)\n", -+ dep_atom->gpu_rb_state); -+ ret = false; -+ break; -+ } -+ /* The dependee atom can not already have -+ * completed -+ */ -+ if (dep_atom->status != -+ KBASE_JD_ATOM_STATE_IN_JS) { -+ dev_dbg(kbdev->dev, -+ "Blocker already completed (status:%d)\n", -+ dep_atom->status); -+ ret = false; -+ break; -+ } ++ /* Populate the base_gpu_props structure */ ++ kbase_gpuprops_update_core_props_gpu_id(gpu_props); ++ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; ++#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE ++ 
gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; ++#else ++ gpu_props->core_props.gpu_available_memory_size = ++ totalram_pages() << PAGE_SHIFT; ++#endif + -+ has_x_dep = true; -+ } ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + -+ /* Dependency can be represented in ringbuffers */ -+ } -+ } ++ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); ++ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + -+ /* If dependencies can be represented by ringbuffer then clear them from -+ * atom structure ++ /* Field with number of l2 slices is added to MEM_FEATURES register ++ * since t76x. Below code assumes that for older GPU reserved bits will ++ * be read as zero. + */ -+ if (ret) { -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; ++ gpu_props->l2_props.num_l2_slices = ++ KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + -+ if (dep_atom) { -+ int dep_js = kbase_js_get_slot(kbdev, dep_atom); ++ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); ++ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); + -+ dev_dbg(kbdev->dev, -+ "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", -+ i, (void *)katom, js, (void *)dep_atom, -+ dep_js); ++ if (gpu_props->raw_props.thread_max_threads == 0) ++ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; ++ else ++ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; + -+ if ((js != dep_js) && -+ (dep_atom->status != -+ KBASE_JD_ATOM_STATE_COMPLETED) -+ && (dep_atom->status != -+ KBASE_JD_ATOM_STATE_HW_COMPLETED) -+ && (dep_atom->status != -+ KBASE_JD_ATOM_STATE_UNUSED)) { ++ if (gpu_props->raw_props.thread_max_workgroup_size == 0) ++ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; ++ else ++ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; + -+ katom->atom_flags |= -+ KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++ if (gpu_props->raw_props.thread_max_barrier_size == 0) ++ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; ++ else ++ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + -+ dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", -+ (void *)katom); ++ if (gpu_props->raw_props.thread_tls_alloc == 0) ++ gpu_props->thread_props.tls_alloc = ++ gpu_props->thread_props.max_threads; ++ else ++ gpu_props->thread_props.tls_alloc = ++ gpu_props->raw_props.thread_tls_alloc; + -+ katom->x_pre_dep = dep_atom; -+ dep_atom->x_post_dep = katom; -+ if (kbase_jd_katom_dep_type( -+ &katom->dep[i]) == -+ BASE_JD_DEP_TYPE_DATA) -+ katom->atom_flags |= -+ KBASE_KATOM_FLAG_FAIL_BLOCKER; -+ } -+ if ((kbase_jd_katom_dep_type(&katom->dep[i]) -+ == BASE_JD_DEP_TYPE_DATA) && -+ (js == dep_js)) { -+ katom->pre_dep = dep_atom; -+ dep_atom->post_dep = katom; -+ } ++#if MALI_USE_CSF ++ gpu_props->thread_props.max_registers = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); ++ gpu_props->thread_props.impl_tech = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); ++ gpu_props->thread_props.max_task_queue = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); ++ gpu_props->thread_props.max_thread_group_split = 0; ++#else ++ 
gpu_props->thread_props.max_registers = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); ++ gpu_props->thread_props.max_task_queue = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); ++ gpu_props->thread_props.max_thread_group_split = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); ++ gpu_props->thread_props.impl_tech = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); ++#endif + -+ list_del(&katom->dep_item[i]); -+ kbase_jd_katom_dep_clear(&katom->dep[i]); -+ } -+ } -+ } else { -+ dev_dbg(kbdev->dev, -+ "Deps of atom %pK (s:%d) could not be represented\n", -+ (void *)katom, js); ++ /* If values are not specified, then use defaults */ ++ if (gpu_props->thread_props.max_registers == 0) { ++ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; ++ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; ++ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + -+ return ret; ++ /* ++ * If the maximum resources allocated information is available it is ++ * necessary to update the base_gpu_props with the max_config info to ++ * the userspace. This is applicable to systems that receive this ++ * information from the arbiter. ++ */ ++ if (kbdev->gpu_props.max_config.core_mask) ++ /* Update the max config properties in the base_gpu_props */ ++ kbase_gpuprops_update_max_config_props(gpu_props, ++ kbdev); ++ else ++ /* Initialize the coherent_group structure for each group */ ++ kbase_gpuprops_construct_coherent_groups(gpu_props); +} + -+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) ++void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, ++ const struct max_config_props *max_config) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned int js; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* Move kctx to the pullable/upullable list as per the new priority */ -+ if (new_priority != kctx->priority) { -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kctx->slots_pullable & (1 << js)) -+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_pullable[js][new_priority]); -+ else -+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_unpullable[js][new_priority]); -+ } ++ if (WARN_ON(!kbdev) || WARN_ON(!max_config)) ++ return; + -+ kctx->priority = new_priority; -+ } ++ kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; ++ kbdev->gpu_props.max_config.core_mask = max_config->core_mask; +} + -+void kbase_js_update_ctx_priority(struct kbase_context *kctx) ++void kbase_gpuprops_set(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; -+ int prio; ++ struct kbase_gpu_props *gpu_props; ++ struct gpu_raw_gpu_props *raw; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (WARN_ON(!kbdev)) ++ return; ++ gpu_props = &kbdev->gpu_props; ++ raw = &gpu_props->props.raw_props; + -+ if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { -+ /* Determine the new priority for context, as per the priority -+ * of currently in-use atoms. 
-+ */ -+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; -+ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ if (kctx->atoms_count[prio]) { -+ new_priority = prio; -+ break; -+ } -+ } -+ } ++ /* Initialize the base_gpu_props structure from the hardware */ ++ kbase_gpuprops_get_props(&gpu_props->props, kbdev); + -+ kbase_js_set_ctx_priority(kctx, new_priority); -+} -+KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority); ++ /* Populate the derived properties */ ++ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + -+/** -+ * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler -+ * @start_katom: Pointer to the atom to be added. -+ * Return: 0 if successful or a negative value on failure. -+ */ -+static int js_add_start_rp(struct kbase_jd_atom *const start_katom) -+{ -+ struct kbase_context *const kctx = start_katom->kctx; -+ struct kbase_jd_renderpass *rp; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ unsigned long flags; ++ /* Populate kbase-only fields */ ++ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); ++ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); + -+ lockdep_assert_held(&kctx->jctx.lock); ++ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); + -+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) -+ return -EINVAL; ++ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); ++ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + -+ if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) -+ return -EINVAL; ++ gpu_props->num_cores = hweight64(raw->shader_present); ++ gpu_props->num_core_groups = ++ gpu_props->props.coherency_info.num_core_groups; ++ gpu_props->num_address_spaces = hweight32(raw->as_present); ++ gpu_props->num_job_slots = hweight32(raw->js_present); + -+ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ /* ++ * Current configuration is used on HW interactions so that the maximum ++ * config is just used for user space avoiding interactions with parts ++ * of the hardware that might not be allocated to the kbase instance at ++ * that moment. ++ */ ++ kbase_gpuprops_req_curr_config_update(kbdev); ++ kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); ++} + -+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; ++int kbase_gpuprops_set_features(struct kbase_device *kbdev) ++{ ++ struct base_gpu_props *gpu_props; ++ struct kbase_gpuprops_regdump regdump; ++ int err; + -+ if (rp->state != KBASE_JD_RP_COMPLETE) -+ return -EINVAL; ++ gpu_props = &kbdev->gpu_props.props; + -+ dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", -+ (void *)start_katom, start_katom->renderpass_id); ++ /* Dump relevant registers */ ++ err = kbase_backend_gpuprops_get_features(kbdev, ®dump); ++ if (err) ++ return err; + -+ /* The following members are read when updating the job slot -+ * ringbuffer/fifo therefore they require additional locking. ++ /* ++ * Copy the raw value from the register, later this will get turned ++ * into the selected coherency mode. ++ * Additionally, add non-coherent mode, as this is always supported. 
+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ gpu_props->raw_props.coherency_mode = regdump.coherency_features | ++ COHERENCY_FEATURE_BIT(COHERENCY_NONE); + -+ rp->state = KBASE_JD_RP_START; -+ rp->start_katom = start_katom; -+ rp->end_katom = NULL; -+ INIT_LIST_HEAD(&rp->oom_reg_list); ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) ++ gpu_props->thread_props.max_thread_group_split = 0; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* ++ * The CORE_FEATURES register has different meanings depending on GPU. ++ * On tGOx, bits[3:0] encode num_exec_engines. ++ * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed, ++ * instead. ++ * GPUs like tTIx have additional fields like LSC_SIZE that are ++ * otherwise reserved/RAZ on older GPUs. ++ */ ++ gpu_props->raw_props.core_features = regdump.core_features; + -+ return 0; ++#if !MALI_USE_CSF ++ gpu_props->core_props.num_exec_engines = ++ KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); ++#endif ++ ++ return err; +} + -+/** -+ * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler -+ * @end_katom: Pointer to the atom to be added. -+ * Return: 0 if successful or a negative value on failure. ++/* ++ * Module parameters to allow the L2 size and hash configuration to be ++ * overridden. ++ * ++ * These parameters must be set on insmod to take effect, and are not visible ++ * in sysfs. + */ -+static int js_add_end_rp(struct kbase_jd_atom *const end_katom) -+{ -+ struct kbase_context *const kctx = end_katom->kctx; -+ struct kbase_jd_renderpass *rp; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ lockdep_assert_held(&kctx->jctx.lock); ++static u8 override_l2_size; ++module_param(override_l2_size, byte, 0000); ++MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); + -+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) -+ return -EINVAL; ++static u8 override_l2_hash; ++module_param(override_l2_hash, byte, 0000); ++MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); + -+ if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) -+ return -EINVAL; ++static u32 l2_hash_values[ASN_HASH_COUNT] = { ++ 0, ++}; ++static unsigned int num_override_l2_hash_values; ++module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); ++MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); + -+ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++/* Definitions for range of supported user defined hash functions for GPUs ++ * that support L2_CONFIG and not ASN_HASH features. Supported hash function ++ * range from 0b1000-0b1111 inclusive. Selection of any other values will ++ * lead to undefined behavior. ++ */ ++#define USER_DEFINED_HASH_LO ((u8)0x08) ++#define USER_DEFINED_HASH_HI ((u8)0x0F) + -+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; ++enum l2_config_override_result { ++ L2_CONFIG_OVERRIDE_FAIL = -1, ++ L2_CONFIG_OVERRIDE_NONE, ++ L2_CONFIG_OVERRIDE_OK, ++}; + -+ dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", -+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); ++/** ++ * kbase_read_l2_config_from_dt - Read L2 configuration ++ * @kbdev: The kbase device for which to get the L2 configuration. ++ * ++ * Check for L2 configuration overrides in module parameters and device tree. 
++ * Override values in module parameters take priority over override values in ++ * device tree. ++ * ++ * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly ++ * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. ++ * L2_CONFIG_OVERRIDE_FAIL otherwise. ++ */ ++static enum l2_config_override_result ++kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) ++{ ++ struct device_node *np = kbdev->dev->of_node; + -+ if (rp->state == KBASE_JD_RP_COMPLETE) -+ return -EINVAL; ++ if (!np) ++ return L2_CONFIG_OVERRIDE_NONE; + -+ if (rp->end_katom == NULL) { -+ /* We can't be in a retry state until the fragment job chain -+ * has completed. -+ */ -+ unsigned long flags; ++ if (override_l2_size) ++ kbdev->l2_size_override = override_l2_size; ++ else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) ++ kbdev->l2_size_override = 0; + -+ WARN_ON(rp->state == KBASE_JD_RP_RETRY); -+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); -+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); ++ /* Check overriding value is supported, if not will result in ++ * undefined behavior. ++ */ ++ if (override_l2_hash >= USER_DEFINED_HASH_LO && ++ override_l2_hash <= USER_DEFINED_HASH_HI) ++ kbdev->l2_hash_override = override_l2_hash; ++ else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) ++ kbdev->l2_hash_override = 0; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ rp->end_katom = end_katom; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else -+ WARN_ON(rp->end_katom != end_katom); ++ kbdev->l2_hash_values_override = false; ++ if (num_override_l2_hash_values) { ++ unsigned int i; + -+ return 0; ++ kbdev->l2_hash_values_override = true; ++ for (i = 0; i < num_override_l2_hash_values; i++) ++ kbdev->l2_hash_values[i] = l2_hash_values[i]; ++ } else if (!of_property_read_u32_array(np, "l2-hash-values", ++ kbdev->l2_hash_values, ++ ASN_HASH_COUNT)) ++ kbdev->l2_hash_values_override = true; ++ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && ++ (kbdev->l2_hash_override)) { ++ dev_err(kbdev->dev, "l2-hash not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && ++ (kbdev->l2_hash_values_override)) { ++ dev_err(kbdev->dev, "l2-hash-values not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) { ++ dev_err(kbdev->dev, ++ "both l2-hash & l2-hash-values not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (kbdev->l2_size_override || kbdev->l2_hash_override || ++ kbdev->l2_hash_values_override) ++ return L2_CONFIG_OVERRIDE_OK; ++ ++ return L2_CONFIG_OVERRIDE_NONE; +} + -+bool kbasep_js_add_job(struct kbase_context *kctx, -+ struct kbase_jd_atom *atom) ++int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; + int err = 0; + -+ bool enqueue_required = false; -+ bool timer_sync = false; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { ++ struct kbase_gpuprops_regdump regdump; ++ struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(atom != NULL); -+ lockdep_assert_held(&kctx->jctx.lock); ++ /* Check for L2 cache size & hash overrides */ ++ switch (kbase_read_l2_config_from_dt(kbdev)) { ++ case L2_CONFIG_OVERRIDE_FAIL: ++ err = 
-EIO; ++ goto exit; ++ case L2_CONFIG_OVERRIDE_NONE: ++ goto exit; ++ default: ++ break; ++ } + -+ kbdev = kctx->kbdev; -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ /* pm.active_count is expected to be 1 here, which is set in ++ * kbase_hwaccess_pm_powerup(). ++ */ ++ WARN_ON(kbdev->pm.active_count != 1); ++ /* The new settings for L2 cache can only be applied when it is ++ * off, so first do the power down. ++ */ ++ kbase_pm_context_idle(kbdev); ++ kbase_pm_wait_for_desired_state(kbdev); + -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ /* Need L2 to get powered to reflect to L2_FEATURES */ ++ kbase_pm_context_active(kbdev); + -+ if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) -+ err = js_add_start_rp(atom); -+ else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) -+ err = js_add_end_rp(atom); ++ /* Wait for the completion of L2 power transition */ ++ kbase_pm_wait_for_l2_powered(kbdev); + -+ if (err < 0) { -+ atom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ atom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ goto out_unlock; -+ } ++ /* Dump L2_FEATURES register */ ++ err = kbase_backend_gpuprops_get_l2_features(kbdev, ®dump); ++ if (err) ++ goto exit; + -+ /* -+ * Begin Runpool transaction -+ */ -+ mutex_lock(&js_devdata->runpool_mutex); ++ dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", ++ regdump.l2_features); ++ dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", ++ regdump.l2_config); + -+ /* Refcount ctx.nr_jobs */ -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); -+ ++(js_kctx_info->ctx.nr_jobs); -+ dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", -+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) { ++ int idx; ++ const bool asn_he = regdump.l2_config & ++ L2_CONFIG_ASN_HASH_ENABLE_MASK; ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ if (!asn_he && kbdev->l2_hash_values_override) ++ dev_err(kbdev->dev, ++ "Failed to use requested ASN_HASH, fallback to default"); ++#endif ++ for (idx = 0; idx < ASN_HASH_COUNT; idx++) ++ dev_info(kbdev->dev, ++ "%s ASN_HASH[%d] is [0x%08x]\n", ++ asn_he ? 
"Overridden" : "Default", idx, ++ regdump.l2_asn_hash[idx]); ++ } + -+ /* Lock for state available during IRQ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* Update gpuprops with reflected L2_FEATURES */ ++ gpu_props->raw_props.l2_features = regdump.l2_features; ++ gpu_props->l2_props.log2_cache_size = ++ KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); ++ } + -+ if (++kctx->atoms_count[atom->sched_priority] == 1) -+ kbase_js_update_ctx_priority(kctx); ++exit: ++ return err; ++} + -+ if (!kbase_js_dep_validate(kctx, atom)) { -+ /* Dependencies could not be represented */ -+ --(js_kctx_info->ctx.nr_jobs); -+ dev_dbg(kbdev->dev, -+ "Remove atom %pK from kctx %pK; now %d in ctx\n", -+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); ++static struct { ++ u32 type; ++ size_t offset; ++ int size; ++} gpu_property_mapping[] = { ++#define PROP(name, member) \ ++ {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ ++ sizeof(((struct base_gpu_props *)0)->member)} ++ PROP(PRODUCT_ID, core_props.product_id), ++ PROP(VERSION_STATUS, core_props.version_status), ++ PROP(MINOR_REVISION, core_props.minor_revision), ++ PROP(MAJOR_REVISION, core_props.major_revision), ++ PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), ++ PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), ++ PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), ++ PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), ++ PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), ++ PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), ++ PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), + -+ /* Setting atom status back to queued as it still has unresolved -+ * dependencies -+ */ -+ atom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); ++#if MALI_USE_CSF ++#define BACKWARDS_COMPAT_PROP(name, type) \ ++ { \ ++ KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ ++ } ++ BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8), ++#else ++ PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), ++#endif + -+ /* Undo the count, as the atom will get added again later but -+ * leave the context priority adjusted or boosted, in case if -+ * this was the first higher priority atom received for this -+ * context. -+ * This will prevent the scenario of priority inversion, where -+ * another context having medium priority atoms keeps getting -+ * scheduled over this context, which is having both lower and -+ * higher priority atoms, but higher priority atoms are blocked -+ * due to dependency on lower priority atoms. With priority -+ * boost the high priority atom will get to run at earliest. 
-+ */ -+ kctx->atoms_count[atom->sched_priority]--; ++ PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), ++ PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), ++ PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), ++ PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), + -+ goto out_unlock; -+ } ++ PROP(MAX_THREADS, thread_props.max_threads), ++ PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), ++ PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), ++ PROP(MAX_REGISTERS, thread_props.max_registers), ++ PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), ++ PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), ++ PROP(IMPL_TECH, thread_props.impl_tech), ++ PROP(TLS_ALLOC, thread_props.tls_alloc), + -+ enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); ++ PROP(RAW_SHADER_PRESENT, raw_props.shader_present), ++ PROP(RAW_TILER_PRESENT, raw_props.tiler_present), ++ PROP(RAW_L2_PRESENT, raw_props.l2_present), ++ PROP(RAW_STACK_PRESENT, raw_props.stack_present), ++ PROP(RAW_L2_FEATURES, raw_props.l2_features), ++ PROP(RAW_CORE_FEATURES, raw_props.core_features), ++ PROP(RAW_MEM_FEATURES, raw_props.mem_features), ++ PROP(RAW_MMU_FEATURES, raw_props.mmu_features), ++ PROP(RAW_AS_PRESENT, raw_props.as_present), ++ PROP(RAW_JS_PRESENT, raw_props.js_present), ++ PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), ++ PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), ++ PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), ++ PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), ++ PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), ++ PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), ++ PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), ++ PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), ++ PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), ++ PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), ++ PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), ++ PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), ++ PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), ++ PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), ++ PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), ++ PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), ++ PROP(RAW_TILER_FEATURES, raw_props.tiler_features), ++ PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), ++ PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), ++ PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), ++ PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), ++ PROP(RAW_GPU_ID, raw_props.gpu_id), ++ PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), ++ PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size), ++ PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), ++ PROP(RAW_THREAD_FEATURES, raw_props.thread_features), ++ PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), ++ PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), ++ PROP(RAW_GPU_FEATURES, raw_props.gpu_features), ++ PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), ++ PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), ++ PROP(COHERENCY_COHERENCY, coherency_info.coherency), ++ PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), ++ PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), ++ PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), ++ PROP(COHERENCY_GROUP_3, 
coherency_info.group[3].core_mask), ++ PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), ++ PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), ++ PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), ++ PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), ++ PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), ++ PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), ++ PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), ++ PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), ++ PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), ++ PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), ++ PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), ++ PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), + -+ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, -+ kbase_ktrace_get_ctx_refcnt(kctx)); ++#undef PROP ++}; + -+ /* Context Attribute Refcounting */ -+ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); ++int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) ++{ ++ struct kbase_gpu_props *kprops = &kbdev->gpu_props; ++ struct base_gpu_props *props = &kprops->props; ++ u32 count = ARRAY_SIZE(gpu_property_mapping); ++ u32 i; ++ u32 size = 0; ++ u8 *p; + -+ if (enqueue_required) { -+ if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) -+ timer_sync = kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, atom->slot_nr); -+ else -+ timer_sync = kbase_js_ctx_list_add_unpullable_nolock( -+ kbdev, kctx, atom->slot_nr); ++ for (i = 0; i < count; i++) { ++ /* 4 bytes for the ID, and the size of the property */ ++ size += 4 + gpu_property_mapping[i].size; + } -+ /* If this context is active and the atom is the first on its slot, -+ * kick the job manager to attempt to fast-start the atom -+ */ -+ if (enqueue_required && kctx == -+ kbdev->hwaccess.active_kctx[atom->slot_nr]) -+ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ /* End runpool transaction */ + -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* A job got added while/after kbase_job_zap_context() -+ * was called on a non-scheduled context. Kill that job -+ * by killing the context. -+ */ -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, -+ false); -+ } else if (js_kctx_info->ctx.nr_jobs == 1) { -+ /* Handle Refcount going from 0 to 1: schedule the -+ * context on the Queue -+ */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); ++ kprops->prop_buffer_size = size; ++ kprops->prop_buffer = kzalloc(size, GFP_KERNEL); + -+ /* Queue was updated - caller must try to schedule the -+ * head context -+ */ -+ WARN_ON(!enqueue_required); -+ } ++ if (!kprops->prop_buffer) { ++ kprops->prop_buffer_size = 0; ++ return -ENOMEM; + } -+out_unlock: -+ dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", -+ kctx, enqueue_required ? 
"" : "not "); -+ -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ mutex_unlock(&js_devdata->queue_mutex); ++ p = kprops->prop_buffer; + -+ return enqueue_required; -+} ++#define WRITE_U8(v) (*p++ = (v) & 0xFF) ++#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) ++#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) ++#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) + -+void kbasep_js_remove_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_jd_atom *atom) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long flags; ++ for (i = 0; i < count; i++) { ++ u32 type = gpu_property_mapping[i].type; ++ u8 type_size; ++ const size_t offset = gpu_property_mapping[i].offset; ++ const u64 dummy_backwards_compat_value = (u64)0; ++ const void *field; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(atom != NULL); ++ if (likely(offset < sizeof(struct base_gpu_props))) ++ field = ((const u8 *)props) + offset; ++ else ++ field = &dummy_backwards_compat_value; + -+ js_kctx_info = &kctx->jctx.sched_info; ++ switch (gpu_property_mapping[i].size) { ++ case 1: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U8; ++ break; ++ case 2: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U16; ++ break; ++ case 4: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U32; ++ break; ++ case 8: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U64; ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "Invalid gpu_property_mapping type=%d size=%d", ++ type, gpu_property_mapping[i].size); ++ return -EINVAL; ++ } + -+ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, -+ kbase_ktrace_get_ctx_refcnt(kctx)); ++ WRITE_U32((type<<2) | type_size); + -+ /* De-refcount ctx.nr_jobs */ -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); -+ --(js_kctx_info->ctx.nr_jobs); -+ dev_dbg(kbdev->dev, -+ "Remove atom %pK from kctx %pK; now %d in ctx\n", -+ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); ++ switch (type_size) { ++ case KBASE_GPUPROP_VALUE_SIZE_U8: ++ WRITE_U8(*((const u8 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U16: ++ WRITE_U16(*((const u16 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U32: ++ WRITE_U32(*((const u32 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U64: ++ WRITE_U64(*((const u64 *)field)); ++ break; ++ default: /* Cannot be reached */ ++ WARN_ON(1); ++ return -EINVAL; ++ } ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (--kctx->atoms_count[atom->sched_priority] == 0) -+ kbase_js_update_ctx_priority(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return 0; +} + -+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_jd_atom *katom) ++void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ struct kbasep_js_atom_retained_state katom_retained_state; -+ bool attr_state_changed; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); -+ -+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); -+ kbasep_js_remove_job(kbdev, kctx, katom); ++ kfree(kbdev->gpu_props.prop_buffer); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++int kbase_device_populate_max_freq(struct kbase_device *kbdev) ++{ ++ struct mali_base_gpu_core_props *core_props; + -+ /* The atom has 'finished' (will not be re-run), so no need to call -+ * 
kbasep_js_has_atom_finished(). -+ * -+ * This is because it returns false for soft-stopped atoms, but we -+ * want to override that, because we're cancelling an atom regardless of -+ * whether it was soft-stopped or not ++ /* obtain max configured gpu frequency, if devfreq is enabled then ++ * this will be overridden by the highest operating point found + */ -+ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, -+ &katom_retained_state); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ core_props = &(kbdev->gpu_props.props.core_props); ++#ifdef GPU_FREQ_KHZ_MAX ++ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; ++#else ++ core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; ++#endif + -+ return attr_state_changed; ++ return 0; +} -+ -+/** -+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after -+ * releasing a context and/or atom -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @katom_retained_state: Retained state from the atom -+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h +new file mode 100644 +index 000000000..f0a97312c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h +@@ -0,0 +1,154 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This collates a set of actions that must happen whilst hwaccess_lock is held. ++ * (C) COPYRIGHT 2011-2015, 2017, 2019-2022 ARM Limited. All rights reserved. + * -+ * This includes running more jobs when: -+ * - The previously released kctx caused a ctx attribute change, -+ * - The released atom caused a ctx attribute change, -+ * - Slots were previously blocked due to affinity restrictions, -+ * - Submission during IRQ handling failed. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were -+ * changed. 
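/*
 * Illustrative sketch, not part of the patch: each PROP() entry in the
 * gpu_property_mapping[] table earlier in this hunk reduces a property to an
 * {id, byte offset, field size} triple via offsetof()/sizeof(), which is what
 * lets kbase_gpuprops_populate_user_buffer() serialise every field through one
 * generic loop. A stand-alone analogue over a hypothetical struct:
 */
#include <stddef.h>
#include <stdint.h>

struct demo_props {
        uint32_t max_threads;
        uint8_t impl_tech;
};

struct demo_mapping {
        uint32_t type;
        size_t offset;
        int size;
};

#define DEMO_PROP(id, member) \
        { (id), offsetof(struct demo_props, member), \
          sizeof(((struct demo_props *)0)->member) }

static const struct demo_mapping demo_map[] = {
        DEMO_PROP(1, max_threads),      /* -> { 1, 0, 4 } on typical ABIs */
        DEMO_PROP(2, impl_tech),        /* -> { 2, 4, 1 } */
};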
The caller should try scheduling all contexts + */ -+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state, -+ bool runpool_ctx_attr_change) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ kbasep_js_release_result result = 0; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom_retained_state != NULL); -+ js_devdata = &kbdev->js_data; ++/** ++ * DOC: Base kernel property query APIs ++ */ + -+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#ifndef _KBASE_GPUPROPS_H_ ++#define _KBASE_GPUPROPS_H_ + -+ if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { -+ /* A change in runpool ctx attributes might mean we can -+ * run more jobs than before -+ */ -+ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; ++#include "mali_kbase_gpuprops_types.h" + -+ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, -+ kctx, NULL, 0u, 0); -+ } -+ return result; -+} ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; + +/** -+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference -+ * on a ctx and an atom's "retained state", only -+ * taking the runpool and as transaction mutexes -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @katom_retained_state: Retained state from the atom ++ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. ++ * @value: The value from which to extract bits. ++ * @offset: The first bit to extract (0 being the LSB). ++ * @size: The number of bits to extract. + * -+ * This also starts more jobs running in the case of an ctx-attribute state change ++ * Context: @offset + @size <= 32. + * -+ * This does none of the followup actions for scheduling: -+ * - It does not schedule in a new context -+ * - It does not requeue or handle dying contexts ++ * Return: Bits [@offset, @offset + @size) from @value. ++ */ ++/* from mali_cdsb.h */ ++#define KBASE_UBFX32(value, offset, size) \ ++ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) ++ ++/** ++ * kbase_gpuprops_set - Set up Kbase GPU properties. ++ * @kbdev: The struct kbase_device structure for the device + * -+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead ++ * Set up Kbase GPU properties with information from the GPU registers ++ */ ++void kbase_gpuprops_set(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpuprops_set_features - Set up Kbase GPU properties ++ * @kbdev: Device pointer + * -+ * Has following requirements -+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr -+ * - Context has a non-zero refcount -+ * - Caller holds js_kctx_info->ctx.jsctx_mutex -+ * - Caller holds js_devdata->runpool_mutex ++ * This function sets up GPU properties that are dependent on the hardware ++ * features bitmask. This function must be preceeded by a call to ++ * kbase_hw_set_features_mask(). + * -+ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating -+ * the result of releasing a context that whether the caller should try -+ * scheduling a new context or should try scheduling all contexts. 
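/*
 * Illustrative sketch, not part of the patch: decoding the prop_buffer that
 * kbase_gpuprops_populate_user_buffer() lays out in this hunk. Each record is
 * a 4-byte little-endian header, (type << 2) | size_code, followed by the
 * value, also little-endian. This assumes the KBASE_GPUPROP_VALUE_SIZE_U8..U64
 * codes are 0..3, matching the switch statement in the encoder.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void demo_decode_prop_buffer(const uint8_t *buf, size_t len)
{
        size_t pos = 0;

        while (pos + 4 <= len) {
                uint32_t header = (uint32_t)buf[pos] |
                                  ((uint32_t)buf[pos + 1] << 8) |
                                  ((uint32_t)buf[pos + 2] << 16) |
                                  ((uint32_t)buf[pos + 3] << 24);
                uint32_t type = header >> 2;
                size_t nbytes = (size_t)1 << (header & 3); /* 1, 2, 4 or 8 bytes */
                uint64_t value = 0;
                size_t i;

                pos += 4;
                if (pos + nbytes > len)
                        break; /* truncated record */
                for (i = 0; i < nbytes; i++)
                        value |= (uint64_t)buf[pos + i] << (8 * i);
                pos += nbytes;

                printf("property %u = 0x%llx\n", (unsigned)type,
                       (unsigned long long)value);
        }
}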
++ * Return: Zero on success, Linux error code on failure + */ -+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state) -+{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ kbasep_js_release_result release_result = 0u; -+ bool runpool_ctx_attr_change = false; -+ int kctx_as_nr; -+ int new_ref_count; -+ CSTD_UNUSED(kctx_as_nr); ++int kbase_gpuprops_set_features(struct kbase_device *kbdev); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++/** ++ * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES ++ * @kbdev: Device pointer ++ * ++ * This function updates l2_features and the log2 cache size. ++ * The function expects GPU to be powered up and value of pm.active_count ++ * to be 1. ++ * ++ * Return: Zero on success, Linux error code for failure ++ */ ++int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); + -+ /* Ensure context really is scheduled in */ -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++/** ++ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer ++ * @kbdev: The kbase device ++ * ++ * Fills prop_buffer with the GPU properties for user space to read. ++ * ++ * Return: MALI_ERROR_NONE on success. Any other value indicates failure. ++ */ ++int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); + -+ kctx_as_nr = kctx->as_nr; -+ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); ++/** ++ * kbase_gpuprops_free_user_buffer - Free the GPU properties buffer. ++ * @kbdev: kbase device pointer ++ * ++ * Free the GPU properties buffer allocated from ++ * kbase_gpuprops_populate_user_buffer. ++ */ ++void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); + -+ /* -+ * Transaction begins on AS and runpool_irq -+ * -+ * Assert about out calling contract -+ */ -+ mutex_lock(&kbdev->pm.lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_device_populate_max_freq - Populate max gpu frequency. ++ * @kbdev: kbase device pointer ++ * ++ * Populate the maximum gpu frequency to be used when devfreq is disabled. ++ * ++ * Return: 0 on success and non-zero value on failure. ++ */ ++int kbase_device_populate_max_freq(struct kbase_device *kbdev); + -+ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); -+ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); ++/** ++ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value ++ * @gpu_props: the &base_gpu_props structure ++ * ++ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into ++ * separate fields (version_status, minor_revision, major_revision, product_id) ++ * stored in base_gpu_props::core_props. ++ */ ++void kbase_gpuprops_update_core_props_gpu_id( ++ struct base_gpu_props * const gpu_props); + -+ /* Update refcount */ -+ kbase_ctx_sched_release_ctx(kctx); -+ new_ref_count = atomic_read(&kctx->refcount); ++/** ++ * kbase_gpuprops_set_max_config - Set the max config information ++ * @kbdev: Device pointer ++ * @max_config: Maximum configuration data to be updated ++ * ++ * This function sets max_config in the kbase_gpu_props. 
++ */ ++void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, ++ const struct max_config_props *max_config); + -+ /* Release the atom if it finished (i.e. wasn't soft-stopped) */ -+ if (kbasep_js_has_atom_finished(katom_retained_state)) -+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( -+ kbdev, kctx, katom_retained_state); ++/** ++ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources ++ * @kbdev: The &struct kbase_device structure for the device ++ * @curr_config: The &struct curr_config_props structure to receive the result ++ * ++ * Fill the &struct curr_config_props structure with values from the GPU ++ * configuration registers. ++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, ++ struct curr_config_props * const curr_config); + -+ if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ !kbase_pm_is_gpu_lost(kbdev) && -+#endif -+ !kbase_pm_is_suspending(kbdev)) { -+ /* Context is kept scheduled into an address space even when -+ * there are no jobs, in this case we have to handle the -+ * situation where all jobs have been evicted from the GPU and -+ * submission is disabled. -+ * -+ * At this point we re-enable submission to allow further jobs -+ * to be executed -+ */ -+ kbasep_js_set_submit_allowed(js_devdata, kctx); -+ } ++/** ++ * kbase_gpuprops_req_curr_config_update - Request Current Config Update ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Requests the current configuration to be updated next time the ++ * kbase_gpuprops_get_curr_config_props() is called. ++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); + -+ /* Make a set of checks to see if the context should be scheduled out. -+ * Note that there'll always be at least 1 reference to the context -+ * which was previously acquired by kbasep_js_schedule_ctx(). -+ */ -+ if (new_ref_count == 1 && -+ (!kbasep_js_is_submit_allowed(js_devdata, kctx) || -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ kbase_pm_is_gpu_lost(kbdev) || -+#endif -+ kbase_pm_is_suspending(kbdev))) { -+ int num_slots = kbdev->gpu_props.num_job_slots; -+ int slot; ++#endif /* _KBASE_GPUPROPS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h +new file mode 100644 +index 000000000..45cb603fa +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h +@@ -0,0 +1,170 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
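/*
 * Illustrative sketch, not part of the patch: KBASE_UBFX32() in
 * mali_kbase_gpuprops.h above is a plain shift-and-mask bitfield extract. For
 * example, the log2 cache size that kbase_gpuprops_update_l2_features() pulls
 * out of L2_FEATURES with KBASE_UBFX32(l2_features, 16, 8) amounts to:
 */
#include <stdint.h>

static inline uint32_t demo_ubfx32(uint32_t value, unsigned int offset, unsigned int size)
{
        return (value >> offset) & (uint32_t)((1ULL << size) - 1);
}

/* demo_ubfx32(0x040a0448, 16, 8) == 0x0a, i.e. bits [16, 24) of the word. */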
++ * ++ */ + -+ /* Last reference, and we've been told to remove this context -+ * from the Run Pool -+ */ -+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", -+ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, -+ kbasep_js_is_submit_allowed(js_devdata, kctx)); ++/** ++ * DOC: Base kernel property query APIs ++ */ + -+ KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); ++#ifndef _KBASE_GPUPROPS_TYPES_H_ ++#define _KBASE_GPUPROPS_TYPES_H_ + -+ kbase_backend_release_ctx_irq(kbdev, kctx); ++#include + -+ for (slot = 0; slot < num_slots; slot++) { -+ if (kbdev->hwaccess.active_kctx[slot] == kctx) { -+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", -+ (void *)kctx, slot); -+ kbdev->hwaccess.active_kctx[slot] = NULL; -+ } -+ } ++#define KBASE_GPU_SPEED_MHZ 123 ++#define KBASE_GPU_PC_SIZE_LOG2 24U + -+ /* Ctx Attribute handling -+ * -+ * Releasing atoms attributes must either happen before this, or -+ * after the KCTX_SHEDULED flag is changed, otherwise we -+ * double-decount the attributes -+ */ -+ runpool_ctx_attr_change |= -+ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); ++struct kbase_gpuprops_regdump { ++ u32 gpu_id; ++ u32 l2_features; ++ u32 l2_config; ++ u32 l2_asn_hash[ASN_HASH_COUNT]; ++ u32 core_features; ++ u32 tiler_features; ++ u32 mem_features; ++ u32 mmu_features; ++ u32 as_present; ++ u32 js_present; ++ u32 thread_max_threads; ++ u32 thread_max_workgroup_size; ++ u32 thread_max_barrier_size; ++ u32 thread_features; ++ u32 thread_tls_alloc; ++ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; ++ u32 js_features[GPU_MAX_JOB_SLOTS]; ++ u32 shader_present_lo; ++ u32 shader_present_hi; ++ u32 tiler_present_lo; ++ u32 tiler_present_hi; ++ u32 l2_present_lo; ++ u32 l2_present_hi; ++ u32 stack_present_lo; ++ u32 stack_present_hi; ++ u32 coherency_features; ++ u32 gpu_features_lo; ++ u32 gpu_features_hi; ++}; + -+ /* Releasing the context and katom retained state can allow -+ * more jobs to run -+ */ -+ release_result |= -+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, -+ kctx, katom_retained_state, -+ runpool_ctx_attr_change); ++/** ++ * struct kbase_current_config_regdump - Register dump for current resources ++ * allocated to the GPU. ++ * @mem_features: Memory system features. Contains information about the ++ * features of the memory system. Used here to get the L2 slice ++ * count. ++ * @shader_present_lo: Shader core present bitmap. Low word. ++ * @shader_present_hi: Shader core present bitmap. High word. ++ * @l2_present_lo: L2 cache present bitmap. Low word. ++ * @l2_present_hi: L2 cache present bitmap. High word. ++ * ++ * Register dump structure used to store the resgisters data realated to the ++ * current resources allocated to the GPU. ++ */ ++struct kbase_current_config_regdump { ++ u32 mem_features; ++ u32 shader_present_lo; ++ u32 shader_present_hi; ++ u32 l2_present_lo; ++ u32 l2_present_hi; ++}; + -+ /* -+ * Transaction ends on AS and runpool_irq: -+ * -+ * By this point, the AS-related data is now clear and ready -+ * for re-use. 
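/*
 * Illustrative sketch, not part of the patch: the *_present_lo/_hi pairs in
 * struct kbase_gpuprops_regdump above are the 32-bit halves of a single
 * bitmap; the driver folds each pair into one 64-bit raw_props field roughly
 * like this:
 */
#include <stdint.h>

static inline uint64_t demo_make_present_mask(uint32_t lo, uint32_t hi)
{
        return ((uint64_t)hi << 32) | lo;
}

/* e.g. a 64-bit shader_present mask built from regdump.shader_present_lo and
 * regdump.shader_present_hi. */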
-+ * -+ * Since releases only occur once for each previous successful -+ * retain, and no more retains are allowed on this context, no -+ * other thread will be operating in this -+ * code whilst we are -+ */ ++struct kbase_gpu_cache_props { ++ u8 associativity; ++ u8 external_bus_width; ++}; + -+ /* Recalculate pullable status for all slots */ -+ for (slot = 0; slot < num_slots; slot++) { -+ if (kbase_js_ctx_pullable(kctx, slot, false)) -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, -+ kctx, slot); -+ } ++struct kbase_gpu_mem_props { ++ u8 core_group; ++}; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++struct kbase_gpu_mmu_props { ++ u8 va_bits; ++ u8 pa_bits; ++}; + -+ kbase_backend_release_ctx_noirq(kbdev, kctx); ++/** ++ * struct max_config_props - Properties based on the maximum resources ++ * available. ++ * @l2_slices: Maximum number of L2 slices that can be assinged to the GPU ++ * during runtime. ++ * @padding: Padding to a multiple of 64 bits. ++ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during ++ * runtime. ++ * ++ * Properties based on the maximum resources available (not necessarly ++ * allocated at that moment). Used to provide the maximum configuration to the ++ * userspace allowing the applications to allocate enough resources in case the ++ * real allocated resources change. ++ */ ++struct max_config_props { ++ u8 l2_slices; ++ u8 padding[3]; ++ u32 core_mask; ++}; + -+ mutex_unlock(&kbdev->pm.lock); ++/** ++ * struct curr_config_props - Properties based on the current resources ++ * allocated to the GPU. ++ * @l2_present: Current L2 present bitmap that is allocated to the GPU. ++ * @shader_present: Current shader present bitmap that is allocated to the GPU. ++ * @num_cores: Current number of shader cores allocated to the GPU. ++ * @l2_slices: Current number of L2 slices allocated to the GPU. ++ * @update_needed: Defines if it is necessary to re-read the registers to ++ * update the current allocated resources. ++ * @padding: Padding to a multiple of 64 bits. ++ * ++ * Properties based on the current resource available. Used for operations with ++ * hardware interactions to avoid using userspace data that can be based on ++ * the maximum resource available. ++ */ ++struct curr_config_props { ++ u64 l2_present; ++ u64 shader_present; ++ u16 num_cores; ++ u8 l2_slices; ++ bool update_needed; ++ u8 padding[4]; ++}; + -+ /* Note: Don't reuse kctx_as_nr now */ ++struct kbase_gpu_props { ++ /* kernel-only properties */ ++ u8 num_cores; ++ u8 num_core_groups; ++ u8 num_address_spaces; ++ u8 num_job_slots; + -+ /* Synchronize with any timers */ -+ kbase_backend_ctx_count_changed(kbdev); ++ struct kbase_gpu_cache_props l2_props; + -+ /* update book-keeping info */ -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); -+ /* Signal any waiter that the context is not scheduled, so is -+ * safe for termination - once the jsctx_mutex is also dropped, -+ * and jobs have finished. 
-+ */ -+ wake_up(&js_kctx_info->ctx.is_scheduled_wait); ++ struct kbase_gpu_mem_props mem; ++ struct kbase_gpu_mmu_props mmu; + -+ /* Queue an action to occur after we've dropped the lock */ -+ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | -+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL; -+ } else { -+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, -+ katom_retained_state, runpool_ctx_attr_change); ++ /* Properties based on the current resource available */ ++ struct curr_config_props curr_config; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->pm.lock); -+ } ++ /* Properties based on the maximum resource available */ ++ struct max_config_props max_config; + -+ return release_result; -+} ++ /* Properties shared with userspace */ ++ struct base_gpu_props props; + -+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_atom_retained_state katom_retained_state; ++ u32 prop_buffer_size; ++ void *prop_buffer; ++}; + -+ /* Setup a dummy katom_retained_state */ -+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); ++#endif /* _KBASE_GPUPROPS_TYPES_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +new file mode 100644 +index 000000000..0eba889e5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +@@ -0,0 +1,275 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ &katom_retained_state); -+} ++#include "mali_kbase_gwt.h" ++#include + -+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, bool has_pm_ref) ++static inline void kbase_gpu_gwt_setup_page_permission( ++ struct kbase_context *kctx, ++ unsigned long flag, ++ struct rb_node *node) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ struct rb_node *rbnode = node; + -+ /* This is called if and only if you've you've detached the context from -+ * the Runpool Queue, and not added it back to the Runpool -+ */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ while (rbnode) { ++ struct kbase_va_region *reg; ++ int err = 0; + -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* Dying: don't requeue, but kill all jobs on the context. 
This -+ * happens asynchronously -+ */ -+ dev_dbg(kbdev->dev, -+ "JS: ** Killing Context %pK on RunPool Remove **", kctx); -+ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) && ++ (reg->flags & KBASE_REG_GPU_WR)) { ++ err = kbase_mmu_update_pages(kctx, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ reg->gpu_alloc->nents, ++ reg->flags & flag, ++ reg->gpu_alloc->group_id); ++ if (err) ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n"); ++ } ++ ++ rbnode = rb_next(rbnode); + } +} + -+void kbasep_js_runpool_release_ctx_and_katom_retained_state( -+ struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state) ++static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, ++ unsigned long flag) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ kbasep_js_release_result release_result; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; -+ -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ -+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ katom_retained_state); -+ -+ /* Drop the runpool mutex to allow requeing kctx */ -+ mutex_unlock(&js_devdata->runpool_mutex); ++ kbase_gpu_gwt_setup_page_permission(kctx, flag, ++ rb_first(&(kctx->reg_rbtree_same))); ++ kbase_gpu_gwt_setup_page_permission(kctx, flag, ++ rb_first(&(kctx->reg_rbtree_custom))); ++} + -+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + -+ /* Drop the jsctx_mutex to allow scheduling in a new context */ ++int kbase_gpu_gwt_start(struct kbase_context *kctx) ++{ ++ kbase_gpu_vm_lock(kctx); ++ if (kctx->gwt_enabled) { ++ kbase_gpu_vm_unlock(kctx); ++ return -EBUSY; ++ } + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++ INIT_LIST_HEAD(&kctx->gwt_current_list); ++ INIT_LIST_HEAD(&kctx->gwt_snapshot_list); + -+ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) -+ kbase_js_sched_all(kbdev); -+} ++#if !MALI_USE_CSF ++ /* If GWT is enabled using new vector dumping format ++ * from user space, back up status of the job serialization flag and ++ * use full serialisation of jobs for dumping. ++ * Status will be restored on end of dumping in gwt_stop. ++ */ ++ kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs; ++ kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | ++ KBASE_SERIALIZE_INTER_SLOT; + -+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_atom_retained_state katom_retained_state; ++#endif ++ /* Mark gwt enabled before making pages read only in case a ++ * write page fault is triggered while we're still in this loop. ++ * (kbase_gpu_vm_lock() doesn't prevent this!) 
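/*
 * Illustrative sketch, not part of the patch: the explicit padding fields in
 * max_config_props and curr_config_props (mali_kbase_gpuprops_types.h above)
 * keep both structures at a multiple of 64 bits, so no implicit compiler
 * padding is introduced. Re-declared locally, on typical ABIs:
 */
#include <stdbool.h>
#include <stdint.h>

struct demo_max_config_props {
        uint8_t l2_slices;
        uint8_t padding[3];
        uint32_t core_mask;
};

struct demo_curr_config_props {
        uint64_t l2_present;
        uint64_t shader_present;
        uint16_t num_cores;
        uint8_t l2_slices;
        bool update_needed;
        uint8_t padding[4];
};

_Static_assert(sizeof(struct demo_max_config_props) == 8, "one 64-bit word");
_Static_assert(sizeof(struct demo_curr_config_props) == 24, "three 64-bit words");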
++ */ ++ kctx->gwt_enabled = true; ++ kctx->gwt_was_enabled = true; + -+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); ++ kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, -+ &katom_retained_state); ++ kbase_gpu_vm_unlock(kctx); ++ return 0; +} + -+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into -+ * kbase_js_sched_all() -+ */ -+static void kbasep_js_runpool_release_ctx_no_schedule( -+ struct kbase_device *kbdev, struct kbase_context *kctx) ++int kbase_gpu_gwt_stop(struct kbase_context *kctx) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ kbasep_js_release_result release_result; -+ struct kbasep_js_atom_retained_state katom_retained_state_struct; -+ struct kbasep_js_atom_retained_state *katom_retained_state = -+ &katom_retained_state_struct; ++ struct kbasep_gwt_list_element *pos, *n; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; -+ kbasep_js_atom_retained_state_init_invalid(katom_retained_state); ++ kbase_gpu_vm_lock(kctx); ++ if (!kctx->gwt_enabled) { ++ kbase_gpu_vm_unlock(kctx); ++ return -EINVAL; ++ } + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++ list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) { ++ list_del(&pos->link); ++ kfree(pos); ++ } + -+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ katom_retained_state); ++ list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) { ++ list_del(&pos->link); ++ kfree(pos); ++ } + -+ /* Drop the runpool mutex to allow requeing kctx */ -+ mutex_unlock(&js_devdata->runpool_mutex); -+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); ++#if !MALI_USE_CSF ++ kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; ++#endif + -+ /* Drop the jsctx_mutex to allow scheduling in a new context */ -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_gpu_gwt_setup_pages(kctx, ~0UL); + -+ /* NOTE: could return release_result if the caller would like to know -+ * whether it should schedule a new context, but currently no callers do -+ */ ++ kctx->gwt_enabled = false; ++ kbase_gpu_vm_unlock(kctx); ++ return 0; +} + -+void kbase_js_set_timeouts(struct kbase_device *kbdev) ++#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) ++static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b) ++#else ++static int list_cmp_function(void *priv, struct list_head *a, ++ struct list_head *b) ++#endif +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ const struct kbasep_gwt_list_element *elementA = ++ container_of(a, struct kbasep_gwt_list_element, link); ++ const struct kbasep_gwt_list_element *elementB = ++ container_of(b, struct kbasep_gwt_list_element, link); + -+ kbase_backend_timeouts_changed(kbdev); ++ CSTD_UNUSED(priv); ++ ++ if (elementA->page_addr > elementB->page_addr) ++ return 1; ++ return -1; +} + -+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js) ++static void kbase_gpu_gwt_collate(struct kbase_context *kctx, ++ struct list_head *snapshot_list) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long flags; -+ bool kctx_suspended = false; -+ int as_nr; 
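/*
 * Illustrative sketch, not part of the patch: kbase_gpu_gwt_start() and
 * kbase_gpu_gwt_stop() above reuse kbase_gpu_gwt_setup_pages() with a flag
 * mask that gets ANDed into reg->flags. Passing ~KBASE_REG_GPU_WR drops GPU
 * write permission, so the next GPU write to such a region faults and can be
 * recorded, while ~0UL leaves the flags untouched and so restores normal
 * behaviour. With a hypothetical bit value for the write-permission flag:
 */
#include <stdio.h>

int main(void)
{
        const unsigned long DEMO_REG_GPU_WR = 1UL << 1; /* hypothetical bit */
        unsigned long reg_flags = 0x7;                  /* hypothetical region flags */

        printf("tracking: %#lx\n", reg_flags & ~DEMO_REG_GPU_WR); /* 0x5: write bit cleared */
        printf("stopped:  %#lx\n", reg_flags & ~0UL);             /* 0x7: unchanged */
        return 0;
}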
-+ -+ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js); ++ struct kbasep_gwt_list_element *pos, *n; ++ struct kbasep_gwt_list_element *collated = NULL; + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ /* Sort the list */ ++ list_sort(NULL, snapshot_list, list_cmp_function); + -+ /* Pick available address space for this context */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ as_nr = kbase_ctx_sched_retain_ctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ if (as_nr == KBASEP_AS_NR_INVALID) { -+ as_nr = kbase_backend_find_and_release_free_address_space( -+ kbdev, kctx); -+ if (as_nr != KBASEP_AS_NR_INVALID) { -+ /* Attempt to retain the context again, this should -+ * succeed ++ /* Combine contiguous areas. */ ++ list_for_each_entry_safe(pos, n, snapshot_list, link) { ++ if (collated == NULL || collated->region != ++ pos->region || ++ (collated->page_addr + ++ (collated->num_pages * PAGE_SIZE)) != ++ pos->page_addr) { ++ /* This is the first time through, a new region or ++ * is not contiguous - start collating to this element + */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ as_nr = kbase_ctx_sched_retain_ctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ -+ WARN_ON(as_nr == KBASEP_AS_NR_INVALID); ++ collated = pos; ++ } else { ++ /* contiguous so merge */ ++ collated->num_pages += pos->num_pages; ++ /* remove element from list */ ++ list_del(&pos->link); ++ kfree(pos); + } + } -+ if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS)) -+ return false; /* No address space currently available */ -+ -+ /* -+ * Atomic transaction on the Context and Run Pool begins -+ */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++} + -+ /* Check to see if context is dying due to kbase_job_zap_context() */ -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* Roll back the transaction so far and return */ -+ kbase_ctx_sched_release_ctx(kctx); ++int kbase_gpu_gwt_dump(struct kbase_context *kctx, ++ union kbase_ioctl_cinstr_gwt_dump *gwt_dump) ++{ ++ const u32 ubuf_size = gwt_dump->in.len; ++ u32 ubuf_count = 0; ++ __user void *user_addr = (__user void *) ++ (uintptr_t)gwt_dump->in.addr_buffer; ++ __user void *user_sizes = (__user void *) ++ (uintptr_t)gwt_dump->in.size_buffer; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_gpu_vm_lock(kctx); + -+ return false; ++ if (!kctx->gwt_enabled) { ++ kbase_gpu_vm_unlock(kctx); ++ /* gwt_dump shouldn't be called when gwt is disabled */ ++ return -EPERM; + } + -+ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, -+ 0u, -+ kbase_ktrace_get_ctx_refcnt(kctx)); -+ -+ kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); -+ -+ /* Assign context to previously chosen address space */ -+ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { -+ /* Roll back the transaction so far and return */ -+ kbase_ctx_sched_release_ctx(kctx); -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ 
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ return false; ++ if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer ++ || !gwt_dump->in.size_buffer) { ++ kbase_gpu_vm_unlock(kctx); ++ /* We don't have any valid user space buffer to copy the ++ * write modified addresses. ++ */ ++ return -EINVAL; + } + -+ kbdev->hwaccess.active_kctx[js] = kctx; -+ -+ KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); -+ -+ /* Cause any future waiter-on-termination to wait until the context is -+ * descheduled -+ */ -+ wake_up(&js_kctx_info->ctx.is_scheduled_wait); ++ if (list_empty(&kctx->gwt_snapshot_list) && ++ !list_empty(&kctx->gwt_current_list)) { + -+ /* Re-check for suspending: a suspend could've occurred, and all the -+ * contexts could've been removed from the runpool before we took this -+ * lock. In this case, we don't want to allow this context to run jobs, -+ * we just want it out immediately. -+ * -+ * The DMB required to read the suspend flag was issued recently as part -+ * of the hwaccess_lock locking. If a suspend occurs *after* that lock -+ * was taken (i.e. this condition doesn't execute), then the -+ * kbasep_js_suspend() code will cleanup this context instead (by virtue -+ * of it being called strictly after the suspend flag is set, and will -+ * wait for this lock to drop) -+ */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { -+#else -+ if (kbase_pm_is_suspending(kbdev)) { -+#endif -+ /* Cause it to leave at some later point */ -+ bool retained; -+ CSTD_UNUSED(retained); ++ list_replace_init(&kctx->gwt_current_list, ++ &kctx->gwt_snapshot_list); + -+ retained = kbase_ctx_sched_inc_refcount_nolock(kctx); -+ KBASE_DEBUG_ASSERT(retained); ++ /* We have collected all write faults so far ++ * and they will be passed on to user space. ++ * Reset the page flags state to allow collection of ++ * further write faults. 
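/*
 * Illustrative sketch, not part of the patch: kbase_gpu_gwt_collate() above
 * first sorts the fault records by page address and then merges runs that are
 * contiguous within the same region. The same merge over a plain sorted
 * array, assuming 4 KiB pages:
 */
#include <stddef.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096ULL

struct demo_range {
        uint64_t page_addr;
        uint64_t num_pages;
};

/* Merges adjacent entries of a sorted array in place; returns the new count. */
static size_t demo_collate(struct demo_range *r, size_t n)
{
        size_t out = 0;
        size_t i;

        for (i = 0; i < n; i++) {
                if (out && r[out - 1].page_addr +
                           r[out - 1].num_pages * DEMO_PAGE_SIZE == r[i].page_addr)
                        r[out - 1].num_pages += r[i].num_pages; /* contiguous: extend */
                else
                        r[out++] = r[i];                        /* gap: start a new range */
        }
        return out;
}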
++ */ ++ kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); -+ kctx_suspended = true; ++ /* Sort and combine consecutive pages in the dump list*/ ++ kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list); + } + -+ kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); ++ while ((!list_empty(&kctx->gwt_snapshot_list))) { ++ u64 addr_buffer[32]; ++ u64 num_page_buffer[32]; ++ u32 count = 0; ++ int err; ++ struct kbasep_gwt_list_element *dump_info, *n; + -+ /* Transaction complete */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ list_for_each_entry_safe(dump_info, n, ++ &kctx->gwt_snapshot_list, link) { ++ addr_buffer[count] = dump_info->page_addr; ++ num_page_buffer[count] = dump_info->num_pages; ++ count++; ++ list_del(&dump_info->link); ++ kfree(dump_info); ++ if (ARRAY_SIZE(addr_buffer) == count || ++ ubuf_size == (ubuf_count + count)) ++ break; ++ } + -+ /* Synchronize with any timers */ -+ kbase_backend_ctx_count_changed(kbdev); ++ if (count) { ++ err = copy_to_user((user_addr + ++ (ubuf_count * sizeof(u64))), ++ (void *)addr_buffer, ++ count * sizeof(u64)); ++ if (err) { ++ dev_err(kctx->kbdev->dev, "Copy to user failure\n"); ++ kbase_gpu_vm_unlock(kctx); ++ return err; ++ } ++ err = copy_to_user((user_sizes + ++ (ubuf_count * sizeof(u64))), ++ (void *)num_page_buffer, ++ count * sizeof(u64)); ++ if (err) { ++ dev_err(kctx->kbdev->dev, "Copy to user failure\n"); ++ kbase_gpu_vm_unlock(kctx); ++ return err; ++ } + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ /* Note: after this point, the context could potentially get scheduled -+ * out immediately -+ */ ++ ubuf_count += count; ++ } + -+ if (kctx_suspended) { -+ /* Finishing forcing out the context due to a suspend. Use a -+ * variant of kbasep_js_runpool_release_ctx() that doesn't -+ * schedule a new context, to prevent a risk of recursion back -+ * into this function -+ */ -+ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); -+ return false; ++ if (ubuf_count == ubuf_size) ++ break; + } -+ return true; -+} -+ -+static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, -+ unsigned int js) -+{ -+ unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && -+ kbase_backend_use_ctx_sched(kbdev, kctx, js)) { -+ dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", -+ (void *)kctx, js); -+ -+ if (kbdev->hwaccess.active_kctx[js] != kctx) { -+ kbdev->hwaccess.active_kctx[js] = kctx; -+ kbase_ctx_flag_clear(kctx, -+ KCTX_PULLED_SINCE_ACTIVE_JS0 << js); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return true; /* Context already scheduled */ -+ } ++ if (!list_empty(&kctx->gwt_snapshot_list)) ++ gwt_dump->out.more_data_available = 1; ++ else ++ gwt_dump->out.more_data_available = 0; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return kbasep_js_schedule_ctx(kbdev, kctx, js); ++ gwt_dump->out.no_of_addr_collected = ubuf_count; ++ kbase_gpu_vm_unlock(kctx); ++ return 0; +} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h +new file mode 100644 +index 000000000..9fdd68d62 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h +@@ -0,0 +1,54 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017, 2020-2022 ARM Limited. 
All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbasep_js_device_data *js_devdata; -+ bool is_scheduled; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++#if !defined(_KBASE_GWT_H) ++#define _KBASE_GWT_H + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* This should only happen in response to a system call -+ * from a user-space thread. -+ * In a non-arbitrated environment this can never happen -+ * whilst suspending. -+ * -+ * In an arbitrated environment, user-space threads can run -+ * while we are suspended (for example GPU not available -+ * to this VM), however in that case we will block on -+ * the wait event for KCTX_SCHEDULED, since no context -+ * can be scheduled until we have the GPU again. -+ */ -+ if (kbdev->arb.arb_if == NULL) -+ if (WARN_ON(kbase_pm_is_suspending(kbdev))) -+ return; -+#else -+ /* This should only happen in response to a system call -+ * from a user-space thread. -+ * In a non-arbitrated environment this can never happen -+ * whilst suspending. -+ */ -+ if (WARN_ON(kbase_pm_is_suspending(kbdev))) -+ return; -+#endif ++#include ++#include + -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++/** ++ * kbase_gpu_gwt_start - Start the GPU write tracking ++ * @kctx: Pointer to kernel context ++ * ++ * Return: 0 on success, error on failure. ++ */ ++int kbase_gpu_gwt_start(struct kbase_context *kctx); + -+ /* Mark the context as privileged */ -+ kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); ++/** ++ * kbase_gpu_gwt_stop - Stop the GPU write tracking ++ * @kctx: Pointer to kernel context ++ * ++ * Return: 0 on success, error on failure. ++ */ ++int kbase_gpu_gwt_stop(struct kbase_context *kctx); + -+ is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); -+ if (!is_scheduled) { -+ /* Add the context to the pullable list */ -+ if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) -+ kbase_js_sync_timers(kbdev); ++/** ++ * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space. ++ * @kctx: Pointer to kernel context ++ * @gwt_dump: User space data to be passed. ++ * ++ * Return: 0 on success, error on failure. 
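/*
 * Illustrative sketch, not part of the patch: kbase_gpu_gwt_dump() above
 * copies at most in.len records per call and sets out.more_data_available
 * when the snapshot list was not drained, so a user-space caller is expected
 * to loop. The KBASE_IOCTL_CINSTR_GWT_DUMP request name and the uapi header
 * providing union kbase_ioctl_cinstr_gwt_dump are assumptions here.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"   /* assumed location of the kbase uapi definitions */

static int demo_collect_gwt(int kbase_fd)
{
        uint64_t addrs[256], sizes[256];
        union kbase_ioctl_cinstr_gwt_dump dump;

        do {
                memset(&dump, 0, sizeof(dump));
                dump.in.addr_buffer = (uintptr_t)addrs;
                dump.in.size_buffer = (uintptr_t)sizes;
                dump.in.len = 256;
                if (ioctl(kbase_fd, KBASE_IOCTL_CINSTR_GWT_DUMP, &dump))
                        return -1;
                /* the first dump.out.no_of_addr_collected entries of
                 * addrs[]/sizes[] are valid at this point */
        } while (dump.out.more_data_available);

        return 0;
}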
++ */ ++int kbase_gpu_gwt_dump(struct kbase_context *kctx, ++ union kbase_ioctl_cinstr_gwt_dump *gwt_dump); + -+ /* Fast-starting requires the jsctx_mutex to be dropped, -+ * because it works on multiple ctxs -+ */ -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++#endif /* _KBASE_GWT_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c +new file mode 100644 +index 000000000..b07327a55 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c +@@ -0,0 +1,437 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Try to schedule the context in */ -+ kbase_js_sched_all(kbdev); ++/* ++ * Run-time work-arounds helpers ++ */ + -+ /* Wait for the context to be scheduled in */ -+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, -+ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ } else { -+ /* Already scheduled in - We need to retain it to keep the -+ * corresponding address space -+ */ -+ WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ } -+} -+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); ++#include ++#include ++#include "gpu/mali_kbase_gpu_regmap.h" ++#include "mali_kbase.h" ++#include "mali_kbase_hw.h" + -+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++void kbase_hw_set_features_mask(struct kbase_device *kbdev) +{ -+ struct kbasep_js_kctx_info *js_kctx_info; ++ const enum base_hw_feature *features; ++ u32 gpu_id; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + -+ /* We don't need to use the address space anymore */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { ++ case GPU_ID2_PRODUCT_TMIX: ++ features = base_hw_features_tMIx; ++ break; ++ case GPU_ID2_PRODUCT_THEX: ++ features = base_hw_features_tHEx; ++ break; ++ case GPU_ID2_PRODUCT_TSIX: ++ features = base_hw_features_tSIx; ++ break; ++ case GPU_ID2_PRODUCT_TDVX: ++ features = base_hw_features_tDVx; ++ break; ++ case GPU_ID2_PRODUCT_TNOX: ++ features = base_hw_features_tNOx; ++ break; ++ case GPU_ID2_PRODUCT_TGOX: ++ features = base_hw_features_tGOx; ++ break; ++ case GPU_ID2_PRODUCT_TTRX: ++ features = base_hw_features_tTRx; ++ break; ++ case GPU_ID2_PRODUCT_TNAX: ++ features = base_hw_features_tNAx; ++ break; ++ case GPU_ID2_PRODUCT_LBEX: ++ case GPU_ID2_PRODUCT_TBEX: ++ features = base_hw_features_tBEx; ++ break; ++ case GPU_ID2_PRODUCT_TBAX: ++ features = 
base_hw_features_tBAx; ++ break; ++ case GPU_ID2_PRODUCT_TODX: ++ case GPU_ID2_PRODUCT_LODX: ++ features = base_hw_features_tODx; ++ break; ++ case GPU_ID2_PRODUCT_TGRX: ++ features = base_hw_features_tGRx; ++ break; ++ case GPU_ID2_PRODUCT_TVAX: ++ features = base_hw_features_tVAx; ++ break; ++ case GPU_ID2_PRODUCT_TTUX: ++ case GPU_ID2_PRODUCT_LTUX: ++ features = base_hw_features_tTUx; ++ break; ++ case GPU_ID2_PRODUCT_TTIX: ++ case GPU_ID2_PRODUCT_LTIX: ++ features = base_hw_features_tTIx; ++ break; ++ default: ++ features = base_hw_features_generic; ++ break; ++ } + -+ /* Release the context - it will be scheduled out */ -+ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ for (; *features != BASE_HW_FEATURE_END; features++) ++ set_bit(*features, &kbdev->hw_features_mask[0]); + -+ kbase_js_sched_all(kbdev); ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++ /* When dumping is enabled, need to disable flush reduction optimization ++ * for GPUs on which it is safe to have only cache clean operation at ++ * the end of job chain. ++ * This is required to make vector dump work. There is some discrepancy ++ * in the implementation of flush reduction optimization due to ++ * unclear or ambiguous ARCH spec. ++ */ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) ++ clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, ++ &kbdev->hw_features_mask[0]); ++#endif +} -+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + -+void kbasep_js_suspend(struct kbase_device *kbdev) ++/** ++ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID ++ * @kbdev: Device pointer ++ * ++ * Return: pointer to an array of hardware issues, terminated by ++ * BASE_HW_ISSUE_END. ++ * ++ * In debugging versions of the driver, unknown versions of a known GPU will ++ * be treated as the most recent known version not later than the actual ++ * version. In such circumstances, the GPU ID in @kbdev will also be replaced ++ * with the most recent known version. ++ * ++ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() ++ * before calling this function. 
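/*
 * Illustrative sketch, not part of the patch: the per-product feature tables
 * consumed by kbase_hw_set_features_mask() above are terminated by
 * BASE_HW_FEATURE_END and folded into a bitmap with set_bit(), so later
 * queries (e.g. kbase_hw_has_feature()) reduce to a single bit test. A
 * stand-alone analogue with made-up feature IDs:
 */
#include <stdbool.h>
#include <stdint.h>

enum demo_feature { DEMO_FEAT_A = 0, DEMO_FEAT_B = 3, DEMO_FEAT_END = 63 };

static uint64_t demo_feature_mask;

static void demo_set_features(const enum demo_feature *features)
{
        for (; *features != DEMO_FEAT_END; features++)
                demo_feature_mask |= 1ULL << *features;
}

static bool demo_has_feature(enum demo_feature f)
{
        return demo_feature_mask & (1ULL << f);
}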
++ */ ++static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( ++ struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ int i; -+ u16 retained = 0u; ++ const enum base_hw_issue *issues = NULL; + -+ KBASE_DEBUG_ASSERT(kbdev); -+ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); -+ js_devdata = &kbdev->js_data; ++ struct base_hw_product { ++ u32 product_model; ++ struct { ++ u32 version; ++ const enum base_hw_issue *issues; ++ } map[7]; ++ }; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ static const struct base_hw_product base_hw_products[] = { ++ { GPU_ID2_PRODUCT_TMIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tMIx_r0p0_05dev0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, ++ { U32_MAX /* sentinel value */, NULL } } }, + -+ /* Prevent all contexts from submitting */ -+ js_devdata->runpool_irq.submit_allowed = 0; ++ { GPU_ID2_PRODUCT_THEX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, ++ { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, ++ { U32_MAX, NULL } } }, + -+ /* Retain each of the contexts, so we can cause it to leave even if it -+ * had no refcount to begin with -+ */ -+ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { -+ struct kbase_context *kctx = kbdev->as_to_kctx[i]; ++ { GPU_ID2_PRODUCT_TSIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, ++ { U32_MAX, NULL } } }, + -+ retained = retained << 1; ++ { GPU_ID2_PRODUCT_TDVX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ if (kctx && !(kbdev->as_free & (1u << i))) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ retained |= 1u; -+ /* This loop will not have an effect on the privileged -+ * contexts as they would have an extra ref count -+ * compared to the normal contexts, so they will hold -+ * on to their address spaces. MMU will re-enabled for -+ * them on resume. -+ */ -+ } -+ } ++ { GPU_ID2_PRODUCT_TNOX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ { GPU_ID2_PRODUCT_TGOX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, ++ { U32_MAX, NULL } } }, + -+ /* De-ref the previous retain to ensure each context gets pulled out -+ * sometime later. 
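/*
 * Illustrative sketch, not part of the patch: the GPU_ID2_VERSION_MAKE()
 * entries in the per-product maps here are packed version words, so a plain
 * integer comparison orders them chronologically (major, then minor, then
 * status), which is what the fallback search below relies on. Assuming the
 * usual GPU_ID2 layout of status in bits 0-3, minor in bits 4-11 and major in
 * bits 12-15:
 */
#include <stdint.h>

static inline uint32_t demo_version_make(uint32_t major, uint32_t minor, uint32_t status)
{
        return (major << 12) | (minor << 4) | status;
}

/* demo_version_make(1, 0, 0) > demo_version_make(0, 3, 2), i.e. r1p0 sorts
 * after r0p3 status 2. */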
-+ */ -+ for (i = 0; -+ i < BASE_MAX_NR_AS; -+ ++i, retained = retained >> 1) { -+ struct kbase_context *kctx = kbdev->as_to_kctx[i]; ++ { GPU_ID2_PRODUCT_TTRX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, ++ { U32_MAX, NULL } } }, + -+ if (retained & 1u) -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+ } ++ { GPU_ID2_PRODUCT_TNAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, ++ { U32_MAX, NULL } } }, + -+ /* Caller must wait for all Power Manager active references to be -+ * dropped -+ */ -+} ++ { GPU_ID2_PRODUCT_LBEX, ++ { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, ++ { U32_MAX, NULL } } }, + -+void kbasep_js_resume(struct kbase_device *kbdev) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ int js, prio; ++ { GPU_ID2_PRODUCT_TBEX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, ++ { U32_MAX, NULL } } }, + -+ KBASE_DEBUG_ASSERT(kbdev); -+ js_devdata = &kbdev->js_data; -+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); ++ { GPU_ID2_PRODUCT_TBAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ mutex_lock(&js_devdata->queue_mutex); -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; -+ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ struct kbase_context *kctx, *n; -+ unsigned long flags; ++ { GPU_ID2_PRODUCT_TODX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+#ifndef CONFIG_MALI_ARBITER_SUPPORT -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ { GPU_ID2_PRODUCT_LODX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ list_for_each_entry_safe(kctx, n, -+ &kbdev->js_data.ctx_list_unpullable[js][prio], -+ jctx.sched_info.ctx.ctx_list_entry[js]) { -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool timer_sync = false; ++ { GPU_ID2_PRODUCT_TGRX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ /* Drop lock so we can take kctx mutexes */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); ++ { GPU_ID2_PRODUCT_TVAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ js_kctx_info = &kctx->jctx.sched_info; ++ { GPU_ID2_PRODUCT_TTUX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), 
base_hw_issues_tTUx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, ++ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, ++ { U32_MAX, NULL } } }, + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ { GPU_ID2_PRODUCT_LTUX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, ++ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 }, ++ { U32_MAX, NULL } } }, + -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && -+ kbase_js_ctx_pullable(kctx, js, false)) -+ timer_sync = -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); ++ { GPU_ID2_PRODUCT_TTIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); ++ { GPU_ID2_PRODUCT_LTIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, ++ { U32_MAX, NULL } } }, + -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); ++ }; + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; ++ const struct base_hw_product *product = NULL; ++ size_t p; + -+ /* Take lock before accessing list again */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#else -+ bool timer_sync = false; ++ /* Stop when we reach the end of the products array. */ ++ for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { ++ if (product_model == base_hw_products[p].product_model) { ++ product = &base_hw_products[p]; ++ break; ++ } ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (product != NULL) { ++ /* Found a matching product. */ ++ const u32 version = gpu_id & GPU_ID2_VERSION; ++ u32 fallback_version = 0; ++ const enum base_hw_issue *fallback_issues = NULL; ++ size_t v; + -+ list_for_each_entry_safe(kctx, n, -+ &kbdev->js_data.ctx_list_unpullable[js][prio], -+ jctx.sched_info.ctx.ctx_list_entry[js]) { ++ /* Stop when we reach the end of the map. */ ++ for (v = 0; product->map[v].version != U32_MAX; ++v) { + -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && -+ kbase_js_ctx_pullable(kctx, js, false)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); ++ if (version == product->map[v].version) { ++ /* Exact match so stop. */ ++ issues = product->map[v].issues; ++ break; + } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ if (timer_sync) { -+ mutex_lock(&js_devdata->runpool_mutex); -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ } ++ /* Check whether this is a candidate for most recent ++ * known version not later than the actual version. 
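++			 * For example, a GPU reporting r1p0 status 9 (an
++			 * illustrative, unlisted value) would retain a listed
++			 * r1p0 status 0 entry as its fallback candidate, that
++			 * being the most recent known version not later than
++			 * the actual one.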
++ */ ++ if ((version > product->map[v].version) && ++ (product->map[v].version >= fallback_version)) { ++#if MALI_CUSTOMER_RELEASE ++ /* Match on version's major and minor fields */ ++ if (((version ^ product->map[v].version) >> ++ GPU_ID2_VERSION_MINOR_SHIFT) == 0) +#endif ++ { ++ fallback_version = product->map[v].version; ++ fallback_issues = product->map[v].issues; ++ } ++ } + } -+ } -+ mutex_unlock(&js_devdata->queue_mutex); -+ -+ /* Restart atom processing */ -+ kbase_js_sched_all(kbdev); -+ -+ /* JS Resume complete */ -+} -+ -+bool kbase_js_is_atom_valid(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ if ((katom->core_req & BASE_JD_REQ_FS) && -+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | -+ BASE_JD_REQ_T))) -+ return false; -+ -+ if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && -+ (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) -+ return false; + -+ return true; -+} ++ if ((issues == NULL) && (fallback_issues != NULL)) { ++ /* Fall back to the issue set of the most recent known ++ * version not later than the actual version. ++ */ ++ issues = fallback_issues; + -+static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -+{ -+ if (katom->core_req & BASE_JD_REQ_JOB_SLOT) -+ return katom->jobslot; ++ dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n", ++ (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT); ++ dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n", ++ (fallback_version & GPU_ID2_VERSION_MAJOR) >> ++ GPU_ID2_VERSION_MAJOR_SHIFT, ++ (fallback_version & GPU_ID2_VERSION_MINOR) >> ++ GPU_ID2_VERSION_MINOR_SHIFT, ++ (fallback_version & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT); ++ dev_notice(kbdev->dev, ++ "Execution proceeding normally with fallback match\n"); + -+ if (katom->core_req & BASE_JD_REQ_FS) -+ return 0; ++ gpu_id &= ~GPU_ID2_VERSION; ++ gpu_id |= fallback_version; ++ kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; + -+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ if (katom->device_nr == 1 && -+ kbdev->gpu_props.num_core_groups == 2) -+ return 2; ++ kbase_gpuprops_update_core_props_gpu_id( ++ &kbdev->gpu_props.props); ++ } + } -+ -+ return 1; ++ return issues; +} + -+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) ++int kbase_hw_set_issues_mask(struct kbase_device *kbdev) +{ -+ bool enqueue_required, add_required = true; -+ -+ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ /* If slot will transition from unpullable to pullable then add to -+ * pullable list -+ */ -+ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) -+ enqueue_required = true; -+ else -+ enqueue_required = false; -+ -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || -+ (katom->pre_dep && (katom->pre_dep->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { -+ int prio = katom->sched_priority; -+ unsigned int js = katom->slot_nr; -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; ++ const enum base_hw_issue *issues; ++ u32 gpu_id; ++ u32 impl_tech; + -+ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js); ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ impl_tech = 
kbdev->gpu_props.props.thread_props.impl_tech; + -+ list_add_tail(&katom->queue, &queue->x_dep_head); -+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ if (kbase_js_atom_blocked_on_x_dep(katom)) { -+ enqueue_required = false; -+ add_required = false; ++ if (impl_tech != IMPLEMENTATION_MODEL) { ++ issues = kbase_hw_get_issues_for_new_id(kbdev); ++ if (issues == NULL) { ++ dev_err(kbdev->dev, ++ "HW product - Unknown GPU ID %x", gpu_id); ++ return -EINVAL; + } -+ } else { -+ dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", -+ (void *)katom); -+ } -+ -+ if (add_required) { -+ /* Check if there are lower priority jobs to soft stop */ -+ kbase_job_slot_ctx_priority_check_locked(kctx, katom); -+ -+ /* Add atom to ring buffer. */ -+ jsctx_tree_add(kctx, katom); -+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; -+ } + -+ dev_dbg(kctx->kbdev->dev, -+ "Enqueue of kctx %pK is %srequired to submit atom %pK\n", -+ kctx, enqueue_required ? "" : "not ", katom); ++#if !MALI_CUSTOMER_RELEASE ++ /* The GPU ID might have been replaced with the last ++ * known version of the same GPU. ++ */ ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++#endif ++ } else { ++ /* Software model */ ++ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { ++ case GPU_ID2_PRODUCT_TMIX: ++ issues = base_hw_issues_model_tMIx; ++ break; ++ case GPU_ID2_PRODUCT_THEX: ++ issues = base_hw_issues_model_tHEx; ++ break; ++ case GPU_ID2_PRODUCT_TSIX: ++ issues = base_hw_issues_model_tSIx; ++ break; ++ case GPU_ID2_PRODUCT_TDVX: ++ issues = base_hw_issues_model_tDVx; ++ break; ++ case GPU_ID2_PRODUCT_TNOX: ++ issues = base_hw_issues_model_tNOx; ++ break; ++ case GPU_ID2_PRODUCT_TGOX: ++ issues = base_hw_issues_model_tGOx; ++ break; ++ case GPU_ID2_PRODUCT_TTRX: ++ issues = base_hw_issues_model_tTRx; ++ break; ++ case GPU_ID2_PRODUCT_TNAX: ++ issues = base_hw_issues_model_tNAx; ++ break; ++ case GPU_ID2_PRODUCT_LBEX: ++ case GPU_ID2_PRODUCT_TBEX: ++ issues = base_hw_issues_model_tBEx; ++ break; ++ case GPU_ID2_PRODUCT_TBAX: ++ issues = base_hw_issues_model_tBAx; ++ break; ++ case GPU_ID2_PRODUCT_TODX: ++ case GPU_ID2_PRODUCT_LODX: ++ issues = base_hw_issues_model_tODx; ++ break; ++ case GPU_ID2_PRODUCT_TGRX: ++ issues = base_hw_issues_model_tGRx; ++ break; ++ case GPU_ID2_PRODUCT_TVAX: ++ issues = base_hw_issues_model_tVAx; ++ break; ++ case GPU_ID2_PRODUCT_TTUX: ++ case GPU_ID2_PRODUCT_LTUX: ++ issues = base_hw_issues_model_tTUx; ++ break; ++ case GPU_ID2_PRODUCT_TTIX: ++ case GPU_ID2_PRODUCT_LTIX: ++ issues = base_hw_issues_model_tTIx; ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "HW issues - Unknown GPU ID %x", gpu_id); ++ return -EINVAL; ++ } ++ } + -+ return enqueue_required; -+} ++ dev_info(kbdev->dev, ++ "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", ++ (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> ++ GPU_ID2_PRODUCT_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_MAJOR) >> ++ GPU_ID2_ARCH_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_MINOR) >> ++ GPU_ID2_ARCH_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_REV) >> ++ GPU_ID2_ARCH_REV_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MAJOR) >> ++ GPU_ID2_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MINOR) >> ++ GPU_ID2_VERSION_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT); + -+/** -+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the -+ * runnable_tree, ready for execution -+ * @katom: Atom to submit ++ for (; *issues != BASE_HW_ISSUE_END; issues++) ++ set_bit(*issues, &kbdev->hw_issues_mask[0]); ++ ++ return 0; ++} +diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h +new file mode 100644 +index 000000000..ddcddaaa4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h +@@ -0,0 +1,71 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, -+ * but is still present in the x_dep list. If @katom has a same-slot dependent -+ * atom then that atom (and any dependents) will also be moved. + */ -+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *const kctx = katom->kctx; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++/** ++ * DOC: Run-time work-arounds helpers ++ */ + -+ while (katom) { -+ WARN_ON(!(katom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); ++#ifndef _KBASE_HW_H_ ++#define _KBASE_HW_H_ + -+ if (!kbase_js_atom_blocked_on_x_dep(katom)) { -+ dev_dbg(kctx->kbdev->dev, -+ "Del atom %pK from X_DEP list in js_move_to_tree\n", -+ (void *)katom); ++#include "mali_kbase_defs.h" + -+ list_del(&katom->queue); -+ katom->atom_flags &= -+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ /* For incremental rendering, an end-of-renderpass atom -+ * may have had its dependency on start-of-renderpass -+ * ignored and may therefore already be in the tree. -+ */ -+ if (!(katom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { -+ jsctx_tree_add(kctx, katom); -+ katom->atom_flags |= -+ KBASE_KATOM_FLAG_JSCTX_IN_TREE; -+ } -+ } else { -+ dev_dbg(kctx->kbdev->dev, -+ "Atom %pK blocked on x-dep in js_move_to_tree\n", -+ (void *)katom); -+ break; -+ } ++/** ++ * kbase_hw_has_issue - Tell whether a work-around should be enabled ++ * @kbdev: Device pointer ++ * @issue: issue to be checked ++ */ ++#define kbase_hw_has_issue(kbdev, issue)\ ++ test_bit(issue, &(kbdev)->hw_issues_mask[0]) + -+ katom = katom->post_dep; -+ } -+} ++/** ++ * kbase_hw_has_feature - Tell whether a feature is supported ++ * @kbdev: Device pointer ++ * @feature: feature to be checked ++ */ ++#define kbase_hw_has_feature(kbdev, feature)\ ++ test_bit(feature, &(kbdev)->hw_features_mask[0]) + ++/** ++ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. ++ * ++ * The GPU ID is read from the @kbdev. ++ * ++ * In debugging versions of the driver, unknown versions of a known GPU with a ++ * new-format ID will be treated as the most recent known version not later ++ * than the actual version. In such circumstances, the GPU ID in @kbdev will ++ * also be replaced with the most recent known version. 
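++ *
++ * A typical probe-time sequence (sketch) is to call this function once the
++ * GPU properties have been read, then gate individual workarounds with
++ * kbase_hw_has_issue(), for example:
++ *
++ *   err = kbase_hw_set_issues_mask(kbdev);
++ *   if (err)
++ *           return err;
++ *   if (kbase_hw_has_issue(kbdev, issue))
++ *           apply_workaround(kbdev);
++ *
++ * where "issue" stands for any BASE_HW_ISSUE_* enumerator and
++ * apply_workaround() is purely illustrative.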
++ * ++ * Note: The GPU configuration must have been read by ++ * kbase_gpuprops_get_props() before calling this function. ++ */ ++int kbase_hw_set_issues_mask(struct kbase_device *kbdev); + +/** -+ * kbase_js_evict_deps - Evict dependencies of a failed atom. -+ * @kctx: Context pointer -+ * @katom: Pointer to the atom that has failed. -+ * @js: The job slot the katom was run on. -+ * @prio: Priority of the katom. ++ * kbase_hw_set_features_mask - Set the features mask depending on the GPU ID ++ * @kbdev: Device pointer ++ */ ++void kbase_hw_set_features_mask(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_HW_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h +new file mode 100644 +index 000000000..0da4eb258 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h +@@ -0,0 +1,43 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * Remove all post dependencies of an atom from the context ringbuffers. ++ * (C) COPYRIGHT 2014-2015, 2019-2021 ARM Limited. All rights reserved. + * -+ * The original atom's event_code will be propogated to all dependent atoms. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Context: Caller must hold the HW access lock + */ -+static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, -+ unsigned int js, int prio) -+{ -+ struct kbase_jd_atom *x_dep = katom->x_post_dep; -+ struct kbase_jd_atom *next_katom = katom->post_dep; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++/* ++ * HW access backend common APIs ++ */ + -+ if (next_katom) { -+ KBASE_DEBUG_ASSERT(next_katom->status != -+ KBASE_JD_ATOM_STATE_HW_COMPLETED); -+ next_katom->will_fail_event_code = katom->event_code; ++#ifndef _KBASE_HWACCESS_BACKEND_H_ ++#define _KBASE_HWACCESS_BACKEND_H_ + -+ } ++/** ++ * kbase_backend_devfreq_init - Perform backend devfreq related initialization. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, or an error code on failure. ++ */ ++int kbase_backend_devfreq_init(struct kbase_device *kbdev); + -+ /* Has cross slot depenency. */ -+ if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { -+ /* Remove dependency.*/ -+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++/** ++ * kbase_backend_devfreq_term - Perform backend-devfreq termination. 
++ * @kbdev: Device pointer ++ */ ++void kbase_backend_devfreq_term(struct kbase_device *kbdev); + -+ dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", -+ (void *)x_dep); ++#endif /* _KBASE_HWACCESS_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h +new file mode 100644 +index 000000000..62a6ec51b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h +@@ -0,0 +1,50 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Fail if it had a data dependency. */ -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) -+ x_dep->will_fail_event_code = katom->event_code; ++/** ++ * DOC: HW access common definitions ++ */ + -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) -+ kbase_js_move_to_tree(x_dep); -+ } -+} ++#ifndef _KBASE_HWACCESS_DEFS_H_ ++#define _KBASE_HWACCESS_DEFS_H_ + -+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) -+{ -+ struct kbase_jd_atom *katom; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_device *kbdev; -+ int pulled; ++#include + -+ KBASE_DEBUG_ASSERT(kctx); ++/** ++ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific ++ * data for the HW access layer. ++ * hwaccess_lock (a spinlock) must be held when ++ * accessing this structure. ++ * @active_kctx: pointer to active kbase context which last submitted an ++ * atom to GPU and while the context is active it can ++ * submit new atoms to GPU from the irq context also, without ++ * going through the bottom half of job completion path. ++ * @backend: GPU backend specific data for HW access layer ++ */ ++struct kbase_hwaccess_data { ++#if !MALI_USE_CSF ++ struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; ++#endif + -+ kbdev = kctx->kbdev; -+ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js); ++ struct kbase_backend_data backend; ++}; + -+ js_devdata = &kbdev->js_data; -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#endif /* _KBASE_HWACCESS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h +new file mode 100644 +index 000000000..f537b7f0a +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved. 
++ *
++ * This program is free software and is provided to you under the terms of the
++ * GNU General Public License version 2 as published by the Free Software
++ * Foundation, and any use by you of this program is subject to the terms
++ * of such GNU license.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, you can access it online at
++ * http://www.gnu.org/licenses/gpl-2.0.html.
++ *
++ */
+
++/**
++ * DOC: Base kernel property query backend APIs
++ */
+
++#ifndef _KBASE_HWACCESS_GPUPROPS_H_
++#define _KBASE_HWACCESS_GPUPROPS_H_
+
++/**
++ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
++ *                                GPU
++ * @kbdev:   Device pointer
++ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
++ *
++ * The caller should ensure that GPU remains powered-on during this function.
++ *
++ * Return: Zero for success or a Linux error code
++ */
++int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
++		struct kbase_gpuprops_regdump *regdump);
+
++/**
++ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with
++ *                                            relevant GPU properties read from
++ *                                            the GPU registers.
++ * @kbdev:               Device pointer.
++ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump
++ *                       structure.
++ *
++ * The caller should ensure that GPU remains powered-on during this function and
++ * the caller must ensure this function returns success before using the values
++ * returned in the curr_config_regdump in any part of the kernel.
++ *
++ * Return: Zero for success or a Linux error code
++ */
++int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
++		struct kbase_current_config_regdump *curr_config_regdump);
+
++/**
++ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
++ *                                       from GPU
++ * @kbdev:   Device pointer
++ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
++ *
++ * This function reads GPU properties that are dependent on the hardware
++ * features bitmask. It will power-on the GPU if required.
++ *
++ * Return: Zero for success or a Linux error code
++ */
++int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
++		struct kbase_gpuprops_regdump *regdump);
+
++/**
++ * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read
++ *                                          from GPU
++ * @kbdev:   Device pointer
++ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
++ *
++ * This function reads L2_FEATURES register that is dependent on the hardware
++ * features bitmask. It will power-on the GPU if required.
++ *
++ * Return: Zero on success, Linux error code on failure
++ */
++int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
++		struct kbase_gpuprops_regdump *regdump);
+
+
++#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
+diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
+new file mode 100644
+index 000000000..37663101a
+--- /dev/null
++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
+@@ -0,0 +1,170 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++/*
++ *
++ * (C) COPYRIGHT 2014-2015, 2017-2018, 2020-2021 ARM Limited. All rights reserved.
++ *
++ * This program is free software and is provided to you under the terms of the
++ * GNU General Public License version 2 as published by the Free Software
++ * Foundation, and any use by you of this program is subject to the terms
++ * of such GNU license.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ katom->ticks = 0; ++/* ++ * HW Access instrumentation common APIs ++ */ + -+ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n", -+ (void *)katom, (void *)kctx, js); ++#ifndef _KBASE_HWACCESS_INSTR_H_ ++#define _KBASE_HWACCESS_INSTR_H_ + -+ return katom; -+} ++#include + +/** -+ * js_return_of_start_rp() - Handle soft-stop of an atom that starts a -+ * renderpass -+ * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped ++ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. ++ * @dump_buffer: GPU address to write counters to. ++ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. ++ * @fe_bm: counters selection bitmask (Front End). ++ * @shader_bm: counters selection bitmask (Shader). ++ * @tiler_bm: counters selection bitmask (Tiler). ++ * @mmu_l2_bm: counters selection bitmask (MMU_L2). ++ * @counter_set: the performance counter set to use. ++ */ ++struct kbase_instr_hwcnt_enable { ++ u64 dump_buffer; ++ u64 dump_buffer_bytes; ++ u32 fe_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 mmu_l2_bm; ++ u8 counter_set; ++}; ++ ++/** ++ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection ++ * @kbdev: Kbase device ++ * @kctx: Kbase context ++ * @enable: HW counter setup parameters + * -+ * This function is called to switch to incremental rendering if the tiler job -+ * chain at the start of a renderpass has used too much memory. It prevents the -+ * tiler job being pulled for execution in the job scheduler again until the -+ * next phase of incremental rendering is complete. ++ * Context: might sleep, waiting for reset to complete + * -+ * If the end-of-renderpass atom is already in the job scheduler (because a -+ * previous attempt at tiling used too much memory during the same renderpass) -+ * then it is unblocked; otherwise, it is run by handing it to the scheduler. ++ * Return: 0 on success + */ -+static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) -+{ -+ struct kbase_context *const kctx = start_katom->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_jd_renderpass *rp; -+ struct kbase_jd_atom *end_katom; -+ unsigned long flags; ++int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_instr_hwcnt_enable *enable); + -+ lockdep_assert_held(&kctx->jctx.lock); ++/** ++ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection ++ * @kctx: Kbase context ++ * ++ * Context: might sleep, waiting for an ongoing dump to complete ++ * ++ * Return: 0 on success ++ */ ++int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); + -+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) -+ return; ++/** ++ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU ++ * @kctx: Kbase context ++ * ++ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, ++ * of call kbase_instr_hwcnt_wait_for_dump(). 
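++ *
++ * A minimal blocking dump sequence (sketch, error handling omitted) is:
++ *
++ *   kbase_instr_hwcnt_request_dump(kctx);
++ *   kbase_instr_hwcnt_wait_for_dump(kctx);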
++ * ++ * Return: 0 on success ++ */ ++int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); + -+ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++/** ++ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has ++ * completed. ++ * @kctx: Kbase context ++ * ++ * Context: will sleep, waiting for dump to complete ++ * ++ * Return: 0 on success ++ */ ++int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); + -+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; ++/** ++ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has ++ * completed ++ * @kctx: Kbase context ++ * @success: Set to true if successful ++ * ++ * Context: does not sleep. ++ * ++ * Return: true if the dump is complete ++ */ ++bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, ++ bool * const success); + -+ if (WARN_ON(rp->start_katom != start_katom)) -+ return; ++/** ++ * kbase_instr_hwcnt_clear() - Clear HW counters ++ * @kctx: Kbase context ++ * ++ * Context: might sleep, waiting for reset to complete ++ * ++ * Return: 0 on success ++ */ ++int kbase_instr_hwcnt_clear(struct kbase_context *kctx); + -+ dev_dbg(kctx->kbdev->dev, -+ "JS return start atom %pK in state %d of RP %d\n", -+ (void *)start_katom, (int)rp->state, -+ start_katom->renderpass_id); ++/** ++ * kbase_instr_backend_init() - Initialise the instrumentation backend ++ * @kbdev: Kbase device ++ * ++ * This function should be called during driver initialization. ++ * ++ * Return: 0 on success ++ */ ++int kbase_instr_backend_init(struct kbase_device *kbdev); + -+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) -+ return; ++/** ++ * kbase_instr_backend_term() - Terminate the instrumentation backend ++ * @kbdev: Kbase device ++ * ++ * This function should be called during driver termination. ++ */ ++void kbase_instr_backend_term(struct kbase_device *kbdev); + -+ /* The tiler job might have been soft-stopped for some reason other -+ * than running out of memory. -+ */ -+ if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { -+ dev_dbg(kctx->kbdev->dev, -+ "JS return isn't OOM in state %d of RP %d\n", -+ (int)rp->state, start_katom->renderpass_id); -+ return; -+ } ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++/** ++ * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the ++ * hardware counter set. ++ * @kbdev: kbase device ++ */ ++void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); ++#endif + -+ dev_dbg(kctx->kbdev->dev, -+ "JS return confirm OOM in state %d of RP %d\n", -+ (int)rp->state, start_katom->renderpass_id); ++/** ++ * kbase_instr_hwcnt_on_unrecoverable_error() - JM HWC instr backend function ++ * called when unrecoverable errors ++ * are detected. ++ * @kbdev: Kbase device ++ * ++ * This should be called on encountering errors that can only be recovered from ++ * with reset, or that may put HWC logic in state that could result in hang. For ++ * example, when HW becomes unresponsive. ++ * ++ * Caller requires kbdev->hwaccess_lock held. ++ */ ++void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev); + -+ if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && -+ rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) -+ return; ++/** ++ * kbase_instr_hwcnt_on_before_reset() - JM HWC instr backend function to be ++ * called immediately before a reset. 
++ * Takes us out of the unrecoverable ++ * error state, if we were in it. ++ * @kbdev: Kbase device ++ */ ++void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev); + -+ /* Prevent the tiler job being pulled for execution in the -+ * job scheduler again. -+ */ -+ dev_dbg(kbdev->dev, "Blocking start atom %pK\n", -+ (void *)start_katom); -+ atomic_inc(&start_katom->blocked); ++#endif /* _KBASE_HWACCESS_INSTR_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +new file mode 100644 +index 000000000..ca77c192d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +@@ -0,0 +1,318 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/* ++ * HW access job manager common APIs ++ */ + -+ rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? -+ KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; ++#ifndef _KBASE_HWACCESS_JM_H_ ++#define _KBASE_HWACCESS_JM_H_ + -+ /* Was the fragment job chain submitted to kbase yet? */ -+ end_katom = rp->end_katom; -+ if (end_katom) { -+ dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", -+ (void *)end_katom); ++/** ++ * kbase_backend_run_atom() - Run an atom on the GPU ++ * @kbdev: Device pointer ++ * @katom: Atom to run ++ * ++ * Caller must hold the HW access lock ++ */ ++void kbase_backend_run_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ if (rp->state == KBASE_JD_RP_RETRY_OOM) { -+ /* Allow the end of the renderpass to be pulled for -+ * execution again to continue incremental rendering. -+ */ -+ dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", -+ (void *)end_katom); -+ atomic_dec(&end_katom->blocked); -+ WARN_ON(!(end_katom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_TREE)); -+ WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); ++/** ++ * kbase_backend_slot_update - Update state based on slot ringbuffers ++ * ++ * @kbdev: Device pointer ++ * ++ * Inspect the jobs in the slot ringbuffers and update state. ++ * ++ * This will cause jobs to be submitted to hardware if they are unblocked ++ */ ++void kbase_backend_slot_update(struct kbase_device *kbdev); + -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, -+ end_katom->slot_nr); ++/** ++ * kbase_backend_find_and_release_free_address_space() - Release a free AS ++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * ++ * This function can evict an idle context from the runpool, freeing up the ++ * address space it was using. ++ * ++ * The address space is marked as in use. 
The caller must either assign a ++ * context using kbase_gpu_use_ctx(), or release it using ++ * kbase_ctx_sched_release() ++ * ++ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none ++ * available ++ */ ++int kbase_backend_find_and_release_free_address_space( ++ struct kbase_device *kbdev, struct kbase_context *kctx); + -+ /* Expect the fragment job chain to be scheduled without -+ * further action because this function is called when -+ * returning an atom to the job scheduler ringbuffer. -+ */ -+ end_katom = NULL; -+ } else { -+ WARN_ON(end_katom->status != -+ KBASE_JD_ATOM_STATE_QUEUED && -+ end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); -+ } -+ } ++/** ++ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the ++ * provided address space. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer. May be NULL ++ * @as_nr: Free address space to use ++ * ++ * kbase_gpu_next_job() will pull atoms from the active context. ++ * ++ * Return: true if successful, false if ASID not assigned. ++ */ ++bool kbase_backend_use_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int as_nr); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_backend_use_ctx_sched() - Activate a context. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * @js: Job slot to activate context on ++ * ++ * kbase_gpu_next_job() will pull atoms from the active context. ++ * ++ * The context must already be scheduled and assigned to an address space. If ++ * the context is not scheduled, then kbase_gpu_use_ctx() should be used ++ * instead. ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if context is now active, false otherwise (ie if context does ++ * not have an address space assigned) ++ */ ++bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js); + -+ if (end_katom) -+ kbase_jd_dep_clear_locked(end_katom); -+} ++/** ++ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will ++ * de-assign the assigned address space. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * ++ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock ++ */ ++void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + +/** -+ * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass -+ * @end_katom: Pointer to the end-of-renderpass atom that was completed ++ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will ++ * de-assign the assigned address space. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer + * -+ * This function is called to continue incremental rendering if the tiler job -+ * chain at the start of a renderpass used too much memory. It resets the -+ * mechanism for detecting excessive memory usage then allows the soft-stopped -+ * tiler job chain to be pulled for execution again. ++ * Caller must hold kbase_device->mmu_hw_mutex + * -+ * The start-of-renderpass atom must already been submitted to kbase. ++ * This function must perform any operations that could not be performed in IRQ ++ * context by kbase_backend_release_ctx_irq(). 
+ */ -+static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) -+{ -+ struct kbase_context *const kctx = end_katom->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_jd_renderpass *rp; -+ struct kbase_jd_atom *start_katom; -+ unsigned long flags; ++void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ lockdep_assert_held(&kctx->jctx.lock); ++/** ++ * kbase_backend_cache_clean - Perform a cache clean if the given atom requires ++ * one ++ * @kbdev: Device pointer ++ * @katom: Pointer to the failed atom ++ * ++ * On some GPUs, the GPU cache must be cleaned following a failed atom. This ++ * function performs a clean if it is required by @katom. ++ */ ++void kbase_backend_cache_clean(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) -+ return; + -+ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++/** ++ * kbase_backend_complete_wq() - Perform backend-specific actions required on ++ * completing an atom. ++ * @kbdev: Device pointer ++ * @katom: Pointer to the atom to complete ++ * ++ * This function should only be called from kbase_jd_done_worker() or ++ * js_return_worker(). ++ * ++ * Return: true if atom has completed, false if atom should be re-submitted ++ */ ++void kbase_backend_complete_wq(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; ++#if !MALI_USE_CSF ++/** ++ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions ++ * required on completing an atom, after ++ * any scheduling has taken place. ++ * @kbdev: Device pointer ++ * @core_req: Core requirements of atom ++ * ++ * This function should only be called from kbase_jd_done_worker() or ++ * js_return_worker(). ++ */ ++void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, ++ base_jd_core_req core_req); ++#endif /* !MALI_USE_CSF */ + -+ if (WARN_ON(rp->end_katom != end_katom)) -+ return; ++/** ++ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU ++ * and remove any others from the ringbuffers. ++ * @kbdev: Device pointer ++ * @end_timestamp: Timestamp of reset ++ */ ++void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); + -+ dev_dbg(kctx->kbdev->dev, -+ "JS return end atom %pK in state %d of RP %d\n", -+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); ++/** ++ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot ++ * @js ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return: Atom currently at the head of slot @js, or NULL ++ */ ++struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js); + -+ if (WARN_ON(rp->state != KBASE_JD_RP_OOM && -+ rp->state != KBASE_JD_RP_RETRY_OOM)) -+ return; ++/** ++ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a ++ * slot. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return: Number of atoms currently on slot ++ */ ++int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js); + -+ /* Reduce the number of mapped pages in the memory regions that -+ * triggered out-of-memory last time so that we can detect excessive -+ * memory usage again. 
-+ */ -+ kbase_gpu_vm_lock(kctx); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot ++ * that are currently on the GPU. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return: Number of atoms currently on slot @js that are currently on the GPU. ++ */ ++int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js); + -+ while (!list_empty(&rp->oom_reg_list)) { -+ struct kbase_va_region *reg = -+ list_first_entry(&rp->oom_reg_list, -+ struct kbase_va_region, link); ++/** ++ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs ++ * has changed. ++ * @kbdev: Device pointer ++ * ++ * Perform any required backend-specific actions (eg starting/stopping ++ * scheduling timers). ++ */ ++void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. ++ * @kbdev: Device pointer ++ * ++ * Perform any required backend-specific actions (eg updating timeouts of ++ * currently running atoms). ++ */ ++void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + -+ dev_dbg(kbdev->dev, -+ "Reset backing to %zu pages for region %pK\n", -+ reg->threshold_pages, (void *)reg); ++/** ++ * kbase_backend_slot_free() - Return the number of jobs that can be currently ++ * submitted to slot @js. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return: Number of jobs that can be submitted. ++ */ ++int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); + -+ if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) -+ kbase_mem_shrink(kctx, reg, reg->threshold_pages); ++/** ++ * kbase_job_check_leave_disjoint - potentially leave disjoint state ++ * @kbdev: kbase device ++ * @target_katom: atom which is finishing ++ * ++ * Work out whether to leave disjoint state when finishing an atom that was ++ * originated by kbase_job_check_enter_disjoint(). ++ */ ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ dev_dbg(kbdev->dev, "Deleting region %pK from list\n", -+ (void *)reg); -+ list_del_init(®->link); -+ kbase_va_region_alloc_put(kctx, reg); -+ } ++/** ++ * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are ++ * currently running on GPU from a context ++ * @kctx: Context pointer ++ * ++ * This is used in response to a page fault to remove all jobs from the faulting ++ * context from the hardware. ++ * ++ * Caller must hold hwaccess_lock. ++ */ ++void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_gpu_vm_unlock(kctx); ++/** ++ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and ++ * to be descheduled. ++ * @kctx: Context pointer ++ * ++ * This should be called following kbase_js_zap_context(), to ensure the context ++ * can be safely destroyed. 
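++ *
++ * A typical teardown sequence (sketch) is therefore:
++ *
++ *   kbase_js_zap_context(kctx);
++ *   kbase_jm_wait_for_zero_jobs(kctx);
++ *
++ * after which the context may be destroyed.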
++ */ ++void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ rp->state = KBASE_JD_RP_RETRY; -+ dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); ++/** ++ * kbase_backend_get_current_flush_id - Return the current flush ID ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: the current flush ID to be recorded for each job chain ++ */ ++u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); + -+ /* Allow the start of the renderpass to be pulled for execution again -+ * to begin/continue incremental rendering. -+ */ -+ start_katom = rp->start_katom; -+ if (!WARN_ON(!start_katom)) { -+ dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", -+ (void *)start_katom); -+ atomic_dec(&start_katom->blocked); -+ (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, -+ start_katom->slot_nr); -+ } ++/** ++ * kbase_job_slot_hardstop - Hard-stop the specified job slot ++ * @kctx: The kbase context that contains the job(s) that should ++ * be hard-stopped ++ * @js: The job slot to hard-stop ++ * @target_katom: The job that should be hard-stopped (or NULL for all ++ * jobs from the context) ++ * Context: ++ * The job slot lock must be held when calling this function. ++ */ ++void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, ++ struct kbase_jd_atom *target_katom); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++/** ++ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms ++ * currently on the GPU ++ * @kbdev: Device pointer ++ * ++ * Return: true if there are any atoms on the GPU, false otherwise ++ */ ++bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); + -+static void js_return_worker(struct work_struct *data) -+{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; -+ struct kbasep_js_atom_retained_state retained_state; -+ int js = katom->slot_nr; -+ bool slot_became_unblocked; -+ bool timer_sync = false; -+ bool context_idle = false; -+ unsigned long flags; -+ base_jd_core_req core_req = katom->core_req; -+ u64 cache_jc = katom->jc; ++/** ++ * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked ++ * kctx ++ * ++ * @kbdev: Device pointer ++ * @kctx: The kbase context that needs to be purged from slot_rb[] ++ * ++ * For JM GPUs, the L1 read only caches may need a start_flush invalidation, ++ * potentially on all slots (even if the kctx was only using a single slot), ++ * following a context termination or address-space ID recycle. This function ++ * performs a clean-up purge on the given kctx which if it has been tracked by ++ * slot_rb[] objects. ++ * ++ * Caller must hold kbase_device->hwaccess_lock. 
++ */ ++void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx); + -+ dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", -+ __func__, (void *)katom, katom->event_code); ++#endif /* _KBASE_HWACCESS_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +new file mode 100644 +index 000000000..effb2ffeb +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +@@ -0,0 +1,229 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0); ++/** ++ * DOC: HW access power manager common APIs ++ */ + -+ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); ++#ifndef _KBASE_HWACCESS_PM_H_ ++#define _KBASE_HWACCESS_PM_H_ + -+ kbase_backend_complete_wq(kbdev, katom); ++#include ++#include + -+ kbasep_js_atom_retained_state_copy(&retained_state, katom); ++#include + -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; + -+ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) -+ atomic_dec(&katom->blocked); ++/* Functions common to all HW access backends */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_hwaccess_pm_init - Initialize the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Must be called before any other power management function ++ * ++ * Return: 0 if the power management framework was successfully initialized. ++ */ ++int kbase_hwaccess_pm_init(struct kbase_device *kbdev); + -+ slot_became_unblocked = kbase_jsctx_slot_atom_pulled_dec(kctx, katom); ++/** ++ * kbase_hwaccess_pm_term - Terminate the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * No power management functions may be called after this ++ */ ++void kbase_hwaccess_pm_term(struct kbase_device *kbdev); + -+ if (!kbase_jsctx_slot_atoms_pulled(kctx, js) && -+ jsctx_rb_none_to_pull(kctx, js)) -+ timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); ++/** ++ * kbase_hwaccess_pm_powerup - Power up the GPU. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @flags: Flags to pass on to kbase_pm_init_hw ++ * ++ * Power up GPU after all modules have been initialized and interrupt handlers ++ * installed. ++ * ++ * Return: 0 if powerup was successful. 
++ */ ++int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, ++ unsigned int flags); + -+ /* If the context is now unblocked on this slot after soft-stopped -+ * atoms, then only mark it as pullable on this slot if it is not -+ * idle -+ */ -+ if (slot_became_unblocked && kbase_jsctx_atoms_pulled(kctx) && -+ kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); ++/** ++ * kbase_hwaccess_pm_halt - Halt the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Should ensure that no new interrupts are generated, but allow any currently ++ * running interrupt handlers to complete successfully. The GPU is forced off by ++ * the time this function returns, regardless of whether or not the active power ++ * policy asks for the GPU to be powered off. ++ */ ++void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); + -+ if (!kbase_jsctx_atoms_pulled(kctx)) { -+ dev_dbg(kbdev->dev, -+ "No atoms currently pulled from context %pK\n", -+ (void *)kctx); ++/** ++ * kbase_hwaccess_pm_suspend - Perform any backend-specific actions to suspend the GPU ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 if suspend was successful. ++ */ ++int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); + -+ if (!kctx->slots_pullable) { -+ dev_dbg(kbdev->dev, -+ "Context %pK %s counted as runnable\n", -+ (void *)kctx, -+ kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? -+ "is" : "isn't"); ++/** ++ * kbase_hwaccess_pm_resume - Perform any backend-specific actions to resume the GPU ++ * from a suspend ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ timer_sync = true; -+ } ++/** ++ * kbase_hwaccess_pm_gpu_active - Perform any required actions for activating the GPU. ++ * Called when the first context goes active. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ int num_slots = kbdev->gpu_props.num_job_slots; -+ int slot; ++/** ++ * kbase_hwaccess_pm_gpu_idle - Perform any required actions for idling the GPU. ++ * Called when the last context goes idle. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); + -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++#if MALI_USE_CSF ++/** ++ * kbase_pm_set_debug_core_mask - Set the debug core mask. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_core_mask: The core mask to use ++ * ++ * This determines which cores the power manager is allowed to use. ++ */ ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask); ++#else ++/** ++ * kbase_pm_set_debug_core_mask - Set the debug core mask. 
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_core_mask_js0: The core mask to use for job slot 0 ++ * @new_core_mask_js1: The core mask to use for job slot 1 ++ * @new_core_mask_js2: The core mask to use for job slot 2 ++ * ++ * This determines which cores the power manager is allowed to use. ++ */ ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask_js0, u64 new_core_mask_js1, ++ u64 new_core_mask_js2); ++#endif /* MALI_USE_CSF */ + -+ for (slot = 0; slot < num_slots; slot++) { -+ if (kbase_js_ctx_pullable(kctx, slot, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, slot); -+ } -+ } ++/** ++ * kbase_pm_ca_get_policy - Get the current policy. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Returns the policy that is currently active. ++ * ++ * Return: The current policy ++ */ ++const struct kbase_pm_ca_policy ++*kbase_pm_ca_get_policy(struct kbase_device *kbdev); + -+ kbase_jm_idle_ctx(kbdev, kctx); ++/** ++ * kbase_pm_ca_set_policy - Change the policy to the one specified. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @policy: The policy to change to (valid pointer returned from ++ * @ref kbase_pm_ca_list_policies) ++ */ ++void kbase_pm_ca_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_ca_policy *policy); + -+ context_idle = true; -+ } ++/** ++ * kbase_pm_ca_list_policies - Retrieve a static list of the available policies. ++ * ++ * @policies: An array pointer to take the list of policies. This may be NULL. ++ * The contents of this array must not be modified. ++ * ++ * Return: The number of policies ++ */ ++int ++kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/** ++ * kbase_pm_get_policy - Get the current policy. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Returns the policy that is currently active. ++ * ++ * Return: The current policy ++ */ ++const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); + -+ if (context_idle) { -+ dev_dbg(kbdev->dev, -+ "Context %pK %s counted as active\n", -+ (void *)kctx, -+ kbase_ctx_flag(kctx, KCTX_ACTIVE) ? -+ "is" : "isn't"); -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } ++/** ++ * kbase_pm_set_policy - Change the policy to the one specified. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @policy: The policy to change to (valid pointer returned from ++ * @ref kbase_pm_list_policies) ++ */ ++void kbase_pm_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_policy *policy); + -+ if (timer_sync) -+ kbase_js_sync_timers(kbdev); ++/** ++ * kbase_pm_list_policies - Retrieve a static list of the available policies. ++ * ++ * @kbdev: The kbase device structure for the device. ++ * @list: An array pointer to take the list of policies. This may be NULL. ++ * The contents of this array must not be modified. 
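++ *
++ * For example (sketch), a caller could pick the first advertised policy and
++ * make it active with kbase_pm_set_policy():
++ *
++ *   const struct kbase_pm_policy *const *policies;
++ *   int n = kbase_pm_list_policies(kbdev, &policies);
++ *
++ *   if (n > 0)
++ *           kbase_pm_set_policy(kbdev, policies[0]);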
++ * ++ * Return: The number of policies ++ */ ++int kbase_pm_list_policies(struct kbase_device *kbdev, ++ const struct kbase_pm_policy * const **list); + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++/** ++ * kbase_pm_protected_mode_enable() - Enable protected mode ++ * ++ * @kbdev: Address of the instance of a GPU platform device. ++ * ++ * Return: Zero on success or an error code ++ */ ++int kbase_pm_protected_mode_enable(struct kbase_device *kbdev); + -+ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { -+ mutex_lock(&kctx->jctx.lock); -+ js_return_of_start_rp(katom); -+ mutex_unlock(&kctx->jctx.lock); -+ } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { -+ mutex_lock(&kctx->jctx.lock); -+ js_return_of_end_rp(katom); -+ mutex_unlock(&kctx->jctx.lock); -+ } ++/** ++ * kbase_pm_protected_mode_disable() - Disable protected mode ++ * ++ * @kbdev: Address of the instance of a GPU platform device. ++ * ++ * Return: Zero on success or an error code ++ */ ++int kbase_pm_protected_mode_disable(struct kbase_device *kbdev); + -+ dev_dbg(kbdev->dev, "JS: retained state %s finished", -+ kbasep_js_has_atom_finished(&retained_state) ? -+ "has" : "hasn't"); ++#endif /* _KBASE_HWACCESS_PM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +new file mode 100644 +index 000000000..ac2a26d28 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +@@ -0,0 +1,126 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ WARN_ON(kbasep_js_has_atom_finished(&retained_state)); ++#ifndef _KBASE_BACKEND_TIME_H_ ++#define _KBASE_BACKEND_TIME_H_ + -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, -+ &retained_state); ++#if MALI_USE_CSF ++/** ++ * struct kbase_backend_time - System timestamp attributes. ++ * ++ * @multiplier: Numerator of the converter's fraction. ++ * @divisor: Denominator of the converter's fraction. ++ * @offset: Converter's offset term. ++ * ++ * According to Generic timer spec, system timer: ++ * - Increments at a fixed frequency ++ * - Starts operating from zero ++ * ++ * Hence CPU time is a linear function of System Time. ++ * ++ * CPU_ts = alpha * SYS_ts + beta ++ * ++ * Where ++ * - alpha = 10^9/SYS_ts_freq ++ * - beta is calculated by two timer samples taken at the same time: ++ * beta = CPU_ts_s - SYS_ts_s * alpha ++ * ++ * Since alpha is a rational number, we minimizing possible ++ * rounding error by simplifying the ratio. Thus alpha is stored ++ * as a simple `multiplier / divisor` ratio. 
++ * ++ */ ++struct kbase_backend_time { ++ u64 multiplier; ++ u64 divisor; ++ s64 offset; ++}; + -+ kbase_js_sched_all(kbdev); ++/** ++ * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. ++ * ++ * @kbdev: Kbase device pointer ++ * @gpu_ts: System timestamp value to converter. ++ * ++ * Return: The CPU timestamp. ++ */ ++u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts); ++#endif + -+ kbase_backend_complete_wq_post_sched(kbdev, core_req); ++/** ++ * kbase_backend_get_gpu_time() - Get current GPU time ++ * @kbdev: Device pointer ++ * @cycle_counter: Pointer to u64 to store cycle counter in. ++ * @system_time: Pointer to u64 to store system time in ++ * @ts: Pointer to struct timespec to store current monotonic ++ * time in ++ */ ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts); + -+ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER_END, kctx, NULL, cache_jc, -+ 0); ++/** ++ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without ++ * request/release cycle counter ++ * @kbdev: Device pointer ++ * @cycle_counter: Pointer to u64 to store cycle counter in ++ * @system_time: Pointer to u64 to store system time in ++ * @ts: Pointer to struct timespec to store current monotonic ++ * time in ++ */ ++void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, ++ u64 *cycle_counter, ++ u64 *system_time, ++ struct timespec64 *ts); ++/** ++ * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled ++ * GPU frequency, using a choice from ++ * kbase_timeout_selector. ++ * ++ * @kbdev: KBase device pointer. ++ * @selector: Value from kbase_scaled_timeout_selector enum. ++ * ++ * Return: Timeout in milliseconds, as an unsigned integer. ++ */ ++unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, ++ enum kbase_timeout_selector selector); + -+ dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", -+ __func__, (void *)katom); -+} ++/** ++ * kbase_backend_get_cycle_cnt - Reads the GPU cycle counter ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: Snapshot of the GPU cycle count register. ++ */ ++u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev); + -+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", -+ (void *)katom, (void *)kctx); ++/** ++ * kbase_backend_time_init() - Initialize system timestamp converter. ++ * ++ * @kbdev: Kbase device pointer ++ * ++ * This function should only be called after GPU is powered-up and ++ * L2 cached power-up has been initiated. ++ * ++ * Return: Zero on success, error code otherwise. ++ */ ++int kbase_backend_time_init(struct kbase_device *kbdev); + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++#endif /* _KBASE_BACKEND_TIME_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c +new file mode 100644 +index 000000000..f44426a73 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c +@@ -0,0 +1,1660 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
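
The mali_kbase_hwaccess_time.h interface added above models CPU time as a linear function of the system timer, CPU_ts = alpha * SYS_ts + beta, where alpha = 10^9 / SYS_ts_freq is stored as the reduced multiplier/divisor fraction and beta is the offset field. As a minimal sketch of that arithmetic (CSF builds only, since struct kbase_backend_time is defined under MALI_USE_CSF) -- not the driver's actual kbase_backend_time_convert_gpu_to_cpu(), which may handle overflow differently -- the conversion amounts to:

#include <linux/math64.h>	/* div64_u64() */

/* Hypothetical illustration only -- not part of the kbase driver. */
static u64 example_gpu_to_cpu_ts(const struct kbase_backend_time *t, u64 gpu_ts)
{
	/* CPU_ts = (multiplier / divisor) * SYS_ts + offset.
	 * Note: multiplier * gpu_ts can overflow u64 for large timestamps,
	 * so a real implementation needs to guard against that. */
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}
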
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ jsctx_rb_unpull(kctx, katom); ++#include ++#if IS_ENABLED(CONFIG_COMPAT) ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++#include ++#else ++#include ++#endif + -+ WARN_ON(work_pending(&katom->work)); ++#include ++#include ++#include ++#include ++#include + -+ /* Block re-submission until workqueue has run */ -+ atomic_inc(&katom->blocked); ++#include + -+ kbase_job_check_leave_disjoint(kctx->kbdev, katom); ++#include + -+ INIT_WORK(&katom->work, js_return_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); ++/* Return whether katom will run on the GPU or not. Currently only soft jobs and ++ * dependency-only atoms do not run on the GPU ++ */ ++#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ ++ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ ++ BASE_JD_REQ_DEP))) ++ ++/* ++ * This is the kernel side of the API. Only entry points are: ++ * - kbase_jd_submit(): Called from userspace to submit a single bag ++ * - kbase_jd_done(): Called from interrupt context to track the ++ * completion of a job. ++ * Callouts: ++ * - to the job manager (enqueue a job) ++ * - to the event subsystem (signals the completion/failure of bag/job-chains). ++ */ ++ ++static void __user * ++get_compat_pointer(struct kbase_context *kctx, const u64 p) ++{ ++#if IS_ENABLED(CONFIG_COMPAT) ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ return compat_ptr(p); ++#endif ++ return u64_to_user_ptr(p); +} + -+/** -+ * js_complete_start_rp() - Handle completion of atom that starts a renderpass -+ * @kctx: Context pointer -+ * @start_katom: Pointer to the atom that completed -+ * -+ * Put any references to virtual memory regions that might have been added by -+ * kbase_job_slot_softstop_start_rp() because the tiler job chain completed -+ * despite any pending soft-stop request. ++/* Mark an atom as complete, and trace it in kinstr_jm */ ++static void jd_mark_atom_complete(struct kbase_jd_atom *katom) ++{ ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ kbase_kinstr_jm_atom_complete(katom); ++ dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", ++ (void *)katom); ++ KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE(katom->kctx->kbdev, katom); ++} ++ ++/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs + * -+ * If the atom that just completed was soft-stopped during a previous attempt to -+ * run it then there should be a blocked end-of-renderpass atom waiting for it, -+ * which we must unblock to process the output of the tiler job chain. ++ * Returns whether the JS needs a reschedule. 
+ * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Note that the caller must also check the atom status and ++ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call kbase_jd_done_nolock + */ -+static bool js_complete_start_rp(struct kbase_context *kctx, -+ struct kbase_jd_atom *const start_katom) ++static bool jd_run_atom(struct kbase_jd_atom *katom) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_jd_renderpass *rp; -+ bool timer_sync = false; -+ -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) -+ return false; ++ struct kbase_context *kctx = katom->kctx; + -+ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", ++ (void *)katom, (void *)kctx); + -+ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; ++ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + -+ if (WARN_ON(rp->start_katom != start_katom)) ++ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { ++ /* Dependency only atom */ ++ trace_sysgraph(SGR_SUBMIT, kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom)); ++ jd_mark_atom_complete(katom); + return false; -+ -+ dev_dbg(kctx->kbdev->dev, -+ "Start atom %pK is done in state %d of RP %d\n", -+ (void *)start_katom, (int)rp->state, -+ start_katom->renderpass_id); -+ -+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) ++ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { ++ /* Soft-job */ ++ if (katom->will_fail_event_code) { ++ kbase_finish_soft_job(katom); ++ jd_mark_atom_complete(katom); ++ return false; ++ } ++ if (kbase_process_soft_job(katom) == 0) { ++ kbase_finish_soft_job(katom); ++ jd_mark_atom_complete(katom); ++ } + return false; ++ } + -+ if (rp->state == KBASE_JD_RP_PEND_OOM || -+ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { -+ unsigned long flags; ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); ++ /* Queue an action about whether we should try scheduling a context */ ++ return kbasep_js_add_job(kctx, katom); ++} + -+ dev_dbg(kctx->kbdev->dev, -+ "Start atom %pK completed before soft-stop\n", -+ (void *)start_katom); ++void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) ++{ ++ struct kbase_device *kbdev; + -+ kbase_gpu_vm_lock(kctx); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ KBASE_DEBUG_ASSERT(katom); ++ kbdev = katom->kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ while (!list_empty(&rp->oom_reg_list)) { -+ struct kbase_va_region *reg = -+ list_first_entry(&rp->oom_reg_list, -+ struct kbase_va_region, link); ++ /* Check whether the atom's other dependencies were already met. If ++ * katom is a GPU atom then the job scheduler may be able to represent ++ * the dependencies, hence we may attempt to submit it before they are ++ * met. Other atoms must have had both dependencies resolved. 
++ */ ++ if (IS_GPU_ATOM(katom) || ++ (!kbase_jd_katom_dep_atom(&katom->dep[0]) && ++ !kbase_jd_katom_dep_atom(&katom->dep[1]))) { ++ /* katom dep complete, attempt to run it */ ++ bool resched = false; + -+ WARN_ON(reg->flags & KBASE_REG_VA_FREED); -+ dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", -+ (void *)reg); -+ list_del_init(®->link); -+ kbase_va_region_alloc_put(kctx, reg); -+ } ++ KBASE_TLSTREAM_TL_RUN_ATOM_START( ++ katom->kctx->kbdev, katom, ++ kbase_jd_atom_id(katom->kctx, katom)); ++ resched = jd_run_atom(katom); ++ KBASE_TLSTREAM_TL_RUN_ATOM_END(katom->kctx->kbdev, katom, ++ kbase_jd_atom_id(katom->kctx, ++ katom)); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_gpu_vm_unlock(kctx); -+ } else { -+ dev_dbg(kctx->kbdev->dev, -+ "Start atom %pK did not exceed memory threshold\n", -+ (void *)start_katom); ++ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ /* The atom has already finished */ ++ resched |= kbase_jd_done_nolock(katom, true); ++ } + -+ WARN_ON(rp->state != KBASE_JD_RP_START && -+ rp->state != KBASE_JD_RP_RETRY); ++ if (resched) ++ kbase_js_sched_all(kbdev); + } ++} + -+ if (rp->state == KBASE_JD_RP_RETRY || -+ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { -+ struct kbase_jd_atom *const end_katom = rp->end_katom; ++void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) ++{ ++} + -+ if (!WARN_ON(!end_katom)) { -+ unsigned long flags; ++static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) ++{ ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + -+ /* Allow the end of the renderpass to be pulled for -+ * execution again to continue incremental rendering. -+ */ -+ dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", -+ (void *)end_katom); -+ atomic_dec(&end_katom->blocked); ++ kbase_gpu_vm_lock(katom->kctx); ++ /* only roll back if extres is non-NULL */ ++ if (katom->extres) { ++ u32 res_no; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ timer_sync = kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, end_katom->slot_nr); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ res_no = katom->nr_extres; ++ while (res_no-- > 0) { ++ kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); + } ++ kfree(katom->extres); ++ katom->extres = NULL; + } -+ -+ return timer_sync; ++ kbase_gpu_vm_unlock(katom->kctx); +} + -+/** -+ * js_complete_end_rp() - Handle final completion of atom that ends a renderpass -+ * @kctx: Context pointer -+ * @end_katom: Pointer to the atom that completed for the last time ++/* ++ * Set up external resources needed by this job. + * -+ * This function must only be called if the renderpass actually completed -+ * without the tiler job chain at the start using too much memory; otherwise -+ * completion of the end-of-renderpass atom is handled similarly to a soft-stop. ++ * jctx.lock must be held when this is called. 
+ */ -+static void js_complete_end_rp(struct kbase_context *kctx, -+ struct kbase_jd_atom *const end_katom) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ unsigned long flags; -+ struct kbase_jd_renderpass *rp; + -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) -+ return; ++static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) ++{ ++ int err = -EINVAL; ++ u32 res_no; ++ struct base_external_resource *input_extres; + -+ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + -+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; ++ /* no resources encoded, early out */ ++ if (!katom->nr_extres) ++ return -EINVAL; + -+ if (WARN_ON(rp->end_katom != end_katom)) -+ return; ++ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); ++ if (!katom->extres) ++ return -ENOMEM; + -+ dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", -+ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); ++ input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); ++ if (!input_extres) { ++ err = -ENOMEM; ++ goto failed_input_alloc; ++ } + -+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || -+ WARN_ON(rp->state == KBASE_JD_RP_OOM) || -+ WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) -+ return; ++ if (copy_from_user(input_extres, ++ get_compat_pointer(katom->kctx, user_atom->extres_list), ++ sizeof(*input_extres) * katom->nr_extres) != 0) { ++ err = -EINVAL; ++ goto failed_input_copy; ++ } + -+ /* Rendering completed without running out of memory. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(!list_empty(&rp->oom_reg_list)); -+ rp->state = KBASE_JD_RP_COMPLETE; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Take the processes mmap lock */ ++ down_read(kbase_mem_get_process_mmap_lock()); + -+ dev_dbg(kbdev->dev, "Renderpass %d is complete\n", -+ end_katom->renderpass_id); -+} ++ /* need to keep the GPU VM locked while we set up UMM buffers */ ++ kbase_gpu_vm_lock(katom->kctx); ++ for (res_no = 0; res_no < katom->nr_extres; res_no++) { ++ struct base_external_resource *user_res = &input_extres[res_no]; ++ struct kbase_va_region *reg; + -+bool kbase_js_complete_atom_wq(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ bool timer_sync = false; -+ int atom_slot; -+ bool context_idle = false; -+ int prio = katom->sched_priority; ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ /* did we find a matching region object? 
*/ ++ if (unlikely(kbase_is_region_invalid_or_free(reg))) { ++ /* roll back */ ++ goto failed_loop; ++ } + -+ kbdev = kctx->kbdev; -+ atom_slot = katom->slot_nr; ++ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && ++ (reg->flags & KBASE_REG_PROTECTED)) { ++ katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; ++ } + -+ dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", -+ __func__, (void *)katom, atom_slot); ++ err = kbase_map_external_resource(katom->kctx, reg, current->mm); ++ if (err) ++ goto failed_loop; + -+ /* Update the incremental rendering state machine. -+ */ -+ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) -+ timer_sync |= js_complete_start_rp(kctx, katom); -+ else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) -+ js_complete_end_rp(kctx, katom); ++ katom->extres[res_no] = reg; ++ } ++ /* successfully parsed the extres array */ ++ /* drop the vm lock now */ ++ kbase_gpu_vm_unlock(katom->kctx); + -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++ /* Release the processes mmap lock */ ++ up_read(kbase_mem_get_process_mmap_lock()); + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ /* Free the buffer holding data from userspace */ ++ kfree(input_extres); + -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* all done OK */ ++ return 0; + -+ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { -+ bool slot_became_unblocked; ++/* error handling section */ ++failed_loop: ++ /* undo the loop work. We are guaranteed to have access to the VA region ++ * as we hold a reference to it until it's unmapped ++ */ ++ while (res_no-- > 0) { ++ struct kbase_va_region *reg = katom->extres[res_no]; + -+ dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", -+ (void *)katom); ++ kbase_unmap_external_resource(katom->kctx, reg); ++ } ++ kbase_gpu_vm_unlock(katom->kctx); + -+ slot_became_unblocked = -+ kbase_jsctx_slot_atom_pulled_dec(kctx, katom); -+ context_idle = !kbase_jsctx_atoms_pulled(kctx); ++ /* Release the processes mmap lock */ ++ up_read(kbase_mem_get_process_mmap_lock()); + -+ if (!kbase_jsctx_atoms_pulled(kctx) && !kctx->slots_pullable) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ timer_sync = true; -+ } ++failed_input_copy: ++ kfree(input_extres); ++failed_input_alloc: ++ kfree(katom->extres); ++ katom->extres = NULL; ++ return err; ++} + -+ /* If this slot has been blocked due to soft-stopped atoms, and -+ * all atoms have now been processed at this priority level and -+ * higher, then unblock the slot -+ */ -+ if (slot_became_unblocked) { -+ dev_dbg(kbdev->dev, -+ "kctx %pK is no longer blocked from submitting on slot %d at priority %d or higher\n", -+ (void *)kctx, atom_slot, prio); ++static inline void jd_resolve_dep(struct list_head *out_list, ++ struct kbase_jd_atom *katom, ++ u8 d, bool ctx_is_dying) ++{ ++ u8 other_d = !d; + -+ if (kbase_js_ctx_pullable(kctx, atom_slot, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, atom_slot); -+ } -+ } -+ WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); ++ while (!list_empty(&katom->dep_head[d])) { ++ struct kbase_jd_atom *dep_atom; ++ struct kbase_jd_atom *other_dep_atom; ++ u8 dep_type; + -+ if (!kbase_jsctx_slot_atoms_pulled(kctx, atom_slot) && -+ jsctx_rb_none_to_pull(kctx, atom_slot)) { -+ if (!list_empty( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) -+ timer_sync |= 
kbase_js_ctx_list_remove_nolock( -+ kctx->kbdev, kctx, atom_slot); -+ } ++ dep_atom = list_entry(katom->dep_head[d].next, ++ struct kbase_jd_atom, dep_item[d]); ++ list_del(katom->dep_head[d].next); + -+ /* -+ * If submission is disabled on this context (most likely due to an -+ * atom failure) and there are now no atoms left in the system then -+ * re-enable submission so that context can be scheduled again. -+ */ -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && -+ !kbase_jsctx_atoms_pulled(kctx) && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ unsigned int js; ++ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); ++ kbase_jd_katom_dep_clear(&dep_atom->dep[d]); + -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ if (katom->event_code != BASE_JD_EVENT_DONE && ++ (dep_type != BASE_JD_DEP_TYPE_ORDER)) { ++ dep_atom->event_code = katom->event_code; ++ KBASE_DEBUG_ASSERT(dep_atom->status != ++ KBASE_JD_ATOM_STATE_UNUSED); + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); ++ dep_atom->will_fail_event_code = dep_atom->event_code; + } -+ } else if (katom->x_post_dep && -+ kbasep_js_is_submit_allowed(js_devdata, kctx)) { -+ unsigned int js; ++ other_dep_atom = (struct kbase_jd_atom *) ++ kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); ++ if (!dep_atom->in_jd_list && (!other_dep_atom || ++ (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && ++ !dep_atom->will_fail_event_code && ++ !other_dep_atom->will_fail_event_code))) { ++ dep_atom->in_jd_list = true; ++ list_add_tail(&dep_atom->jd_item, out_list); + } + } -+ -+ /* Mark context as inactive. The pm reference will be dropped later in -+ * jd_done_worker(). -+ */ -+ if (context_idle) { -+ dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", -+ (void *)kctx); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ } -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ -+ dev_dbg(kbdev->dev, "Leaving %s\n", __func__); -+ return context_idle; +} + +/** -+ * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has -+ * completed for the last time. -+ * -+ * @end_katom: Pointer to the atom that completed on the hardware. -+ * -+ * An atom that ends a renderpass may be run on the hardware several times -+ * before notifying userspace or allowing dependent atoms to be executed. ++ * is_dep_valid - Validate that a dependency is valid for early dependency ++ * submission ++ * @katom: Dependency atom to validate + * -+ * This function is used to decide whether or not to allow end-of-renderpass -+ * atom completion. It only returns false if the atom at the start of the -+ * renderpass was soft-stopped because it used too much memory during the most -+ * recent attempt at tiling. ++ * A dependency is valid if any of the following are true : ++ * - It does not exist (a non-existent dependency does not block submission) ++ * - It is in the job scheduler ++ * - It has completed, does not have a failure event code, and has not been ++ * marked to fail in the future + * -+ * Return: True if the atom completed for the last time. 
++ * Return: true if valid, false otherwise + */ -+static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) ++static bool is_dep_valid(struct kbase_jd_atom *katom) +{ -+ struct kbase_context *const kctx = end_katom->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_jd_renderpass *rp; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) ++ /* If there's no dependency then this is 'valid' from the perspective of ++ * early dependency submission ++ */ ++ if (!katom) + return true; + -+ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ /* Dependency must have reached the job scheduler */ ++ if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) ++ return false; + -+ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; ++ /* If dependency has completed and has failed or will fail then it is ++ * not valid ++ */ ++ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && ++ (katom->event_code != BASE_JD_EVENT_DONE || ++ katom->will_fail_event_code)) ++ return false; + -+ if (WARN_ON(rp->end_katom != end_katom)) -+ return true; ++ return true; ++} + -+ dev_dbg(kbdev->dev, -+ "JS complete end atom %pK in state %d of RP %d\n", -+ (void *)end_katom, (int)rp->state, -+ end_katom->renderpass_id); ++static void jd_try_submitting_deps(struct list_head *out_list, ++ struct kbase_jd_atom *node) ++{ ++ int i; + -+ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) -+ return true; ++ for (i = 0; i < 2; i++) { ++ struct list_head *pos; + -+ /* Failure of end-of-renderpass atoms must not return to the -+ * start of the renderpass. -+ */ -+ if (end_katom->event_code != BASE_JD_EVENT_DONE) -+ return true; ++ list_for_each(pos, &node->dep_head[i]) { ++ struct kbase_jd_atom *dep_atom = list_entry(pos, ++ struct kbase_jd_atom, dep_item[i]); + -+ if (rp->state != KBASE_JD_RP_OOM && -+ rp->state != KBASE_JD_RP_RETRY_OOM) -+ return true; ++ if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { ++ /*Check if atom deps look sane*/ ++ bool dep0_valid = is_dep_valid( ++ dep_atom->dep[0].atom); ++ bool dep1_valid = is_dep_valid( ++ dep_atom->dep[1].atom); + -+ dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); -+ return false; ++ if (dep0_valid && dep1_valid) { ++ dep_atom->in_jd_list = true; ++ list_add(&dep_atom->jd_item, out_list); ++ } ++ } ++ } ++ } +} + -+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp) ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/** ++ * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. ++ * ++ * @katom: An atom that has just finished. ++ * ++ * Read back actual just-in-time memory region usage from atoms that provide ++ * this information, and update the current physical page pressure. ++ * ++ * The caller must hold the kbase_jd_context.lock. 
++ */ ++static void jd_update_jit_usage(struct kbase_jd_atom *katom) +{ -+ struct kbase_device *kbdev; + struct kbase_context *kctx = katom->kctx; -+ struct kbase_jd_atom *x_dep = katom->x_post_dep; ++ struct kbase_va_region *reg; ++ struct kbase_vmap_struct mapping; ++ u64 *ptr; ++ u64 used_pages; ++ unsigned int idx; + -+ kbdev = kctx->kbdev; -+ dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", -+ (void *)katom, (void *)kctx, (void *)x_dep); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ /* If this atom wrote to JIT memory, find out how much it has written ++ * and update the usage information in the region. ++ */ ++ for (idx = 0; ++ idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; ++ idx++) { ++ enum heap_pointer { LOW = 0, HIGH, COUNT }; ++ size_t size_to_read; ++ u64 read_val; + -+ if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && -+ !js_end_rp_is_complete(katom)) { -+ katom->event_code = BASE_JD_EVENT_END_RP_DONE; -+ kbase_js_unpull(kctx, katom); -+ return NULL; -+ } ++ reg = kctx->jit_alloc[katom->jit_ids[idx]]; + -+ if (katom->will_fail_event_code) -+ katom->event_code = katom->will_fail_event_code; ++ if (!reg) { ++ dev_warn(kctx->kbdev->dev, ++ "%s: JIT id[%u]=%u has no region\n", ++ __func__, idx, katom->jit_ids[idx]); ++ continue; ++ } + -+ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; -+ dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); -+ if (kbase_is_quick_reset_enabled(kbdev)) { -+ kbdev->num_of_atoms_hw_completed++; -+ if (kbdev->num_of_atoms_hw_completed >= 20) -+ kbase_disable_quick_reset(kbdev); -+ } ++ if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { ++ dev_warn(kctx->kbdev->dev, ++ "%s: JIT id[%u]=%u has failed to allocate a region\n", ++ __func__, idx, katom->jit_ids[idx]); ++ continue; ++ } + -+ if (katom->event_code != BASE_JD_EVENT_DONE) { -+ kbase_js_evict_deps(kctx, katom, katom->slot_nr, -+ katom->sched_priority); -+ } ++ if (!reg->heap_info_gpu_addr) ++ continue; + -+ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, -+ katom->slot_nr, 0, TL_JS_EVENT_STOP); ++ size_to_read = sizeof(*ptr); ++ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ++ size_to_read = sizeof(u32); ++ else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ++ size_to_read = sizeof(u64[COUNT]); + -+ trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); ++ ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read, ++ KBASE_REG_CPU_RD, &mapping); + -+ KBASE_TLSTREAM_TL_JD_DONE_START(kbdev, katom); -+ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); -+ KBASE_TLSTREAM_TL_JD_DONE_END(kbdev, katom); ++ if (!ptr) { ++ dev_warn(kctx->kbdev->dev, ++ "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", ++ __func__, idx, katom->jit_ids[idx], ++ reg->start_pfn << PAGE_SHIFT, ++ reg->heap_info_gpu_addr); ++ continue; ++ } + -+ /* Unblock cross dependency if present */ -+ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || -+ !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && -+ (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { -+ bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, -+ false); -+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; -+ dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", -+ (void *)x_dep); ++ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) { ++ read_val = READ_ONCE(*(u32 *)ptr); ++ used_pages = PFN_UP(read_val); ++ } else { ++ u64 addr_end; + -+ 
kbase_js_move_to_tree(x_dep); ++ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { ++ const unsigned long extension_bytes = ++ reg->extension << PAGE_SHIFT; ++ const u64 low_ptr = ptr[LOW]; ++ const u64 high_ptr = ptr[HIGH]; + -+ if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, -+ false)) -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, -+ x_dep->slot_nr); ++ /* As either the low or high pointer could ++ * consume their partition and move onto the ++ * next chunk, we need to account for both. ++ * In the case where nothing has been allocated ++ * from the high pointer the whole chunk could ++ * be backed unnecessarily - but the granularity ++ * is the chunk size anyway and any non-zero ++ * offset of low pointer from the start of the ++ * chunk would result in the whole chunk being ++ * backed. ++ */ ++ read_val = max(high_ptr, low_ptr); + -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { -+ dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", -+ (void *)x_dep); -+ return x_dep; ++ /* kbase_check_alloc_sizes() already satisfies ++ * this, but here to avoid future maintenance ++ * hazards ++ */ ++ WARN_ON(!is_power_of_2(extension_bytes)); ++ addr_end = ALIGN(read_val, extension_bytes); ++ } else { ++ addr_end = read_val = READ_ONCE(*ptr); ++ } ++ ++ if (addr_end >= (reg->start_pfn << PAGE_SHIFT)) ++ used_pages = PFN_UP(addr_end) - reg->start_pfn; ++ else ++ used_pages = reg->used_pages; + } -+ } else { -+ dev_dbg(kbdev->dev, -+ "No cross-slot dep to unblock for atom %pK\n", -+ (void *)katom); ++ ++ trace_mali_jit_report(katom, reg, idx, read_val, used_pages); ++ kbase_trace_jit_report_gpu_mem(kctx, reg, 0u); ++ ++ /* We can never have used more pages than the VA size of the ++ * region ++ */ ++ if (used_pages > reg->nr_pages) { ++ dev_warn(kctx->kbdev->dev, ++ "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", ++ __func__, idx, katom->jit_ids[idx], ++ reg->start_pfn << PAGE_SHIFT, ++ used_pages, reg->nr_pages, read_val, ++ (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? ++ "size" : "addr", ++ (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? ++ " with align" : ""); ++ used_pages = reg->nr_pages; ++ } ++ /* Note: one real use case has an atom correctly reporting 0 ++ * pages in use. This happens in normal use-cases but may only ++ * happen for a few of the application's frames. ++ */ ++ ++ kbase_vunmap(kctx, &mapping); ++ ++ kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u); + } + -+ return NULL; ++ kbase_jit_retry_pending_alloc(kctx); +} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+/** -+ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot -+ * dependency -+ * @katom: Pointer to an atom in the slot ringbuffer -+ * -+ * A cross-slot dependency is ignored if necessary to unblock incremental -+ * rendering. If the atom at the start of a renderpass used too much memory -+ * and was soft-stopped then the atom at the end of a renderpass is submitted -+ * to hardware regardless of its dependency on the start-of-renderpass atom. -+ * This can happen multiple times for the same pair of atoms. 
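
jd_update_jit_usage() above turns the end pointer(s) written back by the GPU into a physical page count for the JIT region. As a condensed sketch of that arithmetic for the KBASE_REG_TILER_ALIGN_TOP case -- not driver code, ignoring the "report is a size" variant, and with example_jit_used_pages being a made-up name -- the computation is:

#include <linux/kernel.h>	/* ALIGN(), max(), min() */
#include <linux/mm.h>		/* PAGE_SIZE */
#include <linux/pfn.h>		/* PFN_UP() */

/* Hypothetical illustration only -- not part of the kbase driver. */
static u64 example_jit_used_pages(u64 region_start, u64 region_nr_pages,
				  u64 low_ptr, u64 high_ptr, u64 extension_bytes)
{
	/* Either heap pointer may have consumed its partition, so take the
	 * higher of the two and round up to the next chunk boundary
	 * (extension_bytes must be a power of two, as the real code asserts). */
	u64 addr_end = ALIGN(max(high_ptr, low_ptr), extension_bytes);

	if (addr_end < region_start)
		return 0;	/* the real code keeps the previous count here */

	/* Usage can never exceed the VA size of the region. */
	return min(PFN_UP(addr_end - region_start), region_nr_pages);
}
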
-+ * -+ * Return: true to block the atom or false to allow it to be submitted to -+ * hardware -+ */ -+bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) ++bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately) +{ -+ struct kbase_context *const kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_jd_renderpass *rp; ++ struct kbase_context *kctx = katom->kctx; ++ struct list_head completed_jobs; ++ struct list_head runnable_jobs; ++ bool need_to_try_schedule_context = false; ++ int i; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ if (!(katom->atom_flags & -+ KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { -+ dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", -+ (void *)katom); -+ return false; -+ } ++ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom); + -+ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { -+ dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", -+ (void *)katom); -+ return true; -+ } ++ INIT_LIST_HEAD(&completed_jobs); ++ INIT_LIST_HEAD(&runnable_jobs); + -+ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= -+ ARRAY_SIZE(kctx->jctx.renderpasses), -+ "Should check invalid access to renderpasses"); ++ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + -+ rp = &kctx->jctx.renderpasses[katom->renderpass_id]; -+ /* We can read a subset of renderpass state without holding -+ * higher-level locks (but not end_katom, for example). ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) ++ jd_update_jit_usage(katom); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ /* This is needed in case an atom is failed due to being invalid, this ++ * can happen *before* the jobs that the atom depends on have completed + */ ++ for (i = 0; i < 2; i++) { ++ if (kbase_jd_katom_dep_atom(&katom->dep[i])) { ++ list_del(&katom->dep_item[i]); ++ kbase_jd_katom_dep_clear(&katom->dep[i]); ++ } ++ } + -+ WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); ++ jd_mark_atom_complete(katom); + -+ dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", -+ (int)rp->state); ++ list_add_tail(&katom->jd_item, &completed_jobs); + -+ if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) -+ return true; ++ while (!list_empty(&completed_jobs)) { ++ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); ++ list_del(completed_jobs.prev); ++ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + -+ /* Tiler ran out of memory so allow the fragment job chain to run -+ * if it only depends on the tiler job chain. 
-+ */ -+ if (katom->x_pre_dep != rp->start_katom) { -+ dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", -+ (void *)katom->x_pre_dep, (void *)rp->start_katom); -+ return true; ++ for (i = 0; i < 2; i++) ++ jd_resolve_dep(&runnable_jobs, katom, i, ++ kbase_ctx_flag(kctx, KCTX_DYING)); ++ ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_post_external_resources(katom); ++ ++ while (!list_empty(&runnable_jobs)) { ++ struct kbase_jd_atom *node; ++ ++ node = list_entry(runnable_jobs.next, ++ struct kbase_jd_atom, jd_item); ++ list_del(runnable_jobs.next); ++ node->in_jd_list = false; ++ ++ dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", ++ node, node->status); ++ ++ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); ++ if (node->status == KBASE_JD_ATOM_STATE_IN_JS) ++ continue; ++ ++ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ KBASE_TLSTREAM_TL_RUN_ATOM_START( ++ kctx->kbdev, node, ++ kbase_jd_atom_id(kctx, node)); ++ need_to_try_schedule_context |= jd_run_atom(node); ++ KBASE_TLSTREAM_TL_RUN_ATOM_END( ++ kctx->kbdev, node, ++ kbase_jd_atom_id(kctx, node)); ++ } else { ++ node->event_code = katom->event_code; ++ ++ if (node->core_req & ++ BASE_JD_REQ_SOFT_JOB) { ++ WARN_ON(!list_empty(&node->queue)); ++ kbase_finish_soft_job(node); ++ } ++ node->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ } ++ ++ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ list_add_tail(&node->jd_item, &completed_jobs); ++ } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && ++ !node->will_fail_event_code) { ++ /* Node successfully submitted, try submitting ++ * dependencies as they may now be representable ++ * in JS ++ */ ++ jd_try_submitting_deps(&runnable_jobs, node); ++ } ++ } ++ ++ /* Register a completed job as a disjoint event when the GPU ++ * is in a disjoint state (ie. being reset). ++ */ ++ kbase_disjoint_event_potential(kctx->kbdev); ++ if (post_immediately && list_empty(&kctx->completed_jobs)) ++ kbase_event_post(kctx, katom); ++ else ++ list_add_tail(&katom->jd_item, &kctx->completed_jobs); ++ ++ /* Decrement and check the TOTAL number of jobs. This includes ++ * those not tracked by the scheduler: 'not ready to run' and ++ * 'dependency-only' jobs. 
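
The completion path above drains a pair of work lists: each completed atom releases its dependents onto a runnable list, and anything that finishes immediately is fed back onto the completed list for another pass. A stripped-down sketch of that control flow -- purely illustrative, where toy_atom, resolve() and run() are stand-ins rather than driver symbols, and run() returns true when the atom completed immediately instead of being handed to the job scheduler -- looks like this:

#include <linux/list.h>
#include <linux/types.h>

/* Hypothetical illustration only -- not part of the kbase driver. */
struct toy_atom {
	struct list_head item;
};

static void example_propagate(struct toy_atom *first,
			      void (*resolve)(struct list_head *runnable,
					      struct toy_atom *done),
			      bool (*run)(struct toy_atom *atom))
{
	LIST_HEAD(completed);
	LIST_HEAD(runnable);

	list_add_tail(&first->item, &completed);

	while (!list_empty(&completed)) {
		struct toy_atom *done =
			list_first_entry(&completed, struct toy_atom, item);

		list_del(&done->item);

		/* Unblock anything that was waiting only on 'done'. */
		resolve(&runnable, done);

		while (!list_empty(&runnable)) {
			struct toy_atom *next =
				list_first_entry(&runnable, struct toy_atom, item);

			list_del(&next->item);

			/* Atoms that complete immediately feed back in. */
			if (run(next))
				list_add_tail(&next->item, &completed);
		}
	}
}
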
++ */ ++ if (--kctx->jctx.job_nr == 0) ++ /* All events are safely queued now, and we can signal ++ * any waiter that we've got no more jobs (so we can be ++ * safely terminated) ++ */ ++ wake_up(&kctx->jctx.zero_jobs_wait); + } ++ KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END(kctx->kbdev, katom); ++ return need_to_try_schedule_context; ++} + -+ dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", -+ (void *)katom->x_pre_dep); ++KBASE_EXPORT_TEST_API(kbase_jd_done_nolock); + -+ return false; ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++enum { ++ CORE_REQ_DEP_ONLY, ++ CORE_REQ_SOFT, ++ CORE_REQ_COMPUTE, ++ CORE_REQ_FRAGMENT, ++ CORE_REQ_VERTEX, ++ CORE_REQ_TILER, ++ CORE_REQ_FRAGMENT_VERTEX, ++ CORE_REQ_FRAGMENT_VERTEX_TILER, ++ CORE_REQ_FRAGMENT_TILER, ++ CORE_REQ_VERTEX_TILER, ++ CORE_REQ_UNKNOWN ++}; ++static const char * const core_req_strings[] = { ++ "Dependency Only Job", ++ "Soft Job", ++ "Compute Shader Job", ++ "Fragment Shader Job", ++ "Vertex/Geometry Shader Job", ++ "Tiler Job", ++ "Fragment Shader + Vertex/Geometry Shader Job", ++ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", ++ "Fragment Shader + Tiler Job", ++ "Vertex/Geometry Shader Job + Tiler Job", ++ "Unknown Job" ++}; ++static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) ++{ ++ if (core_req & BASE_JD_REQ_SOFT_JOB) ++ return core_req_strings[CORE_REQ_SOFT]; ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ return core_req_strings[CORE_REQ_COMPUTE]; ++ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { ++ case BASE_JD_REQ_DEP: ++ return core_req_strings[CORE_REQ_DEP_ONLY]; ++ case BASE_JD_REQ_FS: ++ return core_req_strings[CORE_REQ_FRAGMENT]; ++ case BASE_JD_REQ_CS: ++ return core_req_strings[CORE_REQ_VERTEX]; ++ case BASE_JD_REQ_T: ++ return core_req_strings[CORE_REQ_TILER]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): ++ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_T): ++ return core_req_strings[CORE_REQ_FRAGMENT_TILER]; ++ case (BASE_JD_REQ_CS | BASE_JD_REQ_T): ++ return core_req_strings[CORE_REQ_VERTEX_TILER]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): ++ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; ++ } ++ return core_req_strings[CORE_REQ_UNKNOWN]; +} ++#endif + -+void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) ++/* Trace an atom submission. 
*/ ++static void jd_trace_atom_submit(struct kbase_context *const kctx, ++ struct kbase_jd_atom *const katom, ++ int *priority) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; -+ bool timer_sync = false; -+ bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; -+ unsigned int js; ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); ++ KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); ++ KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); ++ if (priority) ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); ++ kbase_kinstr_jm_atom_queue(katom); ++} + -+ dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", -+ __func__, (void *)kbdev, (unsigned int)js_mask); ++static bool jd_submit_atom(struct kbase_context *const kctx, ++ const struct base_jd_atom *const user_atom, ++ const struct base_jd_fragment *const user_jc_incr, ++ struct kbase_jd_atom *const katom) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_jd_context *jctx = &kctx->jctx; ++ int queued = 0; ++ int i; ++ int sched_prio; ++ bool will_fail = false; ++ unsigned long flags; ++ enum kbase_jd_atom_state status; + -+ js_devdata = &kbdev->js_data; ++ dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); + -+ down(&js_devdata->schedule_sem); -+ mutex_lock(&js_devdata->queue_mutex); ++ /* Update the TOTAL number of jobs. This includes those not tracked by ++ * the scheduler: 'not ready to run' and 'dependency-only' jobs. ++ */ ++ jctx->job_nr++; + -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ last_active[js] = kbdev->hwaccess.active_kctx[js]; -+ ctx_waiting[js] = false; ++#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE ++ katom->start_timestamp.tv64 = 0; ++#else ++ katom->start_timestamp = 0; ++#endif ++ katom->udata = user_atom->udata; ++ katom->kctx = kctx; ++ katom->nr_extres = user_atom->nr_extres; ++ katom->extres = NULL; ++ katom->device_nr = user_atom->device_nr; ++ katom->jc = user_atom->jc; ++ katom->core_req = user_atom->core_req; ++ katom->jobslot = user_atom->jobslot; ++ katom->seq_nr = user_atom->seq_nr; ++ katom->atom_flags = 0; ++ katom->need_cache_flush_cores_retained = 0; ++ katom->pre_dep = NULL; ++ katom->post_dep = NULL; ++ katom->x_pre_dep = NULL; ++ katom->x_post_dep = NULL; ++ katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; ++ katom->softjob_data = NULL; ++ ++ trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* Older API version atoms might have random values where jit_id now ++ * lives, but we must maintain backwards compatibility - handle the ++ * issue. ++ */ ++ if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { ++ katom->jit_ids[0] = 0; ++ katom->jit_ids[1] = 0; ++ } else { ++ katom->jit_ids[0] = user_atom->jit_id[0]; ++ katom->jit_ids[1] = user_atom->jit_id[1]; + } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ while (js_mask) { -+ js = ffs(js_mask) - 1; ++ katom->renderpass_id = user_atom->renderpass_id; + -+ while (1) { -+ struct kbase_context *kctx; -+ unsigned long flags; -+ bool context_idle = false; ++ /* Implicitly sets katom->protected_state.enter as well. 
*/ ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + -+ kctx = kbase_js_ctx_list_pop_head(kbdev, js); ++ katom->age = kctx->age_count++; + -+ if (!kctx) { -+ js_mask &= ~(1 << js); -+ dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js); -+ break; -+ } ++ INIT_LIST_HEAD(&katom->queue); ++ INIT_LIST_HEAD(&katom->jd_item); + -+ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { -+ context_idle = true; ++ /* Don't do anything if there is a mess up with dependencies. ++ * This is done in a separate cycle to check both the dependencies at ones, otherwise ++ * it will be extra complexity to deal with 1st dependency ( just added to the list ) ++ * if only the 2nd one has invalid config. ++ */ ++ for (i = 0; i < 2; i++) { ++ int dep_atom_number = user_atom->pre_dep[i].atom_id; ++ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; + -+ dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx, -+ js); ++ if (dep_atom_number) { ++ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && ++ dep_atom_type != BASE_JD_DEP_TYPE_DATA) { ++ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ dev_dbg(kbdev->dev, ++ "Atom %pK status to completed\n", ++ (void *)katom); + -+ if (kbase_pm_context_active_handle_suspend( -+ kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { -+ dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js); -+ /* Suspend pending - return context to -+ * queue and stop scheduling -+ */ -+ mutex_lock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ if (kbase_js_ctx_list_add_pullable_head( -+ kctx->kbdev, kctx, js)) -+ kbase_js_sync_timers(kbdev); -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ up(&js_devdata->schedule_sem); -+ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, -+ 0); -+ return; -+ } -+ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); ++ /* Wrong dependency setup. Atom will be sent ++ * back to user space. Do not record any ++ * dependencies. 
++ */ ++ jd_trace_atom_submit(kctx, katom, NULL); ++ return kbase_jd_done_nolock(katom, true); + } ++ } ++ } + -+ if (!kbase_js_use_ctx(kbdev, kctx, js)) { -+ mutex_lock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ /* Add dependencies */ ++ for (i = 0; i < 2; i++) { ++ int dep_atom_number = user_atom->pre_dep[i].atom_id; ++ base_jd_dep_type dep_atom_type; ++ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + -+ dev_dbg(kbdev->dev, -+ "kctx %pK cannot be used at this time\n", -+ kctx); ++ dep_atom_type = user_atom->pre_dep[i].dependency_type; ++ kbase_jd_katom_dep_clear(&katom->dep[i]); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbase_js_ctx_pullable(kctx, js, false) -+ || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_head_nolock( -+ kctx->kbdev, kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, kctx, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ if (context_idle) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } ++ if (!dep_atom_number) ++ continue; + -+ /* No more jobs can be submitted on this slot */ -+ js_mask &= ~(1 << js); -+ break; ++ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || ++ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ ++ if (dep_atom->event_code == BASE_JD_EVENT_DONE) ++ continue; ++ /* don't stop this atom if it has an order dependency ++ * only to the failed one, try to submit it through ++ * the normal path ++ */ ++ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && ++ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { ++ continue; + } -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ kbase_ctx_flag_clear(kctx, KCTX_PULLED); ++ /* Atom has completed, propagate the error code if any */ ++ katom->event_code = dep_atom->event_code; ++ katom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", ++ (void *)katom); + -+ if (!kbase_jm_kick(kbdev, 1 << js)) { -+ dev_dbg(kbdev->dev, "No more jobs can be submitted (s:%u)\n", js); -+ js_mask &= ~(1 << js); -+ } -+ if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { -+ bool pullable; ++ /* This atom will be sent back to user space. ++ * Do not record any dependencies. ++ */ ++ jd_trace_atom_submit(kctx, katom, NULL); + -+ dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", -+ (void *)kctx, js); ++ will_fail = true; + -+ pullable = kbase_js_ctx_pullable(kctx, js, -+ true); ++ } else { ++ /* Atom is in progress, add this atom to the list */ ++ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); ++ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); ++ queued = 1; ++ } ++ } + -+ /* Failed to pull jobs - push to head of list. -+ * Unless this context is already 'active', in -+ * which case it's effectively already scheduled -+ * so push it to the back of the list. 
++ if (will_fail) { ++ if (!queued) { ++ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { ++ /* This softjob has failed due to a previous ++ * dependency, however we should still run the ++ * prepare & finish functions + */ -+ if (pullable && kctx == last_active[js] && -+ kbase_ctx_flag(kctx, -+ (KCTX_PULLED_SINCE_ACTIVE_JS0 << -+ js))) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kctx->kbdev, -+ kctx, js); -+ else if (pullable) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_head_nolock( -+ kctx->kbdev, -+ kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, -+ kctx, js); ++ int err = kbase_prepare_soft_job(katom); + -+ /* If this context is not the active context, -+ * but the active context is pullable on this -+ * slot, then we need to remove the active -+ * marker to prevent it from submitting atoms in -+ * the IRQ handler, which would prevent this -+ * context from making progress. -+ */ -+ if (last_active[js] && kctx != last_active[js] -+ && kbase_js_ctx_pullable( -+ last_active[js], js, true)) -+ ctx_waiting[js] = true; ++ if (err >= 0) ++ kbase_finish_soft_job(katom); ++ } ++ return kbase_jd_done_nolock(katom, true); ++ } + -+ if (context_idle) { -+ kbase_jm_idle_ctx(kbdev, kctx); -+ spin_unlock_irqrestore( -+ &kbdev->hwaccess_lock, -+ flags); -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } else { -+ spin_unlock_irqrestore( -+ &kbdev->hwaccess_lock, -+ flags); -+ } -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ katom->will_fail_event_code = katom->event_code; ++ } + -+ js_mask &= ~(1 << js); -+ break; /* Could not run atoms on this slot */ -+ } ++ /* These must occur after the above loop to ensure that an atom ++ * that depends on a previous atom with the same number behaves ++ * as expected ++ */ ++ katom->event_code = BASE_JD_EVENT_DONE; ++ katom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); + -+ dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", -+ (void *)kctx); -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kctx->kbdev, kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, kctx, js); ++ /* For invalid priority, be most lenient and choose the default */ ++ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); ++ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) ++ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ /* Cap the priority to jctx.max_priority */ ++ katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? ++ kctx->jctx.max_priority : sched_prio; ++ ++ /* Create a new atom. 
*/ ++ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); ++ ++#if !MALI_INCREMENTAL_RENDERING_JM ++ /* Reject atoms for incremental rendering if not supported */ ++ if (katom->core_req & ++ (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { ++ dev_err(kctx->kbdev->dev, ++ "Rejecting atom with unsupported core_req 0x%x\n", ++ katom->core_req); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++#endif /* !MALI_INCREMENTAL_RENDERING_JM */ ++ ++ if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { ++ WARN_ON(katom->jc != 0); ++ katom->jc_fragment = *user_jc_incr; ++ } else if (!katom->jc && ++ (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { ++ /* Reject atoms with job chain = NULL, as these cause issues ++ * with soft-stop ++ */ ++ dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++ ++ /* Reject atoms with an invalid device_nr */ ++ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && ++ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { ++ dev_err(kctx->kbdev->dev, ++ "Rejecting atom with invalid device_nr %d\n", ++ katom->device_nr); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++ ++ /* Reject atoms with invalid core requirements */ ++ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && ++ (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { ++ dev_err(kctx->kbdev->dev, ++ "Rejecting atom with invalid core requirements\n"); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; ++ return kbase_jd_done_nolock(katom, true); ++ } ++ ++ /* Reject soft-job atom of certain types from accessing external resources */ ++ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && ++ (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || ++ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || ++ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { ++ dev_err(kctx->kbdev->dev, ++ "Rejecting soft-job atom accessing external resources\n"); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++ ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++ /* handle what we need to do to access the external resources */ ++ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { ++ /* setup failed (no access, bad resource, unknown resource types, etc.) */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); + } + } + -+ if (timer_sync) -+ kbase_js_sync_timers(kbdev); ++#if !MALI_JIT_PRESSURE_LIMIT_BASE ++ if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && ++ (user_atom->jit_id[0] || user_atom->jit_id[1])) { ++ /* JIT pressure limit is disabled, but we are receiving non-0 ++ * JIT IDs - atom is invalid. ++ */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ if (kbdev->hwaccess.active_kctx[js] == last_active[js] && -+ ctx_waiting[js]) { -+ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", -+ (void *)last_active[js], js); -+ kbdev->hwaccess.active_kctx[js] = NULL; ++ /* Validate the atom. Function will return error if the atom is ++ * malformed. 
++ * ++ * Soft-jobs never enter the job scheduler but have their own initialize method. ++ * ++ * If either fail then we immediately complete the atom with an error. ++ */ ++ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { ++ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); ++ } ++ } else { ++ /* Soft-job */ ++ if (kbase_prepare_soft_job(katom) != 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return kbase_jd_done_nolock(katom, true); + } + } + -+ mutex_unlock(&js_devdata->queue_mutex); -+ up(&js_devdata->schedule_sem); -+ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, 0); ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) ++ katom->work_id = atomic_inc_return(&jctx->work_id); ++ trace_gpu_job_enqueue(kctx->id, katom->work_id, ++ kbasep_map_core_reqs_to_string(katom->core_req)); ++#endif ++ ++ if (queued && !IS_GPU_ATOM(katom)) ++ return false; ++ ++ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { ++ if (kbase_process_soft_job(katom) == 0) { ++ kbase_finish_soft_job(katom); ++ return kbase_jd_done_nolock(katom, true); ++ } ++ return false; ++ } ++ ++ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { ++ bool need_to_try_schedule_context; ++ ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", ++ (void *)katom); ++ ++ need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); ++ /* If job was cancelled then resolve immediately */ ++ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) ++ return need_to_try_schedule_context; ++ ++ /* Synchronize with backend reset */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ status = katom->status; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { ++ dev_dbg(kctx->kbdev->dev, ++ "Atom %d cancelled on HW\n", ++ kbase_jd_atom_id(katom->kctx, katom)); ++ return need_to_try_schedule_context; ++ } ++ } ++ ++ /* This is a pure dependency. Resolve it immediately */ ++ return kbase_jd_done_nolock(katom, true); +} + -+void kbase_js_zap_context(struct kbase_context *kctx) ++int kbase_jd_submit(struct kbase_context *kctx, ++ void __user *user_addr, u32 nr_atoms, u32 stride, ++ bool uk6_atom) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; ++ struct kbase_jd_context *jctx = &kctx->jctx; ++ int err = 0; ++ int i; ++ bool need_to_try_schedule_context = false; ++ struct kbase_device *kbdev; ++ u32 latest_flush; ++ ++ bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || ++ stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + + /* -+ * Critical assumption: No more submission is possible outside of the -+ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) -+ * whilst the struct kbase_context is terminating. 
++ * kbase_jd_submit isn't expected to fail and so all errors with the ++ * jobs are reported by immediately failing them (through event system) + */ ++ kbdev = kctx->kbdev; + -+ /* First, atomically do the following: -+ * - mark the context as dying -+ * - try to evict it from the queue -+ */ -+ mutex_lock(&kctx->jctx.lock); -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ kbase_ctx_flag_set(kctx, KCTX_DYING); ++ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); ++ return -EINVAL; ++ } + -+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); ++ if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && ++ stride != sizeof(struct base_jd_atom_v2) && ++ stride != offsetof(struct base_jd_atom, renderpass_id) && ++ stride != sizeof(struct base_jd_atom)) { ++ dev_err(kbdev->dev, ++ "Stride %u passed to job_submit isn't supported by the kernel\n", ++ stride); ++ return -EINVAL; ++ } + -+ /* -+ * At this point we know: -+ * - If eviction succeeded, it was in the queue, but now no -+ * longer is -+ * - We must cancel the jobs here. No Power Manager active reference to -+ * release. -+ * - This happens asynchronously - kbase_jd_zap_context() will wait for -+ * those jobs to be killed. -+ * - If eviction failed, then it wasn't in the queue. It is one -+ * of the following: -+ * - a. it didn't have any jobs, and so is not in the Queue or -+ * the Run Pool (not scheduled) -+ * - Hence, no more work required to cancel jobs. No Power Manager -+ * active reference to release. -+ * - b. it was in the middle of a scheduling transaction (and thus must -+ * have at least 1 job). This can happen from a syscall or a -+ * kernel thread. We still hold the jsctx_mutex, and so the thread -+ * must be waiting inside kbasep_js_try_schedule_head_ctx(), -+ * before checking whether the runpool is full. That thread will -+ * continue after we drop the mutex, and will notice the context -+ * is dying. It will rollback the transaction, killing all jobs at -+ * the same time. kbase_jd_zap_context() will wait for those jobs -+ * to be killed. -+ * - Hence, no more work required to cancel jobs, or to release the -+ * Power Manager active reference. -+ * - c. it is scheduled, and may or may not be running jobs -+ * - We must cause it to leave the runpool by stopping it from -+ * submitting any more jobs. When it finally does leave, -+ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs -+ * (because it is dying), release the Power Manager active reference, -+ * and will not requeue the context in the queue. -+ * kbase_jd_zap_context() will wait for those jobs to be killed. -+ * - Hence, work required just to make it leave the runpool. Cancelling -+ * jobs and releasing the Power manager active reference will be -+ * handled when it leaves the runpool. 
-+ */ -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { -+ unsigned long flags; -+ unsigned int js; ++ if (nr_atoms > BASE_JD_ATOM_COUNT) { ++ dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d", ++ nr_atoms, kctx->tgid, kctx->id); ++ return -EINVAL; ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (!list_empty( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* All atoms submitted in this call have the same flush ID */ ++ latest_flush = kbase_backend_get_current_flush_id(kbdev); + -+ /* The following events require us to kill off remaining jobs -+ * and update PM book-keeping: -+ * - we evicted it correctly (it must have jobs to be in the -+ * Queue) -+ * -+ * These events need no action, but take this path anyway: -+ * - Case a: it didn't have any jobs, and was never in the Queue -+ * - Case b: scheduling transaction will be partially rolled- -+ * back (this already cancels the jobs) -+ */ ++ for (i = 0; i < nr_atoms; i++) { ++ struct base_jd_atom user_atom = { ++ .seq_nr = 0, ++ }; ++ struct base_jd_fragment user_jc_incr; ++ struct kbase_jd_atom *katom; + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ if (unlikely(jd_atom_is_v2)) { ++ if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { ++ dev_dbg(kbdev->dev, ++ "Invalid atom address %pK passed to job_submit\n", ++ user_addr); ++ err = -EFAULT; ++ break; ++ } + -+ dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); ++ /* no seq_nr in v2 */ ++ user_atom.seq_nr = 0; ++ } else { ++ if (copy_from_user(&user_atom, user_addr, stride) != 0) { ++ dev_dbg(kbdev->dev, ++ "Invalid atom address %pK passed to job_submit\n", ++ user_addr); ++ err = -EFAULT; ++ break; ++ } ++ } + -+ /* Only cancel jobs when we evicted from the -+ * queue. No Power Manager active reference was held. -+ * -+ * Having is_dying set ensures that this kills, and doesn't -+ * requeue -+ */ -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); ++ if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { ++ dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); ++ user_atom.renderpass_id = 0; ++ } else { ++ /* Ensure all padding bytes are 0 for potential future ++ * extension ++ */ ++ size_t j; + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ mutex_unlock(&kctx->jctx.lock); -+ } else { -+ unsigned long flags; -+ bool was_retained; -+ CSTD_UNUSED(was_retained); ++ dev_dbg(kbdev->dev, "Renderpass ID is %d\n", ++ user_atom.renderpass_id); ++ for (j = 0; j < sizeof(user_atom.padding); j++) { ++ if (user_atom.padding[j]) { ++ dev_err(kbdev->dev, ++ "Bad padding byte %zu: %d\n", ++ j, user_atom.padding[j]); ++ err = -EINVAL; ++ break; ++ } ++ } ++ if (err) ++ break; ++ } + -+ /* Case c: didn't evict, but it is scheduled - it's in the Run -+ * Pool ++ /* In this case 'jc' is the CPU address of a struct ++ * instead of a GPU address of a job chain. 
+ */ -+ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); ++ if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { ++ if (copy_from_user(&user_jc_incr, ++ u64_to_user_ptr(user_atom.jc), ++ sizeof(user_jc_incr))) { ++ dev_err(kbdev->dev, ++ "Invalid jc address 0x%llx passed to job_submit\n", ++ user_atom.jc); ++ err = -EFAULT; ++ break; ++ } ++ dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); ++ user_atom.jc = 0; ++ } + -+ /* Disable the ctx from submitting any more jobs */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ user_addr = (void __user *)((uintptr_t) user_addr + stride); + -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ mutex_lock(&jctx->lock); ++#ifndef compiletime_assert ++#define compiletime_assert_defined ++#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ ++while (false) ++#endif ++ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == ++ BASE_JD_ATOM_COUNT, ++ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); ++ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == ++ sizeof(user_atom.atom_number), ++ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); ++#ifdef compiletime_assert_defined ++#undef compiletime_assert ++#undef compiletime_assert_defined ++#endif ++ katom = &jctx->atoms[user_atom.atom_number]; + -+ /* Retain and (later) release the context whilst it is now -+ * disallowed from submitting jobs - ensures that someone -+ * somewhere will be removing the context later on -+ */ -+ was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); ++ /* Record the flush ID for the cache flush optimisation */ ++ katom->flush_id = latest_flush; + -+ /* Since it's scheduled and we have the jsctx_mutex, it must be -+ * retained successfully -+ */ -+ KBASE_DEBUG_ASSERT(was_retained); ++ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { ++ /* Atom number is already in use, wait for the atom to ++ * complete ++ */ ++ mutex_unlock(&jctx->lock); + -+ dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); ++ /* This thread will wait for the atom to complete. Due ++ * to thread scheduling we are not sure that the other ++ * thread that owns the atom will also schedule the ++ * context, so we force the scheduler to be active and ++ * hence eventually schedule this context at some point ++ * later. ++ */ ++ kbase_js_sched_all(kbdev); + -+ /* Cancel any remaining running jobs for this kctx - if any. -+ * Submit is disallowed which takes effect immediately, so no -+ * more new jobs will appear after we do this. ++ if (wait_event_killable(katom->completed, ++ katom->status == ++ KBASE_JD_ATOM_STATE_UNUSED) != 0) { ++ /* We're being killed so the result code ++ * doesn't really matter ++ */ ++ return 0; ++ } ++ mutex_lock(&jctx->lock); ++ } ++ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom); ++ need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, ++ &user_jc_incr, katom); ++ KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom); ++ /* Register a completed job as a disjoint event when the GPU is in a disjoint state ++ * (ie. being reset). 
+ */ -+ kbase_backend_jm_kill_running_jobs_from_kctx(kctx); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ mutex_unlock(&kctx->jctx.lock); -+ -+ dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", -+ kctx); ++ kbase_disjoint_event_potential(kbdev); + -+ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ mutex_unlock(&jctx->lock); ++ if (fatal_signal_pending(current)) { ++ dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", ++ kctx->tgid, kctx->id); ++ /* We're being killed so the result code doesn't really matter */ ++ return 0; ++ } + } + -+ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); ++ if (need_to_try_schedule_context) ++ kbase_js_sched_all(kbdev); + -+ /* After this, you must wait on both the -+ * kbase_jd_context::zero_jobs_wait and the -+ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs -+ * to be destroyed, and the context to be de-scheduled (if it was on the -+ * runpool). -+ * -+ * kbase_jd_zap_context() will do this. -+ */ ++ return err; +} + -+static inline int trace_get_refcnt(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ return atomic_read(&kctx->refcount); -+} ++KBASE_EXPORT_TEST_API(kbase_jd_submit); + -+/** -+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context -+ * @kctx: Pointer to context. -+ * @callback: Pointer to function to call for each job. -+ * -+ * Call a function on all jobs belonging to a non-queued, non-running -+ * context, and detach the jobs from the context as it goes. -+ * -+ * Due to the locks that might be held at the time of the call, the callback -+ * may need to defer work on a workqueue to complete its actions (e.g. when -+ * cancelling jobs) -+ * -+ * Atoms will be removed from the queue, so this must only be called when -+ * cancelling jobs (which occurs as part of context destruction). -+ * -+ * The locking conditions on the caller are as follows: -+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. 
-+ */ -+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, -+ kbasep_js_ctx_job_cb *callback) ++void kbase_jd_done_worker(struct work_struct *data) +{ ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); ++ struct kbase_jd_context *jctx; ++ struct kbase_context *kctx; ++ struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; -+ unsigned long flags; -+ unsigned int js; ++ struct kbasep_js_device_data *js_devdata; ++ u64 cache_jc = katom->jc; ++ struct kbasep_js_atom_retained_state katom_retained_state; ++ bool context_idle; ++ base_jd_core_req core_req = katom->core_req; + -+ kbdev = kctx->kbdev; ++ /* Soft jobs should never reach this function */ ++ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = katom->kctx; ++ jctx = &kctx->jctx; ++ kbdev = kctx->kbdev; ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, -+ 0u, trace_get_refcnt(kbdev, kctx)); ++ dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", ++ (void *)katom, (void *)kctx); + -+ /* Invoke callback on jobs on each slot in turn */ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ jsctx_queue_foreach(kctx, js, callback); ++ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ kbase_backend_complete_wq(kbdev, katom); + -+base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority) -+{ -+ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; -+ int req_priority, out_priority; ++ /* ++ * Begin transaction on JD context and JS context ++ */ ++ mutex_lock(&jctx->lock); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE); ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ req_priority = kbasep_js_atom_prio_to_sched_prio(priority); -+ out_priority = req_priority; -+ /* Does not use pcm defined priority check if PCM not defined or if -+ * kbasep_js_atom_prio_to_sched_prio returns an error -+ * (KBASE_JS_ATOM_SCHED_PRIO_INVALID). ++ /* This worker only gets called on contexts that are scheduled *in*. This is ++ * because it only happens in response to an IRQ from a job that was ++ * running. + */ -+ if (pcm_device && (req_priority != KBASE_JS_ATOM_SCHED_PRIO_INVALID)) -+ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, -+ req_priority); -+ return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h -new file mode 100644 -index 000000000..89c3b45c7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h -@@ -0,0 +1,36 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+/** -+ * DOC: Job Scheduler APIs. -+ */ ++ if (katom->event_code == BASE_JD_EVENT_STOPPED) { ++ unsigned long flags; + -+#ifndef _KBASE_JS_H_ -+#define _KBASE_JS_H_ ++ dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", ++ (void *)katom); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+#include "context/mali_kbase_context.h" -+#include "mali_kbase_defs.h" -+#include "mali_kbase_debug.h" -+#include -+#include "jm/mali_kbase_jm_js.h" -+#include "jm/mali_kbase_js_defs.h" ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+#endif /* _KBASE_JS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c -new file mode 100644 -index 000000000..04ea06b2f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c -@@ -0,0 +1,298 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", ++ (void *)katom); ++ kbase_js_unpull(kctx, katom); + -+#include -+#include ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&jctx->lock); + -+/* -+ * Private functions follow -+ */ ++ return; ++ } + -+/** -+ * kbasep_js_ctx_attr_runpool_retain_attr - Check whether a ctx has a certain attribute -+ * and if so, retain that attribute on the runpool. -+ * -+ * @kbdev: Device pointer -+ * @kctx: KBase context -+ * @attribute: Atribute to check/retain -+ * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx is scheduled on the runpool -+ * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * false indicates no change in ctx attributes state of the runpool. 
-+ */ -+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; ++ if ((katom->event_code != BASE_JD_EVENT_DONE) && ++ (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) { ++ if (!kbase_is_quick_reset_enabled(kbdev)) ++ dev_err(kbdev->dev, ++ "t6xx: GPU fault 0x%02lx from job slot %d\n", ++ (unsigned long)katom->event_code, ++ katom->slot_nr); ++ } + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ /* Retain state before the katom disappears */ ++ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ context_idle = kbase_js_complete_atom_wq(kctx, katom); + -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); + -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { -+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); -+ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); ++ kbasep_js_remove_job(kbdev, kctx, katom); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ /* kbase_jd_done_nolock() requires the jsctx_mutex lock to be dropped */ ++ kbase_jd_done_nolock(katom, false); + -+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { -+ /* First refcount indicates a state change */ -+ runpool_state_changed = true; -+ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); ++ /* katom may have been freed now, do not use! */ ++ ++ if (context_idle) { ++ unsigned long flags; ++ ++ context_idle = false; ++ mutex_lock(&js_devdata->queue_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ /* If kbase_sched() has scheduled this context back in then ++ * KCTX_ACTIVE will have been set after we marked it as ++ * inactive, and another pm reference will have been taken, so ++ * drop our reference. But do not call kbase_jm_idle_ctx(), as ++ * the context is active and fast-starting is allowed. ++ * ++ * If an atom has been fast-started then ++ * kbase_jsctx_atoms_pulled(kctx) will return non-zero but ++ * KCTX_ACTIVE will still be false (as the previous pm ++ * reference has been inherited). Do NOT drop our reference, as ++ * it has been re-used, and leave the context as active. ++ * ++ * If no new atoms have been started then KCTX_ACTIVE will ++ * still be false and kbase_jsctx_atoms_pulled(kctx) will ++ * return zero, so drop the reference and call ++ * kbase_jm_idle_ctx(). ++ * ++ * As the checks are done under both the queue_mutex and ++ * hwaccess_lock is should be impossible for this to race ++ * with the scheduler code. ++ */ ++ if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || ++ !kbase_jsctx_atoms_pulled(kctx)) { ++ /* Calling kbase_jm_idle_ctx() here will ensure that ++ * atoms are not fast-started when we drop the ++ * hwaccess_lock. This is not performed if ++ * KCTX_ACTIVE is set as in that case another pm ++ * reference has been taken and a fast-start would be ++ * valid. 
++ */ ++ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) ++ kbase_jm_idle_ctx(kbdev, kctx); ++ context_idle = true; ++ } else { ++ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->queue_mutex); + } + -+ return runpool_state_changed; -+} ++ /* ++ * Transaction complete ++ */ ++ mutex_unlock(&jctx->lock); + -+/** -+ * kbasep_js_ctx_attr_runpool_release_attr - Check whether a ctx has a certain attribute, -+ * and if so, release that attribute on the runpool. -+ * -+ * @kbdev: Device pointer -+ * @kctx: KBase context -+ * @attribute: Atribute to release -+ * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx is scheduled on the runpool -+ * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * false indicates no change in ctx attributes state of the runpool. -+ */ -+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; ++ /* Job is now no longer running, so can now safely release the context ++ * reference, and handle any actions that were logged against the ++ * atom's retained state ++ */ + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ kbase_js_sched_all(kbdev); + -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { -+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); -+ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); ++ if (!atomic_dec_return(&kctx->work_count)) { ++ /* If worker now idle then post all events that kbase_jd_done_nolock() ++ * has queued ++ */ ++ mutex_lock(&jctx->lock); ++ while (!list_empty(&kctx->completed_jobs)) { ++ struct kbase_jd_atom *atom = list_entry( ++ kctx->completed_jobs.next, ++ struct kbase_jd_atom, jd_item); ++ list_del(kctx->completed_jobs.next); + -+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { -+ /* Last de-refcount indicates a state change */ -+ runpool_state_changed = true; -+ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); ++ kbase_event_post(kctx, atom); + } ++ mutex_unlock(&jctx->lock); + } + -+ return runpool_state_changed; ++ kbase_backend_complete_wq_post_sched(kbdev, core_req); ++ ++ if (context_idle) ++ kbase_pm_context_idle(kbdev); ++ ++ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); ++ ++ dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", ++ (void *)katom, (void *)kctx); +} + +/** -+ * kbasep_js_ctx_attr_ctx_retain_attr - Retain a certain attribute on a ctx, -+ * also retaining it on the runpool if the context is scheduled. ++ * jd_cancel_worker - Work queue job cancel function. 
++ * @data: a &struct work_struct + * -+ * @kbdev: Device pointer -+ * @kctx: KBase context -+ * @attribute: Atribute to retain ++ * Only called as part of 'Zapping' a context (which occurs on termination). ++ * Operates serially with the kbase_jd_done_worker() on the work queue. + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * This can only be called on contexts that aren't scheduled. + * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * This may allow the scheduler to submit more jobs than previously. -+ * false indicates no change in ctx attributes state of the runpool. ++ * We don't need to release most of the resources that would occur on ++ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be ++ * running (by virtue of only being called on contexts that aren't ++ * scheduled). + */ -+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++static void jd_cancel_worker(struct work_struct *data) +{ ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); ++ struct kbase_jd_context *jctx; ++ struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; ++ bool need_to_try_schedule_context; ++ bool attr_state_changed; ++ struct kbase_device *kbdev; ++ CSTD_UNUSED(need_to_try_schedule_context); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ /* Soft jobs should never reach this function */ ++ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); ++ ++ kctx = katom->kctx; ++ kbdev = kctx->kbdev; ++ jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); ++ KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + -+ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); ++ /* This only gets called on contexts that are scheduled out. Hence, we must ++ * make sure we don't de-ref the number of running jobs (there aren't ++ * any), nor must we try to schedule out the context (it's already ++ * scheduled out). ++ */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { -+ /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ -+ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); -+ } ++ /* Scheduler: Remove the job from the system */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ return runpool_state_changed; ++ mutex_lock(&jctx->lock); ++ ++ need_to_try_schedule_context = kbase_jd_done_nolock(katom, true); ++ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to ++ * schedule the context. There's also no need for the jsctx_mutex to have been taken ++ * around this too. 
++ */ ++ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); ++ ++ /* katom may have been freed now, do not use! */ ++ mutex_unlock(&jctx->lock); ++ ++ if (attr_state_changed) ++ kbase_js_sched_all(kbdev); +} + +/** -+ * kbasep_js_ctx_attr_ctx_release_attr - Release a certain attribute on a ctx, -+ * also releasing it from the runpool if the context is scheduled. ++ * kbase_jd_done - Complete a job that has been removed from the Hardware ++ * @katom: atom which has been completed ++ * @slot_nr: slot the atom was on ++ * @end_timestamp: completion time ++ * @done_code: completion code + * -+ * @kbdev: Device pointer -+ * @kctx: KBase context -+ * @attribute: Atribute to release ++ * This must be used whenever a job has been removed from the Hardware, e.g.: ++ * An IRQ indicates that the job finished (for both error and 'done' codes), or ++ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * Some work is carried out immediately, and the rest is deferred onto a ++ * workqueue + * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * This may allow the scheduler to submit more jobs than previously. -+ * false indicates no change in ctx attributes state of the runpool. ++ * Context: ++ * This can be called safely from atomic context. ++ * The caller must hold kbdev->hwaccess_lock + */ -+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ++ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_kctx_info = &kctx->jctx.sched_info; ++ KBASE_DEBUG_ASSERT(katom); ++ kctx = katom->kctx; ++ KBASE_DEBUG_ASSERT(kctx); ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); -+ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); -+ } ++ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) ++ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + -+ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ -+ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); ++ KBASE_KTRACE_ADD_JM(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + -+ return runpool_state_changed; -+} ++ kbase_job_check_leave_disjoint(kbdev, katom); + -+/* -+ * More commonly used public functions -+ */ ++ katom->slot_nr = slot_nr; + -+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ bool runpool_state_changed; -+ int i; ++ atomic_inc(&kctx->work_count); + -+ /* Retain any existing 
attributes */ -+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { -+ /* The context is being scheduled in, so update the runpool with the new attributes */ -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ /* a failed job happened and is waiting for dumping*/ ++ if (!katom->will_fail_event_code && ++ kbase_debug_job_fault_process(katom, katom->event_code)) ++ return; ++#endif + -+ /* We don't need to know about state changed, because retaining a -+ * context occurs on scheduling it, and that itself will also try -+ * to run new atoms -+ */ -+ CSTD_UNUSED(runpool_state_changed); -+ } -+ } ++ WARN_ON(work_pending(&katom->work)); ++ INIT_WORK(&katom->work, kbase_jd_done_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} + -+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) ++KBASE_EXPORT_TEST_API(kbase_jd_done); ++ ++void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ -+ bool runpool_state_changed = false; -+ int i; ++ struct kbase_context *kctx; + -+ /* Release any existing attributes */ -+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { -+ /* The context is being scheduled out, so update the runpool on the removed attributes */ -+ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); -+ } -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); ++ kctx = katom->kctx; ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ return runpool_state_changed; ++ dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); ++ KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); ++ ++ /* This should only be done from a context that is not scheduled */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ ++ WARN_ON(work_pending(&katom->work)); ++ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ ++ INIT_WORK(&katom->work, jd_cancel_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} + -+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) ++ ++void kbase_jd_zap_context(struct kbase_context *kctx) +{ -+ bool runpool_state_changed = false; -+ base_jd_core_req core_req; ++ struct kbase_jd_atom *katom; ++ struct list_head *entry, *tmp; ++ struct kbase_device *kbdev; + -+ KBASE_DEBUG_ASSERT(katom); -+ core_req = katom->core_req; ++ KBASE_DEBUG_ASSERT(kctx); + -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); -+ else -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); ++ kbdev = kctx->kbdev; + -+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { -+ /* Atom that can run on slot1 or slot2, and can use all cores */ -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); -+ } ++ KBASE_KTRACE_ADD_JM(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + -+ /* We don't need to know about state changed, because retaining an atom -+ * occurs on adding 
it, and that itself will also try to run new atoms ++ kbase_js_zap_context(kctx); ++ ++ mutex_lock(&kctx->jctx.lock); ++ ++ /* ++ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are ++ * queued outside the job scheduler. + */ -+ CSTD_UNUSED(runpool_state_changed); ++ ++ del_timer_sync(&kctx->soft_job_timeout); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ katom = list_entry(entry, struct kbase_jd_atom, queue); ++ kbase_cancel_soft_job(katom); ++ } ++ ++ mutex_unlock(&kctx->jctx.lock); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ kbase_debug_job_fault_kctx_unblock(kctx); ++#endif ++ ++ kbase_jm_wait_for_zero_jobs(kctx); +} + -+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) ++KBASE_EXPORT_TEST_API(kbase_jd_zap_context); ++ ++int kbase_jd_init(struct kbase_context *kctx) +{ -+ bool runpool_state_changed = false; -+ base_jd_core_req core_req; ++ int i; ++ int mali_err = 0; ++ struct priority_control_manager_device *pcm_device = NULL; + -+ KBASE_DEBUG_ASSERT(katom_retained_state); -+ core_req = katom_retained_state->core_req; ++ KBASE_DEBUG_ASSERT(kctx); ++ pcm_device = kctx->kbdev->pcm_dev; ++ kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + -+ /* No-op for invalid atoms */ -+ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) -+ return false; ++ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (kctx->jctx.job_done_wq == NULL) { ++ mali_err = -ENOMEM; ++ goto out1; ++ } + -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); -+ else -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); ++ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { ++ init_waitqueue_head(&kctx->jctx.atoms[i].completed); + -+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { -+ /* Atom that can run on slot1 or slot2, and can use all cores */ -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); -+ } ++ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); ++ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + -+ return runpool_state_changed; -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h -new file mode 100644 -index 000000000..2dc640d5a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h -@@ -0,0 +1,147 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ ++ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; ++ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + -+/** -+ * DOC: Job Scheduler Context Attribute APIs -+ */ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ kctx->jctx.atoms[i].dma_fence.context = ++ dma_fence_context_alloc(1); ++ atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); ++#endif ++ } + -+#ifndef _KBASE_JS_CTX_ATTR_H_ -+#define _KBASE_JS_CTX_ATTR_H_ ++ for (i = 0; i < BASE_JD_RP_COUNT; i++) ++ kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; + -+/** -+ * kbasep_js_ctx_attr_runpool_retain_ctx - Retain all attributes of a context -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * -+ * This occurs on scheduling in the context on the runpool (but after -+ * is_scheduled is set) -+ * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx->is_scheduled is true -+ */ -+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++ mutex_init(&kctx->jctx.lock); + -+/** -+ * kbasep_js_ctx_attr_runpool_release_ctx - Release all attributes of a context -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * -+ * This occurs on scheduling out the context from the runpool (but before -+ * is_scheduled is cleared) -+ * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx->is_scheduled is true -+ * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * false indicates no change in ctx attributes state of the runpool. -+ */ -+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++ init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + -+/** -+ * kbasep_js_ctx_attr_ctx_retain_atom - Retain all attributes of an atom -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * @katom: Atom -+ * -+ * This occurs on adding an atom to a context -+ * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held -+ */ -+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); ++ spin_lock_init(&kctx->jctx.tb_lock); + -+/** -+ * kbasep_js_ctx_attr_ctx_release_atom - Release all attributes of an atom, -+ * given its retained state. -+ * -+ * @kbdev: KBase device -+ * @kctx: KBase context -+ * @katom_retained_state: Retained state -+ * -+ * This occurs after (permanently) removing an atom from a context -+ * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held -+ * -+ * This is a no-op when \a katom_retained_state is invalid. -+ * -+ * Return: true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * false indicates no change in ctx attributes state of the runpool. 
-+ */ -+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); ++ kctx->jctx.job_nr = 0; ++ INIT_LIST_HEAD(&kctx->completed_jobs); ++ atomic_set(&kctx->work_count, 0); + -+/* -+ * Requires: -+ * - runpool_irq spinlock -+ */ -+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; ++ /* Check if there are platform rules for maximum priority */ ++ if (pcm_device) ++ kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check( ++ pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; ++ return 0; + -+ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; ++ out1: ++ return mali_err; +} + -+/* -+ * Requires: -+ * - runpool_irq spinlock -+ */ -+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -+{ -+ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ -+ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); -+} ++KBASE_EXPORT_TEST_API(kbase_jd_init); + -+/* -+ * Requires: -+ * - jsctx mutex -+ */ -+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++void kbase_jd_exit(struct kbase_context *kctx) +{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_kctx_info = &kctx->jctx.sched_info; ++ KBASE_DEBUG_ASSERT(kctx); + -+ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ -+ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); ++ /* Work queue is emptied by this */ ++ destroy_workqueue(kctx->jctx.job_done_wq); +} + -+#endif /* _KBASE_JS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c ++KBASE_EXPORT_TEST_API(kbase_jd_exit); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c new file mode 100644 -index 000000000..14a730dc5 +index 000000000..6196c0985 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c -@@ -0,0 +1,896 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c +@@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -226001,891 +230092,402 @@ index 000000000..14a730dc5 + * + */ + -+/* -+ * mali_kbase_kinstr_jm.c -+ * Kernel driver public interface to job manager atom tracing -+ */ -+ -+#include "mali_kbase_kinstr_jm.h" -+#include ++#if IS_ENABLED(CONFIG_DEBUG_FS) + -+#include "mali_kbase.h" -+#include "mali_kbase_linux.h" ++#include ++#include ++#include ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include ++#endif ++#include + -+#include ++struct kbase_jd_debugfs_depinfo { ++ u8 id; ++ char type; ++}; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, ++ struct seq_file *sfile) ++{ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ struct kbase_sync_fence_info info; ++ int res; + -+/* Explicitly include epoll header for old kernels. Not required from 4.16. */ -+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE -+#include -+#endif -+ -+/* Define static_assert(). -+ * -+ * The macro was introduced in kernel 5.1. But older vendor kernels may define -+ * it too. -+ */ -+#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE -+#include -+#elif !defined(static_assert) -+// Stringify the expression if no message is given. -+#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) -+#define __static_assert(e, msg, ...) _Static_assert(e, msg) -+#endif -+ -+/* The module printing prefix */ -+#define PR_ "mali_kbase_kinstr_jm: " -+ -+/* Allows us to perform ASM goto for the tracing -+ * https://www.kernel.org/doc/Documentation/static-keys.txt -+ */ -+DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); -+ -+#define KBASE_KINSTR_JM_VERSION 2 -+ -+/** -+ * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing -+ * @readers: a bitlocked list of opened readers. Readers are attached to the -+ * private data of a file descriptor that the user opens with the -+ * KBASE_IOCTL_KINSTR_JM_FD IO control call. -+ * @refcount: reference count for the context. Any reader will have a link -+ * back to the context so that they can remove themselves from the -+ * list. -+ * -+ * This is opaque outside this compilation unit -+ */ -+struct kbase_kinstr_jm { -+ struct hlist_bl_head readers; -+ struct kref refcount; -+}; -+ -+/** -+ * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a -+ * new state -+ * @timestamp: Raw monotonic nanoseconds of the state change -+ * @state: The state that the atom has moved to -+ * @atom: The atom number that has changed state -+ * @flags: Flags associated with the state change. See -+ * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. -+ * @reserved: Reserved for future use. -+ * @data: Extra data for the state change. Active member depends on state. -+ * @data.start: Extra data for the state change. Active member depends on -+ * state. -+ * @data.start.slot: Extra data for the state change. Active member depends on -+ * state. -+ * @data.padding: Padding -+ * -+ * We can add new fields to the structure and old user code will gracefully -+ * ignore the new fields. -+ * -+ * We can change the size of the structure and old user code will gracefully -+ * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. 
-+ * -+ * If we remove fields, the version field in `struct -+ * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will -+ * gracefully fail and tell the user that the kernel API is too new and has -+ * backwards-incompatible changes. Note that one userspace can opt to handle -+ * multiple kernel major versions of the structure. -+ * -+ * If we need to change the _meaning_ of one of the fields, i.e. the state -+ * machine has had a incompatible change, we can keep the same members in the -+ * structure and update the version as above. User code will no longer -+ * recognise that it has the supported field and can gracefully explain to the -+ * user that the kernel API is no longer supported. -+ * -+ * When making changes to this structure, make sure they are either: -+ * - additions to the end (for minor version bumps (i.e. only a size increase)) -+ * such that the layout of existing fields doesn't change, or; -+ * - update the version reported to userspace so that it can fail explicitly. -+ */ -+struct kbase_kinstr_jm_atom_state_change { -+ u64 timestamp; -+ s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ -+ u8 atom; -+ u8 flags; -+ u8 reserved[1]; -+ /* Tagged union based on state. Ensure members are aligned correctly! */ -+ union { -+ struct { -+ u8 slot; -+ } start; -+ u8 padding[4]; -+ } data; -+}; -+static_assert( -+ ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= -+ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); -+ -+#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) -+ -+/** -+ * struct reader_changes - The circular buffer of kernel atom state changes -+ * @data: The allocated buffer. This is allocated when the user requests -+ * the reader file descriptor. It is released when the user calls -+ * close() on the fd. When accessing this, lock the producer spin -+ * lock to prevent races on the allocated memory. The consume lock -+ * does not need to be held because newly-inserted data will always -+ * be outside the currenly-read range. -+ * @producer: The producing spinlock which allows us to push changes into the -+ * buffer at the same time as a user read occurring. This needs to -+ * be locked when saving/restoring the IRQ because we can receive an -+ * interrupt from the GPU when an atom completes. The CPU could have -+ * a task preempted that is holding this lock. -+ * @consumer: The consuming mutex which locks around the user read(). -+ * Must be held when updating the tail of the circular buffer. -+ * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ -+ * helpers. The producer should lock and update this with an SMP -+ * store when a new change lands. The consumer can read with an -+ * SMP load. This allows the producer to safely insert new changes -+ * into the circular buffer. -+ * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ -+ * helpers. The producer should do a READ_ONCE load and the consumer -+ * should SMP store. -+ * @size: The number of changes that are allowed in @c data. Can be used -+ * with Linux @c CIRC_ helpers. Will always be a power of two. The -+ * producer lock should be held when updating this and stored with -+ * an SMP release memory barrier. This means that the consumer can -+ * do an SMP load. -+ * @threshold: The number of changes above which threads polling on the reader -+ * file descriptor will be woken up. 
-+ */ -+struct reader_changes { -+ struct kbase_kinstr_jm_atom_state_change *data; -+ spinlock_t producer; -+ struct mutex consumer; -+ u32 head; -+ u32 tail; -+ u32 size; -+ u32 threshold; -+}; -+ -+/** -+ * reader_changes_is_valid_size() - Determines if requested changes buffer size -+ * is valid. -+ * @size: The requested memory size -+ * -+ * We have a constraint that the underlying physical buffer must be a -+ * power of two so that we can use the efficient circular buffer helpers that -+ * the kernel provides. It also needs to be representable within a u32. -+ * -+ * Return: -+ * * true - the size is valid -+ * * false - the size is invalid -+ */ -+static inline bool reader_changes_is_valid_size(const size_t size) -+{ -+ const size_t elem_size = sizeof(*((struct reader_changes *)0)->data); -+ const size_t size_size = sizeof(((struct reader_changes *)0)->size); -+ const size_t size_max = (1ull << (size_size * 8)) - 1; -+ -+ return is_power_of_2(size) && /* Is a power of two */ -+ ((size / elem_size) <= size_max); /* Small enough */ -+} -+ -+/** -+ * reader_changes_init() - Initializes the reader changes and allocates the -+ * changes buffer -+ * @changes: The context pointer, must point to a zero-inited allocated reader -+ * changes structure. We may support allocating the structure in the -+ * future. -+ * @size: The requested changes buffer size -+ * -+ * Return: -+ * (0, U16_MAX] - the number of data elements allocated -+ * -ERANGE - the requested memory size was invalid -+ * -ENOMEM - could not allocate the memory -+ */ -+static int reader_changes_init(struct reader_changes *const changes, -+ const size_t size) -+{ -+ BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); -+ -+ if (!reader_changes_is_valid_size(size)) { -+ pr_warn(PR_ "invalid size %zu\n", size); -+ return -ERANGE; ++ switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ res = kbase_sync_fence_out_info_get(atom, &info); ++ if (res == 0) ++ seq_printf(sfile, "Sa([%pK]%d) ", ++ info.fence, info.status); ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ res = kbase_sync_fence_in_info_get(atom, &info); ++ if (res == 0) ++ seq_printf(sfile, "Wa([%pK]%d) ", ++ info.fence, info.status); ++ break; ++ default: ++ break; + } -+ -+ changes->data = vmalloc(size); -+ if (!changes->data) -+ return -ENOMEM; -+ -+ spin_lock_init(&changes->producer); -+ mutex_init(&changes->consumer); -+ -+ changes->size = size / sizeof(*changes->data); -+ changes->threshold = min(((size_t)(changes->size)) / 4, -+ ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); -+ -+ return changes->size; -+} -+ -+/** -+ * reader_changes_term() - Cleans up a reader changes structure -+ * @changes: The context to clean up -+ * -+ * Releases the allocated state changes memory -+ */ -+static void reader_changes_term(struct reader_changes *const changes) -+{ -+ struct kbase_kinstr_jm_atom_state_change *data = NULL; -+ unsigned long irq; -+ -+ /* -+ * Although changes->data is used on the consumer side, too, no active -+ * consumer is possible by the time we clean up the reader changes, so -+ * no need to take the consumer lock. However, we do need the producer -+ * lock because the list removal can race with list traversal. 
-+ */ -+ spin_lock_irqsave(&changes->producer, irq); -+ swap(changes->data, data); -+ spin_unlock_irqrestore(&changes->producer, irq); -+ -+ mutex_destroy(&changes->consumer); -+ vfree(data); -+} -+ -+/** -+ * reader_changes_count_locked() - Retrieves the count of state changes from the -+ * tail to the physical end of the buffer -+ * @changes: The state changes context -+ * -+ * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to -+ * determine the count, so there may be more items. However, that's the maximum -+ * number that can be read in one contiguous read. -+ * -+ * Return: the number of changes in the circular buffer until the end of the -+ * allocation -+ */ -+static u32 reader_changes_count_locked(struct reader_changes *const changes) -+{ -+ u32 head; -+ -+ lockdep_assert_held_once(&changes->consumer); -+ -+ head = smp_load_acquire(&changes->head); -+ -+ return CIRC_CNT_TO_END(head, changes->tail, changes->size); -+} -+ -+/** -+ * reader_changes_count() - Retrieves the count of state changes from the -+ * tail to the physical end of the buffer -+ * @changes: The state changes context -+ * -+ * Return: the number of changes in the circular buffer until the end of the -+ * allocation -+ */ -+static u32 reader_changes_count(struct reader_changes *const changes) -+{ -+ u32 ret; -+ -+ mutex_lock(&changes->consumer); -+ ret = reader_changes_count_locked(changes); -+ mutex_unlock(&changes->consumer); -+ return ret; ++#endif /* CONFIG_SYNC_FILE */ +} + -+/** -+ * reader_changes_push() - Pushes a change into the reader circular buffer. -+ * @changes: The buffer to insert the change into -+ * @change: Kernel atom change to insert -+ * @wait_queue: The queue to be kicked when changes should be read from -+ * userspace. Kicked when a threshold is reached or there is -+ * overflow. -+ */ -+static void reader_changes_push( -+ struct reader_changes *const changes, -+ const struct kbase_kinstr_jm_atom_state_change *const change, -+ wait_queue_head_t *const wait_queue) ++static void kbasep_jd_debugfs_atom_deps( ++ struct kbase_jd_debugfs_depinfo *deps, ++ struct kbase_jd_atom *atom) +{ -+ u32 head, tail, size, space; -+ unsigned long irq; -+ struct kbase_kinstr_jm_atom_state_change *data; -+ -+ spin_lock_irqsave(&changes->producer, irq); -+ -+ /* We may be called for a reader_changes that's awaiting cleanup. */ -+ data = changes->data; -+ if (!data) -+ goto unlock; ++ struct kbase_context *kctx = atom->kctx; ++ int i; + -+ size = changes->size; -+ head = changes->head; -+ tail = smp_load_acquire(&changes->tail); ++ for (i = 0; i < 2; i++) { ++ deps[i].id = (unsigned int)(atom->dep[i].atom ? ++ kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); + -+ space = CIRC_SPACE(head, tail, size); -+ if (space >= 1) { -+ data[head] = *change; -+ if (space == 1) { -+ data[head].flags |= -+ KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; -+ pr_warn(PR_ "overflow of circular buffer\n"); ++ switch (atom->dep[i].dep_type) { ++ case BASE_JD_DEP_TYPE_INVALID: ++ deps[i].type = ' '; ++ break; ++ case BASE_JD_DEP_TYPE_DATA: ++ deps[i].type = 'D'; ++ break; ++ case BASE_JD_DEP_TYPE_ORDER: ++ deps[i].type = '>'; ++ break; ++ default: ++ deps[i].type = '?'; ++ break; + } -+ smp_store_release(&changes->head, (head + 1) & (size - 1)); + } -+ -+ /* Wake for either overflow or over-threshold cases. 
*/ -+ if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) -+ wake_up_interruptible(wait_queue); -+ -+unlock: -+ spin_unlock_irqrestore(&changes->producer, irq); +} -+ +/** -+ * struct reader - Allows the kernel state changes to be read by user space. -+ * @node: The node in the @c readers locked list -+ * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) -+ * @changes: The circular buffer of user changes -+ * @wait_queue: A wait queue for poll -+ * @context: a pointer to the parent context that created this reader. Can be -+ * used to remove the reader from the list of readers. Reference -+ * counted. ++ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * -+ * The reader is a circular buffer in kernel space. State changes are pushed -+ * into the buffer. The flow from user space is: ++ * This function is called to get the contents of the JD atoms debugfs file. ++ * This is a report of all atoms managed by kbase_jd_context.atoms + * -+ * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will -+ * allocate the kernel side circular buffer with a size specified in the -+ * ioctl argument. -+ * * The user will then poll the file descriptor for data -+ * * Upon receiving POLLIN, perform a read() on the file descriptor to get -+ * the data out. -+ * * The buffer memory will be freed when the file descriptor is closed -+ */ -+struct reader { -+ struct hlist_bl_node node; -+ struct rcu_head rcu_head; -+ struct reader_changes changes; -+ wait_queue_head_t wait_queue; -+ struct kbase_kinstr_jm *context; -+}; -+ -+static struct kbase_kinstr_jm * -+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); -+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); -+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, -+ struct reader *const reader); -+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, -+ struct reader *const reader); -+ -+/** -+ * reader_term() - Terminate a instrumentation job manager reader context. -+ * @reader: Pointer to context to be terminated. ++ * Return: 0 if successfully prints data in debugfs entry file, failure ++ * otherwise + */ -+static void reader_term(struct reader *const reader) ++static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) +{ -+ if (!reader) -+ return; -+ -+ kbase_kinstr_jm_readers_del(reader->context, reader); -+ reader_changes_term(&reader->changes); -+ kbase_kinstr_jm_ref_put(reader->context); -+ -+ kfree_rcu(reader, rcu_head); -+} ++ struct kbase_context *kctx = sfile->private; ++ struct kbase_jd_atom *atoms; ++ unsigned long irq_flags; ++ int i; + -+/** -+ * reader_init() - Initialise a instrumentation job manager reader context. -+ * @out_reader: Non-NULL pointer to where the pointer to the created context -+ * will be stored on success. -+ * @ctx: the pointer to the parent context. Reference count will be -+ * increased if initialization is successful -+ * @num_changes: The number of changes to allocate a buffer for -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static int reader_init(struct reader **const out_reader, -+ struct kbase_kinstr_jm *const ctx, -+ size_t const num_changes) -+{ -+ struct reader *reader = NULL; -+ const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); -+ int status; ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ if (!out_reader || !ctx || !num_changes) -+ return -EINVAL; ++ /* Print version */ ++ seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); + -+ reader = kzalloc(sizeof(*reader), GFP_KERNEL); -+ if (!reader) -+ return -ENOMEM; ++ /* Print U/K API version */ ++ seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, ++ BASE_UK_VERSION_MINOR); + -+ INIT_HLIST_BL_NODE(&reader->node); -+ init_waitqueue_head(&reader->wait_queue); ++ /* Print table heading */ ++ seq_puts(sfile, " ID, Core req, St, Predeps, Start time, Additional info...\n"); + -+ reader->context = kbase_kinstr_jm_ref_get(ctx); ++ atoms = kctx->jctx.atoms; ++ /* General atom states */ ++ mutex_lock(&kctx->jctx.lock); ++ /* JS-related states */ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { ++ struct kbase_jd_atom *atom = &atoms[i]; ++ s64 start_timestamp = 0; ++ struct kbase_jd_debugfs_depinfo deps[2]; + -+ status = reader_changes_init(&reader->changes, num_changes * change_size); -+ if (status < 0) -+ goto fail; ++ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) ++ continue; + -+ status = kbase_kinstr_jm_readers_add(ctx, reader); -+ if (status < 0) -+ goto fail; ++ /* start_timestamp is cleared as soon as the atom leaves UNUSED state ++ * and set before a job is submitted to the h/w, a non-zero value means ++ * it is valid ++ */ ++ if (ktime_to_ns(atom->start_timestamp)) ++ start_timestamp = ++ ktime_to_ns(ktime_sub(ktime_get_raw(), atom->start_timestamp)); + -+ *out_reader = reader; ++ kbasep_jd_debugfs_atom_deps(deps, atom); + -+ return 0; ++ seq_printf(sfile, ++ "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", ++ i, atom->core_req, atom->status, ++ deps[0].type, deps[0].id, ++ deps[1].type, deps[1].id, ++ start_timestamp); + -+fail: -+ kbase_kinstr_jm_ref_put(reader->context); -+ kfree(reader); -+ return status; -+} + -+/** -+ * reader_release() - Invoked when the reader file descriptor is released -+ * @node: The inode that the file descriptor that the file corresponds to. In -+ * our case our reader file descriptor is backed by an anonymous node so -+ * not much is in this. -+ * @file: the file data. Our reader context is held in the private data -+ * Return: zero on success -+ */ -+static int reader_release(struct inode *const node, struct file *const file) -+{ -+ struct reader *const reader = file->private_data; ++ kbase_jd_debugfs_fence_info(atom, sfile); + -+ reader_term(reader); -+ file->private_data = NULL; ++ seq_puts(sfile, "\n"); ++ } ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kctx->jctx.lock); + + return 0; +} + ++ +/** -+ * reader_changes_copy_to_user() - Copy any changes from a changes structure to -+ * the user-provided buffer. -+ * @changes: The changes structure from which to copy. -+ * @buffer: The user buffer to copy the data to. -+ * @buffer_size: The number of bytes in the buffer. -+ * Return: The number of bytes copied or negative errno on failure. 
++ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file ++ * @in: &struct inode pointer ++ * @file: &struct file pointer ++ * ++ * Return: file descriptor + */ -+static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, -+ char __user *buffer, -+ size_t buffer_size) ++static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) +{ -+ ssize_t ret = 0; -+ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE( -+ changes->data); -+ size_t const entry_size = sizeof(*src_buf); -+ size_t changes_tail, changes_count, read_size; ++ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); ++} + -+ /* Needed for the quick buffer capacity calculation below. -+ * Note that we can't use is_power_of_2() since old compilers don't -+ * understand it's a constant expression. -+ */ -+#define is_power_of_two(x) ((x) && !((x) & ((x) - 1))) -+ static_assert(is_power_of_two( -+ sizeof(struct kbase_kinstr_jm_atom_state_change))); -+#undef is_power_of_two ++static const struct file_operations kbasep_jd_debugfs_atoms_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_jd_debugfs_atoms_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ lockdep_assert_held_once(&changes->consumer); ++void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) ++{ ++ const mode_t mode = 0444; + -+ /* Read continuously until either: -+ * - we've filled the output buffer, or -+ * - there are no changes when we check. -+ * -+ * If more changes arrive while we're copying to the user, we can copy -+ * those as well, space permitting. ++ /* Caller already ensures this, but we keep the pattern for ++ * maintenance safety. + */ -+ do { -+ changes_tail = changes->tail; -+ changes_count = reader_changes_count_locked(changes); -+ read_size = min(changes_count * entry_size, -+ buffer_size & ~(entry_size - 1)); -+ -+ if (!read_size) -+ break; -+ -+ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size)) -+ return -EFAULT; ++ if (WARN_ON(!kctx) || ++ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+ buffer += read_size; -+ buffer_size -= read_size; -+ ret += read_size; -+ changes_tail = (changes_tail + read_size / entry_size) & -+ (changes->size - 1); -+ smp_store_release(&changes->tail, changes_tail); -+ } while (read_size); ++ /* Expose all atoms */ ++ debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, ++ &kbasep_jd_debugfs_atoms_fops); + -+ return ret; +} + -+/** -+ * reader_read() - Handles a read call on the reader file descriptor ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h +new file mode 100644 +index 000000000..8e6140c43 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h +@@ -0,0 +1,43 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @filp: The file that the read was performed on -+ * @buffer: The destination buffer -+ * @buffer_size: The maximum number of bytes to read -+ * @offset: The offset into the 'file' to read from. ++ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * -+ * Note the destination buffer needs to be fully mapped in userspace or the read -+ * will fault. 
++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * Return: -+ * * The number of bytes read or: -+ * * -EBADF - the file descriptor did not have an attached reader -+ * * -EFAULT - memory access fault -+ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there -+ * is no data available ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Note: The number of bytes read will always be a multiple of the size of an -+ * entry. + */ -+static ssize_t reader_read(struct file *const filp, -+ char __user *const buffer, -+ size_t const buffer_size, -+ loff_t *const offset) -+{ -+ struct reader *const reader = filp->private_data; -+ struct reader_changes *changes; -+ ssize_t ret; -+ -+ if (!reader) -+ return -EBADF; -+ -+ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change)) -+ return -ENOBUFS; -+ -+#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE -+ if (!access_ok(buffer, buffer_size)) -+ return -EIO; -+#else -+ if (!access_ok(VERIFY_WRITE, buffer, buffer_size)) -+ return -EIO; -+#endif -+ -+ changes = &reader->changes; -+ -+ mutex_lock(&changes->consumer); -+ if (!reader_changes_count_locked(changes)) { -+ if (filp->f_flags & O_NONBLOCK) { -+ ret = -EAGAIN; -+ goto exit; -+ } -+ -+ if (wait_event_interruptible( -+ reader->wait_queue, -+ !!reader_changes_count_locked(changes))) { -+ ret = -EINTR; -+ goto exit; -+ } -+ } -+ -+ ret = reader_changes_copy_to_user(changes, buffer, buffer_size); -+ -+exit: -+ mutex_unlock(&changes->consumer); -+ return ret; -+} + +/** -+ * reader_poll() - Handles a poll call on the reader file descriptor -+ * @file: The file that the poll was performed on -+ * @wait: The poll table -+ * -+ * The results of the poll will be unreliable if there is no mapped memory as -+ * there is no circular buffer to push atom state changes into. -+ * -+ * Return: -+ * * 0 - no data ready -+ * * EPOLLIN | EPOLLRDNORM - state changes have been buffered -+ * * EPOLLHUP | EPOLLERR - IO control arguments were invalid or the file -+ * descriptor did not have an attached reader. 
++ * DOC: Header file for job dispatcher-related entries in debugfs + */ -+static __poll_t reader_poll(struct file *const file, -+ struct poll_table_struct *const wait) -+{ -+ struct reader *reader; -+ struct reader_changes *changes; -+ __poll_t mask = 0; -+ -+ if (unlikely(!file || !wait)) -+ return EPOLLHUP | EPOLLERR; -+ -+ reader = file->private_data; -+ if (unlikely(!reader)) -+ return EPOLLHUP | EPOLLERR; -+ -+ changes = &reader->changes; -+ if (reader_changes_count(changes) >= changes->threshold) -+ return EPOLLIN | EPOLLRDNORM; -+ -+ poll_wait(file, &reader->wait_queue, wait); + -+ if (reader_changes_count(changes) > 0) -+ mask |= EPOLLIN | EPOLLRDNORM; ++#ifndef _KBASE_JD_DEBUGFS_H ++#define _KBASE_JD_DEBUGFS_H + -+ return mask; -+} ++#include + -+/* The file operations virtual function table */ -+static const struct file_operations file_operations = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .read = reader_read, -+ .poll = reader_poll, -+ .release = reader_release -+}; ++#define MALI_JD_DEBUGFS_VERSION 3 + -+/* The maximum amount of readers that can be created on a context. */ -+static const size_t kbase_kinstr_jm_readers_max = 16; ++/* Forward declarations */ ++struct kbase_context; + +/** -+ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped -+ * @ref: the context reference count ++ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system ++ * ++ * @kctx: Pointer to kbase_context + */ -+static void kbase_kinstr_jm_release(struct kref *const ref) -+{ -+ struct kbase_kinstr_jm *const ctx = -+ container_of(ref, struct kbase_kinstr_jm, refcount); -+ -+ kfree(ctx); -+} ++void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); + -+/** -+ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context -+ * @ctx: the context to reference count -+ * Return: the reference counted context ++#endif /*_KBASE_JD_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c +new file mode 100644 +index 000000000..1ac5cd3ea +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c +@@ -0,0 +1,153 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * + */ -+static struct kbase_kinstr_jm * -+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) -+{ -+ if (likely(ctx)) -+ kref_get(&ctx->refcount); -+ return ctx; -+} + -+/** -+ * kbase_kinstr_jm_ref_put() - Dereferences the instrumentation context -+ * @ctx: the context to lower the reference count on ++/* ++ * HW access job manager common APIs + */ -+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx) -+{ -+ if (likely(ctx)) -+ kref_put(&ctx->refcount, kbase_kinstr_jm_release); -+} + ++#include ++#include "mali_kbase_hwaccess_jm.h" ++#include "mali_kbase_jm.h" ++ ++#if !MALI_USE_CSF +/** -+ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers -+ * @ctx: the instrumentation context -+ * @reader: the reader to add ++ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot ++ * @js on the active context. ++ * @kbdev: Device pointer ++ * @js: Job slot to run on ++ * @nr_jobs_to_submit: Number of jobs to attempt to submit + * -+ * Return: -+ * 0 - success -+ * -ENOMEM - too many readers already added. ++ * Return: true if slot can still be submitted on, false if slot is now full. + */ -+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, -+ struct reader *const reader) ++static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit) +{ -+ struct hlist_bl_head *const readers = &ctx->readers; -+ struct hlist_bl_node *node; -+ struct reader *temp; -+ size_t count = 0; -+ -+ hlist_bl_lock(readers); -+ -+ hlist_bl_for_each_entry_rcu(temp, node, readers, node) -+ ++count; -+ -+ if (kbase_kinstr_jm_readers_max < count) { -+ hlist_bl_unlock(readers); -+ return -ENOMEM; -+ } -+ -+ hlist_bl_add_head_rcu(&reader->node, readers); ++ struct kbase_context *kctx; ++ int i; + -+ hlist_bl_unlock(readers); ++ kctx = kbdev->hwaccess.active_kctx[js]; ++ dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n", ++ nr_jobs_to_submit, (void *)kctx, js); + -+ static_branch_inc(&basep_kinstr_jm_reader_static_key); ++ if (!kctx) ++ return true; + -+ return 0; -+} ++ for (i = 0; i < nr_jobs_to_submit; i++) { ++ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); + -+/** -+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers -+ * @ctx: the instrumentation context -+ * @reader: the reader to delete -+ */ -+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, -+ struct reader *const reader) -+{ -+ struct hlist_bl_head *const readers = &ctx->readers; ++ if (!katom) ++ return true; /* Context has no jobs on this slot */ + -+ hlist_bl_lock(readers); -+ hlist_bl_del_rcu(&reader->node); -+ hlist_bl_unlock(readers); ++ kbase_backend_run_atom(kbdev, katom); ++ } + -+ static_branch_dec(&basep_kinstr_jm_reader_static_key); ++ dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js); ++ return false; +} + -+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, -+ union kbase_kinstr_jm_fd *jm_fd_arg) ++u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) +{ -+ struct kbase_kinstr_jm_fd_in const *in; -+ struct reader *reader; -+ size_t const change_size = sizeof(struct -+ kbase_kinstr_jm_atom_state_change); -+ int status; -+ int fd; -+ int i; -+ -+ if (!ctx || !jm_fd_arg) -+ return -EINVAL; -+ -+ in = &jm_fd_arg->in; -+ -+ if (!is_power_of_2(in->count)) -+ return -EINVAL; ++ u32 ret_mask = 0; + -+ for (i = 0; i < sizeof(in->padding); ++i) -+ if (in->padding[i]) -+ return -EINVAL; ++ 
lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); + -+ status = reader_init(&reader, ctx, in->count); -+ if (status < 0) -+ return status; ++ while (js_mask) { ++ unsigned int js = ffs(js_mask) - 1; ++ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); + -+ jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION; -+ jm_fd_arg->out.size = change_size; -+ memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding)); ++ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) ++ ret_mask |= (1 << js); + -+ fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader, -+ O_CLOEXEC); -+ if (fd < 0) -+ reader_term(reader); ++ js_mask &= ~(1 << js); ++ } + -+ return fd; ++ dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); ++ return ret_mask; +} + -+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx) ++void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) +{ -+ struct kbase_kinstr_jm *ctx = NULL; -+ -+ if (!out_ctx) -+ return -EINVAL; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ INIT_HLIST_BL_HEAD(&ctx->readers); -+ kref_init(&ctx->refcount); ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ *out_ctx = ctx; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return 0; ++ if (!down_trylock(&js_devdata->schedule_sem)) { ++ kbase_jm_kick(kbdev, js_mask); ++ up(&js_devdata->schedule_sem); ++ } +} + -+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx) ++void kbase_jm_try_kick_all(struct kbase_device *kbdev) +{ -+ kbase_kinstr_jm_ref_put(ctx); ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (!down_trylock(&js_devdata->schedule_sem)) { ++ kbase_jm_kick_all(kbdev); ++ up(&js_devdata->schedule_sem); ++ } +} + -+void kbasep_kinstr_jm_atom_state( -+ struct kbase_jd_atom *const katom, -+ const enum kbase_kinstr_jm_reader_atom_state state) ++void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ struct kbase_context *const kctx = katom->kctx; -+ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm; -+ const u8 id = kbase_jd_atom_id(kctx, katom); -+ struct kbase_kinstr_jm_atom_state_change change = { -+ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state -+ }; -+ struct reader *reader; -+ struct hlist_bl_node *node; ++ unsigned int js; + -+ WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT < state || 0 > state, -+ PR_ "unsupported katom (%u) state (%i)", id, state); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ switch (state) { -+ case KBASE_KINSTR_JM_READER_ATOM_STATE_START: -+ change.data.start.slot = katom->slot_nr; -+ break; -+ default: -+ break; ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ if (kbdev->hwaccess.active_kctx[js] == kctx) { ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx, ++ js); ++ kbdev->hwaccess.active_kctx[js] = NULL; ++ } + } -+ -+ rcu_read_lock(); -+ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node) -+ reader_changes_push( -+ &reader->changes, &change, &reader->wait_queue); -+ rcu_read_unlock(); +} + -+KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state); -+ -+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom) ++struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ struct kbase_context *const kctx = katom->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ const int slot = 
katom->slot_nr; -+ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ BUILD_BUG_ON(SLOT_RB_SIZE != 2); ++ dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", ++ (void *)katom, katom->event_code); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ KBASE_KTRACE_ADD_JM(kbdev, JM_RETURN_ATOM_TO_JS, katom->kctx, katom, ++ katom->jc, katom->event_code); + -+ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) -+ return; -+ if (WARN_ON(!submitted)) -+ return; ++ if (katom->event_code != BASE_JD_EVENT_STOPPED && ++ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { ++ return kbase_js_complete_atom(katom, NULL); ++ } + -+ if (submitted == katom) -+ kbase_kinstr_jm_atom_state_start(katom); ++ kbase_js_unpull(katom->kctx, katom); ++ ++ return NULL; +} + -+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) ++struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ktime_t *end_timestamp) +{ -+ struct kbase_context *const kctx = katom->kctx; -+ struct kbase_device *const kbdev = kctx->kbdev; -+ const int slot = katom->slot_nr; -+ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); -+ struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); -+ -+ BUILD_BUG_ON(SLOT_RB_SIZE != 2); -+ + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) -+ return; -+ if (WARN_ON(!submitted)) -+ return; -+ if (WARN_ON((submitted != katom) && (queued != katom))) -+ return; -+ -+ if (queued == katom) -+ return; -+ -+ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) -+ kbase_kinstr_jm_atom_state_stop(katom); -+ if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) -+ kbase_kinstr_jm_atom_state_start(queued); ++ return kbase_js_complete_atom(katom, end_timestamp); +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h ++#endif /* !MALI_USE_CSF */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_jm.h new file mode 100644 -index 000000000..9451d4cd9 +index 000000000..eeafcb6b1 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h -@@ -0,0 +1,273 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.h +@@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2014, 2016, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -226904,267 +230506,111 @@ index 000000000..9451d4cd9 + */ + +/* -+ * mali_kbase_kinstr_jm.h -+ * Kernel driver public interface to job manager atom tracing. This API provides -+ * a method to get the atom state changes into user space. 
-+ * -+ * The flow of operation is: -+ * -+ * | kernel | user | -+ * | ----------------------------------- | ----------------------------------- | -+ * | Initialize API with | | -+ * | kbase_kinstr_jm_init() | | -+ * | | | -+ * | Kernel code injects states with | | -+ * | kbase_kinstr_jm_atom_state_*() APIs | | -+ * | | Call ioctl() to get file descriptor | -+ * | | via KBASE_IOCTL_KINSTR_JM_FD | -+ * | Allocates a reader attached to FD | | -+ * | Allocates circular buffer and | | -+ * | patches, via ASM goto, the | | -+ * | kbase_kinstr_jm_atom_state_*() | | -+ * | | loop: | -+ * | | Call poll() on FD for POLLIN | -+ * | When threshold of changes is hit, | | -+ * | the poll is interrupted with | | -+ * | POLLIN. If circular buffer is | | -+ * | full then store the missed count | | -+ * | and interrupt poll | Call read() to get data from | -+ * | | circular buffer via the fd | -+ * | Kernel advances tail of circular | | -+ * | buffer | | -+ * | | Close file descriptor | -+ * | Deallocates circular buffer | | -+ * | | | -+ * | Terminate API with | | -+ * | kbase_kinstr_jm_term() | | -+ * -+ * All tracepoints are guarded on a static key. The static key is activated when -+ * a user space reader gets created. This means that there is negligible cost -+ * inserting the tracepoints into code when there are no readers. ++ * Job manager common APIs + */ + -+#ifndef _KBASE_KINSTR_JM_H_ -+#define _KBASE_KINSTR_JM_H_ -+ -+#include -+ -+#ifdef __KERNEL__ -+#include -+#include -+#else -+/* empty wrapper macros for userspace */ -+#define static_branch_unlikely(key) (1) -+#endif /* __KERNEL__ */ -+ -+/* Forward declarations */ -+struct kbase_context; -+struct kbase_kinstr_jm; -+struct kbase_jd_atom; -+union kbase_kinstr_jm_fd; ++#ifndef _KBASE_JM_H_ ++#define _KBASE_JM_H_ + ++#if !MALI_USE_CSF +/** -+ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context. -+ * @ctx: Non-NULL pointer to where the pointer to the created context will -+ * be stored on success. ++ * kbase_jm_kick() - Indicate that there are jobs ready to run. ++ * @kbdev: Device pointer ++ * @js_mask: Mask of the job slots that can be pulled from. + * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx); -+ -+/** -+ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context. -+ * @ctx: Pointer to context to be terminated. -+ */ -+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx); -+ -+/** -+ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to -+ * read the atom state changes from userspace ++ * Caller must hold the hwaccess_lock and schedule_sem semaphore + * -+ * @ctx: Pointer to the initialized context -+ * @jm_fd_arg: Pointer to the union containing the in/out params -+ * Return: -1 on failure, valid file descriptor on success ++ * Return: Mask of the job slots that can still be submitted to. + */ -+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, -+ union kbase_kinstr_jm_fd *jm_fd_arg); ++u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); + +/** -+ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state -+ * @atom: The atom that has changed state -+ * @state: The new state of the atom ++ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job ++ * slots. ++ * @kbdev: Device pointer + * -+ * This performs the actual storage of the state ready for user space to -+ * read the data. 
It is only called when the static key is enabled from -+ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this -+ * function directly. -+ */ -+void kbasep_kinstr_jm_atom_state( -+ struct kbase_jd_atom *const atom, -+ const enum kbase_kinstr_jm_reader_atom_state state); -+ -+/* Allows ASM goto patching to reduce tracing overhead. This is -+ * incremented/decremented when readers are created and terminated. This really -+ * shouldn't be changed externally, but if you do, make sure you use -+ * a static_key_inc()/static_key_dec() pair. -+ */ -+extern struct static_key_false basep_kinstr_jm_reader_static_key; -+ -+/** -+ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state -+ * @atom: The atom that has changed state -+ * @state: The new state of the atom ++ * Caller must hold the hwaccess_lock and schedule_sem semaphore + * -+ * This uses a static key to reduce overhead when tracing is disabled -+ */ -+static inline void kbase_kinstr_jm_atom_state( -+ struct kbase_jd_atom *const atom, -+ const enum kbase_kinstr_jm_reader_atom_state state) -+{ -+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) -+ kbasep_kinstr_jm_atom_state(atom, state); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a -+ * hardware or software queue. -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_state_queue( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state( -+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an -+ * atom -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_state_start( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state( -+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an -+ * atom -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_state_stop( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state( -+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed -+ * on an atom -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_state_complete( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state( -+ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for -+ * execution -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state_queue(atom); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully -+ * completed -+ * @atom: The atom that has changed state -+ */ -+static inline void kbase_kinstr_jm_atom_complete( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state_complete(atom); -+} -+ -+/** -+ * kbase_kinstr_jm_atom_sw_start() - A software atom has started work -+ * @atom: The atom that has changed state ++ * Return: Mask of the job slots that can still be submitted to. 
+ */ -+static inline void kbase_kinstr_jm_atom_sw_start( -+ struct kbase_jd_atom *const atom) ++static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) +{ -+ kbase_kinstr_jm_atom_state_start(atom); ++ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +/** -+ * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work -+ * @atom: The atom that has changed state ++ * kbase_jm_try_kick - Attempt to call kbase_jm_kick ++ * @kbdev: Device pointer ++ * @js_mask: Mask of the job slots that can be pulled from ++ * Context: Caller must hold hwaccess_lock ++ * ++ * If schedule_sem can be immediately obtained then this function will call ++ * kbase_jm_kick() otherwise it will do nothing. + */ -+static inline void kbase_kinstr_jm_atom_sw_stop( -+ struct kbase_jd_atom *const atom) -+{ -+ kbase_kinstr_jm_atom_state_stop(atom); -+} ++void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + +/** -+ * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted -+ * @atom: The atom that has been submitted ++ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all ++ * @kbdev: Device pointer ++ * Context: Caller must hold hwaccess_lock + * -+ * This private implementation should not be called directly, it is protected -+ * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. ++ * If schedule_sem can be immediately obtained then this function will call ++ * kbase_jm_kick_all() otherwise it will do nothing. + */ -+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); ++void kbase_jm_try_kick_all(struct kbase_device *kbdev); ++#endif /* !MALI_USE_CSF */ + ++#if !MALI_USE_CSF +/** -+ * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted -+ * @atom: The atom that has been submitted ++ * kbase_jm_idle_ctx() - Mark a context as idle. ++ * @kbdev: Device pointer ++ * @kctx: Context to mark as idle ++ * ++ * No more atoms will be pulled from this context until it is marked as active ++ * by kbase_js_use_ctx(). ++ * ++ * The context should have no atoms currently pulled from it ++ * (kbase_jsctx_atoms_pulled(kctx) == 0). ++ * ++ * Caller must hold the hwaccess_lock + */ -+static inline void kbase_kinstr_jm_atom_hw_submit( -+ struct kbase_jd_atom *const atom) -+{ -+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) -+ kbasep_kinstr_jm_atom_hw_submit(atom); -+} ++void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** -+ * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released -+ * @atom: The atom that has been released ++ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has ++ * been soft-stopped or will fail due to a ++ * dependency ++ * @kbdev: Device pointer ++ * @katom: Atom that has been stopped or will be failed + * -+ * This private implementation should not be called directly, it is protected -+ * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. 
++ * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ -+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); ++struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + +/** -+ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released -+ * @atom: The atom that has been released ++ * kbase_jm_complete() - Complete an atom ++ * @kbdev: Device pointer ++ * @katom: Atom that has completed ++ * @end_timestamp: Timestamp of atom completion ++ * ++ * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ -+static inline void kbase_kinstr_jm_atom_hw_release( -+ struct kbase_jd_atom *const atom) -+{ -+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) -+ kbasep_kinstr_jm_atom_hw_release(atom); -+} ++struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ktime_t *end_timestamp); ++#endif /* !MALI_USE_CSF */ + -+#endif /* _KBASE_KINSTR_JM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c ++#endif /* _KBASE_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c new file mode 100644 -index 000000000..823f9156e +index 000000000..8ce09212a --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c -@@ -0,0 +1,2021 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c +@@ -0,0 +1,4007 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -227182,7800 +230628,7709 @@ index 000000000..823f9156e + * + */ + -+#include "mali_kbase.h" -+#include "mali_kbase_kinstr_prfcnt.h" -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include -+#include "mali_malisw.h" -+#include "mali_kbase_debug.h" ++/* ++ * Job Scheduler Implementation ++ */ ++#include ++#include ++#include ++#include ++#include ++#include + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include + -+/* Explicitly include epoll header for old kernels. Not required from 4.16. */ -+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE -+#include -+#endif ++#include "mali_kbase_jm.h" ++#include "mali_kbase_hwaccess_jm.h" ++#include ++#include + -+/* The minimum allowed interval between dumps, in nanoseconds -+ * (equivalent to 10KHz) ++/* ++ * Private types + */ -+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -+ -+/* The maximum allowed buffers per client */ -+#define MAX_BUFFER_COUNT 32 + -+/** -+ * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware -+ * counters. -+ * @hvirt: Hardware counter virtualizer used by kinstr_prfcnt. -+ * @info_item_count: Number of metadata elements. -+ * @metadata: Hardware counter metadata provided by virtualizer. -+ * @lock: Lock protecting kinstr_prfcnt state. -+ * @suspend_count: Suspend reference count. If non-zero, timer and worker -+ * are prevented from being re-scheduled. -+ * @client_count: Number of kinstr_prfcnt clients. -+ * @clients: List of kinstr_prfcnt clients. 
-+ * @dump_timer: Timer that enqueues dump_work to a workqueue. -+ * @dump_work: Worker for performing periodic counter dumps. -+ */ -+struct kbase_kinstr_prfcnt_context { -+ struct kbase_hwcnt_virtualizer *hvirt; -+ u32 info_item_count; -+ const struct kbase_hwcnt_metadata *metadata; -+ struct mutex lock; -+ size_t suspend_count; -+ size_t client_count; -+ struct list_head clients; -+ struct hrtimer dump_timer; -+ struct work_struct dump_work; ++/* Bitpattern indicating the result of releasing a context */ ++enum { ++ /* The context was descheduled - caller should try scheduling in a new ++ * one to keep the runpool full ++ */ ++ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), ++ /* Ctx attributes were changed - caller should try scheduling all ++ * contexts ++ */ ++ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) +}; + -+/** -+ * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data. -+ * @sample_meta: Pointer to sample metadata. -+ * @dump_buf: Dump buffer containing sample data. -+ */ -+struct kbase_kinstr_prfcnt_sample { -+ struct prfcnt_metadata *sample_meta; -+ struct kbase_hwcnt_dump_buffer dump_buf; -+}; ++typedef u32 kbasep_js_release_result; + -+/** -+ * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. -+ * @user_buf: Address of allocated userspace buffer. A single allocation is used -+ * for all Dump Buffers in the array. -+ * @sample_count: Number of allocated samples. -+ * @samples: Non-NULL pointer to the array of Dump Buffers. -+ */ -+struct kbase_kinstr_prfcnt_sample_array { -+ u8 *user_buf; -+ size_t sample_count; -+ struct kbase_kinstr_prfcnt_sample *samples; ++const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { ++ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ ++ KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ ++ KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* BASE_JD_PRIO_REALTIME */ +}; + -+/** -+ * struct kbase_kinstr_prfcnt_client_config - Client session configuration. -+ * @prfcnt_mode: Sampling mode: either manual or periodic. -+ * @counter_set: Set of performance counter blocks. -+ * @scope: Scope of performance counters to capture. -+ * @buffer_count: Number of buffers used to store samples. -+ * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode. -+ * @phys_em: Enable map used by the GPU. -+ */ -+struct kbase_kinstr_prfcnt_client_config { -+ u8 prfcnt_mode; -+ u8 counter_set; -+ u8 scope; -+ u16 buffer_count; -+ u64 period_ns; -+ struct kbase_hwcnt_physical_enable_map phys_em; ++const base_jd_prio ++kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { ++ BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ ++ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ ++ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ ++ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ +}; + -+/** -+ * enum kbase_kinstr_prfcnt_client_init_state - A list of -+ * initialisation states that the -+ * kinstr_prfcnt client can be at -+ * during initialisation. Useful -+ * for terminating a partially -+ * initialised client. 
-+ * -+ * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised -+ * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session -+ * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map -+ * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer -+ * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array -+ * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client -+ * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue -+ * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised -+ */ -+enum kbase_kinstr_prfcnt_client_init_state { -+ KINSTR_PRFCNT_UNINITIALISED, -+ KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, -+ KINSTR_PRFCNT_ENABLE_MAP, -+ KINSTR_PRFCNT_DUMP_BUFFER, -+ KINSTR_PRFCNT_SAMPLE_ARRAY, -+ KINSTR_PRFCNT_VIRTUALIZER_CLIENT, -+ KINSTR_PRFCNT_WAITQ_MUTEX, -+ KINSTR_PRFCNT_INITIALISED -+}; + -+/** -+ * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached -+ * to a kinstr_prfcnt context. -+ * @kinstr_ctx: kinstr_prfcnt context client is attached to. -+ * @hvcli: Hardware counter virtualizer client. -+ * @node: Node used to attach this client to list in -+ * kinstr_prfcnt context. -+ * @cmd_sync_lock: Lock coordinating the reader interface for commands. -+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must -+ * occur. If 0, not a periodic client. -+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic -+ * client. -+ * @sample_flags: Flags for the current active dumping sample, marking -+ * the conditions/events during the dump duration. -+ * @active: True if the client has been started. -+ * @config: Configuration of the client session. -+ * @enable_map: Counters enable map. -+ * @tmp_buf: Temporary buffer to use before handing over dump to -+ * client. -+ * @sample_arr: Array of dump buffers allocated by this client. -+ * @read_idx: Index of buffer read by userspace. -+ * @write_idx: Index of buffer being written by dump worker. -+ * @fetch_idx: Index of buffer being fetched by userspace, but -+ * pending a confirmation of being read (consumed) if it -+ * differs from the read_idx. -+ * @waitq: Client's notification queue. -+ * @sample_size: Size of the data required for one sample, in bytes. -+ * @sample_count: Number of samples the client is able to capture. -+ * @user_data: User data associated with the session. -+ * This is set when the session is started and stopped. -+ * This value is ignored for control commands that -+ * provide another value. 
++/* ++ * Private function prototypes + */ -+struct kbase_kinstr_prfcnt_client { -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx; -+ struct kbase_hwcnt_virtualizer_client *hvcli; -+ struct list_head node; -+ struct mutex cmd_sync_lock; -+ u64 next_dump_time_ns; -+ u32 dump_interval_ns; -+ u32 sample_flags; -+ bool active; -+ struct kbase_kinstr_prfcnt_client_config config; -+ struct kbase_hwcnt_enable_map enable_map; -+ struct kbase_hwcnt_dump_buffer tmp_buf; -+ struct kbase_kinstr_prfcnt_sample_array sample_arr; -+ atomic_t read_idx; -+ atomic_t write_idx; -+ atomic_t fetch_idx; -+ wait_queue_head_t waitq; -+ size_t sample_size; -+ size_t sample_count; -+ u64 user_data; -+}; ++static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( ++ struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state); + -+static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { -+ { -+ /* Request description for MODE request */ -+ .hdr = { -+ .item_type = PRFCNT_ENUM_TYPE_REQUEST, -+ .item_version = PRFCNT_READER_API_VERSION, -+ }, -+ .u.request = { -+ .request_item_type = PRFCNT_REQUEST_MODE, -+ .versions_mask = 0x1, -+ }, -+ }, -+ { -+ /* Request description for ENABLE request */ -+ .hdr = { -+ .item_type = PRFCNT_ENUM_TYPE_REQUEST, -+ .item_version = PRFCNT_READER_API_VERSION, -+ }, -+ .u.request = { -+ .request_item_type = PRFCNT_REQUEST_ENABLE, -+ .versions_mask = 0x1, -+ }, -+ }, -+}; ++static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); ++ ++static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, ++ kbasep_js_ctx_job_cb *callback); ++ ++/* Helper for ktrace */ ++#if KBASE_KTRACE_ENABLE ++static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++{ ++ return atomic_read(&kctx->refcount); ++} ++#else /* KBASE_KTRACE_ENABLE */ ++static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) ++{ ++ CSTD_UNUSED(kctx); ++ return 0; ++} ++#endif /* KBASE_KTRACE_ENABLE */ ++ ++/* ++ * Private functions ++ */ + +/** -+ * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll. -+ * @filp: Non-NULL pointer to file structure. -+ * @wait: Non-NULL pointer to poll table. ++ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements ++ * @features: JSn_FEATURE register value + * -+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if -+ * data can not be read without blocking, else EPOLLHUP | EPOLLERR. 
++ * Given a JSn_FEATURE register value returns the core requirements that match ++ * ++ * Return: Core requirement bit mask + */ -+static __poll_t -+kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, -+ struct poll_table_struct *wait) ++static base_jd_core_req core_reqs_from_jsn_features(u16 features) +{ -+ struct kbase_kinstr_prfcnt_client *cli; ++ base_jd_core_req core_req = 0u; + -+ if (!filp || !wait) -+ return EPOLLHUP | EPOLLERR; ++ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) ++ core_req |= BASE_JD_REQ_V; + -+ cli = filp->private_data; ++ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) ++ core_req |= BASE_JD_REQ_CF; + -+ if (!cli) -+ return EPOLLHUP | EPOLLERR; ++ if ((features & JS_FEATURE_COMPUTE_JOB) != 0) ++ core_req |= BASE_JD_REQ_CS; + -+ poll_wait(filp, &cli->waitq, wait); ++ if ((features & JS_FEATURE_TILER_JOB) != 0) ++ core_req |= BASE_JD_REQ_T; + -+ if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx)) -+ return EPOLLIN | EPOLLRDNORM; ++ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) ++ core_req |= BASE_JD_REQ_FS; + -+ return (__poll_t)0; ++ return core_req; ++} ++ ++static void kbase_js_sync_timers(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&kbdev->js_data.runpool_mutex); +} + +/** -+ * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic -+ * dump time. -+ * @cur_ts_ns: Current time in nanoseconds. -+ * @interval: Interval between dumps in nanoseconds. ++ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. ++ * @prio: Priority to check. + * -+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump -+ * time that occurs after cur_ts_ns. ++ * Return true if there are no atoms to pull. There may be running atoms in the ++ * ring buffer even if there are no atoms to pull. It is also possible for the ++ * ring buffer to be full (with running atoms) when this functions returns ++ * true. ++ * ++ * Return: true if there are no atoms to pull, false otherwise. + */ -+static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval) ++static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio) +{ -+ /* Non-periodic client */ -+ if (interval == 0) -+ return 0; ++ bool none_to_pull; ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + -+ /* -+ * Return the next interval after the current time relative to t=0. -+ * This means multiple clients with the same period will synchronize, -+ * regardless of when they were started, allowing the worker to be -+ * scheduled less frequently. -+ */ -+ do_div(cur_ts_ns, interval); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ return (cur_ts_ns + 1) * interval; ++ none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); ++ ++ dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio, ++ none_to_pull ? "not " : "", kctx); ++ ++ return none_to_pull; +} + +/** -+ * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds. ++ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no ++ * pullable atoms ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. + * -+ * Return: Current time in nanoseconds. 
++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if the ring buffers for all priorities have no pullable atoms, ++ * false otherwise. + */ -+static u64 kbasep_kinstr_prfcnt_timestamp_ns(void) ++static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js) +{ -+ return ktime_get_raw_ns(); ++ int prio; ++ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; ++ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) ++ return false; ++ } ++ ++ return true; +} + +/** -+ * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all -+ * periodic kinstr_prfcnt clients, -+ * then reschedule the dump worker -+ * appropriately. -+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. ++ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. ++ * @kctx: Pointer to kbase context with the queue. ++ * @js: Job slot id to iterate. ++ * @prio: Priority id to iterate. ++ * @callback: Function pointer to callback. + * -+ * If there are no periodic clients, then the dump worker will not be -+ * rescheduled. Else, the dump worker will be rescheduled for the next -+ * periodic client dump. ++ * Iterate over a queue and invoke @callback for each entry in the queue, and ++ * remove the entry from the queue. ++ * ++ * If entries are added to the queue while this is running those entries may, or ++ * may not be covered. To ensure that all entries in the buffer have been ++ * enumerated when this function returns jsctx->lock must be held when calling ++ * this function. ++ * ++ * The HW access lock must always be held when calling this function. + */ -+static void kbasep_kinstr_prfcnt_reschedule_worker( -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx) ++static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio, ++ kbasep_js_ctx_job_cb *callback) +{ -+ u64 cur_ts_ns; -+ u64 shortest_period_ns = U64_MAX; -+ struct kbase_kinstr_prfcnt_client *pos; ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + -+ WARN_ON(!kinstr_ctx); -+ lockdep_assert_held(&kinstr_ctx->lock); -+ cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns(); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* -+ * This loop fulfills 2 separate tasks that don't affect each other: -+ * -+ * 1) Determine the shortest period. -+ * 2) Update the next dump time of clients that have already been -+ * dumped. It's important not to alter the next dump time of clients -+ * that haven't been dumped yet. -+ * -+ * For the sake of efficiency, the rescheduling decision ignores the time -+ * of the next dump and just uses the shortest period among all periodic -+ * clients. It is more efficient to serve multiple dump requests at once, -+ * rather than trying to reschedule the worker to serve each request -+ * individually. -+ */ -+ list_for_each_entry(pos, &kinstr_ctx->clients, node) { -+ /* Ignore clients that are not periodic or not active. */ -+ if (pos->active && pos->dump_interval_ns > 0) { -+ shortest_period_ns = -+ MIN(shortest_period_ns, pos->dump_interval_ns); ++ while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { ++ struct rb_node *node = rb_first(&queue->runnable_tree); ++ struct kbase_jd_atom *entry = rb_entry(node, ++ struct kbase_jd_atom, runnable_tree_node); + -+ /* Next dump should happen exactly one period after the last dump. 
-+ * If last dump was overdue and scheduled to happen more than one -+ * period ago, compensate for that by scheduling next dump in the -+ * immediate future. -+ */ -+ if (pos->next_dump_time_ns < cur_ts_ns) -+ pos->next_dump_time_ns = -+ MAX(cur_ts_ns + 1, -+ pos->next_dump_time_ns + -+ pos->dump_interval_ns); -+ } -+ } ++ rb_erase(node, &queue->runnable_tree); ++ callback(kctx->kbdev, entry); + -+ /* Cancel the timer if it is already pending */ -+ hrtimer_cancel(&kinstr_ctx->dump_timer); ++ /* Runnable end-of-renderpass atoms can also be in the linked ++ * list of atoms blocked on cross-slot dependencies. Remove them ++ * to avoid calling the callback twice. ++ */ ++ if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { ++ WARN_ON(!(entry->core_req & ++ BASE_JD_REQ_END_RENDERPASS)); ++ dev_dbg(kctx->kbdev->dev, ++ "Del runnable atom %pK from X_DEP list\n", ++ (void *)entry); + -+ /* Start the timer if there are periodic clients and kinstr_prfcnt is not -+ * suspended. -+ */ -+ if ((shortest_period_ns != U64_MAX) && -+ (kinstr_ctx->suspend_count == 0)) { -+ u64 next_schedule_time_ns = -+ kbasep_kinstr_prfcnt_next_dump_time_ns( -+ cur_ts_ns, shortest_period_ns); -+ hrtimer_start(&kinstr_ctx->dump_timer, -+ ns_to_ktime(next_schedule_time_ns - cur_ts_ns), -+ HRTIMER_MODE_REL); ++ list_del(&entry->queue); ++ entry->atom_flags &= ++ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; ++ } + } -+} -+ -+static enum prfcnt_block_type -+kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) -+{ -+ enum prfcnt_block_type block_type; + -+ switch (type) { -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: -+ block_type = PRFCNT_BLOCK_TYPE_FE; -+ break; -+ -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: -+ block_type = PRFCNT_BLOCK_TYPE_TILER; -+ break; ++ while (!list_empty(&queue->x_dep_head)) { ++ struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, ++ struct kbase_jd_atom, queue); + -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: -+ block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; -+ break; ++ WARN_ON(!(entry->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); ++ dev_dbg(kctx->kbdev->dev, ++ "Del blocked atom %pK from X_DEP list\n", ++ (void *)entry); + -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: -+ block_type = PRFCNT_BLOCK_TYPE_MEMORY; -+ break; ++ list_del(queue->x_dep_head.next); ++ entry->atom_flags &= ++ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: -+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: -+ default: -+ block_type = PRFCNT_BLOCK_TYPE_RESERVED; -+ break; ++ callback(kctx->kbdev, entry); + } -+ -+ return block_type; +} + -+static bool kbase_kinstr_is_block_type_reserved(const struct kbase_hwcnt_metadata *metadata, -+ size_t grp, size_t blk) ++/** ++ * jsctx_queue_foreach(): - Execute callback for each entry in every queue ++ * @kctx: Pointer to kbase context with queue. ++ * @js: Job slot id to iterate. ++ * @callback: Function pointer to callback. 
++ * ++ * Iterate over all the different priorities, and for each call ++ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback ++ * for each entry, and remove the entry from the queue. ++ */ ++static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js, ++ kbasep_js_ctx_job_cb *callback) +{ -+ enum prfcnt_block_type block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( -+ kbase_hwcnt_metadata_block_type(metadata, grp, blk)); ++ int prio; + -+ return block_type == PRFCNT_BLOCK_TYPE_RESERVED; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; ++ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) ++ jsctx_queue_foreach_prio(kctx, js, prio, callback); +} + +/** -+ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta -+ * item array. -+ * @enable_map: Non-NULL pointer to the map of enabled counters. -+ * @dst: Non-NULL pointer to the sample's dump buffer object. -+ * @block_meta_base: Non-NULL double pointer to the start of the block meta -+ * data items. -+ * @base_addr: Address of allocated pages for array of samples. Used -+ * to calculate offset of block values. -+ * @counter_set: The SET which blocks represent. ++ * jsctx_rb_peek_prio(): - Check buffer and get next atom ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. ++ * @prio: Priority id to check. + * -+ * Return: 0 on success, else error code. ++ * Check the ring buffer for the specified @js and @prio and return a pointer to ++ * the next atom, unless the ring buffer is empty. ++ * ++ * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ -+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, -+ struct kbase_hwcnt_dump_buffer *dst, -+ struct prfcnt_metadata **block_meta_base, -+ u8 *base_addr, u8 counter_set) ++static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js, ++ int prio) +{ -+ size_t grp, blk, blk_inst; -+ struct prfcnt_metadata **ptr_md = block_meta_base; -+ const struct kbase_hwcnt_metadata *metadata; -+ uint8_t block_idx = 0; ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; ++ struct rb_node *node; + -+ if (!dst || !*block_meta_base) -+ return -EINVAL; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n", ++ (void *)kctx, prio, js); + -+ metadata = dst->metadata; -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { -+ u8 *dst_blk; ++ node = rb_first(&rb->runnable_tree); ++ if (!node) { ++ dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); ++ return NULL; ++ } + -+ /* Block indices must be reported with no gaps. */ -+ if (blk_inst == 0) -+ block_idx = 0; ++ return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); ++} + -+ /* Skip unavailable or non-enabled blocks */ -+ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || -+ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || -+ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) -+ continue; ++/** ++ * jsctx_rb_peek(): - Check all priority buffers and get next atom ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. ++ * ++ * Check the ring buffers for all priorities, starting from ++ * KBASE_JS_ATOM_SCHED_PRIO_REALTIME, for the specified @js and @prio and return a ++ * pointer to the next atom, unless all the priority's ring buffers are empty. 
++ * ++ * Caller must hold the hwaccess_lock. ++ * ++ * Return: Pointer to next atom in buffer, or NULL if there is no atom. ++ */ ++static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js) ++{ ++ int prio; + -+ dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); -+ (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; -+ (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; -+ (*ptr_md)->u.block_md.block_type = -+ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( -+ kbase_hwcnt_metadata_block_type(metadata, grp, -+ blk)); -+ (*ptr_md)->u.block_md.block_idx = block_idx; -+ (*ptr_md)->u.block_md.set = counter_set; -+ (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; -+ (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* update the buf meta data block pointer to next item */ -+ (*ptr_md)++; -+ block_idx++; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; ++ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ struct kbase_jd_atom *katom; ++ ++ katom = jsctx_rb_peek_prio(kctx, js, prio); ++ if (katom) ++ return katom; + } + -+ return 0; ++ return NULL; +} + +/** -+ * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample -+ * output. -+ * @cli: Non-NULL pointer to a kinstr_prfcnt client. -+ * @dump_buf: Non-NULL pointer to dump buffer where sample is stored. -+ * @ptr_md: Non-NULL pointer to sample metadata. ++ * jsctx_rb_pull(): - Mark atom in list as running ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @katom: Pointer to katom to pull. ++ * ++ * Mark an atom previously obtained from jsctx_rb_peek() as running. ++ * ++ * @katom must currently be at the head of the ring buffer. + */ -+static void kbasep_kinstr_prfcnt_set_sample_metadata( -+ struct kbase_kinstr_prfcnt_client *cli, -+ struct kbase_hwcnt_dump_buffer *dump_buf, -+ struct prfcnt_metadata *ptr_md) ++static inline void ++jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ u8 clk_cnt, i; -+ -+ clk_cnt = cli->kinstr_ctx->metadata->clk_cnt; -+ -+ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */ -+ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE; -+ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; -+ ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx); -+ ptr_md->u.sample_md.flags = cli->sample_flags; ++ int prio = katom->sched_priority; ++ unsigned int js = katom->slot_nr; ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + -+ /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */ -+ ptr_md++; -+ if (clk_cnt > MAX_REPORTED_DOMAINS) -+ clk_cnt = MAX_REPORTED_DOMAINS; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* Handle the prfcnt_clock_metadata meta item */ -+ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK; -+ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; -+ ptr_md->u.clock_md.num_domains = clk_cnt; -+ for (i = 0; i < clk_cnt; i++) -+ ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i]; ++ dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", ++ (void *)katom, (void *)kctx); + -+ /* Dealing with counter blocks */ -+ ptr_md++; -+ if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md, -+ cli->sample_arr.user_buf, -+ cli->config.counter_set))) -+ return; ++ /* Atoms must be pulled in the correct order. 
*/ ++ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); + -+ /* Handle the last sentinel item */ -+ ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE; -+ ptr_md->hdr.item_version = 0; ++ rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); +} + -+/** -+ * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. -+ * @cli: Non-NULL pointer to a kinstr_prfcnt client. -+ * @buf_idx: The index to the sample array for saving the sample. -+ * @user_data: User data to return to the user. -+ * @ts_start_ns: Time stamp for the start point of the sample dump. -+ * @ts_end_ns: Time stamp for the end point of the sample dump. -+ */ -+static void kbasep_kinstr_prfcnt_client_output_sample( -+ struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx, -+ u64 user_data, u64 ts_start_ns, u64 ts_end_ns) ++static void ++jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ struct kbase_hwcnt_dump_buffer *dump_buf; -+ struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf; -+ struct prfcnt_metadata *ptr_md; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int prio = katom->sched_priority; ++ unsigned int js = katom->slot_nr; ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; ++ struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; + -+ if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) -+ return; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; -+ ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; ++ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom, ++ (void *)kctx, js); + -+ /* Patch the dump buf headers, to hide the counters that other hwcnt -+ * clients are using. -+ */ -+ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map); ++ while (*new) { ++ struct kbase_jd_atom *entry = container_of(*new, ++ struct kbase_jd_atom, runnable_tree_node); + -+ /* Copy the temp buffer to the userspace visible buffer. The strict -+ * variant will explicitly zero any non-enabled counters to ensure -+ * nothing except exactly what the user asked for is made visible. -+ */ -+ kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, -+ &cli->enable_map); ++ parent = *new; ++ if (kbase_jd_atom_is_younger(katom, entry)) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } + -+ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item. -+ * Set timestamp and user data for real dump. -+ */ -+ ptr_md->u.sample_md.timestamp_start = ts_start_ns; -+ ptr_md->u.sample_md.timestamp_end = ts_end_ns; -+ ptr_md->u.sample_md.user_data = user_data; ++ /* Add new node and rebalance tree. */ ++ rb_link_node(&katom->runnable_tree_node, parent, new); ++ rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); + -+ kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY); +} + +/** -+ * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client. -+ * @cli: Non-NULL pointer to a kinstr_prfcnt client. -+ * @event_id: Event type that triggered the dump. -+ * @user_data: User data to return to the user. ++ * jsctx_rb_unpull(): - Undo marking of atom in list as running ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @katom: Pointer to katom to unpull. + * -+ * Return: 0 on success, else error code. ++ * Undo jsctx_rb_pull() and put @katom back in the queue. 
++ * ++ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were ++ * pulled. + */ -+static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, -+ enum base_hwcnt_reader_event event_id, u64 user_data) ++static inline void ++jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ int ret; -+ u64 ts_start_ns = 0; -+ u64 ts_end_ns = 0; -+ unsigned int write_idx; -+ unsigned int read_idx; -+ size_t available_samples_count; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ WARN_ON(!cli); -+ lockdep_assert_held(&cli->kinstr_ctx->lock); ++ KBASE_KTRACE_ADD_JM(kctx->kbdev, JS_UNPULL_JOB, kctx, katom, katom->jc, ++ 0u); + -+ write_idx = atomic_read(&cli->write_idx); -+ read_idx = atomic_read(&cli->read_idx); ++ jsctx_tree_add(kctx, katom); ++} + -+ /* Check if there is a place to copy HWC block into. Calculate the -+ * number of available samples count, by taking into account the type -+ * of dump. -+ */ -+ available_samples_count = cli->sample_arr.sample_count; -+ WARN_ON(available_samples_count < 1); -+ /* Reserve one slot to store the implicit sample taken on CMD_STOP */ -+ available_samples_count -= 1; -+ if (write_idx - read_idx == available_samples_count) { -+ /* For periodic sampling, the current active dump -+ * will be accumulated in the next sample, when -+ * a buffer becomes available. ++static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled); ++static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js); ++static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js); ++ ++typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, ++ const struct kbase_jd_atom *); ++ ++bool kbase_js_atom_runs_before(struct kbase_device *kbdev, ++ const struct kbase_jd_atom *katom_a, ++ const struct kbase_jd_atom *katom_b, ++ const kbase_atom_ordering_flag_t order_flags) ++{ ++ struct kbase_context *kctx_a = katom_a->kctx; ++ struct kbase_context *kctx_b = katom_b->kctx; ++ katom_ordering_func *samectxatomprio_ordering_func = ++ kbase_jd_atom_is_younger; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (order_flags & KBASE_ATOM_ORDERING_FLAG_SEQNR) ++ samectxatomprio_ordering_func = kbase_jd_atom_is_earlier; ++ ++ /* It only makes sense to make this test for atoms on the same slot */ ++ WARN_ON(katom_a->slot_nr != katom_b->slot_nr); ++ ++ if (kbdev->js_ctx_scheduling_mode == ++ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) { ++ /* In local priority mode, querying either way around for "a ++ * should run before b" and "b should run before a" should ++ * always be false when they're from different contexts + */ -+ if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC) -+ cli->sample_flags |= SAMPLE_FLAG_OVERFLOW; -+ return -EBUSY; ++ if (kctx_a != kctx_b) ++ return false; ++ } else { ++ /* In system priority mode, ordering is done first strictly by ++ * context priority, even when katom_b might be lower priority ++ * than katom_a. This is due to scheduling of contexts in order ++ * of highest priority first, regardless of whether the atoms ++ * for a particular slot from such contexts have the highest ++ * priority or not. 
++ */ ++ if (kctx_a != kctx_b) { ++ if (kctx_a->priority < kctx_b->priority) ++ return true; ++ if (kctx_a->priority > kctx_b->priority) ++ return false; ++ } + } + -+ /* For the rest of the function, use the actual sample_count -+ * that represents the real size of the array. ++ /* For same contexts/contexts with the same context priority (in system ++ * priority mode), ordering is next done by atom priority + */ -+ write_idx %= cli->sample_arr.sample_count; ++ if (katom_a->sched_priority < katom_b->sched_priority) ++ return true; ++ if (katom_a->sched_priority > katom_b->sched_priority) ++ return false; ++ /* For atoms of same priority on the same kctx, they are ++ * ordered by seq_nr/age (dependent on caller) ++ */ ++ if (kctx_a == kctx_b && samectxatomprio_ordering_func(katom_a, katom_b)) ++ return true; + -+ ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns, -+ &cli->tmp_buf); -+ /* HWC dump error, set the sample with error flag */ -+ if (ret) -+ cli->sample_flags |= SAMPLE_FLAG_ERROR; ++ return false; ++} + -+ /* Make the sample ready and copy it to the userspace mapped buffer */ -+ kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns, -+ ts_end_ns); ++/* ++ * Functions private to KBase ('Protected' functions) ++ */ ++int kbasep_js_devdata_init(struct kbase_device * const kbdev) ++{ ++ struct kbasep_js_device_data *jsdd; ++ int i, j; + -+ /* Notify client. Make sure all changes to memory are visible. */ -+ wmb(); -+ atomic_inc(&cli->write_idx); -+ wake_up_interruptible(&cli->waitq); -+ /* Reset the flags for the next sample dump */ -+ cli->sample_flags = 0; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ return 0; -+} ++ jsdd = &kbdev->js_data; + -+static int -+kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, -+ u64 user_data) -+{ -+ int ret; -+ u64 tm_start, tm_end; -+ unsigned int write_idx; -+ unsigned int read_idx; -+ size_t available_samples_count; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ /* Soft-stop will be disabled on a single context by default unless ++ * softstop_always is set ++ */ ++ jsdd->softstop_always = false; ++#endif /* CONFIG_MALI_BIFROST_DEBUG */ ++ jsdd->nr_all_contexts_running = 0; ++ jsdd->nr_user_contexts_running = 0; ++ jsdd->nr_contexts_pullable = 0; ++ atomic_set(&jsdd->nr_contexts_runnable, 0); ++ /* No ctx allowed to submit */ ++ jsdd->runpool_irq.submit_allowed = 0u; ++ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, ++ sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); ++ memset(jsdd->runpool_irq.slot_affinities, 0, ++ sizeof(jsdd->runpool_irq.slot_affinities)); ++ memset(jsdd->runpool_irq.slot_affinity_refcount, 0, ++ sizeof(jsdd->runpool_irq.slot_affinity_refcount)); ++ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); + -+ WARN_ON(!cli); -+ lockdep_assert_held(&cli->cmd_sync_lock); ++ /* Config attributes */ ++ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; ++ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; ++ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; ++ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; ++ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; ++ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; ++ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; ++ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; + -+ /* If the client is already started, the command is a no-op */ -+ if (cli->active) -+ return 0; ++ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; ++ jsdd->ctx_timeslice_ns = 
DEFAULT_JS_CTX_TIMESLICE_NS; ++ atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); ++ jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT); + -+ write_idx = atomic_read(&cli->write_idx); -+ read_idx = atomic_read(&cli->read_idx); ++ dev_dbg(kbdev->dev, "JS Config Attribs: "); ++ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", ++ jsdd->scheduling_period_ns); ++ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", ++ jsdd->soft_stop_ticks); ++ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", ++ jsdd->soft_stop_ticks_cl); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", ++ jsdd->hard_stop_ticks_ss); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", ++ jsdd->hard_stop_ticks_cl); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", ++ jsdd->hard_stop_ticks_dumping); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", ++ jsdd->gpu_reset_ticks_ss); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", ++ jsdd->gpu_reset_ticks_cl); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", ++ jsdd->gpu_reset_ticks_dumping); ++ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", ++ jsdd->ctx_timeslice_ns); ++ dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", ++ atomic_read(&jsdd->soft_job_timeout_ms)); ++ dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms); + -+ /* Check whether there is space to store atleast an implicit sample -+ * corresponding to CMD_STOP. -+ */ -+ available_samples_count = cli->sample_count - (write_idx - read_idx); -+ if (!available_samples_count) -+ return -EBUSY; ++ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && ++ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && ++ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && ++ jsdd->hard_stop_ticks_dumping < ++ jsdd->gpu_reset_ticks_dumping)) { ++ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); ++ return -EINVAL; ++ } + -+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, -+ &cli->config.phys_em); ++#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS ++ dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", ++ jsdd->soft_stop_ticks, ++ jsdd->scheduling_period_ns); ++#endif ++#if KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", ++ jsdd->hard_stop_ticks_ss, ++ jsdd->hard_stop_ticks_dumping, ++ jsdd->scheduling_period_ns); ++#endif ++#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); ++#endif + -+ /* Enable all the available clk_enable_map. 
*/ -+ cli->enable_map.clk_enable_map = (1ull << cli->kinstr_ctx->metadata->clk_cnt) - 1; ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) ++ jsdd->js_reqs[i] = core_reqs_from_jsn_features( ++ kbdev->gpu_props.props.raw_props.js_features[i]); + -+ mutex_lock(&cli->kinstr_ctx->lock); -+ /* Enable HWC from the configuration of the client creation */ -+ ret = kbase_hwcnt_virtualizer_client_set_counters( -+ cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); ++ /* On error, we could continue on: providing none of the below resources ++ * rely on the ones above ++ */ + -+ if (!ret) { -+ cli->active = true; -+ cli->user_data = user_data; -+ cli->sample_flags = 0; ++ mutex_init(&jsdd->runpool_mutex); ++ mutex_init(&jsdd->queue_mutex); ++ sema_init(&jsdd->schedule_sem, 1); + -+ if (cli->dump_interval_ns) -+ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { ++ for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { ++ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); ++ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); ++ } + } + -+ mutex_unlock(&cli->kinstr_ctx->lock); -+ -+ return ret; ++ return 0; +} + -+static int -+kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, -+ u64 user_data) ++void kbasep_js_devdata_halt(struct kbase_device *kbdev) +{ -+ int ret; -+ u64 tm_start = 0; -+ u64 tm_end = 0; -+ struct kbase_hwcnt_physical_enable_map phys_em; -+ size_t available_samples_count; -+ unsigned int write_idx; -+ unsigned int read_idx; ++ CSTD_UNUSED(kbdev); ++} + -+ WARN_ON(!cli); -+ lockdep_assert_held(&cli->cmd_sync_lock); ++void kbasep_js_devdata_term(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; ++ CSTD_UNUSED(js_devdata); + -+ /* If the client is not started, the command is invalid */ -+ if (!cli->active) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ mutex_lock(&cli->kinstr_ctx->lock); ++ js_devdata = &kbdev->js_data; + -+ /* Disable counters under the lock, so we do not race with the -+ * sampling thread. 
++ /* The caller must de-register all contexts before calling this + */ -+ phys_em.fe_bm = 0; -+ phys_em.tiler_bm = 0; -+ phys_em.mmu_l2_bm = 0; -+ phys_em.shader_bm = 0; ++ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); ++ KBASE_DEBUG_ASSERT(memcmp( ++ js_devdata->runpool_irq.ctx_attr_ref_count, ++ zero_ctx_attr_ref_count, ++ sizeof(zero_ctx_attr_ref_count)) == 0); ++ CSTD_UNUSED(zero_ctx_attr_ref_count); ++} + -+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); ++int kbasep_js_kctx_init(struct kbase_context *const kctx) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info; ++ int i, j; ++ CSTD_UNUSED(js_kctx_info); + -+ /* Check whether one has the buffer to hold the last sample */ -+ write_idx = atomic_read(&cli->write_idx); -+ read_idx = atomic_read(&cli->read_idx); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ available_samples_count = cli->sample_count - (write_idx - read_idx); ++ kbase_ctx_sched_init_ctx(kctx); + -+ ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, -+ &cli->enable_map, -+ &tm_start, &tm_end, -+ &cli->tmp_buf); -+ /* If the last stop sample is in error, set the sample flag */ -+ if (ret) -+ cli->sample_flags |= SAMPLE_FLAG_ERROR; ++ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) ++ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); + -+ /* There must be a place to save the last stop produced sample */ -+ if (!WARN_ON(!available_samples_count)) { -+ write_idx %= cli->sample_arr.sample_count; -+ /* Handle the last stop sample */ -+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, -+ &cli->config.phys_em); -+ /* As this is a stop sample, mark it as MANUAL */ -+ kbasep_kinstr_prfcnt_client_output_sample( -+ cli, write_idx, user_data, tm_start, tm_end); -+ /* Notify client. Make sure all changes to memory are visible. 
*/ -+ wmb(); -+ atomic_inc(&cli->write_idx); -+ wake_up_interruptible(&cli->waitq); -+ } ++ js_kctx_info = &kctx->jctx.sched_info; + -+ cli->active = false; -+ cli->user_data = user_data; ++ kctx->slots_pullable = 0; ++ js_kctx_info->ctx.nr_jobs = 0; ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); ++ kbase_ctx_flag_clear(kctx, KCTX_DYING); ++ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, ++ sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + -+ if (cli->dump_interval_ns) -+ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); ++ /* Initially, the context is disabled from submission until the create ++ * flags are set ++ */ ++ kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + -+ mutex_unlock(&cli->kinstr_ctx->lock); ++ /* On error, we could continue on: providing none of the below resources ++ * rely on the ones above ++ */ ++ mutex_init(&js_kctx_info->ctx.jsctx_mutex); ++ ++ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); ++ ++ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { ++ for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { ++ INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); ++ kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; ++ } ++ } + + return 0; +} + -+static int -+kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, -+ u64 user_data) ++void kbasep_js_kctx_term(struct kbase_context *kctx) +{ -+ int ret; ++ struct kbase_device *kbdev; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ unsigned int js; ++ bool update_ctx_count = false; ++ unsigned long flags; ++ CSTD_UNUSED(js_kctx_info); + -+ lockdep_assert_held(&cli->cmd_sync_lock); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ /* If the client is not started, or not manual, the command invalid */ -+ if (!cli->active || cli->dump_interval_ns) -+ return -EINVAL; ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ mutex_lock(&cli->kinstr_ctx->lock); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data); ++ /* The caller must de-register all jobs before calling this */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + -+ mutex_unlock(&cli->kinstr_ctx->lock); ++ mutex_lock(&kbdev->js_data.queue_mutex); ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + -+ return ret; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { ++ WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ update_ctx_count = true; ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ } ++ ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kbdev->js_data.queue_mutex); ++ ++ if (update_ctx_count) { ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&kbdev->js_data.runpool_mutex); ++ } ++ ++ kbase_ctx_sched_remove_ctx(kctx); +} + -+static int -+kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) ++/* ++ * Priority blocking management functions ++ */ ++ ++/* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */ ++static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, ++ int 
sched_prio) +{ -+ unsigned int write_idx; ++ struct kbase_jsctx_slot_tracking *slot_tracking = ++ &kctx->slot_tracking[js]; + -+ WARN_ON(!cli); -+ lockdep_assert_held(&cli->cmd_sync_lock); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ mutex_lock(&cli->kinstr_ctx->lock); ++ slot_tracking->blocked &= ~(((kbase_js_prio_bitmap_t)1) << sched_prio); ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, JS_SLOT_PRIO_UNBLOCKED, kctx, ++ NULL, 0, js, (unsigned int)sched_prio); ++} + -+ write_idx = atomic_read(&cli->write_idx); ++static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) ++{ ++ return atomic_read(&kctx->slot_tracking[js].atoms_pulled); ++} + -+ /* Discard (clear) all internally buffered samples. Note, if there -+ * is a fetched sample in flight, one should not touch the read index, -+ * leaving it alone for the put-sample operation to update it. The -+ * consistency between the read_idx and the fetch_idx is coordinated by -+ * holding the cli->cmd_sync_lock. -+ */ -+ if (atomic_read(&cli->fetch_idx) != atomic_read(&cli->read_idx)) { -+ atomic_set(&cli->fetch_idx, write_idx); -+ } else { -+ atomic_set(&cli->fetch_idx, write_idx); -+ atomic_set(&cli->read_idx, write_idx); -+ } ++/* ++ * A priority level on a slot is blocked when: ++ * - that priority level is blocked ++ * - or, any higher priority level is blocked ++ */ ++static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, ++ int sched_prio) ++{ ++ struct kbase_jsctx_slot_tracking *slot_tracking = ++ &kctx->slot_tracking[js]; ++ kbase_js_prio_bitmap_t prio_bit, higher_prios_mask; + -+ mutex_unlock(&cli->kinstr_ctx->lock); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ return 0; ++ /* done in two separate shifts to prevent future undefined behavior ++ * should the number of priority levels == (bit width of the type) ++ */ ++ prio_bit = (((kbase_js_prio_bitmap_t)1) << sched_prio); ++ /* all bits of sched_prio or higher, with sched_prio = 0 being the ++ * highest priority ++ */ ++ higher_prios_mask = (prio_bit << 1) - 1u; ++ return (slot_tracking->blocked & higher_prios_mask) != 0u; +} + -+int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, -+ struct prfcnt_control_cmd *control_cmd) ++/** ++ * kbase_jsctx_slot_atom_pulled_inc - Increase counts of atoms that have being ++ * pulled for a slot from a ctx, based on ++ * this atom ++ * @kctx: kbase context ++ * @katom: atom pulled ++ * ++ * Manages counts of atoms pulled (including per-priority-level counts), for ++ * later determining when a ctx can become unblocked on a slot. ++ * ++ * Once a slot has been blocked at @katom's priority level, it should not be ++ * pulled from, hence this function should not be called in that case. ++ * ++ * The return value is to aid tracking of when @kctx becomes runnable. 
++ * ++ * Return: new total count of atoms pulled from all slots on @kctx ++ */ ++static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, ++ const struct kbase_jd_atom *katom) +{ -+ int ret = 0; ++ unsigned int js = katom->slot_nr; ++ int sched_prio = katom->sched_priority; ++ struct kbase_jsctx_slot_tracking *slot_tracking = ++ &kctx->slot_tracking[js]; ++ int nr_atoms_pulled; + -+ mutex_lock(&cli->cmd_sync_lock); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ switch (control_cmd->cmd) { -+ case PRFCNT_CONTROL_CMD_START: -+ ret = kbasep_kinstr_prfcnt_client_start(cli, -+ control_cmd->user_data); -+ break; -+ case PRFCNT_CONTROL_CMD_STOP: -+ ret = kbasep_kinstr_prfcnt_client_stop(cli, -+ control_cmd->user_data); -+ break; -+ case PRFCNT_CONTROL_CMD_SAMPLE_SYNC: -+ ret = kbasep_kinstr_prfcnt_client_sync_dump( -+ cli, control_cmd->user_data); -+ break; -+ case PRFCNT_CONTROL_CMD_DISCARD: -+ ret = kbasep_kinstr_prfcnt_client_discard(cli); -+ break; -+ default: -+ ret = -EINVAL; -+ break; -+ } ++ WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio), ++ "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher", ++ js, sched_prio); + -+ mutex_unlock(&cli->cmd_sync_lock); ++ nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots); ++ atomic_inc(&slot_tracking->atoms_pulled); ++ slot_tracking->atoms_pulled_pri[sched_prio]++; + -+ return ret; ++ return nr_atoms_pulled; +} + -+static int -+kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, -+ struct prfcnt_sample_access *sample_access) ++/** ++ * kbase_jsctx_slot_atom_pulled_dec- Decrease counts of atoms that have being ++ * pulled for a slot from a ctx, and ++ * re-evaluate whether a context is blocked ++ * on this slot ++ * @kctx: kbase context ++ * @katom: atom that has just been removed from a job slot ++ * ++ * @kctx can become unblocked on a slot for a priority level when it no longer ++ * has any pulled atoms at that priority level on that slot, and all higher ++ * (numerically lower) priority levels are also unblocked @kctx on that ++ * slot. The latter condition is to retain priority ordering within @kctx. ++ * ++ * Return: true if the slot was previously blocked but has now become unblocked ++ * at @katom's priority level, false otherwise. ++ */ ++static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, ++ const struct kbase_jd_atom *katom) +{ -+ unsigned int write_idx; -+ unsigned int read_idx; -+ unsigned int fetch_idx; -+ u64 sample_offset_bytes; -+ struct prfcnt_metadata *sample_meta; -+ int err = 0; ++ unsigned int js = katom->slot_nr; ++ int sched_prio = katom->sched_priority; ++ int atoms_pulled_pri; ++ struct kbase_jsctx_slot_tracking *slot_tracking = ++ &kctx->slot_tracking[js]; ++ bool slot_prio_became_unblocked = false; + -+ mutex_lock(&cli->cmd_sync_lock); -+ write_idx = atomic_read(&cli->write_idx); -+ read_idx = atomic_read(&cli->read_idx); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ if (write_idx == read_idx) { -+ err = -EINVAL; -+ goto error_out; -+ } ++ atomic_dec(&kctx->atoms_pulled_all_slots); ++ atomic_dec(&slot_tracking->atoms_pulled); + -+ /* If the client interface has already had a sample been fetched, -+ * reflected by the fetch index not equal to read_idx, i.e., typically -+ * read_idx + 1 == fetch_idx, -+ * further fetch is not allowed until the previously fetched buffer -+ * is put back (which brings the read_idx == fetch_idx). 
As a design, -+ * the above add one equal condition (i.e. typical cases) may only be -+ * untrue if there had been an interface operation on sample discard, -+ * after the sample in question already been fetched, in which case, -+ * the fetch_idx could have a delta larger than 1 relative to the -+ * read_idx. -+ */ -+ fetch_idx = atomic_read(&cli->fetch_idx); -+ if (read_idx != fetch_idx) { -+ err = -EBUSY; -+ goto error_out; -+ } ++ atoms_pulled_pri = --(slot_tracking->atoms_pulled_pri[sched_prio]); + -+ read_idx %= cli->sample_arr.sample_count; -+ sample_meta = cli->sample_arr.samples[read_idx].sample_meta; -+ sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; ++ /* We can safely clear this priority level's blocked status even if ++ * higher priority levels are still blocked: a subsequent query to ++ * kbase_jsctx_slot_prio_is_blocked() will still return true ++ */ ++ if (!atoms_pulled_pri && ++ kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) { ++ kbase_jsctx_slot_prio_blocked_clear(kctx, js, sched_prio); + -+ sample_access->sequence = sample_meta->u.sample_md.seq; -+ sample_access->sample_offset_bytes = sample_offset_bytes; ++ if (!kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio)) ++ slot_prio_became_unblocked = true; ++ } + -+ /* Marking a sample has been fetched by advancing the fetch index */ -+ atomic_inc(&cli->fetch_idx); ++ if (slot_prio_became_unblocked) ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kctx->kbdev, ++ JS_SLOT_PRIO_AND_HIGHER_UNBLOCKED, ++ kctx, katom, katom->jc, js, ++ (unsigned int)sched_prio); + -+error_out: -+ mutex_unlock(&cli->cmd_sync_lock); -+ return err; ++ return slot_prio_became_unblocked; +} + -+static int -+kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, -+ struct prfcnt_sample_access *sample_access) ++/** ++ * kbase_js_ctx_list_add_pullable_nolock - Variant of ++ * kbase_jd_ctx_list_add_pullable() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js) +{ -+ unsigned int write_idx; -+ unsigned int read_idx; -+ unsigned int fetch_idx; -+ u64 sample_offset_bytes; -+ int err = 0; ++ bool ret = false; + -+ mutex_lock(&cli->cmd_sync_lock); -+ write_idx = atomic_read(&cli->write_idx); -+ read_idx = atomic_read(&cli->read_idx); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js); + -+ if (write_idx == read_idx || sample_access->sequence != read_idx) { -+ err = -EINVAL; -+ goto error_out; -+ } ++ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ read_idx %= cli->sample_arr.sample_count; -+ sample_offset_bytes = -+ (u8 *)cli->sample_arr.samples[read_idx].sample_meta - cli->sample_arr.user_buf; ++ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + -+ if (sample_access->sample_offset_bytes != sample_offset_bytes) { -+ err = -EINVAL; -+ goto error_out; ++ if (!kctx->slots_pullable) { ++ kbdev->js_data.nr_contexts_pullable++; ++ ret = true; ++ if (!kbase_jsctx_atoms_pulled(kctx)) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, 
KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); ++ } + } ++ kctx->slots_pullable |= (1 << js); + -+ fetch_idx = atomic_read(&cli->fetch_idx); -+ WARN_ON(read_idx == fetch_idx); -+ /* Setting the read_idx matching the fetch_idx, signals no in-flight -+ * fetched sample. -+ */ -+ atomic_set(&cli->read_idx, fetch_idx); -+ -+error_out: -+ mutex_unlock(&cli->cmd_sync_lock); -+ return err; ++ return ret; +} + +/** -+ * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl. -+ * @filp: Non-NULL pointer to file structure. -+ * @cmd: User command. -+ * @arg: Command's argument. ++ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of ++ * kbase_js_ctx_list_add_pullable_head() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use + * -+ * Return: 0 on success, else error code. ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() + */ -+static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, -+ unsigned int cmd, -+ unsigned long arg) ++static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js) +{ -+ long rcode = 0; -+ struct kbase_kinstr_prfcnt_client *cli; -+ void __user *uarg = (void __user *)arg; -+ -+ if (!filp) -+ return -EINVAL; -+ -+ cli = filp->private_data; -+ -+ if (!cli) -+ return -EINVAL; ++ bool ret = false; + -+ switch (_IOC_NR(cmd)) { -+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): { -+ struct prfcnt_control_cmd control_cmd; -+ int err; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js); + -+ err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd)); -+ if (err) -+ return -EFAULT; -+ rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd); -+ } break; -+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): { -+ struct prfcnt_sample_access sample_access; -+ int err; ++ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ memset(&sample_access, 0, sizeof(sample_access)); -+ rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access); -+ err = copy_to_user(uarg, &sample_access, sizeof(sample_access)); -+ if (err) -+ return -EFAULT; -+ } break; -+ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): { -+ struct prfcnt_sample_access sample_access; -+ int err; ++ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + -+ err = copy_from_user(&sample_access, uarg, -+ sizeof(sample_access)); -+ if (err) -+ return -EFAULT; -+ rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access); -+ } break; -+ default: -+ rcode = -EINVAL; -+ break; ++ if (!kctx->slots_pullable) { ++ kbdev->js_data.nr_contexts_pullable++; ++ ret = true; ++ if (!kbase_jsctx_atoms_pulled(kctx)) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); ++ } + } ++ kctx->slots_pullable |= (1 << js); + -+ return rcode; ++ return ret; +} + +/** -+ * kbasep_kinstr_prfcnt_hwcnt_reader_mmap() - hwcnt reader's mmap. -+ * @filp: Non-NULL pointer to file structure. -+ * @vma: Non-NULL pointer to vma structure. 
++ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the ++ * per-slot pullable context queue ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use + * -+ * Return: 0 on success, else error code. ++ * If the context is on either the pullable or unpullable queues, then it is ++ * removed before being added to the head. ++ * ++ * This function should be used when a context has been scheduled, but no jobs ++ * can currently be pulled from it. ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() + */ -+static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, -+ struct vm_area_struct *vma) ++static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js) +{ -+ struct kbase_kinstr_prfcnt_client *cli; -+ -+ if (!filp || !vma) -+ return -EINVAL; ++ bool ret; ++ unsigned long flags; + -+ cli = filp->private_data; -+ if (!cli) -+ return -EINVAL; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0); ++ return ret; +} + -+static void kbasep_kinstr_prfcnt_sample_array_free( -+ struct kbase_kinstr_prfcnt_sample_array *sample_arr) ++/** ++ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the ++ * per-slot unpullable context queue ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use ++ * ++ * The context must already be on the per-slot pullable queue. It will be ++ * removed from the pullable queue before being added to the unpullable queue. ++ * ++ * This function should be used when a context has been pulled from, and there ++ * are no jobs remaining on the specified slot. 
++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, unsigned int js) +{ -+ if (!sample_arr) -+ return; ++ bool ret = false; + -+ kfree(sample_arr->samples); -+ vfree(sample_arr->user_buf); -+ memset(sample_arr, 0, sizeof(*sample_arr)); -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); + -+static void -+kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, -+ enum kbase_kinstr_prfcnt_client_init_state init_state) -+{ -+ if (!cli) -+ return; ++ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); + -+ while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { -+ switch (init_state) { -+ case KINSTR_PRFCNT_INITIALISED: -+ /* This shouldn't be reached */ -+ break; -+ case KINSTR_PRFCNT_WAITQ_MUTEX: -+ mutex_destroy(&cli->cmd_sync_lock); -+ break; -+ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: -+ kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); -+ break; -+ case KINSTR_PRFCNT_SAMPLE_ARRAY: -+ kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); -+ break; -+ case KINSTR_PRFCNT_DUMP_BUFFER: -+ kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); -+ break; -+ case KINSTR_PRFCNT_ENABLE_MAP: -+ kbase_hwcnt_enable_map_free(&cli->enable_map); -+ break; -+ case KINSTR_PRFCNT_PARSE_SETUP: -+ /* Nothing to do here */ -+ break; ++ if (kctx->slots_pullable == (1 << js)) { ++ kbdev->js_data.nr_contexts_pullable--; ++ ret = true; ++ if (!kbase_jsctx_atoms_pulled(kctx)) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + } -+ kfree(cli); -+} ++ kctx->slots_pullable &= ~(1 << js); + -+void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) -+{ -+ kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); ++ return ret; +} + +/** -+ * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. -+ * @inode: Non-NULL pointer to inode structure. -+ * @filp: Non-NULL pointer to file structure. ++ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable ++ * or unpullable context queues ++ * @kbdev: Device pointer ++ * @kctx: Context to remove from queue ++ * @js: Job slot to use + * -+ * Return: 0 always. ++ * The context must already be on one of the queues. ++ * ++ * This function should be used when a context has no jobs on the GPU, and no ++ * jobs remaining for the specified slot. 
++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() + */ -+static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode, -+ struct file *filp) ++static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js) +{ -+ struct kbase_kinstr_prfcnt_client *cli = filp->private_data; ++ bool ret = false; + -+ mutex_lock(&cli->kinstr_ctx->lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ WARN_ON(cli->kinstr_ctx->client_count == 0); -+ if (cli->kinstr_ctx->client_count > 0) -+ cli->kinstr_ctx->client_count--; -+ list_del(&cli->node); ++ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); + -+ mutex_unlock(&cli->kinstr_ctx->lock); ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ kbasep_kinstr_prfcnt_client_destroy(cli); ++ if (kctx->slots_pullable == (1 << js)) { ++ kbdev->js_data.nr_contexts_pullable--; ++ ret = true; ++ if (!kbase_jsctx_atoms_pulled(kctx)) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ } ++ } ++ kctx->slots_pullable &= ~(1 << js); + -+ return 0; ++ return ret; +} + -+/* kinstr_prfcnt client file operations */ -+static const struct file_operations kinstr_prfcnt_client_fops = { -+ .owner = THIS_MODULE, -+ .poll = kbasep_kinstr_prfcnt_hwcnt_reader_poll, -+ .unlocked_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl, -+ .compat_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl, -+ .mmap = kbasep_kinstr_prfcnt_hwcnt_reader_mmap, -+ .release = kbasep_kinstr_prfcnt_hwcnt_reader_release, -+}; -+ -+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_enable_map *enable_map) ++/** ++ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @js: Job slot to use ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: Context to use for specified slot. ++ * NULL if no contexts present for specified slot ++ */ ++static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev, ++ unsigned int js) +{ -+ size_t grp, blk, blk_inst; -+ size_t md_count = 0; ++ struct kbase_context *kctx; ++ int i; + -+ if (!metadata) -+ return 0; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { -+ /* Skip unavailable, non-enabled or reserved blocks */ -+ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || -+ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || -+ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) ++ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { ++ if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) + continue; + -+ md_count++; -+ } -+ -+ /* add counts for clock_meta and sample meta, respectively */ -+ md_count += 2; -+ -+ /* Reserve one for last sentinel item. 
*/ -+ md_count++; ++ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, ++ struct kbase_context, ++ jctx.sched_info.ctx.ctx_list_entry[js]); + -+ return md_count; ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, ++ js); ++ return kctx; ++ } ++ return NULL; +} + -+static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_client *cli, -+ const struct kbase_hwcnt_metadata *metadata) ++/** ++ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable ++ * queue. ++ * @kbdev: Device pointer ++ * @js: Job slot to use ++ * ++ * Return: Context to use for specified slot. ++ * NULL if no contexts present for specified slot ++ */ ++static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js) +{ -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; -+ size_t sample_meta_bytes; -+ struct kbase_hwcnt_dump_buffer *dump_buf = &cli->tmp_buf; -+ size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map); -+ -+ if (!metadata) -+ return 0; ++ struct kbase_context *kctx; ++ unsigned long flags; + -+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; -+ dump_buf_bytes = metadata->dump_buf_bytes; -+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return (sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes); ++ return kctx; +} + +/** -+ * kbasep_kinstr_prfcnt_dump_worker()- Dump worker, that dumps all periodic -+ * clients that need to be dumped, then -+ * reschedules itself. -+ * @work: Work structure. 
++ * kbase_js_ctx_pullable - Return if a context can be pulled from on the ++ * specified slot ++ * @kctx: Context pointer ++ * @js: Job slot to use ++ * @is_scheduled: true if the context is currently scheduled ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if context can be pulled from on specified slot ++ * false otherwise + */ -+static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) ++static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled) +{ -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( -+ work, struct kbase_kinstr_prfcnt_context, dump_work); -+ struct kbase_kinstr_prfcnt_client *pos; -+ u64 cur_time_ns; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_jd_atom *katom; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ mutex_lock(&kinstr_ctx->lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns(); ++ js_devdata = &kbdev->js_data; + -+ list_for_each_entry(pos, &kinstr_ctx->clients, node) { -+ if (pos->active && (pos->next_dump_time_ns != 0) && -+ (pos->next_dump_time_ns < cur_time_ns)) -+ kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC, -+ pos->user_data); ++ if (is_scheduled) { ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { ++ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", ++ (void *)kctx); ++ return false; ++ } ++ } ++ katom = jsctx_rb_peek(kctx, js); ++ if (!katom) { ++ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); ++ return false; /* No pullable atoms */ ++ } ++ if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { ++ KBASE_KTRACE_ADD_JM_SLOT_INFO( ++ kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, ++ katom->jc, js, (unsigned int)katom->sched_priority); ++ dev_dbg(kbdev->dev, ++ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", ++ (void *)kctx, katom->sched_priority, js); ++ return false; ++ } ++ if (atomic_read(&katom->blocked)) { ++ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", ++ (void *)katom); ++ return false; /* next atom blocked */ ++ } ++ if (kbase_js_atom_blocked_on_x_dep(katom)) { ++ if (katom->x_pre_dep->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || ++ katom->x_pre_dep->will_fail_event_code) { ++ dev_dbg(kbdev->dev, ++ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", ++ (void *)katom->x_pre_dep); ++ return false; ++ } ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && ++ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { ++ dev_dbg(kbdev->dev, ++ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", ++ (void *)katom, js); ++ return false; ++ } + } + -+ kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); ++ dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom, ++ (void *)kctx, js); + -+ mutex_unlock(&kinstr_ctx->lock); ++ return true; +} + -+/** -+ * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for -+ * execution as soon as possible. -+ * @timer: Timer structure. -+ * -+ * Return: HRTIMER_NORESTART always. 
-+ */ -+static enum hrtimer_restart -+kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) ++static bool kbase_js_dep_validate(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( -+ timer, struct kbase_kinstr_prfcnt_context, dump_timer); ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool ret = true; ++ bool has_dep = false, has_x_dep = false; ++ unsigned int js = kbase_js_get_slot(kbdev, katom); ++ int prio = katom->sched_priority; ++ int i; + -+ /* We don't need to check kinstr_ctx->suspend_count here. -+ * Suspend and resume functions already ensure that the worker -+ * is cancelled when the driver is suspended, and resumed when -+ * the suspend_count reaches 0. -+ */ -+ kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, -+ &kinstr_ctx->dump_work); ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + -+ return HRTIMER_NORESTART; -+} ++ if (dep_atom) { ++ unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); ++ int dep_prio = dep_atom->sched_priority; + -+int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx) -+{ -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx; -+ const struct kbase_hwcnt_metadata *metadata; ++ dev_dbg(kbdev->dev, ++ "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", ++ i, (void *)katom, js, (void *)dep_atom, dep_js); + -+ if (!hvirt || !out_kinstr_ctx) -+ return -EINVAL; ++ /* Dependent atom must already have been submitted */ ++ if (!(dep_atom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { ++ dev_dbg(kbdev->dev, ++ "Blocker not submitted yet\n"); ++ ret = false; ++ break; ++ } + -+ metadata = kbase_hwcnt_virtualizer_metadata(hvirt); ++ /* Dependencies with different priorities can't ++ * be represented in the ringbuffer ++ */ ++ if (prio != dep_prio) { ++ dev_dbg(kbdev->dev, ++ "Different atom priorities\n"); ++ ret = false; ++ break; ++ } + -+ if (!metadata) -+ return -EINVAL; ++ if (js == dep_js) { ++ /* Only one same-slot dependency can be ++ * represented in the ringbuffer ++ */ ++ if (has_dep) { ++ dev_dbg(kbdev->dev, ++ "Too many same-slot deps\n"); ++ ret = false; ++ break; ++ } ++ /* Each dependee atom can only have one ++ * same-slot dependency ++ */ ++ if (dep_atom->post_dep) { ++ dev_dbg(kbdev->dev, ++ "Too many same-slot successors\n"); ++ ret = false; ++ break; ++ } ++ has_dep = true; ++ } else { ++ /* Only one cross-slot dependency can be ++ * represented in the ringbuffer ++ */ ++ if (has_x_dep) { ++ dev_dbg(kbdev->dev, ++ "Too many cross-slot deps\n"); ++ ret = false; ++ break; ++ } ++ /* Each dependee atom can only have one ++ * cross-slot dependency ++ */ ++ if (dep_atom->x_post_dep) { ++ dev_dbg(kbdev->dev, ++ "Too many cross-slot successors\n"); ++ ret = false; ++ break; ++ } ++ /* The dependee atom can not already be in the ++ * HW access ringbuffer ++ */ ++ if (dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ dev_dbg(kbdev->dev, ++ "Blocker already in ringbuffer (state:%d)\n", ++ dep_atom->gpu_rb_state); ++ ret = false; ++ break; ++ } ++ /* The dependee atom can not already have ++ * completed ++ */ ++ if (dep_atom->status != ++ KBASE_JD_ATOM_STATE_IN_JS) { ++ dev_dbg(kbdev->dev, ++ "Blocker already completed (status:%d)\n", ++ dep_atom->status); ++ ret = false; ++ break; ++ } + -+ kinstr_ctx = kzalloc(sizeof(*kinstr_ctx), GFP_KERNEL); ++ has_x_dep = true; ++ } + -+ if (!kinstr_ctx) -+ return -ENOMEM; ++ /* Dependency can be 
represented in ringbuffers */ ++ } ++ } + -+ kinstr_ctx->hvirt = hvirt; -+ kinstr_ctx->metadata = metadata; ++ /* If dependencies can be represented by ringbuffer then clear them from ++ * atom structure ++ */ ++ if (ret) { ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + -+ mutex_init(&kinstr_ctx->lock); -+ INIT_LIST_HEAD(&kinstr_ctx->clients); -+ hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ kinstr_ctx->dump_timer.function = kbasep_kinstr_prfcnt_dump_timer; -+ INIT_WORK(&kinstr_ctx->dump_work, kbasep_kinstr_prfcnt_dump_worker); ++ if (dep_atom) { ++ int dep_js = kbase_js_get_slot(kbdev, dep_atom); + -+ *out_kinstr_ctx = kinstr_ctx; -+ return 0; -+} ++ dev_dbg(kbdev->dev, ++ "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", ++ i, (void *)katom, js, (void *)dep_atom, ++ dep_js); + -+void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) -+{ -+ if (!kinstr_ctx) -+ return; ++ if ((js != dep_js) && ++ (dep_atom->status != ++ KBASE_JD_ATOM_STATE_COMPLETED) ++ && (dep_atom->status != ++ KBASE_JD_ATOM_STATE_HW_COMPLETED) ++ && (dep_atom->status != ++ KBASE_JD_ATOM_STATE_UNUSED)) { + -+ /* Non-zero client count implies client leak */ -+ if (WARN_ON(kinstr_ctx->client_count > 0)) { -+ struct kbase_kinstr_prfcnt_client *pos, *n; ++ katom->atom_flags |= ++ KBASE_KATOM_FLAG_X_DEP_BLOCKED; + -+ list_for_each_entry_safe (pos, n, &kinstr_ctx->clients, node) { -+ list_del(&pos->node); -+ kinstr_ctx->client_count--; -+ kbasep_kinstr_prfcnt_client_destroy(pos); ++ dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", ++ (void *)katom); ++ ++ katom->x_pre_dep = dep_atom; ++ dep_atom->x_post_dep = katom; ++ if (kbase_jd_katom_dep_type( ++ &katom->dep[i]) == ++ BASE_JD_DEP_TYPE_DATA) ++ katom->atom_flags |= ++ KBASE_KATOM_FLAG_FAIL_BLOCKER; ++ } ++ if ((kbase_jd_katom_dep_type(&katom->dep[i]) ++ == BASE_JD_DEP_TYPE_DATA) && ++ (js == dep_js)) { ++ katom->pre_dep = dep_atom; ++ dep_atom->post_dep = katom; ++ } ++ ++ list_del(&katom->dep_item[i]); ++ kbase_jd_katom_dep_clear(&katom->dep[i]); ++ } + } ++ } else { ++ dev_dbg(kbdev->dev, ++ "Deps of atom %pK (s:%d) could not be represented\n", ++ (void *)katom, js); + } + -+ cancel_work_sync(&kinstr_ctx->dump_work); -+ -+ WARN_ON(kinstr_ctx->client_count > 0); -+ kfree(kinstr_ctx); ++ return ret; +} + -+void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) ++void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) +{ -+ if (WARN_ON(!kinstr_ctx)) -+ return; -+ -+ mutex_lock(&kinstr_ctx->lock); ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned int js; + -+ if (!WARN_ON(kinstr_ctx->suspend_count == SIZE_MAX)) -+ kinstr_ctx->suspend_count++; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ mutex_unlock(&kinstr_ctx->lock); ++ /* Move kctx to the pullable/upullable list as per the new priority */ ++ if (new_priority != kctx->priority) { ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kctx->slots_pullable & (1 << js)) ++ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_pullable[js][new_priority]); ++ else ++ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_unpullable[js][new_priority]); ++ } + -+ /* Always sync cancel the timer and then the worker, regardless of the -+ * new suspend count. -+ * -+ * This ensures concurrent calls to kbase_kinstr_prfcnt_suspend() always block -+ * until kinstr_prfcnt is fully suspended. 
-+ * -+ * The timer is canceled before the worker, as the timer -+ * unconditionally re-enqueues the worker, but the worker checks the -+ * suspend_count that we just incremented before rescheduling the timer. -+ * -+ * Therefore if we cancel the worker first, the timer might re-enqueue -+ * the worker before we cancel the timer, but the opposite is not -+ * possible. -+ */ -+ hrtimer_cancel(&kinstr_ctx->dump_timer); -+ cancel_work_sync(&kinstr_ctx->dump_work); ++ kctx->priority = new_priority; ++ } +} + -+void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) ++void kbase_js_update_ctx_priority(struct kbase_context *kctx) +{ -+ if (WARN_ON(!kinstr_ctx)) -+ return; -+ -+ mutex_lock(&kinstr_ctx->lock); ++ struct kbase_device *kbdev = kctx->kbdev; ++ int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; ++ int prio; + -+ if (!WARN_ON(kinstr_ctx->suspend_count == 0)) { -+ kinstr_ctx->suspend_count--; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Last resume, so re-enqueue the worker if we have any periodic -+ * clients. ++ if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { ++ /* Determine the new priority for context, as per the priority ++ * of currently in-use atoms. + */ -+ if (kinstr_ctx->suspend_count == 0) { -+ struct kbase_kinstr_prfcnt_client *pos; -+ bool has_periodic_clients = false; -+ -+ list_for_each_entry (pos, &kinstr_ctx->clients, node) { -+ if (pos->dump_interval_ns != 0) { -+ has_periodic_clients = true; -+ break; -+ } ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; ++ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ if (kctx->atoms_count[prio]) { ++ new_priority = prio; ++ break; + } -+ -+ if (has_periodic_clients) -+ kbase_hwcnt_virtualizer_queue_work( -+ kinstr_ctx->hvirt, -+ &kinstr_ctx->dump_work); + } + } + -+ mutex_unlock(&kinstr_ctx->lock); ++ kbase_js_set_ctx_priority(kctx, new_priority); +} ++KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority); + -+static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_client *cli, -+ const struct kbase_hwcnt_metadata *metadata) ++/** ++ * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler ++ * @start_katom: Pointer to the atom to be added. ++ * Return: 0 if successful or a negative value on failure. 
++ */ ++static int js_add_start_rp(struct kbase_jd_atom *const start_katom) +{ -+ struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr; -+ struct kbase_kinstr_prfcnt_sample *samples; -+ size_t sample_idx; -+ size_t dump_buf_bytes; -+ size_t clk_cnt_buf_bytes; -+ size_t sample_meta_bytes; -+ size_t md_count; -+ size_t sample_size; -+ size_t buffer_count = cli->config.buffer_count; ++ struct kbase_context *const kctx = start_katom->kctx; ++ struct kbase_jd_renderpass *rp; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ unsigned long flags; + -+ if (!metadata || !sample_arr) ++ lockdep_assert_held(&kctx->jctx.lock); ++ ++ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return -EINVAL; + -+ md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map); -+ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; -+ dump_buf_bytes = metadata->dump_buf_bytes; -+ clk_cnt_buf_bytes = -+ sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; -+ sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes; ++ if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) ++ return -EINVAL; + -+ samples = kmalloc_array(buffer_count, sizeof(*samples), GFP_KERNEL); ++ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ if (!samples) -+ return -ENOMEM; ++ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + -+ sample_arr->user_buf = vmalloc_user(sample_size * buffer_count); ++ if (rp->state != KBASE_JD_RP_COMPLETE) ++ return -EINVAL; + -+ if (!sample_arr->user_buf) { -+ kfree(samples); -+ return -ENOMEM; -+ } ++ dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", ++ (void *)start_katom, start_katom->renderpass_id); + -+ sample_arr->sample_count = buffer_count; -+ sample_arr->samples = samples; ++ /* The following members are read when updating the job slot ++ * ringbuffer/fifo therefore they require additional locking. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ for (sample_idx = 0; sample_idx < buffer_count; sample_idx++) { -+ const size_t sample_meta_offset = sample_size * sample_idx; -+ const size_t dump_buf_offset = -+ sample_meta_offset + sample_meta_bytes; -+ const size_t clk_cnt_buf_offset = -+ dump_buf_offset + dump_buf_bytes; ++ rp->state = KBASE_JD_RP_START; ++ rp->start_katom = start_katom; ++ rp->end_katom = NULL; ++ INIT_LIST_HEAD(&rp->oom_reg_list); + -+ /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ -+ samples[sample_idx].dump_buf.metadata = metadata; -+ samples[sample_idx].sample_meta = -+ (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset); -+ samples[sample_idx].dump_buf.dump_buf = -+ (u64 *)(sample_arr->user_buf + dump_buf_offset); -+ samples[sample_idx].dump_buf.clk_cnt_buf = -+ (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset); -+ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + -+static bool prfcnt_mode_supported(u8 mode) -+{ -+ return (mode == PRFCNT_MODE_MANUAL) || (mode == PRFCNT_MODE_PERIODIC); -+} -+ -+static void -+kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em, -+ const uint64_t *enable_mask) -+{ -+ *phys_em |= kbase_hwcnt_backend_gpu_block_map_to_physical( -+ enable_mask[0], enable_mask[1]); -+} -+ +/** -+ * kbasep_kinstr_prfcnt_parse_request_enable - Parse an enable request -+ * @req_enable: Performance counters enable request to parse. 
-+ * @config: Client object the session configuration should be written to. -+ * -+ * This function parses a performance counters enable request. -+ * This type of request specifies a bitmask of HW counters to enable -+ * for one performance counters block type. In addition to that, -+ * a performance counters enable request may also set "global" -+ * configuration properties that affect the whole session, like the -+ * performance counters set, which shall be compatible with the same value -+ * set by other performance request items. -+ * -+ * Return: 0 on success, else error code. ++ * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler ++ * @end_katom: Pointer to the atom to be added. ++ * Return: 0 if successful or a negative value on failure. + */ -+static int kbasep_kinstr_prfcnt_parse_request_enable( -+ const struct prfcnt_request_enable *req_enable, -+ struct kbase_kinstr_prfcnt_client_config *config) ++static int js_add_end_rp(struct kbase_jd_atom *const end_katom) +{ -+ int err = 0; -+ u8 req_set = KBASE_HWCNT_SET_UNDEFINED, default_set; ++ struct kbase_context *const kctx = end_katom->kctx; ++ struct kbase_jd_renderpass *rp; ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ switch (req_enable->set) { -+ case PRFCNT_SET_PRIMARY: -+ req_set = KBASE_HWCNT_SET_PRIMARY; -+ break; -+ case PRFCNT_SET_SECONDARY: -+ req_set = KBASE_HWCNT_SET_SECONDARY; -+ break; -+ case PRFCNT_SET_TERTIARY: -+ req_set = KBASE_HWCNT_SET_TERTIARY; -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } ++ lockdep_assert_held(&kctx->jctx.lock); + -+ /* The performance counter set is a "global" property that affects -+ * the whole session. Either this is the first request that sets -+ * the value, or it shall be identical to all previous requests. -+ */ -+ if (!err) { -+ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) -+ config->counter_set = req_set; -+ else if (config->counter_set != req_set) -+ err = -EINVAL; -+ } ++ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) ++ return -EINVAL; + -+ /* Temporarily, the requested set cannot be different from the default -+ * set because it's the only one to be supported. This will change in -+ * the future. -+ */ -+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) -+ default_set = KBASE_HWCNT_SET_SECONDARY; -+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) -+ default_set = KBASE_HWCNT_SET_TERTIARY; -+#else -+ /* Default to primary */ -+ default_set = KBASE_HWCNT_SET_PRIMARY; -+#endif ++ if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) ++ return -EINVAL; + -+ if (req_set != default_set) -+ err = -EINVAL; ++ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ if (err < 0) -+ return err; ++ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + -+ /* Enable the performance counters based on the bitmask provided -+ * by the user space client. -+ * It is possible to receive multiple requests for the same counter -+ * block, in which case the bitmask will be a logical OR of all the -+ * bitmasks given by the client. 
-+ */ -+ switch (req_enable->block_type) { -+ case PRFCNT_BLOCK_TYPE_FE: -+ kbasep_kinstr_prfcnt_block_enable_to_physical( -+ &config->phys_em.fe_bm, req_enable->enable_mask); -+ break; -+ case PRFCNT_BLOCK_TYPE_TILER: -+ kbasep_kinstr_prfcnt_block_enable_to_physical( -+ &config->phys_em.tiler_bm, req_enable->enable_mask); -+ break; -+ case PRFCNT_BLOCK_TYPE_MEMORY: -+ kbasep_kinstr_prfcnt_block_enable_to_physical( -+ &config->phys_em.mmu_l2_bm, req_enable->enable_mask); -+ break; -+ case PRFCNT_BLOCK_TYPE_SHADER_CORE: -+ kbasep_kinstr_prfcnt_block_enable_to_physical( -+ &config->phys_em.shader_bm, req_enable->enable_mask); -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } ++ dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", ++ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + -+ return err; -+} ++ if (rp->state == KBASE_JD_RP_COMPLETE) ++ return -EINVAL; + -+/** -+ * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request -+ * @req_scope: Performance counters scope request to parse. -+ * @config: Client object the session configuration should be written to. -+ * -+ * This function parses a performance counters scope request. -+ * There are only 2 acceptable outcomes: either the client leaves the scope -+ * as undefined, or all the scope requests are set to the same value. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_kinstr_prfcnt_parse_request_scope( -+ const struct prfcnt_request_scope *req_scope, -+ struct kbase_kinstr_prfcnt_client_config *config) -+{ -+ int err = 0; ++ if (rp->end_katom == NULL) { ++ /* We can't be in a retry state until the fragment job chain ++ * has completed. ++ */ ++ unsigned long flags; + -+ if (config->scope == PRFCNT_SCOPE_RESERVED) -+ config->scope = req_scope->scope; -+ else if (config->scope != req_scope->scope) -+ err = -EINVAL; ++ WARN_ON(rp->state == KBASE_JD_RP_RETRY); ++ WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); ++ WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); + -+ return err; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ rp->end_katom = end_katom; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else ++ WARN_ON(rp->end_katom != end_katom); ++ ++ return 0; +} + -+/** -+ * kbasep_kinstr_prfcnt_parse_setup - Parse session setup -+ * @kinstr_ctx: Pointer to the kinstr_prfcnt context. -+ * @setup: Session setup information to parse. -+ * @config: Client object the session configuration should be written to. -+ * @req_arr: Pointer to array of request items for client session. -+ * -+ * This function parses the list of "request" items sent by the user space -+ * client, and writes the configuration for the new client to be created -+ * for the session. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ union kbase_ioctl_kinstr_prfcnt_setup *setup, -+ struct kbase_kinstr_prfcnt_client_config *config, -+ struct prfcnt_request_item *req_arr) ++bool kbasep_js_add_job(struct kbase_context *kctx, ++ struct kbase_jd_atom *atom) +{ -+ uint32_t i; -+ unsigned int item_count = setup->in.request_item_count; ++ unsigned long flags; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; + int err = 0; + -+ if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE || -+ req_arr[item_count - 1].hdr.item_version != 0) { -+ return -EINVAL; -+ } -+ -+ /* The session configuration can only feature one value for some -+ * properties (like capture mode, block counter set and scope), but the -+ * client may potential issue multiple requests and try to set more than -+ * one value for those properties. While issuing multiple requests for the -+ * same property is allowed by the protocol, asking for different values -+ * is illegal. Leaving these properties as undefined is illegal, too. -+ */ -+ config->prfcnt_mode = PRFCNT_MODE_RESERVED; -+ config->counter_set = KBASE_HWCNT_SET_UNDEFINED; -+ config->scope = PRFCNT_SCOPE_RESERVED; -+ -+ for (i = 0; i < item_count - 1; i++) { -+ if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) { -+ err = -EINVAL; -+ break; -+ } -+ -+ switch (req_arr[i].hdr.item_type) { -+ /* Capture mode is initialized as undefined. -+ * The first request of this type sets the capture mode. -+ * The protocol allows the client to send redundant requests, -+ * but only if they replicate the same value that has already -+ * been set by the first request. -+ */ -+ case PRFCNT_REQUEST_TYPE_MODE: -+ if (!prfcnt_mode_supported(req_arr[i].u.req_mode.mode)) -+ err = -EINVAL; -+ else if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) -+ config->prfcnt_mode = -+ req_arr[i].u.req_mode.mode; -+ else if (req_arr[i].u.req_mode.mode != -+ config->prfcnt_mode) -+ err = -EINVAL; -+ -+ if (err < 0) -+ break; -+ -+ if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) { -+ config->period_ns = -+ req_arr[i] -+ .u.req_mode.mode_config.periodic -+ .period_ns; -+ -+ if ((config->period_ns != 0) && -+ (config->period_ns < -+ DUMP_INTERVAL_MIN_NS)) { -+ config->period_ns = -+ DUMP_INTERVAL_MIN_NS; -+ } ++ bool enqueue_required = false; ++ bool timer_sync = false; + -+ if (config->period_ns == 0) -+ err = -EINVAL; -+ } -+ break; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(atom != NULL); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ case PRFCNT_REQUEST_TYPE_ENABLE: -+ err = kbasep_kinstr_prfcnt_parse_request_enable( -+ &req_arr[i].u.req_enable, config); -+ break; ++ kbdev = kctx->kbdev; ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ case PRFCNT_REQUEST_TYPE_SCOPE: -+ err = kbasep_kinstr_prfcnt_parse_request_scope( -+ &req_arr[i].u.req_scope, config); -+ break; ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ default: -+ err = -EINVAL; -+ break; -+ } ++ if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) ++ err = js_add_start_rp(atom); ++ else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) ++ err = js_add_end_rp(atom); + -+ if (err < 0) -+ break; ++ if (err < 0) { ++ atom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ atom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ goto out_unlock; + } + -+ if (!err) { -+ /* Verify that properties (like capture mode and 
block counter -+ * set) have been defined by the user space client. -+ */ -+ if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) -+ err = -EINVAL; -+ -+ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) -+ err = -EINVAL; -+ } ++ /* ++ * Begin Runpool transaction ++ */ ++ mutex_lock(&js_devdata->runpool_mutex); + -+ return err; -+} ++ /* Refcount ctx.nr_jobs */ ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++ ++(js_kctx_info->ctx.nr_jobs); ++ dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", ++ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + -+int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ union kbase_ioctl_kinstr_prfcnt_setup *setup, -+ struct kbase_kinstr_prfcnt_client **out_vcli, -+ struct prfcnt_request_item *req_arr) -+{ -+ int err; -+ struct kbase_kinstr_prfcnt_client *cli; -+ enum kbase_kinstr_prfcnt_client_init_state init_state; ++ /* Lock for state available during IRQ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (WARN_ON(!kinstr_ctx)) -+ return -EINVAL; ++ if (++kctx->atoms_count[atom->sched_priority] == 1) ++ kbase_js_update_ctx_priority(kctx); + -+ if (WARN_ON(!setup)) -+ return -EINVAL; ++ if (!kbase_js_dep_validate(kctx, atom)) { ++ /* Dependencies could not be represented */ ++ --(js_kctx_info->ctx.nr_jobs); ++ dev_dbg(kbdev->dev, ++ "Remove atom %pK from kctx %pK; now %d in ctx\n", ++ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + -+ if (WARN_ON(!req_arr)) -+ return -EINVAL; ++ /* Setting atom status back to queued as it still has unresolved ++ * dependencies ++ */ ++ atom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); + -+ cli = kzalloc(sizeof(*cli), GFP_KERNEL); ++ /* Undo the count, as the atom will get added again later but ++ * leave the context priority adjusted or boosted, in case if ++ * this was the first higher priority atom received for this ++ * context. ++ * This will prevent the scenario of priority inversion, where ++ * another context having medium priority atoms keeps getting ++ * scheduled over this context, which is having both lower and ++ * higher priority atoms, but higher priority atoms are blocked ++ * due to dependency on lower priority atoms. With priority ++ * boost the high priority atom will get to run at earliest. 
++ */ ++ kctx->atoms_count[atom->sched_priority]--; + -+ if (!cli) -+ return -ENOMEM; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; -+ init_state++) { -+ err = 0; -+ switch (init_state) { -+ case KINSTR_PRFCNT_PARSE_SETUP: -+ cli->kinstr_ctx = kinstr_ctx; -+ err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, -+ req_arr); ++ goto out_unlock; ++ } + -+ break; ++ enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); + -+ case KINSTR_PRFCNT_ENABLE_MAP: -+ cli->config.buffer_count = MAX_BUFFER_COUNT; -+ cli->dump_interval_ns = cli->config.period_ns; -+ cli->next_dump_time_ns = 0; -+ cli->active = false; -+ atomic_set(&cli->write_idx, 0); -+ atomic_set(&cli->read_idx, 0); -+ atomic_set(&cli->fetch_idx, 0); ++ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, ++ kbase_ktrace_get_ctx_refcnt(kctx)); + -+ err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); -+ break; ++ /* Context Attribute Refcounting */ ++ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); + -+ case KINSTR_PRFCNT_DUMP_BUFFER: -+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, -+ &cli->config.phys_em); ++ if (enqueue_required) { ++ if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) ++ timer_sync = kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, atom->slot_nr); ++ else ++ timer_sync = kbase_js_ctx_list_add_unpullable_nolock( ++ kbdev, kctx, atom->slot_nr); ++ } ++ /* If this context is active and the atom is the first on its slot, ++ * kick the job manager to attempt to fast-start the atom ++ */ ++ if (enqueue_required && kctx == ++ kbdev->hwaccess.active_kctx[atom->slot_nr]) ++ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); + -+ cli->sample_count = cli->config.buffer_count; -+ cli->sample_size = -+ kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ /* End runpool transaction */ + -+ /* Use virtualizer's metadata to alloc tmp buffer which interacts with -+ * the HWC virtualizer. ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* A job got added while/after kbase_job_zap_context() ++ * was called on a non-scheduled context. Kill that job ++ * by killing the context. + */ -+ err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); -+ break; -+ -+ case KINSTR_PRFCNT_SAMPLE_ARRAY: -+ /* Disable clock map in setup, and enable clock map when start */ -+ cli->enable_map.clk_enable_map = 0; -+ -+ /* Use metadata from virtualizer to allocate dump buffers if -+ * kinstr_prfcnt doesn't have the truncated metadata. ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, ++ false); ++ } else if (js_kctx_info->ctx.nr_jobs == 1) { ++ /* Handle Refcount going from 0 to 1: schedule the ++ * context on the Queue + */ -+ err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); -+ -+ break; ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); + -+ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: -+ /* Set enable map to be 0 to prevent virtualizer to init and kick the -+ * backend to count. 
++ /* Queue was updated - caller must try to schedule the ++ * head context + */ -+ kbase_hwcnt_gpu_enable_map_from_physical( -+ &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); -+ -+ err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, -+ &cli->enable_map, &cli->hvcli); -+ break; -+ -+ case KINSTR_PRFCNT_WAITQ_MUTEX: -+ init_waitqueue_head(&cli->waitq); -+ mutex_init(&cli->cmd_sync_lock); -+ break; -+ -+ case KINSTR_PRFCNT_INITIALISED: -+ /* This shouldn't be reached */ -+ break; -+ } -+ -+ if (err < 0) { -+ kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); -+ return err; ++ WARN_ON(!enqueue_required); + } + } -+ *out_vcli = cli; ++out_unlock: ++ dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", ++ kctx, enqueue_required ? "" : "not "); + -+ return 0; ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ ++ mutex_unlock(&js_devdata->queue_mutex); + ++ return enqueue_required; +} + -+static size_t kbasep_kinstr_prfcnt_get_block_info_count( -+ const struct kbase_hwcnt_metadata *metadata) ++void kbasep_js_remove_job(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_jd_atom *atom) +{ -+ size_t grp, blk; -+ size_t block_info_count = 0; -+ -+ if (!metadata) -+ return 0; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ unsigned long flags; + -+ for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) { -+ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { -+ if (!kbase_kinstr_is_block_type_reserved(metadata, grp, blk)) -+ block_info_count++; -+ } -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(atom != NULL); + -+ return block_info_count; -+} ++ js_kctx_info = &kctx->jctx.sched_info; + -+static void kbasep_kinstr_prfcnt_get_request_info_list( -+ struct prfcnt_enum_item *item_arr, size_t *arr_idx) -+{ -+ memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests, -+ sizeof(kinstr_prfcnt_supported_requests)); -+ *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests); -+} ++ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, ++ kbase_ktrace_get_ctx_refcnt(kctx)); + -+static void kbasep_kinstr_prfcnt_get_sample_info_item(const struct kbase_hwcnt_metadata *metadata, -+ struct prfcnt_enum_item *item_arr, -+ size_t *arr_idx) -+{ -+ struct prfcnt_enum_item sample_info = { -+ .hdr = { -+ .item_type = PRFCNT_ENUM_TYPE_SAMPLE_INFO, -+ .item_version = PRFCNT_READER_API_VERSION, -+ }, -+ .u.sample_info = { -+ .num_clock_domains = metadata->clk_cnt, -+ }, -+ }; ++ /* De-refcount ctx.nr_jobs */ ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); ++ --(js_kctx_info->ctx.nr_jobs); ++ dev_dbg(kbdev->dev, ++ "Remove atom %pK from kctx %pK; now %d in ctx\n", ++ (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + -+ item_arr[*arr_idx] = sample_info; -+ *arr_idx += 1; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (--kctx->atoms_count[atom->sched_priority] == 0) ++ kbase_js_update_ctx_priority(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, -+ size_t block_set, struct prfcnt_enum_item *item_arr, -+ size_t *arr_idx) ++bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ size_t grp, blk; -+ -+ if (!metadata || !item_arr || !arr_idx) -+ return -EINVAL; ++ unsigned long flags; ++ struct kbasep_js_atom_retained_state 
katom_retained_state; ++ bool attr_state_changed; + -+ for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) { -+ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { -+ size_t blk_inst; -+ size_t unused_blk_inst_count = 0; -+ size_t blk_inst_count = -+ kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk); -+ enum prfcnt_block_type block_type = -+ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( -+ kbase_hwcnt_metadata_block_type(metadata, grp, blk)); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); + -+ if (block_type == PRFCNT_BLOCK_TYPE_RESERVED) -+ continue; ++ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); ++ kbasep_js_remove_job(kbdev, kctx, katom); + -+ /* Count number of unused blocks to updated number of instances */ -+ for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) { -+ if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, -+ blk_inst)) -+ unused_blk_inst_count++; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){ -+ .hdr = { -+ .item_type = PRFCNT_ENUM_TYPE_BLOCK, -+ .item_version = PRFCNT_READER_API_VERSION, -+ }, -+ .u.block_counter = { -+ .set = block_set, -+ .block_type = block_type, -+ .num_instances = blk_inst_count - unused_blk_inst_count, -+ .num_values = kbase_hwcnt_metadata_block_values_count( -+ metadata, grp, blk), -+ /* The bitmask of available counters should be dynamic. -+ * Temporarily, it is set to U64_MAX, waiting for the -+ * required functionality to be available in the future. -+ */ -+ .counter_mask = {U64_MAX, U64_MAX}, -+ }, -+ }; -+ } -+ } ++ /* The atom has 'finished' (will not be re-run), so no need to call ++ * kbasep_js_has_atom_finished(). ++ * ++ * This is because it returns false for soft-stopped atoms, but we ++ * want to override that, because we're cancelling an atom regardless of ++ * whether it was soft-stopped or not ++ */ ++ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, ++ &katom_retained_state); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return 0; ++ return attr_state_changed; +} + -+static int kbasep_kinstr_prfcnt_enum_info_count( -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) ++/** ++ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after ++ * releasing a context and/or atom ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @katom_retained_state: Retained state from the atom ++ * @runpool_ctx_attr_change: True if the runpool context attributes have changed ++ * ++ * This collates a set of actions that must happen whilst hwaccess_lock is held. ++ * ++ * This includes running more jobs when: ++ * - The previously released kctx caused a ctx attribute change, ++ * - The released atom caused a ctx attribute change, ++ * - Slots were previously blocked due to affinity restrictions, ++ * - Submission during IRQ handling failed. ++ * ++ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were ++ * changed. 
The caller should try scheduling all contexts ++ */ ++static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state, ++ bool runpool_ctx_attr_change) +{ -+ uint32_t count = 0; -+ size_t block_info_count = 0; -+ const struct kbase_hwcnt_metadata *metadata; ++ struct kbasep_js_device_data *js_devdata; ++ kbasep_js_release_result result = 0; + -+ count = ARRAY_SIZE(kinstr_prfcnt_supported_requests); -+ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom_retained_state != NULL); ++ js_devdata = &kbdev->js_data; + -+ /* Add the sample_info (clock domain) descriptive item */ -+ count++; -+ -+ /* Other blocks based on meta data */ -+ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); -+ count += block_info_count; ++ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Reserve one for the last sentinel item. */ -+ count++; -+ enum_info->info_item_count = count; -+ enum_info->info_item_size = sizeof(struct prfcnt_enum_item); -+ kinstr_ctx->info_item_count = count; ++ if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { ++ /* A change in runpool ctx attributes might mean we can ++ * run more jobs than before ++ */ ++ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + -+ return 0; ++ KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, ++ kctx, NULL, 0u, 0); ++ } ++ return result; +} + -+static int kbasep_kinstr_prfcnt_enum_info_list( -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) ++/** ++ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference ++ * on a ctx and an atom's "retained state", only ++ * taking the runpool and as transaction mutexes ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @katom_retained_state: Retained state from the atom ++ * ++ * This also starts more jobs running in the case of an ctx-attribute state change ++ * ++ * This does none of the followup actions for scheduling: ++ * - It does not schedule in a new context ++ * - It does not requeue or handle dying contexts ++ * ++ * For those tasks, just call kbasep_js_runpool_release_ctx() instead ++ * ++ * Has following requirements ++ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr ++ * - Context has a non-zero refcount ++ * - Caller holds js_kctx_info->ctx.jsctx_mutex ++ * - Caller holds js_devdata->runpool_mutex ++ * ++ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating ++ * the result of releasing a context that whether the caller should try ++ * scheduling a new context or should try scheduling all contexts. 
++ */ ++static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+ struct prfcnt_enum_item *prfcnt_item_arr; -+ size_t arr_idx = 0; -+ int err = 0; -+ size_t block_info_count = 0; -+ const struct kbase_hwcnt_metadata *metadata; ++ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ if ((enum_info->info_item_size == 0) || -+ (enum_info->info_item_count == 0) || !enum_info->info_list_ptr) -+ return -EINVAL; ++ kbasep_js_release_result release_result = 0u; ++ bool runpool_ctx_attr_change = false; ++ int kctx_as_nr; ++ int new_ref_count; ++ CSTD_UNUSED(kctx_as_nr); + -+ if (enum_info->info_item_count != kinstr_ctx->info_item_count) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ prfcnt_item_arr = kcalloc(enum_info->info_item_count, -+ sizeof(*prfcnt_item_arr), GFP_KERNEL); -+ if (!prfcnt_item_arr) -+ return -ENOMEM; ++ /* Ensure context really is scheduled in */ ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ kbasep_kinstr_prfcnt_get_request_info_list(prfcnt_item_arr, &arr_idx); ++ kctx_as_nr = kctx->as_nr; ++ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + -+ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); -+ /* Place the sample_info item */ -+ kbasep_kinstr_prfcnt_get_sample_info_item(metadata, prfcnt_item_arr, &arr_idx); ++ /* ++ * Transaction begins on AS and runpool_irq ++ * ++ * Assert about out calling contract ++ */ ++ mutex_lock(&kbdev->pm.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); ++ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); ++ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + -+ if (arr_idx + block_info_count >= enum_info->info_item_count) -+ err = -EINVAL; ++ /* Update refcount */ ++ kbase_ctx_sched_release_ctx(kctx); ++ new_ref_count = atomic_read(&kctx->refcount); + -+ if (!err) { -+ size_t counter_set; ++ /* Release the atom if it finished (i.e. wasn't soft-stopped) */ ++ if (kbasep_js_has_atom_finished(katom_retained_state)) ++ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( ++ kbdev, kctx, katom_retained_state); + -+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) -+ counter_set = KBASE_HWCNT_SET_SECONDARY; -+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) -+ counter_set = KBASE_HWCNT_SET_TERTIARY; -+#else -+ /* Default to primary */ -+ counter_set = KBASE_HWCNT_SET_PRIMARY; ++ if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ !kbase_pm_is_gpu_lost(kbdev) && +#endif -+ kbasep_kinstr_prfcnt_get_block_info_list( -+ metadata, counter_set, prfcnt_item_arr, &arr_idx); -+ if (arr_idx != enum_info->info_item_count - 1) -+ err = -EINVAL; ++ !kbase_pm_is_suspending(kbdev)) { ++ /* Context is kept scheduled into an address space even when ++ * there are no jobs, in this case we have to handle the ++ * situation where all jobs have been evicted from the GPU and ++ * submission is disabled. ++ * ++ * At this point we re-enable submission to allow further jobs ++ * to be executed ++ */ ++ kbasep_js_set_submit_allowed(js_devdata, kctx); + } + -+ /* The last sentinel item. 
*/ -+ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_type = -+ FLEX_LIST_TYPE_NONE; -+ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0; ++ /* Make a set of checks to see if the context should be scheduled out. ++ * Note that there'll always be at least 1 reference to the context ++ * which was previously acquired by kbasep_js_schedule_ctx(). ++ */ ++ if (new_ref_count == 1 && ++ (!kbasep_js_is_submit_allowed(js_devdata, kctx) || ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ kbase_pm_is_gpu_lost(kbdev) || ++#endif ++ kbase_pm_is_suspending(kbdev))) { ++ int num_slots = kbdev->gpu_props.num_job_slots; ++ int slot; + -+ if (!err) { -+ unsigned long bytes = -+ enum_info->info_item_count * sizeof(*prfcnt_item_arr); ++ /* Last reference, and we've been told to remove this context ++ * from the Run Pool ++ */ ++ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", ++ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, ++ kbasep_js_is_submit_allowed(js_devdata, kctx)); + -+ if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), -+ prfcnt_item_arr, bytes)) -+ err = -EFAULT; -+ } ++ KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + -+ kfree(prfcnt_item_arr); -+ return err; -+} ++ kbase_backend_release_ctx_irq(kbdev, kctx); + -+int kbase_kinstr_prfcnt_enum_info( -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) -+{ -+ int err; ++ for (slot = 0; slot < num_slots; slot++) { ++ if (kbdev->hwaccess.active_kctx[slot] == kctx) { ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", ++ (void *)kctx, slot); ++ kbdev->hwaccess.active_kctx[slot] = NULL; ++ } ++ } + -+ if (!kinstr_ctx || !enum_info) -+ return -EINVAL; ++ /* Ctx Attribute handling ++ * ++ * Releasing atoms attributes must either happen before this, or ++ * after the KCTX_SHEDULED flag is changed, otherwise we ++ * double-decount the attributes ++ */ ++ runpool_ctx_attr_change |= ++ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + -+ if (!enum_info->info_list_ptr) -+ err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx, -+ enum_info); -+ else -+ err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx, -+ enum_info); ++ /* Releasing the context and katom retained state can allow ++ * more jobs to run ++ */ ++ release_result |= ++ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, ++ kctx, katom_retained_state, ++ runpool_ctx_attr_change); + -+ return err; -+} ++ /* ++ * Transaction ends on AS and runpool_irq: ++ * ++ * By this point, the AS-related data is now clear and ready ++ * for re-use. 
++ * ++ * Since releases only occur once for each previous successful ++ * retain, and no more retains are allowed on this context, no ++ * other thread will be operating in this ++ * code whilst we are ++ */ + -+int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ union kbase_ioctl_kinstr_prfcnt_setup *setup) -+{ -+ int err; -+ size_t item_count; -+ size_t bytes; -+ struct prfcnt_request_item *req_arr = NULL; -+ struct kbase_kinstr_prfcnt_client *cli = NULL; -+ const size_t max_bytes = 32 * sizeof(*req_arr); ++ /* Recalculate pullable status for all slots */ ++ for (slot = 0; slot < num_slots; slot++) { ++ if (kbase_js_ctx_pullable(kctx, slot, false)) ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, ++ kctx, slot); ++ } + -+ if (!kinstr_ctx || !setup) -+ return -EINVAL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ item_count = setup->in.request_item_count; ++ kbase_backend_release_ctx_noirq(kbdev, kctx); + -+ /* Limiting the request items to 2x of the expected: accommodating -+ * moderate duplications but rejecting excessive abuses. -+ */ -+ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || -+ item_count > 2 * kinstr_ctx->info_item_count) { -+ return -EINVAL; -+ } ++ mutex_unlock(&kbdev->pm.lock); + -+ if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) -+ return -EINVAL; ++ /* Note: Don't reuse kctx_as_nr now */ + -+ /* Further limiting the max bytes to copy from userspace by setting it in the following -+ * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items, -+ * each currently at the size of prfcnt_request_item. -+ * -+ * Note: if more request types get added, this max limit needs to be updated. -+ */ -+ if (bytes > max_bytes) -+ return -EINVAL; ++ /* Synchronize with any timers */ ++ kbase_backend_ctx_count_changed(kbdev); + -+ req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); ++ /* update book-keeping info */ ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); ++ /* Signal any waiter that the context is not scheduled, so is ++ * safe for termination - once the jsctx_mutex is also dropped, ++ * and jobs have finished. 
++ */ ++ wake_up(&js_kctx_info->ctx.is_scheduled_wait); + -+ if (IS_ERR(req_arr)) -+ return PTR_ERR(req_arr); ++ /* Queue an action to occur after we've dropped the lock */ ++ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | ++ KBASEP_JS_RELEASE_RESULT_SCHED_ALL; ++ } else { ++ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, ++ katom_retained_state, runpool_ctx_attr_change); + -+ err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->pm.lock); ++ } + -+ if (err < 0) -+ goto error; ++ return release_result; ++} + -+ mutex_lock(&kinstr_ctx->lock); -+ kinstr_ctx->client_count++; -+ list_add(&cli->node, &kinstr_ctx->clients); -+ mutex_unlock(&kinstr_ctx->lock); ++void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_atom_retained_state katom_retained_state; + -+ setup->out.prfcnt_metadata_item_size = sizeof(struct prfcnt_metadata); -+ setup->out.prfcnt_mmap_size_bytes = -+ cli->sample_size * cli->sample_count; ++ /* Setup a dummy katom_retained_state */ ++ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + -+ /* Expose to user-space only once the client is fully initialized */ -+ err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]", -+ &kinstr_prfcnt_client_fops, cli, -+ O_RDONLY | O_CLOEXEC); ++ kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ &katom_retained_state); ++} + -+ if (err < 0) -+ goto client_installed_error; ++void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, bool has_pm_ref) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ goto free_buf; ++ /* This is called if and only if you've you've detached the context from ++ * the Runpool Queue, and not added it back to the Runpool ++ */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+client_installed_error: -+ mutex_lock(&kinstr_ctx->lock); -+ kinstr_ctx->client_count--; -+ list_del(&cli->node); -+ mutex_unlock(&kinstr_ctx->lock); -+error: -+ kbasep_kinstr_prfcnt_client_destroy(cli); -+free_buf: -+ kfree(req_arr); -+ return err; ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* Dying: don't requeue, but kill all jobs on the context. This ++ * happens asynchronously ++ */ ++ dev_dbg(kbdev->dev, ++ "JS: ** Killing Context %pK on RunPool Remove **", kctx); ++ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); ++ } +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h -new file mode 100644 -index 000000000..bbe33796e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h -@@ -0,0 +1,189 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+/* -+ * Kinstr_prfcnt, used to provide an ioctl for userspace access to -+ * performance counters. -+ */ -+#ifndef _KBASE_KINSTR_PRFCNT_H_ -+#define _KBASE_KINSTR_PRFCNT_H_ ++void kbasep_js_runpool_release_ctx_and_katom_retained_state( ++ struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ kbasep_js_release_result release_result; + -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+#include ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+struct kbase_kinstr_prfcnt_context; -+struct kbase_kinstr_prfcnt_client; -+struct kbase_hwcnt_virtualizer; -+struct kbase_ioctl_hwcnt_reader_setup; -+struct kbase_ioctl_kinstr_prfcnt_enum_info; -+union kbase_ioctl_kinstr_prfcnt_setup; ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+/** -+ * kbase_kinstr_prfcnt_init() - Initialize a kinstr_prfcnt context. -+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @out_kinstr_ctx: Non-NULL pointer to where the pointer to the created -+ * kinstr_prfcnt context will be stored on success. -+ * -+ * On creation, the suspend count of the context will be 0. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_kinstr_prfcnt_init( -+ struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx); ++ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ katom_retained_state); + -+/** -+ * kbase_kinstr_prfcnt_term() - Terminate a kinstr_prfcnt context. -+ * @kinstr_ctx: Pointer to the kinstr_prfcnt context to be terminated. -+ */ -+void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx); ++ /* Drop the runpool mutex to allow requeing kctx */ ++ mutex_unlock(&js_devdata->runpool_mutex); + -+/** -+ * kbase_kinstr_prfcnt_suspend() - Increment the suspend count of the context. -+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be suspended. -+ * -+ * After this function call returns, it is guaranteed that all timers and -+ * workers in kinstr_prfcnt will be canceled, and will not be re-triggered until -+ * after the context has been resumed. In effect, this means no new counter -+ * dumps will occur for any existing or subsequently added periodic clients. -+ */ -+void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx); ++ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + -+/** -+ * kbase_kinstr_prfcnt_resume() - Decrement the suspend count of the context. -+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be resumed. -+ * -+ * If a call to this function decrements the suspend count from 1 to 0, then -+ * normal operation of kinstr_prfcnt will be resumed (i.e. counter dumps will once -+ * again be automatically triggered for all periodic clients). -+ * -+ * It is only valid to call this function one time for each prior returned call -+ * to kbase_kinstr_prfcnt_suspend. 
-+ */ -+void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx); ++ /* Drop the jsctx_mutex to allow scheduling in a new context */ + -+/** -+ * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types -+ * with their information. -+ * @metadata: Non-NULL pointer to the hardware counter metadata. -+ * @block_set: Which SET the blocks will represent. -+ * @item_arr: Non-NULL pointer to array of enumeration items to populate. -+ * @arr_idx: Non-NULL pointer to index of array @item_arr. -+ * -+ * Populate list of counter blocks with information for enumeration. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, -+ size_t block_set, struct prfcnt_enum_item *item_arr, -+ size_t *arr_idx); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+/** -+ * kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample -+ * metadata items. -+ * @metadata: Non-NULL pointer to the hardware counter metadata. -+ * @enable_map: Non-NULL pointer to the map of enabled counters. -+ * -+ * Return: Number of metadata items for available blocks in each sample. -+ */ -+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata, -+ struct kbase_hwcnt_enable_map *enable_map); ++ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) ++ kbase_js_sched_all(kbdev); ++} + -+/** -+ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta -+ * item array. -+ * @enable_map: Non-NULL pointer to the map of enabled counters. -+ * @dst: Non-NULL pointer to the sample's dump buffer object. -+ * @block_meta_base: Non-NULL double pointer to the start of the block meta -+ * data items. -+ * @base_addr: Address of allocated pages for array of samples. Used -+ * to calculate offset of block values. -+ * @counter_set: The SET which blocks represent. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, -+ struct kbase_hwcnt_dump_buffer *dst, -+ struct prfcnt_metadata **block_meta_base, -+ u8 *base_addr, u8 counter_set); ++void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_atom_retained_state katom_retained_state; + -+/** -+ * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client. -+ * Does not attach to the kinstr_prfcnt -+ * context. -+ * @kinstr_ctx: Non-NULL pointer to kinstr_prfcnt context. -+ * @setup: Non-NULL pointer to hardware counter ioctl setup structure. -+ * @out_vcli: Non-NULL pointer to where created client will be stored on -+ * success. -+ * @req_arr: Non-NULL pointer to array of request items for client session. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ union kbase_ioctl_kinstr_prfcnt_setup *setup, -+ struct kbase_kinstr_prfcnt_client **out_vcli, -+ struct prfcnt_request_item *req_arr); ++ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + -+/** -+ * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session. -+ * @cli: Non-NULL pointer to kinstr_prfcnt client. -+ * @control_cmd: Control command to execute. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, -+ struct prfcnt_control_cmd *control_cmd); ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, ++ &katom_retained_state); ++} + -+/** -+ * kbasep_kinstr_prfcnt_client_destroy() - Destroy a kinstr_prfcnt client. -+ * @cli: kinstr_prfcnt client. Must not be attached to a kinstr_prfcnt context. ++/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into ++ * kbase_js_sched_all() + */ -+void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli); ++static void kbasep_js_runpool_release_ctx_no_schedule( ++ struct kbase_device *kbdev, struct kbase_context *kctx) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ kbasep_js_release_result release_result; ++ struct kbasep_js_atom_retained_state katom_retained_state_struct; ++ struct kbasep_js_atom_retained_state *katom_retained_state = ++ &katom_retained_state_struct; + -+/** -+ * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information. -+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. -+ * @enum_info: Non-NULL pointer to the enumeration information. -+ * -+ * Enumerate which counter blocks and banks exist, and what counters are -+ * available within them. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_kinstr_prfcnt_enum_info( -+ struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; ++ kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + -+/** -+ * kbase_kinstr_prfcnt_setup() - Set up a new hardware counter reader client. -+ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. -+ * @setup: Non-NULL pointer to the hwcnt reader configuration. -+ * -+ * Start a session between a user client and the kinstr_prfcnt component. -+ * A file descriptor shall be provided to the client as a handle to the -+ * hardware counter reader client that represents the session. -+ * -+ * Return: file descriptor on success, else error code. -+ */ -+int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, -+ union kbase_ioctl_kinstr_prfcnt_setup *setup); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+#endif /* _KBASE_KINSTR_PRFCNT_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h -new file mode 100644 -index 000000000..e5c6f7a0b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ katom_retained_state); + -+/** -+ * DOC: Base kernel APIs, Linux implementation. -+ */ ++ /* Drop the runpool mutex to allow requeing kctx */ ++ mutex_unlock(&js_devdata->runpool_mutex); ++ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + -+#ifndef _KBASE_LINUX_H_ -+#define _KBASE_LINUX_H_ ++ /* Drop the jsctx_mutex to allow scheduling in a new context */ ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+/* All things that are needed for the Linux port. */ -+#include -+#include -+#include -+#include -+#include ++ /* NOTE: could return release_result if the caller would like to know ++ * whether it should schedule a new context, but currently no callers do ++ */ ++} + -+#if IS_ENABLED(MALI_KERNEL_TEST_API) -+ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) -+#else -+ #define KBASE_EXPORT_TEST_API(func) -+#endif ++void kbase_js_set_timeouts(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) ++ kbase_backend_timeouts_changed(kbdev); ++} + -+#endif /* _KBASE_LINUX_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c -new file mode 100644 -index 000000000..800a4199d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c -@@ -0,0 +1,5533 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
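/*
 * Illustrative sketch only, not part of the patch hunks: the scheduler
 * functions in this hunk appear to take their locks in one consistent order
 * (queue_mutex, then jsctx_mutex, then runpool_mutex, then mmu_hw_mutex,
 * then the hwaccess spinlock) and release them in reverse, which is what
 * keeps the nesting deadlock-free. The pthread mutexes below only model that
 * discipline; the toy_* names are invented and are not the kbase locks.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t toy_queue_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t toy_ctx_lock     = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t toy_runpool_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_scheduler_step(void)
{
        /* Acquire in the documented order... */
        pthread_mutex_lock(&toy_queue_lock);
        pthread_mutex_lock(&toy_ctx_lock);
        pthread_mutex_lock(&toy_runpool_lock);

        printf("critical section: would move a context in or out here\n");

        /* ...and release in strictly reverse order. */
        pthread_mutex_unlock(&toy_runpool_lock);
        pthread_mutex_unlock(&toy_ctx_lock);
        pthread_mutex_unlock(&toy_queue_lock);
}

int main(void)
{
        toy_scheduler_step();
        return 0;
}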
-+ * -+ */ ++static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ unsigned long flags; ++ bool kctx_suspended = false; ++ int as_nr; + -+/** -+ * DOC: Base kernel memory APIs -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_OF) -+#include -+#endif ++ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" -+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) ++ /* Pick available address space for this context */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ as_nr = kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ if (as_nr == KBASEP_AS_NR_INVALID) { ++ as_nr = kbase_backend_find_and_release_free_address_space( ++ kbdev, kctx); ++ if (as_nr != KBASEP_AS_NR_INVALID) { ++ /* Attempt to retain the context again, this should ++ * succeed ++ */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ as_nr = kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE ++ WARN_ON(as_nr == KBASEP_AS_NR_INVALID); ++ } ++ } ++ if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS)) ++ return false; /* No address space currently available */ + -+/* -+ * Alignment of objects allocated by the GPU inside a just-in-time memory -+ * region whose size is given by an end address -+ * -+ * This is the alignment of objects allocated by the GPU, but possibly not -+ * fully written to. When taken into account with -+ * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes -+ * that the JIT memory report size can exceed the actual backed memory size. -+ */ -+#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) ++ /* ++ * Atomic transaction on the Context and Run Pool begins ++ */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/* -+ * Maximum size of objects allocated by the GPU inside a just-in-time memory -+ * region whose size is given by an end address -+ * -+ * This is the maximum size of objects allocated by the GPU, but possibly not -+ * fully written to. When taken into account with -+ * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes -+ * that the JIT memory report size can exceed the actual backed memory size. 
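/*
 * Illustrative sketch only, not part of the patch hunks: kbasep_js_schedule_ctx()
 * above first tries to retain an address space for the context and, if none is
 * available, asks the backend to free one up and retries exactly once. The toy
 * slot array below models just that "try, evict, retry" shape; the names and
 * the eviction policy are invented for illustration and are not kbase code.
 */
#include <stdio.h>

#define TOY_NR_AS 8
#define TOY_AS_INVALID (-1)

static int toy_as_owner[TOY_NR_AS]; /* 0 = free, otherwise a context id */

static int toy_retain_as(int ctx_id)
{
        for (int i = 0; i < TOY_NR_AS; i++) {
                if (toy_as_owner[i] == 0) {
                        toy_as_owner[i] = ctx_id;
                        return i;
                }
        }
        return TOY_AS_INVALID;
}

/* Stand-in for "find and release a free address space": evict slot 0. */
static void toy_evict_one(void)
{
        toy_as_owner[0] = 0;
}

static int toy_schedule_ctx(int ctx_id)
{
        int as_nr = toy_retain_as(ctx_id);

        if (as_nr == TOY_AS_INVALID) {
                toy_evict_one();
                as_nr = toy_retain_as(ctx_id); /* retry once, expected to succeed */
        }
        return as_nr;
}

int main(void)
{
        for (int c = 1; c <= TOY_NR_AS + 1; c++)
                printf("ctx %d -> AS %d\n", c, toy_schedule_ctx(c));
        return 0;
}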
-+ */ -+#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) ++ /* Check to see if context is dying due to kbase_job_zap_context() */ ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* Roll back the transaction so far and return */ ++ kbase_ctx_sched_release_ctx(kctx); + -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+/* Forward declarations */ -+static void free_partial_locked(struct kbase_context *kctx, -+ struct kbase_mem_pool *pool, struct tagged_addr tp); ++ return false; ++ } + -+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) -+{ -+#if defined(CONFIG_ARM64) -+ /* VA_BITS can be as high as 48 bits, but all bits are available for -+ * both user and kernel. ++ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, ++ 0u, ++ kbase_ktrace_get_ctx_refcnt(kctx)); ++ ++ kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); ++ ++ /* Assign context to previously chosen address space */ ++ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { ++ /* Roll back the transaction so far and return */ ++ kbase_ctx_sched_release_ctx(kctx); ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ ++ return false; ++ } ++ ++ kbdev->hwaccess.active_kctx[js] = kctx; ++ ++ KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); ++ ++ /* Cause any future waiter-on-termination to wait until the context is ++ * descheduled + */ -+ size_t cpu_va_bits = VA_BITS; -+#elif defined(CONFIG_X86_64) -+ /* x86_64 can access 48 bits of VA, but the 48th is used to denote -+ * kernel (1) vs userspace (0), so the max here is 47. ++ wake_up(&js_kctx_info->ctx.is_scheduled_wait); ++ ++ /* Re-check for suspending: a suspend could've occurred, and all the ++ * contexts could've been removed from the runpool before we took this ++ * lock. In this case, we don't want to allow this context to run jobs, ++ * we just want it out immediately. ++ * ++ * The DMB required to read the suspend flag was issued recently as part ++ * of the hwaccess_lock locking. If a suspend occurs *after* that lock ++ * was taken (i.e. 
this condition doesn't execute), then the ++ * kbasep_js_suspend() code will cleanup this context instead (by virtue ++ * of it being called strictly after the suspend flag is set, and will ++ * wait for this lock to drop) + */ -+ size_t cpu_va_bits = 47; -+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) -+ size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { +#else -+#error "Unknown CPU VA width for this architecture" ++ if (kbase_pm_is_suspending(kbdev)) { +#endif ++ /* Cause it to leave at some later point */ ++ bool retained; ++ CSTD_UNUSED(retained); + -+ if (kbase_ctx_compat_mode(kctx)) -+ cpu_va_bits = 32; ++ retained = kbase_ctx_sched_inc_refcount_nolock(kctx); ++ KBASE_DEBUG_ASSERT(retained); + -+ return cpu_va_bits; -+} ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ kctx_suspended = true; ++ } + -+/* This function finds out which RB tree the given pfn from the GPU VA belongs -+ * to based on the memory zone the pfn refers to -+ */ -+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, -+ u64 gpu_pfn) -+{ -+ struct rb_root *rbtree = NULL; ++ kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + -+ struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); ++ /* Transaction complete */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+#if MALI_USE_CSF -+ struct kbase_reg_zone *fixed_va_zone = -+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); ++ /* Synchronize with any timers */ ++ kbase_backend_ctx_count_changed(kbdev); + -+ struct kbase_reg_zone *exec_fixed_va_zone = -+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ /* Note: after this point, the context could potentially get scheduled ++ * out immediately ++ */ + -+ if (gpu_pfn >= fixed_va_zone->base_pfn) { -+ rbtree = &kctx->reg_rbtree_fixed; -+ return rbtree; -+ } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { -+ rbtree = &kctx->reg_rbtree_exec_fixed; -+ return rbtree; ++ if (kctx_suspended) { ++ /* Finishing forcing out the context due to a suspend. 
Use a ++ * variant of kbasep_js_runpool_release_ctx() that doesn't ++ * schedule a new context, to prevent a risk of recursion back ++ * into this function ++ */ ++ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); ++ return false; + } -+#endif -+ if (gpu_pfn >= exec_va_zone->base_pfn) -+ rbtree = &kctx->reg_rbtree_exec; -+ else { -+ u64 same_va_end; ++ return true; ++} + -+ if (kbase_ctx_compat_mode(kctx)) { -+ same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; -+ } else { -+ struct kbase_reg_zone *same_va_zone = -+ kbase_ctx_reg_zone_get(kctx, -+ KBASE_REG_ZONE_SAME_VA); -+ same_va_end = kbase_reg_zone_end_pfn(same_va_zone); -+ } ++static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, ++ unsigned int js) ++{ ++ unsigned long flags; + -+ if (gpu_pfn >= same_va_end) -+ rbtree = &kctx->reg_rbtree_custom; -+ else -+ rbtree = &kctx->reg_rbtree_same; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && ++ kbase_backend_use_ctx_sched(kbdev, kctx, js)) { ++ dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", ++ (void *)kctx, js); ++ ++ if (kbdev->hwaccess.active_kctx[js] != kctx) { ++ kbdev->hwaccess.active_kctx[js] = kctx; ++ kbase_ctx_flag_clear(kctx, ++ KCTX_PULLED_SINCE_ACTIVE_JS0 << js); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return true; /* Context already scheduled */ + } + -+ return rbtree; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return kbasep_js_schedule_ctx(kbdev, kctx, js); +} + -+/* This function inserts a region into the tree. */ -+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) ++void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ u64 start_pfn = new_reg->start_pfn; -+ struct rb_node **link = NULL; -+ struct rb_node *parent = NULL; -+ struct rb_root *rbtree = NULL; -+ -+ rbtree = new_reg->rbtree; -+ -+ link = &(rbtree->rb_node); -+ /* Find the right place in the tree using tree search */ -+ while (*link) { -+ struct kbase_va_region *old_reg; -+ -+ parent = *link; -+ old_reg = rb_entry(parent, struct kbase_va_region, rblink); ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbasep_js_device_data *js_devdata; ++ bool is_scheduled; + -+ /* RBTree requires no duplicate entries. */ -+ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ if (old_reg->start_pfn > start_pfn) -+ link = &(*link)->rb_left; -+ else -+ link = &(*link)->rb_right; -+ } ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* Put the new node there, and rebalance tree */ -+ rb_link_node(&(new_reg->rblink), parent, link); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* This should only happen in response to a system call ++ * from a user-space thread. ++ * In a non-arbitrated environment this can never happen ++ * whilst suspending. ++ * ++ * In an arbitrated environment, user-space threads can run ++ * while we are suspended (for example GPU not available ++ * to this VM), however in that case we will block on ++ * the wait event for KCTX_SCHEDULED, since no context ++ * can be scheduled until we have the GPU again. ++ */ ++ if (kbdev->arb.arb_if == NULL) ++ if (WARN_ON(kbase_pm_is_suspending(kbdev))) ++ return; ++#else ++ /* This should only happen in response to a system call ++ * from a user-space thread. 
++ * In a non-arbitrated environment this can never happen ++ * whilst suspending. ++ */ ++ if (WARN_ON(kbase_pm_is_suspending(kbdev))) ++ return; ++#endif + -+ rb_insert_color(&(new_reg->rblink), rbtree); -+} ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+static struct kbase_va_region *find_region_enclosing_range_rbtree( -+ struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) -+{ -+ struct rb_node *rbnode; -+ struct kbase_va_region *reg; -+ u64 end_pfn = start_pfn + nr_pages; ++ /* Mark the context as privileged */ ++ kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); + -+ rbnode = rbtree->rb_node; ++ is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); ++ if (!is_scheduled) { ++ /* Add the context to the pullable list */ ++ if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) ++ kbase_js_sync_timers(kbdev); + -+ while (rbnode) { -+ u64 tmp_start_pfn, tmp_end_pfn; ++ /* Fast-starting requires the jsctx_mutex to be dropped, ++ * because it works on multiple ctxs ++ */ ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ tmp_start_pfn = reg->start_pfn; -+ tmp_end_pfn = reg->start_pfn + reg->nr_pages; ++ /* Try to schedule the context in */ ++ kbase_js_sched_all(kbdev); + -+ /* If start is lower than this, go left. */ -+ if (start_pfn < tmp_start_pfn) -+ rbnode = rbnode->rb_left; -+ /* If end is higher than this, then go right. */ -+ else if (end_pfn > tmp_end_pfn) -+ rbnode = rbnode->rb_right; -+ else /* Enclosing */ -+ return reg; ++ /* Wait for the context to be scheduled in */ ++ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, ++ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ } else { ++ /* Already scheduled in - We need to retain it to keep the ++ * corresponding address space ++ */ ++ WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + } -+ -+ return NULL; +} ++KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); + -+struct kbase_va_region *kbase_find_region_enclosing_address( -+ struct rb_root *rbtree, u64 gpu_addr) ++void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_node *rbnode; -+ struct kbase_va_region *reg; -+ -+ rbnode = rbtree->rb_node; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ while (rbnode) { -+ u64 tmp_start_pfn, tmp_end_pfn; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ tmp_start_pfn = reg->start_pfn; -+ tmp_end_pfn = reg->start_pfn + reg->nr_pages; ++ /* We don't need to use the address space anymore */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ /* If start is lower than this, go left. */ -+ if (gpu_pfn < tmp_start_pfn) -+ rbnode = rbnode->rb_left; -+ /* If end is higher than this, then go right. */ -+ else if (gpu_pfn >= tmp_end_pfn) -+ rbnode = rbnode->rb_right; -+ else /* Enclosing */ -+ return reg; -+ } ++ /* Release the context - it will be scheduled out */ ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + -+ return NULL; ++ kbase_js_sched_all(kbdev); +} ++KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + -+/* Find region enclosing given address. 
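/*
 * Illustrative sketch only, not part of the patch hunks: the privileged-context
 * path above marks the context KCTX_PRIVILEGED, pushes it to the head of the
 * pullable queue, kicks the scheduler and then sleeps on is_scheduled_wait
 * until KCTX_SCHEDULED is observed (or simply takes an extra refcount if the
 * context is already scheduled). The condition-variable code below models only
 * that wait/wake handshake; the real driver uses the kernel's wait_event() and
 * wake_up(), and every toy_* name is invented for illustration.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t toy_scheduled_wait = PTHREAD_COND_INITIALIZER;
static bool toy_scheduled;

/* Scheduler side: the equivalent of setting KCTX_SCHEDULED and waking waiters. */
static void *toy_scheduler(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&toy_lock);
        toy_scheduled = true;
        pthread_cond_broadcast(&toy_scheduled_wait);
        pthread_mutex_unlock(&toy_lock);
        return NULL;
}

int main(void)
{
        pthread_t thr;

        pthread_create(&thr, NULL, toy_scheduler, NULL);

        /* Requester side: the equivalent of wait_event(..., KCTX_SCHEDULED). */
        pthread_mutex_lock(&toy_lock);
        while (!toy_scheduled)
                pthread_cond_wait(&toy_scheduled_wait, &toy_lock);
        pthread_mutex_unlock(&toy_lock);

        printf("context is now scheduled\n");
        pthread_join(thr, NULL);
        return 0;
}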
*/ -+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( -+ struct kbase_context *kctx, u64 gpu_addr) ++void kbasep_js_suspend(struct kbase_device *kbdev) +{ -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_root *rbtree = NULL; ++ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ int i; ++ u16 retained = 0u; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); ++ js_devdata = &kbdev->js_data; + -+ lockdep_assert_held(&kctx->reg_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); ++ /* Prevent all contexts from submitting */ ++ js_devdata->runpool_irq.submit_allowed = 0; + -+ return kbase_find_region_enclosing_address(rbtree, gpu_addr); -+} ++ /* Retain each of the contexts, so we can cause it to leave even if it ++ * had no refcount to begin with ++ */ ++ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { ++ struct kbase_context *kctx = kbdev->as_to_kctx[i]; + -+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); ++ retained = retained << 1; + -+struct kbase_va_region *kbase_find_region_base_address( -+ struct rb_root *rbtree, u64 gpu_addr) -+{ -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_node *rbnode = NULL; -+ struct kbase_va_region *reg = NULL; ++ if (kctx && !(kbdev->as_free & (1u << i))) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ retained |= 1u; ++ /* This loop will not have an effect on the privileged ++ * contexts as they would have an extra ref count ++ * compared to the normal contexts, so they will hold ++ * on to their address spaces. MMU will re-enabled for ++ * them on resume. ++ */ ++ } ++ } + -+ rbnode = rbtree->rb_node; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ while (rbnode) { -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ if (reg->start_pfn > gpu_pfn) -+ rbnode = rbnode->rb_left; -+ else if (reg->start_pfn < gpu_pfn) -+ rbnode = rbnode->rb_right; -+ else -+ return reg; ++ /* De-ref the previous retain to ensure each context gets pulled out ++ * sometime later. 
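/*
 * Illustrative sketch only, not part of the patch hunks: kbasep_js_suspend()
 * records, one bit per address space, which contexts it took an extra
 * reference on. It walks the address spaces from the highest index down,
 * shifting the 'retained' word left each step and OR-ing in 1 when it retains,
 * then walks forward again shifting right so bit 0 always describes the
 * current index. The standalone code below reproduces just that bookkeeping
 * over a toy array; the names are invented for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_NR_AS 8

int main(void)
{
        /* 1 = an active context occupies this address space. */
        const int toy_active[TOY_NR_AS] = { 1, 0, 1, 1, 0, 0, 1, 0 };
        uint16_t retained = 0u;

        /* Pass 1: high index to low, building the bitmask. */
        for (int i = TOY_NR_AS - 1; i >= 0; --i) {
                retained <<= 1;
                if (toy_active[i])
                        retained |= 1u;   /* would retain the context here */
        }

        /* Pass 2: low index to high, consuming bit 0 each iteration. */
        for (int i = 0; i < TOY_NR_AS; ++i, retained >>= 1) {
                if (retained & 1u)
                        printf("release extra reference on AS %d\n", i);
        }
        return 0;
}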
++ */ ++ for (i = 0; ++ i < BASE_MAX_NR_AS; ++ ++i, retained = retained >> 1) { ++ struct kbase_context *kctx = kbdev->as_to_kctx[i]; ++ ++ if (retained & 1u) ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + } + -+ return NULL; ++ /* Caller must wait for all Power Manager active references to be ++ * dropped ++ */ +} + -+/* Find region with given base address */ -+struct kbase_va_region *kbase_region_tracker_find_region_base_address( -+ struct kbase_context *kctx, u64 gpu_addr) ++void kbasep_js_resume(struct kbase_device *kbdev) +{ -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_root *rbtree = NULL; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ struct kbasep_js_device_data *js_devdata; ++ int js, prio; + -+ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); ++ KBASE_DEBUG_ASSERT(kbdev); ++ js_devdata = &kbdev->js_data; ++ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + -+ return kbase_find_region_base_address(rbtree, gpu_addr); -+} ++ mutex_lock(&js_devdata->queue_mutex); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; ++ prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ struct kbase_context *kctx, *n; ++ unsigned long flags; + -+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); ++#ifndef CONFIG_MALI_ARBITER_SUPPORT ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/* Find region meeting given requirements */ -+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( -+ struct kbase_va_region *reg_reqs, -+ size_t nr_pages, size_t align_offset, size_t align_mask, -+ u64 *out_start_pfn) -+{ -+ struct rb_node *rbnode = NULL; -+ struct kbase_va_region *reg = NULL; -+ struct rb_root *rbtree = NULL; ++ list_for_each_entry_safe(kctx, n, ++ &kbdev->js_data.ctx_list_unpullable[js][prio], ++ jctx.sched_info.ctx.ctx_list_entry[js]) { ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool timer_sync = false; + -+ /* Note that this search is a linear search, as we do not have a target -+ * address in mind, so does not benefit from the rbtree search -+ */ -+ rbtree = reg_reqs->rbtree; ++ /* Drop lock so we can take kctx mutexes */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); + -+ for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ if ((reg->nr_pages >= nr_pages) && -+ (reg->flags & KBASE_REG_FREE)) { -+ /* Check alignment */ -+ u64 start_pfn = reg->start_pfn; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* When align_offset == align, this sequence is -+ * equivalent to: -+ * (start_pfn + align_mask) & ~(align_mask) -+ * -+ * Otherwise, it aligns to n*align + offset, for the -+ * lowest value n that makes this still >start_pfn -+ */ -+ start_pfn += align_mask; -+ start_pfn -= (start_pfn - align_offset) & (align_mask); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { -+ /* Can't end at 4GB boundary */ -+ if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) -+ start_pfn += align_offset; ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && ++ kbase_js_ctx_pullable(kctx, js, false)) ++ timer_sync = ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); + -+ /* Can't start at 4GB boundary */ -+ if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) -+ start_pfn += align_offset; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); + -+ if (!((start_pfn + 
nr_pages) & BASE_MEM_PFN_MASK_4GB) || -+ !(start_pfn & BASE_MEM_PFN_MASK_4GB)) -+ continue; -+ } else if (reg_reqs->flags & -+ KBASE_REG_GPU_VA_SAME_4GB_PAGE) { -+ u64 end_pfn = start_pfn + nr_pages - 1; ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); + -+ if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != -+ (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) -+ start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; -+ } ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ if ((start_pfn >= reg->start_pfn) && -+ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && -+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { -+ *out_start_pfn = start_pfn; -+ return reg; ++ /* Take lock before accessing list again */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } -+ } -+ } -+ -+ return NULL; -+} -+ -+/** -+ * kbase_remove_va_region - Remove a region object from the global list. -+ * -+ * @kbdev: The kbase device -+ * @reg: Region object to remove -+ * -+ * The region reg is removed, possibly by merging with other free and -+ * compatible adjacent regions. It must be called with the context -+ * region lock held. The associated memory is not released (see -+ * kbase_free_alloced_region). Internal use only. -+ */ -+void kbase_remove_va_region(struct kbase_device *kbdev, -+ struct kbase_va_region *reg) -+{ -+ struct rb_node *rbprev; -+ struct kbase_va_region *prev = NULL; -+ struct rb_node *rbnext; -+ struct kbase_va_region *next = NULL; -+ struct rb_root *reg_rbtree = NULL; -+ struct kbase_va_region *orig_reg = reg; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#else ++ bool timer_sync = false; + -+ int merged_front = 0; -+ int merged_back = 0; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ reg_rbtree = reg->rbtree; ++ list_for_each_entry_safe(kctx, n, ++ &kbdev->js_data.ctx_list_unpullable[js][prio], ++ jctx.sched_info.ctx.ctx_list_entry[js]) { + -+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) -+ return; ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && ++ kbase_js_ctx_pullable(kctx, js, false)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } + -+ /* Try to merge with the previous block first */ -+ rbprev = rb_prev(&(reg->rblink)); -+ if (rbprev) { -+ prev = rb_entry(rbprev, struct kbase_va_region, rblink); -+ if (prev->flags & KBASE_REG_FREE) { -+ /* We're compatible with the previous VMA, merge with -+ * it, handling any gaps for robustness. -+ */ -+ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != -+ (reg->flags & KBASE_REG_ZONE_MASK)); -+ if (!WARN_ON(reg->start_pfn < prev_end_pfn)) -+ prev->nr_pages += reg->start_pfn - prev_end_pfn; -+ prev->nr_pages += reg->nr_pages; -+ rb_erase(&(reg->rblink), reg_rbtree); -+ reg = prev; -+ merged_front = 1; ++ if (timer_sync) { ++ mutex_lock(&js_devdata->runpool_mutex); ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ } ++#endif + } + } ++ mutex_unlock(&js_devdata->queue_mutex); + -+ /* Try to merge with the next block second */ -+ /* Note we do the lookup here as the tree may have been rebalanced. */ -+ rbnext = rb_next(&(reg->rblink)); -+ if (rbnext) { -+ next = rb_entry(rbnext, struct kbase_va_region, rblink); -+ if (next->flags & KBASE_REG_FREE) { -+ /* We're compatible with the next VMA, merge with it, -+ * handling any gaps for robustness. 
-+ */ -+ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; ++ /* Restart atom processing */ ++ kbase_js_sched_all(kbdev); + -+ WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != -+ (reg->flags & KBASE_REG_ZONE_MASK)); -+ if (!WARN_ON(next->start_pfn < reg_end_pfn)) -+ next->nr_pages += next->start_pfn - reg_end_pfn; -+ next->start_pfn = reg->start_pfn; -+ next->nr_pages += reg->nr_pages; -+ rb_erase(&(reg->rblink), reg_rbtree); -+ merged_back = 1; -+ } -+ } ++ /* JS Resume complete */ ++} + -+ if (merged_front && merged_back) { -+ /* We already merged with prev, free it */ -+ kfree(reg); -+ } else if (!(merged_front || merged_back)) { -+ /* If we failed to merge then we need to add a new block */ ++bool kbase_js_is_atom_valid(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ if ((katom->core_req & BASE_JD_REQ_FS) && ++ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | ++ BASE_JD_REQ_T))) ++ return false; + -+ /* -+ * We didn't merge anything. Try to add a new free -+ * placeholder, and in any case, remove the original one. -+ */ -+ struct kbase_va_region *free_reg; ++ if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && ++ (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) ++ return false; + -+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, -+ reg->flags & KBASE_REG_ZONE_MASK); -+ if (!free_reg) { -+ /* In case of failure, we cannot allocate a replacement -+ * free region, so we will be left with a 'gap' in the -+ * region tracker's address range (though, the rbtree -+ * will itself still be correct after erasing -+ * 'reg'). -+ * -+ * The gap will be rectified when an adjacent region is -+ * removed by one of the above merging paths. Other -+ * paths will gracefully fail to allocate if they try -+ * to allocate in the gap. -+ * -+ * There is nothing that the caller can do, since free -+ * paths must not fail. The existing 'reg' cannot be -+ * repurposed as the free region as callers must have -+ * freedom of use with it by virtue of it being owned -+ * by them, not the region tracker insert/remove code. -+ */ -+ dev_warn( -+ kbdev->dev, -+ "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", -+ (unsigned long long)reg->start_pfn << PAGE_SHIFT, -+ (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); -+ rb_erase(&(reg->rblink), reg_rbtree); ++ return true; ++} + -+ goto out; -+ } -+ rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); -+ } ++static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom) ++{ ++ if (katom->core_req & BASE_JD_REQ_JOB_SLOT) ++ return katom->jobslot; + -+ /* This operation is always safe because the function never frees -+ * the region. If the region has been merged to both front and back, -+ * then it's the previous region that is supposed to be freed. -+ */ -+ orig_reg->start_pfn = 0; ++ if (katom->core_req & BASE_JD_REQ_FS) ++ return 0; + -+out: -+ return; -+} ++ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ if (katom->device_nr == 1 && ++ kbdev->gpu_props.num_core_groups == 2) ++ return 2; ++ } + -+KBASE_EXPORT_TEST_API(kbase_remove_va_region); ++ return 1; ++} + -+/** -+ * kbase_insert_va_region_nolock - Insert a VA region to the list, -+ * replacing the existing one. 
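/*
 * Illustrative sketch only, not part of the patch hunks: kbase_js_dep_resolved_submit()
 * above only reports that the context needs enqueuing when the target slot was
 * empty beforehand, i.e. when the slot transitions from "nothing to pull" to
 * "something to pull"; an atom still blocked on a cross-slot dependency is
 * parked on the x_dep list instead and does not trigger an enqueue. The toy
 * per-slot counter below models that decision; all names are invented for
 * illustration and are not kbase APIs.
 */
#include <stdbool.h>
#include <stdio.h>

#define TOY_NR_SLOTS 3

static int toy_pullable_atoms[TOY_NR_SLOTS];

/* Returns true when the caller should add the context to the pullable list. */
static bool toy_submit(int slot, bool blocked_on_cross_slot_dep)
{
        bool was_empty = (toy_pullable_atoms[slot] == 0);

        if (blocked_on_cross_slot_dep)
                return false;          /* parked on the x_dep list instead */

        toy_pullable_atoms[slot]++;    /* atom becomes pullable */
        return was_empty;              /* enqueue only on the transition */
}

int main(void)
{
        printf("first atom, slot 1:   enqueue=%d\n", toy_submit(1, false));
        printf("second atom, slot 1:  enqueue=%d\n", toy_submit(1, false));
        printf("blocked atom, slot 2: enqueue=%d\n", toy_submit(2, true));
        return 0;
}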
-+ * -+ * @kbdev: The kbase device -+ * @new_reg: The new region to insert -+ * @at_reg: The region to replace -+ * @start_pfn: The Page Frame Number to insert at -+ * @nr_pages: The number of pages of the region -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, -+ struct kbase_va_region *new_reg, -+ struct kbase_va_region *at_reg, u64 start_pfn, -+ size_t nr_pages) ++bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+ struct rb_root *reg_rbtree = NULL; -+ int err = 0; ++ bool enqueue_required, add_required = true; + -+ reg_rbtree = at_reg->rbtree; ++ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); + -+ /* Must be a free region */ -+ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); -+ /* start_pfn should be contained within at_reg */ -+ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); -+ /* at least nr_pages from start_pfn should be contained within at_reg */ -+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); -+ /* having at_reg means the rb_tree should not be empty */ -+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) -+ return -ENOMEM; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ new_reg->start_pfn = start_pfn; -+ new_reg->nr_pages = nr_pages; ++ /* If slot will transition from unpullable to pullable then add to ++ * pullable list ++ */ ++ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) ++ enqueue_required = true; ++ else ++ enqueue_required = false; + -+ /* Regions are a whole use, so swap and delete old one. */ -+ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { -+ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), -+ reg_rbtree); -+ kfree(at_reg); -+ } -+ /* New region replaces the start of the old one, so insert before. */ -+ else if (at_reg->start_pfn == start_pfn) { -+ at_reg->start_pfn += nr_pages; -+ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); -+ at_reg->nr_pages -= nr_pages; ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || ++ (katom->pre_dep && (katom->pre_dep->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { ++ int prio = katom->sched_priority; ++ unsigned int js = katom->slot_nr; ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + -+ kbase_region_tracker_insert(new_reg); -+ } -+ /* New region replaces the end of the old one, so insert after. 
*/ -+ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { -+ at_reg->nr_pages -= nr_pages; ++ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js); + -+ kbase_region_tracker_insert(new_reg); ++ list_add_tail(&katom->queue, &queue->x_dep_head); ++ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; ++ if (kbase_js_atom_blocked_on_x_dep(katom)) { ++ enqueue_required = false; ++ add_required = false; ++ } ++ } else { ++ dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", ++ (void *)katom); + } -+ /* New region splits the old one, so insert and create new */ -+ else { -+ struct kbase_va_region *new_front_reg; -+ -+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, -+ start_pfn - at_reg->start_pfn, -+ at_reg->flags & KBASE_REG_ZONE_MASK); + -+ if (new_front_reg) { -+ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; -+ at_reg->start_pfn = start_pfn + nr_pages; ++ if (add_required) { ++ /* Check if there are lower priority jobs to soft stop */ ++ kbase_job_slot_ctx_priority_check_locked(kctx, katom); + -+ kbase_region_tracker_insert(new_front_reg); -+ kbase_region_tracker_insert(new_reg); -+ } else { -+ err = -ENOMEM; -+ } ++ /* Add atom to ring buffer. */ ++ jsctx_tree_add(kctx, katom); ++ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } + -+ return err; ++ dev_dbg(kctx->kbdev->dev, ++ "Enqueue of kctx %pK is %srequired to submit atom %pK\n", ++ kctx, enqueue_required ? "" : "not ", katom); ++ ++ return enqueue_required; +} + +/** -+ * kbase_add_va_region - Add a VA region to the region list for a context. -+ * -+ * @kctx: kbase context containing the region -+ * @reg: the region to add -+ * @addr: the address to insert the region at -+ * @nr_pages: the number of pages in the region -+ * @align: the minimum alignment in pages ++ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the ++ * runnable_tree, ready for execution ++ * @katom: Atom to submit + * -+ * Return: 0 on success, error code otherwise. ++ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, ++ * but is still present in the x_dep list. If @katom has a same-slot dependent ++ * atom then that atom (and any dependents) will also be moved. + */ -+int kbase_add_va_region(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 addr, -+ size_t nr_pages, size_t align) ++static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) +{ -+ int err = 0; -+ struct kbase_device *kbdev = kctx->kbdev; -+ int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); -+ int gpu_pc_bits = -+ kbdev->gpu_props.props.core_props.log2_program_counter_size; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(reg != NULL); ++ struct kbase_context *const kctx = katom->kctx; + -+ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* The executable allocation from the SAME_VA zone should already have an -+ * appropriately aligned GPU VA chosen for it. -+ * Also, executable allocations from EXEC_VA don't need the special -+ * alignment. -+ */ -+#if MALI_USE_CSF -+ /* The same is also true for the EXEC_FIXED_VA zone. 
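/*
 * Illustrative sketch only, not part of the patch hunks: kbase_js_pull() above
 * is essentially a chain of early-return guards (submission disabled, device
 * suspending, nothing to peek, priority level blocked, atom individually
 * blocked, cross-slot dependency not yet in the slot ringbuffer) and only
 * accounts for the atom once every guard has passed. The toy function below
 * mirrors that shape over a simplified atom; all names are invented for
 * illustration and are not kbase types.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_atom {
        bool blocked;
        bool cross_dep_ready;
};

struct toy_ctx {
        bool submit_allowed;
        struct toy_atom *head;      /* next pullable atom, if any */
        int pulled;                 /* atoms currently pulled from this ctx */
};

static struct toy_atom *toy_pull(struct toy_ctx *ctx, bool suspending)
{
        struct toy_atom *atom;

        if (!ctx->submit_allowed)
                return NULL;
        if (suspending)
                return NULL;

        atom = ctx->head;
        if (!atom)
                return NULL;
        if (atom->blocked)
                return NULL;
        if (!atom->cross_dep_ready)
                return NULL;

        /* All guards passed: account for the pull and hand the atom out. */
        ctx->pulled++;
        ctx->head = NULL;
        return atom;
}

int main(void)
{
        struct toy_atom a = { .blocked = false, .cross_dep_ready = true };
        struct toy_ctx ctx = { .submit_allowed = true, .head = &a, .pulled = 0 };

        printf("pulled: %s\n", toy_pull(&ctx, false) ? "yes" : "no");
        return 0;
}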
-+ */ -+#endif -+ if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && -+#if MALI_USE_CSF -+ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && -+#endif -+ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { -+ if (cpu_va_bits > gpu_pc_bits) { -+ align = max(align, (size_t)((1ULL << gpu_pc_bits) -+ >> PAGE_SHIFT)); -+ } -+ } ++ while (katom) { ++ WARN_ON(!(katom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + -+ do { -+ err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, -+ align); -+ if (err != -ENOMEM) -+ break; ++ if (!kbase_js_atom_blocked_on_x_dep(katom)) { ++ dev_dbg(kctx->kbdev->dev, ++ "Del atom %pK from X_DEP list in js_move_to_tree\n", ++ (void *)katom); + -+ /* -+ * If the allocation is not from the same zone as JIT -+ * then don't retry, we're out of VA and there is -+ * nothing which can be done about it. -+ */ -+ if ((reg->flags & KBASE_REG_ZONE_MASK) != -+ KBASE_REG_ZONE_CUSTOM_VA) ++ list_del(&katom->queue); ++ katom->atom_flags &= ++ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; ++ /* For incremental rendering, an end-of-renderpass atom ++ * may have had its dependency on start-of-renderpass ++ * ignored and may therefore already be in the tree. ++ */ ++ if (!(katom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { ++ jsctx_tree_add(kctx, katom); ++ katom->atom_flags |= ++ KBASE_KATOM_FLAG_JSCTX_IN_TREE; ++ } ++ } else { ++ dev_dbg(kctx->kbdev->dev, ++ "Atom %pK blocked on x-dep in js_move_to_tree\n", ++ (void *)katom); + break; -+ } while (kbase_jit_evict(kctx)); ++ } + -+ return err; ++ katom = katom->post_dep; ++ } +} + -+KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** -+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree ++ * kbase_js_evict_deps - Evict dependencies of a failed atom. ++ * @kctx: Context pointer ++ * @katom: Pointer to the atom that has failed. ++ * @js: The job slot the katom was run on. ++ * @prio: Priority of the katom. + * -+ * @kbdev: The kbase device -+ * @reg: The region to add -+ * @addr: The address to add the region at, or 0 to map at any available address -+ * @nr_pages: The size of the region in pages -+ * @align: The minimum alignment in pages ++ * Remove all post dependencies of an atom from the context ringbuffers. + * -+ * Insert a region into the rbtree that was specified when the region was -+ * created. If addr is 0 a free area in the rbtree is used, otherwise the -+ * specified address is used. ++ * The original atom's event_code will be propogated to all dependent atoms. + * -+ * Return: 0 on success, error code otherwise. 
++ * Context: Caller must hold the HW access lock + */ -+int kbase_add_va_region_rbtree(struct kbase_device *kbdev, -+ struct kbase_va_region *reg, -+ u64 addr, size_t nr_pages, size_t align) ++static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, ++ unsigned int js, int prio) +{ -+ struct device *const dev = kbdev->dev; -+ struct rb_root *rbtree = NULL; -+ struct kbase_va_region *tmp; -+ u64 gpu_pfn = addr >> PAGE_SHIFT; -+ int err = 0; -+ -+ rbtree = reg->rbtree; ++ struct kbase_jd_atom *x_dep = katom->x_post_dep; ++ struct kbase_jd_atom *next_katom = katom->post_dep; + -+ if (!align) -+ align = 1; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* must be a power of 2 */ -+ KBASE_DEBUG_ASSERT(is_power_of_2(align)); -+ KBASE_DEBUG_ASSERT(nr_pages > 0); ++ if (next_katom) { ++ KBASE_DEBUG_ASSERT(next_katom->status != ++ KBASE_JD_ATOM_STATE_HW_COMPLETED); ++ next_katom->will_fail_event_code = katom->event_code; + -+ /* Path 1: Map a specific address. Find the enclosing region, -+ * which *must* be free. -+ */ -+ if (gpu_pfn) { -+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); ++ } + -+ tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, -+ nr_pages); -+ if (kbase_is_region_invalid(tmp)) { -+ dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); -+ err = -ENOMEM; -+ goto exit; -+ } else if (!kbase_is_region_free(tmp)) { -+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", -+ tmp->start_pfn, tmp->flags, -+ tmp->nr_pages, gpu_pfn, nr_pages); -+ err = -ENOMEM; -+ goto exit; -+ } ++ /* Has cross slot depenency. */ ++ if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { ++ /* Remove dependency.*/ ++ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + -+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); -+ if (err) { -+ dev_warn(dev, "Failed to insert va region"); -+ err = -ENOMEM; -+ } -+ } else { -+ /* Path 2: Map any free address which meets the requirements. */ -+ u64 start_pfn; -+ size_t align_offset = align; -+ size_t align_mask = align - 1; ++ dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", ++ (void *)x_dep); + -+#if !MALI_USE_CSF -+ if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { -+ WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", -+ __func__, -+ (unsigned long)align); -+ align_mask = reg->extension - 1; -+ align_offset = reg->extension - reg->initial_commit; -+ } -+#endif /* !MALI_USE_CSF */ ++ /* Fail if it had a data dependency. */ ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) ++ x_dep->will_fail_event_code = katom->event_code; + -+ tmp = kbase_region_tracker_find_region_meeting_reqs(reg, -+ nr_pages, align_offset, align_mask, -+ &start_pfn); -+ if (tmp) { -+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); -+ if (unlikely(err)) { -+ dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", -+ start_pfn, nr_pages); -+ } -+ } else { -+ dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", -+ nr_pages, align_offset, align_mask); -+ err = -ENOMEM; -+ } ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) ++ kbase_js_move_to_tree(x_dep); + } -+ -+exit: -+ return err; +} + -+/* -+ * @brief Initialize the internal region tracker data structure. 
-+ */ -+#if MALI_USE_CSF -+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, -+ struct kbase_va_region *same_va_reg, -+ struct kbase_va_region *custom_va_reg, -+ struct kbase_va_region *exec_va_reg, -+ struct kbase_va_region *exec_fixed_va_reg, -+ struct kbase_va_region *fixed_va_reg) ++struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) +{ -+ u64 last_zone_end_pfn; ++ struct kbase_jd_atom *katom; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_device *kbdev; ++ int pulled; + -+ kctx->reg_rbtree_same = RB_ROOT; -+ kbase_region_tracker_insert(same_va_reg); ++ KBASE_DEBUG_ASSERT(kctx); + -+ last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; ++ kbdev = kctx->kbdev; ++ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js); + -+ /* Although custom_va_reg doesn't always exist, initialize -+ * unconditionally because of the mem_view debugfs -+ * implementation which relies on it being empty. -+ */ -+ kctx->reg_rbtree_custom = RB_ROOT; -+ kctx->reg_rbtree_exec = RB_ROOT; ++ js_devdata = &kbdev->js_data; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (custom_va_reg) { -+ WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); -+ kbase_region_tracker_insert(custom_va_reg); -+ last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { ++ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", ++ (void *)kctx); ++ return NULL; + } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) ++#else ++ if (kbase_pm_is_suspending(kbdev)) ++#endif ++ return NULL; + -+ /* Initialize exec, fixed and exec_fixed. These are always -+ * initialized at this stage, if they will exist at all. -+ */ -+ kctx->reg_rbtree_fixed = RB_ROOT; -+ kctx->reg_rbtree_exec_fixed = RB_ROOT; -+ -+ if (exec_va_reg) { -+ WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); -+ kbase_region_tracker_insert(exec_va_reg); -+ last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; ++ katom = jsctx_rb_peek(kctx, js); ++ if (!katom) { ++ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); ++ return NULL; + } -+ -+ if (exec_fixed_va_reg) { -+ WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); -+ kbase_region_tracker_insert(exec_fixed_va_reg); -+ last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; ++ if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { ++ dev_dbg(kbdev->dev, ++ "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", ++ (void *)kctx, katom->sched_priority, js); ++ return NULL; + } -+ -+ if (fixed_va_reg) { -+ WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); -+ kbase_region_tracker_insert(fixed_va_reg); -+ last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; ++ if (atomic_read(&katom->blocked)) { ++ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", ++ (void *)katom); ++ return NULL; + } -+} -+#else -+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, -+ struct kbase_va_region *same_va_reg, -+ struct kbase_va_region *custom_va_reg) -+{ -+ kctx->reg_rbtree_same = RB_ROOT; -+ kbase_region_tracker_insert(same_va_reg); + -+ /* Although custom_va_reg and exec_va_reg don't always exist, -+ * initialize unconditionally because of the mem_view debugfs -+ * implementation which relies on them being empty. 
-+ * -+ * The difference between the two is that the EXEC_VA region -+ * is never initialized at this stage. ++ /* Due to ordering restrictions when unpulling atoms on failure, we do ++ * not allow multiple runs of fail-dep atoms from the same context to be ++ * present on the same slot + */ -+ kctx->reg_rbtree_custom = RB_ROOT; -+ kctx->reg_rbtree_exec = RB_ROOT; -+ -+ if (custom_va_reg) -+ kbase_region_tracker_insert(custom_va_reg); -+} -+#endif /* MALI_USE_CSF */ ++ if (katom->pre_dep && kbase_jsctx_slot_atoms_pulled(kctx, js)) { ++ struct kbase_jd_atom *prev_atom = ++ kbase_backend_inspect_tail(kbdev, js); + -+static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) -+{ -+ struct kbase_context *kctx = NULL; -+ struct rb_root *rbtree = reg->rbtree; ++ if (prev_atom && prev_atom->kctx != kctx) ++ return NULL; ++ } + -+ switch (reg->flags & KBASE_REG_ZONE_MASK) { -+ case KBASE_REG_ZONE_CUSTOM_VA: -+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); -+ break; -+ case KBASE_REG_ZONE_SAME_VA: -+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); -+ break; -+ case KBASE_REG_ZONE_EXEC_VA: -+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); -+ break; -+#if MALI_USE_CSF -+ case KBASE_REG_ZONE_EXEC_FIXED_VA: -+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); -+ break; -+ case KBASE_REG_ZONE_FIXED_VA: -+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); -+ break; -+ case KBASE_REG_ZONE_MCU_SHARED: -+ /* This is only expected to be called on driver unload. */ -+ break; -+#endif -+ default: -+ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); -+ break; ++ if (kbase_js_atom_blocked_on_x_dep(katom)) { ++ if (katom->x_pre_dep->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || ++ katom->x_pre_dep->will_fail_event_code) { ++ dev_dbg(kbdev->dev, ++ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", ++ (void *)katom->x_pre_dep); ++ return NULL; ++ } ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && ++ kbase_backend_nr_atoms_on_slot(kbdev, js)) { ++ dev_dbg(kbdev->dev, ++ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", ++ (void *)katom, js); ++ return NULL; ++ } + } + -+ return kctx; -+} ++ KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JS_PULL_JOB, kctx, katom, ++ katom->jc, js, katom->sched_priority); ++ kbase_ctx_flag_set(kctx, KCTX_PULLED); ++ kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js)); + -+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) -+{ -+ struct rb_node *rbnode; -+ struct kbase_va_region *reg; ++ pulled = kbase_jsctx_slot_atom_pulled_inc(kctx, katom); ++ if (pulled == 1 && !kctx->slots_pullable) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); ++ } ++ jsctx_rb_pull(kctx, katom); + -+ do { -+ rbnode = rb_first(rbtree); -+ if (rbnode) { -+ rb_erase(rbnode, rbtree); -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); -+ if (kbase_page_migration_enabled) -+ kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); -+ /* Reset the start_pfn - as the rbtree is being -+ * destroyed and we've already erased this region, there -+ * is no further need to attempt to remove it. 
-+ * This won't affect the cleanup if the region was -+ * being used as a sticky resource as the cleanup -+ * related to sticky resources anyways need to be -+ * performed before the term of region tracker. -+ */ -+ reg->start_pfn = 0; -+ kbase_free_alloced_region(reg); -+ } -+ } while (rbnode); -+} ++ kbase_ctx_sched_retain_ctx_refcount(kctx); + -+void kbase_region_tracker_term(struct kbase_context *kctx) -+{ -+ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, -+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", -+ kctx->tgid, kctx->id); ++ katom->ticks = 0; + -+ kbase_gpu_vm_lock(kctx); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -+#if MALI_USE_CSF -+ WARN_ON(!list_empty(&kctx->csf.event_pages_head)); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); ++ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n", ++ (void *)katom, (void *)kctx, js); + -+#endif -+ kbase_gpu_vm_unlock(kctx); ++ return katom; +} + -+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) ++/** ++ * js_return_of_start_rp() - Handle soft-stop of an atom that starts a ++ * renderpass ++ * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped ++ * ++ * This function is called to switch to incremental rendering if the tiler job ++ * chain at the start of a renderpass has used too much memory. It prevents the ++ * tiler job being pulled for execution in the job scheduler again until the ++ * next phase of incremental rendering is complete. ++ * ++ * If the end-of-renderpass atom is already in the job scheduler (because a ++ * previous attempt at tiling used too much memory during the same renderpass) ++ * then it is unblocked; otherwise, it is run by handing it to the scheduler. 
++ */ ++static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) +{ -+ kbase_region_tracker_erase_rbtree(rbtree); -+} ++ struct kbase_context *const kctx = start_katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_jd_renderpass *rp; ++ struct kbase_jd_atom *end_katom; ++ unsigned long flags; + -+static size_t kbase_get_same_va_bits(struct kbase_context *kctx) -+{ -+ return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), -+ kctx->kbdev->gpu_props.mmu.va_bits); -+} ++ lockdep_assert_held(&kctx->jctx.lock); + -+int kbase_region_tracker_init(struct kbase_context *kctx) -+{ -+ struct kbase_va_region *same_va_reg; -+ struct kbase_va_region *custom_va_reg = NULL; -+ size_t same_va_bits = kbase_get_same_va_bits(kctx); -+ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; -+ u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; -+ u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; -+ u64 same_va_pages; -+ u64 same_va_base = 1u; -+ int err; -+#if MALI_USE_CSF -+ struct kbase_va_region *exec_va_reg; -+ struct kbase_va_region *exec_fixed_va_reg; -+ struct kbase_va_region *fixed_va_reg; ++ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) ++ return; + -+ u64 exec_va_base; -+ u64 fixed_va_end; -+ u64 exec_fixed_va_base; -+ u64 fixed_va_base; -+ u64 fixed_va_pages; -+#endif ++ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ /* Take the lock as kbase_free_alloced_region requires it */ -+ kbase_gpu_vm_lock(kctx); ++ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + -+ same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; ++ if (WARN_ON(rp->start_katom != start_katom)) ++ return; + -+#if MALI_USE_CSF -+ if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { -+ /* Depending on how the kernel is configured, it's possible (eg on aarch64) for -+ * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone -+ * doesn't cross into the exec_va zone. -+ */ -+ same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; -+ } -+#endif ++ dev_dbg(kctx->kbdev->dev, ++ "JS return start atom %pK in state %d of RP %d\n", ++ (void *)start_katom, (int)rp->state, ++ start_katom->renderpass_id); + -+ /* all have SAME_VA */ -+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, -+ same_va_pages, KBASE_REG_ZONE_SAME_VA); ++ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) ++ return; + -+ if (!same_va_reg) { -+ err = -ENOMEM; -+ goto fail_unlock; ++ /* The tiler job might have been soft-stopped for some reason other ++ * than running out of memory. 
++ */ ++ if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { ++ dev_dbg(kctx->kbdev->dev, ++ "JS return isn't OOM in state %d of RP %d\n", ++ (int)rp->state, start_katom->renderpass_id); ++ return; + } -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, -+ same_va_pages); -+ -+ if (kbase_ctx_compat_mode(kctx)) { -+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { -+ err = -EINVAL; -+ goto fail_free_same_va; -+ } -+ /* If the current size of TMEM is out of range of the -+ * virtual address space addressable by the MMU then -+ * we should shrink it to fit -+ */ -+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) -+ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; -+ -+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, -+ KBASE_REG_ZONE_CUSTOM_VA_BASE, -+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + -+ if (!custom_va_reg) { -+ err = -ENOMEM; -+ goto fail_free_same_va; -+ } -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, -+ KBASE_REG_ZONE_CUSTOM_VA_BASE, -+ custom_va_size); -+ } else { -+ custom_va_size = 0; -+ } ++ dev_dbg(kctx->kbdev->dev, ++ "JS return confirm OOM in state %d of RP %d\n", ++ (int)rp->state, start_katom->renderpass_id); + -+#if MALI_USE_CSF -+ /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ -+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; ++ if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && ++ rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) ++ return; + -+ /* Similarly the end of the FIXED_VA zone also depends on whether the client -+ * is 32 or 64-bits. ++ /* Prevent the tiler job being pulled for execution in the ++ * job scheduler again. + */ -+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; -+ -+ if (kbase_ctx_compat_mode(kctx)) { -+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; -+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; -+ } -+ -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, -+ KBASE_REG_ZONE_EXEC_VA_SIZE); -+ -+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, -+ KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); ++ dev_dbg(kbdev->dev, "Blocking start atom %pK\n", ++ (void *)start_katom); ++ atomic_inc(&start_katom->blocked); + -+ if (!exec_va_reg) { -+ err = -ENOMEM; -+ goto fail_free_custom_va; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; ++ rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? ++ KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, -+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); ++ /* Was the fragment job chain submitted to kbase yet? 
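/*
 * Illustrative sketch only, not part of the patch hunks: the incremental-rendering
 * logic here is driven by a small per-renderpass state machine. A soft-stopped
 * tiler job that confirms out-of-memory moves the renderpass from PEND_OOM to
 * OOM (or from RETRY_PEND_OOM to RETRY_OOM), the start-of-renderpass atom is
 * blocked, and on the retry path the end-of-renderpass atom is unblocked just
 * below so the fragment chain can flush what has been tiled so far. The enum
 * and transition below are a simplified model of those states, not the
 * driver's own definitions.
 */
#include <stdio.h>

enum toy_rp_state {
        TOY_RP_START,
        TOY_RP_PEND_OOM,
        TOY_RP_OOM,
        TOY_RP_RETRY,
        TOY_RP_RETRY_PEND_OOM,
        TOY_RP_RETRY_OOM,
        TOY_RP_COMPLETE,
};

/* Transition applied when a soft-stopped start atom confirms OOM. */
static enum toy_rp_state toy_confirm_oom(enum toy_rp_state s)
{
        return (s == TOY_RP_PEND_OOM) ? TOY_RP_OOM : TOY_RP_RETRY_OOM;
}

int main(void)
{
        enum toy_rp_state s = TOY_RP_PEND_OOM;

        s = toy_confirm_oom(s);            /* first OOM: run the fragment chain */
        printf("state after first OOM: %d\n", s);

        s = TOY_RP_RETRY_PEND_OOM;         /* tiling retried and hit OOM again */
        s = toy_confirm_oom(s);            /* the end atom would be unblocked now */
        printf("state after retry OOM: %d\n", s);
        return 0;
}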
*/ ++ end_katom = rp->end_katom; ++ if (end_katom) { ++ dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", ++ (void *)end_katom); + -+ exec_fixed_va_reg = -+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, -+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, -+ KBASE_REG_ZONE_EXEC_FIXED_VA); -+ -+ if (!exec_fixed_va_reg) { -+ err = -ENOMEM; -+ goto fail_free_exec_va; -+ } -+ -+ fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; -+ fixed_va_pages = fixed_va_end - fixed_va_base; -+ -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); -+ -+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, -+ fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); ++ if (rp->state == KBASE_JD_RP_RETRY_OOM) { ++ /* Allow the end of the renderpass to be pulled for ++ * execution again to continue incremental rendering. ++ */ ++ dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", ++ (void *)end_katom); ++ atomic_dec(&end_katom->blocked); ++ WARN_ON(!(end_katom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_TREE)); ++ WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + -+ kctx->gpu_va_end = fixed_va_end; ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, ++ end_katom->slot_nr); + -+ if (!fixed_va_reg) { -+ err = -ENOMEM; -+ goto fail_free_exec_fixed_va; ++ /* Expect the fragment job chain to be scheduled without ++ * further action because this function is called when ++ * returning an atom to the job scheduler ringbuffer. ++ */ ++ end_katom = NULL; ++ } else { ++ WARN_ON(end_katom->status != ++ KBASE_JD_ATOM_STATE_QUEUED && ++ end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); ++ } + } + -+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, -+ exec_fixed_va_reg, fixed_va_reg); -+ -+ INIT_LIST_HEAD(&kctx->csf.event_pages_head); -+#else -+ /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is -+ * initially U64_MAX -+ */ -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); -+ /* Other zones are 0: kbase_create_context() uses vzalloc */ -+ -+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); -+ kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; -+#endif -+ kctx->jit_va = false; -+ -+ kbase_gpu_vm_unlock(kctx); -+ return 0; -+ -+#if MALI_USE_CSF -+fail_free_exec_fixed_va: -+ kbase_free_alloced_region(exec_fixed_va_reg); -+fail_free_exec_va: -+ kbase_free_alloced_region(exec_va_reg); -+fail_free_custom_va: -+ if (custom_va_reg) -+ kbase_free_alloced_region(custom_va_reg); -+#endif -+ -+fail_free_same_va: -+ kbase_free_alloced_region(same_va_reg); -+fail_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+} -+ -+static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) -+{ -+ struct kbase_reg_zone *exec_va_zone; -+ -+ lockdep_assert_held(&kctx->reg_lock); -+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); -+ -+ return (exec_va_zone->base_pfn != U64_MAX); -+} -+ -+bool kbase_has_exec_va_zone(struct kbase_context *kctx) -+{ -+ bool has_exec_va_zone; -+ -+ kbase_gpu_vm_lock(kctx); -+ has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); -+ kbase_gpu_vm_unlock(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return has_exec_va_zone; ++ if (end_katom) ++ kbase_jd_dep_clear_locked(end_katom); +} + +/** -+ * kbase_region_tracker_has_allocs - Determine if any allocations have been made -+ * on a context's region tracker -+ * -+ * @kctx: KBase context -+ * -+ * Check 
the context to determine if any allocations have been made yet from -+ * any of its zones. This check should be done before resizing a zone, e.g. to -+ * make space to add a second zone. -+ * -+ * Whilst a zone without allocations can be resized whilst other zones have -+ * allocations, we still check all of @kctx 's zones anyway: this is a stronger -+ * guarantee and should be adhered to when creating new zones anyway. ++ * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass ++ * @end_katom: Pointer to the end-of-renderpass atom that was completed + * -+ * Allocations from kbdev zones are not counted. ++ * This function is called to continue incremental rendering if the tiler job ++ * chain at the start of a renderpass used too much memory. It resets the ++ * mechanism for detecting excessive memory usage then allows the soft-stopped ++ * tiler job chain to be pulled for execution again. + * -+ * Return: true if any allocs exist on any zone, false otherwise ++ * The start-of-renderpass atom must already been submitted to kbase. + */ -+static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) ++static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) +{ -+ unsigned int zone_idx; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ struct kbase_context *const kctx = end_katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_jd_renderpass *rp; ++ struct kbase_jd_atom *start_katom; ++ unsigned long flags; + -+ for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { -+ struct kbase_reg_zone *zone; -+ struct kbase_va_region *reg; -+ u64 zone_base_addr; -+ unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); -+ unsigned long reg_zone; ++ lockdep_assert_held(&kctx->jctx.lock); + -+ if (!kbase_is_ctx_reg_zone(zone_bits)) -+ continue; -+ zone = kbase_ctx_reg_zone_get(kctx, zone_bits); -+ zone_base_addr = zone->base_pfn << PAGE_SHIFT; ++ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) ++ return; + -+ reg = kbase_region_tracker_find_region_base_address( -+ kctx, zone_base_addr); ++ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ if (!zone->va_size_pages) { -+ WARN(reg, -+ "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", -+ (unsigned long long)zone_base_addr, zone_bits); -+ continue; -+ } ++ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + -+ if (WARN(!reg, -+ "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", -+ (unsigned long long)zone_base_addr, zone_bits)) -+ return true; /* Safest return value */ ++ if (WARN_ON(rp->end_katom != end_katom)) ++ return; + -+ reg_zone = reg->flags & KBASE_REG_ZONE_MASK; -+ if (WARN(reg_zone != zone_bits, -+ "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", -+ (unsigned long long)zone_base_addr, zone_bits, -+ reg_zone)) -+ return true; /* Safest return value */ ++ dev_dbg(kctx->kbdev->dev, ++ "JS return end atom %pK in state %d of RP %d\n", ++ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + -+ /* Unless the region is completely free, of the same size as -+ * the original zone, then it has allocs -+ */ -+ if ((!(reg->flags & KBASE_REG_FREE)) || -+ (reg->nr_pages != zone->va_size_pages)) -+ return true; -+ } ++ if (WARN_ON(rp->state != KBASE_JD_RP_OOM && ++ rp->state != KBASE_JD_RP_RETRY_OOM)) ++ return; + -+ /* All 
zones are the same size as originally made, so there are no -+ * allocs ++ /* Reduce the number of mapped pages in the memory regions that ++ * triggered out-of-memory last time so that we can detect excessive ++ * memory usage again. + */ -+ return false; -+} -+ -+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, -+ u64 jit_va_pages) -+{ -+ struct kbase_va_region *same_va_reg; -+ struct kbase_reg_zone *same_va_zone; -+ u64 same_va_zone_base_addr; -+ const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; -+ struct kbase_va_region *custom_va_reg; -+ u64 jit_va_start; ++ kbase_gpu_vm_lock(kctx); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ lockdep_assert_held(&kctx->reg_lock); ++ while (!list_empty(&rp->oom_reg_list)) { ++ struct kbase_va_region *reg = ++ list_first_entry(&rp->oom_reg_list, ++ struct kbase_va_region, link); + -+ /* -+ * Modify the same VA free region after creation. The caller has -+ * ensured that allocations haven't been made, as any allocations could -+ * cause an overlap to happen with existing same VA allocations and the -+ * custom VA zone. -+ */ -+ same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); -+ same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ same_va_reg = kbase_region_tracker_find_region_base_address( -+ kctx, same_va_zone_base_addr); -+ if (WARN(!same_va_reg, -+ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", -+ (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) -+ return -ENOMEM; ++ dev_dbg(kbdev->dev, ++ "Reset backing to %zu pages for region %pK\n", ++ reg->threshold_pages, (void *)reg); + -+ /* kbase_region_tracker_has_allocs() in the caller has already ensured -+ * that all of the zones have no allocs, so no need to check that again -+ * on same_va_reg -+ */ -+ WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || -+ same_va_reg->nr_pages != same_va_zone->va_size_pages); ++ if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) ++ kbase_mem_shrink(kctx, reg, reg->threshold_pages); + -+ if (same_va_reg->nr_pages < jit_va_pages || -+ same_va_zone->va_size_pages < jit_va_pages) -+ return -ENOMEM; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ dev_dbg(kbdev->dev, "Deleting region %pK from list\n", ++ (void *)reg); ++ list_del_init(®->link); ++ kbase_va_region_alloc_put(kctx, reg); ++ } + -+ /* It's safe to adjust the same VA zone now */ -+ same_va_reg->nr_pages -= jit_va_pages; -+ same_va_zone->va_size_pages -= jit_va_pages; -+ jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_gpu_vm_unlock(kctx); + -+ /* -+ * Create a custom VA zone at the end of the VA for allocations which -+ * JIT can use so it doesn't have to allocate VA from the kernel. -+ */ -+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, -+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ rp->state = KBASE_JD_RP_RETRY; ++ dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); + -+ /* -+ * The context will be destroyed if we fail here so no point -+ * reverting the change we made to same_va. 
-+ */ -+ if (!custom_va_reg) -+ return -ENOMEM; -+ /* Since this is 64-bit, the custom zone will not have been -+ * initialized, so initialize it now ++ /* Allow the start of the renderpass to be pulled for execution again ++ * to begin/continue incremental rendering. + */ -+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, -+ jit_va_pages); ++ start_katom = rp->start_katom; ++ if (!WARN_ON(!start_katom)) { ++ dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", ++ (void *)start_katom); ++ atomic_dec(&start_katom->blocked); ++ (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, ++ start_katom->slot_nr); ++ } + -+ kbase_region_tracker_insert(custom_va_reg); -+ return 0; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, -+ int max_allocations, int trim_level, int group_id, -+ u64 phys_pages_limit) ++static void js_return_worker(struct work_struct *data) +{ -+ int err = 0; -+ -+ if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) -+ return -EINVAL; -+ -+ if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) -+ return -EINVAL; ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; ++ struct kbasep_js_atom_retained_state retained_state; ++ int js = katom->slot_nr; ++ bool slot_became_unblocked; ++ bool timer_sync = false; ++ bool context_idle = false; ++ unsigned long flags; ++ base_jd_core_req core_req = katom->core_req; ++ u64 cache_jc = katom->jc; + -+ if (phys_pages_limit > jit_va_pages) -+ return -EINVAL; ++ dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", ++ __func__, (void *)katom, katom->event_code); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (phys_pages_limit != jit_va_pages) -+ kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0); + -+ kbase_gpu_vm_lock(kctx); ++ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + -+ /* Verify that a JIT_VA zone has not been created already. */ -+ if (kctx->jit_va) { -+ err = -EINVAL; -+ goto exit_unlock; -+ } ++ kbase_backend_complete_wq(kbdev, katom); + -+ /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no -+ * allocs, we can ensure there are no allocs anywhere. -+ * -+ * This check is also useful in 32-bit, just to make sure init of the -+ * zone is always done before any allocs. -+ */ -+ if (kbase_region_tracker_has_allocs(kctx)) { -+ err = -ENOMEM; -+ goto exit_unlock; -+ } ++ kbasep_js_atom_retained_state_copy(&retained_state, katom); + -+ if (!kbase_ctx_compat_mode(kctx)) -+ err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); -+ /* -+ * Nothing to do for 32-bit clients, JIT uses the existing -+ * custom VA zone. 
-+ */ ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ if (!err) { -+ kctx->jit_max_allocations = max_allocations; -+ kctx->trim_level = trim_level; -+ kctx->jit_va = true; -+ kctx->jit_group_id = group_id; -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ kctx->jit_phys_pages_limit = phys_pages_limit; -+ dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", -+ phys_pages_limit); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ } ++ if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) ++ atomic_dec(&katom->blocked); + -+exit_unlock: -+ kbase_gpu_vm_unlock(kctx); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ return err; -+} ++ slot_became_unblocked = kbase_jsctx_slot_atom_pulled_dec(kctx, katom); + -+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) -+{ -+#if !MALI_USE_CSF -+ struct kbase_va_region *exec_va_reg; -+ struct kbase_reg_zone *exec_va_zone; -+ struct kbase_reg_zone *target_zone; -+ struct kbase_va_region *target_reg; -+ u64 target_zone_base_addr; -+ unsigned long target_zone_bits; -+ u64 exec_va_start; -+ int err; -+#endif ++ if (!kbase_jsctx_slot_atoms_pulled(kctx, js) && ++ jsctx_rb_none_to_pull(kctx, js)) ++ timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + -+ /* The EXEC_VA zone shall be created by making space either: -+ * - for 64-bit clients, at the end of the process's address space -+ * - for 32-bit clients, in the CUSTOM zone -+ * -+ * Firstly, verify that the number of EXEC_VA pages requested by the -+ * client is reasonable and then make sure that it is not greater than -+ * the address space itself before calculating the base address of the -+ * new zone. ++ /* If the context is now unblocked on this slot after soft-stopped ++ * atoms, then only mark it as pullable on this slot if it is not ++ * idle + */ -+ if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) -+ return -EINVAL; ++ if (slot_became_unblocked && kbase_jsctx_atoms_pulled(kctx) && ++ kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js); + -+#if MALI_USE_CSF -+ /* For CSF GPUs we now setup the EXEC_VA zone during initialization, -+ * so this request is a null-op. -+ */ -+ return 0; -+#else -+ kbase_gpu_vm_lock(kctx); ++ if (!kbase_jsctx_atoms_pulled(kctx)) { ++ dev_dbg(kbdev->dev, ++ "No atoms currently pulled from context %pK\n", ++ (void *)kctx); + -+ /* Verify that we've not already created a EXEC_VA zone, and that the -+ * EXEC_VA zone must come before JIT's CUSTOM_VA. -+ */ -+ if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { -+ err = -EPERM; -+ goto exit_unlock; -+ } ++ if (!kctx->slots_pullable) { ++ dev_dbg(kbdev->dev, ++ "Context %pK %s counted as runnable\n", ++ (void *)kctx, ++ kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? 
++ "is" : "isn't"); + -+ if (exec_va_pages > kctx->gpu_va_end) { -+ err = -ENOMEM; -+ goto exit_unlock; -+ } ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ timer_sync = true; ++ } + -+ /* Verify no allocations have already been made */ -+ if (kbase_region_tracker_has_allocs(kctx)) { -+ err = -ENOMEM; -+ goto exit_unlock; -+ } ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ int num_slots = kbdev->gpu_props.num_job_slots; ++ int slot; + -+ if (kbase_ctx_compat_mode(kctx)) { -+ /* 32-bit client: take from CUSTOM_VA zone */ -+ target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; -+ } else { -+ /* 64-bit client: take from SAME_VA zone */ -+ target_zone_bits = KBASE_REG_ZONE_SAME_VA; -+ } ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) ++ kbasep_js_set_submit_allowed(js_devdata, kctx); + -+ target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); -+ target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; ++ for (slot = 0; slot < num_slots; slot++) { ++ if (kbase_js_ctx_pullable(kctx, slot, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, slot); ++ } ++ } + -+ target_reg = kbase_region_tracker_find_region_base_address( -+ kctx, target_zone_base_addr); -+ if (WARN(!target_reg, -+ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", -+ (unsigned long long)target_zone_base_addr, target_zone_bits)) { -+ err = -ENOMEM; -+ goto exit_unlock; -+ } -+ /* kbase_region_tracker_has_allocs() above has already ensured that all -+ * of the zones have no allocs, so no need to check that again on -+ * target_reg -+ */ -+ WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || -+ target_reg->nr_pages != target_zone->va_size_pages); ++ kbase_jm_idle_ctx(kbdev, kctx); + -+ if (target_reg->nr_pages <= exec_va_pages || -+ target_zone->va_size_pages <= exec_va_pages) { -+ err = -ENOMEM; -+ goto exit_unlock; ++ context_idle = true; + } + -+ /* Taken from the end of the target zone */ -+ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, -+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA); -+ if (!exec_va_reg) { -+ err = -ENOMEM; -+ goto exit_unlock; ++ if (context_idle) { ++ dev_dbg(kbdev->dev, ++ "Context %pK %s counted as active\n", ++ (void *)kctx, ++ kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
++ "is" : "isn't"); ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); + } -+ /* Update EXEC_VA zone -+ * -+ * not using kbase_ctx_reg_zone_init() - it was already initialized -+ */ -+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); -+ exec_va_zone->base_pfn = exec_va_start; -+ exec_va_zone->va_size_pages = exec_va_pages; + -+ /* Update target zone and corresponding region */ -+ target_reg->nr_pages -= exec_va_pages; -+ target_zone->va_size_pages -= exec_va_pages; ++ if (timer_sync) ++ kbase_js_sync_timers(kbdev); + -+ kbase_region_tracker_insert(exec_va_reg); -+ err = 0; ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+exit_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+#endif /* MALI_USE_CSF */ -+} ++ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { ++ mutex_lock(&kctx->jctx.lock); ++ js_return_of_start_rp(katom); ++ mutex_unlock(&kctx->jctx.lock); ++ } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { ++ mutex_lock(&kctx->jctx.lock); ++ js_return_of_end_rp(katom); ++ mutex_unlock(&kctx->jctx.lock); ++ } + -+#if MALI_USE_CSF -+void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) -+{ -+ kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); -+} ++ dev_dbg(kbdev->dev, "JS: retained state %s finished", ++ kbasep_js_has_atom_finished(&retained_state) ? ++ "has" : "hasn't"); + -+int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) -+{ -+ struct kbase_va_region *shared_reg; -+ u64 shared_reg_start_pfn; -+ u64 shared_reg_size; ++ WARN_ON(kbasep_js_has_atom_finished(&retained_state)); + -+ shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; -+ shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, ++ &retained_state); + -+ kbdev->csf.shared_reg_rbtree = RB_ROOT; ++ kbase_js_sched_all(kbdev); + -+ shared_reg = -+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, -+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); -+ if (!shared_reg) -+ return -ENOMEM; ++ kbase_backend_complete_wq_post_sched(kbdev, core_req); + -+ kbase_region_tracker_insert(shared_reg); -+ return 0; -+} -+#endif ++ KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER_END, kctx, NULL, cache_jc, ++ 0); + -+static void kbasep_mem_page_size_init(struct kbase_device *kbdev) -+{ -+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) -+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) -+ kbdev->pagesize_2mb = true; -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { -+ dev_warn( -+ kbdev->dev, -+ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); -+ } -+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ -+ kbdev->pagesize_2mb = false; -+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ -+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ -+ /* Set it to the default based on which GPU is present */ -+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); -+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ ++ dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", ++ __func__, (void *)katom); +} + -+int kbase_mem_init(struct kbase_device *kbdev) ++void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ int err = 0; -+ struct kbasep_mem_device *memdev; -+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; -+#if 
IS_ENABLED(CONFIG_OF) -+ struct device_node *mgm_node = NULL; -+#endif ++ dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", ++ (void *)katom, (void *)kctx); + -+ KBASE_DEBUG_ASSERT(kbdev); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ memdev = &kbdev->memdev; ++ jsctx_rb_unpull(kctx, katom); + -+ kbasep_mem_page_size_init(kbdev); ++ WARN_ON(work_pending(&katom->work)); + -+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", -+ kbdev->devname); ++ /* Block re-submission until workqueue has run */ ++ atomic_inc(&katom->blocked); + -+ /* Initialize slab cache for kbase_va_regions */ -+ kbdev->va_region_slab = -+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL); -+ if (kbdev->va_region_slab == NULL) { -+ dev_err(kbdev->dev, "Failed to create va_region_slab\n"); -+ return -ENOMEM; -+ } ++ kbase_job_check_leave_disjoint(kctx->kbdev, katom); + -+ kbase_mem_migrate_init(kbdev); -+ kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, -+ KBASE_MEM_POOL_MAX_SIZE_KCTX); ++ INIT_WORK(&katom->work, js_return_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); ++} + -+ /* Initialize memory usage */ -+ atomic_set(&memdev->used_pages, 0); ++/** ++ * js_complete_start_rp() - Handle completion of atom that starts a renderpass ++ * @kctx: Context pointer ++ * @start_katom: Pointer to the atom that completed ++ * ++ * Put any references to virtual memory regions that might have been added by ++ * kbase_job_slot_softstop_start_rp() because the tiler job chain completed ++ * despite any pending soft-stop request. ++ * ++ * If the atom that just completed was soft-stopped during a previous attempt to ++ * run it then there should be a blocked end-of-renderpass atom waiting for it, ++ * which we must unblock to process the output of the tiler job chain. ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool js_complete_start_rp(struct kbase_context *kctx, ++ struct kbase_jd_atom *const start_katom) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_jd_renderpass *rp; ++ bool timer_sync = false; + -+ spin_lock_init(&kbdev->gpu_mem_usage_lock); -+ kbdev->total_gpu_pages = 0; -+ kbdev->process_root = RB_ROOT; -+ kbdev->dma_buf_root = RB_ROOT; -+ mutex_init(&kbdev->dma_buf_lock); ++ lockdep_assert_held(&kctx->jctx.lock); + -+#ifdef IR_THRESHOLD -+ atomic_set(&memdev->ir_threshold, IR_THRESHOLD); -+#else -+ atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); -+#endif ++ if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) ++ return false; + -+ kbdev->mgm_dev = &kbase_native_mgm_dev; ++ compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+#if IS_ENABLED(CONFIG_OF) -+ /* Check to see whether or not a platform-specific memory group manager -+ * is configured and available. 
-+ */ -+ mgm_node = of_parse_phandle(kbdev->dev->of_node, -+ "physical-memory-group-manager", 0); -+ if (!mgm_node) { -+ dev_info(kbdev->dev, -+ "No memory group manager is configured\n"); -+ } else { -+ struct platform_device *const pdev = -+ of_find_device_by_node(mgm_node); ++ rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + -+ if (!pdev) { -+ dev_err(kbdev->dev, -+ "The configured memory group manager was not found\n"); -+ } else { -+ kbdev->mgm_dev = platform_get_drvdata(pdev); -+ if (!kbdev->mgm_dev) { -+ dev_info(kbdev->dev, -+ "Memory group manager is not ready\n"); -+ err = -EPROBE_DEFER; -+ } else if (!try_module_get(kbdev->mgm_dev->owner)) { -+ dev_err(kbdev->dev, -+ "Failed to get memory group manger module\n"); -+ err = -ENODEV; -+ kbdev->mgm_dev = NULL; -+ } else { -+ dev_info(kbdev->dev, -+ "Memory group manager successfully loaded\n"); -+ } -+ } -+ of_node_put(mgm_node); -+ } -+#endif ++ if (WARN_ON(rp->start_katom != start_katom)) ++ return false; + -+ if (likely(!err)) { -+ struct kbase_mem_pool_group_config mem_pool_defaults; ++ dev_dbg(kctx->kbdev->dev, ++ "Start atom %pK is done in state %d of RP %d\n", ++ (void *)start_katom, (int)rp->state, ++ start_katom->renderpass_id); + -+ kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, -+ KBASE_MEM_POOL_MAX_SIZE_KBDEV); ++ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) ++ return false; + -+ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); -+ } ++ if (rp->state == KBASE_JD_RP_PEND_OOM || ++ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { ++ unsigned long flags; + -+ return err; -+} ++ dev_dbg(kctx->kbdev->dev, ++ "Start atom %pK completed before soft-stop\n", ++ (void *)start_katom); + -+void kbase_mem_halt(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++ kbase_gpu_vm_lock(kctx); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+void kbase_mem_term(struct kbase_device *kbdev) -+{ -+ struct kbasep_mem_device *memdev; -+ int pages; ++ while (!list_empty(&rp->oom_reg_list)) { ++ struct kbase_va_region *reg = ++ list_first_entry(&rp->oom_reg_list, ++ struct kbase_va_region, link); + -+ KBASE_DEBUG_ASSERT(kbdev); ++ WARN_ON(reg->flags & KBASE_REG_VA_FREED); ++ dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", ++ (void *)reg); ++ list_del_init(®->link); ++ kbase_va_region_alloc_put(kctx, reg); ++ } + -+ memdev = &kbdev->memdev; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_gpu_vm_unlock(kctx); ++ } else { ++ dev_dbg(kctx->kbdev->dev, ++ "Start atom %pK did not exceed memory threshold\n", ++ (void *)start_katom); + -+ pages = atomic_read(&memdev->used_pages); -+ if (pages != 0) -+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); ++ WARN_ON(rp->state != KBASE_JD_RP_START && ++ rp->state != KBASE_JD_RP_RETRY); ++ } + -+ kbase_mem_pool_group_term(&kbdev->mem_pools); ++ if (rp->state == KBASE_JD_RP_RETRY || ++ rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { ++ struct kbase_jd_atom *const end_katom = rp->end_katom; + -+ kbase_mem_migrate_term(kbdev); ++ if (!WARN_ON(!end_katom)) { ++ unsigned long flags; + -+ kmem_cache_destroy(kbdev->va_region_slab); -+ kbdev->va_region_slab = NULL; ++ /* Allow the end of the renderpass to be pulled for ++ * execution again to continue incremental rendering. 
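++			 * The blocked count dropped here was raised in
++			 * kbase_js_unpull() when the end atom was returned with
++			 * BASE_JD_EVENT_END_RP_DONE; js_return_worker() skips its
++			 * usual decrement for that event code, so the end atom
++			 * stays blocked until the start atom is re-run (see
++			 * js_return_of_start_rp()) or, as here, completes.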
++ */ ++ dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", ++ (void *)end_katom); ++ atomic_dec(&end_katom->blocked); + -+ WARN_ON(kbdev->total_gpu_pages); -+ WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); -+ WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); -+ mutex_destroy(&kbdev->dma_buf_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ timer_sync = kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, end_katom->slot_nr); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++ } + -+ if (kbdev->mgm_dev) -+ module_put(kbdev->mgm_dev->owner); ++ return timer_sync; +} -+KBASE_EXPORT_TEST_API(kbase_mem_term); + +/** -+ * kbase_alloc_free_region - Allocate a free region object. -+ * -+ * @kbdev: kbase device -+ * @rbtree: Backlink to the red-black tree of memory regions. -+ * @start_pfn: The Page Frame Number in GPU virtual address space. -+ * @nr_pages: The size of the region in pages. -+ * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA -+ * -+ * The allocated object is not part of any list yet, and is flagged as -+ * KBASE_REG_FREE. No mapping is allocated yet. -+ * -+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. ++ * js_complete_end_rp() - Handle final completion of atom that ends a renderpass ++ * @kctx: Context pointer ++ * @end_katom: Pointer to the atom that completed for the last time + * -+ * Return: pointer to the allocated region object on success, NULL otherwise. ++ * This function must only be called if the renderpass actually completed ++ * without the tiler job chain at the start using too much memory; otherwise ++ * completion of the end-of-renderpass atom is handled similarly to a soft-stop. + */ -+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, -+ u64 start_pfn, size_t nr_pages, int zone) ++static void js_complete_end_rp(struct kbase_context *kctx, ++ struct kbase_jd_atom *const end_katom) +{ -+ struct kbase_va_region *new_reg; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ unsigned long flags; ++ struct kbase_jd_renderpass *rp; + -+ KBASE_DEBUG_ASSERT(rbtree != NULL); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ /* zone argument should only contain zone related region flags */ -+ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); -+ KBASE_DEBUG_ASSERT(nr_pages > 0); -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); ++ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) ++ return; + -+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); ++ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ if (!new_reg) -+ return NULL; ++ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + -+ kbase_refcount_set(&new_reg->va_refcnt, 1); -+ atomic_set(&new_reg->no_user_free_count, 0); -+ new_reg->cpu_alloc = NULL; /* no alloc bound yet */ -+ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ -+ new_reg->rbtree = rbtree; -+ new_reg->flags = zone | KBASE_REG_FREE; ++ if (WARN_ON(rp->end_katom != end_katom)) ++ return; + -+ new_reg->flags |= KBASE_REG_GROWABLE; ++ dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", ++ (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + -+ new_reg->start_pfn = start_pfn; -+ new_reg->nr_pages = nr_pages; ++ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || ++ WARN_ON(rp->state == KBASE_JD_RP_OOM) || ++ 
WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) ++ return; + -+ INIT_LIST_HEAD(&new_reg->jit_node); -+ INIT_LIST_HEAD(&new_reg->link); ++ /* Rendering completed without running out of memory. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(!list_empty(&rp->oom_reg_list)); ++ rp->state = KBASE_JD_RP_COMPLETE; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return new_reg; ++ dev_dbg(kbdev->dev, "Renderpass %d is complete\n", ++ end_katom->renderpass_id); +} + -+KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -+ -+/** -+ * kbase_free_alloced_region - Free a region object. -+ * -+ * @reg: Region -+ * -+ * The described region must be freed of any mapping. -+ * -+ * If the region is not flagged as KBASE_REG_FREE, the region's -+ * alloc object will be released. -+ * It is a bug if no alloc object exists for non-free regions. -+ * -+ * If region is KBASE_REG_ZONE_MCU_SHARED it is freed -+ */ -+void kbase_free_alloced_region(struct kbase_va_region *reg) ++bool kbase_js_complete_atom_wq(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+#if MALI_USE_CSF -+ if ((reg->flags & KBASE_REG_ZONE_MASK) == -+ KBASE_REG_ZONE_MCU_SHARED) { -+ kfree(reg); -+ return; -+ } -+#endif -+ if (!(reg->flags & KBASE_REG_FREE)) { -+ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ bool timer_sync = false; ++ int atom_slot; ++ bool context_idle = false; ++ int prio = katom->sched_priority; + -+ if (WARN_ON(!kctx)) -+ return; ++ kbdev = kctx->kbdev; ++ atom_slot = katom->slot_nr; + -+ if (WARN_ON(kbase_is_region_invalid(reg))) -+ return; ++ dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", ++ __func__, (void *)katom, atom_slot); + -+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", -+ (void *)reg); -+#if MALI_USE_CSF -+ if (reg->flags & KBASE_REG_CSF_EVENT) -+ /* -+ * This should not be reachable if called from 'mcu_shared' functions -+ * such as: -+ * kbase_csf_firmware_mcu_shared_mapping_init -+ * kbase_csf_firmware_mcu_shared_mapping_term -+ */ ++ /* Update the incremental rendering state machine. ++ */ ++ if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) ++ timer_sync |= js_complete_start_rp(kctx, katom); ++ else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) ++ js_complete_end_rp(kctx, katom); + -+ kbase_unlink_event_mem_page(kctx, reg); -+#endif ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ mutex_lock(&kctx->jit_evict_lock); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + -+ /* -+ * The physical allocation should have been removed from the -+ * eviction list before this function is called. However, in the -+ * case of abnormal process termination or the app leaking the -+ * memory kbase_mem_free_region is not called so it can still be -+ * on the list at termination time of the region tracker. -+ */ -+ if (!list_empty(®->gpu_alloc->evict_node)) { -+ /* -+ * Unlink the physical allocation before unmaking it -+ * evictable so that the allocation isn't grown back to -+ * its last backed size as we're going to unmap it -+ * anyway. 
-+ */ -+ reg->cpu_alloc->reg = NULL; -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ reg->gpu_alloc->reg = NULL; ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ mutex_unlock(&kctx->jit_evict_lock); ++ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { ++ bool slot_became_unblocked; + -+ /* -+ * If a region has been made evictable then we must -+ * unmake it before trying to free it. -+ * If the memory hasn't been reclaimed it will be -+ * unmapped and freed below, if it has been reclaimed -+ * then the operations below are no-ops. -+ */ -+ if (reg->flags & KBASE_REG_DONT_NEED) { -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == -+ KBASE_MEM_TYPE_NATIVE); -+ kbase_mem_evictable_unmake(reg->gpu_alloc); -+ } -+ } else { -+ mutex_unlock(&kctx->jit_evict_lock); ++ dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", ++ (void *)katom); ++ ++ slot_became_unblocked = ++ kbase_jsctx_slot_atom_pulled_dec(kctx, katom); ++ context_idle = !kbase_jsctx_atoms_pulled(kctx); ++ ++ if (!kbase_jsctx_atoms_pulled(kctx) && !kctx->slots_pullable) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ timer_sync = true; + } + -+ /* -+ * Remove the region from the sticky resource metadata -+ * list should it be there. ++ /* If this slot has been blocked due to soft-stopped atoms, and ++ * all atoms have now been processed at this priority level and ++ * higher, then unblock the slot + */ -+ kbase_sticky_resource_release_force(kctx, NULL, -+ reg->start_pfn << PAGE_SHIFT); -+ -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ if (slot_became_unblocked) { ++ dev_dbg(kbdev->dev, ++ "kctx %pK is no longer blocked from submitting on slot %d at priority %d or higher\n", ++ (void *)kctx, atom_slot, prio); + -+ reg->flags |= KBASE_REG_VA_FREED; -+ kbase_va_region_alloc_put(kctx, reg); -+ } else { -+ kfree(reg); ++ if (kbase_js_ctx_pullable(kctx, atom_slot, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, atom_slot); ++ } + } -+} ++ WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + -+KBASE_EXPORT_TEST_API(kbase_free_alloced_region); ++ if (!kbase_jsctx_slot_atoms_pulled(kctx, atom_slot) && ++ jsctx_rb_none_to_pull(kctx, atom_slot)) { ++ if (!list_empty( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) ++ timer_sync |= kbase_js_ctx_list_remove_nolock( ++ kctx->kbdev, kctx, atom_slot); ++ } + -+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 addr, size_t nr_pages, size_t align, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) -+{ -+ int err; -+ size_t i = 0; -+ unsigned long attr; -+ unsigned long mask = ~KBASE_REG_MEMATTR_MASK; -+ unsigned long gwt_mask = ~0; -+ int group_id; -+ struct kbase_mem_phy_alloc *alloc; ++ /* ++ * If submission is disabled on this context (most likely due to an ++ * atom failure) and there are now no atoms left in the system then ++ * re-enable submission so that context can be scheduled again. 
++ */ ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && ++ !kbase_jsctx_atoms_pulled(kctx) && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ unsigned int js; + -+#ifdef CONFIG_MALI_CINSTR_GWT -+ if (kctx->gwt_enabled) -+ gwt_mask = ~KBASE_REG_GPU_WR; -+#endif ++ kbasep_js_set_submit_allowed(js_devdata, kctx); + -+ if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && -+ (reg->flags & KBASE_REG_SHARE_BOTH)) -+ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); -+ else -+ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } ++ } else if (katom->x_post_dep && ++ kbasep_js_is_submit_allowed(js_devdata, kctx)) { ++ unsigned int js; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(reg != NULL); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } ++ } + -+ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); -+ if (err) -+ return err; ++ /* Mark context as inactive. The pm reference will be dropped later in ++ * jd_done_worker(). ++ */ ++ if (context_idle) { ++ dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", ++ (void *)kctx); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ } + -+ alloc = reg->gpu_alloc; -+ group_id = alloc->group_id; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { -+ u64 const stride = alloc->imported.alias.stride; ++ dev_dbg(kbdev->dev, "Leaving %s\n", __func__); ++ return context_idle; ++} + -+ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); -+ for (i = 0; i < alloc->imported.alias.nents; i++) { -+ if (alloc->imported.alias.aliased[i].alloc) { -+ err = kbase_mmu_insert_aliased_pages( -+ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), -+ alloc->imported.alias.aliased[i].alloc->pages + -+ alloc->imported.alias.aliased[i].offset, -+ alloc->imported.alias.aliased[i].length, -+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, -+ NULL); -+ if (err) -+ goto bad_aliased_insert; ++/** ++ * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has ++ * completed for the last time. ++ * ++ * @end_katom: Pointer to the atom that completed on the hardware. ++ * ++ * An atom that ends a renderpass may be run on the hardware several times ++ * before notifying userspace or allowing dependent atoms to be executed. ++ * ++ * This function is used to decide whether or not to allow end-of-renderpass ++ * atom completion. It only returns false if the atom at the start of the ++ * renderpass was soft-stopped because it used too much memory during the most ++ * recent attempt at tiling. ++ * ++ * Return: True if the atom completed for the last time. 
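++ *
++ * As a rough summary, the state checks in this file imply the following
++ * transitions (the move into KBASE_JD_RP_PEND_OOM happens on the tiler
++ * out-of-memory path, which is not shown here):
++ *
++ *   PEND_OOM         -> OOM        js_return_of_start_rp() (soft-stop)
++ *   RETRY_PEND_OOM   -> RETRY_OOM  js_return_of_start_rp() (soft-stop)
++ *   OOM, RETRY_OOM   -> RETRY      js_return_of_end_rp()
++ *   remaining states -> COMPLETE   js_complete_end_rp()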
++ */ ++static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) ++{ ++ struct kbase_context *const kctx = end_katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_jd_renderpass *rp; + -+ /* Note: mapping count is tracked at alias -+ * creation time -+ */ -+ } else { -+ err = kbase_mmu_insert_single_aliased_page( -+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page, -+ alloc->imported.alias.aliased[i].length, -+ (reg->flags & mask & gwt_mask) | attr, group_id, -+ mmu_sync_info); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ if (err) -+ goto bad_aliased_insert; -+ } -+ } -+ } else { -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || -+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) ++ return true; + -+ err = kbase_mmu_insert_imported_pages( -+ kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), -+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); -+ } else { -+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ kbase_reg_current_backed_size(reg), -+ reg->flags & gwt_mask, kctx->as_nr, group_id, -+ mmu_sync_info, reg, true); -+ } ++ compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ if (err) -+ goto bad_insert; -+ kbase_mem_phy_alloc_gpu_mapped(alloc); -+ } ++ rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + -+ if (reg->flags & KBASE_REG_IMPORT_PAD && -+ !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && -+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count) { -+ /* For padded imported dma-buf or user-buf memory, map the dummy -+ * aliasing page from the end of the imported pages, to the end of -+ * the region using a read only mapping. -+ * -+ * Only map when it's imported dma-buf memory that is currently -+ * mapped. -+ * -+ * Assume reg->gpu_alloc->nents is the number of actual pages -+ * in the dma-buf memory. -+ */ -+ err = kbase_mmu_insert_single_imported_page( -+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page, -+ reg->nr_pages - reg->gpu_alloc->nents, -+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, -+ mmu_sync_info); -+ if (err) -+ goto bad_insert; -+ } ++ if (WARN_ON(rp->end_katom != end_katom)) ++ return true; + -+ return err; ++ dev_dbg(kbdev->dev, ++ "JS complete end atom %pK in state %d of RP %d\n", ++ (void *)end_katom, (int)rp->state, ++ end_katom->renderpass_id); + -+bad_aliased_insert: -+ while (i-- > 0) { -+ struct tagged_addr *phys_alloc = NULL; -+ u64 const stride = alloc->imported.alias.stride; ++ if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) ++ return true; + -+ if (alloc->imported.alias.aliased[i].alloc != NULL) -+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + -+ alloc->imported.alias.aliased[i].offset; ++ /* Failure of end-of-renderpass atoms must not return to the ++ * start of the renderpass. 
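++	 * Returning true in that case lets the failed end atom complete and
++	 * report its error code through the normal path instead of being
++	 * suppressed and unpulled for another incremental-rendering attempt.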
++ */ ++ if (end_katom->event_code != BASE_JD_EVENT_DONE) ++ return true; + -+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), -+ phys_alloc, alloc->imported.alias.aliased[i].length, -+ alloc->imported.alias.aliased[i].length, kctx->as_nr, -+ false); -+ } -+bad_insert: -+ kbase_remove_va_region(kctx->kbdev, reg); ++ if (rp->state != KBASE_JD_RP_OOM && ++ rp->state != KBASE_JD_RP_RETRY_OOM) ++ return true; + -+ return err; ++ dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); ++ return false; +} + -+KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -+ -+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, -+ struct kbase_va_region *reg, bool writeable); -+ -+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) ++struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp) +{ -+ int err = 0; -+ struct kbase_mem_phy_alloc *alloc; -+ -+ if (reg->start_pfn == 0) -+ return 0; ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_jd_atom *x_dep = katom->x_post_dep; + -+ if (!reg->gpu_alloc) -+ return -EINVAL; ++ kbdev = kctx->kbdev; ++ dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", ++ (void *)katom, (void *)kctx, (void *)x_dep); + -+ alloc = reg->gpu_alloc; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* Tear down GPU page tables, depending on memory type. */ -+ switch (alloc->type) { -+ case KBASE_MEM_TYPE_ALIAS: { -+ size_t i = 0; -+ /* Due to the way the number of valid PTEs and ATEs are tracked -+ * currently, only the GPU virtual range that is backed & mapped -+ * should be passed to the kbase_mmu_teardown_pages() function, -+ * hence individual aliased regions needs to be unmapped -+ * separately. -+ */ -+ for (i = 0; i < alloc->imported.alias.nents; i++) { -+ struct tagged_addr *phys_alloc = NULL; -+ int err_loop; ++ if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && ++ !js_end_rp_is_complete(katom)) { ++ katom->event_code = BASE_JD_EVENT_END_RP_DONE; ++ kbase_js_unpull(kctx, katom); ++ return NULL; ++ } + -+ if (alloc->imported.alias.aliased[i].alloc != NULL) -+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + -+ alloc->imported.alias.aliased[i].offset; ++ if (katom->will_fail_event_code) ++ katom->event_code = katom->will_fail_event_code; + -+ err_loop = kbase_mmu_teardown_pages( -+ kctx->kbdev, &kctx->mmu, -+ reg->start_pfn + (i * alloc->imported.alias.stride), -+ phys_alloc, alloc->imported.alias.aliased[i].length, -+ alloc->imported.alias.aliased[i].length, kctx->as_nr, -+ false); ++ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; ++ dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); ++ if (kbase_is_quick_reset_enabled(kbdev)) { ++ kbdev->num_of_atoms_hw_completed++; ++ if (kbdev->num_of_atoms_hw_completed >= 20) ++ kbase_disable_quick_reset(kbdev); ++ } + -+ if (WARN_ON_ONCE(err_loop)) -+ err = err_loop; -+ } -+ } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ size_t nr_phys_pages = reg->nr_pages; -+ size_t nr_virt_pages = reg->nr_pages; -+ /* If the region has import padding and falls under the threshold for -+ * issuing a partial GPU cache flush, we want to reduce the number of -+ * physical pages that get flushed. 
++ if (katom->event_code != BASE_JD_EVENT_DONE) { ++ kbase_js_evict_deps(kctx, katom, katom->slot_nr, ++ katom->sched_priority); ++ } + -+ * This is symmetric with case of mapping the memory, which first maps -+ * each imported physical page to a separate virtual page, and then -+ * maps the single aliasing sink page to each of the virtual padding -+ * pages. -+ */ -+ if (reg->flags & KBASE_REG_IMPORT_PAD) -+ nr_phys_pages = alloc->nents + 1; ++ KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, ++ katom->slot_nr, 0, TL_JS_EVENT_STOP); + -+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ alloc->pages, nr_phys_pages, nr_virt_pages, -+ kctx->as_nr, true); -+ } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg); ++ trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); + -+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ alloc->pages, nr_reg_pages, nr_reg_pages, -+ kctx->as_nr, true); -+ } -+ break; -+ default: { -+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg); ++ KBASE_TLSTREAM_TL_JD_DONE_START(kbdev, katom); ++ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); ++ KBASE_TLSTREAM_TL_JD_DONE_END(kbdev, katom); + -+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ alloc->pages, nr_reg_pages, nr_reg_pages, -+ kctx->as_nr, false); -+ } -+ break; -+ } ++ /* Unblock cross dependency if present */ ++ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || ++ !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && ++ (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { ++ bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, ++ false); ++ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++ dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", ++ (void *)x_dep); + -+ /* Update tracking, and other cleanup, depending on memory type. */ -+ switch (alloc->type) { -+ case KBASE_MEM_TYPE_ALIAS: -+ /* We mark the source allocs as unmapped from the GPU when -+ * putting reg's allocs -+ */ -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; ++ kbase_js_move_to_tree(x_dep); + -+ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { -+ user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; ++ if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, ++ false)) ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, ++ x_dep->slot_nr); + -+ /* The allocation could still have active mappings. 
*/ -+ if (user_buf->current_mapping_usage_count == 0) { -+ kbase_jd_user_buf_unmap(kctx, alloc, reg, -+ (reg->flags & -+ (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); -+ } ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { ++ dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", ++ (void *)x_dep); ++ return x_dep; + } -+ } -+ fallthrough; -+ default: -+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); -+ break; ++ } else { ++ dev_dbg(kbdev->dev, ++ "No cross-slot dep to unblock for atom %pK\n", ++ (void *)katom); + } + -+ return err; ++ return NULL; +} + -+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset) ++/** ++ * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot ++ * dependency ++ * @katom: Pointer to an atom in the slot ringbuffer ++ * ++ * A cross-slot dependency is ignored if necessary to unblock incremental ++ * rendering. If the atom at the start of a renderpass used too much memory ++ * and was soft-stopped then the atom at the end of a renderpass is submitted ++ * to hardware regardless of its dependency on the start-of-renderpass atom. ++ * This can happen multiple times for the same pair of atoms. ++ * ++ * Return: true to block the atom or false to allow it to be submitted to ++ * hardware ++ */ ++bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) +{ -+ struct vm_area_struct *vma; -+ struct kbase_cpu_mapping *map; -+ unsigned long vm_pgoff_in_region; -+ unsigned long vm_off_in_region; -+ unsigned long map_start; -+ size_t map_size; -+ -+ lockdep_assert_held(kbase_mem_get_process_mmap_lock()); -+ -+ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ -+ return NULL; -+ -+ vma = find_vma_intersection(current->mm, uaddr, uaddr+size); -+ -+ if (!vma || vma->vm_start > uaddr) -+ return NULL; -+ if (vma->vm_ops != &kbase_vm_ops) -+ /* Not ours! */ -+ return NULL; -+ -+ map = vma->vm_private_data; ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_jd_renderpass *rp; + -+ if (map->kctx != kctx) -+ /* Not from this context! */ -+ return NULL; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; -+ vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; -+ map_start = vma->vm_start - vm_off_in_region; -+ map_size = map->region->nr_pages << PAGE_SHIFT; ++ if (!(katom->atom_flags & ++ KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { ++ dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", ++ (void *)katom); ++ return false; ++ } + -+ if ((uaddr + size) > (map_start + map_size)) -+ /* Not within the CPU mapping */ -+ return NULL; ++ if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { ++ dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", ++ (void *)katom); ++ return true; ++ } + -+ *offset = (uaddr - vma->vm_start) + vm_off_in_region; ++ compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= ++ ARRAY_SIZE(kctx->jctx.renderpasses), ++ "Should check invalid access to renderpasses"); + -+ return map; -+} ++ rp = &kctx->jctx.renderpasses[katom->renderpass_id]; ++ /* We can read a subset of renderpass state without holding ++ * higher-level locks (but not end_katom, for example). 
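++	 * Summarising the checks below: the dependency is only ignored
++	 * (false returned) when the renderpass is in KBASE_JD_RP_OOM or
++	 * KBASE_JD_RP_RETRY_OOM and the blocking dependency really is the
++	 * start-of-renderpass atom (katom->x_pre_dep == rp->start_katom).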
++ */ + -+int kbasep_find_enclosing_cpu_mapping_offset( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset) -+{ -+ struct kbase_cpu_mapping *map; ++ WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); + -+ kbase_os_mem_map_lock(kctx); ++ dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", ++ (int)rp->state); + -+ map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); ++ if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) ++ return true; + -+ kbase_os_mem_map_unlock(kctx); ++ /* Tiler ran out of memory so allow the fragment job chain to run ++ * if it only depends on the tiler job chain. ++ */ ++ if (katom->x_pre_dep != rp->start_katom) { ++ dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", ++ (void *)katom->x_pre_dep, (void *)rp->start_katom); ++ return true; ++ } + -+ if (!map) -+ return -EINVAL; ++ dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", ++ (void *)katom->x_pre_dep); + -+ return 0; ++ return false; +} + -+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); -+ -+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, -+ u64 gpu_addr, size_t size, u64 *start, u64 *offset) ++void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) +{ -+ struct kbase_va_region *region; -+ -+ kbase_gpu_vm_lock(kctx); ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; ++ bool timer_sync = false; ++ bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; ++ unsigned int js; + -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); ++ KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); + -+ if (!region) { -+ kbase_gpu_vm_unlock(kctx); -+ return -EINVAL; -+ } ++ dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", ++ __func__, (void *)kbdev, (unsigned int)js_mask); + -+ *start = region->start_pfn << PAGE_SHIFT; ++ js_devdata = &kbdev->js_data; + -+ *offset = gpu_addr - *start; ++ down(&js_devdata->schedule_sem); ++ mutex_lock(&js_devdata->queue_mutex); + -+ if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) { -+ kbase_gpu_vm_unlock(kctx); -+ return -EINVAL; ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ last_active[js] = kbdev->hwaccess.active_kctx[js]; ++ ctx_waiting[js] = false; + } + -+ kbase_gpu_vm_unlock(kctx); ++ while (js_mask) { ++ js = ffs(js_mask) - 1; + -+ return 0; -+} ++ while (1) { ++ struct kbase_context *kctx; ++ unsigned long flags; ++ bool context_idle = false; + -+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); ++ kctx = kbase_js_ctx_list_pop_head(kbdev, js); + -+void kbase_sync_single(struct kbase_context *kctx, -+ struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, -+ off_t offset, size_t size, enum kbase_sync_type sync_fn) -+{ -+ struct page *cpu_page; -+ phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); -+ phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa); ++ if (!kctx) { ++ js_mask &= ~(1 << js); ++ dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js); ++ break; ++ } + -+ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); ++ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { ++ context_idle = true; + -+ if (likely(cpu_pa == gpu_pa)) { -+ dma_addr_t dma_addr; ++ dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx, ++ js); + -+ BUG_ON(!cpu_page); -+ BUG_ON(offset + size > PAGE_SIZE); ++ if (kbase_pm_context_active_handle_suspend( ++ kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ dev_dbg(kbdev->dev, "Suspend pending 
(s:%u)\n", js); ++ /* Suspend pending - return context to ++ * queue and stop scheduling ++ */ ++ mutex_lock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ if (kbase_js_ctx_list_add_pullable_head( ++ kctx->kbdev, kctx, js)) ++ kbase_js_sync_timers(kbdev); ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ up(&js_devdata->schedule_sem); ++ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, ++ 0); ++ return; ++ } ++ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); ++ } + -+ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; ++ if (!kbase_js_use_ctx(kbdev, kctx, js)) { ++ mutex_lock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); + -+ if (sync_fn == KBASE_SYNC_TO_CPU) -+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, -+ size, DMA_BIDIRECTIONAL); -+ else if (sync_fn == KBASE_SYNC_TO_DEVICE) -+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, -+ size, DMA_BIDIRECTIONAL); -+ } else { -+ void *src = NULL; -+ void *dst = NULL; -+ struct page *gpu_page; -+ dma_addr_t dma_addr; ++ dev_dbg(kbdev->dev, ++ "kctx %pK cannot be used at this time\n", ++ kctx); + -+ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) -+ return; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbase_js_ctx_pullable(kctx, js, false) ++ || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_head_nolock( ++ kctx->kbdev, kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, kctx, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ if (context_idle) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); ++ } + -+ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); -+ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; ++ /* No more jobs can be submitted on this slot */ ++ js_mask &= ~(1 << js); ++ break; ++ } ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (sync_fn == KBASE_SYNC_TO_DEVICE) { -+ src = ((unsigned char *)kmap(cpu_page)) + offset; -+ dst = ((unsigned char *)kmap(gpu_page)) + offset; -+ } else if (sync_fn == KBASE_SYNC_TO_CPU) { -+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, -+ DMA_BIDIRECTIONAL); -+ src = ((unsigned char *)kmap(gpu_page)) + offset; -+ dst = ((unsigned char *)kmap(cpu_page)) + offset; -+ } ++ kbase_ctx_flag_clear(kctx, KCTX_PULLED); + -+ memcpy(dst, src, size); -+ kunmap(gpu_page); -+ kunmap(cpu_page); -+ if (sync_fn == KBASE_SYNC_TO_DEVICE) -+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, -+ DMA_BIDIRECTIONAL); -+ } -+} ++ if (!kbase_jm_kick(kbdev, 1 << js)) { ++ dev_dbg(kbdev->dev, "No more jobs can be submitted (s:%u)\n", js); ++ js_mask &= ~(1 << js); ++ } ++ if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { ++ bool pullable; + -+static int kbase_do_syncset(struct kbase_context *kctx, -+ struct basep_syncset *sset, enum kbase_sync_type sync_fn) -+{ -+ int err = 0; -+ struct kbase_va_region *reg; -+ struct kbase_cpu_mapping *map; -+ unsigned long start; -+ size_t size; -+ struct tagged_addr *cpu_pa; -+ struct tagged_addr *gpu_pa; -+ u64 page_off, page_count; -+ u64 i; -+ u64 offset; ++ dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", ++ (void *)kctx, js); + -+ kbase_os_mem_map_lock(kctx); -+ kbase_gpu_vm_lock(kctx); ++ pullable = kbase_js_ctx_pullable(kctx, js, ++ true); + -+ /* find the region where the virtual 
address is contained */ -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ sset->mem_handle.basep.handle); -+ if (kbase_is_region_invalid_or_free(reg)) { -+ dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", -+ sset->mem_handle.basep.handle); -+ err = -EINVAL; -+ goto out_unlock; -+ } ++ /* Failed to pull jobs - push to head of list. ++ * Unless this context is already 'active', in ++ * which case it's effectively already scheduled ++ * so push it to the back of the list. ++ */ ++ if (pullable && kctx == last_active[js] && ++ kbase_ctx_flag(kctx, ++ (KCTX_PULLED_SINCE_ACTIVE_JS0 << ++ js))) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kctx->kbdev, ++ kctx, js); ++ else if (pullable) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_head_nolock( ++ kctx->kbdev, ++ kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, ++ kctx, js); + -+ /* -+ * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The -+ * CPU mapping cacheability is defined by the owner of the imported -+ * memory, and not by kbase, therefore we must assume that any imported -+ * memory may be cached. -+ */ -+ if (kbase_mem_is_imported(reg->gpu_alloc->type)) { -+ err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); -+ goto out_unlock; -+ } ++ /* If this context is not the active context, ++ * but the active context is pullable on this ++ * slot, then we need to remove the active ++ * marker to prevent it from submitting atoms in ++ * the IRQ handler, which would prevent this ++ * context from making progress. ++ */ ++ if (last_active[js] && kctx != last_active[js] ++ && kbase_js_ctx_pullable( ++ last_active[js], js, true)) ++ ctx_waiting[js] = true; + -+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) -+ goto out_unlock; ++ if (context_idle) { ++ kbase_jm_idle_ctx(kbdev, kctx); ++ spin_unlock_irqrestore( ++ &kbdev->hwaccess_lock, ++ flags); ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); ++ } else { ++ spin_unlock_irqrestore( ++ &kbdev->hwaccess_lock, ++ flags); ++ } ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); + -+ start = (uintptr_t)sset->user_addr; -+ size = (size_t)sset->size; ++ js_mask &= ~(1 << js); ++ break; /* Could not run atoms on this slot */ ++ } + -+ map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); -+ if (!map) { -+ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", -+ start, sset->mem_handle.basep.handle); -+ err = -EINVAL; -+ goto out_unlock; ++ dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", ++ (void *)kctx); ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kctx->kbdev, kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, kctx, js); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ } + } + -+ page_off = offset >> PAGE_SHIFT; -+ offset &= ~PAGE_MASK; -+ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; -+ cpu_pa = kbase_get_cpu_phy_pages(reg); -+ gpu_pa = kbase_get_gpu_phy_pages(reg); ++ if (timer_sync) ++ kbase_js_sync_timers(kbdev); + -+ if (page_off > reg->nr_pages || -+ page_off + page_count > reg->nr_pages) { -+ /* Sync overflows the region */ -+ err = -EINVAL; -+ goto out_unlock; ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ if (kbdev->hwaccess.active_kctx[js] == 
last_active[js] && ++ ctx_waiting[js]) { ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", ++ (void *)last_active[js], js); ++ kbdev->hwaccess.active_kctx[js] = NULL; ++ } + } + -+ /* Sync first page */ -+ if (as_phys_addr_t(cpu_pa[page_off])) { -+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); ++ mutex_unlock(&js_devdata->queue_mutex); ++ up(&js_devdata->schedule_sem); ++ KBASE_TLSTREAM_TL_JS_SCHED_END(kbdev, 0); ++} + -+ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], -+ offset, sz, sync_fn); -+ } ++void kbase_js_zap_context(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + -+ /* Sync middle pages (if any) */ -+ for (i = 1; page_count > 2 && i < page_count - 1; i++) { -+ /* we grow upwards, so bail on first non-present page */ -+ if (!as_phys_addr_t(cpu_pa[page_off + i])) -+ break; ++ /* ++ * Critical assumption: No more submission is possible outside of the ++ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) ++ * whilst the struct kbase_context is terminating. ++ */ + -+ kbase_sync_single(kctx, cpu_pa[page_off + i], -+ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); -+ } ++ /* First, atomically do the following: ++ * - mark the context as dying ++ * - try to evict it from the queue ++ */ ++ mutex_lock(&kctx->jctx.lock); ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_ctx_flag_set(kctx, KCTX_DYING); + -+ /* Sync last page (if any) */ -+ if (page_count > 1 && -+ as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { -+ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; ++ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); + -+ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], -+ gpu_pa[page_off + page_count - 1], 0, sz, -+ sync_fn); -+ } ++ /* ++ * At this point we know: ++ * - If eviction succeeded, it was in the queue, but now no ++ * longer is ++ * - We must cancel the jobs here. No Power Manager active reference to ++ * release. ++ * - This happens asynchronously - kbase_jd_zap_context() will wait for ++ * those jobs to be killed. ++ * - If eviction failed, then it wasn't in the queue. It is one ++ * of the following: ++ * - a. it didn't have any jobs, and so is not in the Queue or ++ * the Run Pool (not scheduled) ++ * - Hence, no more work required to cancel jobs. No Power Manager ++ * active reference to release. ++ * - b. it was in the middle of a scheduling transaction (and thus must ++ * have at least 1 job). This can happen from a syscall or a ++ * kernel thread. We still hold the jsctx_mutex, and so the thread ++ * must be waiting inside kbasep_js_try_schedule_head_ctx(), ++ * before checking whether the runpool is full. That thread will ++ * continue after we drop the mutex, and will notice the context ++ * is dying. It will rollback the transaction, killing all jobs at ++ * the same time. kbase_jd_zap_context() will wait for those jobs ++ * to be killed. ++ * - Hence, no more work required to cancel jobs, or to release the ++ * Power Manager active reference. ++ * - c. it is scheduled, and may or may not be running jobs ++ * - We must cause it to leave the runpool by stopping it from ++ * submitting any more jobs. 
When it finally does leave, ++ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs ++ * (because it is dying), release the Power Manager active reference, ++ * and will not requeue the context in the queue. ++ * kbase_jd_zap_context() will wait for those jobs to be killed. ++ * - Hence, work required just to make it leave the runpool. Cancelling ++ * jobs and releasing the Power manager active reference will be ++ * handled when it leaves the runpool. ++ */ ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { ++ unsigned long flags; ++ unsigned int js; + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ kbase_os_mem_map_unlock(kctx); -+ return err; -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (!list_empty( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) -+{ -+ int err = -EINVAL; ++ /* The following events require us to kill off remaining jobs ++ * and update PM book-keeping: ++ * - we evicted it correctly (it must have jobs to be in the ++ * Queue) ++ * ++ * These events need no action, but take this path anyway: ++ * - Case a: it didn't have any jobs, and was never in the Queue ++ * - Case b: scheduling transaction will be partially rolled- ++ * back (this already cancels the jobs) ++ */ + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(sset != NULL); ++ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ if (sset->mem_handle.basep.handle & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, -+ "mem_handle: passed parameter is invalid"); -+ return -EINVAL; -+ } ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); + -+ switch (sset->type) { -+ case BASE_SYNCSET_OP_MSYNC: -+ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); -+ break; ++ /* Only cancel jobs when we evicted from the ++ * queue. No Power Manager active reference was held. 
++ * ++ * Having is_dying set ensures that this kills, and doesn't ++ * requeue ++ */ ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + -+ case BASE_SYNCSET_OP_CSYNC: -+ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); -+ break; ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ mutex_unlock(&kctx->jctx.lock); ++ } else { ++ unsigned long flags; ++ bool was_retained; ++ CSTD_UNUSED(was_retained); + -+ default: -+ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); -+ break; -+ } ++ /* Case c: didn't evict, but it is scheduled - it's in the Run ++ * Pool ++ */ ++ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); + -+ return err; -+} ++ /* Disable the ctx from submitting any more jobs */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+KBASE_EXPORT_TEST_API(kbase_sync_now); ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); + -+/* vm lock must be held */ -+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) -+{ -+ int err; ++ /* Retain and (later) release the context whilst it is now ++ * disallowed from submitting jobs - ensures that someone ++ * somewhere will be removing the context later on ++ */ ++ was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(reg != NULL); -+ dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", -+ __func__, (void *)reg, (void *)kctx); -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Since it's scheduled and we have the jsctx_mutex, it must be ++ * retained successfully ++ */ ++ KBASE_DEBUG_ASSERT(was_retained); + -+ if (kbase_va_region_is_no_user_free(reg)) { -+ dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); -+ return -EINVAL; -+ } ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); + -+ /* If a region has been made evictable then we must unmake it -+ * before trying to free it. -+ * If the memory hasn't been reclaimed it will be unmapped and freed -+ * below, if it has been reclaimed then the operations below are no-ops. -+ */ -+ if (reg->flags & KBASE_REG_DONT_NEED) { -+ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE); -+ mutex_lock(&kctx->jit_evict_lock); -+ /* Unlink the physical allocation before unmaking it evictable so -+ * that the allocation isn't grown back to its last backed size -+ * as we're going to unmap it anyway. ++ /* Cancel any remaining running jobs for this kctx - if any. ++ * Submit is disallowed which takes effect immediately, so no ++ * more new jobs will appear after we do this. 
+ */ -+ reg->cpu_alloc->reg = NULL; -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ reg->gpu_alloc->reg = NULL; -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_mem_evictable_unmake(reg->gpu_alloc); -+ } ++ kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + -+ err = kbase_gpu_munmap(kctx, reg); -+ if (err) { -+ dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); -+ goto out; -+ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ mutex_unlock(&kctx->jctx.lock); + -+#if MALI_USE_CSF -+ if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || -+ ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { -+ if (reg->flags & KBASE_REG_FIXED_ADDRESS) -+ atomic64_dec(&kctx->num_fixed_allocs); -+ else -+ atomic64_dec(&kctx->num_fixable_allocs); ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", ++ kctx); ++ ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + } -+#endif + -+ /* This will also free the physical pages */ -+ kbase_free_alloced_region(reg); ++ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + -+out: -+ return err; ++ /* After this, you must wait on both the ++ * kbase_jd_context::zero_jobs_wait and the ++ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs ++ * to be destroyed, and the context to be de-scheduled (if it was on the ++ * runpool). ++ * ++ * kbase_jd_zap_context() will do this. ++ */ +} + -+KBASE_EXPORT_TEST_API(kbase_mem_free_region); ++static inline int trace_get_refcnt(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ return atomic_read(&kctx->refcount); ++} + +/** -+ * kbase_mem_free - Free the region from the GPU and unregister it. ++ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context ++ * @kctx: Pointer to context. ++ * @callback: Pointer to function to call for each job. + * -+ * @kctx: KBase context -+ * @gpu_addr: GPU address to free ++ * Call a function on all jobs belonging to a non-queued, non-running ++ * context, and detach the jobs from the context as it goes. + * -+ * This function implements the free operation on a memory segment. -+ * It will loudly fail if called with outstanding mappings. ++ * Due to the locks that might be held at the time of the call, the callback ++ * may need to defer work on a workqueue to complete its actions (e.g. when ++ * cancelling jobs) + * -+ * Return: 0 on success. ++ * Atoms will be removed from the queue, so this must only be called when ++ * cancelling jobs (which occurs as part of context destruction). ++ * ++ * The locking conditions on the caller are as follows: ++ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ */ -+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) ++static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, ++ kbasep_js_ctx_job_cb *callback) +{ -+ int err = 0; -+ struct kbase_va_region *reg; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", -+ __func__, gpu_addr, (void *)kctx); -+ -+ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { -+ dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__); -+ return -EINVAL; -+ } ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ unsigned int js; + -+ if (gpu_addr == 0) { -+ dev_warn(kctx->kbdev->dev, -+ "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n", -+ __func__); -+ return -EINVAL; -+ } -+ kbase_gpu_vm_lock(kctx); ++ kbdev = kctx->kbdev; + -+ if (gpu_addr >= BASE_MEM_COOKIE_BASE && -+ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { -+ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ reg = kctx->pending_regions[cookie]; -+ if (!reg) { -+ err = -EINVAL; -+ goto out_unlock; -+ } ++ KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, ++ 0u, trace_get_refcnt(kbdev, kctx)); + -+ /* ask to unlink the cookie as we'll free it */ ++ /* Invoke callback on jobs on each slot in turn */ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ jsctx_queue_foreach(kctx, js, callback); + -+ kctx->pending_regions[cookie] = NULL; -+ bitmap_set(kctx->cookies, cookie, 1); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ kbase_free_alloced_region(reg); -+ } else { -+ /* A real GPU va */ -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) { -+ dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX", -+ __func__, gpu_addr); -+ err = -EINVAL; -+ goto out_unlock; -+ } ++base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority) ++{ ++ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; ++ int req_priority, out_priority; + -+ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { -+ /* SAME_VA must be freed through munmap */ -+ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, -+ gpu_addr); -+ err = -EINVAL; -+ goto out_unlock; -+ } -+ err = kbase_mem_free_region(kctx, reg); -+ } -+ -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; ++ req_priority = kbasep_js_atom_prio_to_sched_prio(priority); ++ out_priority = req_priority; ++ /* Does not use pcm defined priority check if PCM not defined or if ++ * kbasep_js_atom_prio_to_sched_prio returns an error ++ * (KBASE_JS_ATOM_SCHED_PRIO_INVALID). ++ */ ++ if (pcm_device && (req_priority != KBASE_JS_ATOM_SCHED_PRIO_INVALID)) ++ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, ++ req_priority); ++ return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); +} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h +new file mode 100644 +index 000000000..89c3b45c7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+KBASE_EXPORT_TEST_API(kbase_mem_free); -+ -+int kbase_update_region_flags(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned long flags) -+{ -+ KBASE_DEBUG_ASSERT(reg != NULL); -+ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); -+ -+ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); -+ /* all memory is now growable */ -+ reg->flags |= KBASE_REG_GROWABLE; ++/** ++ * DOC: Job Scheduler APIs. ++ */ + -+ if (flags & BASE_MEM_GROW_ON_GPF) -+ reg->flags |= KBASE_REG_PF_GROW; ++#ifndef _KBASE_JS_H_ ++#define _KBASE_JS_H_ + -+ if (flags & BASE_MEM_PROT_CPU_WR) -+ reg->flags |= KBASE_REG_CPU_WR; ++#include "context/mali_kbase_context.h" ++#include "mali_kbase_defs.h" ++#include "mali_kbase_debug.h" ++#include ++#include "jm/mali_kbase_jm_js.h" ++#include "jm/mali_kbase_js_defs.h" + -+ if (flags & BASE_MEM_PROT_CPU_RD) -+ reg->flags |= KBASE_REG_CPU_RD; ++#endif /* _KBASE_JS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c +new file mode 100644 +index 000000000..04ea06b2f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c +@@ -0,0 +1,298 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (flags & BASE_MEM_PROT_GPU_WR) -+ reg->flags |= KBASE_REG_GPU_WR; ++#include ++#include + -+ if (flags & BASE_MEM_PROT_GPU_RD) -+ reg->flags |= KBASE_REG_GPU_RD; ++/* ++ * Private functions follow ++ */ + -+ if (0 == (flags & BASE_MEM_PROT_GPU_EX)) -+ reg->flags |= KBASE_REG_GPU_NX; ++/** ++ * kbasep_js_ctx_attr_runpool_retain_attr - Check whether a ctx has a certain attribute ++ * and if so, retain that attribute on the runpool. ++ * ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Atribute to check/retain ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx is scheduled on the runpool ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. 
++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * false indicates no change in ctx attributes state of the runpool. ++ */ ++static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && -+ !(flags & BASE_MEM_UNCACHED_GPU)) -+ return -EINVAL; -+ } else if (flags & (BASE_MEM_COHERENT_SYSTEM | -+ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { -+ reg->flags |= KBASE_REG_SHARE_BOTH; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (!(reg->flags & KBASE_REG_SHARE_BOTH) && -+ flags & BASE_MEM_COHERENT_LOCAL) { -+ reg->flags |= KBASE_REG_SHARE_IN; -+ } ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#if !MALI_USE_CSF -+ if (flags & BASE_MEM_TILER_ALIGN_TOP) -+ reg->flags |= KBASE_REG_TILER_ALIGN_TOP; -+#endif /* !MALI_USE_CSF */ ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+#if MALI_USE_CSF -+ if (flags & BASE_MEM_CSF_EVENT) { -+ reg->flags |= KBASE_REG_CSF_EVENT; -+ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { ++ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); ++ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + -+ if (!(reg->flags & KBASE_REG_SHARE_BOTH)) { -+ /* On non coherent platforms need to map as uncached on -+ * both sides. -+ */ -+ reg->flags &= ~KBASE_REG_CPU_CACHED; -+ reg->flags &= ~KBASE_REG_GPU_CACHED; ++ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { ++ /* First refcount indicates a state change */ ++ runpool_state_changed = true; ++ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + } + } -+#endif + -+ /* Set up default MEMATTR usage */ -+ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { -+ if (kctx->kbdev->mmu_mode->flags & -+ KBASE_MMU_MODE_HAS_NON_CACHEABLE) { -+ /* Override shareability, and MEMATTR for uncached */ -+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); -+ reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); -+ } else { -+ dev_warn(kctx->kbdev->dev, -+ "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); -+ return -EINVAL; -+ } -+#if MALI_USE_CSF -+ } else if (reg->flags & KBASE_REG_CSF_EVENT) { -+ WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); ++ return runpool_state_changed; ++} + -+ reg->flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); -+#endif -+ } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && -+ (reg->flags & KBASE_REG_SHARE_BOTH)) { -+ reg->flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); -+ } else { -+ reg->flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); -+ } ++/** ++ * kbasep_js_ctx_attr_runpool_release_attr - Check whether a ctx has a certain attribute, ++ * and if so, release that attribute on the runpool. 
++ * ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Atribute to release ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx is scheduled on the runpool ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * false indicates no change in ctx attributes state of the runpool. ++ */ ++static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) -+ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (flags & BASEP_MEM_NO_USER_FREE) { -+ kbase_gpu_vm_lock(kctx); -+ kbase_va_region_no_user_free_inc(reg); -+ kbase_gpu_vm_unlock(kctx); -+ } ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) -+ reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { ++ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); ++ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + -+#if MALI_USE_CSF -+ if (flags & BASE_MEM_FIXED) -+ reg->flags |= KBASE_REG_FIXED_ADDRESS; -+#endif ++ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { ++ /* Last de-refcount indicates a state change */ ++ runpool_state_changed = true; ++ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); ++ } ++ } + -+ return 0; ++ return runpool_state_changed; +} + -+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, -+ size_t nr_pages_requested) ++/** ++ * kbasep_js_ctx_attr_ctx_retain_attr - Retain a certain attribute on a ctx, ++ * also retaining it on the runpool if the context is scheduled. ++ * ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Atribute to retain ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. ++ * This may allow the scheduler to submit more jobs than previously. ++ * false indicates no change in ctx attributes state of the runpool. 
++ */ ++static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ int new_page_count __maybe_unused; -+ size_t nr_left = nr_pages_requested; -+ int res; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct tagged_addr *tp; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || -+ WARN_ON(alloc->imported.native.kctx == NULL) || -+ WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { -+ return -EINVAL; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (alloc->reg) { -+ if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) -+ goto invalid_request; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); ++ ++ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); ++ ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { ++ /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ ++ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); + } + -+ kctx = alloc->imported.native.kctx; -+ kbdev = kctx->kbdev; ++ return runpool_state_changed; ++} + -+ if (nr_pages_requested == 0) -+ goto done; /*nothing to do*/ ++/** ++ * kbasep_js_ctx_attr_ctx_release_attr - Release a certain attribute on a ctx, ++ * also releasing it from the runpool if the context is scheduled. ++ * ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Atribute to release ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. ++ * This may allow the scheduler to submit more jobs than previously. ++ * false indicates no change in ctx attributes state of the runpool. 
++ */ ++static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ new_page_count = atomic_add_return( -+ nr_pages_requested, &kctx->used_pages); -+ atomic_add(nr_pages_requested, -+ &kctx->kbdev->memdev.used_pages); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* Increase mm counters before we allocate pages so that this -+ * allocation is visible to the OOM killer -+ */ -+ kbase_process_page_usage_inc(kctx, nr_pages_requested); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); + -+ tp = alloc->pages + alloc->nents; ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); ++ KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); ++ } + -+ /* Check if we have enough pages requested so we can allocate a large -+ * page (512 * 4KB = 2MB ) -+ */ -+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { -+ int nr_lp = nr_left / (SZ_2M / SZ_4K); ++ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ ++ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + -+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], -+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); ++ return runpool_state_changed; ++} + -+ if (res > 0) { -+ nr_left -= res; -+ tp += res; -+ } ++/* ++ * More commonly used public functions ++ */ + -+ if (nr_left) { -+ struct kbase_sub_alloc *sa, *temp_sa; ++void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) ++{ ++ bool runpool_state_changed; ++ int i; + -+ spin_lock(&kctx->mem_partials_lock); ++ /* Retain any existing attributes */ ++ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { ++ /* The context is being scheduled in, so update the runpool with the new attributes */ ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + -+ list_for_each_entry_safe(sa, temp_sa, -+ &kctx->mem_partials, link) { -+ int pidx = 0; ++ /* We don't need to know about state changed, because retaining a ++ * context occurs on scheduling it, and that itself will also try ++ * to run new atoms ++ */ ++ CSTD_UNUSED(runpool_state_changed); ++ } ++ } ++} + -+ while (nr_left) { -+ pidx = find_next_zero_bit(sa->sub_pages, -+ SZ_2M / SZ_4K, -+ pidx); -+ bitmap_set(sa->sub_pages, pidx, 1); -+ *tp++ = as_tagged_tag(page_to_phys(sa->page + -+ pidx), -+ FROM_PARTIAL); -+ nr_left--; ++bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) ++{ ++ bool runpool_state_changed = false; ++ int i; + -+ if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { -+ /* unlink from partial list when full */ -+ list_del_init(&sa->link); -+ break; -+ } -+ } -+ } -+ spin_unlock(&kctx->mem_partials_lock); ++ /* Release any existing attributes */ ++ 
for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { ++ /* The context is being scheduled out, so update the runpool on the removed attributes */ ++ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + } ++ } + -+ /* only if we actually have a chunk left <512. If more it indicates -+ * that we couldn't allocate a 2MB above, so no point to retry here. -+ */ -+ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { -+ /* create a new partial and suballocate the rest from it */ -+ struct page *np = NULL; ++ return runpool_state_changed; ++} + -+ do { -+ int err; ++void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) ++{ ++ bool runpool_state_changed = false; ++ base_jd_core_req core_req; + -+ np = kbase_mem_pool_alloc( -+ &kctx->mem_pools.large[ -+ alloc->group_id]); -+ if (np) -+ break; ++ KBASE_DEBUG_ASSERT(katom); ++ core_req = katom->core_req; + -+ err = kbase_mem_pool_grow( -+ &kctx->mem_pools.large[alloc->group_id], -+ 1, kctx->task); -+ if (err) -+ break; -+ } while (1); ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); ++ else ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + -+ if (np) { -+ int i; -+ struct kbase_sub_alloc *sa; -+ struct page *p; ++ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { ++ /* Atom that can run on slot1 or slot2, and can use all cores */ ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); ++ } + -+ sa = kmalloc(sizeof(*sa), GFP_KERNEL); -+ if (!sa) { -+ kbase_mem_pool_free( -+ &kctx->mem_pools.large[ -+ alloc->group_id], -+ np, -+ false); -+ goto no_new_partial; -+ } ++ /* We don't need to know about state changed, because retaining an atom ++ * occurs on adding it, and that itself will also try to run new atoms ++ */ ++ CSTD_UNUSED(runpool_state_changed); ++} + -+ /* store pointers back to the control struct */ -+ np->lru.next = (void *)sa; -+ for (p = np; p < np + SZ_2M / SZ_4K; p++) -+ p->lru.prev = (void *)np; -+ INIT_LIST_HEAD(&sa->link); -+ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); -+ sa->page = np; ++bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) ++{ ++ bool runpool_state_changed = false; ++ base_jd_core_req core_req; + -+ for (i = 0; i < nr_left; i++) -+ *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); ++ KBASE_DEBUG_ASSERT(katom_retained_state); ++ core_req = katom_retained_state->core_req; + -+ bitmap_set(sa->sub_pages, 0, nr_left); -+ nr_left = 0; ++ /* No-op for invalid atoms */ ++ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) ++ return false; + -+ /* expose for later use */ -+ spin_lock(&kctx->mem_partials_lock); -+ list_add(&sa->link, &kctx->mem_partials); -+ spin_unlock(&kctx->mem_partials_lock); -+ } -+ } -+ } ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); ++ else ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, 
KBASEP_JS_CTX_ATTR_NON_COMPUTE); + -+no_new_partial: -+ if (nr_left) { -+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, -+ tp, false, kctx->task); -+ if (res <= 0) -+ goto alloc_failed; ++ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { ++ /* Atom that can run on slot1 or slot2, and can use all cores */ ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); ++ return runpool_state_changed; ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h +new file mode 100644 +index 000000000..2dc640d5a +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h +@@ -0,0 +1,147 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ alloc->nents += nr_pages_requested; ++/** ++ * DOC: Job Scheduler Context Attribute APIs ++ */ + -+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); ++#ifndef _KBASE_JS_CTX_ATTR_H_ ++#define _KBASE_JS_CTX_ATTR_H_ + -+done: -+ return 0; ++/** ++ * kbasep_js_ctx_attr_runpool_retain_ctx - Retain all attributes of a context ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * This occurs on scheduling in the context on the runpool (but after ++ * is_scheduled is set) ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx->is_scheduled is true ++ */ ++void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+alloc_failed: -+ /* rollback needed if got one or more 2MB but failed later */ -+ if (nr_left != nr_pages_requested) { -+ size_t nr_pages_to_free = nr_pages_requested - nr_left; ++/** ++ * kbasep_js_ctx_attr_runpool_release_ctx - Release all attributes of a context ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * This occurs on scheduling out the context from the runpool (but before ++ * is_scheduled is cleared) ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx->is_scheduled is true ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * false indicates no change in ctx attributes state of the runpool. 
++ */ ++bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+ alloc->nents += nr_pages_to_free; ++/** ++ * kbasep_js_ctx_attr_ctx_retain_atom - Retain all attributes of an atom ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * @katom: Atom ++ * ++ * This occurs on adding an atom to a context ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ */ ++void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + -+ kbase_process_page_usage_inc(kctx, nr_pages_to_free); -+ atomic_add(nr_pages_to_free, &kctx->used_pages); -+ atomic_add(nr_pages_to_free, -+ &kctx->kbdev->memdev.used_pages); ++/** ++ * kbasep_js_ctx_attr_ctx_release_atom - Release all attributes of an atom, ++ * given its retained state. ++ * ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * @katom_retained_state: Retained state ++ * ++ * This occurs after (permanently) removing an atom from a context ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * This is a no-op when \a katom_retained_state is invalid. ++ * ++ * Return: true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * false indicates no change in ctx attributes state of the runpool. ++ */ ++bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + -+ kbase_free_phy_pages_helper(alloc, nr_pages_to_free); -+ } ++/* ++ * Requires: ++ * - runpool_irq spinlock ++ */ ++static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) ++{ ++ struct kbasep_js_device_data *js_devdata; + -+ kbase_process_page_usage_dec(kctx, nr_pages_requested); -+ atomic_sub(nr_pages_requested, &kctx->used_pages); -+ atomic_sub(nr_pages_requested, -+ &kctx->kbdev->memdev.used_pages); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; + -+invalid_request: -+ return -ENOMEM; ++ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; +} + -+struct tagged_addr *kbase_alloc_phy_pages_helper_locked( -+ struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, -+ size_t nr_pages_requested, -+ struct kbase_sub_alloc **prealloc_sa) ++/* ++ * Requires: ++ * - runpool_irq spinlock ++ */ ++static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ -+ int new_page_count __maybe_unused; -+ size_t nr_left = nr_pages_requested; -+ int res; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct tagged_addr *tp; -+ struct tagged_addr *new_pages = NULL; ++ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ ++ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); ++} + -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); -+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); ++/* ++ * Requires: ++ * - jsctx mutex ++ */ ++static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr 
attribute) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ lockdep_assert_held(&pool->pool_lock); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ kctx = alloc->imported.native.kctx; -+ kbdev = kctx->kbdev; ++ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ ++ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); ++} + -+ if (!kbdev->pagesize_2mb) -+ WARN_ON(pool->order); ++#endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +new file mode 100644 +index 000000000..14a730dc5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +@@ -0,0 +1,896 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ if (alloc->reg) { -+ if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) -+ goto invalid_request; -+ } ++/* ++ * mali_kbase_kinstr_jm.c ++ * Kernel driver public interface to job manager atom tracing ++ */ + -+ lockdep_assert_held(&kctx->mem_partials_lock); ++#include "mali_kbase_kinstr_jm.h" ++#include + -+ if (nr_pages_requested == 0) -+ goto done; /*nothing to do*/ ++#include "mali_kbase.h" ++#include "mali_kbase_linux.h" + -+ new_page_count = atomic_add_return( -+ nr_pages_requested, &kctx->used_pages); -+ atomic_add(nr_pages_requested, -+ &kctx->kbdev->memdev.used_pages); ++#include + -+ /* Increase mm counters before we allocate pages so that this -+ * allocation is visible to the OOM killer -+ */ -+ kbase_process_page_usage_inc(kctx, nr_pages_requested); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ tp = alloc->pages + alloc->nents; -+ new_pages = tp; ++/* Explicitly include epoll header for old kernels. Not required from 4.16. */ ++#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE ++#include ++#endif + -+ if (kbdev->pagesize_2mb && pool->order) { -+ int nr_lp = nr_left / (SZ_2M / SZ_4K); ++/* Define static_assert(). ++ * ++ * The macro was introduced in kernel 5.1. But older vendor kernels may define ++ * it too. ++ */ ++#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE ++#include ++#elif !defined(static_assert) ++// Stringify the expression if no message is given. ++#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) ++#define __static_assert(e, msg, ...) 
_Static_assert(e, msg) ++#endif + -+ res = kbase_mem_pool_alloc_pages_locked(pool, -+ nr_lp * (SZ_2M / SZ_4K), -+ tp); ++/* The module printing prefix */ ++#define PR_ "mali_kbase_kinstr_jm: " + -+ if (res > 0) { -+ nr_left -= res; -+ tp += res; -+ } ++/* Allows us to perform ASM goto for the tracing ++ * https://www.kernel.org/doc/Documentation/static-keys.txt ++ */ ++DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); + -+ if (nr_left) { -+ struct kbase_sub_alloc *sa, *temp_sa; ++#define KBASE_KINSTR_JM_VERSION 2 + -+ list_for_each_entry_safe(sa, temp_sa, -+ &kctx->mem_partials, link) { -+ int pidx = 0; ++/** ++ * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing ++ * @readers: a bitlocked list of opened readers. Readers are attached to the ++ * private data of a file descriptor that the user opens with the ++ * KBASE_IOCTL_KINSTR_JM_FD IO control call. ++ * @refcount: reference count for the context. Any reader will have a link ++ * back to the context so that they can remove themselves from the ++ * list. ++ * ++ * This is opaque outside this compilation unit ++ */ ++struct kbase_kinstr_jm { ++ struct hlist_bl_head readers; ++ struct kref refcount; ++}; + -+ while (nr_left) { -+ pidx = find_next_zero_bit(sa->sub_pages, -+ SZ_2M / SZ_4K, -+ pidx); -+ bitmap_set(sa->sub_pages, pidx, 1); -+ *tp++ = as_tagged_tag(page_to_phys( -+ sa->page + pidx), -+ FROM_PARTIAL); -+ nr_left--; ++/** ++ * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a ++ * new state ++ * @timestamp: Raw monotonic nanoseconds of the state change ++ * @state: The state that the atom has moved to ++ * @atom: The atom number that has changed state ++ * @flags: Flags associated with the state change. See ++ * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. ++ * @reserved: Reserved for future use. ++ * @data: Extra data for the state change. Active member depends on state. ++ * @data.start: Extra data for the state change. Active member depends on ++ * state. ++ * @data.start.slot: Extra data for the state change. Active member depends on ++ * state. ++ * @data.padding: Padding ++ * ++ * We can add new fields to the structure and old user code will gracefully ++ * ignore the new fields. ++ * ++ * We can change the size of the structure and old user code will gracefully ++ * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. ++ * ++ * If we remove fields, the version field in `struct ++ * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will ++ * gracefully fail and tell the user that the kernel API is too new and has ++ * backwards-incompatible changes. Note that one userspace can opt to handle ++ * multiple kernel major versions of the structure. ++ * ++ * If we need to change the _meaning_ of one of the fields, i.e. the state ++ * machine has had a incompatible change, we can keep the same members in the ++ * structure and update the version as above. User code will no longer ++ * recognise that it has the supported field and can gracefully explain to the ++ * user that the kernel API is no longer supported. ++ * ++ * When making changes to this structure, make sure they are either: ++ * - additions to the end (for minor version bumps (i.e. only a size increase)) ++ * such that the layout of existing fields doesn't change, or; ++ * - update the version reported to userspace so that it can fail explicitly. 
++ */ ++struct kbase_kinstr_jm_atom_state_change { ++ u64 timestamp; ++ s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ ++ u8 atom; ++ u8 flags; ++ u8 reserved[1]; ++ /* Tagged union based on state. Ensure members are aligned correctly! */ ++ union { ++ struct { ++ u8 slot; ++ } start; ++ u8 padding[4]; ++ } data; ++}; ++static_assert( ++ ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= ++ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); + -+ if (bitmap_full(sa->sub_pages, -+ SZ_2M / SZ_4K)) { -+ /* unlink from partial list when -+ * full -+ */ -+ list_del_init(&sa->link); -+ break; -+ } -+ } -+ } -+ } ++#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) + -+ /* only if we actually have a chunk left <512. If more it -+ * indicates that we couldn't allocate a 2MB above, so no point -+ * to retry here. -+ */ -+ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { -+ /* create a new partial and suballocate the rest from it -+ */ -+ struct page *np = NULL; ++/** ++ * struct reader_changes - The circular buffer of kernel atom state changes ++ * @data: The allocated buffer. This is allocated when the user requests ++ * the reader file descriptor. It is released when the user calls ++ * close() on the fd. When accessing this, lock the producer spin ++ * lock to prevent races on the allocated memory. The consume lock ++ * does not need to be held because newly-inserted data will always ++ * be outside the currenly-read range. ++ * @producer: The producing spinlock which allows us to push changes into the ++ * buffer at the same time as a user read occurring. This needs to ++ * be locked when saving/restoring the IRQ because we can receive an ++ * interrupt from the GPU when an atom completes. The CPU could have ++ * a task preempted that is holding this lock. ++ * @consumer: The consuming mutex which locks around the user read(). ++ * Must be held when updating the tail of the circular buffer. ++ * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ ++ * helpers. The producer should lock and update this with an SMP ++ * store when a new change lands. The consumer can read with an ++ * SMP load. This allows the producer to safely insert new changes ++ * into the circular buffer. ++ * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ ++ * helpers. The producer should do a READ_ONCE load and the consumer ++ * should SMP store. ++ * @size: The number of changes that are allowed in @c data. Can be used ++ * with Linux @c CIRC_ helpers. Will always be a power of two. The ++ * producer lock should be held when updating this and stored with ++ * an SMP release memory barrier. This means that the consumer can ++ * do an SMP load. ++ * @threshold: The number of changes above which threads polling on the reader ++ * file descriptor will be woken up. ++ */ ++struct reader_changes { ++ struct kbase_kinstr_jm_atom_state_change *data; ++ spinlock_t producer; ++ struct mutex consumer; ++ u32 head; ++ u32 tail; ++ u32 size; ++ u32 threshold; ++}; + -+ np = kbase_mem_pool_alloc_locked(pool); ++/** ++ * reader_changes_is_valid_size() - Determines if requested changes buffer size ++ * is valid. ++ * @size: The requested memory size ++ * ++ * We have a constraint that the underlying physical buffer must be a ++ * power of two so that we can use the efficient circular buffer helpers that ++ * the kernel provides. It also needs to be representable within a u32. 
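++ * For example, keeping the element count a power of two lets the head and
++ * tail indices wrap with a cheap mask, e.g. head = (head + 1) & (size - 1),
++ * which is what the CIRC_SPACE()/CIRC_CNT() helpers rely on.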
++ * ++ * Return: ++ * * true - the size is valid ++ * * false - the size is invalid ++ */ ++static inline bool reader_changes_is_valid_size(const size_t size) ++{ ++ const size_t elem_size = sizeof(*((struct reader_changes *)0)->data); ++ const size_t size_size = sizeof(((struct reader_changes *)0)->size); ++ const size_t size_max = (1ull << (size_size * 8)) - 1; + -+ if (np) { -+ int i; -+ struct kbase_sub_alloc *const sa = *prealloc_sa; -+ struct page *p; ++ return is_power_of_2(size) && /* Is a power of two */ ++ ((size / elem_size) <= size_max); /* Small enough */ ++} + -+ /* store pointers back to the control struct */ -+ np->lru.next = (void *)sa; -+ for (p = np; p < np + SZ_2M / SZ_4K; p++) -+ p->lru.prev = (void *)np; -+ INIT_LIST_HEAD(&sa->link); -+ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); -+ sa->page = np; ++/** ++ * reader_changes_init() - Initializes the reader changes and allocates the ++ * changes buffer ++ * @changes: The context pointer, must point to a zero-inited allocated reader ++ * changes structure. We may support allocating the structure in the ++ * future. ++ * @size: The requested changes buffer size ++ * ++ * Return: ++ * (0, U16_MAX] - the number of data elements allocated ++ * -ERANGE - the requested memory size was invalid ++ * -ENOMEM - could not allocate the memory ++ */ ++static int reader_changes_init(struct reader_changes *const changes, ++ const size_t size) ++{ ++ BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); + -+ for (i = 0; i < nr_left; i++) -+ *tp++ = as_tagged_tag( -+ page_to_phys(np + i), -+ FROM_PARTIAL); ++ if (!reader_changes_is_valid_size(size)) { ++ pr_warn(PR_ "invalid size %zu\n", size); ++ return -ERANGE; ++ } + -+ bitmap_set(sa->sub_pages, 0, nr_left); -+ nr_left = 0; -+ /* Indicate to user that we'll free this memory -+ * later. -+ */ -+ *prealloc_sa = NULL; ++ changes->data = vmalloc(size); ++ if (!changes->data) ++ return -ENOMEM; + -+ /* expose for later use */ -+ list_add(&sa->link, &kctx->mem_partials); -+ } -+ } -+ if (nr_left) -+ goto alloc_failed; -+ } else { -+ res = kbase_mem_pool_alloc_pages_locked(pool, -+ nr_left, -+ tp); -+ if (res <= 0) -+ goto alloc_failed; -+ } ++ spin_lock_init(&changes->producer); ++ mutex_init(&changes->consumer); + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); ++ changes->size = size / sizeof(*changes->data); ++ changes->threshold = min(((size_t)(changes->size)) / 4, ++ ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + -+ alloc->nents += nr_pages_requested; ++ return changes->size; ++} + -+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); ++/** ++ * reader_changes_term() - Cleans up a reader changes structure ++ * @changes: The context to clean up ++ * ++ * Releases the allocated state changes memory ++ */ ++static void reader_changes_term(struct reader_changes *const changes) ++{ ++ struct kbase_kinstr_jm_atom_state_change *data = NULL; ++ unsigned long irq; + -+done: -+ return new_pages; ++ /* ++ * Although changes->data is used on the consumer side, too, no active ++ * consumer is possible by the time we clean up the reader changes, so ++ * no need to take the consumer lock. However, we do need the producer ++ * lock because the list removal can race with list traversal. 
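++ * (reader_changes_push() reads changes->data under the same producer lock
++ * and returns early once it has been cleared here.)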
++ */ ++ spin_lock_irqsave(&changes->producer, irq); ++ swap(changes->data, data); ++ spin_unlock_irqrestore(&changes->producer, irq); + -+alloc_failed: -+ /* rollback needed if got one or more 2MB but failed later */ -+ if (nr_left != nr_pages_requested) { -+ size_t nr_pages_to_free = nr_pages_requested - nr_left; ++ mutex_destroy(&changes->consumer); ++ vfree(data); ++} + -+ struct tagged_addr *start_free = alloc->pages + alloc->nents; ++/** ++ * reader_changes_count_locked() - Retrieves the count of state changes from the ++ * tail to the physical end of the buffer ++ * @changes: The state changes context ++ * ++ * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to ++ * determine the count, so there may be more items. However, that's the maximum ++ * number that can be read in one contiguous read. ++ * ++ * Return: the number of changes in the circular buffer until the end of the ++ * allocation ++ */ ++static u32 reader_changes_count_locked(struct reader_changes *const changes) ++{ ++ u32 head; + -+ if (kbdev->pagesize_2mb && pool->order) { -+ while (nr_pages_to_free) { -+ if (is_huge_head(*start_free)) { -+ kbase_mem_pool_free_pages_locked( -+ pool, 512, -+ start_free, -+ false, /* not dirty */ -+ true); /* return to pool */ -+ nr_pages_to_free -= 512; -+ start_free += 512; -+ } else if (is_partial(*start_free)) { -+ free_partial_locked(kctx, pool, -+ *start_free); -+ nr_pages_to_free--; -+ start_free++; -+ } -+ } -+ } else { -+ kbase_mem_pool_free_pages_locked(pool, -+ nr_pages_to_free, -+ start_free, -+ false, /* not dirty */ -+ true); /* return to pool */ -+ } -+ } ++ lockdep_assert_held_once(&changes->consumer); + -+ kbase_process_page_usage_dec(kctx, nr_pages_requested); -+ atomic_sub(nr_pages_requested, &kctx->used_pages); -+ atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); ++ head = smp_load_acquire(&changes->head); + -+invalid_request: -+ return NULL; ++ return CIRC_CNT_TO_END(head, changes->tail, changes->size); +} + -+static void free_partial(struct kbase_context *kctx, int group_id, struct -+ tagged_addr tp) ++/** ++ * reader_changes_count() - Retrieves the count of state changes from the ++ * tail to the physical end of the buffer ++ * @changes: The state changes context ++ * ++ * Return: the number of changes in the circular buffer until the end of the ++ * allocation ++ */ ++static u32 reader_changes_count(struct reader_changes *const changes) +{ -+ struct page *p, *head_page; -+ struct kbase_sub_alloc *sa; ++ u32 ret; + -+ p = as_page(tp); -+ head_page = (struct page *)p->lru.prev; -+ sa = (struct kbase_sub_alloc *)head_page->lru.next; -+ spin_lock(&kctx->mem_partials_lock); -+ clear_bit(p - head_page, sa->sub_pages); -+ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { -+ list_del(&sa->link); -+ kbase_mem_pool_free( -+ &kctx->mem_pools.large[group_id], -+ head_page, -+ true); -+ kfree(sa); -+ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == -+ SZ_2M / SZ_4K - 1) { -+ /* expose the partial again */ -+ list_add(&sa->link, &kctx->mem_partials); -+ } -+ spin_unlock(&kctx->mem_partials_lock); ++ mutex_lock(&changes->consumer); ++ ret = reader_changes_count_locked(changes); ++ mutex_unlock(&changes->consumer); ++ return ret; +} + -+int kbase_free_phy_pages_helper( -+ struct kbase_mem_phy_alloc *alloc, -+ size_t nr_pages_to_free) ++/** ++ * reader_changes_push() - Pushes a change into the reader circular buffer. 
++ * @changes: The buffer to insert the change into ++ * @change: Kernel atom change to insert ++ * @wait_queue: The queue to be kicked when changes should be read from ++ * userspace. Kicked when a threshold is reached or there is ++ * overflow. ++ */ ++static void reader_changes_push( ++ struct reader_changes *const changes, ++ const struct kbase_kinstr_jm_atom_state_change *const change, ++ wait_queue_head_t *const wait_queue) +{ -+ struct kbase_context *kctx = alloc->imported.native.kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool syncback; -+ bool reclaimed = (alloc->evicted != 0); -+ struct tagged_addr *start_free; -+ int new_page_count __maybe_unused; -+ size_t freed = 0; -+ -+ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || -+ WARN_ON(alloc->imported.native.kctx == NULL) || -+ WARN_ON(alloc->nents < nr_pages_to_free) || -+ WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { -+ return -EINVAL; -+ } ++ u32 head, tail, size, space; ++ unsigned long irq; ++ struct kbase_kinstr_jm_atom_state_change *data; + -+ /* early out if nothing to do */ -+ if (nr_pages_to_free == 0) -+ return 0; ++ spin_lock_irqsave(&changes->producer, irq); + -+ start_free = alloc->pages + alloc->nents - nr_pages_to_free; ++ /* We may be called for a reader_changes that's awaiting cleanup. */ ++ data = changes->data; ++ if (!data) ++ goto unlock; + -+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ size = changes->size; ++ head = changes->head; ++ tail = smp_load_acquire(&changes->tail); + -+ /* pad start_free to a valid start location */ -+ while (nr_pages_to_free && is_huge(*start_free) && -+ !is_huge_head(*start_free)) { -+ nr_pages_to_free--; -+ start_free++; ++ space = CIRC_SPACE(head, tail, size); ++ if (space >= 1) { ++ data[head] = *change; ++ if (space == 1) { ++ data[head].flags |= ++ KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; ++ pr_warn(PR_ "overflow of circular buffer\n"); ++ } ++ smp_store_release(&changes->head, (head + 1) & (size - 1)); + } + -+ while (nr_pages_to_free) { -+ if (is_huge_head(*start_free)) { -+ /* This is a 2MB entry, so free all the 512 pages that -+ * it points to -+ */ -+ kbase_mem_pool_free_pages( -+ &kctx->mem_pools.large[alloc->group_id], -+ 512, -+ start_free, -+ syncback, -+ reclaimed); -+ nr_pages_to_free -= 512; -+ start_free += 512; -+ freed += 512; -+ } else if (is_partial(*start_free)) { -+ free_partial(kctx, alloc->group_id, *start_free); -+ nr_pages_to_free--; -+ start_free++; -+ freed++; -+ } else { -+ struct tagged_addr *local_end_free; ++ /* Wake for either overflow or over-threshold cases. */ ++ if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) ++ wake_up_interruptible(wait_queue); + -+ local_end_free = start_free; -+ while (nr_pages_to_free && -+ !is_huge(*local_end_free) && -+ !is_partial(*local_end_free)) { -+ local_end_free++; -+ nr_pages_to_free--; -+ } -+ kbase_mem_pool_free_pages( -+ &kctx->mem_pools.small[alloc->group_id], -+ local_end_free - start_free, -+ start_free, -+ syncback, -+ reclaimed); -+ freed += local_end_free - start_free; -+ start_free += local_end_free - start_free; -+ } -+ } ++unlock: ++ spin_unlock_irqrestore(&changes->producer, irq); ++} + -+ alloc->nents -= freed; ++/** ++ * struct reader - Allows the kernel state changes to be read by user space. 
++ * @node: The node in the @c readers locked list ++ * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) ++ * @changes: The circular buffer of user changes ++ * @wait_queue: A wait queue for poll ++ * @context: a pointer to the parent context that created this reader. Can be ++ * used to remove the reader from the list of readers. Reference ++ * counted. ++ * ++ * The reader is a circular buffer in kernel space. State changes are pushed ++ * into the buffer. The flow from user space is: ++ * ++ * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will ++ * allocate the kernel side circular buffer with a size specified in the ++ * ioctl argument. ++ * * The user will then poll the file descriptor for data ++ * * Upon receiving POLLIN, perform a read() on the file descriptor to get ++ * the data out. ++ * * The buffer memory will be freed when the file descriptor is closed ++ */ ++struct reader { ++ struct hlist_bl_node node; ++ struct rcu_head rcu_head; ++ struct reader_changes changes; ++ wait_queue_head_t wait_queue; ++ struct kbase_kinstr_jm *context; ++}; + -+ /* -+ * If the allocation was not evicted (i.e. evicted == 0) then -+ * the page accounting needs to be done. -+ */ -+ if (!reclaimed) { -+ kbase_process_page_usage_dec(kctx, freed); -+ new_page_count = atomic_sub_return(freed, -+ &kctx->used_pages); -+ atomic_sub(freed, -+ &kctx->kbdev->memdev.used_pages); ++static struct kbase_kinstr_jm * ++kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); ++static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); ++static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader); ++static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader); + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); ++/** ++ * reader_term() - Terminate a instrumentation job manager reader context. ++ * @reader: Pointer to context to be terminated. ++ */ ++static void reader_term(struct reader *const reader) ++{ ++ if (!reader) ++ return; + -+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); -+ } ++ kbase_kinstr_jm_readers_del(reader->context, reader); ++ reader_changes_term(&reader->changes); ++ kbase_kinstr_jm_ref_put(reader->context); + -+ return 0; ++ kfree_rcu(reader, rcu_head); +} + -+static void free_partial_locked(struct kbase_context *kctx, -+ struct kbase_mem_pool *pool, struct tagged_addr tp) ++/** ++ * reader_init() - Initialise a instrumentation job manager reader context. ++ * @out_reader: Non-NULL pointer to where the pointer to the created context ++ * will be stored on success. ++ * @ctx: the pointer to the parent context. Reference count will be ++ * increased if initialization is successful ++ * @num_changes: The number of changes to allocate a buffer for ++ * ++ * Return: 0 on success, else error code. 
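++ *         In this implementation that is -EINVAL for invalid arguments,
++ *         -ENOMEM when an allocation fails or the reader limit is reached,
++ *         and -ERANGE when the requested changes buffer size is rejected by
++ *         reader_changes_init().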
++ */ ++static int reader_init(struct reader **const out_reader, ++ struct kbase_kinstr_jm *const ctx, ++ size_t const num_changes) +{ -+ struct page *p, *head_page; -+ struct kbase_sub_alloc *sa; ++ struct reader *reader = NULL; ++ const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); ++ int status; + -+ lockdep_assert_held(&pool->pool_lock); -+ lockdep_assert_held(&kctx->mem_partials_lock); ++ if (!out_reader || !ctx || !num_changes) ++ return -EINVAL; + -+ p = as_page(tp); -+ head_page = (struct page *)p->lru.prev; -+ sa = (struct kbase_sub_alloc *)head_page->lru.next; -+ clear_bit(p - head_page, sa->sub_pages); -+ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { -+ list_del(&sa->link); -+ kbase_mem_pool_free_locked(pool, head_page, true); -+ kfree(sa); -+ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == -+ SZ_2M / SZ_4K - 1) { -+ /* expose the partial again */ -+ list_add(&sa->link, &kctx->mem_partials); -+ } -+} ++ reader = kzalloc(sizeof(*reader), GFP_KERNEL); ++ if (!reader) ++ return -ENOMEM; + -+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, -+ struct kbase_mem_pool *pool, struct tagged_addr *pages, -+ size_t nr_pages_to_free) -+{ -+ struct kbase_context *kctx = alloc->imported.native.kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool syncback; -+ bool reclaimed = (alloc->evicted != 0); -+ struct tagged_addr *start_free; -+ size_t freed = 0; ++ INIT_HLIST_BL_NODE(&reader->node); ++ init_waitqueue_head(&reader->wait_queue); + -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); -+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); -+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); ++ reader->context = kbase_kinstr_jm_ref_get(ctx); + -+ lockdep_assert_held(&pool->pool_lock); -+ lockdep_assert_held(&kctx->mem_partials_lock); ++ status = reader_changes_init(&reader->changes, num_changes * change_size); ++ if (status < 0) ++ goto fail; + -+ /* early out if nothing to do */ -+ if (!nr_pages_to_free) -+ return; ++ status = kbase_kinstr_jm_readers_add(ctx, reader); ++ if (status < 0) ++ goto fail; + -+ start_free = pages; ++ *out_reader = reader; + -+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ return 0; + -+ /* pad start_free to a valid start location */ -+ while (nr_pages_to_free && is_huge(*start_free) && -+ !is_huge_head(*start_free)) { -+ nr_pages_to_free--; -+ start_free++; -+ } ++fail: ++ kbase_kinstr_jm_ref_put(reader->context); ++ kfree(reader); ++ return status; ++} + -+ while (nr_pages_to_free) { -+ if (is_huge_head(*start_free)) { -+ /* This is a 2MB entry, so free all the 512 pages that -+ * it points to -+ */ -+ WARN_ON(!pool->order); -+ kbase_mem_pool_free_pages_locked(pool, -+ 512, -+ start_free, -+ syncback, -+ reclaimed); -+ nr_pages_to_free -= 512; -+ start_free += 512; -+ freed += 512; -+ } else if (is_partial(*start_free)) { -+ WARN_ON(!pool->order); -+ free_partial_locked(kctx, pool, *start_free); -+ nr_pages_to_free--; -+ start_free++; -+ freed++; -+ } else { -+ struct tagged_addr *local_end_free; ++/** ++ * reader_release() - Invoked when the reader file descriptor is released ++ * @node: The inode that the file descriptor that the file corresponds to. In ++ * our case our reader file descriptor is backed by an anonymous node so ++ * not much is in this. ++ * @file: the file data. 
Our reader context is held in the private data ++ * Return: zero on success ++ */ ++static int reader_release(struct inode *const node, struct file *const file) ++{ ++ struct reader *const reader = file->private_data; + -+ WARN_ON(pool->order); -+ local_end_free = start_free; -+ while (nr_pages_to_free && -+ !is_huge(*local_end_free) && -+ !is_partial(*local_end_free)) { -+ local_end_free++; -+ nr_pages_to_free--; -+ } -+ kbase_mem_pool_free_pages_locked(pool, -+ local_end_free - start_free, -+ start_free, -+ syncback, -+ reclaimed); -+ freed += local_end_free - start_free; -+ start_free += local_end_free - start_free; -+ } -+ } ++ reader_term(reader); ++ file->private_data = NULL; + -+ alloc->nents -= freed; ++ return 0; ++} + -+ /* -+ * If the allocation was not evicted (i.e. evicted == 0) then -+ * the page accounting needs to be done. ++/** ++ * reader_changes_copy_to_user() - Copy any changes from a changes structure to ++ * the user-provided buffer. ++ * @changes: The changes structure from which to copy. ++ * @buffer: The user buffer to copy the data to. ++ * @buffer_size: The number of bytes in the buffer. ++ * Return: The number of bytes copied or negative errno on failure. ++ */ ++static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, ++ char __user *buffer, ++ size_t buffer_size) ++{ ++ ssize_t ret = 0; ++ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE( ++ changes->data); ++ size_t const entry_size = sizeof(*src_buf); ++ size_t changes_tail, changes_count, read_size; ++ ++ /* Needed for the quick buffer capacity calculation below. ++ * Note that we can't use is_power_of_2() since old compilers don't ++ * understand it's a constant expression. + */ -+ if (!reclaimed) { -+ int new_page_count; ++#define is_power_of_two(x) ((x) && !((x) & ((x) - 1))) ++ static_assert(is_power_of_two( ++ sizeof(struct kbase_kinstr_jm_atom_state_change))); ++#undef is_power_of_two + -+ kbase_process_page_usage_dec(kctx, freed); -+ new_page_count = atomic_sub_return(freed, -+ &kctx->used_pages); -+ atomic_sub(freed, -+ &kctx->kbdev->memdev.used_pages); ++ lockdep_assert_held_once(&changes->consumer); + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); ++ /* Read continuously until either: ++ * - we've filled the output buffer, or ++ * - there are no changes when we check. ++ * ++ * If more changes arrive while we're copying to the user, we can copy ++ * those as well, space permitting. ++ */ ++ do { ++ changes_tail = changes->tail; ++ changes_count = reader_changes_count_locked(changes); ++ read_size = min(changes_count * entry_size, ++ buffer_size & ~(entry_size - 1)); + -+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); -+ } ++ if (!read_size) ++ break; ++ ++ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size)) ++ return -EFAULT; ++ ++ buffer += read_size; ++ buffer_size -= read_size; ++ ret += read_size; ++ changes_tail = (changes_tail + read_size / entry_size) & ++ (changes->size - 1); ++ smp_store_release(&changes->tail, changes_tail); ++ } while (read_size); ++ ++ return ret; +} -+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); + -+#if MALI_USE_CSF +/** -+ * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. -+ * @alloc: The allocation for the imported user buffer. ++ * reader_read() - Handles a read call on the reader file descriptor + * -+ * This must only be called when terminating an alloc, when its refcount -+ * (number of users) has become 0. 
This also ensures it is only called once all -+ * CPU mappings have been closed. ++ * @filp: The file that the read was performed on ++ * @buffer: The destination buffer ++ * @buffer_size: The maximum number of bytes to read ++ * @offset: The offset into the 'file' to read from. + * -+ * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active -+ * allocations ++ * Note the destination buffer needs to be fully mapped in userspace or the read ++ * will fault. ++ * ++ * Return: ++ * * The number of bytes read or: ++ * * -EBADF - the file descriptor did not have an attached reader ++ * * -EFAULT - memory access fault ++ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there ++ * is no data available ++ * ++ * Note: The number of bytes read will always be a multiple of the size of an ++ * entry. + */ -+static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); -+#endif -+ -+void kbase_mem_kref_free(struct kref *kref) ++static ssize_t reader_read(struct file *const filp, ++ char __user *const buffer, ++ size_t const buffer_size, ++ loff_t *const offset) +{ -+ struct kbase_mem_phy_alloc *alloc; ++ struct reader *const reader = filp->private_data; ++ struct reader_changes *changes; ++ ssize_t ret; + -+ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); ++ if (!reader) ++ return -EBADF; + -+ switch (alloc->type) { -+ case KBASE_MEM_TYPE_NATIVE: { ++ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change)) ++ return -ENOBUFS; + -+ if (!WARN_ON(!alloc->imported.native.kctx)) { -+ if (alloc->permanent_map) -+ kbase_phy_alloc_mapping_term( -+ alloc->imported.native.kctx, -+ alloc); ++#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE ++ if (!access_ok(buffer, buffer_size)) ++ return -EIO; ++#else ++ if (!access_ok(VERIFY_WRITE, buffer, buffer_size)) ++ return -EIO; ++#endif + -+ /* -+ * The physical allocation must have been removed from -+ * the eviction list before trying to free it. 
-+ */ -+ mutex_lock( -+ &alloc->imported.native.kctx->jit_evict_lock); -+ WARN_ON(!list_empty(&alloc->evict_node)); -+ mutex_unlock( -+ &alloc->imported.native.kctx->jit_evict_lock); ++ changes = &reader->changes; + -+ kbase_process_page_usage_dec( -+ alloc->imported.native.kctx, -+ alloc->imported.native.nr_struct_pages); ++ mutex_lock(&changes->consumer); ++ if (!reader_changes_count_locked(changes)) { ++ if (filp->f_flags & O_NONBLOCK) { ++ ret = -EAGAIN; ++ goto exit; + } -+ kbase_free_phy_pages_helper(alloc, alloc->nents); -+ break; -+ } -+ case KBASE_MEM_TYPE_ALIAS: { -+ /* just call put on the underlying phy allocs */ -+ size_t i; -+ struct kbase_aliased *aliased; + -+ aliased = alloc->imported.alias.aliased; -+ if (aliased) { -+ for (i = 0; i < alloc->imported.alias.nents; i++) -+ if (aliased[i].alloc) { -+ kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc); -+ kbase_mem_phy_alloc_put(aliased[i].alloc); -+ } -+ vfree(aliased); -+ } -+ break; -+ } -+ case KBASE_MEM_TYPE_RAW: -+ /* raw pages, external cleanup */ -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_UMM: -+ if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { -+ WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1, -+ "WARNING: expected excatly 1 mapping, got %d", -+ alloc->imported.umm.current_mapping_usage_count); -+ dma_buf_unmap_attachment( -+ alloc->imported.umm.dma_attachment, -+ alloc->imported.umm.sgt, -+ DMA_BIDIRECTIONAL); -+ kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, -+ alloc); ++ if (wait_event_interruptible( ++ reader->wait_queue, ++ !!reader_changes_count_locked(changes))) { ++ ret = -EINTR; ++ goto exit; + } -+ dma_buf_detach(alloc->imported.umm.dma_buf, -+ alloc->imported.umm.dma_attachment); -+ dma_buf_put(alloc->imported.umm.dma_buf); -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+#if MALI_USE_CSF -+ kbase_jd_user_buf_unpin_pages(alloc); -+#endif -+ if (alloc->imported.user_buf.mm) -+ mmdrop(alloc->imported.user_buf.mm); -+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) -+ vfree(alloc->imported.user_buf.pages); -+ else -+ kfree(alloc->imported.user_buf.pages); -+ break; -+ default: -+ WARN(1, "Unexecpted free of type %d\n", alloc->type); -+ break; + } + -+ /* Free based on allocation type */ -+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) -+ vfree(alloc); -+ else -+ kfree(alloc); -+} ++ ret = reader_changes_copy_to_user(changes, buffer, buffer_size); + -+KBASE_EXPORT_TEST_API(kbase_mem_kref_free); ++exit: ++ mutex_unlock(&changes->consumer); ++ return ret; ++} + -+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) ++/** ++ * reader_poll() - Handles a poll call on the reader file descriptor ++ * @file: The file that the poll was performed on ++ * @wait: The poll table ++ * ++ * The results of the poll will be unreliable if there is no mapped memory as ++ * there is no circular buffer to push atom state changes into. ++ * ++ * Return: ++ * * 0 - no data ready ++ * * EPOLLIN | EPOLLRDNORM - state changes have been buffered ++ * * EPOLLHUP | EPOLLERR - IO control arguments were invalid or the file ++ * descriptor did not have an attached reader. 
++ */ ++static __poll_t reader_poll(struct file *const file, ++ struct poll_table_struct *const wait) +{ -+ KBASE_DEBUG_ASSERT(reg != NULL); -+ KBASE_DEBUG_ASSERT(vsize > 0); -+ -+ /* validate user provided arguments */ -+ if (size > vsize || vsize > reg->nr_pages) -+ goto out_term; ++ struct reader *reader; ++ struct reader_changes *changes; ++ __poll_t mask = 0; + -+ /* Prevent vsize*sizeof from wrapping around. -+ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. -+ */ -+ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) -+ goto out_term; ++ if (unlikely(!file || !wait)) ++ return EPOLLHUP | EPOLLERR; + -+ KBASE_DEBUG_ASSERT(vsize != 0); ++ reader = file->private_data; ++ if (unlikely(!reader)) ++ return EPOLLHUP | EPOLLERR; + -+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) -+ goto out_term; ++ changes = &reader->changes; ++ if (reader_changes_count(changes) >= changes->threshold) ++ return EPOLLIN | EPOLLRDNORM; + -+ reg->cpu_alloc->reg = reg; -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) -+ goto out_rollback; -+ reg->gpu_alloc->reg = reg; -+ } ++ poll_wait(file, &reader->wait_queue, wait); + -+ return 0; ++ if (reader_changes_count(changes) > 0) ++ mask |= EPOLLIN | EPOLLRDNORM; + -+out_rollback: -+ kbase_free_phy_pages_helper(reg->cpu_alloc, size); -+out_term: -+ return -1; ++ return mask; +} -+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); -+ -+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, -+ enum kbase_page_status status) -+{ -+ u32 i = 0; + -+ for (; i < alloc->nents; i++) { -+ struct tagged_addr phys = alloc->pages[i]; -+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); ++/* The file operations virtual function table */ ++static const struct file_operations file_operations = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .read = reader_read, ++ .poll = reader_poll, ++ .release = reader_release ++}; + -+ /* Skip the 4KB page that is part of a large page, as the large page is -+ * excluded from the migration process. -+ */ -+ if (is_huge(phys) || is_partial(phys)) -+ continue; ++/* The maximum amount of readers that can be created on a context. */ ++static const size_t kbase_kinstr_jm_readers_max = 16; + -+ if (!page_md) -+ continue; ++/** ++ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped ++ * @ref: the context reference count ++ */ ++static void kbase_kinstr_jm_release(struct kref *const ref) ++{ ++ struct kbase_kinstr_jm *const ctx = ++ container_of(ref, struct kbase_kinstr_jm, refcount); + -+ spin_lock(&page_md->migrate_lock); -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); -+ spin_unlock(&page_md->migrate_lock); -+ } ++ kfree(ctx); +} + -+bool kbase_check_alloc_flags(unsigned long flags) ++/** ++ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context ++ * @ctx: the context to reference count ++ * Return: the reference counted context ++ */ ++static struct kbase_kinstr_jm * ++kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) +{ -+ /* Only known input flags should be set. 
*/ -+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) -+ return false; -+ -+ /* At least one flag should be set */ -+ if (flags == 0) -+ return false; -+ -+ /* Either the GPU or CPU must be reading from the allocated memory */ -+ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) -+ return false; ++ if (likely(ctx)) ++ kref_get(&ctx->refcount); ++ return ctx; ++} + -+ /* Either the GPU or CPU must be writing to the allocated memory */ -+ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++/** ++ * kbase_kinstr_jm_ref_put() - Dereferences the instrumentation context ++ * @ctx: the context to lower the reference count on ++ */ ++static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx) ++{ ++ if (likely(ctx)) ++ kref_put(&ctx->refcount, kbase_kinstr_jm_release); ++} + -+ /* GPU executable memory cannot: -+ * - Be written by the GPU -+ * - Be grown on GPU page fault -+ */ -+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & -+ (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) -+ return false; ++/** ++ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers ++ * @ctx: the instrumentation context ++ * @reader: the reader to add ++ * ++ * Return: ++ * 0 - success ++ * -ENOMEM - too many readers already added. ++ */ ++static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader) ++{ ++ struct hlist_bl_head *const readers = &ctx->readers; ++ struct hlist_bl_node *node; ++ struct reader *temp; ++ size_t count = 0; + -+#if !MALI_USE_CSF -+ /* GPU executable memory also cannot have the top of its initial -+ * commit aligned to 'extension' -+ */ -+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & -+ BASE_MEM_TILER_ALIGN_TOP)) -+ return false; -+#endif /* !MALI_USE_CSF */ ++ hlist_bl_lock(readers); + -+ /* To have an allocation lie within a 4GB chunk is required only for -+ * TLS memory, which will never be used to contain executable code. -+ */ -+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & -+ BASE_MEM_PROT_GPU_EX)) -+ return false; ++ hlist_bl_for_each_entry_rcu(temp, node, readers, node) ++ ++count; + -+#if !MALI_USE_CSF -+ /* TLS memory should also not be used for tiler heap */ -+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & -+ BASE_MEM_TILER_ALIGN_TOP)) -+ return false; -+#endif /* !MALI_USE_CSF */ ++ if (kbase_kinstr_jm_readers_max < count) { ++ hlist_bl_unlock(readers); ++ return -ENOMEM; ++ } + -+ /* GPU should have at least read or write access otherwise there is no -+ * reason for allocating. 
-+ */ -+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++ hlist_bl_add_head_rcu(&reader->node, readers); + -+ /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ -+ if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) -+ return false; ++ hlist_bl_unlock(readers); + -+ /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory -+ */ -+ if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == -+ BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) -+ return false; ++ static_branch_inc(&basep_kinstr_jm_reader_static_key); + -+ /* Should not combine BASE_MEM_COHERENT_LOCAL with -+ * BASE_MEM_COHERENT_SYSTEM -+ */ -+ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == -+ (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) -+ return false; ++ return 0; ++} + -+#if MALI_USE_CSF -+ if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED))) -+ return false; ++/** ++ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers ++ * @ctx: the instrumentation context ++ * @reader: the reader to delete ++ */ ++static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader) ++{ ++ struct hlist_bl_head *const readers = &ctx->readers; + -+ if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED)) -+ return false; -+#endif ++ hlist_bl_lock(readers); ++ hlist_bl_del_rcu(&reader->node); ++ hlist_bl_unlock(readers); + -+ return true; ++ static_branch_dec(&basep_kinstr_jm_reader_static_key); +} + -+bool kbase_check_import_flags(unsigned long flags) ++int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, ++ union kbase_kinstr_jm_fd *jm_fd_arg) +{ -+ /* Only known input flags should be set. */ -+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) -+ return false; ++ struct kbase_kinstr_jm_fd_in const *in; ++ struct reader *reader; ++ size_t const change_size = sizeof(struct ++ kbase_kinstr_jm_atom_state_change); ++ int status; ++ int fd; ++ int i; + -+ /* At least one flag should be set */ -+ if (flags == 0) -+ return false; ++ if (!ctx || !jm_fd_arg) ++ return -EINVAL; + -+ /* Imported memory cannot be GPU executable */ -+ if (flags & BASE_MEM_PROT_GPU_EX) -+ return false; ++ in = &jm_fd_arg->in; + -+ /* Imported memory cannot grow on page fault */ -+ if (flags & BASE_MEM_GROW_ON_GPF) -+ return false; ++ if (!is_power_of_2(in->count)) ++ return -EINVAL; + -+#if MALI_USE_CSF -+ /* Imported memory cannot be fixed */ -+ if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE))) -+ return false; -+#else -+ /* Imported memory cannot be aligned to the end of its initial commit */ -+ if (flags & BASE_MEM_TILER_ALIGN_TOP) -+ return false; -+#endif /* !MALI_USE_CSF */ ++ for (i = 0; i < sizeof(in->padding); ++i) ++ if (in->padding[i]) ++ return -EINVAL; + -+ /* GPU should have at least read or write access otherwise there is no -+ * reason for importing. 
-+ */ -+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++ status = reader_init(&reader, ctx, in->count); ++ if (status < 0) ++ return status; + -+ /* Protected memory cannot be read by the CPU */ -+ if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD)) -+ return false; ++ jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION; ++ jm_fd_arg->out.size = change_size; ++ memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding)); + -+ return true; ++ fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader, ++ O_CLOEXEC); ++ if (fd < 0) ++ reader_term(reader); ++ ++ return fd; +} + -+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, -+ u64 va_pages, u64 commit_pages, u64 large_extension) ++int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx) +{ -+ struct device *dev = kctx->kbdev->dev; -+ int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; -+ u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; -+ struct kbase_va_region test_reg; -+ -+ /* kbase_va_region's extension member can be of variable size, so check against that type */ -+ test_reg.extension = large_extension; -+ -+#define KBASE_MSG_PRE "GPU allocation attempted with " ++ struct kbase_kinstr_jm *ctx = NULL; + -+ if (va_pages == 0) { -+ dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); ++ if (!out_ctx) + return -EINVAL; -+ } + -+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { -+ dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", -+ (unsigned long long)va_pages); ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) + return -ENOMEM; -+ } + -+ /* Note: commit_pages is checked against va_pages during -+ * kbase_alloc_phy_pages() -+ */ ++ INIT_HLIST_BL_HEAD(&ctx->readers); ++ kref_init(&ctx->refcount); + -+ /* Limit GPU executable allocs to GPU PC size */ -+ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { -+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", -+ (unsigned long long)va_pages, -+ (unsigned long long)gpu_pc_pages_max); ++ *out_ctx = ctx; + -+ return -EINVAL; -+ } ++ return 0; ++} + -+ if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { -+ dev_warn(dev, KBASE_MSG_PRE -+ "BASE_MEM_GROW_ON_GPF but extension == 0\n"); -+ return -EINVAL; -+ } ++void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx) ++{ ++ kbase_kinstr_jm_ref_put(ctx); ++} + -+#if !MALI_USE_CSF -+ if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { -+ dev_warn(dev, KBASE_MSG_PRE -+ "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); -+ return -EINVAL; -+ } ++void kbasep_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const katom, ++ const enum kbase_kinstr_jm_reader_atom_state state) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm; ++ const u8 id = kbase_jd_atom_id(kctx, katom); ++ struct kbase_kinstr_jm_atom_state_change change = { ++ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state ++ }; ++ struct reader *reader; ++ struct hlist_bl_node *node; + -+ if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && -+ test_reg.extension != 0) { -+ dev_warn( -+ dev, KBASE_MSG_PRE -+ "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n"); -+ return -EINVAL; -+ } -+#else -+ if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { -+ dev_warn(dev, KBASE_MSG_PRE -+ "BASE_MEM_GROW_ON_GPF 
not set but extension != 0\n"); -+ return -EINVAL; ++ WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT < state || 0 > state, ++ PR_ "unsupported katom (%u) state (%i)", id, state); ++ ++ switch (state) { ++ case KBASE_KINSTR_JM_READER_ATOM_STATE_START: ++ change.data.start.slot = katom->slot_nr; ++ break; ++ default: ++ break; + } -+#endif /* !MALI_USE_CSF */ + -+#if !MALI_USE_CSF -+ /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ -+ if (flags & BASE_MEM_TILER_ALIGN_TOP) { -+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " -+ unsigned long small_extension; ++ rcu_read_lock(); ++ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node) ++ reader_changes_push( ++ &reader->changes, &change, &reader->wait_queue); ++ rcu_read_unlock(); ++} + -+ if (large_extension > -+ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { -+ dev_warn(dev, -+ KBASE_MSG_PRE_FLAG -+ "extension==%lld pages exceeds limit %lld", -+ (unsigned long long)large_extension, -+ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); -+ return -EINVAL; -+ } -+ /* For use with is_power_of_2, which takes unsigned long, so -+ * must ensure e.g. on 32-bit kernel it'll fit in that type -+ */ -+ small_extension = (unsigned long)large_extension; ++KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state); + -+ if (!is_power_of_2(small_extension)) { -+ dev_warn(dev, -+ KBASE_MSG_PRE_FLAG -+ "extension==%ld not a non-zero power of 2", -+ small_extension); -+ return -EINVAL; -+ } ++void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ const int slot = katom->slot_nr; ++ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + -+ if (commit_pages > large_extension) { -+ dev_warn(dev, -+ KBASE_MSG_PRE_FLAG -+ "commit_pages==%ld exceeds extension==%ld", -+ (unsigned long)commit_pages, -+ (unsigned long)large_extension); -+ return -EINVAL; -+ } -+#undef KBASE_MSG_PRE_FLAG -+ } -+#endif /* !MALI_USE_CSF */ ++ BUILD_BUG_ON(SLOT_RB_SIZE != 2); + -+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && -+ (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { -+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", -+ (unsigned long long)va_pages); -+ return -EINVAL; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return 0; -+#undef KBASE_MSG_PRE ++ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) ++ return; ++ if (WARN_ON(!submitted)) ++ return; ++ ++ if (submitted == katom) ++ kbase_kinstr_jm_atom_state_start(katom); +} + -+void kbase_gpu_vm_lock(struct kbase_context *kctx) ++void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) +{ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ mutex_lock(&kctx->reg_lock); -+} ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ const int slot = katom->slot_nr; ++ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); ++ struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); + -+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); ++ BUILD_BUG_ON(SLOT_RB_SIZE != 2); + -+void kbase_gpu_vm_unlock(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ mutex_unlock(&kctx->reg_lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) ++ return; ++ if (WARN_ON(!submitted)) ++ return; ++ if (WARN_ON((submitted != katom) && (queued 
!= katom))) ++ return; ++ ++ if (queued == katom) ++ return; ++ ++ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ kbase_kinstr_jm_atom_state_stop(katom); ++ if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ kbase_kinstr_jm_atom_state_start(queued); +} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h +new file mode 100644 +index 000000000..9451d4cd9 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h +@@ -0,0 +1,273 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); ++/* ++ * mali_kbase_kinstr_jm.h ++ * Kernel driver public interface to job manager atom tracing. This API provides ++ * a method to get the atom state changes into user space. ++ * ++ * The flow of operation is: ++ * ++ * | kernel | user | ++ * | ----------------------------------- | ----------------------------------- | ++ * | Initialize API with | | ++ * | kbase_kinstr_jm_init() | | ++ * | | | ++ * | Kernel code injects states with | | ++ * | kbase_kinstr_jm_atom_state_*() APIs | | ++ * | | Call ioctl() to get file descriptor | ++ * | | via KBASE_IOCTL_KINSTR_JM_FD | ++ * | Allocates a reader attached to FD | | ++ * | Allocates circular buffer and | | ++ * | patches, via ASM goto, the | | ++ * | kbase_kinstr_jm_atom_state_*() | | ++ * | | loop: | ++ * | | Call poll() on FD for POLLIN | ++ * | When threshold of changes is hit, | | ++ * | the poll is interrupted with | | ++ * | POLLIN. If circular buffer is | | ++ * | full then store the missed count | | ++ * | and interrupt poll | Call read() to get data from | ++ * | | circular buffer via the fd | ++ * | Kernel advances tail of circular | | ++ * | buffer | | ++ * | | Close file descriptor | ++ * | Deallocates circular buffer | | ++ * | | | ++ * | Terminate API with | | ++ * | kbase_kinstr_jm_term() | | ++ * ++ * All tracepoints are guarded on a static key. The static key is activated when ++ * a user space reader gets created. This means that there is negligible cost ++ * inserting the tracepoints into code when there are no readers. 
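++ *
++ * An illustrative user-space sequence (the device node path, buffer sizes and
++ * error handling are examples only, not requirements of this interface):
++ *
++ *   union kbase_kinstr_jm_fd args = { .in = { .count = 256 } };
++ *   int dev = open("/dev/mali0", O_RDWR | O_CLOEXEC);
++ *   int fd = ioctl(dev, KBASE_IOCTL_KINSTR_JM_FD, &args);
++ *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
++ *   while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
++ *           struct kbase_kinstr_jm_atom_state_change buf[64];
++ *           ssize_t bytes = read(fd, buf, sizeof(buf));
++ *           // each record is args.out.size bytes long
++ *   }
++ *   close(fd);   // releases the kernel-side circular buffer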
++ */ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+struct kbase_jit_debugfs_data { -+ int (*func)(struct kbase_jit_debugfs_data *data); -+ struct mutex lock; -+ struct kbase_context *kctx; -+ u64 active_value; -+ u64 pool_value; -+ u64 destroy_value; -+ char buffer[50]; -+}; ++#ifndef _KBASE_KINSTR_JM_H_ ++#define _KBASE_KINSTR_JM_H_ + -+static int kbase_jit_debugfs_common_open(struct inode *inode, -+ struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) -+{ -+ struct kbase_jit_debugfs_data *data; ++#include + -+ data = kzalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; ++#ifdef __KERNEL__ ++#include ++#include ++#else ++/* empty wrapper macros for userspace */ ++#define static_branch_unlikely(key) (1) ++#endif /* __KERNEL__ */ + -+ data->func = func; -+ mutex_init(&data->lock); -+ data->kctx = (struct kbase_context *) inode->i_private; ++/* Forward declarations */ ++struct kbase_context; ++struct kbase_kinstr_jm; ++struct kbase_jd_atom; ++union kbase_kinstr_jm_fd; + -+ file->private_data = data; ++/** ++ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context. ++ * @ctx: Non-NULL pointer to where the pointer to the created context will ++ * be stored on success. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx); + -+ return nonseekable_open(inode, file); ++/** ++ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context. ++ * @ctx: Pointer to context to be terminated. ++ */ ++void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx); ++ ++/** ++ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to ++ * read the atom state changes from userspace ++ * ++ * @ctx: Pointer to the initialized context ++ * @jm_fd_arg: Pointer to the union containing the in/out params ++ * Return: -1 on failure, valid file descriptor on success ++ */ ++int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, ++ union kbase_kinstr_jm_fd *jm_fd_arg); ++ ++/** ++ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state ++ * @atom: The atom that has changed state ++ * @state: The new state of the atom ++ * ++ * This performs the actual storage of the state ready for user space to ++ * read the data. It is only called when the static key is enabled from ++ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this ++ * function directly. ++ */ ++void kbasep_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const atom, ++ const enum kbase_kinstr_jm_reader_atom_state state); ++ ++/* Allows ASM goto patching to reduce tracing overhead. This is ++ * incremented/decremented when readers are created and terminated. This really ++ * shouldn't be changed externally, but if you do, make sure you use ++ * a static_key_inc()/static_key_dec() pair. 
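++ * In this patch that pairing is done by kbase_kinstr_jm_readers_add() and
++ * kbase_kinstr_jm_readers_del(), which call static_branch_inc() and
++ * static_branch_dec() on the key as readers are created and destroyed.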
++ */ ++extern struct static_key_false basep_kinstr_jm_reader_static_key; ++ ++/** ++ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state ++ * @atom: The atom that has changed state ++ * @state: The new state of the atom ++ * ++ * This uses a static key to reduce overhead when tracing is disabled ++ */ ++static inline void kbase_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const atom, ++ const enum kbase_kinstr_jm_reader_atom_state state) ++{ ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_state(atom, state); +} + -+static ssize_t kbase_jit_debugfs_common_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) ++/** ++ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a ++ * hardware or software queue. ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_queue( ++ struct kbase_jd_atom *const atom) +{ -+ struct kbase_jit_debugfs_data *data; -+ size_t size; -+ int ret; ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); ++} + -+ data = (struct kbase_jit_debugfs_data *) file->private_data; -+ mutex_lock(&data->lock); ++/** ++ * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an ++ * atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_start( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); ++} + -+ if (*ppos) { -+ size = strnlen(data->buffer, sizeof(data->buffer)); -+ } else { -+ if (!data->func) { -+ ret = -EACCES; -+ goto out_unlock; -+ } ++/** ++ * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an ++ * atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_stop( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); ++} + -+ if (data->func(data)) { -+ ret = -EACCES; -+ goto out_unlock; -+ } ++/** ++ * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed ++ * on an atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_complete( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); ++} + -+ size = scnprintf(data->buffer, sizeof(data->buffer), -+ "%llu,%llu,%llu\n", data->active_value, -+ data->pool_value, data->destroy_value); -+ } ++/** ++ * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for ++ * execution ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_queue(atom); ++} + -+ ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); ++/** ++ * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully ++ * completed ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_complete( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_complete(atom); ++} + -+out_unlock: -+ mutex_unlock(&data->lock); -+ return ret; ++/** ++ * kbase_kinstr_jm_atom_sw_start() - A software atom has started work ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_sw_start( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_start(atom); +} 
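+
++/*
++ * Illustrative sketch of how a software job path can use the wrappers above;
++ * kbasep_soft_job_run_sketch() is a hypothetical caller, while the
++ * kbase_kinstr_jm_atom_* calls are the real API:
++ *
++ *   static void kbasep_soft_job_run_sketch(struct kbase_jd_atom *atom)
++ *   {
++ *           kbase_kinstr_jm_atom_queue(atom);      // waiting in a software queue
++ *           kbase_kinstr_jm_atom_sw_start(atom);   // CPU work on the atom begins
++ *           // ... perform the software operation ...
++ *           kbase_kinstr_jm_atom_sw_stop(atom);    // CPU work on the atom ends
++ *           kbase_kinstr_jm_atom_complete(atom);   // atom is fully retired
++ *   }
++ */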
+ -+static int kbase_jit_debugfs_common_release(struct inode *inode, -+ struct file *file) ++/** ++ * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_sw_stop( ++ struct kbase_jd_atom *const atom) +{ -+ kfree(file->private_data); -+ return 0; ++ kbase_kinstr_jm_atom_state_stop(atom); +} + -+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ -+static int __fops ## _open(struct inode *inode, struct file *file) \ -+{ \ -+ return kbase_jit_debugfs_common_open(inode, file, __func); \ -+} \ -+static const struct file_operations __fops = { \ -+ .owner = THIS_MODULE, \ -+ .open = __fops ## _open, \ -+ .release = kbase_jit_debugfs_common_release, \ -+ .read = kbase_jit_debugfs_common_read, \ -+ .write = NULL, \ -+ .llseek = generic_file_llseek, \ ++/** ++ * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted ++ * @atom: The atom that has been submitted ++ * ++ * This private implementation should not be called directly, it is protected ++ * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. ++ */ ++void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); ++ ++/** ++ * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted ++ * @atom: The atom that has been submitted ++ */ ++static inline void kbase_kinstr_jm_atom_hw_submit( ++ struct kbase_jd_atom *const atom) ++{ ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_hw_submit(atom); +} + -+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) ++/** ++ * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released ++ * @atom: The atom that has been released ++ * ++ * This private implementation should not be called directly, it is protected ++ * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. ++ */ ++void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); ++ ++/** ++ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released ++ * @atom: The atom that has been released ++ */ ++static inline void kbase_kinstr_jm_atom_hw_release( ++ struct kbase_jd_atom *const atom) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct list_head *tmp; ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_hw_release(atom); ++} + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each(tmp, &kctx->jit_active_head) { -+ data->active_value++; -+ } ++#endif /* _KBASE_KINSTR_JM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +new file mode 100644 +index 000000000..823f9156e +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +@@ -0,0 +1,2021 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ list_for_each(tmp, &kctx->jit_pool_head) { -+ data->pool_value++; -+ } ++#include "mali_kbase.h" ++#include "mali_kbase_kinstr_prfcnt.h" ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" ++#include ++#include "mali_malisw.h" ++#include "mali_kbase_debug.h" + -+ list_for_each(tmp, &kctx->jit_destroy_head) { -+ data->destroy_value++; -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ return 0; -+} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, -+ kbase_jit_debugfs_count_get); ++/* Explicitly include epoll header for old kernels. Not required from 4.16. */ ++#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE ++#include ++#endif + -+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) ++/* The minimum allowed interval between dumps, in nanoseconds ++ * (equivalent to 10KHz) ++ */ ++#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) ++ ++/* The maximum allowed buffers per client */ ++#define MAX_BUFFER_COUNT 32 ++ ++/** ++ * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware ++ * counters. ++ * @hvirt: Hardware counter virtualizer used by kinstr_prfcnt. ++ * @info_item_count: Number of metadata elements. ++ * @metadata: Hardware counter metadata provided by virtualizer. ++ * @lock: Lock protecting kinstr_prfcnt state. ++ * @suspend_count: Suspend reference count. If non-zero, timer and worker ++ * are prevented from being re-scheduled. ++ * @client_count: Number of kinstr_prfcnt clients. ++ * @clients: List of kinstr_prfcnt clients. ++ * @dump_timer: Timer that enqueues dump_work to a workqueue. ++ * @dump_work: Worker for performing periodic counter dumps. ++ */ ++struct kbase_kinstr_prfcnt_context { ++ struct kbase_hwcnt_virtualizer *hvirt; ++ u32 info_item_count; ++ const struct kbase_hwcnt_metadata *metadata; ++ struct mutex lock; ++ size_t suspend_count; ++ size_t client_count; ++ struct list_head clients; ++ struct hrtimer dump_timer; ++ struct work_struct dump_work; ++}; ++ ++/** ++ * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data. ++ * @sample_meta: Pointer to sample metadata. ++ * @dump_buf: Dump buffer containing sample data. ++ */ ++struct kbase_kinstr_prfcnt_sample { ++ struct prfcnt_metadata *sample_meta; ++ struct kbase_hwcnt_dump_buffer dump_buf; ++}; ++ ++/** ++ * struct kbase_kinstr_prfcnt_sample_array - Array of sample data. ++ * @user_buf: Address of allocated userspace buffer. A single allocation is used ++ * for all Dump Buffers in the array. ++ * @sample_count: Number of allocated samples. ++ * @samples: Non-NULL pointer to the array of Dump Buffers. ++ */ ++struct kbase_kinstr_prfcnt_sample_array { ++ u8 *user_buf; ++ size_t sample_count; ++ struct kbase_kinstr_prfcnt_sample *samples; ++}; ++ ++/** ++ * struct kbase_kinstr_prfcnt_client_config - Client session configuration. ++ * @prfcnt_mode: Sampling mode: either manual or periodic. ++ * @counter_set: Set of performance counter blocks. ++ * @scope: Scope of performance counters to capture. ++ * @buffer_count: Number of buffers used to store samples. ++ * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode. ++ * @phys_em: Enable map used by the GPU. 
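++ *
++ * For example, a periodic session that dumps once per millisecond would set
++ * @period_ns to 1000000 (comfortably above DUMP_INTERVAL_MIN_NS) and can use
++ * at most @buffer_count = MAX_BUFFER_COUNT sample buffers.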
++ */ ++struct kbase_kinstr_prfcnt_client_config { ++ u8 prfcnt_mode; ++ u8 counter_set; ++ u8 scope; ++ u16 buffer_count; ++ u64 period_ns; ++ struct kbase_hwcnt_physical_enable_map phys_em; ++}; ++ ++/** ++ * enum kbase_kinstr_prfcnt_client_init_state - A list of ++ * initialisation states that the ++ * kinstr_prfcnt client can be at ++ * during initialisation. Useful ++ * for terminating a partially ++ * initialised client. ++ * ++ * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised ++ * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session ++ * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map ++ * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer ++ * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array ++ * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client ++ * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue ++ * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised ++ */ ++enum kbase_kinstr_prfcnt_client_init_state { ++ KINSTR_PRFCNT_UNINITIALISED, ++ KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, ++ KINSTR_PRFCNT_ENABLE_MAP, ++ KINSTR_PRFCNT_DUMP_BUFFER, ++ KINSTR_PRFCNT_SAMPLE_ARRAY, ++ KINSTR_PRFCNT_VIRTUALIZER_CLIENT, ++ KINSTR_PRFCNT_WAITQ_MUTEX, ++ KINSTR_PRFCNT_INITIALISED ++}; ++ ++/** ++ * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached ++ * to a kinstr_prfcnt context. ++ * @kinstr_ctx: kinstr_prfcnt context client is attached to. ++ * @hvcli: Hardware counter virtualizer client. ++ * @node: Node used to attach this client to list in ++ * kinstr_prfcnt context. ++ * @cmd_sync_lock: Lock coordinating the reader interface for commands. ++ * @next_dump_time_ns: Time in ns when this client's next periodic dump must ++ * occur. If 0, not a periodic client. ++ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic ++ * client. ++ * @sample_flags: Flags for the current active dumping sample, marking ++ * the conditions/events during the dump duration. ++ * @active: True if the client has been started. ++ * @config: Configuration of the client session. ++ * @enable_map: Counters enable map. ++ * @tmp_buf: Temporary buffer to use before handing over dump to ++ * client. ++ * @sample_arr: Array of dump buffers allocated by this client. ++ * @read_idx: Index of buffer read by userspace. ++ * @write_idx: Index of buffer being written by dump worker. ++ * @fetch_idx: Index of buffer being fetched by userspace, but ++ * pending a confirmation of being read (consumed) if it ++ * differs from the read_idx. ++ * @waitq: Client's notification queue. ++ * @sample_size: Size of the data required for one sample, in bytes. ++ * @sample_count: Number of samples the client is able to capture. ++ * @user_data: User data associated with the session. ++ * This is set when the session is started and stopped. ++ * This value is ignored for control commands that ++ * provide another value. 
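++ *
++ * A new sample is available to user space whenever @write_idx differs from
++ * @fetch_idx; this is the condition kbasep_kinstr_prfcnt_hwcnt_reader_poll()
++ * below uses to report EPOLLIN | EPOLLRDNORM.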
++ */ ++struct kbase_kinstr_prfcnt_client { ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx; ++ struct kbase_hwcnt_virtualizer_client *hvcli; ++ struct list_head node; ++ struct mutex cmd_sync_lock; ++ u64 next_dump_time_ns; ++ u32 dump_interval_ns; ++ u32 sample_flags; ++ bool active; ++ struct kbase_kinstr_prfcnt_client_config config; ++ struct kbase_hwcnt_enable_map enable_map; ++ struct kbase_hwcnt_dump_buffer tmp_buf; ++ struct kbase_kinstr_prfcnt_sample_array sample_arr; ++ atomic_t read_idx; ++ atomic_t write_idx; ++ atomic_t fetch_idx; ++ wait_queue_head_t waitq; ++ size_t sample_size; ++ size_t sample_count; ++ u64 user_data; ++}; ++ ++static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { ++ { ++ /* Request description for MODE request */ ++ .hdr = { ++ .item_type = PRFCNT_ENUM_TYPE_REQUEST, ++ .item_version = PRFCNT_READER_API_VERSION, ++ }, ++ .u.request = { ++ .request_item_type = PRFCNT_REQUEST_MODE, ++ .versions_mask = 0x1, ++ }, ++ }, ++ { ++ /* Request description for ENABLE request */ ++ .hdr = { ++ .item_type = PRFCNT_ENUM_TYPE_REQUEST, ++ .item_version = PRFCNT_READER_API_VERSION, ++ }, ++ .u.request = { ++ .request_item_type = PRFCNT_REQUEST_ENABLE, ++ .versions_mask = 0x1, ++ }, ++ }, ++}; ++ ++/** ++ * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll. ++ * @filp: Non-NULL pointer to file structure. ++ * @wait: Non-NULL pointer to poll table. ++ * ++ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if ++ * data can not be read without blocking, else EPOLLHUP | EPOLLERR. ++ */ ++static __poll_t ++kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, ++ struct poll_table_struct *wait) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ struct kbase_kinstr_prfcnt_client *cli; + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ data->active_value += reg->nr_pages; -+ } ++ if (!filp || !wait) ++ return EPOLLHUP | EPOLLERR; + -+ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { -+ data->pool_value += reg->nr_pages; -+ } ++ cli = filp->private_data; + -+ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { -+ data->destroy_value += reg->nr_pages; -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++ if (!cli) ++ return EPOLLHUP | EPOLLERR; + -+ return 0; ++ poll_wait(filp, &cli->waitq, wait); ++ ++ if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx)) ++ return EPOLLIN | EPOLLRDNORM; ++ ++ return (__poll_t)0; +} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, -+ kbase_jit_debugfs_vm_get); + -+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) ++/** ++ * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic ++ * dump time. ++ * @cur_ts_ns: Current time in nanoseconds. ++ * @interval: Interval between dumps in nanoseconds. ++ * ++ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump ++ * time that occurs after cur_ts_ns. ++ */ ++static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ /* Non-periodic client */ ++ if (interval == 0) ++ return 0; + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ data->active_value += reg->gpu_alloc->nents; -+ } ++ /* ++ * Return the next interval after the current time relative to t=0. 
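++ * (Worked example: with interval = 1000000 ns and cur_ts_ns = 3450000 ns,
++ * do_div() leaves cur_ts_ns == 3, so the next dump is scheduled at
++ * 4 * 1000000 = 4000000 ns.)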
++ * This means multiple clients with the same period will synchronize, ++ * regardless of when they were started, allowing the worker to be ++ * scheduled less frequently. ++ */ ++ do_div(cur_ts_ns, interval); + -+ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { -+ data->pool_value += reg->gpu_alloc->nents; -+ } ++ return (cur_ts_ns + 1) * interval; ++} + -+ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { -+ data->destroy_value += reg->gpu_alloc->nents; ++/** ++ * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds. ++ * ++ * Return: Current time in nanoseconds. ++ */ ++static u64 kbasep_kinstr_prfcnt_timestamp_ns(void) ++{ ++ return ktime_get_raw_ns(); ++} ++ ++/** ++ * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all ++ * periodic kinstr_prfcnt clients, ++ * then reschedule the dump worker ++ * appropriately. ++ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. ++ * ++ * If there are no periodic clients, then the dump worker will not be ++ * rescheduled. Else, the dump worker will be rescheduled for the next ++ * periodic client dump. ++ */ ++static void kbasep_kinstr_prfcnt_reschedule_worker( ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx) ++{ ++ u64 cur_ts_ns; ++ u64 shortest_period_ns = U64_MAX; ++ struct kbase_kinstr_prfcnt_client *pos; ++ ++ WARN_ON(!kinstr_ctx); ++ lockdep_assert_held(&kinstr_ctx->lock); ++ cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns(); ++ ++ /* ++ * This loop fulfills 2 separate tasks that don't affect each other: ++ * ++ * 1) Determine the shortest period. ++ * 2) Update the next dump time of clients that have already been ++ * dumped. It's important not to alter the next dump time of clients ++ * that haven't been dumped yet. ++ * ++ * For the sake of efficiency, the rescheduling decision ignores the time ++ * of the next dump and just uses the shortest period among all periodic ++ * clients. It is more efficient to serve multiple dump requests at once, ++ * rather than trying to reschedule the worker to serve each request ++ * individually. ++ */ ++ list_for_each_entry(pos, &kinstr_ctx->clients, node) { ++ /* Ignore clients that are not periodic or not active. */ ++ if (pos->active && pos->dump_interval_ns > 0) { ++ shortest_period_ns = ++ MIN(shortest_period_ns, pos->dump_interval_ns); ++ ++ /* Next dump should happen exactly one period after the last dump. ++ * If last dump was overdue and scheduled to happen more than one ++ * period ago, compensate for that by scheduling next dump in the ++ * immediate future. ++ */ ++ if (pos->next_dump_time_ns < cur_ts_ns) ++ pos->next_dump_time_ns = ++ MAX(cur_ts_ns + 1, ++ pos->next_dump_time_ns + ++ pos->dump_interval_ns); ++ } + } -+ mutex_unlock(&kctx->jit_evict_lock); + -+ return 0; ++ /* Cancel the timer if it is already pending */ ++ hrtimer_cancel(&kinstr_ctx->dump_timer); ++ ++ /* Start the timer if there are periodic clients and kinstr_prfcnt is not ++ * suspended. 
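As an illustrative aside (not part of the patch): the rescheduling logic above only needs the shortest period among the active periodic clients, because the next wake-up is aligned to a multiple of that period counted from t=0, so every client with the same period ends up being served by the same worker run. A small user-space model of that arithmetic (the kernel helper uses do_div() for the 64-bit division):

#include <stdint.h>
#include <stdio.h>

/* Next dump time strictly after cur_ts_ns, aligned to a multiple of the
 * interval counted from t = 0 (mirrors the do_div() based helper).
 */
static uint64_t next_dump_time_ns(uint64_t cur_ts_ns, uint32_t interval)
{
	if (interval == 0)
		return 0; /* non-periodic client */
	return (cur_ts_ns / interval + 1) * (uint64_t)interval;
}

int main(void)
{
	/* Two clients with the same 1 ms period, started at different times,
	 * still get the same next dump time and share one worker wake-up.
	 */
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2300000, 1000000));
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2999999, 1000000));
	/* both print 3000000 */
	return 0;
}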
++ */ ++ if ((shortest_period_ns != U64_MAX) && ++ (kinstr_ctx->suspend_count == 0)) { ++ u64 next_schedule_time_ns = ++ kbasep_kinstr_prfcnt_next_dump_time_ns( ++ cur_ts_ns, shortest_period_ns); ++ hrtimer_start(&kinstr_ctx->dump_timer, ++ ns_to_ktime(next_schedule_time_ns - cur_ts_ns), ++ HRTIMER_MODE_REL); ++ } +} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, -+ kbase_jit_debugfs_phys_get); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) ++static enum prfcnt_block_type ++kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ enum prfcnt_block_type block_type; + -+#if !MALI_USE_CSF -+ mutex_lock(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ data->active_value += reg->used_pages; ++ switch (type) { ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: ++ block_type = PRFCNT_BLOCK_TYPE_FE; ++ break; ++ ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: ++ block_type = PRFCNT_BLOCK_TYPE_TILER; ++ break; ++ ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: ++ block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE; ++ break; ++ ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: ++ block_type = PRFCNT_BLOCK_TYPE_MEMORY; ++ break; ++ ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: ++ default: ++ block_type = PRFCNT_BLOCK_TYPE_RESERVED; ++ break; + } -+ mutex_unlock(&kctx->jit_evict_lock); -+#if !MALI_USE_CSF -+ mutex_unlock(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ + -+ return 0; ++ return block_type; +} + -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, -+ kbase_jit_debugfs_used_get); ++static bool kbase_kinstr_is_block_type_reserved(const struct kbase_hwcnt_metadata *metadata, ++ size_t grp, size_t blk) ++{ ++ enum prfcnt_block_type block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( ++ kbase_hwcnt_metadata_block_type(metadata, grp, blk)); + -+static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, -+ struct kbase_va_region *reg, size_t pages_needed, -+ size_t *freed, bool shrink); ++ return block_type == PRFCNT_BLOCK_TYPE_RESERVED; ++} + -+static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) ++/** ++ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta ++ * item array. ++ * @enable_map: Non-NULL pointer to the map of enabled counters. ++ * @dst: Non-NULL pointer to the sample's dump buffer object. ++ * @block_meta_base: Non-NULL double pointer to the start of the block meta ++ * data items. ++ * @base_addr: Address of allocated pages for array of samples. Used ++ * to calculate offset of block values. ++ * @counter_set: The SET which blocks represent. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_dump_buffer *dst, ++ struct prfcnt_metadata **block_meta_base, ++ u8 *base_addr, u8 counter_set) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ size_t grp, blk, blk_inst; ++ struct prfcnt_metadata **ptr_md = block_meta_base; ++ const struct kbase_hwcnt_metadata *metadata; ++ uint8_t block_idx = 0; + -+#if !MALI_USE_CSF -+ mutex_lock(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ kbase_gpu_vm_lock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ int err; -+ size_t freed = 0u; ++ if (!dst || !*block_meta_base) ++ return -EINVAL; + -+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg, -+ SIZE_MAX, &freed, false); ++ metadata = dst->metadata; ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { ++ u8 *dst_blk; + -+ if (err) { -+ /* Failed to calculate, try the next region */ ++ /* Block indices must be reported with no gaps. */ ++ if (blk_inst == 0) ++ block_idx = 0; ++ ++ /* Skip unavailable or non-enabled blocks */ ++ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || ++ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || ++ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + continue; -+ } + -+ data->active_value += freed; ++ dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); ++ (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK; ++ (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION; ++ (*ptr_md)->u.block_md.block_type = ++ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( ++ kbase_hwcnt_metadata_block_type(metadata, grp, ++ blk)); ++ (*ptr_md)->u.block_md.block_idx = block_idx; ++ (*ptr_md)->u.block_md.set = counter_set; ++ (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; ++ (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); ++ ++ /* update the buf meta data block pointer to next item */ ++ (*ptr_md)++; ++ block_idx++; + } -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_gpu_vm_unlock(kctx); -+#if !MALI_USE_CSF -+ mutex_unlock(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ + + return 0; +} + -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, -+ kbase_jit_debugfs_trim_get); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+void kbase_jit_debugfs_init(struct kbase_context *kctx) ++/** ++ * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample ++ * output. ++ * @cli: Non-NULL pointer to a kinstr_prfcnt client. ++ * @dump_buf: Non-NULL pointer to dump buffer where sample is stored. ++ * @ptr_md: Non-NULL pointer to sample metadata. ++ */ ++static void kbasep_kinstr_prfcnt_set_sample_metadata( ++ struct kbase_kinstr_prfcnt_client *cli, ++ struct kbase_hwcnt_dump_buffer *dump_buf, ++ struct prfcnt_metadata *ptr_md) +{ -+ /* prevent unprivileged use of debug file system -+ * in old kernel version -+ */ -+ const mode_t mode = 0444; ++ u8 clk_cnt, i; + -+ /* Caller already ensures this, but we keep the pattern for -+ * maintenance safety. 
-+ */ -+ if (WARN_ON(!kctx) || -+ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ clk_cnt = cli->kinstr_ctx->metadata->clk_cnt; ++ ++ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */ ++ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE; ++ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; ++ ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx); ++ ptr_md->u.sample_md.flags = cli->sample_flags; ++ ++ /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */ ++ ptr_md++; ++ if (clk_cnt > MAX_REPORTED_DOMAINS) ++ clk_cnt = MAX_REPORTED_DOMAINS; ++ ++ /* Handle the prfcnt_clock_metadata meta item */ ++ ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK; ++ ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION; ++ ptr_md->u.clock_md.num_domains = clk_cnt; ++ for (i = 0; i < clk_cnt; i++) ++ ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i]; ++ ++ /* Dealing with counter blocks */ ++ ptr_md++; ++ if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md, ++ cli->sample_arr.user_buf, ++ cli->config.counter_set))) + return; + ++ /* Handle the last sentinel item */ ++ ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE; ++ ptr_md->hdr.item_version = 0; ++} + ++/** ++ * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. ++ * @cli: Non-NULL pointer to a kinstr_prfcnt client. ++ * @buf_idx: The index to the sample array for saving the sample. ++ * @user_data: User data to return to the user. ++ * @ts_start_ns: Time stamp for the start point of the sample dump. ++ * @ts_end_ns: Time stamp for the end point of the sample dump. ++ */ ++static void kbasep_kinstr_prfcnt_client_output_sample( ++ struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx, ++ u64 user_data, u64 ts_start_ns, u64 ts_end_ns) ++{ ++ struct kbase_hwcnt_dump_buffer *dump_buf; ++ struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf; ++ struct prfcnt_metadata *ptr_md; + -+ /* Debugfs entry for getting the number of JIT allocations. */ -+ debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_count_fops); ++ if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) ++ return; + -+ /* -+ * Debugfs entry for getting the total number of virtual pages -+ * used by JIT allocations. -+ */ -+ debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_vm_fops); ++ dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; ++ ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; + -+ /* -+ * Debugfs entry for getting the number of physical pages used -+ * by JIT allocations. ++ /* Patch the dump buf headers, to hide the counters that other hwcnt ++ * clients are using. + */ -+ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_phys_fops); -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ /* -+ * Debugfs entry for getting the number of pages used -+ * by JIT allocations for estimating the physical pressure -+ * limit. ++ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map); ++ ++ /* Copy the temp buffer to the userspace visible buffer. The strict ++ * variant will explicitly zero any non-enabled counters to ensure ++ * nothing except exactly what the user asked for is made visible. 
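As an illustrative aside (not part of the patch): the helpers above lay out each sample's description as a flat, sentinel-terminated list of metadata items: one SAMPLE item first, one CLOCK item second, then one BLOCK item per enabled block instance, closed by an item of type FLEX_LIST_TYPE_NONE. A consumer can therefore walk the list without knowing its length in advance, as in the sketch below, which uses a cut-down stand-in for struct prfcnt_metadata rather than the real uapi layout:

#include <stdint.h>
#include <stdio.h>

/* Cut-down stand-in for the uapi metadata item (illustration only). */
enum md_type { MD_NONE = 0, MD_SAMPLE, MD_CLOCK, MD_BLOCK };

struct md_item {
	uint16_t item_type;     /* one of enum md_type */
	uint16_t item_version;
	uint32_t values_offset; /* used by MD_BLOCK items */
};

/* Walk a sentinel-terminated item list, as emitted in front of a sample. */
static void walk_sample_metadata(const struct md_item *md)
{
	for (; md->item_type != MD_NONE; md++) {
		if (md->item_type == MD_BLOCK)
			printf("block counters at offset %u\n",
			       (unsigned)md->values_offset);
	}
}

int main(void)
{
	const struct md_item items[] = {
		{ MD_SAMPLE, 1, 0 },
		{ MD_CLOCK, 1, 0 },
		{ MD_BLOCK, 1, 256 },
		{ MD_BLOCK, 1, 512 },
		{ MD_NONE, 0, 0 },
	};

	walk_sample_metadata(items);
	return 0;
}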
+ */ -+ debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_used_fops); ++ kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, ++ &cli->enable_map); + -+ /* -+ * Debugfs entry for getting the number of pages that could -+ * be trimmed to free space for more JIT allocations. ++ /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item. ++ * Set timestamp and user data for real dump. + */ -+ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_trim_fops); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ptr_md->u.sample_md.timestamp_start = ts_start_ns; ++ ptr_md->u.sample_md.timestamp_end = ts_end_ns; ++ ptr_md->u.sample_md.user_data = user_data; ++ ++ kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); +} -+#endif /* CONFIG_DEBUG_FS */ + +/** -+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations -+ * @work: Work item ++ * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client. ++ * @cli: Non-NULL pointer to a kinstr_prfcnt client. ++ * @event_id: Event type that triggered the dump. ++ * @user_data: User data to return to the user. + * -+ * This function does the work of freeing JIT allocations whose physical -+ * backing has been released. ++ * Return: 0 on success, else error code. + */ -+static void kbase_jit_destroy_worker(struct work_struct *work) ++static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, ++ enum base_hwcnt_reader_event event_id, u64 user_data) +{ -+ struct kbase_context *kctx; -+ struct kbase_va_region *reg; -+ -+ kctx = container_of(work, struct kbase_context, jit_work); -+ do { -+ mutex_lock(&kctx->jit_evict_lock); -+ if (list_empty(&kctx->jit_destroy_head)) { -+ mutex_unlock(&kctx->jit_evict_lock); -+ break; -+ } -+ -+ reg = list_first_entry(&kctx->jit_destroy_head, -+ struct kbase_va_region, jit_node); ++ int ret; ++ u64 ts_start_ns = 0; ++ u64 ts_end_ns = 0; ++ unsigned int write_idx; ++ unsigned int read_idx; ++ size_t available_samples_count; + -+ list_del(®->jit_node); -+ mutex_unlock(&kctx->jit_evict_lock); ++ WARN_ON(!cli); ++ lockdep_assert_held(&cli->kinstr_ctx->lock); + -+ kbase_gpu_vm_lock(kctx); ++ write_idx = atomic_read(&cli->write_idx); ++ read_idx = atomic_read(&cli->read_idx); + -+ /* -+ * Incrementing the refcount is prevented on JIT regions. -+ * If/when this ever changes we would need to compensate -+ * by implementing "free on putting the last reference", -+ * but only for JIT regions. ++ /* Check if there is a place to copy HWC block into. Calculate the ++ * number of available samples count, by taking into account the type ++ * of dump. ++ */ ++ available_samples_count = cli->sample_arr.sample_count; ++ WARN_ON(available_samples_count < 1); ++ /* Reserve one slot to store the implicit sample taken on CMD_STOP */ ++ available_samples_count -= 1; ++ if (write_idx - read_idx == available_samples_count) { ++ /* For periodic sampling, the current active dump ++ * will be accumulated in the next sample, when ++ * a buffer becomes available. 
+ */ -+ WARN_ON(atomic_read(®->no_user_free_count) > 1); -+ kbase_va_region_no_user_free_dec(reg); -+ kbase_mem_free_region(kctx, reg); -+ kbase_gpu_vm_unlock(kctx); -+ } while (1); -+} ++ if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC) ++ cli->sample_flags |= SAMPLE_FLAG_OVERFLOW; ++ return -EBUSY; ++ } + -+int kbase_jit_init(struct kbase_context *kctx) -+{ -+ mutex_lock(&kctx->jit_evict_lock); -+ INIT_LIST_HEAD(&kctx->jit_active_head); -+ INIT_LIST_HEAD(&kctx->jit_pool_head); -+ INIT_LIST_HEAD(&kctx->jit_destroy_head); -+ INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); ++ /* For the rest of the function, use the actual sample_count ++ * that represents the real size of the array. ++ */ ++ write_idx %= cli->sample_arr.sample_count; + -+#if MALI_USE_CSF -+ mutex_init(&kctx->csf.kcpu_queues.jit_lock); -+ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); -+ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); -+#else /* !MALI_USE_CSF */ -+ INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); -+ INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); -+#endif /* MALI_USE_CSF */ -+ mutex_unlock(&kctx->jit_evict_lock); ++ ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns, ++ &cli->tmp_buf); ++ /* HWC dump error, set the sample with error flag */ ++ if (ret) ++ cli->sample_flags |= SAMPLE_FLAG_ERROR; + -+ kctx->jit_max_allocations = 0; -+ kctx->jit_current_allocations = 0; -+ kctx->trim_level = 0; ++ /* Make the sample ready and copy it to the userspace mapped buffer */ ++ kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns, ++ ts_end_ns); ++ ++ /* Notify client. Make sure all changes to memory are visible. */ ++ wmb(); ++ atomic_inc(&cli->write_idx); ++ wake_up_interruptible(&cli->waitq); ++ /* Reset the flags for the next sample dump */ ++ cli->sample_flags = 0; + + return 0; +} + -+/* Check if the allocation from JIT pool is of the same size as the new JIT -+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets -+ * the alignment requirements. -+ */ -+static bool meet_size_and_tiler_align_top_requirements( -+ const struct kbase_va_region *walker, -+ const struct base_jit_alloc_info *info) ++static int ++kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, ++ u64 user_data) +{ -+ bool meet_reqs = true; ++ int ret; ++ u64 tm_start, tm_end; ++ unsigned int write_idx; ++ unsigned int read_idx; ++ size_t available_samples_count; + -+ if (walker->nr_pages != info->va_pages) -+ meet_reqs = false; ++ WARN_ON(!cli); ++ lockdep_assert_held(&cli->cmd_sync_lock); + -+#if !MALI_USE_CSF -+ if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { -+ size_t align = info->extension; -+ size_t align_mask = align - 1; ++ /* If the client is already started, the command is a no-op */ ++ if (cli->active) ++ return 0; + -+ if ((walker->start_pfn + info->commit_pages) & align_mask) -+ meet_reqs = false; -+ } -+#endif /* !MALI_USE_CSF */ ++ write_idx = atomic_read(&cli->write_idx); ++ read_idx = atomic_read(&cli->read_idx); + -+ return meet_reqs; -+} ++ /* Check whether there is space to store atleast an implicit sample ++ * corresponding to CMD_STOP. 
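As an illustrative aside (not part of the patch): the fullness check above works on free-running unsigned indices, so write_idx - read_idx gives the occupancy even after the counters wrap, and one slot is always held back so the implicit sample produced on CMD_STOP can be stored. A minimal model of that check:

#include <stdio.h>

/* Model of the "is there room for a normal dump?" check: one slot is
 * reserved for the implicit CMD_STOP sample, and the unsigned difference
 * of free-running indices is the occupancy even across wrap-around.
 */
static int room_for_periodic_dump(unsigned int write_idx,
				  unsigned int read_idx,
				  unsigned int sample_count)
{
	unsigned int reserved_for_stop = 1;

	return (write_idx - read_idx) < (sample_count - reserved_for_stop);
}

int main(void)
{
	/* 4-slot buffer with 3 samples pending: only the STOP slot is left. */
	printf("%d\n", room_for_periodic_dump(3, 0, 4));           /* 0 */
	/* Wrapped counters: occupancy is 2 even though write_idx passed 0. */
	printf("%d\n", room_for_periodic_dump(1, 0xffffffffu, 4)); /* 1 */
	return 0;
}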
++ */ ++ available_samples_count = cli->sample_count - (write_idx - read_idx); ++ if (!available_samples_count) ++ return -EBUSY; + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+/* Function will guarantee *@freed will not exceed @pages_needed -+ */ -+static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, -+ struct kbase_va_region *reg, size_t pages_needed, -+ size_t *freed, bool shrink) -+{ -+ int err = 0; -+ size_t available_pages = 0u; -+ const size_t old_pages = kbase_reg_current_backed_size(reg); -+ size_t new_pages = old_pages; -+ size_t to_free = 0u; -+ size_t max_allowed_pages = old_pages; ++ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, ++ &cli->config.phys_em); + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Enable all the available clk_enable_map. */ ++ cli->enable_map.clk_enable_map = (1ull << cli->kinstr_ctx->metadata->clk_cnt) - 1; + -+ /* Is this a JIT allocation that has been reported on? */ -+ if (reg->used_pages == reg->nr_pages) -+ goto out; ++ mutex_lock(&cli->kinstr_ctx->lock); ++ /* Enable HWC from the configuration of the client creation */ ++ ret = kbase_hwcnt_virtualizer_client_set_counters( ++ cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); + -+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { -+ /* For address based memory usage calculation, the GPU -+ * allocates objects of up to size 's', but aligns every object -+ * to alignment 'a', with a < s. -+ * -+ * It also doesn't have to write to all bytes in an object of -+ * size 's'. -+ * -+ * Hence, we can observe the GPU's address for the end of used -+ * memory being up to (s - a) bytes into the first unallocated -+ * page. -+ * -+ * We allow for this and only warn when it exceeds this bound -+ * (rounded up to page sized units). Note, this is allowed to -+ * exceed reg->nr_pages. -+ */ -+ max_allowed_pages += PFN_UP( -+ KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - -+ KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); -+ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { -+ /* The GPU could report being ready to write to the next -+ * 'extension' sized chunk, but didn't actually write to it, so we -+ * can report up to 'extension' size pages more than the backed -+ * size. -+ * -+ * Note, this is allowed to exceed reg->nr_pages. -+ */ -+ max_allowed_pages += reg->extension; ++ if (!ret) { ++ cli->active = true; ++ cli->user_data = user_data; ++ cli->sample_flags = 0; + -+ /* Also note that in these GPUs, the GPU may make a large (>1 -+ * page) initial allocation but not actually write out to all -+ * of it. Hence it might report that a much higher amount of -+ * memory was used than actually was written to. This does not -+ * result in a real warning because on growing this memory we -+ * round up the size of the allocation up to an 'extension' sized -+ * chunk, hence automatically bringing the backed size up to -+ * the reported size. -+ */ ++ if (cli->dump_interval_ns) ++ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); + } + -+ if (old_pages < reg->used_pages) { -+ /* Prevent overflow on available_pages, but only report the -+ * problem if it's in a scenario where used_pages should have -+ * been consistent with the backed size -+ * -+ * Note: In case of a size-based report, this legitimately -+ * happens in common use-cases: we allow for up to this size of -+ * memory being used, but depending on the content it doesn't -+ * have to use all of it. 
-+ * -+ * Hence, we're much more quiet about that in the size-based -+ * report case - it's not indicating a real problem, it's just -+ * for information -+ */ -+ if (max_allowed_pages < reg->used_pages) { -+ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) -+ dev_warn(kctx->kbdev->dev, -+ "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", -+ __func__, -+ old_pages, reg->used_pages, -+ max_allowed_pages, -+ reg->start_pfn << PAGE_SHIFT, -+ reg->nr_pages); -+ else -+ dev_dbg(kctx->kbdev->dev, -+ "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", -+ __func__, -+ old_pages, reg->used_pages, -+ reg->start_pfn << PAGE_SHIFT, -+ reg->nr_pages); -+ } -+ /* In any case, no error condition to report here, caller can -+ * try other regions -+ */ ++ mutex_unlock(&cli->kinstr_ctx->lock); + -+ goto out; -+ } -+ available_pages = old_pages - reg->used_pages; -+ to_free = min(available_pages, pages_needed); ++ return ret; ++} + -+ if (shrink) { -+ new_pages -= to_free; ++static int ++kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, ++ u64 user_data) ++{ ++ int ret; ++ u64 tm_start = 0; ++ u64 tm_end = 0; ++ struct kbase_hwcnt_physical_enable_map phys_em; ++ size_t available_samples_count; ++ unsigned int write_idx; ++ unsigned int read_idx; + -+ err = kbase_mem_shrink(kctx, reg, new_pages); -+ } -+out: -+ trace_mali_jit_trim_from_region(reg, to_free, old_pages, -+ available_pages, new_pages); -+ *freed = to_free; -+ return err; -+} ++ WARN_ON(!cli); ++ lockdep_assert_held(&cli->cmd_sync_lock); + ++ /* If the client is not started, the command is invalid */ ++ if (!cli->active) ++ return -EINVAL; + -+/** -+ * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been -+ * freed -+ * @kctx: Pointer to the kbase context whose active JIT allocations will be -+ * checked. -+ * @pages_needed: The maximum number of pages to trim. -+ * -+ * This functions checks all active JIT allocations in @kctx for unused pages -+ * at the end, and trim the backed memory regions of those allocations down to -+ * the used portion and free the unused pages into the page pool. -+ * -+ * Specifying @pages_needed allows us to stop early when there's enough -+ * physical memory freed to sufficiently bring down the total JIT physical page -+ * usage (e.g. to below the pressure limit) -+ * -+ * Return: Total number of successfully freed pages -+ */ -+static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, -+ size_t pages_needed) -+{ -+ struct kbase_va_region *reg, *tmp; -+ size_t total_freed = 0; ++ mutex_lock(&cli->kinstr_ctx->lock); + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->reg_lock); -+ lockdep_assert_held(&kctx->jit_evict_lock); ++ /* Disable counters under the lock, so we do not race with the ++ * sampling thread. 
++ */ ++ phys_em.fe_bm = 0; ++ phys_em.tiler_bm = 0; ++ phys_em.mmu_l2_bm = 0; ++ phys_em.shader_bm = 0; + -+ list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { -+ int err; -+ size_t freed = 0u; ++ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); + -+ err = kbase_mem_jit_trim_pages_from_region(kctx, reg, -+ pages_needed, &freed, true); ++ /* Check whether one has the buffer to hold the last sample */ ++ write_idx = atomic_read(&cli->write_idx); ++ read_idx = atomic_read(&cli->read_idx); + -+ if (err) { -+ /* Failed to trim, try the next region */ -+ continue; -+ } ++ available_samples_count = cli->sample_count - (write_idx - read_idx); + -+ total_freed += freed; -+ WARN_ON(freed > pages_needed); -+ pages_needed -= freed; -+ if (!pages_needed) -+ break; ++ ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, ++ &cli->enable_map, ++ &tm_start, &tm_end, ++ &cli->tmp_buf); ++ /* If the last stop sample is in error, set the sample flag */ ++ if (ret) ++ cli->sample_flags |= SAMPLE_FLAG_ERROR; ++ ++ /* There must be a place to save the last stop produced sample */ ++ if (!WARN_ON(!available_samples_count)) { ++ write_idx %= cli->sample_arr.sample_count; ++ /* Handle the last stop sample */ ++ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, ++ &cli->config.phys_em); ++ /* As this is a stop sample, mark it as MANUAL */ ++ kbasep_kinstr_prfcnt_client_output_sample( ++ cli, write_idx, user_data, tm_start, tm_end); ++ /* Notify client. Make sure all changes to memory are visible. */ ++ wmb(); ++ atomic_inc(&cli->write_idx); ++ wake_up_interruptible(&cli->waitq); + } + -+ trace_mali_jit_trim(total_freed); ++ cli->active = false; ++ cli->user_data = user_data; + -+ return total_freed; ++ if (cli->dump_interval_ns) ++ kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx); ++ ++ mutex_unlock(&cli->kinstr_ctx->lock); ++ ++ return 0; +} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+static int kbase_jit_grow(struct kbase_context *kctx, -+ const struct base_jit_alloc_info *info, -+ struct kbase_va_region *reg, -+ struct kbase_sub_alloc **prealloc_sas, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) ++static int ++kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, ++ u64 user_data) +{ -+ size_t delta; -+ size_t pages_required; -+ size_t old_size; -+ struct kbase_mem_pool *pool; -+ int ret = -ENOMEM; -+ struct tagged_addr *gpu_pages; ++ int ret; + -+ if (info->commit_pages > reg->nr_pages) { -+ /* Attempted to grow larger than maximum size */ ++ lockdep_assert_held(&cli->cmd_sync_lock); ++ ++ /* If the client is not started, or not manual, the command invalid */ ++ if (!cli->active || cli->dump_interval_ns) + return -EINVAL; -+ } + -+ lockdep_assert_held(&kctx->reg_lock); ++ mutex_lock(&cli->kinstr_ctx->lock); + -+ /* Make the physical backing no longer reclaimable */ -+ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) -+ goto update_failed; ++ ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data); + -+ if (reg->gpu_alloc->nents >= info->commit_pages) -+ goto done; ++ mutex_unlock(&cli->kinstr_ctx->lock); + -+ /* Grow the backing */ -+ old_size = reg->gpu_alloc->nents; ++ return ret; ++} + -+ /* Allocate some more pages */ -+ delta = info->commit_pages - reg->gpu_alloc->nents; -+ pages_required = delta; ++static int ++kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli) ++{ ++ unsigned int write_idx; + -+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M 
/ SZ_4K)) { -+ pool = &kctx->mem_pools.large[kctx->jit_group_id]; -+ /* Round up to number of 2 MB pages required */ -+ pages_required += ((SZ_2M / SZ_4K) - 1); -+ pages_required /= (SZ_2M / SZ_4K); -+ } else { -+ pool = &kctx->mem_pools.small[kctx->jit_group_id]; -+ } ++ WARN_ON(!cli); ++ lockdep_assert_held(&cli->cmd_sync_lock); + -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ pages_required *= 2; ++ mutex_lock(&cli->kinstr_ctx->lock); + -+ spin_lock(&kctx->mem_partials_lock); -+ kbase_mem_pool_lock(pool); ++ write_idx = atomic_read(&cli->write_idx); + -+ /* As we can not allocate memory from the kernel with the vm_lock held, -+ * grow the pool to the required size with the lock dropped. We hold the -+ * pool lock to prevent another thread from allocating from the pool -+ * between the grow and allocation. ++ /* Discard (clear) all internally buffered samples. Note, if there ++ * is a fetched sample in flight, one should not touch the read index, ++ * leaving it alone for the put-sample operation to update it. The ++ * consistency between the read_idx and the fetch_idx is coordinated by ++ * holding the cli->cmd_sync_lock. + */ -+ while (kbase_mem_pool_size(pool) < pages_required) { -+ int pool_delta = pages_required - kbase_mem_pool_size(pool); -+ int ret; ++ if (atomic_read(&cli->fetch_idx) != atomic_read(&cli->read_idx)) { ++ atomic_set(&cli->fetch_idx, write_idx); ++ } else { ++ atomic_set(&cli->fetch_idx, write_idx); ++ atomic_set(&cli->read_idx, write_idx); ++ } + -+ kbase_mem_pool_unlock(pool); -+ spin_unlock(&kctx->mem_partials_lock); ++ mutex_unlock(&cli->kinstr_ctx->lock); + -+ kbase_gpu_vm_unlock(kctx); -+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); -+ kbase_gpu_vm_lock(kctx); ++ return 0; ++} + -+ if (ret) -+ goto update_failed; ++int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, ++ struct prfcnt_control_cmd *control_cmd) ++{ ++ int ret = 0; + -+ spin_lock(&kctx->mem_partials_lock); -+ kbase_mem_pool_lock(pool); -+ } ++ mutex_lock(&cli->cmd_sync_lock); + -+ gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, -+ delta, &prealloc_sas[0]); -+ if (!gpu_pages) { -+ kbase_mem_pool_unlock(pool); -+ spin_unlock(&kctx->mem_partials_lock); -+ goto update_failed; ++ switch (control_cmd->cmd) { ++ case PRFCNT_CONTROL_CMD_START: ++ ret = kbasep_kinstr_prfcnt_client_start(cli, ++ control_cmd->user_data); ++ break; ++ case PRFCNT_CONTROL_CMD_STOP: ++ ret = kbasep_kinstr_prfcnt_client_stop(cli, ++ control_cmd->user_data); ++ break; ++ case PRFCNT_CONTROL_CMD_SAMPLE_SYNC: ++ ret = kbasep_kinstr_prfcnt_client_sync_dump( ++ cli, control_cmd->user_data); ++ break; ++ case PRFCNT_CONTROL_CMD_DISCARD: ++ ret = kbasep_kinstr_prfcnt_client_discard(cli); ++ break; ++ default: ++ ret = -EINVAL; ++ break; + } + -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ struct tagged_addr *cpu_pages; ++ mutex_unlock(&cli->cmd_sync_lock); + -+ cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, -+ pool, delta, &prealloc_sas[1]); -+ if (!cpu_pages) { -+ kbase_free_phy_pages_helper_locked(reg->gpu_alloc, -+ pool, gpu_pages, delta); -+ kbase_mem_pool_unlock(pool); -+ spin_unlock(&kctx->mem_partials_lock); -+ goto update_failed; -+ } ++ return ret; ++} ++ ++static int ++kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, ++ struct prfcnt_sample_access *sample_access) ++{ ++ unsigned int write_idx; ++ unsigned int read_idx; ++ unsigned int fetch_idx; ++ u64 sample_offset_bytes; ++ struct prfcnt_metadata *sample_meta; ++ int err = 0; ++ ++ 
mutex_lock(&cli->cmd_sync_lock); ++ write_idx = atomic_read(&cli->write_idx); ++ read_idx = atomic_read(&cli->read_idx); ++ ++ if (write_idx == read_idx) { ++ err = -EINVAL; ++ goto error_out; + } -+ kbase_mem_pool_unlock(pool); -+ spin_unlock(&kctx->mem_partials_lock); + -+ ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, -+ old_size, mmu_sync_info); -+ /* -+ * The grow failed so put the allocation back in the -+ * pool and return failure. ++ /* If the client interface has already had a sample been fetched, ++ * reflected by the fetch index not equal to read_idx, i.e., typically ++ * read_idx + 1 == fetch_idx, ++ * further fetch is not allowed until the previously fetched buffer ++ * is put back (which brings the read_idx == fetch_idx). As a design, ++ * the above add one equal condition (i.e. typical cases) may only be ++ * untrue if there had been an interface operation on sample discard, ++ * after the sample in question already been fetched, in which case, ++ * the fetch_idx could have a delta larger than 1 relative to the ++ * read_idx. + */ -+ if (ret) -+ goto update_failed; ++ fetch_idx = atomic_read(&cli->fetch_idx); ++ if (read_idx != fetch_idx) { ++ err = -EBUSY; ++ goto error_out; ++ } + -+done: -+ ret = 0; ++ read_idx %= cli->sample_arr.sample_count; ++ sample_meta = cli->sample_arr.samples[read_idx].sample_meta; ++ sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; + -+ /* Update attributes of JIT allocation taken from the pool */ -+ reg->initial_commit = info->commit_pages; -+ reg->extension = info->extension; ++ sample_access->sequence = sample_meta->u.sample_md.seq; ++ sample_access->sample_offset_bytes = sample_offset_bytes; + -+update_failed: -+ return ret; ++ /* Marking a sample has been fetched by advancing the fetch index */ ++ atomic_inc(&cli->fetch_idx); ++ ++error_out: ++ mutex_unlock(&cli->cmd_sync_lock); ++ return err; +} + -+static void trace_jit_stats(struct kbase_context *kctx, -+ u32 bin_id, u32 max_allocations) ++static int ++kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli, ++ struct prfcnt_sample_access *sample_access) +{ -+ const u32 alloc_count = -+ kctx->jit_current_allocations_per_bin[bin_id]; -+ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned int write_idx; ++ unsigned int read_idx; ++ unsigned int fetch_idx; ++ u64 sample_offset_bytes; ++ int err = 0; + -+ struct kbase_va_region *walker; -+ u32 va_pages = 0; -+ u32 ph_pages = 0; ++ mutex_lock(&cli->cmd_sync_lock); ++ write_idx = atomic_read(&cli->write_idx); ++ read_idx = atomic_read(&cli->read_idx); + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { -+ if (walker->jit_bin_id != bin_id) -+ continue; ++ if (write_idx == read_idx || sample_access->sequence != read_idx) { ++ err = -EINVAL; ++ goto error_out; ++ } + -+ va_pages += walker->nr_pages; -+ ph_pages += walker->gpu_alloc->nents; ++ read_idx %= cli->sample_arr.sample_count; ++ sample_offset_bytes = ++ (u8 *)cli->sample_arr.samples[read_idx].sample_meta - cli->sample_arr.user_buf; ++ ++ if (sample_access->sample_offset_bytes != sample_offset_bytes) { ++ err = -EINVAL; ++ goto error_out; + } -+ mutex_unlock(&kctx->jit_evict_lock); + -+ KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, -+ max_allocations, alloc_count, va_pages, ph_pages); ++ fetch_idx = atomic_read(&cli->fetch_idx); ++ WARN_ON(read_idx == fetch_idx); ++ /* Setting the read_idx matching the fetch_idx, signals no in-flight ++ * fetched sample. 
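As an illustrative aside (not part of the patch): a user-space reader drives the GET_SAMPLE/PUT_SAMPLE pair above roughly as sketched below. The file descriptor and the size of the mapped sample area are assumed to come from the session setup ioctl (not shown in this hunk), and struct prfcnt_sample_access plus the KBASE_IOCTL_KINSTR_PRFCNT_* numbers from the driver's uapi header; error handling is omitted.

/* Hypothetical user-space reader loop (illustration only).  Assumes the
 * driver's uapi header is available for the ioctl macros and structures.
 */
#include <poll.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static void read_one_sample(int fd, size_t buf_size)
{
	uint8_t *buf = mmap(NULL, buf_size, PROT_READ, MAP_SHARED, fd, 0);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	struct prfcnt_sample_access sa;

	memset(&sa, 0, sizeof(sa));
	poll(&pfd, 1, -1);                                    /* wait for data */
	ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE, &sa);

	/* Metadata items and counter values for this sample start here. */
	const uint8_t *sample = buf + sa.sample_offset_bytes;
	(void)sample;            /* ... walk the sentinel-terminated items ... */

	ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE, &sa); /* release slot */
	munmap(buf, buf_size);
}

Putting the sample back is what advances read_idx, so a reader that holds on to a fetched sample for too long eventually fills the buffer and makes periodic dumps carry SAMPLE_FLAG_OVERFLOW.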
++ */ ++ atomic_set(&cli->read_idx, fetch_idx); ++ ++error_out: ++ mutex_unlock(&cli->cmd_sync_lock); ++ return err; +} + -+#if MALI_JIT_PRESSURE_LIMIT_BASE +/** -+ * get_jit_phys_backing() - calculate the physical backing of all JIT -+ * allocations -+ * -+ * @kctx: Pointer to the kbase context whose active JIT allocations will be -+ * checked ++ * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl. ++ * @filp: Non-NULL pointer to file structure. ++ * @cmd: User command. ++ * @arg: Command's argument. + * -+ * Return: number of pages that are committed by JIT allocations ++ * Return: 0 on success, else error code. + */ -+static size_t get_jit_phys_backing(struct kbase_context *kctx) ++static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp, ++ unsigned int cmd, ++ unsigned long arg) +{ -+ struct kbase_va_region *walker; -+ size_t backing = 0; -+ -+ lockdep_assert_held(&kctx->jit_evict_lock); ++ long rcode = 0; ++ struct kbase_kinstr_prfcnt_client *cli; ++ void __user *uarg = (void __user *)arg; + -+ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { -+ backing += kbase_reg_current_backed_size(walker); -+ } ++ if (!filp) ++ return -EINVAL; + -+ return backing; -+} ++ cli = filp->private_data; + -+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, -+ size_t needed_pages) -+{ -+ size_t jit_backing = 0; -+ size_t pages_to_trim = 0; ++ if (!cli) ++ return -EINVAL; + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->reg_lock); -+ lockdep_assert_held(&kctx->jit_evict_lock); ++ switch (_IOC_NR(cmd)) { ++ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): { ++ struct prfcnt_control_cmd control_cmd; ++ int err; + -+ jit_backing = get_jit_phys_backing(kctx); ++ err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd)); ++ if (err) ++ return -EFAULT; ++ rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd); ++ } break; ++ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): { ++ struct prfcnt_sample_access sample_access; ++ int err; + -+ /* It is possible that this is the case - if this is the first -+ * allocation after "ignore_pressure_limit" allocation. 
-+ */ -+ if (jit_backing > kctx->jit_phys_pages_limit) { -+ pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + -+ needed_pages; -+ } else { -+ size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; ++ memset(&sample_access, 0, sizeof(sample_access)); ++ rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access); ++ err = copy_to_user(uarg, &sample_access, sizeof(sample_access)); ++ if (err) ++ return -EFAULT; ++ } break; ++ case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): { ++ struct prfcnt_sample_access sample_access; ++ int err; + -+ if (needed_pages > backed_diff) -+ pages_to_trim += needed_pages - backed_diff; ++ err = copy_from_user(&sample_access, uarg, ++ sizeof(sample_access)); ++ if (err) ++ return -EFAULT; ++ rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access); ++ } break; ++ default: ++ rcode = -EINVAL; ++ break; + } + -+ if (pages_to_trim) { -+ size_t trimmed_pages = -+ kbase_mem_jit_trim_pages(kctx, pages_to_trim); -+ -+ /* This should never happen - we already asserted that -+ * we are not violating JIT pressure limit in earlier -+ * checks, which means that in-flight JIT allocations -+ * must have enough unused pages to satisfy the new -+ * allocation -+ */ -+ WARN_ON(trimmed_pages < pages_to_trim); -+ } ++ return rcode; +} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +/** -+ * jit_allow_allocate() - check whether basic conditions are satisfied to allow -+ * a new JIT allocation -+ * -+ * @kctx: Pointer to the kbase context -+ * @info: Pointer to JIT allocation information for the new allocation -+ * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check -+ * should be ignored ++ * kbasep_kinstr_prfcnt_hwcnt_reader_mmap() - hwcnt reader's mmap. ++ * @filp: Non-NULL pointer to file structure. ++ * @vma: Non-NULL pointer to vma structure. + * -+ * Return: true if allocation can be executed, false otherwise ++ * Return: 0 on success, else error code. 
+ */ -+static bool jit_allow_allocate(struct kbase_context *kctx, -+ const struct base_jit_alloc_info *info, -+ bool ignore_pressure_limit) ++static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp, ++ struct vm_area_struct *vma) +{ -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#else /* MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); -+#endif /* !MALI_USE_CSF */ -+ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (!ignore_pressure_limit && -+ ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || -+ (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { -+ dev_dbg(kctx->kbdev->dev, -+ "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", -+ kctx->jit_current_phys_pressure + info->va_pages, -+ kctx->jit_phys_pages_limit); -+ return false; -+ } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ struct kbase_kinstr_prfcnt_client *cli; + -+ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { -+ /* Too many current allocations */ -+ dev_dbg(kctx->kbdev->dev, -+ "Max JIT allocations limit reached: active allocations %d, max allocations %d\n", -+ kctx->jit_current_allocations, -+ kctx->jit_max_allocations); -+ return false; -+ } ++ if (!filp || !vma) ++ return -EINVAL; + -+ if (info->max_allocations > 0 && -+ kctx->jit_current_allocations_per_bin[info->bin_id] >= -+ info->max_allocations) { -+ /* Too many current allocations in this bin */ -+ dev_dbg(kctx->kbdev->dev, -+ "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", -+ info->bin_id, -+ kctx->jit_current_allocations_per_bin[info->bin_id], -+ info->max_allocations); -+ return false; -+ } ++ cli = filp->private_data; ++ if (!cli) ++ return -EINVAL; + -+ return true; ++ return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0); +} + -+static struct kbase_va_region * -+find_reasonable_region(const struct base_jit_alloc_info *info, -+ struct list_head *pool_head, bool ignore_usage_id) ++static void kbasep_kinstr_prfcnt_sample_array_free( ++ struct kbase_kinstr_prfcnt_sample_array *sample_arr) +{ -+ struct kbase_va_region *closest_reg = NULL; -+ struct kbase_va_region *walker; -+ size_t current_diff = SIZE_MAX; -+ -+ list_for_each_entry(walker, pool_head, jit_node) { -+ if ((ignore_usage_id || -+ walker->jit_usage_id == info->usage_id) && -+ walker->jit_bin_id == info->bin_id && -+ meet_size_and_tiler_align_top_requirements(walker, info)) { -+ size_t min_size, max_size, diff; ++ if (!sample_arr) ++ return; + -+ /* -+ * The JIT allocations VA requirements have been met, -+ * it's suitable but other allocations might be a -+ * better fit. 
-+ */ -+ min_size = min_t(size_t, walker->gpu_alloc->nents, -+ info->commit_pages); -+ max_size = max_t(size_t, walker->gpu_alloc->nents, -+ info->commit_pages); -+ diff = max_size - min_size; ++ kfree(sample_arr->samples); ++ vfree(sample_arr->user_buf); ++ memset(sample_arr, 0, sizeof(*sample_arr)); ++} + -+ if (current_diff > diff) { -+ current_diff = diff; -+ closest_reg = walker; -+ } ++static void ++kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, ++ enum kbase_kinstr_prfcnt_client_init_state init_state) ++{ ++ if (!cli) ++ return; + -+ /* The allocation is an exact match */ -+ if (current_diff == 0) -+ break; ++ while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { ++ switch (init_state) { ++ case KINSTR_PRFCNT_INITIALISED: ++ /* This shouldn't be reached */ ++ break; ++ case KINSTR_PRFCNT_WAITQ_MUTEX: ++ mutex_destroy(&cli->cmd_sync_lock); ++ break; ++ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: ++ kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); ++ break; ++ case KINSTR_PRFCNT_SAMPLE_ARRAY: ++ kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); ++ break; ++ case KINSTR_PRFCNT_DUMP_BUFFER: ++ kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); ++ break; ++ case KINSTR_PRFCNT_ENABLE_MAP: ++ kbase_hwcnt_enable_map_free(&cli->enable_map); ++ break; ++ case KINSTR_PRFCNT_PARSE_SETUP: ++ /* Nothing to do here */ ++ break; + } + } ++ kfree(cli); ++} + -+ return closest_reg; ++void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) ++{ ++ kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + -+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, -+ const struct base_jit_alloc_info *info, -+ bool ignore_pressure_limit) ++/** ++ * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. ++ * @inode: Non-NULL pointer to inode structure. ++ * @filp: Non-NULL pointer to file structure. ++ * ++ * Return: 0 always. ++ */ ++static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode, ++ struct file *filp) +{ -+ struct kbase_va_region *reg = NULL; -+ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; -+ int i; ++ struct kbase_kinstr_prfcnt_client *cli = filp->private_data; + -+ /* Calls to this function are inherently synchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; ++ mutex_lock(&cli->kinstr_ctx->lock); + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#else /* MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); -+#endif /* !MALI_USE_CSF */ ++ WARN_ON(cli->kinstr_ctx->client_count == 0); ++ if (cli->kinstr_ctx->client_count > 0) ++ cli->kinstr_ctx->client_count--; ++ list_del(&cli->node); + -+ if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) -+ return NULL; ++ mutex_unlock(&cli->kinstr_ctx->lock); + -+ if (kctx->kbdev->pagesize_2mb) { -+ /* Preallocate memory for the sub-allocation structs */ -+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { -+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); -+ if (!prealloc_sas[i]) -+ goto end; -+ } -+ } ++ kbasep_kinstr_prfcnt_client_destroy(cli); + -+ kbase_gpu_vm_lock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); ++ return 0; ++} + -+ /* -+ * Scan the pool for an existing allocation which meets our -+ * requirements and remove it. 
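As an illustrative aside (not part of the patch): teardown mirrors construction here. Client creation walks the kbase_kinstr_prfcnt_client_init_state enum forwards, and kbasep_kinstr_prfcnt_client_destroy_partial() walks it backwards from the last state reached, so a partially built client releases exactly what it managed to allocate. A generic standalone sketch of the pattern (names invented for the sketch):

#include <stdio.h>

/* Generic sketch of the "unwind by init state" pattern: tear down in the
 * exact reverse order of construction, starting from the state that was
 * reached when construction stopped.
 */
enum init_state { ST_NONE, ST_BUF, ST_LOCK, ST_DONE };

static void destroy_partial(enum init_state reached)
{
	while (reached-- > ST_NONE) {
		switch (reached) {
		case ST_LOCK:
			printf("destroy lock\n");
			break;
		case ST_BUF:
			printf("free buffer\n");
			break;
		default:
			break;
		}
	}
}

int main(void)
{
	/* Construction failed after the buffer was allocated but before the
	 * lock was initialised, so only the buffer is released.
	 */
	destroy_partial(ST_LOCK);
	return 0;
}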
-+ */ -+ if (info->usage_id != 0) -+ /* First scan for an allocation with the same usage ID */ -+ reg = find_reasonable_region(info, &kctx->jit_pool_head, false); ++/* kinstr_prfcnt client file operations */ ++static const struct file_operations kinstr_prfcnt_client_fops = { ++ .owner = THIS_MODULE, ++ .poll = kbasep_kinstr_prfcnt_hwcnt_reader_poll, ++ .unlocked_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl, ++ .compat_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl, ++ .mmap = kbasep_kinstr_prfcnt_hwcnt_reader_mmap, ++ .release = kbasep_kinstr_prfcnt_hwcnt_reader_release, ++}; + -+ if (!reg) -+ /* No allocation with the same usage ID, or usage IDs not in -+ * use. Search for an allocation we can reuse. -+ */ -+ reg = find_reasonable_region(info, &kctx->jit_pool_head, true); ++size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_enable_map *enable_map) ++{ ++ size_t grp, blk, blk_inst; ++ size_t md_count = 0; + -+ if (reg) { -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ size_t needed_pages = 0; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ int ret; ++ if (!metadata) ++ return 0; + -+ /* -+ * Remove the found region from the pool and add it to the -+ * active list. -+ */ -+ list_move(®->jit_node, &kctx->jit_active_head); ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { ++ /* Skip unavailable, non-enabled or reserved blocks */ ++ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || ++ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || ++ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) ++ continue; + -+ WARN_ON(reg->gpu_alloc->evicted); ++ md_count++; ++ } + -+ /* -+ * Remove the allocation from the eviction list as it's no -+ * longer eligible for eviction. This must be done before -+ * dropping the jit_evict_lock -+ */ -+ list_del_init(®->gpu_alloc->evict_node); ++ /* add counts for clock_meta and sample meta, respectively */ ++ md_count += 2; + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (!ignore_pressure_limit) { -+ if (info->commit_pages > reg->gpu_alloc->nents) -+ needed_pages = info->commit_pages - -+ reg->gpu_alloc->nents; ++ /* Reserve one for last sentinel item. */ ++ md_count++; + -+ /* Update early the recycled JIT region's estimate of -+ * used_pages to ensure it doesn't get trimmed -+ * undesirably. This is needed as the recycled JIT -+ * region has been added to the active list but the -+ * number of used pages for it would be zero, so it -+ * could get trimmed instead of other allocations only -+ * to be regrown later resulting in a breach of the JIT -+ * physical pressure limit. -+ * Also that trimming would disturb the accounting of -+ * physical pages, i.e. the VM stats, as the number of -+ * backing pages would have changed when the call to -+ * kbase_mem_evictable_unmark_reclaim is made. -+ * -+ * The second call to update pressure at the end of -+ * this function would effectively be a nop. 
-+ */ -+ kbase_jit_report_update_pressure( -+ kctx, reg, info->va_pages, -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ return md_count; ++} + -+ kbase_jit_request_phys_increase_locked(kctx, -+ needed_pages); -+ } -+#endif -+ mutex_unlock(&kctx->jit_evict_lock); ++static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_client *cli, ++ const struct kbase_hwcnt_metadata *metadata) ++{ ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ size_t sample_meta_bytes; ++ struct kbase_hwcnt_dump_buffer *dump_buf = &cli->tmp_buf; ++ size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map); + -+ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', -+ * so any state protected by that lock might need to be -+ * re-evaluated if more code is added here in future. -+ */ -+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, -+ mmu_sync_info); -+ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (!ignore_pressure_limit) -+ kbase_jit_done_phys_increase(kctx, needed_pages); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+ kbase_gpu_vm_unlock(kctx); -+ -+ if (ret < 0) { -+ /* -+ * An update to an allocation from the pool failed, -+ * chances are slim a new allocation would fare any -+ * better so return the allocation to the pool and -+ * return the function with failure. -+ */ -+ dev_dbg(kctx->kbdev->dev, -+ "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", -+ info->va_pages, info->commit_pages); -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ /* Undo the early change made to the recycled JIT -+ * region's estimate of used_pages. -+ */ -+ if (!ignore_pressure_limit) { -+ kbase_jit_report_update_pressure( -+ kctx, reg, 0, -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -+ } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ mutex_lock(&kctx->jit_evict_lock); -+ list_move(®->jit_node, &kctx->jit_pool_head); -+ mutex_unlock(&kctx->jit_evict_lock); -+ reg = NULL; -+ goto end; -+ } else { -+ /* A suitable JIT allocation existed on the evict list, so we need -+ * to make sure that the NOT_MOVABLE property is cleared. -+ */ -+ if (kbase_page_migration_enabled) { -+ kbase_gpu_vm_lock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); -+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_gpu_vm_unlock(kctx); -+ } -+ } -+ } else { -+ /* No suitable JIT allocation was found so create a new one */ -+ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | -+ BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | -+ BASE_MEM_COHERENT_LOCAL | -+ BASEP_MEM_NO_USER_FREE; -+ u64 gpu_addr; -+ -+#if !MALI_USE_CSF -+ if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) -+ flags |= BASE_MEM_TILER_ALIGN_TOP; -+#endif /* !MALI_USE_CSF */ -+ -+ flags |= kbase_mem_group_id_set(kctx->jit_group_id); -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (!ignore_pressure_limit) { -+ flags |= BASEP_MEM_PERFORM_JIT_TRIM; -+ /* The corresponding call to 'done_phys_increase' would -+ * be made inside the kbase_mem_alloc(). 
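As an illustrative aside (not part of the patch): kbasep_kinstr_prfcnt_get_sample_md_count() and kbasep_kinstr_prfcnt_get_sample_size() above fix the per-sample footprint up front: one metadata item per enabled block plus the SAMPLE and CLOCK items and the closing sentinel, followed by the dump buffer itself and one cycle counter per clock domain. A rough recomputation, with an invented 16-byte stand-in for sizeof(struct prfcnt_metadata) and 64-bit cycle counters assumed:

#include <stddef.h>
#include <stdio.h>

/* Illustrative recomputation of the per-sample size.  The real code takes
 * dump_buf_bytes and clk_cnt from the hardware counter metadata; the
 * md_item_bytes argument is only a stand-in for the uapi item size.
 */
static size_t sample_size(size_t enabled_blocks, size_t dump_buf_bytes,
			  size_t clk_cnt, size_t md_item_bytes)
{
	/* one SAMPLE item + one CLOCK item + one item per block + sentinel */
	size_t md_count = enabled_blocks + 2 + 1;

	return md_count * md_item_bytes + dump_buf_bytes +
	       clk_cnt * sizeof(unsigned long long);
}

int main(void)
{
	/* e.g. 8 enabled blocks, 2 KiB of raw counters, one clock domain */
	printf("%zu\n", sample_size(8, 2048, 1, 16));
	return 0;
}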
-+ */ -+ kbase_jit_request_phys_increase_locked( -+ kctx, info->commit_pages); -+ } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ if (!metadata) ++ return 0; + -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_gpu_vm_unlock(kctx); ++ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + -+ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, -+ &flags, &gpu_addr, mmu_sync_info); -+ if (!reg) { -+ /* Most likely not enough GPU virtual space left for -+ * the new JIT allocation. -+ */ -+ dev_dbg(kctx->kbdev->dev, -+ "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", -+ info->va_pages, info->commit_pages); -+ goto end; -+ } ++ return (sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes); ++} + -+ if (!ignore_pressure_limit) { -+ /* Due to enforcing of pressure limit, kbase_mem_alloc -+ * was instructed to perform the trimming which in turn -+ * would have ensured that the new JIT allocation is -+ * already in the jit_active_head list, so nothing to -+ * do here. -+ */ -+ WARN_ON(list_empty(®->jit_node)); -+ } else { -+ mutex_lock(&kctx->jit_evict_lock); -+ list_add(®->jit_node, &kctx->jit_active_head); -+ mutex_unlock(&kctx->jit_evict_lock); -+ } -+ } ++/** ++ * kbasep_kinstr_prfcnt_dump_worker()- Dump worker, that dumps all periodic ++ * clients that need to be dumped, then ++ * reschedules itself. ++ * @work: Work structure. ++ */ ++static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) ++{ ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( ++ work, struct kbase_kinstr_prfcnt_context, dump_work); ++ struct kbase_kinstr_prfcnt_client *pos; ++ u64 cur_time_ns; + -+ /* Similarly to tiler heap init, there is a short window of time -+ * where the (either recycled or newly allocated, in our case) region has -+ * "no user free" count incremented but is still missing the DONT_NEED flag, and -+ * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the -+ * allocation is the least bad option that doesn't lead to a security issue down the -+ * line (it will eventually be cleaned up during context termination). -+ * -+ * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region -+ * flags. 
-+ */ -+ kbase_gpu_vm_lock(kctx); -+ if (unlikely(atomic_read(®->no_user_free_count) > 1)) { -+ kbase_gpu_vm_unlock(kctx); -+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n"); ++ mutex_lock(&kinstr_ctx->lock); + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_move(®->jit_node, &kctx->jit_pool_head); -+ mutex_unlock(&kctx->jit_evict_lock); ++ cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns(); + -+ reg = NULL; -+ goto end; ++ list_for_each_entry(pos, &kinstr_ctx->clients, node) { ++ if (pos->active && (pos->next_dump_time_ns != 0) && ++ (pos->next_dump_time_ns < cur_time_ns)) ++ kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC, ++ pos->user_data); + } + -+ trace_mali_jit_alloc(reg, info->id); -+ -+ kctx->jit_current_allocations++; -+ kctx->jit_current_allocations_per_bin[info->bin_id]++; ++ kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); + -+ trace_jit_stats(kctx, info->bin_id, info->max_allocations); ++ mutex_unlock(&kinstr_ctx->lock); ++} + -+ reg->jit_usage_id = info->usage_id; -+ reg->jit_bin_id = info->bin_id; -+ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) -+ reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; -+ reg->heap_info_gpu_addr = info->heap_info_gpu_addr; -+ kbase_jit_report_update_pressure(kctx, reg, info->va_pages, -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ kbase_gpu_vm_unlock(kctx); ++/** ++ * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for ++ * execution as soon as possible. ++ * @timer: Timer structure. ++ * ++ * Return: HRTIMER_NORESTART always. ++ */ ++static enum hrtimer_restart ++kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer) ++{ ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of( ++ timer, struct kbase_kinstr_prfcnt_context, dump_timer); + -+end: -+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) -+ kfree(prealloc_sas[i]); ++ /* We don't need to check kinstr_ctx->suspend_count here. ++ * Suspend and resume functions already ensure that the worker ++ * is cancelled when the driver is suspended, and resumed when ++ * the suspend_count reaches 0. 
++ */ ++ kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt, ++ &kinstr_ctx->dump_work); + -+ return reg; ++ return HRTIMER_NORESTART; +} + -+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) ++int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx) +{ -+ u64 old_pages; ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx; ++ const struct kbase_hwcnt_metadata *metadata; + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#else /* MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); -+#endif /* !MALI_USE_CSF */ ++ if (!hvirt || !out_kinstr_ctx) ++ return -EINVAL; + -+ /* JIT id not immediately available here, so use 0u */ -+ trace_mali_jit_free(reg, 0u); ++ metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + -+ /* Get current size of JIT region */ -+ old_pages = kbase_reg_current_backed_size(reg); -+ if (reg->initial_commit < old_pages) { -+ /* Free trim_level % of region, but don't go below initial -+ * commit size -+ */ -+ u64 new_size = MAX(reg->initial_commit, -+ div_u64(old_pages * (100 - kctx->trim_level), 100)); -+ u64 delta = old_pages - new_size; ++ if (!metadata) ++ return -EINVAL; + -+ if (delta) { -+ mutex_lock(&kctx->reg_lock); -+ kbase_mem_shrink(kctx, reg, old_pages - delta); -+ mutex_unlock(&kctx->reg_lock); -+ } -+ } ++ kinstr_ctx = kzalloc(sizeof(*kinstr_ctx), GFP_KERNEL); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ reg->heap_info_gpu_addr = 0; -+ kbase_jit_report_update_pressure(kctx, reg, 0, -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ if (!kinstr_ctx) ++ return -ENOMEM; + -+ kctx->jit_current_allocations--; -+ kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; ++ kinstr_ctx->hvirt = hvirt; ++ kinstr_ctx->metadata = metadata; + -+ trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); ++ mutex_init(&kinstr_ctx->lock); ++ INIT_LIST_HEAD(&kinstr_ctx->clients); ++ hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ kinstr_ctx->dump_timer.function = kbasep_kinstr_prfcnt_dump_timer; ++ INIT_WORK(&kinstr_ctx->dump_work, kbasep_kinstr_prfcnt_dump_worker); + -+ kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); ++ *out_kinstr_ctx = kinstr_ctx; ++ return 0; ++} + -+ kbase_gpu_vm_lock(kctx); -+ reg->flags |= KBASE_REG_DONT_NEED; -+ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; -+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); -+ kbase_gpu_vm_unlock(kctx); ++void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) ++{ ++ if (!kinstr_ctx) ++ return; + -+ /* -+ * Add the allocation to the eviction list and the jit pool, after this -+ * point the shrink can reclaim it, or it may be reused. -+ */ -+ mutex_lock(&kctx->jit_evict_lock); ++ /* Non-zero client count implies client leak */ ++ if (WARN_ON(kinstr_ctx->client_count > 0)) { ++ struct kbase_kinstr_prfcnt_client *pos, *n; + -+ /* This allocation can't already be on a list. 
*/ -+ WARN_ON(!list_empty(®->gpu_alloc->evict_node)); -+ list_add(®->gpu_alloc->evict_node, &kctx->evict_list); -+ atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents); ++ list_for_each_entry_safe (pos, n, &kinstr_ctx->clients, node) { ++ list_del(&pos->node); ++ kinstr_ctx->client_count--; ++ kbasep_kinstr_prfcnt_client_destroy(pos); ++ } ++ } + -+ list_move(®->jit_node, &kctx->jit_pool_head); ++ cancel_work_sync(&kinstr_ctx->dump_work); + -+ /* Inactive JIT regions should be freed by the shrinker and not impacted -+ * by page migration. Once freed, they will enter into the page migration -+ * state machine via the mempools. -+ */ -+ if (kbase_page_migration_enabled) -+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); -+ mutex_unlock(&kctx->jit_evict_lock); ++ WARN_ON(kinstr_ctx->client_count > 0); ++ kfree(kinstr_ctx); +} + -+void kbase_jit_backing_lost(struct kbase_va_region *reg) ++void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) +{ -+ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); -+ -+ if (WARN_ON(!kctx)) ++ if (WARN_ON(!kinstr_ctx)) + return; + -+ lockdep_assert_held(&kctx->jit_evict_lock); ++ mutex_lock(&kinstr_ctx->lock); + -+ /* -+ * JIT allocations will always be on a list, if the region -+ * is not on a list then it's not a JIT allocation. -+ */ -+ if (list_empty(®->jit_node)) -+ return; ++ if (!WARN_ON(kinstr_ctx->suspend_count == SIZE_MAX)) ++ kinstr_ctx->suspend_count++; + -+ /* -+ * Freeing the allocation requires locks we might not be able -+ * to take now, so move the allocation to the free list and kick -+ * the worker which will do the freeing. -+ */ -+ list_move(®->jit_node, &kctx->jit_destroy_head); ++ mutex_unlock(&kinstr_ctx->lock); + -+ schedule_work(&kctx->jit_work); ++ /* Always sync cancel the timer and then the worker, regardless of the ++ * new suspend count. ++ * ++ * This ensures concurrent calls to kbase_kinstr_prfcnt_suspend() always block ++ * until kinstr_prfcnt is fully suspended. ++ * ++ * The timer is canceled before the worker, as the timer ++ * unconditionally re-enqueues the worker, but the worker checks the ++ * suspend_count that we just incremented before rescheduling the timer. ++ * ++ * Therefore if we cancel the worker first, the timer might re-enqueue ++ * the worker before we cancel the timer, but the opposite is not ++ * possible. ++ */ ++ hrtimer_cancel(&kinstr_ctx->dump_timer); ++ cancel_work_sync(&kinstr_ctx->dump_work); +} + -+bool kbase_jit_evict(struct kbase_context *kctx) ++void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) +{ -+ struct kbase_va_region *reg = NULL; ++ if (WARN_ON(!kinstr_ctx)) ++ return; + -+ lockdep_assert_held(&kctx->reg_lock); ++ mutex_lock(&kinstr_ctx->lock); + -+ /* Free the oldest allocation from the pool */ -+ mutex_lock(&kctx->jit_evict_lock); -+ if (!list_empty(&kctx->jit_pool_head)) { -+ reg = list_entry(kctx->jit_pool_head.prev, -+ struct kbase_va_region, jit_node); -+ list_del(®->jit_node); -+ list_del_init(®->gpu_alloc->evict_node); -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++ if (!WARN_ON(kinstr_ctx->suspend_count == 0)) { ++ kinstr_ctx->suspend_count--; + -+ if (reg) { -+ /* -+ * Incrementing the refcount is prevented on JIT regions. -+ * If/when this ever changes we would need to compensate -+ * by implementing "free on putting the last reference", -+ * but only for JIT regions. ++ /* Last resume, so re-enqueue the worker if we have any periodic ++ * clients. 
+ */ -+ WARN_ON(atomic_read(®->no_user_free_count) > 1); -+ kbase_va_region_no_user_free_dec(reg); -+ kbase_mem_free_region(kctx, reg); ++ if (kinstr_ctx->suspend_count == 0) { ++ struct kbase_kinstr_prfcnt_client *pos; ++ bool has_periodic_clients = false; ++ ++ list_for_each_entry (pos, &kinstr_ctx->clients, node) { ++ if (pos->dump_interval_ns != 0) { ++ has_periodic_clients = true; ++ break; ++ } ++ } ++ ++ if (has_periodic_clients) ++ kbase_hwcnt_virtualizer_queue_work( ++ kinstr_ctx->hvirt, ++ &kinstr_ctx->dump_work); ++ } + } + -+ return (reg != NULL); ++ mutex_unlock(&kinstr_ctx->lock); +} + -+void kbase_jit_term(struct kbase_context *kctx) ++static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_client *cli, ++ const struct kbase_hwcnt_metadata *metadata) +{ -+ struct kbase_va_region *walker; -+ -+ /* Free all allocations for this context */ ++ struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr; ++ struct kbase_kinstr_prfcnt_sample *samples; ++ size_t sample_idx; ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ size_t sample_meta_bytes; ++ size_t md_count; ++ size_t sample_size; ++ size_t buffer_count = cli->config.buffer_count; + -+ kbase_gpu_vm_lock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); -+ /* Free all allocations from the pool */ -+ while (!list_empty(&kctx->jit_pool_head)) { -+ walker = list_first_entry(&kctx->jit_pool_head, -+ struct kbase_va_region, jit_node); -+ list_del(&walker->jit_node); -+ list_del_init(&walker->gpu_alloc->evict_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ /* -+ * Incrementing the refcount is prevented on JIT regions. -+ * If/when this ever changes we would need to compensate -+ * by implementing "free on putting the last reference", -+ * but only for JIT regions. -+ */ -+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1); -+ kbase_va_region_no_user_free_dec(walker); -+ kbase_mem_free_region(kctx, walker); -+ mutex_lock(&kctx->jit_evict_lock); -+ } ++ if (!metadata || !sample_arr) ++ return -EINVAL; + -+ /* Free all allocations from active list */ -+ while (!list_empty(&kctx->jit_active_head)) { -+ walker = list_first_entry(&kctx->jit_active_head, -+ struct kbase_va_region, jit_node); -+ list_del(&walker->jit_node); -+ list_del_init(&walker->gpu_alloc->evict_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ /* -+ * Incrementing the refcount is prevented on JIT regions. -+ * If/when this ever changes we would need to compensate -+ * by implementing "free on putting the last reference", -+ * but only for JIT regions. -+ */ -+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1); -+ kbase_va_region_no_user_free_dec(walker); -+ kbase_mem_free_region(kctx, walker); -+ mutex_lock(&kctx->jit_evict_lock); -+ } -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ WARN_ON(kctx->jit_phys_pages_to_be_allocated); -+#endif -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_gpu_vm_unlock(kctx); ++ md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map); ++ sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count; ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = ++ sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt; ++ sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes; + -+ /* -+ * Flush the freeing of allocations whose backing has been freed -+ * (i.e. everything in jit_destroy_head). 
-+ */ -+ cancel_work_sync(&kctx->jit_work); -+} ++ samples = kmalloc_array(buffer_count, sizeof(*samples), GFP_KERNEL); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned int flags) -+{ -+ /* Offset to the location used for a JIT report within the GPU memory -+ * -+ * This constants only used for this debugging function - not useful -+ * anywhere else in kbase -+ */ -+ const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; ++ if (!samples) ++ return -ENOMEM; + -+ u64 addr_start; -+ struct kbase_vmap_struct mapping; -+ u64 *ptr; ++ sample_arr->user_buf = vmalloc_user(sample_size * buffer_count); + -+ if (reg->heap_info_gpu_addr == 0ull) -+ goto out; ++ if (!sample_arr->user_buf) { ++ kfree(samples); ++ return -ENOMEM; ++ } + -+ /* Nothing else to trace in the case the memory just contains the -+ * size. Other tracepoints already record the relevant area of memory. -+ */ -+ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) -+ goto out; ++ sample_arr->sample_count = buffer_count; ++ sample_arr->samples = samples; + -+ addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; ++ for (sample_idx = 0; sample_idx < buffer_count; sample_idx++) { ++ const size_t sample_meta_offset = sample_size * sample_idx; ++ const size_t dump_buf_offset = ++ sample_meta_offset + sample_meta_bytes; ++ const size_t clk_cnt_buf_offset = ++ dump_buf_offset + dump_buf_bytes; + -+ ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, -+ KBASE_REG_CPU_RD, &mapping); -+ if (!ptr) { -+ dev_warn(kctx->kbdev->dev, -+ "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", -+ __func__, reg->start_pfn << PAGE_SHIFT, -+ addr_start); -+ goto out; ++ /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */ ++ samples[sample_idx].dump_buf.metadata = metadata; ++ samples[sample_idx].sample_meta = ++ (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset); ++ samples[sample_idx].dump_buf.dump_buf = ++ (u64 *)(sample_arr->user_buf + dump_buf_offset); ++ samples[sample_idx].dump_buf.clk_cnt_buf = ++ (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset); + } + -+ trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, -+ ptr, flags); -+ -+ kbase_vunmap(kctx, &mapping); -+out: -+ return; ++ return 0; +} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+void kbase_jit_report_update_pressure(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 new_used_pages, -+ unsigned int flags) ++static bool prfcnt_mode_supported(u8 mode) +{ -+ u64 diff; ++ return (mode == PRFCNT_MODE_MANUAL) || (mode == PRFCNT_MODE_PERIODIC); ++} + -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ ++static void ++kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em, ++ const uint64_t *enable_mask) ++{ ++ *phys_em |= kbase_hwcnt_backend_gpu_block_map_to_physical( ++ enable_mask[0], enable_mask[1]); ++} + -+ trace_mali_jit_report_pressure(reg, new_used_pages, -+ kctx->jit_current_phys_pressure + new_used_pages - -+ reg->used_pages, -+ flags); ++/** ++ * kbasep_kinstr_prfcnt_parse_request_enable - Parse an enable request ++ * @req_enable: Performance counters enable request to parse. ++ * @config: Client object the session configuration should be written to. ++ * ++ * This function parses a performance counters enable request. 
++ * This type of request specifies a bitmask of HW counters to enable ++ * for one performance counters block type. In addition to that, ++ * a performance counters enable request may also set "global" ++ * configuration properties that affect the whole session, like the ++ * performance counters set, which shall be compatible with the same value ++ * set by other performance request items. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_kinstr_prfcnt_parse_request_enable( ++ const struct prfcnt_request_enable *req_enable, ++ struct kbase_kinstr_prfcnt_client_config *config) ++{ ++ int err = 0; ++ u8 req_set = KBASE_HWCNT_SET_UNDEFINED, default_set; + -+ if (WARN_ON(new_used_pages > reg->nr_pages)) -+ return; ++ switch (req_enable->set) { ++ case PRFCNT_SET_PRIMARY: ++ req_set = KBASE_HWCNT_SET_PRIMARY; ++ break; ++ case PRFCNT_SET_SECONDARY: ++ req_set = KBASE_HWCNT_SET_SECONDARY; ++ break; ++ case PRFCNT_SET_TERTIARY: ++ req_set = KBASE_HWCNT_SET_TERTIARY; ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } + -+ if (reg->used_pages > new_used_pages) { -+ /* We reduced the number of used pages */ -+ diff = reg->used_pages - new_used_pages; ++ /* The performance counter set is a "global" property that affects ++ * the whole session. Either this is the first request that sets ++ * the value, or it shall be identical to all previous requests. ++ */ ++ if (!err) { ++ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) ++ config->counter_set = req_set; ++ else if (config->counter_set != req_set) ++ err = -EINVAL; ++ } + -+ if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) -+ kctx->jit_current_phys_pressure -= diff; ++ /* Temporarily, the requested set cannot be different from the default ++ * set because it's the only one to be supported. This will change in ++ * the future. ++ */ ++#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) ++ default_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ default_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ default_set = KBASE_HWCNT_SET_PRIMARY; ++#endif + -+ reg->used_pages = new_used_pages; -+ } else { -+ /* We increased the number of used pages */ -+ diff = new_used_pages - reg->used_pages; ++ if (req_set != default_set) ++ err = -EINVAL; + -+ if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) -+ kctx->jit_current_phys_pressure += diff; ++ if (err < 0) ++ return err; + -+ reg->used_pages = new_used_pages; ++ /* Enable the performance counters based on the bitmask provided ++ * by the user space client. ++ * It is possible to receive multiple requests for the same counter ++ * block, in which case the bitmask will be a logical OR of all the ++ * bitmasks given by the client. 
++ */ ++ switch (req_enable->block_type) { ++ case PRFCNT_BLOCK_TYPE_FE: ++ kbasep_kinstr_prfcnt_block_enable_to_physical( ++ &config->phys_em.fe_bm, req_enable->enable_mask); ++ break; ++ case PRFCNT_BLOCK_TYPE_TILER: ++ kbasep_kinstr_prfcnt_block_enable_to_physical( ++ &config->phys_em.tiler_bm, req_enable->enable_mask); ++ break; ++ case PRFCNT_BLOCK_TYPE_MEMORY: ++ kbasep_kinstr_prfcnt_block_enable_to_physical( ++ &config->phys_em.mmu_l2_bm, req_enable->enable_mask); ++ break; ++ case PRFCNT_BLOCK_TYPE_SHADER_CORE: ++ kbasep_kinstr_prfcnt_block_enable_to_physical( ++ &config->phys_em.shader_bm, req_enable->enable_mask); ++ break; ++ default: ++ err = -EINVAL; ++ break; + } + ++ return err; +} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+#if MALI_USE_CSF -+static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) ++/** ++ * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request ++ * @req_scope: Performance counters scope request to parse. ++ * @config: Client object the session configuration should be written to. ++ * ++ * This function parses a performance counters scope request. ++ * There are only 2 acceptable outcomes: either the client leaves the scope ++ * as undefined, or all the scope requests are set to the same value. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_kinstr_prfcnt_parse_request_scope( ++ const struct prfcnt_request_scope *req_scope, ++ struct kbase_kinstr_prfcnt_client_config *config) +{ -+ /* In CSF builds, we keep pages pinned until the last reference is -+ * released on the alloc. A refcount of 0 also means we can be sure -+ * that all CPU mappings have been closed on this alloc, and no more -+ * mappings of it will be created. -+ * -+ * Further, the WARN() below captures the restriction that this -+ * function will not handle anything other than the alloc termination -+ * path, because the caller of kbase_mem_phy_alloc_put() is not -+ * required to hold the kctx's reg_lock, and so we could not handle -+ * removing an existing CPU mapping here. -+ * -+ * Refer to this function's kernel-doc comments for alternatives for -+ * unpinning a User buffer. -+ */ ++ int err = 0; + -+ if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, -+ "must only be called on terminating an allocation")) { -+ struct page **pages = alloc->imported.user_buf.pages; -+ long i; ++ if (config->scope == PRFCNT_SCOPE_RESERVED) ++ config->scope = req_scope->scope; ++ else if (config->scope != req_scope->scope) ++ err = -EINVAL; + -+ WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); ++ return err; ++} + -+ for (i = 0; i < alloc->nents; i++) -+ kbase_unpin_user_buf_page(pages[i]); ++/** ++ * kbasep_kinstr_prfcnt_parse_setup - Parse session setup ++ * @kinstr_ctx: Pointer to the kinstr_prfcnt context. ++ * @setup: Session setup information to parse. ++ * @config: Client object the session configuration should be written to. ++ * @req_arr: Pointer to array of request items for client session. ++ * ++ * This function parses the list of "request" items sent by the user space ++ * client, and writes the configuration for the new client to be created ++ * for the session. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ union kbase_ioctl_kinstr_prfcnt_setup *setup, ++ struct kbase_kinstr_prfcnt_client_config *config, ++ struct prfcnt_request_item *req_arr) ++{ ++ uint32_t i; ++ unsigned int item_count = setup->in.request_item_count; ++ int err = 0; + -+ alloc->nents = 0; ++ if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE || ++ req_arr[item_count - 1].hdr.item_version != 0) { ++ return -EINVAL; + } -+} -+#endif + -+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, -+ struct kbase_va_region *reg) -+{ -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; -+ struct page **pages = alloc->imported.user_buf.pages; -+ unsigned long address = alloc->imported.user_buf.address; -+ struct mm_struct *mm = alloc->imported.user_buf.mm; -+ long pinned_pages; -+ long i; -+ int write; ++ /* The session configuration can only feature one value for some ++ * properties (like capture mode, block counter set and scope), but the ++ * client may potential issue multiple requests and try to set more than ++ * one value for those properties. While issuing multiple requests for the ++ * same property is allowed by the protocol, asking for different values ++ * is illegal. Leaving these properties as undefined is illegal, too. ++ */ ++ config->prfcnt_mode = PRFCNT_MODE_RESERVED; ++ config->counter_set = KBASE_HWCNT_SET_UNDEFINED; ++ config->scope = PRFCNT_SCOPE_RESERVED; + -+ lockdep_assert_held(&kctx->reg_lock); ++ for (i = 0; i < item_count - 1; i++) { ++ if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) { ++ err = -EINVAL; ++ break; ++ } + -+ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) -+ return -EINVAL; ++ switch (req_arr[i].hdr.item_type) { ++ /* Capture mode is initialized as undefined. ++ * The first request of this type sets the capture mode. ++ * The protocol allows the client to send redundant requests, ++ * but only if they replicate the same value that has already ++ * been set by the first request. ++ */ ++ case PRFCNT_REQUEST_TYPE_MODE: ++ if (!prfcnt_mode_supported(req_arr[i].u.req_mode.mode)) ++ err = -EINVAL; ++ else if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) ++ config->prfcnt_mode = ++ req_arr[i].u.req_mode.mode; ++ else if (req_arr[i].u.req_mode.mode != ++ config->prfcnt_mode) ++ err = -EINVAL; + -+ if (alloc->nents) { -+ if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages)) -+ return -EINVAL; -+ else -+ return 0; -+ } ++ if (err < 0) ++ break; + -+ if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) -+ return -EINVAL; ++ if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) { ++ config->period_ns = ++ req_arr[i] ++ .u.req_mode.mode_config.periodic ++ .period_ns; + -+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); ++ if ((config->period_ns != 0) && ++ (config->period_ns < ++ DUMP_INTERVAL_MIN_NS)) { ++ config->period_ns = ++ DUMP_INTERVAL_MIN_NS; ++ } + -+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE -+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, -+ write ? FOLL_WRITE : 0, pages, NULL); -+#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE -+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, -+ write ? FOLL_WRITE : 0, pages, NULL, NULL); -+#else -+ pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, -+ write ? 
FOLL_WRITE : 0, pages, NULL); -+#endif ++ if (config->period_ns == 0) ++ err = -EINVAL; ++ } ++ break; + -+ if (pinned_pages <= 0) -+ return pinned_pages; ++ case PRFCNT_REQUEST_TYPE_ENABLE: ++ err = kbasep_kinstr_prfcnt_parse_request_enable( ++ &req_arr[i].u.req_enable, config); ++ break; + -+ if (pinned_pages != alloc->imported.user_buf.nr_pages) { -+ /* Above code already ensures there will not have been a CPU -+ * mapping by ensuring alloc->nents is 0 -+ */ -+ for (i = 0; i < pinned_pages; i++) -+ kbase_unpin_user_buf_page(pages[i]); -+ return -ENOMEM; ++ case PRFCNT_REQUEST_TYPE_SCOPE: ++ err = kbasep_kinstr_prfcnt_parse_request_scope( ++ &req_arr[i].u.req_scope, config); ++ break; ++ ++ default: ++ err = -EINVAL; ++ break; ++ } ++ ++ if (err < 0) ++ break; + } + -+ alloc->nents = pinned_pages; ++ if (!err) { ++ /* Verify that properties (like capture mode and block counter ++ * set) have been defined by the user space client. ++ */ ++ if (config->prfcnt_mode == PRFCNT_MODE_RESERVED) ++ err = -EINVAL; + -+ return 0; ++ if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED) ++ err = -EINVAL; ++ } ++ ++ return err; +} + -+static int kbase_jd_user_buf_map(struct kbase_context *kctx, -+ struct kbase_va_region *reg) ++int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ union kbase_ioctl_kinstr_prfcnt_setup *setup, ++ struct kbase_kinstr_prfcnt_client **out_vcli, ++ struct prfcnt_request_item *req_arr) +{ + int err; -+ long pinned_pages = 0; -+ struct kbase_mem_phy_alloc *alloc; -+ struct page **pages; -+ struct tagged_addr *pa; -+ long i, dma_mapped_pages; -+ struct device *dev; -+ unsigned long gwt_mask = ~0; -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ -+ lockdep_assert_held(&kctx->reg_lock); -+ -+ err = kbase_jd_user_buf_pin_pages(kctx, reg); ++ struct kbase_kinstr_prfcnt_client *cli; ++ enum kbase_kinstr_prfcnt_client_init_state init_state; + -+ if (err) -+ return err; ++ if (WARN_ON(!kinstr_ctx)) ++ return -EINVAL; + -+ alloc = reg->gpu_alloc; -+ pa = kbase_get_gpu_phy_pages(reg); -+ pinned_pages = alloc->nents; -+ pages = alloc->imported.user_buf.pages; -+ dev = kctx->kbdev->dev; ++ if (WARN_ON(!setup)) ++ return -EINVAL; + -+ /* Manual CPU cache synchronization. -+ * -+ * The driver disables automatic CPU cache synchronization because the -+ * memory pages that enclose the imported region may also contain -+ * sub-regions which are not imported and that are allocated and used -+ * by the user process. This may be the case of memory at the beginning -+ * of the first page and at the end of the last page. Automatic CPU cache -+ * synchronization would force some operations on those memory allocations, -+ * unbeknown to the user process: in particular, a CPU cache invalidate -+ * upon unmapping would destroy the content of dirty CPU caches and cause -+ * the user process to lose CPU writes to the non-imported sub-regions. -+ * -+ * When the GPU claims ownership of the imported memory buffer, it shall -+ * commit CPU writes for the whole of all pages that enclose the imported -+ * region, otherwise the initial content of memory would be wrong. 
-+ */ -+ for (i = 0; i < pinned_pages; i++) { -+ dma_addr_t dma_addr; -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#else -+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, -+ DMA_ATTR_SKIP_CPU_SYNC); -+#endif -+ err = dma_mapping_error(dev, dma_addr); -+ if (err) -+ goto unwind; ++ if (WARN_ON(!req_arr)) ++ return -EINVAL; + -+ alloc->imported.user_buf.dma_addrs[i] = dma_addr; -+ pa[i] = as_tagged(page_to_phys(pages[i])); ++ cli = kzalloc(sizeof(*cli), GFP_KERNEL); + -+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } ++ if (!cli) ++ return -ENOMEM; + -+#ifdef CONFIG_MALI_CINSTR_GWT -+ if (kctx->gwt_enabled) -+ gwt_mask = ~KBASE_REG_GPU_WR; -+#endif ++ for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; ++ init_state++) { ++ err = 0; ++ switch (init_state) { ++ case KINSTR_PRFCNT_PARSE_SETUP: ++ cli->kinstr_ctx = kinstr_ctx; ++ err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, ++ req_arr); + -+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, -+ kbase_reg_current_backed_size(reg), -+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, -+ mmu_sync_info, NULL); -+ if (err == 0) -+ return 0; ++ break; + -+ /* fall down */ -+unwind: -+ alloc->nents = 0; -+ dma_mapped_pages = i; -+ /* Run the unmap loop in the same order as map loop, and perform again -+ * CPU cache synchronization to re-write the content of dirty CPU caches -+ * to memory. This is precautionary measure in case a GPU job has taken -+ * advantage of a partially GPU-mapped range to write and corrupt the -+ * content of memory, either inside or outside the imported region. -+ * -+ * Notice that this error recovery path doesn't try to be optimal and just -+ * flushes the entire page range. -+ */ -+ for (i = 0; i < dma_mapped_pages; i++) { -+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; ++ case KINSTR_PRFCNT_ENABLE_MAP: ++ cli->config.buffer_count = MAX_BUFFER_COUNT; ++ cli->dump_interval_ns = cli->config.period_ns; ++ cli->next_dump_time_ns = 0; ++ cli->active = false; ++ atomic_set(&cli->write_idx, 0); ++ atomic_set(&cli->read_idx, 0); ++ atomic_set(&cli->fetch_idx, 0); + -+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#else -+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, -+ DMA_ATTR_SKIP_CPU_SYNC); -+#endif -+ } ++ err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); ++ break; + -+ /* The user buffer could already have been previously pinned before -+ * entering this function, and hence there could potentially be CPU -+ * mappings of it -+ */ -+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); ++ case KINSTR_PRFCNT_DUMP_BUFFER: ++ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, ++ &cli->config.phys_em); + -+ for (i = 0; i < pinned_pages; i++) { -+ kbase_unpin_user_buf_page(pages[i]); -+ pages[i] = NULL; -+ } ++ cli->sample_count = cli->config.buffer_count; ++ cli->sample_size = ++ kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + -+ return err; -+} ++ /* Use virtualizer's metadata to alloc tmp buffer which interacts with ++ * the HWC virtualizer. 
++ */ ++ err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); ++ break; + -+/* This function would also perform the work of unpinning pages on Job Manager -+ * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT -+ * have a corresponding call to kbase_jd_user_buf_unpin_pages(). -+ */ -+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, -+ struct kbase_va_region *reg, bool writeable) -+{ -+ long i; -+ struct page **pages; -+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; -+ unsigned long remaining_size = alloc->imported.user_buf.size; ++ case KINSTR_PRFCNT_SAMPLE_ARRAY: ++ /* Disable clock map in setup, and enable clock map when start */ ++ cli->enable_map.clk_enable_map = 0; + -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Use metadata from virtualizer to allocate dump buffers if ++ * kinstr_prfcnt doesn't have the truncated metadata. ++ */ ++ err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); -+ pages = alloc->imported.user_buf.pages; ++ break; + -+#if !MALI_USE_CSF -+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); -+#else -+ CSTD_UNUSED(reg); -+#endif ++ case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: ++ /* Set enable map to be 0 to prevent virtualizer to init and kick the ++ * backend to count. ++ */ ++ kbase_hwcnt_gpu_enable_map_from_physical( ++ &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); + -+ for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { -+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); -+ /* Notice: this is a temporary variable that is used for DMA sync -+ * operations, and that could be incremented by an offset if the -+ * current page contains both imported and non-imported memory -+ * sub-regions. -+ * -+ * It is valid to add an offset to this value, because the offset -+ * is always kept within the physically contiguous dma-mapped range -+ * and there's no need to translate to physical address to offset it. -+ * -+ * This variable is not going to be used for the actual DMA unmap -+ * operation, that shall always use the original DMA address of the -+ * whole memory page. -+ */ -+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; ++ err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, ++ &cli->enable_map, &cli->hvcli); ++ break; + -+ /* Manual CPU cache synchronization. -+ * -+ * When the GPU returns ownership of the buffer to the CPU, the driver -+ * needs to treat imported and non-imported memory differently. -+ * -+ * The first case to consider is non-imported sub-regions at the -+ * beginning of the first page and at the end of last page. For these -+ * sub-regions: CPU cache shall be committed with a clean+invalidate, -+ * in order to keep the last CPU write. -+ * -+ * Imported region prefers the opposite treatment: this memory has been -+ * legitimately mapped and used by the GPU, hence GPU writes shall be -+ * committed to memory, while CPU cache shall be invalidated to make -+ * sure that CPU reads the correct memory content. 
-+ * -+ * The following diagram shows the expect value of the variables -+ * used in this loop in the corner case of an imported region encloed -+ * by a single memory page: -+ * -+ * page boundary ->|---------- | <- dma_addr (initial value) -+ * | | -+ * | - - - - - | <- offset_within_page -+ * |XXXXXXXXXXX|\ -+ * |XXXXXXXXXXX| \ -+ * |XXXXXXXXXXX| }- imported_size -+ * |XXXXXXXXXXX| / -+ * |XXXXXXXXXXX|/ -+ * | - - - - - | <- offset_within_page + imported_size -+ * | |\ -+ * | | }- PAGE_SIZE - imported_size - offset_within_page -+ * | |/ -+ * page boundary ->|-----------| -+ * -+ * If the imported region is enclosed by more than one page, then -+ * offset_within_page = 0 for any page after the first. -+ */ ++ case KINSTR_PRFCNT_WAITQ_MUTEX: ++ init_waitqueue_head(&cli->waitq); ++ mutex_init(&cli->cmd_sync_lock); ++ break; + -+ /* Only for first page: handle non-imported range at the beginning. */ -+ if (offset_within_page > 0) { -+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, -+ DMA_BIDIRECTIONAL); -+ dma_addr += offset_within_page; ++ case KINSTR_PRFCNT_INITIALISED: ++ /* This shouldn't be reached */ ++ break; + } + -+ /* For every page: handle imported range. */ -+ if (imported_size > 0) -+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, -+ DMA_BIDIRECTIONAL); -+ -+ /* Only for last page (that may coincide with first page): -+ * handle non-imported range at the end. -+ */ -+ if ((imported_size + offset_within_page) < PAGE_SIZE) { -+ dma_addr += imported_size; -+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, -+ PAGE_SIZE - imported_size - offset_within_page, -+ DMA_BIDIRECTIONAL); ++ if (err < 0) { ++ kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); ++ return err; + } ++ } ++ *out_vcli = cli; + -+ /* Notice: use the original DMA address to unmap the whole memory page. 
*/ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, -+ DMA_BIDIRECTIONAL); -+#else -+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], -+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); -+#endif -+ if (writeable) -+ set_page_dirty_lock(pages[i]); -+#if !MALI_USE_CSF -+ kbase_unpin_user_buf_page(pages[i]); -+ pages[i] = NULL; -+#endif ++ return 0; + -+ remaining_size -= imported_size; -+ offset_within_page = 0; -+ } -+#if !MALI_USE_CSF -+ alloc->nents = 0; -+#endif +} + -+int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, -+ void *src_page, size_t *to_copy, unsigned int nr_pages, -+ unsigned int *target_page_nr, size_t offset) ++static size_t kbasep_kinstr_prfcnt_get_block_info_count( ++ const struct kbase_hwcnt_metadata *metadata) +{ -+ void *target_page = kmap(dest_pages[*target_page_nr]); -+ size_t chunk = PAGE_SIZE-offset; -+ -+ if (!target_page) { -+ pr_err("%s: kmap failure", __func__); -+ return -ENOMEM; -+ } -+ -+ chunk = min(chunk, *to_copy); -+ -+ memcpy(target_page + offset, src_page, chunk); -+ *to_copy -= chunk; -+ -+ kunmap(dest_pages[*target_page_nr]); ++ size_t grp, blk; ++ size_t block_info_count = 0; + -+ *target_page_nr += 1; -+ if (*target_page_nr >= nr_pages || *to_copy == 0) ++ if (!metadata) + return 0; + -+ target_page = kmap(dest_pages[*target_page_nr]); -+ if (!target_page) { -+ pr_err("%s: kmap failure", __func__); -+ return -ENOMEM; ++ for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) { ++ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { ++ if (!kbase_kinstr_is_block_type_reserved(metadata, grp, blk)) ++ block_info_count++; ++ } + } + -+ KBASE_DEBUG_ASSERT(target_page); ++ return block_info_count; ++} + -+ chunk = min(offset, *to_copy); -+ memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); -+ *to_copy -= chunk; ++static void kbasep_kinstr_prfcnt_get_request_info_list( ++ struct prfcnt_enum_item *item_arr, size_t *arr_idx) ++{ ++ memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests, ++ sizeof(kinstr_prfcnt_supported_requests)); ++ *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests); ++} + -+ kunmap(dest_pages[*target_page_nr]); ++static void kbasep_kinstr_prfcnt_get_sample_info_item(const struct kbase_hwcnt_metadata *metadata, ++ struct prfcnt_enum_item *item_arr, ++ size_t *arr_idx) ++{ ++ struct prfcnt_enum_item sample_info = { ++ .hdr = { ++ .item_type = PRFCNT_ENUM_TYPE_SAMPLE_INFO, ++ .item_version = PRFCNT_READER_API_VERSION, ++ }, ++ .u.sample_info = { ++ .num_clock_domains = metadata->clk_cnt, ++ }, ++ }; + -+ return 0; ++ item_arr[*arr_idx] = sample_info; ++ *arr_idx += 1; +} + -+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, -+ struct mm_struct *locked_mm) ++int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, ++ size_t block_set, struct prfcnt_enum_item *item_arr, ++ size_t *arr_idx) +{ -+ int err = 0; -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ size_t grp, blk; + -+ lockdep_assert_held(&kctx->reg_lock); ++ if (!metadata || !item_arr || !arr_idx) ++ return -EINVAL; + -+ /* decide what needs to happen for this resource */ -+ switch (reg->gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && -+ (!reg->gpu_alloc->nents)) -+ return -EINVAL; ++ for (grp = 0; grp < 
kbase_hwcnt_metadata_group_count(metadata); grp++) { ++ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) { ++ size_t blk_inst; ++ size_t unused_blk_inst_count = 0; ++ size_t blk_inst_count = ++ kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk); ++ enum prfcnt_block_type block_type = ++ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( ++ kbase_hwcnt_metadata_block_type(metadata, grp, blk)); + -+ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; -+ if (reg->gpu_alloc->imported.user_buf -+ .current_mapping_usage_count == 1) { -+ err = kbase_jd_user_buf_map(kctx, reg); -+ if (err) { -+ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; -+ return err; ++ if (block_type == PRFCNT_BLOCK_TYPE_RESERVED) ++ continue; ++ ++ /* Count number of unused blocks to updated number of instances */ ++ for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) { ++ if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, ++ blk_inst)) ++ unused_blk_inst_count++; + } ++ ++ item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){ ++ .hdr = { ++ .item_type = PRFCNT_ENUM_TYPE_BLOCK, ++ .item_version = PRFCNT_READER_API_VERSION, ++ }, ++ .u.block_counter = { ++ .set = block_set, ++ .block_type = block_type, ++ .num_instances = blk_inst_count - unused_blk_inst_count, ++ .num_values = kbase_hwcnt_metadata_block_values_count( ++ metadata, grp, blk), ++ /* The bitmask of available counters should be dynamic. ++ * Temporarily, it is set to U64_MAX, waiting for the ++ * required functionality to be available in the future. ++ */ ++ .counter_mask = {U64_MAX, U64_MAX}, ++ }, ++ }; + } + } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ err = kbase_mem_umm_map(kctx, reg); -+ if (err) -+ return err; -+ break; -+ } -+ default: -+ dev_dbg(kctx->kbdev->dev, -+ "Invalid external resource GPU allocation type (%x) on mapping", -+ alloc->type); -+ return -EINVAL; -+ } + -+ kbase_va_region_alloc_get(kctx, reg); -+ kbase_mem_phy_alloc_get(alloc); -+ return err; ++ return 0; +} + -+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) ++static int kbasep_kinstr_prfcnt_enum_info_count( ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +{ -+ /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the -+ * unmapping operation. 
-+ */ -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ uint32_t count = 0; ++ size_t block_info_count = 0; ++ const struct kbase_hwcnt_metadata *metadata; + -+ switch (alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ kbase_mem_umm_unmap(kctx, reg, alloc); -+ } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ alloc->imported.user_buf.current_mapping_usage_count--; ++ count = ARRAY_SIZE(kinstr_prfcnt_supported_requests); ++ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); + -+ if (alloc->imported.user_buf.current_mapping_usage_count == 0) { -+ bool writeable = true; ++ /* Add the sample_info (clock domain) descriptive item */ ++ count++; + -+ if (!kbase_is_region_invalid_or_free(reg)) { -+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ alloc->pages, -+ kbase_reg_current_backed_size(reg), -+ kbase_reg_current_backed_size(reg), -+ kctx->as_nr, true); -+ } ++ /* Other blocks based on meta data */ ++ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); ++ count += block_info_count; + -+ if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) -+ writeable = false; ++ /* Reserve one for the last sentinel item. */ ++ count++; ++ enum_info->info_item_count = count; ++ enum_info->info_item_size = sizeof(struct prfcnt_enum_item); ++ kinstr_ctx->info_item_count = count; + -+ kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); -+ } -+ } -+ break; -+ default: -+ WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", -+ alloc->type); -+ return; -+ } -+ kbase_mem_phy_alloc_put(alloc); -+ kbase_va_region_alloc_put(kctx, reg); ++ return 0; +} + -+static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) ++static int kbasep_kinstr_prfcnt_enum_info_list( ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +{ -+ return reg->start_pfn << PAGE_SHIFT; -+} ++ struct prfcnt_enum_item *prfcnt_item_arr; ++ size_t arr_idx = 0; ++ int err = 0; ++ size_t block_info_count = 0; ++ const struct kbase_hwcnt_metadata *metadata; + -+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( -+ struct kbase_context *kctx, u64 gpu_addr) -+{ -+ struct kbase_ctx_ext_res_meta *meta = NULL; -+ struct kbase_ctx_ext_res_meta *walker; ++ if ((enum_info->info_item_size == 0) || ++ (enum_info->info_item_count == 0) || !enum_info->info_list_ptr) ++ return -EINVAL; + -+ lockdep_assert_held(&kctx->reg_lock); ++ if (enum_info->info_item_count != kinstr_ctx->info_item_count) ++ return -EINVAL; + -+ /* -+ * Walk the per context external resource metadata list for the -+ * metadata which matches the region which is being acquired. -+ */ -+ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { -+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { -+ meta = walker; -+ meta->ref++; -+ break; -+ } -+ } ++ prfcnt_item_arr = kcalloc(enum_info->info_item_count, ++ sizeof(*prfcnt_item_arr), GFP_KERNEL); ++ if (!prfcnt_item_arr) ++ return -ENOMEM; + -+ /* No metadata exists so create one. 
*/ -+ if (!meta) { -+ struct kbase_va_region *reg; ++ kbasep_kinstr_prfcnt_get_request_info_list(prfcnt_item_arr, &arr_idx); + -+ /* Find the region */ -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto failed; ++ metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt); ++ /* Place the sample_info item */ ++ kbasep_kinstr_prfcnt_get_sample_info_item(metadata, prfcnt_item_arr, &arr_idx); + -+ /* Allocate the metadata object */ -+ meta = kzalloc(sizeof(*meta), GFP_KERNEL); -+ if (!meta) -+ goto failed; -+ /* -+ * Fill in the metadata object and acquire a reference -+ * for the physical resource. -+ */ -+ meta->reg = reg; ++ block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata); + -+ /* Map the external resource to the GPU allocation of the region -+ * and acquire the reference to the VA region -+ */ -+ if (kbase_map_external_resource(kctx, meta->reg, NULL)) -+ goto fail_map; -+ meta->ref = 1; ++ if (arr_idx + block_info_count >= enum_info->info_item_count) ++ err = -EINVAL; + -+ list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); ++ if (!err) { ++ size_t counter_set; ++ ++#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY) ++ counter_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ counter_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ counter_set = KBASE_HWCNT_SET_PRIMARY; ++#endif ++ kbasep_kinstr_prfcnt_get_block_info_list( ++ metadata, counter_set, prfcnt_item_arr, &arr_idx); ++ if (arr_idx != enum_info->info_item_count - 1) ++ err = -EINVAL; + } + -+ return meta; ++ /* The last sentinel item. */ ++ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_type = ++ FLEX_LIST_TYPE_NONE; ++ prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0; + -+fail_map: -+ kfree(meta); -+failed: -+ return NULL; ++ if (!err) { ++ unsigned long bytes = ++ enum_info->info_item_count * sizeof(*prfcnt_item_arr); ++ ++ if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr), ++ prfcnt_item_arr, bytes)) ++ err = -EFAULT; ++ } ++ ++ kfree(prfcnt_item_arr); ++ return err; +} + -+static struct kbase_ctx_ext_res_meta * -+find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) ++int kbase_kinstr_prfcnt_enum_info( ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info) +{ -+ struct kbase_ctx_ext_res_meta *walker; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ int err; + -+ /* -+ * Walk the per context external resource metadata list for the -+ * metadata which matches the region which is being released. 
-+ */ -+ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) -+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) -+ return walker; ++ if (!kinstr_ctx || !enum_info) ++ return -EINVAL; + -+ return NULL; -+} ++ if (!enum_info->info_list_ptr) ++ err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx, ++ enum_info); ++ else ++ err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx, ++ enum_info); + -+static void release_sticky_resource_meta(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta) -+{ -+ kbase_unmap_external_resource(kctx, meta->reg); -+ list_del(&meta->ext_res_node); -+ kfree(meta); ++ return err; +} + -+bool kbase_sticky_resource_release(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) ++int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ union kbase_ioctl_kinstr_prfcnt_setup *setup) +{ -+ lockdep_assert_held(&kctx->reg_lock); -+ -+ /* Search of the metadata if one isn't provided. */ -+ if (!meta) -+ meta = find_sticky_resource_meta(kctx, gpu_addr); ++ int err; ++ size_t item_count; ++ size_t bytes; ++ struct prfcnt_request_item *req_arr = NULL; ++ struct kbase_kinstr_prfcnt_client *cli = NULL; ++ const size_t max_bytes = 32 * sizeof(*req_arr); + -+ /* No metadata so just return. */ -+ if (!meta) -+ return false; ++ if (!kinstr_ctx || !setup) ++ return -EINVAL; + -+ if (--meta->ref != 0) -+ return true; ++ item_count = setup->in.request_item_count; + -+ release_sticky_resource_meta(kctx, meta); ++ /* Limiting the request items to 2x of the expected: accommodating ++ * moderate duplications but rejecting excessive abuses. ++ */ ++ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || ++ item_count > 2 * kinstr_ctx->info_item_count) { ++ return -EINVAL; ++ } + -+ return true; -+} ++ if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) ++ return -EINVAL; + -+bool kbase_sticky_resource_release_force(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) -+{ -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Further limiting the max bytes to copy from userspace by setting it in the following ++ * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items, ++ * each currently at the size of prfcnt_request_item. ++ * ++ * Note: if more request types get added, this max limit needs to be updated. ++ */ ++ if (bytes > max_bytes) ++ return -EINVAL; + -+ /* Search of the metadata if one isn't provided. */ -+ if (!meta) -+ meta = find_sticky_resource_meta(kctx, gpu_addr); ++ req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); + -+ /* No metadata so just return. 
*/ -+ if (!meta) -+ return false; ++ if (IS_ERR(req_arr)) ++ return PTR_ERR(req_arr); + -+ release_sticky_resource_meta(kctx, meta); ++ err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr); + -+ return true; -+} ++ if (err < 0) ++ goto error; + -+int kbase_sticky_resource_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->ext_res_meta_head); ++ mutex_lock(&kinstr_ctx->lock); ++ kinstr_ctx->client_count++; ++ list_add(&cli->node, &kinstr_ctx->clients); ++ mutex_unlock(&kinstr_ctx->lock); + -+ return 0; -+} ++ setup->out.prfcnt_metadata_item_size = sizeof(struct prfcnt_metadata); ++ setup->out.prfcnt_mmap_size_bytes = ++ cli->sample_size * cli->sample_count; + -+void kbase_sticky_resource_term(struct kbase_context *kctx) -+{ -+ struct kbase_ctx_ext_res_meta *walker; ++ /* Expose to user-space only once the client is fully initialized */ ++ err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]", ++ &kinstr_prfcnt_client_fops, cli, ++ O_RDONLY | O_CLOEXEC); + -+ lockdep_assert_held(&kctx->reg_lock); ++ if (err < 0) ++ goto client_installed_error; + -+ /* -+ * Free any sticky resources which haven't been unmapped. -+ * -+ * Note: -+ * We don't care about refcounts at this point as no future -+ * references to the meta data will be made. -+ * Region termination would find these if we didn't free them -+ * here, but it's more efficient if we do the clean up here. -+ */ -+ while (!list_empty(&kctx->ext_res_meta_head)) { -+ walker = list_first_entry(&kctx->ext_res_meta_head, -+ struct kbase_ctx_ext_res_meta, ext_res_node); ++ goto free_buf; + -+ kbase_sticky_resource_release_force(kctx, walker, 0); -+ } ++client_installed_error: ++ mutex_lock(&kinstr_ctx->lock); ++ kinstr_ctx->client_count--; ++ list_del(&cli->node); ++ mutex_unlock(&kinstr_ctx->lock); ++error: ++ kbasep_kinstr_prfcnt_client_destroy(cli); ++free_buf: ++ kfree(req_arr); ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h new file mode 100644 -index 000000000..1118b96fc +index 000000000..bbe33796e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h -@@ -0,0 +1,2560 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h +@@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -234993,7103 +238348,5773 @@ index 000000000..1118b96fc + * + */ + -+/** -+ * DOC: Base kernel memory APIs ++/* ++ * Kinstr_prfcnt, used to provide an ioctl for userspace access to ++ * performance counters. 
+ */ ++#ifndef _KBASE_KINSTR_PRFCNT_H_ ++#define _KBASE_KINSTR_PRFCNT_H_ + -+#ifndef _KBASE_MEM_H_ -+#define _KBASE_MEM_H_ -+ -+#ifndef _KBASE_H_ -+#error "Don't include this file directly, use mali_kbase.h instead" -+#endif -+ -+#include -+#include -+#include -+#include "mali_kbase_pm.h" -+#include "mali_kbase_defs.h" -+/* Required for kbase_mem_evictable_unmake */ -+#include "mali_kbase_mem_linux.h" -+#include "mali_kbase_mem_migrate.h" -+#include "mali_kbase_refcount_defs.h" -+ -+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, -+ int pages); ++#include "hwcnt/mali_kbase_hwcnt_types.h" ++#include + -+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ ++struct kbase_kinstr_prfcnt_context; ++struct kbase_kinstr_prfcnt_client; ++struct kbase_hwcnt_virtualizer; ++struct kbase_ioctl_hwcnt_reader_setup; ++struct kbase_ioctl_kinstr_prfcnt_enum_info; ++union kbase_ioctl_kinstr_prfcnt_setup; + -+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by -+ * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we -+ * have more than one page fault within the same 8 pages and page tables are -+ * updated accordingly, the MMU does not re-read the page table entries from -+ * memory for the subsequent page table updates and generates duplicate page -+ * faults as the page table information used by the MMU is not valid. ++/** ++ * kbase_kinstr_prfcnt_init() - Initialize a kinstr_prfcnt context. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @out_kinstr_ctx: Non-NULL pointer to where the pointer to the created ++ * kinstr_prfcnt context will be stored on success. ++ * ++ * On creation, the suspend count of the context will be 0. ++ * ++ * Return: 0 on success, else error code. + */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ -+ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ -+ -+/* This must always be a power of 2 */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) ++int kbase_kinstr_prfcnt_init( ++ struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_kinstr_prfcnt_context **out_kinstr_ctx); + -+/* -+ * A CPU mapping ++/** ++ * kbase_kinstr_prfcnt_term() - Terminate a kinstr_prfcnt context. ++ * @kinstr_ctx: Pointer to the kinstr_prfcnt context to be terminated. + */ -+struct kbase_cpu_mapping { -+ struct list_head mappings_list; -+ struct kbase_mem_phy_alloc *alloc; -+ struct kbase_context *kctx; -+ struct kbase_va_region *region; -+ int count; -+ int free_on_close; -+}; -+ -+enum kbase_memory_type { -+ KBASE_MEM_TYPE_NATIVE, -+ KBASE_MEM_TYPE_IMPORTED_UMM, -+ KBASE_MEM_TYPE_IMPORTED_USER_BUF, -+ KBASE_MEM_TYPE_ALIAS, -+ KBASE_MEM_TYPE_RAW -+}; ++void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx); + -+/* internal structure, mirroring base_mem_aliasing_info, -+ * but with alloc instead of a gpu va (handle) ++/** ++ * kbase_kinstr_prfcnt_suspend() - Increment the suspend count of the context. 
++ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be suspended. ++ * ++ * After this function call returns, it is guaranteed that all timers and ++ * workers in kinstr_prfcnt will be canceled, and will not be re-triggered until ++ * after the context has been resumed. In effect, this means no new counter ++ * dumps will occur for any existing or subsequently added periodic clients. + */ -+struct kbase_aliased { -+ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ -+ u64 offset; /* in pages */ -+ u64 length; /* in pages */ -+}; -+ -+/* Physical pages tracking object properties */ -+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) -+#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) ++void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx); + -+/* struct kbase_mem_phy_alloc - Physical pages tracking object. ++/** ++ * kbase_kinstr_prfcnt_resume() - Decrement the suspend count of the context. ++ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context to be resumed. + * -+ * Set up to track N pages. -+ * N not stored here, the creator holds that info. -+ * This object only tracks how many elements are actually valid (present). -+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc -+ * is not shared with another region or client. CPU mappings are OK to -+ * exist when changing, as long as the tracked mappings objects are -+ * updated as part of the change. ++ * If a call to this function decrements the suspend count from 1 to 0, then ++ * normal operation of kinstr_prfcnt will be resumed (i.e. counter dumps will once ++ * again be automatically triggered for all periodic clients). + * -+ * @kref: number of users of this alloc -+ * @gpu_mappings: count number of times mapped on the GPU. Indicates the number -+ * of references there are to the physical pages from different -+ * GPU VA regions. -+ * @kernel_mappings: count number of times mapped on the CPU, specifically in -+ * the kernel. Indicates the number of references there are -+ * to the physical pages to prevent flag changes or shrink -+ * while maps are still held. -+ * @nents: 0..N -+ * @pages: N elements, only 0..nents are valid -+ * @mappings: List of CPU mappings of this physical memory allocation. -+ * @evict_node: Node used to store this allocation on the eviction list -+ * @evicted: Physical backing size when the pages where evicted -+ * @reg: Back reference to the region structure which created this -+ * allocation, or NULL if it has been freed. -+ * @type: type of buffer -+ * @permanent_map: Kernel side mapping of the alloc, shall never be -+ * referred directly. kbase_phy_alloc_mapping_get() & -+ * kbase_phy_alloc_mapping_put() pair should be used -+ * around access to the kernel-side CPU mapping so that -+ * mapping doesn't disappear whilst it is being accessed. -+ * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE. -+ * @group_id: A memory group ID to be passed to a platform-specific -+ * memory group manager, if present. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @imported: member in union valid based on @a type ++ * It is only valid to call this function one time for each prior returned call ++ * to kbase_kinstr_prfcnt_suspend. 
+ */ -+struct kbase_mem_phy_alloc { -+ struct kref kref; -+ atomic_t gpu_mappings; -+ atomic_t kernel_mappings; -+ size_t nents; -+ struct tagged_addr *pages; -+ struct list_head mappings; -+ struct list_head evict_node; -+ size_t evicted; -+ struct kbase_va_region *reg; -+ enum kbase_memory_type type; -+ struct kbase_vmap_struct *permanent_map; -+ u8 properties; -+ u8 group_id; -+ -+ union { -+ struct { -+ struct kbase_context *kctx; -+ struct dma_buf *dma_buf; -+ struct dma_buf_attachment *dma_attachment; -+ unsigned int current_mapping_usage_count; -+ struct sg_table *sgt; -+ bool need_sync; -+ } umm; -+ struct { -+ u64 stride; -+ size_t nents; -+ struct kbase_aliased *aliased; -+ } alias; -+ struct { -+ struct kbase_context *kctx; -+ /* Number of pages in this structure, including *pages. -+ * Used for kernel memory tracking. -+ */ -+ size_t nr_struct_pages; -+ } native; -+ struct kbase_alloc_import_user_buf { -+ unsigned long address; -+ unsigned long size; -+ unsigned long nr_pages; -+ struct page **pages; -+ /* top bit (1<<31) of current_mapping_usage_count -+ * specifies that this import was pinned on import -+ * See PINNED_ON_IMPORT -+ */ -+ u32 current_mapping_usage_count; -+ struct mm_struct *mm; -+ dma_addr_t *dma_addrs; -+ } user_buf; -+ } imported; -+}; ++void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx); + +/** -+ * enum kbase_page_status - Status of a page used for page migration. ++ * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types ++ * with their information. ++ * @metadata: Non-NULL pointer to the hardware counter metadata. ++ * @block_set: Which SET the blocks will represent. ++ * @item_arr: Non-NULL pointer to array of enumeration items to populate. ++ * @arr_idx: Non-NULL pointer to index of array @item_arr. + * -+ * @MEM_POOL: Stable state. Page is located in a memory pool and can safely -+ * be migrated. -+ * @ALLOCATE_IN_PROGRESS: Transitory state. A page is set to this status as -+ * soon as it leaves a memory pool. -+ * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory -+ * pool of a dying context are being moved to the device -+ * memory pool. -+ * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is -+ * not movable, but may return to be movable when the object -+ * is freed. -+ * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU -+ * and has reference to kbase_mem_phy_alloc object. -+ * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't -+ * reference kbase_mem_phy_alloc object. Used as a page in MMU -+ * page table. -+ * @FREE_IN_PROGRESS: Transitory state. A page is set to this status as soon as -+ * the driver manages to acquire a lock on the page while -+ * unmapping it. This status means that a memory release is -+ * happening and it's still not complete. -+ * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. -+ * A page is isolated while it is in ALLOCATED_MAPPED state, -+ * but then the driver tries to destroy the allocation. -+ * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. -+ * A page is isolated while it is in PT_MAPPED state, but -+ * then the driver tries to destroy the allocation. ++ * Populate list of counter blocks with information for enumeration. + * -+ * Pages can only be migrated in stable states. ++ * Return: 0 on success, else error code. 
+ */ -+enum kbase_page_status { -+ MEM_POOL = 0, -+ ALLOCATE_IN_PROGRESS, -+ SPILL_IN_PROGRESS, -+ NOT_MOVABLE, -+ ALLOCATED_MAPPED, -+ PT_MAPPED, -+ FREE_IN_PROGRESS, -+ FREE_ISOLATED_IN_PROGRESS, -+ FREE_PT_ISOLATED_IN_PROGRESS, -+}; -+ -+#define PGD_VPFN_LEVEL_MASK ((u64)0x3) -+#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) -+#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) -+#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ -+ ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) ++int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata, ++ size_t block_set, struct prfcnt_enum_item *item_arr, ++ size_t *arr_idx); + +/** -+ * struct kbase_page_metadata - Metadata for each page in kbase -+ * -+ * @kbdev: Pointer to kbase device. -+ * @dma_addr: DMA address mapped to page. -+ * @migrate_lock: A spinlock to protect the private metadata. -+ * @data: Member in union valid based on @status. -+ * @status: Status to keep track if page can be migrated at any -+ * given moment. MSB will indicate if page is isolated. -+ * Protected by @migrate_lock. -+ * @vmap_count: Counter of kernel mappings. -+ * @group_id: Memory group ID obtained at the time of page allocation. ++ * kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample ++ * metadata items. ++ * @metadata: Non-NULL pointer to the hardware counter metadata. ++ * @enable_map: Non-NULL pointer to the map of enabled counters. + * -+ * Each 4KB page will have a reference to this struct in the private field. -+ * This will be used to keep track of information required for Linux page -+ * migration functionality as well as address for DMA mapping. ++ * Return: Number of metadata items for available blocks in each sample. + */ -+struct kbase_page_metadata { -+ dma_addr_t dma_addr; -+ spinlock_t migrate_lock; -+ -+ union { -+ struct { -+ struct kbase_mem_pool *pool; -+ /* Pool could be terminated after page is isolated and therefore -+ * won't be able to get reference to kbase device. -+ */ -+ struct kbase_device *kbdev; -+ } mem_pool; -+ struct { -+ struct kbase_va_region *reg; -+ struct kbase_mmu_table *mmut; -+ u64 vpfn; -+ } mapped; -+ struct { -+ struct kbase_mmu_table *mmut; -+ u64 pgd_vpfn_level; -+ } pt_mapped; -+ struct { -+ struct kbase_device *kbdev; -+ } free_isolated; -+ struct { -+ struct kbase_device *kbdev; -+ } free_pt_isolated; -+ } data; -+ -+ u8 status; -+ u8 vmap_count; -+ u8 group_id; -+}; ++size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata, ++ struct kbase_hwcnt_enable_map *enable_map); + -+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is -+ * used to signify that a buffer was pinned when it was imported. Since the -+ * reference count is limited by the number of atoms that can be submitted at -+ * once there should be no danger of overflowing into this bit. -+ * Stealing the top bit also has the benefit that -+ * current_mapping_usage_count != 0 if and only if the buffer is mapped. ++/** ++ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta ++ * item array. ++ * @enable_map: Non-NULL pointer to the map of enabled counters. ++ * @dst: Non-NULL pointer to the sample's dump buffer object. ++ * @block_meta_base: Non-NULL double pointer to the start of the block meta ++ * data items. ++ * @base_addr: Address of allocated pages for array of samples. Used ++ * to calculate offset of block values. 
++ * @counter_set: The SET which blocks represent. ++ * ++ * Return: 0 on success, else error code. + */ -+#define PINNED_ON_IMPORT (1<<31) ++int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_dump_buffer *dst, ++ struct prfcnt_metadata **block_meta_base, ++ u8 *base_addr, u8 counter_set); + +/** -+ * enum kbase_jit_report_flags - Flags for just-in-time memory allocation -+ * pressure limit functions -+ * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due -+ * to a just-in-time memory allocation or free ++ * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client. ++ * Does not attach to the kinstr_prfcnt ++ * context. ++ * @kinstr_ctx: Non-NULL pointer to kinstr_prfcnt context. ++ * @setup: Non-NULL pointer to hardware counter ioctl setup structure. ++ * @out_vcli: Non-NULL pointer to where created client will be stored on ++ * success. ++ * @req_arr: Non-NULL pointer to array of request items for client session. + * -+ * Used to control flow within pressure limit related functions, or to provide -+ * extra debugging information ++ * Return: 0 on success, else error code. + */ -+enum kbase_jit_report_flags { -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) -+}; ++int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ union kbase_ioctl_kinstr_prfcnt_setup *setup, ++ struct kbase_kinstr_prfcnt_client **out_vcli, ++ struct prfcnt_request_item *req_arr); + +/** -+ * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying -+ * physical allocation. -+ * @alloc: the physical allocation containing the pages whose metadata is going -+ * to be modified -+ * @status: the status the pages should end up in ++ * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session. ++ * @cli: Non-NULL pointer to kinstr_prfcnt client. ++ * @control_cmd: Control command to execute. + * -+ * Note that this function does not go through all of the checking to ensure that -+ * proper states are set. Instead, it is only used when we change the allocation -+ * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED ++ * Return: 0 on success, else error code. + */ -+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, -+ enum kbase_page_status status); -+ -+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ KBASE_DEBUG_ASSERT(alloc); -+ /* we only track mappings of NATIVE buffers */ -+ if (alloc->type == KBASE_MEM_TYPE_NATIVE) -+ atomic_inc(&alloc->gpu_mappings); -+} ++int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, ++ struct prfcnt_control_cmd *control_cmd); + -+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ KBASE_DEBUG_ASSERT(alloc); -+ /* we only track mappings of NATIVE buffers */ -+ if (alloc->type == KBASE_MEM_TYPE_NATIVE) -+ if (atomic_dec_return(&alloc->gpu_mappings) < 0) { -+ pr_err("Mismatched %s:\n", __func__); -+ dump_stack(); -+ } -+} ++/** ++ * kbasep_kinstr_prfcnt_client_destroy() - Destroy a kinstr_prfcnt client. ++ * @cli: kinstr_prfcnt client. Must not be attached to a kinstr_prfcnt context. ++ */ ++void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli); + +/** -+ * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings counter for a -+ * memory region to prevent commit and flag -+ * changes ++ * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information. 
++ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. ++ * @enum_info: Non-NULL pointer to the enumeration information. + * -+ * @alloc: Pointer to physical pages tracking object ++ * Enumerate which counter blocks and banks exist, and what counters are ++ * available within them. ++ * ++ * Return: 0 on success, else error code. + */ -+static inline void -+kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ atomic_inc(&alloc->kernel_mappings); -+} ++int kbase_kinstr_prfcnt_enum_info( ++ struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info); + +/** -+ * kbase_mem_phy_alloc_kernel_unmapped - Decrement kernel_mappings -+ * counter for a memory region to allow commit and flag changes ++ * kbase_kinstr_prfcnt_setup() - Set up a new hardware counter reader client. ++ * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context. ++ * @setup: Non-NULL pointer to the hwcnt reader configuration. + * -+ * @alloc: Pointer to physical pages tracking object ++ * Start a session between a user client and the kinstr_prfcnt component. ++ * A file descriptor shall be provided to the client as a handle to the ++ * hardware counter reader client that represents the session. ++ * ++ * Return: file descriptor on success, else error code. + */ -+static inline void -+kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0); -+} ++int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, ++ union kbase_ioctl_kinstr_prfcnt_setup *setup); + -+/** -+ * kbase_mem_is_imported - Indicate whether a memory type is imported ++#endif /* _KBASE_KINSTR_PRFCNT_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h +new file mode 100644 +index 000000000..e5c6f7a0b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @type: the memory type ++ * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: true if the memory type is imported, false otherwise + */ -+static inline bool kbase_mem_is_imported(enum kbase_memory_type type) -+{ -+ return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || -+ (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); -+} + -+void kbase_mem_kref_free(struct kref *kref); ++/** ++ * DOC: Base kernel APIs, Linux implementation. 
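A short sketch of how the two user-facing entry points declared in the kinstr_prfcnt header above might be driven from the ioctl layer (illustrative only; the wrapper function and the way the enum_info/setup structures are obtained are assumptions):

        static int example_kinstr_prfcnt_ioctls(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
                                                struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info,
                                                union kbase_ioctl_kinstr_prfcnt_setup *setup)
        {
                /* First report the available blocks and counters back to userspace. */
                int err = kbase_kinstr_prfcnt_enum_info(kinstr_ctx, enum_info);

                if (err)
                        return err;

                /* Then create a reader client; a file descriptor is returned on success. */
                return kbase_kinstr_prfcnt_setup(kinstr_ctx, setup);
        }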
++ */ + -+int kbase_mem_init(struct kbase_device *kbdev); -+void kbase_mem_halt(struct kbase_device *kbdev); -+void kbase_mem_term(struct kbase_device *kbdev); ++#ifndef _KBASE_LINUX_H_ ++#define _KBASE_LINUX_H_ + -+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) -+{ -+ kref_get(&alloc->kref); -+ return alloc; -+} ++/* All things that are needed for the Linux port. */ ++#include ++#include ++#include ++#include ++#include + -+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) -+{ -+ kref_put(&alloc->kref, kbase_mem_kref_free); -+ return NULL; -+} ++#if IS_ENABLED(MALI_KERNEL_TEST_API) ++ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) ++#else ++ #define KBASE_EXPORT_TEST_API(func) ++#endif + -+/** -+ * struct kbase_va_region - A GPU memory region, and attributes for CPU mappings -+ * -+ * @rblink: Node in a red-black tree of memory regions within the same zone of -+ * the GPU's virtual address space. -+ * @link: Links to neighboring items in a list of growable memory regions -+ * that triggered incremental rendering by growing too much. -+ * @rbtree: Backlink to the red-black tree of memory regions. -+ * @start_pfn: The Page Frame Number in GPU virtual address space. -+ * @user_data: The address of GPU command queue when VA region represents -+ * a ring buffer. -+ * @nr_pages: The size of the region in pages. -+ * @initial_commit: Initial commit, for aligning the start address and -+ * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. -+ * @threshold_pages: If non-zero and the amount of memory committed to a region -+ * that can grow on page fault exceeds this number of pages -+ * then the driver switches to incremental rendering. -+ * @flags: Flags -+ * @extension: Number of pages allocated on page fault. -+ * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. -+ * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. -+ * @jit_node: Links to neighboring regions in the just-in-time memory pool. -+ * @jit_usage_id: The last just-in-time memory usage ID for this region. -+ * @jit_bin_id: The just-in-time memory bin this region came from. -+ * @va_refcnt: Number of users of this region. Protected by reg_lock. -+ * @no_user_free_count: Number of contexts that want to prevent the region -+ * from being freed by userspace. -+ * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of -+ * an allocated region -+ * The object can be one of: -+ * - u32 value defining the size of the region -+ * - u64 pointer first unused byte in the region -+ * The interpretation of the object depends on -+ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in -+ * jit_info_flags - if it is set, the heap info object -+ * should be interpreted as size. -+ * @used_pages: The current estimate of the number of pages used, which in -+ * normal use is either: -+ * - the initial estimate == va_pages -+ * - the actual pages used, as found by a JIT usage report -+ * Note that since the value is calculated from GPU memory after a -+ * JIT usage report, at any point in time it is allowed to take a -+ * random value that is no greater than va_pages (e.g. 
it may be -+ * greater than gpu_alloc->nents) -+ */ -+struct kbase_va_region { -+ struct rb_node rblink; -+ struct list_head link; -+ struct rb_root *rbtree; -+ u64 start_pfn; -+ void *user_data; -+ size_t nr_pages; -+ size_t initial_commit; -+ size_t threshold_pages; -+ -+/* Free region */ -+#define KBASE_REG_FREE (1ul << 0) -+/* CPU write access */ -+#define KBASE_REG_CPU_WR (1ul << 1) -+/* GPU write access */ -+#define KBASE_REG_GPU_WR (1ul << 2) -+/* No eXecute flag */ -+#define KBASE_REG_GPU_NX (1ul << 3) -+/* Is CPU cached? */ -+#define KBASE_REG_CPU_CACHED (1ul << 4) -+/* Is GPU cached? -+ * Some components within the GPU might only be able to access memory that is -+ * GPU cacheable. Refer to the specific GPU implementation for more details. -+ */ -+#define KBASE_REG_GPU_CACHED (1ul << 5) -+ -+#define KBASE_REG_GROWABLE (1ul << 6) -+/* Can grow on pf? */ -+#define KBASE_REG_PF_GROW (1ul << 7) -+ -+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ -+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) -+ -+/* inner shareable coherency */ -+#define KBASE_REG_SHARE_IN (1ul << 9) -+/* inner & outer shareable coherency */ -+#define KBASE_REG_SHARE_BOTH (1ul << 10) -+ -+#if MALI_USE_CSF -+/* Space for 8 different zones */ -+#define KBASE_REG_ZONE_BITS 3 -+#else -+/* Space for 4 different zones */ -+#define KBASE_REG_ZONE_BITS 2 -+#endif -+ -+#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11) -+#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11) -+#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11) -+ -+#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) -+#error "Too many zones for the number of zone bits defined" -+#endif -+ -+/* GPU read access */ -+#define KBASE_REG_GPU_RD (1ul << 14) -+/* CPU read access */ -+#define KBASE_REG_CPU_RD (1ul << 15) -+ -+/* Index of chosen MEMATTR for this region (0..7) */ -+#define KBASE_REG_MEMATTR_MASK (7ul << 16) -+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) -+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) -+ -+#define KBASE_REG_PROTECTED (1ul << 19) ++#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) + -+/* Region belongs to a shrinker. ++#endif /* _KBASE_LINUX_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c +new file mode 100644 +index 000000000..800a4199d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c +@@ -0,0 +1,5533 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * This can either mean that it is part of the JIT/Ephemeral or tiler heap -+ * shrinker paths. 
Should be removed only after making sure that there are -+ * no references remaining to it in these paths, as it may cause the physical -+ * backing of the region to disappear during use. + */ -+#define KBASE_REG_DONT_NEED (1ul << 20) + -+/* Imported buffer is padded? */ -+#define KBASE_REG_IMPORT_PAD (1ul << 21) -+ -+#if MALI_USE_CSF -+/* CSF event memory */ -+#define KBASE_REG_CSF_EVENT (1ul << 22) -+#else -+/* Bit 22 is reserved. -+ * -+ * Do not remove, use the next unreserved bit for new flags ++/** ++ * DOC: Base kernel memory APIs + */ -+#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) ++#include ++#include ++#include ++#include ++#include ++#include ++#if IS_ENABLED(CONFIG_OF) ++#include +#endif + -+#if !MALI_USE_CSF -+/* The top of the initial commit is aligned to extension pages. -+ * Extent must be a power of 2 -+ */ -+#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) -+#else -+/* Bit 23 is reserved. -+ * -+ * Do not remove, use the next unreserved bit for new flags -+ */ -+#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) -+#endif /* !MALI_USE_CSF */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* Bit 24 is currently unused and is available for use for a new flag */ ++#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" ++#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) + -+/* Memory has permanent kernel side mapping */ -+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) ++#if MALI_JIT_PRESSURE_LIMIT_BASE + -+/* GPU VA region has been freed by the userspace, but still remains allocated -+ * due to the reference held by CPU mappings created on the GPU VA region. -+ * -+ * A region with this flag set has had kbase_gpu_munmap() called on it, but can -+ * still be looked-up in the region tracker as a non-free region. Hence must -+ * not create or update any more GPU mappings on such regions because they will -+ * not be unmapped when the region is finally destroyed. -+ * -+ * Since such regions are still present in the region tracker, new allocations -+ * attempted with BASE_MEM_SAME_VA might fail if their address intersects with -+ * a region with this flag set. ++/* ++ * Alignment of objects allocated by the GPU inside a just-in-time memory ++ * region whose size is given by an end address + * -+ * In addition, this flag indicates the gpu_alloc member might no longer valid -+ * e.g. in infinite cache simulation. ++ * This is the alignment of objects allocated by the GPU, but possibly not ++ * fully written to. When taken into account with ++ * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes ++ * that the JIT memory report size can exceed the actual backed memory size. + */ -+#define KBASE_REG_VA_FREED (1ul << 26) ++#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) + -+/* If set, the heap info address points to a u32 holding the used size in bytes; -+ * otherwise it points to a u64 holding the lowest address of unused memory. ++/* ++ * Maximum size of objects allocated by the GPU inside a just-in-time memory ++ * region whose size is given by an end address ++ * ++ * This is the maximum size of objects allocated by the GPU, but possibly not ++ * fully written to. When taken into account with ++ * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes ++ * that the JIT memory report size can exceed the actual backed memory size. 
+ */ -+#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) ++#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) + -+/* Allocation is actively used for JIT memory */ -+#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+#if MALI_USE_CSF -+/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA -+ * memory zones, and it determines whether they were created with a fixed -+ * GPU VA address requested by the user. -+ */ -+#define KBASE_REG_FIXED_ADDRESS (1ul << 29) ++/* Forward declarations */ ++static void free_partial_locked(struct kbase_context *kctx, ++ struct kbase_mem_pool *pool, struct tagged_addr tp); ++ ++static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) ++{ ++#if defined(CONFIG_ARM64) ++ /* VA_BITS can be as high as 48 bits, but all bits are available for ++ * both user and kernel. ++ */ ++ size_t cpu_va_bits = VA_BITS; ++#elif defined(CONFIG_X86_64) ++ /* x86_64 can access 48 bits of VA, but the 48th is used to denote ++ * kernel (1) vs userspace (0), so the max here is 47. ++ */ ++ size_t cpu_va_bits = 47; ++#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) ++ size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else -+#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) ++#error "Unknown CPU VA width for this architecture" +#endif + -+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) ++ if (kbase_ctx_compat_mode(kctx)) ++ cpu_va_bits = 32; + -+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) -+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) ++ return cpu_va_bits; ++} + -+#if MALI_USE_CSF -+/* only used with 32-bit clients */ -+/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). -+ */ -+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ -+ (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -+#else -+/* only used with 32-bit clients */ -+/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the -+ * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 -+ * bytes, see mmap64 man page for reference). So we put the default limit to the -+ * maximum possible on Linux and shrink it down, if required by the GPU, during -+ * initialization. ++/* This function finds out which RB tree the given pfn from the GPU VA belongs ++ * to based on the memory zone the pfn refers to + */ -+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ -+ (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -+/* end 32-bit clients only */ -+#endif ++static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, ++ u64 gpu_pfn) ++{ ++ struct rb_root *rbtree = NULL; + -+/* The starting address and size of the GPU-executable zone are dynamic -+ * and depend on the platform and the number of pages requested by the -+ * user process, with an upper limit of 4 GB. 
-+ */ -+#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) -+#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ ++ struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + +#if MALI_USE_CSF -+#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) -+#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) -+#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ -+ KBASE_REG_ZONE_MCU_SHARED_BASE) ++ struct kbase_reg_zone *fixed_va_zone = ++ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); + -+/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit -+ * clients, and 2^43 for 32-bit clients. -+ */ -+#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) -+#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) -+#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ++ struct kbase_reg_zone *exec_fixed_va_zone = ++ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); + -+/* Executable zone supporting FIXED/FIXABLE allocations. -+ * It is always 4GB in size. -+ */ ++ if (gpu_pfn >= fixed_va_zone->base_pfn) { ++ rbtree = &kctx->reg_rbtree_fixed; ++ return rbtree; ++ } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { ++ rbtree = &kctx->reg_rbtree_exec_fixed; ++ return rbtree; ++ } ++#endif ++ if (gpu_pfn >= exec_va_zone->base_pfn) ++ rbtree = &kctx->reg_rbtree_exec; ++ else { ++ u64 same_va_end; + -+#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4) -+#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ++ if (kbase_ctx_compat_mode(kctx)) { ++ same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; ++ } else { ++ struct kbase_reg_zone *same_va_zone = ++ kbase_ctx_reg_zone_get(kctx, ++ KBASE_REG_ZONE_SAME_VA); ++ same_va_end = kbase_reg_zone_end_pfn(same_va_zone); ++ } + -+/* Non-executable zone supporting FIXED/FIXABLE allocations. -+ * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from -+ * (2^43) up to (2^44)-1 for 32-bit userspace clients. -+ */ -+#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5) ++ if (gpu_pfn >= same_va_end) ++ rbtree = &kctx->reg_rbtree_custom; ++ else ++ rbtree = &kctx->reg_rbtree_same; ++ } + -+/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so -+ * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. -+ */ -+#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) -+#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) ++ return rbtree; ++} + -+#endif ++/* This function inserts a region into the tree. */ ++static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) ++{ ++ u64 start_pfn = new_reg->start_pfn; ++ struct rb_node **link = NULL; ++ struct rb_node *parent = NULL; ++ struct rb_root *rbtree = NULL; + -+ unsigned long flags; -+ size_t extension; -+ struct kbase_mem_phy_alloc *cpu_alloc; -+ struct kbase_mem_phy_alloc *gpu_alloc; -+ struct list_head jit_node; -+ u16 jit_usage_id; -+ u8 jit_bin_id; ++ rbtree = new_reg->rbtree; + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ /* Pointer to an object in GPU memory defining an end of an allocated -+ * region -+ * -+ * The object can be one of: -+ * - u32 value defining the size of the region -+ * - u64 pointer first unused byte in the region -+ * -+ * The interpretation of the object depends on -+ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is -+ * set, the heap info object should be interpreted as size. 
-+ */ -+ u64 heap_info_gpu_addr; ++ link = &(rbtree->rb_node); ++ /* Find the right place in the tree using tree search */ ++ while (*link) { ++ struct kbase_va_region *old_reg; + -+ /* The current estimate of the number of pages used, which in normal -+ * use is either: -+ * - the initial estimate == va_pages -+ * - the actual pages used, as found by a JIT usage report -+ * -+ * Note that since the value is calculated from GPU memory after a JIT -+ * usage report, at any point in time it is allowed to take a random -+ * value that is no greater than va_pages (e.g. it may be greater than -+ * gpu_alloc->nents) -+ */ -+ size_t used_pages; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ parent = *link; ++ old_reg = rb_entry(parent, struct kbase_va_region, rblink); + -+ kbase_refcount_t va_refcnt; -+ atomic_t no_user_free_count; -+}; ++ /* RBTree requires no duplicate entries. */ ++ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + -+/** -+ * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a -+ * context or for a device -+ * @zone_bits: A KBASE_REG_ZONE_<...> to query -+ * -+ * Return: True if the zone for @zone_bits is a context zone, False otherwise -+ */ -+static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits) -+{ -+ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); -+ return (zone_bits == KBASE_REG_ZONE_SAME_VA || -+#if MALI_USE_CSF -+ zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA || -+#endif -+ zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA); -+} ++ if (old_reg->start_pfn > start_pfn) ++ link = &(*link)->rb_left; ++ else ++ link = &(*link)->rb_right; ++ } + -+/* Special marker for failed JIT allocations that still must be marked as -+ * in-use -+ */ -+#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) ++ /* Put the new node there, and rebalance tree */ ++ rb_link_node(&(new_reg->rblink), parent, link); + -+static inline bool kbase_is_region_free(struct kbase_va_region *reg) -+{ -+ return (!reg || reg->flags & KBASE_REG_FREE); ++ rb_insert_color(&(new_reg->rblink), rbtree); +} + -+static inline bool kbase_is_region_invalid(struct kbase_va_region *reg) ++static struct kbase_va_region *find_region_enclosing_range_rbtree( ++ struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) +{ -+ return (!reg || reg->flags & KBASE_REG_VA_FREED); -+} ++ struct rb_node *rbnode; ++ struct kbase_va_region *reg; ++ u64 end_pfn = start_pfn + nr_pages; + -+static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) -+{ -+ /* Possibly not all functions that find regions would be using this -+ * helper, so they need to be checked when maintaining this function. -+ */ -+ return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); -+} ++ rbnode = rbtree->rb_node; + -+/** -+ * kbase_is_region_shrinkable - Check if a region is "shrinkable". -+ * A shrinkable regions is a region for which its backing pages (reg->gpu_alloc->pages) -+ * can be freed at any point, even though the kbase_va_region structure itself -+ * may have been refcounted. -+ * Regions that aren't on a shrinker, but could be shrunk at any point in future -+ * without warning are still considered "shrinkable" (e.g. Active JIT allocs) -+ * -+ * @reg: Pointer to region -+ * -+ * Return: true if the region is "shrinkable", false if not. 
-+ */ -+static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) -+{ -+ return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); -+} ++ while (rbnode) { ++ u64 tmp_start_pfn, tmp_end_pfn; + -+void kbase_remove_va_region(struct kbase_device *kbdev, -+ struct kbase_va_region *reg); -+static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, -+ struct kbase_va_region *reg) -+{ -+ /* If region was mapped then remove va region*/ -+ if (reg->start_pfn) -+ kbase_remove_va_region(kbdev, reg); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ tmp_start_pfn = reg->start_pfn; ++ tmp_end_pfn = reg->start_pfn + reg->nr_pages; + -+ /* To detect use-after-free in debug builds */ -+ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); -+ kfree(reg); ++ /* If start is lower than this, go left. */ ++ if (start_pfn < tmp_start_pfn) ++ rbnode = rbnode->rb_left; ++ /* If end is higher than this, then go right. */ ++ else if (end_pfn > tmp_end_pfn) ++ rbnode = rbnode->rb_right; ++ else /* Enclosing */ ++ return reg; ++ } ++ ++ return NULL; +} + -+static inline struct kbase_va_region *kbase_va_region_alloc_get( -+ struct kbase_context *kctx, struct kbase_va_region *region) ++struct kbase_va_region *kbase_find_region_enclosing_address( ++ struct rb_root *rbtree, u64 gpu_addr) +{ -+ WARN_ON(!kbase_refcount_read(®ion->va_refcnt)); -+ WARN_ON(kbase_refcount_read(®ion->va_refcnt) == INT_MAX); ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_node *rbnode; ++ struct kbase_va_region *reg; + -+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", -+ kbase_refcount_read(®ion->va_refcnt), (void *)region); -+ kbase_refcount_inc(®ion->va_refcnt); ++ rbnode = rbtree->rb_node; + -+ return region; -+} ++ while (rbnode) { ++ u64 tmp_start_pfn, tmp_end_pfn; + -+static inline struct kbase_va_region *kbase_va_region_alloc_put( -+ struct kbase_context *kctx, struct kbase_va_region *region) -+{ -+ WARN_ON(kbase_refcount_read(®ion->va_refcnt) <= 0); -+ WARN_ON(region->flags & KBASE_REG_FREE); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ tmp_start_pfn = reg->start_pfn; ++ tmp_end_pfn = reg->start_pfn + reg->nr_pages; + -+ if (kbase_refcount_dec_and_test(®ion->va_refcnt)) -+ kbase_region_refcnt_free(kctx->kbdev, region); -+ else -+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", -+ kbase_refcount_read(®ion->va_refcnt), (void *)region); ++ /* If start is lower than this, go left. */ ++ if (gpu_pfn < tmp_start_pfn) ++ rbnode = rbnode->rb_left; ++ /* If end is higher than this, then go right. */ ++ else if (gpu_pfn >= tmp_end_pfn) ++ rbnode = rbnode->rb_right; ++ else /* Enclosing */ ++ return reg; ++ } + + return NULL; +} + -+/** -+ * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region. -+ * A region that must not be freed by userspace indicates that it is owned by some other -+ * kbase subsystem, for example tiler heaps, JIT memory or CSF queues. -+ * Such regions must not be shrunk (i.e. have their backing pages freed), except by the -+ * current owner. -+ * Hence, callers cannot rely on this check alone to determine if a region might be shrunk -+ * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). -+ * -+ * @region: Pointer to region. -+ * -+ * Return: true if userspace cannot free the region, false if userspace can free the region. -+ */ -+static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region) ++/* Find region enclosing given address. 
*/ ++struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( ++ struct kbase_context *kctx, u64 gpu_addr) +{ -+ return atomic_read(®ion->no_user_free_count) > 0; -+} ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_root *rbtree = NULL; + -+/** -+ * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region. -+ * Calling this function will prevent the region to be shrunk by parts of kbase that -+ * don't own the region (as long as the count stays above zero). Refer to -+ * kbase_va_region_is_no_user_free() for more information. -+ * -+ * @region: Pointer to region (not shrinkable). -+ * -+ * Return: the pointer to the region passed as argument. -+ */ -+static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region) -+{ -+ WARN_ON(kbase_is_region_shrinkable(region)); -+ WARN_ON(atomic_read(®ion->no_user_free_count) == INT_MAX); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ /* non-atomic as kctx->reg_lock is held */ -+ atomic_inc(®ion->no_user_free_count); -+} ++ lockdep_assert_held(&kctx->reg_lock); + -+/** -+ * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region. -+ * -+ * @region: Pointer to region (not shrinkable). -+ */ -+static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region) -+{ -+ WARN_ON(!kbase_va_region_is_no_user_free(region)); ++ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + -+ atomic_dec(®ion->no_user_free_count); ++ return kbase_find_region_enclosing_address(rbtree, gpu_addr); +} + -+/* Common functions */ -+static inline struct tagged_addr *kbase_get_cpu_phy_pages( -+ struct kbase_va_region *reg) ++KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); ++ ++struct kbase_va_region *kbase_find_region_base_address( ++ struct rb_root *rbtree, u64 gpu_addr) +{ -+ KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_node *rbnode = NULL; ++ struct kbase_va_region *reg = NULL; + -+ return reg->cpu_alloc->pages; -+} ++ rbnode = rbtree->rb_node; + -+static inline struct tagged_addr *kbase_get_gpu_phy_pages( -+ struct kbase_va_region *reg) -+{ -+ KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ while (rbnode) { ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ if (reg->start_pfn > gpu_pfn) ++ rbnode = rbnode->rb_left; ++ else if (reg->start_pfn < gpu_pfn) ++ rbnode = rbnode->rb_right; ++ else ++ return reg; ++ } + -+ return reg->gpu_alloc->pages; ++ return NULL; +} + -+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) ++/* Find region with given base address */ ++struct kbase_va_region *kbase_region_tracker_find_region_base_address( ++ struct kbase_context *kctx, u64 gpu_addr) +{ -+ KBASE_DEBUG_ASSERT(reg); -+ /* if no alloc object the backed size naturally is 0 */ -+ if (!reg->cpu_alloc) -+ return 0; ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_root *rbtree = NULL; + -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ lockdep_assert_held(&kctx->reg_lock); + -+ return reg->cpu_alloc->nents; ++ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); ++ ++ return kbase_find_region_base_address(rbtree, gpu_addr); +} 
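Both lookup helpers above select the per-zone rbtree via kbase_gpu_va_to_rbtree() and expect kctx->reg_lock to be held. A caller sketch (illustrative only; kbase_gpu_vm_lock()/kbase_gpu_vm_unlock() are assumed to be the wrappers that take kctx->reg_lock, and kbase_is_region_invalid_or_free() is the validity helper from the mali_kbase_mem.h hunk above):

        struct kbase_va_region *reg;

        kbase_gpu_vm_lock(kctx);
        reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
        if (!kbase_is_region_invalid_or_free(reg)) {
                /* reg spans gpu_addr and has been neither freed nor VA-freed */
        }
        kbase_gpu_vm_unlock(kctx);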
+ -+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ ++KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); + -+static inline struct kbase_mem_phy_alloc *kbase_alloc_create( -+ struct kbase_context *kctx, size_t nr_pages, -+ enum kbase_memory_type type, int group_id) ++/* Find region meeting given requirements */ ++static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( ++ struct kbase_va_region *reg_reqs, ++ size_t nr_pages, size_t align_offset, size_t align_mask, ++ u64 *out_start_pfn) +{ -+ struct kbase_mem_phy_alloc *alloc; -+ size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; -+ size_t per_page_size = sizeof(*alloc->pages); -+ -+ /* Imported pages may have page private data already in use */ -+ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { -+ alloc_size += nr_pages * -+ sizeof(*alloc->imported.user_buf.dma_addrs); -+ per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); -+ } ++ struct rb_node *rbnode = NULL; ++ struct kbase_va_region *reg = NULL; ++ struct rb_root *rbtree = NULL; + -+ /* -+ * Prevent nr_pages*per_page_size + sizeof(*alloc) from -+ * wrapping around. ++ /* Note that this search is a linear search, as we do not have a target ++ * address in mind, so does not benefit from the rbtree search + */ -+ if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) -+ / per_page_size)) -+ return ERR_PTR(-ENOMEM); -+ -+ /* Allocate based on the size to reduce internal fragmentation of vmem */ -+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) -+ alloc = vzalloc(alloc_size); -+ else -+ alloc = kzalloc(alloc_size, GFP_KERNEL); -+ -+ if (!alloc) -+ return ERR_PTR(-ENOMEM); -+ -+ if (type == KBASE_MEM_TYPE_NATIVE) { -+ alloc->imported.native.nr_struct_pages = -+ (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; -+ kbase_process_page_usage_inc(kctx, -+ alloc->imported.native.nr_struct_pages); -+ } ++ rbtree = reg_reqs->rbtree; + -+ /* Store allocation method */ -+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) -+ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; ++ for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ if ((reg->nr_pages >= nr_pages) && ++ (reg->flags & KBASE_REG_FREE)) { ++ /* Check alignment */ ++ u64 start_pfn = reg->start_pfn; + -+ kref_init(&alloc->kref); -+ atomic_set(&alloc->gpu_mappings, 0); -+ atomic_set(&alloc->kernel_mappings, 0); -+ alloc->nents = 0; -+ alloc->pages = (void *)(alloc + 1); -+ INIT_LIST_HEAD(&alloc->mappings); -+ alloc->type = type; -+ alloc->group_id = group_id; ++ /* When align_offset == align, this sequence is ++ * equivalent to: ++ * (start_pfn + align_mask) & ~(align_mask) ++ * ++ * Otherwise, it aligns to n*align + offset, for the ++ * lowest value n that makes this still >start_pfn ++ */ ++ start_pfn += align_mask; ++ start_pfn -= (start_pfn - align_offset) & (align_mask); + -+ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) -+ alloc->imported.user_buf.dma_addrs = -+ (void *) (alloc->pages + nr_pages); ++ if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { ++ /* Can't end at 4GB boundary */ ++ if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) ++ start_pfn += align_offset; + -+ return alloc; -+} ++ /* Can't start at 4GB boundary */ ++ if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) ++ start_pfn += align_offset; + -+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, -+ struct kbase_context *kctx, int group_id) -+{ -+ 
KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(!reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(!reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); ++ if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || ++ !(start_pfn & BASE_MEM_PFN_MASK_4GB)) ++ continue; ++ } else if (reg_reqs->flags & ++ KBASE_REG_GPU_VA_SAME_4GB_PAGE) { ++ u64 end_pfn = start_pfn + nr_pages - 1; + -+ reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, -+ KBASE_MEM_TYPE_NATIVE, group_id); -+ if (IS_ERR(reg->cpu_alloc)) -+ return PTR_ERR(reg->cpu_alloc); -+ else if (!reg->cpu_alloc) -+ return -ENOMEM; ++ if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != ++ (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) ++ start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; ++ } + -+ reg->cpu_alloc->imported.native.kctx = kctx; -+ if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) -+ && (reg->flags & KBASE_REG_CPU_CACHED)) { -+ reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, -+ KBASE_MEM_TYPE_NATIVE, group_id); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) { -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ return -ENOMEM; ++ if ((start_pfn >= reg->start_pfn) && ++ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && ++ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { ++ *out_start_pfn = start_pfn; ++ return reg; ++ } + } -+ reg->gpu_alloc->imported.native.kctx = kctx; -+ } else { -+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + } + -+ mutex_lock(&kctx->jit_evict_lock); -+ INIT_LIST_HEAD(®->cpu_alloc->evict_node); -+ INIT_LIST_HEAD(®->gpu_alloc->evict_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ reg->flags &= ~KBASE_REG_FREE; -+ -+ return 0; -+} -+ -+/* -+ * Max size for kbdev memory pool (in pages) -+ */ -+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) -+ -+/* -+ * Max size for kctx memory pool (in pages) -+ */ -+#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) -+ -+/* -+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB) -+ */ -+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 -+ -+/* -+ * The order required for a 4KB page allocation -+ */ -+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 -+ -+/** -+ * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in -+ * initial configuration of a memory pool -+ * -+ * @config: Initial configuration for a physical memory pool -+ * @max_size: Maximum number of free pages that a pool created from -+ * @config can hold -+ */ -+static inline void kbase_mem_pool_config_set_max_size( -+ struct kbase_mem_pool_config *const config, size_t const max_size) -+{ -+ WRITE_ONCE(config->max_size, max_size); ++ return NULL; +} + +/** -+ * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from -+ * initial configuration of a memory pool ++ * kbase_remove_va_region - Remove a region object from the global list. + * -+ * @config: Initial configuration for a physical memory pool ++ * @kbdev: The kbase device ++ * @reg: Region object to remove + * -+ * Return: Maximum number of free pages that a pool created from @config -+ * can hold ++ * The region reg is removed, possibly by merging with other free and ++ * compatible adjacent regions. It must be called with the context ++ * region lock held. The associated memory is not released (see ++ * kbase_free_alloced_region). Internal use only. 
+ */ -+static inline size_t kbase_mem_pool_config_get_max_size( -+ const struct kbase_mem_pool_config *const config) ++void kbase_remove_va_region(struct kbase_device *kbdev, ++ struct kbase_va_region *reg) +{ -+ return READ_ONCE(config->max_size); -+} ++ struct rb_node *rbprev; ++ struct kbase_va_region *prev = NULL; ++ struct rb_node *rbnext; ++ struct kbase_va_region *next = NULL; ++ struct rb_root *reg_rbtree = NULL; ++ struct kbase_va_region *orig_reg = reg; + -+/** -+ * kbase_mem_pool_init - Create a memory pool for a kbase device -+ * @pool: Memory pool to initialize -+ * @config: Initial configuration for the memory pool -+ * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) -+ * @group_id: A memory group ID to be passed to a platform-specific -+ * memory group manager, if present. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @kbdev: Kbase device where memory is used -+ * @next_pool: Pointer to the next pool or NULL. -+ * -+ * Allocations from @pool are in whole pages. Each @pool has a free list where -+ * pages can be quickly allocated from. The free list is initially empty and -+ * filled whenever pages are freed back to the pool. The number of free pages -+ * in the pool will in general not exceed @max_size, but the pool may in -+ * certain corner cases grow above @max_size. -+ * -+ * If @next_pool is not NULL, we will allocate from @next_pool before going to -+ * the memory group manager. Similarly pages can spill over to @next_pool when -+ * @pool is full. Pages are zeroed before they spill over to another pool, to -+ * prevent leaking information between applications. -+ * -+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as -+ * needed. -+ * -+ * Return: 0 on success, negative -errno on error -+ */ -+int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, -+ unsigned int order, int group_id, struct kbase_device *kbdev, -+ struct kbase_mem_pool *next_pool); ++ int merged_front = 0; ++ int merged_back = 0; + -+/** -+ * kbase_mem_pool_term - Destroy a memory pool -+ * @pool: Memory pool to destroy -+ * -+ * Pages in the pool will spill over to @next_pool (if available) or freed to -+ * the kernel. -+ */ -+void kbase_mem_pool_term(struct kbase_mem_pool *pool); ++ reg_rbtree = reg->rbtree; + -+/** -+ * kbase_mem_pool_alloc - Allocate a page from memory pool -+ * @pool: Memory pool to allocate from -+ * -+ * Allocations from the pool are made as follows: -+ * 1. If there are free pages in the pool, allocate a page from @pool. -+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page -+ * from @next_pool. -+ * 3. Return NULL if no memory in the pool -+ * -+ * Return: Pointer to allocated page, or NULL if allocation failed. -+ * -+ * Note : This function should not be used if the pool lock is held. Use -+ * kbase_mem_pool_alloc_locked() instead. -+ */ -+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); ++ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) ++ return; + -+/** -+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool -+ * @pool: Memory pool to allocate from -+ * -+ * If there are free pages in the pool, this function allocates a page from -+ * @pool. This function does not use @next_pool. -+ * -+ * Return: Pointer to allocated page, or NULL if allocation failed. -+ * -+ * Note : Caller must hold the pool lock. 
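Since the kernel-doc above requires the pool lock for kbase_mem_pool_alloc_locked(), a caller sketch might look like this (illustrative only; kbase_mem_pool_lock()/unlock() are the pool-lock helpers used in the kernel-doc of kbase_mem_pool_alloc_pages_locked() further below, and kbase_mem_pool_free() is declared just after this comment):

        struct page *p;

        kbase_mem_pool_lock(pool);
        p = kbase_mem_pool_alloc_locked(pool); /* NULL if this pool has no free pages */
        kbase_mem_pool_unlock(pool);

        if (p)
                kbase_mem_pool_free(pool, p, false); /* page not dirty in the CPU cache */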
-+ */ -+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); ++ /* Try to merge with the previous block first */ ++ rbprev = rb_prev(&(reg->rblink)); ++ if (rbprev) { ++ prev = rb_entry(rbprev, struct kbase_va_region, rblink); ++ if (prev->flags & KBASE_REG_FREE) { ++ /* We're compatible with the previous VMA, merge with ++ * it, handling any gaps for robustness. ++ */ ++ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; + -+/** -+ * kbase_mem_pool_free - Free a page to memory pool -+ * @pool: Memory pool where page should be freed -+ * @page: Page to free to the pool -+ * @dirty: Whether some of the page may be dirty in the cache. -+ * -+ * Pages are freed to the pool as follows: -+ * 1. If @pool is not full, add @page to @pool. -+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to -+ * @next_pool. -+ * 3. Finally, free @page to the kernel. -+ * -+ * Note : This function should not be used if the pool lock is held. Use -+ * kbase_mem_pool_free_locked() instead. -+ */ -+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, -+ bool dirty); ++ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != ++ (reg->flags & KBASE_REG_ZONE_MASK)); ++ if (!WARN_ON(reg->start_pfn < prev_end_pfn)) ++ prev->nr_pages += reg->start_pfn - prev_end_pfn; ++ prev->nr_pages += reg->nr_pages; ++ rb_erase(&(reg->rblink), reg_rbtree); ++ reg = prev; ++ merged_front = 1; ++ } ++ } + -+/** -+ * kbase_mem_pool_free_locked - Free a page to memory pool -+ * @pool: Memory pool where page should be freed -+ * @p: Page to free to the pool -+ * @dirty: Whether some of the page may be dirty in the cache. -+ * -+ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is -+ * freed to the kernel. This function does not use @next_pool. -+ * -+ * Note : Caller must hold the pool lock. -+ */ -+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, -+ bool dirty); ++ /* Try to merge with the next block second */ ++ /* Note we do the lookup here as the tree may have been rebalanced. */ ++ rbnext = rb_next(&(reg->rblink)); ++ if (rbnext) { ++ next = rb_entry(rbnext, struct kbase_va_region, rblink); ++ if (next->flags & KBASE_REG_FREE) { ++ /* We're compatible with the next VMA, merge with it, ++ * handling any gaps for robustness. ++ */ ++ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; + -+/** -+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool -+ * @pool: Memory pool to allocate from -+ * @nr_4k_pages: Number of pages to allocate -+ * @pages: Pointer to array where the physical address of the allocated -+ * pages will be stored. -+ * @partial_allowed: If fewer pages allocated is allowed -+ * @page_owner: Pointer to the task that created the Kbase context for which -+ * the pages are being allocated. It can be NULL if the pages -+ * won't be associated with any Kbase context. -+ * -+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. -+ * -+ * Return: -+ * On success number of pages allocated (could be less than nr_pages if -+ * partial_allowed). -+ * On error an error code. -+ * -+ * Note : This function should not be used if the pool lock is held. Use -+ * kbase_mem_pool_alloc_pages_locked() instead. -+ * -+ * The caller must not hold vm_lock, as this could cause a deadlock if -+ * the kernel OoM killer runs. If the caller must allocate pages while holding -+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. 
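The kernel-doc for kbase_mem_pool_alloc_pages_locked() below gives this grow-then-allocate pattern in outline; a slightly more concrete sketch (illustrative only; pages_required, the pages array, nr_allocated and the error handling are assumptions, and the number of pages passed to kbase_mem_pool_grow() is deliberately generous):

        kbase_gpu_vm_lock(kctx);
        kbase_mem_pool_lock(pool);
        while (kbase_mem_pool_size(pool) < pages_required) {
                kbase_mem_pool_unlock(pool);
                kbase_gpu_vm_unlock(kctx);
                /* NULL page_owner: the pages are not tied to a specific Kbase context. */
                if (kbase_mem_pool_grow(pool, pages_required, NULL))
                        return -ENOMEM;
                kbase_gpu_vm_lock(kctx);
                kbase_mem_pool_lock(pool);
        }
        nr_allocated = kbase_mem_pool_alloc_pages_locked(pool, pages_required, pages);
        kbase_mem_pool_unlock(pool);
        /* ... processing that requires vm_lock ... */
        kbase_gpu_vm_unlock(kctx);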
-+ */ -+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, -+ struct tagged_addr *pages, bool partial_allowed, -+ struct task_struct *page_owner); ++ WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != ++ (reg->flags & KBASE_REG_ZONE_MASK)); ++ if (!WARN_ON(next->start_pfn < reg_end_pfn)) ++ next->nr_pages += next->start_pfn - reg_end_pfn; ++ next->start_pfn = reg->start_pfn; ++ next->nr_pages += reg->nr_pages; ++ rb_erase(&(reg->rblink), reg_rbtree); ++ merged_back = 1; ++ } ++ } + -+/** -+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool -+ * @pool: Memory pool to allocate from -+ * @nr_4k_pages: Number of pages to allocate -+ * @pages: Pointer to array where the physical address of the allocated -+ * pages will be stored. -+ * -+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This -+ * version does not allocate new pages from the kernel, and therefore will never -+ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. -+ * -+ * As new pages can not be allocated, the caller must ensure there are -+ * sufficient pages in the pool. Usage of this function should look like : -+ * -+ * kbase_gpu_vm_lock(kctx); -+ * kbase_mem_pool_lock(pool) -+ * while (kbase_mem_pool_size(pool) < pages_required) { -+ * kbase_mem_pool_unlock(pool) -+ * kbase_gpu_vm_unlock(kctx); -+ * kbase_mem_pool_grow(pool) -+ * kbase_gpu_vm_lock(kctx); -+ * kbase_mem_pool_lock(pool) -+ * } -+ * kbase_mem_pool_alloc_pages_locked(pool) -+ * kbase_mem_pool_unlock(pool) -+ * Perform other processing that requires vm_lock... -+ * kbase_gpu_vm_unlock(kctx); -+ * -+ * This ensures that the pool can be grown to the required size and that the -+ * allocation can complete without another thread using the newly grown pages. -+ * -+ * Return: -+ * On success number of pages allocated. -+ * On error an error code. -+ * -+ * Note : Caller must hold the pool lock. -+ */ -+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, -+ size_t nr_4k_pages, struct tagged_addr *pages); ++ if (merged_front && merged_back) { ++ /* We already merged with prev, free it */ ++ kfree(reg); ++ } else if (!(merged_front || merged_back)) { ++ /* If we failed to merge then we need to add a new block */ + -+/** -+ * kbase_mem_pool_free_pages - Free pages to memory pool -+ * @pool: Memory pool where pages should be freed -+ * @nr_pages: Number of pages to free -+ * @pages: Pointer to array holding the physical addresses of the pages to -+ * free. -+ * @dirty: Whether any pages may be dirty in the cache. -+ * @reclaimed: Whether the pages where reclaimable and thus should bypass -+ * the pool and go straight to the kernel. -+ * -+ * Like kbase_mem_pool_free() but optimized for freeing many pages. -+ */ -+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ struct tagged_addr *pages, bool dirty, bool reclaimed); ++ /* ++ * We didn't merge anything. Try to add a new free ++ * placeholder, and in any case, remove the original one. ++ */ ++ struct kbase_va_region *free_reg; + -+/** -+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool -+ * @pool: Memory pool where pages should be freed -+ * @nr_pages: Number of pages to free -+ * @pages: Pointer to array holding the physical addresses of the pages to -+ * free. -+ * @dirty: Whether any pages may be dirty in the cache. -+ * @reclaimed: Whether the pages where reclaimable and thus should bypass -+ * the pool and go straight to the kernel. 
-+ * -+ * Like kbase_mem_pool_free() but optimized for freeing many pages. -+ */ -+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, -+ size_t nr_pages, struct tagged_addr *pages, bool dirty, -+ bool reclaimed); ++ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, ++ reg->flags & KBASE_REG_ZONE_MASK); ++ if (!free_reg) { ++ /* In case of failure, we cannot allocate a replacement ++ * free region, so we will be left with a 'gap' in the ++ * region tracker's address range (though, the rbtree ++ * will itself still be correct after erasing ++ * 'reg'). ++ * ++ * The gap will be rectified when an adjacent region is ++ * removed by one of the above merging paths. Other ++ * paths will gracefully fail to allocate if they try ++ * to allocate in the gap. ++ * ++ * There is nothing that the caller can do, since free ++ * paths must not fail. The existing 'reg' cannot be ++ * repurposed as the free region as callers must have ++ * freedom of use with it by virtue of it being owned ++ * by them, not the region tracker insert/remove code. ++ */ ++ dev_warn( ++ kbdev->dev, ++ "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", ++ (unsigned long long)reg->start_pfn << PAGE_SHIFT, ++ (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); ++ rb_erase(&(reg->rblink), reg_rbtree); + -+/** -+ * kbase_mem_pool_size - Get number of free pages in memory pool -+ * @pool: Memory pool to inspect -+ * -+ * Note: the size of the pool may in certain corner cases exceed @max_size! -+ * -+ * Return: Number of free pages in the pool -+ */ -+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) -+{ -+ return READ_ONCE(pool->cur_size); -+} ++ goto out; ++ } ++ rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); ++ } + -+/** -+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool -+ * @pool: Memory pool to inspect -+ * -+ * Return: Maximum number of free pages in the pool -+ */ -+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) -+{ -+ return pool->max_size; -+} ++ /* This operation is always safe because the function never frees ++ * the region. If the region has been merged to both front and back, ++ * then it's the previous region that is supposed to be freed. ++ */ ++ orig_reg->start_pfn = 0; + ++out: ++ return; ++} + -+/** -+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool -+ * @pool: Memory pool to inspect -+ * @max_size: Maximum number of free pages the pool can hold -+ * -+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. -+ * For details see kbase_mem_pool_shrink(). -+ */ -+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); ++KBASE_EXPORT_TEST_API(kbase_remove_va_region); + +/** -+ * kbase_mem_pool_grow - Grow the pool -+ * @pool: Memory pool to grow -+ * @nr_to_grow: Number of pages to add to the pool -+ * @page_owner: Pointer to the task that created the Kbase context for which -+ * the memory pool is being grown. It can be NULL if the pages -+ * to be allocated won't be associated with any Kbase context. ++ * kbase_insert_va_region_nolock - Insert a VA region to the list, ++ * replacing the existing one. + * -+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to -+ * become larger than the maximum size specified. 
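The kbase_remove_va_region() hunk above frees a VA region by folding it into a free predecessor first and a free successor second; when both merges happen the leftover descriptor is kfree'd, and only when neither happens does it allocate a replacement free placeholder. A minimal user-space sketch of that coalescing order, using a sorted array instead of the driver's kbase_va_region rbtree (struct va_block and free_and_coalesce() below are illustrative stand-ins, not kbase code):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for a VA region: [start_pfn, start_pfn + nr_pages). */
struct va_block {
        unsigned long long start_pfn;
        size_t nr_pages;
        bool free;
};

static void remove_at(struct va_block *b, size_t *count, size_t idx)
{
        for (size_t i = idx; i + 1 < *count; i++)
                b[i] = b[i + 1];
        (*count)--;
}

/* Free b[idx], merging with the front first and the back second, as above. */
static void free_and_coalesce(struct va_block *b, size_t *count, size_t idx)
{
        b[idx].free = true;

        if (idx > 0 && b[idx - 1].free) {
                /* Predecessor absorbs this block (merge front). */
                b[idx - 1].nr_pages =
                        b[idx].start_pfn + b[idx].nr_pages - b[idx - 1].start_pfn;
                remove_at(b, count, idx);
                idx--;
        }
        if (idx + 1 < *count && b[idx + 1].free) {
                /* This block absorbs its successor (merge back). */
                b[idx].nr_pages =
                        b[idx + 1].start_pfn + b[idx + 1].nr_pages - b[idx].start_pfn;
                remove_at(b, count, idx + 1);
        }
}

int main(void)
{
        struct va_block b[] = {
                { 0x1000, 0x10, true }, { 0x1010, 0x08, false }, { 0x1018, 0x20, true },
        };
        size_t count = 3;

        free_and_coalesce(b, &count, 1);
        printf("%zu block(s); first spans 0x%llx..0x%llx\n", count,
               b[0].start_pfn, b[0].start_pfn + b[0].nr_pages);
        return 0;
}

The end-based size arithmetic mirrors the hunk's "handling any gaps for robustness" adjustments: a merge absorbs any hole between the two blocks rather than assuming they touch exactly.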
++ * @kbdev: The kbase device ++ * @new_reg: The new region to insert ++ * @at_reg: The region to replace ++ * @start_pfn: The Page Frame Number to insert at ++ * @nr_pages: The number of pages of the region + * -+ * Return: 0 on success, -ENOMEM if unable to allocate sufficent pages ++ * Return: 0 on success, error code otherwise. + */ -+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, -+ struct task_struct *page_owner); ++static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, ++ struct kbase_va_region *new_reg, ++ struct kbase_va_region *at_reg, u64 start_pfn, ++ size_t nr_pages) ++{ ++ struct rb_root *reg_rbtree = NULL; ++ int err = 0; + -+/** -+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size -+ * @pool: Memory pool to trim -+ * @new_size: New number of pages in the pool -+ * -+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but -+ * not above the max_size for the pool. -+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. -+ */ -+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); ++ reg_rbtree = at_reg->rbtree; + -+/** -+ * kbase_mem_pool_mark_dying - Mark that this pool is dying -+ * @pool: Memory pool -+ * -+ * This will cause any ongoing allocation operations (eg growing on page fault) -+ * to be terminated. -+ */ -+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); ++ /* Must be a free region */ ++ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); ++ /* start_pfn should be contained within at_reg */ ++ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); ++ /* at least nr_pages from start_pfn should be contained within at_reg */ ++ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); ++ /* having at_reg means the rb_tree should not be empty */ ++ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) ++ return -ENOMEM; + -+/** -+ * kbase_mem_alloc_page - Allocate a new page for a device -+ * @pool: Memory pool to allocate a page from -+ * -+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that -+ * function can fail in the event the pool is empty. -+ * -+ * Return: A new page or NULL if no memory -+ */ -+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); ++ new_reg->start_pfn = start_pfn; ++ new_reg->nr_pages = nr_pages; + -+/** -+ * kbase_mem_pool_free_page - Free a page from a memory pool. -+ * @pool: Memory pool to free a page from -+ * @p: Page to free -+ * -+ * This will free any associated data stored for the page and release -+ * the page back to the kernel. -+ */ -+void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p); ++ /* Regions are a whole use, so swap and delete old one. */ ++ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { ++ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), ++ reg_rbtree); ++ kfree(at_reg); ++ } ++ /* New region replaces the start of the old one, so insert before. */ ++ else if (at_reg->start_pfn == start_pfn) { ++ at_reg->start_pfn += nr_pages; ++ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); ++ at_reg->nr_pages -= nr_pages; + -+/** -+ * kbase_region_tracker_init - Initialize the region tracker data structure -+ * @kctx: kbase context -+ * -+ * Return: 0 if success, negative error code otherwise. 
-+ */ -+int kbase_region_tracker_init(struct kbase_context *kctx); ++ kbase_region_tracker_insert(new_reg); ++ } ++ /* New region replaces the end of the old one, so insert after. */ ++ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { ++ at_reg->nr_pages -= nr_pages; + -+/** -+ * kbase_region_tracker_init_jit - Initialize the just-in-time memory -+ * allocation region -+ * @kctx: Kbase context. -+ * @jit_va_pages: Size of the JIT region in pages. -+ * @max_allocations: Maximum number of allocations allowed for the JIT region. -+ * Valid range is 0..%BASE_JIT_ALLOC_COUNT. -+ * @trim_level: Trim level for the JIT region. -+ * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. -+ * @group_id: The physical group ID from which to allocate JIT memory. -+ * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @phys_pages_limit: Maximum number of physical pages to use to back the JIT -+ * region. Must not exceed @jit_va_pages. -+ * -+ * Return: 0 if success, negative error code otherwise. -+ */ -+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, -+ int max_allocations, int trim_level, int group_id, -+ u64 phys_pages_limit); ++ kbase_region_tracker_insert(new_reg); ++ } ++ /* New region splits the old one, so insert and create new */ ++ else { ++ struct kbase_va_region *new_front_reg; + -+/** -+ * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region -+ * @kctx: kbase context -+ * @exec_va_pages: Size of the JIT region in pages. -+ * It must not be greater than 4 GB. -+ * -+ * Return: 0 if success, negative error code otherwise. -+ */ -+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); ++ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, ++ start_pfn - at_reg->start_pfn, ++ at_reg->flags & KBASE_REG_ZONE_MASK); + -+/** -+ * kbase_region_tracker_term - Terminate the JIT region -+ * @kctx: kbase context -+ */ -+void kbase_region_tracker_term(struct kbase_context *kctx); ++ if (new_front_reg) { ++ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; ++ at_reg->start_pfn = start_pfn + nr_pages; + -+/** -+ * kbase_region_tracker_term_rbtree - Free memory for a region tracker -+ * -+ * @rbtree: Region tracker tree root -+ * -+ * This will free all the regions within the region tracker -+ */ -+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); ++ kbase_region_tracker_insert(new_front_reg); ++ kbase_region_tracker_insert(new_reg); ++ } else { ++ err = -ENOMEM; ++ } ++ } + -+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( -+ struct kbase_context *kctx, u64 gpu_addr); -+struct kbase_va_region *kbase_find_region_enclosing_address( -+ struct rb_root *rbtree, u64 gpu_addr); ++ return err; ++} + +/** -+ * kbase_region_tracker_find_region_base_address - Check that a pointer is -+ * actually a valid region. -+ * @kctx: kbase context containing the region -+ * @gpu_addr: pointer to check ++ * kbase_add_va_region - Add a VA region to the region list for a context. + * -+ * Must be called with context lock held. ++ * @kctx: kbase context containing the region ++ * @reg: the region to add ++ * @addr: the address to insert the region at ++ * @nr_pages: the number of pages in the region ++ * @align: the minimum alignment in pages + * -+ * Return: pointer to the valid region on success, NULL otherwise ++ * Return: 0 on success, error code otherwise. 
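kbase_insert_va_region_nolock() above distinguishes four ways the new mapping can sit inside the free region that encloses it: an exact fit, flush with the start, flush with the end, or strictly inside, which splits the free region in two. Of the four, only the split has to allocate a second free region and is therefore the only case that can return -ENOMEM. A compact restatement of that case analysis on plain integers (the enum and helper below are purely illustrative, not driver types):

#include <stdint.h>
#include <stdio.h>

enum carve_case { CARVE_EXACT, CARVE_AT_START, CARVE_AT_END, CARVE_SPLIT };

/*
 * Classify how [start, start + len) is carved out of the enclosing free
 * range [free_start, free_start + free_len), mirroring the if/else chain above.
 */
static enum carve_case classify_carve(uint64_t free_start, uint64_t free_len,
                                      uint64_t start, uint64_t len)
{
        if (free_start == start && free_len == len)
                return CARVE_EXACT;      /* old free region is replaced wholesale */
        if (free_start == start)
                return CARVE_AT_START;   /* free region shrinks from the front */
        if (free_start + free_len == start + len)
                return CARVE_AT_END;     /* free region shrinks from the back */
        return CARVE_SPLIT;              /* a new free region is needed in front */
}

int main(void)
{
        static const char *const names[] = { "exact", "at-start", "at-end", "split" };

        printf("%s\n", names[classify_carve(0x100, 0x80, 0x100, 0x80)]);
        printf("%s\n", names[classify_carve(0x100, 0x80, 0x100, 0x20)]);
        printf("%s\n", names[classify_carve(0x100, 0x80, 0x160, 0x20)]);
        printf("%s\n", names[classify_carve(0x100, 0x80, 0x120, 0x20)]);
        return 0;
}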
+ */ -+struct kbase_va_region *kbase_region_tracker_find_region_base_address( -+ struct kbase_context *kctx, u64 gpu_addr); -+struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, -+ u64 gpu_addr); ++int kbase_add_va_region(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 addr, ++ size_t nr_pages, size_t align) ++{ ++ int err = 0; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); ++ int gpu_pc_bits = ++ kbdev->gpu_props.props.core_props.log2_program_counter_size; + -+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, -+ u64 start_pfn, size_t nr_pages, int zone); -+void kbase_free_alloced_region(struct kbase_va_region *reg); -+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 addr, size_t nr_pages, size_t align); -+int kbase_add_va_region_rbtree(struct kbase_device *kbdev, -+ struct kbase_va_region *reg, u64 addr, size_t nr_pages, -+ size_t align); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); + -+bool kbase_check_alloc_flags(unsigned long flags); -+bool kbase_check_import_flags(unsigned long flags); ++ lockdep_assert_held(&kctx->reg_lock); + -+static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages) -+{ -+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { -+ dev_dbg( -+ kbdev->dev, -+ "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", -+ (unsigned long long)va_pages); -+ return false; ++ /* The executable allocation from the SAME_VA zone should already have an ++ * appropriately aligned GPU VA chosen for it. ++ * Also, executable allocations from EXEC_VA don't need the special ++ * alignment. ++ */ ++#if MALI_USE_CSF ++ /* The same is also true for the EXEC_FIXED_VA zone. ++ */ ++#endif ++ if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && ++#if MALI_USE_CSF ++ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && ++#endif ++ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { ++ if (cpu_va_bits > gpu_pc_bits) { ++ align = max(align, (size_t)((1ULL << gpu_pc_bits) ++ >> PAGE_SHIFT)); ++ } + } + -+ return true; -+} ++ do { ++ err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, ++ align); ++ if (err != -ENOMEM) ++ break; + -+static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages) -+{ -+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { -+ dev_dbg( -+ kbdev->dev, -+ "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", -+ (unsigned long long)va_pages); -+ return false; -+ } ++ /* ++ * If the allocation is not from the same zone as JIT ++ * then don't retry, we're out of VA and there is ++ * nothing which can be done about it. ++ */ ++ if ((reg->flags & KBASE_REG_ZONE_MASK) != ++ KBASE_REG_ZONE_CUSTOM_VA) ++ break; ++ } while (kbase_jit_evict(kctx)); + -+ return true; ++ return err; +} + -+/** -+ * kbase_check_alloc_sizes - check user space sizes parameters for an -+ * allocation -+ * -+ * @kctx: kbase context -+ * @flags: The flags passed from user space -+ * @va_pages: The size of the requested region, in pages. -+ * @commit_pages: Number of pages to commit initially. -+ * @extension: Number of pages to grow by on GPU page fault and/or alignment -+ * (depending on flags) -+ * -+ * Makes checks on the size parameters passed in from user space for a memory -+ * allocation call, with respect to the flags requested. 
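kbase_add_va_region() above first widens the alignment of executable mappings that arrive without a preselected GPU VA so they stay within the GPU program counter's reach, then loops: if the rbtree insert reports -ENOMEM for a CUSTOM_VA (JIT) allocation it evicts one just-in-time region and retries, and it gives up as soon as the failure is in any other zone or there is nothing left to evict. A small sketch of that retry shape, with hypothetical callbacks standing in for kbase_add_va_region_rbtree() and kbase_jit_evict():

#include <errno.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the driver calls used by the loop above. */
typedef int  (*try_insert_fn)(void *ctx);   /* 0 on success, -ENOMEM when out of VA */
typedef bool (*evict_one_fn)(void *ctx);    /* true if an eviction freed some VA */

int add_with_eviction_retry(void *ctx, bool is_custom_va_zone,
                            try_insert_fn try_insert, evict_one_fn evict_one)
{
        int err;

        do {
                err = try_insert(ctx);
                if (err != -ENOMEM)
                        break;          /* success, or a failure eviction cannot fix */
                if (!is_custom_va_zone)
                        break;          /* only the JIT (CUSTOM_VA) zone is retried */
        } while (evict_one(ctx));       /* stop once there is nothing left to evict */

        return err;
}

Only the control flow is modelled here; the real function additionally asserts that kctx->reg_lock is held for the whole sequence.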
-+ * -+ * Return: 0 if sizes are valid for these flags, negative error code otherwise -+ */ -+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, -+ u64 va_pages, u64 commit_pages, u64 extension); ++KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** -+ * kbase_update_region_flags - Convert user space flags to kernel region flags ++ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree + * -+ * @kctx: kbase context -+ * @reg: The region to update the flags on -+ * @flags: The flags passed from user space ++ * @kbdev: The kbase device ++ * @reg: The region to add ++ * @addr: The address to add the region at, or 0 to map at any available address ++ * @nr_pages: The size of the region in pages ++ * @align: The minimum alignment in pages + * -+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and -+ * this function will fail if the system does not support system coherency. ++ * Insert a region into the rbtree that was specified when the region was ++ * created. If addr is 0 a free area in the rbtree is used, otherwise the ++ * specified address is used. + * -+ * Return: 0 if successful, -EINVAL if the flags are not supported ++ * Return: 0 on success, error code otherwise. + */ -+int kbase_update_region_flags(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned long flags); ++int kbase_add_va_region_rbtree(struct kbase_device *kbdev, ++ struct kbase_va_region *reg, ++ u64 addr, size_t nr_pages, size_t align) ++{ ++ struct device *const dev = kbdev->dev; ++ struct rb_root *rbtree = NULL; ++ struct kbase_va_region *tmp; ++ u64 gpu_pfn = addr >> PAGE_SHIFT; ++ int err = 0; + -+/** -+ * kbase_gpu_vm_lock() - Acquire the per-context region list lock -+ * @kctx: KBase context -+ * -+ * Care must be taken when making an allocation whilst holding this lock, because of interaction -+ * with the Kernel's OoM-killer and use of this lock in &vm_operations_struct close() handlers. -+ * -+ * If this lock is taken during a syscall, and/or the allocation is 'small' then it is safe to use. -+ * -+ * If the caller is not in a syscall, and the allocation is 'large', then it must not hold this -+ * lock. -+ * -+ * This is because the kernel OoM killer might target the process corresponding to that same kbase -+ * context, and attempt to call the context's close() handlers for its open VMAs. This is safe if -+ * the allocating caller is in a syscall, because the VMA close() handlers are delayed until all -+ * syscalls have finished (noting that no new syscalls can start as the remaining user threads will -+ * have been killed too), and so there is no possibility of contention between the thread -+ * allocating with this lock held, and the VMA close() handler. -+ * -+ * However, outside of a syscall (e.g. a kworker or other kthread), one of kbase's VMA close() -+ * handlers (kbase_cpu_vm_close()) also takes this lock, and so prevents the process from being -+ * killed until the caller of the function allocating memory has released this lock. On subsequent -+ * retries for allocating a page, the OoM killer would be re-invoked but skips over the process -+ * stuck in its close() handler. 
-+ * -+ * Also because the caller is not in a syscall, the page allocation code in the kernel is not aware -+ * that the allocation is being done on behalf of another process, and so does not realize that -+ * process has received a kill signal due to an OoM, and so will continually retry with the OoM -+ * killer until enough memory has been released, or until all other killable processes have been -+ * killed (at which point the kernel halts with a panic). -+ * -+ * However, if the allocation outside of a syscall is small enough to be satisfied by killing -+ * another process, then the allocation completes, the caller releases this lock, and -+ * kbase_cpu_vm_close() can unblock and allow the process to be killed. -+ * -+ * Hence, this is effectively a deadlock with kbase_cpu_vm_close(), except that if the memory -+ * allocation is small enough the deadlock can be resolved. For that reason, such a memory deadlock -+ * is NOT discovered with CONFIG_PROVE_LOCKING. -+ * -+ * If this may be called outside of a syscall, consider moving allocations outside of this lock, or -+ * use __GFP_NORETRY for such allocations (which will allow direct-reclaim attempts, but will -+ * prevent OoM kills to satisfy the allocation, and will just fail the allocation instead). -+ */ -+void kbase_gpu_vm_lock(struct kbase_context *kctx); ++ rbtree = reg->rbtree; + -+/** -+ * kbase_gpu_vm_unlock() - Release the per-context region list lock -+ * @kctx: KBase context -+ */ -+void kbase_gpu_vm_unlock(struct kbase_context *kctx); ++ if (!align) ++ align = 1; + -+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); ++ /* must be a power of 2 */ ++ KBASE_DEBUG_ASSERT(is_power_of_2(align)); ++ KBASE_DEBUG_ASSERT(nr_pages > 0); + -+/** -+ * kbase_gpu_mmap - Register region and map it on the GPU. -+ * -+ * @kctx: kbase context containing the region -+ * @reg: the region to add -+ * @addr: the address to insert the region at -+ * @nr_pages: the number of pages in the region -+ * @align: the minimum alignment in pages -+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. -+ * -+ * Call kbase_add_va_region() and map the region on the GPU. -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 addr, size_t nr_pages, size_t align, -+ enum kbase_caller_mmu_sync_info mmu_sync_info); ++ /* Path 1: Map a specific address. Find the enclosing region, ++ * which *must* be free. ++ */ ++ if (gpu_pfn) { ++ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + -+/** -+ * kbase_gpu_munmap - Remove the region from the GPU and unregister it. -+ * -+ * @kctx: KBase context -+ * @reg: The region to remove -+ * -+ * Must be called with context lock held. -+ * -+ * Return: 0 on success, error code otherwise. 
-+ */ -+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); ++ tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, ++ nr_pages); ++ if (kbase_is_region_invalid(tmp)) { ++ dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); ++ err = -ENOMEM; ++ goto exit; ++ } else if (!kbase_is_region_free(tmp)) { ++ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", ++ tmp->start_pfn, tmp->flags, ++ tmp->nr_pages, gpu_pfn, nr_pages); ++ err = -ENOMEM; ++ goto exit; ++ } + -+/** -+ * kbase_mmu_update - Configure an address space on the GPU to the specified -+ * MMU tables -+ * -+ * @kbdev: Kbase device structure -+ * @mmut: The set of MMU tables to be configured on the address space -+ * @as_nr: The address space to be configured -+ * -+ * The caller has the following locking conditions: -+ * - It must hold kbase_device->mmu_hw_mutex -+ * - It must hold the hwaccess_lock -+ */ -+void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ int as_nr); ++ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); ++ if (err) { ++ dev_warn(dev, "Failed to insert va region"); ++ err = -ENOMEM; ++ } ++ } else { ++ /* Path 2: Map any free address which meets the requirements. */ ++ u64 start_pfn; ++ size_t align_offset = align; ++ size_t align_mask = align - 1; + -+/** -+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. -+ * @kctx: Kbase context -+ * -+ * Disable and perform the required cache maintenance to remove the all -+ * data from provided kbase context from the GPU caches. -+ * -+ * The caller has the following locking conditions: -+ * - It must hold kbase_device->mmu_hw_mutex -+ * - It must hold the hwaccess_lock -+ */ -+void kbase_mmu_disable(struct kbase_context *kctx); ++#if !MALI_USE_CSF ++ if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { ++ WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", ++ __func__, ++ (unsigned long)align); ++ align_mask = reg->extension - 1; ++ align_offset = reg->extension - reg->initial_commit; ++ } ++#endif /* !MALI_USE_CSF */ + -+/** -+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified -+ * address space. -+ * @kbdev: Kbase device -+ * @as_nr: The address space number to set to unmapped. -+ * -+ * This function must only be called during reset/power-up and it used to -+ * ensure the registers are in a known state. -+ * -+ * The caller must hold kbdev->mmu_hw_mutex. -+ */ -+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); ++ tmp = kbase_region_tracker_find_region_meeting_reqs(reg, ++ nr_pages, align_offset, align_mask, ++ &start_pfn); ++ if (tmp) { ++ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); ++ if (unlikely(err)) { ++ dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", ++ start_pfn, nr_pages); ++ } ++ } else { ++ dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", ++ nr_pages, align_offset, align_mask); ++ err = -ENOMEM; ++ } ++ } + -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); ++exit: ++ return err; ++} + -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+/** -+ * kbase_mmu_dump() - Dump the MMU tables to a buffer. -+ * -+ * @kctx: The kbase context to dump -+ * @nr_pages: The number of pages to allocate for the buffer. 
-+ * -+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump -+ * of the MMU tables and fills it. If the buffer is too small -+ * then the return value will be NULL. -+ * -+ * The GPU vm lock must be held when calling this function. -+ * -+ * The buffer returned should be freed with @ref vfree when it is no longer -+ * required. -+ * -+ * Return: The address of the buffer containing the MMU dump or NULL on error -+ * (including if the @c nr_pages is too small) ++/* ++ * @brief Initialize the internal region tracker data structure. + */ -+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); -+#endif ++#if MALI_USE_CSF ++static void kbase_region_tracker_ds_init(struct kbase_context *kctx, ++ struct kbase_va_region *same_va_reg, ++ struct kbase_va_region *custom_va_reg, ++ struct kbase_va_region *exec_va_reg, ++ struct kbase_va_region *exec_fixed_va_reg, ++ struct kbase_va_region *fixed_va_reg) ++{ ++ u64 last_zone_end_pfn; + -+/** -+ * kbase_sync_now - Perform cache maintenance on a memory region -+ * -+ * @kctx: The kbase context of the region -+ * @sset: A syncset structure describing the region and direction of the -+ * synchronisation required -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); -+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, -+ struct tagged_addr gpu_pa, off_t offset, size_t size, -+ enum kbase_sync_type sync_fn); ++ kctx->reg_rbtree_same = RB_ROOT; ++ kbase_region_tracker_insert(same_va_reg); + -+/* OS specific functions */ -+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); -+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); -+void kbase_os_mem_map_lock(struct kbase_context *kctx); -+void kbase_os_mem_map_unlock(struct kbase_context *kctx); ++ last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; + -+/** -+ * kbasep_os_process_page_usage_update() - Update the memory allocation -+ * counters for the current process. -+ * -+ * @kctx: The kbase context -+ * @pages: The desired delta to apply to the memory usage counters. -+ * -+ * OS specific call to updates the current memory allocation counters -+ * for the current process with the supplied delta. -+ */ ++ /* Although custom_va_reg doesn't always exist, initialize ++ * unconditionally because of the mem_view debugfs ++ * implementation which relies on it being empty. ++ */ ++ kctx->reg_rbtree_custom = RB_ROOT; ++ kctx->reg_rbtree_exec = RB_ROOT; + -+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); ++ if (custom_va_reg) { ++ WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); ++ kbase_region_tracker_insert(custom_va_reg); ++ last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; ++ } + -+/** -+ * kbase_process_page_usage_inc() - Add to the memory allocation counters for -+ * the current process -+ * -+ * @kctx: The kernel base context used for the allocation. -+ * @pages: The desired delta to apply to the memory usage counters. -+ * -+ * OS specific call to add to the current memory allocation counters for -+ * the current process by the supplied amount. -+ */ ++ /* Initialize exec, fixed and exec_fixed. These are always ++ * initialized at this stage, if they will exist at all. 
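kbase_add_va_region_rbtree() above takes one of two paths: with an explicit GPU address it looks up the region enclosing that range and requires it to be free, and with no address it searches the zone for any free region that can hold a suitably aligned start, expressed through the align_offset/align_mask pair. The alignment arithmetic of the second path can be shown with a first-fit scan over plain ranges (a sketch under the assumption that align_mask + 1 is a power of two; struct free_range and find_first_fit() are illustrative stand-ins, not the driver's search):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct free_range {            /* illustrative stand-in for a free region */
        uint64_t start_pfn;
        uint64_t nr_pages;
};

/*
 * First-fit search over free ranges: the chosen start must satisfy
 * (start & align_mask) == (align_offset & align_mask), which is the kind of
 * constraint Path 2 above passes down via align_offset and align_mask.
 */
static bool find_first_fit(const struct free_range *ranges, size_t n,
                           uint64_t nr_pages, uint64_t align_offset,
                           uint64_t align_mask, uint64_t *out_start)
{
        for (size_t i = 0; i < n; i++) {
                uint64_t end = ranges[i].start_pfn + ranges[i].nr_pages;
                uint64_t start = (ranges[i].start_pfn & ~align_mask) |
                                 (align_offset & align_mask);

                if (start < ranges[i].start_pfn)
                        start += align_mask + 1;   /* round up to the next slot */

                if (start + nr_pages <= end) {
                        *out_start = start;
                        return true;
                }
        }
        return false;
}

int main(void)
{
        const struct free_range ranges[] = { { 0x101, 0x40 }, { 0x400, 0x200 } };
        uint64_t start;

        /* Ask for 0x100 pages aligned to 0x100: only the second range fits. */
        if (find_first_fit(ranges, 2, 0x100, 0, 0xff, &start))
                printf("placed at 0x%llx\n", (unsigned long long)start);
        return 0;
}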
++ */ ++ kctx->reg_rbtree_fixed = RB_ROOT; ++ kctx->reg_rbtree_exec_fixed = RB_ROOT; + -+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) -+{ -+ kbasep_os_process_page_usage_update(kctx, pages); -+} ++ if (exec_va_reg) { ++ WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); ++ kbase_region_tracker_insert(exec_va_reg); ++ last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; ++ } + -+/** -+ * kbase_process_page_usage_dec() - Subtract from the memory allocation -+ * counters for the current process. -+ * -+ * @kctx: The kernel base context used for the allocation. -+ * @pages: The desired delta to apply to the memory usage counters. -+ * -+ * OS specific call to subtract from the current memory allocation counters -+ * for the current process by the supplied amount. -+ */ ++ if (exec_fixed_va_reg) { ++ WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); ++ kbase_region_tracker_insert(exec_fixed_va_reg); ++ last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; ++ } + -+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) -+{ -+ kbasep_os_process_page_usage_update(kctx, 0 - pages); ++ if (fixed_va_reg) { ++ WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); ++ kbase_region_tracker_insert(fixed_va_reg); ++ last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; ++ } +} ++#else ++static void kbase_region_tracker_ds_init(struct kbase_context *kctx, ++ struct kbase_va_region *same_va_reg, ++ struct kbase_va_region *custom_va_reg) ++{ ++ kctx->reg_rbtree_same = RB_ROOT; ++ kbase_region_tracker_insert(same_va_reg); + -+/** -+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU -+ * mapping of a memory allocation containing a given address range -+ * -+ * @kctx: The kernel base context used for the allocation. -+ * @uaddr: Start of the CPU virtual address range. -+ * @size: Size of the CPU virtual address range (in bytes). -+ * @offset: The offset from the start of the allocation to the specified CPU -+ * virtual address. -+ * -+ * Searches for a CPU mapping of any part of any region that fully encloses the -+ * CPU virtual address range specified by @uaddr and @size. Returns a failure -+ * indication if only part of the address range lies within a CPU mapping. -+ * -+ * Return: 0 if offset was obtained successfully. Error code otherwise. -+ */ -+int kbasep_find_enclosing_cpu_mapping_offset( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset); -+ -+/** -+ * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of -+ * the start of GPU virtual memory region which encloses @gpu_addr for the -+ * @size length in bytes -+ * -+ * @kctx: The kernel base context within which the memory is searched. -+ * @gpu_addr: GPU virtual address for which the region is sought; defines -+ * the beginning of the provided region. -+ * @size: The length (in bytes) of the provided region for which the -+ * GPU virtual memory region is sought. -+ * @start: Pointer to the location where the address of the start of -+ * the found GPU virtual memory region is. -+ * @offset: Pointer to the location where the offset of @gpu_addr into -+ * the found GPU virtual memory region is. -+ * -+ * Searches for the memory region in GPU virtual memory space which contains -+ * the region defined by the @gpu_addr and @size, where @gpu_addr is the -+ * beginning and @size the length in bytes of the provided region. 
If found, -+ * the location of the start address of the GPU virtual memory region is -+ * passed in @start pointer and the location of the offset of the region into -+ * the GPU virtual memory region is passed in @offset pointer. -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+int kbasep_find_enclosing_gpu_mapping_start_and_offset( -+ struct kbase_context *kctx, -+ u64 gpu_addr, size_t size, u64 *start, u64 *offset); -+ -+/** -+ * kbase_alloc_phy_pages_helper - Allocates physical pages. -+ * @alloc: allocation object to add pages to -+ * @nr_pages_requested: number of physical pages to allocate -+ * -+ * Allocates @nr_pages_requested and updates the alloc object. -+ * -+ * Note: if kbase_gpu_vm_lock() is to be held around this function to ensure thread-safe updating -+ * of @alloc, then refer to the documentation of kbase_gpu_vm_lock() about the requirements of -+ * either calling during a syscall, or ensuring the allocation is small. These requirements prevent -+ * an effective deadlock between the kernel's OoM killer and kbase's VMA close() handlers, which -+ * could take kbase_gpu_vm_lock() too. -+ * -+ * If the requirements of kbase_gpu_vm_lock() cannot be satisfied when calling this function, but -+ * @alloc must still be updated in a thread-safe way, then instead use -+ * kbase_alloc_phy_pages_helper_locked() and restructure callers into the sequence outlined there. -+ * -+ * This function cannot be used from interrupt context -+ * -+ * Return: 0 if all pages have been successfully allocated. Error code otherwise -+ */ -+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, -+ size_t nr_pages_requested); -+ -+/** -+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. -+ * @alloc: allocation object to add pages to -+ * @pool: Memory pool to allocate from -+ * @nr_pages_requested: number of physical pages to allocate -+ * -+ * @prealloc_sa: Information about the partial allocation if the amount of memory requested -+ * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be -+ * allocated by the caller if kbdev->pagesize_2mb is enabled. -+ * -+ * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new -+ * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be -+ * called whilst a thread operating outside of a syscall has held the region list lock -+ * (kbase_gpu_vm_lock()), as it will not cause an effective deadlock with VMA close() handlers used -+ * by the OoM killer. -+ * -+ * As new pages can not be allocated, the caller must ensure there are sufficient pages in the -+ * pool. Usage of this function should look like : -+ * -+ * kbase_gpu_vm_lock(kctx); -+ * kbase_mem_pool_lock(pool) -+ * while (kbase_mem_pool_size(pool) < pages_required) { -+ * kbase_mem_pool_unlock(pool) -+ * kbase_gpu_vm_unlock(kctx); -+ * kbase_mem_pool_grow(pool) -+ * kbase_gpu_vm_lock(kctx); -+ * kbase_mem_pool_lock(pool) -+ * } -+ * kbase_alloc_phy_pages_helper_locked(pool) -+ * kbase_mem_pool_unlock(pool) -+ * // Perform other processing that requires vm_lock... -+ * kbase_gpu_vm_unlock(kctx); -+ * -+ * This ensures that the pool can be grown to the required size and that the allocation can -+ * complete without another thread using the newly grown pages. -+ * -+ * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the -+ * pools from alloc->imported.native.kctx->mem_pools.large[]. 
Otherwise it must be one of the -+ * mempools from alloc->imported.native.kctx->mem_pools.small[]. -+ * -+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we -+ * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa -+ * shall be set to NULL if it has been consumed by this function to indicate that the caller no -+ * longer owns it and should not access it further. -+ * -+ * Note: Caller must hold @pool->pool_lock -+ * -+ * Return: Pointer to array of allocated pages. NULL on failure. -+ */ -+struct tagged_addr *kbase_alloc_phy_pages_helper_locked( -+ struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, -+ size_t nr_pages_requested, -+ struct kbase_sub_alloc **prealloc_sa); -+ -+/** -+ * kbase_free_phy_pages_helper() - Free physical pages. -+ * -+ * @alloc: allocation object to free pages from -+ * @nr_pages_to_free: number of physical pages to free -+ * -+ * Free @nr_pages_to_free pages and updates the alloc object. -+ * -+ * Return: 0 on success, otherwise a negative error code -+ */ -+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); ++ /* Although custom_va_reg and exec_va_reg don't always exist, ++ * initialize unconditionally because of the mem_view debugfs ++ * implementation which relies on them being empty. ++ * ++ * The difference between the two is that the EXEC_VA region ++ * is never initialized at this stage. ++ */ ++ kctx->reg_rbtree_custom = RB_ROOT; ++ kctx->reg_rbtree_exec = RB_ROOT; + -+/** -+ * kbase_free_phy_pages_helper_locked - Free pages allocated with -+ * kbase_alloc_phy_pages_helper_locked() -+ * @alloc: Allocation object to free pages from -+ * @pool: Memory pool to return freed pages to -+ * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() -+ * @nr_pages_to_free: Number of physical pages to free -+ * -+ * This function atomically frees pages allocated with -+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page -+ * array that is returned by that function. @pool must be the pool that the -+ * pages were originally allocated from. -+ * -+ * If the mem_pool has been unlocked since the allocation then -+ * kbase_free_phy_pages_helper() should be used instead. -+ */ -+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, -+ struct kbase_mem_pool *pool, struct tagged_addr *pages, -+ size_t nr_pages_to_free); ++ if (custom_va_reg) ++ kbase_region_tracker_insert(custom_va_reg); ++} ++#endif /* MALI_USE_CSF */ + -+static inline void kbase_set_dma_addr_as_priv(struct page *p, dma_addr_t dma_addr) ++static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ -+ SetPagePrivate(p); -+ if (sizeof(dma_addr_t) > sizeof(p->private)) { -+ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the -+ * private field stays the same. 
So we have to be clever and -+ * use the fact that we only store DMA addresses of whole pages, -+ * so the low bits should be zero -+ */ -+ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); -+ set_page_private(p, dma_addr >> PAGE_SHIFT); -+ } else { -+ set_page_private(p, dma_addr); ++ struct kbase_context *kctx = NULL; ++ struct rb_root *rbtree = reg->rbtree; ++ ++ switch (reg->flags & KBASE_REG_ZONE_MASK) { ++ case KBASE_REG_ZONE_CUSTOM_VA: ++ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); ++ break; ++ case KBASE_REG_ZONE_SAME_VA: ++ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); ++ break; ++ case KBASE_REG_ZONE_EXEC_VA: ++ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); ++ break; ++#if MALI_USE_CSF ++ case KBASE_REG_ZONE_EXEC_FIXED_VA: ++ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); ++ break; ++ case KBASE_REG_ZONE_FIXED_VA: ++ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); ++ break; ++ case KBASE_REG_ZONE_MCU_SHARED: ++ /* This is only expected to be called on driver unload. */ ++ break; ++#endif ++ default: ++ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); ++ break; + } ++ ++ return kctx; +} + -+static inline dma_addr_t kbase_dma_addr_as_priv(struct page *p) ++static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +{ -+ if (sizeof(dma_addr_t) > sizeof(p->private)) -+ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; ++ struct rb_node *rbnode; ++ struct kbase_va_region *reg; + -+ return (dma_addr_t)page_private(p); ++ do { ++ rbnode = rb_first(rbtree); ++ if (rbnode) { ++ rb_erase(rbnode, rbtree); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); ++ if (kbase_page_migration_enabled) ++ kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); ++ /* Reset the start_pfn - as the rbtree is being ++ * destroyed and we've already erased this region, there ++ * is no further need to attempt to remove it. ++ * This won't affect the cleanup if the region was ++ * being used as a sticky resource as the cleanup ++ * related to sticky resources anyways need to be ++ * performed before the term of region tracker. 
++ */ ++ reg->start_pfn = 0; ++ kbase_free_alloced_region(reg); ++ } ++ } while (rbnode); +} + -+static inline void kbase_clear_dma_addr_as_priv(struct page *p) ++void kbase_region_tracker_term(struct kbase_context *kctx) +{ -+ ClearPagePrivate(p); ++ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, ++ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", ++ kctx->tgid, kctx->id); ++ ++ kbase_gpu_vm_lock(kctx); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); ++#if MALI_USE_CSF ++ WARN_ON(!list_empty(&kctx->csf.event_pages_head)); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); ++ ++#endif ++ kbase_gpu_vm_unlock(kctx); +} + -+static inline struct kbase_page_metadata *kbase_page_private(struct page *p) ++void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) +{ -+ return (struct kbase_page_metadata *)page_private(p); ++ kbase_region_tracker_erase_rbtree(rbtree); +} + -+static inline dma_addr_t kbase_dma_addr(struct page *p) ++static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ -+ if (kbase_page_migration_enabled) -+ return kbase_page_private(p)->dma_addr; -+ -+ return kbase_dma_addr_as_priv(p); ++ return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), ++ kctx->kbdev->gpu_props.mmu.va_bits); +} + -+static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa) ++int kbase_region_tracker_init(struct kbase_context *kctx) +{ -+ phys_addr_t pa = as_phys_addr_t(tagged_pa); -+ struct page *page = pfn_to_page(PFN_DOWN(pa)); -+ dma_addr_t dma_addr = -+ is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); ++ struct kbase_va_region *same_va_reg; ++ struct kbase_va_region *custom_va_reg = NULL; ++ size_t same_va_bits = kbase_get_same_va_bits(kctx); ++ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; ++ u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; ++ u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; ++ u64 same_va_pages; ++ u64 same_va_base = 1u; ++ int err; ++#if MALI_USE_CSF ++ struct kbase_va_region *exec_va_reg; ++ struct kbase_va_region *exec_fixed_va_reg; ++ struct kbase_va_region *fixed_va_reg; + -+ return dma_addr; -+} ++ u64 exec_va_base; ++ u64 fixed_va_end; ++ u64 exec_fixed_va_base; ++ u64 fixed_va_base; ++ u64 fixed_va_pages; ++#endif + -+/** -+ * kbase_flush_mmu_wqs() - Flush MMU workqueues. -+ * @kbdev: Device pointer. -+ * -+ * This function will cause any outstanding page or bus faults to be processed. -+ * It should be called prior to powering off the GPU. 
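During teardown, kbase_region_tracker_erase_rbtree() above may need the owning context for a region it is about to destroy, and it recovers it through kbase_reg_flags_to_kctx(): the region's zone bits select which per-zone rbtree root is embedded in struct kbase_context, and container_of() walks back from that embedded root to the context itself. The pointer arithmetic behind container_of() can be reproduced in user space with offsetof (the stub types below are illustrative; only the technique matches the driver):

#include <stddef.h>
#include <stdio.h>

/* Minimal container_of(), the same idiom used by kbase_reg_flags_to_kctx(). */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct rb_root_stub { void *rb_node; };      /* stand-in for struct rb_root */

struct context_stub {                        /* stand-in for struct kbase_context */
        int id;
        struct rb_root_stub reg_rbtree_same;
        struct rb_root_stub reg_rbtree_custom;
};

int main(void)
{
        struct context_stub ctx = { .id = 42 };
        struct rb_root_stub *root = &ctx.reg_rbtree_custom;

        /* Given only the embedded rbtree root, recover the enclosing context. */
        struct context_stub *owner =
                container_of(root, struct context_stub, reg_rbtree_custom);

        printf("owner id = %d\n", owner->id);
        return 0;
}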
-+ */ -+void kbase_flush_mmu_wqs(struct kbase_device *kbdev); ++ /* Take the lock as kbase_free_alloced_region requires it */ ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbase_sync_single_for_device - update physical memory and give GPU ownership -+ * @kbdev: Device pointer -+ * @handle: DMA address of region -+ * @size: Size of region to sync -+ * @dir: DMA data direction -+ */ -+ -+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir); -+ -+/** -+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership -+ * @kbdev: Device pointer -+ * @handle: DMA address of region -+ * @size: Size of region to sync -+ * @dir: DMA data direction -+ */ -+ -+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir); -+ -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+/** -+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. -+ * @kctx: kbase context -+ */ -+void kbase_jit_debugfs_init(struct kbase_context *kctx); -+#endif /* CONFIG_DEBUG_FS */ -+ -+/** -+ * kbase_jit_init - Initialize the JIT memory pool management -+ * @kctx: kbase context -+ * -+ * Return: zero on success or negative error number on failure. -+ */ -+int kbase_jit_init(struct kbase_context *kctx); ++ same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; + -+/** -+ * kbase_jit_allocate - Allocate JIT memory -+ * @kctx: kbase context -+ * @info: JIT allocation information -+ * @ignore_pressure_limit: Whether the JIT memory pressure limit is ignored -+ * -+ * Return: JIT allocation on success or NULL on failure. -+ */ -+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, -+ const struct base_jit_alloc_info *info, -+ bool ignore_pressure_limit); ++#if MALI_USE_CSF ++ if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { ++ /* Depending on how the kernel is configured, it's possible (eg on aarch64) for ++ * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone ++ * doesn't cross into the exec_va zone. ++ */ ++ same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; ++ } ++#endif + -+/** -+ * kbase_jit_free - Free a JIT allocation -+ * @kctx: kbase context -+ * @reg: JIT allocation -+ * -+ * Frees a JIT allocation and places it into the free pool for later reuse. -+ */ -+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); ++ /* all have SAME_VA */ ++ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, ++ same_va_pages, KBASE_REG_ZONE_SAME_VA); + -+/** -+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing -+ * @reg: JIT allocation -+ */ -+void kbase_jit_backing_lost(struct kbase_va_region *reg); ++ if (!same_va_reg) { ++ err = -ENOMEM; ++ goto fail_unlock; ++ } ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, ++ same_va_pages); + -+/** -+ * kbase_jit_evict - Evict a JIT allocation from the pool -+ * @kctx: kbase context -+ * -+ * Evict the least recently used JIT allocation from the pool. This can be -+ * required if normal VA allocations are failing due to VA exhaustion. -+ * -+ * Return: True if a JIT allocation was freed, false otherwise. 
-+ */ -+bool kbase_jit_evict(struct kbase_context *kctx); ++ if (kbase_ctx_compat_mode(kctx)) { ++ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { ++ err = -EINVAL; ++ goto fail_free_same_va; ++ } ++ /* If the current size of TMEM is out of range of the ++ * virtual address space addressable by the MMU then ++ * we should shrink it to fit ++ */ ++ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) ++ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; + -+/** -+ * kbase_jit_term - Terminate the JIT memory pool management -+ * @kctx: kbase context -+ */ -+void kbase_jit_term(struct kbase_context *kctx); ++ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, ++ KBASE_REG_ZONE_CUSTOM_VA_BASE, ++ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+/** -+ * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of -+ * kbase_trace_jit_report_gpu_mem() that should only be called once the -+ * corresponding tracepoint is verified to be enabled -+ * @kctx: kbase context -+ * @reg: Just-in-time memory region to trace -+ * @flags: combination of values from enum kbase_jit_report_flags -+ */ -+void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned int flags); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ if (!custom_va_reg) { ++ err = -ENOMEM; ++ goto fail_free_same_va; ++ } ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, ++ KBASE_REG_ZONE_CUSTOM_VA_BASE, ++ custom_va_size); ++ } else { ++ custom_va_size = 0; ++ } + -+/** -+ * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used -+ * to make a JIT report -+ * @kctx: kbase context -+ * @reg: Just-in-time memory region to trace -+ * @flags: combination of values from enum kbase_jit_report_flags -+ * -+ * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. -+ * -+ * In case that tracepoint is not enabled, this function should have the same -+ * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid -+ * conditional branches) -+ * -+ * This can take the reg_lock on @kctx, do not use in places where this lock is -+ * already held. -+ * -+ * Note: this has to be a macro because at this stage the tracepoints have not -+ * been included. Also gives no opportunity for the compiler to mess up -+ * inlining it. -+ */ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ -+ do { \ -+ if (trace_mali_jit_report_gpu_mem_enabled()) \ -+ kbase_trace_jit_report_gpu_mem_trace_enabled( \ -+ (kctx), (reg), (flags)); \ -+ } while (0) -+#else -+#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ -+ CSTD_NOP(kctx, reg, flags) -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++#if MALI_USE_CSF ++ /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. 
*/ ++ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+/** -+ * kbase_jit_report_update_pressure - safely update the JIT physical page -+ * pressure and JIT region's estimate of used_pages -+ * @kctx: kbase context, to update the current physical pressure -+ * @reg: Just-in-time memory region to update with @new_used_pages -+ * @new_used_pages: new value of number of pages used in the JIT region -+ * @flags: combination of values from enum kbase_jit_report_flags -+ * -+ * Takes care of: -+ * - correctly updating the pressure given the current reg->used_pages and -+ * new_used_pages -+ * - then updating the %kbase_va_region used_pages member -+ * -+ * Precondition: -+ * - new_used_pages <= reg->nr_pages -+ */ -+void kbase_jit_report_update_pressure(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 new_used_pages, -+ unsigned int flags); ++ /* Similarly the end of the FIXED_VA zone also depends on whether the client ++ * is 32 or 64-bits. ++ */ ++ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + -+/** -+ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages -+ * possible to satisfy a new JIT allocation -+ * -+ * @kctx: Pointer to the kbase context -+ * @needed_pages: Number of JIT physical pages by which trimming is requested. -+ * The actual number of pages trimmed could differ. -+ * -+ * Before allocating a new just-in-time memory region or reusing a previous -+ * one, ensure that the total JIT physical page usage also will not exceed the -+ * pressure limit. -+ * -+ * If there are no reported-on allocations, then we already guarantee this will -+ * be the case - because our current pressure then only comes from the va_pages -+ * of each JIT region, hence JIT physical page usage is guaranteed to be -+ * bounded by this. -+ * -+ * However as soon as JIT allocations become "reported on", the pressure is -+ * lowered to allow new JIT regions to be allocated. It is after such a point -+ * that the total JIT physical page usage could (either now or in the future on -+ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly -+ * allocated JIT regions. Hence, trim any "reported on" regions. -+ * -+ * Any pages freed will go into the pool and be allocated from there in -+ * kbase_mem_alloc(). -+ */ -+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, -+ size_t needed_pages); ++ if (kbase_ctx_compat_mode(kctx)) { ++ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; ++ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; ++ } + -+/* -+ * Same as kbase_jit_request_phys_increase(), except that Caller is supposed -+ * to take jit_evict_lock also on @kctx before calling this function. 
-+ */ -+static inline void -+kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, -+ size_t needed_pages) -+{ -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->reg_lock); -+ lockdep_assert_held(&kctx->jit_evict_lock); ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, ++ KBASE_REG_ZONE_EXEC_VA_SIZE); + -+ kctx->jit_phys_pages_to_be_allocated += needed_pages; ++ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, ++ KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); + -+ kbase_jit_trim_necessary_pages(kctx, -+ kctx->jit_phys_pages_to_be_allocated); -+} ++ if (!exec_va_reg) { ++ err = -ENOMEM; ++ goto fail_free_custom_va; ++ } + -+/** -+ * kbase_jit_request_phys_increase() - Increment the backing pages count and do -+ * the required trimming before allocating pages for a JIT allocation. -+ * -+ * @kctx: Pointer to the kbase context -+ * @needed_pages: Number of pages to be allocated for the JIT allocation. -+ * -+ * This function needs to be called before allocating backing pages for a -+ * just-in-time memory region. The backing pages are currently allocated when, -+ * -+ * - A new JIT region is created. -+ * - An old JIT region is reused from the cached pool. -+ * - GPU page fault occurs for the active JIT region. -+ * - Backing is grown for the JIT region through the commit ioctl. -+ * -+ * This function would ensure that the total JIT physical page usage does not -+ * exceed the pressure limit even when the backing pages get allocated -+ * simultaneously for multiple JIT allocations from different threads. -+ * -+ * There should be a matching call to kbase_jit_done_phys_increase(), after -+ * the pages have been allocated and accounted against the active JIT -+ * allocation. -+ * -+ * Caller is supposed to take reg_lock on @kctx before calling this function. -+ */ -+static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, -+ size_t needed_pages) -+{ -+#if !MALI_USE_CSF -+ lockdep_assert_held(&kctx->jctx.lock); -+#endif /* !MALI_USE_CSF */ -+ lockdep_assert_held(&kctx->reg_lock); ++ exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; + -+ mutex_lock(&kctx->jit_evict_lock); -+ kbase_jit_request_phys_increase_locked(kctx, needed_pages); -+ mutex_unlock(&kctx->jit_evict_lock); -+} ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, ++ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); + -+/** -+ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the -+ * allocation of pages for a JIT allocation. -+ * -+ * @kctx: Pointer to the kbase context -+ * @needed_pages: Number of pages that were allocated for the JIT allocation. -+ * -+ * This function should be called after backing pages have been allocated and -+ * accounted against the active JIT allocation. -+ * The call should be made when the following have been satisfied: -+ * when the allocation is on the jit_active_head. -+ * when additional needed_pages have been allocated. -+ * kctx->reg_lock was held during the above and has not yet been unlocked. -+ * Failure to call this function before unlocking the kctx->reg_lock when -+ * either the above have changed may result in over-accounting the memory. -+ * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of -+ * the memory. -+ * -+ * A matching call to kbase_jit_request_phys_increase() should have been made, -+ * before the allocation of backing pages. 
-+ * -+ * Caller is supposed to take reg_lock on @kctx before calling this function. -+ */ -+static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, -+ size_t needed_pages) -+{ -+ lockdep_assert_held(&kctx->reg_lock); ++ exec_fixed_va_reg = ++ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, ++ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, ++ KBASE_REG_ZONE_EXEC_FIXED_VA); + -+ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages); ++ if (!exec_fixed_va_reg) { ++ err = -ENOMEM; ++ goto fail_free_exec_va; ++ } + -+ kctx->jit_phys_pages_to_be_allocated -= needed_pages; -+} -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; ++ fixed_va_pages = fixed_va_end - fixed_va_base; + -+/** -+ * kbase_has_exec_va_zone - EXEC_VA zone predicate -+ * -+ * @kctx: kbase context -+ * -+ * Determine whether an EXEC_VA zone has been created for the GPU address space -+ * of the given kbase context. -+ * -+ * Return: True if the kbase context has an EXEC_VA zone. -+ */ -+bool kbase_has_exec_va_zone(struct kbase_context *kctx); ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); + -+/** -+ * kbase_map_external_resource - Map an external resource to the GPU. -+ * @kctx: kbase context. -+ * @reg: External resource to map. -+ * @locked_mm: The mm_struct which has been locked for this operation. -+ * -+ * On successful mapping, the VA region and the gpu_alloc refcounts will be -+ * increased, making it safe to use and store both values directly. -+ * -+ * Return: Zero on success, or negative error code. -+ */ -+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, -+ struct mm_struct *locked_mm); ++ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, ++ fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); + -+/** -+ * kbase_unmap_external_resource - Unmap an external resource from the GPU. -+ * @kctx: kbase context. -+ * @reg: VA region corresponding to external resource -+ * -+ * On successful unmapping, the VA region and the gpu_alloc refcounts will -+ * be decreased. If the refcount reaches zero, both @reg and the corresponding -+ * allocation may be freed, so using them after returning from this function -+ * requires the caller to explicitly check their state. -+ */ -+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); ++ kctx->gpu_va_end = fixed_va_end; + -+/** -+ * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. -+ * @kctx: kbase context. -+ * @reg: The region associated with the imported user buffer. -+ * -+ * To successfully pin the pages for a user buffer the current mm_struct must -+ * be the same as the mm_struct of the user buffer. After successfully pinning -+ * the pages further calls to this function succeed without doing work. -+ * -+ * Return: zero on success or negative number on failure. -+ */ -+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, -+ struct kbase_va_region *reg); ++ if (!fixed_va_reg) { ++ err = -ENOMEM; ++ goto fail_free_exec_fixed_va; ++ } + -+/** -+ * kbase_sticky_resource_init - Initialize sticky resource management. -+ * @kctx: kbase context -+ * -+ * Return: zero on success or negative error number on failure. 
-+ */ -+int kbase_sticky_resource_init(struct kbase_context *kctx); ++ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, ++ exec_fixed_va_reg, fixed_va_reg); + -+/** -+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. -+ * @kctx: kbase context. -+ * @gpu_addr: The GPU address of the external resource. -+ * -+ * Return: The metadata object which represents the binding between the -+ * external resource and the kbase context on success or NULL on failure. -+ */ -+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( -+ struct kbase_context *kctx, u64 gpu_addr); ++ INIT_LIST_HEAD(&kctx->csf.event_pages_head); ++#else ++ /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is ++ * initially U64_MAX ++ */ ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); ++ /* Other zones are 0: kbase_create_context() uses vzalloc */ + -+/** -+ * kbase_sticky_resource_release - Release a reference on a sticky resource. -+ * @kctx: kbase context. -+ * @meta: Binding metadata. -+ * @gpu_addr: GPU address of the external resource. -+ * -+ * If meta is NULL then gpu_addr will be used to scan the metadata list and -+ * find the matching metadata (if any), otherwise the provided meta will be -+ * used and gpu_addr will be ignored. -+ * -+ * Return: True if the release found the metadata and the reference was dropped. -+ */ -+bool kbase_sticky_resource_release(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); ++ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); ++ kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; ++#endif ++ kctx->jit_va = false; + -+/** -+ * kbase_sticky_resource_release_force - Release a sticky resource. -+ * @kctx: kbase context. -+ * @meta: Binding metadata. -+ * @gpu_addr: GPU address of the external resource. -+ * -+ * If meta is NULL then gpu_addr will be used to scan the metadata list and -+ * find the matching metadata (if any), otherwise the provided meta will be -+ * used and gpu_addr will be ignored. -+ * -+ * Return: True if the release found the metadata and the resource was -+ * released. -+ */ -+bool kbase_sticky_resource_release_force(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); ++ kbase_gpu_vm_unlock(kctx); ++ return 0; + -+/** -+ * kbase_sticky_resource_term - Terminate sticky resource management. -+ * @kctx: kbase context -+ */ -+void kbase_sticky_resource_term(struct kbase_context *kctx); ++#if MALI_USE_CSF ++fail_free_exec_fixed_va: ++ kbase_free_alloced_region(exec_fixed_va_reg); ++fail_free_exec_va: ++ kbase_free_alloced_region(exec_va_reg); ++fail_free_custom_va: ++ if (custom_va_reg) ++ kbase_free_alloced_region(custom_va_reg); ++#endif + -+/** -+ * kbase_mem_pool_lock - Lock a memory pool -+ * @pool: Memory pool to lock -+ */ -+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) -+{ -+ spin_lock(&pool->pool_lock); ++fail_free_same_va: ++ kbase_free_alloced_region(same_va_reg); ++fail_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return err; +} + -+/** -+ * kbase_mem_pool_unlock - Release a memory pool -+ * @pool: Memory pool to lock -+ */ -+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) ++static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) +{ -+ spin_unlock(&pool->pool_lock); -+} -+ -+/** -+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. 
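For reference, a minimal usage sketch (not part of the patch) of the acquire/release pattern for the sticky-resource API declared above. The wrapper name is hypothetical, and the region lock is assumed to be required around both calls, taken here via kbase_gpu_vm_lock().

static int example_use_sticky_resource(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_ctx_ext_res_meta *meta;

	kbase_gpu_vm_lock(kctx);
	meta = kbase_sticky_resource_acquire(kctx, gpu_addr);
	kbase_gpu_vm_unlock(kctx);
	if (!meta)
		return -EINVAL;

	/* ... the external resource stays bound while the metadata is held ... */

	kbase_gpu_vm_lock(kctx);
	/* A non-NULL meta is used directly; gpu_addr would only be used to
	 * scan the metadata list if meta were NULL.
	 */
	if (!kbase_sticky_resource_release(kctx, meta, 0))
		dev_dbg(kctx->kbdev->dev, "sticky resource was already released\n");
	kbase_gpu_vm_unlock(kctx);

	return 0;
}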
-+ * @alloc: The physical allocation -+ */ -+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); ++ struct kbase_reg_zone *exec_va_zone; + -+#if MALI_USE_CSF -+/** -+ * kbase_link_event_mem_page - Add the new event memory region to the per -+ * context list of event pages. -+ * @kctx: Pointer to kbase context -+ * @reg: Pointer to the region allocated for event memory. -+ * -+ * The region being linked shouldn't have been marked as free and should -+ * have KBASE_REG_CSF_EVENT flag set for it. -+ */ -+static inline void kbase_link_event_mem_page(struct kbase_context *kctx, -+ struct kbase_va_region *reg) -+{ + lockdep_assert_held(&kctx->reg_lock); ++ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + -+ WARN_ON(reg->flags & KBASE_REG_FREE); -+ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); -+ -+ list_add(®->link, &kctx->csf.event_pages_head); ++ return (exec_va_zone->base_pfn != U64_MAX); +} + -+/** -+ * kbase_unlink_event_mem_page - Remove the event memory region from the per -+ * context list of event pages. -+ * @kctx: Pointer to kbase context -+ * @reg: Pointer to the region allocated for event memory. -+ * -+ * The region being un-linked shouldn't have been marked as free and should -+ * have KBASE_REG_CSF_EVENT flag set for it. -+ */ -+static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx, -+ struct kbase_va_region *reg) ++bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kctx->reg_lock); ++ bool has_exec_va_zone; + -+ WARN_ON(reg->flags & KBASE_REG_FREE); -+ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); ++ kbase_gpu_vm_lock(kctx); ++ has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); ++ kbase_gpu_vm_unlock(kctx); + -+ list_del(®->link); ++ return has_exec_va_zone; +} + +/** -+ * kbase_mcu_shared_interface_region_tracker_init - Initialize the rb tree to -+ * manage the shared interface segment of MCU firmware address space. -+ * @kbdev: Pointer to the kbase device -+ * -+ * Return: zero on success or negative error number on failure. -+ */ -+int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_mcu_shared_interface_region_tracker_term - Teardown the rb tree -+ * managing the shared interface segment of MCU firmware address space. -+ * @kbdev: Pointer to the kbase device -+ */ -+void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev); -+#endif -+ -+/** -+ * kbase_mem_umm_map - Map dma-buf -+ * @kctx: Pointer to the kbase context -+ * @reg: Pointer to the region of the imported dma-buf to map -+ * -+ * Map a dma-buf on the GPU. The mappings are reference counted. -+ * -+ * Return: 0 on success, or a negative error code. -+ */ -+int kbase_mem_umm_map(struct kbase_context *kctx, -+ struct kbase_va_region *reg); -+ -+/** -+ * kbase_mem_umm_unmap - Unmap dma-buf -+ * @kctx: Pointer to the kbase context -+ * @reg: Pointer to the region of the imported dma-buf to unmap -+ * @alloc: Pointer to the alloc to release -+ * -+ * Unmap a dma-buf from the GPU. The mappings are reference counted. -+ * -+ * @reg must be the original region with GPU mapping of @alloc; or NULL. If -+ * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching -+ * @reg will not be updated. -+ * -+ * @alloc must be a valid physical allocation of type -+ * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by -+ * kbase_mem_umm_map(). 
The dma-buf attachment referenced by @alloc will -+ * release it's mapping reference, and if the refcount reaches 0, also be -+ * unmapped, regardless of the value of @reg. -+ */ -+void kbase_mem_umm_unmap(struct kbase_context *kctx, -+ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); -+ -+/** -+ * kbase_mem_do_sync_imported - Sync caches for imported memory -+ * @kctx: Pointer to the kbase context -+ * @reg: Pointer to the region with imported memory to sync -+ * @sync_fn: The type of sync operation to perform ++ * kbase_region_tracker_has_allocs - Determine if any allocations have been made ++ * on a context's region tracker + * -+ * Sync CPU caches for supported (currently only dma-buf (UMM)) memory. -+ * Attempting to sync unsupported imported memory types will result in an error -+ * code, -EINVAL. ++ * @kctx: KBase context + * -+ * Return: 0 on success, or a negative error code. -+ */ -+int kbase_mem_do_sync_imported(struct kbase_context *kctx, -+ struct kbase_va_region *reg, enum kbase_sync_type sync_fn); -+ -+/** -+ * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to -+ * an unaligned address at a given offset from the start of a target page. ++ * Check the context to determine if any allocations have been made yet from ++ * any of its zones. This check should be done before resizing a zone, e.g. to ++ * make space to add a second zone. + * -+ * @dest_pages: Pointer to the array of pages to which the content is -+ * to be copied from the provided @src_page. -+ * @src_page: Pointer to the page which correspond to the source page -+ * from which the copying will take place. -+ * @to_copy: Total number of bytes pending to be copied from -+ * @src_page to @target_page_nr within @dest_pages. -+ * This will get decremented by number of bytes we -+ * managed to copy from source page to target pages. -+ * @nr_pages: Total number of pages present in @dest_pages. -+ * @target_page_nr: Target page number to which @src_page needs to be -+ * copied. This will get incremented by one if -+ * we are successful in copying from source page. -+ * @offset: Offset in bytes into the target pages from which the -+ * copying is to be performed. ++ * Whilst a zone without allocations can be resized whilst other zones have ++ * allocations, we still check all of @kctx 's zones anyway: this is a stronger ++ * guarantee and should be adhered to when creating new zones anyway. + * -+ * Return: 0 on success, or a negative error code. -+ */ -+int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, -+ void *src_page, size_t *to_copy, unsigned int nr_pages, -+ unsigned int *target_page_nr, size_t offset); -+ -+/** -+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone -+ * @zone: zone to query ++ * Allocations from kbdev zones are not counted. 
+ * -+ * Return: The end of the zone corresponding to @zone -+ */ -+static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) -+{ -+ return zone->base_pfn + zone->va_size_pages; -+} -+ -+/** -+ * kbase_ctx_reg_zone_init - initialize a zone in @kctx -+ * @kctx: Pointer to kbase context -+ * @zone_bits: A KBASE_REG_ZONE_<...> to initialize -+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of -+ * the Zone -+ * @va_size_pages: Size of the Zone in pages ++ * Return: true if any allocs exist on any zone, false otherwise + */ -+static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx, -+ unsigned long zone_bits, -+ u64 base_pfn, u64 va_size_pages) ++static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +{ -+ struct kbase_reg_zone *zone; ++ unsigned int zone_idx; + + lockdep_assert_held(&kctx->reg_lock); -+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); + -+ zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; -+ *zone = (struct kbase_reg_zone){ -+ .base_pfn = base_pfn, .va_size_pages = va_size_pages, -+ }; -+} ++ for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { ++ struct kbase_reg_zone *zone; ++ struct kbase_va_region *reg; ++ u64 zone_base_addr; ++ unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); ++ unsigned long reg_zone; + -+/** -+ * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does -+ * not have @kctx 's region lock -+ * @kctx: Pointer to kbase context -+ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve -+ * -+ * This should only be used in performance-critical paths where the code is -+ * resilient to a race with the zone changing. -+ * -+ * Return: The zone corresponding to @zone_bits -+ */ -+static inline struct kbase_reg_zone * -+kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, -+ unsigned long zone_bits) -+{ -+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); ++ if (!kbase_is_ctx_reg_zone(zone_bits)) ++ continue; ++ zone = kbase_ctx_reg_zone_get(kctx, zone_bits); ++ zone_base_addr = zone->base_pfn << PAGE_SHIFT; + -+ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; -+} ++ reg = kbase_region_tracker_find_region_base_address( ++ kctx, zone_base_addr); + -+/** -+ * kbase_ctx_reg_zone_get - get a zone from @kctx -+ * @kctx: Pointer to kbase context -+ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve -+ * -+ * The get is not refcounted - there is no corresponding 'put' operation -+ * -+ * Return: The zone corresponding to @zone_bits -+ */ -+static inline struct kbase_reg_zone * -+kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) -+{ -+ lockdep_assert_held(&kctx->reg_lock); -+ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); ++ if (!zone->va_size_pages) { ++ WARN(reg, ++ "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", ++ (unsigned long long)zone_base_addr, zone_bits); ++ continue; ++ } + -+ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; -+} ++ if (WARN(!reg, ++ "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", ++ (unsigned long long)zone_base_addr, zone_bits)) ++ return true; /* Safest return value */ + -+/** -+ * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed -+ * @kctx: Pointer to kbase context -+ * -+ * Don't allow the allocation of GPU memory if the ioctl has been issued -+ * from the forked child process using the mali device file fd inherited from -+ * the parent process. -+ * -+ * Return: true if allocation is allowed. 
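For reference, a minimal sketch (not part of the patch) showing how the zone helpers defined above combine to turn a zone descriptor into a GPU virtual address bound. The function name is hypothetical; zone_bits is expected to be one of the per-context KBASE_REG_ZONE_<...> values, and the caller holds kctx->reg_lock as kbase_ctx_reg_zone_get() requires.

static u64 example_zone_end_addr(struct kbase_context *kctx, unsigned long zone_bits)
{
	struct kbase_reg_zone *zone;

	lockdep_assert_held(&kctx->reg_lock);

	zone = kbase_ctx_reg_zone_get(kctx, zone_bits);

	/* base_pfn + va_size_pages is the first PFN past the zone */
	return kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
}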
-+ */ -+static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx) -+{ -+ return (kctx->process_mm == current->mm); -+} ++ reg_zone = reg->flags & KBASE_REG_ZONE_MASK; ++ if (WARN(reg_zone != zone_bits, ++ "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", ++ (unsigned long long)zone_base_addr, zone_bits, ++ reg_zone)) ++ return true; /* Safest return value */ + -+/** -+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process -+ */ -+static inline void kbase_mem_mmgrab(void) -+{ -+ /* This merely takes a reference on the memory descriptor structure -+ * i.e. mm_struct of current process and not on its address space and -+ * so won't block the freeing of address space on process exit. -+ */ -+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE -+ atomic_inc(¤t->mm->mm_count); -+#else -+ mmgrab(current->mm); -+#endif -+} ++ /* Unless the region is completely free, of the same size as ++ * the original zone, then it has allocs ++ */ ++ if ((!(reg->flags & KBASE_REG_FREE)) || ++ (reg->nr_pages != zone->va_size_pages)) ++ return true; ++ } + -+/** -+ * kbase_mem_group_id_get - Get group ID from flags -+ * @flags: Flags to pass to base_mem_alloc -+ * -+ * This inline function extracts the encoded group ID from flags -+ * and converts it into numeric value (0~15). -+ * -+ * Return: group ID(0~15) extracted from the parameter -+ */ -+static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags) -+{ -+ KBASE_DEBUG_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); -+ return (int)BASE_MEM_GROUP_ID_GET(flags); ++ /* All zones are the same size as originally made, so there are no ++ * allocs ++ */ ++ return false; +} + -+/** -+ * kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags -+ * @id: group ID(0~15) you want to encode -+ * -+ * This inline function encodes specific group ID into base_mem_alloc_flags. -+ * Parameter 'id' should lie in-between 0 to 15. -+ * -+ * Return: base_mem_alloc_flags with the group ID (id) encoded -+ * -+ * The return value can be combined with other flags against base_mem_alloc -+ * to identify a specific memory group. -+ */ -+static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) ++static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, ++ u64 jit_va_pages) +{ -+ return BASE_MEM_GROUP_ID_SET(id); -+} -+#endif /* _KBASE_MEM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c -new file mode 100644 -index 000000000..ad0c17b63 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c -@@ -0,0 +1,3818 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ struct kbase_va_region *same_va_reg; ++ struct kbase_reg_zone *same_va_zone; ++ u64 same_va_zone_base_addr; ++ const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; ++ struct kbase_va_region *custom_va_reg; ++ u64 jit_va_start; + -+/** -+ * DOC: Base kernel memory APIs, Linux implementation. -+ */ ++ lockdep_assert_held(&kctx->reg_lock); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ /* ++ * Modify the same VA free region after creation. The caller has ++ * ensured that allocations haven't been made, as any allocations could ++ * cause an overlap to happen with existing same VA allocations and the ++ * custom VA zone. ++ */ ++ same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); ++ same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; + -+#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ -+ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) -+/* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0 -+ * onwards. -+ * -+ * For kernels prior to v4.12, workaround is needed as ion lacks the cache -+ * maintenance in begin_cpu_access and end_cpu_access methods. -+ * -+ * For kernels prior to v4.17.2, workaround is needed to avoid the potentially -+ * disruptive warnings which can come if begin_cpu_access and end_cpu_access -+ * methods are not called in pairs. -+ * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x) -+ * only require this workaround on their earlier releases. However it is still -+ * safe to use it on such releases, and it simplifies the version check. -+ * -+ * For kernels later than v4.17.2, workaround is needed as ion can potentially -+ * end up calling dma_sync_sg_for_* for a dma-buf importer that hasn't mapped -+ * the attachment. This would result in a kernel panic as ion populates the -+ * dma_address when the attachment is mapped and kernel derives the physical -+ * address for cache maintenance from the dma_address. -+ * With some multi-threaded tests it has been seen that the same dma-buf memory -+ * gets imported twice on Mali DDK side and so the problem of sync happening -+ * with an importer having an unmapped attachment comes at the time of 2nd -+ * import. The same problem can if there is another importer of dma-buf -+ * memory. -+ * -+ * Workaround can be safely disabled for kernels between v5.0.0 and v5.2.2, -+ * as all the above stated issues are not there. -+ * -+ * dma_sync_sg_for_* calls will be made directly as a workaround using the -+ * Kbase's attachment to dma-buf that was previously mapped. 
-+ */ -+#define KBASE_MEM_ION_SYNC_WORKAROUND -+#endif ++ same_va_reg = kbase_region_tracker_find_region_base_address( ++ kctx, same_va_zone_base_addr); ++ if (WARN(!same_va_reg, ++ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", ++ (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) ++ return -ENOMEM; + -+#define IR_THRESHOLD_STEPS (256u) ++ /* kbase_region_tracker_has_allocs() in the caller has already ensured ++ * that all of the zones have no allocs, so no need to check that again ++ * on same_va_reg ++ */ ++ WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || ++ same_va_reg->nr_pages != same_va_zone->va_size_pages); + -+#if MALI_USE_CSF -+static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma); -+static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma); -+#endif ++ if (same_va_reg->nr_pages < jit_va_pages || ++ same_va_zone->va_size_pages < jit_va_pages) ++ return -ENOMEM; + -+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, -+ kbase_vmap_flag vmap_flags); -+static void kbase_vunmap_phy_pages(struct kbase_context *kctx, -+ struct kbase_vmap_struct *map); ++ /* It's safe to adjust the same VA zone now */ ++ same_va_reg->nr_pages -= jit_va_pages; ++ same_va_zone->va_size_pages -= jit_va_pages; ++ jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); + -+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); ++ /* ++ * Create a custom VA zone at the end of the VA for allocations which ++ * JIT can use so it doesn't have to allocate VA from the kernel. ++ */ ++ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, ++ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); + -+static bool is_process_exiting(struct vm_area_struct *vma) -+{ -+ /* PF_EXITING flag can't be reliably used here for the detection -+ * of process exit, as 'mm_users' counter could still be non-zero -+ * when all threads of the process have exited. Later when the -+ * thread (which took a reference on the 'mm' of process that -+ * exited) drops it reference, the vm_ops->close method would be -+ * called for all the vmas (owned by 'mm' of process that exited) -+ * but the PF_EXITING flag may not be neccessarily set for the -+ * thread at that time. ++ /* ++ * The context will be destroyed if we fail here so no point ++ * reverting the change we made to same_va. + */ -+ if (atomic_read(&vma->vm_mm->mm_users)) -+ return false; ++ if (!custom_va_reg) ++ return -ENOMEM; ++ /* Since this is 64-bit, the custom zone will not have been ++ * initialized, so initialize it now ++ */ ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, ++ jit_va_pages); + -+ return true; ++ kbase_region_tracker_insert(custom_va_reg); ++ return 0; +} + -+/* Retrieve the associated region pointer if the GPU address corresponds to -+ * one of the event memory pages. The enclosing region, if found, shouldn't -+ * have been marked as free. 
-+ */ -+static struct kbase_va_region *kbase_find_event_mem_region( -+ struct kbase_context *kctx, u64 gpu_addr) ++int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, ++ int max_allocations, int trim_level, int group_id, ++ u64 phys_pages_limit) +{ -+#if MALI_USE_CSF -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct kbase_va_region *reg; ++ int err = 0; + -+ lockdep_assert_held(&kctx->reg_lock); ++ if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) ++ return -EINVAL; + -+ list_for_each_entry(reg, &kctx->csf.event_pages_head, link) { -+ if ((reg->start_pfn <= gpu_pfn) && -+ (gpu_pfn < (reg->start_pfn + reg->nr_pages))) { -+ if (WARN_ON(reg->flags & KBASE_REG_FREE)) -+ return NULL; ++ if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ++ return -EINVAL; + -+ if (WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT))) -+ return NULL; ++ if (phys_pages_limit > jit_va_pages) ++ return -EINVAL; + -+ return reg; -+ } -+ } -+#endif ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (phys_pages_limit != jit_va_pages) ++ kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ return NULL; -+} ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping -+ * of the physical allocation belonging to a -+ * region -+ * @kctx: The kernel base context @reg belongs to. -+ * @reg: The region whose physical allocation is to be mapped -+ * @vsize: The size of the requested region, in pages -+ * @size: The size in pages initially committed to the region -+ * -+ * Return: 0 on success, otherwise an error code indicating failure -+ * -+ * Maps the physical allocation backing a non-free @reg, so it may be -+ * accessed directly from the kernel. This is only supported for physical -+ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of -+ * physical allocation. -+ * -+ * The mapping is stored directly in the allocation that backs @reg. The -+ * refcount is not incremented at this point. Instead, use of the mapping should -+ * be surrounded by kbase_phy_alloc_mapping_get() and -+ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the -+ * client is accessing it. -+ * -+ * Both cached and uncached regions are allowed, but any sync operations are the -+ * responsibility of the client using the permanent mapping. -+ * -+ * A number of checks are made to ensure that a region that needs a permanent -+ * mapping can actually be supported: -+ * - The region must be created as fully backed -+ * - The region must not be growable -+ * -+ * This function will fail if those checks are not satisfied. -+ * -+ * On success, the region will also be forced into a certain kind: -+ * - It will no longer be growable -+ */ -+static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, -+ struct kbase_va_region *reg, size_t vsize, size_t size) -+{ -+ size_t size_bytes = (size << PAGE_SHIFT); -+ struct kbase_vmap_struct *kern_mapping; -+ int err = 0; ++ /* Verify that a JIT_VA zone has not been created already. */ ++ if (kctx->jit_va) { ++ err = -EINVAL; ++ goto exit_unlock; ++ } + -+ /* Can only map in regions that are always fully committed -+ * Don't setup the mapping twice -+ * Only support KBASE_MEM_TYPE_NATIVE allocations ++ /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no ++ * allocs, we can ensure there are no allocs anywhere. 
++ * ++ * This check is also useful in 32-bit, just to make sure init of the ++ * zone is always done before any allocs. + */ -+ if (vsize != size || reg->cpu_alloc->permanent_map != NULL || -+ reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ return -EINVAL; -+ -+ kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); -+ if (!kern_mapping) -+ return -ENOMEM; ++ if (kbase_region_tracker_has_allocs(kctx)) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } + -+ err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping, -+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); -+ if (err < 0) -+ goto vmap_fail; ++ if (!kbase_ctx_compat_mode(kctx)) ++ err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); ++ /* ++ * Nothing to do for 32-bit clients, JIT uses the existing ++ * custom VA zone. ++ */ + -+ /* No support for growing or shrinking mapped regions */ -+ reg->flags &= ~KBASE_REG_GROWABLE; ++ if (!err) { ++ kctx->jit_max_allocations = max_allocations; ++ kctx->trim_level = trim_level; ++ kctx->jit_va = true; ++ kctx->jit_group_id = group_id; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ kctx->jit_phys_pages_limit = phys_pages_limit; ++ dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", ++ phys_pages_limit); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ } + -+ reg->cpu_alloc->permanent_map = kern_mapping; ++exit_unlock: ++ kbase_gpu_vm_unlock(kctx); + -+ return 0; -+vmap_fail: -+ kfree(kern_mapping); + return err; +} + -+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc) -+{ -+ WARN_ON(!alloc->permanent_map); -+ kbase_vunmap_phy_pages(kctx, alloc->permanent_map); -+ kfree(alloc->permanent_map); -+ -+ alloc->permanent_map = NULL; -+} -+ -+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, -+ u64 gpu_addr, -+ struct kbase_vmap_struct **out_kern_mapping) ++int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) +{ -+ struct kbase_va_region *reg; -+ void *kern_mem_ptr = NULL; -+ struct kbase_vmap_struct *kern_mapping; -+ u64 mapping_offset; ++#if !MALI_USE_CSF ++ struct kbase_va_region *exec_va_reg; ++ struct kbase_reg_zone *exec_va_zone; ++ struct kbase_reg_zone *target_zone; ++ struct kbase_va_region *target_reg; ++ u64 target_zone_base_addr; ++ unsigned long target_zone_bits; ++ u64 exec_va_start; ++ int err; ++#endif + -+ WARN_ON(!kctx); -+ WARN_ON(!out_kern_mapping); ++ /* The EXEC_VA zone shall be created by making space either: ++ * - for 64-bit clients, at the end of the process's address space ++ * - for 32-bit clients, in the CUSTOM zone ++ * ++ * Firstly, verify that the number of EXEC_VA pages requested by the ++ * client is reasonable and then make sure that it is not greater than ++ * the address space itself before calculating the base address of the ++ * new zone. ++ */ ++ if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) ++ return -EINVAL; + ++#if MALI_USE_CSF ++ /* For CSF GPUs we now setup the EXEC_VA zone during initialization, ++ * so this request is a null-op. ++ */ ++ return 0; ++#else + kbase_gpu_vm_lock(kctx); + -+ /* First do a quick lookup in the list of event memory regions */ -+ reg = kbase_find_event_mem_region(kctx, gpu_addr); -+ -+ if (!reg) { -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ kctx, gpu_addr); ++ /* Verify that we've not already created a EXEC_VA zone, and that the ++ * EXEC_VA zone must come before JIT's CUSTOM_VA. 
++ */ ++ if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { ++ err = -EPERM; ++ goto exit_unlock; + } + -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto out_unlock; -+ -+ kern_mapping = reg->cpu_alloc->permanent_map; -+ if (kern_mapping == NULL) -+ goto out_unlock; ++ if (exec_va_pages > kctx->gpu_va_end) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } + -+ mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); ++ /* Verify no allocations have already been made */ ++ if (kbase_region_tracker_has_allocs(kctx)) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } + -+ /* Refcount the allocations to prevent them disappearing */ -+ WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); -+ WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); -+ (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); -+ (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); ++ if (kbase_ctx_compat_mode(kctx)) { ++ /* 32-bit client: take from CUSTOM_VA zone */ ++ target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; ++ } else { ++ /* 64-bit client: take from SAME_VA zone */ ++ target_zone_bits = KBASE_REG_ZONE_SAME_VA; ++ } + -+ kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); -+ *out_kern_mapping = kern_mapping; -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return kern_mem_ptr; -+} ++ target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); ++ target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; + -+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, -+ struct kbase_vmap_struct *kern_mapping) -+{ -+ WARN_ON(!kctx); -+ WARN_ON(!kern_mapping); ++ target_reg = kbase_region_tracker_find_region_base_address( ++ kctx, target_zone_base_addr); ++ if (WARN(!target_reg, ++ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", ++ (unsigned long long)target_zone_base_addr, target_zone_bits)) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } ++ /* kbase_region_tracker_has_allocs() above has already ensured that all ++ * of the zones have no allocs, so no need to check that again on ++ * target_reg ++ */ ++ WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || ++ target_reg->nr_pages != target_zone->va_size_pages); + -+ WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); -+ WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map); ++ if (target_reg->nr_pages <= exec_va_pages || ++ target_zone->va_size_pages <= exec_va_pages) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } + -+ kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); -+ kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); ++ /* Taken from the end of the target zone */ ++ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; + -+ /* kern_mapping and the gpu/cpu phy allocs backing it must not be used -+ * from now on ++ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, ++ exec_va_pages, KBASE_REG_ZONE_EXEC_VA); ++ if (!exec_va_reg) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } ++ /* Update EXEC_VA zone ++ * ++ * not using kbase_ctx_reg_zone_init() - it was already initialized + */ -+} ++ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); ++ exec_va_zone->base_pfn = exec_va_start; ++ exec_va_zone->va_size_pages = exec_va_pages; + -+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, -+ u64 extension, u64 *flags, u64 *gpu_va, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) -+{ -+ int zone; -+ struct kbase_va_region *reg; 
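For reference, a minimal sketch (not part of the patch) of the ordering constraint spelled out above: the EXEC_VA zone has to be reserved before the JIT zone, because kbase_region_tracker_init_exec() refuses to run once kctx->jit_va is set. The wrapper name and the page counts/limits are arbitrary illustrative values, and both calls take the GPU VM lock internally, so the caller must not hold it.

static int example_reserve_exec_and_jit(struct kbase_context *kctx)
{
	int err;

	/* Carve EXEC_VA out of SAME_VA (64-bit clients) or CUSTOM_VA (32-bit);
	 * on CSF GPUs this is a null-op because EXEC_VA is set up at init.
	 */
	err = kbase_region_tracker_init_exec(kctx, 64);
	if (err)
		return err;

	/* Then reserve the JIT VA: 4096 pages, at most 255 allocations,
	 * trim level 0, memory group 0, and a physical page limit equal to
	 * the VA size (which leaves the pressure limit disabled).
	 */
	return kbase_region_tracker_init_jit(kctx, 4096, 255, 0, 0, 4096);
}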
-+ struct rb_root *rbtree; -+ struct device *dev; ++ /* Update target zone and corresponding region */ ++ target_reg->nr_pages -= exec_va_pages; ++ target_zone->va_size_pages -= exec_va_pages; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(flags); -+ KBASE_DEBUG_ASSERT(gpu_va); ++ kbase_region_tracker_insert(exec_va_reg); ++ err = 0; + -+ dev = kctx->kbdev->dev; -+ dev_dbg(dev, -+ "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", -+ va_pages, commit_pages, extension, *flags); ++exit_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return err; ++#endif /* MALI_USE_CSF */ ++} + +#if MALI_USE_CSF -+ if (!(*flags & BASE_MEM_FIXED)) -+ *gpu_va = 0; /* return 0 on failure */ -+#else -+ if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) -+ *gpu_va = 0; /* return 0 on failure */ -+#endif -+ else -+ dev_dbg(dev, -+ "Keeping requested GPU VA of 0x%llx\n", -+ (unsigned long long)*gpu_va); -+ -+ if (!kbase_check_alloc_flags(*flags)) { -+ dev_warn(dev, -+ "%s called with bad flags (%llx)", -+ __func__, -+ (unsigned long long)*flags); -+ goto bad_flags; -+ } ++void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) ++{ ++ kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); ++} + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+ if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { -+ /* Mask coherency flags if infinite cache is enabled to prevent -+ * the skipping of syncs from BASE side. -+ */ -+ *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | -+ BASE_MEM_COHERENT_SYSTEM); -+ } -+#endif ++int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) ++{ ++ struct kbase_va_region *shared_reg; ++ u64 shared_reg_start_pfn; ++ u64 shared_reg_size; + -+ if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && -+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { -+ /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ dev_warn(dev, "%s call required coherent mem when unavailable", -+ __func__); -+ goto bad_flags; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM; -+ } ++ shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; ++ shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; + -+ if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, -+ extension)) -+ goto bad_sizes; ++ kbdev->csf.shared_reg_rbtree = RB_ROOT; + -+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED -+ /* Ensure that memory is fully physically-backed. 
*/ -+ if (*flags & BASE_MEM_GROW_ON_GPF) -+ commit_pages = va_pages; -+#endif ++ shared_reg = ++ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, ++ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); ++ if (!shared_reg) ++ return -ENOMEM; + -+ /* find out which VA zone to use */ -+ if (*flags & BASE_MEM_SAME_VA) { -+ rbtree = &kctx->reg_rbtree_same; -+ zone = KBASE_REG_ZONE_SAME_VA; -+ } -+#if MALI_USE_CSF -+ /* fixed va_zone always exists */ -+ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { -+ if (*flags & BASE_MEM_PROT_GPU_EX) { -+ rbtree = &kctx->reg_rbtree_exec_fixed; -+ zone = KBASE_REG_ZONE_EXEC_FIXED_VA; -+ } else { -+ rbtree = &kctx->reg_rbtree_fixed; -+ zone = KBASE_REG_ZONE_FIXED_VA; -+ } -+ } ++ kbase_region_tracker_insert(shared_reg); ++ return 0; ++} +#endif -+ else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { -+ rbtree = &kctx->reg_rbtree_exec; -+ zone = KBASE_REG_ZONE_EXEC_VA; -+ } else { -+ rbtree = &kctx->reg_rbtree_custom; -+ zone = KBASE_REG_ZONE_CUSTOM_VA; -+ } -+ -+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); -+ -+ if (!reg) { -+ dev_err(dev, "Failed to allocate free region"); -+ goto no_region; -+ } + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; -+ -+ if (kbase_reg_prepare_native(reg, kctx, -+ kbase_mem_group_id_get(*flags)) != 0) { -+ dev_err(dev, "Failed to prepare region"); -+ goto prepare_failed; ++static void kbasep_mem_page_size_init(struct kbase_device *kbdev) ++{ ++#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) ++#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) ++ kbdev->pagesize_2mb = true; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { ++ dev_warn( ++ kbdev->dev, ++ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); + } ++#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ ++ kbdev->pagesize_2mb = false; ++#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ ++#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ ++ /* Set it to the default based on which GPU is present */ ++ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); ++#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ ++} + -+ if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) -+ *flags |= BASE_MEM_KERNEL_SYNC; ++int kbase_mem_init(struct kbase_device *kbdev) ++{ ++ int err = 0; ++ struct kbasep_mem_device *memdev; ++ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; ++#if IS_ENABLED(CONFIG_OF) ++ struct device_node *mgm_node = NULL; ++#endif + -+ /* make sure base knows if the memory is actually cached or not */ -+ if (reg->flags & KBASE_REG_CPU_CACHED) -+ *flags |= BASE_MEM_CACHED_CPU; -+ else -+ *flags &= ~BASE_MEM_CACHED_CPU; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ if (*flags & BASE_MEM_GROW_ON_GPF) { -+ unsigned int const ir_threshold = atomic_read( -+ &kctx->kbdev->memdev.ir_threshold); ++ memdev = &kbdev->memdev; + -+ reg->threshold_pages = ((va_pages * ir_threshold) + -+ (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; -+ } else -+ reg->threshold_pages = 0; ++ kbasep_mem_page_size_init(kbdev); + -+ if (*flags & BASE_MEM_GROW_ON_GPF) { -+ /* kbase_check_alloc_sizes() already checks extension is valid for -+ * assigning to reg->extension -+ */ -+ reg->extension = extension; -+#if !MALI_USE_CSF -+ } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { -+ reg->extension = extension; -+#endif /* !MALI_USE_CSF */ -+ } else { -+ reg->extension = 0; -+ } ++ 
scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", ++ kbdev->devname); + -+ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { -+ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", -+ (unsigned long long)commit_pages, -+ (unsigned long long)va_pages); -+ goto no_mem; ++ /* Initialize slab cache for kbase_va_regions */ ++ kbdev->va_region_slab = ++ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL); ++ if (kbdev->va_region_slab == NULL) { ++ dev_err(kbdev->dev, "Failed to create va_region_slab\n"); ++ return -ENOMEM; + } -+ reg->initial_commit = commit_pages; + -+ kbase_gpu_vm_lock(kctx); ++ kbase_mem_migrate_init(kbdev); ++ kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, ++ KBASE_MEM_POOL_MAX_SIZE_KCTX); + -+ if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { -+ /* Permanent kernel mappings must happen as soon as -+ * reg->cpu_alloc->pages is ready. Currently this happens after -+ * kbase_alloc_phy_pages(). If we move that to setup pages -+ * earlier, also move this call too -+ */ -+ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, -+ commit_pages); -+ if (err < 0) { -+ kbase_gpu_vm_unlock(kctx); -+ goto no_kern_mapping; -+ } -+ } ++ /* Initialize memory usage */ ++ atomic_set(&memdev->used_pages, 0); + -+ /* mmap needed to setup VA? */ -+ if (*flags & BASE_MEM_SAME_VA) { -+ unsigned long cookie, cookie_nr; ++ spin_lock_init(&kbdev->gpu_mem_usage_lock); ++ kbdev->total_gpu_pages = 0; ++ kbdev->process_root = RB_ROOT; ++ kbdev->dma_buf_root = RB_ROOT; ++ mutex_init(&kbdev->dma_buf_lock); + -+ /* Bind to a cookie */ -+ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { -+ dev_err(dev, "No cookies available for allocation!"); -+ kbase_gpu_vm_unlock(kctx); -+ goto no_cookie; -+ } -+ /* return a cookie */ -+ cookie_nr = find_first_bit(kctx->cookies, BITS_PER_LONG); -+ bitmap_clear(kctx->cookies, cookie_nr, 1); -+ BUG_ON(kctx->pending_regions[cookie_nr]); -+ kctx->pending_regions[cookie_nr] = reg; ++#ifdef IR_THRESHOLD ++ atomic_set(&memdev->ir_threshold, IR_THRESHOLD); ++#else ++ atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); ++#endif + -+ /* relocate to correct base */ -+ cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ cookie <<= PAGE_SHIFT; ++ kbdev->mgm_dev = &kbase_native_mgm_dev; + -+ *gpu_va = (u64) cookie; -+ } else /* we control the VA */ { -+ size_t align = 1; ++#if IS_ENABLED(CONFIG_OF) ++ /* Check to see whether or not a platform-specific memory group manager ++ * is configured and available. ++ */ ++ mgm_node = of_parse_phandle(kbdev->dev->of_node, ++ "physical-memory-group-manager", 0); ++ if (!mgm_node) { ++ dev_info(kbdev->dev, ++ "No memory group manager is configured\n"); ++ } else { ++ struct platform_device *const pdev = ++ of_find_device_by_node(mgm_node); + -+ if (kctx->kbdev->pagesize_2mb) { -+ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB -+ * boundaries. The similar condition is used for mapping from -+ * the SAME_VA zone inside kbase_context_get_unmapped_area(). 
-+ */ -+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { -+ if (va_pages >= (SZ_2M / SZ_4K)) -+ align = (SZ_2M / SZ_4K); ++ if (!pdev) { ++ dev_err(kbdev->dev, ++ "The configured memory group manager was not found\n"); ++ } else { ++ kbdev->mgm_dev = platform_get_drvdata(pdev); ++ if (!kbdev->mgm_dev) { ++ dev_info(kbdev->dev, ++ "Memory group manager is not ready\n"); ++ err = -EPROBE_DEFER; ++ } else if (!try_module_get(kbdev->mgm_dev->owner)) { ++ dev_err(kbdev->dev, ++ "Failed to get memory group manger module\n"); ++ err = -ENODEV; ++ kbdev->mgm_dev = NULL; ++ } else { ++ dev_info(kbdev->dev, ++ "Memory group manager successfully loaded\n"); + } -+ if (*gpu_va) -+ align = 1; -+#if !MALI_USE_CSF -+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) -+ align = 1; -+#endif /* !MALI_USE_CSF */ -+ } -+ if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, -+ mmu_sync_info) != 0) { -+ dev_warn(dev, "Failed to map memory on GPU"); -+ kbase_gpu_vm_unlock(kctx); -+ goto no_mmap; + } -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } -+ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { -+ kbase_jit_done_phys_increase(kctx, commit_pages); -+ -+ mutex_lock(&kctx->jit_evict_lock); -+ WARN_ON(!list_empty(®->jit_node)); -+ list_add(®->jit_node, &kctx->jit_active_head); -+ mutex_unlock(&kctx->jit_evict_lock); ++ of_node_put(mgm_node); + } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+ kbase_gpu_vm_unlock(kctx); -+ -+#if MALI_USE_CSF -+ if (*flags & BASE_MEM_FIXABLE) -+ atomic64_inc(&kctx->num_fixable_allocs); -+ else if (*flags & BASE_MEM_FIXED) -+ atomic64_inc(&kctx->num_fixed_allocs); +#endif + -+ return reg; ++ if (likely(!err)) { ++ struct kbase_mem_pool_group_config mem_pool_defaults; + -+no_mmap: -+no_cookie: -+no_kern_mapping: -+no_mem: -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { -+ kbase_gpu_vm_lock(kctx); -+ kbase_jit_done_phys_increase(kctx, commit_pages); -+ kbase_gpu_vm_unlock(kctx); ++ kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, ++ KBASE_MEM_POOL_MAX_SIZE_KBDEV); ++ ++ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); + } -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+invalid_flags: -+prepare_failed: -+ kfree(reg); -+no_region: -+bad_sizes: -+bad_flags: -+ return NULL; ++ ++ return err; +} -+KBASE_EXPORT_TEST_API(kbase_mem_alloc); + -+int kbase_mem_query(struct kbase_context *kctx, -+ u64 gpu_addr, u64 query, u64 * const out) ++void kbase_mem_halt(struct kbase_device *kbdev) +{ -+ struct kbase_va_region *reg; -+ int ret = -EINVAL; ++ CSTD_UNUSED(kbdev); ++} + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(out); ++void kbase_mem_term(struct kbase_device *kbdev) ++{ ++ struct kbasep_mem_device *memdev; ++ int pages; + -+ if (gpu_addr & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); -+ return -EINVAL; -+ } ++ KBASE_DEBUG_ASSERT(kbdev); + -+ kbase_gpu_vm_lock(kctx); ++ memdev = &kbdev->memdev; + -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto out_unlock; ++ pages = atomic_read(&memdev->used_pages); ++ if (pages != 0) ++ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + -+ switch (query) { -+ case KBASE_MEM_QUERY_COMMIT_SIZE: -+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { -+ *out = 
kbase_reg_current_backed_size(reg); -+ } else { -+ size_t i; -+ struct kbase_aliased *aliased; -+ *out = 0; -+ aliased = reg->cpu_alloc->imported.alias.aliased; -+ for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) -+ *out += aliased[i].length; -+ } -+ break; -+ case KBASE_MEM_QUERY_VA_SIZE: -+ *out = reg->nr_pages; -+ break; -+ case KBASE_MEM_QUERY_FLAGS: -+ { -+ *out = 0; -+ if (KBASE_REG_CPU_WR & reg->flags) -+ *out |= BASE_MEM_PROT_CPU_WR; -+ if (KBASE_REG_CPU_RD & reg->flags) -+ *out |= BASE_MEM_PROT_CPU_RD; -+ if (KBASE_REG_CPU_CACHED & reg->flags) -+ *out |= BASE_MEM_CACHED_CPU; -+ if (KBASE_REG_GPU_WR & reg->flags) -+ *out |= BASE_MEM_PROT_GPU_WR; -+ if (KBASE_REG_GPU_RD & reg->flags) -+ *out |= BASE_MEM_PROT_GPU_RD; -+ if (!(KBASE_REG_GPU_NX & reg->flags)) -+ *out |= BASE_MEM_PROT_GPU_EX; -+ if (KBASE_REG_SHARE_BOTH & reg->flags) -+ *out |= BASE_MEM_COHERENT_SYSTEM; -+ if (KBASE_REG_SHARE_IN & reg->flags) -+ *out |= BASE_MEM_COHERENT_LOCAL; -+ if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { -+ /* Prior to this version, this was known about by -+ * user-side but we did not return them. Returning -+ * it caused certain clients that were not expecting -+ * it to fail, so we omit it as a special-case for -+ * compatibility reasons -+ */ -+ if (KBASE_REG_PF_GROW & reg->flags) -+ *out |= BASE_MEM_GROW_ON_GPF; -+ } -+ if (mali_kbase_supports_mem_protected(kctx->api_version)) { -+ /* Prior to this version, this was known about by -+ * user-side but we did not return them. Returning -+ * it caused certain clients that were not expecting -+ * it to fail, so we omit it as a special-case for -+ * compatibility reasons -+ */ -+ if (KBASE_REG_PROTECTED & reg->flags) -+ *out |= BASE_MEM_PROTECTED; -+ } -+#if !MALI_USE_CSF -+ if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) -+ *out |= BASE_MEM_TILER_ALIGN_TOP; -+#endif /* !MALI_USE_CSF */ -+ if (!(KBASE_REG_GPU_CACHED & reg->flags)) -+ *out |= BASE_MEM_UNCACHED_GPU; -+#if MALI_USE_CSF -+ if (KBASE_REG_CSF_EVENT & reg->flags) -+ *out |= BASE_MEM_CSF_EVENT; -+ if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || -+ ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { -+ if (KBASE_REG_FIXED_ADDRESS & reg->flags) -+ *out |= BASE_MEM_FIXED; -+ else -+ *out |= BASE_MEM_FIXABLE; -+ } -+#endif -+ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) -+ *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; ++ kbase_mem_pool_group_term(&kbdev->mem_pools); + -+ *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); ++ kbase_mem_migrate_term(kbdev); + -+ WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, -+ "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); -+ *out &= BASE_MEM_FLAGS_QUERYABLE; -+ break; -+ } -+ default: -+ *out = 0; -+ goto out_unlock; -+ } ++ kmem_cache_destroy(kbdev->va_region_slab); ++ kbdev->va_region_slab = NULL; + -+ ret = 0; ++ WARN_ON(kbdev->total_gpu_pages); ++ WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); ++ WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); ++ mutex_destroy(&kbdev->dma_buf_lock); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return ret; ++ if (kbdev->mgm_dev) ++ module_put(kbdev->mgm_dev->owner); +} ++KBASE_EXPORT_TEST_API(kbase_mem_term); + +/** -+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the -+ * Ephemeral memory eviction list. -+ * @s: Shrinker -+ * @sc: Shrinker control ++ * kbase_alloc_free_region - Allocate a free region object. + * -+ * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains. 
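For reference, a minimal usage sketch (not part of the patch) of kbase_mem_query() as implemented above. The wrapper name is hypothetical; gpu_addr must be page aligned and refer to the base address of a live region, otherwise -EINVAL is returned.

static int example_query_region(struct kbase_context *kctx, u64 gpu_addr)
{
	u64 committed_pages, flags;
	int err;

	err = kbase_mem_query(kctx, gpu_addr, KBASE_MEM_QUERY_COMMIT_SIZE,
			      &committed_pages);
	if (err)
		return err;

	err = kbase_mem_query(kctx, gpu_addr, KBASE_MEM_QUERY_FLAGS, &flags);
	if (err)
		return err;

	dev_dbg(kctx->kbdev->dev,
		"region 0x%llx: %llu committed pages, flags 0x%llx\n",
		gpu_addr, committed_pages, flags);
	return 0;
}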
++ * @kbdev: kbase device ++ * @rbtree: Backlink to the red-black tree of memory regions. ++ * @start_pfn: The Page Frame Number in GPU virtual address space. ++ * @nr_pages: The size of the region in pages. ++ * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA ++ * ++ * The allocated object is not part of any list yet, and is flagged as ++ * KBASE_REG_FREE. No mapping is allocated yet. ++ * ++ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. ++ * ++ * Return: pointer to the allocated region object on success, NULL otherwise. + */ -+static -+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, -+ struct shrink_control *sc) ++struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, ++ u64 start_pfn, size_t nr_pages, int zone) +{ -+ struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); -+ int evict_nents = atomic_read(&kctx->evict_nents); -+ unsigned long nr_freeable_items; ++ struct kbase_va_region *new_reg; + -+ WARN(in_atomic(), -+ "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", -+ sc->gfp_mask); ++ KBASE_DEBUG_ASSERT(rbtree != NULL); + -+ if (unlikely(evict_nents < 0)) { -+ dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents); -+ nr_freeable_items = 0; -+ } else { -+ nr_freeable_items = evict_nents; -+ } ++ /* zone argument should only contain zone related region flags */ ++ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); ++ KBASE_DEBUG_ASSERT(nr_pages > 0); ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); + -+#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE -+ if (nr_freeable_items == 0) -+ nr_freeable_items = SHRINK_EMPTY; -+#endif ++ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); + -+ return nr_freeable_items; ++ if (!new_reg) ++ return NULL; ++ ++ kbase_refcount_set(&new_reg->va_refcnt, 1); ++ atomic_set(&new_reg->no_user_free_count, 0); ++ new_reg->cpu_alloc = NULL; /* no alloc bound yet */ ++ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ ++ new_reg->rbtree = rbtree; ++ new_reg->flags = zone | KBASE_REG_FREE; ++ ++ new_reg->flags |= KBASE_REG_GROWABLE; ++ ++ new_reg->start_pfn = start_pfn; ++ new_reg->nr_pages = nr_pages; ++ ++ INIT_LIST_HEAD(&new_reg->jit_node); ++ INIT_LIST_HEAD(&new_reg->link); ++ ++ return new_reg; +} + ++KBASE_EXPORT_TEST_API(kbase_alloc_free_region); ++ +/** -+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction -+ * list for pages and try to reclaim them. -+ * @s: Shrinker -+ * @sc: Shrinker control ++ * kbase_free_alloced_region - Free a region object. + * -+ * Return: Number of pages freed (can be less then requested) or -+ * SHRINK_STOP if reclaim isn't possible. ++ * @reg: Region + * -+ * Note: -+ * This function accesses region structures without taking the region lock, -+ * this is required as the OOM killer can call the shrinker after the region -+ * lock has already been held. -+ * This is safe as we can guarantee that a region on the eviction list will -+ * not be freed (kbase_mem_free_region removes the allocation from the list -+ * before destroying it), or modified by other parts of the driver. -+ * The eviction list itself is guarded by the eviction lock and the MMU updates -+ * are protected by their own lock. ++ * The described region must be freed of any mapping. 
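For reference, a minimal sketch (not part of the patch) of the allocate-then-insert pattern the zone setup code above follows when it creates a free region. The wrapper name, rbtree and PFN range are placeholders, and the caller is assumed to hold whatever lock protects the target region tracker.

static int example_add_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
				   u64 start_pfn, size_t nr_pages)
{
	struct kbase_va_region *reg;

	reg = kbase_alloc_free_region(kbdev, rbtree, start_pfn, nr_pages,
				      KBASE_REG_ZONE_CUSTOM_VA);
	if (!reg)
		return -ENOMEM;

	/* The region starts life flagged KBASE_REG_FREE and unlinked; it only
	 * becomes visible to lookups once inserted into the tracker. If the
	 * insertion were skipped on an error path, kbase_free_alloced_region()
	 * would have to be called instead.
	 */
	kbase_region_tracker_insert(reg);
	return 0;
}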
++ * ++ * If the region is not flagged as KBASE_REG_FREE, the region's ++ * alloc object will be released. ++ * It is a bug if no alloc object exists for non-free regions. ++ * ++ * If region is KBASE_REG_ZONE_MCU_SHARED it is freed + */ -+static -+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, -+ struct shrink_control *sc) ++void kbase_free_alloced_region(struct kbase_va_region *reg) +{ -+ struct kbase_context *kctx; -+ struct kbase_mem_phy_alloc *alloc; -+ struct kbase_mem_phy_alloc *tmp; -+ unsigned long freed = 0; -+ -+ kctx = container_of(s, struct kbase_context, reclaim); ++#if MALI_USE_CSF ++ if ((reg->flags & KBASE_REG_ZONE_MASK) == ++ KBASE_REG_ZONE_MCU_SHARED) { ++ kfree(reg); ++ return; ++ } ++#endif ++ if (!(reg->flags & KBASE_REG_FREE)) { ++ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + -+ mutex_lock(&kctx->jit_evict_lock); ++ if (WARN_ON(!kctx)) ++ return; + -+ list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { -+ int err; ++ if (WARN_ON(kbase_is_region_invalid(reg))) ++ return; + -+ if (!alloc->reg) -+ continue; ++ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", ++ (void *)reg); ++#if MALI_USE_CSF ++ if (reg->flags & KBASE_REG_CSF_EVENT) ++ /* ++ * This should not be reachable if called from 'mcu_shared' functions ++ * such as: ++ * kbase_csf_firmware_mcu_shared_mapping_init ++ * kbase_csf_firmware_mcu_shared_mapping_term ++ */ + -+ err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, -+ 0, alloc->nents); ++ kbase_unlink_event_mem_page(kctx, reg); ++#endif + -+ /* Failed to remove GPU mapping, proceed to next one. */ -+ if (err != 0) -+ continue; ++ mutex_lock(&kctx->jit_evict_lock); + + /* -+ * Update alloc->evicted before freeing the backing so the -+ * helper can determine that it needs to bypass the accounting -+ * and memory pool. ++ * The physical allocation should have been removed from the ++ * eviction list before this function is called. However, in the ++ * case of abnormal process termination or the app leaking the ++ * memory kbase_mem_free_region is not called so it can still be ++ * on the list at termination time of the region tracker. + */ -+ alloc->evicted = alloc->nents; ++ if (!list_empty(®->gpu_alloc->evict_node)) { ++ /* ++ * Unlink the physical allocation before unmaking it ++ * evictable so that the allocation isn't grown back to ++ * its last backed size as we're going to unmap it ++ * anyway. ++ */ ++ reg->cpu_alloc->reg = NULL; ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ reg->gpu_alloc->reg = NULL; + -+ kbase_free_phy_pages_helper(alloc, alloc->evicted); -+ freed += alloc->evicted; -+ WARN_ON(atomic_sub_return(alloc->evicted, &kctx->evict_nents) < 0); -+ list_del_init(&alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ /* ++ * If a region has been made evictable then we must ++ * unmake it before trying to free it. ++ * If the memory hasn't been reclaimed it will be ++ * unmapped and freed below, if it has been reclaimed ++ * then the operations below are no-ops. ++ */ ++ if (reg->flags & KBASE_REG_DONT_NEED) { ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == ++ KBASE_MEM_TYPE_NATIVE); ++ kbase_mem_evictable_unmake(reg->gpu_alloc); ++ } ++ } else { ++ mutex_unlock(&kctx->jit_evict_lock); ++ } + + /* -+ * Inform the JIT allocator this region has lost backing -+ * as it might need to free the allocation. ++ * Remove the region from the sticky resource metadata ++ * list should it be there. 
+ */ -+ kbase_jit_backing_lost(alloc->reg); -+ -+ /* Enough pages have been freed so stop now */ -+ if (freed > sc->nr_to_scan) -+ break; -+ } -+ -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ return freed; -+} -+ -+int kbase_mem_evictable_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->evict_list); -+ mutex_init(&kctx->jit_evict_lock); ++ kbase_sticky_resource_release_force(kctx, NULL, ++ reg->start_pfn << PAGE_SHIFT); + -+ atomic_set(&kctx->evict_nents, 0); ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); + -+ kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; -+ kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; -+ kctx->reclaim.seeks = DEFAULT_SEEKS; -+ /* Kernel versions prior to 3.1 : -+ * struct shrinker does not define batch -+ */ -+ kctx->reclaim.batch = 0; -+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE -+ register_shrinker(&kctx->reclaim); -+#else -+ register_shrinker(&kctx->reclaim, "mali-mem"); -+#endif -+ return 0; ++ reg->flags |= KBASE_REG_VA_FREED; ++ kbase_va_region_alloc_put(kctx, reg); ++ } else { ++ kfree(reg); ++ } +} + -+void kbase_mem_evictable_deinit(struct kbase_context *kctx) -+{ -+ unregister_shrinker(&kctx->reclaim); -+} ++KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + -+/** -+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. -+ * @alloc: The physical allocation -+ */ -+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) ++int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 addr, size_t nr_pages, size_t align, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) +{ -+ struct kbase_context *kctx = alloc->imported.native.kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ int __maybe_unused new_page_count; ++ int err; ++ size_t i = 0; ++ unsigned long attr; ++ unsigned long mask = ~KBASE_REG_MEMATTR_MASK; ++ unsigned long gwt_mask = ~0; ++ int group_id; ++ struct kbase_mem_phy_alloc *alloc; + -+ kbase_process_page_usage_dec(kctx, alloc->nents); -+ new_page_count = atomic_sub_return(alloc->nents, -+ &kctx->used_pages); -+ atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); ++#ifdef CONFIG_MALI_CINSTR_GWT ++ if (kctx->gwt_enabled) ++ gwt_mask = ~KBASE_REG_GPU_WR; ++#endif + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); -+ kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); -+} ++ if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && ++ (reg->flags & KBASE_REG_SHARE_BOTH)) ++ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); ++ else ++ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + -+/** -+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. 
-+ * @alloc: The physical allocation -+ */ -+static -+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) -+{ -+ struct kbase_context *kctx = alloc->imported.native.kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ int __maybe_unused new_page_count; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); + -+ new_page_count = atomic_add_return(alloc->nents, -+ &kctx->used_pages); -+ atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); ++ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); ++ if (err) ++ return err; + -+ /* Increase mm counters so that the allocation is accounted for -+ * against the process and thus is visible to the OOM killer, -+ */ -+ kbase_process_page_usage_inc(kctx, alloc->nents); ++ alloc = reg->gpu_alloc; ++ group_id = alloc->group_id; + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ kctx->id, -+ (u64)new_page_count); -+ kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); -+} ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { ++ u64 const stride = alloc->imported.alias.stride; + -+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) -+{ -+ struct kbase_context *kctx = gpu_alloc->imported.native.kctx; ++ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); ++ for (i = 0; i < alloc->imported.alias.nents; i++) { ++ if (alloc->imported.alias.aliased[i].alloc) { ++ err = kbase_mmu_insert_aliased_pages( ++ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), ++ alloc->imported.alias.aliased[i].alloc->pages + ++ alloc->imported.alias.aliased[i].offset, ++ alloc->imported.alias.aliased[i].length, ++ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, ++ NULL); ++ if (err) ++ goto bad_aliased_insert; + -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Note: mapping count is tracked at alias ++ * creation time ++ */ ++ } else { ++ err = kbase_mmu_insert_single_aliased_page( ++ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page, ++ alloc->imported.alias.aliased[i].length, ++ (reg->flags & mask & gwt_mask) | attr, group_id, ++ mmu_sync_info); + -+ /* Memory is in the process of transitioning to the shrinker, and -+ * should ignore migration attempts -+ */ -+ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, -+ 0, gpu_alloc->nents); ++ if (err) ++ goto bad_aliased_insert; ++ } ++ } ++ } else { ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || ++ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + -+ mutex_lock(&kctx->jit_evict_lock); -+ /* This allocation can't already be on a list. */ -+ WARN_ON(!list_empty(&gpu_alloc->evict_node)); ++ err = kbase_mmu_insert_imported_pages( ++ kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), ++ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); ++ } else { ++ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ kbase_reg_current_backed_size(reg), ++ reg->flags & gwt_mask, kctx->as_nr, group_id, ++ mmu_sync_info, reg, true); ++ } + -+ /* Add the allocation to the eviction list, after this point the shrink -+ * can reclaim it. -+ */ -+ list_add(&gpu_alloc->evict_node, &kctx->evict_list); -+ atomic_add(gpu_alloc->nents, &kctx->evict_nents); ++ if (err) ++ goto bad_insert; ++ kbase_mem_phy_alloc_gpu_mapped(alloc); ++ } + -+ /* Indicate to page migration that the memory can be reclaimed by the shrinker. 
-+ */ -+ if (kbase_page_migration_enabled) -+ kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); -+ -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_mem_evictable_mark_reclaim(gpu_alloc); -+ -+ gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; -+ return 0; -+} -+ -+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) -+{ -+ struct kbase_context *kctx = gpu_alloc->imported.native.kctx; -+ int err = 0; -+ -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ -+ lockdep_assert_held(&kctx->reg_lock); -+ -+ mutex_lock(&kctx->jit_evict_lock); -+ /* -+ * First remove the allocation from the eviction list as it's no -+ * longer eligible for eviction. -+ */ -+ WARN_ON(atomic_sub_return(gpu_alloc->nents, &kctx->evict_nents) < 0); -+ list_del_init(&gpu_alloc->evict_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ if (gpu_alloc->evicted == 0) { -+ /* -+ * The backing is still present, update the VM stats as it's -+ * in use again. ++ if (reg->flags & KBASE_REG_IMPORT_PAD && ++ !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && ++ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count) { ++ /* For padded imported dma-buf or user-buf memory, map the dummy ++ * aliasing page from the end of the imported pages, to the end of ++ * the region using a read only mapping. ++ * ++ * Only map when it's imported dma-buf memory that is currently ++ * mapped. ++ * ++ * Assume reg->gpu_alloc->nents is the number of actual pages ++ * in the dma-buf memory. + */ -+ kbase_mem_evictable_unmark_reclaim(gpu_alloc); -+ } else { -+ /* If the region is still alive ... */ -+ if (gpu_alloc->reg) { -+ /* ... allocate replacement backing ... */ -+ err = kbase_alloc_phy_pages_helper(gpu_alloc, -+ gpu_alloc->evicted); ++ err = kbase_mmu_insert_single_imported_page( ++ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page, ++ reg->nr_pages - reg->gpu_alloc->nents, ++ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, ++ mmu_sync_info); ++ if (err) ++ goto bad_insert; ++ } + -+ /* -+ * ... and grow the mapping back to its -+ * pre-eviction size. -+ */ -+ if (!err) -+ err = kbase_mem_grow_gpu_mapping( -+ kctx, gpu_alloc->reg, -+ gpu_alloc->evicted, 0, mmu_sync_info); ++ return err; + -+ gpu_alloc->evicted = 0; ++bad_aliased_insert: ++ while (i-- > 0) { ++ struct tagged_addr *phys_alloc = NULL; ++ u64 const stride = alloc->imported.alias.stride; + -+ /* Since the allocation is no longer evictable, and we ensure that -+ * it grows back to its pre-eviction size, we will consider the -+ * state of it to be ALLOCATED_MAPPED, as that is the only state -+ * in which a physical allocation could transition to NOT_MOVABLE -+ * from. -+ */ -+ if (kbase_page_migration_enabled) -+ kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); -+ } -+ } ++ if (alloc->imported.alias.aliased[i].alloc != NULL) ++ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + ++ alloc->imported.alias.aliased[i].offset; + -+ /* If the region is still alive remove the DONT_NEED attribute. 
*/ -+ if (gpu_alloc->reg) -+ gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; ++ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), ++ phys_alloc, alloc->imported.alias.aliased[i].length, ++ alloc->imported.alias.aliased[i].length, kctx->as_nr, ++ false); ++ } ++bad_insert: ++ kbase_remove_va_region(kctx->kbdev, reg); + -+ return (err == 0); ++ return err; +} + -+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) ++KBASE_EXPORT_TEST_API(kbase_gpu_mmap); ++ ++static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, ++ struct kbase_va_region *reg, bool writeable); ++ ++int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ struct kbase_va_region *reg; -+ int ret = -EINVAL; -+ unsigned int real_flags = 0; -+ unsigned int new_flags = 0; -+ bool prev_needed, new_needed; ++ int err = 0; ++ struct kbase_mem_phy_alloc *alloc; + -+ KBASE_DEBUG_ASSERT(kctx); ++ if (reg->start_pfn == 0) ++ return 0; + -+ if (!gpu_addr) ++ if (!reg->gpu_alloc) + return -EINVAL; + -+ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) -+ return -EINVAL; ++ alloc = reg->gpu_alloc; + -+ /* nuke other bits */ -+ flags &= mask; ++ /* Tear down GPU page tables, depending on memory type. */ ++ switch (alloc->type) { ++ case KBASE_MEM_TYPE_ALIAS: { ++ size_t i = 0; ++ /* Due to the way the number of valid PTEs and ATEs are tracked ++ * currently, only the GPU virtual range that is backed & mapped ++ * should be passed to the kbase_mmu_teardown_pages() function, ++ * hence individual aliased regions needs to be unmapped ++ * separately. ++ */ ++ for (i = 0; i < alloc->imported.alias.nents; i++) { ++ struct tagged_addr *phys_alloc = NULL; ++ int err_loop; + -+ /* check for only supported flags */ -+ if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) -+ goto out; ++ if (alloc->imported.alias.aliased[i].alloc != NULL) ++ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + ++ alloc->imported.alias.aliased[i].offset; + -+ /* mask covers bits we don't support? */ -+ if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) -+ goto out; ++ err_loop = kbase_mmu_teardown_pages( ++ kctx->kbdev, &kctx->mmu, ++ reg->start_pfn + (i * alloc->imported.alias.stride), ++ phys_alloc, alloc->imported.alias.aliased[i].length, ++ alloc->imported.alias.aliased[i].length, kctx->as_nr, ++ false); + -+ /* convert flags */ -+ if (BASE_MEM_COHERENT_SYSTEM & flags) -+ real_flags |= KBASE_REG_SHARE_BOTH; -+ else if (BASE_MEM_COHERENT_LOCAL & flags) -+ real_flags |= KBASE_REG_SHARE_IN; ++ if (WARN_ON_ONCE(err_loop)) ++ err = err_loop; ++ } ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ size_t nr_phys_pages = reg->nr_pages; ++ size_t nr_virt_pages = reg->nr_pages; ++ /* If the region has import padding and falls under the threshold for ++ * issuing a partial GPU cache flush, we want to reduce the number of ++ * physical pages that get flushed. + -+ /* now we can lock down the context, and find the region */ -+ down_write(kbase_mem_get_process_mmap_lock()); -+ kbase_gpu_vm_lock(kctx); ++ * This is symmetric with case of mapping the memory, which first maps ++ * each imported physical page to a separate virtual page, and then ++ * maps the single aliasing sink page to each of the virtual padding ++ * pages. 
++ */ ++ if (reg->flags & KBASE_REG_IMPORT_PAD) ++ nr_phys_pages = alloc->nents + 1; + -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto out_unlock; ++ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ alloc->pages, nr_phys_pages, nr_virt_pages, ++ kctx->as_nr, true); ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ size_t nr_reg_pages = kbase_reg_current_backed_size(reg); + -+ /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations -+ * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. -+ * This would usually include JIT allocations, Tiler heap related allocations -+ * & GPU queue ringbuffer and none of them needs to be explicitly marked -+ * as evictable by Userspace. -+ */ -+ if (kbase_va_region_is_no_user_free(reg)) -+ goto out_unlock; ++ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ alloc->pages, nr_reg_pages, nr_reg_pages, ++ kctx->as_nr, true); ++ } ++ break; ++ default: { ++ size_t nr_reg_pages = kbase_reg_current_backed_size(reg); + -+ /* Is the region being transitioning between not needed and needed? */ -+ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; -+ new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; -+ if (prev_needed != new_needed) { -+ /* Aliased allocations can't be shrunk as the code doesn't -+ * support looking up: -+ * - all physical pages assigned to different GPU VAs -+ * - CPU mappings for the physical pages at different vm_pgoff -+ * (==GPU VA) locations. ++ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ alloc->pages, nr_reg_pages, nr_reg_pages, ++ kctx->as_nr, false); ++ } ++ break; ++ } ++ ++ /* Update tracking, and other cleanup, depending on memory type. */ ++ switch (alloc->type) { ++ case KBASE_MEM_TYPE_ALIAS: ++ /* We mark the source allocs as unmapped from the GPU when ++ * putting reg's allocs + */ -+ if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) -+ goto out_unlock; ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; + -+ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) -+ goto out_unlock; ++ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { ++ user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; + -+ if (new_needed) { -+ /* Only native allocations can be marked not needed */ -+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { -+ ret = -EINVAL; -+ goto out_unlock; ++ /* The allocation could still have active mappings. */ ++ if (user_buf->current_mapping_usage_count == 0) { ++ kbase_jd_user_buf_unmap(kctx, alloc, reg, ++ (reg->flags & ++ (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); + } -+ ret = kbase_mem_evictable_make(reg->gpu_alloc); -+ if (ret) -+ goto out_unlock; -+ } else { -+ kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } -+ -+ /* limit to imported memory */ -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) -+ goto out_unlock; -+ -+ /* shareability flags are ignored for GPU uncached memory */ -+ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { -+ ret = 0; -+ goto out_unlock; ++ fallthrough; ++ default: ++ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); ++ break; + } + -+ /* no change? 
*/ -+ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { -+ ret = 0; -+ goto out_unlock; -+ } ++ return err; ++} + -+ new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); -+ new_flags |= real_flags; ++static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset) ++{ ++ struct vm_area_struct *vma; ++ struct kbase_cpu_mapping *map; ++ unsigned long vm_pgoff_in_region; ++ unsigned long vm_off_in_region; ++ unsigned long map_start; ++ size_t map_size; + -+ /* Currently supporting only imported memory */ -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } ++ lockdep_assert_held(kbase_mem_get_process_mmap_lock()); + -+ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { -+ /* Future use will use the new flags, existing mapping -+ * will NOT be updated as memory should not be in use -+ * by the GPU when updating the flags. -+ */ -+ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); -+ ret = 0; -+ } else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) { -+ /* -+ * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the -+ * dma-buf GPU mapping should always be present, check that -+ * this is the case and warn and skip the page table update if -+ * not. -+ * -+ * Then update dma-buf GPU mapping with the new flags. -+ * -+ * Note: The buffer must not be in use on the GPU when -+ * changing flags. If the buffer is in active use on -+ * the GPU, there is a risk that the GPU may trigger a -+ * shareability fault, as it will see the same -+ * addresses from buffer with different shareability -+ * properties. -+ */ -+ dev_dbg(kctx->kbdev->dev, -+ "Updating page tables on mem flag change\n"); -+ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ kbase_reg_current_backed_size(reg), -+ new_flags, -+ reg->gpu_alloc->group_id); -+ if (ret) -+ dev_warn(kctx->kbdev->dev, -+ "Failed to update GPU page tables on flag change: %d\n", -+ ret); -+ } else -+ WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count); ++ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ ++ return NULL; + -+ /* If everything is good, then set the new flags on the region. */ -+ if (!ret) -+ reg->flags = new_flags; ++ vma = find_vma_intersection(current->mm, uaddr, uaddr+size); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ up_write(kbase_mem_get_process_mmap_lock()); -+out: -+ return ret; -+} ++ if (!vma || vma->vm_start > uaddr) ++ return NULL; ++ if (vma->vm_ops != &kbase_vm_ops) ++ /* Not ours! */ ++ return NULL; + -+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) ++ map = vma->vm_private_data; + -+int kbase_mem_do_sync_imported(struct kbase_context *kctx, -+ struct kbase_va_region *reg, enum kbase_sync_type sync_fn) -+{ -+ int ret = -EINVAL; -+ struct dma_buf __maybe_unused *dma_buf; -+ enum dma_data_direction dir = DMA_BIDIRECTIONAL; ++ if (map->kctx != kctx) ++ /* Not from this context! */ ++ return NULL; + -+ lockdep_assert_held(&kctx->reg_lock); ++ vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; ++ vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; ++ map_start = vma->vm_start - vm_off_in_region; ++ map_size = map->region->nr_pages << PAGE_SHIFT; + -+ /* We assume that the same physical allocation object is used for both -+ * GPU and CPU for imported buffers. 
-+ */ -+ WARN_ON(reg->cpu_alloc != reg->gpu_alloc); ++ if ((uaddr + size) > (map_start + map_size)) ++ /* Not within the CPU mapping */ ++ return NULL; + -+ /* Currently only handle dma-bufs */ -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) -+ return ret; -+ /* -+ * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND -+ * enabled can expose us to a Linux Kernel issue between v4.6 and -+ * v4.19. We will not attempt to support cache syncs on dma-bufs that -+ * are mapped on demand (i.e. not on import), even on pre-4.6, neither -+ * on 4.20 or newer kernels, because this makes it difficult for -+ * userspace to know when they can rely on the cache sync. -+ * Instead, only support syncing when we always map dma-bufs on import, -+ * or if the particular buffer is mapped right now. -+ */ -+ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) && -+ !reg->gpu_alloc->imported.umm.current_mapping_usage_count) -+ return ret; ++ *offset = (uaddr - vma->vm_start) + vm_off_in_region; + -+ dma_buf = reg->gpu_alloc->imported.umm.dma_buf; ++ return map; ++} + -+ switch (sync_fn) { -+ case KBASE_SYNC_TO_DEVICE: -+ dev_dbg(kctx->kbdev->dev, -+ "Syncing imported buffer at GPU VA %llx to GPU\n", -+ reg->start_pfn); -+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND -+ if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { -+ struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; -+ struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; ++int kbasep_find_enclosing_cpu_mapping_offset( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset) ++{ ++ struct kbase_cpu_mapping *map; + -+ dma_sync_sg_for_device(attachment->dev, sgt->sgl, -+ sgt->nents, dir); -+ ret = 0; -+ } -+#else -+ ret = dma_buf_end_cpu_access(dma_buf, dir); -+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ -+ break; -+ case KBASE_SYNC_TO_CPU: -+ dev_dbg(kctx->kbdev->dev, -+ "Syncing imported buffer at GPU VA %llx to CPU\n", -+ reg->start_pfn); -+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND -+ if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { -+ struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; -+ struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; ++ kbase_os_mem_map_lock(kctx); + -+ dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, -+ sgt->nents, dir); -+ ret = 0; -+ } -+#else -+ ret = dma_buf_begin_cpu_access(dma_buf, dir); -+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ -+ break; -+ } ++ map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); + -+ if (unlikely(ret)) -+ dev_warn(kctx->kbdev->dev, -+ "Failed to sync mem region %pK at GPU VA %llx: %d\n", -+ reg, reg->start_pfn, ret); ++ kbase_os_mem_map_unlock(kctx); + -+ return ret; ++ if (!map) ++ return -EINVAL; ++ ++ return 0; +} + -+/** -+ * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment -+ * @kctx: Pointer to kbase context -+ * @alloc: Pointer to allocation with imported dma-buf memory to unmap -+ * -+ * This will unmap a dma-buf. Must be called after the GPU page tables for the -+ * region have been torn down. 
-+ */ -+static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc) -+{ -+ struct tagged_addr *pa = alloc->pages; ++KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); + -+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, -+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); -+ alloc->imported.umm.sgt = NULL; ++int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, ++ u64 gpu_addr, size_t size, u64 *start, u64 *offset) ++{ ++ struct kbase_va_region *region; + -+ kbase_remove_dma_buf_usage(kctx, alloc); ++ kbase_gpu_vm_lock(kctx); + -+ memset(pa, 0xff, sizeof(*pa) * alloc->nents); -+ alloc->nents = 0; -+} ++ region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + -+/* to replace sg_dma_len. */ -+#define MALI_SG_DMA_LEN(sg) ((sg)->length) ++ if (!region) { ++ kbase_gpu_vm_unlock(kctx); ++ return -EINVAL; ++ } + -+/** -+ * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping -+ * @kctx: Pointer to kbase context -+ * @reg: Pointer to region with imported dma-buf memory to map -+ * -+ * Map the dma-buf and prepare the page array with the tagged Mali physical -+ * addresses for GPU mapping. -+ * -+ * Return: 0 on success, or negative error code -+ */ -+static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, -+ struct kbase_va_region *reg) -+{ -+ struct sg_table *sgt; -+ struct scatterlist *s; -+ int i; -+ struct tagged_addr *pa; -+ int err; -+ size_t count = 0; -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ *start = region->start_pfn << PAGE_SHIFT; + -+ WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM); -+ WARN_ON_ONCE(alloc->imported.umm.sgt); ++ *offset = gpu_addr - *start; + -+ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, -+ DMA_BIDIRECTIONAL); -+ if (IS_ERR_OR_NULL(sgt)) ++ if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) { ++ kbase_gpu_vm_unlock(kctx); + return -EINVAL; ++ } + -+ /* save for later */ -+ alloc->imported.umm.sgt = sgt; ++ kbase_gpu_vm_unlock(kctx); + -+ pa = kbase_get_gpu_phy_pages(reg); ++ return 0; ++} + -+ for_each_sg(sgt->sgl, s, sgt->nents, i) { -+ size_t j, pages = PFN_UP(MALI_SG_DMA_LEN(s)); ++KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); + -+ WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), -+ "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", -+ MALI_SG_DMA_LEN(s)); ++void kbase_sync_single(struct kbase_context *kctx, ++ struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, ++ off_t offset, size_t size, enum kbase_sync_type sync_fn) ++{ ++ struct page *cpu_page; ++ phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); ++ phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa); + -+ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), -+ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", -+ (unsigned long long) sg_dma_address(s)); ++ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); + -+ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) -+ *pa++ = as_tagged(sg_dma_address(s) + -+ (j << PAGE_SHIFT)); -+ WARN_ONCE(j < pages, -+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", -+ alloc->imported.umm.dma_buf->size); -+ } ++ if (likely(cpu_pa == gpu_pa)) { ++ dma_addr_t dma_addr; + -+ if (!(reg->flags & KBASE_REG_IMPORT_PAD) && -+ WARN_ONCE(count < reg->nr_pages, -+ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", -+ alloc->imported.umm.dma_buf->size)) { -+ err = -EINVAL; -+ goto err_unmap_attachment; -+ } ++ 
BUG_ON(!cpu_page); ++ BUG_ON(offset + size > PAGE_SIZE); + -+ /* Update nents as we now have pages to map */ -+ alloc->nents = count; -+ kbase_add_dma_buf_usage(kctx, alloc); ++ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; + -+ return 0; ++ if (sync_fn == KBASE_SYNC_TO_CPU) ++ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, ++ size, DMA_BIDIRECTIONAL); ++ else if (sync_fn == KBASE_SYNC_TO_DEVICE) ++ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, ++ size, DMA_BIDIRECTIONAL); ++ } else { ++ void *src = NULL; ++ void *dst = NULL; ++ struct page *gpu_page; ++ dma_addr_t dma_addr; + -+err_unmap_attachment: -+ kbase_mem_umm_unmap_attachment(kctx, alloc); ++ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) ++ return; + -+ return err; ++ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); ++ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; ++ ++ if (sync_fn == KBASE_SYNC_TO_DEVICE) { ++ src = ((unsigned char *)kmap(cpu_page)) + offset; ++ dst = ((unsigned char *)kmap(gpu_page)) + offset; ++ } else if (sync_fn == KBASE_SYNC_TO_CPU) { ++ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, ++ DMA_BIDIRECTIONAL); ++ src = ((unsigned char *)kmap(gpu_page)) + offset; ++ dst = ((unsigned char *)kmap(cpu_page)) + offset; ++ } ++ ++ memcpy(dst, src, size); ++ kunmap(gpu_page); ++ kunmap(cpu_page); ++ if (sync_fn == KBASE_SYNC_TO_DEVICE) ++ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, ++ DMA_BIDIRECTIONAL); ++ } +} + -+int kbase_mem_umm_map(struct kbase_context *kctx, -+ struct kbase_va_region *reg) ++static int kbase_do_syncset(struct kbase_context *kctx, ++ struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ -+ int err; -+ struct kbase_mem_phy_alloc *alloc; -+ unsigned long gwt_mask = ~0; ++ int err = 0; ++ struct kbase_va_region *reg; ++ struct kbase_cpu_mapping *map; ++ unsigned long start; ++ size_t size; ++ struct tagged_addr *cpu_pa; ++ struct tagged_addr *gpu_pa; ++ u64 page_off, page_count; ++ u64 i; ++ u64 offset; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. ++ kbase_os_mem_map_lock(kctx); ++ kbase_gpu_vm_lock(kctx); ++ ++ /* find the region where the virtual address is contained */ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ sset->mem_handle.basep.handle); ++ if (kbase_is_region_invalid_or_free(reg)) { ++ dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", ++ sset->mem_handle.basep.handle); ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* ++ * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The ++ * CPU mapping cacheability is defined by the owner of the imported ++ * memory, and not by kbase, therefore we must assume that any imported ++ * memory may be cached. 
+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ if (kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); ++ goto out_unlock; ++ } + -+ lockdep_assert_held(&kctx->reg_lock); ++ if (!(reg->flags & KBASE_REG_CPU_CACHED)) ++ goto out_unlock; + -+ alloc = reg->gpu_alloc; ++ start = (uintptr_t)sset->user_addr; ++ size = (size_t)sset->size; + -+ alloc->imported.umm.current_mapping_usage_count++; -+ if (alloc->imported.umm.current_mapping_usage_count != 1) { -+ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || -+ alloc->imported.umm.need_sync) { -+ if (!kbase_is_region_invalid_or_free(reg)) { -+ err = kbase_mem_do_sync_imported(kctx, reg, -+ KBASE_SYNC_TO_DEVICE); -+ WARN_ON_ONCE(err); -+ } -+ } -+ return 0; ++ map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); ++ if (!map) { ++ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", ++ start, sset->mem_handle.basep.handle); ++ err = -EINVAL; ++ goto out_unlock; + } + -+ err = kbase_mem_umm_map_attachment(kctx, reg); -+ if (err) -+ goto bad_map_attachment; ++ page_off = offset >> PAGE_SHIFT; ++ offset &= ~PAGE_MASK; ++ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; ++ cpu_pa = kbase_get_cpu_phy_pages(reg); ++ gpu_pa = kbase_get_gpu_phy_pages(reg); + -+#ifdef CONFIG_MALI_CINSTR_GWT -+ if (kctx->gwt_enabled) -+ gwt_mask = ~KBASE_REG_GPU_WR; -+#endif ++ if (page_off > reg->nr_pages || ++ page_off + page_count > reg->nr_pages) { ++ /* Sync overflows the region */ ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ kbase_reg_current_backed_size(reg), -+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, -+ mmu_sync_info, NULL); -+ if (err) -+ goto bad_insert; ++ /* Sync first page */ ++ if (as_phys_addr_t(cpu_pa[page_off])) { ++ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + -+ if (reg->flags & KBASE_REG_IMPORT_PAD && -+ !WARN_ON(reg->nr_pages < alloc->nents)) { -+ /* For padded imported dma-buf memory, map the dummy aliasing -+ * page from the end of the dma-buf pages, to the end of the -+ * region using a read only mapping. -+ * -+ * Assume alloc->nents is the number of actual pages in the -+ * dma-buf memory. 
-+ */ -+ err = kbase_mmu_insert_single_imported_page( -+ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page, -+ reg->nr_pages - alloc->nents, -+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, -+ mmu_sync_info); -+ if (err) -+ goto bad_pad_insert; ++ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], ++ offset, sz, sync_fn); + } + -+ return 0; ++ /* Sync middle pages (if any) */ ++ for (i = 1; page_count > 2 && i < page_count - 1; i++) { ++ /* we grow upwards, so bail on first non-present page */ ++ if (!as_phys_addr_t(cpu_pa[page_off + i])) ++ break; + -+bad_pad_insert: -+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, -+ alloc->nents, alloc->nents, kctx->as_nr, true); -+bad_insert: -+ kbase_mem_umm_unmap_attachment(kctx, alloc); -+bad_map_attachment: -+ alloc->imported.umm.current_mapping_usage_count--; ++ kbase_sync_single(kctx, cpu_pa[page_off + i], ++ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); ++ } ++ ++ /* Sync last page (if any) */ ++ if (page_count > 1 && ++ as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { ++ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; ++ ++ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], ++ gpu_pa[page_off + page_count - 1], 0, sz, ++ sync_fn); ++ } + ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ kbase_os_mem_map_unlock(kctx); + return err; +} + -+void kbase_mem_umm_unmap(struct kbase_context *kctx, -+ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) ++int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) +{ -+ alloc->imported.umm.current_mapping_usage_count--; -+ if (alloc->imported.umm.current_mapping_usage_count) { -+ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || -+ alloc->imported.umm.need_sync) { -+ if (!kbase_is_region_invalid_or_free(reg)) { -+ int err = kbase_mem_do_sync_imported(kctx, reg, -+ KBASE_SYNC_TO_CPU); -+ WARN_ON_ONCE(err); -+ } -+ } -+ return; ++ int err = -EINVAL; ++ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(sset != NULL); ++ ++ if (sset->mem_handle.basep.handle & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, ++ "mem_handle: passed parameter is invalid"); ++ return -EINVAL; + } + -+ if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { -+ int err; ++ switch (sset->type) { ++ case BASE_SYNCSET_OP_MSYNC: ++ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); ++ break; + -+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, -+ alloc->pages, reg->nr_pages, reg->nr_pages, -+ kctx->as_nr, true); -+ WARN_ON(err); ++ case BASE_SYNCSET_OP_CSYNC: ++ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); ++ break; ++ ++ default: ++ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); ++ break; + } + -+ kbase_mem_umm_unmap_attachment(kctx, alloc); ++ return err; +} + -+static int get_umm_memory_group_id(struct kbase_context *kctx, -+ struct dma_buf *dma_buf) ++KBASE_EXPORT_TEST_API(kbase_sync_now); ++ ++/* vm lock must be held */ ++int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ int group_id = BASE_MEM_GROUP_DEFAULT; ++ int err; + -+ if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) { -+ struct memory_group_manager_import_data mgm_import_data; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); ++ dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", ++ __func__, (void *)reg, (void *)kctx); ++ lockdep_assert_held(&kctx->reg_lock); + -+ mgm_import_data.type = -+ 
MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; -+ mgm_import_data.u.dma_buf = dma_buf; ++ if (kbase_va_region_is_no_user_free(reg)) { ++ dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); ++ return -EINVAL; ++ } + -+ group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id( -+ kctx->kbdev->mgm_dev, &mgm_import_data); ++ /* If a region has been made evictable then we must unmake it ++ * before trying to free it. ++ * If the memory hasn't been reclaimed it will be unmapped and freed ++ * below, if it has been reclaimed then the operations below are no-ops. ++ */ ++ if (reg->flags & KBASE_REG_DONT_NEED) { ++ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE); ++ mutex_lock(&kctx->jit_evict_lock); ++ /* Unlink the physical allocation before unmaking it evictable so ++ * that the allocation isn't grown back to its last backed size ++ * as we're going to unmap it anyway. ++ */ ++ reg->cpu_alloc->reg = NULL; ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ reg->gpu_alloc->reg = NULL; ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_mem_evictable_unmake(reg->gpu_alloc); + } + -+ return group_id; ++ err = kbase_gpu_munmap(kctx, reg); ++ if (err) { ++ dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); ++ goto out; ++ } ++ ++#if MALI_USE_CSF ++ if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || ++ ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { ++ if (reg->flags & KBASE_REG_FIXED_ADDRESS) ++ atomic64_dec(&kctx->num_fixed_allocs); ++ else ++ atomic64_dec(&kctx->num_fixable_allocs); ++ } ++#endif ++ ++ /* This will also free the physical pages */ ++ kbase_free_alloced_region(reg); ++ ++out: ++ return err; +} + ++KBASE_EXPORT_TEST_API(kbase_mem_free_region); ++ +/** -+ * kbase_mem_from_umm - Import dma-buf memory into kctx -+ * @kctx: Pointer to kbase context to import memory into -+ * @fd: File descriptor of dma-buf to import -+ * @va_pages: Pointer where virtual size of the region will be output -+ * @flags: Pointer to memory flags -+ * @padding: Number of read only padding pages to be inserted at the end of the -+ * GPU mapping of the dma-buf ++ * kbase_mem_free - Free the region from the GPU and unregister it. + * -+ * Return: Pointer to new kbase_va_region object of the imported dma-buf, or -+ * NULL on error. ++ * @kctx: KBase context ++ * @gpu_addr: GPU address to free + * -+ * This function imports a dma-buf into kctx, and created a kbase_va_region -+ * object that wraps the dma-buf. ++ * This function implements the free operation on a memory segment. ++ * It will loudly fail if called with outstanding mappings. ++ * ++ * Return: 0 on success. 
+ */ -+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, -+ int fd, u64 *va_pages, u64 *flags, u32 padding) ++int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) +{ ++ int err = 0; + struct kbase_va_region *reg; -+ struct dma_buf *dma_buf; -+ struct dma_buf_attachment *dma_attachment; -+ bool shared_zone = false; -+ bool need_sync = false; -+ int group_id; -+ -+ /* 64-bit address range is the max */ -+ if (*va_pages > (U64_MAX / PAGE_SIZE)) -+ return NULL; + -+ dma_buf = dma_buf_get(fd); -+ if (IS_ERR_OR_NULL(dma_buf)) -+ return NULL; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", ++ __func__, gpu_addr, (void *)kctx); + -+ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); -+ if (IS_ERR_OR_NULL(dma_attachment)) { -+ dma_buf_put(dma_buf); -+ return NULL; ++ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { ++ dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__); ++ return -EINVAL; + } + -+ *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; -+ if (!*va_pages) { -+ dma_buf_detach(dma_buf, dma_attachment); -+ dma_buf_put(dma_buf); -+ return NULL; ++ if (gpu_addr == 0) { ++ dev_warn(kctx->kbdev->dev, ++ "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n", ++ __func__); ++ return -EINVAL; + } ++ kbase_gpu_vm_lock(kctx); + -+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) -+ return NULL; ++ if (gpu_addr >= BASE_MEM_COOKIE_BASE && ++ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { ++ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + -+ /* ignore SAME_VA */ -+ *flags &= ~BASE_MEM_SAME_VA; ++ reg = kctx->pending_regions[cookie]; ++ if (!reg) { ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ /* -+ * Force CPU cached flag. -+ * -+ * We can't query the dma-buf exporter to get details about the CPU -+ * cache attributes of CPU mappings, so we have to assume that the -+ * buffer may be cached, and call into the exporter for cache -+ * maintenance, and rely on the exporter to do the right thing when -+ * handling our calls. -+ */ -+ *flags |= BASE_MEM_CACHED_CPU; ++ /* ask to unlink the cookie as we'll free it */ + -+ if (*flags & BASE_MEM_IMPORT_SHARED) -+ shared_zone = true; ++ kctx->pending_regions[cookie] = NULL; ++ bitmap_set(kctx->cookies, cookie, 1); + -+ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) -+ need_sync = true; ++ kbase_free_alloced_region(reg); ++ } else { ++ /* A real GPU va */ ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) { ++ dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX", ++ __func__, gpu_addr); ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ if (!kbase_ctx_compat_mode(kctx)) { -+ /* -+ * 64-bit tasks require us to reserve VA on the CPU that we use -+ * on the GPU. 
-+ */ -+ shared_zone = true; ++ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { ++ /* SAME_VA must be freed through munmap */ ++ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, ++ gpu_addr); ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ err = kbase_mem_free_region(kctx, reg); + } + -+ if (shared_zone) { -+ *flags |= BASE_MEM_NEED_MMAP; -+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, -+ KBASE_REG_ZONE_SAME_VA); -+ } else { -+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, -+ KBASE_REG_ZONE_CUSTOM_VA); -+ } ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return err; ++} + -+ if (!reg) { -+ dma_buf_detach(dma_buf, dma_attachment); -+ dma_buf_put(dma_buf); -+ return NULL; -+ } ++KBASE_EXPORT_TEST_API(kbase_mem_free); + -+ group_id = get_umm_memory_group_id(kctx, dma_buf); ++int kbase_update_region_flags(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned long flags) ++{ ++ KBASE_DEBUG_ASSERT(reg != NULL); ++ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + -+ reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, -+ KBASE_MEM_TYPE_IMPORTED_UMM, group_id); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc; ++ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); ++ /* all memory is now growable */ ++ reg->flags |= KBASE_REG_GROWABLE; + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ if (flags & BASE_MEM_GROW_ON_GPF) ++ reg->flags |= KBASE_REG_PF_GROW; + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto error_out; ++ if (flags & BASE_MEM_PROT_CPU_WR) ++ reg->flags |= KBASE_REG_CPU_WR; + -+ /* No pages to map yet */ -+ reg->gpu_alloc->nents = 0; ++ if (flags & BASE_MEM_PROT_CPU_RD) ++ reg->flags |= KBASE_REG_CPU_RD; + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ -+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ ++ if (flags & BASE_MEM_PROT_GPU_WR) ++ reg->flags |= KBASE_REG_GPU_WR; + -+ if (*flags & BASE_MEM_PROTECTED) -+ reg->flags |= KBASE_REG_PROTECTED; ++ if (flags & BASE_MEM_PROT_GPU_RD) ++ reg->flags |= KBASE_REG_GPU_RD; + -+ if (padding) -+ reg->flags |= KBASE_REG_IMPORT_PAD; ++ if (0 == (flags & BASE_MEM_PROT_GPU_EX)) ++ reg->flags |= KBASE_REG_GPU_NX; + -+ reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; -+ reg->gpu_alloc->imported.umm.sgt = NULL; -+ reg->gpu_alloc->imported.umm.dma_buf = dma_buf; -+ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; -+ reg->gpu_alloc->imported.umm.need_sync = need_sync; -+ reg->gpu_alloc->imported.umm.kctx = kctx; -+ reg->extension = 0; ++ if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && ++ !(flags & BASE_MEM_UNCACHED_GPU)) ++ return -EINVAL; ++ } else if (flags & (BASE_MEM_COHERENT_SYSTEM | ++ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { ++ reg->flags |= KBASE_REG_SHARE_BOTH; ++ } + -+ if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { -+ int err; ++ if (!(reg->flags & KBASE_REG_SHARE_BOTH) && ++ flags & BASE_MEM_COHERENT_LOCAL) { ++ reg->flags |= KBASE_REG_SHARE_IN; ++ } + -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1; ++#if !MALI_USE_CSF ++ if (flags & BASE_MEM_TILER_ALIGN_TOP) ++ reg->flags |= KBASE_REG_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ + -+ err = kbase_mem_umm_map_attachment(kctx, reg); -+ if (err) { ++#if MALI_USE_CSF ++ if 
(flags & BASE_MEM_CSF_EVENT) { ++ reg->flags |= KBASE_REG_CSF_EVENT; ++ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; ++ ++ if (!(reg->flags & KBASE_REG_SHARE_BOTH)) { ++ /* On non coherent platforms need to map as uncached on ++ * both sides. ++ */ ++ reg->flags &= ~KBASE_REG_CPU_CACHED; ++ reg->flags &= ~KBASE_REG_GPU_CACHED; ++ } ++ } ++#endif ++ ++ /* Set up default MEMATTR usage */ ++ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { ++ if (kctx->kbdev->mmu_mode->flags & ++ KBASE_MMU_MODE_HAS_NON_CACHEABLE) { ++ /* Override shareability, and MEMATTR for uncached */ ++ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); ++ reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ } else { + dev_warn(kctx->kbdev->dev, -+ "Failed to map dma-buf %pK on GPU: %d\n", -+ dma_buf, err); -+ goto error_out; ++ "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); ++ return -EINVAL; + } ++#if MALI_USE_CSF ++ } else if (reg->flags & KBASE_REG_CSF_EVENT) { ++ WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); + -+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); ++#endif ++ } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && ++ (reg->flags & KBASE_REG_SHARE_BOTH)) { ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ } else { ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + } + -+ return reg; ++ if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) ++ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + -+error_out: -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+no_alloc: -+ kfree(reg); ++ if (flags & BASEP_MEM_NO_USER_FREE) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_va_region_no_user_free_inc(reg); ++ kbase_gpu_vm_unlock(kctx); ++ } + -+ return NULL; -+} ++ if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) ++ reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; + -+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) -+{ -+ u32 cpu_cache_line_size = cache_line_size(); -+ u32 gpu_cache_line_size = -+ (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); ++#if MALI_USE_CSF ++ if (flags & BASE_MEM_FIXED) ++ reg->flags |= KBASE_REG_FIXED_ADDRESS; ++#endif + -+ return ((cpu_cache_line_size > gpu_cache_line_size) ? 
-+ cpu_cache_line_size : -+ gpu_cache_line_size); ++ return 0; +} + -+static struct kbase_va_region *kbase_mem_from_user_buffer( -+ struct kbase_context *kctx, unsigned long address, -+ unsigned long size, u64 *va_pages, u64 *flags) ++int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, ++ size_t nr_pages_requested) +{ -+ long i, dma_mapped_pages; -+ struct kbase_va_region *reg; -+ struct rb_root *rbtree; -+ long faulted_pages; -+ int zone = KBASE_REG_ZONE_CUSTOM_VA; -+ bool shared_zone = false; -+ u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); -+ struct kbase_alloc_import_user_buf *user_buf; -+ struct page **pages = NULL; -+ struct tagged_addr *pa; -+ struct device *dev; -+ int write; -+ -+ /* Flag supported only for dma-buf imported memory */ -+ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) -+ return NULL; ++ int new_page_count __maybe_unused; ++ size_t nr_left = nr_pages_requested; ++ int res; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct tagged_addr *tp; + -+ if ((address & (cache_line_alignment - 1)) != 0 || -+ (size & (cache_line_alignment - 1)) != 0) { -+ if (*flags & BASE_MEM_UNCACHED_GPU) { -+ dev_warn(kctx->kbdev->dev, -+ "User buffer is not cache line aligned and marked as GPU uncached\n"); -+ goto bad_size; -+ } ++ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || ++ WARN_ON(alloc->imported.native.kctx == NULL) || ++ WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { ++ return -EINVAL; ++ } + -+ /* Coherency must be enabled to handle partial cache lines */ -+ if (*flags & (BASE_MEM_COHERENT_SYSTEM | -+ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { -+ /* Force coherent system required flag, import will -+ * then fail if coherency isn't available -+ */ -+ *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; -+ } else { -+ dev_warn(kctx->kbdev->dev, -+ "User buffer is not cache line aligned and no coherency enabled\n"); -+ goto bad_size; -+ } ++ if (alloc->reg) { ++ if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) ++ goto invalid_request; + } + -+ *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - -+ PFN_DOWN(address); -+ if (!*va_pages) -+ goto bad_size; ++ kctx = alloc->imported.native.kctx; ++ kbdev = kctx->kbdev; + -+ if (*va_pages > (UINT64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ if (nr_pages_requested == 0) ++ goto done; /*nothing to do*/ + -+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) -+ goto bad_size; ++ new_page_count = atomic_add_return( ++ nr_pages_requested, &kctx->used_pages); ++ atomic_add(nr_pages_requested, ++ &kctx->kbdev->memdev.used_pages); + -+ /* SAME_VA generally not supported with imported memory (no known use cases) */ -+ *flags &= ~BASE_MEM_SAME_VA; ++ /* Increase mm counters before we allocate pages so that this ++ * allocation is visible to the OOM killer ++ */ ++ kbase_process_page_usage_inc(kctx, nr_pages_requested); + -+ if (*flags & BASE_MEM_IMPORT_SHARED) -+ shared_zone = true; ++ tp = alloc->pages + alloc->nents; + -+ if (!kbase_ctx_compat_mode(kctx)) { -+ /* -+ * 64-bit tasks require us to reserve VA on the CPU that we use -+ * on the GPU. 
-+ */ -+ shared_zone = true; -+ } ++ /* Check if we have enough pages requested so we can allocate a large ++ * page (512 * 4KB = 2MB ) ++ */ ++ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { ++ int nr_lp = nr_left / (SZ_2M / SZ_4K); + -+ if (shared_zone) { -+ *flags |= BASE_MEM_NEED_MMAP; -+ zone = KBASE_REG_ZONE_SAME_VA; -+ rbtree = &kctx->reg_rbtree_same; -+ } else -+ rbtree = &kctx->reg_rbtree_custom; ++ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], ++ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); + -+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); ++ if (res > 0) { ++ nr_left -= res; ++ tp += res; ++ } + -+ if (!reg) -+ goto no_region; ++ if (nr_left) { ++ struct kbase_sub_alloc *sa, *temp_sa; + -+ reg->gpu_alloc = kbase_alloc_create( -+ kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, -+ BASE_MEM_GROUP_DEFAULT); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++ spin_lock(&kctx->mem_partials_lock); + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ list_for_each_entry_safe(sa, temp_sa, ++ &kctx->mem_partials, link) { ++ int pidx = 0; + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ while (nr_left) { ++ pidx = find_next_zero_bit(sa->sub_pages, ++ SZ_2M / SZ_4K, ++ pidx); ++ bitmap_set(sa->sub_pages, pidx, 1); ++ *tp++ = as_tagged_tag(page_to_phys(sa->page + ++ pidx), ++ FROM_PARTIAL); ++ nr_left--; + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ -+ reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ ++ if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { ++ /* unlink from partial list when full */ ++ list_del_init(&sa->link); ++ break; ++ } ++ } ++ } ++ spin_unlock(&kctx->mem_partials_lock); ++ } + -+ user_buf = ®->gpu_alloc->imported.user_buf; ++ /* only if we actually have a chunk left <512. If more it indicates ++ * that we couldn't allocate a 2MB above, so no point to retry here. ++ */ ++ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { ++ /* create a new partial and suballocate the rest from it */ ++ struct page *np = NULL; + -+ user_buf->size = size; -+ user_buf->address = address; -+ user_buf->nr_pages = *va_pages; -+ user_buf->mm = current->mm; -+ kbase_mem_mmgrab(); -+ if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) -+ user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); -+ else -+ user_buf->pages = kmalloc_array(*va_pages, -+ sizeof(struct page *), GFP_KERNEL); ++ do { ++ int err; + -+ if (!user_buf->pages) -+ goto no_page_array; ++ np = kbase_mem_pool_alloc( ++ &kctx->mem_pools.large[ ++ alloc->group_id]); ++ if (np) ++ break; + -+ /* If the region is coherent with the CPU then the memory is imported -+ * and mapped onto the GPU immediately. -+ * Otherwise get_user_pages is called as a sanity check, but with -+ * NULL as the pages argument which will fault the pages, but not -+ * pin them. The memory will then be pinned only around the jobs that -+ * specify the region as an external resource. 
-+ */ -+ if (reg->flags & KBASE_REG_SHARE_BOTH) { -+ pages = user_buf->pages; -+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; -+ } ++ err = kbase_mem_pool_grow( ++ &kctx->mem_pools.large[alloc->group_id], ++ 1, kctx->task); ++ if (err) ++ break; ++ } while (1); + -+ down_read(kbase_mem_get_process_mmap_lock()); ++ if (np) { ++ int i; ++ struct kbase_sub_alloc *sa; ++ struct page *p; + -+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); ++ sa = kmalloc(sizeof(*sa), GFP_KERNEL); ++ if (!sa) { ++ kbase_mem_pool_free( ++ &kctx->mem_pools.large[ ++ alloc->group_id], ++ np, ++ false); ++ goto no_new_partial; ++ } + -+ faulted_pages = -+ kbase_get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); ++ /* store pointers back to the control struct */ ++ np->lru.next = (void *)sa; ++ for (p = np; p < np + SZ_2M / SZ_4K; p++) ++ p->lru.prev = (void *)np; ++ INIT_LIST_HEAD(&sa->link); ++ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); ++ sa->page = np; + -+ up_read(kbase_mem_get_process_mmap_lock()); ++ for (i = 0; i < nr_left; i++) ++ *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); + -+ if (faulted_pages != *va_pages) -+ goto fault_mismatch; ++ bitmap_set(sa->sub_pages, 0, nr_left); ++ nr_left = 0; + -+ reg->gpu_alloc->nents = 0; -+ reg->extension = 0; ++ /* expose for later use */ ++ spin_lock(&kctx->mem_partials_lock); ++ list_add(&sa->link, &kctx->mem_partials); ++ spin_unlock(&kctx->mem_partials_lock); ++ } ++ } ++ } + -+ pa = kbase_get_gpu_phy_pages(reg); -+ dev = kctx->kbdev->dev; ++no_new_partial: ++ if (nr_left) { ++ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, ++ tp, false, kctx->task); ++ if (res <= 0) ++ goto alloc_failed; ++ } + -+ if (pages) { -+ /* Top bit signifies that this was pinned on import */ -+ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); + -+ /* Manual CPU cache synchronization. -+ * -+ * The driver disables automatic CPU cache synchronization because the -+ * memory pages that enclose the imported region may also contain -+ * sub-regions which are not imported and that are allocated and used -+ * by the user process. This may be the case of memory at the beginning -+ * of the first page and at the end of the last page. Automatic CPU cache -+ * synchronization would force some operations on those memory allocations, -+ * unbeknown to the user process: in particular, a CPU cache invalidate -+ * upon unmapping would destroy the content of dirty CPU caches and cause -+ * the user process to lose CPU writes to the non-imported sub-regions. -+ * -+ * When the GPU claims ownership of the imported memory buffer, it shall -+ * commit CPU writes for the whole of all pages that enclose the imported -+ * region, otherwise the initial content of memory would be wrong. 
-+ */ -+ for (i = 0; i < faulted_pages; i++) { -+ dma_addr_t dma_addr; -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#else -+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, -+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); -+#endif -+ if (dma_mapping_error(dev, dma_addr)) -+ goto unwind_dma_map; ++ alloc->nents += nr_pages_requested; + -+ user_buf->dma_addrs[i] = dma_addr; -+ pa[i] = as_tagged(page_to_phys(pages[i])); ++ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + -+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } ++done: ++ return 0; + -+ reg->gpu_alloc->nents = faulted_pages; -+ } ++alloc_failed: ++ /* rollback needed if got one or more 2MB but failed later */ ++ if (nr_left != nr_pages_requested) { ++ size_t nr_pages_to_free = nr_pages_requested - nr_left; + -+ return reg; ++ alloc->nents += nr_pages_to_free; + -+unwind_dma_map: -+ dma_mapped_pages = i; -+ /* Run the unmap loop in the same order as map loop, and perform again -+ * CPU cache synchronization to re-write the content of dirty CPU caches -+ * to memory. This precautionary measure is kept here to keep this code -+ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor -+ * in the future. -+ */ -+ for (i = 0; i < dma_mapped_pages; i++) { -+ dma_addr_t dma_addr = user_buf->dma_addrs[i]; ++ kbase_process_page_usage_inc(kctx, nr_pages_to_free); ++ atomic_add(nr_pages_to_free, &kctx->used_pages); ++ atomic_add(nr_pages_to_free, ++ &kctx->kbdev->memdev.used_pages); + -+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+#else -+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, -+ DMA_ATTR_SKIP_CPU_SYNC); -+#endif -+ } -+fault_mismatch: -+ if (pages) { -+ /* In this case, the region was not yet in the region tracker, -+ * and so there are no CPU mappings to remove before we unpin -+ * the page -+ */ -+ for (i = 0; i < faulted_pages; i++) -+ kbase_unpin_user_buf_page(pages[i]); ++ kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } -+no_page_array: -+invalid_flags: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc_obj: -+ kfree(reg); -+no_region: -+bad_size: -+ return NULL; -+} + ++ kbase_process_page_usage_dec(kctx, nr_pages_requested); ++ atomic_sub(nr_pages_requested, &kctx->used_pages); ++ atomic_sub(nr_pages_requested, ++ &kctx->kbdev->memdev.used_pages); + -+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, -+ u64 nents, struct base_mem_aliasing_info *ai, -+ u64 *num_pages) ++invalid_request: ++ return -ENOMEM; ++} ++ ++struct tagged_addr *kbase_alloc_phy_pages_helper_locked( ++ struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, ++ size_t nr_pages_requested, ++ struct kbase_sub_alloc **prealloc_sa) +{ -+ struct kbase_va_region *reg; -+ u64 gpu_va; -+ size_t i; -+ bool coherent; -+ uint64_t max_stride; ++ int new_page_count __maybe_unused; ++ size_t nr_left = nr_pages_requested; ++ int res; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct tagged_addr *tp; ++ struct tagged_addr *new_pages = NULL; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. 
-+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); ++ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(flags); -+ KBASE_DEBUG_ASSERT(ai); -+ KBASE_DEBUG_ASSERT(num_pages); ++ lockdep_assert_held(&pool->pool_lock); + -+ /* mask to only allowed flags */ -+ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | -+ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | -+ BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); ++ kctx = alloc->imported.native.kctx; ++ kbdev = kctx->kbdev; + -+ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { -+ dev_warn(kctx->kbdev->dev, -+ "%s called with bad flags (%llx)", -+ __func__, -+ (unsigned long long)*flags); -+ goto bad_flags; ++ if (!kbdev->pagesize_2mb) ++ WARN_ON(pool->order); ++ ++ if (alloc->reg) { ++ if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) ++ goto invalid_request; + } -+ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || -+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + -+ if (!stride) -+ goto bad_stride; ++ lockdep_assert_held(&kctx->mem_partials_lock); + -+ if (!nents) -+ goto bad_nents; ++ if (nr_pages_requested == 0) ++ goto done; /*nothing to do*/ + -+ max_stride = div64_u64(U64_MAX, nents); ++ new_page_count = atomic_add_return( ++ nr_pages_requested, &kctx->used_pages); ++ atomic_add(nr_pages_requested, ++ &kctx->kbdev->memdev.used_pages); + -+ if (stride > max_stride) -+ goto bad_size; ++ /* Increase mm counters before we allocate pages so that this ++ * allocation is visible to the OOM killer ++ */ ++ kbase_process_page_usage_inc(kctx, nr_pages_requested); + -+ if ((nents * stride) > (U64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ tp = alloc->pages + alloc->nents; ++ new_pages = tp; + -+ /* calculate the number of pages this alias will cover */ -+ *num_pages = nents * stride; ++ if (kbdev->pagesize_2mb && pool->order) { ++ int nr_lp = nr_left / (SZ_2M / SZ_4K); + -+ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages)) -+ goto bad_size; ++ res = kbase_mem_pool_alloc_pages_locked(pool, ++ nr_lp * (SZ_2M / SZ_4K), ++ tp); + -+ if (!kbase_ctx_compat_mode(kctx)) { -+ /* 64-bit tasks must MMAP anyway, but not expose this address to -+ * clients -+ */ -+ *flags |= BASE_MEM_NEED_MMAP; -+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, -+ KBASE_REG_ZONE_SAME_VA); -+ } else { -+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, -+ KBASE_REG_ZONE_CUSTOM_VA); -+ } ++ if (res > 0) { ++ nr_left -= res; ++ tp += res; ++ } + -+ if (!reg) -+ goto no_reg; ++ if (nr_left) { ++ struct kbase_sub_alloc *sa, *temp_sa; + -+ /* zero-sized page array, as we don't need one/can support one */ -+ reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, -+ BASE_MEM_GROUP_DEFAULT); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++ list_for_each_entry_safe(sa, temp_sa, ++ &kctx->mem_partials, link) { ++ int pidx = 0; + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ while (nr_left) { ++ pidx = find_next_zero_bit(sa->sub_pages, ++ SZ_2M / SZ_4K, ++ pidx); ++ bitmap_set(sa->sub_pages, pidx, 1); ++ *tp++ = as_tagged_tag(page_to_phys( ++ sa->page + pidx), ++ FROM_PARTIAL); ++ nr_left--; + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ if (bitmap_full(sa->sub_pages, ++ SZ_2M / SZ_4K)) { ++ /* 
unlink from partial list when ++ * full ++ */ ++ list_del_init(&sa->link); ++ break; ++ } ++ } ++ } ++ } + -+ reg->gpu_alloc->imported.alias.nents = nents; -+ reg->gpu_alloc->imported.alias.stride = stride; -+ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); -+ if (!reg->gpu_alloc->imported.alias.aliased) -+ goto no_aliased_array; ++ /* only if we actually have a chunk left <512. If more it ++ * indicates that we couldn't allocate a 2MB above, so no point ++ * to retry here. ++ */ ++ if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { ++ /* create a new partial and suballocate the rest from it ++ */ ++ struct page *np = NULL; + -+ kbase_gpu_vm_lock(kctx); ++ np = kbase_mem_pool_alloc_locked(pool); + -+ /* validate and add src handles */ -+ for (i = 0; i < nents; i++) { -+ if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { -+ if (ai[i].handle.basep.handle != -+ BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) -+ goto bad_handle; /* unsupported magic handle */ -+ if (!ai[i].length) -+ goto bad_handle; /* must be > 0 */ -+ if (ai[i].length > stride) -+ goto bad_handle; /* can't be larger than the -+ * stride -+ */ -+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; -+ } else { -+ struct kbase_va_region *aliasing_reg; -+ struct kbase_mem_phy_alloc *alloc; ++ if (np) { ++ int i; ++ struct kbase_sub_alloc *const sa = *prealloc_sa; ++ struct page *p; + -+ aliasing_reg = kbase_region_tracker_find_region_base_address( -+ kctx, -+ (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); ++ /* store pointers back to the control struct */ ++ np->lru.next = (void *)sa; ++ for (p = np; p < np + SZ_2M / SZ_4K; p++) ++ p->lru.prev = (void *)np; ++ INIT_LIST_HEAD(&sa->link); ++ bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); ++ sa->page = np; + -+ /* validate found region */ -+ if (kbase_is_region_invalid_or_free(aliasing_reg)) -+ goto bad_handle; /* Not found/already free */ -+ if (kbase_is_region_shrinkable(aliasing_reg)) -+ goto bad_handle; /* Ephemeral region */ -+ if (kbase_va_region_is_no_user_free(aliasing_reg)) -+ goto bad_handle; /* JIT regions can't be -+ * aliased. NO_USER_FREE flag -+ * covers the entire lifetime -+ * of JIT regions. The other -+ * types of regions covered -+ * by this flag also shall -+ * not be aliased. -+ */ -+ if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) -+ goto bad_handle; /* GPU uncached memory */ -+ if (!aliasing_reg->gpu_alloc) -+ goto bad_handle; /* No alloc */ -+ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ goto bad_handle; /* Not a native alloc */ -+ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) -+ goto bad_handle; /* Non-coherent memory cannot -+ * alias coherent memory, and -+ * vice versa. -+ */ ++ for (i = 0; i < nr_left; i++) ++ *tp++ = as_tagged_tag( ++ page_to_phys(np + i), ++ FROM_PARTIAL); + -+ /* check size against stride */ -+ if (!ai[i].length) -+ goto bad_handle; /* must be > 0 */ -+ if (ai[i].length > stride) -+ goto bad_handle; /* can't be larger than the -+ * stride -+ */ ++ bitmap_set(sa->sub_pages, 0, nr_left); ++ nr_left = 0; ++ /* Indicate to user that we'll free this memory ++ * later. 
++ */ ++ *prealloc_sa = NULL; + -+ alloc = aliasing_reg->gpu_alloc; ++ /* expose for later use */ ++ list_add(&sa->link, &kctx->mem_partials); ++ } ++ } ++ if (nr_left) ++ goto alloc_failed; ++ } else { ++ res = kbase_mem_pool_alloc_pages_locked(pool, ++ nr_left, ++ tp); ++ if (res <= 0) ++ goto alloc_failed; ++ } + -+ /* check against the alloc's size */ -+ if (ai[i].offset > alloc->nents) -+ goto bad_handle; /* beyond end */ -+ if (ai[i].offset + ai[i].length > alloc->nents) -+ goto bad_handle; /* beyond end */ ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); + -+ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); -+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; -+ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; ++ alloc->nents += nr_pages_requested; + -+ /* Ensure the underlying alloc is marked as being -+ * mapped at >1 different GPU VA immediately, even -+ * though mapping might not happen until later. -+ * -+ * Otherwise, we would (incorrectly) allow shrinking of -+ * the source region (aliasing_reg) and so freeing the -+ * physical pages (without freeing the entire alloc) -+ * whilst we still hold an implicit reference on those -+ * physical pages. -+ */ -+ kbase_mem_phy_alloc_gpu_mapped(alloc); -+ } -+ } ++ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + -+ if (!kbase_ctx_compat_mode(kctx)) { -+ /* Bind to a cookie */ -+ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { -+ dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); -+ goto no_cookie; -+ } -+ /* return a cookie */ -+ gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); -+ bitmap_clear(kctx->cookies, gpu_va, 1); -+ BUG_ON(kctx->pending_regions[gpu_va]); -+ kctx->pending_regions[gpu_va] = reg; ++done: ++ return new_pages; + -+ /* relocate to correct base */ -+ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ gpu_va <<= PAGE_SHIFT; -+ } else { -+ /* we control the VA */ -+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, -+ mmu_sync_info) != 0) { -+ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); -+ goto no_mmap; -+ } -+ /* return real GPU VA */ -+ gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } ++alloc_failed: ++ /* rollback needed if got one or more 2MB but failed later */ ++ if (nr_left != nr_pages_requested) { ++ size_t nr_pages_to_free = nr_pages_requested - nr_left; + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags &= ~KBASE_REG_GROWABLE; ++ struct tagged_addr *start_free = alloc->pages + alloc->nents; + -+ kbase_gpu_vm_unlock(kctx); ++ if (kbdev->pagesize_2mb && pool->order) { ++ while (nr_pages_to_free) { ++ if (is_huge_head(*start_free)) { ++ kbase_mem_pool_free_pages_locked( ++ pool, 512, ++ start_free, ++ false, /* not dirty */ ++ true); /* return to pool */ ++ nr_pages_to_free -= 512; ++ start_free += 512; ++ } else if (is_partial(*start_free)) { ++ free_partial_locked(kctx, pool, ++ *start_free); ++ nr_pages_to_free--; ++ start_free++; ++ } ++ } ++ } else { ++ kbase_mem_pool_free_pages_locked(pool, ++ nr_pages_to_free, ++ start_free, ++ false, /* not dirty */ ++ true); /* return to pool */ ++ } ++ } + -+ return gpu_va; ++ kbase_process_page_usage_dec(kctx, nr_pages_requested); ++ atomic_sub(nr_pages_requested, &kctx->used_pages); ++ atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); + -+no_cookie: -+no_mmap: -+bad_handle: -+ /* Marking the source allocs as not being mapped on the GPU and putting -+ * them is handled by putting reg's allocs, so no rollback of 
those -+ * actions is done here. -+ */ -+ kbase_gpu_vm_unlock(kctx); -+no_aliased_array: -+invalid_flags: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc_obj: -+ kfree(reg); -+no_reg: -+bad_size: -+bad_nents: -+bad_stride: -+bad_flags: -+ return 0; ++invalid_request: ++ return NULL; +} + -+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, -+ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, -+ u64 *flags) ++static void free_partial(struct kbase_context *kctx, int group_id, struct ++ tagged_addr tp) +{ -+ struct kbase_va_region *reg; -+ -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ struct page *p, *head_page; ++ struct kbase_sub_alloc *sa; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(gpu_va); -+ KBASE_DEBUG_ASSERT(va_pages); -+ KBASE_DEBUG_ASSERT(flags); ++ p = as_page(tp); ++ head_page = (struct page *)p->lru.prev; ++ sa = (struct kbase_sub_alloc *)head_page->lru.next; ++ spin_lock(&kctx->mem_partials_lock); ++ clear_bit(p - head_page, sa->sub_pages); ++ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { ++ list_del(&sa->link); ++ kbase_mem_pool_free( ++ &kctx->mem_pools.large[group_id], ++ head_page, ++ true); ++ kfree(sa); ++ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == ++ SZ_2M / SZ_4K - 1) { ++ /* expose the partial again */ ++ list_add(&sa->link, &kctx->mem_partials); ++ } ++ spin_unlock(&kctx->mem_partials_lock); ++} + -+ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && -+ kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) -+ *flags |= BASE_MEM_SAME_VA; ++int kbase_free_phy_pages_helper( ++ struct kbase_mem_phy_alloc *alloc, ++ size_t nr_pages_to_free) ++{ ++ struct kbase_context *kctx = alloc->imported.native.kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool syncback; ++ bool reclaimed = (alloc->evicted != 0); ++ struct tagged_addr *start_free; ++ int new_page_count __maybe_unused; ++ size_t freed = 0; + -+ if (!kbase_check_import_flags(*flags)) { -+ dev_warn(kctx->kbdev->dev, -+ "%s called with bad flags (%llx)", -+ __func__, -+ (unsigned long long)*flags); -+ goto bad_flags; ++ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || ++ WARN_ON(alloc->imported.native.kctx == NULL) || ++ WARN_ON(alloc->nents < nr_pages_to_free) || ++ WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { ++ return -EINVAL; + } + -+ if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && -+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { -+ /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ dev_warn(kctx->kbdev->dev, -+ "%s call required coherent mem when unavailable", -+ __func__); -+ goto bad_flags; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM; -+ } -+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { -+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); -+ goto bad_flags; -+ } -+ if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { -+ dev_warn(kctx->kbdev->dev, -+ "padding is only supported for UMM"); -+ goto bad_flags; -+ } ++ /* early out if 
nothing to do */ ++ if (nr_pages_to_free == 0) ++ return 0; + -+ switch (type) { -+ case BASE_MEM_IMPORT_TYPE_UMM: { -+ int fd; ++ start_free = alloc->pages + alloc->nents - nr_pages_to_free; + -+ if (get_user(fd, (int __user *)phandle)) -+ reg = NULL; -+ else -+ reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, -+ padding); ++ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ ++ /* pad start_free to a valid start location */ ++ while (nr_pages_to_free && is_huge(*start_free) && ++ !is_huge_head(*start_free)) { ++ nr_pages_to_free--; ++ start_free++; + } -+ break; -+ case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { -+ struct base_mem_import_user_buffer user_buffer; -+ void __user *uptr; + -+ if (copy_from_user(&user_buffer, phandle, -+ sizeof(user_buffer))) { -+ reg = NULL; ++ while (nr_pages_to_free) { ++ if (is_huge_head(*start_free)) { ++ /* This is a 2MB entry, so free all the 512 pages that ++ * it points to ++ */ ++ kbase_mem_pool_free_pages( ++ &kctx->mem_pools.large[alloc->group_id], ++ 512, ++ start_free, ++ syncback, ++ reclaimed); ++ nr_pages_to_free -= 512; ++ start_free += 512; ++ freed += 512; ++ } else if (is_partial(*start_free)) { ++ free_partial(kctx, alloc->group_id, *start_free); ++ nr_pages_to_free--; ++ start_free++; ++ freed++; + } else { -+#if IS_ENABLED(CONFIG_COMPAT) -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ uptr = compat_ptr(user_buffer.ptr); -+ else -+#endif -+ uptr = u64_to_user_ptr(user_buffer.ptr); ++ struct tagged_addr *local_end_free; + -+ reg = kbase_mem_from_user_buffer(kctx, -+ (unsigned long)uptr, user_buffer.length, -+ va_pages, flags); ++ local_end_free = start_free; ++ while (nr_pages_to_free && ++ !is_huge(*local_end_free) && ++ !is_partial(*local_end_free)) { ++ local_end_free++; ++ nr_pages_to_free--; ++ } ++ kbase_mem_pool_free_pages( ++ &kctx->mem_pools.small[alloc->group_id], ++ local_end_free - start_free, ++ start_free, ++ syncback, ++ reclaimed); ++ freed += local_end_free - start_free; ++ start_free += local_end_free - start_free; + } -+ break; + } -+ default: { -+ reg = NULL; -+ break; -+ } -+ } -+ -+ if (!reg) -+ goto no_reg; + -+ kbase_gpu_vm_lock(kctx); ++ alloc->nents -= freed; + -+ /* mmap needed to setup VA? */ -+ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { -+ /* Bind to a cookie */ -+ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) -+ goto no_cookie; -+ /* return a cookie */ -+ *gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); -+ bitmap_clear(kctx->cookies, *gpu_va, 1); -+ BUG_ON(kctx->pending_regions[*gpu_va]); -+ kctx->pending_regions[*gpu_va] = reg; ++ /* ++ * If the allocation was not evicted (i.e. evicted == 0) then ++ * the page accounting needs to be done. 
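++ * (If it had been evicted, the pages were handed back via the
++ * shrinker path, where these counters are presumed to have been
++ * adjusted already, so decrementing them again here would double
++ * count.) For the non-evicted case the bookkeeping below boils
++ * down to roughly:
++ *   kctx->used_pages         -= freed;
++ *   kbdev->memdev.used_pages -= freed;
++ * plus the per-process usage decrement and the tracepoint updates.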
++ */ ++ if (!reclaimed) { ++ kbase_process_page_usage_dec(kctx, freed); ++ new_page_count = atomic_sub_return(freed, ++ &kctx->used_pages); ++ atomic_sub(freed, ++ &kctx->kbdev->memdev.used_pages); + -+ /* relocate to correct base */ -+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ *gpu_va <<= PAGE_SHIFT; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); + -+ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { -+ /* we control the VA, mmap now to the GPU */ -+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != -+ 0) -+ goto no_gpu_va; -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } else { -+ /* we control the VA, but nothing to mmap yet */ -+ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) -+ goto no_gpu_va; -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; ++ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + } + -+ /* clear out private flags */ -+ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); -+ -+ kbase_gpu_vm_unlock(kctx); -+ + return 0; -+ -+no_gpu_va: -+no_cookie: -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ kfree(reg); -+no_reg: -+bad_flags: -+ *gpu_va = 0; -+ *va_pages = 0; -+ *flags = 0; -+ return -ENOMEM; +} + -+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 new_pages, -+ u64 old_pages, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) ++static void free_partial_locked(struct kbase_context *kctx, ++ struct kbase_mem_pool *pool, struct tagged_addr tp) +{ -+ struct tagged_addr *phy_pages; -+ u64 delta = new_pages - old_pages; -+ int ret = 0; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ struct page *p, *head_page; ++ struct kbase_sub_alloc *sa; + -+ /* Map the new pages into the GPU */ -+ phy_pages = kbase_get_gpu_phy_pages(reg); -+ ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, -+ phy_pages + old_pages, delta, reg->flags, kctx->as_nr, -+ reg->gpu_alloc->group_id, mmu_sync_info, reg, false); ++ lockdep_assert_held(&pool->pool_lock); ++ lockdep_assert_held(&kctx->mem_partials_lock); + -+ return ret; ++ p = as_page(tp); ++ head_page = (struct page *)p->lru.prev; ++ sa = (struct kbase_sub_alloc *)head_page->lru.next; ++ clear_bit(p - head_page, sa->sub_pages); ++ if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { ++ list_del(&sa->link); ++ kbase_mem_pool_free_locked(pool, head_page, true); ++ kfree(sa); ++ } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == ++ SZ_2M / SZ_4K - 1) { ++ /* expose the partial again */ ++ list_add(&sa->link, &kctx->mem_partials); ++ } +} + -+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages) ++void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, ++ struct kbase_mem_pool *pool, struct tagged_addr *pages, ++ size_t nr_pages_to_free) +{ -+ u64 gpu_va_start = reg->start_pfn; -+ -+ if (new_pages == old_pages) -+ /* Nothing to do */ -+ return; ++ struct kbase_context *kctx = alloc->imported.native.kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool syncback; ++ bool reclaimed = (alloc->evicted != 0); ++ struct tagged_addr *start_free; ++ size_t freed = 0; + -+ unmap_mapping_range(kctx->filp->f_inode->i_mapping, -+ (gpu_va_start + new_pages)<gpu_alloc; -+ int ret = 0; -+ -+ ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, -+ alloc->pages + new_pages, 
delta, delta, kctx->as_nr, false); ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); ++ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); ++ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + -+ return ret; -+} ++ lockdep_assert_held(&pool->pool_lock); ++ lockdep_assert_held(&kctx->mem_partials_lock); + -+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) -+{ -+ u64 old_pages; -+ u64 delta = 0; -+ int res = -EINVAL; -+ struct kbase_va_region *reg; -+ bool read_locked = false; ++ /* early out if nothing to do */ ++ if (!nr_pages_to_free) ++ return; + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ start_free = pages; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(gpu_addr != 0); ++ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + -+ if (gpu_addr & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid"); -+ return -EINVAL; ++ /* pad start_free to a valid start location */ ++ while (nr_pages_to_free && is_huge(*start_free) && ++ !is_huge_head(*start_free)) { ++ nr_pages_to_free--; ++ start_free++; + } + -+ down_write(kbase_mem_get_process_mmap_lock()); -+ kbase_gpu_vm_lock(kctx); -+ -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto out_unlock; -+ -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ goto out_unlock; -+ -+ if (0 == (reg->flags & KBASE_REG_GROWABLE)) -+ goto out_unlock; ++ while (nr_pages_to_free) { ++ if (is_huge_head(*start_free)) { ++ /* This is a 2MB entry, so free all the 512 pages that ++ * it points to ++ */ ++ WARN_ON(!pool->order); ++ kbase_mem_pool_free_pages_locked(pool, ++ 512, ++ start_free, ++ syncback, ++ reclaimed); ++ nr_pages_to_free -= 512; ++ start_free += 512; ++ freed += 512; ++ } else if (is_partial(*start_free)) { ++ WARN_ON(!pool->order); ++ free_partial_locked(kctx, pool, *start_free); ++ nr_pages_to_free--; ++ start_free++; ++ freed++; ++ } else { ++ struct tagged_addr *local_end_free; + -+ if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) -+ goto out_unlock; ++ WARN_ON(pool->order); ++ local_end_free = start_free; ++ while (nr_pages_to_free && ++ !is_huge(*local_end_free) && ++ !is_partial(*local_end_free)) { ++ local_end_free++; ++ nr_pages_to_free--; ++ } ++ kbase_mem_pool_free_pages_locked(pool, ++ local_end_free - start_free, ++ start_free, ++ syncback, ++ reclaimed); ++ freed += local_end_free - start_free; ++ start_free += local_end_free - start_free; ++ } ++ } + -+ /* Would overflow the VA region */ -+ if (new_pages > reg->nr_pages) -+ goto out_unlock; ++ alloc->nents -= freed; + -+ /* Can't shrink when physical pages are mapped to different GPU -+ * VAs. The code doesn't support looking up: -+ * - all physical pages assigned to different GPU VAs -+ * - CPU mappings for the physical pages at different vm_pgoff -+ * (==GPU VA) locations. -+ * -+ * Note that for Native allocs mapped at multiple GPU VAs, growth of -+ * such allocs is not a supported use-case. ++ /* ++ * If the allocation was not evicted (i.e. evicted == 0) then ++ * the page accounting needs to be done. 
+ */ -+ if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) -+ goto out_unlock; ++ if (!reclaimed) { ++ int new_page_count; + -+ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) -+ goto out_unlock; ++ kbase_process_page_usage_dec(kctx, freed); ++ new_page_count = atomic_sub_return(freed, ++ &kctx->used_pages); ++ atomic_sub(freed, ++ &kctx->kbdev->memdev.used_pages); + -+ if (kbase_is_region_shrinkable(reg)) -+ goto out_unlock; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); + -+ if (kbase_va_region_is_no_user_free(reg)) -+ goto out_unlock; ++ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); ++ } ++} ++KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); + -+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED -+ /* Reject resizing commit size */ -+ if (reg->flags & KBASE_REG_PF_GROW) -+ new_pages = reg->nr_pages; ++#if MALI_USE_CSF ++/** ++ * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. ++ * @alloc: The allocation for the imported user buffer. ++ * ++ * This must only be called when terminating an alloc, when its refcount ++ * (number of users) has become 0. This also ensures it is only called once all ++ * CPU mappings have been closed. ++ * ++ * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active ++ * allocations ++ */ ++static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); +#endif + -+ if (new_pages == reg->gpu_alloc->nents) { -+ /* no change */ -+ res = 0; -+ goto out_unlock; -+ } ++void kbase_mem_kref_free(struct kref *kref) ++{ ++ struct kbase_mem_phy_alloc *alloc; + -+ old_pages = kbase_reg_current_backed_size(reg); -+ if (new_pages > old_pages) { -+ delta = new_pages - old_pages; ++ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + -+ /* -+ * No update to the mm so downgrade the writer lock to a read -+ * lock so other readers aren't blocked after this point. -+ */ -+ downgrade_write(kbase_mem_get_process_mmap_lock()); -+ read_locked = true; ++ switch (alloc->type) { ++ case KBASE_MEM_TYPE_NATIVE: { + -+ /* Allocate some more pages */ -+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { -+ res = -ENOMEM; -+ goto out_unlock; -+ } -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ if (kbase_alloc_phy_pages_helper( -+ reg->gpu_alloc, delta) != 0) { -+ res = -ENOMEM; -+ kbase_free_phy_pages_helper(reg->cpu_alloc, -+ delta); -+ goto out_unlock; -+ } -+ } ++ if (!WARN_ON(!alloc->imported.native.kctx)) { ++ if (alloc->permanent_map) ++ kbase_phy_alloc_mapping_term( ++ alloc->imported.native.kctx, ++ alloc); + -+ /* No update required for CPU mappings, that's done on fault. */ ++ /* ++ * The physical allocation must have been removed from ++ * the eviction list before trying to free it. ++ */ ++ mutex_lock( ++ &alloc->imported.native.kctx->jit_evict_lock); ++ WARN_ON(!list_empty(&alloc->evict_node)); ++ mutex_unlock( ++ &alloc->imported.native.kctx->jit_evict_lock); + -+ /* Update GPU mapping. 
*/ -+ res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, -+ old_pages, mmu_sync_info); ++ kbase_process_page_usage_dec( ++ alloc->imported.native.kctx, ++ alloc->imported.native.nr_struct_pages); ++ } ++ kbase_free_phy_pages_helper(alloc, alloc->nents); ++ break; ++ } ++ case KBASE_MEM_TYPE_ALIAS: { ++ /* just call put on the underlying phy allocs */ ++ size_t i; ++ struct kbase_aliased *aliased; + -+ /* On error free the new pages */ -+ if (res) { -+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ kbase_free_phy_pages_helper(reg->gpu_alloc, -+ delta); -+ res = -ENOMEM; -+ goto out_unlock; ++ aliased = alloc->imported.alias.aliased; ++ if (aliased) { ++ for (i = 0; i < alloc->imported.alias.nents; i++) ++ if (aliased[i].alloc) { ++ kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc); ++ kbase_mem_phy_alloc_put(aliased[i].alloc); ++ } ++ vfree(aliased); + } -+ } else { -+ res = kbase_mem_shrink(kctx, reg, new_pages); -+ if (res) -+ res = -ENOMEM; ++ break; ++ } ++ case KBASE_MEM_TYPE_RAW: ++ /* raw pages, external cleanup */ ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_UMM: ++ if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { ++ WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1, ++ "WARNING: expected excatly 1 mapping, got %d", ++ alloc->imported.umm.current_mapping_usage_count); ++ dma_buf_unmap_attachment( ++ alloc->imported.umm.dma_attachment, ++ alloc->imported.umm.sgt, ++ DMA_BIDIRECTIONAL); ++ kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, ++ alloc); ++ } ++ dma_buf_detach(alloc->imported.umm.dma_buf, ++ alloc->imported.umm.dma_attachment); ++ dma_buf_put(alloc->imported.umm.dma_buf); ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++#if MALI_USE_CSF ++ kbase_jd_user_buf_unpin_pages(alloc); ++#endif ++ if (alloc->imported.user_buf.mm) ++ mmdrop(alloc->imported.user_buf.mm); ++ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) ++ vfree(alloc->imported.user_buf.pages); ++ else ++ kfree(alloc->imported.user_buf.pages); ++ break; ++ default: ++ WARN(1, "Unexecpted free of type %d\n", alloc->type); ++ break; + } + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ if (read_locked) -+ up_read(kbase_mem_get_process_mmap_lock()); ++ /* Free based on allocation type */ ++ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) ++ vfree(alloc); + else -+ up_write(kbase_mem_get_process_mmap_lock()); -+ -+ return res; ++ kfree(alloc); +} + -+int kbase_mem_shrink(struct kbase_context *const kctx, -+ struct kbase_va_region *const reg, u64 new_pages) -+{ -+ u64 delta, old_pages; -+ int err; ++KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + -+ lockdep_assert_held(&kctx->reg_lock); ++int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) ++{ ++ KBASE_DEBUG_ASSERT(reg != NULL); ++ KBASE_DEBUG_ASSERT(vsize > 0); + -+ if (WARN_ON(!kctx)) -+ return -EINVAL; ++ /* validate user provided arguments */ ++ if (size > vsize || vsize > reg->nr_pages) ++ goto out_term; + -+ if (WARN_ON(!reg)) -+ return -EINVAL; ++ /* Prevent vsize*sizeof from wrapping around. ++ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. 
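++ * Worked example, assuming a 32-bit size_t and 8-byte page entries:
++ * (2^29 + 1) * 8 == 2^32 + 8, which wraps to just 8 bytes, so the
++ * allocation would "succeed" while being far too small. Comparing
++ * vsize against ((size_t)-1 / sizeof(entry)) before multiplying, as
++ * done below, rejects any vsize that would overflow.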
++ */ ++ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) ++ goto out_term; + -+ old_pages = kbase_reg_current_backed_size(reg); -+ if (WARN_ON(old_pages < new_pages)) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(vsize != 0); + -+ delta = old_pages - new_pages; ++ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) ++ goto out_term; + -+ /* Update the GPU mapping */ -+ err = kbase_mem_shrink_gpu_mapping(kctx, reg, -+ new_pages, old_pages); -+ if (err >= 0) { -+ /* Update all CPU mapping(s) */ -+ kbase_mem_shrink_cpu_mapping(kctx, reg, -+ new_pages, old_pages); ++ reg->cpu_alloc->reg = reg; ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) ++ goto out_rollback; ++ reg->gpu_alloc->reg = reg; ++ } + -+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ kbase_free_phy_pages_helper(reg->gpu_alloc, delta); ++ return 0; + -+ if (kctx->kbdev->pagesize_2mb) { -+ if (kbase_reg_current_backed_size(reg) > new_pages) { -+ old_pages = new_pages; -+ new_pages = kbase_reg_current_backed_size(reg); ++out_rollback: ++ kbase_free_phy_pages_helper(reg->cpu_alloc, size); ++out_term: ++ return -1; ++} ++KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); + -+ /* Update GPU mapping. */ -+ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, -+ CALLER_MMU_ASYNC); -+ } -+ } else { -+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); -+ } -+ } ++void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, ++ enum kbase_page_status status) ++{ ++ u32 i = 0; + -+ return err; -+} ++ for (; i < alloc->nents; i++) { ++ struct tagged_addr phys = alloc->pages[i]; ++ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + ++ /* Skip the 4KB page that is part of a large page, as the large page is ++ * excluded from the migration process. ++ */ ++ if (is_huge(phys) || is_partial(phys)) ++ continue; + -+static void kbase_cpu_vm_open(struct vm_area_struct *vma) -+{ -+ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ if (!page_md) ++ continue; + -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); -+ /* non-atomic as we're under Linux' mm lock */ -+ map->count++; ++ spin_lock(&page_md->migrate_lock); ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); ++ spin_unlock(&page_md->migrate_lock); ++ } +} + -+static void kbase_cpu_vm_close(struct vm_area_struct *vma) ++bool kbase_check_alloc_flags(unsigned long flags) +{ -+ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ /* Only known input flags should be set. 
*/ ++ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) ++ return false; + -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); ++ /* At least one flag should be set */ ++ if (flags == 0) ++ return false; + -+ /* non-atomic as we're under Linux' mm lock */ -+ if (--map->count) -+ return; ++ /* Either the GPU or CPU must be reading from the allocated memory */ ++ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) ++ return false; + -+ KBASE_DEBUG_ASSERT(map->kctx); -+ KBASE_DEBUG_ASSERT(map->alloc); ++ /* Either the GPU or CPU must be writing to the allocated memory */ ++ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+ kbase_gpu_vm_lock(map->kctx); ++ /* GPU executable memory cannot: ++ * - Be written by the GPU ++ * - Be grown on GPU page fault ++ */ ++ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & ++ (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) ++ return false; + -+ if (map->free_on_close) { -+ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == -+ KBASE_REG_ZONE_SAME_VA); -+ /* Avoid freeing memory on the process death which results in -+ * GPU Page Fault. Memory will be freed in kbase_destroy_context -+ */ -+ if (!is_process_exiting(vma)) -+ kbase_mem_free_region(map->kctx, map->region); -+ } ++#if !MALI_USE_CSF ++ /* GPU executable memory also cannot have the top of its initial ++ * commit aligned to 'extension' ++ */ ++ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & ++ BASE_MEM_TILER_ALIGN_TOP)) ++ return false; ++#endif /* !MALI_USE_CSF */ + -+ list_del(&map->mappings_list); ++ /* To have an allocation lie within a 4GB chunk is required only for ++ * TLS memory, which will never be used to contain executable code. ++ */ ++ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & ++ BASE_MEM_PROT_GPU_EX)) ++ return false; + -+ kbase_va_region_alloc_put(map->kctx, map->region); -+ kbase_gpu_vm_unlock(map->kctx); ++#if !MALI_USE_CSF ++ /* TLS memory should also not be used for tiler heap */ ++ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & ++ BASE_MEM_TILER_ALIGN_TOP)) ++ return false; ++#endif /* !MALI_USE_CSF */ + -+ kbase_mem_phy_alloc_put(map->alloc); -+ kfree(map); -+} ++ /* GPU should have at least read or write access otherwise there is no ++ * reason for allocating. 
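++ * For illustration, a request such as
++ *   BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD
++ * satisfies every check in this function, whereas
++ *   BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_GPU_EX
++ * is rejected by the executable-memory rule above, because executable
++ * memory must not also be GPU-writable.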
++ */ ++ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, -+ struct kbase_va_region *reg, -+ pgoff_t *start_off, -+ size_t nr_pages) -+{ -+ struct kbase_aliased *aliased = -+ reg->cpu_alloc->imported.alias.aliased; ++ /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ ++ if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) ++ return false; + -+ if (!reg->cpu_alloc->imported.alias.stride || -+ reg->nr_pages < (*start_off + nr_pages)) { -+ return NULL; -+ } ++ /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory ++ */ ++ if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == ++ BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) ++ return false; + -+ while (*start_off >= reg->cpu_alloc->imported.alias.stride) { -+ aliased++; -+ *start_off -= reg->cpu_alloc->imported.alias.stride; -+ } ++ /* Should not combine BASE_MEM_COHERENT_LOCAL with ++ * BASE_MEM_COHERENT_SYSTEM ++ */ ++ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == ++ (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ++ return false; + -+ if (!aliased->alloc) { -+ /* sink page not available for dumping map */ -+ return NULL; -+ } ++#if MALI_USE_CSF ++ if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED))) ++ return false; + -+ if ((*start_off + nr_pages) > aliased->length) { -+ /* not fully backed by physical pages */ -+ return NULL; -+ } ++ if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED)) ++ return false; ++#endif + -+ return aliased; ++ return true; +} + -+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -+static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, -+ struct vm_fault *vmf) -+{ -+#else -+static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) ++bool kbase_check_import_flags(unsigned long flags) +{ -+ struct vm_area_struct *vma = vmf->vma; -+#endif -+ struct kbase_cpu_mapping *map = vma->vm_private_data; -+ pgoff_t map_start_pgoff; -+ pgoff_t fault_pgoff; -+ size_t i; -+ pgoff_t addr; -+ size_t nents; -+ struct tagged_addr *pages; -+ vm_fault_t ret = VM_FAULT_SIGBUS; -+ struct memory_group_manager_device *mgm_dev; -+ -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); -+ KBASE_DEBUG_ASSERT(map->kctx); -+ KBASE_DEBUG_ASSERT(map->alloc); ++ /* Only known input flags should be set. 
*/ ++ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) ++ return false; + -+ map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; ++ /* At least one flag should be set */ ++ if (flags == 0) ++ return false; + -+ kbase_gpu_vm_lock(map->kctx); -+ if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { -+ struct kbase_aliased *aliased = -+ get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); ++ /* Imported memory cannot be GPU executable */ ++ if (flags & BASE_MEM_PROT_GPU_EX) ++ return false; + -+ if (!aliased) -+ goto exit; ++ /* Imported memory cannot grow on page fault */ ++ if (flags & BASE_MEM_GROW_ON_GPF) ++ return false; + -+ nents = aliased->length; -+ pages = aliased->alloc->pages + aliased->offset; -+ } else { -+ nents = map->alloc->nents; -+ pages = map->alloc->pages; -+ } ++#if MALI_USE_CSF ++ /* Imported memory cannot be fixed */ ++ if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE))) ++ return false; ++#else ++ /* Imported memory cannot be aligned to the end of its initial commit */ ++ if (flags & BASE_MEM_TILER_ALIGN_TOP) ++ return false; ++#endif /* !MALI_USE_CSF */ + -+ fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff); ++ /* GPU should have at least read or write access otherwise there is no ++ * reason for importing. ++ */ ++ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+ if (fault_pgoff >= nents) -+ goto exit; ++ /* Protected memory cannot be read by the CPU */ ++ if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD)) ++ return false; + -+ /* Fault on access to DONT_NEED regions */ -+ if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) -+ goto exit; ++ return true; ++} + -+ /* We are inserting all valid pages from the start of CPU mapping and -+ * not from the fault location (the mmap handler was previously doing -+ * the same). 
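-+ * In effect the first fault pre-populates the whole backed range of
-+ * the VMA through the mgm_vmf_insert_pfn_prot() loop below, so later
-+ * accesses in that range should not fault again unless the backing
-+ * is shrunk or the region becomes DONT_NEED.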
-+ */ -+ i = map_start_pgoff; -+ addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); -+ mgm_dev = map->kctx->kbdev->mgm_dev; -+ while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { ++int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, ++ u64 va_pages, u64 commit_pages, u64 large_extension) ++{ ++ struct device *dev = kctx->kbdev->dev; ++ int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; ++ u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; ++ struct kbase_va_region test_reg; + -+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, -+ map->alloc->group_id, vma, addr << PAGE_SHIFT, -+ PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot); ++ /* kbase_va_region's extension member can be of variable size, so check against that type */ ++ test_reg.extension = large_extension; + -+ if (ret != VM_FAULT_NOPAGE) -+ goto exit; ++#define KBASE_MSG_PRE "GPU allocation attempted with " + -+ i++; addr++; ++ if (va_pages == 0) { ++ dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); ++ return -EINVAL; + } + -+exit: -+ kbase_gpu_vm_unlock(map->kctx); -+ return ret; -+} ++ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { ++ dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", ++ (unsigned long long)va_pages); ++ return -ENOMEM; ++ } + -+const struct vm_operations_struct kbase_vm_ops = { -+ .open = kbase_cpu_vm_open, -+ .close = kbase_cpu_vm_close, -+ .fault = kbase_cpu_vm_fault -+}; ++ /* Note: commit_pages is checked against va_pages during ++ * kbase_alloc_phy_pages() ++ */ + -+static int kbase_cpu_mmap(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ struct vm_area_struct *vma, -+ void *kaddr, -+ size_t nr_pages, -+ unsigned long aligned_offset, -+ int free_on_close) -+{ -+ struct kbase_cpu_mapping *map; -+ int err = 0; ++ /* Limit GPU executable allocs to GPU PC size */ ++ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { ++ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", ++ (unsigned long long)va_pages, ++ (unsigned long long)gpu_pc_pages_max); + -+ map = kzalloc(sizeof(*map), GFP_KERNEL); ++ return -EINVAL; ++ } + -+ if (!map) { -+ WARN_ON(1); -+ err = -ENOMEM; -+ goto out; ++ if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { ++ dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_GROW_ON_GPF but extension == 0\n"); ++ return -EINVAL; + } + -+ /* -+ * VM_DONTCOPY - don't make this mapping available in fork'ed processes -+ * VM_DONTEXPAND - disable mremap on this region -+ * VM_IO - disables paging -+ * VM_DONTDUMP - Don't include in core dumps (3.7 only) -+ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. -+ * This is needed to support using the dedicated and -+ * the OS based memory backends together. 
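-+ * Of the flags listed above, only VM_DONTCOPY, VM_DONTDUMP,
-+ * VM_DONTEXPAND and VM_IO are set unconditionally just below;
-+ * VM_PFNMAP or VM_MIXEDMAP is chosen further down depending on
-+ * whether a kernel mapping (kaddr) is being remapped.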
-+ */ -+ /* -+ * This will need updating to propagate coherency flags -+ * See MIDBASE-1057 -+ */ ++#if !MALI_USE_CSF ++ if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { ++ dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); ++ return -EINVAL; ++ } + -+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); -+ vma->vm_ops = &kbase_vm_ops; -+ vma->vm_private_data = map; ++ if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && ++ test_reg.extension != 0) { ++ dev_warn( ++ dev, KBASE_MSG_PRE ++ "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n"); ++ return -EINVAL; ++ } ++#else ++ if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { ++ dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); ++ return -EINVAL; ++ } ++#endif /* !MALI_USE_CSF */ + -+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { -+ pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + -+ (aligned_offset >> PAGE_SHIFT); -+ struct kbase_aliased *aliased = -+ get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); ++#if !MALI_USE_CSF ++ /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ ++ if (flags & BASE_MEM_TILER_ALIGN_TOP) { ++#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " ++ unsigned long small_extension; + -+ if (!aliased) { -+ err = -EINVAL; -+ kfree(map); -+ goto out; ++ if (large_extension > ++ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "extension==%lld pages exceeds limit %lld", ++ (unsigned long long)large_extension, ++ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); ++ return -EINVAL; + } -+ } -+ -+ if (!(reg->flags & KBASE_REG_CPU_CACHED) && -+ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { -+ /* We can't map vmalloc'd memory uncached. -+ * Other memory will have been returned from -+ * kbase_mem_pool which would be -+ * suitable for mapping uncached. ++ /* For use with is_power_of_2, which takes unsigned long, so ++ * must ensure e.g. on 32-bit kernel it'll fit in that type + */ -+ BUG_ON(kaddr); -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ } ++ small_extension = (unsigned long)large_extension; + -+ if (!kaddr) { -+ vm_flags_set(vma, VM_PFNMAP); -+ } else { -+ WARN_ON(aligned_offset); -+ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ -+ vm_flags_set(vma, VM_MIXEDMAP); -+ /* vmalloc remaping is easy... 
*/ -+ err = remap_vmalloc_range(vma, kaddr, 0); -+ WARN_ON(err); -+ } ++ if (!is_power_of_2(small_extension)) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "extension==%ld not a non-zero power of 2", ++ small_extension); ++ return -EINVAL; ++ } + -+ if (err) { -+ kfree(map); -+ goto out; ++ if (commit_pages > large_extension) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "commit_pages==%ld exceeds extension==%ld", ++ (unsigned long)commit_pages, ++ (unsigned long)large_extension); ++ return -EINVAL; ++ } ++#undef KBASE_MSG_PRE_FLAG + } ++#endif /* !MALI_USE_CSF */ + -+ map->region = kbase_va_region_alloc_get(kctx, reg); -+ map->free_on_close = free_on_close; -+ map->kctx = kctx; -+ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ map->count = 1; /* start with one ref */ -+ -+ if (reg->flags & KBASE_REG_CPU_CACHED) -+ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && ++ (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { ++ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", ++ (unsigned long long)va_pages); ++ return -EINVAL; ++ } + -+ list_add(&map->mappings_list, &map->alloc->mappings); ++ return 0; ++#undef KBASE_MSG_PRE ++} + -+ out: -+ return err; ++void kbase_gpu_vm_lock(struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ mutex_lock(&kctx->reg_lock); +} + -+#ifdef CONFIG_MALI_VECTOR_DUMP -+static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) ++KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); ++ ++void kbase_gpu_vm_unlock(struct kbase_context *kctx) +{ -+ /* Free all cached/unused JIT allocations as their contents are not -+ * really needed for the replay. The GPU writes to them would already -+ * have been captured through the GWT mechanism. -+ * This considerably reduces the size of mmu-snapshot-file and it also -+ * helps avoid segmentation fault issue during vector dumping of -+ * complex contents when the unused JIT allocations are accessed to -+ * dump their contents (as they appear in the page tables snapshot) -+ * but they got freed by the shrinker under low memory scenarios -+ * (which do occur with complex contents). 
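-+ * The loop below keeps calling kbase_jit_evict() until it reports
-+ * that nothing was left to evict, freeing the cached/unused JIT
-+ * allocations one at a time.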
-+ */ -+ while (kbase_jit_evict(kctx)) -+ ; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ mutex_unlock(&kctx->reg_lock); +} + -+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, -+ struct vm_area_struct *vma, -+ struct kbase_va_region **const reg, -+ void **const kmap_addr) ++KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++struct kbase_jit_debugfs_data { ++ int (*func)(struct kbase_jit_debugfs_data *data); ++ struct mutex lock; ++ struct kbase_context *kctx; ++ u64 active_value; ++ u64 pool_value; ++ u64 destroy_value; ++ char buffer[50]; ++}; ++ ++static int kbase_jit_debugfs_common_open(struct inode *inode, ++ struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ -+ struct kbase_va_region *new_reg; -+ void *kaddr; -+ u32 nr_pages; -+ size_t size; -+ int err = 0; ++ struct kbase_jit_debugfs_data *data; + -+ lockdep_assert_held(&kctx->reg_lock); ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; + -+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__); -+ size = (vma->vm_end - vma->vm_start); -+ nr_pages = size >> PAGE_SHIFT; ++ data->func = func; ++ mutex_init(&data->lock); ++ data->kctx = (struct kbase_context *) inode->i_private; + -+ kbase_free_unused_jit_allocations(kctx); ++ file->private_data = data; + -+ kaddr = kbase_mmu_dump(kctx, nr_pages); ++ return nonseekable_open(inode, file); ++} + -+ if (!kaddr) { -+ err = -ENOMEM; -+ goto out; -+ } ++static ssize_t kbase_jit_debugfs_common_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) ++{ ++ struct kbase_jit_debugfs_data *data; ++ size_t size; ++ int ret; + -+ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, -+ KBASE_REG_ZONE_SAME_VA); -+ if (!new_reg) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out; -+ } ++ data = (struct kbase_jit_debugfs_data *) file->private_data; ++ mutex_lock(&data->lock); + -+ new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, -+ BASE_MEM_GROUP_DEFAULT); -+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { -+ err = -ENOMEM; -+ new_reg->cpu_alloc = NULL; -+ WARN_ON(1); -+ goto out_no_alloc; -+ } ++ if (*ppos) { ++ size = strnlen(data->buffer, sizeof(data->buffer)); ++ } else { ++ if (!data->func) { ++ ret = -EACCES; ++ goto out_unlock; ++ } + -+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); ++ if (data->func(data)) { ++ ret = -EACCES; ++ goto out_unlock; ++ } + -+ new_reg->flags &= ~KBASE_REG_FREE; -+ new_reg->flags |= KBASE_REG_CPU_CACHED; -+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out_va_region; ++ size = scnprintf(data->buffer, sizeof(data->buffer), ++ "%llu,%llu,%llu\n", data->active_value, ++ data->pool_value, data->destroy_value); + } + -+ *kmap_addr = kaddr; -+ *reg = new_reg; -+ -+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); -+ return 0; ++ ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + -+out_no_alloc: -+out_va_region: -+ kbase_free_alloced_region(new_reg); -+out: -+ return err; ++out_unlock: ++ mutex_unlock(&data->lock); ++ return ret; +} -+#endif + -+void kbase_os_mem_map_lock(struct kbase_context *kctx) ++static int kbase_jit_debugfs_common_release(struct inode *inode, ++ struct file *file) +{ -+ (void)kctx; -+ down_read(kbase_mem_get_process_mmap_lock()); ++ kfree(file->private_data); ++ return 0; +} + -+void kbase_os_mem_map_unlock(struct kbase_context *kctx) -+{ -+ (void)kctx; -+ up_read(kbase_mem_get_process_mmap_lock()); ++#define 
KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ ++static int __fops ## _open(struct inode *inode, struct file *file) \ ++{ \ ++ return kbase_jit_debugfs_common_open(inode, file, __func); \ ++} \ ++static const struct file_operations __fops = { \ ++ .owner = THIS_MODULE, \ ++ .open = __fops ## _open, \ ++ .release = kbase_jit_debugfs_common_release, \ ++ .read = kbase_jit_debugfs_common_read, \ ++ .write = NULL, \ ++ .llseek = generic_file_llseek, \ +} + -+static int kbasep_reg_mmap(struct kbase_context *kctx, -+ struct vm_area_struct *vma, -+ struct kbase_va_region **regm, -+ size_t *nr_pages, size_t *aligned_offset) -+ ++static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ -+ unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ struct kbase_va_region *reg; -+ int err = 0; -+ -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ -+ *aligned_offset = 0; -+ -+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__); ++ struct kbase_context *kctx = data->kctx; ++ struct list_head *tmp; + -+ /* SAME_VA stuff, fetch the right region */ -+ reg = kctx->pending_regions[cookie]; -+ if (!reg) { -+ err = -ENOMEM; -+ goto out; ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each(tmp, &kctx->jit_active_head) { ++ data->active_value++; + } + -+ if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { -+ /* incorrect mmap size */ -+ /* leave the cookie for a potential later -+ * mapping, or to be reclaimed later when the -+ * context is freed -+ */ -+ err = -ENOMEM; -+ goto out; ++ list_for_each(tmp, &kctx->jit_pool_head) { ++ data->pool_value++; + } + -+ if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || -+ (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { -+ /* VM flags inconsistent with region flags */ -+ err = -EPERM; -+ dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", -+ __FILE__, __LINE__); -+ goto out; ++ list_for_each(tmp, &kctx->jit_destroy_head) { ++ data->destroy_value++; + } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ /* adjust down nr_pages to what we have physically */ -+ *nr_pages = kbase_reg_current_backed_size(reg); -+ if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, -+ reg->nr_pages, 1, mmu_sync_info) != 0) { -+ dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); -+ /* Unable to map in GPU space. */ -+ WARN_ON(1); -+ err = -ENOMEM; -+ goto out; ++ return 0; ++} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, ++ kbase_jit_debugfs_count_get); ++ ++static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) ++{ ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; ++ ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ data->active_value += reg->nr_pages; + } -+ /* no need for the cookie anymore */ -+ kctx->pending_regions[cookie] = NULL; -+ bitmap_set(kctx->cookies, cookie, 1); + -+#if MALI_USE_CSF -+ if (reg->flags & KBASE_REG_CSF_EVENT) -+ kbase_link_event_mem_page(kctx, reg); -+#endif ++ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { ++ data->pool_value += reg->nr_pages; ++ } + -+ /* -+ * Overwrite the offset with the region start_pfn, so we effectively -+ * map from offset 0 in the region. 
However subtract the aligned -+ * offset so that when user space trims the mapping the beginning of -+ * the trimmed VMA has the correct vm_pgoff; -+ */ -+ vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); -+out: -+ *regm = reg; -+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); ++ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { ++ data->destroy_value += reg->nr_pages; ++ } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ return err; ++ return 0; +} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, ++ kbase_jit_debugfs_vm_get); + -+int kbase_context_mmap(struct kbase_context *const kctx, -+ struct vm_area_struct *const vma) ++static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ -+ struct kbase_va_region *reg = NULL; -+ void *kaddr = NULL; -+ size_t nr_pages = vma_pages(vma); -+ int err = 0; -+ int free_on_close = 0; -+ struct device *dev = kctx->kbdev->dev; -+ size_t aligned_offset = 0; -+ -+ dev_dbg(dev, "kbase_mmap\n"); ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; + -+ if (!(vma->vm_flags & VM_READ)) -+ vm_flags_clear(vma, VM_MAYREAD); -+ if (!(vma->vm_flags & VM_WRITE)) -+ vm_flags_clear(vma, VM_MAYWRITE); ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ data->active_value += reg->gpu_alloc->nents; ++ } + -+ if (nr_pages == 0) { -+ err = -EINVAL; -+ goto out; ++ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { ++ data->pool_value += reg->gpu_alloc->nents; + } + -+ if (!(vma->vm_flags & VM_SHARED)) { -+ err = -EINVAL; -+ goto out; ++ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { ++ data->destroy_value += reg->gpu_alloc->nents; + } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ kbase_gpu_vm_lock(kctx); ++ return 0; ++} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, ++ kbase_jit_debugfs_phys_get); + -+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { -+ /* The non-mapped tracking helper page */ -+ err = kbase_tracking_page_setup(kctx, vma); -+ goto out_unlock; -+ } ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) ++{ ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; + -+ if (!kbase_mem_allow_alloc(kctx)) { -+ err = -EINVAL; -+ goto out_unlock; ++#if !MALI_USE_CSF ++ mutex_lock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ data->active_value += reg->used_pages; + } ++ mutex_unlock(&kctx->jit_evict_lock); ++#if !MALI_USE_CSF ++ mutex_unlock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + -+ switch (vma->vm_pgoff) { -+ case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): -+ case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): -+ /* Illegal handle for direct map */ -+ err = -EINVAL; -+ goto out_unlock; -+ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+ /* MMU dump */ -+ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); -+ if (err != 0) -+ goto out_unlock; -+ /* free the region on munmap */ -+ free_on_close = 1; -+ break; -+#else -+ /* Illegal handle for direct map */ -+ err = -EINVAL; -+ goto out_unlock; -+#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ -+#if MALI_USE_CSF -+ case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): -+ kbase_gpu_vm_unlock(kctx); -+ err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); -+ goto out; -+ case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) ... 
-+ PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { -+ kbase_gpu_vm_unlock(kctx); -+ mutex_lock(&kctx->csf.lock); -+ err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); -+ mutex_unlock(&kctx->csf.lock); -+ goto out; -+ } -+#endif -+ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... -+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { -+ err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, -+ &aligned_offset); -+ if (err != 0) -+ goto out_unlock; -+ /* free the region on munmap */ -+ free_on_close = 1; -+ break; -+ } -+ default: { -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ (u64)vma->vm_pgoff << PAGE_SHIFT); ++ return 0; ++} + -+ if (!kbase_is_region_invalid_or_free(reg)) { -+ /* will this mapping overflow the size of the region? */ -+ if (nr_pages > (reg->nr_pages - -+ (vma->vm_pgoff - reg->start_pfn))) { -+ err = -ENOMEM; -+ goto out_unlock; -+ } ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, ++ kbase_jit_debugfs_used_get); + -+ if ((vma->vm_flags & VM_READ && -+ !(reg->flags & KBASE_REG_CPU_RD)) || -+ (vma->vm_flags & VM_WRITE && -+ !(reg->flags & KBASE_REG_CPU_WR))) { -+ /* VM flags inconsistent with region flags */ -+ err = -EPERM; -+ dev_err(dev, "%s:%d inconsistent VM flags\n", -+ __FILE__, __LINE__); -+ goto out_unlock; -+ } ++static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, ++ struct kbase_va_region *reg, size_t pages_needed, ++ size_t *freed, bool shrink); + -+ if (KBASE_MEM_TYPE_IMPORTED_UMM == -+ reg->cpu_alloc->type) { -+ if (0 != (vma->vm_pgoff - reg->start_pfn)) { -+ err = -EINVAL; -+ dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", -+ __FILE__, __LINE__); -+ goto out_unlock; -+ } -+ err = dma_buf_mmap( -+ reg->cpu_alloc->imported.umm.dma_buf, -+ vma, vma->vm_pgoff - reg->start_pfn); -+ goto out_unlock; -+ } ++static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) ++{ ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; + -+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { -+ /* initial params check for aliased dumping map */ -+ if (nr_pages > reg->gpu_alloc->imported.alias.stride || -+ !reg->gpu_alloc->imported.alias.stride || -+ !nr_pages) { -+ err = -EINVAL; -+ dev_warn(dev, "mmap aliased: invalid params!\n"); -+ goto out_unlock; -+ } -+ } else if (reg->cpu_alloc->nents < -+ (vma->vm_pgoff - reg->start_pfn + nr_pages)) { -+ /* limit what we map to the amount currently backed */ -+ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) -+ nr_pages = 0; -+ else -+ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); -+ } -+ } else { -+ err = -ENOMEM; -+ goto out_unlock; ++#if !MALI_USE_CSF ++ mutex_lock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ kbase_gpu_vm_lock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ int err; ++ size_t freed = 0u; ++ ++ err = kbase_mem_jit_trim_pages_from_region(kctx, reg, ++ SIZE_MAX, &freed, false); ++ ++ if (err) { ++ /* Failed to calculate, try the next region */ ++ continue; + } -+ } /* default */ -+ } /* switch */ + -+ err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, -+ free_on_close); -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { -+ /* MMU dump - userspace should now have a reference on -+ * the pages, so we can now free the kernel mapping -+ */ -+ vfree(kaddr); -+ /* CPU mapping of GPU allocations have GPU VA as the vm_pgoff -+ * and that is 
used to shrink the mapping when the commit size -+ * is reduced. So vm_pgoff for CPU mapping created to get the -+ * snapshot of GPU page tables shall not match with any GPU VA. -+ * That can be ensured by setting vm_pgoff as vma->vm_start -+ * because, -+ * - GPU VA of any SAME_VA allocation cannot match with -+ * vma->vm_start, as CPU VAs are unique. -+ * - GPU VA of CUSTOM_VA allocations are outside the CPU -+ * virtual address space. -+ */ -+ vma->vm_pgoff = PFN_DOWN(vma->vm_start); ++ data->active_value += freed; + } -+#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ -+out_unlock: ++ mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); -+out: -+ if (err) -+ dev_err(dev, "mmap failed %d\n", err); ++#if !MALI_USE_CSF ++ mutex_unlock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + -+ return err; ++ return 0; +} + -+KBASE_EXPORT_TEST_API(kbase_context_mmap); ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, ++ kbase_jit_debugfs_trim_get); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+void kbase_sync_mem_regions(struct kbase_context *kctx, -+ struct kbase_vmap_struct *map, enum kbase_sync_type dest) ++void kbase_jit_debugfs_init(struct kbase_context *kctx) +{ -+ size_t i; -+ off_t const offset = map->offset_in_page; -+ size_t const page_count = PFN_UP(offset + map->size); ++ /* prevent unprivileged use of debug file system ++ * in old kernel version ++ */ ++ const mode_t mode = 0444; + -+ /* Sync first page */ -+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); -+ struct tagged_addr cpu_pa = map->cpu_pages[0]; -+ struct tagged_addr gpu_pa = map->gpu_pages[0]; ++ /* Caller already ensures this, but we keep the pattern for ++ * maintenance safety. ++ */ ++ if (WARN_ON(!kctx) || ++ WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; + -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest); + -+ /* Sync middle pages (if any) */ -+ for (i = 1; page_count > 2 && i < page_count - 1; i++) { -+ cpu_pa = map->cpu_pages[i]; -+ gpu_pa = map->gpu_pages[i]; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest); -+ } + -+ /* Sync last page (if any) */ -+ if (page_count > 1) { -+ cpu_pa = map->cpu_pages[page_count - 1]; -+ gpu_pa = map->gpu_pages[page_count - 1]; -+ sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest); -+ } ++ /* Debugfs entry for getting the number of JIT allocations. */ ++ debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_count_fops); ++ ++ /* ++ * Debugfs entry for getting the total number of virtual pages ++ * used by JIT allocations. ++ */ ++ debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_vm_fops); ++ ++ /* ++ * Debugfs entry for getting the number of physical pages used ++ * by JIT allocations. ++ */ ++ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_phys_fops); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* ++ * Debugfs entry for getting the number of pages used ++ * by JIT allocations for estimating the physical pressure ++ * limit. ++ */ ++ debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_used_fops); ++ ++ /* ++ * Debugfs entry for getting the number of pages that could ++ * be trimmed to free space for more JIT allocations. 
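/*
 * Illustrative sketch (not part of the patch): the first/middle/last page
 * split performed by kbase_sync_mem_regions() above, restated as a
 * standalone, userspace-compilable C function. The 4 KiB page size and the
 * helper name are assumptions made purely for illustration.
 */
#include <stdio.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096u

static void show_sync_split(size_t offset_in_page, size_t size)
{
	size_t page_count = (offset_in_page + size + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;
	size_t first_sz = SKETCH_PAGE_SIZE - offset_in_page;
	size_t i;

	if (first_sz > size)
		first_sz = size;

	/* First page: from the mapping offset to the end of the page. */
	printf("page 0: offset %zu, %zu bytes\n", offset_in_page, first_sz);

	/* Middle pages (if any): synced whole. */
	for (i = 1; page_count > 2 && i < page_count - 1; i++)
		printf("page %zu: offset 0, %u bytes\n", i, SKETCH_PAGE_SIZE);

	/* Last page (if any): up to the final byte of the mapping. */
	if (page_count > 1)
		printf("page %zu: offset 0, %zu bytes\n", page_count - 1,
		       ((offset_in_page + size - 1) % SKETCH_PAGE_SIZE) + 1);
}

int main(void)
{
	/* 100 bytes into the first page, 10000 bytes long: 3996 + 4096 + 1908. */
	show_sync_split(100, 10000);
	return 0;
}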
++ */ ++ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_trim_fops); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ +} ++#endif /* CONFIG_DEBUG_FS */ + +/** -+ * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for -+ * array of physical pages -+ * -+ * @pages: Array of pages. -+ * @page_count: Number of pages. -+ * @flags: Region flags. -+ * -+ * This function is supposed to be called only if page migration support -+ * is enabled in the driver. ++ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations ++ * @work: Work item + * -+ * The counter of kernel CPU mappings of the physical pages involved in a -+ * mapping operation is incremented by 1. Errors are handled by making pages -+ * not movable. Permanent kernel mappings will be marked as not movable, too. ++ * This function does the work of freeing JIT allocations whose physical ++ * backing has been released. + */ -+static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, -+ size_t page_count, unsigned long flags) ++static void kbase_jit_destroy_worker(struct work_struct *work) +{ -+ size_t i; ++ struct kbase_context *kctx; ++ struct kbase_va_region *reg; + -+ for (i = 0; i < page_count; i++) { -+ struct page *p = as_page(pages[i]); -+ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ kctx = container_of(work, struct kbase_context, jit_work); ++ do { ++ mutex_lock(&kctx->jit_evict_lock); ++ if (list_empty(&kctx->jit_destroy_head)) { ++ mutex_unlock(&kctx->jit_evict_lock); ++ break; ++ } + -+ /* Skip the 4KB page that is part of a large page, as the large page is -+ * excluded from the migration process. -+ */ -+ if (is_huge(pages[i]) || is_partial(pages[i])) -+ continue; ++ reg = list_first_entry(&kctx->jit_destroy_head, ++ struct kbase_va_region, jit_node); + -+ spin_lock(&page_md->migrate_lock); -+ /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely -+ * to stay mapped for a long time. However, keep on counting the number -+ * of mappings even for them: they don't represent an exception for the -+ * vmap_count. -+ * -+ * At the same time, errors need to be handled if a client tries to add -+ * too many mappings, hence a page may end up in the NOT_MOVABLE state -+ * anyway even if it's not a permanent kernel mapping. ++ list_del(®->jit_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ kbase_gpu_vm_lock(kctx); ++ ++ /* ++ * Incrementing the refcount is prevented on JIT regions. ++ * If/when this ever changes we would need to compensate ++ * by implementing "free on putting the last reference", ++ * but only for JIT regions. + */ -+ if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ if (page_md->vmap_count < U8_MAX) -+ page_md->vmap_count++; -+ else -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ spin_unlock(&page_md->migrate_lock); -+ } ++ WARN_ON(atomic_read(®->no_user_free_count) > 1); ++ kbase_va_region_no_user_free_dec(reg); ++ kbase_mem_free_region(kctx, reg); ++ kbase_gpu_vm_unlock(kctx); ++ } while (1); +} + -+/** -+ * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for -+ * array of physical pages -+ * -+ * @pages: Array of pages. -+ * @page_count: Number of pages. -+ * -+ * This function is supposed to be called only if page migration support -+ * is enabled in the driver. 
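/*
 * Illustrative sketch (not part of the patch): a small userspace reader for
 * the per-context JIT counters registered by kbase_jit_debugfs_init() above.
 * The debugfs location of a kbase context directory is an assumption (it is
 * often /sys/kernel/debug/mali0/ctx/<tgid>_<id>, but this varies), so the
 * directory is taken from argv[1]; mem_jit_used and mem_jit_trim only exist
 * when the driver is built with MALI_JIT_PRESSURE_LIMIT_BASE.
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	static const char *const names[] = {
		"mem_jit_count", "mem_jit_vm", "mem_jit_phys",
		"mem_jit_used", "mem_jit_trim",
	};
	char path[512], line[256];
	size_t i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <kbase context debugfs directory>\n", argv[0]);
		return 1;
	}

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path), "%s/%s", argv[1], names[i]);
		f = fopen(path, "r");
		if (!f) {
			fprintf(stderr, "%s: not available\n", names[i]);
			continue;
		}
		while (fgets(line, sizeof(line), f))
			printf("%s: %s", names[i], line);
		fclose(f);
	}
	return 0;
}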
-+ * -+ * The counter of kernel CPU mappings of the physical pages involved in a -+ * mapping operation is decremented by 1. Errors are handled by making pages -+ * not movable. -+ */ -+static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, -+ size_t page_count) ++int kbase_jit_init(struct kbase_context *kctx) +{ -+ size_t i; ++ mutex_lock(&kctx->jit_evict_lock); ++ INIT_LIST_HEAD(&kctx->jit_active_head); ++ INIT_LIST_HEAD(&kctx->jit_pool_head); ++ INIT_LIST_HEAD(&kctx->jit_destroy_head); ++ INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + -+ for (i = 0; i < page_count; i++) { -+ struct page *p = as_page(pages[i]); -+ struct kbase_page_metadata *page_md = kbase_page_private(p); ++#if MALI_USE_CSF ++ mutex_init(&kctx->csf.kcpu_queues.jit_lock); ++ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); ++ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); ++#else /* !MALI_USE_CSF */ ++ INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); ++ INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); ++#endif /* MALI_USE_CSF */ ++ mutex_unlock(&kctx->jit_evict_lock); + -+ /* Skip the 4KB page that is part of a large page, as the large page is -+ * excluded from the migration process. -+ */ -+ if (is_huge(pages[i]) || is_partial(pages[i])) -+ continue; ++ kctx->jit_max_allocations = 0; ++ kctx->jit_current_allocations = 0; ++ kctx->trim_level = 0; + -+ spin_lock(&page_md->migrate_lock); -+ /* Decrement the number of mappings for all kinds of pages, including -+ * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). -+ * However, errors still need to be handled if a client tries to remove -+ * more mappings than created. -+ */ -+ if (page_md->vmap_count == 0) -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ else -+ page_md->vmap_count--; -+ spin_unlock(&page_md->migrate_lock); -+ } ++ return 0; +} + -+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, -+ kbase_vmap_flag vmap_flags) ++/* Check if the allocation from JIT pool is of the same size as the new JIT ++ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets ++ * the alignment requirements. 
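/*
 * Illustrative sketch (not part of the patch): the size/alignment test
 * described in the comment above, restated over plain integers. Names and
 * types are stand-ins, not kbase API. On JM (!MALI_USE_CSF) builds with
 * BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP, the end of the committed area
 * (start_pfn + commit_pages) must fall on an 'extension'-aligned page
 * boundary, with 'extension' (in pages) assumed to be a power of two as
 * implied by the mask arithmetic.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool fits_tiler_align_top(uint64_t pool_nr_pages, uint64_t pool_start_pfn,
				 uint64_t want_va_pages, uint64_t want_commit_pages,
				 uint64_t extension_pages)
{
	/* Virtual sizes must match exactly for the pooled region to be reused. */
	if (pool_nr_pages != want_va_pages)
		return false;

	return ((pool_start_pfn + want_commit_pages) & (extension_pages - 1)) == 0;
}

int main(void)
{
	/* Pooled region at PFN 0x1000: committing 0x200 pages ends on a
	 * 0x200-page boundary (reusable), committing 0x180 pages does not.
	 */
	printf("%d\n", fits_tiler_align_top(0x1000, 0x1000, 0x1000, 0x200, 0x200)); /* 1 */
	printf("%d\n", fits_tiler_align_top(0x1000, 0x1000, 0x1000, 0x180, 0x200)); /* 0 */
	return 0;
}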
++ */ ++static bool meet_size_and_tiler_align_top_requirements( ++ const struct kbase_va_region *walker, ++ const struct base_jit_alloc_info *info) +{ -+ unsigned long page_index; -+ unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; -+ size_t page_count = PFN_UP(offset_in_page + size); -+ struct tagged_addr *page_array; -+ struct page **pages; -+ void *cpu_addr = NULL; -+ pgprot_t prot; -+ size_t i; ++ bool meet_reqs = true; + -+ if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS)) -+ return -EINVAL; ++ if (walker->nr_pages != info->va_pages) ++ meet_reqs = false; + -+ if (WARN_ON(kbase_is_region_invalid_or_free(reg))) -+ return -EINVAL; ++#if !MALI_USE_CSF ++ if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { ++ size_t align = info->extension; ++ size_t align_mask = align - 1; + -+ if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) -+ return -EINVAL; ++ if ((walker->start_pfn + info->commit_pages) & align_mask) ++ meet_reqs = false; ++ } ++#endif /* !MALI_USE_CSF */ + -+ /* check if page_count calculation will wrap */ -+ if (size > ((size_t)-1 / PAGE_SIZE)) -+ return -EINVAL; ++ return meet_reqs; ++} + -+ page_index = offset_bytes >> PAGE_SHIFT; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/* Function will guarantee *@freed will not exceed @pages_needed ++ */ ++static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, ++ struct kbase_va_region *reg, size_t pages_needed, ++ size_t *freed, bool shrink) ++{ ++ int err = 0; ++ size_t available_pages = 0u; ++ const size_t old_pages = kbase_reg_current_backed_size(reg); ++ size_t new_pages = old_pages; ++ size_t to_free = 0u; ++ size_t max_allowed_pages = old_pages; + -+ /* check if page_index + page_count will wrap */ -+ if (-1UL - page_count < page_index) -+ return -EINVAL; ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); + -+ if (page_index + page_count > kbase_reg_current_backed_size(reg)) -+ return -ENOMEM; ++ /* Is this a JIT allocation that has been reported on? */ ++ if (reg->used_pages == reg->nr_pages) ++ goto out; + -+ if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) && -+ (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - -+ atomic_read(&kctx->permanent_mapped_pages)))) { -+ dev_warn( -+ kctx->kbdev->dev, -+ "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", -+ (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, -+ atomic_read(&kctx->permanent_mapped_pages)); -+ return -ENOMEM; ++ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { ++ /* For address based memory usage calculation, the GPU ++ * allocates objects of up to size 's', but aligns every object ++ * to alignment 'a', with a < s. ++ * ++ * It also doesn't have to write to all bytes in an object of ++ * size 's'. ++ * ++ * Hence, we can observe the GPU's address for the end of used ++ * memory being up to (s - a) bytes into the first unallocated ++ * page. ++ * ++ * We allow for this and only warn when it exceeds this bound ++ * (rounded up to page sized units). Note, this is allowed to ++ * exceed reg->nr_pages. 
++ */ ++ max_allowed_pages += PFN_UP( ++ KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - ++ KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); ++ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { ++ /* The GPU could report being ready to write to the next ++ * 'extension' sized chunk, but didn't actually write to it, so we ++ * can report up to 'extension' size pages more than the backed ++ * size. ++ * ++ * Note, this is allowed to exceed reg->nr_pages. ++ */ ++ max_allowed_pages += reg->extension; ++ ++ /* Also note that in these GPUs, the GPU may make a large (>1 ++ * page) initial allocation but not actually write out to all ++ * of it. Hence it might report that a much higher amount of ++ * memory was used than actually was written to. This does not ++ * result in a real warning because on growing this memory we ++ * round up the size of the allocation up to an 'extension' sized ++ * chunk, hence automatically bringing the backed size up to ++ * the reported size. ++ */ + } + -+ if (reg->flags & KBASE_REG_DONT_NEED) -+ return -EINVAL; ++ if (old_pages < reg->used_pages) { ++ /* Prevent overflow on available_pages, but only report the ++ * problem if it's in a scenario where used_pages should have ++ * been consistent with the backed size ++ * ++ * Note: In case of a size-based report, this legitimately ++ * happens in common use-cases: we allow for up to this size of ++ * memory being used, but depending on the content it doesn't ++ * have to use all of it. ++ * ++ * Hence, we're much more quiet about that in the size-based ++ * report case - it's not indicating a real problem, it's just ++ * for information ++ */ ++ if (max_allowed_pages < reg->used_pages) { ++ if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) ++ dev_warn(kctx->kbdev->dev, ++ "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", ++ __func__, ++ old_pages, reg->used_pages, ++ max_allowed_pages, ++ reg->start_pfn << PAGE_SHIFT, ++ reg->nr_pages); ++ else ++ dev_dbg(kctx->kbdev->dev, ++ "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", ++ __func__, ++ old_pages, reg->used_pages, ++ reg->start_pfn << PAGE_SHIFT, ++ reg->nr_pages); ++ } ++ /* In any case, no error condition to report here, caller can ++ * try other regions ++ */ + -+ prot = PAGE_KERNEL; -+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) { -+ /* Map uncached */ -+ prot = pgprot_writecombine(prot); ++ goto out; + } ++ available_pages = old_pages - reg->used_pages; ++ to_free = min(available_pages, pages_needed); + -+ page_array = kbase_get_cpu_phy_pages(reg); -+ if (!page_array) -+ return -ENOMEM; -+ -+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); -+ if (!pages) -+ return -ENOMEM; ++ if (shrink) { ++ new_pages -= to_free; + -+ for (i = 0; i < page_count; i++) -+ pages[i] = as_page(page_array[page_index + i]); ++ err = kbase_mem_shrink(kctx, reg, new_pages); ++ } ++out: ++ trace_mali_jit_trim_from_region(reg, to_free, old_pages, ++ available_pages, new_pages); ++ *freed = to_free; ++ return err; ++} + -+ /* Note: enforcing a RO prot_request onto prot is not done, since: -+ * - CPU-arch-specific integration required -+ * - kbase_vmap() requires no access checks to be made/enforced -+ */ -+ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + -+ /* If page migration is enabled, increment the number of VMA mappings -+ * of all physical pages. In case of errors, e.g. too many mappings, -+ * make the page not movable to prevent trouble. 
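/*
 * Illustrative sketch (not part of the patch): the core accounting of
 * kbase_mem_jit_trim_pages_from_region() above, over plain integers. Only
 * pages beyond the GPU's reported used watermark may be handed back, and
 * never more than the caller asked for. Names are stand-ins, not kbase API.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t pages_to_trim(uint64_t backed_pages, uint64_t used_pages,
			      uint64_t pages_needed)
{
	uint64_t available;

	/* GPU reports more use than is backed: nothing safe to trim here. */
	if (backed_pages < used_pages)
		return 0;

	available = backed_pages - used_pages;
	return available < pages_needed ? available : pages_needed;
}

int main(void)
{
	/* 512 backed pages with 200 reported used: at most 312 can go. */
	printf("%llu\n", (unsigned long long)pages_to_trim(512, 200, 100));  /* 100 */
	printf("%llu\n", (unsigned long long)pages_to_trim(512, 200, 1000)); /* 312 */
	printf("%llu\n", (unsigned long long)pages_to_trim(100, 200, 50));   /* 0 */
	return 0;
}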
-+ */ -+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) -+ kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); ++/** ++ * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been ++ * freed ++ * @kctx: Pointer to the kbase context whose active JIT allocations will be ++ * checked. ++ * @pages_needed: The maximum number of pages to trim. ++ * ++ * This functions checks all active JIT allocations in @kctx for unused pages ++ * at the end, and trim the backed memory regions of those allocations down to ++ * the used portion and free the unused pages into the page pool. ++ * ++ * Specifying @pages_needed allows us to stop early when there's enough ++ * physical memory freed to sufficiently bring down the total JIT physical page ++ * usage (e.g. to below the pressure limit) ++ * ++ * Return: Total number of successfully freed pages ++ */ ++static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, ++ size_t pages_needed) ++{ ++ struct kbase_va_region *reg, *tmp; ++ size_t total_freed = 0; + -+ kfree(pages); ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); + -+ if (!cpu_addr) -+ return -ENOMEM; ++ list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { ++ int err; ++ size_t freed = 0u; + -+ map->offset_in_page = offset_in_page; -+ map->cpu_alloc = reg->cpu_alloc; -+ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; -+ map->gpu_alloc = reg->gpu_alloc; -+ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; -+ map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); -+ map->size = size; -+ map->flags = vmap_flags; -+ if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type)) -+ map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED; ++ err = kbase_mem_jit_trim_pages_from_region(kctx, reg, ++ pages_needed, &freed, true); + -+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) -+ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); ++ if (err) { ++ /* Failed to trim, try the next region */ ++ continue; ++ } + -+ if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) -+ atomic_add(page_count, &kctx->permanent_mapped_pages); ++ total_freed += freed; ++ WARN_ON(freed > pages_needed); ++ pages_needed -= freed; ++ if (!pages_needed) ++ break; ++ } + -+ kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); ++ trace_mali_jit_trim(total_freed); + -+ return 0; ++ return total_freed; +} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, -+ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, -+ kbase_vmap_flag vmap_flags) ++static int kbase_jit_grow(struct kbase_context *kctx, ++ const struct base_jit_alloc_info *info, ++ struct kbase_va_region *reg, ++ struct kbase_sub_alloc **prealloc_sas, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) +{ -+ u64 offset_bytes; -+ struct kbase_mem_phy_alloc *cpu_alloc; -+ struct kbase_mem_phy_alloc *gpu_alloc; -+ int err; -+ -+ lockdep_assert_held(&kctx->reg_lock); ++ size_t delta; ++ size_t pages_required; ++ size_t old_size; ++ struct kbase_mem_pool *pool; ++ int ret = -ENOMEM; ++ struct tagged_addr *gpu_pages; + -+ if (WARN_ON(kbase_is_region_invalid_or_free(reg))) -+ return NULL; ++ if (info->commit_pages > reg->nr_pages) { ++ /* Attempted to grow larger than maximum size */ ++ return -EINVAL; ++ 
} + -+ /* check access permissions can be satisfied -+ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} -+ */ -+ if ((reg->flags & prot_request) != prot_request) -+ return NULL; ++ lockdep_assert_held(&kctx->reg_lock); + -+ offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); -+ cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ /* Make the physical backing no longer reclaimable */ ++ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) ++ goto update_failed; + -+ err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags); -+ if (err < 0) -+ goto fail_vmap_phy_pages; ++ if (reg->gpu_alloc->nents >= info->commit_pages) ++ goto done; + -+ return map->addr; ++ /* Grow the backing */ ++ old_size = reg->gpu_alloc->nents; + -+fail_vmap_phy_pages: -+ kbase_mem_phy_alloc_put(cpu_alloc); -+ kbase_mem_phy_alloc_put(gpu_alloc); -+ return NULL; -+} ++ /* Allocate some more pages */ ++ delta = info->commit_pages - reg->gpu_alloc->nents; ++ pages_required = delta; + -+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ unsigned long prot_request, struct kbase_vmap_struct *map) -+{ -+ struct kbase_va_region *reg; -+ void *addr = NULL; ++ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) { ++ pool = &kctx->mem_pools.large[kctx->jit_group_id]; ++ /* Round up to number of 2 MB pages required */ ++ pages_required += ((SZ_2M / SZ_4K) - 1); ++ pages_required /= (SZ_2M / SZ_4K); ++ } else { ++ pool = &kctx->mem_pools.small[kctx->jit_group_id]; ++ } + -+ kbase_gpu_vm_lock(kctx); ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ pages_required *= 2; + -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); -+ if (kbase_is_region_invalid_or_free(reg)) -+ goto out_unlock; ++ spin_lock(&kctx->mem_partials_lock); ++ kbase_mem_pool_lock(pool); + -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ goto out_unlock; ++ /* As we can not allocate memory from the kernel with the vm_lock held, ++ * grow the pool to the required size with the lock dropped. We hold the ++ * pool lock to prevent another thread from allocating from the pool ++ * between the grow and allocation. ++ */ ++ while (kbase_mem_pool_size(pool) < pages_required) { ++ int pool_delta = pages_required - kbase_mem_pool_size(pool); ++ int ret; + -+ addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); ++ kbase_mem_pool_unlock(pool); ++ spin_unlock(&kctx->mem_partials_lock); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return addr; -+} ++ kbase_gpu_vm_unlock(kctx); ++ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); ++ kbase_gpu_vm_lock(kctx); + -+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ struct kbase_vmap_struct *map) -+{ -+ /* 0 is specified for prot_request to indicate no access checks should -+ * be made. 
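/*
 * Illustrative sketch (not part of the patch): how many entries the
 * kbase_jit_grow() path above requests from the backing pool for a given
 * number of extra small pages. Simplified stand-in, not kbase API: when the
 * 2 MiB pool is chosen the request is counted in 2 MiB pages, and it is
 * doubled when the CPU and GPU allocations are backed separately. A 4 KiB
 * small page size is assumed.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_2MB (2u * 1024u * 1024u / 4096u) /* 512 */

static uint64_t pool_pages_required(uint64_t delta_small_pages,
				    bool use_2mb_pool, bool separate_cpu_gpu)
{
	uint64_t required = delta_small_pages;

	if (use_2mb_pool) /* round up to whole 2 MiB pages */
		required = (required + PAGES_PER_2MB - 1) / PAGES_PER_2MB;

	if (separate_cpu_gpu)
		required *= 2;

	return required;
}

int main(void)
{
	/* Growing by 1000 small pages from the 2 MiB pool needs 2 large pages,
	 * or 4 when CPU and GPU backing are distinct.
	 */
	printf("%llu\n", (unsigned long long)pool_pages_required(1000, true, false));  /* 2 */
	printf("%llu\n", (unsigned long long)pool_pages_required(1000, true, true));   /* 4 */
	printf("%llu\n", (unsigned long long)pool_pages_required(1000, false, false)); /* 1000 */
	return 0;
}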
-+ * -+ * As mentioned in kbase_vmap_prot() this means that a kernel-side -+ * CPU-RO mapping is not enforced to allow this to work -+ */ -+ return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); -+} -+KBASE_EXPORT_TEST_API(kbase_vmap); ++ if (ret) ++ goto update_failed; + -+static void kbase_vunmap_phy_pages(struct kbase_context *kctx, -+ struct kbase_vmap_struct *map) -+{ -+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); ++ spin_lock(&kctx->mem_partials_lock); ++ kbase_mem_pool_lock(pool); ++ } + -+ vunmap(addr); ++ gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, ++ delta, &prealloc_sas[0]); ++ if (!gpu_pages) { ++ kbase_mem_pool_unlock(pool); ++ spin_unlock(&kctx->mem_partials_lock); ++ goto update_failed; ++ } + -+ /* If page migration is enabled, decrement the number of VMA mappings -+ * for all physical pages. Now is a good time to do it because references -+ * haven't been released yet. -+ */ -+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { -+ const size_t page_count = PFN_UP(map->offset_in_page + map->size); -+ struct tagged_addr *pages_array = map->cpu_pages; ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ struct tagged_addr *cpu_pages; + -+ kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); ++ cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, ++ pool, delta, &prealloc_sas[1]); ++ if (!cpu_pages) { ++ kbase_free_phy_pages_helper_locked(reg->gpu_alloc, ++ pool, gpu_pages, delta); ++ kbase_mem_pool_unlock(pool); ++ spin_unlock(&kctx->mem_partials_lock); ++ goto update_failed; ++ } + } ++ kbase_mem_pool_unlock(pool); ++ spin_unlock(&kctx->mem_partials_lock); + -+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) -+ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); -+ if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { -+ size_t page_count = PFN_UP(map->offset_in_page + map->size); ++ ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, ++ old_size, mmu_sync_info); ++ /* ++ * The grow failed so put the allocation back in the ++ * pool and return failure. ++ */ ++ if (ret) ++ goto update_failed; + -+ WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages)); -+ atomic_sub(page_count, &kctx->permanent_mapped_pages); -+ } ++done: ++ ret = 0; + -+ kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); ++ /* Update attributes of JIT allocation taken from the pool */ ++ reg->initial_commit = info->commit_pages; ++ reg->extension = info->extension; + -+ map->offset_in_page = 0; -+ map->cpu_pages = NULL; -+ map->gpu_pages = NULL; -+ map->addr = NULL; -+ map->size = 0; -+ map->flags = 0; ++update_failed: ++ return ret; +} + -+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) ++static void trace_jit_stats(struct kbase_context *kctx, ++ u32 bin_id, u32 max_allocations) +{ -+ kbase_vunmap_phy_pages(kctx, map); -+ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); -+ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); -+} -+KBASE_EXPORT_TEST_API(kbase_vunmap); ++ const u32 alloc_count = ++ kctx->jit_current_allocations_per_bin[bin_id]; ++ struct kbase_device *kbdev = kctx->kbdev; + -+static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) -+{ -+#if (KERNEL_VERSION(6, 6, 0) <= LINUX_VERSION_CODE) -+ /* To avoid the build breakage due to the type change in rss_stat, -+ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2. 
-+ */ -+ percpu_counter_add(&mm->rss_stat[member], value); -+#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE) -+ /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat', -+ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5. -+ */ -+ atomic_long_add(value, &mm->rss_stat.count[member]); -+#else -+ add_mm_counter(mm, member, value); -+#endif -+} ++ struct kbase_va_region *walker; ++ u32 va_pages = 0; ++ u32 ph_pages = 0; + -+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) -+{ -+ struct mm_struct *mm = kctx->process_mm; ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { ++ if (walker->jit_bin_id != bin_id) ++ continue; + -+ if (unlikely(!mm)) -+ return; ++ va_pages += walker->nr_pages; ++ ph_pages += walker->gpu_alloc->nents; ++ } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ atomic_add(pages, &kctx->nonmapped_pages); -+#ifdef SPLIT_RSS_COUNTING -+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -+#else -+ spin_lock(&mm->page_table_lock); -+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -+ spin_unlock(&mm->page_table_lock); -+#endif ++ KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, ++ max_allocations, alloc_count, va_pages, ph_pages); +} + -+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/** ++ * get_jit_phys_backing() - calculate the physical backing of all JIT ++ * allocations ++ * ++ * @kctx: Pointer to the kbase context whose active JIT allocations will be ++ * checked ++ * ++ * Return: number of pages that are committed by JIT allocations ++ */ ++static size_t get_jit_phys_backing(struct kbase_context *kctx) +{ -+ if (vma_pages(vma) != 1) -+ return -EINVAL; ++ struct kbase_va_region *walker; ++ size_t backing = 0; + -+ /* no real access */ -+ vm_flags_clear(vma, (VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); -+ vm_flags_set(vma, (VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO)); ++ lockdep_assert_held(&kctx->jit_evict_lock); + -+ return 0; ++ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { ++ backing += kbase_reg_current_backed_size(walker); ++ } ++ ++ return backing; +} + -+#if MALI_USE_CSF -+static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, -+ struct kbase_queue *queue) ++void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, ++ size_t needed_pages) +{ -+ lockdep_assert_held(&kbdev->csf.reg_lock); ++ size_t jit_backing = 0; ++ size_t pages_to_trim = 0; + -+ /* Return the real Hw doorbell page if queue has been -+ * assigned one, otherwise a dummy page. Always return the -+ * dummy page in no mali builds. ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); ++ ++ jit_backing = get_jit_phys_backing(kctx); ++ ++ /* It is possible that this is the case - if this is the first ++ * allocation after "ignore_pressure_limit" allocation. 
+ */ -+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); -+#else -+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) -+ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); -+#endif -+ return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + -+ (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); -+} ++ if (jit_backing > kctx->jit_phys_pages_limit) { ++ pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + ++ needed_pages; ++ } else { ++ size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; + -+static int -+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \ -+ KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) -+kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma) -+#else -+kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags) -+#endif -+{ -+ pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n"); -+ return -EINVAL; -+} ++ if (needed_pages > backed_diff) ++ pages_to_trim += needed_pages - backed_diff; ++ } + -+static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr) -+{ -+ pr_debug("Unexpected call to split method for User IO pages mapping vma\n"); -+ return -EINVAL; -+} ++ if (pages_to_trim) { ++ size_t trimmed_pages = ++ kbase_mem_jit_trim_pages(kctx, pages_to_trim); + -+static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) -+{ -+ pr_debug("Unexpected call to the open method for User IO pages mapping vma\n"); -+ vma->vm_private_data = NULL; ++ /* This should never happen - we already asserted that ++ * we are not violating JIT pressure limit in earlier ++ * checks, which means that in-flight JIT allocations ++ * must have enough unused pages to satisfy the new ++ * allocation ++ */ ++ WARN_ON(trimmed_pages < pages_to_trim); ++ } +} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) ++/** ++ * jit_allow_allocate() - check whether basic conditions are satisfied to allow ++ * a new JIT allocation ++ * ++ * @kctx: Pointer to the kbase context ++ * @info: Pointer to JIT allocation information for the new allocation ++ * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check ++ * should be ignored ++ * ++ * Return: true if allocation can be executed, false otherwise ++ */ ++static bool jit_allow_allocate(struct kbase_context *kctx, ++ const struct base_jit_alloc_info *info, ++ bool ignore_pressure_limit) +{ -+ struct kbase_queue *queue = vma->vm_private_data; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ int err; -+ bool reset_prevented = false; ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#else /* MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); ++#endif /* !MALI_USE_CSF */ + -+ if (!queue) { -+ pr_debug("Close method called for the new User IO pages mapping vma\n"); -+ return; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit && ++ ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || ++ (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { ++ dev_dbg(kctx->kbdev->dev, ++ "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", ++ kctx->jit_current_phys_pressure + info->va_pages, ++ kctx->jit_phys_pages_limit); ++ return false; + } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ kctx = queue->kctx; -+ kbdev = kctx->kbdev; ++ if 
(kctx->jit_current_allocations >= kctx->jit_max_allocations) { ++ /* Too many current allocations */ ++ dev_dbg(kctx->kbdev->dev, ++ "Max JIT allocations limit reached: active allocations %d, max allocations %d\n", ++ kctx->jit_current_allocations, ++ kctx->jit_max_allocations); ++ return false; ++ } + -+ err = kbase_reset_gpu_prevent_and_wait(kbdev); -+ if (err) -+ dev_warn( -+ kbdev->dev, -+ "Unsuccessful GPU reset detected when unbinding queue (csi_index=%d), attempting to unbind regardless", -+ queue->csi_index); -+ else -+ reset_prevented = true; ++ if (info->max_allocations > 0 && ++ kctx->jit_current_allocations_per_bin[info->bin_id] >= ++ info->max_allocations) { ++ /* Too many current allocations in this bin */ ++ dev_dbg(kctx->kbdev->dev, ++ "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", ++ info->bin_id, ++ kctx->jit_current_allocations_per_bin[info->bin_id], ++ info->max_allocations); ++ return false; ++ } + -+ mutex_lock(&kctx->csf.lock); -+ kbase_csf_queue_unbind(queue, is_process_exiting(vma)); -+ mutex_unlock(&kctx->csf.lock); ++ return true; ++} + -+ if (reset_prevented) -+ kbase_reset_gpu_allow(kbdev); ++static struct kbase_va_region * ++find_reasonable_region(const struct base_jit_alloc_info *info, ++ struct list_head *pool_head, bool ignore_usage_id) ++{ ++ struct kbase_va_region *closest_reg = NULL; ++ struct kbase_va_region *walker; ++ size_t current_diff = SIZE_MAX; + -+ /* Now as the vma is closed, drop the reference on mali device file */ -+ fput(kctx->filp); ++ list_for_each_entry(walker, pool_head, jit_node) { ++ if ((ignore_usage_id || ++ walker->jit_usage_id == info->usage_id) && ++ walker->jit_bin_id == info->bin_id && ++ meet_size_and_tiler_align_top_requirements(walker, info)) { ++ size_t min_size, max_size, diff; ++ ++ /* ++ * The JIT allocations VA requirements have been met, ++ * it's suitable but other allocations might be a ++ * better fit. 
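/*
 * Illustrative sketch (not part of the patch): the two pressure-limit
 * computations above, restated over plain integers. 'pressure' stands for
 * kctx->jit_current_phys_pressure and 'backing' for the summed committed
 * pages of active JIT regions; names are stand-ins, not kbase API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* jit_allow_allocate(): a request is admitted only if its full virtual size
 * still fits under the physical pressure limit.
 */
static bool allocation_allowed(uint64_t limit, uint64_t pressure, uint64_t va_pages)
{
	return pressure < limit && va_pages <= limit - pressure;
}

/* kbase_jit_trim_necessary_pages(): how much must be trimmed from other
 * regions so that 'needed' new pages fit under the limit.
 */
static uint64_t necessary_trim(uint64_t limit, uint64_t backing, uint64_t needed)
{
	if (backing > limit)
		return (backing - limit) + needed;

	return needed > limit - backing ? needed - (limit - backing) : 0;
}

int main(void)
{
	/* Limit 4096 pages, pressure 3500: a 500-page request fits, 600 does
	 * not; with 4000 pages committed, 300 new pages need 204 trimmed.
	 */
	printf("%d\n", allocation_allowed(4096, 3500, 500));                   /* 1 */
	printf("%d\n", allocation_allowed(4096, 3500, 600));                   /* 0 */
	printf("%llu\n", (unsigned long long)necessary_trim(4096, 4000, 300)); /* 204 */
	return 0;
}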
++ */ ++ min_size = min_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ max_size = max_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ diff = max_size - min_size; ++ ++ if (current_diff > diff) { ++ current_diff = diff; ++ closest_reg = walker; ++ } ++ ++ /* The allocation is an exact match */ ++ if (current_diff == 0) ++ break; ++ } ++ } ++ ++ return closest_reg; +} + -+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -+static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, -+ struct vm_fault *vmf) -+{ -+#else -+static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) ++struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, ++ const struct base_jit_alloc_info *info, ++ bool ignore_pressure_limit) +{ -+ struct vm_area_struct *vma = vmf->vma; -+#endif -+ struct kbase_queue *queue = vma->vm_private_data; -+ unsigned long doorbell_cpu_addr, input_cpu_addr, output_cpu_addr; -+ unsigned long doorbell_page_pfn, input_page_pfn, output_page_pfn; -+ pgprot_t doorbell_pgprot, input_page_pgprot, output_page_pgprot; -+ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); -+ vm_fault_t ret; -+ struct kbase_device *kbdev; -+ struct memory_group_manager_device *mgm_dev; ++ struct kbase_va_region *reg = NULL; ++ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; ++ int i; + -+ /* Few sanity checks up front */ -+ if (!queue || (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || -+ (vma->vm_pgoff != queue->db_file_offset)) { -+ pr_warn("Unexpected CPU page fault on User IO pages mapping for process %s tgid %d pid %d\n", -+ current->comm, current->tgid, current->pid); -+ return VM_FAULT_SIGBUS; ++ /* Calls to this function are inherently synchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; ++ ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#else /* MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); ++#endif /* !MALI_USE_CSF */ ++ ++ if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) ++ return NULL; ++ ++ if (kctx->kbdev->pagesize_2mb) { ++ /* Preallocate memory for the sub-allocation structs */ ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { ++ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); ++ if (!prealloc_sas[i]) ++ goto end; ++ } + } + -+ kbdev = queue->kctx->kbdev; -+ mgm_dev = kbdev->mgm_dev; ++ kbase_gpu_vm_lock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); + -+ mutex_lock(&kbdev->csf.reg_lock); ++ /* ++ * Scan the pool for an existing allocation which meets our ++ * requirements and remove it. ++ */ ++ if (info->usage_id != 0) ++ /* First scan for an allocation with the same usage ID */ ++ reg = find_reasonable_region(info, &kctx->jit_pool_head, false); + -+ /* Always map the doorbell page as uncached */ -+ doorbell_pgprot = pgprot_device(vma->vm_page_prot); ++ if (!reg) ++ /* No allocation with the same usage ID, or usage IDs not in ++ * use. Search for an allocation we can reuse. 
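/*
 * Illustrative sketch (not part of the patch): the "closest commit size
 * wins" selection used by find_reasonable_region() above, reduced to an
 * array of already-filtered candidates. kbase computes the distance as
 * max - min of the two page counts, i.e. their absolute difference, and an
 * exact match ends the scan early. Names are stand-ins, not kbase API.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t pick_closest(const uint64_t *candidate_nents, size_t n,
			   uint64_t wanted_commit_pages)
{
	size_t best = n; /* n means "no candidate" */
	uint64_t best_diff = UINT64_MAX;
	size_t i;

	for (i = 0; i < n; i++) {
		uint64_t a = candidate_nents[i], b = wanted_commit_pages;
		uint64_t diff = a > b ? a - b : b - a;

		if (diff < best_diff) {
			best_diff = diff;
			best = i;
		}
		if (best_diff == 0)
			break;
	}
	return best;
}

int main(void)
{
	const uint64_t pool[] = { 64, 256, 1024 };

	/* A request for 300 committed pages reuses the 256-page candidate. */
	printf("%zu\n", pick_closest(pool, 3, 300)); /* 1 */
	return 0;
}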
++ */ ++ reg = find_reasonable_region(info, &kctx->jit_pool_head, true); + -+ if (kbdev->system_coherency == COHERENCY_NONE) { -+ input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); -+ output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); -+ } else { -+ input_page_pgprot = vma->vm_page_prot; -+ output_page_pgprot = vma->vm_page_prot; -+ } ++ if (reg) { ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ size_t needed_pages = 0; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ int ret; + -+ doorbell_cpu_addr = vma->vm_start; ++ /* ++ * Remove the found region from the pool and add it to the ++ * active list. ++ */ ++ list_move(®->jit_node, &kctx->jit_active_head); + -+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE -+ if ((unsigned long)vmf->virtual_address == doorbell_cpu_addr) { -+#else -+ if (vmf->address == doorbell_cpu_addr) { ++ WARN_ON(reg->gpu_alloc->evicted); ++ ++ /* ++ * Remove the allocation from the eviction list as it's no ++ * longer eligible for eviction. This must be done before ++ * dropping the jit_evict_lock ++ */ ++ list_del_init(®->gpu_alloc->evict_node); ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) { ++ if (info->commit_pages > reg->gpu_alloc->nents) ++ needed_pages = info->commit_pages - ++ reg->gpu_alloc->nents; ++ ++ /* Update early the recycled JIT region's estimate of ++ * used_pages to ensure it doesn't get trimmed ++ * undesirably. This is needed as the recycled JIT ++ * region has been added to the active list but the ++ * number of used pages for it would be zero, so it ++ * could get trimmed instead of other allocations only ++ * to be regrown later resulting in a breach of the JIT ++ * physical pressure limit. ++ * Also that trimming would disturb the accounting of ++ * physical pages, i.e. the VM stats, as the number of ++ * backing pages would have changed when the call to ++ * kbase_mem_evictable_unmark_reclaim is made. ++ * ++ * The second call to update pressure at the end of ++ * this function would effectively be a nop. ++ */ ++ kbase_jit_report_update_pressure( ++ kctx, reg, info->va_pages, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ ++ kbase_jit_request_phys_increase_locked(kctx, ++ needed_pages); ++ } +#endif -+ doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); -+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, -+ KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, -+ doorbell_page_pfn, doorbell_pgprot); ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', ++ * so any state protected by that lock might need to be ++ * re-evaluated if more code is added here in future. ++ */ ++ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, ++ mmu_sync_info); ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) ++ kbase_jit_done_phys_increase(kctx, needed_pages); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ kbase_gpu_vm_unlock(kctx); ++ ++ if (ret < 0) { ++ /* ++ * An update to an allocation from the pool failed, ++ * chances are slim a new allocation would fare any ++ * better so return the allocation to the pool and ++ * return the function with failure. ++ */ ++ dev_dbg(kctx->kbdev->dev, ++ "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", ++ info->va_pages, info->commit_pages); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* Undo the early change made to the recycled JIT ++ * region's estimate of used_pages. 
++ */ ++ if (!ignore_pressure_limit) { ++ kbase_jit_report_update_pressure( ++ kctx, reg, 0, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ mutex_lock(&kctx->jit_evict_lock); ++ list_move(®->jit_node, &kctx->jit_pool_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ reg = NULL; ++ goto end; ++ } else { ++ /* A suitable JIT allocation existed on the evict list, so we need ++ * to make sure that the NOT_MOVABLE property is cleared. ++ */ ++ if (kbase_page_migration_enabled) { ++ kbase_gpu_vm_lock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); ++ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_gpu_vm_unlock(kctx); ++ } ++ } + } else { -+ /* Map the Input page */ -+ input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; -+ input_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[0])); -+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, -+ KBASE_MEM_GROUP_CSF_IO, vma, input_cpu_addr, -+ input_page_pfn, input_page_pgprot); -+ if (ret != VM_FAULT_NOPAGE) -+ goto exit; ++ /* No suitable JIT allocation was found so create a new one */ ++ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | ++ BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | ++ BASE_MEM_COHERENT_LOCAL | ++ BASEP_MEM_NO_USER_FREE; ++ u64 gpu_addr; + -+ /* Map the Output page */ -+ output_cpu_addr = input_cpu_addr + PAGE_SIZE; -+ output_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[1])); -+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, -+ KBASE_MEM_GROUP_CSF_IO, vma, output_cpu_addr, -+ output_page_pfn, output_page_pgprot); ++#if !MALI_USE_CSF ++ if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) ++ flags |= BASE_MEM_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ ++ ++ flags |= kbase_mem_group_id_set(kctx->jit_group_id); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) { ++ flags |= BASEP_MEM_PERFORM_JIT_TRIM; ++ /* The corresponding call to 'done_phys_increase' would ++ * be made inside the kbase_mem_alloc(). ++ */ ++ kbase_jit_request_phys_increase_locked( ++ kctx, info->commit_pages); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_gpu_vm_unlock(kctx); ++ ++ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, ++ &flags, &gpu_addr, mmu_sync_info); ++ if (!reg) { ++ /* Most likely not enough GPU virtual space left for ++ * the new JIT allocation. ++ */ ++ dev_dbg(kctx->kbdev->dev, ++ "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", ++ info->va_pages, info->commit_pages); ++ goto end; ++ } ++ ++ if (!ignore_pressure_limit) { ++ /* Due to enforcing of pressure limit, kbase_mem_alloc ++ * was instructed to perform the trimming which in turn ++ * would have ensured that the new JIT allocation is ++ * already in the jit_active_head list, so nothing to ++ * do here. ++ */ ++ WARN_ON(list_empty(®->jit_node)); ++ } else { ++ mutex_lock(&kctx->jit_evict_lock); ++ list_add(®->jit_node, &kctx->jit_active_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ } + } + -+exit: -+ mutex_unlock(&kbdev->csf.reg_lock); -+ return ret; -+} ++ /* Similarly to tiler heap init, there is a short window of time ++ * where the (either recycled or newly allocated, in our case) region has ++ * "no user free" count incremented but is still missing the DONT_NEED flag, and ++ * doesn't yet have the ACTIVE_JIT_ALLOC flag either. 
Temporarily leaking the ++ * allocation is the least bad option that doesn't lead to a security issue down the ++ * line (it will eventually be cleaned up during context termination). ++ * ++ * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region ++ * flags. ++ */ ++ kbase_gpu_vm_lock(kctx); ++ if (unlikely(atomic_read(®->no_user_free_count) > 1)) { ++ kbase_gpu_vm_unlock(kctx); ++ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n"); + -+static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { -+ .open = kbase_csf_user_io_pages_vm_open, -+ .close = kbase_csf_user_io_pages_vm_close, -+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE -+ .may_split = kbase_csf_user_io_pages_vm_split, -+#else -+ .split = kbase_csf_user_io_pages_vm_split, -+#endif -+ .mremap = kbase_csf_user_io_pages_vm_mremap, -+ .fault = kbase_csf_user_io_pages_vm_fault -+}; ++ mutex_lock(&kctx->jit_evict_lock); ++ list_move(®->jit_node, &kctx->jit_pool_head); ++ mutex_unlock(&kctx->jit_evict_lock); + -+/* Program the client process's page table entries to map the pair of -+ * input/output pages & Hw doorbell page. The caller should have validated that -+ * vma->vm_pgoff maps to the range of csf cookies. -+ */ -+static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, -+ struct vm_area_struct *vma) -+{ -+ unsigned long cookie = -+ vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); -+ size_t nr_pages = vma_pages(vma); -+ struct kbase_queue *queue; -+ int err = 0; ++ reg = NULL; ++ goto end; ++ } + -+ lockdep_assert_held(&kctx->csf.lock); ++ trace_mali_jit_alloc(reg, info->id); + -+ queue = kctx->csf.user_pages_info[cookie]; ++ kctx->jit_current_allocations++; ++ kctx->jit_current_allocations_per_bin[info->bin_id]++; + -+ /* Looks like the bind has been aborted */ -+ if (!queue) -+ return -EINVAL; ++ trace_jit_stats(kctx, info->bin_id, info->max_allocations); + -+ if (WARN_ON(test_bit(cookie, kctx->csf.cookies))) -+ return -EINVAL; ++ reg->jit_usage_id = info->usage_id; ++ reg->jit_bin_id = info->bin_id; ++ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) ++ reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; ++ reg->heap_info_gpu_addr = info->heap_info_gpu_addr; ++ kbase_jit_report_update_pressure(kctx, reg, info->va_pages, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ kbase_gpu_vm_unlock(kctx); + -+ /* no need for the cookie anymore */ -+ kctx->csf.user_pages_info[cookie] = NULL; -+ bitmap_set(kctx->csf.cookies, cookie, 1); ++end: ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) ++ kfree(prealloc_sas[i]); + -+ /* Reset the handle to avoid (re)freeing the cookie (which can -+ * now get re-assigned) on unbind. 
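/*
 * Illustrative sketch (not part of the patch): the freeing-time shrink in
 * kbase_jit_free() above frees up to 'trim_level' percent of the region's
 * current backing but never drops below its initial commit. Plain-integer
 * restatement, not kbase API; trim_level is 0-100.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t size_after_free(uint64_t backed_pages, uint64_t initial_commit,
				unsigned int trim_level)
{
	uint64_t kept;

	if (initial_commit >= backed_pages)
		return backed_pages; /* nothing to shrink */

	kept = backed_pages * (100 - trim_level) / 100;
	return kept > initial_commit ? kept : initial_commit;
}

int main(void)
{
	/* 1000 backed pages, 64 initially committed: trim_level 25 keeps 750
	 * pages, trim_level 100 falls back to the initial 64.
	 */
	printf("%llu\n", (unsigned long long)size_after_free(1000, 64, 25));  /* 750 */
	printf("%llu\n", (unsigned long long)size_after_free(1000, 64, 100)); /* 64 */
	return 0;
}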
-+ */ -+ queue->handle = BASEP_MEM_INVALID_HANDLE; ++ return reg; ++} + -+ if (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) { -+ err = -EINVAL; -+ goto map_failed; ++void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) ++{ ++ u64 old_pages; ++ ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#else /* MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); ++#endif /* !MALI_USE_CSF */ ++ ++ /* JIT id not immediately available here, so use 0u */ ++ trace_mali_jit_free(reg, 0u); ++ ++ /* Get current size of JIT region */ ++ old_pages = kbase_reg_current_backed_size(reg); ++ if (reg->initial_commit < old_pages) { ++ /* Free trim_level % of region, but don't go below initial ++ * commit size ++ */ ++ u64 new_size = MAX(reg->initial_commit, ++ div_u64(old_pages * (100 - kctx->trim_level), 100)); ++ u64 delta = old_pages - new_size; ++ ++ if (delta) { ++ mutex_lock(&kctx->reg_lock); ++ kbase_mem_shrink(kctx, reg, old_pages - delta); ++ mutex_unlock(&kctx->reg_lock); ++ } + } + -+ err = kbase_csf_alloc_command_stream_user_pages(kctx, queue); -+ if (err) -+ goto map_failed; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ reg->heap_info_gpu_addr = 0; ++ kbase_jit_report_update_pressure(kctx, reg, 0, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); -+ /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of -+ * memory with and without "struct page" backing are being inserted here. -+ * Hw Doorbell pages comes from the device register area so kernel does -+ * not use "struct page" for them. -+ */ -+ vm_flags_set(vma, VM_PFNMAP); ++ kctx->jit_current_allocations--; ++ kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + -+ vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; -+ vma->vm_private_data = queue; ++ trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + -+ /* Make vma point to the special internal file, but don't drop the -+ * reference on mali device file (that would be done later when the -+ * vma is closed). -+ */ -+ vma->vm_file = kctx->kbdev->csf.db_filp; -+ get_file(vma->vm_file); -+ /* Also adjust the vm_pgoff */ -+ vma->vm_pgoff = queue->db_file_offset; ++ kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); + -+ return 0; ++ kbase_gpu_vm_lock(kctx); ++ reg->flags |= KBASE_REG_DONT_NEED; ++ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; ++ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); ++ kbase_gpu_vm_unlock(kctx); + -+map_failed: -+ /* The queue cannot have got to KBASE_CSF_QUEUE_BOUND state if we -+ * reached here, so safe to use a variant of unbind that only works on -+ * stopped queues -+ * -+ * This is so we don't enter the CSF scheduler from this path. ++ /* ++ * Add the allocation to the eviction list and the jit pool, after this ++ * point the shrink can reclaim it, or it may be reused. + */ -+ kbase_csf_queue_unbind_stopped(queue); ++ mutex_lock(&kctx->jit_evict_lock); + -+ return err; -+} ++ /* This allocation can't already be on a list. */ ++ WARN_ON(!list_empty(®->gpu_alloc->evict_node)); ++ list_add(®->gpu_alloc->evict_node, &kctx->evict_list); ++ atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents); + -+/** -+ * kbase_csf_user_reg_vm_open - VMA open function for the USER page -+ * -+ * @vma: Pointer to the struct containing information about -+ * the userspace mapping of USER page. -+ * Note: -+ * This function isn't expected to be called. 
If called (i.e> mremap), -+ * set private_data as NULL to indicate to close() and fault() functions. -+ */ -+static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) -+{ -+ pr_debug("Unexpected call to the open method for USER register mapping"); -+ vma->vm_private_data = NULL; ++ list_move(®->jit_node, &kctx->jit_pool_head); ++ ++ /* Inactive JIT regions should be freed by the shrinker and not impacted ++ * by page migration. Once freed, they will enter into the page migration ++ * state machine via the mempools. ++ */ ++ if (kbase_page_migration_enabled) ++ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); ++ mutex_unlock(&kctx->jit_evict_lock); +} + -+/** -+ * kbase_csf_user_reg_vm_close - VMA close function for the USER page -+ * -+ * @vma: Pointer to the struct containing information about -+ * the userspace mapping of USER page. -+ */ -+static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) ++void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ -+ struct kbase_context *kctx = vma->vm_private_data; -+ struct kbase_device *kbdev; ++ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + -+ if (unlikely(!kctx)) { -+ pr_debug("Close function called for the unexpected mapping"); ++ if (WARN_ON(!kctx)) + return; -+ } -+ -+ kbdev = kctx->kbdev; + -+ if (unlikely(!kctx->csf.user_reg.vma)) -+ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", -+ kctx->tgid, kctx->id); ++ lockdep_assert_held(&kctx->jit_evict_lock); + -+ mutex_lock(&kbdev->csf.reg_lock); -+ list_del_init(&kctx->csf.user_reg.link); -+ mutex_unlock(&kbdev->csf.reg_lock); ++ /* ++ * JIT allocations will always be on a list, if the region ++ * is not on a list then it's not a JIT allocation. ++ */ ++ if (list_empty(®->jit_node)) ++ return; + -+ kctx->csf.user_reg.vma = NULL; ++ /* ++ * Freeing the allocation requires locks we might not be able ++ * to take now, so move the allocation to the free list and kick ++ * the worker which will do the freeing. ++ */ ++ list_move(®->jit_node, &kctx->jit_destroy_head); + -+ /* Now as the VMA is closed, drop the reference on mali device file */ -+ fput(kctx->filp); ++ schedule_work(&kctx->jit_work); +} + -+/** -+ * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page -+ * -+ * @vma: Pointer to the struct containing information about -+ * the userspace mapping of USER page. -+ * -+ * Return: -EINVAL -+ * -+ * Note: -+ * User space must not attempt mremap on USER page mapping. -+ * This function will return an error to fail the attempt. -+ */ -+static int -+#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ -+ (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) -+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) -+#else -+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) -+#endif ++bool kbase_jit_evict(struct kbase_context *kctx) +{ -+ pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); -+ return -EINVAL; ++ struct kbase_va_region *reg = NULL; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* Free the oldest allocation from the pool */ ++ mutex_lock(&kctx->jit_evict_lock); ++ if (!list_empty(&kctx->jit_pool_head)) { ++ reg = list_entry(kctx->jit_pool_head.prev, ++ struct kbase_va_region, jit_node); ++ list_del(®->jit_node); ++ list_del_init(®->gpu_alloc->evict_node); ++ } ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ if (reg) { ++ /* ++ * Incrementing the refcount is prevented on JIT regions. 
++ * If/when this ever changes we would need to compensate ++ * by implementing "free on putting the last reference", ++ * but only for JIT regions. ++ */ ++ WARN_ON(atomic_read(®->no_user_free_count) > 1); ++ kbase_va_region_no_user_free_dec(reg); ++ kbase_mem_free_region(kctx, reg); ++ } ++ ++ return (reg != NULL); +} + -+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -+static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, -+ struct vm_fault *vmf) -+{ -+#else -+static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) ++void kbase_jit_term(struct kbase_context *kctx) +{ -+ struct vm_area_struct *vma = vmf->vma; -+#endif -+ struct kbase_context *kctx = vma->vm_private_data; -+ struct kbase_device *kbdev; -+ struct memory_group_manager_device *mgm_dev; -+ unsigned long pfn; -+ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); -+ vm_fault_t ret = VM_FAULT_SIGBUS; -+ unsigned long flags; ++ struct kbase_va_region *walker; + -+ /* Few sanity checks up front */ ++ /* Free all allocations for this context */ + -+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) || -+ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) { -+ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", -+ current->comm, current->tgid, current->pid); -+ return VM_FAULT_SIGBUS; ++ kbase_gpu_vm_lock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); ++ /* Free all allocations from the pool */ ++ while (!list_empty(&kctx->jit_pool_head)) { ++ walker = list_first_entry(&kctx->jit_pool_head, ++ struct kbase_va_region, jit_node); ++ list_del(&walker->jit_node); ++ list_del_init(&walker->gpu_alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ /* ++ * Incrementing the refcount is prevented on JIT regions. ++ * If/when this ever changes we would need to compensate ++ * by implementing "free on putting the last reference", ++ * but only for JIT regions. ++ */ ++ WARN_ON(atomic_read(&walker->no_user_free_count) > 1); ++ kbase_va_region_no_user_free_dec(walker); ++ kbase_mem_free_region(kctx, walker); ++ mutex_lock(&kctx->jit_evict_lock); + } + -+ kbdev = kctx->kbdev; -+ mgm_dev = kbdev->mgm_dev; -+ pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); ++ /* Free all allocations from active list */ ++ while (!list_empty(&kctx->jit_active_head)) { ++ walker = list_first_entry(&kctx->jit_active_head, ++ struct kbase_va_region, jit_node); ++ list_del(&walker->jit_node); ++ list_del_init(&walker->gpu_alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ /* ++ * Incrementing the refcount is prevented on JIT regions. ++ * If/when this ever changes we would need to compensate ++ * by implementing "free on putting the last reference", ++ * but only for JIT regions. ++ */ ++ WARN_ON(atomic_read(&walker->no_user_free_count) > 1); ++ kbase_va_region_no_user_free_dec(walker); ++ kbase_mem_free_region(kctx, walker); ++ mutex_lock(&kctx->jit_evict_lock); ++ } ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ WARN_ON(kctx->jit_phys_pages_to_be_allocated); ++#endif ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_gpu_vm_unlock(kctx); + -+ mutex_lock(&kbdev->csf.reg_lock); ++ /* ++ * Flush the freeing of allocations whose backing has been freed ++ * (i.e. everything in jit_destroy_head). ++ */ ++ cancel_work_sync(&kctx->jit_work); ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ /* Dummy page will be mapped during GPU off. 
++#if MALI_JIT_PRESSURE_LIMIT_BASE ++void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned int flags) ++{ ++ /* Offset to the location used for a JIT report within the GPU memory + * -+ * In no mail builds, always map in the dummy page. ++ * This constants only used for this debugging function - not useful ++ * anywhere else in kbase + */ -+ if (IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) || !kbdev->pm.backend.gpu_powered) -+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page)); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + -+ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list); -+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, -+ KBASE_MEM_GROUP_CSF_FW, vma, -+ vma->vm_start, pfn, -+ vma->vm_page_prot); ++ u64 addr_start; ++ struct kbase_vmap_struct mapping; ++ u64 *ptr; + -+ mutex_unlock(&kbdev->csf.reg_lock); ++ if (reg->heap_info_gpu_addr == 0ull) ++ goto out; + -+ return ret; -+} ++ /* Nothing else to trace in the case the memory just contains the ++ * size. Other tracepoints already record the relevant area of memory. ++ */ ++ if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ++ goto out; + -+static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { -+ .open = kbase_csf_user_reg_vm_open, -+ .close = kbase_csf_user_reg_vm_close, -+ .mremap = kbase_csf_user_reg_vm_mremap, -+ .fault = kbase_csf_user_reg_vm_fault -+}; ++ addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + -+static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, -+ struct vm_area_struct *vma) ++ ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, ++ KBASE_REG_CPU_RD, &mapping); ++ if (!ptr) { ++ dev_warn(kctx->kbdev->dev, ++ "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", ++ __func__, reg->start_pfn << PAGE_SHIFT, ++ addr_start); ++ goto out; ++ } ++ ++ trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, ++ ptr, flags); ++ ++ kbase_vunmap(kctx, &mapping); ++out: ++ return; ++} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++void kbase_jit_report_update_pressure(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 new_used_pages, ++ unsigned int flags) +{ -+ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); -+ struct kbase_device *kbdev = kctx->kbdev; ++ u64 diff; + -+ /* Few sanity checks */ -+ if (kctx->csf.user_reg.vma) -+ return -EBUSY; ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + -+ if (nr_pages != 1) -+ return -EINVAL; ++ trace_mali_jit_report_pressure(reg, new_used_pages, ++ kctx->jit_current_phys_pressure + new_used_pages - ++ reg->used_pages, ++ flags); + -+ if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) -+ return -EPERM; ++ if (WARN_ON(new_used_pages > reg->nr_pages)) ++ return; + -+ /* Map uncached */ -+ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); ++ if (reg->used_pages > new_used_pages) { ++ /* We reduced the number of used pages */ ++ diff = reg->used_pages - new_used_pages; + -+ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; ++ if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) ++ kctx->jit_current_phys_pressure -= diff; + -+ /* User register page comes from the device register area so -+ * "struct page" isn't available for it. 
-+ */ -+ vma->vm_flags |= VM_PFNMAP; ++ reg->used_pages = new_used_pages; ++ } else { ++ /* We increased the number of used pages */ ++ diff = new_used_pages - reg->used_pages; + -+ kctx->csf.user_reg.vma = vma; ++ if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) ++ kctx->jit_current_phys_pressure += diff; + -+ mutex_lock(&kbdev->csf.reg_lock); -+ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++; -+ mutex_unlock(&kbdev->csf.reg_lock); ++ reg->used_pages = new_used_pages; ++ } + -+ /* Make VMA point to the special internal file, but don't drop the -+ * reference on mali device file (that would be done later when the -+ * VMA is closed). ++} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++#if MALI_USE_CSF ++static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) ++{ ++ /* In CSF builds, we keep pages pinned until the last reference is ++ * released on the alloc. A refcount of 0 also means we can be sure ++ * that all CPU mappings have been closed on this alloc, and no more ++ * mappings of it will be created. ++ * ++ * Further, the WARN() below captures the restriction that this ++ * function will not handle anything other than the alloc termination ++ * path, because the caller of kbase_mem_phy_alloc_put() is not ++ * required to hold the kctx's reg_lock, and so we could not handle ++ * removing an existing CPU mapping here. ++ * ++ * Refer to this function's kernel-doc comments for alternatives for ++ * unpinning a User buffer. + */ -+ vma->vm_file = kctx->kbdev->csf.user_reg.filp; -+ get_file(vma->vm_file); + -+ /* Also adjust the vm_pgoff */ -+ vma->vm_pgoff = kctx->csf.user_reg.file_offset; -+ vma->vm_ops = &kbase_csf_user_reg_vm_ops; -+ vma->vm_private_data = kctx; ++ if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, ++ "must only be called on terminating an allocation")) { ++ struct page **pages = alloc->imported.user_buf.pages; ++ long i; + -+ return 0; ++ WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); ++ ++ for (i = 0; i < alloc->nents; i++) ++ kbase_unpin_user_buf_page(pages[i]); ++ ++ alloc->nents = 0; ++ } +} ++#endif + -+#endif /* MALI_USE_CSF */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h -new file mode 100644 -index 000000000..6dda44b9f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h -@@ -0,0 +1,533 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010, 2012-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ struct page **pages = alloc->imported.user_buf.pages; ++ unsigned long address = alloc->imported.user_buf.address; ++ struct mm_struct *mm = alloc->imported.user_buf.mm; ++ long pinned_pages; ++ long i; ++ int write; + -+/** -+ * DOC: Base kernel memory APIs, Linux implementation. -+ */ ++ lockdep_assert_held(&kctx->reg_lock); + -+#ifndef _KBASE_MEM_LINUX_H_ -+#define _KBASE_MEM_LINUX_H_ ++ if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) ++ return -EINVAL; + -+/* A HWC dump mapping */ -+struct kbase_hwc_dma_mapping { -+ void *cpu_va; -+ dma_addr_t dma_pa; -+ size_t size; -+}; ++ if (alloc->nents) { ++ if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages)) ++ return -EINVAL; ++ else ++ return 0; ++ } + -+/** -+ * kbase_mem_alloc - Create a new allocation for GPU -+ * -+ * @kctx: The kernel context -+ * @va_pages: The number of pages of virtual address space to reserve -+ * @commit_pages: The number of physical pages to allocate upfront -+ * @extension: The number of extra pages to allocate on each GPU fault which -+ * grows the region. -+ * @flags: bitmask of BASE_MEM_* flags to convey special requirements & -+ * properties for the new allocation. -+ * @gpu_va: Start address of the memory region which was allocated from GPU -+ * virtual address space. If the BASE_MEM_FLAG_MAP_FIXED is set -+ * then this parameter shall be provided by the caller. -+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. -+ * -+ * Return: 0 on success or error code -+ */ -+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, -+ u64 extension, u64 *flags, u64 *gpu_va, -+ enum kbase_caller_mmu_sync_info mmu_sync_info); ++ if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) ++ return -EINVAL; + -+/** -+ * kbase_mem_query - Query properties of a GPU memory region -+ * -+ * @kctx: The kernel context -+ * @gpu_addr: A GPU address contained within the memory region -+ * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be -+ * regarding the amount of backing physical memory allocated so far -+ * for the region or the size of the region or the flags associated -+ * with the region. -+ * @out: Pointer to the location to store the result of query. -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, -+ u64 *const out); ++ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + -+/** -+ * kbase_mem_import - Import the external memory for use by the GPU -+ * -+ * @kctx: The kernel context -+ * @type: Type of external memory -+ * @phandle: Handle to the external memory interpreted as per the type. -+ * @padding: Amount of extra VA pages to append to the imported buffer -+ * @gpu_va: GPU address assigned to the imported external memory -+ * @va_pages: Size of the memory region reserved from the GPU address space -+ * @flags: bitmask of BASE_MEM_* flags to convey special requirements & -+ * properties for the new allocation representing the external -+ * memory. 
-+ * Return: 0 on success or error code -+ */ -+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, -+ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, -+ u64 *flags); ++#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE ++ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, ++ write ? FOLL_WRITE : 0, pages, NULL); ++#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE ++ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, ++ write ? FOLL_WRITE : 0, pages, NULL, NULL); ++#else ++ pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, ++ write ? FOLL_WRITE : 0, pages, NULL); ++#endif + -+/** -+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more -+ * memory regions -+ * -+ * @kctx: The kernel context -+ * @flags: bitmask of BASE_MEM_* flags. -+ * @stride: Bytes between start of each memory region -+ * @nents: The number of regions to pack together into the alias -+ * @ai: Pointer to the struct containing the memory aliasing info -+ * @num_pages: Number of pages the alias will cover -+ * -+ * Return: 0 on failure or otherwise the GPU VA for the alias -+ */ -+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); ++ if (pinned_pages <= 0) ++ return pinned_pages; + -+/** -+ * kbase_mem_flags_change - Change the flags for a memory region -+ * -+ * @kctx: The kernel context -+ * @gpu_addr: A GPU address contained within the memory region to modify. -+ * @flags: The new flags to set -+ * @mask: Mask of the flags, from BASE_MEM_*, to modify. -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); ++ if (pinned_pages != alloc->imported.user_buf.nr_pages) { ++ /* Above code already ensures there will not have been a CPU ++ * mapping by ensuring alloc->nents is 0 ++ */ ++ for (i = 0; i < pinned_pages; i++) ++ kbase_unpin_user_buf_page(pages[i]); ++ return -ENOMEM; ++ } + -+/** -+ * kbase_mem_commit - Change the physical backing size of a region -+ * -+ * @kctx: The kernel context -+ * @gpu_addr: Handle to the memory region -+ * @new_pages: Number of physical pages to back the region with -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); ++ alloc->nents = pinned_pages; + -+/** -+ * kbase_mem_shrink - Shrink the physical backing size of a region -+ * -+ * @kctx: The kernel context -+ * @reg: The GPU region -+ * @new_pages: Number of physical pages to back the region with -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_mem_shrink(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 new_pages); ++ return 0; ++} + -+/** -+ * kbase_context_mmap - Memory map method, gets invoked when mmap system call is -+ * issued on device file /dev/malixx. -+ * @kctx: The kernel context -+ * @vma: Pointer to the struct containing the info where the GPU allocation -+ * will be mapped in virtual address space of CPU. 
-+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma); ++static int kbase_jd_user_buf_map(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ int err; ++ long pinned_pages = 0; ++ struct kbase_mem_phy_alloc *alloc; ++ struct page **pages; ++ struct tagged_addr *pa; ++ long i, dma_mapped_pages; ++ struct device *dev; ++ unsigned long gwt_mask = ~0; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+/** -+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction -+ * mechanism. -+ * @kctx: The kbase context to initialize. -+ * -+ * Return: Zero on success or -errno on failure. -+ */ -+int kbase_mem_evictable_init(struct kbase_context *kctx); ++ lockdep_assert_held(&kctx->reg_lock); + -+/** -+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction -+ * mechanism. -+ * @kctx: The kbase context to de-initialize. -+ */ -+void kbase_mem_evictable_deinit(struct kbase_context *kctx); ++ err = kbase_jd_user_buf_pin_pages(kctx, reg); + -+/** -+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region -+ * @new_pages: The number of pages after the grow -+ * @old_pages: The number of pages before the grow -+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. -+ * -+ * Return: 0 on success, -errno on error. -+ * -+ * Expand the GPU mapping to encompass the new psychical pages which have -+ * been added to the allocation. -+ * -+ * Note: Caller must be holding the region lock. -+ */ -+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 new_pages, -+ u64 old_pages, -+ enum kbase_caller_mmu_sync_info mmu_sync_info); ++ if (err) ++ return err; + -+/** -+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction -+ * @gpu_alloc: The physical allocation to make evictable -+ * -+ * Return: 0 on success, -errno on error. -+ * -+ * Take the provided region and make all the physical pages within it -+ * reclaimable by the kernel, updating the per-process VM stats as well. -+ * Remove any CPU mappings (as these can't be removed in the shrinker callback -+ * as mmap_sem/mmap_lock might already be taken) but leave the GPU mapping -+ * intact as and until the shrinker reclaims the allocation. -+ * -+ * Note: Must be called with the region lock of the containing context. -+ */ -+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); ++ alloc = reg->gpu_alloc; ++ pa = kbase_get_gpu_phy_pages(reg); ++ pinned_pages = alloc->nents; ++ pages = alloc->imported.user_buf.pages; ++ dev = kctx->kbdev->dev; + -+/** -+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for -+ * eviction. -+ * @alloc: The physical allocation to remove eviction eligibility from. -+ * -+ * Return: True if the allocation had its backing restored and false if -+ * it hasn't. -+ * -+ * Make the physical pages in the region no longer reclaimable and update the -+ * per-process stats, if the shrinker has already evicted the memory then -+ * re-allocate it if the region is still alive. -+ * -+ * Note: Must be called with the region lock of the containing context. -+ */ -+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); ++ /* Manual CPU cache synchronization. 
++ * ++ * The driver disables automatic CPU cache synchronization because the ++ * memory pages that enclose the imported region may also contain ++ * sub-regions which are not imported and that are allocated and used ++ * by the user process. This may be the case of memory at the beginning ++ * of the first page and at the end of the last page. Automatic CPU cache ++ * synchronization would force some operations on those memory allocations, ++ * unbeknown to the user process: in particular, a CPU cache invalidate ++ * upon unmapping would destroy the content of dirty CPU caches and cause ++ * the user process to lose CPU writes to the non-imported sub-regions. ++ * ++ * When the GPU claims ownership of the imported memory buffer, it shall ++ * commit CPU writes for the whole of all pages that enclose the imported ++ * region, otherwise the initial content of memory would be wrong. ++ */ ++ for (i = 0; i < pinned_pages; i++) { ++ dma_addr_t dma_addr; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); ++#else ++ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, ++ DMA_ATTR_SKIP_CPU_SYNC); ++#endif ++ err = dma_mapping_error(dev, dma_addr); ++ if (err) ++ goto unwind; + -+typedef unsigned int kbase_vmap_flag; ++ alloc->imported.user_buf.dma_addrs[i] = dma_addr; ++ pa[i] = as_tagged(page_to_phys(pages[i])); + -+/* Sync operations are needed on beginning and ending of access to kernel-mapped GPU memory. -+ * -+ * This is internal to the struct kbase_vmap_struct and should not be passed in by callers of -+ * kbase_vmap-related functions. -+ */ -+#define KBASE_VMAP_FLAG_SYNC_NEEDED (((kbase_vmap_flag)1) << 0) ++ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } + -+/* Permanently mapped memory accounting (including enforcing limits) should be done on the -+ * kernel-mapped GPU memory. -+ * -+ * This should be used if the kernel mapping is going to live for a potentially long time, for -+ * example if it will persist after the caller has returned. -+ */ -+#define KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING (((kbase_vmap_flag)1) << 1) ++#ifdef CONFIG_MALI_CINSTR_GWT ++ if (kctx->gwt_enabled) ++ gwt_mask = ~KBASE_REG_GPU_WR; ++#endif + -+/* Set of flags that can be passed into kbase_vmap-related functions */ -+#define KBASE_VMAP_INPUT_FLAGS (KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) ++ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, ++ kbase_reg_current_backed_size(reg), ++ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, ++ mmu_sync_info, NULL); ++ if (err == 0) ++ return 0; + -+struct kbase_vmap_struct { -+ off_t offset_in_page; -+ struct kbase_mem_phy_alloc *cpu_alloc; -+ struct kbase_mem_phy_alloc *gpu_alloc; -+ struct tagged_addr *cpu_pages; -+ struct tagged_addr *gpu_pages; -+ void *addr; -+ size_t size; -+ kbase_vmap_flag flags; -+}; ++ /* fall down */ ++unwind: ++ alloc->nents = 0; ++ dma_mapped_pages = i; ++ /* Run the unmap loop in the same order as map loop, and perform again ++ * CPU cache synchronization to re-write the content of dirty CPU caches ++ * to memory. This is precautionary measure in case a GPU job has taken ++ * advantage of a partially GPU-mapped range to write and corrupt the ++ * content of memory, either inside or outside the imported region. ++ * ++ * Notice that this error recovery path doesn't try to be optimal and just ++ * flushes the entire page range. 
++ */ ++ for (i = 0; i < dma_mapped_pages; i++) { ++ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + -+/** -+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region or NULL if there isn't one -+ * @new_pages: The number of pages after the shrink -+ * @old_pages: The number of pages before the shrink -+ * -+ * Return: 0 on success, negative -errno on error -+ * -+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region -+ * itself is unmodified as we still need to reserve the VA, only the page tables -+ * will be modified by this function. -+ */ -+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages); ++ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++#else ++ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, ++ DMA_ATTR_SKIP_CPU_SYNC); ++#endif ++ } + -+/** -+ * kbase_vmap_reg - Map part of an existing region into the kernel safely, only if the requested -+ * access permissions are supported -+ * @kctx: Context @reg belongs to -+ * @reg: The GPU region to map part of -+ * @gpu_addr: Start address of VA range to map, which must be within @reg -+ * @size: Size of VA range, which when added to @gpu_addr must be within @reg -+ * @prot_request: Flags indicating how the caller will then access the memory -+ * @map: Structure to be given to kbase_vunmap() on freeing -+ * @vmap_flags: Flags of type kbase_vmap_flag -+ * -+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error -+ * -+ * Variant of kbase_vmap_prot() that can be used given an existing region. -+ * -+ * The caller must satisfy one of the following for @reg: -+ * * It must have been obtained by finding it on the region tracker, and the region lock must not -+ * have been released in the mean time. -+ * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region -+ * lock is now held again. -+ * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the -+ * region lock is now held again. -+ * -+ * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. -+ * -+ * Refer to kbase_vmap_prot() for more information on the operation of this function. -+ */ -+void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, -+ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, -+ kbase_vmap_flag vmap_flags); ++ /* The user buffer could already have been previously pinned before ++ * entering this function, and hence there could potentially be CPU ++ * mappings of it ++ */ ++ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); + -+/** -+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the -+ * requested access permissions are supported -+ * @kctx: Context the VA range belongs to -+ * @gpu_addr: Start address of VA range -+ * @size: Size of VA range -+ * @prot_request: Flags indicating how the caller will then access the memory -+ * @map: Structure to be given to kbase_vunmap() on freeing -+ * -+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error -+ * -+ * Map a GPU VA Range into the kernel. The VA range must be contained within a -+ * GPU memory region. 
Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. -+ * -+ * This is safer than using kmap() on the pages directly, -+ * because the pages here are refcounted to prevent freeing (and hence reuse -+ * elsewhere in the system) until an kbase_vunmap() -+ * -+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check -+ * whether the region should allow the intended access, and return an error if -+ * disallowed. This is essential for security of imported memory, particularly -+ * a user buf from SHM mapped into the process as RO. In that case, write -+ * access must be checked if the intention is for kernel to write to the -+ * memory. -+ * -+ * The checks are also there to help catch access errors on memory where -+ * security is not a concern: imported memory that is always RW, and memory -+ * that was allocated and owned by the process attached to @kctx. In this case, -+ * it helps to identify memory that was mapped with the wrong access type. -+ * -+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases -+ * where either the security of memory is solely dependent on those flags, or -+ * when userspace code was expecting only the GPU to access the memory (e.g. HW -+ * workarounds). -+ * -+ * All cache maintenance operations shall be ignored if the -+ * memory region has been imported. -+ * -+ */ -+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ unsigned long prot_request, struct kbase_vmap_struct *map); ++ for (i = 0; i < pinned_pages; i++) { ++ kbase_unpin_user_buf_page(pages[i]); ++ pages[i] = NULL; ++ } + -+/** -+ * kbase_vmap - Map a GPU VA range into the kernel safely -+ * @kctx: Context the VA range belongs to -+ * @gpu_addr: Start address of VA range -+ * @size: Size of VA range -+ * @map: Structure to be given to kbase_vunmap() on freeing -+ * -+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error -+ * -+ * Map a GPU VA Range into the kernel. The VA range must be contained within a -+ * GPU memory region. Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. -+ * -+ * This is safer than using kmap() on the pages directly, -+ * because the pages here are refcounted to prevent freeing (and hence reuse -+ * elsewhere in the system) until an kbase_vunmap() -+ * -+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no -+ * checks to ensure the security of e.g. imported user bufs from RO SHM. -+ * -+ * Note: All cache maintenance operations shall be ignored if the memory region -+ * has been imported. -+ */ -+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ struct kbase_vmap_struct *map); ++ return err; ++} + -+/** -+ * kbase_vunmap - Unmap a GPU VA range from the kernel -+ * @kctx: Context the VA range belongs to -+ * @map: Structure describing the mapping from the corresponding kbase_vmap() -+ * call -+ * -+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained -+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. -+ * -+ * The reference taken on pages during kbase_vmap() is released. -+ * -+ * Note: All cache maintenance operations shall be ignored if the memory region -+ * has been imported. 
++/* This function would also perform the work of unpinning pages on Job Manager ++ * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT ++ * have a corresponding call to kbase_jd_user_buf_unpin_pages(). + */ -+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); ++static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, ++ struct kbase_va_region *reg, bool writeable) ++{ ++ long i; ++ struct page **pages; ++ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; ++ unsigned long remaining_size = alloc->imported.user_buf.size; + -+extern const struct vm_operations_struct kbase_vm_ops; ++ lockdep_assert_held(&kctx->reg_lock); + -+/** -+ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode -+ * CPU mapping. -+ * @kctx: Context the CPU mapping belongs to. -+ * @map: Structure describing the CPU mapping, setup previously by the -+ * kbase_vmap() call. -+ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) -+ * -+ * Note: The caller shall ensure that CPU mapping is not revoked & remains -+ * active whilst the maintenance is in progress. -+ */ -+void kbase_sync_mem_regions(struct kbase_context *kctx, -+ struct kbase_vmap_struct *map, enum kbase_sync_type dest); ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); ++ pages = alloc->imported.user_buf.pages; + -+/** -+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region -+ * @new_pages: The number of pages after the shrink -+ * @old_pages: The number of pages before the shrink -+ * -+ * Shrink (or completely remove) all CPU mappings which reference the shrunk -+ * part of the allocation. -+ */ -+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages); ++#if !MALI_USE_CSF ++ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); ++#else ++ CSTD_UNUSED(reg); ++#endif + -+/** -+ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a -+ * physical allocation -+ * @kctx: The kernel base context associated with the mapping -+ * @alloc: Pointer to the allocation to terminate -+ * -+ * This function will unmap the kernel mapping, and free any structures used to -+ * track it. -+ */ -+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc); ++ for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { ++ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); ++ /* Notice: this is a temporary variable that is used for DMA sync ++ * operations, and that could be incremented by an offset if the ++ * current page contains both imported and non-imported memory ++ * sub-regions. ++ * ++ * It is valid to add an offset to this value, because the offset ++ * is always kept within the physically contiguous dma-mapped range ++ * and there's no need to translate to physical address to offset it. ++ * ++ * This variable is not going to be used for the actual DMA unmap ++ * operation, that shall always use the original DMA address of the ++ * whole memory page. 
++ */ ++ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + -+/** -+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent -+ * mapping of a physical allocation -+ * @kctx: The kernel base context @gpu_addr will be looked up in -+ * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping -+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer -+ * which will be used for a call to -+ * kbase_phy_alloc_mapping_put() -+ * -+ * Return: Pointer to a kernel-side accessible location that directly -+ * corresponds to @gpu_addr, or NULL on failure -+ * -+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access -+ * that location kernel-side. Only certain kinds of memory have a permanent -+ * kernel mapping, refer to the internal functions -+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more -+ * information. -+ * -+ * If this function succeeds, a CPU access to the returned pointer will access -+ * the actual location represented by @gpu_addr. That is, the return value does -+ * not require any offset added to it to access the location specified in -+ * @gpu_addr -+ * -+ * The client must take care to either apply any necessary sync operations when -+ * accessing the data, or ensure that the enclosing region was coherent with -+ * the GPU, or uncached in the CPU. -+ * -+ * The refcount on the physical allocations backing the region are taken, so -+ * that they do not disappear whilst the client is accessing it. Once the -+ * client has finished accessing the memory, it must be released with a call to -+ * kbase_phy_alloc_mapping_put() -+ * -+ * Whilst this is expected to execute quickly (the mapping was already setup -+ * when the physical allocation was created), the call is not IRQ-safe due to -+ * the region lookup involved. -+ * -+ * An error code may indicate that: -+ * - a userside process has freed the allocation, and so @gpu_addr is no longer -+ * valid -+ * - the region containing @gpu_addr does not support a permanent kernel mapping -+ */ -+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, -+ struct kbase_vmap_struct **out_kern_mapping); ++ /* Manual CPU cache synchronization. ++ * ++ * When the GPU returns ownership of the buffer to the CPU, the driver ++ * needs to treat imported and non-imported memory differently. ++ * ++ * The first case to consider is non-imported sub-regions at the ++ * beginning of the first page and at the end of last page. For these ++ * sub-regions: CPU cache shall be committed with a clean+invalidate, ++ * in order to keep the last CPU write. ++ * ++ * Imported region prefers the opposite treatment: this memory has been ++ * legitimately mapped and used by the GPU, hence GPU writes shall be ++ * committed to memory, while CPU cache shall be invalidated to make ++ * sure that CPU reads the correct memory content. 
++ * ++ * The following diagram shows the expect value of the variables ++ * used in this loop in the corner case of an imported region encloed ++ * by a single memory page: ++ * ++ * page boundary ->|---------- | <- dma_addr (initial value) ++ * | | ++ * | - - - - - | <- offset_within_page ++ * |XXXXXXXXXXX|\ ++ * |XXXXXXXXXXX| \ ++ * |XXXXXXXXXXX| }- imported_size ++ * |XXXXXXXXXXX| / ++ * |XXXXXXXXXXX|/ ++ * | - - - - - | <- offset_within_page + imported_size ++ * | |\ ++ * | | }- PAGE_SIZE - imported_size - offset_within_page ++ * | |/ ++ * page boundary ->|-----------| ++ * ++ * If the imported region is enclosed by more than one page, then ++ * offset_within_page = 0 for any page after the first. ++ */ + -+/** -+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a -+ * physical allocation -+ * @kctx: The kernel base context associated with the mapping -+ * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained -+ * from a call to kbase_phy_alloc_mapping_get() -+ * -+ * Releases the reference to the allocations backing @kern_mapping that was -+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used -+ * when the client no longer needs to access the kernel-side CPU pointer. -+ * -+ * If this was the last reference on the underlying physical allocations, they -+ * will go through the normal allocation free steps, which also includes an -+ * unmap of the permanent kernel mapping for those allocations. -+ * -+ * Due to these operations, the function is not IRQ-safe. However it is -+ * expected to execute quickly in the normal case, i.e. when the region holding -+ * the physical allocation is still present. -+ */ -+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, -+ struct kbase_vmap_struct *kern_mapping); ++ /* Only for first page: handle non-imported range at the beginning. */ ++ if (offset_within_page > 0) { ++ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, ++ DMA_BIDIRECTIONAL); ++ dma_addr += offset_within_page; ++ } + -+/** -+ * kbase_get_cache_line_alignment - Return cache line alignment -+ * -+ * @kbdev: Device pointer. -+ * -+ * Helper function to return the maximum cache line alignment considering -+ * both CPU and GPU cache sizes. -+ * -+ * Return: CPU and GPU cache line alignment, in bytes. -+ */ -+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); ++ /* For every page: handle imported range. */ ++ if (imported_size > 0) ++ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, ++ DMA_BIDIRECTIONAL); + -+#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) -+static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, -+ unsigned long addr, unsigned long pfn, pgprot_t pgprot) -+{ -+ int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); ++ /* Only for last page (that may coincide with first page): ++ * handle non-imported range at the end. ++ */ ++ if ((imported_size + offset_within_page) < PAGE_SIZE) { ++ dma_addr += imported_size; ++ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, ++ PAGE_SIZE - imported_size - offset_within_page, ++ DMA_BIDIRECTIONAL); ++ } + -+ if (unlikely(err == -ENOMEM)) -+ return VM_FAULT_OOM; -+ if (unlikely(err < 0 && err != -EBUSY)) -+ return VM_FAULT_SIGBUS; ++ /* Notice: use the original DMA address to unmap the whole memory page. 
*/ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++#else ++ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], ++ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); ++#endif ++ if (writeable) ++ set_page_dirty_lock(pages[i]); ++#if !MALI_USE_CSF ++ kbase_unpin_user_buf_page(pages[i]); ++ pages[i] = NULL; ++#endif + -+ return VM_FAULT_NOPAGE; -+} ++ remaining_size -= imported_size; ++ offset_within_page = 0; ++ } ++#if !MALI_USE_CSF ++ alloc->nents = 0; +#endif ++} + -+/** -+ * kbase_mem_get_process_mmap_lock - Return the mmap lock for the current process -+ * -+ * Return: the mmap lock for the current process -+ */ -+static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void) ++int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, ++ void *src_page, size_t *to_copy, unsigned int nr_pages, ++ unsigned int *target_page_nr, size_t offset) +{ -+#if KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE -+ return ¤t->mm->mmap_sem; -+#else /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ -+ return ¤t->mm->mmap_lock; -+#endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ -+} ++ void *target_page = kmap(dest_pages[*target_page_nr]); ++ size_t chunk = PAGE_SIZE-offset; + -+#endif /* _KBASE_MEM_LINUX_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h -new file mode 100644 -index 000000000..5a1bb16cc ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h -@@ -0,0 +1,179 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if (!target_page) { ++ pr_err("%s: kmap failure", __func__); ++ return -ENOMEM; ++ } + -+#ifndef _KBASE_MEM_LOWLEVEL_H -+#define _KBASE_MEM_LOWLEVEL_H ++ chunk = min(chunk, *to_copy); + -+#ifndef _KBASE_H_ -+#error "Don't include this file directly, use mali_kbase.h instead" -+#endif ++ memcpy(target_page + offset, src_page, chunk); ++ *to_copy -= chunk; + -+#include ++ kunmap(dest_pages[*target_page_nr]); + -+/* Flags for kbase_phy_allocator_pages_alloc */ -+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ -+#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ -+#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ ++ *target_page_nr += 1; ++ if (*target_page_nr >= nr_pages || *to_copy == 0) ++ return 0; + -+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) ++ target_page = kmap(dest_pages[*target_page_nr]); ++ if (!target_page) { ++ pr_err("%s: kmap failure", __func__); ++ return -ENOMEM; ++ } + -+#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ ++ KBASE_DEBUG_ASSERT(target_page); + -+enum kbase_sync_type { -+ KBASE_SYNC_TO_CPU, -+ KBASE_SYNC_TO_DEVICE -+}; ++ chunk = min(offset, *to_copy); ++ memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); ++ *to_copy -= chunk; + -+struct tagged_addr { phys_addr_t tagged_addr; }; ++ kunmap(dest_pages[*target_page_nr]); + -+#define HUGE_PAGE (1u << 0) -+#define HUGE_HEAD (1u << 1) -+#define FROM_PARTIAL (1u << 2) ++ return 0; ++} + -+#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) ++int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, ++ struct mm_struct *locked_mm) ++{ ++ int err = 0; ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + -+/* -+ * Note: if macro for converting physical address to page is not defined -+ * in the kernel itself, it is defined hereby. This is to avoid build errors -+ * which are reported during builds for some architectures. -+ */ -+#ifndef phys_to_page -+#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) -+#endif ++ lockdep_assert_held(&kctx->reg_lock); + -+/** -+ * as_phys_addr_t - Retrieve the physical address from tagged address by -+ * masking the lower order 12 bits. -+ * @t: tagged address to be translated. -+ * -+ * Return: physical address corresponding to tagged address. 
-+ */ -+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) ++ /* decide what needs to happen for this resource */ ++ switch (reg->gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && ++ (!reg->gpu_alloc->nents)) ++ return -EINVAL; ++ ++ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; ++ if (reg->gpu_alloc->imported.user_buf ++ .current_mapping_usage_count == 1) { ++ err = kbase_jd_user_buf_map(kctx, reg); ++ if (err) { ++ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; ++ return err; ++ } ++ } ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ err = kbase_mem_umm_map(kctx, reg); ++ if (err) ++ return err; ++ break; ++ } ++ default: ++ dev_dbg(kctx->kbdev->dev, ++ "Invalid external resource GPU allocation type (%x) on mapping", ++ alloc->type); ++ return -EINVAL; ++ } ++ ++ kbase_va_region_alloc_get(kctx, reg); ++ kbase_mem_phy_alloc_get(alloc); ++ return err; ++} ++ ++void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ return t.tagged_addr & PAGE_MASK; ++ /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the ++ * unmapping operation. ++ */ ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ switch (alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ kbase_mem_umm_unmap(kctx, reg, alloc); ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ alloc->imported.user_buf.current_mapping_usage_count--; ++ ++ if (alloc->imported.user_buf.current_mapping_usage_count == 0) { ++ bool writeable = true; ++ ++ if (!kbase_is_region_invalid_or_free(reg)) { ++ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ alloc->pages, ++ kbase_reg_current_backed_size(reg), ++ kbase_reg_current_backed_size(reg), ++ kctx->as_nr, true); ++ } ++ ++ if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) ++ writeable = false; ++ ++ kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); ++ } ++ } ++ break; ++ default: ++ WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", ++ alloc->type); ++ return; ++ } ++ kbase_mem_phy_alloc_put(alloc); ++ kbase_va_region_alloc_put(kctx, reg); +} + -+/** -+ * as_page - Retrieve the struct page from a tagged address -+ * @t: tagged address to be translated. -+ * -+ * Return: pointer to struct page corresponding to tagged address. -+ */ -+static inline struct page *as_page(struct tagged_addr t) ++static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) +{ -+ return phys_to_page(as_phys_addr_t(t)); ++ return reg->start_pfn << PAGE_SHIFT; +} + -+/** -+ * as_tagged - Convert the physical address to tagged address type though -+ * there is no tag info present, the lower order 12 bits will be 0 -+ * @phys: physical address to be converted to tagged type -+ * -+ * This is used for 4KB physical pages allocated by the Driver or imported pages -+ * and is needed as physical pages tracking object stores the reference for -+ * physical pages using tagged address type in lieu of the type generally used -+ * for physical addresses. -+ * -+ * Return: address of tagged address type. 
-+ */ -+static inline struct tagged_addr as_tagged(phys_addr_t phys) ++struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( ++ struct kbase_context *kctx, u64 gpu_addr) +{ -+ struct tagged_addr t; ++ struct kbase_ctx_ext_res_meta *meta = NULL; ++ struct kbase_ctx_ext_res_meta *walker; + -+ t.tagged_addr = phys & PAGE_MASK; -+ return t; ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* ++ * Walk the per context external resource metadata list for the ++ * metadata which matches the region which is being acquired. ++ */ ++ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { ++ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { ++ meta = walker; ++ meta->ref++; ++ break; ++ } ++ } ++ ++ /* No metadata exists so create one. */ ++ if (!meta) { ++ struct kbase_va_region *reg; ++ ++ /* Find the region */ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto failed; ++ ++ /* Allocate the metadata object */ ++ meta = kzalloc(sizeof(*meta), GFP_KERNEL); ++ if (!meta) ++ goto failed; ++ /* ++ * Fill in the metadata object and acquire a reference ++ * for the physical resource. ++ */ ++ meta->reg = reg; ++ ++ /* Map the external resource to the GPU allocation of the region ++ * and acquire the reference to the VA region ++ */ ++ if (kbase_map_external_resource(kctx, meta->reg, NULL)) ++ goto fail_map; ++ meta->ref = 1; ++ ++ list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); ++ } ++ ++ return meta; ++ ++fail_map: ++ kfree(meta); ++failed: ++ return NULL; +} + -+/** -+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the -+ * lower order 12 bits of physial address -+ * @phys: physical address to be converted to tagged address -+ * @tag: tag to be stored along with the physical address. -+ * -+ * The tag info is used while freeing up the pages -+ * -+ * Return: tagged address storing physical address & tag. -+ */ -+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) ++static struct kbase_ctx_ext_res_meta * ++find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) +{ -+ struct tagged_addr t; ++ struct kbase_ctx_ext_res_meta *walker; + -+ t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK); -+ return t; ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* ++ * Walk the per context external resource metadata list for the ++ * metadata which matches the region which is being released. ++ */ ++ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) ++ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) ++ return walker; ++ ++ return NULL; +} + -+/** -+ * is_huge - Check if the physical page is one of the 512 4KB pages of the -+ * large page which was not split to be used partially -+ * @t: tagged address storing the tag in the lower order bits. -+ * -+ * Return: true if page belongs to large page, or false -+ */ -+static inline bool is_huge(struct tagged_addr t) ++static void release_sticky_resource_meta(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta) +{ -+ return t.tagged_addr & HUGE_PAGE; ++ kbase_unmap_external_resource(kctx, meta->reg); ++ list_del(&meta->ext_res_node); ++ kfree(meta); +} + -+/** -+ * is_huge_head - Check if the physical page is the first 4KB page of the -+ * 512 4KB pages within a large page which was not split -+ * to be used partially -+ * @t: tagged address storing the tag in the lower order bits. 
-+ * -+ * Return: true if page is the first page of a large page, or false -+ */ -+static inline bool is_huge_head(struct tagged_addr t) ++bool kbase_sticky_resource_release(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ -+ int mask = HUGE_HEAD | HUGE_PAGE; ++ lockdep_assert_held(&kctx->reg_lock); + -+ return mask == (t.tagged_addr & mask); ++ /* Search of the metadata if one isn't provided. */ ++ if (!meta) ++ meta = find_sticky_resource_meta(kctx, gpu_addr); ++ ++ /* No metadata so just return. */ ++ if (!meta) ++ return false; ++ ++ if (--meta->ref != 0) ++ return true; ++ ++ release_sticky_resource_meta(kctx, meta); ++ ++ return true; +} + -+/** -+ * is_partial - Check if the physical page is one of the 512 pages of the -+ * large page which was split in 4KB pages to be used -+ * partially for allocations >= 2 MB in size. -+ * @t: tagged address storing the tag in the lower order bits. -+ * -+ * Return: true if page was taken from large page used partially, or false -+ */ -+static inline bool is_partial(struct tagged_addr t) ++bool kbase_sticky_resource_release_force(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ -+ return t.tagged_addr & FROM_PARTIAL; ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* Search of the metadata if one isn't provided. */ ++ if (!meta) ++ meta = find_sticky_resource_meta(kctx, gpu_addr); ++ ++ /* No metadata so just return. */ ++ if (!meta) ++ return false; ++ ++ release_sticky_resource_meta(kctx, meta); ++ ++ return true; +} + -+/** -+ * index_in_large_page() - Get index of a 4KB page within a 2MB page which -+ * wasn't split to be used partially. -+ * -+ * @t: Tagged physical address of the physical 4KB page that lies within -+ * the large (or 2 MB) physical page. -+ * -+ * Return: Index of the 4KB page within a 2MB page -+ */ -+static inline unsigned int index_in_large_page(struct tagged_addr t) ++int kbase_sticky_resource_init(struct kbase_context *kctx) +{ -+ WARN_ON(!is_huge(t)); ++ INIT_LIST_HEAD(&kctx->ext_res_meta_head); + -+ return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1)); ++ return 0; +} + -+#endif /* _KBASE_LOWLEVEL_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c ++void kbase_sticky_resource_term(struct kbase_context *kctx) ++{ ++ struct kbase_ctx_ext_res_meta *walker; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* ++ * Free any sticky resources which haven't been unmapped. ++ * ++ * Note: ++ * We don't care about refcounts at this point as no future ++ * references to the meta data will be made. ++ * Region termination would find these if we didn't free them ++ * here, but it's more efficient if we do the clean up here. ++ */ ++ while (!list_empty(&kctx->ext_res_meta_head)) { ++ walker = list_first_entry(&kctx->ext_res_meta_head, ++ struct kbase_ctx_ext_res_meta, ext_res_node); ++ ++ kbase_sticky_resource_release_force(kctx, walker, 0); ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h new file mode 100644 -index 000000000..737f7da55 +index 000000000..1118b96fc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c -@@ -0,0 +1,641 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h +@@ -0,0 +1,2560 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -242108,2335 +244133,2554 @@ index 000000000..737f7da55 + */ + +/** -+ * DOC: Base kernel page migration implementation. ++ * DOC: Base kernel memory APIs + */ -+#include -+ -+#include -+#include -+#include + -+/* Global integer used to determine if module parameter value has been -+ * provided and if page migration feature is enabled. -+ * Feature is disabled on all platforms by default. -+ */ -+int kbase_page_migration_enabled; -+module_param(kbase_page_migration_enabled, int, 0444); -+KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); ++#ifndef _KBASE_MEM_H_ ++#define _KBASE_MEM_H_ + -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+static const struct movable_operations movable_ops; ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" +#endif + -+bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, -+ u8 group_id) -+{ -+ struct kbase_page_metadata *page_md = -+ kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); ++#include ++#include ++#include ++#include "mali_kbase_pm.h" ++#include "mali_kbase_defs.h" ++/* Required for kbase_mem_evictable_unmake */ ++#include "mali_kbase_mem_linux.h" ++#include "mali_kbase_mem_migrate.h" ++#include "mali_kbase_refcount_defs.h" + -+ if (!page_md) -+ return false; ++static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, ++ int pages); + -+ SetPagePrivate(p); -+ set_page_private(p, (unsigned long)page_md); -+ page_md->dma_addr = dma_addr; -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); -+ page_md->vmap_count = 0; -+ page_md->group_id = group_id; -+ spin_lock_init(&page_md->migrate_lock); ++/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + -+ lock_page(p); -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+ __SetPageMovable(p, &movable_ops); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); -+#else -+ /* In some corner cases, the driver may attempt to allocate memory pages -+ * even before the device file is open and the mapping for address space -+ * operations is created. In that case, it is impossible to assign address -+ * space operations to memory pages: simply pretend that they are movable, -+ * even if they are not. -+ * -+ * The page will go through all state transitions but it will never be -+ * actually considered movable by the kernel. This is due to the fact that -+ * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the -+ * memory pool will always refuse to add it to the pool and schedule -+ * a worker thread to free it later. -+ * -+ * Page metadata may seem redundant in this case, but they are not, -+ * because memory pools expect metadata to be present when page migration -+ * is enabled and because the pages may always return to memory pools and -+ * gain the movable property later on in their life cycle. 
-+ */ -+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { -+ __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); -+ } -+#endif -+ unlock_page(p); ++/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by ++ * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we ++ * have more than one page fault within the same 8 pages and page tables are ++ * updated accordingly, the MMU does not re-read the page table entries from ++ * memory for the subsequent page table updates and generates duplicate page ++ * faults as the page table information used by the MMU is not valid. ++ */ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + -+ return true; -+} -+ -+static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id) -+{ -+ struct device *const dev = kbdev->dev; -+ struct kbase_page_metadata *page_md; -+ dma_addr_t dma_addr; -+ -+ page_md = kbase_page_private(p); -+ if (!page_md) -+ return; -+ -+ if (group_id) -+ *group_id = page_md->group_id; -+ dma_addr = kbase_dma_addr(p); -+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ kfree(page_md); -+ set_page_private(p, 0); -+ ClearPagePrivate(p); -+} ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + -+static void kbase_free_pages_worker(struct work_struct *work) -+{ -+ struct kbase_mem_migrate *mem_migrate = -+ container_of(work, struct kbase_mem_migrate, free_pages_work); -+ struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate); -+ struct page *p, *tmp; -+ struct kbase_page_metadata *page_md; -+ LIST_HEAD(free_list); ++/* This must always be a power of 2 */ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) + -+ spin_lock(&mem_migrate->free_pages_lock); -+ list_splice_init(&mem_migrate->free_pages_list, &free_list); -+ spin_unlock(&mem_migrate->free_pages_lock); ++/* ++ * A CPU mapping ++ */ ++struct kbase_cpu_mapping { ++ struct list_head mappings_list; ++ struct kbase_mem_phy_alloc *alloc; ++ struct kbase_context *kctx; ++ struct kbase_va_region *region; ++ int count; ++ int free_on_close; ++}; + -+ list_for_each_entry_safe(p, tmp, &free_list, lru) { -+ u8 group_id = 0; -+ list_del_init(&p->lru); ++enum kbase_memory_type { ++ KBASE_MEM_TYPE_NATIVE, ++ KBASE_MEM_TYPE_IMPORTED_UMM, ++ KBASE_MEM_TYPE_IMPORTED_USER_BUF, ++ KBASE_MEM_TYPE_ALIAS, ++ KBASE_MEM_TYPE_RAW ++}; + -+ lock_page(p); -+ page_md = kbase_page_private(p); -+ if (IS_PAGE_MOVABLE(page_md->status)) { -+ __ClearPageMovable(p); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ } -+ unlock_page(p); ++/* internal structure, mirroring base_mem_aliasing_info, ++ * but with alloc instead of a gpu va (handle) ++ */ ++struct kbase_aliased { ++ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ ++ u64 offset; /* in pages */ ++ u64 length; /* in pages */ ++}; + -+ kbase_free_page_metadata(kbdev, p, &group_id); -+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); -+ } -+} ++/* Physical pages tracking object properties */ ++#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 
0) ++#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) + -+void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) -+{ -+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; ++/* struct kbase_mem_phy_alloc - Physical pages tracking object. ++ * ++ * Set up to track N pages. ++ * N not stored here, the creator holds that info. ++ * This object only tracks how many elements are actually valid (present). ++ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc ++ * is not shared with another region or client. CPU mappings are OK to ++ * exist when changing, as long as the tracked mappings objects are ++ * updated as part of the change. ++ * ++ * @kref: number of users of this alloc ++ * @gpu_mappings: count number of times mapped on the GPU. Indicates the number ++ * of references there are to the physical pages from different ++ * GPU VA regions. ++ * @kernel_mappings: count number of times mapped on the CPU, specifically in ++ * the kernel. Indicates the number of references there are ++ * to the physical pages to prevent flag changes or shrink ++ * while maps are still held. ++ * @nents: 0..N ++ * @pages: N elements, only 0..nents are valid ++ * @mappings: List of CPU mappings of this physical memory allocation. ++ * @evict_node: Node used to store this allocation on the eviction list ++ * @evicted: Physical backing size when the pages where evicted ++ * @reg: Back reference to the region structure which created this ++ * allocation, or NULL if it has been freed. ++ * @type: type of buffer ++ * @permanent_map: Kernel side mapping of the alloc, shall never be ++ * referred directly. kbase_phy_alloc_mapping_get() & ++ * kbase_phy_alloc_mapping_put() pair should be used ++ * around access to the kernel-side CPU mapping so that ++ * mapping doesn't disappear whilst it is being accessed. ++ * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE. ++ * @group_id: A memory group ID to be passed to a platform-specific ++ * memory group manager, if present. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @imported: member in union valid based on @a type ++ */ ++struct kbase_mem_phy_alloc { ++ struct kref kref; ++ atomic_t gpu_mappings; ++ atomic_t kernel_mappings; ++ size_t nents; ++ struct tagged_addr *pages; ++ struct list_head mappings; ++ struct list_head evict_node; ++ size_t evicted; ++ struct kbase_va_region *reg; ++ enum kbase_memory_type type; ++ struct kbase_vmap_struct *permanent_map; ++ u8 properties; ++ u8 group_id; + -+ spin_lock(&mem_migrate->free_pages_lock); -+ list_add(&p->lru, &mem_migrate->free_pages_list); -+ spin_unlock(&mem_migrate->free_pages_lock); -+} ++ union { ++ struct { ++ struct kbase_context *kctx; ++ struct dma_buf *dma_buf; ++ struct dma_buf_attachment *dma_attachment; ++ unsigned int current_mapping_usage_count; ++ struct sg_table *sgt; ++ bool need_sync; ++ } umm; ++ struct { ++ u64 stride; ++ size_t nents; ++ struct kbase_aliased *aliased; ++ } alias; ++ struct { ++ struct kbase_context *kctx; ++ /* Number of pages in this structure, including *pages. ++ * Used for kernel memory tracking. 
++ */ ++ size_t nr_struct_pages; ++ } native; ++ struct kbase_alloc_import_user_buf { ++ unsigned long address; ++ unsigned long size; ++ unsigned long nr_pages; ++ struct page **pages; ++ /* top bit (1<<31) of current_mapping_usage_count ++ * specifies that this import was pinned on import ++ * See PINNED_ON_IMPORT ++ */ ++ u32 current_mapping_usage_count; ++ struct mm_struct *mm; ++ dma_addr_t *dma_addrs; ++ } user_buf; ++ } imported; ++}; + +/** -+ * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped -+ * in a PGD of kbase_mmu_table. -+ * -+ * @old_page: Existing PGD page to remove -+ * @new_page: Destination for migrating the existing PGD page to ++ * enum kbase_page_status - Status of a page used for page migration. + * -+ * Replace an existing PGD page with a new page by migrating its content. More specifically: -+ * the new page shall replace the existing PGD page in the MMU page table. Before returning, -+ * the new page shall be set as movable and not isolated, while the old page shall lose -+ * the movable property. The meta data attached to the PGD page is transferred to the -+ * new (replacement) page. ++ * @MEM_POOL: Stable state. Page is located in a memory pool and can safely ++ * be migrated. ++ * @ALLOCATE_IN_PROGRESS: Transitory state. A page is set to this status as ++ * soon as it leaves a memory pool. ++ * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory ++ * pool of a dying context are being moved to the device ++ * memory pool. ++ * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is ++ * not movable, but may return to be movable when the object ++ * is freed. ++ * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU ++ * and has reference to kbase_mem_phy_alloc object. ++ * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't ++ * reference kbase_mem_phy_alloc object. Used as a page in MMU ++ * page table. ++ * @FREE_IN_PROGRESS: Transitory state. A page is set to this status as soon as ++ * the driver manages to acquire a lock on the page while ++ * unmapping it. This status means that a memory release is ++ * happening and it's still not complete. ++ * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. ++ * A page is isolated while it is in ALLOCATED_MAPPED state, ++ * but then the driver tries to destroy the allocation. ++ * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. ++ * A page is isolated while it is in PT_MAPPED state, but ++ * then the driver tries to destroy the allocation. + * -+ * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure -+ * and the migration is aborted. ++ * Pages can only be migrated in stable states. + */ -+static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page) -+{ -+ struct kbase_page_metadata *page_md = kbase_page_private(old_page); -+ struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ dma_addr_t old_dma_addr = page_md->dma_addr; -+ dma_addr_t new_dma_addr; -+ int ret; -+ -+ /* Create a new dma map for the new page */ -+ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ if (dma_mapping_error(kbdev->dev, new_dma_addr)) -+ return -ENOMEM; -+ -+ /* Lock context to protect access to the page in physical allocation. -+ * This blocks the CPU page fault handler from remapping pages. 
-+ * Only MCU's mmut is device wide, i.e. no corresponding kctx. -+ */ -+ kbase_gpu_vm_lock(kctx); -+ -+ ret = kbase_mmu_migrate_page( -+ as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, -+ new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); -+ -+ if (ret == 0) { -+ dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ __ClearPageMovable(old_page); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ ClearPagePrivate(old_page); -+ put_page(old_page); -+ -+ page_md = kbase_page_private(new_page); -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+ __SetPageMovable(new_page, &movable_ops); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); -+#else -+ if (kbdev->mem_migrate.inode->i_mapping) { -+ __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); -+ } -+#endif -+ SetPagePrivate(new_page); -+ get_page(new_page); -+ } else -+ dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ /* Page fault handler for CPU mapping unblocked. */ -+ kbase_gpu_vm_unlock(kctx); ++enum kbase_page_status { ++ MEM_POOL = 0, ++ ALLOCATE_IN_PROGRESS, ++ SPILL_IN_PROGRESS, ++ NOT_MOVABLE, ++ ALLOCATED_MAPPED, ++ PT_MAPPED, ++ FREE_IN_PROGRESS, ++ FREE_ISOLATED_IN_PROGRESS, ++ FREE_PT_ISOLATED_IN_PROGRESS, ++}; + -+ return ret; -+} ++#define PGD_VPFN_LEVEL_MASK ((u64)0x3) ++#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) ++#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) ++#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ ++ ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + -+/* -+ * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both -+ * allocated and mapped. ++/** ++ * struct kbase_page_metadata - Metadata for each page in kbase + * -+ * @old_page: Page to remove. -+ * @new_page: Page to add. ++ * @kbdev: Pointer to kbase device. ++ * @dma_addr: DMA address mapped to page. ++ * @migrate_lock: A spinlock to protect the private metadata. ++ * @data: Member in union valid based on @status. ++ * @status: Status to keep track if page can be migrated at any ++ * given moment. MSB will indicate if page is isolated. ++ * Protected by @migrate_lock. ++ * @vmap_count: Counter of kernel mappings. ++ * @group_id: Memory group ID obtained at the time of page allocation. + * -+ * Replace an old page with a new page by migrating its content and all its -+ * CPU and GPU mappings. More specifically: the new page shall replace the -+ * old page in the MMU page table, as well as in the page array of the physical -+ * allocation, which is used to create CPU mappings. Before returning, the new -+ * page shall be set as movable and not isolated, while the old page shall lose -+ * the movable property. ++ * Each 4KB page will have a reference to this struct in the private field. ++ * This will be used to keep track of information required for Linux page ++ * migration functionality as well as address for DMA mapping. 
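/*
 * Editorial sketch (not part of the patch): how the PGD_VPFN_LEVEL_* macros
 * introduced in this hunk pack an MMU level (0..3) into the two low bits of a
 * PGD virtual page frame number, which works because the stored VPFN is
 * aligned and its low bits are known to be zero. Standalone, user-space
 * illustration; the macro bodies are copied verbatim from the header, the
 * sample VPFN and level are hypothetical.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

#define PGD_VPFN_LEVEL_MASK ((u64)0x3)
#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \
        ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK))

int main(void)
{
        const u64 vpfn = 0x12340ULL;    /* hypothetical, suitably aligned VPFN */
        const u64 level = 3;            /* e.g. the bottom MMU level */
        u64 packed = PGD_VPFN_LEVEL_SET(vpfn, level);

        /* Both fields survive the round trip through the single u64. */
        assert(PGD_VPFN_LEVEL_GET_VPFN(packed) == vpfn);
        assert(PGD_VPFN_LEVEL_GET_LEVEL(packed) == level);
        printf("packed=%#llx vpfn=%#llx level=%llu\n",
               (unsigned long long)packed,
               (unsigned long long)PGD_VPFN_LEVEL_GET_VPFN(packed),
               (unsigned long long)PGD_VPFN_LEVEL_GET_LEVEL(packed));
        return 0;
}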
+ */ -+static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) -+{ -+ struct kbase_page_metadata *page_md = kbase_page_private(old_page); -+ struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; -+ dma_addr_t old_dma_addr, new_dma_addr; -+ int ret; -+ -+ old_dma_addr = page_md->dma_addr; -+ new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) -+ return -ENOMEM; ++struct kbase_page_metadata { ++ dma_addr_t dma_addr; ++ spinlock_t migrate_lock; + -+ /* Lock context to protect access to array of pages in physical allocation. -+ * This blocks the CPU page fault handler from remapping pages. -+ */ -+ kbase_gpu_vm_lock(kctx); ++ union { ++ struct { ++ struct kbase_mem_pool *pool; ++ /* Pool could be terminated after page is isolated and therefore ++ * won't be able to get reference to kbase device. ++ */ ++ struct kbase_device *kbdev; ++ } mem_pool; ++ struct { ++ struct kbase_va_region *reg; ++ struct kbase_mmu_table *mmut; ++ u64 vpfn; ++ } mapped; ++ struct { ++ struct kbase_mmu_table *mmut; ++ u64 pgd_vpfn_level; ++ } pt_mapped; ++ struct { ++ struct kbase_device *kbdev; ++ } free_isolated; ++ struct { ++ struct kbase_device *kbdev; ++ } free_pt_isolated; ++ } data; + -+ /* Unmap the old physical range. */ -+ unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, -+ PAGE_SIZE, 1); ++ u8 status; ++ u8 vmap_count; ++ u8 group_id; ++}; + -+ ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), -+ as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, -+ MIDGARD_MMU_BOTTOMLEVEL); ++/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is ++ * used to signify that a buffer was pinned when it was imported. Since the ++ * reference count is limited by the number of atoms that can be submitted at ++ * once there should be no danger of overflowing into this bit. ++ * Stealing the top bit also has the benefit that ++ * current_mapping_usage_count != 0 if and only if the buffer is mapped. ++ */ ++#define PINNED_ON_IMPORT (1<<31) + -+ if (ret == 0) { -+ dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++/** ++ * enum kbase_jit_report_flags - Flags for just-in-time memory allocation ++ * pressure limit functions ++ * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due ++ * to a just-in-time memory allocation or free ++ * ++ * Used to control flow within pressure limit related functions, or to provide ++ * extra debugging information ++ */ ++enum kbase_jit_report_flags { ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) ++}; + -+ SetPagePrivate(new_page); -+ get_page(new_page); ++/** ++ * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying ++ * physical allocation. ++ * @alloc: the physical allocation containing the pages whose metadata is going ++ * to be modified ++ * @status: the status the pages should end up in ++ * ++ * Note that this function does not go through all of the checking to ensure that ++ * proper states are set. Instead, it is only used when we change the allocation ++ * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED ++ */ ++void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, ++ enum kbase_page_status status); + -+ /* Clear PG_movable from the old page and release reference. 
*/ -+ ClearPagePrivate(old_page); -+ __ClearPageMovable(old_page); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ put_page(old_page); ++static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) ++{ ++ KBASE_DEBUG_ASSERT(alloc); ++ /* we only track mappings of NATIVE buffers */ ++ if (alloc->type == KBASE_MEM_TYPE_NATIVE) ++ atomic_inc(&alloc->gpu_mappings); ++} + -+ page_md = kbase_page_private(new_page); -+ /* Set PG_movable to the new page. */ -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+ __SetPageMovable(new_page, &movable_ops); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); -+#else -+ if (kctx->kbdev->mem_migrate.inode->i_mapping) { -+ __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); -+ page_md->status = PAGE_MOVABLE_SET(page_md->status); ++static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) ++{ ++ KBASE_DEBUG_ASSERT(alloc); ++ /* we only track mappings of NATIVE buffers */ ++ if (alloc->type == KBASE_MEM_TYPE_NATIVE) ++ if (atomic_dec_return(&alloc->gpu_mappings) < 0) { ++ pr_err("Mismatched %s:\n", __func__); ++ dump_stack(); + } -+#endif -+ } else -+ dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ /* Page fault handler for CPU mapping unblocked. */ -+ kbase_gpu_vm_unlock(kctx); -+ -+ return ret; +} + +/** -+ * kbase_page_isolate - Isolate a page for migration. -+ * -+ * @p: Pointer of the page struct of page to isolate. -+ * @mode: LRU Isolation modes. -+ * -+ * Callback function for Linux to isolate a page and prepare it for migration. ++ * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings counter for a ++ * memory region to prevent commit and flag ++ * changes + * -+ * Return: true on success, false otherwise. ++ * @alloc: Pointer to physical pages tracking object + */ -+static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) ++static inline void ++kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) +{ -+ bool status_mem_pool = false; -+ struct kbase_mem_pool *mem_pool = NULL; -+ struct kbase_page_metadata *page_md = kbase_page_private(p); -+ -+ CSTD_UNUSED(mode); -+ -+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) -+ return false; -+ -+ if (!spin_trylock(&page_md->migrate_lock)) -+ return false; -+ -+ if (WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { -+ spin_unlock(&page_md->migrate_lock); -+ return false; -+ } -+ -+ switch (PAGE_STATUS_GET(page_md->status)) { -+ case MEM_POOL: -+ /* Prepare to remove page from memory pool later only if pool is not -+ * in the process of termination. -+ */ -+ mem_pool = page_md->data.mem_pool.pool; -+ status_mem_pool = true; -+ preempt_disable(); -+ atomic_inc(&mem_pool->isolation_in_progress_cnt); -+ break; -+ case ALLOCATED_MAPPED: -+ /* Mark the page into isolated state, but only if it has no -+ * kernel CPU mappings -+ */ -+ if (page_md->vmap_count == 0) -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); -+ break; -+ case PT_MAPPED: -+ /* Mark the page into isolated state. */ -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); -+ break; -+ case SPILL_IN_PROGRESS: -+ case ALLOCATE_IN_PROGRESS: -+ case FREE_IN_PROGRESS: -+ break; -+ case NOT_MOVABLE: -+ /* Opportunistically clear the movable property for these pages */ -+ __ClearPageMovable(p); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ break; -+ default: -+ /* State should always fall in one of the previous cases! 
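/*
 * Editorial sketch (not part of the patch): the PINNED_ON_IMPORT bit defined a
 * little earlier in this header shares a u32 with
 * kbase_alloc_import_user_buf::current_mapping_usage_count, so the same word
 * records both "pinned at import time" and the mapping count. The helper names
 * and sample values below are hypothetical; only the bit arithmetic is taken
 * from the header (which writes the constant as (1<<31); 1u is used here to
 * keep the standalone sketch strictly portable).
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t u32;

#define PINNED_ON_IMPORT (1u << 31)

static bool userbuf_pinned_on_import(u32 count)   /* hypothetical helper */
{
        return (count & PINNED_ON_IMPORT) != 0;
}

static u32 userbuf_mapping_count(u32 count)       /* hypothetical helper */
{
        return count & ~PINNED_ON_IMPORT;
}

int main(void)
{
        u32 count = 0;

        count |= PINNED_ON_IMPORT;      /* pages pinned when the buffer was imported */
        count += 2;                     /* two mappings taken later */

        assert(userbuf_pinned_on_import(count));
        assert(userbuf_mapping_count(count) == 2);
        return 0;
}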
-+ * Also notice that FREE_ISOLATED_IN_PROGRESS or -+ * FREE_PT_ISOLATED_IN_PROGRESS is impossible because -+ * that state only applies to pages that are already isolated. -+ */ -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); -+ break; -+ } -+ -+ spin_unlock(&page_md->migrate_lock); -+ -+ /* If the page is still in the memory pool: try to remove it. This will fail -+ * if pool lock is taken which could mean page no longer exists in pool. -+ */ -+ if (status_mem_pool) { -+ if (!spin_trylock(&mem_pool->pool_lock)) { -+ atomic_dec(&mem_pool->isolation_in_progress_cnt); -+ preempt_enable(); -+ return false; -+ } -+ -+ spin_lock(&page_md->migrate_lock); -+ /* Check status again to ensure page has not been removed from memory pool. */ -+ if (PAGE_STATUS_GET(page_md->status) == MEM_POOL) { -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); -+ list_del_init(&p->lru); -+ mem_pool->cur_size--; -+ } -+ spin_unlock(&page_md->migrate_lock); -+ spin_unlock(&mem_pool->pool_lock); -+ atomic_dec(&mem_pool->isolation_in_progress_cnt); -+ preempt_enable(); -+ } -+ -+ return IS_PAGE_ISOLATED(page_md->status); ++ atomic_inc(&alloc->kernel_mappings); +} + +/** -+ * kbase_page_migrate - Migrate content of old page to new page provided. -+ * -+ * @mapping: Pointer to address_space struct associated with pages. -+ * @new_page: Pointer to the page struct of new page. -+ * @old_page: Pointer to the page struct of old page. -+ * @mode: Mode to determine if migration will be synchronised. -+ * -+ * Callback function for Linux to migrate the content of the old page to the -+ * new page provided. ++ * kbase_mem_phy_alloc_kernel_unmapped - Decrement kernel_mappings ++ * counter for a memory region to allow commit and flag changes + * -+ * Return: 0 on success, error code otherwise. 
++ * @alloc: Pointer to physical pages tracking object + */ -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, -+ struct page *old_page, enum migrate_mode mode) -+#else -+static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) -+#endif ++static inline void ++kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) +{ -+ int err = 0; -+ bool status_mem_pool = false; -+ bool status_free_pt_isolated_in_progress = false; -+ bool status_free_isolated_in_progress = false; -+ bool status_pt_mapped = false; -+ bool status_mapped = false; -+ bool status_not_movable = false; -+ struct kbase_page_metadata *page_md = kbase_page_private(old_page); -+ struct kbase_device *kbdev = NULL; -+ -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+ CSTD_UNUSED(mapping); -+#endif -+ CSTD_UNUSED(mode); -+ -+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) -+ return -EINVAL; -+ -+ if (!spin_trylock(&page_md->migrate_lock)) -+ return -EAGAIN; -+ -+ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { -+ spin_unlock(&page_md->migrate_lock); -+ return -EINVAL; -+ } -+ -+ switch (PAGE_STATUS_GET(page_md->status)) { -+ case MEM_POOL: -+ status_mem_pool = true; -+ kbdev = page_md->data.mem_pool.kbdev; -+ break; -+ case ALLOCATED_MAPPED: -+ status_mapped = true; -+ break; -+ case PT_MAPPED: -+ status_pt_mapped = true; -+ break; -+ case FREE_ISOLATED_IN_PROGRESS: -+ status_free_isolated_in_progress = true; -+ kbdev = page_md->data.free_isolated.kbdev; -+ break; -+ case FREE_PT_ISOLATED_IN_PROGRESS: -+ status_free_pt_isolated_in_progress = true; -+ kbdev = page_md->data.free_pt_isolated.kbdev; -+ break; -+ case NOT_MOVABLE: -+ status_not_movable = true; -+ break; -+ default: -+ /* State should always fall in one of the previous cases! */ -+ err = -EAGAIN; -+ break; -+ } -+ -+ spin_unlock(&page_md->migrate_lock); -+ -+ if (status_mem_pool || status_free_isolated_in_progress || -+ status_free_pt_isolated_in_progress) { -+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; -+ -+ kbase_free_page_metadata(kbdev, old_page, NULL); -+ __ClearPageMovable(old_page); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ put_page(old_page); -+ -+ /* Just free new page to avoid lock contention. */ -+ INIT_LIST_HEAD(&new_page->lru); -+ get_page(new_page); -+ set_page_private(new_page, 0); -+ kbase_free_page_later(kbdev, new_page); -+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); -+ } else if (status_not_movable) { -+ err = -EINVAL; -+ } else if (status_mapped) { -+ err = kbasep_migrate_page_allocated_mapped(old_page, new_page); -+ } else if (status_pt_mapped) { -+ err = kbasep_migrate_page_pt_mapped(old_page, new_page); -+ } -+ -+ /* While we want to preserve the movability of pages for which we return -+ * EAGAIN, according to the kernel docs, movable pages for which a critical -+ * error is returned are called putback on, which may not be what we -+ * expect. -+ */ -+ if (err < 0 && err != -EAGAIN) { -+ __ClearPageMovable(old_page); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); -+ } -+ -+ return err; ++ WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0); +} + +/** -+ * kbase_page_putback - Return isolated page back to kbase. ++ * kbase_mem_is_imported - Indicate whether a memory type is imported + * -+ * @p: Pointer of the page struct of page. 
++ * @type: the memory type + * -+ * Callback function for Linux to return isolated page back to kbase. This -+ * will only be called for a page that has been isolated but failed to -+ * migrate. This function will put back the given page to the state it was -+ * in before it was isolated. ++ * Return: true if the memory type is imported, false otherwise + */ -+static void kbase_page_putback(struct page *p) ++static inline bool kbase_mem_is_imported(enum kbase_memory_type type) +{ -+ bool status_mem_pool = false; -+ bool status_free_isolated_in_progress = false; -+ bool status_free_pt_isolated_in_progress = false; -+ struct kbase_page_metadata *page_md = kbase_page_private(p); -+ struct kbase_device *kbdev = NULL; ++ return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || ++ (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); ++} + -+ /* If we don't have page metadata, the page may not belong to the -+ * driver or may already have been freed, and there's nothing we can do -+ */ -+ if (!page_md) -+ return; ++void kbase_mem_kref_free(struct kref *kref); + -+ spin_lock(&page_md->migrate_lock); ++int kbase_mem_init(struct kbase_device *kbdev); ++void kbase_mem_halt(struct kbase_device *kbdev); ++void kbase_mem_term(struct kbase_device *kbdev); + -+ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { -+ spin_unlock(&page_md->migrate_lock); -+ return; -+ } ++static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) ++{ ++ kref_get(&alloc->kref); ++ return alloc; ++} + -+ switch (PAGE_STATUS_GET(page_md->status)) { -+ case MEM_POOL: -+ status_mem_pool = true; -+ kbdev = page_md->data.mem_pool.kbdev; -+ break; -+ case ALLOCATED_MAPPED: -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); -+ break; -+ case PT_MAPPED: -+ case NOT_MOVABLE: -+ /* Pages should no longer be isolated if they are in a stable state -+ * and used by the driver. -+ */ -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); -+ break; -+ case FREE_ISOLATED_IN_PROGRESS: -+ status_free_isolated_in_progress = true; -+ kbdev = page_md->data.free_isolated.kbdev; -+ break; -+ case FREE_PT_ISOLATED_IN_PROGRESS: -+ status_free_pt_isolated_in_progress = true; -+ kbdev = page_md->data.free_pt_isolated.kbdev; -+ break; -+ default: -+ /* State should always fall in one of the previous cases! */ -+ break; -+ } ++static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) ++{ ++ kref_put(&alloc->kref, kbase_mem_kref_free); ++ return NULL; ++} + -+ spin_unlock(&page_md->migrate_lock); ++/** ++ * struct kbase_va_region - A GPU memory region, and attributes for CPU mappings ++ * ++ * @rblink: Node in a red-black tree of memory regions within the same zone of ++ * the GPU's virtual address space. ++ * @link: Links to neighboring items in a list of growable memory regions ++ * that triggered incremental rendering by growing too much. ++ * @rbtree: Backlink to the red-black tree of memory regions. ++ * @start_pfn: The Page Frame Number in GPU virtual address space. ++ * @user_data: The address of GPU command queue when VA region represents ++ * a ring buffer. ++ * @nr_pages: The size of the region in pages. ++ * @initial_commit: Initial commit, for aligning the start address and ++ * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. ++ * @threshold_pages: If non-zero and the amount of memory committed to a region ++ * that can grow on page fault exceeds this number of pages ++ * then the driver switches to incremental rendering. 
++ * @flags: Flags ++ * @extension: Number of pages allocated on page fault. ++ * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. ++ * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. ++ * @jit_node: Links to neighboring regions in the just-in-time memory pool. ++ * @jit_usage_id: The last just-in-time memory usage ID for this region. ++ * @jit_bin_id: The just-in-time memory bin this region came from. ++ * @va_refcnt: Number of users of this region. Protected by reg_lock. ++ * @no_user_free_count: Number of contexts that want to prevent the region ++ * from being freed by userspace. ++ * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of ++ * an allocated region ++ * The object can be one of: ++ * - u32 value defining the size of the region ++ * - u64 pointer first unused byte in the region ++ * The interpretation of the object depends on ++ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in ++ * jit_info_flags - if it is set, the heap info object ++ * should be interpreted as size. ++ * @used_pages: The current estimate of the number of pages used, which in ++ * normal use is either: ++ * - the initial estimate == va_pages ++ * - the actual pages used, as found by a JIT usage report ++ * Note that since the value is calculated from GPU memory after a ++ * JIT usage report, at any point in time it is allowed to take a ++ * random value that is no greater than va_pages (e.g. it may be ++ * greater than gpu_alloc->nents) ++ */ ++struct kbase_va_region { ++ struct rb_node rblink; ++ struct list_head link; ++ struct rb_root *rbtree; ++ u64 start_pfn; ++ void *user_data; ++ size_t nr_pages; ++ size_t initial_commit; ++ size_t threshold_pages; + -+ /* If page was in a memory pool then just free it to avoid lock contention. The -+ * same is also true to status_free_pt_isolated_in_progress. -+ */ -+ if (status_mem_pool || status_free_isolated_in_progress || -+ status_free_pt_isolated_in_progress) { -+ __ClearPageMovable(p); -+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++/* Free region */ ++#define KBASE_REG_FREE (1ul << 0) ++/* CPU write access */ ++#define KBASE_REG_CPU_WR (1ul << 1) ++/* GPU write access */ ++#define KBASE_REG_GPU_WR (1ul << 2) ++/* No eXecute flag */ ++#define KBASE_REG_GPU_NX (1ul << 3) ++/* Is CPU cached? */ ++#define KBASE_REG_CPU_CACHED (1ul << 4) ++/* Is GPU cached? ++ * Some components within the GPU might only be able to access memory that is ++ * GPU cacheable. Refer to the specific GPU implementation for more details. ++ */ ++#define KBASE_REG_GPU_CACHED (1ul << 5) + -+ if (!WARN_ON_ONCE(!kbdev)) { -+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; ++#define KBASE_REG_GROWABLE (1ul << 6) ++/* Can grow on pf? 
*/ ++#define KBASE_REG_PF_GROW (1ul << 7) + -+ kbase_free_page_later(kbdev, p); -+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); -+ } -+ } -+} ++/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ ++#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + -+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) -+static const struct movable_operations movable_ops = { -+ .isolate_page = kbase_page_isolate, -+ .migrate_page = kbase_page_migrate, -+ .putback_page = kbase_page_putback, -+}; ++/* inner shareable coherency */ ++#define KBASE_REG_SHARE_IN (1ul << 9) ++/* inner & outer shareable coherency */ ++#define KBASE_REG_SHARE_BOTH (1ul << 10) ++ ++#if MALI_USE_CSF ++/* Space for 8 different zones */ ++#define KBASE_REG_ZONE_BITS 3 +#else -+static const struct address_space_operations kbase_address_space_ops = { -+ .isolate_page = kbase_page_isolate, -+ .migratepage = kbase_page_migrate, -+ .putback_page = kbase_page_putback, -+}; ++/* Space for 4 different zones */ ++#define KBASE_REG_ZONE_BITS 2 +#endif + -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) -+{ -+ mutex_lock(&kbdev->fw_load_lock); -+ -+ if (filp) { -+ filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; -+ -+ if (!kbdev->mem_migrate.inode) { -+ kbdev->mem_migrate.inode = filp->f_inode; -+ /* This reference count increment is balanced by iput() -+ * upon termination. -+ */ -+ atomic_inc(&filp->f_inode->i_count); -+ } else { -+ WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); -+ } -+ } ++#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11) ++#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11) ++#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11) + -+ mutex_unlock(&kbdev->fw_load_lock); -+} ++#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) ++#error "Too many zones for the number of zone bits defined" +#endif + -+void kbase_mem_migrate_init(struct kbase_device *kbdev) -+{ -+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; ++/* GPU read access */ ++#define KBASE_REG_GPU_RD (1ul << 14) ++/* CPU read access */ ++#define KBASE_REG_CPU_RD (1ul << 15) + -+ if (kbase_page_migration_enabled < 0) -+ kbase_page_migration_enabled = 0; ++/* Index of chosen MEMATTR for this region (0..7) */ ++#define KBASE_REG_MEMATTR_MASK (7ul << 16) ++#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) ++#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + -+ spin_lock_init(&mem_migrate->free_pages_lock); -+ INIT_LIST_HEAD(&mem_migrate->free_pages_list); ++#define KBASE_REG_PROTECTED (1ul << 19) + -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+ mem_migrate->inode = NULL; -+#endif -+ mem_migrate->free_pages_workq = -+ alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); -+ INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); -+} ++/* Region belongs to a shrinker. ++ * ++ * This can either mean that it is part of the JIT/Ephemeral or tiler heap ++ * shrinker paths. Should be removed only after making sure that there are ++ * no references remaining to it in these paths, as it may cause the physical ++ * backing of the region to disappear during use. ++ */ ++#define KBASE_REG_DONT_NEED (1ul << 20) + -+void kbase_mem_migrate_term(struct kbase_device *kbdev) -+{ -+ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; ++/* Imported buffer is padded? 
*/ ++#define KBASE_REG_IMPORT_PAD (1ul << 21) + -+ if (mem_migrate->free_pages_workq) -+ destroy_workqueue(mem_migrate->free_pages_workq); -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+ iput(mem_migrate->inode); -+#endif -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h -new file mode 100644 -index 000000000..76bbc999e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h -@@ -0,0 +1,108 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++#if MALI_USE_CSF ++/* CSF event memory */ ++#define KBASE_REG_CSF_EVENT (1ul << 22) ++#else ++/* Bit 22 is reserved. + * -+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. ++ * Do not remove, use the next unreserved bit for new flags ++ */ ++#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) ++#endif ++ ++#if !MALI_USE_CSF ++/* The top of the initial commit is aligned to extension pages. ++ * Extent must be a power of 2 ++ */ ++#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) ++#else ++/* Bit 23 is reserved. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Do not remove, use the next unreserved bit for new flags ++ */ ++#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) ++#endif /* !MALI_USE_CSF */ ++ ++/* Bit 24 is currently unused and is available for use for a new flag */ ++ ++/* Memory has permanent kernel side mapping */ ++#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) ++ ++/* GPU VA region has been freed by the userspace, but still remains allocated ++ * due to the reference held by CPU mappings created on the GPU VA region. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * A region with this flag set has had kbase_gpu_munmap() called on it, but can ++ * still be looked-up in the region tracker as a non-free region. Hence must ++ * not create or update any more GPU mappings on such regions because they will ++ * not be unmapped when the region is finally destroyed. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Since such regions are still present in the region tracker, new allocations ++ * attempted with BASE_MEM_SAME_VA might fail if their address intersects with ++ * a region with this flag set. + * ++ * In addition, this flag indicates the gpu_alloc member might no longer valid ++ * e.g. in infinite cache simulation. + */ ++#define KBASE_REG_VA_FREED (1ul << 26) + -+/** -+ * DOC: Base kernel page migration implementation. ++/* If set, the heap info address points to a u32 holding the used size in bytes; ++ * otherwise it points to a u64 holding the lowest address of unused memory. 
+ */ ++#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + -+#define PAGE_STATUS_MASK ((u8)0x3F) -+#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK) -+#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK)) ++/* Allocation is actively used for JIT memory */ ++#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + -+#define PAGE_ISOLATE_SHIFT (7) -+#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT) -+#define PAGE_ISOLATE_SET(status, value) \ -+ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT)) -+#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK)) ++#if MALI_USE_CSF ++/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA ++ * memory zones, and it determines whether they were created with a fixed ++ * GPU VA address requested by the user. ++ */ ++#define KBASE_REG_FIXED_ADDRESS (1ul << 29) ++#else ++#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) ++#endif + -+#define PAGE_MOVABLE_SHIFT (6) -+#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT) -+#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK) -+#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK) ++#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + -+#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK)) ++#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) ++#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) + -+/* Global integer used to determine if module parameter value has been -+ * provided and if page migration feature is enabled. ++#if MALI_USE_CSF ++/* only used with 32-bit clients */ ++/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). + */ -+extern int kbase_page_migration_enabled; -+ -+/** -+ * kbase_alloc_page_metadata - Allocate and initialize page metadata -+ * @kbdev: Pointer to kbase device. -+ * @p: Page to assign metadata to. -+ * @dma_addr: DMA address mapped to paged. -+ * @group_id: Memory group ID associated with the entity that is -+ * allocating the page metadata. -+ * -+ * This will allocate memory for the page's metadata, initialize it and -+ * assign a reference to the page's private field. Importantly, once -+ * the metadata is set and ready this function will mark the page as -+ * movable. -+ * -+ * Return: true if successful or false otherwise. ++#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ ++ (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) ++#else ++/* only used with 32-bit clients */ ++/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the ++ * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 ++ * bytes, see mmap64 man page for reference). So we put the default limit to the ++ * maximum possible on Linux and shrink it down, if required by the GPU, during ++ * initialization. + */ -+bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, -+ u8 group_id); ++#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ ++ (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) ++/* end 32-bit clients only */ ++#endif + -+/** -+ * kbase_free_page_later - Defer freeing of given page. -+ * @kbdev: Pointer to kbase device -+ * @p: Page to free -+ * -+ * This will add given page to a list of pages which will be freed at -+ * a later time. ++/* The starting address and size of the GPU-executable zone are dynamic ++ * and depend on the platform and the number of pages requested by the ++ * user process, with an upper limit of 4 GB. 
+ */ -+void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); ++#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) ++#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + -+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) -+/* -+ * kbase_mem_migrate_set_address_space_ops - Set address space operations -+ * -+ * @kbdev: Pointer to object representing an instance of GPU platform device. -+ * @filp: Pointer to the struct file corresponding to device file -+ * /dev/malixx instance, passed to the file's open method. -+ * -+ * Assign address space operations to the given file struct @filp and -+ * add a reference to @kbdev. ++#if MALI_USE_CSF ++#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) ++#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ ++ KBASE_REG_ZONE_MCU_SHARED_BASE) ++ ++/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit ++ * clients, and 2^43 for 32-bit clients. + */ -+void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); -+#endif ++#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + -+/* -+ * kbase_mem_migrate_init - Initialise kbase page migration -+ * -+ * @kbdev: Pointer to kbase device -+ * -+ * Enables page migration by default based on GPU and setup work queue to -+ * defer freeing pages during page migration callbacks. ++/* Executable zone supporting FIXED/FIXABLE allocations. ++ * It is always 4GB in size. + */ -+void kbase_mem_migrate_init(struct kbase_device *kbdev); + -+/* -+ * kbase_mem_migrate_term - Terminate kbase page migration -+ * -+ * @kbdev: Pointer to kbase device -+ * -+ * This will flush any work left to free pages from page migration -+ * and destroy workqueue associated. ++#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4) ++#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ++ ++/* Non-executable zone supporting FIXED/FIXABLE allocations. ++ * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from ++ * (2^43) up to (2^44)-1 for 32-bit userspace clients. + */ -+void kbase_mem_migrate_term(struct kbase_device *kbdev); -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c -new file mode 100644 -index 000000000..fa8f34d86 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c -@@ -0,0 +1,1029 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
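/*
 * Editorial sketch (not part of the patch): how the KBASE_REG_ZONE_* macros
 * added above encode a zone index into bits 11 and up of a region's flags
 * word, alongside the individual KBASE_REG_* flag bits. The macro bodies are
 * copied from the hunk; MALI_USE_CSF is assumed to be 1 here (3 zone bits)
 * purely so the example is self-contained.
 */
#include <assert.h>

#define KBASE_REG_ZONE_BITS 3   /* assumed MALI_USE_CSF configuration */
#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11)
#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11)
#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11)

#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1)
#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2)

int main(void)
{
        /* Hypothetical flags word: CUSTOM_VA zone plus two unrelated flag bits. */
        unsigned long flags = KBASE_REG_ZONE_CUSTOM_VA | (1ul << 1) | (1ul << 2);

        /* The zone index round-trips and is unaffected by the other bits. */
        assert(KBASE_REG_ZONE_IDX(flags) == 1);
        assert((flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_CUSTOM_VA);
        return 0;
}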
-+ * ++#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5) ++ ++/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so ++ * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. + */ ++#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE -+#include -+#else -+#include +#endif + -+#define pool_dbg(pool, format, ...) \ -+ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ -+ (pool->next_pool) ? "kctx" : "kbdev", \ -+ kbase_mem_pool_size(pool), \ -+ kbase_mem_pool_max_size(pool), \ -+ ##__VA_ARGS__) ++ unsigned long flags; ++ size_t extension; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++ struct kbase_mem_phy_alloc *gpu_alloc; ++ struct list_head jit_node; ++ u16 jit_usage_id; ++ u8 jit_bin_id; + -+#define NOT_DIRTY false -+#define NOT_RECLAIMED false ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* Pointer to an object in GPU memory defining an end of an allocated ++ * region ++ * ++ * The object can be one of: ++ * - u32 value defining the size of the region ++ * - u64 pointer first unused byte in the region ++ * ++ * The interpretation of the object depends on ++ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is ++ * set, the heap info object should be interpreted as size. ++ */ ++ u64 heap_info_gpu_addr; ++ ++ /* The current estimate of the number of pages used, which in normal ++ * use is either: ++ * - the initial estimate == va_pages ++ * - the actual pages used, as found by a JIT usage report ++ * ++ * Note that since the value is calculated from GPU memory after a JIT ++ * usage report, at any point in time it is allowed to take a random ++ * value that is no greater than va_pages (e.g. it may be greater than ++ * gpu_alloc->nents) ++ */ ++ size_t used_pages; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ kbase_refcount_t va_refcnt; ++ atomic_t no_user_free_count; ++}; + +/** -+ * can_alloc_page() - Check if the current thread can allocate a physical page -+ * -+ * @pool: Pointer to the memory pool. -+ * @page_owner: Pointer to the task/process that created the Kbase context -+ * for which a page needs to be allocated. It can be NULL if -+ * the page won't be associated with Kbase context. -+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. -+ * -+ * This function checks if the current thread is a kernel thread and can make a -+ * request to kernel to allocate a physical page. If the kernel thread is allocating -+ * a page for the Kbase context and the process that created the context is exiting -+ * or is being killed, then there is no point in doing a page allocation. -+ * -+ * The check done by the function is particularly helpful when the system is running -+ * low on memory. When a page is allocated from the context of a kernel thread, OoM -+ * killer doesn't consider the kernel thread for killing and kernel keeps retrying -+ * to allocate the page as long as the OoM killer is able to kill processes. -+ * The check allows kernel thread to quickly exit the page allocation loop once OoM -+ * killer has initiated the killing of @page_owner, thereby unblocking the context -+ * termination for @page_owner and freeing of GPU memory allocated by it. 
This helps -+ * in preventing the kernel panic and also limits the number of innocent processes -+ * that get killed. ++ * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a ++ * context or for a device ++ * @zone_bits: A KBASE_REG_ZONE_<...> to query + * -+ * Return: true if the page can be allocated otherwise false. ++ * Return: True if the zone for @zone_bits is a context zone, False otherwise + */ -+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, -+ const bool alloc_from_kthread) ++static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits) +{ -+ if (likely(!alloc_from_kthread || !page_owner)) -+ return true; -+ -+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { -+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", -+ __func__, page_owner->comm, task_pid_nr(page_owner)); -+ return false; -+ } -+ -+ return true; ++ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); ++ return (zone_bits == KBASE_REG_ZONE_SAME_VA || ++#if MALI_USE_CSF ++ zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA || ++#endif ++ zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA); +} + -+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) -+{ -+ ssize_t max_size = kbase_mem_pool_max_size(pool); -+ ssize_t cur_size = kbase_mem_pool_size(pool); -+ -+ return max(max_size - cur_size, (ssize_t)0); -+} ++/* Special marker for failed JIT allocations that still must be marked as ++ * in-use ++ */ ++#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) + -+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) ++static inline bool kbase_is_region_free(struct kbase_va_region *reg) +{ -+ return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); ++ return (!reg || reg->flags & KBASE_REG_FREE); +} + -+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) ++static inline bool kbase_is_region_invalid(struct kbase_va_region *reg) +{ -+ return kbase_mem_pool_size(pool) == 0; ++ return (!reg || reg->flags & KBASE_REG_VA_FREED); +} + -+static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, -+ struct list_head *page_list, size_t *list_size) ++static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) +{ -+ struct kbase_page_metadata *page_md = kbase_page_private(p); -+ bool not_movable = false; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ /* Free the page instead of adding it to the pool if it's not movable. -+ * Only update page status and add the page to the memory pool if -+ * it is not isolated. ++ /* Possibly not all functions that find regions would be using this ++ * helper, so they need to be checked when maintaining this function. 
+ */ -+ spin_lock(&page_md->migrate_lock); -+ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { -+ not_movable = true; -+ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); -+ page_md->data.mem_pool.pool = pool; -+ page_md->data.mem_pool.kbdev = pool->kbdev; -+ list_add(&p->lru, page_list); -+ (*list_size)++; -+ } -+ spin_unlock(&page_md->migrate_lock); -+ -+ if (not_movable) { -+ kbase_free_page_later(pool->kbdev, p); -+ pool_dbg(pool, "skipping a not movable page\n"); -+ } -+ -+ return not_movable; ++ return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); +} + -+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, -+ struct page *p) ++/** ++ * kbase_is_region_shrinkable - Check if a region is "shrinkable". ++ * A shrinkable regions is a region for which its backing pages (reg->gpu_alloc->pages) ++ * can be freed at any point, even though the kbase_va_region structure itself ++ * may have been refcounted. ++ * Regions that aren't on a shrinker, but could be shrunk at any point in future ++ * without warning are still considered "shrinkable" (e.g. Active JIT allocs) ++ * ++ * @reg: Pointer to region ++ * ++ * Return: true if the region is "shrinkable", false if not. ++ */ ++static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) +{ -+ bool queue_work_to_free = false; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ if (!pool->order && kbase_page_migration_enabled) { -+ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) -+ queue_work_to_free = true; -+ } else { -+ list_add(&p->lru, &pool->page_list); -+ pool->cur_size++; -+ } -+ -+ if (queue_work_to_free) { -+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; -+ -+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); -+ } -+ -+ pool_dbg(pool, "added page\n"); ++ return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); +} + -+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) ++void kbase_remove_va_region(struct kbase_device *kbdev, ++ struct kbase_va_region *reg); ++static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, ++ struct kbase_va_region *reg) +{ -+ kbase_mem_pool_lock(pool); -+ kbase_mem_pool_add_locked(pool, p); -+ kbase_mem_pool_unlock(pool); ++ /* If region was mapped then remove va region*/ ++ if (reg->start_pfn) ++ kbase_remove_va_region(kbdev, reg); ++ ++ /* To detect use-after-free in debug builds */ ++ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); ++ kfree(reg); +} + -+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, -+ struct list_head *page_list, size_t nr_pages) ++static inline struct kbase_va_region *kbase_va_region_alloc_get( ++ struct kbase_context *kctx, struct kbase_va_region *region) +{ -+ bool queue_work_to_free = false; -+ -+ lockdep_assert_held(&pool->pool_lock); ++ WARN_ON(!kbase_refcount_read(®ion->va_refcnt)); ++ WARN_ON(kbase_refcount_read(®ion->va_refcnt) == INT_MAX); + -+ if (!pool->order && kbase_page_migration_enabled) { -+ struct page *p, *tmp; ++ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", ++ kbase_refcount_read(®ion->va_refcnt), (void *)region); ++ kbase_refcount_inc(®ion->va_refcnt); + -+ list_for_each_entry_safe(p, tmp, page_list, lru) { -+ list_del_init(&p->lru); -+ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) -+ queue_work_to_free = true; -+ } -+ } else { -+ 
list_splice(page_list, &pool->page_list); -+ pool->cur_size += nr_pages; -+ } ++ return region; ++} + -+ if (queue_work_to_free) { -+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; ++static inline struct kbase_va_region *kbase_va_region_alloc_put( ++ struct kbase_context *kctx, struct kbase_va_region *region) ++{ ++ WARN_ON(kbase_refcount_read(®ion->va_refcnt) <= 0); ++ WARN_ON(region->flags & KBASE_REG_FREE); + -+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); -+ } ++ if (kbase_refcount_dec_and_test(®ion->va_refcnt)) ++ kbase_region_refcnt_free(kctx->kbdev, region); ++ else ++ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", ++ kbase_refcount_read(®ion->va_refcnt), (void *)region); + -+ pool_dbg(pool, "added %zu pages\n", nr_pages); ++ return NULL; +} + -+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, -+ struct list_head *page_list, size_t nr_pages) ++/** ++ * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region. ++ * A region that must not be freed by userspace indicates that it is owned by some other ++ * kbase subsystem, for example tiler heaps, JIT memory or CSF queues. ++ * Such regions must not be shrunk (i.e. have their backing pages freed), except by the ++ * current owner. ++ * Hence, callers cannot rely on this check alone to determine if a region might be shrunk ++ * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). ++ * ++ * @region: Pointer to region. ++ * ++ * Return: true if userspace cannot free the region, false if userspace can free the region. ++ */ ++static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region) +{ -+ kbase_mem_pool_lock(pool); -+ kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); -+ kbase_mem_pool_unlock(pool); ++ return atomic_read(®ion->no_user_free_count) > 0; +} + -+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool, -+ enum kbase_page_status status) ++/** ++ * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region. ++ * Calling this function will prevent the region to be shrunk by parts of kbase that ++ * don't own the region (as long as the count stays above zero). Refer to ++ * kbase_va_region_is_no_user_free() for more information. ++ * ++ * @region: Pointer to region (not shrinkable). ++ * ++ * Return: the pointer to the region passed as argument. ++ */ ++static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region) +{ -+ struct page *p; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ if (kbase_mem_pool_is_empty(pool)) -+ return NULL; -+ -+ p = list_first_entry(&pool->page_list, struct page, lru); -+ -+ if (!pool->order && kbase_page_migration_enabled) { -+ struct kbase_page_metadata *page_md = kbase_page_private(p); -+ -+ spin_lock(&page_md->migrate_lock); -+ WARN_ON(PAGE_STATUS_GET(page_md->status) != (u8)MEM_POOL); -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); -+ spin_unlock(&page_md->migrate_lock); -+ } ++ WARN_ON(kbase_is_region_shrinkable(region)); ++ WARN_ON(atomic_read(®ion->no_user_free_count) == INT_MAX); + -+ list_del_init(&p->lru); -+ pool->cur_size--; ++ /* non-atomic as kctx->reg_lock is held */ ++ atomic_inc(®ion->no_user_free_count); ++} + -+ pool_dbg(pool, "removed page\n"); ++/** ++ * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region. ++ * ++ * @region: Pointer to region (not shrinkable). 
++ */ ++static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region) ++{ ++ WARN_ON(!kbase_va_region_is_no_user_free(region)); + -+ return p; ++ atomic_dec(®ion->no_user_free_count); +} + -+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool, -+ enum kbase_page_status status) ++/* Common functions */ ++static inline struct tagged_addr *kbase_get_cpu_phy_pages( ++ struct kbase_va_region *reg) +{ -+ struct page *p; -+ -+ kbase_mem_pool_lock(pool); -+ p = kbase_mem_pool_remove_locked(pool, status); -+ kbase_mem_pool_unlock(pool); ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ return p; ++ return reg->cpu_alloc->pages; +} + -+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, -+ struct page *p) ++static inline struct tagged_addr *kbase_get_gpu_phy_pages( ++ struct kbase_va_region *reg) +{ -+ struct device *dev = pool->kbdev->dev; -+ dma_addr_t dma_addr = pool->order ? kbase_dma_addr_as_priv(p) : kbase_dma_addr(p); ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ dma_sync_single_for_device(dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); ++ return reg->gpu_alloc->pages; +} + -+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, -+ struct page *p) ++static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) +{ -+ int i; ++ KBASE_DEBUG_ASSERT(reg); ++ /* if no alloc object the backed size naturally is 0 */ ++ if (!reg->cpu_alloc) ++ return 0; + -+ for (i = 0; i < (1U << pool->order); i++) -+ clear_highpage(p+i); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ kbase_mem_pool_sync_page(pool, p); ++ return reg->cpu_alloc->nents; +} + -+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, -+ struct page *p) ++#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ ++ ++static inline struct kbase_mem_phy_alloc *kbase_alloc_create( ++ struct kbase_context *kctx, size_t nr_pages, ++ enum kbase_memory_type type, int group_id) +{ -+ /* Zero page before spilling */ -+ kbase_mem_pool_zero_page(next_pool, p); ++ struct kbase_mem_phy_alloc *alloc; ++ size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; ++ size_t per_page_size = sizeof(*alloc->pages); + -+ kbase_mem_pool_add(next_pool, p); -+} ++ /* Imported pages may have page private data already in use */ ++ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ alloc_size += nr_pages * ++ sizeof(*alloc->imported.user_buf.dma_addrs); ++ per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); ++ } + -+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) -+{ -+ struct page *p; -+ gfp_t gfp = __GFP_ZERO; -+ struct kbase_device *const kbdev = pool->kbdev; -+ struct device *const dev = kbdev->dev; -+ dma_addr_t dma_addr; -+ int i; ++ /* ++ * Prevent nr_pages*per_page_size + sizeof(*alloc) from ++ * wrapping around. 
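/*
 * Editorial sketch (not part of the patch): the wrap-around guard used by
 * kbase_alloc_create() in this hunk, reduced to plain size_t arithmetic. The
 * header and per-element sizes are stand-ins for sizeof(*alloc) and
 * sizeof(*alloc->pages); only the shape of the check matters.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Largest element count for which hdr + n * per_elem cannot overflow size_t. */
static bool alloc_size_fits(size_t hdr, size_t per_elem, size_t n)
{
        return n <= (((size_t)-1) - hdr) / per_elem;
}

int main(void)
{
        const size_t hdr = 128;         /* stand-in for sizeof(*alloc) */
        const size_t per_elem = 8;      /* stand-in for sizeof(*alloc->pages) */
        const size_t limit = (((size_t)-1) - hdr) / per_elem;

        assert(alloc_size_fits(hdr, per_elem, limit));
        /* One element more and hdr + n * per_elem would wrap, so it is refused. */
        assert(!alloc_size_fits(hdr, per_elem, limit + 1));
        return 0;
}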
++ */ ++ if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) ++ / per_page_size)) ++ return ERR_PTR(-ENOMEM); + -+ /* don't warn on higher order failures */ -+ if (pool->order) -+ gfp |= GFP_HIGHUSER | __GFP_NOWARN; ++ /* Allocate based on the size to reduce internal fragmentation of vmem */ ++ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) ++ alloc = vzalloc(alloc_size); + else -+ gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; -+ -+ p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, -+ pool->group_id, gfp, pool->order); -+ if (!p) -+ return NULL; ++ alloc = kzalloc(alloc_size, GFP_KERNEL); + -+ dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), -+ DMA_BIDIRECTIONAL); ++ if (!alloc) ++ return ERR_PTR(-ENOMEM); + -+ if (dma_mapping_error(dev, dma_addr)) { -+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, -+ pool->group_id, p, pool->order); -+ return NULL; ++ if (type == KBASE_MEM_TYPE_NATIVE) { ++ alloc->imported.native.nr_struct_pages = ++ (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; ++ kbase_process_page_usage_inc(kctx, ++ alloc->imported.native.nr_struct_pages); + } + -+ /* Setup page metadata for 4KB pages when page migration is enabled */ -+ if (!pool->order && kbase_page_migration_enabled) { -+ INIT_LIST_HEAD(&p->lru); -+ if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { -+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, -+ pool->order); -+ return NULL; -+ } -+ } else { -+ WARN_ON(dma_addr != page_to_phys(p)); -+ for (i = 0; i < (1u << pool->order); i++) -+ kbase_set_dma_addr_as_priv(p + i, dma_addr + PAGE_SIZE * i); -+ } ++ /* Store allocation method */ ++ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) ++ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; + -+ return p; -+} ++ kref_init(&alloc->kref); ++ atomic_set(&alloc->gpu_mappings, 0); ++ atomic_set(&alloc->kernel_mappings, 0); ++ alloc->nents = 0; ++ alloc->pages = (void *)(alloc + 1); ++ INIT_LIST_HEAD(&alloc->mappings); ++ alloc->type = type; ++ alloc->group_id = group_id; + -+static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) -+{ -+ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; ++ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) ++ alloc->imported.user_buf.dma_addrs = ++ (void *) (alloc->pages + nr_pages); + -+ if (!pool->order && kbase_page_migration_enabled) -+ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); ++ return alloc; +} + -+void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) ++static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, ++ struct kbase_context *kctx, int group_id) +{ -+ struct kbase_device *kbdev; -+ -+ if (WARN_ON(!pool)) -+ return; -+ if (WARN_ON(!p)) -+ return; ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(!reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(!reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); + -+ kbdev = pool->kbdev; ++ reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, ++ KBASE_MEM_TYPE_NATIVE, group_id); ++ if (IS_ERR(reg->cpu_alloc)) ++ return PTR_ERR(reg->cpu_alloc); ++ else if (!reg->cpu_alloc) ++ return -ENOMEM; + -+ if (!pool->order && kbase_page_migration_enabled) { -+ kbase_free_page_later(kbdev, p); -+ pool_dbg(pool, "page to be freed to kernel later\n"); ++ reg->cpu_alloc->imported.native.kctx = kctx; ++ if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) ++ && (reg->flags & KBASE_REG_CPU_CACHED)) { ++ 
reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, ++ KBASE_MEM_TYPE_NATIVE, group_id); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) { ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ return -ENOMEM; ++ } ++ reg->gpu_alloc->imported.native.kctx = kctx; + } else { -+ int i; -+ dma_addr_t dma_addr = kbase_dma_addr_as_priv(p); -+ -+ for (i = 0; i < (1u << pool->order); i++) -+ kbase_clear_dma_addr_as_priv(p + i); ++ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ } + -+ dma_unmap_page(kbdev->dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); ++ mutex_lock(&kctx->jit_evict_lock); ++ INIT_LIST_HEAD(®->cpu_alloc->evict_node); ++ INIT_LIST_HEAD(®->gpu_alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); + -+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); ++ reg->flags &= ~KBASE_REG_FREE; + -+ pool_dbg(pool, "freed page to kernel\n"); -+ } ++ return 0; +} + -+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, -+ size_t nr_to_shrink) -+{ -+ struct page *p; -+ size_t i; ++/* ++ * Max size for kbdev memory pool (in pages) ++ */ ++#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) + -+ lockdep_assert_held(&pool->pool_lock); ++/* ++ * Max size for kctx memory pool (in pages) ++ */ ++#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) + -+ for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { -+ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); -+ kbase_mem_pool_free_page(pool, p); -+ } ++/* ++ * The order required for a 2MB page allocation (2^order * 4KB = 2MB) ++ */ ++#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 + -+ /* Freeing of pages will be deferred when page migration is enabled. */ -+ enqueue_free_pool_pages_work(pool); ++/* ++ * The order required for a 4KB page allocation ++ */ ++#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 + -+ return i; ++/** ++ * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in ++ * initial configuration of a memory pool ++ * ++ * @config: Initial configuration for a physical memory pool ++ * @max_size: Maximum number of free pages that a pool created from ++ * @config can hold ++ */ ++static inline void kbase_mem_pool_config_set_max_size( ++ struct kbase_mem_pool_config *const config, size_t const max_size) ++{ ++ WRITE_ONCE(config->max_size, max_size); +} + -+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, -+ size_t nr_to_shrink) ++/** ++ * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from ++ * initial configuration of a memory pool ++ * ++ * @config: Initial configuration for a physical memory pool ++ * ++ * Return: Maximum number of free pages that a pool created from @config ++ * can hold ++ */ ++static inline size_t kbase_mem_pool_config_get_max_size( ++ const struct kbase_mem_pool_config *const config) +{ -+ size_t nr_freed; ++ return READ_ONCE(config->max_size); ++} + -+ kbase_mem_pool_lock(pool); -+ nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_mem_pool_init - Create a memory pool for a kbase device ++ * @pool: Memory pool to initialize ++ * @config: Initial configuration for the memory pool ++ * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) ++ * @group_id: A memory group ID to be passed to a platform-specific ++ * memory group manager, if present. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). 
++ * @kbdev: Kbase device where memory is used ++ * @next_pool: Pointer to the next pool or NULL. ++ * ++ * Allocations from @pool are in whole pages. Each @pool has a free list where ++ * pages can be quickly allocated from. The free list is initially empty and ++ * filled whenever pages are freed back to the pool. The number of free pages ++ * in the pool will in general not exceed @max_size, but the pool may in ++ * certain corner cases grow above @max_size. ++ * ++ * If @next_pool is not NULL, we will allocate from @next_pool before going to ++ * the memory group manager. Similarly pages can spill over to @next_pool when ++ * @pool is full. Pages are zeroed before they spill over to another pool, to ++ * prevent leaking information between applications. ++ * ++ * A shrinker is registered so that Linux mm can reclaim pages from the pool as ++ * needed. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, ++ unsigned int order, int group_id, struct kbase_device *kbdev, ++ struct kbase_mem_pool *next_pool); + -+ return nr_freed; -+} ++/** ++ * kbase_mem_pool_term - Destroy a memory pool ++ * @pool: Memory pool to destroy ++ * ++ * Pages in the pool will spill over to @next_pool (if available) or freed to ++ * the kernel. ++ */ ++void kbase_mem_pool_term(struct kbase_mem_pool *pool); + -+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, -+ struct task_struct *page_owner) -+{ -+ struct page *p; -+ size_t i; -+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); ++/** ++ * kbase_mem_pool_alloc - Allocate a page from memory pool ++ * @pool: Memory pool to allocate from ++ * ++ * Allocations from the pool are made as follows: ++ * 1. If there are free pages in the pool, allocate a page from @pool. ++ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page ++ * from @next_pool. ++ * 3. Return NULL if no memory in the pool ++ * ++ * Return: Pointer to allocated page, or NULL if allocation failed. ++ * ++ * Note : This function should not be used if the pool lock is held. Use ++ * kbase_mem_pool_alloc_locked() instead. ++ */ ++struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); + -+ kbase_mem_pool_lock(pool); ++/** ++ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool ++ * @pool: Memory pool to allocate from ++ * ++ * If there are free pages in the pool, this function allocates a page from ++ * @pool. This function does not use @next_pool. ++ * ++ * Return: Pointer to allocated page, or NULL if allocation failed. ++ * ++ * Note : Caller must hold the pool lock. ++ */ ++struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); + -+ pool->dont_reclaim = true; -+ for (i = 0; i < nr_to_grow; i++) { -+ if (pool->dying) { -+ pool->dont_reclaim = false; -+ kbase_mem_pool_shrink_locked(pool, nr_to_grow); -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_mem_pool_free - Free a page to memory pool ++ * @pool: Memory pool where page should be freed ++ * @page: Page to free to the pool ++ * @dirty: Whether some of the page may be dirty in the cache. ++ * ++ * Pages are freed to the pool as follows: ++ * 1. If @pool is not full, add @page to @pool. ++ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to ++ * @next_pool. ++ * 3. Finally, free @page to the kernel. ++ * ++ * Note : This function should not be used if the pool lock is held. Use ++ * kbase_mem_pool_free_locked() instead. 
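++ *
++ * A minimal, illustrative alloc/free pairing (assuming the page may have
++ * been written through the CPU cache, hence @dirty is passed as true):
++ *
++ *   p = kbase_mem_pool_alloc(pool);
++ *   if (p) {
++ *           ... use the page ...
++ *           kbase_mem_pool_free(pool, p, true);
++ *   }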
++ */ ++void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, ++ bool dirty); + -+ return -ENOMEM; -+ } -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_mem_pool_free_locked - Free a page to memory pool ++ * @pool: Memory pool where page should be freed ++ * @p: Page to free to the pool ++ * @dirty: Whether some of the page may be dirty in the cache. ++ * ++ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is ++ * freed to the kernel. This function does not use @next_pool. ++ * ++ * Note : Caller must hold the pool lock. ++ */ ++void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, ++ bool dirty); + -+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) -+ return -ENOMEM; ++/** ++ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool ++ * @pool: Memory pool to allocate from ++ * @nr_4k_pages: Number of pages to allocate ++ * @pages: Pointer to array where the physical address of the allocated ++ * pages will be stored. ++ * @partial_allowed: If fewer pages allocated is allowed ++ * @page_owner: Pointer to the task that created the Kbase context for which ++ * the pages are being allocated. It can be NULL if the pages ++ * won't be associated with any Kbase context. ++ * ++ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. ++ * ++ * Return: ++ * On success number of pages allocated (could be less than nr_pages if ++ * partial_allowed). ++ * On error an error code. ++ * ++ * Note : This function should not be used if the pool lock is held. Use ++ * kbase_mem_pool_alloc_pages_locked() instead. ++ * ++ * The caller must not hold vm_lock, as this could cause a deadlock if ++ * the kernel OoM killer runs. If the caller must allocate pages while holding ++ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. ++ */ ++int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, ++ struct tagged_addr *pages, bool partial_allowed, ++ struct task_struct *page_owner); + -+ p = kbase_mem_alloc_page(pool); -+ if (!p) { -+ kbase_mem_pool_lock(pool); -+ pool->dont_reclaim = false; -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool ++ * @pool: Memory pool to allocate from ++ * @nr_4k_pages: Number of pages to allocate ++ * @pages: Pointer to array where the physical address of the allocated ++ * pages will be stored. ++ * ++ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This ++ * version does not allocate new pages from the kernel, and therefore will never ++ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. ++ * ++ * As new pages can not be allocated, the caller must ensure there are ++ * sufficient pages in the pool. Usage of this function should look like : ++ * ++ * kbase_gpu_vm_lock(kctx); ++ * kbase_mem_pool_lock(pool) ++ * while (kbase_mem_pool_size(pool) < pages_required) { ++ * kbase_mem_pool_unlock(pool) ++ * kbase_gpu_vm_unlock(kctx); ++ * kbase_mem_pool_grow(pool) ++ * kbase_gpu_vm_lock(kctx); ++ * kbase_mem_pool_lock(pool) ++ * } ++ * kbase_mem_pool_alloc_pages_locked(pool) ++ * kbase_mem_pool_unlock(pool) ++ * Perform other processing that requires vm_lock... ++ * kbase_gpu_vm_unlock(kctx); ++ * ++ * This ensures that the pool can be grown to the required size and that the ++ * allocation can complete without another thread using the newly grown pages. ++ * ++ * Return: ++ * On success number of pages allocated. 
++ * On error an error code. ++ * ++ * Note : Caller must hold the pool lock. ++ */ ++int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, ++ size_t nr_4k_pages, struct tagged_addr *pages); + -+ return -ENOMEM; -+ } ++/** ++ * kbase_mem_pool_free_pages - Free pages to memory pool ++ * @pool: Memory pool where pages should be freed ++ * @nr_pages: Number of pages to free ++ * @pages: Pointer to array holding the physical addresses of the pages to ++ * free. ++ * @dirty: Whether any pages may be dirty in the cache. ++ * @reclaimed: Whether the pages where reclaimable and thus should bypass ++ * the pool and go straight to the kernel. ++ * ++ * Like kbase_mem_pool_free() but optimized for freeing many pages. ++ */ ++void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ struct tagged_addr *pages, bool dirty, bool reclaimed); + -+ kbase_mem_pool_lock(pool); -+ kbase_mem_pool_add_locked(pool, p); -+ } -+ pool->dont_reclaim = false; -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_mem_pool_free_pages_locked - Free pages to memory pool ++ * @pool: Memory pool where pages should be freed ++ * @nr_pages: Number of pages to free ++ * @pages: Pointer to array holding the physical addresses of the pages to ++ * free. ++ * @dirty: Whether any pages may be dirty in the cache. ++ * @reclaimed: Whether the pages where reclaimable and thus should bypass ++ * the pool and go straight to the kernel. ++ * ++ * Like kbase_mem_pool_free() but optimized for freeing many pages. ++ */ ++void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, ++ size_t nr_pages, struct tagged_addr *pages, bool dirty, ++ bool reclaimed); + -+ return 0; ++/** ++ * kbase_mem_pool_size - Get number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * ++ * Note: the size of the pool may in certain corner cases exceed @max_size! ++ * ++ * Return: Number of free pages in the pool ++ */ ++static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) ++{ ++ return READ_ONCE(pool->cur_size); +} -+KBASE_EXPORT_TEST_API(kbase_mem_pool_grow); + -+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) ++/** ++ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * ++ * Return: Maximum number of free pages in the pool ++ */ ++static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) +{ -+ size_t cur_size; -+ int err = 0; -+ -+ cur_size = kbase_mem_pool_size(pool); ++ return pool->max_size; ++} + -+ if (new_size > pool->max_size) -+ new_size = pool->max_size; + -+ if (new_size < cur_size) -+ kbase_mem_pool_shrink(pool, cur_size - new_size); -+ else if (new_size > cur_size) -+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); ++/** ++ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * @max_size: Maximum number of free pages the pool can hold ++ * ++ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. ++ * For details see kbase_mem_pool_shrink(). ++ */ ++void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); + -+ if (err) { -+ size_t grown_size = kbase_mem_pool_size(pool); ++/** ++ * kbase_mem_pool_grow - Grow the pool ++ * @pool: Memory pool to grow ++ * @nr_to_grow: Number of pages to add to the pool ++ * @page_owner: Pointer to the task that created the Kbase context for which ++ * the memory pool is being grown. 
It can be NULL if the pages ++ * to be allocated won't be associated with any Kbase context. ++ * ++ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to ++ * become larger than the maximum size specified. ++ * ++ * Return: 0 on success, -ENOMEM if unable to allocate sufficent pages ++ */ ++int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, ++ struct task_struct *page_owner); + -+ dev_warn(pool->kbdev->dev, -+ "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", -+ (new_size - cur_size), (grown_size - cur_size)); -+ } -+} ++/** ++ * kbase_mem_pool_trim - Grow or shrink the pool to a new size ++ * @pool: Memory pool to trim ++ * @new_size: New number of pages in the pool ++ * ++ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but ++ * not above the max_size for the pool. ++ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. ++ */ ++void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); + -+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) -+{ -+ size_t cur_size; -+ size_t nr_to_shrink; ++/** ++ * kbase_mem_pool_mark_dying - Mark that this pool is dying ++ * @pool: Memory pool ++ * ++ * This will cause any ongoing allocation operations (eg growing on page fault) ++ * to be terminated. ++ */ ++void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); + -+ kbase_mem_pool_lock(pool); ++/** ++ * kbase_mem_alloc_page - Allocate a new page for a device ++ * @pool: Memory pool to allocate a page from ++ * ++ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that ++ * function can fail in the event the pool is empty. ++ * ++ * Return: A new page or NULL if no memory ++ */ ++struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); + -+ pool->max_size = max_size; ++/** ++ * kbase_mem_pool_free_page - Free a page from a memory pool. ++ * @pool: Memory pool to free a page from ++ * @p: Page to free ++ * ++ * This will free any associated data stored for the page and release ++ * the page back to the kernel. ++ */ ++void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p); + -+ cur_size = kbase_mem_pool_size(pool); -+ if (max_size < cur_size) { -+ nr_to_shrink = cur_size - max_size; -+ kbase_mem_pool_shrink_locked(pool, nr_to_shrink); -+ } ++/** ++ * kbase_region_tracker_init - Initialize the region tracker data structure ++ * @kctx: kbase context ++ * ++ * Return: 0 if success, negative error code otherwise. ++ */ ++int kbase_region_tracker_init(struct kbase_context *kctx); + -+ kbase_mem_pool_unlock(pool); -+} -+KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); ++/** ++ * kbase_region_tracker_init_jit - Initialize the just-in-time memory ++ * allocation region ++ * @kctx: Kbase context. ++ * @jit_va_pages: Size of the JIT region in pages. ++ * @max_allocations: Maximum number of allocations allowed for the JIT region. ++ * Valid range is 0..%BASE_JIT_ALLOC_COUNT. ++ * @trim_level: Trim level for the JIT region. ++ * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. ++ * @group_id: The physical group ID from which to allocate JIT memory. ++ * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @phys_pages_limit: Maximum number of physical pages to use to back the JIT ++ * region. Must not exceed @jit_va_pages. ++ * ++ * Return: 0 if success, negative error code otherwise. 
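++ *
++ * An illustrative call staying within the ranges documented above (the
++ * argument values are placeholders, not recommendations):
++ *
++ *   err = kbase_region_tracker_init_jit(kctx, jit_va_pages,
++ *                                       BASE_JIT_ALLOC_COUNT, 0, 0,
++ *                                       jit_va_pages);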
++ */ ++int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, ++ int max_allocations, int trim_level, int group_id, ++ u64 phys_pages_limit); + -+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, -+ struct shrink_control *sc) -+{ -+ struct kbase_mem_pool *pool; -+ size_t pool_size; ++/** ++ * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region ++ * @kctx: kbase context ++ * @exec_va_pages: Size of the JIT region in pages. ++ * It must not be greater than 4 GB. ++ * ++ * Return: 0 if success, negative error code otherwise. ++ */ ++int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + -+ pool = container_of(s, struct kbase_mem_pool, reclaim); -+ -+ kbase_mem_pool_lock(pool); -+ if (pool->dont_reclaim && !pool->dying) { -+ kbase_mem_pool_unlock(pool); -+ /* Tell shrinker to skip reclaim -+ * even though freeable pages are available -+ */ -+ return 0; -+ } -+ pool_size = kbase_mem_pool_size(pool); -+ kbase_mem_pool_unlock(pool); -+ -+ return pool_size; -+} -+ -+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, -+ struct shrink_control *sc) -+{ -+ struct kbase_mem_pool *pool; -+ unsigned long freed; -+ -+ pool = container_of(s, struct kbase_mem_pool, reclaim); -+ -+ kbase_mem_pool_lock(pool); -+ if (pool->dont_reclaim && !pool->dying) { -+ kbase_mem_pool_unlock(pool); -+ /* Tell shrinker that reclaim can't be made and -+ * do not attempt again for this reclaim context. -+ */ -+ return SHRINK_STOP; -+ } ++/** ++ * kbase_region_tracker_term - Terminate the JIT region ++ * @kctx: kbase context ++ */ ++void kbase_region_tracker_term(struct kbase_context *kctx); + -+ pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); ++/** ++ * kbase_region_tracker_term_rbtree - Free memory for a region tracker ++ * ++ * @rbtree: Region tracker tree root ++ * ++ * This will free all the regions within the region tracker ++ */ ++void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); + -+ freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); ++struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( ++ struct kbase_context *kctx, u64 gpu_addr); ++struct kbase_va_region *kbase_find_region_enclosing_address( ++ struct rb_root *rbtree, u64 gpu_addr); + -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_region_tracker_find_region_base_address - Check that a pointer is ++ * actually a valid region. ++ * @kctx: kbase context containing the region ++ * @gpu_addr: pointer to check ++ * ++ * Must be called with context lock held. 
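++ *
++ * A minimal, illustrative lookup (assuming @gpu_addr came from an earlier
++ * allocation in the same context):
++ *
++ *   kbase_gpu_vm_lock(kctx);
++ *   reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
++ *   if (reg)
++ *           ... use reg while the lock is held ...
++ *   kbase_gpu_vm_unlock(kctx);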
++ * ++ * Return: pointer to the valid region on success, NULL otherwise ++ */ ++struct kbase_va_region *kbase_region_tracker_find_region_base_address( ++ struct kbase_context *kctx, u64 gpu_addr); ++struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, ++ u64 gpu_addr); + -+ pool_dbg(pool, "reclaim freed %ld pages\n", freed); ++struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, ++ u64 start_pfn, size_t nr_pages, int zone); ++void kbase_free_alloced_region(struct kbase_va_region *reg); ++int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 addr, size_t nr_pages, size_t align); ++int kbase_add_va_region_rbtree(struct kbase_device *kbdev, ++ struct kbase_va_region *reg, u64 addr, size_t nr_pages, ++ size_t align); + -+ return freed; -+} ++bool kbase_check_alloc_flags(unsigned long flags); ++bool kbase_check_import_flags(unsigned long flags); + -+int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, -+ unsigned int order, int group_id, struct kbase_device *kbdev, -+ struct kbase_mem_pool *next_pool) ++static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages) +{ -+ if (WARN_ON(group_id < 0) || -+ WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { -+ return -EINVAL; ++ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { ++ dev_dbg( ++ kbdev->dev, ++ "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", ++ (unsigned long long)va_pages); ++ return false; + } + -+ pool->cur_size = 0; -+ pool->max_size = kbase_mem_pool_config_get_max_size(config); -+ pool->order = order; -+ pool->group_id = group_id; -+ pool->kbdev = kbdev; -+ pool->next_pool = next_pool; -+ pool->dying = false; -+ atomic_set(&pool->isolation_in_progress_cnt, 0); -+ -+ spin_lock_init(&pool->pool_lock); -+ INIT_LIST_HEAD(&pool->page_list); -+ -+ pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; -+ pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; -+ pool->reclaim.seeks = DEFAULT_SEEKS; -+ /* Kernel versions prior to 3.1 : -+ * struct shrinker does not define batch -+ */ -+ pool->reclaim.batch = 0; -+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE -+ register_shrinker(&pool->reclaim); -+#else -+ register_shrinker(&pool->reclaim, "mali-mem-pool"); -+#endif -+ -+ pool_dbg(pool, "initialized\n"); -+ -+ return 0; -+} -+KBASE_EXPORT_TEST_API(kbase_mem_pool_init); -+ -+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) -+{ -+ kbase_mem_pool_lock(pool); -+ pool->dying = true; -+ kbase_mem_pool_unlock(pool); ++ return true; +} + -+void kbase_mem_pool_term(struct kbase_mem_pool *pool) ++static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages) +{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; -+ struct page *p, *tmp; -+ size_t nr_to_spill = 0; -+ LIST_HEAD(spill_list); -+ LIST_HEAD(free_list); -+ int i; -+ -+ pool_dbg(pool, "terminate()\n"); ++ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { ++ dev_dbg( ++ kbdev->dev, ++ "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", ++ (unsigned long long)va_pages); ++ return false; ++ } + -+ unregister_shrinker(&pool->reclaim); ++ return true; ++} + -+ kbase_mem_pool_lock(pool); -+ pool->max_size = 0; ++/** ++ * kbase_check_alloc_sizes - check user space sizes parameters for an ++ * allocation ++ * ++ * @kctx: kbase context ++ * @flags: The flags passed from user space ++ * @va_pages: The size of the 
requested region, in pages. ++ * @commit_pages: Number of pages to commit initially. ++ * @extension: Number of pages to grow by on GPU page fault and/or alignment ++ * (depending on flags) ++ * ++ * Makes checks on the size parameters passed in from user space for a memory ++ * allocation call, with respect to the flags requested. ++ * ++ * Return: 0 if sizes are valid for these flags, negative error code otherwise ++ */ ++int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, ++ u64 va_pages, u64 commit_pages, u64 extension); + -+ if (next_pool && !kbase_mem_pool_is_full(next_pool)) { -+ /* Spill to next pool (may overspill) */ -+ nr_to_spill = kbase_mem_pool_capacity(next_pool); -+ nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); ++/** ++ * kbase_update_region_flags - Convert user space flags to kernel region flags ++ * ++ * @kctx: kbase context ++ * @reg: The region to update the flags on ++ * @flags: The flags passed from user space ++ * ++ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and ++ * this function will fail if the system does not support system coherency. ++ * ++ * Return: 0 if successful, -EINVAL if the flags are not supported ++ */ ++int kbase_update_region_flags(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned long flags); + -+ /* Zero pages first without holding the next_pool lock */ -+ for (i = 0; i < nr_to_spill; i++) { -+ p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS); -+ if (p) -+ list_add(&p->lru, &spill_list); -+ } -+ } ++/** ++ * kbase_gpu_vm_lock() - Acquire the per-context region list lock ++ * @kctx: KBase context ++ * ++ * Care must be taken when making an allocation whilst holding this lock, because of interaction ++ * with the Kernel's OoM-killer and use of this lock in &vm_operations_struct close() handlers. ++ * ++ * If this lock is taken during a syscall, and/or the allocation is 'small' then it is safe to use. ++ * ++ * If the caller is not in a syscall, and the allocation is 'large', then it must not hold this ++ * lock. ++ * ++ * This is because the kernel OoM killer might target the process corresponding to that same kbase ++ * context, and attempt to call the context's close() handlers for its open VMAs. This is safe if ++ * the allocating caller is in a syscall, because the VMA close() handlers are delayed until all ++ * syscalls have finished (noting that no new syscalls can start as the remaining user threads will ++ * have been killed too), and so there is no possibility of contention between the thread ++ * allocating with this lock held, and the VMA close() handler. ++ * ++ * However, outside of a syscall (e.g. a kworker or other kthread), one of kbase's VMA close() ++ * handlers (kbase_cpu_vm_close()) also takes this lock, and so prevents the process from being ++ * killed until the caller of the function allocating memory has released this lock. On subsequent ++ * retries for allocating a page, the OoM killer would be re-invoked but skips over the process ++ * stuck in its close() handler. ++ * ++ * Also because the caller is not in a syscall, the page allocation code in the kernel is not aware ++ * that the allocation is being done on behalf of another process, and so does not realize that ++ * process has received a kill signal due to an OoM, and so will continually retry with the OoM ++ * killer until enough memory has been released, or until all other killable processes have been ++ * killed (at which point the kernel halts with a panic). 
++ * ++ * However, if the allocation outside of a syscall is small enough to be satisfied by killing ++ * another process, then the allocation completes, the caller releases this lock, and ++ * kbase_cpu_vm_close() can unblock and allow the process to be killed. ++ * ++ * Hence, this is effectively a deadlock with kbase_cpu_vm_close(), except that if the memory ++ * allocation is small enough the deadlock can be resolved. For that reason, such a memory deadlock ++ * is NOT discovered with CONFIG_PROVE_LOCKING. ++ * ++ * If this may be called outside of a syscall, consider moving allocations outside of this lock, or ++ * use __GFP_NORETRY for such allocations (which will allow direct-reclaim attempts, but will ++ * prevent OoM kills to satisfy the allocation, and will just fail the allocation instead). ++ */ ++void kbase_gpu_vm_lock(struct kbase_context *kctx); + -+ while (!kbase_mem_pool_is_empty(pool)) { -+ /* Free remaining pages to kernel */ -+ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); -+ if (p) -+ list_add(&p->lru, &free_list); -+ } ++/** ++ * kbase_gpu_vm_unlock() - Release the per-context region list lock ++ * @kctx: KBase context ++ */ ++void kbase_gpu_vm_unlock(struct kbase_context *kctx); + -+ kbase_mem_pool_unlock(pool); ++int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + -+ if (next_pool && nr_to_spill) { -+ list_for_each_entry(p, &spill_list, lru) -+ kbase_mem_pool_zero_page(pool, p); ++/** ++ * kbase_gpu_mmap - Register region and map it on the GPU. ++ * ++ * @kctx: kbase context containing the region ++ * @reg: the region to add ++ * @addr: the address to insert the region at ++ * @nr_pages: the number of pages in the region ++ * @align: the minimum alignment in pages ++ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. ++ * ++ * Call kbase_add_va_region() and map the region on the GPU. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 addr, size_t nr_pages, size_t align, ++ enum kbase_caller_mmu_sync_info mmu_sync_info); + -+ /* Add new page list to next_pool */ -+ kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); ++/** ++ * kbase_gpu_munmap - Remove the region from the GPU and unregister it. ++ * ++ * @kctx: KBase context ++ * @reg: The region to remove ++ * ++ * Must be called with context lock held. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); + -+ pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); -+ } ++/** ++ * kbase_mmu_update - Configure an address space on the GPU to the specified ++ * MMU tables ++ * ++ * @kbdev: Kbase device structure ++ * @mmut: The set of MMU tables to be configured on the address space ++ * @as_nr: The address space to be configured ++ * ++ * The caller has the following locking conditions: ++ * - It must hold kbase_device->mmu_hw_mutex ++ * - It must hold the hwaccess_lock ++ */ ++void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ int as_nr); + -+ list_for_each_entry_safe(p, tmp, &free_list, lru) { -+ list_del_init(&p->lru); -+ kbase_mem_pool_free_page(pool, p); -+ } ++/** ++ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. ++ * @kctx: Kbase context ++ * ++ * Disable and perform the required cache maintenance to remove the all ++ * data from provided kbase context from the GPU caches. 
++ * ++ * The caller has the following locking conditions: ++ * - It must hold kbase_device->mmu_hw_mutex ++ * - It must hold the hwaccess_lock ++ */ ++void kbase_mmu_disable(struct kbase_context *kctx); + -+ /* Freeing of pages will be deferred when page migration is enabled. */ -+ enqueue_free_pool_pages_work(pool); ++/** ++ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified ++ * address space. ++ * @kbdev: Kbase device ++ * @as_nr: The address space number to set to unmapped. ++ * ++ * This function must only be called during reset/power-up and it used to ++ * ensure the registers are in a known state. ++ * ++ * The caller must hold kbdev->mmu_hw_mutex. ++ */ ++void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + -+ /* Before returning wait to make sure there are no pages undergoing page isolation -+ * which will require reference to this pool. -+ */ -+ while (atomic_read(&pool->isolation_in_progress_cnt)) -+ cpu_relax(); ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + -+ pool_dbg(pool, "terminated\n"); -+} -+KBASE_EXPORT_TEST_API(kbase_mem_pool_term); ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++/** ++ * kbase_mmu_dump() - Dump the MMU tables to a buffer. ++ * ++ * @kctx: The kbase context to dump ++ * @nr_pages: The number of pages to allocate for the buffer. ++ * ++ * This function allocates a buffer (of @c nr_pages pages) to hold a dump ++ * of the MMU tables and fills it. If the buffer is too small ++ * then the return value will be NULL. ++ * ++ * The GPU vm lock must be held when calling this function. ++ * ++ * The buffer returned should be freed with @ref vfree when it is no longer ++ * required. ++ * ++ * Return: The address of the buffer containing the MMU dump or NULL on error ++ * (including if the @c nr_pages is too small) ++ */ ++void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); ++#endif + -+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) -+{ -+ struct page *p; ++/** ++ * kbase_sync_now - Perform cache maintenance on a memory region ++ * ++ * @kctx: The kbase context of the region ++ * @sset: A syncset structure describing the region and direction of the ++ * synchronisation required ++ * ++ * Return: 0 on success or error code ++ */ ++int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); ++void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, ++ struct tagged_addr gpu_pa, off_t offset, size_t size, ++ enum kbase_sync_type sync_fn); + -+ do { -+ pool_dbg(pool, "alloc()\n"); -+ p = kbase_mem_pool_remove(pool, ALLOCATE_IN_PROGRESS); ++/* OS specific functions */ ++int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); ++int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); ++void kbase_os_mem_map_lock(struct kbase_context *kctx); ++void kbase_os_mem_map_unlock(struct kbase_context *kctx); + -+ if (p) -+ return p; ++/** ++ * kbasep_os_process_page_usage_update() - Update the memory allocation ++ * counters for the current process. ++ * ++ * @kctx: The kbase context ++ * @pages: The desired delta to apply to the memory usage counters. ++ * ++ * OS specific call to updates the current memory allocation counters ++ * for the current process with the supplied delta. 
++ */ + -+ pool = pool->next_pool; -+ } while (pool); ++void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + -+ return NULL; -+} ++/** ++ * kbase_process_page_usage_inc() - Add to the memory allocation counters for ++ * the current process ++ * ++ * @kctx: The kernel base context used for the allocation. ++ * @pages: The desired delta to apply to the memory usage counters. ++ * ++ * OS specific call to add to the current memory allocation counters for ++ * the current process by the supplied amount. ++ */ + -+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) ++static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) +{ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ pool_dbg(pool, "alloc_locked()\n"); -+ return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); ++ kbasep_os_process_page_usage_update(kctx, pages); +} + -+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, -+ bool dirty) -+{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; -+ -+ pool_dbg(pool, "free()\n"); -+ -+ if (!kbase_mem_pool_is_full(pool)) { -+ /* Add to our own pool */ -+ if (dirty) -+ kbase_mem_pool_sync_page(pool, p); -+ -+ kbase_mem_pool_add(pool, p); -+ } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { -+ /* Spill to next pool */ -+ kbase_mem_pool_spill(next_pool, p); -+ } else { -+ /* Free page */ -+ kbase_mem_pool_free_page(pool, p); -+ /* Freeing of pages will be deferred when page migration is enabled. */ -+ enqueue_free_pool_pages_work(pool); -+ } -+} ++/** ++ * kbase_process_page_usage_dec() - Subtract from the memory allocation ++ * counters for the current process. ++ * ++ * @kctx: The kernel base context used for the allocation. ++ * @pages: The desired delta to apply to the memory usage counters. ++ * ++ * OS specific call to subtract from the current memory allocation counters ++ * for the current process by the supplied amount. ++ */ + -+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, -+ bool dirty) ++static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) +{ -+ pool_dbg(pool, "free_locked()\n"); -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ if (!kbase_mem_pool_is_full(pool)) { -+ /* Add to our own pool */ -+ if (dirty) -+ kbase_mem_pool_sync_page(pool, p); -+ -+ kbase_mem_pool_add_locked(pool, p); -+ } else { -+ /* Free page */ -+ kbase_mem_pool_free_page(pool, p); -+ /* Freeing of pages will be deferred when page migration is enabled. 
*/ -+ enqueue_free_pool_pages_work(pool); -+ } ++ kbasep_os_process_page_usage_update(kctx, 0 - pages); +} + -+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, -+ struct tagged_addr *pages, bool partial_allowed, -+ struct task_struct *page_owner) -+{ -+ struct page *p; -+ size_t nr_from_pool; -+ size_t i = 0; -+ int err = -ENOMEM; -+ size_t nr_pages_internal; -+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); -+ -+ nr_pages_internal = nr_4k_pages / (1u << (pool->order)); -+ -+ if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) -+ return -EINVAL; -+ -+ pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages); -+ pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal); -+ -+ /* Get pages from this pool */ -+ kbase_mem_pool_lock(pool); -+ nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); ++/** ++ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU ++ * mapping of a memory allocation containing a given address range ++ * ++ * @kctx: The kernel base context used for the allocation. ++ * @uaddr: Start of the CPU virtual address range. ++ * @size: Size of the CPU virtual address range (in bytes). ++ * @offset: The offset from the start of the allocation to the specified CPU ++ * virtual address. ++ * ++ * Searches for a CPU mapping of any part of any region that fully encloses the ++ * CPU virtual address range specified by @uaddr and @size. Returns a failure ++ * indication if only part of the address range lies within a CPU mapping. ++ * ++ * Return: 0 if offset was obtained successfully. Error code otherwise. ++ */ ++int kbasep_find_enclosing_cpu_mapping_offset( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset); + -+ while (nr_from_pool--) { -+ int j; ++/** ++ * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of ++ * the start of GPU virtual memory region which encloses @gpu_addr for the ++ * @size length in bytes ++ * ++ * @kctx: The kernel base context within which the memory is searched. ++ * @gpu_addr: GPU virtual address for which the region is sought; defines ++ * the beginning of the provided region. ++ * @size: The length (in bytes) of the provided region for which the ++ * GPU virtual memory region is sought. ++ * @start: Pointer to the location where the address of the start of ++ * the found GPU virtual memory region is. ++ * @offset: Pointer to the location where the offset of @gpu_addr into ++ * the found GPU virtual memory region is. ++ * ++ * Searches for the memory region in GPU virtual memory space which contains ++ * the region defined by the @gpu_addr and @size, where @gpu_addr is the ++ * beginning and @size the length in bytes of the provided region. If found, ++ * the location of the start address of the GPU virtual memory region is ++ * passed in @start pointer and the location of the offset of the region into ++ * the GPU virtual memory region is passed in @offset pointer. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++int kbasep_find_enclosing_gpu_mapping_start_and_offset( ++ struct kbase_context *kctx, ++ u64 gpu_addr, size_t size, u64 *start, u64 *offset); + -+ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); ++/** ++ * kbase_alloc_phy_pages_helper - Allocates physical pages. ++ * @alloc: allocation object to add pages to ++ * @nr_pages_requested: number of physical pages to allocate ++ * ++ * Allocates @nr_pages_requested and updates the alloc object. 
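++ *
++ * A minimal, illustrative call from a syscall path (see the Note below for
++ * why the locking context matters):
++ *
++ *   kbase_gpu_vm_lock(kctx);
++ *   err = kbase_alloc_phy_pages_helper(reg->cpu_alloc, nr_pages);
++ *   kbase_gpu_vm_unlock(kctx);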
++ * ++ * Note: if kbase_gpu_vm_lock() is to be held around this function to ensure thread-safe updating ++ * of @alloc, then refer to the documentation of kbase_gpu_vm_lock() about the requirements of ++ * either calling during a syscall, or ensuring the allocation is small. These requirements prevent ++ * an effective deadlock between the kernel's OoM killer and kbase's VMA close() handlers, which ++ * could take kbase_gpu_vm_lock() too. ++ * ++ * If the requirements of kbase_gpu_vm_lock() cannot be satisfied when calling this function, but ++ * @alloc must still be updated in a thread-safe way, then instead use ++ * kbase_alloc_phy_pages_helper_locked() and restructure callers into the sequence outlined there. ++ * ++ * This function cannot be used from interrupt context ++ * ++ * Return: 0 if all pages have been successfully allocated. Error code otherwise ++ */ ++int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, ++ size_t nr_pages_requested); + -+ if (pool->order) { -+ pages[i++] = as_tagged_tag(page_to_phys(p), -+ HUGE_HEAD | HUGE_PAGE); -+ for (j = 1; j < (1u << pool->order); j++) -+ pages[i++] = as_tagged_tag(page_to_phys(p) + -+ PAGE_SIZE * j, -+ HUGE_PAGE); -+ } else { -+ pages[i++] = as_tagged(page_to_phys(p)); -+ } -+ } -+ kbase_mem_pool_unlock(pool); ++/** ++ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. ++ * @alloc: allocation object to add pages to ++ * @pool: Memory pool to allocate from ++ * @nr_pages_requested: number of physical pages to allocate ++ * ++ * @prealloc_sa: Information about the partial allocation if the amount of memory requested ++ * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be ++ * allocated by the caller if kbdev->pagesize_2mb is enabled. ++ * ++ * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new ++ * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be ++ * called whilst a thread operating outside of a syscall has held the region list lock ++ * (kbase_gpu_vm_lock()), as it will not cause an effective deadlock with VMA close() handlers used ++ * by the OoM killer. ++ * ++ * As new pages can not be allocated, the caller must ensure there are sufficient pages in the ++ * pool. Usage of this function should look like : ++ * ++ * kbase_gpu_vm_lock(kctx); ++ * kbase_mem_pool_lock(pool) ++ * while (kbase_mem_pool_size(pool) < pages_required) { ++ * kbase_mem_pool_unlock(pool) ++ * kbase_gpu_vm_unlock(kctx); ++ * kbase_mem_pool_grow(pool) ++ * kbase_gpu_vm_lock(kctx); ++ * kbase_mem_pool_lock(pool) ++ * } ++ * kbase_alloc_phy_pages_helper_locked(pool) ++ * kbase_mem_pool_unlock(pool) ++ * // Perform other processing that requires vm_lock... ++ * kbase_gpu_vm_unlock(kctx); ++ * ++ * This ensures that the pool can be grown to the required size and that the allocation can ++ * complete without another thread using the newly grown pages. ++ * ++ * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the ++ * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the ++ * mempools from alloc->imported.native.kctx->mem_pools.small[]. ++ * ++ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we ++ * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. 
@prealloc_sa ++ * shall be set to NULL if it has been consumed by this function to indicate that the caller no ++ * longer owns it and should not access it further. ++ * ++ * Note: Caller must hold @pool->pool_lock ++ * ++ * Return: Pointer to array of allocated pages. NULL on failure. ++ */ ++struct tagged_addr *kbase_alloc_phy_pages_helper_locked( ++ struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, ++ size_t nr_pages_requested, ++ struct kbase_sub_alloc **prealloc_sa); + -+ if (i != nr_4k_pages && pool->next_pool) { -+ /* Allocate via next pool */ -+ err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i, -+ partial_allowed, page_owner); ++/** ++ * kbase_free_phy_pages_helper() - Free physical pages. ++ * ++ * @alloc: allocation object to free pages from ++ * @nr_pages_to_free: number of physical pages to free ++ * ++ * Free @nr_pages_to_free pages and updates the alloc object. ++ * ++ * Return: 0 on success, otherwise a negative error code ++ */ ++int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + -+ if (err < 0) -+ goto err_rollback; ++/** ++ * kbase_free_phy_pages_helper_locked - Free pages allocated with ++ * kbase_alloc_phy_pages_helper_locked() ++ * @alloc: Allocation object to free pages from ++ * @pool: Memory pool to return freed pages to ++ * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() ++ * @nr_pages_to_free: Number of physical pages to free ++ * ++ * This function atomically frees pages allocated with ++ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page ++ * array that is returned by that function. @pool must be the pool that the ++ * pages were originally allocated from. ++ * ++ * If the mem_pool has been unlocked since the allocation then ++ * kbase_free_phy_pages_helper() should be used instead. ++ */ ++void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, ++ struct kbase_mem_pool *pool, struct tagged_addr *pages, ++ size_t nr_pages_to_free); + -+ i += err; ++static inline void kbase_set_dma_addr_as_priv(struct page *p, dma_addr_t dma_addr) ++{ ++ SetPagePrivate(p); ++ if (sizeof(dma_addr_t) > sizeof(p->private)) { ++ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the ++ * private field stays the same. 
So we have to be clever and ++ * use the fact that we only store DMA addresses of whole pages, ++ * so the low bits should be zero ++ */ ++ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); ++ set_page_private(p, dma_addr >> PAGE_SHIFT); + } else { -+ /* Get any remaining pages from kernel */ -+ while (i != nr_4k_pages) { -+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) -+ goto err_rollback; -+ -+ p = kbase_mem_alloc_page(pool); -+ if (!p) { -+ if (partial_allowed) -+ goto done; -+ else -+ goto err_rollback; -+ } -+ -+ if (pool->order) { -+ int j; -+ -+ pages[i++] = as_tagged_tag(page_to_phys(p), -+ HUGE_PAGE | -+ HUGE_HEAD); -+ for (j = 1; j < (1u << pool->order); j++) { -+ phys_addr_t phys; -+ -+ phys = page_to_phys(p) + PAGE_SIZE * j; -+ pages[i++] = as_tagged_tag(phys, -+ HUGE_PAGE); -+ } -+ } else { -+ pages[i++] = as_tagged(page_to_phys(p)); -+ } -+ } ++ set_page_private(p, dma_addr); + } -+ -+done: -+ pool_dbg(pool, "alloc_pages(%zu) done\n", i); -+ return i; -+ -+err_rollback: -+ kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); -+ return err; +} + -+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, -+ size_t nr_4k_pages, struct tagged_addr *pages) ++static inline dma_addr_t kbase_dma_addr_as_priv(struct page *p) +{ -+ struct page *p; -+ size_t i; -+ size_t nr_pages_internal; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ nr_pages_internal = nr_4k_pages / (1u << (pool->order)); -+ -+ if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) -+ return -EINVAL; -+ -+ pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); -+ pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", -+ nr_pages_internal); -+ -+ if (kbase_mem_pool_size(pool) < nr_pages_internal) { -+ pool_dbg(pool, "Failed alloc\n"); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < nr_pages_internal; i++) { -+ int j; -+ -+ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); -+ if (pool->order) { -+ *pages++ = as_tagged_tag(page_to_phys(p), -+ HUGE_HEAD | HUGE_PAGE); -+ for (j = 1; j < (1u << pool->order); j++) { -+ *pages++ = as_tagged_tag(page_to_phys(p) + -+ PAGE_SIZE * j, -+ HUGE_PAGE); -+ } -+ } else { -+ *pages++ = as_tagged(page_to_phys(p)); -+ } -+ } ++ if (sizeof(dma_addr_t) > sizeof(p->private)) ++ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; + -+ return nr_4k_pages; ++ return (dma_addr_t)page_private(p); +} + -+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, -+ size_t nr_pages, struct tagged_addr *pages, -+ bool zero, bool sync) ++static inline void kbase_clear_dma_addr_as_priv(struct page *p) +{ -+ struct page *p; -+ size_t nr_to_pool = 0; -+ LIST_HEAD(new_page_list); -+ size_t i; -+ -+ if (!nr_pages) -+ return; -+ -+ pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", -+ nr_pages, zero, sync); -+ -+ /* Zero/sync pages first without holding the pool lock */ -+ for (i = 0; i < nr_pages; i++) { -+ if (unlikely(!as_phys_addr_t(pages[i]))) -+ continue; -+ -+ if (is_huge_head(pages[i]) || !is_huge(pages[i])) { -+ p = as_page(pages[i]); -+ if (zero) -+ kbase_mem_pool_zero_page(pool, p); -+ else if (sync) -+ kbase_mem_pool_sync_page(pool, p); -+ -+ list_add(&p->lru, &new_page_list); -+ nr_to_pool++; -+ } -+ pages[i] = as_tagged(0); -+ } -+ -+ /* Add new page list to pool */ -+ kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); -+ -+ pool_dbg(pool, "add_array(%zu) added %zu pages\n", -+ nr_pages, nr_to_pool); ++ ClearPagePrivate(p); +} + -+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool 
*pool, -+ size_t nr_pages, struct tagged_addr *pages, -+ bool zero, bool sync) ++static inline struct kbase_page_metadata *kbase_page_private(struct page *p) +{ -+ struct page *p; -+ size_t nr_to_pool = 0; -+ LIST_HEAD(new_page_list); -+ size_t i; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ if (!nr_pages) -+ return; -+ -+ pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", -+ nr_pages, zero, sync); -+ -+ /* Zero/sync pages first */ -+ for (i = 0; i < nr_pages; i++) { -+ if (unlikely(!as_phys_addr_t(pages[i]))) -+ continue; -+ -+ if (is_huge_head(pages[i]) || !is_huge(pages[i])) { -+ p = as_page(pages[i]); -+ if (zero) -+ kbase_mem_pool_zero_page(pool, p); -+ else if (sync) -+ kbase_mem_pool_sync_page(pool, p); -+ -+ list_add(&p->lru, &new_page_list); -+ nr_to_pool++; -+ } -+ pages[i] = as_tagged(0); -+ } -+ -+ /* Add new page list to pool */ -+ kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); -+ -+ pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", -+ nr_pages, nr_to_pool); ++ return (struct kbase_page_metadata *)page_private(p); +} + -+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ struct tagged_addr *pages, bool dirty, bool reclaimed) ++static inline dma_addr_t kbase_dma_addr(struct page *p) +{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; -+ struct page *p; -+ size_t nr_to_pool; -+ LIST_HEAD(to_pool_list); -+ size_t i = 0; -+ bool pages_released = false; -+ -+ pool_dbg(pool, "free_pages(%zu):\n", nr_pages); -+ -+ if (!reclaimed) { -+ /* Add to this pool */ -+ nr_to_pool = kbase_mem_pool_capacity(pool); -+ nr_to_pool = min(nr_pages, nr_to_pool); -+ -+ kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); -+ -+ i += nr_to_pool; -+ -+ if (i != nr_pages && next_pool) { -+ /* Spill to next pool (may overspill) */ -+ nr_to_pool = kbase_mem_pool_capacity(next_pool); -+ nr_to_pool = min(nr_pages - i, nr_to_pool); ++ if (kbase_page_migration_enabled) ++ return kbase_page_private(p)->dma_addr; + -+ kbase_mem_pool_add_array(next_pool, nr_to_pool, -+ pages + i, true, dirty); -+ i += nr_to_pool; -+ } -+ } ++ return kbase_dma_addr_as_priv(p); ++} + -+ /* Free any remaining pages to kernel */ -+ for (; i < nr_pages; i++) { -+ if (unlikely(!as_phys_addr_t(pages[i]))) -+ continue; ++static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa) ++{ ++ phys_addr_t pa = as_phys_addr_t(tagged_pa); ++ struct page *page = pfn_to_page(PFN_DOWN(pa)); ++ dma_addr_t dma_addr = ++ is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); + -+ if (is_huge(pages[i]) && !is_huge_head(pages[i])) { -+ pages[i] = as_tagged(0); -+ continue; -+ } -+ p = as_page(pages[i]); ++ return dma_addr; ++} + -+ kbase_mem_pool_free_page(pool, p); -+ pages[i] = as_tagged(0); -+ pages_released = true; -+ } ++/** ++ * kbase_flush_mmu_wqs() - Flush MMU workqueues. ++ * @kbdev: Device pointer. ++ * ++ * This function will cause any outstanding page or bus faults to be processed. ++ * It should be called prior to powering off the GPU. ++ */ ++void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + -+ /* Freeing of pages will be deferred when page migration is enabled. 
*/ -+ if (pages_released) -+ enqueue_free_pool_pages_work(pool); ++/** ++ * kbase_sync_single_for_device - update physical memory and give GPU ownership ++ * @kbdev: Device pointer ++ * @handle: DMA address of region ++ * @size: Size of region to sync ++ * @dir: DMA data direction ++ */ + -+ pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); -+} ++void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir); + ++/** ++ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership ++ * @kbdev: Device pointer ++ * @handle: DMA address of region ++ * @size: Size of region to sync ++ * @dir: DMA data direction ++ */ + -+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, -+ size_t nr_pages, struct tagged_addr *pages, bool dirty, -+ bool reclaimed) -+{ -+ struct page *p; -+ size_t nr_to_pool; -+ LIST_HEAD(to_pool_list); -+ size_t i = 0; -+ bool pages_released = false; ++void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir); + -+ lockdep_assert_held(&pool->pool_lock); ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. ++ * @kctx: kbase context ++ */ ++void kbase_jit_debugfs_init(struct kbase_context *kctx); ++#endif /* CONFIG_DEBUG_FS */ + -+ pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); ++/** ++ * kbase_jit_init - Initialize the JIT memory pool management ++ * @kctx: kbase context ++ * ++ * Return: zero on success or negative error number on failure. ++ */ ++int kbase_jit_init(struct kbase_context *kctx); + -+ if (!reclaimed) { -+ /* Add to this pool */ -+ nr_to_pool = kbase_mem_pool_capacity(pool); -+ nr_to_pool = min(nr_pages, nr_to_pool); ++/** ++ * kbase_jit_allocate - Allocate JIT memory ++ * @kctx: kbase context ++ * @info: JIT allocation information ++ * @ignore_pressure_limit: Whether the JIT memory pressure limit is ignored ++ * ++ * Return: JIT allocation on success or NULL on failure. ++ */ ++struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, ++ const struct base_jit_alloc_info *info, ++ bool ignore_pressure_limit); + -+ kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, -+ dirty); ++/** ++ * kbase_jit_free - Free a JIT allocation ++ * @kctx: kbase context ++ * @reg: JIT allocation ++ * ++ * Frees a JIT allocation and places it into the free pool for later reuse. ++ */ ++void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + -+ i += nr_to_pool; -+ } ++/** ++ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing ++ * @reg: JIT allocation ++ */ ++void kbase_jit_backing_lost(struct kbase_va_region *reg); + -+ /* Free any remaining pages to kernel */ -+ for (; i < nr_pages; i++) { -+ if (unlikely(!as_phys_addr_t(pages[i]))) -+ continue; ++/** ++ * kbase_jit_evict - Evict a JIT allocation from the pool ++ * @kctx: kbase context ++ * ++ * Evict the least recently used JIT allocation from the pool. This can be ++ * required if normal VA allocations are failing due to VA exhaustion. ++ * ++ * Return: True if a JIT allocation was freed, false otherwise. 
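++ *
++ * An illustrative retry pattern, where try_va_alloc() stands in for
++ * whichever VA allocation is failing with -ENOMEM:
++ *
++ *   do {
++ *           err = try_va_alloc(kctx);
++ *   } while (err == -ENOMEM && kbase_jit_evict(kctx));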
++ */ ++bool kbase_jit_evict(struct kbase_context *kctx); + -+ if (is_huge(pages[i]) && !is_huge_head(pages[i])) { -+ pages[i] = as_tagged(0); -+ continue; -+ } ++/** ++ * kbase_jit_term - Terminate the JIT memory pool management ++ * @kctx: kbase context ++ */ ++void kbase_jit_term(struct kbase_context *kctx); + -+ p = as_page(pages[i]); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/** ++ * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of ++ * kbase_trace_jit_report_gpu_mem() that should only be called once the ++ * corresponding tracepoint is verified to be enabled ++ * @kctx: kbase context ++ * @reg: Just-in-time memory region to trace ++ * @flags: combination of values from enum kbase_jit_report_flags ++ */ ++void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned int flags); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ kbase_mem_pool_free_page(pool, p); -+ pages[i] = as_tagged(0); -+ pages_released = true; -+ } ++/** ++ * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used ++ * to make a JIT report ++ * @kctx: kbase context ++ * @reg: Just-in-time memory region to trace ++ * @flags: combination of values from enum kbase_jit_report_flags ++ * ++ * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. ++ * ++ * In case that tracepoint is not enabled, this function should have the same ++ * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid ++ * conditional branches) ++ * ++ * This can take the reg_lock on @kctx, do not use in places where this lock is ++ * already held. ++ * ++ * Note: this has to be a macro because at this stage the tracepoints have not ++ * been included. Also gives no opportunity for the compiler to mess up ++ * inlining it. ++ */ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ ++ do { \ ++ if (trace_mali_jit_report_gpu_mem_enabled()) \ ++ kbase_trace_jit_report_gpu_mem_trace_enabled( \ ++ (kctx), (reg), (flags)); \ ++ } while (0) ++#else ++#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ ++ CSTD_NOP(kctx, reg, flags) ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ /* Freeing of pages will be deferred when page migration is enabled. 
*/ -+ if (pages_released) -+ enqueue_free_pool_pages_work(pool); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/** ++ * kbase_jit_report_update_pressure - safely update the JIT physical page ++ * pressure and JIT region's estimate of used_pages ++ * @kctx: kbase context, to update the current physical pressure ++ * @reg: Just-in-time memory region to update with @new_used_pages ++ * @new_used_pages: new value of number of pages used in the JIT region ++ * @flags: combination of values from enum kbase_jit_report_flags ++ * ++ * Takes care of: ++ * - correctly updating the pressure given the current reg->used_pages and ++ * new_used_pages ++ * - then updating the %kbase_va_region used_pages member ++ * ++ * Precondition: ++ * - new_used_pages <= reg->nr_pages ++ */ ++void kbase_jit_report_update_pressure(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 new_used_pages, ++ unsigned int flags); + -+ pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c -new file mode 100644 -index 000000000..3b1b2bae1 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c -@@ -0,0 +1,184 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++/** ++ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages ++ * possible to satisfy a new JIT allocation + * -+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of JIT physical pages by which trimming is requested. ++ * The actual number of pages trimmed could differ. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Before allocating a new just-in-time memory region or reusing a previous ++ * one, ensure that the total JIT physical page usage also will not exceed the ++ * pressure limit. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * If there are no reported-on allocations, then we already guarantee this will ++ * be the case - because our current pressure then only comes from the va_pages ++ * of each JIT region, hence JIT physical page usage is guaranteed to be ++ * bounded by this. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * However as soon as JIT allocations become "reported on", the pressure is ++ * lowered to allow new JIT regions to be allocated. It is after such a point ++ * that the total JIT physical page usage could (either now or in the future on ++ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly ++ * allocated JIT regions. Hence, trim any "reported on" regions. + * ++ * Any pages freed will go into the pool and be allocated from there in ++ * kbase_mem_alloc(). 
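/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * calling pattern described for kbase_jit_report_update_pressure() above:
 * clamp the reported value so the documented precondition
 * (new_used_pages <= reg->nr_pages) holds, then let the helper reconcile the
 * context's physical pressure with reg->used_pages. The wrapper name is made
 * up, and KBASE_JIT_REPORT_ON_ALLOC_OR_FREE is assumed to be one of the
 * values of enum kbase_jit_report_flags defined elsewhere in the driver.
 */
#if MALI_JIT_PRESSURE_LIMIT_BASE
static void example_report_jit_usage(struct kbase_context *kctx,
				     struct kbase_va_region *reg,
				     u64 used_pages)
{
	/* Respect the precondition documented for the helper. */
	if (used_pages > reg->nr_pages)
		used_pages = reg->nr_pages;

	kbase_jit_report_update_pressure(kctx, reg, used_pages,
					 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */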
+ */ ++void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, ++ size_t needed_pages); + -+#include -+#include -+ -+#include "mali_kbase_mem_pool_debugfs.h" -+#include "mali_kbase_debugfs_helper.h" -+ -+void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, -+ size_t const value) ++/* ++ * Same as kbase_jit_request_phys_increase(), except that Caller is supposed ++ * to take jit_evict_lock also on @kctx before calling this function. ++ */ ++static inline void ++kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, ++ size_t needed_pages) +{ -+ struct kbase_mem_pool *const mem_pools = array; ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); + -+ if (WARN_ON(!mem_pools) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return; ++ kctx->jit_phys_pages_to_be_allocated += needed_pages; + -+ kbase_mem_pool_trim(&mem_pools[index], value); ++ kbase_jit_trim_necessary_pages(kctx, ++ kctx->jit_phys_pages_to_be_allocated); +} + -+void kbase_mem_pool_debugfs_set_max_size(void *const array, -+ size_t const index, size_t const value) ++/** ++ * kbase_jit_request_phys_increase() - Increment the backing pages count and do ++ * the required trimming before allocating pages for a JIT allocation. ++ * ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of pages to be allocated for the JIT allocation. ++ * ++ * This function needs to be called before allocating backing pages for a ++ * just-in-time memory region. The backing pages are currently allocated when, ++ * ++ * - A new JIT region is created. ++ * - An old JIT region is reused from the cached pool. ++ * - GPU page fault occurs for the active JIT region. ++ * - Backing is grown for the JIT region through the commit ioctl. ++ * ++ * This function would ensure that the total JIT physical page usage does not ++ * exceed the pressure limit even when the backing pages get allocated ++ * simultaneously for multiple JIT allocations from different threads. ++ * ++ * There should be a matching call to kbase_jit_done_phys_increase(), after ++ * the pages have been allocated and accounted against the active JIT ++ * allocation. ++ * ++ * Caller is supposed to take reg_lock on @kctx before calling this function. ++ */ ++static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, ++ size_t needed_pages) +{ -+ struct kbase_mem_pool *const mem_pools = array; -+ -+ if (WARN_ON(!mem_pools) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return; ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); + -+ kbase_mem_pool_set_max_size(&mem_pools[index], value); ++ mutex_lock(&kctx->jit_evict_lock); ++ kbase_jit_request_phys_increase_locked(kctx, needed_pages); ++ mutex_unlock(&kctx->jit_evict_lock); +} + -+size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) ++/** ++ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the ++ * allocation of pages for a JIT allocation. ++ * ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of pages that were allocated for the JIT allocation. ++ * ++ * This function should be called after backing pages have been allocated and ++ * accounted against the active JIT allocation. 
++ * The call should be made when the following have been satisfied: ++ * when the allocation is on the jit_active_head. ++ * when additional needed_pages have been allocated. ++ * kctx->reg_lock was held during the above and has not yet been unlocked. ++ * Failure to call this function before unlocking the kctx->reg_lock when ++ * either the above have changed may result in over-accounting the memory. ++ * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of ++ * the memory. ++ * ++ * A matching call to kbase_jit_request_phys_increase() should have been made, ++ * before the allocation of backing pages. ++ * ++ * Caller is supposed to take reg_lock on @kctx before calling this function. ++ */ ++static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, ++ size_t needed_pages) +{ -+ struct kbase_mem_pool *const mem_pools = array; ++ lockdep_assert_held(&kctx->reg_lock); + -+ if (WARN_ON(!mem_pools) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return 0; ++ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages); + -+ return kbase_mem_pool_size(&mem_pools[index]); ++ kctx->jit_phys_pages_to_be_allocated -= needed_pages; +} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) -+{ -+ struct kbase_mem_pool *const mem_pools = array; -+ -+ if (WARN_ON(!mem_pools) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return 0; ++/** ++ * kbase_has_exec_va_zone - EXEC_VA zone predicate ++ * ++ * @kctx: kbase context ++ * ++ * Determine whether an EXEC_VA zone has been created for the GPU address space ++ * of the given kbase context. ++ * ++ * Return: True if the kbase context has an EXEC_VA zone. ++ */ ++bool kbase_has_exec_va_zone(struct kbase_context *kctx); + -+ return kbase_mem_pool_max_size(&mem_pools[index]); -+} ++/** ++ * kbase_map_external_resource - Map an external resource to the GPU. ++ * @kctx: kbase context. ++ * @reg: External resource to map. ++ * @locked_mm: The mm_struct which has been locked for this operation. ++ * ++ * On successful mapping, the VA region and the gpu_alloc refcounts will be ++ * increased, making it safe to use and store both values directly. ++ * ++ * Return: Zero on success, or negative error code. ++ */ ++int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, ++ struct mm_struct *locked_mm); + -+void kbase_mem_pool_config_debugfs_set_max_size(void *const array, -+ size_t const index, size_t const value) -+{ -+ struct kbase_mem_pool_config *const configs = array; ++/** ++ * kbase_unmap_external_resource - Unmap an external resource from the GPU. ++ * @kctx: kbase context. ++ * @reg: VA region corresponding to external resource ++ * ++ * On successful unmapping, the VA region and the gpu_alloc refcounts will ++ * be decreased. If the refcount reaches zero, both @reg and the corresponding ++ * allocation may be freed, so using them after returning from this function ++ * requires the caller to explicitly check their state. ++ */ ++void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); + -+ if (WARN_ON(!configs) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return; ++/** ++ * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. ++ * @kctx: kbase context. ++ * @reg: The region associated with the imported user buffer. 
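/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * request/done pairing documented above around the allocation of backing
 * pages for a JIT region. The function name and the placeholder allocation
 * step are assumptions for the example; kbase_gpu_vm_lock()/unlock() is used
 * here as the way of holding kctx->reg_lock, which both helpers require. On
 * non-CSF builds the caller is additionally expected to hold kctx->jctx.lock
 * (see the lockdep assertions above).
 */
#if MALI_JIT_PRESSURE_LIMIT_BASE
static int example_grow_jit_backing(struct kbase_context *kctx,
				    struct kbase_va_region *reg,
				    size_t new_pages)
{
	int err;

	kbase_gpu_vm_lock(kctx);	/* takes kctx->reg_lock */

	/* Trim other "reported on" regions if needed and account for the
	 * pages that are about to be allocated.
	 */
	kbase_jit_request_phys_increase(kctx, new_pages);

	/* ... allocate and account the backing pages for @reg here
	 * (grow-on-GPU-page-fault, commit ioctl, etc.); err reflects the
	 * outcome of that step ...
	 */
	err = 0;

	/* Balance the request once the pages have been accounted (or the
	 * attempt has been abandoned), before dropping reg_lock.
	 */
	kbase_jit_done_phys_increase(kctx, new_pages);

	kbase_gpu_vm_unlock(kctx);
	return err;
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */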
++ * ++ * To successfully pin the pages for a user buffer the current mm_struct must ++ * be the same as the mm_struct of the user buffer. After successfully pinning ++ * the pages further calls to this function succeed without doing work. ++ * ++ * Return: zero on success or negative number on failure. ++ */ ++int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, ++ struct kbase_va_region *reg); + -+ kbase_mem_pool_config_set_max_size(&configs[index], value); -+} ++/** ++ * kbase_sticky_resource_init - Initialize sticky resource management. ++ * @kctx: kbase context ++ * ++ * Return: zero on success or negative error number on failure. ++ */ ++int kbase_sticky_resource_init(struct kbase_context *kctx); + -+size_t kbase_mem_pool_config_debugfs_max_size(void *const array, -+ size_t const index) -+{ -+ struct kbase_mem_pool_config *const configs = array; ++/** ++ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. ++ * @kctx: kbase context. ++ * @gpu_addr: The GPU address of the external resource. ++ * ++ * Return: The metadata object which represents the binding between the ++ * external resource and the kbase context on success or NULL on failure. ++ */ ++struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( ++ struct kbase_context *kctx, u64 gpu_addr); + -+ if (WARN_ON(!configs) || -+ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) -+ return 0; ++/** ++ * kbase_sticky_resource_release - Release a reference on a sticky resource. ++ * @kctx: kbase context. ++ * @meta: Binding metadata. ++ * @gpu_addr: GPU address of the external resource. ++ * ++ * If meta is NULL then gpu_addr will be used to scan the metadata list and ++ * find the matching metadata (if any), otherwise the provided meta will be ++ * used and gpu_addr will be ignored. ++ * ++ * Return: True if the release found the metadata and the reference was dropped. ++ */ ++bool kbase_sticky_resource_release(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + -+ return kbase_mem_pool_config_get_max_size(&configs[index]); -+} ++/** ++ * kbase_sticky_resource_release_force - Release a sticky resource. ++ * @kctx: kbase context. ++ * @meta: Binding metadata. ++ * @gpu_addr: GPU address of the external resource. ++ * ++ * If meta is NULL then gpu_addr will be used to scan the metadata list and ++ * find the matching metadata (if any), otherwise the provided meta will be ++ * used and gpu_addr will be ignored. ++ * ++ * Return: True if the release found the metadata and the resource was ++ * released. ++ */ ++bool kbase_sticky_resource_release_force(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + -+static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) -+{ -+ CSTD_UNUSED(data); -+ return kbase_debugfs_helper_seq_read(sfile, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); -+} ++/** ++ * kbase_sticky_resource_term - Terminate sticky resource management. 
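/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * basic acquire/release pairing for the sticky-resource API declared above.
 * The function name is made up, and taking the GPU VM lock around the calls is
 * an assumption based on how the rest of this header guards per-context
 * region state.
 */
static int example_pin_external_resource(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_ctx_ext_res_meta *meta;

	kbase_gpu_vm_lock(kctx);
	meta = kbase_sticky_resource_acquire(kctx, gpu_addr);
	kbase_gpu_vm_unlock(kctx);

	if (!meta)
		return -ENOENT;

	/* ... the external resource stays bound while the metadata exists ... */

	kbase_gpu_vm_lock(kctx);
	/* Passing the metadata directly; gpu_addr is ignored in that case. */
	kbase_sticky_resource_release(kctx, meta, 0);
	kbase_gpu_vm_unlock(kctx);

	return 0;
}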
++ * @kctx: kbase context ++ */ ++void kbase_sticky_resource_term(struct kbase_context *kctx); + -+static ssize_t kbase_mem_pool_debugfs_write(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) ++/** ++ * kbase_mem_pool_lock - Lock a memory pool ++ * @pool: Memory pool to lock ++ */ ++static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) +{ -+ int err; -+ -+ CSTD_UNUSED(ppos); -+ err = kbase_debugfs_helper_seq_write(file, ubuf, count, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); -+ return err ? err : count; ++ spin_lock(&pool->pool_lock); +} + -+static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) ++/** ++ * kbase_mem_pool_unlock - Release a memory pool ++ * @pool: Memory pool to lock ++ */ ++static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) +{ -+ return single_open(file, kbase_mem_pool_debugfs_size_show, -+ in->i_private); ++ spin_unlock(&pool->pool_lock); +} + -+static const struct file_operations kbase_mem_pool_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_mem_pool_debugfs_open, -+ .read = seq_read, -+ .write = kbase_mem_pool_debugfs_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, -+ void *data) -+{ -+ CSTD_UNUSED(data); -+ return kbase_debugfs_helper_seq_read(sfile, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_max_size); -+} ++/** ++ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. ++ * @alloc: The physical allocation ++ */ ++void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); + -+static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) ++#if MALI_USE_CSF ++/** ++ * kbase_link_event_mem_page - Add the new event memory region to the per ++ * context list of event pages. ++ * @kctx: Pointer to kbase context ++ * @reg: Pointer to the region allocated for event memory. ++ * ++ * The region being linked shouldn't have been marked as free and should ++ * have KBASE_REG_CSF_EVENT flag set for it. ++ */ ++static inline void kbase_link_event_mem_page(struct kbase_context *kctx, ++ struct kbase_va_region *reg) +{ -+ int err; ++ lockdep_assert_held(&kctx->reg_lock); + -+ CSTD_UNUSED(ppos); -+ err = kbase_debugfs_helper_seq_write(file, ubuf, count, -+ MEMORY_GROUP_MANAGER_NR_GROUPS, -+ kbase_mem_pool_debugfs_set_max_size); -+ return err ? 
err : count; -+} ++ WARN_ON(reg->flags & KBASE_REG_FREE); ++ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); + -+static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, -+ struct file *file) -+{ -+ return single_open(file, kbase_mem_pool_debugfs_max_size_show, -+ in->i_private); ++ list_add(®->link, &kctx->csf.event_pages_head); +} + -+static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_mem_pool_debugfs_max_size_open, -+ .read = seq_read, -+ .write = kbase_mem_pool_debugfs_max_size_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+void kbase_mem_pool_debugfs_init(struct dentry *parent, -+ struct kbase_context *kctx) -+{ -+ const mode_t mode = 0644; -+ -+ debugfs_create_file("mem_pool_size", mode, parent, -+ &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); -+ -+ debugfs_create_file("mem_pool_max_size", mode, parent, -+ &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); -+ -+ debugfs_create_file("lp_mem_pool_size", mode, parent, -+ &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); -+ -+ debugfs_create_file("lp_mem_pool_max_size", mode, parent, -+ &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h -new file mode 100644 -index 000000000..207b58536 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h -@@ -0,0 +1,122 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbase_unlink_event_mem_page - Remove the event memory region from the per ++ * context list of event pages. ++ * @kctx: Pointer to kbase context ++ * @reg: Pointer to the region allocated for event memory. + * ++ * The region being un-linked shouldn't have been marked as free and should ++ * have KBASE_REG_CSF_EVENT flag set for it. + */ ++static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ lockdep_assert_held(&kctx->reg_lock); + -+#ifndef _KBASE_MEM_POOL_DEBUGFS_H_ -+#define _KBASE_MEM_POOL_DEBUGFS_H_ ++ WARN_ON(reg->flags & KBASE_REG_FREE); ++ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); + -+#include ++ list_del(®->link); ++} + +/** -+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool -+ * @parent: Parent debugfs dentry -+ * @kctx: The kbase context ++ * kbase_mcu_shared_interface_region_tracker_init - Initialize the rb tree to ++ * manage the shared interface segment of MCU firmware address space. 
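/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * convention behind kbase_mem_pool_lock()/kbase_mem_pool_unlock(): the
 * *_locked helpers in this driver lockdep-assert pool_lock, so a caller
 * batching several operations takes the lock once around them. Reading
 * pool->cur_size here stands in for any such *_locked operation and assumes
 * the field used by the pool-size accessors elsewhere in the driver.
 */
static size_t example_query_pool_under_lock(struct kbase_mem_pool *pool)
{
	size_t cur_size;

	kbase_mem_pool_lock(pool);
	cur_size = pool->cur_size;	/* only stable while pool_lock is held */
	kbase_mem_pool_unlock(pool);

	return cur_size;
}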
++ * @kbdev: Pointer to the kbase device + * -+ * Adds four debugfs files under @parent: -+ * - mem_pool_size: get/set the current sizes of @kctx: mem_pools -+ * - mem_pool_max_size: get/set the max sizes of @kctx: mem_pools -+ * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool -+ * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool ++ * Return: zero on success or negative error number on failure. + */ -+void kbase_mem_pool_debugfs_init(struct dentry *parent, -+ struct kbase_context *kctx); ++int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev); + +/** -+ * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size -+ * -+ * @array: Address of the first in an array of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array of memory -+ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @value: New number of pages in the pool. -+ * -+ * If @value > current size, fill the pool with new pages from the kernel, but -+ * not above the max_size for the pool. -+ * If @value < current size, shrink the pool by freeing pages to the kernel. ++ * kbase_mcu_shared_interface_region_tracker_term - Teardown the rb tree ++ * managing the shared interface segment of MCU firmware address space. ++ * @kbdev: Pointer to the kbase device + */ -+void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); ++void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev); ++#endif + +/** -+ * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in -+ * memory pool ++ * kbase_mem_umm_map - Map dma-buf ++ * @kctx: Pointer to the kbase context ++ * @reg: Pointer to the region of the imported dma-buf to map + * -+ * @array: Address of the first in an array of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array of memory -+ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @value: Maximum number of free pages the pool can hold. ++ * Map a dma-buf on the GPU. The mappings are reference counted. + * -+ * If the maximum size is reduced, the pool will be shrunk to adhere to the -+ * new limit. For details see kbase_mem_pool_shrink(). ++ * Return: 0 on success, or a negative error code. + */ -+void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, -+ size_t value); ++int kbase_mem_umm_map(struct kbase_context *kctx, ++ struct kbase_va_region *reg); + +/** -+ * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool ++ * kbase_mem_umm_unmap - Unmap dma-buf ++ * @kctx: Pointer to the kbase context ++ * @reg: Pointer to the region of the imported dma-buf to unmap ++ * @alloc: Pointer to the alloc to release + * -+ * @array: Address of the first in an array of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array of memory -+ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * Unmap a dma-buf from the GPU. The mappings are reference counted. + * -+ * Note: the size of the pool may in certain corner cases exceed @max_size! ++ * @reg must be the original region with GPU mapping of @alloc; or NULL. If ++ * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching ++ * @reg will not be updated. + * -+ * Return: Number of free pages in the pool ++ * @alloc must be a valid physical allocation of type ++ * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by ++ * kbase_mem_umm_map(). 
The dma-buf attachment referenced by @alloc will ++ * release it's mapping reference, and if the refcount reaches 0, also be ++ * unmapped, regardless of the value of @reg. + */ -+size_t kbase_mem_pool_debugfs_size(void *array, size_t index); ++void kbase_mem_umm_unmap(struct kbase_context *kctx, ++ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + +/** -+ * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a -+ * memory pool -+ * -+ * @array: Address of the first in an array of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array of memory -+ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * kbase_mem_do_sync_imported - Sync caches for imported memory ++ * @kctx: Pointer to the kbase context ++ * @reg: Pointer to the region with imported memory to sync ++ * @sync_fn: The type of sync operation to perform + * -+ * Return: Maximum number of free pages in the pool -+ */ -+size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); -+ -+/** -+ * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages -+ * in initial configuration of pool ++ * Sync CPU caches for supported (currently only dma-buf (UMM)) memory. ++ * Attempting to sync unsupported imported memory types will result in an error ++ * code, -EINVAL. + * -+ * @array: Array of initial configurations for a set of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @value : Maximum number of free pages that a memory pool created from the -+ * selected configuration can hold. ++ * Return: 0 on success, or a negative error code. + */ -+void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, -+ size_t value); ++int kbase_mem_do_sync_imported(struct kbase_context *kctx, ++ struct kbase_va_region *reg, enum kbase_sync_type sync_fn); + +/** -+ * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages -+ * from initial configuration of pool ++ * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to ++ * an unaligned address at a given offset from the start of a target page. + * -+ * @array: Array of initial configurations for a set of physical memory pools. -+ * @index: A memory group ID to be used as an index into the array. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @dest_pages: Pointer to the array of pages to which the content is ++ * to be copied from the provided @src_page. ++ * @src_page: Pointer to the page which correspond to the source page ++ * from which the copying will take place. ++ * @to_copy: Total number of bytes pending to be copied from ++ * @src_page to @target_page_nr within @dest_pages. ++ * This will get decremented by number of bytes we ++ * managed to copy from source page to target pages. ++ * @nr_pages: Total number of pages present in @dest_pages. ++ * @target_page_nr: Target page number to which @src_page needs to be ++ * copied. This will get incremented by one if ++ * we are successful in copying from source page. ++ * @offset: Offset in bytes into the target pages from which the ++ * copying is to be performed. + * -+ * Return: Maximum number of free pages that a memory pool created from the -+ * selected configuration can hold. ++ * Return: 0 on success, or a negative error code. 
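/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * reference-counted map/unmap pairing for imported dma-buf (UMM) memory
 * described above. The wrapper name is made up; passing reg->gpu_alloc to
 * kbase_mem_umm_unmap() follows the documented requirement that @alloc is the
 * allocation backing the originally mapped region, and holding the GPU VM
 * lock around the calls is an assumption about the caller's context.
 */
static int example_with_umm_mapping(struct kbase_context *kctx,
				    struct kbase_va_region *reg)
{
	int err;

	kbase_gpu_vm_lock(kctx);
	err = kbase_mem_umm_map(kctx, reg);	/* takes a mapping reference */
	kbase_gpu_vm_unlock(kctx);
	if (err)
		return err;

	/* ... GPU work using the imported buffer runs here ... */

	kbase_gpu_vm_lock(kctx);
	kbase_mem_umm_unmap(kctx, reg, reg->gpu_alloc);	/* drops the reference */
	kbase_gpu_vm_unlock(kctx);

	return 0;
}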
+ */ -+size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); -+ -+#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ ++int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, ++ void *src_page, size_t *to_copy, unsigned int nr_pages, ++ unsigned int *target_page_nr, size_t offset); + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c -new file mode 100644 -index 000000000..49c4b041e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c -@@ -0,0 +1,107 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone ++ * @zone: zone to query + * ++ * Return: The end of the zone corresponding to @zone + */ -+ -+#include -+#include -+#include -+ -+#include -+ -+void kbase_mem_pool_group_config_set_max_size( -+ struct kbase_mem_pool_group_config *const configs, -+ size_t const max_size) ++static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) +{ -+ size_t const large_max_size = max_size >> -+ (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - -+ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); -+ int gid; -+ -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { -+ kbase_mem_pool_config_set_max_size(&configs->small[gid], -+ max_size); -+ -+ kbase_mem_pool_config_set_max_size(&configs->large[gid], -+ large_max_size); -+ } ++ return zone->base_pfn + zone->va_size_pages; +} + -+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *const mem_pools, -+ struct kbase_device *const kbdev, -+ const struct kbase_mem_pool_group_config *const configs, -+ struct kbase_mem_pool_group *next_pools) ++/** ++ * kbase_ctx_reg_zone_init - initialize a zone in @kctx ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to initialize ++ * @base_pfn: Page Frame Number in GPU virtual address space for the start of ++ * the Zone ++ * @va_size_pages: Size of the Zone in pages ++ */ ++static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx, ++ unsigned long zone_bits, ++ u64 base_pfn, u64 va_size_pages) +{ -+ int gid, err = 0; -+ -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { -+ err = kbase_mem_pool_init(&mem_pools->small[gid], &configs->small[gid], -+ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, gid, kbdev, -+ next_pools ? &next_pools->small[gid] : NULL); -+ -+ if (!err) { -+ err = kbase_mem_pool_init(&mem_pools->large[gid], &configs->large[gid], -+ KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, gid, kbdev, -+ next_pools ? 
&next_pools->large[gid] : NULL); -+ if (err) -+ kbase_mem_pool_term(&mem_pools->small[gid]); -+ } -+ -+ /* Break out of the loop early to avoid incrementing the count -+ * of memory pool pairs successfully initialized. -+ */ -+ if (err) -+ break; -+ } -+ -+ if (err) { -+ /* gid gives the number of memory pool pairs successfully -+ * initialized, which is one greater than the array index of the -+ * last group. -+ */ -+ while (gid-- > 0) { -+ kbase_mem_pool_term(&mem_pools->small[gid]); -+ kbase_mem_pool_term(&mem_pools->large[gid]); -+ } -+ } -+ -+ return err; -+} ++ struct kbase_reg_zone *zone; + -+void kbase_mem_pool_group_mark_dying( -+ struct kbase_mem_pool_group *const mem_pools) -+{ -+ int gid; ++ lockdep_assert_held(&kctx->reg_lock); ++ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); + -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { -+ kbase_mem_pool_mark_dying(&mem_pools->small[gid]); -+ kbase_mem_pool_mark_dying(&mem_pools->large[gid]); -+ } ++ zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; ++ *zone = (struct kbase_reg_zone){ ++ .base_pfn = base_pfn, .va_size_pages = va_size_pages, ++ }; +} + -+void kbase_mem_pool_group_term( -+ struct kbase_mem_pool_group *const mem_pools) -+{ -+ int gid; -+ -+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { -+ kbase_mem_pool_term(&mem_pools->small[gid]); -+ kbase_mem_pool_term(&mem_pools->large[gid]); -+ } -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h -new file mode 100644 -index 000000000..fe8ce7752 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h -@@ -0,0 +1,114 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++/** ++ * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does ++ * not have @kctx 's region lock ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This should only be used in performance-critical paths where the code is ++ * resilient to a race with the zone changing. + * ++ * Return: The zone corresponding to @zone_bits + */ ++static inline struct kbase_reg_zone * ++kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, ++ unsigned long zone_bits) ++{ ++ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); + -+#ifndef _KBASE_MEM_POOL_GROUP_H_ -+#define _KBASE_MEM_POOL_GROUP_H_ -+ -+#include ++ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; ++} + +/** -+ * kbase_mem_pool_group_select() - Select the memory pool to use. ++ * kbase_ctx_reg_zone_get - get a zone from @kctx ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve + * -+ * @kbdev: Device pointer. -+ * @mem_group_id: Physical memory group ID to use. 
-+ * @is_small_page: Flag used to select between the small and -+ * large memory pool. ++ * The get is not refcounted - there is no corresponding 'put' operation + * -+ * Return: A pointer to the selected memory pool. ++ * Return: The zone corresponding to @zone_bits + */ -+static inline struct kbase_mem_pool *kbase_mem_pool_group_select( -+ struct kbase_device *kbdev, u32 mem_group_id, bool is_small_page) ++static inline struct kbase_reg_zone * ++kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) +{ -+ if (WARN_ON(unlikely(kbdev == NULL))) -+ return NULL; -+ -+ WARN_ON(mem_group_id > BASE_MEM_GROUP_COUNT); -+ -+ if (is_small_page) -+ return &kbdev->mem_pools.small[mem_group_id]; ++ lockdep_assert_held(&kctx->reg_lock); ++ WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); + -+ return &kbdev->mem_pools.large[mem_group_id]; ++ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; +} + +/** -+ * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for -+ * a set of memory pools ++ * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed ++ * @kctx: Pointer to kbase context + * -+ * @configs: Initial configuration for the set of memory pools -+ * @max_size: Maximum number of free 4 KiB pages each pool can hold ++ * Don't allow the allocation of GPU memory if the ioctl has been issued ++ * from the forked child process using the mali device file fd inherited from ++ * the parent process. + * -+ * This function sets the initial configuration for every memory pool so that -+ * the maximum amount of free memory that each pool can hold is identical. -+ * The equivalent number of 2 MiB pages is calculated automatically for the -+ * purpose of configuring the large page pools. ++ * Return: true if allocation is allowed. + */ -+void kbase_mem_pool_group_config_set_max_size( -+ struct kbase_mem_pool_group_config *configs, size_t max_size); ++static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx) ++{ ++ return (kctx->process_mm == current->mm); ++} + +/** -+ * kbase_mem_pool_group_init - Initialize a set of memory pools -+ * -+ * @mem_pools: Set of memory pools to initialize -+ * @kbdev: Kbase device where memory is used -+ * @configs: Initial configuration for the set of memory pools -+ * @next_pools: Set of memory pools from which to allocate memory if there -+ * is no free memory in one of the @mem_pools -+ * -+ * Initializes a complete set of physical memory pools. Memory pools are used to -+ * allow efficient reallocation of previously-freed physical pages. A pair of -+ * memory pools is initialized for each physical memory group: one for 4 KiB -+ * pages and one for 2 MiB pages. -+ * -+ * If @next_pools is not NULL then a request to allocate memory from an -+ * empty pool in @mem_pools will attempt to allocate from the equivalent pool -+ * in @next_pools before going to the memory group manager. Similarly -+ * pages can spill over to the equivalent pool in @next_pools when a pool -+ * is full in @mem_pools. Pages are zeroed before they spill over to another -+ * pool, to prevent leaking information between applications. 
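/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows how
 * the zone helpers declared earlier in this header (kbase_ctx_reg_zone_get()
 * and kbase_reg_zone_end_pfn()) can be combined to test whether a GPU page
 * frame falls inside a context zone. The function name is made up;
 * KBASE_REG_ZONE_CUSTOM_VA is one of the context zones used elsewhere in the
 * driver, and the GPU VM lock is taken to satisfy the reg_lock requirement.
 */
static bool example_pfn_in_custom_va_zone(struct kbase_context *kctx, u64 gpu_pfn)
{
	struct kbase_reg_zone *zone;
	bool in_zone;

	kbase_gpu_vm_lock(kctx);	/* kbase_ctx_reg_zone_get() needs reg_lock */
	zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_CUSTOM_VA);
	in_zone = (gpu_pfn >= zone->base_pfn) &&
		  (gpu_pfn < kbase_reg_zone_end_pfn(zone));
	kbase_gpu_vm_unlock(kctx);

	return in_zone;
}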
-+ * -+ * Return: 0 on success, otherwise a negative error code ++ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process + */ -+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, struct kbase_device *kbdev, -+ const struct kbase_mem_pool_group_config *configs, -+ struct kbase_mem_pool_group *next_pools); ++static inline void kbase_mem_mmgrab(void) ++{ ++ /* This merely takes a reference on the memory descriptor structure ++ * i.e. mm_struct of current process and not on its address space and ++ * so won't block the freeing of address space on process exit. ++ */ ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ atomic_inc(¤t->mm->mm_count); ++#else ++ mmgrab(current->mm); ++#endif ++} + +/** -+ * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying ++ * kbase_mem_group_id_get - Get group ID from flags ++ * @flags: Flags to pass to base_mem_alloc + * -+ * @mem_pools: Set of memory pools to mark ++ * This inline function extracts the encoded group ID from flags ++ * and converts it into numeric value (0~15). + * -+ * Marks a complete set of physical memory pools previously initialized by -+ * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation -+ * operations (eg growing on page fault) to be terminated. ++ * Return: group ID(0~15) extracted from the parameter + */ -+void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools); ++static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags) ++{ ++ KBASE_DEBUG_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); ++ return (int)BASE_MEM_GROUP_ID_GET(flags); ++} + +/** -+ * kbase_mem_pool_group_term - Terminate a set of memory pools ++ * kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags ++ * @id: group ID(0~15) you want to encode + * -+ * @mem_pools: Set of memory pools to terminate ++ * This inline function encodes specific group ID into base_mem_alloc_flags. ++ * Parameter 'id' should lie in-between 0 to 15. + * -+ * Terminates a complete set of physical memory pools previously initialized by -+ * @kbase_mem_pool_group_init. ++ * Return: base_mem_alloc_flags with the group ID (id) encoded ++ * ++ * The return value can be combined with other flags against base_mem_alloc ++ * to identify a specific memory group. + */ -+void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools); -+ -+#endif /* _KBASE_MEM_POOL_GROUP_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c ++static inline base_mem_alloc_flags kbase_mem_group_id_set(int id) ++{ ++ return BASE_MEM_GROUP_ID_SET(id); ++} ++#endif /* _KBASE_MEM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c new file mode 100644 -index 000000000..9317023b7 +index 000000000..ad0c17b63 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c -@@ -0,0 +1,131 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +@@ -0,0 +1,3818 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2017, 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -244454,4791 +246698,3813 @@ index 000000000..9317023b7 + * + */ + -+#include ++/** ++ * DOC: Base kernel memory APIs, Linux implementation. ++ */ + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbasep_mem_profile_seq_show - Show callback for the @c mem_profile debugfs file. ++#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ ++ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) ++/* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0 ++ * onwards. + * -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry ++ * For kernels prior to v4.12, workaround is needed as ion lacks the cache ++ * maintenance in begin_cpu_access and end_cpu_access methods. + * -+ * This function is called to get the contents of the @c mem_profile debugfs -+ * file. This is a report of current memory usage and distribution in userspace. ++ * For kernels prior to v4.17.2, workaround is needed to avoid the potentially ++ * disruptive warnings which can come if begin_cpu_access and end_cpu_access ++ * methods are not called in pairs. ++ * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x) ++ * only require this workaround on their earlier releases. However it is still ++ * safe to use it on such releases, and it simplifies the version check. + * -+ * Return: 0 if it successfully prints data in debugfs entry file, non-zero -+ * otherwise ++ * For kernels later than v4.17.2, workaround is needed as ion can potentially ++ * end up calling dma_sync_sg_for_* for a dma-buf importer that hasn't mapped ++ * the attachment. This would result in a kernel panic as ion populates the ++ * dma_address when the attachment is mapped and kernel derives the physical ++ * address for cache maintenance from the dma_address. ++ * With some multi-threaded tests it has been seen that the same dma-buf memory ++ * gets imported twice on Mali DDK side and so the problem of sync happening ++ * with an importer having an unmapped attachment comes at the time of 2nd ++ * import. The same problem can if there is another importer of dma-buf ++ * memory. ++ * ++ * Workaround can be safely disabled for kernels between v5.0.0 and v5.2.2, ++ * as all the above stated issues are not there. ++ * ++ * dma_sync_sg_for_* calls will be made directly as a workaround using the ++ * Kbase's attachment to dma-buf that was previously mapped. 
+ */ -+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_context *kctx = sfile->private; ++#define KBASE_MEM_ION_SYNC_WORKAROUND ++#endif + -+ mutex_lock(&kctx->mem_profile_lock); ++#define IR_THRESHOLD_STEPS (256u) + -+ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); ++#if MALI_USE_CSF ++static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma); ++static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma); ++#endif + -+ seq_putc(sfile, '\n'); ++static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, ++ kbase_vmap_flag vmap_flags); ++static void kbase_vunmap_phy_pages(struct kbase_context *kctx, ++ struct kbase_vmap_struct *map); + -+ mutex_unlock(&kctx->mem_profile_lock); ++static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); + -+ return 0; ++static bool is_process_exiting(struct vm_area_struct *vma) ++{ ++ /* PF_EXITING flag can't be reliably used here for the detection ++ * of process exit, as 'mm_users' counter could still be non-zero ++ * when all threads of the process have exited. Later when the ++ * thread (which took a reference on the 'mm' of process that ++ * exited) drops it reference, the vm_ops->close method would be ++ * called for all the vmas (owned by 'mm' of process that exited) ++ * but the PF_EXITING flag may not be neccessarily set for the ++ * thread at that time. ++ */ ++ if (atomic_read(&vma->vm_mm->mm_users)) ++ return false; ++ ++ return true; +} + -+/* -+ * File operations related to debugfs entry for mem_profile ++/* Retrieve the associated region pointer if the GPU address corresponds to ++ * one of the event memory pages. The enclosing region, if found, shouldn't ++ * have been marked as free. 
+ */ -+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) ++static struct kbase_va_region *kbase_find_event_mem_region( ++ struct kbase_context *kctx, u64 gpu_addr) +{ -+ return single_open(file, kbasep_mem_profile_seq_show, in->i_private); -+} ++#if MALI_USE_CSF ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct kbase_va_region *reg; + -+static const struct file_operations kbasep_mem_profile_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_mem_profile_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size) -+{ -+ const mode_t mode = 0444; -+ int err = 0; ++ lockdep_assert_held(&kctx->reg_lock); + -+ mutex_lock(&kctx->mem_profile_lock); ++ list_for_each_entry(reg, &kctx->csf.event_pages_head, link) { ++ if ((reg->start_pfn <= gpu_pfn) && ++ (gpu_pfn < (reg->start_pfn + reg->nr_pages))) { ++ if (WARN_ON(reg->flags & KBASE_REG_FREE)) ++ return NULL; + -+ dev_dbg(kctx->kbdev->dev, "initialised: %d", -+ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); ++ if (WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT))) ++ return NULL; + -+ if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { -+ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { -+ err = -ENOMEM; -+ } else if (IS_ERR_OR_NULL(debugfs_create_file("mem_profile", -+ mode, kctx->kctx_dentry, kctx, -+ &kbasep_mem_profile_debugfs_fops))) { -+ err = -EAGAIN; -+ } else { -+ kbase_ctx_flag_set(kctx, -+ KCTX_MEM_PROFILE_INITIALIZED); ++ return reg; + } + } ++#endif + -+ if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { -+ kfree(kctx->mem_profile_data); -+ kctx->mem_profile_data = data; -+ kctx->mem_profile_size = size; -+ } else { -+ kfree(data); -+ } ++ return NULL; ++} + -+ dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", -+ err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); ++/** ++ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping ++ * of the physical allocation belonging to a ++ * region ++ * @kctx: The kernel base context @reg belongs to. ++ * @reg: The region whose physical allocation is to be mapped ++ * @vsize: The size of the requested region, in pages ++ * @size: The size in pages initially committed to the region ++ * ++ * Return: 0 on success, otherwise an error code indicating failure ++ * ++ * Maps the physical allocation backing a non-free @reg, so it may be ++ * accessed directly from the kernel. This is only supported for physical ++ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of ++ * physical allocation. ++ * ++ * The mapping is stored directly in the allocation that backs @reg. The ++ * refcount is not incremented at this point. Instead, use of the mapping should ++ * be surrounded by kbase_phy_alloc_mapping_get() and ++ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the ++ * client is accessing it. ++ * ++ * Both cached and uncached regions are allowed, but any sync operations are the ++ * responsibility of the client using the permanent mapping. ++ * ++ * A number of checks are made to ensure that a region that needs a permanent ++ * mapping can actually be supported: ++ * - The region must be created as fully backed ++ * - The region must not be growable ++ * ++ * This function will fail if those checks are not satisfied. 
++ * ++ * On success, the region will also be forced into a certain kind: ++ * - It will no longer be growable ++ */ ++static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, ++ struct kbase_va_region *reg, size_t vsize, size_t size) ++{ ++ size_t size_bytes = (size << PAGE_SHIFT); ++ struct kbase_vmap_struct *kern_mapping; ++ int err = 0; + -+ mutex_unlock(&kctx->mem_profile_lock); ++ /* Can only map in regions that are always fully committed ++ * Don't setup the mapping twice ++ * Only support KBASE_MEM_TYPE_NATIVE allocations ++ */ ++ if (vsize != size || reg->cpu_alloc->permanent_map != NULL || ++ reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ return -EINVAL; + -+ return err; -+} ++ kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); ++ if (!kern_mapping) ++ return -ENOMEM; + -+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) -+{ -+ mutex_lock(&kctx->mem_profile_lock); ++ err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping, ++ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); ++ if (err < 0) ++ goto vmap_fail; + -+ dev_dbg(kctx->kbdev->dev, "initialised: %d", -+ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); ++ /* No support for growing or shrinking mapped regions */ ++ reg->flags &= ~KBASE_REG_GROWABLE; + -+ kfree(kctx->mem_profile_data); -+ kctx->mem_profile_data = NULL; -+ kctx->mem_profile_size = 0; ++ reg->cpu_alloc->permanent_map = kern_mapping; + -+ mutex_unlock(&kctx->mem_profile_lock); ++ return 0; ++vmap_fail: ++ kfree(kern_mapping); ++ return err; +} + -+#else /* CONFIG_DEBUG_FS */ -+ -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size) ++void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) +{ -+ kfree(data); -+ return 0; ++ WARN_ON(!alloc->permanent_map); ++ kbase_vunmap_phy_pages(kctx, alloc->permanent_map); ++ kfree(alloc->permanent_map); ++ ++ alloc->permanent_map = NULL; +} -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h -new file mode 100644 -index 000000000..c30fca665 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2012-2016, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ + -+/** -+ * DOC: Header file for mem profiles entries in debugfs -+ * -+ */ ++void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, ++ u64 gpu_addr, ++ struct kbase_vmap_struct **out_kern_mapping) ++{ ++ struct kbase_va_region *reg; ++ void *kern_mem_ptr = NULL; ++ struct kbase_vmap_struct *kern_mapping; ++ u64 mapping_offset; + -+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H -+#define _KBASE_MEM_PROFILE_DEBUGFS_H ++ WARN_ON(!kctx); ++ WARN_ON(!out_kern_mapping); + -+#include -+#include ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbasep_mem_profile_debugfs_remove - Remove entry from Mali memory profile debugfs -+ * -+ * @kctx: The context whose debugfs file @p data should be removed from -+ */ -+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); ++ /* First do a quick lookup in the list of event memory regions */ ++ reg = kbase_find_event_mem_region(kctx, gpu_addr); + -+/** -+ * kbasep_mem_profile_debugfs_insert - Insert @p data to the debugfs file -+ * so it can be read by userspace -+ * -+ * @kctx: The context whose debugfs file @p data should be inserted to -+ * @data: A NULL-terminated string to be inserted to the debugfs file, -+ * without the trailing new line character -+ * @size: The length of the @p data string -+ * -+ * The function takes ownership of @p data and frees it later when new data -+ * is inserted. -+ * -+ * If the debugfs entry corresponding to the @p kctx doesn't exist, -+ * an attempt will be made to create it. -+ * -+ * Return: 0 if @p data inserted correctly, -EAGAIN in case of error -+ * -+ * @post @ref mem_profile_initialized will be set to @c true -+ * the first time this function succeeds. -+ */ -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size); ++ if (!reg) { ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ kctx, gpu_addr); ++ } + -+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto out_unlock; + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h -new file mode 100644 -index 000000000..951079d11 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h -@@ -0,0 +1,229 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ kern_mapping = reg->cpu_alloc->permanent_map; ++ if (kern_mapping == NULL) ++ goto out_unlock; + -+/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. -+ * DO NOT EDIT. 
-+ */ ++ mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + -+/* clang-format off */ ++ /* Refcount the allocations to prevent them disappearing */ ++ WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); ++ WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); ++ (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); ++ (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); + -+#include "mali_kbase_mipe_proto.h" ++ kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); ++ *out_kern_mapping = kern_mapping; ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return kern_mem_ptr; ++} + -+/* -+ * This header generates MIPE tracepoint declaration BLOB at -+ * compile time. -+ * -+ * It is intentional that there is no header guard. -+ * The header could be included multiple times for -+ * different blobs compilation. -+ * -+ * Before including this header MIPE_HEADER_* parameters must be -+ * defined. See documentation below: -+ */ ++void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, ++ struct kbase_vmap_struct *kern_mapping) ++{ ++ WARN_ON(!kctx); ++ WARN_ON(!kern_mapping); + -+/* -+ * The name of the variable where the result BLOB will be stored. -+ */ -+#if !defined(MIPE_HEADER_BLOB_VAR_NAME) -+#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" -+#endif ++ WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); ++ WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map); + -+/* -+ * A compiler attribute for the BLOB variable. -+ * -+ * e.g. __attribute__((section("my_section"))) -+ * -+ * Default value is no attribute. -+ */ -+#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) -+#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE -+#endif ++ kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); ++ kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); + -+/* -+ * A compiler attribute for packing structures -+ * -+ * e.g. __packed -+ * -+ * Default value is __attribute__((__packed__)) -+ */ -+#if !defined(MIPE_HEADER_PACKED_ATTRIBUTE) -+#define MIPE_HEADER_PACKED_ATTRIBUTE __attribute__((__packed__)) -+#endif ++ /* kern_mapping and the gpu/cpu phy allocs backing it must not be used ++ * from now on ++ */ ++} + -+/* -+ * MIPE stream id. -+ * -+ * See enum tl_stream_id. -+ */ -+#if !defined(MIPE_HEADER_STREAM_ID) -+#error "MIPE_HEADER_STREAM_ID must be defined!" -+#endif ++struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, ++ u64 extension, u64 *flags, u64 *gpu_va, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) ++{ ++ int zone; ++ struct kbase_va_region *reg; ++ struct rb_root *rbtree; ++ struct device *dev; + -+/* -+ * MIPE packet class. -+ * -+ * See enum tl_packet_class. -+ */ -+#if !defined(MIPE_HEADER_PKT_CLASS) -+#error "MIPE_HEADER_PKT_CLASS must be defined!" -+#endif ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(flags); ++ KBASE_DEBUG_ASSERT(gpu_va); + -+/* -+ * The list of tracepoints to process. -+ * -+ * It should be defined as follows: -+ * -+ * #define MIPE_HEADER_TRACEPOINT_LIST \ -+ * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ -+ * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ -+ * etc. -+ * -+ * Where the first argument is tracepoints name, the second -+ * argument is a short tracepoint description, the third argument -+ * types (see MIPE documentation), and the fourth argument -+ * is comma separated argument names. 
-+ */ -+#if !defined(MIPE_HEADER_TRACEPOINT_LIST) -+#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" ++ dev = kctx->kbdev->dev; ++ dev_dbg(dev, ++ "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", ++ va_pages, commit_pages, extension, *flags); ++ ++#if MALI_USE_CSF ++ if (!(*flags & BASE_MEM_FIXED)) ++ *gpu_va = 0; /* return 0 on failure */ ++#else ++ if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) ++ *gpu_va = 0; /* return 0 on failure */ +#endif ++ else ++ dev_dbg(dev, ++ "Keeping requested GPU VA of 0x%llx\n", ++ (unsigned long long)*gpu_va); + -+/* -+ * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. -+ */ -+#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) -+#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" ++ if (!kbase_check_alloc_flags(*flags)) { ++ dev_warn(dev, ++ "%s called with bad flags (%llx)", ++ __func__, ++ (unsigned long long)*flags); ++ goto bad_flags; ++ } ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { ++ /* Mask coherency flags if infinite cache is enabled to prevent ++ * the skipping of syncs from BASE side. ++ */ ++ *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | ++ BASE_MEM_COHERENT_SYSTEM); ++ } +#endif + -+/* -+ * The list of enums to process. -+ * -+ * It should be defined as follows: -+ * -+ * #define MIPE_HEADER_ENUM_LIST \ -+ * ENUM_DESC(enum_arg_name, enum_value) \ -+ * ENUM_DESC(enum_arg_name, enum_value) \ -+ * etc. -+ * -+ * Where enum_arg_name is the name of a tracepoint argument being used with -+ * this enum. enum_value is a valid C enum value. -+ * -+ * Default value is an empty list. -+ */ -+#if defined(MIPE_HEADER_ENUM_LIST) ++ if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && ++ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { ++ /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ dev_warn(dev, "%s call required coherent mem when unavailable", ++ __func__); ++ goto bad_flags; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM; ++ } + -+/* -+ * Tracepoint message ID used for enums declaration. -+ */ -+#if !defined(MIPE_HEADER_ENUM_MSG_ID) -+#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" ++ if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, ++ extension)) ++ goto bad_sizes; ++ ++#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED ++ /* Ensure that memory is fully physically-backed. 
*/ ++ if (*flags & BASE_MEM_GROW_ON_GPF) ++ commit_pages = va_pages; +#endif + -+#else -+#define MIPE_HEADER_ENUM_LIST ++ /* find out which VA zone to use */ ++ if (*flags & BASE_MEM_SAME_VA) { ++ rbtree = &kctx->reg_rbtree_same; ++ zone = KBASE_REG_ZONE_SAME_VA; ++ } ++#if MALI_USE_CSF ++ /* fixed va_zone always exists */ ++ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { ++ if (*flags & BASE_MEM_PROT_GPU_EX) { ++ rbtree = &kctx->reg_rbtree_exec_fixed; ++ zone = KBASE_REG_ZONE_EXEC_FIXED_VA; ++ } else { ++ rbtree = &kctx->reg_rbtree_fixed; ++ zone = KBASE_REG_ZONE_FIXED_VA; ++ } ++ } +#endif ++ else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { ++ rbtree = &kctx->reg_rbtree_exec; ++ zone = KBASE_REG_ZONE_EXEC_VA; ++ } else { ++ rbtree = &kctx->reg_rbtree_custom; ++ zone = KBASE_REG_ZONE_CUSTOM_VA; ++ } + -+/* -+ * The MIPE tracepoint declaration BLOB. -+ */ -+const struct -+{ -+ u32 _mipe_w0; -+ u32 _mipe_w1; -+ u8 _protocol_version; -+ u8 _pointer_size; -+ u32 _tp_count; -+#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ -+ struct { \ -+ u32 _name; \ -+ u32 _size_string_name; \ -+ char _string_name[sizeof(#name)]; \ -+ u32 _size_desc; \ -+ char _desc[sizeof(desc)]; \ -+ u32 _size_arg_types; \ -+ char _arg_types[sizeof(arg_types)]; \ -+ u32 _size_arg_names; \ -+ char _arg_names[sizeof(arg_names)]; \ -+ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## name; ++ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); + -+#define ENUM_DESC(arg_name, value) \ -+ struct { \ -+ u32 _msg_id; \ -+ u32 _arg_name_len; \ -+ char _arg_name[sizeof(#arg_name)]; \ -+ u32 _value; \ -+ u32 _value_str_len; \ -+ char _value_str[sizeof(#value)]; \ -+ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## arg_name ## _ ## value; ++ if (!reg) { ++ dev_err(dev, "Failed to allocate free region"); ++ goto no_region; ++ } + -+ MIPE_HEADER_TRACEPOINT_LIST -+ MIPE_HEADER_ENUM_LIST -+#undef TRACEPOINT_DESC -+#undef ENUM_DESC -+} MIPE_HEADER_PACKED_ATTRIBUTE MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { -+ ._mipe_w0 = MIPE_PACKET_HEADER_W0( -+ TL_PACKET_FAMILY_TL, -+ MIPE_HEADER_PKT_CLASS, -+ TL_PACKET_TYPE_HEADER, -+ MIPE_HEADER_STREAM_ID), -+ ._mipe_w1 = MIPE_PACKET_HEADER_W1( -+ sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, -+ 0), -+ ._protocol_version = SWTRACE_VERSION, -+ ._pointer_size = sizeof(void *), -+ ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, -+#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ -+ .__ ## name = { \ -+ ._name = name, \ -+ ._size_string_name = sizeof(#name), \ -+ ._string_name = #name, \ -+ ._size_desc = sizeof(desc), \ -+ ._desc = desc, \ -+ ._size_arg_types = sizeof(arg_types), \ -+ ._arg_types = arg_types, \ -+ ._size_arg_names = sizeof(arg_names), \ -+ ._arg_names = arg_names \ -+ }, -+#define ENUM_DESC(arg_name, value) \ -+ .__ ## arg_name ## _ ## value = { \ -+ ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ -+ ._arg_name_len = sizeof(#arg_name), \ -+ ._arg_name = #arg_name, \ -+ ._value = value, \ -+ ._value_str_len = sizeof(#value), \ -+ ._value_str = #value \ -+ }, ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+ MIPE_HEADER_TRACEPOINT_LIST -+ MIPE_HEADER_ENUM_LIST -+#undef TRACEPOINT_DESC -+#undef ENUM_DESC -+}; ++ if (kbase_reg_prepare_native(reg, kctx, ++ kbase_mem_group_id_get(*flags)) != 0) { ++ dev_err(dev, "Failed to prepare region"); ++ goto prepare_failed; ++ } + -+#undef MIPE_HEADER_BLOB_VAR_NAME -+#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE -+#undef 
MIPE_HEADER_STREAM_ID -+#undef MIPE_HEADER_PKT_CLASS -+#undef MIPE_HEADER_TRACEPOINT_LIST -+#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE -+#undef MIPE_HEADER_ENUM_LIST -+#undef MIPE_HEADER_ENUM_MSG_ID ++ if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) ++ *flags |= BASE_MEM_KERNEL_SYNC; + -+/* clang-format on */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h -new file mode 100644 -index 000000000..c35ee61ce ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h -@@ -0,0 +1,126 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* make sure base knows if the memory is actually cached or not */ ++ if (reg->flags & KBASE_REG_CPU_CACHED) ++ *flags |= BASE_MEM_CACHED_CPU; ++ else ++ *flags &= ~BASE_MEM_CACHED_CPU; + -+/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. -+ * DO NOT EDIT. -+ */ ++ if (*flags & BASE_MEM_GROW_ON_GPF) { ++ unsigned int const ir_threshold = atomic_read( ++ &kctx->kbdev->memdev.ir_threshold); + -+/* clang-format off */ ++ reg->threshold_pages = ((va_pages * ir_threshold) + ++ (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; ++ } else ++ reg->threshold_pages = 0; + -+#if !defined(_KBASE_MIPE_PROTO_H) -+#define _KBASE_MIPE_PROTO_H ++ if (*flags & BASE_MEM_GROW_ON_GPF) { ++ /* kbase_check_alloc_sizes() already checks extension is valid for ++ * assigning to reg->extension ++ */ ++ reg->extension = extension; ++#if !MALI_USE_CSF ++ } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { ++ reg->extension = extension; ++#endif /* !MALI_USE_CSF */ ++ } else { ++ reg->extension = 0; ++ } + -+#define _BITFIELD_MASK_FIELD(pos, len) \ -+ (((1u << len) - 1) << pos) ++ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { ++ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", ++ (unsigned long long)commit_pages, ++ (unsigned long long)va_pages); ++ goto no_mem; ++ } ++ reg->initial_commit = commit_pages; + -+#define _BITFIELD_SET_FIELD(pos, len, value) \ -+ (_BITFIELD_MASK_FIELD(pos, len) & (((u32) value) << pos)) ++ kbase_gpu_vm_lock(kctx); + -+#define BITFIELD_SET(field_name, value) \ -+ _BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value) ++ if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { ++ /* Permanent kernel mappings must happen as soon as ++ * reg->cpu_alloc->pages is ready. Currently this happens after ++ * kbase_alloc_phy_pages(). If we move that to setup pages ++ * earlier, also move this call too ++ */ ++ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, ++ commit_pages); ++ if (err < 0) { ++ kbase_gpu_vm_unlock(kctx); ++ goto no_kern_mapping; ++ } ++ } + -+/* The version of swtrace protocol used in timeline stream. 
*/ -+#define SWTRACE_VERSION 3 ++ /* mmap needed to setup VA? */ ++ if (*flags & BASE_MEM_SAME_VA) { ++ unsigned long cookie, cookie_nr; + -+/* Packet header - first word. -+ * These values must be defined according to MIPE documentation. -+ */ -+#define PACKET_STREAMID_POS 0 -+#define PACKET_STREAMID_LEN 8 -+#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) -+#define PACKET_RSVD1_LEN 8 -+#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) -+#define PACKET_TYPE_LEN 3 -+#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) -+#define PACKET_CLASS_LEN 7 -+#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) -+#define PACKET_FAMILY_LEN 6 ++ /* Bind to a cookie */ ++ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { ++ dev_err(dev, "No cookies available for allocation!"); ++ kbase_gpu_vm_unlock(kctx); ++ goto no_cookie; ++ } ++ /* return a cookie */ ++ cookie_nr = find_first_bit(kctx->cookies, BITS_PER_LONG); ++ bitmap_clear(kctx->cookies, cookie_nr, 1); ++ BUG_ON(kctx->pending_regions[cookie_nr]); ++ kctx->pending_regions[cookie_nr] = reg; + -+/* Packet header - second word -+ * These values must be defined according to MIPE documentation. -+ */ -+#define PACKET_LENGTH_POS 0 -+#define PACKET_LENGTH_LEN 24 -+#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) -+#define PACKET_SEQBIT_LEN 1 -+#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) -+#define PACKET_RSVD2_LEN 7 ++ /* relocate to correct base */ ++ cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ cookie <<= PAGE_SHIFT; + -+/* First word of a MIPE packet */ -+#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \ -+ (0 \ -+ | BITFIELD_SET(PACKET_FAMILY, pkt_family) \ -+ | BITFIELD_SET(PACKET_CLASS, pkt_class) \ -+ | BITFIELD_SET(PACKET_TYPE, pkt_type) \ -+ | BITFIELD_SET(PACKET_STREAMID, stream_id)) ++ *gpu_va = (u64) cookie; ++ } else /* we control the VA */ { ++ size_t align = 1; + -+/* Second word of a MIPE packet */ -+#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \ -+ (0 \ -+ | BITFIELD_SET(PACKET_LENGTH, packet_length) \ -+ | BITFIELD_SET(PACKET_SEQBIT, seqbit)) ++ if (kctx->kbdev->pagesize_2mb) { ++ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB ++ * boundaries. The similar condition is used for mapping from ++ * the SAME_VA zone inside kbase_context_get_unmapped_area(). ++ */ ++ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { ++ if (va_pages >= (SZ_2M / SZ_4K)) ++ align = (SZ_2M / SZ_4K); ++ } ++ if (*gpu_va) ++ align = 1; ++#if !MALI_USE_CSF ++ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ++ align = 1; ++#endif /* !MALI_USE_CSF */ ++ } ++ if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, ++ mmu_sync_info) != 0) { ++ dev_warn(dev, "Failed to map memory on GPU"); ++ kbase_gpu_vm_unlock(kctx); ++ goto no_mmap; ++ } ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } + -+/* The number of bytes reserved for packet header. -+ * These value must be defined according to MIPE documentation. -+ */ -+#define PACKET_HEADER_SIZE 8 /* bytes */ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { ++ kbase_jit_done_phys_increase(kctx, commit_pages); + -+/* The number of bytes reserved for packet sequence number. -+ * These value must be defined according to MIPE documentation. 
-+ */ -+#define PACKET_NUMBER_SIZE 4 /* bytes */ ++ mutex_lock(&kctx->jit_evict_lock); ++ WARN_ON(!list_empty(®->jit_node)); ++ list_add(®->jit_node, &kctx->jit_active_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+/* Timeline packet family ids. -+ * Values are significant! Check MIPE documentation. -+ */ -+enum tl_packet_family { -+ TL_PACKET_FAMILY_CTRL = 0, /* control packets */ -+ TL_PACKET_FAMILY_TL = 1, /* timeline packets */ -+ TL_PACKET_FAMILY_COUNT -+}; ++ kbase_gpu_vm_unlock(kctx); + -+/* Packet classes used in timeline streams. -+ * Values are significant! Check MIPE documentation. -+ */ -+enum tl_packet_class { -+ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ -+ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ -+}; ++#if MALI_USE_CSF ++ if (*flags & BASE_MEM_FIXABLE) ++ atomic64_inc(&kctx->num_fixable_allocs); ++ else if (*flags & BASE_MEM_FIXED) ++ atomic64_inc(&kctx->num_fixed_allocs); ++#endif + -+/* Packet types used in timeline streams. -+ * Values are significant! Check MIPE documentation. -+ */ -+enum tl_packet_type { -+ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ -+ TL_PACKET_TYPE_BODY = 1, /* stream's body */ -+ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ -+}; ++ return reg; + -+/* Stream ID types (timeline family). */ -+enum tl_stream_id { -+ TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ -+ TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ -+ TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ -+}; ++no_mmap: ++no_cookie: ++no_kern_mapping: ++no_mem: ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_jit_done_phys_increase(kctx, commit_pages); ++ kbase_gpu_vm_unlock(kctx); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++invalid_flags: ++prepare_failed: ++ kfree(reg); ++no_region: ++bad_sizes: ++bad_flags: ++ return NULL; ++} ++KBASE_EXPORT_TEST_API(kbase_mem_alloc); + -+#endif /* _KBASE_MIPE_PROTO_H */ ++int kbase_mem_query(struct kbase_context *kctx, ++ u64 gpu_addr, u64 query, u64 * const out) ++{ ++ struct kbase_va_region *reg; ++ int ret = -EINVAL; + -+/* clang-format on */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c -new file mode 100644 -index 000000000..10a7f506b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c -@@ -0,0 +1,177 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(out); + -+#include -+#include -+#include ++ if (gpu_addr & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); ++ return -EINVAL; ++ } + -+#include -+#include ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbase_native_mgm_alloc - Native physical memory allocation method -+ * -+ * @mgm_dev: The memory group manager the request is being made through. -+ * @group_id: A physical memory group ID, which must be valid but is not used. -+ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. -+ * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior. -+ * @order: Page order for physical page size (order=0 means 4 KiB, -+ * order=9 means 2 MiB). -+ * -+ * Delegates all memory allocation requests to the kernel's alloc_pages -+ * function. -+ * -+ * Return: Pointer to allocated page, or NULL if allocation failed. -+ */ -+static struct page *kbase_native_mgm_alloc( -+ struct memory_group_manager_device *mgm_dev, int group_id, -+ gfp_t gfp_mask, unsigned int order) -+{ -+ /* -+ * Check that the base and the mgm defines, from separate header files, -+ * for the max number of memory groups are compatible. -+ */ -+ BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS); -+ /* -+ * Check that the mask used for storing the memory group ID is big -+ * enough for the largest possible memory group ID. -+ */ -+ BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK -+ >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) -+ < (BASE_MEM_GROUP_COUNT - 1)); ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto out_unlock; + -+ CSTD_UNUSED(mgm_dev); -+ CSTD_UNUSED(group_id); ++ switch (query) { ++ case KBASE_MEM_QUERY_COMMIT_SIZE: ++ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { ++ *out = kbase_reg_current_backed_size(reg); ++ } else { ++ size_t i; ++ struct kbase_aliased *aliased; ++ *out = 0; ++ aliased = reg->cpu_alloc->imported.alias.aliased; ++ for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) ++ *out += aliased[i].length; ++ } ++ break; ++ case KBASE_MEM_QUERY_VA_SIZE: ++ *out = reg->nr_pages; ++ break; ++ case KBASE_MEM_QUERY_FLAGS: ++ { ++ *out = 0; ++ if (KBASE_REG_CPU_WR & reg->flags) ++ *out |= BASE_MEM_PROT_CPU_WR; ++ if (KBASE_REG_CPU_RD & reg->flags) ++ *out |= BASE_MEM_PROT_CPU_RD; ++ if (KBASE_REG_CPU_CACHED & reg->flags) ++ *out |= BASE_MEM_CACHED_CPU; ++ if (KBASE_REG_GPU_WR & reg->flags) ++ *out |= BASE_MEM_PROT_GPU_WR; ++ if (KBASE_REG_GPU_RD & reg->flags) ++ *out |= BASE_MEM_PROT_GPU_RD; ++ if (!(KBASE_REG_GPU_NX & reg->flags)) ++ *out |= BASE_MEM_PROT_GPU_EX; ++ if (KBASE_REG_SHARE_BOTH & reg->flags) ++ *out |= BASE_MEM_COHERENT_SYSTEM; ++ if (KBASE_REG_SHARE_IN & reg->flags) ++ *out |= BASE_MEM_COHERENT_LOCAL; ++ if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { ++ /* Prior to this version, this was known about by ++ * user-side but we did not return them. Returning ++ * it caused certain clients that were not expecting ++ * it to fail, so we omit it as a special-case for ++ * compatibility reasons ++ */ ++ if (KBASE_REG_PF_GROW & reg->flags) ++ *out |= BASE_MEM_GROW_ON_GPF; ++ } ++ if (mali_kbase_supports_mem_protected(kctx->api_version)) { ++ /* Prior to this version, this was known about by ++ * user-side but we did not return them. 
Returning ++ * it caused certain clients that were not expecting ++ * it to fail, so we omit it as a special-case for ++ * compatibility reasons ++ */ ++ if (KBASE_REG_PROTECTED & reg->flags) ++ *out |= BASE_MEM_PROTECTED; ++ } ++#if !MALI_USE_CSF ++ if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) ++ *out |= BASE_MEM_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ ++ if (!(KBASE_REG_GPU_CACHED & reg->flags)) ++ *out |= BASE_MEM_UNCACHED_GPU; ++#if MALI_USE_CSF ++ if (KBASE_REG_CSF_EVENT & reg->flags) ++ *out |= BASE_MEM_CSF_EVENT; ++ if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || ++ ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { ++ if (KBASE_REG_FIXED_ADDRESS & reg->flags) ++ *out |= BASE_MEM_FIXED; ++ else ++ *out |= BASE_MEM_FIXABLE; ++ } ++#endif ++ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) ++ *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + -+ return alloc_pages(gfp_mask, order); -+} ++ *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); + -+/** -+ * kbase_native_mgm_free - Native physical memory freeing method -+ * -+ * @mgm_dev: The memory group manager the request is being made through. -+ * @group_id: A physical memory group ID, which must be valid but is not used. -+ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. -+ * @page: Address of the struct associated with a page of physical -+ * memory that was allocated by calling kbase_native_mgm_alloc -+ * with the same argument values. -+ * @order: Page order for physical page size (order=0 means 4 KiB, -+ * order=9 means 2 MiB). -+ * -+ * Delegates all memory freeing requests to the kernel's __free_pages function. -+ */ -+static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, -+ int group_id, struct page *page, unsigned int order) -+{ -+ CSTD_UNUSED(mgm_dev); -+ CSTD_UNUSED(group_id); ++ WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, ++ "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); ++ *out &= BASE_MEM_FLAGS_QUERYABLE; ++ break; ++ } ++ default: ++ *out = 0; ++ goto out_unlock; ++ } + -+ __free_pages(page, order); ++ ret = 0; ++ ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return ret; +} + +/** -+ * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU -+ * -+ * @mgm_dev: The memory group manager the request is being made through. -+ * @group_id: A physical memory group ID, which must be valid but is not used. -+ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. -+ * @vma: The virtual memory area to insert the page into. -+ * @addr: An address contained in @vma to assign to the inserted page. -+ * @pfn: The kernel Page Frame Number to insert at @addr in @vma. -+ * @pgprot: Protection flags for the inserted page. -+ * -+ * Called from a CPU virtual memory page fault handler. Delegates all memory -+ * mapping requests to the kernel's vmf_insert_pfn_prot function. ++ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the ++ * Ephemeral memory eviction list. ++ * @s: Shrinker ++ * @sc: Shrinker control + * -+ * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table -+ * entry was successfully installed. ++ * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains. 
+ */ -+static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( -+ struct memory_group_manager_device *mgm_dev, int group_id, -+ struct vm_area_struct *vma, unsigned long addr, -+ unsigned long pfn, pgprot_t pgprot) ++static ++unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ CSTD_UNUSED(mgm_dev); -+ CSTD_UNUSED(group_id); ++ struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); ++ int evict_nents = atomic_read(&kctx->evict_nents); ++ unsigned long nr_freeable_items; + -+ return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); -+} ++ WARN(in_atomic(), ++ "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", ++ sc->gfp_mask); + -+/** -+ * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table -+ * entry -+ * -+ * @mgm_dev: The memory group manager the request is being made through. -+ * @group_id: A physical memory group ID, which must be valid but is not used. -+ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. -+ * @mmu_level: The level of the MMU page table where the page is getting mapped. -+ * @pte: The prepared page table entry. -+ * -+ * This function simply returns the @pte without modification. -+ * -+ * Return: A GPU page table entry to be stored in a page table. -+ */ -+static u64 -+kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, -+ int group_id, int mmu_level, u64 pte) -+{ -+ CSTD_UNUSED(mgm_dev); -+ CSTD_UNUSED(group_id); -+ CSTD_UNUSED(mmu_level); ++ if (unlikely(evict_nents < 0)) { ++ dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents); ++ nr_freeable_items = 0; ++ } else { ++ nr_freeable_items = evict_nents; ++ } + -+ return pte; ++#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE ++ if (nr_freeable_items == 0) ++ nr_freeable_items = SHRINK_EMPTY; ++#endif ++ ++ return nr_freeable_items; +} + +/** -+ * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in -+ * kbase_native_mgm_update_gpu_pte() -+ * -+ * @mgm_dev: The memory group manager the request is being made through. -+ * @group_id: A physical memory group ID, which must be valid but is not used. -+ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. -+ * @mmu_level: The level of the MMU page table where the page is getting mapped. -+ * @pte: The prepared page table entry. ++ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction ++ * list for pages and try to reclaim them. ++ * @s: Shrinker ++ * @sc: Shrinker control + * -+ * This function simply returns the @pte without modification. ++ * Return: Number of pages freed (can be less then requested) or ++ * SHRINK_STOP if reclaim isn't possible. + * -+ * Return: A GPU page table entry to be stored in a page table. ++ * Note: ++ * This function accesses region structures without taking the region lock, ++ * this is required as the OOM killer can call the shrinker after the region ++ * lock has already been held. ++ * This is safe as we can guarantee that a region on the eviction list will ++ * not be freed (kbase_mem_free_region removes the allocation from the list ++ * before destroying it), or modified by other parts of the driver. ++ * The eviction list itself is guarded by the eviction lock and the MMU updates ++ * are protected by their own lock. 
+ */ -+static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, -+ int group_id, int mmu_level, u64 pte) ++static ++unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ CSTD_UNUSED(mgm_dev); -+ CSTD_UNUSED(group_id); -+ CSTD_UNUSED(mmu_level); ++ struct kbase_context *kctx; ++ struct kbase_mem_phy_alloc *alloc; ++ struct kbase_mem_phy_alloc *tmp; ++ unsigned long freed = 0; + -+ return pte; -+} ++ kctx = container_of(s, struct kbase_context, reclaim); + -+struct memory_group_manager_device kbase_native_mgm_dev = { -+ .ops = { -+ .mgm_alloc_page = kbase_native_mgm_alloc, -+ .mgm_free_page = kbase_native_mgm_free, -+ .mgm_get_import_memory_id = NULL, -+ .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, -+ .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, -+ .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte, -+ }, -+ .data = NULL -+}; -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h -new file mode 100644 -index 000000000..1eae2fcf9 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ mutex_lock(&kctx->jit_evict_lock); + -+#ifndef _KBASE_NATIVE_MGM_H_ -+#define _KBASE_NATIVE_MGM_H_ ++ list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { ++ int err; + -+#include ++ if (!alloc->reg) ++ continue; + -+/* -+ * kbase_native_mgm_dev - Native memory group manager device -+ * -+ * An implementation of the memory group manager interface that is intended for -+ * internal use when no platform-specific memory group manager is available. -+ * -+ * It ignores the specified group ID and delegates to the kernel's physical -+ * memory allocation and freeing functions. -+ */ -+extern struct memory_group_manager_device kbase_native_mgm_dev; ++ err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, ++ 0, alloc->nents); + -+#endif /* _KBASE_NATIVE_MGM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c -new file mode 100644 -index 000000000..b65f9e7b5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c -@@ -0,0 +1,306 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Failed to remove GPU mapping, proceed to next one. */ ++ if (err != 0) ++ continue; + -+#include "mali_kbase_pbha.h" ++ /* ++ * Update alloc->evicted before freeing the backing so the ++ * helper can determine that it needs to bypass the accounting ++ * and memory pool. ++ */ ++ alloc->evicted = alloc->nents; + -+#include -+#include -+#define DTB_SET_SIZE 2 ++ kbase_free_phy_pages_helper(alloc, alloc->evicted); ++ freed += alloc->evicted; ++ WARN_ON(atomic_sub_return(alloc->evicted, &kctx->evict_nents) < 0); ++ list_del_init(&alloc->evict_node); + -+static bool read_setting_valid(unsigned int id, unsigned int read_setting) -+{ -+ switch (id) { -+ /* Valid ID - fall through all */ -+ case SYSC_ALLOC_ID_R_OTHER: -+ case SYSC_ALLOC_ID_R_CSF: -+ case SYSC_ALLOC_ID_R_MMU: -+ case SYSC_ALLOC_ID_R_TILER_VERT: -+ case SYSC_ALLOC_ID_R_TILER_PTR: -+ case SYSC_ALLOC_ID_R_TILER_INDEX: -+ case SYSC_ALLOC_ID_R_TILER_OTHER: -+ case SYSC_ALLOC_ID_R_IC: -+ case SYSC_ALLOC_ID_R_ATTR: -+ case SYSC_ALLOC_ID_R_SCM: -+ case SYSC_ALLOC_ID_R_FSDC: -+ case SYSC_ALLOC_ID_R_VL: -+ case SYSC_ALLOC_ID_R_PLR: -+ case SYSC_ALLOC_ID_R_TEX: -+ case SYSC_ALLOC_ID_R_LSC: -+ switch (read_setting) { -+ /* Valid setting value - fall through all */ -+ case SYSC_ALLOC_L2_ALLOC: -+ case SYSC_ALLOC_NEVER_ALLOC: -+ case SYSC_ALLOC_ALWAYS_ALLOC: -+ case SYSC_ALLOC_PTL_ALLOC: -+ case SYSC_ALLOC_L2_PTL_ALLOC: -+ return true; -+ default: -+ return false; -+ } -+ default: -+ return false; ++ /* ++ * Inform the JIT allocator this region has lost backing ++ * as it might need to free the allocation. 
++ */ ++ kbase_jit_backing_lost(alloc->reg); ++ ++ /* Enough pages have been freed so stop now */ ++ if (freed > sc->nr_to_scan) ++ break; + } + -+ /* Unreachable */ -+ return false; ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ return freed; +} + -+static bool write_setting_valid(unsigned int id, unsigned int write_setting) ++int kbase_mem_evictable_init(struct kbase_context *kctx) +{ -+ switch (id) { -+ /* Valid ID - fall through all */ -+ case SYSC_ALLOC_ID_W_OTHER: -+ case SYSC_ALLOC_ID_W_CSF: -+ case SYSC_ALLOC_ID_W_PCB: -+ case SYSC_ALLOC_ID_W_TILER_PTR: -+ case SYSC_ALLOC_ID_W_TILER_VERT_PLIST: -+ case SYSC_ALLOC_ID_W_TILER_OTHER: -+ case SYSC_ALLOC_ID_W_L2_EVICT: -+ case SYSC_ALLOC_ID_W_L2_FLUSH: -+ case SYSC_ALLOC_ID_W_TIB_COLOR: -+ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH: -+ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB: -+ case SYSC_ALLOC_ID_W_TIB_CRC: -+ case SYSC_ALLOC_ID_W_TIB_DS: -+ case SYSC_ALLOC_ID_W_TIB_DS_AFBCH: -+ case SYSC_ALLOC_ID_W_TIB_DS_AFBCB: -+ case SYSC_ALLOC_ID_W_LSC: -+ switch (write_setting) { -+ /* Valid setting value - fall through all */ -+ case SYSC_ALLOC_L2_ALLOC: -+ case SYSC_ALLOC_NEVER_ALLOC: -+ case SYSC_ALLOC_ALWAYS_ALLOC: -+ case SYSC_ALLOC_PTL_ALLOC: -+ case SYSC_ALLOC_L2_PTL_ALLOC: -+ return true; -+ default: -+ return false; -+ } -+ default: -+ return false; -+ } ++ INIT_LIST_HEAD(&kctx->evict_list); ++ mutex_init(&kctx->jit_evict_lock); + -+ /* Unreachable */ -+ return false; ++ atomic_set(&kctx->evict_nents, 0); ++ ++ kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; ++ kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; ++ kctx->reclaim.seeks = DEFAULT_SEEKS; ++ /* Kernel versions prior to 3.1 : ++ * struct shrinker does not define batch ++ */ ++ kctx->reclaim.batch = 0; ++#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE ++ register_shrinker(&kctx->reclaim); ++#else ++ register_shrinker(&kctx->reclaim, "mali-mem"); ++#endif ++ return 0; +} + -+/* Private structure to be returned as setting validity status */ -+struct settings_status { -+ /* specifies whether id and either one of settings is valid */ -+ bool overall; -+ /* specifies whether read setting is valid */ -+ bool read; -+ /* specifies whether write setting is valid*/ -+ bool write; -+}; ++void kbase_mem_evictable_deinit(struct kbase_context *kctx) ++{ ++ unregister_shrinker(&kctx->reclaim); ++} + -+static struct settings_status settings_valid(unsigned int id, unsigned int read_setting, -+ unsigned int write_setting) ++/** ++ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. 
++ * @alloc: The physical allocation ++ */ ++void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ -+ struct settings_status valid = { .overall = (id < SYSC_ALLOC_COUNT * sizeof(u32)) }; ++ struct kbase_context *kctx = alloc->imported.native.kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int __maybe_unused new_page_count; + -+ if (valid.overall) { -+ valid.read = read_setting_valid(id, read_setting); -+ valid.write = write_setting_valid(id, write_setting); -+ valid.overall = valid.read || valid.write; -+ } ++ kbase_process_page_usage_dec(kctx, alloc->nents); ++ new_page_count = atomic_sub_return(alloc->nents, ++ &kctx->used_pages); ++ atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); + -+ return valid; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); ++ kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); +} + -+bool kbasep_pbha_supported(struct kbase_device *kbdev) ++/** ++ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. ++ * @alloc: The physical allocation ++ */ ++static ++void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ -+ const u32 arch_maj_rev = -+ ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id); ++ struct kbase_context *kctx = alloc->imported.native.kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int __maybe_unused new_page_count; + -+ return (arch_maj_rev >= GPU_ID2_ARCH_MAJOR_REV_MAKE(11, 3)); ++ new_page_count = atomic_add_return(alloc->nents, ++ &kctx->used_pages); ++ atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); ++ ++ /* Increase mm counters so that the allocation is accounted for ++ * against the process and thus is visible to the OOM killer, ++ */ ++ kbase_process_page_usage_inc(kctx, alloc->nents); ++ ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ kctx->id, ++ (u64)new_page_count); ++ kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); +} + -+int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, -+ unsigned int id, unsigned int read_setting, -+ unsigned int write_setting) ++int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ -+ struct settings_status const valid = settings_valid(id, read_setting, write_setting); ++ struct kbase_context *kctx = gpu_alloc->imported.native.kctx; + -+ if (valid.overall) { -+ unsigned int const sysc_alloc_num = id / sizeof(u32); -+ u32 modified_reg; ++ lockdep_assert_held(&kctx->reg_lock); + -+ if (runtime) { -+ int i; ++ /* Memory is in the process of transitioning to the shrinker, and ++ * should ignore migration attempts ++ */ ++ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, ++ 0, gpu_alloc->nents); + -+ kbase_pm_context_active(kbdev); -+ /* Ensure host copy of SYSC_ALLOC is up to date */ -+ for (i = 0; i < SYSC_ALLOC_COUNT; i++) -+ kbdev->sysc_alloc[i] = kbase_reg_read( -+ kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); -+ kbase_pm_context_idle(kbdev); -+ } ++ mutex_lock(&kctx->jit_evict_lock); ++ /* This allocation can't already be on a list. */ ++ WARN_ON(!list_empty(&gpu_alloc->evict_node)); + -+ modified_reg = kbdev->sysc_alloc[sysc_alloc_num]; ++ /* Add the allocation to the eviction list, after this point the shrink ++ * can reclaim it. ++ */ ++ list_add(&gpu_alloc->evict_node, &kctx->evict_list); ++ atomic_add(gpu_alloc->nents, &kctx->evict_nents); + -+ switch (id % sizeof(u32)) { -+ case 0: -+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC0_SET(modified_reg, -+ read_setting) : -+ modified_reg; -+ modified_reg = valid.write ? 
SYSC_ALLOC_W_SYSC_ALLOC0_SET(modified_reg, -+ write_setting) : -+ modified_reg; -+ break; -+ case 1: -+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC1_SET(modified_reg, -+ read_setting) : -+ modified_reg; -+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC1_SET(modified_reg, -+ write_setting) : -+ modified_reg; -+ break; -+ case 2: -+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC2_SET(modified_reg, -+ read_setting) : -+ modified_reg; -+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC2_SET(modified_reg, -+ write_setting) : -+ modified_reg; -+ break; -+ case 3: -+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC3_SET(modified_reg, -+ read_setting) : -+ modified_reg; -+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC3_SET(modified_reg, -+ write_setting) : -+ modified_reg; -+ break; -+ } ++ /* Indicate to page migration that the memory can be reclaimed by the shrinker. ++ */ ++ if (kbase_page_migration_enabled) ++ kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + -+ kbdev->sysc_alloc[sysc_alloc_num] = modified_reg; -+ } ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_mem_evictable_mark_reclaim(gpu_alloc); + -+ return valid.overall ? 0 : -EINVAL; ++ gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; ++ return 0; +} + -+void kbase_pbha_write_settings(struct kbase_device *kbdev) ++bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ -+ if (kbasep_pbha_supported(kbdev)) { -+ int i; -+ -+ for (i = 0; i < SYSC_ALLOC_COUNT; ++i) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)), -+ kbdev->sysc_alloc[i]); -+ } -+} ++ struct kbase_context *kctx = gpu_alloc->imported.native.kctx; ++ int err = 0; + -+static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, -+ const struct device_node *pbha_node) -+{ -+ u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE]; -+ int sz, i; -+ bool valid = true; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ sz = of_property_count_elems_of_size(pbha_node, "int_id_override", -+ sizeof(u32)); -+ if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { -+ dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n"); -+ return -EINVAL; -+ } -+ if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, -+ sz) != 0) { -+ dev_err(kbdev->dev, -+ "Failed to read DTB pbha.int_id_override\n"); -+ return -EINVAL; -+ } ++ lockdep_assert_held(&kctx->reg_lock); + -+ for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) { -+ unsigned int rdset = -+ SYSC_ALLOC_R_SYSC_ALLOC0_GET(dtb_data[i + 1]); -+ unsigned int wrset = -+ SYSC_ALLOC_W_SYSC_ALLOC0_GET(dtb_data[i + 1]); -+ valid = valid && -+ (kbase_pbha_record_settings(kbdev, false, dtb_data[i], -+ rdset, wrset) == 0); -+ if (valid) -+ dev_info(kbdev->dev, -+ "pbha.int_id_override 0x%x r0x%x w0x%x\n", -+ dtb_data[i], rdset, wrset); -+ } -+ if (i != sz || (!valid)) { -+ dev_err(kbdev->dev, -+ "Failed recording DTB data (pbha.int_id_override)\n"); -+ return -EINVAL; -+ } -+ return 0; -+} ++ mutex_lock(&kctx->jit_evict_lock); ++ /* ++ * First remove the allocation from the eviction list as it's no ++ * longer eligible for eviction. 
++ */ ++ WARN_ON(atomic_sub_return(gpu_alloc->nents, &kctx->evict_nents) < 0); ++ list_del_init(&gpu_alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); + -+#if MALI_USE_CSF -+static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, -+ const struct device_node *pbha_node) -+{ -+ u32 bits; -+ int err; ++ if (gpu_alloc->evicted == 0) { ++ /* ++ * The backing is still present, update the VM stats as it's ++ * in use again. ++ */ ++ kbase_mem_evictable_unmark_reclaim(gpu_alloc); ++ } else { ++ /* If the region is still alive ... */ ++ if (gpu_alloc->reg) { ++ /* ... allocate replacement backing ... */ ++ err = kbase_alloc_phy_pages_helper(gpu_alloc, ++ gpu_alloc->evicted); + -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) -+ return 0; ++ /* ++ * ... and grow the mapping back to its ++ * pre-eviction size. ++ */ ++ if (!err) ++ err = kbase_mem_grow_gpu_mapping( ++ kctx, gpu_alloc->reg, ++ gpu_alloc->evicted, 0, mmu_sync_info); + -+ err = of_property_read_u32(pbha_node, "propagate_bits", &bits); ++ gpu_alloc->evicted = 0; + -+ if (err < 0) { -+ if (err != -EINVAL) { -+ dev_err(kbdev->dev, -+ "DTB value for propagate_bits is improperly formed (err=%d)\n", -+ err); -+ return err; ++ /* Since the allocation is no longer evictable, and we ensure that ++ * it grows back to its pre-eviction size, we will consider the ++ * state of it to be ALLOCATED_MAPPED, as that is the only state ++ * in which a physical allocation could transition to NOT_MOVABLE ++ * from. ++ */ ++ if (kbase_page_migration_enabled) ++ kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); + } + } + -+ if (bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) { -+ dev_err(kbdev->dev, "Bad DTB value for propagate_bits: 0x%x\n", bits); -+ return -EINVAL; -+ } ++ /* If the region is still alive remove the DONT_NEED attribute. */ ++ if (gpu_alloc->reg) ++ gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + -+ kbdev->pbha_propagate_bits = bits; -+ return 0; ++ return (err == 0); +} -+#endif + -+int kbase_pbha_read_dtb(struct kbase_device *kbdev) ++int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) +{ -+ const struct device_node *pbha_node; -+ int err; ++ struct kbase_va_region *reg; ++ int ret = -EINVAL; ++ unsigned int real_flags = 0; ++ unsigned int new_flags = 0; ++ bool prev_needed, new_needed; + -+ if (!kbasep_pbha_supported(kbdev)) -+ return 0; ++ KBASE_DEBUG_ASSERT(kctx); + -+ pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha"); -+ if (!pbha_node) -+ return 0; ++ if (!gpu_addr) ++ return -EINVAL; + -+ err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); ++ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) ++ return -EINVAL; + -+#if MALI_USE_CSF -+ if (err < 0) -+ return err; ++ /* nuke other bits */ ++ flags &= mask; + -+ err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); -+#endif ++ /* check for only supported flags */ ++ if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) ++ goto out; + -+ return err; -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.h b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h -new file mode 100644 -index 000000000..79632194c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h -@@ -0,0 +1,77 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* mask covers bits we don't support? */ ++ if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) ++ goto out; + -+#ifndef _KBASE_PBHA_H -+#define _KBASE_PBHA_H ++ /* convert flags */ ++ if (BASE_MEM_COHERENT_SYSTEM & flags) ++ real_flags |= KBASE_REG_SHARE_BOTH; ++ else if (BASE_MEM_COHERENT_LOCAL & flags) ++ real_flags |= KBASE_REG_SHARE_IN; + -+#include ++ /* now we can lock down the context, and find the region */ ++ down_write(kbase_mem_get_process_mmap_lock()); ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbasep_pbha_supported - check whether PBHA registers are -+ * available -+ * -+ * @kbdev: Device pointer -+ * -+ * Should only be used in mali_kbase_pbha* files - thus the -+ * kbase[p] prefix. -+ * -+ * Return: True if pbha is supported, false otherwise -+ */ -+bool kbasep_pbha_supported(struct kbase_device *kbdev); ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto out_unlock; + -+/** -+ * kbase_pbha_record_settings - record PBHA settings to be applied when -+ * L2 is powered down -+ * -+ * @kbdev: Device pointer -+ * @runtime: true if it's called at runtime and false if it's called on init. -+ * @id: memory access source ID -+ * @read_setting: Read setting -+ * @write_setting: Write setting -+ * -+ * Return: 0 on success, otherwise error code. -+ */ -+int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, -+ unsigned int id, unsigned int read_setting, -+ unsigned int write_setting); ++ /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations ++ * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. ++ * This would usually include JIT allocations, Tiler heap related allocations ++ * & GPU queue ringbuffer and none of them needs to be explicitly marked ++ * as evictable by Userspace. ++ */ ++ if (kbase_va_region_is_no_user_free(reg)) ++ goto out_unlock; + -+/** -+ * kbase_pbha_write_settings - write recorded PBHA settings to GPU -+ * registers -+ * -+ * @kbdev: Device pointer -+ * -+ * Only valid to call this function when L2 is powered down, otherwise -+ * this will not affect PBHA settings. -+ */ -+void kbase_pbha_write_settings(struct kbase_device *kbdev); ++ /* Is the region being transitioning between not needed and needed? */ ++ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; ++ new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; ++ if (prev_needed != new_needed) { ++ /* Aliased allocations can't be shrunk as the code doesn't ++ * support looking up: ++ * - all physical pages assigned to different GPU VAs ++ * - CPU mappings for the physical pages at different vm_pgoff ++ * (==GPU VA) locations. 
++ */ ++ if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) ++ goto out_unlock; + -+/** -+ * kbase_pbha_read_dtb - read PBHA settings from DTB and record it to be -+ * applied when L2 is powered down -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: 0 on success, otherwise error code. -+ */ -+int kbase_pbha_read_dtb(struct kbase_device *kbdev); ++ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) ++ goto out_unlock; + -+#endif /* _KBASE_PBHA_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c -new file mode 100644 -index 000000000..1cc29c700 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c -@@ -0,0 +1,227 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if (new_needed) { ++ /* Only native allocations can be marked not needed */ ++ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ret = kbase_mem_evictable_make(reg->gpu_alloc); ++ if (ret) ++ goto out_unlock; ++ } else { ++ kbase_mem_evictable_unmake(reg->gpu_alloc); ++ } ++ } + -+#include "mali_kbase_pbha_debugfs.h" -+#include "mali_kbase_pbha.h" -+#include -+#include -+#include ++ /* limit to imported memory */ ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) ++ goto out_unlock; + -+#if MALI_USE_CSF -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#endif ++ /* shareability flags are ignored for GPU uncached memory */ ++ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { ++ ret = 0; ++ goto out_unlock; ++ } + -+static int int_id_overrides_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_device *kbdev = sfile->private; -+ int i; ++ /* no change? */ ++ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { ++ ret = 0; ++ goto out_unlock; ++ } + -+ kbase_pm_context_active(kbdev); ++ new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); ++ new_flags |= real_flags; + -+ /* Minimal header for readability */ -+ seq_puts(sfile, "// R W\n"); -+ for (i = 0; i < SYSC_ALLOC_COUNT; ++i) { -+ int j; -+ u32 reg = kbase_reg_read(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); ++ /* Currently supporting only imported memory */ ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+ for (j = 0; j < sizeof(u32); ++j) { -+ u8 r_val; -+ u8 w_val; ++ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { ++ /* Future use will use the new flags, existing mapping ++ * will NOT be updated as memory should not be in use ++ * by the GPU when updating the flags. 
++ */ ++ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); ++ ret = 0; ++ } else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) { ++ /* ++ * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the ++ * dma-buf GPU mapping should always be present, check that ++ * this is the case and warn and skip the page table update if ++ * not. ++ * ++ * Then update dma-buf GPU mapping with the new flags. ++ * ++ * Note: The buffer must not be in use on the GPU when ++ * changing flags. If the buffer is in active use on ++ * the GPU, there is a risk that the GPU may trigger a ++ * shareability fault, as it will see the same ++ * addresses from buffer with different shareability ++ * properties. ++ */ ++ dev_dbg(kctx->kbdev->dev, ++ "Updating page tables on mem flag change\n"); ++ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ kbase_reg_current_backed_size(reg), ++ new_flags, ++ reg->gpu_alloc->group_id); ++ if (ret) ++ dev_warn(kctx->kbdev->dev, ++ "Failed to update GPU page tables on flag change: %d\n", ++ ret); ++ } else ++ WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count); + -+ switch (j) { -+ case 0: -+ r_val = SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg); -+ w_val = SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg); -+ break; -+ case 1: -+ r_val = SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg); -+ w_val = SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg); -+ break; -+ case 2: -+ r_val = SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg); -+ w_val = SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg); -+ break; -+ case 3: -+ r_val = SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg); -+ w_val = SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg); -+ break; -+ } -+ seq_printf(sfile, "%2zu 0x%x 0x%x\n", -+ (i * sizeof(u32)) + j, r_val, w_val); -+ } -+ } -+ kbase_pm_context_idle(kbdev); ++ /* If everything is good, then set the new flags on the region. */ ++ if (!ret) ++ reg->flags = new_flags; + -+ return 0; ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ up_write(kbase_mem_get_process_mmap_lock()); ++out: ++ return ret; +} + -+static ssize_t int_id_overrides_write(struct file *file, -+ const char __user *ubuf, size_t count, -+ loff_t *ppos) ++#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) ++ ++int kbase_mem_do_sync_imported(struct kbase_context *kctx, ++ struct kbase_va_region *reg, enum kbase_sync_type sync_fn) +{ -+ struct seq_file *sfile = file->private_data; -+ struct kbase_device *kbdev = sfile->private; -+ char raw_str[128]; -+ unsigned int id; -+ unsigned int r_val; -+ unsigned int w_val; ++ int ret = -EINVAL; ++ struct dma_buf __maybe_unused *dma_buf; ++ enum dma_data_direction dir = DMA_BIDIRECTIONAL; + -+ if (count >= sizeof(raw_str)) -+ return -E2BIG; -+ if (copy_from_user(raw_str, ubuf, count)) -+ return -EINVAL; -+ raw_str[count] = '\0'; ++ lockdep_assert_held(&kctx->reg_lock); + -+ if (sscanf(raw_str, "%u %x %x", &id, &r_val, &w_val) != 3) -+ return -EINVAL; ++ /* We assume that the same physical allocation object is used for both ++ * GPU and CPU for imported buffers. ++ */ ++ WARN_ON(reg->cpu_alloc != reg->gpu_alloc); + -+ if (kbase_pbha_record_settings(kbdev, true, id, r_val, w_val)) -+ return -EINVAL; ++ /* Currently only handle dma-bufs */ ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) ++ return ret; ++ /* ++ * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND ++ * enabled can expose us to a Linux Kernel issue between v4.6 and ++ * v4.19. We will not attempt to support cache syncs on dma-bufs that ++ * are mapped on demand (i.e. 
not on import), even on pre-4.6, neither ++ * on 4.20 or newer kernels, because this makes it difficult for ++ * userspace to know when they can rely on the cache sync. ++ * Instead, only support syncing when we always map dma-bufs on import, ++ * or if the particular buffer is mapped right now. ++ */ ++ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) && ++ !reg->gpu_alloc->imported.umm.current_mapping_usage_count) ++ return ret; + -+ /* This is a debugfs config write, so reset GPU such that changes take effect ASAP */ -+ kbase_pm_context_active(kbdev); -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) -+ kbase_reset_gpu(kbdev); -+ kbase_pm_context_idle(kbdev); ++ dma_buf = reg->gpu_alloc->imported.umm.dma_buf; + -+ return count; -+} ++ switch (sync_fn) { ++ case KBASE_SYNC_TO_DEVICE: ++ dev_dbg(kctx->kbdev->dev, ++ "Syncing imported buffer at GPU VA %llx to GPU\n", ++ reg->start_pfn); ++#ifdef KBASE_MEM_ION_SYNC_WORKAROUND ++ if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { ++ struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; ++ struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; + -+static int int_id_overrides_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, int_id_overrides_show, in->i_private); ++ dma_sync_sg_for_device(attachment->dev, sgt->sgl, ++ sgt->nents, dir); ++ ret = 0; ++ } ++#else ++ ret = dma_buf_end_cpu_access(dma_buf, dir); ++#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ ++ break; ++ case KBASE_SYNC_TO_CPU: ++ dev_dbg(kctx->kbdev->dev, ++ "Syncing imported buffer at GPU VA %llx to CPU\n", ++ reg->start_pfn); ++#ifdef KBASE_MEM_ION_SYNC_WORKAROUND ++ if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { ++ struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; ++ struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; ++ ++ dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, ++ sgt->nents, dir); ++ ret = 0; ++ } ++#else ++ ret = dma_buf_begin_cpu_access(dma_buf, dir); ++#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ ++ break; ++ } ++ ++ if (unlikely(ret)) ++ dev_warn(kctx->kbdev->dev, ++ "Failed to sync mem region %pK at GPU VA %llx: %d\n", ++ reg, reg->start_pfn, ret); ++ ++ return ret; +} + -+#if MALI_USE_CSF +/** -+ * propagate_bits_show - Read PBHA bits from L2_CONFIG out to debugfs. -+ * -+ * @sfile: The debugfs entry. -+ * @data: Data associated with the entry. ++ * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment ++ * @kctx: Pointer to kbase context ++ * @alloc: Pointer to allocation with imported dma-buf memory to unmap + * -+ * Return: 0 in all cases. ++ * This will unmap a dma-buf. Must be called after the GPU page tables for the ++ * region have been torn down. 
+ */ -+static int propagate_bits_show(struct seq_file *sfile, void *data) ++static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) +{ -+ struct kbase_device *kbdev = sfile->private; -+ u32 l2_config_val; ++ struct tagged_addr *pa = alloc->pages; + -+ kbase_csf_scheduler_pm_active(kbdev); -+ kbase_pm_wait_for_l2_powered(kbdev); -+ l2_config_val = L2_CONFIG_PBHA_HWU_GET(kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG))); -+ kbase_csf_scheduler_pm_idle(kbdev); ++ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, ++ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); ++ alloc->imported.umm.sgt = NULL; + -+ seq_printf(sfile, "PBHA Propagate Bits: 0x%x\n", l2_config_val); -+ return 0; -+} ++ kbase_remove_dma_buf_usage(kctx, alloc); + -+static int propagate_bits_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, propagate_bits_show, in->i_private); ++ memset(pa, 0xff, sizeof(*pa) * alloc->nents); ++ alloc->nents = 0; +} + ++/* to replace sg_dma_len. */ ++#define MALI_SG_DMA_LEN(sg) ((sg)->length) ++ +/** -+ * propagate_bits_write - Write input value from debugfs to PBHA bits of L2_CONFIG register. ++ * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping ++ * @kctx: Pointer to kbase context ++ * @reg: Pointer to region with imported dma-buf memory to map + * -+ * @file: Pointer to file struct of debugfs node. -+ * @ubuf: Pointer to user buffer with value to be written. -+ * @count: Size of user buffer. -+ * @ppos: Not used. ++ * Map the dma-buf and prepare the page array with the tagged Mali physical ++ * addresses for GPU mapping. + * -+ * Return: Size of buffer passed in when successful, but error code E2BIG/EINVAL otherwise. ++ * Return: 0 on success, or negative error code + */ -+static ssize_t propagate_bits_write(struct file *file, const char __user *ubuf, size_t count, -+ loff_t *ppos) ++static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, ++ struct kbase_va_region *reg) +{ -+ struct seq_file *sfile = file->private_data; -+ struct kbase_device *kbdev = sfile->private; -+ /* 32 characters should be enough for the input string in any base */ -+ char raw_str[32]; -+ unsigned long propagate_bits; ++ struct sg_table *sgt; ++ struct scatterlist *s; ++ int i; ++ struct tagged_addr *pa; ++ int err; ++ size_t count = 0; ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + -+ if (count >= sizeof(raw_str)) -+ return -E2BIG; -+ if (copy_from_user(raw_str, ubuf, count)) -+ return -EINVAL; -+ raw_str[count] = '\0'; -+ if (kstrtoul(raw_str, 0, &propagate_bits)) -+ return -EINVAL; ++ WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM); ++ WARN_ON_ONCE(alloc->imported.umm.sgt); + -+ /* Check propagate_bits input argument does not -+ * exceed the maximum size of the propagate_bits mask. -+ */ -+ if (propagate_bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) ++ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, ++ DMA_BIDIRECTIONAL); ++ if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; -+ /* Cast to u8 is safe as check is done already to ensure size is within -+ * correct limits. 
-+ */ -+ kbdev->pbha_propagate_bits = (u8)propagate_bits; + -+ /* GPU Reset will set new values in L2 config */ -+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { -+ kbase_reset_gpu(kbdev); -+ kbase_reset_gpu_wait(kbdev); -+ } ++ /* save for later */ ++ alloc->imported.umm.sgt = sgt; + -+ return count; -+} ++ pa = kbase_get_gpu_phy_pages(reg); + -+static const struct file_operations pbha_propagate_bits_fops = { -+ .owner = THIS_MODULE, -+ .open = propagate_bits_open, -+ .read = seq_read, -+ .write = propagate_bits_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+#endif /* MALI_USE_CSF */ -+ -+static const struct file_operations pbha_int_id_overrides_fops = { -+ .owner = THIS_MODULE, -+ .open = int_id_overrides_open, -+ .read = seq_read, -+ .write = int_id_overrides_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ for_each_sg(sgt->sgl, s, sgt->nents, i) { ++ size_t j, pages = PFN_UP(MALI_SG_DMA_LEN(s)); + -+void kbase_pbha_debugfs_init(struct kbase_device *kbdev) -+{ -+ if (kbasep_pbha_supported(kbdev)) { -+ const mode_t mode = 0644; -+ struct dentry *debugfs_pbha_dir = debugfs_create_dir( -+ "pbha", kbdev->mali_debugfs_directory); ++ WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), ++ "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", ++ MALI_SG_DMA_LEN(s)); + -+ if (IS_ERR_OR_NULL(debugfs_pbha_dir)) { -+ dev_err(kbdev->dev, -+ "Couldn't create mali debugfs page-based hardware attributes directory\n"); -+ return; -+ } ++ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), ++ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", ++ (unsigned long long) sg_dma_address(s)); + -+ debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, -+ kbdev, &pbha_int_id_overrides_fops); -+#if MALI_USE_CSF -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) -+ debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev, -+ &pbha_propagate_bits_fops); -+#endif /* MALI_USE_CSF */ ++ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) ++ *pa++ = as_tagged(sg_dma_address(s) + ++ (j << PAGE_SHIFT)); ++ WARN_ONCE(j < pages, ++ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", ++ alloc->imported.umm.dma_buf->size); + } -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h -new file mode 100644 -index 000000000..508ecdff9 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h -@@ -0,0 +1,34 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ + -+#ifndef _KBASE_PBHA_DEBUGFS_H -+#define _KBASE_PBHA_DEBUGFS_H ++ if (!(reg->flags & KBASE_REG_IMPORT_PAD) && ++ WARN_ONCE(count < reg->nr_pages, ++ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", ++ alloc->imported.umm.dma_buf->size)) { ++ err = -EINVAL; ++ goto err_unmap_attachment; ++ } + -+#include ++ /* Update nents as we now have pages to map */ ++ alloc->nents = count; ++ kbase_add_dma_buf_usage(kctx, alloc); + -+/** -+ * kbase_pbha_debugfs_init - Initialize pbha debugfs directory -+ * -+ * @kbdev: Device pointer -+ */ -+void kbase_pbha_debugfs_init(struct kbase_device *kbdev); ++ return 0; + -+#endif /* _KBASE_PBHA_DEBUGFS_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c -new file mode 100644 -index 000000000..265c676f1 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c -@@ -0,0 +1,119 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++err_unmap_attachment: ++ kbase_mem_umm_unmap_attachment(kctx, alloc); + -+#include -+#include -+#include -+#include -+#include ++ return err; ++} + -+/* -+ * This file is included only for type definitions and functions belonging to -+ * specific platform folders. Do not add dependencies with symbols that are -+ * defined somewhere else. -+ */ -+#include ++int kbase_mem_umm_map(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ int err; ++ struct kbase_mem_phy_alloc *alloc; ++ unsigned long gwt_mask = ~0; + -+#ifndef CONFIG_OF ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+#define PLATFORM_CONFIG_RESOURCE_COUNT 4 ++ lockdep_assert_held(&kctx->reg_lock); + -+static struct platform_device *mali_device; ++ alloc = reg->gpu_alloc; + -+/** -+ * kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources -+ * struct to Linux-specific resources -+ * @io_resources: Input IO resource data -+ * @linux_resources: Pointer to output array of Linux resource structures -+ * -+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function -+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. -+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
-+ */ -+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) -+{ -+ if (!io_resources || !linux_resources) { -+ pr_err("%s: couldn't find proper resources\n", __func__); -+ return; ++ alloc->imported.umm.current_mapping_usage_count++; ++ if (alloc->imported.umm.current_mapping_usage_count != 1) { ++ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || ++ alloc->imported.umm.need_sync) { ++ if (!kbase_is_region_invalid_or_free(reg)) { ++ err = kbase_mem_do_sync_imported(kctx, reg, ++ KBASE_SYNC_TO_DEVICE); ++ WARN_ON_ONCE(err); ++ } ++ } ++ return 0; + } + -+ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); ++ err = kbase_mem_umm_map_attachment(kctx, reg); ++ if (err) ++ goto bad_map_attachment; + -+ linux_resources[0].start = io_resources->io_memory_region.start; -+ linux_resources[0].end = io_resources->io_memory_region.end; -+ linux_resources[0].flags = IORESOURCE_MEM; ++#ifdef CONFIG_MALI_CINSTR_GWT ++ if (kctx->gwt_enabled) ++ gwt_mask = ~KBASE_REG_GPU_WR; ++#endif + -+ linux_resources[1].start = io_resources->job_irq_number; -+ linux_resources[1].end = io_resources->job_irq_number; -+ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ kbase_reg_current_backed_size(reg), ++ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, ++ mmu_sync_info, NULL); ++ if (err) ++ goto bad_insert; + -+ linux_resources[2].start = io_resources->mmu_irq_number; -+ linux_resources[2].end = io_resources->mmu_irq_number; -+ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ if (reg->flags & KBASE_REG_IMPORT_PAD && ++ !WARN_ON(reg->nr_pages < alloc->nents)) { ++ /* For padded imported dma-buf memory, map the dummy aliasing ++ * page from the end of the dma-buf pages, to the end of the ++ * region using a read only mapping. ++ * ++ * Assume alloc->nents is the number of actual pages in the ++ * dma-buf memory. 
++ */ ++ err = kbase_mmu_insert_single_imported_page( ++ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page, ++ reg->nr_pages - alloc->nents, ++ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, ++ mmu_sync_info); ++ if (err) ++ goto bad_pad_insert; ++ } + -+ linux_resources[3].start = io_resources->gpu_irq_number; -+ linux_resources[3].end = io_resources->gpu_irq_number; -+ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ return 0; ++ ++bad_pad_insert: ++ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, ++ alloc->nents, alloc->nents, kctx->as_nr, true); ++bad_insert: ++ kbase_mem_umm_unmap_attachment(kctx, alloc); ++bad_map_attachment: ++ alloc->imported.umm.current_mapping_usage_count--; ++ ++ return err; +} + -+int kbase_platform_register(void) ++void kbase_mem_umm_unmap(struct kbase_context *kctx, ++ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ -+ struct kbase_platform_config *config; -+ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; -+ int err; -+ -+ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ -+ if (config == NULL) { -+ pr_err("%s: couldn't get platform config\n", __func__); -+ return -ENODEV; ++ alloc->imported.umm.current_mapping_usage_count--; ++ if (alloc->imported.umm.current_mapping_usage_count) { ++ if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || ++ alloc->imported.umm.need_sync) { ++ if (!kbase_is_region_invalid_or_free(reg)) { ++ int err = kbase_mem_do_sync_imported(kctx, reg, ++ KBASE_SYNC_TO_CPU); ++ WARN_ON_ONCE(err); ++ } ++ } ++ return; + } + -+ mali_device = platform_device_alloc("mali", 0); -+ if (mali_device == NULL) -+ return -ENOMEM; -+ -+ kbasep_config_parse_io_resources(config->io_resources, resources); -+ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); -+ if (err) { -+ platform_device_put(mali_device); -+ mali_device = NULL; -+ return err; -+ } ++ if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { ++ int err; + -+ err = platform_device_add(mali_device); -+ if (err) { -+ platform_device_unregister(mali_device); -+ mali_device = NULL; -+ return err; ++ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, ++ alloc->pages, reg->nr_pages, reg->nr_pages, ++ kctx->as_nr, true); ++ WARN_ON(err); + } + -+ return 0; ++ kbase_mem_umm_unmap_attachment(kctx, alloc); +} -+EXPORT_SYMBOL(kbase_platform_register); + -+void kbase_platform_unregister(void) ++static int get_umm_memory_group_id(struct kbase_context *kctx, ++ struct dma_buf *dma_buf) +{ -+ if (mali_device) -+ platform_device_unregister(mali_device); ++ int group_id = BASE_MEM_GROUP_DEFAULT; ++ ++ if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) { ++ struct memory_group_manager_import_data mgm_import_data; ++ ++ mgm_import_data.type = ++ MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; ++ mgm_import_data.u.dma_buf = dma_buf; ++ ++ group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id( ++ kctx->kbdev->mgm_dev, &mgm_import_data); ++ } ++ ++ return group_id; +} -+EXPORT_SYMBOL(kbase_platform_unregister); + -+#endif /* CONFIG_OF */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c -new file mode 100644 -index 000000000..62a132816 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c -@@ -0,0 +1,311 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) 
COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++/** ++ * kbase_mem_from_umm - Import dma-buf memory into kctx ++ * @kctx: Pointer to kbase context to import memory into ++ * @fd: File descriptor of dma-buf to import ++ * @va_pages: Pointer where virtual size of the region will be output ++ * @flags: Pointer to memory flags ++ * @padding: Number of read only padding pages to be inserted at the end of the ++ * GPU mapping of the dma-buf + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Return: Pointer to new kbase_va_region object of the imported dma-buf, or ++ * NULL on error. + * ++ * This function imports a dma-buf into kctx, and created a kbase_va_region ++ * object that wraps the dma-buf. + */ ++static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, ++ int fd, u64 *va_pages, u64 *flags, u32 padding) ++{ ++ struct kbase_va_region *reg; ++ struct dma_buf *dma_buf; ++ struct dma_buf_attachment *dma_attachment; ++ bool shared_zone = false; ++ bool need_sync = false; ++ int group_id; + -+/** -+ * DOC: Base kernel power management APIs -+ */ ++ /* 64-bit address range is the max */ ++ if (*va_pages > (U64_MAX / PAGE_SIZE)) ++ return NULL; + -+#include -+#include -+#include -+#include -+#include ++ dma_buf = dma_buf_get(fd); ++ if (IS_ERR_OR_NULL(dma_buf)) ++ return NULL; + -+#include -+#include ++ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); ++ if (IS_ERR_OR_NULL(dma_attachment)) { ++ dma_buf_put(dma_buf); ++ return NULL; ++ } + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+#include -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; ++ if (!*va_pages) { ++ dma_buf_detach(dma_buf, dma_attachment); ++ dma_buf_put(dma_buf); ++ return NULL; ++ } + -+#include ++ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) ++ return NULL; + -+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) -+{ -+ return kbase_hwaccess_pm_powerup(kbdev, flags); -+} ++ /* ignore SAME_VA */ ++ *flags &= ~BASE_MEM_SAME_VA; + -+void kbase_pm_halt(struct kbase_device *kbdev) -+{ -+ kbase_hwaccess_pm_halt(kbdev); -+} ++ /* ++ * Force CPU cached flag. ++ * ++ * We can't query the dma-buf exporter to get details about the CPU ++ * cache attributes of CPU mappings, so we have to assume that the ++ * buffer may be cached, and call into the exporter for cache ++ * maintenance, and rely on the exporter to do the right thing when ++ * handling our calls. 
++ */ ++ *flags |= BASE_MEM_CACHED_CPU; + -+void kbase_pm_context_active(struct kbase_device *kbdev) -+{ -+ (void)kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); -+} ++ if (*flags & BASE_MEM_IMPORT_SHARED) ++ shared_zone = true; + -+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, -+ enum kbase_pm_suspend_handler suspend_handler) -+{ -+ int c; ++ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) ++ need_sync = true; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, -+ suspend_handler, current->pid); -+ kbase_pm_lock(kbdev); ++ if (!kbase_ctx_compat_mode(kctx)) { ++ /* ++ * 64-bit tasks require us to reserve VA on the CPU that we use ++ * on the GPU. ++ */ ++ shared_zone = true; ++ } + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, -+ suspend_handler)) { -+ kbase_pm_unlock(kbdev); -+ return 1; ++ if (shared_zone) { ++ *flags |= BASE_MEM_NEED_MMAP; ++ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, ++ KBASE_REG_ZONE_SAME_VA); ++ } else { ++ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, ++ KBASE_REG_ZONE_CUSTOM_VA); + } -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ if (kbase_pm_is_suspending(kbdev)) { -+ switch (suspend_handler) { -+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: -+ if (kbdev->pm.active_count != 0) -+ break; -+ fallthrough; -+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: -+ kbase_pm_unlock(kbdev); -+ return 1; ++ if (!reg) { ++ dma_buf_detach(dma_buf, dma_attachment); ++ dma_buf_put(dma_buf); ++ return NULL; ++ } + -+ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: -+ fallthrough; -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); -+ break; ++ group_id = get_umm_memory_group_id(kctx, dma_buf); ++ ++ reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, ++ KBASE_MEM_TYPE_IMPORTED_UMM, group_id); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc; ++ ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto error_out; ++ ++ /* No pages to map yet */ ++ reg->gpu_alloc->nents = 0; ++ ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ ++ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ ++ ++ if (*flags & BASE_MEM_PROTECTED) ++ reg->flags |= KBASE_REG_PROTECTED; ++ ++ if (padding) ++ reg->flags |= KBASE_REG_IMPORT_PAD; ++ ++ reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; ++ reg->gpu_alloc->imported.umm.sgt = NULL; ++ reg->gpu_alloc->imported.umm.dma_buf = dma_buf; ++ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; ++ reg->gpu_alloc->imported.umm.need_sync = need_sync; ++ reg->gpu_alloc->imported.umm.kctx = kctx; ++ reg->extension = 0; ++ ++ if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { ++ int err; ++ ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1; ++ ++ err = kbase_mem_umm_map_attachment(kctx, reg); ++ if (err) { ++ dev_warn(kctx->kbdev->dev, ++ "Failed to map dma-buf %pK on GPU: %d\n", ++ dma_buf, err); ++ goto error_out; + } -+ } -+ c = ++kbdev->pm.active_count; -+ KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_ACTIVE, NULL, c); + -+ if (c == 1) { -+ /* First context active: Power on the GPU and -+ * any cores requested by the policy -+ */ -+ kbase_hwaccess_pm_gpu_active(kbdev); -+#ifdef 
CONFIG_MALI_ARBITER_SUPPORT -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ kbase_clk_rate_trace_manager_gpu_active(kbdev); ++ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } + -+ kbase_pm_unlock(kbdev); -+ dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); ++ return reg; + -+ return 0; ++error_out: ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++no_alloc: ++ kfree(reg); ++ ++ return NULL; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_context_active); ++u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) ++{ ++ u32 cpu_cache_line_size = cache_line_size(); ++ u32 gpu_cache_line_size = ++ (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); + -+void kbase_pm_context_idle(struct kbase_device *kbdev) ++ return ((cpu_cache_line_size > gpu_cache_line_size) ? ++ cpu_cache_line_size : ++ gpu_cache_line_size); ++} ++ ++static struct kbase_va_region *kbase_mem_from_user_buffer( ++ struct kbase_context *kctx, unsigned long address, ++ unsigned long size, u64 *va_pages, u64 *flags) +{ -+ int c; ++ long i, dma_mapped_pages; ++ struct kbase_va_region *reg; ++ struct rb_root *rbtree; ++ long faulted_pages; ++ int zone = KBASE_REG_ZONE_CUSTOM_VA; ++ bool shared_zone = false; ++ u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); ++ struct kbase_alloc_import_user_buf *user_buf; ++ struct page **pages = NULL; ++ struct tagged_addr *pa; ++ struct device *dev; ++ int write; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ /* Flag supported only for dma-buf imported memory */ ++ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) ++ return NULL; + ++ if ((address & (cache_line_alignment - 1)) != 0 || ++ (size & (cache_line_alignment - 1)) != 0) { ++ if (*flags & BASE_MEM_UNCACHED_GPU) { ++ dev_warn(kctx->kbdev->dev, ++ "User buffer is not cache line aligned and marked as GPU uncached\n"); ++ goto bad_size; ++ } + -+ kbase_pm_lock(kbdev); ++ /* Coherency must be enabled to handle partial cache lines */ ++ if (*flags & (BASE_MEM_COHERENT_SYSTEM | ++ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { ++ /* Force coherent system required flag, import will ++ * then fail if coherency isn't available ++ */ ++ *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; ++ } else { ++ dev_warn(kctx->kbdev->dev, ++ "User buffer is not cache line aligned and no coherency enabled\n"); ++ goto bad_size; ++ } ++ } + -+ c = --kbdev->pm.active_count; -+ KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, c); ++ *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - ++ PFN_DOWN(address); ++ if (!*va_pages) ++ goto bad_size; + -+ KBASE_DEBUG_ASSERT(c >= 0); ++ if (*va_pages > (UINT64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+ if (c == 0) { -+ /* Last context has gone idle */ -+ kbase_hwaccess_pm_gpu_idle(kbdev); -+ kbase_clk_rate_trace_manager_gpu_idle(kbdev); ++ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) ++ goto bad_size; + -+ /* Wake up anyone waiting for this to become 0 (e.g. suspend). -+ * The waiters must synchronize with us by locking the pm.lock -+ * after waiting. ++ /* SAME_VA generally not supported with imported memory (no known use cases) */ ++ *flags &= ~BASE_MEM_SAME_VA; ++ ++ if (*flags & BASE_MEM_IMPORT_SHARED) ++ shared_zone = true; ++ ++ if (!kbase_ctx_compat_mode(kctx)) { ++ /* ++ * 64-bit tasks require us to reserve VA on the CPU that we use ++ * on the GPU. 
+ */ -+ wake_up(&kbdev->pm.zero_active_count_wait); ++ shared_zone = true; + } + -+ kbase_pm_unlock(kbdev); -+ dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, -+ kbdev->pm.active_count, current->pid); -+} ++ if (shared_zone) { ++ *flags |= BASE_MEM_NEED_MMAP; ++ zone = KBASE_REG_ZONE_SAME_VA; ++ rbtree = &kctx->reg_rbtree_same; ++ } else ++ rbtree = &kctx->reg_rbtree_custom; + -+KBASE_EXPORT_TEST_API(kbase_pm_context_idle); ++ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); + -+int kbase_pm_driver_suspend(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ if (!reg) ++ goto no_region; + -+ /* Suspend HW counter intermediaries. This blocks until workers and timers -+ * are no longer running. -+ */ -+ kbase_vinstr_suspend(kbdev->vinstr_ctx); -+ kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); ++ reg->gpu_alloc = kbase_alloc_create( ++ kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, ++ BASE_MEM_GROUP_DEFAULT); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+ /* Disable GPU hardware counters. -+ * This call will block until counters are disabled. -+ */ -+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+ mutex_lock(&kbdev->pm.lock); -+ if (WARN_ON(kbase_pm_is_suspending(kbdev))) { -+ mutex_unlock(&kbdev->pm.lock); -+ return 0; -+ } -+ kbdev->pm.suspending = true; -+ mutex_unlock(&kbdev->pm.lock); ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) { -+ int i; -+ unsigned long flags; ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ ++ reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->js_data.runpool_irq.submit_allowed = 0; -+ kbase_disjoint_state_up(kbdev); -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ kbase_job_slot_softstop(kbdev, i, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ user_buf = ®->gpu_alloc->imported.user_buf; + -+ /* From now on, the active count will drop towards zero. Sometimes, -+ * it'll go up briefly before going down again. However, once -+ * it reaches zero it will stay there - guaranteeing that we've idled -+ * all pm references -+ */ ++ user_buf->size = size; ++ user_buf->address = address; ++ user_buf->nr_pages = *va_pages; ++ user_buf->mm = current->mm; ++ kbase_mem_mmgrab(); ++ if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) ++ user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); ++ else ++ user_buf->pages = kmalloc_array(*va_pages, ++ sizeof(struct page *), GFP_KERNEL); + -+#if !MALI_USE_CSF -+ /* Suspend job scheduler and associated components, so that it releases all -+ * the PM active count references ++ if (!user_buf->pages) ++ goto no_page_array; ++ ++ /* If the region is coherent with the CPU then the memory is imported ++ * and mapped onto the GPU immediately. ++ * Otherwise get_user_pages is called as a sanity check, but with ++ * NULL as the pages argument which will fault the pages, but not ++ * pin them. The memory will then be pinned only around the jobs that ++ * specify the region as an external resource. 
+ */ -+ kbasep_js_suspend(kbdev); -+#else -+ if (kbase_csf_scheduler_pm_suspend(kbdev)) { -+ mutex_lock(&kbdev->pm.lock); -+ kbdev->pm.suspending = false; -+ mutex_unlock(&kbdev->pm.lock); -+ return -1; ++ if (reg->flags & KBASE_REG_SHARE_BOTH) { ++ pages = user_buf->pages; ++ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } -+#endif + -+ /* Wait for the active count to reach zero. This is not the same as -+ * waiting for a power down, since not all policies power down when this -+ * reaches zero. -+ */ -+ dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", -+ current->pid); -+ wait_event(kbdev->pm.zero_active_count_wait, -+ kbdev->pm.active_count == 0); -+ dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); ++ down_read(kbase_mem_get_process_mmap_lock()); + -+ /* NOTE: We synchronize with anything that was just finishing a -+ * kbase_pm_context_idle() call by locking the pm.lock below -+ */ -+ if (kbase_hwaccess_pm_suspend(kbdev)) { -+ mutex_lock(&kbdev->pm.lock); -+ kbdev->pm.suspending = false; -+ mutex_unlock(&kbdev->pm.lock); -+ return -1; -+ } ++ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) { -+ mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); -+ kbase_arbiter_pm_vm_stopped(kbdev); -+ mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); -+ } -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ faulted_pages = ++ kbase_get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); + -+ return 0; -+} ++ up_read(kbase_mem_get_process_mmap_lock()); + -+void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) -+{ -+ unsigned long flags; ++ if (faulted_pages != *va_pages) ++ goto fault_mismatch; + -+ /* MUST happen before any pm_context_active calls occur */ -+ kbase_hwaccess_pm_resume(kbdev); ++ reg->gpu_alloc->nents = 0; ++ reg->extension = 0; + -+ /* Initial active call, to power on the GPU/cores if needed */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbase_pm_context_active_handle_suspend(kbdev, -+ (arb_gpu_start ? -+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) -+ return; ++ pa = kbase_get_gpu_phy_pages(reg); ++ dev = kctx->kbdev->dev; ++ ++ if (pages) { ++ /* Top bit signifies that this was pinned on import */ ++ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; ++ ++ /* Manual CPU cache synchronization. ++ * ++ * The driver disables automatic CPU cache synchronization because the ++ * memory pages that enclose the imported region may also contain ++ * sub-regions which are not imported and that are allocated and used ++ * by the user process. This may be the case of memory at the beginning ++ * of the first page and at the end of the last page. Automatic CPU cache ++ * synchronization would force some operations on those memory allocations, ++ * unbeknown to the user process: in particular, a CPU cache invalidate ++ * upon unmapping would destroy the content of dirty CPU caches and cause ++ * the user process to lose CPU writes to the non-imported sub-regions. ++ * ++ * When the GPU claims ownership of the imported memory buffer, it shall ++ * commit CPU writes for the whole of all pages that enclose the imported ++ * region, otherwise the initial content of memory would be wrong. 
++ */ ++ for (i = 0; i < faulted_pages; i++) { ++ dma_addr_t dma_addr; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else -+ kbase_pm_context_active(kbdev); ++ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, ++ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); +#endif ++ if (dma_mapping_error(dev, dma_addr)) ++ goto unwind_dma_map; + -+#if !MALI_USE_CSF -+ /* Resume any blocked atoms (which may cause contexts to be scheduled in -+ * and dependent atoms to run) -+ */ -+ kbase_resume_suspended_soft_jobs(kbdev); ++ user_buf->dma_addrs[i] = dma_addr; ++ pa[i] = as_tagged(page_to_phys(pages[i])); + -+ /* Resume the Job Scheduler and associated components, and start running -+ * atoms -+ */ -+ kbasep_js_resume(kbdev); -+#endif ++ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } + -+ /* Matching idle call, to power off the GPU/cores if we didn't actually -+ * need it and the policy doesn't want it on ++ reg->gpu_alloc->nents = faulted_pages; ++ } ++ ++ return reg; ++ ++unwind_dma_map: ++ dma_mapped_pages = i; ++ /* Run the unmap loop in the same order as map loop, and perform again ++ * CPU cache synchronization to re-write the content of dirty CPU caches ++ * to memory. This precautionary measure is kept here to keep this code ++ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor ++ * in the future. + */ -+ kbase_pm_context_idle(kbdev); ++ for (i = 0; i < dma_mapped_pages; i++) { ++ dma_addr_t dma_addr = user_buf->dma_addrs[i]; + -+ /* Re-enable GPU hardware counters */ -+#if MALI_USE_CSF -+ kbase_csf_scheduler_spin_lock(kbdev, &flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, ++ DMA_ATTR_SKIP_CPU_SYNC); +#endif -+ -+ /* Resume HW counters intermediaries. 
*/ -+ kbase_vinstr_resume(kbdev->vinstr_ctx); -+ kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); ++ } ++fault_mismatch: ++ if (pages) { ++ /* In this case, the region was not yet in the region tracker, ++ * and so there are no CPU mappings to remove before we unpin ++ * the page ++ */ ++ for (i = 0; i < faulted_pages; i++) ++ kbase_unpin_user_buf_page(pages[i]); ++ } ++no_page_array: ++invalid_flags: ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc_obj: ++ kfree(reg); ++no_region: ++bad_size: ++ return NULL; +} + -+int kbase_pm_suspend(struct kbase_device *kbdev) ++ ++u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, ++ u64 nents, struct base_mem_aliasing_info *ai, ++ u64 *num_pages) +{ -+ int result = 0; -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); -+ else -+ result = kbase_pm_driver_suspend(kbdev); -+#else -+ result = kbase_pm_driver_suspend(kbdev); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ struct kbase_va_region *reg; ++ u64 gpu_va; ++ size_t i; ++ bool coherent; ++ uint64_t max_stride; + -+ return result; -+} ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+void kbase_pm_resume(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ if (kbdev->arb.arb_if) -+ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); -+ else -+ kbase_pm_driver_resume(kbdev, false); -+#else -+ kbase_pm_driver_resume(kbdev, false); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h -new file mode 100644 -index 000000000..4bb90a4f6 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h -@@ -0,0 +1,257 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(flags); ++ KBASE_DEBUG_ASSERT(ai); ++ KBASE_DEBUG_ASSERT(num_pages); + -+/** -+ * DOC: Power management API definitions -+ */ ++ /* mask to only allowed flags */ ++ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | ++ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | ++ BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); + -+#ifndef _KBASE_PM_H_ -+#define _KBASE_PM_H_ ++ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { ++ dev_warn(kctx->kbdev->dev, ++ "%s called with bad flags (%llx)", ++ __func__, ++ (unsigned long long)*flags); ++ goto bad_flags; ++ } ++ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || ++ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + -+#include "mali_kbase_hwaccess_pm.h" ++ if (!stride) ++ goto bad_stride; + -+#define PM_ENABLE_IRQS 0x01 -+#define PM_HW_ISSUES_DETECT 0x02 ++ if (!nents) ++ goto bad_nents; + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+/* In the case that the GPU was granted by the Arbiter, it will have -+ * already been reset. The following flag ensures it is not reset -+ * twice. -+ */ -+#define PM_NO_RESET 0x04 -+#endif ++ max_stride = div64_u64(U64_MAX, nents); + -+/** -+ * kbase_pm_init - Initialize the power management framework. -+ * -+ * @kbdev: The kbase device structure for the device -+ * (must be a valid pointer) -+ * -+ * Must be called before any other power management function -+ * -+ * Return: 0 if the power management framework was successfully initialized. -+ */ -+int kbase_pm_init(struct kbase_device *kbdev); ++ if (stride > max_stride) ++ goto bad_size; + -+/** -+ * kbase_pm_powerup - Power up GPU after all modules have been initialized -+ * and interrupt handlers installed. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @flags: Flags to pass on to kbase_pm_init_hw -+ * -+ * Return: 0 if powerup was successful. -+ */ -+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); ++ if ((nents * stride) > (U64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+/** -+ * kbase_pm_halt - Halt the power management framework. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Should ensure that no new interrupts are generated, -+ * but allow any currently running interrupt handlers to complete successfully. -+ * The GPU is forced off by the time this function returns, regardless of -+ * whether or not the active power policy asks for the GPU to be powered off. -+ */ -+void kbase_pm_halt(struct kbase_device *kbdev); ++ /* calculate the number of pages this alias will cover */ ++ *num_pages = nents * stride; + -+/** -+ * kbase_pm_term - Terminate the power management framework. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * No power management functions may be called after this -+ * (except @ref kbase_pm_init) -+ */ -+void kbase_pm_term(struct kbase_device *kbdev); ++ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages)) ++ goto bad_size; + -+/** -+ * kbase_pm_context_active - Increment the count of active contexts. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function should be called when a context is about to submit a job. -+ * It informs the active power policy that the GPU is going to be in use shortly -+ * and the policy is expected to start turning on the GPU. 
-+ * -+ * This function will block until the GPU is available. -+ * -+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is -+ * in use. Use kbase_pm_contect_active_unless_suspending() instead. -+ * -+ * @note a Suspend is only visible to Kernel threads; user-space threads in a -+ * syscall cannot witness a suspend, because they are frozen before the suspend -+ * begins. -+ */ -+void kbase_pm_context_active(struct kbase_device *kbdev); ++ if (!kbase_ctx_compat_mode(kctx)) { ++ /* 64-bit tasks must MMAP anyway, but not expose this address to ++ * clients ++ */ ++ *flags |= BASE_MEM_NEED_MMAP; ++ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, ++ KBASE_REG_ZONE_SAME_VA); ++ } else { ++ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, ++ KBASE_REG_ZONE_CUSTOM_VA); ++ } + ++ if (!reg) ++ goto no_reg; + -+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ -+enum kbase_pm_suspend_handler { -+ /** A suspend is not expected/not possible - this is the same as -+ * kbase_pm_context_active() -+ */ -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, -+ /** If we're suspending, fail and don't increase the active count */ -+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, -+ /** If we're suspending, succeed and allow the active count to increase -+ * if it didn't go from 0->1 (i.e., we didn't re-activate the GPU). -+ * -+ * This should only be used when there is a bounded time on the activation -+ * (e.g. guarantee it's going to be idled very soon after) -+ */ -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /** Special case when Arbiter has notified we can use GPU. -+ * Active count should always start at 0 in this case. -+ */ -+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+}; ++ /* zero-sized page array, as we don't need one/can support one */ ++ reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, ++ BASE_MEM_GROUP_DEFAULT); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+/** -+ * kbase_pm_context_active_handle_suspend - Suspend 'safe' variant of kbase_pm_context_active() -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @suspend_handler: The handler code for how to handle a suspend that might occur -+ * -+ * If a suspend is in progress, this allows for various different ways of -+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. -+ * -+ * We returns a status code indicating whether we're allowed to keep the GPU -+ * active during the suspend, depending on the handler code. If the status code -+ * indicates a failure, the caller must abort whatever operation it was -+ * attempting, and potentially queue it up for after the OS has resumed. -+ * -+ * Return: 0 on success, non-zero othrewise. -+ */ -+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+/** -+ * kbase_pm_context_idle - Decrement the reference count of active contexts. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function should be called when a context becomes idle. -+ * After this call the GPU may be turned off by the power policy so the calling -+ * code should ensure that it does not access the GPU's registers. 
-+ */ -+void kbase_pm_context_idle(struct kbase_device *kbdev); ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline -+ * function -+ */ ++ reg->gpu_alloc->imported.alias.nents = nents; ++ reg->gpu_alloc->imported.alias.stride = stride; ++ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); ++ if (!reg->gpu_alloc->imported.alias.aliased) ++ goto no_aliased_array; + -+/** -+ * kbase_pm_suspend - Suspend the GPU and prevent any further register accesses -+ * to it from Kernel threads. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This is called in response to an OS suspend event, and calls into the various -+ * kbase components to complete the suspend. -+ * -+ * @note the mechanisms used here rely on all user-space threads being frozen -+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up -+ * the GPU e.g. via atom submission. -+ * -+ * Return: 0 on success. -+ */ -+int kbase_pm_suspend(struct kbase_device *kbdev); ++ kbase_gpu_vm_lock(kctx); + -+/** -+ * kbase_pm_resume - Resume the GPU, allow register accesses to it, -+ * and resume running atoms on the GPU. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This is called in response to an OS resume event, and calls into the various -+ * kbase components to complete the resume. -+ * -+ * Also called when using VM arbiter, when GPU access has been granted. -+ */ -+void kbase_pm_resume(struct kbase_device *kbdev); ++ /* validate and add src handles */ ++ for (i = 0; i < nents; i++) { ++ if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { ++ if (ai[i].handle.basep.handle != ++ BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) ++ goto bad_handle; /* unsupported magic handle */ ++ if (!ai[i].length) ++ goto bad_handle; /* must be > 0 */ ++ if (ai[i].length > stride) ++ goto bad_handle; /* can't be larger than the ++ * stride ++ */ ++ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; ++ } else { ++ struct kbase_va_region *aliasing_reg; ++ struct kbase_mem_phy_alloc *alloc; + -+/** -+ * kbase_pm_vsync_callback - vsync callback -+ * -+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise -+ * @data: Pointer to the kbase device as returned by kbase_find_device() -+ * -+ * Callback function used to notify the power management code that a vsync has -+ * occurred on the display. -+ */ -+void kbase_pm_vsync_callback(int buffer_updated, void *data); ++ aliasing_reg = kbase_region_tracker_find_region_base_address( ++ kctx, ++ (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + -+/** -+ * kbase_pm_driver_suspend() - Put GPU and driver in suspend state -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Suspend the GPU and prevent any further register accesses to it from Kernel -+ * threads. -+ * -+ * This is called in response to an OS suspend event, and calls into the various -+ * kbase components to complete the suspend. -+ * -+ * Despite kbase_pm_suspend(), it will ignore to update Arbiter -+ * status if MALI_ARBITER_SUPPORT is enabled. -+ * -+ * @note the mechanisms used here rely on all user-space threads being frozen -+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up -+ * the GPU e.g. via atom submission. -+ * -+ * Return: 0 on success. 
-+ */ -+int kbase_pm_driver_suspend(struct kbase_device *kbdev); ++ /* validate found region */ ++ if (kbase_is_region_invalid_or_free(aliasing_reg)) ++ goto bad_handle; /* Not found/already free */ ++ if (kbase_is_region_shrinkable(aliasing_reg)) ++ goto bad_handle; /* Ephemeral region */ ++ if (kbase_va_region_is_no_user_free(aliasing_reg)) ++ goto bad_handle; /* JIT regions can't be ++ * aliased. NO_USER_FREE flag ++ * covers the entire lifetime ++ * of JIT regions. The other ++ * types of regions covered ++ * by this flag also shall ++ * not be aliased. ++ */ ++ if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) ++ goto bad_handle; /* GPU uncached memory */ ++ if (!aliasing_reg->gpu_alloc) ++ goto bad_handle; /* No alloc */ ++ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ goto bad_handle; /* Not a native alloc */ ++ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) ++ goto bad_handle; /* Non-coherent memory cannot ++ * alias coherent memory, and ++ * vice versa. ++ */ + -+/** -+ * kbase_pm_driver_resume() - Put GPU and driver in resume -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @arb_gpu_start: Arbiter has notified we can use GPU -+ * -+ * Resume the GPU, allow register accesses to it, and resume running atoms on -+ * the GPU. -+ * -+ * This is called in response to an OS resume event, and calls into the various -+ * kbase components to complete the resume. -+ * -+ * Also called when using VM arbiter, when GPU access has been granted. -+ * -+ * Despite kbase_pm_resume(), it will ignore to update Arbiter -+ * status if MALI_ARBITER_SUPPORT is enabled. -+ */ -+void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); ++ /* check size against stride */ ++ if (!ai[i].length) ++ goto bad_handle; /* must be > 0 */ ++ if (ai[i].length > stride) ++ goto bad_handle; /* can't be larger than the ++ * stride ++ */ + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+/** -+ * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM -+ * @kbdev: Device pointer -+ * -+ * Handles the case that the Arbiter has forced the GPU away from the VM, -+ * so that interrupts will not be received and registers are no longer -+ * accessible because replaced by dummy RAM. -+ * Kill any running tasks and put the driver into a GPU powered-off state. -+ */ -+void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ alloc = aliasing_reg->gpu_alloc; + -+#endif /* _KBASE_PM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h -new file mode 100644 -index 000000000..c517a2d2a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* check against the alloc's size */ ++ if (ai[i].offset > alloc->nents) ++ goto bad_handle; /* beyond end */ ++ if (ai[i].offset + ai[i].length > alloc->nents) ++ goto bad_handle; /* beyond end */ + -+#ifndef _KBASE_REFCOUNT_DEFS_H_ -+#define _KBASE_REFCOUNT_DEFS_H_ ++ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); ++ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; ++ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + -+/* -+ * The Refcount API is available from 4.11 onwards -+ * This file hides the compatibility issues with this for the rest the driver -+ */ ++ /* Ensure the underlying alloc is marked as being ++ * mapped at >1 different GPU VA immediately, even ++ * though mapping might not happen until later. ++ * ++ * Otherwise, we would (incorrectly) allow shrinking of ++ * the source region (aliasing_reg) and so freeing the ++ * physical pages (without freeing the entire alloc) ++ * whilst we still hold an implicit reference on those ++ * physical pages. ++ */ ++ kbase_mem_phy_alloc_gpu_mapped(alloc); ++ } ++ } + -+#include -+#include ++ if (!kbase_ctx_compat_mode(kctx)) { ++ /* Bind to a cookie */ ++ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { ++ dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); ++ goto no_cookie; ++ } ++ /* return a cookie */ ++ gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); ++ bitmap_clear(kctx->cookies, gpu_va, 1); ++ BUG_ON(kctx->pending_regions[gpu_va]); ++ kctx->pending_regions[gpu_va] = reg; + -+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++ /* relocate to correct base */ ++ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ gpu_va <<= PAGE_SHIFT; ++ } else { ++ /* we control the VA */ ++ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, ++ mmu_sync_info) != 0) { ++ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); ++ goto no_mmap; ++ } ++ /* return real GPU VA */ ++ gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } + -+#define kbase_refcount_t atomic_t -+#define kbase_refcount_read(x) atomic_read(x) -+#define kbase_refcount_set(x, v) atomic_set(x, v) -+#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x) -+#define kbase_refcount_dec(x) atomic_dec(x) -+#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x) -+#define kbase_refcount_inc(x) atomic_inc(x) ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags &= ~KBASE_REG_GROWABLE; + -+#else ++ kbase_gpu_vm_unlock(kctx); + -+#include ++ return gpu_va; + -+#define kbase_refcount_t refcount_t -+#define kbase_refcount_read(x) refcount_read(x) -+#define kbase_refcount_set(x, v) refcount_set(x, v) -+#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x) -+#define kbase_refcount_dec(x) refcount_dec(x) -+#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x) -+#define kbase_refcount_inc(x) refcount_inc(x) ++no_cookie: ++no_mmap: ++bad_handle: ++ /* Marking the source allocs as not being mapped on the GPU and putting ++ * them is handled by putting reg's allocs, so no rollback of those ++ * actions is done here. 
++ */ ++ kbase_gpu_vm_unlock(kctx); ++no_aliased_array: ++invalid_flags: ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc_obj: ++ kfree(reg); ++no_reg: ++bad_size: ++bad_nents: ++bad_stride: ++bad_flags: ++ return 0; ++} + -+#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ ++int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, ++ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, ++ u64 *flags) ++{ ++ struct kbase_va_region *reg; + -+#endif /* _KBASE_REFCOUNT_DEFS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c -new file mode 100644 -index 000000000..147082c15 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c -@@ -0,0 +1,239 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2016, 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+#include "mali_kbase.h" -+#include "mali_kbase_regs_history_debugfs.h" ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(gpu_va); ++ KBASE_DEBUG_ASSERT(va_pages); ++ KBASE_DEBUG_ASSERT(flags); + -+#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && ++ kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) ++ *flags |= BASE_MEM_SAME_VA; + -+#include ++ if (!kbase_check_import_flags(*flags)) { ++ dev_warn(kctx->kbdev->dev, ++ "%s called with bad flags (%llx)", ++ __func__, ++ (unsigned long long)*flags); ++ goto bad_flags; ++ } + -+/** -+ * kbase_io_history_resize - resize the register access history buffer. -+ * -+ * @h: Pointer to a valid register history to resize -+ * @new_size: Number of accesses the buffer could hold -+ * -+ * A successful resize will clear all recent register accesses. -+ * If resizing fails for any reason (e.g., could not allocate memory, invalid -+ * buffer size) then the original buffer will be kept intact. 
-+ * -+ * Return: 0 if the buffer was resized, failure otherwise -+ */ -+static int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) -+{ -+ struct kbase_io_access *old_buf; -+ struct kbase_io_access *new_buf; -+ unsigned long flags; ++ if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && ++ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { ++ /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ dev_warn(kctx->kbdev->dev, ++ "%s call required coherent mem when unavailable", ++ __func__); ++ goto bad_flags; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM; ++ } ++ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { ++ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); ++ goto bad_flags; ++ } ++ if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { ++ dev_warn(kctx->kbdev->dev, ++ "padding is only supported for UMM"); ++ goto bad_flags; ++ } + -+ if (!new_size) -+ goto out_err; /* The new size must not be 0 */ ++ switch (type) { ++ case BASE_MEM_IMPORT_TYPE_UMM: { ++ int fd; + -+ new_buf = vmalloc(new_size * sizeof(*h->buf)); -+ if (!new_buf) -+ goto out_err; ++ if (get_user(fd, (int __user *)phandle)) ++ reg = NULL; ++ else ++ reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, ++ padding); ++ } ++ break; ++ case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { ++ struct base_mem_import_user_buffer user_buffer; ++ void __user *uptr; + -+ spin_lock_irqsave(&h->lock, flags); ++ if (copy_from_user(&user_buffer, phandle, ++ sizeof(user_buffer))) { ++ reg = NULL; ++ } else { ++#if IS_ENABLED(CONFIG_COMPAT) ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ uptr = compat_ptr(user_buffer.ptr); ++ else ++#endif ++ uptr = u64_to_user_ptr(user_buffer.ptr); + -+ old_buf = h->buf; ++ reg = kbase_mem_from_user_buffer(kctx, ++ (unsigned long)uptr, user_buffer.length, ++ va_pages, flags); ++ } ++ break; ++ } ++ default: { ++ reg = NULL; ++ break; ++ } ++ } + -+ /* Note: we won't bother with copying the old data over. The dumping -+ * logic wouldn't work properly as it relies on 'count' both as a -+ * counter and as an index to the buffer which would have changed with -+ * the new array. This is a corner case that we don't need to support. -+ */ -+ h->count = 0; -+ h->size = new_size; -+ h->buf = new_buf; ++ if (!reg) ++ goto no_reg; + -+ spin_unlock_irqrestore(&h->lock, flags); ++ kbase_gpu_vm_lock(kctx); + -+ vfree(old_buf); ++ /* mmap needed to setup VA? 
*/ ++ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { ++ /* Bind to a cookie */ ++ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) ++ goto no_cookie; ++ /* return a cookie */ ++ *gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); ++ bitmap_clear(kctx->cookies, *gpu_va, 1); ++ BUG_ON(kctx->pending_regions[*gpu_va]); ++ kctx->pending_regions[*gpu_va] = reg; + -+ return 0; ++ /* relocate to correct base */ ++ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ *gpu_va <<= PAGE_SHIFT; + -+out_err: -+ return -1; -+} ++ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { ++ /* we control the VA, mmap now to the GPU */ ++ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != ++ 0) ++ goto no_gpu_va; ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } else { ++ /* we control the VA, but nothing to mmap yet */ ++ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) ++ goto no_gpu_va; ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } + -+int kbase_io_history_init(struct kbase_io_history *h, u16 n) -+{ -+ h->enabled = false; -+ spin_lock_init(&h->lock); -+ h->count = 0; -+ h->size = 0; -+ h->buf = NULL; -+ if (kbase_io_history_resize(h, n)) -+ return -1; ++ /* clear out private flags */ ++ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); ++ ++ kbase_gpu_vm_unlock(kctx); + + return 0; -+} + -+void kbase_io_history_term(struct kbase_io_history *h) -+{ -+ vfree(h->buf); -+ h->buf = NULL; ++no_gpu_va: ++no_cookie: ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ kfree(reg); ++no_reg: ++bad_flags: ++ *gpu_va = 0; ++ *va_pages = 0; ++ *flags = 0; ++ return -ENOMEM; +} + -+void kbase_io_history_add(struct kbase_io_history *h, -+ void __iomem const *addr, u32 value, u8 write) ++int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 new_pages, ++ u64 old_pages, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) +{ -+ struct kbase_io_access *io; -+ unsigned long flags; ++ struct tagged_addr *phy_pages; ++ u64 delta = new_pages - old_pages; ++ int ret = 0; + -+ spin_lock_irqsave(&h->lock, flags); ++ lockdep_assert_held(&kctx->reg_lock); + -+ io = &h->buf[h->count % h->size]; -+ io->addr = (uintptr_t)addr | write; -+ io->value = value; -+ ++h->count; -+ /* If count overflows, move the index by the buffer size so the entire -+ * buffer will still be dumped later -+ */ -+ if (unlikely(!h->count)) -+ h->count = h->size; ++ /* Map the new pages into the GPU */ ++ phy_pages = kbase_get_gpu_phy_pages(reg); ++ ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, ++ phy_pages + old_pages, delta, reg->flags, kctx->as_nr, ++ reg->gpu_alloc->group_id, mmu_sync_info, reg, false); + -+ spin_unlock_irqrestore(&h->lock, flags); ++ return ret; +} + -+void kbase_io_history_dump(struct kbase_device *kbdev) ++void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages) +{ -+ struct kbase_io_history *const h = &kbdev->io_history; -+ size_t i; -+ size_t iters; -+ unsigned long flags; ++ u64 gpu_va_start = reg->start_pfn; + -+ if (!unlikely(h->enabled)) ++ if (new_pages == old_pages) ++ /* Nothing to do */ + return; + -+ spin_lock_irqsave(&h->lock, flags); ++ unmap_mapping_range(kctx->filp->f_inode->i_mapping, ++ (gpu_va_start + new_pages)<dev, "Register IO History:"); -+ iters = (h->size > h->count) ? 
h->count : h->size; -+ dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, -+ h->count); -+ for (i = 0; i < iters; ++i) { -+ struct kbase_io_access *io = -+ &h->buf[(h->count - iters + i) % h->size]; -+ char const access = (io->addr & 1) ? 'w' : 'r'; ++int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg, u64 const new_pages, ++ u64 const old_pages) ++{ ++ u64 delta = old_pages - new_pages; ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ int ret = 0; + -+ dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i, -+ access, (unsigned long)(io->addr & ~0x1), io->value); -+ } ++ ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, ++ alloc->pages + new_pages, delta, delta, kctx->as_nr, false); + -+ spin_unlock_irqrestore(&h->lock, flags); ++ return ret; +} + -+static int regs_history_size_get(void *data, u64 *val) ++int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) +{ -+ struct kbase_io_history *const h = data; ++ u64 old_pages; ++ u64 delta = 0; ++ int res = -EINVAL; ++ struct kbase_va_region *reg; ++ bool read_locked = false; + -+ *val = h->size; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ return 0; -+} ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(gpu_addr != 0); + -+static int regs_history_size_set(void *data, u64 val) -+{ -+ struct kbase_io_history *const h = data; ++ if (gpu_addr & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid"); ++ return -EINVAL; ++ } + -+ return kbase_io_history_resize(h, (u16)val); -+} ++ down_write(kbase_mem_get_process_mmap_lock()); ++ kbase_gpu_vm_lock(kctx); + -+DEFINE_DEBUGFS_ATTRIBUTE(regs_history_size_fops, regs_history_size_get, regs_history_size_set, -+ "%llu\n"); ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto out_unlock; + -+/** -+ * regs_history_show - show callback for the register access history file. -+ * -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry -+ * -+ * This function is called to dump all recent accesses to the GPU registers. -+ * -+ * Return: 0 if successfully prints data in debugfs entry file, failure otherwise -+ */ -+static int regs_history_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_io_history *const h = sfile->private; -+ size_t i; -+ size_t iters; -+ unsigned long flags; ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); + -+ if (!h->enabled) { -+ seq_puts(sfile, "The register access history is disabled\n"); -+ goto out; ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ goto out_unlock; ++ ++ if (0 == (reg->flags & KBASE_REG_GROWABLE)) ++ goto out_unlock; ++ ++ if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ++ goto out_unlock; ++ ++ /* Would overflow the VA region */ ++ if (new_pages > reg->nr_pages) ++ goto out_unlock; ++ ++ /* Can't shrink when physical pages are mapped to different GPU ++ * VAs. The code doesn't support looking up: ++ * - all physical pages assigned to different GPU VAs ++ * - CPU mappings for the physical pages at different vm_pgoff ++ * (==GPU VA) locations. ++ * ++ * Note that for Native allocs mapped at multiple GPU VAs, growth of ++ * such allocs is not a supported use-case. 
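The register-access history shown above is a simple ring buffer: kbase_io_history_add() stores each access at buf[count % size], and the dump paths replay the most recent min(size, count) entries. A worked example with arbitrary numbers:

/* With size = 16 and count = 20, the next access is stored at
 * buf[20 % 16] = buf[4]. A dump prints iters = min(16, 20) = 16 entries,
 * walking buf[(20 - 16 + i) % 16] for i = 0..15, i.e. from buf[4] (the
 * oldest surviving access) round to buf[3] (the most recent one).
 */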
++ */ ++ if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) ++ goto out_unlock; ++ ++ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) ++ goto out_unlock; ++ ++ if (kbase_is_region_shrinkable(reg)) ++ goto out_unlock; ++ ++ if (kbase_va_region_is_no_user_free(reg)) ++ goto out_unlock; ++ ++#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED ++ /* Reject resizing commit size */ ++ if (reg->flags & KBASE_REG_PF_GROW) ++ new_pages = reg->nr_pages; ++#endif ++ ++ if (new_pages == reg->gpu_alloc->nents) { ++ /* no change */ ++ res = 0; ++ goto out_unlock; + } + -+ spin_lock_irqsave(&h->lock, flags); ++ old_pages = kbase_reg_current_backed_size(reg); ++ if (new_pages > old_pages) { ++ delta = new_pages - old_pages; + -+ iters = (h->size > h->count) ? h->count : h->size; -+ seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, -+ h->count); -+ for (i = 0; i < iters; ++i) { -+ struct kbase_io_access *io = -+ &h->buf[(h->count - iters + i) % h->size]; -+ char const access = (io->addr & 1) ? 'w' : 'r'; ++ /* ++ * No update to the mm so downgrade the writer lock to a read ++ * lock so other readers aren't blocked after this point. ++ */ ++ downgrade_write(kbase_mem_get_process_mmap_lock()); ++ read_locked = true; + -+ seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i, -+ access, (unsigned long)(io->addr & ~0x1), io->value); ++ /* Allocate some more pages */ ++ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { ++ res = -ENOMEM; ++ goto out_unlock; ++ } ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ if (kbase_alloc_phy_pages_helper( ++ reg->gpu_alloc, delta) != 0) { ++ res = -ENOMEM; ++ kbase_free_phy_pages_helper(reg->cpu_alloc, ++ delta); ++ goto out_unlock; ++ } ++ } ++ ++ /* No update required for CPU mappings, that's done on fault. */ ++ ++ /* Update GPU mapping. 
*/ ++ res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, ++ old_pages, mmu_sync_info); ++ ++ /* On error free the new pages */ ++ if (res) { ++ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ kbase_free_phy_pages_helper(reg->gpu_alloc, ++ delta); ++ res = -ENOMEM; ++ goto out_unlock; ++ } ++ } else { ++ res = kbase_mem_shrink(kctx, reg, new_pages); ++ if (res) ++ res = -ENOMEM; + } + -+ spin_unlock_irqrestore(&h->lock, flags); ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ if (read_locked) ++ up_read(kbase_mem_get_process_mmap_lock()); ++ else ++ up_write(kbase_mem_get_process_mmap_lock()); + -+out: -+ return 0; ++ return res; +} + -+/** -+ * regs_history_open - open operation for regs_history debugfs file -+ * -+ * @in: &struct inode pointer -+ * @file: &struct file pointer -+ * -+ * Return: file descriptor -+ */ -+static int regs_history_open(struct inode *in, struct file *file) ++int kbase_mem_shrink(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg, u64 new_pages) +{ -+ return single_open(file, ®s_history_show, in->i_private); ++ u64 delta, old_pages; ++ int err; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ if (WARN_ON(!kctx)) ++ return -EINVAL; ++ ++ if (WARN_ON(!reg)) ++ return -EINVAL; ++ ++ old_pages = kbase_reg_current_backed_size(reg); ++ if (WARN_ON(old_pages < new_pages)) ++ return -EINVAL; ++ ++ delta = old_pages - new_pages; ++ ++ /* Update the GPU mapping */ ++ err = kbase_mem_shrink_gpu_mapping(kctx, reg, ++ new_pages, old_pages); ++ if (err >= 0) { ++ /* Update all CPU mapping(s) */ ++ kbase_mem_shrink_cpu_mapping(kctx, reg, ++ new_pages, old_pages); ++ ++ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ kbase_free_phy_pages_helper(reg->gpu_alloc, delta); ++ ++ if (kctx->kbdev->pagesize_2mb) { ++ if (kbase_reg_current_backed_size(reg) > new_pages) { ++ old_pages = new_pages; ++ new_pages = kbase_reg_current_backed_size(reg); ++ ++ /* Update GPU mapping. */ ++ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, ++ CALLER_MMU_ASYNC); ++ } ++ } else { ++ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); ++ } ++ } ++ ++ return err; +} + -+static const struct file_operations regs_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ®s_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; + -+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) ++static void kbase_cpu_vm_open(struct vm_area_struct *vma) +{ -+ debugfs_create_bool("regs_history_enabled", 0644, -+ kbdev->mali_debugfs_directory, -+ &kbdev->io_history.enabled); -+ debugfs_create_file("regs_history_size", 0644, -+ kbdev->mali_debugfs_directory, -+ &kbdev->io_history, ®s_history_size_fops); -+ debugfs_create_file("regs_history", 0444, -+ kbdev->mali_debugfs_directory, &kbdev->io_history, -+ ®s_history_fops); -+} -+#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h -new file mode 100644 -index 000000000..ae327dd79 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h -@@ -0,0 +1,84 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2016, 2020-2022 ARM Limited. All rights reserved. 
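kbase_mem_commit() above resizes the physical backing of a growable native region to new_pages, growing through kbase_alloc_phy_pages_helper() plus a GPU-mapping update, or shrinking through kbase_mem_shrink(). A minimal caller sketch (hypothetical helper, arbitrary sizes):

static int example_resize_backing(struct kbase_context *kctx, u64 gpu_addr)
{
        int err;

        /* Grow the backing of the region at gpu_addr to 64 pages. */
        err = kbase_mem_commit(kctx, gpu_addr, 64);
        if (err)
                return err;

        /* Later shrink it back to 16 pages. Shrinking is refused (-EINVAL)
         * for regions mapped at more than one GPU VA or with outstanding
         * kernel mappings, as the checks above show.
         */
        return kbase_mem_commit(kctx, gpu_addr, 16);
}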
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ struct kbase_cpu_mapping *map = vma->vm_private_data; + -+/** -+ * DOC: Header file for register access history support via debugfs -+ * -+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*. -+ * -+ * Usage: -+ * - regs_history_enabled: whether recording of register accesses is enabled. -+ * Write 'y' to enable, 'n' to disable. -+ * - regs_history_size: size of the register history buffer, must be > 0 -+ * - regs_history: return the information about last accesses to the registers. -+ */ ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); ++ /* non-atomic as we're under Linux' mm lock */ ++ map->count++; ++} + -+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H -+#define _KBASE_REGS_HISTORY_DEBUGFS_H ++static void kbase_cpu_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_cpu_mapping *map = vma->vm_private_data; + -+struct kbase_device; ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); + -+#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* non-atomic as we're under Linux' mm lock */ ++ if (--map->count) ++ return; + -+/** -+ * kbase_io_history_init - initialize data struct for register access history -+ * -+ * @h: The register history to initialize -+ * @n: The number of register accesses that the buffer could hold -+ * -+ * Return: 0 if successfully initialized, failure otherwise -+ */ -+int kbase_io_history_init(struct kbase_io_history *h, u16 n); ++ KBASE_DEBUG_ASSERT(map->kctx); ++ KBASE_DEBUG_ASSERT(map->alloc); + -+/** -+ * kbase_io_history_term - uninit all resources for the register access history -+ * -+ * @h: The register history to terminate -+ */ -+void kbase_io_history_term(struct kbase_io_history *h); ++ kbase_gpu_vm_lock(map->kctx); + -+/** -+ * kbase_io_history_dump - print the register history to the kernel ring buffer -+ * -+ * @kbdev: Pointer to kbase_device containing the register history to dump -+ */ -+void kbase_io_history_dump(struct kbase_device *kbdev); ++ if (map->free_on_close) { ++ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == ++ KBASE_REG_ZONE_SAME_VA); ++ /* Avoid freeing memory on the process death which results in ++ * GPU Page Fault. 
Memory will be freed in kbase_destroy_context ++ */ ++ if (!is_process_exiting(vma)) ++ kbase_mem_free_region(map->kctx, map->region); ++ } + -+/** -+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history -+ * -+ * @kbdev: Pointer to kbase_device containing the register history -+ */ -+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); ++ list_del(&map->mappings_list); + -+#else /* !defined(CONFIG_DEBUG_FS) || IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ kbase_va_region_alloc_put(map->kctx, map->region); ++ kbase_gpu_vm_unlock(map->kctx); + -+#define kbase_io_history_init(...) (0) ++ kbase_mem_phy_alloc_put(map->alloc); ++ kfree(map); ++} + -+#define kbase_io_history_term CSTD_NOP ++static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, ++ struct kbase_va_region *reg, ++ pgoff_t *start_off, ++ size_t nr_pages) ++{ ++ struct kbase_aliased *aliased = ++ reg->cpu_alloc->imported.alias.aliased; + -+#define kbase_io_history_dump CSTD_NOP ++ if (!reg->cpu_alloc->imported.alias.stride || ++ reg->nr_pages < (*start_off + nr_pages)) { ++ return NULL; ++ } + -+#define kbasep_regs_history_debugfs_init CSTD_NOP ++ while (*start_off >= reg->cpu_alloc->imported.alias.stride) { ++ aliased++; ++ *start_off -= reg->cpu_alloc->imported.alias.stride; ++ } + -+#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ ++ if (!aliased->alloc) { ++ /* sink page not available for dumping map */ ++ return NULL; ++ } + -+#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h -new file mode 100644 -index 000000000..48ea9954b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h -@@ -0,0 +1,277 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ if ((*start_off + nr_pages) > aliased->length) { ++ /* not fully backed by physical pages */ ++ return NULL; ++ } + -+#ifndef _KBASE_RESET_GPU_H_ -+#define _KBASE_RESET_GPU_H_ ++ return aliased; ++} + -+/** -+ * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst -+ * the current thread is accessing the GPU, -+ * and wait for any in-flight reset to -+ * finish. -+ * @kbdev: Device pointer -+ * -+ * This should be used when a potential access to the HW is going to be made -+ * from a non-atomic context. -+ * -+ * It will wait for any in-flight reset to finish before returning. Hence, -+ * correct lock ordering must be observed with respect to the calling thread -+ * and the reset worker thread. -+ * -+ * This does not synchronize general access to the HW, and so multiple threads -+ * can prevent GPU reset concurrently, whilst not being serialized. 
This is -+ * advantageous as the threads can make this call at points where they do not -+ * know for sure yet whether they will indeed access the GPU (for example, to -+ * respect lock ordering), without unnecessarily blocking others. -+ * -+ * Threads must still use other synchronization to ensure they access the HW -+ * consistently, at a point where they are certain it needs to be accessed. -+ * -+ * On success, ensure that when access to the GPU by the caller thread has -+ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to -+ * happen. -+ * -+ * This may return a failure in cases such as a previous failure to reset the -+ * GPU within a reasonable time. If that happens, the GPU might be -+ * non-operational and the caller should not attempt any further access. -+ * -+ * Note: -+ * For atomic context, instead check kbase_reset_gpu_is_active(). -+ * -+ * Return: 0 on success, or negative error code on failure. -+ */ -+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev); -+ -+/** -+ * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting -+ * whilst the current thread is accessing the -+ * GPU, unless a reset is already in progress. -+ * @kbdev: Device pointer -+ * -+ * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an -+ * existing reset to complete. This can be used on codepaths that the Reset -+ * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would -+ * otherwise deadlock. -+ * -+ * Instead, a reset that is currently happening will cause this function to -+ * return an error code indicating that, and further resets will not have been -+ * prevented. -+ * -+ * In such cases, the caller must check for -EAGAIN, and take similar actions -+ * as for handling reset in atomic context. That is, they must cancel any -+ * actions that depended on reset being prevented, possibly deferring them -+ * until after the reset. -+ * -+ * Otherwise a successful return means that the caller can continue its actions -+ * safely in the knowledge that reset is prevented, and the reset worker will -+ * correctly wait instead of deadlocking against this thread. -+ * -+ * On success, ensure that when access to the GPU by the caller thread has -+ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to -+ * happen. -+ * -+ * Refer to kbase_reset_gpu_prevent_and_wait() for more information. -+ * -+ * Return: 0 on success. -EAGAIN if a reset is currently happening. Other -+ * negative error codes on failure, where -ENOMEM indicates that GPU reset -+ * had failed. -+ */ -+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev); -+ -+/** -+ * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been -+ * previously prevented. -+ * @kbdev: Device pointer -+ * -+ * This should be used when a potential access to the HW has finished from a -+ * non-atomic context. -+ * -+ * It must be used from the same thread that originally made a previously call -+ * to kbase_reset_gpu_prevent_and_wait(). It must not be deferred to another -+ * thread. 
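The comments above describe how a non-atomic code path brackets its hardware access between kbase_reset_gpu_prevent_and_wait() and kbase_reset_gpu_allow(). A minimal usage sketch (the helper itself is hypothetical):

static int example_access_gpu(struct kbase_device *kbdev)
{
        int err = kbase_reset_gpu_prevent_and_wait(kbdev);

        if (err) {
                /* A previous reset failed; the GPU may be non-operational,
                 * so do not attempt any further access.
                 */
                return err;
        }

        /* ... non-atomic GPU accesses go here ... */

        kbase_reset_gpu_allow(kbdev);
        return 0;
}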
-+ */ -+void kbase_reset_gpu_allow(struct kbase_device *kbdev); ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, ++ struct vm_fault *vmf) ++{ ++#else ++static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ pgoff_t map_start_pgoff; ++ pgoff_t fault_pgoff; ++ size_t i; ++ pgoff_t addr; ++ size_t nents; ++ struct tagged_addr *pages; ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ struct memory_group_manager_device *mgm_dev; + -+/** -+ * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is -+ * currently prevented by the current -+ * thread. -+ * @kbdev: Device pointer -+ * -+ * Make debugging checks that the current thread has made a call to -+ * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to -+ * kbase_reset_gpu_allow(). -+ * -+ * CONFIG_LOCKDEP is required to prove that reset is indeed -+ * prevented. Otherwise only limited debugging checks can be made. -+ */ -+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); ++ KBASE_DEBUG_ASSERT(map->kctx); ++ KBASE_DEBUG_ASSERT(map->alloc); + -+/** -+ * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that -+ * either GPU reset previously -+ * failed, or is currently -+ * prevented. -+ * -+ * @kbdev: Device pointer -+ * -+ * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where -+ * reset was not prevented due to a failure, yet we still need to execute the -+ * cleanup code following. -+ * -+ * Cleanup code following this call must handle any inconsistent state modified -+ * by the failed GPU reset, and must timeout any blocking operations instead of -+ * waiting forever. -+ */ -+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); ++ map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; + -+/** -+ * RESET_FLAGS_NONE - Flags for kbase_prepare_to_reset_gpu -+ */ -+#define RESET_FLAGS_NONE (0U) ++ kbase_gpu_vm_lock(map->kctx); ++ if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { ++ struct kbase_aliased *aliased = ++ get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); + -+/* This reset should be treated as an unrecoverable error by HW counter logic */ -+#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) ++ if (!aliased) ++ goto exit; + -+/** -+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. -+ * @kbdev: Device pointer -+ * @flags: Bitfield indicating impact of reset (see flag defines) -+ * -+ * Caller is expected to hold the kbdev->hwaccess_lock. -+ * -+ * Return: a boolean which should be interpreted as follows: -+ * - true - Prepared for reset, kbase_reset_gpu should be called. -+ * - false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. -+ */ -+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, -+ unsigned int flags); ++ nents = aliased->length; ++ pages = aliased->alloc->pages + aliased->offset; ++ } else { ++ nents = map->alloc->nents; ++ pages = map->alloc->pages; ++ } + -+/** -+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. 
-+ * @kbdev: Device pointer -+ * @flags: Bitfield indicating impact of reset (see flag defines) -+ * -+ * Return: a boolean which should be interpreted as follows: -+ * - true - Prepared for reset, kbase_reset_gpu should be called. -+ * - false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. -+ */ -+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); ++ fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff); + -+/** -+ * kbase_reset_gpu - Reset the GPU -+ * @kbdev: Device pointer -+ * -+ * This function should be called after kbase_prepare_to_reset_gpu if it returns -+ * true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). -+ * -+ * After this function is called the caller should call kbase_reset_gpu_wait() -+ * to know when the reset has completed. -+ */ -+void kbase_reset_gpu(struct kbase_device *kbdev); ++ if (fault_pgoff >= nents) ++ goto exit; + -+/** -+ * kbase_reset_gpu_locked - Reset the GPU -+ * @kbdev: Device pointer -+ * -+ * This function should be called after kbase_prepare_to_reset_gpu_locked if it -+ * returns true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). -+ * Caller is expected to hold the kbdev->hwaccess_lock. -+ * -+ * After this function is called, the caller should call kbase_reset_gpu_wait() -+ * to know when the reset has completed. -+ */ -+void kbase_reset_gpu_locked(struct kbase_device *kbdev); ++ /* Fault on access to DONT_NEED regions */ ++ if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) ++ goto exit; + -+/** -+ * kbase_reset_gpu_silent - Reset the GPU silently -+ * @kbdev: Device pointer -+ * -+ * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs) -+ * and don't emit messages into the kernel log while doing the reset. -+ * -+ * This function should be used in cases where we are doing a controlled reset -+ * of the GPU as part of normal processing (e.g. exiting protected mode) where -+ * the driver will have ensured the scheduler has been idled and all other -+ * users of the GPU (e.g. instrumentation) have been suspended. -+ * -+ * Return: 0 if the reset was started successfully -+ * -EAGAIN if another reset is currently in progress -+ */ -+int kbase_reset_gpu_silent(struct kbase_device *kbdev); ++ /* We are inserting all valid pages from the start of CPU mapping and ++ * not from the fault location (the mmap handler was previously doing ++ * the same). ++ */ ++ i = map_start_pgoff; ++ addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); ++ mgm_dev = map->kctx->kbdev->mgm_dev; ++ while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { + -+/** -+ * kbase_reset_gpu_is_active - Reports if the GPU is being reset -+ * @kbdev: Device pointer -+ * -+ * Any changes made to the HW when this returns true may be lost, overwritten -+ * or corrupted. -+ * -+ * Note that unless appropriate locks are held when using this function, the -+ * state could change immediately afterwards. -+ * -+ * Return: True if the GPU is in the process of being reset. 
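For Job Manager GPUs the documentation above prescribes a prepare, reset, wait sequence. A sketch of that flow (illustrative only, with error handling kept minimal):

static void example_trigger_gpu_reset(struct kbase_device *kbdev)
{
        /* A false return means another thread is already resetting the GPU. */
        if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
                kbase_reset_gpu(kbdev);
                if (kbase_reset_gpu_wait(kbdev))
                        dev_err(kbdev->dev, "GPU reset did not complete\n");
        }
}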
-+ */ -+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ map->alloc->group_id, vma, addr << PAGE_SHIFT, ++ PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot); + -+/** -+ * kbase_reset_gpu_not_pending - Reports if the GPU reset isn't pending -+ * -+ * @kbdev: Device pointer -+ * -+ * Note that unless appropriate locks are held when using this function, the -+ * state could change immediately afterwards. -+ * -+ * Return: True if the GPU reset isn't pending. -+ */ -+bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev); ++ if (ret != VM_FAULT_NOPAGE) ++ goto exit; + -+/** -+ * kbase_reset_gpu_wait - Wait for a GPU reset to complete -+ * @kbdev: Device pointer -+ * -+ * This function may wait indefinitely. -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+int kbase_reset_gpu_wait(struct kbase_device *kbdev); ++ i++; addr++; ++ } + -+/** -+ * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism. -+ * -+ * @kbdev: Device pointer -+ * -+ * Return: 0 if successful or a negative error code on failure. -+ */ -+int kbase_reset_gpu_init(struct kbase_device *kbdev); ++exit: ++ kbase_gpu_vm_unlock(map->kctx); ++ return ret; ++} + -+/** -+ * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism. -+ * -+ * @kbdev: Device pointer -+ */ -+void kbase_reset_gpu_term(struct kbase_device *kbdev); ++const struct vm_operations_struct kbase_vm_ops = { ++ .open = kbase_cpu_vm_open, ++ .close = kbase_cpu_vm_close, ++ .fault = kbase_cpu_vm_fault ++}; + -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.c b/drivers/gpu/arm/bifrost/mali_kbase_smc.c -new file mode 100644 -index 000000000..abbe8d56d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.c -@@ -0,0 +1,90 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2015, 2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++static int kbase_cpu_mmap(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ struct vm_area_struct *vma, ++ void *kaddr, ++ size_t nr_pages, ++ unsigned long aligned_offset, ++ int free_on_close) ++{ ++ struct kbase_cpu_mapping *map; ++ int err = 0; + -+#if IS_ENABLED(CONFIG_ARM64) ++ map = kzalloc(sizeof(*map), GFP_KERNEL); + -+#include -+#include ++ if (!map) { ++ WARN_ON(1); ++ err = -ENOMEM; ++ goto out; ++ } + -+#include ++ /* ++ * VM_DONTCOPY - don't make this mapping available in fork'ed processes ++ * VM_DONTEXPAND - disable mremap on this region ++ * VM_IO - disables paging ++ * VM_DONTDUMP - Don't include in core dumps (3.7 only) ++ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. ++ * This is needed to support using the dedicated and ++ * the OS based memory backends together. 
++ */ ++ /* ++ * This will need updating to propagate coherency flags ++ * See MIDBASE-1057 ++ */ + -+/* __asmeq is not available on Kernel versions >= 4.20 */ -+#ifndef __asmeq -+/* -+ * This is used to ensure the compiler did actually allocate the register we -+ * asked it for some inline assembly sequences. Apparently we can't trust the -+ * compiler from one version to another so a bit of paranoia won't hurt. This -+ * string is meant to be concatenated with the inline asm string and will -+ * cause compilation to stop on mismatch. (for details, see gcc PR 15089) -+ */ -+#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" -+#endif ++ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); ++ vma->vm_ops = &kbase_vm_ops; ++ vma->vm_private_data = map; + -+static noinline u64 invoke_smc_fid(u64 function_id, -+ u64 arg0, u64 arg1, u64 arg2) -+{ -+ register u64 x0 asm("x0") = function_id; -+ register u64 x1 asm("x1") = arg0; -+ register u64 x2 asm("x2") = arg1; -+ register u64 x3 asm("x3") = arg2; ++ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { ++ pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + ++ (aligned_offset >> PAGE_SHIFT); ++ struct kbase_aliased *aliased = ++ get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); + -+ asm volatile( -+ __asmeq("%0", "x0") -+ __asmeq("%1", "x1") -+ __asmeq("%2", "x2") -+ __asmeq("%3", "x3") -+ "smc #0\n" -+ : "+r" (x0) -+ : "r" (x1), "r" (x2), "r" (x3)); ++ if (!aliased) { ++ err = -EINVAL; ++ kfree(map); ++ goto out; ++ } ++ } + -+ return x0; -+} ++ if (!(reg->flags & KBASE_REG_CPU_CACHED) && ++ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { ++ /* We can't map vmalloc'd memory uncached. ++ * Other memory will have been returned from ++ * kbase_mem_pool which would be ++ * suitable for mapping uncached. ++ */ ++ BUG_ON(kaddr); ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ } + -+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) -+{ -+ /* Is fast call (bit 31 set) */ -+ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); -+ /* bits 16-23 must be zero for fast calls */ -+ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); ++ if (!kaddr) { ++ vm_flags_set(vma, VM_PFNMAP); ++ } else { ++ WARN_ON(aligned_offset); ++ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ ++ vm_flags_set(vma, VM_MIXEDMAP); ++ /* vmalloc remaping is easy... */ ++ err = remap_vmalloc_range(vma, kaddr, 0); ++ WARN_ON(err); ++ } + -+ return invoke_smc_fid(fid, arg0, arg1, arg2); -+} ++ if (err) { ++ kfree(map); ++ goto out; ++ } + -+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, -+ u64 arg0, u64 arg1, u64 arg2) -+{ -+ u32 fid = 0; ++ map->region = kbase_va_region_alloc_get(kctx, reg); ++ map->free_on_close = free_on_close; ++ map->kctx = kctx; ++ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ map->count = 1; /* start with one ref */ + -+ /* Only the six bits allowed should be used. 
*/ -+ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); ++ if (reg->flags & KBASE_REG_CPU_CACHED) ++ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + -+ fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ -+ if (smc64) -+ fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ -+ fid |= oen; /* Bit 29:24: OEN */ -+ /* Bit 23:16: Must be zero for fast calls */ -+ fid |= (function_number); /* Bit 15:0: function number */ ++ list_add(&map->mappings_list, &map->alloc->mappings); + -+ return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); ++ out: ++ return err; +} + -+#endif /* CONFIG_ARM64 */ ++#ifdef CONFIG_MALI_VECTOR_DUMP ++static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) ++{ ++ /* Free all cached/unused JIT allocations as their contents are not ++ * really needed for the replay. The GPU writes to them would already ++ * have been captured through the GWT mechanism. ++ * This considerably reduces the size of mmu-snapshot-file and it also ++ * helps avoid segmentation fault issue during vector dumping of ++ * complex contents when the unused JIT allocations are accessed to ++ * dump their contents (as they appear in the page tables snapshot) ++ * but they got freed by the shrinker under low memory scenarios ++ * (which do occur with complex contents). ++ */ ++ while (kbase_jit_evict(kctx)) ++ ; ++} + -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.h b/drivers/gpu/arm/bifrost/mali_kbase_smc.h -new file mode 100644 -index 000000000..40a348388 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.h -@@ -0,0 +1,69 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++static int kbase_mmu_dump_mmap(struct kbase_context *kctx, ++ struct vm_area_struct *vma, ++ struct kbase_va_region **const reg, ++ void **const kmap_addr) ++{ ++ struct kbase_va_region *new_reg; ++ void *kaddr; ++ u32 nr_pages; ++ size_t size; ++ int err = 0; + -+#ifndef _KBASE_SMC_H_ -+#define _KBASE_SMC_H_ ++ lockdep_assert_held(&kctx->reg_lock); + -+#if IS_ENABLED(CONFIG_ARM64) ++ dev_dbg(kctx->kbdev->dev, "%s\n", __func__); ++ size = (vma->vm_end - vma->vm_start); ++ nr_pages = size >> PAGE_SHIFT; + -+#include ++ kbase_free_unused_jit_allocations(kctx); + -+#define SMC_FAST_CALL (1 << 31) -+#define SMC_64 (1 << 30) ++ kaddr = kbase_mmu_dump(kctx, nr_pages); + -+#define SMC_OEN_OFFSET 24 -+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ -+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) -+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) ++ if (!kaddr) { ++ err = -ENOMEM; ++ goto out; ++ } + ++ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, ++ KBASE_REG_ZONE_SAME_VA); ++ if (!new_reg) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out; ++ } + -+/** -+ * kbase_invoke_smc_fid - Perform a secure monitor call -+ * @fid: The SMC function to call, see SMC Calling convention. -+ * @arg0: First argument to the SMC. -+ * @arg1: Second argument to the SMC. -+ * @arg2: Third argument to the SMC. -+ * -+ * See SMC Calling Convention for details. -+ * -+ * Return: the return value from the SMC. -+ */ -+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); ++ new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, ++ BASE_MEM_GROUP_DEFAULT); ++ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { ++ err = -ENOMEM; ++ new_reg->cpu_alloc = NULL; ++ WARN_ON(1); ++ goto out_no_alloc; ++ } + -+/** -+ * kbase_invoke_smc - Perform a secure monitor call -+ * @oen: Owning Entity number (SIP, STD etc). -+ * @function_number: The function number within the OEN. -+ * @smc64: use SMC64 calling convention instead of SMC32. -+ * @arg0: First argument to the SMC. -+ * @arg1: Second argument to the SMC. -+ * @arg2: Third argument to the SMC. -+ * -+ * See SMC Calling Convention for details. -+ * -+ * Return: the return value from the SMC call. -+ */ -+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, -+ u64 arg0, u64 arg1, u64 arg2); ++ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); + -+#endif /* CONFIG_ARM64 */ ++ new_reg->flags &= ~KBASE_REG_FREE; ++ new_reg->flags |= KBASE_REG_CPU_CACHED; ++ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out_va_region; ++ } + -+#endif /* _KBASE_SMC_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c -new file mode 100644 -index 000000000..a9312a0c4 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c -@@ -0,0 +1,1770 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
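kbase_invoke_smc() above assembles the SMC function ID from the fast-call bit, the SMC64 bit, the owning entity and the function number before trapping to the monitor. An illustrative SiP call follows; the function number 0x100 is made up for the sketch:

static u64 example_sip_fast_call(u64 arg0)
{
        /* SMC_OEN_SIP is already shifted into bits 29:24, as the assert in
         * kbase_invoke_smc() expects.
         */
        return kbase_invoke_smc(SMC_OEN_SIP, 0x100, true, arg0, 0, 0);
}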
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ *kmap_addr = kaddr; ++ *reg = new_reg; + -+#include ++ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); ++ return 0; + -+#include -+#include -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include ++out_no_alloc: ++out_va_region: ++ kbase_free_alloced_region(new_reg); ++out: ++ return err; ++} +#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#if !MALI_USE_CSF -+/** -+ * DOC: This file implements the logic behind software only jobs that are -+ * executed within the driver rather than being handed over to the GPU. -+ */ -+ -+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) ++void kbase_os_mem_map_lock(struct kbase_context *kctx) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned long lflags; -+ -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ (void)kctx; ++ down_read(kbase_mem_get_process_mmap_lock()); +} + -+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) ++void kbase_os_mem_map_unlock(struct kbase_context *kctx) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned long lflags; -+ -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_del(&katom->queue); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ (void)kctx; ++ up_read(kbase_mem_get_process_mmap_lock()); +} + -+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) ++static int kbasep_reg_mmap(struct kbase_context *kctx, ++ struct vm_area_struct *vma, ++ struct kbase_va_region **regm, ++ size_t *nr_pages, size_t *aligned_offset) ++ +{ -+ struct kbase_context *kctx = katom->kctx; ++ unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ struct kbase_va_region *reg; ++ int err = 0; + -+ /* Record the start time of this atom so we could cancel it at -+ * the right time. ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. + */ -+ katom->start_timestamp = ktime_get_raw(); ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ /* Add the atom to the waiting list before the timer is -+ * (re)started to make sure that it gets processed. 
-+ */ -+ kbasep_add_waiting_soft_job(katom); ++ *aligned_offset = 0; + -+ /* Schedule timeout of this atom after a period if it is not active */ -+ if (!timer_pending(&kctx->soft_job_timeout)) { -+ int timeout_ms = atomic_read( -+ &kctx->kbdev->js_data.soft_job_timeout_ms); -+ mod_timer(&kctx->soft_job_timeout, -+ jiffies + msecs_to_jiffies(timeout_ms)); ++ dev_dbg(kctx->kbdev->dev, "%s\n", __func__); ++ ++ /* SAME_VA stuff, fetch the right region */ ++ reg = kctx->pending_regions[cookie]; ++ if (!reg) { ++ err = -ENOMEM; ++ goto out; + } -+} + -+static int kbasep_read_soft_event_status( -+ struct kbase_context *kctx, u64 evt, unsigned char *status) -+{ -+ unsigned char *mapped_evt; -+ struct kbase_vmap_struct map; ++ if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { ++ /* incorrect mmap size */ ++ /* leave the cookie for a potential later ++ * mapping, or to be reclaimed later when the ++ * context is freed ++ */ ++ err = -ENOMEM; ++ goto out; ++ } + -+ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), -+ KBASE_REG_CPU_RD, &map); -+ if (!mapped_evt) -+ return -EFAULT; ++ if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || ++ (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { ++ /* VM flags inconsistent with region flags */ ++ err = -EPERM; ++ dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", ++ __FILE__, __LINE__); ++ goto out; ++ } + -+ *status = *mapped_evt; ++ /* adjust down nr_pages to what we have physically */ ++ *nr_pages = kbase_reg_current_backed_size(reg); ++ if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, ++ reg->nr_pages, 1, mmu_sync_info) != 0) { ++ dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); ++ /* Unable to map in GPU space. */ ++ WARN_ON(1); ++ err = -ENOMEM; ++ goto out; ++ } ++ /* no need for the cookie anymore */ ++ kctx->pending_regions[cookie] = NULL; ++ bitmap_set(kctx->cookies, cookie, 1); + -+ kbase_vunmap(kctx, &map); ++#if MALI_USE_CSF ++ if (reg->flags & KBASE_REG_CSF_EVENT) ++ kbase_link_event_mem_page(kctx, reg); ++#endif + -+ return 0; ++ /* ++ * Overwrite the offset with the region start_pfn, so we effectively ++ * map from offset 0 in the region. 
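kbasep_reg_mmap() above recovers the pending-region cookie from the mmap offset; this is the inverse of the encoding used by kbase_mem_alias() and kbase_mem_import() earlier, which hand back (cookie + PFN_DOWN(BASE_MEM_COOKIE_BASE)) << PAGE_SHIFT in place of a real GPU VA until the region is actually mapped. A small sketch of the round trip (both helpers are hypothetical):

static u64 example_cookie_to_fake_va(unsigned long cookie)
{
        return (u64)(cookie + PFN_DOWN(BASE_MEM_COOKIE_BASE)) << PAGE_SHIFT;
}

static unsigned int example_cookie_from_vm_pgoff(unsigned long vm_pgoff)
{
        /* vm_pgoff is the mmap offset in pages, i.e. the fake VA >> PAGE_SHIFT */
        return vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
}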
However subtract the aligned ++ * offset so that when user space trims the mapping the beginning of ++ * the trimmed VMA has the correct vm_pgoff; ++ */ ++ vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); ++out: ++ *regm = reg; ++ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); ++ ++ return err; +} + -+static int kbasep_write_soft_event_status( -+ struct kbase_context *kctx, u64 evt, unsigned char new_status) ++int kbase_context_mmap(struct kbase_context *const kctx, ++ struct vm_area_struct *const vma) +{ -+ unsigned char *mapped_evt; -+ struct kbase_vmap_struct map; ++ struct kbase_va_region *reg = NULL; ++ void *kaddr = NULL; ++ size_t nr_pages = vma_pages(vma); ++ int err = 0; ++ int free_on_close = 0; ++ struct device *dev = kctx->kbdev->dev; ++ size_t aligned_offset = 0; + -+ if ((new_status != BASE_JD_SOFT_EVENT_SET) && -+ (new_status != BASE_JD_SOFT_EVENT_RESET)) -+ return -EINVAL; ++ dev_dbg(dev, "kbase_mmap\n"); + -+ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), -+ KBASE_REG_CPU_WR, &map); -+ if (!mapped_evt) -+ return -EFAULT; ++ if (!(vma->vm_flags & VM_READ)) ++ vm_flags_clear(vma, VM_MAYREAD); ++ if (!(vma->vm_flags & VM_WRITE)) ++ vm_flags_clear(vma, VM_MAYWRITE); + -+ *mapped_evt = new_status; ++ if (nr_pages == 0) { ++ err = -EINVAL; ++ goto out; ++ } + -+ kbase_vunmap(kctx, &map); ++ if (!(vma->vm_flags & VM_SHARED)) { ++ err = -EINVAL; ++ goto out; ++ } + -+ return 0; -+} ++ kbase_gpu_vm_lock(kctx); + -+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) -+{ -+ struct kbase_vmap_struct map; -+ void *user_result; -+ struct timespec64 ts; -+ struct base_dump_cpu_gpu_counters data; -+ u64 system_time = 0ULL; -+ u64 cycle_counter; -+ u64 jc = katom->jc; -+ struct kbase_context *kctx = katom->kctx; -+ int pm_active_err; ++ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { ++ /* The non-mapped tracking helper page */ ++ err = kbase_tracking_page_setup(kctx, vma); ++ goto out_unlock; ++ } + -+ memset(&data, 0, sizeof(data)); ++ if (!kbase_mem_allow_alloc(kctx)) { ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ /* Take the PM active reference as late as possible - otherwise, it could -+ * delay suspend until we process the atom (which may be at the end of a -+ * long chain of dependencies -+ */ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); -+ if (pm_active_err) { -+ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; ++ switch (vma->vm_pgoff) { ++ case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): ++ case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): ++ /* Illegal handle for direct map */ ++ err = -EINVAL; ++ goto out_unlock; ++ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++ /* MMU dump */ ++ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); ++ if (err != 0) ++ goto out_unlock; ++ /* free the region on munmap */ ++ free_on_close = 1; ++ break; ++#else ++ /* Illegal handle for direct map */ ++ err = -EINVAL; ++ goto out_unlock; ++#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ ++#if MALI_USE_CSF ++ case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): ++ kbase_gpu_vm_unlock(kctx); ++ err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); ++ goto out; ++ case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) ... 
++ PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { ++ kbase_gpu_vm_unlock(kctx); ++ mutex_lock(&kctx->csf.lock); ++ err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); ++ mutex_unlock(&kctx->csf.lock); ++ goto out; ++ } ++#endif ++ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... ++ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { ++ err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, ++ &aligned_offset); ++ if (err != 0) ++ goto out_unlock; ++ /* free the region on munmap */ ++ free_on_close = 1; ++ break; ++ } ++ default: { ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ (u64)vma->vm_pgoff << PAGE_SHIFT); + -+ /* We're suspended - queue this on the list of suspended jobs -+ * Use dep_item[1], because dep_item[0] was previously in use -+ * for 'waiting_soft_jobs'. -+ */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ if (!kbase_is_region_invalid_or_free(reg)) { ++ /* will this mapping overflow the size of the region? */ ++ if (nr_pages > (reg->nr_pages - ++ (vma->vm_pgoff - reg->start_pfn))) { ++ err = -ENOMEM; ++ goto out_unlock; ++ } + -+ /* Also adding this to the list of waiting soft job */ -+ kbasep_add_waiting_soft_job(katom); ++ if ((vma->vm_flags & VM_READ && ++ !(reg->flags & KBASE_REG_CPU_RD)) || ++ (vma->vm_flags & VM_WRITE && ++ !(reg->flags & KBASE_REG_CPU_WR))) { ++ /* VM flags inconsistent with region flags */ ++ err = -EPERM; ++ dev_err(dev, "%s:%d inconsistent VM flags\n", ++ __FILE__, __LINE__); ++ goto out_unlock; ++ } + -+ return pm_active_err; -+ } -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ else -+ atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ if (KBASE_MEM_TYPE_IMPORTED_UMM == ++ reg->cpu_alloc->type) { ++ if (0 != (vma->vm_pgoff - reg->start_pfn)) { ++ err = -EINVAL; ++ dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", ++ __FILE__, __LINE__); ++ goto out_unlock; ++ } ++ err = dma_buf_mmap( ++ reg->cpu_alloc->imported.umm.dma_buf, ++ vma, vma->vm_pgoff - reg->start_pfn); ++ goto out_unlock; ++ } + -+ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, -+ &ts); ++ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { ++ /* initial params check for aliased dumping map */ ++ if (nr_pages > reg->gpu_alloc->imported.alias.stride || ++ !reg->gpu_alloc->imported.alias.stride || ++ !nr_pages) { ++ err = -EINVAL; ++ dev_warn(dev, "mmap aliased: invalid params!\n"); ++ goto out_unlock; ++ } ++ } else if (reg->cpu_alloc->nents < ++ (vma->vm_pgoff - reg->start_pfn + nr_pages)) { ++ /* limit what we map to the amount currently backed */ ++ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) ++ nr_pages = 0; ++ else ++ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); ++ } ++ } else { ++ err = -ENOMEM; ++ goto out_unlock; ++ } ++ } /* default */ ++ } /* switch */ + -+ kbase_pm_context_idle(kctx->kbdev); ++ err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, ++ free_on_close); ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { ++ /* MMU dump - userspace should now have a reference on ++ * the pages, so we can now free the kernel mapping ++ */ ++ vfree(kaddr); ++ /* CPU mapping of GPU allocations have GPU VA as the vm_pgoff ++ * and that is used to shrink the mapping when the commit size ++ * is reduced. 
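In the default case above the mmap offset is interpreted directly as a GPU virtual address, since the region is looked up by (u64)vma->vm_pgoff << PAGE_SHIFT. A user-space sketch of that path (illustrative only; kbase_fd stands for the opened device fd):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

static void *example_map_gpu_buffer(int kbase_fd, uint64_t gpu_va, size_t size)
{
        /* The page-aligned GPU VA doubles as the file offset. */
        return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    kbase_fd, (off_t)gpu_va);
}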
So vm_pgoff for CPU mapping created to get the ++ * snapshot of GPU page tables shall not match with any GPU VA. ++ * That can be ensured by setting vm_pgoff as vma->vm_start ++ * because, ++ * - GPU VA of any SAME_VA allocation cannot match with ++ * vma->vm_start, as CPU VAs are unique. ++ * - GPU VA of CUSTOM_VA allocations are outside the CPU ++ * virtual address space. ++ */ ++ vma->vm_pgoff = PFN_DOWN(vma->vm_start); ++ } ++#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++out: ++ if (err) ++ dev_err(dev, "mmap failed %d\n", err); + -+ data.sec = ts.tv_sec; -+ data.usec = ts.tv_nsec / 1000; -+ data.system_time = system_time; -+ data.cycle_counter = cycle_counter; ++ return err; ++} + -+ /* Assume this atom will be cancelled until we know otherwise */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++KBASE_EXPORT_TEST_API(kbase_context_mmap); + -+ /* GPU_WR access is checked on the range for returning the result to -+ * userspace for the following reasons: -+ * - security, this is currently how imported user bufs are checked. -+ * - userspace ddk guaranteed to assume region was mapped as GPU_WR -+ */ -+ user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); -+ if (!user_result) -+ return 0; ++void kbase_sync_mem_regions(struct kbase_context *kctx, ++ struct kbase_vmap_struct *map, enum kbase_sync_type dest) ++{ ++ size_t i; ++ off_t const offset = map->offset_in_page; ++ size_t const page_count = PFN_UP(offset + map->size); + -+ memcpy(user_result, &data, sizeof(data)); ++ /* Sync first page */ ++ size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); ++ struct tagged_addr cpu_pa = map->cpu_pages[0]; ++ struct tagged_addr gpu_pa = map->gpu_pages[0]; + -+ kbase_vunmap(kctx, &map); ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest); + -+ /* Atom was fine - mark it as done */ -+ katom->event_code = BASE_JD_EVENT_DONE; ++ /* Sync middle pages (if any) */ ++ for (i = 1; page_count > 2 && i < page_count - 1; i++) { ++ cpu_pa = map->cpu_pages[i]; ++ gpu_pa = map->gpu_pages[i]; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest); ++ } + -+ return 0; ++ /* Sync last page (if any) */ ++ if (page_count > 1) { ++ cpu_pa = map->cpu_pages[page_count - 1]; ++ gpu_pa = map->gpu_pages[page_count - 1]; ++ sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest); ++ } +} + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+/* Called by the explicit fence mechanism when a fence wait has completed */ -+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) ++/** ++ * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for ++ * array of physical pages ++ * ++ * @pages: Array of pages. ++ * @page_count: Number of pages. ++ * @flags: Region flags. ++ * ++ * This function is supposed to be called only if page migration support ++ * is enabled in the driver. ++ * ++ * The counter of kernel CPU mappings of the physical pages involved in a ++ * mapping operation is incremented by 1. Errors are handled by making pages ++ * not movable. Permanent kernel mappings will be marked as not movable, too. 
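kbase_sync_mem_regions() above splits a sync request into a partial first page, whole middle pages and a partial last page. A worked example with arbitrary numbers, assuming 4 KiB pages:

/* With offset_in_page = 0x300 and size = 0x2000:
 *   page_count  = PFN_UP(0x300 + 0x2000) = 3
 *   first page  : sz = PAGE_SIZE - 0x300                       = 0xd00
 *   middle page : one full page                                = 0x1000
 *   last page   : sz = ((0x300 + 0x2000 - 1) & ~PAGE_MASK) + 1 = 0x300
 * which adds back up to the requested 0x2000 bytes.
 */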
++ */ ++static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, ++ size_t page_count, unsigned long flags) +{ -+ struct kbase_context *kctx = katom->kctx; ++ size_t i; + -+ mutex_lock(&kctx->jctx.lock); -+ kbasep_remove_waiting_soft_job(katom); -+ kbase_finish_soft_job(katom); -+ if (kbase_jd_done_nolock(katom, true)) -+ kbase_js_sched_all(kctx->kbdev); -+ mutex_unlock(&kctx->jctx.lock); ++ for (i = 0; i < page_count; i++) { ++ struct page *p = as_page(pages[i]); ++ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ ++ /* Skip the 4KB page that is part of a large page, as the large page is ++ * excluded from the migration process. ++ */ ++ if (is_huge(pages[i]) || is_partial(pages[i])) ++ continue; ++ ++ spin_lock(&page_md->migrate_lock); ++ /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely ++ * to stay mapped for a long time. However, keep on counting the number ++ * of mappings even for them: they don't represent an exception for the ++ * vmap_count. ++ * ++ * At the same time, errors need to be handled if a client tries to add ++ * too many mappings, hence a page may end up in the NOT_MOVABLE state ++ * anyway even if it's not a permanent kernel mapping. ++ */ ++ if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ if (page_md->vmap_count < U8_MAX) ++ page_md->vmap_count++; ++ else ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ spin_unlock(&page_md->migrate_lock); ++ } +} -+#endif + -+static void kbasep_soft_event_complete_job(struct work_struct *work) ++/** ++ * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for ++ * array of physical pages ++ * ++ * @pages: Array of pages. ++ * @page_count: Number of pages. ++ * ++ * This function is supposed to be called only if page migration support ++ * is enabled in the driver. ++ * ++ * The counter of kernel CPU mappings of the physical pages involved in a ++ * mapping operation is decremented by 1. Errors are handled by making pages ++ * not movable. ++ */ ++static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, ++ size_t page_count) +{ -+ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ int resched; ++ size_t i; + -+ mutex_lock(&kctx->jctx.lock); -+ resched = kbase_jd_done_nolock(katom, true); -+ mutex_unlock(&kctx->jctx.lock); ++ for (i = 0; i < page_count; i++) { ++ struct page *p = as_page(pages[i]); ++ struct kbase_page_metadata *page_md = kbase_page_private(p); + -+ if (resched) -+ kbase_js_sched_all(kctx->kbdev); ++ /* Skip the 4KB page that is part of a large page, as the large page is ++ * excluded from the migration process. ++ */ ++ if (is_huge(pages[i]) || is_partial(pages[i])) ++ continue; ++ ++ spin_lock(&page_md->migrate_lock); ++ /* Decrement the number of mappings for all kinds of pages, including ++ * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). ++ * However, errors still need to be handled if a client tries to remove ++ * more mappings than created. 
++ */ ++ if (page_md->vmap_count == 0) ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ else ++ page_md->vmap_count--; ++ spin_unlock(&page_md->migrate_lock); ++ } +} + -+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) ++static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, ++ kbase_vmap_flag vmap_flags) +{ -+ int cancel_timer = 1; -+ struct list_head *entry, *tmp; -+ unsigned long lflags; ++ unsigned long page_index; ++ unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; ++ size_t page_count = PFN_UP(offset_in_page + size); ++ struct tagged_addr *page_array; ++ struct page **pages; ++ void *cpu_addr = NULL; ++ pgprot_t prot; ++ size_t i; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ struct kbase_jd_atom *katom = list_entry( -+ entry, struct kbase_jd_atom, queue); ++ if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS)) ++ return -EINVAL; + -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ if (katom->jc == evt) { -+ list_del(&katom->queue); ++ if (WARN_ON(kbase_is_region_invalid_or_free(reg))) ++ return -EINVAL; + -+ katom->event_code = BASE_JD_EVENT_DONE; -+ INIT_WORK(&katom->work, -+ kbasep_soft_event_complete_job); -+ queue_work(kctx->jctx.job_done_wq, -+ &katom->work); -+ } else { -+ /* There are still other waiting jobs, we cannot -+ * cancel the timer yet. -+ */ -+ cancel_timer = 0; -+ } -+ break; -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ /* Keep the timer running if fence debug is enabled and -+ * there are waiting fence jobs. -+ */ -+ cancel_timer = 0; -+ break; -+#endif -+ } -+ } ++ if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) ++ return -EINVAL; + -+ if (cancel_timer) -+ del_timer(&kctx->soft_job_timeout); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -+} ++ /* check if page_count calculation will wrap */ ++ if (size > ((size_t)-1 / PAGE_SIZE)) ++ return -EINVAL; + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct device *dev = kctx->kbdev->dev; -+ int i; ++ page_index = offset_bytes >> PAGE_SHIFT; + -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep; ++ /* check if page_index + page_count will wrap */ ++ if (-1UL - page_count < page_index) ++ return -EINVAL; + -+ list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { -+ if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || -+ dep->status == KBASE_JD_ATOM_STATE_COMPLETED) -+ continue; ++ if (page_index + page_count > kbase_reg_current_backed_size(reg)) ++ return -ENOMEM; + -+ if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) -+ == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { -+ /* Found blocked trigger fence. 
*/ -+ struct kbase_sync_fence_info info; ++ if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) && ++ (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - ++ atomic_read(&kctx->permanent_mapped_pages)))) { ++ dev_warn( ++ kctx->kbdev->dev, ++ "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", ++ (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, ++ atomic_read(&kctx->permanent_mapped_pages)); ++ return -ENOMEM; ++ } + -+ if (!kbase_sync_fence_in_info_get(dep, &info)) { -+ dev_warn(dev, -+ "\tVictim trigger atom %d fence [%pK] %s: %s\n", -+ kbase_jd_atom_id(kctx, dep), -+ info.fence, -+ info.name, -+ kbase_sync_status_string(info.status)); -+ } -+ } ++ if (reg->flags & KBASE_REG_DONT_NEED) ++ return -EINVAL; + -+ kbase_fence_debug_check_atom(dep); -+ } ++ prot = PAGE_KERNEL; ++ if (!(reg->flags & KBASE_REG_CPU_CACHED)) { ++ /* Map uncached */ ++ prot = pgprot_writecombine(prot); + } -+} + -+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct device *dev = katom->kctx->kbdev->dev; -+ int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); -+ unsigned long lflags; -+ struct kbase_sync_fence_info info; ++ page_array = kbase_get_cpu_phy_pages(reg); ++ if (!page_array) ++ return -ENOMEM; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); ++ if (!pages) ++ return -ENOMEM; + -+ if (kbase_sync_fence_in_info_get(katom, &info)) { -+ /* Fence must have signaled just after timeout. */ -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -+ return; -+ } ++ for (i = 0; i < page_count; i++) ++ pages[i] = as_page(page_array[page_index + i]); + -+ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", -+ kctx->tgid, kctx->id, -+ kbase_jd_atom_id(kctx, katom), -+ info.fence, timeout_ms); -+ dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", -+ info.fence, info.name, -+ kbase_sync_status_string(info.status)); ++ /* Note: enforcing a RO prot_request onto prot is not done, since: ++ * - CPU-arch-specific integration required ++ * - kbase_vmap() requires no access checks to be made/enforced ++ */ ++ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + -+ /* Search for blocked trigger atoms */ -+ kbase_fence_debug_check_atom(katom); ++ /* If page migration is enabled, increment the number of VMA mappings ++ * of all physical pages. In case of errors, e.g. too many mappings, ++ * make the page not movable to prevent trouble. 
++ */ ++ if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) ++ kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ kfree(pages); + -+ kbase_sync_fence_in_dump(katom); -+} ++ if (!cpu_addr) ++ return -ENOMEM; + -+struct kbase_fence_debug_work { -+ struct kbase_jd_atom *katom; -+ struct work_struct work; -+}; ++ map->offset_in_page = offset_in_page; ++ map->cpu_alloc = reg->cpu_alloc; ++ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; ++ map->gpu_alloc = reg->gpu_alloc; ++ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; ++ map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); ++ map->size = size; ++ map->flags = vmap_flags; ++ if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type)) ++ map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED; + -+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) -+{ -+ struct kbase_fence_debug_work *w = container_of(work, -+ struct kbase_fence_debug_work, work); -+ struct kbase_jd_atom *katom = w->katom; -+ struct kbase_context *kctx = katom->kctx; ++ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) ++ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + -+ mutex_lock(&kctx->jctx.lock); -+ kbase_fence_debug_wait_timeout(katom); -+ mutex_unlock(&kctx->jctx.lock); ++ if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) ++ atomic_add(page_count, &kctx->permanent_mapped_pages); + -+ kfree(w); ++ kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); ++ ++ return 0; +} + -+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) ++void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, ++ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, ++ kbase_vmap_flag vmap_flags) +{ -+ struct kbase_fence_debug_work *work; -+ struct kbase_context *kctx = katom->kctx; ++ u64 offset_bytes; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++ struct kbase_mem_phy_alloc *gpu_alloc; ++ int err; + -+ /* Enqueue fence debug worker. Use job_done_wq to get -+ * debug print ordered with job completion. ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ if (WARN_ON(kbase_is_region_invalid_or_free(reg))) ++ return NULL; ++ ++ /* check access permissions can be satisfied ++ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ -+ work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); -+ /* Ignore allocation failure. 
*/ -+ if (work) { -+ work->katom = katom; -+ INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); -+ queue_work(kctx->jctx.job_done_wq, &work->work); -+ } ++ if ((reg->flags & prot_request) != prot_request) ++ return NULL; ++ ++ offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); ++ cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ ++ err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags); ++ if (err < 0) ++ goto fail_vmap_phy_pages; ++ ++ return map->addr; ++ ++fail_vmap_phy_pages: ++ kbase_mem_phy_alloc_put(cpu_alloc); ++ kbase_mem_phy_alloc_put(gpu_alloc); ++ return NULL; +} -+#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ + -+void kbasep_soft_job_timeout_worker(struct timer_list *timer) ++void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ unsigned long prot_request, struct kbase_vmap_struct *map) +{ -+ struct kbase_context *kctx = container_of(timer, struct kbase_context, -+ soft_job_timeout); -+ u32 timeout_ms = (u32)atomic_read( -+ &kctx->kbdev->js_data.soft_job_timeout_ms); -+ ktime_t cur_time = ktime_get_raw(); -+ bool restarting = false; -+ unsigned long lflags; -+ struct list_head *entry, *tmp; ++ struct kbase_va_region *reg; ++ void *addr = NULL; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ struct kbase_jd_atom *katom = list_entry(entry, -+ struct kbase_jd_atom, queue); -+ s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, -+ katom->start_timestamp)); ++ kbase_gpu_vm_lock(kctx); + -+ if (elapsed_time < (s64)timeout_ms) { -+ restarting = true; -+ continue; -+ } ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); ++ if (kbase_is_region_invalid_or_free(reg)) ++ goto out_unlock; + -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ /* Take it out of the list to ensure that it -+ * will be cancelled in all cases -+ */ -+ list_del(&katom->queue); ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ goto out_unlock; + -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ INIT_WORK(&katom->work, kbasep_soft_event_complete_job); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+ break; -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ kbase_fence_debug_timeout(katom); -+ break; -+#endif -+ } -+ } ++ addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); + -+ if (restarting) -+ mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return addr; +} + -+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) ++void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ struct kbase_vmap_struct *map) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned char status; ++ /* 0 is specified for prot_request to indicate no access checks should ++ * be made. 
++ * ++ * As mentioned in kbase_vmap_prot() this means that a kernel-side ++ * CPU-RO mapping is not enforced to allow this to work ++ */ ++ return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); ++} ++KBASE_EXPORT_TEST_API(kbase_vmap); + -+ /* The status of this soft-job is stored in jc */ -+ if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ return 0; ++static void kbase_vunmap_phy_pages(struct kbase_context *kctx, ++ struct kbase_vmap_struct *map) ++{ ++ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); ++ ++ vunmap(addr); ++ ++ /* If page migration is enabled, decrement the number of VMA mappings ++ * for all physical pages. Now is a good time to do it because references ++ * haven't been released yet. ++ */ ++ if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { ++ const size_t page_count = PFN_UP(map->offset_in_page + map->size); ++ struct tagged_addr *pages_array = map->cpu_pages; ++ ++ kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + -+ if (status == BASE_JD_SOFT_EVENT_SET) -+ return 0; /* Event already set, nothing to do */ ++ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) ++ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); ++ if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { ++ size_t page_count = PFN_UP(map->offset_in_page + map->size); + -+ kbasep_add_waiting_with_timeout(katom); ++ WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages)); ++ atomic_sub(page_count, &kctx->permanent_mapped_pages); ++ } + -+ return 1; ++ kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); ++ ++ map->offset_in_page = 0; ++ map->cpu_pages = NULL; ++ map->gpu_pages = NULL; ++ map->addr = NULL; ++ map->size = 0; ++ map->flags = 0; +} + -+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, -+ unsigned char new_status) ++void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +{ -+ /* Complete jobs waiting on the same event */ -+ struct kbase_context *kctx = katom->kctx; ++ kbase_vunmap_phy_pages(kctx, map); ++ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); ++ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); ++} ++KBASE_EXPORT_TEST_API(kbase_vunmap); + -+ if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) ++{ ++#if (KERNEL_VERSION(6, 6, 0) <= LINUX_VERSION_CODE) ++ /* To avoid the build breakage due to the type change in rss_stat, ++ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2. ++ */ ++ percpu_counter_add(&mm->rss_stat[member], value); ++#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE) ++ /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat', ++ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5. 
++ */ ++ atomic_long_add(value, &mm->rss_stat.count[member]); ++#else ++ add_mm_counter(mm, member, value); ++#endif ++} ++ ++void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) ++{ ++ struct mm_struct *mm = kctx->process_mm; ++ ++ if (unlikely(!mm)) + return; -+ } + -+ if (new_status == BASE_JD_SOFT_EVENT_SET) -+ kbasep_complete_triggered_soft_events(kctx, katom->jc); ++ atomic_add(pages, &kctx->nonmapped_pages); ++#ifdef SPLIT_RSS_COUNTING ++ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); ++#else ++ spin_lock(&mm->page_table_lock); ++ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); ++ spin_unlock(&mm->page_table_lock); ++#endif +} + -+/** -+ * kbase_soft_event_update() - Update soft event state -+ * @kctx: Pointer to context -+ * @event: Event to update -+ * @new_status: New status value of event -+ * -+ * Update the event, and wake up any atoms waiting for the event. -+ * -+ * Return: 0 on success, a negative error code on failure. -+ */ -+int kbase_soft_event_update(struct kbase_context *kctx, -+ u64 event, -+ unsigned char new_status) ++static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) +{ -+ int err = 0; ++ if (vma_pages(vma) != 1) ++ return -EINVAL; + -+ mutex_lock(&kctx->jctx.lock); ++ /* no real access */ ++ vm_flags_clear(vma, (VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); ++ vm_flags_set(vma, (VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO)); + -+ if (kbasep_write_soft_event_status(kctx, event, new_status)) { -+ err = -ENOENT; -+ goto out; -+ } ++ return 0; ++} + -+ if (new_status == BASE_JD_SOFT_EVENT_SET) -+ kbasep_complete_triggered_soft_events(kctx, event); ++#if MALI_USE_CSF ++static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&kbdev->csf.reg_lock); + -+out: -+ mutex_unlock(&kctx->jctx.lock); ++ /* Return the real Hw doorbell page if queue has been ++ * assigned one, otherwise a dummy page. Always return the ++ * dummy page in no mali builds. 
++ */ ++#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); ++#else ++ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); ++#endif ++ return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + ++ (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); ++} + -+ return err; ++static int ++#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \ ++ KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) ++kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma) ++#else ++kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags) ++#endif ++{ ++ pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n"); ++ return -EINVAL; +} + -+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) ++static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr) +{ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ if (kbase_jd_done_nolock(katom, true)) -+ kbase_js_sched_all(katom->kctx->kbdev); ++ pr_debug("Unexpected call to split method for User IO pages mapping vma\n"); ++ return -EINVAL; +} + -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) ++static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) +{ -+ struct kbase_debug_copy_buffer *buffers = katom->softjob_data; -+ unsigned int i; -+ unsigned int nr = katom->nr_extres; ++ pr_debug("Unexpected call to the open method for User IO pages mapping vma\n"); ++ vma->vm_private_data = NULL; ++} + -+ if (!buffers) ++static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_queue *queue = vma->vm_private_data; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ int err; ++ bool reset_prevented = false; ++ ++ if (!queue) { ++ pr_debug("Close method called for the new User IO pages mapping vma\n"); + return; ++ } + -+ kbase_gpu_vm_lock(katom->kctx); -+ for (i = 0; i < nr; i++) { -+ int p; -+ struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; ++ kctx = queue->kctx; ++ kbdev = kctx->kbdev; + -+ if (!buffers[i].pages) -+ break; -+ for (p = 0; p < buffers[i].nr_pages; p++) { -+ struct page *pg = buffers[i].pages[p]; ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when unbinding queue (csi_index=%d), attempting to unbind regardless", ++ queue->csi_index); ++ else ++ reset_prevented = true; + -+ if (pg) -+ put_page(pg); -+ } -+ if (buffers[i].is_vmalloc) -+ vfree(buffers[i].pages); -+ else -+ kfree(buffers[i].pages); -+ if (gpu_alloc) { -+ switch (gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ kbase_free_user_buffer(&buffers[i]); -+ break; -+ } -+ default: -+ /* Nothing to be done. 
*/ -+ break; -+ } -+ kbase_mem_phy_alloc_put(gpu_alloc); -+ } -+ } -+ kbase_gpu_vm_unlock(katom->kctx); -+ kfree(buffers); ++ mutex_lock(&kctx->csf.lock); ++ kbase_csf_queue_unbind(queue, is_process_exiting(vma)); ++ mutex_unlock(&kctx->csf.lock); + -+ katom->softjob_data = NULL; ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++ ++ /* Now as the vma is closed, drop the reference on mali device file */ ++ fput(kctx->filp); +} + -+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, ++ struct vm_fault *vmf) +{ -+ struct kbase_debug_copy_buffer *buffers; -+ struct base_jd_debug_copy_buffer *user_buffers = NULL; -+ unsigned int i; -+ unsigned int nr = katom->nr_extres; -+ int ret = 0; -+ void __user *user_structs = (void __user *)(uintptr_t)katom->jc; -+ -+ if (!user_structs) -+ return -EINVAL; ++#else ++static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct kbase_queue *queue = vma->vm_private_data; ++ unsigned long doorbell_cpu_addr, input_cpu_addr, output_cpu_addr; ++ unsigned long doorbell_page_pfn, input_page_pfn, output_page_pfn; ++ pgprot_t doorbell_pgprot, input_page_pgprot, output_page_pgprot; ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); ++ vm_fault_t ret; ++ struct kbase_device *kbdev; ++ struct memory_group_manager_device *mgm_dev; + -+ buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); -+ if (!buffers) { -+ ret = -ENOMEM; -+ goto out_cleanup; ++ /* Few sanity checks up front */ ++ if (!queue || (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || ++ (vma->vm_pgoff != queue->db_file_offset)) { ++ pr_warn("Unexpected CPU page fault on User IO pages mapping for process %s tgid %d pid %d\n", ++ current->comm, current->tgid, current->pid); ++ return VM_FAULT_SIGBUS; + } -+ katom->softjob_data = buffers; + -+ user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); ++ kbdev = queue->kctx->kbdev; ++ mgm_dev = kbdev->mgm_dev; + -+ if (!user_buffers) { -+ ret = -ENOMEM; -+ goto out_cleanup; -+ } ++ mutex_lock(&kbdev->csf.reg_lock); + -+ ret = copy_from_user(user_buffers, user_structs, -+ sizeof(*user_buffers)*nr); -+ if (ret) { -+ ret = -EFAULT; -+ goto out_cleanup; -+ } ++ /* Always map the doorbell page as uncached */ ++ doorbell_pgprot = pgprot_device(vma->vm_page_prot); + -+ for (i = 0; i < nr; i++) { -+ u64 addr = user_buffers[i].address; -+ u64 page_addr = addr & PAGE_MASK; -+ u64 end_page_addr = addr + user_buffers[i].size - 1; -+ u64 last_page_addr = end_page_addr & PAGE_MASK; -+ int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; -+ int pinned_pages; -+ struct kbase_va_region *reg; -+ struct base_external_resource user_extres; ++ if (kbdev->system_coherency == COHERENCY_NONE) { ++ input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); ++ output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); ++ } else { ++ input_page_pgprot = vma->vm_page_prot; ++ output_page_pgprot = vma->vm_page_prot; ++ } + -+ if (!addr) -+ continue; ++ doorbell_cpu_addr = vma->vm_start; + -+ if (last_page_addr < page_addr) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } ++#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE ++ if ((unsigned long)vmf->virtual_address == doorbell_cpu_addr) { ++#else ++ if (vmf->address == doorbell_cpu_addr) { ++#endif ++ doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); ++ ret = 
mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, ++ doorbell_page_pfn, doorbell_pgprot); ++ } else { ++ /* Map the Input page */ ++ input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; ++ input_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[0])); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, input_cpu_addr, ++ input_page_pfn, input_page_pgprot); ++ if (ret != VM_FAULT_NOPAGE) ++ goto exit; + -+ buffers[i].nr_pages = nr_pages; -+ buffers[i].offset = addr & ~PAGE_MASK; -+ if (buffers[i].offset >= PAGE_SIZE) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } -+ buffers[i].size = user_buffers[i].size; ++ /* Map the Output page */ ++ output_cpu_addr = input_cpu_addr + PAGE_SIZE; ++ output_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[1])); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, output_cpu_addr, ++ output_page_pfn, output_page_pgprot); ++ } + -+ if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / -+ sizeof(struct page *))) { -+ buffers[i].is_vmalloc = true; -+ buffers[i].pages = vzalloc(nr_pages * -+ sizeof(struct page *)); -+ } else { -+ buffers[i].is_vmalloc = false; -+ buffers[i].pages = kcalloc(nr_pages, -+ sizeof(struct page *), GFP_KERNEL); -+ } ++exit: ++ mutex_unlock(&kbdev->csf.reg_lock); ++ return ret; ++} + -+ if (!buffers[i].pages) { -+ ret = -ENOMEM; -+ goto out_cleanup; -+ } ++static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { ++ .open = kbase_csf_user_io_pages_vm_open, ++ .close = kbase_csf_user_io_pages_vm_close, ++#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE ++ .may_split = kbase_csf_user_io_pages_vm_split, ++#else ++ .split = kbase_csf_user_io_pages_vm_split, ++#endif ++ .mremap = kbase_csf_user_io_pages_vm_mremap, ++ .fault = kbase_csf_user_io_pages_vm_fault ++}; + -+ pinned_pages = get_user_pages_fast(page_addr, -+ nr_pages, -+ 1, /* Write */ -+ buffers[i].pages); -+ if (pinned_pages < 0) { -+ /* get_user_pages_fast has failed - page array is not -+ * valid. Don't try to release any pages. -+ */ -+ buffers[i].nr_pages = 0; ++/* Program the client process's page table entries to map the pair of ++ * input/output pages & Hw doorbell page. The caller should have validated that ++ * vma->vm_pgoff maps to the range of csf cookies. ++ */ ++static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, ++ struct vm_area_struct *vma) ++{ ++ unsigned long cookie = ++ vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); ++ size_t nr_pages = vma_pages(vma); ++ struct kbase_queue *queue; ++ int err = 0; + -+ ret = pinned_pages; -+ goto out_cleanup; -+ } -+ if (pinned_pages != nr_pages) { -+ /* Adjust number of pages, so that we only attempt to -+ * release pages in the array that we know are valid. 
-+ */ -+ buffers[i].nr_pages = pinned_pages; ++ lockdep_assert_held(&kctx->csf.lock); + -+ ret = -EINVAL; -+ goto out_cleanup; -+ } ++ queue = kctx->csf.user_pages_info[cookie]; + -+ user_extres = user_buffers[i].extres; -+ if (user_extres.ext_resource == 0ULL) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } ++ /* Looks like the bind has been aborted */ ++ if (!queue) ++ return -EINVAL; + -+ kbase_gpu_vm_lock(katom->kctx); -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ katom->kctx, user_extres.ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ if (WARN_ON(test_bit(cookie, kctx->csf.cookies))) ++ return -EINVAL; + -+ if (kbase_is_region_invalid_or_free(reg) || -+ reg->gpu_alloc == NULL) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } ++ /* no need for the cookie anymore */ ++ kctx->csf.user_pages_info[cookie] = NULL; ++ bitmap_set(kctx->csf.cookies, cookie, 1); + -+ buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); -+ buffers[i].nr_extres_pages = reg->nr_pages; ++ /* Reset the handle to avoid (re)freeing the cookie (which can ++ * now get re-assigned) on unbind. ++ */ ++ queue->handle = BASEP_MEM_INVALID_HANDLE; + -+ if (reg->nr_pages*PAGE_SIZE != buffers[i].size) -+ dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); ++ if (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) { ++ err = -EINVAL; ++ goto map_failed; ++ } + -+ switch (reg->gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; -+ const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; -+ const unsigned long start = alloc->imported.user_buf.address; ++ err = kbase_csf_alloc_command_stream_user_pages(kctx, queue); ++ if (err) ++ goto map_failed; + -+ if (alloc->imported.user_buf.mm != current->mm) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ buffers[i].extres_pages = kcalloc(nr_pages, -+ sizeof(struct page *), GFP_KERNEL); -+ if (!buffers[i].extres_pages) { -+ ret = -ENOMEM; -+ goto out_unlock; -+ } -+ kbase_gpu_vm_unlock(katom->kctx); -+ ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); -+ kbase_gpu_vm_lock(katom->kctx); -+ if (ret != nr_pages) { -+ /* Adjust number of pages, so that we only -+ * attempt to release pages in the array that we -+ * know are valid. -+ */ -+ if (ret < 0) -+ buffers[i].nr_extres_pages = 0; -+ else -+ buffers[i].nr_extres_pages = ret; ++ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); ++ /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of ++ * memory with and without "struct page" backing are being inserted here. ++ * Hw Doorbell pages comes from the device register area so kernel does ++ * not use "struct page" for them. ++ */ ++ vm_flags_set(vma, VM_PFNMAP); + -+ goto out_unlock; -+ } -+ ret = 0; -+ break; -+ } -+ default: -+ /* Nothing to be done. */ -+ break; -+ } -+ kbase_gpu_vm_unlock(katom->kctx); -+ } -+ kfree(user_buffers); ++ vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; ++ vma->vm_private_data = queue; + -+ return ret; ++ /* Make vma point to the special internal file, but don't drop the ++ * reference on mali device file (that would be done later when the ++ * vma is closed). 
++ */ ++ vma->vm_file = kctx->kbdev->csf.db_filp; ++ get_file(vma->vm_file); ++ /* Also adjust the vm_pgoff */ ++ vma->vm_pgoff = queue->db_file_offset; + -+out_unlock: -+ kbase_gpu_vm_unlock(katom->kctx); ++ return 0; + -+out_cleanup: -+ /* Frees allocated memory for kbase_debug_copy_job struct, including -+ * members, and sets jc to 0 ++map_failed: ++ /* The queue cannot have got to KBASE_CSF_QUEUE_BOUND state if we ++ * reached here, so safe to use a variant of unbind that only works on ++ * stopped queues ++ * ++ * This is so we don't enter the CSF scheduler from this path. + */ -+ kbase_debug_copy_finish(katom); -+ kfree(user_buffers); ++ kbase_csf_queue_unbind_stopped(queue); + -+ return ret; ++ return err; +} + -+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE -+static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, -+ unsigned long page_num, struct page **page) ++/** ++ * kbase_csf_user_reg_vm_open - VMA open function for the USER page ++ * ++ * @vma: Pointer to the struct containing information about ++ * the userspace mapping of USER page. ++ * Note: ++ * This function isn't expected to be called. If called (i.e> mremap), ++ * set private_data as NULL to indicate to close() and fault() functions. ++ */ ++static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) +{ -+ struct sg_table *sgt = gpu_alloc->imported.umm.sgt; -+ struct sg_page_iter sg_iter; -+ unsigned long page_index = 0; ++ pr_debug("Unexpected call to the open method for USER register mapping"); ++ vma->vm_private_data = NULL; ++} + -+ if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) -+ return NULL; ++/** ++ * kbase_csf_user_reg_vm_close - VMA close function for the USER page ++ * ++ * @vma: Pointer to the struct containing information about ++ * the userspace mapping of USER page. ++ */ ++static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_context *kctx = vma->vm_private_data; ++ struct kbase_device *kbdev; + -+ if (!sgt) -+ return NULL; ++ if (unlikely(!kctx)) { ++ pr_debug("Close function called for the unexpected mapping"); ++ return; ++ } + -+ if (WARN_ON(page_num >= gpu_alloc->nents)) -+ return NULL; ++ kbdev = kctx->kbdev; + -+ for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { -+ if (page_index == page_num) { -+ *page = sg_page_iter_page(&sg_iter); ++ if (unlikely(!kctx->csf.user_reg.vma)) ++ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", ++ kctx->tgid, kctx->id); + -+ return kmap(*page); -+ } -+ page_index++; -+ } ++ mutex_lock(&kbdev->csf.reg_lock); ++ list_del_init(&kctx->csf.user_reg.link); ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ return NULL; ++ kctx->csf.user_reg.vma = NULL; ++ ++ /* Now as the VMA is closed, drop the reference on mali device file */ ++ fput(kctx->filp); +} -+#endif + +/** -+ * kbase_mem_copy_from_extres() - Copy from external resources. ++ * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page + * -+ * @kctx: kbase context within which the copying is to take place. -+ * @buf_data: Pointer to the information about external resources: -+ * pages pertaining to the external resource, number of -+ * pages to copy. ++ * @vma: Pointer to the struct containing information about ++ * the userspace mapping of USER page. + * -+ * Return: 0 on success, error code otherwise. ++ * Return: -EINVAL ++ * ++ * Note: ++ * User space must not attempt mremap on USER page mapping. ++ * This function will return an error to fail the attempt. 
+ */ -+static int kbase_mem_copy_from_extres(struct kbase_context *kctx, -+ struct kbase_debug_copy_buffer *buf_data) ++static int ++#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ ++ (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) ++kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) ++#else ++kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) ++#endif +{ -+ unsigned int i; -+ unsigned int target_page_nr = 0; -+ struct page **pages = buf_data->pages; -+ u64 offset = buf_data->offset; -+ size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; -+ size_t to_copy = min(extres_size, buf_data->size); -+ struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; -+ int ret = 0; -+ size_t dma_to_copy; ++ pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); ++ return -EINVAL; ++} + -+ KBASE_DEBUG_ASSERT(pages != NULL); ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, ++ struct vm_fault *vmf) ++{ ++#else ++static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct kbase_context *kctx = vma->vm_private_data; ++ struct kbase_device *kbdev; ++ struct memory_group_manager_device *mgm_dev; ++ unsigned long pfn; ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ unsigned long flags; + -+ kbase_gpu_vm_lock(kctx); -+ if (!gpu_alloc) { -+ ret = -EINVAL; -+ goto out_unlock; ++ /* Few sanity checks up front */ ++ ++ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) || ++ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) { ++ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", ++ current->comm, current->tgid, current->pid); ++ return VM_FAULT_SIGBUS; + } + -+ switch (gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ for (i = 0; i < buf_data->nr_extres_pages && -+ target_page_nr < buf_data->nr_pages; i++) { -+ struct page *pg = buf_data->extres_pages[i]; -+ void *extres_page = kmap(pg); ++ kbdev = kctx->kbdev; ++ mgm_dev = kbdev->mgm_dev; ++ pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); + -+ if (extres_page) { -+ ret = kbase_mem_copy_to_pinned_user_pages( -+ pages, extres_page, &to_copy, -+ buf_data->nr_pages, -+ &target_page_nr, offset); -+ kunmap(pg); -+ if (ret) -+ goto out_unlock; -+ } -+ } -+ } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; -+ -+ KBASE_DEBUG_ASSERT(dma_buf != NULL); -+ if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE) -+ dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch"); -+ -+ dma_to_copy = min(dma_buf->size, -+ (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); -+ ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE); -+ if (ret) -+ goto out_unlock; -+ -+ for (i = 0; i < dma_to_copy/PAGE_SIZE && -+ target_page_nr < buf_data->nr_pages; i++) { -+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE -+ struct page *pg; -+ void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); -+#else -+ void *extres_page = dma_buf_kmap(dma_buf, i); -+#endif -+ if (extres_page) { -+ ret = kbase_mem_copy_to_pinned_user_pages( -+ pages, extres_page, &to_copy, -+ buf_data->nr_pages, -+ &target_page_nr, offset); -+ -+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE -+ kunmap(pg); -+#else -+ dma_buf_kunmap(dma_buf, i, extres_page); -+#endif -+ if (ret) -+ break; -+ } -+ } -+ 
dma_buf_end_cpu_access(dma_buf, DMA_FROM_DEVICE); -+ break; -+ } -+ default: -+ ret = -EINVAL; -+ } -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return ret; -+} -+ -+static int kbase_debug_copy(struct kbase_jd_atom *katom) -+{ -+ struct kbase_debug_copy_buffer *buffers = katom->softjob_data; -+ unsigned int i; -+ -+ if (WARN_ON(!buffers)) -+ return -EINVAL; -+ -+ for (i = 0; i < katom->nr_extres; i++) { -+ int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); -+ -+ if (res) -+ return res; -+ } -+ -+ return 0; -+} -+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ -+#endif /* !MALI_USE_CSF */ -+ -+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) -+ -+int kbasep_jit_alloc_validate(struct kbase_context *kctx, -+ struct base_jit_alloc_info *info) -+{ -+ int j; -+ /* If the ID is zero, then fail the job */ -+ if (info->id == 0) -+ return -EINVAL; -+ -+ /* Sanity check that the PA fits within the VA */ -+ if (info->va_pages < info->commit_pages) -+ return -EINVAL; -+ -+ /* Ensure the GPU address is correctly aligned */ -+ if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) -+ return -EINVAL; ++ mutex_lock(&kbdev->csf.reg_lock); + -+ /* Interface version 2 (introduced with kernel driver version 11.5) -+ * onward has padding and a flags member to validate. ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* Dummy page will be mapped during GPU off. + * -+ * Note: To support earlier versions the extra bytes will have been set -+ * to 0 by the caller. -+ */ -+ -+ /* Check padding is all zeroed */ -+ for (j = 0; j < sizeof(info->padding); j++) { -+ if (info->padding[j] != 0) -+ return -EINVAL; -+ } -+ -+ /* Only valid flags shall be set */ -+ if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) -+ return -EINVAL; -+ -+#if !MALI_JIT_PRESSURE_LIMIT_BASE -+ /* If just-in-time memory allocation pressure limit feature is disabled, -+ * heap_info_gpu_addr must be zeroed-out -+ */ -+ if (info->heap_info_gpu_addr) -+ return -EINVAL; -+#endif -+ -+#if !MALI_USE_CSF -+ /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr -+ * cannot be 0 -+ */ -+ if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && -+ !info->heap_info_gpu_addr) -+ return -EINVAL; -+#endif /* !MALI_USE_CSF */ -+ -+ return 0; -+} -+ -+#if !MALI_USE_CSF -+ -+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) -+{ -+ __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; -+ struct base_jit_alloc_info *info; -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ u32 count; -+ int ret; -+ u32 i; -+ -+ if (!kbase_mem_allow_alloc(kctx)) { -+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", -+ current->comm, current->pid, kctx->tgid, kctx->id); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ /* For backwards compatibility, and to prevent reading more than 1 jit -+ * info struct on jit version 1 ++ * In no mail builds, always map in the dummy page. 
+ */ -+ if (katom->nr_extres == 0) -+ katom->nr_extres = 1; -+ count = katom->nr_extres; -+ -+ /* Sanity checks */ -+ if (!data || count > kctx->jit_max_allocations || -+ count > ARRAY_SIZE(kctx->jit_alloc)) { -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ /* Copy the information for safe access and future storage */ -+ info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); -+ if (!info) { -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ katom->softjob_data = info; -+ -+ for (i = 0; i < count; i++, info++, data += sizeof(*info)) { -+ if (copy_from_user(info, data, sizeof(*info)) != 0) { -+ ret = -EINVAL; -+ goto free_info; -+ } -+ -+ ret = kbasep_jit_alloc_validate(kctx, info); -+ if (ret) -+ goto free_info; -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( -+ kbdev, katom, info->va_pages, info->commit_pages, -+ info->extension, info->id, info->bin_id, -+ info->max_allocations, info->flags, info->usage_id); -+ } -+ -+ katom->jit_blocked = false; ++ if (IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) || !kbdev->pm.backend.gpu_powered) ++ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page)); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ lockdep_assert_held(&kctx->jctx.lock); -+ list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); ++ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_FW, vma, ++ vma->vm_start, pfn, ++ vma->vm_page_prot); + -+ /* -+ * Note: -+ * The provided info->gpu_alloc_addr isn't validated here as -+ * userland can cache allocations which means that even -+ * though the region is valid it doesn't represent the -+ * same thing it used to. -+ * -+ * Complete validation of va_pages, commit_pages and extension -+ * isn't done here as it will be done during the call to -+ * kbase_mem_alloc. 
-+ */ -+ return 0; ++ mutex_unlock(&kbdev->csf.reg_lock); + -+free_info: -+ kfree(katom->softjob_data); -+ katom->softjob_data = NULL; -+fail: + return ret; +} + -+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) -+{ -+ if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != -+ BASE_JD_REQ_SOFT_JIT_FREE)) -+ return NULL; -+ -+ return (u8 *) katom->softjob_data; -+} -+ -+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct list_head *target_list_head = NULL; -+ struct kbase_jd_atom *entry; -+ -+ list_for_each_entry(entry, &kctx->jctx.jit_pending_alloc, queue) { -+ if (katom->age < entry->age) { -+ target_list_head = &entry->queue; -+ break; -+ } -+ } -+ -+ if (target_list_head == NULL) -+ target_list_head = &kctx->jctx.jit_pending_alloc; -+ -+ list_add_tail(&katom->queue, target_list_head); -+} -+ -+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct base_jit_alloc_info *info; -+ struct kbase_va_region *reg; -+ struct kbase_vmap_struct mapping; -+ u64 *ptr, new_addr; -+ u32 count = katom->nr_extres; -+ u32 i; -+ bool ignore_pressure_limit = false; -+ -+ trace_sysgraph(SGR_SUBMIT, kctx->id, -+ kbase_jd_atom_id(kctx, katom)); -+ -+ if (katom->jit_blocked) { -+ list_del(&katom->queue); -+ katom->jit_blocked = false; -+ } -+ -+ info = katom->softjob_data; -+ if (WARN_ON(!info)) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return 0; -+ } -+ -+ for (i = 0; i < count; i++, info++) { -+ /* The JIT ID is still in use so fail the allocation */ -+ if (kctx->jit_alloc[info->id]) { -+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; -+ return 0; -+ } -+ } -+ -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ /* -+ * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit -+ * is disabled at the context scope, then bypass JIT pressure limit -+ * logic in kbase_jit_allocate(). -+ */ -+ if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) -+ || (kctx->jit_current_allocations == 0)) { -+ ignore_pressure_limit = true; -+ } -+#else -+ ignore_pressure_limit = true; -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+ -+ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { -+ if (kctx->jit_alloc[info->id]) { -+ /* The JIT ID is duplicated in this atom. Roll back -+ * previous allocations and fail. 
-+ */ -+ u32 j; -+ -+ info = katom->softjob_data; -+ for (j = 0; j < i; j++, info++) { -+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); -+ kctx->jit_alloc[info->id] = -+ KBASE_RESERVED_REG_JIT_ALLOC; -+ } -+ -+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; -+ return 0; -+ } -+ -+ /* Create a JIT allocation */ -+ reg = kbase_jit_allocate(kctx, info, ignore_pressure_limit); -+ if (!reg) { -+ struct kbase_jd_atom *jit_atom; -+ bool can_block = false; -+ -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ list_for_each_entry(jit_atom, &kctx->jctx.jit_atoms_head, jit_node) { -+ if (jit_atom == katom) -+ break; -+ -+ if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == -+ BASE_JD_REQ_SOFT_JIT_FREE) { -+ u8 *free_ids = kbase_jit_free_get_ids(jit_atom); -+ -+ if (free_ids && *free_ids && -+ kctx->jit_alloc[*free_ids]) { -+ /* A JIT free which is active and -+ * submitted before this atom -+ */ -+ can_block = true; -+ break; -+ } -+ } -+ } -+ -+ if (!can_block) { -+ /* Mark the failed allocation as well as the -+ * other un-attempted allocations in the set, -+ * so we know they are in use even if the -+ * allocation itself failed. -+ */ -+ for (; i < count; i++, info++) { -+ kctx->jit_alloc[info->id] = -+ KBASE_RESERVED_REG_JIT_ALLOC; -+ } -+ -+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; -+ dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", -+ kbase_jd_atom_id(kctx, katom)); -+ return 0; -+ } -+ -+ /* There are pending frees for an active allocation -+ * so we should wait to see whether they free the -+ * memory. Add to the list of atoms for which JIT -+ * allocation is pending. -+ */ -+ kbase_jit_add_to_pending_alloc_list(katom); -+ katom->jit_blocked = true; -+ -+ /* Rollback, the whole set will be re-attempted */ -+ while (i-- > 0) { -+ info--; -+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); -+ kctx->jit_alloc[info->id] = NULL; -+ } -+ -+ return 1; -+ } -+ -+ /* Bind it to the user provided ID. */ -+ kctx->jit_alloc[info->id] = reg; -+ } -+ -+ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { -+ u64 entry_mmu_flags = 0; -+ /* -+ * Write the address of the JIT allocation to the user provided -+ * GPU allocation. -+ */ -+ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), -+ KBASE_REG_CPU_WR, &mapping); -+ if (!ptr) { -+ /* -+ * Leave the allocations "live" as the JIT free atom -+ * will be submitted anyway. 
-+ */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return 0; -+ } -+ -+ reg = kctx->jit_alloc[info->id]; -+ new_addr = reg->start_pfn << PAGE_SHIFT; -+ *ptr = new_addr; -+ -+#if defined(CONFIG_MALI_VECTOR_DUMP) -+ /* -+ * Retrieve the mmu flags for JIT allocation -+ * only if dumping is enabled -+ */ -+ entry_mmu_flags = kbase_mmu_create_ate(kbdev, -+ (struct tagged_addr){ 0 }, reg->flags, -+ MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); -+#endif -+ -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( -+ kbdev, katom, info->gpu_alloc_addr, new_addr, -+ info->flags, entry_mmu_flags, info->id, -+ info->commit_pages, info->extension, info->va_pages); -+ kbase_vunmap(kctx, &mapping); -+ -+ kbase_trace_jit_report_gpu_mem(kctx, reg, -+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -+ } -+ -+ katom->event_code = BASE_JD_EVENT_DONE; -+ -+ return 0; -+} -+ -+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) -+{ -+ struct base_jit_alloc_info *info; -+ -+ lockdep_assert_held(&katom->kctx->jctx.lock); -+ -+ if (WARN_ON(!katom->softjob_data)) -+ return; -+ -+ /* Remove atom from jit_atoms_head list */ -+ list_del(&katom->jit_node); -+ -+ if (katom->jit_blocked) { -+ list_del(&katom->queue); -+ katom->jit_blocked = false; -+ } -+ -+ info = katom->softjob_data; -+ /* Free the info structure */ -+ kfree(info); -+} ++static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { ++ .open = kbase_csf_user_reg_vm_open, ++ .close = kbase_csf_user_reg_vm_close, ++ .mremap = kbase_csf_user_reg_vm_mremap, ++ .fault = kbase_csf_user_reg_vm_fault ++}; + -+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) ++static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, ++ struct vm_area_struct *vma) +{ -+ struct kbase_context *kctx = katom->kctx; ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + struct kbase_device *kbdev = kctx->kbdev; -+ __user void *data = (__user void *)(uintptr_t) katom->jc; -+ u8 *ids; -+ u32 count = MAX(katom->nr_extres, 1); -+ u32 i; -+ int ret; -+ -+ /* Sanity checks */ -+ if (count > ARRAY_SIZE(kctx->jit_alloc)) { -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ /* Copy the information for safe access and future storage */ -+ ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); -+ if (!ids) { -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ lockdep_assert_held(&kctx->jctx.lock); -+ katom->softjob_data = ids; -+ -+ /* For backwards compatibility */ -+ if (katom->nr_extres) { -+ /* Fail the job if there is no list of ids */ -+ if (!data) { -+ ret = -EINVAL; -+ goto free_info; -+ } -+ -+ if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { -+ ret = -EINVAL; -+ goto free_info; -+ } -+ } else { -+ katom->nr_extres = 1; -+ *ids = (u8)katom->jc; -+ } -+ for (i = 0; i < count; i++) -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); -+ -+ list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); -+ -+ return 0; -+ -+free_info: -+ kfree(katom->softjob_data); -+ katom->softjob_data = NULL; -+fail: -+ return ret; -+} -+ -+static void kbase_jit_free_process(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ u8 *ids = kbase_jit_free_get_ids(katom); -+ u32 count = katom->nr_extres; -+ u32 i; -+ -+ if (ids == NULL) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return; -+ } -+ -+ for (i = 0; i < count; i++, ids++) { -+ /* -+ * If the ID is zero or it is not in use yet then fail the job. 
-+ */ -+ if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return; -+ } -+ } -+} -+ -+static void kbasep_jit_finish_worker(struct work_struct *work) -+{ -+ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ int resched; -+ -+ mutex_lock(&kctx->jctx.lock); -+ kbase_finish_soft_job(katom); -+ resched = kbase_jd_done_nolock(katom, true); -+ mutex_unlock(&kctx->jctx.lock); -+ -+ if (resched) -+ kbase_js_sched_all(kctx->kbdev); -+} -+ -+void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) -+{ -+ LIST_HEAD(jit_pending_alloc_list); -+ struct list_head *i, *tmp; -+ -+ list_splice_tail_init(&kctx->jctx.jit_pending_alloc, -+ &jit_pending_alloc_list); -+ -+ list_for_each_safe(i, tmp, &jit_pending_alloc_list) { -+ struct kbase_jd_atom *pending_atom = list_entry(i, -+ struct kbase_jd_atom, queue); -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); -+ kbase_kinstr_jm_atom_sw_start(pending_atom); -+ if (kbase_jit_allocate_process(pending_atom) == 0) { -+ /* Atom has completed */ -+ INIT_WORK(&pending_atom->work, -+ kbasep_jit_finish_worker); -+ queue_work(kctx->jctx.job_done_wq, &pending_atom->work); -+ } -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); -+ kbase_kinstr_jm_atom_sw_stop(pending_atom); -+ } -+} -+ -+static void kbase_jit_free_finish(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ u8 *ids; -+ size_t j; -+ -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ ids = kbase_jit_free_get_ids(katom); -+ if (WARN_ON(ids == NULL)) -+ return; -+ -+ /* Remove this atom from the jit_atoms_head list */ -+ list_del(&katom->jit_node); -+ -+ for (j = 0; j != katom->nr_extres; ++j) { -+ if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { -+ /* -+ * If the ID is valid but the allocation request failed -+ * still succeed this soft job but don't try and free -+ * the allocation. -+ */ -+ if (kctx->jit_alloc[ids[j]] != -+ KBASE_RESERVED_REG_JIT_ALLOC) { -+ KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, -+ kctx->jit_alloc[ids[j]]-> -+ gpu_alloc->nents, ids[j]); -+ kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); -+ } -+ kctx->jit_alloc[ids[j]] = NULL; -+ } -+ } -+ /* Free the list of ids */ -+ kfree(ids); -+ -+ kbase_jit_retry_pending_alloc(kctx); -+} -+ -+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) -+{ -+ __user struct base_external_resource_list *user_ext_res; -+ struct base_external_resource_list *ext_res; -+ u64 count = 0; -+ size_t copy_size; + -+ user_ext_res = (__user struct base_external_resource_list *) -+ (uintptr_t) katom->jc; ++ /* Few sanity checks */ ++ if (kctx->csf.user_reg.vma) ++ return -EBUSY; + -+ /* Fail the job if there is no info structure */ -+ if (!user_ext_res) ++ if (nr_pages != 1) + return -EINVAL; + -+ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) -+ return -EINVAL; ++ if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) ++ return -EPERM; + -+ /* Is the number of external resources in range? 
*/ -+ if (!count || count > BASE_EXT_RES_COUNT_MAX) -+ return -EINVAL; ++ /* Map uncached */ ++ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + -+ /* Copy the information for safe access and future storage */ -+ copy_size = sizeof(*ext_res); -+ copy_size += sizeof(struct base_external_resource) * (count - 1); -+ ext_res = memdup_user(user_ext_res, copy_size); -+ if (IS_ERR(ext_res)) -+ return PTR_ERR(ext_res); ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + -+ /* -+ * Overwrite the count with the first value incase it was changed -+ * after the fact. ++ /* User register page comes from the device register area so ++ * "struct page" isn't available for it. + */ -+ ext_res->count = count; -+ -+ katom->softjob_data = ext_res; -+ -+ return 0; -+} -+ -+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) -+{ -+ struct base_external_resource_list *ext_res; -+ int i; -+ bool failed = false; -+ -+ ext_res = katom->softjob_data; -+ if (!ext_res) -+ goto failed_jc; -+ -+ kbase_gpu_vm_lock(katom->kctx); ++ vma->vm_flags |= VM_PFNMAP; + -+ for (i = 0; i < ext_res->count; i++) { -+ u64 gpu_addr; ++ kctx->csf.user_reg.vma = vma; + -+ gpu_addr = ext_res->ext_res[i].ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; -+ if (map) { -+ if (!kbase_sticky_resource_acquire(katom->kctx, -+ gpu_addr)) -+ goto failed_loop; -+ } else { -+ if (!kbase_sticky_resource_release_force(katom->kctx, NULL, -+ gpu_addr)) -+ failed = true; -+ } -+ } ++ mutex_lock(&kbdev->csf.reg_lock); ++ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++; ++ mutex_unlock(&kbdev->csf.reg_lock); + -+ /* -+ * In the case of unmap we continue unmapping other resources in the -+ * case of failure but will always report failure if _any_ unmap -+ * request fails. ++ /* Make VMA point to the special internal file, but don't drop the ++ * reference on mali device file (that would be done later when the ++ * VMA is closed). + */ -+ if (failed) -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ else -+ katom->event_code = BASE_JD_EVENT_DONE; -+ -+ kbase_gpu_vm_unlock(katom->kctx); -+ -+ return; -+ -+failed_loop: -+ while (i > 0) { -+ u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; -+ -+ kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr); -+ -+ --i; -+ } -+ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_vm_unlock(katom->kctx); -+ -+failed_jc: -+ return; -+} -+ -+static void kbase_ext_res_finish(struct kbase_jd_atom *katom) -+{ -+ struct base_external_resource_list *ext_res; -+ -+ ext_res = katom->softjob_data; -+ /* Free the info structure */ -+ kfree(ext_res); -+} -+ -+int kbase_process_soft_job(struct kbase_jd_atom *katom) -+{ -+ int ret = 0; -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); -+ kbase_kinstr_jm_atom_sw_start(katom); -+ -+ trace_sysgraph(SGR_SUBMIT, kctx->id, -+ kbase_jd_atom_id(kctx, katom)); -+ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ ret = kbase_dump_cpu_gpu_time(katom); -+ break; -+ -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ katom->event_code = kbase_sync_fence_out_trigger(katom, -+ katom->event_code == BASE_JD_EVENT_DONE ? 
-+ 0 : -EFAULT); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ { -+ ret = kbase_sync_fence_in_wait(katom); -+ -+ if (ret == 1) { -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+ kbasep_add_waiting_with_timeout(katom); -+#else -+ kbasep_add_waiting_soft_job(katom); -+#endif -+ } -+ break; -+ } -+#endif -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ ret = kbasep_soft_event_wait(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EVENT_SET: -+ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); -+ break; -+ case BASE_JD_REQ_SOFT_EVENT_RESET: -+ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); -+ break; -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ { -+ int res = kbase_debug_copy(katom); -+ -+ if (res) -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ break; -+ } -+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ ret = kbase_jit_allocate_process(katom); -+ break; -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ kbase_jit_free_process(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ kbase_ext_res_process(katom, true); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ kbase_ext_res_process(katom, false); -+ break; -+ } -+ -+ /* Atom is complete */ -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); -+ kbase_kinstr_jm_atom_sw_stop(katom); -+ return ret; -+} -+ -+void kbase_cancel_soft_job(struct kbase_jd_atom *katom) -+{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ kbase_sync_fence_in_cancel_wait(katom); -+ break; -+#endif -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ kbasep_soft_event_cancel_job(katom); -+ break; -+ default: -+ /* This soft-job doesn't support cancellation! */ -+ KBASE_DEBUG_ASSERT(0); -+ } -+} -+ -+int kbase_prepare_soft_job(struct kbase_jd_atom *katom) -+{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ { -+ if (!IS_ALIGNED(katom->jc, cache_line_size())) -+ return -EINVAL; -+ } -+ break; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ { -+ struct base_fence fence; -+ int fd; -+ -+ if (copy_from_user(&fence, -+ (__user void *)(uintptr_t)katom->jc, -+ sizeof(fence)) != 0) -+ return -EINVAL; -+ -+ fd = kbase_sync_fence_out_create(katom, -+ fence.basep.stream_fd); -+ if (fd < 0) -+ return -EINVAL; -+ -+ fence.basep.fd = fd; -+ if (copy_to_user((__user void *)(uintptr_t)katom->jc, -+ &fence, sizeof(fence)) != 0) { -+ kbase_sync_fence_out_remove(katom); -+ /* fd should have been closed here, but there's -+ * no good way of doing that. Since -+ * copy_to_user() very rarely fails, and the fd -+ * will get closed on process termination this -+ * won't be a problem. 
-+ */ -+ fence.basep.fd = -EINVAL; -+ return -EINVAL; -+ } -+ } -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ { -+ struct base_fence fence; -+ int ret; ++ vma->vm_file = kctx->kbdev->csf.user_reg.filp; ++ get_file(vma->vm_file); + -+ if (copy_from_user(&fence, -+ (__user void *)(uintptr_t)katom->jc, -+ sizeof(fence)) != 0) -+ return -EINVAL; ++ /* Also adjust the vm_pgoff */ ++ vma->vm_pgoff = kctx->csf.user_reg.file_offset; ++ vma->vm_ops = &kbase_csf_user_reg_vm_ops; ++ vma->vm_private_data = kctx; + -+ /* Get a reference to the fence object */ -+ ret = kbase_sync_fence_in_from_fd(katom, -+ fence.basep.fd); -+ if (ret < 0) -+ return ret; -+ } -+ break; -+#endif /* CONFIG_SYNC_FILE */ -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ return kbase_jit_allocate_prepare(katom); -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ return kbase_jit_free_prepare(katom); -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ case BASE_JD_REQ_SOFT_EVENT_SET: -+ case BASE_JD_REQ_SOFT_EVENT_RESET: -+ if (katom->jc == 0) -+ return -EINVAL; -+ break; -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ return kbase_debug_copy_prepare(katom); -+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ return kbase_ext_res_prepare(katom); -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ return kbase_ext_res_prepare(katom); -+ default: -+ /* Unsupported soft-job */ -+ return -EINVAL; -+ } + return 0; +} + -+void kbase_finish_soft_job(struct kbase_jd_atom *katom) -+{ -+ trace_sysgraph(SGR_COMPLETE, katom->kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom)); -+ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ /* Nothing to do */ -+ break; -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ /* If fence has not yet been signaled, do it now */ -+ kbase_sync_fence_out_trigger(katom, katom->event_code == -+ BASE_JD_EVENT_DONE ? 
0 : -EFAULT); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ /* Release katom's reference to fence object */ -+ kbase_sync_fence_in_remove(katom); -+ break; -+#endif /* CONFIG_SYNC_FILE */ -+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ kbase_debug_copy_finish(katom); -+ break; -+#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ kbase_jit_allocate_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ kbase_ext_res_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ kbase_ext_res_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ kbase_jit_free_finish(katom); -+ break; -+ } -+} -+ -+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) -+{ -+ LIST_HEAD(local_suspended_soft_jobs); -+ struct kbase_jd_atom *tmp_iter; -+ struct kbase_jd_atom *katom_iter; -+ struct kbasep_js_device_data *js_devdata; -+ bool resched = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev); -+ -+ js_devdata = &kbdev->js_data; -+ -+ /* Move out the entire list */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ list_splice_init(&js_devdata->suspended_soft_jobs_list, -+ &local_suspended_soft_jobs); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ -+ /* -+ * Each atom must be detached from the list and ran separately - -+ * it could be re-added to the old list, but this is unlikely -+ */ -+ list_for_each_entry_safe(katom_iter, tmp_iter, -+ &local_suspended_soft_jobs, dep_item[1]) { -+ struct kbase_context *kctx = katom_iter->kctx; -+ -+ mutex_lock(&kctx->jctx.lock); -+ -+ /* Remove from the global list */ -+ list_del(&katom_iter->dep_item[1]); -+ /* Remove from the context's list of waiting soft jobs */ -+ kbasep_remove_waiting_soft_job(katom_iter); -+ -+ if (kbase_process_soft_job(katom_iter) == 0) { -+ kbase_finish_soft_job(katom_iter); -+ resched |= kbase_jd_done_nolock(katom_iter, true); -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ atomic_dec(&kbdev->pm.gpu_users_waiting); -+#endif /* CONFIG_MALI_ARBITER_SUPPORT */ -+ } -+ mutex_unlock(&kctx->jctx.lock); -+ } -+ -+ if (resched) -+ kbase_js_sched_all(kbdev); -+} -+#endif /* !MALI_USE_CSF */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.c b/drivers/gpu/arm/bifrost/mali_kbase_strings.c ++#endif /* MALI_USE_CSF */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h new file mode 100644 -index 000000000..84784be6f +index 000000000..6dda44b9f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.c -@@ -0,0 +1,28 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +@@ -0,0 +1,533 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010, 2012-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -249256,274 +250522,528 @@ index 000000000..84784be6f + * + */ + -+#include "mali_kbase_strings.h" ++/** ++ * DOC: Base kernel memory APIs, Linux implementation. 
++ */ + -+#define KBASE_DRV_NAME "mali" -+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" ++#ifndef _KBASE_MEM_LINUX_H_ ++#define _KBASE_MEM_LINUX_H_ + -+const char kbase_drv_name[] = KBASE_DRV_NAME; -+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.h b/drivers/gpu/arm/bifrost/mali_kbase_strings.h -new file mode 100644 -index 000000000..c3f94f926 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.h -@@ -0,0 +1,23 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/* A HWC dump mapping */ ++struct kbase_hwc_dma_mapping { ++ void *cpu_va; ++ dma_addr_t dma_pa; ++ size_t size; ++}; ++ ++/** ++ * kbase_mem_alloc - Create a new allocation for GPU + * -+ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. ++ * @kctx: The kernel context ++ * @va_pages: The number of pages of virtual address space to reserve ++ * @commit_pages: The number of physical pages to allocate upfront ++ * @extension: The number of extra pages to allocate on each GPU fault which ++ * grows the region. ++ * @flags: bitmask of BASE_MEM_* flags to convey special requirements & ++ * properties for the new allocation. ++ * @gpu_va: Start address of the memory region which was allocated from GPU ++ * virtual address space. If the BASE_MEM_FLAG_MAP_FIXED is set ++ * then this parameter shall be provided by the caller. ++ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Return: 0 on success or error code ++ */ ++struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, ++ u64 extension, u64 *flags, u64 *gpu_va, ++ enum kbase_caller_mmu_sync_info mmu_sync_info); ++ ++/** ++ * kbase_mem_query - Query properties of a GPU memory region + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kctx: The kernel context ++ * @gpu_addr: A GPU address contained within the memory region ++ * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be ++ * regarding the amount of backing physical memory allocated so far ++ * for the region or the size of the region or the flags associated ++ * with the region. ++ * @out: Pointer to the location to store the result of query. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Return: 0 on success or error code ++ */ ++int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, ++ u64 *const out); ++ ++/** ++ * kbase_mem_import - Import the external memory for use by the GPU + * ++ * @kctx: The kernel context ++ * @type: Type of external memory ++ * @phandle: Handle to the external memory interpreted as per the type. 
++ * @padding: Amount of extra VA pages to append to the imported buffer ++ * @gpu_va: GPU address assigned to the imported external memory ++ * @va_pages: Size of the memory region reserved from the GPU address space ++ * @flags: bitmask of BASE_MEM_* flags to convey special requirements & ++ * properties for the new allocation representing the external ++ * memory. ++ * Return: 0 on success or error code + */ ++int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, ++ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, ++ u64 *flags); + -+extern const char kbase_drv_name[]; -+extern const char kbase_timeline_name[]; -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h -new file mode 100644 -index 000000000..3d2053bee ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h -@@ -0,0 +1,216 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more ++ * memory regions + * -+ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. ++ * @kctx: The kernel context ++ * @flags: bitmask of BASE_MEM_* flags. ++ * @stride: Bytes between start of each memory region ++ * @nents: The number of regions to pack together into the alias ++ * @ai: Pointer to the struct containing the memory aliasing info ++ * @num_pages: Number of pages the alias will cover + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Return: 0 on failure or otherwise the GPU VA for the alias ++ */ ++u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); ++ ++/** ++ * kbase_mem_flags_change - Change the flags for a memory region + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kctx: The kernel context ++ * @gpu_addr: A GPU address contained within the memory region to modify. ++ * @flags: The new flags to set ++ * @mask: Mask of the flags, from BASE_MEM_*, to modify. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Return: 0 on success or error code ++ */ ++int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); ++ ++/** ++ * kbase_mem_commit - Change the physical backing size of a region + * ++ * @kctx: The kernel context ++ * @gpu_addr: Handle to the memory region ++ * @new_pages: Number of physical pages to back the region with ++ * ++ * Return: 0 on success or error code + */ ++int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); + +/** -+ * DOC: This file contains our internal "API" for explicit fences. -+ * It hides the implementation details of the actual explicit fence mechanism -+ * used (Android fences or sync file with DMA fences). 
++ * kbase_mem_shrink - Shrink the physical backing size of a region ++ * ++ * @kctx: The kernel context ++ * @reg: The GPU region ++ * @new_pages: Number of physical pages to back the region with ++ * ++ * Return: 0 on success or error code + */ ++int kbase_mem_shrink(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 new_pages); + -+#ifndef MALI_KBASE_SYNC_H -+#define MALI_KBASE_SYNC_H ++/** ++ * kbase_context_mmap - Memory map method, gets invoked when mmap system call is ++ * issued on device file /dev/malixx. ++ * @kctx: The kernel context ++ * @vma: Pointer to the struct containing the info where the GPU allocation ++ * will be mapped in virtual address space of CPU. ++ * ++ * Return: 0 on success or error code ++ */ ++int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma); + -+#include -+#include -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#include "mali_kbase_fence_defs.h" -+#include -+#endif ++/** ++ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction ++ * mechanism. ++ * @kctx: The kbase context to initialize. ++ * ++ * Return: Zero on success or -errno on failure. ++ */ ++int kbase_mem_evictable_init(struct kbase_context *kctx); + -+#include "mali_kbase.h" ++/** ++ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction ++ * mechanism. ++ * @kctx: The kbase context to de-initialize. ++ */ ++void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** -+ * struct kbase_sync_fence_info - Information about a fence -+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ) -+ * @name: The name given to this fence when it was created -+ * @status: < 0 means error, 0 means active, 1 means signaled ++ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region ++ * @new_pages: The number of pages after the grow ++ * @old_pages: The number of pages before the grow ++ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. + * -+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() -+ * to get the information. ++ * Return: 0 on success, -errno on error. ++ * ++ * Expand the GPU mapping to encompass the new psychical pages which have ++ * been added to the allocation. ++ * ++ * Note: Caller must be holding the region lock. + */ -+struct kbase_sync_fence_info { -+ void *fence; -+ char name[32]; -+ int status; -+}; ++int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 new_pages, ++ u64 old_pages, ++ enum kbase_caller_mmu_sync_info mmu_sync_info); + +/** -+ * kbase_sync_fence_stream_create() - Create a stream object -+ * @name: Name of stream (only used to ease debugging/visualization) -+ * @out_fd: A file descriptor representing the created stream object ++ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction ++ * @gpu_alloc: The physical allocation to make evictable + * -+ * Can map down to a timeline implementation in some implementations. -+ * Exposed as a file descriptor. -+ * Life-time controlled via the file descriptor: -+ * - dup to add a ref -+ * - close to remove a ref ++ * Return: 0 on success, -errno on error. + * -+ * Return: 0 on success, < 0 on error ++ * Take the provided region and make all the physical pages within it ++ * reclaimable by the kernel, updating the per-process VM stats as well. 
++ * Remove any CPU mappings (as these can't be removed in the shrinker callback ++ * as mmap_sem/mmap_lock might already be taken) but leave the GPU mapping ++ * intact as and until the shrinker reclaims the allocation. ++ * ++ * Note: Must be called with the region lock of the containing context. + */ -+int kbase_sync_fence_stream_create(const char *name, int *const out_fd); ++int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + -+#if !MALI_USE_CSF +/** -+ * kbase_sync_fence_out_create - Create an explicit output fence to specified atom ++ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for ++ * eviction. ++ * @alloc: The physical allocation to remove eviction eligibility from. + * -+ * @katom: Atom to assign the new explicit fence to -+ * @stream_fd: File descriptor for stream object to create fence on ++ * Return: True if the allocation had its backing restored and false if ++ * it hasn't. + * -+ * Return: Valid file descriptor to fence or < 0 on error ++ * Make the physical pages in the region no longer reclaimable and update the ++ * per-process stats, if the shrinker has already evicted the memory then ++ * re-allocate it if the region is still alive. ++ * ++ * Note: Must be called with the region lock of the containing context. + */ -+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); ++bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); ++ ++typedef unsigned int kbase_vmap_flag; ++ ++/* Sync operations are needed on beginning and ending of access to kernel-mapped GPU memory. ++ * ++ * This is internal to the struct kbase_vmap_struct and should not be passed in by callers of ++ * kbase_vmap-related functions. ++ */ ++#define KBASE_VMAP_FLAG_SYNC_NEEDED (((kbase_vmap_flag)1) << 0) ++ ++/* Permanently mapped memory accounting (including enforcing limits) should be done on the ++ * kernel-mapped GPU memory. ++ * ++ * This should be used if the kernel mapping is going to live for a potentially long time, for ++ * example if it will persist after the caller has returned. ++ */ ++#define KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING (((kbase_vmap_flag)1) << 1) ++ ++/* Set of flags that can be passed into kbase_vmap-related functions */ ++#define KBASE_VMAP_INPUT_FLAGS (KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) ++ ++struct kbase_vmap_struct { ++ off_t offset_in_page; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++ struct kbase_mem_phy_alloc *gpu_alloc; ++ struct tagged_addr *cpu_pages; ++ struct tagged_addr *gpu_pages; ++ void *addr; ++ size_t size; ++ kbase_vmap_flag flags; ++}; + +/** -+ * kbase_sync_fence_in_from_fd() - Assigns an existing fence to specified atom -+ * @katom: Atom to assign the existing explicit fence to -+ * @fd: File descriptor to an existing fence ++ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region or NULL if there isn't one ++ * @new_pages: The number of pages after the shrink ++ * @old_pages: The number of pages before the shrink + * -+ * Assigns an explicit input fence to atom. -+ * This can later be waited for by calling @kbase_sync_fence_in_wait ++ * Return: 0 on success, negative -errno on error + * -+ * Return: 0 on success, < 0 on error ++ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region ++ * itself is unmodified as we still need to reserve the VA, only the page tables ++ * will be modified by this function. 
+ */ -+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); -+#endif /* !MALI_USE_CSF */ ++int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages); + +/** -+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence ++ * kbase_vmap_reg - Map part of an existing region into the kernel safely, only if the requested ++ * access permissions are supported ++ * @kctx: Context @reg belongs to ++ * @reg: The GPU region to map part of ++ * @gpu_addr: Start address of VA range to map, which must be within @reg ++ * @size: Size of VA range, which when added to @gpu_addr must be within @reg ++ * @prot_request: Flags indicating how the caller will then access the memory ++ * @map: Structure to be given to kbase_vunmap() on freeing ++ * @vmap_flags: Flags of type kbase_vmap_flag + * -+ * @fd: File descriptor to check ++ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * -+ * This function is only usable to catch unintentional user errors early, -+ * it does not stop malicious code changing the fd after this function returns. ++ * Variant of kbase_vmap_prot() that can be used given an existing region. + * -+ * Return: 0 if fd is for a valid fence, < 0 if invalid ++ * The caller must satisfy one of the following for @reg: ++ * * It must have been obtained by finding it on the region tracker, and the region lock must not ++ * have been released in the mean time. ++ * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region ++ * lock is now held again. ++ * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the ++ * region lock is now held again. ++ * ++ * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. ++ * ++ * Refer to kbase_vmap_prot() for more information on the operation of this function. + */ -+int kbase_sync_fence_validate(int fd); ++void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, ++ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, ++ kbase_vmap_flag vmap_flags); + -+#if !MALI_USE_CSF +/** -+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom -+ * @katom: Atom with an explicit fence to signal -+ * @result: < 0 means signal with error, 0 >= indicates success ++ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the ++ * requested access permissions are supported ++ * @kctx: Context the VA range belongs to ++ * @gpu_addr: Start address of VA range ++ * @size: Size of VA range ++ * @prot_request: Flags indicating how the caller will then access the memory ++ * @map: Structure to be given to kbase_vunmap() on freeing + * -+ * Signal output fence attached on katom and remove the fence from the atom. ++ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error ++ * ++ * Map a GPU VA Range into the kernel. The VA range must be contained within a ++ * GPU memory region. Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. ++ * ++ * This is safer than using kmap() on the pages directly, ++ * because the pages here are refcounted to prevent freeing (and hence reuse ++ * elsewhere in the system) until an kbase_vunmap() ++ * ++ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check ++ * whether the region should allow the intended access, and return an error if ++ * disallowed. 
This is essential for security of imported memory, particularly ++ * a user buf from SHM mapped into the process as RO. In that case, write ++ * access must be checked if the intention is for kernel to write to the ++ * memory. ++ * ++ * The checks are also there to help catch access errors on memory where ++ * security is not a concern: imported memory that is always RW, and memory ++ * that was allocated and owned by the process attached to @kctx. In this case, ++ * it helps to identify memory that was mapped with the wrong access type. ++ * ++ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases ++ * where either the security of memory is solely dependent on those flags, or ++ * when userspace code was expecting only the GPU to access the memory (e.g. HW ++ * workarounds). ++ * ++ * All cache maintenance operations shall be ignored if the ++ * memory region has been imported. + * -+ * Return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE + */ -+enum base_jd_event_code -+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); ++void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ unsigned long prot_request, struct kbase_vmap_struct *map); + +/** -+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled -+ * @katom: Atom with explicit fence to wait for ++ * kbase_vmap - Map a GPU VA range into the kernel safely ++ * @kctx: Context the VA range belongs to ++ * @gpu_addr: Start address of VA range ++ * @size: Size of VA range ++ * @map: Structure to be given to kbase_vunmap() on freeing + * -+ * If the fence is already signaled, then 0 is returned, and the caller must -+ * continue processing of the katom. ++ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * -+ * If the fence isn't already signaled, then this kbase_sync framework will -+ * take responsibility to continue the processing once the fence is signaled. ++ * Map a GPU VA Range into the kernel. The VA range must be contained within a ++ * GPU memory region. Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. + * -+ * Return: 0 if already signaled, otherwise 1 ++ * This is safer than using kmap() on the pages directly, ++ * because the pages here are refcounted to prevent freeing (and hence reuse ++ * elsewhere in the system) until an kbase_vunmap() ++ * ++ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no ++ * checks to ensure the security of e.g. imported user bufs from RO SHM. ++ * ++ * Note: All cache maintenance operations shall be ignored if the memory region ++ * has been imported. + */ -+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); ++void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ struct kbase_vmap_struct *map); + +/** -+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits -+ * @katom: Atom to cancel wait for ++ * kbase_vunmap - Unmap a GPU VA range from the kernel ++ * @kctx: Context the VA range belongs to ++ * @map: Structure describing the mapping from the corresponding kbase_vmap() ++ * call + * -+ * This function is fully responsible for continuing processing of this atom -+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) ++ * Unmaps a GPU VA range from the kernel, given its @map structure obtained ++ * from kbase_vmap(). 
Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. ++ * ++ * The reference taken on pages during kbase_vmap() is released. ++ * ++ * Note: All cache maintenance operations shall be ignored if the memory region ++ * has been imported. + */ -+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); ++void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); ++ ++extern const struct vm_operations_struct kbase_vm_ops; + +/** -+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom -+ * @katom: Atom to remove explicit input fence for ++ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode ++ * CPU mapping. ++ * @kctx: Context the CPU mapping belongs to. ++ * @map: Structure describing the CPU mapping, setup previously by the ++ * kbase_vmap() call. ++ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) + * -+ * This will also release the corresponding reference. ++ * Note: The caller shall ensure that CPU mapping is not revoked & remains ++ * active whilst the maintenance is in progress. + */ -+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); ++void kbase_sync_mem_regions(struct kbase_context *kctx, ++ struct kbase_vmap_struct *map, enum kbase_sync_type dest); + +/** -+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom -+ * @katom: Atom to remove explicit output fence for ++ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region ++ * @new_pages: The number of pages after the shrink ++ * @old_pages: The number of pages before the shrink + * -+ * This will also release the corresponding reference. ++ * Shrink (or completely remove) all CPU mappings which reference the shrunk ++ * part of the allocation. + */ -+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); -+#endif /* !MALI_USE_CSF */ ++void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages); + -+#if !MALI_USE_CSF +/** -+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence -+ * @katom: Atom to get fence information from -+ * @info: Struct to be filled with fence information ++ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a ++ * physical allocation ++ * @kctx: The kernel base context associated with the mapping ++ * @alloc: Pointer to the allocation to terminate + * -+ * Return: 0 on success, < 0 on error ++ * This function will unmap the kernel mapping, and free any structures used to ++ * track it. 
+ */ -+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info); ++void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc); + +/** -+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence -+ * @katom: Atom to get fence information from -+ * @info: Struct to be filled with fence information ++ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent ++ * mapping of a physical allocation ++ * @kctx: The kernel base context @gpu_addr will be looked up in ++ * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping ++ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer ++ * which will be used for a call to ++ * kbase_phy_alloc_mapping_put() + * -+ * Return: 0 on success, < 0 on error ++ * Return: Pointer to a kernel-side accessible location that directly ++ * corresponds to @gpu_addr, or NULL on failure ++ * ++ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access ++ * that location kernel-side. Only certain kinds of memory have a permanent ++ * kernel mapping, refer to the internal functions ++ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more ++ * information. ++ * ++ * If this function succeeds, a CPU access to the returned pointer will access ++ * the actual location represented by @gpu_addr. That is, the return value does ++ * not require any offset added to it to access the location specified in ++ * @gpu_addr ++ * ++ * The client must take care to either apply any necessary sync operations when ++ * accessing the data, or ensure that the enclosing region was coherent with ++ * the GPU, or uncached in the CPU. ++ * ++ * The refcount on the physical allocations backing the region are taken, so ++ * that they do not disappear whilst the client is accessing it. Once the ++ * client has finished accessing the memory, it must be released with a call to ++ * kbase_phy_alloc_mapping_put() ++ * ++ * Whilst this is expected to execute quickly (the mapping was already setup ++ * when the physical allocation was created), the call is not IRQ-safe due to ++ * the region lookup involved. ++ * ++ * An error code may indicate that: ++ * - a userside process has freed the allocation, and so @gpu_addr is no longer ++ * valid ++ * - the region containing @gpu_addr does not support a permanent kernel mapping + */ -+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info); -+#endif /* !MALI_USE_CSF */ ++void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, ++ struct kbase_vmap_struct **out_kern_mapping); + -+#if IS_ENABLED(CONFIG_SYNC_FILE) -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+void kbase_sync_fence_info_get(struct fence *fence, -+ struct kbase_sync_fence_info *info); -+#else -+void kbase_sync_fence_info_get(struct dma_fence *fence, -+ struct kbase_sync_fence_info *info); -+#endif -+#endif ++/** ++ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a ++ * physical allocation ++ * @kctx: The kernel base context associated with the mapping ++ * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained ++ * from a call to kbase_phy_alloc_mapping_get() ++ * ++ * Releases the reference to the allocations backing @kern_mapping that was ++ * obtained through a call to kbase_phy_alloc_mapping_get(). 
This must be used ++ * when the client no longer needs to access the kernel-side CPU pointer. ++ * ++ * If this was the last reference on the underlying physical allocations, they ++ * will go through the normal allocation free steps, which also includes an ++ * unmap of the permanent kernel mapping for those allocations. ++ * ++ * Due to these operations, the function is not IRQ-safe. However it is ++ * expected to execute quickly in the normal case, i.e. when the region holding ++ * the physical allocation is still present. ++ */ ++void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, ++ struct kbase_vmap_struct *kern_mapping); + +/** -+ * kbase_sync_status_string() - Get string matching @status -+ * @status: Value of fence status. ++ * kbase_get_cache_line_alignment - Return cache line alignment + * -+ * Return: Pointer to string describing @status. ++ * @kbdev: Device pointer. ++ * ++ * Helper function to return the maximum cache line alignment considering ++ * both CPU and GPU cache sizes. ++ * ++ * Return: CPU and GPU cache line alignment, in bytes. + */ -+const char *kbase_sync_status_string(int status); ++u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); + ++#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) ++static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, ++ unsigned long addr, unsigned long pfn, pgprot_t pgprot) ++{ ++ int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); + -+#if !MALI_USE_CSF -+/* -+ * Internal worker used to continue processing of atom. -+ */ -+void kbase_sync_fence_wait_worker(struct work_struct *data); ++ if (unlikely(err == -ENOMEM)) ++ return VM_FAULT_OOM; ++ if (unlikely(err < 0 && err != -EBUSY)) ++ return VM_FAULT_SIGBUS; ++ ++ return VM_FAULT_NOPAGE; ++} ++#endif + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG +/** -+ * kbase_sync_fence_in_dump() - Trigger a debug dump of atoms input fence state ++ * kbase_mem_get_process_mmap_lock - Return the mmap lock for the current process + * -+ * @katom: Atom to trigger fence debug dump for ++ * Return: the mmap lock for the current process + */ -+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); -+#endif -+#endif /* !MALI_USE_CSF */ ++static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void) ++{ ++#if KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE ++ return ¤t->mm->mmap_sem; ++#else /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ ++ return ¤t->mm->mmap_lock; ++#endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ ++} + -+#endif /* MALI_KBASE_SYNC_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c ++#endif /* _KBASE_MEM_LINUX_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h new file mode 100644 -index 000000000..5ee7fc3ce +index 000000000..5a1bb16cc --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c -@@ -0,0 +1,50 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -249541,45 +251061,174 @@ index 000000000..5ee7fc3ce + * + */ + ++#ifndef _KBASE_MEM_LOWLEVEL_H ++#define _KBASE_MEM_LOWLEVEL_H ++ ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" ++#endif ++ ++#include ++ ++/* Flags for kbase_phy_allocator_pages_alloc */ ++#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ ++#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ ++#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ ++ ++#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) ++ ++#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ ++ ++enum kbase_sync_type { ++ KBASE_SYNC_TO_CPU, ++ KBASE_SYNC_TO_DEVICE ++}; ++ ++struct tagged_addr { phys_addr_t tagged_addr; }; ++ ++#define HUGE_PAGE (1u << 0) ++#define HUGE_HEAD (1u << 1) ++#define FROM_PARTIAL (1u << 2) ++ ++#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) ++ +/* -+ * @file ++ * Note: if macro for converting physical address to page is not defined ++ * in the kernel itself, it is defined hereby. This is to avoid build errors ++ * which are reported during builds for some architectures. ++ */ ++#ifndef phys_to_page ++#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) ++#endif ++ ++/** ++ * as_phys_addr_t - Retrieve the physical address from tagged address by ++ * masking the lower order 12 bits. ++ * @t: tagged address to be translated. + * -+ * Common code for our explicit fence functionality ++ * Return: physical address corresponding to tagged address. + */ ++static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) ++{ ++ return t.tagged_addr & PAGE_MASK; ++} + -+#include -+#include "mali_kbase.h" -+#include "mali_kbase_sync.h" ++/** ++ * as_page - Retrieve the struct page from a tagged address ++ * @t: tagged address to be translated. ++ * ++ * Return: pointer to struct page corresponding to tagged address. ++ */ ++static inline struct page *as_page(struct tagged_addr t) ++{ ++ return phys_to_page(as_phys_addr_t(t)); ++} + -+#if !MALI_USE_CSF -+void kbase_sync_fence_wait_worker(struct work_struct *data) ++/** ++ * as_tagged - Convert the physical address to tagged address type though ++ * there is no tag info present, the lower order 12 bits will be 0 ++ * @phys: physical address to be converted to tagged type ++ * ++ * This is used for 4KB physical pages allocated by the Driver or imported pages ++ * and is needed as physical pages tracking object stores the reference for ++ * physical pages using tagged address type in lieu of the type generally used ++ * for physical addresses. ++ * ++ * Return: address of tagged address type. 
++ */ ++static inline struct tagged_addr as_tagged(phys_addr_t phys) +{ -+ struct kbase_jd_atom *katom; ++ struct tagged_addr t; + -+ katom = container_of(data, struct kbase_jd_atom, work); -+ kbase_soft_event_wait_callback(katom); ++ t.tagged_addr = phys & PAGE_MASK; ++ return t; +} -+#endif /* !MALI_USE_CSF */ + -+const char *kbase_sync_status_string(int status) ++/** ++ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the ++ * lower order 12 bits of physial address ++ * @phys: physical address to be converted to tagged address ++ * @tag: tag to be stored along with the physical address. ++ * ++ * The tag info is used while freeing up the pages ++ * ++ * Return: tagged address storing physical address & tag. ++ */ ++static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) +{ -+ if (status == 0) -+ return "active"; -+ else if (status > 0) -+ return "signaled"; -+ else -+ return "error"; ++ struct tagged_addr t; ++ ++ t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK); ++ return t; +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c ++ ++/** ++ * is_huge - Check if the physical page is one of the 512 4KB pages of the ++ * large page which was not split to be used partially ++ * @t: tagged address storing the tag in the lower order bits. ++ * ++ * Return: true if page belongs to large page, or false ++ */ ++static inline bool is_huge(struct tagged_addr t) ++{ ++ return t.tagged_addr & HUGE_PAGE; ++} ++ ++/** ++ * is_huge_head - Check if the physical page is the first 4KB page of the ++ * 512 4KB pages within a large page which was not split ++ * to be used partially ++ * @t: tagged address storing the tag in the lower order bits. ++ * ++ * Return: true if page is the first page of a large page, or false ++ */ ++static inline bool is_huge_head(struct tagged_addr t) ++{ ++ int mask = HUGE_HEAD | HUGE_PAGE; ++ ++ return mask == (t.tagged_addr & mask); ++} ++ ++/** ++ * is_partial - Check if the physical page is one of the 512 pages of the ++ * large page which was split in 4KB pages to be used ++ * partially for allocations >= 2 MB in size. ++ * @t: tagged address storing the tag in the lower order bits. ++ * ++ * Return: true if page was taken from large page used partially, or false ++ */ ++static inline bool is_partial(struct tagged_addr t) ++{ ++ return t.tagged_addr & FROM_PARTIAL; ++} ++ ++/** ++ * index_in_large_page() - Get index of a 4KB page within a 2MB page which ++ * wasn't split to be used partially. ++ * ++ * @t: Tagged physical address of the physical 4KB page that lies within ++ * the large (or 2 MB) physical page. ++ * ++ * Return: Index of the 4KB page within a 2MB page ++ */ ++static inline unsigned int index_in_large_page(struct tagged_addr t) ++{ ++ WARN_ON(!is_huge(t)); ++ ++ return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1)); ++} ++ ++#endif /* _KBASE_LOWLEVEL_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c new file mode 100644 -index 000000000..9360324cf +index 000000000..737f7da55 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c -@@ -0,0 +1,409 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +@@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -249597,635 +251246,636 @@ index 000000000..9360324cf + * + */ + -+/* -+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) ++/** ++ * DOC: Base kernel page migration implementation. + */ ++#include + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mali_kbase_fence_defs.h" -+#include "mali_kbase_sync.h" -+#include "mali_kbase_fence.h" -+#include "mali_kbase.h" ++#include ++#include ++#include + -+static const struct file_operations stream_fops = { -+ .owner = THIS_MODULE -+}; ++/* Global integer used to determine if module parameter value has been ++ * provided and if page migration feature is enabled. ++ * Feature is disabled on all platforms by default. ++ */ ++int kbase_page_migration_enabled; ++module_param(kbase_page_migration_enabled, int, 0444); ++KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); + -+int kbase_sync_fence_stream_create(const char *name, int *const out_fd) ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++static const struct movable_operations movable_ops; ++#endif ++ ++bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, ++ u8 group_id) +{ -+ if (!out_fd) -+ return -EINVAL; ++ struct kbase_page_metadata *page_md = ++ kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); + -+ *out_fd = anon_inode_getfd(name, &stream_fops, NULL, -+ O_RDONLY | O_CLOEXEC); -+ if (*out_fd < 0) -+ return -EINVAL; ++ if (!page_md) ++ return false; + -+ return 0; -+} ++ SetPagePrivate(p); ++ set_page_private(p, (unsigned long)page_md); ++ page_md->dma_addr = dma_addr; ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); ++ page_md->vmap_count = 0; ++ page_md->group_id = group_id; ++ spin_lock_init(&page_md->migrate_lock); + -+#if !MALI_USE_CSF -+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) -+{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; ++ lock_page(p); ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++ __SetPageMovable(p, &movable_ops); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else -+ struct dma_fence *fence; -+#endif -+ struct sync_file *sync_file; -+ int fd; -+ -+ fence = kbase_fence_out_new(katom); -+ if (!fence) -+ return -ENOMEM; -+ -+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) -+ /* Take an extra reference to the fence on behalf of the sync_file. -+ * This is only needed on older kernels where sync_file_create() -+ * does not take its own reference. This was changed in v4.9.68, -+ * where sync_file_create() now takes its own reference. ++ /* In some corner cases, the driver may attempt to allocate memory pages ++ * even before the device file is open and the mapping for address space ++ * operations is created. In that case, it is impossible to assign address ++ * space operations to memory pages: simply pretend that they are movable, ++ * even if they are not. ++ * ++ * The page will go through all state transitions but it will never be ++ * actually considered movable by the kernel. This is due to the fact that ++ * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the ++ * memory pool will always refuse to add it to the pool and schedule ++ * a worker thread to free it later. 
++ * ++ * Page metadata may seem redundant in this case, but they are not, ++ * because memory pools expect metadata to be present when page migration ++ * is enabled and because the pages may always return to memory pools and ++ * gain the movable property later on in their life cycle. + */ -+ dma_fence_get(fence); ++ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { ++ __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); ++ } +#endif ++ unlock_page(p); + -+ /* create a sync_file fd representing the fence */ -+ sync_file = sync_file_create(fence); -+ if (!sync_file) { -+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) -+ dma_fence_put(fence); -+#endif -+ kbase_fence_out_remove(katom); -+ return -ENOMEM; -+ } ++ return true; ++} + -+ fd = get_unused_fd_flags(O_CLOEXEC); -+ if (fd < 0) { -+ fput(sync_file->file); -+ kbase_fence_out_remove(katom); -+ return fd; -+ } ++static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id) ++{ ++ struct device *const dev = kbdev->dev; ++ struct kbase_page_metadata *page_md; ++ dma_addr_t dma_addr; + -+ fd_install(fd, sync_file->file); ++ page_md = kbase_page_private(p); ++ if (!page_md) ++ return; + -+ return fd; ++ if (group_id) ++ *group_id = page_md->group_id; ++ dma_addr = kbase_dma_addr(p); ++ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ++ kfree(page_md); ++ set_page_private(p, 0); ++ ClearPagePrivate(p); +} + -+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) ++static void kbase_free_pages_worker(struct work_struct *work) +{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence = sync_file_get_fence(fd); -+#else -+ struct dma_fence *fence = sync_file_get_fence(fd); -+#endif ++ struct kbase_mem_migrate *mem_migrate = ++ container_of(work, struct kbase_mem_migrate, free_pages_work); ++ struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate); ++ struct page *p, *tmp; ++ struct kbase_page_metadata *page_md; ++ LIST_HEAD(free_list); + -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ spin_lock(&mem_migrate->free_pages_lock); ++ list_splice_init(&mem_migrate->free_pages_list, &free_list); ++ spin_unlock(&mem_migrate->free_pages_lock); + -+ if (!fence) -+ return -ENOENT; ++ list_for_each_entry_safe(p, tmp, &free_list, lru) { ++ u8 group_id = 0; ++ list_del_init(&p->lru); + -+ kbase_fence_fence_in_set(katom, fence); -+ katom->dma_fence.fence_cb_added = false; ++ lock_page(p); ++ page_md = kbase_page_private(p); ++ if (IS_PAGE_MOVABLE(page_md->status)) { ++ __ClearPageMovable(p); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ } ++ unlock_page(p); + -+ return 0; ++ kbase_free_page_metadata(kbdev, p, &group_id); ++ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); ++ } +} -+#endif /* !MALI_USE_CSF */ + -+int kbase_sync_fence_validate(int fd) ++void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) +{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence = sync_file_get_fence(fd); -+#else -+ struct dma_fence *fence = sync_file_get_fence(fd); -+#endif -+ -+ if (!fence) -+ return -EINVAL; -+ -+ dma_fence_put(fence); ++ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + -+ return 0; /* valid */ ++ spin_lock(&mem_migrate->free_pages_lock); ++ list_add(&p->lru, &mem_migrate->free_pages_list); ++ spin_unlock(&mem_migrate->free_pages_lock); +} + -+#if !MALI_USE_CSF -+enum 
base_jd_event_code -+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) ++/** ++ * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped ++ * in a PGD of kbase_mmu_table. ++ * ++ * @old_page: Existing PGD page to remove ++ * @new_page: Destination for migrating the existing PGD page to ++ * ++ * Replace an existing PGD page with a new page by migrating its content. More specifically: ++ * the new page shall replace the existing PGD page in the MMU page table. Before returning, ++ * the new page shall be set as movable and not isolated, while the old page shall lose ++ * the movable property. The meta data attached to the PGD page is transferred to the ++ * new (replacement) page. ++ * ++ * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure ++ * and the migration is aborted. ++ */ ++static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page) +{ -+ int res; ++ struct kbase_page_metadata *page_md = kbase_page_private(old_page); ++ struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ dma_addr_t old_dma_addr = page_md->dma_addr; ++ dma_addr_t new_dma_addr; ++ int ret; + -+ if (!kbase_fence_out_is_ours(katom)) { -+ /* Not our fence */ -+ return BASE_JD_EVENT_JOB_CANCELLED; -+ } ++ /* Create a new dma map for the new page */ ++ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(kbdev->dev, new_dma_addr)) ++ return -ENOMEM; + -+ res = kbase_fence_out_signal(katom, result); -+ if (unlikely(res < 0)) { -+ dev_warn(katom->kctx->kbdev->dev, -+ "fence_signal() failed with %d\n", res); -+ } ++ /* Lock context to protect access to the page in physical allocation. ++ * This blocks the CPU page fault handler from remapping pages. ++ * Only MCU's mmut is device wide, i.e. no corresponding kctx. ++ */ ++ kbase_gpu_vm_lock(kctx); + -+ kbase_sync_fence_out_remove(katom); ++ ret = kbase_mmu_migrate_page( ++ as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, ++ new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + -+ return (result != 0) ? 
BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; -+} ++ if (ret == 0) { ++ dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ __ClearPageMovable(old_page); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ ClearPagePrivate(old_page); ++ put_page(old_page); + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+static void kbase_fence_wait_callback(struct fence *fence, -+ struct fence_cb *cb) ++ page_md = kbase_page_private(new_page); ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++ __SetPageMovable(new_page, &movable_ops); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else -+static void kbase_fence_wait_callback(struct dma_fence *fence, -+ struct dma_fence_cb *cb) ++ if (kbdev->mem_migrate.inode->i_mapping) { ++ __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); ++ } +#endif -+{ -+ struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom, -+ dma_fence.fence_cb); -+ struct kbase_context *kctx = katom->kctx; -+ -+ /* Cancel atom if fence is erroneous */ -+ if (dma_fence_is_signaled(katom->dma_fence.fence_in) && -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ -+ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) -+ katom->dma_fence.fence_in->error < 0) -+#else -+ katom->dma_fence.fence_in->status < 0) -+#endif -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ SetPagePrivate(new_page); ++ get_page(new_page); ++ } else ++ dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + ++ /* Page fault handler for CPU mapping unblocked. */ ++ kbase_gpu_vm_unlock(kctx); + -+ /* To prevent a potential deadlock we schedule the work onto the -+ * job_done_wq workqueue -+ * -+ * The issue is that we may signal the timeline while holding -+ * kctx->jctx.lock and the callbacks are run synchronously from -+ * sync_timeline_signal. So we simply defer the work. -+ */ -+ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ return ret; +} + -+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) ++/* ++ * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both ++ * allocated and mapped. ++ * ++ * @old_page: Page to remove. ++ * @new_page: Page to add. ++ * ++ * Replace an old page with a new page by migrating its content and all its ++ * CPU and GPU mappings. More specifically: the new page shall replace the ++ * old page in the MMU page table, as well as in the page array of the physical ++ * allocation, which is used to create CPU mappings. Before returning, the new ++ * page shall be set as movable and not isolated, while the old page shall lose ++ * the movable property. ++ */ ++static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ -+ int err; -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ -+ lockdep_assert_held(&katom->kctx->jctx.lock); -+ -+ fence = katom->dma_fence.fence_in; -+ if (!fence) -+ return 0; /* no input fence to wait for, good to go! 
*/ ++ struct kbase_page_metadata *page_md = kbase_page_private(old_page); ++ struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; ++ dma_addr_t old_dma_addr, new_dma_addr; ++ int ret; + -+ err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb, -+ kbase_fence_wait_callback); -+ if (err == -ENOENT) { -+ int fence_status = dma_fence_get_status(fence); ++ old_dma_addr = page_md->dma_addr; ++ new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) ++ return -ENOMEM; + -+ if (fence_status == 1) { -+ /* Fence is already signaled with no error. The completion -+ * for FENCE_WAIT softjob can be done right away. -+ */ -+ return 0; -+ } ++ /* Lock context to protect access to array of pages in physical allocation. ++ * This blocks the CPU page fault handler from remapping pages. ++ */ ++ kbase_gpu_vm_lock(kctx); + -+ /* Fence shouldn't be in not signaled state */ -+ if (!fence_status) { -+ struct kbase_sync_fence_info info; ++ /* Unmap the old physical range. */ ++ unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, ++ PAGE_SIZE, 1); + -+ kbase_sync_fence_in_info_get(katom, &info); ++ ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), ++ as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, ++ MIDGARD_MMU_BOTTOMLEVEL); + -+ dev_warn(katom->kctx->kbdev->dev, -+ "Unexpected status for fence %s of ctx:%d_%d atom:%d", -+ info.name, katom->kctx->tgid, katom->kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom)); -+ } ++ if (ret == 0) { ++ dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ /* If fence is signaled with an error, then the FENCE_WAIT softjob is -+ * considered to be failed. -+ */ -+ } ++ SetPagePrivate(new_page); ++ get_page(new_page); + -+ if (unlikely(err)) { -+ /* We should cause the dependent jobs in the bag to be failed. */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ /* Clear PG_movable from the old page and release reference. */ ++ ClearPagePrivate(old_page); ++ __ClearPageMovable(old_page); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ put_page(old_page); + -+ /* The completion for FENCE_WAIT softjob can be done right away. */ -+ return 0; -+ } ++ page_md = kbase_page_private(new_page); ++ /* Set PG_movable to the new page. */ ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++ __SetPageMovable(new_page, &movable_ops); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); ++#else ++ if (kctx->kbdev->mem_migrate.inode->i_mapping) { ++ __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); ++ page_md->status = PAGE_MOVABLE_SET(page_md->status); ++ } ++#endif ++ } else ++ dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ /* Callback was successfully installed */ -+ katom->dma_fence.fence_cb_added = true; ++ /* Page fault handler for CPU mapping unblocked. */ ++ kbase_gpu_vm_unlock(kctx); + -+ /* Completion to be done later by callback/worker */ -+ return 1; ++ return ret; +} + -+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) ++/** ++ * kbase_page_isolate - Isolate a page for migration. ++ * ++ * @p: Pointer of the page struct of page to isolate. ++ * @mode: LRU Isolation modes. ++ * ++ * Callback function for Linux to isolate a page and prepare it for migration. ++ * ++ * Return: true on success, false otherwise. 
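++ * A page is only isolated if it is movable, not already isolated, and its
++ * metadata lock can be taken without blocking.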
++ */ ++static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) +{ -+ lockdep_assert_held(&katom->kctx->jctx.lock); -+ -+ if (katom->dma_fence.fence_cb_added) { -+ if (!dma_fence_remove_callback(katom->dma_fence.fence_in, -+ &katom->dma_fence.fence_cb)) { -+ /* The callback is already removed so leave the cleanup -+ * for kbase_fence_wait_callback. -+ */ -+ return; -+ } -+ } else { -+ struct kbase_sync_fence_info info; -+ -+ kbase_sync_fence_in_info_get(katom, &info); -+ dev_warn(katom->kctx->kbdev->dev, -+ "Callback was not added earlier for fence %s of ctx:%d_%d atom:%d", -+ info.name, katom->kctx->tgid, katom->kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom)); -+ } ++ bool status_mem_pool = false; ++ struct kbase_mem_pool *mem_pool = NULL; ++ struct kbase_page_metadata *page_md = kbase_page_private(p); + -+ /* Wait was cancelled - zap the atoms */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ CSTD_UNUSED(mode); + -+ kbasep_remove_waiting_soft_job(katom); -+ kbase_finish_soft_job(katom); ++ if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) ++ return false; + -+ if (kbase_jd_done_nolock(katom, true)) -+ kbase_js_sched_all(katom->kctx->kbdev); -+} ++ if (!spin_trylock(&page_md->migrate_lock)) ++ return false; + -+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -+{ -+ kbase_fence_out_remove(katom); -+} ++ if (WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { ++ spin_unlock(&page_md->migrate_lock); ++ return false; ++ } + -+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) -+{ -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ switch (PAGE_STATUS_GET(page_md->status)) { ++ case MEM_POOL: ++ /* Prepare to remove page from memory pool later only if pool is not ++ * in the process of termination. ++ */ ++ mem_pool = page_md->data.mem_pool.pool; ++ status_mem_pool = true; ++ preempt_disable(); ++ atomic_inc(&mem_pool->isolation_in_progress_cnt); ++ break; ++ case ALLOCATED_MAPPED: ++ /* Mark the page into isolated state, but only if it has no ++ * kernel CPU mappings ++ */ ++ if (page_md->vmap_count == 0) ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); ++ break; ++ case PT_MAPPED: ++ /* Mark the page into isolated state. */ ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); ++ break; ++ case SPILL_IN_PROGRESS: ++ case ALLOCATE_IN_PROGRESS: ++ case FREE_IN_PROGRESS: ++ break; ++ case NOT_MOVABLE: ++ /* Opportunistically clear the movable property for these pages */ ++ __ClearPageMovable(p); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ break; ++ default: ++ /* State should always fall in one of the previous cases! ++ * Also notice that FREE_ISOLATED_IN_PROGRESS or ++ * FREE_PT_ISOLATED_IN_PROGRESS is impossible because ++ * that state only applies to pages that are already isolated. ++ */ ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); ++ break; ++ } + -+ if (katom->dma_fence.fence_cb_added) { -+ bool removed = dma_fence_remove_callback(katom->dma_fence.fence_in, -+ &katom->dma_fence.fence_cb); ++ spin_unlock(&page_md->migrate_lock); + -+ /* Here it is expected that the callback should have already been removed -+ * previously either by kbase_sync_fence_in_cancel_wait() or when the fence -+ * was signaled and kbase_sync_fence_wait_worker() was called. -+ */ -+ if (removed) { -+ struct kbase_sync_fence_info info; ++ /* If the page is still in the memory pool: try to remove it. This will fail ++ * if pool lock is taken which could mean page no longer exists in pool. 
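++ * In that case the isolation attempt is abandoned: the in-progress counter is
++ * dropped again and false is returned so the page is left where it is.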
++ */ ++ if (status_mem_pool) { ++ if (!spin_trylock(&mem_pool->pool_lock)) { ++ atomic_dec(&mem_pool->isolation_in_progress_cnt); ++ preempt_enable(); ++ return false; ++ } + -+ kbase_sync_fence_in_info_get(katom, &info); -+ dev_warn(katom->kctx->kbdev->dev, -+ "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d", -+ info.name, katom->kctx->tgid, katom->kctx->id, -+ kbase_jd_atom_id(katom->kctx, katom)); ++ spin_lock(&page_md->migrate_lock); ++ /* Check status again to ensure page has not been removed from memory pool. */ ++ if (PAGE_STATUS_GET(page_md->status) == MEM_POOL) { ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); ++ list_del_init(&p->lru); ++ mem_pool->cur_size--; + } ++ spin_unlock(&page_md->migrate_lock); ++ spin_unlock(&mem_pool->pool_lock); ++ atomic_dec(&mem_pool->isolation_in_progress_cnt); ++ preempt_enable(); + } + -+ kbase_fence_in_remove(katom); -+ katom->dma_fence.fence_cb_added = false; ++ return IS_PAGE_ISOLATED(page_md->status); +} -+#endif /* !MALI_USE_CSF */ + -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+void kbase_sync_fence_info_get(struct fence *fence, -+ struct kbase_sync_fence_info *info) ++/** ++ * kbase_page_migrate - Migrate content of old page to new page provided. ++ * ++ * @mapping: Pointer to address_space struct associated with pages. ++ * @new_page: Pointer to the page struct of new page. ++ * @old_page: Pointer to the page struct of old page. ++ * @mode: Mode to determine if migration will be synchronised. ++ * ++ * Callback function for Linux to migrate the content of the old page to the ++ * new page provided. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, ++ struct page *old_page, enum migrate_mode mode) +#else -+void kbase_sync_fence_info_get(struct dma_fence *fence, -+ struct kbase_sync_fence_info *info) ++static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif +{ -+ info->fence = fence; -+ -+ /* Translate into the following status, with support for error handling: -+ * < 0 : error -+ * 0 : active -+ * 1 : signaled -+ */ -+ if (dma_fence_is_signaled(fence)) { -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ -+ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) -+ int status = fence->error; -+#else -+ int status = fence->status; -+#endif -+ if (status < 0) -+ info->status = status; /* signaled with error */ -+ else -+ info->status = 1; /* signaled with success */ -+ } else { -+ info->status = 0; /* still active (unsignaled) */ -+ } -+ -+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) -+ scnprintf(info->name, sizeof(info->name), "%llu#%u", -+ fence->context, fence->seqno); -+#else -+ scnprintf(info->name, sizeof(info->name), "%llu#%llu", -+ fence->context, fence->seqno); -+#endif -+} ++ int err = 0; ++ bool status_mem_pool = false; ++ bool status_free_pt_isolated_in_progress = false; ++ bool status_free_isolated_in_progress = false; ++ bool status_pt_mapped = false; ++ bool status_mapped = false; ++ bool status_not_movable = false; ++ struct kbase_page_metadata *page_md = kbase_page_private(old_page); ++ struct kbase_device *kbdev = NULL; + -+#if !MALI_USE_CSF -+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) -+{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; 
-+#else -+ struct dma_fence *fence; ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++ CSTD_UNUSED(mapping); +#endif ++ CSTD_UNUSED(mode); + -+ fence = kbase_fence_in_get(katom); -+ if (!fence) -+ return -ENOENT; -+ -+ kbase_sync_fence_info_get(fence, info); ++ if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) ++ return -EINVAL; + -+ kbase_fence_put(fence); ++ if (!spin_trylock(&page_md->migrate_lock)) ++ return -EAGAIN; + -+ return 0; -+} ++ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { ++ spin_unlock(&page_md->migrate_lock); ++ return -EINVAL; ++ } + -+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) -+{ -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif ++ switch (PAGE_STATUS_GET(page_md->status)) { ++ case MEM_POOL: ++ status_mem_pool = true; ++ kbdev = page_md->data.mem_pool.kbdev; ++ break; ++ case ALLOCATED_MAPPED: ++ status_mapped = true; ++ break; ++ case PT_MAPPED: ++ status_pt_mapped = true; ++ break; ++ case FREE_ISOLATED_IN_PROGRESS: ++ status_free_isolated_in_progress = true; ++ kbdev = page_md->data.free_isolated.kbdev; ++ break; ++ case FREE_PT_ISOLATED_IN_PROGRESS: ++ status_free_pt_isolated_in_progress = true; ++ kbdev = page_md->data.free_pt_isolated.kbdev; ++ break; ++ case NOT_MOVABLE: ++ status_not_movable = true; ++ break; ++ default: ++ /* State should always fall in one of the previous cases! */ ++ err = -EAGAIN; ++ break; ++ } + -+ fence = kbase_fence_out_get(katom); -+ if (!fence) -+ return -ENOENT; ++ spin_unlock(&page_md->migrate_lock); + -+ kbase_sync_fence_info_get(fence, info); ++ if (status_mem_pool || status_free_isolated_in_progress || ++ status_free_pt_isolated_in_progress) { ++ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + -+ kbase_fence_put(fence); ++ kbase_free_page_metadata(kbdev, old_page, NULL); ++ __ClearPageMovable(old_page); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ put_page(old_page); + -+ return 0; -+} ++ /* Just free new page to avoid lock contention. */ ++ INIT_LIST_HEAD(&new_page->lru); ++ get_page(new_page); ++ set_page_private(new_page, 0); ++ kbase_free_page_later(kbdev, new_page); ++ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); ++ } else if (status_not_movable) { ++ err = -EINVAL; ++ } else if (status_mapped) { ++ err = kbasep_migrate_page_allocated_mapped(old_page, new_page); ++ } else if (status_pt_mapped) { ++ err = kbasep_migrate_page_pt_mapped(old_page, new_page); ++ } + ++ /* While we want to preserve the movability of pages for which we return ++ * EAGAIN, according to the kernel docs, movable pages for which a critical ++ * error is returned are called putback on, which may not be what we ++ * expect. ++ */ ++ if (err < 0 && err != -EAGAIN) { ++ __ClearPageMovable(old_page); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); ++ } + -+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) -+{ -+ /* Not implemented */ ++ return err; +} -+#endif -+#endif /* !MALI_USE_CSF*/ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c -new file mode 100644 -index 000000000..7df7d79b6 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c -@@ -0,0 +1,225 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+ -+/** -+ * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation -+ * is mapped to GPU for the first time within a process. -+ * Another instantiation is done for the case when that -+ * allocation is mapped for the first time to GPU. -+ * -+ * @dma_buf: Reference to dma_buf been imported. -+ * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. -+ * @import_count: The number of times the dma_buf was imported. -+ */ -+struct kbase_dma_buf { -+ struct dma_buf *dma_buf; -+ struct rb_node dma_buf_node; -+ u32 import_count; -+}; + +/** -+ * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. -+ * -+ * @kctx: Pointer to kbase context. -+ * @dma_buf: Pointer to a dma buffer mapping. -+ * @tree: Pointer to root of rb_tree containing the dma_buf's mapped. ++ * kbase_page_putback - Return isolated page back to kbase. + * -+ * when we un-map any dma mapping we need to remove them from rb_tree, -+ * rb_tree is maintained at kbase_device level and kbase_process level -+ * by passing the root of kbase_device or kbase_process we can remove -+ * the node from the tree. ++ * @p: Pointer of the page struct of page. + * -+ * Return: true on success. ++ * Callback function for Linux to return isolated page back to kbase. This ++ * will only be called for a page that has been isolated but failed to ++ * migrate. This function will put back the given page to the state it was ++ * in before it was isolated. 
+ */ -+static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, -+ struct dma_buf *dma_buf, -+ struct rb_root *tree) ++static void kbase_page_putback(struct page *p) +{ -+ struct kbase_dma_buf *buf_node = NULL; -+ struct rb_node *node = tree->rb_node; -+ bool mapping_removed = false; -+ -+ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); -+ -+ while (node) { -+ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); -+ -+ if (dma_buf == buf_node->dma_buf) { -+ WARN_ON(!buf_node->import_count); -+ -+ buf_node->import_count--; ++ bool status_mem_pool = false; ++ bool status_free_isolated_in_progress = false; ++ bool status_free_pt_isolated_in_progress = false; ++ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ struct kbase_device *kbdev = NULL; + -+ if (!buf_node->import_count) { -+ rb_erase(&buf_node->dma_buf_node, tree); -+ kfree(buf_node); -+ mapping_removed = true; -+ } ++ /* If we don't have page metadata, the page may not belong to the ++ * driver or may already have been freed, and there's nothing we can do ++ */ ++ if (!page_md) ++ return; + -+ break; -+ } ++ spin_lock(&page_md->migrate_lock); + -+ if (dma_buf < buf_node->dma_buf) -+ node = node->rb_left; -+ else -+ node = node->rb_right; ++ if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { ++ spin_unlock(&page_md->migrate_lock); ++ return; + } + -+ WARN_ON(!buf_node); -+ return mapping_removed; -+} ++ switch (PAGE_STATUS_GET(page_md->status)) { ++ case MEM_POOL: ++ status_mem_pool = true; ++ kbdev = page_md->data.mem_pool.kbdev; ++ break; ++ case ALLOCATED_MAPPED: ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); ++ break; ++ case PT_MAPPED: ++ case NOT_MOVABLE: ++ /* Pages should no longer be isolated if they are in a stable state ++ * and used by the driver. ++ */ ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); ++ break; ++ case FREE_ISOLATED_IN_PROGRESS: ++ status_free_isolated_in_progress = true; ++ kbdev = page_md->data.free_isolated.kbdev; ++ break; ++ case FREE_PT_ISOLATED_IN_PROGRESS: ++ status_free_pt_isolated_in_progress = true; ++ kbdev = page_md->data.free_pt_isolated.kbdev; ++ break; ++ default: ++ /* State should always fall in one of the previous cases! */ ++ break; ++ } + -+/** -+ * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. -+ * -+ * @kctx: Pointer to kbase context. -+ * @dma_buf: Pointer to a dma buffer mapping. -+ * @root: Pointer to root of rb_tree containing the dma_buf's. -+ * -+ * We maintain a kbase_device level and kbase_process level rb_tree -+ * of all unique dma_buf's mapped to gpu memory. So when attach any -+ * dma_buf add it the rb_tree's. To add the unique mapping we need -+ * check if the mapping is not a duplicate and then add them. -+ * -+ * Return: true on success -+ */ -+static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, -+ struct dma_buf *dma_buf, -+ struct rb_root *root) -+{ -+ struct kbase_dma_buf *buf_node = NULL; -+ struct rb_node *node = root->rb_node; -+ bool unique_buf_imported = true; ++ spin_unlock(&page_md->migrate_lock); + -+ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); ++ /* If page was in a memory pool then just free it to avoid lock contention. The ++ * same is also true to status_free_pt_isolated_in_progress. 
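++ * Pages that were isolated out of a memory pool, or that were already being
++ * freed, are not returned to the pool; they are queued for deferred freeing.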
++ */ ++ if (status_mem_pool || status_free_isolated_in_progress || ++ status_free_pt_isolated_in_progress) { ++ __ClearPageMovable(p); ++ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + -+ while (node) { -+ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); ++ if (!WARN_ON_ONCE(!kbdev)) { ++ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + -+ if (dma_buf == buf_node->dma_buf) { -+ unique_buf_imported = false; -+ break; ++ kbase_free_page_later(kbdev, p); ++ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } -+ -+ if (dma_buf < buf_node->dma_buf) -+ node = node->rb_left; -+ else -+ node = node->rb_right; + } ++} + -+ if (unique_buf_imported) { -+ struct kbase_dma_buf *new_buf_node = -+ kzalloc(sizeof(*new_buf_node), GFP_KERNEL); ++#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) ++static const struct movable_operations movable_ops = { ++ .isolate_page = kbase_page_isolate, ++ .migrate_page = kbase_page_migrate, ++ .putback_page = kbase_page_putback, ++}; ++#else ++static const struct address_space_operations kbase_address_space_ops = { ++ .isolate_page = kbase_page_isolate, ++ .migratepage = kbase_page_migrate, ++ .putback_page = kbase_page_putback, ++}; ++#endif + -+ if (new_buf_node == NULL) { -+ dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); -+ /* Dont account for it if we fail to allocate memory */ -+ unique_buf_imported = false; -+ } else { -+ struct rb_node **new = &(root->rb_node), *parent = NULL; ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) ++{ ++ mutex_lock(&kbdev->fw_load_lock); + -+ new_buf_node->dma_buf = dma_buf; -+ new_buf_node->import_count = 1; -+ while (*new) { -+ struct kbase_dma_buf *new_node; ++ if (filp) { ++ filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; + -+ parent = *new; -+ new_node = rb_entry(parent, struct kbase_dma_buf, -+ dma_buf_node); -+ if (dma_buf < new_node->dma_buf) -+ new = &(*new)->rb_left; -+ else -+ new = &(*new)->rb_right; -+ } -+ rb_link_node(&new_buf_node->dma_buf_node, parent, new); -+ rb_insert_color(&new_buf_node->dma_buf_node, root); ++ if (!kbdev->mem_migrate.inode) { ++ kbdev->mem_migrate.inode = filp->f_inode; ++ /* This reference count increment is balanced by iput() ++ * upon termination. 
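++ * The saved inode's i_mapping is what later gets passed to
++ * __SetPageMovable() when pages are made movable on pre-6.0 kernels.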
++ */ ++ atomic_inc(&filp->f_inode->i_count); ++ } else { ++ WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } -+ } else if (!WARN_ON(!buf_node)) { -+ buf_node->import_count++; + } + -+ return unique_buf_imported; ++ mutex_unlock(&kbdev->fw_load_lock); +} ++#endif + -+void kbase_remove_dma_buf_usage(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc) ++void kbase_mem_migrate_init(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool dev_mapping_removed, prcs_mapping_removed; -+ -+ mutex_lock(&kbdev->dma_buf_lock); -+ -+ dev_mapping_removed = kbase_delete_dma_buf_mapping( -+ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); -+ -+ prcs_mapping_removed = kbase_delete_dma_buf_mapping( -+ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); -+ -+ WARN_ON(dev_mapping_removed && !prcs_mapping_removed); -+ -+ spin_lock(&kbdev->gpu_mem_usage_lock); -+ if (dev_mapping_removed) -+ kbdev->total_gpu_pages -= alloc->nents; ++ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + -+ if (prcs_mapping_removed) -+ kctx->kprcs->total_gpu_pages -= alloc->nents; ++ if (kbase_page_migration_enabled < 0) ++ kbase_page_migration_enabled = 0; + -+ if (dev_mapping_removed || prcs_mapping_removed) -+ kbase_trace_gpu_mem_usage(kbdev, kctx); -+ spin_unlock(&kbdev->gpu_mem_usage_lock); ++ spin_lock_init(&mem_migrate->free_pages_lock); ++ INIT_LIST_HEAD(&mem_migrate->free_pages_list); + -+ mutex_unlock(&kbdev->dma_buf_lock); ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++ mem_migrate->inode = NULL; ++#endif ++ mem_migrate->free_pages_workq = ++ alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); ++ INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); +} + -+void kbase_add_dma_buf_usage(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc) ++void kbase_mem_migrate_term(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool unique_dev_dmabuf, unique_prcs_dmabuf; -+ -+ mutex_lock(&kbdev->dma_buf_lock); -+ -+ /* add dma_buf to device and process. 
*/ -+ unique_dev_dmabuf = kbase_capture_dma_buf_mapping( -+ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); -+ -+ unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( -+ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); -+ -+ WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); -+ -+ spin_lock(&kbdev->gpu_mem_usage_lock); -+ if (unique_dev_dmabuf) -+ kbdev->total_gpu_pages += alloc->nents; -+ -+ if (unique_prcs_dmabuf) -+ kctx->kprcs->total_gpu_pages += alloc->nents; -+ -+ if (unique_prcs_dmabuf || unique_dev_dmabuf) -+ kbase_trace_gpu_mem_usage(kbdev, kctx); -+ spin_unlock(&kbdev->gpu_mem_usage_lock); ++ struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + -+ mutex_unlock(&kbdev->dma_buf_lock); ++ if (mem_migrate->free_pages_workq) ++ destroy_workqueue(mem_migrate->free_pages_workq); ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) ++ iput(mem_migrate->inode); ++#endif +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h new file mode 100644 -index 000000000..fd871fcb3 +index 000000000..76bbc999e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h -@@ -0,0 +1,100 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +@@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -250243,142 +251893,103 @@ index 000000000..fd871fcb3 + * + */ + -+#ifndef _KBASE_TRACE_GPU_MEM_H_ -+#define _KBASE_TRACE_GPU_MEM_H_ -+ -+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) -+#include -+#endif -+ -+#define DEVICE_TGID ((u32) 0U) -+ -+static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) -+ lockdep_assert_held(&kbdev->gpu_mem_usage_lock); -+ -+ trace_gpu_mem_total(kbdev->id, DEVICE_TGID, -+ kbdev->total_gpu_pages << PAGE_SHIFT); -+ -+ if (likely(kctx)) -+ trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, -+ kctx->kprcs->total_gpu_pages << PAGE_SHIFT); -+#endif -+} -+ -+static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, -+ struct kbase_context *kctx, size_t pages) -+{ -+ spin_lock(&kbdev->gpu_mem_usage_lock); -+ -+ if (likely(kctx)) -+ kctx->kprcs->total_gpu_pages -= pages; -+ -+ kbdev->total_gpu_pages -= pages; -+ -+ kbase_trace_gpu_mem_usage(kbdev, kctx); -+ -+ spin_unlock(&kbdev->gpu_mem_usage_lock); -+} ++/** ++ * DOC: Base kernel page migration implementation. 
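++ *
++ * A page's migration state is tracked in a single status byte: bits 0-5 hold
++ * the page status, bit 6 the movable flag and bit 7 the isolate flag (see the
++ * masks below).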
++ */ + -+static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, -+ struct kbase_context *kctx, size_t pages) -+{ -+ spin_lock(&kbdev->gpu_mem_usage_lock); ++#define PAGE_STATUS_MASK ((u8)0x3F) ++#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK) ++#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK)) + -+ if (likely(kctx)) -+ kctx->kprcs->total_gpu_pages += pages; ++#define PAGE_ISOLATE_SHIFT (7) ++#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT) ++#define PAGE_ISOLATE_SET(status, value) \ ++ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT)) ++#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK)) + -+ kbdev->total_gpu_pages += pages; ++#define PAGE_MOVABLE_SHIFT (6) ++#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT) ++#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK) ++#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK) + -+ kbase_trace_gpu_mem_usage(kbdev, kctx); ++#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK)) + -+ spin_unlock(&kbdev->gpu_mem_usage_lock); -+} ++/* Global integer used to determine if module parameter value has been ++ * provided and if page migration feature is enabled. ++ */ ++extern int kbase_page_migration_enabled; + +/** -+ * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured. ++ * kbase_alloc_page_metadata - Allocate and initialize page metadata ++ * @kbdev: Pointer to kbase device. ++ * @p: Page to assign metadata to. ++ * @dma_addr: DMA address mapped to paged. ++ * @group_id: Memory group ID associated with the entity that is ++ * allocating the page metadata. + * -+ * @kctx: Pointer to the kbase context -+ * @alloc: Pointer to the alloc to unmap ++ * This will allocate memory for the page's metadata, initialize it and ++ * assign a reference to the page's private field. Importantly, once ++ * the metadata is set and ready this function will mark the page as ++ * movable. + * -+ * Remove reference to dma buf been unmapped from kbase_device level -+ * rb_tree and Kbase_process level dma buf rb_tree. ++ * Return: true if successful or false otherwise. + */ -+void kbase_remove_dma_buf_usage(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc); ++bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, ++ u8 group_id); + +/** -+ * kbase_add_dma_buf_usage - Add a dma-buf entry captured. -+ * -+ * @kctx: Pointer to the kbase context -+ * @alloc: Pointer to the alloc to map in ++ * kbase_free_page_later - Defer freeing of given page. ++ * @kbdev: Pointer to kbase device ++ * @p: Page to free + * -+ * Add reference to dma buf been mapped to kbase_device level -+ * rb_tree and Kbase_process level dma buf rb_tree. ++ * This will add given page to a list of pages which will be freed at ++ * a later time. 
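++ * The list is drained by the free_pages_work item queued on the driver's
++ * free_pages_workq workqueue.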
+ */ -+void kbase_add_dma_buf_usage(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc); ++void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); + -+#endif /* _KBASE_TRACE_GPU_MEM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h -new file mode 100644 -index 000000000..bd66f7167 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_utility.h -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) +/* ++ * kbase_mem_migrate_set_address_space_ops - Set address space operations + * -+ * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: Pointer to object representing an instance of GPU platform device. ++ * @filp: Pointer to the struct file corresponding to device file ++ * /dev/malixx instance, passed to the file's open method. + * ++ * Assign address space operations to the given file struct @filp and ++ * add a reference to @kbdev. + */ -+ -+#ifndef _KBASE_UTILITY_H -+#define _KBASE_UTILITY_H -+ -+#ifndef _KBASE_H_ -+#error "Don't include this file directly, use mali_kbase.h instead" -+#endif -+ -+#ifndef WRITE_ONCE -+ #ifdef ASSIGN_ONCE -+ #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) -+ #else -+ #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) -+ #endif ++void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif + -+#ifndef READ_ONCE -+ #define READ_ONCE(x) ACCESS_ONCE(x) -+#endif ++/* ++ * kbase_mem_migrate_init - Initialise kbase page migration ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * Enables page migration by default based on GPU and setup work queue to ++ * defer freeing pages during page migration callbacks. ++ */ ++void kbase_mem_migrate_init(struct kbase_device *kbdev); + -+#endif /* _KBASE_UTILITY_H */ -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c ++/* ++ * kbase_mem_migrate_term - Terminate kbase page migration ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * This will flush any work left to free pages from page migration ++ * and destroy workqueue associated. ++ */ ++void kbase_mem_migrate_term(struct kbase_device *kbdev); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c new file mode 100644 -index 000000000..d770913e9 +index 000000000..fa8f34d86 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c -@@ -0,0 +1,1132 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +@@ -0,0 +1,1029 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2022 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -250396,1223 +252007,1024 @@ index 000000000..d770913e9 + * + */ + -+#include "mali_kbase_vinstr.h" -+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" -+#include "hwcnt/mali_kbase_hwcnt_types.h" -+#include -+#include "hwcnt/mali_kbase_hwcnt_gpu.h" -+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" -+#include -+#include "mali_malisw.h" -+#include "mali_kbase_debug.h" -+ -+#include -+#include -+#include -+#include -+#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+ -+/* Explicitly include epoll header for old kernels. Not required from 4.16. */ -+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++#include ++#else ++#include +#endif + -+/* Hwcnt reader API version */ -+#define HWCNT_READER_API 1 -+ -+/* The minimum allowed interval between dumps (equivalent to 10KHz) */ -+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -+ -+/* The maximum allowed buffers per client */ -+#define MAX_BUFFER_COUNT 32 ++#define pool_dbg(pool, format, ...) \ ++ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ ++ (pool->next_pool) ? "kctx" : "kbdev", \ ++ kbase_mem_pool_size(pool), \ ++ kbase_mem_pool_max_size(pool), \ ++ ##__VA_ARGS__) + -+/** -+ * struct kbase_vinstr_context - IOCTL interface for userspace hardware -+ * counters. -+ * @hvirt: Hardware counter virtualizer used by vinstr. -+ * @metadata: Hardware counter metadata provided by virtualizer. -+ * @metadata_user: API compatible hardware counter metadata provided by vinstr. -+ * For compatibility with the user driver interface, this -+ * contains a narrowed version of the HWCNT metadata limited -+ * to 64 entries per block of 32 bits each. -+ * @lock: Lock protecting all vinstr state. -+ * @suspend_count: Suspend reference count. If non-zero, timer and worker are -+ * prevented from being re-scheduled. -+ * @client_count: Number of vinstr clients. -+ * @clients: List of vinstr clients. -+ * @dump_timer: Timer that enqueues dump_work to a workqueue. -+ * @dump_work: Worker for performing periodic counter dumps. -+ */ -+struct kbase_vinstr_context { -+ struct kbase_hwcnt_virtualizer *hvirt; -+ const struct kbase_hwcnt_metadata *metadata; -+ const struct kbase_hwcnt_metadata_narrow *metadata_user; -+ struct mutex lock; -+ size_t suspend_count; -+ size_t client_count; -+ struct list_head clients; -+ struct hrtimer dump_timer; -+ struct work_struct dump_work; -+}; ++#define NOT_DIRTY false ++#define NOT_RECLAIMED false + +/** -+ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. -+ * @vctx: Vinstr context client is attached to. -+ * @hvcli: Hardware counter virtualizer client. -+ * @node: Node used to attach this client to list in vinstr -+ * context. -+ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic -+ * client. -+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must -+ * occur. If 0, not a periodic client. -+ * @enable_map: Counters enable map. -+ * @tmp_buf: Temporary buffer to use before handing dump to client. -+ * @dump_bufs: Array of narrow dump buffers allocated by this client. -+ * @dump_bufs_meta: Metadata of hwcnt reader client buffers. -+ * @meta_idx: Index of metadata being accessed by userspace. 
-+ * @read_idx: Index of buffer read by userspace. -+ * @write_idx: Index of buffer being written by dump worker. -+ * @waitq: Client's notification queue. ++ * can_alloc_page() - Check if the current thread can allocate a physical page ++ * ++ * @pool: Pointer to the memory pool. ++ * @page_owner: Pointer to the task/process that created the Kbase context ++ * for which a page needs to be allocated. It can be NULL if ++ * the page won't be associated with Kbase context. ++ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread. ++ * ++ * This function checks if the current thread is a kernel thread and can make a ++ * request to kernel to allocate a physical page. If the kernel thread is allocating ++ * a page for the Kbase context and the process that created the context is exiting ++ * or is being killed, then there is no point in doing a page allocation. ++ * ++ * The check done by the function is particularly helpful when the system is running ++ * low on memory. When a page is allocated from the context of a kernel thread, OoM ++ * killer doesn't consider the kernel thread for killing and kernel keeps retrying ++ * to allocate the page as long as the OoM killer is able to kill processes. ++ * The check allows kernel thread to quickly exit the page allocation loop once OoM ++ * killer has initiated the killing of @page_owner, thereby unblocking the context ++ * termination for @page_owner and freeing of GPU memory allocated by it. This helps ++ * in preventing the kernel panic and also limits the number of innocent processes ++ * that get killed. ++ * ++ * Return: true if the page can be allocated otherwise false. + */ -+struct kbase_vinstr_client { -+ struct kbase_vinstr_context *vctx; -+ struct kbase_hwcnt_virtualizer_client *hvcli; -+ struct list_head node; -+ u64 next_dump_time_ns; -+ u32 dump_interval_ns; -+ struct kbase_hwcnt_enable_map enable_map; -+ struct kbase_hwcnt_dump_buffer tmp_buf; -+ struct kbase_hwcnt_dump_buffer_narrow_array dump_bufs; -+ struct kbase_hwcnt_reader_metadata *dump_bufs_meta; -+ atomic_t meta_idx; -+ atomic_t read_idx; -+ atomic_t write_idx; -+ wait_queue_head_t waitq; -+}; -+ -+static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait); ++static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner, ++ const bool alloc_from_kthread) ++{ ++ if (likely(!alloc_from_kthread || !page_owner)) ++ return true; + -+static long kbasep_vinstr_hwcnt_reader_ioctl( -+ struct file *filp, -+ unsigned int cmd, -+ unsigned long arg); ++ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) { ++ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", ++ __func__, page_owner->comm, task_pid_nr(page_owner)); ++ return false; ++ } + -+static int kbasep_vinstr_hwcnt_reader_mmap( -+ struct file *filp, -+ struct vm_area_struct *vma); ++ return true; ++} + -+static int kbasep_vinstr_hwcnt_reader_release( -+ struct inode *inode, -+ struct file *filp); ++static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) ++{ ++ ssize_t max_size = kbase_mem_pool_max_size(pool); ++ ssize_t cur_size = kbase_mem_pool_size(pool); + -+/* Vinstr client file operations */ -+static const struct file_operations vinstr_client_fops = { -+ .owner = THIS_MODULE, -+ .poll = kbasep_vinstr_hwcnt_reader_poll, -+ .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, -+ .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, -+ .mmap = kbasep_vinstr_hwcnt_reader_mmap, -+ .release = 
kbasep_vinstr_hwcnt_reader_release, -+}; ++ return max(max_size - cur_size, (ssize_t)0); ++} + -+/** -+ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. -+ * -+ * Return: Current time in nanoseconds. -+ */ -+static u64 kbasep_vinstr_timestamp_ns(void) ++static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) +{ -+ return ktime_get_raw_ns(); ++ return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); +} + -+/** -+ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. -+ * @cur_ts_ns: Current time in nanoseconds. -+ * @interval: Interval between dumps in nanoseconds. -+ * -+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump -+ * time that occurs after cur_ts_ns. -+ */ -+static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) ++static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) +{ -+ /* Non-periodic client */ -+ if (interval == 0) -+ return 0; -+ -+ /* -+ * Return the next interval after the current time relative to t=0. -+ * This means multiple clients with the same period will synchronise, -+ * regardless of when they were started, allowing the worker to be -+ * scheduled less frequently. -+ */ -+ do_div(cur_ts_ns, interval); -+ return (cur_ts_ns + 1) * interval; ++ return kbase_mem_pool_size(pool) == 0; +} + -+/** -+ * kbasep_vinstr_client_dump() - Perform a dump for a client. -+ * @vcli: Non-NULL pointer to a vinstr client. -+ * @event_id: Event type that triggered the dump. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_vinstr_client_dump( -+ struct kbase_vinstr_client *vcli, -+ enum base_hwcnt_reader_event event_id) ++static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, ++ struct list_head *page_list, size_t *list_size) +{ -+ int errcode; -+ u64 ts_start_ns; -+ u64 ts_end_ns; -+ unsigned int write_idx; -+ unsigned int read_idx; -+ struct kbase_hwcnt_dump_buffer *tmp_buf; -+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf; -+ struct kbase_hwcnt_reader_metadata *meta; -+ u8 clk_cnt; ++ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ bool not_movable = false; + -+ WARN_ON(!vcli); -+ lockdep_assert_held(&vcli->vctx->lock); ++ lockdep_assert_held(&pool->pool_lock); + -+ write_idx = atomic_read(&vcli->write_idx); -+ read_idx = atomic_read(&vcli->read_idx); ++ /* Free the page instead of adding it to the pool if it's not movable. ++ * Only update page status and add the page to the memory pool if ++ * it is not isolated. ++ */ ++ spin_lock(&page_md->migrate_lock); ++ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { ++ not_movable = true; ++ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); ++ page_md->data.mem_pool.pool = pool; ++ page_md->data.mem_pool.kbdev = pool->kbdev; ++ list_add(&p->lru, page_list); ++ (*list_size)++; ++ } ++ spin_unlock(&page_md->migrate_lock); + -+ /* Check if there is a place to copy HWC block into. 
*/ -+ if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) -+ return -EBUSY; -+ write_idx %= vcli->dump_bufs.buf_cnt; ++ if (not_movable) { ++ kbase_free_page_later(pool->kbdev, p); ++ pool_dbg(pool, "skipping a not movable page\n"); ++ } + -+ dump_buf = &vcli->dump_bufs.bufs[write_idx]; -+ meta = &vcli->dump_bufs_meta[write_idx]; -+ tmp_buf = &vcli->tmp_buf; ++ return not_movable; ++} + -+ errcode = kbase_hwcnt_virtualizer_client_dump( -+ vcli->hvcli, &ts_start_ns, &ts_end_ns, tmp_buf); -+ if (errcode) -+ return errcode; ++static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, ++ struct page *p) ++{ ++ bool queue_work_to_free = false; + -+ /* Patch the dump buf headers, to hide the counters that other hwcnt -+ * clients are using. -+ */ -+ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &vcli->enable_map); ++ lockdep_assert_held(&pool->pool_lock); + -+ /* Copy the temp buffer to the userspace visible buffer. The strict -+ * variant will explicitly zero any non-enabled counters to ensure -+ * nothing except exactly what the user asked for is made visible. -+ * -+ * A narrow copy is required since virtualizer has a bigger buffer -+ * but user only needs part of it. -+ */ -+ kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf, -+ &vcli->enable_map); ++ if (!pool->order && kbase_page_migration_enabled) { ++ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) ++ queue_work_to_free = true; ++ } else { ++ list_add(&p->lru, &pool->page_list); ++ pool->cur_size++; ++ } + -+ clk_cnt = vcli->vctx->metadata->clk_cnt; ++ if (queue_work_to_free) { ++ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + -+ meta->timestamp = ts_end_ns; -+ meta->event_id = event_id; -+ meta->buffer_idx = write_idx; -+ meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; -+ meta->cycles.shader_cores = -+ (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; ++ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); ++ } + -+ /* Notify client. Make sure all changes to memory are visible. */ -+ wmb(); -+ atomic_inc(&vcli->write_idx); -+ wake_up_interruptible(&vcli->waitq); -+ return 0; ++ pool_dbg(pool, "added page\n"); +} + -+/** -+ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. -+ * @vcli: Non-NULL pointer to a vinstr client. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) ++static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) +{ -+ u64 ts_start_ns; -+ u64 ts_end_ns; -+ -+ WARN_ON(!vcli); -+ lockdep_assert_held(&vcli->vctx->lock); -+ -+ /* A virtualizer dump with a NULL buffer will just clear the virtualizer -+ * client's buffer. -+ */ -+ return kbase_hwcnt_virtualizer_client_dump( -+ vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); ++ kbase_mem_pool_lock(pool); ++ kbase_mem_pool_add_locked(pool, p); ++ kbase_mem_pool_unlock(pool); +} + -+/** -+ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic -+ * vinstr clients, then reschedule the dump -+ * worker appropriately. -+ * @vctx: Non-NULL pointer to the vinstr context. -+ * -+ * If there are no periodic clients, then the dump worker will not be -+ * rescheduled. Else, the dump worker will be rescheduled for the next periodic -+ * client dump. 
-+ */ -+static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) ++static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, ++ struct list_head *page_list, size_t nr_pages) +{ -+ u64 cur_ts_ns; -+ u64 earliest_next_ns = U64_MAX; -+ struct kbase_vinstr_client *pos; ++ bool queue_work_to_free = false; + -+ WARN_ON(!vctx); -+ lockdep_assert_held(&vctx->lock); ++ lockdep_assert_held(&pool->pool_lock); + -+ cur_ts_ns = kbasep_vinstr_timestamp_ns(); ++ if (!pool->order && kbase_page_migration_enabled) { ++ struct page *p, *tmp; + -+ /* -+ * Update each client's next dump time, and find the earliest next -+ * dump time if any of the clients have a non-zero interval. -+ */ -+ list_for_each_entry(pos, &vctx->clients, node) { -+ const u64 cli_next_ns = -+ kbasep_vinstr_next_dump_time_ns( -+ cur_ts_ns, pos->dump_interval_ns); ++ list_for_each_entry_safe(p, tmp, page_list, lru) { ++ list_del_init(&p->lru); ++ if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) ++ queue_work_to_free = true; ++ } ++ } else { ++ list_splice(page_list, &pool->page_list); ++ pool->cur_size += nr_pages; ++ } + -+ /* Non-zero next dump time implies a periodic client */ -+ if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) -+ earliest_next_ns = cli_next_ns; ++ if (queue_work_to_free) { ++ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + -+ pos->next_dump_time_ns = cli_next_ns; ++ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + -+ /* Cancel the timer if it is already pending */ -+ hrtimer_cancel(&vctx->dump_timer); ++ pool_dbg(pool, "added %zu pages\n", nr_pages); ++} + -+ /* Start the timer if there are periodic clients and vinstr is not -+ * suspended. -+ */ -+ if ((earliest_next_ns != U64_MAX) && -+ (vctx->suspend_count == 0) && -+ !WARN_ON(earliest_next_ns < cur_ts_ns)) -+ hrtimer_start( -+ &vctx->dump_timer, -+ ns_to_ktime(earliest_next_ns - cur_ts_ns), -+ HRTIMER_MODE_REL); ++static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, ++ struct list_head *page_list, size_t nr_pages) ++{ ++ kbase_mem_pool_lock(pool); ++ kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); ++ kbase_mem_pool_unlock(pool); +} + -+/** -+ * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients -+ * that need to be dumped, then reschedules itself. -+ * @work: Work structure. -+ */ -+static void kbasep_vinstr_dump_worker(struct work_struct *work) ++static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool, ++ enum kbase_page_status status) +{ -+ struct kbase_vinstr_context *vctx = -+ container_of(work, struct kbase_vinstr_context, dump_work); -+ struct kbase_vinstr_client *pos; -+ u64 cur_time_ns; ++ struct page *p; + -+ mutex_lock(&vctx->lock); ++ lockdep_assert_held(&pool->pool_lock); + -+ cur_time_ns = kbasep_vinstr_timestamp_ns(); ++ if (kbase_mem_pool_is_empty(pool)) ++ return NULL; + -+ /* Dump all periodic clients whose next dump time is before the current -+ * time. 
-+ */ -+ list_for_each_entry(pos, &vctx->clients, node) { -+ if ((pos->next_dump_time_ns != 0) && -+ (pos->next_dump_time_ns < cur_time_ns)) -+ kbasep_vinstr_client_dump( -+ pos, BASE_HWCNT_READER_EVENT_PERIODIC); ++ p = list_first_entry(&pool->page_list, struct page, lru); ++ ++ if (!pool->order && kbase_page_migration_enabled) { ++ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ ++ spin_lock(&page_md->migrate_lock); ++ WARN_ON(PAGE_STATUS_GET(page_md->status) != (u8)MEM_POOL); ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); ++ spin_unlock(&page_md->migrate_lock); + } + -+ /* Update the next dump times of all periodic clients, then reschedule -+ * this worker at the earliest next dump time. -+ */ -+ kbasep_vinstr_reschedule_worker(vctx); ++ list_del_init(&p->lru); ++ pool->cur_size--; + -+ mutex_unlock(&vctx->lock); ++ pool_dbg(pool, "removed page\n"); ++ ++ return p; +} + -+/** -+ * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for -+ * execution as soon as possible. -+ * @timer: Timer structure. -+ * -+ * Return: HRTIMER_NORESTART always. -+ */ -+static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) ++static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool, ++ enum kbase_page_status status) +{ -+ struct kbase_vinstr_context *vctx = -+ container_of(timer, struct kbase_vinstr_context, dump_timer); ++ struct page *p; + -+ /* We don't need to check vctx->suspend_count here, as the suspend -+ * function will ensure that any worker enqueued here is immediately -+ * cancelled, and the worker itself won't reschedule this timer if -+ * suspend_count != 0. -+ */ -+ kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work); -+ return HRTIMER_NORESTART; ++ kbase_mem_pool_lock(pool); ++ p = kbase_mem_pool_remove_locked(pool, status); ++ kbase_mem_pool_unlock(pool); ++ ++ return p; +} + -+/** -+ * kbasep_vinstr_client_destroy() - Destroy a vinstr client. -+ * @vcli: vinstr client. Must not be attached to a vinstr context. -+ */ -+static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) ++static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, ++ struct page *p) +{ -+ if (!vcli) -+ return; ++ struct device *dev = pool->kbdev->dev; ++ dma_addr_t dma_addr = pool->order ? kbase_dma_addr_as_priv(p) : kbase_dma_addr(p); + -+ kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); -+ kfree(vcli->dump_bufs_meta); -+ kbase_hwcnt_dump_buffer_narrow_array_free(&vcli->dump_bufs); -+ kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf); -+ kbase_hwcnt_enable_map_free(&vcli->enable_map); -+ kfree(vcli); ++ dma_sync_single_for_device(dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); +} + -+/** -+ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to -+ * the vinstr context. -+ * @vctx: Non-NULL pointer to vinstr context. -+ * @setup: Non-NULL pointer to hardware counter ioctl setup structure. -+ * setup->buffer_count must not be 0 and must be a power of 2. -+ * @out_vcli: Non-NULL pointer to where created client will be stored on -+ * success. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static int kbasep_vinstr_client_create( -+ struct kbase_vinstr_context *vctx, -+ struct kbase_ioctl_hwcnt_reader_setup *setup, -+ struct kbase_vinstr_client **out_vcli) ++static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, ++ struct page *p) +{ -+ int errcode; -+ struct kbase_vinstr_client *vcli; -+ struct kbase_hwcnt_physical_enable_map phys_em; -+ -+ WARN_ON(!vctx); -+ WARN_ON(!setup); -+ WARN_ON(setup->buffer_count == 0); -+ WARN_ON(!is_power_of_2(setup->buffer_count)); ++ int i; + -+ vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); -+ if (!vcli) -+ return -ENOMEM; ++ for (i = 0; i < (1U << pool->order); i++) ++ clear_highpage(p+i); + -+ vcli->vctx = vctx; ++ kbase_mem_pool_sync_page(pool, p); ++} + -+ errcode = kbase_hwcnt_enable_map_alloc( -+ vctx->metadata, &vcli->enable_map); -+ if (errcode) -+ goto error; ++static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, ++ struct page *p) ++{ ++ /* Zero page before spilling */ ++ kbase_mem_pool_zero_page(next_pool, p); + -+ phys_em.fe_bm = setup->fe_bm; -+ phys_em.shader_bm = setup->shader_bm; -+ phys_em.tiler_bm = setup->tiler_bm; -+ phys_em.mmu_l2_bm = setup->mmu_l2_bm; -+ kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); ++ kbase_mem_pool_add(next_pool, p); ++} + -+ /* Use virtualizer's metadata to alloc tmp buffer which interacts with -+ * the HWC virtualizer. -+ */ -+ errcode = kbase_hwcnt_dump_buffer_alloc(vctx->metadata, &vcli->tmp_buf); -+ if (errcode) -+ goto error; ++struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) ++{ ++ struct page *p; ++ gfp_t gfp = __GFP_ZERO; ++ struct kbase_device *const kbdev = pool->kbdev; ++ struct device *const dev = kbdev->dev; ++ dma_addr_t dma_addr; ++ int i; + -+ /* Enable all the available clk_enable_map. */ -+ vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; ++ /* don't warn on higher order failures */ ++ if (pool->order) ++ gfp |= GFP_HIGHUSER | __GFP_NOWARN; ++ else ++ gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + -+ /* Use vinstr's narrowed metadata to alloc narrow dump buffers which -+ * interact with clients. 
-+ */ -+ errcode = kbase_hwcnt_dump_buffer_narrow_array_alloc( -+ vctx->metadata_user, setup->buffer_count, &vcli->dump_bufs); -+ if (errcode) -+ goto error; ++ p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, ++ pool->group_id, gfp, pool->order); ++ if (!p) ++ return NULL; + -+ errcode = -ENOMEM; -+ vcli->dump_bufs_meta = kmalloc_array( -+ setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); -+ if (!vcli->dump_bufs_meta) -+ goto error; ++ dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), ++ DMA_BIDIRECTIONAL); + -+ errcode = kbase_hwcnt_virtualizer_client_create( -+ vctx->hvirt, &vcli->enable_map, &vcli->hvcli); -+ if (errcode) -+ goto error; ++ if (dma_mapping_error(dev, dma_addr)) { ++ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, ++ pool->group_id, p, pool->order); ++ return NULL; ++ } + -+ init_waitqueue_head(&vcli->waitq); ++ /* Setup page metadata for 4KB pages when page migration is enabled */ ++ if (!pool->order && kbase_page_migration_enabled) { ++ INIT_LIST_HEAD(&p->lru); ++ if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { ++ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, ++ pool->order); ++ return NULL; ++ } ++ } else { ++ WARN_ON(dma_addr != page_to_phys(p)); ++ for (i = 0; i < (1u << pool->order); i++) ++ kbase_set_dma_addr_as_priv(p + i, dma_addr + PAGE_SIZE * i); ++ } + -+ *out_vcli = vcli; -+ return 0; -+error: -+ kbasep_vinstr_client_destroy(vcli); -+ return errcode; ++ return p; +} + -+int kbase_vinstr_init( -+ struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_vinstr_context **out_vctx) ++static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) +{ -+ int errcode; -+ struct kbase_vinstr_context *vctx; -+ const struct kbase_hwcnt_metadata *metadata; ++ struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + -+ if (!hvirt || !out_vctx) -+ return -EINVAL; ++ if (!pool->order && kbase_page_migration_enabled) ++ queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); ++} + -+ metadata = kbase_hwcnt_virtualizer_metadata(hvirt); -+ if (!metadata) -+ return -EINVAL; ++void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) ++{ ++ struct kbase_device *kbdev; + -+ vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); -+ if (!vctx) -+ return -ENOMEM; ++ if (WARN_ON(!pool)) ++ return; ++ if (WARN_ON(!p)) ++ return; + -+ vctx->hvirt = hvirt; -+ vctx->metadata = metadata; -+ errcode = kbase_hwcnt_gpu_metadata_narrow_create(&vctx->metadata_user, -+ metadata); -+ if (errcode) -+ goto err_metadata_create; ++ kbdev = pool->kbdev; + -+ mutex_init(&vctx->lock); -+ INIT_LIST_HEAD(&vctx->clients); -+ hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ vctx->dump_timer.function = kbasep_vinstr_dump_timer; -+ INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); ++ if (!pool->order && kbase_page_migration_enabled) { ++ kbase_free_page_later(kbdev, p); ++ pool_dbg(pool, "page to be freed to kernel later\n"); ++ } else { ++ int i; ++ dma_addr_t dma_addr = kbase_dma_addr_as_priv(p); + -+ *out_vctx = vctx; -+ return 0; ++ for (i = 0; i < (1u << pool->order); i++) ++ kbase_clear_dma_addr_as_priv(p + i); + -+err_metadata_create: -+ kfree(vctx); ++ dma_unmap_page(kbdev->dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); + -+ return errcode; ++ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); ++ ++ pool_dbg(pool, "freed page to kernel\n"); ++ 
} +} + -+void kbase_vinstr_term(struct kbase_vinstr_context *vctx) ++static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, ++ size_t nr_to_shrink) +{ -+ if (!vctx) -+ return; ++ struct page *p; ++ size_t i; + -+ /* Non-zero client count implies client leak */ -+ if (WARN_ON(vctx->client_count != 0)) { -+ struct kbase_vinstr_client *pos, *n; ++ lockdep_assert_held(&pool->pool_lock); + -+ list_for_each_entry_safe(pos, n, &vctx->clients, node) { -+ list_del(&pos->node); -+ vctx->client_count--; -+ kbasep_vinstr_client_destroy(pos); -+ } ++ for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { ++ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); ++ kbase_mem_pool_free_page(pool, p); + } + -+ cancel_work_sync(&vctx->dump_work); -+ kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user); ++ /* Freeing of pages will be deferred when page migration is enabled. */ ++ enqueue_free_pool_pages_work(pool); + -+ WARN_ON(vctx->client_count != 0); -+ kfree(vctx); ++ return i; +} + -+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) ++static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, ++ size_t nr_to_shrink) +{ -+ if (WARN_ON(!vctx)) -+ return; -+ -+ mutex_lock(&vctx->lock); -+ -+ if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) -+ vctx->suspend_count++; ++ size_t nr_freed; + -+ mutex_unlock(&vctx->lock); ++ kbase_mem_pool_lock(pool); ++ nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); ++ kbase_mem_pool_unlock(pool); + -+ /* Always sync cancel the timer and then the worker, regardless of the -+ * new suspend count. -+ * -+ * This ensures concurrent calls to kbase_vinstr_suspend() always block -+ * until vinstr is fully suspended. -+ * -+ * The timer is cancelled before the worker, as the timer -+ * unconditionally re-enqueues the worker, but the worker checks the -+ * suspend_count that we just incremented before rescheduling the timer. -+ * -+ * Therefore if we cancel the worker first, the timer might re-enqueue -+ * the worker before we cancel the timer, but the opposite is not -+ * possible. -+ */ -+ hrtimer_cancel(&vctx->dump_timer); -+ cancel_work_sync(&vctx->dump_work); ++ return nr_freed; +} + -+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) ++int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, ++ struct task_struct *page_owner) +{ -+ if (WARN_ON(!vctx)) -+ return; ++ struct page *p; ++ size_t i; ++ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); + -+ mutex_lock(&vctx->lock); ++ kbase_mem_pool_lock(pool); + -+ if (!WARN_ON(vctx->suspend_count == 0)) { -+ vctx->suspend_count--; ++ pool->dont_reclaim = true; ++ for (i = 0; i < nr_to_grow; i++) { ++ if (pool->dying) { ++ pool->dont_reclaim = false; ++ kbase_mem_pool_shrink_locked(pool, nr_to_grow); ++ kbase_mem_pool_unlock(pool); + -+ /* Last resume, so re-enqueue the worker if we have any periodic -+ * clients. 
-+ */ -+ if (vctx->suspend_count == 0) { -+ struct kbase_vinstr_client *pos; -+ bool has_periodic_clients = false; ++ return -ENOMEM; ++ } ++ kbase_mem_pool_unlock(pool); + -+ list_for_each_entry(pos, &vctx->clients, node) { -+ if (pos->dump_interval_ns != 0) { -+ has_periodic_clients = true; -+ break; -+ } -+ } ++ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) ++ return -ENOMEM; + -+ if (has_periodic_clients) -+ kbase_hwcnt_virtualizer_queue_work( -+ vctx->hvirt, &vctx->dump_work); ++ p = kbase_mem_alloc_page(pool); ++ if (!p) { ++ kbase_mem_pool_lock(pool); ++ pool->dont_reclaim = false; ++ kbase_mem_pool_unlock(pool); ++ ++ return -ENOMEM; + } ++ ++ kbase_mem_pool_lock(pool); ++ kbase_mem_pool_add_locked(pool, p); + } ++ pool->dont_reclaim = false; ++ kbase_mem_pool_unlock(pool); + -+ mutex_unlock(&vctx->lock); ++ return 0; +} ++KBASE_EXPORT_TEST_API(kbase_mem_pool_grow); + -+int kbase_vinstr_hwcnt_reader_setup( -+ struct kbase_vinstr_context *vctx, -+ struct kbase_ioctl_hwcnt_reader_setup *setup) ++void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +{ -+ int errcode; -+ int fd; -+ struct kbase_vinstr_client *vcli = NULL; -+ -+ if (!vctx || !setup || -+ (setup->buffer_count == 0) || -+ (setup->buffer_count > MAX_BUFFER_COUNT) || -+ !is_power_of_2(setup->buffer_count)) -+ return -EINVAL; -+ -+ errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); -+ if (errcode) -+ goto error; ++ size_t cur_size; ++ int err = 0; + -+ /* Add the new client. No need to reschedule worker, as not periodic */ -+ mutex_lock(&vctx->lock); ++ cur_size = kbase_mem_pool_size(pool); + -+ vctx->client_count++; -+ list_add(&vcli->node, &vctx->clients); ++ if (new_size > pool->max_size) ++ new_size = pool->max_size; + -+ mutex_unlock(&vctx->lock); ++ if (new_size < cur_size) ++ kbase_mem_pool_shrink(pool, cur_size - new_size); ++ else if (new_size > cur_size) ++ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL); + -+ /* Expose to user-space only once the client is fully initialized */ -+ errcode = anon_inode_getfd( -+ "[mali_vinstr_desc]", -+ &vinstr_client_fops, -+ vcli, -+ O_RDONLY | O_CLOEXEC); -+ if (errcode < 0) -+ goto client_installed_error; ++ if (err) { ++ size_t grown_size = kbase_mem_pool_size(pool); + -+ fd = errcode; ++ dev_warn(pool->kbdev->dev, ++ "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", ++ (new_size - cur_size), (grown_size - cur_size)); ++ } ++} + -+ return fd; ++void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) ++{ ++ size_t cur_size; ++ size_t nr_to_shrink; + -+client_installed_error: -+ mutex_lock(&vctx->lock); ++ kbase_mem_pool_lock(pool); + -+ vctx->client_count--; -+ list_del(&vcli->node); ++ pool->max_size = max_size; + -+ mutex_unlock(&vctx->lock); -+error: -+ kbasep_vinstr_client_destroy(vcli); -+ return errcode; -+} ++ cur_size = kbase_mem_pool_size(pool); ++ if (max_size < cur_size) { ++ nr_to_shrink = cur_size - max_size; ++ kbase_mem_pool_shrink_locked(pool, nr_to_shrink); ++ } + -+/** -+ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready -+ * buffers. -+ * @cli: Non-NULL pointer to vinstr client. -+ * -+ * Return: Non-zero if client has at least one dumping buffer filled that was -+ * not notified to user yet. 
-+ */ -+static int kbasep_vinstr_hwcnt_reader_buffer_ready( -+ struct kbase_vinstr_client *cli) -+{ -+ WARN_ON(!cli); -+ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); ++ kbase_mem_pool_unlock(pool); +} ++KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_dump( -+ struct kbase_vinstr_client *cli) ++static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ int errcode; ++ struct kbase_mem_pool *pool; ++ size_t pool_size; + -+ mutex_lock(&cli->vctx->lock); ++ pool = container_of(s, struct kbase_mem_pool, reclaim); + -+ errcode = kbasep_vinstr_client_dump( -+ cli, BASE_HWCNT_READER_EVENT_MANUAL); ++ kbase_mem_pool_lock(pool); ++ if (pool->dont_reclaim && !pool->dying) { ++ kbase_mem_pool_unlock(pool); ++ /* Tell shrinker to skip reclaim ++ * even though freeable pages are available ++ */ ++ return 0; ++ } ++ pool_size = kbase_mem_pool_size(pool); ++ kbase_mem_pool_unlock(pool); + -+ mutex_unlock(&cli->vctx->lock); -+ return errcode; ++ return pool_size; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_clear( -+ struct kbase_vinstr_client *cli) ++static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ int errcode; ++ struct kbase_mem_pool *pool; ++ unsigned long freed; + -+ mutex_lock(&cli->vctx->lock); ++ pool = container_of(s, struct kbase_mem_pool, reclaim); + -+ errcode = kbasep_vinstr_client_clear(cli); ++ kbase_mem_pool_lock(pool); ++ if (pool->dont_reclaim && !pool->dying) { ++ kbase_mem_pool_unlock(pool); ++ /* Tell shrinker that reclaim can't be made and ++ * do not attempt again for this reclaim context. ++ */ ++ return SHRINK_STOP; ++ } + -+ mutex_unlock(&cli->vctx->lock); -+ return errcode; -+} ++ pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @buffer: Non-NULL pointer to userspace buffer. -+ * @size: Size of buffer. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( -+ struct kbase_vinstr_client *cli, -+ void __user *buffer, -+ size_t size) -+{ -+ unsigned int meta_idx = atomic_read(&cli->meta_idx); -+ unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; ++ freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); + -+ struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; -+ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); -+ const size_t min_size = min(size, meta_size); ++ kbase_mem_pool_unlock(pool); + -+ /* Metadata sanity check. */ -+ WARN_ON(idx != meta->buffer_idx); ++ pool_dbg(pool, "reclaim freed %ld pages\n", freed); + -+ /* Check if there is any buffer available. */ -+ if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) -+ return -EAGAIN; ++ return freed; ++} + -+ /* Check if previously taken buffer was put back. 
*/ -+ if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) -+ return -EBUSY; ++int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, ++ unsigned int order, int group_id, struct kbase_device *kbdev, ++ struct kbase_mem_pool *next_pool) ++{ ++ if (WARN_ON(group_id < 0) || ++ WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { ++ return -EINVAL; ++ } + -+ /* Clear user buffer to zero. */ -+ if (unlikely(meta_size < size && clear_user(buffer, size))) -+ return -EFAULT; ++ pool->cur_size = 0; ++ pool->max_size = kbase_mem_pool_config_get_max_size(config); ++ pool->order = order; ++ pool->group_id = group_id; ++ pool->kbdev = kbdev; ++ pool->next_pool = next_pool; ++ pool->dying = false; ++ atomic_set(&pool->isolation_in_progress_cnt, 0); + -+ /* Copy next available buffer's metadata to user. */ -+ if (unlikely(copy_to_user(buffer, meta, min_size))) -+ return -EFAULT; ++ spin_lock_init(&pool->pool_lock); ++ INIT_LIST_HEAD(&pool->page_list); + -+ /* Compare exchange meta idx to protect against concurrent getters */ -+ if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1)) -+ return -EBUSY; ++ pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; ++ pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; ++ pool->reclaim.seeks = DEFAULT_SEEKS; ++ /* Kernel versions prior to 3.1 : ++ * struct shrinker does not define batch ++ */ ++ pool->reclaim.batch = 0; ++#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE ++ register_shrinker(&pool->reclaim); ++#else ++ register_shrinker(&pool->reclaim, "mali-mem-pool"); ++#endif ++ ++ pool_dbg(pool, "initialized\n"); + + return 0; +} ++KBASE_EXPORT_TEST_API(kbase_mem_pool_init); + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @buffer: Non-NULL pointer to userspace buffer. -+ * @size: Size of buffer. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( -+ struct kbase_vinstr_client *cli, -+ void __user *buffer, -+ size_t size) ++void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) +{ -+ unsigned int read_idx = atomic_read(&cli->read_idx); -+ unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; ++ kbase_mem_pool_lock(pool); ++ pool->dying = true; ++ kbase_mem_pool_unlock(pool); ++} + -+ struct kbase_hwcnt_reader_metadata *meta; -+ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); -+ const size_t max_size = max(size, meta_size); -+ int ret = 0; -+ u8 stack_kbuf[64]; -+ u8 *kbuf = NULL; -+ size_t i; ++void kbase_mem_pool_term(struct kbase_mem_pool *pool) ++{ ++ struct kbase_mem_pool *next_pool = pool->next_pool; ++ struct page *p, *tmp; ++ size_t nr_to_spill = 0; ++ LIST_HEAD(spill_list); ++ LIST_HEAD(free_list); ++ int i; + -+ /* Check if any buffer was taken. */ -+ if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) -+ return -EPERM; ++ pool_dbg(pool, "terminate()\n"); + -+ if (likely(max_size <= sizeof(stack_kbuf))) { -+ /* Use stack buffer when the size is small enough. */ -+ if (unlikely(meta_size > size)) -+ memset(stack_kbuf, 0, sizeof(stack_kbuf)); -+ kbuf = stack_kbuf; -+ } else { -+ kbuf = kzalloc(max_size, GFP_KERNEL); -+ if (unlikely(!kbuf)) -+ return -ENOMEM; -+ } ++ unregister_shrinker(&pool->reclaim); + -+ /* -+ * Copy user buffer to zero cleared kernel buffer which has enough -+ * space for both user buffer and kernel metadata. 
-+ */ -+ if (unlikely(copy_from_user(kbuf, buffer, size))) { -+ ret = -EFAULT; -+ goto out; -+ } ++ kbase_mem_pool_lock(pool); ++ pool->max_size = 0; + -+ /* -+ * Make sure any "extra" data passed from userspace is zero. -+ * It's meaningful only in case meta_size < size. -+ */ -+ for (i = meta_size; i < size; i++) { -+ /* Check if user data beyond meta size is zero. */ -+ if (unlikely(kbuf[i] != 0)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ } ++ if (next_pool && !kbase_mem_pool_is_full(next_pool)) { ++ /* Spill to next pool (may overspill) */ ++ nr_to_spill = kbase_mem_pool_capacity(next_pool); ++ nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); + -+ /* Check if correct buffer is put back. */ -+ meta = (struct kbase_hwcnt_reader_metadata *)kbuf; -+ if (unlikely(idx != meta->buffer_idx)) { -+ ret = -EINVAL; -+ goto out; ++ /* Zero pages first without holding the next_pool lock */ ++ for (i = 0; i < nr_to_spill; i++) { ++ p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS); ++ if (p) ++ list_add(&p->lru, &spill_list); ++ } + } + -+ /* Compare exchange read idx to protect against concurrent putters */ -+ if (read_idx != -+ atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) { -+ ret = -EPERM; -+ goto out; ++ while (!kbase_mem_pool_is_empty(pool)) { ++ /* Free remaining pages to kernel */ ++ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); ++ if (p) ++ list_add(&p->lru, &free_list); + } + -+out: -+ if (unlikely(kbuf != stack_kbuf)) -+ kfree(kbuf); -+ return ret; -+} ++ kbase_mem_pool_unlock(pool); + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @interval: Periodic dumping interval (disable periodic dumping if 0). -+ * -+ * Return: 0 always. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( -+ struct kbase_vinstr_client *cli, -+ u32 interval) -+{ -+ mutex_lock(&cli->vctx->lock); ++ if (next_pool && nr_to_spill) { ++ list_for_each_entry(p, &spill_list, lru) ++ kbase_mem_pool_zero_page(pool, p); + -+ if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) -+ interval = DUMP_INTERVAL_MIN_NS; -+ /* Update the interval, and put in a dummy next dump time */ -+ cli->dump_interval_ns = interval; -+ cli->next_dump_time_ns = 0; ++ /* Add new page list to next_pool */ ++ kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + -+ /* -+ * If it's a periodic client, kick off the worker early to do a proper -+ * timer reschedule. Return value is ignored, as we don't care if the -+ * worker is already queued. -+ */ -+ if ((interval != 0) && (cli->vctx->suspend_count == 0)) -+ kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt, -+ &cli->vctx->dump_work); ++ pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); ++ } + -+ mutex_unlock(&cli->vctx->lock); ++ list_for_each_entry_safe(p, tmp, &free_list, lru) { ++ list_del_init(&p->lru); ++ kbase_mem_pool_free_page(pool, p); ++ } + -+ return 0; -+} ++ /* Freeing of pages will be deferred when page migration is enabled. */ ++ enqueue_free_pool_pages_work(pool); + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @event_id: ID of event to enable. -+ * -+ * Return: 0 always. 
-+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( -+ struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id) ++ /* Before returning wait to make sure there are no pages undergoing page isolation ++ * which will require reference to this pool. ++ */ ++ while (atomic_read(&pool->isolation_in_progress_cnt)) ++ cpu_relax(); ++ ++ pool_dbg(pool, "terminated\n"); ++} ++KBASE_EXPORT_TEST_API(kbase_mem_pool_term); ++ ++struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) +{ -+ /* No-op, as events aren't supported */ -+ return 0; ++ struct page *p; ++ ++ do { ++ pool_dbg(pool, "alloc()\n"); ++ p = kbase_mem_pool_remove(pool, ALLOCATE_IN_PROGRESS); ++ ++ if (p) ++ return p; ++ ++ pool = pool->next_pool; ++ } while (pool); ++ ++ return NULL; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl -+ * command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @event_id: ID of event to disable. -+ * -+ * Return: 0 always. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( -+ struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id) ++struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) +{ -+ /* No-op, as events aren't supported */ -+ return 0; ++ lockdep_assert_held(&pool->pool_lock); ++ ++ pool_dbg(pool, "alloc_locked()\n"); ++ return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. -+ * @cli: Non-NULL pointer to vinstr client. -+ * @hwver: Non-NULL pointer to user buffer where HW version will be stored. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( -+ struct kbase_vinstr_client *cli, -+ u32 __user *hwver) ++void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, ++ bool dirty) +{ -+ u32 ver = 5; -+ const enum kbase_hwcnt_gpu_group_type type = -+ kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); ++ struct kbase_mem_pool *next_pool = pool->next_pool; + -+ if (WARN_ON(type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) -+ return -EINVAL; ++ pool_dbg(pool, "free()\n"); + -+ return put_user(ver, hwver); ++ if (!kbase_mem_pool_is_full(pool)) { ++ /* Add to our own pool */ ++ if (dirty) ++ kbase_mem_pool_sync_page(pool, p); ++ ++ kbase_mem_pool_add(pool, p); ++ } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { ++ /* Spill to next pool */ ++ kbase_mem_pool_spill(next_pool, p); ++ } else { ++ /* Free page */ ++ kbase_mem_pool_free_page(pool, p); ++ /* Freeing of pages will be deferred when page migration is enabled. */ ++ enqueue_free_pool_pages_work(pool); ++ } +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_get_api_version() - get API version ioctl -+ * command. -+ * @cli: The non-NULL pointer to the client -+ * @arg: Command's argument. -+ * @size: Size of arg. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( -+ struct kbase_vinstr_client *cli, unsigned long arg, size_t size) ++void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, ++ bool dirty) +{ -+ long ret = -EINVAL; -+ -+ if (size == sizeof(u32)) { -+ ret = put_user(HWCNT_READER_API, (u32 __user *)arg); -+ } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { -+ u8 clk_cnt = cli->vctx->metadata->clk_cnt; -+ unsigned long bytes = 0; -+ struct kbase_hwcnt_reader_api_version api_version = { -+ .version = HWCNT_READER_API, -+ .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, -+ }; ++ pool_dbg(pool, "free_locked()\n"); + -+ if (clk_cnt > 0) -+ api_version.features |= -+ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; -+ if (clk_cnt > 1) -+ api_version.features |= -+ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; ++ lockdep_assert_held(&pool->pool_lock); + -+ bytes = copy_to_user( -+ (void __user *)arg, &api_version, sizeof(api_version)); ++ if (!kbase_mem_pool_is_full(pool)) { ++ /* Add to our own pool */ ++ if (dirty) ++ kbase_mem_pool_sync_page(pool, p); + -+ /* copy_to_user returns zero in case of success. -+ * If it fails, it returns the number of bytes that could NOT be copied -+ */ -+ if (bytes == 0) -+ ret = 0; -+ else -+ ret = -EFAULT; ++ kbase_mem_pool_add_locked(pool, p); ++ } else { ++ /* Free page */ ++ kbase_mem_pool_free_page(pool, p); ++ /* Freeing of pages will be deferred when page migration is enabled. */ ++ enqueue_free_pool_pages_work(pool); + } -+ return ret; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. -+ * @filp: Non-NULL pointer to file structure. -+ * @cmd: User command. -+ * @arg: Command's argument. -+ * -+ * Return: 0 on success, else error code. 
-+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl( -+ struct file *filp, -+ unsigned int cmd, -+ unsigned long arg) ++int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, ++ struct tagged_addr *pages, bool partial_allowed, ++ struct task_struct *page_owner) +{ -+ long rcode; -+ struct kbase_vinstr_client *cli; ++ struct page *p; ++ size_t nr_from_pool; ++ size_t i = 0; ++ int err = -ENOMEM; ++ size_t nr_pages_internal; ++ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD); + -+ if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) ++ nr_pages_internal = nr_4k_pages / (1u << (pool->order)); ++ ++ if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + -+ cli = filp->private_data; -+ if (!cli) ++ pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages); ++ pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal); ++ ++ /* Get pages from this pool */ ++ kbase_mem_pool_lock(pool); ++ nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); ++ ++ while (nr_from_pool--) { ++ int j; ++ ++ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); ++ ++ if (pool->order) { ++ pages[i++] = as_tagged_tag(page_to_phys(p), ++ HUGE_HEAD | HUGE_PAGE); ++ for (j = 1; j < (1u << pool->order); j++) ++ pages[i++] = as_tagged_tag(page_to_phys(p) + ++ PAGE_SIZE * j, ++ HUGE_PAGE); ++ } else { ++ pages[i++] = as_tagged(page_to_phys(p)); ++ } ++ } ++ kbase_mem_pool_unlock(pool); ++ ++ if (i != nr_4k_pages && pool->next_pool) { ++ /* Allocate via next pool */ ++ err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i, ++ partial_allowed, page_owner); ++ ++ if (err < 0) ++ goto err_rollback; ++ ++ i += err; ++ } else { ++ /* Get any remaining pages from kernel */ ++ while (i != nr_4k_pages) { ++ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread))) ++ goto err_rollback; ++ ++ p = kbase_mem_alloc_page(pool); ++ if (!p) { ++ if (partial_allowed) ++ goto done; ++ else ++ goto err_rollback; ++ } ++ ++ if (pool->order) { ++ int j; ++ ++ pages[i++] = as_tagged_tag(page_to_phys(p), ++ HUGE_PAGE | ++ HUGE_HEAD); ++ for (j = 1; j < (1u << pool->order); j++) { ++ phys_addr_t phys; ++ ++ phys = page_to_phys(p) + PAGE_SIZE * j; ++ pages[i++] = as_tagged_tag(phys, ++ HUGE_PAGE); ++ } ++ } else { ++ pages[i++] = as_tagged(page_to_phys(p)); ++ } ++ } ++ } ++ ++done: ++ pool_dbg(pool, "alloc_pages(%zu) done\n", i); ++ return i; ++ ++err_rollback: ++ kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); ++ return err; ++} ++ ++int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, ++ size_t nr_4k_pages, struct tagged_addr *pages) ++{ ++ struct page *p; ++ size_t i; ++ size_t nr_pages_internal; ++ ++ lockdep_assert_held(&pool->pool_lock); ++ ++ nr_pages_internal = nr_4k_pages / (1u << (pool->order)); ++ ++ if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + -+ switch (_IOC_NR(cmd)) { -+ case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( -+ cli, arg, _IOC_SIZE(cmd)); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( -+ cli, (u32 __user *)arg); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): -+ rcode = put_user((u32)cli->vctx->metadata_user->dump_buf_bytes, -+ (u32 __user *)arg); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_DUMP): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); -+ break; -+ case 
_IOC_NR(KBASE_HWCNT_READER_CLEAR): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( -+ cli, (void __user *)arg, _IOC_SIZE(cmd)); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( -+ cli, (void __user *)arg, _IOC_SIZE(cmd)); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( -+ cli, (u32)arg); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( -+ cli, (enum base_hwcnt_reader_event)arg); -+ break; -+ case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( -+ cli, (enum base_hwcnt_reader_event)arg); -+ break; -+ default: -+ pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); -+ rcode = -EINVAL; -+ break; ++ pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); ++ pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", ++ nr_pages_internal); ++ ++ if (kbase_mem_pool_size(pool) < nr_pages_internal) { ++ pool_dbg(pool, "Failed alloc\n"); ++ return -ENOMEM; + } + -+ return rcode; ++ for (i = 0; i < nr_pages_internal; i++) { ++ int j; ++ ++ p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); ++ if (pool->order) { ++ *pages++ = as_tagged_tag(page_to_phys(p), ++ HUGE_HEAD | HUGE_PAGE); ++ for (j = 1; j < (1u << pool->order); j++) { ++ *pages++ = as_tagged_tag(page_to_phys(p) + ++ PAGE_SIZE * j, ++ HUGE_PAGE); ++ } ++ } else { ++ *pages++ = as_tagged(page_to_phys(p)); ++ } ++ } ++ ++ return nr_4k_pages; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. -+ * @filp: Non-NULL pointer to file structure. -+ * @wait: Non-NULL pointer to poll table. -+ * -+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if -+ * data can not be read without blocking, else EPOLLHUP | EPOLLERR. -+ */ -+static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait) ++static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, ++ size_t nr_pages, struct tagged_addr *pages, ++ bool zero, bool sync) +{ -+ struct kbase_vinstr_client *cli; ++ struct page *p; ++ size_t nr_to_pool = 0; ++ LIST_HEAD(new_page_list); ++ size_t i; + -+ if (!filp || !wait) -+ return EPOLLHUP | EPOLLERR; ++ if (!nr_pages) ++ return; + -+ cli = filp->private_data; -+ if (!cli) -+ return EPOLLHUP | EPOLLERR; ++ pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", ++ nr_pages, zero, sync); + -+ poll_wait(filp, &cli->waitq, wait); -+ if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) -+ return EPOLLIN | EPOLLRDNORM; ++ /* Zero/sync pages first without holding the pool lock */ ++ for (i = 0; i < nr_pages; i++) { ++ if (unlikely(!as_phys_addr_t(pages[i]))) ++ continue; + -+ return (__poll_t)0; ++ if (is_huge_head(pages[i]) || !is_huge(pages[i])) { ++ p = as_page(pages[i]); ++ if (zero) ++ kbase_mem_pool_zero_page(pool, p); ++ else if (sync) ++ kbase_mem_pool_sync_page(pool, p); ++ ++ list_add(&p->lru, &new_page_list); ++ nr_to_pool++; ++ } ++ pages[i] = as_tagged(0); ++ } ++ ++ /* Add new page list to pool */ ++ kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); ++ ++ pool_dbg(pool, "add_array(%zu) added %zu pages\n", ++ nr_pages, nr_to_pool); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. -+ * @filp: Non-NULL pointer to file structure. 
-+ * @vma: Non-NULL pointer to vma structure. -+ * -+ * Return: 0 on success, else error code. -+ */ -+static int kbasep_vinstr_hwcnt_reader_mmap( -+ struct file *filp, -+ struct vm_area_struct *vma) ++static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, ++ size_t nr_pages, struct tagged_addr *pages, ++ bool zero, bool sync) +{ -+ struct kbase_vinstr_client *cli; -+ unsigned long vm_size, size, addr, pfn, offset; ++ struct page *p; ++ size_t nr_to_pool = 0; ++ LIST_HEAD(new_page_list); ++ size_t i; + -+ if (!filp || !vma) -+ return -EINVAL; ++ lockdep_assert_held(&pool->pool_lock); + -+ cli = filp->private_data; -+ if (!cli) -+ return -EINVAL; ++ if (!nr_pages) ++ return; + -+ vm_size = vma->vm_end - vma->vm_start; ++ pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", ++ nr_pages, zero, sync); + -+ /* The mapping is allowed to span the entirety of the page allocation, -+ * not just the chunk where the dump buffers are allocated. -+ * This accommodates the corner case where the combined size of the -+ * dump buffers is smaller than a single page. -+ * This does not pose a security risk as the pages are zeroed on -+ * allocation, and anything out of bounds of the dump buffers is never -+ * written to. -+ */ -+ size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; ++ /* Zero/sync pages first */ ++ for (i = 0; i < nr_pages; i++) { ++ if (unlikely(!as_phys_addr_t(pages[i]))) ++ continue; + -+ if (vma->vm_pgoff > (size >> PAGE_SHIFT)) -+ return -EINVAL; ++ if (is_huge_head(pages[i]) || !is_huge(pages[i])) { ++ p = as_page(pages[i]); ++ if (zero) ++ kbase_mem_pool_zero_page(pool, p); ++ else if (sync) ++ kbase_mem_pool_sync_page(pool, p); + -+ offset = vma->vm_pgoff << PAGE_SHIFT; -+ if (vm_size > size - offset) -+ return -EINVAL; ++ list_add(&p->lru, &new_page_list); ++ nr_to_pool++; ++ } ++ pages[i] = as_tagged(0); ++ } + -+ addr = __pa(cli->dump_bufs.page_addr + offset); -+ pfn = addr >> PAGE_SHIFT; ++ /* Add new page list to pool */ ++ kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); + -+ return remap_pfn_range( -+ vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); ++ pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", ++ nr_pages, nr_to_pool); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. -+ * @inode: Non-NULL pointer to inode structure. -+ * @filp: Non-NULL pointer to file structure. -+ * -+ * Return: 0 always. 
-+ */ -+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, -+ struct file *filp) ++void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ struct tagged_addr *pages, bool dirty, bool reclaimed) +{ -+ struct kbase_vinstr_client *vcli = filp->private_data; ++ struct kbase_mem_pool *next_pool = pool->next_pool; ++ struct page *p; ++ size_t nr_to_pool; ++ LIST_HEAD(to_pool_list); ++ size_t i = 0; ++ bool pages_released = false; + -+ mutex_lock(&vcli->vctx->lock); ++ pool_dbg(pool, "free_pages(%zu):\n", nr_pages); + -+ vcli->vctx->client_count--; -+ list_del(&vcli->node); ++ if (!reclaimed) { ++ /* Add to this pool */ ++ nr_to_pool = kbase_mem_pool_capacity(pool); ++ nr_to_pool = min(nr_pages, nr_to_pool); + -+ mutex_unlock(&vcli->vctx->lock); ++ kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + -+ kbasep_vinstr_client_destroy(vcli); ++ i += nr_to_pool; + -+ return 0; ++ if (i != nr_pages && next_pool) { ++ /* Spill to next pool (may overspill) */ ++ nr_to_pool = kbase_mem_pool_capacity(next_pool); ++ nr_to_pool = min(nr_pages - i, nr_to_pool); ++ ++ kbase_mem_pool_add_array(next_pool, nr_to_pool, ++ pages + i, true, dirty); ++ i += nr_to_pool; ++ } ++ } ++ ++ /* Free any remaining pages to kernel */ ++ for (; i < nr_pages; i++) { ++ if (unlikely(!as_phys_addr_t(pages[i]))) ++ continue; ++ ++ if (is_huge(pages[i]) && !is_huge_head(pages[i])) { ++ pages[i] = as_tagged(0); ++ continue; ++ } ++ p = as_page(pages[i]); ++ ++ kbase_mem_pool_free_page(pool, p); ++ pages[i] = as_tagged(0); ++ pages_released = true; ++ } ++ ++ /* Freeing of pages will be deferred when page migration is enabled. */ ++ if (pages_released) ++ enqueue_free_pool_pages_work(pool); ++ ++ pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); +} -diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h -new file mode 100644 -index 000000000..6747ec70a ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h -@@ -0,0 +1,90 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2015-2018, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ + -+/* -+ * Vinstr, used to provide an ioctl for userspace access to periodic hardware -+ * counters. -+ */ + -+#ifndef _KBASE_VINSTR_H_ -+#define _KBASE_VINSTR_H_ ++void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, ++ size_t nr_pages, struct tagged_addr *pages, bool dirty, ++ bool reclaimed) ++{ ++ struct page *p; ++ size_t nr_to_pool; ++ LIST_HEAD(to_pool_list); ++ size_t i = 0; ++ bool pages_released = false; + -+struct kbase_vinstr_context; -+struct kbase_hwcnt_virtualizer; -+struct kbase_ioctl_hwcnt_reader_setup; ++ lockdep_assert_held(&pool->pool_lock); + -+/** -+ * kbase_vinstr_init() - Initialise a vinstr context. 
-+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. -+ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr -+ * context will be stored on success. -+ * -+ * On creation, the suspend count of the context will be 0. -+ * -+ * Return: 0 on success, else error code. -+ */ -+int kbase_vinstr_init( -+ struct kbase_hwcnt_virtualizer *hvirt, -+ struct kbase_vinstr_context **out_vctx); ++ pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); + -+/** -+ * kbase_vinstr_term() - Terminate a vinstr context. -+ * @vctx: Pointer to the vinstr context to be terminated. -+ */ -+void kbase_vinstr_term(struct kbase_vinstr_context *vctx); ++ if (!reclaimed) { ++ /* Add to this pool */ ++ nr_to_pool = kbase_mem_pool_capacity(pool); ++ nr_to_pool = min(nr_pages, nr_to_pool); + -+/** -+ * kbase_vinstr_suspend() - Increment the suspend count of the context. -+ * @vctx: Non-NULL pointer to the vinstr context to be suspended. -+ * -+ * After this function call returns, it is guaranteed that all timers and -+ * workers in vinstr will be cancelled, and will not be re-triggered until -+ * after the context has been resumed. In effect, this means no new counter -+ * dumps will occur for any existing or subsequently added periodic clients. -+ */ -+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); ++ kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, ++ dirty); + -+/** -+ * kbase_vinstr_resume() - Decrement the suspend count of the context. -+ * @vctx: Non-NULL pointer to the vinstr context to be resumed. -+ * -+ * If a call to this function decrements the suspend count from 1 to 0, then -+ * normal operation of vinstr will be resumed (i.e. counter dumps will once -+ * again be automatically triggered for all periodic clients). -+ * -+ * It is only valid to call this function one time for each prior returned call -+ * to kbase_vinstr_suspend. -+ */ -+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); ++ i += nr_to_pool; ++ } + -+/** -+ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader -+ * client. -+ * @vinstr_ctx: Non-NULL pointer to the vinstr context. -+ * @setup: Non-NULL pointer to the hwcnt reader configuration. -+ * -+ * Return: file descriptor on success, else a (negative) error code. -+ */ -+int kbase_vinstr_hwcnt_reader_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_ioctl_hwcnt_reader_setup *setup); ++ /* Free any remaining pages to kernel */ ++ for (; i < nr_pages; i++) { ++ if (unlikely(!as_phys_addr_t(pages[i]))) ++ continue; + -+#endif /* _KBASE_VINSTR_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_linux_trace.h b/drivers/gpu/arm/bifrost/mali_linux_trace.h ++ if (is_huge(pages[i]) && !is_huge_head(pages[i])) { ++ pages[i] = as_tagged(0); ++ continue; ++ } ++ ++ p = as_page(pages[i]); ++ ++ kbase_mem_pool_free_page(pool, p); ++ pages[i] = as_tagged(0); ++ pages_released = true; ++ } ++ ++ /* Freeing of pages will be deferred when page migration is enabled. 
*/ ++ if (pages_released) ++ enqueue_free_pool_pages_work(pool); ++ ++ pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c new file mode 100644 -index 000000000..52f17390c +index 000000000..3b1b2bae1 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_linux_trace.h -@@ -0,0 +1,547 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c +@@ -0,0 +1,184 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -251630,542 +253042,179 @@ index 000000000..52f17390c + * + */ + -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM mali -+ -+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_MALI_H -+ -+#include ++#include ++#include + -+#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) -+#define MALI_JOB_SLOTS_EVENT_CHANGED ++#include "mali_kbase_mem_pool_debugfs.h" ++#include "mali_kbase_debugfs_helper.h" + -+/* -+ * mali_job_slots_event - Reports change of job slot status. -+ * @gpu_id: Kbase device id -+ * @event_id: ORed together bitfields representing a type of event, -+ * made with the GATOR_MAKE_EVENT() macro. -+ */ -+TRACE_EVENT(mali_job_slots_event, -+ TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, -+ u8 job_id), -+ TP_ARGS(gpu_id, event_id, tgid, pid, job_id), -+ TP_STRUCT__entry( -+ __field(u32, gpu_id) -+ __field(u32, event_id) -+ __field(u32, tgid) -+ __field(u32, pid) -+ __field(u8, job_id) -+ ), -+ TP_fast_assign( -+ __entry->gpu_id = gpu_id; -+ __entry->event_id = event_id; -+ __entry->tgid = tgid; -+ __entry->pid = pid; -+ __entry->job_id = job_id; -+ ), -+ TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", -+ __entry->gpu_id, __entry->event_id, -+ __entry->tgid, __entry->pid, __entry->job_id) -+); ++void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, ++ size_t const value) ++{ ++ struct kbase_mem_pool *const mem_pools = array; + -+/** -+ * mali_pm_status - Reports change of power management status. -+ * @gpu_id: Kbase device id -+ * @event_id: Core type (shader, tiler, L2 cache) -+ * @value: 64bits bitmask reporting either power status of -+ * the cores (1-ON, 0-OFF) -+ */ -+TRACE_EVENT(mali_pm_status, -+ TP_PROTO(u32 gpu_id, u32 event_id, u64 value), -+ TP_ARGS(gpu_id, event_id, value), -+ TP_STRUCT__entry( -+ __field(u32, gpu_id) -+ __field(u32, event_id) -+ __field(u64, value) -+ ), -+ TP_fast_assign( -+ __entry->gpu_id = gpu_id; -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("gpu=%u event %u = %llu", -+ __entry->gpu_id, __entry->event_id, __entry->value) -+); ++ if (WARN_ON(!mem_pools) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return; + -+/** -+ * mali_page_fault_insert_pages - Reports an MMU page fault -+ * resulting in new pages being mapped. 
-+ * @gpu_id: Kbase device id -+ * @event_id: MMU address space number -+ * @value: Number of newly allocated pages -+ */ -+TRACE_EVENT(mali_page_fault_insert_pages, -+ TP_PROTO(u32 gpu_id, s32 event_id, u64 value), -+ TP_ARGS(gpu_id, event_id, value), -+ TP_STRUCT__entry( -+ __field(u32, gpu_id) -+ __field(s32, event_id) -+ __field(u64, value) -+ ), -+ TP_fast_assign( -+ __entry->gpu_id = gpu_id; -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("gpu=%u event %d = %llu", -+ __entry->gpu_id, __entry->event_id, __entry->value) -+); ++ kbase_mem_pool_trim(&mem_pools[index], value); ++} + -+/** -+ * mali_total_alloc_pages_change - Reports that the total number of -+ * allocated pages has changed. -+ * @gpu_id: Kbase device id -+ * @event_id: Total number of pages allocated -+ */ -+TRACE_EVENT(mali_total_alloc_pages_change, -+ TP_PROTO(u32 gpu_id, s64 event_id), -+ TP_ARGS(gpu_id, event_id), -+ TP_STRUCT__entry( -+ __field(u32, gpu_id) -+ __field(s64, event_id) -+ ), -+ TP_fast_assign( -+ __entry->gpu_id = gpu_id; -+ __entry->event_id = event_id; -+ ), -+ TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) -+); -+#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ ++void kbase_mem_pool_debugfs_set_max_size(void *const array, ++ size_t const index, size_t const value) ++{ ++ struct kbase_mem_pool *const mem_pools = array; + -+/* -+ * MMU subsystem tracepoints -+ */ ++ if (WARN_ON(!mem_pools) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return; + -+/* Fault status and exception code helpers -+ * -+ * Must be macros to allow use by user-side tracepoint tools -+ * -+ * bits 0:1 masked off code, and used for the level -+ * -+ * Tracepoint files get included more than once - protect against multiple -+ * definition -+ */ -+#ifndef __TRACE_MALI_MMU_HELPERS -+#define __TRACE_MALI_MMU_HELPERS -+/* Complex macros should be enclosed in parenthesis. -+ * -+ * We need to have those parentheses removed for our arrays of symbolic look-ups -+ * for __print_symbolic() whilst also being able to use them outside trace code -+ */ -+#define _ENSURE_PARENTHESIS(args...) args ++ kbase_mem_pool_set_max_size(&mem_pools[index], value); ++} + -+#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ -+ (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ -+ __print_symbolic(((code) & ~3u), \ -+ KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) -+#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ -+ (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) ++size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) ++{ ++ struct kbase_mem_pool *const mem_pools = array; + -+#define KBASE_MMU_FAULT_STATUS_CODE(status) \ -+ ((status) & 0xFFu) -+#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ -+ (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") ++ if (WARN_ON(!mem_pools) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return 0; + -+#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ -+ KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ -+ KBASE_MMU_FAULT_STATUS_CODE(status)) ++ return kbase_mem_pool_size(&mem_pools[index]); ++} + -+#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ -+ KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) ++size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) ++{ ++ struct kbase_mem_pool *const mem_pools = array; + -+#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ -+ ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) -+#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ -+ {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ -+ {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ -+ {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ -+ {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) -+#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ -+ __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ -+ KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) ++ if (WARN_ON(!mem_pools) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return 0; + -+#if MALI_USE_CSF -+#define KBASE_MMU_FAULT_CODE_VALID(code) \ -+ ((code >= 0xC0 && code <= 0xEB) && \ -+ (!(code >= 0xC5 && code <= 0xC7)) && \ -+ (!(code >= 0xCC && code <= 0xD8)) && \ -+ (!(code >= 0xDC && code <= 0xDF)) && \ -+ (!(code >= 0xE1 && code <= 0xE3))) -+#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ -+ {0xC0, "TRANSLATION_FAULT_" }, \ -+ {0xC4, "TRANSLATION_FAULT_" }, \ -+ {0xC8, "PERMISSION_FAULT_" }, \ -+ {0xD0, "TRANSTAB_BUS_FAULT_" }, \ -+ {0xD8, "ACCESS_FLAG_" }, \ -+ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ -+ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ -+ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) -+#else /* MALI_USE_CSF */ -+#define KBASE_MMU_FAULT_CODE_VALID(code) \ -+ ((code >= 0xC0 && code <= 0xEF) && \ -+ (!(code >= 0xC5 && code <= 0xC6)) && \ -+ (!(code >= 0xCC && code <= 0xCF)) && \ -+ (!(code >= 0xD4 && code <= 0xD7)) && \ -+ (!(code >= 0xDC && code <= 0xDF))) -+#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ -+ {0xC0, "TRANSLATION_FAULT_" }, \ -+ {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ -+ {0xC8, "PERMISSION_FAULT_" }, \ -+ {0xD0, "TRANSTAB_BUS_FAULT_" }, \ -+ {0xD8, "ACCESS_FLAG_" }, \ -+ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ -+ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ -+ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ -+ {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) -+#endif /* MALI_USE_CSF */ -+#endif /* __TRACE_MALI_MMU_HELPERS */ ++ return kbase_mem_pool_max_size(&mem_pools[index]); ++} + -+/* trace_mali_mmu_page_fault_grow -+ * -+ * Tracepoint about a successful grow of a region due to a GPU page fault -+ */ -+TRACE_EVENT(mali_mmu_page_fault_grow, -+ TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, -+ size_t new_pages), -+ TP_ARGS(reg, fault, new_pages), -+ TP_STRUCT__entry( -+ __field(u64, start_addr) -+ __field(u64, fault_addr) -+ __field(u64, fault_extra_addr) -+ __field(size_t, new_pages) -+ __field(u32, status) -+ ), -+ TP_fast_assign( -+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; -+ __entry->fault_addr = fault->addr; -+ __entry->fault_extra_addr = fault->extra_addr; -+ __entry->new_pages = new_pages; -+ __entry->status = fault->status; -+ ), -+ TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u 
access_type=0x%x,%s source_id=0x%x", -+ __entry->start_addr, __entry->fault_addr, -+ __entry->fault_extra_addr, __entry->new_pages, -+ __entry->status, -+ KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), -+ KBASE_MMU_FAULT_STATUS_CODE(__entry->status), -+ KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), -+ KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), -+ KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, -+ KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), -+ __entry->status >> 16) -+); ++void kbase_mem_pool_config_debugfs_set_max_size(void *const array, ++ size_t const index, size_t const value) ++{ ++ struct kbase_mem_pool_config *const configs = array; + ++ if (WARN_ON(!configs) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return; + ++ kbase_mem_pool_config_set_max_size(&configs[index], value); ++} + ++size_t kbase_mem_pool_config_debugfs_max_size(void *const array, ++ size_t const index) ++{ ++ struct kbase_mem_pool_config *const configs = array; + -+/* -+ * Just-in-time memory allocation subsystem tracepoints -+ */ ++ if (WARN_ON(!configs) || ++ WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) ++ return 0; + -+/* Just-in-time memory allocation soft-job template. Override the TP_printk -+ * further if need be. jit_id can be 0. -+ */ -+DECLARE_EVENT_CLASS(mali_jit_softjob_template, -+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), -+ TP_ARGS(reg, jit_id), -+ TP_STRUCT__entry( -+ __field(u64, start_addr) -+ __field(size_t, nr_pages) -+ __field(size_t, backed_pages) -+ __field(u8, jit_id) -+ ), -+ TP_fast_assign( -+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; -+ __entry->nr_pages = reg->nr_pages; -+ __entry->backed_pages = kbase_reg_current_backed_size(reg); -+ __entry->jit_id = jit_id; -+ ), -+ TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", -+ __entry->jit_id, __entry->start_addr, __entry->nr_pages, -+ __entry->backed_pages) -+); ++ return kbase_mem_pool_config_get_max_size(&configs[index]); ++} + -+/* trace_mali_jit_alloc() -+ * -+ * Tracepoint about a just-in-time memory allocation soft-job successfully -+ * allocating memory -+ */ -+DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, -+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), -+ TP_ARGS(reg, jit_id)); ++static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) ++{ ++ CSTD_UNUSED(data); ++ return kbase_debugfs_helper_seq_read(sfile, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); ++} + -+/* trace_mali_jit_free() -+ * -+ * Tracepoint about memory that was allocated just-in-time being freed -+ * (which may happen either on free soft-job, or during rollback error -+ * paths of an allocation soft-job, etc) -+ * -+ * Free doesn't immediately have the just-in-time memory allocation ID so -+ * it's currently suppressed from the output - set jit_id to 0 -+ */ -+DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, -+ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), -+ TP_ARGS(reg, jit_id), -+ TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", -+ __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); ++static ssize_t kbase_mem_pool_debugfs_write(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) ++{ ++ int err; + -+#if !MALI_USE_CSF -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+/* trace_mali_jit_report -+ * -+ * Tracepoint about the GPU data structure read to form a just-in-time memory -+ * allocation report, and its calculated physical page usage -+ */ 
-+TRACE_EVENT(mali_jit_report, -+ TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, -+ unsigned int id_idx, u64 read_val, u64 used_pages), -+ TP_ARGS(katom, reg, id_idx, read_val, used_pages), -+ TP_STRUCT__entry( -+ __field(u64, start_addr) -+ __field(u64, read_val) -+ __field(u64, used_pages) -+ __field(unsigned long, flags) -+ __field(u8, id_idx) -+ __field(u8, jit_id) -+ ), -+ TP_fast_assign( -+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; -+ __entry->read_val = read_val; -+ __entry->used_pages = used_pages; -+ __entry->flags = reg->flags; -+ __entry->id_idx = id_idx; -+ __entry->jit_id = katom->jit_ids[id_idx]; -+ ), -+ TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", -+ __entry->start_addr, __entry->id_idx, __entry->jit_id, -+ __print_symbolic(__entry->flags, -+ { 0, "address"}, -+ { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, -+ { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, -+ { KBASE_REG_HEAP_INFO_IS_SIZE | -+ KBASE_REG_TILER_ALIGN_TOP, -+ "size with align (invalid)" } -+ ), -+ __entry->read_val, __entry->used_pages) -+); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -+#endif /* !MALI_USE_CSF */ ++ CSTD_UNUSED(ppos); ++ err = kbase_debugfs_helper_seq_write(file, ubuf, count, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); ++ return err ? err : count; ++} + -+TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+/* trace_mali_jit_report_pressure -+ * -+ * Tracepoint about change in physical memory pressure, due to the information -+ * about a region changing. Examples include: -+ * - a report on a region that was allocated just-in-time -+ * - just-in-time allocation of a region -+ * - free of a region that was allocated just-in-time -+ */ -+TRACE_EVENT(mali_jit_report_pressure, -+ TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, -+ u64 new_pressure, unsigned int flags), -+ TP_ARGS(reg, new_used_pages, new_pressure, flags), -+ TP_STRUCT__entry( -+ __field(u64, start_addr) -+ __field(u64, used_pages) -+ __field(u64, new_used_pages) -+ __field(u64, new_pressure) -+ __field(unsigned int, flags) -+ ), -+ TP_fast_assign( -+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; -+ __entry->used_pages = reg->used_pages; -+ __entry->new_used_pages = new_used_pages; -+ __entry->new_pressure = new_pressure; -+ __entry->flags = flags; -+ ), -+ TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", -+ __entry->start_addr, __entry->used_pages, -+ __entry->new_used_pages, __entry->new_pressure, -+ __print_flags(__entry->flags, "|", -+ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, -+ "HAPPENED_ON_ALLOC_OR_FREE" })) -+); -+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbase_mem_pool_debugfs_size_show, ++ in->i_private); ++} + -+#ifndef __TRACE_SYSGRAPH_ENUM -+#define __TRACE_SYSGRAPH_ENUM -+/* Enum of sysgraph message IDs */ -+enum sysgraph_msg { -+ SGR_ARRIVE, -+ SGR_SUBMIT, -+ SGR_COMPLETE, -+ SGR_POST, -+ SGR_ACTIVE, -+ SGR_INACTIVE ++static const struct file_operations kbase_mem_pool_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbase_mem_pool_debugfs_open, ++ .read = seq_read, ++ .write = kbase_mem_pool_debugfs_write, ++ .llseek = seq_lseek, ++ .release = single_release, +}; -+#endif /* __TRACE_SYSGRAPH_ENUM */ -+ -+/* A template for SYSGRAPH events -+ * -+ * Most of the sysgraph events contain only one input 
argument -+ * which is atom_id therefore they will be using a common template -+ */ -+TRACE_EVENT(sysgraph, -+ TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, -+ unsigned int atom_id), -+ TP_ARGS(message, proc_id, atom_id), -+ TP_STRUCT__entry( -+ __field(unsigned int, proc_id) -+ __field(enum sysgraph_msg, message) -+ __field(unsigned int, atom_id) -+ ), -+ TP_fast_assign( -+ __entry->proc_id = proc_id; -+ __entry->message = message; -+ __entry->atom_id = atom_id; -+ ), -+ TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, -+ __entry->proc_id, __entry->atom_id) -+); + -+/* A template for SYSGRAPH GPU events -+ * -+ * Sysgraph events that record start/complete events -+ * on GPU also record a js value in addition to the -+ * atom id. -+ */ -+TRACE_EVENT(sysgraph_gpu, -+ TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, -+ unsigned int atom_id, unsigned int js), -+ TP_ARGS(message, proc_id, atom_id, js), -+ TP_STRUCT__entry( -+ __field(unsigned int, proc_id) -+ __field(enum sysgraph_msg, message) -+ __field(unsigned int, atom_id) -+ __field(unsigned int, js) -+ ), -+ TP_fast_assign( -+ __entry->proc_id = proc_id; -+ __entry->message = message; -+ __entry->atom_id = atom_id; -+ __entry->js = js; -+ ), -+ TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", -+ __entry->message, __entry->proc_id, -+ __entry->atom_id, __entry->js) -+); ++static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, ++ void *data) ++{ ++ CSTD_UNUSED(data); ++ return kbase_debugfs_helper_seq_read(sfile, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_max_size); ++} + -+/* Tracepoint files get included more than once - protect against multiple -+ * definition -+ */ -+#undef KBASE_JIT_REPORT_GPU_MEM_SIZE ++static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) ++{ ++ int err; + -+/* Size in bytes of the memory surrounding the location used for a just-in-time -+ * memory allocation report -+ */ -+#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) ++ CSTD_UNUSED(ppos); ++ err = kbase_debugfs_helper_seq_write(file, ubuf, count, ++ MEMORY_GROUP_MANAGER_NR_GROUPS, ++ kbase_mem_pool_debugfs_set_max_size); ++ return err ? 
err : count; ++} + -+/* trace_mali_jit_report_gpu_mem -+ * -+ * Tracepoint about the GPU memory nearby the location used for a just-in-time -+ * memory allocation report -+ */ -+TRACE_EVENT(mali_jit_report_gpu_mem, -+ TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), -+ TP_ARGS(base_addr, reg_addr, gpu_mem, flags), -+ TP_STRUCT__entry( -+ __field(u64, base_addr) -+ __field(u64, reg_addr) -+ __array(u64, mem_values, -+ KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) -+ __field(unsigned int, flags) -+ ), -+ TP_fast_assign( -+ __entry->base_addr = base_addr; -+ __entry->reg_addr = reg_addr; -+ memcpy(__entry->mem_values, gpu_mem, -+ sizeof(__entry->mem_values)); -+ __entry->flags = flags; -+ ), -+ TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", -+ __entry->reg_addr, __entry->base_addr, -+ __print_array(__entry->mem_values, -+ ARRAY_SIZE(__entry->mem_values), sizeof(u64)), -+ __print_flags(__entry->flags, "|", -+ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, -+ "HAPPENED_ON_ALLOC_OR_FREE" })) -+); ++static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbase_mem_pool_debugfs_max_size_show, ++ in->i_private); ++} + -+/* trace_mali_jit_trim_from_region -+ * -+ * Tracepoint about trimming physical pages from a region -+ */ -+TRACE_EVENT(mali_jit_trim_from_region, -+ TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, -+ size_t old_pages, size_t available_pages, size_t new_pages), -+ TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), -+ TP_STRUCT__entry( -+ __field(u64, start_addr) -+ __field(size_t, freed_pages) -+ __field(size_t, old_pages) -+ __field(size_t, available_pages) -+ __field(size_t, new_pages) -+ ), -+ TP_fast_assign( -+ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; -+ __entry->freed_pages = freed_pages; -+ __entry->old_pages = old_pages; -+ __entry->available_pages = available_pages; -+ __entry->new_pages = new_pages; -+ ), -+ TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", -+ __entry->start_addr, __entry->freed_pages, __entry->old_pages, -+ __entry->available_pages, __entry->new_pages) -+); ++static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { ++ .owner = THIS_MODULE, ++ .open = kbase_mem_pool_debugfs_max_size_open, ++ .read = seq_read, ++ .write = kbase_mem_pool_debugfs_max_size_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+/* trace_mali_jit_trim -+ * -+ * Tracepoint about total trimmed physical pages -+ */ -+TRACE_EVENT(mali_jit_trim, -+ TP_PROTO(size_t freed_pages), -+ TP_ARGS(freed_pages), -+ TP_STRUCT__entry( -+ __field(size_t, freed_pages) -+ ), -+ TP_fast_assign( -+ __entry->freed_pages = freed_pages; -+ ), -+ TP_printk("freed_pages=%zu", __entry->freed_pages) -+); ++void kbase_mem_pool_debugfs_init(struct dentry *parent, ++ struct kbase_context *kctx) ++{ ++ const mode_t mode = 0644; + -+#include "debug/mali_kbase_debug_linux_ktrace.h" ++ debugfs_create_file("mem_pool_size", mode, parent, ++ &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); + -+#endif /* _TRACE_MALI_H */ ++ debugfs_create_file("mem_pool_max_size", mode, parent, ++ &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); + -+#undef TRACE_INCLUDE_PATH -+/* lwn.net/Articles/383362 suggests this should remain as '.', and instead -+ * extend CFLAGS -+ */ -+#define TRACE_INCLUDE_PATH . 
-+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE mali_linux_trace ++ debugfs_create_file("lp_mem_pool_size", mode, parent, ++ &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); + -+/* This part must be outside protection */ -+#include -diff --git a/drivers/gpu/arm/bifrost/mali_malisw.h b/drivers/gpu/arm/bifrost/mali_malisw.h ++ debugfs_create_file("lp_mem_pool_max_size", mode, parent, ++ &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h new file mode 100644 -index 000000000..d9db189e8 +index 000000000..207b58536 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_malisw.h -@@ -0,0 +1,108 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h +@@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2015, 2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -252183,103 +253232,117 @@ index 000000000..d9db189e8 + * + */ + -+/* -+ * Kernel-wide include for common macros and types. -+ */ -+ -+#ifndef _MALISW_H_ -+#define _MALISW_H_ ++#ifndef _KBASE_MEM_POOL_DEBUGFS_H_ ++#define _KBASE_MEM_POOL_DEBUGFS_H_ + -+#include ++#include + +/** -+ * MIN - Return the lesser of two values. -+ * @x: value1 -+ * @y: value2 ++ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool ++ * @parent: Parent debugfs dentry ++ * @kctx: The kbase context + * -+ * As a macro it may evaluate its arguments more than once. -+ * Refer to MAX macro for more details ++ * Adds four debugfs files under @parent: ++ * - mem_pool_size: get/set the current sizes of @kctx: mem_pools ++ * - mem_pool_max_size: get/set the max sizes of @kctx: mem_pools ++ * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool ++ * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool + */ -+#define MIN(x, y) ((x) < (y) ? (x) : (y)) ++void kbase_mem_pool_debugfs_init(struct dentry *parent, ++ struct kbase_context *kctx); + +/** -+ * MAX - Return the greater of two values. -+ * @x: value1 -+ * @y: value2 ++ * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size + * -+ * As a macro it may evaluate its arguments more than once. -+ * If called on the same two arguments as MIN it is guaranteed to return -+ * the one that MIN didn't return. This is significant for types where not -+ * all values are comparable e.g. NaNs in floating-point types. But if you want -+ * to retrieve the min and max of two values, consider using a conditional swap -+ * instead. ++ * @array: Address of the first in an array of physical memory pools. ++ * @index: A memory group ID to be used as an index into the array of memory ++ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @value: New number of pages in the pool. ++ * ++ * If @value > current size, fill the pool with new pages from the kernel, but ++ * not above the max_size for the pool. ++ * If @value < current size, shrink the pool by freeing pages to the kernel. + */ -+#define MAX(x, y) ((x) < (y) ? (y) : (x)) ++void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); + +/** -+ * CSTD_UNUSED - Function-like macro for suppressing unused variable warnings. 
++ * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in ++ * memory pool + * -+ * @x: unused variable ++ * @array: Address of the first in an array of physical memory pools. ++ * @index: A memory group ID to be used as an index into the array of memory ++ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @value: Maximum number of free pages the pool can hold. + * -+ * Where possible such variables should be removed; this macro is present for -+ * cases where we much support API backwards compatibility. ++ * If the maximum size is reduced, the pool will be shrunk to adhere to the ++ * new limit. For details see kbase_mem_pool_shrink(). + */ -+#define CSTD_UNUSED(x) ((void)(x)) ++void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, ++ size_t value); + +/** -+ * CSTD_NOP - Function-like macro for use where "no behavior" is desired. -+ * @...: no-op ++ * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool + * -+ * This is useful when compile time macros turn a function-like macro in to a -+ * no-op, but where having no statement is otherwise invalid. ++ * @array: Address of the first in an array of physical memory pools. ++ * @index: A memory group ID to be used as an index into the array of memory ++ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * ++ * Note: the size of the pool may in certain corner cases exceed @max_size! ++ * ++ * Return: Number of free pages in the pool + */ -+#define CSTD_NOP(...) ((void)#__VA_ARGS__) ++size_t kbase_mem_pool_debugfs_size(void *array, size_t index); + +/** -+ * CSTD_STR1 - Function-like macro for stringizing a single level macro. -+ * @x: macro's value ++ * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a ++ * memory pool + * -+ * @code -+ * #define MY_MACRO 32 -+ * CSTD_STR1( MY_MACRO ) -+ * > "MY_MACRO" -+ * @endcode ++ * @array: Address of the first in an array of physical memory pools. ++ * @index: A memory group ID to be used as an index into the array of memory ++ * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * ++ * Return: Maximum number of free pages in the pool + */ -+#define CSTD_STR1(x) #x ++size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); + +/** -+ * CSTD_STR2 - Function-like macro for stringizing a macro's value. -+ * @x: macro's value ++ * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages ++ * in initial configuration of pool + * -+ * This should not be used if the macro is defined in a way which may have no -+ * value; use the alternative @c CSTD_STR2N macro should be used instead. -+ * @code -+ * #define MY_MACRO 32 -+ * CSTD_STR2( MY_MACRO ) -+ * > "32" -+ * @endcode ++ * @array: Array of initial configurations for a set of physical memory pools. ++ * @index: A memory group ID to be used as an index into the array. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @value : Maximum number of free pages that a memory pool created from the ++ * selected configuration can hold. + */ -+#define CSTD_STR2(x) CSTD_STR1(x) ++void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, ++ size_t value); + -+ #ifndef fallthrough -+ #define fallthrough __fallthrough -+ #endif /* fallthrough */ ++/** ++ * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages ++ * from initial configuration of pool ++ * ++ * @array: Array of initial configurations for a set of physical memory pools. 
++ * @index: A memory group ID to be used as an index into the array. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * ++ * Return: Maximum number of free pages that a memory pool created from the ++ * selected configuration can hold. ++ */ ++size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); + -+#ifndef __fallthrough -+#define __fallthrough __attribute__((fallthrough)) -+#endif /* __fallthrough */ ++#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ + -+#endif /* _MALISW_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c new file mode 100644 -index 000000000..1db3abe2f +index 000000000..49c4b041e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c -@@ -0,0 +1,26 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c +@@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -252297,21 +253360,102 @@ index 000000000..1db3abe2f + * + */ + -+/* Create the trace point if not configured in kernel */ -+#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY -+#define CREATE_TRACE_POINTS -+#include "mali_power_gpu_frequency_trace.h" -+#endif -diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h ++#include ++#include ++#include ++ ++#include ++ ++void kbase_mem_pool_group_config_set_max_size( ++ struct kbase_mem_pool_group_config *const configs, ++ size_t const max_size) ++{ ++ size_t const large_max_size = max_size >> ++ (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - ++ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); ++ int gid; ++ ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { ++ kbase_mem_pool_config_set_max_size(&configs->small[gid], ++ max_size); ++ ++ kbase_mem_pool_config_set_max_size(&configs->large[gid], ++ large_max_size); ++ } ++} ++ ++int kbase_mem_pool_group_init(struct kbase_mem_pool_group *const mem_pools, ++ struct kbase_device *const kbdev, ++ const struct kbase_mem_pool_group_config *const configs, ++ struct kbase_mem_pool_group *next_pools) ++{ ++ int gid, err = 0; ++ ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { ++ err = kbase_mem_pool_init(&mem_pools->small[gid], &configs->small[gid], ++ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, gid, kbdev, ++ next_pools ? &next_pools->small[gid] : NULL); ++ ++ if (!err) { ++ err = kbase_mem_pool_init(&mem_pools->large[gid], &configs->large[gid], ++ KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, gid, kbdev, ++ next_pools ? &next_pools->large[gid] : NULL); ++ if (err) ++ kbase_mem_pool_term(&mem_pools->small[gid]); ++ } ++ ++ /* Break out of the loop early to avoid incrementing the count ++ * of memory pool pairs successfully initialized. ++ */ ++ if (err) ++ break; ++ } ++ ++ if (err) { ++ /* gid gives the number of memory pool pairs successfully ++ * initialized, which is one greater than the array index of the ++ * last group. 
++ */ ++ while (gid-- > 0) { ++ kbase_mem_pool_term(&mem_pools->small[gid]); ++ kbase_mem_pool_term(&mem_pools->large[gid]); ++ } ++ } ++ ++ return err; ++} ++ ++void kbase_mem_pool_group_mark_dying( ++ struct kbase_mem_pool_group *const mem_pools) ++{ ++ int gid; ++ ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { ++ kbase_mem_pool_mark_dying(&mem_pools->small[gid]); ++ kbase_mem_pool_mark_dying(&mem_pools->large[gid]); ++ } ++} ++ ++void kbase_mem_pool_group_term( ++ struct kbase_mem_pool_group *const mem_pools) ++{ ++ int gid; ++ ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { ++ kbase_mem_pool_term(&mem_pools->small[gid]); ++ kbase_mem_pool_term(&mem_pools->large[gid]); ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h new file mode 100644 -index 000000000..f156650a4 +index 000000000..fe8ce7752 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h -@@ -0,0 +1,68 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h +@@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -252329,99 +253473,109 @@ index 000000000..f156650a4 + * + */ + -+#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI -+#define _TRACE_POWER_GPU_FREQUENCY_MALI -+#endif -+ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM power -+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace -+#undef TRACE_INCLUDE_PATH -+#define TRACE_INCLUDE_PATH . -+ -+#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_POWER_GPU_FREQUENCY_H -+ -+#include -+ -+DECLARE_EVENT_CLASS(gpu, -+ -+ TP_PROTO(unsigned int state, unsigned int gpu_id), -+ -+ TP_ARGS(state, gpu_id), ++#ifndef _KBASE_MEM_POOL_GROUP_H_ ++#define _KBASE_MEM_POOL_GROUP_H_ + -+ TP_STRUCT__entry( -+ __field(u32, state) -+ __field(u32, gpu_id) -+ ), ++#include + -+ TP_fast_assign( -+ __entry->state = state; -+ __entry->gpu_id = gpu_id; -+ ), ++/** ++ * kbase_mem_pool_group_select() - Select the memory pool to use. ++ * ++ * @kbdev: Device pointer. ++ * @mem_group_id: Physical memory group ID to use. ++ * @is_small_page: Flag used to select between the small and ++ * large memory pool. ++ * ++ * Return: A pointer to the selected memory pool. 
++ */ ++static inline struct kbase_mem_pool *kbase_mem_pool_group_select( ++ struct kbase_device *kbdev, u32 mem_group_id, bool is_small_page) ++{ ++ if (WARN_ON(unlikely(kbdev == NULL))) ++ return NULL; + -+ TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, -+ (unsigned long)__entry->gpu_id) -+); ++ WARN_ON(mem_group_id > BASE_MEM_GROUP_COUNT); + -+DEFINE_EVENT(gpu, gpu_frequency, ++ if (is_small_page) ++ return &kbdev->mem_pools.small[mem_group_id]; + -+ TP_PROTO(unsigned int frequency, unsigned int gpu_id), ++ return &kbdev->mem_pools.large[mem_group_id]; ++} + -+ TP_ARGS(frequency, gpu_id) -+); ++/** ++ * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for ++ * a set of memory pools ++ * ++ * @configs: Initial configuration for the set of memory pools ++ * @max_size: Maximum number of free 4 KiB pages each pool can hold ++ * ++ * This function sets the initial configuration for every memory pool so that ++ * the maximum amount of free memory that each pool can hold is identical. ++ * The equivalent number of 2 MiB pages is calculated automatically for the ++ * purpose of configuring the large page pools. ++ */ ++void kbase_mem_pool_group_config_set_max_size( ++ struct kbase_mem_pool_group_config *configs, size_t max_size); + -+#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ ++/** ++ * kbase_mem_pool_group_init - Initialize a set of memory pools ++ * ++ * @mem_pools: Set of memory pools to initialize ++ * @kbdev: Kbase device where memory is used ++ * @configs: Initial configuration for the set of memory pools ++ * @next_pools: Set of memory pools from which to allocate memory if there ++ * is no free memory in one of the @mem_pools ++ * ++ * Initializes a complete set of physical memory pools. Memory pools are used to ++ * allow efficient reallocation of previously-freed physical pages. A pair of ++ * memory pools is initialized for each physical memory group: one for 4 KiB ++ * pages and one for 2 MiB pages. ++ * ++ * If @next_pools is not NULL then a request to allocate memory from an ++ * empty pool in @mem_pools will attempt to allocate from the equivalent pool ++ * in @next_pools before going to the memory group manager. Similarly ++ * pages can spill over to the equivalent pool in @next_pools when a pool ++ * is full in @mem_pools. Pages are zeroed before they spill over to another ++ * pool, to prevent leaking information between applications. ++ * ++ * Return: 0 on success, otherwise a negative error code ++ */ ++int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, struct kbase_device *kbdev, ++ const struct kbase_mem_pool_group_config *configs, ++ struct kbase_mem_pool_group *next_pools); + -+/* This part must be outside protection */ -+#include -diff --git a/drivers/gpu/arm/bifrost/mmu/Kbuild b/drivers/gpu/arm/bifrost/mmu/Kbuild -new file mode 100755 -index 000000000..416432397 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/Kbuild -@@ -0,0 +1,30 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/** ++ * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying ++ * ++ * @mem_pools: Set of memory pools to mark ++ * ++ * Marks a complete set of physical memory pools previously initialized by ++ * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation ++ * operations (eg growing on page fault) to be terminated. ++ */ ++void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools); + -+bifrost_kbase-y += \ -+ mmu/mali_kbase_mmu.o \ -+ mmu/mali_kbase_mmu_hw_direct.o \ -+ mmu/mali_kbase_mmu_mode_aarch64.o ++/** ++ * kbase_mem_pool_group_term - Terminate a set of memory pools ++ * ++ * @mem_pools: Set of memory pools to terminate ++ * ++ * Terminates a complete set of physical memory pools previously initialized by ++ * @kbase_mem_pool_group_init. ++ */ ++void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools); + -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o -+else -+ bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c ++#endif /* _KBASE_MEM_POOL_GROUP_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c new file mode 100644 -index 000000000..4cac7876f +index 000000000..9317023b7 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c -@@ -0,0 +1,572 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c +@@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017, 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -252439,567 +253593,563 @@ index 000000000..4cac7876f + * + */ + -+/** -+ * DOC: Base kernel MMU management specific for CSF GPU. -+ */ -+ +#include -+#include -+#include -+#include -+#include -+#include -+ -+void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, -+ struct kbase_mmu_setup * const setup) -+{ -+ /* Set up the required caching policies at the correct indices -+ * in the memattr register. -+ */ -+ setup->memattr = -+ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << -+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | -+ (AS_MEMATTR_FORCE_TO_CACHE_ALL << -+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | -+ (AS_MEMATTR_WRITE_ALLOC << -+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << -+ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_WA << -+ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | -+ (AS_MEMATTR_AARCH64_NON_CACHEABLE << -+ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)) | -+ (AS_MEMATTR_AARCH64_SHARED << -+ (AS_MEMATTR_INDEX_SHARED * 8)); + -+ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; -+ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; -+} ++#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** -+ * submit_work_pagefault() - Submit a work for MMU page fault. ++ * kbasep_mem_profile_seq_show - Show callback for the @c mem_profile debugfs file. 
+ * -+ * @kbdev: Kbase device pointer -+ * @as_nr: Faulty address space -+ * @fault: Data relating to the fault ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * -+ * This function submits a work for reporting the details of MMU fault. ++ * This function is called to get the contents of the @c mem_profile debugfs ++ * file. This is a report of current memory usage and distribution in userspace. ++ * ++ * Return: 0 if it successfully prints data in debugfs entry file, non-zero ++ * otherwise + */ -+static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, -+ struct kbase_fault *fault) ++static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) +{ -+ unsigned long flags; -+ struct kbase_as *const as = &kbdev->as[as_nr]; -+ struct kbase_context *kctx; ++ struct kbase_context *kctx = sfile->private; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ mutex_lock(&kctx->mem_profile_lock); + -+ if (kctx) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + -+ as->pf_data = (struct kbase_fault) { -+ .status = fault->status, -+ .addr = fault->addr, -+ }; ++ seq_putc(sfile, '\n'); + -+ /* -+ * A page fault work item could already be pending for the -+ * context's address space, when the page fault occurs for -+ * MCU's address space. -+ */ -+ if (!queue_work(as->pf_wq, &as->work_pagefault)) { -+ dev_dbg(kbdev->dev, -+ "Page fault is already pending for as %u", as_nr); -+ kbase_ctx_sched_release_ctx(kctx); -+ } else { -+ atomic_inc(&kbdev->faults_pending); -+ } -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->mem_profile_lock); ++ ++ return 0; +} + -+void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, -+ struct kbase_fault *fault) ++/* ++ * File operations related to debugfs entry for mem_profile ++ */ ++static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) +{ -+ /* decode the fault status */ -+ u32 exception_type = fault->status & 0xFF; -+ u32 access_type = (fault->status >> 8) & 0x3; -+ u32 source_id = (fault->status >> 16); -+ int as_no; -+ -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "Unexpected Page fault in firmware address space at VA 0x%016llX\n" -+ "raw fault status: 0x%X\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n", -+ fault->addr, -+ fault->status, -+ exception_type, kbase_gpu_exception_name(exception_type), -+ access_type, kbase_gpu_access_type_name(fault->status), -+ source_id); -+ -+ kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); ++ return single_open(file, kbasep_mem_profile_seq_show, in->i_private); ++} + -+ /* Report MMU fault for all address spaces (except MCU_AS_NR) */ -+ for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) -+ submit_work_pagefault(kbdev, as_no, fault); ++static const struct file_operations kbasep_mem_profile_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_mem_profile_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ /* GPU reset is required to recover */ -+ if (kbase_prepare_to_reset_gpu(kbdev, -+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu(kbdev); ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size) ++{ ++ const mode_t mode = 0444; ++ int err = 0; + -+} 
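
The page-fault handlers in this hunk repeatedly decode the raw fault status word with the same shifts and masks: exception type in bits 7:0, access type in bits 9:8, and source ID in bits 31:16. Purely as an illustration of that layout (the helper and struct names below are hypothetical and not part of the driver), the decode could be factored out as:

#include <linux/types.h>

/* Field layout mirrors the shifts/masks used by the page-fault handlers. */
struct decoded_fault_status {
        u32 exception_type;     /* bits 7:0   */
        u32 access_type;        /* bits 9:8   */
        u32 source_id;          /* bits 31:16 */
};

static inline struct decoded_fault_status decode_fault_status(u32 status)
{
        struct decoded_fault_status d = {
                .exception_type = status & 0xFF,
                .access_type = (status >> 8) & 0x3,
                .source_id = status >> 16,
        };

        return d;
}

A caller would then report, for example, decode_fault_status(fault->status).exception_type rather than open-coding the mask at each site; the bus-fault path uses a slightly different layout and is not covered by this sketch.
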
-+KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); ++ mutex_lock(&kctx->mem_profile_lock); + -+void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, struct kbase_fault *fault) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ u32 const status = fault->status; -+ int exception_type = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> -+ GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; -+ int access_type = (status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >> -+ GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; -+ int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> -+ GPU_FAULTSTATUS_SOURCE_ID_SHIFT; -+ const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? -+ "true" : "false"; -+ int as_no = as->number; -+ unsigned long flags; -+ const uintptr_t fault_addr = fault->addr; ++ dev_dbg(kctx->kbdev->dev, "initialised: %d", ++ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "GPU bus fault in AS%d at PA %pK\n" -+ "PA_VALID: %s\n" -+ "raw fault status: 0x%X\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n" -+ "pid: %d\n", -+ as_no, (void *)fault_addr, -+ addr_valid, -+ status, -+ exception_type, kbase_gpu_exception_name(exception_type), -+ access_type, kbase_gpu_access_type_name(access_type), -+ source_id, -+ kctx->pid); ++ if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { ++ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { ++ err = -ENOMEM; ++ } else if (IS_ERR_OR_NULL(debugfs_create_file("mem_profile", ++ mode, kctx->kctx_dentry, kctx, ++ &kbasep_mem_profile_debugfs_fops))) { ++ err = -EAGAIN; ++ } else { ++ kbase_ctx_flag_set(kctx, ++ KCTX_MEM_PROFILE_INITIALIZED); ++ } ++ } + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { ++ kfree(kctx->mem_profile_data); ++ kctx->mem_profile_data = data; ++ kctx->mem_profile_size = size; ++ } else { ++ kfree(data); ++ } + -+ /* Switching to UNMAPPED mode above would have enabled the firmware to -+ * recover from the fault (if the memory access was made by firmware) -+ * and it can then respond to CSG termination requests to be sent now. -+ * All GPU command queue groups associated with the context would be -+ * affected as they use the same GPU address space. -+ */ -+ kbase_csf_ctx_handle_fault(kctx, fault); ++ dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", ++ err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+ /* Now clear the GPU fault */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAR_FAULT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->mem_profile_lock); + ++ return err; +} + -+/* -+ * The caller must ensure it's retained the ctx to prevent it from being -+ * scheduled out whilst it's being worked on. 
-+ */ -+void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, const char *reason_str, -+ struct kbase_fault *fault) ++void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) +{ -+ unsigned long flags; -+ unsigned int exception_type; -+ unsigned int access_type; -+ unsigned int source_id; -+ int as_no; -+ struct kbase_device *kbdev; -+ const u32 status = fault->status; -+ -+ as_no = as->number; -+ kbdev = kctx->kbdev; -+ -+ /* Make sure the context was active */ -+ if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) -+ return; -+ -+ /* decode the fault status */ -+ exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); -+ access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); -+ source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); -+ -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "Unhandled Page fault in AS%d at VA 0x%016llX\n" -+ "Reason: %s\n" -+ "raw fault status: 0x%X\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n" -+ "pid: %d\n", -+ as_no, fault->addr, -+ reason_str, -+ status, -+ exception_type, kbase_gpu_exception_name(exception_type), -+ access_type, kbase_gpu_access_type_name(status), -+ source_id, -+ kctx->pid); -+ -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ mutex_lock(&kctx->mem_profile_lock); + -+ /* switch to UNMAPPED mode, -+ * will abort all jobs and stop any hw counter dumping -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ dev_dbg(kctx->kbdev->dev, "initialised: %d", ++ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++ kfree(kctx->mem_profile_data); ++ kctx->mem_profile_data = NULL; ++ kctx->mem_profile_size = 0; + -+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); -+ /* Switching to UNMAPPED mode above would have enabled the firmware to -+ * recover from the fault (if the memory access was made by firmware) -+ * and it can then respond to CSG termination requests to be sent now. -+ * All GPU command queue groups associated with the context would be -+ * affected as they use the same GPU address space. -+ */ -+ kbase_csf_ctx_handle_fault(kctx, fault); ++ mutex_unlock(&kctx->mem_profile_lock); ++} + -+ /* Clear down the fault */ -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++#else /* CONFIG_DEBUG_FS */ + ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size) ++{ ++ kfree(data); ++ return 0; +} ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h +new file mode 100644 +index 000000000..c30fca665 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h +@@ -0,0 +1,64 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + +/** -+ * kbase_mmu_interrupt_process() - Process a bus or page fault. -+ * @kbdev: The kbase_device the fault happened on -+ * @kctx: The kbase_context for the faulting address space if one was -+ * found. -+ * @as: The address space that has the fault -+ * @fault: Data relating to the fault ++ * DOC: Header file for mem profiles entries in debugfs + * -+ * This function will process a fault on a specific address space + */ -+static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_as *as, -+ struct kbase_fault *fault) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (!kctx) { -+ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", -+ kbase_as_has_bus_fault(as, fault) ? -+ "Bus error" : "Page fault", -+ as->number, fault->addr); -+ -+ /* Since no ctx was found, the MMU must be disabled. */ -+ WARN_ON(as->current_setup.transtab); ++#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H ++#define _KBASE_MEM_PROFILE_DEBUGFS_H + -+ if (kbase_as_has_bus_fault(as, fault)) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAR_FAULT); -+ else if (kbase_as_has_page_fault(as, fault)) { -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ } ++#include ++#include + -+ return; -+ } ++/** ++ * kbasep_mem_profile_debugfs_remove - Remove entry from Mali memory profile debugfs ++ * ++ * @kctx: The context whose debugfs file @p data should be removed from ++ */ ++void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + -+ if (kbase_as_has_bus_fault(as, fault)) { -+ /* -+ * We need to switch to UNMAPPED mode - but we do this in a -+ * worker so that we can sleep -+ */ -+ WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); -+ atomic_inc(&kbdev->faults_pending); -+ } else { -+ WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); -+ atomic_inc(&kbdev->faults_pending); -+ } -+} -+ -+int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, -+ u32 status, u32 as_nr) -+{ -+ struct kbase_context *kctx; -+ unsigned long flags; -+ struct kbase_as *as; -+ struct kbase_fault *fault; -+ -+ if (WARN_ON(as_nr == MCU_AS_NR)) -+ return -EINVAL; ++/** ++ * kbasep_mem_profile_debugfs_insert - Insert @p data to the debugfs file ++ * so it can be read by userspace ++ * ++ * @kctx: The context whose debugfs file @p data should be inserted to ++ * @data: A NULL-terminated string to be inserted to the debugfs file, ++ * without the trailing new line character ++ * @size: The length of the @p data string ++ * ++ * The function takes ownership of @p data and frees it later when new data ++ * is inserted. ++ * ++ * If the debugfs entry corresponding to the @p kctx doesn't exist, ++ * an attempt will be made to create it. ++ * ++ * Return: 0 if @p data inserted correctly, -EAGAIN in case of error ++ * ++ * @post @ref mem_profile_initialized will be set to @c true ++ * the first time this function succeeds. 
++ */ ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size); + -+ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) -+ return -EINVAL; ++#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + -+ as = &kbdev->as[as_nr]; -+ fault = &as->bf_data; -+ fault->status = status; -+ fault->addr = (u64) kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; -+ fault->addr |= kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); -+ fault->protected_mode = false; +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h +new file mode 100644 +index 000000000..951079d11 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h +@@ -0,0 +1,229 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* report the fault to debugfs */ -+ kbase_as_fault_debugfs_new(kbdev, as_nr); ++/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. ++ * DO NOT EDIT. ++ */ + -+ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr); ++/* clang-format off */ + -+ /* Process the bus fault interrupt for this address space */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#include "mali_kbase_mipe_proto.h" + -+ return 0; -+} ++/* ++ * This header generates MIPE tracepoint declaration BLOB at ++ * compile time. ++ * ++ * It is intentional that there is no header guard. ++ * The header could be included multiple times for ++ * different blobs compilation. ++ * ++ * Before including this header MIPE_HEADER_* parameters must be ++ * defined. See documentation below: ++ */ + -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) -+{ -+ const int num_as = 16; -+ const int pf_shift = 0; -+ const unsigned long as_bit_mask = (1UL << num_as) - 1; -+ unsigned long flags; -+ u32 new_mask; -+ u32 tmp; -+ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask); ++/* ++ * The name of the variable where the result BLOB will be stored. ++ */ ++#if !defined(MIPE_HEADER_BLOB_VAR_NAME) ++#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" ++#endif + -+ /* remember current mask */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); -+ /* mask interrupts for now */ -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++/* ++ * A compiler attribute for the BLOB variable. ++ * ++ * e.g. __attribute__((section("my_section"))) ++ * ++ * Default value is no attribute. 
++ */ ++#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) ++#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE ++#endif + -+ while (pf_bits) { -+ struct kbase_context *kctx; -+ int as_no = ffs(pf_bits) - 1; -+ struct kbase_as *as = &kbdev->as[as_no]; -+ struct kbase_fault *fault = &as->pf_data; ++/* ++ * A compiler attribute for packing structures ++ * ++ * e.g. __packed ++ * ++ * Default value is __attribute__((__packed__)) ++ */ ++#if !defined(MIPE_HEADER_PACKED_ATTRIBUTE) ++#define MIPE_HEADER_PACKED_ATTRIBUTE __attribute__((__packed__)) ++#endif + -+ /* find faulting address */ -+ fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_HI)); -+ fault->addr <<= 32; -+ fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_LO)); ++/* ++ * MIPE stream id. ++ * ++ * See enum tl_stream_id. ++ */ ++#if !defined(MIPE_HEADER_STREAM_ID) ++#error "MIPE_HEADER_STREAM_ID must be defined!" ++#endif + -+ /* Mark the fault protected or not */ -+ fault->protected_mode = false; ++/* ++ * MIPE packet class. ++ * ++ * See enum tl_packet_class. ++ */ ++#if !defined(MIPE_HEADER_PKT_CLASS) ++#error "MIPE_HEADER_PKT_CLASS must be defined!" ++#endif + -+ /* report the fault to debugfs */ -+ kbase_as_fault_debugfs_new(kbdev, as_no); ++/* ++ * The list of tracepoints to process. ++ * ++ * It should be defined as follows: ++ * ++ * #define MIPE_HEADER_TRACEPOINT_LIST \ ++ * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ ++ * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ ++ * etc. ++ * ++ * Where the first argument is tracepoints name, the second ++ * argument is a short tracepoint description, the third argument ++ * types (see MIPE documentation), and the fourth argument ++ * is comma separated argument names. ++ */ ++#if !defined(MIPE_HEADER_TRACEPOINT_LIST) ++#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" ++#endif + -+ /* record the fault status */ -+ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTSTATUS)); ++/* ++ * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. ++ */ ++#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) ++#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" ++#endif + -+ fault->extra_addr = kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); -+ fault->extra_addr <<= 32; -+ fault->extra_addr |= kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); ++/* ++ * The list of enums to process. ++ * ++ * It should be defined as follows: ++ * ++ * #define MIPE_HEADER_ENUM_LIST \ ++ * ENUM_DESC(enum_arg_name, enum_value) \ ++ * ENUM_DESC(enum_arg_name, enum_value) \ ++ * etc. ++ * ++ * Where enum_arg_name is the name of a tracepoint argument being used with ++ * this enum. enum_value is a valid C enum value. ++ * ++ * Default value is an empty list. ++ */ ++#if defined(MIPE_HEADER_ENUM_LIST) + -+ /* Mark page fault as handled */ -+ pf_bits &= ~(1UL << as_no); ++/* ++ * Tracepoint message ID used for enums declaration. ++ */ ++#if !defined(MIPE_HEADER_ENUM_MSG_ID) ++#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" ++#endif + -+ /* remove the queued PF from the mask */ -+ new_mask &= ~MMU_PAGE_FAULT(as_no); ++#else ++#define MIPE_HEADER_ENUM_LIST ++#endif + -+ if (as_no == MCU_AS_NR) { -+ kbase_mmu_report_mcu_as_fault_and_reset(kbdev, fault); -+ /* Pointless to handle remaining faults */ -+ break; -+ } ++/* ++ * The MIPE tracepoint declaration BLOB. 
++ */ ++const struct ++{ ++ u32 _mipe_w0; ++ u32 _mipe_w1; ++ u8 _protocol_version; ++ u8 _pointer_size; ++ u32 _tp_count; ++#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ ++ struct { \ ++ u32 _name; \ ++ u32 _size_string_name; \ ++ char _string_name[sizeof(#name)]; \ ++ u32 _size_desc; \ ++ char _desc[sizeof(desc)]; \ ++ u32 _size_arg_types; \ ++ char _arg_types[sizeof(arg_types)]; \ ++ u32 _size_arg_names; \ ++ char _arg_names[sizeof(arg_names)]; \ ++ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## name; + -+ /* -+ * Refcount the kctx - it shouldn't disappear anyway, since -+ * Page faults _should_ only occur whilst GPU commands are -+ * executing, and a command causing the Page fault shouldn't -+ * complete until the MMU is updated. -+ * Reference is released at the end of bottom half of page -+ * fault handling. -+ */ -+ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); ++#define ENUM_DESC(arg_name, value) \ ++ struct { \ ++ u32 _msg_id; \ ++ u32 _arg_name_len; \ ++ char _arg_name[sizeof(#arg_name)]; \ ++ u32 _value; \ ++ u32 _value_str_len; \ ++ char _value_str[sizeof(#value)]; \ ++ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## arg_name ## _ ## value; + -+ /* Process the interrupt for this address space */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++ MIPE_HEADER_TRACEPOINT_LIST ++ MIPE_HEADER_ENUM_LIST ++#undef TRACEPOINT_DESC ++#undef ENUM_DESC ++} MIPE_HEADER_PACKED_ATTRIBUTE MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { ++ ._mipe_w0 = MIPE_PACKET_HEADER_W0( ++ TL_PACKET_FAMILY_TL, ++ MIPE_HEADER_PKT_CLASS, ++ TL_PACKET_TYPE_HEADER, ++ MIPE_HEADER_STREAM_ID), ++ ._mipe_w1 = MIPE_PACKET_HEADER_W1( ++ sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, ++ 0), ++ ._protocol_version = SWTRACE_VERSION, ++ ._pointer_size = sizeof(void *), ++ ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, ++#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ ++ .__ ## name = { \ ++ ._name = name, \ ++ ._size_string_name = sizeof(#name), \ ++ ._string_name = #name, \ ++ ._size_desc = sizeof(desc), \ ++ ._desc = desc, \ ++ ._size_arg_types = sizeof(arg_types), \ ++ ._arg_types = arg_types, \ ++ ._size_arg_names = sizeof(arg_names), \ ++ ._arg_names = arg_names \ ++ }, ++#define ENUM_DESC(arg_name, value) \ ++ .__ ## arg_name ## _ ## value = { \ ++ ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ ++ ._arg_name_len = sizeof(#arg_name), \ ++ ._arg_name = #arg_name, \ ++ ._value = value, \ ++ ._value_str_len = sizeof(#value), \ ++ ._value_str = #value \ ++ }, + -+ /* reenable interrupts */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); -+ new_mask |= tmp; -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -+} ++ MIPE_HEADER_TRACEPOINT_LIST ++ MIPE_HEADER_ENUM_LIST ++#undef TRACEPOINT_DESC ++#undef ENUM_DESC ++}; + -+int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, -+ struct kbase_va_region *const reg) -+{ -+ /* Can't soft-stop the provoking job */ -+ return -EPERM; -+} ++#undef MIPE_HEADER_BLOB_VAR_NAME ++#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE ++#undef MIPE_HEADER_STREAM_ID ++#undef MIPE_HEADER_PKT_CLASS ++#undef MIPE_HEADER_TRACEPOINT_LIST ++#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE ++#undef MIPE_HEADER_ENUM_LIST ++#undef MIPE_HEADER_ENUM_MSG_ID + -+/** -+ * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device. 
++/* clang-format on */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h +new file mode 100644 +index 000000000..c35ee61ce +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h +@@ -0,0 +1,126 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @data: work_struct passed by queue_work() ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Report a GPU fatal error for all GPU command queue groups that are -+ * using the address space and terminate them. + */ -+static void kbase_mmu_gpu_fault_worker(struct work_struct *data) -+{ -+ struct kbase_as *const faulting_as = container_of(data, struct kbase_as, -+ work_gpufault); -+ const u32 as_nr = faulting_as->number; -+ struct kbase_device *const kbdev = container_of(faulting_as, struct -+ kbase_device, as[as_nr]); -+ struct kbase_fault *fault; -+ struct kbase_context *kctx; -+ u32 status; -+ u64 address; -+ u32 as_valid; -+ unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ fault = &faulting_as->gf_data; -+ status = fault->status; -+ as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; -+ address = fault->addr; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. ++ * DO NOT EDIT. ++ */ + -+ dev_warn(kbdev->dev, -+ "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" -+ "ASID_VALID: %s, ADDRESS_VALID: %s\n", -+ status, -+ kbase_gpu_exception_name( -+ GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), -+ as_nr, address, -+ as_valid ? "true" : "false", -+ status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); ++/* clang-format off */ + -+ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); -+ kbase_csf_ctx_handle_fault(kctx, fault); -+ kbase_ctx_sched_release_ctx_lock(kctx); ++#if !defined(_KBASE_MIPE_PROTO_H) ++#define _KBASE_MIPE_PROTO_H + -+ /* A work for GPU fault is complete. -+ * Till reaching here, no further GPU fault will be reported. -+ * Now clear the GPU fault to allow next GPU fault interrupt report. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAR_FAULT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#define _BITFIELD_MASK_FIELD(pos, len) \ ++ (((1u << len) - 1) << pos) + -+ atomic_dec(&kbdev->faults_pending); -+} ++#define _BITFIELD_SET_FIELD(pos, len, value) \ ++ (_BITFIELD_MASK_FIELD(pos, len) & (((u32) value) << pos)) + -+/** -+ * submit_work_gpufault() - Submit a work for GPU fault. -+ * -+ * @kbdev: Kbase device pointer -+ * @status: GPU fault status -+ * @as_nr: Faulty address space -+ * @address: GPU fault address -+ * -+ * This function submits a work for reporting the details of GPU fault. 
++#define BITFIELD_SET(field_name, value) \ ++ _BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value) ++ ++/* The version of swtrace protocol used in timeline stream. */ ++#define SWTRACE_VERSION 3 ++ ++/* Packet header - first word. ++ * These values must be defined according to MIPE documentation. + */ -+static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, -+ u32 as_nr, u64 address) -+{ -+ unsigned long flags; -+ struct kbase_as *const as = &kbdev->as[as_nr]; -+ struct kbase_context *kctx; ++#define PACKET_STREAMID_POS 0 ++#define PACKET_STREAMID_LEN 8 ++#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) ++#define PACKET_RSVD1_LEN 8 ++#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) ++#define PACKET_TYPE_LEN 3 ++#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) ++#define PACKET_CLASS_LEN 7 ++#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) ++#define PACKET_FAMILY_LEN 6 + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++/* Packet header - second word ++ * These values must be defined according to MIPE documentation. ++ */ ++#define PACKET_LENGTH_POS 0 ++#define PACKET_LENGTH_LEN 24 ++#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) ++#define PACKET_SEQBIT_LEN 1 ++#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) ++#define PACKET_RSVD2_LEN 7 + -+ if (kctx) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); ++/* First word of a MIPE packet */ ++#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \ ++ (0 \ ++ | BITFIELD_SET(PACKET_FAMILY, pkt_family) \ ++ | BITFIELD_SET(PACKET_CLASS, pkt_class) \ ++ | BITFIELD_SET(PACKET_TYPE, pkt_type) \ ++ | BITFIELD_SET(PACKET_STREAMID, stream_id)) + -+ as->gf_data = (struct kbase_fault) { -+ .status = status, -+ .addr = address, -+ }; ++/* Second word of a MIPE packet */ ++#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \ ++ (0 \ ++ | BITFIELD_SET(PACKET_LENGTH, packet_length) \ ++ | BITFIELD_SET(PACKET_SEQBIT, seqbit)) + -+ if (WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault))) -+ kbase_ctx_sched_release_ctx(kctx); -+ else -+ atomic_inc(&kbdev->faults_pending); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++/* The number of bytes reserved for packet header. ++ * These value must be defined according to MIPE documentation. ++ */ ++#define PACKET_HEADER_SIZE 8 /* bytes */ + -+void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, -+ u32 as_nr, u64 address, bool as_valid) -+{ -+ if (!as_valid || (as_nr == MCU_AS_NR)) { -+ int as; ++/* The number of bytes reserved for packet sequence number. ++ * These value must be defined according to MIPE documentation. ++ */ ++#define PACKET_NUMBER_SIZE 4 /* bytes */ + -+ /* Report GPU fault for all contexts (except MCU_AS_NR) in case either -+ * the address space is invalid or it's MCU address space. -+ */ -+ for (as = 1; as < kbdev->nr_hw_address_spaces; as++) -+ submit_work_gpufault(kbdev, status, as, address); -+ } else -+ submit_work_gpufault(kbdev, status, as_nr, address); -+} -+KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); ++/* Timeline packet family ids. ++ * Values are significant! Check MIPE documentation. 
++ */ ++enum tl_packet_family { ++ TL_PACKET_FAMILY_CTRL = 0, /* control packets */ ++ TL_PACKET_FAMILY_TL = 1, /* timeline packets */ ++ TL_PACKET_FAMILY_COUNT ++}; + -+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) -+{ -+ kbdev->as[i].number = i; -+ kbdev->as[i].bf_data.addr = 0ULL; -+ kbdev->as[i].pf_data.addr = 0ULL; -+ kbdev->as[i].gf_data.addr = 0ULL; -+ kbdev->as[i].is_unresponsive = false; ++/* Packet classes used in timeline streams. ++ * Values are significant! Check MIPE documentation. ++ */ ++enum tl_packet_class { ++ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ ++ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ ++}; + -+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); -+ if (!kbdev->as[i].pf_wq) -+ return -ENOMEM; ++/* Packet types used in timeline streams. ++ * Values are significant! Check MIPE documentation. ++ */ ++enum tl_packet_type { ++ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ ++ TL_PACKET_TYPE_BODY = 1, /* stream's body */ ++ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ ++}; + -+ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); -+ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); -+ INIT_WORK(&kbdev->as[i].work_gpufault, kbase_mmu_gpu_fault_worker); ++/* Stream ID types (timeline family). */ ++enum tl_stream_id { ++ TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ ++ TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ ++ TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ ++}; + -+ return 0; -+} -diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c ++#endif /* _KBASE_MIPE_PROTO_H */ ++ ++/* clang-format on */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c new file mode 100644 -index 000000000..d716ce006 +index 000000000..10a7f506b --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c -@@ -0,0 +1,442 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c +@@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -253017,437 +254167,528 @@ index 000000000..d716ce006 + * + */ + -+/** -+ * DOC: Base kernel MMU management specific for Job Manager GPU. -+ */ ++#include ++#include ++#include + +#include -+#include -+#include -+#include -+#include -+#include ++#include + -+void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, -+ struct kbase_mmu_setup * const setup) ++/** ++ * kbase_native_mgm_alloc - Native physical memory allocation method ++ * ++ * @mgm_dev: The memory group manager the request is being made through. ++ * @group_id: A physical memory group ID, which must be valid but is not used. ++ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. ++ * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior. ++ * @order: Page order for physical page size (order=0 means 4 KiB, ++ * order=9 means 2 MiB). ++ * ++ * Delegates all memory allocation requests to the kernel's alloc_pages ++ * function. ++ * ++ * Return: Pointer to allocated page, or NULL if allocation failed. 
++ */ ++static struct page *kbase_native_mgm_alloc( ++ struct memory_group_manager_device *mgm_dev, int group_id, ++ gfp_t gfp_mask, unsigned int order) +{ -+ /* Set up the required caching policies at the correct indices -+ * in the memattr register. ++ /* ++ * Check that the base and the mgm defines, from separate header files, ++ * for the max number of memory groups are compatible. + */ -+ setup->memattr = -+ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << -+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | -+ (AS_MEMATTR_FORCE_TO_CACHE_ALL << -+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | -+ (AS_MEMATTR_WRITE_ALLOC << -+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << -+ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_WA << -+ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | -+ (AS_MEMATTR_AARCH64_NON_CACHEABLE << -+ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); ++ BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS); ++ /* ++ * Check that the mask used for storing the memory group ID is big ++ * enough for the largest possible memory group ID. ++ */ ++ BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK ++ >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) ++ < (BASE_MEM_GROUP_COUNT - 1)); + -+ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; -+ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; ++ CSTD_UNUSED(mgm_dev); ++ CSTD_UNUSED(group_id); ++ ++ return alloc_pages(gfp_mask, order); +} + -+void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, struct kbase_fault *fault) ++/** ++ * kbase_native_mgm_free - Native physical memory freeing method ++ * ++ * @mgm_dev: The memory group manager the request is being made through. ++ * @group_id: A physical memory group ID, which must be valid but is not used. ++ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. ++ * @page: Address of the struct associated with a page of physical ++ * memory that was allocated by calling kbase_native_mgm_alloc ++ * with the same argument values. ++ * @order: Page order for physical page size (order=0 means 4 KiB, ++ * order=9 means 2 MiB). ++ * ++ * Delegates all memory freeing requests to the kernel's __free_pages function. ++ */ ++static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, ++ int group_id, struct page *page, unsigned int order) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ u32 const status = fault->status; -+ u32 const exception_type = (status & 0xFF); -+ u32 const exception_data = (status >> 8) & 0xFFFFFF; -+ int const as_no = as->number; -+ unsigned long flags; -+ const uintptr_t fault_addr = fault->addr; ++ CSTD_UNUSED(mgm_dev); ++ CSTD_UNUSED(group_id); + -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "GPU bus fault in AS%d at PA %pK\n" -+ "raw fault status: 0x%X\n" -+ "exception type 0x%X: %s\n" -+ "exception data 0x%X\n" -+ "pid: %d\n", -+ as_no, (void *)fault_addr, -+ status, -+ exception_type, kbase_gpu_exception_name(exception_type), -+ exception_data, -+ kctx->pid); ++ __free_pages(page, order); ++} + -+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter -+ * dumping AS transaction begin -+ */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++/** ++ * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU ++ * ++ * @mgm_dev: The memory group manager the request is being made through. ++ * @group_id: A physical memory group ID, which must be valid but is not used. ++ * Its valid range is 0 .. 
MEMORY_GROUP_MANAGER_NR_GROUPS-1. ++ * @vma: The virtual memory area to insert the page into. ++ * @addr: An address contained in @vma to assign to the inserted page. ++ * @pfn: The kernel Page Frame Number to insert at @addr in @vma. ++ * @pgprot: Protection flags for the inserted page. ++ * ++ * Called from a CPU virtual memory page fault handler. Delegates all memory ++ * mapping requests to the kernel's vmf_insert_pfn_prot function. ++ * ++ * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table ++ * entry was successfully installed. ++ */ ++static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( ++ struct memory_group_manager_device *mgm_dev, int group_id, ++ struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn, pgprot_t pgprot) ++{ ++ CSTD_UNUSED(mgm_dev); ++ CSTD_UNUSED(group_id); + -+ /* Set the MMU into unmapped mode */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); ++} + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++/** ++ * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table ++ * entry ++ * ++ * @mgm_dev: The memory group manager the request is being made through. ++ * @group_id: A physical memory group ID, which must be valid but is not used. ++ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. ++ * @mmu_level: The level of the MMU page table where the page is getting mapped. ++ * @pte: The prepared page table entry. ++ * ++ * This function simply returns the @pte without modification. ++ * ++ * Return: A GPU page table entry to be stored in a page table. ++ */ ++static u64 ++kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, ++ int group_id, int mmu_level, u64 pte) ++{ ++ CSTD_UNUSED(mgm_dev); ++ CSTD_UNUSED(group_id); ++ CSTD_UNUSED(mmu_level); + -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ return pte; ++} ++ ++/** ++ * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in ++ * kbase_native_mgm_update_gpu_pte() ++ * ++ * @mgm_dev: The memory group manager the request is being made through. ++ * @group_id: A physical memory group ID, which must be valid but is not used. ++ * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. ++ * @mmu_level: The level of the MMU page table where the page is getting mapped. ++ * @pte: The prepared page table entry. ++ * ++ * This function simply returns the @pte without modification. ++ * ++ * Return: A GPU page table entry to be stored in a page table. 
++ */ ++static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, ++ int group_id, int mmu_level, u64 pte) ++{ ++ CSTD_UNUSED(mgm_dev); ++ CSTD_UNUSED(group_id); ++ CSTD_UNUSED(mmu_level); + ++ return pte; +} + ++struct memory_group_manager_device kbase_native_mgm_dev = { ++ .ops = { ++ .mgm_alloc_page = kbase_native_mgm_alloc, ++ .mgm_free_page = kbase_native_mgm_free, ++ .mgm_get_import_memory_id = NULL, ++ .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, ++ .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, ++ .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte, ++ }, ++ .data = NULL ++}; +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h +new file mode 100644 +index 000000000..1eae2fcf9 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * The caller must ensure it's retained the ctx to prevent it from being -+ * scheduled out whilst it's being worked on. ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ -+void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, const char *reason_str, -+ struct kbase_fault *fault) -+{ -+ unsigned long flags; -+ u32 exception_type; -+ u32 access_type; -+ u32 source_id; -+ int as_no; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; + -+ as_no = as->number; -+ kbdev = kctx->kbdev; -+ js_devdata = &kbdev->js_data; ++#ifndef _KBASE_NATIVE_MGM_H_ ++#define _KBASE_NATIVE_MGM_H_ + -+ /* Make sure the context was active */ -+ if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) -+ return; ++#include + -+ /* decode the fault status */ -+ exception_type = fault->status & 0xFF; -+ access_type = (fault->status >> 8) & 0x3; -+ source_id = (fault->status >> 16); ++/* ++ * kbase_native_mgm_dev - Native memory group manager device ++ * ++ * An implementation of the memory group manager interface that is intended for ++ * internal use when no platform-specific memory group manager is available. ++ * ++ * It ignores the specified group ID and delegates to the kernel's physical ++ * memory allocation and freeing functions. 
++ */ ++extern struct memory_group_manager_device kbase_native_mgm_dev; + -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "Unhandled Page fault in AS%d at VA 0x%016llX\n" -+ "Reason: %s\n" -+ "raw fault status: 0x%X\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n" -+ "pid: %d\n", -+ as_no, fault->addr, -+ reason_str, -+ fault->status, -+ exception_type, kbase_gpu_exception_name(exception_type), -+ access_type, kbase_gpu_access_type_name(fault->status), -+ source_id, -+ kctx->pid); ++#endif /* _KBASE_NATIVE_MGM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +new file mode 100644 +index 000000000..b65f9e7b5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +@@ -0,0 +1,306 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* hardware counters dump fault handling */ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && -+ (kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_DUMPING)) { -+ if ((fault->addr >= kbdev->hwcnt.addr) && -+ (fault->addr < (kbdev->hwcnt.addr + -+ kbdev->hwcnt.addr_bytes))) -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++#include "mali_kbase_pbha.h" ++ ++#include ++#include ++#define DTB_SET_SIZE 2 ++ ++static bool read_setting_valid(unsigned int id, unsigned int read_setting) ++{ ++ switch (id) { ++ /* Valid ID - fall through all */ ++ case SYSC_ALLOC_ID_R_OTHER: ++ case SYSC_ALLOC_ID_R_CSF: ++ case SYSC_ALLOC_ID_R_MMU: ++ case SYSC_ALLOC_ID_R_TILER_VERT: ++ case SYSC_ALLOC_ID_R_TILER_PTR: ++ case SYSC_ALLOC_ID_R_TILER_INDEX: ++ case SYSC_ALLOC_ID_R_TILER_OTHER: ++ case SYSC_ALLOC_ID_R_IC: ++ case SYSC_ALLOC_ID_R_ATTR: ++ case SYSC_ALLOC_ID_R_SCM: ++ case SYSC_ALLOC_ID_R_FSDC: ++ case SYSC_ALLOC_ID_R_VL: ++ case SYSC_ALLOC_ID_R_PLR: ++ case SYSC_ALLOC_ID_R_TEX: ++ case SYSC_ALLOC_ID_R_LSC: ++ switch (read_setting) { ++ /* Valid setting value - fall through all */ ++ case SYSC_ALLOC_L2_ALLOC: ++ case SYSC_ALLOC_NEVER_ALLOC: ++ case SYSC_ALLOC_ALWAYS_ALLOC: ++ case SYSC_ALLOC_PTL_ALLOC: ++ case SYSC_ALLOC_L2_PTL_ALLOC: ++ return true; ++ default: ++ return false; ++ } ++ default: ++ return false; + } -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ /* Stop the kctx from submitting more jobs and cause it to be scheduled -+ * out/rescheduled - this will occur on releasing the context's refcount -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ /* Unreachable */ ++ return false; ++} + -+ /* Kill any running jobs from the context. 
Submit is disallowed, so no -+ * more jobs from this context can appear in the job slots from this -+ * point on -+ */ -+ kbase_backend_jm_kill_running_jobs_from_kctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static bool write_setting_valid(unsigned int id, unsigned int write_setting) ++{ ++ switch (id) { ++ /* Valid ID - fall through all */ ++ case SYSC_ALLOC_ID_W_OTHER: ++ case SYSC_ALLOC_ID_W_CSF: ++ case SYSC_ALLOC_ID_W_PCB: ++ case SYSC_ALLOC_ID_W_TILER_PTR: ++ case SYSC_ALLOC_ID_W_TILER_VERT_PLIST: ++ case SYSC_ALLOC_ID_W_TILER_OTHER: ++ case SYSC_ALLOC_ID_W_L2_EVICT: ++ case SYSC_ALLOC_ID_W_L2_FLUSH: ++ case SYSC_ALLOC_ID_W_TIB_COLOR: ++ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH: ++ case SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB: ++ case SYSC_ALLOC_ID_W_TIB_CRC: ++ case SYSC_ALLOC_ID_W_TIB_DS: ++ case SYSC_ALLOC_ID_W_TIB_DS_AFBCH: ++ case SYSC_ALLOC_ID_W_TIB_DS_AFBCB: ++ case SYSC_ALLOC_ID_W_LSC: ++ switch (write_setting) { ++ /* Valid setting value - fall through all */ ++ case SYSC_ALLOC_L2_ALLOC: ++ case SYSC_ALLOC_NEVER_ALLOC: ++ case SYSC_ALLOC_ALWAYS_ALLOC: ++ case SYSC_ALLOC_PTL_ALLOC: ++ case SYSC_ALLOC_L2_PTL_ALLOC: ++ return true; ++ default: ++ return false; ++ } ++ default: ++ return false; ++ } + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ /* Unreachable */ ++ return false; ++} + -+ /* switch to UNMAPPED mode, will abort all jobs and stop -+ * any hw counter dumping -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/* Private structure to be returned as setting validity status */ ++struct settings_status { ++ /* specifies whether id and either one of settings is valid */ ++ bool overall; ++ /* specifies whether read setting is valid */ ++ bool read; ++ /* specifies whether write setting is valid*/ ++ bool write; ++}; + -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++static struct settings_status settings_valid(unsigned int id, unsigned int read_setting, ++ unsigned int write_setting) ++{ ++ struct settings_status valid = { .overall = (id < SYSC_ALLOC_COUNT * sizeof(u32)) }; + -+ /* AS transaction end */ -+ /* Clear down the fault */ -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ if (valid.overall) { ++ valid.read = read_setting_valid(id, read_setting); ++ valid.write = write_setting_valid(id, write_setting); ++ valid.overall = valid.read || valid.write; ++ } + ++ return valid; +} + -+/** -+ * kbase_mmu_interrupt_process() - Process a bus or page fault. -+ * @kbdev: The kbase_device the fault happened on -+ * @kctx: The kbase_context for the faulting address space if one was -+ * found. 
-+ * @as: The address space that has the fault -+ * @fault: Data relating to the fault -+ * -+ * This function will process a fault on a specific address space -+ */ -+static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_as *as, -+ struct kbase_fault *fault) ++bool kbasep_pbha_supported(struct kbase_device *kbdev) +{ -+ unsigned long flags; ++ const u32 arch_maj_rev = ++ ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ return (arch_maj_rev >= GPU_ID2_ARCH_MAJOR_REV_MAKE(11, 3)); ++} + -+ dev_dbg(kbdev->dev, -+ "Entering %s kctx %pK, as %pK\n", -+ __func__, (void *)kctx, (void *)as); ++int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, ++ unsigned int id, unsigned int read_setting, ++ unsigned int write_setting) ++{ ++ struct settings_status const valid = settings_valid(id, read_setting, write_setting); + -+ if (!kctx) { -+ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", -+ kbase_as_has_bus_fault(as, fault) ? -+ "Bus error" : "Page fault", -+ as->number, fault->addr); ++ if (valid.overall) { ++ unsigned int const sysc_alloc_num = id / sizeof(u32); ++ u32 modified_reg; + -+ /* Since no ctx was found, the MMU must be disabled. */ -+ WARN_ON(as->current_setup.transtab); ++ if (runtime) { ++ int i; + -+ if (kbase_as_has_bus_fault(as, fault)) { -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ } else if (kbase_as_has_page_fault(as, fault)) { -+ kbase_mmu_hw_clear_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_pm_context_active(kbdev); ++ /* Ensure host copy of SYSC_ALLOC is up to date */ ++ for (i = 0; i < SYSC_ALLOC_COUNT; i++) ++ kbdev->sysc_alloc[i] = kbase_reg_read( ++ kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); ++ kbase_pm_context_idle(kbdev); + } + -+ return; -+ } -+ -+ if (kbase_as_has_bus_fault(as, fault)) { -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ modified_reg = kbdev->sysc_alloc[sysc_alloc_num]; + -+ /* -+ * hw counters dumping in progress, signal the -+ * other thread that it failed -+ */ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ if ((kbdev->hwcnt.kctx == kctx) && -+ (kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_DUMPING)) -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++ switch (id % sizeof(u32)) { ++ case 0: ++ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC0_SET(modified_reg, ++ read_setting) : ++ modified_reg; ++ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC0_SET(modified_reg, ++ write_setting) : ++ modified_reg; ++ break; ++ case 1: ++ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC1_SET(modified_reg, ++ read_setting) : ++ modified_reg; ++ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC1_SET(modified_reg, ++ write_setting) : ++ modified_reg; ++ break; ++ case 2: ++ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC2_SET(modified_reg, ++ read_setting) : ++ modified_reg; ++ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC2_SET(modified_reg, ++ write_setting) : ++ modified_reg; ++ break; ++ case 3: ++ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC3_SET(modified_reg, ++ read_setting) : ++ modified_reg; ++ modified_reg = valid.write ? 
SYSC_ALLOC_W_SYSC_ALLOC3_SET(modified_reg, ++ write_setting) : ++ modified_reg; ++ break; ++ } + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ kbdev->sysc_alloc[sysc_alloc_num] = modified_reg; ++ } + -+ /* -+ * Stop the kctx from submitting more jobs and cause it -+ * to be scheduled out/rescheduled when all references -+ * to it are released -+ */ -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ return valid.overall ? 0 : -EINVAL; ++} + -+ dev_warn(kbdev->dev, -+ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", -+ as->number, fault->addr, -+ fault->extra_addr); ++void kbase_pbha_write_settings(struct kbase_device *kbdev) ++{ ++ if (kbasep_pbha_supported(kbdev)) { ++ int i; + -+ /* -+ * We need to switch to UNMAPPED mode - but we do this in a -+ * worker so that we can sleep -+ */ -+ WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); -+ atomic_inc(&kbdev->faults_pending); -+ } else { -+ WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); -+ atomic_inc(&kbdev->faults_pending); ++ for (i = 0; i < SYSC_ALLOC_COUNT; ++i) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)), ++ kbdev->sysc_alloc[i]); + } -+ -+ dev_dbg(kbdev->dev, -+ "Leaving %s kctx %pK, as %pK\n", -+ __func__, (void *)kctx, (void *)as); +} + -+static void validate_protected_page_fault(struct kbase_device *kbdev) ++static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, ++ const struct device_node *pbha_node) +{ -+ /* GPUs which support (native) protected mode shall not report page -+ * fault addresses unless it has protected debug mode and protected -+ * debug mode is turned on -+ */ -+ u32 protected_debug_mode = 0; ++ u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE]; ++ int sz, i; ++ bool valid = true; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { -+ protected_debug_mode = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; ++ sz = of_property_count_elems_of_size(pbha_node, "int_id_override", ++ sizeof(u32)); ++ if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { ++ dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n"); ++ return -EINVAL; ++ } ++ if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, ++ sz) != 0) { ++ dev_err(kbdev->dev, ++ "Failed to read DTB pbha.int_id_override\n"); ++ return -EINVAL; + } + -+ if (!protected_debug_mode) { -+ /* fault_addr should never be reported in protected mode. 
-+ * However, we just continue by printing an error message -+ */ -+ dev_err(kbdev->dev, "Fault address reported in protected mode\n"); ++ for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) { ++ unsigned int rdset = ++ SYSC_ALLOC_R_SYSC_ALLOC0_GET(dtb_data[i + 1]); ++ unsigned int wrset = ++ SYSC_ALLOC_W_SYSC_ALLOC0_GET(dtb_data[i + 1]); ++ valid = valid && ++ (kbase_pbha_record_settings(kbdev, false, dtb_data[i], ++ rdset, wrset) == 0); ++ if (valid) ++ dev_info(kbdev->dev, ++ "pbha.int_id_override 0x%x r0x%x w0x%x\n", ++ dtb_data[i], rdset, wrset); ++ } ++ if (i != sz || (!valid)) { ++ dev_err(kbdev->dev, ++ "Failed recording DTB data (pbha.int_id_override)\n"); ++ return -EINVAL; + } ++ return 0; +} + -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++#if MALI_USE_CSF ++static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, ++ const struct device_node *pbha_node) +{ -+ const int num_as = 16; -+ const int busfault_shift = MMU_PAGE_FAULT_FLAGS; -+ const int pf_shift = 0; -+ const unsigned long as_bit_mask = (1UL << num_as) - 1; -+ unsigned long flags; -+ u32 new_mask; -+ u32 tmp, bf_bits, pf_bits; -+ -+ dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", -+ __func__, irq_stat); -+ /* bus faults */ -+ bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; -+ /* page faults (note: Ignore ASes with both pf and bf) */ -+ pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; -+ -+ if (WARN_ON(kbdev == NULL)) -+ return; -+ -+ /* remember current mask */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); -+ /* mask interrupts for now */ -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -+ -+ while (bf_bits | pf_bits) { -+ struct kbase_as *as; -+ unsigned int as_no; -+ struct kbase_context *kctx; -+ struct kbase_fault *fault; -+ -+ /* -+ * the while logic ensures we have a bit set, no need to check -+ * for not-found here -+ */ -+ as_no = ffs(bf_bits | pf_bits) - 1; -+ as = &kbdev->as[as_no]; -+ -+ /* find the fault type */ -+ if (bf_bits & (1 << as_no)) -+ fault = &as->bf_data; -+ else -+ fault = &as->pf_data; -+ -+ /* -+ * Refcount the kctx ASAP - it shouldn't disappear anyway, since -+ * Bus/Page faults _should_ only occur whilst jobs are running, -+ * and a job causing the Bus/Page fault shouldn't complete until -+ * the MMU is updated -+ */ -+ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); -+ -+ /* find faulting address */ -+ fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_HI)); -+ fault->addr <<= 32; -+ fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_LO)); -+ /* Mark the fault protected or not */ -+ fault->protected_mode = kbdev->protected_mode; -+ -+ if (kbdev->protected_mode && fault->addr) { -+ /* check if address reporting is allowed */ -+ validate_protected_page_fault(kbdev); -+ } -+ -+ /* report the fault to debugfs */ -+ kbase_as_fault_debugfs_new(kbdev, as_no); -+ -+ /* record the fault status */ -+ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, -+ AS_FAULTSTATUS)); -+ fault->extra_addr = kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); -+ fault->extra_addr <<= 32; -+ fault->extra_addr |= kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); ++ u32 bits; ++ int err; + -+ if (kbase_as_has_bus_fault(as, fault)) { -+ /* Mark bus fault as handled. 
-+ * Note that a bus fault is processed first in case -+ * where both a bus fault and page fault occur. -+ */ -+ bf_bits &= ~(1UL << as_no); ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) ++ return 0; + -+ /* remove the queued BF (and PF) from the mask */ -+ new_mask &= ~(MMU_BUS_ERROR(as_no) | -+ MMU_PAGE_FAULT(as_no)); -+ } else { -+ /* Mark page fault as handled */ -+ pf_bits &= ~(1UL << as_no); ++ err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + -+ /* remove the queued PF from the mask */ -+ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ if (err < 0) { ++ if (err != -EINVAL) { ++ dev_err(kbdev->dev, ++ "DTB value for propagate_bits is improperly formed (err=%d)\n", ++ err); ++ return err; + } -+ -+ /* Process the interrupt for this address space */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + -+ /* reenable interrupts */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); -+ new_mask |= tmp; -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ if (bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) { ++ dev_err(kbdev->dev, "Bad DTB value for propagate_bits: 0x%x\n", bits); ++ return -EINVAL; ++ } + -+ dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", -+ __func__, irq_stat); ++ kbdev->pbha_propagate_bits = bits; ++ return 0; +} ++#endif + -+int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, -+ struct kbase_va_region *const reg) ++int kbase_pbha_read_dtb(struct kbase_device *kbdev) +{ -+ dev_dbg(kctx->kbdev->dev, -+ "Switching to incremental rendering for region %pK\n", -+ (void *)reg); -+ return kbase_job_slot_softstop_start_rp(kctx, reg); -+} ++ const struct device_node *pbha_node; ++ int err; + -+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) -+{ -+ kbdev->as[i].number = i; -+ kbdev->as[i].bf_data.addr = 0ULL; -+ kbdev->as[i].pf_data.addr = 0ULL; -+ kbdev->as[i].is_unresponsive = false; ++ if (!kbasep_pbha_supported(kbdev)) ++ return 0; + -+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); -+ if (!kbdev->as[i].pf_wq) -+ return -ENOMEM; ++ pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha"); ++ if (!pbha_node) ++ return 0; + -+ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); -+ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); ++ err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); + -+ return 0; ++#if MALI_USE_CSF ++ if (err < 0) ++ return err; ++ ++ err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); ++#endif ++ ++ return err; +} -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.h b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h new file mode 100644 -index 000000000..d6d3fcdee +index 000000000..79632194c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c -@@ -0,0 +1,3889 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.h +@@ -0,0 +1,77 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -253465,3884 +254706,3678 @@ index 000000000..d6d3fcdee + * + */ + -+/** -+ * DOC: Base kernel MMU management. -+ */ ++#ifndef _KBASE_PBHA_H ++#define _KBASE_PBHA_H + -+#include -+#include -+#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if !MALI_USE_CSF -+#include -+#endif -+ -+#include -+#include -+ -+/* Threshold used to decide whether to flush full caches or just a physical range */ -+#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 -+#define MGM_DEFAULT_PTE_GROUP (0) -+ -+/* Macro to convert updated PDGs to flags indicating levels skip in flush */ -+#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) -+ -+/* Small wrapper function to factor out GPU-dependent context releasing */ -+static void release_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+#if MALI_USE_CSF -+ CSTD_UNUSED(kbdev); -+ kbase_ctx_sched_release_ctx_lock(kctx); -+#else /* MALI_USE_CSF */ -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+#endif /* MALI_USE_CSF */ -+} -+ -+static void mmu_hw_operation_begin(struct kbase_device *kbdev) -+{ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+#if MALI_USE_CSF -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { -+ unsigned long flags; -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress); -+ kbdev->mmu_hw_operation_in_progress = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } -+#endif /* MALI_USE_CSF */ -+#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ -+} -+ -+static void mmu_hw_operation_end(struct kbase_device *kbdev) -+{ -+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+#if MALI_USE_CSF -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { -+ unsigned long flags; -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress); -+ kbdev->mmu_hw_operation_in_progress = false; -+ /* Invoke the PM state machine, the L2 power off may have been -+ * skipped due to the MMU command. -+ */ -+ kbase_pm_update_state(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } -+#endif /* MALI_USE_CSF */ -+#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ -+} + +/** -+ * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done -+ * through GPU_CONTROL interface. ++ * kbasep_pbha_supported - check whether PBHA registers are ++ * available + * -+ * @kbdev: kbase device to check GPU model ID on. ++ * @kbdev: Device pointer + * -+ * This function returns whether a cache flush for page table update should -+ * run through GPU_CONTROL interface or MMU_AS_CONTROL interface. ++ * Should only be used in mali_kbase_pbha* files - thus the ++ * kbase[p] prefix. + * -+ * Return: True if cache flush should be done on GPU command. 
++ * Return: True if pbha is supported, false otherwise + */ -+static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) -+{ -+ uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & -+ GPU_ID2_ARCH_MAJOR) >> -+ GPU_ID2_ARCH_MAJOR_SHIFT; -+ -+ return arch_maj_cur > 11; -+} ++bool kbasep_pbha_supported(struct kbase_device *kbdev); + +/** -+ * mmu_flush_pa_range() - Flush physical address range ++ * kbase_pbha_record_settings - record PBHA settings to be applied when ++ * L2 is powered down + * -+ * @kbdev: kbase device to issue the MMU operation on. -+ * @phys: Starting address of the physical range to start the operation on. -+ * @nr_bytes: Number of bytes to work on. -+ * @op: Type of cache flush operation to perform. ++ * @kbdev: Device pointer ++ * @runtime: true if it's called at runtime and false if it's called on init. ++ * @id: memory access source ID ++ * @read_setting: Read setting ++ * @write_setting: Write setting + * -+ * Issue a cache flush physical range command. ++ * Return: 0 on success, otherwise error code. + */ -+#if MALI_USE_CSF -+static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes, -+ enum kbase_mmu_op_type op) -+{ -+ u32 flush_op; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* Translate operation to command */ -+ if (op == KBASE_MMU_OP_FLUSH_PT) -+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; -+ else if (op == KBASE_MMU_OP_FLUSH_MEM) -+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; -+ else { -+ dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); -+ return; -+ } -+ -+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op)) -+ dev_err(kbdev->dev, "Flush for physical address range did not complete"); -+} -+#endif ++int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime, ++ unsigned int id, unsigned int read_setting, ++ unsigned int write_setting); + +/** -+ * mmu_invalidate() - Perform an invalidate operation on MMU caches. -+ * @kbdev: The Kbase device. -+ * @kctx: The Kbase context. -+ * @as_nr: GPU address space number for which invalidate is required. -+ * @op_param: Non-NULL pointer to struct containing information about the MMU -+ * operation to perform. ++ * kbase_pbha_write_settings - write recorded PBHA settings to GPU ++ * registers + * -+ * Perform an MMU invalidate operation on a particual address space -+ * by issuing a UNLOCK command. ++ * @kbdev: Device pointer ++ * ++ * Only valid to call this function when L2 is powered down, otherwise ++ * this will not affect PBHA settings. + */ -+static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { -+ as_nr = kctx ? kctx->as_nr : as_nr; -+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) -+ dev_err(kbdev->dev, -+ "Invalidate after GPU page table update did not complete"); -+ } -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++void kbase_pbha_write_settings(struct kbase_device *kbdev); + -+/* Perform a flush/invalidate on a particular address space ++/** ++ * kbase_pbha_read_dtb - read PBHA settings from DTB and record it to be ++ * applied when L2 is powered down ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, otherwise error code. 
+ */ -+static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ unsigned long flags; -+ -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) -+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ -+} ++int kbase_pbha_read_dtb(struct kbase_device *kbdev); + -+/** -+ * mmu_flush_invalidate() - Perform a flush operation on GPU caches. -+ * @kbdev: The Kbase device. -+ * @kctx: The Kbase context. -+ * @as_nr: GPU address space number for which flush + invalidate is required. -+ * @op_param: Non-NULL pointer to struct containing information about the MMU -+ * operation to perform. ++#endif /* _KBASE_PBHA_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +new file mode 100644 +index 000000000..1cc29c700 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +@@ -0,0 +1,227 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * This function performs the cache flush operation described by @op_param. -+ * The function retains a reference to the given @kctx and releases it -+ * after performing the flush operation. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * -+ * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue -+ * a cache flush + invalidate to the L2 caches and invalidate the TLBs. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue -+ * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as -+ * invalidating the TLBs. + */ -+static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ bool ctx_is_in_runpool; + -+ /* Early out if there is nothing to do */ -+ if (op_param->nr == 0) -+ return; ++#include "mali_kbase_pbha_debugfs.h" ++#include "mali_kbase_pbha.h" ++#include ++#include ++#include + -+ /* If no context is provided then MMU operation is performed on address -+ * space which does not belong to user space context. Otherwise, retain -+ * refcount to context provided and release after flush operation. 
-+ */ -+ if (!kctx) { -+ mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param); -+ } else { -+#if !MALI_USE_CSF -+ mutex_lock(&kbdev->js_data.queue_mutex); -+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); -+ mutex_unlock(&kbdev->js_data.queue_mutex); -+#else -+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); -+#endif /* !MALI_USE_CSF */ ++#if MALI_USE_CSF ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#endif + -+ if (ctx_is_in_runpool) { -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++static int int_id_overrides_show(struct seq_file *sfile, void *data) ++{ ++ struct kbase_device *kbdev = sfile->private; ++ int i; + -+ mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); ++ kbase_pm_context_active(kbdev); + -+ release_ctx(kbdev, kctx); ++ /* Minimal header for readability */ ++ seq_puts(sfile, "// R W\n"); ++ for (i = 0; i < SYSC_ALLOC_COUNT; ++i) { ++ int j; ++ u32 reg = kbase_reg_read(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i))); ++ ++ for (j = 0; j < sizeof(u32); ++j) { ++ u8 r_val; ++ u8 w_val; ++ ++ switch (j) { ++ case 0: ++ r_val = SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg); ++ w_val = SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg); ++ break; ++ case 1: ++ r_val = SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg); ++ w_val = SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg); ++ break; ++ case 2: ++ r_val = SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg); ++ w_val = SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg); ++ break; ++ case 3: ++ r_val = SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg); ++ w_val = SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg); ++ break; ++ } ++ seq_printf(sfile, "%2zu 0x%x 0x%x\n", ++ (i * sizeof(u32)) + j, r_val, w_val); + } + } ++ kbase_pm_context_idle(kbdev); ++ ++ return 0; +} + -+/** -+ * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via -+ * the GPU_CONTROL interface -+ * @kbdev: The Kbase device. -+ * @kctx: The Kbase context. -+ * @as_nr: GPU address space number for which flush + invalidate is required. -+ * @op_param: Non-NULL pointer to struct containing information about the MMU -+ * operation to perform. -+ * -+ * Perform a flush/invalidate on a particular address space via the GPU_CONTROL -+ * interface. -+ */ -+static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, -+ int as_nr, const struct kbase_mmu_hw_op_param *op_param) ++static ssize_t int_id_overrides_write(struct file *file, ++ const char __user *ubuf, size_t count, ++ loff_t *ppos) +{ -+ unsigned long flags; ++ struct seq_file *sfile = file->private_data; ++ struct kbase_device *kbdev = sfile->private; ++ char raw_str[128]; ++ unsigned int id; ++ unsigned int r_val; ++ unsigned int w_val; + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (count >= sizeof(raw_str)) ++ return -E2BIG; ++ if (copy_from_user(raw_str, ubuf, count)) ++ return -EINVAL; ++ raw_str[count] = '\0'; + -+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { -+ as_nr = kctx ? 
kctx->as_nr : as_nr; -+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) -+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); -+ } ++ if (sscanf(raw_str, "%u %x %x", &id, &r_val, &w_val) != 3) ++ return -EINVAL; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+} ++ if (kbase_pbha_record_settings(kbdev, true, id, r_val, w_val)) ++ return -EINVAL; + -+static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, -+ phys_addr_t phys, size_t size, -+ enum kbase_mmu_op_type flush_op) -+{ -+ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); ++ /* This is a debugfs config write, so reset GPU such that changes take effect ASAP */ ++ kbase_pm_context_active(kbdev); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ kbase_pm_context_idle(kbdev); ++ ++ return count; +} + -+static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) ++static int int_id_overrides_open(struct inode *in, struct file *file) +{ -+ /* In non-coherent system, ensure the GPU can read -+ * the pages from memory -+ */ -+ if (kbdev->system_coherency == COHERENCY_NONE) -+ dma_sync_single_for_device(kbdev->dev, handle, size, -+ DMA_TO_DEVICE); ++ return single_open(file, int_id_overrides_show, in->i_private); +} + ++#if MALI_USE_CSF +/** -+ * kbase_mmu_sync_pgd() - sync page directory to memory when needed. -+ * @kbdev: Device pointer. -+ * @kctx: Context pointer. -+ * @phys: Starting physical address of the destination region. -+ * @handle: Address of DMA region. -+ * @size: Size of the region to sync. -+ * @flush_op: MMU cache flush operation to perform on the physical address -+ * range, if GPU control is available. ++ * propagate_bits_show - Read PBHA bits from L2_CONFIG out to debugfs. + * -+ * This function is called whenever the association between a virtual address -+ * range and a physical address range changes, because a mapping is created or -+ * destroyed. -+ * One of the effects of this operation is performing an MMU cache flush -+ * operation only on the physical address range affected by this function, if -+ * GPU control is available. ++ * @sfile: The debugfs entry. ++ * @data: Data associated with the entry. + * -+ * This should be called after each page directory update. ++ * Return: 0 in all cases. + */ -+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx, -+ phys_addr_t phys, dma_addr_t handle, size_t size, -+ enum kbase_mmu_op_type flush_op) ++static int propagate_bits_show(struct seq_file *sfile, void *data) +{ ++ struct kbase_device *kbdev = sfile->private; ++ u32 l2_config_val; + -+ kbase_mmu_sync_pgd_cpu(kbdev, handle, size); -+ kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); -+} ++ kbase_csf_scheduler_pm_active(kbdev); ++ kbase_pm_wait_for_l2_powered(kbdev); ++ l2_config_val = L2_CONFIG_PBHA_HWU_GET(kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG))); ++ kbase_csf_scheduler_pm_idle(kbdev); + -+/* -+ * Definitions: -+ * - PGD: Page Directory. -+ * - PTE: Page Table Entry. A 64bit value pointing to the next -+ * level of translation -+ * - ATE: Address Translation Entry. A 64bit value pointing to -+ * a 4kB physical page. 
-+ */ ++ seq_printf(sfile, "PBHA Propagate Bits: 0x%x\n", l2_config_val); ++ return 0; ++} + -+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int group_id, u64 *dirty_pgds); ++static int propagate_bits_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, propagate_bits_show, in->i_private); ++} + +/** -+ * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and -+ * free memory of the page directories ++ * propagate_bits_write - Write input value from debugfs to PBHA bits of L2_CONFIG register. + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @pgds: Physical addresses of page directories to be freed. -+ * @vpfn: The virtual page frame number. -+ * @level: The level of MMU page table. -+ * @flush_op: The type of MMU flush operation to perform. -+ * @dirty_pgds: Flags to track every level where a PGD has been updated. ++ * @file: Pointer to file struct of debugfs node. ++ * @ubuf: Pointer to user buffer with value to be written. ++ * @count: Size of user buffer. ++ * @ppos: Not used. ++ * ++ * Return: Size of buffer passed in when successful, but error code E2BIG/EINVAL otherwise. + */ -+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, phys_addr_t *pgds, -+ u64 vpfn, int level, -+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); -+ -+static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) ++static ssize_t propagate_bits_write(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos) +{ -+ atomic_sub(1, &kbdev->memdev.used_pages); ++ struct seq_file *sfile = file->private_data; ++ struct kbase_device *kbdev = sfile->private; ++ /* 32 characters should be enough for the input string in any base */ ++ char raw_str[32]; ++ unsigned long propagate_bits; + -+ /* If MMU tables belong to a context then pages will have been accounted -+ * against it, so we must decrement the usage counts here. ++ if (count >= sizeof(raw_str)) ++ return -E2BIG; ++ if (copy_from_user(raw_str, ubuf, count)) ++ return -EINVAL; ++ raw_str[count] = '\0'; ++ if (kstrtoul(raw_str, 0, &propagate_bits)) ++ return -EINVAL; ++ ++ /* Check propagate_bits input argument does not ++ * exceed the maximum size of the propagate_bits mask. + */ -+ if (mmut->kctx) { -+ kbase_process_page_usage_dec(mmut->kctx, 1); -+ atomic_sub(1, &mmut->kctx->used_pages); ++ if (propagate_bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) ++ return -EINVAL; ++ /* Cast to u8 is safe as check is done already to ensure size is within ++ * correct limits. 
++ */ ++ kbdev->pbha_propagate_bits = (u8)propagate_bits; ++ ++ /* GPU Reset will set new values in L2 config */ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { ++ kbase_reset_gpu(kbdev); ++ kbase_reset_gpu_wait(kbdev); + } + -+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); ++ return count; +} + -+static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, -+ struct page *p) -+{ -+ struct kbase_page_metadata *page_md = kbase_page_private(p); -+ bool page_is_isolated = false; ++static const struct file_operations pbha_propagate_bits_fops = { ++ .owner = THIS_MODULE, ++ .open = propagate_bits_open, ++ .read = seq_read, ++ .write = propagate_bits_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++#endif /* MALI_USE_CSF */ + -+ lockdep_assert_held(&mmut->mmu_lock); ++static const struct file_operations pbha_int_id_overrides_fops = { ++ .owner = THIS_MODULE, ++ .open = int_id_overrides_open, ++ .read = seq_read, ++ .write = int_id_overrides_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ if (!kbase_page_migration_enabled) -+ return false; ++void kbase_pbha_debugfs_init(struct kbase_device *kbdev) ++{ ++ if (kbasep_pbha_supported(kbdev)) { ++ const mode_t mode = 0644; ++ struct dentry *debugfs_pbha_dir = debugfs_create_dir( ++ "pbha", kbdev->mali_debugfs_directory); + -+ spin_lock(&page_md->migrate_lock); -+ if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { -+ WARN_ON_ONCE(!mmut->kctx); -+ if (IS_PAGE_ISOLATED(page_md->status)) { -+ page_md->status = PAGE_STATUS_SET(page_md->status, -+ FREE_PT_ISOLATED_IN_PROGRESS); -+ page_md->data.free_pt_isolated.kbdev = kbdev; -+ page_is_isolated = true; -+ } else { -+ page_md->status = -+ PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); ++ if (IS_ERR_OR_NULL(debugfs_pbha_dir)) { ++ dev_err(kbdev->dev, ++ "Couldn't create mali debugfs page-based hardware attributes directory\n"); ++ return; + } -+ } else { -+ WARN_ON_ONCE(mmut->kctx); -+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); -+ } -+ spin_unlock(&page_md->migrate_lock); + -+ if (unlikely(page_is_isolated)) { -+ /* Do the CPU cache flush and accounting here for the isolated -+ * PGD page, which is done inside kbase_mmu_free_pgd() for the -+ * PGD page that did not get isolated. -+ */ -+ dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, -+ DMA_BIDIRECTIONAL); -+ kbase_mmu_account_freed_pgd(kbdev, mmut); ++ debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, ++ kbdev, &pbha_int_id_overrides_fops); ++#if MALI_USE_CSF ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) ++ debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev, ++ &pbha_propagate_bits_fops); ++#endif /* MALI_USE_CSF */ + } -+ -+ return page_is_isolated; +} -+ -+/** -+ * kbase_mmu_free_pgd() - Free memory of the page directory +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h +new file mode 100644 +index 000000000..508ecdff9 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.h +@@ -0,0 +1,34 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @pgd: Physical address of page directory to be freed. ++ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * This function is supposed to be called with mmu_lock held and after -+ * ensuring that GPU won't be able to access the page. + */ -+static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ phys_addr_t pgd) -+{ -+ struct page *p; -+ bool page_is_isolated = false; -+ -+ lockdep_assert_held(&mmut->mmu_lock); + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); ++#ifndef _KBASE_PBHA_DEBUGFS_H ++#define _KBASE_PBHA_DEBUGFS_H + -+ if (likely(!page_is_isolated)) { -+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); -+ kbase_mmu_account_freed_pgd(kbdev, mmut); -+ } -+} ++#include + +/** -+ * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list ++ * kbase_pbha_debugfs_init - Initialize pbha debugfs directory + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. ++ * @kbdev: Device pointer ++ */ ++void kbase_pbha_debugfs_init(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_PBHA_DEBUGFS_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c +new file mode 100644 +index 000000000..265c676f1 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c +@@ -0,0 +1,119 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * This function will call kbase_mmu_free_pgd() on each page directory page -+ * present in the list of free PGDs inside @mmut. ++ * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2022 ARM Limited. All rights reserved. + * -+ * The function is supposed to be called after the GPU cache and MMU TLB has -+ * been invalidated post the teardown loop. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * The mmu_lock shall be held prior to calling the function. 
+ */ -+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) -+{ -+ size_t i; -+ -+ lockdep_assert_held(&mmut->mmu_lock); + -+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) -+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); ++#include ++#include ++#include ++#include ++#include + -+ mmut->scratch_mem.free_pgds.head_index = 0; -+} ++/* ++ * This file is included only for type definitions and functions belonging to ++ * specific platform folders. Do not add dependencies with symbols that are ++ * defined somewhere else. ++ */ ++#include + -+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) -+{ -+ lockdep_assert_held(&mmut->mmu_lock); ++#ifndef CONFIG_OF + -+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) -+ return; ++#define PLATFORM_CONFIG_RESOURCE_COUNT 4 + -+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; -+} -+ -+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) -+{ -+ lockdep_assert_held(&mmut->mmu_lock); -+ -+ mmut->scratch_mem.free_pgds.head_index = 0; -+} ++static struct platform_device *mali_device; + +/** -+ * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to -+ * a region on a GPU page fault -+ * @kbdev: KBase device -+ * @reg: The region that will be backed with more pages -+ * @fault_rel_pfn: PFN of the fault relative to the start of the region -+ * -+ * This calculates how much to increase the backing of a region by, based on -+ * where a GPU page fault occurred and the flags in the region. -+ * -+ * This can be more than the minimum number of pages that would reach -+ * @fault_rel_pfn, for example to reduce the overall rate of page fault -+ * interrupts on a region, or to ensure that the end address is aligned. ++ * kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources ++ * struct to Linux-specific resources ++ * @io_resources: Input IO resource data ++ * @linux_resources: Pointer to output array of Linux resource structures + * -+ * Return: the number of backed pages to increase by ++ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function ++ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. ++ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. + */ -+static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, -+ struct kbase_va_region *reg, size_t fault_rel_pfn) ++static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) +{ -+ size_t multiple = reg->extension; -+ size_t reg_current_size = kbase_reg_current_backed_size(reg); -+ size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; -+ size_t remainder; -+ -+ if (!multiple) { -+ dev_warn( -+ kbdev->dev, -+ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", -+ ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); -+ return minimum_extra; ++ if (!io_resources || !linux_resources) { ++ pr_err("%s: couldn't find proper resources\n", __func__); ++ return; + } + -+ /* Calculate the remainder to subtract from minimum_extra to make it -+ * the desired (rounded down) multiple of the extension. 
-+ * Depending on reg's flags, the base used for calculating multiples is -+ * different -+ */ -+ -+ /* multiple is based from the current backed size, even if the -+ * current backed size/pfn for end of committed memory are not -+ * themselves aligned to multiple -+ */ -+ remainder = minimum_extra % multiple; -+ -+#if !MALI_USE_CSF -+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { -+ /* multiple is based from the top of the initial commit, which -+ * has been allocated in such a way that (start_pfn + -+ * initial_commit) is already aligned to multiple. Hence the -+ * pfn for the end of committed memory will also be aligned to -+ * multiple -+ */ -+ size_t initial_commit = reg->initial_commit; ++ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); + -+ if (fault_rel_pfn < initial_commit) { -+ /* this case is just to catch in case it's been -+ * recommitted by userspace to be smaller than the -+ * initial commit -+ */ -+ minimum_extra = initial_commit - reg_current_size; -+ remainder = 0; -+ } else { -+ /* same as calculating -+ * (fault_rel_pfn - initial_commit + 1) -+ */ -+ size_t pages_after_initial = minimum_extra + -+ reg_current_size - initial_commit; ++ linux_resources[0].start = io_resources->io_memory_region.start; ++ linux_resources[0].end = io_resources->io_memory_region.end; ++ linux_resources[0].flags = IORESOURCE_MEM; + -+ remainder = pages_after_initial % multiple; -+ } -+ } -+#endif /* !MALI_USE_CSF */ ++ linux_resources[1].start = io_resources->job_irq_number; ++ linux_resources[1].end = io_resources->job_irq_number; ++ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + -+ if (remainder == 0) -+ return minimum_extra; ++ linux_resources[2].start = io_resources->mmu_irq_number; ++ linux_resources[2].end = io_resources->mmu_irq_number; ++ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + -+ return minimum_extra + multiple - remainder; ++ linux_resources[3].start = io_resources->gpu_irq_number; ++ linux_resources[3].end = io_resources->gpu_irq_number; ++ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; +} + -+#ifdef CONFIG_MALI_CINSTR_GWT -+static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, -+ struct kbase_as *faulting_as, -+ u64 start_pfn, size_t nr, -+ u32 kctx_id, u64 dirty_pgds) ++int kbase_platform_register(void) +{ -+ /* Calls to this function are inherently synchronous, with respect to -+ * MMU operations. 
-+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; -+ struct kbase_mmu_hw_op_param op_param; -+ int ret = 0; -+ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ struct kbase_platform_config *config; ++ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; ++ int err; + -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ ++ if (config == NULL) { ++ pr_err("%s: couldn't get platform config\n", __func__); ++ return -ENODEV; ++ } + -+ /* flush L2 and unlock the VA (resumes the MMU) */ -+ op_param.vpfn = start_pfn; -+ op_param.nr = nr; -+ op_param.op = KBASE_MMU_OP_FLUSH_PT; -+ op_param.kctx_id = kctx_id; -+ op_param.mmu_sync_info = mmu_sync_info; -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ unsigned long irq_flags; ++ mali_device = platform_device_alloc("mali", 0); ++ if (mali_device == NULL) ++ return -ENOMEM; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ op_param.flush_skip_levels = -+ pgd_level_to_skip_flush(dirty_pgds); -+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+ } else { -+ mmu_hw_operation_begin(kbdev); -+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); -+ mmu_hw_operation_end(kbdev); ++ kbasep_config_parse_io_resources(config->io_resources, resources); ++ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); ++ if (err) { ++ platform_device_put(mali_device); ++ mali_device = NULL; ++ return err; + } + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ -+ if (ret) -+ dev_err(kbdev->dev, -+ "Flush for GPU page fault due to write access did not complete"); ++ err = platform_device_add(mali_device); ++ if (err) { ++ platform_device_unregister(mali_device); ++ mali_device = NULL; ++ return err; ++ } + -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ return 0; +} ++EXPORT_SYMBOL(kbase_platform_register); + -+static void set_gwt_element_page_addr_and_size( -+ struct kbasep_gwt_list_element *element, -+ u64 fault_page_addr, struct tagged_addr fault_phys) ++void kbase_platform_unregister(void) +{ -+ u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; -+ unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1); -+ -+ /* If the fault address lies within a 2MB page, then consider -+ * the whole 2MB page for dumping to avoid incomplete dumps. -+ */ -+ if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) { -+ element->page_addr = fault_page_addr & ~(SZ_2M - 1); -+ element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE; -+ } else { -+ element->page_addr = fault_page_addr; -+ element->num_pages = 1; -+ } ++ if (mali_device) ++ platform_device_unregister(mali_device); +} ++EXPORT_SYMBOL(kbase_platform_unregister); + -+static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, -+ struct kbase_as *faulting_as) -+{ -+ struct kbasep_gwt_list_element *pos; -+ struct kbase_va_region *region; -+ struct kbase_device *kbdev; -+ struct tagged_addr *fault_phys_addr; -+ struct kbase_fault *fault; -+ u64 fault_pfn, pfn_offset; -+ int as_no; -+ u64 dirty_pgds = 0; -+ -+ as_no = faulting_as->number; -+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); -+ fault = &faulting_as->pf_data; -+ fault_pfn = fault->addr >> PAGE_SHIFT; -+ -+ kbase_gpu_vm_lock(kctx); -+ -+ /* Find region and check if it should be writable. 
*/ -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, -+ fault->addr); -+ if (kbase_is_region_invalid_or_free(region)) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory is not mapped on the GPU", -+ &faulting_as->pf_data); -+ return; -+ } -+ -+ if (!(region->flags & KBASE_REG_GPU_WR)) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Region does not have write permissions", -+ &faulting_as->pf_data); -+ return; -+ } ++#endif /* CONFIG_OF */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c +new file mode 100644 +index 000000000..62a132816 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c +@@ -0,0 +1,311 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ pfn_offset = fault_pfn - region->start_pfn; -+ fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; ++/** ++ * DOC: Base kernel power management APIs ++ */ + -+ /* Capture addresses of faulting write location -+ * for job dumping if write tracking is enabled. -+ */ -+ if (kctx->gwt_enabled) { -+ u64 fault_page_addr = fault->addr & PAGE_MASK; -+ bool found = false; -+ /* Check if this write was already handled. */ -+ list_for_each_entry(pos, &kctx->gwt_current_list, link) { -+ if (fault_page_addr == pos->page_addr) { -+ found = true; -+ break; -+ } -+ } ++#include ++#include ++#include ++#include ++#include + -+ if (!found) { -+ pos = kmalloc(sizeof(*pos), GFP_KERNEL); -+ if (pos) { -+ pos->region = region; -+ set_gwt_element_page_addr_and_size(pos, -+ fault_page_addr, *fault_phys_addr); -+ list_add(&pos->link, &kctx->gwt_current_list); -+ } else { -+ dev_warn(kbdev->dev, "kmalloc failure"); -+ } -+ } -+ } ++#include ++#include + -+ /* Now make this faulting page writable to GPU. 
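/*
 * Consumer-side sketch of the resource packing above: index 0 carries the
 * register window and indices 1-3 the job/MMU/GPU interrupts. The probe
 * routine below is hypothetical and only shows how those resources would be
 * read back with the standard platform-device helpers; it is not kbase's
 * actual probe path.
 */
static int example_mali_probe(struct platform_device *pdev)
{
	struct resource *regs;
	int job_irq, mmu_irq, gpu_irq;

	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	job_irq = platform_get_irq(pdev, 0);	/* job_irq_number */
	mmu_irq = platform_get_irq(pdev, 1);	/* mmu_irq_number */
	gpu_irq = platform_get_irq(pdev, 2);	/* gpu_irq_number */
	if (!regs || job_irq < 0 || mmu_irq < 0 || gpu_irq < 0)
		return -ENODEV;

	dev_info(&pdev->dev, "regs %pR, IRQs %d/%d/%d\n",
		 regs, job_irq, mmu_irq, gpu_irq);
	return 0;
}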
*/ -+ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, -+ region->flags, region->gpu_alloc->group_id, &dirty_pgds); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++#include ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, -+ kctx->id, dirty_pgds); ++#include + -+ kbase_gpu_vm_unlock(kctx); ++int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) ++{ ++ return kbase_hwaccess_pm_powerup(kbdev, flags); +} + -+static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, -+ struct kbase_as *faulting_as) ++void kbase_pm_halt(struct kbase_device *kbdev) +{ -+ struct kbase_fault *fault = &faulting_as->pf_data; -+ -+ switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { -+ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: -+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: -+ kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); -+ break; -+ case AS_FAULTSTATUS_ACCESS_TYPE_EX: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Execute Permission fault", fault); -+ break; -+ case AS_FAULTSTATUS_ACCESS_TYPE_READ: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Read Permission fault", fault); -+ break; -+ default: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Unknown Permission fault", fault); -+ break; -+ } ++ kbase_hwaccess_pm_halt(kbdev); +} -+#endif + -+/** -+ * estimate_pool_space_required - Determine how much a pool should be grown by to support a future -+ * allocation -+ * @pool: The memory pool to check, including its linked pools -+ * @pages_required: Number of 4KiB pages require for the pool to support a future allocation -+ * -+ * The value returned is accounting for the size of @pool and the size of each memory pool linked to -+ * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to -+ * allocate from. -+ * -+ * Note: this is only an estimate, because even during the calculation the memory pool(s) involved -+ * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an -+ * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller -+ * should keep attempting an allocation and then re-growing with a new value queried form this -+ * function until the allocation succeeds. -+ * -+ * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an -+ * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the -+ * allocation. 
-+ */ -+static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required) ++void kbase_pm_context_active(struct kbase_device *kbdev) +{ -+ size_t pages_still_required; -+ -+ for (pages_still_required = pages_required; pool != NULL && pages_still_required; -+ pool = pool->next_pool) { -+ size_t pool_size_4k; -+ -+ kbase_mem_pool_lock(pool); -+ -+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order; -+ if (pool_size_4k >= pages_still_required) -+ pages_still_required = 0; -+ else -+ pages_still_required -= pool_size_4k; -+ -+ kbase_mem_pool_unlock(pool); -+ } -+ return pages_still_required; ++ (void)kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + -+/** -+ * page_fault_try_alloc - Try to allocate memory from a context pool -+ * @kctx: Context pointer -+ * @region: Region to grow -+ * @new_pages: Number of 4 KiB pages to allocate -+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be -+ * either 4 KiB or 2 MiB pages, depending on the number of pages requested. -+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for -+ * 4 KiB. -+ * @prealloc_sas: Pointer to kbase_sub_alloc structures -+ * -+ * This function will try to allocate as many pages as possible from the context pool, then if -+ * required will try to allocate the remaining pages from the device pool. -+ * -+ * This function will not allocate any new memory beyond that is already present in the context or -+ * device pools. This is because it is intended to be called whilst the thread has acquired the -+ * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is -+ * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close(). -+ * -+ * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB -+ * pages, otherwise it will be a count of 4 KiB pages. 
-+ * -+ * Return: true if successful, false on failure -+ */ -+static bool page_fault_try_alloc(struct kbase_context *kctx, -+ struct kbase_va_region *region, size_t new_pages, -+ int *pages_to_grow, bool *grow_2mb_pool, -+ struct kbase_sub_alloc **prealloc_sas) ++int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, ++ enum kbase_pm_suspend_handler suspend_handler) +{ -+ size_t total_gpu_pages_alloced = 0; -+ size_t total_cpu_pages_alloced = 0; -+ struct kbase_mem_pool *pool, *root_pool; -+ bool alloc_failed = false; -+ size_t pages_still_required; -+ size_t total_mempools_free_4k = 0; -+ -+ lockdep_assert_held(&kctx->reg_lock); -+ lockdep_assert_held(&kctx->mem_partials_lock); ++ int c; + -+ if (WARN_ON(region->gpu_alloc->group_id >= -+ MEMORY_GROUP_MANAGER_NR_GROUPS)) { -+ /* Do not try to grow the memory pool */ -+ *pages_to_grow = 0; -+ return false; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, ++ suspend_handler, current->pid); ++ kbase_pm_lock(kbdev); + -+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) { -+ root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; -+ *grow_2mb_pool = true; -+ } else { -+ root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; -+ *grow_2mb_pool = false; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, ++ suspend_handler)) { ++ kbase_pm_unlock(kbdev); ++ return 1; + } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ if (region->gpu_alloc != region->cpu_alloc) -+ new_pages *= 2; ++ if (kbase_pm_is_suspending(kbdev)) { ++ switch (suspend_handler) { ++ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: ++ if (kbdev->pm.active_count != 0) ++ break; ++ fallthrough; ++ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: ++ kbase_pm_unlock(kbdev); ++ return 1; + -+ /* Determine how many pages are in the pools before trying to allocate. -+ * Don't attempt to allocate & free if the allocation can't succeed. -+ */ -+ pages_still_required = estimate_pool_space_required(root_pool, new_pages); ++ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: ++ fallthrough; ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); ++ break; ++ } ++ } ++ c = ++kbdev->pm.active_count; ++ KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_ACTIVE, NULL, c); + -+ if (pages_still_required) { -+ /* Insufficient pages in pools. Don't try to allocate - just -+ * request a grow. ++ if (c == 1) { ++ /* First context active: Power on the GPU and ++ * any cores requested by the policy + */ -+ *pages_to_grow = pages_still_required; -+ -+ return false; ++ kbase_hwaccess_pm_gpu_active(kbdev); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ kbase_clk_rate_trace_manager_gpu_active(kbdev); + } + -+ /* Since we're not holding any of the mempool locks, the amount of memory in the pools may -+ * change between the above estimate and the actual allocation. 
-+ */ -+ pages_still_required = new_pages; -+ for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) { -+ size_t pool_size_4k; -+ size_t pages_to_alloc_4k; -+ size_t pages_to_alloc_4k_per_alloc; ++ kbase_pm_unlock(kbdev); ++ dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); + -+ kbase_mem_pool_lock(pool); ++ return 0; ++} + -+ /* Allocate as much as possible from this pool*/ -+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order; -+ total_mempools_free_4k += pool_size_4k; -+ pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k); -+ if (region->gpu_alloc == region->cpu_alloc) -+ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; -+ else -+ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; ++KBASE_EXPORT_TEST_API(kbase_pm_context_active); + -+ if (pages_to_alloc_4k) { -+ struct tagged_addr *gpu_pages = -+ kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool, -+ pages_to_alloc_4k_per_alloc, -+ &prealloc_sas[0]); ++void kbase_pm_context_idle(struct kbase_device *kbdev) ++{ ++ int c; + -+ if (!gpu_pages) -+ alloc_failed = true; -+ else -+ total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) { -+ struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked( -+ region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc, -+ &prealloc_sas[1]); + -+ if (!cpu_pages) -+ alloc_failed = true; -+ else -+ total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc; -+ } -+ } ++ kbase_pm_lock(kbdev); + -+ kbase_mem_pool_unlock(pool); ++ c = --kbdev->pm.active_count; ++ KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, c); + -+ if (alloc_failed) { -+ WARN_ON(!pages_still_required); -+ WARN_ON(pages_to_alloc_4k >= pages_still_required); -+ WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required); -+ break; -+ } ++ KBASE_DEBUG_ASSERT(c >= 0); + -+ pages_still_required -= pages_to_alloc_4k; -+ } ++ if (c == 0) { ++ /* Last context has gone idle */ ++ kbase_hwaccess_pm_gpu_idle(kbdev); ++ kbase_clk_rate_trace_manager_gpu_idle(kbdev); + -+ if (pages_still_required) { -+ /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation, -+ * so must in any case use kbase_free_phy_pages_helper() rather than -+ * kbase_free_phy_pages_helper_locked() ++ /* Wake up anyone waiting for this to become 0 (e.g. suspend). ++ * The waiters must synchronize with us by locking the pm.lock ++ * after waiting. + */ -+ if (total_gpu_pages_alloced > 0) -+ kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced); -+ if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0) -+ kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced); -+ -+ if (alloc_failed) { -+ /* Note that in allocating from the above memory pools, we always ensure -+ * never to request more than is available in each pool with the pool's -+ * lock held. 
Hence failing to allocate in such situations would be unusual -+ * and we should cancel the growth instead (as re-growing the memory pool -+ * might not fix the situation) -+ */ -+ dev_warn( -+ kctx->kbdev->dev, -+ "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", -+ new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, -+ total_mempools_free_4k); -+ *pages_to_grow = 0; -+ } else { -+ /* Tell the caller to try to grow the memory pool -+ * -+ * Freeing pages above may have spilled or returned them to the OS, so we -+ * have to take into account how many are still in the pool before giving a -+ * new estimate for growth required of the pool. We can just re-estimate a -+ * new value. -+ */ -+ pages_still_required = estimate_pool_space_required(root_pool, new_pages); -+ if (pages_still_required) { -+ *pages_to_grow = pages_still_required; -+ } else { -+ /* It's possible another thread could've grown the pool to be just -+ * big enough after we rolled back the allocation. Request at least -+ * one more page to ensure the caller doesn't fail the growth by -+ * conflating it with the alloc_failed case above -+ */ -+ *pages_to_grow = 1u; -+ } -+ } -+ -+ return false; ++ wake_up(&kbdev->pm.zero_active_count_wait); + } + -+ /* Allocation was successful. No pages to grow, return success. */ -+ *pages_to_grow = 0; -+ -+ return true; ++ kbase_pm_unlock(kbdev); ++ dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, ++ kbdev->pm.active_count, current->pid); +} + -+void kbase_mmu_page_fault_worker(struct work_struct *data) ++KBASE_EXPORT_TEST_API(kbase_pm_context_idle); ++ ++int kbase_pm_driver_suspend(struct kbase_device *kbdev) +{ -+ u64 fault_pfn; -+ u32 fault_status; -+ size_t new_pages; -+ size_t fault_rel_pfn; -+ struct kbase_as *faulting_as; -+ int as_no; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct kbase_va_region *region; -+ struct kbase_fault *fault; -+ int err; -+ bool grown = false; -+ int pages_to_grow; -+ bool grow_2mb_pool; -+ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; -+ int i; -+ size_t current_backed_size; -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ size_t pages_trimmed = 0; -+#endif ++ KBASE_DEBUG_ASSERT(kbdev); + -+ /* Calls to this function are inherently synchronous, with respect to -+ * MMU operations. ++ /* Suspend HW counter intermediaries. This blocks until workers and timers ++ * are no longer running. + */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; ++ kbase_vinstr_suspend(kbdev->vinstr_ctx); ++ kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx); + -+ faulting_as = container_of(data, struct kbase_as, work_pagefault); -+ fault = &faulting_as->pf_data; -+ fault_pfn = fault->addr >> PAGE_SHIFT; -+ as_no = faulting_as->number; ++ /* Disable GPU hardware counters. ++ * This call will block until counters are disabled. 
++ */ ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + -+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); -+ dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, -+ fault_pfn, as_no); ++ mutex_lock(&kbdev->pm.lock); ++ if (WARN_ON(kbase_pm_is_suspending(kbdev))) { ++ mutex_unlock(&kbdev->pm.lock); ++ return 0; ++ } ++ kbdev->pm.suspending = true; ++ mutex_unlock(&kbdev->pm.lock); + -+ /* Grab the context that was already refcounted in kbase_mmu_interrupt() -+ * Therefore, it cannot be scheduled out of this AS until we explicitly -+ * release it -+ */ -+ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); -+ if (!kctx) { -+ atomic_dec(&kbdev->faults_pending); -+ return; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) { ++ int i; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->js_data.runpool_irq.submit_allowed = 0; ++ kbase_disjoint_state_up(kbdev); ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ kbase_job_slot_softstop(kbdev, i, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); ++ /* From now on, the active count will drop towards zero. Sometimes, ++ * it'll go up briefly before going down again. However, once ++ * it reaches zero it will stay there - guaranteeing that we've idled ++ * all pm references ++ */ + -+#if MALI_JIT_PRESSURE_LIMIT_BASE +#if !MALI_USE_CSF -+ mutex_lock(&kctx->jctx.lock); -+#endif -+#endif -+ -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* check if we still have GPU */ -+ if (unlikely(kbase_is_gpu_removed(kbdev))) { -+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); -+ goto fault_done; ++ /* Suspend job scheduler and associated components, so that it releases all ++ * the PM active count references ++ */ ++ kbasep_js_suspend(kbdev); ++#else ++ if (kbase_csf_scheduler_pm_suspend(kbdev)) { ++ mutex_lock(&kbdev->pm.lock); ++ kbdev->pm.suspending = false; ++ mutex_unlock(&kbdev->pm.lock); ++ return -1; + } +#endif + -+ if (unlikely(fault->protected_mode)) { -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Protected mode fault", fault); -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ /* Wait for the active count to reach zero. This is not the same as ++ * waiting for a power down, since not all policies power down when this ++ * reaches zero. 
++ */ ++ dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", ++ current->pid); ++ wait_event(kbdev->pm.zero_active_count_wait, ++ kbdev->pm.active_count == 0); ++ dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); + -+ goto fault_done; ++ /* NOTE: We synchronize with anything that was just finishing a ++ * kbase_pm_context_idle() call by locking the pm.lock below ++ */ ++ if (kbase_hwaccess_pm_suspend(kbdev)) { ++ mutex_lock(&kbdev->pm.lock); ++ kbdev->pm.suspending = false; ++ mutex_unlock(&kbdev->pm.lock); ++ return -1; + } + -+ fault_status = fault->status; -+ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) { ++ mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); ++ kbase_arbiter_pm_vm_stopped(kbdev); ++ mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); ++ } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: -+ /* need to check against the region to handle this one */ -+ break; ++ return 0; ++} + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: -+#ifdef CONFIG_MALI_CINSTR_GWT -+ /* If GWT was ever enabled then we need to handle -+ * write fault pages even if the feature was disabled later. -+ */ -+ if (kctx->gwt_was_enabled) { -+ kbase_gpu_mmu_handle_permission_fault(kctx, -+ faulting_as); -+ goto fault_done; -+ } ++void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) ++{ ++ unsigned long flags; ++ ++ /* MUST happen before any pm_context_active calls occur */ ++ kbase_hwaccess_pm_resume(kbdev); ++ ++ /* Initial active call, to power on the GPU/cores if needed */ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ (arb_gpu_start ? 
++ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) ++ return; ++#else ++ kbase_pm_context_active(kbdev); +#endif + -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Permission failure", fault); -+ goto fault_done; ++#if !MALI_USE_CSF ++ /* Resume any blocked atoms (which may cause contexts to be scheduled in ++ * and dependent atoms to run) ++ */ ++ kbase_resume_suspended_soft_jobs(kbdev); + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Translation table bus fault", fault); -+ goto fault_done; ++ /* Resume the Job Scheduler and associated components, and start running ++ * atoms ++ */ ++ kbasep_js_resume(kbdev); ++#endif + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: -+ /* nothing to do, but we don't expect this fault currently */ -+ dev_warn(kbdev->dev, "Access flag unexpectedly set"); -+ goto fault_done; ++ /* Matching idle call, to power off the GPU/cores if we didn't actually ++ * need it and the policy doesn't want it on ++ */ ++ kbase_pm_context_idle(kbdev); + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Address size fault", fault); -+ goto fault_done; ++ /* Re-enable GPU hardware counters */ ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++#else ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory attributes fault", fault); -+ goto fault_done; ++ /* Resume HW counters intermediaries. 
*/ ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); ++} + -+ default: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Unknown fault code", fault); -+ goto fault_done; -+ } ++int kbase_pm_suspend(struct kbase_device *kbdev) ++{ ++ int result = 0; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); ++ else ++ result = kbase_pm_driver_suspend(kbdev); ++#else ++ result = kbase_pm_driver_suspend(kbdev); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+page_fault_retry: -+ if (kbdev->pagesize_2mb) { -+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ -+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { -+ if (!prealloc_sas[i]) { -+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); ++ return result; ++} + -+ if (!prealloc_sas[i]) { -+ kbase_mmu_report_fault_and_kill( -+ kctx, faulting_as, -+ "Failed pre-allocating memory for sub-allocations' metadata", -+ fault); -+ goto fault_done; -+ } -+ } -+ } -+ } ++void kbase_pm_resume(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) ++ kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); ++ else ++ kbase_pm_driver_resume(kbdev, false); ++#else ++ kbase_pm_driver_resume(kbdev, false); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h +new file mode 100644 +index 000000000..4bb90a4f6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h +@@ -0,0 +1,257 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
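/*
 * Sketch of how the kbase_pm_suspend()/kbase_pm_resume() entry points
 * implemented above would typically be wired into system sleep callbacks.
 * The dev_pm_ops hookup and the assumption that drvdata holds the
 * kbase_device are illustrative; kbase's real suspend/resume callbacks live
 * elsewhere in the driver and are not part of this hunk.
 */
static int example_mali_sys_suspend(struct device *dev)
{
	struct kbase_device *kbdev = dev_get_drvdata(dev);	/* assumed drvdata */

	return kbase_pm_suspend(kbdev);
}

static int example_mali_sys_resume(struct device *dev)
{
	struct kbase_device *kbdev = dev_get_drvdata(dev);

	kbase_pm_resume(kbdev);
	return 0;
}

static const struct dev_pm_ops example_mali_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(example_mali_sys_suspend, example_mali_sys_resume)
};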
++ * ++ */ + -+ /* so we have a translation fault, -+ * let's see if it is for growable memory -+ */ -+ kbase_gpu_vm_lock(kctx); ++/** ++ * DOC: Power management API definitions ++ */ + -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, -+ fault->addr); -+ if (kbase_is_region_invalid_or_free(region)) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory is not mapped on the GPU", fault); -+ goto fault_done; -+ } ++#ifndef _KBASE_PM_H_ ++#define _KBASE_PM_H_ + -+ if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "DMA-BUF is not mapped on the GPU", fault); -+ goto fault_done; -+ } ++#include "mali_kbase_hwaccess_pm.h" + -+ if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Bad physical memory group ID", fault); -+ goto fault_done; -+ } ++#define PM_ENABLE_IRQS 0x01 ++#define PM_HW_ISSUES_DETECT 0x02 + -+ if ((region->flags & GROWABLE_FLAGS_REQUIRED) -+ != GROWABLE_FLAGS_REQUIRED) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory is not growable", fault); -+ goto fault_done; -+ } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++/* In the case that the GPU was granted by the Arbiter, it will have ++ * already been reset. The following flag ensures it is not reset ++ * twice. ++ */ ++#define PM_NO_RESET 0x04 ++#endif + -+ if ((region->flags & KBASE_REG_DONT_NEED)) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Don't need memory can't be grown", fault); -+ goto fault_done; -+ } ++/** ++ * kbase_pm_init - Initialize the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device ++ * (must be a valid pointer) ++ * ++ * Must be called before any other power management function ++ * ++ * Return: 0 if the power management framework was successfully initialized. ++ */ ++int kbase_pm_init(struct kbase_device *kbdev); + -+ if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == -+ AS_FAULTSTATUS_ACCESS_TYPE_READ) -+ dev_warn(kbdev->dev, "Grow on pagefault while reading"); ++/** ++ * kbase_pm_powerup - Power up GPU after all modules have been initialized ++ * and interrupt handlers installed. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @flags: Flags to pass on to kbase_pm_init_hw ++ * ++ * Return: 0 if powerup was successful. ++ */ ++int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); + -+ /* find the size we need to grow it by -+ * we know the result fit in a size_t due to -+ * kbase_region_tracker_find_region_enclosing_address -+ * validating the fault_address to be within a size_t from the start_pfn -+ */ -+ fault_rel_pfn = fault_pfn - region->start_pfn; ++/** ++ * kbase_pm_halt - Halt the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Should ensure that no new interrupts are generated, ++ * but allow any currently running interrupt handlers to complete successfully. ++ * The GPU is forced off by the time this function returns, regardless of ++ * whether or not the active power policy asks for the GPU to be powered off. 
++ */ ++void kbase_pm_halt(struct kbase_device *kbdev); + -+ current_backed_size = kbase_reg_current_backed_size(region); ++/** ++ * kbase_pm_term - Terminate the power management framework. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * No power management functions may be called after this ++ * (except @ref kbase_pm_init) ++ */ ++void kbase_pm_term(struct kbase_device *kbdev); + -+ if (fault_rel_pfn < current_backed_size) { -+ struct kbase_mmu_hw_op_param op_param; ++/** ++ * kbase_pm_context_active - Increment the count of active contexts. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function should be called when a context is about to submit a job. ++ * It informs the active power policy that the GPU is going to be in use shortly ++ * and the policy is expected to start turning on the GPU. ++ * ++ * This function will block until the GPU is available. ++ * ++ * This function ASSERTS if a suspend is occuring/has occurred whilst this is ++ * in use. Use kbase_pm_contect_active_unless_suspending() instead. ++ * ++ * @note a Suspend is only visible to Kernel threads; user-space threads in a ++ * syscall cannot witness a suspend, because they are frozen before the suspend ++ * begins. ++ */ ++void kbase_pm_context_active(struct kbase_device *kbdev); + -+ dev_dbg(kbdev->dev, -+ "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", -+ fault->addr, region->start_pfn, -+ region->start_pfn + -+ current_backed_size); + -+ mutex_lock(&kbdev->mmu_hw_mutex); ++/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ ++enum kbase_pm_suspend_handler { ++ /** A suspend is not expected/not possible - this is the same as ++ * kbase_pm_context_active() ++ */ ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, ++ /** If we're suspending, fail and don't increase the active count */ ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, ++ /** If we're suspending, succeed and allow the active count to increase ++ * if it didn't go from 0->1 (i.e., we didn't re-activate the GPU). ++ * ++ * This should only be used when there is a bounded time on the activation ++ * (e.g. guarantee it's going to be idled very soon after) ++ */ ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /** Special case when Arbiter has notified we can use GPU. ++ * Active count should always start at 0 in this case. ++ */ ++ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++}; + -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ /* [1] in case another page fault occurred while we were -+ * handling the (duplicate) page fault we need to ensure we -+ * don't loose the other page fault as result of us clearing -+ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send -+ * an UNLOCK command that will retry any stalled memory -+ * transaction (which should cause the other page fault to be -+ * raised again). -+ */ -+ op_param.mmu_sync_info = mmu_sync_info; -+ op_param.kctx_id = kctx->id; -+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ mmu_hw_operation_begin(kbdev); -+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, -+ &op_param); -+ mmu_hw_operation_end(kbdev); -+ } else { -+ /* Can safely skip the invalidate for all levels in case -+ * of duplicate page faults. 
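/*
 * Lifecycle sketch implied by the declarations above: initialise the PM
 * framework, power up with the documented flags once interrupt handlers are
 * installed, and mirror that with halt/term on the way down. The bring-up and
 * teardown functions are illustrative, not the driver's actual probe/remove
 * sequence.
 */
static int example_pm_bringup(struct kbase_device *kbdev)
{
	int err;

	err = kbase_pm_init(kbdev);
	if (err)
		return err;

	err = kbase_pm_powerup(kbdev, PM_ENABLE_IRQS | PM_HW_ISSUES_DETECT);
	if (err) {
		kbase_pm_term(kbdev);
		return err;
	}
	return 0;
}

static void example_pm_teardown(struct kbase_device *kbdev)
{
	kbase_pm_halt(kbdev);	/* GPU is forced off by the time this returns */
	kbase_pm_term(kbdev);
}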
-+ */ -+ op_param.flush_skip_levels = 0xF; -+ op_param.vpfn = fault_pfn; -+ op_param.nr = 1; -+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, -+ &op_param); -+ } ++/** ++ * kbase_pm_context_active_handle_suspend - Suspend 'safe' variant of kbase_pm_context_active() ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @suspend_handler: The handler code for how to handle a suspend that might occur ++ * ++ * If a suspend is in progress, this allows for various different ways of ++ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. ++ * ++ * We returns a status code indicating whether we're allowed to keep the GPU ++ * active during the suspend, depending on the handler code. If the status code ++ * indicates a failure, the caller must abort whatever operation it was ++ * attempting, and potentially queue it up for after the OS has resumed. ++ * ++ * Return: 0 on success, non-zero othrewise. ++ */ ++int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx", -+ fault->addr); -+ } ++/** ++ * kbase_pm_context_idle - Decrement the reference count of active contexts. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function should be called when a context becomes idle. ++ * After this call the GPU may be turned off by the power policy so the calling ++ * code should ensure that it does not access the GPU's registers. ++ */ ++void kbase_pm_context_idle(struct kbase_device *kbdev); + -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline ++ * function ++ */ + -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ kbase_gpu_vm_unlock(kctx); ++/** ++ * kbase_pm_suspend - Suspend the GPU and prevent any further register accesses ++ * to it from Kernel threads. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This is called in response to an OS suspend event, and calls into the various ++ * kbase components to complete the suspend. ++ * ++ * @note the mechanisms used here rely on all user-space threads being frozen ++ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up ++ * the GPU e.g. via atom submission. ++ * ++ * Return: 0 on success. ++ */ ++int kbase_pm_suspend(struct kbase_device *kbdev); + -+ goto fault_done; -+ } ++/** ++ * kbase_pm_resume - Resume the GPU, allow register accesses to it, ++ * and resume running atoms on the GPU. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This is called in response to an OS resume event, and calls into the various ++ * kbase components to complete the resume. ++ * ++ * Also called when using VM arbiter, when GPU access has been granted. ++ */ ++void kbase_pm_resume(struct kbase_device *kbdev); + -+ new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); ++/** ++ * kbase_pm_vsync_callback - vsync callback ++ * ++ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise ++ * @data: Pointer to the kbase device as returned by kbase_find_device() ++ * ++ * Callback function used to notify the power management code that a vsync has ++ * occurred on the display. 
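/*
 * Usage sketch for the active/idle pairing documented above: take a reference
 * with a suspend handler that refuses to fight an in-progress suspend, do the
 * work, then drop the reference so the policy may power the GPU off again.
 * do_hw_work() is a hypothetical stand-in for whatever GPU access the caller
 * needs while the reference is held.
 */
static int example_do_work_powered(struct kbase_device *kbdev)
{
	if (kbase_pm_context_active_handle_suspend(kbdev,
			KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE))
		return -EAGAIN;		/* suspending: retry after resume */

	do_hw_work(kbdev);		/* hypothetical register access */

	kbase_pm_context_idle(kbdev);
	return 0;
}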
++ */ ++void kbase_pm_vsync_callback(int buffer_updated, void *data); + -+ /* cap to max vsize */ -+ new_pages = min(new_pages, region->nr_pages - current_backed_size); -+ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); ++/** ++ * kbase_pm_driver_suspend() - Put GPU and driver in suspend state ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Suspend the GPU and prevent any further register accesses to it from Kernel ++ * threads. ++ * ++ * This is called in response to an OS suspend event, and calls into the various ++ * kbase components to complete the suspend. ++ * ++ * Despite kbase_pm_suspend(), it will ignore to update Arbiter ++ * status if MALI_ARBITER_SUPPORT is enabled. ++ * ++ * @note the mechanisms used here rely on all user-space threads being frozen ++ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up ++ * the GPU e.g. via atom submission. ++ * ++ * Return: 0 on success. ++ */ ++int kbase_pm_driver_suspend(struct kbase_device *kbdev); + -+ if (new_pages == 0) { -+ struct kbase_mmu_hw_op_param op_param; ++/** ++ * kbase_pm_driver_resume() - Put GPU and driver in resume ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @arb_gpu_start: Arbiter has notified we can use GPU ++ * ++ * Resume the GPU, allow register accesses to it, and resume running atoms on ++ * the GPU. ++ * ++ * This is called in response to an OS resume event, and calls into the various ++ * kbase components to complete the resume. ++ * ++ * Also called when using VM arbiter, when GPU access has been granted. ++ * ++ * Despite kbase_pm_resume(), it will ignore to update Arbiter ++ * status if MALI_ARBITER_SUPPORT is enabled. ++ */ ++void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); + -+ mutex_lock(&kbdev->mmu_hw_mutex); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++/** ++ * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM ++ * @kbdev: Device pointer ++ * ++ * Handles the case that the Arbiter has forced the GPU away from the VM, ++ * so that interrupts will not be received and registers are no longer ++ * accessible because replaced by dummy RAM. ++ * Kill any running tasks and put the driver into a GPU powered-off state. ++ */ ++void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ /* Duplicate of a fault we've already handled, nothing to do */ -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++#endif /* _KBASE_PM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h +new file mode 100644 +index 000000000..c517a2d2a +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_refcount_defs.h +@@ -0,0 +1,57 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* See comment [1] about UNLOCK usage */ -+ op_param.mmu_sync_info = mmu_sync_info; -+ op_param.kctx_id = kctx->id; -+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ mmu_hw_operation_begin(kbdev); -+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, -+ &op_param); -+ mmu_hw_operation_end(kbdev); -+ } else { -+ /* Can safely skip the invalidate for all levels in case -+ * of duplicate page faults. -+ */ -+ op_param.flush_skip_levels = 0xF; -+ op_param.vpfn = fault_pfn; -+ op_param.nr = 1; -+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, -+ &op_param); -+ } ++#ifndef _KBASE_REFCOUNT_DEFS_H_ ++#define _KBASE_REFCOUNT_DEFS_H_ + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx", -+ fault->addr); -+ } ++/* ++ * The Refcount API is available from 4.11 onwards ++ * This file hides the compatibility issues with this for the rest the driver ++ */ + -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++#include ++#include + -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ kbase_gpu_vm_unlock(kctx); -+ goto fault_done; -+ } ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + -+ pages_to_grow = 0; ++#define kbase_refcount_t atomic_t ++#define kbase_refcount_read(x) atomic_read(x) ++#define kbase_refcount_set(x, v) atomic_set(x, v) ++#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x) ++#define kbase_refcount_dec(x) atomic_dec(x) ++#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x) ++#define kbase_refcount_inc(x) atomic_inc(x) + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { -+ kbase_jit_request_phys_increase(kctx, new_pages); -+ pages_trimmed = new_pages; -+ } -+#endif ++#else + -+ spin_lock(&kctx->mem_partials_lock); -+ grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, -+ &grow_2mb_pool, prealloc_sas); -+ spin_unlock(&kctx->mem_partials_lock); ++#include + -+ if (grown) { -+ u64 dirty_pgds = 0; -+ u64 pfn_offset; -+ struct kbase_mmu_hw_op_param op_param; ++#define kbase_refcount_t refcount_t ++#define kbase_refcount_read(x) refcount_read(x) ++#define kbase_refcount_set(x, v) refcount_set(x, v) ++#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x) ++#define kbase_refcount_dec(x) refcount_dec(x) ++#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x) ++#define kbase_refcount_inc(x) refcount_inc(x) + -+ /* alloc success */ -+ WARN_ON(kbase_reg_current_backed_size(region) > -+ region->nr_pages); ++#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */ + -+ /* set up the new pages */ -+ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; -+ /* -+ * Note: -+ * Issuing an MMU operation will unlock the MMU and cause the -+ * translation to be replayed. If the page insertion fails then -+ * rather then trying to continue the context should be killed -+ * so the no_flush version of insert_pages is used which allows -+ * us to unlock the MMU as we see fit. 
-+ */ -+ err = kbase_mmu_insert_pages_no_flush( -+ kbdev, &kctx->mmu, region->start_pfn + pfn_offset, -+ &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, -+ region->gpu_alloc->group_id, &dirty_pgds, region, false); -+ if (err) { -+ kbase_free_phy_pages_helper(region->gpu_alloc, -+ new_pages); -+ if (region->gpu_alloc != region->cpu_alloc) -+ kbase_free_phy_pages_helper(region->cpu_alloc, -+ new_pages); -+ kbase_gpu_vm_unlock(kctx); -+ /* The locked VA region will be unlocked and the cache -+ * invalidated in here -+ */ -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Page table update failure", fault); -+ goto fault_done; -+ } -+ KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, -+ (u64)new_pages); -+ trace_mali_mmu_page_fault_grow(region, fault, new_pages); ++#endif /* _KBASE_REFCOUNT_DEFS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c +new file mode 100644 +index 000000000..147082c15 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c +@@ -0,0 +1,239 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014, 2016, 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+#if MALI_INCREMENTAL_RENDERING_JM -+ /* Switch to incremental rendering if we have nearly run out of -+ * memory in a JIT memory allocation. -+ */ -+ if (region->threshold_pages && -+ kbase_reg_current_backed_size(region) > -+ region->threshold_pages) { -+ dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", -+ new_pages + current_backed_size, region->threshold_pages); ++#include "mali_kbase.h" ++#include "mali_kbase_regs_history_debugfs.h" + -+ if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { -+ dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); -+ kbase_va_region_alloc_get(kctx, region); -+ } -+ } -+#endif ++#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++#include + -+ /* clear MMU interrupt - this needs to be done after updating -+ * the page tables but before issuing a FLUSH command. The -+ * FLUSH cmd has a side effect that it restarts stalled memory -+ * transactions in other address spaces which may cause -+ * another fault to occur. If we didn't clear the interrupt at -+ * this stage a new IRQ might not be raised when the GPU finds -+ * a MMU IRQ is already pending. -+ */ -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++/** ++ * kbase_io_history_resize - resize the register access history buffer. 
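/*
 * Small usage sketch for the kbase_refcount_* wrappers defined above, which
 * map onto refcount_t on v4.11+ kernels and atomic_t before that. The
 * example_obj type and its kmalloc/kfree lifetime are assumptions made for
 * illustration only.
 */
struct example_obj {
	kbase_refcount_t refcount;
};

static void example_obj_init(struct example_obj *obj)
{
	kbase_refcount_set(&obj->refcount, 1);	/* creator holds one reference */
}

static bool example_obj_get(struct example_obj *obj)
{
	/* Lookup-style "get": fails once the count has already hit zero */
	return kbase_refcount_inc_not_zero(&obj->refcount);
}

static void example_obj_put(struct example_obj *obj)
{
	if (kbase_refcount_dec_and_test(&obj->refcount))
		kfree(obj);		/* assumes obj came from kmalloc() */
}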
++ * ++ * @h: Pointer to a valid register history to resize ++ * @new_size: Number of accesses the buffer could hold ++ * ++ * A successful resize will clear all recent register accesses. ++ * If resizing fails for any reason (e.g., could not allocate memory, invalid ++ * buffer size) then the original buffer will be kept intact. ++ * ++ * Return: 0 if the buffer was resized, failure otherwise ++ */ ++static int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) ++{ ++ struct kbase_io_access *old_buf; ++ struct kbase_io_access *new_buf; ++ unsigned long flags; + -+ op_param.vpfn = region->start_pfn + pfn_offset; -+ op_param.nr = new_pages; -+ op_param.op = KBASE_MMU_OP_FLUSH_PT; -+ op_param.kctx_id = kctx->id; -+ op_param.mmu_sync_info = mmu_sync_info; -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ /* Unlock to invalidate the TLB (and resume the MMU) */ -+ op_param.flush_skip_levels = -+ pgd_level_to_skip_flush(dirty_pgds); -+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, -+ &op_param); -+ } else { -+ /* flush L2 and unlock the VA (resumes the MMU) */ -+ mmu_hw_operation_begin(kbdev); -+ err = kbase_mmu_hw_do_flush(kbdev, faulting_as, -+ &op_param); -+ mmu_hw_operation_end(kbdev); -+ } ++ if (!new_size) ++ goto out_err; /* The new size must not be 0 */ + -+ if (err) { -+ dev_err(kbdev->dev, -+ "Flush for GPU page table update did not complete on handling page fault @ 0x%llx", -+ fault->addr); -+ } ++ new_buf = vmalloc(new_size * sizeof(*h->buf)); ++ if (!new_buf) ++ goto out_err; + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++ spin_lock_irqsave(&h->lock, flags); + -+ /* reenable this in the mask */ -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ old_buf = h->buf; + -+#ifdef CONFIG_MALI_CINSTR_GWT -+ if (kctx->gwt_enabled) { -+ /* GWT also tracks growable regions. */ -+ struct kbasep_gwt_list_element *pos; ++ /* Note: we won't bother with copying the old data over. The dumping ++ * logic wouldn't work properly as it relies on 'count' both as a ++ * counter and as an index to the buffer which would have changed with ++ * the new array. This is a corner case that we don't need to support. ++ */ ++ h->count = 0; ++ h->size = new_size; ++ h->buf = new_buf; + -+ pos = kmalloc(sizeof(*pos), GFP_KERNEL); -+ if (pos) { -+ pos->region = region; -+ pos->page_addr = (region->start_pfn + -+ pfn_offset) << -+ PAGE_SHIFT; -+ pos->num_pages = new_pages; -+ list_add(&pos->link, -+ &kctx->gwt_current_list); -+ } else { -+ dev_warn(kbdev->dev, "kmalloc failure"); -+ } -+ } -+#endif ++ spin_unlock_irqrestore(&h->lock, flags); + -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (pages_trimmed) { -+ kbase_jit_done_phys_increase(kctx, pages_trimmed); -+ pages_trimmed = 0; -+ } -+#endif -+ kbase_gpu_vm_unlock(kctx); -+ } else { -+ int ret = -ENOMEM; ++ vfree(old_buf); + -+ kbase_gpu_vm_unlock(kctx); ++ return 0; + -+ /* If the memory pool was insufficient then grow it and retry. -+ * Otherwise fail the allocation. 
-+ */ -+ if (pages_to_grow > 0) { -+ if (kbdev->pagesize_2mb && grow_2mb_pool) { -+ /* Round page requirement up to nearest 2 MB */ -+ struct kbase_mem_pool *const lp_mem_pool = -+ &kctx->mem_pools.large[ -+ region->gpu_alloc->group_id]; ++out_err: ++ return -1; ++} + -+ pages_to_grow = (pages_to_grow + -+ ((1 << lp_mem_pool->order) - 1)) -+ >> lp_mem_pool->order; ++int kbase_io_history_init(struct kbase_io_history *h, u16 n) ++{ ++ h->enabled = false; ++ spin_lock_init(&h->lock); ++ h->count = 0; ++ h->size = 0; ++ h->buf = NULL; ++ if (kbase_io_history_resize(h, n)) ++ return -1; + -+ ret = kbase_mem_pool_grow(lp_mem_pool, -+ pages_to_grow, kctx->task); -+ } else { -+ struct kbase_mem_pool *const mem_pool = -+ &kctx->mem_pools.small[ -+ region->gpu_alloc->group_id]; ++ return 0; ++} + -+ ret = kbase_mem_pool_grow(mem_pool, -+ pages_to_grow, kctx->task); -+ } -+ } -+ if (ret < 0) { -+ /* failed to extend, handle as a normal PF */ -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Page allocation failure", fault); -+ } else { -+ dev_dbg(kbdev->dev, "Try again after pool_grow"); -+ goto page_fault_retry; -+ } -+ } ++void kbase_io_history_term(struct kbase_io_history *h) ++{ ++ vfree(h->buf); ++ h->buf = NULL; ++} + -+fault_done: -+#if MALI_JIT_PRESSURE_LIMIT_BASE -+ if (pages_trimmed) { -+ kbase_gpu_vm_lock(kctx); -+ kbase_jit_done_phys_increase(kctx, pages_trimmed); -+ kbase_gpu_vm_unlock(kctx); -+ } -+#if !MALI_USE_CSF -+ mutex_unlock(&kctx->jctx.lock); -+#endif -+#endif ++void kbase_io_history_add(struct kbase_io_history *h, ++ void __iomem const *addr, u32 value, u8 write) ++{ ++ struct kbase_io_access *io; ++ unsigned long flags; + -+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) -+ kfree(prealloc_sas[i]); ++ spin_lock_irqsave(&h->lock, flags); + -+ /* -+ * By this point, the fault was handled in some way, -+ * so release the ctx refcount ++ io = &h->buf[h->count % h->size]; ++ io->addr = (uintptr_t)addr | write; ++ io->value = value; ++ ++h->count; ++ /* If count overflows, move the index by the buffer size so the entire ++ * buffer will still be dumped later + */ -+ release_ctx(kbdev, kctx); ++ if (unlikely(!h->count)) ++ h->count = h->size; + -+ atomic_dec(&kbdev->faults_pending); -+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); ++ spin_unlock_irqrestore(&h->lock, flags); +} + -+static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut) ++void kbase_io_history_dump(struct kbase_device *kbdev) +{ -+ u64 *page; -+ struct page *p; -+ phys_addr_t pgd; -+ -+ p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); -+ if (!p) -+ return KBASE_MMU_INVALID_PGD_ADDRESS; ++ struct kbase_io_history *const h = &kbdev->io_history; ++ size_t i; ++ size_t iters; ++ unsigned long flags; + -+ page = kmap(p); -+ if (page == NULL) -+ goto alloc_free; ++ if (!unlikely(h->enabled)) ++ return; + -+ pgd = page_to_phys(p); ++ spin_lock_irqsave(&h->lock, flags); + -+ /* If the MMU tables belong to a context then account the memory usage -+ * to that context, otherwise the MMU tables are device wide and are -+ * only accounted to the device. -+ */ -+ if (mmut->kctx) { -+ int new_page_count; ++ dev_err(kbdev->dev, "Register IO History:"); ++ iters = (h->size > h->count) ? h->count : h->size; ++ dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, ++ h->count); ++ for (i = 0; i < iters; ++i) { ++ struct kbase_io_access *io = ++ &h->buf[(h->count - iters + i) % h->size]; ++ char const access = (io->addr & 1) ? 
'w' : 'r'; + -+ new_page_count = atomic_add_return(1, -+ &mmut->kctx->used_pages); -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ kbdev, -+ mmut->kctx->id, -+ (u64)new_page_count); -+ kbase_process_page_usage_inc(mmut->kctx, 1); ++ dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i, ++ access, (unsigned long)(io->addr & ~0x1), io->value); + } + -+ atomic_add(1, &kbdev->memdev.used_pages); -+ -+ kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); ++ spin_unlock_irqrestore(&h->lock, flags); ++} + -+ kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); ++static int regs_history_size_get(void *data, u64 *val) ++{ ++ struct kbase_io_history *const h = data; + -+ /* As this page is newly created, therefore there is no content to -+ * clean or invalidate in the GPU caches. -+ */ -+ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ *val = h->size; + -+ kunmap(p); -+ return pgd; ++ return 0; ++} + -+alloc_free: -+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); ++static int regs_history_size_set(void *data, u64 val) ++{ ++ struct kbase_io_history *const h = data; + -+ return KBASE_MMU_INVALID_PGD_ADDRESS; ++ return kbase_io_history_resize(h, (u16)val); +} + ++DEFINE_DEBUGFS_ATTRIBUTE(regs_history_size_fops, regs_history_size_get, regs_history_size_set, ++ "%llu\n"); ++ +/** -+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1 ++ * regs_history_show - show callback for the register access history file. + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @pgd: Physical addresse of level N page directory. -+ * @vpfn: The virtual page frame number. -+ * @level: The level of MMU page table (N). ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * -+ * Return: -+ * * 0 - OK -+ * * -EFAULT - level N+1 PGD does not exist -+ * * -EINVAL - kmap() failed for level N PGD PFN ++ * This function is called to dump all recent accesses to the GPU registers. ++ * ++ * Return: 0 if successfully prints data in debugfs entry file, failure otherwise + */ -+static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ phys_addr_t *pgd, u64 vpfn, int level) ++static int regs_history_show(struct seq_file *sfile, void *data) +{ -+ u64 *page; -+ phys_addr_t target_pgd; -+ struct page *p; ++ struct kbase_io_history *const h = sfile->private; ++ size_t i; ++ size_t iters; ++ unsigned long flags; + -+ lockdep_assert_held(&mmut->mmu_lock); ++ if (!h->enabled) { ++ seq_puts(sfile, "The register access history is disabled\n"); ++ goto out; ++ } + -+ /* -+ * Architecture spec defines level-0 as being the top-most. -+ * This is a bit unfortunate here, but we keep the same convention. -+ */ -+ vpfn >>= (3 - level) * 9; -+ vpfn &= 0x1FF; ++ spin_lock_irqsave(&h->lock, flags); + -+ p = pfn_to_page(PFN_DOWN(*pgd)); -+ page = kmap(p); -+ if (page == NULL) { -+ dev_err(kbdev->dev, "%s: kmap failure", __func__); -+ return -EINVAL; -+ } ++ iters = (h->size > h->count) ? h->count : h->size; ++ seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, ++ h->count); ++ for (i = 0; i < iters; ++i) { ++ struct kbase_io_access *io = ++ &h->buf[(h->count - iters + i) % h->size]; ++ char const access = (io->addr & 1) ? 
'w' : 'r'; + -+ if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { -+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, -+ vpfn); -+ kunmap(p); -+ return -EFAULT; -+ } else { -+ target_pgd = kbdev->mmu_mode->pte_to_phy_addr( -+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); ++ seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i, ++ access, (unsigned long)(io->addr & ~0x1), io->value); + } + -+ kunmap(p); -+ *pgd = target_pgd; ++ spin_unlock_irqrestore(&h->lock, flags); + ++out: + return 0; +} + +/** -+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level -+ * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @vpfn: The virtual page frame number. -+ * @in_level: The level of MMU page table (N). -+ * @out_level: Set to the level of the lowest valid PGD found on success. -+ * Invalid on error. -+ * @out_pgd: Set to the lowest valid PGD found on success. -+ * Invalid on error. -+ * -+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or -+ * closest to in_level ++ * regs_history_open - open operation for regs_history debugfs file + * -+ * Terminology: -+ * Level-0 = Top-level = highest -+ * Level-3 = Bottom-level = lowest ++ * @in: &struct inode pointer ++ * @file: &struct file pointer + * -+ * Return: -+ * * 0 - OK -+ * * -EINVAL - kmap() failed during page table walk. ++ * Return: file descriptor + */ -+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd) ++static int regs_history_open(struct inode *in, struct file *file) +{ -+ phys_addr_t pgd; -+ int l; -+ int err = 0; -+ -+ lockdep_assert_held(&mmut->mmu_lock); -+ pgd = mmut->pgd; -+ -+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) { -+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); -+ -+ /* Handle failure condition */ -+ if (err) { -+ dev_dbg(kbdev->dev, -+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", -+ __func__, l + 1); -+ break; -+ } -+ } -+ -+ *out_pgd = pgd; -+ *out_level = l; ++ return single_open(file, ®s_history_show, in->i_private); ++} + -+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid. -+ * This implies that we have found the lowest valid pgd. Reset the error code. 
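/*
 * Sketch of the kind of call site the history above is meant for: a register
 * write helper that mirrors each access into the ring buffer while recording
 * is enabled. kbdev->reg as the mapped register base and the helper name are
 * assumptions; kbase's real register I/O path is not shown in this hunk.
 */
static void example_write_reg(struct kbase_device *kbdev, u32 offset, u32 value)
{
	void __iomem *addr = kbdev->reg + offset;	/* assumed register base */

	writel(value, addr);
	if (unlikely(kbdev->io_history.enabled))
		kbase_io_history_add(&kbdev->io_history, addr, value, 1 /* write */);
}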
-+ */ -+ if (err == -EFAULT) -+ err = 0; ++static const struct file_operations regs_history_fops = { ++ .owner = THIS_MODULE, ++ .open = ®s_history_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ return err; ++void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_bool("regs_history_enabled", 0644, ++ kbdev->mali_debugfs_directory, ++ &kbdev->io_history.enabled); ++ debugfs_create_file("regs_history_size", 0644, ++ kbdev->mali_debugfs_directory, ++ &kbdev->io_history, ®s_history_size_fops); ++ debugfs_create_file("regs_history", 0444, ++ kbdev->mali_debugfs_directory, &kbdev->io_history, ++ ®s_history_fops); +} -+ ++#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h +new file mode 100644 +index 000000000..ae327dd79 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h +@@ -0,0 +1,84 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * On success, sets out_pgd to the PGD for the specified level of translation -+ * Returns -EFAULT if a valid PGD is not found ++ * ++ * (C) COPYRIGHT 2014, 2016, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * + */ -+static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, -+ int level, phys_addr_t *out_pgd) -+{ -+ phys_addr_t pgd; -+ int l; -+ -+ lockdep_assert_held(&mmut->mmu_lock); -+ pgd = mmut->pgd; -+ -+ for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { -+ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); -+ /* Handle failure condition */ -+ if (err) { -+ dev_err(kbdev->dev, -+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", -+ __func__, l + 1); -+ return err; -+ } -+ } -+ -+ *out_pgd = pgd; -+ -+ return 0; -+} -+ -+static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, u64 from_vpfn, -+ u64 to_vpfn, u64 *dirty_pgds, -+ struct tagged_addr *phys, bool ignore_page_migration) -+{ -+ u64 vpfn = from_vpfn; -+ struct kbase_mmu_mode const *mmu_mode; -+ -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); -+ KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); -+ -+ lockdep_assert_held(&mmut->mmu_lock); -+ -+ mmu_mode = kbdev->mmu_mode; -+ kbase_mmu_reset_free_pgds_list(mmut); -+ -+ while (vpfn < to_vpfn) { -+ unsigned int idx = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; -+ unsigned int pcount = 0; -+ unsigned int left = to_vpfn - vpfn; -+ int level; -+ u64 *page; -+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; -+ phys_addr_t pgd = mmut->pgd; -+ struct page *p = phys_to_page(pgd); -+ -+ register unsigned int num_of_valid_entries; -+ -+ if (count > left) -+ count = left; -+ -+ /* need to check if this is a 2MB page or a 4kB */ -+ for (level = MIDGARD_MMU_TOPLEVEL; -+ level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { -+ idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; -+ pgds[level] = pgd; -+ page = kmap(p); -+ if (mmu_mode->ate_is_valid(page[idx], level)) -+ break; /* keep the mapping */ -+ kunmap(p); -+ pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); -+ p = phys_to_page(pgd); -+ } + -+ switch (level) { -+ case MIDGARD_MMU_LEVEL(2): -+ /* remap to single entry to update */ -+ pcount = 1; -+ break; -+ case MIDGARD_MMU_BOTTOMLEVEL: -+ /* page count is the same as the logical count */ -+ pcount = count; -+ break; -+ default: -+ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); -+ goto next; -+ } ++/** ++ * DOC: Header file for register access history support via debugfs ++ * ++ * This interface is made available via /sys/kernel/debug/mali#/regs_history*. ++ * ++ * Usage: ++ * - regs_history_enabled: whether recording of register accesses is enabled. ++ * Write 'y' to enable, 'n' to disable. ++ * - regs_history_size: size of the register history buffer, must be > 0 ++ * - regs_history: return the information about last accesses to the registers. 
++ */ + -+ if (dirty_pgds && pcount > 0) -+ *dirty_pgds |= 1ULL << level; ++#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H ++#define _KBASE_REGS_HISTORY_DEBUGFS_H + -+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page); -+ if (WARN_ON_ONCE(num_of_valid_entries < pcount)) -+ num_of_valid_entries = 0; -+ else -+ num_of_valid_entries -= pcount; ++struct kbase_device; + -+ /* Invalidate the entries we added */ -+ mmu_mode->entries_invalidate(&page[idx], pcount); ++#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + -+ if (!num_of_valid_entries) { -+ kunmap(p); ++/** ++ * kbase_io_history_init - initialize data struct for register access history ++ * ++ * @h: The register history to initialize ++ * @n: The number of register accesses that the buffer could hold ++ * ++ * Return: 0 if successfully initialized, failure otherwise ++ */ ++int kbase_io_history_init(struct kbase_io_history *h, u16 n); + -+ kbase_mmu_add_to_free_pgds_list(mmut, p); ++/** ++ * kbase_io_history_term - uninit all resources for the register access history ++ * ++ * @h: The register history to terminate ++ */ ++void kbase_io_history_term(struct kbase_io_history *h); + -+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, -+ KBASE_MMU_OP_NONE, dirty_pgds); -+ vpfn += count; -+ continue; -+ } ++/** ++ * kbase_io_history_dump - print the register history to the kernel ring buffer ++ * ++ * @kbdev: Pointer to kbase_device containing the register history to dump ++ */ ++void kbase_io_history_dump(struct kbase_device *kbdev); + -+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries); ++/** ++ * kbasep_regs_history_debugfs_init - add debugfs entries for register history ++ * ++ * @kbdev: Pointer to kbase_device containing the register history ++ */ ++void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + -+ /* MMU cache flush strategy is NONE because GPU cache maintenance is -+ * going to be done by the caller -+ */ -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), -+ kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, -+ KBASE_MMU_OP_NONE); -+ kunmap(p); -+next: -+ vpfn += count; -+ } ++#else /* !defined(CONFIG_DEBUG_FS) || IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+ /* If page migration is enabled: the only way to recover from failure -+ * is to mark all pages as not movable. It is not predictable what's -+ * going to happen to these pages at this stage. They might return -+ * movable once they are returned to a memory pool. -+ */ -+ if (kbase_page_migration_enabled && !ignore_page_migration && phys) { -+ const u64 num_pages = to_vpfn - from_vpfn + 1; -+ u64 i; ++#define kbase_io_history_init(...) 
(0) + -+ for (i = 0; i < num_pages; i++) { -+ struct page *phys_page = as_page(phys[i]); -+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); ++#define kbase_io_history_term CSTD_NOP + -+ if (page_md) { -+ spin_lock(&page_md->migrate_lock); -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ spin_unlock(&page_md->migrate_lock); -+ } -+ } -+ } -+} ++#define kbase_io_history_dump CSTD_NOP + -+static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, const u64 vpfn, -+ size_t nr, u64 dirty_pgds, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ bool insert_pages_failed) -+{ -+ struct kbase_mmu_hw_op_param op_param; -+ int as_nr = 0; ++#define kbasep_regs_history_debugfs_init CSTD_NOP + -+ op_param.vpfn = vpfn; -+ op_param.nr = nr; -+ op_param.op = KBASE_MMU_OP_FLUSH_PT; -+ op_param.mmu_sync_info = mmu_sync_info; -+ op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; -+ op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); ++#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + -+#if MALI_USE_CSF -+ as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; -+#else -+ WARN_ON(!mmut->kctx); -+#endif ++#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h +new file mode 100644 +index 000000000..48ea9954b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h +@@ -0,0 +1,277 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* MMU cache flush strategy depends on whether GPU control commands for -+ * flushing physical address ranges are supported. The new physical pages -+ * are not present in GPU caches therefore they don't need any cache -+ * maintenance, but PGDs in the page table may or may not be created anew. -+ * -+ * Operations that affect the whole GPU cache shall only be done if it's -+ * impossible to update physical ranges. -+ * -+ * On GPUs where flushing by physical address range is supported, -+ * full cache flush is done when an error occurs during -+ * insert_pages() to keep the error handling simpler. 
-+ */ -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) -+ mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); -+ else -+ mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); -+} ++#ifndef _KBASE_RESET_GPU_H_ ++#define _KBASE_RESET_GPU_H_ + +/** -+ * update_parent_pgds() - Updates the page table from bottom level towards -+ * the top level to insert a new ATE ++ * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst ++ * the current thread is accessing the GPU, ++ * and wait for any in-flight reset to ++ * finish. ++ * @kbdev: Device pointer + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @cur_level: The level of MMU page table where the ATE needs to be added. -+ * The bottom PGD level. -+ * @insert_level: The level of MMU page table where the chain of newly allocated -+ * PGDs needs to be linked-in/inserted. -+ * The top-most PDG level to be updated. -+ * @insert_vpfn: The virtual page frame number for the ATE. -+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains -+ * the physical addresses of newly allocated PGDs from index -+ * insert_level+1 to cur_level, and an existing PGD at index -+ * insert_level. ++ * This should be used when a potential access to the HW is going to be made ++ * from a non-atomic context. + * -+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD -+ * at insert_level which already exists in the MMU Page Tables.Migration status is also -+ * updated for all the newly allocated PGD pages. ++ * It will wait for any in-flight reset to finish before returning. Hence, ++ * correct lock ordering must be observed with respect to the calling thread ++ * and the reset worker thread. + * -+ * Return: -+ * * 0 - OK -+ * * -EFAULT - level N+1 PGD does not exist -+ * * -EINVAL - kmap() failed for level N PGD PFN ++ * This does not synchronize general access to the HW, and so multiple threads ++ * can prevent GPU reset concurrently, whilst not being serialized. This is ++ * advantageous as the threads can make this call at points where they do not ++ * know for sure yet whether they will indeed access the GPU (for example, to ++ * respect lock ordering), without unnecessarily blocking others. ++ * ++ * Threads must still use other synchronization to ensure they access the HW ++ * consistently, at a point where they are certain it needs to be accessed. ++ * ++ * On success, ensure that when access to the GPU by the caller thread has ++ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to ++ * happen. ++ * ++ * This may return a failure in cases such as a previous failure to reset the ++ * GPU within a reasonable time. If that happens, the GPU might be ++ * non-operational and the caller should not attempt any further access. ++ * ++ * Note: ++ * For atomic context, instead check kbase_reset_gpu_is_active(). ++ * ++ * Return: 0 on success, or negative error code on failure. + */ -+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ int cur_level, int insert_level, u64 insert_vpfn, -+ phys_addr_t *pgds_to_insert) -+{ -+ int pgd_index; -+ int err = 0; ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev); + -+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1) -+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain -+ * are valid when they are inserted into the MMU Page table via the insert_level PGD. 
-+ */ -+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) { -+ int parent_index = pgd_index - 1; -+ phys_addr_t parent_pgd = pgds_to_insert[parent_index]; -+ unsigned int current_valid_entries; -+ u64 pte; -+ phys_addr_t target_pgd = pgds_to_insert[pgd_index]; -+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF; -+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); -+ u64 *parent_page_va; ++/** ++ * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting ++ * whilst the current thread is accessing the ++ * GPU, unless a reset is already in progress. ++ * @kbdev: Device pointer ++ * ++ * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an ++ * existing reset to complete. This can be used on codepaths that the Reset ++ * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would ++ * otherwise deadlock. ++ * ++ * Instead, a reset that is currently happening will cause this function to ++ * return an error code indicating that, and further resets will not have been ++ * prevented. ++ * ++ * In such cases, the caller must check for -EAGAIN, and take similar actions ++ * as for handling reset in atomic context. That is, they must cancel any ++ * actions that depended on reset being prevented, possibly deferring them ++ * until after the reset. ++ * ++ * Otherwise a successful return means that the caller can continue its actions ++ * safely in the knowledge that reset is prevented, and the reset worker will ++ * correctly wait instead of deadlocking against this thread. ++ * ++ * On success, ensure that when access to the GPU by the caller thread has ++ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to ++ * happen. ++ * ++ * Refer to kbase_reset_gpu_prevent_and_wait() for more information. ++ * ++ * Return: 0 on success. -EAGAIN if a reset is currently happening. Other ++ * negative error codes on failure, where -ENOMEM indicates that GPU reset ++ * had failed. ++ */ ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev); + -+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) { -+ err = -EFAULT; -+ goto failure_recovery; -+ } ++/** ++ * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been ++ * previously prevented. ++ * @kbdev: Device pointer ++ * ++ * This should be used when a potential access to the HW has finished from a ++ * non-atomic context. ++ * ++ * It must be used from the same thread that originally made a previously call ++ * to kbase_reset_gpu_prevent_and_wait(). It must not be deferred to another ++ * thread. ++ */ ++void kbase_reset_gpu_allow(struct kbase_device *kbdev); + -+ parent_page_va = kmap(parent_page); -+ if (unlikely(parent_page_va == NULL)) { -+ dev_err(kbdev->dev, "%s: kmap failure", __func__); -+ err = -EINVAL; -+ goto failure_recovery; -+ } ++/** ++ * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is ++ * currently prevented by the current ++ * thread. ++ * @kbdev: Device pointer ++ * ++ * Make debugging checks that the current thread has made a call to ++ * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to ++ * kbase_reset_gpu_allow(). ++ * ++ * CONFIG_LOCKDEP is required to prove that reset is indeed ++ * prevented. Otherwise only limited debugging checks can be made. 
++ */ ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); + -+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va); ++/** ++ * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that ++ * either GPU reset previously ++ * failed, or is currently ++ * prevented. ++ * ++ * @kbdev: Device pointer ++ * ++ * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where ++ * reset was not prevented due to a failure, yet we still need to execute the ++ * cleanup code following. ++ * ++ * Cleanup code following this call must handle any inconsistent state modified ++ * by the failed GPU reset, and must timeout any blocking operations instead of ++ * waiting forever. ++ */ ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); + -+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); -+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); -+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); -+ kunmap(parent_page); ++/** ++ * RESET_FLAGS_NONE - Flags for kbase_prepare_to_reset_gpu ++ */ ++#define RESET_FLAGS_NONE (0U) + -+ if (parent_index != insert_level) { -+ /* Newly allocated PGDs */ -+ kbase_mmu_sync_pgd_cpu( -+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), -+ sizeof(u64)); -+ } else { -+ /* A new valid entry is added to an existing PGD. Perform the -+ * invalidate operation for GPU cache as it could be having a -+ * cacheline that contains the entry (in an invalid form). -+ */ -+ kbase_mmu_sync_pgd( -+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), -+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), -+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT); -+ } ++/* This reset should be treated as an unrecoverable error by HW counter logic */ ++#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) + -+ /* Update the new target_pgd page to its stable state */ -+ if (kbase_page_migration_enabled) { -+ struct kbase_page_metadata *page_md = -+ kbase_page_private(phys_to_page(target_pgd)); ++/** ++ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. ++ * @kbdev: Device pointer ++ * @flags: Bitfield indicating impact of reset (see flag defines) ++ * ++ * Caller is expected to hold the kbdev->hwaccess_lock. ++ * ++ * Return: a boolean which should be interpreted as follows: ++ * - true - Prepared for reset, kbase_reset_gpu should be called. ++ * - false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. ++ */ ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags); + -+ spin_lock(&page_md->migrate_lock); ++/** ++ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. ++ * @kbdev: Device pointer ++ * @flags: Bitfield indicating impact of reset (see flag defines) ++ * ++ * Return: a boolean which should be interpreted as follows: ++ * - true - Prepared for reset, kbase_reset_gpu should be called. ++ * - false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. ++ */ ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); + -+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || -+ IS_PAGE_ISOLATED(page_md->status)); ++/** ++ * kbase_reset_gpu - Reset the GPU ++ * @kbdev: Device pointer ++ * ++ * This function should be called after kbase_prepare_to_reset_gpu if it returns ++ * true. 
It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). ++ * ++ * After this function is called the caller should call kbase_reset_gpu_wait() ++ * to know when the reset has completed. ++ */ ++void kbase_reset_gpu(struct kbase_device *kbdev); + -+ if (mmut->kctx) { -+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); -+ page_md->data.pt_mapped.mmut = mmut; -+ page_md->data.pt_mapped.pgd_vpfn_level = -+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index); -+ } else { -+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); -+ } ++/** ++ * kbase_reset_gpu_locked - Reset the GPU ++ * @kbdev: Device pointer ++ * ++ * This function should be called after kbase_prepare_to_reset_gpu_locked if it ++ * returns true. It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). ++ * Caller is expected to hold the kbdev->hwaccess_lock. ++ * ++ * After this function is called, the caller should call kbase_reset_gpu_wait() ++ * to know when the reset has completed. ++ */ ++void kbase_reset_gpu_locked(struct kbase_device *kbdev); + -+ spin_unlock(&page_md->migrate_lock); -+ } -+ } ++/** ++ * kbase_reset_gpu_silent - Reset the GPU silently ++ * @kbdev: Device pointer ++ * ++ * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs) ++ * and don't emit messages into the kernel log while doing the reset. ++ * ++ * This function should be used in cases where we are doing a controlled reset ++ * of the GPU as part of normal processing (e.g. exiting protected mode) where ++ * the driver will have ensured the scheduler has been idled and all other ++ * users of the GPU (e.g. instrumentation) have been suspended. ++ * ++ * Return: 0 if the reset was started successfully ++ * -EAGAIN if another reset is currently in progress ++ */ ++int kbase_reset_gpu_silent(struct kbase_device *kbdev); + -+ return 0; ++/** ++ * kbase_reset_gpu_is_active - Reports if the GPU is being reset ++ * @kbdev: Device pointer ++ * ++ * Any changes made to the HW when this returns true may be lost, overwritten ++ * or corrupted. ++ * ++ * Note that unless appropriate locks are held when using this function, the ++ * state could change immediately afterwards. ++ * ++ * Return: True if the GPU is in the process of being reset. ++ */ ++bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); + -+failure_recovery: -+ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */ -+ for (; pgd_index < cur_level; pgd_index++) { -+ phys_addr_t pgd = pgds_to_insert[pgd_index]; -+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); -+ u64 *pgd_page_va = kmap(pgd_page); -+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; ++/** ++ * kbase_reset_gpu_not_pending - Reports if the GPU reset isn't pending ++ * ++ * @kbdev: Device pointer ++ * ++ * Note that unless appropriate locks are held when using this function, the ++ * state could change immediately afterwards. ++ * ++ * Return: True if the GPU reset isn't pending. ++ */ ++bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev); + -+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); -+ kunmap(pgd_page); -+ } ++/** ++ * kbase_reset_gpu_wait - Wait for a GPU reset to complete ++ * @kbdev: Device pointer ++ * ++ * This function may wait indefinitely. ++ * ++ * Return: 0 if successful or a negative error code on failure. 
++ */ ++int kbase_reset_gpu_wait(struct kbase_device *kbdev); + -+ return err; -+} ++/** ++ * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_reset_gpu_init(struct kbase_device *kbdev); + +/** -+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to -+ * level_high (inclusive) ++ * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism. + * -+ * @kbdev: Device pointer. -+ * @mmut: GPU MMU page table. -+ * @level_low: The lower bound for the levels for which the PGD allocs are required -+ * @level_high: The higher bound for the levels for which the PGD allocs are required -+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the -+ * newly allocated PGD addresses to. ++ * @kbdev: Device pointer ++ */ ++void kbase_reset_gpu_term(struct kbase_device *kbdev); ++ ++#endif +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.c b/drivers/gpu/arm/bifrost/mali_kbase_smc.c +new file mode 100644 +index 000000000..abbe8d56d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.c +@@ -0,0 +1,90 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * Numerically, level_low < level_high, not to be confused with top level and -+ * bottom level concepts for MMU PGDs. They are only used as low and high bounds -+ * in an incrementing for-loop. ++ * (C) COPYRIGHT 2015, 2018, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: -+ * * 0 - OK -+ * * -ENOMEM - allocation failed for a PGD. + */ -+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ phys_addr_t *new_pgds, int level_low, int level_high) -+{ -+ int err = 0; -+ int i; + -+ lockdep_assert_held(&mmut->mmu_lock); ++#if IS_ENABLED(CONFIG_ARM64) + -+ for (i = level_low; i <= level_high; i++) { -+ do { -+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); -+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) -+ break; ++#include ++#include + -+ mutex_unlock(&mmut->mmu_lock); -+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], -+ level_high, NULL); -+ mutex_lock(&mmut->mmu_lock); -+ if (err) { -+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", -+ __func__, err); ++#include + -+ /* Free all PGDs allocated in previous successful iterations -+ * from (i-1) to level_low -+ */ -+ for (i = (i - 1); i >= level_low; i--) { -+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) -+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); -+ } ++/* __asmeq is not available on Kernel versions >= 4.20 */ ++#ifndef __asmeq ++/* ++ * This is used to ensure the compiler did actually allocate the register we ++ * asked it for some inline assembly sequences. 
Apparently we can't trust the ++ * compiler from one version to another so a bit of paranoia won't hurt. This ++ * string is meant to be concatenated with the inline asm string and will ++ * cause compilation to stop on mismatch. (for details, see gcc PR 15089) ++ */ ++#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" ++#endif + -+ return err; -+ } -+ } while (1); -+ } ++static noinline u64 invoke_smc_fid(u64 function_id, ++ u64 arg0, u64 arg1, u64 arg2) ++{ ++ register u64 x0 asm("x0") = function_id; ++ register u64 x1 asm("x1") = arg0; ++ register u64 x2 asm("x2") = arg1; ++ register u64 x3 asm("x3") = arg2; + -+ return 0; ++ asm volatile( ++ __asmeq("%0", "x0") ++ __asmeq("%1", "x1") ++ __asmeq("%2", "x2") ++ __asmeq("%3", "x3") ++ "smc #0\n" ++ : "+r" (x0) ++ : "r" (x1), "r" (x2), "r" (x3)); ++ ++ return x0; +} + -+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, -+ struct tagged_addr phys, size_t nr, unsigned long flags, -+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, -+ bool ignore_page_migration) ++u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ u64 insert_vpfn = start_vpfn; -+ size_t remain = nr; -+ int err; -+ struct kbase_device *kbdev; -+ u64 dirty_pgds = 0; -+ unsigned int i; -+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; -+ enum kbase_mmu_op_type flush_op; -+ struct kbase_mmu_table *mmut = &kctx->mmu; -+ int l, cur_level, insert_level; ++ /* Is fast call (bit 31 set) */ ++ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); ++ /* bits 16-23 must be zero for fast calls */ ++ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); + -+ if (WARN_ON(kctx == NULL)) -+ return -EINVAL; ++ return invoke_smc_fid(fid, arg0, arg1, arg2); ++} + -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); ++u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, ++ u64 arg0, u64 arg1, u64 arg2) ++{ ++ u32 fid = 0; + -+ kbdev = kctx->kbdev; ++ /* Only the six bits allowed should be used. */ ++ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); + -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; ++ fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ ++ if (smc64) ++ fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ ++ fid |= oen; /* Bit 29:24: OEN */ ++ /* Bit 23:16: Must be zero for fast calls */ ++ fid |= (function_number); /* Bit 15:0: function number */ + -+ /* If page migration is enabled, pages involved in multiple GPU mappings -+ * are always treated as not movable. -+ */ -+ if (kbase_page_migration_enabled && !ignore_page_migration) { -+ struct page *phys_page = as_page(phys); -+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); ++ return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); ++} + -+ if (page_md) { -+ spin_lock(&page_md->migrate_lock); -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ spin_unlock(&page_md->migrate_lock); -+ } -+ } ++#endif /* CONFIG_ARM64 */ + -+ mutex_lock(&mmut->mmu_lock); +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.h b/drivers/gpu/arm/bifrost/mali_kbase_smc.h +new file mode 100644 +index 000000000..40a348388 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.h +@@ -0,0 +1,69 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2015, 2020-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ while (remain) { -+ unsigned int vindex = insert_vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; -+ struct page *p; -+ register unsigned int num_of_valid_entries; -+ bool newly_created_pgd = false; ++#ifndef _KBASE_SMC_H_ ++#define _KBASE_SMC_H_ + -+ if (count > remain) -+ count = remain; ++#if IS_ENABLED(CONFIG_ARM64) + -+ cur_level = MIDGARD_MMU_BOTTOMLEVEL; -+ insert_level = cur_level; ++#include + -+ /* -+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly -+ * suboptimal. We don't have to re-parse the whole tree -+ * each time (just cache the l0-l2 sequence). -+ * On the other hand, it's only a gain when we map more than -+ * 256 pages at once (on average). Do we really care? -+ */ -+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ -+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, -+ &pgd); ++#define SMC_FAST_CALL (1 << 31) ++#define SMC_64 (1 << 30) + -+ if (err) { -+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", -+ __func__, err); -+ goto fail_unlock; -+ } ++#define SMC_OEN_OFFSET 24 ++#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ ++#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) ++#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) + -+ /* No valid pgd at cur_level */ -+ if (insert_level != cur_level) { -+ /* Allocate new pgds for all missing levels from the required level -+ * down to the lowest valid pgd at insert_level -+ */ -+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), -+ cur_level); -+ if (err) -+ goto fail_unlock; + -+ newly_created_pgd = true; ++/** ++ * kbase_invoke_smc_fid - Perform a secure monitor call ++ * @fid: The SMC function to call, see SMC Calling convention. ++ * @arg0: First argument to the SMC. ++ * @arg1: Second argument to the SMC. ++ * @arg2: Third argument to the SMC. ++ * ++ * See SMC Calling Convention for details. ++ * ++ * Return: the return value from the SMC. ++ */ ++u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); + -+ new_pgds[insert_level] = pgd; ++/** ++ * kbase_invoke_smc - Perform a secure monitor call ++ * @oen: Owning Entity number (SIP, STD etc). ++ * @function_number: The function number within the OEN. ++ * @smc64: use SMC64 calling convention instead of SMC32. ++ * @arg0: First argument to the SMC. ++ * @arg1: Second argument to the SMC. ++ * @arg2: Third argument to the SMC. ++ * ++ * See SMC Calling Convention for details. ++ * ++ * Return: the return value from the SMC call. ++ */ ++u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, ++ u64 arg0, u64 arg1, u64 arg2); + -+ /* If we didn't find an existing valid pgd at cur_level, -+ * we've now allocated one. The ATE in the next step should -+ * be inserted in this newly allocated pgd. 
-+ */ -+ pgd = new_pgds[cur_level]; -+ } ++#endif /* CONFIG_ARM64 */ + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_err(kbdev->dev, "%s: kmap failure", __func__); -+ err = -ENOMEM; ++#endif /* _KBASE_SMC_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +new file mode 100644 +index 000000000..a9312a0c4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +@@ -0,0 +1,1770 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ goto fail_unlock_free_pgds; -+ } ++#include + -+ num_of_valid_entries = -+ kbdev->mmu_mode->get_num_valid_entries(pgd_page); ++#include ++#include ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ for (i = 0; i < count; i++) { -+ unsigned int ofs = vindex + i; ++#if !MALI_USE_CSF ++/** ++ * DOC: This file implements the logic behind software only jobs that are ++ * executed within the driver rather than being handed over to the GPU. ++ */ + -+ /* Fail if the current page is a valid ATE entry */ -+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); ++static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ unsigned long lflags; + -+ pgd_page[ofs] = kbase_mmu_create_ate(kbdev, -+ phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); -+ } ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++} + -+ kbdev->mmu_mode->set_num_valid_entries( -+ pgd_page, num_of_valid_entries + count); ++void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ unsigned long lflags; + -+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL); ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_del(&katom->queue); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++} + -+ /* MMU cache flush operation here will depend on whether bottom level -+ * PGD is newly created or not. -+ * -+ * If bottom level PGD is newly created then no GPU cache maintenance is -+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache -+ * maintenance is required for existing PGD. -+ */ -+ flush_op = newly_created_pgd ? 
KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; ++static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), -+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), -+ flush_op); ++ /* Record the start time of this atom so we could cancel it at ++ * the right time. ++ */ ++ katom->start_timestamp = ktime_get_raw(); + -+ if (newly_created_pgd) { -+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, -+ new_pgds); -+ if (err) { -+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", -+ __func__, err); ++ /* Add the atom to the waiting list before the timer is ++ * (re)started to make sure that it gets processed. ++ */ ++ kbasep_add_waiting_soft_job(katom); + -+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); ++ /* Schedule timeout of this atom after a period if it is not active */ ++ if (!timer_pending(&kctx->soft_job_timeout)) { ++ int timeout_ms = atomic_read( ++ &kctx->kbdev->js_data.soft_job_timeout_ms); ++ mod_timer(&kctx->soft_job_timeout, ++ jiffies + msecs_to_jiffies(timeout_ms)); ++ } ++} + -+ kunmap(p); -+ goto fail_unlock_free_pgds; -+ } -+ } ++static int kbasep_read_soft_event_status( ++ struct kbase_context *kctx, u64 evt, unsigned char *status) ++{ ++ unsigned char *mapped_evt; ++ struct kbase_vmap_struct map; + -+ insert_vpfn += count; -+ remain -= count; -+ kunmap(p); -+ } ++ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), ++ KBASE_REG_CPU_RD, &map); ++ if (!mapped_evt) ++ return -EFAULT; + -+ mutex_unlock(&mmut->mmu_lock); ++ *status = *mapped_evt; + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, -+ false); ++ kbase_vunmap(kctx, &map); + + return 0; ++} + -+fail_unlock_free_pgds: -+ /* Free the pgds allocated by us from insert_level+1 to bottom level */ -+ for (l = cur_level; l > insert_level; l--) -+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); ++static int kbasep_write_soft_event_status( ++ struct kbase_context *kctx, u64 evt, unsigned char new_status) ++{ ++ unsigned char *mapped_evt; ++ struct kbase_vmap_struct map; + -+fail_unlock: -+ if (insert_vpfn != start_vpfn) { -+ /* Invalidate the pages we have partially completed */ -+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds, -+ NULL, true); -+ } ++ if ((new_status != BASE_JD_SOFT_EVENT_SET) && ++ (new_status != BASE_JD_SOFT_EVENT_RESET)) ++ return -EINVAL; + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, -+ true); -+ kbase_mmu_free_pgds_list(kbdev, mmut); -+ mutex_unlock(&mmut->mmu_lock); ++ mapped_evt = kbase_vmap_prot(kctx, evt, sizeof(*mapped_evt), ++ KBASE_REG_CPU_WR, &map); ++ if (!mapped_evt) ++ return -EFAULT; + -+ return err; -+} ++ *mapped_evt = new_status; + -+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, -+ struct tagged_addr phys, size_t nr, unsigned long flags, -+ int const group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) -+{ -+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. 
*/ -+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, -+ false); -+} ++ kbase_vunmap(kctx, &map); + -+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, -+ struct tagged_addr phys, size_t nr, unsigned long flags, -+ int const group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) -+{ -+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ -+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, -+ false); ++ return 0; +} + -+static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, -+ struct kbase_va_region *reg, -+ struct kbase_mmu_table *mmut, const u64 vpfn) ++static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) +{ -+ struct page *phys_page = as_page(phys); -+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); ++ struct kbase_vmap_struct map; ++ void *user_result; ++ struct timespec64 ts; ++ struct base_dump_cpu_gpu_counters data; ++ u64 system_time = 0ULL; ++ u64 cycle_counter; ++ u64 jc = katom->jc; ++ struct kbase_context *kctx = katom->kctx; ++ int pm_active_err; + -+ spin_lock(&page_md->migrate_lock); ++ memset(&data, 0, sizeof(data)); + -+ /* If no GPU va region is given: the metadata provided are -+ * invalid. -+ * -+ * If the page is already allocated and mapped: this is -+ * an additional GPU mapping, probably to create a memory -+ * alias, which means it is no longer possible to migrate -+ * the page easily because tracking all the GPU mappings -+ * would be too costly. -+ * -+ * In any case: the page becomes not movable. It is kept -+ * alive, but attempts to migrate it will fail. The page -+ * will be freed if it is still not movable when it returns -+ * to a memory pool. Notice that the movable flag is not -+ * cleared because that would require taking the page lock. ++ /* Take the PM active reference as late as possible - otherwise, it could ++ * delay suspend until we process the atom (which may be at the end of a ++ * long chain of dependencies + */ -+ if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); -+ } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { -+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); -+ page_md->data.mapped.reg = reg; -+ page_md->data.mapped.mmut = mmut; -+ page_md->data.mapped.vpfn = vpfn; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); ++ if (pm_active_err) { ++ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; ++ ++ /* We're suspended - queue this on the list of suspended jobs ++ * Use dep_item[1], because dep_item[0] was previously in use ++ * for 'waiting_soft_jobs'. 
++ */ ++ mutex_lock(&js_devdata->runpool_mutex); ++ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ /* Also adding this to the list of waiting soft job */ ++ kbasep_add_waiting_soft_job(katom); ++ ++ return pm_active_err; + } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ else ++ atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + -+ spin_unlock(&page_md->migrate_lock); -+} ++ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, ++ &ts); + -+static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, -+ struct tagged_addr *phys, size_t requested_nr) -+{ -+ size_t i; ++ kbase_pm_context_idle(kctx->kbdev); + -+ for (i = 0; i < requested_nr; i++) { -+ struct page *phys_page = as_page(phys[i]); -+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); ++ data.sec = ts.tv_sec; ++ data.usec = ts.tv_nsec / 1000; ++ data.system_time = system_time; ++ data.cycle_counter = cycle_counter; + -+ /* Skip the 4KB page that is part of a large page, as the large page is -+ * excluded from the migration process. -+ */ -+ if (is_huge(phys[i]) || is_partial(phys[i])) -+ continue; ++ /* Assume this atom will be cancelled until we know otherwise */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ if (page_md) { -+ u8 status; ++ /* GPU_WR access is checked on the range for returning the result to ++ * userspace for the following reasons: ++ * - security, this is currently how imported user bufs are checked. ++ * - userspace ddk guaranteed to assume region was mapped as GPU_WR ++ */ ++ user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); ++ if (!user_result) ++ return 0; + -+ spin_lock(&page_md->migrate_lock); -+ status = PAGE_STATUS_GET(page_md->status); ++ memcpy(user_result, &data, sizeof(data)); + -+ if (status == ALLOCATED_MAPPED) { -+ if (IS_PAGE_ISOLATED(page_md->status)) { -+ page_md->status = PAGE_STATUS_SET( -+ page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); -+ page_md->data.free_isolated.kbdev = kbdev; -+ /* At this point, we still have a reference -+ * to the page via its page migration metadata, -+ * and any page with the FREE_ISOLATED_IN_PROGRESS -+ * status will subsequently be freed in either -+ * kbase_page_migrate() or kbase_page_putback() -+ */ -+ phys[i] = as_tagged(0); -+ } else -+ page_md->status = PAGE_STATUS_SET(page_md->status, -+ (u8)FREE_IN_PROGRESS); -+ } ++ kbase_vunmap(kctx, &map); + -+ spin_unlock(&page_md->migrate_lock); -+ } -+ } ++ /* Atom was fine - mark it as done */ ++ katom->event_code = BASE_JD_EVENT_DONE; ++ ++ return 0; +} + -+u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, -+ struct tagged_addr const phy, unsigned long const flags, -+ int const level, int const group_id) ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++/* Called by the explicit fence mechanism when a fence wait has completed */ ++void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) +{ -+ u64 entry; ++ struct kbase_context *kctx = katom->kctx; + -+ kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); -+ return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, -+ group_id, level, entry); ++ mutex_lock(&kctx->jctx.lock); ++ kbasep_remove_waiting_soft_job(katom); ++ kbase_finish_soft_job(katom); ++ if (kbase_jd_done_nolock(katom, true)) ++ kbase_js_sched_all(kctx->kbdev); ++ mutex_unlock(&kctx->jctx.lock); +} ++#endif + -+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct 
kbase_mmu_table *mmut, -+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int const group_id, u64 *dirty_pgds, -+ struct kbase_va_region *reg, bool ignore_page_migration) ++static void kbasep_soft_event_complete_job(struct work_struct *work) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ u64 insert_vpfn = start_vpfn; -+ size_t remain = nr; -+ int err; -+ struct kbase_mmu_mode const *mmu_mode; -+ unsigned int i; -+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; -+ int l, cur_level, insert_level; -+ -+ /* Note that 0 is a valid start_vpfn */ -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); -+ -+ mmu_mode = kbdev->mmu_mode; -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; -+ -+ mutex_lock(&mmut->mmu_lock); ++ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ int resched; + -+ while (remain) { -+ unsigned int vindex = insert_vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; -+ struct page *p; -+ register unsigned int num_of_valid_entries; -+ bool newly_created_pgd = false; -+ enum kbase_mmu_op_type flush_op; ++ mutex_lock(&kctx->jctx.lock); ++ resched = kbase_jd_done_nolock(katom, true); ++ mutex_unlock(&kctx->jctx.lock); + -+ if (count > remain) -+ count = remain; ++ if (resched) ++ kbase_js_sched_all(kctx->kbdev); ++} + -+ if (!vindex && is_huge_head(*phys)) -+ cur_level = MIDGARD_MMU_LEVEL(2); -+ else -+ cur_level = MIDGARD_MMU_BOTTOMLEVEL; ++void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) ++{ ++ int cancel_timer = 1; ++ struct list_head *entry, *tmp; ++ unsigned long lflags; + -+ insert_level = cur_level; ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ struct kbase_jd_atom *katom = list_entry( ++ entry, struct kbase_jd_atom, queue); + -+ /* -+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly -+ * suboptimal. We don't have to re-parse the whole tree -+ * each time (just cache the l0-l2 sequence). -+ * On the other hand, it's only a gain when we map more than -+ * 256 pages at once (on average). Do we really care? -+ */ -+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ -+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, -+ &pgd); ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ if (katom->jc == evt) { ++ list_del(&katom->queue); + -+ if (err) { -+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", -+ __func__, err); -+ goto fail_unlock; ++ katom->event_code = BASE_JD_EVENT_DONE; ++ INIT_WORK(&katom->work, ++ kbasep_soft_event_complete_job); ++ queue_work(kctx->jctx.job_done_wq, ++ &katom->work); ++ } else { ++ /* There are still other waiting jobs, we cannot ++ * cancel the timer yet. ++ */ ++ cancel_timer = 0; ++ } ++ break; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ /* Keep the timer running if fence debug is enabled and ++ * there are waiting fence jobs. 
++ */ ++ cancel_timer = 0; ++ break; ++#endif + } ++ } + -+ /* No valid pgd at cur_level */ -+ if (insert_level != cur_level) { -+ /* Allocate new pgds for all missing levels from the required level -+ * down to the lowest valid pgd at insert_level -+ */ -+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), -+ cur_level); -+ if (err) -+ goto fail_unlock; ++ if (cancel_timer) ++ del_timer(&kctx->soft_job_timeout); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++} + -+ newly_created_pgd = true; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ struct device *dev = kctx->kbdev->dev; ++ int i; + -+ new_pgds[insert_level] = pgd; ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep; + -+ /* If we didn't find an existing valid pgd at cur_level, -+ * we've now allocated one. The ATE in the next step should -+ * be inserted in this newly allocated pgd. -+ */ -+ pgd = new_pgds[cur_level]; -+ } ++ list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { ++ if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || ++ dep->status == KBASE_JD_ATOM_STATE_COMPLETED) ++ continue; + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_err(kbdev->dev, "%s: kmap failure", __func__); -+ err = -ENOMEM; ++ if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ++ == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { ++ /* Found blocked trigger fence. */ ++ struct kbase_sync_fence_info info; + -+ goto fail_unlock_free_pgds; -+ } ++ if (!kbase_sync_fence_in_info_get(dep, &info)) { ++ dev_warn(dev, ++ "\tVictim trigger atom %d fence [%pK] %s: %s\n", ++ kbase_jd_atom_id(kctx, dep), ++ info.fence, ++ info.name, ++ kbase_sync_status_string(info.status)); ++ } ++ } + -+ num_of_valid_entries = -+ mmu_mode->get_num_valid_entries(pgd_page); ++ kbase_fence_debug_check_atom(dep); ++ } ++ } ++} + -+ if (cur_level == MIDGARD_MMU_LEVEL(2)) { -+ int level_index = (insert_vpfn >> 9) & 0x1FF; -+ pgd_page[level_index] = -+ kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); ++static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ struct device *dev = katom->kctx->kbdev->dev; ++ int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); ++ unsigned long lflags; ++ struct kbase_sync_fence_info info; + -+ num_of_valid_entries++; -+ } else { -+ for (i = 0; i < count; i++) { -+ unsigned int ofs = vindex + i; -+ u64 *target = &pgd_page[ofs]; ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + -+ /* Warn if the current page is a valid ATE -+ * entry. The page table shouldn't have anything -+ * in the place where we are trying to put a -+ * new entry. Modification to page table entries -+ * should be performed with -+ * kbase_mmu_update_pages() -+ */ -+ WARN_ON((*target & 1UL) != 0); ++ if (kbase_sync_fence_in_info_get(katom, &info)) { ++ /* Fence must have signaled just after timeout. 
*/ ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ return; ++ } + -+ *target = kbase_mmu_create_ate(kbdev, -+ phys[i], flags, cur_level, group_id); ++ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", ++ kctx->tgid, kctx->id, ++ kbase_jd_atom_id(kctx, katom), ++ info.fence, timeout_ms); ++ dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", ++ info.fence, info.name, ++ kbase_sync_status_string(info.status)); + -+ /* If page migration is enabled, this is the right time -+ * to update the status of the page. -+ */ -+ if (kbase_page_migration_enabled && !ignore_page_migration && -+ !is_huge(phys[i]) && !is_partial(phys[i])) -+ kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, -+ insert_vpfn + i); -+ } -+ num_of_valid_entries += count; -+ } ++ /* Search for blocked trigger atoms */ ++ kbase_fence_debug_check_atom(katom); + -+ mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + -+ if (dirty_pgds) -+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level); ++ kbase_sync_fence_in_dump(katom); ++} + -+ /* MMU cache flush operation here will depend on whether bottom level -+ * PGD is newly created or not. -+ * -+ * If bottom level PGD is newly created then no GPU cache maintenance is -+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache -+ * maintenance is required for existing PGD. -+ */ -+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; ++struct kbase_fence_debug_work { ++ struct kbase_jd_atom *katom; ++ struct work_struct work; ++}; + -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), -+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), -+ flush_op); ++static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) ++{ ++ struct kbase_fence_debug_work *w = container_of(work, ++ struct kbase_fence_debug_work, work); ++ struct kbase_jd_atom *katom = w->katom; ++ struct kbase_context *kctx = katom->kctx; + -+ if (newly_created_pgd) { -+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, -+ new_pgds); -+ if (err) { -+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", -+ __func__, err); ++ mutex_lock(&kctx->jctx.lock); ++ kbase_fence_debug_wait_timeout(katom); ++ mutex_unlock(&kctx->jctx.lock); + -+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); ++ kfree(w); ++} + -+ kunmap(p); -+ goto fail_unlock_free_pgds; -+ } -+ } ++static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) ++{ ++ struct kbase_fence_debug_work *work; ++ struct kbase_context *kctx = katom->kctx; + -+ phys += count; -+ insert_vpfn += count; -+ remain -= count; -+ kunmap(p); ++ /* Enqueue fence debug worker. Use job_done_wq to get ++ * debug print ordered with job completion. ++ */ ++ work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); ++ /* Ignore allocation failure. 
*/ ++ if (work) { ++ work->katom = katom; ++ INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); ++ queue_work(kctx->jctx.job_done_wq, &work->work); + } ++} ++#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ + -+ mutex_unlock(&mmut->mmu_lock); ++void kbasep_soft_job_timeout_worker(struct timer_list *timer) ++{ ++ struct kbase_context *kctx = container_of(timer, struct kbase_context, ++ soft_job_timeout); ++ u32 timeout_ms = (u32)atomic_read( ++ &kctx->kbdev->js_data.soft_job_timeout_ms); ++ ktime_t cur_time = ktime_get_raw(); ++ bool restarting = false; ++ unsigned long lflags; ++ struct list_head *entry, *tmp; + -+ return 0; ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ struct kbase_jd_atom *katom = list_entry(entry, ++ struct kbase_jd_atom, queue); ++ s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, ++ katom->start_timestamp)); + -+fail_unlock_free_pgds: -+ /* Free the pgds allocated by us from insert_level+1 to bottom level */ -+ for (l = cur_level; l > insert_level; l--) -+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); ++ if (elapsed_time < (s64)timeout_ms) { ++ restarting = true; ++ continue; ++ } + -+fail_unlock: -+ if (insert_vpfn != start_vpfn) { -+ /* Invalidate the pages we have partially completed */ -+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, -+ phys, ignore_page_migration); -+ } ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ /* Take it out of the list to ensure that it ++ * will be cancelled in all cases ++ */ ++ list_del(&katom->queue); + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, -+ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); -+ kbase_mmu_free_pgds_list(kbdev, mmut); -+ mutex_unlock(&mmut->mmu_lock); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ INIT_WORK(&katom->work, kbasep_soft_event_complete_job); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ break; ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ kbase_fence_debug_timeout(katom); ++ break; ++#endif ++ } ++ } + -+ return err; ++ if (restarting) ++ mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + -+/* -+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space -+ * number 'as_nr'. 
-+ */ -+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, -+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, -+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg, bool ignore_page_migration) ++static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ -+ int err; -+ u64 dirty_pgds = 0; ++ struct kbase_context *kctx = katom->kctx; ++ unsigned char status; + -+ /* Early out if there is nothing to do */ -+ if (nr == 0) ++ /* The status of this soft-job is stored in jc */ ++ if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return 0; ++ } + -+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, -+ &dirty_pgds, reg, ignore_page_migration); -+ if (err) -+ return err; ++ if (status == BASE_JD_SOFT_EVENT_SET) ++ return 0; /* Event already set, nothing to do */ + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); ++ kbasep_add_waiting_with_timeout(katom); + -+ return 0; ++ return 1; +} + -+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -+ -+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int as_nr, int const group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg) ++static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, ++ unsigned char new_status) +{ -+ int err; -+ u64 dirty_pgds = 0; -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; -+ -+ /* Imported allocations don't have metadata and therefore always ignore the -+ * page migration logic. -+ */ -+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, -+ &dirty_pgds, reg, true); -+ if (err) -+ return err; ++ /* Complete jobs waiting on the same event */ ++ struct kbase_context *kctx = katom->kctx; + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); ++ if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ return; ++ } + -+ return 0; ++ if (new_status == BASE_JD_SOFT_EVENT_SET) ++ kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + -+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int as_nr, int const group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg) ++/** ++ * kbase_soft_event_update() - Update soft event state ++ * @kctx: Pointer to context ++ * @event: Event to update ++ * @new_status: New status value of event ++ * ++ * Update the event, and wake up any atoms waiting for the event. ++ * ++ * Return: 0 on success, a negative error code on failure. ++ */ ++int kbase_soft_event_update(struct kbase_context *kctx, ++ u64 event, ++ unsigned char new_status) +{ -+ int err; -+ u64 dirty_pgds = 0; ++ int err = 0; + -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; ++ mutex_lock(&kctx->jctx.lock); + -+ /* Memory aliases are always built on top of existing allocations, -+ * therefore the state of physical pages shall be updated. 
-+ */ -+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, -+ &dirty_pgds, reg, false); -+ if (err) -+ return err; ++ if (kbasep_write_soft_event_status(kctx, event, new_status)) { ++ err = -ENOENT; ++ goto out; ++ } + -+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); ++ if (new_status == BASE_JD_SOFT_EVENT_SET) ++ kbasep_complete_triggered_soft_events(kctx, event); + -+ return 0; ++out: ++ mutex_unlock(&kctx->jctx.lock); ++ ++ return err; +} + -+void kbase_mmu_update(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, -+ int as_nr) ++static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); -+ -+ kbdev->mmu_mode->update(kbdev, mmut, as_nr); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ if (kbase_jd_done_nolock(katom, true)) ++ kbase_js_sched_all(katom->kctx->kbdev); +} -+KBASE_EXPORT_TEST_API(kbase_mmu_update); + -+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ struct kbase_debug_copy_buffer *buffers = katom->softjob_data; ++ unsigned int i; ++ unsigned int nr = katom->nr_extres; + -+ kbdev->mmu_mode->disable_as(kbdev, as_nr); -+} ++ if (!buffers) ++ return; + -+void kbase_mmu_disable(struct kbase_context *kctx) -+{ -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_mmu_hw_op_param op_param = { 0 }; -+ int lock_err, flush_err; ++ kbase_gpu_vm_lock(katom->kctx); ++ for (i = 0; i < nr; i++) { ++ int p; ++ struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; + -+ /* ASSERT that the context has a valid as_nr, which is only the case -+ * when it's scheduled in. -+ * -+ * as_nr won't change because the caller has the hwaccess_lock -+ */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ if (!buffers[i].pages) ++ break; ++ for (p = 0; p < buffers[i].nr_pages; p++) { ++ struct page *pg = buffers[i].pages[p]; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); ++ if (pg) ++ put_page(pg); ++ } ++ if (buffers[i].is_vmalloc) ++ vfree(buffers[i].pages); ++ else ++ kfree(buffers[i].pages); ++ if (gpu_alloc) { ++ switch (gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ kbase_free_user_buffer(&buffers[i]); ++ break; ++ } ++ default: ++ /* Nothing to be done. 
*/ ++ break; ++ } ++ kbase_mem_phy_alloc_put(gpu_alloc); ++ } ++ } ++ kbase_gpu_vm_unlock(katom->kctx); ++ kfree(buffers); + -+ op_param.vpfn = 0; -+ op_param.nr = ~0; -+ op_param.op = KBASE_MMU_OP_FLUSH_MEM; -+ op_param.kctx_id = kctx->id; -+ op_param.mmu_sync_info = mmu_sync_info; ++ katom->softjob_data = NULL; ++} + -+#if MALI_USE_CSF -+ /* 0xF value used to prevent skipping of any levels when flushing */ -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) -+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); -+#endif ++static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ++{ ++ struct kbase_debug_copy_buffer *buffers; ++ struct base_jd_debug_copy_buffer *user_buffers = NULL; ++ unsigned int i; ++ unsigned int nr = katom->nr_extres; ++ int ret = 0; ++ void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + -+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is -+ * not yet disabled -+ */ -+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); -+ if (lock_err) -+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, -+ kctx->id); ++ if (!user_structs) ++ return -EINVAL; + -+ /* Issue the flush command only when L2 cache is in stable power on state. -+ * Any other state for L2 cache implies that shader cores are powered off, -+ * which in turn implies there is no execution happening on the GPU. -+ */ -+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { -+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, -+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC); -+ if (flush_err) -+ dev_err(kbdev->dev, -+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", -+ kctx->as_nr, kctx->tgid, kctx->id); ++ buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); ++ if (!buffers) { ++ ret = -ENOMEM; ++ goto out_cleanup; + } -+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); ++ katom->softjob_data = buffers; + -+ if (!lock_err) { -+ /* unlock the MMU to allow it to resume */ -+ lock_err = -+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); -+ if (lock_err) -+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, -+ kctx->tgid, kctx->id); ++ user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); ++ ++ if (!user_buffers) { ++ ret = -ENOMEM; ++ goto out_cleanup; + } + -+#if !MALI_USE_CSF -+ /* -+ * JM GPUs has some L1 read only caches that need to be invalidated -+ * with START_FLUSH configuration. Purge the MMU disabled kctx from -+ * the slot_rb tracking field so such invalidation is performed when -+ * a new katom is executed on the affected slots. 
-+ */ -+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx); -+#endif -+} -+KBASE_EXPORT_TEST_API(kbase_mmu_disable); ++ ret = copy_from_user(user_buffers, user_structs, ++ sizeof(*user_buffers)*nr); ++ if (ret) { ++ ret = -EFAULT; ++ goto out_cleanup; ++ } + -+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, -+ struct kbase_mmu_table *mmut, phys_addr_t *pgds, -+ u64 vpfn, int level, -+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) -+{ -+ int current_level; ++ for (i = 0; i < nr; i++) { ++ u64 addr = user_buffers[i].address; ++ u64 page_addr = addr & PAGE_MASK; ++ u64 end_page_addr = addr + user_buffers[i].size - 1; ++ u64 last_page_addr = end_page_addr & PAGE_MASK; ++ int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; ++ int pinned_pages; ++ struct kbase_va_region *reg; ++ struct base_external_resource user_extres; + -+ lockdep_assert_held(&mmut->mmu_lock); ++ if (!addr) ++ continue; + -+ for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); -+ current_level--) { -+ phys_addr_t current_pgd = pgds[current_level]; -+ struct page *p = phys_to_page(current_pgd); -+ u64 *current_page = kmap(p); -+ unsigned int current_valid_entries = -+ kbdev->mmu_mode->get_num_valid_entries(current_page); -+ int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; ++ if (last_page_addr < page_addr) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } + -+ /* We need to track every level that needs updating */ -+ if (dirty_pgds) -+ *dirty_pgds |= 1ULL << current_level; ++ buffers[i].nr_pages = nr_pages; ++ buffers[i].offset = addr & ~PAGE_MASK; ++ if (buffers[i].offset >= PAGE_SIZE) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } ++ buffers[i].size = user_buffers[i].size; + -+ kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); -+ if (current_valid_entries == 1 && -+ current_level != MIDGARD_MMU_LEVEL(0)) { -+ kunmap(p); ++ if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / ++ sizeof(struct page *))) { ++ buffers[i].is_vmalloc = true; ++ buffers[i].pages = vzalloc(nr_pages * ++ sizeof(struct page *)); ++ } else { ++ buffers[i].is_vmalloc = false; ++ buffers[i].pages = kcalloc(nr_pages, ++ sizeof(struct page *), GFP_KERNEL); ++ } + -+ /* Ensure the cacheline containing the last valid entry -+ * of PGD is invalidated from the GPU cache, before the -+ * PGD page is freed. ++ if (!buffers[i].pages) { ++ ret = -ENOMEM; ++ goto out_cleanup; ++ } ++ ++ pinned_pages = get_user_pages_fast(page_addr, ++ nr_pages, ++ 1, /* Write */ ++ buffers[i].pages); ++ if (pinned_pages < 0) { ++ /* get_user_pages_fast has failed - page array is not ++ * valid. Don't try to release any pages. + */ -+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, -+ current_pgd + (index * sizeof(u64)), -+ sizeof(u64), flush_op); ++ buffers[i].nr_pages = 0; + -+ kbase_mmu_add_to_free_pgds_list(mmut, p); -+ } else { -+ current_valid_entries--; ++ ret = pinned_pages; ++ goto out_cleanup; ++ } ++ if (pinned_pages != nr_pages) { ++ /* Adjust number of pages, so that we only attempt to ++ * release pages in the array that we know are valid. 
++ */ ++ buffers[i].nr_pages = pinned_pages; + -+ kbdev->mmu_mode->set_num_valid_entries( -+ current_page, current_valid_entries); ++ ret = -EINVAL; ++ goto out_cleanup; ++ } + -+ kunmap(p); ++ user_extres = user_buffers[i].extres; ++ if (user_extres.ext_resource == 0ULL) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } + -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), -+ kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), -+ flush_op); -+ break; ++ kbase_gpu_vm_lock(katom->kctx); ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ katom->kctx, user_extres.ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ ++ if (kbase_is_region_invalid_or_free(reg) || ++ reg->gpu_alloc == NULL) { ++ ret = -EINVAL; ++ goto out_unlock; + } -+ } -+} + -+/** -+ * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. -+ * -+ * @kbdev: Pointer to kbase device. -+ * @kctx: Pointer to kbase context. -+ * @as_nr: Address space number, for GPU cache maintenance operations -+ * that happen outside a specific kbase context. -+ * @phys: Array of physical pages to flush. -+ * @phys_page_nr: Number of physical pages to flush. -+ * @op_param: Non-NULL pointer to struct containing information about the flush -+ * operation to perform. -+ * -+ * This function will do one of three things: -+ * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the -+ * individual pages that were unmapped if feature is supported on GPU. -+ * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is -+ * supported on GPU or, -+ * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. -+ * -+ * When performing a partial GPU cache flush, the number of physical -+ * pages does not have to be identical to the number of virtual pages on the MMU, -+ * to support a single physical address flush for an aliased page. 
-+ */ -+static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, -+ struct kbase_context *kctx, int as_nr, -+ struct tagged_addr *phys, size_t phys_page_nr, -+ struct kbase_mmu_hw_op_param *op_param) -+{ -+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ /* Full cache flush through the MMU_COMMAND */ -+ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); -+ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { -+ /* Full cache flush through the GPU_CONTROL */ -+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); -+ } -+#if MALI_USE_CSF -+ else { -+ /* Partial GPU cache flush with MMU cache invalidation */ -+ unsigned long irq_flags; -+ unsigned int i; -+ bool flush_done = false; ++ buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ buffers[i].nr_extres_pages = reg->nr_pages; + -+ mmu_invalidate(kbdev, kctx, as_nr, op_param); ++ if (reg->nr_pages*PAGE_SIZE != buffers[i].size) ++ dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); + -+ for (i = 0; !flush_done && i < phys_page_nr; i++) { -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) -+ mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, -+ KBASE_MMU_OP_FLUSH_MEM); -+ else -+ flush_done = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ switch (reg->gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; ++ const unsigned long start = alloc->imported.user_buf.address; ++ ++ if (alloc->imported.user_buf.mm != current->mm) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ buffers[i].extres_pages = kcalloc(nr_pages, ++ sizeof(struct page *), GFP_KERNEL); ++ if (!buffers[i].extres_pages) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ kbase_gpu_vm_unlock(katom->kctx); ++ ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); ++ kbase_gpu_vm_lock(katom->kctx); ++ if (ret != nr_pages) { ++ /* Adjust number of pages, so that we only ++ * attempt to release pages in the array that we ++ * know are valid. ++ */ ++ if (ret < 0) ++ buffers[i].nr_extres_pages = 0; ++ else ++ buffers[i].nr_extres_pages = ret; ++ ++ goto out_unlock; ++ } ++ ret = 0; ++ break; ++ } ++ default: ++ /* Nothing to be done. 
*/ ++ break; + } ++ kbase_gpu_vm_unlock(katom->kctx); + } -+#endif ++ kfree(user_buffers); ++ ++ return ret; ++ ++out_unlock: ++ kbase_gpu_vm_unlock(katom->kctx); ++ ++out_cleanup: ++ /* Frees allocated memory for kbase_debug_copy_job struct, including ++ * members, and sets jc to 0 ++ */ ++ kbase_debug_copy_finish(katom); ++ kfree(user_buffers); ++ ++ return ret; +} + -+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, size_t nr, u64 *dirty_pgds, -+ struct list_head *free_pgds_list, -+ enum kbase_mmu_op_type flush_op) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE ++static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, ++ unsigned long page_num, struct page **page) +{ -+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; ++ struct sg_table *sgt = gpu_alloc->imported.umm.sgt; ++ struct sg_page_iter sg_iter; ++ unsigned long page_index = 0; + -+ lockdep_assert_held(&mmut->mmu_lock); -+ kbase_mmu_reset_free_pgds_list(mmut); ++ if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) ++ return NULL; + -+ while (nr) { -+ unsigned int index = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; -+ unsigned int pcount; -+ int level; -+ u64 *page; -+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; -+ register unsigned int num_of_valid_entries; -+ phys_addr_t pgd = mmut->pgd; -+ struct page *p = phys_to_page(pgd); ++ if (!sgt) ++ return NULL; + -+ if (count > nr) -+ count = nr; ++ if (WARN_ON(page_num >= gpu_alloc->nents)) ++ return NULL; + -+ /* need to check if this is a 2MB page or a 4kB */ -+ for (level = MIDGARD_MMU_TOPLEVEL; -+ level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { -+ phys_addr_t next_pgd; ++ for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { ++ if (page_index == page_num) { ++ *page = sg_page_iter_page(&sg_iter); + -+ index = (vpfn >> ((3 - level) * 9)) & 0x1FF; -+ page = kmap(p); -+ if (mmu_mode->ate_is_valid(page[index], level)) -+ break; /* keep the mapping */ -+ else if (!mmu_mode->pte_is_valid(page[index], level)) { -+ /* nothing here, advance */ -+ switch (level) { -+ case MIDGARD_MMU_LEVEL(0): -+ count = 134217728; -+ break; -+ case MIDGARD_MMU_LEVEL(1): -+ count = 262144; -+ break; -+ case MIDGARD_MMU_LEVEL(2): -+ count = 512; -+ break; -+ case MIDGARD_MMU_LEVEL(3): -+ count = 1; -+ break; -+ } -+ if (count > nr) -+ count = nr; -+ goto next; -+ } -+ next_pgd = mmu_mode->pte_to_phy_addr( -+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); -+ kunmap(p); -+ pgds[level] = pgd; -+ pgd = next_pgd; -+ p = phys_to_page(pgd); ++ return kmap(*page); + } ++ page_index++; ++ } + -+ switch (level) { -+ case MIDGARD_MMU_LEVEL(0): -+ case MIDGARD_MMU_LEVEL(1): -+ dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, -+ level); -+ kunmap(p); -+ goto out; -+ case MIDGARD_MMU_LEVEL(2): -+ /* can only teardown if count >= 512 */ -+ if (count >= 512) { -+ pcount = 1; -+ } else { -+ dev_warn( -+ kbdev->dev, -+ "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", -+ __func__, count); -+ pcount = 0; -+ } -+ break; -+ case MIDGARD_MMU_BOTTOMLEVEL: -+ /* page count is the same as the logical count */ -+ pcount = count; -+ break; -+ default: -+ dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); -+ vpfn += count; -+ nr -= count; -+ continue; -+ } ++ return NULL; ++} ++#endif + -+ if (pcount > 0) -+ *dirty_pgds |= 1ULL << level; ++/** ++ * 
kbase_mem_copy_from_extres() - Copy from external resources. ++ * ++ * @kctx: kbase context within which the copying is to take place. ++ * @buf_data: Pointer to the information about external resources: ++ * pages pertaining to the external resource, number of ++ * pages to copy. ++ * ++ * Return: 0 on success, error code otherwise. ++ */ ++static int kbase_mem_copy_from_extres(struct kbase_context *kctx, ++ struct kbase_debug_copy_buffer *buf_data) ++{ ++ unsigned int i; ++ unsigned int target_page_nr = 0; ++ struct page **pages = buf_data->pages; ++ u64 offset = buf_data->offset; ++ size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; ++ size_t to_copy = min(extres_size, buf_data->size); ++ struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; ++ int ret = 0; ++ size_t dma_to_copy; + -+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page); -+ if (WARN_ON_ONCE(num_of_valid_entries < pcount)) -+ num_of_valid_entries = 0; -+ else -+ num_of_valid_entries -= pcount; ++ KBASE_DEBUG_ASSERT(pages != NULL); + -+ /* Invalidate the entries we added */ -+ mmu_mode->entries_invalidate(&page[index], pcount); ++ kbase_gpu_vm_lock(kctx); ++ if (!gpu_alloc) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+ if (!num_of_valid_entries) { -+ kunmap(p); ++ switch (gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ for (i = 0; i < buf_data->nr_extres_pages && ++ target_page_nr < buf_data->nr_pages; i++) { ++ struct page *pg = buf_data->extres_pages[i]; ++ void *extres_page = kmap(pg); + -+ /* Ensure the cacheline(s) containing the last valid entries -+ * of PGD is invalidated from the GPU cache, before the -+ * PGD page is freed. -+ */ -+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, -+ pgd + (index * sizeof(u64)), -+ pcount * sizeof(u64), flush_op); ++ if (extres_page) { ++ ret = kbase_mem_copy_to_pinned_user_pages( ++ pages, extres_page, &to_copy, ++ buf_data->nr_pages, ++ &target_page_nr, offset); ++ kunmap(pg); ++ if (ret) ++ goto out_unlock; ++ } ++ } ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; + -+ kbase_mmu_add_to_free_pgds_list(mmut, p); ++ KBASE_DEBUG_ASSERT(dma_buf != NULL); ++ if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE) ++ dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch"); + -+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, -+ flush_op, dirty_pgds); ++ dma_to_copy = min(dma_buf->size, ++ (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); ++ ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE); ++ if (ret) ++ goto out_unlock; + -+ vpfn += count; -+ nr -= count; -+ continue; ++ for (i = 0; i < dma_to_copy/PAGE_SIZE && ++ target_page_nr < buf_data->nr_pages; i++) { ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE ++ struct page *pg; ++ void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); ++#else ++ void *extres_page = dma_buf_kmap(dma_buf, i); ++#endif ++ if (extres_page) { ++ ret = kbase_mem_copy_to_pinned_user_pages( ++ pages, extres_page, &to_copy, ++ buf_data->nr_pages, ++ &target_page_nr, offset); ++ ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE ++ kunmap(pg); ++#else ++ dma_buf_kunmap(dma_buf, i, extres_page); ++#endif ++ if (ret) ++ break; ++ } + } ++ dma_buf_end_cpu_access(dma_buf, DMA_FROM_DEVICE); ++ break; ++ } ++ default: ++ ret = -EINVAL; ++ } ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return ret; ++} + -+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries); ++static int kbase_debug_copy(struct 
kbase_jd_atom *katom) ++{ ++ struct kbase_debug_copy_buffer *buffers = katom->softjob_data; ++ unsigned int i; + -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), -+ kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), -+ flush_op); -+next: -+ kunmap(p); -+ vpfn += count; -+ nr -= count; ++ if (WARN_ON(!buffers)) ++ return -EINVAL; ++ ++ for (i = 0; i < katom->nr_extres; i++) { ++ int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); ++ ++ if (res) ++ return res; + } -+out: ++ + return 0; +} ++#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ ++#endif /* !MALI_USE_CSF */ + -+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, -+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, -+ int as_nr, bool ignore_page_migration) -+{ -+ u64 start_vpfn = vpfn; -+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; -+ struct kbase_mmu_hw_op_param op_param; -+ int err = -EFAULT; -+ u64 dirty_pgds = 0; -+ LIST_HEAD(free_pgds_list); ++#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) + -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. -+ */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++int kbasep_jit_alloc_validate(struct kbase_context *kctx, ++ struct base_jit_alloc_info *info) ++{ ++ int j; ++ /* If the ID is zero, then fail the job */ ++ if (info->id == 0) ++ return -EINVAL; + -+ /* This function performs two operations: MMU maintenance and flushing -+ * the caches. To ensure internal consistency between the caches and the -+ * MMU, it does not make sense to be able to flush only the physical pages -+ * from the cache and keep the PTE, nor does it make sense to use this -+ * function to remove a PTE and keep the physical pages in the cache. -+ * -+ * However, we have legitimate cases where we can try to tear down a mapping -+ * with zero virtual and zero physical pages, so we must have the following -+ * behaviour: -+ * - if both physical and virtual page counts are zero, return early -+ * - if either physical and virtual page counts are zero, return early -+ * - if there are fewer physical pages than virtual pages, return -EINVAL -+ */ -+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0)) -+ return 0; ++ /* Sanity check that the PA fits within the VA */ ++ if (info->va_pages < info->commit_pages) ++ return -EINVAL; + -+ if (unlikely(nr_virt_pages < nr_phys_pages)) ++ /* Ensure the GPU address is correctly aligned */ ++ if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) + return -EINVAL; + -+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases -+ * the operation is invalidate but the granularity of cache maintenance may change -+ * according to the situation. -+ * -+ * If GPU control command operations are present and the number of pages is "small", -+ * then the optimal strategy is flushing on the physical address range of the pages -+ * which are affected by the operation. That implies both the PGDs which are modified -+ * or removed from the page table and the physical pages which are freed from memory. ++ /* Interface version 2 (introduced with kernel driver version 11.5) ++ * onward has padding and a flags member to validate. + * -+ * Otherwise, there's no alternative to invalidating the whole GPU cache. ++ * Note: To support earlier versions the extra bytes will have been set ++ * to 0 by the caller. 
+ */ -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && -+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) -+ flush_op = KBASE_MMU_OP_FLUSH_PT; + -+ mutex_lock(&mmut->mmu_lock); ++ /* Check padding is all zeroed */ ++ for (j = 0; j < sizeof(info->padding); j++) { ++ if (info->padding[j] != 0) ++ return -EINVAL; ++ } + -+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, -+ &free_pgds_list, flush_op); ++ /* Only valid flags shall be set */ ++ if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) ++ return -EINVAL; + -+ /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ -+ op_param = (struct kbase_mmu_hw_op_param){ -+ .vpfn = start_vpfn, -+ .nr = nr_virt_pages, -+ .mmu_sync_info = mmu_sync_info, -+ .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, -+ .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : -+ KBASE_MMU_OP_FLUSH_MEM, -+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), -+ }; -+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages, -+ &op_param); ++#if !MALI_JIT_PRESSURE_LIMIT_BASE ++ /* If just-in-time memory allocation pressure limit feature is disabled, ++ * heap_info_gpu_addr must be zeroed-out ++ */ ++ if (info->heap_info_gpu_addr) ++ return -EINVAL; ++#endif + -+ /* If page migration is enabled: the status of all physical pages involved -+ * shall be updated, unless they are not movable. Their status shall be -+ * updated before releasing the lock to protect against concurrent -+ * requests to migrate the pages, if they have been isolated. ++#if !MALI_USE_CSF ++ /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr ++ * cannot be 0 + */ -+ if (kbase_page_migration_enabled && phys && !ignore_page_migration) -+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); ++ if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && ++ !info->heap_info_gpu_addr) ++ return -EINVAL; ++#endif /* !MALI_USE_CSF */ + -+ kbase_mmu_free_pgds_list(kbdev, mmut); -+ -+ mutex_unlock(&mmut->mmu_lock); -+ -+ return err; ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); -+ -+/** -+ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU -+ * page table entries -+ * -+ * @kbdev: Pointer to kbase device. -+ * @mmut: The involved MMU table -+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update -+ * @phys: Pointer to the array of tagged physical addresses of the physical -+ * pages that are pointed to by the page table entries (that need to -+ * be updated). The pointer should be within the reg->gpu_alloc->pages -+ * array. -+ * @nr: Number of pages to update -+ * @flags: Flags -+ * @group_id: The physical memory group in which the page was allocated. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). -+ * @dirty_pgds: Flags to track every level where a PGD has been updated. -+ * -+ * This will update page table entries that already exist on the GPU based on -+ * new flags and replace any existing phy pages that are passed (the PGD pages -+ * remain unchanged). It is used as a response to the changes of phys as well -+ * as the the memory attributes. -+ * -+ * The caller is responsible for validating the memory attributes. -+ * -+ * Return: 0 if the attributes data in page table entries were updated -+ * successfully, otherwise an error code. 
-+ */ -+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int const group_id, u64 *dirty_pgds) -+{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ int err; -+ -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; + -+ mutex_lock(&mmut->mmu_lock); -+ -+ while (nr) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ size_t count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; -+ register unsigned int num_of_valid_entries; -+ int cur_level = MIDGARD_MMU_BOTTOMLEVEL; -+ -+ if (count > nr) -+ count = nr; ++#if !MALI_USE_CSF + -+ if (is_huge(*phys) && (index == index_in_large_page(*phys))) -+ cur_level = MIDGARD_MMU_LEVEL(2); ++static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ++{ ++ __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; ++ struct base_jit_alloc_info *info; ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ u32 count; ++ int ret; ++ u32 i; + -+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd); -+ if (WARN_ON(err)) -+ goto fail_unlock; ++ if (!kbase_mem_allow_alloc(kctx)) { ++ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d", ++ current->comm, current->pid, kctx->tgid, kctx->id); ++ ret = -EINVAL; ++ goto fail; ++ } + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_warn(kbdev->dev, "kmap failure on update_pages"); -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++ /* For backwards compatibility, and to prevent reading more than 1 jit ++ * info struct on jit version 1 ++ */ ++ if (katom->nr_extres == 0) ++ katom->nr_extres = 1; ++ count = katom->nr_extres; + -+ num_of_valid_entries = -+ kbdev->mmu_mode->get_num_valid_entries(pgd_page); ++ /* Sanity checks */ ++ if (!data || count > kctx->jit_max_allocations || ++ count > ARRAY_SIZE(kctx->jit_alloc)) { ++ ret = -EINVAL; ++ goto fail; ++ } + -+ if (cur_level == MIDGARD_MMU_LEVEL(2)) { -+ int level_index = (vpfn >> 9) & 0x1FF; -+ struct tagged_addr *target_phys = -+ phys - index_in_large_page(*phys); ++ /* Copy the information for safe access and future storage */ ++ info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); ++ if (!info) { ++ ret = -ENOMEM; ++ goto fail; ++ } + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( -+ pgd_page[level_index], MIDGARD_MMU_LEVEL(2))); -+#endif -+ pgd_page[level_index] = kbase_mmu_create_ate(kbdev, -+ *target_phys, flags, MIDGARD_MMU_LEVEL(2), -+ group_id); -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), -+ kbase_dma_addr(p) + (level_index * sizeof(u64)), -+ sizeof(u64), KBASE_MMU_OP_NONE); -+ } else { -+ for (i = 0; i < count; i++) { -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( -+ pgd_page[index + i], -+ MIDGARD_MMU_BOTTOMLEVEL)); -+#endif -+ pgd_page[index + i] = kbase_mmu_create_ate(kbdev, -+ phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, -+ group_id); -+ } ++ katom->softjob_data = info; + -+ /* MMU cache flush strategy is NONE because GPU cache maintenance -+ * will be done by the caller. 
-+ */ -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), -+ kbase_dma_addr(p) + (index * sizeof(u64)), -+ count * sizeof(u64), KBASE_MMU_OP_NONE); ++ for (i = 0; i < count; i++, info++, data += sizeof(*info)) { ++ if (copy_from_user(info, data, sizeof(*info)) != 0) { ++ ret = -EINVAL; ++ goto free_info; + } + -+ kbdev->mmu_mode->set_num_valid_entries(pgd_page, -+ num_of_valid_entries); -+ -+ if (dirty_pgds && count > 0) -+ *dirty_pgds |= 1ULL << cur_level; -+ -+ phys += count; -+ vpfn += count; -+ nr -= count; -+ -+ kunmap(p); ++ ret = kbasep_jit_alloc_validate(kctx, info); ++ if (ret) ++ goto free_info; ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( ++ kbdev, katom, info->va_pages, info->commit_pages, ++ info->extension, info->id, info->bin_id, ++ info->max_allocations, info->flags, info->usage_id); + } + -+ mutex_unlock(&mmut->mmu_lock); -+ return 0; ++ katom->jit_blocked = false; + -+fail_unlock: -+ mutex_unlock(&mmut->mmu_lock); -+ return err; -+} ++ lockdep_assert_held(&kctx->jctx.lock); ++ list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); + -+static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int const group_id) -+{ -+ int err; -+ struct kbase_mmu_hw_op_param op_param; -+ u64 dirty_pgds = 0; -+ struct kbase_mmu_table *mmut; -+ /* Calls to this function are inherently asynchronous, with respect to -+ * MMU operations. ++ /* ++ * Note: ++ * The provided info->gpu_alloc_addr isn't validated here as ++ * userland can cache allocations which means that even ++ * though the region is valid it doesn't represent the ++ * same thing it used to. ++ * ++ * Complete validation of va_pages, commit_pages and extension ++ * isn't done here as it will be done during the call to ++ * kbase_mem_alloc. + */ -+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -+ int as_nr; -+ -+#if !MALI_USE_CSF -+ if (unlikely(kctx == NULL)) -+ return -EINVAL; -+ -+ as_nr = kctx->as_nr; -+ mmut = &kctx->mmu; -+#else -+ if (kctx) { -+ mmut = &kctx->mmu; -+ as_nr = kctx->as_nr; -+ } else { -+ mmut = &kbdev->csf.mcu_mmu; -+ as_nr = MCU_AS_NR; -+ } -+#endif -+ -+ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, -+ &dirty_pgds); -+ -+ op_param = (const struct kbase_mmu_hw_op_param){ -+ .vpfn = vpfn, -+ .nr = nr, -+ .op = KBASE_MMU_OP_FLUSH_MEM, -+ .kctx_id = kctx ? 
kctx->id : 0xFFFFFFFF, -+ .mmu_sync_info = mmu_sync_info, -+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), -+ }; -+ -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) -+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param); -+ else -+ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param); ++ return 0; + -+ return err; ++free_info: ++ kfree(katom->softjob_data); ++ katom->softjob_data = NULL; ++fail: ++ return ret; +} + -+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, -+ size_t nr, unsigned long flags, int const group_id) ++static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) +{ -+ if (unlikely(kctx == NULL)) -+ return -EINVAL; ++ if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != ++ BASE_JD_REQ_SOFT_JIT_FREE)) ++ return NULL; + -+ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id); ++ return (u8 *) katom->softjob_data; +} + -+#if MALI_USE_CSF -+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, -+ size_t nr, unsigned long flags, int const group_id) ++static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) +{ -+ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id); -+} -+#endif /* MALI_USE_CSF */ ++ struct kbase_context *kctx = katom->kctx; ++ struct list_head *target_list_head = NULL; ++ struct kbase_jd_atom *entry; + -+static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ list_for_each_entry(entry, &kctx->jctx.jit_pending_alloc, queue) { ++ if (katom->age < entry->age) { ++ target_list_head = &entry->queue; ++ break; ++ } ++ } + -+ WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); -+ kbdev->mmu_page_migrate_in_progress = true; -+} ++ if (target_list_head == NULL) ++ target_list_head = &kctx->jctx.jit_pending_alloc; + -+static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); -+ kbdev->mmu_page_migrate_in_progress = false; -+ /* Invoke the PM state machine, as the MMU page migration session -+ * may have deferred a transition in L2 state machine. -+ */ -+ kbase_pm_update_state(kbdev); ++ list_add_tail(&katom->queue, target_list_head); +} + -+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, -+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) ++static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) +{ -+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); -+ struct kbase_mmu_hw_op_param op_param; -+ struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? -+ page_md->data.mapped.mmut : -+ page_md->data.pt_mapped.mmut; -+ struct kbase_device *kbdev; -+ phys_addr_t pgd; -+ u64 *old_page, *new_page, *pgd_page, *target, vpfn; -+ int index, check_state, ret = 0; -+ unsigned long hwaccess_flags = 0; -+ unsigned int num_of_valid_entries; -+ u8 vmap_count = 0; -+ -+ /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, -+ * here we skip the no kctx case, which is only used with MCU's mmut. 
-+ */ -+ if (!mmut->kctx) -+ return -EINVAL; -+ -+ if (level > MIDGARD_MMU_BOTTOMLEVEL) -+ return -EINVAL; -+ else if (level == MIDGARD_MMU_BOTTOMLEVEL) -+ vpfn = page_md->data.mapped.vpfn; -+ else -+ vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct base_jit_alloc_info *info; ++ struct kbase_va_region *reg; ++ struct kbase_vmap_struct mapping; ++ u64 *ptr, new_addr; ++ u32 count = katom->nr_extres; ++ u32 i; ++ bool ignore_pressure_limit = false; + -+ kbdev = mmut->kctx->kbdev; -+ index = (vpfn >> ((3 - level) * 9)) & 0x1FF; ++ trace_sysgraph(SGR_SUBMIT, kctx->id, ++ kbase_jd_atom_id(kctx, katom)); + -+ /* Create all mappings before copying content. -+ * This is done as early as possible because is the only operation that may -+ * fail. It is possible to do this before taking any locks because the -+ * pages to migrate are not going to change and even the parent PGD is not -+ * going to be affected by any other concurrent operation, since the page -+ * has been isolated before migration and therefore it cannot disappear in -+ * the middle of this function. -+ */ -+ old_page = kmap(as_page(old_phys)); -+ if (!old_page) { -+ dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); -+ ret = -EINVAL; -+ goto old_page_map_error; ++ if (katom->jit_blocked) { ++ list_del(&katom->queue); ++ katom->jit_blocked = false; + } + -+ new_page = kmap(as_page(new_phys)); -+ if (!new_page) { -+ dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); -+ ret = -EINVAL; -+ goto new_page_map_error; ++ info = katom->softjob_data; ++ if (WARN_ON(!info)) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return 0; + } + -+ /* GPU cache maintenance affects both memory content and page table, -+ * but at two different stages. A single virtual memory page is affected -+ * by the migration. -+ * -+ * Notice that the MMU maintenance is done in the following steps: -+ * -+ * 1) The MMU region is locked without performing any other operation. -+ * This lock must cover the entire migration process, in order to -+ * prevent any GPU access to the virtual page whose physical page -+ * is being migrated. -+ * 2) Immediately after locking: the MMU region content is flushed via -+ * GPU control while the lock is taken and without unlocking. -+ * The region must stay locked for the duration of the whole page -+ * migration procedure. -+ * This is necessary to make sure that pending writes to the old page -+ * are finalized before copying content to the new page. -+ * 3) Before unlocking: changes to the page table are flushed. -+ * Finer-grained GPU control operations are used if possible, otherwise -+ * the whole GPU cache shall be flushed again. -+ * This is necessary to make sure that the GPU accesses the new page -+ * after migration. -+ * 4) The MMU region is unlocked. -+ */ -+#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) -+ op_param.mmu_sync_info = CALLER_MMU_ASYNC; -+ op_param.kctx_id = mmut->kctx->id; -+ op_param.vpfn = vpfn & PGD_VPFN_MASK(level); -+ op_param.nr = 1 << ((3 - level) * 9); -+ op_param.op = KBASE_MMU_OP_FLUSH_PT; -+ /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ -+ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? 
-+ pgd_level_to_skip_flush(1ULL << level) : -+ pgd_level_to_skip_flush(3ULL << level); -+ -+ mutex_lock(&mmut->mmu_lock); -+ -+ /* The state was evaluated before entering this function, but it could -+ * have changed before the mmu_lock was taken. However, the state -+ * transitions which are possible at this point are only two, and in both -+ * cases it is a stable state progressing to a "free in progress" state. -+ * -+ * After taking the mmu_lock the state can no longer change: read it again -+ * and make sure that it hasn't changed before continuing. -+ */ -+ spin_lock(&page_md->migrate_lock); -+ check_state = PAGE_STATUS_GET(page_md->status); -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) -+ vmap_count = page_md->vmap_count; -+ spin_unlock(&page_md->migrate_lock); -+ -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) { -+ if (check_state != ALLOCATED_MAPPED) { -+ dev_dbg(kbdev->dev, -+ "%s: state changed to %d (was %d), abort page migration", __func__, -+ check_state, ALLOCATED_MAPPED); -+ ret = -EAGAIN; -+ goto page_state_change_out; -+ } else if (vmap_count > 0) { -+ dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", -+ __func__); -+ ret = -EAGAIN; -+ goto page_state_change_out; -+ } -+ } else { -+ if (check_state != PT_MAPPED) { -+ dev_dbg(kbdev->dev, -+ "%s: state changed to %d (was %d), abort PGD page migration", -+ __func__, check_state, PT_MAPPED); -+ WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); -+ ret = -EAGAIN; -+ goto page_state_change_out; ++ for (i = 0; i < count; i++, info++) { ++ /* The JIT ID is still in use so fail the allocation */ ++ if (kctx->jit_alloc[info->id]) { ++ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; ++ return 0; + } + } + -+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd); -+ if (ret) { -+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__); -+ goto get_pgd_at_level_error; -+ } -+ -+ pgd_page = kmap(phys_to_page(pgd)); -+ if (!pgd_page) { -+ dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); -+ ret = -EINVAL; -+ goto pgd_page_map_error; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* ++ * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit ++ * is disabled at the context scope, then bypass JIT pressure limit ++ * logic in kbase_jit_allocate(). ++ */ ++ if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) ++ || (kctx->jit_current_allocations == 0)) { ++ ignore_pressure_limit = true; + } ++#else ++ ignore_pressure_limit = true; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + -+ mutex_lock(&kbdev->pm.lock); -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { ++ if (kctx->jit_alloc[info->id]) { ++ /* The JIT ID is duplicated in this atom. Roll back ++ * previous allocations and fail. ++ */ ++ u32 j; + -+ /* Lock MMU region and flush GPU cache by using GPU control, -+ * in order to keep MMU region locked. 
-+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); -+ if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { -+ /* Defer the migration as L2 is in a transitional phase */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&kbdev->pm.lock); -+ dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); -+ ret = -EAGAIN; -+ goto l2_state_defer_out; -+ } -+ /* Prevent transitional phases in L2 by starting the transaction */ -+ mmu_page_migration_transaction_begin(kbdev); -+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { -+ int as_nr = mmut->kctx->as_nr; -+ struct kbase_as *as = &kbdev->as[as_nr]; ++ info = katom->softjob_data; ++ for (j = 0; j < i; j++, info++) { ++ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); ++ kctx->jit_alloc[info->id] = ++ KBASE_RESERVED_REG_JIT_ALLOC; ++ } + -+ ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); -+ if (!ret) { -+ ret = kbase_gpu_cache_flush_and_busy_wait( -+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); ++ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; ++ return 0; + } -+ if (ret) -+ mmu_page_migration_transaction_end(kbdev); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + -+ if (ret < 0) { -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&kbdev->pm.lock); -+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); -+ goto undo_mappings; -+ } ++ /* Create a JIT allocation */ ++ reg = kbase_jit_allocate(kctx, info, ignore_pressure_limit); ++ if (!reg) { ++ struct kbase_jd_atom *jit_atom; ++ bool can_block = false; + -+ /* Copy memory content. -+ * -+ * It is necessary to claim the ownership of the DMA buffer for the old -+ * page before performing the copy, to make sure of reading a consistent -+ * version of its content, before copying. After the copy, ownership of -+ * the DMA buffer for the new page is given to the GPU in order to make -+ * the content visible to potential GPU access that may happen as soon as -+ * this function releases the lock on the MMU region. -+ */ -+ dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ memcpy(new_page, old_page, PAGE_SIZE); -+ dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ /* Remap GPU virtual page. -+ * -+ * This code rests on the assumption that page migration is only enabled -+ * for 4 kB pages, that necessarily live in the bottom level of the MMU -+ * page table. For this reason, the PGD level tells us inequivocably -+ * whether the page being migrated is a "content page" or another PGD -+ * of the page table: -+ * -+ * - Bottom level implies ATE (Address Translation Entry) -+ * - Any other level implies PTE (Page Table Entry) -+ * -+ * The current implementation doesn't handle the case of a level 0 PGD, -+ * that is: the root PGD of the page table. -+ */ -+ target = &pgd_page[index]; ++ list_for_each_entry(jit_atom, &kctx->jctx.jit_atoms_head, jit_node) { ++ if (jit_atom == katom) ++ break; + -+ /* Certain entries of a page table page encode the count of valid entries -+ * present in that page. So need to save & restore the count information -+ * when updating the PTE/ATE to point to the new page. 
-+ */ -+ num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); ++ if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == ++ BASE_JD_REQ_SOFT_JIT_FREE) { ++ u8 *free_ids = kbase_jit_free_get_ids(jit_atom); + -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) { -+ WARN_ON_ONCE((*target & 1UL) == 0); -+ *target = -+ kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, -+ level, page_md->data.mapped.reg->gpu_alloc->group_id); -+ } else { -+ u64 managed_pte; ++ if (free_ids && *free_ids && ++ kctx->jit_alloc[*free_ids]) { ++ /* A JIT free which is active and ++ * submitted before this atom ++ */ ++ can_block = true; ++ break; ++ } ++ } ++ } + -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ /* The PTE should be pointing to the page being migrated */ -+ WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( -+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); -+#endif -+ kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); -+ *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); -+ } ++ if (!can_block) { ++ /* Mark the failed allocation as well as the ++ * other un-attempted allocations in the set, ++ * so we know they are in use even if the ++ * allocation itself failed. ++ */ ++ for (; i < count; i++, info++) { ++ kctx->jit_alloc[info->id] = ++ KBASE_RESERVED_REG_JIT_ALLOC; ++ } + -+ kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); ++ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; ++ dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", ++ kbase_jd_atom_id(kctx, katom)); ++ return 0; ++ } + -+ /* This function always updates a single entry inside an existing PGD, -+ * therefore cache maintenance is necessary and affects a single entry. -+ */ -+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), -+ kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), -+ KBASE_MMU_OP_FLUSH_PT); ++ /* There are pending frees for an active allocation ++ * so we should wait to see whether they free the ++ * memory. Add to the list of atoms for which JIT ++ * allocation is pending. ++ */ ++ kbase_jit_add_to_pending_alloc_list(katom); ++ katom->jit_blocked = true; + -+ /* Unlock MMU region. -+ * -+ * Notice that GPUs which don't issue flush commands via GPU control -+ * still need an additional GPU cache flush here, this time only -+ * for the page table, because the function call above to sync PGDs -+ * won't have any effect on them. 
-+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); -+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { -+ int as_nr = mmut->kctx->as_nr; -+ struct kbase_as *as = &kbdev->as[as_nr]; ++ /* Rollback, the whole set will be re-attempted */ ++ while (i-- > 0) { ++ info--; ++ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); ++ kctx->jit_alloc[info->id] = NULL; ++ } + -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { -+ ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); -+ } else { -+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, -+ GPU_COMMAND_CACHE_CLN_INV_L2); -+ if (!ret) -+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); ++ return 1; + } -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); -+ /* Releasing locks before checking the migration transaction error state */ -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&kbdev->pm.lock); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); -+ /* Release the transition prevention in L2 by ending the transaction */ -+ mmu_page_migration_transaction_end(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + -+ /* Checking the final migration transaction error state */ -+ if (ret < 0) { -+ dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); -+ goto undo_mappings; ++ /* Bind it to the user provided ID. */ ++ kctx->jit_alloc[info->id] = reg; + } + -+ /* Undertaking metadata transfer, while we are holding the mmu_lock */ -+ spin_lock(&page_md->migrate_lock); -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) { -+ size_t page_array_index = -+ page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; ++ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { ++ u64 entry_mmu_flags = 0; ++ /* ++ * Write the address of the JIT allocation to the user provided ++ * GPU allocation. ++ */ ++ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), ++ KBASE_REG_CPU_WR, &mapping); ++ if (!ptr) { ++ /* ++ * Leave the allocations "live" as the JIT free atom ++ * will be submitted anyway. ++ */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return 0; ++ } + -+ WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); ++ reg = kctx->jit_alloc[info->id]; ++ new_addr = reg->start_pfn << PAGE_SHIFT; ++ *ptr = new_addr; + -+ /* Replace page in array of pages of the physical allocation. 
*/ -+ page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; -+ } -+ /* Update the new page dma_addr with the transferred metadata from the old_page */ -+ page_md->dma_addr = new_dma_addr; -+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); -+ spin_unlock(&page_md->migrate_lock); -+ set_page_private(as_page(new_phys), (unsigned long)page_md); -+ /* Old page metatdata pointer cleared as it now owned by the new page */ -+ set_page_private(as_page(old_phys), 0); ++#if defined(CONFIG_MALI_VECTOR_DUMP) ++ /* ++ * Retrieve the mmu flags for JIT allocation ++ * only if dumping is enabled ++ */ ++ entry_mmu_flags = kbase_mmu_create_ate(kbdev, ++ (struct tagged_addr){ 0 }, reg->flags, ++ MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); ++#endif + -+l2_state_defer_out: -+ kunmap(phys_to_page(pgd)); -+pgd_page_map_error: -+get_pgd_at_level_error: -+page_state_change_out: -+ mutex_unlock(&mmut->mmu_lock); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( ++ kbdev, katom, info->gpu_alloc_addr, new_addr, ++ info->flags, entry_mmu_flags, info->id, ++ info->commit_pages, info->extension, info->va_pages); ++ kbase_vunmap(kctx, &mapping); + -+ kunmap(as_page(new_phys)); -+new_page_map_error: -+ kunmap(as_page(old_phys)); -+old_page_map_error: -+ return ret; ++ kbase_trace_jit_report_gpu_mem(kctx, reg, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ } + -+undo_mappings: -+ /* Unlock the MMU table and undo mappings. */ -+ mutex_unlock(&mmut->mmu_lock); -+ kunmap(phys_to_page(pgd)); -+ kunmap(as_page(new_phys)); -+ kunmap(as_page(old_phys)); ++ katom->event_code = BASE_JD_EVENT_DONE; + -+ return ret; ++ return 0; +} + -+static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ phys_addr_t pgd, unsigned int level) ++static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ -+ u64 *pgd_page; -+ int i; -+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; -+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; -+ u64 *pgd_page_buffer = NULL; -+ struct page *p = phys_to_page(pgd); ++ struct base_jit_alloc_info *info; + -+ lockdep_assert_held(&mmut->mmu_lock); ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ pgd_page = kmap_atomic(p); -+ /* kmap_atomic should NEVER fail. */ -+ if (WARN_ON_ONCE(pgd_page == NULL)) ++ if (WARN_ON(!katom->softjob_data)) + return; -+ if (level < MIDGARD_MMU_BOTTOMLEVEL) { -+ /* Copy the page to our preallocated buffer so that we can minimize -+ * kmap_atomic usage -+ */ -+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level]; -+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); -+ } -+ -+ /* When page migration is enabled, kbase_region_tracker_term() would ensure -+ * there are no pages left mapped on the GPU for a context. Hence the count -+ * of valid entries is expected to be zero here. 
-+ */ -+ if (kbase_page_migration_enabled && mmut->kctx) -+ WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); -+ /* Invalidate page after copying */ -+ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); -+ kunmap_atomic(pgd_page); -+ pgd_page = pgd_page_buffer; + -+ if (level < MIDGARD_MMU_BOTTOMLEVEL) { -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { -+ if (mmu_mode->pte_is_valid(pgd_page[i], level)) { -+ phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr( -+ mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev, -+ MGM_DEFAULT_PTE_GROUP, -+ level, pgd_page[i])); ++ /* Remove atom from jit_atoms_head list */ ++ list_del(&katom->jit_node); + -+ mmu_teardown_level(kbdev, mmut, target_pgd, level + 1); -+ } -+ } ++ if (katom->jit_blocked) { ++ list_del(&katom->queue); ++ katom->jit_blocked = false; + } + -+ kbase_mmu_free_pgd(kbdev, mmut, pgd); ++ info = katom->softjob_data; ++ /* Free the info structure */ ++ kfree(info); +} + -+int kbase_mmu_init(struct kbase_device *const kbdev, -+ struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, -+ int const group_id) ++static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) +{ -+ if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || -+ WARN_ON(group_id < 0)) -+ return -EINVAL; ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ __user void *data = (__user void *)(uintptr_t) katom->jc; ++ u8 *ids; ++ u32 count = MAX(katom->nr_extres, 1); ++ u32 i; ++ int ret; + -+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)), -+ "List of free PGDs may not be large enough."); -+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL, -+ "Array of MMU levels is not large enough."); ++ /* Sanity checks */ ++ if (count > ARRAY_SIZE(kctx->jit_alloc)) { ++ ret = -EINVAL; ++ goto fail; ++ } + -+ mmut->group_id = group_id; -+ mutex_init(&mmut->mmu_lock); -+ mmut->kctx = kctx; -+ mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; ++ /* Copy the information for safe access and future storage */ ++ ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); ++ if (!ids) { ++ ret = -ENOMEM; ++ goto fail; ++ } + -+ /* We allocate pages into the kbdev memory pool, then -+ * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to -+ * avoid allocations from the kernel happening with the lock held. -+ */ -+ while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { -+ int err; ++ lockdep_assert_held(&kctx->jctx.lock); ++ katom->softjob_data = ids; + -+ err = kbase_mem_pool_grow( -+ &kbdev->mem_pools.small[mmut->group_id], -+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? 
kctx->task : NULL); -+ if (err) { -+ kbase_mmu_term(kbdev, mmut); -+ return -ENOMEM; ++ /* For backwards compatibility */ ++ if (katom->nr_extres) { ++ /* Fail the job if there is no list of ids */ ++ if (!data) { ++ ret = -EINVAL; ++ goto free_info; + } + -+ mutex_lock(&mmut->mmu_lock); -+ mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); -+ mutex_unlock(&mmut->mmu_lock); ++ if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { ++ ret = -EINVAL; ++ goto free_info; ++ } ++ } else { ++ katom->nr_extres = 1; ++ *ids = (u8)katom->jc; + } ++ for (i = 0; i < count; i++) ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); ++ ++ list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); + + return 0; ++ ++free_info: ++ kfree(katom->softjob_data); ++ katom->softjob_data = NULL; ++fail: ++ return ret; +} + -+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) ++static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ -+ WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), -+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", -+ mmut->kctx->tgid, mmut->kctx->id); -+ -+ if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { -+ mutex_lock(&mmut->mmu_lock); -+ mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); -+ mutex_unlock(&mmut->mmu_lock); ++ struct kbase_context *kctx = katom->kctx; ++ u8 *ids = kbase_jit_free_get_ids(katom); ++ u32 count = katom->nr_extres; ++ u32 i; + -+ if (mmut->kctx) -+ KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); ++ if (ids == NULL) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return; + } + -+ mutex_destroy(&mmut->mmu_lock); ++ for (i = 0; i < count; i++, ids++) { ++ /* ++ * If the ID is zero or it is not in use yet then fail the job. 
++ */ ++ if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return; ++ } ++ } +} + -+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) ++static void kbasep_jit_finish_worker(struct work_struct *work) +{ -+ destroy_workqueue(kbdev->as[i].pf_wq); -+} ++ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ int resched; + -+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, -+ phys_addr_t phys, size_t size, -+ enum kbase_mmu_op_type flush_op) -+{ -+#if MALI_USE_CSF -+ unsigned long irq_flags; ++ mutex_lock(&kctx->jctx.lock); ++ kbase_finish_soft_job(katom); ++ resched = kbase_jd_done_nolock(katom, true); ++ mutex_unlock(&kctx->jctx.lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && -+ kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) -+ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -+#endif ++ if (resched) ++ kbase_js_sched_all(kctx->kbdev); +} + -+#ifdef CONFIG_MALI_VECTOR_DUMP -+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, -+ int level, char ** const buffer, size_t *size_left) ++void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) +{ -+ phys_addr_t target_pgd; -+ u64 *pgd_page; -+ int i; -+ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); -+ size_t dump_size; -+ struct kbase_device *kbdev; -+ struct kbase_mmu_mode const *mmu_mode; -+ -+ if (WARN_ON(kctx == NULL)) -+ return 0; -+ lockdep_assert_held(&kctx->mmu.mmu_lock); ++ LIST_HEAD(jit_pending_alloc_list); ++ struct list_head *i, *tmp; + -+ kbdev = kctx->kbdev; -+ mmu_mode = kbdev->mmu_mode; ++ list_splice_tail_init(&kctx->jctx.jit_pending_alloc, ++ &jit_pending_alloc_list); + -+ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); -+ if (!pgd_page) { -+ dev_warn(kbdev->dev, "%s: kmap failure", __func__); -+ return 0; ++ list_for_each_safe(i, tmp, &jit_pending_alloc_list) { ++ struct kbase_jd_atom *pending_atom = list_entry(i, ++ struct kbase_jd_atom, queue); ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); ++ kbase_kinstr_jm_atom_sw_start(pending_atom); ++ if (kbase_jit_allocate_process(pending_atom) == 0) { ++ /* Atom has completed */ ++ INIT_WORK(&pending_atom->work, ++ kbasep_jit_finish_worker); ++ queue_work(kctx->jctx.job_done_wq, &pending_atom->work); ++ } ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); ++ kbase_kinstr_jm_atom_sw_stop(pending_atom); + } ++} + -+ if (*size_left >= size) { -+ /* A modified physical address that contains -+ * the page table level -+ */ -+ u64 m_pgd = pgd | level; -+ -+ /* Put the modified physical address in the output buffer */ -+ memcpy(*buffer, &m_pgd, sizeof(m_pgd)); -+ *buffer += sizeof(m_pgd); ++static void kbase_jit_free_finish(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ u8 *ids; ++ size_t j; + -+ /* Followed by the page table itself */ -+ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); -+ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; ++ lockdep_assert_held(&kctx->jctx.lock); + -+ *size_left -= size; -+ } ++ ids = kbase_jit_free_get_ids(katom); ++ if (WARN_ON(ids == NULL)) ++ return; + -+ if (level < MIDGARD_MMU_BOTTOMLEVEL) { -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { -+ 
if (mmu_mode->pte_is_valid(pgd_page[i], level)) { -+ target_pgd = mmu_mode->pte_to_phy_addr( -+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( -+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, -+ level, pgd_page[i])); ++ /* Remove this atom from the jit_atoms_head list */ ++ list_del(&katom->jit_node); + -+ dump_size = kbasep_mmu_dump_level(kctx, -+ target_pgd, level + 1, -+ buffer, size_left); -+ if (!dump_size) { -+ kunmap(pfn_to_page(PFN_DOWN(pgd))); -+ return 0; -+ } -+ size += dump_size; ++ for (j = 0; j != katom->nr_extres; ++j) { ++ if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { ++ /* ++ * If the ID is valid but the allocation request failed ++ * still succeed this soft job but don't try and free ++ * the allocation. ++ */ ++ if (kctx->jit_alloc[ids[j]] != ++ KBASE_RESERVED_REG_JIT_ALLOC) { ++ KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, ++ kctx->jit_alloc[ids[j]]-> ++ gpu_alloc->nents, ids[j]); ++ kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); + } ++ kctx->jit_alloc[ids[j]] = NULL; + } + } ++ /* Free the list of ids */ ++ kfree(ids); + -+ kunmap(pfn_to_page(PFN_DOWN(pgd))); -+ -+ return size; ++ kbase_jit_retry_pending_alloc(kctx); +} + -+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) ++static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ -+ void *kaddr; -+ size_t size_left; ++ __user struct base_external_resource_list *user_ext_res; ++ struct base_external_resource_list *ext_res; ++ u64 count = 0; ++ size_t copy_size; + -+ KBASE_DEBUG_ASSERT(kctx); ++ user_ext_res = (__user struct base_external_resource_list *) ++ (uintptr_t) katom->jc; + -+ if (nr_pages == 0) { -+ /* can't dump in a 0 sized buffer, early out */ -+ return NULL; -+ } ++ /* Fail the job if there is no info structure */ ++ if (!user_ext_res) ++ return -EINVAL; + -+ size_left = nr_pages * PAGE_SIZE; ++ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) ++ return -EINVAL; + -+ if (WARN_ON(size_left == 0)) -+ return NULL; -+ kaddr = vmalloc_user(size_left); ++ /* Is the number of external resources in range? */ ++ if (!count || count > BASE_EXT_RES_COUNT_MAX) ++ return -EINVAL; + -+ mutex_lock(&kctx->mmu.mmu_lock); ++ /* Copy the information for safe access and future storage */ ++ copy_size = sizeof(*ext_res); ++ copy_size += sizeof(struct base_external_resource) * (count - 1); ++ ext_res = memdup_user(user_ext_res, copy_size); ++ if (IS_ERR(ext_res)) ++ return PTR_ERR(ext_res); + -+ if (kaddr) { -+ u64 end_marker = 0xFFULL; -+ char *buffer; -+ char *mmu_dump_buffer; -+ u64 config[3]; -+ size_t dump_size, size = 0; -+ struct kbase_mmu_setup as_setup; ++ /* ++ * Overwrite the count with the first value incase it was changed ++ * after the fact. 
++ */ ++ ext_res->count = count; + -+ buffer = (char *)kaddr; -+ mmu_dump_buffer = buffer; ++ katom->softjob_data = ext_res; + -+ kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, -+ &as_setup); -+ config[0] = as_setup.transtab; -+ config[1] = as_setup.memattr; -+ config[2] = as_setup.transcfg; -+ memcpy(buffer, &config, sizeof(config)); -+ mmu_dump_buffer += sizeof(config); -+ size_left -= sizeof(config); -+ size += sizeof(config); ++ return 0; ++} + -+ dump_size = kbasep_mmu_dump_level(kctx, -+ kctx->mmu.pgd, -+ MIDGARD_MMU_TOPLEVEL, -+ &mmu_dump_buffer, -+ &size_left); ++static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) ++{ ++ struct base_external_resource_list *ext_res; ++ int i; ++ bool failed = false; + -+ if (!dump_size) -+ goto fail_free; ++ ext_res = katom->softjob_data; ++ if (!ext_res) ++ goto failed_jc; + -+ size += dump_size; ++ kbase_gpu_vm_lock(katom->kctx); + -+ /* Add on the size for the end marker */ -+ size += sizeof(u64); ++ for (i = 0; i < ext_res->count; i++) { ++ u64 gpu_addr; + -+ if (size > (nr_pages * PAGE_SIZE)) { -+ /* The buffer isn't big enough - free the memory and -+ * return failure -+ */ -+ goto fail_free; ++ gpu_addr = ext_res->ext_res[i].ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; ++ if (map) { ++ if (!kbase_sticky_resource_acquire(katom->kctx, ++ gpu_addr)) ++ goto failed_loop; ++ } else { ++ if (!kbase_sticky_resource_release_force(katom->kctx, NULL, ++ gpu_addr)) ++ failed = true; + } ++ } + -+ /* Add the end marker */ -+ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); ++ /* ++ * In the case of unmap we continue unmapping other resources in the ++ * case of failure but will always report failure if _any_ unmap ++ * request fails. ++ */ ++ if (failed) ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ else ++ katom->event_code = BASE_JD_EVENT_DONE; ++ ++ kbase_gpu_vm_unlock(katom->kctx); ++ ++ return; ++ ++failed_loop: ++ while (i > 0) { ++ u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; ++ ++ kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr); ++ ++ --i; + } + -+ mutex_unlock(&kctx->mmu.mmu_lock); -+ return kaddr; ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_vm_unlock(katom->kctx); + -+fail_free: -+ vfree(kaddr); -+ mutex_unlock(&kctx->mmu.mmu_lock); -+ return NULL; ++failed_jc: ++ return; +} -+KBASE_EXPORT_TEST_API(kbase_mmu_dump); -+#endif /* CONFIG_MALI_VECTOR_DUMP */ + -+void kbase_mmu_bus_fault_worker(struct work_struct *data) ++static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ -+ struct kbase_as *faulting_as; -+ int as_no; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct kbase_fault *fault; ++ struct base_external_resource_list *ext_res; + -+ faulting_as = container_of(data, struct kbase_as, work_busfault); -+ fault = &faulting_as->bf_data; ++ ext_res = katom->softjob_data; ++ /* Free the info structure */ ++ kfree(ext_res); ++} + -+ /* Ensure that any pending page fault worker has completed */ -+ flush_work(&faulting_as->work_pagefault); ++int kbase_process_soft_job(struct kbase_jd_atom *katom) ++{ ++ int ret = 0; ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ as_no = faulting_as->number; ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); ++ kbase_kinstr_jm_atom_sw_start(katom); + -+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++ trace_sysgraph(SGR_SUBMIT, kctx->id, ++ kbase_jd_atom_id(kctx, katom)); + -+ /* Grab the 
context, already refcounted in kbase_mmu_interrupt() on -+ * flagging of the bus-fault. Therefore, it cannot be scheduled out of -+ * this AS until we explicitly release it -+ */ -+ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); -+ if (!kctx) { -+ atomic_dec(&kbdev->faults_pending); -+ return; -+ } ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ ret = kbase_dump_cpu_gpu_time(katom); ++ break; + -+#ifdef CONFIG_MALI_ARBITER_SUPPORT -+ /* check if we still have GPU */ -+ if (unlikely(kbase_is_gpu_removed(kbdev))) { -+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); -+ release_ctx(kbdev, kctx); -+ atomic_dec(&kbdev->faults_pending); -+ return; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ katom->event_code = kbase_sync_fence_out_trigger(katom, ++ katom->event_code == BASE_JD_EVENT_DONE ? ++ 0 : -EFAULT); ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ { ++ ret = kbase_sync_fence_in_wait(katom); ++ ++ if (ret == 1) { ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++ kbasep_add_waiting_with_timeout(katom); ++#else ++ kbasep_add_waiting_soft_job(katom); ++#endif ++ } ++ break; + } +#endif ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ ret = kbasep_soft_event_wait(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EVENT_SET: ++ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); ++ break; ++ case BASE_JD_REQ_SOFT_EVENT_RESET: ++ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); ++ break; ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ { ++ int res = kbase_debug_copy(katom); + -+ if (unlikely(fault->protected_mode)) { -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Permission failure", fault); -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ release_ctx(kbdev, kctx); -+ atomic_dec(&kbdev->faults_pending); -+ return; ++ if (res) ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ break; ++ } ++#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ ret = kbase_jit_allocate_process(katom); ++ break; ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ kbase_jit_free_process(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ kbase_ext_res_process(katom, true); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ kbase_ext_res_process(katom, false); ++ break; ++ } ++ ++ /* Atom is complete */ ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); ++ kbase_kinstr_jm_atom_sw_stop(katom); ++ return ret; ++} + ++void kbase_cancel_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ kbase_sync_fence_in_cancel_wait(katom); ++ break; ++#endif ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ kbasep_soft_event_cancel_job(katom); ++ break; ++ default: ++ /* This soft-job doesn't support cancellation! */ ++ KBASE_DEBUG_ASSERT(0); + } ++} + -+#if MALI_USE_CSF -+ /* Before the GPU power off, wait is done for the completion of -+ * in-flight MMU fault work items. So GPU is expected to remain -+ * powered up whilst the bus fault handling is being done. 
-+ */ -+ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); -+#else -+ /* NOTE: If GPU already powered off for suspend, -+ * we don't need to switch to unmapped -+ */ -+ if (!kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); -+ kbase_pm_context_idle(kbdev); ++int kbase_prepare_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ { ++ if (!IS_ALIGNED(katom->jc, cache_line_size())) ++ return -EINVAL; ++ } ++ break; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ { ++ struct base_fence fence; ++ int fd; ++ ++ if (copy_from_user(&fence, ++ (__user void *)(uintptr_t)katom->jc, ++ sizeof(fence)) != 0) ++ return -EINVAL; ++ ++ fd = kbase_sync_fence_out_create(katom, ++ fence.basep.stream_fd); ++ if (fd < 0) ++ return -EINVAL; ++ ++ fence.basep.fd = fd; ++ if (copy_to_user((__user void *)(uintptr_t)katom->jc, ++ &fence, sizeof(fence)) != 0) { ++ kbase_sync_fence_out_remove(katom); ++ /* fd should have been closed here, but there's ++ * no good way of doing that. Since ++ * copy_to_user() very rarely fails, and the fd ++ * will get closed on process termination this ++ * won't be a problem. ++ */ ++ fence.basep.fd = -EINVAL; ++ return -EINVAL; ++ } ++ } ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ { ++ struct base_fence fence; ++ int ret; ++ ++ if (copy_from_user(&fence, ++ (__user void *)(uintptr_t)katom->jc, ++ sizeof(fence)) != 0) ++ return -EINVAL; ++ ++ /* Get a reference to the fence object */ ++ ret = kbase_sync_fence_in_from_fd(katom, ++ fence.basep.fd); ++ if (ret < 0) ++ return ret; ++ } ++ break; ++#endif /* CONFIG_SYNC_FILE */ ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ return kbase_jit_allocate_prepare(katom); ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ return kbase_jit_free_prepare(katom); ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ case BASE_JD_REQ_SOFT_EVENT_SET: ++ case BASE_JD_REQ_SOFT_EVENT_RESET: ++ if (katom->jc == 0) ++ return -EINVAL; ++ break; ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ return kbase_debug_copy_prepare(katom); ++#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ return kbase_ext_res_prepare(katom); ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ return kbase_ext_res_prepare(katom); ++ default: ++ /* Unsupported soft-job */ ++ return -EINVAL; + } -+#endif ++ return 0; ++} + -+ release_ctx(kbdev, kctx); ++void kbase_finish_soft_job(struct kbase_jd_atom *katom) ++{ ++ trace_sysgraph(SGR_COMPLETE, katom->kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom)); + -+ atomic_dec(&kbdev->faults_pending); ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ /* Nothing to do */ ++ break; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ /* If fence has not yet been signaled, do it now */ ++ kbase_sync_fence_out_trigger(katom, katom->event_code == ++ BASE_JD_EVENT_DONE ? 
0 : -EFAULT); ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ /* Release katom's reference to fence object */ ++ kbase_sync_fence_in_remove(katom); ++ break; ++#endif /* CONFIG_SYNC_FILE */ ++#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ kbase_debug_copy_finish(katom); ++ break; ++#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ kbase_jit_allocate_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ kbase_ext_res_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ kbase_ext_res_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ kbase_jit_free_finish(katom); ++ break; ++ } +} + -+void kbase_flush_mmu_wqs(struct kbase_device *kbdev) ++void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) +{ -+ int i; ++ LIST_HEAD(local_suspended_soft_jobs); ++ struct kbase_jd_atom *tmp_iter; ++ struct kbase_jd_atom *katom_iter; ++ struct kbasep_js_device_data *js_devdata; ++ bool resched = false; + -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ struct kbase_as *as = &kbdev->as[i]; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ flush_workqueue(as->pf_wq); ++ js_devdata = &kbdev->js_data; ++ ++ /* Move out the entire list */ ++ mutex_lock(&js_devdata->runpool_mutex); ++ list_splice_init(&js_devdata->suspended_soft_jobs_list, ++ &local_suspended_soft_jobs); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ /* ++ * Each atom must be detached from the list and ran separately - ++ * it could be re-added to the old list, but this is unlikely ++ */ ++ list_for_each_entry_safe(katom_iter, tmp_iter, ++ &local_suspended_soft_jobs, dep_item[1]) { ++ struct kbase_context *kctx = katom_iter->kctx; ++ ++ mutex_lock(&kctx->jctx.lock); ++ ++ /* Remove from the global list */ ++ list_del(&katom_iter->dep_item[1]); ++ /* Remove from the context's list of waiting soft jobs */ ++ kbasep_remove_waiting_soft_job(katom_iter); ++ ++ if (kbase_process_soft_job(katom_iter) == 0) { ++ kbase_finish_soft_job(katom_iter); ++ resched |= kbase_jd_done_nolock(katom_iter, true); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_dec(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ } ++ mutex_unlock(&kctx->jctx.lock); + } ++ ++ if (resched) ++ kbase_js_sched_all(kbdev); +} -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h ++#endif /* !MALI_USE_CSF */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.c b/drivers/gpu/arm/bifrost/mali_kbase_strings.c new file mode 100644 -index 000000000..699b1f340 +index 000000000..84784be6f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h -@@ -0,0 +1,341 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.c +@@ -0,0 +1,28 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -257360,336 +258395,274 @@ index 000000000..699b1f340 + * + */ + -+#ifndef _KBASE_MMU_H_ -+#define _KBASE_MMU_H_ ++#include "mali_kbase_strings.h" + -+#include ++#define KBASE_DRV_NAME "mali" ++#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + -+#define KBASE_MMU_PAGE_ENTRIES 512 -+#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) ++const char kbase_drv_name[] = KBASE_DRV_NAME; ++const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.h b/drivers/gpu/arm/bifrost/mali_kbase_strings.h +new file mode 100644 +index 000000000..c3f94f926 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.h +@@ -0,0 +1,23 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+struct kbase_context; -+struct kbase_mmu_table; -+struct kbase_va_region; ++extern const char kbase_drv_name[]; ++extern const char kbase_timeline_name[]; +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h +new file mode 100644 +index 000000000..3d2053bee +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h +@@ -0,0 +1,216 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + +/** -+ * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. -+ * A pointer to this type is passed down from the outer-most callers in the kbase -+ * module - where the information resides as to the synchronous / asynchronous -+ * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to -+ * existing GPU work does it come from requests (like ioctl) from user-space, power management, -+ * etc. 
-+ * -+ * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice -+ * of a 'valid' value -+ * @CALLER_MMU_SYNC: Arbitrary value for 'synchronous that isn't easy to choose by accident -+ * @CALLER_MMU_ASYNC: Also hard to choose by accident ++ * DOC: This file contains our internal "API" for explicit fences. ++ * It hides the implementation details of the actual explicit fence mechanism ++ * used (Android fences or sync file with DMA fences). + */ -+enum kbase_caller_mmu_sync_info { -+ CALLER_MMU_UNSET_SYNCHRONICITY, -+ CALLER_MMU_SYNC = 0x02, -+ CALLER_MMU_ASYNC -+}; ++ ++#ifndef MALI_KBASE_SYNC_H ++#define MALI_KBASE_SYNC_H ++ ++#include ++#include ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_fence_defs.h" ++#include ++#endif ++ ++#include "mali_kbase.h" + +/** -+ * enum kbase_mmu_op_type - enum for MMU operations -+ * @KBASE_MMU_OP_NONE: To help catch uninitialized struct -+ * @KBASE_MMU_OP_FIRST: The lower boundary of enum -+ * @KBASE_MMU_OP_LOCK: Lock memory region -+ * @KBASE_MMU_OP_UNLOCK: Unlock memory region -+ * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) -+ * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) -+ * @KBASE_MMU_OP_COUNT: The upper boundary of enum ++ * struct kbase_sync_fence_info - Information about a fence ++ * @fence: Pointer to fence (type is void*, as underlaying struct can differ) ++ * @name: The name given to this fence when it was created ++ * @status: < 0 means error, 0 means active, 1 means signaled ++ * ++ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() ++ * to get the information. + */ -+enum kbase_mmu_op_type { -+ KBASE_MMU_OP_NONE = 0, /* Must be zero */ -+ KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ -+ KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, -+ KBASE_MMU_OP_UNLOCK, -+ KBASE_MMU_OP_FLUSH_PT, -+ KBASE_MMU_OP_FLUSH_MEM, -+ KBASE_MMU_OP_COUNT /* Must be the last in enum */ ++struct kbase_sync_fence_info { ++ void *fence; ++ char name[32]; ++ int status; +}; + +/** -+ * kbase_mmu_as_init() - Initialising GPU address space object. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer). -+ * @i: Array index of address space object. ++ * kbase_sync_fence_stream_create() - Create a stream object ++ * @name: Name of stream (only used to ease debugging/visualization) ++ * @out_fd: A file descriptor representing the created stream object + * -+ * This is called from device probe to initialise an address space object -+ * of the device. ++ * Can map down to a timeline implementation in some implementations. ++ * Exposed as a file descriptor. ++ * Life-time controlled via the file descriptor: ++ * - dup to add a ref ++ * - close to remove a ref + * -+ * Return: 0 on success and non-zero value on failure. ++ * Return: 0 on success, < 0 on error + */ -+int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i); ++int kbase_sync_fence_stream_create(const char *name, int *const out_fd); + ++#if !MALI_USE_CSF +/** -+ * kbase_mmu_as_term() - Terminate address space object. ++ * kbase_sync_fence_out_create - Create an explicit output fence to specified atom + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer). -+ * @i: Array index of address space object. ++ * @katom: Atom to assign the new explicit fence to ++ * @stream_fd: File descriptor for stream object to create fence on + * -+ * This is called upon device termination to destroy -+ * the address space object of the device. 
++ * Return: Valid file descriptor to fence or < 0 on error + */ -+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); ++int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); + +/** -+ * kbase_mmu_init - Initialise an object representing GPU page tables -+ * -+ * @kbdev: Instance of GPU platform device, allocated from the probe method. -+ * @mmut: GPU page tables to be initialized. -+ * @kctx: Optional kbase context, may be NULL if this set of MMU tables -+ * is not associated with a context. -+ * @group_id: The physical group ID from which to allocate GPU page tables. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * kbase_sync_fence_in_from_fd() - Assigns an existing fence to specified atom ++ * @katom: Atom to assign the existing explicit fence to ++ * @fd: File descriptor to an existing fence + * -+ * The structure should be terminated using kbase_mmu_term() ++ * Assigns an explicit input fence to atom. ++ * This can later be waited for by calling @kbase_sync_fence_in_wait + * -+ * Return: 0 if successful, otherwise a negative error code. ++ * Return: 0 on success, < 0 on error + */ -+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ struct kbase_context *kctx, int group_id); ++int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); ++#endif /* !MALI_USE_CSF */ + +/** -+ * kbase_mmu_interrupt - Process an MMU interrupt. ++ * kbase_sync_fence_validate() - Validate a fd to be a valid fence + * -+ * @kbdev: Pointer to the kbase device for which the interrupt happened. -+ * @irq_stat: Value of the MMU_IRQ_STATUS register. ++ * @fd: File descriptor to check + * -+ * Process the MMU interrupt that was reported by the &kbase_device. ++ * This function is only usable to catch unintentional user errors early, ++ * it does not stop malicious code changing the fd after this function returns. ++ * ++ * Return: 0 if fd is for a valid fence, < 0 if invalid + */ -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); ++int kbase_sync_fence_validate(int fd); + ++#if !MALI_USE_CSF +/** -+ * kbase_mmu_term - Terminate an object representing GPU page tables ++ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom ++ * @katom: Atom with an explicit fence to signal ++ * @result: < 0 means signal with error, 0 >= indicates success + * -+ * @kbdev: Instance of GPU platform device, allocated from the probe method. -+ * @mmut: GPU page tables to be destroyed. ++ * Signal output fence attached on katom and remove the fence from the atom. + * -+ * This will free any page tables that have been allocated ++ * Return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE + */ -+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); ++enum base_jd_event_code ++kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); + +/** -+ * kbase_mmu_create_ate - Create an address translation entry ++ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled ++ * @katom: Atom with explicit fence to wait for + * -+ * @kbdev: Instance of GPU platform device, allocated from the probe method. -+ * @phy: Physical address of the page to be mapped for GPU access. -+ * @flags: Bitmask of attributes of the GPU memory region being mapped. -+ * @level: Page table level for which to build an address translation entry. -+ * @group_id: The physical memory group in which the page was allocated. 
-+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * If the fence is already signaled, then 0 is returned, and the caller must ++ * continue processing of the katom. + * -+ * This function creates an address translation entry to encode the physical -+ * address of a page to be mapped for access by the GPU, along with any extra -+ * attributes required for the GPU memory region. ++ * If the fence isn't already signaled, then this kbase_sync framework will ++ * take responsibility to continue the processing once the fence is signaled. + * -+ * Return: An address translation entry, either in LPAE or AArch64 format -+ * (depending on the driver's configuration). ++ * Return: 0 if already signaled, otherwise 1 + */ -+u64 kbase_mmu_create_ate(struct kbase_device *kbdev, -+ struct tagged_addr phy, unsigned long flags, int level, int group_id); -+ -+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int group_id, u64 *dirty_pgds, -+ struct kbase_va_region *reg, bool ignore_page_migration); -+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, -+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, -+ int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg, bool ignore_page_migration); -+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int as_nr, int group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg); -+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, -+ u64 vpfn, struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int as_nr, int group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ struct kbase_va_region *reg); -+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, -+ size_t nr, unsigned long flags, int group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info, -+ bool ignore_page_migration); -+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, -+ struct tagged_addr phys, size_t nr, unsigned long flags, -+ int group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info); -+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, -+ struct tagged_addr phys, size_t nr, unsigned long flags, -+ int group_id, -+ enum kbase_caller_mmu_sync_info mmu_sync_info); ++int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); + +/** -+ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table -+ * -+ * @kbdev: Pointer to kbase device. -+ * @mmut: Pointer to GPU MMU page table. -+ * @vpfn: Start page frame number of the GPU virtual pages to unmap. -+ * @phys: Array of physical pages currently mapped to the virtual -+ * pages to unmap, or NULL. This is used for GPU cache maintenance -+ * and page migration support. -+ * @nr_phys_pages: Number of physical pages to flush. -+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. -+ * @as_nr: Address space number, for GPU cache maintenance operations -+ * that happen outside a specific kbase context. -+ * @ignore_page_migration: Whether page migration metadata should be ignored. -+ * -+ * We actually discard the ATE and free the page table pages if no valid entries -+ * exist in PGD. 
-+ * -+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is -+ * currently scheduled into the runpool, and so potentially uses a lot of locks. -+ * These locks must be taken in the correct order with respect to others -+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more -+ * information. -+ * -+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, -+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, -+ * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches -+ * instead of specific physical address ranges. ++ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits ++ * @katom: Atom to cancel wait for + * -+ * Return: 0 on success, otherwise an error code. ++ * This function is fully responsible for continuing processing of this atom ++ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) + */ -+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, -+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, -+ int as_nr, bool ignore_page_migration); ++void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); + -+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, -+ struct tagged_addr *phys, size_t nr, -+ unsigned long flags, int const group_id); -+#if MALI_USE_CSF +/** -+ * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags -+ * -+ * @kbdev: Pointer to kbase device. -+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update -+ * @phys: Pointer to the array of tagged physical addresses of the physical -+ * pages that are pointed to by the page table entries (that need to -+ * be updated). -+ * @nr: Number of pages to update -+ * @flags: Flags -+ * @group_id: The physical memory group in which the page was allocated. -+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * kbase_sync_fence_in_remove() - Remove the input fence from the katom ++ * @katom: Atom to remove explicit input fence for + * -+ * Return: 0 on success, otherwise an error code. ++ * This will also release the corresponding reference. + */ -+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, -+ size_t nr, unsigned long flags, int const group_id); -+#endif ++void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); + +/** -+ * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages -+ * -+ * @old_phys: Old physical page to be replaced. -+ * @new_phys: New physical page used to replace old physical page. -+ * @old_dma_addr: DMA address of the old page. -+ * @new_dma_addr: DMA address of the new page. -+ * @level: MMU page table level of the provided PGD. -+ * -+ * The page migration process is made of 2 big steps: -+ * -+ * 1) Copy the content of the old page to the new page. -+ * 2) Remap the virtual page, that is: replace either the ATE (if the old page -+ * was a regular page) or the PTE (if the old page was used as a PGD) in the -+ * MMU page table with the new page. -+ * -+ * During the process, the MMU region is locked to prevent GPU access to the -+ * virtual memory page that is being remapped. -+ * -+ * Before copying the content of the old page to the new page and while the -+ * MMU region is locked, a GPU cache flush is performed to make sure that -+ * pending GPU writes are finalized to the old page before copying. 
-+ * That is necessary because otherwise there's a risk that GPU writes might -+ * be finalized to the old page, and not new page, after migration. -+ * The MMU region is unlocked only at the end of the migration operation. ++ * kbase_sync_fence_out_remove() - Remove the output fence from the katom ++ * @katom: Atom to remove explicit output fence for + * -+ * Return: 0 on success, otherwise an error code. ++ * This will also release the corresponding reference. + */ -+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, -+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); ++void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); ++#endif /* !MALI_USE_CSF */ + ++#if !MALI_USE_CSF +/** -+ * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches -+ * -+ * @kbdev: Instance of GPU platform device, allocated from the probe method. -+ * @kctx: Pointer to kbase context, it can be NULL if the physical address -+ * range is not associated with User created context. -+ * @phys: Starting address of the physical range to start the operation on. -+ * @size: Number of bytes to work on. -+ * @flush_op: Type of cache flush operation to perform. ++ * kbase_sync_fence_in_info_get() - Retrieves information about input fence ++ * @katom: Atom to get fence information from ++ * @info: Struct to be filled with fence information + * -+ * Issue a cache flush physical range command. This function won't perform any -+ * flush if the GPU doesn't support FLUSH_PA_RANGE command. The flush would be -+ * performed only if the context has a JASID assigned to it. -+ * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait(). ++ * Return: 0 on success, < 0 on error + */ -+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, -+ phys_addr_t phys, size_t size, -+ enum kbase_mmu_op_type flush_op); ++int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info); + +/** -+ * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. -+ * -+ * @kbdev: Pointer to the kbase device for which bus fault was reported. -+ * @status: Value of the GPU_FAULTSTATUS register. -+ * @as_nr: GPU address space for which the bus fault occurred. -+ * -+ * Process the bus fault interrupt that was reported for a particular GPU -+ * address space. ++ * kbase_sync_fence_out_info_get() - Retrieves information about output fence ++ * @katom: Atom to get fence information from ++ * @info: Struct to be filled with fence information + * -+ * Return: zero if the operation was successful, non-zero otherwise. ++ * Return: 0 on success, < 0 on error + */ -+int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, -+ u32 as_nr); ++int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info); ++#endif /* !MALI_USE_CSF */ ++ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++void kbase_sync_fence_info_get(struct fence *fence, ++ struct kbase_sync_fence_info *info); ++#else ++void kbase_sync_fence_info_get(struct dma_fence *fence, ++ struct kbase_sync_fence_info *info); ++#endif ++#endif + +/** -+ * kbase_mmu_gpu_fault_interrupt() - Report a GPU fault. 
-+ * -+ * @kbdev: Kbase device pointer -+ * @status: GPU fault status -+ * @as_nr: Faulty address space -+ * @address: GPU fault address -+ * @as_valid: true if address space is valid ++ * kbase_sync_status_string() - Get string matching @status ++ * @status: Value of fence status. + * -+ * This function builds GPU fault information to submit a work -+ * for reporting the details of the fault. ++ * Return: Pointer to string describing @status. + */ -+void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, -+ u32 as_nr, u64 address, bool as_valid); ++const char *kbase_sync_status_string(int status); ++ ++ ++#if !MALI_USE_CSF ++/* ++ * Internal worker used to continue processing of atom. ++ */ ++void kbase_sync_fence_wait_worker(struct work_struct *data); + ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG +/** -+ * kbase_context_mmu_group_id_get - Decode a memory group ID from -+ * base_context_create_flags -+ * -+ * @flags: Bitmask of flags to pass to base_context_init. -+ * -+ * Memory allocated for GPU page tables will come from the returned group. ++ * kbase_sync_fence_in_dump() - Trigger a debug dump of atoms input fence state + * -+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). ++ * @katom: Atom to trigger fence debug dump for + */ -+static inline int -+kbase_context_mmu_group_id_get(base_context_create_flags const flags) -+{ -+ KBASE_DEBUG_ASSERT(flags == -+ (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); -+ return (int)BASE_CONTEXT_MMU_GROUP_ID_GET(flags); -+} ++void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); ++#endif ++#endif /* !MALI_USE_CSF */ + -+#endif /* _KBASE_MMU_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h ++#endif /* MALI_KBASE_SYNC_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c new file mode 100644 -index 000000000..50d2ea5d0 +index 000000000..5ee7fc3ce --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h -@@ -0,0 +1,214 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c +@@ -0,0 +1,50 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -257707,914 +258680,460 @@ index 000000000..50d2ea5d0 + * + */ + -+/** -+ * DOC: Interface file for accessing MMU hardware functionality ++/* ++ * @file + * -+ * This module provides an abstraction for accessing the functionality provided -+ * by the midgard MMU and thus allows all MMU HW access to be contained within -+ * one common place and allows for different backends (implementations) to -+ * be provided. ++ * Common code for our explicit fence functionality + */ + -+#ifndef _KBASE_MMU_HW_H_ -+#define _KBASE_MMU_HW_H_ -+ -+#include "mali_kbase_mmu.h" -+ -+/* Forward declarations */ -+struct kbase_device; -+struct kbase_as; -+struct kbase_context; -+ -+/** -+ * enum kbase_mmu_fault_type - MMU fault type descriptor. 
-+ * @KBASE_MMU_FAULT_TYPE_UNKNOWN: unknown fault -+ * @KBASE_MMU_FAULT_TYPE_PAGE: page fault -+ * @KBASE_MMU_FAULT_TYPE_BUS: nus fault -+ * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault -+ * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED: bus_unexpected fault -+ */ -+enum kbase_mmu_fault_type { -+ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, -+ KBASE_MMU_FAULT_TYPE_PAGE, -+ KBASE_MMU_FAULT_TYPE_BUS, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED -+}; ++#include ++#include "mali_kbase.h" ++#include "mali_kbase_sync.h" + -+/** -+ * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions -+ * @vpfn: MMU Virtual Page Frame Number to start the operation on. -+ * @nr: Number of pages to work on. -+ * @op: Operation type (written to ASn_COMMAND). -+ * @kctx_id: Kernel context ID for MMU command tracepoint. -+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. -+ * @flush_skip_levels: Page table levels to skip flushing. (Only -+ * applicable if GPU supports feature) -+ */ -+struct kbase_mmu_hw_op_param { -+ u64 vpfn; -+ u32 nr; -+ enum kbase_mmu_op_type op; -+ u32 kctx_id; -+ enum kbase_caller_mmu_sync_info mmu_sync_info; -+ u64 flush_skip_levels; -+}; ++#if !MALI_USE_CSF ++void kbase_sync_fence_wait_worker(struct work_struct *data) ++{ ++ struct kbase_jd_atom *katom; + -+/** -+ * kbase_mmu_hw_configure - Configure an address space for use. -+ * @kbdev: kbase device to configure. -+ * @as: address space to configure. -+ * -+ * Configure the MMU using the address space details setup in the -+ * kbase_context structure. -+ */ -+void kbase_mmu_hw_configure(struct kbase_device *kbdev, -+ struct kbase_as *as); ++ katom = container_of(data, struct kbase_jd_atom, work); ++ kbase_soft_event_wait_callback(katom); ++} ++#endif /* !MALI_USE_CSF */ + -+/** -+ * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program -+ * the LOCKADDR register. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. ++const char *kbase_sync_status_string(int status) ++{ ++ if (status == 0) ++ return "active"; ++ else if (status > 0) ++ return "signaled"; ++ else ++ return "error"; ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c +new file mode 100644 +index 000000000..9360324cf +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c +@@ -0,0 +1,409 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * hwaccess_lock needs to be held when calling this function. ++ * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. + * -+ * Return: 0 if issuing the command was successful, otherwise an error code. -+ */ -+int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); -+ -+/** -+ * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without -+ * programming the LOCKADDR register and wait -+ * for it to complete before returning. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. 
-+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * This function should be called for GPU where GPU command is used to flush -+ * the cache(s) instead of MMU command. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Return: 0 if issuing the command was successful, otherwise an error code. + */ -+int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); + -+/** -+ * kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it -+ * to complete before returning. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. -+ * -+ * Return: 0 if issuing the command was successful, otherwise an error code. -+ */ -+int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); -+/** -+ * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. -+ * -+ * Issue a flush operation on the address space as per the information -+ * specified inside @op_param. This function should not be called for -+ * GPUs where MMU command to flush the cache(s) is deprecated. -+ * mmu_hw_mutex needs to be held when calling this function. -+ * -+ * Return: 0 if the operation was successful, non-zero otherwise. ++/* ++ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) + */ -+int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); + -+/** -+ * kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. -+ * -+ * Issue a flush operation on the address space as per the information -+ * specified inside @op_param. This function should not be called for -+ * GPUs where MMU command to flush the cache(s) is deprecated. -+ * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this -+ * function. -+ * -+ * Return: 0 if the operation was successful, non-zero otherwise. -+ */ -+int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); -+ -+/** -+ * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to struct containing information about the MMU -+ * operation to perform. -+ * -+ * Issue a flush operation on the address space as per the information -+ * specified inside @op_param. 
GPU command is used to flush the cache(s) -+ * instead of the MMU command. -+ * -+ * Return: 0 if the operation was successful, non-zero otherwise. -+ */ -+int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param); -+ -+/** -+ * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by -+ * the MMU. -+ * @kbdev: kbase device to clear the fault from. -+ * @as: address space to clear the fault from. -+ * @type: The type of fault that needs to be cleared. -+ * -+ * Clear a bus error or page fault that has been reported by the MMU. -+ */ -+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ enum kbase_mmu_fault_type type); -+ -+/** -+ * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by -+ * the MMU. -+ * @kbdev: kbase device to again enable the fault from. -+ * @as: address space to again enable the fault from. -+ * @type: The type of fault that needs to be enabled again. -+ * -+ * After a page fault or bus error has been reported by the MMU these -+ * will be disabled. After these are handled this function needs to be -+ * called to enable the page fault or bus error fault again. -+ */ -+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ enum kbase_mmu_fault_type type); -+ -+#endif /* _KBASE_MMU_HW_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c -new file mode 100644 -index 000000000..3f6da35d8 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c -@@ -0,0 +1,699 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if MALI_USE_CSF -+/** -+ * mmu_has_flush_skip_pgd_levels() - Check if the GPU has the feature -+ * AS_LOCKADDR_FLUSH_SKIP_LEVELS -+ * -+ * @gpu_props: GPU properties for the GPU instance. -+ * -+ * This function returns whether a cache flush can apply the skip flags of -+ * AS_LOCKADDR_FLUSH_SKIP_LEVELS. -+ * -+ * Return: True if cache flush has the said feature. 
-+ */ -+static bool mmu_has_flush_skip_pgd_levels(struct kbase_gpu_props const *gpu_props) -+{ -+ u32 const signature = -+ gpu_props->props.raw_props.gpu_id & (GPU_ID2_ARCH_MAJOR | GPU_ID2_ARCH_REV); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mali_kbase_fence_defs.h" ++#include "mali_kbase_sync.h" ++#include "mali_kbase_fence.h" ++#include "mali_kbase.h" + -+ return signature >= (u32)GPU_ID2_PRODUCT_MAKE(12, 0, 4, 0); -+} -+#endif ++static const struct file_operations stream_fops = { ++ .owner = THIS_MODULE ++}; + -+/** -+ * lock_region() - Generate lockaddr to lock memory region in MMU -+ * -+ * @gpu_props: GPU properties for finding the MMU lock region size. -+ * @lockaddr: Address and size of memory region to lock. -+ * @op_param: Pointer to a struct containing the starting page frame number of -+ * the region to lock, the number of pages to lock and page table -+ * levels to skip when flushing (if supported). -+ * -+ * The lockaddr value is a combination of the starting address and -+ * the size of the region that encompasses all the memory pages to lock. -+ * -+ * Bits 5:0 are used to represent the size, which must be a power of 2. -+ * The smallest amount of memory to be locked corresponds to 32 kB, -+ * i.e. 8 memory pages, because a MMU cache line is made of 64 bytes -+ * and every page table entry is 8 bytes. Therefore it is not possible -+ * to lock less than 8 memory pages at a time. -+ * -+ * The size is expressed as a logarithm minus one: -+ * - A value of 14 is thus interpreted as log(32 kB) = 15, where 32 kB -+ * is the smallest possible size. -+ * - Likewise, a value of 47 is interpreted as log(256 TB) = 48, where 256 TB -+ * is the largest possible size (implementation defined value according -+ * to the HW spec). -+ * -+ * Bits 11:6 are reserved. -+ * -+ * Bits 63:12 are used to represent the base address of the region to lock. -+ * Only the upper bits of the address are used; lowest bits are cleared -+ * to avoid confusion. -+ * -+ * The address is aligned to a multiple of the region size. This has profound -+ * implications on the region size itself: often the MMU will lock a region -+ * larger than the given number of pages, because the lock region cannot start -+ * from any arbitrary address. -+ * -+ * Return: 0 if success, or an error code on failure. -+ */ -+static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, -+ const struct kbase_mmu_hw_op_param *op_param) ++int kbase_sync_fence_stream_create(const char *name, int *const out_fd) +{ -+ const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT; -+ const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1; -+ u64 lockaddr_size_log2; -+ -+ if (op_param->nr == 0) ++ if (!out_fd) + return -EINVAL; + -+ /* The MMU lock region is a self-aligned region whose size -+ * is a power of 2 and that contains both start and end -+ * of the address range determined by pfn and num_pages. -+ * The size of the MMU lock region can be defined as the -+ * largest divisor that yields the same result when both -+ * start and end addresses are divided by it. -+ * -+ * For instance: pfn=0x4F000 num_pages=2 describe the -+ * address range between 0x4F000 and 0x50FFF. It is only -+ * 2 memory pages. However there isn't a single lock region -+ * of 8 kB that encompasses both addresses because 0x4F000 -+ * would fall into the [0x4E000, 0x4FFFF] region while -+ * 0x50000 would fall into the [0x50000, 0x51FFF] region. 
-+ * The minimum lock region size that includes the entire -+ * address range is 128 kB, and the region would be -+ * [0x40000, 0x5FFFF]. -+ * -+ * The region size can be found by comparing the desired -+ * start and end addresses and finding the highest bit -+ * that differs. The smallest naturally aligned region -+ * must include this bit change, hence the desired region -+ * starts with this bit (and subsequent bits) set to 0 -+ * and ends with the bit (and subsequent bits) set to 1. -+ * -+ * In the example above: 0x4F000 ^ 0x50FFF = 0x1FFFF -+ * therefore the highest bit that differs is bit #16 -+ * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB. -+ */ -+ lockaddr_size_log2 = fls64(lockaddr_base ^ lockaddr_end); -+ -+ /* Cap the size against minimum and maximum values allowed. */ -+ if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) ++ *out_fd = anon_inode_getfd(name, &stream_fops, NULL, ++ O_RDONLY | O_CLOEXEC); ++ if (*out_fd < 0) + return -EINVAL; + -+ lockaddr_size_log2 = -+ MAX(lockaddr_size_log2, kbase_get_lock_region_min_size_log2(gpu_props)); -+ -+ /* Represent the result in a way that is compatible with HW spec. -+ * -+ * Upper bits are used for the base address, whose lower bits -+ * are cleared to avoid confusion because they are going to be ignored -+ * by the MMU anyway, since lock regions shall be aligned with -+ * a multiple of their size and cannot start from any address. -+ * -+ * Lower bits are used for the size, which is represented as -+ * logarithm minus one of the actual size. -+ */ -+ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); -+ *lockaddr |= lockaddr_size_log2 - 1; -+ -+#if MALI_USE_CSF -+ if (mmu_has_flush_skip_pgd_levels(gpu_props)) -+ *lockaddr = -+ AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(*lockaddr, op_param->flush_skip_levels); -+#endif -+ + return 0; +} + -+/** -+ * wait_ready() - Wait for previously issued MMU command to complete. -+ * -+ * @kbdev: Kbase device to wait for a MMU command to complete. -+ * @as_nr: Address space to wait for a MMU command to complete. -+ * -+ * Reset GPU if the wait for previously issued command fails. -+ * -+ * Return: 0 on successful completion. negative error on failure. -+ */ -+static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) ++#if !MALI_USE_CSF ++int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) +{ -+ const ktime_t wait_loop_start = ktime_get_raw(); -+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; -+ s64 diff; -+ -+ if (unlikely(kbdev->as[as_nr].is_unresponsive)) -+ return -EBUSY; -+ -+ do { -+ unsigned int i; -+ -+ for (i = 0; i < 1000; i++) { -+ /* Wait for the MMU status to indicate there is no active command */ -+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & -+ AS_STATUS_AS_ACTIVE)) -+ return 0; -+ } -+ -+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); -+ } while (diff < mmu_as_inactive_wait_time_ms); -+ -+ dev_err(kbdev->dev, -+ "AS_ACTIVE bit stuck for as %u. 
Might be caused by unstable GPU clk/pwr or faulty system", -+ as_nr); -+ kbdev->as[as_nr].is_unresponsive = true; -+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu_locked(kbdev); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ struct sync_file *sync_file; ++ int fd; + -+ return -ETIMEDOUT; -+} ++ fence = kbase_fence_out_new(katom); ++ if (!fence) ++ return -ENOMEM; + -+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) -+{ -+ /* write AS_COMMAND when MMU is ready to accept another command */ -+ const int status = wait_ready(kbdev, as_nr); ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ /* Take an extra reference to the fence on behalf of the sync_file. ++ * This is only needed on older kernels where sync_file_create() ++ * does not take its own reference. This was changed in v4.9.68, ++ * where sync_file_create() now takes its own reference. ++ */ ++ dma_fence_get(fence); ++#endif + -+ if (likely(status == 0)) -+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); -+ else if (status == -EBUSY) { -+ dev_dbg(kbdev->dev, -+ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u", -+ as_nr, cmd); -+ } else { -+ dev_err(kbdev->dev, -+ "Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u", -+ as_nr, cmd); ++ /* create a sync_file fd representing the fence */ ++ sync_file = sync_file_create(fence); ++ if (!sync_file) { ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ dma_fence_put(fence); ++#endif ++ kbase_fence_out_remove(katom); ++ return -ENOMEM; + } + -+ return status; -+} -+ -+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+static int wait_cores_power_trans_complete(struct kbase_device *kbdev) -+{ -+#define WAIT_TIMEOUT 1000 /* 1ms timeout */ -+#define DELAY_TIME_IN_US 1 -+ const int max_iterations = WAIT_TIMEOUT; -+ int loop; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ for (loop = 0; loop < max_iterations; loop++) { -+ u32 lo = -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO)); -+ u32 hi = -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI)); -+ -+ if (!lo && !hi) -+ break; -+ -+ udelay(DELAY_TIME_IN_US); ++ fd = get_unused_fd_flags(O_CLOEXEC); ++ if (fd < 0) { ++ fput(sync_file->file); ++ kbase_fence_out_remove(katom); ++ return fd; + } + -+ if (loop == max_iterations) { -+ dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long"); -+ return -ETIMEDOUT; -+ } ++ fd_install(fd, sync_file->file); + -+ return 0; ++ return fd; +} + -+/** -+ * apply_hw_issue_GPU2019_3901_wa - Apply WA for the HW issue GPU2019_3901 -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @mmu_cmd: Pointer to the variable contain the value of MMU command -+ * that needs to be sent to flush the L2 cache and do an -+ * implicit unlock. -+ * @as_nr: Address space number for which MMU command needs to be -+ * sent. -+ * -+ * This function ensures that the flush of LSC is not missed for the pages that -+ * were unmapped from the GPU, due to the power down transition of shader cores. -+ * -+ * Return: 0 if the WA was successfully applied, non-zero otherwise. -+ */ -+static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_cmd, -+ unsigned int as_nr) ++int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) +{ -+ int ret = 0; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ /* Check if L2 is OFF. 
The cores also must be OFF if L2 is not up, so -+ * the workaround can be safely skipped. -+ */ -+ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { -+ if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) { -+ dev_warn(kbdev->dev, -+ "Unexpected mmu command received"); -+ return -EINVAL; -+ } -+ -+ /* Wait for the LOCK MMU command to complete, issued by the caller */ -+ ret = wait_ready(kbdev, as_nr); -+ if (unlikely(ret)) -+ return ret; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence = sync_file_get_fence(fd); ++#else ++ struct dma_fence *fence = sync_file_get_fence(fd); ++#endif + -+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, -+ GPU_COMMAND_CACHE_CLN_INV_LSC); -+ if (unlikely(ret)) -+ return ret; ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ ret = wait_cores_power_trans_complete(kbdev); -+ if (unlikely(ret)) { -+ if (kbase_prepare_to_reset_gpu_locked(kbdev, -+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) -+ kbase_reset_gpu_locked(kbdev); -+ return ret; -+ } ++ if (!fence) ++ return -ENOENT; + -+ /* As LSC is guaranteed to have been flushed we can use FLUSH_PT -+ * MMU command to only flush the L2. -+ */ -+ *mmu_cmd = AS_COMMAND_FLUSH_PT; -+ } ++ kbase_fence_fence_in_set(katom, fence); ++ katom->dma_fence.fence_cb_added = false; + -+ return ret; ++ return 0; +} -+#endif ++#endif /* !MALI_USE_CSF */ + -+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) ++int kbase_sync_fence_validate(int fd) +{ -+ struct kbase_mmu_setup *current_setup = &as->current_setup; -+ u64 transcfg = 0; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ -+ transcfg = current_setup->transcfg; -+ -+ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK -+ * Clear PTW_MEMATTR bits -+ */ -+ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; -+ /* Enable correct PTW_MEMATTR bits */ -+ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; -+ /* Ensure page-tables reads use read-allocate cache-policy in -+ * the L2 -+ */ -+ transcfg |= AS_TRANSCFG_R_ALLOCATE; -+ -+ if (kbdev->system_coherency != COHERENCY_NONE) { -+ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) -+ * Clear PTW_SH bits -+ */ -+ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); -+ /* Enable correct PTW_SH bits */ -+ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); -+ } -+ -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), -+ transcfg); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), -+ (transcfg >> 32) & 0xFFFFFFFFUL); -+ -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), -+ current_setup->transtab & 0xFFFFFFFFUL); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), -+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL); -+ -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), -+ current_setup->memattr & 0xFFFFFFFFUL); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), -+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL); -+ -+ KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, -+ current_setup->transtab, -+ current_setup->memattr, -+ transcfg); -+ -+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); -+#if MALI_USE_CSF -+ /* Wait for UPDATE command to complete */ -+ wait_ready(kbdev, as->number); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence = sync_file_get_fence(fd); ++#else ++ struct dma_fence *fence = sync_file_get_fence(fd); +#endif -+} + -+/** -+ * mmu_command_instr - Record an MMU command for instrumentation purposes. 
-+ * -+ * @kbdev: Kbase device used to issue MMU operation on. -+ * @kctx_id: Kernel context ID for MMU command tracepoint. -+ * @cmd: Command issued to the MMU. -+ * @lock_addr: Address of memory region locked for the operation. -+ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. -+ */ -+static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, -+ enum kbase_caller_mmu_sync_info mmu_sync_info) -+{ -+ u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); -+ u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); ++ if (!fence) ++ return -EINVAL; + -+ bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC); ++ dma_fence_put(fence); + -+ KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base, -+ lock_addr_size); ++ return 0; /* valid */ +} + -+/* Helper function to program the LOCKADDR register before LOCK/UNLOCK command -+ * is issued. -+ */ -+static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock_addr, -+ const struct kbase_mmu_hw_op_param *op_param) ++#if !MALI_USE_CSF ++enum base_jd_event_code ++kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) +{ -+ int ret; -+ -+ ret = lock_region(&kbdev->gpu_props, lock_addr, op_param); ++ int res; + -+ if (!ret) { -+ /* Set the region that needs to be updated */ -+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), -+ *lock_addr & 0xFFFFFFFFUL); -+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI), -+ (*lock_addr >> 32) & 0xFFFFFFFFUL); ++ if (!kbase_fence_out_is_ours(katom)) { ++ /* Not our fence */ ++ return BASE_JD_EVENT_JOB_CANCELLED; + } -+ return ret; -+} -+ -+/** -+ * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without -+ * waiting for it's completion. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @lock_addr: Address of memory region locked for this operation. -+ * @op_param: Pointer to a struct containing information about the MMU operation. -+ * -+ * Return: 0 if issuing the command was successful, otherwise an error code. -+ */ -+static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *as, u64 *lock_addr, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ int ret; + -+ ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); ++ res = kbase_fence_out_signal(katom, result); ++ if (unlikely(res < 0)) { ++ dev_warn(katom->kctx->kbdev->dev, ++ "fence_signal() failed with %d\n", res); ++ } + -+ if (likely(!ret)) -+ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK); ++ kbase_sync_fence_out_remove(katom); + -+ return ret; ++ return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + -+/** -+ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to a struct containing information about the MMU operation. -+ * -+ * Return: 0 if issuing the LOCK command was successful, otherwise an error code. 
-+ */ -+static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static void kbase_fence_wait_callback(struct fence *fence, ++ struct fence_cb *cb) ++#else ++static void kbase_fence_wait_callback(struct dma_fence *fence, ++ struct dma_fence_cb *cb) ++#endif +{ -+ int ret; -+ u64 lock_addr = 0x0; -+ -+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) -+ return -EINVAL; -+ -+ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); ++ struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom, ++ dma_fence.fence_cb); ++ struct kbase_context *kctx = katom->kctx; + -+ if (!ret) -+ ret = wait_ready(kbdev, as->number); ++ /* Cancel atom if fence is erroneous */ ++ if (dma_fence_is_signaled(katom->dma_fence.fence_in) && ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ ++ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) ++ katom->dma_fence.fence_in->error < 0) ++#else ++ katom->dma_fence.fence_in->status < 0) ++#endif ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ if (!ret) -+ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, -+ op_param->mmu_sync_info); + -+ return ret; ++ /* To prevent a potential deadlock we schedule the work onto the ++ * job_done_wq workqueue ++ * ++ * The issue is that we may signal the timeline while holding ++ * kctx->jctx.lock and the callbacks are run synchronously from ++ * sync_timeline_signal. So we simply defer the work. ++ */ ++ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} + -+int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) ++int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return mmu_hw_do_lock(kbdev, as, op_param); -+} ++ int err; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ int ret = 0; ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) -+ return -EINVAL; ++ fence = katom->dma_fence.fence_in; ++ if (!fence) ++ return 0; /* no input fence to wait for, good to go! */ + -+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); ++ err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb, ++ kbase_fence_wait_callback); ++ if (err == -ENOENT) { ++ int fence_status = dma_fence_get_status(fence); + -+ /* Wait for UNLOCK command to complete */ -+ if (likely(!ret)) -+ ret = wait_ready(kbdev, as->number); ++ if (fence_status == 1) { ++ /* Fence is already signaled with no error. The completion ++ * for FENCE_WAIT softjob can be done right away. 
++ */ ++ return 0; ++ } + -+ if (likely(!ret)) { -+ u64 lock_addr = 0x0; -+ /* read MMU_AS_CONTROL.LOCKADDR register */ -+ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) -+ << 32; -+ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); ++ /* Fence shouldn't be in not signaled state */ ++ if (!fence_status) { ++ struct kbase_sync_fence_info info; + -+ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, -+ lock_addr, op_param->mmu_sync_info); -+ } ++ kbase_sync_fence_in_info_get(katom, &info); + -+ return ret; -+} ++ dev_warn(katom->kctx->kbdev->dev, ++ "Unexpected status for fence %s of ctx:%d_%d atom:%d", ++ info.name, katom->kctx->tgid, katom->kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom)); ++ } + -+int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ int ret = 0; -+ u64 lock_addr = 0x0; ++ /* If fence is signaled with an error, then the FENCE_WAIT softjob is ++ * considered to be failed. ++ */ ++ } + -+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) -+ return -EINVAL; ++ if (unlikely(err)) { ++ /* We should cause the dependent jobs in the bag to be failed. */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); ++ /* The completion for FENCE_WAIT softjob can be done right away. */ ++ return 0; ++ } + -+ if (!ret) -+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, -+ op_param); ++ /* Callback was successfully installed */ ++ katom->dma_fence.fence_cb_added = true; + -+ return ret; ++ /* Completion to be done later by callback/worker */ ++ return 1; +} + -+/** -+ * mmu_hw_do_flush - Flush MMU and wait for its completion. -+ * -+ * @kbdev: Kbase device to issue the MMU operation on. -+ * @as: Address space to issue the MMU operation on. -+ * @op_param: Pointer to a struct containing information about the MMU operation. -+ * @hwaccess_locked: Flag to indicate if the lock has been held. -+ * -+ * Return: 0 if flushing MMU was successful, otherwise an error code. -+ */ -+static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) ++void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) +{ -+ int ret; -+ u64 lock_addr = 0x0; -+ u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; -+ -+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) -+ return -EINVAL; -+ -+ /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at -+ * this point would be unexpected. -+ */ -+ if (op_param->op != KBASE_MMU_OP_FLUSH_PT && -+ op_param->op != KBASE_MMU_OP_FLUSH_MEM) { -+ dev_err(kbdev->dev, "Unexpected flush operation received"); -+ return -EINVAL; -+ } -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ -+ if (op_param->op == KBASE_MMU_OP_FLUSH_PT) -+ mmu_cmd = AS_COMMAND_FLUSH_PT; -+ -+ /* Lock the region that needs to be updated */ -+ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); -+ if (ret) -+ return ret; -+ -+#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. 
*/ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) && -+ mmu_cmd == AS_COMMAND_FLUSH_MEM) { -+ if (!hwaccess_locked) { -+ unsigned long flags = 0; ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); ++ if (katom->dma_fence.fence_cb_added) { ++ if (!dma_fence_remove_callback(katom->dma_fence.fence_in, ++ &katom->dma_fence.fence_cb)) { ++ /* The callback is already removed so leave the cleanup ++ * for kbase_fence_wait_callback. ++ */ ++ return; + } ++ } else { ++ struct kbase_sync_fence_info info; + -+ if (ret) -+ return ret; ++ kbase_sync_fence_in_info_get(katom, &info); ++ dev_warn(katom->kctx->kbdev->dev, ++ "Callback was not added earlier for fence %s of ctx:%d_%d atom:%d", ++ info.name, katom->kctx->tgid, katom->kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom)); + } -+#endif -+ -+ ret = write_cmd(kbdev, as->number, mmu_cmd); + -+ /* Wait for the command to complete */ -+ if (likely(!ret)) -+ ret = wait_ready(kbdev, as->number); ++ /* Wait was cancelled - zap the atoms */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ if (likely(!ret)) -+ mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, -+ op_param->mmu_sync_info); ++ kbasep_remove_waiting_soft_job(katom); ++ kbase_finish_soft_job(katom); + -+ return ret; ++ if (kbase_jd_done_nolock(katom, true)) ++ kbase_js_sched_all(katom->kctx->kbdev); +} + -+int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) ++void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ return mmu_hw_do_flush(kbdev, as, op_param, true); ++ kbase_fence_out_remove(katom); +} + -+int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) ++void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) +{ -+ return mmu_hw_do_flush(kbdev, as, op_param, false); -+} ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, -+ const struct kbase_mmu_hw_op_param *op_param) -+{ -+ int ret, ret2; -+ u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; ++ if (katom->dma_fence.fence_cb_added) { ++ bool removed = dma_fence_remove_callback(katom->dma_fence.fence_in, ++ &katom->dma_fence.fence_cb); + -+ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) -+ return -EINVAL; ++ /* Here it is expected that the callback should have already been removed ++ * previously either by kbase_sync_fence_in_cancel_wait() or when the fence ++ * was signaled and kbase_sync_fence_wait_worker() was called. ++ */ ++ if (removed) { ++ struct kbase_sync_fence_info info; + -+ /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at -+ * this point would be unexpected. 
-+ */ -+ if (op_param->op != KBASE_MMU_OP_FLUSH_PT && -+ op_param->op != KBASE_MMU_OP_FLUSH_MEM) { -+ dev_err(kbdev->dev, "Unexpected flush operation received"); -+ return -EINVAL; ++ kbase_sync_fence_in_info_get(katom, &info); ++ dev_warn(katom->kctx->kbdev->dev, ++ "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d", ++ info.name, katom->kctx->tgid, katom->kctx->id, ++ kbase_jd_atom_id(katom->kctx, katom)); ++ } + } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ -+ if (op_param->op == KBASE_MMU_OP_FLUSH_PT) -+ gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; -+ -+ /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ -+ ret = mmu_hw_do_lock(kbdev, as, op_param); -+ if (ret) -+ return ret; ++ kbase_fence_in_remove(katom); ++ katom->dma_fence.fence_cb_added = false; ++} ++#endif /* !MALI_USE_CSF */ + -+ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ -+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd); ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++void kbase_sync_fence_info_get(struct fence *fence, ++ struct kbase_sync_fence_info *info) ++#else ++void kbase_sync_fence_info_get(struct dma_fence *fence, ++ struct kbase_sync_fence_info *info) ++#endif ++{ ++ info->fence = fence; + -+ /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ -+ ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); ++ /* Translate into the following status, with support for error handling: ++ * < 0 : error ++ * 0 : active ++ * 1 : signaled ++ */ ++ if (dma_fence_is_signaled(fence)) { ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ ++ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) ++ int status = fence->error; ++#else ++ int status = fence->status; ++#endif ++ if (status < 0) ++ info->status = status; /* signaled with error */ ++ else ++ info->status = 1; /* signaled with success */ ++ } else { ++ info->status = 0; /* still active (unsignaled) */ ++ } + -+ return ret ?: ret2; ++#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) ++ scnprintf(info->name, sizeof(info->name), "%llu#%u", ++ fence->context, fence->seqno); ++#else ++ scnprintf(info->name, sizeof(info->name), "%llu#%llu", ++ fence->context, fence->seqno); ++#endif +} + -+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ enum kbase_mmu_fault_type type) ++#if !MALI_USE_CSF ++int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) +{ -+ unsigned long flags; -+ u32 pf_bf_mask; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ fence = kbase_fence_in_get(katom); ++ if (!fence) ++ return -ENOENT; + -+ /* -+ * A reset is in-flight and we're flushing the IRQ + bottom half -+ * so don't update anything as it could race with the reset code. 
-+ */ -+ if (kbdev->irq_reset_flush) -+ goto unlock; ++ kbase_sync_fence_info_get(fence, info); + -+ /* Clear the page (and bus fault IRQ as well in case one occurred) */ -+ pf_bf_mask = MMU_PAGE_FAULT(as->number); -+#if !MALI_USE_CSF -+ if (type == KBASE_MMU_FAULT_TYPE_BUS || -+ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) -+ pf_bf_mask |= MMU_BUS_ERROR(as->number); -+#endif -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); ++ kbase_fence_put(fence); + -+unlock: -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ return 0; +} + -+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ enum kbase_mmu_fault_type type) ++int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) +{ -+ unsigned long flags; -+ u32 irq_mask; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ /* Enable the page fault IRQ -+ * (and bus fault IRQ as well in case one occurred) -+ */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ fence = kbase_fence_out_get(katom); ++ if (!fence) ++ return -ENOENT; + -+ /* -+ * A reset is in-flight and we're flushing the IRQ + bottom half -+ * so don't update anything as it could race with the reset code. -+ */ -+ if (kbdev->irq_reset_flush) -+ goto unlock; ++ kbase_sync_fence_info_get(fence, info); + -+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | -+ MMU_PAGE_FAULT(as->number); ++ kbase_fence_put(fence); + -+#if !MALI_USE_CSF -+ if (type == KBASE_MMU_FAULT_TYPE_BUS || -+ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) -+ irq_mask |= MMU_BUS_ERROR(as->number); -+#endif -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); ++ return 0; ++} + -+unlock: -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ ++#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG ++void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) ++{ ++ /* Not implemented */ +} -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h ++#endif ++#endif /* !MALI_USE_CSF*/ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c new file mode 100644 -index 000000000..9d7ce4856 +index 000000000..7df7d79b6 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h -@@ -0,0 +1,72 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c +@@ -0,0 +1,225 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -258632,344 +259151,220 @@ index 000000000..9d7ce4856 + * + */ + -+#ifndef _KBASE_MMU_INTERNAL_H_ -+#define _KBASE_MMU_INTERNAL_H_ -+ -+void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, -+ struct kbase_mmu_setup * const setup); -+ -+/** -+ * kbase_mmu_report_mcu_as_fault_and_reset - Report page fault for all -+ * address spaces and reset the GPU. 
-+ * @kbdev: The kbase_device the fault happened on -+ * @fault: Data relating to the fault -+ */ -+void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, -+ struct kbase_fault *fault); -+ -+void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, struct kbase_fault *fault); -+ -+void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, const char *reason_str, -+ struct kbase_fault *fault); -+ -+/** -+ * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible -+ * @kctx: kbase_context for the faulting address space. -+ * @reg: of a growable GPU memory region in the same context. -+ * Takes ownership of the reference if successful. -+ * -+ * Used to switch to incremental rendering if we have nearly run out of -+ * virtual address space in a growable memory region. -+ * -+ * Return: 0 if successful, otherwise a negative error code. -+ */ -+int kbase_mmu_switch_to_ir(struct kbase_context *kctx, -+ struct kbase_va_region *reg); ++#include ++#include ++#include ++#include + +/** -+ * kbase_mmu_page_fault_worker() - Process a page fault. ++ * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation ++ * is mapped to GPU for the first time within a process. ++ * Another instantiation is done for the case when that ++ * allocation is mapped for the first time to GPU. + * -+ * @data: work_struct passed by queue_work() ++ * @dma_buf: Reference to dma_buf been imported. ++ * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. ++ * @import_count: The number of times the dma_buf was imported. + */ -+void kbase_mmu_page_fault_worker(struct work_struct *data); ++struct kbase_dma_buf { ++ struct dma_buf *dma_buf; ++ struct rb_node dma_buf_node; ++ u32 import_count; ++}; + +/** -+ * kbase_mmu_bus_fault_worker() - Process a bus fault. -+ * -+ * @data: work_struct passed by queue_work() -+ */ -+void kbase_mmu_bus_fault_worker(struct work_struct *data); -+ -+#endif /* _KBASE_MMU_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c -new file mode 100644 -index 000000000..1464320cb ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c -@@ -0,0 +1,209 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kctx: Pointer to kbase context. ++ * @dma_buf: Pointer to a dma buffer mapping. ++ * @tree: Pointer to root of rb_tree containing the dma_buf's mapped. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * when we un-map any dma mapping we need to remove them from rb_tree, ++ * rb_tree is maintained at kbase_device level and kbase_process level ++ * by passing the root of kbase_device or kbase_process we can remove ++ * the node from the tree. + * ++ * Return: true on success. + */ -+ -+#include "mali_kbase.h" -+#include -+#include "mali_kbase_defs.h" -+#include -+#include -+ -+#define ENTRY_TYPE_MASK 3ULL -+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). -+ * Valid ATE entries at level 3 are flagged with the value 3. -+ * Valid ATE entries at level 0-2 are flagged with the value 1. -+ */ -+#define ENTRY_IS_ATE_L3 3ULL -+#define ENTRY_IS_ATE_L02 1ULL -+#define ENTRY_IS_INVAL 2ULL -+#define ENTRY_IS_PTE 3ULL -+ -+#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ -+#define ENTRY_ACCESS_RO (3ULL << 6) -+#define ENTRY_ACCESS_BIT (1ULL << 10) -+#define ENTRY_NX_BIT (1ULL << 54) -+ -+#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) -+#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) -+ -+/* Helper Function to perform assignment of page table entries, to -+ * ensure the use of strd, which is required on LPAE systems. -+ */ -+static inline void page_table_entry_set(u64 *pte, u64 phy) ++static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, ++ struct dma_buf *dma_buf, ++ struct rb_root *tree) +{ -+ WRITE_ONCE(*pte, phy); -+} ++ struct kbase_dma_buf *buf_node = NULL; ++ struct rb_node *node = tree->rb_node; ++ bool mapping_removed = false; + -+static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) -+{ -+ struct kbase_as *as; -+ struct kbase_mmu_setup *current_setup; ++ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + -+ if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) -+ return; ++ while (node) { ++ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + -+ as = &kbdev->as[as_nr]; -+ current_setup = &as->current_setup; ++ if (dma_buf == buf_node->dma_buf) { ++ WARN_ON(!buf_node->import_count); + -+ kbase_mmu_get_as_setup(mmut, current_setup); ++ buf_node->import_count--; + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as); -+} ++ if (!buf_node->import_count) { ++ rb_erase(&buf_node->dma_buf_node, tree); ++ kfree(buf_node); ++ mapping_removed = true; ++ } + -+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -+{ -+ struct kbase_as *const as = &kbdev->as[as_nr]; -+ struct kbase_mmu_setup *const current_setup = &as->current_setup; ++ break; ++ } + -+ current_setup->transtab = 0ULL; -+ current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; ++ if (dma_buf < buf_node->dma_buf) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as); ++ WARN_ON(!buf_node); ++ return mapping_removed; +} + -+static phys_addr_t pte_to_phy_addr(u64 entry) ++/** ++ * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. ++ * ++ * @kctx: Pointer to kbase context. ++ * @dma_buf: Pointer to a dma buffer mapping. ++ * @root: Pointer to root of rb_tree containing the dma_buf's. ++ * ++ * We maintain a kbase_device level and kbase_process level rb_tree ++ * of all unique dma_buf's mapped to gpu memory. So when attach any ++ * dma_buf add it the rb_tree's. To add the unique mapping we need ++ * check if the mapping is not a duplicate and then add them. 
++ * ++ * Return: true on success ++ */ ++static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, ++ struct dma_buf *dma_buf, ++ struct rb_root *root) +{ -+ if (!(entry & 1)) -+ return 0; ++ struct kbase_dma_buf *buf_node = NULL; ++ struct rb_node *node = root->rb_node; ++ bool unique_buf_imported = true; + -+ entry &= ~VALID_ENTRY_MASK; -+ return entry & ~0xFFF; -+} ++ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + -+static int ate_is_valid(u64 ate, int const level) -+{ -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) -+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3); -+ else -+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02); -+} ++ while (node) { ++ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + -+static int pte_is_valid(u64 pte, int const level) -+{ -+ /* PTEs cannot exist at the bottom level */ -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) -+ return false; -+ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -+} ++ if (dma_buf == buf_node->dma_buf) { ++ unique_buf_imported = false; ++ break; ++ } + -+/* -+ * Map KBASE_REG flags to MMU flags -+ */ -+static u64 get_mmu_flags(unsigned long flags) -+{ -+ u64 mmu_flags; ++ if (dma_buf < buf_node->dma_buf) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } + -+ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ -+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; ++ if (unique_buf_imported) { ++ struct kbase_dma_buf *new_buf_node = ++ kzalloc(sizeof(*new_buf_node), GFP_KERNEL); + -+ /* Set access flags - note that AArch64 stage 1 does not support -+ * write-only access, so we use read/write instead -+ */ -+ if (flags & KBASE_REG_GPU_WR) -+ mmu_flags |= ENTRY_ACCESS_RW; -+ else if (flags & KBASE_REG_GPU_RD) -+ mmu_flags |= ENTRY_ACCESS_RO; ++ if (new_buf_node == NULL) { ++ dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); ++ /* Dont account for it if we fail to allocate memory */ ++ unique_buf_imported = false; ++ } else { ++ struct rb_node **new = &(root->rb_node), *parent = NULL; + -+ /* nx if requested */ -+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; ++ new_buf_node->dma_buf = dma_buf; ++ new_buf_node->import_count = 1; ++ while (*new) { ++ struct kbase_dma_buf *new_node; + -+ if (flags & KBASE_REG_SHARE_BOTH) { -+ /* inner and outer shareable */ -+ mmu_flags |= SHARE_BOTH_BITS; -+ } else if (flags & KBASE_REG_SHARE_IN) { -+ /* inner shareable coherency */ -+ mmu_flags |= SHARE_INNER_BITS; ++ parent = *new; ++ new_node = rb_entry(parent, struct kbase_dma_buf, ++ dma_buf_node); ++ if (dma_buf < new_node->dma_buf) ++ new = &(*new)->rb_left; ++ else ++ new = &(*new)->rb_right; ++ } ++ rb_link_node(&new_buf_node->dma_buf_node, parent, new); ++ rb_insert_color(&new_buf_node->dma_buf_node, root); ++ } ++ } else if (!WARN_ON(!buf_node)) { ++ buf_node->import_count++; + } + -+ return mmu_flags; ++ return unique_buf_imported; +} + -+static void entry_set_ate(u64 *entry, struct tagged_addr phy, unsigned long flags, int const level) ++void kbase_remove_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) +{ -+ if (level == MIDGARD_MMU_BOTTOMLEVEL) -+ page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | -+ ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); -+ else -+ page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | -+ ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); -+} ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool dev_mapping_removed, prcs_mapping_removed; + -+static unsigned int get_num_valid_entries(u64 *pgd) -+{ -+ register unsigned int num_of_valid_entries; ++ mutex_lock(&kbdev->dma_buf_lock); + -+ num_of_valid_entries = (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> -+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); -+ num_of_valid_entries |= (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> -+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); -+ num_of_valid_entries |= (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> -+ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); ++ dev_mapping_removed = kbase_delete_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + -+ return num_of_valid_entries; -+} ++ prcs_mapping_removed = kbase_delete_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + -+static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) -+{ -+ WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); ++ WARN_ON(dev_mapping_removed && !prcs_mapping_removed); + -+ pgd[0] &= ~VALID_ENTRY_MASK; -+ pgd[0] |= ((u64)(num_of_valid_entries & 0xF) << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ if (dev_mapping_removed) ++ kbdev->total_gpu_pages -= alloc->nents; + -+ pgd[1] &= ~VALID_ENTRY_MASK; -+ pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) -+ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); ++ if (prcs_mapping_removed) ++ kctx->kprcs->total_gpu_pages -= alloc->nents; + -+ pgd[2] &= ~VALID_ENTRY_MASK; -+ pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF) -+ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); -+} ++ if (dev_mapping_removed || prcs_mapping_removed) ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ spin_unlock(&kbdev->gpu_mem_usage_lock); + -+static void entry_set_pte(u64 *entry, phys_addr_t phy) -+{ -+ page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); ++ mutex_unlock(&kbdev->dma_buf_lock); +} + -+static void entries_invalidate(u64 *entry, u32 count) ++void kbase_add_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) +{ -+ u32 i; ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool unique_dev_dmabuf, 
unique_prcs_dmabuf; + -+ for (i = 0; i < count; i++) -+ page_table_entry_set(entry + i, ENTRY_IS_INVAL); -+} ++ mutex_lock(&kbdev->dma_buf_lock); + -+static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update, -+ .get_as_setup = kbase_mmu_get_as_setup, -+ .disable_as = mmu_disable_as, -+ .pte_to_phy_addr = pte_to_phy_addr, -+ .ate_is_valid = ate_is_valid, -+ .pte_is_valid = pte_is_valid, -+ .entry_set_ate = entry_set_ate, -+ .entry_set_pte = entry_set_pte, -+ .entries_invalidate = entries_invalidate, -+ .get_num_valid_entries = get_num_valid_entries, -+ .set_num_valid_entries = set_num_valid_entries, -+ .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; ++ /* add dma_buf to device and process. */ ++ unique_dev_dmabuf = kbase_capture_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + -+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) -+{ -+ return &aarch64_mode; -+} -diff --git a/drivers/gpu/arm/bifrost/platform/Kconfig b/drivers/gpu/arm/bifrost/platform/Kconfig -new file mode 100644 -index 000000000..3e1bd235b ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/Kconfig -@@ -0,0 +1,26 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + -+# Add your platform specific Kconfig file here -+# -+# "drivers/gpu/arm/bifrost/platform/xxx/Kconfig" -+# -+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME -+# -diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild -new file mode 100755 -index 000000000..60a52d80f ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild -@@ -0,0 +1,24 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# ++ WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); + -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o -diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ if (unique_dev_dmabuf) ++ kbdev->total_gpu_pages += alloc->nents; ++ ++ if (unique_prcs_dmabuf) ++ kctx->kprcs->total_gpu_pages += alloc->nents; ++ ++ if (unique_prcs_dmabuf || unique_dev_dmabuf) ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ spin_unlock(&kbdev->gpu_mem_usage_lock); ++ ++ mutex_unlock(&kbdev->dma_buf_lock); ++} +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h new file mode 100644 -index 000000000..4bcd5854d +index 000000000..fd871fcb3 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c -@@ -0,0 +1,105 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h +@@ -0,0 +1,100 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2015, 2017-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -258987,159 +259382,95 @@ index 000000000..4bcd5854d + * + */ + -+#include -+#include -+#include -+#include "mali_kbase_config_platform.h" ++#ifndef _KBASE_TRACE_GPU_MEM_H_ ++#define _KBASE_TRACE_GPU_MEM_H_ + -+#if MALI_USE_CSF -+#include ++#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) ++#include +#endif + -+static void *enumerate_gpu_clk(struct kbase_device *kbdev, -+ unsigned int index) -+{ -+ if (index >= kbdev->nr_clocks) -+ return NULL; ++#define DEVICE_TGID ((u32) 0U) + -+#if MALI_USE_CSF -+ if (of_machine_is_compatible("arm,juno")) -+ WARN_ON(kbdev->nr_clocks != 1); -+#endif ++static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) ++ lockdep_assert_held(&kbdev->gpu_mem_usage_lock); + -+ return kbdev->clocks[index]; -+} ++ trace_gpu_mem_total(kbdev->id, DEVICE_TGID, ++ kbdev->total_gpu_pages << PAGE_SHIFT); + -+static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, -+ void *gpu_clk_handle) -+{ -+#if MALI_USE_CSF -+ /* On Juno fpga platforms, the GPU clock rate is reported as 600 MHZ at -+ * the boot time. Then after the first call to kbase_devfreq_target() -+ * the clock rate is reported as 450 MHZ and the frequency does not -+ * change after that. But the actual frequency at which GPU operates -+ * is always 50 MHz, which is equal to the frequency of system counter -+ * and HW counters also increment at the same rate. -+ * DVFS, which is a client of kbase_ipa_control, needs normalization of -+ * GPU_ACTIVE counter to calculate the time for which GPU has been busy. -+ * So for the correct normalization need to return the system counter -+ * frequency value. -+ * This is a reasonable workaround as the frequency value remains same -+ * throughout. It can be removed after GPUCORE-25693. 
-+ */ -+ if (of_machine_is_compatible("arm,juno")) -+ return arch_timer_get_cntfrq(); ++ if (likely(kctx)) ++ trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, ++ kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +#endif -+ -+ return clk_get_rate((struct clk *)gpu_clk_handle); +} + -+static int gpu_clk_notifier_register(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) ++static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, ++ struct kbase_context *kctx, size_t pages) +{ -+ compiletime_assert(offsetof(struct clk_notifier_data, clk) == -+ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), -+ "mismatch in the offset of clk member"); ++ spin_lock(&kbdev->gpu_mem_usage_lock); + -+ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == -+ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), -+ "mismatch in the size of clk member"); ++ if (likely(kctx)) ++ kctx->kprcs->total_gpu_pages -= pages; + -+#if MALI_USE_CSF -+ /* Frequency is fixed on Juno platforms */ -+ if (of_machine_is_compatible("arm,juno")) -+ return 0; -+#endif ++ kbdev->total_gpu_pages -= pages; + -+ return clk_notifier_register((struct clk *)gpu_clk_handle, nb); ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ ++ spin_unlock(&kbdev->gpu_mem_usage_lock); +} + -+static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) ++static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, ++ struct kbase_context *kctx, size_t pages) +{ -+#if MALI_USE_CSF -+ if (of_machine_is_compatible("arm,juno")) -+ return; -+#endif ++ spin_lock(&kbdev->gpu_mem_usage_lock); + -+ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); ++ if (likely(kctx)) ++ kctx->kprcs->total_gpu_pages += pages; ++ ++ kbdev->total_gpu_pages += pages; ++ ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ ++ spin_unlock(&kbdev->gpu_mem_usage_lock); +} + -+struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { -+ .get_gpu_clk_rate = get_gpu_clk_rate, -+ .enumerate_gpu_clk = enumerate_gpu_clk, -+ .gpu_clk_notifier_register = gpu_clk_notifier_register, -+ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, -+}; -diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c -new file mode 100644 -index 000000000..a0b4a434e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c -@@ -0,0 +1,53 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2015, 2017, 2020-2021 ARM Limited. All rights reserved. ++/** ++ * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * @kctx: Pointer to the kbase context ++ * @alloc: Pointer to the alloc to unmap + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Remove reference to dma buf been unmapped from kbase_device level ++ * rb_tree and Kbase_process level dma buf rb_tree. 
++ */ ++void kbase_remove_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc); ++ ++/** ++ * kbase_add_dma_buf_usage - Add a dma-buf entry captured. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kctx: Pointer to the kbase context ++ * @alloc: Pointer to the alloc to map in + * ++ * Add reference to dma buf been mapped to kbase_device level ++ * rb_tree and Kbase_process level dma buf rb_tree. + */ ++void kbase_add_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc); + -+#include -+#include -+#include -+ -+static struct kbase_platform_config dummy_platform_config; -+ -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &dummy_platform_config; -+} -+ -+#ifndef CONFIG_OF -+int kbase_platform_register(void) -+{ -+ return 0; -+} -+ -+void kbase_platform_unregister(void) -+{ -+} -+#endif -+ -+#ifdef CONFIG_MALI_BIFROST_DVFS -+#if MALI_USE_CSF -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) -+#else -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) -+#endif -+{ -+ return 1; -+} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h ++#endif /* _KBASE_TRACE_GPU_MEM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h new file mode 100644 -index 000000000..584a7217d +index 000000000..bd66f7167 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h -@@ -0,0 +1,47 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_utility.h +@@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -259157,42 +259488,36 @@ index 000000000..584a7217d + * + */ + -+/** -+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration -+ * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf -+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++#ifndef _KBASE_UTILITY_H ++#define _KBASE_UTILITY_H + -+/** -+ * PLATFORM_FUNCS - Platform specific configuration functions -+ * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf -+ */ -+#define PLATFORM_FUNCS (NULL) ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" ++#endif + -+#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) ++#ifndef WRITE_ONCE ++ #ifdef ASSIGN_ONCE ++ #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) ++ #else ++ #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) ++ #endif ++#endif + -+extern struct kbase_pm_callback_conf pm_callbacks; -+extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; -+/** -+ * AUTO_SUSPEND_DELAY - Autosuspend delay -+ * -+ * The delay time (in milliseconds) to be used for autosuspend -+ */ -+#define AUTO_SUSPEND_DELAY (100) -diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c ++#ifndef READ_ONCE ++ #define READ_ONCE(x) ACCESS_ONCE(x) ++#endif ++ ++#endif /* _KBASE_UTILITY_H */ +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c new file mode 100644 -index 000000000..2687bee96 +index 000000000..d770913e9 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c -@@ -0,0 +1,298 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +@@ -0,0 +1,1132 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -259210,1855 +259535,1127 @@ index 000000000..2687bee96 + * + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_kbase_config_platform.h" ++#include "mali_kbase_vinstr.h" ++#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" ++#include "hwcnt/mali_kbase_hwcnt_types.h" ++#include ++#include "hwcnt/mali_kbase_hwcnt_gpu.h" ++#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" ++#include ++#include "mali_malisw.h" ++#include "mali_kbase_debug.h" + -+static void enable_gpu_power_control(struct kbase_device *kbdev) -+{ -+ unsigned int i; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#if defined(CONFIG_REGULATOR) -+ for (i = 0; i < kbdev->nr_regulators; i++) { -+ if (WARN_ON(kbdev->regulators[i] == NULL)) -+ ; -+ else if (!regulator_is_enabled(kbdev->regulators[i])) -+ WARN_ON(regulator_enable(kbdev->regulators[i])); -+ } ++/* Explicitly include epoll header for old kernels. Not required from 4.16. 
*/ ++#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE ++#include +#endif + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ if (WARN_ON(kbdev->clocks[i] == NULL)) -+ ; -+ else if (!__clk_is_enabled(kbdev->clocks[i])) -+ WARN_ON(clk_prepare_enable(kbdev->clocks[i])); -+ } -+} ++/* Hwcnt reader API version */ ++#define HWCNT_READER_API 1 + -+static void disable_gpu_power_control(struct kbase_device *kbdev) -+{ -+ unsigned int i; ++/* The minimum allowed interval between dumps (equivalent to 10KHz) */ ++#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ if (WARN_ON(kbdev->clocks[i] == NULL)) -+ ; -+ else if (__clk_is_enabled(kbdev->clocks[i])) { -+ clk_disable_unprepare(kbdev->clocks[i]); -+ WARN_ON(__clk_is_enabled(kbdev->clocks[i])); -+ } ++/* The maximum allowed buffers per client */ ++#define MAX_BUFFER_COUNT 32 + -+ } ++/** ++ * struct kbase_vinstr_context - IOCTL interface for userspace hardware ++ * counters. ++ * @hvirt: Hardware counter virtualizer used by vinstr. ++ * @metadata: Hardware counter metadata provided by virtualizer. ++ * @metadata_user: API compatible hardware counter metadata provided by vinstr. ++ * For compatibility with the user driver interface, this ++ * contains a narrowed version of the HWCNT metadata limited ++ * to 64 entries per block of 32 bits each. ++ * @lock: Lock protecting all vinstr state. ++ * @suspend_count: Suspend reference count. If non-zero, timer and worker are ++ * prevented from being re-scheduled. ++ * @client_count: Number of vinstr clients. ++ * @clients: List of vinstr clients. ++ * @dump_timer: Timer that enqueues dump_work to a workqueue. ++ * @dump_work: Worker for performing periodic counter dumps. ++ */ ++struct kbase_vinstr_context { ++ struct kbase_hwcnt_virtualizer *hvirt; ++ const struct kbase_hwcnt_metadata *metadata; ++ const struct kbase_hwcnt_metadata_narrow *metadata_user; ++ struct mutex lock; ++ size_t suspend_count; ++ size_t client_count; ++ struct list_head clients; ++ struct hrtimer dump_timer; ++ struct work_struct dump_work; ++}; + -+#if defined(CONFIG_REGULATOR) -+ for (i = 0; i < kbdev->nr_regulators; i++) { -+ if (WARN_ON(kbdev->regulators[i] == NULL)) -+ ; -+ else if (regulator_is_enabled(kbdev->regulators[i])) -+ WARN_ON(regulator_disable(kbdev->regulators[i])); -+ } -+#endif ++/** ++ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. ++ * @vctx: Vinstr context client is attached to. ++ * @hvcli: Hardware counter virtualizer client. ++ * @node: Node used to attach this client to list in vinstr ++ * context. ++ * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic ++ * client. ++ * @next_dump_time_ns: Time in ns when this client's next periodic dump must ++ * occur. If 0, not a periodic client. ++ * @enable_map: Counters enable map. ++ * @tmp_buf: Temporary buffer to use before handing dump to client. ++ * @dump_bufs: Array of narrow dump buffers allocated by this client. ++ * @dump_bufs_meta: Metadata of hwcnt reader client buffers. ++ * @meta_idx: Index of metadata being accessed by userspace. ++ * @read_idx: Index of buffer read by userspace. ++ * @write_idx: Index of buffer being written by dump worker. ++ * @waitq: Client's notification queue. 
++ */ ++struct kbase_vinstr_client { ++ struct kbase_vinstr_context *vctx; ++ struct kbase_hwcnt_virtualizer_client *hvcli; ++ struct list_head node; ++ u64 next_dump_time_ns; ++ u32 dump_interval_ns; ++ struct kbase_hwcnt_enable_map enable_map; ++ struct kbase_hwcnt_dump_buffer tmp_buf; ++ struct kbase_hwcnt_dump_buffer_narrow_array dump_bufs; ++ struct kbase_hwcnt_reader_metadata *dump_bufs_meta; ++ atomic_t meta_idx; ++ atomic_t read_idx; ++ atomic_t write_idx; ++ wait_queue_head_t waitq; ++}; + -+} ++static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait); + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ int ret = 1; /* Assume GPU has been powered off */ -+ int error; -+ unsigned long flags; ++static long kbasep_vinstr_hwcnt_reader_ioctl( ++ struct file *filp, ++ unsigned int cmd, ++ unsigned long arg); + -+ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); ++static int kbasep_vinstr_hwcnt_reader_mmap( ++ struct file *filp, ++ struct vm_area_struct *vma); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(kbdev->pm.backend.gpu_powered); -+#if MALI_USE_CSF -+ if (likely(kbdev->csf.firmware_inited)) { -+ WARN_ON(!kbdev->pm.active_count); -+ WARN_ON(kbdev->pm.runtime_active); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++static int kbasep_vinstr_hwcnt_reader_release( ++ struct inode *inode, ++ struct file *filp); + -+ enable_gpu_power_control(kbdev); -+ CSTD_UNUSED(error); -+#else -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++/* Vinstr client file operations */ ++static const struct file_operations vinstr_client_fops = { ++ .owner = THIS_MODULE, ++ .poll = kbasep_vinstr_hwcnt_reader_poll, ++ .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, ++ .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, ++ .mmap = kbasep_vinstr_hwcnt_reader_mmap, ++ .release = kbasep_vinstr_hwcnt_reader_release, ++}; + -+#ifdef KBASE_PM_RUNTIME -+ error = pm_runtime_get_sync(kbdev->dev); -+ if (error == 1) { -+ /* -+ * Let core know that the chip has not been -+ * powered off, so we can save on re-initialization. -+ */ -+ ret = 0; -+ } -+ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); -+#else -+ enable_gpu_power_control(kbdev); -+#endif /* KBASE_PM_RUNTIME */ ++/** ++ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. ++ * ++ * Return: Current time in nanoseconds. ++ */ ++static u64 kbasep_vinstr_timestamp_ns(void) ++{ ++ return ktime_get_raw_ns(); ++} + -+#endif /* MALI_USE_CSF */ ++/** ++ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. ++ * @cur_ts_ns: Current time in nanoseconds. ++ * @interval: Interval between dumps in nanoseconds. ++ * ++ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump ++ * time that occurs after cur_ts_ns. ++ */ ++static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) ++{ ++ /* Non-periodic client */ ++ if (interval == 0) ++ return 0; + -+ return ret; ++ /* ++ * Return the next interval after the current time relative to t=0. ++ * This means multiple clients with the same period will synchronise, ++ * regardless of when they were started, allowing the worker to be ++ * scheduled less frequently. ++ */ ++ do_div(cur_ts_ns, interval); ++ return (cur_ts_ns + 1) * interval; +} + -+static void pm_callback_power_off(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_client_dump() - Perform a dump for a client. ++ * @vcli: Non-NULL pointer to a vinstr client. 
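
The rounding in kbasep_vinstr_next_dump_time_ns() above is what makes clients with the same period line up on the same expiry, so one worker wake-up can serve all of them. A standalone sketch of the same arithmetic, with plain 64-bit division in place of do_div() and made-up timestamps:

#include <stdint.h>
#include <stdio.h>

/* Next multiple of `interval` strictly after `cur_ts_ns`, counted from t=0;
 * a return of 0 means "not a periodic client", as in the driver function.
 */
static uint64_t next_dump_time_ns(uint64_t cur_ts_ns, uint32_t interval)
{
	if (interval == 0)
		return 0;
	return (cur_ts_ns / interval + 1) * interval;
}

int main(void)
{
	/* Two 1 ms clients polled at different moments land on the same
	 * 3 ms boundary, so a single worker wake-up can serve both.
	 */
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2300000, 1000000));
	printf("%llu\n", (unsigned long long)next_dump_time_ns(2999999, 1000000));
	/* Both lines print 3000000. */
	return 0;
}
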
++ * @event_id: Event type that triggered the dump. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_vinstr_client_dump( ++ struct kbase_vinstr_client *vcli, ++ enum base_hwcnt_reader_event event_id) +{ -+ unsigned long flags; ++ int errcode; ++ u64 ts_start_ns; ++ u64 ts_end_ns; ++ unsigned int write_idx; ++ unsigned int read_idx; ++ struct kbase_hwcnt_dump_buffer *tmp_buf; ++ struct kbase_hwcnt_dump_buffer_narrow *dump_buf; ++ struct kbase_hwcnt_reader_metadata *meta; ++ u8 clk_cnt; + -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ WARN_ON(!vcli); ++ lockdep_assert_held(&vcli->vctx->lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(kbdev->pm.backend.gpu_powered); -+#if MALI_USE_CSF -+ if (likely(kbdev->csf.firmware_inited)) { -+#ifdef CONFIG_MALI_BIFROST_DEBUG -+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -+#endif -+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ write_idx = atomic_read(&vcli->write_idx); ++ read_idx = atomic_read(&vcli->read_idx); + -+ /* Power down the GPU immediately */ -+ disable_gpu_power_control(kbdev); -+#else /* MALI_USE_CSF */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Check if there is a place to copy HWC block into. */ ++ if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) ++ return -EBUSY; ++ write_idx %= vcli->dump_bufs.buf_cnt; + -+#ifdef KBASE_PM_RUNTIME -+ pm_runtime_mark_last_busy(kbdev->dev); -+ pm_runtime_put_autosuspend(kbdev->dev); -+#else -+ /* Power down the GPU immediately as runtime PM is disabled */ -+ disable_gpu_power_control(kbdev); -+#endif -+#endif /* MALI_USE_CSF */ -+} ++ dump_buf = &vcli->dump_bufs.bufs[write_idx]; ++ meta = &vcli->dump_bufs_meta[write_idx]; ++ tmp_buf = &vcli->tmp_buf; + -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+static void pm_callback_runtime_gpu_active(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ int error; ++ errcode = kbase_hwcnt_virtualizer_client_dump( ++ vcli->hvcli, &ts_start_ns, &ts_end_ns, tmp_buf); ++ if (errcode) ++ return errcode; + -+ lockdep_assert_held(&kbdev->pm.lock); ++ /* Patch the dump buf headers, to hide the counters that other hwcnt ++ * clients are using. ++ */ ++ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &vcli->enable_map); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(!kbdev->pm.backend.gpu_powered); -+ WARN_ON(!kbdev->pm.active_count); -+ WARN_ON(kbdev->pm.runtime_active); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Copy the temp buffer to the userspace visible buffer. The strict ++ * variant will explicitly zero any non-enabled counters to ensure ++ * nothing except exactly what the user asked for is made visible. ++ * ++ * A narrow copy is required since virtualizer has a bigger buffer ++ * but user only needs part of it. ++ */ ++ kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf, ++ &vcli->enable_map); + -+ if (pm_runtime_status_suspended(kbdev->dev)) { -+ error = pm_runtime_get_sync(kbdev->dev); -+ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d", error); -+ } else { -+ /* Call the async version here, otherwise there could be -+ * a deadlock if the runtime suspend operation is ongoing. -+ * Caller would have taken the kbdev->pm.lock and/or the -+ * scheduler lock, and the runtime suspend callback function -+ * will also try to acquire the same lock(s). 
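
The dump path above treats write_idx and read_idx as free-running 32-bit counters: the ring is full when write_idx - read_idx == buf_cnt (wrap-safe in unsigned arithmetic), and the power-of-two buffer count keeps the idx % buf_cnt slot mapping consistent even when the counters wrap around. A minimal userspace model of that producer-side check, with illustrative names:

#include <stdint.h>
#include <stdio.h>

#define BUF_CNT 4u   /* must be a power of two, as the driver requires */

static uint32_t write_idx;  /* advanced by the dump worker (producer) */
static uint32_t read_idx;   /* advanced once userspace returns a buffer */

/* Returns the slot to fill, or -1 when all buffers are still owned by
 * userspace. Unsigned subtraction keeps the test correct across wrap-around.
 */
static int producer_claim_slot(void)
{
	if (write_idx - read_idx == BUF_CNT)
		return -1;                    /* ring full: -EBUSY in the driver */
	return (int)(write_idx % BUF_CNT);    /* slot index inside the ring */
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		int slot = producer_claim_slot();

		if (slot < 0) {
			printf("dump %d: ring full\n", i);
			read_idx++;               /* pretend userspace put one back */
			slot = producer_claim_slot();
		}
		printf("dump %d -> slot %d\n", i, slot);
		write_idx++;                      /* publish, as atomic_inc() does */
	}
	return 0;
}
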
-+ */ -+ error = pm_runtime_get(kbdev->dev); -+ dev_dbg(kbdev->dev, "pm_runtime_get returned %d", error); -+ } ++ clk_cnt = vcli->vctx->metadata->clk_cnt; + -+ kbdev->pm.runtime_active = true; ++ meta->timestamp = ts_end_ns; ++ meta->event_id = event_id; ++ meta->buffer_idx = write_idx; ++ meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; ++ meta->cycles.shader_cores = ++ (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; ++ ++ /* Notify client. Make sure all changes to memory are visible. */ ++ wmb(); ++ atomic_inc(&vcli->write_idx); ++ wake_up_interruptible(&vcli->waitq); ++ return 0; +} + -+static void pm_callback_runtime_gpu_idle(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. ++ * @vcli: Non-NULL pointer to a vinstr client. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) +{ -+ unsigned long flags; -+ -+ lockdep_assert_held(&kbdev->pm.lock); -+ -+ dev_dbg(kbdev->dev, "%s", __func__); ++ u64 ts_start_ns; ++ u64 ts_end_ns; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(!kbdev->pm.backend.gpu_powered); -+ WARN_ON(kbdev->pm.backend.l2_state != KBASE_L2_OFF); -+ WARN_ON(kbdev->pm.active_count); -+ WARN_ON(!kbdev->pm.runtime_active); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ WARN_ON(!vcli); ++ lockdep_assert_held(&vcli->vctx->lock); + -+ pm_runtime_mark_last_busy(kbdev->dev); -+ pm_runtime_put_autosuspend(kbdev->dev); -+ kbdev->pm.runtime_active = false; ++ /* A virtualizer dump with a NULL buffer will just clear the virtualizer ++ * client's buffer. ++ */ ++ return kbase_hwcnt_virtualizer_client_dump( ++ vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); +} -+#endif + -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_init(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic ++ * vinstr clients, then reschedule the dump ++ * worker appropriately. ++ * @vctx: Non-NULL pointer to the vinstr context. ++ * ++ * If there are no periodic clients, then the dump worker will not be ++ * rescheduled. Else, the dump worker will be rescheduled for the next periodic ++ * client dump. ++ */ ++static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) +{ -+ int ret = 0; ++ u64 cur_ts_ns; ++ u64 earliest_next_ns = U64_MAX; ++ struct kbase_vinstr_client *pos; + -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ WARN_ON(!vctx); ++ lockdep_assert_held(&vctx->lock); + -+ pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); -+ pm_runtime_use_autosuspend(kbdev->dev); ++ cur_ts_ns = kbasep_vinstr_timestamp_ns(); + -+ pm_runtime_set_active(kbdev->dev); -+ pm_runtime_enable(kbdev->dev); ++ /* ++ * Update each client's next dump time, and find the earliest next ++ * dump time if any of the clients have a non-zero interval. 
++ */ ++ list_for_each_entry(pos, &vctx->clients, node) { ++ const u64 cli_next_ns = ++ kbasep_vinstr_next_dump_time_ns( ++ cur_ts_ns, pos->dump_interval_ns); + -+ if (!pm_runtime_enabled(kbdev->dev)) { -+ dev_warn(kbdev->dev, "pm_runtime not enabled"); -+ ret = -EINVAL; -+ } else if (atomic_read(&kbdev->dev->power.usage_count)) { -+ dev_warn(kbdev->dev, -+ "%s: Device runtime usage count unexpectedly non zero %d", -+ __func__, atomic_read(&kbdev->dev->power.usage_count)); -+ ret = -EINVAL; -+ } ++ /* Non-zero next dump time implies a periodic client */ ++ if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) ++ earliest_next_ns = cli_next_ns; + -+ return ret; -+} ++ pos->next_dump_time_ns = cli_next_ns; ++ } + -+static void kbase_device_runtime_disable(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ /* Cancel the timer if it is already pending */ ++ hrtimer_cancel(&vctx->dump_timer); + -+ if (atomic_read(&kbdev->dev->power.usage_count)) -+ dev_warn(kbdev->dev, -+ "%s: Device runtime usage count unexpectedly non zero %d", -+ __func__, atomic_read(&kbdev->dev->power.usage_count)); -+ -+ pm_runtime_disable(kbdev->dev); ++ /* Start the timer if there are periodic clients and vinstr is not ++ * suspended. ++ */ ++ if ((earliest_next_ns != U64_MAX) && ++ (vctx->suspend_count == 0) && ++ !WARN_ON(earliest_next_ns < cur_ts_ns)) ++ hrtimer_start( ++ &vctx->dump_timer, ++ ns_to_ktime(earliest_next_ns - cur_ts_ns), ++ HRTIMER_MODE_REL); +} -+#endif /* KBASE_PM_RUNTIME */ + -+static int pm_callback_runtime_on(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients ++ * that need to be dumped, then reschedules itself. ++ * @work: Work structure. ++ */ ++static void kbasep_vinstr_dump_worker(struct work_struct *work) +{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ -+#if !MALI_USE_CSF -+ enable_gpu_power_control(kbdev); -+#endif -+ return 0; -+} ++ struct kbase_vinstr_context *vctx = ++ container_of(work, struct kbase_vinstr_context, dump_work); ++ struct kbase_vinstr_client *pos; ++ u64 cur_time_ns; + -+static void pm_callback_runtime_off(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ mutex_lock(&vctx->lock); + -+#if !MALI_USE_CSF -+ disable_gpu_power_control(kbdev); -+#endif -+} ++ cur_time_ns = kbasep_vinstr_timestamp_ns(); + -+static void pm_callback_resume(struct kbase_device *kbdev) -+{ -+ int ret = pm_callback_runtime_on(kbdev); ++ /* Dump all periodic clients whose next dump time is before the current ++ * time. ++ */ ++ list_for_each_entry(pos, &vctx->clients, node) { ++ if ((pos->next_dump_time_ns != 0) && ++ (pos->next_dump_time_ns < cur_time_ns)) ++ kbasep_vinstr_client_dump( ++ pos, BASE_HWCNT_READER_EVENT_PERIODIC); ++ } + -+ WARN_ON(ret); -+} ++ /* Update the next dump times of all periodic clients, then reschedule ++ * this worker at the earliest next dump time. 
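
The rescheduling step above reduces to: recompute every periodic client's deadline, take the minimum (U64_MAX standing for "no periodic clients"), and arm a single relative timer for it. A compact standalone model of that selection over made-up client data:

#include <stdint.h>
#include <stdio.h>

struct client { uint32_t interval_ns; uint64_t next_dump_ns; };

/* Same rounding rule as in the earlier next-dump-time sketch. */
static uint64_t next_time(uint64_t now, uint32_t interval)
{
	return interval ? (now / interval + 1) * interval : 0;
}

int main(void)
{
	struct client clients[] = {
		{ 1000000, 0 },   /* 1 ms periodic client */
		{       0, 0 },   /* manual-only client, never drives the timer */
		{  250000, 0 },   /* 0.25 ms periodic client */
	};
	uint64_t now = 2300000;
	uint64_t earliest = UINT64_MAX;

	for (size_t i = 0; i < sizeof(clients) / sizeof(clients[0]); i++) {
		clients[i].next_dump_ns = next_time(now, clients[i].interval_ns);
		if (clients[i].next_dump_ns != 0 && clients[i].next_dump_ns < earliest)
			earliest = clients[i].next_dump_ns;
	}

	if (earliest != UINT64_MAX)
		printf("arm timer for now + %llu ns\n",
		       (unsigned long long)(earliest - now));  /* 200000 ns */
	else
		printf("no periodic clients, timer stays idle\n");
	return 0;
}
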
++ */ ++ kbasep_vinstr_reschedule_worker(vctx); + -+static void pm_callback_suspend(struct kbase_device *kbdev) -+{ -+ pm_callback_runtime_off(kbdev); ++ mutex_unlock(&vctx->lock); +} + -+ -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = pm_callback_suspend, -+ .power_resume_callback = pm_callback_resume, -+#ifdef KBASE_PM_RUNTIME -+ .power_runtime_init_callback = kbase_device_runtime_init, -+ .power_runtime_term_callback = kbase_device_runtime_disable, -+ .power_runtime_on_callback = pm_callback_runtime_on, -+ .power_runtime_off_callback = pm_callback_runtime_off, -+#else /* KBASE_PM_RUNTIME */ -+ .power_runtime_init_callback = NULL, -+ .power_runtime_term_callback = NULL, -+ .power_runtime_on_callback = NULL, -+ .power_runtime_off_callback = NULL, -+#endif /* KBASE_PM_RUNTIME */ -+ -+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) -+ .power_runtime_gpu_idle_callback = pm_callback_runtime_gpu_idle, -+ .power_runtime_gpu_active_callback = pm_callback_runtime_gpu_active, -+#else -+ .power_runtime_gpu_idle_callback = NULL, -+ .power_runtime_gpu_active_callback = NULL, -+#endif -+}; -diff --git a/drivers/gpu/arm/bifrost/platform/meson/Kbuild b/drivers/gpu/arm/bifrost/platform/meson/Kbuild -new file mode 100755 -index 000000000..9b3de96ba ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/meson/Kbuild -@@ -0,0 +1,23 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+ -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_meson.o \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o -diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c -new file mode 100644 -index 000000000..7b896b602 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c -@@ -0,0 +1,53 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2015, 2017, 2019, 2021, 2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++/** ++ * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for ++ * execution as soon as possible. ++ * @timer: Timer structure. + * ++ * Return: HRTIMER_NORESTART always. + */ -+ -+#include -+#include -+#include -+ -+static struct kbase_platform_config dummy_platform_config; -+ -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &dummy_platform_config; -+} -+ -+#ifndef CONFIG_OF -+int kbase_platform_register(void) -+{ -+ return 0; -+} -+ -+void kbase_platform_unregister(void) ++static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) +{ -+} -+#endif ++ struct kbase_vinstr_context *vctx = ++ container_of(timer, struct kbase_vinstr_context, dump_timer); + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+#if MALI_USE_CSF -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) -+#else -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) -+#endif -+{ -+ return 1; ++ /* We don't need to check vctx->suspend_count here, as the suspend ++ * function will ensure that any worker enqueued here is immediately ++ * cancelled, and the worker itself won't reschedule this timer if ++ * suspend_count != 0. ++ */ ++ kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work); ++ return HRTIMER_NORESTART; +} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..06279e2f6 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h -@@ -0,0 +1,45 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+/** -+ * Power management configuration -+ * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf -+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** -+ * Platform specific configuration functions -+ * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * kbasep_vinstr_client_destroy() - Destroy a vinstr client. ++ * @vcli: vinstr client. Must not be attached to a vinstr context. 
+ */ -+#define PLATFORM_FUNCS (NULL) ++static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) ++{ ++ if (!vcli) ++ return; + -+extern struct kbase_pm_callback_conf pm_callbacks; ++ kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); ++ kfree(vcli->dump_bufs_meta); ++ kbase_hwcnt_dump_buffer_narrow_array_free(&vcli->dump_bufs); ++ kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf); ++ kbase_hwcnt_enable_map_free(&vcli->enable_map); ++ kfree(vcli); ++} + +/** -+ * Autosuspend delay -+ * -+ * The delay time (in milliseconds) to be used for autosuspend -+ */ -+#define AUTO_SUSPEND_DELAY (100) -diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c -new file mode 100644 -index 000000000..910d4b4fd ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c -@@ -0,0 +1,265 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2015, 2017-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to ++ * the vinstr context. ++ * @vctx: Non-NULL pointer to vinstr context. ++ * @setup: Non-NULL pointer to hardware counter ioctl setup structure. ++ * setup->buffer_count must not be 0 and must be a power of 2. ++ * @out_vcli: Non-NULL pointer to where created client will be stored on ++ * success. + * ++ * Return: 0 on success, else error code. 
+ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_kbase_config_platform.h" -+ -+ -+static struct reset_control **resets; -+static int nr_resets; -+ -+static int resets_init(struct kbase_device *kbdev) ++static int kbasep_vinstr_client_create( ++ struct kbase_vinstr_context *vctx, ++ struct kbase_ioctl_hwcnt_reader_setup *setup, ++ struct kbase_vinstr_client **out_vcli) +{ -+ struct device_node *np; -+ int i; -+ int err = 0; -+ -+ np = kbdev->dev->of_node; ++ int errcode; ++ struct kbase_vinstr_client *vcli; ++ struct kbase_hwcnt_physical_enable_map phys_em; + -+ nr_resets = of_count_phandle_with_args(np, "resets", "#reset-cells"); -+ if (nr_resets <= 0) { -+ dev_err(kbdev->dev, "Failed to get GPU resets from dtb\n"); -+ return nr_resets; -+ } ++ WARN_ON(!vctx); ++ WARN_ON(!setup); ++ WARN_ON(setup->buffer_count == 0); ++ WARN_ON(!is_power_of_2(setup->buffer_count)); + -+ resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), -+ GFP_KERNEL); -+ if (!resets) ++ vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); ++ if (!vcli) + return -ENOMEM; + -+ for (i = 0; i < nr_resets; ++i) { -+ resets[i] = devm_reset_control_get_exclusive_by_index( -+ kbdev->dev, i); -+ if (IS_ERR(resets[i])) { -+ err = PTR_ERR(resets[i]); -+ nr_resets = i; -+ break; -+ } -+ } ++ vcli->vctx = vctx; + -+ return err; -+} ++ errcode = kbase_hwcnt_enable_map_alloc( ++ vctx->metadata, &vcli->enable_map); ++ if (errcode) ++ goto error; + -+static int pm_callback_soft_reset(struct kbase_device *kbdev) -+{ -+ int ret, i; ++ phys_em.fe_bm = setup->fe_bm; ++ phys_em.shader_bm = setup->shader_bm; ++ phys_em.tiler_bm = setup->tiler_bm; ++ phys_em.mmu_l2_bm = setup->mmu_l2_bm; ++ kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + -+ if (!resets) { -+ ret = resets_init(kbdev); -+ if (ret) -+ return ret; -+ } ++ /* Use virtualizer's metadata to alloc tmp buffer which interacts with ++ * the HWC virtualizer. ++ */ ++ errcode = kbase_hwcnt_dump_buffer_alloc(vctx->metadata, &vcli->tmp_buf); ++ if (errcode) ++ goto error; + -+ for (i = 0; i < nr_resets; ++i) -+ reset_control_assert(resets[i]); ++ /* Enable all the available clk_enable_map. */ ++ vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; + -+ udelay(10); ++ /* Use vinstr's narrowed metadata to alloc narrow dump buffers which ++ * interact with clients. 
++ */ ++ errcode = kbase_hwcnt_dump_buffer_narrow_array_alloc( ++ vctx->metadata_user, setup->buffer_count, &vcli->dump_bufs); ++ if (errcode) ++ goto error; + -+ for (i = 0; i < nr_resets; ++i) -+ reset_control_deassert(resets[i]); ++ errcode = -ENOMEM; ++ vcli->dump_bufs_meta = kmalloc_array( ++ setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); ++ if (!vcli->dump_bufs_meta) ++ goto error; + -+ udelay(10); ++ errcode = kbase_hwcnt_virtualizer_client_create( ++ vctx->hvirt, &vcli->enable_map, &vcli->hvcli); ++ if (errcode) ++ goto error; + -+ /* Override Power Management Settings, values from manufacturer's defaults */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), -+ 0xfff | (0x20 << 16)); ++ init_waitqueue_head(&vcli->waitq); + -+ /* -+ * RESET_COMPLETED interrupt will be raised, so continue with -+ * the normal soft reset procedure -+ */ ++ *out_vcli = vcli; + return 0; ++error: ++ kbasep_vinstr_client_destroy(vcli); ++ return errcode; +} + -+static void enable_gpu_power_control(struct kbase_device *kbdev) -+{ -+ unsigned int i; -+ -+#if defined(CONFIG_REGULATOR) -+ for (i = 0; i < kbdev->nr_regulators; i++) { -+ if (WARN_ON(kbdev->regulators[i] == NULL)) -+ ; -+ else if (!regulator_is_enabled(kbdev->regulators[i])) -+ WARN_ON(regulator_enable(kbdev->regulators[i])); -+ } -+#endif -+ -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ if (WARN_ON(kbdev->clocks[i] == NULL)) -+ ; -+ else if (!__clk_is_enabled(kbdev->clocks[i])) -+ WARN_ON(clk_prepare_enable(kbdev->clocks[i])); -+ } -+} -+ -+static void disable_gpu_power_control(struct kbase_device *kbdev) ++int kbase_vinstr_init( ++ struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_vinstr_context **out_vctx) +{ -+ unsigned int i; ++ int errcode; ++ struct kbase_vinstr_context *vctx; ++ const struct kbase_hwcnt_metadata *metadata; + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ if (WARN_ON(kbdev->clocks[i] == NULL)) -+ ; -+ else if (__clk_is_enabled(kbdev->clocks[i])) { -+ clk_disable_unprepare(kbdev->clocks[i]); -+ WARN_ON(__clk_is_enabled(kbdev->clocks[i])); -+ } -+ } ++ if (!hvirt || !out_vctx) ++ return -EINVAL; + -+#if defined(CONFIG_REGULATOR) -+ for (i = 0; i < kbdev->nr_regulators; i++) { -+ if (WARN_ON(kbdev->regulators[i] == NULL)) -+ ; -+ else if (regulator_is_enabled(kbdev->regulators[i])) -+ WARN_ON(regulator_disable(kbdev->regulators[i])); -+ } -+#endif -+} ++ metadata = kbase_hwcnt_virtualizer_metadata(hvirt); ++ if (!metadata) ++ return -EINVAL; + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ int ret = 1; /* Assume GPU has been powered off */ -+ int error; ++ vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); ++ if (!vctx) ++ return -ENOMEM; + -+ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); ++ vctx->hvirt = hvirt; ++ vctx->metadata = metadata; ++ errcode = kbase_hwcnt_gpu_metadata_narrow_create(&vctx->metadata_user, ++ metadata); ++ if (errcode) ++ goto err_metadata_create; + -+#ifdef KBASE_PM_RUNTIME -+ error = pm_runtime_get_sync(kbdev->dev); -+ if (error == 1) { -+ /* -+ * Let core know that the chip has not been -+ * powered off, so we can save on re-initialization. 
-+ */ -+ ret = 0; -+ } -+ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); -+#else -+ enable_gpu_power_control(kbdev); -+#endif ++ mutex_init(&vctx->lock); ++ INIT_LIST_HEAD(&vctx->clients); ++ hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ vctx->dump_timer.function = kbasep_vinstr_dump_timer; ++ INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); + -+ return ret; -+} ++ *out_vctx = vctx; ++ return 0; + -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++err_metadata_create: ++ kfree(vctx); + -+#ifdef KBASE_PM_RUNTIME -+ pm_runtime_mark_last_busy(kbdev->dev); -+ pm_runtime_put_autosuspend(kbdev->dev); -+#else -+ /* Power down the GPU immediately as runtime PM is disabled */ -+ disable_gpu_power_control(kbdev); -+#endif ++ return errcode; +} + -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_init(struct kbase_device *kbdev) ++void kbase_vinstr_term(struct kbase_vinstr_context *vctx) +{ -+ int ret = 0; -+ -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ -+ pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); -+ pm_runtime_use_autosuspend(kbdev->dev); ++ if (!vctx) ++ return; + -+ pm_runtime_set_active(kbdev->dev); -+ pm_runtime_enable(kbdev->dev); ++ /* Non-zero client count implies client leak */ ++ if (WARN_ON(vctx->client_count != 0)) { ++ struct kbase_vinstr_client *pos, *n; + -+ if (!pm_runtime_enabled(kbdev->dev)) { -+ dev_warn(kbdev->dev, "pm_runtime not enabled"); -+ ret = -EINVAL; -+ } else if (atomic_read(&kbdev->dev->power.usage_count)) { -+ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", -+ __func__, atomic_read(&kbdev->dev->power.usage_count)); -+ ret = -EINVAL; ++ list_for_each_entry_safe(pos, n, &vctx->clients, node) { ++ list_del(&pos->node); ++ vctx->client_count--; ++ kbasep_vinstr_client_destroy(pos); ++ } + } + -+ return ret; -+} -+ -+static void kbase_device_runtime_disable(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ -+ if (atomic_read(&kbdev->dev->power.usage_count)) -+ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", -+ __func__, atomic_read(&kbdev->dev->power.usage_count)); ++ cancel_work_sync(&vctx->dump_work); ++ kbase_hwcnt_gpu_metadata_narrow_destroy(vctx->metadata_user); + -+ pm_runtime_disable(kbdev->dev); ++ WARN_ON(vctx->client_count != 0); ++ kfree(vctx); +} -+#endif /* KBASE_PM_RUNTIME */ + -+static int pm_callback_runtime_on(struct kbase_device *kbdev) ++void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) +{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); -+ -+ enable_gpu_power_control(kbdev); -+ return 0; -+} ++ if (WARN_ON(!vctx)) ++ return; + -+static void pm_callback_runtime_off(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "%s\n", __func__); ++ mutex_lock(&vctx->lock); + -+ disable_gpu_power_control(kbdev); -+} ++ if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) ++ vctx->suspend_count++; + -+static void pm_callback_resume(struct kbase_device *kbdev) -+{ -+ int ret = pm_callback_runtime_on(kbdev); ++ mutex_unlock(&vctx->lock); + -+ WARN_ON(ret); ++ /* Always sync cancel the timer and then the worker, regardless of the ++ * new suspend count. ++ * ++ * This ensures concurrent calls to kbase_vinstr_suspend() always block ++ * until vinstr is fully suspended. 
++ * ++ * The timer is cancelled before the worker, as the timer ++ * unconditionally re-enqueues the worker, but the worker checks the ++ * suspend_count that we just incremented before rescheduling the timer. ++ * ++ * Therefore if we cancel the worker first, the timer might re-enqueue ++ * the worker before we cancel the timer, but the opposite is not ++ * possible. ++ */ ++ hrtimer_cancel(&vctx->dump_timer); ++ cancel_work_sync(&vctx->dump_work); +} + -+static void pm_callback_suspend(struct kbase_device *kbdev) ++void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) +{ -+ pm_callback_runtime_off(kbdev); -+} -+ -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = pm_callback_suspend, -+ .power_resume_callback = pm_callback_resume, -+ .soft_reset_callback = pm_callback_soft_reset, -+#ifdef KBASE_PM_RUNTIME -+ .power_runtime_init_callback = kbase_device_runtime_init, -+ .power_runtime_term_callback = kbase_device_runtime_disable, -+ .power_runtime_on_callback = pm_callback_runtime_on, -+ .power_runtime_off_callback = pm_callback_runtime_off, -+#else /* KBASE_PM_RUNTIME */ -+ .power_runtime_init_callback = NULL, -+ .power_runtime_term_callback = NULL, -+ .power_runtime_on_callback = NULL, -+ .power_runtime_off_callback = NULL, -+#endif /* KBASE_PM_RUNTIME */ -+}; -diff --git a/drivers/gpu/arm/bifrost/platform/rk/Kbuild b/drivers/gpu/arm/bifrost/platform/rk/Kbuild -new file mode 100755 -index 000000000..a1e78cfc4 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/rk/Kbuild -@@ -0,0 +1,16 @@ -+# -+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# -+ -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_rk.o -diff --git a/drivers/gpu/arm/bifrost/platform/rk/custom_log.h b/drivers/gpu/arm/bifrost/platform/rk/custom_log.h -new file mode 100644 -index 000000000..5de70ee13 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/rk/custom_log.h -@@ -0,0 +1,192 @@ -+/* -+ * (C) COPYRIGHT RockChip Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ */ -+ -+#ifndef __CUSTOM_LOG_H__ -+#define __CUSTOM_LOG_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------------------------- -+ * Include Files -+ * ----------------------------------------------------------------------------- -+ */ -+#include -+#include -+ -+/* ----------------------------------------------------------------------------- -+ * Macros Definition -+ * ----------------------------------------------------------------------------- -+ */ -+ -+/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. 
*/ -+/* #define ENABLE_DEBUG_LOG */ -+ -+/*----------------------------------------------------------------------------*/ -+ -+#ifdef ENABLE_VERBOSE_LOG -+/** Verbose log. */ -+#define V(fmt, args...) \ -+ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define V(...) ((void)0) -+#endif -+ -+#ifdef ENABLE_DEBUG_LOG -+/** Debug log. */ -+#define D(fmt, args...) \ -+ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define D(...) ((void)0) -+#endif -+ -+#define I(fmt, args...) \ -+ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ if (WARN_ON(!vctx)) ++ return; + -+#define W(fmt, args...) \ -+ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ -+ fmt "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ mutex_lock(&vctx->lock); + -+#define E(fmt, args...) \ -+ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ if (!WARN_ON(vctx->suspend_count == 0)) { ++ vctx->suspend_count--; + -+/*-------------------------------------------------------*/ ++ /* Last resume, so re-enqueue the worker if we have any periodic ++ * clients. ++ */ ++ if (vctx->suspend_count == 0) { ++ struct kbase_vinstr_client *pos; ++ bool has_periodic_clients = false; + -+/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_DEC(var) D(#var " = %d.", var) ++ list_for_each_entry(pos, &vctx->clients, node) { ++ if (pos->dump_interval_ns != 0) { ++ has_periodic_clients = true; ++ break; ++ } ++ } + -+#define E_DEC(var) E(#var " = %d.", var) ++ if (has_periodic_clients) ++ kbase_hwcnt_virtualizer_queue_work( ++ vctx->hvirt, &vctx->dump_work); ++ } ++ } + -+/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_HEX(var) D(#var " = 0x%x.", var) ++ mutex_unlock(&vctx->lock); ++} + -+#define E_HEX(var) E(#var " = 0x%x.", var) ++int kbase_vinstr_hwcnt_reader_setup( ++ struct kbase_vinstr_context *vctx, ++ struct kbase_ioctl_hwcnt_reader_setup *setup) ++{ ++ int errcode; ++ int fd; ++ struct kbase_vinstr_client *vcli = NULL; + -+/** -+ * 使用 D(), 以å六进制的形å¼, -+ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. -+ */ -+#define D_PTR(ptr) D(#ptr " = %p.", ptr) ++ if (!vctx || !setup || ++ (setup->buffer_count == 0) || ++ (setup->buffer_count > MAX_BUFFER_COUNT) || ++ !is_power_of_2(setup->buffer_count)) ++ return -EINVAL; + -+#define E_PTR(ptr) E(#ptr " = %p.", ptr) ++ errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); ++ if (errcode) ++ goto error; + -+/** 使用 D(), æ‰“å° char 字串. */ -+#define D_STR(p_str) \ -+do { \ -+ if (!p_str) { \ -+ D(#p_str " = NULL."); \ -+ else \ -+ D(#p_str " = '%s'.", p_str); \ -+} while (0) ++ /* Add the new client. No need to reschedule worker, as not periodic */ ++ mutex_lock(&vctx->lock); + -+#define E_STR(p_str) \ -+do { \ -+ if (!p_str) \ -+ E(#p_str " = NULL."); \ -+ else \ -+ E(#p_str " = '%s'.", p_str); \ -+} while (0) ++ vctx->client_count++; ++ list_add(&vcli->node, &vctx->clients); + -+#ifdef ENABLE_DEBUG_LOG -+/** -+ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. 
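
One issue worth noting in custom_log.h above: D_STR opens a brace after if (!p_str) that is never closed, so any caller of that macro fails to compile, while the sibling E_STR macro is balanced. A corrected, self-contained sketch (the D() stand-in below exists only so the example builds on its own):

#include <stdio.h>

/* Stand-in for the driver's D() macro so this sketch compiles standalone. */
#define D(fmt, args...) printf("D : " fmt "\n", ## args)

/* Balanced rewrite of D_STR, following the same shape as E_STR. */
#define D_STR(p_str) \
do { \
	if (!p_str) \
		D(#p_str " = NULL."); \
	else \
		D(#p_str " = '%s'.", p_str); \
} while (0)

int main(void)
{
	const char *gpu_name = "mali-bifrost";
	const char *missing = NULL;

	D_STR(gpu_name);   /* prints: D : gpu_name = 'mali-bifrost'. */
	D_STR(missing);    /* prints: D : missing = NULL. */
	return 0;
}

The do { ... } while (0) wrapper keeps the macro usable inside unbraced if/else statements, which is why the surrounding macros in this header use the same idiom.
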
-+ */ -+#define D_MEM(p_start, len) \ -+do { \ -+ int i = 0; \ -+ char *p = (char *)(p_start); \ -+ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ -+ (p_start), \ -+ (len)); \ -+ pr_debug("\t\t"); \ -+ for (i = 0; i < (len); i++) \ -+ pr_debug("0x%02x, ", p[i]); \ -+ pr_debug("\n"); \ -+} while (0) -+#else -+#define D_MEM(...) ((void)0) -+#endif ++ mutex_unlock(&vctx->lock); + -+/*-------------------------------------------------------*/ ++ /* Expose to user-space only once the client is fully initialized */ ++ errcode = anon_inode_getfd( ++ "[mali_vinstr_desc]", ++ &vinstr_client_fops, ++ vcli, ++ O_RDONLY | O_CLOEXEC); ++ if (errcode < 0) ++ goto client_installed_error; + -+/** -+ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, -+ * å°†å˜é‡ 'ret_var' 设置 'err_code', -+ * log 输出对应的 Error Caution, -+ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. -+ * @param msg -+ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. -+ * @param ret_var -+ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, -+ * 将被设置具体的 Error Code. -+ * 通常是 'ret' or 'result'. -+ * @param err_code -+ * 表å¾ç‰¹å®š error 的常数标识, -+ * 通常是 å®çš„å½¢æ€. -+ * @param label -+ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, -+ * 通常就是 'EXIT'. -+ * @param args... -+ * 对应 'msg_fmt' 实å‚中, -+ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. -+ */ -+#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) \ -+do { \ -+ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ -+ (err_code), \ -+ ## args); \ -+ (ret_var) = (err_code); \ -+ goto label; \ -+} while (0) ++ fd = errcode; + -+/* ----------------------------------------------------------------------------- -+ * Types and Structures Definition -+ * ----------------------------------------------------------------------------- -+ */ ++ return fd; + -+/* ----------------------------------------------------------------------------- -+ * Global Functions' Prototype -+ * ----------------------------------------------------------------------------- -+ */ ++client_installed_error: ++ mutex_lock(&vctx->lock); + -+/* ----------------------------------------------------------------------------- -+ * Inline Functions Implementation -+ * ----------------------------------------------------------------------------- -+ */ ++ vctx->client_count--; ++ list_del(&vcli->node); + -+#ifdef __cplusplus ++ mutex_unlock(&vctx->lock); ++error: ++ kbasep_vinstr_client_destroy(vcli); ++ return errcode; +} -+#endif -+ -+#endif /* __CUSTOM_LOG_H__ */ -diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..ea01d502c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h -@@ -0,0 +1,94 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ */ + +/** -+ * @file mali_kbase_config_platform.h -+ * 声明 platform_config_of_rk (platform_rk çš„ platform_config). -+ */ -+ -+/** -+ * Maximum frequency GPU will be clocked at. -+ * Given in kHz. -+ * This must be specified as there is no default value. -+ * -+ * Attached value: number in kHz -+ * Default value: NA -+ */ -+#define GPU_FREQ_KHZ_MAX (5000) -+ -+/** -+ * Minimum frequency GPU will be clocked at. -+ * Given in kHz. 
-+ * This must be specified as there is no default value. ++ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready ++ * buffers. ++ * @cli: Non-NULL pointer to vinstr client. + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * Return: Non-zero if client has at least one dumping buffer filled that was ++ * not notified to user yet. + */ -+#define GPU_FREQ_KHZ_MIN (5000) ++static int kbasep_vinstr_hwcnt_reader_buffer_ready( ++ struct kbase_vinstr_client *cli) ++{ ++ WARN_ON(!cli); ++ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); ++} + +/** -+ * CPU_SPEED_FUNC -+ * - A pointer to a function that calculates the CPU clock -+ * -+ * CPU clock speed of the platform is in MHz -+ * - see kbase_cpu_clk_speed_func for the function prototype. ++ * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. + * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA ++ * Return: 0 on success, else error code. + */ -+#define CPU_SPEED_FUNC (NULL) ++static long kbasep_vinstr_hwcnt_reader_ioctl_dump( ++ struct kbase_vinstr_client *cli) ++{ ++ int errcode; + -+/** -+ * GPU_SPEED_FUNC -+ * - A pointer to a function that calculates the GPU clock -+ * -+ * GPU clock speed of the platform in MHz -+ * - see kbase_gpu_clk_speed_func for the function prototype. -+ * -+ * Attached value: A kbase_gpu_clk_speed_func. -+ * Default Value: NA -+ */ -+#define GPU_SPEED_FUNC (NULL) ++ mutex_lock(&cli->vctx->lock); + -+/** -+ * Power management configuration -+ * -+ * Attached value: -+ * pointer to @ref kbase_pm_callback_conf -+ * Default value: -+ * See @ref kbase_pm_callback_conf -+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) -+extern struct kbase_pm_callback_conf pm_callbacks; ++ errcode = kbasep_vinstr_client_dump( ++ cli, BASE_HWCNT_READER_EVENT_MANUAL); + -+/** -+ * Platform specific configuration functions -+ * -+ * Attached value: -+ * pointer to @ref kbase_platform_funcs_conf -+ * Default value: -+ * See @ref kbase_platform_funcs_conf -+ */ -+#define PLATFORM_FUNCS (&platform_funcs) -+extern struct kbase_platform_funcs_conf platform_funcs; ++ mutex_unlock(&cli->vctx->lock); ++ return errcode; ++} + +/** -+ * Secure mode switch -+ * -+ * Attached value: pointer to @ref kbase_secure_ops -+ */ -+#define SECURE_CALLBACKS (NULL) -+ -+#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) -+extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; -+ -+int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev); -+void kbase_platform_rk_uninit_opp_table(struct kbase_device *kbdev); -+int kbase_platform_rk_enable_regulator(struct kbase_device *kbdev); -diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c -new file mode 100644 -index 000000000..3ac4aef79 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c -@@ -0,0 +1,724 @@ -+/* -+ * (C) COPYRIGHT RockChip Limited. All rights reserved. ++ * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Return: 0 on success, else error code. 
+ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_clear( ++ struct kbase_vinstr_client *cli) ++{ ++ int errcode; + -+/* #define ENABLE_DEBUG_LOG */ -+#include "custom_log.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if MALI_USE_CSF -+#include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ mutex_lock(&cli->vctx->lock); + -+#include "mali_kbase_config_platform.h" -+#include "mali_kbase_rk.h" ++ errcode = kbasep_vinstr_client_clear(cli); + -+#define POWER_DOWN_FREQ 200000000 ++ mutex_unlock(&cli->vctx->lock); ++ return errcode; ++} + +/** -+ * @file mali_kbase_config_rk.c -+ * 对 platform_config_of_rk 的具体实现. ++ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @buffer: Non-NULL pointer to userspace buffer. ++ * @size: Size of buffer. + * -+ * mali_device_driver 包å«ä¸¤éƒ¨åˆ† : -+ * .DP : platform_dependent_part_in_mdd : -+ * ä¾èµ– platform 部分, -+ * æºç åœ¨ /platform// -+ * 在 mali_device_driver 内部, -+ * 记为 platform_dependent_part, -+ * 也被记为 platform_specific_code. -+ * .DP : common_parts_in_mdd : -+ * arm 实现的通用的部分, -+ * æºç åœ¨ / 下. -+ * 在 mali_device_driver 内部, 记为 common_parts. ++ * Return: 0 on success, else error code. + */ -+ -+/*---------------------------------------------------------------------------*/ -+#ifndef CONFIG_MALI_BIFROST_DEVFREQ -+static inline void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, -+ struct kbasep_pm_metrics *last, -+ struct kbasep_pm_metrics *diff) -+{ -+} -+#endif -+ -+#ifdef CONFIG_REGULATOR -+static int rk_pm_enable_regulator(struct kbase_device *kbdev); -+static void rk_pm_disable_regulator(struct kbase_device *kbdev); -+#else -+static inline int rk_pm_enable_regulator(struct kbase_device *kbdev) -+{ -+ return 0; -+} -+ -+static inline void rk_pm_disable_regulator(struct kbase_device *kbdev) -+{ -+} -+#endif -+ -+static int rk_pm_enable_clk(struct kbase_device *kbdev); -+ -+static void rk_pm_disable_clk(struct kbase_device *kbdev); -+ -+static int kbase_platform_rk_create_sysfs_files(struct device *dev); -+ -+static void kbase_platform_rk_remove_sysfs_files(struct device *dev); -+ -+/*---------------------------------------------------------------------------*/ -+ -+static void rk_pm_power_off_delay_work(struct work_struct *work) -+{ -+ struct rk_context *platform = -+ container_of(to_delayed_work(work), struct rk_context, work); -+ struct kbase_device *kbdev = platform->kbdev; -+ struct rockchip_opp_info *opp_info = &kbdev->opp_info; -+ -+ mutex_lock(&platform->lock); -+ -+ if (!platform->is_powered) { -+ D("mali_dev is already powered off."); -+ mutex_unlock(&platform->lock); -+ return; -+ } -+ -+ rockchip_opp_dvfs_lock(opp_info); -+ if (pm_runtime_enabled(kbdev->dev)) { -+ D("to put_sync_suspend mali_dev."); -+ pm_runtime_put_sync_suspend(kbdev->dev); -+ } -+ rockchip_opp_dvfs_unlock(opp_info); -+ -+ rk_pm_disable_clk(kbdev); -+ -+ if (pm_runtime_suspended(kbdev->dev)) { -+ rk_pm_disable_regulator(kbdev); -+ platform->is_regulator_on = false; -+ } -+ -+ platform->is_powered = false; -+ wake_unlock(&platform->wake_lock); -+ -+ mutex_unlock(&platform->lock); -+} -+ -+static int kbase_platform_rk_init(struct kbase_device *kbdev) ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( ++ struct kbase_vinstr_client *cli, ++ void __user *buffer, ++ size_t size) +{ -+ int ret = 0; -+ struct rk_context *platform; -+ -+ platform = 
kzalloc(sizeof(*platform), GFP_KERNEL); -+ if (!platform) { -+ E("err."); -+ return -ENOMEM; -+ } -+ -+ platform->is_powered = false; -+ platform->kbdev = kbdev; ++ unsigned int meta_idx = atomic_read(&cli->meta_idx); ++ unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; + -+ platform->delay_ms = 200; -+ if (of_property_read_u32(kbdev->dev->of_node, "power-off-delay-ms", -+ &platform->delay_ms)) -+ W("power-off-delay-ms not available."); ++ struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; ++ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); ++ const size_t min_size = min(size, meta_size); + -+ platform->power_off_wq = create_freezable_workqueue("gpu_power_off_wq"); -+ if (!platform->power_off_wq) { -+ E("couldn't create workqueue"); -+ ret = -ENOMEM; -+ goto err_wq; -+ } -+ INIT_DEFERRABLE_WORK(&platform->work, rk_pm_power_off_delay_work); ++ /* Metadata sanity check. */ ++ WARN_ON(idx != meta->buffer_idx); + -+ wake_lock_init(&platform->wake_lock, WAKE_LOCK_SUSPEND, "gpu"); ++ /* Check if there is any buffer available. */ ++ if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) ++ return -EAGAIN; + -+ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; ++ /* Check if previously taken buffer was put back. */ ++ if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) ++ return -EBUSY; + -+ ret = kbase_platform_rk_create_sysfs_files(kbdev->dev); -+ if (ret) { -+ E("fail to create sysfs_files. ret = %d.", ret); -+ goto err_sysfs_files; -+ } ++ /* Clear user buffer to zero. */ ++ if (unlikely(meta_size < size && clear_user(buffer, size))) ++ return -EFAULT; + -+ kbdev->platform_context = (void *)platform; -+ pm_runtime_enable(kbdev->dev); ++ /* Copy next available buffer's metadata to user. */ ++ if (unlikely(copy_to_user(buffer, meta, min_size))) ++ return -EFAULT; + -+ mutex_init(&platform->lock); ++ /* Compare exchange meta idx to protect against concurrent getters */ ++ if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1)) ++ return -EBUSY; + + return 0; -+ -+err_sysfs_files: -+ wake_lock_destroy(&platform->wake_lock); -+ destroy_workqueue(platform->power_off_wq); -+err_wq: -+ return ret; +} + -+static void kbase_platform_rk_term(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @buffer: Non-NULL pointer to userspace buffer. ++ * @size: Size of buffer. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( ++ struct kbase_vinstr_client *cli, ++ void __user *buffer, ++ size_t size) +{ -+ struct rk_context *platform = -+ (struct rk_context *)kbdev->platform_context; -+ -+ pm_runtime_disable(kbdev->dev); -+ kbdev->platform_context = NULL; -+ -+ if (platform) { -+ cancel_delayed_work_sync(&platform->work); -+ wake_lock_destroy(&platform->wake_lock); -+ destroy_workqueue(platform->power_off_wq); -+ platform->is_powered = false; -+ platform->kbdev = NULL; -+ kfree(platform); -+ } -+ kbase_platform_rk_remove_sysfs_files(kbdev->dev); -+} -+ -+struct kbase_platform_funcs_conf platform_funcs = { -+ .platform_init_func = &kbase_platform_rk_init, -+ .platform_term_func = &kbase_platform_rk_term, -+}; -+ -+/*---------------------------------------------------------------------------*/ ++ unsigned int read_idx = atomic_read(&cli->read_idx); ++ unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; + -+static int rk_pm_callback_runtime_on(struct kbase_device *kbdev) -+{ -+ struct rockchip_opp_info *opp_info = &kbdev->opp_info; ++ struct kbase_hwcnt_reader_metadata *meta; ++ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); ++ const size_t max_size = max(size, meta_size); + int ret = 0; ++ u8 stack_kbuf[64]; ++ u8 *kbuf = NULL; ++ size_t i; + -+ if (!kbdev->current_nominal_freq) -+ return 0; -+ -+ ret = clk_bulk_prepare_enable(opp_info->nclocks, opp_info->clocks); -+ if (ret) { -+ dev_err(kbdev->dev, "failed to enable opp clks\n"); -+ return ret; -+ } -+ if (opp_info->data && opp_info->data->set_read_margin) -+ opp_info->data->set_read_margin(kbdev->dev, opp_info, -+ opp_info->target_rm); -+ if (opp_info->is_scmi_clk) { -+ if (clk_set_rate(opp_info->clk, kbdev->current_nominal_freq)) -+ dev_err(kbdev->dev, "failed to restore clk rate\n"); -+ } -+ clk_bulk_disable_unprepare(opp_info->nclocks, opp_info->clocks); -+ -+ return 0; -+} -+ -+static void rk_pm_callback_runtime_off(struct kbase_device *kbdev) -+{ -+ struct rockchip_opp_info *opp_info = &kbdev->opp_info; ++ /* Check if any buffer was taken. */ ++ if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) ++ return -EPERM; + -+ if (opp_info->is_scmi_clk) { -+ if (clk_set_rate(opp_info->clk, POWER_DOWN_FREQ)) -+ dev_err(kbdev->dev, "failed to set power down rate\n"); ++ if (likely(max_size <= sizeof(stack_kbuf))) { ++ /* Use stack buffer when the size is small enough. */ ++ if (unlikely(meta_size > size)) ++ memset(stack_kbuf, 0, sizeof(stack_kbuf)); ++ kbuf = stack_kbuf; ++ } else { ++ kbuf = kzalloc(max_size, GFP_KERNEL); ++ if (unlikely(!kbuf)) ++ return -ENOMEM; + } -+ opp_info->current_rm = UINT_MAX; -+} -+ -+static int rk_pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ int ret = 1; /* Assume GPU has been powered off */ -+ int err = 0; -+ struct rk_context *platform = get_rk_context(kbdev); -+ struct rockchip_opp_info *opp_info = &kbdev->opp_info; -+ -+ cancel_delayed_work_sync(&platform->work); -+ -+ mutex_lock(&platform->lock); + -+ if (platform->is_powered) { -+ D("mali_device is already powered."); -+ ret = 0; ++ /* ++ * Copy user buffer to zero cleared kernel buffer which has enough ++ * space for both user buffer and kernel metadata. ++ */ ++ if (unlikely(copy_from_user(kbuf, buffer, size))) { ++ ret = -EFAULT; + goto out; + } + -+ /* we must enable vdd_gpu before pd_gpu_in_chip. 
*/
-+	if (!platform->is_regulator_on) {
-+		err = rk_pm_enable_regulator(kbdev);
-+		if (err) {
-+			E("fail to enable regulator, err : %d.", err);
-+			ret = err;
++	/*
++	 * Make sure any "extra" data passed from userspace is zero.
++	 * It's meaningful only in case meta_size < size.
++	 */
++	for (i = meta_size; i < size; i++) {
++		/* Check if user data beyond meta size is zero. */
++		if (unlikely(kbuf[i] != 0)) {
++			ret = -EINVAL;
+			goto out;
+		}
-+		platform->is_regulator_on = true;
+	}
+
-+	err = rk_pm_enable_clk(kbdev);
-+	if (err) {
-+		E("failed to enable clk: %d", err);
-+		ret = err;
++	/* Check if correct buffer is put back. */
++	meta = (struct kbase_hwcnt_reader_metadata *)kbuf;
++	if (unlikely(idx != meta->buffer_idx)) {
++		ret = -EINVAL;
+		goto out;
+	}
+
-+	rockchip_opp_dvfs_lock(opp_info);
-+	/* If runtime_pm of the mali_dev is enabled, then... */
-+	if (pm_runtime_enabled(kbdev->dev)) {
-+		D("to resume mali_dev syncly.");
-+		/* The "on" operation for pd_in_chip will be
-+		 * completed in the runtime_pm_callbacks of the pm_domain.
-+		 */
-+		err = pm_runtime_get_sync(kbdev->dev);
-+		if (err < 0) {
-+			rockchip_opp_dvfs_unlock(opp_info);
-+			E("failed to runtime resume device: %d.", err);
-+			ret = err;
-+			goto out;
-+		} else if (err == 1) { /* runtime_pm_status is still active */
-+			D("chip has NOT been powered off, no need to re-init.");
-+			ret = 0;
-+		}
++	/* Compare exchange read idx to protect against concurrent putters */
++	if (read_idx !=
++	    atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) {
++		ret = -EPERM;
++		goto out;
+	}
-+	rockchip_opp_dvfs_unlock(opp_info);
-+
-+	platform->is_powered = true;
-+	wake_lock(&platform->wake_lock);
+
+out:
-+	mutex_unlock(&platform->lock);
-+	return ret;
-+}
-+
-+static void rk_pm_callback_power_off(struct kbase_device *kbdev)
-+{
-+	struct rk_context *platform = get_rk_context(kbdev);
-+
-+	D("enter");
-+
-+	queue_delayed_work(platform->power_off_wq, &platform->work,
-+			   msecs_to_jiffies(platform->delay_ms));
-+}
-+
-+static int rk_kbase_device_runtime_init(struct kbase_device *kbdev)
-+{
-+	return 0;
-+}
-+
-+static void rk_kbase_device_runtime_disable(struct kbase_device *kbdev)
-+{
-+}
-+
-+struct kbase_pm_callback_conf pm_callbacks = {
-+	.power_on_callback = rk_pm_callback_power_on,
-+	.power_off_callback = rk_pm_callback_power_off,
-+#ifdef CONFIG_PM
-+	.power_runtime_init_callback = rk_kbase_device_runtime_init,
-+	.power_runtime_term_callback = rk_kbase_device_runtime_disable,
-+	.power_runtime_on_callback = rk_pm_callback_runtime_on,
-+	.power_runtime_off_callback = rk_pm_callback_runtime_off,
-+#else /* CONFIG_PM */
-+	.power_runtime_init_callback = NULL,
-+	.power_runtime_term_callback = NULL,
-+	.power_runtime_on_callback = NULL,
-+	.power_runtime_off_callback = NULL,
-+#endif /* CONFIG_PM */
-+};
-+
-+/*---------------------------------------------------------------------------*/
-+
-+#ifdef CONFIG_REGULATOR
-+static int rk_pm_enable_regulator(struct kbase_device *kbdev)
-+{
-+	int ret = 0;
-+	unsigned int i;
-+
-+	for (i = 0; i < kbdev->nr_regulators; i++) {
-+		struct regulator *regulator = kbdev->regulators[i];
-+		if (!regulator) {
-+			W("no mali regulator control, no need to enable.");
-+			goto EXIT;
-+		}
-+
-+		D("to enable regulator.");
-+		ret = regulator_enable(regulator);
-+		if (ret) {
-+			E("fail to enable regulator, ret : %d.", ret);
-+			goto EXIT;
-+		}
-+	}
-+
-+EXIT:
++	if (unlikely(kbuf != stack_kbuf))
++		kfree(kbuf);
+	return ret;
+}
+
-+static void rk_pm_disable_regulator(struct kbase_device *kbdev)
-+{
-+	unsigned int i;
-+
-+	for (i = 0; i < 
kbdev->nr_regulators; i++) { -+ struct regulator *regulator = kbdev->regulators[i]; -+ -+ if (!regulator) { -+ W("no mali regulator control, no need to disable."); -+ return; -+ } -+ -+ D("to disable regulator."); -+ regulator_disable(regulator); -+ } -+} -+#endif -+ -+static int rk_pm_enable_clk(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @interval: Periodic dumping interval (disable periodic dumping if 0). ++ * ++ * Return: 0 always. ++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( ++ struct kbase_vinstr_client *cli, ++ u32 interval) +{ -+ int err = 0; -+ unsigned int i; -+ -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ struct clk *clock = kbdev->clocks[i]; -+ -+ if (!clock) { -+ W("no mali clock control, no need to enable."); -+ } else { -+ D("to enable clk."); -+ err = clk_enable(clock); -+ if (err) -+ E("failed to enable clk: %d.", err); -+ } -+ } ++ mutex_lock(&cli->vctx->lock); + -+ return err; -+} ++ if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) ++ interval = DUMP_INTERVAL_MIN_NS; ++ /* Update the interval, and put in a dummy next dump time */ ++ cli->dump_interval_ns = interval; ++ cli->next_dump_time_ns = 0; + -+static void rk_pm_disable_clk(struct kbase_device *kbdev) -+{ -+ unsigned int i; ++ /* ++ * If it's a periodic client, kick off the worker early to do a proper ++ * timer reschedule. Return value is ignored, as we don't care if the ++ * worker is already queued. ++ */ ++ if ((interval != 0) && (cli->vctx->suspend_count == 0)) ++ kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt, ++ &cli->vctx->dump_work); + -+ for (i = 0; i < kbdev->nr_clocks; i++) { -+ struct clk *clock = kbdev->clocks[i]; ++ mutex_unlock(&cli->vctx->lock); + -+ if (!clock) { -+ W("no mali clock control, no need to disable."); -+ } else { -+ D("to disable clk."); -+ clk_disable(clock); -+ } -+ } ++ return 0; +} + -+/*---------------------------------------------------------------------------*/ -+ -+static ssize_t utilisation_period_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @event_id: ID of event to enable. ++ * ++ * Return: 0 always. ++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( ++ struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context *platform = get_rk_context(kbdev); -+ ssize_t ret = 0; -+ -+ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); -+ -+ return ret; ++ /* No-op, as events aren't supported */ ++ return 0; +} + -+static ssize_t utilisation_period_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, -+ size_t count) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl ++ * command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @event_id: ID of event to disable. ++ * ++ * Return: 0 always. 
++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( ++ struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context *platform = get_rk_context(kbdev); -+ int ret = 0; -+ -+ ret = kstrtouint(buf, 0, &platform->utilisation_period); -+ if (ret) { -+ E("invalid input period : %s.", buf); -+ return ret; -+ } -+ D("set utilisation_period to '%d'.", platform->utilisation_period); -+ -+ return count; ++ /* No-op, as events aren't supported */ ++ return 0; +} + -+static ssize_t utilisation_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. ++ * @cli: Non-NULL pointer to vinstr client. ++ * @hwver: Non-NULL pointer to user buffer where HW version will be stored. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( ++ struct kbase_vinstr_client *cli, ++ u32 __user *hwver) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context *platform = get_rk_context(kbdev); -+ ssize_t ret = 0; -+ unsigned long period_in_us = platform->utilisation_period * 1000; -+ u32 utilisation; -+ struct kbasep_pm_metrics metrics_when_start; -+ struct kbasep_pm_metrics metrics_diff = {}; /* between start and end. */ -+ u32 total_time = 0; -+ u32 busy_time = 0; -+ -+ /* get current metrics data. */ -+ kbase_pm_get_dvfs_metrics(kbdev, &metrics_when_start, &metrics_diff); -+ /* sleep for 'period_in_us'. */ -+ usleep_range(period_in_us, period_in_us + 100); -+ /* get metrics data between start and end. */ -+ kbase_pm_get_dvfs_metrics(kbdev, &metrics_when_start, &metrics_diff); -+ -+ total_time = metrics_diff.time_busy + metrics_diff.time_idle; -+ busy_time = metrics_diff.time_busy; -+ D("total_time : %u, busy_time : %u.", total_time, busy_time); ++ u32 ver = 5; ++ const enum kbase_hwcnt_gpu_group_type type = ++ kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); + -+ utilisation = busy_time * 100 / total_time; -+ ret += snprintf(buf, PAGE_SIZE, "%d\n", utilisation); ++ if (WARN_ON(type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) ++ return -EINVAL; + -+ return ret; ++ return put_user(ver, hwver); +} + -+static DEVICE_ATTR_RW(utilisation_period); -+static DEVICE_ATTR_RO(utilisation); -+ -+static int kbase_platform_rk_create_sysfs_files(struct device *dev) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_get_api_version() - get API version ioctl ++ * command. ++ * @cli: The non-NULL pointer to the client ++ * @arg: Command's argument. ++ * @size: Size of arg. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( ++ struct kbase_vinstr_client *cli, unsigned long arg, size_t size) +{ -+ int ret = 0; -+ -+ ret = device_create_file(dev, &dev_attr_utilisation_period); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation_period'."); -+ goto out; -+ } -+ -+ ret = device_create_file(dev, &dev_attr_utilisation); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation'."); -+ goto remove_utilisation_period; -+ } -+ -+ return 0; -+ -+remove_utilisation_period: -+ device_remove_file(dev, &dev_attr_utilisation_period); -+out: -+ return ret; -+} ++ long ret = -EINVAL; + -+static void kbase_platform_rk_remove_sysfs_files(struct device *dev) -+{ -+ device_remove_file(dev, &dev_attr_utilisation_period); -+ device_remove_file(dev, &dev_attr_utilisation); -+} ++ if (size == sizeof(u32)) { ++ ret = put_user(HWCNT_READER_API, (u32 __user *)arg); ++ } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { ++ u8 clk_cnt = cli->vctx->metadata->clk_cnt; ++ unsigned long bytes = 0; ++ struct kbase_hwcnt_reader_api_version api_version = { ++ .version = HWCNT_READER_API, ++ .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, ++ }; + -+static int rk3588_gpu_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) -+{ -+ int ret = 0; -+ u8 value = 0; ++ if (clk_cnt > 0) ++ api_version.features |= ++ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; ++ if (clk_cnt > 1) ++ api_version.features |= ++ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; + -+ if (!bin) -+ return 0; ++ bytes = copy_to_user( ++ (void __user *)arg, &api_version, sizeof(api_version)); + -+ if (of_property_match_string(np, "nvmem-cell-names", -+ "specification_serial_number") >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, -+ "specification_serial_number", -+ &value); -+ if (ret) { -+ dev_err(dev, -+ "Failed to get specification_serial_number\n"); -+ return ret; -+ } -+ /* RK3588M */ -+ if (value == 0xd) -+ *bin = 1; -+ /* RK3588J */ -+ else if (value == 0xa) -+ *bin = 2; ++ /* copy_to_user returns zero in case of success. ++ * If it fails, it returns the number of bytes that could NOT be copied ++ */ ++ if (bytes == 0) ++ ret = 0; ++ else ++ ret = -EFAULT; + } -+ if (*bin < 0) -+ *bin = 0; -+ dev_info(dev, "bin=%d\n", *bin); -+ + return ret; +} + -+static int rk3588_gpu_set_soc_info(struct device *dev, struct device_node *np, -+ struct rockchip_opp_info *opp_info) -+{ -+ int bin = opp_info->bin; -+ -+ if (opp_info->volt_sel < 0) -+ return 0; -+ if (bin < 0) -+ bin = 0; -+ -+ if (!of_property_read_bool(np, "rockchip,supported-hw")) -+ return 0; -+ -+ /* SoC Version */ -+ opp_info->supported_hw[0] = BIT(bin); -+ /* Speed Grade */ -+ opp_info->supported_hw[1] = BIT(opp_info->volt_sel); -+ -+ return 0; -+} -+ -+static int rk3588_gpu_set_read_margin(struct device *dev, -+ struct rockchip_opp_info *opp_info, -+ u32 rm) ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. ++ * @filp: Non-NULL pointer to file structure. ++ * @cmd: User command. ++ * @arg: Command's argument. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl( ++ struct file *filp, ++ unsigned int cmd, ++ unsigned long arg) +{ -+ int ret = 0; -+ u32 val; -+ -+ if (!opp_info->grf || !opp_info->volt_rm_tbl) -+ return 0; -+ if (rm == opp_info->current_rm || rm == UINT_MAX) -+ return 0; ++ long rcode; ++ struct kbase_vinstr_client *cli; + -+ dev_dbg(dev, "set rm to %d\n", rm); ++ if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) ++ return -EINVAL; + -+ ret = regmap_read(opp_info->grf, 0x24, &val); -+ if (ret < 0) { -+ dev_err(dev, "failed to get rm from 0x24\n"); -+ return ret; -+ } -+ val &= ~0x1c; -+ regmap_write(opp_info->grf, 0x24, val | (rm << 2)); ++ cli = filp->private_data; ++ if (!cli) ++ return -EINVAL; + -+ ret = regmap_read(opp_info->grf, 0x28, &val); -+ if (ret < 0) { -+ dev_err(dev, "failed to get rm from 0x28\n"); -+ return ret; ++ switch (_IOC_NR(cmd)) { ++ case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( ++ cli, arg, _IOC_SIZE(cmd)); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( ++ cli, (u32 __user *)arg); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): ++ rcode = put_user((u32)cli->vctx->metadata_user->dump_buf_bytes, ++ (u32 __user *)arg); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_DUMP): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_CLEAR): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( ++ cli, (void __user *)arg, _IOC_SIZE(cmd)); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( ++ cli, (void __user *)arg, _IOC_SIZE(cmd)); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( ++ cli, (u32)arg); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( ++ cli, (enum base_hwcnt_reader_event)arg); ++ break; ++ case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( ++ cli, (enum base_hwcnt_reader_event)arg); ++ break; ++ default: ++ pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); ++ rcode = -EINVAL; ++ break; + } -+ val &= ~0x1c; -+ regmap_write(opp_info->grf, 0x28, val | (rm << 2)); -+ -+ opp_info->current_rm = rm; -+ -+ return 0; -+} + -+static int gpu_opp_config_regulators(struct device *dev, -+ struct dev_pm_opp *old_opp, -+ struct dev_pm_opp *new_opp, -+ struct regulator **regulators, -+ unsigned int count) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ -+ return rockchip_opp_config_regulators(dev, old_opp, new_opp, regulators, -+ count, &kbdev->opp_info); ++ return rcode; +} + -+static int gpu_opp_config_clks(struct device *dev, struct opp_table *opp_table, -+ struct dev_pm_opp *opp, void *data, -+ bool scaling_down) ++/** ++ * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. ++ * @filp: Non-NULL pointer to file structure. ++ * @wait: Non-NULL pointer to poll table. ++ * ++ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if ++ * data can not be read without blocking, else EPOLLHUP | EPOLLERR. 
++ */ ++static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ -+ return rockchip_opp_config_clks(dev, opp_table, opp, data, scaling_down, -+ &kbdev->opp_info); -+} -+ -+static const struct rockchip_opp_data rk3588_gpu_opp_data = { -+ .get_soc_info = rk3588_gpu_get_soc_info, -+ .set_soc_info = rk3588_gpu_set_soc_info, -+ .set_read_margin = rk3588_gpu_set_read_margin, -+ .config_regulators = gpu_opp_config_regulators, -+ .config_clks = gpu_opp_config_clks, -+}; -+ -+static const struct rockchip_opp_data rockchip_gpu_opp_data = { -+ .config_clks = gpu_opp_config_clks, -+}; -+ -+static const struct of_device_id rockchip_mali_of_match[] = { -+ { -+ .compatible = "rockchip,rk3588", -+ .data = (void *)&rk3588_gpu_opp_data, -+ }, -+ {}, -+}; ++ struct kbase_vinstr_client *cli; + -+int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev) -+{ -+ struct rockchip_opp_info *info = &kbdev->opp_info; ++ if (!filp || !wait) ++ return EPOLLHUP | EPOLLERR; + -+ info->data = &rockchip_gpu_opp_data; -+ rockchip_get_opp_data(rockchip_mali_of_match, &kbdev->opp_info); ++ cli = filp->private_data; ++ if (!cli) ++ return EPOLLHUP | EPOLLERR; + -+ return rockchip_init_opp_table(kbdev->dev, &kbdev->opp_info, -+ "clk_mali", "mali"); -+} ++ poll_wait(filp, &cli->waitq, wait); ++ if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) ++ return EPOLLIN | EPOLLRDNORM; + -+void kbase_platform_rk_uninit_opp_table(struct kbase_device *kbdev) -+{ -+ rockchip_uninit_opp_table(kbdev->dev, &kbdev->opp_info); ++ return (__poll_t)0; +} + -+int kbase_platform_rk_enable_regulator(struct kbase_device *kbdev) ++/** ++ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. ++ * @filp: Non-NULL pointer to file structure. ++ * @vma: Non-NULL pointer to vma structure. ++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_vinstr_hwcnt_reader_mmap( ++ struct file *filp, ++ struct vm_area_struct *vma) +{ -+ struct rk_context *platform = get_rk_context(kbdev); -+ int err = 0; -+ -+ if (!platform->is_regulator_on) { -+ err = rk_pm_enable_regulator(kbdev); -+ if (err) { -+ E("fail to enable regulator, err : %d.", err); -+ return err; -+ } -+ platform->is_regulator_on = true; -+ } -+ -+ return 0; -+} ++ struct kbase_vinstr_client *cli; ++ unsigned long vm_size, size, addr, pfn, offset; + -+/*---------------------------------------------------------------------------*/ ++ if (!filp || !vma) ++ return -EINVAL; + -+static void *enumerate_gpu_clk(struct kbase_device *kbdev, -+ unsigned int index) -+{ -+ if (index >= kbdev->nr_clocks) -+ return NULL; ++ cli = filp->private_data; ++ if (!cli) ++ return -EINVAL; + -+ return kbdev->clocks[index]; -+} ++ vm_size = vma->vm_end - vma->vm_start; + -+static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, -+ void *gpu_clk_handle) -+{ -+ return clk_get_rate((struct clk *)gpu_clk_handle); -+} ++ /* The mapping is allowed to span the entirety of the page allocation, ++ * not just the chunk where the dump buffers are allocated. ++ * This accommodates the corner case where the combined size of the ++ * dump buffers is smaller than a single page. ++ * This does not pose a security risk as the pages are zeroed on ++ * allocation, and anything out of bounds of the dump buffers is never ++ * written to. 
++ */ ++ size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; + -+static int gpu_clk_notifier_register(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) -+{ -+ compiletime_assert(offsetof(struct clk_notifier_data, clk) == -+ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), -+ "mismatch in the offset of clk member"); ++ if (vma->vm_pgoff > (size >> PAGE_SHIFT)) ++ return -EINVAL; + -+ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == -+ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), -+ "mismatch in the size of clk member"); ++ offset = vma->vm_pgoff << PAGE_SHIFT; ++ if (vm_size > size - offset) ++ return -EINVAL; + -+ return clk_notifier_register((struct clk *)gpu_clk_handle, nb); -+} ++ addr = __pa(cli->dump_bufs.page_addr + offset); ++ pfn = addr >> PAGE_SHIFT; + -+static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, -+ void *gpu_clk_handle, struct notifier_block *nb) -+{ -+ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); ++ return remap_pfn_range( ++ vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); +} + -+struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { -+ .get_gpu_clk_rate = get_gpu_clk_rate, -+ .enumerate_gpu_clk = enumerate_gpu_clk, -+ .gpu_clk_notifier_register = gpu_clk_notifier_register, -+ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, -+}; -diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h -new file mode 100644 -index 000000000..0a42559df ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h -@@ -0,0 +1,67 @@ -+/* drivers/gpu/t6xx/kbase/src/platform/rk/mali_kbase_platform.h -+ * Rockchip SoC Mali-Midgard platform-dependent codes -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software FoundatIon. -+ */ -+ +/** -+ * @file mali_kbase_rk.h ++ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. ++ * @inode: Non-NULL pointer to inode structure. ++ * @filp: Non-NULL pointer to file structure. + * -+ * defines work_context type of platform_dependent_part. -+ */ -+ -+#ifndef _MALI_KBASE_RK_H_ -+#define _MALI_KBASE_RK_H_ -+ -+#include -+ -+/*---------------------------------------------------------------------------*/ -+ -+#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) -+ -+/*---------------------------------------------------------------------------*/ -+ -+/* -+ * struct rk_context - work_context of platform_dependent_part_of_rk. ++ * Return: 0 always. + */ -+struct rk_context { -+ /* -+ * record the status of common_parts calling 'power_on_callback' -+ * and 'power_off_callback'. -+ */ -+ bool is_powered; -+ -+ bool is_regulator_on; -+ -+ struct kbase_device *kbdev; -+ -+ struct workqueue_struct *power_off_wq; -+ /* delayed_work_to_power_off_gpu. */ -+ struct delayed_work work; -+ unsigned int delay_ms; ++static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, ++ struct file *filp) ++{ ++ struct kbase_vinstr_client *vcli = filp->private_data; + -+ /* -+ * WAKE_LOCK_SUSPEND for ensuring to run -+ * delayed_work_to_power_off_gpu before suspend. -+ */ -+ struct wake_lock wake_lock; ++ mutex_lock(&vcli->vctx->lock); + -+ /* debug only, the period in ms to count gpu_utilisation. 
*/ -+ unsigned int utilisation_period; ++ vcli->vctx->client_count--; ++ list_del(&vcli->node); + -+ /* to protect operations on 'is_powered' and clks, pd, vd of gpu. */ -+ struct mutex lock; -+}; ++ mutex_unlock(&vcli->vctx->lock); + -+/*---------------------------------------------------------------------------*/ ++ kbasep_vinstr_client_destroy(vcli); + -+static inline struct rk_context *get_rk_context( -+ const struct kbase_device *kbdev) -+{ -+ return (struct rk_context *)(kbdev->platform_context); ++ return 0; +} -+ -+#endif /* _MALI_KBASE_RK_H_ */ -+ -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild -new file mode 100755 -index 000000000..e1398fde3 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild -@@ -0,0 +1,23 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+ -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ -+ mali_kbase_platform_fake.o -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h +diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h new file mode 100644 -index 000000000..28f453161 +index 000000000..6747ec70a --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h -@@ -0,0 +1,38 @@ ++++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h +@@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261076,33 +260673,85 @@ index 000000000..28f453161 + * + */ + -+/** -+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++/* ++ * Vinstr, used to provide an ioctl for userspace access to periodic hardware ++ * counters. ++ */ ++ ++#ifndef _KBASE_VINSTR_H_ ++#define _KBASE_VINSTR_H_ ++ ++struct kbase_vinstr_context; ++struct kbase_hwcnt_virtualizer; ++struct kbase_ioctl_hwcnt_reader_setup; ++ ++/** ++ * kbase_vinstr_init() - Initialise a vinstr context. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr ++ * context will be stored on success. + * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * On creation, the suspend count of the context will be 0. 
++ * ++ * Return: 0 on success, else error code. + */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++int kbase_vinstr_init( ++ struct kbase_hwcnt_virtualizer *hvirt, ++ struct kbase_vinstr_context **out_vctx); + +/** -+ * PLATFORM_FUNCS - Platform specific configuration functions ++ * kbase_vinstr_term() - Terminate a vinstr context. ++ * @vctx: Pointer to the vinstr context to be terminated. ++ */ ++void kbase_vinstr_term(struct kbase_vinstr_context *vctx); ++ ++/** ++ * kbase_vinstr_suspend() - Increment the suspend count of the context. ++ * @vctx: Non-NULL pointer to the vinstr context to be suspended. + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * After this function call returns, it is guaranteed that all timers and ++ * workers in vinstr will be cancelled, and will not be re-triggered until ++ * after the context has been resumed. In effect, this means no new counter ++ * dumps will occur for any existing or subsequently added periodic clients. + */ -+#define PLATFORM_FUNCS (NULL) ++void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c ++/** ++ * kbase_vinstr_resume() - Decrement the suspend count of the context. ++ * @vctx: Non-NULL pointer to the vinstr context to be resumed. ++ * ++ * If a call to this function decrements the suspend count from 1 to 0, then ++ * normal operation of vinstr will be resumed (i.e. counter dumps will once ++ * again be automatically triggered for all periodic clients). ++ * ++ * It is only valid to call this function one time for each prior returned call ++ * to kbase_vinstr_suspend. ++ */ ++void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); ++ ++/** ++ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader ++ * client. ++ * @vinstr_ctx: Non-NULL pointer to the vinstr context. ++ * @setup: Non-NULL pointer to the hwcnt reader configuration. ++ * ++ * Return: file descriptor on success, else a (negative) error code. ++ */ ++int kbase_vinstr_hwcnt_reader_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_ioctl_hwcnt_reader_setup *setup); ++ ++#endif /* _KBASE_VINSTR_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_linux_trace.h b/drivers/gpu/arm/bifrost/mali_linux_trace.h new file mode 100644 -index 000000000..8add708d0 +index 000000000..52f17390c --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c -@@ -0,0 +1,79 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mali_linux_trace.h +@@ -0,0 +1,547 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2011-2017, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261120,260 +260769,542 @@ index 000000000..8add708d0 + * + */ + -+#include -+#include -+#include -+#include -+#include "mali_kbase_config_platform.h" -+ -+#include ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM mali + -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 68, -+ .mmu_irq_number = 69, -+ .gpu_irq_number = 70, -+ .io_memory_region = { -+ .start = 0xFC010000, -+ .end = 0xFC010000 + (4096 * 4) - 1 -+ } -+}; -+#endif /* CONFIG_OF */ ++#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_MALI_H + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; -+} ++#include + -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+} ++#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) ++#define MALI_JOB_SLOTS_EVENT_CHANGED + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL -+}; ++/* ++ * mali_job_slots_event - Reports change of job slot status. ++ * @gpu_id: Kbase device id ++ * @event_id: ORed together bitfields representing a type of event, ++ * made with the GATOR_MAKE_EVENT() macro. ++ */ ++TRACE_EVENT(mali_job_slots_event, ++ TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, ++ u8 job_id), ++ TP_ARGS(gpu_id, event_id, tgid, pid, job_id), ++ TP_STRUCT__entry( ++ __field(u32, gpu_id) ++ __field(u32, event_id) ++ __field(u32, tgid) ++ __field(u32, pid) ++ __field(u8, job_id) ++ ), ++ TP_fast_assign( ++ __entry->gpu_id = gpu_id; ++ __entry->event_id = event_id; ++ __entry->tgid = tgid; ++ __entry->pid = pid; ++ __entry->job_id = job_id; ++ ), ++ TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", ++ __entry->gpu_id, __entry->event_id, ++ __entry->tgid, __entry->pid, __entry->job_id) ++); + -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources -+#endif -+}; ++/** ++ * mali_pm_status - Reports change of power management status. ++ * @gpu_id: Kbase device id ++ * @event_id: Core type (shader, tiler, L2 cache) ++ * @value: 64bits bitmask reporting either power status of ++ * the cores (1-ON, 0-OFF) ++ */ ++TRACE_EVENT(mali_pm_status, ++ TP_PROTO(u32 gpu_id, u32 event_id, u64 value), ++ TP_ARGS(gpu_id, event_id, value), ++ TP_STRUCT__entry( ++ __field(u32, gpu_id) ++ __field(u32, event_id) ++ __field(u64, value) ++ ), ++ TP_fast_assign( ++ __entry->gpu_id = gpu_id; ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("gpu=%u event %u = %llu", ++ __entry->gpu_id, __entry->event_id, __entry->value) ++); + -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; -+} ++/** ++ * mali_page_fault_insert_pages - Reports an MMU page fault ++ * resulting in new pages being mapped. 
++ * @gpu_id: Kbase device id ++ * @event_id: MMU address space number ++ * @value: Number of newly allocated pages ++ */ ++TRACE_EVENT(mali_page_fault_insert_pages, ++ TP_PROTO(u32 gpu_id, s32 event_id, u64 value), ++ TP_ARGS(gpu_id, event_id, value), ++ TP_STRUCT__entry( ++ __field(u32, gpu_id) ++ __field(s32, event_id) ++ __field(u64, value) ++ ), ++ TP_fast_assign( ++ __entry->gpu_id = gpu_id; ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("gpu=%u event %d = %llu", ++ __entry->gpu_id, __entry->event_id, __entry->value) ++); + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+#if MALI_USE_CSF -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) -+#else -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) -+#endif -+{ -+ return 1; -+} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild -new file mode 100755 -index 000000000..e1398fde3 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild -@@ -0,0 +1,23 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/** ++ * mali_total_alloc_pages_change - Reports that the total number of ++ * allocated pages has changed. ++ * @gpu_id: Kbase device id ++ * @event_id: Total number of pages allocated ++ */ ++TRACE_EVENT(mali_total_alloc_pages_change, ++ TP_PROTO(u32 gpu_id, s64 event_id), ++ TP_ARGS(gpu_id, event_id), ++ TP_STRUCT__entry( ++ __field(u32, gpu_id) ++ __field(s64, event_id) ++ ), ++ TP_fast_assign( ++ __entry->gpu_id = gpu_id; ++ __entry->event_id = event_id; ++ ), ++ TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) ++); ++#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ + -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ -+ mali_kbase_platform_fake.o -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..28f453161 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* ++ * MMU subsystem tracepoints ++ */ ++ ++/* Fault status and exception code helpers + * -+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Must be macros to allow use by user-side tracepoint tools + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * bits 0:1 masked off code, and used for the level + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Tracepoint files get included more than once - protect against multiple ++ * definition ++ */ ++#ifndef __TRACE_MALI_MMU_HELPERS ++#define __TRACE_MALI_MMU_HELPERS ++/* Complex macros should be enclosed in parenthesis. + * ++ * We need to have those parentheses removed for our arrays of symbolic look-ups ++ * for __print_symbolic() whilst also being able to use them outside trace code + */ ++#define _ENSURE_PARENTHESIS(args...) args + -+/** -+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ ++ (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ ++ __print_symbolic(((code) & ~3u), \ ++ KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) ++#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ ++ (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) ++ ++#define KBASE_MMU_FAULT_STATUS_CODE(status) \ ++ ((status) & 0xFFu) ++#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ ++ (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") ++ ++#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ ++ KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ ++ KBASE_MMU_FAULT_STATUS_CODE(status)) ++ ++#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ ++ KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) ++ ++#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ ++ ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) ++#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ ++ {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ ++ {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ ++ {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ ++ {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) ++#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ ++ __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ ++ KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) ++ ++#if MALI_USE_CSF ++#define KBASE_MMU_FAULT_CODE_VALID(code) \ ++ ((code >= 0xC0 && code <= 0xEB) && \ ++ (!(code >= 0xC5 && code <= 0xC7)) && \ ++ (!(code >= 0xCC && code <= 0xD8)) && \ ++ (!(code >= 0xDC && code <= 0xDF)) && \ ++ (!(code >= 0xE1 && code <= 0xE3))) ++#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ ++ {0xC0, "TRANSLATION_FAULT_" }, \ ++ {0xC4, "TRANSLATION_FAULT_" }, \ ++ {0xC8, "PERMISSION_FAULT_" }, \ ++ {0xD0, "TRANSTAB_BUS_FAULT_" }, \ ++ {0xD8, "ACCESS_FLAG_" }, \ ++ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ ++ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ ++ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) ++#else /* MALI_USE_CSF */ ++#define KBASE_MMU_FAULT_CODE_VALID(code) \ ++ ((code >= 0xC0 && code <= 0xEF) && \ ++ (!(code >= 0xC5 && code <= 0xC6)) && \ ++ (!(code >= 0xCC && code <= 0xCF)) && \ ++ (!(code >= 0xD4 && code <= 0xD7)) && \ ++ (!(code >= 0xDC && code <= 0xDF))) ++#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ ++ {0xC0, "TRANSLATION_FAULT_" }, \ ++ {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ ++ {0xC8, "PERMISSION_FAULT_" }, \ ++ {0xD0, "TRANSTAB_BUS_FAULT_" }, \ ++ {0xD8, "ACCESS_FLAG_" }, \ ++ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ ++ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ ++ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ ++ {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) ++#endif /* MALI_USE_CSF */ ++#endif /* __TRACE_MALI_MMU_HELPERS */ ++ ++/* trace_mali_mmu_page_fault_grow + * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * Tracepoint about a successful grow of a region due to a GPU page fault + */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++TRACE_EVENT(mali_mmu_page_fault_grow, ++ TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, ++ size_t new_pages), ++ TP_ARGS(reg, fault, new_pages), ++ TP_STRUCT__entry( ++ __field(u64, start_addr) ++ __field(u64, fault_addr) ++ __field(u64, fault_extra_addr) ++ __field(size_t, new_pages) ++ __field(u32, status) ++ ), ++ TP_fast_assign( ++ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; ++ __entry->fault_addr = fault->addr; ++ __entry->fault_extra_addr = fault->extra_addr; ++ __entry->new_pages = new_pages; ++ __entry->status = fault->status; ++ ), ++ TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", ++ __entry->start_addr, __entry->fault_addr, ++ __entry->fault_extra_addr, __entry->new_pages, ++ __entry->status, ++ KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), ++ KBASE_MMU_FAULT_STATUS_CODE(__entry->status), ++ 
KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), ++ KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), ++ KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, ++ KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), ++ __entry->status >> 16) ++); + -+/** -+ * PLATFORM_FUNCS - Platform specific configuration functions ++ ++ ++ ++/* ++ * Just-in-time memory allocation subsystem tracepoints ++ */ ++ ++/* Just-in-time memory allocation soft-job template. Override the TP_printk ++ * further if need be. jit_id can be 0. ++ */ ++DECLARE_EVENT_CLASS(mali_jit_softjob_template, ++ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), ++ TP_ARGS(reg, jit_id), ++ TP_STRUCT__entry( ++ __field(u64, start_addr) ++ __field(size_t, nr_pages) ++ __field(size_t, backed_pages) ++ __field(u8, jit_id) ++ ), ++ TP_fast_assign( ++ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; ++ __entry->nr_pages = reg->nr_pages; ++ __entry->backed_pages = kbase_reg_current_backed_size(reg); ++ __entry->jit_id = jit_id; ++ ), ++ TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", ++ __entry->jit_id, __entry->start_addr, __entry->nr_pages, ++ __entry->backed_pages) ++); ++ ++/* trace_mali_jit_alloc() + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * Tracepoint about a just-in-time memory allocation soft-job successfully ++ * allocating memory + */ -+#define PLATFORM_FUNCS (NULL) ++DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, ++ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), ++ TP_ARGS(reg, jit_id)); + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c -new file mode 100644 -index 000000000..835b7587c ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c -@@ -0,0 +1,77 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* ++/* trace_mali_jit_free() + * -+ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * Tracepoint about memory that was allocated just-in-time being freed ++ * (which may happen either on free soft-job, or during rollback error ++ * paths of an allocation soft-job, etc) + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Free doesn't immediately have the just-in-time memory allocation ID so ++ * it's currently suppressed from the output - set jit_id to 0 ++ */ ++DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, ++ TP_PROTO(struct kbase_va_region *reg, u8 jit_id), ++ TP_ARGS(reg, jit_id), ++ TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", ++ __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); ++ ++#if !MALI_USE_CSF ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/* trace_mali_jit_report + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
++ * Tracepoint about the GPU data structure read to form a just-in-time memory ++ * allocation report, and its calculated physical page usage ++ */ ++TRACE_EVENT(mali_jit_report, ++ TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, ++ unsigned int id_idx, u64 read_val, u64 used_pages), ++ TP_ARGS(katom, reg, id_idx, read_val, used_pages), ++ TP_STRUCT__entry( ++ __field(u64, start_addr) ++ __field(u64, read_val) ++ __field(u64, used_pages) ++ __field(unsigned long, flags) ++ __field(u8, id_idx) ++ __field(u8, jit_id) ++ ), ++ TP_fast_assign( ++ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; ++ __entry->read_val = read_val; ++ __entry->used_pages = used_pages; ++ __entry->flags = reg->flags; ++ __entry->id_idx = id_idx; ++ __entry->jit_id = katom->jit_ids[id_idx]; ++ ), ++ TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", ++ __entry->start_addr, __entry->id_idx, __entry->jit_id, ++ __print_symbolic(__entry->flags, ++ { 0, "address"}, ++ { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, ++ { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, ++ { KBASE_REG_HEAP_INFO_IS_SIZE | ++ KBASE_REG_TILER_ALIGN_TOP, ++ "size with align (invalid)" } ++ ), ++ __entry->read_val, __entry->used_pages) ++); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++#endif /* !MALI_USE_CSF */ ++ ++TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/* trace_mali_jit_report_pressure + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Tracepoint about change in physical memory pressure, due to the information ++ * about a region changing. Examples include: ++ * - a report on a region that was allocated just-in-time ++ * - just-in-time allocation of a region ++ * - free of a region that was allocated just-in-time ++ */ ++TRACE_EVENT(mali_jit_report_pressure, ++ TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, ++ u64 new_pressure, unsigned int flags), ++ TP_ARGS(reg, new_used_pages, new_pressure, flags), ++ TP_STRUCT__entry( ++ __field(u64, start_addr) ++ __field(u64, used_pages) ++ __field(u64, new_used_pages) ++ __field(u64, new_pressure) ++ __field(unsigned int, flags) ++ ), ++ TP_fast_assign( ++ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; ++ __entry->used_pages = reg->used_pages; ++ __entry->new_used_pages = new_used_pages; ++ __entry->new_pressure = new_pressure; ++ __entry->flags = flags; ++ ), ++ TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", ++ __entry->start_addr, __entry->used_pages, ++ __entry->new_used_pages, __entry->new_pressure, ++ __print_flags(__entry->flags, "|", ++ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, ++ "HAPPENED_ON_ALLOC_OR_FREE" })) ++); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++#ifndef __TRACE_SYSGRAPH_ENUM ++#define __TRACE_SYSGRAPH_ENUM ++/* Enum of sysgraph message IDs */ ++enum sysgraph_msg { ++ SGR_ARRIVE, ++ SGR_SUBMIT, ++ SGR_COMPLETE, ++ SGR_POST, ++ SGR_ACTIVE, ++ SGR_INACTIVE ++}; ++#endif /* __TRACE_SYSGRAPH_ENUM */ ++ ++/* A template for SYSGRAPH events + * ++ * Most of the sysgraph events contain only one input argument ++ * which is atom_id therefore they will be using a common template + */ ++TRACE_EVENT(sysgraph, ++ TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, ++ unsigned int atom_id), ++ TP_ARGS(message, proc_id, atom_id), ++ TP_STRUCT__entry( ++ 
__field(unsigned int, proc_id) ++ __field(enum sysgraph_msg, message) ++ __field(unsigned int, atom_id) ++ ), ++ TP_fast_assign( ++ __entry->proc_id = proc_id; ++ __entry->message = message; ++ __entry->atom_id = atom_id; ++ ), ++ TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, ++ __entry->proc_id, __entry->atom_id) ++); + -+#include -+#include -+#include -+#include ++/* A template for SYSGRAPH GPU events ++ * ++ * Sysgraph events that record start/complete events ++ * on GPU also record a js value in addition to the ++ * atom id. ++ */ ++TRACE_EVENT(sysgraph_gpu, ++ TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, ++ unsigned int atom_id, unsigned int js), ++ TP_ARGS(message, proc_id, atom_id, js), ++ TP_STRUCT__entry( ++ __field(unsigned int, proc_id) ++ __field(enum sysgraph_msg, message) ++ __field(unsigned int, atom_id) ++ __field(unsigned int, js) ++ ), ++ TP_fast_assign( ++ __entry->proc_id = proc_id; ++ __entry->message = message; ++ __entry->atom_id = atom_id; ++ __entry->js = js; ++ ), ++ TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", ++ __entry->message, __entry->proc_id, ++ __entry->atom_id, __entry->js) ++); + -+#include ++/* Tracepoint files get included more than once - protect against multiple ++ * definition ++ */ ++#undef KBASE_JIT_REPORT_GPU_MEM_SIZE + -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 68, -+ .mmu_irq_number = 69, -+ .gpu_irq_number = 70, -+ .io_memory_region = { -+ .start = 0x2f010000, -+ .end = 0x2f010000 + (4096 * 4) - 1} -+}; -+#endif ++/* Size in bytes of the memory surrounding the location used for a just-in-time ++ * memory allocation report ++ */ ++#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; -+} ++/* trace_mali_jit_report_gpu_mem ++ * ++ * Tracepoint about the GPU memory nearby the location used for a just-in-time ++ * memory allocation report ++ */ ++TRACE_EVENT(mali_jit_report_gpu_mem, ++ TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), ++ TP_ARGS(base_addr, reg_addr, gpu_mem, flags), ++ TP_STRUCT__entry( ++ __field(u64, base_addr) ++ __field(u64, reg_addr) ++ __array(u64, mem_values, ++ KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) ++ __field(unsigned int, flags) ++ ), ++ TP_fast_assign( ++ __entry->base_addr = base_addr; ++ __entry->reg_addr = reg_addr; ++ memcpy(__entry->mem_values, gpu_mem, ++ sizeof(__entry->mem_values)); ++ __entry->flags = flags; ++ ), ++ TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", ++ __entry->reg_addr, __entry->base_addr, ++ __print_array(__entry->mem_values, ++ ARRAY_SIZE(__entry->mem_values), sizeof(u64)), ++ __print_flags(__entry->flags, "|", ++ { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, ++ "HAPPENED_ON_ALLOC_OR_FREE" })) ++); + -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+} ++/* trace_mali_jit_trim_from_region ++ * ++ * Tracepoint about trimming physical pages from a region ++ */ ++TRACE_EVENT(mali_jit_trim_from_region, ++ TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, ++ size_t old_pages, size_t available_pages, size_t new_pages), ++ TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), ++ TP_STRUCT__entry( ++ __field(u64, start_addr) ++ __field(size_t, freed_pages) ++ __field(size_t, old_pages) ++ __field(size_t, available_pages) ++ 
__field(size_t, new_pages) ++ ), ++ TP_fast_assign( ++ __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; ++ __entry->freed_pages = freed_pages; ++ __entry->old_pages = old_pages; ++ __entry->available_pages = available_pages; ++ __entry->new_pages = new_pages; ++ ), ++ TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", ++ __entry->start_addr, __entry->freed_pages, __entry->old_pages, ++ __entry->available_pages, __entry->new_pages) ++); + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL -+}; ++/* trace_mali_jit_trim ++ * ++ * Tracepoint about total trimmed physical pages ++ */ ++TRACE_EVENT(mali_jit_trim, ++ TP_PROTO(size_t freed_pages), ++ TP_ARGS(freed_pages), ++ TP_STRUCT__entry( ++ __field(size_t, freed_pages) ++ ), ++ TP_fast_assign( ++ __entry->freed_pages = freed_pages; ++ ), ++ TP_printk("freed_pages=%zu", __entry->freed_pages) ++); + -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources -+#endif -+}; ++#include "debug/mali_kbase_debug_linux_ktrace.h" + -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; -+} ++#endif /* _TRACE_MALI_H */ + -+#ifdef CONFIG_MALI_BIFROST_DVFS -+#if MALI_USE_CSF -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) -+#else -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) -+#endif -+{ -+ return 1; -+} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild -new file mode 100755 -index 000000000..10f7dc8cf ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild -@@ -0,0 +1,24 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2012-2013, 2016-2017, 2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++#undef TRACE_INCLUDE_PATH ++/* lwn.net/Articles/383362 suggests this should remain as '.', and instead ++ * extend CFLAGS ++ */ ++#define TRACE_INCLUDE_PATH . 
++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE mali_linux_trace + -+bifrost_kbase-y += \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ -+ platform/$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ -+ mali_kbase_platform_fake.o -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h ++/* This part must be outside protection */ ++#include +diff --git a/drivers/gpu/arm/bifrost/mali_malisw.h b/drivers/gpu/arm/bifrost/mali_malisw.h new file mode 100644 -index 000000000..28f453161 +index 000000000..d9db189e8 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h -@@ -0,0 +1,38 @@ ++++ b/drivers/gpu/arm/bifrost/mali_malisw.h +@@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261391,33 +261322,103 @@ index 000000000..28f453161 + * + */ + ++/* ++ * Kernel-wide include for common macros and types. ++ */ ++ ++#ifndef _MALISW_H_ ++#define _MALISW_H_ ++ ++#include ++ +/** -+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++ * MIN - Return the lesser of two values. ++ * @x: value1 ++ * @y: value2 + * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * As a macro it may evaluate its arguments more than once. ++ * Refer to MAX macro for more details + */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +/** -+ * PLATFORM_FUNCS - Platform specific configuration functions ++ * MAX - Return the greater of two values. ++ * @x: value1 ++ * @y: value2 + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * As a macro it may evaluate its arguments more than once. ++ * If called on the same two arguments as MIN it is guaranteed to return ++ * the one that MIN didn't return. This is significant for types where not ++ * all values are comparable e.g. NaNs in floating-point types. But if you want ++ * to retrieve the min and max of two values, consider using a conditional swap ++ * instead. + */ -+#define PLATFORM_FUNCS (NULL) ++#define MAX(x, y) ((x) < (y) ? (y) : (x)) + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c ++/** ++ * CSTD_UNUSED - Function-like macro for suppressing unused variable warnings. ++ * ++ * @x: unused variable ++ * ++ * Where possible such variables should be removed; this macro is present for ++ * cases where we much support API backwards compatibility. ++ */ ++#define CSTD_UNUSED(x) ((void)(x)) ++ ++/** ++ * CSTD_NOP - Function-like macro for use where "no behavior" is desired. ++ * @...: no-op ++ * ++ * This is useful when compile time macros turn a function-like macro in to a ++ * no-op, but where having no statement is otherwise invalid. ++ */ ++#define CSTD_NOP(...) 
((void)#__VA_ARGS__) ++ ++/** ++ * CSTD_STR1 - Function-like macro for stringizing a single level macro. ++ * @x: macro's value ++ * ++ * @code ++ * #define MY_MACRO 32 ++ * CSTD_STR1( MY_MACRO ) ++ * > "MY_MACRO" ++ * @endcode ++ */ ++#define CSTD_STR1(x) #x ++ ++/** ++ * CSTD_STR2 - Function-like macro for stringizing a macro's value. ++ * @x: macro's value ++ * ++ * This should not be used if the macro is defined in a way which may have no ++ * value; use the alternative @c CSTD_STR2N macro should be used instead. ++ * @code ++ * #define MY_MACRO 32 ++ * CSTD_STR2( MY_MACRO ) ++ * > "32" ++ * @endcode ++ */ ++#define CSTD_STR2(x) CSTD_STR1(x) ++ ++ #ifndef fallthrough ++ #define fallthrough __fallthrough ++ #endif /* fallthrough */ ++ ++#ifndef __fallthrough ++#define __fallthrough __attribute__((fallthrough)) ++#endif /* __fallthrough */ ++ ++#endif /* _MALISW_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c new file mode 100644 -index 000000000..8be30fb25 +index 000000000..1db3abe2f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c -@@ -0,0 +1,77 @@ ++++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c +@@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261435,72 +261436,21 @@ index 000000000..8be30fb25 + * + */ + -+#include -+#include -+#include -+#include -+ -+#include -+ -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 75, -+ .mmu_irq_number = 76, -+ .gpu_irq_number = 77, -+ .io_memory_region = { -+ .start = 0x2F000000, -+ .end = 0x2F000000 + (4096 * 4) - 1} -+}; -+#endif -+ -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; -+} -+ -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+} -+ -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL -+}; -+ -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources -+#endif -+}; -+ -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; -+} -+ -+#ifdef CONFIG_MALI_BIFROST_DVFS -+#if MALI_USE_CSF -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) -+#else -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++/* Create the trace point if not configured in kernel */ ++#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY ++#define CREATE_TRACE_POINTS ++#include "mali_power_gpu_frequency_trace.h" +#endif -+{ -+ return 1; -+} -+#endif /* CONFIG_MALI_BIFROST_DVFS */ -diff --git a/drivers/gpu/arm/bifrost/protected_mode_switcher.h b/drivers/gpu/arm/bifrost/protected_mode_switcher.h +diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h new file 
mode 100644 -index 000000000..9dd9253c7 +index 000000000..f156650a4 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/protected_mode_switcher.h -@@ -0,0 +1,56 @@ ++++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h +@@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261518,87 +261468,62 @@ index 000000000..9dd9253c7 + * + */ + -+#ifndef _PROTECTED_MODE_SWITCH_H_ -+#define _PROTECTED_MODE_SWITCH_H_ ++#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI ++#define _TRACE_POWER_GPU_FREQUENCY_MALI ++#endif + -+struct protected_mode_device; ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM power ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . + -+/** -+ * struct protected_mode_ops - Callbacks for protected mode switch operations -+ * -+ * @protected_mode_enable: Callback to enable protected mode for device, and -+ * reset device -+ * Returns 0 on success, non-zero on error -+ * @protected_mode_disable: Callback to disable protected mode for device -+ * Returns 0 on success, non-zero on error -+ */ -+struct protected_mode_ops { -+ int (*protected_mode_enable)( -+ struct protected_mode_device *protected_dev); -+ int (*protected_mode_disable)( -+ struct protected_mode_device *protected_dev); -+}; ++#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_POWER_GPU_FREQUENCY_H + -+/** -+ * struct protected_mode_device - Device structure for protected mode devices -+ * @ops: Callbacks associated with this device -+ * @data: Pointer to device private data -+ * -+ * This structure should be registered with the platform device using -+ * platform_set_drvdata(). -+ */ -+struct protected_mode_device { -+ struct protected_mode_ops ops; -+ void *data; -+}; ++#include + -+#endif /* _PROTECTED_MODE_SWITCH_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild -new file mode 100755 -index 000000000..38e4dd4d7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/Kbuild -@@ -0,0 +1,31 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# ++DECLARE_EVENT_CLASS(gpu, + -+ccflags-y += -I$(src)/include \ -+ -I$(src) ++ TP_PROTO(unsigned int state, unsigned int gpu_id), + -+subdir-ccflags-y += -I$(src)/include \ -+ -I$(src) ++ TP_ARGS(state, gpu_id), + -+obj-$(CONFIG_MALI_KUTF) += kutf/ -+obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ -+obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ -+obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/ ++ TP_STRUCT__entry( ++ __field(u32, state) ++ __field(u32, gpu_id) ++ ), + -diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig -new file mode 100644 -index 000000000..e9fe22771 ++ TP_fast_assign( ++ __entry->state = state; ++ __entry->gpu_id = gpu_id; ++ ), ++ ++ TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, ++ (unsigned long)__entry->gpu_id) ++); ++ ++DEFINE_EVENT(gpu, gpu_frequency, ++ ++ TP_PROTO(unsigned int frequency, unsigned int gpu_id), ++ ++ TP_ARGS(frequency, gpu_id) ++); ++ ++#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ ++ ++/* This part must be outside protection */ ++#include +diff --git a/drivers/gpu/arm/bifrost/mmu/Kbuild b/drivers/gpu/arm/bifrost/mmu/Kbuild +new file mode 100755 +index 000000000..416432397 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/Kconfig -@@ -0,0 +1,69 @@ ++++ b/drivers/gpu/arm/bifrost/mmu/Kbuild +@@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# -+# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software @@ -261616,65 +261541,26 @@ index 000000000..e9fe22771 +# +# + -+menuconfig MALI_KUTF -+ bool "Build Mali Kernel Unit Test Framework modules" -+ depends on MALI_BIFROST && MALI_BIFROST_DEBUG -+ default y if MALI_BIFROST_DEBUG -+ help -+ This option will build the Mali testing framework modules. -+ -+ Modules: -+ - kutf.ko -+ - kutf_test.ko -+ -+config MALI_KUTF_IRQ_TEST -+ bool "Build Mali KUTF IRQ test module" -+ depends on MALI_KUTF -+ default y -+ help -+ This option will build the IRQ latency measurement test module. -+ It can determine the latency of the Mali GPU IRQ on your system. -+ -+ Modules: -+ - mali_kutf_irq_test.ko -+ -+config MALI_KUTF_CLK_RATE_TRACE -+ bool "Build Mali KUTF Clock rate trace test module" -+ depends on MALI_KUTF -+ default y -+ help -+ This option will build the clock rate trace portal test module. -+ It can test the clocks integration into the platform and exercise some -+ basic trace test in the system. -+ -+ Modules: -+ - mali_kutf_clk_rate_trace_test_portal.ko -+ -+config MALI_KUTF_MGM_INTEGRATION_TEST -+ bool "Build Mali KUTF MGM integration test module" -+ depends on MALI_KUTF -+ default y -+ help -+ This option will build the MGM integration test module. -+ It can test the implementation of PTE translation for specific -+ group ids. 
-+ -+ Modules: -+ - mali_kutf_mgm_integration_test.ko -+ ++bifrost_kbase-y += \ ++ mmu/mali_kbase_mmu.o \ ++ mmu/mali_kbase_mmu_hw_direct.o \ ++ mmu/mali_kbase_mmu_mode_aarch64.o + -+comment "Enable MALI_BIFROST_DEBUG for KUTF modules support" -+ depends on MALI_BIFROST && !MALI_BIFROST_DEBUG && MALI_KUTF -diff --git a/drivers/gpu/arm/bifrost/tests/build.bp b/drivers/gpu/arm/bifrost/tests/build.bp -new file mode 100755 -index 000000000..5581ba934 ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o ++else ++ bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +new file mode 100644 +index 000000000..4cac7876f --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/build.bp -@@ -0,0 +1,46 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +@@ -0,0 +1,572 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -261692,424 +261578,567 @@ index 000000000..5581ba934 + * + */ + -+bob_defaults { -+ name: "kernel_test_includes", -+ local_include_dirs: [ -+ "include", -+ "./../../", -+ "./../", -+ "./", -+ ], -+} ++/** ++ * DOC: Base kernel MMU management specific for CSF GPU. ++ */ + -+bob_defaults { -+ name: "kernel_test_configs", -+ mali_kutf: { -+ kbuild_options: ["CONFIG_MALI_KUTF=y"], -+ }, -+ unit_test_kernel_modules: { -+ kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], -+ }, -+} ++#include ++#include ++#include ++#include ++#include ++#include + -+bob_defaults { -+ name: "kernel_unit_tests", -+ add_to_alias: ["unit_tests"], -+ srcs: [".*_unit_test/"], ++void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, ++ struct kbase_mmu_setup * const setup) ++{ ++ /* Set up the required caching policies at the correct indices ++ * in the memattr register. ++ */ ++ setup->memattr = ++ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << ++ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | ++ (AS_MEMATTR_FORCE_TO_CACHE_ALL << ++ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | ++ (AS_MEMATTR_WRITE_ALLOC << ++ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << ++ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_WA << ++ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | ++ (AS_MEMATTR_AARCH64_NON_CACHEABLE << ++ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)) | ++ (AS_MEMATTR_AARCH64_SHARED << ++ (AS_MEMATTR_INDEX_SHARED * 8)); ++ ++ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; ++ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; +} -diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h -new file mode 100644 -index 000000000..3f68efa42 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h -@@ -0,0 +1,109 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * submit_work_pagefault() - Submit a work for MMU page fault. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: Kbase device pointer ++ * @as_nr: Faulty address space ++ * @fault: Data relating to the fault + * ++ * This function submits a work for reporting the details of MMU fault. + */ ++static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; ++ struct kbase_as *const as = &kbdev->as[as_nr]; ++ struct kbase_context *kctx; + -+#ifndef _KERNEL_UTF_HELPERS_H_ -+#define _KERNEL_UTF_HELPERS_H_ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); + -+/* kutf_helpers.h -+ * Test helper functions for the kernel UTF test infrastructure. -+ * -+ * These functions provide methods for enqueuing/dequeuing lines of text sent -+ * by user space. They are used to implement the transfer of "userdata" from -+ * user space to kernel. -+ */ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); + -+#include -+#include ++ as->pf_data = (struct kbase_fault) { ++ .status = fault->status, ++ .addr = fault->addr, ++ }; + -+/** -+ * kutf_helper_pending_input() - Check any pending lines sent by user space -+ * @context: KUTF context -+ * -+ * Return: true if there are pending lines, otherwise false -+ */ -+bool kutf_helper_pending_input(struct kutf_context *context); ++ /* ++ * A page fault work item could already be pending for the ++ * context's address space, when the page fault occurs for ++ * MCU's address space. ++ */ ++ if (!queue_work(as->pf_wq, &as->work_pagefault)) { ++ dev_dbg(kbdev->dev, ++ "Page fault is already pending for as %u", as_nr); ++ kbase_ctx_sched_release_ctx(kctx); ++ } else { ++ atomic_inc(&kbdev->faults_pending); ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+/** -+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space -+ * @context: KUTF context -+ * @str_size: Pointer to an integer to receive the size of the string -+ * -+ * If no line is available then this function will wait (interruptibly) until -+ * a line is available. -+ * -+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end -+ * of data. 
-+ */ -+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); ++void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, ++ struct kbase_fault *fault) ++{ ++ /* decode the fault status */ ++ u32 exception_type = fault->status & 0xFF; ++ u32 access_type = (fault->status >> 8) & 0x3; ++ u32 source_id = (fault->status >> 16); ++ int as_no; + -+/** -+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space -+ * @context: KUTF context -+ * @str: The user space address of the line -+ * @size: The length in bytes of the string -+ * -+ * This function will use copy_from_user to copy the string out of user space. -+ * The string need not be NULL-terminated (@size should not include the NULL -+ * termination). -+ * -+ * As a special case @str==NULL and @size==0 is valid to mark the end of input, -+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead. -+ * -+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space, -+ * -ENOMEM if out of memory. -+ */ -+int kutf_helper_input_enqueue(struct kutf_context *context, -+ const char __user *str, size_t size); ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unexpected Page fault in firmware address space at VA 0x%016llX\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n", ++ fault->addr, ++ fault->status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(fault->status), ++ source_id); + -+/** -+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent -+ * @context: KUTF context -+ * -+ * After this function has been called, kutf_helper_input_dequeue() will always -+ * return NULL. -+ */ -+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); ++ kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); + -+/** -+ * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser -+ * to ignore errors, until the counterpart -+ * is written to dmesg to stop ignoring errors. -+ * @dev: Device pointer to write to dmesg using. -+ * -+ * This function writes "Start ignoring dmesg warnings" to dmesg, which -+ * the parser will read and not log any errors. Only to be used in cases where -+ * we expect an error to be produced in dmesg but that we do not want to be -+ * flagged as an error. -+ */ -+void kutf_helper_ignore_dmesg(struct device *dev); ++ /* Report MMU fault for all address spaces (except MCU_AS_NR) */ ++ for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) ++ submit_work_pagefault(kbdev, as_no, fault); + -+/** -+ * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser -+ * to stop ignoring errors. -+ * @dev: Device pointer to write to dmesg using. -+ * -+ * This function writes "Stop ignoring dmesg warnings" to dmesg, which -+ * the parser will read and continue to log any errors. Counterpart to -+ * kutf_helper_ignore_dmesg(). 
-+ */ -+void kutf_helper_stop_ignoring_dmesg(struct device *dev); ++ /* GPU reset is required to recover */ ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); + -+#endif /* _KERNEL_UTF_HELPERS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h -new file mode 100644 -index 000000000..e147cbb90 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h -@@ -0,0 +1,184 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++} ++KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); + -+#ifndef _KERNEL_UTF_HELPERS_USER_H_ -+#define _KERNEL_UTF_HELPERS_USER_H_ ++void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, struct kbase_fault *fault) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ u32 const status = fault->status; ++ int exception_type = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; ++ int access_type = (status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >> ++ GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; ++ int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> ++ GPU_FAULTSTATUS_SOURCE_ID_SHIFT; ++ const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? ++ "true" : "false"; ++ int as_no = as->number; ++ unsigned long flags; ++ const uintptr_t fault_addr = fault->addr; + -+/* kutf_helpers.h -+ * Test helper functions for the kernel UTF test infrastructure, whose -+ * implementation mirrors that of similar functions for kutf-userside -+ */ ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "GPU bus fault in AS%d at PA %pK\n" ++ "PA_VALID: %s\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, (void *)fault_addr, ++ addr_valid, ++ status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(access_type), ++ source_id, ++ kctx->pid); + -+#include -+#include ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + ++ /* Switching to UNMAPPED mode above would have enabled the firmware to ++ * recover from the fault (if the memory access was made by firmware) ++ * and it can then respond to CSG termination requests to be sent now. 
++ * All GPU command queue groups associated with the context would be ++ * affected as they use the same GPU address space. ++ */ ++ kbase_csf_ctx_handle_fault(kctx, fault); + -+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255 ++ /* Now clear the GPU fault */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+enum kutf_helper_valtype { -+ KUTF_HELPER_VALTYPE_INVALID, -+ KUTF_HELPER_VALTYPE_U64, -+ KUTF_HELPER_VALTYPE_STR, ++} + -+ KUTF_HELPER_VALTYPE_COUNT /* Must be last */ -+}; ++/* ++ * The caller must ensure it's retained the ctx to prevent it from being ++ * scheduled out whilst it's being worked on. ++ */ ++void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; ++ unsigned int exception_type; ++ unsigned int access_type; ++ unsigned int source_id; ++ int as_no; ++ struct kbase_device *kbdev; ++ const u32 status = fault->status; + -+struct kutf_helper_named_val { -+ enum kutf_helper_valtype type; -+ char *val_name; -+ union { -+ u64 val_u64; -+ char *val_str; -+ } u; -+}; ++ as_no = as->number; ++ kbdev = kctx->kbdev; + -+/* Extra error values for certain helpers when we want to distinguish between -+ * Linux's own error values too. -+ * -+ * These can only be used on certain functions returning an int type that are -+ * documented as returning one of these potential values, they cannot be used -+ * from functions return a ptr type, since we can't decode it with PTR_ERR -+ * -+ * No negative values are used - Linux error codes should be used instead, and -+ * indicate a problem in accessing the data file itself (are generally -+ * unrecoverable) -+ * -+ * Positive values indicate correct access but invalid parsing (can be -+ * recovered from assuming data in the future is correct) -+ */ -+enum kutf_helper_err { -+ /* No error - must be zero */ -+ KUTF_HELPER_ERR_NONE = 0, -+ /* Named value parsing encountered an invalid name */ -+ KUTF_HELPER_ERR_INVALID_NAME, -+ /* Named value parsing of string or u64 type encountered extra -+ * characters after the value (after the last digit for a u64 type or -+ * after the string end delimiter for string type) ++ /* Make sure the context was active */ ++ if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) ++ return; ++ ++ /* decode the fault status */ ++ exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); ++ access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); ++ source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); ++ ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "Reason: %s\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, fault->addr, ++ reason_str, ++ status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(status), ++ source_id, ++ kctx->pid); ++ ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ ++ /* switch to UNMAPPED mode, ++ * will abort all jobs and stop any hw counter dumping + */ -+ KUTF_HELPER_ERR_CHARS_AFTER_VAL, -+ /* Named value parsing of string type couldn't find the string end -+ * delimiter. 
-+ * -+ * This cannot be encountered when the NAME="value" message exceeds the -+ * textbuf's maximum line length, because such messages are not checked -+ * for an end string delimiter ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ ++ ++ kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); ++ /* Switching to UNMAPPED mode above would have enabled the firmware to ++ * recover from the fault (if the memory access was made by firmware) ++ * and it can then respond to CSG termination requests to be sent now. ++ * All GPU command queue groups associated with the context would be ++ * affected as they use the same GPU address space. + */ -+ KUTF_HELPER_ERR_NO_END_DELIMITER, -+ /* Named value didn't parse as any of the known types */ -+ KUTF_HELPER_ERR_INVALID_VALUE, -+}; ++ kbase_csf_ctx_handle_fault(kctx, fault); + ++ /* Clear down the fault */ ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + -+/* Send named NAME=value pair, u64 value -+ * -+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long -+ * -+ * Any failure will be logged on the suite's current test fixture -+ * -+ * Returns 0 on success, non-zero on failure -+ */ -+int kutf_helper_send_named_u64(struct kutf_context *context, -+ const char *val_name, u64 val); ++} + -+/* Get the maximum length of a string that can be represented as a particular -+ * NAME="value" pair without string-value truncation in the kernel's buffer -+ * -+ * Given val_name and the kernel buffer's size, this can be used to determine -+ * the maximum length of a string that can be sent as val_name="value" pair -+ * without having the string value truncated. Any string longer than this will -+ * be truncated at some point during communication to this size. -+ * -+ * It is assumed that val_name is a valid name for -+ * kutf_helper_send_named_str(), and no checking will be made to -+ * ensure this. ++/** ++ * kbase_mmu_interrupt_process() - Process a bus or page fault. ++ * @kbdev: The kbase_device the fault happened on ++ * @kctx: The kbase_context for the faulting address space if one was ++ * found. ++ * @as: The address space that has the fault ++ * @fault: Data relating to the fault + * -+ * Returns the maximum string length that can be represented, or a negative -+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz ++ * This function will process a fault on a specific address space + */ -+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); ++static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_as *as, ++ struct kbase_fault *fault) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/* Send named NAME="str" pair -+ * -+ * no escaping allowed in str. 
Any of the following characters will terminate -+ * the string: '"' '\\' '\n' -+ * -+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long -+ * -+ * Any failure will be logged on the suite's current test fixture -+ * -+ * Returns 0 on success, non-zero on failure -+ */ -+int kutf_helper_send_named_str(struct kutf_context *context, -+ const char *val_name, const char *val_str); ++ if (!kctx) { ++ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", ++ kbase_as_has_bus_fault(as, fault) ? ++ "Bus error" : "Page fault", ++ as->number, fault->addr); + -+/* Receive named NAME=value pair -+ * -+ * This can receive u64 and string values - check named_val->type -+ * -+ * If you are not planning on dynamic handling of the named value's name and -+ * type, then kutf_helper_receive_check_val() is more useful as a -+ * convenience function. -+ * -+ * String members of named_val will come from memory allocated on the fixture's mempool -+ * -+ * Returns 0 on success. Negative value on failure to receive from the 'run' -+ * file, positive value indicates an enum kutf_helper_err value for correct -+ * reception of data but invalid parsing -+ */ -+int kutf_helper_receive_named_val( -+ struct kutf_context *context, -+ struct kutf_helper_named_val *named_val); ++ /* Since no ctx was found, the MMU must be disabled. */ ++ WARN_ON(as->current_setup.transtab); + -+/* Receive and validate NAME=value pair -+ * -+ * As with kutf_helper_receive_named_val, but validate that the -+ * name and type are as expected, as a convenience for a common pattern found -+ * in tests. -+ * -+ * NOTE: this only returns an error value if there was actually a problem -+ * receiving data. -+ * -+ * NOTE: If the underlying data was received correctly, but: -+ * - isn't of the expected name -+ * - isn't the expected type -+ * - isn't correctly parsed for the type -+ * then the following happens: -+ * - failure result is recorded -+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID -+ * - named_val->u will contain some default value that should be relatively -+ * harmless for the test, including being writable in the case of string -+ * values -+ * - return value will be 0 to indicate success -+ * -+ * The rationale behind this is that we'd prefer to continue the rest of the -+ * test with failures propagated, rather than hitting a timeout -+ */ -+int kutf_helper_receive_check_val( -+ struct kutf_helper_named_val *named_val, -+ struct kutf_context *context, -+ const char *expect_val_name, -+ enum kutf_helper_valtype expect_val_type); ++ if (kbase_as_has_bus_fault(as, fault)) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ else if (kbase_as_has_page_fault(as, fault)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ } + -+/* Output a named value to kmsg */ -+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); ++ return; ++ } + ++ if (kbase_as_has_bus_fault(as, fault)) { ++ /* ++ * We need to switch to UNMAPPED mode - but we do this in a ++ * worker so that we can sleep ++ */ ++ WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); ++ atomic_inc(&kbdev->faults_pending); ++ } else { ++ WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); ++ atomic_inc(&kbdev->faults_pending); ++ } ++} + -+#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ -diff --git 
a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h -new file mode 100644 -index 000000000..5d4d96ef3 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h -@@ -0,0 +1,72 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, ++ u32 status, u32 as_nr) ++{ ++ struct kbase_context *kctx; ++ unsigned long flags; ++ struct kbase_as *as; ++ struct kbase_fault *fault; + -+#ifndef _KERNEL_UTF_MEM_H_ -+#define _KERNEL_UTF_MEM_H_ ++ if (WARN_ON(as_nr == MCU_AS_NR)) ++ return -EINVAL; + -+/* kutf_mem.h -+ * Functions for management of memory pools in the kernel. -+ * -+ * This module implements a memory pool allocator, allowing a test -+ * implementation to allocate linked allocations which can then be freed by a -+ * single free which releases all of the resources held by the entire pool. -+ * -+ * Note that it is not possible to free single resources within the pool once -+ * allocated. 
-+ */ ++ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) ++ return -EINVAL; + -+#include -+#include ++ as = &kbdev->as[as_nr]; ++ fault = &as->bf_data; ++ fault->status = status; ++ fault->addr = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++ fault->addr |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++ fault->protected_mode = false; + -+/** -+ * struct kutf_mempool - the memory pool context management structure -+ * @head: list head on which the allocations in this context are added to -+ * @lock: mutex for concurrent allocation from multiple threads -+ * -+ */ -+struct kutf_mempool { -+ struct list_head head; -+ struct mutex lock; -+}; ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_nr); ++ ++ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr); ++ ++ /* Process the bus fault interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return 0; ++} ++ ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++{ ++ const int num_as = 16; ++ const int pf_shift = 0; ++ const unsigned long as_bit_mask = (1UL << num_as) - 1; ++ unsigned long flags; ++ u32 new_mask; ++ u32 tmp; ++ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask); ++ ++ /* remember current mask */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ /* mask interrupts for now */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ ++ while (pf_bits) { ++ struct kbase_context *kctx; ++ int as_no = ffs(pf_bits) - 1; ++ struct kbase_as *as = &kbdev->as[as_no]; ++ struct kbase_fault *fault = &as->pf_data; ++ ++ /* find faulting address */ ++ fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_HI)); ++ fault->addr <<= 32; ++ fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_LO)); ++ ++ /* Mark the fault protected or not */ ++ fault->protected_mode = false; ++ ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_no); ++ ++ /* record the fault status */ ++ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTSTATUS)); ++ ++ fault->extra_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); ++ fault->extra_addr <<= 32; ++ fault->extra_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); ++ ++ /* Mark page fault as handled */ ++ pf_bits &= ~(1UL << as_no); ++ ++ /* remove the queued PF from the mask */ ++ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ ++ if (as_no == MCU_AS_NR) { ++ kbase_mmu_report_mcu_as_fault_and_reset(kbdev, fault); ++ /* Pointless to handle remaining faults */ ++ break; ++ } ++ ++ /* ++ * Refcount the kctx - it shouldn't disappear anyway, since ++ * Page faults _should_ only occur whilst GPU commands are ++ * executing, and a command causing the Page fault shouldn't ++ * complete until the MMU is updated. ++ * Reference is released at the end of bottom half of page ++ * fault handling. 
++ */ ++ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); ++ ++ /* Process the interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++ ++ /* reenable interrupts */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ new_mask |= tmp; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++} ++ ++int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg) ++{ ++ /* Can't soft-stop the provoking job */ ++ return -EPERM; ++} + +/** -+ * kutf_mempool_init() - Initialize a memory pool. -+ * @pool: Memory pool structure to initialize, provided by the user ++ * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device. + * -+ * Return: zero on success ++ * @data: work_struct passed by queue_work() ++ * ++ * Report a GPU fatal error for all GPU command queue groups that are ++ * using the address space and terminate them. + */ -+int kutf_mempool_init(struct kutf_mempool *pool); ++static void kbase_mmu_gpu_fault_worker(struct work_struct *data) ++{ ++ struct kbase_as *const faulting_as = container_of(data, struct kbase_as, ++ work_gpufault); ++ const u32 as_nr = faulting_as->number; ++ struct kbase_device *const kbdev = container_of(faulting_as, struct ++ kbase_device, as[as_nr]); ++ struct kbase_fault *fault; ++ struct kbase_context *kctx; ++ u32 status; ++ u64 address; ++ u32 as_valid; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ fault = &faulting_as->gf_data; ++ status = fault->status; ++ as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; ++ address = fault->addr; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_warn(kbdev->dev, ++ "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" ++ "ASID_VALID: %s, ADDRESS_VALID: %s\n", ++ status, ++ kbase_gpu_exception_name( ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), ++ as_nr, address, ++ as_valid ? "true" : "false", ++ status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); ++ ++ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); ++ kbase_csf_ctx_handle_fault(kctx, fault); ++ kbase_ctx_sched_release_ctx_lock(kctx); ++ ++ /* A work for GPU fault is complete. ++ * Till reaching here, no further GPU fault will be reported. ++ * Now clear the GPU fault to allow next GPU fault interrupt report. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ atomic_dec(&kbdev->faults_pending); ++} + +/** -+ * kutf_mempool_alloc() - Allocate memory from a pool -+ * @pool: Memory pool to allocate from -+ * @size: Size of memory wanted in number of bytes ++ * submit_work_gpufault() - Submit a work for GPU fault. + * -+ * Return: Pointer to memory on success, NULL on failure. ++ * @kbdev: Kbase device pointer ++ * @status: GPU fault status ++ * @as_nr: Faulty address space ++ * @address: GPU fault address ++ * ++ * This function submits a work for reporting the details of GPU fault. 
+ */ -+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); ++static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address) ++{ ++ unsigned long flags; ++ struct kbase_as *const as = &kbdev->as[as_nr]; ++ struct kbase_context *kctx; + -+/** -+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. -+ * @pool: The memory pool to free -+ */ -+void kutf_mempool_destroy(struct kutf_mempool *pool); -+#endif /* _KERNEL_UTF_MEM_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ ++ as->gf_data = (struct kbase_fault) { ++ .status = status, ++ .addr = address, ++ }; ++ ++ if (WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault))) ++ kbase_ctx_sched_release_ctx(kctx); ++ else ++ atomic_inc(&kbdev->faults_pending); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address, bool as_valid) ++{ ++ if (!as_valid || (as_nr == MCU_AS_NR)) { ++ int as; ++ ++ /* Report GPU fault for all contexts (except MCU_AS_NR) in case either ++ * the address space is invalid or it's MCU address space. ++ */ ++ for (as = 1; as < kbdev->nr_hw_address_spaces; as++) ++ submit_work_gpufault(kbdev, status, as, address); ++ } else ++ submit_work_gpufault(kbdev, status, as_nr, address); ++} ++KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); ++ ++int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) ++{ ++ kbdev->as[i].number = i; ++ kbdev->as[i].bf_data.addr = 0ULL; ++ kbdev->as[i].pf_data.addr = 0ULL; ++ kbdev->as[i].gf_data.addr = 0ULL; ++ kbdev->as[i].is_unresponsive = false; ++ ++ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); ++ if (!kbdev->as[i].pf_wq) ++ return -ENOMEM; ++ ++ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_gpufault, kbase_mmu_gpu_fault_worker); ++ ++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c new file mode 100644 -index 000000000..2fb1a47a5 +index 000000000..d716ce006 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h -@@ -0,0 +1,180 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +@@ -0,0 +1,442 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -262127,752 +262156,437 @@ index 000000000..2fb1a47a5 + * + */ + -+#ifndef _KERNEL_UTF_RESULTSET_H_ -+#define _KERNEL_UTF_RESULTSET_H_ -+ -+/* kutf_resultset.h -+ * Functions and structures for handling test results and result sets. -+ * -+ * This section of the kernel UTF contains structures and functions used for the -+ * management of Results and Result Sets. 
-+ */ -+ +/** -+ * enum kutf_result_status - Status values for a single Test error. -+ * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark -+ * results. -+ * @KUTF_RESULT_SKIP: The test was skipped. -+ * @KUTF_RESULT_UNKNOWN: The test has an unknown result. -+ * @KUTF_RESULT_PASS: The test result passed. -+ * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug -+ * message. -+ * @KUTF_RESULT_INFO: The test result passed, but raised -+ * an informative message. -+ * @KUTF_RESULT_WARN: The test result passed, but raised a warning -+ * message. -+ * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. -+ * @KUTF_RESULT_FATAL: The test result failed with a fatal error. -+ * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF -+ * assertion failure. -+ * @KUTF_RESULT_USERDATA: User data is ready to be read, -+ * this is not seen outside the kernel -+ * @KUTF_RESULT_USERDATA_WAIT: Waiting for user data to be sent, -+ * this is not seen outside the kernel -+ * @KUTF_RESULT_TEST_FINISHED: The test has finished, no more results will -+ * be produced. This is not seen outside kutf ++ * DOC: Base kernel MMU management specific for Job Manager GPU. + */ -+enum kutf_result_status { -+ KUTF_RESULT_BENCHMARK = -3, -+ KUTF_RESULT_SKIP = -2, -+ KUTF_RESULT_UNKNOWN = -1, + -+ KUTF_RESULT_PASS = 0, -+ KUTF_RESULT_DEBUG = 1, -+ KUTF_RESULT_INFO = 2, -+ KUTF_RESULT_WARN = 3, -+ KUTF_RESULT_FAIL = 4, -+ KUTF_RESULT_FATAL = 5, -+ KUTF_RESULT_ABORT = 6, -+ -+ KUTF_RESULT_USERDATA = 7, -+ KUTF_RESULT_USERDATA_WAIT = 8, -+ KUTF_RESULT_TEST_FINISHED = 9 -+}; -+ -+/* The maximum size of a kutf_result_status result when -+ * converted to a string -+ */ -+#define KUTF_ERROR_MAX_NAME_SIZE 21 -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+ -+struct kutf_context; -+ -+/** -+ * struct kutf_result - Represents a single test result. -+ * @node: Next result in the list of results. -+ * @status: The status summary (pass / warn / fail / etc). -+ * @message: A more verbose status message. -+ */ -+struct kutf_result { -+ struct list_head node; -+ enum kutf_result_status status; -+ const char *message; -+}; ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data -+ * -+ * This flag is set within a struct kutf_result_set whenever the test is blocked -+ * waiting for user data. Attempts to dequeue results when this flag is set -+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This -+ * is used to output a warning message and end of file. -+ */ -+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1 ++void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, ++ struct kbase_mmu_setup * const setup) ++{ ++ /* Set up the required caching policies at the correct indices ++ * in the memattr register. ++ */ ++ setup->memattr = ++ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << ++ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | ++ (AS_MEMATTR_FORCE_TO_CACHE_ALL << ++ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | ++ (AS_MEMATTR_WRITE_ALLOC << ++ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << ++ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_WA << ++ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | ++ (AS_MEMATTR_AARCH64_NON_CACHEABLE << ++ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); + -+/** -+ * struct kutf_result_set - Represents a set of results. 
-+ * @results: List head of a struct kutf_result list for storing the results -+ * @waitq: Wait queue signalled whenever new results are added. -+ * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT -+ */ -+struct kutf_result_set { -+ struct list_head results; -+ wait_queue_head_t waitq; -+ int flags; -+}; ++ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; ++ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; ++} + -+/** -+ * kutf_create_result_set() - Create a new result set -+ * to which results can be added. -+ * -+ * Return: The created result set. -+ */ -+struct kutf_result_set *kutf_create_result_set(void); ++void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, struct kbase_fault *fault) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ u32 const status = fault->status; ++ u32 const exception_type = (status & 0xFF); ++ u32 const exception_data = (status >> 8) & 0xFFFFFF; ++ int const as_no = as->number; ++ unsigned long flags; ++ const uintptr_t fault_addr = fault->addr; + -+/** -+ * kutf_add_result() - Add a result to the end of an existing result set. -+ * -+ * @context: The kutf context -+ * @status: The result status to add. -+ * @message: The result message to add. -+ * -+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. -+ */ -+int kutf_add_result(struct kutf_context *context, -+ enum kutf_result_status status, const char *message); ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "GPU bus fault in AS%d at PA %pK\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "exception data 0x%X\n" ++ "pid: %d\n", ++ as_no, (void *)fault_addr, ++ status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ exception_data, ++ kctx->pid); + -+/** -+ * kutf_remove_result() - Remove a result from the head of a result set. -+ * @set: The result set. -+ * -+ * This function will block until there is a result to read. The wait is -+ * interruptible, so this function will return with an ERR_PTR if interrupted. -+ * -+ * Return: result or ERR_PTR if interrupted -+ */ -+struct kutf_result *kutf_remove_result( -+ struct kutf_result_set *set); ++ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter ++ * dumping AS transaction begin ++ */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+/** -+ * kutf_destroy_result_set() - Free a previously created result set. -+ * -+ * @results: The result set whose resources to free. -+ */ -+void kutf_destroy_result_set(struct kutf_result_set *results); ++ /* Set the MMU into unmapped mode */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * kutf_set_waiting_for_input() - The test is waiting for userdata -+ * -+ * @set: The result set to update -+ * -+ * Causes the result set to always have results and return a fake -+ * %KUTF_RESULT_USERDATA_WAIT result. 
-+ */ -+void kutf_set_waiting_for_input(struct kutf_result_set *set); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ + -+/** -+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata -+ * -+ * @set: The result set to update -+ * -+ * Cancels the effect of kutf_set_waiting_for_input() -+ */ -+void kutf_clear_waiting_for_input(struct kutf_result_set *set); ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + -+#endif /* __KERNEL__ */ ++} + -+#endif /* _KERNEL_UTF_RESULTSET_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h -new file mode 100644 -index 000000000..9e459c556 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h -@@ -0,0 +1,571 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * -+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ -+ -+#ifndef _KERNEL_UTF_SUITE_H_ -+#define _KERNEL_UTF_SUITE_H_ -+ -+/* kutf_suite.h -+ * Functions for management of test suites. -+ * -+ * This collection of data structures, macros, and functions are used to -+ * create Test Suites, Tests within those Test Suites, and Fixture variants -+ * of each test. ++ * The caller must ensure it's retained the ctx to prevent it from being ++ * scheduled out whilst it's being worked on. + */ ++void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; ++ u32 exception_type; ++ u32 access_type; ++ u32 source_id; ++ int as_no; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; + -+#include -+#include -+#include -+ -+#include -+#include ++ as_no = as->number; ++ kbdev = kctx->kbdev; ++ js_devdata = &kbdev->js_data; + -+/* Arbitrary maximum size to prevent user space allocating too much kernel -+ * memory -+ */ -+#define KUTF_MAX_LINE_LENGTH (1024u) ++ /* Make sure the context was active */ ++ if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) ++ return; + -+/** -+ * KUTF_F_TEST_NONE - Pseudo-flag indicating an absence of any specified test class. -+ * Note that tests should not be annotated with this constant as it is simply a zero -+ * value; tests without a more specific class must be marked with the flag -+ * KUTF_F_TEST_GENERIC. -+ */ -+#define KUTF_F_TEST_NONE ((unsigned int)(0)) ++ /* decode the fault status */ ++ exception_type = fault->status & 0xFF; ++ access_type = (fault->status >> 8) & 0x3; ++ source_id = (fault->status >> 16); + -+/** -+ * KUTF_F_TEST_SMOKETEST - Class indicating this test is a smoke test. 
-+ * A given set of smoke tests should be quick to run, enabling rapid turn-around -+ * of "regress-on-commit" test runs. -+ */ -+#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "Reason: %s\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, fault->addr, ++ reason_str, ++ fault->status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(fault->status), ++ source_id, ++ kctx->pid); + -+/** -+ * KUTF_F_TEST_PERFORMANCE - Class indicating this test is a performance test. -+ * These tests typically produce a performance metric, such as "time to run" or -+ * "frames per second", -+ */ -+#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) ++ /* hardware counters dump fault handling */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && ++ (kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_DUMPING)) { ++ if ((fault->addr >= kbdev->hwcnt.addr) && ++ (fault->addr < (kbdev->hwcnt.addr + ++ kbdev->hwcnt.addr_bytes))) ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++ } ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+/** -+ * KUTF_F_TEST_DEPRECATED - Class indicating that this test is a deprecated test. -+ * These tests have typically been replaced by an alternative test which is -+ * more efficient, or has better coverage. -+ */ -+#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) ++ /* Stop the kctx from submitting more jobs and cause it to be scheduled ++ * out/rescheduled - this will occur on releasing the context's refcount ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); + -+/** -+ * KUTF_F_TEST_EXPECTED_FAILURE - Class indicating that this test is a known failure. -+ * These tests have typically been run and failed, but marking them as a known -+ * failure means it is easier to triage results. -+ * -+ * It is typically more convenient to triage known failures using the -+ * results database and web UI, as this means there is no need to modify the -+ * test code. -+ */ -+#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) ++ /* Kill any running jobs from the context. Submit is disallowed, so no ++ * more jobs from this context can appear in the job slots from this ++ * point on ++ */ ++ kbase_backend_jm_kill_running_jobs_from_kctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * KUTF_F_TEST_GENERIC - Class indicating that this test is a generic test, -+ * which is not a member of a more specific test class. -+ * Tests which are not created with a specific set -+ * of filter flags by the user are assigned this test class by default. -+ */ -+#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+/** -+ * KUTF_F_TEST_RESFAIL - Class indicating this test is a resource allocation failure test. -+ * A resource allocation failure test will test that an error code is -+ * correctly propagated when an allocation fails. 
-+ */ -+#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) ++ /* switch to UNMAPPED mode, will abort all jobs and stop ++ * any hw counter dumping ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * KUTF_F_TEST_EXPECTED_FAILURE_RF - Additional flag indicating that this test -+ * is an expected failure when run in resource failure mode. -+ * These tests are never run when running the low resource mode. -+ */ -+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+/** -+ * KUTF_F_TEST_USER_0 - Flag reserved for user-defined filter zero. -+ */ -+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) ++ /* AS transaction end */ ++ /* Clear down the fault */ ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + -+/** -+ * KUTF_F_TEST_USER_1 - Flag reserved for user-defined filter one. -+ */ -+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) ++} + +/** -+ * KUTF_F_TEST_USER_2 - Flag reserved for user-defined filter two. ++ * kbase_mmu_interrupt_process() - Process a bus or page fault. ++ * @kbdev: The kbase_device the fault happened on ++ * @kctx: The kbase_context for the faulting address space if one was ++ * found. ++ * @as: The address space that has the fault ++ * @fault: Data relating to the fault ++ * ++ * This function will process a fault on a specific address space + */ -+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) ++static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_as *as, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; + -+/** -+ * KUTF_F_TEST_USER_3 - Flag reserved for user-defined filter three. -+ */ -+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/** -+ * KUTF_F_TEST_USER_4 - Flag reserved for user-defined filter four. -+ */ -+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) ++ dev_dbg(kbdev->dev, ++ "Entering %s kctx %pK, as %pK\n", ++ __func__, (void *)kctx, (void *)as); + -+/** -+ * KUTF_F_TEST_USER_5 - Flag reserved for user-defined filter five. -+ */ -+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) ++ if (!kctx) { ++ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", ++ kbase_as_has_bus_fault(as, fault) ? ++ "Bus error" : "Page fault", ++ as->number, fault->addr); + -+/** -+ * KUTF_F_TEST_USER_6 - Flag reserved for user-defined filter six. -+ */ -+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) ++ /* Since no ctx was found, the MMU must be disabled. */ ++ WARN_ON(as->current_setup.transtab); + -+/** -+ * KUTF_F_TEST_USER_7 - Flag reserved for user-defined filter seven. -+ */ -+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) ++ if (kbase_as_has_bus_fault(as, fault)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ } else if (kbase_as_has_page_fault(as, fault)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ } + -+/** -+ * KUTF_F_TEST_ALL - Pseudo-flag indicating that all test classes should be executed. 
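-+ *
-+ * Since the individual classes above are single-bit values, a narrower
-+ * filter than KUTF_F_TEST_ALL can be built by OR-ing them together, for
-+ * example (illustrative only, not a definition from this header):
-+ *
-+ *     unsigned int filters = KUTF_F_TEST_SMOKETEST | KUTF_F_TEST_PERFORMANCE;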
-+ */ -+#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) ++ return; ++ } + -+/** -+ * union kutf_callback_data - Union used to store test callback data -+ * @ptr_value: pointer to the location where test callback data -+ * are stored -+ * @u32_value: a number which represents test callback data -+ */ -+union kutf_callback_data { -+ void *ptr_value; -+ u32 u32_value; -+}; ++ if (kbase_as_has_bus_fault(as, fault)) { ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+/** -+ * struct kutf_userdata_line - A line of user data to be returned to the user -+ * @node: struct list_head to link this into a list -+ * @str: The line of user data to return to user space -+ * @size: The number of bytes within @str -+ */ -+struct kutf_userdata_line { -+ struct list_head node; -+ char *str; -+ size_t size; -+}; ++ /* ++ * hw counters dumping in progress, signal the ++ * other thread that it failed ++ */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ if ((kbdev->hwcnt.kctx == kctx) && ++ (kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_DUMPING)) ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + -+/** -+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output -+ * -+ * If user space reads the "run" file while the test is waiting for user data, -+ * then the framework will output a warning message and set this flag within -+ * struct kutf_userdata. A subsequent read will then simply return an end of -+ * file condition rather than outputting the warning again. The upshot of this -+ * is that simply running 'cat' on a test which requires user data will produce -+ * the warning followed by 'cat' exiting due to EOF - which is much more user -+ * friendly than blocking indefinitely waiting for user data. -+ */ -+#define KUTF_USERDATA_WARNING_OUTPUT 1 ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+/** -+ * struct kutf_userdata - Structure holding user data -+ * @flags: See %KUTF_USERDATA_WARNING_OUTPUT -+ * @input_head: List of struct kutf_userdata_line containing user data -+ * to be read by the kernel space test. -+ * @input_waitq: Wait queue signalled when there is new user data to be -+ * read by the kernel space test. -+ */ -+struct kutf_userdata { -+ unsigned long flags; -+ struct list_head input_head; -+ wait_queue_head_t input_waitq; -+}; ++ /* ++ * Stop the kctx from submitting more jobs and cause it ++ * to be scheduled out/rescheduled when all references ++ * to it are released ++ */ ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); + -+/** -+ * struct kutf_context - Structure representing a kernel test context -+ * @kref: Refcount for number of users of this context -+ * @suite: Convenience pointer to the suite this context -+ * is running -+ * @test_fix: The fixture that is being run in this context -+ * @fixture_pool: The memory pool used for the duration of -+ * the fixture/text context. -+ * @fixture: The user provided fixture structure. -+ * @fixture_index: The index (id) of the current fixture. -+ * @fixture_name: The name of the current fixture (or NULL if unnamed). -+ * @test_data: Any user private data associated with this test -+ * @result_set: All the results logged by this test context -+ * @status: The status of the currently running fixture. -+ * @expected_status: The expected status on exist of the currently -+ * running fixture. 
-+ * @work: Work item to enqueue onto the work queue to run the test -+ * @userdata: Structure containing the user data for the test to read -+ */ -+struct kutf_context { -+ struct kref kref; -+ struct kutf_suite *suite; -+ struct kutf_test_fixture *test_fix; -+ struct kutf_mempool fixture_pool; -+ void *fixture; -+ unsigned int fixture_index; -+ const char *fixture_name; -+ union kutf_callback_data test_data; -+ struct kutf_result_set *result_set; -+ enum kutf_result_status status; -+ enum kutf_result_status expected_status; ++ dev_warn(kbdev->dev, ++ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", ++ as->number, fault->addr, ++ fault->extra_addr); + -+ struct work_struct work; -+ struct kutf_userdata userdata; -+}; ++ /* ++ * We need to switch to UNMAPPED mode - but we do this in a ++ * worker so that we can sleep ++ */ ++ WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); ++ atomic_inc(&kbdev->faults_pending); ++ } else { ++ WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); ++ atomic_inc(&kbdev->faults_pending); ++ } + -+/** -+ * struct kutf_suite - Structure representing a kernel test suite -+ * @app: The application this suite belongs to. -+ * @name: The name of this suite. -+ * @suite_data: Any user private data associated with this -+ * suite. -+ * @create_fixture: Function used to create a new fixture instance -+ * @remove_fixture: Function used to destroy a new fixture instance -+ * @fixture_variants: The number of variants (must be at least 1). -+ * @suite_default_flags: Suite global filter flags which are set on -+ * all tests. -+ * @node: List node for suite_list -+ * @dir: The debugfs directory for this suite -+ * @test_list: List head to store all the tests which are -+ * part of this suite -+ */ -+struct kutf_suite { -+ struct kutf_application *app; -+ const char *name; -+ union kutf_callback_data suite_data; -+ void *(*create_fixture)(struct kutf_context *context); -+ void (*remove_fixture)(struct kutf_context *context); -+ unsigned int fixture_variants; -+ unsigned int suite_default_flags; -+ struct list_head node; -+ struct dentry *dir; -+ struct list_head test_list; -+}; ++ dev_dbg(kbdev->dev, ++ "Leaving %s kctx %pK, as %pK\n", ++ __func__, (void *)kctx, (void *)as); ++} + -+/** =========================================================================== -+ * Application functions -+ * ============================================================================ -+ */ ++static void validate_protected_page_fault(struct kbase_device *kbdev) ++{ ++ /* GPUs which support (native) protected mode shall not report page ++ * fault addresses unless it has protected debug mode and protected ++ * debug mode is turned on ++ */ ++ u32 protected_debug_mode = 0; + -+/** -+ * kutf_create_application() - Create an in kernel test application. -+ * @name: The name of the test application. -+ * -+ * Return: pointer to the kutf_application on success or NULL -+ * on failure -+ */ -+struct kutf_application *kutf_create_application(const char *name); ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { ++ protected_debug_mode = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; ++ } + -+/** -+ * kutf_destroy_application() - Destroy an in kernel test application. -+ * -+ * @app: The test application to destroy. -+ */ -+void kutf_destroy_application(struct kutf_application *app); ++ if (!protected_debug_mode) { ++ /* fault_addr should never be reported in protected mode. 
++ * However, we just continue by printing an error message ++ */ ++ dev_err(kbdev->dev, "Fault address reported in protected mode\n"); ++ } ++} + -+/**============================================================================ -+ * Suite functions -+ * ============================================================================ -+ */ ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++{ ++ const int num_as = 16; ++ const int busfault_shift = MMU_PAGE_FAULT_FLAGS; ++ const int pf_shift = 0; ++ const unsigned long as_bit_mask = (1UL << num_as) - 1; ++ unsigned long flags; ++ u32 new_mask; ++ u32 tmp, bf_bits, pf_bits; + -+/** -+ * kutf_create_suite() - Create a kernel test suite. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. -+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * -+ * Suite names must be unique. Should two suites with the same name be -+ * registered with the same application then this function will fail, if they -+ * are registered with different applications then the function will not detect -+ * this and the call will succeed. -+ * -+ * Return: pointer to the created kutf_suite on success or NULL -+ * on failure -+ */ -+struct kutf_suite *kutf_create_suite( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context)); ++ dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", ++ __func__, irq_stat); ++ /* bus faults */ ++ bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; ++ /* page faults (note: Ignore ASes with both pf and bf) */ ++ pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + -+/** -+ * kutf_create_suite_with_filters() - Create a kernel test suite with user -+ * defined default filters. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. -+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * @filters: Filters to apply to a test if it doesn't provide its own -+ * -+ * Suite names must be unique. Should two suites with the same name be -+ * registered with the same application then this function will fail, if they -+ * are registered with different applications then the function will not detect -+ * this and the call will succeed. -+ * -+ * Return: pointer to the created kutf_suite on success or NULL on failure -+ */ -+struct kutf_suite *kutf_create_suite_with_filters( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters); ++ if (WARN_ON(kbdev == NULL)) ++ return; + -+/** -+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with -+ * user defined default filters. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. 
-+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * @filters: Filters to apply to a test if it doesn't provide its own -+ * @suite_data: Suite specific callback data, provided during the -+ * running of the test in the kutf_context -+ * -+ * Return: pointer to the created kutf_suite on success or NULL -+ * on failure -+ */ -+struct kutf_suite *kutf_create_suite_with_filters_and_data( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data suite_data); ++ /* remember current mask */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ /* mask interrupts for now */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + -+/** -+ * kutf_add_test() - Add a test to a kernel test suite. -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * -+ * Note: As no filters are provided the test will use the suite filters instead -+ */ -+void kutf_add_test(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context)); ++ while (bf_bits | pf_bits) { ++ struct kbase_as *as; ++ unsigned int as_no; ++ struct kbase_context *kctx; ++ struct kbase_fault *fault; + -+/** -+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * @filters: A set of filtering flags, assigning test categories. -+ */ -+void kutf_add_test_with_filters(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters); ++ /* ++ * the while logic ensures we have a bit set, no need to check ++ * for not-found here ++ */ ++ as_no = ffs(bf_bits | pf_bits) - 1; ++ as = &kbdev->as[as_no]; + -+/** -+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite -+ * with filters. -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * @filters: A set of filtering flags, assigning test categories. 
-+ * @test_data: Test specific callback data, provided during the -+ * running of the test in the kutf_context -+ */ -+void kutf_add_test_with_filters_and_data( -+ struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data test_data); ++ /* find the fault type */ ++ if (bf_bits & (1 << as_no)) ++ fault = &as->bf_data; ++ else ++ fault = &as->pf_data; + -+/** =========================================================================== -+ * Test functions -+ * ============================================================================ -+ */ -+/** -+ * kutf_test_log_result_external() - Log a result which has been created -+ * externally into a in a standard form -+ * recognized by the log parser. -+ * @context: The test context the test is running in -+ * @message: The message for this result -+ * @new_status: The result status of this log message -+ */ -+void kutf_test_log_result_external( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status); ++ /* ++ * Refcount the kctx ASAP - it shouldn't disappear anyway, since ++ * Bus/Page faults _should_ only occur whilst jobs are running, ++ * and a job causing the Bus/Page fault shouldn't complete until ++ * the MMU is updated ++ */ ++ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); + -+/** -+ * kutf_test_expect_abort() - Tell the kernel that you expect the current -+ * fixture to produce an abort. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_abort(struct kutf_context *context); ++ /* find faulting address */ ++ fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_HI)); ++ fault->addr <<= 32; ++ fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_LO)); ++ /* Mark the fault protected or not */ ++ fault->protected_mode = kbdev->protected_mode; + -+/** -+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current -+ * fixture to produce a fatal error. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_fatal(struct kutf_context *context); ++ if (kbdev->protected_mode && fault->addr) { ++ /* check if address reporting is allowed */ ++ validate_protected_page_fault(kbdev); ++ } + -+/** -+ * kutf_test_expect_fail() - Tell the kernel that you expect the current -+ * fixture to fail. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_fail(struct kutf_context *context); ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_no); + -+/** -+ * kutf_test_expect_warn() - Tell the kernel that you expect the current -+ * fixture to produce a warning. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_warn(struct kutf_context *context); ++ /* record the fault status */ ++ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTSTATUS)); ++ fault->extra_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); ++ fault->extra_addr <<= 32; ++ fault->extra_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + -+/** -+ * kutf_test_expect_pass() - Tell the kernel that you expect the current -+ * fixture to pass. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_pass(struct kutf_context *context); ++ if (kbase_as_has_bus_fault(as, fault)) { ++ /* Mark bus fault as handled. 
++ * Note that a bus fault is processed first in case ++ * where both a bus fault and page fault occur. ++ */ ++ bf_bits &= ~(1UL << as_no); + -+/** -+ * kutf_test_skip() - Tell the kernel that the test should be skipped. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_skip(struct kutf_context *context); ++ /* remove the queued BF (and PF) from the mask */ ++ new_mask &= ~(MMU_BUS_ERROR(as_no) | ++ MMU_PAGE_FAULT(as_no)); ++ } else { ++ /* Mark page fault as handled */ ++ pf_bits &= ~(1UL << as_no); + -+/** -+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, -+ * supplying a reason string. -+ * @context: The test context this test is running in. -+ * @message: A message string containing the reason for the skip. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a prebaked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_skip_msg(struct kutf_context *context, const char *message); ++ /* remove the queued PF from the mask */ ++ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ } + -+/** -+ * kutf_test_pass() - Tell the kernel that this test has passed. -+ * @context: The test context this test is running in. -+ * @message: A message string containing the reason for the pass. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_pass(struct kutf_context *context, char const *message); ++ /* Process the interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } + -+/** -+ * kutf_test_debug() - Send a debug message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the debug information. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_debug(struct kutf_context *context, char const *message); ++ /* reenable interrupts */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ new_mask |= tmp; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + -+/** -+ * kutf_test_info() - Send an information message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the information message. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. 
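-+ *
-+ * A minimal sketch of the dynamic-string case described above (illustrative
-+ * only; "frames" is a made-up variable):
-+ *
-+ *     kutf_test_info(context,
-+ *                    kutf_dsprintf(&context->fixture_pool,
-+ *                                  "rendered %d frames", frames));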
-+ */ -+void kutf_test_info(struct kutf_context *context, char const *message); ++ dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", ++ __func__, irq_stat); ++} + -+/** -+ * kutf_test_warn() - Send a warning message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the warning message. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_warn(struct kutf_context *context, char const *message); ++int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg) ++{ ++ dev_dbg(kctx->kbdev->dev, ++ "Switching to incremental rendering for region %pK\n", ++ (void *)reg); ++ return kbase_job_slot_softstop_start_rp(kctx, reg); ++} + -+/** -+ * kutf_test_fail() - Tell the kernel that a test has failed -+ * @context: The test context this test is running in. -+ * @message: A message string containing the failure message. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_fail(struct kutf_context *context, char const *message); ++int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) ++{ ++ kbdev->as[i].number = i; ++ kbdev->as[i].bf_data.addr = 0ULL; ++ kbdev->as[i].pf_data.addr = 0ULL; ++ kbdev->as[i].is_unresponsive = false; + -+/** -+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error -+ * @context: The test context this test is running in. -+ * @message: A message string containing the fatal error message. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. -+ */ -+void kutf_test_fatal(struct kutf_context *context, char const *message); ++ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); ++ if (!kbdev->as[i].pf_wq) ++ return -ENOMEM; + -+/** -+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test -+ * -+ * @context: The test context this test is running in. 
-+ */ -+void kutf_test_abort(struct kutf_context *context); ++ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); + -+#endif /* _KERNEL_UTF_SUITE_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h ++ return 0; ++} +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c new file mode 100644 -index 000000000..f6e758b80 +index 000000000..d6d3fcdee --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +@@ -0,0 +1,3889 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -262890,3403 +262604,3884 @@ index 000000000..f6e758b80 + * + */ + -+#ifndef _KERNEL_UTF_UTILS_H_ -+#define _KERNEL_UTF_UTILS_H_ -+ -+/* kutf_utils.h -+ * Utilities for the kernel UTF test infrastructure. -+ * -+ * This collection of library functions are provided for use by kernel UTF -+ * and users of kernel UTF which don't directly fit within the other -+ * code modules. ++/** ++ * DOC: Base kernel MMU management. + */ + -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if !MALI_USE_CSF ++#include ++#endif + -+/** -+ * KUTF_MAX_DSPRINTF_LEN - Maximum size of the message strings within -+ * kernel UTF, messages longer then this will be truncated. -+ */ -+#define KUTF_MAX_DSPRINTF_LEN 1024 ++#include ++#include + -+/** -+ * kutf_dsprintf() - dynamic sprintf -+ * @pool: memory pool to allocate from -+ * @fmt: The format string describing the string to document. -+ * @... The parameters to feed in to the format string. -+ * -+ * This function implements sprintf which dynamically allocates memory to store -+ * the string. The library will free the memory containing the string when the -+ * result set is cleared or destroyed. -+ * -+ * Note The returned string may be truncated to fit an internal temporary -+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. -+ * -+ * Return: Returns pointer to allocated string, or NULL on error. -+ */ -+const char *kutf_dsprintf(struct kutf_mempool *pool, -+ const char *fmt, ...) __printf(2, 3); ++/* Threshold used to decide whether to flush full caches or just a physical range */ ++#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 ++#define MGM_DEFAULT_PTE_GROUP (0) + ++/* Macro to convert updated PDGs to flags indicating levels skip in flush */ ++#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) + -+#endif /* _KERNEL_UTF_UTILS_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/Kbuild b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild -new file mode 100755 -index 000000000..c4790bc66 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild -@@ -0,0 +1,31 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/* Small wrapper function to factor out GPU-dependent context releasing */ ++static void release_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++#if MALI_USE_CSF ++ CSTD_UNUSED(kbdev); ++ kbase_ctx_sched_release_ctx_lock(kctx); ++#else /* MALI_USE_CSF */ ++ kbasep_js_runpool_release_ctx(kbdev, kctx); ++#endif /* MALI_USE_CSF */ ++} + -+ifeq ($(CONFIG_MALI_KUTF),y) -+obj-m += kutf.o ++static void mmu_hw_operation_begin(struct kbase_device *kbdev) ++{ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++#if MALI_USE_CSF ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { ++ unsigned long flags; + -+kutf-y := \ -+ kutf_mem.o \ -+ kutf_resultset.o \ -+ kutf_suite.o \ -+ kutf_utils.o \ -+ kutf_helpers.o \ -+ kutf_helpers_user.o -+endif -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/build.bp b/drivers/gpu/arm/bifrost/tests/kutf/build.bp -new file mode 100755 -index 000000000..89edae9c5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/build.bp -@@ -0,0 +1,42 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress); ++ kbdev->mmu_hw_operation_in_progress = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++#endif /* MALI_USE_CSF */ ++#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ ++} ++ ++static void mmu_hw_operation_end(struct kbase_device *kbdev) ++{ ++#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++#if MALI_USE_CSF ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) { ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress); ++ kbdev->mmu_hw_operation_in_progress = false; ++ /* Invoke the PM state machine, the L2 power off may have been ++ * skipped due to the MMU command. ++ */ ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++#endif /* MALI_USE_CSF */ ++#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ ++} ++ ++/** ++ * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done ++ * through GPU_CONTROL interface. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
++ * @kbdev: kbase device to check GPU model ID on. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function returns whether a cache flush for page table update should ++ * run through GPU_CONTROL interface or MMU_AS_CONTROL interface. + * ++ * Return: True if cache flush should be done on GPU command. + */ ++static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) ++{ ++ uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id & ++ GPU_ID2_ARCH_MAJOR) >> ++ GPU_ID2_ARCH_MAJOR_SHIFT; + -+bob_kernel_module { -+ name: "kutf", -+ defaults: [ -+ "mali_kbase_shared_config_defaults", -+ "kernel_test_configs", -+ "kernel_test_includes", -+ ], -+ srcs: [ -+ "Kbuild", -+ "kutf_helpers.c", -+ "kutf_helpers_user.c", -+ "kutf_mem.c", -+ "kutf_resultset.c", -+ "kutf_suite.c", -+ "kutf_utils.c", -+ ], -+ enabled: false, -+ mali_kutf: { -+ enabled: true, -+ }, ++ return arch_maj_cur > 11; +} -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c -new file mode 100644 -index 000000000..42736195e ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c -@@ -0,0 +1,141 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * mmu_flush_pa_range() - Flush physical address range + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: kbase device to issue the MMU operation on. ++ * @phys: Starting address of the physical range to start the operation on. ++ * @nr_bytes: Number of bytes to work on. ++ * @op: Type of cache flush operation to perform. + * ++ * Issue a cache flush physical range command. 
+ */ -+ -+/* Kernel UTF test helpers */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static DEFINE_SPINLOCK(kutf_input_lock); -+ -+bool kutf_helper_pending_input(struct kutf_context *context) ++#if MALI_USE_CSF ++static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes, ++ enum kbase_mmu_op_type op) +{ -+ bool input_pending; -+ -+ spin_lock(&kutf_input_lock); ++ u32 flush_op; + -+ input_pending = !list_empty(&context->userdata.input_head); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ spin_unlock(&kutf_input_lock); ++ /* Translate operation to command */ ++ if (op == KBASE_MMU_OP_FLUSH_PT) ++ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; ++ else if (op == KBASE_MMU_OP_FLUSH_MEM) ++ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; ++ else { ++ dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); ++ return; ++ } + -+ return input_pending; ++ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op)) ++ dev_err(kbdev->dev, "Flush for physical address range did not complete"); +} -+EXPORT_SYMBOL(kutf_helper_pending_input); ++#endif + -+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) ++/** ++ * mmu_invalidate() - Perform an invalidate operation on MMU caches. ++ * @kbdev: The Kbase device. ++ * @kctx: The Kbase context. ++ * @as_nr: GPU address space number for which invalidate is required. ++ * @op_param: Non-NULL pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * Perform an MMU invalidate operation on a particual address space ++ * by issuing a UNLOCK command. ++ */ ++static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, ++ const struct kbase_mmu_hw_op_param *op_param) +{ -+ struct kutf_userdata_line *line; -+ -+ spin_lock(&kutf_input_lock); -+ -+ while (list_empty(&context->userdata.input_head)) { -+ int err; -+ -+ kutf_set_waiting_for_input(context->result_set); ++ unsigned long flags; + -+ spin_unlock(&kutf_input_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ err = wait_event_interruptible(context->userdata.input_waitq, -+ kutf_helper_pending_input(context)); ++ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { ++ as_nr = kctx ? 
kctx->as_nr : as_nr; ++ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) ++ dev_err(kbdev->dev, ++ "Invalidate after GPU page table update did not complete"); ++ } + -+ if (err) -+ return ERR_PTR(-EINTR); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ spin_lock(&kutf_input_lock); -+ } ++/* Perform a flush/invalidate on a particular address space ++ */ ++static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ unsigned long flags; + -+ line = list_first_entry(&context->userdata.input_head, -+ struct kutf_userdata_line, node); -+ if (line->str) { -+ /* -+ * Unless it is the end-of-input marker, -+ * remove it from the list -+ */ -+ list_del(&line->node); -+ } ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ spin_unlock(&kutf_input_lock); ++ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) ++ dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); + -+ if (str_size) -+ *str_size = line->size; -+ return line->str; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ +} + -+int kutf_helper_input_enqueue(struct kutf_context *context, -+ const char __user *str, size_t size) ++/** ++ * mmu_flush_invalidate() - Perform a flush operation on GPU caches. ++ * @kbdev: The Kbase device. ++ * @kctx: The Kbase context. ++ * @as_nr: GPU address space number for which flush + invalidate is required. ++ * @op_param: Non-NULL pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * This function performs the cache flush operation described by @op_param. ++ * The function retains a reference to the given @kctx and releases it ++ * after performing the flush operation. ++ * ++ * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue ++ * a cache flush + invalidate to the L2 caches and invalidate the TLBs. ++ * ++ * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue ++ * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as ++ * invalidating the TLBs. ++ */ ++static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, ++ const struct kbase_mmu_hw_op_param *op_param) +{ -+ struct kutf_userdata_line *line; ++ bool ctx_is_in_runpool; + -+ line = kutf_mempool_alloc(&context->fixture_pool, -+ sizeof(*line) + size + 1); -+ if (!line) -+ return -ENOMEM; -+ if (str) { -+ unsigned long bytes_not_copied; ++ /* Early out if there is nothing to do */ ++ if (op_param->nr == 0) ++ return; + -+ line->size = size; -+ line->str = (void *)(line + 1); -+ bytes_not_copied = copy_from_user(line->str, str, size); -+ if (bytes_not_copied != 0) -+ return -EFAULT; -+ /* Zero terminate the string */ -+ line->str[size] = '\0'; ++ /* If no context is provided then MMU operation is performed on address ++ * space which does not belong to user space context. Otherwise, retain ++ * refcount to context provided and release after flush operation. 
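++ * (On job manager builds the refcount is taken with
++ * kbase_ctx_sched_inc_refcount() under the queue_mutex, on CSF builds with
++ * kbase_ctx_sched_inc_refcount_if_as_valid(), and it is dropped again via
++ * release_ctx() once the flush has been issued.)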
++ */ ++ if (!kctx) { ++ mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param); + } else { -+ /* This is used to mark the end of input */ -+ WARN_ON(size); -+ line->size = 0; -+ line->str = NULL; -+ } ++#if !MALI_USE_CSF ++ mutex_lock(&kbdev->js_data.queue_mutex); ++ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); ++ mutex_unlock(&kbdev->js_data.queue_mutex); ++#else ++ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); ++#endif /* !MALI_USE_CSF */ + -+ spin_lock(&kutf_input_lock); ++ if (ctx_is_in_runpool) { ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+ list_add_tail(&line->node, &context->userdata.input_head); ++ mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); + -+ kutf_clear_waiting_for_input(context->result_set); ++ release_ctx(kbdev, kctx); ++ } ++ } ++} + -+ spin_unlock(&kutf_input_lock); ++/** ++ * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via ++ * the GPU_CONTROL interface ++ * @kbdev: The Kbase device. ++ * @kctx: The Kbase context. ++ * @as_nr: GPU address space number for which flush + invalidate is required. ++ * @op_param: Non-NULL pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * Perform a flush/invalidate on a particular address space via the GPU_CONTROL ++ * interface. ++ */ ++static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, ++ int as_nr, const struct kbase_mmu_hw_op_param *op_param) ++{ ++ unsigned long flags; + -+ wake_up(&context->userdata.input_waitq); ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ return 0; -+} ++ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { ++ as_nr = kctx ? kctx->as_nr : as_nr; ++ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) ++ dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); ++ } + -+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) -+{ -+ kutf_helper_input_enqueue(context, NULL, 0); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); +} + -+void kutf_helper_ignore_dmesg(struct device *dev) ++static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, ++ phys_addr_t phys, size_t size, ++ enum kbase_mmu_op_type flush_op) +{ -+ dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); ++ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); +} -+EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + -+void kutf_helper_stop_ignoring_dmesg(struct device *dev) ++static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) +{ -+ dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); ++ /* In non-coherent system, ensure the GPU can read ++ * the pages from memory ++ */ ++ if (kbdev->system_coherency == COHERENCY_NONE) ++ dma_sync_single_for_device(kbdev->dev, handle, size, ++ DMA_TO_DEVICE); +} -+EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c -new file mode 100644 -index 000000000..c4e294325 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c -@@ -0,0 +1,474 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ ++/** ++ * kbase_mmu_sync_pgd() - sync page directory to memory when needed. ++ * @kbdev: Device pointer. ++ * @kctx: Context pointer. ++ * @phys: Starting physical address of the destination region. ++ * @handle: Address of DMA region. ++ * @size: Size of the region to sync. ++ * @flush_op: MMU cache flush operation to perform on the physical address ++ * range, if GPU control is available. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function is called whenever the association between a virtual address ++ * range and a physical address range changes, because a mapping is created or ++ * destroyed. ++ * One of the effects of this operation is performing an MMU cache flush ++ * operation only on the physical address range affected by this function, if ++ * GPU control is available. + * ++ * This should be called after each page directory update. + */ ++static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx, ++ phys_addr_t phys, dma_addr_t handle, size_t size, ++ enum kbase_mmu_op_type flush_op) ++{ + -+/* Kernel UTF test helpers that mirror those for kutf-userside */ -+#include -+#include -+#include ++ kbase_mmu_sync_pgd_cpu(kbdev, handle, size); ++ kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); ++} + -+#include -+#include -+#include ++/* ++ * Definitions: ++ * - PGD: Page Directory. ++ * - PTE: Page Table Entry. A 64bit value pointing to the next ++ * level of translation ++ * - ATE: Address Translation Entry. A 64bit value pointing to ++ * a 4kB physical page. ++ */ + -+static const char *const valtype_names[] = { -+ "INVALID", -+ "U64", -+ "STR", -+}; ++static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int group_id, u64 *dirty_pgds); + -+static const char *get_val_type_name(enum kutf_helper_valtype valtype) ++/** ++ * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and ++ * free memory of the page directories ++ * ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @pgds: Physical addresses of page directories to be freed. ++ * @vpfn: The virtual page frame number. ++ * @level: The level of MMU page table. ++ * @flush_op: The type of MMU flush operation to perform. ++ * @dirty_pgds: Flags to track every level where a PGD has been updated. 
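++ *
++ * @dirty_pgds appears to hold one bit per page table level (matching the
++ * 0xF mask used by pgd_level_to_skip_flush()). As a worked example, if only
++ * a level 3 PGD was updated then @dirty_pgds is 0x8 and
++ * pgd_level_to_skip_flush(0x8) evaluates to 0x7, i.e. levels 0-2 may be
++ * skipped by the flush.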
++ */ ++static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, phys_addr_t *pgds, ++ u64 vpfn, int level, ++ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); ++ ++static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ -+ /* enums can be signed or unsigned (implementation dependant), so -+ * enforce it to prevent: -+ * a) "<0 comparison on unsigned type" warning - if we did both upper -+ * and lower bound check -+ * b) incorrect range checking if it was a signed type - if we did -+ * upper bound check only -+ */ -+ unsigned int type_idx = (unsigned int)valtype; ++ atomic_sub(1, &kbdev->memdev.used_pages); + -+ if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) -+ type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID; ++ /* If MMU tables belong to a context then pages will have been accounted ++ * against it, so we must decrement the usage counts here. ++ */ ++ if (mmut->kctx) { ++ kbase_process_page_usage_dec(mmut->kctx, 1); ++ atomic_sub(1, &mmut->kctx->used_pages); ++ } + -+ return valtype_names[type_idx]; ++ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + -+/* Check up to str_len chars of val_str to see if it's a valid value name: -+ * -+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator -+ * - And, each char is in the character set [A-Z0-9_] -+ */ -+static int validate_val_name(const char *val_str, int str_len) ++static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, ++ struct page *p) +{ -+ int i = 0; ++ struct kbase_page_metadata *page_md = kbase_page_private(p); ++ bool page_is_isolated = false; + -+ for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) { -+ char val_chr = val_str[i]; ++ lockdep_assert_held(&mmut->mmu_lock); + -+ if (val_chr >= 'A' && val_chr <= 'Z') -+ continue; -+ if (val_chr >= '0' && val_chr <= '9') -+ continue; -+ if (val_chr == '_') -+ continue; ++ if (!kbase_page_migration_enabled) ++ return false; + -+ /* Character not in the set [A-Z0-9_] - report error */ -+ return 1; ++ spin_lock(&page_md->migrate_lock); ++ if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { ++ WARN_ON_ONCE(!mmut->kctx); ++ if (IS_PAGE_ISOLATED(page_md->status)) { ++ page_md->status = PAGE_STATUS_SET(page_md->status, ++ FREE_PT_ISOLATED_IN_PROGRESS); ++ page_md->data.free_pt_isolated.kbdev = kbdev; ++ page_is_isolated = true; ++ } else { ++ page_md->status = ++ PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); ++ } ++ } else { ++ WARN_ON_ONCE(mmut->kctx); ++ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } ++ spin_unlock(&page_md->migrate_lock); + -+ /* Names of 0 length are not valid */ -+ if (i == 0) -+ return 1; -+ /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */ -+ if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) -+ return 1; ++ if (unlikely(page_is_isolated)) { ++ /* Do the CPU cache flush and accounting here for the isolated ++ * PGD page, which is done inside kbase_mmu_free_pgd() for the ++ * PGD page that did not get isolated. ++ */ ++ dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ kbase_mmu_account_freed_pgd(kbdev, mmut); ++ } + -+ return 0; ++ return page_is_isolated; +} + -+/* Find the length of the valid part of the string when it will be in quotes -+ * e.g. 
"str" ++/** ++ * kbase_mmu_free_pgd() - Free memory of the page directory + * -+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have -+ * to escape the string ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @pgd: Physical address of page directory to be freed. ++ * ++ * This function is supposed to be called with mmu_lock held and after ++ * ensuring that GPU won't be able to access the page. + */ -+static int find_quoted_string_valid_len(const char *str) ++static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ phys_addr_t pgd) +{ -+ char *ptr; -+ const char *check_chars = "\\\n\""; ++ struct page *p; ++ bool page_is_isolated = false; + -+ ptr = strpbrk(str, check_chars); -+ if (ptr) -+ return (int)(ptr-str); ++ lockdep_assert_held(&mmut->mmu_lock); + -+ return (int)strlen(str); ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); ++ ++ if (likely(!page_is_isolated)) { ++ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); ++ kbase_mmu_account_freed_pgd(kbdev, mmut); ++ } +} + -+static int kutf_helper_userdata_enqueue(struct kutf_context *context, -+ const char *str) ++/** ++ * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list ++ * ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * ++ * This function will call kbase_mmu_free_pgd() on each page directory page ++ * present in the list of free PGDs inside @mmut. ++ * ++ * The function is supposed to be called after the GPU cache and MMU TLB has ++ * been invalidated post the teardown loop. ++ * ++ * The mmu_lock shall be held prior to calling the function. ++ */ ++static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ -+ char *str_copy; -+ size_t len; -+ int err; -+ -+ len = strlen(str)+1; -+ -+ str_copy = kutf_mempool_alloc(&context->fixture_pool, len); -+ if (!str_copy) -+ return -ENOMEM; ++ size_t i; + -+ strcpy(str_copy, str); ++ lockdep_assert_held(&mmut->mmu_lock); + -+ err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy); ++ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) ++ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); + -+ return err; ++ mmut->scratch_mem.free_pgds.head_index = 0; +} + -+#define MAX_U64_HEX_LEN 16 -+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ -+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) -+ -+int kutf_helper_send_named_u64(struct kutf_context *context, -+ const char *val_name, u64 val) ++static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) +{ -+ int ret = 1; -+ char msgbuf[NAMED_U64_VAL_BUF_SZ]; -+ const char *errmsg = NULL; -+ -+ if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send u64 value named '%s': Invalid value name", val_name); -+ goto out_err; -+ } -+ -+ ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); -+ if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", -+ val_name, NAMED_U64_VAL_BUF_SZ, ret); -+ goto out_err; -+ } ++ lockdep_assert_held(&mmut->mmu_lock); + -+ ret = kutf_helper_userdata_enqueue(context, msgbuf); -+ if (ret) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ 
"Failed to send u64 value named '%s': send returned %d", -+ val_name, ret); -+ goto out_err; -+ } ++ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) ++ return; + -+ return ret; -+out_err: -+ kutf_test_fail(context, errmsg); -+ return ret; ++ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; +} -+EXPORT_SYMBOL(kutf_helper_send_named_u64); -+ -+#define NAMED_VALUE_SEP "=" -+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" -+#define NAMED_STR_END_DELIM "\"" + -+int kutf_helper_max_str_len_for_kern(const char *val_name, -+ int kern_buf_sz) ++static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) +{ -+ const int val_name_len = strlen(val_name); -+ const int start_delim_len = strlen(NAMED_STR_START_DELIM); -+ const int end_delim_len = strlen(NAMED_STR_END_DELIM); -+ int max_msg_len = kern_buf_sz; -+ int max_str_len; -+ -+ max_str_len = max_msg_len - val_name_len - start_delim_len - -+ end_delim_len; ++ lockdep_assert_held(&mmut->mmu_lock); + -+ return max_str_len; ++ mmut->scratch_mem.free_pgds.head_index = 0; +} -+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); + -+int kutf_helper_send_named_str(struct kutf_context *context, -+ const char *val_name, -+ const char *val_str) ++/** ++ * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to ++ * a region on a GPU page fault ++ * @kbdev: KBase device ++ * @reg: The region that will be backed with more pages ++ * @fault_rel_pfn: PFN of the fault relative to the start of the region ++ * ++ * This calculates how much to increase the backing of a region by, based on ++ * where a GPU page fault occurred and the flags in the region. ++ * ++ * This can be more than the minimum number of pages that would reach ++ * @fault_rel_pfn, for example to reduce the overall rate of page fault ++ * interrupts on a region, or to ensure that the end address is aligned. ++ * ++ * Return: the number of backed pages to increase by ++ */ ++static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, ++ struct kbase_va_region *reg, size_t fault_rel_pfn) +{ -+ int val_str_len; -+ int str_buf_sz; -+ char *str_buf = NULL; -+ int ret = 1; -+ char *copy_ptr; -+ int val_name_len; -+ int start_delim_len = strlen(NAMED_STR_START_DELIM); -+ int end_delim_len = strlen(NAMED_STR_END_DELIM); -+ const char *errmsg = NULL; ++ size_t multiple = reg->extension; ++ size_t reg_current_size = kbase_reg_current_backed_size(reg); ++ size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; ++ size_t remainder; + -+ if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send u64 value named '%s': Invalid value name", val_name); -+ goto out_err; ++ if (!multiple) { ++ dev_warn( ++ kbdev->dev, ++ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ++ ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); ++ return minimum_extra; + } -+ val_name_len = strlen(val_name); -+ -+ val_str_len = find_quoted_string_valid_len(val_str); + -+ /* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */ -+ str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; ++ /* Calculate the remainder to subtract from minimum_extra to make it ++ * the desired (rounded down) multiple of the extension. 
++ * Depending on reg's flags, the base used for calculating multiples is ++ * different ++ */ + -+ /* Using kmalloc() here instead of mempool since we know we need to free -+ * before we return ++ /* multiple is based from the current backed size, even if the ++ * current backed size/pfn for end of committed memory are not ++ * themselves aligned to multiple + */ -+ str_buf = kmalloc(str_buf_sz, GFP_KERNEL); -+ if (!str_buf) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d", -+ val_name, str_buf_sz); -+ goto out_err; ++ remainder = minimum_extra % multiple; ++ ++#if !MALI_USE_CSF ++ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { ++ /* multiple is based from the top of the initial commit, which ++ * has been allocated in such a way that (start_pfn + ++ * initial_commit) is already aligned to multiple. Hence the ++ * pfn for the end of committed memory will also be aligned to ++ * multiple ++ */ ++ size_t initial_commit = reg->initial_commit; ++ ++ if (fault_rel_pfn < initial_commit) { ++ /* this case is just to catch in case it's been ++ * recommitted by userspace to be smaller than the ++ * initial commit ++ */ ++ minimum_extra = initial_commit - reg_current_size; ++ remainder = 0; ++ } else { ++ /* same as calculating ++ * (fault_rel_pfn - initial_commit + 1) ++ */ ++ size_t pages_after_initial = minimum_extra + ++ reg_current_size - initial_commit; ++ ++ remainder = pages_after_initial % multiple; ++ } + } -+ copy_ptr = str_buf; ++#endif /* !MALI_USE_CSF */ + -+ /* Manually copy each string component instead of snprintf because -+ * val_str may need to end early, and less error path handling ++ if (remainder == 0) ++ return minimum_extra; ++ ++ return minimum_extra + multiple - remainder; ++} ++ ++#ifdef CONFIG_MALI_CINSTR_GWT ++static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, ++ struct kbase_as *faulting_as, ++ u64 start_pfn, size_t nr, ++ u32 kctx_id, u64 dirty_pgds) ++{ ++ /* Calls to this function are inherently synchronous, with respect to ++ * MMU operations. 
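/*
 * Editorial sketch (not part of the patch): the rounding performed by
 * reg_grow_calc_extra_pages() above, with concrete numbers for the plain
 * (non-TILER_ALIGN_TOP) path.  Assuming a region with extension = 64 pages,
 * 100 pages currently backed and a fault at relative PFN 130:
 * minimum_extra = 130 - 100 + 1 = 31, remainder = 31 % 64 = 31, so the
 * region is grown by 31 + 64 - 31 = 64 pages.
 */
#include <stdio.h>

static size_t grow_round_up(size_t minimum_extra, size_t multiple)
{
	size_t remainder;

	if (!multiple)	/* the patch falls back to minimum_extra and warns */
		return minimum_extra;

	remainder = minimum_extra % multiple;
	return remainder ? minimum_extra + multiple - remainder : minimum_extra;
}

int main(void)
{
	/* fault_rel_pfn = 130, current backed size = 100, extension = 64 */
	printf("%zu\n", grow_round_up(130 - 100 + 1, 64)); /* prints 64 */
	return 0;
}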
+ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; ++ struct kbase_mmu_hw_op_param op_param; ++ int ret = 0; + -+ /* name */ -+ memcpy(copy_ptr, val_name, val_name_len); -+ copy_ptr += val_name_len; ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ /* str start delimiter */ -+ memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len); -+ copy_ptr += start_delim_len; ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); + -+ /* str value */ -+ memcpy(copy_ptr, val_str, val_str_len); -+ copy_ptr += val_str_len; ++ /* flush L2 and unlock the VA (resumes the MMU) */ ++ op_param.vpfn = start_pfn; ++ op_param.nr = nr; ++ op_param.op = KBASE_MMU_OP_FLUSH_PT; ++ op_param.kctx_id = kctx_id; ++ op_param.mmu_sync_info = mmu_sync_info; ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ unsigned long irq_flags; + -+ /* str end delimiter */ -+ memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len); -+ copy_ptr += end_delim_len; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ op_param.flush_skip_levels = ++ pgd_level_to_skip_flush(dirty_pgds); ++ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ } else { ++ mmu_hw_operation_begin(kbdev); ++ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); ++ mmu_hw_operation_end(kbdev); ++ } + -+ /* Terminator */ -+ *copy_ptr = '\0'; ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ ret = kutf_helper_userdata_enqueue(context, str_buf); ++ if (ret) ++ dev_err(kbdev->dev, ++ "Flush for GPU page fault due to write access did not complete"); + -+ if (ret) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send str value named '%s': send returned %d", -+ val_name, ret); -+ goto out_err; -+ } ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++} + -+ kfree(str_buf); -+ return ret; ++static void set_gwt_element_page_addr_and_size( ++ struct kbasep_gwt_list_element *element, ++ u64 fault_page_addr, struct tagged_addr fault_phys) ++{ ++ u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; ++ unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1); + -+out_err: -+ kutf_test_fail(context, errmsg); -+ kfree(str_buf); -+ return ret; ++ /* If the fault address lies within a 2MB page, then consider ++ * the whole 2MB page for dumping to avoid incomplete dumps. 
++ */ ++ if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) { ++ element->page_addr = fault_page_addr & ~(SZ_2M - 1); ++ element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE; ++ } else { ++ element->page_addr = fault_page_addr; ++ element->num_pages = 1; ++ } +} -+EXPORT_SYMBOL(kutf_helper_send_named_str); + -+int kutf_helper_receive_named_val( -+ struct kutf_context *context, -+ struct kutf_helper_named_val *named_val) ++static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, ++ struct kbase_as *faulting_as) +{ -+ size_t recv_sz; -+ char *recv_str; -+ char *search_ptr; -+ char *name_str = NULL; -+ int name_len; -+ int strval_len; -+ enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID; -+ char *strval = NULL; -+ u64 u64val = 0; -+ int err = KUTF_HELPER_ERR_INVALID_VALUE; ++ struct kbasep_gwt_list_element *pos; ++ struct kbase_va_region *region; ++ struct kbase_device *kbdev; ++ struct tagged_addr *fault_phys_addr; ++ struct kbase_fault *fault; ++ u64 fault_pfn, pfn_offset; ++ int as_no; ++ u64 dirty_pgds = 0; + -+ recv_str = kutf_helper_input_dequeue(context, &recv_sz); -+ if (!recv_str) -+ return -EBUSY; -+ else if (IS_ERR(recv_str)) -+ return PTR_ERR(recv_str); ++ as_no = faulting_as->number; ++ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++ fault = &faulting_as->pf_data; ++ fault_pfn = fault->addr >> PAGE_SHIFT; + -+ /* Find the '=', grab the name and validate it */ -+ search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]); -+ if (search_ptr) { -+ name_len = search_ptr - recv_str; -+ if (!validate_val_name(recv_str, name_len)) { -+ /* no need to reallocate - just modify string in place */ -+ name_str = recv_str; -+ name_str[name_len] = '\0'; ++ kbase_gpu_vm_lock(kctx); + -+ /* Move until after the '=' */ -+ recv_str += (name_len + 1); -+ recv_sz -= (name_len + 1); -+ } ++ /* Find region and check if it should be writable. */ ++ region = kbase_region_tracker_find_region_enclosing_address(kctx, ++ fault->addr); ++ if (kbase_is_region_invalid_or_free(region)) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory is not mapped on the GPU", ++ &faulting_as->pf_data); ++ return; + } -+ if (!name_str) { -+ pr_err("Invalid name part for received string '%s'\n", -+ recv_str); -+ return KUTF_HELPER_ERR_INVALID_NAME; ++ ++ if (!(region->flags & KBASE_REG_GPU_WR)) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Region does not have write permissions", ++ &faulting_as->pf_data); ++ return; + } + -+ /* detect value type */ -+ if (*recv_str == NAMED_STR_START_DELIM[1]) { -+ /* string delimiter start*/ -+ ++recv_str; -+ --recv_sz; ++ pfn_offset = fault_pfn - region->start_pfn; ++ fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; + -+ /* Find end of string */ -+ search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]); -+ if (search_ptr) { -+ strval_len = search_ptr - recv_str; -+ /* Validate the string to ensure it contains no quotes */ -+ if (strval_len == find_quoted_string_valid_len(recv_str)) { -+ /* no need to reallocate - just modify string in place */ -+ strval = recv_str; -+ strval[strval_len] = '\0'; ++ /* Capture addresses of faulting write location ++ * for job dumping if write tracking is enabled. ++ */ ++ if (kctx->gwt_enabled) { ++ u64 fault_page_addr = fault->addr & PAGE_MASK; ++ bool found = false; ++ /* Check if this write was already handled. 
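/*
 * Editorial sketch (not part of the patch): the address arithmetic used by
 * set_gwt_element_page_addr_and_size() above.  When the fault is backed by a
 * 2MB ("huge") physical page (and the 4KiB index inside the virtual 2MB
 * window matches the index inside the physical large page, which the patch
 * checks via index_in_large_page()), the whole 2MB-aligned range is recorded
 * so the dump is not partial; otherwise only the single 4KiB page is.
 * PAGE_SHIFT/SZ_2M below are the usual 4KiB/2MiB constants.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define SZ_2M (2u * 1024 * 1024)
#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M >> PAGE_SHIFT)	/* 512 */

int main(void)
{
	uint64_t fault_page_addr = 0x10203000;	/* example faulting VA */
	uint64_t fault_pfn = fault_page_addr >> PAGE_SHIFT;
	uint64_t vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1);

	printf("4K index inside 2MB window: %llu\n", (unsigned long long)vindex);
	printf("2MB-aligned start:          0x%llx\n",
	       (unsigned long long)(fault_page_addr & ~(uint64_t)(SZ_2M - 1)));
	return 0;
}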
*/ ++ list_for_each_entry(pos, &kctx->gwt_current_list, link) { ++ if (fault_page_addr == pos->page_addr) { ++ found = true; ++ break; ++ } ++ } + -+ /* Move until after the end delimiter */ -+ recv_str += (strval_len + 1); -+ recv_sz -= (strval_len + 1); -+ type = KUTF_HELPER_VALTYPE_STR; ++ if (!found) { ++ pos = kmalloc(sizeof(*pos), GFP_KERNEL); ++ if (pos) { ++ pos->region = region; ++ set_gwt_element_page_addr_and_size(pos, ++ fault_page_addr, *fault_phys_addr); ++ list_add(&pos->link, &kctx->gwt_current_list); + } else { -+ pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str); -+ err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; ++ dev_warn(kbdev->dev, "kmalloc failure"); + } -+ } else { -+ pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); -+ err = KUTF_HELPER_ERR_NO_END_DELIMITER; -+ } -+ } else { -+ /* possibly a number value - strtoull will parse it */ -+ err = kstrtoull(recv_str, 0, &u64val); -+ /* unlike userspace can't get an end ptr, but if kstrtoull() -+ * reads characters after the number it'll report -EINVAL -+ */ -+ if (!err) { -+ int len_remain = strnlen(recv_str, recv_sz); -+ -+ type = KUTF_HELPER_VALTYPE_U64; -+ recv_str += len_remain; -+ recv_sz -= len_remain; -+ } else { -+ /* special case: not a number, report as such */ -+ pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); + } + } + -+ if (type == KUTF_HELPER_VALTYPE_INVALID) -+ return err; ++ /* Now make this faulting page writable to GPU. */ ++ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, ++ region->flags, region->gpu_alloc->group_id, &dirty_pgds); + -+ /* Any remaining characters - error */ -+ if (strnlen(recv_str, recv_sz) != 0) { -+ pr_err("Characters remain after value of type %s: '%s'\n", -+ get_val_type_name(type), recv_str); -+ return KUTF_HELPER_ERR_CHARS_AFTER_VAL; -+ } ++ kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, ++ kctx->id, dirty_pgds); + -+ /* Success - write into the output structure */ -+ switch (type) { -+ case KUTF_HELPER_VALTYPE_U64: -+ named_val->u.val_u64 = u64val; ++ kbase_gpu_vm_unlock(kctx); ++} ++ ++static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, ++ struct kbase_as *faulting_as) ++{ ++ struct kbase_fault *fault = &faulting_as->pf_data; ++ ++ switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); + break; -+ case KUTF_HELPER_VALTYPE_STR: -+ named_val->u.val_str = strval; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Execute Permission fault", fault); ++ break; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Read Permission fault", fault); + break; + default: -+ pr_err("Unreachable, fix %s\n", __func__); -+ /* Coding error, report as though 'run' file failed */ -+ return -EINVAL; ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Unknown Permission fault", fault); ++ break; + } ++} ++#endif + -+ named_val->val_name = name_str; -+ named_val->type = type; ++/** ++ * estimate_pool_space_required - Determine how much a pool should be grown by to support a future ++ * allocation ++ * @pool: The memory pool to check, including its linked pools ++ * @pages_required: Number of 4KiB pages require for the pool to support a future allocation 
++ * ++ * The value returned is accounting for the size of @pool and the size of each memory pool linked to ++ * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to ++ * allocate from. ++ * ++ * Note: this is only an estimate, because even during the calculation the memory pool(s) involved ++ * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an ++ * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller ++ * should keep attempting an allocation and then re-growing with a new value queried form this ++ * function until the allocation succeeds. ++ * ++ * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an ++ * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the ++ * allocation. ++ */ ++static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required) ++{ ++ size_t pages_still_required; + -+ return KUTF_HELPER_ERR_NONE; ++ for (pages_still_required = pages_required; pool != NULL && pages_still_required; ++ pool = pool->next_pool) { ++ size_t pool_size_4k; ++ ++ kbase_mem_pool_lock(pool); ++ ++ pool_size_4k = kbase_mem_pool_size(pool) << pool->order; ++ if (pool_size_4k >= pages_still_required) ++ pages_still_required = 0; ++ else ++ pages_still_required -= pool_size_4k; ++ ++ kbase_mem_pool_unlock(pool); ++ } ++ return pages_still_required; +} -+EXPORT_SYMBOL(kutf_helper_receive_named_val); + -+#define DUMMY_MSG "" -+int kutf_helper_receive_check_val( -+ struct kutf_helper_named_val *named_val, -+ struct kutf_context *context, -+ const char *expect_val_name, -+ enum kutf_helper_valtype expect_val_type) ++/** ++ * page_fault_try_alloc - Try to allocate memory from a context pool ++ * @kctx: Context pointer ++ * @region: Region to grow ++ * @new_pages: Number of 4 KiB pages to allocate ++ * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be ++ * either 4 KiB or 2 MiB pages, depending on the number of pages requested. ++ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for ++ * 4 KiB. ++ * @prealloc_sas: Pointer to kbase_sub_alloc structures ++ * ++ * This function will try to allocate as many pages as possible from the context pool, then if ++ * required will try to allocate the remaining pages from the device pool. ++ * ++ * This function will not allocate any new memory beyond that is already present in the context or ++ * device pools. This is because it is intended to be called whilst the thread has acquired the ++ * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is ++ * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close(). ++ * ++ * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB ++ * pages, otherwise it will be a count of 4 KiB pages. 
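/*
 * Editorial sketch (not part of the patch): how estimate_pool_space_required()
 * above walks a chain of pools, counting each pool's pages in 4KiB units
 * (size << order) until the request is covered.  Worked example: a 2MiB pool
 * (order 9) holding 3 pages covers 3 * 512 = 1536 4KiB pages; a linked 4KiB
 * pool holds 100 more; a request for 2000 pages therefore still needs
 * 2000 - 1536 - 100 = 364 pages, which is what the caller would ask the pool
 * to grow by.  The struct is a simplified stand-in and pool locking is
 * omitted.
 */
#include <stddef.h>
#include <stdio.h>

struct pool {
	size_t free_pages;	/* pages held, in this pool's own page size */
	unsigned int order;	/* 0 = 4KiB pages, 9 = 2MiB pages */
	struct pool *next_pool;
};

static size_t estimate_still_required(const struct pool *pool, size_t pages_required)
{
	for (; pool && pages_required; pool = pool->next_pool) {
		size_t pool_size_4k = pool->free_pages << pool->order;

		pages_required -= (pool_size_4k >= pages_required) ?
				  pages_required : pool_size_4k;
	}
	return pages_required;
}

int main(void)
{
	struct pool small = { .free_pages = 100, .order = 0, .next_pool = NULL };
	struct pool large = { .free_pages = 3, .order = 9, .next_pool = &small };

	printf("%zu\n", estimate_still_required(&large, 2000)); /* prints 364 */
	return 0;
}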
++ * ++ * Return: true if successful, false on failure ++ */ ++static bool page_fault_try_alloc(struct kbase_context *kctx, ++ struct kbase_va_region *region, size_t new_pages, ++ int *pages_to_grow, bool *grow_2mb_pool, ++ struct kbase_sub_alloc **prealloc_sas) +{ -+ int err; ++ size_t total_gpu_pages_alloced = 0; ++ size_t total_cpu_pages_alloced = 0; ++ struct kbase_mem_pool *pool, *root_pool; ++ bool alloc_failed = false; ++ size_t pages_still_required; ++ size_t total_mempools_free_4k = 0; + -+ err = kutf_helper_receive_named_val(context, named_val); -+ if (err < 0) { -+ const char *msg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to receive value named '%s'", -+ expect_val_name); -+ kutf_test_fail(context, msg); -+ return err; -+ } else if (err > 0) { -+ const char *msg = kutf_dsprintf(&context->fixture_pool, -+ "Named-value parse error when expecting value named '%s'", -+ expect_val_name); -+ kutf_test_fail(context, msg); -+ goto out_fail_and_fixup; ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->mem_partials_lock); ++ ++ if (WARN_ON(region->gpu_alloc->group_id >= ++ MEMORY_GROUP_MANAGER_NR_GROUPS)) { ++ /* Do not try to grow the memory pool */ ++ *pages_to_grow = 0; ++ return false; + } + -+ if (named_val->val_name != NULL && -+ strcmp(named_val->val_name, expect_val_name) != 0) { -+ const char *msg = kutf_dsprintf(&context->fixture_pool, -+ "Expecting to receive value named '%s' but got '%s'", -+ expect_val_name, named_val->val_name); -+ kutf_test_fail(context, msg); -+ goto out_fail_and_fixup; ++ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) { ++ root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; ++ *grow_2mb_pool = true; ++ } else { ++ root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; ++ *grow_2mb_pool = false; + } + ++ if (region->gpu_alloc != region->cpu_alloc) ++ new_pages *= 2; + -+ if (named_val->type != expect_val_type) { -+ const char *msg = kutf_dsprintf(&context->fixture_pool, -+ "Expecting value named '%s' to be of type %s but got %s", -+ expect_val_name, get_val_type_name(expect_val_type), -+ get_val_type_name(named_val->type)); -+ kutf_test_fail(context, msg); -+ goto out_fail_and_fixup; ++ /* Determine how many pages are in the pools before trying to allocate. ++ * Don't attempt to allocate & free if the allocation can't succeed. ++ */ ++ pages_still_required = estimate_pool_space_required(root_pool, new_pages); ++ ++ if (pages_still_required) { ++ /* Insufficient pages in pools. Don't try to allocate - just ++ * request a grow. ++ */ ++ *pages_to_grow = pages_still_required; ++ ++ return false; + } + -+ return err; ++ /* Since we're not holding any of the mempool locks, the amount of memory in the pools may ++ * change between the above estimate and the actual allocation. 
++ */ ++ pages_still_required = new_pages; ++ for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) { ++ size_t pool_size_4k; ++ size_t pages_to_alloc_4k; ++ size_t pages_to_alloc_4k_per_alloc; + -+out_fail_and_fixup: -+ /* Produce a valid but incorrect value */ -+ switch (expect_val_type) { -+ case KUTF_HELPER_VALTYPE_U64: -+ named_val->u.val_u64 = 0ull; -+ break; -+ case KUTF_HELPER_VALTYPE_STR: -+ { -+ char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); ++ kbase_mem_pool_lock(pool); + -+ if (!str) -+ return -1; ++ /* Allocate as much as possible from this pool*/ ++ pool_size_4k = kbase_mem_pool_size(pool) << pool->order; ++ total_mempools_free_4k += pool_size_4k; ++ pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k); ++ if (region->gpu_alloc == region->cpu_alloc) ++ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; ++ else ++ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; + -+ strcpy(str, DUMMY_MSG); -+ named_val->u.val_str = str; -+ break; -+ } -+ default: -+ break; -+ } ++ if (pages_to_alloc_4k) { ++ struct tagged_addr *gpu_pages = ++ kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool, ++ pages_to_alloc_4k_per_alloc, ++ &prealloc_sas[0]); + -+ /* Indicate that this is invalid */ -+ named_val->type = KUTF_HELPER_VALTYPE_INVALID; ++ if (!gpu_pages) ++ alloc_failed = true; ++ else ++ total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc; + -+ /* But at least allow the caller to continue in the test with failures */ -+ return 0; -+} -+EXPORT_SYMBOL(kutf_helper_receive_check_val); ++ if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) { ++ struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked( ++ region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc, ++ &prealloc_sas[1]); + -+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val) -+{ -+ switch (named_val->type) { -+ case KUTF_HELPER_VALTYPE_U64: -+ pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64); -+ break; -+ case KUTF_HELPER_VALTYPE_STR: -+ pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str); -+ break; -+ case KUTF_HELPER_VALTYPE_INVALID: -+ pr_warn("%s is invalid\n", named_val->val_name); -+ break; -+ default: -+ pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type); -+ break; -+ } -+} -+EXPORT_SYMBOL(kutf_helper_output_named_val); -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c -new file mode 100644 -index 000000000..716970abb ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c -@@ -0,0 +1,107 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ if (!cpu_pages) ++ alloc_failed = true; ++ else ++ total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc; ++ } ++ } + -+/* Kernel UTF memory management functions */ ++ kbase_mem_pool_unlock(pool); + -+#include -+#include -+#include ++ if (alloc_failed) { ++ WARN_ON(!pages_still_required); ++ WARN_ON(pages_to_alloc_4k >= pages_still_required); ++ WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required); ++ break; ++ } + -+#include ++ pages_still_required -= pages_to_alloc_4k; ++ } + ++ if (pages_still_required) { ++ /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation, ++ * so must in any case use kbase_free_phy_pages_helper() rather than ++ * kbase_free_phy_pages_helper_locked() ++ */ ++ if (total_gpu_pages_alloced > 0) ++ kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced); ++ if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0) ++ kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced); + -+/** -+ * struct kutf_alloc_entry - Structure representing an allocation. -+ * @node: List node for use with kutf_mempool. -+ * @data: Data area of the allocation -+ */ -+struct kutf_alloc_entry { -+ struct list_head node; -+ u8 data[0]; -+}; ++ if (alloc_failed) { ++ /* Note that in allocating from the above memory pools, we always ensure ++ * never to request more than is available in each pool with the pool's ++ * lock held. Hence failing to allocate in such situations would be unusual ++ * and we should cancel the growth instead (as re-growing the memory pool ++ * might not fix the situation) ++ */ ++ dev_warn( ++ kctx->kbdev->dev, ++ "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", ++ new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, ++ total_mempools_free_4k); ++ *pages_to_grow = 0; ++ } else { ++ /* Tell the caller to try to grow the memory pool ++ * ++ * Freeing pages above may have spilled or returned them to the OS, so we ++ * have to take into account how many are still in the pool before giving a ++ * new estimate for growth required of the pool. We can just re-estimate a ++ * new value. ++ */ ++ pages_still_required = estimate_pool_space_required(root_pool, new_pages); ++ if (pages_still_required) { ++ *pages_to_grow = pages_still_required; ++ } else { ++ /* It's possible another thread could've grown the pool to be just ++ * big enough after we rolled back the allocation. Request at least ++ * one more page to ensure the caller doesn't fail the growth by ++ * conflating it with the alloc_failed case above ++ */ ++ *pages_to_grow = 1u; ++ } ++ } + -+int kutf_mempool_init(struct kutf_mempool *pool) -+{ -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ return -1; ++ return false; + } + -+ INIT_LIST_HEAD(&pool->head); -+ mutex_init(&pool->lock); ++ /* Allocation was successful. No pages to grow, return success. 
*/ ++ *pages_to_grow = 0; + -+ return 0; ++ return true; +} -+EXPORT_SYMBOL(kutf_mempool_init); + -+void kutf_mempool_destroy(struct kutf_mempool *pool) ++void kbase_mmu_page_fault_worker(struct work_struct *data) +{ -+ struct list_head *remove; -+ struct list_head *tmp; ++ u64 fault_pfn; ++ u32 fault_status; ++ size_t new_pages; ++ size_t fault_rel_pfn; ++ struct kbase_as *faulting_as; ++ int as_no; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct kbase_va_region *region; ++ struct kbase_fault *fault; ++ int err; ++ bool grown = false; ++ int pages_to_grow; ++ bool grow_2mb_pool; ++ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; ++ int i; ++ size_t current_backed_size; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ size_t pages_trimmed = 0; ++#endif + -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ return; -+ } ++ /* Calls to this function are inherently synchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + -+ mutex_lock(&pool->lock); -+ list_for_each_safe(remove, tmp, &pool->head) { -+ struct kutf_alloc_entry *remove_alloc; ++ faulting_as = container_of(data, struct kbase_as, work_pagefault); ++ fault = &faulting_as->pf_data; ++ fault_pfn = fault->addr >> PAGE_SHIFT; ++ as_no = faulting_as->number; + -+ remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); -+ list_del(&remove_alloc->node); -+ kfree(remove_alloc); ++ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++ dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, ++ fault_pfn, as_no); ++ ++ /* Grab the context that was already refcounted in kbase_mmu_interrupt() ++ * Therefore, it cannot be scheduled out of this AS until we explicitly ++ * release it ++ */ ++ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); ++ if (!kctx) { ++ atomic_dec(&kbdev->faults_pending); ++ return; + } -+ mutex_unlock(&pool->lock); + -+} -+EXPORT_SYMBOL(kutf_mempool_destroy); ++ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + -+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) -+{ -+ struct kutf_alloc_entry *ret; ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++#if !MALI_USE_CSF ++ mutex_lock(&kctx->jctx.lock); ++#endif ++#endif + -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ goto fail_pool; ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* check if we still have GPU */ ++ if (unlikely(kbase_is_gpu_removed(kbdev))) { ++ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); ++ goto fault_done; + } ++#endif + -+ mutex_lock(&pool->lock); ++ if (unlikely(fault->protected_mode)) { ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Protected mode fault", fault); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); + -+ ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); -+ if (!ret) { -+ pr_err("Failed to allocate memory\n"); -+ goto fail_alloc; ++ goto fault_done; + } + -+ INIT_LIST_HEAD(&ret->node); -+ list_add(&ret->node, &pool->head); ++ fault_status = fault->status; ++ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { + -+ mutex_unlock(&pool->lock); ++ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: ++ /* need to check against the region to handle this one */ ++ break; + -+ return &ret->data[0]; ++ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: ++#ifdef CONFIG_MALI_CINSTR_GWT ++ /* If GWT was ever enabled then we need to handle ++ * write fault pages even if the feature was disabled later. 
++ */ ++ if (kctx->gwt_was_enabled) { ++ kbase_gpu_mmu_handle_permission_fault(kctx, ++ faulting_as); ++ goto fault_done; ++ } ++#endif + -+fail_alloc: -+ mutex_unlock(&pool->lock); -+fail_pool: -+ return NULL; -+} -+EXPORT_SYMBOL(kutf_mempool_alloc); -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c -new file mode 100644 -index 000000000..3a7ade283 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c -@@ -0,0 +1,163 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Permission failure", fault); ++ goto fault_done; + -+/* Kernel UTF result management functions */ ++ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Translation table bus fault", fault); ++ goto fault_done; + -+#include -+#include -+#include -+#include -+#include -+#include ++ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: ++ /* nothing to do, but we don't expect this fault currently */ ++ dev_warn(kbdev->dev, "Access flag unexpectedly set"); ++ goto fault_done; + -+#include -+#include ++ case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Address size fault", fault); ++ goto fault_done; + -+/* Lock to protect all result structures */ -+static DEFINE_SPINLOCK(kutf_result_lock); ++ case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory attributes fault", fault); ++ goto fault_done; + -+struct kutf_result_set *kutf_create_result_set(void) -+{ -+ struct kutf_result_set *set; ++ default: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Unknown fault code", fault); ++ goto fault_done; ++ } + -+ set = kmalloc(sizeof(*set), GFP_KERNEL); -+ if (!set) { -+ pr_err("Failed to allocate resultset"); -+ goto fail_alloc; ++page_fault_retry: ++ if (kbdev->pagesize_2mb) { ++ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { ++ if (!prealloc_sas[i]) { ++ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); ++ ++ if (!prealloc_sas[i]) { ++ kbase_mmu_report_fault_and_kill( ++ kctx, faulting_as, ++ "Failed pre-allocating memory for sub-allocations' metadata", ++ fault); ++ goto fault_done; ++ } ++ } ++ } + } + -+ INIT_LIST_HEAD(&set->results); -+ init_waitqueue_head(&set->waitq); -+ set->flags = 0; ++ /* so we have a translation fault, ++ * let's see if it is for growable memory ++ */ ++ kbase_gpu_vm_lock(kctx); + -+ return set; ++ region = 
kbase_region_tracker_find_region_enclosing_address(kctx, ++ fault->addr); ++ if (kbase_is_region_invalid_or_free(region)) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory is not mapped on the GPU", fault); ++ goto fault_done; ++ } + -+fail_alloc: -+ return NULL; -+} ++ if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "DMA-BUF is not mapped on the GPU", fault); ++ goto fault_done; ++ } + -+int kutf_add_result(struct kutf_context *context, -+ enum kutf_result_status status, -+ const char *message) -+{ -+ struct kutf_mempool *mempool = &context->fixture_pool; -+ struct kutf_result_set *set = context->result_set; -+ /* Create the new result */ -+ struct kutf_result *new_result; ++ if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Bad physical memory group ID", fault); ++ goto fault_done; ++ } + -+ BUG_ON(set == NULL); ++ if ((region->flags & GROWABLE_FLAGS_REQUIRED) ++ != GROWABLE_FLAGS_REQUIRED) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory is not growable", fault); ++ goto fault_done; ++ } + -+ new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); -+ if (!new_result) { -+ pr_err("Result allocation failed\n"); -+ return -ENOMEM; ++ if ((region->flags & KBASE_REG_DONT_NEED)) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Don't need memory can't be grown", fault); ++ goto fault_done; + } + -+ INIT_LIST_HEAD(&new_result->node); -+ new_result->status = status; -+ new_result->message = message; ++ if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == ++ AS_FAULTSTATUS_ACCESS_TYPE_READ) ++ dev_warn(kbdev->dev, "Grow on pagefault while reading"); + -+ spin_lock(&kutf_result_lock); ++ /* find the size we need to grow it by ++ * we know the result fit in a size_t due to ++ * kbase_region_tracker_find_region_enclosing_address ++ * validating the fault_address to be within a size_t from the start_pfn ++ */ ++ fault_rel_pfn = fault_pfn - region->start_pfn; + -+ list_add_tail(&new_result->node, &set->results); ++ current_backed_size = kbase_reg_current_backed_size(region); + -+ spin_unlock(&kutf_result_lock); ++ if (fault_rel_pfn < current_backed_size) { ++ struct kbase_mmu_hw_op_param op_param; + -+ wake_up(&set->waitq); ++ dev_dbg(kbdev->dev, ++ "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", ++ fault->addr, region->start_pfn, ++ region->start_pfn + ++ current_backed_size); + -+ return 0; -+} ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+void kutf_destroy_result_set(struct kutf_result_set *set) -+{ -+ if (!list_empty(&set->results)) -+ pr_err("%s: Unread results from test\n", __func__); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ /* [1] in case another page fault occurred while we were ++ * handling the (duplicate) page fault we need to ensure we ++ * don't loose the other page fault as result of us clearing ++ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send ++ * an UNLOCK command that will retry any stalled memory ++ * transaction (which should cause the other page fault to be ++ * raised again). 
++ */ ++ op_param.mmu_sync_info = mmu_sync_info; ++ op_param.kctx_id = kctx->id; ++ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ mmu_hw_operation_begin(kbdev); ++ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, ++ &op_param); ++ mmu_hw_operation_end(kbdev); ++ } else { ++ /* Can safely skip the invalidate for all levels in case ++ * of duplicate page faults. ++ */ ++ op_param.flush_skip_levels = 0xF; ++ op_param.vpfn = fault_pfn; ++ op_param.nr = 1; ++ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, ++ &op_param); ++ } + -+ kfree(set); -+} ++ if (err) { ++ dev_err(kbdev->dev, ++ "Invalidation for MMU did not complete on handling page fault @ 0x%llx", ++ fault->addr); ++ } + -+static bool kutf_has_result(struct kutf_result_set *set) -+{ -+ bool has_result; ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ spin_lock(&kutf_result_lock); -+ if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) -+ /* Pretend there are results if waiting for input */ -+ has_result = true; -+ else -+ has_result = !list_empty(&set->results); -+ spin_unlock(&kutf_result_lock); ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ kbase_gpu_vm_unlock(kctx); + -+ return has_result; -+} ++ goto fault_done; ++ } + -+struct kutf_result *kutf_remove_result(struct kutf_result_set *set) -+{ -+ struct kutf_result *result = NULL; -+ int ret; ++ new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); + -+ do { -+ ret = wait_event_interruptible(set->waitq, -+ kutf_has_result(set)); ++ /* cap to max vsize */ ++ new_pages = min(new_pages, region->nr_pages - current_backed_size); ++ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); + -+ if (ret) -+ return ERR_PTR(ret); ++ if (new_pages == 0) { ++ struct kbase_mmu_hw_op_param op_param; + -+ spin_lock(&kutf_result_lock); ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ if (!list_empty(&set->results)) { -+ result = list_first_entry(&set->results, -+ struct kutf_result, -+ node); -+ list_del(&result->node); -+ } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { -+ /* Return a fake result */ -+ static struct kutf_result waiting = { -+ .status = KUTF_RESULT_USERDATA_WAIT -+ }; -+ result = &waiting; ++ /* Duplicate of a fault we've already handled, nothing to do */ ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ ++ /* See comment [1] about UNLOCK usage */ ++ op_param.mmu_sync_info = mmu_sync_info; ++ op_param.kctx_id = kctx->id; ++ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ mmu_hw_operation_begin(kbdev); ++ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as, ++ &op_param); ++ mmu_hw_operation_end(kbdev); ++ } else { ++ /* Can safely skip the invalidate for all levels in case ++ * of duplicate page faults. ++ */ ++ op_param.flush_skip_levels = 0xF; ++ op_param.vpfn = fault_pfn; ++ op_param.nr = 1; ++ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, ++ &op_param); + } -+ /* If result == NULL then there was a race with the event -+ * being removed between the check in kutf_has_result and -+ * the lock being obtained. 
In this case we retry -+ */ + -+ spin_unlock(&kutf_result_lock); -+ } while (result == NULL); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Invalidation for MMU did not complete on handling page fault @ 0x%llx", ++ fault->addr); ++ } + -+ return result; -+} ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+void kutf_set_waiting_for_input(struct kutf_result_set *set) -+{ -+ spin_lock(&kutf_result_lock); -+ set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT; -+ spin_unlock(&kutf_result_lock); ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ kbase_gpu_vm_unlock(kctx); ++ goto fault_done; ++ } + -+ wake_up(&set->waitq); -+} ++ pages_to_grow = 0; + -+void kutf_clear_waiting_for_input(struct kutf_result_set *set) -+{ -+ spin_lock(&kutf_result_lock); -+ set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT; -+ spin_unlock(&kutf_result_lock); -+} -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c -new file mode 100644 -index 000000000..4468066f1 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c -@@ -0,0 +1,1216 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { ++ kbase_jit_request_phys_increase(kctx, new_pages); ++ pages_trimmed = new_pages; ++ } ++#endif + -+/* Kernel UTF suite, test and fixture management including user to kernel -+ * interaction -+ */ ++ spin_lock(&kctx->mem_partials_lock); ++ grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, ++ &grow_2mb_pool, prealloc_sas); ++ spin_unlock(&kctx->mem_partials_lock); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ if (grown) { ++ u64 dirty_pgds = 0; ++ u64 pfn_offset; ++ struct kbase_mmu_hw_op_param op_param; + -+#include ++ /* alloc success */ ++ WARN_ON(kbase_reg_current_backed_size(region) > ++ region->nr_pages); + -+#include -+#include -+#include -+#include ++ /* set up the new pages */ ++ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; ++ /* ++ * Note: ++ * Issuing an MMU operation will unlock the MMU and cause the ++ * translation to be replayed. If the page insertion fails then ++ * rather then trying to continue the context should be killed ++ * so the no_flush version of insert_pages is used which allows ++ * us to unlock the MMU as we see fit. 
++ */ ++ err = kbase_mmu_insert_pages_no_flush( ++ kbdev, &kctx->mmu, region->start_pfn + pfn_offset, ++ &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, ++ region->gpu_alloc->group_id, &dirty_pgds, region, false); ++ if (err) { ++ kbase_free_phy_pages_helper(region->gpu_alloc, ++ new_pages); ++ if (region->gpu_alloc != region->cpu_alloc) ++ kbase_free_phy_pages_helper(region->cpu_alloc, ++ new_pages); ++ kbase_gpu_vm_unlock(kctx); ++ /* The locked VA region will be unlocked and the cache ++ * invalidated in here ++ */ ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Page table update failure", fault); ++ goto fault_done; ++ } ++ KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, ++ (u64)new_pages); ++ trace_mali_mmu_page_fault_grow(region, fault, new_pages); + -+/** -+ * struct kutf_application - Structure which represents kutf application -+ * @name: The name of this test application. -+ * @dir: The debugfs directory for this test -+ * @suite_list: List head to store all the suites which are part of this -+ * application -+ */ -+struct kutf_application { -+ const char *name; -+ struct dentry *dir; -+ struct list_head suite_list; -+}; ++#if MALI_INCREMENTAL_RENDERING_JM ++ /* Switch to incremental rendering if we have nearly run out of ++ * memory in a JIT memory allocation. ++ */ ++ if (region->threshold_pages && ++ kbase_reg_current_backed_size(region) > ++ region->threshold_pages) { ++ dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", ++ new_pages + current_backed_size, region->threshold_pages); + -+/** -+ * struct kutf_test_function - Structure which represents kutf test function -+ * @suite: Back reference to the suite this test function -+ * belongs to -+ * @filters: Filters that apply to this test function -+ * @test_id: Test ID -+ * @execute: Function to run for this test -+ * @test_data: Static data for this test -+ * @node: List node for test_list -+ * @variant_list: List head to store all the variants which can run on -+ * this function -+ * @dir: debugfs directory for this test function -+ */ -+struct kutf_test_function { -+ struct kutf_suite *suite; -+ unsigned int filters; -+ unsigned int test_id; -+ void (*execute)(struct kutf_context *context); -+ union kutf_callback_data test_data; -+ struct list_head node; -+ struct list_head variant_list; -+ struct dentry *dir; -+}; ++ if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { ++ dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); ++ kbase_va_region_alloc_get(kctx, region); ++ } ++ } ++#endif + -+/** -+ * struct kutf_test_fixture - Structure which holds information on the kutf -+ * test fixture -+ * @test_func: Test function this fixture belongs to -+ * @fixture_index: Index of this fixture -+ * @node: List node for variant_list -+ * @dir: debugfs directory for this test fixture -+ */ -+struct kutf_test_fixture { -+ struct kutf_test_function *test_func; -+ unsigned int fixture_index; -+ struct list_head node; -+ struct dentry *dir; -+}; ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+static struct dentry *base_dir; -+static struct workqueue_struct *kutf_workq; ++ /* clear MMU interrupt - this needs to be done after updating ++ * the page tables but before issuing a FLUSH command. The ++ * FLUSH cmd has a side effect that it restarts stalled memory ++ * transactions in other address spaces which may cause ++ * another fault to occur. 
If we didn't clear the interrupt at ++ * this stage a new IRQ might not be raised when the GPU finds ++ * a MMU IRQ is already pending. ++ */ ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); + -+/** -+ * struct kutf_convert_table - Structure which keeps test results -+ * @result_name: Status of the test result -+ * @result: Status value for a single test -+ */ -+struct kutf_convert_table { -+ char result_name[50]; -+ enum kutf_result_status result; -+}; ++ op_param.vpfn = region->start_pfn + pfn_offset; ++ op_param.nr = new_pages; ++ op_param.op = KBASE_MMU_OP_FLUSH_PT; ++ op_param.kctx_id = kctx->id; ++ op_param.mmu_sync_info = mmu_sync_info; ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ /* Unlock to invalidate the TLB (and resume the MMU) */ ++ op_param.flush_skip_levels = ++ pgd_level_to_skip_flush(dirty_pgds); ++ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, ++ &op_param); ++ } else { ++ /* flush L2 and unlock the VA (resumes the MMU) */ ++ mmu_hw_operation_begin(kbdev); ++ err = kbase_mmu_hw_do_flush(kbdev, faulting_as, ++ &op_param); ++ mmu_hw_operation_end(kbdev); ++ } + -+static const struct kutf_convert_table kutf_convert[] = { -+#define ADD_UTF_RESULT(_name) \ -+ { \ -+#_name, _name, \ -+ } -+ ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK), ADD_UTF_RESULT(KUTF_RESULT_SKIP), -+ ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN), ADD_UTF_RESULT(KUTF_RESULT_PASS), -+ ADD_UTF_RESULT(KUTF_RESULT_DEBUG), ADD_UTF_RESULT(KUTF_RESULT_INFO), -+ ADD_UTF_RESULT(KUTF_RESULT_WARN), ADD_UTF_RESULT(KUTF_RESULT_FAIL), -+ ADD_UTF_RESULT(KUTF_RESULT_FATAL), ADD_UTF_RESULT(KUTF_RESULT_ABORT), -+}; ++ if (err) { ++ dev_err(kbdev->dev, ++ "Flush for GPU page table update did not complete on handling page fault @ 0x%llx", ++ fault->addr); ++ } + -+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ + -+/** -+ * kutf_create_context() - Create a test context in which a specific fixture -+ * of an application will be run and its results -+ * reported back to the user -+ * @test_fix: Test fixture to be run. -+ * -+ * The context's refcount will be initialized to 1. -+ * -+ * Return: Returns the created test context on success or NULL on failure -+ */ -+static struct kutf_context *kutf_create_context( -+ struct kutf_test_fixture *test_fix); ++ /* reenable this in the mask */ ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_PAGE); + -+/** -+ * kutf_destroy_context() - Destroy a previously created test context, only -+ * once its refcount has become zero -+ * @kref: pointer to kref member within the context -+ * -+ * This should only be used via a kref_put() call on the context's kref member -+ */ -+static void kutf_destroy_context(struct kref *kref); ++#ifdef CONFIG_MALI_CINSTR_GWT ++ if (kctx->gwt_enabled) { ++ /* GWT also tracks growable regions. 
*/ ++ struct kbasep_gwt_list_element *pos; + -+/** -+ * kutf_context_get() - increment refcount on a context -+ * @context: the kutf context -+ * -+ * This must be used when the lifetime of the context might exceed that of the -+ * thread creating @context -+ */ -+static void kutf_context_get(struct kutf_context *context); ++ pos = kmalloc(sizeof(*pos), GFP_KERNEL); ++ if (pos) { ++ pos->region = region; ++ pos->page_addr = (region->start_pfn + ++ pfn_offset) << ++ PAGE_SHIFT; ++ pos->num_pages = new_pages; ++ list_add(&pos->link, ++ &kctx->gwt_current_list); ++ } else { ++ dev_warn(kbdev->dev, "kmalloc failure"); ++ } ++ } ++#endif + -+/** -+ * kutf_context_put() - decrement refcount on a context, destroying it when it -+ * reached zero -+ * @context: the kutf context -+ * -+ * This must be used only after a corresponding kutf_context_get() call on -+ * @context, and the caller no longer needs access to @context. -+ */ -+static void kutf_context_put(struct kutf_context *context); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (pages_trimmed) { ++ kbase_jit_done_phys_increase(kctx, pages_trimmed); ++ pages_trimmed = 0; ++ } ++#endif ++ kbase_gpu_vm_unlock(kctx); ++ } else { ++ int ret = -ENOMEM; + -+/** -+ * kutf_set_result() - Set the test result against the specified test context -+ * @context: Test context -+ * @status: Result status -+ */ -+static void kutf_set_result(struct kutf_context *context, -+ enum kutf_result_status status); ++ kbase_gpu_vm_unlock(kctx); + -+/** -+ * kutf_set_expected_result() - Set the expected test result for the specified -+ * test context -+ * @context: Test context -+ * @expected_status: Expected result status -+ */ -+static void kutf_set_expected_result(struct kutf_context *context, -+ enum kutf_result_status expected_status); ++ /* If the memory pool was insufficient then grow it and retry. ++ * Otherwise fail the allocation. 
++ */ ++ if (pages_to_grow > 0) { ++ if (kbdev->pagesize_2mb && grow_2mb_pool) { ++ /* Round page requirement up to nearest 2 MB */ ++ struct kbase_mem_pool *const lp_mem_pool = ++ &kctx->mem_pools.large[ ++ region->gpu_alloc->group_id]; + -+/** -+ * kutf_result_to_string() - Converts a KUTF result into a string -+ * @result_str: Output result string -+ * @result: Result status to convert -+ * -+ * Return: 1 if test result was successfully converted to string, 0 otherwise -+ */ -+static int kutf_result_to_string(const char **result_str, enum kutf_result_status result) -+{ -+ int i; -+ int ret = 0; ++ pages_to_grow = (pages_to_grow + ++ ((1 << lp_mem_pool->order) - 1)) ++ >> lp_mem_pool->order; + -+ for (i = 0; i < UTF_CONVERT_SIZE; i++) { -+ if (result == kutf_convert[i].result) { -+ *result_str = kutf_convert[i].result_name; -+ ret = 1; ++ ret = kbase_mem_pool_grow(lp_mem_pool, ++ pages_to_grow, kctx->task); ++ } else { ++ struct kbase_mem_pool *const mem_pool = ++ &kctx->mem_pools.small[ ++ region->gpu_alloc->group_id]; ++ ++ ret = kbase_mem_pool_grow(mem_pool, ++ pages_to_grow, kctx->task); ++ } ++ } ++ if (ret < 0) { ++ /* failed to extend, handle as a normal PF */ ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Page allocation failure", fault); ++ } else { ++ dev_dbg(kbdev->dev, "Try again after pool_grow"); ++ goto page_fault_retry; + } + } -+ return ret; -+} + -+/** -+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which -+ * returns a constant string -+ * @file: Opened file to read from -+ * @buf: User buffer to write the data into -+ * @len: Amount of data to read -+ * @ppos: Offset into file to read from -+ * -+ * Return: On success, the number of bytes read and offset @ppos advanced by -+ * this number; on error, negative value -+ */ -+static ssize_t kutf_debugfs_const_string_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) -+{ -+ char *str = file->private_data; ++fault_done: ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (pages_trimmed) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_jit_done_phys_increase(kctx, pages_trimmed); ++ kbase_gpu_vm_unlock(kctx); ++ } ++#if !MALI_USE_CSF ++ mutex_unlock(&kctx->jctx.lock); ++#endif ++#endif + -+ return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); -+} ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) ++ kfree(prealloc_sas[i]); + -+static const struct file_operations kutf_debugfs_const_string_ops = { -+ .owner = THIS_MODULE, -+ .open = simple_open, -+ .read = kutf_debugfs_const_string_read, -+ .llseek = default_llseek, -+}; ++ /* ++ * By this point, the fault was handled in some way, ++ * so release the ctx refcount ++ */ ++ release_ctx(kbdev, kctx); + -+/** -+ * kutf_add_explicit_result() - Check if an explicit result needs to be added -+ * @context: KUTF test context -+ */ -+static void kutf_add_explicit_result(struct kutf_context *context) ++ atomic_dec(&kbdev->faults_pending); ++ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); ++} ++ ++static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut) +{ -+ switch (context->expected_status) { -+ case KUTF_RESULT_UNKNOWN: -+ break; ++ u64 *page; ++ struct page *p; ++ phys_addr_t pgd; + -+ case KUTF_RESULT_WARN: -+ if (context->status == KUTF_RESULT_WARN) -+ kutf_test_pass(context, -+ "Pass (expected warn occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected warn missing)"); -+ break; ++ p = 
kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); ++ if (!p) ++ return KBASE_MMU_INVALID_PGD_ADDRESS; + -+ case KUTF_RESULT_FAIL: -+ if (context->status == KUTF_RESULT_FAIL) -+ kutf_test_pass(context, -+ "Pass (expected fail occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) { -+ /* Force the expected status so the fail gets logged */ -+ context->expected_status = KUTF_RESULT_PASS; -+ kutf_test_fail(context, -+ "Fail (expected fail missing)"); -+ } -+ break; ++ page = kmap(p); ++ if (page == NULL) ++ goto alloc_free; + -+ case KUTF_RESULT_FATAL: -+ if (context->status == KUTF_RESULT_FATAL) -+ kutf_test_pass(context, -+ "Pass (expected fatal occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected fatal missing)"); -+ break; ++ pgd = page_to_phys(p); + -+ case KUTF_RESULT_ABORT: -+ if (context->status == KUTF_RESULT_ABORT) -+ kutf_test_pass(context, -+ "Pass (expected abort occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected abort missing)"); -+ break; -+ default: -+ break; ++ /* If the MMU tables belong to a context then account the memory usage ++ * to that context, otherwise the MMU tables are device wide and are ++ * only accounted to the device. ++ */ ++ if (mmut->kctx) { ++ int new_page_count; ++ ++ new_page_count = atomic_add_return(1, ++ &mmut->kctx->used_pages); ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ kbdev, ++ mmut->kctx->id, ++ (u64)new_page_count); ++ kbase_process_page_usage_inc(mmut->kctx, 1); + } ++ ++ atomic_add(1, &kbdev->memdev.used_pages); ++ ++ kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); ++ ++ kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); ++ ++ /* As this page is newly created, therefore there is no content to ++ * clean or invalidate in the GPU caches. ++ */ ++ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ ++ kunmap(p); ++ return pgd; ++ ++alloc_free: ++ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); ++ ++ return KBASE_MMU_INVALID_PGD_ADDRESS; +} + -+static void kutf_run_test(struct work_struct *data) ++/** ++ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1 ++ * ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @pgd: Physical addresse of level N page directory. ++ * @vpfn: The virtual page frame number. ++ * @level: The level of MMU page table (N). ++ * ++ * Return: ++ * * 0 - OK ++ * * -EFAULT - level N+1 PGD does not exist ++ * * -EINVAL - kmap() failed for level N PGD PFN ++ */ ++static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ phys_addr_t *pgd, u64 vpfn, int level) +{ -+ struct kutf_context *test_context = container_of(data, -+ struct kutf_context, work); -+ struct kutf_suite *suite = test_context->suite; -+ struct kutf_test_function *test_func; ++ u64 *page; ++ phys_addr_t target_pgd; ++ struct page *p; + -+ test_func = test_context->test_fix->test_func; ++ lockdep_assert_held(&mmut->mmu_lock); + + /* -+ * Call the create fixture function if required before the -+ * fixture is run ++ * Architecture spec defines level-0 as being the top-most. ++ * This is a bit unfortunate here, but we keep the same convention. 
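/*
 * Editorial sketch (not part of the patch): the round-up shown a little
 * earlier in the page-fault worker, where a shortfall expressed in 4KiB
 * pages is converted into 2MiB pool pages before kbase_mem_pool_grow() is
 * called on the large pool:
 *   pages_to_grow = (pages_to_grow + ((1 << order) - 1)) >> order;
 * Assuming the usual order of 9 (512 x 4KiB per 2MiB page), a shortfall of
 * 700 4KiB pages becomes (700 + 511) >> 9 = 2 large pages.
 */
#include <stdio.h>

static unsigned long round_up_to_pool_pages(unsigned long pages_4k, unsigned int order)
{
	return (pages_4k + ((1UL << order) - 1)) >> order;
}

int main(void)
{
	printf("%lu\n", round_up_to_pool_pages(700, 9)); /* prints 2 */
	return 0;
}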
+ */ -+ if (suite->create_fixture) -+ test_context->fixture = suite->create_fixture(test_context); -+ -+ /* Only run the test if the fixture was created (if required) */ -+ if ((suite->create_fixture && test_context->fixture) || -+ (!suite->create_fixture)) { -+ /* Run this fixture */ -+ test_func->execute(test_context); ++ vpfn >>= (3 - level) * 9; ++ vpfn &= 0x1FF; + -+ if (suite->remove_fixture) -+ suite->remove_fixture(test_context); ++ p = pfn_to_page(PFN_DOWN(*pgd)); ++ page = kmap(p); ++ if (page == NULL) { ++ dev_err(kbdev->dev, "%s: kmap failure", __func__); ++ return -EINVAL; ++ } + -+ kutf_add_explicit_result(test_context); ++ if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { ++ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, ++ vpfn); ++ kunmap(p); ++ return -EFAULT; ++ } else { ++ target_pgd = kbdev->mmu_mode->pte_to_phy_addr( ++ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); + } + -+ kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL); ++ kunmap(p); ++ *pgd = target_pgd; + -+ kutf_context_put(test_context); ++ return 0; +} + +/** -+ * kutf_debugfs_run_open() - Debugfs open callback for the "run" entry. ++ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level + * -+ * @inode: inode of the opened file -+ * @file: Opened file to read from ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @vpfn: The virtual page frame number. ++ * @in_level: The level of MMU page table (N). ++ * @out_level: Set to the level of the lowest valid PGD found on success. ++ * Invalid on error. ++ * @out_pgd: Set to the lowest valid PGD found on success. ++ * Invalid on error. + * -+ * This function creates a KUTF context and queues it onto a workqueue to be -+ * run asynchronously. The resulting file descriptor can be used to communicate -+ * userdata to the test and to read back the results of the test execution. ++ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or ++ * closest to in_level + * -+ * Return: 0 on success ++ * Terminology: ++ * Level-0 = Top-level = highest ++ * Level-3 = Bottom-level = lowest ++ * ++ * Return: ++ * * 0 - OK ++ * * -EINVAL - kmap() failed during page table walk. + */ -+static int kutf_debugfs_run_open(struct inode *inode, struct file *file) ++static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd) +{ -+ struct kutf_test_fixture *test_fix = inode->i_private; -+ struct kutf_context *test_context; ++ phys_addr_t pgd; ++ int l; + int err = 0; + -+ test_context = kutf_create_context(test_fix); -+ if (!test_context) { -+ err = -ENOMEM; -+ goto finish; -+ } ++ lockdep_assert_held(&mmut->mmu_lock); ++ pgd = mmut->pgd; + -+ file->private_data = test_context; ++ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) { ++ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + -+ /* This reference is release by the kutf_run_test */ -+ kutf_context_get(test_context); ++ /* Handle failure condition */ ++ if (err) { ++ dev_dbg(kbdev->dev, ++ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", ++ __func__, l + 1); ++ break; ++ } ++ } + -+ queue_work(kutf_workq, &test_context->work); ++ *out_pgd = pgd; ++ *out_level = l; ++ ++ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid. ++ * This implies that we have found the lowest valid pgd. Reset the error code. 
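++	 *
++	 * Callers can then compare out_level with in_level: if out_level is lower,
++	 * the missing levels (out_level + 1 .. in_level) still have to be allocated,
++	 * as the insert paths below do via mmu_insert_alloc_pgds().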
++ */ ++ if (err == -EFAULT) ++ err = 0; + -+finish: + return err; +} + -+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n" -+ -+/** -+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. -+ * @file: Opened file to read from -+ * @buf: User buffer to write the data into -+ * @len: Amount of data to read -+ * @ppos: Offset into file to read from -+ * -+ * This function emits the results of the test, blocking until they are -+ * available. -+ * -+ * If the test involves user data then this will also return user data records -+ * to user space. If the test is waiting for user data then this function will -+ * output a message (to make the likes of 'cat' display it), followed by -+ * returning 0 to mark the end of file. -+ * -+ * Results will be emitted one at a time, once all the results have been read -+ * 0 will be returned to indicate there is no more data. -+ * -+ * Return: Number of bytes read. ++/* ++ * On success, sets out_pgd to the PGD for the specified level of translation ++ * Returns -EFAULT if a valid PGD is not found + */ -+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) ++static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, ++ int level, phys_addr_t *out_pgd) +{ -+ struct kutf_context *test_context = file->private_data; -+ struct kutf_result *res; -+ unsigned long bytes_not_copied; -+ ssize_t bytes_copied = 0; -+ const char *kutf_str_ptr = NULL; -+ size_t kutf_str_len = 0; -+ size_t message_len = 0; -+ char separator = ':'; -+ char terminator = '\n'; -+ -+ res = kutf_remove_result(test_context->result_set); ++ phys_addr_t pgd; ++ int l; + -+ if (IS_ERR(res)) -+ return PTR_ERR(res); ++ lockdep_assert_held(&mmut->mmu_lock); ++ pgd = mmut->pgd; + -+ /* -+ * Handle 'fake' results - these results are converted to another -+ * form before being returned from the kernel -+ */ -+ switch (res->status) { -+ case KUTF_RESULT_TEST_FINISHED: -+ return 0; -+ case KUTF_RESULT_USERDATA_WAIT: -+ if (test_context->userdata.flags & -+ KUTF_USERDATA_WARNING_OUTPUT) { -+ /* -+ * Warning message already output, -+ * signal end-of-file -+ */ -+ return 0; ++ for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { ++ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); ++ /* Handle failure condition */ ++ if (err) { ++ dev_err(kbdev->dev, ++ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", ++ __func__, l + 1); ++ return err; + } ++ } + -+ message_len = sizeof(USERDATA_WARNING_MESSAGE)-1; -+ if (message_len > len) -+ message_len = len; ++ *out_pgd = pgd; + -+ bytes_not_copied = copy_to_user(buf, -+ USERDATA_WARNING_MESSAGE, -+ message_len); -+ if (bytes_not_copied != 0) -+ return -EFAULT; -+ test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT; -+ return message_len; -+ case KUTF_RESULT_USERDATA: -+ message_len = strlen(res->message); -+ if (message_len > len-1) { -+ message_len = len-1; -+ pr_warn("User data truncated, read not long enough\n"); -+ } -+ bytes_not_copied = copy_to_user(buf, res->message, -+ message_len); -+ if (bytes_not_copied != 0) { -+ pr_warn("Failed to copy data to user space buffer\n"); -+ return -EFAULT; ++ return 0; ++} ++ ++static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, u64 from_vpfn, ++ u64 to_vpfn, u64 *dirty_pgds, ++ struct tagged_addr *phys, bool ignore_page_migration) ++{ ++ u64 vpfn = from_vpfn; ++ struct kbase_mmu_mode const *mmu_mode; ++ ++ /* 64-bit 
address range is the max */ ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); ++ KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); ++ ++ lockdep_assert_held(&mmut->mmu_lock); ++ ++ mmu_mode = kbdev->mmu_mode; ++ kbase_mmu_reset_free_pgds_list(mmut); ++ ++ while (vpfn < to_vpfn) { ++ unsigned int idx = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; ++ unsigned int pcount = 0; ++ unsigned int left = to_vpfn - vpfn; ++ int level; ++ u64 *page; ++ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; ++ phys_addr_t pgd = mmut->pgd; ++ struct page *p = phys_to_page(pgd); ++ ++ register unsigned int num_of_valid_entries; ++ ++ if (count > left) ++ count = left; ++ ++ /* need to check if this is a 2MB page or a 4kB */ ++ for (level = MIDGARD_MMU_TOPLEVEL; ++ level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { ++ idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; ++ pgds[level] = pgd; ++ page = kmap(p); ++ if (mmu_mode->ate_is_valid(page[idx], level)) ++ break; /* keep the mapping */ ++ kunmap(p); ++ pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); ++ p = phys_to_page(pgd); + } -+ /* Finally the terminator */ -+ bytes_not_copied = copy_to_user(&buf[message_len], -+ &terminator, 1); -+ if (bytes_not_copied != 0) { -+ pr_warn("Failed to copy data to user space buffer\n"); -+ return -EFAULT; ++ ++ switch (level) { ++ case MIDGARD_MMU_LEVEL(2): ++ /* remap to single entry to update */ ++ pcount = 1; ++ break; ++ case MIDGARD_MMU_BOTTOMLEVEL: ++ /* page count is the same as the logical count */ ++ pcount = count; ++ break; ++ default: ++ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); ++ goto next; + } -+ return message_len+1; -+ default: -+ /* Fall through - this is a test result */ -+ break; -+ } + -+ /* Note: This code assumes a result is read completely */ -+ kutf_result_to_string(&kutf_str_ptr, res->status); -+ if (kutf_str_ptr) -+ kutf_str_len = strlen(kutf_str_ptr); ++ if (dirty_pgds && pcount > 0) ++ *dirty_pgds |= 1ULL << level; + -+ if (res->message) -+ message_len = strlen(res->message); ++ num_of_valid_entries = mmu_mode->get_num_valid_entries(page); ++ if (WARN_ON_ONCE(num_of_valid_entries < pcount)) ++ num_of_valid_entries = 0; ++ else ++ num_of_valid_entries -= pcount; + -+ if ((kutf_str_len + 1 + message_len + 1) > len) { -+ pr_err("Not enough space in user buffer for a single result"); -+ return 0; -+ } ++ /* Invalidate the entries we added */ ++ mmu_mode->entries_invalidate(&page[idx], pcount); + -+ /* First copy the result string */ -+ if (kutf_str_ptr) { -+ bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, -+ kutf_str_len); -+ bytes_copied += kutf_str_len - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; -+ } ++ if (!num_of_valid_entries) { ++ kunmap(p); + -+ /* Then the separator */ -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ &separator, 1); -+ bytes_copied += 1 - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; ++ kbase_mmu_add_to_free_pgds_list(mmut, p); + -+ /* Finally Next copy the result string */ -+ if (res->message) { -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ res->message, message_len); -+ bytes_copied += message_len - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; ++ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, ++ KBASE_MMU_OP_NONE, dirty_pgds); ++ vpfn += count; ++ continue; ++ } ++ ++ mmu_mode->set_num_valid_entries(page, num_of_valid_entries); ++ ++ /* MMU cache flush 
strategy is NONE because GPU cache maintenance is ++ * going to be done by the caller ++ */ ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), ++ kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, ++ KBASE_MMU_OP_NONE); ++ kunmap(p); ++next: ++ vpfn += count; + } + -+ /* Finally the terminator */ -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ &terminator, 1); -+ bytes_copied += 1 - bytes_not_copied; ++ /* If page migration is enabled: the only way to recover from failure ++ * is to mark all pages as not movable. It is not predictable what's ++ * going to happen to these pages at this stage. They might return ++ * movable once they are returned to a memory pool. ++ */ ++ if (kbase_page_migration_enabled && !ignore_page_migration && phys) { ++ const u64 num_pages = to_vpfn - from_vpfn + 1; ++ u64 i; + -+exit: -+ return bytes_copied; ++ for (i = 0; i < num_pages; i++) { ++ struct page *phys_page = as_page(phys[i]); ++ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); ++ ++ if (page_md) { ++ spin_lock(&page_md->migrate_lock); ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ spin_unlock(&page_md->migrate_lock); ++ } ++ } ++ } +} + -+/** -+ * kutf_debugfs_run_write() - Debugfs write callback for the "run" entry. -+ * @file: Opened file to write to -+ * @buf: User buffer to read the data from -+ * @len: Amount of data to write -+ * @ppos: Offset into file to write to -+ * -+ * This function allows user and kernel to exchange extra data necessary for -+ * the test fixture. -+ * -+ * The data is added to the first struct kutf_context running the fixture -+ * -+ * Return: Number of bytes written -+ */ -+static ssize_t kutf_debugfs_run_write(struct file *file, -+ const char __user *buf, size_t len, loff_t *ppos) ++static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, const u64 vpfn, ++ size_t nr, u64 dirty_pgds, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ bool insert_pages_failed) +{ -+ int ret = 0; -+ struct kutf_context *test_context = file->private_data; ++ struct kbase_mmu_hw_op_param op_param; ++ int as_nr = 0; + -+ if (len > KUTF_MAX_LINE_LENGTH) -+ return -EINVAL; ++ op_param.vpfn = vpfn; ++ op_param.nr = nr; ++ op_param.op = KBASE_MMU_OP_FLUSH_PT; ++ op_param.mmu_sync_info = mmu_sync_info; ++ op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; ++ op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + -+ ret = kutf_helper_input_enqueue(test_context, buf, len); -+ if (ret < 0) -+ return ret; ++#if MALI_USE_CSF ++ as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; ++#else ++ WARN_ON(!mmut->kctx); ++#endif + -+ return len; ++ /* MMU cache flush strategy depends on whether GPU control commands for ++ * flushing physical address ranges are supported. The new physical pages ++ * are not present in GPU caches therefore they don't need any cache ++ * maintenance, but PGDs in the page table may or may not be created anew. ++ * ++ * Operations that affect the whole GPU cache shall only be done if it's ++ * impossible to update physical ranges. ++ * ++ * On GPUs where flushing by physical address range is supported, ++ * full cache flush is done when an error occurs during ++ * insert_pages() to keep the error handling simpler. 
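++	 *
++	 * Roughly: if flush-by-range is supported and the insertion succeeded, only
++	 * an MMU invalidate (mmu_invalidate()) is issued below; in all other cases
++	 * the full mmu_flush_invalidate() path is taken.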
++ */ ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) ++ mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); ++ else ++ mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); +} + +/** -+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. -+ * @inode: File entry representation -+ * @file: A specific opening of the file ++ * update_parent_pgds() - Updates the page table from bottom level towards ++ * the top level to insert a new ATE + * -+ * Release any resources that were created during the opening of the file ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @cur_level: The level of MMU page table where the ATE needs to be added. ++ * The bottom PGD level. ++ * @insert_level: The level of MMU page table where the chain of newly allocated ++ * PGDs needs to be linked-in/inserted. ++ * The top-most PDG level to be updated. ++ * @insert_vpfn: The virtual page frame number for the ATE. ++ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains ++ * the physical addresses of newly allocated PGDs from index ++ * insert_level+1 to cur_level, and an existing PGD at index ++ * insert_level. + * -+ * Note that resources may not be released immediately, that might only happen -+ * later when other users of the kutf_context release their refcount. ++ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD ++ * at insert_level which already exists in the MMU Page Tables.Migration status is also ++ * updated for all the newly allocated PGD pages. + * -+ * Return: 0 on success ++ * Return: ++ * * 0 - OK ++ * * -EFAULT - level N+1 PGD does not exist ++ * * -EINVAL - kmap() failed for level N PGD PFN + */ -+static int kutf_debugfs_run_release(struct inode *inode, struct file *file) ++static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ int cur_level, int insert_level, u64 insert_vpfn, ++ phys_addr_t *pgds_to_insert) +{ -+ struct kutf_context *test_context = file->private_data; ++ int pgd_index; ++ int err = 0; + -+ kutf_helper_input_enqueue_end_of_data(test_context); ++ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1) ++ * Loop runs from the bottom-most to the top-most level so that all entries in the chain ++ * are valid when they are inserted into the MMU Page table via the insert_level PGD. 
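++ *
++ * Illustrative example: with insert_level = 1 and cur_level = 3,
++ * pgds_to_insert[2] and pgds_to_insert[3] hold newly allocated PGDs while
++ * pgds_to_insert[1] is the existing PGD; level 3 is linked into level 2
++ * first, and level 2 is then linked into the existing level-1 PGD.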
++ */ ++ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) { ++ int parent_index = pgd_index - 1; ++ phys_addr_t parent_pgd = pgds_to_insert[parent_index]; ++ unsigned int current_valid_entries; ++ u64 pte; ++ phys_addr_t target_pgd = pgds_to_insert[pgd_index]; ++ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF; ++ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); ++ u64 *parent_page_va; + -+ kutf_context_put(test_context); -+ return 0; -+} ++ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) { ++ err = -EFAULT; ++ goto failure_recovery; ++ } + -+static const struct file_operations kutf_debugfs_run_ops = { -+ .owner = THIS_MODULE, -+ .open = kutf_debugfs_run_open, -+ .read = kutf_debugfs_run_read, -+ .write = kutf_debugfs_run_write, -+ .release = kutf_debugfs_run_release, -+ .llseek = default_llseek, -+}; ++ parent_page_va = kmap(parent_page); ++ if (unlikely(parent_page_va == NULL)) { ++ dev_err(kbdev->dev, "%s: kmap failure", __func__); ++ err = -EINVAL; ++ goto failure_recovery; ++ } + -+/** -+ * create_fixture_variant() - Creates a fixture variant for the specified -+ * test function and index and the debugfs entries -+ * that represent it. -+ * @test_func: Test function -+ * @fixture_index: Fixture index -+ * -+ * Return: 0 on success, negative value corresponding to error code in failure -+ */ -+static int create_fixture_variant(struct kutf_test_function *test_func, -+ unsigned int fixture_index) -+{ -+ struct kutf_test_fixture *test_fix; -+ char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ -+ struct dentry *tmp; -+ int err; ++ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va); + -+ test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); -+ if (!test_fix) { -+ pr_err("Failed to create debugfs directory when adding fixture\n"); -+ err = -ENOMEM; -+ goto fail_alloc; -+ } ++ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); ++ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); ++ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); ++ kunmap(parent_page); + -+ test_fix->test_func = test_func; -+ test_fix->fixture_index = fixture_index; ++ if (parent_index != insert_level) { ++ /* Newly allocated PGDs */ ++ kbase_mmu_sync_pgd_cpu( ++ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), ++ sizeof(u64)); ++ } else { ++ /* A new valid entry is added to an existing PGD. Perform the ++ * invalidate operation for GPU cache as it could be having a ++ * cacheline that contains the entry (in an invalid form). 
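++			 *
++			 * (Illustrative note: the newly allocated parent PGDs handled in the
++			 * branch above only need the CPU-side sync, since they cannot yet be
++			 * present in the GPU caches.)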
++ */ ++ kbase_mmu_sync_pgd( ++ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), ++ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), ++ sizeof(u64), KBASE_MMU_OP_FLUSH_PT); ++ } + -+ snprintf(name, sizeof(name), "%d", fixture_index); -+ test_fix->dir = debugfs_create_dir(name, test_func->dir); -+ if (IS_ERR_OR_NULL(test_func->dir)) { -+ pr_err("Failed to create debugfs directory when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_dir; -+ } ++ /* Update the new target_pgd page to its stable state */ ++ if (kbase_page_migration_enabled) { ++ struct kbase_page_metadata *page_md = ++ kbase_page_private(phys_to_page(target_pgd)); + -+ tmp = debugfs_create_file("type", 0004, test_fix->dir, "fixture\n", -+ &kutf_debugfs_const_string_ops); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_file; -+ } ++ spin_lock(&page_md->migrate_lock); + -+ tmp = debugfs_create_file_unsafe( -+ "run", 0600, test_fix->dir, -+ test_fix, -+ &kutf_debugfs_run_ops); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_file; ++ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || ++ IS_PAGE_ISOLATED(page_md->status)); ++ ++ if (mmut->kctx) { ++ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); ++ page_md->data.pt_mapped.mmut = mmut; ++ page_md->data.pt_mapped.pgd_vpfn_level = ++ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index); ++ } else { ++ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); ++ } ++ ++ spin_unlock(&page_md->migrate_lock); ++ } + } + -+ list_add(&test_fix->node, &test_func->variant_list); + return 0; + -+fail_file: -+ debugfs_remove_recursive(test_fix->dir); -+fail_dir: -+ kfree(test_fix); -+fail_alloc: ++failure_recovery: ++ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */ ++ for (; pgd_index < cur_level; pgd_index++) { ++ phys_addr_t pgd = pgds_to_insert[pgd_index]; ++ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); ++ u64 *pgd_page_va = kmap(pgd_page); ++ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; ++ ++ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); ++ kunmap(pgd_page); ++ } ++ + return err; +} + +/** -+ * kutf_remove_test_variant() - Destroy a previously created fixture variant. -+ * @test_fix: Test fixture ++ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to ++ * level_high (inclusive) ++ * ++ * @kbdev: Device pointer. ++ * @mmut: GPU MMU page table. ++ * @level_low: The lower bound for the levels for which the PGD allocs are required ++ * @level_high: The higher bound for the levels for which the PGD allocs are required ++ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the ++ * newly allocated PGD addresses to. ++ * ++ * Numerically, level_low < level_high, not to be confused with top level and ++ * bottom level concepts for MMU PGDs. They are only used as low and high bounds ++ * in an incrementing for-loop. ++ * ++ * Return: ++ * * 0 - OK ++ * * -ENOMEM - allocation failed for a PGD. 
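++ *
++ * Illustrative example: if the lowest valid PGD was found at level 1 and an
++ * ATE must be written at the bottom level (3), callers pass level_low = 2 and
++ * level_high = 3 so that the two missing PGDs are allocated here.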
+ */ -+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) ++static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ phys_addr_t *new_pgds, int level_low, int level_high) +{ -+ debugfs_remove_recursive(test_fix->dir); -+ kfree(test_fix); -+} ++ int err = 0; ++ int i; ++ ++ lockdep_assert_held(&mmut->mmu_lock); ++ ++ for (i = level_low; i <= level_high; i++) { ++ do { ++ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); ++ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) ++ break; ++ ++ mutex_unlock(&mmut->mmu_lock); ++ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], ++ level_high, NULL); ++ mutex_lock(&mmut->mmu_lock); ++ if (err) { ++ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", ++ __func__, err); ++ ++ /* Free all PGDs allocated in previous successful iterations ++ * from (i-1) to level_low ++ */ ++ for (i = (i - 1); i >= level_low; i--) { ++ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) ++ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); ++ } ++ ++ return err; ++ } ++ } while (1); ++ } + -+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE -+/* Adapting to the upstream debugfs_create_x32() change */ -+static int ktufp_u32_get(void *data, u64 *val) -+{ -+ *val = *(u32 *)data; + return 0; +} + -+DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); -+#endif -+ -+void kutf_add_test_with_filters_and_data( -+ struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data test_data) ++int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, ++ struct tagged_addr phys, size_t nr, unsigned long flags, ++ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, ++ bool ignore_page_migration) +{ -+ struct kutf_test_function *test_func; -+ struct dentry *tmp; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ u64 insert_vpfn = start_vpfn; ++ size_t remain = nr; ++ int err; ++ struct kbase_device *kbdev; ++ u64 dirty_pgds = 0; + unsigned int i; ++ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; ++ enum kbase_mmu_op_type flush_op; ++ struct kbase_mmu_table *mmut = &kctx->mmu; ++ int l, cur_level, insert_level; + -+ test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); -+ if (!test_func) { -+ pr_err("Failed to allocate memory when adding test %s\n", name); -+ goto fail_alloc; -+ } -+ -+ INIT_LIST_HEAD(&test_func->variant_list); ++ if (WARN_ON(kctx == NULL)) ++ return -EINVAL; + -+ test_func->dir = debugfs_create_dir(name, suite->dir); -+ if (IS_ERR_OR_NULL(test_func->dir)) { -+ pr_err("Failed to create debugfs directory when adding test %s\n", name); -+ goto fail_dir; -+ } ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); + -+ tmp = debugfs_create_file("type", 0004, test_func->dir, "test\n", -+ &kutf_debugfs_const_string_ops); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); -+ goto fail_file; -+ } ++ kbdev = kctx->kbdev; + -+ test_func->filters = filters; -+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE -+ tmp = debugfs_create_file_unsafe("filters", 0004, test_func->dir, -+ &test_func->filters, &kutfp_fops_x32_ro); -+#else -+ tmp = debugfs_create_x32("filters", 0004, test_func->dir, -+ &test_func->filters); -+#endif -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); -+ goto 
fail_file; -+ } ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ test_func->test_id = id; -+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE -+ debugfs_create_u32("test_id", 0004, test_func->dir, -+ &test_func->test_id); -+#else -+ tmp = debugfs_create_u32("test_id", 0004, test_func->dir, -+ &test_func->test_id); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); -+ goto fail_file; -+ } -+#endif ++ /* If page migration is enabled, pages involved in multiple GPU mappings ++ * are always treated as not movable. ++ */ ++ if (kbase_page_migration_enabled && !ignore_page_migration) { ++ struct page *phys_page = as_page(phys); ++ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + -+ for (i = 0; i < suite->fixture_variants; i++) { -+ if (create_fixture_variant(test_func, i)) { -+ pr_err("Failed to create fixture %d when adding test %s\n", i, name); -+ goto fail_file; ++ if (page_md) { ++ spin_lock(&page_md->migrate_lock); ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ spin_unlock(&page_md->migrate_lock); + } + } + -+ test_func->suite = suite; -+ test_func->execute = execute; -+ test_func->test_data = test_data; ++ mutex_lock(&mmut->mmu_lock); + -+ list_add(&test_func->node, &suite->test_list); -+ return; ++ while (remain) { ++ unsigned int vindex = insert_vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; ++ struct page *p; ++ register unsigned int num_of_valid_entries; ++ bool newly_created_pgd = false; + -+fail_file: -+ debugfs_remove_recursive(test_func->dir); -+fail_dir: -+ kfree(test_func); -+fail_alloc: -+ return; -+} -+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); -+ -+void kutf_add_test_with_filters( -+ struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters) -+{ -+ union kutf_callback_data data; -+ -+ data.ptr_value = NULL; -+ -+ kutf_add_test_with_filters_and_data(suite, -+ id, -+ name, -+ execute, -+ suite->suite_default_flags, -+ data); -+} -+EXPORT_SYMBOL(kutf_add_test_with_filters); -+ -+void kutf_add_test(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context)) -+{ -+ union kutf_callback_data data; ++ if (count > remain) ++ count = remain; + -+ data.ptr_value = NULL; ++ cur_level = MIDGARD_MMU_BOTTOMLEVEL; ++ insert_level = cur_level; + -+ kutf_add_test_with_filters_and_data(suite, -+ id, -+ name, -+ execute, -+ suite->suite_default_flags, -+ data); -+} -+EXPORT_SYMBOL(kutf_add_test); ++ /* ++ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly ++ * suboptimal. We don't have to re-parse the whole tree ++ * each time (just cache the l0-l2 sequence). ++ * On the other hand, it's only a gain when we map more than ++ * 256 pages at once (on average). Do we really care? ++ */ ++ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ ++ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, ++ &pgd); + -+/** -+ * kutf_remove_test() - Remove a previously added test function. 
-+ * @test_func: Test function -+ */ -+static void kutf_remove_test(struct kutf_test_function *test_func) -+{ -+ struct list_head *pos; -+ struct list_head *tmp; ++ if (err) { ++ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", ++ __func__, err); ++ goto fail_unlock; ++ } + -+ list_for_each_safe(pos, tmp, &test_func->variant_list) { -+ struct kutf_test_fixture *test_fix; ++ /* No valid pgd at cur_level */ ++ if (insert_level != cur_level) { ++ /* Allocate new pgds for all missing levels from the required level ++ * down to the lowest valid pgd at insert_level ++ */ ++ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), ++ cur_level); ++ if (err) ++ goto fail_unlock; + -+ test_fix = list_entry(pos, struct kutf_test_fixture, node); -+ kutf_remove_test_variant(test_fix); -+ } ++ newly_created_pgd = true; + -+ list_del(&test_func->node); -+ debugfs_remove_recursive(test_func->dir); -+ kfree(test_func); -+} ++ new_pgds[insert_level] = pgd; + -+struct kutf_suite *kutf_create_suite_with_filters_and_data( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data suite_data) -+{ -+ struct kutf_suite *suite; -+ struct dentry *tmp; ++ /* If we didn't find an existing valid pgd at cur_level, ++ * we've now allocated one. The ATE in the next step should ++ * be inserted in this newly allocated pgd. ++ */ ++ pgd = new_pgds[cur_level]; ++ } + -+ suite = kmalloc(sizeof(*suite), GFP_KERNEL); -+ if (!suite) { -+ pr_err("Failed to allocate memory when creating suite %s\n", name); -+ goto fail_kmalloc; -+ } ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_err(kbdev->dev, "%s: kmap failure", __func__); ++ err = -ENOMEM; + -+ suite->dir = debugfs_create_dir(name, app->dir); -+ if (IS_ERR_OR_NULL(suite->dir)) { -+ pr_err("Failed to create debugfs directory when adding test %s\n", name); -+ goto fail_debugfs; -+ } ++ goto fail_unlock_free_pgds; ++ } + -+ tmp = debugfs_create_file("type", 0004, suite->dir, "suite\n", -+ &kutf_debugfs_const_string_ops); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); -+ goto fail_file; -+ } ++ num_of_valid_entries = ++ kbdev->mmu_mode->get_num_valid_entries(pgd_page); + -+ INIT_LIST_HEAD(&suite->test_list); -+ suite->app = app; -+ suite->name = name; -+ suite->fixture_variants = fixture_count; -+ suite->create_fixture = create_fixture; -+ suite->remove_fixture = remove_fixture; -+ suite->suite_default_flags = filters; -+ suite->suite_data = suite_data; ++ for (i = 0; i < count; i++) { ++ unsigned int ofs = vindex + i; + -+ list_add(&suite->node, &app->suite_list); ++ /* Fail if the current page is a valid ATE entry */ ++ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + -+ return suite; ++ pgd_page[ofs] = kbase_mmu_create_ate(kbdev, ++ phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); ++ } + -+fail_file: -+ debugfs_remove_recursive(suite->dir); -+fail_debugfs: -+ kfree(suite); -+fail_kmalloc: -+ return NULL; -+} -+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); ++ kbdev->mmu_mode->set_num_valid_entries( ++ pgd_page, num_of_valid_entries + count); + -+struct kutf_suite *kutf_create_suite_with_filters( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ 
void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters) -+{ -+ union kutf_callback_data data; ++ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL); + -+ data.ptr_value = NULL; -+ return kutf_create_suite_with_filters_and_data(app, -+ name, -+ fixture_count, -+ create_fixture, -+ remove_fixture, -+ filters, -+ data); -+} -+EXPORT_SYMBOL(kutf_create_suite_with_filters); ++ /* MMU cache flush operation here will depend on whether bottom level ++ * PGD is newly created or not. ++ * ++ * If bottom level PGD is newly created then no GPU cache maintenance is ++ * required as the PGD will not exist in GPU cache. Otherwise GPU cache ++ * maintenance is required for existing PGD. ++ */ ++ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + -+struct kutf_suite *kutf_create_suite( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context)) -+{ -+ union kutf_callback_data data; ++ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), ++ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), ++ flush_op); + -+ data.ptr_value = NULL; -+ return kutf_create_suite_with_filters_and_data(app, -+ name, -+ fixture_count, -+ create_fixture, -+ remove_fixture, -+ KUTF_F_TEST_GENERIC, -+ data); -+} -+EXPORT_SYMBOL(kutf_create_suite); ++ if (newly_created_pgd) { ++ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, ++ new_pgds); ++ if (err) { ++ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", ++ __func__, err); + -+/** -+ * kutf_destroy_suite() - Destroy a previously added test suite. -+ * @suite: Test suite -+ */ -+static void kutf_destroy_suite(struct kutf_suite *suite) -+{ -+ struct list_head *pos; -+ struct list_head *tmp; ++ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); + -+ list_for_each_safe(pos, tmp, &suite->test_list) { -+ struct kutf_test_function *test_func; ++ kunmap(p); ++ goto fail_unlock_free_pgds; ++ } ++ } + -+ test_func = list_entry(pos, struct kutf_test_function, node); -+ kutf_remove_test(test_func); ++ insert_vpfn += count; ++ remain -= count; ++ kunmap(p); + } + -+ list_del(&suite->node); -+ debugfs_remove_recursive(suite->dir); -+ kfree(suite); -+} ++ mutex_unlock(&mmut->mmu_lock); + -+struct kutf_application *kutf_create_application(const char *name) -+{ -+ struct kutf_application *app; -+ struct dentry *tmp; ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, ++ false); + -+ app = kmalloc(sizeof(*app), GFP_KERNEL); -+ if (!app) { -+ pr_err("Failed to create allocate memory when creating application %s\n", name); -+ goto fail_kmalloc; -+ } ++ return 0; + -+ app->dir = debugfs_create_dir(name, base_dir); -+ if (IS_ERR_OR_NULL(app->dir)) { -+ pr_err("Failed to create debugfs direcotry when creating application %s\n", name); -+ goto fail_debugfs; -+ } ++fail_unlock_free_pgds: ++ /* Free the pgds allocated by us from insert_level+1 to bottom level */ ++ for (l = cur_level; l > insert_level; l--) ++ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + -+ tmp = debugfs_create_file("type", 0004, app->dir, "application\n", -+ &kutf_debugfs_const_string_ops); -+ if (IS_ERR_OR_NULL(tmp)) { -+ pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); -+ goto fail_file; ++fail_unlock: ++ if (insert_vpfn != start_vpfn) { ++ /* Invalidate the pages we 
have partially completed */ ++ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds, ++ NULL, true); + } + -+ INIT_LIST_HEAD(&app->suite_list); -+ app->name = name; -+ -+ return app; ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, ++ true); ++ kbase_mmu_free_pgds_list(kbdev, mmut); ++ mutex_unlock(&mmut->mmu_lock); + -+fail_file: -+ debugfs_remove_recursive(app->dir); -+fail_debugfs: -+ kfree(app); -+fail_kmalloc: -+ return NULL; ++ return err; +} -+EXPORT_SYMBOL(kutf_create_application); + -+void kutf_destroy_application(struct kutf_application *app) ++int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, ++ struct tagged_addr phys, size_t nr, unsigned long flags, ++ int const group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) +{ -+ struct list_head *pos; -+ struct list_head *tmp; -+ -+ list_for_each_safe(pos, tmp, &app->suite_list) { -+ struct kutf_suite *suite; -+ -+ suite = list_entry(pos, struct kutf_suite, node); -+ kutf_destroy_suite(suite); -+ } ++ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ ++ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, ++ false); ++} + -+ debugfs_remove_recursive(app->dir); -+ kfree(app); ++int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, ++ struct tagged_addr phys, size_t nr, unsigned long flags, ++ int const group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) ++{ ++ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ ++ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, ++ false); +} -+EXPORT_SYMBOL(kutf_destroy_application); + -+static struct kutf_context *kutf_create_context( -+ struct kutf_test_fixture *test_fix) ++static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, ++ struct kbase_va_region *reg, ++ struct kbase_mmu_table *mmut, const u64 vpfn) +{ -+ struct kutf_context *new_context; ++ struct page *phys_page = as_page(phys); ++ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + -+ new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); -+ if (!new_context) { -+ pr_err("Failed to allocate test context"); -+ goto fail_alloc; -+ } ++ spin_lock(&page_md->migrate_lock); + -+ new_context->result_set = kutf_create_result_set(); -+ if (!new_context->result_set) { -+ pr_err("Failed to create result set"); -+ goto fail_result_set; ++ /* If no GPU va region is given: the metadata provided are ++ * invalid. ++ * ++ * If the page is already allocated and mapped: this is ++ * an additional GPU mapping, probably to create a memory ++ * alias, which means it is no longer possible to migrate ++ * the page easily because tracking all the GPU mappings ++ * would be too costly. ++ * ++ * In any case: the page becomes not movable. It is kept ++ * alive, but attempts to migrate it will fail. The page ++ * will be freed if it is still not movable when it returns ++ * to a memory pool. Notice that the movable flag is not ++ * cleared because that would require taking the page lock. 
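++	 *
++	 * In short (informal summary of the checks below): a page goes from
++	 * ALLOCATE_IN_PROGRESS to ALLOCATED_MAPPED on its first mapping, and to
++	 * NOT_MOVABLE as soon as it is mapped without a region or mapped a
++	 * second time.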
++ */ ++ if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); ++ } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { ++ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); ++ page_md->data.mapped.reg = reg; ++ page_md->data.mapped.mmut = mmut; ++ page_md->data.mapped.vpfn = vpfn; + } + -+ new_context->test_fix = test_fix; -+ /* Save the pointer to the suite as the callbacks will require it */ -+ new_context->suite = test_fix->test_func->suite; -+ new_context->status = KUTF_RESULT_UNKNOWN; -+ new_context->expected_status = KUTF_RESULT_UNKNOWN; ++ spin_unlock(&page_md->migrate_lock); ++} + -+ kutf_mempool_init(&new_context->fixture_pool); -+ new_context->fixture = NULL; -+ new_context->fixture_index = test_fix->fixture_index; -+ new_context->fixture_name = NULL; -+ new_context->test_data = test_fix->test_func->test_data; ++static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, ++ struct tagged_addr *phys, size_t requested_nr) ++{ ++ size_t i; + -+ new_context->userdata.flags = 0; -+ INIT_LIST_HEAD(&new_context->userdata.input_head); -+ init_waitqueue_head(&new_context->userdata.input_waitq); ++ for (i = 0; i < requested_nr; i++) { ++ struct page *phys_page = as_page(phys[i]); ++ struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + -+ INIT_WORK(&new_context->work, kutf_run_test); ++ /* Skip the 4KB page that is part of a large page, as the large page is ++ * excluded from the migration process. ++ */ ++ if (is_huge(phys[i]) || is_partial(phys[i])) ++ continue; + -+ kref_init(&new_context->kref); ++ if (page_md) { ++ u8 status; + -+ return new_context; ++ spin_lock(&page_md->migrate_lock); ++ status = PAGE_STATUS_GET(page_md->status); + -+fail_result_set: -+ kfree(new_context); -+fail_alloc: -+ return NULL; ++ if (status == ALLOCATED_MAPPED) { ++ if (IS_PAGE_ISOLATED(page_md->status)) { ++ page_md->status = PAGE_STATUS_SET( ++ page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); ++ page_md->data.free_isolated.kbdev = kbdev; ++ /* At this point, we still have a reference ++ * to the page via its page migration metadata, ++ * and any page with the FREE_ISOLATED_IN_PROGRESS ++ * status will subsequently be freed in either ++ * kbase_page_migrate() or kbase_page_putback() ++ */ ++ phys[i] = as_tagged(0); ++ } else ++ page_md->status = PAGE_STATUS_SET(page_md->status, ++ (u8)FREE_IN_PROGRESS); ++ } ++ ++ spin_unlock(&page_md->migrate_lock); ++ } ++ } +} + -+static void kutf_destroy_context(struct kref *kref) ++u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, ++ struct tagged_addr const phy, unsigned long const flags, ++ int const level, int const group_id) +{ -+ struct kutf_context *context; ++ u64 entry; + -+ context = container_of(kref, struct kutf_context, kref); -+ kutf_destroy_result_set(context->result_set); -+ kutf_mempool_destroy(&context->fixture_pool); -+ kfree(context); ++ kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); ++ return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, ++ group_id, level, entry); +} + -+static void kutf_context_get(struct kutf_context *context) ++int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ const u64 start_vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int const group_id, u64 *dirty_pgds, ++ struct kbase_va_region *reg, bool ignore_page_migration) +{ -+ kref_get(&context->kref); -+} ++ 
phys_addr_t pgd; ++ u64 *pgd_page; ++ u64 insert_vpfn = start_vpfn; ++ size_t remain = nr; ++ int err; ++ struct kbase_mmu_mode const *mmu_mode; ++ unsigned int i; ++ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; ++ int l, cur_level, insert_level; + -+static void kutf_context_put(struct kutf_context *context) -+{ -+ kref_put(&context->kref, kutf_destroy_context); -+} ++ /* Note that 0 is a valid start_vpfn */ ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); + ++ mmu_mode = kbdev->mmu_mode; + -+static void kutf_set_result(struct kutf_context *context, -+ enum kutf_result_status status) -+{ -+ context->status = status; -+} ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+static void kutf_set_expected_result(struct kutf_context *context, -+ enum kutf_result_status expected_status) -+{ -+ context->expected_status = expected_status; -+} ++ mutex_lock(&mmut->mmu_lock); + -+/** -+ * kutf_test_log_result() - Log a result for the specified test context -+ * @context: Test context -+ * @message: Result string -+ * @new_status: Result status -+ */ -+static void kutf_test_log_result( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status) -+{ -+ if (context->status < new_status) -+ context->status = new_status; ++ while (remain) { ++ unsigned int vindex = insert_vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; ++ struct page *p; ++ register unsigned int num_of_valid_entries; ++ bool newly_created_pgd = false; ++ enum kbase_mmu_op_type flush_op; + -+ if (context->expected_status != new_status) -+ kutf_add_result(context, new_status, message); -+} ++ if (count > remain) ++ count = remain; + -+void kutf_test_log_result_external( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status) -+{ -+ kutf_test_log_result(context, message, new_status); -+} -+EXPORT_SYMBOL(kutf_test_log_result_external); ++ if (!vindex && is_huge_head(*phys)) ++ cur_level = MIDGARD_MMU_LEVEL(2); ++ else ++ cur_level = MIDGARD_MMU_BOTTOMLEVEL; + -+void kutf_test_expect_abort(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_ABORT); -+} -+EXPORT_SYMBOL(kutf_test_expect_abort); ++ insert_level = cur_level; + -+void kutf_test_expect_fatal(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_FATAL); -+} -+EXPORT_SYMBOL(kutf_test_expect_fatal); ++ /* ++ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly ++ * suboptimal. We don't have to re-parse the whole tree ++ * each time (just cache the l0-l2 sequence). ++ * On the other hand, it's only a gain when we map more than ++ * 256 pages at once (on average). Do we really care? 
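++		 *
++		 * (Each pass of the loop below covers at most one bottom-level PGD
++		 * worth of entries, i.e. KBASE_MMU_PAGE_ENTRIES minus the starting
++		 * index, so the walk is repeated once per PGD window that the
++		 * mapping spans.)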
++ */ ++ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ ++ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, ++ &pgd); + -+void kutf_test_expect_fail(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_FAIL); -+} -+EXPORT_SYMBOL(kutf_test_expect_fail); ++ if (err) { ++ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", ++ __func__, err); ++ goto fail_unlock; ++ } + -+void kutf_test_expect_warn(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_WARN); -+} -+EXPORT_SYMBOL(kutf_test_expect_warn); ++ /* No valid pgd at cur_level */ ++ if (insert_level != cur_level) { ++ /* Allocate new pgds for all missing levels from the required level ++ * down to the lowest valid pgd at insert_level ++ */ ++ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), ++ cur_level); ++ if (err) ++ goto fail_unlock; + -+void kutf_test_expect_pass(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_PASS); -+} -+EXPORT_SYMBOL(kutf_test_expect_pass); ++ newly_created_pgd = true; + -+void kutf_test_skip(struct kutf_context *context) -+{ -+ kutf_set_result(context, KUTF_RESULT_SKIP); -+ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); ++ new_pgds[insert_level] = pgd; + -+ kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); -+} -+EXPORT_SYMBOL(kutf_test_skip); ++ /* If we didn't find an existing valid pgd at cur_level, ++ * we've now allocated one. The ATE in the next step should ++ * be inserted in this newly allocated pgd. ++ */ ++ pgd = new_pgds[cur_level]; ++ } + -+void kutf_test_skip_msg(struct kutf_context *context, const char *message) -+{ -+ kutf_set_result(context, KUTF_RESULT_SKIP); -+ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_err(kbdev->dev, "%s: kmap failure", __func__); ++ err = -ENOMEM; + -+ kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, -+ "Test skipped: %s", message), KUTF_RESULT_SKIP); -+ kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); -+} -+EXPORT_SYMBOL(kutf_test_skip_msg); ++ goto fail_unlock_free_pgds; ++ } + -+void kutf_test_debug(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); -+} -+EXPORT_SYMBOL(kutf_test_debug); ++ num_of_valid_entries = ++ mmu_mode->get_num_valid_entries(pgd_page); + -+void kutf_test_pass(struct kutf_context *context, char const *message) -+{ -+ static const char explicit_message[] = "(explicit pass)"; ++ if (cur_level == MIDGARD_MMU_LEVEL(2)) { ++ int level_index = (insert_vpfn >> 9) & 0x1FF; ++ pgd_page[level_index] = ++ kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); + -+ if (!message) -+ message = explicit_message; ++ num_of_valid_entries++; ++ } else { ++ for (i = 0; i < count; i++) { ++ unsigned int ofs = vindex + i; ++ u64 *target = &pgd_page[ofs]; + -+ kutf_test_log_result(context, message, KUTF_RESULT_PASS); -+} -+EXPORT_SYMBOL(kutf_test_pass); ++ /* Warn if the current page is a valid ATE ++ * entry. The page table shouldn't have anything ++ * in the place where we are trying to put a ++ * new entry. 
Modification to page table entries ++ * should be performed with ++ * kbase_mmu_update_pages() ++ */ ++ WARN_ON((*target & 1UL) != 0); + -+void kutf_test_info(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_INFO); -+} -+EXPORT_SYMBOL(kutf_test_info); ++ *target = kbase_mmu_create_ate(kbdev, ++ phys[i], flags, cur_level, group_id); + -+void kutf_test_warn(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_WARN); -+} -+EXPORT_SYMBOL(kutf_test_warn); ++ /* If page migration is enabled, this is the right time ++ * to update the status of the page. ++ */ ++ if (kbase_page_migration_enabled && !ignore_page_migration && ++ !is_huge(phys[i]) && !is_partial(phys[i])) ++ kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, ++ insert_vpfn + i); ++ } ++ num_of_valid_entries += count; ++ } + -+void kutf_test_fail(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_FAIL); -+} -+EXPORT_SYMBOL(kutf_test_fail); ++ mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + -+void kutf_test_fatal(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_FATAL); -+} -+EXPORT_SYMBOL(kutf_test_fatal); ++ if (dirty_pgds) ++ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level); + -+void kutf_test_abort(struct kutf_context *context) -+{ -+ kutf_test_log_result(context, "", KUTF_RESULT_ABORT); -+} -+EXPORT_SYMBOL(kutf_test_abort); ++ /* MMU cache flush operation here will depend on whether bottom level ++ * PGD is newly created or not. ++ * ++ * If bottom level PGD is newly created then no GPU cache maintenance is ++ * required as the PGD will not exist in GPU cache. Otherwise GPU cache ++ * maintenance is required for existing PGD. ++ */ ++ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + -+#if IS_ENABLED(CONFIG_DEBUG_FS) ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), ++ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), ++ flush_op); + -+/** -+ * init_kutf_core() - Module entry point. -+ * Create the base entry point in debugfs. -+ * -+ * Return: 0 on success, error code otherwise. -+ */ -+static int __init init_kutf_core(void) -+{ -+ kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1); -+ if (!kutf_workq) -+ return -ENOMEM; ++ if (newly_created_pgd) { ++ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, ++ new_pgds); ++ if (err) { ++ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", ++ __func__, err); + -+ base_dir = debugfs_create_dir("kutf_tests", NULL); -+ if (IS_ERR_OR_NULL(base_dir)) { -+ destroy_workqueue(kutf_workq); -+ kutf_workq = NULL; -+ return -ENOMEM; ++ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); ++ ++ kunmap(p); ++ goto fail_unlock_free_pgds; ++ } ++ } ++ ++ phys += count; ++ insert_vpfn += count; ++ remain -= count; ++ kunmap(p); + } + -+ return 0; -+} ++ mutex_unlock(&mmut->mmu_lock); + -+/** -+ * exit_kutf_core() - Module exit point. -+ * -+ * Remove the base entry point in debugfs. 
-+ */ -+static void __exit exit_kutf_core(void) -+{ -+ debugfs_remove_recursive(base_dir); ++ return 0; + -+ if (kutf_workq) -+ destroy_workqueue(kutf_workq); -+} ++fail_unlock_free_pgds: ++ /* Free the pgds allocated by us from insert_level+1 to bottom level */ ++ for (l = cur_level; l > insert_level; l--) ++ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + -+#else /* CONFIG_DEBUG_FS */ ++fail_unlock: ++ if (insert_vpfn != start_vpfn) { ++ /* Invalidate the pages we have partially completed */ ++ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, ++ phys, ignore_page_migration); ++ } + -+/** -+ * init_kutf_core - Module entry point -+ * Stub for when build against a kernel without debugfs support. -+ * -+ * Return: -ENODEV -+ */ -+static int __init init_kutf_core(void) -+{ -+ pr_debug("KUTF requires a kernel with debug fs support"); ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, ++ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); ++ kbase_mmu_free_pgds_list(kbdev, mmut); ++ mutex_unlock(&mmut->mmu_lock); + -+ return -ENODEV; ++ return err; +} + -+/** -+ * exit_kutf_core() - Module exit point. -+ * -+ * Stub for when build against a kernel without debugfs support ++/* ++ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space ++ * number 'as_nr'. + */ -+static void __exit exit_kutf_core(void) ++int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, ++ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, ++ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg, bool ignore_page_migration) +{ -+} -+#endif /* CONFIG_DEBUG_FS */ ++ int err; ++ u64 dirty_pgds = 0; + -+MODULE_LICENSE("GPL"); ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+module_init(init_kutf_core); -+module_exit(exit_kutf_core); -diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c -new file mode 100644 -index 000000000..21f5fadcc ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c -@@ -0,0 +1,75 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. 
-+ * -+ */ ++ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, ++ &dirty_pgds, reg, ignore_page_migration); ++ if (err) ++ return err; + -+/* Kernel UTF utility functions */ ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); + -+#include -+#include -+#include -+#include ++ return 0; ++} + -+#include -+#include ++KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); + -+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; ++int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int as_nr, int const group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg) ++{ ++ int err; ++ u64 dirty_pgds = 0; + -+static DEFINE_MUTEX(buffer_lock); ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+const char *kutf_dsprintf(struct kutf_mempool *pool, -+ const char *fmt, ...) -+{ -+ va_list args; -+ int len; -+ int size; -+ void *buffer; ++ /* Imported allocations don't have metadata and therefore always ignore the ++ * page migration logic. ++ */ ++ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, ++ &dirty_pgds, reg, true); ++ if (err) ++ return err; + -+ mutex_lock(&buffer_lock); -+ va_start(args, fmt); -+ len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); -+ va_end(args); ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); + -+ if (len < 0) { -+ pr_err("%s: Bad format dsprintf format %s\n", __func__, fmt); -+ goto fail_format; -+ } ++ return 0; ++} + -+ if (len >= sizeof(tmp_buffer)) { -+ pr_warn("%s: Truncated dsprintf message %s\n", __func__, fmt); -+ size = sizeof(tmp_buffer); -+ } else { -+ size = len + 1; -+ } ++int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int as_nr, int const group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg) ++{ ++ int err; ++ u64 dirty_pgds = 0; + -+ buffer = kutf_mempool_alloc(pool, size); -+ if (!buffer) -+ goto fail_alloc; ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ memcpy(buffer, tmp_buffer, size); -+ mutex_unlock(&buffer_lock); ++ /* Memory aliases are always built on top of existing allocations, ++ * therefore the state of physical pages shall be updated. ++ */ ++ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, ++ &dirty_pgds, reg, false); ++ if (err) ++ return err; + -+ return buffer; ++ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); + -+fail_alloc: -+fail_format: -+ mutex_unlock(&buffer_lock); -+ return NULL; ++ return 0; +} -+EXPORT_SYMBOL(kutf_dsprintf); -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild -new file mode 100755 -index 000000000..027bc27c9 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+ -+ifeq ($(CONFIG_MALI_KUTF_CLK_RATE_TRACE),y) -+obj-m += mali_kutf_clk_rate_trace_test_portal.o + -+mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o -+endif -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp -new file mode 100755 -index 000000000..225ad69c5 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * -+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++void kbase_mmu_update(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, ++ int as_nr) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); + -+bob_kernel_module { -+ name: "mali_kutf_clk_rate_trace_test_portal", -+ defaults: [ -+ "mali_kbase_shared_config_defaults", -+ "kernel_test_configs", -+ "kernel_test_includes", -+ ], -+ srcs: [ -+ "Kbuild", -+ "mali_kutf_clk_rate_trace_test.c", -+ "../mali_kutf_clk_rate_trace_test.h", -+ ], -+ extra_symbols: [ -+ "mali_kbase", -+ "kutf", -+ ], -+ enabled: false, -+ mali_kutf_clk_rate_trace: { -+ kbuild_options: ["CONFIG_MALI_KUTF_CLK_RATE_TRACE=y"], -+ enabled: true, -+ }, ++ kbdev->mmu_mode->update(kbdev, mmut, as_nr); +} -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c -new file mode 100644 -index 000000000..a6f54b61d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c -@@ -0,0 +1,965 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * -+ */ ++KBASE_EXPORT_TEST_API(kbase_mmu_update); + -+#include -+#include ++void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+#include -+#include -+#include -+#include -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) -+#include -+#else -+#include -+#endif -+#include "mali_kbase.h" -+#include "backend/gpu/mali_kbase_irq_internal.h" -+#include "backend/gpu/mali_kbase_pm_internal.h" -+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ kbdev->mmu_mode->disable_as(kbdev, as_nr); ++} + -+#include -+#include -+#include -+#include ++void kbase_mmu_disable(struct kbase_context *kctx) ++{ ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_mmu_hw_op_param op_param = { 0 }; ++ int lock_err, flush_err; + -+#include "../mali_kutf_clk_rate_trace_test.h" ++ /* ASSERT that the context has a valid as_nr, which is only the case ++ * when it's scheduled in. ++ * ++ * as_nr won't change because the caller has the hwaccess_lock ++ */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+#define MINOR_FOR_FIRST_KBASE_DEV (-1) ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + -+/* KUTF test application pointer for this test */ -+static struct kutf_application *kutf_app; ++ op_param.vpfn = 0; ++ op_param.nr = ~0; ++ op_param.op = KBASE_MMU_OP_FLUSH_MEM; ++ op_param.kctx_id = kctx->id; ++ op_param.mmu_sync_info = mmu_sync_info; + -+enum portal_server_state { -+ PORTAL_STATE_NO_CLK, -+ PORTAL_STATE_LIVE, -+ PORTAL_STATE_CLOSING, -+}; ++#if MALI_USE_CSF ++ /* 0xF value used to prevent skipping of any levels when flushing */ ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) ++ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); ++#endif + -+/** -+ * struct clk_trace_snapshot - Trace info data on a clock. -+ * @previous_rate: Snapshot start point clock rate. -+ * @current_rate: End point clock rate. It becomes the start rate of the -+ * next trace snapshot. -+ * @rate_up_cnt: Count in the snapshot duration when the clock trace -+ * write is a rate of higher value than the last. -+ * @rate_down_cnt: Count in the snapshot duration when the clock trace write -+ * is a rate of lower value than the last. 
-+ */ -+struct clk_trace_snapshot { -+ unsigned long previous_rate; -+ unsigned long current_rate; -+ u32 rate_up_cnt; -+ u32 rate_down_cnt; -+}; ++ /* lock MMU to prevent existing jobs on GPU from executing while the AS is ++ * not yet disabled ++ */ ++ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); ++ if (lock_err) ++ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, ++ kctx->id); + -+/** -+ * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. -+ * @kbdev: kbase device for the GPU. -+ * @listener: Clock rate change listener structure. -+ * @invoke_notify: When true, invoke notify command is being executed. -+ * @snapshot: Clock trace update snapshot data array. A snapshot -+ * for each clock contains info accumulated beteen two -+ * GET_TRACE_SNAPSHOT requests. -+ * @nclks: Number of clocks visible to the trace portal. -+ * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC -+ * PM_CTX_CNT requests made to the portal. On change from -+ * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is -+ * triggered. -+ * @total_update_cnt: Total number of received trace write callbacks. -+ * @server_state: Portal server operational state. -+ * @result_msg: Message for the test result. -+ * @test_status: Portal test reslt status. -+ */ -+struct kutf_clk_rate_trace_fixture_data { -+ struct kbase_device *kbdev; -+ struct kbase_clk_rate_listener listener; -+ bool invoke_notify; -+ struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; -+ unsigned int nclks; -+ unsigned int pm_ctx_cnt; -+ unsigned int total_update_cnt; -+ enum portal_server_state server_state; -+ char const *result_msg; -+ enum kutf_result_status test_status; -+}; ++ /* Issue the flush command only when L2 cache is in stable power on state. ++ * Any other state for L2 cache implies that shader cores are powered off, ++ * which in turn implies there is no execution happening on the GPU. ++ */ ++ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { ++ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, ++ GPU_COMMAND_CACHE_CLN_INV_L2_LSC); ++ if (flush_err) ++ dev_err(kbdev->dev, ++ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", ++ kctx->as_nr, kctx->tgid, kctx->id); ++ } ++ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); + -+struct clk_trace_portal_input { -+ struct kutf_helper_named_val cmd_input; -+ enum kbasep_clk_rate_trace_req portal_cmd; -+ int named_val_err; -+}; ++ if (!lock_err) { ++ /* unlock the MMU to allow it to resume */ ++ lock_err = ++ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); ++ if (lock_err) ++ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, ++ kctx->tgid, kctx->id); ++ } + -+struct kbasep_cmd_name_pair { -+ enum kbasep_clk_rate_trace_req cmd; -+ const char *name; -+}; ++#if !MALI_USE_CSF ++ /* ++ * JM GPUs has some L1 read only caches that need to be invalidated ++ * with START_FLUSH configuration. Purge the MMU disabled kctx from ++ * the slot_rb tracking field so such invalidation is performed when ++ * a new katom is executed on the affected slots. 
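/*
 * Editorial sketch (not part of the patch): order of operations used by
 * kbase_mmu_disable() above when an address space is torn away from a
 * context. Function names are the real ones from this file; error handling
 * and the CSF-specific flush_skip_levels setup are elided for brevity.
 */
static void example_disable_as_sequence(struct kbase_device *kbdev, struct kbase_context *kctx,
					struct kbase_mmu_hw_op_param *op_param)
{
	/* 1) Lock the AS so jobs already on the GPU stop walking these tables */
	kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], op_param);

	/* 2) Flush L2/LSC only while the L2 is stably powered on; any other
	 * L2 state implies the shader cores are off and nothing is executing.
	 */
	if (kbdev->pm.backend.l2_state == KBASE_L2_ON)
		kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);

	/* 3) Disable the address space itself */
	kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);

	/* 4) Unlock so the MMU can resume servicing the other address spaces */
	kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], op_param);
}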
++ */ ++ kbase_backend_slot_kctx_purge_locked(kbdev, kctx); ++#endif ++} ++KBASE_EXPORT_TEST_API(kbase_mmu_disable); + -+static const struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { -+ { PORTAL_CMD_GET_PLATFORM, GET_PLATFORM }, -+ { PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR }, -+ { PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE }, -+ { PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT }, -+ { PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT }, -+ { PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT }, -+ { PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL }, -+ { PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ }, -+}; ++static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, ++ struct kbase_mmu_table *mmut, phys_addr_t *pgds, ++ u64 vpfn, int level, ++ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) ++{ ++ int current_level; + -+/* Global pointer for the kutf_portal_trace_write() to use. When -+ * this pointer is engaged, new requests for create fixture will fail -+ * hence limiting the use of the portal at any time to a singleton. -+ */ -+static struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; ++ lockdep_assert_held(&mmut->mmu_lock); + -+#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) -+static char portal_msg_buf[PORTAL_MSG_LEN]; ++ for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); ++ current_level--) { ++ phys_addr_t current_pgd = pgds[current_level]; ++ struct page *p = phys_to_page(current_pgd); ++ u64 *current_page = kmap(p); ++ unsigned int current_valid_entries = ++ kbdev->mmu_mode->get_num_valid_entries(current_page); ++ int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; + -+static void kutf_portal_trace_write( -+ struct kbase_clk_rate_listener *listener, -+ u32 index, u32 new_rate) -+{ -+ struct clk_trace_snapshot *snapshot; -+ struct kutf_clk_rate_trace_fixture_data *data; ++ /* We need to track every level that needs updating */ ++ if (dirty_pgds) ++ *dirty_pgds |= 1ULL << current_level; + -+ if (listener == NULL) { -+ pr_err("%s - index: %u, new_rate: %u, listener is NULL\n", -+ __func__, index, new_rate); -+ return; -+ } ++ kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); ++ if (current_valid_entries == 1 && ++ current_level != MIDGARD_MMU_LEVEL(0)) { ++ kunmap(p); + -+ data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, -+ listener); ++ /* Ensure the cacheline containing the last valid entry ++ * of PGD is invalidated from the GPU cache, before the ++ * PGD page is freed. 
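/*
 * Editorial sketch (not part of the patch): how the parent-PGD walk in
 * kbase_mmu_update_and_free_parent_pgds() above turns a virtual PFN into a
 * table index. Each of the four levels resolves 9 bits of the PFN (a PGD page
 * holds 512 == 1 << 9 entries), so the bottom level (3) uses PFN bits 0..8
 * and the top level (0) uses bits 27..35.
 */
static unsigned int example_pgd_index(u64 vpfn, int level)
{
	/* level ranges from MIDGARD_MMU_LEVEL(0) (top) to MIDGARD_MMU_LEVEL(3) (bottom) */
	return (vpfn >> ((3 - level) * 9)) & 0x1FF;
}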
++ */ ++ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, ++ current_pgd + (index * sizeof(u64)), ++ sizeof(u64), flush_op); + -+ lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); ++ kbase_mmu_add_to_free_pgds_list(mmut, p); ++ } else { ++ current_valid_entries--; + -+ if (WARN_ON(g_ptr_portal_data == NULL)) -+ return; -+ if (WARN_ON(index >= g_ptr_portal_data->nclks)) -+ return; ++ kbdev->mmu_mode->set_num_valid_entries( ++ current_page, current_valid_entries); + -+ /* This callback is triggered by invoke notify command, skipping */ -+ if (data->invoke_notify) -+ return; ++ kunmap(p); + -+ snapshot = &g_ptr_portal_data->snapshot[index]; -+ if (new_rate > snapshot->current_rate) -+ snapshot->rate_up_cnt++; -+ else -+ snapshot->rate_down_cnt++; -+ snapshot->current_rate = new_rate; -+ g_ptr_portal_data->total_update_cnt++; ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), ++ kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), ++ flush_op); ++ break; ++ } ++ } +} + -+static void kutf_set_pm_ctx_active(struct kutf_context *context) ++/** ++ * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. ++ * ++ * @kbdev: Pointer to kbase device. ++ * @kctx: Pointer to kbase context. ++ * @as_nr: Address space number, for GPU cache maintenance operations ++ * that happen outside a specific kbase context. ++ * @phys: Array of physical pages to flush. ++ * @phys_page_nr: Number of physical pages to flush. ++ * @op_param: Non-NULL pointer to struct containing information about the flush ++ * operation to perform. ++ * ++ * This function will do one of three things: ++ * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the ++ * individual pages that were unmapped if feature is supported on GPU. ++ * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is ++ * supported on GPU or, ++ * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. ++ * ++ * When performing a partial GPU cache flush, the number of physical ++ * pages does not have to be identical to the number of virtual pages on the MMU, ++ * to support a single physical address flush for an aliased page. 
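/*
 * Editorial sketch (not part of the patch): the three cache-maintenance
 * strategies listed in the comment above, condensed into a hypothetical
 * chooser. The real function (defined just below in this patch) performs the
 * selected operation directly instead of returning a tag, and the per-page
 * physical-range flush is only compiled in for CSF GPUs.
 */
enum example_teardown_flush_strategy {
	EXAMPLE_FLUSH_FULL_VIA_MMU_CTRL,  /* full flush through MMU_CONTROL     */
	EXAMPLE_FLUSH_FULL_VIA_GPU_CTRL,  /* full flush through GPU_CONTROL     */
	EXAMPLE_FLUSH_PA_RANGE,           /* MMU invalidate + per-page PA flush */
};

static enum example_teardown_flush_strategy
example_pick_teardown_flush(struct kbase_device *kbdev, enum kbase_mmu_op_type op)
{
	if (!mmu_flush_cache_on_gpu_ctrl(kbdev))
		return EXAMPLE_FLUSH_FULL_VIA_MMU_CTRL;
	if (op == KBASE_MMU_OP_FLUSH_MEM)
		return EXAMPLE_FLUSH_FULL_VIA_GPU_CTRL;
	return EXAMPLE_FLUSH_PA_RANGE;
}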
++ */ ++static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, ++ struct kbase_context *kctx, int as_nr, ++ struct tagged_addr *phys, size_t phys_page_nr, ++ struct kbase_mmu_hw_op_param *op_param) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ /* Full cache flush through the MMU_COMMAND */ ++ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); ++ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { ++ /* Full cache flush through the GPU_CONTROL */ ++ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); ++ } ++#if MALI_USE_CSF ++ else { ++ /* Partial GPU cache flush with MMU cache invalidation */ ++ unsigned long irq_flags; ++ unsigned int i; ++ bool flush_done = false; + -+ if (WARN_ON(data->pm_ctx_cnt != 1)) -+ return; ++ mmu_invalidate(kbdev, kctx, as_nr, op_param); + -+ kbase_pm_context_active(data->kbdev); -+ kbase_pm_wait_for_desired_state(data->kbdev); -+#if !MALI_USE_CSF -+ kbase_pm_request_gpu_cycle_counter(data->kbdev); ++ for (i = 0; !flush_done && i < phys_page_nr; i++) { ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) ++ mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, ++ KBASE_MMU_OP_FLUSH_MEM); ++ else ++ flush_done = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ } ++ } +#endif +} + -+static void kutf_set_pm_ctx_idle(struct kutf_context *context) ++static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, size_t nr, u64 *dirty_pgds, ++ struct list_head *free_pgds_list, ++ enum kbase_mmu_op_type flush_op) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; + -+ if (WARN_ON(data->pm_ctx_cnt > 0)) -+ return; -+#if !MALI_USE_CSF -+ kbase_pm_release_gpu_cycle_counter(data->kbdev); -+#endif -+ kbase_pm_context_idle(data->kbdev); -+} ++ lockdep_assert_held(&mmut->mmu_lock); ++ kbase_mmu_reset_free_pgds_list(mmut); + -+static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) -+{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ const unsigned int cnt = data->pm_ctx_cnt; -+ const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; -+ char const *errmsg = NULL; ++ while (nr) { ++ unsigned int index = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; ++ unsigned int pcount; ++ int level; ++ u64 *page; ++ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; ++ register unsigned int num_of_valid_entries; ++ phys_addr_t pgd = mmut->pgd; ++ struct page *p = phys_to_page(pgd); + -+ WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && -+ req != PORTAL_CMD_DEC_PM_CTX_CNT); ++ if (count > nr) ++ count = nr; + -+ if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { -+ data->pm_ctx_cnt++; -+ if (data->pm_ctx_cnt == 1) -+ kutf_set_pm_ctx_active(context); -+ } ++ /* need to check if this is a 2MB page or a 4kB */ ++ for (level = MIDGARD_MMU_TOPLEVEL; ++ level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { ++ phys_addr_t next_pgd; + -+ if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { -+ data->pm_ctx_cnt--; -+ if (data->pm_ctx_cnt == 0) -+ kutf_set_pm_ctx_idle(context); -+ } ++ index = (vpfn >> ((3 - level) * 9)) & 0x1FF; ++ page = kmap(p); ++ if (mmu_mode->ate_is_valid(page[index], level)) ++ break; /* keep the 
mapping */ ++ else if (!mmu_mode->pte_is_valid(page[index], level)) { ++ /* nothing here, advance */ ++ switch (level) { ++ case MIDGARD_MMU_LEVEL(0): ++ count = 134217728; ++ break; ++ case MIDGARD_MMU_LEVEL(1): ++ count = 262144; ++ break; ++ case MIDGARD_MMU_LEVEL(2): ++ count = 512; ++ break; ++ case MIDGARD_MMU_LEVEL(3): ++ count = 1; ++ break; ++ } ++ if (count > nr) ++ count = nr; ++ goto next; ++ } ++ next_pgd = mmu_mode->pte_to_phy_addr( ++ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); ++ kunmap(p); ++ pgds[level] = pgd; ++ pgd = next_pgd; ++ p = phys_to_page(pgd); ++ } + -+ /* Skip the length check, no chance of overflow for two ints */ -+ snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ switch (level) { ++ case MIDGARD_MMU_LEVEL(0): ++ case MIDGARD_MMU_LEVEL(1): ++ dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, ++ level); ++ kunmap(p); ++ goto out; ++ case MIDGARD_MMU_LEVEL(2): ++ /* can only teardown if count >= 512 */ ++ if (count >= 512) { ++ pcount = 1; ++ } else { ++ dev_warn( ++ kbdev->dev, ++ "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", ++ __func__, count); ++ pcount = 0; ++ } ++ break; ++ case MIDGARD_MMU_BOTTOMLEVEL: ++ /* page count is the same as the logical count */ ++ pcount = count; ++ break; ++ default: ++ dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); ++ vpfn += count; ++ nr -= count; ++ continue; ++ } + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending ack for adjusting pm_ctx_cnt"); -+ } ++ if (pcount > 0) ++ *dirty_pgds |= 1ULL << level; + -+ return errmsg; -+} ++ num_of_valid_entries = mmu_mode->get_num_valid_entries(page); ++ if (WARN_ON_ONCE(num_of_valid_entries < pcount)) ++ num_of_valid_entries = 0; ++ else ++ num_of_valid_entries -= pcount; + -+static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) -+{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ unsigned long rate; -+ bool idle; -+ int ret; -+ int i; -+ char const *errmsg = NULL; ++ /* Invalidate the entries we added */ ++ mmu_mode->entries_invalidate(&page[index], pcount); + -+ WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && -+ (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); ++ if (!num_of_valid_entries) { ++ kunmap(p); + -+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, RATE:[", seq); ++ /* Ensure the cacheline(s) containing the last valid entries ++ * of PGD is invalidated from the GPU cache, before the ++ * PGD page is freed. 
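/*
 * Editorial sketch (not part of the patch): where the hard-coded skip counts
 * in the teardown walk above come from. An invalid entry at level L covers
 * 512^(3 - L) small (4 kB) pages, i.e. 134217728, 262144, 512 and 1 for
 * levels 0 to 3 respectively, so the walk can skip a whole unmapped region
 * in one step.
 */
static size_t example_pages_per_entry(int level)
{
	/* 512^(3 - level) == 1 << (9 * (3 - level)) */
	return (size_t)1 << (9 * (3 - level));
}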
++ */ ++ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, ++ pgd + (index * sizeof(u64)), ++ pcount * sizeof(u64), flush_op); + -+ for (i = 0; i < data->nclks; i++) { -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR) -+ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; -+ else -+ rate = data->snapshot[i].current_rate; -+ idle = kbdev->pm.clk_rtm.gpu_idle; -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ kbase_mmu_add_to_free_pgds_list(mmut, p); + -+ if ((i + 1) == data->nclks) -+ ret += snprintf(portal_msg_buf + ret, -+ PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", -+ rate, idle); -+ else -+ ret += snprintf(portal_msg_buf + ret, -+ PORTAL_MSG_LEN - ret, "0x%lx, ", rate); ++ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, ++ flush_op, dirty_pgds); + -+ if (ret >= PORTAL_MSG_LEN) { -+ pr_warn("Message buf overflow with rate array data\n"); -+ return kutf_dsprintf(&context->fixture_pool, -+ "Message buf overflow with rate array data"); ++ vpfn += count; ++ nr -= count; ++ continue; + } -+ } + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending back rate array\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending rate array"); -+ } ++ mmu_mode->set_num_valid_entries(page, num_of_valid_entries); + -+ return errmsg; ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), ++ kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), ++ flush_op); ++next: ++ kunmap(p); ++ vpfn += count; ++ nr -= count; ++ } ++out: ++ return 0; +} + -+/** -+ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot -+ * @context: KUTF context -+ * @cmd: The decoded portal input request -+ * -+ * The accumulated clock rate trace information is kept inside as an snapshot -+ * record. A user request of getting the snapshot marks the closure of the -+ * current snapshot record, and the start of the next one. The response -+ * message contains the current snapshot record, with each clock's -+ * data sequentially placed inside (array marker) [ ]. -+ * -+ * Return: generated string -+ */ -+static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, ++ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, ++ int as_nr, bool ignore_page_migration) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ struct clk_trace_snapshot snapshot; -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ int ret; -+ int i; -+ char const *fmt; -+ char const *errmsg = NULL; ++ u64 start_vpfn = vpfn; ++ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; ++ struct kbase_mmu_hw_op_param op_param; ++ int err = -EFAULT; ++ u64 dirty_pgds = 0; ++ LIST_HEAD(free_pgds_list); + -+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT); ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. ++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + -+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); ++ /* This function performs two operations: MMU maintenance and flushing ++ * the caches. 
To ensure internal consistency between the caches and the ++ * MMU, it does not make sense to be able to flush only the physical pages ++ * from the cache and keep the PTE, nor does it make sense to use this ++ * function to remove a PTE and keep the physical pages in the cache. ++ * ++ * However, we have legitimate cases where we can try to tear down a mapping ++ * with zero virtual and zero physical pages, so we must have the following ++ * behaviour: ++ * - if both physical and virtual page counts are zero, return early ++ * - if either physical and virtual page counts are zero, return early ++ * - if there are fewer physical pages than virtual pages, return -EINVAL ++ */ ++ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0)) ++ return 0; + -+ for (i = 0; i < data->nclks; i++) { -+ spin_lock(&data->kbdev->pm.clk_rtm.lock); -+ /* copy out the snapshot of the clock */ -+ snapshot = data->snapshot[i]; -+ /* Set the next snapshot start condition */ -+ data->snapshot[i].previous_rate = snapshot.current_rate; -+ data->snapshot[i].rate_up_cnt = 0; -+ data->snapshot[i].rate_down_cnt = 0; -+ spin_unlock(&data->kbdev->pm.clk_rtm.lock); ++ if (unlikely(nr_virt_pages < nr_phys_pages)) ++ return -EINVAL; + -+ /* Check i corresponding to the last clock */ -+ if ((i + 1) == data->nclks) -+ fmt = "(0x%lx, 0x%lx, %u, %u)]}"; -+ else -+ fmt = "(0x%lx, 0x%lx, %u, %u), "; -+ ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, -+ fmt, snapshot.previous_rate, snapshot.current_rate, -+ snapshot.rate_up_cnt, snapshot.rate_down_cnt); -+ if (ret >= PORTAL_MSG_LEN) { -+ pr_warn("Message buf overflow with snapshot data\n"); -+ return kutf_dsprintf(&context->fixture_pool, -+ "Message buf overflow with snapshot data"); -+ } -+ } ++ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases ++ * the operation is invalidate but the granularity of cache maintenance may change ++ * according to the situation. ++ * ++ * If GPU control command operations are present and the number of pages is "small", ++ * then the optimal strategy is flushing on the physical address range of the pages ++ * which are affected by the operation. That implies both the PGDs which are modified ++ * or removed from the page table and the physical pages which are freed from memory. ++ * ++ * Otherwise, there's no alternative to invalidating the whole GPU cache. ++ */ ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && ++ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) ++ flush_op = KBASE_MMU_OP_FLUSH_PT; + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending back snapshot array\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending snapshot array"); -+ } ++ mutex_lock(&mmut->mmu_lock); + -+ return errmsg; ++ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, ++ &free_pgds_list, flush_op); ++ ++ /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ ++ op_param = (struct kbase_mmu_hw_op_param){ ++ .vpfn = start_vpfn, ++ .nr = nr_virt_pages, ++ .mmu_sync_info = mmu_sync_info, ++ .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, ++ .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? 
KBASE_MMU_OP_FLUSH_PT : ++ KBASE_MMU_OP_FLUSH_MEM, ++ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), ++ }; ++ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages, ++ &op_param); ++ ++ /* If page migration is enabled: the status of all physical pages involved ++ * shall be updated, unless they are not movable. Their status shall be ++ * updated before releasing the lock to protect against concurrent ++ * requests to migrate the pages, if they have been isolated. ++ */ ++ if (kbase_page_migration_enabled && phys && !ignore_page_migration) ++ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); ++ ++ kbase_mmu_free_pgds_list(kbdev, mmut); ++ ++ mutex_unlock(&mmut->mmu_lock); ++ ++ return err; +} ++KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +/** -+ * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback -+ * @context: KUTF context -+ * @cmd: The decoded portal input request ++ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU ++ * page table entries + * -+ * Invokes frequency change notification callbacks with a fake -+ * GPU frequency 42 kHz for the top clock domain. ++ * @kbdev: Pointer to kbase device. ++ * @mmut: The involved MMU table ++ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update ++ * @phys: Pointer to the array of tagged physical addresses of the physical ++ * pages that are pointed to by the page table entries (that need to ++ * be updated). The pointer should be within the reg->gpu_alloc->pages ++ * array. ++ * @nr: Number of pages to update ++ * @flags: Flags ++ * @group_id: The physical memory group in which the page was allocated. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * @dirty_pgds: Flags to track every level where a PGD has been updated. + * -+ * Return: generated string ++ * This will update page table entries that already exist on the GPU based on ++ * new flags and replace any existing phy pages that are passed (the PGD pages ++ * remain unchanged). It is used as a response to the changes of phys as well ++ * as the the memory attributes. ++ * ++ * The caller is responsible for validating the memory attributes. ++ * ++ * Return: 0 if the attributes data in page table entries were updated ++ * successfully, otherwise an error code. 
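/*
 * Editorial sketch (not part of the patch): the dirty-PGD tracking used
 * throughout this file. Every helper that modifies a page directory sets the
 * bit for the touched level in a u64 mask; the mask is later passed to
 * pgd_level_to_skip_flush() so the MMU flush can skip levels that were never
 * modified. The accumulation itself is a one-liner:
 */
static void example_mark_level_dirty(u64 *dirty_pgds, int level)
{
	if (dirty_pgds)
		*dirty_pgds |= 1ULL << level;	/* one bit per MMU level */
}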
+ */ -+static const char *kutf_clk_trace_do_invoke_notify_42k( -+ struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int const group_id, u64 *dirty_pgds) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ const unsigned long new_rate_hz = 42000; -+ int ret; -+ char const *errmsg = NULL; -+ struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ int err; + -+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ); ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ spin_lock(&clk_rtm->lock); ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ data->invoke_notify = true; -+ kbase_clk_rate_trace_manager_notify_all( -+ clk_rtm, 0, new_rate_hz); -+ data->invoke_notify = false; ++ mutex_lock(&mmut->mmu_lock); + -+ spin_unlock(&clk_rtm->lock); ++ while (nr) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ size_t count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; ++ register unsigned int num_of_valid_entries; ++ int cur_level = MIDGARD_MMU_BOTTOMLEVEL; + -+ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); ++ if (count > nr) ++ count = nr; + -+ if (ret >= PORTAL_MSG_LEN) { -+ pr_warn("Message buf overflow with invoked data\n"); -+ return kutf_dsprintf(&context->fixture_pool, -+ "Message buf overflow with invoked data"); -+ } ++ if (is_huge(*phys) && (index == index_in_large_page(*phys))) ++ cur_level = MIDGARD_MMU_LEVEL(2); + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ "request\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); -+ } ++ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd); ++ if (WARN_ON(err)) ++ goto fail_unlock; + -+ return errmsg; -+} ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_warn(kbdev->dev, "kmap failure on update_pages"); ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) -+{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ char const *errmsg = NULL; ++ num_of_valid_entries = ++ kbdev->mmu_mode->get_num_valid_entries(pgd_page); + -+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL); ++ if (cur_level == MIDGARD_MMU_LEVEL(2)) { ++ int level_index = (vpfn >> 9) & 0x1FF; ++ struct tagged_addr *target_phys = ++ phys - index_in_large_page(*phys); + -+ data->server_state = PORTAL_STATE_CLOSING; ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( ++ pgd_page[level_index], MIDGARD_MMU_LEVEL(2))); ++#endif ++ pgd_page[level_index] = kbase_mmu_create_ate(kbdev, ++ *target_phys, flags, MIDGARD_MMU_LEVEL(2), ++ group_id); ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), ++ kbase_dma_addr(p) + (level_index * sizeof(u64)), ++ sizeof(u64), KBASE_MMU_OP_NONE); ++ } else { ++ for (i = 0; i < count; i++) { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid( ++ pgd_page[index + i], ++ MIDGARD_MMU_BOTTOMLEVEL)); 
++#endif ++ pgd_page[index + i] = kbase_mmu_create_ate(kbdev, ++ phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, ++ group_id); ++ } + -+ /* Skip the length check, no chance of overflow for two ints */ -+ snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ /* MMU cache flush strategy is NONE because GPU cache maintenance ++ * will be done by the caller. ++ */ ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), ++ kbase_dma_addr(p) + (index * sizeof(u64)), ++ count * sizeof(u64), KBASE_MMU_OP_NONE); ++ } + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending ack for " CLOSE_PORTAL "reuquest"); ++ kbdev->mmu_mode->set_num_valid_entries(pgd_page, ++ num_of_valid_entries); ++ ++ if (dirty_pgds && count > 0) ++ *dirty_pgds |= 1ULL << cur_level; ++ ++ phys += count; ++ vpfn += count; ++ nr -= count; ++ ++ kunmap(p); + } + -+ return errmsg; ++ mutex_unlock(&mmut->mmu_lock); ++ return 0; ++ ++fail_unlock: ++ mutex_unlock(&mmut->mmu_lock); ++ return err; +} + -+/** -+ * kutf_clk_trace_do_get_platform() - Gets platform information -+ * @context: KUTF context -+ * @cmd: The decoded portal input request -+ * -+ * Checks the gpu node in the device tree to see if arbitration is enabled -+ * If so determines device tree whether platform is PV or PTM -+ * -+ * Return: A string to indicate the platform (PV/PTM/GPU/UNKNOWN) -+ */ -+static const char *kutf_clk_trace_do_get_platform( -+ struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int const group_id) +{ -+ int seq = cmd->cmd_input.u.val_u64 & 0xFF; -+ char const *errmsg = NULL; -+ const void *arbiter_if_node = NULL; -+ const void *power_node = NULL; -+ const char *platform = "GPU"; -+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int err; ++ struct kbase_mmu_hw_op_param op_param; ++ u64 dirty_pgds = 0; ++ struct kbase_mmu_table *mmut; ++ /* Calls to this function are inherently asynchronous, with respect to ++ * MMU operations. 
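/*
 * Editorial sketch (not part of the patch): level selection in
 * kbase_mmu_update_pages_no_flush() above. A 2 MB (huge) physical page is
 * described by a single ATE at level 2; every other page is described by an
 * ATE at the bottom level. "index" is the bottom-level slot, vpfn & 0x1FF.
 */
static int example_update_level(struct tagged_addr pa, unsigned int index)
{
	if (is_huge(pa) && index == index_in_large_page(pa))
		return MIDGARD_MMU_LEVEL(2);

	return MIDGARD_MMU_BOTTOMLEVEL;
}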
++ */ ++ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; ++ int as_nr; + -+ arbiter_if_node = -+ of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); -+#endif -+ if (arbiter_if_node) { -+ power_node = of_find_compatible_node(NULL, NULL, -+ "arm,mali-gpu-power"); -+ if (power_node) { -+ platform = "PV"; -+ } else { -+ power_node = of_find_compatible_node(NULL, NULL, -+ "arm,mali-ptm"); -+ if (power_node) -+ platform = "PTM"; -+ else -+ platform = "UNKNOWN"; -+ } ++#if !MALI_USE_CSF ++ if (unlikely(kctx == NULL)) ++ return -EINVAL; ++ ++ as_nr = kctx->as_nr; ++ mmut = &kctx->mmu; ++#else ++ if (kctx) { ++ mmut = &kctx->mmu; ++ as_nr = kctx->as_nr; + } else { -+ platform = "GPU"; ++ mmut = &kbdev->csf.mcu_mmu; ++ as_nr = MCU_AS_NR; + } ++#endif + -+ pr_debug("%s - platform is %s\n", __func__, platform); -+ snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, PLATFORM:%s}", seq, platform); ++ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, ++ &dirty_pgds); + -+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_PLATFORM); ++ op_param = (const struct kbase_mmu_hw_op_param){ ++ .vpfn = vpfn, ++ .nr = nr, ++ .op = KBASE_MMU_OP_FLUSH_MEM, ++ .kctx_id = kctx ? kctx->id : 0xFFFFFFFF, ++ .mmu_sync_info = mmu_sync_info, ++ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), ++ }; + -+ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { -+ pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Error in sending ack for " GET_PLATFORM "request"); -+ } ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) ++ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param); ++ else ++ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param); + -+ return errmsg; ++ return err; +} + -+static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, ++ size_t nr, unsigned long flags, int const group_id) +{ -+ int i; -+ int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); -+ -+ cmd->named_val_err = err; -+ if (err == KUTF_HELPER_ERR_NONE && -+ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { -+ /* All portal request commands are of format (named u64): -+ * CMD_NAME=1234 -+ * where, 1234 is a (variable) sequence number tag. 
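/*
 * Editorial sketch (not part of the patch): how kbase_mmu_update_pages_common()
 * above selects the MMU table and address space. On JM GPUs a context is
 * mandatory (the real function returns -EINVAL for a NULL kctx); on CSF GPUs a
 * NULL context selects the MCU's own MMU table and address space instead.
 */
static void example_pick_mmut(struct kbase_device *kbdev, struct kbase_context *kctx,
			      struct kbase_mmu_table **mmut, int *as_nr)
{
#if MALI_USE_CSF
	if (kctx) {
		*mmut = &kctx->mmu;
		*as_nr = kctx->as_nr;
	} else {
		*mmut = &kbdev->csf.mcu_mmu;
		*as_nr = MCU_AS_NR;
	}
#else
	*mmut = &kctx->mmu;
	*as_nr = kctx->as_nr;
#endif
}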
-+ */ -+ for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { -+ if (strcmp(cmd->cmd_input.val_name, -+ kbasep_portal_cmd_name_map[i].name)) -+ continue; -+ -+ cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; -+ return true; -+ } -+ } ++ if (unlikely(kctx == NULL)) ++ return -EINVAL; + -+ cmd->portal_cmd = PORTAL_CMD_INVALID; -+ return false; ++ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id); +} + -+static void kutf_clk_trace_flag_result(struct kutf_context *context, -+ enum kutf_result_status result, char const *msg) ++#if MALI_USE_CSF ++int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, ++ size_t nr, unsigned long flags, int const group_id) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ -+ if (result > data->test_status) { -+ data->test_status = result; -+ if (msg) -+ data->result_msg = msg; -+ if (data->server_state == PORTAL_STATE_LIVE && -+ result > KUTF_RESULT_WARN) { -+ data->server_state = PORTAL_STATE_CLOSING; -+ } -+ } ++ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id); +} ++#endif /* MALI_USE_CSF */ + -+static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ -+ char const *errmsg = NULL; -+ -+ BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != -+ PORTAL_TOTAL_CMDS); -+ WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); -+ -+ switch (cmd->portal_cmd) { -+ case PORTAL_CMD_GET_PLATFORM: -+ errmsg = kutf_clk_trace_do_get_platform(context, cmd); -+ break; -+ case PORTAL_CMD_GET_CLK_RATE_MGR: -+ fallthrough; -+ case PORTAL_CMD_GET_CLK_RATE_TRACE: -+ errmsg = kutf_clk_trace_do_get_rate(context, cmd); -+ break; -+ case PORTAL_CMD_GET_TRACE_SNAPSHOT: -+ errmsg = kutf_clk_trace_do_get_snapshot(context, cmd); -+ break; -+ case PORTAL_CMD_INC_PM_CTX_CNT: -+ fallthrough; -+ case PORTAL_CMD_DEC_PM_CTX_CNT: -+ errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd); -+ break; -+ case PORTAL_CMD_CLOSE_PORTAL: -+ errmsg = kutf_clk_trace_do_close_portal(context, cmd); -+ break; -+ case PORTAL_CMD_INVOKE_NOTIFY_42KHZ: -+ errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd); -+ break; -+ default: -+ pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", -+ cmd->portal_cmd); -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Don't know how to handle portal_cmd: %d", -+ cmd->portal_cmd); -+ break; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (errmsg) -+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); ++ kbdev->mmu_page_migrate_in_progress = true; ++} + -+ return (errmsg == NULL); ++static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); ++ kbdev->mmu_page_migrate_in_progress = false; ++ /* Invoke the PM state machine, as the MMU page migration session ++ * may have deferred a transition in L2 state machine. ++ */ ++ kbase_pm_update_state(kbdev); +} + -+/** -+ * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input -+ * @context: KUTF context -+ * @cmd: The erroneous input request -+ * -+ * This function deal with an erroneous input request, and respond with -+ * a proper 'NACK' message. 
-+ * -+ * Return: 0 on success, non-zero on failure -+ */ -+static int kutf_clk_trace_do_nack_response(struct kutf_context *context, -+ struct clk_trace_portal_input *cmd) ++int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, ++ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ -+ int seq; -+ int err; -+ char const *errmsg = NULL; ++ struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); ++ struct kbase_mmu_hw_op_param op_param; ++ struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? ++ page_md->data.mapped.mmut : ++ page_md->data.pt_mapped.mmut; ++ struct kbase_device *kbdev; ++ phys_addr_t pgd; ++ u64 *old_page, *new_page, *pgd_page, *target, vpfn; ++ int index, check_state, ret = 0; ++ unsigned long hwaccess_flags = 0; ++ unsigned int num_of_valid_entries; ++ u8 vmap_count = 0; + -+ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); ++ /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, ++ * here we skip the no kctx case, which is only used with MCU's mmut. ++ */ ++ if (!mmut->kctx) ++ return -EINVAL; + -+ if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && -+ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { -+ /* Keep seq number as % 256 */ -+ seq = cmd->cmd_input.u.val_u64 & 255; -+ snprintf(portal_msg_buf, PORTAL_MSG_LEN, -+ "{SEQ:%d, MSG: Unknown command '%s'.}", seq, -+ cmd->cmd_input.val_name); -+ err = kutf_helper_send_named_str(context, "NACK", -+ portal_msg_buf); -+ } else -+ err = kutf_helper_send_named_str(context, "NACK", -+ "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); ++ if (level > MIDGARD_MMU_BOTTOMLEVEL) ++ return -EINVAL; ++ else if (level == MIDGARD_MMU_BOTTOMLEVEL) ++ vpfn = page_md->data.mapped.vpfn; ++ else ++ vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + -+ if (err) { -+ errmsg = kutf_dsprintf(&context->fixture_pool, -+ "Failed to send portal NACK response"); -+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ kbdev = mmut->kctx->kbdev; ++ index = (vpfn >> ((3 - level) * 9)) & 0x1FF; ++ ++ /* Create all mappings before copying content. ++ * This is done as early as possible because is the only operation that may ++ * fail. It is possible to do this before taking any locks because the ++ * pages to migrate are not going to change and even the parent PGD is not ++ * going to be affected by any other concurrent operation, since the page ++ * has been isolated before migration and therefore it cannot disappear in ++ * the middle of this function. ++ */ ++ old_page = kmap(as_page(old_phys)); ++ if (!old_page) { ++ dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); ++ ret = -EINVAL; ++ goto old_page_map_error; + } + -+ return err; -+} ++ new_page = kmap(as_page(new_phys)); ++ if (!new_page) { ++ dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); ++ ret = -EINVAL; ++ goto new_page_map_error; ++ } + -+/** -+ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing -+ * @context: KUTF context -+ * -+ * This function carries out some basic test on the tracing operation: -+ * 1). GPU idle on test start, trace rate should be 0 (low power state) -+ * 2). Make sure GPU is powered up, the trace rate should match -+ * that from the clcok manager's internal recorded rate -+ * 3). If the GPU active transition occurs following 2), there -+ * must be rate change event from tracing. 
-+ */ -+static void kutf_clk_trace_barebone_check(struct kutf_context *context) -+{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ bool fail = false; -+ bool idle[2] = { false }; -+ char const *msg = NULL; -+ int i; ++ /* GPU cache maintenance affects both memory content and page table, ++ * but at two different stages. A single virtual memory page is affected ++ * by the migration. ++ * ++ * Notice that the MMU maintenance is done in the following steps: ++ * ++ * 1) The MMU region is locked without performing any other operation. ++ * This lock must cover the entire migration process, in order to ++ * prevent any GPU access to the virtual page whose physical page ++ * is being migrated. ++ * 2) Immediately after locking: the MMU region content is flushed via ++ * GPU control while the lock is taken and without unlocking. ++ * The region must stay locked for the duration of the whole page ++ * migration procedure. ++ * This is necessary to make sure that pending writes to the old page ++ * are finalized before copying content to the new page. ++ * 3) Before unlocking: changes to the page table are flushed. ++ * Finer-grained GPU control operations are used if possible, otherwise ++ * the whole GPU cache shall be flushed again. ++ * This is necessary to make sure that the GPU accesses the new page ++ * after migration. ++ * 4) The MMU region is unlocked. ++ */ ++#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) ++ op_param.mmu_sync_info = CALLER_MMU_ASYNC; ++ op_param.kctx_id = mmut->kctx->id; ++ op_param.vpfn = vpfn & PGD_VPFN_MASK(level); ++ op_param.nr = 1 << ((3 - level) * 9); ++ op_param.op = KBASE_MMU_OP_FLUSH_PT; ++ /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ ++ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? ++ pgd_level_to_skip_flush(1ULL << level) : ++ pgd_level_to_skip_flush(3ULL << level); + -+ /* Check consistency if gpu happens to be idle */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ idle[0] = kbdev->pm.clk_rtm.gpu_idle; -+ if (kbdev->pm.clk_rtm.gpu_idle) { -+ for (i = 0; i < data->nclks; i++) { -+ if (data->snapshot[i].current_rate) { -+ /* Idle should have a rate 0 */ -+ fail = true; -+ break; -+ } ++ mutex_lock(&mmut->mmu_lock); ++ ++ /* The state was evaluated before entering this function, but it could ++ * have changed before the mmu_lock was taken. However, the state ++ * transitions which are possible at this point are only two, and in both ++ * cases it is a stable state progressing to a "free in progress" state. ++ * ++ * After taking the mmu_lock the state can no longer change: read it again ++ * and make sure that it hasn't changed before continuing. 
++ */ ++ spin_lock(&page_md->migrate_lock); ++ check_state = PAGE_STATUS_GET(page_md->status); ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) ++ vmap_count = page_md->vmap_count; ++ spin_unlock(&page_md->migrate_lock); ++ ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) { ++ if (check_state != ALLOCATED_MAPPED) { ++ dev_dbg(kbdev->dev, ++ "%s: state changed to %d (was %d), abort page migration", __func__, ++ check_state, ALLOCATED_MAPPED); ++ ret = -EAGAIN; ++ goto page_state_change_out; ++ } else if (vmap_count > 0) { ++ dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", ++ __func__); ++ ret = -EAGAIN; ++ goto page_state_change_out; + } -+ } -+ spin_unlock(&kbdev->pm.clk_rtm.lock); -+ if (fail) { -+ msg = kutf_dsprintf(&context->fixture_pool, -+ "GPU Idle not yielding 0-rate"); -+ pr_err("Trace did not see idle rate\n"); + } else { -+ /* Make local PM active if not done so yet */ -+ if (data->pm_ctx_cnt == 0) { -+ /* Ensure the GPU is powered */ -+ data->pm_ctx_cnt++; -+ kutf_set_pm_ctx_active(context); -+ } -+ /* Checking the rate is consistent */ -+ spin_lock(&kbdev->pm.clk_rtm.lock); -+ idle[1] = kbdev->pm.clk_rtm.gpu_idle; -+ for (i = 0; i < data->nclks; i++) { -+ /* Rate match between the manager and the trace */ -+ if (kbdev->pm.clk_rtm.clks[i]->clock_val != -+ data->snapshot[i].current_rate) { -+ fail = true; -+ break; -+ } ++ if (check_state != PT_MAPPED) { ++ dev_dbg(kbdev->dev, ++ "%s: state changed to %d (was %d), abort PGD page migration", ++ __func__, check_state, PT_MAPPED); ++ WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); ++ ret = -EAGAIN; ++ goto page_state_change_out; + } -+ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ } + -+ if (idle[1]) { -+ msg = kutf_dsprintf(&context->fixture_pool, -+ "GPU still idle after set_pm_ctx_active"); -+ pr_err("GPU still idle after set_pm_ctx_active\n"); -+ } ++ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd); ++ if (ret) { ++ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__); ++ goto get_pgd_at_level_error; ++ } + -+ if (!msg && fail) { -+ msg = kutf_dsprintf(&context->fixture_pool, -+ "Trace rate not matching Clk manager's read"); -+ pr_err("Trace rate not matching Clk manager's read\n"); ++ pgd_page = kmap(phys_to_page(pgd)); ++ if (!pgd_page) { ++ dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); ++ ret = -EINVAL; ++ goto pgd_page_map_error; ++ } ++ ++ mutex_lock(&kbdev->pm.lock); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ ++ /* Lock MMU region and flush GPU cache by using GPU control, ++ * in order to keep MMU region locked. 
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); ++ if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { ++ /* Defer the migration as L2 is in a transitional phase */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&kbdev->pm.lock); ++ dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); ++ ret = -EAGAIN; ++ goto l2_state_defer_out; ++ } ++ /* Prevent transitional phases in L2 by starting the transaction */ ++ mmu_page_migration_transaction_begin(kbdev); ++ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { ++ int as_nr = mmut->kctx->as_nr; ++ struct kbase_as *as = &kbdev->as[as_nr]; ++ ++ ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); ++ if (!ret) { ++ ret = kbase_gpu_cache_flush_and_busy_wait( ++ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } ++ if (ret) ++ mmu_page_migration_transaction_end(kbdev); + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + -+ if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) { -+ msg = kutf_dsprintf(&context->fixture_pool, -+ "Trace update did not occur"); -+ pr_err("Trace update did not occur\n"); ++ if (ret < 0) { ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&kbdev->pm.lock); ++ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); ++ goto undo_mappings; + } -+ if (msg) -+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg); -+ else if (!data->total_update_cnt) { -+ msg = kutf_dsprintf(&context->fixture_pool, -+ "No trace update seen during the test!"); -+ kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg); ++ ++ /* Copy memory content. ++ * ++ * It is necessary to claim the ownership of the DMA buffer for the old ++ * page before performing the copy, to make sure of reading a consistent ++ * version of its content, before copying. After the copy, ownership of ++ * the DMA buffer for the new page is given to the GPU in order to make ++ * the content visible to potential GPU access that may happen as soon as ++ * this function releases the lock on the MMU region. ++ */ ++ dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ memcpy(new_page, old_page, PAGE_SIZE); ++ dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ++ /* Remap GPU virtual page. ++ * ++ * This code rests on the assumption that page migration is only enabled ++ * for 4 kB pages, that necessarily live in the bottom level of the MMU ++ * page table. For this reason, the PGD level tells us inequivocably ++ * whether the page being migrated is a "content page" or another PGD ++ * of the page table: ++ * ++ * - Bottom level implies ATE (Address Translation Entry) ++ * - Any other level implies PTE (Page Table Entry) ++ * ++ * The current implementation doesn't handle the case of a level 0 PGD, ++ * that is: the root PGD of the page table. ++ */ ++ target = &pgd_page[index]; ++ ++ /* Certain entries of a page table page encode the count of valid entries ++ * present in that page. So need to save & restore the count information ++ * when updating the PTE/ATE to point to the new page. 
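/*
 * Editorial sketch (not part of the patch): the DMA ownership dance around the
 * page copy above. The CPU must claim the old page's DMA buffer before reading
 * it so that it observes a coherent view, and must hand the new page back to
 * the device before the GPU is allowed to access it again.
 */
static void example_copy_migrated_page(struct kbase_device *kbdev, void *old_page, void *new_page,
					dma_addr_t old_dma_addr, dma_addr_t new_dma_addr)
{
	/* CPU takes ownership of the source buffer */
	dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	memcpy(new_page, old_page, PAGE_SIZE);

	/* Destination buffer is handed back to the device (GPU) */
	dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}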
++ */ ++ num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); ++ ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) { ++ WARN_ON_ONCE((*target & 1UL) == 0); ++ *target = ++ kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, ++ level, page_md->data.mapped.reg->gpu_alloc->group_id); ++ } else { ++ u64 managed_pte; ++ ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ /* The PTE should be pointing to the page being migrated */ ++ WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( ++ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); ++#endif ++ kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); ++ *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } -+} + -+static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd) -+{ -+ return (cmd->named_val_err == -EBUSY); -+} ++ kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + -+static void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) -+{ -+ struct clk_trace_portal_input cmd; -+ unsigned long timeout = jiffies + HZ * 2; -+ bool has_cmd; ++ /* This function always updates a single entry inside an existing PGD, ++ * therefore cache maintenance is necessary and affects a single entry. ++ */ ++ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), ++ kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), ++ KBASE_MMU_OP_FLUSH_PT); + -+ while (time_before(jiffies, timeout)) { -+ if (kutf_helper_pending_input(context)) { -+ has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, -+ &cmd); -+ if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) -+ break; ++ /* Unlock MMU region. ++ * ++ * Notice that GPUs which don't issue flush commands via GPU control ++ * still need an additional GPU cache flush here, this time only ++ * for the page table, because the function call above to sync PGDs ++ * won't have any effect on them. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); ++ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { ++ int as_nr = mmut->kctx->as_nr; ++ struct kbase_as *as = &kbdev->as[as_nr]; + -+ kutf_helper_send_named_str(context, "NACK", -+ "Fatal! 
No clocks visible, aborting"); ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { ++ ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); ++ } else { ++ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, ++ GPU_COMMAND_CACHE_CLN_INV_L2); ++ if (!ret) ++ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } -+ msleep(20); + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); ++ /* Releasing locks before checking the migration transaction error state */ ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&kbdev->pm.lock); + -+ kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, -+ "No clocks visble to the portal"); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); ++ /* Release the transition prevention in L2 by ending the transaction */ ++ mmu_page_migration_transaction_end(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); ++ ++ /* Checking the final migration transaction error state */ ++ if (ret < 0) { ++ dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); ++ goto undo_mappings; ++ } ++ ++ /* Undertaking metadata transfer, while we are holding the mmu_lock */ ++ spin_lock(&page_md->migrate_lock); ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) { ++ size_t page_array_index = ++ page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; ++ ++ WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); ++ ++ /* Replace page in array of pages of the physical allocation. */ ++ page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; ++ } ++ /* Update the new page dma_addr with the transferred metadata from the old_page */ ++ page_md->dma_addr = new_dma_addr; ++ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); ++ spin_unlock(&page_md->migrate_lock); ++ set_page_private(as_page(new_phys), (unsigned long)page_md); ++ /* Old page metatdata pointer cleared as it now owned by the new page */ ++ set_page_private(as_page(old_phys), 0); ++ ++l2_state_defer_out: ++ kunmap(phys_to_page(pgd)); ++pgd_page_map_error: ++get_pgd_at_level_error: ++page_state_change_out: ++ mutex_unlock(&mmut->mmu_lock); ++ ++ kunmap(as_page(new_phys)); ++new_page_map_error: ++ kunmap(as_page(old_phys)); ++old_page_map_error: ++ return ret; ++ ++undo_mappings: ++ /* Unlock the MMU table and undo mappings. */ ++ mutex_unlock(&mmut->mmu_lock); ++ kunmap(phys_to_page(pgd)); ++ kunmap(as_page(new_phys)); ++ kunmap(as_page(old_phys)); ++ ++ return ret; +} + -+/** -+ * mali_kutf_clk_rate_trace_test_portal() - Service portal input -+ * @context: KUTF context -+ * -+ * The test portal operates on input requests. If the input request is one -+ * of the recognized portal commands, it handles it accordingly. Otherwise -+ * a negative response 'NACK' is returned. The portal service terminates -+ * when a 'CLOSE_PORTAL' request is received, or due to an internal error. -+ * Both case would result in the server_state transitioned to CLOSING. 
-+ * -+ * If the portal is closed on request, a sanity test on the clock rate -+ * trace operation is undertaken via function: -+ * kutf_clk_trace_barebone_check(); -+ */ -+static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) ++static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ phys_addr_t pgd, unsigned int level) +{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ struct clk_trace_portal_input new_cmd; ++ u64 *pgd_page; ++ int i; ++ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; ++ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; ++ u64 *pgd_page_buffer = NULL; ++ struct page *p = phys_to_page(pgd); + -+ pr_debug("Test portal service start\n"); ++ lockdep_assert_held(&mmut->mmu_lock); + -+ while (data->server_state == PORTAL_STATE_LIVE) { -+ if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd)) -+ kutf_clk_trace_process_portal_cmd(context, &new_cmd); -+ else if (kutf_clk_trace_end_of_stream(&new_cmd)) -+ /* Dequeue on portal input, end of stream */ -+ data->server_state = PORTAL_STATE_CLOSING; -+ else -+ kutf_clk_trace_do_nack_response(context, &new_cmd); ++ pgd_page = kmap_atomic(p); ++ /* kmap_atomic should NEVER fail. */ ++ if (WARN_ON_ONCE(pgd_page == NULL)) ++ return; ++ if (level < MIDGARD_MMU_BOTTOMLEVEL) { ++ /* Copy the page to our preallocated buffer so that we can minimize ++ * kmap_atomic usage ++ */ ++ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level]; ++ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + } + -+ /* Closing, exhausting all the pending inputs with NACKs. */ -+ if (data->server_state == PORTAL_STATE_CLOSING) { -+ while (kutf_helper_pending_input(context) && -+ (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || -+ !kutf_clk_trace_end_of_stream(&new_cmd))) { -+ kutf_helper_send_named_str(context, "NACK", -+ "Portal closing down"); ++ /* When page migration is enabled, kbase_region_tracker_term() would ensure ++ * there are no pages left mapped on the GPU for a context. Hence the count ++ * of valid entries is expected to be zero here. ++ */ ++ if (kbase_page_migration_enabled && mmut->kctx) ++ WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); ++ /* Invalidate page after copying */ ++ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); ++ kunmap_atomic(pgd_page); ++ pgd_page = pgd_page_buffer; ++ ++ if (level < MIDGARD_MMU_BOTTOMLEVEL) { ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { ++ if (mmu_mode->pte_is_valid(pgd_page[i], level)) { ++ phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr( ++ mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev, ++ MGM_DEFAULT_PTE_GROUP, ++ level, pgd_page[i])); ++ ++ mmu_teardown_level(kbdev, mmut, target_pgd, level + 1); ++ } + } + } + -+ /* If no portal error, do a barebone test here irrespective -+ * whatever the portal live session has been testing, which -+ * is entirely driven by the user-side via portal requests. 
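mmu_teardown_level() above walks one page-table level, recurses into every valid child before freeing the current PGD page, and copies the level into a preallocated scratch buffer so that kmap_atomic() is not held across the recursion. A self-contained toy model of the recursive shape only (simplified types, no scratch buffer or GPU specifics):

#include <stdlib.h>

#define TOY_ENTRIES 512
#define TOY_BOTTOM_LEVEL 3

struct toy_pgd {
        struct toy_pgd *child[TOY_ENTRIES]; /* NULL means "invalid entry" */
};

static void toy_teardown_level(struct toy_pgd *pgd, int level)
{
        int i;

        /* Bottom-level entries point at data pages, not at further tables. */
        if (level < TOY_BOTTOM_LEVEL) {
                for (i = 0; i < TOY_ENTRIES; i++)
                        if (pgd->child[i])
                                toy_teardown_level(pgd->child[i], level + 1);
        }

        free(pgd); /* the driver hands the page back to its pool instead */
}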
++ kbase_mmu_free_pgd(kbdev, mmut, pgd); ++} ++ ++int kbase_mmu_init(struct kbase_device *const kbdev, ++ struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, ++ int const group_id) ++{ ++ if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || ++ WARN_ON(group_id < 0)) ++ return -EINVAL; ++ ++ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)), ++ "List of free PGDs may not be large enough."); ++ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL, ++ "Array of MMU levels is not large enough."); ++ ++ mmut->group_id = group_id; ++ mutex_init(&mmut->mmu_lock); ++ mmut->kctx = kctx; ++ mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; ++ ++ /* We allocate pages into the kbdev memory pool, then ++ * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to ++ * avoid allocations from the kernel happening with the lock held. + */ -+ if (data->test_status <= KUTF_RESULT_WARN) { -+ if (data->server_state != PORTAL_STATE_NO_CLK) -+ kutf_clk_trace_barebone_check(context); -+ else { -+ /* No clocks case, NACK 2-sec for the fatal situation */ -+ kutf_clk_trace_no_clks_dummy(context); ++ while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { ++ int err; ++ ++ err = kbase_mem_pool_grow( ++ &kbdev->mem_pools.small[mmut->group_id], ++ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL); ++ if (err) { ++ kbase_mmu_term(kbdev, mmut); ++ return -ENOMEM; + } ++ ++ mutex_lock(&mmut->mmu_lock); ++ mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); ++ mutex_unlock(&mmut->mmu_lock); + } + -+ /* If we have changed pm_ctx count, drop it back */ -+ if (data->pm_ctx_cnt) { -+ /* Although we count on portal requests, it only has material -+ * impact when from 0 -> 1. So the reverse is a simple one off. -+ */ -+ data->pm_ctx_cnt = 0; -+ kutf_set_pm_ctx_idle(context); ++ return 0; ++} ++ ++void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) ++{ ++ WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), ++ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", ++ mmut->kctx->tgid, mmut->kctx->id); ++ ++ if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { ++ mutex_lock(&mmut->mmu_lock); ++ mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); ++ mutex_unlock(&mmut->mmu_lock); ++ ++ if (mmut->kctx) ++ KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); + } + -+ /* Finally log the test result line */ -+ if (data->test_status < KUTF_RESULT_WARN) -+ kutf_test_pass(context, data->result_msg); -+ else if (data->test_status == KUTF_RESULT_WARN) -+ kutf_test_warn(context, data->result_msg); -+ else if (data->test_status == KUTF_RESULT_FATAL) -+ kutf_test_fatal(context, data->result_msg); -+ else -+ kutf_test_fail(context, data->result_msg); ++ mutex_destroy(&mmut->mmu_lock); ++} + -+ pr_debug("Test end\n"); ++void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) ++{ ++ destroy_workqueue(kbdev->as[i].pf_wq); +} + -+/** -+ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data -+ * required for mali_kutf_clk_rate_trace_test_portal. -+ * @context: KUTF context. 
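kbase_mmu_init() above grows the memory pool by MIDGARD_MMU_BOTTOMLEVEL pages before taking mmu_lock, so the PGD allocation under the lock never calls into the kernel allocator, and it retries if a concurrent user drained the pool in between. A generic sketch of that pre-grow-then-allocate pattern (the toy pool type and helpers are assumptions, not kbase API):

#include <pthread.h>
#include <stddef.h>

struct toy_pool; /* opaque stand-in for a preallocated page pool */

static void *toy_alloc_pregrown(struct toy_pool *pool, pthread_mutex_t *lock,
                                int (*grow)(struct toy_pool *, size_t),
                                void *(*alloc)(struct toy_pool *))
{
        void *obj = NULL;

        while (!obj) {
                /* Refill the pool with no lock held. */
                if (grow(pool, 4))
                        return NULL;

                /* The allocation under the lock now comes straight from the
                 * pool; it can still fail if another thread emptied the pool
                 * first, in which case we simply grow and try again.
                 */
                pthread_mutex_lock(lock);
                obj = alloc(pool);
                pthread_mutex_unlock(lock);
        }

        return obj;
}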
-+ * -+ * Return: Fixture data created on success or NULL on failure -+ */ -+static void *mali_kutf_clk_rate_trace_create_fixture( -+ struct kutf_context *context) ++void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, ++ phys_addr_t phys, size_t size, ++ enum kbase_mmu_op_type flush_op) +{ -+ struct kutf_clk_rate_trace_fixture_data *data; -+ struct kbase_device *kbdev; -+ unsigned long rate; ++#if MALI_USE_CSF ++ unsigned long irq_flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && ++ kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) ++ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++#endif ++} ++ ++#ifdef CONFIG_MALI_VECTOR_DUMP ++static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, ++ int level, char ** const buffer, size_t *size_left) ++{ ++ phys_addr_t target_pgd; ++ u64 *pgd_page; + int i; ++ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); ++ size_t dump_size; ++ struct kbase_device *kbdev; ++ struct kbase_mmu_mode const *mmu_mode; + -+ /* Acquire the kbase device */ -+ pr_debug("Finding device\n"); -+ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); -+ if (kbdev == NULL) { -+ kutf_test_fail(context, "Failed to find kbase device"); -+ return NULL; ++ if (WARN_ON(kctx == NULL)) ++ return 0; ++ lockdep_assert_held(&kctx->mmu.mmu_lock); ++ ++ kbdev = kctx->kbdev; ++ mmu_mode = kbdev->mmu_mode; ++ ++ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); ++ if (!pgd_page) { ++ dev_warn(kbdev->dev, "%s: kmap failure", __func__); ++ return 0; + } + -+ pr_debug("Creating fixture\n"); -+ data = kutf_mempool_alloc(&context->fixture_pool, -+ sizeof(struct kutf_clk_rate_trace_fixture_data)); -+ if (!data) -+ return NULL; ++ if (*size_left >= size) { ++ /* A modified physical address that contains ++ * the page table level ++ */ ++ u64 m_pgd = pgd | level; + -+ memset(data, 0, sizeof(*data)); -+ pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); -+ spin_lock(&kbdev->pm.clk_rtm.lock); ++ /* Put the modified physical address in the output buffer */ ++ memcpy(*buffer, &m_pgd, sizeof(m_pgd)); ++ *buffer += sizeof(m_pgd); + -+ if (g_ptr_portal_data != NULL) { -+ pr_warn("Test portal is already in use, run aborted\n"); -+ spin_unlock(&kbdev->pm.clk_rtm.lock); -+ kutf_test_fail(context, "Portal allows single session only"); -+ return NULL; ++ /* Followed by the page table itself */ ++ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); ++ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; ++ ++ *size_left -= size; + } + -+ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { -+ if (kbdev->pm.clk_rtm.clks[i]) { -+ data->nclks++; -+ if (kbdev->pm.clk_rtm.gpu_idle) -+ rate = 0; -+ else -+ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; -+ data->snapshot[i].previous_rate = rate; -+ data->snapshot[i].current_rate = rate; ++ if (level < MIDGARD_MMU_BOTTOMLEVEL) { ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { ++ if (mmu_mode->pte_is_valid(pgd_page[i], level)) { ++ target_pgd = mmu_mode->pte_to_phy_addr( ++ kbdev->mgm_dev->ops.mgm_pte_to_original_pte( ++ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, ++ level, pgd_page[i])); ++ ++ dump_size = kbasep_mmu_dump_level(kctx, ++ target_pgd, level + 1, ++ buffer, size_left); ++ if (!dump_size) { ++ kunmap(pfn_to_page(PFN_DOWN(pgd))); ++ return 0; ++ } ++ size += dump_size; ++ } + } + } + -+ 
spin_unlock(&kbdev->pm.clk_rtm.lock); ++ kunmap(pfn_to_page(PFN_DOWN(pgd))); + -+ if (data->nclks) { -+ /* Subscribe this test server portal */ -+ data->listener.notify = kutf_portal_trace_write; -+ data->invoke_notify = false; ++ return size; ++} + -+ kbase_clk_rate_trace_manager_subscribe( -+ &kbdev->pm.clk_rtm, &data->listener); -+ /* Update the kutf_server_portal fixture_data pointer */ -+ g_ptr_portal_data = data; ++void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) ++{ ++ void *kaddr; ++ size_t size_left; ++ ++ KBASE_DEBUG_ASSERT(kctx); ++ ++ if (nr_pages == 0) { ++ /* can't dump in a 0 sized buffer, early out */ ++ return NULL; + } + -+ data->kbdev = kbdev; -+ data->result_msg = NULL; -+ data->test_status = KUTF_RESULT_PASS; ++ size_left = nr_pages * PAGE_SIZE; + -+ if (data->nclks == 0) { -+ data->server_state = PORTAL_STATE_NO_CLK; -+ pr_debug("Kbdev has no clocks for rate trace"); -+ } else -+ data->server_state = PORTAL_STATE_LIVE; ++ if (WARN_ON(size_left == 0)) ++ return NULL; ++ kaddr = vmalloc_user(size_left); + -+ pr_debug("Created fixture\n"); ++ mutex_lock(&kctx->mmu.mmu_lock); + -+ return data; -+} ++ if (kaddr) { ++ u64 end_marker = 0xFFULL; ++ char *buffer; ++ char *mmu_dump_buffer; ++ u64 config[3]; ++ size_t dump_size, size = 0; ++ struct kbase_mmu_setup as_setup; + -+/** -+ * mali_kutf_clk_rate_trace_remove_fixture - Destroy fixture data previously created by -+ * mali_kutf_clk_rate_trace_create_fixture. -+ * -+ * @context: KUTF context. -+ */ -+static void mali_kutf_clk_rate_trace_remove_fixture( -+ struct kutf_context *context) -+{ -+ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; ++ buffer = (char *)kaddr; ++ mmu_dump_buffer = buffer; + -+ if (data->nclks) { -+ /* Clean up the portal trace write arrangement */ -+ g_ptr_portal_data = NULL; ++ kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, ++ &as_setup); ++ config[0] = as_setup.transtab; ++ config[1] = as_setup.memattr; ++ config[2] = as_setup.transcfg; ++ memcpy(buffer, &config, sizeof(config)); ++ mmu_dump_buffer += sizeof(config); ++ size_left -= sizeof(config); ++ size += sizeof(config); + -+ kbase_clk_rate_trace_manager_unsubscribe( -+ &kbdev->pm.clk_rtm, &data->listener); ++ dump_size = kbasep_mmu_dump_level(kctx, ++ kctx->mmu.pgd, ++ MIDGARD_MMU_TOPLEVEL, ++ &mmu_dump_buffer, ++ &size_left); ++ ++ if (!dump_size) ++ goto fail_free; ++ ++ size += dump_size; ++ ++ /* Add on the size for the end marker */ ++ size += sizeof(u64); ++ ++ if (size > (nr_pages * PAGE_SIZE)) { ++ /* The buffer isn't big enough - free the memory and ++ * return failure ++ */ ++ goto fail_free; ++ } ++ ++ /* Add the end marker */ ++ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); + } -+ pr_debug("Destroying fixture\n"); -+ kbase_release_device(kbdev); -+ pr_debug("Destroyed fixture\n"); ++ ++ mutex_unlock(&kctx->mmu.mmu_lock); ++ return kaddr; ++ ++fail_free: ++ vfree(kaddr); ++ mutex_unlock(&kctx->mmu.mmu_lock); ++ return NULL; +} ++KBASE_EXPORT_TEST_API(kbase_mmu_dump); ++#endif /* CONFIG_MALI_VECTOR_DUMP */ + -+/** -+ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test mdoule. 
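kbase_mmu_dump() above emits a flat buffer: three u64 words of address-space setup (transtab, memattr, transcfg), then, for every page table visited, one u64 holding the PGD physical address OR-ed with its level followed by the 512 u64 entries of that table, and finally a u64 end marker of 0xFF. A hedged user-side sketch that walks this layout (the 0xFFF split relies on PGDs being page aligned, as in the code above):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define DUMP_PAGE_ENTRIES 512
#define DUMP_END_MARKER 0xFFULL

static void toy_walk_mmu_dump(const uint64_t *buf, size_t nr_words)
{
        size_t pos = 3; /* skip the {transtab, memattr, transcfg} header */

        while (pos < nr_words && buf[pos] != DUMP_END_MARKER) {
                uint64_t tagged = buf[pos++];

                printf("PGD at %#llx (level %llu)\n",
                       (unsigned long long)(tagged & ~0xFFFULL),
                       (unsigned long long)(tagged & 0xFFFULL));

                pos += DUMP_PAGE_ENTRIES; /* skip the table's 512 entries */
        }
}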
-+ * -+ * Return: 0 on success, error code otherwise -+ */ -+static int __init mali_kutf_clk_rate_trace_test_module_init(void) ++void kbase_mmu_bus_fault_worker(struct work_struct *data) +{ -+ struct kutf_suite *suite; -+ unsigned int filters; -+ union kutf_callback_data suite_data = { NULL }; ++ struct kbase_as *faulting_as; ++ int as_no; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct kbase_fault *fault; + -+ pr_debug("Creating app\n"); ++ faulting_as = container_of(data, struct kbase_as, work_busfault); ++ fault = &faulting_as->bf_data; + -+ g_ptr_portal_data = NULL; -+ kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); ++ /* Ensure that any pending page fault worker has completed */ ++ flush_work(&faulting_as->work_pagefault); + -+ if (!kutf_app) { -+ pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME -+ " failed!\n"); -+ return -ENOMEM; ++ as_no = faulting_as->number; ++ ++ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++ ++ /* Grab the context, already refcounted in kbase_mmu_interrupt() on ++ * flagging of the bus-fault. Therefore, it cannot be scheduled out of ++ * this AS until we explicitly release it ++ */ ++ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); ++ if (!kctx) { ++ atomic_dec(&kbdev->faults_pending); ++ return; + } + -+ pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); -+ suite = kutf_create_suite_with_filters_and_data( -+ kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, -+ mali_kutf_clk_rate_trace_create_fixture, -+ mali_kutf_clk_rate_trace_remove_fixture, -+ KUTF_F_TEST_GENERIC, -+ suite_data); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* check if we still have GPU */ ++ if (unlikely(kbase_is_gpu_removed(kbdev))) { ++ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); ++ release_ctx(kbdev, kctx); ++ atomic_dec(&kbdev->faults_pending); ++ return; ++ } ++#endif ++ ++ if (unlikely(fault->protected_mode)) { ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Permission failure", fault); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ release_ctx(kbdev, kctx); ++ atomic_dec(&kbdev->faults_pending); ++ return; + -+ if (!suite) { -+ pr_warn("Creation of suite %s failed!\n", -+ CLK_RATE_TRACE_SUITE_NAME); -+ kutf_destroy_application(kutf_app); -+ return -ENOMEM; + } + -+ filters = suite->suite_default_flags; -+ kutf_add_test_with_filters( -+ suite, 0x0, CLK_RATE_TRACE_PORTAL, -+ mali_kutf_clk_rate_trace_test_portal, -+ filters); ++#if MALI_USE_CSF ++ /* Before the GPU power off, wait is done for the completion of ++ * in-flight MMU fault work items. So GPU is expected to remain ++ * powered up whilst the bus fault handling is being done. ++ */ ++ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); ++#else ++ /* NOTE: If GPU already powered off for suspend, ++ * we don't need to switch to unmapped ++ */ ++ if (!kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); ++ kbase_pm_context_idle(kbdev); ++ } ++#endif + -+ pr_debug("Init complete\n"); -+ return 0; -+} ++ release_ctx(kbdev, kctx); + -+/** -+ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this -+ * test. 
-+ */ -+static void __exit mali_kutf_clk_rate_trace_test_module_exit(void) -+{ -+ pr_debug("Exit start\n"); -+ kutf_destroy_application(kutf_app); -+ pr_debug("Exit complete\n"); ++ atomic_dec(&kbdev->faults_pending); +} + ++void kbase_flush_mmu_wqs(struct kbase_device *kbdev) ++{ ++ int i; + -+module_init(mali_kutf_clk_rate_trace_test_module_init); -+module_exit(mali_kutf_clk_rate_trace_test_module_exit); ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ struct kbase_as *as = &kbdev->as[i]; + -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h ++ flush_workqueue(as->pf_wq); ++ } ++} +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h new file mode 100644 -index 000000000..a716b9f70 +index 000000000..699b1f340 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h -@@ -0,0 +1,154 @@ ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +@@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -266304,228 +266499,336 @@ index 000000000..a716b9f70 + * + */ + -+#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_ -+#define _KUTF_CLK_RATE_TRACE_TEST_H_ ++#ifndef _KBASE_MMU_H_ ++#define _KBASE_MMU_H_ + -+#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace" -+#define CLK_RATE_TRACE_SUITE_NAME "rate_trace" -+#define CLK_RATE_TRACE_PORTAL "portal" ++#include ++ ++#define KBASE_MMU_PAGE_ENTRIES 512 ++#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) ++ ++struct kbase_context; ++struct kbase_mmu_table; ++struct kbase_va_region; + +/** -+ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace -+ * service portal. ++ * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. ++ * A pointer to this type is passed down from the outer-most callers in the kbase ++ * module - where the information resides as to the synchronous / asynchronous ++ * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to ++ * existing GPU work does it come from requests (like ioctl) from user-space, power management, ++ * etc. + * -+ * @PORTAL_CMD_GET_PLATFORM: Request the platform that the tests are -+ * to be run on. -+ * @PORTAL_CMD_GET_CLK_RATE_MGR: Request the clock trace manager internal -+ * data record. On a positive acknowledgement -+ * the prevailing clock rates and the GPU idle -+ * condition flag are returned. -+ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its -+ * data record. On a positive acknowledgement -+ * the last trace recorded clock rates and the -+ * GPU idle condition flag are returned. -+ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its -+ * current snapshot data record. On a positive -+ * acknowledgement the snapshot array matching -+ * the number of clocks are returned. It also -+ * starts a fresh snapshot inside the clock -+ * trace portal. -+ * @PORTAL_CMD_INC_PM_CTX_CNT: Request the clock trace portal to increase -+ * its internal PM_CTX_COUNT. 
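kbase_mmu_bus_fault_worker() above starts by flushing the page-fault work item of the same address space, so the two fault handlers for one AS never run concurrently, and it drops the context reference and the faults_pending count on every exit path. A minimal sketch of just the ordering idea, reusing the container_of()/flush_work() pattern from that function (the handling step is a placeholder):

#include <linux/workqueue.h>

static void toy_busfault_worker(struct work_struct *data)
{
        struct kbase_as *as = container_of(data, struct kbase_as, work_busfault);

        /* Let any page-fault handler already queued for this AS finish first. */
        flush_work(&as->work_pagefault);

        /* ... report and handle the bus fault for this address space ... */
}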
If this increase -+ * yielded a count of 0 -> 1 change, the portal -+ * will initiate a PM_CTX_ACTIVE call to the -+ * Kbase power management. Futher increase -+ * requests will limit to only affect the -+ * portal internal count value. -+ * @PORTAL_CMD_DEC_PM_CTX_CNT: Request the clock trace portal to decrease -+ * its internal PM_CTX_COUNT. If this decrease -+ * yielded a count of 1 -> 0 change, the portal -+ * will initiate a PM_CTX_IDLE call to the -+ * Kbase power management. -+ * @PORTAL_CMD_CLOSE_PORTAL: Inform the clock trace portal service the -+ * client has completed its session. The portal -+ * will start the close down action. If no -+ * error has occurred during the dynamic -+ * interactive session, an inherent basic test -+ * carrying out some sanity check on the clock -+ * trace is undertaken. -+ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks -+ * for the top clock domain with a new GPU frequency -+ * set to 42 kHZ. -+ * @PORTAL_CMD_INVALID: Valid commands termination marker. Must be -+ * the highest enumeration value, as it -+ * represents valid command array size. -+ * @PORTAL_TOTAL_CMDS: Alias of PORTAL_CMD_INVALID. ++ * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice ++ * of a 'valid' value ++ * @CALLER_MMU_SYNC: Arbitrary value for 'synchronous that isn't easy to choose by accident ++ * @CALLER_MMU_ASYNC: Also hard to choose by accident + */ -+/* PORTAL_CMD_INVALID must be the last one, serving the size */ -+enum kbasep_clk_rate_trace_req { -+ PORTAL_CMD_GET_PLATFORM, -+ PORTAL_CMD_GET_CLK_RATE_MGR, -+ PORTAL_CMD_GET_CLK_RATE_TRACE, -+ PORTAL_CMD_GET_TRACE_SNAPSHOT, -+ PORTAL_CMD_INC_PM_CTX_CNT, -+ PORTAL_CMD_DEC_PM_CTX_CNT, -+ PORTAL_CMD_CLOSE_PORTAL, -+ PORTAL_CMD_INVOKE_NOTIFY_42KHZ, -+ PORTAL_CMD_INVALID, -+ PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID, ++enum kbase_caller_mmu_sync_info { ++ CALLER_MMU_UNSET_SYNCHRONICITY, ++ CALLER_MMU_SYNC = 0x02, ++ CALLER_MMU_ASYNC +}; + +/** -+ * DOC: Portal service request command names. ++ * enum kbase_mmu_op_type - enum for MMU operations ++ * @KBASE_MMU_OP_NONE: To help catch uninitialized struct ++ * @KBASE_MMU_OP_FIRST: The lower boundary of enum ++ * @KBASE_MMU_OP_LOCK: Lock memory region ++ * @KBASE_MMU_OP_UNLOCK: Unlock memory region ++ * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) ++ * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) ++ * @KBASE_MMU_OP_COUNT: The upper boundary of enum ++ */ ++enum kbase_mmu_op_type { ++ KBASE_MMU_OP_NONE = 0, /* Must be zero */ ++ KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ ++ KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, ++ KBASE_MMU_OP_UNLOCK, ++ KBASE_MMU_OP_FLUSH_PT, ++ KBASE_MMU_OP_FLUSH_MEM, ++ KBASE_MMU_OP_COUNT /* Must be the last in enum */ ++}; ++ ++/** ++ * kbase_mmu_as_init() - Initialising GPU address space object. + * -+ * The portal request consists of a kutf named u64-value. -+ * For those above enumerated PORTAL_CMD, the names defined -+ * here are used to mark the name and then followed with a sequence number -+ * value. 
Example (manual script here for illustration): -+ * exec 5<>run # open the portal kutf run as fd-5 -+ * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1 -+ * head -n 1 <&5 # read back the 1-line server reseponse -+ * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string -+ * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1 -+ * head -n 1 <&5 # read back the 1-line server reseponse -+ * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" -+ * echo CLOSE_PORTAL=1 >&5 # close the portal -+ * cat <&5 # read back all the response lines -+ * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command -+ * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed. -+ * exec 5>&- # close the service portal fd. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @i: Array index of address space object. + * -+ * Expected request command return format: -+ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}" -+ * Note, the above contains 2-clock with rates in [], GPU idle -+ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}" -+ * Note, 1-clock with rate in [], GPU not idle -+ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" -+ * Note, 1-clock, (start_rate : 0, last_rate : 0x1ad27480, -+ * trace_rate_up_count: 1, trace_rate_down_count : 0) -+ * For the specific sample case here, there is a single rate_trace event -+ * that yielded a rate increase change. No rate drop event recorded in the -+ * reporting snapshot duration. -+ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}" -+ * Note, after the increment, M_CTX_CNT is 1. (i.e. 0 -> 1) -+ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}" -+ * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0) -+ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}" -+ * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be -+ * dropped down to 0 as part of the portal close clean up. ++ * This is called from device probe to initialise an address space object ++ * of the device. ++ * ++ * Return: 0 on success and non-zero value on failure. + */ -+#define GET_PLATFORM "GET_PLATFORM" -+#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" -+#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" -+#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" -+#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" -+#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" -+#define CLOSE_PORTAL "CLOSE_PORTAL" -+#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" ++int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i); + +/** -+ * DOC: Portal service response tag names. ++ * kbase_mmu_as_term() - Terminate address space object. + * -+ * The response consists of a kutf named string-value. -+ * In case of a 'NACK' (negative acknowledgment), it can be one of the two formats: -+ * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2. -+ * Note, the portal has received a valid name and valid sequence number -+ * but can't carry-out the request, reason in the MSG field. -+ * 2. NACK="Failing-message" -+ * Note, unable to parse a valid name or valid sequence number, -+ * or some internal error condition. Reason in the quoted string. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @i: Array index of address space object. ++ * ++ * This is called upon device termination to destroy ++ * the address space object of the device. 
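A hedged sketch of how the per-address-space init/term pair documented here might be driven from probe code: initialise every AS and unwind the ones already set up if one fails. The loop bound mirrors kbase_flush_mmu_wqs() earlier in this patch; the surrounding probe/teardown context is assumed.

static int toy_init_all_address_spaces(struct kbase_device *kbdev)
{
        int i, err;

        for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
                err = kbase_mmu_as_init(kbdev, i);
                if (err) {
                        while (i-- > 0)
                                kbase_mmu_as_term(kbdev, i);
                        return err;
                }
        }

        return 0;
}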
+ */ -+#define ACK "ACK" -+#define NACK "NACK" -+#define MAX_REPLY_NAME_LEN 32 ++void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); + -+#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */ -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild -new file mode 100755 -index 000000000..213d6d5ea ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++/** ++ * kbase_mmu_init - Initialise an object representing GPU page tables ++ * ++ * @kbdev: Instance of GPU platform device, allocated from the probe method. ++ * @mmut: GPU page tables to be initialized. ++ * @kctx: Optional kbase context, may be NULL if this set of MMU tables ++ * is not associated with a context. ++ * @group_id: The physical group ID from which to allocate GPU page tables. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * ++ * The structure should be terminated using kbase_mmu_term() ++ * ++ * Return: 0 if successful, otherwise a negative error code. ++ */ ++int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ struct kbase_context *kctx, int group_id); + -+ifeq ($(CONFIG_MALI_KUTF_IRQ_TEST),y) -+obj-m += mali_kutf_irq_test.o ++/** ++ * kbase_mmu_interrupt - Process an MMU interrupt. ++ * ++ * @kbdev: Pointer to the kbase device for which the interrupt happened. ++ * @irq_stat: Value of the MMU_IRQ_STATUS register. ++ * ++ * Process the MMU interrupt that was reported by the &kbase_device. ++ */ ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + -+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o -+endif -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp -new file mode 100755 -index 000000000..155875b9d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp -@@ -0,0 +1,42 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_mmu_term - Terminate an object representing GPU page tables + * -+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * @kbdev: Instance of GPU platform device, allocated from the probe method. ++ * @mmut: GPU page tables to be destroyed. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. 
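A usage sketch based only on the declarations above: create a set of GPU page tables that is not tied to a user context (kctx may be NULL), using physical memory group 0, and tear it down again. Where a real caller does this is not shown by this header, so treat the wrapper as illustrative.

static int toy_setup_device_page_tables(struct kbase_device *kbdev,
                                        struct kbase_mmu_table *mmut)
{
        int err = kbase_mmu_init(kbdev, mmut, NULL, 0);

        if (err)
                return err;

        /* ... map pages with kbase_mmu_insert_pages() and use them ... */

        kbase_mmu_term(kbdev, mmut);
        return 0;
}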
++ * This will free any page tables that have been allocated ++ */ ++void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); ++ ++/** ++ * kbase_mmu_create_ate - Create an address translation entry + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * @kbdev: Instance of GPU platform device, allocated from the probe method. ++ * @phy: Physical address of the page to be mapped for GPU access. ++ * @flags: Bitmask of attributes of the GPU memory region being mapped. ++ * @level: Page table level for which to build an address translation entry. ++ * @group_id: The physical memory group in which the page was allocated. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * This function creates an address translation entry to encode the physical ++ * address of a page to be mapped for access by the GPU, along with any extra ++ * attributes required for the GPU memory region. + * ++ * Return: An address translation entry, either in LPAE or AArch64 format ++ * (depending on the driver's configuration). + */ ++u64 kbase_mmu_create_ate(struct kbase_device *kbdev, ++ struct tagged_addr phy, unsigned long flags, int level, int group_id); + -+bob_kernel_module { -+ name: "mali_kutf_irq_test", -+ defaults: [ -+ "mali_kbase_shared_config_defaults", -+ "kernel_test_configs", -+ "kernel_test_includes", -+ ], -+ srcs: [ -+ "Kbuild", -+ "mali_kutf_irq_test_main.c", -+ ], -+ extra_symbols: [ -+ "mali_kbase", -+ "kutf", -+ ], -+ enabled: false, -+ mali_kutf_irq_test: { -+ kbuild_options: ["CONFIG_MALI_KUTF_IRQ_TEST=y"], -+ enabled: true, -+ }, ++int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int group_id, u64 *dirty_pgds, ++ struct kbase_va_region *reg, bool ignore_page_migration); ++int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, ++ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, ++ int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg, bool ignore_page_migration); ++int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int as_nr, int group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg); ++int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, ++ u64 vpfn, struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int as_nr, int group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ struct kbase_va_region *reg); ++int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, ++ size_t nr, unsigned long flags, int group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info, ++ bool ignore_page_migration); ++int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, ++ struct tagged_addr phys, size_t nr, unsigned long flags, ++ int group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info); ++int 
kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, ++ struct tagged_addr phys, size_t nr, unsigned long flags, ++ int group_id, ++ enum kbase_caller_mmu_sync_info mmu_sync_info); ++ ++/** ++ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table ++ * ++ * @kbdev: Pointer to kbase device. ++ * @mmut: Pointer to GPU MMU page table. ++ * @vpfn: Start page frame number of the GPU virtual pages to unmap. ++ * @phys: Array of physical pages currently mapped to the virtual ++ * pages to unmap, or NULL. This is used for GPU cache maintenance ++ * and page migration support. ++ * @nr_phys_pages: Number of physical pages to flush. ++ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. ++ * @as_nr: Address space number, for GPU cache maintenance operations ++ * that happen outside a specific kbase context. ++ * @ignore_page_migration: Whether page migration metadata should be ignored. ++ * ++ * We actually discard the ATE and free the page table pages if no valid entries ++ * exist in PGD. ++ * ++ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is ++ * currently scheduled into the runpool, and so potentially uses a lot of locks. ++ * These locks must be taken in the correct order with respect to others ++ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more ++ * information. ++ * ++ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, ++ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, ++ * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches ++ * instead of specific physical address ranges. ++ * ++ * Return: 0 on success, otherwise an error code. ++ */ ++int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, ++ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, ++ int as_nr, bool ignore_page_migration); ++ ++int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, ++ struct tagged_addr *phys, size_t nr, ++ unsigned long flags, int const group_id); ++#if MALI_USE_CSF ++/** ++ * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags ++ * ++ * @kbdev: Pointer to kbase device. ++ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update ++ * @phys: Pointer to the array of tagged physical addresses of the physical ++ * pages that are pointed to by the page table entries (that need to ++ * be updated). ++ * @nr: Number of pages to update ++ * @flags: Flags ++ * @group_id: The physical memory group in which the page was allocated. ++ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). ++ * ++ * Return: 0 on success, otherwise an error code. ++ */ ++int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, ++ size_t nr, unsigned long flags, int const group_id); ++#endif ++ ++/** ++ * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages ++ * ++ * @old_phys: Old physical page to be replaced. ++ * @new_phys: New physical page used to replace old physical page. ++ * @old_dma_addr: DMA address of the old page. ++ * @new_dma_addr: DMA address of the new page. ++ * @level: MMU page table level of the provided PGD. ++ * ++ * The page migration process is made of 2 big steps: ++ * ++ * 1) Copy the content of the old page to the new page. 
++ * 2) Remap the virtual page, that is: replace either the ATE (if the old page ++ * was a regular page) or the PTE (if the old page was used as a PGD) in the ++ * MMU page table with the new page. ++ * ++ * During the process, the MMU region is locked to prevent GPU access to the ++ * virtual memory page that is being remapped. ++ * ++ * Before copying the content of the old page to the new page and while the ++ * MMU region is locked, a GPU cache flush is performed to make sure that ++ * pending GPU writes are finalized to the old page before copying. ++ * That is necessary because otherwise there's a risk that GPU writes might ++ * be finalized to the old page, and not new page, after migration. ++ * The MMU region is unlocked only at the end of the migration operation. ++ * ++ * Return: 0 on success, otherwise an error code. ++ */ ++int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, ++ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); ++ ++/** ++ * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches ++ * ++ * @kbdev: Instance of GPU platform device, allocated from the probe method. ++ * @kctx: Pointer to kbase context, it can be NULL if the physical address ++ * range is not associated with User created context. ++ * @phys: Starting address of the physical range to start the operation on. ++ * @size: Number of bytes to work on. ++ * @flush_op: Type of cache flush operation to perform. ++ * ++ * Issue a cache flush physical range command. This function won't perform any ++ * flush if the GPU doesn't support FLUSH_PA_RANGE command. The flush would be ++ * performed only if the context has a JASID assigned to it. ++ * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait(). ++ */ ++void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, ++ phys_addr_t phys, size_t size, ++ enum kbase_mmu_op_type flush_op); ++ ++/** ++ * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. ++ * ++ * @kbdev: Pointer to the kbase device for which bus fault was reported. ++ * @status: Value of the GPU_FAULTSTATUS register. ++ * @as_nr: GPU address space for which the bus fault occurred. ++ * ++ * Process the bus fault interrupt that was reported for a particular GPU ++ * address space. ++ * ++ * Return: zero if the operation was successful, non-zero otherwise. ++ */ ++int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, ++ u32 as_nr); ++ ++/** ++ * kbase_mmu_gpu_fault_interrupt() - Report a GPU fault. ++ * ++ * @kbdev: Kbase device pointer ++ * @status: GPU fault status ++ * @as_nr: Faulty address space ++ * @address: GPU fault address ++ * @as_valid: true if address space is valid ++ * ++ * This function builds GPU fault information to submit a work ++ * for reporting the details of the fault. ++ */ ++void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address, bool as_valid); ++ ++/** ++ * kbase_context_mmu_group_id_get - Decode a memory group ID from ++ * base_context_create_flags ++ * ++ * @flags: Bitmask of flags to pass to base_context_init. ++ * ++ * Memory allocated for GPU page tables will come from the returned group. ++ * ++ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). 
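The helper documented just above packs a small memory group ID into the context-create flags and recovers it again. An illustrative pack/unpack pair with an assumed shift and width (the real BASE_CONTEXT_MMU_GROUP_ID_* encoding is not reproduced here):

#include <stdint.h>

#define TOY_GROUP_ID_SHIFT 3
#define TOY_GROUP_ID_MASK (0xFULL << TOY_GROUP_ID_SHIFT) /* room for 16 groups */

static inline uint64_t toy_group_id_set(uint64_t flags, unsigned int group_id)
{
        return (flags & ~TOY_GROUP_ID_MASK) |
               (((uint64_t)group_id << TOY_GROUP_ID_SHIFT) & TOY_GROUP_ID_MASK);
}

static inline int toy_group_id_get(uint64_t flags)
{
        return (int)((flags & TOY_GROUP_ID_MASK) >> TOY_GROUP_ID_SHIFT);
}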
++ */ ++static inline int ++kbase_context_mmu_group_id_get(base_context_create_flags const flags) ++{ ++ KBASE_DEBUG_ASSERT(flags == ++ (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); ++ return (int)BASE_CONTEXT_MMU_GROUP_ID_GET(flags); +} -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c ++ ++#endif /* _KBASE_MMU_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h new file mode 100644 -index 000000000..f2a014d9b +index 000000000..50d2ea5d0 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c -@@ -0,0 +1,283 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -266543,356 +266846,209 @@ index 000000000..f2a014d9b + * + */ + -+#include -+#include -+#include -+ -+#include "mali_kbase.h" -+#include -+#include -+#include ++/** ++ * DOC: Interface file for accessing MMU hardware functionality ++ * ++ * This module provides an abstraction for accessing the functionality provided ++ * by the midgard MMU and thus allows all MMU HW access to be contained within ++ * one common place and allows for different backends (implementations) to ++ * be provided. ++ */ + -+#include -+#include ++#ifndef _KBASE_MMU_HW_H_ ++#define _KBASE_MMU_HW_H_ + -+/* -+ * This file contains the code which is used for measuring interrupt latency -+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is -+ * used with this purpose and it is called within KUTF framework - a kernel -+ * unit test framework. The measured latency provided by this test should -+ * be representative for the latency of the Mali JOB/MMU IRQs as well. -+ */ ++#include "mali_kbase_mmu.h" + -+/* KUTF test application pointer for this test */ -+static struct kutf_application *irq_app; ++/* Forward declarations */ ++struct kbase_device; ++struct kbase_as; ++struct kbase_context; + +/** -+ * struct kutf_irq_fixture_data - test fixture used by the test functions. -+ * @kbdev: kbase device for the GPU. -+ * ++ * enum kbase_mmu_fault_type - MMU fault type descriptor. ++ * @KBASE_MMU_FAULT_TYPE_UNKNOWN: unknown fault ++ * @KBASE_MMU_FAULT_TYPE_PAGE: page fault ++ * @KBASE_MMU_FAULT_TYPE_BUS: nus fault ++ * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault ++ * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED: bus_unexpected fault + */ -+struct kutf_irq_fixture_data { -+ struct kbase_device *kbdev; ++enum kbase_mmu_fault_type { ++ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, ++ KBASE_MMU_FAULT_TYPE_PAGE, ++ KBASE_MMU_FAULT_TYPE_BUS, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED +}; + -+/* ID for the GPU IRQ */ -+#define GPU_IRQ_HANDLER 2 -+ -+#define NR_TEST_IRQS ((u32)1000000) -+ -+/* IRQ for the test to trigger. Currently POWER_CHANGED_SINGLE as it is -+ * otherwise unused in the DDK ++/** ++ * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions ++ * @vpfn: MMU Virtual Page Frame Number to start the operation on. 
++ * @nr: Number of pages to work on. ++ * @op: Operation type (written to ASn_COMMAND). ++ * @kctx_id: Kernel context ID for MMU command tracepoint. ++ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. ++ * @flush_skip_levels: Page table levels to skip flushing. (Only ++ * applicable if GPU supports feature) + */ -+#define TEST_IRQ POWER_CHANGED_SINGLE -+ -+#define IRQ_TIMEOUT HZ -+ -+/* Kernel API for setting irq throttle hook callback and irq time in us*/ -+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, -+ int irq_type); -+extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); -+ -+static DECLARE_WAIT_QUEUE_HEAD(wait); -+static bool triggered; -+static u64 irq_time; -+ -+static void *kbase_untag(void *ptr) -+{ -+ return (void *)(((uintptr_t) ptr) & ~3); -+} ++struct kbase_mmu_hw_op_param { ++ u64 vpfn; ++ u32 nr; ++ enum kbase_mmu_op_type op; ++ u32 kctx_id; ++ enum kbase_caller_mmu_sync_info mmu_sync_info; ++ u64 flush_skip_levels; ++}; + +/** -+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler -+ * @irq: IRQ number -+ * @data: Data associated with this IRQ ++ * kbase_mmu_hw_configure - Configure an address space for use. ++ * @kbdev: kbase device to configure. ++ * @as: address space to configure. + * -+ * Return: state of the IRQ ++ * Configure the MMU using the address space details setup in the ++ * kbase_context structure. + */ -+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) -+{ -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); -+ irqreturn_t result; -+ u64 tval; -+ bool has_test_irq = val & TEST_IRQ; -+ -+ if (has_test_irq) { -+ tval = ktime_get_real_ns(); -+ /* Clear the test source only here */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), -+ TEST_IRQ); -+ /* Remove the test IRQ status bit */ -+ val = val ^ TEST_IRQ; -+ } -+ -+ result = kbase_gpu_irq_test_handler(irq, data, val); -+ -+ if (has_test_irq) { -+ irq_time = tval; -+ triggered = true; -+ wake_up(&wait); -+ result = IRQ_HANDLED; -+ } -+ -+ return result; -+} ++void kbase_mmu_hw_configure(struct kbase_device *kbdev, ++ struct kbase_as *as); + +/** -+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required -+ * for all the tests in the irq suite. -+ * @context: KUTF context. ++ * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program ++ * the LOCKADDR register. + * -+ * Return: Fixture data created on success or NULL on failure ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * hwaccess_lock needs to be held when calling this function. ++ * ++ * Return: 0 if issuing the command was successful, otherwise an error code. 
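The parameter block defined above is what the kbase_mmu_hw_do_*() helpers below consume. A hedged example of filling it for a synchronous page-table flush covering 64 pages of one context; the concrete values are illustrative only.

static void toy_fill_flush_params(struct kbase_mmu_hw_op_param *op_param,
                                  u64 start_vpfn, u32 kctx_id)
{
        op_param->vpfn = start_vpfn;
        op_param->nr = 64;
        op_param->op = KBASE_MMU_OP_FLUSH_PT;
        op_param->kctx_id = kctx_id;
        op_param->mmu_sync_info = CALLER_MMU_SYNC;
        op_param->flush_skip_levels = 0; /* do not skip any page table level */
}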
+ */ -+static void *mali_kutf_irq_default_create_fixture( -+ struct kutf_context *context) -+{ -+ struct kutf_irq_fixture_data *data; -+ -+ data = kutf_mempool_alloc(&context->fixture_pool, -+ sizeof(struct kutf_irq_fixture_data)); -+ -+ if (!data) -+ goto fail; -+ -+ /* Acquire the kbase device */ -+ data->kbdev = kbase_find_device(-1); -+ if (data->kbdev == NULL) { -+ kutf_test_fail(context, "Failed to find kbase device"); -+ goto fail; -+ } -+ -+ return data; -+ -+fail: -+ return NULL; -+} ++int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); + +/** -+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously -+ * created by mali_kutf_irq_default_create_fixture. ++ * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without ++ * programming the LOCKADDR register and wait ++ * for it to complete before returning. + * -+ * @context: KUTF context. ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * This function should be called for GPU where GPU command is used to flush ++ * the cache(s) instead of MMU command. ++ * ++ * Return: 0 if issuing the command was successful, otherwise an error code. + */ -+static void mali_kutf_irq_default_remove_fixture( -+ struct kutf_context *context) -+{ -+ struct kutf_irq_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ -+ kbase_release_device(kbdev); -+} ++int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); + +/** -+ * mali_kutf_irq_latency() - measure GPU IRQ latency -+ * @context: kutf context within which to perform the test ++ * kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it ++ * to complete before returning. + * -+ * The test triggers IRQs manually, and measures the -+ * time between triggering the IRQ and the IRQ handler being executed. ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. + * -+ * This is not a traditional test, in that the pass/fail status has little -+ * meaning (other than indicating that the IRQ handler executed at all). Instead -+ * the results are in the latencies provided with the test result. There is no -+ * meaningful pass/fail result that can be obtained here, instead the latencies -+ * are provided for manual analysis only. ++ * Return: 0 if issuing the command was successful, otherwise an error code. 
+ */ -+static void mali_kutf_irq_latency(struct kutf_context *context) -+{ -+ struct kutf_irq_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ u64 min_time = U64_MAX, max_time = 0, average_time = 0; -+ u32 i; -+ const char *results; -+ -+ /* Force GPU to be powered */ -+ kbase_pm_context_active(kbdev); -+ kbase_pm_wait_for_desired_state(kbdev); -+ -+ kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, -+ GPU_IRQ_HANDLER); -+ -+ for (i = 1; i <= NR_TEST_IRQS; i++) { -+ u64 start_time = ktime_get_real_ns(); -+ -+ triggered = false; -+ -+ /* Trigger fake IRQ */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), -+ TEST_IRQ); -+ -+ if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { -+ /* Wait extra time to see if it would come */ -+ wait_event_timeout(wait, triggered, 10 * IRQ_TIMEOUT); -+ break; -+ } -+ -+ if ((irq_time - start_time) < min_time) -+ min_time = irq_time - start_time; -+ if ((irq_time - start_time) > max_time) -+ max_time = irq_time - start_time; -+ average_time += irq_time - start_time; -+ -+ udelay(10); -+ /* Sleep for a ms, every 10000 iterations, to avoid misleading warning -+ * of CPU softlockup when all GPU IRQs keep going to the same CPU. -+ */ -+ if (!(i % 10000)) -+ msleep(1); -+ } -+ -+ /* Go back to default handler */ -+ kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); -+ -+ kbase_pm_context_idle(kbdev); -+ -+ if (i > NR_TEST_IRQS) { -+ do_div(average_time, NR_TEST_IRQS); -+ results = kutf_dsprintf(&context->fixture_pool, -+ "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", -+ min_time, max_time, average_time); -+ kutf_test_pass(context, results); -+ } else { -+ results = kutf_dsprintf(&context->fixture_pool, -+ "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", -+ i, NR_TEST_IRQS, triggered); -+ kutf_test_fail(context, results); -+ } -+} -+ ++int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); +/** -+ * mali_kutf_irq_test_main_init - Module entry point for this test. ++ * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. + * -+ * Return: 0 on success, error code otherwise ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * Issue a flush operation on the address space as per the information ++ * specified inside @op_param. This function should not be called for ++ * GPUs where MMU command to flush the cache(s) is deprecated. ++ * mmu_hw_mutex needs to be held when calling this function. ++ * ++ * Return: 0 if the operation was successful, non-zero otherwise. 
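The unlock helpers above come in two flavours, and kbase_mmu_migrate_page() earlier in this patch shows the selection logic: use kbase_mmu_hw_do_unlock() when cache flushes go through GPU control, otherwise flush via a GPU cache command and finish with kbase_mmu_hw_do_unlock_no_addr(). Restated as a small wrapper that mirrors that code rather than adding new behaviour:

static int toy_flush_and_unlock(struct kbase_device *kbdev, struct kbase_as *as,
                                const struct kbase_mmu_hw_op_param *op_param)
{
        int ret;

        if (mmu_flush_cache_on_gpu_ctrl(kbdev))
                return kbase_mmu_hw_do_unlock(kbdev, as, op_param);

        ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
        if (!ret)
                ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);

        return ret;
}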
+ */ -+static int __init mali_kutf_irq_test_main_init(void) -+{ -+ struct kutf_suite *suite; -+ -+ irq_app = kutf_create_application("irq"); -+ -+ if (irq_app == NULL) { -+ pr_warn("Creation of test application failed!\n"); -+ return -ENOMEM; -+ } -+ -+ suite = kutf_create_suite(irq_app, "irq_default", -+ 1, mali_kutf_irq_default_create_fixture, -+ mali_kutf_irq_default_remove_fixture); -+ -+ if (suite == NULL) { -+ pr_warn("Creation of test suite failed!\n"); -+ kutf_destroy_application(irq_app); -+ return -ENOMEM; -+ } -+ -+ kutf_add_test(suite, 0x0, "irq_latency", -+ mali_kutf_irq_latency); -+ return 0; -+} ++int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); + +/** -+ * mali_kutf_irq_test_main_exit - Module exit point for this test. ++ * kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU. ++ * ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. ++ * ++ * Issue a flush operation on the address space as per the information ++ * specified inside @op_param. This function should not be called for ++ * GPUs where MMU command to flush the cache(s) is deprecated. ++ * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this ++ * function. ++ * ++ * Return: 0 if the operation was successful, non-zero otherwise. + */ -+static void __exit mali_kutf_irq_test_main_exit(void) -+{ -+ kutf_destroy_application(irq_app); -+} -+ -+module_init(mali_kutf_irq_test_main_init); -+module_exit(mali_kutf_irq_test_main_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_VERSION("1.0"); -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild -new file mode 100755 -index 000000000..e9bff98b8 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild -@@ -0,0 +1,25 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+# -+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. 
-+# -+# -+ -+ifeq ($(CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST),y) -+obj-m += mali_kutf_mgm_integration_test.o ++int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); + -+mali_kutf_mgm_integration_test-y := mali_kutf_mgm_integration_test_main.o -+endif -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp -new file mode 100755 -index 000000000..8b995f8a0 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* ++/** ++ * kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU. + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to struct containing information about the MMU ++ * operation to perform. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. ++ * Issue a flush operation on the address space as per the information ++ * specified inside @op_param. GPU command is used to flush the cache(s) ++ * instead of the MMU command. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++ * Return: 0 if the operation was successful, non-zero otherwise. ++ */ ++int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param); ++ ++/** ++ * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by ++ * the MMU. ++ * @kbdev: kbase device to clear the fault from. ++ * @as: address space to clear the fault from. ++ * @type: The type of fault that needs to be cleared. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * Clear a bus error or page fault that has been reported by the MMU. ++ */ ++void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ enum kbase_mmu_fault_type type); ++ ++/** ++ * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by ++ * the MMU. ++ * @kbdev: kbase device to again enable the fault from. ++ * @as: address space to again enable the fault from. ++ * @type: The type of fault that needs to be enabled again. + * ++ * After a page fault or bus error has been reported by the MMU these ++ * will be disabled. After these are handled this function needs to be ++ * called to enable the page fault or bus error fault again. 
+ */ -+bob_kernel_module { -+ name: "mali_kutf_mgm_integration_test", -+ defaults: [ -+ "mali_kbase_shared_config_defaults", -+ "kernel_test_configs", -+ "kernel_test_includes", -+ ], -+ srcs: [ -+ "Kbuild", -+ "mali_kutf_mgm_integration_test_main.c", -+ ], -+ extra_symbols: [ -+ "mali_kbase", -+ "kutf", -+ ], -+ enabled: false, -+ mali_kutf_mgm_integration_test: { -+ kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"], -+ enabled: true, -+ }, -+} -diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c ++void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ enum kbase_mmu_fault_type type); ++ ++#endif /* _KBASE_MMU_HW_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c new file mode 100644 -index 000000000..5a42bd675 +index 000000000..3f6da35d8 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c -@@ -0,0 +1,210 @@ ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +@@ -0,0 +1,699 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -266909,893 +267065,695 @@ index 000000000..5a42bd675 + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ -+#include -+#include "mali_kbase.h" -+#include -+#include -+#include -+#include -+ -+#define MINOR_FOR_FIRST_KBASE_DEV (-1) -+ -+#define BASE_MEM_GROUP_COUNT (16) -+#define PA_MAX ((1ULL << 48) - 1) -+#define PA_START_BIT 12 -+#define ENTRY_ACCESS_BIT (1ULL << 10) -+ -+#define ENTRY_IS_ATE_L3 3ULL -+#define ENTRY_IS_ATE_L02 1ULL -+ -+#define MGM_INTEGRATION_SUITE_NAME "mgm_integration" -+#define MGM_INTEGRATION_PTE_TRANSLATION "pte_translation" -+ -+static char msg_buf[KUTF_MAX_LINE_LENGTH]; + -+/* KUTF test application pointer for this test */ -+struct kutf_application *mgm_app; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + ++#if MALI_USE_CSF +/** -+ * struct kutf_mgm_fixture_data - test fixture used by test functions -+ * @kbdev: kbase device for the GPU. -+ * @group_id: Memory group ID to test based on fixture index. ++ * mmu_has_flush_skip_pgd_levels() - Check if the GPU has the feature ++ * AS_LOCKADDR_FLUSH_SKIP_LEVELS ++ * ++ * @gpu_props: GPU properties for the GPU instance. ++ * ++ * This function returns whether a cache flush can apply the skip flags of ++ * AS_LOCKADDR_FLUSH_SKIP_LEVELS. ++ * ++ * Return: True if cache flush has the said feature. + */ -+struct kutf_mgm_fixture_data { -+ struct kbase_device *kbdev; -+ int group_id; -+}; ++static bool mmu_has_flush_skip_pgd_levels(struct kbase_gpu_props const *gpu_props) ++{ ++ u32 const signature = ++ gpu_props->props.raw_props.gpu_id & (GPU_ID2_ARCH_MAJOR | GPU_ID2_ARCH_REV); ++ ++ return signature >= (u32)GPU_ID2_PRODUCT_MAKE(12, 0, 4, 0); ++} ++#endif + +/** -+ * mali_kutf_mgm_pte_translation_test() - Tests forward and reverse translation -+ * of PTE by the MGM module -+ * @context: KUTF context within which to perform the test. 
++ * lock_region() - Generate lockaddr to lock memory region in MMU + * -+ * This test creates PTEs with physical addresses in the range -+ * 0x0000-0xFFFFFFFFF000 and tests that mgm_update_gpu_pte() returns a different -+ * PTE and mgm_pte_to_original_pte() returns the original PTE. This is tested -+ * at MMU level 2 and 3 as mgm_update_gpu_pte() is called for ATEs only. ++ * @gpu_props: GPU properties for finding the MMU lock region size. ++ * @lockaddr: Address and size of memory region to lock. ++ * @op_param: Pointer to a struct containing the starting page frame number of ++ * the region to lock, the number of pages to lock and page table ++ * levels to skip when flushing (if supported). + * -+ * This test is run for a specific group_id depending on the fixture_id. ++ * The lockaddr value is a combination of the starting address and ++ * the size of the region that encompasses all the memory pages to lock. ++ * ++ * Bits 5:0 are used to represent the size, which must be a power of 2. ++ * The smallest amount of memory to be locked corresponds to 32 kB, ++ * i.e. 8 memory pages, because a MMU cache line is made of 64 bytes ++ * and every page table entry is 8 bytes. Therefore it is not possible ++ * to lock less than 8 memory pages at a time. ++ * ++ * The size is expressed as a logarithm minus one: ++ * - A value of 14 is thus interpreted as log(32 kB) = 15, where 32 kB ++ * is the smallest possible size. ++ * - Likewise, a value of 47 is interpreted as log(256 TB) = 48, where 256 TB ++ * is the largest possible size (implementation defined value according ++ * to the HW spec). ++ * ++ * Bits 11:6 are reserved. ++ * ++ * Bits 63:12 are used to represent the base address of the region to lock. ++ * Only the upper bits of the address are used; lowest bits are cleared ++ * to avoid confusion. ++ * ++ * The address is aligned to a multiple of the region size. This has profound ++ * implications on the region size itself: often the MMU will lock a region ++ * larger than the given number of pages, because the lock region cannot start ++ * from any arbitrary address. ++ * ++ * Return: 0 if success, or an error code on failure. + */ -+static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context) ++static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, ++ const struct kbase_mmu_hw_op_param *op_param) +{ -+ struct kutf_mgm_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; -+ u64 addr; ++ const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT; ++ const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1; ++ u64 lockaddr_size_log2; + -+ for (addr = 1 << (PA_START_BIT - 1); addr <= PA_MAX; addr <<= 1) { -+ /* Mask 1 << 11 by ~0xFFF to get 0x0000 at first iteration */ -+ phys_addr_t pa = addr; -+ u8 mmu_level; ++ if (op_param->nr == 0) ++ return -EINVAL; + -+ /* Test MMU level 3 and 2 (2MB pages) only */ -+ for (mmu_level = MIDGARD_MMU_LEVEL(2); mmu_level <= MIDGARD_MMU_LEVEL(3); -+ mmu_level++) { -+ u64 translated_pte; -+ u64 returned_pte; -+ u64 original_pte; ++ /* The MMU lock region is a self-aligned region whose size ++ * is a power of 2 and that contains both start and end ++ * of the address range determined by pfn and num_pages. ++ * The size of the MMU lock region can be defined as the ++ * largest divisor that yields the same result when both ++ * start and end addresses are divided by it. 
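/*
 * Self-contained sketch of the lock-region encoding described in this
 * comment, intended only as an illustration. Assumptions: fls64() is
 * emulated with 64 - __builtin_clzll() (valid for non-zero input), the
 * minimum region size is fixed at 32 kB (log2 = 15) instead of calling
 * kbase_get_lock_region_min_size_log2(), and the upper size cap is
 * omitted. pfn 0x4F with 4 kB pages reproduces the 0x4F000..0x50FFF
 * byte range used in the example that follows.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_lockaddr(uint64_t pfn, uint64_t nr_pages)
{
	const uint64_t start = pfn << 12;                  /* first byte */
	const uint64_t end = ((pfn + nr_pages) << 12) - 1; /* last byte  */
	/* Highest differing bit selects the smallest self-aligned region. */
	uint64_t size_log2 = 64 - __builtin_clzll(start ^ end);

	if (size_log2 < 15) /* never lock less than 32 kB */
		size_log2 = 15;

	/* Base aligned down to the region size, size stored as log2 - 1. */
	return (start & ~((1ull << size_log2) - 1)) | (size_log2 - 1);
}

int main(void)
{
	/* Prints 0x40010: region [0x40000, 0x5FFFF], 128 kB, size field 16. */
	printf("lockaddr = 0x%llx\n", (unsigned long long)demo_lockaddr(0x4F, 2));
	return 0;
}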
++ * ++ * For instance: pfn=0x4F000 num_pages=2 describe the ++ * address range between 0x4F000 and 0x50FFF. It is only ++ * 2 memory pages. However there isn't a single lock region ++ * of 8 kB that encompasses both addresses because 0x4F000 ++ * would fall into the [0x4E000, 0x4FFFF] region while ++ * 0x50000 would fall into the [0x50000, 0x51FFF] region. ++ * The minimum lock region size that includes the entire ++ * address range is 128 kB, and the region would be ++ * [0x40000, 0x5FFFF]. ++ * ++ * The region size can be found by comparing the desired ++ * start and end addresses and finding the highest bit ++ * that differs. The smallest naturally aligned region ++ * must include this bit change, hence the desired region ++ * starts with this bit (and subsequent bits) set to 0 ++ * and ends with the bit (and subsequent bits) set to 1. ++ * ++ * In the example above: 0x4F000 ^ 0x50FFF = 0x1FFFF ++ * therefore the highest bit that differs is bit #16 ++ * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB. ++ */ ++ lockaddr_size_log2 = fls64(lockaddr_base ^ lockaddr_end); + -+ if (mmu_level == MIDGARD_MMU_LEVEL(3)) -+ original_pte = -+ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3; -+ else -+ original_pte = -+ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02; ++ /* Cap the size against minimum and maximum values allowed. */ ++ if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) ++ return -EINVAL; + -+ dev_dbg(kbdev->dev, "Testing group_id=%u, mmu_level=%u, pte=0x%llx\n", -+ data->group_id, mmu_level, original_pte); ++ lockaddr_size_log2 = ++ MAX(lockaddr_size_log2, kbase_get_lock_region_min_size_log2(gpu_props)); + -+ translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id, -+ mmu_level, original_pte); -+ if (translated_pte == original_pte) { -+ snprintf( -+ msg_buf, sizeof(msg_buf), -+ "PTE unchanged. translated_pte (0x%llx) == original_pte (0x%llx) for mmu_level=%u, group_id=%d", -+ translated_pte, original_pte, mmu_level, data->group_id); -+ kutf_test_fail(context, msg_buf); -+ return; -+ } ++ /* Represent the result in a way that is compatible with HW spec. ++ * ++ * Upper bits are used for the base address, whose lower bits ++ * are cleared to avoid confusion because they are going to be ignored ++ * by the MMU anyway, since lock regions shall be aligned with ++ * a multiple of their size and cannot start from any address. ++ * ++ * Lower bits are used for the size, which is represented as ++ * logarithm minus one of the actual size. ++ */ ++ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); ++ *lockaddr |= lockaddr_size_log2 - 1; + -+ returned_pte = mgm_dev->ops.mgm_pte_to_original_pte( -+ mgm_dev, data->group_id, mmu_level, translated_pte); -+ dev_dbg(kbdev->dev, "\treturned_pte=%llx\n", returned_pte); ++#if MALI_USE_CSF ++ if (mmu_has_flush_skip_pgd_levels(gpu_props)) ++ *lockaddr = ++ AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(*lockaddr, op_param->flush_skip_levels); ++#endif + -+ if (returned_pte != original_pte) { -+ snprintf( -+ msg_buf, sizeof(msg_buf), -+ "Original PTE not returned. 
returned_pte (0x%llx) != origin al_pte (0x%llx) for mmu_level=%u, group_id=%d", -+ returned_pte, original_pte, mmu_level, data->group_id); -+ kutf_test_fail(context, msg_buf); -+ return; -+ } -+ } -+ } -+ snprintf(msg_buf, sizeof(msg_buf), "Translation passed for group_id=%d", data->group_id); -+ kutf_test_pass(context, msg_buf); ++ return 0; +} + +/** -+ * mali_kutf_mgm_integration_create_fixture() - Creates the fixture data -+ * required for all tests in the mgm integration suite. -+ * @context: KUTF context. ++ * wait_ready() - Wait for previously issued MMU command to complete. + * -+ * Return: Fixture data created on success or NULL on failure ++ * @kbdev: Kbase device to wait for a MMU command to complete. ++ * @as_nr: Address space to wait for a MMU command to complete. ++ * ++ * Reset GPU if the wait for previously issued command fails. ++ * ++ * Return: 0 on successful completion. negative error on failure. + */ -+static void *mali_kutf_mgm_integration_create_fixture(struct kutf_context *context) ++static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr) +{ -+ struct kutf_mgm_fixture_data *data; -+ struct kbase_device *kbdev; ++ const ktime_t wait_loop_start = ktime_get_raw(); ++ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; ++ s64 diff; + -+ pr_debug("Finding kbase device\n"); -+ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); -+ if (kbdev == NULL) { -+ kutf_test_fail(context, "Failed to find kbase device"); -+ return NULL; -+ } -+ pr_debug("Creating fixture\n"); ++ if (unlikely(kbdev->as[as_nr].is_unresponsive)) ++ return -EBUSY; + -+ data = kutf_mempool_alloc(&context->fixture_pool, sizeof(struct kutf_mgm_fixture_data)); -+ if (!data) -+ return NULL; -+ data->kbdev = kbdev; -+ data->group_id = context->fixture_index; ++ do { ++ unsigned int i; + -+ pr_debug("Fixture created\n"); -+ return data; -+} ++ for (i = 0; i < 1000; i++) { ++ /* Wait for the MMU status to indicate there is no active command */ ++ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) & ++ AS_STATUS_AS_ACTIVE)) ++ return 0; ++ } + -+/** -+ * mali_kutf_mgm_integration_remove_fixture() - Destroy fixture data previously -+ * created by mali_kutf_mgm_integration_create_fixture. -+ * @context: KUTF context. -+ */ -+static void mali_kutf_mgm_integration_remove_fixture(struct kutf_context *context) -+{ -+ struct kutf_mgm_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; ++ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start)); ++ } while (diff < mmu_as_inactive_wait_time_ms); + -+ kbase_release_device(kbdev); ++ dev_err(kbdev->dev, ++ "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system", ++ as_nr); ++ kbdev->as[as_nr].is_unresponsive = true; ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu_locked(kbdev); ++ ++ return -ETIMEDOUT; +} + -+/** -+ * mali_kutf_mgm_integration_test_main_init() - Module entry point for this test. -+ * -+ * Return: 0 on success, error code on failure. 
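/*
 * Minimal sketch of the register programming pattern kbase_mmu_hw_configure()
 * uses for AS_TRANSCFG / AS_TRANSTAB in this file: clear a field, set the
 * wanted encoding, then write the 64-bit value as two 32-bit _LO / _HI
 * halves. The offsets, field mask and reg_write() below are invented for
 * the sketch; only the pattern mirrors the driver code.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PTW_MEMATTR_MASK (3ull << 24) /* made-up field position */
#define DEMO_PTW_MEMATTR_WB   (1ull << 24) /* made-up write-back encoding */

static void reg_write(unsigned int offset, unsigned int value)
{
	printf("write reg 0x%03x = 0x%08x\n", offset, value);
}

static void demo_configure(uint64_t transcfg)
{
	/* Clear the whole field first, then enable the desired encoding. */
	transcfg &= ~DEMO_PTW_MEMATTR_MASK;
	transcfg |= DEMO_PTW_MEMATTR_WB;

	/* A 64-bit setup value is programmed as two 32-bit halves. */
	reg_write(0x030, (unsigned int)(transcfg & 0xFFFFFFFFul));         /* _LO */
	reg_write(0x034, (unsigned int)((transcfg >> 32) & 0xFFFFFFFFul)); /* _HI */
}

int main(void)
{
	demo_configure(0x0123456789abcdefull);
	return 0;
}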
-+ */ -+static int __init mali_kutf_mgm_integration_test_main_init(void) ++static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) +{ -+ struct kutf_suite *suite; ++ /* write AS_COMMAND when MMU is ready to accept another command */ ++ const int status = wait_ready(kbdev, as_nr); + -+ mgm_app = kutf_create_application("mgm"); ++ if (likely(status == 0)) ++ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); ++ else if (status == -EBUSY) { ++ dev_dbg(kbdev->dev, ++ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u", ++ as_nr, cmd); ++ } else { ++ dev_err(kbdev->dev, ++ "Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u", ++ as_nr, cmd); ++ } + -+ if (mgm_app == NULL) { -+ pr_warn("Creation of mgm KUTF app failed!\n"); -+ return -ENOMEM; ++ return status; ++} ++ ++#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++static int wait_cores_power_trans_complete(struct kbase_device *kbdev) ++{ ++#define WAIT_TIMEOUT 1000 /* 1ms timeout */ ++#define DELAY_TIME_IN_US 1 ++ const int max_iterations = WAIT_TIMEOUT; ++ int loop; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ for (loop = 0; loop < max_iterations; loop++) { ++ u32 lo = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO)); ++ u32 hi = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI)); ++ ++ if (!lo && !hi) ++ break; ++ ++ udelay(DELAY_TIME_IN_US); + } -+ suite = kutf_create_suite(mgm_app, MGM_INTEGRATION_SUITE_NAME, BASE_MEM_GROUP_COUNT, -+ mali_kutf_mgm_integration_create_fixture, -+ mali_kutf_mgm_integration_remove_fixture); -+ if (suite == NULL) { -+ pr_warn("Creation of %s suite failed!\n", MGM_INTEGRATION_SUITE_NAME); -+ kutf_destroy_application(mgm_app); -+ return -ENOMEM; ++ ++ if (loop == max_iterations) { ++ dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long"); ++ return -ETIMEDOUT; + } -+ kutf_add_test(suite, 0x0, MGM_INTEGRATION_PTE_TRANSLATION, -+ mali_kutf_mgm_pte_translation_test); ++ + return 0; +} + +/** -+ * mali_kutf_mgm_integration_test_main_exit() - Module exit point for this test. ++ * apply_hw_issue_GPU2019_3901_wa - Apply WA for the HW issue GPU2019_3901 ++ * ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @mmu_cmd: Pointer to the variable contain the value of MMU command ++ * that needs to be sent to flush the L2 cache and do an ++ * implicit unlock. ++ * @as_nr: Address space number for which MMU command needs to be ++ * sent. ++ * ++ * This function ensures that the flush of LSC is not missed for the pages that ++ * were unmapped from the GPU, due to the power down transition of shader cores. ++ * ++ * Return: 0 if the WA was successfully applied, non-zero otherwise. + */ -+static void __exit mali_kutf_mgm_integration_test_main_exit(void) ++static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_cmd, ++ unsigned int as_nr) +{ -+ kutf_destroy_application(mgm_app); -+} ++ int ret = 0; + -+module_init(mali_kutf_mgm_integration_test_main_init); -+module_exit(mali_kutf_mgm_integration_test_main_exit); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_VERSION("1.0"); -diff --git a/drivers/gpu/arm/bifrost/thirdparty/Kbuild b/drivers/gpu/arm/bifrost/thirdparty/Kbuild -new file mode 100755 -index 000000000..558be077d ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/thirdparty/Kbuild -@@ -0,0 +1,21 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# -+# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# ++ /* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so ++ * the workaround can be safely skipped. ++ */ ++ if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { ++ if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) { ++ dev_warn(kbdev->dev, ++ "Unexpected mmu command received"); ++ return -EINVAL; ++ } + -+bifrost_kbase-y += thirdparty/mali_kbase_mmap.o -diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c -new file mode 100644 -index 000000000..1e636b9a7 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c -@@ -0,0 +1,420 @@ -+/* -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ */ ++ /* Wait for the LOCK MMU command to complete, issued by the caller */ ++ ret = wait_ready(kbdev, as_nr); ++ if (unlikely(ret)) ++ return ret; + -+#include "linux/mman.h" -+#include -+#include ++ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, ++ GPU_COMMAND_CACHE_CLN_INV_LSC); ++ if (unlikely(ret)) ++ return ret; + -+/* mali_kbase_mmap.c -+ * -+ * This file contains Linux specific implementation of -+ * kbase_context_get_unmapped_area() interface. -+ */ ++ ret = wait_cores_power_trans_complete(kbdev); ++ if (unlikely(ret)) { ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu_locked(kbdev); ++ return ret; ++ } + ++ /* As LSC is guaranteed to have been flushed we can use FLUSH_PT ++ * MMU command to only flush the L2. ++ */ ++ *mmu_cmd = AS_COMMAND_FLUSH_PT; ++ } + -+/** -+ * align_and_check() - Align the specified pointer to the provided alignment and -+ * check that it is still in range. 
-+ * @gap_end: Highest possible start address for allocation (end of gap in -+ * address space) -+ * @gap_start: Start address of current memory area / gap in address space -+ * @info: vm_unmapped_area_info structure passed to caller, containing -+ * alignment, length and limits for the allocation -+ * @is_shader_code: True if the allocation is for shader code (which has -+ * additional alignment requirements) -+ * @is_same_4gb_page: True if the allocation needs to reside completely within -+ * a 4GB chunk -+ * -+ * Return: true if gap_end is now aligned correctly and is still in range, -+ * false otherwise -+ */ -+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, -+ struct vm_unmapped_area_info *info, bool is_shader_code, -+ bool is_same_4gb_page) ++ return ret; ++} ++#endif ++ ++void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) +{ -+ /* Compute highest gap address at the desired alignment */ -+ (*gap_end) -= info->length; -+ (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; ++ struct kbase_mmu_setup *current_setup = &as->current_setup; ++ u64 transcfg = 0; + -+ if (is_shader_code) { -+ /* Check for 4GB boundary */ -+ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) -+ (*gap_end) -= (info->align_offset ? info->align_offset : -+ info->length); -+ if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) -+ (*gap_end) -= (info->align_offset ? info->align_offset : -+ info->length); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+ if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + -+ info->length) & BASE_MEM_MASK_4GB)) -+ return false; -+ } else if (is_same_4gb_page) { -+ unsigned long start = *gap_end; -+ unsigned long end = *gap_end + info->length; -+ unsigned long mask = ~((unsigned long)U32_MAX); ++ transcfg = current_setup->transcfg; + -+ /* Check if 4GB boundary is straddled */ -+ if ((start & mask) != ((end - 1) & mask)) { -+ unsigned long offset = end - (end & mask); -+ /* This is to ensure that alignment doesn't get -+ * disturbed in an attempt to prevent straddling at -+ * 4GB boundary. The GPU VA is aligned to 2MB when the -+ * allocation size is > 2MB and there is enough CPU & -+ * GPU virtual space. -+ */ -+ unsigned long rounded_offset = -+ ALIGN(offset, info->align_mask + 1); ++ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK ++ * Clear PTW_MEMATTR bits ++ */ ++ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; ++ /* Enable correct PTW_MEMATTR bits */ ++ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; ++ /* Ensure page-tables reads use read-allocate cache-policy in ++ * the L2 ++ */ ++ transcfg |= AS_TRANSCFG_R_ALLOCATE; + -+ start -= rounded_offset; -+ end -= rounded_offset; ++ if (kbdev->system_coherency != COHERENCY_NONE) { ++ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) ++ * Clear PTW_SH bits ++ */ ++ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); ++ /* Enable correct PTW_SH bits */ ++ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); ++ } + -+ *gap_end = start; ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), ++ transcfg); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), ++ (transcfg >> 32) & 0xFFFFFFFFUL); + -+ /* The preceding 4GB boundary shall not get straddled, -+ * even after accounting for the alignment, as the -+ * size of allocation is limited to 4GB and the initial -+ * start location was already aligned. 
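/*
 * Compact illustration of the is_same_4gb_page check above: a candidate
 * range straddles a 4 GB boundary exactly when the upper 32 address bits
 * of its first and last byte differ. Plain user-space C; the example
 * ranges are chosen only for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

static bool straddles_4gb(uint64_t start, uint64_t length)
{
	const uint64_t mask = ~(uint64_t)UINT32_MAX; /* keep bits 63:32 */
	const uint64_t end = start + length;

	return (start & mask) != ((end - 1) & mask);
}

int main(void)
{
	/* 8 kB ending exactly at the 4 GB boundary: stays in one chunk (0). */
	printf("%d\n", straddles_4gb(0xFFFFE000ull, 0x2000));
	/* The same size one page higher crosses into the next chunk (1).   */
	printf("%d\n", straddles_4gb(0xFFFFF000ull, 0x2000));
	return 0;
}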
-+ */ -+ WARN_ON((start & mask) != ((end - 1) & mask)); -+ } -+ } ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), ++ current_setup->transtab & 0xFFFFFFFFUL); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), ++ (current_setup->transtab >> 32) & 0xFFFFFFFFUL); + ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), ++ current_setup->memattr & 0xFFFFFFFFUL); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), ++ (current_setup->memattr >> 32) & 0xFFFFFFFFUL); + -+ if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) -+ return false; ++ KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, ++ current_setup->transtab, ++ current_setup->memattr, ++ transcfg); + -+ return true; ++ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); ++#if MALI_USE_CSF ++ /* Wait for UPDATE command to complete */ ++ wait_ready(kbdev, as->number); ++#endif +} + +/** -+ * kbase_unmapped_area_topdown() - allocates new areas top-down from -+ * below the stack limit. -+ * @info: Information about the memory area to allocate. -+ * @is_shader_code: Boolean which denotes whether the allocated area is -+ * intended for the use by shader core in which case a -+ * special alignment requirements apply. -+ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs -+ * to reside completely within a 4GB chunk. -+ * -+ * The unmapped_area_topdown() function in the Linux kernel is not exported -+ * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a -+ * module and also make use of the fact that some of the requirements for -+ * the unmapped area are known in advance, we implemented an extended version -+ * of this function and prefixed it with 'kbase_'. -+ * -+ * The difference in the call parameter list comes from the fact that -+ * kbase_unmapped_area_topdown() is called with additional parameters which -+ * are provided to indicate whether the allocation is for a shader core memory, -+ * which has additional alignment requirements, and whether the allocation can -+ * straddle a 4GB boundary. -+ * -+ * The modification of the original Linux function lies in how the computation -+ * of the highest gap address at the desired alignment is performed once the -+ * gap with desirable properties is found. For this purpose a special function -+ * is introduced (@ref align_and_check()) which beside computing the gap end -+ * at the desired alignment also performs additional alignment checks for the -+ * case when the memory is executable shader core memory, for which it is -+ * ensured that the gap does not end on a 4GB boundary, and for the case when -+ * memory needs to be confined within a 4GB chunk. ++ * mmu_command_instr - Record an MMU command for instrumentation purposes. + * -+ * Return: address of the found gap end (high limit) if area is found; -+ * -ENOMEM if search is unsuccessful ++ * @kbdev: Kbase device used to issue MMU operation on. ++ * @kctx_id: Kernel context ID for MMU command tracepoint. ++ * @cmd: Command issued to the MMU. ++ * @lock_addr: Address of memory region locked for the operation. ++ * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops. 
+ */ -+ -+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info -+ *info, bool is_shader_code, bool is_same_4gb_page) ++static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr, ++ enum kbase_caller_mmu_sync_info mmu_sync_info) +{ -+#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ unsigned long length, low_limit, high_limit, gap_start, gap_end; -+ -+ /* Adjust search length to account for worst case alignment overhead */ -+ length = info->length + info->align_mask; -+ if (length < info->length) -+ return -ENOMEM; ++ u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr); ++ u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr); + -+ /* -+ * Adjust search limits by the desired length. -+ * See implementation comment at top of unmapped_area(). -+ */ -+ gap_end = info->high_limit; -+ if (gap_end < length) -+ return -ENOMEM; -+ high_limit = gap_end - length; ++ bool is_mmu_synchronous = (mmu_sync_info == CALLER_MMU_SYNC); + -+ if (info->low_limit > high_limit) -+ return -ENOMEM; -+ low_limit = info->low_limit + length; ++ KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, is_mmu_synchronous, lock_addr_base, ++ lock_addr_size); ++} + -+ /* Check highest gap, which does not precede any rbtree node */ -+ gap_start = mm->highest_vm_end; -+ if (gap_start <= high_limit) { -+ if (align_and_check(&gap_end, gap_start, info, -+ is_shader_code, is_same_4gb_page)) -+ return gap_end; -+ } ++/* Helper function to program the LOCKADDR register before LOCK/UNLOCK command ++ * is issued. ++ */ ++static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock_addr, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret; + -+ /* Check if rbtree root looks promising */ -+ if (RB_EMPTY_ROOT(&mm->mm_rb)) -+ return -ENOMEM; -+ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); -+ if (vma->rb_subtree_gap < length) -+ return -ENOMEM; ++ ret = lock_region(&kbdev->gpu_props, lock_addr, op_param); + -+ while (true) { -+ /* Visit right subtree if it looks promising */ -+ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; -+ if (gap_start <= high_limit && vma->vm_rb.rb_right) { -+ struct vm_area_struct *right = -+ rb_entry(vma->vm_rb.rb_right, -+ struct vm_area_struct, vm_rb); -+ if (right->rb_subtree_gap >= length) { -+ vma = right; -+ continue; -+ } -+ } ++ if (!ret) { ++ /* Set the region that needs to be updated */ ++ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO), ++ *lock_addr & 0xFFFFFFFFUL); ++ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI), ++ (*lock_addr >> 32) & 0xFFFFFFFFUL); ++ } ++ return ret; ++} + -+check_current: -+ /* Check if current node has a suitable gap */ -+ gap_end = vma->vm_start; -+ if (gap_end < low_limit) -+ return -ENOMEM; -+ if (gap_start <= high_limit && gap_end - gap_start >= length) { -+ /* We found a suitable gap. Clip it with the original -+ * high_limit. -+ */ -+ if (gap_end > info->high_limit) -+ gap_end = info->high_limit; ++/** ++ * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without ++ * waiting for it's completion. ++ * ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @lock_addr: Address of memory region locked for this operation. ++ * @op_param: Pointer to a struct containing information about the MMU operation. 
++ * ++ * Return: 0 if issuing the command was successful, otherwise an error code. ++ */ ++static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *as, u64 *lock_addr, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret; + -+ if (align_and_check(&gap_end, gap_start, info, -+ is_shader_code, is_same_4gb_page)) -+ return gap_end; -+ } ++ ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param); + -+ /* Visit left subtree if it looks promising */ -+ if (vma->vm_rb.rb_left) { -+ struct vm_area_struct *left = -+ rb_entry(vma->vm_rb.rb_left, -+ struct vm_area_struct, vm_rb); -+ if (left->rb_subtree_gap >= length) { -+ vma = left; -+ continue; -+ } -+ } ++ if (likely(!ret)) ++ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + -+ /* Go back up the rbtree to find next candidate node */ -+ while (true) { -+ struct rb_node *prev = &vma->vm_rb; ++ return ret; ++} + -+ if (!rb_parent(prev)) -+ return -ENOMEM; -+ vma = rb_entry(rb_parent(prev), -+ struct vm_area_struct, vm_rb); -+ if (prev == vma->vm_rb.rb_right) { -+ gap_start = vma->vm_prev ? -+ vma->vm_prev->vm_end : 0; -+ goto check_current; -+ } -+ } -+ } -+#else -+ unsigned long length, high_limit, gap_start, gap_end; ++/** ++ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion. ++ * ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to a struct containing information about the MMU operation. ++ * ++ * Return: 0 if issuing the LOCK command was successful, otherwise an error code. ++ */ ++static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret; ++ u64 lock_addr = 0x0; + -+ MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); -+ /* Adjust search length to account for worst case alignment overhead */ -+ length = info->length + info->align_mask; -+ if (length < info->length) -+ return -ENOMEM; ++ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) ++ return -EINVAL; + -+ /* -+ * Adjust search limits by the desired length. -+ * See implementation comment at top of unmapped_area(). -+ */ -+ gap_end = info->high_limit; -+ if (gap_end < length) -+ return -ENOMEM; -+ high_limit = gap_end - length; ++ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); + -+ if (info->low_limit > high_limit) -+ return -ENOMEM; ++ if (!ret) ++ ret = wait_ready(kbdev, as->number); + -+ while (true) { -+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) -+ return -ENOMEM; -+ gap_end = mas.last + 1; -+ gap_start = mas.min; ++ if (!ret) ++ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr, ++ op_param->mmu_sync_info); + -+ if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) -+ return gap_end; -+ } -+#endif -+ return -ENOMEM; ++ return ret; +} + -+ -+/* This function is based on Linux kernel's arch_get_unmapped_area, but -+ * simplified slightly. Modifications come from the fact that some values -+ * about the memory area are known in advance. 
-+ */ -+unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, -+ const unsigned long addr, const unsigned long len, -+ const unsigned long pgoff, const unsigned long flags) ++int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) +{ -+ struct mm_struct *mm = current->mm; -+ struct vm_unmapped_area_info info; -+ unsigned long align_offset = 0; -+ unsigned long align_mask = 0; -+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -+ unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base); -+ unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr); -+#else -+ unsigned long high_limit = mm->mmap_base; -+ unsigned long low_limit = PAGE_SIZE; -+#endif -+ int cpu_va_bits = BITS_PER_LONG; -+ int gpu_pc_bits = -+ kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; -+ bool is_shader_code = false; -+ bool is_same_4gb_page = false; -+ unsigned long ret; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* the 'nolock' form is used here: -+ * - the base_pfn of the SAME_VA zone does not change -+ * - in normal use, va_size_pages is constant once the first allocation -+ * begins -+ * -+ * However, in abnormal use this function could be processing whilst -+ * another new zone is being setup in a different thread (e.g. to -+ * borrow part of the SAME_VA zone). In the worst case, this path may -+ * witness a higher SAME_VA end_pfn than the code setting up the new -+ * zone. -+ * -+ * This is safe because once we reach the main allocation functions, -+ * we'll see the updated SAME_VA end_pfn and will determine that there -+ * is no free region at the address found originally by too large a -+ * same_va_end_addr here, and will fail the allocation gracefully. -+ */ -+ struct kbase_reg_zone *zone = -+ kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); -+ u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; -+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); ++ return mmu_hw_do_lock(kbdev, as, op_param); ++} + -+ /* requested length too big for entire address space */ -+ if (len > mmap_end - kbase_mmap_min_addr) -+ return -ENOMEM; -+#endif ++int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret = 0; + -+ /* err on fixed address */ -+ if ((flags & MAP_FIXED) || addr) ++ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) + return -EINVAL; + -+#if IS_ENABLED(CONFIG_64BIT) -+ /* too big? */ -+ if (len > TASK_SIZE - SZ_2M) -+ return -ENOMEM; ++ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ high_limit = -+ min_t(unsigned long, high_limit, same_va_end_addr); ++ /* Wait for UNLOCK command to complete */ ++ if (likely(!ret)) ++ ret = wait_ready(kbdev, as->number); + -+ /* If there's enough (> 33 bits) of GPU VA space, align -+ * to 2MB boundaries. 
-+ */ -+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { -+ if (len >= SZ_2M) { -+ align_offset = SZ_2M; -+ align_mask = SZ_2M - 1; -+ } -+ } ++ if (likely(!ret)) { ++ u64 lock_addr = 0x0; ++ /* read MMU_AS_CONTROL.LOCKADDR register */ ++ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI)) ++ << 32; ++ lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO)); + -+ low_limit = SZ_2M; -+ } else { -+ cpu_va_bits = 32; ++ mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, ++ lock_addr, op_param->mmu_sync_info); + } -+#endif /* CONFIG_64BIT */ -+ if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && -+ (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { -+ int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ struct kbase_va_region *reg; + -+ /* Need to hold gpu vm lock when using reg */ -+ kbase_gpu_vm_lock(kctx); -+ reg = kctx->pending_regions[cookie]; -+ if (!reg) { -+ kbase_gpu_vm_unlock(kctx); -+ return -EINVAL; -+ } -+ if (!(reg->flags & KBASE_REG_GPU_NX)) { -+ if (cpu_va_bits > gpu_pc_bits) { -+ align_offset = 1ULL << gpu_pc_bits; -+ align_mask = align_offset - 1; -+ is_shader_code = true; -+ } -+#if !MALI_USE_CSF -+ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { -+ unsigned long extension_bytes = -+ (unsigned long)(reg->extension -+ << PAGE_SHIFT); -+ /* kbase_check_alloc_sizes() already satisfies -+ * these checks, but they're here to avoid -+ * maintenance hazards due to the assumptions -+ * involved -+ */ -+ WARN_ON(reg->extension > -+ (ULONG_MAX >> PAGE_SHIFT)); -+ WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); -+ WARN_ON(!is_power_of_2(extension_bytes)); -+ align_mask = extension_bytes - 1; -+ align_offset = -+ extension_bytes - -+ (reg->initial_commit << PAGE_SHIFT); -+#endif /* !MALI_USE_CSF */ -+ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { -+ is_same_4gb_page = true; -+ } -+ kbase_gpu_vm_unlock(kctx); -+#ifndef CONFIG_64BIT -+ } else { -+ return current->mm->get_unmapped_area( -+ kctx->filp, addr, len, pgoff, flags); -+#endif -+ } ++ return ret; ++} + -+ info.flags = 0; -+ info.length = len; -+ info.low_limit = low_limit; -+ info.high_limit = high_limit; -+ info.align_offset = align_offset; -+ info.align_mask = align_mask; ++int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret = 0; ++ u64 lock_addr = 0x0; + -+ ret = kbase_unmapped_area_topdown(&info, is_shader_code, -+ is_same_4gb_page); ++ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) ++ return -EINVAL; + -+ if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && -+ high_limit < same_va_end_addr) { -+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) -+ /* Retry above TASK_UNMAPPED_BASE */ -+ info.low_limit = TASK_UNMAPPED_BASE; -+ info.high_limit = min_t(u64, mmap_end, same_va_end_addr); -+#else -+ /* Retry above mmap_base */ -+ info.low_limit = mm->mmap_base; -+ info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); -+#endif ++ ret = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param); + -+ ret = kbase_unmapped_area_topdown(&info, is_shader_code, -+ is_same_4gb_page); -+ } ++ if (!ret) ++ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, ++ op_param); + + return ret; +} -diff --git a/drivers/gpu/arm/bifrost/tl/Kbuild b/drivers/gpu/arm/bifrost/tl/Kbuild -new file mode 100755 -index 000000000..1c684d489 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/Kbuild -@@ -0,0 +1,32 @@ -+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note 
-+# -+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU license. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# -+ -+bifrost_kbase-y += \ -+ tl/mali_kbase_timeline.o \ -+ tl/mali_kbase_timeline_io.o \ -+ tl/mali_kbase_tlstream.o \ -+ tl/mali_kbase_tracepoints.o + -+ -+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) -+ bifrost_kbase-y += tl/backend/mali_kbase_timeline_csf.o -+else -+ bifrost_kbase-y += tl/backend/mali_kbase_timeline_jm.o -+endif -diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c -new file mode 100644 -index 000000000..a6062f170 ---- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c -@@ -0,0 +1,190 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -+/* -+ * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. ++/** ++ * mmu_hw_do_flush - Flush MMU and wait for its completion. + * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * @kbdev: Kbase device to issue the MMU operation on. ++ * @as: Address space to issue the MMU operation on. ++ * @op_param: Pointer to a struct containing information about the MMU operation. ++ * @hwaccess_locked: Flag to indicate if the lock has been held. + * ++ * Return: 0 if flushing MMU was successful, otherwise an error code. + */ ++static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked) ++{ ++ int ret; ++ u64 lock_addr = 0x0; ++ u32 mmu_cmd = AS_COMMAND_FLUSH_MEM; + -+#include -+#include -+#include ++ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) ++ return -EINVAL; + -+#include ++ /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at ++ * this point would be unexpected. 
++ */ ++ if (op_param->op != KBASE_MMU_OP_FLUSH_PT && ++ op_param->op != KBASE_MMU_OP_FLUSH_MEM) { ++ dev_err(kbdev->dev, "Unexpected flush operation received"); ++ return -EINVAL; ++ } + -+#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull) ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+void kbase_create_timeline_objects(struct kbase_device *kbdev) -+{ -+ unsigned int as_nr; -+ unsigned int slot_i; -+ struct kbase_context *kctx; -+ struct kbase_timeline *timeline = kbdev->timeline; -+ struct kbase_tlstream *summary = -+ &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; -+ u32 const kbdev_has_cross_stream_sync = -+ (kbdev->gpu_props.props.raw_props.gpu_features & -+ GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ? -+ 1 : -+ 0; -+ u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id & -+ GPU_ID2_ARCH_MAJOR) >> -+ GPU_ID2_ARCH_MAJOR_SHIFT; -+ u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8; -+ u32 const supports_gpu_sleep = -+#ifdef KBASE_PM_RUNTIME -+ kbdev->pm.backend.gpu_sleep_supported; -+#else -+ false; -+#endif /* KBASE_PM_RUNTIME */ ++ if (op_param->op == KBASE_MMU_OP_FLUSH_PT) ++ mmu_cmd = AS_COMMAND_FLUSH_PT; + -+ /* Summarize the Address Space objects. */ -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); ++ /* Lock the region that needs to be updated */ ++ ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param); ++ if (ret) ++ return ret; + -+ /* Create Legacy GPU object to track in AOM for dumping */ -+ __kbase_tlstream_tl_new_gpu(summary, -+ kbdev, -+ kbdev->gpu_props.props.raw_props.gpu_id, -+ kbdev->gpu_props.num_cores); ++#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) ++ /* WA for the BASE_HW_ISSUE_GPU2019_3901. */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) && ++ mmu_cmd == AS_COMMAND_FLUSH_MEM) { ++ if (!hwaccess_locked) { ++ unsigned long flags = 0; + ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); ++ } + -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ __kbase_tlstream_tl_lifelink_as_gpu(summary, -+ &kbdev->as[as_nr], -+ kbdev); ++ if (ret) ++ return ret; ++ } ++#endif + -+ /* Trace the creation of a new kbase device and set its properties. */ -+ __kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id, -+ kbdev->gpu_props.num_cores, -+ kbdev->csf.global_iface.group_num, -+ kbdev->nr_hw_address_spaces, num_sb_entries, -+ kbdev_has_cross_stream_sync, supports_gpu_sleep); ++ ret = write_cmd(kbdev, as->number, mmu_cmd); + -+ /* Lock the context list, to ensure no changes to the list are made -+ * while we're summarizing the contexts and their contents. 
-+ */ -+ mutex_lock(&timeline->tl_kctx_list_lock); ++ /* Wait for the command to complete */ ++ if (likely(!ret)) ++ ret = wait_ready(kbdev, as->number); + -+ /* Hold the scheduler lock while we emit the current state -+ * We also need to continue holding the lock until after the first body -+ * stream tracepoints are emitted to ensure we don't change the -+ * scheduler until after then -+ */ -+ mutex_lock(&kbdev->csf.scheduler.lock); ++ if (likely(!ret)) ++ mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr, ++ op_param->mmu_sync_info); + -+ for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) { ++ return ret; ++} + -+ struct kbase_queue_group *group = -+ kbdev->csf.scheduler.csg_slots[slot_i].resident_group; ++int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (group) -+ __kbase_tlstream_tl_kbase_device_program_csg( -+ summary, -+ kbdev->gpu_props.props.raw_props.gpu_id, -+ group->kctx->id, group->handle, slot_i, 0); -+ } ++ return mmu_hw_do_flush(kbdev, as, op_param, true); ++} + -+ /* Reset body stream buffers while holding the kctx lock. -+ * As we are holding the lock, we can guarantee that no kctx creation or -+ * deletion tracepoints can be fired from outside of this function by -+ * some other thread. ++int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ return mmu_hw_do_flush(kbdev, as, op_param, false); ++} ++ ++int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, ++ const struct kbase_mmu_hw_op_param *op_param) ++{ ++ int ret, ret2; ++ u32 gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC; ++ ++ if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL)) ++ return -EINVAL; ++ ++ /* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at ++ * this point would be unexpected. + */ -+ kbase_timeline_streams_body_reset(timeline); ++ if (op_param->op != KBASE_MMU_OP_FLUSH_PT && ++ op_param->op != KBASE_MMU_OP_FLUSH_MEM) { ++ dev_err(kbdev->dev, "Unexpected flush operation received"); ++ return -EINVAL; ++ } + -+ mutex_unlock(&kbdev->csf.scheduler.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+ /* For each context in the device... */ -+ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { -+ size_t i; -+ struct kbase_tlstream *body = -+ &timeline->streams[TL_STREAM_TYPE_OBJ]; ++ if (op_param->op == KBASE_MMU_OP_FLUSH_PT) ++ gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; + -+ /* Lock the context's KCPU queues, to ensure no KCPU-queue -+ * related actions can occur in this context from now on. -+ */ -+ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ ++ ret = mmu_hw_do_lock(kbdev, as, op_param); ++ if (ret) ++ return ret; + -+ /* Acquire the MMU lock, to ensure we don't get a concurrent -+ * address space assignment while summarizing this context's -+ * address space. -+ */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */ ++ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd); + -+ /* Trace the context itself into the body stream, not the -+ * summary stream. 
-+ * We place this in the body to ensure it is ordered after any -+ * other tracepoints related to the contents of the context that -+ * might have been fired before acquiring all of the per-context -+ * locks. -+ * This ensures that those tracepoints will not actually affect -+ * the object model state, as they reference a context that -+ * hasn't been traced yet. They may, however, cause benign -+ * errors to be emitted. -+ */ -+ __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, -+ kbdev->gpu_props.props.raw_props.gpu_id); ++ /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */ ++ ret2 = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param); + -+ /* Also trace with the legacy AOM tracepoint for dumping */ -+ __kbase_tlstream_tl_new_ctx(body, -+ kctx, -+ kctx->id, -+ (u32)(kctx->tgid)); ++ return ret ?: ret2; ++} + -+ /* Trace the currently assigned address space */ -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID) -+ __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, -+ kctx->as_nr); ++void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ enum kbase_mmu_fault_type type) ++{ ++ unsigned long flags; ++ u32 pf_bf_mask; + ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + -+ /* Trace all KCPU queues in the context into the body stream. -+ * As we acquired the KCPU lock after resetting the body stream, -+ * it's possible that some KCPU-related events for this context -+ * occurred between that reset and now. -+ * These will cause errors to be emitted when parsing the -+ * timeline, but they will not affect the correctness of the -+ * object model. -+ */ -+ for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) { -+ const struct kbase_kcpu_command_queue *kcpu_queue = -+ kctx->csf.kcpu_queues.array[i]; ++ /* ++ * A reset is in-flight and we're flushing the IRQ + bottom half ++ * so don't update anything as it could race with the reset code. ++ */ ++ if (kbdev->irq_reset_flush) ++ goto unlock; + -+ if (kcpu_queue) -+ __kbase_tlstream_tl_kbase_new_kcpuqueue( -+ body, kcpu_queue, kcpu_queue->id, kcpu_queue->kctx->id, -+ kcpu_queue->num_pending_cmds); -+ } ++ /* Clear the page (and bus fault IRQ as well in case one occurred) */ ++ pf_bf_mask = MMU_PAGE_FAULT(as->number); ++#if !MALI_USE_CSF ++ if (type == KBASE_MMU_FAULT_TYPE_BUS || ++ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) ++ pf_bf_mask |= MMU_BUS_ERROR(as->number); ++#endif ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++unlock: ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++} + -+ /* Now that all per-context locks for this context have been -+ * released, any per-context tracepoints that are fired from -+ * any other threads will go into the body stream after -+ * everything that was just summarised into the body stream in -+ * this iteration of the loop, so will start to correctly update -+ * the object model state. -+ */ -+ } ++void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ enum kbase_mmu_fault_type type) ++{ ++ unsigned long flags; ++ u32 irq_mask; + -+ mutex_unlock(&timeline->tl_kctx_list_lock); ++ /* Enable the page fault IRQ ++ * (and bus fault IRQ as well in case one occurred) ++ */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + -+ /* Static object are placed into summary packet that needs to be -+ * transmitted first. Flush all streams to make it available to -+ * user space. 
++ /* ++ * A reset is in-flight and we're flushing the IRQ + bottom half ++ * so don't update anything as it could race with the reset code. + */ -+ kbase_timeline_streams_flush(timeline); ++ if (kbdev->irq_reset_flush) ++ goto unlock; ++ ++ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | ++ MMU_PAGE_FAULT(as->number); ++ ++#if !MALI_USE_CSF ++ if (type == KBASE_MMU_FAULT_TYPE_BUS || ++ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) ++ irq_mask |= MMU_BUS_ERROR(as->number); ++#endif ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); ++ ++unlock: ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} -diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h new file mode 100644 -index 000000000..9ba89f59f +index 000000000..9d7ce4856 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c -@@ -0,0 +1,96 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -267813,91 +267771,67 @@ index 000000000..9ba89f59f + * + */ + -+#include -+#include -+#include -+ -+#include -+ -+void kbase_create_timeline_objects(struct kbase_device *kbdev) -+{ -+ unsigned int lpu_id; -+ unsigned int as_nr; -+ struct kbase_context *kctx; -+ struct kbase_timeline *timeline = kbdev->timeline; -+ struct kbase_tlstream *summary = -+ &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; -+ -+ /* Summarize the LPU objects. */ -+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { -+ u32 *lpu = -+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; -+ __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); -+ } -+ -+ /* Summarize the Address Space objects. */ -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); ++#ifndef _KBASE_MMU_INTERNAL_H_ ++#define _KBASE_MMU_INTERNAL_H_ + -+ /* Create GPU object and make it retain all LPUs and address spaces. */ -+ __kbase_tlstream_tl_new_gpu(summary, -+ kbdev, -+ kbdev->gpu_props.props.raw_props.gpu_id, -+ kbdev->gpu_props.num_cores); ++void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, ++ struct kbase_mmu_setup * const setup); + -+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { -+ void *lpu = -+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; -+ __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); -+ } ++/** ++ * kbase_mmu_report_mcu_as_fault_and_reset - Report page fault for all ++ * address spaces and reset the GPU. 
++ * @kbdev: The kbase_device the fault happened on ++ * @fault: Data relating to the fault ++ */ ++void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, ++ struct kbase_fault *fault); + -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ __kbase_tlstream_tl_lifelink_as_gpu(summary, -+ &kbdev->as[as_nr], -+ kbdev); ++void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, struct kbase_fault *fault); + -+ /* Lock the context list, to ensure no changes to the list are made -+ * while we're summarizing the contexts and their contents. -+ */ -+ mutex_lock(&timeline->tl_kctx_list_lock); ++void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str, ++ struct kbase_fault *fault); + -+ /* For each context in the device... */ -+ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { -+ /* Summarize the context itself */ -+ __kbase_tlstream_tl_new_ctx(summary, -+ kctx, -+ kctx->id, -+ (u32)(kctx->tgid)); -+ } ++/** ++ * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible ++ * @kctx: kbase_context for the faulting address space. ++ * @reg: of a growable GPU memory region in the same context. ++ * Takes ownership of the reference if successful. ++ * ++ * Used to switch to incremental rendering if we have nearly run out of ++ * virtual address space in a growable memory region. ++ * ++ * Return: 0 if successful, otherwise a negative error code. ++ */ ++int kbase_mmu_switch_to_ir(struct kbase_context *kctx, ++ struct kbase_va_region *reg); + -+ /* Reset body stream buffers while holding the kctx lock. -+ * This ensures we can't fire both summary and normal tracepoints for -+ * the same objects. -+ * If we weren't holding the lock, it's possible that the summarized -+ * objects could have been created, destroyed, or used after we -+ * constructed the summary stream tracepoints, but before we reset -+ * the body stream, resulting in losing those object event tracepoints. -+ */ -+ kbase_timeline_streams_body_reset(timeline); ++/** ++ * kbase_mmu_page_fault_worker() - Process a page fault. ++ * ++ * @data: work_struct passed by queue_work() ++ */ ++void kbase_mmu_page_fault_worker(struct work_struct *data); + -+ mutex_unlock(&timeline->tl_kctx_list_lock); ++/** ++ * kbase_mmu_bus_fault_worker() - Process a bus fault. ++ * ++ * @data: work_struct passed by queue_work() ++ */ ++void kbase_mmu_bus_fault_worker(struct work_struct *data); + -+ /* Static object are placed into summary packet that needs to be -+ * transmitted first. Flush all streams to make it available to -+ * user space. -+ */ -+ kbase_timeline_streams_flush(timeline); -+} -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c ++#endif /* _KBASE_MMU_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c new file mode 100644 -index 000000000..930decf8f +index 000000000..1464320cb --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c -@@ -0,0 +1,413 @@ ++++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c +@@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -267915,408 +267849,377 @@ index 000000000..930decf8f + * + */ + -+#include "mali_kbase_timeline.h" -+#include "mali_kbase_timeline_priv.h" -+#include "mali_kbase_tracepoints.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* The period of autoflush checker execution in milliseconds. */ -+#define AUTOFLUSH_INTERVAL 1000 /* ms */ -+ -+/*****************************************************************************/ ++#include "mali_kbase.h" ++#include ++#include "mali_kbase_defs.h" ++#include ++#include + -+/* These values are used in mali_kbase_tracepoints.h -+ * to retrieve the streams from a kbase_timeline instance. ++#define ENTRY_TYPE_MASK 3ULL ++/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). ++ * Valid ATE entries at level 3 are flagged with the value 3. ++ * Valid ATE entries at level 0-2 are flagged with the value 1. + */ -+const size_t __obj_stream_offset = -+ offsetof(struct kbase_timeline, streams) -+ + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; ++#define ENTRY_IS_ATE_L3 3ULL ++#define ENTRY_IS_ATE_L02 1ULL ++#define ENTRY_IS_INVAL 2ULL ++#define ENTRY_IS_PTE 3ULL + -+const size_t __aux_stream_offset = -+ offsetof(struct kbase_timeline, streams) -+ + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; ++#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ ++#define ENTRY_ACCESS_RO (3ULL << 6) ++#define ENTRY_ACCESS_BIT (1ULL << 10) ++#define ENTRY_NX_BIT (1ULL << 54) + -+/** -+ * kbasep_timeline_autoflush_timer_callback - autoflush timer callback -+ * @timer: Timer list -+ * -+ * Timer is executed periodically to check if any of the stream contains -+ * buffer ready to be submitted to user space. ++#define UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR (55) ++#define VALID_ENTRY_MASK ((u64)0xF << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR) ++ ++/* Helper Function to perform assignment of page table entries, to ++ * ensure the use of strd, which is required on LPAE systems. + */ -+static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) ++static inline void page_table_entry_set(u64 *pte, u64 phy) +{ -+ enum tl_stream_type stype; -+ int rcode; -+ struct kbase_timeline *timeline = -+ container_of(timer, struct kbase_timeline, autoflush_timer); -+ -+ CSTD_UNUSED(timer); -+ -+ for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; -+ stype++) { -+ struct kbase_tlstream *stream = &timeline->streams[stype]; ++ WRITE_ONCE(*pte, phy); ++} + -+ int af_cnt = atomic_read(&stream->autoflush_counter); ++static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) ++{ ++ struct kbase_as *as; ++ struct kbase_mmu_setup *current_setup; + -+ /* Check if stream contain unflushed data. */ -+ if (af_cnt < 0) -+ continue; ++ if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) ++ return; + -+ /* Check if stream should be flushed now. */ -+ if (af_cnt != atomic_cmpxchg( -+ &stream->autoflush_counter, -+ af_cnt, -+ af_cnt + 1)) -+ continue; -+ if (!af_cnt) -+ continue; ++ as = &kbdev->as[as_nr]; ++ current_setup = &as->current_setup; + -+ /* Autoflush this stream. 
*/ -+ kbase_tlstream_flush_stream(stream); -+ } ++ kbase_mmu_get_as_setup(mmut, current_setup); + -+ if (atomic_read(&timeline->autoflush_timer_active)) -+ rcode = mod_timer( -+ &timeline->autoflush_timer, -+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); -+ CSTD_UNUSED(rcode); ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as); +} + -+ -+ -+/*****************************************************************************/ -+ -+int kbase_timeline_init(struct kbase_timeline **timeline, -+ atomic_t *timeline_flags) ++static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ -+ enum tl_stream_type i; -+ struct kbase_timeline *result; -+#if MALI_USE_CSF -+ struct kbase_tlstream *csffw_stream; -+#endif -+ -+ if (!timeline || !timeline_flags) -+ return -EINVAL; -+ -+ result = vzalloc(sizeof(*result)); -+ if (!result) -+ return -ENOMEM; ++ struct kbase_as *const as = &kbdev->as[as_nr]; ++ struct kbase_mmu_setup *const current_setup = &as->current_setup; + -+ mutex_init(&result->reader_lock); -+ init_waitqueue_head(&result->event_queue); ++ current_setup->transtab = 0ULL; ++ current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + -+ /* Prepare stream structures. */ -+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) -+ kbase_tlstream_init(&result->streams[i], i, -+ &result->event_queue); ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as); ++} + -+ /* Initialize the kctx list */ -+ mutex_init(&result->tl_kctx_list_lock); -+ INIT_LIST_HEAD(&result->tl_kctx_list); ++static phys_addr_t pte_to_phy_addr(u64 entry) ++{ ++ if (!(entry & 1)) ++ return 0; + -+ /* Initialize autoflush timer. */ -+ atomic_set(&result->autoflush_timer_active, 0); -+ kbase_timer_setup(&result->autoflush_timer, -+ kbasep_timeline_autoflush_timer_callback); -+ result->timeline_flags = timeline_flags; ++ entry &= ~VALID_ENTRY_MASK; ++ return entry & ~0xFFF; ++} + -+#if MALI_USE_CSF -+ csffw_stream = &result->streams[TL_STREAM_TYPE_CSFFW]; -+ kbase_csf_tl_reader_init(&result->csf_tl_reader, csffw_stream); -+#endif ++static int ate_is_valid(u64 ate, int const level) ++{ ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) ++ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3); ++ else ++ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02); ++} + -+ *timeline = result; -+ return 0; ++static int pte_is_valid(u64 pte, int const level) ++{ ++ /* PTEs cannot exist at the bottom level */ ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) ++ return false; ++ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + -+void kbase_timeline_term(struct kbase_timeline *timeline) ++/* ++ * Map KBASE_REG flags to MMU flags ++ */ ++static u64 get_mmu_flags(unsigned long flags) +{ -+ enum tl_stream_type i; ++ u64 mmu_flags; + -+ if (!timeline) -+ return; ++ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ ++ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + -+#if MALI_USE_CSF -+ kbase_csf_tl_reader_term(&timeline->csf_tl_reader); -+#endif ++ /* Set access flags - note that AArch64 stage 1 does not support ++ * write-only access, so we use read/write instead ++ */ ++ if (flags & KBASE_REG_GPU_WR) ++ mmu_flags |= ENTRY_ACCESS_RW; ++ else if (flags & KBASE_REG_GPU_RD) ++ mmu_flags |= ENTRY_ACCESS_RO; + -+ WARN_ON(!list_empty(&timeline->tl_kctx_list)); ++ /* nx if requested */ ++ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + -+ for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) -+ kbase_tlstream_term(&timeline->streams[i]); ++ if (flags & KBASE_REG_SHARE_BOTH) { ++ /* inner and outer shareable */ ++ mmu_flags |= SHARE_BOTH_BITS; ++ } else if (flags & KBASE_REG_SHARE_IN) { ++ /* inner shareable coherency */ ++ mmu_flags |= SHARE_INNER_BITS; ++ } + -+ vfree(timeline); ++ return mmu_flags; +} + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) ++static void entry_set_ate(u64 *entry, struct tagged_addr phy, unsigned long flags, int const level) +{ -+ struct devfreq *devfreq = kbdev->devfreq; -+ -+ /* Devfreq initialization failure isn't a fatal error, so devfreq might -+ * be null. -+ */ -+ if (devfreq) { -+ unsigned long cur_freq = 0; -+ -+ mutex_lock(&devfreq->lock); -+ cur_freq = devfreq->last_status.current_frequency; -+ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); -+ mutex_unlock(&devfreq->lock); -+ } ++ if (level == MIDGARD_MMU_BOTTOMLEVEL) ++ page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | ++ ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); ++ else ++ page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | ++ ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); +} -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags) ++static unsigned int get_num_valid_entries(u64 *pgd) +{ -+ int err = 0; -+ u32 timeline_flags = TLSTREAM_ENABLED | flags; -+ struct kbase_timeline *timeline; -+ int rcode; -+ -+ if (WARN_ON(!kbdev) || WARN_ON(flags & ~BASE_TLSTREAM_FLAGS_MASK)) -+ return -EINVAL; ++ register unsigned int num_of_valid_entries; + -+ timeline = kbdev->timeline; -+ if (WARN_ON(!timeline)) -+ return -EFAULT; ++ num_of_valid_entries = (unsigned int)((pgd[2] & VALID_ENTRY_MASK) >> ++ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 8)); ++ num_of_valid_entries |= (unsigned int)((pgd[1] & VALID_ENTRY_MASK) >> ++ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR - 4)); ++ num_of_valid_entries |= (unsigned int)((pgd[0] & VALID_ENTRY_MASK) >> ++ (UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR)); + -+ if (atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) -+ return -EBUSY; ++ return num_of_valid_entries; ++} + -+#if MALI_USE_CSF -+ if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { -+ err = kbase_csf_tl_reader_start(&timeline->csf_tl_reader, kbdev); -+ if (err) { -+ atomic_set(timeline->timeline_flags, 0); -+ return err; -+ } -+ } -+#endif ++static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries) ++{ ++ WARN_ON_ONCE(num_of_valid_entries > KBASE_MMU_PAGE_ENTRIES); + -+ /* Reset and initialize header streams. */ -+ kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); ++ pgd[0] &= ~VALID_ENTRY_MASK; ++ pgd[0] |= ((u64)(num_of_valid_entries & 0xF) << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + -+ timeline->obj_header_btc = obj_desc_header_size; -+ timeline->aux_header_btc = aux_desc_header_size; ++ pgd[1] &= ~VALID_ENTRY_MASK; ++ pgd[1] |= ((u64)((num_of_valid_entries >> 4) & 0xF) ++ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); + -+#if !MALI_USE_CSF -+ /* If job dumping is enabled, readjust the software event's -+ * timeout as the default value of 3 seconds is often -+ * insufficient. 
-+ */ -+ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { -+ dev_info(kbdev->dev, -+ "Job dumping is enabled, readjusting the software event's timeout\n"); -+ atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); -+ } -+#endif /* !MALI_USE_CSF */ ++ pgd[2] &= ~VALID_ENTRY_MASK; ++ pgd[2] |= ((u64)((num_of_valid_entries >> 8) & 0xF) ++ << UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR); ++} + -+ /* Summary stream was cleared during acquire. -+ * Create static timeline objects that will be -+ * read by client. -+ */ -+ kbase_create_timeline_objects(kbdev); ++static void entry_set_pte(u64 *entry, phys_addr_t phy) ++{ ++ page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); ++} + -+#ifdef CONFIG_MALI_BIFROST_DEVFREQ -+ /* Devfreq target tracepoints are only fired when the target -+ * changes, so we won't know the current target unless we -+ * send it now. -+ */ -+ kbase_tlstream_current_devfreq_target(kbdev); -+#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ ++static void entries_invalidate(u64 *entry, u32 count) ++{ ++ u32 i; + -+ /* Start the autoflush timer. -+ * We must do this after creating timeline objects to ensure we -+ * don't auto-flush the streams which will be reset during the -+ * summarization process. -+ */ -+ atomic_set(&timeline->autoflush_timer_active, 1); -+ rcode = mod_timer(&timeline->autoflush_timer, -+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); -+ CSTD_UNUSED(rcode); ++ for (i = 0; i < count; i++) ++ page_table_entry_set(entry + i, ENTRY_IS_INVAL); ++} + -+ timeline->last_acquire_time = ktime_get_raw(); ++static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update, ++ .get_as_setup = kbase_mmu_get_as_setup, ++ .disable_as = mmu_disable_as, ++ .pte_to_phy_addr = pte_to_phy_addr, ++ .ate_is_valid = ate_is_valid, ++ .pte_is_valid = pte_is_valid, ++ .entry_set_ate = entry_set_ate, ++ .entry_set_pte = entry_set_pte, ++ .entries_invalidate = entries_invalidate, ++ .get_num_valid_entries = get_num_valid_entries, ++ .set_num_valid_entries = set_num_valid_entries, ++ .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; + -+ return err; ++struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) ++{ ++ return &aarch64_mode; +} +diff --git a/drivers/gpu/arm/bifrost/platform/Kconfig b/drivers/gpu/arm/bifrost/platform/Kconfig +new file mode 100644 +index 000000000..3e1bd235b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/Kconfig +@@ -0,0 +1,26 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. 
++# ++# + -+void kbase_timeline_release(struct kbase_timeline *timeline) -+{ -+ ktime_t elapsed_time; -+ s64 elapsed_time_ms, time_to_sleep; ++# Add your platform specific Kconfig file here ++# ++# "drivers/gpu/arm/bifrost/platform/xxx/Kconfig" ++# ++# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME ++# +diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild +new file mode 100755 +index 000000000..60a52d80f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild +@@ -0,0 +1,24 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ if (WARN_ON(!timeline) || WARN_ON(!atomic_read(timeline->timeline_flags))) -+ return; ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o +diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c +new file mode 100644 +index 000000000..4bcd5854d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c +@@ -0,0 +1,105 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2015, 2017-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ /* Get the amount of time passed since the timeline was acquired and ensure -+ * we sleep for long enough such that it has been at least -+ * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. -+ * This prevents userspace from spamming acquire and release too quickly. -+ */ -+ elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); -+ elapsed_time_ms = ktime_to_ms(elapsed_time); -+ time_to_sleep = (elapsed_time_ms < 0 ? 
TIMELINE_HYSTERESIS_TIMEOUT_MS : -+ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); -+ if (time_to_sleep > 0) -+ msleep_interruptible(time_to_sleep); ++#include ++#include ++#include ++#include "mali_kbase_config_platform.h" + +#if MALI_USE_CSF -+ kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); ++#include +#endif + -+ /* Stop autoflush timer before releasing access to streams. */ -+ atomic_set(&timeline->autoflush_timer_active, 0); -+ del_timer_sync(&timeline->autoflush_timer); -+ -+ atomic_set(timeline->timeline_flags, 0); -+} -+ -+int kbase_timeline_streams_flush(struct kbase_timeline *timeline) ++static void *enumerate_gpu_clk(struct kbase_device *kbdev, ++ unsigned int index) +{ -+ enum tl_stream_type stype; -+ bool has_bytes = false; -+ size_t nbytes = 0; -+ -+ if (WARN_ON(!timeline)) -+ return -EINVAL; ++ if (index >= kbdev->nr_clocks) ++ return NULL; + +#if MALI_USE_CSF -+ { -+ int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); -+ -+ if (ret > 0) -+ has_bytes = true; -+ } ++ if (of_machine_is_compatible("arm,juno")) ++ WARN_ON(kbdev->nr_clocks != 1); +#endif + -+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { -+ nbytes = kbase_tlstream_flush_stream(&timeline->streams[stype]); -+ if (nbytes > 0) -+ has_bytes = true; -+ } -+ return has_bytes ? 0 : -EIO; ++ return kbdev->clocks[index]; +} + -+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) ++static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, ++ void *gpu_clk_handle) +{ -+ kbase_tlstream_reset( -+ &timeline->streams[TL_STREAM_TYPE_OBJ]); -+ kbase_tlstream_reset( -+ &timeline->streams[TL_STREAM_TYPE_AUX]); +#if MALI_USE_CSF -+ kbase_tlstream_reset( -+ &timeline->streams[TL_STREAM_TYPE_CSFFW]); ++ /* On Juno fpga platforms, the GPU clock rate is reported as 600 MHZ at ++ * the boot time. Then after the first call to kbase_devfreq_target() ++ * the clock rate is reported as 450 MHZ and the frequency does not ++ * change after that. But the actual frequency at which GPU operates ++ * is always 50 MHz, which is equal to the frequency of system counter ++ * and HW counters also increment at the same rate. ++ * DVFS, which is a client of kbase_ipa_control, needs normalization of ++ * GPU_ACTIVE counter to calculate the time for which GPU has been busy. ++ * So for the correct normalization need to return the system counter ++ * frequency value. ++ * This is a reasonable workaround as the frequency value remains same ++ * throughout. It can be removed after GPUCORE-25693. ++ */ ++ if (of_machine_is_compatible("arm,juno")) ++ return arch_timer_get_cntfrq(); +#endif -+} -+ -+void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_timeline *timeline = kbdev->timeline; + -+ /* Remove the context from the list to ensure we don't try and -+ * summarize a context that is being destroyed. -+ * -+ * It's unsafe to try and summarize a context being destroyed as the -+ * locks we might normally attempt to acquire, and the data structures -+ * we would normally attempt to traverse could already be destroyed. -+ * -+ * In the case where the tlstream is acquired between this pre destroy -+ * call and the post destroy call, we will get a context destroy -+ * tracepoint without the corresponding context create tracepoint, -+ * but this will not affect the correctness of the object model. 
-+ */ -+ mutex_lock(&timeline->tl_kctx_list_lock); -+ list_del_init(&kctx->tl_kctx_list_node); -+ mutex_unlock(&timeline->tl_kctx_list_lock); ++ return clk_get_rate((struct clk *)gpu_clk_handle); +} + -+void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx) ++static int gpu_clk_notifier_register(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ struct kbase_timeline *timeline = kbdev->timeline; -+ -+ /* On context create, add the context to the list to ensure it is -+ * summarized when timeline is acquired -+ */ -+ mutex_lock(&timeline->tl_kctx_list_lock); ++ compiletime_assert(offsetof(struct clk_notifier_data, clk) == ++ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), ++ "mismatch in the offset of clk member"); + -+ list_add(&kctx->tl_kctx_list_node, &timeline->tl_kctx_list); ++ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == ++ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), ++ "mismatch in the size of clk member"); + -+ /* Fire the tracepoints with the lock held to ensure the tracepoints -+ * are either fired before or after the summarization, -+ * never in parallel with it. If fired in parallel, we could get -+ * duplicate creation tracepoints. -+ */ +#if MALI_USE_CSF -+ KBASE_TLSTREAM_TL_KBASE_NEW_CTX( -+ kbdev, kctx->id, kbdev->gpu_props.props.raw_props.gpu_id); ++ /* Frequency is fixed on Juno platforms */ ++ if (of_machine_is_compatible("arm,juno")) ++ return 0; +#endif -+ /* Trace with the AOM tracepoint even in CSF for dumping */ -+ KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0); + -+ mutex_unlock(&timeline->tl_kctx_list_lock); ++ return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + -+void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx) ++static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ /* Trace with the AOM tracepoint even in CSF for dumping */ -+ KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); +#if MALI_USE_CSF -+ KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kbdev, kctx->id); ++ if (of_machine_is_compatible("arm,juno")) ++ return; +#endif + -+ /* Flush the timeline stream, so the user can see the termination -+ * tracepoints being fired. -+ * The "if" statement below is for optimization. It is safe to call -+ * kbase_timeline_streams_flush when timeline is disabled. 
-+ */ -+ if (atomic_read(&kbdev->timeline_flags) != 0) -+ kbase_timeline_streams_flush(kbdev->timeline); ++ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); +} + -+#if MALI_UNIT_TEST -+void kbase_timeline_stats(struct kbase_timeline *timeline, -+ u32 *bytes_collected, u32 *bytes_generated) -+{ -+ enum tl_stream_type stype; -+ -+ KBASE_DEBUG_ASSERT(bytes_collected); -+ -+ /* Accumulate bytes generated per stream */ -+ *bytes_generated = 0; -+ for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; -+ stype++) -+ *bytes_generated += atomic_read( -+ &timeline->streams[stype].bytes_generated); -+ -+ *bytes_collected = atomic_read(&timeline->bytes_collected); -+} -+#endif /* MALI_UNIT_TEST */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h ++struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { ++ .get_gpu_clk_rate = get_gpu_clk_rate, ++ .enumerate_gpu_clk = enumerate_gpu_clk, ++ .gpu_clk_notifier_register = gpu_clk_notifier_register, ++ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, ++}; +diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c new file mode 100644 -index 000000000..62be6c64c +index 000000000..a0b4a434e --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h -@@ -0,0 +1,128 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c +@@ -0,0 +1,53 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -268334,123 +268237,101 @@ index 000000000..62be6c64c + * + */ + -+#if !defined(_KBASE_TIMELINE_H) -+#define _KBASE_TIMELINE_H -+ +#include ++#include ++#include + -+/*****************************************************************************/ ++static struct kbase_platform_config dummy_platform_config; + -+struct kbase_timeline; ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &dummy_platform_config; ++} + -+/** -+ * kbase_timeline_init - initialize timeline infrastructure in kernel -+ * @timeline: Newly created instance of kbase_timeline will be stored in -+ * this pointer. -+ * @timeline_flags: Timeline status will be written to this variable when a -+ * client is attached/detached. The variable must be valid -+ * while timeline instance is valid. -+ * Return: zero on success, negative number on error -+ */ -+int kbase_timeline_init(struct kbase_timeline **timeline, -+ atomic_t *timeline_flags); ++#ifndef CONFIG_OF ++int kbase_platform_register(void) ++{ ++ return 0; ++} + -+/** -+ * kbase_timeline_term - terminate timeline infrastructure in kernel -+ * -+ * @timeline: Timeline instance to be terminated. It must be previously created -+ * with kbase_timeline_init(). 
-+ */ -+void kbase_timeline_term(struct kbase_timeline *timeline); ++void kbase_platform_unregister(void) ++{ ++} ++#endif + -+/** -+ * kbase_timeline_io_acquire - acquire timeline stream file descriptor -+ * @kbdev: Kbase device -+ * @flags: Timeline stream flags ++#ifdef CONFIG_MALI_BIFROST_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..584a7217d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * This descriptor is meant to be used by userspace timeline to gain access to -+ * kernel timeline stream. This stream is later broadcasted by user space to the -+ * timeline client. -+ * Only one entity can own the descriptor at any given time. Descriptor shall be -+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already -+ * being used) return will be a negative value. ++ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. + * -+ * Return: file descriptor on success, negative number on error -+ */ -+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); -+ -+/** -+ * kbase_timeline_streams_flush - flush timeline streams. -+ * @timeline: Timeline instance ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * Function will flush pending data in all timeline streams. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * Return: Zero on success, errno on failure. -+ */ -+int kbase_timeline_streams_flush(struct kbase_timeline *timeline); -+ -+/** -+ * kbase_timeline_streams_body_reset - reset timeline body streams. -+ * @timeline: Timeline instance ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * Function will discard pending data in all timeline body streams. -+ */ -+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); -+ -+/** -+ * kbase_timeline_post_kbase_context_create - Inform timeline that a new KBase -+ * Context has been created. -+ * @kctx: KBase Context + */ -+void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx); + +/** -+ * kbase_timeline_pre_kbase_context_destroy - Inform timeline that a KBase -+ * Context is about to be destroyed. 
-+ * @kctx: KBase Context ++ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf + */ -+void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** -+ * kbase_timeline_post_kbase_context_destroy - Inform timeline that a KBase -+ * Context has been destroyed. -+ * @kctx: KBase Context ++ * PLATFORM_FUNCS - Platform specific configuration functions + * -+ * Should be called immediately before the memory is freed, and the context ID -+ * and kbdev pointer should still be valid. ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf + */ -+void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); -+ -+#if MALI_UNIT_TEST ++#define PLATFORM_FUNCS (NULL) + -+/** -+ * kbase_timeline_stats - read timeline stream statistics -+ * @timeline: Timeline instance -+ * @bytes_collected: Will hold number of bytes read by the user -+ * @bytes_generated: Will hold number of bytes generated by trace points -+ */ -+void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); -+#endif /* MALI_UNIT_TEST */ ++#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) + ++extern struct kbase_pm_callback_conf pm_callbacks; ++extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; +/** -+ * kbase_timeline_io_debugfs_init - Add a debugfs entry for reading timeline stream data ++ * AUTO_SUSPEND_DELAY - Autosuspend delay + * -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. ++ * The delay time (in milliseconds) to be used for autosuspend + */ -+void kbase_timeline_io_debugfs_init(struct kbase_device *kbdev); -+ -+#endif /* _KBASE_TIMELINE_H */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c ++#define AUTO_SUSPEND_DELAY (100) +diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c new file mode 100644 -index 000000000..ae570064e +index 000000000..2687bee96 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c -@@ -0,0 +1,465 @@ ++++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c +@@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -268468,460 +268349,322 @@ index 000000000..ae570064e + * + */ + -+#include "mali_kbase_timeline_priv.h" -+#include "mali_kbase_tlstream.h" -+#include "mali_kbase_tracepoints.h" -+#include "mali_kbase_timeline.h" -+ ++#include ++#include +#include ++#include ++#include ++#include ++#include + -+#include -+#include -+#include -+ -+/* Explicitly include epoll header for old kernels. Not required from 4.16. 
*/ -+#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE -+#include -+#endif -+ -+static int kbase_unprivileged_global_profiling; ++#include "mali_kbase_config_platform.h" + -+/** -+ * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes -+ * -+ * @val: String containing value to set. Only strings representing positive -+ * integers are accepted as valid; any non-positive integer (including 0) -+ * is rejected. -+ * @kp: Module parameter associated with this method. -+ * -+ * This method can only be used to enable permissions for unprivileged processes, -+ * if they are disabled: for this reason, the only values which are accepted are -+ * strings representing positive integers. Since it's impossible to disable -+ * permissions once they're set, any integer which is non-positive is rejected, -+ * including 0. -+ * -+ * Return: 0 if success, otherwise error code. -+ */ -+static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp) ++static void enable_gpu_power_control(struct kbase_device *kbdev) +{ -+ int new_val; -+ int ret = kstrtoint(val, 0, &new_val); -+ -+ if (ret == 0) { -+ if (new_val < 1) -+ return -EINVAL; ++ unsigned int i; + -+ kbase_unprivileged_global_profiling = 1; ++#if defined(CONFIG_REGULATOR) ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ if (WARN_ON(kbdev->regulators[i] == NULL)) ++ ; ++ else if (!regulator_is_enabled(kbdev->regulators[i])) ++ WARN_ON(regulator_enable(kbdev->regulators[i])); + } ++#endif + -+ return ret; ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ if (WARN_ON(kbdev->clocks[i] == NULL)) ++ ; ++ else if (!__clk_is_enabled(kbdev->clocks[i])) ++ WARN_ON(clk_prepare_enable(kbdev->clocks[i])); ++ } +} + -+static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = { -+ .get = param_get_int, -+ .set = kbase_unprivileged_global_profiling_set, -+}; ++static void disable_gpu_power_control(struct kbase_device *kbdev) ++{ ++ unsigned int i; + -+module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops, -+ &kbase_unprivileged_global_profiling, 0600); ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ if (WARN_ON(kbdev->clocks[i] == NULL)) ++ ; ++ else if (__clk_is_enabled(kbdev->clocks[i])) { ++ clk_disable_unprepare(kbdev->clocks[i]); ++ WARN_ON(__clk_is_enabled(kbdev->clocks[i])); ++ } + -+/* The timeline stream file operations functions. 
*/ -+static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, -+ size_t size, loff_t *f_pos); -+static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait); -+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); -+static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, -+ int datasync); ++ } + -+static bool timeline_is_permitted(void) -+{ -+#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE -+ return kbase_unprivileged_global_profiling || perfmon_capable(); -+#else -+ return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN); ++#if defined(CONFIG_REGULATOR) ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ if (WARN_ON(kbdev->regulators[i] == NULL)) ++ ; ++ else if (regulator_is_enabled(kbdev->regulators[i])) ++ WARN_ON(regulator_disable(kbdev->regulators[i])); ++ } +#endif ++ +} + -+/** -+ * kbasep_timeline_io_packet_pending - check timeline streams for pending -+ * packets -+ * -+ * @timeline: Timeline instance -+ * @ready_stream: Pointer to variable where stream will be placed -+ * @rb_idx_raw: Pointer to variable where read buffer index will be placed -+ * -+ * Function checks all streams for pending packets. It will stop as soon as -+ * packet ready to be submitted to user space is detected. Variables under -+ * pointers, passed as the parameters to this function will be updated with -+ * values pointing to right stream and buffer. -+ * -+ * Return: non-zero if any of timeline streams has at last one packet ready -+ */ -+static int -+kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, -+ struct kbase_tlstream **ready_stream, -+ unsigned int *rb_idx_raw) ++static int pm_callback_power_on(struct kbase_device *kbdev) +{ -+ enum tl_stream_type i; ++ int ret = 1; /* Assume GPU has been powered off */ ++ int error; ++ unsigned long flags; + -+ KBASE_DEBUG_ASSERT(ready_stream); -+ KBASE_DEBUG_ASSERT(rb_idx_raw); ++ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); + -+ for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) { -+ struct kbase_tlstream *stream = &timeline->streams[i]; -+ *rb_idx_raw = atomic_read(&stream->rbi); -+ /* Read buffer index may be updated by writer in case of -+ * overflow. Read and write buffer indexes must be -+ * loaded in correct order. ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(kbdev->pm.backend.gpu_powered); ++#if MALI_USE_CSF ++ if (likely(kbdev->csf.firmware_inited)) { ++ WARN_ON(!kbdev->pm.active_count); ++ WARN_ON(kbdev->pm.runtime_active); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ enable_gpu_power_control(kbdev); ++ CSTD_UNUSED(error); ++#else ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#ifdef KBASE_PM_RUNTIME ++ error = pm_runtime_get_sync(kbdev->dev); ++ if (error == 1) { ++ /* ++ * Let core know that the chip has not been ++ * powered off, so we can save on re-initialization. + */ -+ smp_rmb(); -+ if (atomic_read(&stream->wbi) != *rb_idx_raw) { -+ *ready_stream = stream; -+ return 1; -+ } ++ ret = 0; + } ++ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); ++#else ++ enable_gpu_power_control(kbdev); ++#endif /* KBASE_PM_RUNTIME */ + -+ return 0; -+} ++#endif /* MALI_USE_CSF */ + -+/** -+ * kbasep_timeline_has_header_data() - check timeline headers for pending -+ * packets -+ * -+ * @timeline: Timeline instance -+ * -+ * Return: non-zero if any of timeline headers has at last one packet ready. 
-+ */ -+static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline) -+{ -+ return timeline->obj_header_btc || timeline->aux_header_btc -+#if MALI_USE_CSF -+ || timeline->csf_tl_reader.tl_header.btc -+#endif -+ ; ++ return ret; +} + -+/** -+ * copy_stream_header() - copy timeline stream header. -+ * -+ * @buffer: Pointer to the buffer provided by user. -+ * @size: Maximum amount of data that can be stored in the buffer. -+ * @copy_len: Pointer to amount of bytes that has been copied already -+ * within the read system call. -+ * @hdr: Pointer to the stream header. -+ * @hdr_size: Header size. -+ * @hdr_btc: Pointer to the remaining number of bytes to copy. -+ * -+ * Return: 0 if success, -1 otherwise. -+ */ -+static inline int copy_stream_header(char __user *buffer, size_t size, -+ ssize_t *copy_len, const char *hdr, -+ size_t hdr_size, size_t *hdr_btc) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ const size_t offset = hdr_size - *hdr_btc; -+ const size_t copy_size = MIN(size - *copy_len, *hdr_btc); -+ -+ if (!*hdr_btc) -+ return 0; -+ -+ if (WARN_ON(*hdr_btc > hdr_size)) -+ return -1; -+ -+ if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) -+ return -1; ++ unsigned long flags; + -+ *hdr_btc -= copy_size; -+ *copy_len += copy_size; ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ return 0; -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(kbdev->pm.backend.gpu_powered); ++#if MALI_USE_CSF ++ if (likely(kbdev->csf.firmware_inited)) { ++#ifdef CONFIG_MALI_BIFROST_DEBUG ++ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev)); ++#endif ++ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * kbasep_timeline_copy_headers - copy timeline headers to the user -+ * -+ * @timeline: Timeline instance -+ * @buffer: Pointer to the buffer provided by user -+ * @size: Maximum amount of data that can be stored in the buffer -+ * @copy_len: Pointer to amount of bytes that has been copied already -+ * within the read system call. -+ * -+ * This helper function checks if timeline headers have not been sent -+ * to the user, and if so, sends them. copy_len is respectively -+ * updated. -+ * -+ * Return: 0 if success, -1 if copy_to_user has failed. 
-+ */ -+static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, -+ char __user *buffer, size_t size, -+ ssize_t *copy_len) -+{ -+ if (copy_stream_header(buffer, size, copy_len, obj_desc_header, -+ obj_desc_header_size, &timeline->obj_header_btc)) -+ return -1; ++ /* Power down the GPU immediately */ ++ disable_gpu_power_control(kbdev); ++#else /* MALI_USE_CSF */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (copy_stream_header(buffer, size, copy_len, aux_desc_header, -+ aux_desc_header_size, &timeline->aux_header_btc)) -+ return -1; -+#if MALI_USE_CSF -+ if (copy_stream_header(buffer, size, copy_len, -+ timeline->csf_tl_reader.tl_header.data, -+ timeline->csf_tl_reader.tl_header.size, -+ &timeline->csf_tl_reader.tl_header.btc)) -+ return -1; ++#ifdef KBASE_PM_RUNTIME ++ pm_runtime_mark_last_busy(kbdev->dev); ++ pm_runtime_put_autosuspend(kbdev->dev); ++#else ++ /* Power down the GPU immediately as runtime PM is disabled */ ++ disable_gpu_power_control(kbdev); +#endif -+ return 0; ++#endif /* MALI_USE_CSF */ +} + -+/** -+ * kbasep_timeline_io_read - copy data from streams to buffer provided by user -+ * -+ * @filp: Pointer to file structure -+ * @buffer: Pointer to the buffer provided by user -+ * @size: Maximum amount of data that can be stored in the buffer -+ * @f_pos: Pointer to file offset (unused) -+ * -+ * Return: number of bytes stored in the buffer -+ */ -+static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, -+ size_t size, loff_t *f_pos) ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++static void pm_callback_runtime_gpu_active(struct kbase_device *kbdev) +{ -+ ssize_t copy_len = 0; -+ struct kbase_timeline *timeline; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(f_pos); -+ -+ if (WARN_ON(!filp->private_data)) -+ return -EFAULT; -+ -+ timeline = (struct kbase_timeline *)filp->private_data; -+ -+ if (!buffer) -+ return -EINVAL; -+ -+ if (*f_pos < 0) -+ return -EINVAL; -+ -+ mutex_lock(&timeline->reader_lock); -+ -+ while (copy_len < size) { -+ struct kbase_tlstream *stream = NULL; -+ unsigned int rb_idx_raw = 0; -+ unsigned int wb_idx_raw; -+ unsigned int rb_idx; -+ size_t rb_size; -+ -+ if (kbasep_timeline_copy_headers(timeline, buffer, size, -+ ©_len)) { -+ copy_len = -EFAULT; -+ break; -+ } -+ -+ /* If we already read some packets and there is no -+ * packet pending then return back to user. -+ * If we don't have any data yet, wait for packet to be -+ * submitted. -+ */ -+ if (copy_len > 0) { -+ if (!kbasep_timeline_io_packet_pending( -+ timeline, &stream, &rb_idx_raw)) -+ break; -+ } else { -+ if (wait_event_interruptible( -+ timeline->event_queue, -+ kbasep_timeline_io_packet_pending( -+ timeline, &stream, &rb_idx_raw))) { -+ copy_len = -ERESTARTSYS; -+ break; -+ } -+ } ++ unsigned long flags; ++ int error; + -+ if (WARN_ON(!stream)) { -+ copy_len = -EFAULT; -+ break; -+ } ++ lockdep_assert_held(&kbdev->pm.lock); + -+ /* Check if this packet fits into the user buffer. -+ * If so copy its content. 
-+ */ -+ rb_idx = rb_idx_raw % PACKET_COUNT; -+ rb_size = atomic_read(&stream->buffer[rb_idx].size); -+ if (rb_size > size - copy_len) -+ break; -+ if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, -+ rb_size)) { -+ copy_len = -EFAULT; -+ break; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ WARN_ON(!kbdev->pm.active_count); ++ WARN_ON(kbdev->pm.runtime_active); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* If the distance between read buffer index and write -+ * buffer index became more than PACKET_COUNT, then overflow -+ * happened and we need to ignore the last portion of bytes -+ * that we have just sent to user. ++ if (pm_runtime_status_suspended(kbdev->dev)) { ++ error = pm_runtime_get_sync(kbdev->dev); ++ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d", error); ++ } else { ++ /* Call the async version here, otherwise there could be ++ * a deadlock if the runtime suspend operation is ongoing. ++ * Caller would have taken the kbdev->pm.lock and/or the ++ * scheduler lock, and the runtime suspend callback function ++ * will also try to acquire the same lock(s). + */ -+ smp_rmb(); -+ wb_idx_raw = atomic_read(&stream->wbi); -+ -+ if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { -+ copy_len += rb_size; -+ atomic_inc(&stream->rbi); -+#if MALI_UNIT_TEST -+ atomic_add(rb_size, &timeline->bytes_collected); -+#endif /* MALI_UNIT_TEST */ -+ -+ } else { -+ const unsigned int new_rb_idx_raw = -+ wb_idx_raw - PACKET_COUNT + 1; -+ /* Adjust read buffer index to the next valid buffer */ -+ atomic_set(&stream->rbi, new_rb_idx_raw); -+ } ++ error = pm_runtime_get(kbdev->dev); ++ dev_dbg(kbdev->dev, "pm_runtime_get returned %d", error); + } + -+ mutex_unlock(&timeline->reader_lock); -+ -+ return copy_len; ++ kbdev->pm.runtime_active = true; +} + -+/** -+ * kbasep_timeline_io_poll - poll timeline stream for packets -+ * @filp: Pointer to file structure -+ * @wait: Pointer to poll table -+ * -+ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, -+ * otherwise zero, or EPOLLHUP | EPOLLERR on error. 
-+ */ -+static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) ++static void pm_callback_runtime_gpu_idle(struct kbase_device *kbdev) +{ -+ struct kbase_tlstream *stream; -+ unsigned int rb_idx; -+ struct kbase_timeline *timeline; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(wait); -+ -+ if (WARN_ON(!filp->private_data)) -+ return EPOLLHUP | EPOLLERR; ++ unsigned long flags; + -+ timeline = (struct kbase_timeline *)filp->private_data; ++ lockdep_assert_held(&kbdev->pm.lock); + -+ /* If there are header bytes to copy, read will not block */ -+ if (kbasep_timeline_has_header_data(timeline)) -+ return EPOLLIN | EPOLLRDNORM; ++ dev_dbg(kbdev->dev, "%s", __func__); + -+ poll_wait(filp, &timeline->event_queue, wait); -+ if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) -+ return EPOLLIN | EPOLLRDNORM; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ WARN_ON(kbdev->pm.backend.l2_state != KBASE_L2_OFF); ++ WARN_ON(kbdev->pm.active_count); ++ WARN_ON(!kbdev->pm.runtime_active); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return (__poll_t)0; ++ pm_runtime_mark_last_busy(kbdev->dev); ++ pm_runtime_put_autosuspend(kbdev->dev); ++ kbdev->pm.runtime_active = false; +} ++#endif + -+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_init(struct kbase_device *kbdev) +{ -+ /* The timeline stream file operations structure. */ -+ static const struct file_operations kbasep_tlstream_fops = { -+ .owner = THIS_MODULE, -+ .release = kbasep_timeline_io_release, -+ .read = kbasep_timeline_io_read, -+ .poll = kbasep_timeline_io_poll, -+ .fsync = kbasep_timeline_io_fsync, -+ }; -+ int err; ++ int ret = 0; + -+ if (!timeline_is_permitted()) -+ return -EPERM; ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK)) -+ return -EINVAL; ++ pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); ++ pm_runtime_use_autosuspend(kbdev->dev); + -+ err = kbase_timeline_acquire(kbdev, flags); -+ if (err) -+ return err; ++ pm_runtime_set_active(kbdev->dev); ++ pm_runtime_enable(kbdev->dev); + -+ err = anon_inode_getfd("[mali_tlstream]", &kbasep_tlstream_fops, kbdev->timeline, -+ O_RDONLY | O_CLOEXEC); -+ if (err < 0) -+ kbase_timeline_release(kbdev->timeline); ++ if (!pm_runtime_enabled(kbdev->dev)) { ++ dev_warn(kbdev->dev, "pm_runtime not enabled"); ++ ret = -EINVAL; ++ } else if (atomic_read(&kbdev->dev->power.usage_count)) { ++ dev_warn(kbdev->dev, ++ "%s: Device runtime usage count unexpectedly non zero %d", ++ __func__, atomic_read(&kbdev->dev->power.usage_count)); ++ ret = -EINVAL; ++ } + -+ return err; ++ return ret; +} + -+#if IS_ENABLED(CONFIG_DEBUG_FS) -+static int kbasep_timeline_io_open(struct inode *in, struct file *file) ++static void kbase_device_runtime_disable(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = in->i_private; ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ if (WARN_ON(!kbdev)) -+ return -EFAULT; ++ if (atomic_read(&kbdev->dev->power.usage_count)) ++ dev_warn(kbdev->dev, ++ "%s: Device runtime usage count unexpectedly non zero %d", ++ __func__, atomic_read(&kbdev->dev->power.usage_count)); + -+ file->private_data = kbdev->timeline; -+ return kbase_timeline_acquire(kbdev, BASE_TLSTREAM_FLAGS_MASK & -+ ~BASE_TLSTREAM_JOB_DUMPING_ENABLED); ++ pm_runtime_disable(kbdev->dev); +} ++#endif /* KBASE_PM_RUNTIME */ + -+void 
kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) ++static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ -+ static const struct file_operations kbasep_tlstream_debugfs_fops = { -+ .owner = THIS_MODULE, -+ .open = kbasep_timeline_io_open, -+ .release = kbasep_timeline_io_release, -+ .read = kbasep_timeline_io_read, -+ .poll = kbasep_timeline_io_poll, -+ .fsync = kbasep_timeline_io_fsync, -+ }; -+ struct dentry *file; -+ -+ if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) -+ return; -+ -+ file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_tlstream_debugfs_fops); ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ if (IS_ERR_OR_NULL(file)) -+ dev_warn(kbdev->dev, "Unable to create timeline debugfs entry"); ++#if !MALI_USE_CSF ++ enable_gpu_power_control(kbdev); ++#endif ++ return 0; +} -+#else -+/* -+ * Stub function for when debugfs is disabled -+ */ -+void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) ++ ++static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ -+} ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ ++#if !MALI_USE_CSF ++ disable_gpu_power_control(kbdev); +#endif ++} + -+/** -+ * kbasep_timeline_io_release - release timeline stream descriptor -+ * @inode: Pointer to inode structure -+ * @filp: Pointer to file structure -+ * -+ * Return: always return zero -+ */ -+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) ++static void pm_callback_resume(struct kbase_device *kbdev) +{ -+ CSTD_UNUSED(inode); ++ int ret = pm_callback_runtime_on(kbdev); + -+ kbase_timeline_release(filp->private_data); -+ return 0; ++ WARN_ON(ret); +} + -+static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, -+ int datasync) ++static void pm_callback_suspend(struct kbase_device *kbdev) +{ -+ CSTD_UNUSED(start); -+ CSTD_UNUSED(end); -+ CSTD_UNUSED(datasync); -+ -+ return kbase_timeline_streams_flush(filp->private_data); ++ pm_callback_runtime_off(kbdev); +} -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h ++ ++ ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = pm_callback_suspend, ++ .power_resume_callback = pm_callback_resume, ++#ifdef KBASE_PM_RUNTIME ++ .power_runtime_init_callback = kbase_device_runtime_init, ++ .power_runtime_term_callback = kbase_device_runtime_disable, ++ .power_runtime_on_callback = pm_callback_runtime_on, ++ .power_runtime_off_callback = pm_callback_runtime_off, ++#else /* KBASE_PM_RUNTIME */ ++ .power_runtime_init_callback = NULL, ++ .power_runtime_term_callback = NULL, ++ .power_runtime_on_callback = NULL, ++ .power_runtime_off_callback = NULL, ++#endif /* KBASE_PM_RUNTIME */ ++ ++#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) ++ .power_runtime_gpu_idle_callback = pm_callback_runtime_gpu_idle, ++ .power_runtime_gpu_active_callback = pm_callback_runtime_gpu_active, ++#else ++ .power_runtime_gpu_idle_callback = NULL, ++ .power_runtime_gpu_active_callback = NULL, ++#endif ++}; +diff --git a/drivers/gpu/arm/bifrost/platform/meson/Kbuild b/drivers/gpu/arm/bifrost/platform/meson/Kbuild +new file mode 100755 +index 000000000..9b3de96ba +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/meson/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2017, 2019-2021 
ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_meson.o \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o +diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c new file mode 100644 -index 000000000..de30bccc7 +index 000000000..7b896b602 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h -@@ -0,0 +1,103 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c +@@ -0,0 +1,53 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2017, 2019, 2021, 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -268939,98 +268682,48 @@ index 000000000..de30bccc7 + * + */ + -+#if !defined(_KBASE_TIMELINE_PRIV_H) -+#define _KBASE_TIMELINE_PRIV_H -+ +#include -+#include "mali_kbase_tlstream.h" ++#include ++#include + -+#if MALI_USE_CSF -+#include "csf/mali_kbase_csf_tl_reader.h" -+#include "csf/mali_kbase_csf_trace_buffer.h" -+#endif ++static struct kbase_platform_config dummy_platform_config; + -+#include -+#include -+#include ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &dummy_platform_config; ++} + -+/* The minimum amount of time timeline must be acquired for before release is -+ * allowed, to prevent DoS attacks. -+ */ -+#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) ++#ifndef CONFIG_OF ++int kbase_platform_register(void) ++{ ++ return 0; ++} + -+/** -+ * struct kbase_timeline - timeline state structure -+ * @streams: The timeline streams generated by kernel -+ * @tl_kctx_list: List of contexts for timeline. -+ * @tl_kctx_list_lock: Lock to protect @tl_kctx_list. -+ * @autoflush_timer: Autoflush timer -+ * @autoflush_timer_active: If non-zero autoflush timer is active -+ * @reader_lock: Reader lock. Only one reader is allowed to -+ * have access to the timeline streams at any given time. -+ * @event_queue: Timeline stream event queue -+ * @bytes_collected: Number of bytes read by user -+ * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags -+ * otherwise. See kbase_timeline_acquire(). -+ * @obj_header_btc: Remaining bytes to copy for the object stream header -+ * @aux_header_btc: Remaining bytes to copy for the aux stream header -+ * @last_acquire_time: The time at which timeline was last acquired. 
-+ * @csf_tl_reader: CSFFW timeline reader -+ */ -+struct kbase_timeline { -+ struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; -+ struct list_head tl_kctx_list; -+ struct mutex tl_kctx_list_lock; -+ struct timer_list autoflush_timer; -+ atomic_t autoflush_timer_active; -+ struct mutex reader_lock; -+ wait_queue_head_t event_queue; -+#if MALI_UNIT_TEST -+ atomic_t bytes_collected; -+#endif /* MALI_UNIT_TEST */ -+ atomic_t *timeline_flags; -+ size_t obj_header_btc; -+ size_t aux_header_btc; -+ ktime_t last_acquire_time; -+#if MALI_USE_CSF -+ struct kbase_csf_tl_reader csf_tl_reader; ++void kbase_platform_unregister(void) ++{ ++} +#endif -+}; -+ -+void kbase_create_timeline_objects(struct kbase_device *kbdev); -+ -+/** -+ * kbase_timeline_acquire - acquire timeline for a userspace client. -+ * @kbdev: An instance of the GPU platform device, allocated from the probe -+ * method of the driver. -+ * @flags: Timeline stream flags -+ * -+ * Each timeline instance can be acquired by only one userspace client at a time. -+ * -+ * Return: Zero on success, error number on failure (e.g. if already acquired). -+ */ -+int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags); -+ -+/** -+ * kbase_timeline_release - release timeline for a userspace client. -+ * @timeline: Timeline instance to be stopped. It must be previously acquired -+ * with kbase_timeline_acquire(). -+ * -+ * Releasing the timeline instance allows it to be acquired by another userspace client. -+ */ -+void kbase_timeline_release(struct kbase_timeline *timeline); + -+#endif /* _KBASE_TIMELINE_PRIV_H */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h ++#ifdef CONFIG_MALI_BIFROST_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h new file mode 100644 -index 000000000..b6aaadedc +index 000000000..06279e2f6 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h -@@ -0,0 +1,123 @@ ++++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h +@@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * -+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -269048,118 +268741,40 @@ index 000000000..b6aaadedc + * + */ + -+#if !defined(_KBASE_TL_SERIALIZE_H) -+#define _KBASE_TL_SERIALIZE_H -+ -+#include -+ -+#include -+ -+/* The number of nanoseconds in a second. */ -+#define NSECS_IN_SEC 1000000000ull /* ns */ -+ +/** -+ * kbasep_serialize_bytes - serialize bytes to the message buffer -+ * -+ * @buffer: Message buffer -+ * @pos: Message buffer offset -+ * @bytes: Bytes to serialize -+ * @len: Length of bytes array -+ * -+ * Serialize bytes as if using memcpy(). 
++ * Power management configuration + * -+ * Return: updated position in the buffer ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf + */ -+static inline size_t kbasep_serialize_bytes( -+ char *buffer, -+ size_t pos, -+ const void *bytes, -+ size_t len) -+{ -+ KBASE_DEBUG_ASSERT(buffer); -+ KBASE_DEBUG_ASSERT(bytes); -+ -+ memcpy(&buffer[pos], bytes, len); -+ -+ return pos + len; -+} ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** -+ * kbasep_serialize_string - serialize string to the message buffer -+ * -+ * @buffer: Message buffer -+ * @pos: Message buffer offset -+ * @string: String to serialize -+ * @max_write_size: Number of bytes that can be stored in buffer -+ * -+ * String is serialized as 4 bytes for string size, -+ * then string content and then null terminator. ++ * Platform specific configuration functions + * -+ * Return: updated position in the buffer ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf + */ -+static inline size_t kbasep_serialize_string( -+ char *buffer, -+ size_t pos, -+ const char *string, -+ size_t max_write_size) -+{ -+ u32 string_len; -+ -+ KBASE_DEBUG_ASSERT(buffer); -+ KBASE_DEBUG_ASSERT(string); -+ /* Timeline string consists of at least string length and nul -+ * terminator. -+ */ -+ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); -+ max_write_size -= sizeof(string_len); -+ -+ string_len = strscpy( -+ &buffer[pos + sizeof(string_len)], -+ string, -+ max_write_size); -+ string_len += sizeof(char); -+ -+ /* Make sure that the source string fit into the buffer. */ -+ KBASE_DEBUG_ASSERT(string_len <= max_write_size); -+ -+ /* Update string length. */ -+ memcpy(&buffer[pos], &string_len, sizeof(string_len)); ++#define PLATFORM_FUNCS (NULL) + -+ return pos + sizeof(string_len) + string_len; -+} ++extern struct kbase_pm_callback_conf pm_callbacks; + +/** -+ * kbasep_serialize_timestamp - serialize timestamp to the message buffer -+ * -+ * @buffer: Message buffer -+ * @pos: Message buffer offset -+ * -+ * Get current timestamp using kbasep_get_timestamp() -+ * and serialize it as 64 bit unsigned integer. ++ * Autosuspend delay + * -+ * Return: updated position in the buffer ++ * The delay time (in milliseconds) to be used for autosuspend + */ -+static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) -+{ -+ u64 timestamp; -+ -+ timestamp = ktime_get_raw_ns(); -+ -+ return kbasep_serialize_bytes( -+ buffer, pos, -+ ×tamp, sizeof(timestamp)); -+} -+#endif /* _KBASE_TL_SERIALIZE_H */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c ++#define AUTO_SUSPEND_DELAY (100) +diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c new file mode 100644 -index 000000000..47059deb4 +index 000000000..910d4b4fd --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c -@@ -0,0 +1,313 @@ ++++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c +@@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2017-2022 ARM Limited. All rights reserved. 
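The meson platform code above stubs out kbase_platform_dvfs_event() so that it simply returns 1, i.e. this platform ignores the utilisation figures the core reports. As a rough illustration of what a non-stub implementation could do with that number, here is a self-contained userspace model that maps a 0-100 utilisation value onto a made-up OPP table with simple hysteresis; the thresholds, frequency table and function names are hypothetical and not part of the driver:

/* Userspace model of a utilisation-driven frequency policy. */
#include <stdio.h>

static const unsigned long opp_hz[] = { 200000000, 400000000, 600000000, 800000000 };
#define NUM_OPPS (sizeof(opp_hz) / sizeof(opp_hz[0]))

/* Raise the clock above ~85% load, lower it below ~30%. */
static size_t pick_opp(size_t cur, unsigned int utilisation)
{
	if (utilisation > 85 && cur + 1 < NUM_OPPS)
		return cur + 1;
	if (utilisation < 30 && cur > 0)
		return cur - 1;
	return cur;
}

int main(void)
{
	unsigned int samples[] = { 10, 50, 90, 95, 95, 20, 5 };
	size_t cur = 0;

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		cur = pick_opp(cur, samples[i]);
		printf("util=%3u%% -> %lu Hz\n", samples[i], opp_hz[cur]);
	}
	return 0;
}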
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -269177,4711 +268792,4286 @@ index 000000000..47059deb4 + * + */ + -+#include "mali_kbase_tlstream.h" -+#include "mali_kbase_tl_serialize.h" -+#include "mali_kbase_mipe_proto.h" ++#include ++#include ++#include + -+/** -+ * kbasep_packet_header_setup - setup the packet header -+ * @buffer: pointer to the buffer -+ * @pkt_family: packet's family -+ * @pkt_type: packet's type -+ * @pkt_class: packet's class -+ * @stream_id: stream id -+ * @numbered: non-zero if this stream is numbered -+ * -+ * Function sets up immutable part of packet header in the given buffer. -+ */ -+static void kbasep_packet_header_setup( -+ char *buffer, -+ enum tl_packet_family pkt_family, -+ enum tl_packet_class pkt_class, -+ enum tl_packet_type pkt_type, -+ unsigned int stream_id, -+ int numbered) -+{ -+ u32 words[2] = { -+ MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id), -+ MIPE_PACKET_HEADER_W1(0, !!numbered), -+ }; -+ memcpy(buffer, words, sizeof(words)); -+} ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbasep_packet_header_update - update the packet header -+ * @buffer: pointer to the buffer -+ * @data_size: amount of data carried in this packet -+ * @numbered: non-zero if the stream is numbered -+ * -+ * Function updates mutable part of packet header in the given buffer. -+ * Note that value of data_size must not include size of the header. -+ */ -+static void kbasep_packet_header_update( -+ char *buffer, -+ size_t data_size, -+ int numbered) -+{ -+ u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); ++#include "mali_kbase_config_platform.h" + -+ KBASE_DEBUG_ASSERT(buffer); + -+ /* we copy the contents of word1 to its respective position in the buffer */ -+ memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); -+} ++static struct reset_control **resets; ++static int nr_resets; + -+/** -+ * kbasep_packet_number_update - update the packet number -+ * @buffer: pointer to the buffer -+ * @counter: value of packet counter for this packet's stream -+ * -+ * Function updates packet number embedded within the packet placed in the -+ * given buffer. 
-+ */ -+static void kbasep_packet_number_update(char *buffer, u32 counter) ++static int resets_init(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(buffer); ++ struct device_node *np; ++ int i; ++ int err = 0; + -+ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); -+} ++ np = kbdev->dev->of_node; + -+void kbase_tlstream_reset(struct kbase_tlstream *stream) -+{ -+ unsigned int i; ++ nr_resets = of_count_phandle_with_args(np, "resets", "#reset-cells"); ++ if (nr_resets <= 0) { ++ dev_err(kbdev->dev, "Failed to get GPU resets from dtb\n"); ++ return nr_resets; ++ } + -+ for (i = 0; i < PACKET_COUNT; i++) { -+ if (stream->numbered) -+ atomic_set( -+ &stream->buffer[i].size, -+ PACKET_HEADER_SIZE + -+ PACKET_NUMBER_SIZE); -+ else -+ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); ++ resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), ++ GFP_KERNEL); ++ if (!resets) ++ return -ENOMEM; ++ ++ for (i = 0; i < nr_resets; ++i) { ++ resets[i] = devm_reset_control_get_exclusive_by_index( ++ kbdev->dev, i); ++ if (IS_ERR(resets[i])) { ++ err = PTR_ERR(resets[i]); ++ nr_resets = i; ++ break; ++ } + } + -+ atomic_set(&stream->wbi, 0); -+ atomic_set(&stream->rbi, 0); ++ return err; +} + -+/* Configuration of timeline streams generated by kernel. */ -+static const struct { -+ enum tl_packet_family pkt_family; -+ enum tl_packet_class pkt_class; -+ enum tl_packet_type pkt_type; -+ enum tl_stream_id stream_id; -+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { -+ { -+ TL_PACKET_FAMILY_TL, -+ TL_PACKET_CLASS_OBJ, -+ TL_PACKET_TYPE_SUMMARY, -+ TL_STREAM_ID_KERNEL, -+ }, -+ { -+ TL_PACKET_FAMILY_TL, -+ TL_PACKET_CLASS_OBJ, -+ TL_PACKET_TYPE_BODY, -+ TL_STREAM_ID_KERNEL, -+ }, -+ { -+ TL_PACKET_FAMILY_TL, -+ TL_PACKET_CLASS_AUX, -+ TL_PACKET_TYPE_BODY, -+ TL_STREAM_ID_KERNEL, -+ }, -+#if MALI_USE_CSF -+ { -+ TL_PACKET_FAMILY_TL, -+ TL_PACKET_CLASS_OBJ, -+ TL_PACKET_TYPE_BODY, -+ TL_STREAM_ID_CSFFW, -+ }, -+#endif -+}; -+ -+void kbase_tlstream_init( -+ struct kbase_tlstream *stream, -+ enum tl_stream_type stream_type, -+ wait_queue_head_t *ready_read) ++static int pm_callback_soft_reset(struct kbase_device *kbdev) +{ -+ unsigned int i; ++ int ret, i; + -+ KBASE_DEBUG_ASSERT(stream); -+ KBASE_DEBUG_ASSERT(stream_type < TL_STREAM_TYPE_COUNT); ++ if (!resets) { ++ ret = resets_init(kbdev); ++ if (ret) ++ return ret; ++ } + -+ spin_lock_init(&stream->lock); ++ for (i = 0; i < nr_resets; ++i) ++ reset_control_assert(resets[i]); + -+ /* All packets carrying tracepoints shall be numbered. 
*/ -+ if (tl_stream_cfg[stream_type].pkt_type == TL_PACKET_TYPE_BODY) -+ stream->numbered = 1; -+ else -+ stream->numbered = 0; ++ udelay(10); + -+ for (i = 0; i < PACKET_COUNT; i++) -+ kbasep_packet_header_setup( -+ stream->buffer[i].data, -+ tl_stream_cfg[stream_type].pkt_family, -+ tl_stream_cfg[stream_type].pkt_class, -+ tl_stream_cfg[stream_type].pkt_type, -+ tl_stream_cfg[stream_type].stream_id, -+ stream->numbered); ++ for (i = 0; i < nr_resets; ++i) ++ reset_control_deassert(resets[i]); + -+#if MALI_UNIT_TEST -+ atomic_set(&stream->bytes_generated, 0); -+#endif -+ stream->ready_read = ready_read; ++ udelay(10); + -+ kbase_tlstream_reset(stream); ++ /* Override Power Management Settings, values from manufacturer's defaults */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), ++ 0xfff | (0x20 << 16)); ++ ++ /* ++ * RESET_COMPLETED interrupt will be raised, so continue with ++ * the normal soft reset procedure ++ */ ++ return 0; +} + -+void kbase_tlstream_term(struct kbase_tlstream *stream) ++static void enable_gpu_power_control(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(stream); ++ unsigned int i; ++ ++#if defined(CONFIG_REGULATOR) ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ if (WARN_ON(kbdev->regulators[i] == NULL)) ++ ; ++ else if (!regulator_is_enabled(kbdev->regulators[i])) ++ WARN_ON(regulator_enable(kbdev->regulators[i])); ++ } ++#endif ++ ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ if (WARN_ON(kbdev->clocks[i] == NULL)) ++ ; ++ else if (!__clk_is_enabled(kbdev->clocks[i])) ++ WARN_ON(clk_prepare_enable(kbdev->clocks[i])); ++ } +} + -+/** -+ * kbasep_tlstream_msgbuf_submit - submit packet to user space -+ * @stream: Pointer to the stream structure -+ * @wb_idx_raw: Write buffer index -+ * @wb_size: Length of data stored in the current buffer -+ * -+ * Updates currently written buffer with the packet header. -+ * Then write index is incremented and the buffer is handed to user space. -+ * Parameters of the new buffer are returned using provided arguments. -+ * -+ * Return: length of data in the new buffer -+ * -+ * Warning: the user must update the stream structure with returned value. -+ */ -+static size_t kbasep_tlstream_msgbuf_submit( -+ struct kbase_tlstream *stream, -+ unsigned int wb_idx_raw, -+ unsigned int wb_size) ++static void disable_gpu_power_control(struct kbase_device *kbdev) +{ -+ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; ++ unsigned int i; + -+ /* Set stream as flushed. */ -+ atomic_set(&stream->autoflush_counter, -1); ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ if (WARN_ON(kbdev->clocks[i] == NULL)) ++ ; ++ else if (__clk_is_enabled(kbdev->clocks[i])) { ++ clk_disable_unprepare(kbdev->clocks[i]); ++ WARN_ON(__clk_is_enabled(kbdev->clocks[i])); ++ } ++ } + -+ kbasep_packet_header_update( -+ stream->buffer[wb_idx].data, -+ wb_size - PACKET_HEADER_SIZE, -+ stream->numbered); ++#if defined(CONFIG_REGULATOR) ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ if (WARN_ON(kbdev->regulators[i] == NULL)) ++ ; ++ else if (regulator_is_enabled(kbdev->regulators[i])) ++ WARN_ON(regulator_disable(kbdev->regulators[i])); ++ } ++#endif ++} + -+ if (stream->numbered) -+ kbasep_packet_number_update( -+ stream->buffer[wb_idx].data, -+ wb_idx_raw); ++static int pm_callback_power_on(struct kbase_device *kbdev) ++{ ++ int ret = 1; /* Assume GPU has been powered off */ ++ int error; + -+ /* Increasing write buffer index will expose this packet to the reader. 
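The meson runtime-PM callbacks in this hunk rely on autosuspend: pm_callback_power_off() only marks the device busy and drops its usage count, and kbase_device_runtime_init() programs AUTO_SUSPEND_DELAY (100 ms) before enabling runtime PM, so the GPU is really powered down only after it has stayed idle that long. A minimal standalone model of that bookkeeping, tracking a usage count and a last-busy timestamp with an illustrative millisecond clock:

/* Userspace model of runtime-PM autosuspend: suspend is allowed only when no
 * user holds a reference and the device has been idle for the full delay. */
#include <stdbool.h>
#include <stdio.h>

struct rpm_state {
	int usage_count;          /* get/put reference analogue       */
	unsigned long last_busy;  /* ms timestamp of last activity    */
	unsigned long delay_ms;   /* AUTO_SUSPEND_DELAY analogue      */
};

static bool may_suspend(const struct rpm_state *s, unsigned long now_ms)
{
	return s->usage_count == 0 && (now_ms - s->last_busy) >= s->delay_ms;
}

int main(void)
{
	struct rpm_state s = { .usage_count = 0, .last_busy = 0, .delay_ms = 100 };

	s.usage_count++;   /* power_on: grab a reference            */
	s.last_busy = 40;  /* GPU activity at t = 40 ms             */
	s.usage_count--;   /* power_off: mark busy, drop reference  */

	printf("t=100ms: may_suspend=%d\n", may_suspend(&s, 100)); /* 0: only 60 ms idle */
	printf("t=150ms: may_suspend=%d\n", may_suspend(&s, 150)); /* 1: 110 ms idle     */
	return 0;
}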
-+ * As stream->lock is not taken on reader side we must make sure memory -+ * is updated correctly before this will happen. -+ */ -+ smp_wmb(); -+ atomic_inc(&stream->wbi); ++ dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); + -+ /* Inform user that packets are ready for reading. */ -+ wake_up_interruptible(stream->ready_read); ++#ifdef KBASE_PM_RUNTIME ++ error = pm_runtime_get_sync(kbdev->dev); ++ if (error == 1) { ++ /* ++ * Let core know that the chip has not been ++ * powered off, so we can save on re-initialization. ++ */ ++ ret = 0; ++ } ++ dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); ++#else ++ enable_gpu_power_control(kbdev); ++#endif + -+ wb_size = PACKET_HEADER_SIZE; -+ if (stream->numbered) -+ wb_size += PACKET_NUMBER_SIZE; ++ return ret; ++} + -+ return wb_size; ++static void pm_callback_power_off(struct kbase_device *kbdev) ++{ ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ ++#ifdef KBASE_PM_RUNTIME ++ pm_runtime_mark_last_busy(kbdev->dev); ++ pm_runtime_put_autosuspend(kbdev->dev); ++#else ++ /* Power down the GPU immediately as runtime PM is disabled */ ++ disable_gpu_power_control(kbdev); ++#endif +} + -+char *kbase_tlstream_msgbuf_acquire( -+ struct kbase_tlstream *stream, -+ size_t msg_size, -+ unsigned long *flags) __acquires(&stream->lock) ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_init(struct kbase_device *kbdev) +{ -+ unsigned int wb_idx_raw; -+ unsigned int wb_idx; -+ size_t wb_size; ++ int ret = 0; + -+ KBASE_DEBUG_ASSERT( -+ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= -+ msg_size); ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ spin_lock_irqsave(&stream->lock, *flags); ++ pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); ++ pm_runtime_use_autosuspend(kbdev->dev); + -+ wb_idx_raw = atomic_read(&stream->wbi); -+ wb_idx = wb_idx_raw % PACKET_COUNT; -+ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ pm_runtime_set_active(kbdev->dev); ++ pm_runtime_enable(kbdev->dev); + -+ /* Select next buffer if data will not fit into current one. */ -+ if (wb_size + msg_size > PACKET_SIZE) { -+ wb_size = kbasep_tlstream_msgbuf_submit( -+ stream, wb_idx_raw, wb_size); -+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; ++ if (!pm_runtime_enabled(kbdev->dev)) { ++ dev_warn(kbdev->dev, "pm_runtime not enabled"); ++ ret = -EINVAL; ++ } else if (atomic_read(&kbdev->dev->power.usage_count)) { ++ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", ++ __func__, atomic_read(&kbdev->dev->power.usage_count)); ++ ret = -EINVAL; + } + -+ /* Reserve space in selected buffer. */ -+ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); ++ return ret; ++} + -+#if MALI_UNIT_TEST -+ atomic_add(msg_size, &stream->bytes_generated); -+#endif /* MALI_UNIT_TEST */ ++static void kbase_device_runtime_disable(struct kbase_device *kbdev) ++{ ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ return &stream->buffer[wb_idx].data[wb_size]; ++ if (atomic_read(&kbdev->dev->power.usage_count)) ++ dev_warn(kbdev->dev, "%s: Device runtime usage count unexpectedly non zero %d", ++ __func__, atomic_read(&kbdev->dev->power.usage_count)); ++ ++ pm_runtime_disable(kbdev->dev); +} ++#endif /* KBASE_PM_RUNTIME */ + -+void kbase_tlstream_msgbuf_release( -+ struct kbase_tlstream *stream, -+ unsigned long flags) __releases(&stream->lock) ++static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ -+ /* Mark stream as containing unflushed data. 
*/ -+ atomic_set(&stream->autoflush_counter, 0); ++ dev_dbg(kbdev->dev, "%s\n", __func__); + -+ spin_unlock_irqrestore(&stream->lock, flags); ++ enable_gpu_power_control(kbdev); ++ return 0; +} + -+size_t kbase_tlstream_flush_stream( -+ struct kbase_tlstream *stream) ++static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ unsigned int wb_idx_raw; -+ unsigned int wb_idx; -+ size_t wb_size; -+ size_t min_size = PACKET_HEADER_SIZE; ++ dev_dbg(kbdev->dev, "%s\n", __func__); + ++ disable_gpu_power_control(kbdev); ++} + -+ if (stream->numbered) -+ min_size += PACKET_NUMBER_SIZE; ++static void pm_callback_resume(struct kbase_device *kbdev) ++{ ++ int ret = pm_callback_runtime_on(kbdev); + -+ spin_lock_irqsave(&stream->lock, flags); ++ WARN_ON(ret); ++} + -+ wb_idx_raw = atomic_read(&stream->wbi); -+ wb_idx = wb_idx_raw % PACKET_COUNT; -+ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++static void pm_callback_suspend(struct kbase_device *kbdev) ++{ ++ pm_callback_runtime_off(kbdev); ++} + -+ if (wb_size > min_size) { -+ wb_size = kbasep_tlstream_msgbuf_submit( -+ stream, wb_idx_raw, wb_size); -+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; -+ atomic_set(&stream->buffer[wb_idx].size, wb_size); -+ } else { -+ /* we return that there is no bytes to be read.*/ -+ /* Timeline io fsync will use this info the decide whether -+ * fsync should return an error -+ */ -+ wb_size = 0; -+ } ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = pm_callback_suspend, ++ .power_resume_callback = pm_callback_resume, ++ .soft_reset_callback = pm_callback_soft_reset, ++#ifdef KBASE_PM_RUNTIME ++ .power_runtime_init_callback = kbase_device_runtime_init, ++ .power_runtime_term_callback = kbase_device_runtime_disable, ++ .power_runtime_on_callback = pm_callback_runtime_on, ++ .power_runtime_off_callback = pm_callback_runtime_off, ++#else /* KBASE_PM_RUNTIME */ ++ .power_runtime_init_callback = NULL, ++ .power_runtime_term_callback = NULL, ++ .power_runtime_on_callback = NULL, ++ .power_runtime_off_callback = NULL, ++#endif /* KBASE_PM_RUNTIME */ ++}; +diff --git a/drivers/gpu/arm/bifrost/platform/rk/Kbuild b/drivers/gpu/arm/bifrost/platform/rk/Kbuild +new file mode 100755 +index 000000000..a1e78cfc4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/rk/Kbuild +@@ -0,0 +1,16 @@ ++# ++# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
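The timeline-stream code that this patch removes (the interleaved '-' lines around here) keeps a small ring of fixed-size packet buffers: a free-running write counter selects buffer wbi % PACKET_COUNT, and body packets carry that raw counter so the userspace reader can tell how many packets were overwritten if it falls behind. A compact standalone model of that numbering scheme, using tiny illustrative sizes instead of the real PACKET_COUNT and PACKET_SIZE:

/* Model of a numbered packet ring: the writer never blocks, and the reader
 * infers dropped packets from gaps in the embedded sequence numbers. */
#include <stdio.h>

#define RING_SLOTS 4 /* stands in for PACKET_COUNT */

struct ring {
	unsigned int wbi;             /* free-running write index          */
	unsigned int seq[RING_SLOTS]; /* sequence number stored per slot   */
};

static void ring_write(struct ring *r)
{
	r->seq[r->wbi % RING_SLOTS] = r->wbi; /* number the packet, then expose it */
	r->wbi++;
}

int main(void)
{
	struct ring r = { 0 };
	unsigned int expected = 0;

	for (int i = 0; i < 6; i++) /* writer outpaces the reader */
		ring_write(&r);

	/* Reader wakes up late: the oldest surviving packet shows what was lost. */
	unsigned int oldest = r.wbi - RING_SLOTS;
	if (oldest > expected)
		printf("reader lost %u packet(s)\n", oldest - expected);
	return 0;
}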
++# ++# + -+ spin_unlock_irqrestore(&stream->lock, flags); -+ return wb_size; -+} -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_rk.o +diff --git a/drivers/gpu/arm/bifrost/platform/rk/custom_log.h b/drivers/gpu/arm/bifrost/platform/rk/custom_log.h new file mode 100644 -index 000000000..c1428495b +index 000000000..5de70ee13 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h -@@ -0,0 +1,168 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ b/drivers/gpu/arm/bifrost/platform/rk/custom_log.h +@@ -0,0 +1,192 @@ +/* -+ * -+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT RockChip Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * of such GNU licence. + */ + -+#if !defined(_KBASE_TLSTREAM_H) -+#define _KBASE_TLSTREAM_H ++#ifndef __CUSTOM_LOG_H__ ++#define __CUSTOM_LOG_H__ + -+#include -+#include -+#include ++#ifdef __cplusplus ++extern "C" { ++#endif + -+/* The maximum size of a single packet used by timeline. */ -+#define PACKET_SIZE 4096 /* bytes */ ++/* ----------------------------------------------------------------------------- ++ * Include Files ++ * ----------------------------------------------------------------------------- ++ */ ++#include ++#include + -+/* The number of packets used by one timeline stream. */ -+#define PACKET_COUNT 128 ++/* ----------------------------------------------------------------------------- ++ * Macros Definition ++ * ----------------------------------------------------------------------------- ++ */ + -+/* The maximum expected length of string in tracepoint descriptor. */ -+#define STRLEN_MAX 64 /* bytes */ ++/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. */ ++/* #define ENABLE_DEBUG_LOG */ ++ ++/*----------------------------------------------------------------------------*/ ++ ++#ifdef ENABLE_VERBOSE_LOG ++/** Verbose log. */ ++#define V(fmt, args...) \ ++ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define V(...) ((void)0) ++#endif ++ ++#ifdef ENABLE_DEBUG_LOG ++/** Debug log. */ ++#define D(fmt, args...) \ ++ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define D(...) ((void)0) ++#endif ++ ++#define I(fmt, args...) \ ++ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++ ++#define W(fmt, args...) \ ++ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ ++ fmt "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++ ++#define E(fmt, args...) 
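custom_log.h, whose first half appears above, builds its V/D/I/W/E macros from three standard tricks: __FILE__/__LINE__/__func__ to capture the call site, the GNU `## args` paste so the variadic list may be empty, and `#var` stringification for the D_DEC/D_HEX style helpers that follow. A small standalone program using the same techniques (macro names here are illustrative, not the header's):

/* Demonstrates the macro techniques custom_log.h relies on:
 * call-site capture, empty-variadic "## args", and stringification. */
#include <stdio.h>

#define LOG(fmt, args...) \
	printf("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt "\n", \
	       __FILE__, __LINE__, __func__, ## args)

/* #var turns the argument into a string literal, so the value is printed
 * together with the expression that produced it. */
#define LOG_DEC(var) LOG(#var " = %d.", (var))
#define LOG_HEX(var) LOG(#var " = 0x%x.", (unsigned int)(var))

int main(void)
{
	int delay_ms = 200;

	LOG("power-off work queued"); /* empty variadic list is fine */
	LOG_DEC(delay_ms);
	LOG_HEX(delay_ms);
	return 0;
}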
\ ++ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++ ++/*-------------------------------------------------------*/ ++ ++/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ ++#define D_DEC(var) D(#var " = %d.", var) ++ ++#define E_DEC(var) E(#var " = %d.", var) ++ ++/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ ++#define D_HEX(var) D(#var " = 0x%x.", var) ++ ++#define E_HEX(var) E(#var " = 0x%x.", var) + +/** -+ * struct kbase_tlstream - timeline stream structure -+ * @lock: Message order lock -+ * @buffer: Array of buffers -+ * @buffer.size: Number of bytes in buffer -+ * @buffer.data: Buffer's data -+ * @wbi: Write buffer index -+ * @rbi: Read buffer index -+ * @numbered: If non-zero stream's packets are sequentially numbered -+ * @autoflush_counter: Counter tracking stream's autoflush state -+ * @ready_read: Pointer to a wait queue, which is signaled when -+ * timeline messages are ready for collection. -+ * @bytes_generated: Number of bytes generated by tracepoint messages -+ * -+ * This structure holds information needed to construct proper packets in the -+ * timeline stream. -+ * -+ * Each message in the sequence must bear a timestamp that is -+ * greater than the previous message in the same stream. For this reason -+ * a lock is held throughout the process of message creation. -+ * -+ * Each stream contains a set of buffers. Each buffer will hold one MIPE -+ * packet. In case there is no free space required to store the incoming -+ * message the oldest buffer is discarded. Each packet in timeline body -+ * stream has a sequence number embedded, this value must increment -+ * monotonically and is used by the packets receiver to discover these -+ * buffer overflows. -+ * -+ * The autoflush counter is set to a negative number when there is no data -+ * pending for flush and it is set to zero on every update of the buffer. The -+ * autoflush timer will increment the counter by one on every expiry. If there -+ * is no activity on the buffer for two consecutive timer expiries, the stream -+ * buffer will be flushed. ++ * 使用 D(), 以å六进制的形å¼, ++ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. + */ -+struct kbase_tlstream { -+ spinlock_t lock; ++#define D_PTR(ptr) D(#ptr " = %p.", ptr) + -+ struct { -+ atomic_t size; -+ char data[PACKET_SIZE]; -+ } buffer[PACKET_COUNT]; ++#define E_PTR(ptr) E(#ptr " = %p.", ptr) + -+ atomic_t wbi; -+ atomic_t rbi; ++/** 使用 D(), æ‰“å° char 字串. */ ++#define D_STR(p_str) \ ++do { \ ++ if (!p_str) { \ ++ D(#p_str " = NULL."); \ ++ else \ ++ D(#p_str " = '%s'.", p_str); \ ++} while (0) + -+ int numbered; -+ atomic_t autoflush_counter; -+ wait_queue_head_t *ready_read; -+#if MALI_UNIT_TEST -+ atomic_t bytes_generated; -+#endif -+}; ++#define E_STR(p_str) \ ++do { \ ++ if (!p_str) \ ++ E(#p_str " = NULL."); \ ++ else \ ++ E(#p_str " = '%s'.", p_str); \ ++} while (0) + -+/* Types of streams generated by timeline. */ -+enum tl_stream_type { -+ TL_STREAM_TYPE_FIRST, -+ TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, -+ TL_STREAM_TYPE_OBJ, -+ TL_STREAM_TYPE_AUX, -+#if MALI_USE_CSF -+ TL_STREAM_TYPE_CSFFW, ++#ifdef ENABLE_DEBUG_LOG ++/** ++ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. 
++ */ ++#define D_MEM(p_start, len) \ ++do { \ ++ int i = 0; \ ++ char *p = (char *)(p_start); \ ++ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ ++ (p_start), \ ++ (len)); \ ++ pr_debug("\t\t"); \ ++ for (i = 0; i < (len); i++) \ ++ pr_debug("0x%02x, ", p[i]); \ ++ pr_debug("\n"); \ ++} while (0) ++#else ++#define D_MEM(...) ((void)0) +#endif -+ TL_STREAM_TYPE_COUNT -+}; ++ ++/*-------------------------------------------------------*/ + +/** -+ * kbase_tlstream_init - initialize timeline stream -+ * @stream: Pointer to the stream structure -+ * @stream_type: Stream type -+ * @ready_read: Pointer to a wait queue to signal when -+ * timeline messages are ready for collection. ++ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, ++ * å°†å˜é‡ 'ret_var' 设置 'err_code', ++ * log 输出对应的 Error Caution, ++ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. ++ * @param msg ++ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. ++ * @param ret_var ++ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, ++ * 将被设置具体的 Error Code. ++ * 通常是 'ret' or 'result'. ++ * @param err_code ++ * 表å¾ç‰¹å®š error 的常数标识, ++ * 通常是 å®çš„å½¢æ€. ++ * @param label ++ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, ++ * 通常就是 'EXIT'. ++ * @param args... ++ * 对应 'msg_fmt' 实å‚中, ++ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. ++ */ ++#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) \ ++do { \ ++ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ ++ (err_code), \ ++ ## args); \ ++ (ret_var) = (err_code); \ ++ goto label; \ ++} while (0) ++ ++/* ----------------------------------------------------------------------------- ++ * Types and Structures Definition ++ * ----------------------------------------------------------------------------- ++ */ ++ ++/* ----------------------------------------------------------------------------- ++ * Global Functions' Prototype ++ * ----------------------------------------------------------------------------- ++ */ ++ ++/* ----------------------------------------------------------------------------- ++ * Inline Functions Implementation ++ * ----------------------------------------------------------------------------- ++ */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* __CUSTOM_LOG_H__ */ +diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..ea01d502c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_platform.h +@@ -0,0 +1,94 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + */ -+void kbase_tlstream_init(struct kbase_tlstream *stream, -+ enum tl_stream_type stream_type, -+ wait_queue_head_t *ready_read); + +/** -+ * kbase_tlstream_term - terminate timeline stream -+ * @stream: Pointer to the stream structure ++ * @file mali_kbase_config_platform.h ++ * 声明 platform_config_of_rk (platform_rk çš„ platform_config). + */ -+void kbase_tlstream_term(struct kbase_tlstream *stream); + +/** -+ * kbase_tlstream_reset - reset stream -+ * @stream: Pointer to the stream structure ++ * Maximum frequency GPU will be clocked at. ++ * Given in kHz. ++ * This must be specified as there is no default value. + * -+ * Function discards all pending messages and resets packet counters. 
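SET_ERROR_AND_JUMP(), defined just above and documented in Chinese in the header, packages the usual error idiom: log why the call failed, store the error code in the caller's result variable, and goto a single cleanup label. The following self-contained sketch shows the same flow with a simplified local macro that only mimics its shape; the resource names and error sites are made up:

/* Standalone illustration of the log / set-error / goto-cleanup idiom. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define FAIL_AND_JUMP(msg, ret_var, err_code, label)           \
do {                                                           \
	fprintf(stderr, "E: set '" #ret_var "' to %d: %s\n",  \
		(err_code), (msg));                            \
	(ret_var) = (err_code);                                \
	goto label;                                            \
} while (0)

static int init_device(int simulate_failure)
{
	int ret = 0;
	char *buf = malloc(64);

	if (!buf)
		FAIL_AND_JUMP("no memory for buffer", ret, -ENOMEM, out);
	if (simulate_failure)
		FAIL_AND_JUMP("probe step failed", ret, -ENODEV, out_free);

	printf("device initialised\n");
out_free:
	free(buf);
out:
	return ret;
}

int main(void)
{
	printf("first try : %d\n", init_device(1));
	printf("second try: %d\n", init_device(0));
	return 0;
}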
++ * Attached value: number in kHz ++ * Default value: NA + */ -+void kbase_tlstream_reset(struct kbase_tlstream *stream); ++#define GPU_FREQ_KHZ_MAX (5000) + +/** -+ * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer -+ * @stream: Pointer to the stream structure -+ * @msg_size: Message size -+ * @flags: Pointer to store flags passed back on stream release ++ * Minimum frequency GPU will be clocked at. ++ * Given in kHz. ++ * This must be specified as there is no default value. + * -+ * Lock the stream and reserve the number of bytes requested -+ * in msg_size for the user. ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN (5000) ++ ++/** ++ * CPU_SPEED_FUNC ++ * - A pointer to a function that calculates the CPU clock + * -+ * Return: pointer to the buffer where a message can be stored ++ * CPU clock speed of the platform is in MHz ++ * - see kbase_cpu_clk_speed_func for the function prototype. + * -+ * Warning: The stream must be released with kbase_tlstream_msgbuf_release(). -+ * Only atomic operations are allowed while the stream is locked -+ * (i.e. do not use any operation that may sleep). ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA + */ -+char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, -+ size_t msg_size, unsigned long *flags) __acquires(&stream->lock); ++#define CPU_SPEED_FUNC (NULL) + +/** -+ * kbase_tlstream_msgbuf_release - unlock selected stream -+ * @stream: Pointer to the stream structure -+ * @flags: Value obtained during stream acquire ++ * GPU_SPEED_FUNC ++ * - A pointer to a function that calculates the GPU clock + * -+ * Release the stream that has been previously -+ * locked with a call to kbase_tlstream_msgbuf_acquire(). ++ * GPU clock speed of the platform in MHz ++ * - see kbase_gpu_clk_speed_func for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. ++ * Default Value: NA + */ -+void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, -+ unsigned long flags) __releases(&stream->lock); ++#define GPU_SPEED_FUNC (NULL) + +/** -+ * kbase_tlstream_flush_stream - flush stream -+ * @stream: Pointer to the stream structure ++ * Power management configuration + * -+ * Flush pending data in the timeline stream. 
++ * Attached value: ++ * pointer to @ref kbase_pm_callback_conf ++ * Default value: ++ * See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++extern struct kbase_pm_callback_conf pm_callbacks; ++ ++/** ++ * Platform specific configuration functions + * -+ * Return: Number of bytes available flushed and available to be read ++ * Attached value: ++ * pointer to @ref kbase_platform_funcs_conf ++ * Default value: ++ * See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (&platform_funcs) ++extern struct kbase_platform_funcs_conf platform_funcs; ++ ++/** ++ * Secure mode switch + * ++ * Attached value: pointer to @ref kbase_secure_ops + */ -+size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream); ++#define SECURE_CALLBACKS (NULL) + -+#endif /* _KBASE_TLSTREAM_H */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c ++#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) ++extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; ++ ++int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev); ++void kbase_platform_rk_uninit_opp_table(struct kbase_device *kbdev); ++int kbase_platform_rk_enable_regulator(struct kbase_device *kbdev); +diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c new file mode 100644 -index 000000000..f62c75583 +index 000000000..3ac4aef79 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c -@@ -0,0 +1,4223 @@ -+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_config_rk.c +@@ -0,0 +1,724 @@ +/* -+ * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT RockChip Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU license. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ * ++ * of such GNU licence. + */ + -+/* -+ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py. -+ * DO NOT EDIT. ++/* #define ENABLE_DEBUG_LOG */ ++#include "custom_log.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if MALI_USE_CSF ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "mali_kbase_config_platform.h" ++#include "mali_kbase_rk.h" ++ ++#define POWER_DOWN_FREQ 200000000 ++ ++/** ++ * @file mali_kbase_config_rk.c ++ * 对 platform_config_of_rk 的具体实现. ++ * ++ * mali_device_driver 包å«ä¸¤éƒ¨åˆ† : ++ * .DP : platform_dependent_part_in_mdd : ++ * ä¾èµ– platform 部分, ++ * æºç åœ¨ /platform// ++ * 在 mali_device_driver 内部, ++ * 记为 platform_dependent_part, ++ * 也被记为 platform_specific_code. ++ * .DP : common_parts_in_mdd : ++ * arm 实现的通用的部分, ++ * æºç åœ¨ / 下. ++ * 在 mali_device_driver 内部, 记为 common_parts. 
+ */ + -+#include "mali_kbase_tracepoints.h" -+#include "mali_kbase_tlstream.h" -+#include "mali_kbase_tl_serialize.h" ++/*---------------------------------------------------------------------------*/ ++#ifndef CONFIG_MALI_BIFROST_DEVFREQ ++static inline void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, ++ struct kbasep_pm_metrics *last, ++ struct kbasep_pm_metrics *diff) ++{ ++} ++#endif + -+/* clang-format off */ ++#ifdef CONFIG_REGULATOR ++static int rk_pm_enable_regulator(struct kbase_device *kbdev); ++static void rk_pm_disable_regulator(struct kbase_device *kbdev); ++#else ++static inline int rk_pm_enable_regulator(struct kbase_device *kbdev) ++{ ++ return 0; ++} + -+/* Message ids of trace events that are recorded in the obj stream. */ -+enum tl_msg_id_obj { -+ KBASE_TL_NEW_CTX, -+ KBASE_TL_NEW_GPU, -+ KBASE_TL_NEW_LPU, -+ KBASE_TL_NEW_ATOM, -+ KBASE_TL_NEW_AS, -+ KBASE_TL_DEL_CTX, -+ KBASE_TL_DEL_ATOM, -+ KBASE_TL_LIFELINK_LPU_GPU, -+ KBASE_TL_LIFELINK_AS_GPU, -+ KBASE_TL_RET_CTX_LPU, -+ KBASE_TL_RET_ATOM_CTX, -+ KBASE_TL_RET_ATOM_LPU, -+ KBASE_TL_NRET_CTX_LPU, -+ KBASE_TL_NRET_ATOM_CTX, -+ KBASE_TL_NRET_ATOM_LPU, -+ KBASE_TL_RET_AS_CTX, -+ KBASE_TL_NRET_AS_CTX, -+ KBASE_TL_RET_ATOM_AS, -+ KBASE_TL_NRET_ATOM_AS, -+ KBASE_TL_ATTRIB_ATOM_CONFIG, -+ KBASE_TL_JIT_USEDPAGES, -+ KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, -+ KBASE_TL_ATTRIB_ATOM_JITFREEINFO, -+ KBASE_TL_ATTRIB_AS_CONFIG, -+ KBASE_TL_EVENT_LPU_SOFTSTOP, -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, -+ KBASE_TL_EVENT_ATOM_SOFTJOB_START, -+ KBASE_TL_EVENT_ATOM_SOFTJOB_END, -+ KBASE_TL_ARBITER_GRANTED, -+ KBASE_TL_ARBITER_STARTED, -+ KBASE_TL_ARBITER_STOP_REQUESTED, -+ KBASE_TL_ARBITER_STOPPED, -+ KBASE_TL_ARBITER_REQUESTED, -+ KBASE_JD_GPU_SOFT_RESET, -+ KBASE_JD_TILER_HEAP_CHUNK_ALLOC, -+ KBASE_TL_JS_SCHED_START, -+ KBASE_TL_JS_SCHED_END, -+ KBASE_TL_JD_SUBMIT_ATOM_START, -+ KBASE_TL_JD_SUBMIT_ATOM_END, -+ KBASE_TL_JD_DONE_NO_LOCK_START, -+ KBASE_TL_JD_DONE_NO_LOCK_END, -+ KBASE_TL_JD_DONE_START, -+ KBASE_TL_JD_DONE_END, -+ KBASE_TL_JD_ATOM_COMPLETE, -+ KBASE_TL_RUN_ATOM_START, -+ KBASE_TL_RUN_ATOM_END, -+ KBASE_TL_ATTRIB_ATOM_PRIORITY, -+ KBASE_TL_ATTRIB_ATOM_STATE, -+ KBASE_TL_ATTRIB_ATOM_PRIORITIZED, -+ KBASE_TL_ATTRIB_ATOM_JIT, -+ KBASE_TL_KBASE_NEW_DEVICE, -+ KBASE_TL_KBASE_GPUCMDQUEUE_KICK, -+ KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, -+ KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, -+ KBASE_TL_KBASE_DEVICE_HALTING_CSG, -+ KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, -+ KBASE_TL_KBASE_DEVICE_CSG_IDLE, -+ KBASE_TL_KBASE_NEW_CTX, -+ KBASE_TL_KBASE_DEL_CTX, -+ KBASE_TL_KBASE_CTX_ASSIGN_AS, -+ KBASE_TL_KBASE_CTX_UNASSIGN_AS, -+ KBASE_TL_KBASE_NEW_KCPUQUEUE, -+ KBASE_TL_KBASE_DEL_KCPUQUEUE, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, -+ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, -+ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, -+ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, -+ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, -+ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, -+ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, -+ 
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, -+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, -+ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, -+ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, -+ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, -+ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, -+ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, -+ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, -+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, -+ KBASE_TL_KBASE_CSFFW_FW_RELOADING, -+ KBASE_TL_KBASE_CSFFW_FW_ENABLING, -+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, -+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, -+ KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, -+ KBASE_TL_KBASE_CSFFW_FW_DISABLING, -+ KBASE_TL_KBASE_CSFFW_FW_OFF, -+ KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, -+ KBASE_OBJ_MSG_COUNT, ++static inline void rk_pm_disable_regulator(struct kbase_device *kbdev) ++{ ++} ++#endif ++ ++static int rk_pm_enable_clk(struct kbase_device *kbdev); ++ ++static void rk_pm_disable_clk(struct kbase_device *kbdev); ++ ++static int kbase_platform_rk_create_sysfs_files(struct device *dev); ++ ++static void kbase_platform_rk_remove_sysfs_files(struct device *dev); ++ ++/*---------------------------------------------------------------------------*/ ++ ++static void rk_pm_power_off_delay_work(struct work_struct *work) ++{ ++ struct rk_context *platform = ++ container_of(to_delayed_work(work), struct rk_context, work); ++ struct kbase_device *kbdev = platform->kbdev; ++ struct rockchip_opp_info *opp_info = &kbdev->opp_info; ++ ++ mutex_lock(&platform->lock); ++ ++ if (!platform->is_powered) { ++ D("mali_dev is already powered off."); ++ mutex_unlock(&platform->lock); ++ return; ++ } ++ ++ rockchip_opp_dvfs_lock(opp_info); ++ if (pm_runtime_enabled(kbdev->dev)) { ++ D("to put_sync_suspend mali_dev."); ++ pm_runtime_put_sync_suspend(kbdev->dev); ++ } ++ rockchip_opp_dvfs_unlock(opp_info); ++ ++ rk_pm_disable_clk(kbdev); ++ ++ if (pm_runtime_suspended(kbdev->dev)) { ++ rk_pm_disable_regulator(kbdev); ++ platform->is_regulator_on = false; ++ } ++ ++ platform->is_powered = false; ++ wake_unlock(&platform->wake_lock); ++ ++ mutex_unlock(&platform->lock); ++} ++ ++static int kbase_platform_rk_init(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ struct rk_context *platform; ++ ++ platform = kzalloc(sizeof(*platform), GFP_KERNEL); ++ if (!platform) { ++ E("err."); ++ return -ENOMEM; ++ 
} ++ ++ platform->is_powered = false; ++ platform->kbdev = kbdev; ++ ++ platform->delay_ms = 200; ++ if (of_property_read_u32(kbdev->dev->of_node, "power-off-delay-ms", ++ &platform->delay_ms)) ++ W("power-off-delay-ms not available."); ++ ++ platform->power_off_wq = create_freezable_workqueue("gpu_power_off_wq"); ++ if (!platform->power_off_wq) { ++ E("couldn't create workqueue"); ++ ret = -ENOMEM; ++ goto err_wq; ++ } ++ INIT_DEFERRABLE_WORK(&platform->work, rk_pm_power_off_delay_work); ++ ++ wake_lock_init(&platform->wake_lock, WAKE_LOCK_SUSPEND, "gpu"); ++ ++ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; ++ ++ ret = kbase_platform_rk_create_sysfs_files(kbdev->dev); ++ if (ret) { ++ E("fail to create sysfs_files. ret = %d.", ret); ++ goto err_sysfs_files; ++ } ++ ++ kbdev->platform_context = (void *)platform; ++ pm_runtime_enable(kbdev->dev); ++ ++ mutex_init(&platform->lock); ++ ++ return 0; ++ ++err_sysfs_files: ++ wake_lock_destroy(&platform->wake_lock); ++ destroy_workqueue(platform->power_off_wq); ++err_wq: ++ return ret; ++} ++ ++static void kbase_platform_rk_term(struct kbase_device *kbdev) ++{ ++ struct rk_context *platform = ++ (struct rk_context *)kbdev->platform_context; ++ ++ pm_runtime_disable(kbdev->dev); ++ kbdev->platform_context = NULL; ++ ++ if (platform) { ++ cancel_delayed_work_sync(&platform->work); ++ wake_lock_destroy(&platform->wake_lock); ++ destroy_workqueue(platform->power_off_wq); ++ platform->is_powered = false; ++ platform->kbdev = NULL; ++ kfree(platform); ++ } ++ kbase_platform_rk_remove_sysfs_files(kbdev->dev); ++} ++ ++struct kbase_platform_funcs_conf platform_funcs = { ++ .platform_init_func = &kbase_platform_rk_init, ++ .platform_term_func = &kbase_platform_rk_term, +}; + -+#define OBJ_TP_LIST \ -+ TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ -+ "object ctx is created", \ -+ "@pII", \ -+ "ctx,ctx_nr,tgid") \ -+ TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ -+ "object gpu is created", \ -+ "@pII", \ -+ "gpu,gpu_id,core_count") \ -+ TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ -+ "object lpu is created", \ -+ "@pII", \ -+ "lpu,lpu_nr,lpu_fn") \ -+ TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ -+ "object atom is created", \ -+ "@pI", \ -+ "atom,atom_nr") \ -+ TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ -+ "address space object is created", \ -+ "@pI", \ -+ "address_space,as_nr") \ -+ TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ -+ "context is destroyed", \ -+ "@p", \ -+ "ctx") \ -+ TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ -+ "atom is destroyed", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ -+ "lpu is deleted with gpu", \ -+ "@pp", \ -+ "lpu,gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ -+ "address space is deleted with gpu", \ -+ "@pp", \ -+ "address_space,gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ -+ "context is retained by lpu", \ -+ "@pp", \ -+ "ctx,lpu") \ -+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ -+ "atom is retained by context", \ -+ "@pp", \ -+ "atom,ctx") \ -+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ -+ "atom is retained by lpu", \ -+ "@pps", \ -+ "atom,lpu,attrib_match_list") \ -+ TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ -+ "context is released by lpu", \ -+ "@pp", \ -+ "ctx,lpu") \ -+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ -+ "atom is released by context", \ -+ "@pp", \ -+ "atom,ctx") \ -+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ -+ "atom is released by lpu", \ -+ "@pp", \ -+ "atom,lpu") \ -+ TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ -+ "address space is retained by context", \ -+ "@pp", \ -+ "address_space,ctx") \ -+ 
TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ -+ "address space is released by context", \ -+ "@pp", \ -+ "address_space,ctx") \ -+ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ -+ "atom is retained by address space", \ -+ "@pp", \ -+ "atom,address_space") \ -+ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ -+ "atom is released by address space", \ -+ "@pp", \ -+ "atom,address_space") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ -+ "atom job slot attributes", \ -+ "@pLLI", \ -+ "atom,descriptor,affinity,config") \ -+ TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ -+ "used pages for jit", \ -+ "@LI", \ -+ "used_pages,j_id") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ -+ "Information about JIT allocations", \ -+ "@pLLLIIIII", \ -+ "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ -+ "Information about JIT frees", \ -+ "@pI", \ -+ "atom,j_id") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ -+ "address space attributes", \ -+ "@pLLL", \ -+ "address_space,transtab,memattr,transcfg") \ -+ TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ -+ "softstop event on given lpu", \ -+ "@p", \ -+ "lpu") \ -+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ -+ "atom softstopped", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ -+ "atom softstop issued", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ -+ "atom soft job has started", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ -+ "atom soft job has completed", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_ARBITER_GRANTED, \ -+ "Arbiter has granted gpu access", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \ -+ "Driver is running again and able to process jobs", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOP_REQUESTED, \ -+ "Arbiter has requested driver to stop using gpu", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOPPED, \ -+ "Driver has stopped using gpu", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ -+ "Driver has requested the arbiter for gpu access", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ -+ "gpu soft reset", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_JD_TILER_HEAP_CHUNK_ALLOC, \ -+ "Tiler Heap Chunk Allocation", \ -+ "@ILL", \ -+ "ctx_nr,heap_id,chunk_va") \ -+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \ -+ "Scheduling starts", \ -+ "@I", \ -+ "dummy") \ -+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_END, \ -+ "Scheduling ends", \ -+ "@I", \ -+ "dummy") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_START, \ -+ "Submitting an atom starts", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_END, \ -+ "Submitting an atom ends", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \ -+ "Within function kbase_jd_done_nolock", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \ -+ "Within function kbase_jd_done_nolock - end", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \ -+ "Start of kbase_jd_done", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_END, \ -+ "End of kbase_jd_done", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_JD_ATOM_COMPLETE, \ -+ "Atom marked complete", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_START, \ -+ "Running of atom starts", \ -+ "@pI", \ -+ "atom,atom_nr") \ -+ 
TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_END, \ -+ "Running of atom ends", \ -+ "@pI", \ -+ "atom,atom_nr") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ -+ "atom priority", \ -+ "@pI", \ -+ "atom,prio") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ -+ "atom state", \ -+ "@pI", \ -+ "atom,state") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ -+ "atom caused priority change", \ -+ "@p", \ -+ "atom") \ -+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ -+ "jit done for atom", \ -+ "@pLLILILLL", \ -+ "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ -+ "New KBase Device", \ -+ "@IIIIIII", \ -+ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ -+ "Kernel receives a request to process new GPU queue instructions", \ -+ "@IL", \ -+ "kernel_ctx_id,buffer_gpu_addr") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ -+ "CSG is programmed to a slot", \ -+ "@IIIII", \ -+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ -+ "CSG is deprogrammed from a slot", \ -+ "@II", \ -+ "kbase_device_id,kbase_device_csg_slot_index") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \ -+ "CSG is halting", \ -+ "@III", \ -+ "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \ -+ "CSG is suspended", \ -+ "@II", \ -+ "kbase_device_id,kbase_device_csg_slot_index") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \ -+ "KBase device is notified that CSG is idle.", \ -+ "@II", \ -+ "kbase_device_id,kbase_device_csg_slot_index") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ -+ "New KBase Context", \ -+ "@II", \ -+ "kernel_ctx_id,kbase_device_id") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ -+ "Delete KBase Context", \ -+ "@I", \ -+ "kernel_ctx_id") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_ASSIGN_AS, \ -+ "Address Space is assigned to a KBase context", \ -+ "@II", \ -+ "kernel_ctx_id,kbase_device_as_index") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_UNASSIGN_AS, \ -+ "Address Space is unassigned from a KBase context", \ -+ "@I", \ -+ "kernel_ctx_id") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ -+ "New KCPU Queue", \ -+ "@pIII", \ -+ "kcpu_queue,kcpu_queue_id,kernel_ctx_id,kcpuq_num_pending_cmds") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ -+ "Delete KCPU Queue", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ -+ "KCPU Queue enqueues Signal on Fence", \ -+ "@pp", \ -+ "kcpu_queue,fence") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ -+ "KCPU Queue enqueues Wait on Fence", \ -+ "@pp", \ -+ "kcpu_queue,fence") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ -+ "KCPU Queue enqueues Wait on Cross Queue Sync Object", \ -+ "@pLII", \ -+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \ -+ "KCPU Queue enqueues Set on Cross Queue Sync Object", \ -+ "@pL", \ -+ "kcpu_queue,cqs_obj_gpu_addr") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \ -+ "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \ -+ "@pLLIII", \ -+ 
"kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \ -+ "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \ -+ "@pLLII", \ -+ "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ -+ "KCPU Queue enqueues Map Import", \ -+ "@pL", \ -+ "kcpu_queue,map_import_buf_gpu_addr") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ -+ "KCPU Queue enqueues Unmap Import", \ -+ "@pL", \ -+ "kcpu_queue,map_import_buf_gpu_addr") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ -+ "KCPU Queue enqueues Unmap Import ignoring reference count", \ -+ "@pL", \ -+ "kcpu_queue,map_import_buf_gpu_addr") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ -+ "Begin array of KCPU Queue enqueues JIT Alloc", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ -+ "Array item of KCPU Queue enqueues JIT Alloc", \ -+ "@pLLLLIIIII", \ -+ "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ -+ "End array of KCPU Queue enqueues JIT Alloc", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ -+ "Begin array of KCPU Queue enqueues JIT Free", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ -+ "Array item of KCPU Queue enqueues JIT Free", \ -+ "@pI", \ -+ "kcpu_queue,jit_alloc_jit_id") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ -+ "End array of KCPU Queue enqueues JIT Free", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ -+ "KCPU Queue enqueues Error Barrier", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ -+ "KCPU Queue enqueues Group Suspend", \ -+ "@ppI", \ -+ "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ -+ "KCPU Queue starts a Signal on Fence", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ -+ "KCPU Queue ends a Signal on Fence", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ -+ "KCPU Queue starts a Wait on Fence", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ -+ "KCPU Queue ends a Wait on Fence", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ -+ "KCPU Queue starts a Wait on Cross Queue Sync Object", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ -+ "KCPU Queue ends a Wait on Cross Queue Sync Object", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ -+ "KCPU Queue executes a Set on Cross Queue Sync Object", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \ -+ "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \ -+ 
"@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \ -+ "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \ -+ "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ -+ "KCPU Queue starts a Map Import", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ -+ "KCPU Queue ends a Map Import", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ -+ "KCPU Queue starts an Unmap Import", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ -+ "KCPU Queue ends an Unmap Import", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ -+ "KCPU Queue starts an Unmap Import ignoring reference count", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ -+ "KCPU Queue ends an Unmap Import ignoring reference count", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ -+ "KCPU Queue starts an array of JIT Allocs", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ -+ "Begin array of KCPU Queue ends an array of JIT Allocs", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ -+ "Array item of KCPU Queue ends an array of JIT Allocs", \ -+ "@pILL", \ -+ "kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ -+ "End array of KCPU Queue ends an array of JIT Allocs", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ -+ "KCPU Queue starts an array of JIT Frees", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ -+ "Begin array of KCPU Queue ends an array of JIT Frees", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ -+ "Array item of KCPU Queue ends an array of JIT Frees", \ -+ "@pIL", \ -+ "kcpu_queue,execute_error,jit_free_pages_used") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ -+ "End array of KCPU Queue ends an array of JIT Frees", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \ -+ "KCPU Queue executes an Error Barrier", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \ -+ "KCPU Queue starts a group suspend", \ -+ "@p", \ -+ "kcpu_queue") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \ -+ "KCPU Queue ends a group suspend", \ -+ "@pI", \ -+ "kcpu_queue,execute_error") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_RELOADING, \ -+ "CSF FW is being reloaded", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_ENABLING, \ -+ "CSF FW is being enabled", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, \ -+ "CSF FW sleep is 
requested", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, \ -+ "CSF FW wake up is requested", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, \ -+ "CSF FW halt is requested", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_DISABLING, \ -+ "CSF FW is being disabled", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_OFF, \ -+ "CSF FW is off", \ -+ "@L", \ -+ "csffw_cycle") \ -+ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ -+ "An overflow has happened with the CSFFW Timeline stream", \ -+ "@LL", \ -+ "csffw_timestamp,csffw_cycle") -+ -+#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header -+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL -+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ -+#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST -+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT -+ -+#include "mali_kbase_mipe_gen_header.h" -+ -+const char *obj_desc_header = (const char *) &__obj_desc_header; -+const size_t obj_desc_header_size = sizeof(__obj_desc_header); -+ -+/* Message ids of trace events that are recorded in the aux stream. */ -+enum tl_msg_id_aux { -+ KBASE_AUX_PM_STATE, -+ KBASE_AUX_PAGEFAULT, -+ KBASE_AUX_PAGESALLOC, -+ KBASE_AUX_DEVFREQ_TARGET, -+ KBASE_AUX_JIT_STATS, -+ KBASE_AUX_TILER_HEAP_STATS, -+ KBASE_AUX_EVENT_JOB_SLOT, -+ KBASE_AUX_PROTECTED_ENTER_START, -+ KBASE_AUX_PROTECTED_ENTER_END, -+ KBASE_AUX_MMU_COMMAND, -+ KBASE_AUX_PROTECTED_LEAVE_START, -+ KBASE_AUX_PROTECTED_LEAVE_END, -+ KBASE_AUX_MSG_COUNT, -+}; -+ -+#define AUX_TP_LIST \ -+ TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ -+ "PM state", \ -+ "@IL", \ -+ "core_type,core_state_bitset") \ -+ TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ -+ "Page fault", \ -+ "@IIL", \ -+ "ctx_nr,as_nr,page_cnt_change") \ -+ TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ -+ "Total alloc pages change", \ -+ "@IL", \ -+ "ctx_nr,page_cnt") \ -+ TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ -+ "New device frequency target", \ -+ "@L", \ -+ "target_freq") \ -+ TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ -+ "per-bin JIT statistics", \ -+ "@IIIIII", \ -+ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ -+ TRACEPOINT_DESC(KBASE_AUX_TILER_HEAP_STATS, \ -+ "Tiler Heap statistics", \ -+ "@ILIIIIIII", \ -+ "ctx_nr,heap_id,va_pages,ph_pages,max_chunks,chunk_size,chunk_count,target_in_flight,nr_in_flight") \ -+ TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ -+ "event on a given job slot", \ -+ "@pIII", \ -+ "ctx,slot_nr,atom_nr,event") \ -+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ -+ "enter protected mode start", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ -+ "enter protected mode end", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_AUX_MMU_COMMAND, \ -+ "mmu commands with synchronicity info", \ -+ "@IIILI", \ -+ "kernel_ctx_id,mmu_cmd_id,mmu_synchronicity,mmu_lock_addr,mmu_lock_page_num") \ -+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ -+ "leave protected mode start", \ -+ "@p", \ -+ "gpu") \ -+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ -+ "leave protected mode end", \ -+ "@p", \ -+ "gpu") -+ -+#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header -+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL -+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX -+#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST -+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT -+ -+#include "mali_kbase_mipe_gen_header.h" -+ -+const char *aux_desc_header = (const 
char *) &__aux_desc_header; -+const size_t aux_desc_header_size = sizeof(__aux_desc_header); ++/*---------------------------------------------------------------------------*/ + -+void __kbase_tlstream_tl_new_ctx( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ u32 ctx_nr, -+ u32 tgid -+) ++static int rk_pm_callback_runtime_on(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_NEW_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx) -+ + sizeof(ctx_nr) -+ + sizeof(tgid) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct rockchip_opp_info *opp_info = &kbdev->opp_info; ++ int ret = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (!kbdev->current_nominal_freq) ++ return 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &tgid, sizeof(tgid)); ++ ret = clk_bulk_prepare_enable(opp_info->nclocks, opp_info->clocks); ++ if (ret) { ++ dev_err(kbdev->dev, "failed to enable opp clks\n"); ++ return ret; ++ } ++ if (opp_info->data && opp_info->data->set_read_margin) ++ opp_info->data->set_read_margin(kbdev->dev, opp_info, ++ opp_info->target_rm); ++ if (opp_info->is_scmi_clk) { ++ if (clk_set_rate(opp_info->clk, kbdev->current_nominal_freq)) ++ dev_err(kbdev->dev, "failed to restore clk rate\n"); ++ } ++ clk_bulk_disable_unprepare(opp_info->nclocks, opp_info->clocks); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 0; +} + -+void __kbase_tlstream_tl_new_gpu( -+ struct kbase_tlstream *stream, -+ const void *gpu, -+ u32 gpu_id, -+ u32 core_count -+) ++static void rk_pm_callback_runtime_off(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_NEW_GPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ + sizeof(gpu_id) -+ + sizeof(core_count) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu_id, sizeof(gpu_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &core_count, sizeof(core_count)); ++ struct rockchip_opp_info *opp_info = &kbdev->opp_info; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ if (opp_info->is_scmi_clk) { ++ if (clk_set_rate(opp_info->clk, POWER_DOWN_FREQ)) ++ dev_err(kbdev->dev, "failed to set power down rate\n"); ++ } ++ opp_info->current_rm = UINT_MAX; +} + -+void __kbase_tlstream_tl_new_lpu( -+ struct kbase_tlstream *stream, -+ const void *lpu, -+ u32 lpu_nr, -+ u32 lpu_fn -+) ++static int rk_pm_callback_power_on(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_NEW_LPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(lpu) -+ + sizeof(lpu_nr) -+ + sizeof(lpu_fn) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); 
-+	pos = kbasep_serialize_bytes(buffer,
-+		pos, &lpu_nr, sizeof(lpu_nr));
-+	pos = kbasep_serialize_bytes(buffer,
-+		pos, &lpu_fn, sizeof(lpu_fn));
-+
-+	kbase_tlstream_msgbuf_release(stream, acq_flags);
-+}
++	int ret = 1; /* Assume GPU has been powered off */
++	int err = 0;
++	struct rk_context *platform = get_rk_context(kbdev);
++	struct rockchip_opp_info *opp_info = &kbdev->opp_info;
+
-+void __kbase_tlstream_tl_new_atom(
-+	struct kbase_tlstream *stream,
-+	const void *atom,
-+	u32 atom_nr
-+)
-+{
-+	const u32 msg_id = KBASE_TL_NEW_ATOM;
-+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
-+		+ sizeof(atom)
-+		+ sizeof(atom_nr)
-+		;
-+	char *buffer;
-+	unsigned long acq_flags;
-+	size_t pos = 0;
++	cancel_delayed_work_sync(&platform->work);
+
++	mutex_lock(&platform->lock);
+
-+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
++	if (platform->is_powered) {
++		D("mali_device is already powered.");
++		ret = 0;
++		goto out;
++	}
+
-+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-+	pos = kbasep_serialize_timestamp(buffer, pos);
-+	pos = kbasep_serialize_bytes(buffer,
-+		pos, &atom, sizeof(atom));
-+	pos = kbasep_serialize_bytes(buffer,
-+		pos, &atom_nr, sizeof(atom_nr));
++	/* we must enable vdd_gpu before pd_gpu_in_chip. */
++	if (!platform->is_regulator_on) {
++		err = rk_pm_enable_regulator(kbdev);
++		if (err) {
++			E("fail to enable regulator, err : %d.", err);
++			ret = err;
++			goto out;
++		}
++		platform->is_regulator_on = true;
++	}
+
-+	kbase_tlstream_msgbuf_release(stream, acq_flags);
-+}
++	err = rk_pm_enable_clk(kbdev);
++	if (err) {
++		E("failed to enable clk: %d", err);
++		ret = err;
++		goto out;
++	}
+
-+void __kbase_tlstream_tl_new_as(
-+	struct kbase_tlstream *stream,
-+	const void *address_space,
-+	u32 as_nr
-+)
-+{
-+	const u32 msg_id = KBASE_TL_NEW_AS;
-+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
-+		+ sizeof(address_space)
-+		+ sizeof(as_nr)
-+		;
-+	char *buffer;
-+	unsigned long acq_flags;
-+	size_t pos = 0;
++	rockchip_opp_dvfs_lock(opp_info);
++	/* If runtime_pm of mali_dev is enabled, then ... */
++	if (pm_runtime_enabled(kbdev->dev)) {
++		D("to resume mali_dev syncly.");
++		/* The "on" operation on pd_in_chip
++		 * will be done in the runtime_pm_callbacks of pm_domain.
++ */ ++ err = pm_runtime_get_sync(kbdev->dev); ++ if (err < 0) { ++ rockchip_opp_dvfs_unlock(opp_info); ++ E("failed to runtime resume device: %d.", err); ++ ret = err; ++ goto out; ++ } else if (err == 1) { /* runtime_pm_status is still active */ ++ D("chip has NOT been powered off, no need to re-init."); ++ ret = 0; ++ } ++ } ++ rockchip_opp_dvfs_unlock(opp_info); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &as_nr, sizeof(as_nr)); ++ platform->is_powered = true; ++ wake_lock(&platform->wake_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++out: ++ mutex_unlock(&platform->lock); ++ return ret; +} + -+void __kbase_tlstream_tl_del_ctx( -+ struct kbase_tlstream *stream, -+ const void *ctx -+) ++static void rk_pm_callback_power_off(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_DEL_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ struct rk_context *platform = get_rk_context(kbdev); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); ++ D("enter"); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ queue_delayed_work(platform->power_off_wq, &platform->work, ++ msecs_to_jiffies(platform->delay_ms)); +} + -+void __kbase_tlstream_tl_del_atom( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++static int rk_kbase_device_runtime_init(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_DEL_ATOM; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 0; +} + -+void __kbase_tlstream_tl_lifelink_lpu_gpu( -+ struct kbase_tlstream *stream, -+ const void *lpu, -+ const void *gpu -+) ++static void rk_kbase_device_runtime_disable(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(lpu) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void __kbase_tlstream_tl_lifelink_as_gpu( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(address_space) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = 
kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = rk_pm_callback_power_on, ++ .power_off_callback = rk_pm_callback_power_off, ++#ifdef CONFIG_PM ++ .power_runtime_init_callback = rk_kbase_device_runtime_init, ++ .power_runtime_term_callback = rk_kbase_device_runtime_disable, ++ .power_runtime_on_callback = rk_pm_callback_runtime_on, ++ .power_runtime_off_callback = rk_pm_callback_runtime_off, ++#else /* CONFIG_PM */ ++ .power_runtime_init_callback = NULL, ++ .power_runtime_term_callback = NULL, ++ .power_runtime_on_callback = NULL, ++ .power_runtime_off_callback = NULL, ++#endif /* CONFIG_PM */ ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/*---------------------------------------------------------------------------*/ + -+void __kbase_tlstream_tl_ret_ctx_lpu( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ const void *lpu -+) ++#ifdef CONFIG_REGULATOR ++static int rk_pm_enable_regulator(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_RET_CTX_LPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx) -+ + sizeof(lpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int ret = 0; ++ unsigned int i; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ struct regulator *regulator = kbdev->regulators[i]; ++ if (!regulator) { ++ W("no mali regulator control, no need to enable."); ++ goto EXIT; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); ++ D("to enable regulator."); ++ ret = regulator_enable(regulator); ++ if (ret) { ++ E("fail to enable regulator, ret : %d.", ret); ++ goto EXIT; ++ } ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++EXIT: ++ return ret; +} + -+void __kbase_tlstream_tl_ret_atom_ctx( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *ctx -+) ++static void rk_pm_disable_regulator(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(ctx) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ unsigned int i; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ for (i = 0; i < kbdev->nr_regulators; i++) { ++ struct regulator *regulator = kbdev->regulators[i]; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); ++ if (!regulator) { ++ W("no mali regulator control, no need to disable."); ++ return; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ D("to disable regulator."); ++ regulator_disable(regulator); ++ } +} ++#endif + -+void __kbase_tlstream_tl_ret_atom_lpu( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *lpu, -+ const char 
*attrib_match_list -+) ++static int rk_pm_enable_clk(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_LPU; -+ const size_t s2 = sizeof(u32) + sizeof(char) -+ + strnlen(attrib_match_list, STRLEN_MAX); -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(lpu) -+ + s2 -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int err = 0; ++ unsigned int i; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ struct clk *clock = kbdev->clocks[i]; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); -+ pos = kbasep_serialize_string(buffer, -+ pos, attrib_match_list, s2); ++ if (!clock) { ++ W("no mali clock control, no need to enable."); ++ } else { ++ D("to enable clk."); ++ err = clk_enable(clock); ++ if (err) ++ E("failed to enable clk: %d.", err); ++ } ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return err; +} + -+void __kbase_tlstream_tl_nret_ctx_lpu( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ const void *lpu -+) ++static void rk_pm_disable_clk(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_NRET_CTX_LPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx) -+ + sizeof(lpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ unsigned int i; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); ++ for (i = 0; i < kbdev->nr_clocks; i++) { ++ struct clk *clock = kbdev->clocks[i]; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ if (!clock) { ++ W("no mali clock control, no need to disable."); ++ } else { ++ D("to disable clk."); ++ clk_disable(clock); ++ } ++ } +} + -+void __kbase_tlstream_tl_nret_atom_ctx( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *ctx -+) -+{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(ctx) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/*---------------------------------------------------------------------------*/ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++static ssize_t utilisation_period_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ ssize_t ret = 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); ++ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return ret; +} + -+void __kbase_tlstream_tl_nret_atom_lpu( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *lpu -+) ++static ssize_t utilisation_period_store(struct device *dev, ++ struct 
device_attribute *attr, ++ const char *buf, ++ size_t count) +{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(lpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ int ret = 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); ++ ret = kstrtouint(buf, 0, &platform->utilisation_period); ++ if (ret) { ++ E("invalid input period : %s.", buf); ++ return ret; ++ } ++ D("set utilisation_period to '%d'.", platform->utilisation_period); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return count; +} + -+void __kbase_tlstream_tl_ret_as_ctx( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *ctx -+) ++static ssize_t utilisation_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ const u32 msg_id = KBASE_TL_RET_AS_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(address_space) -+ + sizeof(ctx) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ ssize_t ret = 0; ++ unsigned long period_in_us = platform->utilisation_period * 1000; ++ u32 utilisation; ++ struct kbasep_pm_metrics metrics_when_start; ++ struct kbasep_pm_metrics metrics_diff = {}; /* between start and end. */ ++ u32 total_time = 0; ++ u32 busy_time = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* get current metrics data. */ ++ kbase_pm_get_dvfs_metrics(kbdev, &metrics_when_start, &metrics_diff); ++ /* sleep for 'period_in_us'. */ ++ usleep_range(period_in_us, period_in_us + 100); ++ /* get metrics data between start and end. 
*/ ++ kbase_pm_get_dvfs_metrics(kbdev, &metrics_when_start, &metrics_diff); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); ++ total_time = metrics_diff.time_busy + metrics_diff.time_idle; ++ busy_time = metrics_diff.time_busy; ++ D("total_time : %u, busy_time : %u.", total_time, busy_time); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ utilisation = busy_time * 100 / total_time; ++ ret += snprintf(buf, PAGE_SIZE, "%d\n", utilisation); ++ ++ return ret; +} + -+void __kbase_tlstream_tl_nret_as_ctx( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *ctx -+) ++static DEVICE_ATTR_RW(utilisation_period); ++static DEVICE_ATTR_RO(utilisation); ++ ++static int kbase_platform_rk_create_sysfs_files(struct device *dev) +{ -+ const u32 msg_id = KBASE_TL_NRET_AS_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(address_space) -+ + sizeof(ctx) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int ret = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ret = device_create_file(dev, &dev_attr_utilisation_period); ++ if (ret) { ++ E("fail to create sysfs file 'utilisation_period'."); ++ goto out; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); ++ ret = device_create_file(dev, &dev_attr_utilisation); ++ if (ret) { ++ E("fail to create sysfs file 'utilisation'."); ++ goto remove_utilisation_period; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 0; ++ ++remove_utilisation_period: ++ device_remove_file(dev, &dev_attr_utilisation_period); ++out: ++ return ret; +} + -+void __kbase_tlstream_tl_ret_atom_as( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *address_space -+) ++static void kbase_platform_rk_remove_sysfs_files(struct device *dev) +{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_AS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(address_space) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ device_remove_file(dev, &dev_attr_utilisation_period); ++ device_remove_file(dev, &dev_attr_utilisation); +} + -+void __kbase_tlstream_tl_nret_atom_as( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *address_space -+) ++static int rk3588_gpu_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) +{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_AS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(address_space) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int ret = 0; ++ u8 value = 0; + -+ buffer = 
kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (!bin) ++ return 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); ++ if (of_property_match_string(np, "nvmem-cell-names", ++ "specification_serial_number") >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, ++ "specification_serial_number", ++ &value); ++ if (ret) { ++ dev_err(dev, ++ "Failed to get specification_serial_number\n"); ++ return ret; ++ } ++ /* RK3588M */ ++ if (value == 0xd) ++ *bin = 1; ++ /* RK3588J */ ++ else if (value == 0xa) ++ *bin = 2; ++ } ++ if (*bin < 0) ++ *bin = 0; ++ dev_info(dev, "bin=%d\n", *bin); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return ret; +} + -+void __kbase_tlstream_tl_attrib_atom_config( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 descriptor, -+ u64 affinity, -+ u32 config -+) ++static int rk3588_gpu_set_soc_info(struct device *dev, struct device_node *np, ++ struct rockchip_opp_info *opp_info) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(descriptor) -+ + sizeof(affinity) -+ + sizeof(config) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int bin = opp_info->bin; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (opp_info->volt_sel < 0) ++ return 0; ++ if (bin < 0) ++ bin = 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &descriptor, sizeof(descriptor)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &affinity, sizeof(affinity)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &config, sizeof(config)); ++ if (!of_property_read_bool(np, "rockchip,supported-hw")) ++ return 0; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ /* SoC Version */ ++ opp_info->supported_hw[0] = BIT(bin); ++ /* Speed Grade */ ++ opp_info->supported_hw[1] = BIT(opp_info->volt_sel); ++ ++ return 0; +} + -+void __kbase_tlstream_tl_jit_usedpages( -+ struct kbase_tlstream *stream, -+ u64 used_pages, -+ u32 j_id -+) ++static int rk3588_gpu_set_read_margin(struct device *dev, ++ struct rockchip_opp_info *opp_info, ++ u32 rm) +{ -+ const u32 msg_id = KBASE_TL_JIT_USEDPAGES; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(used_pages) -+ + sizeof(j_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ int ret = 0; ++ u32 val; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &used_pages, sizeof(used_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &j_id, sizeof(j_id)); ++ if (!opp_info->grf || !opp_info->volt_rm_tbl) ++ return 0; ++ if (rm == opp_info->current_rm || rm == UINT_MAX) ++ return 0; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ dev_dbg(dev, "set rm to %d\n", rm); + -+void __kbase_tlstream_tl_attrib_atom_jitallocinfo( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 va_pgs, -+ u64 com_pgs, -+ u64 extent, -+ u32 j_id, -+ u32 
bin_id, -+ u32 max_allocs, -+ u32 jit_flags, -+ u32 usg_id -+) -+{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(va_pgs) -+ + sizeof(com_pgs) -+ + sizeof(extent) -+ + sizeof(j_id) -+ + sizeof(bin_id) -+ + sizeof(max_allocs) -+ + sizeof(jit_flags) -+ + sizeof(usg_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ ret = regmap_read(opp_info->grf, 0x24, &val); ++ if (ret < 0) { ++ dev_err(dev, "failed to get rm from 0x24\n"); ++ return ret; ++ } ++ val &= ~0x1c; ++ regmap_write(opp_info->grf, 0x24, val | (rm << 2)); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ret = regmap_read(opp_info->grf, 0x28, &val); ++ if (ret < 0) { ++ dev_err(dev, "failed to get rm from 0x28\n"); ++ return ret; ++ } ++ val &= ~0x1c; ++ regmap_write(opp_info->grf, 0x28, val | (rm << 2)); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &va_pgs, sizeof(va_pgs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &com_pgs, sizeof(com_pgs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &extent, sizeof(extent)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &j_id, sizeof(j_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &bin_id, sizeof(bin_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &max_allocs, sizeof(max_allocs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_flags, sizeof(jit_flags)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &usg_id, sizeof(usg_id)); ++ opp_info->current_rm = rm; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 0; +} + -+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 j_id -+) ++static int gpu_opp_config_regulators(struct device *dev, ++ struct dev_pm_opp *old_opp, ++ struct dev_pm_opp *new_opp, ++ struct regulator **regulators, ++ unsigned int count) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(j_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &j_id, sizeof(j_id)); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return rockchip_opp_config_regulators(dev, old_opp, new_opp, regulators, ++ count, &kbdev->opp_info); +} + -+void __kbase_tlstream_tl_attrib_as_config( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ u64 transtab, -+ u64 memattr, -+ u64 transcfg -+) ++static int gpu_opp_config_clks(struct device *dev, struct opp_table *opp_table, ++ struct dev_pm_opp *opp, void *data, ++ bool scaling_down) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(address_space) -+ + sizeof(transtab) -+ + sizeof(memattr) -+ + sizeof(transcfg) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); 
-+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &address_space, sizeof(address_space)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &transtab, sizeof(transtab)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &memattr, sizeof(memattr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &transcfg, sizeof(transcfg)); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return rockchip_opp_config_clks(dev, opp_table, opp, data, scaling_down, ++ &kbdev->opp_info); +} + -+void __kbase_tlstream_tl_event_lpu_softstop( -+ struct kbase_tlstream *stream, -+ const void *lpu -+) -+{ -+ const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(lpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++static const struct rockchip_opp_data rk3588_gpu_opp_data = { ++ .get_soc_info = rk3588_gpu_get_soc_info, ++ .set_soc_info = rk3588_gpu_set_soc_info, ++ .set_read_margin = rk3588_gpu_set_read_margin, ++ .config_regulators = gpu_opp_config_regulators, ++ .config_clks = gpu_opp_config_clks, ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &lpu, sizeof(lpu)); ++static const struct rockchip_opp_data rockchip_gpu_opp_data = { ++ .config_clks = gpu_opp_config_clks, ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++static const struct of_device_id rockchip_mali_of_match[] = { ++ { ++ .compatible = "rockchip,rk3588", ++ .data = (void *)&rk3588_gpu_opp_data, ++ }, ++ {}, ++}; + -+void __kbase_tlstream_tl_event_atom_softstop_ex( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ struct rockchip_opp_info *info = &kbdev->opp_info; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++ info->data = &rockchip_gpu_opp_data; ++ rockchip_get_opp_data(rockchip_mali_of_match, &kbdev->opp_info); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return rockchip_init_opp_table(kbdev->dev, &kbdev->opp_info, ++ "clk_mali", "mali"); +} + -+void __kbase_tlstream_tl_event_atom_softstop_issue( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++void kbase_platform_rk_uninit_opp_table(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ 
rockchip_uninit_opp_table(kbdev->dev, &kbdev->opp_info); +} + -+void __kbase_tlstream_tl_event_atom_softjob_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++int kbase_platform_rk_enable_regulator(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ struct rk_context *platform = get_rk_context(kbdev); ++ int err = 0; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++ if (!platform->is_regulator_on) { ++ err = rk_pm_enable_regulator(kbdev); ++ if (err) { ++ E("fail to enable regulator, err : %d.", err); ++ return err; ++ } ++ platform->is_regulator_on = true; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 0; +} + -+void __kbase_tlstream_tl_event_atom_softjob_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+) -+{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/*---------------------------------------------------------------------------*/ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++static void *enumerate_gpu_clk(struct kbase_device *kbdev, ++ unsigned int index) ++{ ++ if (index >= kbdev->nr_clocks) ++ return NULL; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return kbdev->clocks[index]; +} + -+void __kbase_tlstream_tl_arbiter_granted( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) ++static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, ++ void *gpu_clk_handle) +{ -+ const u32 msg_id = KBASE_TL_ARBITER_GRANTED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return clk_get_rate((struct clk *)gpu_clk_handle); +} + -+void __kbase_tlstream_tl_arbiter_started( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) ++static int gpu_clk_notifier_register(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) +{ -+ const u32 msg_id = KBASE_TL_ARBITER_STARTED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ compiletime_assert(offsetof(struct clk_notifier_data, clk) == ++ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), ++ "mismatch in the offset of clk member"); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ 
pos, &gpu, sizeof(gpu)); ++ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == ++ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), ++ "mismatch in the size of clk member"); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + -+void __kbase_tlstream_tl_arbiter_stop_requested( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) ++static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) +{ -+ const u32 msg_id = KBASE_TL_ARBITER_STOP_REQUESTED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); ++} + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { ++ .get_gpu_clk_rate = get_gpu_clk_rate, ++ .enumerate_gpu_clk = enumerate_gpu_clk, ++ .gpu_clk_notifier_register = gpu_clk_notifier_register, ++ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, ++}; +diff --git a/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h +new file mode 100644 +index 000000000..0a42559df +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/rk/mali_kbase_rk.h +@@ -0,0 +1,67 @@ ++/* drivers/gpu/t6xx/kbase/src/platform/rk/mali_kbase_platform.h ++ * Rockchip SoC Mali-Midgard platform-dependent codes ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software FoundatIon. ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++/** ++ * @file mali_kbase_rk.h ++ * ++ * defines work_context type of platform_dependent_part. ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#ifndef _MALI_KBASE_RK_H_ ++#define _MALI_KBASE_RK_H_ + -+void __kbase_tlstream_tl_arbiter_stopped( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_TL_ARBITER_STOPPED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#include + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/*---------------------------------------------------------------------------*/ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/*---------------------------------------------------------------------------*/ + -+void __kbase_tlstream_tl_arbiter_requested( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* ++ * struct rk_context - work_context of platform_dependent_part_of_rk. ++ */ ++struct rk_context { ++ /* ++ * record the status of common_parts calling 'power_on_callback' ++ * and 'power_off_callback'. 
++ */ ++ bool is_powered; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ bool is_regulator_on; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++ struct kbase_device *kbdev; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ struct workqueue_struct *power_off_wq; ++ /* delayed_work_to_power_off_gpu. */ ++ struct delayed_work work; ++ unsigned int delay_ms; + -+void __kbase_tlstream_jd_gpu_soft_reset( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ /* ++ * WAKE_LOCK_SUSPEND for ensuring to run ++ * delayed_work_to_power_off_gpu before suspend. ++ */ ++ struct wake_lock wake_lock; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* debug only, the period in ms to count gpu_utilisation. */ ++ unsigned int utilisation_period; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++ /* to protect operations on 'is_powered' and clks, pd, vd of gpu. */ ++ struct mutex lock; ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/*---------------------------------------------------------------------------*/ + -+void __kbase_tlstream_jd_tiler_heap_chunk_alloc( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 heap_id, -+ u64 chunk_va -+) ++static inline struct rk_context *get_rk_context( ++ const struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_JD_TILER_HEAP_CHUNK_ALLOC; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx_nr) -+ + sizeof(heap_id) -+ + sizeof(chunk_va) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ return (struct rk_context *)(kbdev->platform_context); ++} + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &heap_id, sizeof(heap_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &chunk_va, sizeof(chunk_va)); ++#endif /* _MALI_KBASE_RK_H_ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild +new file mode 100755 +index 000000000..e1398fde3 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+void __kbase_tlstream_tl_js_sched_start( -+ struct kbase_tlstream *stream, -+ u32 dummy -+) -+{ -+ const u32 msg_id = KBASE_TL_JS_SCHED_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(dummy) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..28f453161 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &dummy, sizeof(dummy)); ++/** ++ * PLATFORM_FUNCS - Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c +new file mode 100644 +index 000000000..8add708d0 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c +@@ -0,0 +1,79 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void __kbase_tlstream_tl_js_sched_end( -+ struct kbase_tlstream *stream, -+ u32 dummy -+) -+{ -+ const u32 msg_id = KBASE_TL_JS_SCHED_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(dummy) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#include ++#include ++#include ++#include ++#include "mali_kbase_config_platform.h" + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &dummy, sizeof(dummy)); ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 68, ++ .mmu_irq_number = 69, ++ .gpu_irq_number = 70, ++ .io_memory_region = { ++ .start = 0xFC010000, ++ .end = 0xFC010000 + (4096 * 4) - 1 ++ } ++}; ++#endif /* CONFIG_OF */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++static int pm_callback_power_on(struct kbase_device *kbdev) ++{ ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+void __kbase_tlstream_tl_jd_submit_atom_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++} + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &versatile_platform_config; +} + -+void __kbase_tlstream_tl_jd_submit_atom_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++#ifdef CONFIG_MALI_BIFROST_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif +{ -+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = 
kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 1; +} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild +new file mode 100755 +index 000000000..e1398fde3 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+void __kbase_tlstream_tl_jd_done_no_lock_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+) -+{ -+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..28f453161 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++/** ++ * PLATFORM_FUNCS - Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +new file mode 100644 +index 000000000..835b7587c +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +@@ -0,0 +1,77 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+void __kbase_tlstream_tl_jd_done_no_lock_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+) -+{ -+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#include ++#include ++#include ++#include + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 68, ++ .mmu_irq_number = 69, ++ .gpu_irq_number = 70, ++ .io_memory_region = { ++ .start = 0x2f010000, ++ .end = 0x2f010000 + (4096 * 4) - 1} ++}; ++#endif + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++static int pm_callback_power_on(struct kbase_device *kbdev) ++{ ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+void __kbase_tlstream_tl_jd_done_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_JD_DONE_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++} + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &versatile_platform_config; +} + -+void __kbase_tlstream_tl_jd_done_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++#ifdef CONFIG_MALI_BIFROST_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif +{ -+ const u32 msg_id = KBASE_TL_JD_DONE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 1; +} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild +new file mode 100755 +index 000000000..10f7dc8cf +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild +@@ -0,0 +1,24 @@ ++# 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+void __kbase_tlstream_tl_jd_atom_complete( -+ struct kbase_tlstream *stream, -+ const void *atom -+) -+{ -+ const u32 msg_id = KBASE_TL_JD_ATOM_COMPLETE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++bifrost_kbase-y += \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..28f453161 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); ++/** ++ * POWER_MANAGEMENT_CALLBACKS - Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * PLATFORM_FUNCS - Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+void __kbase_tlstream_tl_run_atom_start( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 atom_nr -+) -+{ -+ const u32 msg_id = KBASE_TL_RUN_ATOM_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(atom_nr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +new file mode 100644 +index 000000000..8be30fb25 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +@@ -0,0 +1,77 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#include ++#include ++#include ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom_nr, sizeof(atom_nr)); ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 75, ++ .mmu_irq_number = 76, ++ .gpu_irq_number = 77, ++ .io_memory_region = { ++ .start = 0x2F000000, ++ .end = 0x2F000000 + (4096 * 4) - 1} ++}; ++#endif + -+void __kbase_tlstream_tl_run_atom_end( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 atom_nr -+) ++static int pm_callback_power_on(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_RUN_ATOM_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(atom_nr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom_nr, sizeof(atom_nr)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+void __kbase_tlstream_tl_attrib_atom_priority( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 prio -+) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(prio) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &prio, sizeof(prio)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void __kbase_tlstream_tl_attrib_atom_state( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 state -+) -+{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(state) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &state, sizeof(state)); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+void 
__kbase_tlstream_tl_attrib_atom_prioritized( -+ struct kbase_tlstream *stream, -+ const void *atom -+) ++struct kbase_platform_config *kbase_get_platform_config(void) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return &versatile_platform_config; +} + -+void __kbase_tlstream_tl_attrib_atom_jit( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 edit_addr, -+ u64 new_addr, -+ u32 jit_flags, -+ u64 mem_flags, -+ u32 j_id, -+ u64 com_pgs, -+ u64 extent, -+ u64 va_pgs -+) ++#ifdef CONFIG_MALI_BIFROST_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(atom) -+ + sizeof(edit_addr) -+ + sizeof(new_addr) -+ + sizeof(jit_flags) -+ + sizeof(mem_flags) -+ + sizeof(j_id) -+ + sizeof(com_pgs) -+ + sizeof(extent) -+ + sizeof(va_pgs) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom, sizeof(atom)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &edit_addr, sizeof(edit_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &new_addr, sizeof(new_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_flags, sizeof(jit_flags)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &mem_flags, sizeof(mem_flags)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &j_id, sizeof(j_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &com_pgs, sizeof(com_pgs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &extent, sizeof(extent)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &va_pgs, sizeof(va_pgs)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return 1; +} ++#endif /* CONFIG_MALI_BIFROST_DVFS */ +diff --git a/drivers/gpu/arm/bifrost/protected_mode_switcher.h b/drivers/gpu/arm/bifrost/protected_mode_switcher.h +new file mode 100644 +index 000000000..9dd9253c7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/protected_mode_switcher.h +@@ -0,0 +1,56 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void __kbase_tlstream_tl_kbase_new_device( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_gpu_core_count, -+ u32 kbase_device_max_num_csgs, -+ u32 kbase_device_as_count, -+ u32 kbase_device_sb_entry_count, -+ u32 kbase_device_has_cross_stream_sync, -+ u32 kbase_device_supports_gpu_sleep -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kbase_device_gpu_core_count) -+ + sizeof(kbase_device_max_num_csgs) -+ + sizeof(kbase_device_as_count) -+ + sizeof(kbase_device_sb_entry_count) -+ + sizeof(kbase_device_has_cross_stream_sync) -+ + sizeof(kbase_device_supports_gpu_sleep) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#ifndef _PROTECTED_MODE_SWITCH_H_ ++#define _PROTECTED_MODE_SWITCH_H_ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++struct protected_mode_device; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_gpu_core_count, sizeof(kbase_device_gpu_core_count)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_as_count, sizeof(kbase_device_as_count)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_sb_entry_count, sizeof(kbase_device_sb_entry_count)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_supports_gpu_sleep, sizeof(kbase_device_supports_gpu_sleep)); ++/** ++ * struct protected_mode_ops - Callbacks for protected mode switch operations ++ * ++ * @protected_mode_enable: Callback to enable protected mode for device, and ++ * reset device ++ * Returns 0 on success, non-zero on error ++ * @protected_mode_disable: Callback to disable protected mode for device ++ * Returns 0 on success, non-zero on error ++ */ ++struct protected_mode_ops { ++ int (*protected_mode_enable)( ++ struct protected_mode_device *protected_dev); ++ int (*protected_mode_disable)( ++ struct protected_mode_device *protected_dev); ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * struct protected_mode_device - Device structure for protected mode devices ++ * @ops: Callbacks associated with this device ++ * @data: Pointer to device private data ++ * ++ * This structure should be registered with the platform device using ++ * platform_set_drvdata(). 
++ */ ++struct protected_mode_device { ++ struct protected_mode_ops ops; ++ void *data; ++}; + -+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u64 buffer_gpu_addr -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ + sizeof(buffer_gpu_addr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#endif /* _PROTECTED_MODE_SWITCH_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild +new file mode 100755 +index 000000000..38e4dd4d7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/Kbuild +@@ -0,0 +1,31 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ccflags-y += -I$(src)/include \ ++ -I$(src) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr)); ++subdir-ccflags-y += -I$(src)/include \ ++ -I$(src) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++obj-$(CONFIG_MALI_KUTF) += kutf/ ++obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ ++obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ ++obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/ + -+void __kbase_tlstream_tl_kbase_device_program_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kernel_ctx_id, -+ u32 gpu_cmdq_grp_handle, -+ u32 kbase_device_csg_slot_index, -+ u32 kbase_device_csg_slot_resuming -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kernel_ctx_id) -+ + sizeof(gpu_cmdq_grp_handle) -+ + sizeof(kbase_device_csg_slot_index) -+ + sizeof(kbase_device_csg_slot_resuming) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; +diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig +new file mode 100644 +index 000000000..e9fe22771 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/Kconfig +@@ -0,0 +1,69 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++menuconfig MALI_KUTF ++ bool "Build Mali Kernel Unit Test Framework modules" ++ depends on MALI_BIFROST && MALI_BIFROST_DEBUG ++ default y if MALI_BIFROST_DEBUG ++ help ++ This option will build the Mali testing framework modules. + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming)); ++ Modules: ++ - kutf.ko ++ - kutf_test.ko + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++config MALI_KUTF_IRQ_TEST ++ bool "Build Mali KUTF IRQ test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the IRQ latency measurement test module. ++ It can determine the latency of the Mali GPU IRQ on your system. + -+void __kbase_tlstream_tl_kbase_device_deprogram_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kbase_device_csg_slot_index) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ Modules: ++ - mali_kutf_irq_test.ko + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++config MALI_KUTF_CLK_RATE_TRACE ++ bool "Build Mali KUTF Clock rate trace test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the clock rate trace portal test module. ++ It can test the clocks integration into the platform and exercise some ++ basic trace test in the system. + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); ++ Modules: ++ - mali_kutf_clk_rate_trace_test_portal.ko + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++config MALI_KUTF_MGM_INTEGRATION_TEST ++ bool "Build Mali KUTF MGM integration test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the MGM integration test module. 
++ It can test the implementation of PTE translation for specific ++ group ids. + -+void __kbase_tlstream_tl_kbase_device_halting_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index, -+ u32 kbase_device_csg_slot_suspending -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kbase_device_csg_slot_index) -+ + sizeof(kbase_device_csg_slot_suspending) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ Modules: ++ - mali_kutf_mgm_integration_test.ko + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending)); ++comment "Enable MALI_BIFROST_DEBUG for KUTF modules support" ++ depends on MALI_BIFROST && !MALI_BIFROST_DEBUG && MALI_KUTF +diff --git a/drivers/gpu/arm/bifrost/tests/build.bp b/drivers/gpu/arm/bifrost/tests/build.bp +new file mode 100755 +index 000000000..5581ba934 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/build.bp +@@ -0,0 +1,46 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++bob_defaults { ++ name: "kernel_test_includes", ++ local_include_dirs: [ ++ "include", ++ "./../../", ++ "./../", ++ "./", ++ ], +} + -+void __kbase_tlstream_tl_kbase_device_suspend_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kbase_device_csg_slot_index) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); ++bob_defaults { ++ name: "kernel_test_configs", ++ mali_kutf: { ++ kbuild_options: ["CONFIG_MALI_KUTF=y"], ++ }, ++ unit_test_kernel_modules: { ++ kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], ++ }, ++} + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++bob_defaults { ++ name: "kernel_unit_tests", ++ add_to_alias: ["unit_tests"], ++ srcs: [".*_unit_test/"], +} +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +new file mode 100644 +index 000000000..3f68efa42 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +@@ -0,0 +1,109 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void __kbase_tlstream_tl_kbase_device_csg_idle( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kbase_device_id) -+ + sizeof(kbase_device_csg_slot_index) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#ifndef _KERNEL_UTF_HELPERS_H_ ++#define _KERNEL_UTF_HELPERS_H_ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* kutf_helpers.h ++ * Test helper functions for the kernel UTF test infrastructure. ++ * ++ * These functions provide methods for enqueuing/dequeuing lines of text sent ++ * by user space. They are used to implement the transfer of "userdata" from ++ * user space to kernel. 
++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_helper_pending_input() - Check any pending lines sent by user space ++ * @context: KUTF context ++ * ++ * Return: true if there are pending lines, otherwise false ++ */ ++bool kutf_helper_pending_input(struct kutf_context *context); + -+void __kbase_tlstream_tl_kbase_new_ctx( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 kbase_device_id -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_NEW_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ + sizeof(kbase_device_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_helper_input_dequeue() - Dequeue a line sent by user space ++ * @context: KUTF context ++ * @str_size: Pointer to an integer to receive the size of the string ++ * ++ * If no line is available then this function will wait (interruptibly) until ++ * a line is available. ++ * ++ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end ++ * of data. ++ */ ++char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_helper_input_enqueue() - Enqueue a line sent by user space ++ * @context: KUTF context ++ * @str: The user space address of the line ++ * @size: The length in bytes of the string ++ * ++ * This function will use copy_from_user to copy the string out of user space. ++ * The string need not be NULL-terminated (@size should not include the NULL ++ * termination). ++ * ++ * As a special case @str==NULL and @size==0 is valid to mark the end of input, ++ * but callers should use kutf_helper_input_enqueue_end_of_data() instead. ++ * ++ * Return: 0 on success, -EFAULT if the line cannot be copied from user space, ++ * -ENOMEM if out of memory. ++ */ ++int kutf_helper_input_enqueue(struct kutf_context *context, ++ const char __user *str, size_t size); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_id, sizeof(kbase_device_id)); ++/** ++ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent ++ * @context: KUTF context ++ * ++ * After this function has been called, kutf_helper_input_dequeue() will always ++ * return NULL. ++ */ ++void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser ++ * to ignore errors, until the counterpart ++ * is written to dmesg to stop ignoring errors. ++ * @dev: Device pointer to write to dmesg using. ++ * ++ * This function writes "Start ignoring dmesg warnings" to dmesg, which ++ * the parser will read and not log any errors. Only to be used in cases where ++ * we expect an error to be produced in dmesg but that we do not want to be ++ * flagged as an error. 
++ */ ++void kutf_helper_ignore_dmesg(struct device *dev); + -+void __kbase_tlstream_tl_kbase_del_ctx( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEL_CTX; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser ++ * to stop ignoring errors. ++ * @dev: Device pointer to write to dmesg using. ++ * ++ * This function writes "Stop ignoring dmesg warnings" to dmesg, which ++ * the parser will read and continue to log any errors. Counterpart to ++ * kutf_helper_ignore_dmesg(). ++ */ ++void kutf_helper_stop_ignoring_dmesg(struct device *dev); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#endif /* _KERNEL_UTF_HELPERS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h +new file mode 100644 +index 000000000..e147cbb90 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h +@@ -0,0 +1,184 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++#ifndef _KERNEL_UTF_HELPERS_USER_H_ ++#define _KERNEL_UTF_HELPERS_USER_H_ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/* kutf_helpers.h ++ * Test helper functions for the kernel UTF test infrastructure, whose ++ * implementation mirrors that of similar functions for kutf-userside ++ */ + -+void __kbase_tlstream_tl_kbase_ctx_assign_as( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 kbase_device_as_index -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ + sizeof(kbase_device_as_index) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#include ++#include + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kbase_device_as_index, sizeof(kbase_device_as_index)); ++#define KUTF_HELPER_MAX_VAL_NAME_LEN 255 + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++enum kutf_helper_valtype { ++ KUTF_HELPER_VALTYPE_INVALID, ++ KUTF_HELPER_VALTYPE_U64, ++ KUTF_HELPER_VALTYPE_STR, + -+void __kbase_tlstream_tl_kbase_ctx_unassign_as( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ KUTF_HELPER_VALTYPE_COUNT /* Must be last */ ++}; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++struct kutf_helper_named_val { ++ enum kutf_helper_valtype type; ++ char *val_name; ++ union { ++ u64 val_u64; ++ char *val_str; ++ } u; ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++/* Extra error values for certain helpers when we want to distinguish between ++ * Linux's own error values too. ++ * ++ * These can only be used on certain functions returning an int type that are ++ * documented as returning one of these potential values, they cannot be used ++ * from functions return a ptr type, since we can't decode it with PTR_ERR ++ * ++ * No negative values are used - Linux error codes should be used instead, and ++ * indicate a problem in accessing the data file itself (are generally ++ * unrecoverable) ++ * ++ * Positive values indicate correct access but invalid parsing (can be ++ * recovered from assuming data in the future is correct) ++ */ ++enum kutf_helper_err { ++ /* No error - must be zero */ ++ KUTF_HELPER_ERR_NONE = 0, ++ /* Named value parsing encountered an invalid name */ ++ KUTF_HELPER_ERR_INVALID_NAME, ++ /* Named value parsing of string or u64 type encountered extra ++ * characters after the value (after the last digit for a u64 type or ++ * after the string end delimiter for string type) ++ */ ++ KUTF_HELPER_ERR_CHARS_AFTER_VAL, ++ /* Named value parsing of string type couldn't find the string end ++ * delimiter. 
++ * ++ * This cannot be encountered when the NAME="value" message exceeds the ++ * textbuf's maximum line length, because such messages are not checked ++ * for an end string delimiter ++ */ ++ KUTF_HELPER_ERR_NO_END_DELIMITER, ++ /* Named value didn't parse as any of the known types */ ++ KUTF_HELPER_ERR_INVALID_VALUE, ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} + -+void __kbase_tlstream_tl_kbase_new_kcpuqueue( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 kcpu_queue_id, -+ u32 kernel_ctx_id, -+ u32 kcpuq_num_pending_cmds -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(kcpu_queue_id) -+ + sizeof(kernel_ctx_id) -+ + sizeof(kcpuq_num_pending_cmds) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* Send named NAME=value pair, u64 value ++ * ++ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long ++ * ++ * Any failure will be logged on the suite's current test fixture ++ * ++ * Returns 0 on success, non-zero on failure ++ */ ++int kutf_helper_send_named_u64(struct kutf_context *context, ++ const char *val_name, u64 val); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* Get the maximum length of a string that can be represented as a particular ++ * NAME="value" pair without string-value truncation in the kernel's buffer ++ * ++ * Given val_name and the kernel buffer's size, this can be used to determine ++ * the maximum length of a string that can be sent as val_name="value" pair ++ * without having the string value truncated. Any string longer than this will ++ * be truncated at some point during communication to this size. ++ * ++ * It is assumed that val_name is a valid name for ++ * kutf_helper_send_named_str(), and no checking will be made to ++ * ensure this. ++ * ++ * Returns the maximum string length that can be represented, or a negative ++ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz ++ */ ++int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue_id, sizeof(kcpu_queue_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds)); ++/* Send named NAME="str" pair ++ * ++ * no escaping allowed in str. Any of the following characters will terminate ++ * the string: '"' '\\' '\n' ++ * ++ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long ++ * ++ * Any failure will be logged on the suite's current test fixture ++ * ++ * Returns 0 on success, non-zero on failure ++ */ ++int kutf_helper_send_named_str(struct kutf_context *context, ++ const char *val_name, const char *val_str); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/* Receive named NAME=value pair ++ * ++ * This can receive u64 and string values - check named_val->type ++ * ++ * If you are not planning on dynamic handling of the named value's name and ++ * type, then kutf_helper_receive_check_val() is more useful as a ++ * convenience function. 
++ * ++ * String members of named_val will come from memory allocated on the fixture's mempool ++ * ++ * Returns 0 on success. Negative value on failure to receive from the 'run' ++ * file, positive value indicates an enum kutf_helper_err value for correct ++ * reception of data but invalid parsing ++ */ ++int kutf_helper_receive_named_val( ++ struct kutf_context *context, ++ struct kutf_helper_named_val *named_val); + -+void __kbase_tlstream_tl_kbase_del_kcpuqueue( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* Receive and validate NAME=value pair ++ * ++ * As with kutf_helper_receive_named_val, but validate that the ++ * name and type are as expected, as a convenience for a common pattern found ++ * in tests. ++ * ++ * NOTE: this only returns an error value if there was actually a problem ++ * receiving data. ++ * ++ * NOTE: If the underlying data was received correctly, but: ++ * - isn't of the expected name ++ * - isn't the expected type ++ * - isn't correctly parsed for the type ++ * then the following happens: ++ * - failure result is recorded ++ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID ++ * - named_val->u will contain some default value that should be relatively ++ * harmless for the test, including being writable in the case of string ++ * values ++ * - return value will be 0 to indicate success ++ * ++ * The rationale behind this is that we'd prefer to continue the rest of the ++ * test with failures propagated, rather than hitting a timeout ++ */ ++int kutf_helper_receive_check_val( ++ struct kutf_helper_named_val *named_val, ++ struct kutf_context *context, ++ const char *expect_val_name, ++ enum kutf_helper_valtype expect_val_type); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* Output a named value to kmsg */ ++void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h +new file mode 100644 +index 000000000..5d4d96ef3 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *fence -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(fence) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#ifndef _KERNEL_UTF_MEM_H_ ++#define _KERNEL_UTF_MEM_H_ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* kutf_mem.h ++ * Functions for management of memory pools in the kernel. ++ * ++ * This module implements a memory pool allocator, allowing a test ++ * implementation to allocate linked allocations which can then be freed by a ++ * single free which releases all of the resources held by the entire pool. ++ * ++ * Note that it is not possible to free single resources within the pool once ++ * allocated. ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &fence, sizeof(fence)); ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * struct kutf_mempool - the memory pool context management structure ++ * @head: list head on which the allocations in this context are added to ++ * @lock: mutex for concurrent allocation from multiple threads ++ * ++ */ ++struct kutf_mempool { ++ struct list_head head; ++ struct mutex lock; ++}; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *fence -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(fence) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_mempool_init() - Initialize a memory pool. ++ * @pool: Memory pool structure to initialize, provided by the user ++ * ++ * Return: zero on success ++ */ ++int kutf_mempool_init(struct kutf_mempool *pool); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_mempool_alloc() - Allocate memory from a pool ++ * @pool: Memory pool to allocate from ++ * @size: Size of memory wanted in number of bytes ++ * ++ * Return: Pointer to memory on success, NULL on failure. ++ */ ++void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &fence, sizeof(fence)); ++/** ++ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. ++ * @pool: The memory pool to free ++ */ ++void kutf_mempool_destroy(struct kutf_mempool *pool); ++#endif /* _KERNEL_UTF_MEM_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h +new file mode 100644 +index 000000000..2fb1a47a5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h +@@ -0,0 +1,180 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#ifndef _KERNEL_UTF_RESULTSET_H_ ++#define _KERNEL_UTF_RESULTSET_H_ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr, -+ u32 compare_value, -+ u32 inherit_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(cqs_obj_gpu_addr) -+ + sizeof(compare_value) -+ + sizeof(inherit_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* kutf_resultset.h ++ * Functions and structures for handling test results and result sets. ++ * ++ * This section of the kernel UTF contains structures and functions used for the ++ * management of Results and Result Sets. ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * enum kutf_result_status - Status values for a single Test error. ++ * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark ++ * results. ++ * @KUTF_RESULT_SKIP: The test was skipped. ++ * @KUTF_RESULT_UNKNOWN: The test has an unknown result. ++ * @KUTF_RESULT_PASS: The test result passed. ++ * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug ++ * message. ++ * @KUTF_RESULT_INFO: The test result passed, but raised ++ * an informative message. ++ * @KUTF_RESULT_WARN: The test result passed, but raised a warning ++ * message. ++ * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. ++ * @KUTF_RESULT_FATAL: The test result failed with a fatal error. ++ * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF ++ * assertion failure. ++ * @KUTF_RESULT_USERDATA: User data is ready to be read, ++ * this is not seen outside the kernel ++ * @KUTF_RESULT_USERDATA_WAIT: Waiting for user data to be sent, ++ * this is not seen outside the kernel ++ * @KUTF_RESULT_TEST_FINISHED: The test has finished, no more results will ++ * be produced. 
This is not seen outside kutf ++ */ ++enum kutf_result_status { ++ KUTF_RESULT_BENCHMARK = -3, ++ KUTF_RESULT_SKIP = -2, ++ KUTF_RESULT_UNKNOWN = -1, + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &compare_value, sizeof(compare_value)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &inherit_error, sizeof(inherit_error)); ++ KUTF_RESULT_PASS = 0, ++ KUTF_RESULT_DEBUG = 1, ++ KUTF_RESULT_INFO = 2, ++ KUTF_RESULT_WARN = 3, ++ KUTF_RESULT_FAIL = 4, ++ KUTF_RESULT_FATAL = 5, ++ KUTF_RESULT_ABORT = 6, + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ KUTF_RESULT_USERDATA = 7, ++ KUTF_RESULT_USERDATA_WAIT = 8, ++ KUTF_RESULT_TEST_FINISHED = 9 ++}; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(cqs_obj_gpu_addr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* The maximum size of a kutf_result_status result when ++ * converted to a string ++ */ ++#define KUTF_ERROR_MAX_NAME_SIZE 21 + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#ifdef __KERNEL__ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++struct kutf_context; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr, -+ u64 compare_value, -+ u32 condition, -+ u32 data_type, -+ u32 inherit_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(cqs_obj_gpu_addr) -+ + sizeof(compare_value) -+ + sizeof(condition) -+ + sizeof(data_type) -+ + sizeof(inherit_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * struct kutf_result - Represents a single test result. ++ * @node: Next result in the list of results. ++ * @status: The status summary (pass / warn / fail / etc). ++ * @message: A more verbose status message. ++ */ ++struct kutf_result { ++ struct list_head node; ++ enum kutf_result_status status; ++ const char *message; ++}; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data ++ * ++ * This flag is set within a struct kutf_result_set whenever the test is blocked ++ * waiting for user data. Attempts to dequeue results when this flag is set ++ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This ++ * is used to output a warning message and end of file. 
++ */ ++#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1 + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &compare_value, sizeof(compare_value)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &condition, sizeof(condition)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &data_type, sizeof(data_type)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &inherit_error, sizeof(inherit_error)); ++/** ++ * struct kutf_result_set - Represents a set of results. ++ * @results: List head of a struct kutf_result list for storing the results ++ * @waitq: Wait queue signalled whenever new results are added. ++ * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT ++ */ ++struct kutf_result_set { ++ struct list_head results; ++ wait_queue_head_t waitq; ++ int flags; ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_create_result_set() - Create a new result set ++ * to which results can be added. ++ * ++ * Return: The created result set. ++ */ ++struct kutf_result_set *kutf_create_result_set(void); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr, -+ u64 value, -+ u32 operation, -+ u32 data_type -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(cqs_obj_gpu_addr) -+ + sizeof(value) -+ + sizeof(operation) -+ + sizeof(data_type) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_add_result() - Add a result to the end of an existing result set. ++ * ++ * @context: The kutf context ++ * @status: The result status to add. ++ * @message: The result message to add. ++ * ++ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. ++ */ ++int kutf_add_result(struct kutf_context *context, ++ enum kutf_result_status status, const char *message); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_remove_result() - Remove a result from the head of a result set. ++ * @set: The result set. ++ * ++ * This function will block until there is a result to read. The wait is ++ * interruptible, so this function will return with an ERR_PTR if interrupted. ++ * ++ * Return: result or ERR_PTR if interrupted ++ */ ++struct kutf_result *kutf_remove_result( ++ struct kutf_result_set *set); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &value, sizeof(value)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &operation, sizeof(operation)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &data_type, sizeof(data_type)); ++/** ++ * kutf_destroy_result_set() - Free a previously created result set. ++ * ++ * @results: The result set whose resources to free. 
++ */ ++void kutf_destroy_result_set(struct kutf_result_set *results); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_set_waiting_for_input() - The test is waiting for userdata ++ * ++ * @set: The result set to update ++ * ++ * Causes the result set to always have results and return a fake ++ * %KUTF_RESULT_USERDATA_WAIT result. ++ */ ++void kutf_set_waiting_for_input(struct kutf_result_set *set); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(map_import_buf_gpu_addr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata ++ * ++ * @set: The result set to update ++ * ++ * Cancels the effect of kutf_set_waiting_for_input() ++ */ ++void kutf_clear_waiting_for_input(struct kutf_result_set *set); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#endif /* __KERNEL__ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); ++#endif /* _KERNEL_UTF_RESULTSET_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h +new file mode 100644 +index 000000000..9e459c556 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h +@@ -0,0 +1,571 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#ifndef _KERNEL_UTF_SUITE_H_ ++#define _KERNEL_UTF_SUITE_H_ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(map_import_buf_gpu_addr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* kutf_suite.h ++ * Functions for management of test suites. ++ * ++ * This collection of data structures, macros, and functions are used to ++ * create Test Suites, Tests within those Test Suites, and Fixture variants ++ * of each test. 
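For orientation, a hedged sketch (not part of the patch) of how the result-set calls above fit together. In practice this consumer loop lives inside the KUTF framework itself; the helper name, the log message and the include paths are assumptions based on the directory layout added by this patch:

        #include <linux/err.h>
        #include <linux/printk.h>
        #include <kutf/kutf_resultset.h>        /* assumed include path */

        /* Drain a result set until the test reports completion.
         * kutf_remove_result() blocks, returning an ERR_PTR if interrupted.
         */
        static void example_drain_results(struct kutf_result_set *set)
        {
                for (;;) {
                        struct kutf_result *result = kutf_remove_result(set);

                        if (IS_ERR(result))
                                break;  /* wait was interrupted */
                        if (result->status == KUTF_RESULT_TEST_FINISHED)
                                break;  /* no further results will be produced */
                        pr_info("kutf: result %d: %s\n", result->status,
                                result->message ? result->message : "");
                }
                kutf_destroy_result_set(set);
        }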
++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#include ++#include ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/* Arbitrary maximum size to prevent user space allocating too much kernel ++ * memory ++ */ ++#define KUTF_MAX_LINE_LENGTH (1024u) + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(map_import_buf_gpu_addr) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * KUTF_F_TEST_NONE - Pseudo-flag indicating an absence of any specified test class. ++ * Note that tests should not be annotated with this constant as it is simply a zero ++ * value; tests without a more specific class must be marked with the flag ++ * KUTF_F_TEST_GENERIC. ++ */ ++#define KUTF_F_TEST_NONE ((unsigned int)(0)) + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_F_TEST_SMOKETEST - Class indicating this test is a smoke test. ++ * A given set of smoke tests should be quick to run, enabling rapid turn-around ++ * of "regress-on-commit" test runs. ++ */ ++#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); ++/** ++ * KUTF_F_TEST_PERFORMANCE - Class indicating this test is a performance test. ++ * These tests typically produce a performance metric, such as "time to run" or ++ * "frames per second", ++ */ ++#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * KUTF_F_TEST_DEPRECATED - Class indicating that this test is a deprecated test. ++ * These tests have typically been replaced by an alternative test which is ++ * more efficient, or has better coverage. ++ */ ++#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * KUTF_F_TEST_EXPECTED_FAILURE - Class indicating that this test is a known failure. ++ * These tests have typically been run and failed, but marking them as a known ++ * failure means it is easier to triage results. ++ * ++ * It is typically more convenient to triage known failures using the ++ * results database and web UI, as this means there is no need to modify the ++ * test code. 
++ */ ++#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_F_TEST_GENERIC - Class indicating that this test is a generic test, ++ * which is not a member of a more specific test class. ++ * Tests which are not created with a specific set ++ * of filter flags by the user are assigned this test class by default. ++ */ ++#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * KUTF_F_TEST_RESFAIL - Class indicating this test is a resource allocation failure test. ++ * A resource allocation failure test will test that an error code is ++ * correctly propagated when an allocation fails. ++ */ ++#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * KUTF_F_TEST_EXPECTED_FAILURE_RF - Additional flag indicating that this test ++ * is an expected failure when run in resource failure mode. ++ * These tests are never run when running the low resource mode. ++ */ ++#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 jit_alloc_gpu_alloc_addr_dest, -+ u64 jit_alloc_va_pages, -+ u64 jit_alloc_commit_pages, -+ u64 jit_alloc_extent, -+ u32 jit_alloc_jit_id, -+ u32 jit_alloc_bin_id, -+ u32 jit_alloc_max_allocations, -+ u32 jit_alloc_flags, -+ u32 jit_alloc_usage_id -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(jit_alloc_gpu_alloc_addr_dest) -+ + sizeof(jit_alloc_va_pages) -+ + sizeof(jit_alloc_commit_pages) -+ + sizeof(jit_alloc_extent) -+ + sizeof(jit_alloc_jit_id) -+ + sizeof(jit_alloc_bin_id) -+ + sizeof(jit_alloc_max_allocations) -+ + sizeof(jit_alloc_flags) -+ + sizeof(jit_alloc_usage_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * KUTF_F_TEST_USER_0 - Flag reserved for user-defined filter zero. ++ */ ++#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_F_TEST_USER_1 - Flag reserved for user-defined filter one. 
++ */ ++#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_extent, sizeof(jit_alloc_extent)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_flags, sizeof(jit_alloc_flags)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id)); ++/** ++ * KUTF_F_TEST_USER_2 - Flag reserved for user-defined filter two. ++ */ ++#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * KUTF_F_TEST_USER_3 - Flag reserved for user-defined filter three. ++ */ ++#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * KUTF_F_TEST_USER_4 - Flag reserved for user-defined filter four. ++ */ ++#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_F_TEST_USER_5 - Flag reserved for user-defined filter five. ++ */ ++#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * KUTF_F_TEST_USER_6 - Flag reserved for user-defined filter six. ++ */ ++#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * KUTF_F_TEST_USER_7 - Flag reserved for user-defined filter seven. ++ */ ++#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * KUTF_F_TEST_ALL - Pseudo-flag indicating that all test classes should be executed. 
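A small, hypothetical example of how these class flags are combined into a filter mask for the *_with_filters() variants declared later in this header (the macro name and the chosen categories are assumptions):

        /* Hypothetical filter mask: run in the smoke and performance passes
         * and in one project-specific, user-defined category.
         */
        #define EXAMPLE_TEST_FILTERS \
                (KUTF_F_TEST_SMOKETEST | KUTF_F_TEST_PERFORMANCE | KUTF_F_TEST_USER_0)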
++ */ ++#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * union kutf_callback_data - Union used to store test callback data ++ * @ptr_value: pointer to the location where test callback data ++ * are stored ++ * @u32_value: a number which represents test callback data ++ */ ++union kutf_callback_data { ++ void *ptr_value; ++ u32 u32_value; ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * struct kutf_userdata_line - A line of user data to be returned to the user ++ * @node: struct list_head to link this into a list ++ * @str: The line of user data to return to user space ++ * @size: The number of bytes within @str ++ */ ++struct kutf_userdata_line { ++ struct list_head node; ++ char *str; ++ size_t size; ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output ++ * ++ * If user space reads the "run" file while the test is waiting for user data, ++ * then the framework will output a warning message and set this flag within ++ * struct kutf_userdata. A subsequent read will then simply return an end of ++ * file condition rather than outputting the warning again. The upshot of this ++ * is that simply running 'cat' on a test which requires user data will produce ++ * the warning followed by 'cat' exiting due to EOF - which is much more user ++ * friendly than blocking indefinitely waiting for user data. ++ */ ++#define KUTF_USERDATA_WARNING_OUTPUT 1 + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 jit_alloc_jit_id -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(jit_alloc_jit_id) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * struct kutf_userdata - Structure holding user data ++ * @flags: See %KUTF_USERDATA_WARNING_OUTPUT ++ * @input_head: List of struct kutf_userdata_line containing user data ++ * to be read by the kernel space test. ++ * @input_waitq: Wait queue signalled when there is new user data to be ++ * read by the kernel space test. ++ */ ++struct kutf_userdata { ++ unsigned long flags; ++ struct list_head input_head; ++ wait_queue_head_t input_waitq; ++}; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * struct kutf_context - Structure representing a kernel test context ++ * @kref: Refcount for number of users of this context ++ * @suite: Convenience pointer to the suite this context ++ * is running ++ * @test_fix: The fixture that is being run in this context ++ * @fixture_pool: The memory pool used for the duration of ++ * the fixture/text context. ++ * @fixture: The user provided fixture structure. ++ * @fixture_index: The index (id) of the current fixture. ++ * @fixture_name: The name of the current fixture (or NULL if unnamed). ++ * @test_data: Any user private data associated with this test ++ * @result_set: All the results logged by this test context ++ * @status: The status of the currently running fixture. ++ * @expected_status: The expected status on exist of the currently ++ * running fixture. 
++ * @work: Work item to enqueue onto the work queue to run the test ++ * @userdata: Structure containing the user data for the test to read ++ */ ++struct kutf_context { ++ struct kref kref; ++ struct kutf_suite *suite; ++ struct kutf_test_fixture *test_fix; ++ struct kutf_mempool fixture_pool; ++ void *fixture; ++ unsigned int fixture_index; ++ const char *fixture_name; ++ union kutf_callback_data test_data; ++ struct kutf_result_set *result_set; ++ enum kutf_result_status status; ++ enum kutf_result_status expected_status; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); ++ struct work_struct work; ++ struct kutf_userdata userdata; ++}; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * struct kutf_suite - Structure representing a kernel test suite ++ * @app: The application this suite belongs to. ++ * @name: The name of this suite. ++ * @suite_data: Any user private data associated with this ++ * suite. ++ * @create_fixture: Function used to create a new fixture instance ++ * @remove_fixture: Function used to destroy a new fixture instance ++ * @fixture_variants: The number of variants (must be at least 1). ++ * @suite_default_flags: Suite global filter flags which are set on ++ * all tests. ++ * @node: List node for suite_list ++ * @dir: The debugfs directory for this suite ++ * @test_list: List head to store all the tests which are ++ * part of this suite ++ */ ++struct kutf_suite { ++ struct kutf_application *app; ++ const char *name; ++ union kutf_callback_data suite_data; ++ void *(*create_fixture)(struct kutf_context *context); ++ void (*remove_fixture)(struct kutf_context *context); ++ unsigned int fixture_variants; ++ unsigned int suite_default_flags; ++ struct list_head node; ++ struct dentry *dir; ++ struct list_head test_list; ++}; + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** =========================================================================== ++ * Application functions ++ * ============================================================================ ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_create_application() - Create an in kernel test application. ++ * @name: The name of the test application. ++ * ++ * Return: pointer to the kutf_application on success or NULL ++ * on failure ++ */ ++struct kutf_application *kutf_create_application(const char *name); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * kutf_destroy_application() - Destroy an in kernel test application. ++ * ++ * @app: The test application to destroy. 
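To show how an application object is typically owned, here is a hedged sketch (not part of the patch) of a test module registering an application on load and tearing it down on unload; kutf_destroy_application() is declared just below, and the module name, application name and include path are assumptions:

        #include <linux/module.h>
        #include <kutf/kutf_suite.h>    /* assumed include path */

        static struct kutf_application *example_app;

        /* kutf_create_application() returns NULL on failure, per the
         * documentation above.
         */
        static int __init example_kutf_module_init(void)
        {
                example_app = kutf_create_application("example");
                if (!example_app)
                        return -ENOMEM;
                return 0;
        }

        static void __exit example_kutf_module_exit(void)
        {
                kutf_destroy_application(example_app);
        }

        module_init(example_kutf_module_init);
        module_exit(example_kutf_module_exit);
        MODULE_LICENSE("GPL");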
++ */ ++void kutf_destroy_application(struct kutf_application *app); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/**============================================================================ ++ * Suite functions ++ * ============================================================================ ++ */ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_create_suite() - Create a kernel test suite. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. ++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. ++ * ++ * Suite names must be unique. Should two suites with the same name be ++ * registered with the same application then this function will fail, if they ++ * are registered with different applications then the function will not detect ++ * this and the call will succeed. ++ * ++ * Return: pointer to the created kutf_suite on success or NULL ++ * on failure ++ */ ++struct kutf_suite *kutf_create_suite( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context)); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_create_suite_with_filters() - Create a kernel test suite with user ++ * defined default filters. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. ++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. ++ * @filters: Filters to apply to a test if it doesn't provide its own ++ * ++ * Suite names must be unique. Should two suites with the same name be ++ * registered with the same application then this function will fail, if they ++ * are registered with different applications then the function will not detect ++ * this and the call will succeed. ++ * ++ * Return: pointer to the created kutf_suite on success or NULL on failure ++ */ ++struct kutf_suite *kutf_create_suite_with_filters( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with ++ * user defined default filters. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. 
++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. ++ * @filters: Filters to apply to a test if it doesn't provide its own ++ * @suite_data: Suite specific callback data, provided during the ++ * running of the test in the kutf_context ++ * ++ * Return: pointer to the created kutf_suite on success or NULL ++ * on failure ++ */ ++struct kutf_suite *kutf_create_suite_with_filters_and_data( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data suite_data); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_add_test() - Add a test to a kernel test suite. ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * ++ * Note: As no filters are provided the test will use the suite filters instead ++ */ ++void kutf_add_test(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context)); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *group_suspend_buf, -+ u32 gpu_cmdq_grp_handle -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(group_suspend_buf) -+ + sizeof(gpu_cmdq_grp_handle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * @filters: A set of filtering flags, assigning test categories. ++ */ ++void kutf_add_test_with_filters(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite ++ * with filters. ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * @filters: A set of filtering flags, assigning test categories. 
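Continuing the hypothetical module sketch above, suite and test registration with these calls might look as follows. The fixture callbacks, names and filter mask are placeholders; kutf_mempool_alloc() comes from kutf_mem.h (included by this header) and kutf_test_pass() is declared further down in this file:

        static void *example_create_fixture(struct kutf_context *context)
        {
                /* The returned pointer is stored in context->fixture for use
                 * by the test functions, as described for kutf_create_suite().
                 */
                return kutf_mempool_alloc(&context->fixture_pool, 64);
        }

        static void example_remove_fixture(struct kutf_context *context)
        {
                /* Nothing to do: fixture_pool allocations are owned by the
                 * pool for the lifetime of the fixture/test context.
                 */
        }

        static void example_test(struct kutf_context *context)
        {
                kutf_test_pass(context, "example test passed");
        }

        static int example_register_suite(struct kutf_application *app)
        {
                struct kutf_suite *suite;

                suite = kutf_create_suite_with_filters(app, "example_suite",
                                                       1 /* fixture variants */,
                                                       example_create_fixture,
                                                       example_remove_fixture,
                                                       EXAMPLE_TEST_FILTERS);
                if (!suite)
                        return -ENOMEM;

                kutf_add_test(suite, 0x0, "example_test", example_test);
                return 0;
        }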
++ * @test_data: Test specific callback data, provided during the ++ * running of the test in the kutf_context ++ */ ++void kutf_add_test_with_filters_and_data( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data test_data); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &group_suspend_buf, sizeof(group_suspend_buf)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); ++/** =========================================================================== ++ * Test functions ++ * ============================================================================ ++ */ ++/** ++ * kutf_test_log_result_external() - Log a result which has been created ++ * externally into a in a standard form ++ * recognized by the log parser. ++ * @context: The test context the test is running in ++ * @message: The message for this result ++ * @new_status: The result status of this log message ++ */ ++void kutf_test_log_result_external( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_test_expect_abort() - Tell the kernel that you expect the current ++ * fixture to produce an abort. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_abort(struct kutf_context *context); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_test_expect_fatal() - Tell the kernel that you expect the current ++ * fixture to produce a fatal error. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_fatal(struct kutf_context *context); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_test_expect_fail() - Tell the kernel that you expect the current ++ * fixture to fail. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_fail(struct kutf_context *context); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * kutf_test_expect_warn() - Tell the kernel that you expect the current ++ * fixture to produce a warning. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_warn(struct kutf_context *context); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_test_expect_pass() - Tell the kernel that you expect the current ++ * fixture to pass. ++ * @context: The test context this test is running in. 
++ */ ++void kutf_test_expect_pass(struct kutf_context *context); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_test_skip() - Tell the kernel that the test should be skipped. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_skip(struct kutf_context *context); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, ++ * supplying a reason string. ++ * @context: The test context this test is running in. ++ * @message: A message string containing the reason for the skip. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a prebaked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_skip_msg(struct kutf_context *context, const char *message); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++/** ++ * kutf_test_pass() - Tell the kernel that this test has passed. ++ * @context: The test context this test is running in. ++ * @message: A message string containing the reason for the pass. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_pass(struct kutf_context *context, char const *message); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_test_debug() - Send a debug message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the debug information. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_debug(struct kutf_context *context, char const *message); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_test_info() - Send an information message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the information message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. 
++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_info(struct kutf_context *context, char const *message); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * kutf_test_warn() - Send a warning message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the warning message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_warn(struct kutf_context *context, char const *message); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * kutf_test_fail() - Tell the kernel that a test has failed ++ * @context: The test context this test is running in. ++ * @message: A message string containing the failure message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_fail(struct kutf_context *context, char const *message); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/** ++ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error ++ * @context: The test context this test is running in. ++ * @message: A message string containing the fatal error message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_fatal(struct kutf_context *context, char const *message); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/** ++ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test ++ * ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_abort(struct kutf_context *context); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#endif /* _KERNEL_UTF_SUITE_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h +new file mode 100644 +index 000000000..f6e758b80 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h +@@ -0,0 +1,60 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. 
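The message-lifetime notes above are easiest to see in a short, hypothetical helper (not part of the patch): string literals can be passed directly, while anything formatted at run time should go through kutf_dsprintf(), declared in kutf_utils.h later in this patch, so it stays valid for the rest of the run; kutf_test_fail() is declared just below:

        static void example_report(struct kutf_context *context, int err)
        {
                if (!err) {
                        /* Pre-baked string: safe, it outlives the test run. */
                        kutf_test_pass(context, "operation succeeded");
                        return;
                }

                /* Dynamically built message: allocated from the fixture pool
                 * via kutf_dsprintf(), so its lifetime matches the test run.
                 */
                kutf_test_fail(context,
                               kutf_dsprintf(&context->fixture_pool,
                                             "operation failed with error %d", err));
        }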
All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++#ifndef _KERNEL_UTF_UTILS_H_ ++#define _KERNEL_UTF_UTILS_H_ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/* kutf_utils.h ++ * Utilities for the kernel UTF test infrastructure. ++ * ++ * This collection of library functions are provided for use by kernel UTF ++ * and users of kernel UTF which don't directly fit within the other ++ * code modules. ++ */ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#include + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * KUTF_MAX_DSPRINTF_LEN - Maximum size of the message strings within ++ * kernel UTF, messages longer then this will be truncated. ++ */ ++#define KUTF_MAX_DSPRINTF_LEN 1024 + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++/** ++ * kutf_dsprintf() - dynamic sprintf ++ * @pool: memory pool to allocate from ++ * @fmt: The format string describing the string to document. ++ * @... The parameters to feed in to the format string. ++ * ++ * This function implements sprintf which dynamically allocates memory to store ++ * the string. The library will free the memory containing the string when the ++ * result set is cleared or destroyed. ++ * ++ * Note The returned string may be truncated to fit an internal temporary ++ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. ++ * ++ * Return: Returns pointer to allocated string, or NULL on error. ++ */ ++const char *kutf_dsprintf(struct kutf_mempool *pool, ++ const char *fmt, ...) 
__printf(2, 3); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#endif /* _KERNEL_UTF_UTILS_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/Kbuild b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild +new file mode 100755 +index 000000000..c4790bc66 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild +@@ -0,0 +1,31 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ifeq ($(CONFIG_MALI_KUTF),y) ++obj-m += kutf.o + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++kutf-y := \ ++ kutf_mem.o \ ++ kutf_resultset.o \ ++ kutf_suite.o \ ++ kutf_utils.o \ ++ kutf_helpers.o \ ++ kutf_helpers_user.o ++endif +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/build.bp b/drivers/gpu/arm/bifrost/tests/kutf/build.bp +new file mode 100755 +index 000000000..89edae9c5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/build.bp +@@ -0,0 +1,42 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++bob_kernel_module { ++ name: "kutf", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", ++ ], ++ srcs: [ ++ "Kbuild", ++ "kutf_helpers.c", ++ "kutf_helpers_user.c", ++ "kutf_mem.c", ++ "kutf_resultset.c", ++ "kutf_suite.c", ++ "kutf_utils.c", ++ ], ++ enabled: false, ++ mali_kutf: { ++ enabled: true, ++ }, +} +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +new file mode 100644 +index 000000000..42736195e +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +@@ -0,0 +1,141 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) ++/* Kernel UTF test helpers */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_SPINLOCK(kutf_input_lock); ++ ++bool kutf_helper_pending_input(struct kutf_context *context) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ bool input_pending; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ spin_lock(&kutf_input_lock); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ input_pending = !list_empty(&context->userdata.input_head); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ spin_unlock(&kutf_input_lock); ++ ++ return input_pending; +} ++EXPORT_SYMBOL(kutf_helper_pending_input); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ struct kutf_userdata_line *line; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = 
kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ spin_lock(&kutf_input_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ while (list_empty(&context->userdata.input_head)) { ++ int err; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ kutf_set_waiting_for_input(context->result_set); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ spin_unlock(&kutf_input_lock); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ err = wait_event_interruptible(context->userdata.input_waitq, ++ kutf_helper_pending_input(context)); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ if (err) ++ return ERR_PTR(-EINTR); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ spin_lock(&kutf_input_lock); ++ } + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ line = list_first_entry(&context->userdata.input_head, ++ struct kutf_userdata_line, node); ++ if (line->str) { ++ /* ++ * Unless it is the end-of-input marker, ++ * remove it from the list ++ */ ++ list_del(&line->node); ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ spin_unlock(&kutf_input_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ if (str_size) ++ *str_size = line->size; ++ return line->str; +} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++int kutf_helper_input_enqueue(struct kutf_context *context, ++ const char __user *str, size_t size) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kutf_userdata_line *line; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ line = kutf_mempool_alloc(&context->fixture_pool, ++ sizeof(*line) + size + 1); ++ if (!line) ++ return -ENOMEM; ++ if (str) { ++ unsigned long bytes_not_copied; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ line->size = size; ++ line->str = (void *)(line + 1); ++ bytes_not_copied = 
copy_from_user(line->str, str, size); ++ if (bytes_not_copied != 0) ++ return -EFAULT; ++ /* Zero terminate the string */ ++ line->str[size] = '\0'; ++ } else { ++ /* This is used to mark the end of input */ ++ WARN_ON(size); ++ line->size = 0; ++ line->str = NULL; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ spin_lock(&kutf_input_lock); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ list_add_tail(&line->node, &context->userdata.input_head); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ kutf_clear_waiting_for_input(context->result_set); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ spin_unlock(&kutf_input_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ wake_up(&context->userdata.input_waitq); ++ ++ return 0; +} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ kutf_helper_input_enqueue(context, NULL, 0); ++} + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++void kutf_helper_ignore_dmesg(struct device *dev) ++{ ++ dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} ++EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) ++void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); ++} ++EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c +new file mode 100644 +index 000000000..c4e294325 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c +@@ -0,0 +1,474 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
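As a hedged illustration of how a test consumes the user data queued by the functions above (the helper prototypes live in the corresponding kutf_helpers.h header, not shown in this hunk): kutf_helper_input_dequeue() blocks for input, returns an ERR_PTR if the wait is interrupted, and returns NULL once user space signals end of input. The test body and message wording below are assumptions:

        static void example_userdata_test(struct kutf_context *context)
        {
                size_t size;
                char *line = kutf_helper_input_dequeue(context, &size);

                if (IS_ERR(line)) {
                        kutf_test_fail(context, "wait for user data was interrupted");
                        return;
                }
                if (!line) {
                        kutf_test_skip_msg(context, "no user data supplied");
                        return;
                }

                kutf_test_info(context,
                               kutf_dsprintf(&context->fixture_pool,
                                             "received %zu bytes of user data", size));
        }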
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* Kernel UTF test helpers that mirror those for kutf-userside */ ++#include ++#include ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++#include ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++static const char *const valtype_names[] = { ++ "INVALID", ++ "U64", ++ "STR", ++}; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++static const char *get_val_type_name(enum kutf_helper_valtype valtype) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* enums can be signed or unsigned (implementation dependant), so ++ * enforce it to prevent: ++ * a) "<0 comparison on unsigned type" warning - if we did both upper ++ * and lower bound check ++ * b) incorrect range checking if it was a signed type - if we did ++ * upper bound check only ++ */ ++ unsigned int type_idx = (unsigned int)valtype; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) ++ type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return valtype_names[type_idx]; +} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) ++/* Check up to str_len chars of val_str to see if it's a valid value name: ++ * ++ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator ++ * - And, each char is in the character set [A-Z0-9_] ++ */ ++static int validate_val_name(const char *val_str, int str_len) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int i = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, 
--str_len) { ++ char val_chr = val_str[i]; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ if (val_chr >= 'A' && val_chr <= 'Z') ++ continue; ++ if (val_chr >= '0' && val_chr <= '9') ++ continue; ++ if (val_chr == '_') ++ continue; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ /* Character not in the set [A-Z0-9_] - report error */ ++ return 1; ++ } ++ ++ /* Names of 0 length are not valid */ ++ if (i == 0) ++ return 1; ++ /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */ ++ if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) ++ return 1; ++ ++ return 0; +} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++/* Find the length of the valid part of the string when it will be in quotes ++ * e.g. "str" ++ * ++ * That is, before any '\\', '\n' or '"' characters. This is so we don't have ++ * to escape the string ++ */ ++static int find_quoted_string_valid_len(const char *str) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ char *ptr; ++ const char *check_chars = "\\\n\""; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ ptr = strpbrk(str, check_chars); ++ if (ptr) ++ return (int)(ptr-str); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return (int)strlen(str); +} + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++static int kutf_helper_userdata_enqueue(struct kutf_context *context, ++ const char *str) +{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ char *str_copy; ++ size_t len; ++ int err; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ len = strlen(str)+1; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ str_copy = kutf_mempool_alloc(&context->fixture_pool, len); ++ if (!str_copy) ++ return -ENOMEM; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ strcpy(str_copy, str); ++ ++ err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy); ++ ++ return err; +} + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error, -+ u64 jit_alloc_gpu_alloc_addr, -+ u64 jit_alloc_mmu_flags -+) ++#define MAX_U64_HEX_LEN 16 ++/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ ++#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) ++ ++int 
kutf_helper_send_named_u64(struct kutf_context *context, ++ const char *val_name, u64 val) +{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ + sizeof(jit_alloc_gpu_alloc_addr) -+ + sizeof(jit_alloc_mmu_flags) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ int ret = 1; ++ char msgbuf[NAMED_U64_VAL_BUF_SZ]; ++ const char *errmsg = NULL; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send u64 value named '%s': Invalid value name", val_name); ++ goto out_err; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags)); ++ ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); ++ if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", ++ val_name, NAMED_U64_VAL_BUF_SZ, ret); ++ goto out_err; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ ret = kutf_helper_userdata_enqueue(context, msgbuf); ++ if (ret) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send u64 value named '%s': send returned %d", ++ val_name, ret); ++ goto out_err; ++ } ++ ++ return ret; ++out_err: ++ kutf_test_fail(context, errmsg); ++ return ret; +} ++EXPORT_SYMBOL(kutf_helper_send_named_u64); + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++#define NAMED_VALUE_SEP "=" ++#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" ++#define NAMED_STR_END_DELIM "\"" + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++int kutf_helper_max_str_len_for_kern(const char *val_name, ++ int kern_buf_sz) ++{ ++ const int val_name_len = strlen(val_name); ++ const int start_delim_len = strlen(NAMED_STR_START_DELIM); ++ const int end_delim_len = strlen(NAMED_STR_END_DELIM); ++ int max_msg_len = kern_buf_sz; ++ int max_str_len; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ max_str_len = max_msg_len - val_name_len - start_delim_len - ++ end_delim_len; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return max_str_len; +} ++EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++int kutf_helper_send_named_str(struct kutf_context *context, ++ const char *val_name, ++ const 
char *val_str) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ int val_str_len; ++ int str_buf_sz; ++ char *str_buf = NULL; ++ int ret = 1; ++ char *copy_ptr; ++ int val_name_len; ++ int start_delim_len = strlen(NAMED_STR_START_DELIM); ++ int end_delim_len = strlen(NAMED_STR_END_DELIM); ++ const char *errmsg = NULL; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send u64 value named '%s': Invalid value name", val_name); ++ goto out_err; ++ } ++ val_name_len = strlen(val_name); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ val_str_len = find_quoted_string_valid_len(val_str); + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ /* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */ ++ str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* Using kmalloc() here instead of mempool since we know we need to free ++ * before we return ++ */ ++ str_buf = kmalloc(str_buf_sz, GFP_KERNEL); ++ if (!str_buf) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d", ++ val_name, str_buf_sz); ++ goto out_err; ++ } ++ copy_ptr = str_buf; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ /* Manually copy each string component instead of snprintf because ++ * val_str may need to end early, and less error path handling ++ */ + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ /* name */ ++ memcpy(copy_ptr, val_name, val_name_len); ++ copy_ptr += val_name_len; + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error, -+ u64 jit_free_pages_used -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ + sizeof(jit_free_pages_used) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ /* str start delimiter */ ++ memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len); ++ copy_ptr += start_delim_len; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* str value */ ++ memcpy(copy_ptr, val_str, val_str_len); ++ copy_ptr += val_str_len; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, 
-+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); ++ /* str end delimiter */ ++ memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len); ++ copy_ptr += end_delim_len; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ /* Terminator */ ++ *copy_ptr = '\0'; + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ ret = kutf_helper_userdata_enqueue(context, str_buf); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (ret) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send str value named '%s': send returned %d", ++ val_name, ret); ++ goto out_err; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ kfree(str_buf); ++ return ret; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++out_err: ++ kutf_test_fail(context, errmsg); ++ kfree(str_buf); ++ return ret; +} ++EXPORT_SYMBOL(kutf_helper_send_named_str); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) ++int kutf_helper_receive_named_val( ++ struct kutf_context *context, ++ struct kutf_helper_named_val *named_val) +{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ size_t recv_sz; ++ char *recv_str; ++ char *search_ptr; ++ char *name_str = NULL; ++ int name_len; ++ int strval_len; ++ enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID; ++ char *strval = NULL; ++ u64 u64val = 0; ++ int err = KUTF_HELPER_ERR_INVALID_VALUE; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ recv_str = kutf_helper_input_dequeue(context, &recv_sz); ++ if (!recv_str) ++ return -EBUSY; ++ else if (IS_ERR(recv_str)) ++ return PTR_ERR(recv_str); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ /* Find the '=', grab the name and validate it */ ++ search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]); ++ if (search_ptr) { ++ name_len = search_ptr - recv_str; ++ if (!validate_val_name(recv_str, name_len)) { ++ /* no need to reallocate - just modify string in place */ ++ name_str = recv_str; ++ name_str[name_len] = '\0'; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ /* Move until after the '=' */ ++ recv_str += (name_len + 1); ++ recv_sz -= (name_len + 1); ++ } ++ } ++ if (!name_str) { ++ pr_err("Invalid name part for received string '%s'\n", ++ recv_str); ++ return KUTF_HELPER_ERR_INVALID_NAME; ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+) -+{ -+ const u32 msg_id = 
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ /* detect value type */ ++ if (*recv_str == NAMED_STR_START_DELIM[1]) { ++ /* string delimiter start*/ ++ ++recv_str; ++ --recv_sz; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* Find end of string */ ++ search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]); ++ if (search_ptr) { ++ strval_len = search_ptr - recv_str; ++ /* Validate the string to ensure it contains no quotes */ ++ if (strval_len == find_quoted_string_valid_len(recv_str)) { ++ /* no need to reallocate - just modify string in place */ ++ strval = recv_str; ++ strval[strval_len] = '\0'; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ /* Move until after the end delimiter */ ++ recv_str += (strval_len + 1); ++ recv_sz -= (strval_len + 1); ++ type = KUTF_HELPER_VALTYPE_STR; ++ } else { ++ pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str); ++ err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; ++ } ++ } else { ++ pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); ++ err = KUTF_HELPER_ERR_NO_END_DELIMITER; ++ } ++ } else { ++ /* possibly a number value - strtoull will parse it */ ++ err = kstrtoull(recv_str, 0, &u64val); ++ /* unlike userspace can't get an end ptr, but if kstrtoull() ++ * reads characters after the number it'll report -EINVAL ++ */ ++ if (!err) { ++ int len_remain = strnlen(recv_str, recv_sz); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ type = KUTF_HELPER_VALTYPE_U64; ++ recv_str += len_remain; ++ recv_sz -= len_remain; ++ } else { ++ /* special case: not a number, report as such */ ++ pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); ++ } ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kcpu_queue) -+ + sizeof(execute_error) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ if (type == KUTF_HELPER_VALTYPE_INVALID) ++ return err; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ /* Any remaining characters - error */ ++ if (strnlen(recv_str, recv_sz) != 0) { ++ pr_err("Characters remain after value of type %s: '%s'\n", ++ get_val_type_name(type), recv_str); ++ return KUTF_HELPER_ERR_CHARS_AFTER_VAL; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kcpu_queue, sizeof(kcpu_queue)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &execute_error, sizeof(execute_error)); ++ /* Success - write into the output structure */ ++ switch (type) { ++ case KUTF_HELPER_VALTYPE_U64: ++ named_val->u.val_u64 = u64val; ++ break; ++ case KUTF_HELPER_VALTYPE_STR: ++ named_val->u.val_str = strval; ++ break; ++ default: ++ pr_err("Unreachable, fix %s\n", __func__); ++ /* Coding error, report as though 'run' file failed */ ++ return -EINVAL; ++ } + -+ 
kbase_tlstream_msgbuf_release(stream, acq_flags); ++ named_val->val_name = name_str; ++ named_val->type = type; ++ ++ return KUTF_HELPER_ERR_NONE; +} ++EXPORT_SYMBOL(kutf_helper_receive_named_val); + -+void __kbase_tlstream_tl_kbase_csffw_fw_reloading( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) ++#define DUMMY_MSG "" ++int kutf_helper_receive_check_val( ++ struct kutf_helper_named_val *named_val, ++ struct kutf_context *context, ++ const char *expect_val_name, ++ enum kutf_helper_valtype expect_val_type) +{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_RELOADING; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ int err; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ err = kutf_helper_receive_named_val(context, named_val); ++ if (err < 0) { ++ const char *msg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to receive value named '%s'", ++ expect_val_name); ++ kutf_test_fail(context, msg); ++ return err; ++ } else if (err > 0) { ++ const char *msg = kutf_dsprintf(&context->fixture_pool, ++ "Named-value parse error when expecting value named '%s'", ++ expect_val_name); ++ kutf_test_fail(context, msg); ++ goto out_fail_and_fixup; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ if (named_val->val_name != NULL && ++ strcmp(named_val->val_name, expect_val_name) != 0) { ++ const char *msg = kutf_dsprintf(&context->fixture_pool, ++ "Expecting to receive value named '%s' but got '%s'", ++ expect_val_name, named_val->val_name); ++ kutf_test_fail(context, msg); ++ goto out_fail_and_fixup; ++ } + -+void __kbase_tlstream_tl_kbase_csffw_fw_enabling( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_ENABLING; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (named_val->type != expect_val_type) { ++ const char *msg = kutf_dsprintf(&context->fixture_pool, ++ "Expecting value named '%s' to be of type %s but got %s", ++ expect_val_name, get_val_type_name(expect_val_type), ++ get_val_type_name(named_val->type)); ++ kutf_test_fail(context, msg); ++ goto out_fail_and_fixup; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ return err; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++out_fail_and_fixup: ++ /* Produce a valid but incorrect value */ ++ switch (expect_val_type) { ++ case KUTF_HELPER_VALTYPE_U64: ++ named_val->u.val_u64 = 0ull; ++ break; ++ case KUTF_HELPER_VALTYPE_STR: ++ { ++ char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ if (!str) ++ return -1; + -+ buffer = 
kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ strcpy(str, DUMMY_MSG); ++ named_val->u.val_str = str; ++ break; ++ } ++ default: ++ break; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ /* Indicate that this is invalid */ ++ named_val->type = KUTF_HELPER_VALTYPE_INVALID; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ /* But at least allow the caller to continue in the test with failures */ ++ return 0; +} ++EXPORT_SYMBOL(kutf_helper_receive_check_val); + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) ++void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val) +{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ switch (named_val->type) { ++ case KUTF_HELPER_VALTYPE_U64: ++ pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64); ++ break; ++ case KUTF_HELPER_VALTYPE_STR: ++ pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str); ++ break; ++ case KUTF_HELPER_VALTYPE_INVALID: ++ pr_warn("%s is invalid\n", named_val->val_name); ++ break; ++ default: ++ pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type); ++ break; ++ } ++} ++EXPORT_SYMBOL(kutf_helper_output_named_val); +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c +new file mode 100644 +index 000000000..716970abb +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c +@@ -0,0 +1,107 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/* Kernel UTF memory management functions */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++#include ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++#include + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++/** ++ * struct kutf_alloc_entry - Structure representing an allocation. ++ * @node: List node for use with kutf_mempool. ++ * @data: Data area of the allocation ++ */ ++struct kutf_alloc_entry { ++ struct list_head node; ++ u8 data[0]; ++}; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++int kutf_mempool_init(struct kutf_mempool *pool) ++{ ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ return -1; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ INIT_LIST_HEAD(&pool->head); ++ mutex_init(&pool->lock); ++ ++ return 0; +} ++EXPORT_SYMBOL(kutf_mempool_init); + -+void __kbase_tlstream_tl_kbase_csffw_fw_disabling( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) ++void kutf_mempool_destroy(struct kutf_mempool *pool) +{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_DISABLING; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct list_head *remove; ++ struct list_head *tmp; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ return; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ mutex_lock(&pool->lock); ++ list_for_each_safe(remove, tmp, &pool->head) { ++ struct kutf_alloc_entry *remove_alloc; ++ ++ remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); ++ list_del(&remove_alloc->node); ++ kfree(remove_alloc); ++ } ++ mutex_unlock(&pool->lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); +} ++EXPORT_SYMBOL(kutf_mempool_destroy); + -+void __kbase_tlstream_tl_kbase_csffw_fw_off( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+) ++void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) +{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_OFF; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kutf_alloc_entry *ret; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ goto fail_pool; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, 
sizeof(csffw_cycle)); ++ mutex_lock(&pool->lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); ++ if (!ret) { ++ pr_err("Failed to allocate memory\n"); ++ goto fail_alloc; ++ } + -+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( -+ struct kbase_tlstream *stream, -+ u64 csffw_timestamp, -+ u64 csffw_cycle -+) -+{ -+ const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(csffw_timestamp) -+ + sizeof(csffw_cycle) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ INIT_LIST_HEAD(&ret->node); ++ list_add(&ret->node, &pool->head); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ mutex_unlock(&pool->lock); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_timestamp, sizeof(csffw_timestamp)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ return &ret->data[0]; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++fail_alloc: ++ mutex_unlock(&pool->lock); ++fail_pool: ++ return NULL; +} ++EXPORT_SYMBOL(kutf_mempool_alloc); +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c +new file mode 100644 +index 000000000..3a7ade283 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c +@@ -0,0 +1,163 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+void __kbase_tlstream_aux_pm_state( -+ struct kbase_tlstream *stream, -+ u32 core_type, -+ u64 core_state_bitset -+) -+{ -+ const u32 msg_id = KBASE_AUX_PM_STATE; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(core_type) -+ + sizeof(core_state_bitset) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++/* Kernel UTF result management functions */ + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++#include ++#include ++#include ++#include ++#include ++#include + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &core_type, sizeof(core_type)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &core_state_bitset, sizeof(core_state_bitset)); ++#include ++#include + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++/* Lock to protect all result structures */ ++static DEFINE_SPINLOCK(kutf_result_lock); + -+void __kbase_tlstream_aux_pagefault( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u32 as_nr, -+ u64 page_cnt_change -+) ++struct kutf_result_set *kutf_create_result_set(void) +{ -+ const u32 msg_id = KBASE_AUX_PAGEFAULT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx_nr) -+ + sizeof(as_nr) -+ + sizeof(page_cnt_change) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kutf_result_set *set; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ set = kmalloc(sizeof(*set), GFP_KERNEL); ++ if (!set) { ++ pr_err("Failed to allocate resultset"); ++ goto fail_alloc; ++ } + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &as_nr, sizeof(as_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &page_cnt_change, sizeof(page_cnt_change)); ++ INIT_LIST_HEAD(&set->results); ++ init_waitqueue_head(&set->waitq); ++ set->flags = 0; + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return set; ++ ++fail_alloc: ++ return NULL; +} + -+void __kbase_tlstream_aux_pagesalloc( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 page_cnt -+) ++int kutf_add_result(struct kutf_context *context, ++ enum kutf_result_status status, ++ const char *message) +{ -+ const u32 msg_id = KBASE_AUX_PAGESALLOC; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx_nr) -+ + sizeof(page_cnt) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kutf_mempool *mempool = &context->fixture_pool; ++ struct kutf_result_set *set = context->result_set; ++ /* Create the new result */ ++ struct kutf_result *new_result; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ BUG_ON(set == NULL); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &page_cnt, sizeof(page_cnt)); ++ new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); ++ if (!new_result) { ++ pr_err("Result allocation failed\n"); ++ return -ENOMEM; ++ } + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ INIT_LIST_HEAD(&new_result->node); ++ new_result->status = status; ++ new_result->message = message; + -+void 
__kbase_tlstream_aux_devfreq_target( -+ struct kbase_tlstream *stream, -+ u64 target_freq -+) -+{ -+ const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(target_freq) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ spin_lock(&kutf_result_lock); + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ list_add_tail(&new_result->node, &set->results); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &target_freq, sizeof(target_freq)); ++ spin_unlock(&kutf_result_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ wake_up(&set->waitq); ++ ++ return 0; +} + -+void __kbase_tlstream_aux_jit_stats( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u32 bid, -+ u32 max_allocs, -+ u32 allocs, -+ u32 va_pages, -+ u32 ph_pages -+) ++void kutf_destroy_result_set(struct kutf_result_set *set) +{ -+ const u32 msg_id = KBASE_AUX_JIT_STATS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx_nr) -+ + sizeof(bid) -+ + sizeof(max_allocs) -+ + sizeof(allocs) -+ + sizeof(va_pages) -+ + sizeof(ph_pages) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &bid, sizeof(bid)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &max_allocs, sizeof(max_allocs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &allocs, sizeof(allocs)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &va_pages, sizeof(va_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ph_pages, sizeof(ph_pages)); ++ if (!list_empty(&set->results)) ++ pr_err("%s: Unread results from test\n", __func__); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ kfree(set); +} + -+void __kbase_tlstream_aux_tiler_heap_stats( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 heap_id, -+ u32 va_pages, -+ u32 ph_pages, -+ u32 max_chunks, -+ u32 chunk_size, -+ u32 chunk_count, -+ u32 target_in_flight, -+ u32 nr_in_flight -+) ++static bool kutf_has_result(struct kutf_result_set *set) +{ -+ const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx_nr) -+ + sizeof(heap_id) -+ + sizeof(va_pages) -+ + sizeof(ph_pages) -+ + sizeof(max_chunks) -+ + sizeof(chunk_size) -+ + sizeof(chunk_count) -+ + sizeof(target_in_flight) -+ + sizeof(nr_in_flight) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ bool has_result; + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &heap_id, sizeof(heap_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &va_pages, sizeof(va_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ph_pages, sizeof(ph_pages)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &max_chunks, sizeof(max_chunks)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &chunk_size, sizeof(chunk_size)); -+ pos = 
kbasep_serialize_bytes(buffer, -+ pos, &chunk_count, sizeof(chunk_count)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &target_in_flight, sizeof(target_in_flight)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &nr_in_flight, sizeof(nr_in_flight)); ++ spin_lock(&kutf_result_lock); ++ if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) ++ /* Pretend there are results if waiting for input */ ++ has_result = true; ++ else ++ has_result = !list_empty(&set->results); ++ spin_unlock(&kutf_result_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return has_result; +} + -+void __kbase_tlstream_aux_event_job_slot( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ u32 slot_nr, -+ u32 atom_nr, -+ u32 event -+) ++struct kutf_result *kutf_remove_result(struct kutf_result_set *set) +{ -+ const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(ctx) -+ + sizeof(slot_nr) -+ + sizeof(atom_nr) -+ + sizeof(event) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; ++ struct kutf_result *result = NULL; ++ int ret; + -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ do { ++ ret = wait_event_interruptible(set->waitq, ++ kutf_has_result(set)); + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &ctx, sizeof(ctx)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &slot_nr, sizeof(slot_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &atom_nr, sizeof(atom_nr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &event, sizeof(event)); ++ if (ret) ++ return ERR_PTR(ret); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} ++ spin_lock(&kutf_result_lock); + -+void __kbase_tlstream_aux_protected_enter_start( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} -+ -+void __kbase_tlstream_aux_protected_enter_end( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) -+{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); -+} -+ -+void __kbase_tlstream_aux_mmu_command( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 mmu_cmd_id, -+ u32 mmu_synchronicity, -+ u64 mmu_lock_addr, -+ u32 mmu_lock_page_num -+) -+{ -+ const u32 msg_id = KBASE_AUX_MMU_COMMAND; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(kernel_ctx_id) -+ + sizeof(mmu_cmd_id) -+ + sizeof(mmu_synchronicity) -+ + sizeof(mmu_lock_addr) -+ + sizeof(mmu_lock_page_num) -+ ; -+ char *buffer; -+ 
unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ if (!list_empty(&set->results)) { ++ result = list_first_entry(&set->results, ++ struct kutf_result, ++ node); ++ list_del(&result->node); ++ } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { ++ /* Return a fake result */ ++ static struct kutf_result waiting = { ++ .status = KUTF_RESULT_USERDATA_WAIT ++ }; ++ result = &waiting; ++ } ++ /* If result == NULL then there was a race with the event ++ * being removed between the check in kutf_has_result and ++ * the lock being obtained. In this case we retry ++ */ + -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &mmu_cmd_id, sizeof(mmu_cmd_id)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &mmu_synchronicity, sizeof(mmu_synchronicity)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &mmu_lock_addr, sizeof(mmu_lock_addr)); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num)); ++ spin_unlock(&kutf_result_lock); ++ } while (result == NULL); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ return result; +} + -+void __kbase_tlstream_aux_protected_leave_start( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) ++void kutf_set_waiting_for_input(struct kutf_result_set *set) +{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); ++ spin_lock(&kutf_result_lock); ++ set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT; ++ spin_unlock(&kutf_result_lock); + -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ wake_up(&set->waitq); +} + -+void __kbase_tlstream_aux_protected_leave_end( -+ struct kbase_tlstream *stream, -+ const void *gpu -+) ++void kutf_clear_waiting_for_input(struct kutf_result_set *set) +{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) -+ + sizeof(gpu) -+ ; -+ char *buffer; -+ unsigned long acq_flags; -+ size_t pos = 0; -+ -+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); -+ -+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_serialize_timestamp(buffer, pos); -+ pos = kbasep_serialize_bytes(buffer, -+ pos, &gpu, sizeof(gpu)); -+ -+ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ spin_lock(&kutf_result_lock); ++ set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT; ++ spin_unlock(&kutf_result_lock); +} -+ -+/* clang-format on */ -diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c new file mode 100644 -index 000000000..06e4ca4a6 +index 000000000..4468066f1 --- /dev/null -+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h -@@ -0,0 +1,4382 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++++ 
b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c +@@ -0,0 +1,1216 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * -+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -273899,11850 +273089,15554 @@ index 000000000..06e4ca4a6 + * + */ + -+/* -+ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py. -+ * DO NOT EDIT. ++/* Kernel UTF suite, test and fixture management including user to kernel ++ * interaction + */ + -+#if !defined(_KBASE_TRACEPOINTS_H) -+#define _KBASE_TRACEPOINTS_H ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* Tracepoints are abstract callbacks notifying that some important -+ * software or hardware event has happened. ++#include ++ ++#include ++#include ++#include ++#include ++ ++/** ++ * struct kutf_application - Structure which represents kutf application ++ * @name: The name of this test application. ++ * @dir: The debugfs directory for this test ++ * @suite_list: List head to store all the suites which are part of this ++ * application ++ */ ++struct kutf_application { ++ const char *name; ++ struct dentry *dir; ++ struct list_head suite_list; ++}; ++ ++/** ++ * struct kutf_test_function - Structure which represents kutf test function ++ * @suite: Back reference to the suite this test function ++ * belongs to ++ * @filters: Filters that apply to this test function ++ * @test_id: Test ID ++ * @execute: Function to run for this test ++ * @test_data: Static data for this test ++ * @node: List node for test_list ++ * @variant_list: List head to store all the variants which can run on ++ * this function ++ * @dir: debugfs directory for this test function ++ */ ++struct kutf_test_function { ++ struct kutf_suite *suite; ++ unsigned int filters; ++ unsigned int test_id; ++ void (*execute)(struct kutf_context *context); ++ union kutf_callback_data test_data; ++ struct list_head node; ++ struct list_head variant_list; ++ struct dentry *dir; ++}; ++ ++/** ++ * struct kutf_test_fixture - Structure which holds information on the kutf ++ * test fixture ++ * @test_func: Test function this fixture belongs to ++ * @fixture_index: Index of this fixture ++ * @node: List node for variant_list ++ * @dir: debugfs directory for this test fixture ++ */ ++struct kutf_test_fixture { ++ struct kutf_test_function *test_func; ++ unsigned int fixture_index; ++ struct list_head node; ++ struct dentry *dir; ++}; ++ ++static struct dentry *base_dir; ++static struct workqueue_struct *kutf_workq; ++ ++/** ++ * struct kutf_convert_table - Structure which keeps test results ++ * @result_name: Status of the test result ++ * @result: Status value for a single test ++ */ ++struct kutf_convert_table { ++ char result_name[50]; ++ enum kutf_result_status result; ++}; ++ ++static const struct kutf_convert_table kutf_convert[] = { ++#define ADD_UTF_RESULT(_name) \ ++ { \ ++#_name, _name, \ ++ } ++ ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK), ADD_UTF_RESULT(KUTF_RESULT_SKIP), ++ ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN), ADD_UTF_RESULT(KUTF_RESULT_PASS), ++ ADD_UTF_RESULT(KUTF_RESULT_DEBUG), ADD_UTF_RESULT(KUTF_RESULT_INFO), ++ ADD_UTF_RESULT(KUTF_RESULT_WARN), ADD_UTF_RESULT(KUTF_RESULT_FAIL), ++ ADD_UTF_RESULT(KUTF_RESULT_FATAL), ADD_UTF_RESULT(KUTF_RESULT_ABORT), ++}; ++ ++#define 
UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) ++ ++/** ++ * kutf_create_context() - Create a test context in which a specific fixture ++ * of an application will be run and its results ++ * reported back to the user ++ * @test_fix: Test fixture to be run. + * -+ * In this particular implementation, it results into a MIPE -+ * timeline event and, in some cases, it also fires an ftrace event -+ * (a.k.a. Gator events, see details below). ++ * The context's refcount will be initialized to 1. ++ * ++ * Return: Returns the created test context on success or NULL on failure + */ ++static struct kutf_context *kutf_create_context( ++ struct kutf_test_fixture *test_fix); + -+#include "mali_kbase.h" -+#include "mali_kbase_gator.h" ++/** ++ * kutf_destroy_context() - Destroy a previously created test context, only ++ * once its refcount has become zero ++ * @kref: pointer to kref member within the context ++ * ++ * This should only be used via a kref_put() call on the context's kref member ++ */ ++static void kutf_destroy_context(struct kref *kref); + -+#include -+#include ++/** ++ * kutf_context_get() - increment refcount on a context ++ * @context: the kutf context ++ * ++ * This must be used when the lifetime of the context might exceed that of the ++ * thread creating @context ++ */ ++static void kutf_context_get(struct kutf_context *context); + -+/* clang-format off */ ++/** ++ * kutf_context_put() - decrement refcount on a context, destroying it when it ++ * reached zero ++ * @context: the kutf context ++ * ++ * This must be used only after a corresponding kutf_context_get() call on ++ * @context, and the caller no longer needs access to @context. ++ */ ++static void kutf_context_put(struct kutf_context *context); + -+struct kbase_tlstream; ++/** ++ * kutf_set_result() - Set the test result against the specified test context ++ * @context: Test context ++ * @status: Result status ++ */ ++static void kutf_set_result(struct kutf_context *context, ++ enum kutf_result_status status); + -+extern const size_t __obj_stream_offset; -+extern const size_t __aux_stream_offset; ++/** ++ * kutf_set_expected_result() - Set the expected test result for the specified ++ * test context ++ * @context: Test context ++ * @expected_status: Expected result status ++ */ ++static void kutf_set_expected_result(struct kutf_context *context, ++ enum kutf_result_status expected_status); + -+/* This macro dispatches a kbase_tlstream from -+ * a kbase_device instance. Only AUX or OBJ -+ * streams can be dispatched. It is aware of -+ * kbase_timeline binary representation and -+ * relies on offset variables: -+ * __obj_stream_offset and __aux_stream_offset. ++/** ++ * kutf_result_to_string() - Converts a KUTF result into a string ++ * @result_str: Output result string ++ * @result: Result status to convert ++ * ++ * Return: 1 if test result was successfully converted to string, 0 otherwise + */ -+#define __TL_DISPATCH_STREAM(kbdev, stype) \ -+ ((struct kbase_tlstream *) \ -+ ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset)) ++static int kutf_result_to_string(const char **result_str, enum kutf_result_status result) ++{ ++ int i; ++ int ret = 0; + -+struct tp_desc; ++ for (i = 0; i < UTF_CONVERT_SIZE; i++) { ++ if (result == kutf_convert[i].result) { ++ *result_str = kutf_convert[i].result_name; ++ ret = 1; ++ } ++ } ++ return ret; ++} + -+/* Descriptors of timeline messages transmitted in object events stream. 
*/ -+extern const char *obj_desc_header; -+extern const size_t obj_desc_header_size; -+/* Descriptors of timeline messages transmitted in auxiliary events stream. */ -+extern const char *aux_desc_header; -+extern const size_t aux_desc_header_size; ++/** ++ * kutf_debugfs_const_string_read() - Simple debugfs read callback which ++ * returns a constant string ++ * @file: Opened file to read from ++ * @buf: User buffer to write the data into ++ * @len: Amount of data to read ++ * @ppos: Offset into file to read from ++ * ++ * Return: On success, the number of bytes read and offset @ppos advanced by ++ * this number; on error, negative value ++ */ ++static ssize_t kutf_debugfs_const_string_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) ++{ ++ char *str = file->private_data; + -+#define TL_ATOM_STATE_IDLE 0 -+#define TL_ATOM_STATE_READY 1 -+#define TL_ATOM_STATE_DONE 2 -+#define TL_ATOM_STATE_POSTED 3 ++ return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); ++} + -+#define TL_JS_EVENT_START GATOR_JOB_SLOT_START -+#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP -+#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED ++static const struct file_operations kutf_debugfs_const_string_ops = { ++ .owner = THIS_MODULE, ++ .open = simple_open, ++ .read = kutf_debugfs_const_string_read, ++ .llseek = default_llseek, ++}; + -+#define TLSTREAM_ENABLED (1u << 31) ++/** ++ * kutf_add_explicit_result() - Check if an explicit result needs to be added ++ * @context: KUTF test context ++ */ ++static void kutf_add_explicit_result(struct kutf_context *context) ++{ ++ switch (context->expected_status) { ++ case KUTF_RESULT_UNKNOWN: ++ break; + -+void __kbase_tlstream_tl_new_ctx( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ u32 ctx_nr, -+ u32 tgid -+); ++ case KUTF_RESULT_WARN: ++ if (context->status == KUTF_RESULT_WARN) ++ kutf_test_pass(context, ++ "Pass (expected warn occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected warn missing)"); ++ break; + -+void __kbase_tlstream_tl_new_gpu( -+ struct kbase_tlstream *stream, -+ const void *gpu, -+ u32 gpu_id, -+ u32 core_count -+); ++ case KUTF_RESULT_FAIL: ++ if (context->status == KUTF_RESULT_FAIL) ++ kutf_test_pass(context, ++ "Pass (expected fail occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) { ++ /* Force the expected status so the fail gets logged */ ++ context->expected_status = KUTF_RESULT_PASS; ++ kutf_test_fail(context, ++ "Fail (expected fail missing)"); ++ } ++ break; + -+void __kbase_tlstream_tl_new_lpu( -+ struct kbase_tlstream *stream, -+ const void *lpu, -+ u32 lpu_nr, -+ u32 lpu_fn -+); ++ case KUTF_RESULT_FATAL: ++ if (context->status == KUTF_RESULT_FATAL) ++ kutf_test_pass(context, ++ "Pass (expected fatal occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected fatal missing)"); ++ break; + -+void __kbase_tlstream_tl_new_atom( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 atom_nr -+); ++ case KUTF_RESULT_ABORT: ++ if (context->status == KUTF_RESULT_ABORT) ++ kutf_test_pass(context, ++ "Pass (expected abort occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected abort missing)"); ++ break; ++ default: ++ break; ++ } ++} + -+void __kbase_tlstream_tl_new_as( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ u32 as_nr -+); ++static void kutf_run_test(struct work_struct *data) ++{ ++ struct kutf_context 
*test_context = container_of(data, ++ struct kutf_context, work); ++ struct kutf_suite *suite = test_context->suite; ++ struct kutf_test_function *test_func; + -+void __kbase_tlstream_tl_del_ctx( -+ struct kbase_tlstream *stream, -+ const void *ctx -+); ++ test_func = test_context->test_fix->test_func; + -+void __kbase_tlstream_tl_del_atom( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ /* ++ * Call the create fixture function if required before the ++ * fixture is run ++ */ ++ if (suite->create_fixture) ++ test_context->fixture = suite->create_fixture(test_context); + -+void __kbase_tlstream_tl_lifelink_lpu_gpu( -+ struct kbase_tlstream *stream, -+ const void *lpu, -+ const void *gpu -+); ++ /* Only run the test if the fixture was created (if required) */ ++ if ((suite->create_fixture && test_context->fixture) || ++ (!suite->create_fixture)) { ++ /* Run this fixture */ ++ test_func->execute(test_context); + -+void __kbase_tlstream_tl_lifelink_as_gpu( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *gpu -+); ++ if (suite->remove_fixture) ++ suite->remove_fixture(test_context); + -+void __kbase_tlstream_tl_ret_ctx_lpu( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ const void *lpu -+); ++ kutf_add_explicit_result(test_context); ++ } + -+void __kbase_tlstream_tl_ret_atom_ctx( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *ctx -+); ++ kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL); + -+void __kbase_tlstream_tl_ret_atom_lpu( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *lpu, -+ const char *attrib_match_list -+); ++ kutf_context_put(test_context); ++} + -+void __kbase_tlstream_tl_nret_ctx_lpu( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ const void *lpu -+); ++/** ++ * kutf_debugfs_run_open() - Debugfs open callback for the "run" entry. ++ * ++ * @inode: inode of the opened file ++ * @file: Opened file to read from ++ * ++ * This function creates a KUTF context and queues it onto a workqueue to be ++ * run asynchronously. The resulting file descriptor can be used to communicate ++ * userdata to the test and to read back the results of the test execution. 
++ * ++ * Return: 0 on success ++ */ ++static int kutf_debugfs_run_open(struct inode *inode, struct file *file) ++{ ++ struct kutf_test_fixture *test_fix = inode->i_private; ++ struct kutf_context *test_context; ++ int err = 0; + -+void __kbase_tlstream_tl_nret_atom_ctx( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *ctx -+); ++ test_context = kutf_create_context(test_fix); ++ if (!test_context) { ++ err = -ENOMEM; ++ goto finish; ++ } + -+void __kbase_tlstream_tl_nret_atom_lpu( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *lpu -+); ++ file->private_data = test_context; + -+void __kbase_tlstream_tl_ret_as_ctx( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *ctx -+); ++ /* This reference is release by the kutf_run_test */ ++ kutf_context_get(test_context); + -+void __kbase_tlstream_tl_nret_as_ctx( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ const void *ctx -+); ++ queue_work(kutf_workq, &test_context->work); + -+void __kbase_tlstream_tl_ret_atom_as( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *address_space -+); ++finish: ++ return err; ++} + -+void __kbase_tlstream_tl_nret_atom_as( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ const void *address_space -+); ++#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n" + -+void __kbase_tlstream_tl_attrib_atom_config( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 descriptor, -+ u64 affinity, -+ u32 config -+); ++/** ++ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. ++ * @file: Opened file to read from ++ * @buf: User buffer to write the data into ++ * @len: Amount of data to read ++ * @ppos: Offset into file to read from ++ * ++ * This function emits the results of the test, blocking until they are ++ * available. ++ * ++ * If the test involves user data then this will also return user data records ++ * to user space. If the test is waiting for user data then this function will ++ * output a message (to make the likes of 'cat' display it), followed by ++ * returning 0 to mark the end of file. ++ * ++ * Results will be emitted one at a time, once all the results have been read ++ * 0 will be returned to indicate there is no more data. ++ * ++ * Return: Number of bytes read. 
++ */ ++static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ struct kutf_context *test_context = file->private_data; ++ struct kutf_result *res; ++ unsigned long bytes_not_copied; ++ ssize_t bytes_copied = 0; ++ const char *kutf_str_ptr = NULL; ++ size_t kutf_str_len = 0; ++ size_t message_len = 0; ++ char separator = ':'; ++ char terminator = '\n'; + -+void __kbase_tlstream_tl_jit_usedpages( -+ struct kbase_tlstream *stream, -+ u64 used_pages, -+ u32 j_id -+); ++ res = kutf_remove_result(test_context->result_set); + -+void __kbase_tlstream_tl_attrib_atom_jitallocinfo( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 va_pgs, -+ u64 com_pgs, -+ u64 extent, -+ u32 j_id, -+ u32 bin_id, -+ u32 max_allocs, -+ u32 jit_flags, -+ u32 usg_id -+); ++ if (IS_ERR(res)) ++ return PTR_ERR(res); + -+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 j_id -+); ++ /* ++ * Handle 'fake' results - these results are converted to another ++ * form before being returned from the kernel ++ */ ++ switch (res->status) { ++ case KUTF_RESULT_TEST_FINISHED: ++ return 0; ++ case KUTF_RESULT_USERDATA_WAIT: ++ if (test_context->userdata.flags & ++ KUTF_USERDATA_WARNING_OUTPUT) { ++ /* ++ * Warning message already output, ++ * signal end-of-file ++ */ ++ return 0; ++ } + -+void __kbase_tlstream_tl_attrib_as_config( -+ struct kbase_tlstream *stream, -+ const void *address_space, -+ u64 transtab, -+ u64 memattr, -+ u64 transcfg -+); ++ message_len = sizeof(USERDATA_WARNING_MESSAGE)-1; ++ if (message_len > len) ++ message_len = len; + -+void __kbase_tlstream_tl_event_lpu_softstop( -+ struct kbase_tlstream *stream, -+ const void *lpu -+); ++ bytes_not_copied = copy_to_user(buf, ++ USERDATA_WARNING_MESSAGE, ++ message_len); ++ if (bytes_not_copied != 0) ++ return -EFAULT; ++ test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT; ++ return message_len; ++ case KUTF_RESULT_USERDATA: ++ message_len = strlen(res->message); ++ if (message_len > len-1) { ++ message_len = len-1; ++ pr_warn("User data truncated, read not long enough\n"); ++ } ++ bytes_not_copied = copy_to_user(buf, res->message, ++ message_len); ++ if (bytes_not_copied != 0) { ++ pr_warn("Failed to copy data to user space buffer\n"); ++ return -EFAULT; ++ } ++ /* Finally the terminator */ ++ bytes_not_copied = copy_to_user(&buf[message_len], ++ &terminator, 1); ++ if (bytes_not_copied != 0) { ++ pr_warn("Failed to copy data to user space buffer\n"); ++ return -EFAULT; ++ } ++ return message_len+1; ++ default: ++ /* Fall through - this is a test result */ ++ break; ++ } + -+void __kbase_tlstream_tl_event_atom_softstop_ex( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ /* Note: This code assumes a result is read completely */ ++ kutf_result_to_string(&kutf_str_ptr, res->status); ++ if (kutf_str_ptr) ++ kutf_str_len = strlen(kutf_str_ptr); + -+void __kbase_tlstream_tl_event_atom_softstop_issue( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ if (res->message) ++ message_len = strlen(res->message); + -+void __kbase_tlstream_tl_event_atom_softjob_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ if ((kutf_str_len + 1 + message_len + 1) > len) { ++ pr_err("Not enough space in user buffer for a single result"); ++ return 0; ++ } + -+void __kbase_tlstream_tl_event_atom_softjob_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ /* First copy the result string */ ++ if 
(kutf_str_ptr) { ++ bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, ++ kutf_str_len); ++ bytes_copied += kutf_str_len - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; ++ } + -+void __kbase_tlstream_tl_arbiter_granted( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++ /* Then the separator */ ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ &separator, 1); ++ bytes_copied += 1 - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; + -+void __kbase_tlstream_tl_arbiter_started( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++ /* Finally Next copy the result string */ ++ if (res->message) { ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ res->message, message_len); ++ bytes_copied += message_len - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; ++ } + -+void __kbase_tlstream_tl_arbiter_stop_requested( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++ /* Finally the terminator */ ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ &terminator, 1); ++ bytes_copied += 1 - bytes_not_copied; + -+void __kbase_tlstream_tl_arbiter_stopped( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++exit: ++ return bytes_copied; ++} + -+void __kbase_tlstream_tl_arbiter_requested( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++/** ++ * kutf_debugfs_run_write() - Debugfs write callback for the "run" entry. ++ * @file: Opened file to write to ++ * @buf: User buffer to read the data from ++ * @len: Amount of data to write ++ * @ppos: Offset into file to write to ++ * ++ * This function allows user and kernel to exchange extra data necessary for ++ * the test fixture. ++ * ++ * The data is added to the first struct kutf_context running the fixture ++ * ++ * Return: Number of bytes written ++ */ ++static ssize_t kutf_debugfs_run_write(struct file *file, ++ const char __user *buf, size_t len, loff_t *ppos) ++{ ++ int ret = 0; ++ struct kutf_context *test_context = file->private_data; + -+void __kbase_tlstream_jd_gpu_soft_reset( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++ if (len > KUTF_MAX_LINE_LENGTH) ++ return -EINVAL; + -+void __kbase_tlstream_jd_tiler_heap_chunk_alloc( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 heap_id, -+ u64 chunk_va -+); ++ ret = kutf_helper_input_enqueue(test_context, buf, len); ++ if (ret < 0) ++ return ret; + -+void __kbase_tlstream_tl_js_sched_start( -+ struct kbase_tlstream *stream, -+ u32 dummy -+); ++ return len; ++} + -+void __kbase_tlstream_tl_js_sched_end( -+ struct kbase_tlstream *stream, -+ u32 dummy -+); ++/** ++ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. ++ * @inode: File entry representation ++ * @file: A specific opening of the file ++ * ++ * Release any resources that were created during the opening of the file ++ * ++ * Note that resources may not be released immediately, that might only happen ++ * later when other users of the kutf_context release their refcount. 
++ * ++ * Return: 0 on success ++ */ ++static int kutf_debugfs_run_release(struct inode *inode, struct file *file) ++{ ++ struct kutf_context *test_context = file->private_data; + -+void __kbase_tlstream_tl_jd_submit_atom_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ kutf_helper_input_enqueue_end_of_data(test_context); + -+void __kbase_tlstream_tl_jd_submit_atom_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ kutf_context_put(test_context); ++ return 0; ++} + -+void __kbase_tlstream_tl_jd_done_no_lock_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++static const struct file_operations kutf_debugfs_run_ops = { ++ .owner = THIS_MODULE, ++ .open = kutf_debugfs_run_open, ++ .read = kutf_debugfs_run_read, ++ .write = kutf_debugfs_run_write, ++ .release = kutf_debugfs_run_release, ++ .llseek = default_llseek, ++}; + -+void __kbase_tlstream_tl_jd_done_no_lock_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++/** ++ * create_fixture_variant() - Creates a fixture variant for the specified ++ * test function and index and the debugfs entries ++ * that represent it. ++ * @test_func: Test function ++ * @fixture_index: Fixture index ++ * ++ * Return: 0 on success, negative value corresponding to error code in failure ++ */ ++static int create_fixture_variant(struct kutf_test_function *test_func, ++ unsigned int fixture_index) ++{ ++ struct kutf_test_fixture *test_fix; ++ char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ ++ struct dentry *tmp; ++ int err; + -+void __kbase_tlstream_tl_jd_done_start( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); ++ if (!test_fix) { ++ pr_err("Failed to create debugfs directory when adding fixture\n"); ++ err = -ENOMEM; ++ goto fail_alloc; ++ } + -+void __kbase_tlstream_tl_jd_done_end( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ test_fix->test_func = test_func; ++ test_fix->fixture_index = fixture_index; + -+void __kbase_tlstream_tl_jd_atom_complete( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++ snprintf(name, sizeof(name), "%d", fixture_index); ++ test_fix->dir = debugfs_create_dir(name, test_func->dir); ++ if (IS_ERR_OR_NULL(test_func->dir)) { ++ pr_err("Failed to create debugfs directory when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_dir; ++ } + -+void __kbase_tlstream_tl_run_atom_start( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 atom_nr -+); ++ tmp = debugfs_create_file("type", 0004, test_fix->dir, "fixture\n", ++ &kutf_debugfs_const_string_ops); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_run_atom_end( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 atom_nr -+); ++ tmp = debugfs_create_file_unsafe( ++ "run", 0600, test_fix->dir, ++ test_fix, ++ &kutf_debugfs_run_ops); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_attrib_atom_priority( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 prio -+); ++ list_add(&test_fix->node, 
&test_func->variant_list); ++ return 0; + -+void __kbase_tlstream_tl_attrib_atom_state( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u32 state -+); ++fail_file: ++ debugfs_remove_recursive(test_fix->dir); ++fail_dir: ++ kfree(test_fix); ++fail_alloc: ++ return err; ++} + -+void __kbase_tlstream_tl_attrib_atom_prioritized( -+ struct kbase_tlstream *stream, -+ const void *atom -+); ++/** ++ * kutf_remove_test_variant() - Destroy a previously created fixture variant. ++ * @test_fix: Test fixture ++ */ ++static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) ++{ ++ debugfs_remove_recursive(test_fix->dir); ++ kfree(test_fix); ++} + -+void __kbase_tlstream_tl_attrib_atom_jit( -+ struct kbase_tlstream *stream, -+ const void *atom, -+ u64 edit_addr, -+ u64 new_addr, -+ u32 jit_flags, -+ u64 mem_flags, -+ u32 j_id, -+ u64 com_pgs, -+ u64 extent, -+ u64 va_pgs -+); ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE ++/* Adapting to the upstream debugfs_create_x32() change */ ++static int ktufp_u32_get(void *data, u64 *val) ++{ ++ *val = *(u32 *)data; ++ return 0; ++} + -+void __kbase_tlstream_tl_kbase_new_device( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_gpu_core_count, -+ u32 kbase_device_max_num_csgs, -+ u32 kbase_device_as_count, -+ u32 kbase_device_sb_entry_count, -+ u32 kbase_device_has_cross_stream_sync, -+ u32 kbase_device_supports_gpu_sleep -+); ++DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); ++#endif + -+void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u64 buffer_gpu_addr -+); ++void kutf_add_test_with_filters_and_data( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data test_data) ++{ ++ struct kutf_test_function *test_func; ++ struct dentry *tmp; ++ unsigned int i; + -+void __kbase_tlstream_tl_kbase_device_program_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kernel_ctx_id, -+ u32 gpu_cmdq_grp_handle, -+ u32 kbase_device_csg_slot_index, -+ u32 kbase_device_csg_slot_resuming -+); ++ test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); ++ if (!test_func) { ++ pr_err("Failed to allocate memory when adding test %s\n", name); ++ goto fail_alloc; ++ } + -+void __kbase_tlstream_tl_kbase_device_deprogram_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+); ++ INIT_LIST_HEAD(&test_func->variant_list); + -+void __kbase_tlstream_tl_kbase_device_halting_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index, -+ u32 kbase_device_csg_slot_suspending -+); ++ test_func->dir = debugfs_create_dir(name, suite->dir); ++ if (IS_ERR_OR_NULL(test_func->dir)) { ++ pr_err("Failed to create debugfs directory when adding test %s\n", name); ++ goto fail_dir; ++ } + -+void __kbase_tlstream_tl_kbase_device_suspend_csg( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+); ++ tmp = debugfs_create_file("type", 0004, test_func->dir, "test\n", ++ &kutf_debugfs_const_string_ops); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_kbase_device_csg_idle( -+ struct kbase_tlstream *stream, -+ u32 kbase_device_id, -+ u32 kbase_device_csg_slot_index -+); ++ test_func->filters = 
filters; ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE ++ tmp = debugfs_create_file_unsafe("filters", 0004, test_func->dir, ++ &test_func->filters, &kutfp_fops_x32_ro); ++#else ++ tmp = debugfs_create_x32("filters", 0004, test_func->dir, ++ &test_func->filters); ++#endif ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_kbase_new_ctx( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 kbase_device_id -+); ++ test_func->test_id = id; ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE ++ debugfs_create_u32("test_id", 0004, test_func->dir, ++ &test_func->test_id); ++#else ++ tmp = debugfs_create_u32("test_id", 0004, test_func->dir, ++ &test_func->test_id); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); ++ goto fail_file; ++ } ++#endif + -+void __kbase_tlstream_tl_kbase_del_ctx( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id -+); ++ for (i = 0; i < suite->fixture_variants; i++) { ++ if (create_fixture_variant(test_func, i)) { ++ pr_err("Failed to create fixture %d when adding test %s\n", i, name); ++ goto fail_file; ++ } ++ } + -+void __kbase_tlstream_tl_kbase_ctx_assign_as( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 kbase_device_as_index -+); ++ test_func->suite = suite; ++ test_func->execute = execute; ++ test_func->test_data = test_data; + -+void __kbase_tlstream_tl_kbase_ctx_unassign_as( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id -+); ++ list_add(&test_func->node, &suite->test_list); ++ return; + -+void __kbase_tlstream_tl_kbase_new_kcpuqueue( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 kcpu_queue_id, -+ u32 kernel_ctx_id, -+ u32 kcpuq_num_pending_cmds -+); ++fail_file: ++ debugfs_remove_recursive(test_func->dir); ++fail_dir: ++ kfree(test_func); ++fail_alloc: ++ return; ++} ++EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); + -+void __kbase_tlstream_tl_kbase_del_kcpuqueue( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++void kutf_add_test_with_filters( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters) ++{ ++ union kutf_callback_data data; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *fence -+); ++ data.ptr_value = NULL; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *fence -+); ++ kutf_add_test_with_filters_and_data(suite, ++ id, ++ name, ++ execute, ++ suite->suite_default_flags, ++ data); ++} ++EXPORT_SYMBOL(kutf_add_test_with_filters); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr, -+ u32 compare_value, -+ u32 inherit_error -+); ++void kutf_add_test(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context)) ++{ ++ union kutf_callback_data data; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr -+); ++ data.ptr_value = NULL; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 
cqs_obj_gpu_addr, -+ u64 compare_value, -+ u32 condition, -+ u32 data_type, -+ u32 inherit_error -+); ++ kutf_add_test_with_filters_and_data(suite, ++ id, ++ name, ++ execute, ++ suite->suite_default_flags, ++ data); ++} ++EXPORT_SYMBOL(kutf_add_test); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 cqs_obj_gpu_addr, -+ u64 value, -+ u32 operation, -+ u32 data_type -+); ++/** ++ * kutf_remove_test() - Remove a previously added test function. ++ * @test_func: Test function ++ */ ++static void kutf_remove_test(struct kutf_test_function *test_func) ++{ ++ struct list_head *pos; ++ struct list_head *tmp; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+); ++ list_for_each_safe(pos, tmp, &test_func->variant_list) { ++ struct kutf_test_fixture *test_fix; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+); ++ test_fix = list_entry(pos, struct kutf_test_fixture, node); ++ kutf_remove_test_variant(test_fix); ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 map_import_buf_gpu_addr -+); ++ list_del(&test_func->node); ++ debugfs_remove_recursive(test_func->dir); ++ kfree(test_func); ++} + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++struct kutf_suite *kutf_create_suite_with_filters_and_data( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data suite_data) ++{ ++ struct kutf_suite *suite; ++ struct dentry *tmp; + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u64 jit_alloc_gpu_alloc_addr_dest, -+ u64 jit_alloc_va_pages, -+ u64 jit_alloc_commit_pages, -+ u64 jit_alloc_extent, -+ u32 jit_alloc_jit_id, -+ u32 jit_alloc_bin_id, -+ u32 jit_alloc_max_allocations, -+ u32 jit_alloc_flags, -+ u32 jit_alloc_usage_id -+); ++ suite = kmalloc(sizeof(*suite), GFP_KERNEL); ++ if (!suite) { ++ pr_err("Failed to allocate memory when creating suite %s\n", name); ++ goto fail_kmalloc; ++ } + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ suite->dir = debugfs_create_dir(name, app->dir); ++ if (IS_ERR_OR_NULL(suite->dir)) { ++ pr_err("Failed to create debugfs directory when adding test %s\n", name); ++ goto fail_debugfs; ++ } + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ tmp = debugfs_create_file("type", 0004, suite->dir, "suite\n", ++ &kutf_debugfs_const_string_ops); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 jit_alloc_jit_id -+); ++ INIT_LIST_HEAD(&suite->test_list); ++ suite->app = app; ++ suite->name = name; ++ 
suite->fixture_variants = fixture_count; ++ suite->create_fixture = create_fixture; ++ suite->remove_fixture = remove_fixture; ++ suite->suite_default_flags = filters; ++ suite->suite_data = suite_data; + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ list_add(&suite->node, &app->suite_list); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ return suite; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ const void *group_suspend_buf, -+ u32 gpu_cmdq_grp_handle -+); ++fail_file: ++ debugfs_remove_recursive(suite->dir); ++fail_debugfs: ++ kfree(suite); ++fail_kmalloc: ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++struct kutf_suite *kutf_create_suite_with_filters( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters) ++{ ++ union kutf_callback_data data; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ data.ptr_value = NULL; ++ return kutf_create_suite_with_filters_and_data(app, ++ name, ++ fixture_count, ++ create_fixture, ++ remove_fixture, ++ filters, ++ data); ++} ++EXPORT_SYMBOL(kutf_create_suite_with_filters); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++struct kutf_suite *kutf_create_suite( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context)) ++{ ++ union kutf_callback_data data; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ data.ptr_value = NULL; ++ return kutf_create_suite_with_filters_and_data(app, ++ name, ++ fixture_count, ++ create_fixture, ++ remove_fixture, ++ KUTF_F_TEST_GENERIC, ++ data); ++} ++EXPORT_SYMBOL(kutf_create_suite); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++/** ++ * kutf_destroy_suite() - Destroy a previously added test suite. 
++ * @suite: Test suite ++ */ ++static void kutf_destroy_suite(struct kutf_suite *suite) ++{ ++ struct list_head *pos; ++ struct list_head *tmp; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ list_for_each_safe(pos, tmp, &suite->test_list) { ++ struct kutf_test_function *test_func; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ test_func = list_entry(pos, struct kutf_test_function, node); ++ kutf_remove_test(test_func); ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ list_del(&suite->node); ++ debugfs_remove_recursive(suite->dir); ++ kfree(suite); ++} + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++struct kutf_application *kutf_create_application(const char *name) ++{ ++ struct kutf_application *app; ++ struct dentry *tmp; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ app = kmalloc(sizeof(*app), GFP_KERNEL); ++ if (!app) { ++ pr_err("Failed to create allocate memory when creating application %s\n", name); ++ goto fail_kmalloc; ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ app->dir = debugfs_create_dir(name, base_dir); ++ if (IS_ERR_OR_NULL(app->dir)) { ++ pr_err("Failed to create debugfs direcotry when creating application %s\n", name); ++ goto fail_debugfs; ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ tmp = debugfs_create_file("type", 0004, app->dir, "application\n", ++ &kutf_debugfs_const_string_ops); ++ if (IS_ERR_OR_NULL(tmp)) { ++ pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); ++ goto fail_file; ++ } + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ INIT_LIST_HEAD(&app->suite_list); ++ app->name = name; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ return app; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++fail_file: ++ debugfs_remove_recursive(app->dir); ++fail_debugfs: ++ kfree(app); ++fail_kmalloc: ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_create_application); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++void kutf_destroy_application(struct kutf_application *app) ++{ ++ struct list_head *pos; ++ struct list_head *tmp; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ list_for_each_safe(pos, tmp, &app->suite_list) { ++ struct kutf_suite *suite; + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue 
-+); ++ suite = list_entry(pos, struct kutf_suite, node); ++ kutf_destroy_suite(suite); ++ } + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error, -+ u64 jit_alloc_gpu_alloc_addr, -+ u64 jit_alloc_mmu_flags -+); ++ debugfs_remove_recursive(app->dir); ++ kfree(app); ++} ++EXPORT_SYMBOL(kutf_destroy_application); + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++static struct kutf_context *kutf_create_context( ++ struct kutf_test_fixture *test_fix) ++{ ++ struct kutf_context *new_context; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); ++ if (!new_context) { ++ pr_err("Failed to allocate test context"); ++ goto fail_alloc; ++ } + -+void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ new_context->result_set = kutf_create_result_set(); ++ if (!new_context->result_set) { ++ pr_err("Failed to create result set"); ++ goto fail_result_set; ++ } + -+void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error, -+ u64 jit_free_pages_used -+); ++ new_context->test_fix = test_fix; ++ /* Save the pointer to the suite as the callbacks will require it */ ++ new_context->suite = test_fix->test_func->suite; ++ new_context->status = KUTF_RESULT_UNKNOWN; ++ new_context->expected_status = KUTF_RESULT_UNKNOWN; + -+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ kutf_mempool_init(&new_context->fixture_pool); ++ new_context->fixture = NULL; ++ new_context->fixture_index = test_fix->fixture_index; ++ new_context->fixture_name = NULL; ++ new_context->test_data = test_fix->test_func->test_data; + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ new_context->userdata.flags = 0; ++ INIT_LIST_HEAD(&new_context->userdata.input_head); ++ init_waitqueue_head(&new_context->userdata.input_waitq); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue -+); ++ INIT_WORK(&new_context->work, kutf_run_test); + -+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( -+ struct kbase_tlstream *stream, -+ const void *kcpu_queue, -+ u32 execute_error -+); ++ kref_init(&new_context->kref); + -+void __kbase_tlstream_tl_kbase_csffw_fw_reloading( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++ return new_context; + -+void __kbase_tlstream_tl_kbase_csffw_fw_enabling( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++fail_result_set: ++ kfree(new_context); ++fail_alloc: ++ return NULL; ++} + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++static void kutf_destroy_context(struct kref *kref) ++{ ++ struct kutf_context *context; + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++ context = container_of(kref, struct kutf_context, kref); ++ kutf_destroy_result_set(context->result_set); ++ 
kutf_mempool_destroy(&context->fixture_pool); ++ kfree(context); ++} + -+void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++static void kutf_context_get(struct kutf_context *context) ++{ ++ kref_get(&context->kref); ++} + -+void __kbase_tlstream_tl_kbase_csffw_fw_disabling( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); ++static void kutf_context_put(struct kutf_context *context) ++{ ++ kref_put(&context->kref, kutf_destroy_context); ++} + -+void __kbase_tlstream_tl_kbase_csffw_fw_off( -+ struct kbase_tlstream *stream, -+ u64 csffw_cycle -+); + -+void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( -+ struct kbase_tlstream *stream, -+ u64 csffw_timestamp, -+ u64 csffw_cycle -+); ++static void kutf_set_result(struct kutf_context *context, ++ enum kutf_result_status status) ++{ ++ context->status = status; ++} + -+void __kbase_tlstream_aux_pm_state( -+ struct kbase_tlstream *stream, -+ u32 core_type, -+ u64 core_state_bitset -+); ++static void kutf_set_expected_result(struct kutf_context *context, ++ enum kutf_result_status expected_status) ++{ ++ context->expected_status = expected_status; ++} + -+void __kbase_tlstream_aux_pagefault( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u32 as_nr, -+ u64 page_cnt_change -+); ++/** ++ * kutf_test_log_result() - Log a result for the specified test context ++ * @context: Test context ++ * @message: Result string ++ * @new_status: Result status ++ */ ++static void kutf_test_log_result( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status) ++{ ++ if (context->status < new_status) ++ context->status = new_status; + -+void __kbase_tlstream_aux_pagesalloc( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 page_cnt -+); ++ if (context->expected_status != new_status) ++ kutf_add_result(context, new_status, message); ++} + -+void __kbase_tlstream_aux_devfreq_target( -+ struct kbase_tlstream *stream, -+ u64 target_freq -+); ++void kutf_test_log_result_external( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status) ++{ ++ kutf_test_log_result(context, message, new_status); ++} ++EXPORT_SYMBOL(kutf_test_log_result_external); + -+void __kbase_tlstream_aux_jit_stats( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u32 bid, -+ u32 max_allocs, -+ u32 allocs, -+ u32 va_pages, -+ u32 ph_pages -+); ++void kutf_test_expect_abort(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, KUTF_RESULT_ABORT); ++} ++EXPORT_SYMBOL(kutf_test_expect_abort); + -+void __kbase_tlstream_aux_tiler_heap_stats( -+ struct kbase_tlstream *stream, -+ u32 ctx_nr, -+ u64 heap_id, -+ u32 va_pages, -+ u32 ph_pages, -+ u32 max_chunks, -+ u32 chunk_size, -+ u32 chunk_count, -+ u32 target_in_flight, -+ u32 nr_in_flight -+); ++void kutf_test_expect_fatal(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, KUTF_RESULT_FATAL); ++} ++EXPORT_SYMBOL(kutf_test_expect_fatal); + -+void __kbase_tlstream_aux_event_job_slot( -+ struct kbase_tlstream *stream, -+ const void *ctx, -+ u32 slot_nr, -+ u32 atom_nr, -+ u32 event -+); ++void kutf_test_expect_fail(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, KUTF_RESULT_FAIL); ++} ++EXPORT_SYMBOL(kutf_test_expect_fail); + -+void __kbase_tlstream_aux_protected_enter_start( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++void kutf_test_expect_warn(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, 
KUTF_RESULT_WARN); ++} ++EXPORT_SYMBOL(kutf_test_expect_warn); + -+void __kbase_tlstream_aux_protected_enter_end( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++void kutf_test_expect_pass(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, KUTF_RESULT_PASS); ++} ++EXPORT_SYMBOL(kutf_test_expect_pass); + -+void __kbase_tlstream_aux_mmu_command( -+ struct kbase_tlstream *stream, -+ u32 kernel_ctx_id, -+ u32 mmu_cmd_id, -+ u32 mmu_synchronicity, -+ u64 mmu_lock_addr, -+ u32 mmu_lock_page_num -+); ++void kutf_test_skip(struct kutf_context *context) ++{ ++ kutf_set_result(context, KUTF_RESULT_SKIP); ++ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + -+void __kbase_tlstream_aux_protected_leave_start( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++ kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); ++} ++EXPORT_SYMBOL(kutf_test_skip); + -+void __kbase_tlstream_aux_protected_leave_end( -+ struct kbase_tlstream *stream, -+ const void *gpu -+); ++void kutf_test_skip_msg(struct kutf_context *context, const char *message) ++{ ++ kutf_set_result(context, KUTF_RESULT_SKIP); ++ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + -+struct kbase_tlstream; ++ kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, ++ "Test skipped: %s", message), KUTF_RESULT_SKIP); ++ kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); ++} ++EXPORT_SYMBOL(kutf_test_skip_msg); + -+/** -+ * KBASE_TLSTREAM_TL_NEW_CTX - object ctx is created -+ * -+ * @kbdev: Kbase device -+ * @ctx: Name of the context object -+ * @ctx_nr: Kernel context number -+ * @tgid: Thread Group Id -+ */ -+#define KBASE_TLSTREAM_TL_NEW_CTX( \ -+ kbdev, \ -+ ctx, \ -+ ctx_nr, \ -+ tgid \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_new_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ ctx, \ -+ ctx_nr, \ -+ tgid \ -+ ); \ -+ } while (0) ++void kutf_test_debug(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); ++} ++EXPORT_SYMBOL(kutf_test_debug); + -+/** -+ * KBASE_TLSTREAM_TL_NEW_GPU - object gpu is created -+ * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object -+ * @gpu_id: Name of the GPU object -+ * @core_count: Number of cores this GPU hosts -+ */ -+#define KBASE_TLSTREAM_TL_NEW_GPU( \ -+ kbdev, \ -+ gpu, \ -+ gpu_id, \ -+ core_count \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_new_gpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu, \ -+ gpu_id, \ -+ core_count \ -+ ); \ -+ } while (0) ++void kutf_test_pass(struct kutf_context *context, char const *message) ++{ ++ static const char explicit_message[] = "(explicit pass)"; ++ ++ if (!message) ++ message = explicit_message; ++ ++ kutf_test_log_result(context, message, KUTF_RESULT_PASS); ++} ++EXPORT_SYMBOL(kutf_test_pass); ++ ++void kutf_test_info(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_INFO); ++} ++EXPORT_SYMBOL(kutf_test_info); ++ ++void kutf_test_warn(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_WARN); ++} ++EXPORT_SYMBOL(kutf_test_warn); ++ ++void kutf_test_fail(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_FAIL); ++} ++EXPORT_SYMBOL(kutf_test_fail); ++ 
++void kutf_test_fatal(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_FATAL); ++} ++EXPORT_SYMBOL(kutf_test_fatal); ++ ++void kutf_test_abort(struct kutf_context *context) ++{ ++ kutf_test_log_result(context, "", KUTF_RESULT_ABORT); ++} ++EXPORT_SYMBOL(kutf_test_abort); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** -+ * KBASE_TLSTREAM_TL_NEW_LPU - object lpu is created ++ * init_kutf_core() - Module entry point. ++ * Create the base entry point in debugfs. + * -+ * @kbdev: Kbase device -+ * @lpu: Name of the Logical Processing Unit object -+ * @lpu_nr: Sequential number assigned to the newly created LPU -+ * @lpu_fn: Property describing functional abilities of this LPU ++ * Return: 0 on success, error code otherwise. + */ -+#define KBASE_TLSTREAM_TL_NEW_LPU( \ -+ kbdev, \ -+ lpu, \ -+ lpu_nr, \ -+ lpu_fn \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_new_lpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ lpu, \ -+ lpu_nr, \ -+ lpu_fn \ -+ ); \ -+ } while (0) ++static int __init init_kutf_core(void) ++{ ++ kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1); ++ if (!kutf_workq) ++ return -ENOMEM; ++ ++ base_dir = debugfs_create_dir("kutf_tests", NULL); ++ if (IS_ERR_OR_NULL(base_dir)) { ++ destroy_workqueue(kutf_workq); ++ kutf_workq = NULL; ++ return -ENOMEM; ++ } ++ ++ return 0; ++} + +/** -+ * KBASE_TLSTREAM_TL_NEW_ATOM - object atom is created ++ * exit_kutf_core() - Module exit point. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @atom_nr: Sequential number of an atom ++ * Remove the base entry point in debugfs. + */ -+#define KBASE_TLSTREAM_TL_NEW_ATOM( \ -+ kbdev, \ -+ atom, \ -+ atom_nr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_new_atom( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ atom_nr \ -+ ); \ -+ } while (0) ++static void __exit exit_kutf_core(void) ++{ ++ debugfs_remove_recursive(base_dir); ++ ++ if (kutf_workq) ++ destroy_workqueue(kutf_workq); ++} ++ ++#else /* CONFIG_DEBUG_FS */ + +/** -+ * KBASE_TLSTREAM_TL_NEW_AS - address space object is created ++ * init_kutf_core - Module entry point ++ * Stub for when build against a kernel without debugfs support. + * -+ * @kbdev: Kbase device -+ * @address_space: Name of the address space object -+ * @as_nr: Address space number ++ * Return: -ENODEV + */ -+#define KBASE_TLSTREAM_TL_NEW_AS( \ -+ kbdev, \ -+ address_space, \ -+ as_nr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_new_as( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ address_space, \ -+ as_nr \ -+ ); \ -+ } while (0) ++static int __init init_kutf_core(void) ++{ ++ pr_debug("KUTF requires a kernel with debug fs support"); ++ ++ return -ENODEV; ++} + +/** -+ * KBASE_TLSTREAM_TL_DEL_CTX - context is destroyed ++ * exit_kutf_core() - Module exit point. 
+ * -+ * @kbdev: Kbase device -+ * @ctx: Name of the context object ++ * Stub for when build against a kernel without debugfs support + */ -+#define KBASE_TLSTREAM_TL_DEL_CTX( \ -+ kbdev, \ -+ ctx \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_del_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ ctx \ -+ ); \ -+ } while (0) ++static void __exit exit_kutf_core(void) ++{ ++} ++#endif /* CONFIG_DEBUG_FS */ + -+/** -+ * KBASE_TLSTREAM_TL_DEL_ATOM - atom is destroyed ++MODULE_LICENSE("GPL"); ++ ++module_init(init_kutf_core); ++module_exit(exit_kutf_core); +diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c +new file mode 100644 +index 000000000..21f5fadcc +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c +@@ -0,0 +1,75 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier + */ -+#define KBASE_TLSTREAM_TL_DEL_ATOM( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_del_atom( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) + -+/** -+ * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - lpu is deleted with gpu -+ * -+ * @kbdev: Kbase device -+ * @lpu: Name of the Logical Processing Unit object -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ -+ kbdev, \ -+ lpu, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_lifelink_lpu_gpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ lpu, \ -+ gpu \ -+ ); \ -+ } while (0) ++/* Kernel UTF utility functions */ + -+/** -+ * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - address space is deleted with gpu -+ * -+ * @kbdev: Kbase device -+ * @address_space: Name of the address space object -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ -+ kbdev, \ -+ address_space, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_lifelink_as_gpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ address_space, \ -+ gpu \ -+ ); \ -+ } while (0) ++#include ++#include ++#include ++#include + -+/** -+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - context is retained by lpu -+ * -+ * @kbdev: Kbase device -+ * @ctx: Name of the context object -+ * @lpu: Name of the Logical Processing Unit object -+ */ -+#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ -+ kbdev, \ -+ ctx, \ -+ lpu \ -+ ) \ -+ do { \ -+ int enabled = 
atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_ret_ctx_lpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ ctx, \ -+ lpu \ -+ ); \ -+ } while (0) ++#include ++#include + -+/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - atom is retained by context -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @ctx: Name of the context object -+ */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ -+ kbdev, \ -+ atom, \ -+ ctx \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_ret_atom_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ ctx \ -+ ); \ -+ } while (0) ++static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; + -+/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - atom is retained by lpu -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @lpu: Name of the Logical Processing Unit object -+ * @attrib_match_list: List containing match operator attributes -+ */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ -+ kbdev, \ -+ atom, \ -+ lpu, \ -+ attrib_match_list \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_ret_atom_lpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ lpu, \ -+ attrib_match_list \ -+ ); \ -+ } while (0) ++static DEFINE_MUTEX(buffer_lock); + -+/** -+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - context is released by lpu -+ * -+ * @kbdev: Kbase device -+ * @ctx: Name of the context object -+ * @lpu: Name of the Logical Processing Unit object -+ */ -+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ -+ kbdev, \ -+ ctx, \ -+ lpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_nret_ctx_lpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ ctx, \ -+ lpu \ -+ ); \ -+ } while (0) ++const char *kutf_dsprintf(struct kutf_mempool *pool, ++ const char *fmt, ...) 
++{ ++ va_list args; ++ int len; ++ int size; ++ void *buffer; + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - atom is released by context -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @ctx: Name of the context object -+ */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ -+ kbdev, \ -+ atom, \ -+ ctx \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_nret_atom_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ ctx \ -+ ); \ -+ } while (0) ++ mutex_lock(&buffer_lock); ++ va_start(args, fmt); ++ len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); ++ va_end(args); + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - atom is released by lpu ++ if (len < 0) { ++ pr_err("%s: Bad format dsprintf format %s\n", __func__, fmt); ++ goto fail_format; ++ } ++ ++ if (len >= sizeof(tmp_buffer)) { ++ pr_warn("%s: Truncated dsprintf message %s\n", __func__, fmt); ++ size = sizeof(tmp_buffer); ++ } else { ++ size = len + 1; ++ } ++ ++ buffer = kutf_mempool_alloc(pool, size); ++ if (!buffer) ++ goto fail_alloc; ++ ++ memcpy(buffer, tmp_buffer, size); ++ mutex_unlock(&buffer_lock); ++ ++ return buffer; ++ ++fail_alloc: ++fail_format: ++ mutex_unlock(&buffer_lock); ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_dsprintf); +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild +new file mode 100755 +index 000000000..027bc27c9 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++ifeq ($(CONFIG_MALI_KUTF_CLK_RATE_TRACE),y) ++obj-m += mali_kutf_clk_rate_trace_test_portal.o ++ ++mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o ++endif +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp +new file mode 100755 +index 000000000..225ad69c5 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp +@@ -0,0 +1,43 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @lpu: Name of the Logical Processing Unit object + */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ -+ kbdev, \ -+ atom, \ -+ lpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_nret_atom_lpu( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ lpu \ -+ ); \ -+ } while (0) + -+/** -+ * KBASE_TLSTREAM_TL_RET_AS_CTX - address space is retained by context ++bob_kernel_module { ++ name: "mali_kutf_clk_rate_trace_test_portal", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", ++ ], ++ srcs: [ ++ "Kbuild", ++ "mali_kutf_clk_rate_trace_test.c", ++ "../mali_kutf_clk_rate_trace_test.h", ++ ], ++ extra_symbols: [ ++ "mali_kbase", ++ "kutf", ++ ], ++ enabled: false, ++ mali_kutf_clk_rate_trace: { ++ kbuild_options: ["CONFIG_MALI_KUTF_CLK_RATE_TRACE=y"], ++ enabled: true, ++ }, ++} +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +new file mode 100644 +index 000000000..a6f54b61d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +@@ -0,0 +1,965 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * -+ * @kbdev: Kbase device -+ * @address_space: Name of the address space object -+ * @ctx: Name of the context object + */ -+#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ -+ kbdev, \ -+ address_space, \ -+ ctx \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_ret_as_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ address_space, \ -+ ctx \ -+ ); \ -+ } while (0) ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) ++#include ++#else ++#include ++#endif ++#include "mali_kbase.h" ++#include "backend/gpu/mali_kbase_irq_internal.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#include ++#include ++#include ++#include ++ ++#include "../mali_kutf_clk_rate_trace_test.h" ++ ++#define MINOR_FOR_FIRST_KBASE_DEV (-1) ++ ++/* KUTF test application pointer for this test */ ++static struct kutf_application *kutf_app; ++ ++enum portal_server_state { ++ PORTAL_STATE_NO_CLK, ++ PORTAL_STATE_LIVE, ++ PORTAL_STATE_CLOSING, ++}; + +/** -+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - address space is released by context -+ * -+ * @kbdev: Kbase device -+ * @address_space: Name of the address space object -+ * @ctx: Name of the context object ++ * struct clk_trace_snapshot - Trace info data on a clock. ++ * @previous_rate: Snapshot start point clock rate. ++ * @current_rate: End point clock rate. It becomes the start rate of the ++ * next trace snapshot. ++ * @rate_up_cnt: Count in the snapshot duration when the clock trace ++ * write is a rate of higher value than the last. ++ * @rate_down_cnt: Count in the snapshot duration when the clock trace write ++ * is a rate of lower value than the last. + */ -+#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ -+ kbdev, \ -+ address_space, \ -+ ctx \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_nret_as_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ address_space, \ -+ ctx \ -+ ); \ -+ } while (0) ++struct clk_trace_snapshot { ++ unsigned long previous_rate; ++ unsigned long current_rate; ++ u32 rate_up_cnt; ++ u32 rate_down_cnt; ++}; + +/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - atom is retained by address space -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @address_space: Name of the address space object ++ * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. ++ * @kbdev: kbase device for the GPU. ++ * @listener: Clock rate change listener structure. ++ * @invoke_notify: When true, invoke notify command is being executed. ++ * @snapshot: Clock trace update snapshot data array. A snapshot ++ * for each clock contains info accumulated beteen two ++ * GET_TRACE_SNAPSHOT requests. ++ * @nclks: Number of clocks visible to the trace portal. ++ * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC ++ * PM_CTX_CNT requests made to the portal. On change from ++ * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is ++ * triggered. ++ * @total_update_cnt: Total number of received trace write callbacks. ++ * @server_state: Portal server operational state. ++ * @result_msg: Message for the test result. ++ * @test_status: Portal test reslt status. 
+ */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ -+ kbdev, \ -+ atom, \ -+ address_space \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_ret_atom_as( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ address_space \ -+ ); \ -+ } while (0) ++struct kutf_clk_rate_trace_fixture_data { ++ struct kbase_device *kbdev; ++ struct kbase_clk_rate_listener listener; ++ bool invoke_notify; ++ struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ unsigned int nclks; ++ unsigned int pm_ctx_cnt; ++ unsigned int total_update_cnt; ++ enum portal_server_state server_state; ++ char const *result_msg; ++ enum kutf_result_status test_status; ++}; + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - atom is released by address space -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @address_space: Name of the address space object ++struct clk_trace_portal_input { ++ struct kutf_helper_named_val cmd_input; ++ enum kbasep_clk_rate_trace_req portal_cmd; ++ int named_val_err; ++}; ++ ++struct kbasep_cmd_name_pair { ++ enum kbasep_clk_rate_trace_req cmd; ++ const char *name; ++}; ++ ++static const struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { ++ { PORTAL_CMD_GET_PLATFORM, GET_PLATFORM }, ++ { PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR }, ++ { PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE }, ++ { PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT }, ++ { PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT }, ++ { PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT }, ++ { PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL }, ++ { PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ }, ++}; ++ ++/* Global pointer for the kutf_portal_trace_write() to use. When ++ * this pointer is engaged, new requests for create fixture will fail ++ * hence limiting the use of the portal at any time to a singleton. 
+ */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ -+ kbdev, \ -+ atom, \ -+ address_space \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_nret_atom_as( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ address_space \ -+ ); \ -+ } while (0) ++static struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; ++ ++#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) ++static char portal_msg_buf[PORTAL_MSG_LEN]; ++ ++static void kutf_portal_trace_write( ++ struct kbase_clk_rate_listener *listener, ++ u32 index, u32 new_rate) ++{ ++ struct clk_trace_snapshot *snapshot; ++ struct kutf_clk_rate_trace_fixture_data *data; ++ ++ if (listener == NULL) { ++ pr_err("%s - index: %u, new_rate: %u, listener is NULL\n", ++ __func__, index, new_rate); ++ return; ++ } ++ ++ data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, ++ listener); ++ ++ lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); ++ ++ if (WARN_ON(g_ptr_portal_data == NULL)) ++ return; ++ if (WARN_ON(index >= g_ptr_portal_data->nclks)) ++ return; ++ ++ /* This callback is triggered by invoke notify command, skipping */ ++ if (data->invoke_notify) ++ return; ++ ++ snapshot = &g_ptr_portal_data->snapshot[index]; ++ if (new_rate > snapshot->current_rate) ++ snapshot->rate_up_cnt++; ++ else ++ snapshot->rate_down_cnt++; ++ snapshot->current_rate = new_rate; ++ g_ptr_portal_data->total_update_cnt++; ++} ++ ++static void kutf_set_pm_ctx_active(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (WARN_ON(data->pm_ctx_cnt != 1)) ++ return; ++ ++ kbase_pm_context_active(data->kbdev); ++ kbase_pm_wait_for_desired_state(data->kbdev); ++#if !MALI_USE_CSF ++ kbase_pm_request_gpu_cycle_counter(data->kbdev); ++#endif ++} ++ ++static void kutf_set_pm_ctx_idle(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (WARN_ON(data->pm_ctx_cnt > 0)) ++ return; ++#if !MALI_USE_CSF ++ kbase_pm_release_gpu_cycle_counter(data->kbdev); ++#endif ++ kbase_pm_context_idle(data->kbdev); ++} ++ ++static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ const unsigned int cnt = data->pm_ctx_cnt; ++ const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; ++ char const *errmsg = NULL; ++ ++ WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && ++ req != PORTAL_CMD_DEC_PM_CTX_CNT); ++ ++ if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { ++ data->pm_ctx_cnt++; ++ if (data->pm_ctx_cnt == 1) ++ kutf_set_pm_ctx_active(context); ++ } ++ ++ if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { ++ data->pm_ctx_cnt--; ++ if (data->pm_ctx_cnt == 0) ++ kutf_set_pm_ctx_idle(context); ++ } ++ ++ /* Skip the length check, no chance of overflow for two ints */ ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for adjusting pm_ctx_cnt"); ++ } ++ ++ return errmsg; ++} ++ ++static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct 
kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ unsigned long rate; ++ bool idle; ++ int ret; ++ int i; ++ char const *errmsg = NULL; ++ ++ WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && ++ (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, RATE:[", seq); ++ ++ for (i = 0; i < data->nclks; i++) { ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR) ++ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; ++ else ++ rate = data->snapshot[i].current_rate; ++ idle = kbdev->pm.clk_rtm.gpu_idle; ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ if ((i + 1) == data->nclks) ++ ret += snprintf(portal_msg_buf + ret, ++ PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", ++ rate, idle); ++ else ++ ret += snprintf(portal_msg_buf + ret, ++ PORTAL_MSG_LEN - ret, "0x%lx, ", rate); ++ ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with rate array data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with rate array data"); ++ } ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending back rate array\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending rate array"); ++ } ++ ++ return errmsg; ++} + +/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes ++ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot ++ * @context: KUTF context ++ * @cmd: The decoded portal input request + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @descriptor: Job descriptor address -+ * @affinity: Job affinity -+ * @config: Job config ++ * The accumulated clock rate trace information is kept inside as an snapshot ++ * record. A user request of getting the snapshot marks the closure of the ++ * current snapshot record, and the start of the next one. The response ++ * message contains the current snapshot record, with each clock's ++ * data sequentially placed inside (array marker) [ ]. 
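++ *
++ * For illustration only: with a single clock the reply assembled below
++ * takes the form (values are examples, not fixed):
++ *   ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
++ * where each tuple is (previous_rate, current_rate, rate_up_cnt,
++ * rate_down_cnt) for one clock.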
++ * ++ * Return: generated string + */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ -+ kbdev, \ -+ atom, \ -+ descriptor, \ -+ affinity, \ -+ config \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_attrib_atom_config( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ descriptor, \ -+ affinity, \ -+ config \ -+ ); \ -+ } while (0) ++static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct clk_trace_snapshot snapshot; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ int ret; ++ int i; ++ char const *fmt; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); ++ ++ for (i = 0; i < data->nclks; i++) { ++ spin_lock(&data->kbdev->pm.clk_rtm.lock); ++ /* copy out the snapshot of the clock */ ++ snapshot = data->snapshot[i]; ++ /* Set the next snapshot start condition */ ++ data->snapshot[i].previous_rate = snapshot.current_rate; ++ data->snapshot[i].rate_up_cnt = 0; ++ data->snapshot[i].rate_down_cnt = 0; ++ spin_unlock(&data->kbdev->pm.clk_rtm.lock); ++ ++ /* Check i corresponding to the last clock */ ++ if ((i + 1) == data->nclks) ++ fmt = "(0x%lx, 0x%lx, %u, %u)]}"; ++ else ++ fmt = "(0x%lx, 0x%lx, %u, %u), "; ++ ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, ++ fmt, snapshot.previous_rate, snapshot.current_rate, ++ snapshot.rate_up_cnt, snapshot.rate_down_cnt); ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with snapshot data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with snapshot data"); ++ } ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending back snapshot array\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending snapshot array"); ++ } ++ ++ return errmsg; ++} + +/** -+ * KBASE_TLSTREAM_TL_JIT_USEDPAGES - used pages for jit ++ * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback ++ * @context: KUTF context ++ * @cmd: The decoded portal input request + * -+ * @kbdev: Kbase device -+ * @used_pages: Number of pages used for jit -+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. ++ * Invokes frequency change notification callbacks with a fake ++ * GPU frequency 42 kHz for the top clock domain. 
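++ *
++ * For illustration only, the acknowledgement mirrors the snprintf format
++ * used below, e.g. ACK="{SEQ:1, HZ:42000}" (the sequence number simply
++ * echoes the client's request).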
++ * ++ * Return: generated string + */ -+#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ -+ kbdev, \ -+ used_pages, \ -+ j_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jit_usedpages( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ used_pages, \ -+ j_id \ -+ ); \ -+ } while (0) ++static const char *kutf_clk_trace_do_invoke_notify_42k( ++ struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ const unsigned long new_rate_hz = 42000; ++ int ret; ++ char const *errmsg = NULL; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ); ++ ++ spin_lock(&clk_rtm->lock); ++ ++ data->invoke_notify = true; ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, 0, new_rate_hz); ++ data->invoke_notify = false; ++ ++ spin_unlock(&clk_rtm->lock); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); ++ ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with invoked data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with invoked data"); ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ "request\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); ++ } ++ ++ return errmsg; ++} ++ ++static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL); ++ ++ data->server_state = PORTAL_STATE_CLOSING; ++ ++ /* Skip the length check, no chance of overflow for two ints */ ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " CLOSE_PORTAL "reuquest"); ++ } ++ ++ return errmsg; ++} + +/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - Information about JIT allocations ++ * kutf_clk_trace_do_get_platform() - Gets platform information ++ * @context: KUTF context ++ * @cmd: The decoded portal input request + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @va_pgs: The minimum number of virtual pages required -+ * @com_pgs: The minimum number of physical pages which should back the allocation. -+ * @extent: Granularity of physical pages to grow the allocation by during a fault. -+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. -+ * @bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation. -+ * @max_allocs: Maximum allocations allowed in this bin. -+ * @jit_flags: Flags specifying the special requirements for the JIT allocation. -+ * @usg_id: A hint about which allocation should be reused. 
++ * Checks the gpu node in the device tree to see if arbitration is enabled ++ * If so determines device tree whether platform is PV or PTM ++ * ++ * Return: A string to indicate the platform (PV/PTM/GPU/UNKNOWN) + */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ -+ kbdev, \ -+ atom, \ -+ va_pgs, \ -+ com_pgs, \ -+ extent, \ -+ j_id, \ -+ bin_id, \ -+ max_allocs, \ -+ jit_flags, \ -+ usg_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ va_pgs, \ -+ com_pgs, \ -+ extent, \ -+ j_id, \ -+ bin_id, \ -+ max_allocs, \ -+ jit_flags, \ -+ usg_id \ -+ ); \ -+ } while (0) ++static const char *kutf_clk_trace_do_get_platform( ++ struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ char const *errmsg = NULL; ++ const void *arbiter_if_node = NULL; ++ const void *power_node = NULL; ++ const char *platform = "GPU"; ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ arbiter_if_node = ++ of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); ++#endif ++ if (arbiter_if_node) { ++ power_node = of_find_compatible_node(NULL, NULL, ++ "arm,mali-gpu-power"); ++ if (power_node) { ++ platform = "PV"; ++ } else { ++ power_node = of_find_compatible_node(NULL, NULL, ++ "arm,mali-ptm"); ++ if (power_node) ++ platform = "PTM"; ++ else ++ platform = "UNKNOWN"; ++ } ++ } else { ++ platform = "GPU"; ++ } ++ ++ pr_debug("%s - platform is %s\n", __func__, platform); ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PLATFORM:%s}", seq, platform); ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_PLATFORM); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " GET_PLATFORM "request"); ++ } ++ ++ return errmsg; ++} ++ ++static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ int i; ++ int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); ++ ++ cmd->named_val_err = err; ++ if (err == KUTF_HELPER_ERR_NONE && ++ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { ++ /* All portal request commands are of format (named u64): ++ * CMD_NAME=1234 ++ * where, 1234 is a (variable) sequence number tag. 
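++ * For example: GET_CLK_RATE_MGR=1, INC_PM_CTX_CNT=2, CLOSE_PORTAL=3.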
++ */ ++ for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { ++ if (strcmp(cmd->cmd_input.val_name, ++ kbasep_portal_cmd_name_map[i].name)) ++ continue; ++ ++ cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; ++ return true; ++ } ++ } ++ ++ cmd->portal_cmd = PORTAL_CMD_INVALID; ++ return false; ++} ++ ++static void kutf_clk_trace_flag_result(struct kutf_context *context, ++ enum kutf_result_status result, char const *msg) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (result > data->test_status) { ++ data->test_status = result; ++ if (msg) ++ data->result_msg = msg; ++ if (data->server_state == PORTAL_STATE_LIVE && ++ result > KUTF_RESULT_WARN) { ++ data->server_state = PORTAL_STATE_CLOSING; ++ } ++ } ++} ++ ++static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ char const *errmsg = NULL; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != ++ PORTAL_TOTAL_CMDS); ++ WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); ++ ++ switch (cmd->portal_cmd) { ++ case PORTAL_CMD_GET_PLATFORM: ++ errmsg = kutf_clk_trace_do_get_platform(context, cmd); ++ break; ++ case PORTAL_CMD_GET_CLK_RATE_MGR: ++ fallthrough; ++ case PORTAL_CMD_GET_CLK_RATE_TRACE: ++ errmsg = kutf_clk_trace_do_get_rate(context, cmd); ++ break; ++ case PORTAL_CMD_GET_TRACE_SNAPSHOT: ++ errmsg = kutf_clk_trace_do_get_snapshot(context, cmd); ++ break; ++ case PORTAL_CMD_INC_PM_CTX_CNT: ++ fallthrough; ++ case PORTAL_CMD_DEC_PM_CTX_CNT: ++ errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd); ++ break; ++ case PORTAL_CMD_CLOSE_PORTAL: ++ errmsg = kutf_clk_trace_do_close_portal(context, cmd); ++ break; ++ case PORTAL_CMD_INVOKE_NOTIFY_42KHZ: ++ errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd); ++ break; ++ default: ++ pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", ++ cmd->portal_cmd); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Don't know how to handle portal_cmd: %d", ++ cmd->portal_cmd); ++ break; ++ } ++ ++ if (errmsg) ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ ++ return (errmsg == NULL); ++} + +/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - Information about JIT frees ++ * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input ++ * @context: KUTF context ++ * @cmd: The erroneous input request + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. ++ * This function deal with an erroneous input request, and respond with ++ * a proper 'NACK' message. 
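++ *
++ * The two NACK forms produced here match those documented in
++ * mali_kutf_clk_rate_trace_test.h, for example (command name illustrative):
++ *   NACK="{SEQ:2, MSG: Unknown command 'FOO_CMD'.}"
++ *   NACK="Wrong portal cmd format (Ref example: CMD_NAME=0X16)"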
++ * ++ * Return: 0 on success, non-zero on failure + */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ -+ kbdev, \ -+ atom, \ -+ j_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ j_id \ -+ ); \ -+ } while (0) ++static int kutf_clk_trace_do_nack_response(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ int seq; ++ int err; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); ++ ++ if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && ++ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { ++ /* Keep seq number as % 256 */ ++ seq = cmd->cmd_input.u.val_u64 & 255; ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, MSG: Unknown command '%s'.}", seq, ++ cmd->cmd_input.val_name); ++ err = kutf_helper_send_named_str(context, "NACK", ++ portal_msg_buf); ++ } else ++ err = kutf_helper_send_named_str(context, "NACK", ++ "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); ++ ++ if (err) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send portal NACK response"); ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ } ++ ++ return err; ++} + +/** -+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes ++ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing ++ * @context: KUTF context + * -+ * @kbdev: Kbase device -+ * @address_space: Name of the address space object -+ * @transtab: Configuration of the TRANSTAB register -+ * @memattr: Configuration of the MEMATTR register -+ * @transcfg: Configuration of the TRANSCFG register (or zero if not present) ++ * This function carries out some basic test on the tracing operation: ++ * 1). GPU idle on test start, trace rate should be 0 (low power state) ++ * 2). Make sure GPU is powered up, the trace rate should match ++ * that from the clcok manager's internal recorded rate ++ * 3). If the GPU active transition occurs following 2), there ++ * must be rate change event from tracing. 
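++ *
++ * A failed check is flagged as KUTF_RESULT_FAIL; if the checks pass but no
++ * trace update callback was ever seen, the result is only downgraded to
++ * KUTF_RESULT_WARN.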
+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ -+ kbdev, \ -+ address_space, \ -+ transtab, \ -+ memattr, \ -+ transcfg \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_attrib_as_config( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ address_space, \ -+ transtab, \ -+ memattr, \ -+ transcfg \ -+ ); \ -+ } while (0) ++static void kutf_clk_trace_barebone_check(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ bool fail = false; ++ bool idle[2] = { false }; ++ char const *msg = NULL; ++ int i; ++ ++ /* Check consistency if gpu happens to be idle */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ idle[0] = kbdev->pm.clk_rtm.gpu_idle; ++ if (kbdev->pm.clk_rtm.gpu_idle) { ++ for (i = 0; i < data->nclks; i++) { ++ if (data->snapshot[i].current_rate) { ++ /* Idle should have a rate 0 */ ++ fail = true; ++ break; ++ } ++ } ++ } ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ if (fail) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "GPU Idle not yielding 0-rate"); ++ pr_err("Trace did not see idle rate\n"); ++ } else { ++ /* Make local PM active if not done so yet */ ++ if (data->pm_ctx_cnt == 0) { ++ /* Ensure the GPU is powered */ ++ data->pm_ctx_cnt++; ++ kutf_set_pm_ctx_active(context); ++ } ++ /* Checking the rate is consistent */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ idle[1] = kbdev->pm.clk_rtm.gpu_idle; ++ for (i = 0; i < data->nclks; i++) { ++ /* Rate match between the manager and the trace */ ++ if (kbdev->pm.clk_rtm.clks[i]->clock_val != ++ data->snapshot[i].current_rate) { ++ fail = true; ++ break; ++ } ++ } ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ if (idle[1]) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "GPU still idle after set_pm_ctx_active"); ++ pr_err("GPU still idle after set_pm_ctx_active\n"); ++ } ++ ++ if (!msg && fail) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "Trace rate not matching Clk manager's read"); ++ pr_err("Trace rate not matching Clk manager's read\n"); ++ } ++ } ++ ++ if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "Trace update did not occur"); ++ pr_err("Trace update did not occur\n"); ++ } ++ if (msg) ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg); ++ else if (!data->total_update_cnt) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "No trace update seen during the test!"); ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg); ++ } ++} ++ ++static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd) ++{ ++ return (cmd->named_val_err == -EBUSY); ++} ++ ++static void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) ++{ ++ struct clk_trace_portal_input cmd; ++ unsigned long timeout = jiffies + HZ * 2; ++ bool has_cmd; ++ ++ while (time_before(jiffies, timeout)) { ++ if (kutf_helper_pending_input(context)) { ++ has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, ++ &cmd); ++ if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) ++ break; ++ ++ kutf_helper_send_named_str(context, "NACK", ++ "Fatal! 
No clocks visible, aborting"); ++ } ++ msleep(20); ++ } ++ ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, ++ "No clocks visble to the portal"); ++} + +/** -+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - softstop event on given lpu ++ * mali_kutf_clk_rate_trace_test_portal() - Service portal input ++ * @context: KUTF context + * -+ * @kbdev: Kbase device -+ * @lpu: Name of the Logical Processing Unit object ++ * The test portal operates on input requests. If the input request is one ++ * of the recognized portal commands, it handles it accordingly. Otherwise ++ * a negative response 'NACK' is returned. The portal service terminates ++ * when a 'CLOSE_PORTAL' request is received, or due to an internal error. ++ * Both case would result in the server_state transitioned to CLOSING. ++ * ++ * If the portal is closed on request, a sanity test on the clock rate ++ * trace operation is undertaken via function: ++ * kutf_clk_trace_barebone_check(); + */ -+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ -+ kbdev, \ -+ lpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_event_lpu_softstop( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ lpu \ -+ ); \ -+ } while (0) ++static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct clk_trace_portal_input new_cmd; ++ ++ pr_debug("Test portal service start\n"); ++ ++ while (data->server_state == PORTAL_STATE_LIVE) { ++ if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd)) ++ kutf_clk_trace_process_portal_cmd(context, &new_cmd); ++ else if (kutf_clk_trace_end_of_stream(&new_cmd)) ++ /* Dequeue on portal input, end of stream */ ++ data->server_state = PORTAL_STATE_CLOSING; ++ else ++ kutf_clk_trace_do_nack_response(context, &new_cmd); ++ } ++ ++ /* Closing, exhausting all the pending inputs with NACKs. */ ++ if (data->server_state == PORTAL_STATE_CLOSING) { ++ while (kutf_helper_pending_input(context) && ++ (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || ++ !kutf_clk_trace_end_of_stream(&new_cmd))) { ++ kutf_helper_send_named_str(context, "NACK", ++ "Portal closing down"); ++ } ++ } ++ ++ /* If no portal error, do a barebone test here irrespective ++ * whatever the portal live session has been testing, which ++ * is entirely driven by the user-side via portal requests. ++ */ ++ if (data->test_status <= KUTF_RESULT_WARN) { ++ if (data->server_state != PORTAL_STATE_NO_CLK) ++ kutf_clk_trace_barebone_check(context); ++ else { ++ /* No clocks case, NACK 2-sec for the fatal situation */ ++ kutf_clk_trace_no_clks_dummy(context); ++ } ++ } ++ ++ /* If we have changed pm_ctx count, drop it back */ ++ if (data->pm_ctx_cnt) { ++ /* Although we count on portal requests, it only has material ++ * impact when from 0 -> 1. So the reverse is a simple one off. 
++ */ ++ data->pm_ctx_cnt = 0; ++ kutf_set_pm_ctx_idle(context); ++ } ++ ++ /* Finally log the test result line */ ++ if (data->test_status < KUTF_RESULT_WARN) ++ kutf_test_pass(context, data->result_msg); ++ else if (data->test_status == KUTF_RESULT_WARN) ++ kutf_test_warn(context, data->result_msg); ++ else if (data->test_status == KUTF_RESULT_FATAL) ++ kutf_test_fatal(context, data->result_msg); ++ else ++ kutf_test_fail(context, data->result_msg); ++ ++ pr_debug("Test end\n"); ++} + +/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - atom softstopped ++ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data ++ * required for mali_kutf_clk_rate_trace_test_portal. ++ * @context: KUTF context. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * Return: Fixture data created on success or NULL on failure + */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_event_atom_softstop_ex( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void *mali_kutf_clk_rate_trace_create_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data; ++ struct kbase_device *kbdev; ++ unsigned long rate; ++ int i; ++ ++ /* Acquire the kbase device */ ++ pr_debug("Finding device\n"); ++ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); ++ if (kbdev == NULL) { ++ kutf_test_fail(context, "Failed to find kbase device"); ++ return NULL; ++ } ++ ++ pr_debug("Creating fixture\n"); ++ data = kutf_mempool_alloc(&context->fixture_pool, ++ sizeof(struct kutf_clk_rate_trace_fixture_data)); ++ if (!data) ++ return NULL; ++ ++ memset(data, 0, sizeof(*data)); ++ pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ ++ if (g_ptr_portal_data != NULL) { ++ pr_warn("Test portal is already in use, run aborted\n"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ kutf_test_fail(context, "Portal allows single session only"); ++ return NULL; ++ } ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (kbdev->pm.clk_rtm.clks[i]) { ++ data->nclks++; ++ if (kbdev->pm.clk_rtm.gpu_idle) ++ rate = 0; ++ else ++ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; ++ data->snapshot[i].previous_rate = rate; ++ data->snapshot[i].current_rate = rate; ++ } ++ } ++ ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ if (data->nclks) { ++ /* Subscribe this test server portal */ ++ data->listener.notify = kutf_portal_trace_write; ++ data->invoke_notify = false; ++ ++ kbase_clk_rate_trace_manager_subscribe( ++ &kbdev->pm.clk_rtm, &data->listener); ++ /* Update the kutf_server_portal fixture_data pointer */ ++ g_ptr_portal_data = data; ++ } ++ ++ data->kbdev = kbdev; ++ data->result_msg = NULL; ++ data->test_status = KUTF_RESULT_PASS; ++ ++ if (data->nclks == 0) { ++ data->server_state = PORTAL_STATE_NO_CLK; ++ pr_debug("Kbdev has no clocks for rate trace"); ++ } else ++ data->server_state = PORTAL_STATE_LIVE; ++ ++ pr_debug("Created fixture\n"); ++ ++ return data; ++} + +/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - atom softstop issued ++ * mali_kutf_clk_rate_trace_remove_fixture - Destroy fixture data previously created by ++ * mali_kutf_clk_rate_trace_create_fixture. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * @context: KUTF context. 
+ */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_event_atom_softstop_issue( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void mali_kutf_clk_rate_trace_remove_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ ++ if (data->nclks) { ++ /* Clean up the portal trace write arrangement */ ++ g_ptr_portal_data = NULL; ++ ++ kbase_clk_rate_trace_manager_unsubscribe( ++ &kbdev->pm.clk_rtm, &data->listener); ++ } ++ pr_debug("Destroying fixture\n"); ++ kbase_release_device(kbdev); ++ pr_debug("Destroyed fixture\n"); ++} + +/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - atom soft job has started ++ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test mdoule. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * Return: 0 on success, error code otherwise + */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_event_atom_softjob_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static int __init mali_kutf_clk_rate_trace_test_module_init(void) ++{ ++ struct kutf_suite *suite; ++ unsigned int filters; ++ union kutf_callback_data suite_data = { NULL }; ++ ++ pr_debug("Creating app\n"); ++ ++ g_ptr_portal_data = NULL; ++ kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); ++ ++ if (!kutf_app) { ++ pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME ++ " failed!\n"); ++ return -ENOMEM; ++ } ++ ++ pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); ++ suite = kutf_create_suite_with_filters_and_data( ++ kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, ++ mali_kutf_clk_rate_trace_create_fixture, ++ mali_kutf_clk_rate_trace_remove_fixture, ++ KUTF_F_TEST_GENERIC, ++ suite_data); ++ ++ if (!suite) { ++ pr_warn("Creation of suite %s failed!\n", ++ CLK_RATE_TRACE_SUITE_NAME); ++ kutf_destroy_application(kutf_app); ++ return -ENOMEM; ++ } ++ ++ filters = suite->suite_default_flags; ++ kutf_add_test_with_filters( ++ suite, 0x0, CLK_RATE_TRACE_PORTAL, ++ mali_kutf_clk_rate_trace_test_portal, ++ filters); ++ ++ pr_debug("Init complete\n"); ++ return 0; ++} + +/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - atom soft job has completed -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this ++ * test. 
+ */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_event_atom_softjob_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void __exit mali_kutf_clk_rate_trace_test_module_exit(void) ++{ ++ pr_debug("Exit start\n"); ++ kutf_destroy_application(kutf_app); ++ pr_debug("Exit complete\n"); ++} + -+/** -+ * KBASE_TLSTREAM_TL_ARBITER_GRANTED - Arbiter has granted gpu access ++ ++module_init(mali_kutf_clk_rate_trace_test_module_init); ++module_exit(mali_kutf_clk_rate_trace_test_module_exit); ++ ++MODULE_LICENSE("GPL"); +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h +new file mode 100644 +index 000000000..a716b9f70 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h +@@ -0,0 +1,154 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object + */ -+#define KBASE_TLSTREAM_TL_ARBITER_GRANTED( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_arbiter_granted( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) ++ ++#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_ ++#define _KUTF_CLK_RATE_TRACE_TEST_H_ ++ ++#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace" ++#define CLK_RATE_TRACE_SUITE_NAME "rate_trace" ++#define CLK_RATE_TRACE_PORTAL "portal" + +/** -+ * KBASE_TLSTREAM_TL_ARBITER_STARTED - Driver is running again and able to process jobs ++ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace ++ * service portal. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object ++ * @PORTAL_CMD_GET_PLATFORM: Request the platform that the tests are ++ * to be run on. ++ * @PORTAL_CMD_GET_CLK_RATE_MGR: Request the clock trace manager internal ++ * data record. On a positive acknowledgement ++ * the prevailing clock rates and the GPU idle ++ * condition flag are returned. ++ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its ++ * data record. On a positive acknowledgement ++ * the last trace recorded clock rates and the ++ * GPU idle condition flag are returned. ++ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its ++ * current snapshot data record. On a positive ++ * acknowledgement the snapshot array matching ++ * the number of clocks are returned. 
It also ++ * starts a fresh snapshot inside the clock ++ * trace portal. ++ * @PORTAL_CMD_INC_PM_CTX_CNT: Request the clock trace portal to increase ++ * its internal PM_CTX_COUNT. If this increase ++ * yielded a count of 0 -> 1 change, the portal ++ * will initiate a PM_CTX_ACTIVE call to the ++ * Kbase power management. Futher increase ++ * requests will limit to only affect the ++ * portal internal count value. ++ * @PORTAL_CMD_DEC_PM_CTX_CNT: Request the clock trace portal to decrease ++ * its internal PM_CTX_COUNT. If this decrease ++ * yielded a count of 1 -> 0 change, the portal ++ * will initiate a PM_CTX_IDLE call to the ++ * Kbase power management. ++ * @PORTAL_CMD_CLOSE_PORTAL: Inform the clock trace portal service the ++ * client has completed its session. The portal ++ * will start the close down action. If no ++ * error has occurred during the dynamic ++ * interactive session, an inherent basic test ++ * carrying out some sanity check on the clock ++ * trace is undertaken. ++ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks ++ * for the top clock domain with a new GPU frequency ++ * set to 42 kHZ. ++ * @PORTAL_CMD_INVALID: Valid commands termination marker. Must be ++ * the highest enumeration value, as it ++ * represents valid command array size. ++ * @PORTAL_TOTAL_CMDS: Alias of PORTAL_CMD_INVALID. + */ -+#define KBASE_TLSTREAM_TL_ARBITER_STARTED( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_arbiter_started( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) ++/* PORTAL_CMD_INVALID must be the last one, serving the size */ ++enum kbasep_clk_rate_trace_req { ++ PORTAL_CMD_GET_PLATFORM, ++ PORTAL_CMD_GET_CLK_RATE_MGR, ++ PORTAL_CMD_GET_CLK_RATE_TRACE, ++ PORTAL_CMD_GET_TRACE_SNAPSHOT, ++ PORTAL_CMD_INC_PM_CTX_CNT, ++ PORTAL_CMD_DEC_PM_CTX_CNT, ++ PORTAL_CMD_CLOSE_PORTAL, ++ PORTAL_CMD_INVOKE_NOTIFY_42KHZ, ++ PORTAL_CMD_INVALID, ++ PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID, ++}; + +/** -+ * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED - Arbiter has requested driver to stop using gpu ++ * DOC: Portal service request command names. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object ++ * The portal request consists of a kutf named u64-value. ++ * For those above enumerated PORTAL_CMD, the names defined ++ * here are used to mark the name and then followed with a sequence number ++ * value. Example (manual script here for illustration): ++ * exec 5<>run # open the portal kutf run as fd-5 ++ * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1 ++ * head -n 1 <&5 # read back the 1-line server reseponse ++ * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string ++ * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1 ++ * head -n 1 <&5 # read back the 1-line server reseponse ++ * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" ++ * echo CLOSE_PORTAL=1 >&5 # close the portal ++ * cat <&5 # read back all the response lines ++ * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command ++ * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed. ++ * exec 5>&- # close the service portal fd. 
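++ *
++ * Note: the portal handlers echo back only the low 8 bits of the sequence
++ * value (it is masked with 0xFF), so scripts that want an exact SEQ match
++ * in the reply should keep the tag below 256.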
++ * ++ * Expected request command return format: ++ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}" ++ * Note, the above contains 2-clock with rates in [], GPU idle ++ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}" ++ * Note, 1-clock with rate in [], GPU not idle ++ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" ++ * Note, 1-clock, (start_rate : 0, last_rate : 0x1ad27480, ++ * trace_rate_up_count: 1, trace_rate_down_count : 0) ++ * For the specific sample case here, there is a single rate_trace event ++ * that yielded a rate increase change. No rate drop event recorded in the ++ * reporting snapshot duration. ++ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}" ++ * Note, after the increment, M_CTX_CNT is 1. (i.e. 0 -> 1) ++ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}" ++ * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0) ++ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}" ++ * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be ++ * dropped down to 0 as part of the portal close clean up. + */ -+#define KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_arbiter_stop_requested( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) ++#define GET_PLATFORM "GET_PLATFORM" ++#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" ++#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" ++#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" ++#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" ++#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" ++#define CLOSE_PORTAL "CLOSE_PORTAL" ++#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" + +/** -+ * KBASE_TLSTREAM_TL_ARBITER_STOPPED - Driver has stopped using gpu ++ * DOC: Portal service response tag names. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object ++ * The response consists of a kutf named string-value. ++ * In case of a 'NACK' (negative acknowledgment), it can be one of the two formats: ++ * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2. ++ * Note, the portal has received a valid name and valid sequence number ++ * but can't carry-out the request, reason in the MSG field. ++ * 2. NACK="Failing-message" ++ * Note, unable to parse a valid name or valid sequence number, ++ * or some internal error condition. Reason in the quoted string. + */ -+#define KBASE_TLSTREAM_TL_ARBITER_STOPPED( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_arbiter_stopped( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) ++#define ACK "ACK" ++#define NACK "NACK" ++#define MAX_REPLY_NAME_LEN 32 + -+/** -+ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - Driver has requested the arbiter for gpu access ++#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */ +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild +new file mode 100755 +index 000000000..213d6d5ea +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++ifeq ($(CONFIG_MALI_KUTF_IRQ_TEST),y) ++obj-m += mali_kutf_irq_test.o ++ ++mali_kutf_irq_test-y := mali_kutf_irq_test_main.o ++endif +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp +new file mode 100755 +index 000000000..155875b9d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp +@@ -0,0 +1,42 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object + */ -+#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_arbiter_requested( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) + -+/** -+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - gpu soft reset ++bob_kernel_module { ++ name: "mali_kutf_irq_test", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", ++ ], ++ srcs: [ ++ "Kbuild", ++ "mali_kutf_irq_test_main.c", ++ ], ++ extra_symbols: [ ++ "mali_kbase", ++ "kutf", ++ ], ++ enabled: false, ++ mali_kutf_irq_test: { ++ kbuild_options: ["CONFIG_MALI_KUTF_IRQ_TEST=y"], ++ enabled: true, ++ }, ++} +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +new file mode 100644 +index 000000000..f2a014d9b +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +@@ -0,0 +1,283 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object + */ -+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_jd_gpu_soft_reset( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ gpu \ -+ ); \ -+ } while (0) + -+/** -+ * KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC - Tiler Heap Chunk Allocation -+ * -+ * @kbdev: Kbase device -+ * @ctx_nr: Kernel context number -+ * @heap_id: Unique id used to represent a heap under a context -+ * @chunk_va: Virtual start address of tiler heap chunk ++#include ++#include ++#include ++ ++#include "mali_kbase.h" ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* ++ * This file contains the code which is used for measuring interrupt latency ++ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is ++ * used with this purpose and it is called within KUTF framework - a kernel ++ * unit test framework. The measured latency provided by this test should ++ * be representative for the latency of the Mali JOB/MMU IRQs as well. + */ -+#define KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC( \ -+ kbdev, \ -+ ctx_nr, \ -+ heap_id, \ -+ chunk_va \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_jd_tiler_heap_chunk_alloc( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ ctx_nr, \ -+ heap_id, \ -+ chunk_va \ -+ ); \ -+ } while (0) ++ ++/* KUTF test application pointer for this test */ ++static struct kutf_application *irq_app; + +/** -+ * KBASE_TLSTREAM_TL_JS_SCHED_START - Scheduling starts ++ * struct kutf_irq_fixture_data - test fixture used by the test functions. ++ * @kbdev: kbase device for the GPU. + * -+ * @kbdev: Kbase device -+ * @dummy: dummy argument + */ -+#define KBASE_TLSTREAM_TL_JS_SCHED_START( \ -+ kbdev, \ -+ dummy \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_js_sched_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ dummy \ -+ ); \ -+ } while (0) ++struct kutf_irq_fixture_data { ++ struct kbase_device *kbdev; ++}; + -+/** -+ * KBASE_TLSTREAM_TL_JS_SCHED_END - Scheduling ends -+ * -+ * @kbdev: Kbase device -+ * @dummy: dummy argument ++/* ID for the GPU IRQ */ ++#define GPU_IRQ_HANDLER 2 ++ ++#define NR_TEST_IRQS ((u32)1000000) ++ ++/* IRQ for the test to trigger. 
Currently POWER_CHANGED_SINGLE as it is ++ * otherwise unused in the DDK + */ -+#define KBASE_TLSTREAM_TL_JS_SCHED_END( \ -+ kbdev, \ -+ dummy \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_js_sched_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ dummy \ -+ ); \ -+ } while (0) ++#define TEST_IRQ POWER_CHANGED_SINGLE ++ ++#define IRQ_TIMEOUT HZ ++ ++/* Kernel API for setting irq throttle hook callback and irq time in us*/ ++extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, ++ int irq_type); ++extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); ++ ++static DECLARE_WAIT_QUEUE_HEAD(wait); ++static bool triggered; ++static u64 irq_time; ++ ++static void *kbase_untag(void *ptr) ++{ ++ return (void *)(((uintptr_t) ptr) & ~3); ++} + +/** -+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START - Submitting an atom starts ++ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler ++ * @irq: IRQ number ++ * @data: Data associated with this IRQ + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * Return: state of the IRQ + */ -+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_submit_atom_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) ++{ ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); ++ irqreturn_t result; ++ u64 tval; ++ bool has_test_irq = val & TEST_IRQ; ++ ++ if (has_test_irq) { ++ tval = ktime_get_real_ns(); ++ /* Clear the test source only here */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), ++ TEST_IRQ); ++ /* Remove the test IRQ status bit */ ++ val = val ^ TEST_IRQ; ++ } ++ ++ result = kbase_gpu_irq_test_handler(irq, data, val); ++ ++ if (has_test_irq) { ++ irq_time = tval; ++ triggered = true; ++ wake_up(&wait); ++ result = IRQ_HANDLED; ++ } ++ ++ return result; ++} + +/** -+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END - Submitting an atom ends ++ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required ++ * for all the tests in the irq suite. ++ * @context: KUTF context. 
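++ *
++ * The fixture simply takes a reference on the first kbase device (via
++ * kbase_find_device(-1)); the matching remove_fixture releases it again.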
+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * Return: Fixture data created on success or NULL on failure + */ -+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_submit_atom_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void *mali_kutf_irq_default_create_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_irq_fixture_data *data; ++ ++ data = kutf_mempool_alloc(&context->fixture_pool, ++ sizeof(struct kutf_irq_fixture_data)); ++ ++ if (!data) ++ goto fail; ++ ++ /* Acquire the kbase device */ ++ data->kbdev = kbase_find_device(-1); ++ if (data->kbdev == NULL) { ++ kutf_test_fail(context, "Failed to find kbase device"); ++ goto fail; ++ } ++ ++ return data; ++ ++fail: ++ return NULL; ++} + +/** -+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function kbase_jd_done_nolock ++ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously ++ * created by mali_kutf_irq_default_create_fixture. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * @context: KUTF context. + */ -+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_done_no_lock_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void mali_kutf_irq_default_remove_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_irq_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ ++ kbase_release_device(kbdev); ++} + +/** -+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function kbase_jd_done_nolock - end ++ * mali_kutf_irq_latency() - measure GPU IRQ latency ++ * @context: kutf context within which to perform the test + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_done_no_lock_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) -+ -+/** -+ * KBASE_TLSTREAM_TL_JD_DONE_START - Start of kbase_jd_done ++ * The test triggers IRQs manually, and measures the ++ * time between triggering the IRQ and the IRQ handler being executed. + * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier ++ * This is not a traditional test, in that the pass/fail status has little ++ * meaning (other than indicating that the IRQ handler executed at all). Instead ++ * the results are in the latencies provided with the test result. There is no ++ * meaningful pass/fail result that can be obtained here, instead the latencies ++ * are provided for manual analysis only. 
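++ *
++ * Each iteration writes TEST_IRQ (POWER_CHANGED_SINGLE) to GPU_IRQ_RAWSTAT,
++ * the custom handler timestamps the interrupt, and the per-interrupt latency
++ * is irq_time - start_time; the minimum, maximum and average over
++ * NR_TEST_IRQS iterations are reported with the test result.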
+ */ -+#define KBASE_TLSTREAM_TL_JD_DONE_START( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_done_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++static void mali_kutf_irq_latency(struct kutf_context *context) ++{ ++ struct kutf_irq_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ u64 min_time = U64_MAX, max_time = 0, average_time = 0; ++ u32 i; ++ const char *results; + -+/** -+ * KBASE_TLSTREAM_TL_JD_DONE_END - End of kbase_jd_done -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_JD_DONE_END( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_done_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++ /* Force GPU to be powered */ ++ kbase_pm_context_active(kbdev); ++ kbase_pm_wait_for_desired_state(kbdev); + -+/** -+ * KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE - Atom marked complete -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_jd_atom_complete( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++ kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, ++ GPU_IRQ_HANDLER); + -+/** -+ * KBASE_TLSTREAM_TL_RUN_ATOM_START - Running of atom starts -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @atom_nr: Sequential number of an atom -+ */ -+#define KBASE_TLSTREAM_TL_RUN_ATOM_START( \ -+ kbdev, \ -+ atom, \ -+ atom_nr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_run_atom_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ atom_nr \ -+ ); \ -+ } while (0) ++ for (i = 1; i <= NR_TEST_IRQS; i++) { ++ u64 start_time = ktime_get_real_ns(); + -+/** -+ * KBASE_TLSTREAM_TL_RUN_ATOM_END - Running of atom ends -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @atom_nr: Sequential number of an atom -+ */ -+#define KBASE_TLSTREAM_TL_RUN_ATOM_END( \ -+ kbdev, \ -+ atom, \ -+ atom_nr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_tl_run_atom_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ atom_nr \ -+ ); \ -+ } while (0) ++ triggered = false; + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @prio: Atom priority -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ -+ kbdev, \ -+ atom, \ -+ prio \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_tl_attrib_atom_priority( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ prio \ -+ ); \ -+ } while (0) ++ /* Trigger fake IRQ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), ++ TEST_IRQ); + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @state: Atom state -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ -+ kbdev, \ -+ atom, \ -+ state \ -+ ) \ -+ 
do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_tl_attrib_atom_state( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ state \ -+ ); \ -+ } while (0) ++ if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { ++ /* Wait extra time to see if it would come */ ++ wait_event_timeout(wait, triggered, 10 * IRQ_TIMEOUT); ++ break; ++ } + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom caused priority change -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ -+ kbdev, \ -+ atom \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_tl_attrib_atom_prioritized( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom \ -+ ); \ -+ } while (0) ++ if ((irq_time - start_time) < min_time) ++ min_time = irq_time - start_time; ++ if ((irq_time - start_time) > max_time) ++ max_time = irq_time - start_time; ++ average_time += irq_time - start_time; + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit done for atom -+ * -+ * @kbdev: Kbase device -+ * @atom: Atom identifier -+ * @edit_addr: Address edited by jit -+ * @new_addr: Address placed into the edited location -+ * @jit_flags: Flags specifying the special requirements for the JIT allocation. -+ * @mem_flags: Flags defining the properties of a memory region -+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. -+ * @com_pgs: The minimum number of physical pages which should back the allocation. -+ * @extent: Granularity of physical pages to grow the allocation by during a fault. -+ * @va_pgs: The minimum number of virtual pages required -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ -+ kbdev, \ -+ atom, \ -+ edit_addr, \ -+ new_addr, \ -+ jit_flags, \ -+ mem_flags, \ -+ j_id, \ -+ com_pgs, \ -+ extent, \ -+ va_pgs \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ -+ __kbase_tlstream_tl_attrib_atom_jit( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ atom, \ -+ edit_addr, \ -+ new_addr, \ -+ jit_flags, \ -+ mem_flags, \ -+ j_id, \ -+ com_pgs, \ -+ extent, \ -+ va_pgs \ -+ ); \ -+ } while (0) ++ udelay(10); ++ /* Sleep for a ms, every 10000 iterations, to avoid misleading warning ++ * of CPU softlockup when all GPU IRQs keep going to the same CPU. 
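
The loop above keeps a running minimum, maximum and sum of the measured interrupt latency (the sum, held in average_time, is divided by the number of interrupts once the loop finishes). The standalone sketch below reproduces just that bookkeeping in userspace so it can be read in isolation; the clock source, the usleep() stand-in for the interrupt and the NR_TEST_EVENTS name are illustrative assumptions, and none of this code is part of the driver or of this patch.

/*
 * Standalone sketch of the min/max/average bookkeeping used by the
 * irq_latency test above. Userspace illustration only: the "event" is a
 * usleep(), not a real GPU interrupt, and NR_TEST_EVENTS is a made-up name.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define NR_TEST_EVENTS 100

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

int main(void)
{
        uint64_t min_time = UINT64_MAX, max_time = 0, sum_time = 0;
        unsigned int i;

        for (i = 1; i <= NR_TEST_EVENTS; i++) {
                uint64_t start_time = now_ns();
                uint64_t event_time, delta;

                usleep(50);            /* stands in for waiting on the IRQ handler */
                event_time = now_ns(); /* the kernel test records this inside the handler */

                delta = event_time - start_time;
                if (delta < min_time)
                        min_time = delta;
                if (delta > max_time)
                        max_time = delta;
                sum_time += delta;
        }

        printf("Min latency = %lluns, Max latency = %lluns, Average latency = %lluns\n",
               (unsigned long long)min_time, (unsigned long long)max_time,
               (unsigned long long)(sum_time / NR_TEST_EVENTS));
        return 0;
}

In the kernel test the final division is done with do_div(), which divides a u64 in place and avoids the 64-bit division helpers that are unavailable to kernel code on 32-bit builds.
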
++ */ ++ if (!(i % 10000)) ++ msleep(1); ++ } + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - New KBase Device -+ * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware -+ * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports -+ * @kbase_device_as_count: The number of address spaces the physical hardware has available -+ * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the physical hardware has available -+ * @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported -+ * @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_gpu_core_count, \ -+ kbase_device_max_num_csgs, \ -+ kbase_device_as_count, \ -+ kbase_device_sb_entry_count, \ -+ kbase_device_has_cross_stream_sync, \ -+ kbase_device_supports_gpu_sleep \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_new_device( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kbase_device_gpu_core_count, \ -+ kbase_device_max_num_csgs, \ -+ kbase_device_as_count, \ -+ kbase_device_sb_entry_count, \ -+ kbase_device_has_cross_stream_sync, \ -+ kbase_device_supports_gpu_sleep \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_gpu_core_count, \ -+ kbase_device_max_num_csgs, \ -+ kbase_device_as_count, \ -+ kbase_device_sb_entry_count, \ -+ kbase_device_has_cross_stream_sync, \ -+ kbase_device_supports_gpu_sleep \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* Go back to default handler */ ++ kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @buffer_gpu_addr: Address of the GPU queue's command buffer -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ buffer_gpu_addr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kernel_ctx_id, \ -+ buffer_gpu_addr \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ buffer_gpu_addr \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ kbase_pm_context_idle(kbdev); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot -+ * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace -+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed -+ * @kbase_device_csg_slot_resuming: Whether the csg is being resumed -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kernel_ctx_id, \ -+ gpu_cmdq_grp_handle, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_resuming 
\ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_device_program_csg( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kernel_ctx_id, \ -+ gpu_cmdq_grp_handle, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_resuming \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kernel_ctx_id, \ -+ gpu_cmdq_grp_handle, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_resuming \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (i > NR_TEST_IRQS) { ++ do_div(average_time, NR_TEST_IRQS); ++ results = kutf_dsprintf(&context->fixture_pool, ++ "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", ++ min_time, max_time, average_time); ++ kutf_test_pass(context, results); ++ } else { ++ results = kutf_dsprintf(&context->fixture_pool, ++ "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", ++ i, NR_TEST_IRQS, triggered); ++ kutf_test_fail(context, results); ++ } ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - CSG is deprogrammed from a slot ++ * mali_kutf_irq_test_main_init - Module entry point for this test. + * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed ++ * Return: 0 on success, error code otherwise + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_device_deprogram_csg( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static int __init mali_kutf_irq_test_main_init(void) ++{ ++ struct kutf_suite *suite; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting -+ * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted -+ * @kbase_device_csg_slot_suspending: Whether the csg is being suspended -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_suspending \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_device_halting_csg( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_suspending \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index, \ -+ kbase_device_csg_slot_suspending \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ irq_app = kutf_create_application("irq"); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended -+ * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * 
@kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_device_suspend_csg( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (irq_app == NULL) { ++ pr_warn("Creation of test application failed!\n"); ++ return -ENOMEM; ++ } + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle. -+ * -+ * @kbdev: Kbase device -+ * @kbase_device_id: The ID of the physical hardware -+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG for which we are receiving an idle notification -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_device_csg_idle( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ -+ kbdev, \ -+ kbase_device_id, \ -+ kbase_device_csg_slot_index \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ suite = kutf_create_suite(irq_app, "irq_default", ++ 1, mali_kutf_irq_default_create_fixture, ++ mali_kutf_irq_default_remove_fixture); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - New KBase Context -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @kbase_device_id: The ID of the physical hardware -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ kbase_device_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_new_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kernel_ctx_id, \ -+ kbase_device_id \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ kbase_device_id \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (suite == NULL) { ++ pr_warn("Creation of test suite failed!\n"); ++ kutf_destroy_application(irq_app); ++ return -ENOMEM; ++ } + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - Delete KBase Context -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ -+ kbdev, \ -+ kernel_ctx_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_del_ctx( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kernel_ctx_id \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ -+ kbdev, \ -+ kernel_ctx_id \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ kutf_add_test(suite, 0x0, "irq_latency", ++ mali_kutf_irq_latency); ++ return 0; ++} + +/** -+ * 
KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - Address Space is assigned to a KBase context -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @kbase_device_as_index: The index of the device address space being assigned ++ * mali_kutf_irq_test_main_exit - Module exit point for this test. + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ kbase_device_as_index \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_ctx_assign_as( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kernel_ctx_id, \ -+ kbase_device_as_index \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ kbase_device_as_index \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static void __exit mali_kutf_irq_test_main_exit(void) ++{ ++ kutf_destroy_application(irq_app); ++} + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - Address Space is unassigned from a KBase context -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ -+ kbdev, \ -+ kernel_ctx_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_ctx_unassign_as( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kernel_ctx_id \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ -+ kbdev, \ -+ kernel_ctx_id \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++module_init(mali_kutf_irq_test_main_init); ++module_exit(mali_kutf_irq_test_main_exit); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - New KCPU Queue -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @kcpu_queue_id: KCPU queue ID -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @kcpuq_num_pending_cmds: Number of commands already enqueued in the KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ kcpu_queue_id, \ -+ kernel_ctx_id, \ -+ kcpuq_num_pending_cmds \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_new_kcpuqueue( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ kcpu_queue_id, \ -+ kernel_ctx_id, \ -+ kcpuq_num_pending_cmds \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ kcpu_queue_id, \ -+ kernel_ctx_id, \ -+ kcpuq_num_pending_cmds \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION("1.0"); +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild +new file mode 100755 +index 000000000..e9bff98b8 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/Kbuild +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - Delete KCPU Queue -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_del_kcpuqueue( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ifeq ($(CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST),y) ++obj-m += mali_kutf_mgm_integration_test.o + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - KCPU Queue enqueues Signal on Fence ++mali_kutf_mgm_integration_test-y := mali_kutf_mgm_integration_test_main.o ++endif +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp +new file mode 100755 +index 000000000..8b995f8a0 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp +@@ -0,0 +1,41 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @fence: Fence object handle -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ -+ kbdev, \ -+ kcpu_queue, \ -+ fence \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ fence \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ -+ kbdev, \ -+ kcpu_queue, \ -+ fence \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - KCPU Queue enqueues Wait on Fence ++ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @fence: Fence object handle -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ fence \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ fence \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ fence \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT - KCPU Queue enqueues Wait on Cross Queue Sync Object ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @cqs_obj_gpu_addr: CQS Object GPU pointer -+ * @compare_value: Semaphore value that should be exceeded for the WAIT to pass -+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ inherit_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ inherit_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ inherit_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET - KCPU Queue enqueues Set on Cross Queue Sync Object ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @cqs_obj_gpu_addr: CQS Object GPU pointer -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @cqs_obj_gpu_addr: CQS Object GPU pointer -+ * @compare_value: Value that should be compared to semaphore value for the WAIT to pass -+ * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. greater than, less or equal) -+ * @data_type: Data type of a CQS Object's value -+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ condition, \ -+ data_type, \ -+ inherit_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ condition, \ -+ data_type, \ -+ inherit_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ compare_value, \ -+ condition, \ -+ data_type, \ -+ inherit_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object ++bob_kernel_module { ++ name: "mali_kutf_mgm_integration_test", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", ++ ], ++ srcs: [ ++ "Kbuild", ++ "mali_kutf_mgm_integration_test_main.c", ++ ], ++ extra_symbols: [ ++ "mali_kbase", ++ "kutf", ++ ], ++ enabled: false, ++ mali_kutf_mgm_integration_test: { ++ kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"], ++ enabled: true, ++ }, ++} +diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c +new file mode 100644 +index 000000000..5a42bd675 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c +@@ -0,0 +1,210 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @cqs_obj_gpu_addr: CQS Object GPU pointer -+ * @value: Value that will be set or added to semaphore -+ * @operation: Operation type performed on semaphore value (SET or ADD) -+ * @data_type: Data type of a CQS Object's value -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ value, \ -+ operation, \ -+ data_type \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ value, \ -+ operation, \ -+ data_type \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ cqs_obj_gpu_addr, \ -+ value, \ -+ operation, \ -+ data_type \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import ++ * (C) 
COPYRIGHT 2022 ARM Limited. All rights reserved. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @map_import_buf_gpu_addr: Map import buffer GPU pointer -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - KCPU Queue enqueues Unmap Import ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @map_import_buf_gpu_addr: Map import buffer GPU pointer -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - KCPU Queue enqueues Unmap Import ignoring reference count ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @map_import_buf_gpu_addr: Map import buffer GPU pointer -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ map_import_buf_gpu_addr \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Begin array of KCPU Queue enqueues JIT Alloc ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++#include ++#include "mali_kbase.h" ++#include ++#include ++#include ++#include + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Array item of KCPU Queue enqueues JIT Alloc -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write the JIT allocated GPU virtual address to -+ * @jit_alloc_va_pages: The minimum number of virtual pages required -+ * @jit_alloc_commit_pages: The minimum number of physical pages which should back the allocation -+ * @jit_alloc_extent: Granularity of physical pages to grow the allocation by during a fault -+ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. Zero is not a valid value -+ * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation -+ * @jit_alloc_max_allocations: The maximum number of allocations allowed within the bin specified by bin_id. Should be the same for all JIT allocations within the same bin. -+ * @jit_alloc_flags: Flags specifying the special requirements for the JIT allocation -+ * @jit_alloc_usage_id: A hint about which allocation should be reused. 
The kernel should attempt to use a previous allocation with the same usage_id -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue, \ -+ jit_alloc_gpu_alloc_addr_dest, \ -+ jit_alloc_va_pages, \ -+ jit_alloc_commit_pages, \ -+ jit_alloc_extent, \ -+ jit_alloc_jit_id, \ -+ jit_alloc_bin_id, \ -+ jit_alloc_max_allocations, \ -+ jit_alloc_flags, \ -+ jit_alloc_usage_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ jit_alloc_gpu_alloc_addr_dest, \ -+ jit_alloc_va_pages, \ -+ jit_alloc_commit_pages, \ -+ jit_alloc_extent, \ -+ jit_alloc_jit_id, \ -+ jit_alloc_bin_id, \ -+ jit_alloc_max_allocations, \ -+ jit_alloc_flags, \ -+ jit_alloc_usage_id \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue, \ -+ jit_alloc_gpu_alloc_addr_dest, \ -+ jit_alloc_va_pages, \ -+ jit_alloc_commit_pages, \ -+ jit_alloc_extent, \ -+ jit_alloc_jit_id, \ -+ jit_alloc_bin_id, \ -+ jit_alloc_max_allocations, \ -+ jit_alloc_flags, \ -+ jit_alloc_usage_id \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++#define MINOR_FOR_FIRST_KBASE_DEV (-1) + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - End array of KCPU Queue enqueues JIT Alloc -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++#define BASE_MEM_GROUP_COUNT (16) ++#define PA_MAX ((1ULL << 48) - 1) ++#define PA_START_BIT 12 ++#define ENTRY_ACCESS_BIT (1ULL << 10) + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - Begin array of KCPU Queue enqueues JIT Free -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++#define ENTRY_IS_ATE_L3 3ULL ++#define ENTRY_IS_ATE_L02 1ULL + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - Array item of KCPU Queue enqueues JIT Free -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. 
Zero is not a valid value -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ jit_alloc_jit_id \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ jit_alloc_jit_id \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue, \ -+ jit_alloc_jit_id \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++#define MGM_INTEGRATION_SUITE_NAME "mgm_integration" ++#define MGM_INTEGRATION_PTE_TRANSLATION "pte_translation" + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - End array of KCPU Queue enqueues JIT Free -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static char msg_buf[KUTF_MAX_LINE_LENGTH]; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++/* KUTF test application pointer for this test */ ++struct kutf_application *mgm_app; + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @group_suspend_buf: Pointer to the suspend buffer structure -+ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace ++ * struct kutf_mgm_fixture_data - test fixture used by test functions ++ * @kbdev: kbase device for the GPU. ++ * @group_id: Memory group ID to test based on fixture index. 
+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ -+ kbdev, \ -+ kcpu_queue, \ -+ group_suspend_buf, \ -+ gpu_cmdq_grp_handle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ group_suspend_buf, \ -+ gpu_cmdq_grp_handle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ -+ kbdev, \ -+ kcpu_queue, \ -+ group_suspend_buf, \ -+ gpu_cmdq_grp_handle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++struct kutf_mgm_fixture_data { ++ struct kbase_device *kbdev; ++ int group_id; ++}; + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - KCPU Queue starts a Signal on Fence ++ * mali_kutf_mgm_pte_translation_test() - Tests forward and reverse translation ++ * of PTE by the MGM module ++ * @context: KUTF context within which to perform the test. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - KCPU Queue ends a Signal on Fence ++ * This test creates PTEs with physical addresses in the range ++ * 0x0000-0xFFFFFFFFF000 and tests that mgm_update_gpu_pte() returns a different ++ * PTE and mgm_pte_to_original_pte() returns the original PTE. This is tested ++ * at MMU level 2 and 3 as mgm_update_gpu_pte() is called for ATEs only. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * This test is run for a specific group_id depending on the fixture_id. 
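
To make the round trip described above concrete, the sketch below builds level-3 ATEs the same way the test does (physical address masked to the page boundary, ORed with the access bit and the ATE type bits) and pushes them through a toy translate/restore pair. The toy callbacks simply park the group ID in bits 59:56 of the PTE; that encoding, the TOY_* names and the decision to skip group 0 are assumptions made purely for illustration and are not taken from the memory_group_manager implementation under test.

/*
 * Toy model of the mgm_update_gpu_pte()/mgm_pte_to_original_pte() round trip
 * exercised by the test above. The group-ID placement in bits 59:56 is an
 * illustrative assumption, not the encoding used by the real MGM module.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_MASK        (~0xFFFULL)
#define TOY_ENTRY_ACCESS_BIT (1ULL << 10)
#define TOY_ENTRY_IS_ATE_L3  3ULL
#define TOY_GROUP_SHIFT      56
#define TOY_GROUP_MASK       (0xFULL << TOY_GROUP_SHIFT)

static uint64_t toy_update_gpu_pte(unsigned int group_id, uint64_t pte)
{
        return (pte & ~TOY_GROUP_MASK) | ((uint64_t)group_id << TOY_GROUP_SHIFT);
}

static uint64_t toy_pte_to_original_pte(unsigned int group_id, uint64_t pte)
{
        (void)group_id;
        return pte & ~TOY_GROUP_MASK;
}

int main(void)
{
        unsigned int group_id;

        /* Group 0 is skipped: this toy encoding would leave its PTEs unchanged,
         * unlike a real implementation, which must mark every group.
         */
        for (group_id = 1; group_id < 16; group_id++) {
                uint64_t pa;

                for (pa = 1ULL << 11; pa <= (1ULL << 48) - 1; pa <<= 1) {
                        uint64_t original = (pa & TOY_PAGE_MASK) |
                                            TOY_ENTRY_ACCESS_BIT | TOY_ENTRY_IS_ATE_L3;
                        uint64_t translated = toy_update_gpu_pte(group_id, original);
                        uint64_t restored = toy_pte_to_original_pte(group_id, translated);

                        assert(translated != original); /* group must be visible in the PTE */
                        assert(restored == original);   /* reverse translation is lossless */
                }
        }
        printf("toy PTE round trip passed for group_id 1..15\n");
        return 0;
}

Compiled on its own, the sketch prints the pass message once every group/address combination has survived both asserts, which mirrors the per-group pass/fail criterion the KUTF test applies.
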
+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context) ++{ ++ struct kutf_mgm_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; ++ u64 addr; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - KCPU Queue starts a Wait on Fence -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ for (addr = 1 << (PA_START_BIT - 1); addr <= PA_MAX; addr <<= 1) { ++ /* Mask 1 << 11 by ~0xFFF to get 0x0000 at first iteration */ ++ phys_addr_t pa = addr; ++ u8 mmu_level; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - KCPU Queue ends a Wait on Fence -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* Test MMU level 3 and 2 (2MB pages) only */ ++ for (mmu_level = MIDGARD_MMU_LEVEL(2); mmu_level <= MIDGARD_MMU_LEVEL(3); ++ mmu_level++) { ++ u64 translated_pte; ++ u64 returned_pte; ++ u64 original_pte; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on Cross Queue Sync Object -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ -+ kbdev, \ -+ 
kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (mmu_level == MIDGARD_MMU_LEVEL(3)) ++ original_pte = ++ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3; ++ else ++ original_pte = ++ (pa & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on Cross Queue Sync Object -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ dev_dbg(kbdev->dev, "Testing group_id=%u, mmu_level=%u, pte=0x%llx\n", ++ data->group_id, mmu_level, original_pte); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on Cross Queue Sync Object -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id, ++ mmu_level, original_pte); ++ if (translated_pte == original_pte) { ++ snprintf( ++ msg_buf, sizeof(msg_buf), ++ "PTE unchanged. 
translated_pte (0x%llx) == original_pte (0x%llx) for mmu_level=%u, group_id=%d", ++ translated_pte, original_pte, mmu_level, data->group_id); ++ kutf_test_fail(context, msg_buf); ++ return; ++ } + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ returned_pte = mgm_dev->ops.mgm_pte_to_original_pte( ++ mgm_dev, data->group_id, mmu_level, translated_pte); ++ dev_dbg(kbdev->dev, "\treturned_pte=%llx\n", returned_pte); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (returned_pte != original_pte) { ++ snprintf( ++ msg_buf, sizeof(msg_buf), ++ "Original PTE not returned. returned_pte (0x%llx) != origin al_pte (0x%llx) for mmu_level=%u, group_id=%d", ++ returned_pte, original_pte, mmu_level, data->group_id); ++ kutf_test_fail(context, msg_buf); ++ return; ++ } ++ } ++ } ++ snprintf(msg_buf, sizeof(msg_buf), "Translation passed for group_id=%d", data->group_id); ++ kutf_test_pass(context, msg_buf); ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object ++ * mali_kutf_mgm_integration_create_fixture() - Creates the fixture data ++ * required for all tests in the mgm integration suite. ++ * @context: KUTF context. 
+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * Return: Fixture data created on success or NULL on failure + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static void *mali_kutf_mgm_integration_create_fixture(struct kutf_context *context) ++{ ++ struct kutf_mgm_fixture_data *data; ++ struct kbase_device *kbdev; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ pr_debug("Finding kbase device\n"); ++ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); ++ if (kbdev == NULL) { ++ kutf_test_fail(context, "Failed to find kbase device"); ++ return NULL; ++ } ++ pr_debug("Creating fixture\n"); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - KCPU Queue ends a Map Import -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ data = kutf_mempool_alloc(&context->fixture_pool, sizeof(struct kutf_mgm_fixture_data)); ++ if (!data) ++ return NULL; ++ data->kbdev = kbdev; ++ data->group_id = context->fixture_index; ++ ++ pr_debug("Fixture created\n"); ++ return data; ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - KCPU Queue starts an Unmap Import -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue ++ * mali_kutf_mgm_integration_remove_fixture() - Destroy fixture data previously ++ * created by mali_kutf_mgm_integration_create_fixture. ++ * @context: KUTF context. 
+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static void mali_kutf_mgm_integration_remove_fixture(struct kutf_context *context) ++{ ++ struct kutf_mgm_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ ++ kbase_release_device(kbdev); ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - KCPU Queue ends an Unmap Import ++ * mali_kutf_mgm_integration_test_main_init() - Module entry point for this test. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * Return: 0 on success, error code on failure. + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static int __init mali_kutf_mgm_integration_test_main_init(void) ++{ ++ struct kutf_suite *suite; ++ ++ mgm_app = kutf_create_application("mgm"); ++ ++ if (mgm_app == NULL) { ++ pr_warn("Creation of mgm KUTF app failed!\n"); ++ return -ENOMEM; ++ } ++ suite = kutf_create_suite(mgm_app, MGM_INTEGRATION_SUITE_NAME, BASE_MEM_GROUP_COUNT, ++ mali_kutf_mgm_integration_create_fixture, ++ mali_kutf_mgm_integration_remove_fixture); ++ if (suite == NULL) { ++ pr_warn("Creation of %s suite failed!\n", MGM_INTEGRATION_SUITE_NAME); ++ kutf_destroy_application(mgm_app); ++ return -ENOMEM; ++ } ++ kutf_add_test(suite, 0x0, MGM_INTEGRATION_PTE_TRANSLATION, ++ mali_kutf_mgm_pte_translation_test); ++ return 0; ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - KCPU Queue starts an Unmap Import ignoring reference count -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue ++ * mali_kutf_mgm_integration_test_main_exit() - Module exit point for this test. 
+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static void __exit mali_kutf_mgm_integration_test_main_exit(void) ++{ ++ kutf_destroy_application(mgm_app); ++} + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - KCPU Queue ends an Unmap Import ignoring reference count ++module_init(mali_kutf_mgm_integration_test_main_init); ++module_exit(mali_kutf_mgm_integration_test_main_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION("1.0"); +diff --git a/drivers/gpu/arm/bifrost/thirdparty/Kbuild b/drivers/gpu/arm/bifrost/thirdparty/Kbuild +new file mode 100755 +index 000000000..558be077d +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/thirdparty/Kbuild +@@ -0,0 +1,21 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++bifrost_kbase-y += thirdparty/mali_kbase_mmap.o +diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +new file mode 100644 +index 000000000..1e636b9a7 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +@@ -0,0 +1,420 @@ ++/* ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - KCPU Queue starts an array of JIT Allocs ++#include "linux/mman.h" ++#include ++#include ++ ++/* mali_kbase_mmap.c + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue ++ * This file contains Linux specific implementation of ++ * kbase_context_get_unmapped_area() interface. + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ + +/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Begin array of KCPU Queue ends an array of JIT Allocs ++ * align_and_check() - Align the specified pointer to the provided alignment and ++ * check that it is still in range. 
++ * @gap_end: Highest possible start address for allocation (end of gap in ++ * address space) ++ * @gap_start: Start address of current memory area / gap in address space ++ * @info: vm_unmapped_area_info structure passed to caller, containing ++ * alignment, length and limits for the allocation ++ * @is_shader_code: True if the allocation is for shader code (which has ++ * additional alignment requirements) ++ * @is_same_4gb_page: True if the allocation needs to reside completely within ++ * a 4GB chunk + * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue ++ * Return: true if gap_end is now aligned correctly and is still in range, ++ * false otherwise + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, ++ struct vm_unmapped_area_info *info, bool is_shader_code, ++ bool is_same_4gb_page) ++{ ++ /* Compute highest gap address at the desired alignment */ ++ (*gap_end) -= info->length; ++ (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Array item of KCPU Queue ends an array of JIT Allocs -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address -+ * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_alloc_gpu_alloc_addr, \ -+ jit_alloc_mmu_flags \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_alloc_gpu_alloc_addr, \ -+ jit_alloc_mmu_flags \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_alloc_gpu_alloc_addr, \ -+ jit_alloc_mmu_flags \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (is_shader_code) { ++ /* Check for 4GB boundary */ ++ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) ++ (*gap_end) -= (info->align_offset ? info->align_offset : ++ info->length); ++ if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) ++ (*gap_end) -= (info->align_offset ? 
info->align_offset : ++ info->length); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - End array of KCPU Queue ends an array of JIT Allocs -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + ++ info->length) & BASE_MEM_MASK_4GB)) ++ return false; ++ } else if (is_same_4gb_page) { ++ unsigned long start = *gap_end; ++ unsigned long end = *gap_end + info->length; ++ unsigned long mask = ~((unsigned long)U32_MAX); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - KCPU Queue starts an array of JIT Frees -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* Check if 4GB boundary is straddled */ ++ if ((start & mask) != ((end - 1) & mask)) { ++ unsigned long offset = end - (end & mask); ++ /* This is to ensure that alignment doesn't get ++ * disturbed in an attempt to prevent straddling at ++ * 4GB boundary. The GPU VA is aligned to 2MB when the ++ * allocation size is > 2MB and there is enough CPU & ++ * GPU virtual space. 
++ */ ++ unsigned long rounded_offset = ++ ALIGN(offset, info->align_mask + 1); + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - Begin array of KCPU Queue ends an array of JIT Frees -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ start -= rounded_offset; ++ end -= rounded_offset; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - Array item of KCPU Queue ends an array of JIT Frees -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ * @jit_free_pages_used: The actual number of pages used by the JIT allocation -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_free_pages_used \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_free_pages_used \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error, \ -+ jit_free_pages_used \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ *gap_end = start; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - End array of KCPU Queue ends an array of JIT Frees -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* The preceding 4GB boundary shall not get straddled, ++ * even after accounting for the alignment, as the ++ * size of allocation is limited to 4GB and the initial ++ * start location was already aligned. 
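The is_same_4gb_page branch above first shifts the candidate range down by a suitably aligned offset when it would cross a 4GB boundary, then asserts that it no longer straddles one. The straddle test itself reduces to comparing everything above the low 32 bits of the first and last byte addresses; a self-contained illustration in plain C (not driver code):

#include <stdbool.h>
#include <stdint.h>

/* True if [start, start + len) crosses a 4GB boundary; mirrors the
 * (start & mask) != ((end - 1) & mask) check in the hunk above. */
static bool straddles_4gb(uint64_t start, uint64_t len)
{
        const uint64_t mask = ~(uint64_t)UINT32_MAX; /* keeps bits 32 and up */

        return (start & mask) != ((start + len - 1) & mask);
}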
++ */ ++ WARN_ON((start & mask) != ((end - 1) & mask)); ++ } ++ } + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER - KCPU Queue executes an Error Barrier -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START - KCPU Queue starts a group suspend -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ -+ kbdev, \ -+ kcpu_queue \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) ++ return false; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END - KCPU Queue ends a group suspend -+ * -+ * @kbdev: Kbase device -+ * @kcpu_queue: KCPU queue -+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ kcpu_queue, \ -+ execute_error \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ -+ kbdev, \ -+ kcpu_queue, \ -+ execute_error \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ return true; ++} + +/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING - CSF FW is being reloaded ++ * kbase_unmapped_area_topdown() - allocates new areas top-down from ++ * below the stack limit. ++ * @info: Information about the memory area to allocate. ++ * @is_shader_code: Boolean which denotes whether the allocated area is ++ * intended for the use by shader core in which case a ++ * special alignment requirements apply. ++ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs ++ * to reside completely within a 4GB chunk. 
+ * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_reloading( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING - CSF FW is being enabled ++ * The unmapped_area_topdown() function in the Linux kernel is not exported ++ * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a ++ * module and also make use of the fact that some of the requirements for ++ * the unmapped area are known in advance, we implemented an extended version ++ * of this function and prefixed it with 'kbase_'. + * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_enabling( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP - CSF FW sleep is requested ++ * The difference in the call parameter list comes from the fact that ++ * kbase_unmapped_area_topdown() is called with additional parameters which ++ * are provided to indicate whether the allocation is for a shader core memory, ++ * which has additional alignment requirements, and whether the allocation can ++ * straddle a 4GB boundary. + * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ -+ -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP - CSF FW wake up is requested ++ * The modification of the original Linux function lies in how the computation ++ * of the highest gap address at the desired alignment is performed once the ++ * gap with desirable properties is found. For this purpose a special function ++ * is introduced (@ref align_and_check()) which beside computing the gap end ++ * at the desired alignment also performs additional alignment checks for the ++ * case when the memory is executable shader core memory, for which it is ++ * ensured that the gap does not end on a 4GB boundary, and for the case when ++ * memory needs to be confined within a 4GB chunk. 
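The paragraph above describes align_and_check() as computing the highest gap address at the desired alignment before applying the extra shader-code and 4GB checks. Restated as a standalone helper so the arithmetic is easy to see (illustration only; align_mask is the alignment minus one and align_offset the required residue modulo that alignment, following the vm_unmapped_area_info convention):

#include <stdint.h>

/* Highest start address that fits length bytes below gap_end and satisfies
 * ((addr - align_offset) & align_mask) == 0, i.e. the two subtractions done
 * at the top of align_and_check(). */
static uint64_t highest_aligned_start(uint64_t gap_end, uint64_t length,
                                      uint64_t align_offset, uint64_t align_mask)
{
        gap_end -= length;                                /* leave room for the allocation */
        gap_end -= (gap_end - align_offset) & align_mask; /* round down to the alignment */
        return gap_end;
}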
+ * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event ++ * Return: address of the found gap end (high limit) if area is found; ++ * -ENOMEM if search is unsuccessful + */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT - CSF FW halt is requested -+ * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_request_halt( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info ++ *info, bool is_shader_code, bool is_same_4gb_page) ++{ ++#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ unsigned long length, low_limit, high_limit, gap_start, gap_end; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING - CSF FW is being disabled -+ * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_disabling( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* Adjust search length to account for worst case alignment overhead */ ++ length = info->length + info->align_mask; ++ if (length < info->length) ++ return -ENOMEM; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF - CSF FW is off -+ * -+ * @kbdev: Kbase device -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_fw_off( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_cycle \ -+ ); \ -+ } while (0) -+#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ -+ kbdev, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ /* ++ * Adjust search limits by the desired length. ++ * See implementation comment at top of unmapped_area(). 
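kbase_unmapped_area_topdown() above pads the requested length with align_mask to cover the worst-case alignment adjustment, and treats a wrap-around of that addition as an unsatisfiable request (the 6.1+ maple-tree path later in this hunk does the same). The same check in isolation (illustration only):

#include <stdbool.h>

/* Overflow-checked worst-case padding: returns false when len + align_mask
 * wraps, which corresponds to the early -ENOMEM returns above. */
static bool pad_for_alignment(unsigned long len, unsigned long align_mask,
                              unsigned long *padded)
{
        *padded = len + align_mask;
        return *padded >= len;
}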
++ */ ++ gap_end = info->high_limit; ++ if (gap_end < length) ++ return -ENOMEM; ++ high_limit = gap_end - length; + -+/** -+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - An overflow has happened with the CSFFW Timeline stream -+ * -+ * @kbdev: Kbase device -+ * @csffw_timestamp: Timestamp of a CSFFW event -+ * @csffw_cycle: Cycle number of a CSFFW event -+ */ -+#if MALI_USE_CSF -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ -+ kbdev, \ -+ csffw_timestamp, \ -+ csffw_cycle \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ -+ __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ -+ __TL_DISPATCH_STREAM(kbdev, obj), \ -+ csffw_timestamp, \ -+ csffw_cycle \ -+ ); \ -+ } while (0) ++ if (info->low_limit > high_limit) ++ return -ENOMEM; ++ low_limit = info->low_limit + length; ++ ++ /* Check highest gap, which does not precede any rbtree node */ ++ gap_start = mm->highest_vm_end; ++ if (gap_start <= high_limit) { ++ if (align_and_check(&gap_end, gap_start, info, ++ is_shader_code, is_same_4gb_page)) ++ return gap_end; ++ } ++ ++ /* Check if rbtree root looks promising */ ++ if (RB_EMPTY_ROOT(&mm->mm_rb)) ++ return -ENOMEM; ++ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); ++ if (vma->rb_subtree_gap < length) ++ return -ENOMEM; ++ ++ while (true) { ++ /* Visit right subtree if it looks promising */ ++ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; ++ if (gap_start <= high_limit && vma->vm_rb.rb_right) { ++ struct vm_area_struct *right = ++ rb_entry(vma->vm_rb.rb_right, ++ struct vm_area_struct, vm_rb); ++ if (right->rb_subtree_gap >= length) { ++ vma = right; ++ continue; ++ } ++ } ++ ++check_current: ++ /* Check if current node has a suitable gap */ ++ gap_end = vma->vm_start; ++ if (gap_end < low_limit) ++ return -ENOMEM; ++ if (gap_start <= high_limit && gap_end - gap_start >= length) { ++ /* We found a suitable gap. Clip it with the original ++ * high_limit. ++ */ ++ if (gap_end > info->high_limit) ++ gap_end = info->high_limit; ++ ++ if (align_and_check(&gap_end, gap_start, info, ++ is_shader_code, is_same_4gb_page)) ++ return gap_end; ++ } ++ ++ /* Visit left subtree if it looks promising */ ++ if (vma->vm_rb.rb_left) { ++ struct vm_area_struct *left = ++ rb_entry(vma->vm_rb.rb_left, ++ struct vm_area_struct, vm_rb); ++ if (left->rb_subtree_gap >= length) { ++ vma = left; ++ continue; ++ } ++ } ++ ++ /* Go back up the rbtree to find next candidate node */ ++ while (true) { ++ struct rb_node *prev = &vma->vm_rb; ++ ++ if (!rb_parent(prev)) ++ return -ENOMEM; ++ vma = rb_entry(rb_parent(prev), ++ struct vm_area_struct, vm_rb); ++ if (prev == vma->vm_rb.rb_right) { ++ gap_start = vma->vm_prev ? 
++ vma->vm_prev->vm_end : 0; ++ goto check_current; ++ } ++ } ++ } +#else -+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ -+ kbdev, \ -+ csffw_timestamp, \ -+ csffw_cycle \ -+ ) \ -+ do { } while (0) -+#endif /* MALI_USE_CSF */ ++ unsigned long length, high_limit, gap_start, gap_end; + -+/** -+ * KBASE_TLSTREAM_AUX_PM_STATE - PM state -+ * -+ * @kbdev: Kbase device -+ * @core_type: Core type (shader, tiler, l2 cache, l3 cache) -+ * @core_state_bitset: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) -+ */ -+#define KBASE_TLSTREAM_AUX_PM_STATE( \ -+ kbdev, \ -+ core_type, \ -+ core_state_bitset \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pm_state( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ core_type, \ -+ core_state_bitset \ -+ ); \ -+ } while (0) ++ MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); ++ /* Adjust search length to account for worst case alignment overhead */ ++ length = info->length + info->align_mask; ++ if (length < info->length) ++ return -ENOMEM; + -+/** -+ * KBASE_TLSTREAM_AUX_PAGEFAULT - Page fault -+ * -+ * @kbdev: Kbase device -+ * @ctx_nr: Kernel context number -+ * @as_nr: Address space number -+ * @page_cnt_change: Number of pages to be added -+ */ -+#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ -+ kbdev, \ -+ ctx_nr, \ -+ as_nr, \ -+ page_cnt_change \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pagefault( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, \ -+ as_nr, \ -+ page_cnt_change \ -+ ); \ -+ } while (0) ++ /* ++ * Adjust search limits by the desired length. ++ * See implementation comment at top of unmapped_area(). ++ */ ++ gap_end = info->high_limit; ++ if (gap_end < length) ++ return -ENOMEM; ++ high_limit = gap_end - length; + -+/** -+ * KBASE_TLSTREAM_AUX_PAGESALLOC - Total alloc pages change -+ * -+ * @kbdev: Kbase device -+ * @ctx_nr: Kernel context number -+ * @page_cnt: Number of pages used by the context -+ */ -+#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ -+ kbdev, \ -+ ctx_nr, \ -+ page_cnt \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pagesalloc( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, \ -+ page_cnt \ -+ ); \ -+ } while (0) ++ if (info->low_limit > high_limit) ++ return -ENOMEM; + -+/** -+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - New device frequency target -+ * -+ * @kbdev: Kbase device -+ * @target_freq: New target frequency -+ */ -+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ -+ kbdev, \ -+ target_freq \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_devfreq_target( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ target_freq \ -+ ); \ -+ } while (0) ++ while (true) { ++ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) ++ return -ENOMEM; ++ gap_end = mas.last + 1; ++ gap_start = mas.min; + -+/** -+ * KBASE_TLSTREAM_AUX_JIT_STATS - per-bin JIT statistics -+ * -+ * @kbdev: Kbase device -+ * @ctx_nr: Kernel context number -+ * @bid: JIT bin id -+ * @max_allocs: Maximum allocations allowed in this bin. 
-+ * @allocs: Number of active allocations in this bin -+ * @va_pages: Number of virtual pages allocated in this bin -+ * @ph_pages: Number of physical pages allocated in this bin -+ */ -+#define KBASE_TLSTREAM_AUX_JIT_STATS( \ -+ kbdev, \ -+ ctx_nr, \ -+ bid, \ -+ max_allocs, \ -+ allocs, \ -+ va_pages, \ -+ ph_pages \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_jit_stats( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, \ -+ bid, \ -+ max_allocs, \ -+ allocs, \ -+ va_pages, \ -+ ph_pages \ -+ ); \ -+ } while (0) ++ if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page)) ++ return gap_end; ++ } ++#endif ++ return -ENOMEM; ++} + -+/** -+ * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS - Tiler Heap statistics -+ * -+ * @kbdev: Kbase device -+ * @ctx_nr: Kernel context number -+ * @heap_id: Unique id used to represent a heap under a context -+ * @va_pages: Number of virtual pages allocated in this bin -+ * @ph_pages: Number of physical pages allocated in this bin -+ * @max_chunks: The maximum number of chunks that the heap should be allowed to use -+ * @chunk_size: Size of each chunk in tiler heap, in bytes -+ * @chunk_count: The number of chunks currently allocated in the tiler heap -+ * @target_in_flight: Number of render-passes that the driver should attempt to keep in flight for which allocation of new chunks is allowed -+ * @nr_in_flight: Number of render-passes that are in flight -+ */ -+#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \ -+ kbdev, \ -+ ctx_nr, \ -+ heap_id, \ -+ va_pages, \ -+ ph_pages, \ -+ max_chunks, \ -+ chunk_size, \ -+ chunk_count, \ -+ target_in_flight, \ -+ nr_in_flight \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_tiler_heap_stats( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, \ -+ heap_id, \ -+ va_pages, \ -+ ph_pages, \ -+ max_chunks, \ -+ chunk_size, \ -+ chunk_count, \ -+ target_in_flight, \ -+ nr_in_flight \ -+ ); \ -+ } while (0) + -+/** -+ * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - event on a given job slot -+ * -+ * @kbdev: Kbase device -+ * @ctx: Name of the context object -+ * @slot_nr: Job slot number -+ * @atom_nr: Sequential number of an atom -+ * @event: Event type. One of TL_JS_EVENT values ++/* This function is based on Linux kernel's arch_get_unmapped_area, but ++ * simplified slightly. Modifications come from the fact that some values ++ * about the memory area are known in advance. 
+ */ -+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ -+ kbdev, \ -+ ctx, \ -+ slot_nr, \ -+ atom_nr, \ -+ event \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_event_job_slot( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx, \ -+ slot_nr, \ -+ atom_nr, \ -+ event \ -+ ); \ -+ } while (0) ++unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, ++ const unsigned long addr, const unsigned long len, ++ const unsigned long pgoff, const unsigned long flags) ++{ ++ struct mm_struct *mm = current->mm; ++ struct vm_unmapped_area_info info; ++ unsigned long align_offset = 0; ++ unsigned long align_mask = 0; ++#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) ++ unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base); ++ unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr); ++#else ++ unsigned long high_limit = mm->mmap_base; ++ unsigned long low_limit = PAGE_SIZE; ++#endif ++ int cpu_va_bits = BITS_PER_LONG; ++ int gpu_pc_bits = ++ kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; ++ bool is_shader_code = false; ++ bool is_same_4gb_page = false; ++ unsigned long ret; + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - enter protected mode start -+ * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_protected_enter_start( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ gpu \ -+ ); \ -+ } while (0) ++ /* the 'nolock' form is used here: ++ * - the base_pfn of the SAME_VA zone does not change ++ * - in normal use, va_size_pages is constant once the first allocation ++ * begins ++ * ++ * However, in abnormal use this function could be processing whilst ++ * another new zone is being setup in a different thread (e.g. to ++ * borrow part of the SAME_VA zone). In the worst case, this path may ++ * witness a higher SAME_VA end_pfn than the code setting up the new ++ * zone. ++ * ++ * This is safe because once we reach the main allocation functions, ++ * we'll see the updated SAME_VA end_pfn and will determine that there ++ * is no free region at the address found originally by too large a ++ * same_va_end_addr here, and will fail the allocation gracefully. 
++ */ ++ struct kbase_reg_zone *zone = ++ kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); ++ u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; ++#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) ++ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - enter protected mode end -+ * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_protected_enter_end( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ gpu \ -+ ); \ -+ } while (0) ++ /* requested length too big for entire address space */ ++ if (len > mmap_end - kbase_mmap_min_addr) ++ return -ENOMEM; ++#endif + -+/** -+ * KBASE_TLSTREAM_AUX_MMU_COMMAND - mmu commands with synchronicity info -+ * -+ * @kbdev: Kbase device -+ * @kernel_ctx_id: Unique ID for the KBase Context -+ * @mmu_cmd_id: MMU Command ID (e.g AS_COMMAND_UPDATE) -+ * @mmu_synchronicity: Indicates whether the command is related to current running job that needs to be resolved to make it progress (synchronous, e.g. grow on page fault, JIT) or not (asynchronous, e.g. IOCTL calls from user-space). This param will be 0 if it is an asynchronous operation. -+ * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated -+ * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated -+ */ -+#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \ -+ kbdev, \ -+ kernel_ctx_id, \ -+ mmu_cmd_id, \ -+ mmu_synchronicity, \ -+ mmu_lock_addr, \ -+ mmu_lock_page_num \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_mmu_command( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ kernel_ctx_id, \ -+ mmu_cmd_id, \ -+ mmu_synchronicity, \ -+ mmu_lock_addr, \ -+ mmu_lock_page_num \ -+ ); \ -+ } while (0) ++ /* err on fixed address */ ++ if ((flags & MAP_FIXED) || addr) ++ return -EINVAL; + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - leave protected mode start -+ * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_aux_protected_leave_start( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ gpu \ -+ ); \ -+ } while (0) ++#if IS_ENABLED(CONFIG_64BIT) ++ /* too big? */ ++ if (len > TASK_SIZE - SZ_2M) ++ return -ENOMEM; + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - leave protected mode end -+ * -+ * @kbdev: Kbase device -+ * @gpu: Name of the GPU object -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ -+ kbdev, \ -+ gpu \ -+ ) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_aux_protected_leave_end( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ gpu \ -+ ); \ -+ } while (0) ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ high_limit = ++ min_t(unsigned long, high_limit, same_va_end_addr); + -+/* Gator tracepoints are hooked into TLSTREAM interface. -+ * When the following tracepoints are called, corresponding -+ * Gator tracepoint will be called as well. -+ */ ++ /* If there's enough (> 33 bits) of GPU VA space, align ++ * to 2MB boundaries. 
++ */ ++ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { ++ if (len >= SZ_2M) { ++ align_offset = SZ_2M; ++ align_mask = SZ_2M - 1; ++ } ++ } + -+#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) -+/* `event` is one of TL_JS_EVENT values here. -+ * The values of TL_JS_EVENT are guaranteed to match -+ * with corresponding GATOR_JOB_SLOT values. -+ */ -+#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \ -+ context, slot_nr, atom_nr, event) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ kbase_trace_mali_job_slots_event(kbdev->id, \ -+ GATOR_MAKE_EVENT(event, slot_nr), \ -+ context, (u8) atom_nr); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_event_job_slot( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ context, slot_nr, atom_nr, event); \ -+ } while (0) ++ low_limit = SZ_2M; ++ } else { ++ cpu_va_bits = 32; ++ } ++#endif /* CONFIG_64BIT */ ++ if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && ++ (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { ++ int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ struct kbase_va_region *reg; + -+#undef KBASE_TLSTREAM_AUX_PM_STATE -+#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ kbase_trace_mali_pm_status(kbdev->id, \ -+ core_type, state); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pm_state( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ core_type, state); \ -+ } while (0) ++ /* Need to hold gpu vm lock when using reg */ ++ kbase_gpu_vm_lock(kctx); ++ reg = kctx->pending_regions[cookie]; ++ if (!reg) { ++ kbase_gpu_vm_unlock(kctx); ++ return -EINVAL; ++ } ++ if (!(reg->flags & KBASE_REG_GPU_NX)) { ++ if (cpu_va_bits > gpu_pc_bits) { ++ align_offset = 1ULL << gpu_pc_bits; ++ align_mask = align_offset - 1; ++ is_shader_code = true; ++ } ++#if !MALI_USE_CSF ++ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { ++ unsigned long extension_bytes = ++ (unsigned long)(reg->extension ++ << PAGE_SHIFT); ++ /* kbase_check_alloc_sizes() already satisfies ++ * these checks, but they're here to avoid ++ * maintenance hazards due to the assumptions ++ * involved ++ */ ++ WARN_ON(reg->extension > ++ (ULONG_MAX >> PAGE_SHIFT)); ++ WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); ++ WARN_ON(!is_power_of_2(extension_bytes)); ++ align_mask = extension_bytes - 1; ++ align_offset = ++ extension_bytes - ++ (reg->initial_commit << PAGE_SHIFT); ++#endif /* !MALI_USE_CSF */ ++ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { ++ is_same_4gb_page = true; ++ } ++ kbase_gpu_vm_unlock(kctx); ++#ifndef CONFIG_64BIT ++ } else { ++ return current->mm->get_unmapped_area( ++ kctx->filp, addr, len, pgoff, flags); ++#endif ++ } + -+#undef KBASE_TLSTREAM_AUX_PAGEFAULT -+#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \ -+ ctx_nr, as_nr, page_cnt_change) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ -+ as_nr, \ -+ page_cnt_change); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pagefault( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, as_nr, page_cnt_change); \ -+ } while (0) ++ info.flags = 0; ++ info.length = len; ++ info.low_limit = low_limit; ++ info.high_limit = high_limit; ++ info.align_offset = align_offset; ++ info.align_mask = align_mask; + -+/* kbase_trace_mali_total_alloc_pages_change is handled differently here. 
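The alignment selection above boils down to a pair (align_offset, align_mask) handed to the search: for 64-bit clients, 2MB alignment when the GPU VA space is wider than 33 bits and the mapping is at least 2MB; program-counter-sized alignment for executable (shader) regions; and extension-based alignment for KBASE_REG_TILER_ALIGN_TOP regions. The 2MB policy in isolation (illustration only; EXAMPLE_SZ_2M stands in for the kernel's SZ_2M):

#include <stdint.h>

#define EXAMPLE_SZ_2M (2UL << 20) /* 2 MiB, stand-in for SZ_2M */

/* Mirrors the CONFIG_64BIT branch above: only ask for 2MB alignment when it
 * is both useful (wide GPU VA space) and cheap (mapping already >= 2MB). */
static void choose_gpu_va_alignment(unsigned long len, unsigned int gpu_va_bits,
                                    unsigned long *align_offset,
                                    unsigned long *align_mask)
{
        *align_offset = 0;
        *align_mask = 0;

        if (gpu_va_bits > 33 && len >= EXAMPLE_SZ_2M) {
                *align_offset = EXAMPLE_SZ_2M;
                *align_mask = EXAMPLE_SZ_2M - 1;
        }
}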
-+ * We stream the total amount of pages allocated for `kbdev` rather -+ * than `page_count`, which is per-context. -+ */ -+#undef KBASE_TLSTREAM_AUX_PAGESALLOC -+#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \ -+ do { \ -+ int enabled = atomic_read(&kbdev->timeline_flags); \ -+ u32 global_pages_count = \ -+ atomic_read(&kbdev->memdev.used_pages); \ -+ \ -+ kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ -+ global_pages_count); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_aux_pagesalloc( \ -+ __TL_DISPATCH_STREAM(kbdev, aux), \ -+ ctx_nr, page_cnt); \ -+ } while (0) -+#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ ++ ret = kbase_unmapped_area_topdown(&info, is_shader_code, ++ is_same_4gb_page); + -+/* clang-format on */ ++ if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && ++ high_limit < same_va_end_addr) { ++#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) ++ /* Retry above TASK_UNMAPPED_BASE */ ++ info.low_limit = TASK_UNMAPPED_BASE; ++ info.high_limit = min_t(u64, mmap_end, same_va_end_addr); ++#else ++ /* Retry above mmap_base */ ++ info.low_limit = mm->mmap_base; ++ info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); +#endif -diff --git a/drivers/gpu/arm/mali400/.gitignore b/drivers/gpu/arm/mali400/.gitignore -new file mode 100755 -index 000000000..d91c8078a ---- /dev/null -+++ b/drivers/gpu/arm/mali400/.gitignore -@@ -0,0 +1 @@ -+./mali/__malidrv_build_info.c -diff --git a/drivers/gpu/arm/mali400/Kbuild b/drivers/gpu/arm/mali400/Kbuild -new file mode 100755 -index 000000000..dbb7ad3e5 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/Kbuild -@@ -0,0 +1,2 @@ -+# SPDX-License-Identifier: GPL-2.0 -+obj-y += mali/ -diff --git a/drivers/gpu/arm/mali400/mali/.gitignore b/drivers/gpu/arm/mali400/mali/.gitignore -new file mode 100755 -index 000000000..6b1a3ed27 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/.gitignore -@@ -0,0 +1 @@ -+__malidrv_build_info.c -diff --git a/drivers/gpu/arm/mali400/mali/Kbuild b/drivers/gpu/arm/mali400/mali/Kbuild ++ ++ ret = kbase_unmapped_area_topdown(&info, is_shader_code, ++ is_same_4gb_page); ++ } ++ ++ return ret; ++} +diff --git a/drivers/gpu/arm/bifrost/tl/Kbuild b/drivers/gpu/arm/bifrost/tl/Kbuild new file mode 100755 -index 000000000..7390ab758 +index 000000000..1c684d489 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/Kbuild -@@ -0,0 +1,254 @@ ++++ b/drivers/gpu/arm/bifrost/tl/Kbuild +@@ -0,0 +1,32 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. +# -+# Copyright (C) 2010-2011 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# + -+# This file is called by the Linux build system. ++bifrost_kbase-y += \ ++ tl/mali_kbase_timeline.o \ ++ tl/mali_kbase_timeline_io.o \ ++ tl/mali_kbase_tlstream.o \ ++ tl/mali_kbase_tracepoints.o + -+# make $(src) as absolute path if it isn't already, by prefixing $(srctree) -+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + -+# set up defaults if not defined by the user -+TIMESTAMP ?= default -+OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16 -+USING_GPU_UTILIZATION ?= 1 -+PROFILING_SKIP_PP_JOBS ?= 0 -+PROFILING_SKIP_PP_AND_GP_JOBS ?= 0 -+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0 -+MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0 -+MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0 -+MALI_UPPER_HALF_SCHEDULING ?= 1 -+MALI_ENABLE_CPU_CYCLES ?= 0 -+MALI_PLATFORM ?= rk ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ bifrost_kbase-y += tl/backend/mali_kbase_timeline_csf.o ++else ++ bifrost_kbase-y += tl/backend/mali_kbase_timeline_jm.o ++endif +diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c +new file mode 100644 +index 000000000..a6062f170 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c +@@ -0,0 +1,190 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases: -+# The ARM proprietary product will only include the license/proprietary directory -+# The GPL product will only include the license/gpl directory -+ccflags-y += -I$(src)/linux/license/gpl ++#include ++#include ++#include + ++#include + -+ifeq ($(USING_GPU_UTILIZATION), 1) -+ ifeq ($(USING_DVFS), 1) -+ $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need) -+ endif -+endif ++#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull) + -+ifneq ($(MALI_PLATFORM),) -+ EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 -+ #MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c) -+ mali-y += \ -+ platform/$(MALI_PLATFORM)/rk.o -+endif ++void kbase_create_timeline_objects(struct kbase_device *kbdev) ++{ ++ unsigned int as_nr; ++ unsigned int slot_i; ++ struct kbase_context *kctx; ++ struct kbase_timeline *timeline = kbdev->timeline; ++ struct kbase_tlstream *summary = ++ &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; ++ u32 const kbdev_has_cross_stream_sync = ++ (kbdev->gpu_props.props.raw_props.gpu_features & ++ GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ? ++ 1 : ++ 0; ++ u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id & ++ GPU_ID2_ARCH_MAJOR) >> ++ GPU_ID2_ARCH_MAJOR_SHIFT; ++ u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8; ++ u32 const supports_gpu_sleep = ++#ifdef KBASE_PM_RUNTIME ++ kbdev->pm.backend.gpu_sleep_supported; ++#else ++ false; ++#endif /* KBASE_PM_RUNTIME */ + -+ifeq ($(MALI_PLATFORM_FILES),) -+ifeq ($(CONFIG_ARCH_EXYNOS4),y) -+EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 -+export MALI_PLATFORM=exynos4 -+export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c)) -+export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN)) -+endif -+endif ++ /* Summarize the Address Space objects. */ ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + -+mali-y += \ -+ linux/mali_osk_atomics.o \ -+ linux/mali_osk_irq.o \ -+ linux/mali_osk_wq.o \ -+ linux/mali_osk_locks.o \ -+ linux/mali_osk_wait_queue.o \ -+ linux/mali_osk_low_level_mem.o \ -+ linux/mali_osk_math.o \ -+ linux/mali_osk_memory.o \ -+ linux/mali_osk_misc.o \ -+ linux/mali_osk_mali.o \ -+ linux/mali_osk_notification.o \ -+ linux/mali_osk_time.o \ -+ linux/mali_osk_timers.o \ -+ linux/mali_osk_bitmap.o ++ /* Create Legacy GPU object to track in AOM for dumping */ ++ __kbase_tlstream_tl_new_gpu(summary, ++ kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ kbdev->gpu_props.num_cores); + -+mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o -+mali-y += linux/mali_memory_external.o -+mali-y += linux/mali_memory_block_alloc.o -+mali-y += linux/mali_memory_swap_alloc.o + -+mali-y += \ -+ linux/mali_memory_manager.o \ -+ linux/mali_memory_virtual.o \ -+ linux/mali_memory_util.o \ -+ linux/mali_memory_cow.o \ -+ linux/mali_memory_defer_bind.o ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_lifelink_as_gpu(summary, ++ &kbdev->as[as_nr], ++ kbdev); + -+mali-y += \ -+ linux/mali_ukk_mem.o \ -+ linux/mali_ukk_gp.o \ -+ linux/mali_ukk_pp.o \ -+ linux/mali_ukk_core.o \ -+ linux/mali_ukk_soft_job.o \ -+ linux/mali_ukk_timeline.o ++ /* Trace the creation of a new kbase device and set its properties. 
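kbase_create_timeline_objects() above derives its device summary fields from raw GPU properties: cross-stream sync support is bit 3 of gpu_features (GPU_FEATURES_CROSS_STREAM_SYNC_MASK, defined at the top of this file in the hunk), the architecture major revision is extracted from gpu_id, and GPU-sleep support comes from the PM backend when KBASE_PM_RUNTIME is built in. The feature-bit decode in isolation (plain C, illustration only; the mask value is the one defined in the hunk above):

#include <stdint.h>

/* Bit 3 of the raw gpu_features word flags cross-stream sync support,
 * matching GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull) above. */
static uint32_t has_cross_stream_sync(uint64_t gpu_features)
{
        const uint64_t cross_stream_sync_mask = 1ULL << 3;

        return (gpu_features & cross_stream_sync_mask) ? 1u : 0u;
}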
*/ ++ __kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id, ++ kbdev->gpu_props.num_cores, ++ kbdev->csf.global_iface.group_num, ++ kbdev->nr_hw_address_spaces, num_sb_entries, ++ kbdev_has_cross_stream_sync, supports_gpu_sleep); + -+mali-$(CONFIG_MALI_DEVFREQ) += \ -+ linux/mali_devfreq.o \ -+ common/mali_pm_metrics.o ++ /* Lock the context list, to ensure no changes to the list are made ++ * while we're summarizing the contexts and their contents. ++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); + -+# Source files which always are included in a build -+mali-y += \ -+ common/mali_kernel_core.o \ -+ linux/mali_kernel_linux.o \ -+ common/mali_session.o \ -+ linux/mali_device_pause_resume.o \ -+ common/mali_kernel_vsync.o \ -+ linux/mali_ukk_vsync.o \ -+ linux/mali_kernel_sysfs.o \ -+ common/mali_mmu.o \ -+ common/mali_mmu_page_directory.o \ -+ common/mali_mem_validation.o \ -+ common/mali_hw_core.o \ -+ common/mali_gp.o \ -+ common/mali_pp.o \ -+ common/mali_pp_job.o \ -+ common/mali_gp_job.o \ -+ common/mali_soft_job.o \ -+ common/mali_scheduler.o \ -+ common/mali_executor.o \ -+ common/mali_group.o \ -+ common/mali_dlbu.o \ -+ common/mali_broadcast.o \ -+ common/mali_pm.o \ -+ common/mali_pmu.o \ -+ common/mali_user_settings_db.o \ -+ common/mali_kernel_utilization.o \ -+ common/mali_control_timer.o \ -+ common/mali_l2_cache.o \ -+ common/mali_timeline.o \ -+ common/mali_timeline_fence_wait.o \ -+ common/mali_timeline_sync_fence.o \ -+ common/mali_spinlock_reentrant.o \ -+ common/mali_pm_domain.o \ -+ linux/mali_osk_pm.o \ -+ linux/mali_pmu_power_up_down.o \ -+ __malidrv_build_info.o ++ /* Hold the scheduler lock while we emit the current state ++ * We also need to continue holding the lock until after the first body ++ * stream tracepoints are emitted to ensure we don't change the ++ * scheduler until after then ++ */ ++ mutex_lock(&kbdev->csf.scheduler.lock); + -+ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),) -+ mali-y += linux/mali_slp_global_lock.o -+endif ++ for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) { + -+ifneq ($(MALI_PLATFORM_FILES),) -+ mali-y += $(MALI_PLATFORM_FILES:.c=.o) -+endif ++ struct kbase_queue_group *group = ++ kbdev->csf.scheduler.csg_slots[slot_i].resident_group; + -+ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),) -+ mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o) -+endif ++ if (group) ++ __kbase_tlstream_tl_kbase_device_program_csg( ++ summary, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ group->kctx->id, group->handle, slot_i, 0); ++ } + -+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o -+mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o ++ /* Reset body stream buffers while holding the kctx lock. ++ * As we are holding the lock, we can guarantee that no kctx creation or ++ * deletion tracepoints can be fired from outside of this function by ++ * some other thread. 
++ */ ++ kbase_timeline_streams_body_reset(timeline); + -+mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o -+ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP) ++ mutex_unlock(&kbdev->csf.scheduler.lock); + -+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o -+mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o -+mali-$(CONFIG_SYNC) += linux/mali_sync.o -+mali-$(CONFIG_SYNC) += linux/mali_internal_sync.o -+mali-$(CONFIG_SYNC_FILE) += linux/mali_sync.o -+mali-$(CONFIG_SYNC_FILE) += linux/mali_internal_sync.o -+mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o -+ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android -+ccflags-$(CONFIG_SYNC_FILE) += -Idrivers/staging/android ++ /* For each context in the device... */ ++ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { ++ size_t i; ++ struct kbase_tlstream *body = ++ &timeline->streams[TL_STREAM_TYPE_OBJ]; + -+mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o ++ /* Lock the context's KCPU queues, to ensure no KCPU-queue ++ * related actions can occur in this context from now on. ++ */ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); + -+mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o ++ /* Acquire the MMU lock, to ensure we don't get a concurrent ++ * address space assignment while summarizing this context's ++ * address space. ++ */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+# Tell the Linux build system from which .o file to create the kernel module -+obj-$(CONFIG_MALI400) := mali.o ++ /* Trace the context itself into the body stream, not the ++ * summary stream. ++ * We place this in the body to ensure it is ordered after any ++ * other tracepoints related to the contents of the context that ++ * might have been fired before acquiring all of the per-context ++ * locks. ++ * This ensures that those tracepoints will not actually affect ++ * the object model state, as they reference a context that ++ * hasn't been traced yet. They may, however, cause benign ++ * errors to be emitted. 
++ */ ++ __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, ++ kbdev->gpu_props.props.raw_props.gpu_id); + -+ccflags-y += $(EXTRA_DEFINES) ++ /* Also trace with the legacy AOM tracepoint for dumping */ ++ __kbase_tlstream_tl_new_ctx(body, ++ kctx, ++ kctx->id, ++ (u32)(kctx->tgid)); + -+# Set up our defines, which will be passed to gcc -+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP) -+ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED) -+ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS) -+ccflags-y += -DMALI_STATE_TRACKING=1 -+ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB) -+ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION) -+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES) ++ /* Trace the currently assigned address space */ ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) ++ __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, ++ kctx->as_nr); + -+ifeq ($(MALI_UPPER_HALF_SCHEDULING),1) -+ ccflags-y += -DMALI_UPPER_HALF_SCHEDULING -+endif + -+#build-in include path is different -+ifeq ($(MALI_PLATFORM_FILES),) -+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/ -+else -+ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump -+endif -+ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG ++ /* Trace all KCPU queues in the context into the body stream. ++ * As we acquired the KCPU lock after resetting the body stream, ++ * it's possible that some KCPU-related events for this context ++ * occurred between that reset and now. ++ * These will cause errors to be emitted when parsing the ++ * timeline, but they will not affect the correctness of the ++ * object model. ++ */ ++ for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) { ++ const struct kbase_kcpu_command_queue *kcpu_queue = ++ kctx->csf.kcpu_queues.array[i]; + -+# Use our defines when compiling -+ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform -Wno-date-time ++ if (kcpu_queue) ++ __kbase_tlstream_tl_kbase_new_kcpuqueue( ++ body, kcpu_queue, kcpu_queue->id, kcpu_queue->kctx->id, ++ kcpu_queue->num_pending_cmds); ++ } + -+# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available -+MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null) ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); + -+SVN_INFO = (cd $(src); svn info 2>/dev/null) ++ /* Now that all per-context locks for this context have been ++ * released, any per-context tracepoints that are fired from ++ * any other threads will go into the body stream after ++ * everything that was just summarised into the body stream in ++ * this iteration of the loop, so will start to correctly update ++ * the object model state. 
++ */ ++ } + -+ifneq ($(shell $(SVN_INFO) 2>/dev/null),) -+# SVN detected -+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null) -+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV) -+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-) -+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-) -+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-) ++ mutex_unlock(&timeline->tl_kctx_list_lock); + -+else # SVN -+# GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null) -+ifneq ($(GIT_REV),) -+# Git detected -+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV) -+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci") -+CHANGED_REVISION := $(GIT_REV) -+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null) ++ /* Static object are placed into summary packet that needs to be ++ * transmitted first. Flush all streams to make it available to ++ * user space. ++ */ ++ kbase_timeline_streams_flush(timeline); ++} +diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c +new file mode 100644 +index 000000000..9ba89f59f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c +@@ -0,0 +1,96 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ + -+else # Git -+# No Git or SVN detected -+DRIVER_REV := $(MALI_RELEASE_NAME) -+CHANGE_DATE := $(MALI_RELEASE_NAME) -+CHANGED_REVISION := $(MALI_RELEASE_NAME) -+endif -+endif ++#include ++#include ++#include + -+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\" ++#include + -+VERSION_STRINGS := -+VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 ) -+VERSION_STRINGS += REPO_URL=$(REPO_URL) -+VERSION_STRINGS += REVISION=$(DRIVER_REV) -+VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION) -+VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE) -+VERSION_STRINGS += BUILD_DATE=$(shell date) -+ifdef CONFIG_MALI400_DEBUG -+VERSION_STRINGS += BUILD=debug -+else -+VERSION_STRINGS += BUILD=release -+endif -+VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM) -+VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM) -+VERSION_STRINGS += KDIR=$(KDIR) -+VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB) -+VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP) -+VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING) -+VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING) -+VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION) -+VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS) -+VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE) -+VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING) ++void kbase_create_timeline_objects(struct kbase_device *kbdev) ++{ ++ unsigned int lpu_id; ++ unsigned int as_nr; ++ struct kbase_context *kctx; ++ struct kbase_timeline *timeline = kbdev->timeline; ++ struct kbase_tlstream *summary = ++ &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + -+# Create file with Mali driver configuration -+$(src)/__malidrv_build_info.c: -+ @echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c -diff --git a/drivers/gpu/arm/mali400/mali/Kconfig b/drivers/gpu/arm/mali400/mali/Kconfig -new file mode 100644 -index 000000000..082919d91 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/Kconfig -@@ -0,0 +1,119 @@ -+# SPDX-License-Identifier: GPL-2.0 -+config MALI400 -+ tristate "Mali-300/400/450 support" -+ depends on ARM || ARM64 -+ select DMA_SHARED_BUFFER -+ help -+ This enables support for the ARM Mali-300, Mali-400, and Mali-450 -+ GPUs. ++ /* Summarize the LPU objects. */ ++ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { ++ u32 *lpu = ++ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; ++ __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); ++ } + -+ To compile this driver as a module, choose M here: the module will be -+ called mali. ++ /* Summarize the Address Space objects. */ ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + -+config MALI450 -+ bool "Enable Mali-450 support" -+ depends on MALI400 -+ help -+ This enables support for Mali-450 specific features. ++ /* Create GPU object and make it retain all LPUs and address spaces. */ ++ __kbase_tlstream_tl_new_gpu(summary, ++ kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ kbdev->gpu_props.num_cores); + -+config MALI470 -+ bool "Enable Mali-470 support" -+ depends on MALI400 -+ help -+ This enables support for Mali-470 specific features. 
++ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { ++ void *lpu = ++ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; ++ __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); ++ } + -+config MALI400_DEBUG -+ bool "Enable debug in Mali driver" -+ depends on MALI400 -+ help -+ This enabled extra debug checks and messages in the Mali driver. ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_lifelink_as_gpu(summary, ++ &kbdev->as[as_nr], ++ kbdev); + -+config MALI400_PROFILING -+ bool "Enable Mali profiling" -+ depends on MALI400 -+ select TRACEPOINTS -+ default y -+ help -+ This enables gator profiling of Mali GPU events. ++ /* Lock the context list, to ensure no changes to the list are made ++ * while we're summarizing the contexts and their contents. ++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); + -+config MALI400_INTERNAL_PROFILING -+ bool "Enable internal Mali profiling API" -+ depends on MALI400_PROFILING -+ default n -+ help -+ This enables the internal legacy Mali profiling API. ++ /* For each context in the device... */ ++ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { ++ /* Summarize the context itself */ ++ __kbase_tlstream_tl_new_ctx(summary, ++ kctx, ++ kctx->id, ++ (u32)(kctx->tgid)); ++ } + -+config MALI400_UMP -+ bool "Enable UMP support" -+ depends on MALI400 -+ help -+ This enables support for the UMP memory sharing API in the Mali driver. ++ /* Reset body stream buffers while holding the kctx lock. ++ * This ensures we can't fire both summary and normal tracepoints for ++ * the same objects. ++ * If we weren't holding the lock, it's possible that the summarized ++ * objects could have been created, destroyed, or used after we ++ * constructed the summary stream tracepoints, but before we reset ++ * the body stream, resulting in losing those object event tracepoints. ++ */ ++ kbase_timeline_streams_body_reset(timeline); + -+config MALI_DVFS -+ bool "Enable Mali dynamically frequency change" -+ depends on MALI400 && !MALI_DEVFREQ -+ default y -+ help -+ This enables support for dynamic change frequency of Mali with the goal of lowering power consumption. ++ mutex_unlock(&timeline->tl_kctx_list_lock); + -+config MALI_DMA_BUF_MAP_ON_ATTACH -+ bool "Map dma-buf attachments on attach" -+ depends on MALI400 && DMA_SHARED_BUFFER -+ default y -+ help -+ This makes the Mali driver map dma-buf attachments after doing -+ attach. If this is not set the dma-buf attachments will be mapped for -+ every time the GPU need to access the buffer. ++ /* Static object are placed into summary packet that needs to be ++ * transmitted first. Flush all streams to make it available to ++ * user space. ++ */ ++ kbase_timeline_streams_flush(timeline); ++} +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c +new file mode 100644 +index 000000000..930decf8f +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c +@@ -0,0 +1,413 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ Mapping for each access can cause lower performance. ++#include "mali_kbase_timeline.h" ++#include "mali_kbase_timeline_priv.h" ++#include "mali_kbase_tracepoints.h" + -+config MALI_SHARED_INTERRUPTS -+ bool "Support for shared interrupts" -+ depends on MALI400 -+ default n -+ help -+ Adds functionality required to properly support shared interrupts. Without this support, -+ the device driver will fail during insmod if it detects shared interrupts. This also -+ works when the GPU is not using shared interrupts, but might have a slight performance -+ impact. ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+config MALI_PMU_PARALLEL_POWER_UP -+ bool "Power up Mali PMU domains in parallel" -+ depends on MALI400 -+ default n -+ help -+ This makes the Mali driver power up all PMU power domains in parallel, instead of -+ powering up domains one by one, with a slight delay in between. Powering on all power -+ domains at the same time may cause peak currents higher than what some systems can handle. -+ These systems must not enable this option. ++/* The period of autoflush checker execution in milliseconds. */ ++#define AUTOFLUSH_INTERVAL 1000 /* ms */ + -+config MALI_DT -+ bool "Using device tree to initialize module" -+ depends on MALI400 && OF -+ default n -+ help -+ This enable the Mali driver to choose the device tree path to get platform resoures -+ and disable the old config method. Mali driver could run on the platform which the -+ device tree is enabled in kernel and corresponding hardware description is implemented -+ properly in device DTS file. ++/*****************************************************************************/ + -+config MALI_DEVFREQ -+ bool "Using devfreq to tuning frequency" -+ depends on MALI400 && PM_DEVFREQ -+ select DEVFREQ_GOV_SIMPLE_ONDEMAND -+ default n -+ help -+ Support devfreq for Mali. ++/* These values are used in mali_kbase_tracepoints.h ++ * to retrieve the streams from a kbase_timeline instance. ++ */ ++const size_t __obj_stream_offset = ++ offsetof(struct kbase_timeline, streams) ++ + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; + -+ Using the devfreq framework and, by default, the simpleondemand -+ governor, the frequency of Mali will be dynamically selected from the -+ available OPPs. ++const size_t __aux_stream_offset = ++ offsetof(struct kbase_timeline, streams) ++ + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; + -+config MALI_QUIET -+ bool "Make Mali driver very quiet" -+ depends on MALI400 && !MALI400_DEBUG -+ default n -+ help -+ This forces the Mali driver to never print any messages. ++/** ++ * kbasep_timeline_autoflush_timer_callback - autoflush timer callback ++ * @timer: Timer list ++ * ++ * Timer is executed periodically to check if any of the stream contains ++ * buffer ready to be submitted to user space. 
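For reference, the handshake that the autoflush callback (documented above, defined just below) performs on each stream's counter can be modelled in a few lines of ordinary C11: a writer arms the counter when it buffers data, and the periodic tick flushes only once that data has survived a full AUTOFLUSH_INTERVAL. This is an illustrative userspace sketch; the model_* names, the <stdatomic.h> calls and the explicit counter reset are stand-ins for the kernel atomic_t helpers and kbase_tlstream_flush_stream() used by the driver.

/*
 * Writer side: sets the counter to 0 whenever fresh data is buffered.
 * Timer side : runs every AUTOFLUSH_INTERVAL ms, bumps the counter with a
 *              compare-and-swap, and flushes only when the data has already
 *              survived a full previous tick (counter was >= 1).
 */
#include <stdatomic.h>
#include <stdio.h>

#define NO_PENDING_DATA (-1)

static atomic_int autoflush_counter = NO_PENDING_DATA;

static void writer_buffered_new_data(void)
{
	/* Fresh bytes in the buffer: arm the autoflush countdown. */
	atomic_store(&autoflush_counter, 0);
}

static void autoflush_tick(void)
{
	int af_cnt = atomic_load(&autoflush_counter);

	if (af_cnt < 0)
		return;		/* nothing pending */

	/* Bump the counter; if a writer raced with this tick, skip it. */
	if (!atomic_compare_exchange_strong(&autoflush_counter, &af_cnt, af_cnt + 1))
		return;

	if (af_cnt == 0)
		return;		/* data is younger than one tick: wait */

	printf("flush stream (data pending for >= 1 tick)\n");
	atomic_store(&autoflush_counter, NO_PENDING_DATA);
}

int main(void)
{
	writer_buffered_new_data();
	autoflush_tick();	/* first tick: only increments */
	autoflush_tick();	/* second tick: flushes */
	return 0;
}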
++ */ ++static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) ++{ ++ enum tl_stream_type stype; ++ int rcode; ++ struct kbase_timeline *timeline = ++ container_of(timer, struct kbase_timeline, autoflush_timer); + -+ If unsure, say N. -diff --git a/drivers/gpu/arm/mali400/mali/Makefile b/drivers/gpu/arm/mali400/mali/Makefile -new file mode 100644 -index 000000000..0b91321a5 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/Makefile -@@ -0,0 +1,206 @@ -+# -+# Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+# ++ CSTD_UNUSED(timer); + -+USE_UMPV2=0 -+USING_PROFILING ?= 1 -+USING_INTERNAL_PROFILING ?= 0 -+USING_DVFS ?= 1 -+USING_DMA_BUF_FENCE ?= 0 -+MALI_HEATMAPS_ENABLED ?= 0 -+MALI_DMA_BUF_MAP_ON_ATTACH ?= 1 -+MALI_PMU_PARALLEL_POWER_UP ?= 0 -+USING_DT ?= 0 -+MALI_MEM_SWAP_TRACKING ?= 0 -+USING_DEVFREQ ?= 0 ++ for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; ++ stype++) { ++ struct kbase_tlstream *stream = &timeline->streams[stype]; + -+# The Makefile sets up "arch" based on the CONFIG, creates the version info -+# string and the __malidrv_build_info.c file, and then call the Linux build -+# system to actually build the driver. After that point the Kbuild file takes -+# over. ++ int af_cnt = atomic_read(&stream->autoflush_counter); + -+# set up defaults if not defined by the user -+ARCH ?= arm ++ /* Check if stream contain unflushed data. */ ++ if (af_cnt < 0) ++ continue; + -+OSKOS=linux -+FILES_PREFIX= ++ /* Check if stream should be flushed now. */ ++ if (af_cnt != atomic_cmpxchg( ++ &stream->autoflush_counter, ++ af_cnt, ++ af_cnt + 1)) ++ continue; ++ if (!af_cnt) ++ continue; + -+check_cc2 = \ -+ $(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \ -+ then \ -+ echo "$(2)"; \ -+ else \ -+ echo "$(3)"; \ -+ fi ;) ++ /* Autoflush this stream. */ ++ kbase_tlstream_flush_stream(stream); ++ } + -+# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak -+-include ../../../arm_internal.mak ++ if (atomic_read(&timeline->autoflush_timer_active)) ++ rcode = mod_timer( ++ &timeline->autoflush_timer, ++ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); ++ CSTD_UNUSED(rcode); ++} + -+# Give warning of old config parameters are used -+ifneq ($(CONFIG),) -+$(warning "You have specified the CONFIG variable which is no longer in used. Use TARGET_PLATFORM instead.") -+endif + -+ifneq ($(CPU),) -+$(warning "You have specified the CPU variable which is no longer in used. 
Use TARGET_PLATFORM instead.") -+endif + -+# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM -+-include MALI_CONFIGURATION -+export KDIR ?= $(KDIR-$(TARGET_PLATFORM)) -+export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM)) ++/*****************************************************************************/ + -+ifneq ($(TARGET_PLATFORM),) -+ifeq ($(MALI_PLATFORM),) -+$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)") -+endif -+endif ++int kbase_timeline_init(struct kbase_timeline **timeline, ++ atomic_t *timeline_flags) ++{ ++ enum tl_stream_type i; ++ struct kbase_timeline *result; ++#if MALI_USE_CSF ++ struct kbase_tlstream *csffw_stream; ++#endif + -+# validate lookup result -+ifeq ($(KDIR),) -+$(error No KDIR found for platform $(TARGET_PLATFORM)) -+endif ++ if (!timeline || !timeline_flags) ++ return -EINVAL; + -+ifeq ($(USING_GPU_UTILIZATION), 1) -+ ifeq ($(USING_DVFS), 1) -+ $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need) -+ endif -+endif ++ result = vzalloc(sizeof(*result)); ++ if (!result) ++ return -ENOMEM; + -+ifeq ($(USING_UMP),1) -+export CONFIG_MALI400_UMP=y -+export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1 -+ifeq ($(USE_UMPV2),1) -+UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers -+else -+UMP_SYMVERS_FILE ?= ../ump/Module.symvers -+endif -+KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE)) -+$(warning $(KBUILD_EXTRA_SYMBOLS)) -+endif ++ mutex_init(&result->reader_lock); ++ init_waitqueue_head(&result->event_queue); + -+# Define host system directory -+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build ++ /* Prepare stream structures. */ ++ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) ++ kbase_tlstream_init(&result->streams[i], i, ++ &result->event_queue); + -+include $(KDIR)/.config ++ /* Initialize the kctx list */ ++ mutex_init(&result->tl_kctx_list_lock); ++ INIT_LIST_HEAD(&result->tl_kctx_list); + -+ifeq ($(ARCH), arm) -+# when compiling for ARM we're cross compiling -+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-) -+endif ++ /* Initialize autoflush timer. 
*/ ++ atomic_set(&result->autoflush_timer_active, 0); ++ kbase_timer_setup(&result->autoflush_timer, ++ kbasep_timeline_autoflush_timer_callback); ++ result->timeline_flags = timeline_flags; + -+# report detected/selected settings -+ifdef ARM_INTERNAL_BUILD -+$(warning TARGET_PLATFORM $(TARGET_PLATFORM)) -+$(warning KDIR $(KDIR)) -+$(warning MALI_PLATFORM $(MALI_PLATFORM)) -+endif ++#if MALI_USE_CSF ++ csffw_stream = &result->streams[TL_STREAM_TYPE_CSFFW]; ++ kbase_csf_tl_reader_init(&result->csf_tl_reader, csffw_stream); ++#endif + -+# Set up build config -+export CONFIG_MALI400=m -+export CONFIG_MALI450=y -+export CONFIG_MALI470=y ++ *timeline = result; ++ return 0; ++} + -+export EXTRA_DEFINES += -DCONFIG_MALI400=1 -+export EXTRA_DEFINES += -DCONFIG_MALI450=1 -+export EXTRA_DEFINES += -DCONFIG_MALI470=1 ++void kbase_timeline_term(struct kbase_timeline *timeline) ++{ ++ enum tl_stream_type i; + -+ifneq ($(MALI_PLATFORM),) -+export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 -+export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c) -+endif ++ if (!timeline) ++ return; + -+ifeq ($(USING_PROFILING),1) -+ifeq ($(CONFIG_TRACEPOINTS),) -+$(warning CONFIG_TRACEPOINTS required for profiling) -+else -+export CONFIG_MALI400_PROFILING=y -+export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1 -+ifeq ($(USING_INTERNAL_PROFILING),1) -+export CONFIG_MALI400_INTERNAL_PROFILING=y -+export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1 -+endif -+ifeq ($(MALI_HEATMAPS_ENABLED),1) -+export MALI_HEATMAPS_ENABLED=y -+export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED -+endif -+endif -+endif ++#if MALI_USE_CSF ++ kbase_csf_tl_reader_term(&timeline->csf_tl_reader); ++#endif + -+ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1) -+export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y -+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH -+endif ++ WARN_ON(!list_empty(&timeline->tl_kctx_list)); + -+ifeq ($(MALI_SHARED_INTERRUPTS),1) -+export CONFIG_MALI_SHARED_INTERRUPTS=y -+export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS -+endif ++ for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) ++ kbase_tlstream_term(&timeline->streams[i]); + -+ifeq ($(USING_DVFS),1) -+export CONFIG_MALI_DVFS=y -+export EXTRA_DEFINES += -DCONFIG_MALI_DVFS -+endif ++ vfree(timeline); ++} + -+ifeq ($(USING_DMA_BUF_FENCE),1) -+export CONFIG_MALI_DMA_BUF_FENCE=y -+export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_FENCE -+endif ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) ++{ ++ struct devfreq *devfreq = kbdev->devfreq; + -+ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1) -+export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y -+export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP -+endif ++ /* Devfreq initialization failure isn't a fatal error, so devfreq might ++ * be null. 
++ */ ++ if (devfreq) { ++ unsigned long cur_freq = 0; + -+ifdef CONFIG_OF -+ifeq ($(USING_DT),1) -+export CONFIG_MALI_DT=y -+export EXTRA_DEFINES += -DCONFIG_MALI_DT -+endif -+endif ++ mutex_lock(&devfreq->lock); ++ cur_freq = devfreq->last_status.current_frequency; ++ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); ++ mutex_unlock(&devfreq->lock); ++ } ++} ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+ifeq ($(USING_DEVFREQ), 1) -+ifdef CONFIG_PM_DEVFREQ -+export CONFIG_MALI_DEVFREQ=y -+export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1 -+else -+$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.") -+endif -+endif ++int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags) ++{ ++ int err = 0; ++ u32 timeline_flags = TLSTREAM_ENABLED | flags; ++ struct kbase_timeline *timeline; ++ int rcode; + -+ifneq ($(BUILD),release) -+# Debug -+export CONFIG_MALI400_DEBUG=y -+else -+# Release -+ifeq ($(MALI_QUIET),1) -+export CONFIG_MALI_QUIET=y -+export EXTRA_DEFINES += -DCONFIG_MALI_QUIET -+endif -+endif ++ if (WARN_ON(!kbdev) || WARN_ON(flags & ~BASE_TLSTREAM_FLAGS_MASK)) ++ return -EINVAL; + -+ifeq ($(MALI_SKIP_JOBS),1) -+EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1 -+endif ++ timeline = kbdev->timeline; ++ if (WARN_ON(!timeline)) ++ return -EFAULT; + -+ifeq ($(MALI_MEM_SWAP_TRACKING),1) -+EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1 -+endif ++ if (atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) ++ return -EBUSY; + -+all: $(UMP_SYMVERS_FILE) -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules -+ @rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o ++#if MALI_USE_CSF ++ if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { ++ err = kbase_csf_tl_reader_start(&timeline->csf_tl_reader, kbdev); ++ if (err) { ++ atomic_set(timeline->timeline_flags, 0); ++ return err; ++ } ++ } ++#endif + -+clean: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean ++ /* Reset and initialize header streams. */ ++ kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); + -+kernelrelease: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease ++ timeline->obj_header_btc = obj_desc_header_size; ++ timeline->aux_header_btc = aux_desc_header_size; + -+export CONFIG KBUILD_EXTRA_SYMBOLS -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c -new file mode 100644 -index 000000000..79a418c36 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++#if !MALI_USE_CSF ++ /* If job dumping is enabled, readjust the software event's ++ * timeout as the default value of 3 seconds is often ++ * insufficient. 
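The -EBUSY path in kbase_timeline_acquire() above works because the flags word doubles as an ownership token: it moves atomically from zero to a non-zero "enabled | flags" value, so only one client can hold the timeline at a time, and the release path hands it back by storing zero. A minimal userspace sketch of that pattern, with placeholder constants rather than the driver's real flag bits:

#include <stdatomic.h>
#include <stdio.h>

#define MODEL_TLSTREAM_ENABLED (1u << 31)	/* placeholder bit value */

static atomic_uint timeline_flags;

static int model_timeline_acquire(unsigned int flags)
{
	unsigned int expected = 0;

	/* Succeeds only when nobody holds the timeline (flags word is 0). */
	if (!atomic_compare_exchange_strong(&timeline_flags, &expected,
					    MODEL_TLSTREAM_ENABLED | flags))
		return -1;	/* -EBUSY in the driver */

	return 0;
}

static void model_timeline_release(void)
{
	atomic_store(&timeline_flags, 0);
}

int main(void)
{
	printf("first acquire:  %d\n", model_timeline_acquire(0x1));	/* 0  */
	printf("second acquire: %d\n", model_timeline_acquire(0x1));	/* -1 */
	model_timeline_release();
	printf("after release:  %d\n", model_timeline_acquire(0x1));	/* 0  */
	return 0;
}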
++ */ ++ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { ++ dev_info(kbdev->dev, ++ "Job dumping is enabled, readjusting the software event's timeout\n"); ++ atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); ++ } ++#endif /* !MALI_USE_CSF */ + -+#include "mali_broadcast.h" -+#include "mali_kernel_common.h" -+#include "mali_osk.h" ++ /* Summary stream was cleared during acquire. ++ * Create static timeline objects that will be ++ * read by client. ++ */ ++ kbase_create_timeline_objects(kbdev); + -+#define MALI_BROADCAST_REGISTER_SIZE 0x1000 -+#define MALI_BROADCAST_REG_BROADCAST_MASK 0x0 -+#define MALI_BROADCAST_REG_INTERRUPT_MASK 0x4 ++#ifdef CONFIG_MALI_BIFROST_DEVFREQ ++ /* Devfreq target tracepoints are only fired when the target ++ * changes, so we won't know the current target unless we ++ * send it now. ++ */ ++ kbase_tlstream_current_devfreq_target(kbdev); ++#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ + -+struct mali_bcast_unit { -+ struct mali_hw_core hw_core; -+ u32 current_mask; -+}; ++ /* Start the autoflush timer. ++ * We must do this after creating timeline objects to ensure we ++ * don't auto-flush the streams which will be reset during the ++ * summarization process. ++ */ ++ atomic_set(&timeline->autoflush_timer_active, 1); ++ rcode = mod_timer(&timeline->autoflush_timer, ++ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); ++ CSTD_UNUSED(rcode); + -+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource) -+{ -+ struct mali_bcast_unit *bcast_unit = NULL; ++ timeline->last_acquire_time = ktime_get_raw(); + -+ MALI_DEBUG_ASSERT_POINTER(resource); -+ MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n", -+ resource->description)); ++ return err; ++} + -+ bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit)); -+ if (NULL == bcast_unit) { -+ MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n")); -+ return NULL; -+ } ++void kbase_timeline_release(struct kbase_timeline *timeline) ++{ ++ ktime_t elapsed_time; ++ s64 elapsed_time_ms, time_to_sleep; + -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core, -+ resource, MALI_BROADCAST_REGISTER_SIZE)) { -+ bcast_unit->current_mask = 0; -+ mali_bcast_reset(bcast_unit); ++ if (WARN_ON(!timeline) || WARN_ON(!atomic_read(timeline->timeline_flags))) ++ return; + -+ return bcast_unit; -+ } else { -+ MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n")); -+ } ++ /* Get the amount of time passed since the timeline was acquired and ensure ++ * we sleep for long enough such that it has been at least ++ * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. ++ * This prevents userspace from spamming acquire and release too quickly. ++ */ ++ elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); ++ elapsed_time_ms = ktime_to_ms(elapsed_time); ++ time_to_sleep = (elapsed_time_ms < 0 ? TIMELINE_HYSTERESIS_TIMEOUT_MS : ++ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); ++ if (time_to_sleep > 0) ++ msleep_interruptible(time_to_sleep); + -+ _mali_osk_free(bcast_unit); ++#if MALI_USE_CSF ++ kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); ++#endif + -+ return NULL; -+} ++ /* Stop autoflush timer before releasing access to streams. 
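The release-side arithmetic above sleeps only for whatever part of the hysteresis window is still outstanding, so an acquire/release pair always spans at least TIMELINE_HYSTERESIS_TIMEOUT_MS. A short worked example, assuming a 5000 ms window purely for illustration (the real constant lives in mali_kbase_timeline_priv.h and may differ):

#include <stdio.h>

#define MODEL_HYSTERESIS_TIMEOUT_MS 5000LL	/* assumed value, for illustration */

static long long time_to_sleep_ms(long long elapsed_ms)
{
	/* A negative elapsed time (clock anomaly) falls back to a full wait. */
	if (elapsed_ms < 0)
		return MODEL_HYSTERESIS_TIMEOUT_MS;
	return MODEL_HYSTERESIS_TIMEOUT_MS - elapsed_ms;
}

int main(void)
{
	/* Released 1200 ms after acquire: sleep the remaining 3800 ms. */
	printf("%lld\n", time_to_sleep_ms(1200));
	/* Released 7000 ms after acquire: result is negative, so no sleep. */
	printf("%lld\n", time_to_sleep_ms(7000));
	return 0;
}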
*/ ++ atomic_set(&timeline->autoflush_timer_active, 0); ++ del_timer_sync(&timeline->autoflush_timer); + -+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bcast_unit); -+ mali_hw_core_delete(&bcast_unit->hw_core); -+ _mali_osk_free(bcast_unit); ++ atomic_set(timeline->timeline_flags, 0); +} + -+/* Call this function to add the @group's id into bcast mask -+ * Note: redundant calling this function with same @group -+ * doesn't make any difference as calling it once -+ */ -+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, -+ struct mali_group *group) ++int kbase_timeline_streams_flush(struct kbase_timeline *timeline) +{ -+ u32 bcast_id; -+ u32 broadcast_mask; ++ enum tl_stream_type stype; ++ bool has_bytes = false; ++ size_t nbytes = 0; + -+ MALI_DEBUG_ASSERT_POINTER(bcast_unit); -+ MALI_DEBUG_ASSERT_POINTER(group); ++ if (WARN_ON(!timeline)) ++ return -EINVAL; + -+ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group)); ++#if MALI_USE_CSF ++ { ++ int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); + -+ broadcast_mask = bcast_unit->current_mask; ++ if (ret > 0) ++ has_bytes = true; ++ } ++#endif + -+ broadcast_mask |= (bcast_id); /* add PP core to broadcast */ -+ broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */ ++ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { ++ nbytes = kbase_tlstream_flush_stream(&timeline->streams[stype]); ++ if (nbytes > 0) ++ has_bytes = true; ++ } ++ return has_bytes ? 0 : -EIO; ++} + -+ /* store mask so we can restore on reset */ -+ bcast_unit->current_mask = broadcast_mask; ++void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) ++{ ++ kbase_tlstream_reset( ++ &timeline->streams[TL_STREAM_TYPE_OBJ]); ++ kbase_tlstream_reset( ++ &timeline->streams[TL_STREAM_TYPE_AUX]); ++#if MALI_USE_CSF ++ kbase_tlstream_reset( ++ &timeline->streams[TL_STREAM_TYPE_CSFFW]); ++#endif +} + -+/* Call this function to remove @group's id from bcast mask -+ * Note: redundant calling this function with same @group -+ * doesn't make any difference as calling it once -+ */ -+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, -+ struct mali_group *group) ++void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx) +{ -+ u32 bcast_id; -+ u32 broadcast_mask; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_timeline *timeline = kbdev->timeline; + -+ MALI_DEBUG_ASSERT_POINTER(bcast_unit); -+ MALI_DEBUG_ASSERT_POINTER(group); ++ /* Remove the context from the list to ensure we don't try and ++ * summarize a context that is being destroyed. ++ * ++ * It's unsafe to try and summarize a context being destroyed as the ++ * locks we might normally attempt to acquire, and the data structures ++ * we would normally attempt to traverse could already be destroyed. ++ * ++ * In the case where the tlstream is acquired between this pre destroy ++ * call and the post destroy call, we will get a context destroy ++ * tracepoint without the corresponding context create tracepoint, ++ * but this will not affect the correctness of the object model. 
++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); ++ list_del_init(&kctx->tl_kctx_list_node); ++ mutex_unlock(&timeline->tl_kctx_list_lock); ++} + -+ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group)); ++void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_timeline *timeline = kbdev->timeline; + -+ broadcast_mask = bcast_unit->current_mask; ++ /* On context create, add the context to the list to ensure it is ++ * summarized when timeline is acquired ++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); + -+ broadcast_mask &= ~((bcast_id << 16) | bcast_id); ++ list_add(&kctx->tl_kctx_list_node, &timeline->tl_kctx_list); + -+ /* store mask so we can restore on reset */ -+ bcast_unit->current_mask = broadcast_mask; ++ /* Fire the tracepoints with the lock held to ensure the tracepoints ++ * are either fired before or after the summarization, ++ * never in parallel with it. If fired in parallel, we could get ++ * duplicate creation tracepoints. ++ */ ++#if MALI_USE_CSF ++ KBASE_TLSTREAM_TL_KBASE_NEW_CTX( ++ kbdev, kctx->id, kbdev->gpu_props.props.raw_props.gpu_id); ++#endif ++ /* Trace with the AOM tracepoint even in CSF for dumping */ ++ KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0); ++ ++ mutex_unlock(&timeline->tl_kctx_list_lock); +} + -+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit) ++void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx) +{ -+ MALI_DEBUG_ASSERT_POINTER(bcast_unit); -+ -+ MALI_DEBUG_PRINT(4, -+ ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n", -+ bcast_unit->current_mask, -+ bcast_unit->current_mask & 0xFF)); ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ /* set broadcast mask */ -+ mali_hw_core_register_write(&bcast_unit->hw_core, -+ MALI_BROADCAST_REG_BROADCAST_MASK, -+ bcast_unit->current_mask); ++ /* Trace with the AOM tracepoint even in CSF for dumping */ ++ KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); ++#if MALI_USE_CSF ++ KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kbdev, kctx->id); ++#endif + -+ /* set IRQ override mask */ -+ mali_hw_core_register_write(&bcast_unit->hw_core, -+ MALI_BROADCAST_REG_INTERRUPT_MASK, -+ bcast_unit->current_mask & 0xFF); ++ /* Flush the timeline stream, so the user can see the termination ++ * tracepoints being fired. ++ * The "if" statement below is for optimization. It is safe to call ++ * kbase_timeline_streams_flush when timeline is disabled. 
++ */ ++ if (atomic_read(&kbdev->timeline_flags) != 0) ++ kbase_timeline_streams_flush(kbdev->timeline); +} + -+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit) ++#if MALI_UNIT_TEST ++void kbase_timeline_stats(struct kbase_timeline *timeline, ++ u32 *bytes_collected, u32 *bytes_generated) +{ -+ MALI_DEBUG_ASSERT_POINTER(bcast_unit); ++ enum tl_stream_type stype; + -+ MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n")); ++ KBASE_DEBUG_ASSERT(bytes_collected); + -+ /* set broadcast mask */ -+ mali_hw_core_register_write(&bcast_unit->hw_core, -+ MALI_BROADCAST_REG_BROADCAST_MASK, -+ 0x0); ++ /* Accumulate bytes generated per stream */ ++ *bytes_generated = 0; ++ for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; ++ stype++) ++ *bytes_generated += atomic_read( ++ &timeline->streams[stype].bytes_generated); + -+ /* set IRQ override mask */ -+ mali_hw_core_register_write(&bcast_unit->hw_core, -+ MALI_BROADCAST_REG_INTERRUPT_MASK, -+ 0x0); ++ *bytes_collected = atomic_read(&timeline->bytes_collected); +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h ++#endif /* MALI_UNIT_TEST */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h new file mode 100644 -index 000000000..0475b7171 +index 000000000..62be6c64c --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h -@@ -0,0 +1,57 @@ ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h +@@ -0,0 +1,128 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ + -+#ifndef __MALI_BROADCAST_H__ -+#define __MALI_BROADCAST_H__ ++#if !defined(_KBASE_TIMELINE_H) ++#define _KBASE_TIMELINE_H + -+/* -+ * Interface for the broadcast unit on Mali-450. -+ * -+ * - Represents up to 8 × (MMU + PP) pairs. -+ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by -+ * setting a mask. 
++#include ++ ++/*****************************************************************************/ ++ ++struct kbase_timeline; ++ ++/** ++ * kbase_timeline_init - initialize timeline infrastructure in kernel ++ * @timeline: Newly created instance of kbase_timeline will be stored in ++ * this pointer. ++ * @timeline_flags: Timeline status will be written to this variable when a ++ * client is attached/detached. The variable must be valid ++ * while timeline instance is valid. ++ * Return: zero on success, negative number on error + */ ++int kbase_timeline_init(struct kbase_timeline **timeline, ++ atomic_t *timeline_flags); + -+#include "mali_hw_core.h" -+#include "mali_group.h" ++/** ++ * kbase_timeline_term - terminate timeline infrastructure in kernel ++ * ++ * @timeline: Timeline instance to be terminated. It must be previously created ++ * with kbase_timeline_init(). ++ */ ++void kbase_timeline_term(struct kbase_timeline *timeline); + -+struct mali_bcast_unit; ++/** ++ * kbase_timeline_io_acquire - acquire timeline stream file descriptor ++ * @kbdev: Kbase device ++ * @flags: Timeline stream flags ++ * ++ * This descriptor is meant to be used by userspace timeline to gain access to ++ * kernel timeline stream. This stream is later broadcasted by user space to the ++ * timeline client. ++ * Only one entity can own the descriptor at any given time. Descriptor shall be ++ * closed if unused. If descriptor cannot be obtained (i.e. when it is already ++ * being used) return will be a negative value. ++ * ++ * Return: file descriptor on success, negative number on error ++ */ ++int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); + -+struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource); -+void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit); ++/** ++ * kbase_timeline_streams_flush - flush timeline streams. ++ * @timeline: Timeline instance ++ * ++ * Function will flush pending data in all timeline streams. ++ * ++ * Return: Zero on success, errno on failure. ++ */ ++int kbase_timeline_streams_flush(struct kbase_timeline *timeline); + -+/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */ -+void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group); ++/** ++ * kbase_timeline_streams_body_reset - reset timeline body streams. ++ * @timeline: Timeline instance ++ * ++ * Function will discard pending data in all timeline body streams. ++ */ ++void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); + -+/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */ -+void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group); ++/** ++ * kbase_timeline_post_kbase_context_create - Inform timeline that a new KBase ++ * Context has been created. ++ * @kctx: KBase Context ++ */ ++void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx); + -+/* Re-set cached mask. This needs to be called after having been suspended. */ -+void mali_bcast_reset(struct mali_bcast_unit *bcast_unit); ++/** ++ * kbase_timeline_pre_kbase_context_destroy - Inform timeline that a KBase ++ * Context is about to be destroyed. ++ * @kctx: KBase Context ++ */ ++void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); + +/** -+ * Disable broadcast unit ++ * kbase_timeline_post_kbase_context_destroy - Inform timeline that a KBase ++ * Context has been destroyed. 
++ * @kctx: KBase Context + * -+ * mali_bcast_enable must be called to re-enable the unit. Cores may not be -+ * added or removed when the unit is disabled. ++ * Should be called immediately before the memory is freed, and the context ID ++ * and kbdev pointer should still be valid. + */ -+void mali_bcast_disable(struct mali_bcast_unit *bcast_unit); ++void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); ++ ++#if MALI_UNIT_TEST + +/** -+ * Re-enable broadcast unit ++ * kbase_timeline_stats - read timeline stream statistics ++ * @timeline: Timeline instance ++ * @bytes_collected: Will hold number of bytes read by the user ++ * @bytes_generated: Will hold number of bytes generated by trace points ++ */ ++void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); ++#endif /* MALI_UNIT_TEST */ ++ ++/** ++ * kbase_timeline_io_debugfs_init - Add a debugfs entry for reading timeline stream data + * -+ * This resets the masks to include the cores present when mali_bcast_disable was called. ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. + */ -+MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit) -+{ -+ mali_bcast_reset(bcast_unit); -+} ++void kbase_timeline_io_debugfs_init(struct kbase_device *kbdev); + -+#endif /* __MALI_BROADCAST_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c ++#endif /* _KBASE_TIMELINE_H */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c new file mode 100644 -index 000000000..5bed27a8c +index 000000000..ae570064e --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c -@@ -0,0 +1,139 @@ ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +@@ -0,0 +1,465 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* -+ * Copyright (C) 2010-2012, 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * + */ + -+#include "mali_kernel_utilization.h" -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_dvfs_policy.h" -+#include "mali_control_timer.h" ++#include "mali_kbase_timeline_priv.h" ++#include "mali_kbase_tlstream.h" ++#include "mali_kbase_tracepoints.h" ++#include "mali_kbase_timeline.h" + -+static u64 period_start_time = 0; ++#include + -+/** .KP : mali_control_timer */ -+static _mali_osk_timer_t *mali_control_timer = NULL; -+static mali_bool timer_running = MALI_FALSE; ++#include ++#include ++#include ++ ++/* Explicitly include epoll header for old kernels. Not required from 4.16. */ ++#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE ++#include ++#endif ++ ++static int kbase_unprivileged_global_profiling; + +/** -+ * period_of_notifying_mali_utilization_to_platform_dependent_part, -+ * ms 为å•ä½. ++ * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes ++ * ++ * @val: String containing value to set. Only strings representing positive ++ * integers are accepted as valid; any non-positive integer (including 0) ++ * is rejected. ++ * @kp: Module parameter associated with this method. ++ * ++ * This method can only be used to enable permissions for unprivileged processes, ++ * if they are disabled: for this reason, the only values which are accepted are ++ * strings representing positive integers. Since it's impossible to disable ++ * permissions once they're set, any integer which is non-positive is rejected, ++ * including 0. ++ * ++ * Return: 0 if success, otherwise error code. + */ -+static u32 mali_control_timeout = 20; -+ -+void mali_control_timer_add(u32 timeout)/* 'timeout' : 以 ms 为å•ä½. */ ++static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp) +{ -+ _mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout)); -+} ++ int new_val; ++ int ret = kstrtoint(val, 0, &new_val); + -+void mali_control_timer_mod(u32 timeout_in_ms) -+{ -+ _mali_osk_timer_mod(mali_control_timer, _mali_osk_time_mstoticks(timeout_in_ms)); ++ if (ret == 0) { ++ if (new_val < 1) ++ return -EINVAL; ++ ++ kbase_unprivileged_global_profiling = 1; ++ } ++ ++ return ret; +} + -+static void mali_control_timer_callback(void *arg) -+{ -+ if (mali_utilization_enabled()) { -+ struct mali_gpu_utilization_data *util_data = NULL; -+ u64 time_period = 0; -+ mali_bool need_add_timer = MALI_TRUE; ++static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = { ++ .get = param_get_int, ++ .set = kbase_unprivileged_global_profiling_set, ++}; + -+ /* Calculate gpu utilization */ -+ util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer); ++module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops, ++ &kbase_unprivileged_global_profiling, 0600); + -+ if (util_data) { -+#if defined(CONFIG_MALI_DVFS) -+ mali_dvfs_policy_realize(util_data, time_period); ++/* The timeline stream file operations functions. 
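The parameter setter above is deliberately one-way: only strings that parse to a positive integer are accepted, and once unprivileged profiling has been enabled it cannot be switched back off through this path. A rough userspace model of that validation, with strtol standing in for kstrtoint; when the driver is built as a module the knob would normally be reached via /sys/module/<module name>/parameters/kbase_unprivileged_global_profiling, where the module name depends on the build.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int model_unprivileged_profiling;

static int model_param_set(const char *val)
{
	char *end;
	long new_val;

	errno = 0;
	new_val = strtol(val, &end, 0);
	if (errno || end == val || *end != '\0')
		return -EINVAL;		/* not an integer at all */
	if (new_val < 1)
		return -EINVAL;		/* 0 and negatives are rejected */

	model_unprivileged_profiling = 1;	/* enable-only: never cleared here */
	return 0;
}

int main(void)
{
	printf("\"0\"  -> %d\n", model_param_set("0"));		/* rejected */
	printf("\"1\"  -> %d\n", model_param_set("1"));		/* accepted */
	printf("\"-5\" -> %d\n", model_param_set("-5"));	/* rejected */
	printf("flag  = %d\n", model_unprivileged_profiling);
	return 0;
}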
*/ ++static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, ++ size_t size, loff_t *f_pos); ++static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait); ++static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); ++static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, ++ int datasync); ++ ++static bool timeline_is_permitted(void) ++{ ++#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE ++ return kbase_unprivileged_global_profiling || perfmon_capable(); +#else -+ mali_utilization_platform_realize(util_data); ++ return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN); +#endif -+ -+ if (MALI_TRUE == timer_running) -+ if (MALI_TRUE == need_add_timer) { -+ mali_control_timer_mod(mali_control_timeout); -+ } -+ } -+ } +} + -+/* Init a timer (for now it is used for GPU utilization and dvfs) */ -+_mali_osk_errcode_t mali_control_timer_init(void) ++/** ++ * kbasep_timeline_io_packet_pending - check timeline streams for pending ++ * packets ++ * ++ * @timeline: Timeline instance ++ * @ready_stream: Pointer to variable where stream will be placed ++ * @rb_idx_raw: Pointer to variable where read buffer index will be placed ++ * ++ * Function checks all streams for pending packets. It will stop as soon as ++ * packet ready to be submitted to user space is detected. Variables under ++ * pointers, passed as the parameters to this function will be updated with ++ * values pointing to right stream and buffer. ++ * ++ * Return: non-zero if any of timeline streams has at last one packet ready ++ */ ++static int ++kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, ++ struct kbase_tlstream **ready_stream, ++ unsigned int *rb_idx_raw) +{ -+ _mali_osk_device_data data; ++ enum tl_stream_type i; + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ /* Use device specific settings (if defined) */ -+ if (0 != data.control_interval) { -+ mali_control_timeout = data.control_interval; -+ MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout)); -+ } -+ } ++ KBASE_DEBUG_ASSERT(ready_stream); ++ KBASE_DEBUG_ASSERT(rb_idx_raw); + -+ mali_control_timer = _mali_osk_timer_init(mali_control_timer_callback); -+ if (NULL == mali_control_timer) { -+ return _MALI_OSK_ERR_FAULT; ++ for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) { ++ struct kbase_tlstream *stream = &timeline->streams[i]; ++ *rb_idx_raw = atomic_read(&stream->rbi); ++ /* Read buffer index may be updated by writer in case of ++ * overflow. Read and write buffer indexes must be ++ * loaded in correct order. ++ */ ++ smp_rmb(); ++ if (atomic_read(&stream->wbi) != *rb_idx_raw) { ++ *ready_stream = stream; ++ return 1; ++ } + } -+ _mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL); + -+ return _MALI_OSK_ERR_OK; ++ return 0; +} + -+void mali_control_timer_term(void) ++/** ++ * kbasep_timeline_has_header_data() - check timeline headers for pending ++ * packets ++ * ++ * @timeline: Timeline instance ++ * ++ * Return: non-zero if any of timeline headers has at last one packet ready. 
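The pending-packet test above treats rbi and wbi as free-running counters and only requires the two loads to be ordered, which smp_rmb() provides in the kernel. The same idea, sketched in userspace C11 with acquire loads standing in for the barrier (the model_* names are placeholders, not driver symbols):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct model_stream {
	atomic_uint rbi;	/* next packet to be read    */
	atomic_uint wbi;	/* next packet to be written */
};

static bool model_packet_pending(struct model_stream *stream,
				 unsigned int *rb_idx_raw)
{
	/* Load the read index first, then the write index, in that order. */
	*rb_idx_raw = atomic_load_explicit(&stream->rbi, memory_order_acquire);
	return atomic_load_explicit(&stream->wbi, memory_order_acquire) !=
	       *rb_idx_raw;
}

int main(void)
{
	struct model_stream s = { 0, 0 };
	unsigned int rb;

	printf("pending: %d\n", model_packet_pending(&s, &rb));	/* 0 */
	atomic_fetch_add(&s.wbi, 1);	/* writer submits one packet */
	printf("pending: %d\n", model_packet_pending(&s, &rb));	/* 1 */
	return 0;
}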
++ */ ++static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline) +{ -+ if (NULL != mali_control_timer) { -+ _mali_osk_timer_del(mali_control_timer); -+ timer_running = MALI_FALSE; -+ _mali_osk_timer_term(mali_control_timer); -+ mali_control_timer = NULL; -+ } ++ return timeline->obj_header_btc || timeline->aux_header_btc ++#if MALI_USE_CSF ++ || timeline->csf_tl_reader.tl_header.btc ++#endif ++ ; +} + -+mali_bool mali_control_timer_resume(u64 time_now) ++/** ++ * copy_stream_header() - copy timeline stream header. ++ * ++ * @buffer: Pointer to the buffer provided by user. ++ * @size: Maximum amount of data that can be stored in the buffer. ++ * @copy_len: Pointer to amount of bytes that has been copied already ++ * within the read system call. ++ * @hdr: Pointer to the stream header. ++ * @hdr_size: Header size. ++ * @hdr_btc: Pointer to the remaining number of bytes to copy. ++ * ++ * Return: 0 if success, -1 otherwise. ++ */ ++static inline int copy_stream_header(char __user *buffer, size_t size, ++ ssize_t *copy_len, const char *hdr, ++ size_t hdr_size, size_t *hdr_btc) +{ -+ mali_utilization_data_assert_locked(); ++ const size_t offset = hdr_size - *hdr_btc; ++ const size_t copy_size = MIN(size - *copy_len, *hdr_btc); + -+ if (timer_running != MALI_TRUE) { -+ timer_running = MALI_TRUE; ++ if (!*hdr_btc) ++ return 0; + -+ period_start_time = time_now; ++ if (WARN_ON(*hdr_btc > hdr_size)) ++ return -1; + -+ mali_utilization_reset(); ++ if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) ++ return -1; + -+ return MALI_TRUE; -+ } ++ *hdr_btc -= copy_size; ++ *copy_len += copy_size; + -+ return MALI_FALSE; ++ return 0; +} + -+void mali_control_timer_pause(void) ++/** ++ * kbasep_timeline_copy_headers - copy timeline headers to the user ++ * ++ * @timeline: Timeline instance ++ * @buffer: Pointer to the buffer provided by user ++ * @size: Maximum amount of data that can be stored in the buffer ++ * @copy_len: Pointer to amount of bytes that has been copied already ++ * within the read system call. ++ * ++ * This helper function checks if timeline headers have not been sent ++ * to the user, and if so, sends them. copy_len is respectively ++ * updated. ++ * ++ * Return: 0 if success, -1 if copy_to_user has failed. 
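copy_stream_header() above lets a stream header be delivered across several read() calls, with *hdr_btc counting the bytes still owed to user space and the copy offset derived from it. A self-contained model of that bookkeeping, with memcpy standing in for copy_to_user:

#include <stdio.h>
#include <string.h>

#define MODEL_MIN(a, b) ((a) < (b) ? (a) : (b))

static int model_copy_stream_header(char *buffer, size_t size, size_t *copy_len,
				    const char *hdr, size_t hdr_size,
				    size_t *hdr_btc)
{
	const size_t offset = hdr_size - *hdr_btc;
	const size_t copy_size = MODEL_MIN(size - *copy_len, *hdr_btc);

	if (!*hdr_btc)
		return 0;	/* header already fully delivered */

	memcpy(&buffer[*copy_len], &hdr[offset], copy_size);
	*hdr_btc -= copy_size;
	*copy_len += copy_size;
	return 0;
}

int main(void)
{
	const char hdr[8] = "HEADER7";
	size_t btc = sizeof(hdr);	/* bytes of header still to copy */
	char user_buf[5];
	size_t copy_len = 0;

	/* A small first read gets the first 5 bytes of the header... */
	model_copy_stream_header(user_buf, sizeof(user_buf), &copy_len, hdr, sizeof(hdr), &btc);
	printf("after read 1: copied=%zu remaining=%zu\n", copy_len, btc);

	/* ...and the next read call resumes at the recorded offset. */
	copy_len = 0;
	model_copy_stream_header(user_buf, sizeof(user_buf), &copy_len, hdr, sizeof(hdr), &btc);
	printf("after read 2: copied=%zu remaining=%zu\n", copy_len, btc);
	return 0;
}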
++ */ ++static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, ++ char __user *buffer, size_t size, ++ ssize_t *copy_len) +{ -+ mali_utilization_data_assert_locked(); -+ if (timer_running == MALI_TRUE) { -+ timer_running = MALI_FALSE; -+ } ++ if (copy_stream_header(buffer, size, copy_len, obj_desc_header, ++ obj_desc_header_size, &timeline->obj_header_btc)) ++ return -1; ++ ++ if (copy_stream_header(buffer, size, copy_len, aux_desc_header, ++ aux_desc_header_size, &timeline->aux_header_btc)) ++ return -1; ++#if MALI_USE_CSF ++ if (copy_stream_header(buffer, size, copy_len, ++ timeline->csf_tl_reader.tl_header.data, ++ timeline->csf_tl_reader.tl_header.size, ++ &timeline->csf_tl_reader.tl_header.btc)) ++ return -1; ++#endif ++ return 0; +} + -+void mali_control_timer_suspend(mali_bool suspend) ++/** ++ * kbasep_timeline_io_read - copy data from streams to buffer provided by user ++ * ++ * @filp: Pointer to file structure ++ * @buffer: Pointer to the buffer provided by user ++ * @size: Maximum amount of data that can be stored in the buffer ++ * @f_pos: Pointer to file offset (unused) ++ * ++ * Return: number of bytes stored in the buffer ++ */ ++static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, ++ size_t size, loff_t *f_pos) +{ -+ mali_utilization_data_lock(); ++ ssize_t copy_len = 0; ++ struct kbase_timeline *timeline; + -+ if (timer_running == MALI_TRUE) { -+ timer_running = MALI_FALSE; ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(f_pos); + -+ mali_utilization_data_unlock(); ++ if (WARN_ON(!filp->private_data)) ++ return -EFAULT; + -+ if (suspend == MALI_TRUE) { -+ _mali_osk_timer_del(mali_control_timer); -+ mali_utilization_reset(); -+ } -+ } else { -+ mali_utilization_data_unlock(); -+ } -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h -new file mode 100644 -index 000000000..c9e6e058e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ timeline = (struct kbase_timeline *)filp->private_data; + -+#ifndef __MALI_CONTROL_TIMER_H__ -+#define __MALI_CONTROL_TIMER_H__ ++ if (!buffer) ++ return -EINVAL; + -+#include "mali_osk.h" ++ if (*f_pos < 0) ++ return -EINVAL; + -+_mali_osk_errcode_t mali_control_timer_init(void); ++ mutex_lock(&timeline->reader_lock); + -+void mali_control_timer_term(void); ++ while (copy_len < size) { ++ struct kbase_tlstream *stream = NULL; ++ unsigned int rb_idx_raw = 0; ++ unsigned int wb_idx_raw; ++ unsigned int rb_idx; ++ size_t rb_size; + -+mali_bool mali_control_timer_resume(u64 time_now); ++ if (kbasep_timeline_copy_headers(timeline, buffer, size, ++ ©_len)) { ++ copy_len = -EFAULT; ++ break; ++ } + -+void mali_control_timer_suspend(mali_bool suspend); -+void mali_control_timer_pause(void); ++ /* If we already read some packets and there is no ++ * packet pending then return back to user. 
++ * If we don't have any data yet, wait for packet to be ++ * submitted. ++ */ ++ if (copy_len > 0) { ++ if (!kbasep_timeline_io_packet_pending( ++ timeline, &stream, &rb_idx_raw)) ++ break; ++ } else { ++ if (wait_event_interruptible( ++ timeline->event_queue, ++ kbasep_timeline_io_packet_pending( ++ timeline, &stream, &rb_idx_raw))) { ++ copy_len = -ERESTARTSYS; ++ break; ++ } ++ } + -+void mali_control_timer_add(u32 timeout); ++ if (WARN_ON(!stream)) { ++ copy_len = -EFAULT; ++ break; ++ } + -+void mali_control_timer_mod(u32 timeout_in_ms); ++ /* Check if this packet fits into the user buffer. ++ * If so copy its content. ++ */ ++ rb_idx = rb_idx_raw % PACKET_COUNT; ++ rb_size = atomic_read(&stream->buffer[rb_idx].size); ++ if (rb_size > size - copy_len) ++ break; ++ if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, ++ rb_size)) { ++ copy_len = -EFAULT; ++ break; ++ } + -+#endif /* __MALI_CONTROL_TIMER_H__ */ ++ /* If the distance between read buffer index and write ++ * buffer index became more than PACKET_COUNT, then overflow ++ * happened and we need to ignore the last portion of bytes ++ * that we have just sent to user. ++ */ ++ smp_rmb(); ++ wb_idx_raw = atomic_read(&stream->wbi); + -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c -new file mode 100644 -index 000000000..99b7f3607 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { ++ copy_len += rb_size; ++ atomic_inc(&stream->rbi); ++#if MALI_UNIT_TEST ++ atomic_add(rb_size, &timeline->bytes_collected); ++#endif /* MALI_UNIT_TEST */ + -+#include "mali_dlbu.h" -+#include "mali_memory.h" -+#include "mali_pp.h" -+#include "mali_group.h" -+#include "mali_osk.h" -+#include "mali_hw_core.h" ++ } else { ++ const unsigned int new_rb_idx_raw = ++ wb_idx_raw - PACKET_COUNT + 1; ++ /* Adjust read buffer index to the next valid buffer */ ++ atomic_set(&stream->rbi, new_rb_idx_raw); ++ } ++ } + -+/** -+ * Size of DLBU registers in bytes -+ */ -+#define MALI_DLBU_SIZE 0x400 ++ mutex_unlock(&timeline->reader_lock); + -+mali_dma_addr mali_dlbu_phys_addr = 0; -+static mali_io_address mali_dlbu_cpu_addr = NULL; ++ return copy_len; ++} + +/** -+ * DLBU register numbers -+ * Used in the register read/write routines. -+ * See the hardware documentation for more information about each register ++ * kbasep_timeline_io_poll - poll timeline stream for packets ++ * @filp: Pointer to file structure ++ * @wait: Pointer to poll table ++ * ++ * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, ++ * otherwise zero, or EPOLLHUP | EPOLLERR on error. 
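The overflow branch in kbasep_timeline_io_read() above relies on unsigned index arithmetic: if the writer has advanced by PACKET_COUNT or more while a packet was being copied out, that packet has been overwritten, so the copied bytes are dropped and the read index jumps to the oldest packet that is still intact. A small sketch of that arithmetic (the PACKET_COUNT value below is arbitrary, chosen only for the example):

#include <stdio.h>

#define MODEL_PACKET_COUNT 4u	/* stand-in for the driver's PACKET_COUNT */

static unsigned int next_read_index(unsigned int rb_idx_raw,
				    unsigned int wb_idx_raw,
				    int *copy_is_valid)
{
	/* Unsigned subtraction is wraparound-safe for free-running counters. */
	if (wb_idx_raw - rb_idx_raw < MODEL_PACKET_COUNT) {
		*copy_is_valid = 1;
		return rb_idx_raw + 1;				/* consume the packet */
	}

	*copy_is_valid = 0;					/* packet was overwritten */
	return wb_idx_raw - MODEL_PACKET_COUNT + 1;		/* oldest valid packet */
}

int main(void)
{
	int valid;

	/* Writer is 2 packets ahead: the copied packet is still intact. */
	printf("rbi -> %u (valid=%d)\n", next_read_index(10, 12, &valid), valid);

	/* Writer lapped the reader (5 >= 4 ahead): skip to index 12. */
	printf("rbi -> %u (valid=%d)\n", next_read_index(10, 15, &valid), valid);
	return 0;
}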
+ */ -+typedef enum mali_dlbu_register { -+ MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address; -+ 31:12 Physical address to the page used for the DLBU -+ 0 DLBU enable - set this bit to 1 enables the AXI bus -+ between PPs and L2s, setting to 0 disables the router and -+ no further transactions are sent to DLBU */ -+ MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR = 0x0004, /**< Master tile list virtual base address; -+ 31:12 Virtual address to the page used for the DLBU */ -+ MALI_DLBU_REGISTER_TLLIST_VBASEADDR = 0x0008, /**< Tile list virtual base address; -+ 31:12 Virtual address to the tile list. This address is used when -+ calculating the call address sent to PP.*/ -+ MALI_DLBU_REGISTER_FB_DIM = 0x000C, /**< Framebuffer dimension; -+ 23:16 Number of tiles in Y direction-1 -+ 7:0 Number of tiles in X direction-1 */ -+ MALI_DLBU_REGISTER_TLLIST_CONF = 0x0010, /**< Tile list configuration; -+ 29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024 -+ 21:16 2^n number of tiles to be binned to one tile list in Y direction -+ 5:0 2^n number of tiles to be binned to one tile list in X direction */ -+ MALI_DLBU_REGISTER_START_TILE_POS = 0x0014, /**< Start tile positions; -+ 31:24 start position in Y direction for group 1 -+ 23:16 start position in X direction for group 1 -+ 15:8 start position in Y direction for group 0 -+ 7:0 start position in X direction for group 0 */ -+ MALI_DLBU_REGISTER_PP_ENABLE_MASK = 0x0018, /**< PP enable mask; -+ 7 enable PP7 for load balancing -+ 6 enable PP6 for load balancing -+ 5 enable PP5 for load balancing -+ 4 enable PP4 for load balancing -+ 3 enable PP3 for load balancing -+ 2 enable PP2 for load balancing -+ 1 enable PP1 for load balancing -+ 0 enable PP0 for load balancing */ -+} mali_dlbu_register; -+ -+typedef enum { -+ PP0ENABLE = 0, -+ PP1ENABLE, -+ PP2ENABLE, -+ PP3ENABLE, -+ PP4ENABLE, -+ PP5ENABLE, -+ PP6ENABLE, -+ PP7ENABLE -+} mali_dlbu_pp_enable; -+ -+struct mali_dlbu_core { -+ struct mali_hw_core hw_core; /**< Common for all HW cores */ -+ u32 pp_cores_mask; /**< This is a mask for the PP cores whose operation will be controlled by LBU -+ see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */ -+}; -+ -+_mali_osk_errcode_t mali_dlbu_initialize(void) -+{ -+ MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n")); -+ -+ if (_MALI_OSK_ERR_OK == -+ mali_mmu_get_table_page(&mali_dlbu_phys_addr, -+ &mali_dlbu_cpu_addr)) { -+ return _MALI_OSK_ERR_OK; -+ } -+ -+ return _MALI_OSK_ERR_FAULT; -+} -+ -+void mali_dlbu_terminate(void) ++static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) +{ -+ MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n")); -+ -+ if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) { -+ mali_mmu_release_table_page(mali_dlbu_phys_addr, -+ mali_dlbu_cpu_addr); -+ mali_dlbu_phys_addr = 0; -+ mali_dlbu_cpu_addr = 0; -+ } -+} ++ struct kbase_tlstream *stream; ++ unsigned int rb_idx; ++ struct kbase_timeline *timeline; + -+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource) -+{ -+ struct mali_dlbu_core *core = NULL; ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(wait); + -+ MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description)); ++ if (WARN_ON(!filp->private_data)) ++ return EPOLLHUP | EPOLLERR; + -+ core = _mali_osk_malloc(sizeof(struct mali_dlbu_core)); -+ if (NULL != core) { -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) { -+ 
core->pp_cores_mask = 0; -+ if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) { -+ return core; -+ } -+ MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description)); -+ mali_hw_core_delete(&core->hw_core); -+ } ++ timeline = (struct kbase_timeline *)filp->private_data; + -+ _mali_osk_free(core); -+ } else { -+ MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n")); -+ } ++ /* If there are header bytes to copy, read will not block */ ++ if (kbasep_timeline_has_header_data(timeline)) ++ return EPOLLIN | EPOLLRDNORM; + -+ return NULL; -+} ++ poll_wait(filp, &timeline->event_queue, wait); ++ if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) ++ return EPOLLIN | EPOLLRDNORM; + -+void mali_dlbu_delete(struct mali_dlbu_core *dlbu) -+{ -+ MALI_DEBUG_ASSERT_POINTER(dlbu); -+ mali_hw_core_delete(&dlbu->hw_core); -+ _mali_osk_free(dlbu); ++ return (__poll_t)0; +} + -+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu) ++int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +{ -+ u32 dlbu_registers[7]; -+ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT; -+ MALI_DEBUG_ASSERT_POINTER(dlbu); ++ /* The timeline stream file operations structure. */ ++ static const struct file_operations kbasep_tlstream_fops = { ++ .owner = THIS_MODULE, ++ .release = kbasep_timeline_io_release, ++ .read = kbasep_timeline_io_read, ++ .poll = kbasep_timeline_io_poll, ++ .fsync = kbasep_timeline_io_fsync, ++ }; ++ int err; + -+ MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description)); ++ if (!timeline_is_permitted()) ++ return -EPERM; + -+ dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */ -+ dlbu_registers[1] = MALI_DLBU_VIRT_ADDR; -+ dlbu_registers[2] = 0; -+ dlbu_registers[3] = 0; -+ dlbu_registers[4] = 0; -+ dlbu_registers[5] = 0; -+ dlbu_registers[6] = dlbu->pp_cores_mask; ++ if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK)) ++ return -EINVAL; + -+ /* write reset values to core registers */ -+ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7); ++ err = kbase_timeline_acquire(kbdev, flags); ++ if (err) ++ return err; + -+ err = _MALI_OSK_ERR_OK; ++ err = anon_inode_getfd("[mali_tlstream]", &kbasep_tlstream_fops, kbdev->timeline, ++ O_RDONLY | O_CLOEXEC); ++ if (err < 0) ++ kbase_timeline_release(kbdev->timeline); + + return err; +} + -+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++static int kbasep_timeline_io_open(struct inode *in, struct file *file) +{ -+ MALI_DEBUG_ASSERT_POINTER(dlbu); ++ struct kbase_device *const kbdev = in->i_private; + -+ mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask); ++ if (WARN_ON(!kbdev)) ++ return -EFAULT; ++ ++ file->private_data = kbdev->timeline; ++ return kbase_timeline_acquire(kbdev, BASE_TLSTREAM_FLAGS_MASK & ++ ~BASE_TLSTREAM_JOB_DUMPING_ENABLED); +} + -+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group) ++void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ -+ struct mali_pp_core *pp_core; -+ u32 bcast_id; ++ static const struct file_operations kbasep_tlstream_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_timeline_io_open, ++ .release = kbasep_timeline_io_release, ++ .read = kbasep_timeline_io_read, ++ .poll = kbasep_timeline_io_poll, ++ .fsync = kbasep_timeline_io_fsync, ++ }; ++ struct dentry *file; + -+ 
MALI_DEBUG_ASSERT_POINTER(dlbu); -+ MALI_DEBUG_ASSERT_POINTER(group); ++ if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) ++ return; + -+ pp_core = mali_group_get_pp_core(group); -+ bcast_id = mali_pp_core_get_bcast_id(pp_core); ++ file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_tlstream_debugfs_fops); + -+ dlbu->pp_cores_mask |= bcast_id; -+ MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask)); ++ if (IS_ERR_OR_NULL(file)) ++ dev_warn(kbdev->dev, "Unable to create timeline debugfs entry"); +} -+ -+/* Remove a group from the DLBU */ -+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group) ++#else ++/* ++ * Stub function for when debugfs is disabled ++ */ ++void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ -+ struct mali_pp_core *pp_core; -+ u32 bcast_id; -+ -+ MALI_DEBUG_ASSERT_POINTER(dlbu); -+ MALI_DEBUG_ASSERT_POINTER(group); ++} ++#endif + -+ pp_core = mali_group_get_pp_core(group); -+ bcast_id = mali_pp_core_get_bcast_id(pp_core); ++/** ++ * kbasep_timeline_io_release - release timeline stream descriptor ++ * @inode: Pointer to inode structure ++ * @filp: Pointer to file structure ++ * ++ * Return: always return zero ++ */ ++static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) ++{ ++ CSTD_UNUSED(inode); + -+ dlbu->pp_cores_mask &= ~bcast_id; -+ MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask)); ++ kbase_timeline_release(filp->private_data); ++ return 0; +} + -+/* Configure the DLBU for \a job. This needs to be done before the job is started on the groups in the DLBU. */ -+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job) ++static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, ++ int datasync) +{ -+ u32 *registers; -+ MALI_DEBUG_ASSERT(job); -+ registers = mali_pp_job_get_dlbu_registers(job); -+ MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n")); -+ -+ /* Writing 4 registers: -+ * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */ -+ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4); ++ CSTD_UNUSED(start); ++ CSTD_UNUSED(end); ++ CSTD_UNUSED(datasync); + ++ return kbase_timeline_streams_flush(filp->private_data); +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h new file mode 100644 -index 000000000..a7ecf4147 +index 000000000..de30bccc7 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h -@@ -0,0 +1,45 @@ ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h +@@ -0,0 +1,103 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ + -+#ifndef __MALI_DLBU_H__ -+#define __MALI_DLBU_H__ -+ -+#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */ -+ -+#include "mali_osk.h" ++#if !defined(_KBASE_TIMELINE_PRIV_H) ++#define _KBASE_TIMELINE_PRIV_H + -+struct mali_pp_job; -+struct mali_group; -+struct mali_dlbu_core; ++#include ++#include "mali_kbase_tlstream.h" + -+extern mali_dma_addr mali_dlbu_phys_addr; ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_tl_reader.h" ++#include "csf/mali_kbase_csf_trace_buffer.h" ++#endif + -+_mali_osk_errcode_t mali_dlbu_initialize(void); -+void mali_dlbu_terminate(void); ++#include ++#include ++#include + -+struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource); -+void mali_dlbu_delete(struct mali_dlbu_core *dlbu); ++/* The minimum amount of time timeline must be acquired for before release is ++ * allowed, to prevent DoS attacks. ++ */ ++#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) + -+_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu); ++/** ++ * struct kbase_timeline - timeline state structure ++ * @streams: The timeline streams generated by kernel ++ * @tl_kctx_list: List of contexts for timeline. ++ * @tl_kctx_list_lock: Lock to protect @tl_kctx_list. ++ * @autoflush_timer: Autoflush timer ++ * @autoflush_timer_active: If non-zero autoflush timer is active ++ * @reader_lock: Reader lock. Only one reader is allowed to ++ * have access to the timeline streams at any given time. ++ * @event_queue: Timeline stream event queue ++ * @bytes_collected: Number of bytes read by user ++ * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags ++ * otherwise. See kbase_timeline_acquire(). ++ * @obj_header_btc: Remaining bytes to copy for the object stream header ++ * @aux_header_btc: Remaining bytes to copy for the aux stream header ++ * @last_acquire_time: The time at which timeline was last acquired. 
++ * @csf_tl_reader: CSFFW timeline reader ++ */ ++struct kbase_timeline { ++ struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; ++ struct list_head tl_kctx_list; ++ struct mutex tl_kctx_list_lock; ++ struct timer_list autoflush_timer; ++ atomic_t autoflush_timer_active; ++ struct mutex reader_lock; ++ wait_queue_head_t event_queue; ++#if MALI_UNIT_TEST ++ atomic_t bytes_collected; ++#endif /* MALI_UNIT_TEST */ ++ atomic_t *timeline_flags; ++ size_t obj_header_btc; ++ size_t aux_header_btc; ++ ktime_t last_acquire_time; ++#if MALI_USE_CSF ++ struct kbase_csf_tl_reader csf_tl_reader; ++#endif ++}; + -+void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group); -+void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group); ++void kbase_create_timeline_objects(struct kbase_device *kbdev); + -+/** @brief Called to update HW after DLBU state changed ++/** ++ * kbase_timeline_acquire - acquire timeline for a userspace client. ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * @flags: Timeline stream flags + * -+ * This function must be called after \a mali_dlbu_add_group or \a -+ * mali_dlbu_remove_group to write the updated mask to hardware, unless the -+ * same is accomplished by calling \a mali_dlbu_reset. ++ * Each timeline instance can be acquired by only one userspace client at a time. ++ * ++ * Return: Zero on success, error number on failure (e.g. if already acquired). + */ -+void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu); ++int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags); + -+void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job); ++/** ++ * kbase_timeline_release - release timeline for a userspace client. ++ * @timeline: Timeline instance to be stopped. It must be previously acquired ++ * with kbase_timeline_acquire(). ++ * ++ * Releasing the timeline instance allows it to be acquired by another userspace client. ++ */ ++void kbase_timeline_release(struct kbase_timeline *timeline); + -+#endif /* __MALI_DLBU_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c ++#endif /* _KBASE_TIMELINE_PRIV_H */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h new file mode 100644 -index 000000000..55b21a410 +index 000000000..b6aaadedc --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c -@@ -0,0 +1,308 @@ ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h +@@ -0,0 +1,123 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ + -+#include -+#include "mali_kernel_common.h" -+#include "mali_scheduler.h" -+#include "mali_dvfs_policy.h" -+#include "mali_osk_mali.h" -+#include "mali_osk_profiling.h" -+ -+#define CLOCK_TUNING_TIME_DEBUG 0 ++#if !defined(_KBASE_TL_SERIALIZE_H) ++#define _KBASE_TL_SERIALIZE_H + -+#define MAX_PERFORMANCE_VALUE 256 -+#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5)) ++#include + -+/** The max fps the same as display vsync default 60, can set by module insert parameter */ -+int mali_max_system_fps = 60; -+/** A lower limit on their desired FPS default 58, can set by module insert parameter */ -+int mali_desired_fps = 58; ++#include + -+static int mali_fps_step1 = 0; -+static int mali_fps_step2 = 0; ++/* The number of nanoseconds in a second. */ ++#define NSECS_IN_SEC 1000000000ull /* ns */ + -+static int clock_step = -1; -+static int cur_clk_step = -1; -+static struct mali_gpu_clock *gpu_clk = NULL; ++/** ++ * kbasep_serialize_bytes - serialize bytes to the message buffer ++ * ++ * @buffer: Message buffer ++ * @pos: Message buffer offset ++ * @bytes: Bytes to serialize ++ * @len: Length of bytes array ++ * ++ * Serialize bytes as if using memcpy(). ++ * ++ * Return: updated position in the buffer ++ */ ++static inline size_t kbasep_serialize_bytes( ++ char *buffer, ++ size_t pos, ++ const void *bytes, ++ size_t len) ++{ ++ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(bytes); + -+/*Function prototype */ -+static int (*mali_gpu_set_freq)(int) = NULL; -+static int (*mali_gpu_get_freq)(void) = NULL; ++ memcpy(&buffer[pos], bytes, len); + -+static mali_bool mali_dvfs_enabled = MALI_FALSE; ++ return pos + len; ++} + -+#define NUMBER_OF_NANOSECONDS_PER_SECOND 1000000000ULL -+static u32 calculate_window_render_fps(u64 time_period) ++/** ++ * kbasep_serialize_string - serialize string to the message buffer ++ * ++ * @buffer: Message buffer ++ * @pos: Message buffer offset ++ * @string: String to serialize ++ * @max_write_size: Number of bytes that can be stored in buffer ++ * ++ * String is serialized as 4 bytes for string size, ++ * then string content and then null terminator. 
++ * ++ * Return: updated position in the buffer ++ */ ++static inline size_t kbasep_serialize_string( ++ char *buffer, ++ size_t pos, ++ const char *string, ++ size_t max_write_size) +{ -+ u32 max_window_number; -+ u64 tmp; -+ u64 max = time_period; -+ u32 leading_zeroes; -+ u32 shift_val; -+ u32 time_period_shift; -+ u32 max_window_number_shift; -+ u32 ret_val; -+ -+ max_window_number = mali_session_max_window_num(); -+ -+ /* To avoid float division, extend the dividend to ns unit */ -+ tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND; -+ if (tmp > time_period) { -+ max = tmp; -+ } ++ u32 string_len; + -+ /* -+ * We may have 64-bit values, a dividend or a divisor or both -+ * To avoid dependencies to a 64-bit divider, we shift down the two values -+ * equally first. ++ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(string); ++ /* Timeline string consists of at least string length and nul ++ * terminator. + */ -+ leading_zeroes = _mali_osk_clz((u32)(max >> 32)); -+ shift_val = 32 - leading_zeroes; ++ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); ++ max_write_size -= sizeof(string_len); + -+ time_period_shift = (u32)(time_period >> shift_val); -+ max_window_number_shift = (u32)(tmp >> shift_val); ++ string_len = strscpy( ++ &buffer[pos + sizeof(string_len)], ++ string, ++ max_write_size); ++ string_len += sizeof(char); + -+ ret_val = max_window_number_shift / time_period_shift; ++ /* Make sure that the source string fit into the buffer. */ ++ KBASE_DEBUG_ASSERT(string_len <= max_write_size); + -+ return ret_val; ++ /* Update string length. */ ++ memcpy(&buffer[pos], &string_len, sizeof(string_len)); ++ ++ return pos + sizeof(string_len) + string_len; +} + -+static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up) ++/** ++ * kbasep_serialize_timestamp - serialize timestamp to the message buffer ++ * ++ * @buffer: Message buffer ++ * @pos: Message buffer offset ++ * ++ * Get current timestamp using kbasep_get_timestamp() ++ * and serialize it as 64 bit unsigned integer. ++ * ++ * Return: updated position in the buffer ++ */ ++static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) +{ -+ int i = 0; -+ bool clock_changed = false; ++ u64 timestamp; + -+ /* Round up the closest available frequency step for target_clock_hz */ -+ for (i = 0; i < gpu_clk->num_of_steps; i++) { -+ /* Find the first item > target_clock_hz */ -+ if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) { -+ break; -+ } -+ } ++ timestamp = ktime_get_raw_ns(); + -+ /* If the target clock greater than the maximum clock just pick the maximum one*/ -+ if (i == gpu_clk->num_of_steps) { -+ i = gpu_clk->num_of_steps - 1; -+ } else { -+ if ((!pick_clock_up) && (i > 0)) { -+ i = i - 1; -+ } -+ } ++ return kbasep_serialize_bytes( ++ buffer, pos, ++ ×tamp, sizeof(timestamp)); ++} ++#endif /* _KBASE_TL_SERIALIZE_H */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c +new file mode 100644 +index 000000000..47059deb4 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c +@@ -0,0 +1,313 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ clock_step = i; -+ if (cur_clk_step != clock_step) { -+ clock_changed = true; -+ } ++#include "mali_kbase_tlstream.h" ++#include "mali_kbase_tl_serialize.h" ++#include "mali_kbase_mipe_proto.h" + -+ return clock_changed; ++/** ++ * kbasep_packet_header_setup - setup the packet header ++ * @buffer: pointer to the buffer ++ * @pkt_family: packet's family ++ * @pkt_type: packet's type ++ * @pkt_class: packet's class ++ * @stream_id: stream id ++ * @numbered: non-zero if this stream is numbered ++ * ++ * Function sets up immutable part of packet header in the given buffer. ++ */ ++static void kbasep_packet_header_setup( ++ char *buffer, ++ enum tl_packet_family pkt_family, ++ enum tl_packet_class pkt_class, ++ enum tl_packet_type pkt_type, ++ unsigned int stream_id, ++ int numbered) ++{ ++ u32 words[2] = { ++ MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id), ++ MIPE_PACKET_HEADER_W1(0, !!numbered), ++ }; ++ memcpy(buffer, words, sizeof(words)); +} + -+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period) ++/** ++ * kbasep_packet_header_update - update the packet header ++ * @buffer: pointer to the buffer ++ * @data_size: amount of data carried in this packet ++ * @numbered: non-zero if the stream is numbered ++ * ++ * Function updates mutable part of packet header in the given buffer. ++ * Note that value of data_size must not include size of the header. ++ */ ++static void kbasep_packet_header_update( ++ char *buffer, ++ size_t data_size, ++ int numbered) +{ -+ int under_perform_boundary_value = 0; -+ int over_perform_boundary_value = 0; -+ int current_fps = 0; -+ int current_gpu_util = 0; -+ bool clock_changed = false; -+#if CLOCK_TUNING_TIME_DEBUG -+ struct timeval start; -+ struct timeval stop; -+ unsigned int elapse_time; -+ do_gettimeofday(&start); -+#endif -+ u32 window_render_fps; -+ -+ if (NULL == gpu_clk) { -+ MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. 
\n")); -+ return; -+ } -+ -+ window_render_fps = calculate_window_render_fps(time_period); -+ -+ current_fps = window_render_fps; -+ current_gpu_util = data->utilization_gpu; ++ u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); + -+ /* Get the specific under_perform_boundary_value and over_perform_boundary_value */ -+ if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) { -+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90); -+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70); -+ } else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) { -+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55); -+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35); -+ } else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) { -+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70); -+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50); -+ } else { -+ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55); -+ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35); -+ } ++ KBASE_DEBUG_ASSERT(buffer); + -+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util)); -+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d, over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value)); -+ MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d, pressure render fps = %d \n", current_fps, window_render_fps)); ++ /* we copy the contents of word1 to its respective position in the buffer */ ++ memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); ++} + -+ /* Get current clock value */ -+ cur_clk_step = mali_gpu_get_freq(); ++/** ++ * kbasep_packet_number_update - update the packet number ++ * @buffer: pointer to the buffer ++ * @counter: value of packet counter for this packet's stream ++ * ++ * Function updates packet number embedded within the packet placed in the ++ * given buffer. ++ */ ++static void kbasep_packet_number_update(char *buffer, u32 counter) ++{ ++ KBASE_DEBUG_ASSERT(buffer); + -+ /* Consider offscreen */ -+ if (0 == current_fps) { -+ /* GP or PP under perform, need to give full power */ -+ if (current_gpu_util > over_perform_boundary_value) { -+ if (cur_clk_step != gpu_clk->num_of_steps - 1) { -+ clock_changed = true; -+ clock_step = gpu_clk->num_of_steps - 1; -+ } -+ } ++ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); ++} + -+ /* If GPU is idle, use lowest power */ -+ if (0 == current_gpu_util) { -+ if (cur_clk_step != 0) { -+ clock_changed = true; -+ clock_step = 0; -+ } -+ } ++void kbase_tlstream_reset(struct kbase_tlstream *stream) ++{ ++ unsigned int i; + -+ goto real_setting; ++ for (i = 0; i < PACKET_COUNT; i++) { ++ if (stream->numbered) ++ atomic_set( ++ &stream->buffer[i].size, ++ PACKET_HEADER_SIZE + ++ PACKET_NUMBER_SIZE); ++ else ++ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); + } + -+ /* 2. 
Calculate target clock if the GPU clock can be tuned */ -+ if (-1 != cur_clk_step) { -+ int target_clk_mhz = -1; -+ mali_bool pick_clock_up = MALI_TRUE; ++ atomic_set(&stream->wbi, 0); ++ atomic_set(&stream->rbi, 0); ++} + -+ if (current_gpu_util > under_perform_boundary_value) { -+ /* when under perform, need to consider the fps part */ -+ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps; -+ pick_clock_up = MALI_TRUE; -+ } else if (current_gpu_util < over_perform_boundary_value) { -+ /* when over perform, did't need to consider fps, system didn't want to reach desired fps */ -+ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value; -+ pick_clock_up = MALI_FALSE; -+ } ++/* Configuration of timeline streams generated by kernel. */ ++static const struct { ++ enum tl_packet_family pkt_family; ++ enum tl_packet_class pkt_class; ++ enum tl_packet_type pkt_type; ++ enum tl_stream_id stream_id; ++} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { ++ { ++ TL_PACKET_FAMILY_TL, ++ TL_PACKET_CLASS_OBJ, ++ TL_PACKET_TYPE_SUMMARY, ++ TL_STREAM_ID_KERNEL, ++ }, ++ { ++ TL_PACKET_FAMILY_TL, ++ TL_PACKET_CLASS_OBJ, ++ TL_PACKET_TYPE_BODY, ++ TL_STREAM_ID_KERNEL, ++ }, ++ { ++ TL_PACKET_FAMILY_TL, ++ TL_PACKET_CLASS_AUX, ++ TL_PACKET_TYPE_BODY, ++ TL_STREAM_ID_KERNEL, ++ }, ++#if MALI_USE_CSF ++ { ++ TL_PACKET_FAMILY_TL, ++ TL_PACKET_CLASS_OBJ, ++ TL_PACKET_TYPE_BODY, ++ TL_STREAM_ID_CSFFW, ++ }, ++#endif ++}; + -+ if (-1 != target_clk_mhz) { -+ clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up); -+ } -+ } ++void kbase_tlstream_init( ++ struct kbase_tlstream *stream, ++ enum tl_stream_type stream_type, ++ wait_queue_head_t *ready_read) ++{ ++ unsigned int i; + -+real_setting: -+ if (clock_changed) { -+ mali_gpu_set_freq(clock_step); ++ KBASE_DEBUG_ASSERT(stream); ++ KBASE_DEBUG_ASSERT(stream_type < TL_STREAM_TYPE_COUNT); + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ gpu_clk->item[clock_step].clock, -+ gpu_clk->item[clock_step].vol / 1000, -+ 0, 0, 0); -+ } ++ spin_lock_init(&stream->lock); + -+#if CLOCK_TUNING_TIME_DEBUG -+ do_gettimeofday(&stop); ++ /* All packets carrying tracepoints shall be numbered. 
*/ ++ if (tl_stream_cfg[stream_type].pkt_type == TL_PACKET_TYPE_BODY) ++ stream->numbered = 1; ++ else ++ stream->numbered = 0; + -+ elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start); -+ MALI_DEBUG_PRINT(2, ("Using ARM power policy: eclapse time = %d\n", elapse_time)); ++ for (i = 0; i < PACKET_COUNT; i++) ++ kbasep_packet_header_setup( ++ stream->buffer[i].data, ++ tl_stream_cfg[stream_type].pkt_family, ++ tl_stream_cfg[stream_type].pkt_class, ++ tl_stream_cfg[stream_type].pkt_type, ++ tl_stream_cfg[stream_type].stream_id, ++ stream->numbered); ++ ++#if MALI_UNIT_TEST ++ atomic_set(&stream->bytes_generated, 0); +#endif ++ stream->ready_read = ready_read; ++ ++ kbase_tlstream_reset(stream); +} + -+_mali_osk_errcode_t mali_dvfs_policy_init(void) ++void kbase_tlstream_term(struct kbase_tlstream *stream) +{ -+ _mali_osk_device_data data; -+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; ++ KBASE_DEBUG_ASSERT(stream); ++} + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) { -+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n")); ++/** ++ * kbasep_tlstream_msgbuf_submit - submit packet to user space ++ * @stream: Pointer to the stream structure ++ * @wb_idx_raw: Write buffer index ++ * @wb_size: Length of data stored in the current buffer ++ * ++ * Updates currently written buffer with the packet header. ++ * Then write index is incremented and the buffer is handed to user space. ++ * Parameters of the new buffer are returned using provided arguments. ++ * ++ * Return: length of data in the new buffer ++ * ++ * Warning: the user must update the stream structure with returned value. ++ */ ++static size_t kbasep_tlstream_msgbuf_submit( ++ struct kbase_tlstream *stream, ++ unsigned int wb_idx_raw, ++ unsigned int wb_size) ++{ ++ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + ++ /* Set stream as flushed. */ ++ atomic_set(&stream->autoflush_counter, -1); + -+ mali_fps_step1 = mali_max_system_fps / 3; -+ mali_fps_step2 = mali_max_system_fps / 5; ++ kbasep_packet_header_update( ++ stream->buffer[wb_idx].data, ++ wb_size - PACKET_HEADER_SIZE, ++ stream->numbered); + -+ data.get_clock_info(&gpu_clk); ++ if (stream->numbered) ++ kbasep_packet_number_update( ++ stream->buffer[wb_idx].data, ++ wb_idx_raw); + -+ if (gpu_clk != NULL) { -+#ifdef DEBUG -+ int i; -+ for (i = 0; i < gpu_clk->num_of_steps; i++) { -+ MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n", -+ i, gpu_clk->item[i].clock, gpu_clk->item[i].vol)); -+ } -+#endif -+ } else { -+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n")); -+ } ++ /* Increasing write buffer index will expose this packet to the reader. ++ * As stream->lock is not taken on reader side we must make sure memory ++ * is updated correctly before this will happen. ++ */ ++ smp_wmb(); ++ atomic_inc(&stream->wbi); + -+ mali_gpu_get_freq = data.get_freq; -+ mali_gpu_set_freq = data.set_freq; ++ /* Inform user that packets are ready for reading. 
*/ ++ wake_up_interruptible(stream->ready_read); + -+ if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0) -+ && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) { -+ mali_dvfs_enabled = MALI_TRUE; -+ } -+ } else { -+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n")); -+ } -+ } else { -+ err = _MALI_OSK_ERR_FAULT; -+ MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n")); -+ } ++ wb_size = PACKET_HEADER_SIZE; ++ if (stream->numbered) ++ wb_size += PACKET_NUMBER_SIZE; + -+ return err; ++ return wb_size; +} + -+/* -+ * Always give full power when start a new period, -+ * if mali dvfs enabled, for performance consideration -+ */ -+void mali_dvfs_policy_new_period(void) ++char *kbase_tlstream_msgbuf_acquire( ++ struct kbase_tlstream *stream, ++ size_t msg_size, ++ unsigned long *flags) __acquires(&stream->lock) +{ -+ /* Always give full power when start a new period */ -+ unsigned int cur_clk_step = 0; ++ unsigned int wb_idx_raw; ++ unsigned int wb_idx; ++ size_t wb_size; + -+ cur_clk_step = mali_gpu_get_freq(); ++ KBASE_DEBUG_ASSERT( ++ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= ++ msg_size); + -+ if (cur_clk_step != (gpu_clk->num_of_steps - 1)) { -+ mali_gpu_set_freq(gpu_clk->num_of_steps - 1); ++ spin_lock_irqsave(&stream->lock, *flags); + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock, -+ gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0); ++ wb_idx_raw = atomic_read(&stream->wbi); ++ wb_idx = wb_idx_raw % PACKET_COUNT; ++ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ ++ /* Select next buffer if data will not fit into current one. */ ++ if (wb_size + msg_size > PACKET_SIZE) { ++ wb_size = kbasep_tlstream_msgbuf_submit( ++ stream, wb_idx_raw, wb_size); ++ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + } ++ ++ /* Reserve space in selected buffer. */ ++ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); ++ ++#if MALI_UNIT_TEST ++ atomic_add(msg_size, &stream->bytes_generated); ++#endif /* MALI_UNIT_TEST */ ++ ++ return &stream->buffer[wb_idx].data[wb_size]; +} + -+mali_bool mali_dvfs_policy_enabled(void) ++void kbase_tlstream_msgbuf_release( ++ struct kbase_tlstream *stream, ++ unsigned long flags) __releases(&stream->lock) +{ -+ return mali_dvfs_enabled; ++ /* Mark stream as containing unflushed data. 
*/ ++ atomic_set(&stream->autoflush_counter, 0); ++ ++ spin_unlock_irqrestore(&stream->lock, flags); +} + -+#if defined(CONFIG_MALI400_PROFILING) -+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item) ++size_t kbase_tlstream_flush_stream( ++ struct kbase_tlstream *stream) +{ -+ if (mali_platform_device != NULL) { ++ unsigned long flags; ++ unsigned int wb_idx_raw; ++ unsigned int wb_idx; ++ size_t wb_size; ++ size_t min_size = PACKET_HEADER_SIZE; + -+ struct mali_gpu_device_data *device_data = NULL; -+ device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data; + -+ if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) { ++ if (stream->numbered) ++ min_size += PACKET_NUMBER_SIZE; + -+ int cur_clk_step = device_data->get_freq(); -+ struct mali_gpu_clock *mali_gpu_clk = NULL; ++ spin_lock_irqsave(&stream->lock, flags); + -+ device_data->get_clock_info(&mali_gpu_clk); -+ clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock; -+ clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol; -+ } else { -+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n")); -+ } ++ wb_idx_raw = atomic_read(&stream->wbi); ++ wb_idx = wb_idx_raw % PACKET_COUNT; ++ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ ++ if (wb_size > min_size) { ++ wb_size = kbasep_tlstream_msgbuf_submit( ++ stream, wb_idx_raw, wb_size); ++ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; ++ atomic_set(&stream->buffer[wb_idx].size, wb_size); ++ } else { ++ /* we return that there is no bytes to be read.*/ ++ /* Timeline io fsync will use this info the decide whether ++ * fsync should return an error ++ */ ++ wb_size = 0; + } -+} -+#endif + -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h ++ spin_unlock_irqrestore(&stream->lock, flags); ++ return wb_size; ++} +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h new file mode 100644 -index 000000000..662348c4e +index 000000000..c1428495b --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h -@@ -0,0 +1,34 @@ ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* -+ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * ++ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * + */ + -+#ifndef __MALI_DVFS_POLICY_H__ -+#define __MALI_DVFS_POLICY_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period); -+ -+_mali_osk_errcode_t mali_dvfs_policy_init(void); -+ -+void mali_dvfs_policy_new_period(void); ++#if !defined(_KBASE_TLSTREAM_H) ++#define _KBASE_TLSTREAM_H + -+mali_bool mali_dvfs_policy_enabled(void); ++#include ++#include ++#include + -+#if defined(CONFIG_MALI400_PROFILING) -+void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item); -+#endif ++/* The maximum size of a single packet used by timeline. */ ++#define PACKET_SIZE 4096 /* bytes */ + -+#ifdef __cplusplus -+} -+#endif ++/* The number of packets used by one timeline stream. */ ++#define PACKET_COUNT 128 + -+#endif/* __MALI_DVFS_POLICY_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_executor.c b/drivers/gpu/arm/mali400/mali/common/mali_executor.c -new file mode 100644 -index 000000000..0cf1ec0b0 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_executor.c -@@ -0,0 +1,2707 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++/* The maximum expected length of string in tracepoint descriptor. */ ++#define STRLEN_MAX 64 /* bytes */ + -+#include "mali_executor.h" -+#include "mali_scheduler.h" -+#include "mali_kernel_common.h" -+#include "mali_kernel_core.h" -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_pp.h" -+#include "mali_pp_job.h" -+#include "mali_group.h" -+#include "mali_pm.h" -+#include "mali_timeline.h" -+#include "mali_osk_profiling.h" -+#include "mali_session.h" -+#include "mali_osk_mali.h" ++/** ++ * struct kbase_tlstream - timeline stream structure ++ * @lock: Message order lock ++ * @buffer: Array of buffers ++ * @buffer.size: Number of bytes in buffer ++ * @buffer.data: Buffer's data ++ * @wbi: Write buffer index ++ * @rbi: Read buffer index ++ * @numbered: If non-zero stream's packets are sequentially numbered ++ * @autoflush_counter: Counter tracking stream's autoflush state ++ * @ready_read: Pointer to a wait queue, which is signaled when ++ * timeline messages are ready for collection. ++ * @bytes_generated: Number of bytes generated by tracepoint messages ++ * ++ * This structure holds information needed to construct proper packets in the ++ * timeline stream. ++ * ++ * Each message in the sequence must bear a timestamp that is ++ * greater than the previous message in the same stream. For this reason ++ * a lock is held throughout the process of message creation. ++ * ++ * Each stream contains a set of buffers. Each buffer will hold one MIPE ++ * packet. In case there is no free space required to store the incoming ++ * message the oldest buffer is discarded. 
Each packet in timeline body ++ * stream has a sequence number embedded, this value must increment ++ * monotonically and is used by the packets receiver to discover these ++ * buffer overflows. ++ * ++ * The autoflush counter is set to a negative number when there is no data ++ * pending for flush and it is set to zero on every update of the buffer. The ++ * autoflush timer will increment the counter by one on every expiry. If there ++ * is no activity on the buffer for two consecutive timer expiries, the stream ++ * buffer will be flushed. ++ */ ++struct kbase_tlstream { ++ spinlock_t lock; + -+/*Add for voltage scan function*/ -+extern u32 mali_group_error; ++ struct { ++ atomic_t size; ++ char data[PACKET_SIZE]; ++ } buffer[PACKET_COUNT]; + -+/* -+ * If dma_buf with map on demand is used, we defer job deletion and job queue -+ * if in atomic context, since both might sleep. -+ */ -+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1 -+#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1 -+#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */ ++ atomic_t wbi; ++ atomic_t rbi; + -+/* -+ * ---------- static type definitions (structs, enums, etc) ---------- -+ */ ++ int numbered; ++ atomic_t autoflush_counter; ++ wait_queue_head_t *ready_read; ++#if MALI_UNIT_TEST ++ atomic_t bytes_generated; ++#endif ++}; + -+enum mali_executor_state_t { -+ EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */ -+ EXEC_STATE_DISABLED, /* Disabled by core scaling (do not use) */ -+ EXEC_STATE_EMPTY, /* No child groups for virtual group (do not use) */ -+ EXEC_STATE_INACTIVE, /* Can be used, but must be activate first */ -+ EXEC_STATE_IDLE, /* Active and ready to be used */ -+ EXEC_STATE_WORKING, /* Executing a job */ ++/* Types of streams generated by timeline. */ ++enum tl_stream_type { ++ TL_STREAM_TYPE_FIRST, ++ TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, ++ TL_STREAM_TYPE_OBJ, ++ TL_STREAM_TYPE_AUX, ++#if MALI_USE_CSF ++ TL_STREAM_TYPE_CSFFW, ++#endif ++ TL_STREAM_TYPE_COUNT +}; + -+/* -+ * ---------- global variables (exported due to inline functions) ---------- ++/** ++ * kbase_tlstream_init - initialize timeline stream ++ * @stream: Pointer to the stream structure ++ * @stream_type: Stream type ++ * @ready_read: Pointer to a wait queue to signal when ++ * timeline messages are ready for collection. + */ ++void kbase_tlstream_init(struct kbase_tlstream *stream, ++ enum tl_stream_type stream_type, ++ wait_queue_head_t *ready_read); + -+/* Lock for this module (protecting all HW access except L2 caches) */ -+_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL; ++/** ++ * kbase_tlstream_term - terminate timeline stream ++ * @stream: Pointer to the stream structure ++ */ ++void kbase_tlstream_term(struct kbase_tlstream *stream); + -+mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX]; ++/** ++ * kbase_tlstream_reset - reset stream ++ * @stream: Pointer to the stream structure ++ * ++ * Function discards all pending messages and resets packet counters. 
++ */ ++void kbase_tlstream_reset(struct kbase_tlstream *stream); + -+/* -+ * ---------- static variables ---------- ++/** ++ * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer ++ * @stream: Pointer to the stream structure ++ * @msg_size: Message size ++ * @flags: Pointer to store flags passed back on stream release ++ * ++ * Lock the stream and reserve the number of bytes requested ++ * in msg_size for the user. ++ * ++ * Return: pointer to the buffer where a message can be stored ++ * ++ * Warning: The stream must be released with kbase_tlstream_msgbuf_release(). ++ * Only atomic operations are allowed while the stream is locked ++ * (i.e. do not use any operation that may sleep). + */ ++char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, ++ size_t msg_size, unsigned long *flags) __acquires(&stream->lock); + -+/* Used to defer job scheduling */ -+static _mali_osk_wq_work_t *executor_wq_high_pri = NULL; ++/** ++ * kbase_tlstream_msgbuf_release - unlock selected stream ++ * @stream: Pointer to the stream structure ++ * @flags: Value obtained during stream acquire ++ * ++ * Release the stream that has been previously ++ * locked with a call to kbase_tlstream_msgbuf_acquire(). ++ */ ++void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, ++ unsigned long flags) __releases(&stream->lock); + -+/* Store version from GP and PP (user space wants to know this) */ -+static u32 pp_version = 0; -+static u32 gp_version = 0; ++/** ++ * kbase_tlstream_flush_stream - flush stream ++ * @stream: Pointer to the stream structure ++ * ++ * Flush pending data in the timeline stream. ++ * ++ * Return: Number of bytes available flushed and available to be read ++ * ++ */ ++size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream); + -+/* List of physical PP groups which are disabled by some external source */ -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled); -+static u32 group_list_disabled_count = 0; ++#endif /* _KBASE_TLSTREAM_H */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +new file mode 100644 +index 000000000..f62c75583 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +@@ -0,0 +1,4223 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+/* List of groups which can be used, but activate first */ -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive); -+static u32 group_list_inactive_count = 0; ++/* ++ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py. ++ * DO NOT EDIT. 
++ */ + -+/* List of groups which are active and ready to be used */ -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle); -+static u32 group_list_idle_count = 0; ++#include "mali_kbase_tracepoints.h" ++#include "mali_kbase_tlstream.h" ++#include "mali_kbase_tl_serialize.h" + -+/* List of groups which are executing a job */ -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working); -+static u32 group_list_working_count = 0; ++/* clang-format off */ + -+/* Virtual group (if any) */ -+static struct mali_group *virtual_group = NULL; ++/* Message ids of trace events that are recorded in the obj stream. */ ++enum tl_msg_id_obj { ++ KBASE_TL_NEW_CTX, ++ KBASE_TL_NEW_GPU, ++ KBASE_TL_NEW_LPU, ++ KBASE_TL_NEW_ATOM, ++ KBASE_TL_NEW_AS, ++ KBASE_TL_DEL_CTX, ++ KBASE_TL_DEL_ATOM, ++ KBASE_TL_LIFELINK_LPU_GPU, ++ KBASE_TL_LIFELINK_AS_GPU, ++ KBASE_TL_RET_CTX_LPU, ++ KBASE_TL_RET_ATOM_CTX, ++ KBASE_TL_RET_ATOM_LPU, ++ KBASE_TL_NRET_CTX_LPU, ++ KBASE_TL_NRET_ATOM_CTX, ++ KBASE_TL_NRET_ATOM_LPU, ++ KBASE_TL_RET_AS_CTX, ++ KBASE_TL_NRET_AS_CTX, ++ KBASE_TL_RET_ATOM_AS, ++ KBASE_TL_NRET_ATOM_AS, ++ KBASE_TL_ATTRIB_ATOM_CONFIG, ++ KBASE_TL_JIT_USEDPAGES, ++ KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, ++ KBASE_TL_ATTRIB_ATOM_JITFREEINFO, ++ KBASE_TL_ATTRIB_AS_CONFIG, ++ KBASE_TL_EVENT_LPU_SOFTSTOP, ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, ++ KBASE_TL_EVENT_ATOM_SOFTJOB_START, ++ KBASE_TL_EVENT_ATOM_SOFTJOB_END, ++ KBASE_TL_ARBITER_GRANTED, ++ KBASE_TL_ARBITER_STARTED, ++ KBASE_TL_ARBITER_STOP_REQUESTED, ++ KBASE_TL_ARBITER_STOPPED, ++ KBASE_TL_ARBITER_REQUESTED, ++ KBASE_JD_GPU_SOFT_RESET, ++ KBASE_JD_TILER_HEAP_CHUNK_ALLOC, ++ KBASE_TL_JS_SCHED_START, ++ KBASE_TL_JS_SCHED_END, ++ KBASE_TL_JD_SUBMIT_ATOM_START, ++ KBASE_TL_JD_SUBMIT_ATOM_END, ++ KBASE_TL_JD_DONE_NO_LOCK_START, ++ KBASE_TL_JD_DONE_NO_LOCK_END, ++ KBASE_TL_JD_DONE_START, ++ KBASE_TL_JD_DONE_END, ++ KBASE_TL_JD_ATOM_COMPLETE, ++ KBASE_TL_RUN_ATOM_START, ++ KBASE_TL_RUN_ATOM_END, ++ KBASE_TL_ATTRIB_ATOM_PRIORITY, ++ KBASE_TL_ATTRIB_ATOM_STATE, ++ KBASE_TL_ATTRIB_ATOM_PRIORITIZED, ++ KBASE_TL_ATTRIB_ATOM_JIT, ++ KBASE_TL_KBASE_NEW_DEVICE, ++ KBASE_TL_KBASE_GPUCMDQUEUE_KICK, ++ KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, ++ KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, ++ KBASE_TL_KBASE_DEVICE_HALTING_CSG, ++ KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, ++ KBASE_TL_KBASE_DEVICE_CSG_IDLE, ++ KBASE_TL_KBASE_NEW_CTX, ++ KBASE_TL_KBASE_DEL_CTX, ++ KBASE_TL_KBASE_CTX_ASSIGN_AS, ++ KBASE_TL_KBASE_CTX_UNASSIGN_AS, ++ KBASE_TL_KBASE_NEW_KCPUQUEUE, ++ KBASE_TL_KBASE_DEL_KCPUQUEUE, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, ++ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, ++ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, ++ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, ++ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, ++ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, ++ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, ++ 
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, ++ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, ++ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, ++ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, ++ KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, ++ KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, ++ KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, ++ KBASE_TL_KBASE_CSFFW_FW_RELOADING, ++ KBASE_TL_KBASE_CSFFW_FW_ENABLING, ++ KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, ++ KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, ++ KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, ++ KBASE_TL_KBASE_CSFFW_FW_DISABLING, ++ KBASE_TL_KBASE_CSFFW_FW_OFF, ++ KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, ++ KBASE_OBJ_MSG_COUNT, ++}; + -+/* Virtual group state is tracked with a state variable instead of 4 lists */ -+static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT; ++#define OBJ_TP_LIST \ ++ TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ ++ "object ctx is created", \ ++ "@pII", \ ++ "ctx,ctx_nr,tgid") \ ++ TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ ++ "object gpu is created", \ ++ "@pII", \ ++ "gpu,gpu_id,core_count") \ ++ TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ ++ "object lpu is created", \ ++ "@pII", \ ++ "lpu,lpu_nr,lpu_fn") \ ++ TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ ++ "object atom is created", \ ++ "@pI", \ ++ "atom,atom_nr") \ ++ TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ ++ "address space object is created", \ ++ "@pI", \ ++ "address_space,as_nr") \ ++ TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ ++ "context is destroyed", \ ++ "@p", \ ++ "ctx") \ ++ TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ ++ "atom is destroyed", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ ++ "lpu is deleted with gpu", \ ++ "@pp", \ ++ "lpu,gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ ++ "address space is deleted with gpu", \ ++ "@pp", \ ++ "address_space,gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ ++ "context is retained by lpu", \ ++ "@pp", \ ++ "ctx,lpu") \ ++ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ ++ "atom is retained by context", \ ++ "@pp", \ ++ "atom,ctx") \ ++ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ ++ "atom is retained by lpu", \ ++ "@pps", \ ++ "atom,lpu,attrib_match_list") \ ++ TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ ++ "context is released by lpu", \ ++ "@pp", \ ++ "ctx,lpu") \ ++ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ ++ "atom is released by context", \ ++ "@pp", \ ++ "atom,ctx") \ ++ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ ++ 
"atom is released by lpu", \ ++ "@pp", \ ++ "atom,lpu") \ ++ TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ ++ "address space is retained by context", \ ++ "@pp", \ ++ "address_space,ctx") \ ++ TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ ++ "address space is released by context", \ ++ "@pp", \ ++ "address_space,ctx") \ ++ TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ ++ "atom is retained by address space", \ ++ "@pp", \ ++ "atom,address_space") \ ++ TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ ++ "atom is released by address space", \ ++ "@pp", \ ++ "atom,address_space") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ ++ "atom job slot attributes", \ ++ "@pLLI", \ ++ "atom,descriptor,affinity,config") \ ++ TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ ++ "used pages for jit", \ ++ "@LI", \ ++ "used_pages,j_id") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ ++ "Information about JIT allocations", \ ++ "@pLLLIIIII", \ ++ "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ ++ "Information about JIT frees", \ ++ "@pI", \ ++ "atom,j_id") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ ++ "address space attributes", \ ++ "@pLLL", \ ++ "address_space,transtab,memattr,transcfg") \ ++ TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ ++ "softstop event on given lpu", \ ++ "@p", \ ++ "lpu") \ ++ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ ++ "atom softstopped", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ ++ "atom softstop issued", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ ++ "atom soft job has started", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ ++ "atom soft job has completed", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_GRANTED, \ ++ "Arbiter has granted gpu access", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \ ++ "Driver is running again and able to process jobs", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOP_REQUESTED, \ ++ "Arbiter has requested driver to stop using gpu", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOPPED, \ ++ "Driver has stopped using gpu", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ ++ "Driver has requested the arbiter for gpu access", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ ++ "gpu soft reset", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_JD_TILER_HEAP_CHUNK_ALLOC, \ ++ "Tiler Heap Chunk Allocation", \ ++ "@ILL", \ ++ "ctx_nr,heap_id,chunk_va") \ ++ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \ ++ "Scheduling starts", \ ++ "@I", \ ++ "dummy") \ ++ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_END, \ ++ "Scheduling ends", \ ++ "@I", \ ++ "dummy") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_START, \ ++ "Submitting an atom starts", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_END, \ ++ "Submitting an atom ends", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \ ++ "Within function kbase_jd_done_nolock", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \ ++ "Within function kbase_jd_done_nolock - end", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \ ++ "Start of kbase_jd_done", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_DONE_END, \ ++ "End of kbase_jd_done", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_JD_ATOM_COMPLETE, \ ++ "Atom marked 
complete", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_START, \ ++ "Running of atom starts", \ ++ "@pI", \ ++ "atom,atom_nr") \ ++ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_END, \ ++ "Running of atom ends", \ ++ "@pI", \ ++ "atom,atom_nr") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ ++ "atom priority", \ ++ "@pI", \ ++ "atom,prio") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ ++ "atom state", \ ++ "@pI", \ ++ "atom,state") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ ++ "atom caused priority change", \ ++ "@p", \ ++ "atom") \ ++ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ ++ "jit done for atom", \ ++ "@pLLILILLL", \ ++ "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ ++ "New KBase Device", \ ++ "@IIIIIII", \ ++ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ ++ "Kernel receives a request to process new GPU queue instructions", \ ++ "@IL", \ ++ "kernel_ctx_id,buffer_gpu_addr") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ ++ "CSG is programmed to a slot", \ ++ "@IIIII", \ ++ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ ++ "CSG is deprogrammed from a slot", \ ++ "@II", \ ++ "kbase_device_id,kbase_device_csg_slot_index") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \ ++ "CSG is halting", \ ++ "@III", \ ++ "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \ ++ "CSG is suspended", \ ++ "@II", \ ++ "kbase_device_id,kbase_device_csg_slot_index") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \ ++ "KBase device is notified that CSG is idle.", \ ++ "@II", \ ++ "kbase_device_id,kbase_device_csg_slot_index") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ ++ "New KBase Context", \ ++ "@II", \ ++ "kernel_ctx_id,kbase_device_id") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ ++ "Delete KBase Context", \ ++ "@I", \ ++ "kernel_ctx_id") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_ASSIGN_AS, \ ++ "Address Space is assigned to a KBase context", \ ++ "@II", \ ++ "kernel_ctx_id,kbase_device_as_index") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_UNASSIGN_AS, \ ++ "Address Space is unassigned from a KBase context", \ ++ "@I", \ ++ "kernel_ctx_id") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ ++ "New KCPU Queue", \ ++ "@pIII", \ ++ "kcpu_queue,kcpu_queue_id,kernel_ctx_id,kcpuq_num_pending_cmds") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ ++ "Delete KCPU Queue", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ ++ "KCPU Queue enqueues Signal on Fence", \ ++ "@pp", \ ++ "kcpu_queue,fence") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ ++ "KCPU Queue enqueues Wait on Fence", \ ++ "@pp", \ ++ "kcpu_queue,fence") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ ++ "KCPU Queue enqueues Wait on Cross Queue Sync Object", \ ++ "@pLII", \ ++ "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \ ++ "KCPU Queue enqueues Set on Cross Queue Sync Object", \ ++ "@pL", \ ++ "kcpu_queue,cqs_obj_gpu_addr") \ ++ 
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \ ++ "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \ ++ "@pLLIII", \ ++ "kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \ ++ "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \ ++ "@pLLII", \ ++ "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ ++ "KCPU Queue enqueues Map Import", \ ++ "@pL", \ ++ "kcpu_queue,map_import_buf_gpu_addr") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ ++ "KCPU Queue enqueues Unmap Import", \ ++ "@pL", \ ++ "kcpu_queue,map_import_buf_gpu_addr") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ ++ "KCPU Queue enqueues Unmap Import ignoring reference count", \ ++ "@pL", \ ++ "kcpu_queue,map_import_buf_gpu_addr") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ ++ "Begin array of KCPU Queue enqueues JIT Alloc", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ ++ "Array item of KCPU Queue enqueues JIT Alloc", \ ++ "@pLLLLIIIII", \ ++ "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ ++ "End array of KCPU Queue enqueues JIT Alloc", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ ++ "Begin array of KCPU Queue enqueues JIT Free", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ ++ "Array item of KCPU Queue enqueues JIT Free", \ ++ "@pI", \ ++ "kcpu_queue,jit_alloc_jit_id") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ ++ "End array of KCPU Queue enqueues JIT Free", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ ++ "KCPU Queue enqueues Error Barrier", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ ++ "KCPU Queue enqueues Group Suspend", \ ++ "@ppI", \ ++ "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ ++ "KCPU Queue starts a Signal on Fence", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ ++ "KCPU Queue ends a Signal on Fence", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ ++ "KCPU Queue starts a Wait on Fence", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ ++ "KCPU Queue ends a Wait on Fence", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ ++ "KCPU Queue starts a Wait on Cross Queue Sync Object", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ ++ "KCPU Queue ends a Wait on Cross Queue Sync Object", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ ++ "KCPU Queue executes a Set on Cross Queue Sync Object", \ ++ "@pI", \ ++ 
"kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \ ++ "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \ ++ "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \ ++ "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ ++ "KCPU Queue starts a Map Import", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ ++ "KCPU Queue ends a Map Import", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ ++ "KCPU Queue starts an Unmap Import", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ ++ "KCPU Queue ends an Unmap Import", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ ++ "KCPU Queue starts an Unmap Import ignoring reference count", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ ++ "KCPU Queue ends an Unmap Import ignoring reference count", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ ++ "KCPU Queue starts an array of JIT Allocs", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ ++ "Begin array of KCPU Queue ends an array of JIT Allocs", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ ++ "Array item of KCPU Queue ends an array of JIT Allocs", \ ++ "@pILL", \ ++ "kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ ++ "End array of KCPU Queue ends an array of JIT Allocs", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ ++ "KCPU Queue starts an array of JIT Frees", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ ++ "Begin array of KCPU Queue ends an array of JIT Frees", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ ++ "Array item of KCPU Queue ends an array of JIT Frees", \ ++ "@pIL", \ ++ "kcpu_queue,execute_error,jit_free_pages_used") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ ++ "End array of KCPU Queue ends an array of JIT Frees", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \ ++ "KCPU Queue executes an Error Barrier", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \ ++ "KCPU Queue starts a group suspend", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \ ++ "KCPU Queue ends a group suspend", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_RELOADING, \ ++ "CSF FW is being reloaded", \ ++ "@L", \ ++ "csffw_cycle") \ ++ 
TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_ENABLING, \ ++ "CSF FW is being enabled", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP, \ ++ "CSF FW sleep is requested", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP, \ ++ "CSF FW wake up is requested", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT, \ ++ "CSF FW halt is requested", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_DISABLING, \ ++ "CSF FW is being disabled", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_FW_OFF, \ ++ "CSF FW is off", \ ++ "@L", \ ++ "csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ ++ "An overflow has happened with the CSFFW Timeline stream", \ ++ "@LL", \ ++ "csffw_timestamp,csffw_cycle") + -+/* GP group */ -+static struct mali_group *gp_group = NULL; ++#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header ++#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL ++#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ ++#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST ++#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT + -+/* GP group state is tracked with a state variable instead of 4 lists */ -+static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT; ++#include "mali_kbase_mipe_gen_header.h" + -+static u32 gp_returned_cookie = 0; ++const char *obj_desc_header = (const char *) &__obj_desc_header; ++const size_t obj_desc_header_size = sizeof(__obj_desc_header); + -+/* Total number of physical PP cores present */ -+static u32 num_physical_pp_cores_total = 0; ++/* Message ids of trace events that are recorded in the aux stream. */ ++enum tl_msg_id_aux { ++ KBASE_AUX_PM_STATE, ++ KBASE_AUX_PAGEFAULT, ++ KBASE_AUX_PAGESALLOC, ++ KBASE_AUX_DEVFREQ_TARGET, ++ KBASE_AUX_JIT_STATS, ++ KBASE_AUX_TILER_HEAP_STATS, ++ KBASE_AUX_EVENT_JOB_SLOT, ++ KBASE_AUX_PROTECTED_ENTER_START, ++ KBASE_AUX_PROTECTED_ENTER_END, ++ KBASE_AUX_MMU_COMMAND, ++ KBASE_AUX_PROTECTED_LEAVE_START, ++ KBASE_AUX_PROTECTED_LEAVE_END, ++ KBASE_AUX_MSG_COUNT, ++}; + -+/* Number of physical cores which are enabled */ -+static u32 num_physical_pp_cores_enabled = 0; ++#define AUX_TP_LIST \ ++ TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ ++ "PM state", \ ++ "@IL", \ ++ "core_type,core_state_bitset") \ ++ TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ ++ "Page fault", \ ++ "@IIL", \ ++ "ctx_nr,as_nr,page_cnt_change") \ ++ TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ ++ "Total alloc pages change", \ ++ "@IL", \ ++ "ctx_nr,page_cnt") \ ++ TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ ++ "New device frequency target", \ ++ "@L", \ ++ "target_freq") \ ++ TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ ++ "per-bin JIT statistics", \ ++ "@IIIIII", \ ++ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ ++ TRACEPOINT_DESC(KBASE_AUX_TILER_HEAP_STATS, \ ++ "Tiler Heap statistics", \ ++ "@ILIIIIIII", \ ++ "ctx_nr,heap_id,va_pages,ph_pages,max_chunks,chunk_size,chunk_count,target_in_flight,nr_in_flight") \ ++ TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ ++ "event on a given job slot", \ ++ "@pIII", \ ++ "ctx,slot_nr,atom_nr,event") \ ++ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ ++ "enter protected mode start", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ ++ "enter protected mode end", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_AUX_MMU_COMMAND, \ ++ "mmu commands with synchronicity info", \ ++ "@IIILI", \ ++ 
"kernel_ctx_id,mmu_cmd_id,mmu_synchronicity,mmu_lock_addr,mmu_lock_page_num") \ ++ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ ++ "leave protected mode start", \ ++ "@p", \ ++ "gpu") \ ++ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ ++ "leave protected mode end", \ ++ "@p", \ ++ "gpu") + -+/* Enable or disable core scaling */ -+static mali_bool core_scaling_enabled = MALI_TRUE; ++#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header ++#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL ++#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX ++#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST ++#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT + -+/* Variables to allow safe pausing of the scheduler */ -+static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL; -+static u32 pause_count = 0; ++#include "mali_kbase_mipe_gen_header.h" + -+/* PP cores haven't been enabled because of some pp cores haven't been disabled. */ -+static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; ++const char *aux_desc_header = (const char *) &__aux_desc_header; ++const size_t aux_desc_header_size = sizeof(__aux_desc_header); + -+/* Variables used to implement notify pp core changes to userspace when core scaling -+ * is finished in mali_executor_complete_group() function. */ -+static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL; -+static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL; ++void __kbase_tlstream_tl_new_ctx( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ u32 ctx_nr, ++ u32 tgid ++) ++{ ++ const u32 msg_id = KBASE_TL_NEW_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx) ++ + sizeof(ctx_nr) ++ + sizeof(tgid) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+/* -+ * ---------- Forward declaration of static functions ---------- -+ */ -+static mali_bool mali_executor_is_suspended(void *data); -+static mali_bool mali_executor_is_working(void); -+static void mali_executor_disable_empty_virtual(void); -+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group); -+static mali_bool mali_executor_has_virtual_group(void); -+static mali_bool mali_executor_virtual_group_is_usable(void); -+static void mali_executor_schedule(void); -+static void mali_executor_wq_schedule(void *arg); -+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job); -+static void mali_executor_complete_group(struct mali_group *group, -+ mali_bool success, -+ struct mali_gp_job **gp_job_done, -+ struct mali_pp_job **pp_job_done); -+static void mali_executor_change_state_pp_physical(struct mali_group *group, -+ _mali_osk_list_t *old_list, -+ u32 *old_count, -+ _mali_osk_list_t *new_list, -+ u32 *new_count); -+static mali_bool mali_executor_group_is_in_state(struct mali_group *group, -+ enum mali_executor_state_t state); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+static void mali_executor_group_enable_internal(struct mali_group *group); -+static void mali_executor_group_disable_internal(struct mali_group *group); -+static void mali_executor_core_scale(unsigned int target_core_nr); -+static void mali_executor_core_scale_in_group_complete(struct mali_group *group); -+static void mali_executor_notify_core_change(u32 num_cores); -+static void mali_executor_wq_notify_core_change(void *arg); -+static void mali_executor_change_group_status_disabled(struct mali_group *group); -+static mali_bool mali_executor_deactivate_list_idle(mali_bool 
deactivate_idle_group); -+static void mali_executor_set_state_pp_physical(struct mali_group *group, -+ _mali_osk_list_t *new_list, -+ u32 *new_count); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &tgid, sizeof(tgid)); + -+/* -+ * ---------- Actual implementation ---------- -+ */ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+_mali_osk_errcode_t mali_executor_initialize(void) ++void __kbase_tlstream_tl_new_gpu( ++ struct kbase_tlstream *stream, ++ const void *gpu, ++ u32 gpu_id, ++ u32 core_count ++) +{ -+ mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR); -+ if (NULL == mali_executor_lock_obj) { -+ mali_executor_terminate(); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL); -+ if (NULL == executor_wq_high_pri) { -+ mali_executor_terminate(); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ executor_working_wait_queue = _mali_osk_wait_queue_init(); -+ if (NULL == executor_working_wait_queue) { -+ mali_executor_terminate(); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ const u32 msg_id = KBASE_TL_NEW_GPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ + sizeof(gpu_id) ++ + sizeof(core_count) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ executor_wq_notify_core_change = _mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL); -+ if (NULL == executor_wq_notify_core_change) { -+ mali_executor_terminate(); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init(); -+ if (NULL == executor_notify_core_change_wait_queue) { -+ mali_executor_terminate(); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu_id, sizeof(gpu_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &core_count, sizeof(core_count)); + -+ return _MALI_OSK_ERR_OK; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_terminate(void) ++void __kbase_tlstream_tl_new_lpu( ++ struct kbase_tlstream *stream, ++ const void *lpu, ++ u32 lpu_nr, ++ u32 lpu_fn ++) +{ -+ if (NULL != executor_notify_core_change_wait_queue) { -+ _mali_osk_wait_queue_term(executor_notify_core_change_wait_queue); -+ executor_notify_core_change_wait_queue = NULL; -+ } -+ -+ if (NULL != executor_wq_notify_core_change) { -+ _mali_osk_wq_delete_work(executor_wq_notify_core_change); -+ executor_wq_notify_core_change = NULL; -+ } ++ const u32 msg_id = KBASE_TL_NEW_LPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(lpu) ++ + sizeof(lpu_nr) ++ + sizeof(lpu_fn) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (NULL != executor_working_wait_queue) { -+ _mali_osk_wait_queue_term(executor_working_wait_queue); -+ executor_working_wait_queue = NULL; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != executor_wq_high_pri) { -+ 
_mali_osk_wq_delete_work(executor_wq_high_pri); -+ executor_wq_high_pri = NULL; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu_nr, sizeof(lpu_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu_fn, sizeof(lpu_fn)); + -+ if (NULL != mali_executor_lock_obj) { -+ _mali_osk_spinlock_irq_term(mali_executor_lock_obj); -+ mali_executor_lock_obj = NULL; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_populate(void) ++void __kbase_tlstream_tl_new_atom( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++) +{ -+ u32 num_groups; -+ u32 i; -+ -+ num_groups = mali_group_get_glob_num_groups(); ++ const u32 msg_id = KBASE_TL_NEW_ATOM; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(atom_nr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Do we have a virtual group? */ -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (mali_group_is_virtual(group)) { -+ virtual_group = group; -+ virtual_group_state = EXEC_STATE_INACTIVE; -+ break; -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom_nr, sizeof(atom_nr)); + -+ /* Find all the available physical GP and PP cores */ -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (NULL != group) { -+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); -+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); ++void __kbase_tlstream_tl_new_as( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ u32 as_nr ++) ++{ ++ const u32 msg_id = KBASE_TL_NEW_AS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(address_space) ++ + sizeof(as_nr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (!mali_group_is_virtual(group)) { -+ if (NULL != pp_core) { -+ if (0 == pp_version) { -+ /* Retrieve PP version from the first available PP core */ -+ pp_version = mali_pp_core_get_version(pp_core); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != virtual_group) { -+ mali_executor_lock(); -+ mali_group_add_group(virtual_group, group); -+ mali_executor_unlock(); -+ } else { -+ _mali_osk_list_add(&group->executor_list, &group_list_inactive); -+ group_list_inactive_count++; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &as_nr, sizeof(as_nr)); + -+ num_physical_pp_cores_total++; -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(gp_core); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (0 == gp_version) { -+ /* Retrieve GP version */ -+ gp_version = mali_gp_core_get_version(gp_core); -+ } ++void __kbase_tlstream_tl_del_ctx( ++ struct kbase_tlstream *stream, ++ const void *ctx ++) ++{ ++ const u32 msg_id 
= KBASE_TL_DEL_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ gp_group = group; -+ gp_group_state = EXEC_STATE_INACTIVE; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); + -+ num_physical_pp_cores_enabled = num_physical_pp_cores_total; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_depopulate(void) ++void __kbase_tlstream_tl_del_atom( ++ struct kbase_tlstream *stream, ++ const void *atom ++) +{ -+ struct mali_group *group; -+ struct mali_group *temp; -+ -+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state); ++ const u32 msg_id = KBASE_TL_DEL_ATOM; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (NULL != gp_group) { -+ mali_group_delete(gp_group); -+ gp_group = NULL; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ if (NULL != virtual_group) { -+ mali_group_delete(virtual_group); -+ virtual_group = NULL; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working)); ++void __kbase_tlstream_tl_lifelink_lpu_gpu( ++ struct kbase_tlstream *stream, ++ const void *lpu, ++ const void *gpu ++) ++{ ++ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(lpu) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { -+ mali_group_delete(group); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { -+ mali_group_delete(group); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { -+ mali_group_delete(group); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_suspend(void) ++void __kbase_tlstream_tl_lifelink_as_gpu( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *gpu ++) +{ -+ mali_executor_lock(); -+ -+ /* Increment the pause_count so that no more jobs will be scheduled */ -+ pause_count++; ++ const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(address_space) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_unlock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _mali_osk_wait_queue_wait_event(executor_working_wait_queue, -+ mali_executor_is_suspended, 
NULL); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ /* -+ * mali_executor_complete_XX() leaves jobs in idle state. -+ * deactivate option is used when we are going to power down -+ * the entire GPU (OS suspend) and want a consistent SW vs HW -+ * state. -+ */ -+ mali_executor_lock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_deactivate_list_idle(MALI_TRUE); ++void __kbase_tlstream_tl_ret_ctx_lpu( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ const void *lpu ++) ++{ ++ const u32 msg_id = KBASE_TL_RET_CTX_LPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx) ++ + sizeof(lpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* -+ * The following steps are used to deactive all of activated -+ * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP -+ * _STAET_ACTIVATION_PENDING) groups, to make sure the variable -+ * pd_mask_wanted is equal with 0. */ -+ if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) { -+ gp_group_state = EXEC_STATE_INACTIVE; -+ mali_group_deactivate(gp_group); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (mali_executor_has_virtual_group()) { -+ if (MALI_GROUP_STATE_INACTIVE -+ != mali_group_get_state(virtual_group)) { -+ virtual_group_state = EXEC_STATE_INACTIVE; -+ mali_group_deactivate(virtual_group); -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); + -+ if (0 < group_list_inactive_count) { -+ struct mali_group *group; -+ struct mali_group *temp; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, -+ &group_list_inactive, -+ struct mali_group, executor_list) { -+ if (MALI_GROUP_STATE_ACTIVATION_PENDING -+ == mali_group_get_state(group)) { -+ mali_group_deactivate(group); -+ } ++void __kbase_tlstream_tl_ret_atom_ctx( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *ctx ++) ++{ ++ const u32 msg_id = KBASE_TL_RET_ATOM_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(ctx) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* -+ * On mali-450 platform, we may have physical group in the group inactive -+ * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only -+ * deactivate it is not enough, we still also need add it back to virtual group. -+ * And now, virtual group must be in INACTIVE state, so it's safe to add -+ * physical group to virtual group at this point. 
-+ */ -+ if (NULL != virtual_group) { -+ _mali_osk_list_delinit(&group->executor_list); -+ group_list_inactive_count--; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_group_add_group(virtual_group, group); -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_resume(void) ++void __kbase_tlstream_tl_ret_atom_lpu( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *lpu, ++ const char *attrib_match_list ++) +{ -+ mali_executor_lock(); ++ const u32 msg_id = KBASE_TL_RET_ATOM_LPU; ++ const size_t s2 = sizeof(u32) + sizeof(char) ++ + strnlen(attrib_match_list, STRLEN_MAX); ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(lpu) ++ + s2 ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Decrement pause_count to allow scheduling again (if it reaches 0) */ -+ pause_count--; -+ if (0 == pause_count) { -+ mali_executor_schedule(); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_unlock(); -+} ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); ++ pos = kbasep_serialize_string(buffer, ++ pos, attrib_match_list, s2); + -+u32 mali_executor_get_num_cores_total(void) -+{ -+ return num_physical_pp_cores_total; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+u32 mali_executor_get_num_cores_enabled(void) ++void __kbase_tlstream_tl_nret_ctx_lpu( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ const void *lpu ++) +{ -+ return num_physical_pp_cores_enabled; -+} ++ const u32 msg_id = KBASE_TL_NRET_CTX_LPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx) ++ + sizeof(lpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+struct mali_pp_core *mali_executor_get_virtual_pp(void) -+{ -+ MALI_DEBUG_ASSERT_POINTER(virtual_group); -+ MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core); -+ return virtual_group->pp_core; -+} ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+struct mali_group *mali_executor_get_virtual_group(void) -+{ -+ return virtual_group; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_zap_all_active(struct mali_session_data *session) ++void __kbase_tlstream_tl_nret_atom_ctx( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *ctx ++) +{ -+ struct mali_group *group; -+ struct mali_group *temp; -+ mali_bool ret; -+ -+ mali_executor_lock(); -+ -+ /* -+ * This function is a bit complicated because -+ * mali_group_zap_session() can fail. This only happens because the -+ * group is in an unhandled page fault status. 
-+ * We need to make sure this page fault is handled before we return, -+ * so that we know every single outstanding MMU transactions have -+ * completed. This will allow caller to safely remove physical pages -+ * when we have returned. -+ */ ++ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(ctx) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT(NULL != gp_group); -+ ret = mali_group_zap_session(gp_group, session); -+ if (MALI_FALSE == ret) { -+ struct mali_gp_job *gp_job = NULL; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); + -+ MALI_DEBUG_ASSERT_POINTER(gp_job); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* GP job completed, make sure it is freed */ -+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, -+ MALI_TRUE, MALI_TRUE); -+ } ++void __kbase_tlstream_tl_nret_atom_lpu( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *lpu ++) ++{ ++ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(lpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (mali_executor_has_virtual_group()) { -+ ret = mali_group_zap_session(virtual_group, session); -+ if (MALI_FALSE == ret) { -+ struct mali_pp_job *pp_job = NULL; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); + -+ if (NULL != pp_job) { -+ /* PP job completed, make sure it is freed */ -+ mali_scheduler_complete_pp_job(pp_job, 0, -+ MALI_FALSE, MALI_TRUE); -+ } -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, -+ struct mali_group, executor_list) { -+ ret = mali_group_zap_session(group, session); -+ if (MALI_FALSE == ret) { -+ ret = mali_group_zap_session(group, session); -+ if (MALI_FALSE == ret) { -+ struct mali_pp_job *pp_job = NULL; ++void __kbase_tlstream_tl_ret_as_ctx( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *ctx ++) ++{ ++ const u32 msg_id = KBASE_TL_RET_AS_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(address_space) ++ + sizeof(ctx) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != pp_job) { -+ /* PP job completed, free it */ -+ mali_scheduler_complete_pp_job(pp_job, -+ 0, MALI_FALSE, -+ MALI_TRUE); -+ } -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); ++ pos = 
kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule) ++void __kbase_tlstream_tl_nret_as_ctx( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *ctx ++) +{ -+ if (MALI_SCHEDULER_MASK_EMPTY != mask) { -+ if (MALI_TRUE == deferred_schedule) { -+ _mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri); -+ } else { -+ /* Schedule from this thread*/ -+ mali_executor_lock(); -+ mali_executor_schedule(); -+ mali_executor_unlock(); -+ } -+ } ++ const u32 msg_id = KBASE_TL_NRET_AS_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(address_space) ++ + sizeof(ctx) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, -+ mali_bool in_upper_half) ++void __kbase_tlstream_tl_ret_atom_as( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *address_space ++) +{ -+ enum mali_interrupt_result int_result; -+ mali_bool time_out = MALI_FALSE; -+ -+ MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n", -+ mali_group_core_description(group), -+ in_upper_half ? "upper" : "bottom")); ++ const u32 msg_id = KBASE_TL_RET_ATOM_AS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(address_space) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_working(group)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); + -+ if (mali_group_has_timed_out(group)) { -+ int_result = MALI_INTERRUPT_RESULT_ERROR; -+ time_out = MALI_TRUE; -+ MALI_PRINT(("Executor GP: Job %d Timeout on %s\n", -+ mali_gp_job_get_id(group->gp_running_job), -+ mali_group_core_description(group))); -+ } else { -+ int_result = mali_group_get_interrupt_result_gp(group); -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ /* No interrupts signalled, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#else -+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result); -+#endif ++void __kbase_tlstream_tl_nret_atom_as( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *address_space ++) ++{ ++ const u32 msg_id = 
KBASE_TL_NRET_ATOM_AS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(address_space) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_group_mask_all_interrupts_gp(group); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) { -+ if (mali_group_gp_is_active(group)) { -+ /* Only VS completed so far, while PLBU is still active */ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); + -+ /* Enable all but the current interrupt */ -+ mali_group_enable_interrupts_gp(group, int_result); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_OK; -+ } -+ } else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) { -+ if (mali_group_gp_is_active(group)) { -+ /* Only PLBU completed so far, while VS is still active */ ++void __kbase_tlstream_tl_attrib_atom_config( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u64 descriptor, ++ u64 affinity, ++ u32 config ++) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(descriptor) ++ + sizeof(affinity) ++ + sizeof(config) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Enable all but the current interrupt */ -+ mali_group_enable_interrupts_gp(group, int_result); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_OK; -+ } -+ } else if (MALI_INTERRUPT_RESULT_OOM == int_result) { -+ struct mali_gp_job *job = mali_group_get_running_gp_job(group); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &descriptor, sizeof(descriptor)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &affinity, sizeof(affinity)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &config, sizeof(config)); + -+ /* PLBU out of mem */ -+ MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n")); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+#if defined(CONFIG_MALI400_PROFILING) -+ /* Give group a chance to generate a SUSPEND event */ -+ mali_group_oom(group); -+#endif ++void __kbase_tlstream_tl_jit_usedpages( ++ struct kbase_tlstream *stream, ++ u64 used_pages, ++ u32 j_id ++) ++{ ++ const u32 msg_id = KBASE_TL_JIT_USEDPAGES; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(used_pages) ++ + sizeof(j_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* -+ * no need to hold interrupt raised while -+ * waiting for more memory. 
-+ */ -+ mali_executor_send_gp_oom_to_user(job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &used_pages, sizeof(used_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &j_id, sizeof(j_id)); + -+ return _MALI_OSK_ERR_OK; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /*Add for voltage scan function*/ -+ if (MALI_INTERRUPT_RESULT_ERROR == int_result) -+ mali_group_error++; ++void __kbase_tlstream_tl_attrib_atom_jitallocinfo( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u64 va_pgs, ++ u64 com_pgs, ++ u64 extent, ++ u32 j_id, ++ u32 bin_id, ++ u32 max_allocs, ++ u32 jit_flags, ++ u32 usg_id ++) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(va_pgs) ++ + sizeof(com_pgs) ++ + sizeof(extent) ++ + sizeof(j_id) ++ + sizeof(bin_id) ++ + sizeof(max_allocs) ++ + sizeof(jit_flags) ++ + sizeof(usg_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* We should now have a real interrupt to handle */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n", -+ mali_group_core_description(group), -+ (MALI_INTERRUPT_RESULT_ERROR == int_result) ? -+ "ERROR" : "success")); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &va_pgs, sizeof(va_pgs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &com_pgs, sizeof(com_pgs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &extent, sizeof(extent)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &j_id, sizeof(j_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &bin_id, sizeof(bin_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &max_allocs, sizeof(max_allocs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_flags, sizeof(jit_flags)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &usg_id, sizeof(usg_id)); + -+ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) { -+ /* Don't bother to do processing of errors in upper half */ -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (MALI_FALSE == time_out) { -+ mali_group_schedule_bottom_half_gp(group); -+ } -+ } else { -+ struct mali_gp_job *job; -+ mali_bool success; ++void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 j_id ++) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(j_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* -+ if (MALI_TRUE == time_out) { -+ mali_group_dump_status(group); -+ } -+ */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ? 
-+ MALI_TRUE : MALI_FALSE; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &j_id, sizeof(j_id)); + -+ mali_executor_complete_group(group, success, &job, NULL); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_unlock(); ++void __kbase_tlstream_tl_attrib_as_config( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ u64 transtab, ++ u64 memattr, ++ u64 transcfg ++) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(address_space) ++ + sizeof(transtab) ++ + sizeof(memattr) ++ + sizeof(transcfg) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* GP jobs always fully complete */ -+ MALI_DEBUG_ASSERT(NULL != job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* This will notify user space and close the job object */ -+ mali_scheduler_complete_gp_job(job, success, -+ MALI_TRUE, MALI_TRUE); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &address_space, sizeof(address_space)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &transtab, sizeof(transtab)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &memattr, sizeof(memattr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &transcfg, sizeof(transcfg)); + -+ return _MALI_OSK_ERR_OK; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, -+ mali_bool in_upper_half) ++void __kbase_tlstream_tl_event_lpu_softstop( ++ struct kbase_tlstream *stream, ++ const void *lpu ++) +{ -+ enum mali_interrupt_result int_result; -+ mali_bool time_out = MALI_FALSE; -+ -+ MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n", -+ mali_group_core_description(group), -+ in_upper_half ? 
"upper" : "bottom")); ++ const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(lpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &lpu, sizeof(lpu)); + -+ if (in_upper_half) { -+ if (mali_group_is_in_virtual(group)) { -+ /* Child groups should never handle PP interrupts */ -+ MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group)); -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_working(group)); -+ MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group)); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (mali_group_has_timed_out(group)) { -+ int_result = MALI_INTERRUPT_RESULT_ERROR; -+ time_out = MALI_TRUE; -+ MALI_PRINT(("Executor PP: Job %d Timeout on %s\n", -+ mali_pp_job_get_id(group->pp_running_job), -+ mali_group_core_description(group))); -+ } else { -+ int_result = mali_group_get_interrupt_result_pp(group); -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } ++void __kbase_tlstream_tl_event_atom_softstop_ex( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ /* No interrupts signalled, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) { -+ if (mali_group_is_virtual(group) && mali_group_pp_is_active(group)) { -+ /* Some child groups are still working, so nothing to do right now */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } -+#else -+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result); -+#endif ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /*Add voltage scan function*/ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ if (MALI_INTERRUPT_RESULT_ERROR == int_result) -+ mali_group_error++; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* We should now have a real interrupt to handle */ ++void __kbase_tlstream_tl_event_atom_softstop_issue( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n", -+ mali_group_core_description(group), -+ (MALI_INTERRUPT_RESULT_ERROR == int_result) ? 
-+ "ERROR" : "success")); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) { -+ /* Don't bother to do processing of errors in upper half */ -+ mali_group_mask_all_interrupts_pp(group); -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ if (MALI_FALSE == time_out) { -+ mali_group_schedule_bottom_half_pp(group); -+ } -+ } else { -+ struct mali_pp_job *job = NULL; -+ mali_bool success; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (MALI_TRUE == time_out) { -+ mali_group_dump_status(group); -+ } ++void __kbase_tlstream_tl_event_atom_softjob_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ? -+ MALI_TRUE : MALI_FALSE; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_complete_group(group, success, NULL, &job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (NULL != job) { -+ /* Notify user space and close the job object */ -+ mali_scheduler_complete_pp_job(job, -+ num_physical_pp_cores_total, -+ MALI_TRUE, MALI_TRUE); -+ } -+ } ++void __kbase_tlstream_tl_event_atom_softjob_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ return _MALI_OSK_ERR_OK; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, -+ mali_bool in_upper_half) ++void __kbase_tlstream_tl_arbiter_granted( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ enum mali_interrupt_result int_result; -+ -+ MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n", -+ mali_group_core_description(group), -+ in_upper_half ? 
"upper" : "bottom")); ++ const u32 msg_id = KBASE_TL_ARBITER_GRANTED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_working(group)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ int_result = mali_group_get_interrupt_result_mmu(group); -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ if (MALI_INTERRUPT_RESULT_NONE == int_result) { -+ /* No interrupts signalled, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#else -+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result); -+#endif ++void __kbase_tlstream_tl_arbiter_started( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) ++{ ++ const u32 msg_id = KBASE_TL_ARBITER_STARTED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* We should now have a real interrupt to handle */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (in_upper_half) { -+ /* Don't bother to do processing of errors in upper half */ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ struct mali_group *parent = group->parent_group; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_mmu_mask_all_interrupts(group->mmu); ++void __kbase_tlstream_tl_arbiter_stop_requested( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) ++{ ++ const u32 msg_id = KBASE_TL_ARBITER_STOP_REQUESTED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_unlock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL == parent) { -+ mali_group_schedule_bottom_half_mmu(group); -+ } else { -+ mali_group_schedule_bottom_half_mmu(parent); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ } else { -+ struct mali_gp_job *gp_job = NULL; -+ struct mali_pp_job *pp_job = NULL; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+#ifdef DEBUG ++void __kbase_tlstream_tl_arbiter_stopped( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) ++{ ++ const u32 msg_id = KBASE_TL_ARBITER_STOPPED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu); -+ u32 status = mali_mmu_get_status(group->mmu); -+ MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n", -+ (void 
*)(uintptr_t)fault_address, -+ (status >> 6) & 0x1F, -+ (status & 32) ? "write" : "read", -+ group->mmu->hw_core.description)); -+ MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n", -+ mali_mmu_get_rawstat(group->mmu), status)); -+ mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address); -+#endif ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (NULL != gp_job) { -+ MALI_DEBUG_ASSERT(NULL == pp_job); ++void __kbase_tlstream_tl_arbiter_requested( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) ++{ ++ const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Notify user space and close the job object */ -+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, -+ MALI_TRUE, MALI_TRUE); -+ } else if (NULL != pp_job) { -+ MALI_DEBUG_ASSERT(NULL == gp_job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Notify user space and close the job object */ -+ mali_scheduler_complete_pp_job(pp_job, -+ num_physical_pp_cores_total, -+ MALI_TRUE, MALI_TRUE); -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ return _MALI_OSK_ERR_OK; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups) ++void __kbase_tlstream_jd_gpu_soft_reset( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ u32 i; -+ mali_bool child_groups_activated = MALI_FALSE; -+ mali_bool do_schedule = MALI_FALSE; -+#if defined(DEBUG) -+ u32 num_activated = 0; -+#endif -+ -+ MALI_DEBUG_ASSERT_POINTER(groups); -+ MALI_DEBUG_ASSERT(0 < num_groups); ++ const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ for (i = 0; i < num_groups; i++) { -+ MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n", -+ mali_group_core_description(groups[i]))); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_group_power_up(groups[i]); ++void __kbase_tlstream_jd_tiler_heap_chunk_alloc( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u64 chunk_va ++) ++{ ++ const u32 msg_id = KBASE_JD_TILER_HEAP_CHUNK_ALLOC; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(heap_id) ++ + sizeof(chunk_va) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) || -+ (MALI_TRUE != 
mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) { -+ /* nothing more to do for this group */ -+ continue; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n", -+ mali_group_core_description(groups[i]))); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &heap_id, sizeof(heap_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &chunk_va, sizeof(chunk_va)); + -+#if defined(DEBUG) -+ num_activated++; -+#endif ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (mali_group_is_in_virtual(groups[i])) { -+ /* -+ * At least one child group of virtual group is powered on. -+ */ -+ child_groups_activated = MALI_TRUE; -+ } else if (MALI_FALSE == mali_group_is_virtual(groups[i])) { -+ /* Set gp and pp not in virtual to active. */ -+ mali_group_set_active(groups[i]); -+ } ++void __kbase_tlstream_tl_js_sched_start( ++ struct kbase_tlstream *stream, ++ u32 dummy ++) ++{ ++ const u32 msg_id = KBASE_TL_JS_SCHED_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(dummy) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Move group from inactive to idle list */ -+ if (groups[i] == gp_group) { -+ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE == -+ gp_group_state); -+ gp_group_state = EXEC_STATE_IDLE; -+ } else if (MALI_FALSE == mali_group_is_in_virtual(groups[i]) -+ && MALI_FALSE == mali_group_is_virtual(groups[i])) { -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i], -+ EXEC_STATE_INACTIVE)); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_change_state_pp_physical(groups[i], -+ &group_list_inactive, -+ &group_list_inactive_count, -+ &group_list_idle, -+ &group_list_idle_count); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &dummy, sizeof(dummy)); + -+ do_schedule = MALI_TRUE; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (mali_executor_has_virtual_group() && -+ MALI_TRUE == child_groups_activated && -+ MALI_GROUP_STATE_ACTIVATION_PENDING == -+ mali_group_get_state(virtual_group)) { -+ /* -+ * Try to active virtual group while it may be not sucessful every time, -+ * because there is one situation that not all of child groups are powered on -+ * in one time and virtual group is in activation pending state. 
-+ */ -+ if (mali_group_set_active(virtual_group)) { -+ /* Move group from inactive to idle */ -+ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE == -+ virtual_group_state); -+ virtual_group_state = EXEC_STATE_IDLE; ++void __kbase_tlstream_tl_js_sched_end( ++ struct kbase_tlstream *stream, ++ u32 dummy ++) ++{ ++ const u32 msg_id = KBASE_TL_JS_SCHED_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(dummy) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated, 1 virtual activated.\n", num_groups, num_activated)); -+ } else { -+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated)); -+ } -+ } else { -+ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated)); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (MALI_TRUE == do_schedule) { -+ /* Trigger a schedule */ -+ mali_executor_schedule(); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &dummy, sizeof(dummy)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_group_power_down(struct mali_group *groups[], -+ u32 num_groups) ++void __kbase_tlstream_tl_jd_submit_atom_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++) +{ -+ u32 i; -+ -+ MALI_DEBUG_ASSERT_POINTER(groups); -+ MALI_DEBUG_ASSERT(0 < num_groups); ++ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ for (i = 0; i < num_groups; i++) { -+ /* Groups must be either disabled or inactive. while for virtual group, -+ * it maybe in empty state, because when we meet pm_runtime_suspend, -+ * virtual group could be powered off, and before we acquire mali_executor_lock, -+ * we must release mali_pm_state_lock, if there is a new physical job was queued, -+ * all of physical groups in virtual group could be pulled out, so we only can -+ * powered down an empty virtual group. Those physical groups will be powered -+ * up in following pm_runtime_resume callback function. 
-+ */ -+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i], -+ EXEC_STATE_DISABLED) || -+ mali_executor_group_is_in_state(groups[i], -+ EXEC_STATE_INACTIVE) || -+ mali_executor_group_is_in_state(groups[i], -+ EXEC_STATE_EMPTY)); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n", -+ mali_group_core_description(groups[i]))); ++void __kbase_tlstream_tl_jd_submit_atom_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_group_power_down(groups[i]); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_abort_session(struct mali_session_data *session) ++void __kbase_tlstream_tl_jd_done_no_lock_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++) +{ -+ struct mali_group *group; -+ struct mali_group *tmp_group; ++ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT(session->is_aborting); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_PRINT(3, -+ ("Executor: Aborting all jobs from session 0x%08X.\n", -+ session)); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ mali_executor_lock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (mali_group_get_session(gp_group) == session) { -+ if (EXEC_STATE_WORKING == gp_group_state) { -+ struct mali_gp_job *gp_job = NULL; ++void __kbase_tlstream_tl_jd_done_no_lock_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ MALI_DEBUG_ASSERT_POINTER(gp_job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ /* GP job completed, make sure it is freed */ -+ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, -+ MALI_FALSE, MALI_TRUE); -+ } else { -+ /* Same session, but not working, so just clear it */ -+ mali_group_clear_session(gp_group); -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (mali_executor_has_virtual_group()) { -+ if (EXEC_STATE_WORKING == virtual_group_state -+ && mali_group_get_session(virtual_group) == session) { -+ struct mali_pp_job *pp_job = NULL; ++void __kbase_tlstream_tl_jd_done_start( ++ struct kbase_tlstream 
*stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_JD_DONE_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != pp_job) { -+ /* PP job completed, make sure it is freed */ -+ mali_scheduler_complete_pp_job(pp_job, 0, -+ MALI_FALSE, MALI_TRUE); -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working, -+ struct mali_group, executor_list) { -+ if (mali_group_get_session(group) == session) { -+ struct mali_pp_job *pp_job = NULL; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job); ++void __kbase_tlstream_tl_jd_done_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_JD_DONE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (NULL != pp_job) { -+ /* PP job completed, make sure it is freed */ -+ mali_scheduler_complete_pp_job(pp_job, 0, -+ MALI_FALSE, MALI_TRUE); -+ } -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) { -+ mali_group_clear_session(group); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) { -+ mali_group_clear_session(group); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) { -+ mali_group_clear_session(group); -+ } ++void __kbase_tlstream_tl_jd_atom_complete( ++ struct kbase_tlstream *stream, ++ const void *atom ++) ++{ ++ const u32 msg_id = KBASE_TL_JD_ATOM_COMPLETE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_unlock(); -+} ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+void mali_executor_core_scaling_enable(void) -+{ -+ /* PS: Core scaling is by default enabled */ -+ core_scaling_enabled = MALI_TRUE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_core_scaling_disable(void) ++void __kbase_tlstream_tl_run_atom_start( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++) +{ -+ core_scaling_enabled = MALI_FALSE; -+} ++ const u32 msg_id = KBASE_TL_RUN_ATOM_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(atom_nr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+mali_bool mali_executor_core_scaling_is_enabled(void) -+{ -+ 
return core_scaling_enabled; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom_nr, sizeof(atom_nr)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_group_enable(struct mali_group *group) ++void __kbase_tlstream_tl_run_atom_end( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ -+ mali_executor_lock(); ++ const u32 msg_id = KBASE_TL_RUN_ATOM_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(atom_nr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group)) -+ && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) { -+ mali_executor_group_enable_internal(group); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_schedule(); -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom_nr, sizeof(atom_nr)); + -+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+/* -+ * If a physical group is inactive or idle, we should disable it immediately, -+ * if group is in virtual, and virtual group is idle, disable given physical group in it. 
-+ */ -+void mali_executor_group_disable(struct mali_group *group) ++void __kbase_tlstream_tl_attrib_atom_priority( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 prio ++) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ -+ mali_executor_lock(); ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(prio) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group)) -+ && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) { -+ mali_executor_group_disable_internal(group); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_schedule(); -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &prio, sizeof(prio)); + -+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+mali_bool mali_executor_group_is_disabled(struct mali_group *group) ++void __kbase_tlstream_tl_attrib_atom_state( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 state ++) +{ -+ /* NB: This function is not optimized for time critical usage */ -+ -+ mali_bool ret; ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(state) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_POINTER(group); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_lock(); -+ ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED); -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &state, sizeof(state)); + -+ return ret; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override) ++void __kbase_tlstream_tl_attrib_atom_prioritized( ++ struct kbase_tlstream *stream, ++ const void *atom ++) +{ -+ if (target_core_nr == num_physical_pp_cores_enabled) return 0; -+ if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM; -+ if (target_core_nr > num_physical_pp_cores_total) return -EINVAL; -+ if (0 == target_core_nr) return -EINVAL; ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_core_scale(target_core_nr); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); + -+ return 0; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+#if MALI_STATE_TRACKING -+u32 mali_executor_dump_state(char *buf, u32 size) ++void __kbase_tlstream_tl_attrib_atom_jit( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ 
u64 edit_addr, ++ u64 new_addr, ++ u32 jit_flags, ++ u64 mem_flags, ++ u32 j_id, ++ u64 com_pgs, ++ u64 extent, ++ u64 va_pgs ++) +{ -+ int n = 0; -+ struct mali_group *group; -+ struct mali_group *temp; -+ -+ mali_executor_lock(); ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(atom) ++ + sizeof(edit_addr) ++ + sizeof(new_addr) ++ + sizeof(jit_flags) ++ + sizeof(mem_flags) ++ + sizeof(j_id) ++ + sizeof(com_pgs) ++ + sizeof(extent) ++ + sizeof(va_pgs) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ switch (gp_group_state) { -+ case EXEC_STATE_INACTIVE: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "GP group is in state INACTIVE\n"); -+ break; -+ case EXEC_STATE_IDLE: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "GP group is in state IDLE\n"); -+ break; -+ case EXEC_STATE_WORKING: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "GP group is in state WORKING\n"); -+ break; -+ default: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "GP group is in unknown/illegal state %u\n", -+ gp_group_state); -+ break; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ n += mali_group_dump_state(gp_group, buf + n, size - n); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom, sizeof(atom)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &edit_addr, sizeof(edit_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &new_addr, sizeof(new_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_flags, sizeof(jit_flags)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &mem_flags, sizeof(mem_flags)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &j_id, sizeof(j_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &com_pgs, sizeof(com_pgs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &extent, sizeof(extent)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &va_pgs, sizeof(va_pgs)); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Physical PP groups in WORKING state (count = %u):\n", -+ group_list_working_count); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) { -+ n += mali_group_dump_state(group, buf + n, size - n); -+ } ++void __kbase_tlstream_tl_kbase_new_device( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_gpu_core_count, ++ u32 kbase_device_max_num_csgs, ++ u32 kbase_device_as_count, ++ u32 kbase_device_sb_entry_count, ++ u32 kbase_device_has_cross_stream_sync, ++ u32 kbase_device_supports_gpu_sleep ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kbase_device_gpu_core_count) ++ + sizeof(kbase_device_max_num_csgs) ++ + sizeof(kbase_device_as_count) ++ + sizeof(kbase_device_sb_entry_count) ++ + sizeof(kbase_device_has_cross_stream_sync) ++ + sizeof(kbase_device_supports_gpu_sleep) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Physical PP groups in IDLE state (count = %u):\n", -+ group_list_idle_count); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { -+ n += mali_group_dump_state(group, buf + n, 
size - n); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_gpu_core_count, sizeof(kbase_device_gpu_core_count)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_as_count, sizeof(kbase_device_as_count)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_sb_entry_count, sizeof(kbase_device_sb_entry_count)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_has_cross_stream_sync, sizeof(kbase_device_has_cross_stream_sync)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_supports_gpu_sleep, sizeof(kbase_device_supports_gpu_sleep)); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Physical PP groups in INACTIVE state (count = %u):\n", -+ group_list_inactive_count); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { -+ n += mali_group_dump_state(group, buf + n, size - n); -+ } ++void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u64 buffer_gpu_addr ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ + sizeof(buffer_gpu_addr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Physical PP groups in DISABLED state (count = %u):\n", -+ group_list_disabled_count); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { -+ n += mali_group_dump_state(group, buf + n, size - n); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr)); + -+ if (mali_executor_has_virtual_group()) { -+ switch (virtual_group_state) { -+ case EXEC_STATE_EMPTY: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP group is in state EMPTY\n"); -+ break; -+ case EXEC_STATE_INACTIVE: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP group is in state INACTIVE\n"); -+ break; -+ case EXEC_STATE_IDLE: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP group is in state IDLE\n"); -+ break; -+ case EXEC_STATE_WORKING: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP group is in state WORKING\n"); -+ break; -+ default: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP group is in unknown/illegal state %u\n", -+ virtual_group_state); -+ break; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ n += mali_group_dump_state(virtual_group, buf + n, size - n); -+ } ++void __kbase_tlstream_tl_kbase_device_program_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kernel_ctx_id, ++ u32 gpu_cmdq_grp_handle, ++ u32 kbase_device_csg_slot_index, ++ u32 kbase_device_csg_slot_resuming ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; ++ const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kernel_ctx_id) ++ + sizeof(gpu_cmdq_grp_handle) ++ + sizeof(kbase_device_csg_slot_index) ++ + sizeof(kbase_device_csg_slot_resuming) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_unlock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ n += _mali_osk_snprintf(buf + n, size - n, "\n"); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming)); + -+ return n; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} -+#endif + -+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args) ++void __kbase_tlstream_tl_kbase_device_deprogram_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++) +{ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ args->number_of_total_cores = num_physical_pp_cores_total; -+ args->number_of_enabled_cores = num_physical_pp_cores_enabled; -+ return _MALI_OSK_ERR_OK; -+} ++ const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kbase_device_csg_slot_index) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args) -+{ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ args->version = pp_version; -+ return _MALI_OSK_ERR_OK; -+} ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args) -+{ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ args->number_of_cores = 1; -+ return _MALI_OSK_ERR_OK; -+} ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + -+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args) -+{ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ args->version = gp_version; -+ return _MALI_OSK_ERR_OK; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args) ++void __kbase_tlstream_tl_kbase_device_halting_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index, ++ u32 kbase_device_csg_slot_suspending ++) +{ -+ struct mali_session_data *session; -+ struct mali_gp_job *job; ++ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG; ++ const 
size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kbase_device_csg_slot_index) ++ + sizeof(kbase_device_csg_slot_suspending) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending)); + -+ if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) { -+ _mali_osk_notification_t *new_notification = NULL; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ new_notification = _mali_osk_notification_create( -+ _MALI_NOTIFICATION_GP_STALLED, -+ sizeof(_mali_uk_gp_job_suspended_s)); ++void __kbase_tlstream_tl_kbase_device_suspend_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kbase_device_csg_slot_index) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (NULL != new_notification) { -+ MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n", -+ args->cookie, args->arguments[0], args->arguments[1])); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_lock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + -+ /* Resume the job in question if it is still running */ -+ job = mali_group_get_running_gp_job(gp_group); -+ if (NULL != job && -+ args->cookie == mali_gp_job_get_id(job) && -+ session == mali_gp_job_get_session(job)) { -+ /* -+ * Correct job is running, resume with new heap -+ */ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_gp_job_set_oom_notification(job, -+ new_notification); ++void __kbase_tlstream_tl_kbase_device_csg_idle( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kbase_device_id) ++ + sizeof(kbase_device_csg_slot_index) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* This will also re-enable interrupts */ -+ mali_group_resume_gp_with_new_heap(gp_group, -+ args->cookie, -+ args->arguments[0], -+ args->arguments[1]); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_OK; -+ } else { -+ MALI_DEBUG_PRINT(2, ("Executor: Unable to resume gp job becasue gp time out or any other unexpected reason!\n")); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = 
kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + -+ _mali_osk_notification_delete(new_notification); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } else { -+ MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. Will abort GP job.\n")); -+ } -+ } else { -+ MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie)); -+ } ++void __kbase_tlstream_tl_kbase_new_ctx( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 kbase_device_id ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_NEW_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ + sizeof(kbase_device_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_lock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Abort the job in question if it is still running */ -+ job = mali_group_get_running_gp_job(gp_group); -+ if (NULL != job && -+ args->cookie == mali_gp_job_get_id(job) && -+ session == mali_gp_job_get_session(job)) { -+ /* Correct job is still running */ -+ struct mali_gp_job *job_done = NULL; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_id, sizeof(kbase_device_id)); + -+ mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* The same job should have completed */ -+ MALI_DEBUG_ASSERT(job_done == job); ++void __kbase_tlstream_tl_kbase_del_ctx( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_DEL_CTX; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* GP job completed, make sure it is freed */ -+ mali_scheduler_complete_gp_job(job_done, MALI_FALSE, -+ MALI_TRUE, MALI_TRUE); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + ++void __kbase_tlstream_tl_kbase_ctx_assign_as( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 kbase_device_as_index ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ + sizeof(kbase_device_as_index) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+/* -+ * ---------- Implementation of static functions ---------- -+ */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+void mali_executor_lock(void) -+{ -+ _mali_osk_spinlock_irq_lock(mali_executor_lock_obj); -+ MALI_DEBUG_PRINT(5, ("Executor: lock taken\n")); -+} ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = 
kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kbase_device_as_index, sizeof(kbase_device_as_index)); + -+void mali_executor_unlock(void) -+{ -+ MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n")); -+ _mali_osk_spinlock_irq_unlock(mali_executor_lock_obj); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_is_suspended(void *data) ++void __kbase_tlstream_tl_kbase_ctx_unassign_as( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id ++) +{ -+ mali_bool ret; ++ const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* This callback does not use the data pointer. */ -+ MALI_IGNORE(data); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_lock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + -+ ret = pause_count > 0 && !mali_executor_is_working(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_unlock(); ++void __kbase_tlstream_tl_kbase_new_kcpuqueue( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 kcpu_queue_id, ++ u32 kernel_ctx_id, ++ u32 kcpuq_num_pending_cmds ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(kcpu_queue_id) ++ + sizeof(kernel_ctx_id) ++ + sizeof(kcpuq_num_pending_cmds) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ return ret; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue_id, sizeof(kcpu_queue_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_is_working(void) ++void __kbase_tlstream_tl_kbase_del_kcpuqueue( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ return (0 != group_list_working_count || -+ EXEC_STATE_WORKING == gp_group_state || -+ EXEC_STATE_WORKING == virtual_group_state); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_disable_empty_virtual(void) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( ++ struct kbase_tlstream *stream, ++ const void 
*kcpu_queue, ++ const void *fence ++) +{ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY); -+ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING); ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(fence) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (mali_group_is_empty(virtual_group)) { -+ virtual_group_state = EXEC_STATE_EMPTY; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &fence, sizeof(fence)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *fence ++) +{ -+ mali_bool trigger_pm_update = MALI_FALSE; -+ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ /* Only rejoining after job has completed (still active) */ -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == -+ mali_group_get_state(group)); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group()); -+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group)); ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(fence) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Make sure group and virtual group have same status */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) { -+ if (mali_group_deactivate(group)) { -+ trigger_pm_update = MALI_TRUE; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &fence, sizeof(fence)); + -+ if (virtual_group_state == EXEC_STATE_EMPTY) { -+ virtual_group_state = EXEC_STATE_INACTIVE; -+ } -+ } else if (MALI_GROUP_STATE_ACTIVATION_PENDING == -+ mali_group_get_state(virtual_group)) { -+ /* -+ * Activation is pending for virtual group, leave -+ * this child group as active. 
-+ */ -+ if (virtual_group_state == EXEC_STATE_EMPTY) { -+ virtual_group_state = EXEC_STATE_INACTIVE; -+ } -+ } else { -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == -+ mali_group_get_state(virtual_group)); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (virtual_group_state == EXEC_STATE_EMPTY) { -+ virtual_group_state = EXEC_STATE_IDLE; -+ } -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u32 compare_value, ++ u32 inherit_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(cqs_obj_gpu_addr) ++ + sizeof(compare_value) ++ + sizeof(inherit_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Remove group from idle list */ -+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, -+ EXEC_STATE_IDLE)); -+ _mali_osk_list_delinit(&group->executor_list); -+ group_list_idle_count--; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* -+ * And finally rejoin the virtual group -+ * group will start working on same job as virtual_group, -+ * if virtual_group is working on a job -+ */ -+ mali_group_add_group(virtual_group, group); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &compare_value, sizeof(compare_value)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &inherit_error, sizeof(inherit_error)); + -+ return trigger_pm_update; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_has_virtual_group(void) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr ++) +{ -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ return (NULL != virtual_group) ? 
MALI_TRUE : MALI_FALSE; -+#else -+ return MALI_FALSE; -+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(cqs_obj_gpu_addr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_virtual_group_is_usable(void) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u64 compare_value, ++ u32 condition, ++ u32 data_type, ++ u32 inherit_error ++) +{ -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return ((EXEC_STATE_INACTIVE == virtual_group_state || -+ EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ? -+ MALI_TRUE : MALI_FALSE; -+#else -+ return MALI_FALSE; -+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(cqs_obj_gpu_addr) ++ + sizeof(compare_value) ++ + sizeof(condition) ++ + sizeof(data_type) ++ + sizeof(inherit_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &compare_value, sizeof(compare_value)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &condition, sizeof(condition)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &data_type, sizeof(data_type)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &inherit_error, sizeof(inherit_error)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_tackle_gp_bound(void) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u64 value, ++ u32 operation, ++ u32 data_type ++) +{ -+ struct mali_pp_job *job; -+ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(cqs_obj_gpu_addr) ++ + sizeof(value) ++ + sizeof(operation) ++ + sizeof(data_type) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ job = mali_scheduler_job_pp_physical_peek(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != job && MALI_TRUE == mali_is_mali400()) { -+ if (0 < group_list_working_count && -+ mali_pp_job_is_large_and_unstarted(job)) 
{ -+ return MALI_TRUE; -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &value, sizeof(value)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &operation, sizeof(operation)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &data_type, sizeof(data_type)); + -+ return MALI_FALSE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed) ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++) +{ -+ struct mali_pp_job *next_pp_job_to_start = NULL; -+ struct mali_group *group; -+ struct mali_group *tmp_group; -+ struct mali_pp_job *physical_pp_job_working = NULL; -+ struct mali_pp_job *virtual_pp_job_working = NULL; -+ mali_bool gpu_working_in_protected_mode = MALI_FALSE; -+ mali_bool gpu_working_in_non_protected_mode = MALI_FALSE; -+ -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(map_import_buf_gpu_addr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ *gpu_secure_mode_is_needed = MALI_FALSE; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Check if the gpu secure mode is supported, exit if not.*/ -+ if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported()) { -+ return MALI_FALSE; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + -+ /* Check if need to set gpu secure mode for the next pp job, -+ * get the next pp job that will be scheduled if exist. 
-+ */ -+ next_pp_job_to_start = mali_scheduler_job_pp_next(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* Check current pp physical/virtual running job is protected job or not if exist.*/ -+ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working, -+ struct mali_group, executor_list) { -+ physical_pp_job_working = group->pp_running_job; -+ break; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(map_import_buf_gpu_addr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (EXEC_STATE_WORKING == virtual_group_state) { -+ virtual_pp_job_working = virtual_group->pp_running_job; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != physical_pp_job_working) { -+ if (MALI_TRUE == mali_pp_job_is_protected_job(physical_pp_job_working)) { -+ gpu_working_in_protected_mode = MALI_TRUE; -+ } else { -+ gpu_working_in_non_protected_mode = MALI_TRUE; -+ } -+ } else if (NULL != virtual_pp_job_working) { -+ if (MALI_TRUE == mali_pp_job_is_protected_job(virtual_pp_job_working)) { -+ gpu_working_in_protected_mode = MALI_TRUE; -+ } else { -+ gpu_working_in_non_protected_mode = MALI_TRUE; -+ } -+ } else if (EXEC_STATE_WORKING == gp_group_state) { -+ gpu_working_in_non_protected_mode = MALI_TRUE; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + -+ /* If the next pp job is the protected pp job.*/ -+ if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) { -+ /* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */ -+ if (MALI_TRUE == gpu_working_in_non_protected_mode) -+ return MALI_TRUE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ *gpu_secure_mode_is_needed = MALI_TRUE; -+ return MALI_FALSE; ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(map_import_buf_gpu_addr) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (MALI_TRUE == gpu_working_in_protected_mode) { -+ /* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/ -+ return MALI_TRUE; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + -+ return MALI_FALSE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} -+/* -+ * This is where jobs are actually started. 
-+ */ -+static void mali_executor_schedule(void) ++ ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ u32 i; -+ u32 num_physical_needed = 0; -+ u32 num_physical_to_process = 0; -+ mali_bool trigger_pm_update = MALI_FALSE; -+ mali_bool deactivate_idle_group = MALI_TRUE; -+ mali_bool gpu_secure_mode_is_needed = MALI_FALSE; -+ mali_bool is_gpu_secure_mode = MALI_FALSE; -+ /* Physical groups + jobs to start in this function */ -+ struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; -+ struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; -+ u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; -+ int num_jobs_to_start = 0; ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Virtual job to start in this function */ -+ struct mali_pp_job *virtual_job_to_start = NULL; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* GP job to start in this function */ -+ struct mali_gp_job *gp_job_to_start = NULL; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (pause_count > 0) { -+ /* Execution is suspended, don't schedule any jobs. */ -+ return; -+ } ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 jit_alloc_gpu_alloc_addr_dest, ++ u64 jit_alloc_va_pages, ++ u64 jit_alloc_commit_pages, ++ u64 jit_alloc_extent, ++ u32 jit_alloc_jit_id, ++ u32 jit_alloc_bin_id, ++ u32 jit_alloc_max_allocations, ++ u32 jit_alloc_flags, ++ u32 jit_alloc_usage_id ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(jit_alloc_gpu_alloc_addr_dest) ++ + sizeof(jit_alloc_va_pages) ++ + sizeof(jit_alloc_commit_pages) ++ + sizeof(jit_alloc_extent) ++ + sizeof(jit_alloc_jit_id) ++ + sizeof(jit_alloc_bin_id) ++ + sizeof(jit_alloc_max_allocations) ++ + sizeof(jit_alloc_flags) ++ + sizeof(jit_alloc_usage_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Lock needed in order to safely handle the job queues */ -+ mali_scheduler_lock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* 1. Check the schedule if need to early out. 
*/ -+ if (MALI_TRUE == mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed)) { -+ mali_scheduler_unlock(); -+ return; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_extent, sizeof(jit_alloc_extent)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_flags, sizeof(jit_alloc_flags)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id)); + -+ /* 2. Activate gp firstly if have gp job queued. */ -+ if ((EXEC_STATE_INACTIVE == gp_group_state) -+ && (0 < mali_scheduler_job_gp_count()) -+ && (gpu_secure_mode_is_needed == MALI_FALSE)) { ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ enum mali_group_state state = -+ mali_group_activate(gp_group); -+ if (MALI_GROUP_STATE_ACTIVE == state) { -+ /* Set GP group state to idle */ -+ gp_group_state = EXEC_STATE_IDLE; -+ } else { -+ trigger_pm_update = MALI_TRUE; -+ } -+ } ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* 3. Prepare as many physical groups as needed/possible */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* On mali-450 platform, we don't need to enter in this block frequently. 
*/ -+ if (0 < num_physical_needed) { ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (num_physical_needed <= group_list_idle_count) { -+ /* We have enough groups on idle list already */ -+ num_physical_to_process = num_physical_needed; -+ num_physical_needed = 0; -+ } else { -+ /* We need to get a hold of some more groups */ -+ num_physical_to_process = group_list_idle_count; -+ num_physical_needed -= group_list_idle_count; -+ } ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (0 < num_physical_needed) { ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* 3.1. Activate groups which are inactive */ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ struct mali_group *group; -+ struct mali_group *temp; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, -+ struct mali_group, executor_list) { -+ enum mali_group_state state = -+ mali_group_activate(group); -+ if (MALI_GROUP_STATE_ACTIVE == state) { -+ /* Move from inactive to idle */ -+ mali_executor_change_state_pp_physical(group, -+ &group_list_inactive, -+ &group_list_inactive_count, -+ &group_list_idle, -+ &group_list_idle_count); -+ num_physical_to_process++; -+ } else { -+ trigger_pm_update = MALI_TRUE; -+ } ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 jit_alloc_jit_id ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(jit_alloc_jit_id) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ num_physical_needed--; -+ if (0 == num_physical_needed) { -+ /* We have activated all the groups we need */ -+ break; -+ } -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (mali_executor_virtual_group_is_usable()) { ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); + -+ /* -+ * 3.2. 
And finally, steal and activate groups -+ * from virtual group if we need even more -+ */ -+ while (0 < num_physical_needed) { -+ struct mali_group *group; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ group = mali_group_acquire_group(virtual_group); -+ if (NULL != group) { -+ enum mali_group_state state; ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_executor_disable_empty_virtual(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ state = mali_group_activate(group); -+ if (MALI_GROUP_STATE_ACTIVE == state) { -+ /* Group is ready, add to idle list */ -+ _mali_osk_list_add( -+ &group->executor_list, -+ &group_list_idle); -+ group_list_idle_count++; -+ num_physical_to_process++; -+ } else { -+ /* -+ * Group is not ready yet, -+ * add to inactive list -+ */ -+ _mali_osk_list_add( -+ &group->executor_list, -+ &group_list_inactive); -+ group_list_inactive_count++; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ trigger_pm_update = MALI_TRUE; -+ } -+ num_physical_needed--; -+ } else { -+ /* -+ * We could not get enough groups -+ * from the virtual group. -+ */ -+ break; -+ } -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* 3.3. Assign physical jobs to groups */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (0 < num_physical_to_process) { -+ struct mali_group *group; -+ struct mali_group *temp; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, -+ struct mali_group, executor_list) { -+ struct mali_pp_job *job = NULL; -+ u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ MALI_DEBUG_ASSERT(num_jobs_to_start < -+ MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ MALI_DEBUG_ASSERT(0 < -+ mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed)); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *group_suspend_buf, ++ u32 gpu_cmdq_grp_handle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(group_suspend_buf) ++ + sizeof(gpu_cmdq_grp_handle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* If the next pp job is non-protected, check if gp bound now. 
*/ -+ if ((MALI_FALSE == gpu_secure_mode_is_needed) -+ && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND)) -+ && (MALI_TRUE == mali_executor_tackle_gp_bound())) { -+ /* -+ * We're gp bound, -+ * don't start this right now. -+ */ -+ deactivate_idle_group = MALI_FALSE; -+ num_physical_to_process = 0; -+ break; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ job = mali_scheduler_job_pp_physical_get( -+ &sub_job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &group_suspend_buf, sizeof(group_suspend_buf)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + -+ if (MALI_FALSE == gpu_secure_mode_is_needed) { -+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_protected_job(job)); -+ } else { -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_protected_job(job)); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Put job + group on list of jobs to start later on */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ groups_to_start[num_jobs_to_start] = group; -+ jobs_to_start[num_jobs_to_start] = job; -+ sub_jobs_to_start[num_jobs_to_start] = sub_job; -+ num_jobs_to_start++; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* Move group from idle to working */ -+ mali_executor_change_state_pp_physical(group, -+ &group_list_idle, -+ &group_list_idle_count, -+ &group_list_working, -+ &group_list_working_count); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ num_physical_to_process--; -+ if (0 == num_physical_to_process) { -+ /* Got all we needed */ -+ break; -+ } -+ } -+ } -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* 4. 
Deactivate idle pp group , must put deactive here before active vitual group -+ * for cover case first only has physical job in normal queue but group inactive, -+ * so delay the job start go to active group, when group activated, -+ * call scheduler again, but now if we get high queue virtual job, -+ * we will do nothing in schedule cause executor schedule stop -+ */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group -+ && (!mali_timeline_has_physical_pp_job()))) { -+ trigger_pm_update = MALI_TRUE; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ /* 5. Activate virtual group, if needed */ -+ if (EXEC_STATE_INACTIVE == virtual_group_state && -+ MALI_TRUE == mali_scheduler_job_next_is_virtual()) { -+ struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek(); -+ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job)) -+ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) { -+ enum mali_group_state state = -+ mali_group_activate(virtual_group); -+ if (MALI_GROUP_STATE_ACTIVE == state) { -+ /* Set virtual group state to idle */ -+ virtual_group_state = EXEC_STATE_IDLE; -+ } else { -+ trigger_pm_update = MALI_TRUE; -+ } -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* 6. To power up group asap, trigger pm update only when no need to swith the gpu mode. */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ is_gpu_secure_mode = _mali_osk_gpu_secure_mode_is_enabled(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == is_gpu_secure_mode) -+ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == is_gpu_secure_mode)) { -+ if (MALI_TRUE == trigger_pm_update) { -+ trigger_pm_update = MALI_FALSE; -+ mali_pm_update_async(); -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* 7. 
Assign jobs to idle virtual group (or deactivate if no job) */ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (EXEC_STATE_IDLE == virtual_group_state) { -+ if (MALI_TRUE == mali_scheduler_job_next_is_virtual()) { -+ struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek(); -+ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job)) -+ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) { -+ virtual_job_to_start = -+ mali_scheduler_job_pp_virtual_get(); -+ virtual_group_state = EXEC_STATE_WORKING; -+ } -+ } else if (!mali_timeline_has_virtual_pp_job()) { -+ virtual_group_state = EXEC_STATE_INACTIVE; ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (mali_group_deactivate(virtual_group)) { -+ trigger_pm_update = MALI_TRUE; -+ } -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* 8. Assign job to idle GP group (or deactivate if no job) */ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) { -+ if (0 < mali_scheduler_job_gp_count()) { -+ gp_job_to_start = mali_scheduler_job_gp_get(); -+ gp_group_state = EXEC_STATE_WORKING; -+ } else if (!mali_timeline_has_gp_job()) { -+ gp_group_state = EXEC_STATE_INACTIVE; -+ if (mali_group_deactivate(gp_group)) { -+ trigger_pm_update = MALI_TRUE; -+ } -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* 9. We no longer need the schedule/queue lock */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_scheduler_unlock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* 10. 
start jobs */ -+ if (NULL != virtual_job_to_start) { -+ MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group)); -+ mali_group_start_pp_job(virtual_group, -+ virtual_job_to_start, 0, is_gpu_secure_mode); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ for (i = 0; i < num_jobs_to_start; i++) { -+ MALI_DEBUG_ASSERT(!mali_group_pp_is_active( -+ groups_to_start[i])); -+ mali_group_start_pp_job(groups_to_start[i], -+ jobs_to_start[i], -+ sub_jobs_to_start[i], is_gpu_secure_mode); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ MALI_DEBUG_ASSERT_POINTER(gp_group); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (NULL != gp_job_to_start) { -+ MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group)); -+ mali_group_start_gp_job(gp_group, gp_job_to_start, is_gpu_secure_mode); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* 11. Trigger any pending PM updates */ -+ if (MALI_TRUE == trigger_pm_update) { -+ mali_pm_update_async(); -+ } -+} ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+/* Handler for deferred schedule requests */ -+static void mali_executor_wq_schedule(void *arg) -+{ -+ MALI_IGNORE(arg); -+ mali_executor_lock(); -+ mali_executor_schedule(); -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) +{ -+ _mali_uk_gp_job_suspended_s *jobres; -+ _mali_osk_notification_t *notification; -+ -+ notification = mali_gp_job_get_oom_notification(job); ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* -+ * Remember the id we send to user space, so we have something to -+ * verify when we get a response -+ */ -+ gp_returned_cookie = mali_gp_job_get_id(job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer; -+ jobres->user_job_ptr = mali_gp_job_get_user_id(job); -+ jobres->cookie = gp_returned_cookie; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ mali_session_send_notification(mali_gp_job_get_session(job), -+ notification); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} -+static struct mali_gp_job 
*mali_executor_complete_gp(struct mali_group *group, -+ mali_bool success) ++ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ struct mali_gp_job *job; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Extracts the needed HW status from core and reset */ -+ job = mali_group_complete_gp(group, success); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* Core is now ready to go into idle list */ -+ gp_group_state = EXEC_STATE_IDLE; ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* This will potentially queue more GP and PP jobs */ -+ mali_timeline_tracker_release(&job->tracker); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Signal PP job */ -+ mali_gp_job_signal_pp_tracker(job, success); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ return job; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group, -+ mali_bool success) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) +{ -+ struct mali_pp_job *job; -+ u32 sub_job; -+ mali_bool job_is_done; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Extracts the needed HW status from core and reset */ -+ job = mali_group_complete_pp(group, success, &sub_job); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* Core is now ready to go into idle list */ -+ if (mali_group_is_virtual(group)) { -+ virtual_group_state = EXEC_STATE_IDLE; -+ } else { -+ /* Move from working to idle state */ -+ 
mali_executor_change_state_pp_physical(group, -+ &group_list_working, -+ &group_list_working_count, -+ &group_list_idle, -+ &group_list_idle_count); -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* It is the executor module which owns the jobs themselves by now */ -+ mali_pp_job_mark_sub_job_completed(job, success); -+ job_is_done = mali_pp_job_is_complete(job); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (job_is_done) { -+ /* This will potentially queue more GP and PP jobs */ -+ mali_timeline_tracker_release(&job->tracker); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ return job; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_complete_group(struct mali_group *group, -+ mali_bool success, -+ struct mali_gp_job **gp_job_done, -+ struct mali_pp_job **pp_job_done) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) +{ -+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); -+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); -+ struct mali_gp_job *gp_job = NULL; -+ struct mali_pp_job *pp_job = NULL; -+ mali_bool pp_job_is_done = MALI_TRUE; -+ -+ if (NULL != gp_core) { -+ gp_job = mali_executor_complete_gp(group, success); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(pp_core); -+ MALI_IGNORE(pp_core); -+ pp_job = mali_executor_complete_pp(group, success); -+ -+ pp_job_is_done = mali_pp_job_is_complete(pp_job); -+ } -+ -+ if (pause_count > 0) { -+ /* Execution has been suspended */ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (!mali_executor_is_working()) { -+ /* Last job completed, wake up sleepers */ -+ _mali_osk_wait_queue_wake_up( -+ executor_working_wait_queue); -+ } -+ } else if (MALI_TRUE == mali_group_disable_requested(group)) { -+ mali_executor_core_scale_in_group_complete(group); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_schedule(); -+ } else { -+ /* try to schedule new jobs */ -+ mali_executor_schedule(); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ if (NULL != gp_job) { -+ MALI_DEBUG_ASSERT_POINTER(gp_job_done); -+ *gp_job_done = gp_job; -+ } else if (pp_job_is_done) { -+ MALI_DEBUG_ASSERT_POINTER(pp_job); -+ MALI_DEBUG_ASSERT_POINTER(pp_job_done); -+ *pp_job_done = pp_job; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_change_state_pp_physical(struct mali_group *group, -+ _mali_osk_list_t *old_list, -+ u32 *old_count, -+ _mali_osk_list_t *new_list, -+ u32 
*new_count) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ /* -+ * It's a bit more complicated to change the state for the physical PP -+ * groups since their state is determined by the list they are on. -+ */ -+#if defined(DEBUG) -+ mali_bool found = MALI_FALSE; -+ struct mali_group *group_iter; -+ struct mali_group *temp; -+ u32 old_counted = 0; -+ u32 new_counted = 0; -+ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(old_list); -+ MALI_DEBUG_ASSERT_POINTER(old_count); -+ MALI_DEBUG_ASSERT_POINTER(new_list); -+ MALI_DEBUG_ASSERT_POINTER(new_count); -+ -+ /* -+ * Verify that group is present on old list, -+ * and that the count is correct -+ */ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list, -+ struct mali_group, executor_list) { -+ old_counted++; -+ if (group == group_iter) { -+ found = MALI_TRUE; -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list, -+ struct mali_group, executor_list) { -+ new_counted++; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ if (MALI_FALSE == found) { -+ if (old_list == &group_list_idle) { -+ MALI_DEBUG_PRINT(1, (" old Group list is idle,")); -+ } else if (old_list == &group_list_inactive) { -+ MALI_DEBUG_PRINT(1, (" old Group list is inactive,")); -+ } else if (old_list == &group_list_working) { -+ MALI_DEBUG_PRINT(1, (" old Group list is working,")); -+ } else if (old_list == &group_list_disabled) { -+ MALI_DEBUG_PRINT(1, (" old Group list is disable,")); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) { -+ MALI_DEBUG_PRINT(1, (" group in working \n")); -+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) { -+ MALI_DEBUG_PRINT(1, (" group in inactive \n")); -+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) { -+ MALI_DEBUG_PRINT(1, (" group in idle \n")); -+ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) { -+ MALI_DEBUG_PRINT(1, (" but group in disabled \n")); -+ } -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT(MALI_TRUE == found); -+ MALI_DEBUG_ASSERT(0 < (*old_count)); -+ MALI_DEBUG_ASSERT((*old_count) == old_counted); -+ MALI_DEBUG_ASSERT((*new_count) == new_counted); -+#endif ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _mali_osk_list_move(&group->executor_list, new_list); -+ (*old_count)--; -+ (*new_count)++; -+} ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = 
kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+static void mali_executor_set_state_pp_physical(struct mali_group *group, -+ _mali_osk_list_t *new_list, -+ u32 *new_count) -+{ -+ _mali_osk_list_add(&group->executor_list, new_list); -+ (*new_count)++; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_group_is_in_state(struct mali_group *group, -+ enum mali_executor_state_t state) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ -+ if (gp_group == group) { -+ if (gp_group_state == state) { -+ return MALI_TRUE; -+ } -+ } else if (virtual_group == group || mali_group_is_in_virtual(group)) { -+ if (virtual_group_state == state) { -+ return MALI_TRUE; -+ } -+ } else { -+ /* Physical PP group */ -+ struct mali_group *group_iter; -+ struct mali_group *temp; -+ _mali_osk_list_t *list; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (EXEC_STATE_DISABLED == state) { -+ list = &group_list_disabled; -+ } else if (EXEC_STATE_INACTIVE == state) { -+ list = &group_list_inactive; -+ } else if (EXEC_STATE_IDLE == state) { -+ list = &group_list_idle; -+ } else { -+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state); -+ list = &group_list_working; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list, -+ struct mali_group, executor_list) { -+ if (group_iter == group) { -+ return MALI_TRUE; -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* group not in correct state */ -+ return MALI_FALSE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_group_enable_internal(struct mali_group *group) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) +{ -+ MALI_DEBUG_ASSERT(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)); -+ -+ /* Put into inactive state (== "lowest" enabled state) */ -+ if (group == gp_group) { -+ MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state); -+ gp_group_state = EXEC_STATE_INACTIVE; -+ } else { -+ mali_executor_change_state_pp_physical(group, -+ &group_list_disabled, -+ &group_list_disabled_count, -+ &group_list_inactive, -+ &group_list_inactive_count); -+ -+ ++num_physical_pp_cores_enabled; -+ MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id)); -+ } ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) { -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group)); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, 
msg_size, &acq_flags); + -+ /* Move from inactive to idle */ -+ if (group == gp_group) { -+ gp_group_state = EXEC_STATE_IDLE; -+ } else { -+ mali_executor_change_state_pp_physical(group, -+ &group_list_inactive, -+ &group_list_inactive_count, -+ &group_list_idle, -+ &group_list_idle_count); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ if (mali_executor_has_virtual_group()) { -+ if (mali_executor_physical_rejoin_virtual(group)) { -+ mali_pm_update_async(); -+ } -+ } -+ } -+ } else { -+ mali_pm_update_async(); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_group_disable_internal(struct mali_group *group) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ mali_bool working; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING); -+ if (MALI_TRUE == working) { -+ /** Group to be disabled once it completes current work, -+ * when virtual group completes, also check child groups for this flag */ -+ mali_group_set_disable_request(group, MALI_TRUE); -+ return; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* Put into disabled state */ -+ if (group == gp_group) { -+ /* GP group */ -+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state); -+ gp_group_state = EXEC_STATE_DISABLED; -+ } else { -+ if (mali_group_is_in_virtual(group)) { -+ /* A child group of virtual group. 
move the specific group from virtual group */ -+ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ mali_executor_set_state_pp_physical(group, -+ &group_list_disabled, -+ &group_list_disabled_count); ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_group_remove_group(virtual_group, group); -+ mali_executor_disable_empty_virtual(); -+ } else { -+ mali_executor_change_group_status_disabled(group); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ --num_physical_pp_cores_enabled; -+ MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id)); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ if (MALI_GROUP_STATE_INACTIVE != group->state) { -+ if (MALI_TRUE == mali_group_deactivate(group)) { -+ mali_pm_update_async(); -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_notify_core_change(u32 num_cores) ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error, ++ u64 jit_alloc_gpu_alloc_addr, ++ u64 jit_alloc_mmu_flags ++) +{ -+ mali_bool done = MALI_FALSE; ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ + sizeof(jit_alloc_gpu_alloc_addr) ++ + sizeof(jit_alloc_mmu_flags) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ return; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* -+ * This function gets a bit complicated because we can't hold the session lock while -+ * allocating notification objects. 
-+ */ -+ while (!done) { -+ u32 i; -+ u32 num_sessions_alloc; -+ u32 num_sessions_with_lock; -+ u32 used_notification_objects = 0; -+ _mali_osk_notification_t **notobjs; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags)); + -+ /* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */ -+ num_sessions_alloc = mali_session_get_count(); -+ if (0 == num_sessions_alloc) { -+ /* No sessions to report to */ -+ return; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc); -+ if (NULL == notobjs) { -+ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n")); -+ /* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */ -+ return; -+ } ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ for (i = 0; i < num_sessions_alloc; i++) { -+ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s)); -+ if (NULL != notobjs[i]) { -+ _mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer; -+ data->number_of_enabled_cores = num_cores; -+ } else { -+ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i)); -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_session_lock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ /* number of sessions will not change while we hold the lock */ -+ num_sessions_with_lock = mali_session_get_count(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (num_sessions_alloc >= num_sessions_with_lock) { -+ /* We have allocated enough notification objects for all the sessions atm */ -+ struct mali_session_data *session, *tmp; -+ MALI_SESSION_FOREACH(session, tmp, link) { -+ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc); -+ if (NULL != notobjs[used_notification_objects]) { -+ mali_session_send_notification(session, notobjs[used_notification_objects]); -+ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */ -+ } -+ used_notification_objects++; -+ } -+ done = MALI_TRUE; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + 
sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_session_unlock(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Delete any remaining/unused notification objects */ -+ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) { -+ if (NULL != notobjs[used_notification_objects]) { -+ _mali_osk_notification_delete(notobjs[used_notification_objects]); -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ _mali_osk_free(notobjs); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_core_scaling_is_done(void *data) ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ u32 i; -+ u32 num_groups; -+ mali_bool ret = MALI_TRUE; -+ -+ MALI_IGNORE(data); -+ -+ mali_executor_lock(); -+ -+ num_groups = mali_group_get_glob_num_groups(); ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != group) { -+ if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) { -+ ret = MALI_FALSE; -+ break; -+ } -+ } -+ } -+ mali_executor_unlock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ return ret; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_wq_notify_core_change(void *arg) ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error, ++ u64 jit_free_pages_used ++) +{ -+ MALI_IGNORE(arg); ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ + sizeof(jit_free_pages_used) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ return; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue, -+ mali_executor_core_scaling_is_done, NULL); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); + -+ mali_executor_notify_core_change(num_physical_pp_cores_enabled); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+/** -+ * Clear all disable request from the _last_ core scaling behavior. 
-+ */ -+static void mali_executor_core_scaling_reset(void) ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ u32 i; -+ u32 num_groups; -+ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ -+ num_groups = mali_group_get_glob_num_groups(); ++ const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (NULL != group) { -+ group->disable_requested = MALI_FALSE; -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ core_scaling_delay_up_mask[i] = 0; -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_core_scale(unsigned int target_core_nr) ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) +{ -+ int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; -+ int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; -+ int i; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT(0 < target_core_nr); -+ MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_executor_lock(); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ if (target_core_nr < num_physical_pp_cores_enabled) { -+ MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr)); -+ } else { -+ MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled)); -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* When a new core scaling request is comming, we should remove the un-doing -+ * part of the last core scaling request. It's safe because we have only one -+ * lock(executor lock) protection. 
*/ -+ mali_executor_core_scaling_reset(); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask); -+ mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i]; -+ MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i])); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (0 > target_core_scaling_mask[i]) { -+ struct mali_pm_domain *domain; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ domain = mali_pm_domain_get_from_index(i); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ + sizeof(execute_error) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Domain is valid and has pp cores */ -+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { -+ struct mali_group *group; -+ struct mali_group *temp; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) { -+ if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) -+ && (!mali_group_is_virtual(group))) { -+ mali_executor_group_disable_internal(group); -+ target_core_scaling_mask[i]++; -+ if ((0 == target_core_scaling_mask[i])) { -+ break; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + -+ } -+ } -+ } -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ /** -+ * Target_core_scaling_mask[i] is bigger than 0, -+ * means we need to enable some pp cores in -+ * this domain whose domain index is i. 
-+ */ -+ if (0 < target_core_scaling_mask[i]) { -+ struct mali_pm_domain *domain; ++void __kbase_tlstream_tl_kbase_csffw_fw_reloading( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_RELOADING; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if (num_physical_pp_cores_enabled >= target_core_nr) { -+ break; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ domain = mali_pm_domain_get_from_index(i); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ /* Domain is valid and has pp cores */ -+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { -+ struct mali_group *group; -+ struct mali_group *temp; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) { -+ if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED) -+ && (!mali_group_is_virtual(group))) { -+ mali_executor_group_enable_internal(group); -+ target_core_scaling_mask[i]--; ++void __kbase_tlstream_tl_kbase_csffw_fw_enabling( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_ENABLING; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) { -+ break; -+ } -+ } -+ } -+ } -+ } -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /** -+ * Here, we may still have some pp cores not been enabled because of some -+ * pp cores need to be disabled are still in working state. 
-+ */ -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (0 < target_core_scaling_mask[i]) { -+ core_scaling_delay_up_mask[i] = target_core_scaling_mask[i]; -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ mali_executor_schedule(); -+ mali_executor_unlock(); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_core_scale_in_group_complete(struct mali_group *group) ++void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) +{ -+ int num_pp_cores_disabled = 0; -+ int num_pp_cores_to_enable = 0; -+ int i; ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group)); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Disable child group of virtual group */ -+ if (mali_group_is_virtual(group)) { -+ struct mali_group *child; -+ struct mali_group *temp; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ if (MALI_TRUE == mali_group_disable_requested(child)) { -+ mali_group_set_disable_request(child, MALI_FALSE); -+ mali_executor_group_disable_internal(child); -+ num_pp_cores_disabled++; -+ } -+ } -+ mali_group_set_disable_request(group, MALI_FALSE); -+ } else { -+ mali_executor_group_disable_internal(group); -+ mali_group_set_disable_request(group, MALI_FALSE); -+ if (NULL != mali_group_get_pp_core(group)) { -+ num_pp_cores_disabled++; -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ num_pp_cores_to_enable = num_pp_cores_disabled; ++void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (0 < core_scaling_delay_up_mask[i]) { -+ struct mali_pm_domain *domain; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (0 == num_pp_cores_to_enable) { -+ break; -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ domain = mali_pm_domain_get_from_index(i); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* Domain is valid and has pp cores */ -+ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { -+ struct mali_group *disabled_group; -+ struct mali_group *temp; ++void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char 
*buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ _MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) { -+ if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) { -+ mali_executor_group_enable_internal(disabled_group); -+ core_scaling_delay_up_mask[i]--; -+ num_pp_cores_to_enable--; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) { -+ break; -+ } -+ } -+ } -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ _mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static void mali_executor_change_group_status_disabled(struct mali_group *group) ++void __kbase_tlstream_tl_kbase_csffw_fw_disabling( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) +{ -+ /* Physical PP group */ -+ mali_bool idle; ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_DISABLING; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE); -+ if (MALI_TRUE == idle) { -+ mali_executor_change_state_pp_physical(group, -+ &group_list_idle, -+ &group_list_idle_count, -+ &group_list_disabled, -+ &group_list_disabled_count); -+ } else { -+ mali_executor_change_state_pp_physical(group, -+ &group_list_inactive, -+ &group_list_inactive_count, -+ &group_list_disabled, -+ &group_list_disabled_count); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group) ++void __kbase_tlstream_tl_kbase_csffw_fw_off( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++) +{ -+ mali_bool trigger_pm_update = MALI_FALSE; -+ -+ if (group_list_idle_count > 0) { -+ if (mali_executor_has_virtual_group()) { ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_OFF; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Rejoin virtual group on Mali-450 */ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ struct mali_group *group; -+ struct mali_group *temp; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, -+ &group_list_idle, -+ struct mali_group, executor_list) { -+ if (mali_executor_physical_rejoin_virtual( -+ group)) { -+ trigger_pm_update = MALI_TRUE; -+ } -+ } -+ } else if (deactivate_idle_group) { -+ struct mali_group *group; -+ struct mali_group *temp; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ /* Deactivate group on Mali-300/400 */ ++void 
__kbase_tlstream_tl_kbase_csffw_tlstream_overflow( ++ struct kbase_tlstream *stream, ++ u64 csffw_timestamp, ++ u64 csffw_cycle ++) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_timestamp) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, -+ &group_list_idle, -+ struct mali_group, executor_list) { -+ if (mali_group_deactivate(group)) { -+ trigger_pm_update = MALI_TRUE; -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Move from idle to inactive */ -+ mali_executor_change_state_pp_physical(group, -+ &group_list_idle, -+ &group_list_idle_count, -+ &group_list_inactive, -+ &group_list_inactive_count); -+ } -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_timestamp, sizeof(csffw_timestamp)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); + -+ return trigger_pm_update; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_running_status_print(void) ++void __kbase_tlstream_aux_pm_state( ++ struct kbase_tlstream *stream, ++ u32 core_type, ++ u64 core_state_bitset ++) +{ -+ struct mali_group *group = NULL; -+ struct mali_group *temp = NULL; ++ const u32 msg_id = KBASE_AUX_PM_STATE; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(core_type) ++ + sizeof(core_state_bitset) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job)); -+ if ((gp_group->gp_core) && (gp_group->is_working)) { -+ mali_group_dump_status(gp_group); -+ } -+ MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count)); -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) { -+ MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job)); -+ mali_group_dump_status(group); -+ } -+ MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count)); -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { -+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off")); -+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); -+ } -+ MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count)); -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { -+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off")); -+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); -+ } -+ MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count)); -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { -+ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? 
"On" : "Off")); -+ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); -+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ if (mali_executor_has_virtual_group()) { -+ MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job)); -+ MALI_PRINT(("Virtual group status: %d\n", virtual_group_state)); -+ MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state)); -+ MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off")); -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list, -+ struct mali_group, group_list) { -+ int i = 0; -+ MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job)); -+ MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state)); -+ MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off")); -+ if (group->pm_domain) { -+ MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain))); -+ MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain))); -+ MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain))); -+ MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_current_mask()) ? "On" : "Off")); -+ MALI_PRINT(("\tWanted power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_wanted_mask()) ? "On" : "Off")); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &core_type, sizeof(core_type)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &core_state_bitset, sizeof(core_state_bitset)); + -+ for (i = 0; i < 2; i++) { -+ if (NULL != group->l2_cache_core[i]) { -+ struct mali_pm_domain *domain; -+ domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]); -+ MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off")); -+ if (domain) { -+ MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain))); -+ MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain))); -+ MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain))); -+ MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off")); -+ MALI_PRINT(("\tL2 Wanted power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? 
"On" : "Off")); -+ } -+ } -+ } -+ } -+ if (EXEC_STATE_WORKING == virtual_group_state) { -+ mali_group_dump_status(virtual_group); -+ } -+ } ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_executor_status_dump(void) ++void __kbase_tlstream_aux_pagefault( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u32 as_nr, ++ u64 page_cnt_change ++) +{ -+ mali_executor_lock(); -+ mali_scheduler_lock(); ++ const u32 msg_id = KBASE_AUX_PAGEFAULT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(as_nr) ++ + sizeof(page_cnt_change) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* print schedule queue status */ -+ mali_scheduler_gp_pp_job_queue_print(); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ mali_scheduler_unlock(); -+ mali_executor_unlock(); -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_executor.h b/drivers/gpu/arm/mali400/mali/common/mali_executor.h -new file mode 100644 -index 000000000..4224d6a6c ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_executor.h -@@ -0,0 +1,102 @@ -+/* -+ * Copyright (C) 2012, 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &as_nr, sizeof(as_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &page_cnt_change, sizeof(page_cnt_change)); + -+#ifndef __MALI_EXECUTOR_H__ -+#define __MALI_EXECUTOR_H__ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+#include "mali_osk.h" -+#include "mali_scheduler_types.h" -+#include "mali_kernel_common.h" ++void __kbase_tlstream_aux_pagesalloc( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 page_cnt ++) ++{ ++ const u32 msg_id = KBASE_AUX_PAGESALLOC; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(page_cnt) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+typedef enum { -+ MALI_EXECUTOR_HINT_GP_BOUND = 0 -+#define MALI_EXECUTOR_HINT_MAX 1 -+} mali_executor_hint; ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX]; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &page_cnt, sizeof(page_cnt)); + -+/* forward declare struct instead of using include */ -+struct mali_session_data; -+struct mali_group; -+struct mali_pp_core; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj; ++void __kbase_tlstream_aux_devfreq_target( ++ struct kbase_tlstream *stream, ++ u64 target_freq ++) ++{ ++ const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + 
sizeof(target_freq) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+_mali_osk_errcode_t mali_executor_initialize(void); -+void mali_executor_terminate(void); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &target_freq, sizeof(target_freq)); + -+void mali_executor_populate(void); -+void mali_executor_depopulate(void); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+void mali_executor_suspend(void); -+void mali_executor_resume(void); ++void __kbase_tlstream_aux_jit_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u32 bid, ++ u32 max_allocs, ++ u32 allocs, ++ u32 va_pages, ++ u32 ph_pages ++) ++{ ++ const u32 msg_id = KBASE_AUX_JIT_STATS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(bid) ++ + sizeof(max_allocs) ++ + sizeof(allocs) ++ + sizeof(va_pages) ++ + sizeof(ph_pages) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+u32 mali_executor_get_num_cores_total(void); -+u32 mali_executor_get_num_cores_enabled(void); -+struct mali_pp_core *mali_executor_get_virtual_pp(void); -+struct mali_group *mali_executor_get_virtual_group(void); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+void mali_executor_zap_all_active(struct mali_session_data *session); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &bid, sizeof(bid)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &max_allocs, sizeof(max_allocs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &allocs, sizeof(allocs)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &va_pages, sizeof(va_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ph_pages, sizeof(ph_pages)); + -+/** -+ * Schedule GP and PP according to bitmask. -+ * -+ * @param mask A scheduling bitmask. -+ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not. 
-+ */ -+void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half); -+_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half); -+_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half); -+void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups); -+void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups); ++void __kbase_tlstream_aux_tiler_heap_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u32 va_pages, ++ u32 ph_pages, ++ u32 max_chunks, ++ u32 chunk_size, ++ u32 chunk_count, ++ u32 target_in_flight, ++ u32 nr_in_flight ++) ++{ ++ const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(heap_id) ++ + sizeof(va_pages) ++ + sizeof(ph_pages) ++ + sizeof(max_chunks) ++ + sizeof(chunk_size) ++ + sizeof(chunk_count) ++ + sizeof(target_in_flight) ++ + sizeof(nr_in_flight) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+void mali_executor_abort_session(struct mali_session_data *session); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+void mali_executor_core_scaling_enable(void); -+void mali_executor_core_scaling_disable(void); -+mali_bool mali_executor_core_scaling_is_enabled(void); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &heap_id, sizeof(heap_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &va_pages, sizeof(va_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ph_pages, sizeof(ph_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &max_chunks, sizeof(max_chunks)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &chunk_size, sizeof(chunk_size)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &chunk_count, sizeof(chunk_count)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &target_in_flight, sizeof(target_in_flight)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &nr_in_flight, sizeof(nr_in_flight)); + -+void mali_executor_group_enable(struct mali_group *group); -+void mali_executor_group_disable(struct mali_group *group); -+mali_bool mali_executor_group_is_disabled(struct mali_group *group); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override); ++void __kbase_tlstream_aux_event_job_slot( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ u32 slot_nr, ++ u32 atom_nr, ++ u32 event ++) ++{ ++ const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx) ++ + sizeof(slot_nr) ++ + sizeof(atom_nr) ++ + sizeof(event) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+#if MALI_STATE_TRACKING -+u32 mali_executor_dump_state(char *buf, u32 size); -+#endif ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint) -+{ -+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); -+ mali_executor_hints[hint] = MALI_TRUE; -+} ++ pos = 
kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx, sizeof(ctx)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &slot_nr, sizeof(slot_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &atom_nr, sizeof(atom_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &event, sizeof(event)); + -+MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint) -+{ -+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); -+ mali_executor_hints[hint] = MALI_FALSE; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint) ++void __kbase_tlstream_aux_protected_enter_start( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); -+ return mali_executor_hints[hint]; -+} -+ -+void mali_executor_running_status_print(void); -+void mali_executor_status_dump(void); -+void mali_executor_lock(void); -+void mali_executor_unlock(void); -+#endif /* __MALI_EXECUTOR_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp.c b/drivers/gpu/arm/mali400/mali/common/mali_gp.c -new file mode 100644 -index 000000000..7d3d4aff7 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_gp.c -@@ -0,0 +1,357 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+#include "mali_gp.h" -+#include "mali_hw_core.h" -+#include "mali_group.h" -+#include "mali_osk.h" -+#include "regs/mali_gp_regs.h" -+#include "mali_kernel_common.h" -+#include "mali_kernel_core.h" -+#if defined(CONFIG_MALI400_PROFILING) -+#include "mali_osk_profiling.h" -+#endif ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+static struct mali_gp_core *mali_global_gp_core = NULL; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+/* Interrupt handlers */ -+static void mali_gp_irq_probe_trigger(void *data); -+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group) ++void __kbase_tlstream_aux_protected_enter_end( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ struct mali_gp_core *core = NULL; ++ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ MALI_DEBUG_ASSERT(NULL == mali_global_gp_core); -+ MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description)); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ core = _mali_osk_malloc(sizeof(struct mali_gp_core)); -+ if (NULL != core) { -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) { -+ _mali_osk_errcode_t ret; ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ ret = mali_gp_reset(core); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} + -+ if (_MALI_OSK_ERR_OK == ret) { -+ ret = mali_group_add_gp_core(group, core); -+ if (_MALI_OSK_ERR_OK == ret) { -+ /* Setup IRQ handlers (which will do IRQ probing if needed) */ -+ core->irq = _mali_osk_irq_init(resource->irq, -+ mali_group_upper_half_gp, -+ group, -+ mali_gp_irq_probe_trigger, -+ mali_gp_irq_probe_ack, -+ core, -+ resource->description); -+ if (NULL != core->irq) { -+ MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core)); -+ mali_global_gp_core = core; ++void __kbase_tlstream_aux_mmu_command( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 mmu_cmd_id, ++ u32 mmu_synchronicity, ++ u64 mmu_lock_addr, ++ u32 mmu_lock_page_num ++) ++{ ++ const u32 msg_id = KBASE_AUX_MMU_COMMAND; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kernel_ctx_id) ++ + sizeof(mmu_cmd_id) ++ + sizeof(mmu_synchronicity) ++ + sizeof(mmu_lock_addr) ++ + sizeof(mmu_lock_page_num) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ return core; -+ } else { -+ MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description)); -+ } -+ mali_group_remove_gp_core(group); -+ } else { -+ MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description)); -+ } -+ } -+ mali_hw_core_delete(&core->hw_core); 
-+ } ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ _mali_osk_free(core); -+ } else { -+ MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n")); -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &mmu_cmd_id, sizeof(mmu_cmd_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &mmu_synchronicity, sizeof(mmu_synchronicity)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &mmu_lock_addr, sizeof(mmu_lock_addr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num)); + -+ return NULL; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_gp_delete(struct mali_gp_core *core) ++void __kbase_tlstream_aux_protected_leave_start( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ MALI_DEBUG_ASSERT_POINTER(core); ++ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ _mali_osk_irq_term(core->irq); -+ mali_hw_core_delete(&core->hw_core); -+ mali_global_gp_core = NULL; -+ _mali_osk_free(core); -+} ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+void mali_gp_stop_bus(struct mali_gp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core) ++void __kbase_tlstream_aux_protected_leave_end( ++ struct kbase_tlstream *stream, ++ const void *gpu ++) +{ -+ int i; -+ -+ MALI_DEBUG_ASSERT_POINTER(core); ++ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; + -+ /* Send the stop bus command. 
*/ -+ mali_gp_stop_bus(core); ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + -+ /* Wait for bus to be stopped */ -+ for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) { -+ if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) { -+ break; -+ } -+ } ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); + -+ if (MALI_REG_POLL_COUNT_SLOW == i) { -+ MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ return _MALI_OSK_ERR_OK; ++ kbase_tlstream_msgbuf_release(stream, acq_flags); +} + -+void mali_gp_hard_reset(struct mali_gp_core *core) -+{ -+ const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT; -+ const u32 reset_invalid_value = 0xC0FFE000; -+ const u32 reset_check_value = 0xC01A0000; -+ const u32 reset_default_value = 0; -+ int i; ++/* clang-format on */ +diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +new file mode 100644 +index 000000000..06e4ca4a6 +--- /dev/null ++++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +@@ -0,0 +1,4382 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ + -+ MALI_DEBUG_ASSERT_POINTER(core); -+ MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description)); ++/* ++ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py. ++ * DO NOT EDIT. ++ */ + -+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value); ++#if !defined(_KBASE_TRACEPOINTS_H) ++#define _KBASE_TRACEPOINTS_H + -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET); ++/* Tracepoints are abstract callbacks notifying that some important ++ * software or hardware event has happened. ++ * ++ * In this particular implementation, it results into a MIPE ++ * timeline event and, in some cases, it also fires an ftrace event ++ * (a.k.a. Gator events, see details below). 
++ */ + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { -+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value); -+ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) { -+ break; -+ } -+ } ++#include "mali_kbase.h" ++#include "mali_kbase_gator.h" + -+ if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n")); -+ } ++#include ++#include + -+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */ -+ /* Re-enable interrupts */ -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); ++/* clang-format off */ + -+} ++struct kbase_tlstream; + -+void mali_gp_reset_async(struct mali_gp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); ++extern const size_t __obj_stream_offset; ++extern const size_t __aux_stream_offset; + -+ MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description)); ++/* This macro dispatches a kbase_tlstream from ++ * a kbase_device instance. Only AUX or OBJ ++ * streams can be dispatched. It is aware of ++ * kbase_timeline binary representation and ++ * relies on offset variables: ++ * __obj_stream_offset and __aux_stream_offset. ++ */ ++#define __TL_DISPATCH_STREAM(kbdev, stype) \ ++ ((struct kbase_tlstream *) \ ++ ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset)) + -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */ -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET); ++struct tp_desc; + -+} ++/* Descriptors of timeline messages transmitted in object events stream. */ ++extern const char *obj_desc_header; ++extern const size_t obj_desc_header_size; ++/* Descriptors of timeline messages transmitted in auxiliary events stream. 
*/ ++extern const char *aux_desc_header; ++extern const size_t aux_desc_header_size; + -+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core) -+{ -+ int i; -+ u32 rawstat = 0; ++#define TL_ATOM_STATE_IDLE 0 ++#define TL_ATOM_STATE_READY 1 ++#define TL_ATOM_STATE_DONE 2 ++#define TL_ATOM_STATE_POSTED 3 + -+ MALI_DEBUG_ASSERT_POINTER(core); ++#define TL_JS_EVENT_START GATOR_JOB_SLOT_START ++#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP ++#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { -+ rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); -+ if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) { -+ break; -+ } -+ } -+ -+ if (i == MALI_REG_POLL_COUNT_FAST) { -+ MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n", -+ core->hw_core.description, rawstat)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ /* Re-enable interrupts */ -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core) -+{ -+ mali_gp_reset_async(core); -+ return mali_gp_reset_wait(core); -+} -+ -+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job) -+{ -+ u32 startcmd = 0; -+ u32 *frame_registers = mali_gp_job_get_frame_registers(job); -+ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job); -+ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job); -+ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ -+ if (mali_gp_job_has_vs_job(job)) { -+ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS; -+ } -+ -+ if (mali_gp_job_has_plbu_job(job)) { -+ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU; -+ } -+ -+ MALI_DEBUG_ASSERT(0 != startcmd); -+ -+ mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME); -+ -+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE); -+ } -+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE); -+ } -+ -+ MALI_DEBUG_PRINT(3, ("Mali GP: Starting job (0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd)); -+ -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC); -+ -+ /* Barrier to make sure the previous register write is finished */ -+ _mali_osk_write_mem_barrier(); -+ -+ /* This is the command that starts the core. -+ * -+ * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just -+ * force core to assert the completion interrupt. 
-+ */ -+#if !defined(PROFILING_SKIP_GP_JOBS) -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd); -+#else -+ { -+ u32 bits = 0; ++#define TLSTREAM_ENABLED (1u << 31) + -+ if (mali_gp_job_has_vs_job(job)) -+ bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST; -+ if (mali_gp_job_has_plbu_job(job)) -+ bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST; ++void __kbase_tlstream_tl_new_ctx( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ u32 ctx_nr, ++ u32 tgid ++); + -+ mali_hw_core_register_write_relaxed(&core->hw_core, -+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits); -+ } -+#endif ++void __kbase_tlstream_tl_new_gpu( ++ struct kbase_tlstream *stream, ++ const void *gpu, ++ u32 gpu_id, ++ u32 core_count ++); + -+ /* Barrier to make sure the previous register write is finished */ -+ _mali_osk_write_mem_barrier(); -+} ++void __kbase_tlstream_tl_new_lpu( ++ struct kbase_tlstream *stream, ++ const void *lpu, ++ u32 lpu_nr, ++ u32 lpu_fn ++); + -+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr) -+{ -+ u32 irq_readout; ++void __kbase_tlstream_tl_new_atom( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++); + -+ MALI_DEBUG_ASSERT_POINTER(core); ++void __kbase_tlstream_tl_new_as( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ u32 as_nr ++); + -+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); ++void __kbase_tlstream_tl_del_ctx( ++ struct kbase_tlstream *stream, ++ const void *ctx ++); + -+ if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) { -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG)); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */ -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr); -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr); ++void __kbase_tlstream_tl_del_atom( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+ MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n")); ++void __kbase_tlstream_tl_lifelink_lpu_gpu( ++ struct kbase_tlstream *stream, ++ const void *lpu, ++ const void *gpu ++); + -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC); -+ _mali_osk_write_mem_barrier(); -+ } -+ /* -+ * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response. -+ * A timeout or a page fault on Mali-200 PP core can cause this behaviour. 
-+ */ -+} ++void __kbase_tlstream_tl_lifelink_as_gpu( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *gpu ++); + -+u32 mali_gp_core_get_version(struct mali_gp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION); -+} ++void __kbase_tlstream_tl_ret_ctx_lpu( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ const void *lpu ++); + -+struct mali_gp_core *mali_gp_get_global_gp_core(void) -+{ -+ return mali_global_gp_core; -+} ++void __kbase_tlstream_tl_ret_atom_ctx( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *ctx ++); + -+/* ------------- interrupt handling below ------------------ */ -+static void mali_gp_irq_probe_trigger(void *data) -+{ -+ struct mali_gp_core *core = (struct mali_gp_core *)data; ++void __kbase_tlstream_tl_ret_atom_lpu( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *lpu, ++ const char *attrib_match_list ++); + -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR); -+ _mali_osk_mem_barrier(); -+} ++void __kbase_tlstream_tl_nret_ctx_lpu( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ const void *lpu ++); + -+static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data) -+{ -+ struct mali_gp_core *core = (struct mali_gp_core *)data; -+ u32 irq_readout; ++void __kbase_tlstream_tl_nret_atom_ctx( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *ctx ++); + -+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT); -+ if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) { -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR); -+ _mali_osk_mem_barrier(); -+ return _MALI_OSK_ERR_OK; -+ } ++void __kbase_tlstream_tl_nret_atom_lpu( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *lpu ++); + -+ return _MALI_OSK_ERR_FAULT; -+} ++void __kbase_tlstream_tl_ret_as_ctx( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *ctx ++); + -+/* ------ local helper functions below --------- */ -+#if MALI_STATE_TRACKING -+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size) -+{ -+ int n = 0; ++void __kbase_tlstream_tl_nret_as_ctx( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ const void *ctx ++); + -+ n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description); ++void __kbase_tlstream_tl_ret_atom_as( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *address_space ++); + -+ return n; -+} -+#endif ++void __kbase_tlstream_tl_nret_atom_as( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ const void *address_space ++); + -+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job) -+{ -+ u32 val0 = 0; -+ u32 val1 = 0; -+ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job); -+ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job); ++void __kbase_tlstream_tl_attrib_atom_config( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u64 descriptor, ++ u64 affinity, ++ u32 config ++); + -+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { -+ val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE); -+ mali_gp_job_set_perf_counter_value0(job, 
val0); ++void __kbase_tlstream_tl_jit_usedpages( ++ struct kbase_tlstream *stream, ++ u64 used_pages, ++ u32 j_id ++); + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0); -+ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0); -+#endif ++void __kbase_tlstream_tl_attrib_atom_jitallocinfo( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u64 va_pgs, ++ u64 com_pgs, ++ u64 extent, ++ u32 j_id, ++ u32 bin_id, ++ u32 max_allocs, ++ u32 jit_flags, ++ u32 usg_id ++); + -+ } ++void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 j_id ++); + -+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { -+ val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE); -+ mali_gp_job_set_perf_counter_value1(job, val1); ++void __kbase_tlstream_tl_attrib_as_config( ++ struct kbase_tlstream *stream, ++ const void *address_space, ++ u64 transtab, ++ u64 memattr, ++ u64 transcfg ++); + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1); -+ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1); -+#endif -+ } -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp.h b/drivers/gpu/arm/mali400/mali/common/mali_gp.h -new file mode 100644 -index 000000000..3156310f2 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_gp.h -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++void __kbase_tlstream_tl_event_lpu_softstop( ++ struct kbase_tlstream *stream, ++ const void *lpu ++); + -+#ifndef __MALI_GP_H__ -+#define __MALI_GP_H__ ++void __kbase_tlstream_tl_event_atom_softstop_ex( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+#include "mali_osk.h" -+#include "mali_gp_job.h" -+#include "mali_hw_core.h" -+#include "regs/mali_gp_regs.h" ++void __kbase_tlstream_tl_event_atom_softstop_issue( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+struct mali_group; ++void __kbase_tlstream_tl_event_atom_softjob_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+/** -+ * Definition of the GP core struct -+ * Used to track a GP core in the system. 
-+ */ -+struct mali_gp_core { -+ struct mali_hw_core hw_core; /**< Common for all HW cores */ -+ _mali_osk_irq_t *irq; /**< IRQ handler */ -+}; ++void __kbase_tlstream_tl_event_atom_softjob_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+_mali_osk_errcode_t mali_gp_initialize(void); -+void mali_gp_terminate(void); ++void __kbase_tlstream_tl_arbiter_granted( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group); -+void mali_gp_delete(struct mali_gp_core *core); ++void __kbase_tlstream_tl_arbiter_started( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+void mali_gp_stop_bus(struct mali_gp_core *core); -+_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core); -+void mali_gp_reset_async(struct mali_gp_core *core); -+_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core); -+void mali_gp_hard_reset(struct mali_gp_core *core); -+_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core); ++void __kbase_tlstream_tl_arbiter_stop_requested( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job); -+void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr); ++void __kbase_tlstream_tl_arbiter_stopped( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+u32 mali_gp_core_get_version(struct mali_gp_core *core); ++void __kbase_tlstream_tl_arbiter_requested( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+struct mali_gp_core *mali_gp_get_global_gp_core(void); ++void __kbase_tlstream_jd_gpu_soft_reset( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+#if MALI_STATE_TRACKING -+u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size); -+#endif ++void __kbase_tlstream_jd_tiler_heap_chunk_alloc( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u64 chunk_va ++); + -+void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job); ++void __kbase_tlstream_tl_js_sched_start( ++ struct kbase_tlstream *stream, ++ u32 dummy ++); + -+MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core) -+{ -+ return core->hw_core.description; -+} ++void __kbase_tlstream_tl_js_sched_end( ++ struct kbase_tlstream *stream, ++ u32 dummy ++); + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core) -+{ -+ u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) & -+ MALIGP2_REG_VAL_IRQ_MASK_USED; ++void __kbase_tlstream_tl_jd_submit_atom_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+ if (0 == stat_used) { -+ return MALI_INTERRUPT_RESULT_NONE; -+ } else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | -+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) { -+ return MALI_INTERRUPT_RESULT_SUCCESS; -+ } else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) { -+ return MALI_INTERRUPT_RESULT_SUCCESS_VS; -+ } else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) { -+ return MALI_INTERRUPT_RESULT_SUCCESS_PLBU; -+ } else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) { -+ return MALI_INTERRUPT_RESULT_OOM; -+ } ++void __kbase_tlstream_tl_jd_submit_atom_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+ return MALI_INTERRUPT_RESULT_ERROR; -+} ++void __kbase_tlstream_tl_jd_done_no_lock_start( ++ struct 
kbase_tlstream *stream, ++ const void *atom ++); + -+MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return mali_hw_core_register_read(&core->hw_core, -+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); -+} ++void __kbase_tlstream_tl_jd_done_no_lock_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core) -+{ -+ u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS); -+ return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? MALI_TRUE : MALI_FALSE; -+} ++void __kbase_tlstream_tl_jd_done_start( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core) -+{ -+ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE); -+} ++void __kbase_tlstream_tl_jd_done_end( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions) -+{ -+ /* Enable all interrupts, except those specified in exceptions */ -+ u32 value; ++void __kbase_tlstream_tl_jd_atom_complete( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) { -+ /* Enable all used except VS complete */ -+ value = MALIGP2_REG_VAL_IRQ_MASK_USED & -+ ~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST; -+ } else { -+ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU == -+ exceptions); -+ /* Enable all used except PLBU complete */ -+ value = MALIGP2_REG_VAL_IRQ_MASK_USED & -+ ~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST; -+ } ++void __kbase_tlstream_tl_run_atom_start( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++); + -+ mali_hw_core_register_write(&core->hw_core, -+ MALIGP2_REG_ADDR_MGMT_INT_MASK, -+ value); -+} ++void __kbase_tlstream_tl_run_atom_end( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 atom_nr ++); + -+MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core) -+{ -+ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR); -+} ++void __kbase_tlstream_tl_attrib_atom_priority( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 prio ++); + -+#endif /* __MALI_GP_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c -new file mode 100644 -index 000000000..5d4d9f253 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c -@@ -0,0 +1,306 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++void __kbase_tlstream_tl_attrib_atom_state( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u32 state ++); + -+#include "mali_gp_job.h" -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_uk_types.h" -+#include "mali_memory_virtual.h" -+#include "mali_memory_defer_bind.h" ++void __kbase_tlstream_tl_attrib_atom_prioritized( ++ struct kbase_tlstream *stream, ++ const void *atom ++); + -+static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ -+static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ -+static void _mali_gp_del_varying_allocations(struct mali_gp_job *job); ++void __kbase_tlstream_tl_attrib_atom_jit( ++ struct kbase_tlstream *stream, ++ const void *atom, ++ u64 edit_addr, ++ u64 new_addr, ++ u32 jit_flags, ++ u64 mem_flags, ++ u32 j_id, ++ u64 com_pgs, ++ u64 extent, ++ u64 va_pgs ++); + ++void __kbase_tlstream_tl_kbase_new_device( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_gpu_core_count, ++ u32 kbase_device_max_num_csgs, ++ u32 kbase_device_as_count, ++ u32 kbase_device_sb_entry_count, ++ u32 kbase_device_has_cross_stream_sync, ++ u32 kbase_device_supports_gpu_sleep ++); + -+static int _mali_gp_add_varying_allocations(struct mali_session_data *session, -+ struct mali_gp_job *job, -+ u32 *alloc, -+ u32 num) -+{ -+ int i = 0; -+ struct mali_gp_allocation_node *alloc_node; -+ mali_mem_allocation *mali_alloc = NULL; -+ struct mali_vma_node *mali_vma_node = NULL; ++void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u64 buffer_gpu_addr ++); + -+ for (i = 0 ; i < num ; i++) { -+ MALI_DEBUG_ASSERT(alloc[i]); -+ alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node)); -+ if (alloc_node) { -+ INIT_LIST_HEAD(&alloc_node->node); -+ /* find mali allocation structure by vaddress*/ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0); ++void __kbase_tlstream_tl_kbase_device_program_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kernel_ctx_id, ++ u32 gpu_cmdq_grp_handle, ++ u32 kbase_device_csg_slot_index, ++ u32 kbase_device_csg_slot_resuming ++); + -+ if (likely(mali_vma_node)) { -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start); -+ } else { -+ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num)); -+ _mali_osk_free(alloc_node); -+ goto fail; -+ } -+ alloc_node->alloc = mali_alloc; -+ /* add to gp job varying alloc list*/ -+ list_move(&alloc_node->node, &job->varying_alloc); -+ } else -+ goto fail; -+ } ++void __kbase_tlstream_tl_kbase_device_deprogram_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++); + -+ return 0; -+fail: -+ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n")); -+ _mali_gp_del_varying_allocations(job); -+ return -1; -+} ++void __kbase_tlstream_tl_kbase_device_halting_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index, ++ u32 kbase_device_csg_slot_suspending ++); + ++void __kbase_tlstream_tl_kbase_device_suspend_csg( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++); + -+static void 
_mali_gp_del_varying_allocations(struct mali_gp_job *job) -+{ -+ struct mali_gp_allocation_node *alloc_node, *tmp_node; ++void __kbase_tlstream_tl_kbase_device_csg_idle( ++ struct kbase_tlstream *stream, ++ u32 kbase_device_id, ++ u32 kbase_device_csg_slot_index ++); + -+ list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) { -+ list_del(&alloc_node->node); -+ kfree(alloc_node); -+ } -+ INIT_LIST_HEAD(&job->varying_alloc); -+} ++void __kbase_tlstream_tl_kbase_new_ctx( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 kbase_device_id ++); + -+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker) -+{ -+ struct mali_gp_job *job; -+ u32 perf_counter_flag; -+ u32 __user *memory_list = NULL; -+ struct mali_gp_allocation_node *alloc_node, *tmp_node; -+ _mali_uk_gp_start_job_s copy_of_uargs; ++void __kbase_tlstream_tl_kbase_del_ctx( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id ++); + -+ job = _mali_osk_calloc(1, sizeof(struct mali_gp_job)); -+ if (NULL != job) { -+ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s)); -+ if (NULL == job->finished_notification) { -+ goto fail3; -+ } ++void __kbase_tlstream_tl_kbase_ctx_assign_as( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 kbase_device_as_index ++); + -+ job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s)); -+ if (NULL == job->oom_notification) { -+ goto fail2; -+ } ++void __kbase_tlstream_tl_kbase_ctx_unassign_as( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id ++); + -+ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) { -+ goto fail1; -+ } ++void __kbase_tlstream_tl_kbase_new_kcpuqueue( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 kcpu_queue_id, ++ u32 kernel_ctx_id, ++ u32 kcpuq_num_pending_cmds ++); + -+ perf_counter_flag = mali_gp_job_get_perf_counter_flag(job); ++void __kbase_tlstream_tl_kbase_del_kcpuqueue( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* case when no counters came from user space -+ * so pass the debugfs / DS-5 provided global ones to the job object */ -+ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) || -+ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) { -+ mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0()); -+ mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1()); -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *fence ++); + -+ _mali_osk_list_init(&job->list); -+ job->session = session; -+ job->id = id; -+ job->heap_current_addr = job->uargs.frame_registers[4]; -+ job->perf_counter_value0 = 0; -+ job->perf_counter_value1 = 0; -+ job->pid = _mali_osk_get_pid(); -+ job->tid = _mali_osk_get_tid(); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *fence ++); + ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u32 compare_value, ++ u32 inherit_error ++); + -+ INIT_LIST_HEAD(&job->varying_alloc); -+ INIT_LIST_HEAD(&job->vary_todo); -+ job->dmem = NULL; ++void 
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr ++); + -+ if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) { -+ MALI_PRINT_ERROR(("Mali GP job: The number of varying buffer to defer bind is invalid !\n")); -+ goto fail1; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u64 compare_value, ++ u32 condition, ++ u32 data_type, ++ u32 inherit_error ++); + -+ /* add varying allocation list*/ -+ if (job->uargs.deferred_mem_num > 0) { -+ /* copy varying list from user space*/ -+ job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num); -+ if (!job->varying_list) { -+ MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num)); -+ goto fail1; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 cqs_obj_gpu_addr, ++ u64 value, ++ u32 operation, ++ u32 data_type ++); + -+ if (0 != _mali_osk_copy_from_user(©_of_uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) { -+ goto fail1; -+ } -+ memory_list = (u32 __user *)(uintptr_t)copy_of_uargs.deferred_mem_list; ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++); + -+ if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num)) { -+ MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n")); -+ goto fail; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++); + -+ if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list, -+ job->uargs.deferred_mem_num))) { -+ MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n")); -+ goto fail; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr ++); + -+ /* do preparetion for each allocation */ -+ list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) { -+ if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize))) { -+ MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n")); -+ goto fail; -+ } -+ } ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ _mali_gp_del_varying_allocations(job); ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u64 jit_alloc_gpu_alloc_addr_dest, ++ u64 jit_alloc_va_pages, ++ u64 jit_alloc_commit_pages, ++ u64 jit_alloc_extent, ++ u32 jit_alloc_jit_id, ++ u32 jit_alloc_bin_id, ++ u32 jit_alloc_max_allocations, ++ u32 jit_alloc_flags, ++ u32 jit_alloc_usage_id ++); + -+ /* bind varying here, to avoid memory latency issue. 
*/ -+ { -+ struct mali_defer_mem_block dmem_block; ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ INIT_LIST_HEAD(&dmem_block.free_pages); -+ atomic_set(&dmem_block.num_free_pages, 0); ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ if (mali_mem_prepare_mem_for_job(job, &dmem_block)) { -+ MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n")); -+ goto fail; -+ } -+ if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job, &dmem_block)) { -+ MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job)); -+ goto fail; -+ } -+ } ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 jit_alloc_jit_id ++); + -+ if (job->uargs.varying_memsize > MALI_UK_BIG_VARYING_SIZE) { -+ job->big_job = 1; -+ } -+ } -+ job->pp_tracker = pp_tracker; -+ if (NULL != job->pp_tracker) { -+ /* Take a reference on PP job's tracker that will be released when the GP -+ job is done. */ -+ mali_timeline_system_tracker_get(session->timeline_system, pp_tracker); -+ } ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job); -+ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence)); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ return job; -+ } else { -+ MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n")); -+ return NULL; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *group_suspend_buf, ++ u32 gpu_cmdq_grp_handle ++); + ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+fail: -+ _mali_osk_free(job->varying_list); -+ /* Handle allocate fail here, free all varying node */ -+ { -+ struct mali_backend_bind_list *bkn, *bkn_tmp; -+ list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) { -+ list_del(&bkn->node); -+ _mali_osk_free(bkn); -+ } -+ } -+fail1: -+ _mali_osk_notification_delete(job->oom_notification); -+fail2: -+ _mali_osk_notification_delete(job->finished_notification); -+fail3: -+ _mali_osk_free(job); -+ return NULL; -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+void mali_gp_job_delete(struct mali_gp_job *job) -+{ -+ struct mali_backend_bind_list *bkn, *bkn_tmp; -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT(NULL == job->pp_tracker); -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list)); -+ _mali_osk_free(job->varying_list); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* Handle allocate fail here, free all varying node */ -+ list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) { -+ list_del(&bkn->node); -+ _mali_osk_free(bkn); -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+ mali_mem_defer_dmem_free(job); 
++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* de-allocate the pre-allocated oom notifications */ -+ if (NULL != job->oom_notification) { -+ _mali_osk_notification_delete(job->oom_notification); -+ job->oom_notification = NULL; -+ } -+ if (NULL != job->finished_notification) { -+ _mali_osk_notification_delete(job->finished_notification); -+ job->finished_notification = NULL; -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+ _mali_osk_free(job); -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list) -+{ -+ struct mali_gp_job *iter; -+ struct mali_gp_job *tmp; ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+ /* Find position in list/queue where job should be added. */ -+ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list, -+ struct mali_gp_job, list) { ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+ /* A span is used to handle job ID wrapping. */ -+ bool job_is_after = (mali_gp_job_get_id(job) - -+ mali_gp_job_get_id(iter)) < -+ MALI_SCHEDULER_JOB_ID_SPAN; ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ if (job_is_after) { -+ break; -+ } -+ } ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+ _mali_osk_list_add(&job->list, &iter->list); -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+u32 mali_gp_job_get_gp_counter_src0(void) -+{ -+ return gp_counter_src0; -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+void mali_gp_job_set_gp_counter_src0(u32 counter) -+{ -+ gp_counter_src0 = counter; -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+u32 mali_gp_job_get_gp_counter_src1(void) -+{ -+ return gp_counter_src1; -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+void mali_gp_job_set_gp_counter_src1(u32 counter) -+{ -+ gp_counter_src1 = counter; -+} ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success) -+{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ 
const void *kcpu_queue ++); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error, ++ u64 jit_alloc_gpu_alloc_addr, ++ u64 jit_alloc_mmu_flags ++); + -+ if (NULL != job->pp_tracker) { -+ schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success); -+ job->pp_tracker = NULL; -+ } ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ return schedule_mask; -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h -new file mode 100644 -index 000000000..b84333f9f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+#ifndef __MALI_GP_JOB_H__ -+#define __MALI_GP_JOB_H__ ++void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_uk_types.h" -+#include "mali_session.h" -+#include "mali_timeline.h" -+#include "mali_scheduler_types.h" -+#include "mali_scheduler.h" -+#include "mali_executor.h" -+#include "mali_timeline.h" ++void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error, ++ u64 jit_free_pages_used ++); + -+struct mali_defer_mem; -+/** -+ * This structure represents a GP job -+ * -+ * The GP job object itself is not protected by any single lock, -+ * but relies on other locks instead (scheduler, executor and timeline lock). -+ * Think of the job object as moving between these sub systems through-out -+ * its lifetime. Different part of the GP job struct is used by different -+ * subsystems. Accessor functions ensure that correct lock is taken. -+ * Do NOT access any data members directly from outside this module! -+ */ -+struct mali_gp_job { -+ /* -+ * These members are typically only set at creation, -+ * and only read later on. -+ * They do not require any lock protection. -+ */ -+ _mali_uk_gp_start_job_s uargs; /**< Arguments from user space */ -+ struct mali_session_data *session; /**< Session which submitted this job */ -+ u32 pid; /**< Process ID of submitting process */ -+ u32 tid; /**< Thread ID of submitting thread */ -+ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */ -+ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */ -+ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */ -+ struct mali_timeline_tracker *pp_tracker; /**< Pointer to Timeline tracker for PP job that depends on this job. 
*/ -+ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */ ++void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* -+ * These members are used by the scheduler, -+ * protected by scheduler lock -+ */ -+ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* -+ * These members are used by the executor and/or group, -+ * protected by executor lock -+ */ -+ _mali_osk_notification_t *oom_notification; /**< Notification sent back to userspace on OOM */ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue ++); + -+ /* -+ * Set by executor/group on job completion, read by scheduler when -+ * returning job to user. Hold executor lock when setting, -+ * no lock needed when reading -+ */ -+ u32 heap_current_addr; /**< Holds the current HEAP address when the job has completed */ -+ u32 perf_counter_value0; /**< Value of performance counter 0 (to be returned to user space) */ -+ u32 perf_counter_value1; /**< Value of performance counter 1 (to be returned to user space) */ -+ struct mali_defer_mem *dmem; /** < used for defer bind to store dmem info */ -+ struct list_head varying_alloc; /**< hold the list of varying allocations */ -+ u32 bind_flag; /** < flag for deferbind*/ -+ u32 *varying_list; /**< varying memory list need to to defer bind*/ -+ struct list_head vary_todo; /**< list of backend list need to do defer bind*/ -+ u32 required_varying_memsize; /** < size of varying memory to reallocate*/ -+ u32 big_job; /** < if the gp job have large varying output and may take long time*/ -+}; ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error ++); + -+#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0) -+#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2) ++void __kbase_tlstream_tl_kbase_csffw_fw_reloading( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+struct mali_gp_allocation_node { -+ struct list_head node; -+ mali_mem_allocation *alloc; -+}; ++void __kbase_tlstream_tl_kbase_csffw_fw_enabling( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker); -+void mali_gp_job_delete(struct mali_gp_job *job); ++void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+u32 mali_gp_job_get_gp_counter_src0(void); -+void mali_gp_job_set_gp_counter_src0(u32 counter); -+u32 mali_gp_job_get_gp_counter_src1(void); -+void mali_gp_job_set_gp_counter_src1(u32 counter); ++void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (NULL == job) ? 
0 : job->id; -+} ++void __kbase_tlstream_tl_kbase_csffw_fw_request_halt( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job, -+ u32 cache_order) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ job->cache_order = cache_order; -+} ++void __kbase_tlstream_tl_kbase_csffw_fw_disabling( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (NULL == job) ? 0 : job->cache_order; -+} ++void __kbase_tlstream_tl_kbase_csffw_fw_off( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle ++); + -+MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.user_job_ptr; -+} ++void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( ++ struct kbase_tlstream *stream, ++ u64 csffw_timestamp, ++ u64 csffw_cycle ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.frame_builder_id; -+} ++void __kbase_tlstream_aux_pm_state( ++ struct kbase_tlstream *stream, ++ u32 core_type, ++ u64 core_state_bitset ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.flush_id; -+} ++void __kbase_tlstream_aux_pagefault( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u32 as_nr, ++ u64 page_cnt_change ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->pid; -+} ++void __kbase_tlstream_aux_pagesalloc( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 page_cnt ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->tid; -+} ++void __kbase_tlstream_aux_devfreq_target( ++ struct kbase_tlstream *stream, ++ u64 target_freq ++); + -+MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.frame_registers; -+} ++void __kbase_tlstream_aux_jit_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u32 bid, ++ u32 max_allocs, ++ u32 allocs, ++ u32 va_pages, ++ u32 ph_pages ++); + -+MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->session; -+} ++void __kbase_tlstream_aux_tiler_heap_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u32 va_pages, ++ u32 ph_pages, ++ u32 max_chunks, ++ u32 chunk_size, ++ u32 chunk_count, ++ u32 target_in_flight, ++ u32 nr_in_flight ++); + -+MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE; -+} ++void __kbase_tlstream_aux_event_job_slot( ++ struct kbase_tlstream *stream, ++ const void *ctx, ++ u32 slot_nr, ++ u32 atom_nr, ++ u32 event ++); + -+MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? 
MALI_TRUE : MALI_FALSE; -+} ++void __kbase_tlstream_aux_protected_enter_start( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->heap_current_addr; -+} ++void __kbase_tlstream_aux_protected_enter_end( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ job->heap_current_addr = heap_addr; -+} ++void __kbase_tlstream_aux_mmu_command( ++ struct kbase_tlstream *stream, ++ u32 kernel_ctx_id, ++ u32 mmu_cmd_id, ++ u32 mmu_synchronicity, ++ u64 mmu_lock_addr, ++ u32 mmu_lock_page_num ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.perf_counter_flag; -+} ++void __kbase_tlstream_aux_protected_leave_start( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.perf_counter_src0; -+} ++void __kbase_tlstream_aux_protected_leave_end( ++ struct kbase_tlstream *stream, ++ const void *gpu ++); + -+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.perf_counter_src1; -+} ++struct kbase_tlstream; + -+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->perf_counter_value0; -+} ++/** ++ * KBASE_TLSTREAM_TL_NEW_CTX - object ctx is created ++ * ++ * @kbdev: Kbase device ++ * @ctx: Name of the context object ++ * @ctx_nr: Kernel context number ++ * @tgid: Thread Group Id ++ */ ++#define KBASE_TLSTREAM_TL_NEW_CTX( \ ++ kbdev, \ ++ ctx, \ ++ ctx_nr, \ ++ tgid \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_new_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ ctx, \ ++ ctx_nr, \ ++ tgid \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->perf_counter_value1; -+} ++/** ++ * KBASE_TLSTREAM_TL_NEW_GPU - object gpu is created ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ * @gpu_id: Name of the GPU object ++ * @core_count: Number of cores this GPU hosts ++ */ ++#define KBASE_TLSTREAM_TL_NEW_GPU( \ ++ kbdev, \ ++ gpu, \ ++ gpu_id, \ ++ core_count \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_new_gpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu, \ ++ gpu_id, \ ++ core_count \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ job->uargs.perf_counter_src0 = src; -+} ++/** ++ * KBASE_TLSTREAM_TL_NEW_LPU - object lpu is created ++ * ++ * @kbdev: Kbase device ++ * @lpu: Name of the Logical Processing Unit object ++ * @lpu_nr: Sequential number assigned to the newly created LPU ++ * @lpu_fn: Property describing functional abilities of this LPU ++ */ ++#define KBASE_TLSTREAM_TL_NEW_LPU( \ ++ kbdev, \ ++ lpu, \ ++ lpu_nr, \ ++ lpu_fn \ ++ ) \ ++ do { \ ++ 
int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_new_lpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ lpu, \ ++ lpu_nr, \ ++ lpu_fn \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ job->uargs.perf_counter_src1 = src; -+} ++/** ++ * KBASE_TLSTREAM_TL_NEW_ATOM - object atom is created ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @atom_nr: Sequential number of an atom ++ */ ++#define KBASE_TLSTREAM_TL_NEW_ATOM( \ ++ kbdev, \ ++ atom, \ ++ atom_nr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_new_atom( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ atom_nr \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ job->perf_counter_value0 = value; -+} ++/** ++ * KBASE_TLSTREAM_TL_NEW_AS - address space object is created ++ * ++ * @kbdev: Kbase device ++ * @address_space: Name of the address space object ++ * @as_nr: Address space number ++ */ ++#define KBASE_TLSTREAM_TL_NEW_AS( \ ++ kbdev, \ ++ address_space, \ ++ as_nr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_new_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ address_space, \ ++ as_nr \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ job->perf_counter_value1 = value; -+} ++/** ++ * KBASE_TLSTREAM_TL_DEL_CTX - context is destroyed ++ * ++ * @kbdev: Kbase device ++ * @ctx: Name of the context object ++ */ ++#define KBASE_TLSTREAM_TL_DEL_CTX( \ ++ kbdev, \ ++ ctx \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_del_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ ctx \ ++ ); \ ++ } while (0) + -+void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list); ++/** ++ * KBASE_TLSTREAM_TL_DEL_ATOM - atom is destroyed ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_DEL_ATOM( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_del_atom( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job, -+ _mali_osk_list_t *list) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list)); -+ _mali_osk_list_move(&job->list, list); -+} ++/** ++ * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - lpu is deleted with gpu ++ * ++ * @kbdev: Kbase device ++ * @lpu: Name of the Logical Processing Unit object ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ ++ kbdev, \ ++ lpu, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_lifelink_lpu_gpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ lpu, \ ++ gpu \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void 
mali_gp_job_list_remove(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ _mali_osk_list_delinit(&job->list); -+} ++/** ++ * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - address space is deleted with gpu ++ * ++ * @kbdev: Kbase device ++ * @address_space: Name of the address space object ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ ++ kbdev, \ ++ address_space, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_lifelink_as_gpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ address_space, \ ++ gpu \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE _mali_osk_notification_t * -+mali_gp_job_get_finished_notification(struct mali_gp_job *job) -+{ -+ _mali_osk_notification_t *notification; ++/** ++ * KBASE_TLSTREAM_TL_RET_CTX_LPU - context is retained by lpu ++ * ++ * @kbdev: Kbase device ++ * @ctx: Name of the context object ++ * @lpu: Name of the Logical Processing Unit object ++ */ ++#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ ++ kbdev, \ ++ ctx, \ ++ lpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_ret_ctx_lpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ ctx, \ ++ lpu \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(job->finished_notification); ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - atom is retained by context ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @ctx: Name of the context object ++ */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ ++ kbdev, \ ++ atom, \ ++ ctx \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_ret_atom_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ ctx \ ++ ); \ ++ } while (0) + -+ notification = job->finished_notification; -+ job->finished_notification = NULL; ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - atom is retained by lpu ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @lpu: Name of the Logical Processing Unit object ++ * @attrib_match_list: List containing match operator attributes ++ */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ ++ kbdev, \ ++ atom, \ ++ lpu, \ ++ attrib_match_list \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_ret_atom_lpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ lpu, \ ++ attrib_match_list \ ++ ); \ ++ } while (0) + -+ return notification; -+} ++/** ++ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - context is released by lpu ++ * ++ * @kbdev: Kbase device ++ * @ctx: Name of the context object ++ * @lpu: Name of the Logical Processing Unit object ++ */ ++#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ ++ kbdev, \ ++ ctx, \ ++ lpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_nret_ctx_lpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ ctx, \ ++ lpu \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification( -+ struct mali_gp_job *job) -+{ -+ _mali_osk_notification_t *notification; ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - atom is released by context ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @ctx: Name of the context object ++ */ ++#define 
KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ ++ kbdev, \ ++ atom, \ ++ ctx \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_nret_atom_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ ctx \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT_POINTER(job->oom_notification); ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - atom is released by lpu ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @lpu: Name of the Logical Processing Unit object ++ */ ++#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ ++ kbdev, \ ++ atom, \ ++ lpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_nret_atom_lpu( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ lpu \ ++ ); \ ++ } while (0) + -+ notification = job->oom_notification; -+ job->oom_notification = NULL; ++/** ++ * KBASE_TLSTREAM_TL_RET_AS_CTX - address space is retained by context ++ * ++ * @kbdev: Kbase device ++ * @address_space: Name of the address space object ++ * @ctx: Name of the context object ++ */ ++#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ ++ kbdev, \ ++ address_space, \ ++ ctx \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_ret_as_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ address_space, \ ++ ctx \ ++ ); \ ++ } while (0) + -+ return notification; -+} ++/** ++ * KBASE_TLSTREAM_TL_NRET_AS_CTX - address space is released by context ++ * ++ * @kbdev: Kbase device ++ * @address_space: Name of the address space object ++ * @ctx: Name of the context object ++ */ ++#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ ++ kbdev, \ ++ address_space, \ ++ ctx \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_nret_as_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ address_space, \ ++ ctx \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE void mali_gp_job_set_oom_notification( -+ struct mali_gp_job *job, -+ _mali_osk_notification_t *notification) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(NULL == job->oom_notification); -+ job->oom_notification = notification; -+} ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_AS - atom is retained by address space ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @address_space: Name of the address space object ++ */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ ++ kbdev, \ ++ atom, \ ++ address_space \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_ret_atom_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ address_space \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker( -+ struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return &(job->tracker); -+} ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - atom is released by address space ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @address_space: Name of the address space object ++ */ ++#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ ++ kbdev, \ ++ atom, \ ++ address_space \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_nret_atom_as( \ ++ 
__TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ address_space \ ++ ); \ ++ } while (0) + ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @descriptor: Job descriptor address ++ * @affinity: Job affinity ++ * @config: Job config ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ ++ kbdev, \ ++ atom, \ ++ descriptor, \ ++ affinity, \ ++ config \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_attrib_atom_config( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ descriptor, \ ++ affinity, \ ++ config \ ++ ); \ ++ } while (0) + -+MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr( -+ struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr; -+} ++/** ++ * KBASE_TLSTREAM_TL_JIT_USEDPAGES - used pages for jit ++ * ++ * @kbdev: Kbase device ++ * @used_pages: Number of pages used for jit ++ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. ++ */ ++#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ ++ kbdev, \ ++ used_pages, \ ++ j_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jit_usedpages( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ used_pages, \ ++ j_id \ ++ ); \ ++ } while (0) + ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - Information about JIT allocations ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @va_pgs: The minimum number of virtual pages required ++ * @com_pgs: The minimum number of physical pages which should back the allocation. ++ * @extent: Granularity of physical pages to grow the allocation by during a fault. ++ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. ++ * @bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation. ++ * @max_allocs: Maximum allocations allowed in this bin. ++ * @jit_flags: Flags specifying the special requirements for the JIT allocation. ++ * @usg_id: A hint about which allocation should be reused. ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ ++ kbdev, \ ++ atom, \ ++ va_pgs, \ ++ com_pgs, \ ++ extent, \ ++ j_id, \ ++ bin_id, \ ++ max_allocs, \ ++ jit_flags, \ ++ usg_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ va_pgs, \ ++ com_pgs, \ ++ extent, \ ++ j_id, \ ++ bin_id, \ ++ max_allocs, \ ++ jit_flags, \ ++ usg_id \ ++ ); \ ++ } while (0) + +/** -+ * Release reference on tracker for PP job that depends on this GP job. ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - Information about JIT frees + * -+ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is -+ * deleted. ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. 
++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ ++ kbdev, \ ++ atom, \ ++ j_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ j_id \ ++ ); \ ++ } while (0) ++ ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes + * -+ * @param job GP job that is done. -+ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not. -+ * @return A scheduling bitmask indicating whether scheduling needs to be done. ++ * @kbdev: Kbase device ++ * @address_space: Name of the address space object ++ * @transtab: Configuration of the TRANSTAB register ++ * @memattr: Configuration of the MEMATTR register ++ * @transcfg: Configuration of the TRANSCFG register (or zero if not present) + */ -+mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success); ++#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ ++ kbdev, \ ++ address_space, \ ++ transtab, \ ++ memattr, \ ++ transcfg \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_attrib_as_config( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ address_space, \ ++ transtab, \ ++ memattr, \ ++ transcfg \ ++ ); \ ++ } while (0) + -+#endif /* __MALI_GP_JOB_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_group.c b/drivers/gpu/arm/mali400/mali/common/mali_group.c -new file mode 100644 -index 000000000..47979a2f1 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_group.c -@@ -0,0 +1,1875 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++/** ++ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - softstop event on given lpu ++ * ++ * @kbdev: Kbase device ++ * @lpu: Name of the Logical Processing Unit object + */ -+#include "mali_kernel_common.h" -+#include "mali_group.h" -+#include "mali_osk.h" -+#include "mali_l2_cache.h" -+#include "mali_gp.h" -+#include "mali_pp.h" -+#include "mali_mmu.h" -+#include "mali_dlbu.h" -+#include "mali_broadcast.h" -+#include "mali_scheduler.h" -+#include "mali_osk_profiling.h" -+#include "mali_osk_mali.h" -+#include "mali_pm_domain.h" -+#include "mali_pm.h" -+#include "mali_executor.h" ++#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ ++ kbdev, \ ++ lpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_event_lpu_softstop( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ lpu \ ++ ); \ ++ } while (0) + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+#include -+#include -+#endif -+ -+#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2) -+ -+#if defined(CONFIG_MALI400_PROFILING) -+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num); -+#endif /* #if defined(CONFIG_MALI400_PROFILING) */ -+ -+static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, }; -+static u32 mali_global_num_groups = 0; -+ -+/* SW timer for job execution */ -+int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT; ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - atom softstopped ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_event_atom_softstop_ex( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+/* local helper functions */ -+static void mali_group_bottom_half_mmu(void *data); -+static void mali_group_bottom_half_gp(void *data); -+static void mali_group_bottom_half_pp(void *data); -+static void mali_group_timeout(void *data); -+static void mali_group_reset_pp(struct mali_group *group); -+static void mali_group_reset_mmu(struct mali_group *group); ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - atom softstop issued ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_event_atom_softstop_issue( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload); -+static void mali_group_recovery_reset(struct mali_group *group); ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - atom soft job has started ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_event_atom_softjob_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+struct mali_group *mali_group_create(struct mali_l2_cache_core *core, -+ struct mali_dlbu_core *dlbu, -+ struct mali_bcast_unit *bcast, -+ u32 domain_index) 
-+{ -+ struct mali_group *group = NULL; ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - atom soft job has completed ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_event_atom_softjob_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) { -+ MALI_PRINT_ERROR(("Mali group: Too many group objects created\n")); -+ return NULL; -+ } ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_GRANTED - Arbiter has granted gpu access ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_GRANTED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_granted( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ group = _mali_osk_calloc(1, sizeof(struct mali_group)); -+ if (NULL != group) { -+ group->timeout_timer = _mali_osk_timer_init(mali_group_timeout); -+ if (NULL != group->timeout_timer) { -+ _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group); ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_STARTED - Driver is running again and able to process jobs ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_STARTED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_started( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ group->l2_cache_core[0] = core; -+ _mali_osk_list_init(&group->group_list); -+ _mali_osk_list_init(&group->executor_list); -+ _mali_osk_list_init(&group->pm_domain_list); -+ group->bcast_core = bcast; -+ group->dlbu_core = dlbu; ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED - Arbiter has requested driver to stop using gpu ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_stop_requested( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ /* register this object as a part of the correct power domain */ -+ if ((NULL != core) || (NULL != dlbu) || (NULL != bcast)) -+ group->pm_domain = mali_pm_register_group(domain_index, group); ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_STOPPED - Driver has stopped using gpu ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_STOPPED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_stopped( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ mali_global_groups[mali_global_num_groups] = group; -+ mali_global_num_groups++; ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - Driver has requested the arbiter for gpu access ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = 
atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_requested( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ return group; -+ } -+ _mali_osk_free(group); -+ } ++/** ++ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - gpu soft reset ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_jd_gpu_soft_reset( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ return NULL; -+} ++/** ++ * KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC - Tiler Heap Chunk Allocation ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @heap_id: Unique id used to represent a heap under a context ++ * @chunk_va: Virtual start address of tiler heap chunk ++ */ ++#define KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC( \ ++ kbdev, \ ++ ctx_nr, \ ++ heap_id, \ ++ chunk_va \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_jd_tiler_heap_chunk_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ ctx_nr, \ ++ heap_id, \ ++ chunk_va \ ++ ); \ ++ } while (0) + -+void mali_group_delete(struct mali_group *group) -+{ -+ u32 i; ++/** ++ * KBASE_TLSTREAM_TL_JS_SCHED_START - Scheduling starts ++ * ++ * @kbdev: Kbase device ++ * @dummy: dummy argument ++ */ ++#define KBASE_TLSTREAM_TL_JS_SCHED_START( \ ++ kbdev, \ ++ dummy \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_js_sched_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ dummy \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_PRINT(4, ("Deleting group %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_JS_SCHED_END - Scheduling ends ++ * ++ * @kbdev: Kbase device ++ * @dummy: dummy argument ++ */ ++#define KBASE_TLSTREAM_TL_JS_SCHED_END( \ ++ kbdev, \ ++ dummy \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_js_sched_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ dummy \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_ASSERT(NULL == group->parent_group); -+ MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state))); ++/** ++ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START - Submitting an atom starts ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_submit_atom_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ /* Delete the resources that this group owns */ -+ if (NULL != group->gp_core) { -+ mali_gp_delete(group->gp_core); -+ } ++/** ++ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END - Submitting an atom ends ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_submit_atom_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (NULL != group->pp_core) { 
-+ mali_pp_delete(group->pp_core); -+ } ++/** ++ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function kbase_jd_done_nolock ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_done_no_lock_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (NULL != group->mmu) { -+ mali_mmu_delete(group->mmu); -+ } ++/** ++ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function kbase_jd_done_nolock - end ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_done_no_lock_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (mali_group_is_virtual(group)) { -+ /* Remove all groups from virtual group */ -+ struct mali_group *child; -+ struct mali_group *temp; ++/** ++ * KBASE_TLSTREAM_TL_JD_DONE_START - Start of kbase_jd_done ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_DONE_START( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_done_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ child->parent_group = NULL; -+ mali_group_delete(child); -+ } ++/** ++ * KBASE_TLSTREAM_TL_JD_DONE_END - End of kbase_jd_done ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_DONE_END( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_done_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ mali_dlbu_delete(group->dlbu_core); ++/** ++ * KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE - Atom marked complete ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_jd_atom_complete( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (NULL != group->bcast_core) { -+ mali_bcast_unit_delete(group->bcast_core); -+ } -+ } ++/** ++ * KBASE_TLSTREAM_TL_RUN_ATOM_START - Running of atom starts ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @atom_nr: Sequential number of an atom ++ */ ++#define KBASE_TLSTREAM_TL_RUN_ATOM_START( \ ++ kbdev, \ ++ atom, \ ++ atom_nr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_run_atom_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ atom_nr \ ++ ); \ ++ } while (0) + -+ for (i = 0; i < mali_global_num_groups; i++) { -+ if (mali_global_groups[i] == group) { -+ mali_global_groups[i] = NULL; -+ mali_global_num_groups--; ++/** ++ * KBASE_TLSTREAM_TL_RUN_ATOM_END - Running of atom ends ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @atom_nr: Sequential number of an 
atom ++ */ ++#define KBASE_TLSTREAM_TL_RUN_ATOM_END( \ ++ kbdev, \ ++ atom, \ ++ atom_nr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_run_atom_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ atom_nr \ ++ ); \ ++ } while (0) + -+ if (i != mali_global_num_groups) { -+ /* We removed a group from the middle of the array -- move the last -+ * group to the current position to close the gap */ -+ mali_global_groups[i] = mali_global_groups[mali_global_num_groups]; -+ mali_global_groups[mali_global_num_groups] = NULL; -+ } ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @prio: Atom priority ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ ++ kbdev, \ ++ atom, \ ++ prio \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_tl_attrib_atom_priority( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ prio \ ++ ); \ ++ } while (0) + -+ break; -+ } -+ } ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @state: Atom state ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ ++ kbdev, \ ++ atom, \ ++ state \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_tl_attrib_atom_state( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ state \ ++ ); \ ++ } while (0) + -+ if (NULL != group->timeout_timer) { -+ _mali_osk_timer_del(group->timeout_timer); -+ _mali_osk_timer_term(group->timeout_timer); -+ } ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom caused priority change ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ ++ kbdev, \ ++ atom \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_tl_attrib_atom_prioritized( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom \ ++ ); \ ++ } while (0) + -+ if (NULL != group->bottom_half_work_mmu) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_mmu); -+ } ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit done for atom ++ * ++ * @kbdev: Kbase device ++ * @atom: Atom identifier ++ * @edit_addr: Address edited by jit ++ * @new_addr: Address placed into the edited location ++ * @jit_flags: Flags specifying the special requirements for the JIT allocation. ++ * @mem_flags: Flags defining the properties of a memory region ++ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests. ++ * @com_pgs: The minimum number of physical pages which should back the allocation. ++ * @extent: Granularity of physical pages to grow the allocation by during a fault. 
++ * @va_pgs: The minimum number of virtual pages required ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ ++ kbdev, \ ++ atom, \ ++ edit_addr, \ ++ new_addr, \ ++ jit_flags, \ ++ mem_flags, \ ++ j_id, \ ++ com_pgs, \ ++ extent, \ ++ va_pgs \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ ++ __kbase_tlstream_tl_attrib_atom_jit( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ atom, \ ++ edit_addr, \ ++ new_addr, \ ++ jit_flags, \ ++ mem_flags, \ ++ j_id, \ ++ com_pgs, \ ++ extent, \ ++ va_pgs \ ++ ); \ ++ } while (0) + -+ if (NULL != group->bottom_half_work_gp) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_gp); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - New KBase Device ++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware ++ * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports ++ * @kbase_device_as_count: The number of address spaces the physical hardware has available ++ * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the physical hardware has available ++ * @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported ++ * @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_gpu_core_count, \ ++ kbase_device_max_num_csgs, \ ++ kbase_device_as_count, \ ++ kbase_device_sb_entry_count, \ ++ kbase_device_has_cross_stream_sync, \ ++ kbase_device_supports_gpu_sleep \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_device( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kbase_device_gpu_core_count, \ ++ kbase_device_max_num_csgs, \ ++ kbase_device_as_count, \ ++ kbase_device_sb_entry_count, \ ++ kbase_device_has_cross_stream_sync, \ ++ kbase_device_supports_gpu_sleep \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_gpu_core_count, \ ++ kbase_device_max_num_csgs, \ ++ kbase_device_as_count, \ ++ kbase_device_sb_entry_count, \ ++ kbase_device_has_cross_stream_sync, \ ++ kbase_device_supports_gpu_sleep \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (NULL != group->bottom_half_work_pp) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_pp); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @buffer_gpu_addr: Address of the GPU queue's command buffer ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ buffer_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id, \ ++ buffer_gpu_addr \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ buffer_gpu_addr \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ _mali_osk_free(group); -+} ++/** ++ * 
KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot ++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace ++ * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed ++ * @kbase_device_csg_slot_resuming: Whether the csg is being resumed ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kernel_ctx_id, \ ++ gpu_cmdq_grp_handle, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_resuming \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_program_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kernel_ctx_id, \ ++ gpu_cmdq_grp_handle, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_resuming \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kernel_ctx_id, \ ++ gpu_cmdq_grp_handle, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_resuming \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core) -+{ -+ /* This group object now owns the MMU core object */ -+ group->mmu = mmu_core; -+ group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group); -+ if (NULL == group->bottom_half_work_mmu) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ return _MALI_OSK_ERR_OK; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - CSG is deprogrammed from a slot ++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_deprogram_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+void mali_group_remove_mmu_core(struct mali_group *group) -+{ -+ /* This group object no longer owns the MMU core object */ -+ group->mmu = NULL; -+ if (NULL != group->bottom_half_work_mmu) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_mmu); -+ } -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting ++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted ++ * @kbase_device_csg_slot_suspending: Whether the csg is being suspended ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_suspending \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & 
BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_halting_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_suspending \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index, \ ++ kbase_device_csg_slot_suspending \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core) -+{ -+ /* This group object now owns the GP core object */ -+ group->gp_core = gp_core; -+ group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group); -+ if (NULL == group->bottom_half_work_gp) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended ++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_suspend_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ return _MALI_OSK_ERR_OK; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle. 
++ * ++ * @kbdev: Kbase device ++ * @kbase_device_id: The ID of the physical hardware ++ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG for which we are receiving an idle notification ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_csg_idle( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+void mali_group_remove_gp_core(struct mali_group *group) -+{ -+ /* This group object no longer owns the GP core object */ -+ group->gp_core = NULL; -+ if (NULL != group->bottom_half_work_gp) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_gp); -+ } -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - New KBase Context ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @kbase_device_id: The ID of the physical hardware ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ kbase_device_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id, \ ++ kbase_device_id \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ kbase_device_id \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core) -+{ -+ /* This group object now owns the PP core object */ -+ group->pp_core = pp_core; -+ group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group); -+ if (NULL == group->bottom_half_work_pp) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ return _MALI_OSK_ERR_OK; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - Delete KBase Context ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_del_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+void mali_group_remove_pp_core(struct mali_group *group) -+{ -+ /* This group object no longer owns the PP core object */ -+ group->pp_core = NULL; -+ if (NULL != group->bottom_half_work_pp) { -+ _mali_osk_wq_delete_work(group->bottom_half_work_pp); -+ } -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - Address Space is assigned to a KBase context ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @kbase_device_as_index: The index of the device address space being assigned ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ 
kbase_device_as_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_ctx_assign_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id, \ ++ kbase_device_as_index \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ kbase_device_as_index \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+enum mali_group_state mali_group_activate(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - Address Space is unassigned from a KBase context ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_ctx_unassign_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - New KCPU Queue ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @kcpu_queue_id: KCPU queue ID ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @kcpuq_num_pending_cmds: Number of commands already enqueued in the KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ kcpu_queue_id, \ ++ kernel_ctx_id, \ ++ kcpuq_num_pending_cmds \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_kcpuqueue( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ kcpu_queue_id, \ ++ kernel_ctx_id, \ ++ kcpuq_num_pending_cmds \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ kcpu_queue_id, \ ++ kernel_ctx_id, \ ++ kcpuq_num_pending_cmds \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (MALI_GROUP_STATE_INACTIVE == group->state) { -+ /* Group is inactive, get PM refs in order to power up */ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - Delete KCPU Queue ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_del_kcpuqueue( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * We'll take a maximum of 2 power domain references pr group, -+ * one for the group itself, and one for it's L2 cache. 
-+ */ -+ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS]; -+ struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS]; -+ u32 num_domains = 0; -+ mali_bool all_groups_on; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - KCPU Queue enqueues Signal on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @fence: Fence object handle ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ ++ kbdev, \ ++ kcpu_queue, \ ++ fence \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ fence \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ ++ kbdev, \ ++ kcpu_queue, \ ++ fence \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* Deal with child groups first */ -+ if (mali_group_is_virtual(group)) { -+ /* -+ * The virtual group might have 0, 1 or 2 L2s in -+ * its l2_cache_core array, but we ignore these and -+ * let the child groups take the needed L2 cache ref -+ * on behalf of the virtual group. -+ * In other words; The L2 refs are taken in pair with -+ * the physical group which the L2 is attached to. -+ */ -+ struct mali_group *child; -+ struct mali_group *temp; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - KCPU Queue enqueues Wait on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @fence: Fence object handle ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ fence \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ fence \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ fence \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * Child group is inactive, get PM -+ * refs in order to power up. 
-+ */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, -+ &group->group_list, -+ struct mali_group, group_list) { -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE -+ == child->state); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT - KCPU Queue enqueues Wait on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer ++ * @compare_value: Semaphore value that should be exceeded for the WAIT to pass ++ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ inherit_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ inherit_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ inherit_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ child->state = MALI_GROUP_STATE_ACTIVATION_PENDING; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET - KCPU Queue enqueues Set on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_ASSERT_POINTER( -+ child->pm_domain); -+ domains[num_domains] = child->pm_domain; -+ groups[num_domains] = child; -+ num_domains++; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer ++ * @compare_value: Value that should be compared to semaphore value for the WAIT to pass ++ * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. 
greater than, less or equal) ++ * @data_type: Data type of a CQS Object's value ++ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ condition, \ ++ data_type, \ ++ inherit_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ condition, \ ++ data_type, \ ++ inherit_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ compare_value, \ ++ condition, \ ++ data_type, \ ++ inherit_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * Take L2 domain ref for child group. -+ */ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS -+ > num_domains); -+ domains[num_domains] = mali_l2_cache_get_pm_domain( -+ child->l2_cache_core[0]); -+ groups[num_domains] = NULL; -+ MALI_DEBUG_ASSERT(NULL == -+ child->l2_cache_core[1]); -+ num_domains++; -+ } -+ } else { -+ /* Take L2 domain ref for physical groups. */ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > -+ num_domains); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer ++ * @value: Value that will be set or added to semaphore ++ * @operation: Operation type performed on semaphore value (SET or ADD) ++ * @data_type: Data type of a CQS Object's value ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ value, \ ++ operation, \ ++ data_type \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ value, \ ++ operation, \ ++ data_type \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ value, \ ++ operation, \ ++ data_type \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ domains[num_domains] = mali_l2_cache_get_pm_domain( -+ group->l2_cache_core[0]); -+ groups[num_domains] = NULL; -+ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]); -+ num_domains++; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ); \ ++ } while (0) ++#else ++#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* Do the group itself last (it's dependencies first) */ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - KCPU Queue enqueues Unmap Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ group->state = MALI_GROUP_STATE_ACTIVATION_PENDING; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - KCPU Queue enqueues Unmap Import ignoring reference count ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_ASSERT_POINTER(group->pm_domain); -+ domains[num_domains] = group->pm_domain; -+ groups[num_domains] = group; -+ num_domains++; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Begin array of KCPU Queue enqueues JIT Alloc ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ all_groups_on = mali_pm_get_domain_refs(domains, groups, -+ num_domains); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Array item of KCPU Queue enqueues JIT Alloc ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write the JIT allocated GPU virtual address to ++ * @jit_alloc_va_pages: The minimum number of virtual pages required ++ * @jit_alloc_commit_pages: The minimum number of physical pages which should back the allocation ++ * @jit_alloc_extent: Granularity of 
physical pages to grow the allocation by during a fault ++ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. Zero is not a valid value ++ * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation ++ * @jit_alloc_max_allocations: The maximum number of allocations allowed within the bin specified by bin_id. Should be the same for all JIT allocations within the same bin. ++ * @jit_alloc_flags: Flags specifying the special requirements for the JIT allocation ++ * @jit_alloc_usage_id: A hint about which allocation should be reused. The kernel should attempt to use a previous allocation with the same usage_id ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_gpu_alloc_addr_dest, \ ++ jit_alloc_va_pages, \ ++ jit_alloc_commit_pages, \ ++ jit_alloc_extent, \ ++ jit_alloc_jit_id, \ ++ jit_alloc_bin_id, \ ++ jit_alloc_max_allocations, \ ++ jit_alloc_flags, \ ++ jit_alloc_usage_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ jit_alloc_gpu_alloc_addr_dest, \ ++ jit_alloc_va_pages, \ ++ jit_alloc_commit_pages, \ ++ jit_alloc_extent, \ ++ jit_alloc_jit_id, \ ++ jit_alloc_bin_id, \ ++ jit_alloc_max_allocations, \ ++ jit_alloc_flags, \ ++ jit_alloc_usage_id \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_gpu_alloc_addr_dest, \ ++ jit_alloc_va_pages, \ ++ jit_alloc_commit_pages, \ ++ jit_alloc_extent, \ ++ jit_alloc_jit_id, \ ++ jit_alloc_bin_id, \ ++ jit_alloc_max_allocations, \ ++ jit_alloc_flags, \ ++ jit_alloc_usage_id \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * Complete activation for group, include -+ * virtual group or physical group. -+ */ -+ if (MALI_TRUE == all_groups_on) { ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - End array of KCPU Queue enqueues JIT Alloc ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ mali_group_set_active(group); -+ } -+ } else if (MALI_GROUP_STATE_ACTIVE == group->state) { -+ /* Already active */ -+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); -+ } else { -+ /* -+ * Activation already pending, group->power_is_on could -+ * be both true or false. We need to wait for power up -+ * notification anyway. 
-+ */ -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING -+ == group->state); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - Begin array of KCPU Queue enqueues JIT Free ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n", -+ mali_group_core_description(group), -+ MALI_GROUP_STATE_ACTIVE == group->state ? -+ "ACTIVE" : "PENDING")); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - Array item of KCPU Queue enqueues JIT Free ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. Zero is not a valid value ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_jit_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ jit_alloc_jit_id \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_jit_id \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ return group->state; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - End array of KCPU Queue enqueues JIT Free ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+mali_bool mali_group_set_active(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state); -+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ 
kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @group_suspend_buf: Pointer to the suspend buffer structure ++ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ ++ kbdev, \ ++ kcpu_queue, \ ++ group_suspend_buf, \ ++ gpu_cmdq_grp_handle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ group_suspend_buf, \ ++ gpu_cmdq_grp_handle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ ++ kbdev, \ ++ kcpu_queue, \ ++ group_suspend_buf, \ ++ gpu_cmdq_grp_handle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (mali_group_is_virtual(group)) { -+ struct mali_group *child; -+ struct mali_group *temp; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - KCPU Queue starts a Signal on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, -+ struct mali_group, group_list) { -+ if (MALI_TRUE != child->power_is_on) { -+ return MALI_FALSE; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - KCPU Queue ends a Signal on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ child->state = MALI_GROUP_STATE_ACTIVE; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - KCPU Queue starts a Wait on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) 
\ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ mali_group_reset(group); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - KCPU Queue ends a Wait on Fence ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* Go to ACTIVE state */ -+ group->state = MALI_GROUP_STATE_ACTIVE; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ return MALI_TRUE; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+mali_bool mali_group_deactivate(struct mali_group *group) -+{ -+ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS]; -+ u32 num_domains = 0; -+ mali_bool power_down = MALI_FALSE; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ group->state = MALI_GROUP_STATE_INACTIVE; -+ -+ MALI_DEBUG_ASSERT_POINTER(group->pm_domain); -+ domains[num_domains] = group->pm_domain; -+ num_domains++; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ 
++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (mali_group_is_virtual(group)) { -+ /* Release refs for all child groups */ -+ struct mali_group *child; -+ struct mali_group *temp; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, -+ &group->group_list, -+ struct mali_group, group_list) { -+ child->state = MALI_GROUP_STATE_INACTIVE; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - KCPU Queue ends a Map Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_ASSERT_POINTER(child->pm_domain); -+ domains[num_domains] = child->pm_domain; -+ num_domains++; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - KCPU Queue starts an Unmap Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* Release L2 cache domain for child groups */ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > -+ num_domains); -+ domains[num_domains] = mali_l2_cache_get_pm_domain( -+ child->l2_cache_core[0]); -+ MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]); -+ num_domains++; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - KCPU Queue ends an Unmap Import ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & 
BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * Must do mali_group_power_down() steps right here for -+ * virtual group, because virtual group itself is likely to -+ * stay powered on, however child groups are now very likely -+ * to be powered off (and thus lose their state). -+ */ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - KCPU Queue starts an Unmap Import ignoring reference count ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ mali_group_clear_session(group); -+ /* -+ * Disable the broadcast unit (clear it's mask). -+ * This is needed in case the GPU isn't actually -+ * powered down at this point and groups are -+ * removed from an inactive virtual group. -+ * If not, then the broadcast unit will intercept -+ * their interrupts! -+ */ -+ mali_bcast_disable(group->bcast_core); -+ } else { -+ /* Release L2 cache domain for physical groups */ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > -+ num_domains); -+ domains[num_domains] = mali_l2_cache_get_pm_domain( -+ group->l2_cache_core[0]); -+ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]); -+ num_domains++; -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - KCPU Queue ends an Unmap Import ignoring reference count ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ power_down = mali_pm_put_domain_refs(domains, num_domains); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - KCPU Queue starts an array of JIT Allocs ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ return power_down; -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Begin array of KCPU Queue ends an array of JIT Allocs ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+void mali_group_power_up(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Array item of KCPU Queue ends an array of JIT Allocs ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address ++ * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_alloc_gpu_alloc_addr, \ ++ jit_alloc_mmu_flags \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_alloc_gpu_alloc_addr, \ ++ jit_alloc_mmu_flags \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_alloc_gpu_alloc_addr, \ ++ jit_alloc_mmu_flags \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - End array of KCPU Queue ends an array of JIT Allocs ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ group->power_is_on = MALI_TRUE; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - KCPU Queue starts an array of JIT Frees ++ * ++ * @kbdev: Kbase device ++ * 
@kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (MALI_FALSE == mali_group_is_virtual(group) -+ && MALI_FALSE == mali_group_is_in_virtual(group)) { -+ mali_group_reset(group); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - Begin array of KCPU Queue ends an array of JIT Frees ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ /* -+ * When we just acquire only one physical group form virt group, -+ * we should remove the bcast&dlbu mask from virt group and -+ * reset bcast and dlbu core, although part of pp cores in virt -+ * group maybe not be powered on. -+ */ -+ if (MALI_TRUE == mali_group_is_virtual(group)) { -+ mali_bcast_reset(group->bcast_core); -+ mali_dlbu_update_mask(group->dlbu_core); -+ } -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - Array item of KCPU Queue ends an array of JIT Frees ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ * @jit_free_pages_used: The actual number of pages used by the JIT allocation ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_free_pages_used \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_free_pages_used \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_free_pages_used \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+void mali_group_power_down(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - End array of KCPU Queue ends an array of JIT Frees ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = 
atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n", -+ mali_group_core_description(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER - KCPU Queue executes an Error Barrier ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ group->power_is_on = MALI_FALSE; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START - KCPU Queue starts a group suspend ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (mali_group_is_virtual(group)) { -+ /* -+ * What we do for physical jobs in this function should -+ * already have been done in mali_group_deactivate() -+ * for virtual group. 
-+ */ -+ MALI_DEBUG_ASSERT(NULL == group->session); -+ } else { -+ mali_group_clear_session(group); -+ } -+} ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END - KCPU Queue ends a group suspend ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, \ ++ execute_error \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup) -+{ -+ u32 i; -+ struct mali_group *group; -+ struct mali_group *temp; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING - CSF FW is being reloaded ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_reloading( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n", -+ mali_group_core_description(vgroup), -+ vgroup)); -+ MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0])); -+ MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1])); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING - CSF FW is being enabled ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_enabling( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ i = 0; -+ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) { -+ MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n", -+ i, mali_group_core_description(group), -+ group, group->l2_cache_core[0])); -+ i++; -+ } -+}) ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP - CSF FW sleep is requested ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_csffw_fw_request_sleep( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+static void mali_group_dump_core_status(struct mali_group *group) -+{ -+ u32 i; ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP - CSF FW wake up is requested ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group))); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT - CSF FW halt is requested ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_request_halt( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ if (NULL != group->gp_core) { -+ MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description)); ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING - CSF FW is being disabled ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_disabling( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ for (i = 0; i < 0xA8; i += 0x10) { -+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i), -+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 4), -+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 8), -+ mali_hw_core_register_read(&group->gp_core->hw_core, i + 12))); -+ } ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF - CSF FW is off ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_fw_off( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle \ ++ ); \ ++ } while 
(0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - An overflow has happened with the CSFFW Timeline stream ++ * ++ * @kbdev: Kbase device ++ * @csffw_timestamp: Timestamp of a CSFFW event ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ ++ kbdev, \ ++ csffw_timestamp, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_timestamp, \ ++ csffw_cycle \ ++ ); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ ++ kbdev, \ ++ csffw_timestamp, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + -+ } else { -+ MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description)); ++/** ++ * KBASE_TLSTREAM_AUX_PM_STATE - PM state ++ * ++ * @kbdev: Kbase device ++ * @core_type: Core type (shader, tiler, l2 cache, l3 cache) ++ * @core_state_bitset: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) ++ */ ++#define KBASE_TLSTREAM_AUX_PM_STATE( \ ++ kbdev, \ ++ core_type, \ ++ core_state_bitset \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pm_state( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ core_type, \ ++ core_state_bitset \ ++ ); \ ++ } while (0) + -+ for (i = 0; i < 0x5c; i += 0x10) { -+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12))); -+ } ++/** ++ * KBASE_TLSTREAM_AUX_PAGEFAULT - Page fault ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @as_nr: Address space number ++ * @page_cnt_change: Number of pages to be added ++ */ ++#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ ++ kbdev, \ ++ ctx_nr, \ ++ as_nr, \ ++ page_cnt_change \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pagefault( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, \ ++ as_nr, \ ++ page_cnt_change \ ++ ); \ ++ } while (0) + -+ /* Ignore some minor registers */ -+ for (i = 0x1000; i < 0x1068; i += 0x10) { -+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8), -+ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12))); -+ } -+ } ++/** ++ * KBASE_TLSTREAM_AUX_PAGESALLOC - Total alloc pages change ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @page_cnt: Number of pages used by the context ++ */ ++#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ ++ kbdev, \ ++ ctx_nr, \ ++ page_cnt \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pagesalloc( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, \ ++ page_cnt \ ++ ); \ ++ } while (0) + -+ MALI_PRINT(("Dump Group MMU\n")); -+ for (i = 0; 
i < 0x24; i += 0x10) { -+ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i), -+ mali_hw_core_register_read(&group->mmu->hw_core, i + 4), -+ mali_hw_core_register_read(&group->mmu->hw_core, i + 8), -+ mali_hw_core_register_read(&group->mmu->hw_core, i + 12))); -+ } -+} ++/** ++ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - New device frequency target ++ * ++ * @kbdev: Kbase device ++ * @target_freq: New target frequency ++ */ ++#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ ++ kbdev, \ ++ target_freq \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_devfreq_target( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ target_freq \ ++ ); \ ++ } while (0) + ++/** ++ * KBASE_TLSTREAM_AUX_JIT_STATS - per-bin JIT statistics ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @bid: JIT bin id ++ * @max_allocs: Maximum allocations allowed in this bin. ++ * @allocs: Number of active allocations in this bin ++ * @va_pages: Number of virtual pages allocated in this bin ++ * @ph_pages: Number of physical pages allocated in this bin ++ */ ++#define KBASE_TLSTREAM_AUX_JIT_STATS( \ ++ kbdev, \ ++ ctx_nr, \ ++ bid, \ ++ max_allocs, \ ++ allocs, \ ++ va_pages, \ ++ ph_pages \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_jit_stats( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, \ ++ bid, \ ++ max_allocs, \ ++ allocs, \ ++ va_pages, \ ++ ph_pages \ ++ ); \ ++ } while (0) + +/** -+ * @Dump group status ++ * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS - Tiler Heap statistics ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @heap_id: Unique id used to represent a heap under a context ++ * @va_pages: Number of virtual pages allocated in this bin ++ * @ph_pages: Number of physical pages allocated in this bin ++ * @max_chunks: The maximum number of chunks that the heap should be allowed to use ++ * @chunk_size: Size of each chunk in tiler heap, in bytes ++ * @chunk_count: The number of chunks currently allocated in the tiler heap ++ * @target_in_flight: Number of render-passes that the driver should attempt to keep in flight for which allocation of new chunks is allowed ++ * @nr_in_flight: Number of render-passes that are in flight + */ -+void mali_group_dump_status(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); ++#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \ ++ kbdev, \ ++ ctx_nr, \ ++ heap_id, \ ++ va_pages, \ ++ ph_pages, \ ++ max_chunks, \ ++ chunk_size, \ ++ chunk_count, \ ++ target_in_flight, \ ++ nr_in_flight \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_tiler_heap_stats( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, \ ++ heap_id, \ ++ va_pages, \ ++ ph_pages, \ ++ max_chunks, \ ++ chunk_size, \ ++ chunk_count, \ ++ target_in_flight, \ ++ nr_in_flight \ ++ ); \ ++ } while (0) + -+ if (mali_group_is_virtual(group)) { -+ struct mali_group *group_c; -+ struct mali_group *temp; -+ _MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) { -+ mali_group_dump_core_status(group_c); -+ } -+ } else { -+ mali_group_dump_core_status(group); -+ } -+} ++/** ++ * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - event on a given job slot ++ * ++ * @kbdev: Kbase device ++ * @ctx: Name of the context object ++ * @slot_nr: Job slot 
number ++ * @atom_nr: Sequential number of an atom ++ * @event: Event type. One of TL_JS_EVENT values ++ */ ++#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ ++ kbdev, \ ++ ctx, \ ++ slot_nr, \ ++ atom_nr, \ ++ event \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_event_job_slot( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx, \ ++ slot_nr, \ ++ atom_nr, \ ++ event \ ++ ); \ ++ } while (0) + +/** -+ * @brief Add child group to virtual group parent ++ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - enter protected mode start ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object + */ -+void mali_group_add_group(struct mali_group *parent, struct mali_group *child) -+{ -+ mali_bool found; -+ u32 i; ++#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_protected_enter_start( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n", -+ mali_group_core_description(child), -+ mali_group_core_description(parent))); ++/** ++ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - enter protected mode end ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_protected_enter_end( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); -+ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child)); -+ MALI_DEBUG_ASSERT(NULL == child->parent_group); ++/** ++ * KBASE_TLSTREAM_AUX_MMU_COMMAND - mmu commands with synchronicity info ++ * ++ * @kbdev: Kbase device ++ * @kernel_ctx_id: Unique ID for the KBase Context ++ * @mmu_cmd_id: MMU Command ID (e.g AS_COMMAND_UPDATE) ++ * @mmu_synchronicity: Indicates whether the command is related to current running job that needs to be resolved to make it progress (synchronous, e.g. grow on page fault, JIT) or not (asynchronous, e.g. IOCTL calls from user-space). This param will be 0 if it is an asynchronous operation. 
++ * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated ++ * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated ++ */ ++#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ mmu_cmd_id, \ ++ mmu_synchronicity, \ ++ mmu_lock_addr, \ ++ mmu_lock_page_num \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_mmu_command( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ kernel_ctx_id, \ ++ mmu_cmd_id, \ ++ mmu_synchronicity, \ ++ mmu_lock_addr, \ ++ mmu_lock_page_num \ ++ ); \ ++ } while (0) + -+ _mali_osk_list_addtail(&child->group_list, &parent->group_list); ++/** ++ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - leave protected mode start ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_aux_protected_leave_start( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ child->parent_group = parent; ++/** ++ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - leave protected mode end ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_aux_protected_leave_end( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ gpu \ ++ ); \ ++ } while (0) + -+ MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]); ++/* Gator tracepoints are hooked into TLSTREAM interface. ++ * When the following tracepoints are called, corresponding ++ * Gator tracepoint will be called as well. ++ */ + -+ MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1])); -+ MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1])); ++#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) ++/* `event` is one of TL_JS_EVENT values here. ++ * The values of TL_JS_EVENT are guaranteed to match ++ * with corresponding GATOR_JOB_SLOT values. 
++ */ ++#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT ++#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \ ++ context, slot_nr, atom_nr, event) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ kbase_trace_mali_job_slots_event(kbdev->id, \ ++ GATOR_MAKE_EVENT(event, slot_nr), \ ++ context, (u8) atom_nr); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_event_job_slot( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ context, slot_nr, atom_nr, event); \ ++ } while (0) + -+ /* Keep track of the L2 cache cores of child groups */ -+ found = MALI_FALSE; -+ for (i = 0; i < 2; i++) { -+ if (parent->l2_cache_core[i] == child->l2_cache_core[0]) { -+ MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0); -+ parent->l2_cache_core_ref_count[i]++; -+ found = MALI_TRUE; -+ } -+ } ++#undef KBASE_TLSTREAM_AUX_PM_STATE ++#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ kbase_trace_mali_pm_status(kbdev->id, \ ++ core_type, state); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pm_state( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ core_type, state); \ ++ } while (0) + -+ if (!found) { -+ /* First time we see this L2 cache, add it to our list */ -+ i = (NULL == parent->l2_cache_core[0]) ? 0 : 1; ++#undef KBASE_TLSTREAM_AUX_PAGEFAULT ++#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \ ++ ctx_nr, as_nr, page_cnt_change) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ ++ as_nr, \ ++ page_cnt_change); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pagefault( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, as_nr, page_cnt_change); \ ++ } while (0) + -+ MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i])); ++/* kbase_trace_mali_total_alloc_pages_change is handled differently here. ++ * We stream the total amount of pages allocated for `kbdev` rather ++ * than `page_count`, which is per-context. 
++ */ ++#undef KBASE_TLSTREAM_AUX_PAGESALLOC ++#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ u32 global_pages_count = \ ++ atomic_read(&kbdev->memdev.used_pages); \ ++ \ ++ kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ ++ global_pages_count); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_pagesalloc( \ ++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, page_cnt); \ ++ } while (0) ++#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ + -+ MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]); ++/* clang-format on */ ++#endif +diff --git a/drivers/gpu/arm/mali400/.gitignore b/drivers/gpu/arm/mali400/.gitignore +new file mode 100755 +index 000000000..d91c8078a +--- /dev/null ++++ b/drivers/gpu/arm/mali400/.gitignore +@@ -0,0 +1 @@ ++./mali/__malidrv_build_info.c +diff --git a/drivers/gpu/arm/mali400/Kbuild b/drivers/gpu/arm/mali400/Kbuild +new file mode 100755 +index 000000000..dbb7ad3e5 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/Kbuild +@@ -0,0 +1,2 @@ ++# SPDX-License-Identifier: GPL-2.0 ++obj-y += mali/ +diff --git a/drivers/gpu/arm/mali400/mali/.gitignore b/drivers/gpu/arm/mali400/mali/.gitignore +new file mode 100755 +index 000000000..6b1a3ed27 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/.gitignore +@@ -0,0 +1 @@ ++__malidrv_build_info.c +diff --git a/drivers/gpu/arm/mali400/mali/Kbuild b/drivers/gpu/arm/mali400/mali/Kbuild +new file mode 100755 +index 000000000..7390ab758 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/Kbuild +@@ -0,0 +1,254 @@ ++# ++# Copyright (C) 2010-2011 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+ parent->l2_cache_core[i] = child->l2_cache_core[0]; -+ parent->l2_cache_core_ref_count[i]++; -+ } ++# This file is called by the Linux build system. + -+ /* Update Broadcast Unit and DLBU */ -+ mali_bcast_add_group(parent->bcast_core, child); -+ mali_dlbu_add_group(parent->dlbu_core, child); ++# make $(src) as absolute path if it isn't already, by prefixing $(srctree) ++src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + -+ if (MALI_TRUE == parent->power_is_on) { -+ mali_bcast_reset(parent->bcast_core); -+ mali_dlbu_update_mask(parent->dlbu_core); -+ } ++# set up defaults if not defined by the user ++TIMESTAMP ?= default ++OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB ?= 16 ++USING_GPU_UTILIZATION ?= 1 ++PROFILING_SKIP_PP_JOBS ?= 0 ++PROFILING_SKIP_PP_AND_GP_JOBS ?= 0 ++MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP ?= 0 ++MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED ?= 0 ++MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS ?= 0 ++MALI_UPPER_HALF_SCHEDULING ?= 1 ++MALI_ENABLE_CPU_CYCLES ?= 0 ++MALI_PLATFORM ?= rk + -+ if (MALI_TRUE == child->power_is_on) { -+ if (NULL == parent->session) { -+ if (NULL != child->session) { -+ /* -+ * Parent has no session, so clear -+ * child session as well. -+ */ -+ mali_mmu_activate_empty_page_directory(child->mmu); -+ } -+ } else { -+ if (parent->session == child->session) { -+ /* We already have same session as parent, -+ * so a simple zap should be enough. 
-+ */ -+ mali_mmu_zap_tlb(child->mmu); -+ } else { -+ /* -+ * Parent has a different session, so we must -+ * switch to that sessions page table -+ */ -+ mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session)); -+ } ++# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases: ++# The ARM proprietary product will only include the license/proprietary directory ++# The GPL product will only include the license/gpl directory ++ccflags-y += -I$(src)/linux/license/gpl + -+ /* It is the parent which keeps the session from now on */ -+ child->session = NULL; -+ } -+ } else { -+ /* should have been cleared when child was powered down */ -+ MALI_DEBUG_ASSERT(NULL == child->session); -+ } + -+ /* Start job on child when parent is active */ -+ if (NULL != parent->pp_running_job) { -+ struct mali_pp_job *job = parent->pp_running_job; ++ifeq ($(USING_GPU_UTILIZATION), 1) ++ ifeq ($(USING_DVFS), 1) ++ $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need) ++ endif ++endif + -+ MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n", -+ child, mali_pp_job_get_id(job), parent)); ++ifneq ($(MALI_PLATFORM),) ++ EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 ++ #MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c) ++ mali-y += \ ++ platform/$(MALI_PLATFORM)/rk.o ++endif + -+ /* Only allowed to add active child to an active parent */ -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state); -+ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state); ++ifeq ($(MALI_PLATFORM_FILES),) ++ifeq ($(CONFIG_ARCH_EXYNOS4),y) ++EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 ++export MALI_PLATFORM=exynos4 ++export MALI_PLATFORM_FILES_BUILDIN = $(notdir $(wildcard $(src)/platform/$(MALI_PLATFORM)/*.c)) ++export MALI_PLATFORM_FILES_ADD_PREFIX = $(addprefix platform/$(MALI_PLATFORM)/,$(MALI_PLATFORM_FILES_BUILDIN)) ++endif ++endif + -+ mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE); ++mali-y += \ ++ linux/mali_osk_atomics.o \ ++ linux/mali_osk_irq.o \ ++ linux/mali_osk_wq.o \ ++ linux/mali_osk_locks.o \ ++ linux/mali_osk_wait_queue.o \ ++ linux/mali_osk_low_level_mem.o \ ++ linux/mali_osk_math.o \ ++ linux/mali_osk_memory.o \ ++ linux/mali_osk_misc.o \ ++ linux/mali_osk_mali.o \ ++ linux/mali_osk_notification.o \ ++ linux/mali_osk_time.o \ ++ linux/mali_osk_timers.o \ ++ linux/mali_osk_bitmap.o + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); ++mali-y += linux/mali_memory.o linux/mali_memory_os_alloc.o ++mali-y += linux/mali_memory_external.o ++mali-y += linux/mali_memory_block_alloc.o ++mali-y += linux/mali_memory_swap_alloc.o + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, -+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ if (child->pp_core) { -+ trace_gpu_sched_switch( -+ mali_pp_core_description(child->pp_core), -+ sched_clock(), mali_pp_job_get_tid(job), -+ 0, mali_pp_job_get_id(job)); -+ } -+#endif ++mali-y += \ ++ 
linux/mali_memory_manager.o \ ++ linux/mali_memory_virtual.o \ ++ linux/mali_memory_util.o \ ++ linux/mali_memory_cow.o \ ++ linux/mali_memory_defer_bind.o + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); -+#endif -+ } ++mali-y += \ ++ linux/mali_ukk_mem.o \ ++ linux/mali_ukk_gp.o \ ++ linux/mali_ukk_pp.o \ ++ linux/mali_ukk_core.o \ ++ linux/mali_ukk_soft_job.o \ ++ linux/mali_ukk_timeline.o + -+ MALI_DEBUG_CODE(mali_group_print_virtual(parent);) -+} ++mali-$(CONFIG_MALI_DEVFREQ) += \ ++ linux/mali_devfreq.o \ ++ common/mali_pm_metrics.o + -+/** -+ * @brief Remove child group from virtual group parent -+ */ -+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child) -+{ -+ u32 i; ++# Source files which always are included in a build ++mali-y += \ ++ common/mali_kernel_core.o \ ++ linux/mali_kernel_linux.o \ ++ common/mali_session.o \ ++ linux/mali_device_pause_resume.o \ ++ common/mali_kernel_vsync.o \ ++ linux/mali_ukk_vsync.o \ ++ linux/mali_kernel_sysfs.o \ ++ common/mali_mmu.o \ ++ common/mali_mmu_page_directory.o \ ++ common/mali_mem_validation.o \ ++ common/mali_hw_core.o \ ++ common/mali_gp.o \ ++ common/mali_pp.o \ ++ common/mali_pp_job.o \ ++ common/mali_gp_job.o \ ++ common/mali_soft_job.o \ ++ common/mali_scheduler.o \ ++ common/mali_executor.o \ ++ common/mali_group.o \ ++ common/mali_dlbu.o \ ++ common/mali_broadcast.o \ ++ common/mali_pm.o \ ++ common/mali_pmu.o \ ++ common/mali_user_settings_db.o \ ++ common/mali_kernel_utilization.o \ ++ common/mali_control_timer.o \ ++ common/mali_l2_cache.o \ ++ common/mali_timeline.o \ ++ common/mali_timeline_fence_wait.o \ ++ common/mali_timeline_sync_fence.o \ ++ common/mali_spinlock_reentrant.o \ ++ common/mali_pm_domain.o \ ++ linux/mali_osk_pm.o \ ++ linux/mali_pmu_power_up_down.o \ ++ __malidrv_build_info.o + -+ MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n", -+ mali_group_core_description(child), -+ mali_group_core_description(parent))); ++ifneq ($(wildcard $(src)/linux/mali_slp_global_lock.c),) ++ mali-y += linux/mali_slp_global_lock.o ++endif + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); -+ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child)); -+ MALI_DEBUG_ASSERT(parent == child->parent_group); ++ifneq ($(MALI_PLATFORM_FILES),) ++ mali-y += $(MALI_PLATFORM_FILES:.c=.o) ++endif + -+ /* Update Broadcast Unit and DLBU */ -+ mali_bcast_remove_group(parent->bcast_core, child); -+ mali_dlbu_remove_group(parent->dlbu_core, child); ++ifneq ($(MALI_PLATFORM_FILES_ADD_PREFIX),) ++ mali-y += $(MALI_PLATFORM_FILES_ADD_PREFIX:.c=.o) ++endif + -+ if (MALI_TRUE == parent->power_is_on) { -+ mali_bcast_reset(parent->bcast_core); -+ mali_dlbu_update_mask(parent->dlbu_core); -+ } ++mali-$(CONFIG_MALI400_PROFILING) += linux/mali_ukk_profiling.o ++mali-$(CONFIG_MALI400_PROFILING) += linux/mali_osk_profiling.o + -+ child->session = parent->session; -+ child->parent_group = NULL; ++mali-$(CONFIG_MALI400_INTERNAL_PROFILING) += linux/mali_profiling_internal.o timestamp-$(TIMESTAMP)/mali_timestamp.o ++ccflags-$(CONFIG_MALI400_INTERNAL_PROFILING) += -I$(src)/timestamp-$(TIMESTAMP) + -+ _mali_osk_list_delinit(&child->group_list); -+ if (_mali_osk_list_empty(&parent->group_list)) { -+ parent->session = NULL; -+ } ++mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_dma_buf.o 
++mali-$(CONFIG_DMA_SHARED_BUFFER) += linux/mali_memory_secure.o ++mali-$(CONFIG_SYNC) += linux/mali_sync.o ++mali-$(CONFIG_SYNC) += linux/mali_internal_sync.o ++mali-$(CONFIG_SYNC_FILE) += linux/mali_sync.o ++mali-$(CONFIG_SYNC_FILE) += linux/mali_internal_sync.o ++mali-$(CONFIG_MALI_DMA_BUF_FENCE) += linux/mali_dma_fence.o ++ccflags-$(CONFIG_SYNC) += -Idrivers/staging/android ++ccflags-$(CONFIG_SYNC_FILE) += -Idrivers/staging/android + -+ /* Keep track of the L2 cache cores of child groups */ -+ i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1; ++mali-$(CONFIG_MALI400_UMP) += linux/mali_memory_ump.o + -+ MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]); ++mali-$(CONFIG_MALI_DVFS) += common/mali_dvfs_policy.o + -+ parent->l2_cache_core_ref_count[i]--; -+ if (parent->l2_cache_core_ref_count[i] == 0) { -+ parent->l2_cache_core[i] = NULL; -+ } ++# Tell the Linux build system from which .o file to create the kernel module ++obj-$(CONFIG_MALI400) := mali.o + -+ MALI_DEBUG_CODE(mali_group_print_virtual(parent)); -+} ++ccflags-y += $(EXTRA_DEFINES) + -+struct mali_group *mali_group_acquire_group(struct mali_group *parent) -+{ -+ struct mali_group *child = NULL; ++# Set up our defines, which will be passed to gcc ++ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP) ++ccflags-y += -DMALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED=$(MALI_PP_SCHEDULER_KEEP_SUB_JOB_STARTS_ALIGNED) ++ccflags-y += -DMALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS=$(MALI_PP_SCHEDULER_FORCE_NO_JOB_OVERLAP_BETWEEN_APPS) ++ccflags-y += -DMALI_STATE_TRACKING=1 ++ccflags-y += -DMALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB) ++ccflags-y += -DUSING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION) ++ccflags-y += -DMALI_ENABLE_CPU_CYCLES=$(MALI_ENABLE_CPU_CYCLES) + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); ++ifeq ($(MALI_UPPER_HALF_SCHEDULING),1) ++ ccflags-y += -DMALI_UPPER_HALF_SCHEDULING ++endif + -+ if (!_mali_osk_list_empty(&parent->group_list)) { -+ child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list); -+ mali_group_remove_group(parent, child); -+ } ++#build-in include path is different ++ifeq ($(MALI_PLATFORM_FILES),) ++ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../ump/include/ ++else ++ccflags-$(CONFIG_MALI400_UMP) += -I$(src)/../../ump/include/ump ++endif ++ccflags-$(CONFIG_MALI400_DEBUG) += -DDEBUG + -+ if (NULL != child) { -+ if (MALI_GROUP_STATE_ACTIVE != parent->state -+ && MALI_TRUE == child->power_is_on) { -+ mali_group_reset(child); -+ } -+ } ++# Use our defines when compiling ++ccflags-y += -I$(src) -I$(src)/include -I$(src)/common -I$(src)/linux -I$(src)/platform -Wno-date-time + -+ return child; -+} ++# Get subversion revision number, fall back to only ${MALI_RELEASE_NAME} if no svn info is available ++MALI_RELEASE_NAME=$(shell cat $(src)/.version 2> /dev/null) + -+void mali_group_reset(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(NULL == group->gp_running_job); -+ MALI_DEBUG_ASSERT(NULL == group->pp_running_job); ++SVN_INFO = (cd $(src); svn info 2>/dev/null) + -+ MALI_DEBUG_PRINT(3, ("Group: reset of %s\n", -+ mali_group_core_description(group))); ++ifneq ($(shell $(SVN_INFO) 2>/dev/null),) ++# SVN detected ++SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null) ++DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV) ++CHANGE_DATE := 
$(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-) ++CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-) ++REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-) + -+ if (NULL != group->dlbu_core) { -+ mali_dlbu_reset(group->dlbu_core); -+ } ++else # SVN ++# GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null) ++ifneq ($(GIT_REV),) ++# Git detected ++DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV) ++CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci") ++CHANGED_REVISION := $(GIT_REV) ++REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null) + -+ if (NULL != group->bcast_core) { -+ mali_bcast_reset(group->bcast_core); -+ } ++else # Git ++# No Git or SVN detected ++DRIVER_REV := $(MALI_RELEASE_NAME) ++CHANGE_DATE := $(MALI_RELEASE_NAME) ++CHANGED_REVISION := $(MALI_RELEASE_NAME) ++endif ++endif + -+ MALI_DEBUG_ASSERT(NULL != group->mmu); -+ mali_group_reset_mmu(group); ++ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\" + -+ if (NULL != group->gp_core) { -+ MALI_DEBUG_ASSERT(NULL == group->pp_core); -+ mali_gp_reset(group->gp_core); -+ } else { -+ MALI_DEBUG_ASSERT(NULL != group->pp_core); -+ mali_group_reset_pp(group); -+ } -+} ++VERSION_STRINGS := ++VERSION_STRINGS += API_VERSION=$(shell cd $(src); grep "\#define _MALI_API_VERSION" $(FILES_PREFIX)include/linux/mali/mali_utgard_uk_types.h | cut -d' ' -f 3 ) ++VERSION_STRINGS += REPO_URL=$(REPO_URL) ++VERSION_STRINGS += REVISION=$(DRIVER_REV) ++VERSION_STRINGS += CHANGED_REVISION=$(CHANGED_REVISION) ++VERSION_STRINGS += CHANGE_DATE=$(CHANGE_DATE) ++VERSION_STRINGS += BUILD_DATE=$(shell date) ++ifdef CONFIG_MALI400_DEBUG ++VERSION_STRINGS += BUILD=debug ++else ++VERSION_STRINGS += BUILD=release ++endif ++VERSION_STRINGS += TARGET_PLATFORM=$(TARGET_PLATFORM) ++VERSION_STRINGS += MALI_PLATFORM=$(MALI_PLATFORM) ++VERSION_STRINGS += KDIR=$(KDIR) ++VERSION_STRINGS += OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB=$(OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB) ++VERSION_STRINGS += USING_UMP=$(CONFIG_MALI400_UMP) ++VERSION_STRINGS += USING_PROFILING=$(CONFIG_MALI400_PROFILING) ++VERSION_STRINGS += USING_INTERNAL_PROFILING=$(CONFIG_MALI400_INTERNAL_PROFILING) ++VERSION_STRINGS += USING_GPU_UTILIZATION=$(USING_GPU_UTILIZATION) ++VERSION_STRINGS += USING_DVFS=$(CONFIG_MALI_DVFS) ++VERSION_STRINGS += USING_DMA_BUF_FENCE = $(CONFIG_MALI_DMA_BUF_FENCE) ++VERSION_STRINGS += MALI_UPPER_HALF_SCHEDULING=$(MALI_UPPER_HALF_SCHEDULING) + -+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled) -+{ -+ struct mali_session_data *session; ++# Create file with Mali driver configuration ++$(src)/__malidrv_build_info.c: ++ @echo 'const char *__malidrv_build_info(void) { return "malidrv: $(VERSION_STRINGS)";}' > $(src)/__malidrv_build_info.c +diff --git a/drivers/gpu/arm/mali400/mali/Kconfig b/drivers/gpu/arm/mali400/mali/Kconfig +new file mode 100644 +index 000000000..082919d91 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/Kconfig +@@ -0,0 +1,119 @@ ++# SPDX-License-Identifier: GPL-2.0 ++config MALI400 ++ tristate "Mali-300/400/450 support" ++ depends on ARM || ARM64 ++ select DMA_SHARED_BUFFER ++ help ++ This enables support for the ARM Mali-300, Mali-400, and Mali-450 ++ GPUs. + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ To compile this driver as a module, choose M here: the module will be ++ called mali. 
+ -+ MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n", -+ job, -+ mali_group_core_description(group))); ++config MALI450 ++ bool "Enable Mali-450 support" ++ depends on MALI400 ++ help ++ This enables support for Mali-450 specific features. + -+ session = mali_gp_job_get_session(job); ++config MALI470 ++ bool "Enable Mali-470 support" ++ depends on MALI400 ++ help ++ This enables support for Mali-470 specific features. + -+ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]); -+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job)); ++config MALI400_DEBUG ++ bool "Enable debug in Mali driver" ++ depends on MALI400 ++ help ++ This enabled extra debug checks and messages in the Mali driver. + -+ /* Reset GPU and disable gpu secure mode if needed. */ -+ if (MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) { -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); -+ _mali_osk_gpu_reset_and_secure_mode_disable(); -+ /* Need to disable the pmu interrupt mask register */ -+ if (NULL != pmu) { -+ mali_pmu_reset(pmu); -+ } -+ } ++config MALI400_PROFILING ++ bool "Enable Mali profiling" ++ depends on MALI400 ++ select TRACEPOINTS ++ default y ++ help ++ This enables gator profiling of Mali GPU events. + -+ /* Reload mmu page table if needed */ -+ if (MALI_TRUE == gpu_secure_mode_pre_enabled) { -+ mali_group_reset(group); -+ mali_group_activate_page_directory(group, session, MALI_TRUE); -+ } else { -+ mali_group_activate_page_directory(group, session, MALI_FALSE); -+ } ++config MALI400_INTERNAL_PROFILING ++ bool "Enable internal Mali profiling API" ++ depends on MALI400_PROFILING ++ default n ++ help ++ This enables the internal legacy Mali profiling API. + -+ mali_gp_job_start(group->gp_core, job); ++config MALI400_UMP ++ bool "Enable UMP support" ++ depends on MALI400 ++ help ++ This enables support for the UMP memory sharing API in the Mali driver. + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) | -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, -+ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0); -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), -+ mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0); ++config MALI_DVFS ++ bool "Enable Mali dynamically frequency change" ++ depends on MALI400 && !MALI_DEVFREQ ++ default y ++ help ++ This enables support for dynamic change frequency of Mali with the goal of lowering power consumption. + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */, 0 /* core */, -+ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job)); -+#endif ++config MALI_DMA_BUF_MAP_ON_ATTACH ++ bool "Map dma-buf attachments on attach" ++ depends on MALI400 && DMA_SHARED_BUFFER ++ default y ++ help ++ This makes the Mali driver map dma-buf attachments after doing ++ attach. If this is not set the dma-buf attachments will be mapped for ++ every time the GPU need to access the buffer. 
+ -+#if defined(CONFIG_MALI400_PROFILING) -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { -+ mali_group_report_l2_cache_counters_per_core(group, 0); -+ } -+#endif /* #if defined(CONFIG_MALI400_PROFILING) */ ++ Mapping for each access can cause lower performance. + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ if (group->gp_core) { -+ trace_gpu_sched_switch(mali_gp_core_description(group->gp_core), -+ sched_clock(), mali_gp_job_get_tid(job), -+ 0, mali_gp_job_get_id(job)); -+ } -+#endif ++config MALI_SHARED_INTERRUPTS ++ bool "Support for shared interrupts" ++ depends on MALI400 ++ default n ++ help ++ Adds functionality required to properly support shared interrupts. Without this support, ++ the device driver will fail during insmod if it detects shared interrupts. This also ++ works when the GPU is not using shared interrupts, but might have a slight performance ++ impact. + -+ group->gp_running_job = job; -+ group->is_working = MALI_TRUE; ++config MALI_PMU_PARALLEL_POWER_UP ++ bool "Power up Mali PMU domains in parallel" ++ depends on MALI400 ++ default n ++ help ++ This makes the Mali driver power up all PMU power domains in parallel, instead of ++ powering up domains one by one, with a slight delay in between. Powering on all power ++ domains at the same time may cause peak currents higher than what some systems can handle. ++ These systems must not enable this option. + -+ /* Setup SW timer and record start time */ -+ group->start_time = _mali_osk_time_tickcount(); -+ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime)); ++config MALI_DT ++ bool "Using device tree to initialize module" ++ depends on MALI400 && OF ++ default n ++ help ++ This enable the Mali driver to choose the device tree path to get platform resoures ++ and disable the old config method. Mali driver could run on the platform which the ++ device tree is enabled in kernel and corresponding hardware description is implemented ++ properly in device DTS file. + -+ MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n", -+ job, -+ mali_group_core_description(group), -+ group->start_time)); -+} ++config MALI_DEVFREQ ++ bool "Using devfreq to tuning frequency" ++ depends on MALI400 && PM_DEVFREQ ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ default n ++ help ++ Support devfreq for Mali. + -+/* Used to set all the registers except frame renderer list address and fragment shader stack address -+ * It means the caller must set these two registers properly before calling this function -+ */ -+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled) -+{ -+ struct mali_session_data *session; ++ Using the devfreq framework and, by default, the simpleondemand ++ governor, the frequency of Mali will be dynamically selected from the ++ available OPPs. + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++config MALI_QUIET ++ bool "Make Mali driver very quiet" ++ depends on MALI400 && !MALI400_DEBUG ++ default n ++ help ++ This forces the Mali driver to never print any messages. + -+ MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n", -+ job, sub_job + 1, -+ mali_pp_job_get_sub_job_count(job), -+ mali_group_core_description(group))); ++ If unsure, say N. 
+diff --git a/drivers/gpu/arm/mali400/mali/Makefile b/drivers/gpu/arm/mali400/mali/Makefile +new file mode 100644 +index 000000000..0b91321a5 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/Makefile +@@ -0,0 +1,206 @@ ++# ++# Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+ session = mali_pp_job_get_session(job); ++USE_UMPV2=0 ++USING_PROFILING ?= 1 ++USING_INTERNAL_PROFILING ?= 0 ++USING_DVFS ?= 1 ++USING_DMA_BUF_FENCE ?= 0 ++MALI_HEATMAPS_ENABLED ?= 0 ++MALI_DMA_BUF_MAP_ON_ATTACH ?= 1 ++MALI_PMU_PARALLEL_POWER_UP ?= 0 ++USING_DT ?= 0 ++MALI_MEM_SWAP_TRACKING ?= 0 ++USING_DEVFREQ ?= 0 + -+ if (NULL != group->l2_cache_core[0]) { -+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job)); -+ } ++# The Makefile sets up "arch" based on the CONFIG, creates the version info ++# string and the __malidrv_build_info.c file, and then call the Linux build ++# system to actually build the driver. After that point the Kbuild file takes ++# over. + -+ if (NULL != group->l2_cache_core[1]) { -+ mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job)); -+ } ++# set up defaults if not defined by the user ++ARCH ?= arm + -+ /* Reset GPU and change gpu secure mode if needed. */ -+ if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) { -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); -+ _mali_osk_gpu_reset_and_secure_mode_enable(); -+ /* Need to disable the pmu interrupt mask register */ -+ if (NULL != pmu) { -+ mali_pmu_reset(pmu); -+ } -+ } else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) { -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); -+ _mali_osk_gpu_reset_and_secure_mode_disable(); -+ /* Need to disable the pmu interrupt mask register */ -+ if (NULL != pmu) { -+ mali_pmu_reset(pmu); -+ } -+ } ++OSKOS=linux ++FILES_PREFIX= + -+ /* Reload the mmu page table if needed */ -+ if ((MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == gpu_secure_mode_pre_enabled) -+ || (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == gpu_secure_mode_pre_enabled)) { -+ mali_group_reset(group); -+ mali_group_activate_page_directory(group, session, MALI_TRUE); -+ } else { -+ mali_group_activate_page_directory(group, session, MALI_FALSE); -+ } ++check_cc2 = \ ++ $(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \ ++ then \ ++ echo "$(2)"; \ ++ else \ ++ echo "$(3)"; \ ++ fi ;) + -+ if (mali_group_is_virtual(group)) { -+ struct mali_group *child; -+ struct mali_group *temp; -+ u32 core_num = 0; ++# This conditional makefile exports the global definition ARM_INTERNAL_BUILD. Customer releases will not include arm_internal.mak ++-include ../../../arm_internal.mak + -+ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job)); ++# Give warning of old config parameters are used ++ifneq ($(CONFIG),) ++$(warning "You have specified the CONFIG variable which is no longer in used. 
Use TARGET_PLATFORM instead.") ++endif + -+ /* Configure DLBU for the job */ -+ mali_dlbu_config_job(group->dlbu_core, job); ++ifneq ($(CPU),) ++$(warning "You have specified the CPU variable which is no longer in used. Use TARGET_PLATFORM instead.") ++endif + -+ /* Write stack address for each child group */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ mali_pp_write_addr_stack(child->pp_core, job); -+ core_num++; -+ } ++# Include the mapping between TARGET_PLATFORM and KDIR + MALI_PLATFORM ++-include MALI_CONFIGURATION ++export KDIR ?= $(KDIR-$(TARGET_PLATFORM)) ++export MALI_PLATFORM ?= $(MALI_PLATFORM-$(TARGET_PLATFORM)) + -+ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE); -+ } else { -+ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE); -+ } ++ifneq ($(TARGET_PLATFORM),) ++ifeq ($(MALI_PLATFORM),) ++$(error "Invalid TARGET_PLATFORM: $(TARGET_PLATFORM)") ++endif ++endif + -+ /* if the group is virtual, loop through physical groups which belong to this group -+ * and call profiling events for its cores as virtual */ -+ if (MALI_TRUE == mali_group_is_virtual(group)) { -+ struct mali_group *child; -+ struct mali_group *temp; ++# validate lookup result ++ifeq ($(KDIR),) ++$(error No KDIR found for platform $(TARGET_PLATFORM)) ++endif + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); ++ifeq ($(USING_GPU_UTILIZATION), 1) ++ ifeq ($(USING_DVFS), 1) ++ $(error USING_GPU_UTILIZATION conflict with USING_DVFS you can read the Integration Guide to choose which one do you need) ++ endif ++endif + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, -+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); ++ifeq ($(USING_UMP),1) ++export CONFIG_MALI400_UMP=y ++export EXTRA_DEFINES += -DCONFIG_MALI400_UMP=1 ++ifeq ($(USE_UMPV2),1) ++UMP_SYMVERS_FILE ?= ../umpv2/Module.symvers ++else ++UMP_SYMVERS_FILE ?= ../ump/Module.symvers ++endif ++KBUILD_EXTRA_SYMBOLS = $(realpath $(UMP_SYMVERS_FILE)) ++$(warning $(KBUILD_EXTRA_SYMBOLS)) ++endif + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); -+#endif -+ } ++# Define host system directory ++KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build + -+#if defined(CONFIG_MALI400_PROFILING) -+ if (0 != group->l2_cache_core_ref_count[0]) { -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { -+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); -+ } -+ } -+ if (0 != group->l2_cache_core_ref_count[1]) { -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) { -+ 
mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1])); -+ } -+ } -+#endif /* #if defined(CONFIG_MALI400_PROFILING) */ ++include $(KDIR)/.config + -+ } else { /* group is physical - call profiling events for physical cores */ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); ++ifeq ($(ARCH), arm) ++# when compiling for ARM we're cross compiling ++export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-) ++endif + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL, -+ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); ++# report detected/selected settings ++ifdef ARM_INTERNAL_BUILD ++$(warning TARGET_PLATFORM $(TARGET_PLATFORM)) ++$(warning KDIR $(KDIR)) ++$(warning MALI_PLATFORM $(MALI_PLATFORM)) ++endif + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core), -+ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); -+#endif ++# Set up build config ++export CONFIG_MALI400=m ++export CONFIG_MALI450=y ++export CONFIG_MALI470=y + -+#if defined(CONFIG_MALI400_PROFILING) -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { -+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); -+ } -+#endif /* #if defined(CONFIG_MALI400_PROFILING) */ -+ } ++export EXTRA_DEFINES += -DCONFIG_MALI400=1 ++export EXTRA_DEFINES += -DCONFIG_MALI450=1 ++export EXTRA_DEFINES += -DCONFIG_MALI470=1 + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ if (group->pp_core) { -+ trace_gpu_sched_switch(mali_pp_core_description(group->pp_core), -+ sched_clock(), mali_pp_job_get_tid(job), -+ 0, mali_pp_job_get_id(job)); -+ } -+#endif ++ifneq ($(MALI_PLATFORM),) ++export EXTRA_DEFINES += -DMALI_FAKE_PLATFORM_DEVICE=1 ++export MALI_PLATFORM_FILES = $(wildcard platform/$(MALI_PLATFORM)/*.c) ++endif + -+ group->pp_running_job = job; -+ group->pp_running_sub_job = sub_job; -+ group->is_working = MALI_TRUE; ++ifeq ($(USING_PROFILING),1) ++ifeq ($(CONFIG_TRACEPOINTS),) ++$(warning CONFIG_TRACEPOINTS required for profiling) ++else ++export CONFIG_MALI400_PROFILING=y ++export EXTRA_DEFINES += -DCONFIG_MALI400_PROFILING=1 ++ifeq ($(USING_INTERNAL_PROFILING),1) ++export CONFIG_MALI400_INTERNAL_PROFILING=y ++export EXTRA_DEFINES += -DCONFIG_MALI400_INTERNAL_PROFILING=1 ++endif ++ifeq ($(MALI_HEATMAPS_ENABLED),1) ++export MALI_HEATMAPS_ENABLED=y ++export EXTRA_DEFINES += -DCONFIG_MALI400_HEATMAPS_ENABLED ++endif ++endif ++endif + -+ /* Setup SW timer and record start time */ -+ group->start_time = _mali_osk_time_tickcount(); -+ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime)); ++ifeq ($(MALI_DMA_BUF_MAP_ON_ATTACH),1) ++export CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH=y ++export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_MAP_ON_ATTACH ++endif + -+ MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on 
group %s at %u\n", -+ job, sub_job + 1, -+ mali_pp_job_get_sub_job_count(job), -+ mali_group_core_description(group), -+ group->start_time)); ++ifeq ($(MALI_SHARED_INTERRUPTS),1) ++export CONFIG_MALI_SHARED_INTERRUPTS=y ++export EXTRA_DEFINES += -DCONFIG_MALI_SHARED_INTERRUPTS ++endif + -+} ++ifeq ($(USING_DVFS),1) ++export CONFIG_MALI_DVFS=y ++export EXTRA_DEFINES += -DCONFIG_MALI_DVFS ++endif + -+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr) -+{ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ifeq ($(USING_DMA_BUF_FENCE),1) ++export CONFIG_MALI_DMA_BUF_FENCE=y ++export EXTRA_DEFINES += -DCONFIG_MALI_DMA_BUF_FENCE ++endif + -+ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]); -+ mali_l2_cache_invalidate(group->l2_cache_core[0]); ++ifeq ($(MALI_PMU_PARALLEL_POWER_UP),1) ++export CONFIG_MALI_PMU_PARALLEL_POWER_UP=y ++export EXTRA_DEFINES += -DCONFIG_MALI_PMU_PARALLEL_POWER_UP ++endif + -+ mali_mmu_zap_tlb_without_stall(group->mmu); ++ifdef CONFIG_OF ++ifeq ($(USING_DT),1) ++export CONFIG_MALI_DT=y ++export EXTRA_DEFINES += -DCONFIG_MALI_DT ++endif ++endif + -+ mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr); ++ifeq ($(USING_DEVFREQ), 1) ++ifdef CONFIG_PM_DEVFREQ ++export CONFIG_MALI_DEVFREQ=y ++export EXTRA_DEFINES += -DCONFIG_MALI_DEVFREQ=1 ++else ++$(warning "You want to support DEVFREQ but kernel didn't support DEVFREQ.") ++endif ++endif + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), -+ 0, 0, 0, 0, 0); ++ifneq ($(BUILD),release) ++# Debug ++export CONFIG_MALI400_DEBUG=y ++else ++# Release ++ifeq ($(MALI_QUIET),1) ++export CONFIG_MALI_QUIET=y ++export EXTRA_DEFINES += -DCONFIG_MALI_QUIET ++endif ++endif + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */, 0 /* core */, -+ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job)); -+#endif -+} ++ifeq ($(MALI_SKIP_JOBS),1) ++EXTRA_DEFINES += -DPROFILING_SKIP_PP_JOBS=1 -DPROFILING_SKIP_GP_JOBS=1 ++endif + -+static void mali_group_reset_mmu(struct mali_group *group) -+{ -+ struct mali_group *child; -+ struct mali_group *temp; -+ _mali_osk_errcode_t err; ++ifeq ($(MALI_MEM_SWAP_TRACKING),1) ++EXTRA_DEFINES += -DMALI_MEM_SWAP_TRACKING=1 ++endif + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++all: $(UMP_SYMVERS_FILE) ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules ++ @rm $(FILES_PREFIX)__malidrv_build_info.c $(FILES_PREFIX)__malidrv_build_info.o + -+ if (!mali_group_is_virtual(group)) { -+ /* This is a physical group or an idle virtual group -- simply wait for -+ * the reset to complete. */ -+ err = mali_mmu_reset(group->mmu); -+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); -+ } else { /* virtual group */ -+ /* Loop through all members of this virtual group and wait -+ * until they are done resetting. 
-+ */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ err = mali_mmu_reset(child->mmu); -+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); -+ } -+ } -+} ++clean: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + -+static void mali_group_reset_pp(struct mali_group *group) ++kernelrelease: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) kernelrelease ++ ++export CONFIG KBUILD_EXTRA_SYMBOLS +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c +new file mode 100644 +index 000000000..79a418c36 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.c +@@ -0,0 +1,142 @@ ++/* ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++#include "mali_broadcast.h" ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++ ++#define MALI_BROADCAST_REGISTER_SIZE 0x1000 ++#define MALI_BROADCAST_REG_BROADCAST_MASK 0x0 ++#define MALI_BROADCAST_REG_INTERRUPT_MASK 0x4 ++ ++struct mali_bcast_unit { ++ struct mali_hw_core hw_core; ++ u32 current_mask; ++}; ++ ++struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource) +{ -+ struct mali_group *child; -+ struct mali_group *temp; ++ struct mali_bcast_unit *bcast_unit = NULL; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(resource); ++ MALI_DEBUG_PRINT(2, ("Broadcast: Creating Mali Broadcast unit: %s\n", ++ resource->description)); + -+ mali_pp_reset_async(group->pp_core); ++ bcast_unit = _mali_osk_malloc(sizeof(struct mali_bcast_unit)); ++ if (NULL == bcast_unit) { ++ MALI_PRINT_ERROR(("Broadcast: Failed to allocate memory for Broadcast unit\n")); ++ return NULL; ++ } + -+ if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) { -+ /* This is a physical group or an idle virtual group -- simply wait for -+ * the reset to complete. */ -+ mali_pp_reset_wait(group->pp_core); ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&bcast_unit->hw_core, ++ resource, MALI_BROADCAST_REGISTER_SIZE)) { ++ bcast_unit->current_mask = 0; ++ mali_bcast_reset(bcast_unit); ++ ++ return bcast_unit; + } else { -+ /* Loop through all members of this virtual group and wait until they -+ * are done resetting. 
-+ */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ mali_pp_reset_wait(child->pp_core); -+ } ++ MALI_PRINT_ERROR(("Broadcast: Failed map broadcast unit\n")); + } ++ ++ _mali_osk_free(bcast_unit); ++ ++ return NULL; +} + -+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job) ++void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit) +{ -+ struct mali_pp_job *pp_job_to_return; ++ MALI_DEBUG_ASSERT_POINTER(bcast_unit); ++ mali_hw_core_delete(&bcast_unit->hw_core); ++ _mali_osk_free(bcast_unit); ++} + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++/* Call this function to add the @group's id into bcast mask ++ * Note: redundant calling this function with same @group ++ * doesn't make any difference as calling it once ++ */ ++void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, ++ struct mali_group *group) ++{ ++ u32 bcast_id; ++ u32 broadcast_mask; ++ ++ MALI_DEBUG_ASSERT_POINTER(bcast_unit); + MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_running_job); -+ MALI_DEBUG_ASSERT_POINTER(sub_job); -+ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working); + -+ /* Stop/clear the timeout timer. */ -+ _mali_osk_timer_del_async(group->timeout_timer); ++ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group)); + -+ if (NULL != group->pp_running_job) { ++ broadcast_mask = bcast_unit->current_mask; + -+ /* Deal with HW counters and profiling */ ++ broadcast_mask |= (bcast_id); /* add PP core to broadcast */ ++ broadcast_mask |= (bcast_id << 16); /* add MMU to broadcast */ + -+ if (MALI_TRUE == mali_group_is_virtual(group)) { -+ struct mali_group *child; -+ struct mali_group *temp; ++ /* store mask so we can restore on reset */ ++ bcast_unit->current_mask = broadcast_mask; ++} + -+ /* update performance counters from each physical pp core within this virtual group */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core)); -+ } ++/* Call this function to remove @group's id from bcast mask ++ * Note: redundant calling this function with same @group ++ * doesn't make any difference as calling it once ++ */ ++void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, ++ struct mali_group *group) ++{ ++ u32 bcast_id; ++ u32 broadcast_mask; + -+#if defined(CONFIG_MALI400_PROFILING) -+ /* send profiling data per physical core */ -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, -+ mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)), -+ mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)), -+ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8), -+ 0, 0); ++ MALI_DEBUG_ASSERT_POINTER(bcast_unit); ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job), -+ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), -+ 
mali_pp_job_get_frame_builder_id(group->pp_running_job), -+ mali_pp_job_get_flush_id(group->pp_running_job)); -+ } -+ if (0 != group->l2_cache_core_ref_count[0]) { -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { -+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); -+ } -+ } -+ if (0 != group->l2_cache_core_ref_count[1]) { -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) { -+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1])); -+ } -+ } ++ bcast_id = mali_pp_core_get_bcast_id(mali_group_get_pp_core(group)); + -+#endif -+ } else { -+ /* update performance counters for a physical group's pp core */ -+ mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job); ++ broadcast_mask = bcast_unit->current_mask; + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL, -+ mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job), -+ mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job), -+ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8), -+ 0, 0); ++ broadcast_mask &= ~((bcast_id << 16) | bcast_id); + -+ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job), -+ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core), -+ mali_pp_job_get_frame_builder_id(group->pp_running_job), -+ mali_pp_job_get_flush_id(group->pp_running_job)); ++ /* store mask so we can restore on reset */ ++ bcast_unit->current_mask = broadcast_mask; ++} + -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { -+ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); -+ } -+#endif -+ } ++void mali_bcast_reset(struct mali_bcast_unit *bcast_unit) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bcast_unit); + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ if (group->gp_core) { -+ trace_gpu_sched_switch( -+ mali_gp_core_description(group->gp_core), -+ sched_clock(), 0, 0, 0); -+ } -+#endif ++ MALI_DEBUG_PRINT(4, ++ ("Broadcast: setting mask 0x%08X + 0x%08X (reset)\n", ++ bcast_unit->current_mask, ++ bcast_unit->current_mask & 0xFF)); + -+ } ++ /* set broadcast mask */ ++ mali_hw_core_register_write(&bcast_unit->hw_core, ++ MALI_BROADCAST_REG_BROADCAST_MASK, ++ bcast_unit->current_mask); + -+ if (success) { -+ /* Only do soft reset for successful jobs, a full recovery -+ * reset will be done for failed jobs. 
*/ -+ mali_pp_reset_async(group->pp_core); -+ } ++ /* set IRQ override mask */ ++ mali_hw_core_register_write(&bcast_unit->hw_core, ++ MALI_BROADCAST_REG_INTERRUPT_MASK, ++ bcast_unit->current_mask & 0xFF); ++} + -+ pp_job_to_return = group->pp_running_job; -+ group->pp_running_job = NULL; -+ group->is_working = MALI_FALSE; -+ *sub_job = group->pp_running_sub_job; ++void mali_bcast_disable(struct mali_bcast_unit *bcast_unit) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bcast_unit); + -+ if (!success) { -+ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n")); -+ mali_group_recovery_reset(group); -+ } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) { -+ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n")); -+ mali_group_recovery_reset(group); -+ } ++ MALI_DEBUG_PRINT(4, ("Broadcast: setting mask 0x0 + 0x0 (disable)\n")); + -+ return pp_job_to_return; ++ /* set broadcast mask */ ++ mali_hw_core_register_write(&bcast_unit->hw_core, ++ MALI_BROADCAST_REG_BROADCAST_MASK, ++ 0x0); ++ ++ /* set IRQ override mask */ ++ mali_hw_core_register_write(&bcast_unit->hw_core, ++ MALI_BROADCAST_REG_INTERRUPT_MASK, ++ 0x0); +} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h +new file mode 100644 +index 000000000..0475b7171 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_broadcast.h +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success) -+{ -+ struct mali_gp_job *gp_job_to_return; ++#ifndef __MALI_BROADCAST_H__ ++#define __MALI_BROADCAST_H__ + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_running_job); -+ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working); ++/* ++ * Interface for the broadcast unit on Mali-450. ++ * ++ * - Represents up to 8 × (MMU + PP) pairs. ++ * - Supports dynamically changing which (MMU + PP) pairs receive the broadcast by ++ * setting a mask. ++ */ + -+ /* Stop/clear the timeout timer. 
*/ -+ _mali_osk_timer_del_async(group->timeout_timer); ++#include "mali_hw_core.h" ++#include "mali_group.h" + -+ if (NULL != group->gp_running_job) { -+ mali_gp_update_performance_counters(group->gp_core, group->gp_running_job); ++struct mali_bcast_unit; + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), -+ mali_gp_job_get_perf_counter_value0(group->gp_running_job), -+ mali_gp_job_get_perf_counter_value1(group->gp_running_job), -+ mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8), -+ 0, 0); ++struct mali_bcast_unit *mali_bcast_unit_create(const _mali_osk_resource_t *resource); ++void mali_bcast_unit_delete(struct mali_bcast_unit *bcast_unit); + -+ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && -+ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) -+ mali_group_report_l2_cache_counters_per_core(group, 0); -+#endif ++/* Add a group to the list of (MMU + PP) pairs broadcasts go out to. */ ++void mali_bcast_add_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group); + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ if (group->pp_core) { -+ trace_gpu_sched_switch( -+ mali_pp_core_description(group->pp_core), -+ sched_clock(), 0, 0, 0); -+ } -+#endif ++/* Remove a group to the list of (MMU + PP) pairs broadcasts go out to. */ ++void mali_bcast_remove_group(struct mali_bcast_unit *bcast_unit, struct mali_group *group); + -+#if defined(CONFIG_MALI400_PROFILING) -+ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */, 0 /* core */, -+ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job)); -+#endif ++/* Re-set cached mask. This needs to be called after having been suspended. */ ++void mali_bcast_reset(struct mali_bcast_unit *bcast_unit); + -+ mali_gp_job_set_current_heap_addr(group->gp_running_job, -+ mali_gp_read_plbu_alloc_start_addr(group->gp_core)); -+ } ++/** ++ * Disable broadcast unit ++ * ++ * mali_bcast_enable must be called to re-enable the unit. Cores may not be ++ * added or removed when the unit is disabled. ++ */ ++void mali_bcast_disable(struct mali_bcast_unit *bcast_unit); + -+ if (success) { -+ /* Only do soft reset for successful jobs, a full recovery -+ * reset will be done for failed jobs. */ -+ mali_gp_reset_async(group->gp_core); -+ } ++/** ++ * Re-enable broadcast unit ++ * ++ * This resets the masks to include the cores present when mali_bcast_disable was called. ++ */ ++MALI_STATIC_INLINE void mali_bcast_enable(struct mali_bcast_unit *bcast_unit) ++{ ++ mali_bcast_reset(bcast_unit); ++} + -+ gp_job_to_return = group->gp_running_job; -+ group->gp_running_job = NULL; -+ group->is_working = MALI_FALSE; ++#endif /* __MALI_BROADCAST_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c +new file mode 100644 +index 000000000..5bed27a8c +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.c +@@ -0,0 +1,139 @@ ++/* ++ * Copyright (C) 2010-2012, 2014-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (!success) { -+ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n")); -+ mali_group_recovery_reset(group); -+ } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) { -+ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n")); -+ mali_group_recovery_reset(group); -+ } ++#include "mali_kernel_utilization.h" ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_dvfs_policy.h" ++#include "mali_control_timer.h" + -+ return gp_job_to_return; -+} ++static u64 period_start_time = 0; + -+struct mali_group *mali_group_get_glob_group(u32 index) -+{ -+ if (mali_global_num_groups > index) { -+ return mali_global_groups[index]; -+ } ++/** .KP : mali_control_timer */ ++static _mali_osk_timer_t *mali_control_timer = NULL; ++static mali_bool timer_running = MALI_FALSE; + -+ return NULL; ++/** ++ * period_of_notifying_mali_utilization_to_platform_dependent_part, ++ * ms 为å•ä½. ++ */ ++static u32 mali_control_timeout = 20; ++ ++void mali_control_timer_add(u32 timeout)/* 'timeout' : 以 ms 为å•ä½. */ ++{ ++ _mali_osk_timer_add(mali_control_timer, _mali_osk_time_mstoticks(timeout)); +} + -+u32 mali_group_get_glob_num_groups(void) ++void mali_control_timer_mod(u32 timeout_in_ms) +{ -+ return mali_global_num_groups; ++ _mali_osk_timer_mod(mali_control_timer, _mali_osk_time_mstoticks(timeout_in_ms)); +} + -+static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload) ++static void mali_control_timer_callback(void *arg) +{ -+ MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n", -+ mali_session_get_page_directory(session), session, -+ mali_group_core_description(group))); ++ if (mali_utilization_enabled()) { ++ struct mali_gpu_utilization_data *util_data = NULL; ++ u64 time_period = 0; ++ mali_bool need_add_timer = MALI_TRUE; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ /* Calculate gpu utilization */ ++ util_data = mali_utilization_calculate(&period_start_time, &time_period, &need_add_timer); + -+ if (group->session != session || MALI_TRUE == is_reload) { -+ /* Different session than last time, so we need to do some work */ -+ MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n", -+ session, group->session, -+ mali_group_core_description(group))); -+ mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session)); -+ group->session = session; -+ } else { -+ /* Same session as last time, so no work required */ -+ MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n", -+ session->page_directory, -+ mali_group_core_description(group))); -+ mali_mmu_zap_tlb_without_stall(group->mmu); ++ if (util_data) { ++#if defined(CONFIG_MALI_DVFS) ++ mali_dvfs_policy_realize(util_data, time_period); ++#else ++ mali_utilization_platform_realize(util_data); ++#endif ++ ++ if (MALI_TRUE == timer_running) ++ if (MALI_TRUE == need_add_timer) { 
++ mali_control_timer_mod(mali_control_timeout); ++ } ++ } + } +} + -+static void mali_group_recovery_reset(struct mali_group *group) ++/* Init a timer (for now it is used for GPU utilization and dvfs) */ ++_mali_osk_errcode_t mali_control_timer_init(void) +{ -+ _mali_osk_errcode_t err; ++ _mali_osk_device_data data; + -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ /* Use device specific settings (if defined) */ ++ if (0 != data.control_interval) { ++ mali_control_timeout = data.control_interval; ++ MALI_DEBUG_PRINT(2, ("Mali GPU Timer: %u\n", mali_control_timeout)); ++ } ++ } + -+ /* Stop cores, bus stop */ -+ if (NULL != group->pp_core) { -+ mali_pp_stop_bus(group->pp_core); -+ } else { -+ mali_gp_stop_bus(group->gp_core); ++ mali_control_timer = _mali_osk_timer_init(mali_control_timer_callback); ++ if (NULL == mali_control_timer) { ++ return _MALI_OSK_ERR_FAULT; + } ++ _mali_osk_timer_setcallback(mali_control_timer, mali_control_timer_callback, NULL); + -+ /* Flush MMU and clear page fault (if any) */ -+ mali_mmu_activate_fault_flush_page_directory(group->mmu); -+ mali_mmu_page_fault_done(group->mmu); ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Wait for cores to stop bus, then do a hard reset on them */ -+ if (NULL != group->pp_core) { -+ if (mali_group_is_virtual(group)) { -+ struct mali_group *child, *temp; ++void mali_control_timer_term(void) ++{ ++ if (NULL != mali_control_timer) { ++ _mali_osk_timer_del(mali_control_timer); ++ timer_running = MALI_FALSE; ++ _mali_osk_timer_term(mali_control_timer); ++ mali_control_timer = NULL; ++ } ++} + -+ /* Disable the broadcast unit while we do reset directly on the member cores. */ -+ mali_bcast_disable(group->bcast_core); ++mali_bool mali_control_timer_resume(u64 time_now) ++{ ++ mali_utilization_data_assert_locked(); + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { -+ mali_pp_stop_bus_wait(child->pp_core); -+ mali_pp_hard_reset(child->pp_core); -+ } ++ if (timer_running != MALI_TRUE) { ++ timer_running = MALI_TRUE; + -+ mali_bcast_enable(group->bcast_core); -+ } else { -+ mali_pp_stop_bus_wait(group->pp_core); -+ mali_pp_hard_reset(group->pp_core); -+ } -+ } else { -+ mali_gp_stop_bus_wait(group->gp_core); -+ mali_gp_hard_reset(group->gp_core); -+ } ++ period_start_time = time_now; + -+ /* Reset MMU */ -+ err = mali_mmu_reset(group->mmu); -+ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); -+ MALI_IGNORE(err); ++ mali_utilization_reset(); + -+ group->session = NULL; ++ return MALI_TRUE; ++ } ++ ++ return MALI_FALSE; +} + -+#if MALI_STATE_TRACKING -+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size) ++void mali_control_timer_pause(void) +{ -+ int n = 0; -+ int i; -+ struct mali_group *child; -+ struct mali_group *temp; -+ -+ if (mali_group_is_virtual(group)) { -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Virtual PP Group: %p\n", group); -+ } else if (mali_group_is_in_virtual(group)) { -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Child PP Group: %p\n", group); -+ } else if (NULL != group->pp_core) { -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "Physical PP Group: %p\n", group); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "GP Group: %p\n", group); ++ mali_utilization_data_assert_locked(); ++ if (timer_running == MALI_TRUE) { ++ timer_running = MALI_FALSE; + } ++} + -+ switch (group->state) { -+ case MALI_GROUP_STATE_INACTIVE: -+ n += 
_mali_osk_snprintf(buf + n, size - n, -+ "\tstate: INACTIVE\n"); -+ break; -+ case MALI_GROUP_STATE_ACTIVATION_PENDING: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tstate: ACTIVATION_PENDING\n"); -+ break; -+ case MALI_GROUP_STATE_ACTIVE: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tstate: MALI_GROUP_STATE_ACTIVE\n"); -+ break; -+ default: -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tstate: UNKNOWN (%d)\n", group->state); -+ MALI_DEBUG_ASSERT(0); -+ break; -+ } ++void mali_control_timer_suspend(mali_bool suspend) ++{ ++ mali_utilization_data_lock(); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tSW power: %s\n", -+ group->power_is_on ? "On" : "Off"); ++ if (timer_running == MALI_TRUE) { ++ timer_running = MALI_FALSE; + -+ n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n); ++ mali_utilization_data_unlock(); + -+ for (i = 0; i < 2; i++) { -+ if (NULL != group->l2_cache_core[i]) { -+ struct mali_pm_domain *domain; -+ domain = mali_l2_cache_get_pm_domain( -+ group->l2_cache_core[i]); -+ n += mali_pm_dump_state_domain(domain, -+ buf + n, size - n); ++ if (suspend == MALI_TRUE) { ++ _mali_osk_timer_del(mali_control_timer); ++ mali_utilization_reset(); + } ++ } else { ++ mali_utilization_data_unlock(); + } ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h +new file mode 100644 +index 000000000..c9e6e058e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_control_timer.h +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ if (group->gp_core) { -+ n += mali_gp_dump_state(group->gp_core, buf + n, size - n); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tGP running job: %p\n", group->gp_running_job); -+ } -+ -+ if (group->pp_core) { -+ n += mali_pp_dump_state(group->pp_core, buf + n, size - n); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tPP running job: %p, subjob %d \n", -+ group->pp_running_job, -+ group->pp_running_sub_job); -+ } ++#ifndef __MALI_CONTROL_TIMER_H__ ++#define __MALI_CONTROL_TIMER_H__ + -+ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, -+ struct mali_group, group_list) { -+ n += mali_group_dump_state(child, buf + n, size - n); -+ } ++#include "mali_osk.h" + -+ return n; -+} -+#endif ++_mali_osk_errcode_t mali_control_timer_init(void); + -+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data) -+{ -+ struct mali_group *group = (struct mali_group *)data; -+ _mali_osk_errcode_t ret; ++void mali_control_timer_term(void); + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++mali_bool mali_control_timer_resume(u64 time_now); + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif -+ if (NULL != group->gp_core) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif ++void mali_control_timer_suspend(mali_bool suspend); ++void mali_control_timer_pause(void); + -+ ret = mali_executor_interrupt_mmu(group, MALI_TRUE); ++void mali_control_timer_add(u32 timeout); + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { -+ /* group complete and on job shedule on it, it already power off */ -+ if (NULL != group->gp_core) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), -+ 0xFFFFFFFF, 0); -+ } else { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( -+ mali_pp_core_get_id(group->pp_core)), -+ 0xFFFFFFFF, 0); -+ } ++void mali_control_timer_mod(u32 timeout_in_ms); + -+ mali_executor_unlock(); -+ return ret; -+ } -+#endif ++#endif /* __MALI_CONTROL_TIMER_H__ */ + -+ if (NULL != 
group->gp_core) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } else { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c +new file mode 100644 +index 000000000..99b7f3607 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.c +@@ -0,0 +1,213 @@ ++/* ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ return ret; -+} ++#include "mali_dlbu.h" ++#include "mali_memory.h" ++#include "mali_pp.h" ++#include "mali_group.h" ++#include "mali_osk.h" ++#include "mali_hw_core.h" + -+static void mali_group_bottom_half_mmu(void *data) -+{ -+ struct mali_group *group = (struct mali_group *)data; ++/** ++ * Size of DLBU registers in bytes ++ */ ++#define MALI_DLBU_SIZE 0x400 + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++mali_dma_addr mali_dlbu_phys_addr = 0; ++static mali_io_address mali_dlbu_cpu_addr = NULL; + -+ if (NULL != group->gp_core) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } ++/** ++ * DLBU register numbers ++ * Used in the register read/write routines. 
++ * See the hardware documentation for more information about each register ++ */ ++typedef enum mali_dlbu_register { ++ MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR = 0x0000, /**< Master tile list physical base address; ++ 31:12 Physical address to the page used for the DLBU ++ 0 DLBU enable - set this bit to 1 enables the AXI bus ++ between PPs and L2s, setting to 0 disables the router and ++ no further transactions are sent to DLBU */ ++ MALI_DLBU_REGISTER_MASTER_TLLIST_VADDR = 0x0004, /**< Master tile list virtual base address; ++ 31:12 Virtual address to the page used for the DLBU */ ++ MALI_DLBU_REGISTER_TLLIST_VBASEADDR = 0x0008, /**< Tile list virtual base address; ++ 31:12 Virtual address to the tile list. This address is used when ++ calculating the call address sent to PP.*/ ++ MALI_DLBU_REGISTER_FB_DIM = 0x000C, /**< Framebuffer dimension; ++ 23:16 Number of tiles in Y direction-1 ++ 7:0 Number of tiles in X direction-1 */ ++ MALI_DLBU_REGISTER_TLLIST_CONF = 0x0010, /**< Tile list configuration; ++ 29:28 select the size of each allocated block: 0=128 bytes, 1=256, 2=512, 3=1024 ++ 21:16 2^n number of tiles to be binned to one tile list in Y direction ++ 5:0 2^n number of tiles to be binned to one tile list in X direction */ ++ MALI_DLBU_REGISTER_START_TILE_POS = 0x0014, /**< Start tile positions; ++ 31:24 start position in Y direction for group 1 ++ 23:16 start position in X direction for group 1 ++ 15:8 start position in Y direction for group 0 ++ 7:0 start position in X direction for group 0 */ ++ MALI_DLBU_REGISTER_PP_ENABLE_MASK = 0x0018, /**< PP enable mask; ++ 7 enable PP7 for load balancing ++ 6 enable PP6 for load balancing ++ 5 enable PP5 for load balancing ++ 4 enable PP4 for load balancing ++ 3 enable PP3 for load balancing ++ 2 enable PP2 for load balancing ++ 1 enable PP1 for load balancing ++ 0 enable PP0 for load balancing */ ++} mali_dlbu_register; + -+ mali_executor_interrupt_mmu(group, MALI_FALSE); ++typedef enum { ++ PP0ENABLE = 0, ++ PP1ENABLE, ++ PP2ENABLE, ++ PP3ENABLE, ++ PP4ENABLE, ++ PP5ENABLE, ++ PP6ENABLE, ++ PP7ENABLE ++} mali_dlbu_pp_enable; + -+ if (NULL != group->gp_core) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } else { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_mmu_get_rawstat(group->mmu), 0); -+ } -+} ++struct mali_dlbu_core { ++ struct mali_hw_core hw_core; /**< Common for all HW cores */ ++ u32 pp_cores_mask; /**< This is a mask for the PP cores whose operation will be controlled by LBU ++ see MALI_DLBU_REGISTER_PP_ENABLE_MASK register */ ++}; + -+_mali_osk_errcode_t mali_group_upper_half_gp(void *data) ++_mali_osk_errcode_t mali_dlbu_initialize(void) +{ -+ struct mali_group *group = (struct mali_group *)data; -+ _mali_osk_errcode_t ret; -+ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ MALI_DEBUG_PRINT(2, ("Mali DLBU: Initializing\n")); + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if 
defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; ++ if (_MALI_OSK_ERR_OK == ++ mali_mmu_get_table_page(&mali_dlbu_phys_addr, ++ &mali_dlbu_cpu_addr)) { ++ return _MALI_OSK_ERR_OK; + } -+#endif -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), -+ mali_gp_get_rawstat(group->gp_core), 0); + -+ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n", -+ mali_gp_get_rawstat(group->gp_core), -+ mali_group_core_description(group))); -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif -+ ret = mali_executor_interrupt_gp(group, MALI_TRUE); ++ return _MALI_OSK_ERR_FAULT; ++} + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { -+ /* group complete and on job shedule on it, it already power off */ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), -+ 0xFFFFFFFF, 0); -+ mali_executor_unlock(); -+ return ret; ++void mali_dlbu_terminate(void) ++{ ++ MALI_DEBUG_PRINT(3, ("Mali DLBU: terminating\n")); ++ ++ if (0 != mali_dlbu_phys_addr && 0 != mali_dlbu_cpu_addr) { ++ mali_mmu_release_table_page(mali_dlbu_phys_addr, ++ mali_dlbu_cpu_addr); ++ mali_dlbu_phys_addr = 0; ++ mali_dlbu_cpu_addr = 0; + } -+#endif -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), -+ mali_gp_get_rawstat(group->gp_core), 0); -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif -+ return ret; +} + -+static void mali_group_bottom_half_gp(void *data) ++struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource) +{ -+ struct mali_group *group = (struct mali_group *)data; ++ struct mali_dlbu_core *core = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ MALI_DEBUG_PRINT(2, ("Mali DLBU: Creating Mali dynamic load balancing unit: %s\n", resource->description)); + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), -+ mali_gp_get_rawstat(group->gp_core), 0); ++ core = _mali_osk_malloc(sizeof(struct mali_dlbu_core)); ++ if (NULL != core) { ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI_DLBU_SIZE)) { ++ core->pp_cores_mask = 0; ++ if (_MALI_OSK_ERR_OK == mali_dlbu_reset(core)) { ++ return core; ++ } ++ MALI_PRINT_ERROR(("Failed to reset DLBU %s\n", core->hw_core.description)); ++ mali_hw_core_delete(&core->hw_core); ++ } + -+ mali_executor_interrupt_gp(group, MALI_FALSE); ++ 
_mali_osk_free(core); ++ } else { ++ MALI_PRINT_ERROR(("Mali DLBU: Failed to allocate memory for DLBU core\n")); ++ } + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), -+ mali_gp_get_rawstat(group->gp_core), 0); ++ return NULL; +} + -+_mali_osk_errcode_t mali_group_upper_half_pp(void *data) ++void mali_dlbu_delete(struct mali_dlbu_core *dlbu) +{ -+ struct mali_group *group = (struct mali_group *)data; -+ _mali_osk_errcode_t ret; ++ MALI_DEBUG_ASSERT_POINTER(dlbu); ++ mali_hw_core_delete(&dlbu->hw_core); ++ _mali_osk_free(dlbu); ++} + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu) ++{ ++ u32 dlbu_registers[7]; ++ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT; ++ MALI_DEBUG_ASSERT_POINTER(dlbu); + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group)) { -+ /* Not working, so nothing to do */ -+ mali_executor_unlock(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif ++ MALI_DEBUG_PRINT(4, ("Mali DLBU: mali_dlbu_reset: %s\n", dlbu->hw_core.description)); + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_pp_get_rawstat(group->pp_core), 0); ++ dlbu_registers[0] = mali_dlbu_phys_addr | 1; /* bit 0 enables the whole core */ ++ dlbu_registers[1] = MALI_DLBU_VIRT_ADDR; ++ dlbu_registers[2] = 0; ++ dlbu_registers[3] = 0; ++ dlbu_registers[4] = 0; ++ dlbu_registers[5] = 0; ++ dlbu_registers[6] = dlbu->pp_cores_mask; + -+ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n", -+ mali_pp_get_rawstat(group->pp_core), -+ mali_group_core_description(group))); -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif ++ /* write reset values to core registers */ ++ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_MASTER_TLLIST_PHYS_ADDR, dlbu_registers, 7); + -+ ret = mali_executor_interrupt_pp(group, MALI_TRUE); ++ err = _MALI_OSK_ERR_OK; + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_lock(); -+ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { -+ /* group complete and on job shedule on it, it already power off */ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( -+ mali_pp_core_get_id(group->pp_core)), -+ 0xFFFFFFFF, 0); -+ mali_executor_unlock(); -+ return ret; -+ } -+#endif -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, -+ 0, 0, /* No pid and tid for interrupt handler */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( -+ mali_pp_core_get_id(group->pp_core)), -+ 
mali_pp_get_rawstat(group->pp_core), 0); -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ mali_executor_unlock(); -+#endif -+#endif -+ return ret; ++ return err; +} + -+static void mali_group_bottom_half_pp(void *data) ++void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu) +{ -+ struct mali_group *group = (struct mali_group *)data; -+ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); -+ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_pp_get_rawstat(group->pp_core), 0); -+ -+ mali_executor_interrupt_pp(group, MALI_FALSE); ++ MALI_DEBUG_ASSERT_POINTER(dlbu); + -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, -+ 0, _mali_osk_get_tid(), /* pid and tid */ -+ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( -+ mali_pp_core_get_id(group->pp_core)), -+ mali_pp_get_rawstat(group->pp_core), 0); ++ mali_hw_core_register_write(&dlbu->hw_core, MALI_DLBU_REGISTER_PP_ENABLE_MASK, dlbu->pp_cores_mask); +} + -+static void mali_group_timeout(void *data) ++void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group) +{ -+ struct mali_group *group = (struct mali_group *)data; ++ struct mali_pp_core *pp_core; ++ u32 bcast_id; ++ ++ MALI_DEBUG_ASSERT_POINTER(dlbu); + MALI_DEBUG_ASSERT_POINTER(group); + -+ MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n", -+ mali_group_core_description(group), -+ _mali_osk_time_tickcount())); ++ pp_core = mali_group_get_pp_core(group); ++ bcast_id = mali_pp_core_get_bcast_id(pp_core); + -+ if (NULL != group->gp_core) { -+ mali_group_schedule_bottom_half_gp(group); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ mali_group_schedule_bottom_half_pp(group); -+ } ++ dlbu->pp_cores_mask |= bcast_id; ++ MALI_DEBUG_PRINT(3, ("Mali DLBU: Adding core[%d] New mask= 0x%02x\n", bcast_id , dlbu->pp_cores_mask)); +} + -+mali_bool mali_group_zap_session(struct mali_group *group, -+ struct mali_session_data *session) ++/* Remove a group from the DLBU */ ++void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group) +{ ++ struct mali_pp_core *pp_core; ++ u32 bcast_id; ++ ++ MALI_DEBUG_ASSERT_POINTER(dlbu); + MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ if (group->session != session) { -+ /* not running from this session */ -+ return MALI_TRUE; /* success */ -+ } ++ pp_core = mali_group_get_pp_core(group); ++ bcast_id = mali_pp_core_get_bcast_id(pp_core); + -+ if (group->is_working) { -+ /* The Zap also does the stall and disable_stall */ -+ mali_bool zap_success = mali_mmu_zap_tlb(group->mmu); -+ return zap_success; -+ } else { -+ /* Just remove the session instead of zapping */ -+ mali_group_clear_session(group); -+ return MALI_TRUE; /* success */ -+ } ++ dlbu->pp_cores_mask &= ~bcast_id; ++ MALI_DEBUG_PRINT(3, ("Mali DLBU: Removing core[%d] New mask= 0x%02x\n", bcast_id, dlbu->pp_cores_mask)); +} + -+#if defined(CONFIG_MALI400_PROFILING) -+static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num) ++/* Configure the DLBU for \a job. 
This needs to be done before the job is started on the groups in the DLBU. */ ++void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job) +{ -+ u32 source0 = 0; -+ u32 value0 = 0; -+ u32 source1 = 0; -+ u32 value1 = 0; -+ u32 profiling_channel = 0; -+ -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ -+ switch (core_num) { -+ case 0: -+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS; -+ break; -+ case 1: -+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS; -+ break; -+ case 2: -+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS; -+ break; -+ default: -+ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS; -+ break; -+ } ++ u32 *registers; ++ MALI_DEBUG_ASSERT(job); ++ registers = mali_pp_job_get_dlbu_registers(job); ++ MALI_DEBUG_PRINT(4, ("Mali DLBU: Starting job\n")); + -+ if (0 == core_num) { -+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); -+ } -+ if (1 == core_num) { -+ if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) { -+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); -+ } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) { -+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1); -+ } -+ } -+ if (2 == core_num) { -+ if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) { -+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); -+ } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) { -+ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1); -+ } -+ } ++ /* Writing 4 registers: ++ * DLBU registers except the first two (written once at DLBU initialisation / reset) and the PP_ENABLE_MASK register */ ++ mali_hw_core_register_write_array_relaxed(&dlbu->hw_core, MALI_DLBU_REGISTER_TLLIST_VBASEADDR, registers, 4); + -+ _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0); +} -+#endif /* #if defined(CONFIG_MALI400_PROFILING) */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_group.h b/drivers/gpu/arm/mali400/mali/common/mali_group.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h new file mode 100644 -index 000000000..32481e4a6 +index 000000000..a7ecf4147 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_group.h -@@ -0,0 +1,460 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_dlbu.h +@@ -0,0 +1,45 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -285751,464 +288645,363 @@ index 000000000..32481e4a6 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_GROUP_H__ -+#define __MALI_GROUP_H__ -+ -+#include "mali_osk.h" -+#include "mali_l2_cache.h" -+#include "mali_mmu.h" -+#include "mali_gp.h" -+#include "mali_pp.h" -+#include "mali_session.h" -+#include "mali_osk_profiling.h" -+ -+/** -+ * @brief Default max runtime [ms] for a core job - used by timeout timers -+ */ -+#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000 -+ -+extern int mali_max_job_runtime; -+ -+#define MALI_MAX_NUMBER_OF_GROUPS 10 -+#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8 -+ -+enum mali_group_state { -+ MALI_GROUP_STATE_INACTIVE, -+ MALI_GROUP_STATE_ACTIVATION_PENDING, -+ MALI_GROUP_STATE_ACTIVE, -+}; -+ -+/** -+ * The structure represents a render group -+ * A render group is defined by all the cores that share the same Mali MMU -+ */ -+ -+struct mali_group { -+ struct mali_mmu_core *mmu; -+ struct mali_session_data *session; -+ -+ enum mali_group_state state; -+ mali_bool power_is_on; -+ -+ mali_bool is_working; -+ unsigned long start_time; /* in ticks */ -+ -+ struct mali_gp_core *gp_core; -+ struct mali_gp_job *gp_running_job; -+ -+ struct mali_pp_core *pp_core; -+ struct mali_pp_job *pp_running_job; -+ u32 pp_running_sub_job; -+ -+ struct mali_pm_domain *pm_domain; -+ -+ struct mali_l2_cache_core *l2_cache_core[2]; -+ u32 l2_cache_core_ref_count[2]; ++#ifndef __MALI_DLBU_H__ ++#define __MALI_DLBU_H__ + -+ /* Parent virtual group (if any) */ -+ struct mali_group *parent_group; ++#define MALI_DLBU_VIRT_ADDR 0xFFF00000 /* master tile virtual address fixed at this value and mapped into every session */ + -+ struct mali_dlbu_core *dlbu_core; -+ struct mali_bcast_unit *bcast_core; ++#include "mali_osk.h" + -+ /* Used for working groups which needs to be disabled */ -+ mali_bool disable_requested; ++struct mali_pp_job; ++struct mali_group; ++struct mali_dlbu_core; + -+ /* Used by group to link child groups (for virtual group) */ -+ _mali_osk_list_t group_list; ++extern mali_dma_addr mali_dlbu_phys_addr; + -+ /* Used by executor module in order to link groups of same state */ -+ _mali_osk_list_t executor_list; ++_mali_osk_errcode_t mali_dlbu_initialize(void); ++void mali_dlbu_terminate(void); + -+ /* Used by PM domains to link groups of same domain */ -+ _mali_osk_list_t pm_domain_list; ++struct mali_dlbu_core *mali_dlbu_create(const _mali_osk_resource_t *resource); ++void mali_dlbu_delete(struct mali_dlbu_core *dlbu); + -+ _mali_osk_wq_work_t *bottom_half_work_mmu; -+ _mali_osk_wq_work_t *bottom_half_work_gp; -+ _mali_osk_wq_work_t *bottom_half_work_pp; ++_mali_osk_errcode_t mali_dlbu_reset(struct mali_dlbu_core *dlbu); + -+ _mali_osk_timer_t *timeout_timer; -+}; ++void mali_dlbu_add_group(struct mali_dlbu_core *dlbu, struct mali_group *group); ++void mali_dlbu_remove_group(struct mali_dlbu_core *dlbu, struct mali_group *group); + -+/** @brief Create a new Mali group object ++/** @brief Called to update HW after DLBU state changed + * -+ * @return A pointer to a new group object ++ * This function must be called after \a mali_dlbu_add_group or \a ++ * mali_dlbu_remove_group to write the updated mask to hardware, unless the ++ * same is accomplished by calling \a mali_dlbu_reset. 
+ */ -+struct mali_group *mali_group_create(struct mali_l2_cache_core *core, -+ struct mali_dlbu_core *dlbu, -+ struct mali_bcast_unit *bcast, -+ u32 domain_index); -+ -+void mali_group_dump_status(struct mali_group *group); ++void mali_dlbu_update_mask(struct mali_dlbu_core *dlbu); + -+void mali_group_delete(struct mali_group *group); ++void mali_dlbu_config_job(struct mali_dlbu_core *dlbu, struct mali_pp_job *job); + -+_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, -+ struct mali_mmu_core *mmu_core); -+void mali_group_remove_mmu_core(struct mali_group *group); ++#endif /* __MALI_DLBU_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c +new file mode 100644 +index 000000000..55b21a410 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.c +@@ -0,0 +1,308 @@ ++/* ++ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, -+ struct mali_gp_core *gp_core); -+void mali_group_remove_gp_core(struct mali_group *group); ++#include ++#include "mali_kernel_common.h" ++#include "mali_scheduler.h" ++#include "mali_dvfs_policy.h" ++#include "mali_osk_mali.h" ++#include "mali_osk_profiling.h" + -+_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, -+ struct mali_pp_core *pp_core); -+void mali_group_remove_pp_core(struct mali_group *group); ++#define CLOCK_TUNING_TIME_DEBUG 0 + -+MALI_STATIC_INLINE const char *mali_group_core_description( -+ struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ if (NULL != group->pp_core) { -+ return mali_pp_core_description(group->pp_core); -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ return mali_gp_core_description(group->gp_core); -+ } -+} ++#define MAX_PERFORMANCE_VALUE 256 ++#define MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(percent) ((int) ((percent)*(MAX_PERFORMANCE_VALUE)/100.0 + 0.5)) + -+MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); ++/** The max fps the same as display vsync default 60, can set by module insert parameter */ ++int mali_max_system_fps = 60; ++/** A lower limit on their desired FPS default 58, can set by module insert parameter */ ++int mali_desired_fps = 58; + -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ return (NULL != group->dlbu_core); -+#else -+ return MALI_FALSE; -+#endif -+} ++static int mali_fps_step1 = 0; ++static int mali_fps_step2 = 0; + -+/** @brief Check if a group is a part of a virtual group or not -+ */ -+MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++static int clock_step = -1; ++static int cur_clk_step = -1; ++static struct mali_gpu_clock *gpu_clk = NULL; + -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ return (NULL != group->parent_group) ? 
MALI_TRUE : MALI_FALSE; -+#else -+ return MALI_FALSE; -+#endif -+} ++/*Function prototype */ ++static int (*mali_gpu_set_freq)(int) = NULL; ++static int (*mali_gpu_get_freq)(void) = NULL; + -+/** @brief Reset group -+ * -+ * This function will reset the entire group, -+ * including all the cores present in the group. -+ * -+ * @param group Pointer to the group to reset -+ */ -+void mali_group_reset(struct mali_group *group); ++static mali_bool mali_dvfs_enabled = MALI_FALSE; + -+MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session( -+ struct mali_group *group) ++#define NUMBER_OF_NANOSECONDS_PER_SECOND 1000000000ULL ++static u32 calculate_window_render_fps(u64 time_period) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ -+ return group->session; -+} ++ u32 max_window_number; ++ u64 tmp; ++ u64 max = time_period; ++ u32 leading_zeroes; ++ u32 shift_val; ++ u32 time_period_shift; ++ u32 max_window_number_shift; ++ u32 ret_val; + -+MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ max_window_number = mali_session_max_window_num(); + -+ if (NULL != group->session) { -+ mali_mmu_activate_empty_page_directory(group->mmu); -+ group->session = NULL; ++ /* To avoid float division, extend the dividend to ns unit */ ++ tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND; ++ if (tmp > time_period) { ++ max = tmp; + } -+} + -+enum mali_group_state mali_group_activate(struct mali_group *group); ++ /* ++ * We may have 64-bit values, a dividend or a divisor or both ++ * To avoid dependencies to a 64-bit divider, we shift down the two values ++ * equally first. ++ */ ++ leading_zeroes = _mali_osk_clz((u32)(max >> 32)); ++ shift_val = 32 - leading_zeroes; + -+/* -+ * Change state from ACTIVATION_PENDING to ACTIVE -+ * For virtual group, all childs need to be ACTIVE first -+ */ -+mali_bool mali_group_set_active(struct mali_group *group); ++ time_period_shift = (u32)(time_period >> shift_val); ++ max_window_number_shift = (u32)(tmp >> shift_val); + -+/* -+ * @return MALI_TRUE means one or more domains can now be powered off, -+ * and caller should call either mali_pm_update_async() or -+ * mali_pm_update_sync() in order to do so. 
-+ */ -+mali_bool mali_group_deactivate(struct mali_group *group); ++ ret_val = max_window_number_shift / time_period_shift; + -+MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return group->state; ++ return ret_val; +} + -+MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group) ++static bool mali_pickup_closest_avail_clock(int target_clock_mhz, mali_bool pick_clock_up) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ return group->power_is_on; -+} ++ int i = 0; ++ bool clock_changed = false; + -+void mali_group_power_up(struct mali_group *group); -+void mali_group_power_down(struct mali_group *group); ++ /* Round up the closest available frequency step for target_clock_hz */ ++ for (i = 0; i < gpu_clk->num_of_steps; i++) { ++ /* Find the first item > target_clock_hz */ ++ if (((int)(gpu_clk->item[i].clock) - target_clock_mhz) > 0) { ++ break; ++ } ++ } + -+MALI_STATIC_INLINE void mali_group_set_disable_request( -+ struct mali_group *group, mali_bool disable) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ group->disable_requested = disable; ++ /* If the target clock greater than the maximum clock just pick the maximum one*/ ++ if (i == gpu_clk->num_of_steps) { ++ i = gpu_clk->num_of_steps - 1; ++ } else { ++ if ((!pick_clock_up) && (i > 0)) { ++ i = i - 1; ++ } ++ } + -+ /** -+ * When one of child group's disable_requeset is set TRUE, then -+ * the disable_request of parent group should also be set to TRUE. -+ * While, the disable_request of parent group should only be set to FALSE -+ * only when all of its child group's disable_request are set to FALSE. -+ */ -+ if (NULL != group->parent_group && MALI_TRUE == disable) { -+ group->parent_group->disable_requested = disable; ++ clock_step = i; ++ if (cur_clk_step != clock_step) { ++ clock_changed = true; + } -+} + -+MALI_STATIC_INLINE mali_bool mali_group_disable_requested( -+ struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return group->disable_requested; ++ return clock_changed; +} + -+/** @brief Virtual groups */ -+void mali_group_add_group(struct mali_group *parent, struct mali_group *child); -+struct mali_group *mali_group_acquire_group(struct mali_group *parent); -+void mali_group_remove_group(struct mali_group *parent, struct mali_group *child); -+ -+/** @brief Checks if the group is working. -+ */ -+MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group) ++void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ if (mali_group_is_in_virtual(group)) { -+ struct mali_group *tmp_group = mali_executor_get_virtual_group(); -+ return tmp_group->is_working; ++ int under_perform_boundary_value = 0; ++ int over_perform_boundary_value = 0; ++ int current_fps = 0; ++ int current_gpu_util = 0; ++ bool clock_changed = false; ++#if CLOCK_TUNING_TIME_DEBUG ++ struct timeval start; ++ struct timeval stop; ++ unsigned int elapse_time; ++ do_gettimeofday(&start); ++#endif ++ u32 window_render_fps; ++ ++ if (NULL == gpu_clk) { ++ MALI_DEBUG_PRINT(2, ("Enable DVFS but patform doesn't Support freq change. 
\n")); ++ return; + } -+ return group->is_working; -+} + -+MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return group->gp_running_job; -+} ++ window_render_fps = calculate_window_render_fps(time_period); + -+/** @brief Zap MMU TLB on all groups -+ * -+ * Zap TLB on group if \a session is active. -+ */ -+mali_bool mali_group_zap_session(struct mali_group *group, -+ struct mali_session_data *session); ++ current_fps = window_render_fps; ++ current_gpu_util = data->utilization_gpu; + -+/** @brief Get pointer to GP core object -+ */ -+MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ return group->gp_core; -+} ++ /* Get the specific under_perform_boundary_value and over_perform_boundary_value */ ++ if ((mali_desired_fps <= current_fps) && (current_fps < mali_max_system_fps)) { ++ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(90); ++ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70); ++ } else if ((mali_fps_step1 <= current_fps) && (current_fps < mali_desired_fps)) { ++ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55); ++ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35); ++ } else if ((mali_fps_step2 <= current_fps) && (current_fps < mali_fps_step1)) { ++ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(70); ++ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(50); ++ } else { ++ under_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(55); ++ over_perform_boundary_value = MALI_PERCENTAGE_TO_UTILIZATION_FRACTION(35); ++ } + -+/** @brief Get pointer to PP core object -+ */ -+MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ return group->pp_core; -+} ++ MALI_DEBUG_PRINT(5, ("Using ARM power policy: gpu util = %d \n", current_gpu_util)); ++ MALI_DEBUG_PRINT(5, ("Using ARM power policy: under_perform = %d, over_perform = %d \n", under_perform_boundary_value, over_perform_boundary_value)); ++ MALI_DEBUG_PRINT(5, ("Using ARM power policy: render fps = %d, pressure render fps = %d \n", current_fps, window_render_fps)); + -+/** @brief Start GP job -+ */ -+void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled); ++ /* Get current clock value */ ++ cur_clk_step = mali_gpu_get_freq(); + -+void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled); ++ /* Consider offscreen */ ++ if (0 == current_fps) { ++ /* GP or PP under perform, need to give full power */ ++ if (current_gpu_util > over_perform_boundary_value) { ++ if (cur_clk_step != gpu_clk->num_of_steps - 1) { ++ clock_changed = true; ++ clock_step = gpu_clk->num_of_steps - 1; ++ } ++ } + -+/** @brief Start virtual group Job on a virtual group -+*/ -+void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob); ++ /* If GPU is idle, use lowest power */ ++ if (0 == current_gpu_util) { ++ if (cur_clk_step != 0) { ++ clock_changed = true; ++ clock_step = 0; ++ } ++ } + ++ goto real_setting; ++ } + -+/** @brief Start a subjob from a particular on a specific PP group -+*/ -+void 
mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob); ++ /* 2. Calculate target clock if the GPU clock can be tuned */ ++ if (-1 != cur_clk_step) { ++ int target_clk_mhz = -1; ++ mali_bool pick_clock_up = MALI_TRUE; ++ ++ if (current_gpu_util > under_perform_boundary_value) { ++ /* when under perform, need to consider the fps part */ ++ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util * mali_desired_fps / under_perform_boundary_value / current_fps; ++ pick_clock_up = MALI_TRUE; ++ } else if (current_gpu_util < over_perform_boundary_value) { ++ /* when over perform, did't need to consider fps, system didn't want to reach desired fps */ ++ target_clk_mhz = gpu_clk->item[cur_clk_step].clock * current_gpu_util / under_perform_boundary_value; ++ pick_clock_up = MALI_FALSE; ++ } + ++ if (-1 != target_clk_mhz) { ++ clock_changed = mali_pickup_closest_avail_clock(target_clk_mhz, pick_clock_up); ++ } ++ } + -+/** @brief remove all the unused groups in tmp_unused group list, so that the group is in consistent status. -+ */ -+void mali_group_non_dlbu_job_done_virtual(struct mali_group *group); ++real_setting: ++ if (clock_changed) { ++ mali_gpu_set_freq(clock_step); + ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ gpu_clk->item[clock_step].clock, ++ gpu_clk->item[clock_step].vol / 1000, ++ 0, 0, 0); ++ } + -+/** @brief Resume GP job that suspended waiting for more heap memory -+ */ -+void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr); ++#if CLOCK_TUNING_TIME_DEBUG ++ do_gettimeofday(&stop); + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_gp_get_interrupt_result(group->gp_core); ++ elapse_time = timeval_to_ns(&stop) - timeval_to_ns(&start); ++ MALI_DEBUG_PRINT(2, ("Using ARM power policy: eclapse time = %d\n", elapse_time)); ++#endif +} + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group) ++_mali_osk_errcode_t mali_dvfs_policy_init(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_pp_get_interrupt_result(group->pp_core); -+} ++ _mali_osk_device_data data; ++ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_mmu_get_interrupt_result(group->mmu); -+} ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ if ((NULL != data.get_clock_info) && (NULL != data.set_freq) && (NULL != data.get_freq)) { ++ MALI_DEBUG_PRINT(2, ("Mali DVFS init: using arm dvfs policy \n")); + -+MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_gp_is_active(group->gp_core); -+} + -+MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ 
MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_pp_is_active(group->pp_core); -+} ++ mali_fps_step1 = mali_max_system_fps / 3; ++ mali_fps_step2 = mali_max_system_fps / 5; + -+MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group) -+{ -+ unsigned long time_cost; -+ struct mali_group *tmp_group = group; ++ data.get_clock_info(&gpu_clk); + -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ if (gpu_clk != NULL) { ++#ifdef DEBUG ++ int i; ++ for (i = 0; i < gpu_clk->num_of_steps; i++) { ++ MALI_DEBUG_PRINT(5, ("mali gpu clock info: step%d clock(%d)Hz,vol(%d) \n", ++ i, gpu_clk->item[i].clock, gpu_clk->item[i].vol)); ++ } ++#endif ++ } else { ++ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform didn't define enough info for ddk to do DVFS \n")); ++ } + -+ /* if the group is in virtual need to use virtual_group's start time */ -+ if (mali_group_is_in_virtual(group)) { -+ tmp_group = mali_executor_get_virtual_group(); -+ } ++ mali_gpu_get_freq = data.get_freq; ++ mali_gpu_set_freq = data.set_freq; + -+ time_cost = _mali_osk_time_tickcount() - tmp_group->start_time; -+ if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) { -+ /* -+ * current tick is at or after timeout end time, -+ * so this is a valid timeout -+ */ -+ return MALI_TRUE; ++ if ((NULL != gpu_clk) && (gpu_clk->num_of_steps > 0) ++ && (NULL != mali_gpu_get_freq) && (NULL != mali_gpu_set_freq)) { ++ mali_dvfs_enabled = MALI_TRUE; ++ } ++ } else { ++ MALI_DEBUG_PRINT(2, ("Mali DVFS init: platform function callback incomplete, need check mali_gpu_device_data in platform .\n")); ++ } + } else { -+ /* -+ * Not a valid timeout. A HW interrupt probably beat -+ * us to it, and the timer wasn't properly deleted -+ * (async deletion used due to atomic context). 
-+ */ -+ return MALI_FALSE; ++ err = _MALI_OSK_ERR_FAULT; ++ MALI_DEBUG_PRINT(2, ("Mali DVFS init: get platform data error .\n")); + } -+} -+ -+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_gp_mask_all_interrupts(group->gp_core); -+} + -+MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return mali_pp_mask_all_interrupts(group->pp_core); ++ return err; +} + -+MALI_STATIC_INLINE void mali_group_enable_interrupts_gp( -+ struct mali_group *group, -+ enum mali_interrupt_result exceptions) ++/* ++ * Always give full power when start a new period, ++ * if mali dvfs enabled, for performance consideration ++ */ ++void mali_dvfs_policy_new_period(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ mali_gp_enable_interrupts(group->gp_core, exceptions); -+} ++ /* Always give full power when start a new period */ ++ unsigned int cur_clk_step = 0; + -+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->gp_core); -+ _mali_osk_wq_schedule_work(group->bottom_half_work_gp); -+} ++ cur_clk_step = mali_gpu_get_freq(); + ++ if (cur_clk_step != (gpu_clk->num_of_steps - 1)) { ++ mali_gpu_set_freq(gpu_clk->num_of_steps - 1); + -+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->pp_core); -+ _mali_osk_wq_schedule_work(group->bottom_half_work_pp); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, gpu_clk->item[gpu_clk->num_of_steps - 1].clock, ++ gpu_clk->item[gpu_clk->num_of_steps - 1].vol / 1000, 0, 0, 0); ++ } +} + -+MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group) ++mali_bool mali_dvfs_policy_enabled(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT_POINTER(group->mmu); -+ _mali_osk_wq_schedule_work(group->bottom_half_work_mmu); ++ return mali_dvfs_enabled; +} + -+struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job); -+ -+struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success); -+ +#if defined(CONFIG_MALI400_PROFILING) -+MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group) ++void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item) +{ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND | -+ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), -+ 0, 0, 0, 0, 0); -+} -+#endif -+ -+struct mali_group *mali_group_get_glob_group(u32 index); -+u32 mali_group_get_glob_num_groups(void); ++ if (mali_platform_device != NULL) { + -+u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size); ++ struct mali_gpu_device_data *device_data = NULL; ++ device_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data; + ++ if ((NULL != device_data->get_clock_info) && (NULL != device_data->get_freq)) { + -+_mali_osk_errcode_t mali_group_upper_half_mmu(void *data); 
-+_mali_osk_errcode_t mali_group_upper_half_gp(void *data); -+_mali_osk_errcode_t mali_group_upper_half_pp(void *data); ++ int cur_clk_step = device_data->get_freq(); ++ struct mali_gpu_clock *mali_gpu_clk = NULL; + -+MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(group); -+ MALI_DEBUG_ASSERT(mali_group_is_virtual(group)); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ return _mali_osk_list_empty(&group->group_list); ++ device_data->get_clock_info(&mali_gpu_clk); ++ clk_item->clock = mali_gpu_clk->item[cur_clk_step].clock; ++ clk_item->vol = mali_gpu_clk->item[cur_clk_step].vol; ++ } else { ++ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: platform function callback incomplete, need check mali_gpu_device_data in platform .\n")); ++ } ++ } +} ++#endif + -+#endif /* __MALI_GROUP_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h new file mode 100644 -index 000000000..a813816e9 +index 000000000..662348c4e --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c -@@ -0,0 +1,47 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_dvfs_policy.h +@@ -0,0 +1,34 @@ +/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -286217,51 +289010,38 @@ index 000000000..a813816e9 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_hw_core.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_osk_mali.h" ++#ifndef __MALI_DVFS_POLICY_H__ ++#define __MALI_DVFS_POLICY_H__ + -+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size) -+{ -+ core->phys_addr = resource->base; -+ core->phys_offset = resource->base - _mali_osk_resource_base_address(); -+ core->description = resource->description; -+ core->size = reg_size; ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr); ++void mali_dvfs_policy_realize(struct mali_gpu_utilization_data *data, u64 time_period); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) { -+ core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description); -+ if (NULL != core->mapped_registers) { -+ return _MALI_OSK_ERR_OK; -+ } else { -+ MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr)); -+ } -+ _mali_osk_mem_unreqregion(core->phys_addr, core->size); -+ } else { -+ MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr)); -+ } ++_mali_osk_errcode_t mali_dvfs_policy_init(void); + -+ return _MALI_OSK_ERR_FAULT; -+} ++void mali_dvfs_policy_new_period(void); + -+void mali_hw_core_delete(struct mali_hw_core *core) -+{ -+ if (NULL != core->mapped_registers) { -+ _mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers); -+ core->mapped_registers = NULL; -+ } -+ _mali_osk_mem_unreqregion(core->phys_addr, core->size); ++mali_bool mali_dvfs_policy_enabled(void); ++ ++#if defined(CONFIG_MALI400_PROFILING) ++void mali_get_current_gpu_clk_item(struct mali_gpu_clk_item *clk_item); ++#endif ++ ++#ifdef __cplusplus +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h ++#endif ++ ++#endif/* __MALI_DVFS_POLICY_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_executor.c b/drivers/gpu/arm/mali400/mali/common/mali_executor.c new file mode 100644 -index 000000000..38d96e240 +index 000000000..0cf1ec0b0 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h -@@ -0,0 +1,111 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_executor.c +@@ -0,0 +1,2707 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -286270,2166 +289050,2711 @@ index 000000000..38d96e240 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_HW_CORE_H__ -+#define __MALI_HW_CORE_H__ -+ -+#include "mali_osk.h" ++#include "mali_executor.h" ++#include "mali_scheduler.h" +#include "mali_kernel_common.h" ++#include "mali_kernel_core.h" ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_pp.h" ++#include "mali_pp_job.h" ++#include "mali_group.h" ++#include "mali_pm.h" ++#include "mali_timeline.h" ++#include "mali_osk_profiling.h" ++#include "mali_session.h" ++#include "mali_osk_mali.h" + -+/** -+ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU) -+ * This struct is embedded inside all core specific structs. -+ */ -+struct mali_hw_core { -+ uintptr_t phys_addr; /**< Physical address of the registers */ -+ u32 phys_offset; /**< Offset from start of Mali to registers */ -+ u32 size; /**< Size of registers */ -+ mali_io_address mapped_registers; /**< Virtual mapping of the registers */ -+ const char *description; /**< Name of unit (as specified in device configuration) */ -+}; ++/*Add for voltage scan function*/ ++extern u32 mali_group_error; + -+#define MALI_REG_POLL_COUNT_FAST 1000000 -+#define MALI_REG_POLL_COUNT_SLOW 1000000 ++/* ++ * If dma_buf with map on demand is used, we defer job deletion and job queue ++ * if in atomic context, since both might sleep. ++ */ ++#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_DELETE 1 ++#define MALI_EXECUTOR_USE_DEFERRED_PP_JOB_QUEUE 1 ++#endif /* !defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) */ + +/* -+ * GP and PP core translate their int_stat/rawstat into one of these ++ * ---------- static type definitions (structs, enums, etc) ---------- + */ -+enum mali_interrupt_result { -+ MALI_INTERRUPT_RESULT_NONE, -+ MALI_INTERRUPT_RESULT_SUCCESS, -+ MALI_INTERRUPT_RESULT_SUCCESS_VS, -+ MALI_INTERRUPT_RESULT_SUCCESS_PLBU, -+ MALI_INTERRUPT_RESULT_OOM, -+ MALI_INTERRUPT_RESULT_ERROR ++ ++enum mali_executor_state_t { ++ EXEC_STATE_NOT_PRESENT, /* Virtual group on Mali-300/400 (do not use) */ ++ EXEC_STATE_DISABLED, /* Disabled by core scaling (do not use) */ ++ EXEC_STATE_EMPTY, /* No child groups for virtual group (do not use) */ ++ EXEC_STATE_INACTIVE, /* Can be used, but must be activate first */ ++ EXEC_STATE_IDLE, /* Active and ready to be used */ ++ EXEC_STATE_WORKING, /* Executing a job */ +}; + -+_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size); -+void mali_hw_core_delete(struct mali_hw_core *core); ++/* ++ * ---------- global variables (exported due to inline functions) ---------- ++ */ + -+MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address) -+{ -+ u32 read_val; -+ read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address); -+ MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n", -+ core->description, relative_address, read_val)); -+ return read_val; -+} ++/* Lock for this module (protecting all HW access except L2 caches) */ ++_mali_osk_spinlock_irq_t *mali_executor_lock_obj = NULL; + -+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val) -+{ -+ MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n", -+ core->description, relative_address, new_val)); -+ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val); -+} ++mali_bool 
mali_executor_hints[MALI_EXECUTOR_HINT_MAX]; + -+/* Conditionally write a register. -+ * The register will only be written if the new value is different from the old_value. -+ * If the new value is different, the old value will also be updated */ -+MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val) -+{ -+ MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n", -+ core->description, relative_address, new_val)); -+ if (old_val != new_val) { -+ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val); -+ } -+} ++/* ++ * ---------- static variables ---------- ++ */ + -+MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val) -+{ -+ MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n", -+ core->description, relative_address, new_val)); -+ _mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val); -+} ++/* Used to defer job scheduling */ ++static _mali_osk_wq_work_t *executor_wq_high_pri = NULL; + -+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs) -+{ -+ u32 i; -+ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n", -+ core->description, relative_address, nr_of_regs)); ++/* Store version from GP and PP (user space wants to know this) */ ++static u32 pp_version = 0; ++static u32 gp_version = 0; + -+ /* Do not use burst writes against the registers */ -+ for (i = 0; i < nr_of_regs; i++) { -+ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]); -+ } -+} ++/* List of physical PP groups which are disabled by some external source */ ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_disabled); ++static u32 group_list_disabled_count = 0; + -+/* Conditionally write a set of registers. -+ * The register will only be written if the new value is different from the old_value. -+ * If the new value is different, the old value will also be updated */ -+MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array) -+{ -+ u32 i; -+ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n", -+ core->description, relative_address, nr_of_regs)); ++/* List of groups which can be used, but activate first */ ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_inactive); ++static u32 group_list_inactive_count = 0; + -+ /* Do not use burst writes against the registers */ -+ for (i = 0; i < nr_of_regs; i++) { -+ if (old_array[i] != write_array[i]) { -+ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]); -+ } -+ } -+} ++/* List of groups which are active and ready to be used */ ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_idle); ++static u32 group_list_idle_count = 0; + -+#endif /* __MALI_HW_CORE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h -new file mode 100644 -index 000000000..6a8f0f011 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h -@@ -0,0 +1,181 @@ -+/* -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++/* List of groups which are executing a job */ ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(group_list_working); ++static u32 group_list_working_count = 0; + -+#ifndef __MALI_KERNEL_COMMON_H__ -+#define __MALI_KERNEL_COMMON_H__ ++/* Virtual group (if any) */ ++static struct mali_group *virtual_group = NULL; + -+#include "mali_osk.h" ++/* Virtual group state is tracked with a state variable instead of 4 lists */ ++static enum mali_executor_state_t virtual_group_state = EXEC_STATE_NOT_PRESENT; + -+/* Make sure debug is defined when it should be */ -+#ifndef DEBUG -+#if defined(_DEBUG) -+#define DEBUG -+#endif -+#endif ++/* GP group */ ++static struct mali_group *gp_group = NULL; + -+/* The file include several useful macros for error checking, debugging and printing. -+ * - MALI_PRINTF(...) Do not use this function: Will be included in Release builds. -+ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL. -+ * - MALI_DEBUG_ERROR( (X) ) Prints an errortext, a source trace, and the given error message. -+ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit. -+ * - MALI_DEBUG_ASSERT_POINTER(pointer) Triggers if the pointer is a zero pointer. -+ * - MALI_DEBUG_CODE( X ) The code inside the macro is only compiled in Debug builds. -+ * -+ * The (X) means that you must add an extra parenthesis around the argumentlist. -+ * -+ * The printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg -+ * -+ * Suggested range for the DEBUG-LEVEL is [1:6] where -+ * [1:2] Is messages with highest priority, indicate possible errors. -+ * [3:4] Is messages with medium priority, output important variables. -+ * [5:6] Is messages with low priority, used during extensive debugging. -+ */ ++/* GP group state is tracked with a state variable instead of 4 lists */ ++static enum mali_executor_state_t gp_group_state = EXEC_STATE_NOT_PRESENT; + -+/** -+* Fundamental error macro. Reports an error code. This is abstracted to allow us to -+* easily switch to a different error reporting method if we want, and also to allow -+* us to search for error returns easily. -+* -+* Note no closing semicolon - this is supplied in typical usage: -+* -+* MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY); -+*/ -+#define MALI_ERROR(error_code) return (error_code) ++static u32 gp_returned_cookie = 0; + -+/** -+ * Basic error macro, to indicate success. -+ * Note no closing semicolon - this is supplied in typical usage: -+ * -+ * MALI_SUCCESS; -+ */ -+#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK) ++/* Total number of physical PP cores present */ ++static u32 num_physical_pp_cores_total = 0; + -+/** -+ * Basic error macro. This checks whether the given condition is true, and if not returns -+ * from this function with the supplied error code. This is a macro so that we can override it -+ * for stress testing. -+ * -+ * Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling -+ * else clauses. 
Note also no closing semicolon - this is supplied in typical usage: -+ * -+ * MALI_CHECK((p!=NULL), ERROR_NO_OBJECT); -+ */ -+#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0) ++/* Number of physical cores which are enabled */ ++static u32 num_physical_pp_cores_enabled = 0; + -+/** -+ * Error propagation macro. If the expression given is anything other than -+ * _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function -+ * as an error code. This effectively acts as a guard clause, and propagates -+ * error values up the call stack. This uses a temporary value to ensure that -+ * the error expression is not evaluated twice. -+ * If the counter for forcing a failure has been set using _mali_force_error, -+ * this error will be returned without evaluating the expression in -+ * MALI_CHECK_NO_ERROR -+ */ -+#define MALI_CHECK_NO_ERROR(expression) \ -+ do { _mali_osk_errcode_t _check_no_error_result=(expression); \ -+ if(_check_no_error_result != _MALI_OSK_ERR_OK) \ -+ MALI_ERROR(_check_no_error_result); \ -+ } while(0) ++/* Enable or disable core scaling */ ++static mali_bool core_scaling_enabled = MALI_TRUE; + -+/** -+ * Pointer check macro. Checks non-null pointer. -+ */ -+#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) ) ++/* Variables to allow safe pausing of the scheduler */ ++static _mali_osk_wait_queue_t *executor_working_wait_queue = NULL; ++static u32 pause_count = 0; + -+/** -+ * Error macro with goto. This checks whether the given condition is true, and if not jumps -+ * to the specified label using a goto. The label must therefore be local to the function in -+ * which this macro appears. This is most usually used to execute some clean-up code before -+ * exiting with a call to ERROR. -+ * -+ * Like the other macros, this is a macro to allow us to override the condition if we wish, -+ * e.g. to force an error during stress testing. -+ */ -+#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0) ++/* PP cores haven't been enabled because of some pp cores haven't been disabled. */ ++static int core_scaling_delay_up_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; + -+/** -+ * Explicitly ignore a parameter passed into a function, to suppress compiler warnings. -+ * Should only be used with parameter names. ++/* Variables used to implement notify pp core changes to userspace when core scaling ++ * is finished in mali_executor_complete_group() function. 
*/ ++static _mali_osk_wq_work_t *executor_wq_notify_core_change = NULL; ++static _mali_osk_wait_queue_t *executor_notify_core_change_wait_queue = NULL; ++ ++/* ++ * ---------- Forward declaration of static functions ---------- + */ -+#define MALI_IGNORE(x) x=x ++static mali_bool mali_executor_is_suspended(void *data); ++static mali_bool mali_executor_is_working(void); ++static void mali_executor_disable_empty_virtual(void); ++static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group); ++static mali_bool mali_executor_has_virtual_group(void); ++static mali_bool mali_executor_virtual_group_is_usable(void); ++static void mali_executor_schedule(void); ++static void mali_executor_wq_schedule(void *arg); ++static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job); ++static void mali_executor_complete_group(struct mali_group *group, ++ mali_bool success, ++ struct mali_gp_job **gp_job_done, ++ struct mali_pp_job **pp_job_done); ++static void mali_executor_change_state_pp_physical(struct mali_group *group, ++ _mali_osk_list_t *old_list, ++ u32 *old_count, ++ _mali_osk_list_t *new_list, ++ u32 *new_count); ++static mali_bool mali_executor_group_is_in_state(struct mali_group *group, ++ enum mali_executor_state_t state); + -+#if defined(CONFIG_MALI_QUIET) -+#define MALI_PRINTF(args) -+#else -+#define MALI_PRINTF(args) _mali_osk_dbgmsg args; -+#endif ++static void mali_executor_group_enable_internal(struct mali_group *group); ++static void mali_executor_group_disable_internal(struct mali_group *group); ++static void mali_executor_core_scale(unsigned int target_core_nr); ++static void mali_executor_core_scale_in_group_complete(struct mali_group *group); ++static void mali_executor_notify_core_change(u32 num_cores); ++static void mali_executor_wq_notify_core_change(void *arg); ++static void mali_executor_change_group_status_disabled(struct mali_group *group); ++static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group); ++static void mali_executor_set_state_pp_physical(struct mali_group *group, ++ _mali_osk_list_t *new_list, ++ u32 *new_count); + -+#define MALI_PRINT_ERROR(args) do{ \ -+ MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \ -+ MALI_PRINTF((" %s()%4d\n ", __FUNCTION__, __LINE__)) ; \ -+ MALI_PRINTF(args); \ -+ MALI_PRINTF(("\n")); \ -+ } while(0) ++/* ++ * ---------- Actual implementation ---------- ++ */ + -+#define MALI_PRINT(args) do{ \ -+ MALI_PRINTF(("Mali: ")); \ -+ MALI_PRINTF(args); \ -+ } while (0) ++_mali_osk_errcode_t mali_executor_initialize(void) ++{ ++ mali_executor_lock_obj = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_EXECUTOR); ++ if (NULL == mali_executor_lock_obj) { ++ mali_executor_terminate(); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#ifdef DEBUG -+#ifndef mali_debug_level -+extern int mali_debug_level; -+#endif ++ executor_wq_high_pri = _mali_osk_wq_create_work_high_pri(mali_executor_wq_schedule, NULL); ++ if (NULL == executor_wq_high_pri) { ++ mali_executor_terminate(); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#define MALI_DEBUG_CODE(code) code -+#define MALI_DEBUG_PRINT(level, args) do { \ -+ if((level) <= mali_debug_level)\ -+ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \ -+ } while (0) ++ executor_working_wait_queue = _mali_osk_wait_queue_init(); ++ if (NULL == executor_working_wait_queue) { ++ mali_executor_terminate(); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args) ++ executor_wq_notify_core_change = 
_mali_osk_wq_create_work(mali_executor_wq_notify_core_change, NULL); ++ if (NULL == executor_wq_notify_core_change) { ++ mali_executor_terminate(); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#define MALI_DEBUG_PRINT_IF(level,condition,args) \ -+ if((condition)&&((level) <= mali_debug_level))\ -+ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } ++ executor_notify_core_change_wait_queue = _mali_osk_wait_queue_init(); ++ if (NULL == executor_notify_core_change_wait_queue) { ++ mali_executor_terminate(); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#define MALI_DEBUG_PRINT_ELSE(level, args)\ -+ else if((level) <= mali_debug_level)\ -+ { MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } ++ return _MALI_OSK_ERR_OK; ++} + -+/** -+ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint -+ * to be entered (see _mali_osk_break() ). An alternative would be to call -+ * _mali_osk_abort(), on OSs that support it. -+ */ -+#define MALI_DEBUG_PRINT_ASSERT(condition, args) do {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0) -+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0) -+#define MALI_DEBUG_ASSERT(condition) do {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0) ++void mali_executor_terminate(void) ++{ ++ if (NULL != executor_notify_core_change_wait_queue) { ++ _mali_osk_wait_queue_term(executor_notify_core_change_wait_queue); ++ executor_notify_core_change_wait_queue = NULL; ++ } + -+#else /* DEBUG */ ++ if (NULL != executor_wq_notify_core_change) { ++ _mali_osk_wq_delete_work(executor_wq_notify_core_change); ++ executor_wq_notify_core_change = NULL; ++ } + -+#define MALI_DEBUG_CODE(code) -+#define MALI_DEBUG_PRINT(string,args) do {} while(0) -+#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0) -+#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0) -+#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0) -+#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0) -+#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0) -+#define MALI_DEBUG_ASSERT(condition) do {} while(0) ++ if (NULL != executor_working_wait_queue) { ++ _mali_osk_wait_queue_term(executor_working_wait_queue); ++ executor_working_wait_queue = NULL; ++ } + -+#endif /* DEBUG */ ++ if (NULL != executor_wq_high_pri) { ++ _mali_osk_wq_delete_work(executor_wq_high_pri); ++ executor_wq_high_pri = NULL; ++ } + -+/** -+ * variables from user space cannot be dereferenced from kernel space; tagging them -+ * with __user allows the GCC compiler to generate a warning. Other compilers may -+ * not support this so we define it here as an empty macro if the compiler doesn't -+ * define it. -+ */ -+#ifndef __user -+#define __user -+#endif ++ if (NULL != mali_executor_lock_obj) { ++ _mali_osk_spinlock_irq_term(mali_executor_lock_obj); ++ mali_executor_lock_obj = NULL; ++ } ++} + -+#endif /* __MALI_KERNEL_COMMON_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c -new file mode 100644 -index 000000000..87f97b710 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c -@@ -0,0 +1,1349 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++void mali_executor_populate(void) ++{ ++ u32 num_groups; ++ u32 i; + -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_ukk.h" -+#include "mali_kernel_core.h" -+#include "mali_memory.h" -+#include "mali_mem_validation.h" -+#include "mali_mmu.h" -+#include "mali_mmu_page_directory.h" -+#include "mali_dlbu.h" -+#include "mali_broadcast.h" -+#include "mali_gp.h" -+#include "mali_pp.h" -+#include "mali_executor.h" -+#include "mali_pp_job.h" -+#include "mali_group.h" -+#include "mali_pm.h" -+#include "mali_pmu.h" -+#include "mali_scheduler.h" -+#include "mali_kernel_utilization.h" -+#include "mali_l2_cache.h" -+#include "mali_timeline.h" -+#include "mali_soft_job.h" -+#include "mali_pm_domain.h" -+#if defined(CONFIG_MALI400_PROFILING) -+#include "mali_osk_profiling.h" -+#endif -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+#include "mali_profiling_internal.h" -+#endif -+#include "mali_control_timer.h" -+#include "mali_dvfs_policy.h" -+#include -+#include -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+#include -+#else -+#include -+#endif -+#endif ++ num_groups = mali_group_get_glob_num_groups(); + -+#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff ++ /* Do we have a virtual group? */ ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); + -+/* Mali GPU memory. Real values come from module parameter or from device specific data */ -+unsigned int mali_dedicated_mem_start = 0; -+unsigned int mali_dedicated_mem_size = 0; ++ if (mali_group_is_virtual(group)) { ++ virtual_group = group; ++ virtual_group_state = EXEC_STATE_INACTIVE; ++ break; ++ } ++ } + -+/* Default shared memory size is set to 4G. */ -+unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE; ++ /* Find all the available physical GP and PP cores */ ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); + -+/* Frame buffer memory to be accessible by Mali GPU */ -+int mali_fb_start = 0; -+int mali_fb_size = 0; ++ if (NULL != group) { ++ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); ++ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); + -+/* Mali max job runtime */ -+extern int mali_max_job_runtime; ++ if (!mali_group_is_virtual(group)) { ++ if (NULL != pp_core) { ++ if (0 == pp_version) { ++ /* Retrieve PP version from the first available PP core */ ++ pp_version = mali_pp_core_get_version(pp_core); ++ } + -+/** Start profiling from module load? */ -+int mali_boot_profiling = 0; ++ if (NULL != virtual_group) { ++ mali_executor_lock(); ++ mali_group_add_group(virtual_group, group); ++ mali_executor_unlock(); ++ } else { ++ _mali_osk_list_add(&group->executor_list, &group_list_inactive); ++ group_list_inactive_count++; ++ } + -+/** Limits for the number of PP cores behind each L2 cache. 
*/ -+int mali_max_pp_cores_group_1 = 0xFF; -+int mali_max_pp_cores_group_2 = 0xFF; ++ num_physical_pp_cores_total++; ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(gp_core); + -+int mali_inited_pp_cores_group_1 = 0; -+int mali_inited_pp_cores_group_2 = 0; ++ if (0 == gp_version) { ++ /* Retrieve GP version */ ++ gp_version = mali_gp_core_get_version(gp_core); ++ } + -+static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN; -+static uintptr_t global_gpu_base_address = 0; -+static u32 global_gpu_major_version = 0; -+static u32 global_gpu_minor_version = 0; ++ gp_group = group; ++ gp_group_state = EXEC_STATE_INACTIVE; ++ } + -+mali_bool mali_gpu_class_is_mali450 = MALI_FALSE; -+mali_bool mali_gpu_class_is_mali470 = MALI_FALSE; ++ } ++ } ++ } + -+static _mali_osk_errcode_t mali_set_global_gpu_base_address(void) ++ num_physical_pp_cores_enabled = num_physical_pp_cores_total; ++} ++ ++void mali_executor_depopulate(void) +{ -+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; ++ struct mali_group *group; ++ struct mali_group *temp; + -+ global_gpu_base_address = _mali_osk_resource_base_address(); -+ if (0 == global_gpu_base_address) { -+ err = _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state); ++ ++ if (NULL != gp_group) { ++ mali_group_delete(gp_group); ++ gp_group = NULL; + } + -+ return err; -+} ++ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state); + -+static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp) -+{ -+ switch (resource_pp->base - global_gpu_base_address) { -+ case 0x08000: -+ case 0x20000: /* fall-through for aliased mapping */ -+ return 0x01; -+ case 0x0A000: -+ case 0x22000: /* fall-through for aliased mapping */ -+ return 0x02; -+ case 0x0C000: -+ case 0x24000: /* fall-through for aliased mapping */ -+ return 0x04; -+ case 0x0E000: -+ case 0x26000: /* fall-through for aliased mapping */ -+ return 0x08; -+ case 0x28000: -+ return 0x10; -+ case 0x2A000: -+ return 0x20; -+ case 0x2C000: -+ return 0x40; -+ case 0x2E000: -+ return 0x80; -+ default: -+ return 0; ++ if (NULL != virtual_group) { ++ mali_group_delete(virtual_group); ++ virtual_group = NULL; ++ } ++ ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&group_list_working)); ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { ++ mali_group_delete(group); ++ } ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { ++ mali_group_delete(group); ++ } ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { ++ mali_group_delete(group); + } +} + -+static _mali_osk_errcode_t mali_parse_product_info(void) ++void mali_executor_suspend(void) +{ -+ _mali_osk_resource_t first_pp_resource; ++ mali_executor_lock(); + -+ /* Find the first PP core resource (again) */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) { -+ /* Create a dummy PP object for this core so that we can read the version register */ -+ struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0); -+ if (NULL != group) { -+ struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource)); -+ if (NULL != pp_core) { -+ u32 pp_version; ++ /* Increment the pause_count so that no more jobs will be scheduled */ ++ pause_count++; + -+ pp_version = mali_pp_core_get_version(pp_core); ++ mali_executor_unlock(); + -+ mali_group_delete(group); ++ 
_mali_osk_wait_queue_wait_event(executor_working_wait_queue, ++ mali_executor_is_suspended, NULL); + -+ global_gpu_major_version = (pp_version >> 8) & 0xFF; -+ global_gpu_minor_version = pp_version & 0xFF; ++ /* ++ * mali_executor_complete_XX() leaves jobs in idle state. ++ * deactivate option is used when we are going to power down ++ * the entire GPU (OS suspend) and want a consistent SW vs HW ++ * state. ++ */ ++ mali_executor_lock(); + -+ switch (pp_version >> 16) { -+ case MALI200_PP_PRODUCT_ID: -+ global_product_id = _MALI_PRODUCT_ID_MALI200; -+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); -+ MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n")); -+ _mali_osk_abort(); -+ break; -+ case MALI300_PP_PRODUCT_ID: -+ global_product_id = _MALI_PRODUCT_ID_MALI300; -+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); -+ break; -+ case MALI400_PP_PRODUCT_ID: -+ global_product_id = _MALI_PRODUCT_ID_MALI400; -+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); -+ break; -+ case MALI450_PP_PRODUCT_ID: -+ global_product_id = _MALI_PRODUCT_ID_MALI450; -+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); -+ break; -+ case MALI470_PP_PRODUCT_ID: -+ global_product_id = _MALI_PRODUCT_ID_MALI470; -+ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); -+ break; -+ default: -+ MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version)); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_executor_deactivate_list_idle(MALI_TRUE); + -+ return _MALI_OSK_ERR_OK; -+ } else { -+ MALI_PRINT_ERROR(("Failed to create initial PP object\n")); ++ /* ++ * The following steps are used to deactive all of activated ++ * (MALI_GROUP_STATE_ACTIVE) and activating (MALI_GROUP ++ * _STAET_ACTIVATION_PENDING) groups, to make sure the variable ++ * pd_mask_wanted is equal with 0. */ ++ if (MALI_GROUP_STATE_INACTIVE != mali_group_get_state(gp_group)) { ++ gp_group_state = EXEC_STATE_INACTIVE; ++ mali_group_deactivate(gp_group); ++ } ++ ++ if (mali_executor_has_virtual_group()) { ++ if (MALI_GROUP_STATE_INACTIVE ++ != mali_group_get_state(virtual_group)) { ++ virtual_group_state = EXEC_STATE_INACTIVE; ++ mali_group_deactivate(virtual_group); ++ } ++ } ++ ++ if (0 < group_list_inactive_count) { ++ struct mali_group *group; ++ struct mali_group *temp; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, ++ &group_list_inactive, ++ struct mali_group, executor_list) { ++ if (MALI_GROUP_STATE_ACTIVATION_PENDING ++ == mali_group_get_state(group)) { ++ mali_group_deactivate(group); ++ } ++ ++ /* ++ * On mali-450 platform, we may have physical group in the group inactive ++ * list, and its state is MALI_GROUP_STATE_ACTIVATION_PENDING, so we only ++ * deactivate it is not enough, we still also need add it back to virtual group. ++ * And now, virtual group must be in INACTIVE state, so it's safe to add ++ * physical group to virtual group at this point. 
++ */ ++ if (NULL != virtual_group) { ++ _mali_osk_list_delinit(&group->executor_list); ++ group_list_inactive_count--; ++ ++ mali_group_add_group(virtual_group, group); + } -+ } else { -+ MALI_PRINT_ERROR(("Failed to create initial group object\n")); + } -+ } else { -+ MALI_PRINT_ERROR(("First PP core not specified in config file\n")); + } + -+ return _MALI_OSK_ERR_FAULT; ++ mali_executor_unlock(); +} + -+static void mali_delete_groups(void) ++void mali_executor_resume(void) +{ -+ struct mali_group *group; ++ mali_executor_lock(); + -+ group = mali_group_get_glob_group(0); -+ while (NULL != group) { -+ mali_group_delete(group); -+ group = mali_group_get_glob_group(0); ++ /* Decrement pause_count to allow scheduling again (if it reaches 0) */ ++ pause_count--; ++ if (0 == pause_count) { ++ mali_executor_schedule(); + } + -+ MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups()); ++ mali_executor_unlock(); +} + -+static void mali_delete_l2_cache_cores(void) ++u32 mali_executor_get_num_cores_total(void) +{ -+ struct mali_l2_cache_core *l2; ++ return num_physical_pp_cores_total; ++} + -+ l2 = mali_l2_cache_core_get_glob_l2_core(0); -+ while (NULL != l2) { -+ mali_l2_cache_delete(l2); -+ l2 = mali_l2_cache_core_get_glob_l2_core(0); -+ } ++u32 mali_executor_get_num_cores_enabled(void) ++{ ++ return num_physical_pp_cores_enabled; ++} + -+ MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores()); ++struct mali_pp_core *mali_executor_get_virtual_pp(void) ++{ ++ MALI_DEBUG_ASSERT_POINTER(virtual_group); ++ MALI_DEBUG_ASSERT_POINTER(virtual_group->pp_core); ++ return virtual_group->pp_core; +} + -+static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index) ++struct mali_group *mali_executor_get_virtual_group(void) +{ -+ struct mali_l2_cache_core *l2_cache = NULL; ++ return virtual_group; ++} + -+ if (NULL != resource) { ++void mali_executor_zap_all_active(struct mali_session_data *session) ++{ ++ struct mali_group *group; ++ struct mali_group *temp; ++ mali_bool ret; + -+ MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description)); ++ mali_executor_lock(); + -+ l2_cache = mali_l2_cache_create(resource, domain_index); -+ if (NULL == l2_cache) { -+ MALI_PRINT_ERROR(("Failed to create L2 cache object\n")); -+ return NULL; -+ } -+ } -+ MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n")); ++ /* ++ * This function is a bit complicated because ++ * mali_group_zap_session() can fail. This only happens because the ++ * group is in an unhandled page fault status. ++ * We need to make sure this page fault is handled before we return, ++ * so that we know every single outstanding MMU transactions have ++ * completed. This will allow caller to safely remove physical pages ++ * when we have returned. 
++ */ + -+ return l2_cache; -+} ++ MALI_DEBUG_ASSERT(NULL != gp_group); ++ ret = mali_group_zap_session(gp_group, session); ++ if (MALI_FALSE == ret) { ++ struct mali_gp_job *gp_job = NULL; + -+static _mali_osk_errcode_t mali_parse_config_l2_cache(void) -+{ -+ struct mali_l2_cache_core *l2_cache = NULL; ++ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL); + -+ if (mali_is_mali400()) { -+ _mali_osk_resource_t l2_resource; -+ if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) { -+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ MALI_DEBUG_ASSERT_POINTER(gp_job); + -+ l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20); -+ if (NULL == l2_cache) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } else if (mali_is_mali450()) { -+ /* -+ * L2 for GP at 0x10000 -+ * L2 for PP0-3 at 0x01000 -+ * L2 for PP4-7 at 0x11000 (optional) -+ */ ++ /* GP job completed, make sure it is freed */ ++ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, ++ MALI_TRUE, MALI_TRUE); ++ } + -+ _mali_osk_resource_t l2_gp_resource; -+ _mali_osk_resource_t l2_pp_grp0_resource; -+ _mali_osk_resource_t l2_pp_grp1_resource; ++ if (mali_executor_has_virtual_group()) { ++ ret = mali_group_zap_session(virtual_group, session); ++ if (MALI_FALSE == ret) { ++ struct mali_pp_job *pp_job = NULL; + -+ /* Make cluster for GP's L2 */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) { -+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n")); -+ l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20); -+ if (NULL == l2_cache) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } else { -+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job); + -+ /* Find corresponding l2 domain */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) { -+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n")); -+ l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21); -+ if (NULL == l2_cache) { -+ return _MALI_OSK_ERR_FAULT; ++ if (NULL != pp_job) { ++ /* PP job completed, make sure it is freed */ ++ mali_scheduler_complete_pp_job(pp_job, 0, ++ MALI_FALSE, MALI_TRUE); + } -+ } else { -+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n")); -+ return _MALI_OSK_ERR_FAULT; + } ++ } + -+ /* Second PP core group is optional, don't fail if we don't find it */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) { -+ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n")); -+ l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22); -+ if (NULL == l2_cache) { -+ return _MALI_OSK_ERR_FAULT; ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, ++ struct mali_group, executor_list) { ++ ret = mali_group_zap_session(group, session); ++ if (MALI_FALSE == ret) { ++ ret = mali_group_zap_session(group, session); ++ if (MALI_FALSE == ret) { ++ struct mali_pp_job *pp_job = NULL; ++ ++ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job); ++ ++ if (NULL != pp_job) { ++ /* PP job completed, free it */ ++ mali_scheduler_complete_pp_job(pp_job, ++ 0, MALI_FALSE, ++ MALI_TRUE); ++ } + } + } -+ } 
else if (mali_is_mali470()) { -+ _mali_osk_resource_t l2c1_resource; ++ } + -+ /* Make cluster for L2C1 */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) { -+ MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n")); -+ l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21); -+ if (NULL == l2_cache) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_executor_unlock(); ++} ++ ++void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule) ++{ ++ if (MALI_SCHEDULER_MASK_EMPTY != mask) { ++ if (MALI_TRUE == deferred_schedule) { ++ _mali_osk_wq_schedule_work_high_pri(executor_wq_high_pri); + } else { -+ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n")); -+ return _MALI_OSK_ERR_FAULT; ++ /* Schedule from this thread*/ ++ mali_executor_lock(); ++ mali_executor_schedule(); ++ mali_executor_unlock(); + } + } -+ -+ return _MALI_OSK_ERR_OK; +} + -+static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache, -+ _mali_osk_resource_t *resource_mmu, -+ _mali_osk_resource_t *resource_gp, -+ _mali_osk_resource_t *resource_pp, -+ u32 domain_index) ++_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, ++ mali_bool in_upper_half) +{ -+ struct mali_mmu_core *mmu; -+ struct mali_group *group; ++ enum mali_interrupt_result int_result; ++ mali_bool time_out = MALI_FALSE; + -+ MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description)); ++ MALI_DEBUG_PRINT(4, ("Executor: GP interrupt from %s in %s half\n", ++ mali_group_core_description(group), ++ in_upper_half ? "upper" : "bottom")); + -+ /* Create the group object */ -+ group = mali_group_create(cache, NULL, NULL, domain_index); -+ if (NULL == group) { -+ MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description)); -+ return NULL; ++ mali_executor_lock(); ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; + } + -+ /* Create the MMU object inside group */ -+ mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE); -+ if (NULL == mmu) { -+ MALI_PRINT_ERROR(("Failed to create MMU object\n")); -+ mali_group_delete(group); -+ return NULL; -+ } ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_working(group)); + -+ if (NULL != resource_gp) { -+ /* Create the GP core object inside this group */ -+ struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group); -+ if (NULL == gp_core) { -+ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */ -+ MALI_PRINT_ERROR(("Failed to create GP object\n")); -+ mali_group_delete(group); -+ return NULL; ++ if (mali_group_has_timed_out(group)) { ++ int_result = MALI_INTERRUPT_RESULT_ERROR; ++ time_out = MALI_TRUE; ++ MALI_PRINT(("Executor GP: Job %d Timeout on %s\n", ++ mali_gp_job_get_id(group->gp_running_job), ++ mali_group_core_description(group))); ++ } else { ++ int_result = mali_group_get_interrupt_result_gp(group); ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; + } + } + -+ if (NULL != resource_pp) { -+ struct mali_pp_core *pp_core; -+ -+ /* Create the PP core object inside this group */ -+ pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp)); -+ if (NULL == pp_core) { -+ /* No need to clean up now, as we will clean up everything linked in from the 
cluster when we fail this function */ -+ MALI_PRINT_ERROR(("Failed to create PP object\n")); -+ mali_group_delete(group); -+ return NULL; -+ } ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ /* No interrupts signalled, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; + } ++#else ++ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result); ++#endif + -+ return group; -+} ++ mali_group_mask_all_interrupts_gp(group); + -+static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast, -+ _mali_osk_resource_t *resource_pp_bcast, -+ _mali_osk_resource_t *resource_dlbu, -+ _mali_osk_resource_t *resource_bcast) -+{ -+ struct mali_mmu_core *mmu_pp_bcast_core; -+ struct mali_pp_core *pp_bcast_core; -+ struct mali_dlbu_core *dlbu_core; -+ struct mali_bcast_unit *bcast_core; -+ struct mali_group *group; ++ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == int_result) { ++ if (mali_group_gp_is_active(group)) { ++ /* Only VS completed so far, while PLBU is still active */ + -+ MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description)); ++ /* Enable all but the current interrupt */ ++ mali_group_enable_interrupts_gp(group, int_result); + -+ /* Create the DLBU core object */ -+ dlbu_core = mali_dlbu_create(resource_dlbu); -+ if (NULL == dlbu_core) { -+ MALI_PRINT_ERROR(("Failed to create DLBU object \n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_OK; ++ } ++ } else if (MALI_INTERRUPT_RESULT_SUCCESS_PLBU == int_result) { ++ if (mali_group_gp_is_active(group)) { ++ /* Only PLBU completed so far, while VS is still active */ + -+ /* Create the Broadcast unit core */ -+ bcast_core = mali_bcast_unit_create(resource_bcast); -+ if (NULL == bcast_core) { -+ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n")); -+ mali_dlbu_delete(dlbu_core); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ /* Enable all but the current interrupt */ ++ mali_group_enable_interrupts_gp(group, int_result); + -+ /* Create the group object */ -+#if defined(DEBUG) -+ /* Get a physical PP group to temporarily add to broadcast unit. IRQ -+ * verification needs a physical group in the broadcast unit to test -+ * the broadcast unit interrupt line. */ -+ { -+ struct mali_group *phys_group = NULL; -+ int i; -+ for (i = 0; i < mali_group_get_glob_num_groups(); i++) { -+ phys_group = mali_group_get_glob_group(i); -+ if (NULL != mali_group_get_pp_core(phys_group)) break; ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_OK; + } -+ MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group)); ++ } else if (MALI_INTERRUPT_RESULT_OOM == int_result) { ++ struct mali_gp_job *job = mali_group_get_running_gp_job(group); + -+ /* Add the group temporarily to the broadcast, and update the -+ * broadcast HW. Since the HW is not updated when removing the -+ * group the IRQ check will work when the virtual PP is created -+ * later. -+ * -+ * When the virtual group gets populated, the actually used -+ * groups will be added to the broadcast unit and the HW will -+ * be updated. ++ /* PLBU out of mem */ ++ MALI_DEBUG_PRINT(3, ("Executor: PLBU needs more heap memory\n")); ++ ++#if defined(CONFIG_MALI400_PROFILING) ++ /* Give group a chance to generate a SUSPEND event */ ++ mali_group_oom(group); ++#endif ++ ++ /* ++ * no need to hold interrupt raised while ++ * waiting for more memory. 
+ */ -+ mali_bcast_add_group(bcast_core, phys_group); -+ mali_bcast_reset(bcast_core); -+ mali_bcast_remove_group(bcast_core, phys_group); -+ } -+#endif /* DEBUG */ -+ group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY); -+ if (NULL == group) { -+ MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description)); -+ mali_bcast_unit_delete(bcast_core); -+ mali_dlbu_delete(dlbu_core); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_executor_send_gp_oom_to_user(job); + -+ /* Create the MMU object inside group */ -+ mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE); -+ if (NULL == mmu_pp_bcast_core) { -+ MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n")); -+ mali_group_delete(group); -+ return _MALI_OSK_ERR_FAULT; ++ mali_executor_unlock(); ++ ++ return _MALI_OSK_ERR_OK; + } + -+ /* Create the PP core object inside this group */ -+ pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0); -+ if (NULL == pp_bcast_core) { -+ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */ -+ MALI_PRINT_ERROR(("Failed to create PP object\n")); -+ mali_group_delete(group); -+ return _MALI_OSK_ERR_FAULT; ++ /*Add for voltage scan function*/ ++ if (MALI_INTERRUPT_RESULT_ERROR == int_result) ++ mali_group_error++; ++ ++ /* We should now have a real interrupt to handle */ ++ ++ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n", ++ mali_group_core_description(group), ++ (MALI_INTERRUPT_RESULT_ERROR == int_result) ? ++ "ERROR" : "success")); ++ ++ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) { ++ /* Don't bother to do processing of errors in upper half */ ++ mali_executor_unlock(); ++ ++ if (MALI_FALSE == time_out) { ++ mali_group_schedule_bottom_half_gp(group); ++ } ++ } else { ++ struct mali_gp_job *job; ++ mali_bool success; ++ ++ /* ++ if (MALI_TRUE == time_out) { ++ mali_group_dump_status(group); ++ } ++ */ ++ ++ success = (int_result != MALI_INTERRUPT_RESULT_ERROR) ? 
++ MALI_TRUE : MALI_FALSE; ++ ++ mali_executor_complete_group(group, success, &job, NULL); ++ ++ mali_executor_unlock(); ++ ++ /* GP jobs always fully complete */ ++ MALI_DEBUG_ASSERT(NULL != job); ++ ++ /* This will notify user space and close the job object */ ++ mali_scheduler_complete_gp_job(job, success, ++ MALI_TRUE, MALI_TRUE); + } + + return _MALI_OSK_ERR_OK; +} + -+static _mali_osk_errcode_t mali_parse_config_groups(void) ++_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, ++ mali_bool in_upper_half) +{ -+ struct mali_group *group; -+ int cluster_id_gp = 0; -+ int cluster_id_pp_grp0 = 0; -+ int cluster_id_pp_grp1 = 0; -+ int i; ++ enum mali_interrupt_result int_result; ++ mali_bool time_out = MALI_FALSE; + -+ _mali_osk_resource_t resource_gp; -+ _mali_osk_resource_t resource_gp_mmu; -+ _mali_osk_resource_t resource_pp[8]; -+ _mali_osk_resource_t resource_pp_mmu[8]; -+ _mali_osk_resource_t resource_pp_mmu_bcast; -+ _mali_osk_resource_t resource_pp_bcast; -+ _mali_osk_resource_t resource_dlbu; -+ _mali_osk_resource_t resource_bcast; -+ _mali_osk_errcode_t resource_gp_found; -+ _mali_osk_errcode_t resource_gp_mmu_found; -+ _mali_osk_errcode_t resource_pp_found[8]; -+ _mali_osk_errcode_t resource_pp_mmu_found[8]; -+ _mali_osk_errcode_t resource_pp_mmu_bcast_found; -+ _mali_osk_errcode_t resource_pp_bcast_found; -+ _mali_osk_errcode_t resource_dlbu_found; -+ _mali_osk_errcode_t resource_bcast_found; ++ MALI_DEBUG_PRINT(4, ("Executor: PP interrupt from %s in %s half\n", ++ mali_group_core_description(group), ++ in_upper_half ? "upper" : "bottom")); + -+ if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) { -+ /* No known HW core */ ++ mali_executor_lock(); ++ ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); + return _MALI_OSK_ERR_FAULT; + } + -+ if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) { -+ /* Group settings are not overridden by module parameters, so use device settings */ -+ _mali_osk_device_data data = { 0, }; ++ if (in_upper_half) { ++ if (mali_group_is_in_virtual(group)) { ++ /* Child groups should never handle PP interrupts */ ++ MALI_DEBUG_ASSERT(!mali_group_has_timed_out(group)); ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_working(group)); ++ MALI_DEBUG_ASSERT(!mali_group_is_in_virtual(group)); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ /* Use device specific settings (if defined) */ -+ if (0 != data.max_job_runtime) { -+ mali_max_job_runtime = data.max_job_runtime; -+ } ++ if (mali_group_has_timed_out(group)) { ++ int_result = MALI_INTERRUPT_RESULT_ERROR; ++ time_out = MALI_TRUE; ++ MALI_PRINT(("Executor PP: Job %d Timeout on %s\n", ++ mali_pp_job_get_id(group->pp_running_job), ++ mali_group_core_description(group))); ++ } else { ++ int_result = mali_group_get_interrupt_result_pp(group); ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; + } + } + -+ if (mali_is_mali450()) { -+ /* Mali-450 have separate L2s for GP, and PP core group(s) */ -+ cluster_id_pp_grp0 = 1; -+ cluster_id_pp_grp1 = 2; ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ /* No interrupts signalled, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } else if (MALI_INTERRUPT_RESULT_SUCCESS == int_result) { ++ if 
(mali_group_is_virtual(group) && mali_group_pp_is_active(group)) { ++ /* Some child groups are still working, so nothing to do right now */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } + } ++#else ++ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_NONE != int_result); ++#endif + -+ resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp); -+ resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu); -+ resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0])); -+ resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1])); -+ resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2])); -+ resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3])); -+ resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4])); -+ resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5])); -+ resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6])); -+ resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7])); -+ resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0])); -+ resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1])); -+ resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2])); -+ resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3])); -+ resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4])); -+ resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5])); -+ resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6])); -+ resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7])); ++ /*Add voltage scan function*/ + ++ if (MALI_INTERRUPT_RESULT_ERROR == int_result) ++ mali_group_error++; + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast); -+ resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu); -+ resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast); -+ resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast); ++ /* We should now have a real interrupt to handle */ + -+ if (_MALI_OSK_ERR_OK != resource_bcast_found || -+ _MALI_OSK_ERR_OK != resource_dlbu_found || -+ _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found || -+ _MALI_OSK_ERR_OK != resource_pp_bcast_found) { -+ /* Missing mandatory core(s) for Mali-450 or Mali-470 */ -+ MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n")); -+ return _MALI_OSK_ERR_FAULT; ++ MALI_DEBUG_PRINT(4, ("Executor: Group %s completed with %s\n", ++ mali_group_core_description(group), ++ (MALI_INTERRUPT_RESULT_ERROR == int_result) ? 
++ "ERROR" : "success")); ++ ++ if (in_upper_half && MALI_INTERRUPT_RESULT_ERROR == int_result) { ++ /* Don't bother to do processing of errors in upper half */ ++ mali_group_mask_all_interrupts_pp(group); ++ mali_executor_unlock(); ++ ++ if (MALI_FALSE == time_out) { ++ mali_group_schedule_bottom_half_pp(group); ++ } ++ } else { ++ struct mali_pp_job *job = NULL; ++ mali_bool success; ++ ++ if (MALI_TRUE == time_out) { ++ mali_group_dump_status(group); ++ } ++ ++ success = (int_result == MALI_INTERRUPT_RESULT_SUCCESS) ? ++ MALI_TRUE : MALI_FALSE; ++ ++ mali_executor_complete_group(group, success, NULL, &job); ++ ++ mali_executor_unlock(); ++ ++ if (NULL != job) { ++ /* Notify user space and close the job object */ ++ mali_scheduler_complete_pp_job(job, ++ num_physical_pp_cores_total, ++ MALI_TRUE, MALI_TRUE); + } + } + -+ if (_MALI_OSK_ERR_OK != resource_gp_found || -+ _MALI_OSK_ERR_OK != resource_gp_mmu_found || -+ _MALI_OSK_ERR_OK != resource_pp_found[0] || -+ _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) { -+ /* Missing mandatory core(s) */ -+ MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n")); ++ return _MALI_OSK_ERR_OK; ++} ++ ++_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, ++ mali_bool in_upper_half) ++{ ++ enum mali_interrupt_result int_result; ++ ++ MALI_DEBUG_PRINT(4, ("Executor: MMU interrupt from %s in %s half\n", ++ mali_group_core_description(group), ++ in_upper_half ? "upper" : "bottom")); ++ ++ mali_executor_lock(); ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); + return _MALI_OSK_ERR_FAULT; + } + -+ MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores()); -+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP); -+ if (NULL == group) { ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_working(group)); ++ ++ int_result = mali_group_get_interrupt_result_mmu(group); ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ mali_executor_unlock(); + return _MALI_OSK_ERR_FAULT; + } + -+ /* Create group for first (and mandatory) PP core */ -+ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */ -+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0); -+ if (NULL == group) { ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ if (MALI_INTERRUPT_RESULT_NONE == int_result) { ++ /* No interrupts signalled, so nothing to do */ ++ mali_executor_unlock(); + return _MALI_OSK_ERR_FAULT; + } ++#else ++ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_ERROR == int_result); ++#endif + -+ mali_inited_pp_cores_group_1++; ++ /* We should now have a real interrupt to handle */ + -+ /* Create groups for rest of the cores in the first PP core group */ -+ for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */ -+ if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) { -+ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) { -+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i); -+ if (NULL == group) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ if (in_upper_half) { ++ /* Don't bother to 
do processing of errors in upper half */ + -+ mali_inited_pp_cores_group_1++; -+ } -+ } -+ } ++ struct mali_group *parent = group->parent_group; + -+ /* Create groups for cores in the second PP core group */ -+ for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */ -+ if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) { -+ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) { -+ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */ -+ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i); -+ if (NULL == group) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ mali_mmu_mask_all_interrupts(group->mmu); + -+ mali_inited_pp_cores_group_2++; -+ } -+ } -+ } ++ mali_executor_unlock(); + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ _mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast); -+ if (_MALI_OSK_ERR_OK != err) { -+ return err; ++ if (NULL == parent) { ++ mali_group_schedule_bottom_half_mmu(group); ++ } else { ++ mali_group_schedule_bottom_half_mmu(parent); + } -+ } + -+ mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1; -+ mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2; -+ MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2)); ++ } else { ++ struct mali_gp_job *gp_job = NULL; ++ struct mali_pp_job *pp_job = NULL; + -+ return _MALI_OSK_ERR_OK; -+} ++#ifdef DEBUG + -+static _mali_osk_errcode_t mali_check_shared_interrupts(void) -+{ -+#if !defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ if (MALI_TRUE == _mali_osk_shared_interrupts()) { -+ MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */ ++ u32 fault_address = mali_mmu_get_page_fault_addr(group->mmu); ++ u32 status = mali_mmu_get_status(group->mmu); ++ MALI_DEBUG_PRINT(2, ("Executor: Mali page fault detected at 0x%x from bus id %d of type %s on %s\n", ++ (void *)(uintptr_t)fault_address, ++ (status >> 6) & 0x1F, ++ (status & 32) ? "write" : "read", ++ group->mmu->hw_core.description)); ++ MALI_DEBUG_PRINT(3, ("Executor: MMU rawstat = 0x%08X, MMU status = 0x%08X\n", ++ mali_mmu_get_rawstat(group->mmu), status)); ++ mali_mmu_pagedir_diag(mali_session_get_page_directory(group->session), fault_address); ++#endif + -+ /* It is OK to compile support for shared interrupts even if Mali is not using it. 
*/ -+ return _MALI_OSK_ERR_OK; -+} ++ mali_executor_complete_group(group, MALI_FALSE, &gp_job, &pp_job); + -+static _mali_osk_errcode_t mali_parse_config_pmu(void) -+{ -+ _mali_osk_resource_t resource_pmu; ++ mali_executor_unlock(); + -+ MALI_DEBUG_ASSERT(0 != global_gpu_base_address); ++ if (NULL != gp_job) { ++ MALI_DEBUG_ASSERT(NULL == pp_job); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) { -+ struct mali_pmu_core *pmu; ++ /* Notify user space and close the job object */ ++ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, ++ MALI_TRUE, MALI_TRUE); ++ } else if (NULL != pp_job) { ++ MALI_DEBUG_ASSERT(NULL == gp_job); + -+ pmu = mali_pmu_create(&resource_pmu); -+ if (NULL == pmu) { -+ MALI_PRINT_ERROR(("Failed to create PMU\n")); -+ return _MALI_OSK_ERR_FAULT; ++ /* Notify user space and close the job object */ ++ mali_scheduler_complete_pp_job(pp_job, ++ num_physical_pp_cores_total, ++ MALI_TRUE, MALI_TRUE); + } + } + -+ /* It's ok if the PMU doesn't exist */ + return _MALI_OSK_ERR_OK; +} + -+static _mali_osk_errcode_t mali_parse_config_memory(void) ++void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups) +{ -+ _mali_osk_device_data data = { 0, }; -+ _mali_osk_errcode_t ret; ++ u32 i; ++ mali_bool child_groups_activated = MALI_FALSE; ++ mali_bool do_schedule = MALI_FALSE; ++#if defined(DEBUG) ++ u32 num_activated = 0; ++#endif + -+ /* The priority of setting the value of mali_shared_mem_size, -+ * mali_dedicated_mem_start and mali_dedicated_mem_size: -+ * 1. module parameter; -+ * 2. platform data; -+ * 3. default value; -+ **/ -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ /* Memory settings are not overridden by module parameters, so use device settings */ -+ if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) { -+ /* Use device specific settings (if defined) */ -+ mali_dedicated_mem_start = data.dedicated_mem_start; -+ mali_dedicated_mem_size = data.dedicated_mem_size; -+ } ++ MALI_DEBUG_ASSERT_POINTER(groups); ++ MALI_DEBUG_ASSERT(0 < num_groups); + -+ if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size && -+ 0 != data.shared_mem_size) { -+ mali_shared_mem_size = data.shared_mem_size; ++ mali_executor_lock(); ++ ++ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups\n", num_groups)); ++ ++ for (i = 0; i < num_groups; i++) { ++ MALI_DEBUG_PRINT(3, ("Executor: powering up group %s\n", ++ mali_group_core_description(groups[i]))); ++ ++ mali_group_power_up(groups[i]); ++ ++ if ((MALI_GROUP_STATE_ACTIVATION_PENDING != mali_group_get_state(groups[i]) || ++ (MALI_TRUE != mali_executor_group_is_in_state(groups[i], EXEC_STATE_INACTIVE)))) { ++ /* nothing more to do for this group */ ++ continue; + } -+ } + -+ if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) { -+ MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n", -+ mali_dedicated_mem_size, mali_dedicated_mem_start)); ++ MALI_DEBUG_PRINT(3, ("Executor: activating group %s\n", ++ mali_group_core_description(groups[i]))); + -+ /* Dedicated memory */ -+ ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to register dedicated memory\n")); -+ mali_memory_terminate(); -+ return ret; ++#if defined(DEBUG) ++ num_activated++; ++#endif ++ ++ if (mali_group_is_in_virtual(groups[i])) { ++ /* ++ * At least one child group of virtual group is powered on. 
++ */ ++ child_groups_activated = MALI_TRUE; ++ } else if (MALI_FALSE == mali_group_is_virtual(groups[i])) { ++ /* Set gp and pp not in virtual to active. */ ++ mali_group_set_active(groups[i]); + } -+ } + -+ if (0 < mali_shared_mem_size) { -+ MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size)); ++ /* Move group from inactive to idle list */ ++ if (groups[i] == gp_group) { ++ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE == ++ gp_group_state); ++ gp_group_state = EXEC_STATE_IDLE; ++ } else if (MALI_FALSE == mali_group_is_in_virtual(groups[i]) ++ && MALI_FALSE == mali_group_is_virtual(groups[i])) { ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_group_is_in_state(groups[i], ++ EXEC_STATE_INACTIVE)); + -+ /* Shared OS memory */ -+ ret = mali_memory_core_resource_os_memory(mali_shared_mem_size); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to register shared OS memory\n")); -+ mali_memory_terminate(); -+ return ret; ++ mali_executor_change_state_pp_physical(groups[i], ++ &group_list_inactive, ++ &group_list_inactive_count, ++ &group_list_idle, ++ &group_list_idle_count); + } ++ ++ do_schedule = MALI_TRUE; + } + -+ if (0 == mali_fb_start && 0 == mali_fb_size) { -+ /* Frame buffer settings are not overridden by module parameters, so use device settings */ -+ _mali_osk_device_data data = { 0, }; ++ if (mali_executor_has_virtual_group() && ++ MALI_TRUE == child_groups_activated && ++ MALI_GROUP_STATE_ACTIVATION_PENDING == ++ mali_group_get_state(virtual_group)) { ++ /* ++ * Try to active virtual group while it may be not sucessful every time, ++ * because there is one situation that not all of child groups are powered on ++ * in one time and virtual group is in activation pending state. ++ */ ++ if (mali_group_set_active(virtual_group)) { ++ /* Move group from inactive to idle */ ++ MALI_DEBUG_ASSERT(EXEC_STATE_INACTIVE == ++ virtual_group_state); ++ virtual_group_state = EXEC_STATE_IDLE; + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ /* Use device specific settings (if defined) */ -+ mali_fb_start = data.fb_start; -+ mali_fb_size = data.fb_size; ++ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated, 1 virtual activated.\n", num_groups, num_activated)); ++ } else { ++ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated)); + } -+ -+ MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n", -+ mali_fb_size, mali_fb_start)); + } else { -+ MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n", -+ mali_fb_size, mali_fb_start)); ++ MALI_DEBUG_PRINT(3, ("Executor: powering up %u groups completed, %u physical activated\n", num_groups, num_activated)); + } + -+ if (0 != mali_fb_size) { -+ /* Register frame buffer */ -+ ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n")); -+ mali_memory_terminate(); -+ return ret; -+ } ++ if (MALI_TRUE == do_schedule) { ++ /* Trigger a schedule */ ++ mali_executor_schedule(); + } + -+ return _MALI_OSK_ERR_OK; ++ mali_executor_unlock(); +} + -+static void mali_detect_gpu_class(void) ++void mali_executor_group_power_down(struct mali_group *groups[], ++ u32 num_groups) +{ -+ if (_mali_osk_identify_gpu_resource() == 0x450) -+ mali_gpu_class_is_mali450 = MALI_TRUE; ++ u32 i; + -+ if (_mali_osk_identify_gpu_resource() == 0x470) -+ 
mali_gpu_class_is_mali470 = MALI_TRUE; -+} ++ MALI_DEBUG_ASSERT_POINTER(groups); ++ MALI_DEBUG_ASSERT(0 < num_groups); + -+static _mali_osk_errcode_t mali_init_hw_reset(void) -+{ -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ _mali_osk_resource_t resource_bcast; ++ mali_executor_lock(); + -+ /* Ensure broadcast unit is in a good state before we start creating -+ * groups and cores. -+ */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) { -+ struct mali_bcast_unit *bcast_core; ++ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups\n", num_groups)); + -+ bcast_core = mali_bcast_unit_create(&resource_bcast); -+ if (NULL == bcast_core) { -+ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ mali_bcast_unit_delete(bcast_core); ++ for (i = 0; i < num_groups; i++) { ++ /* Groups must be either disabled or inactive. while for virtual group, ++ * it maybe in empty state, because when we meet pm_runtime_suspend, ++ * virtual group could be powered off, and before we acquire mali_executor_lock, ++ * we must release mali_pm_state_lock, if there is a new physical job was queued, ++ * all of physical groups in virtual group could be pulled out, so we only can ++ * powered down an empty virtual group. Those physical groups will be powered ++ * up in following pm_runtime_resume callback function. ++ */ ++ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(groups[i], ++ EXEC_STATE_DISABLED) || ++ mali_executor_group_is_in_state(groups[i], ++ EXEC_STATE_INACTIVE) || ++ mali_executor_group_is_in_state(groups[i], ++ EXEC_STATE_EMPTY)); ++ ++ MALI_DEBUG_PRINT(3, ("Executor: powering down group %s\n", ++ mali_group_core_description(groups[i]))); ++ ++ mali_group_power_down(groups[i]); + } -+#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ + -+ return _MALI_OSK_ERR_OK; ++ MALI_DEBUG_PRINT(3, ("Executor: powering down %u groups completed\n", num_groups)); ++ ++ mali_executor_unlock(); +} + -+_mali_osk_errcode_t mali_initialize_subsystems(void) ++void mali_executor_abort_session(struct mali_session_data *session) +{ -+ _mali_osk_errcode_t err; ++ struct mali_group *group; ++ struct mali_group *tmp_group; + -+#ifdef CONFIG_MALI_DT -+ err = _mali_osk_resource_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } -+#endif ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT(session->is_aborting); + -+ mali_pp_job_initialize(); ++ MALI_DEBUG_PRINT(3, ++ ("Executor: Aborting all jobs from session 0x%08X.\n", ++ session)); + -+ err = mali_timeline_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } -+ -+ err = mali_session_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } ++ mali_executor_lock(); + -+ /*Try to init gpu secure mode */ -+ _mali_osk_gpu_secure_mode_init(); ++ if (mali_group_get_session(gp_group) == session) { ++ if (EXEC_STATE_WORKING == gp_group_state) { ++ struct mali_gp_job *gp_job = NULL; + -+#if defined(CONFIG_MALI400_PROFILING) -+ err = _mali_osk_profiling_init(mali_boot_profiling ? 
MALI_TRUE : MALI_FALSE); -+ if (_MALI_OSK_ERR_OK != err) { -+ /* No biggie if we weren't able to initialize the profiling */ -+ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n")); ++ mali_executor_complete_group(gp_group, MALI_FALSE, &gp_job, NULL); ++ ++ MALI_DEBUG_ASSERT_POINTER(gp_job); ++ ++ /* GP job completed, make sure it is freed */ ++ mali_scheduler_complete_gp_job(gp_job, MALI_FALSE, ++ MALI_FALSE, MALI_TRUE); ++ } else { ++ /* Same session, but not working, so just clear it */ ++ mali_group_clear_session(gp_group); ++ } + } -+#endif + -+ err = mali_memory_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; ++ if (mali_executor_has_virtual_group()) { ++ if (EXEC_STATE_WORKING == virtual_group_state ++ && mali_group_get_session(virtual_group) == session) { ++ struct mali_pp_job *pp_job = NULL; ++ ++ mali_executor_complete_group(virtual_group, MALI_FALSE, NULL, &pp_job); ++ ++ if (NULL != pp_job) { ++ /* PP job completed, make sure it is freed */ ++ mali_scheduler_complete_pp_job(pp_job, 0, ++ MALI_FALSE, MALI_TRUE); ++ } ++ } + } + -+ err = mali_executor_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; ++ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working, ++ struct mali_group, executor_list) { ++ if (mali_group_get_session(group) == session) { ++ struct mali_pp_job *pp_job = NULL; ++ ++ mali_executor_complete_group(group, MALI_FALSE, NULL, &pp_job); ++ ++ if (NULL != pp_job) { ++ /* PP job completed, make sure it is freed */ ++ mali_scheduler_complete_pp_job(pp_job, 0, ++ MALI_FALSE, MALI_TRUE); ++ } ++ } + } + -+ err = mali_scheduler_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; ++ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_idle, struct mali_group, executor_list) { ++ mali_group_clear_session(group); + } + -+ /* Configure memory early, needed by mali_mmu_initialize. */ -+ err = mali_parse_config_memory(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; ++ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_inactive, struct mali_group, executor_list) { ++ mali_group_clear_session(group); + } + -+ err = mali_set_global_gpu_base_address(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; ++ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_disabled, struct mali_group, executor_list) { ++ mali_group_clear_session(group); + } + -+ /* Detect GPU class (uses L2 cache count) */ -+ mali_detect_gpu_class(); ++ mali_executor_unlock(); ++} + -+ err = mali_check_shared_interrupts(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } + -+ /* Initialize the MALI PMU (will not touch HW!) 
*/ -+ err = mali_parse_config_pmu(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } ++void mali_executor_core_scaling_enable(void) ++{ ++ /* PS: Core scaling is by default enabled */ ++ core_scaling_enabled = MALI_TRUE; ++} + -+ /* Initialize the power management module */ -+ err = mali_pm_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } ++void mali_executor_core_scaling_disable(void) ++{ ++ core_scaling_enabled = MALI_FALSE; ++} + -+ /* Make sure the entire GPU stays on for the rest of this function */ -+ mali_pm_init_begin(); ++mali_bool mali_executor_core_scaling_is_enabled(void) ++{ ++ return core_scaling_enabled; ++} + -+ /* Ensure HW is in a good state before starting to access cores. */ -+ err = mali_init_hw_reset(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_terminate_subsystems(); -+ return err; -+ } ++void mali_executor_group_enable(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ /* Detect which Mali GPU we are dealing with */ -+ err = mali_parse_product_info(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; ++ mali_executor_lock(); ++ ++ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group)) ++ && (mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) { ++ mali_executor_group_enable_internal(group); + } + -+ /* The global_product_id is now populated with the correct Mali GPU */ ++ mali_executor_schedule(); ++ mali_executor_unlock(); + -+ /* Start configuring the actual Mali hardware. */ ++ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++} + -+ err = mali_mmu_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } ++/* ++ * If a physical group is inactive or idle, we should disable it immediately, ++ * if group is in virtual, and virtual group is idle, disable given physical group in it. 
++ */ ++void mali_executor_group_disable(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ err = mali_dlbu_initialize(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } -+ } ++ mali_executor_lock(); + -+ err = mali_parse_config_l2_cache(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; ++ if ((NULL != mali_group_get_gp_core(group) || NULL != mali_group_get_pp_core(group)) ++ && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED))) { ++ mali_executor_group_disable_internal(group); + } + -+ err = mali_parse_config_groups(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } ++ mali_executor_schedule(); ++ mali_executor_unlock(); + -+ /* Move groups into executor */ -+ mali_executor_populate(); ++ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++} + -+ /* Need call after all group has assigned a domain */ -+ mali_pm_power_cost_setup(); ++mali_bool mali_executor_group_is_disabled(struct mali_group *group) ++{ ++ /* NB: This function is not optimized for time critical usage */ + -+ /* Initialize the GPU timer */ -+ err = mali_control_timer_init(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } ++ mali_bool ret; + -+ /* Initialize the GPU utilization tracking */ -+ err = mali_utilization_init(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } ++ MALI_DEBUG_ASSERT_POINTER(group); + -+#if defined(CONFIG_MALI_DVFS) -+ err = mali_dvfs_policy_init(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_init_end(); -+ mali_terminate_subsystems(); -+ return err; -+ } -+#endif ++ mali_executor_lock(); ++ ret = mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED); ++ mali_executor_unlock(); + -+ /* Allowing the system to be turned off */ -+ mali_pm_init_end(); ++ return ret; ++} + -+ return _MALI_OSK_ERR_OK; /* all ok */ ++int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override) ++{ ++ if (target_core_nr == num_physical_pp_cores_enabled) return 0; ++ if (MALI_FALSE == core_scaling_enabled && MALI_FALSE == override) return -EPERM; ++ if (target_core_nr > num_physical_pp_cores_total) return -EINVAL; ++ if (0 == target_core_nr) return -EINVAL; ++ ++ mali_executor_core_scale(target_core_nr); ++ ++ _mali_osk_wq_schedule_work(executor_wq_notify_core_change); ++ ++ return 0; +} + -+void mali_terminate_subsystems(void) ++#if MALI_STATE_TRACKING ++u32 mali_executor_dump_state(char *buf, u32 size) +{ -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ int n = 0; ++ struct mali_group *group; ++ struct mali_group *temp; + -+ MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n")); ++ mali_executor_lock(); + -+ mali_utilization_term(); -+ mali_control_timer_term(); ++ switch (gp_group_state) { ++ case EXEC_STATE_INACTIVE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "GP group is in state INACTIVE\n"); ++ break; ++ case EXEC_STATE_IDLE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "GP group is in state IDLE\n"); ++ break; ++ case EXEC_STATE_WORKING: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "GP group is in state WORKING\n"); ++ break; ++ default: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "GP group is in unknown/illegal state %u\n", ++ gp_group_state); ++ break; ++ } + -+ 
mali_executor_depopulate(); -+ mali_delete_groups(); /* Delete groups not added to executor */ -+ mali_executor_terminate(); ++ n += mali_group_dump_state(gp_group, buf + n, size - n); + -+ mali_scheduler_terminate(); -+ mali_pp_job_terminate(); -+ mali_delete_l2_cache_cores(); -+ mali_mmu_terminate(); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Physical PP groups in WORKING state (count = %u):\n", ++ group_list_working_count); + -+ if (mali_is_mali450() || mali_is_mali470()) { -+ mali_dlbu_terminate(); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) { ++ n += mali_group_dump_state(group, buf + n, size - n); + } + -+ mali_pm_terminate(); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Physical PP groups in IDLE state (count = %u):\n", ++ group_list_idle_count); + -+ if (NULL != pmu) { -+ mali_pmu_delete(pmu); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { ++ n += mali_group_dump_state(group, buf + n, size - n); + } + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_term(); -+#endif ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Physical PP groups in INACTIVE state (count = %u):\n", ++ group_list_inactive_count); + -+ _mali_osk_gpu_secure_mode_deinit(); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { ++ n += mali_group_dump_state(group, buf + n, size - n); ++ } + -+ mali_memory_terminate(); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Physical PP groups in DISABLED state (count = %u):\n", ++ group_list_disabled_count); + -+ mali_session_terminate(); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { ++ n += mali_group_dump_state(group, buf + n, size - n); ++ } + -+ mali_timeline_terminate(); ++ if (mali_executor_has_virtual_group()) { ++ switch (virtual_group_state) { ++ case EXEC_STATE_EMPTY: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP group is in state EMPTY\n"); ++ break; ++ case EXEC_STATE_INACTIVE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP group is in state INACTIVE\n"); ++ break; ++ case EXEC_STATE_IDLE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP group is in state IDLE\n"); ++ break; ++ case EXEC_STATE_WORKING: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP group is in state WORKING\n"); ++ break; ++ default: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP group is in unknown/illegal state %u\n", ++ virtual_group_state); ++ break; ++ } + -+ global_gpu_base_address = 0; -+} ++ n += mali_group_dump_state(virtual_group, buf + n, size - n); ++ } + -+_mali_product_id_t mali_kernel_core_get_product_id(void) -+{ -+ return global_product_id; ++ mali_executor_unlock(); ++ ++ n += _mali_osk_snprintf(buf + n, size - n, "\n"); ++ ++ return n; +} ++#endif + -+u32 mali_kernel_core_get_gpu_major_version(void) ++_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args) +{ -+ return global_gpu_major_version; ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++ args->number_of_total_cores = num_physical_pp_cores_total; ++ args->number_of_enabled_cores = num_physical_pp_cores_enabled; ++ return _MALI_OSK_ERR_OK; +} + -+u32 mali_kernel_core_get_gpu_minor_version(void) ++_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args) +{ -+ return global_gpu_minor_version; ++ 
MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++ args->version = pp_version; ++ return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args) ++_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args) +{ + MALI_DEBUG_ASSERT_POINTER(args); + MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ -+ /* check compatability */ -+ if (args->version == _MALI_UK_API_VERSION) { -+ args->compatible = 1; -+ } else { -+ args->compatible = 0; -+ } -+ -+ args->version = _MALI_UK_API_VERSION; /* report our version */ -+ -+ /* success regardless of being compatible or not */ -+ MALI_SUCCESS; ++ args->number_of_cores = 1; ++ return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args) ++_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args) +{ + MALI_DEBUG_ASSERT_POINTER(args); + MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); -+ -+ /* check compatability */ -+ if (args->version == _MALI_UK_API_VERSION) { -+ args->compatible = 1; -+ } else { -+ args->compatible = 0; -+ } -+ -+ args->version = _MALI_UK_API_VERSION; /* report our version */ -+ -+ /* success regardless of being compatible or not */ ++ args->version = gp_version; + return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args) ++_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args) +{ -+ _mali_osk_errcode_t err; -+ _mali_osk_notification_t *notification; -+ _mali_osk_notification_queue_t *queue; + struct mali_session_data *session; ++ struct mali_gp_job *job; + -+ /* check input */ + MALI_DEBUG_ASSERT_POINTER(args); + MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + + session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ queue = session->ioctl_queue; + -+ /* if the queue does not exist we're currently shutting down */ -+ if (NULL == queue) { -+ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. 
Asking userspace to stop querying\n")); -+ args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS; -+ return _MALI_OSK_ERR_OK; -+ } ++ if (_MALIGP_JOB_RESUME_WITH_NEW_HEAP == args->code) { ++ _mali_osk_notification_t *new_notification = NULL; + -+ /* receive a notification, might sleep */ -+ err = _mali_osk_notification_queue_receive(queue, ¬ification); -+ if (_MALI_OSK_ERR_OK != err) { -+ MALI_ERROR(err); /* errcode returned, pass on to caller */ -+ } ++ new_notification = _mali_osk_notification_create( ++ _MALI_NOTIFICATION_GP_STALLED, ++ sizeof(_mali_uk_gp_job_suspended_s)); + -+ /* copy the buffer to the user */ -+ args->type = (_mali_uk_notification_type)notification->notification_type; -+ _mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size); ++ if (NULL != new_notification) { ++ MALI_DEBUG_PRINT(3, ("Executor: Resuming job %u with new heap; 0x%08X - 0x%08X\n", ++ args->cookie, args->arguments[0], args->arguments[1])); + -+ /* finished with the notification */ -+ _mali_osk_notification_delete(notification); ++ mali_executor_lock(); + -+ return _MALI_OSK_ERR_OK; /* all ok */ -+} ++ /* Resume the job in question if it is still running */ ++ job = mali_group_get_running_gp_job(gp_group); ++ if (NULL != job && ++ args->cookie == mali_gp_job_get_id(job) && ++ session == mali_gp_job_get_session(job)) { ++ /* ++ * Correct job is running, resume with new heap ++ */ + -+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args) -+{ -+ _mali_osk_notification_t *notification; -+ _mali_osk_notification_queue_t *queue; -+ struct mali_session_data *session; ++ mali_gp_job_set_oom_notification(job, ++ new_notification); + -+ /* check input */ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++ /* This will also re-enable interrupts */ ++ mali_group_resume_gp_with_new_heap(gp_group, ++ args->cookie, ++ args->arguments[0], ++ args->arguments[1]); + -+ session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ queue = session->ioctl_queue; ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_OK; ++ } else { ++ MALI_DEBUG_PRINT(2, ("Executor: Unable to resume gp job becasue gp time out or any other unexpected reason!\n")); + -+ /* if the queue does not exist we're currently shutting down */ -+ if (NULL == queue) { -+ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. Asking userspace to stop querying\n")); -+ return _MALI_OSK_ERR_OK; -+ } ++ _mali_osk_notification_delete(new_notification); + -+ notification = _mali_osk_notification_create(args->type, 0); -+ if (NULL == notification) { -+ MALI_PRINT_ERROR(("Failed to create notification object\n")); -+ return _MALI_OSK_ERR_NOMEM; ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ MALI_PRINT_ERROR(("Executor: Failed to allocate notification object. 
Will abort GP job.\n")); ++ } ++ } else { ++ MALI_DEBUG_PRINT(2, ("Executor: Aborting job %u, no new heap provided\n", args->cookie)); + } + -+ _mali_osk_notification_queue_send(queue, notification); -+ -+ return _MALI_OSK_ERR_OK; /* all ok */ -+} ++ mali_executor_lock(); + -+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args) -+{ -+ wait_queue_head_t *queue; ++ /* Abort the job in question if it is still running */ ++ job = mali_group_get_running_gp_job(gp_group); ++ if (NULL != job && ++ args->cookie == mali_gp_job_get_id(job) && ++ session == mali_gp_job_get_session(job)) { ++ /* Correct job is still running */ ++ struct mali_gp_job *job_done = NULL; + -+ /* check input */ -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++ mali_executor_complete_group(gp_group, MALI_FALSE, &job_done, NULL); + -+ queue = mali_session_get_wait_queue(); ++ /* The same job should have completed */ ++ MALI_DEBUG_ASSERT(job_done == job); + -+ /* check pending big job number, might sleep if larger than MAX allowed number */ -+ if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) { -+ return _MALI_OSK_ERR_RESTARTSYSCALL; ++ /* GP job completed, make sure it is freed */ ++ mali_scheduler_complete_gp_job(job_done, MALI_FALSE, ++ MALI_TRUE, MALI_TRUE); + } + -+ return _MALI_OSK_ERR_OK; /* all ok */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; +} + + -+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args) ++/* ++ * ---------- Implementation of static functions ---------- ++ */ ++ ++void mali_executor_lock(void) +{ -+ struct mali_session_data *session; ++ _mali_osk_spinlock_irq_lock(mali_executor_lock_obj); ++ MALI_DEBUG_PRINT(5, ("Executor: lock taken\n")); ++} + -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++void mali_executor_unlock(void) ++{ ++ MALI_DEBUG_PRINT(5, ("Executor: Releasing lock\n")); ++ _mali_osk_spinlock_irq_unlock(mali_executor_lock_obj); ++} + -+ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++static mali_bool mali_executor_is_suspended(void *data) ++{ ++ mali_bool ret; + -+ if (!session->use_high_priority_job_queue) { -+ session->use_high_priority_job_queue = MALI_TRUE; -+ MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid())); -+ } ++ /* This callback does not use the data pointer. 
*/ ++ MALI_IGNORE(data); + -+ return _MALI_OSK_ERR_OK; ++ mali_executor_lock(); ++ ++ ret = pause_count > 0 && !mali_executor_is_working(); ++ ++ mali_executor_unlock(); ++ ++ return ret; +} + -+_mali_osk_errcode_t _mali_ukk_open(void **context) ++static mali_bool mali_executor_is_working(void) +{ -+ u32 i; -+ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ /* allocated struct to track this session */ -+ session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data)); -+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM); ++ return (0 != group_list_working_count || ++ EXEC_STATE_WORKING == gp_group_state || ++ EXEC_STATE_WORKING == virtual_group_state); ++} + -+ MALI_DEBUG_PRINT(3, ("Session starting\n")); ++static void mali_executor_disable_empty_virtual(void) ++{ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_EMPTY); ++ MALI_DEBUG_ASSERT(virtual_group_state != EXEC_STATE_WORKING); + -+ /* create a response queue for this session */ -+ session->ioctl_queue = _mali_osk_notification_queue_init(); -+ if (NULL == session->ioctl_queue) { -+ goto err; ++ if (mali_group_is_empty(virtual_group)) { ++ virtual_group_state = EXEC_STATE_EMPTY; + } ++} + -+ /*create a wait queue for this session */ -+ session->wait_queue = _mali_osk_wait_queue_init(); -+ if (NULL == session->wait_queue) { -+ goto err_wait_queue; ++static mali_bool mali_executor_physical_rejoin_virtual(struct mali_group *group) ++{ ++ mali_bool trigger_pm_update = MALI_FALSE; ++ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ /* Only rejoining after job has completed (still active) */ ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == ++ mali_group_get_state(group)); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_executor_has_virtual_group()); ++ MALI_DEBUG_ASSERT(MALI_FALSE == mali_group_is_virtual(group)); ++ ++ /* Make sure group and virtual group have same status */ ++ ++ if (MALI_GROUP_STATE_INACTIVE == mali_group_get_state(virtual_group)) { ++ if (mali_group_deactivate(group)) { ++ trigger_pm_update = MALI_TRUE; ++ } ++ ++ if (virtual_group_state == EXEC_STATE_EMPTY) { ++ virtual_group_state = EXEC_STATE_INACTIVE; ++ } ++ } else if (MALI_GROUP_STATE_ACTIVATION_PENDING == ++ mali_group_get_state(virtual_group)) { ++ /* ++ * Activation is pending for virtual group, leave ++ * this child group as active. ++ */ ++ if (virtual_group_state == EXEC_STATE_EMPTY) { ++ virtual_group_state = EXEC_STATE_INACTIVE; ++ } ++ } else { ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == ++ mali_group_get_state(virtual_group)); ++ ++ if (virtual_group_state == EXEC_STATE_EMPTY) { ++ virtual_group_state = EXEC_STATE_IDLE; ++ } + } + -+ session->page_directory = mali_mmu_pagedir_alloc(); -+ if (NULL == session->page_directory) { -+ goto err_mmu; ++ /* Remove group from idle list */ ++ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, ++ EXEC_STATE_IDLE)); ++ _mali_osk_list_delinit(&group->executor_list); ++ group_list_idle_count--; ++ ++ /* ++ * And finally rejoin the virtual group ++ * group will start working on same job as virtual_group, ++ * if virtual_group is working on a job ++ */ ++ mali_group_add_group(virtual_group, group); ++ ++ return trigger_pm_update; ++} ++ ++static mali_bool mali_executor_has_virtual_group(void) ++{ ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ return (NULL != virtual_group) ? 
MALI_TRUE : MALI_FALSE; ++#else ++ return MALI_FALSE; ++#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ ++} ++ ++static mali_bool mali_executor_virtual_group_is_usable(void) ++{ ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return ((EXEC_STATE_INACTIVE == virtual_group_state || ++ EXEC_STATE_IDLE == virtual_group_state) && (virtual_group->state != MALI_GROUP_STATE_ACTIVATION_PENDING)) ? ++ MALI_TRUE : MALI_FALSE; ++#else ++ return MALI_FALSE; ++#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ ++} ++ ++static mali_bool mali_executor_tackle_gp_bound(void) ++{ ++ struct mali_pp_job *job; ++ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ job = mali_scheduler_job_pp_physical_peek(); ++ ++ if (NULL != job && MALI_TRUE == mali_is_mali400()) { ++ if (0 < group_list_working_count && ++ mali_pp_job_is_large_and_unstarted(job)) { ++ return MALI_TRUE; ++ } + } + -+ if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) { -+ MALI_PRINT_ERROR(("Failed to map DLBU page into session\n")); -+ goto err_mmu; ++ return MALI_FALSE; ++} ++ ++static mali_bool mali_executor_schedule_is_early_out(mali_bool *gpu_secure_mode_is_needed) ++{ ++ struct mali_pp_job *next_pp_job_to_start = NULL; ++ struct mali_group *group; ++ struct mali_group *tmp_group; ++ struct mali_pp_job *physical_pp_job_working = NULL; ++ struct mali_pp_job *virtual_pp_job_working = NULL; ++ mali_bool gpu_working_in_protected_mode = MALI_FALSE; ++ mali_bool gpu_working_in_non_protected_mode = MALI_FALSE; ++ ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ ++ *gpu_secure_mode_is_needed = MALI_FALSE; ++ ++ /* Check if the gpu secure mode is supported, exit if not.*/ ++ if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported()) { ++ return MALI_FALSE; + } + -+ if (0 != mali_dlbu_phys_addr) { -+ mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr, -+ _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); ++ /* Check if need to set gpu secure mode for the next pp job, ++ * get the next pp job that will be scheduled if exist. ++ */ ++ next_pp_job_to_start = mali_scheduler_job_pp_next(); ++ ++ /* Check current pp physical/virtual running job is protected job or not if exist.*/ ++ _MALI_OSK_LIST_FOREACHENTRY(group, tmp_group, &group_list_working, ++ struct mali_group, executor_list) { ++ physical_pp_job_working = group->pp_running_job; ++ break; + } + -+ if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) { -+ goto err_session; ++ if (EXEC_STATE_WORKING == virtual_group_state) { ++ virtual_pp_job_working = virtual_group->pp_running_job; + } + -+ /* Create soft system. 
*/ -+ session->soft_job_system = mali_soft_job_system_create(session); -+ if (NULL == session->soft_job_system) { -+ goto err_soft; ++ if (NULL != physical_pp_job_working) { ++ if (MALI_TRUE == mali_pp_job_is_protected_job(physical_pp_job_working)) { ++ gpu_working_in_protected_mode = MALI_TRUE; ++ } else { ++ gpu_working_in_non_protected_mode = MALI_TRUE; ++ } ++ } else if (NULL != virtual_pp_job_working) { ++ if (MALI_TRUE == mali_pp_job_is_protected_job(virtual_pp_job_working)) { ++ gpu_working_in_protected_mode = MALI_TRUE; ++ } else { ++ gpu_working_in_non_protected_mode = MALI_TRUE; ++ } ++ } else if (EXEC_STATE_WORKING == gp_group_state) { ++ gpu_working_in_non_protected_mode = MALI_TRUE; + } + -+ /* Initialize the dma fence context.*/ -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ session->fence_context = dma_fence_context_alloc(1); -+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) -+ session->fence_context = fence_context_alloc(1); -+ _mali_osk_atomic_init(&session->fence_seqno, 0); -+#else -+ MALI_PRINT_ERROR(("The kernel version not support dma fence!\n")); -+ goto err_time_line; -+#endif -+#endif ++ /* If the next pp job is the protected pp job.*/ ++ if ((NULL != next_pp_job_to_start) && MALI_TRUE == mali_pp_job_is_protected_job(next_pp_job_to_start)) { ++ /* if gp is working or any non-protected pp job is working now, unable to schedule protected pp job. */ ++ if (MALI_TRUE == gpu_working_in_non_protected_mode) ++ return MALI_TRUE; ++ ++ *gpu_secure_mode_is_needed = MALI_TRUE; ++ return MALI_FALSE; + -+ /* Create timeline system. */ -+ session->timeline_system = mali_timeline_system_create(session); -+ if (NULL == session->timeline_system) { -+ goto err_time_line; + } + -+#if defined(CONFIG_MALI_DVFS) -+ _mali_osk_atomic_init(&session->number_of_window_jobs, 0); -+#endif ++ if (MALI_TRUE == gpu_working_in_protected_mode) { ++ /* Unable to schedule non-protected pp job/gp job if exist protected pp running jobs*/ ++ return MALI_TRUE; ++ } + -+ _mali_osk_atomic_init(&session->number_of_pp_jobs, 0); ++ return MALI_FALSE; ++} ++/* ++ * This is where jobs are actually started. ++ */ ++static void mali_executor_schedule(void) ++{ ++ u32 i; ++ u32 num_physical_needed = 0; ++ u32 num_physical_to_process = 0; ++ mali_bool trigger_pm_update = MALI_FALSE; ++ mali_bool deactivate_idle_group = MALI_TRUE; ++ mali_bool gpu_secure_mode_is_needed = MALI_FALSE; ++ mali_bool is_gpu_secure_mode = MALI_FALSE; ++ /* Physical groups + jobs to start in this function */ ++ struct mali_group *groups_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; ++ struct mali_pp_job *jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; ++ u32 sub_jobs_to_start[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; ++ int num_jobs_to_start = 0; + -+ session->use_high_priority_job_queue = MALI_FALSE; ++ /* Virtual job to start in this function */ ++ struct mali_pp_job *virtual_job_to_start = NULL; + -+ /* Initialize list of PP jobs on this session. */ -+ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list); ++ /* GP job to start in this function */ ++ struct mali_gp_job *gp_job_to_start = NULL; + -+ /* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */ -+ for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) { -+ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ if (pause_count > 0) { ++ /* Execution is suspended, don't schedule any jobs. 
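++		 * Queued GP and PP jobs are left on the scheduler queues and are
++		 * picked up the next time this function runs.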
*/ ++ return; + } + -+ session->pid = _mali_osk_get_pid(); -+ session->comm = _mali_osk_get_comm(); -+ session->max_mali_mem_allocated_size = 0; -+ for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) { -+ atomic_set(&session->mali_mem_array[i], 0); ++ /* Lock needed in order to safely handle the job queues */ ++ mali_scheduler_lock(); ++ ++ /* 1. Check the schedule if need to early out. */ ++ if (MALI_TRUE == mali_executor_schedule_is_early_out(&gpu_secure_mode_is_needed)) { ++ mali_scheduler_unlock(); ++ return; + } -+ atomic_set(&session->mali_mem_allocated_pages, 0); -+ *context = (void *)session; + -+ /* Add session to the list of all sessions. */ -+ mali_session_add(session); ++ /* 2. Activate gp firstly if have gp job queued. */ ++ if ((EXEC_STATE_INACTIVE == gp_group_state) ++ && (0 < mali_scheduler_job_gp_count()) ++ && (gpu_secure_mode_is_needed == MALI_FALSE)) { + -+ MALI_DEBUG_PRINT(3, ("Session started\n")); -+ return _MALI_OSK_ERR_OK; ++ enum mali_group_state state = ++ mali_group_activate(gp_group); ++ if (MALI_GROUP_STATE_ACTIVE == state) { ++ /* Set GP group state to idle */ ++ gp_group_state = EXEC_STATE_IDLE; ++ } else { ++ trigger_pm_update = MALI_TRUE; ++ } ++ } + -+err_time_line: -+ mali_soft_job_system_destroy(session->soft_job_system); -+err_soft: -+ mali_memory_session_end(session); -+err_session: -+ mali_mmu_pagedir_free(session->page_directory); -+err_mmu: -+ _mali_osk_wait_queue_term(session->wait_queue); -+err_wait_queue: -+ _mali_osk_notification_queue_term(session->ioctl_queue); -+err: -+ _mali_osk_free(session); -+ MALI_ERROR(_MALI_OSK_ERR_NOMEM); ++ /* 3. Prepare as many physical groups as needed/possible */ + -+} ++ num_physical_needed = mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed); + -+#if defined(DEBUG) -+/* parameter used for debug */ -+extern u32 num_pm_runtime_resume; -+extern u32 num_pm_updates; -+extern u32 num_pm_updates_up; -+extern u32 num_pm_updates_down; -+#endif ++ /* On mali-450 platform, we don't need to enter in this block frequently. */ ++ if (0 < num_physical_needed) { + -+_mali_osk_errcode_t _mali_ukk_close(void **context) -+{ -+ struct mali_session_data *session; -+ MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS); -+ session = (struct mali_session_data *)*context; ++ if (num_physical_needed <= group_list_idle_count) { ++ /* We have enough groups on idle list already */ ++ num_physical_to_process = num_physical_needed; ++ num_physical_needed = 0; ++ } else { ++ /* We need to get a hold of some more groups */ ++ num_physical_to_process = group_list_idle_count; ++ num_physical_needed -= group_list_idle_count; ++ } + -+ MALI_DEBUG_PRINT(3, ("Session ending\n")); ++ if (0 < num_physical_needed) { + -+ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system); -+ MALI_DEBUG_ASSERT_POINTER(session->timeline_system); ++ /* 3.1. Activate groups which are inactive */ + -+ /* Remove session from list of all sessions. */ -+ mali_session_remove(session); ++ struct mali_group *group; ++ struct mali_group *temp; + -+ /* This flag is used to prevent queueing of jobs due to activation. 
*/ -+ session->is_aborting = MALI_TRUE; ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, ++ struct mali_group, executor_list) { ++ enum mali_group_state state = ++ mali_group_activate(group); ++ if (MALI_GROUP_STATE_ACTIVE == state) { ++ /* Move from inactive to idle */ ++ mali_executor_change_state_pp_physical(group, ++ &group_list_inactive, ++ &group_list_inactive_count, ++ &group_list_idle, ++ &group_list_idle_count); ++ num_physical_to_process++; ++ } else { ++ trigger_pm_update = MALI_TRUE; ++ } + -+ /* Stop the soft job timer. */ -+ mali_timeline_system_stop_timer(session->timeline_system); ++ num_physical_needed--; ++ if (0 == num_physical_needed) { ++ /* We have activated all the groups we need */ ++ break; ++ } ++ } ++ } + -+ /* Abort queued jobs */ -+ mali_scheduler_abort_session(session); ++ if (mali_executor_virtual_group_is_usable()) { + -+ /* Abort executing jobs */ -+ mali_executor_abort_session(session); ++ /* ++ * 3.2. And finally, steal and activate groups ++ * from virtual group if we need even more ++ */ ++ while (0 < num_physical_needed) { ++ struct mali_group *group; + -+ /* Abort the soft job system. */ -+ mali_soft_job_system_abort(session->soft_job_system); ++ group = mali_group_acquire_group(virtual_group); ++ if (NULL != group) { ++ enum mali_group_state state; + -+ /* Force execution of all pending bottom half processing for GP and PP. */ -+ _mali_osk_wq_flush(); ++ mali_executor_disable_empty_virtual(); + -+ /* The session PP list should now be empty. */ -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list)); ++ state = mali_group_activate(group); ++ if (MALI_GROUP_STATE_ACTIVE == state) { ++ /* Group is ready, add to idle list */ ++ _mali_osk_list_add( ++ &group->executor_list, ++ &group_list_idle); ++ group_list_idle_count++; ++ num_physical_to_process++; ++ } else { ++ /* ++ * Group is not ready yet, ++ * add to inactive list ++ */ ++ _mali_osk_list_add( ++ &group->executor_list, ++ &group_list_inactive); ++ group_list_inactive_count++; + -+ /* At this point the GP and PP scheduler no longer has any jobs queued or running from this -+ * session, and all soft jobs in the soft job system has been destroyed. */ ++ trigger_pm_update = MALI_TRUE; ++ } ++ num_physical_needed--; ++ } else { ++ /* ++ * We could not get enough groups ++ * from the virtual group. ++ */ ++ break; ++ } ++ } ++ } + -+ /* Any trackers left in the timeline system are directly or indirectly waiting on external -+ * sync fences. Cancel all sync fence waiters to trigger activation of all remaining -+ * trackers. This call will sleep until all timelines are empty. */ -+ mali_timeline_system_abort(session->timeline_system); ++ /* 3.3. Assign physical jobs to groups */ + -+ /* Flush pending work. -+ * Needed to make sure all bottom half processing related to this -+ * session has been completed, before we free internal data structures. -+ */ -+ _mali_osk_wq_flush(); ++ if (0 < num_physical_to_process) { ++ struct mali_group *group; ++ struct mali_group *temp; + -+ /* Destroy timeline system. */ -+ mali_timeline_system_destroy(session->timeline_system); -+ session->timeline_system = NULL; ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, ++ struct mali_group, executor_list) { ++ struct mali_pp_job *job = NULL; ++ u32 sub_job = MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; + -+ /* Destroy soft system. 
*/ -+ mali_soft_job_system_destroy(session->soft_job_system); -+ session->soft_job_system = NULL; ++ MALI_DEBUG_ASSERT(num_jobs_to_start < ++ MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS); + -+ /*Wait for the session job lists become empty.*/ -+ _mali_osk_wait_queue_wait_event(session->wait_queue, mali_session_pp_job_is_empty, (void *) session); ++ MALI_DEBUG_ASSERT(0 < ++ mali_scheduler_job_physical_head_count(gpu_secure_mode_is_needed)); + -+ /* Free remaining memory allocated to this session */ -+ mali_memory_session_end(session); ++ /* If the next pp job is non-protected, check if gp bound now. */ ++ if ((MALI_FALSE == gpu_secure_mode_is_needed) ++ && (mali_executor_hint_is_enabled(MALI_EXECUTOR_HINT_GP_BOUND)) ++ && (MALI_TRUE == mali_executor_tackle_gp_bound())) { ++ /* ++ * We're gp bound, ++ * don't start this right now. ++ */ ++ deactivate_idle_group = MALI_FALSE; ++ num_physical_to_process = 0; ++ break; ++ } + -+#if defined(CONFIG_MALI_DVFS) -+ _mali_osk_atomic_term(&session->number_of_window_jobs); -+#endif ++ job = mali_scheduler_job_pp_physical_get( ++ &sub_job); + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_stop_sampling(session->pid); -+#endif ++ if (MALI_FALSE == gpu_secure_mode_is_needed) { ++ MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_protected_job(job)); ++ } else { ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_protected_job(job)); ++ } + -+ /* Free session data structures */ -+ mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE); -+ mali_mmu_pagedir_free(session->page_directory); -+ _mali_osk_wait_queue_term(session->wait_queue); -+ _mali_osk_notification_queue_term(session->ioctl_queue); -+ _mali_osk_free(session); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT(sub_job <= MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS); + -+ *context = NULL; ++ /* Put job + group on list of jobs to start later on */ + -+ MALI_DEBUG_PRINT(3, ("Session has ended\n")); ++ groups_to_start[num_jobs_to_start] = group; ++ jobs_to_start[num_jobs_to_start] = job; ++ sub_jobs_to_start[num_jobs_to_start] = sub_job; ++ num_jobs_to_start++; + -+#if defined(DEBUG) -+ MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume)); -+ MALI_DEBUG_PRINT(3, (" # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down)); ++ /* Move group from idle to working */ ++ mali_executor_change_state_pp_physical(group, ++ &group_list_idle, ++ &group_list_idle_count, ++ &group_list_working, ++ &group_list_working_count); + -+ num_pm_runtime_resume = 0; -+ num_pm_updates = 0; -+ num_pm_updates_up = 0; -+ num_pm_updates_down = 0; -+#endif ++ num_physical_to_process--; ++ if (0 == num_physical_to_process) { ++ /* Got all we needed */ ++ break; ++ } ++ } ++ } ++ } + -+ return _MALI_OSK_ERR_OK;; -+} ++ /* 4. 
Deactivate idle pp group , must put deactive here before active vitual group ++ * for cover case first only has physical job in normal queue but group inactive, ++ * so delay the job start go to active group, when group activated, ++ * call scheduler again, but now if we get high queue virtual job, ++ * we will do nothing in schedule cause executor schedule stop ++ */ + -+#if MALI_STATE_TRACKING -+u32 _mali_kernel_core_dump_state(char *buf, u32 size) -+{ -+ int n = 0; /* Number of bytes written to buf */ ++ if (MALI_TRUE == mali_executor_deactivate_list_idle(deactivate_idle_group ++ && (!mali_timeline_has_physical_pp_job()))) { ++ trigger_pm_update = MALI_TRUE; ++ } + -+ n += mali_scheduler_dump_state(buf + n, size - n); -+ n += mali_executor_dump_state(buf + n, size - n); ++ /* 5. Activate virtual group, if needed */ ++ if (EXEC_STATE_INACTIVE == virtual_group_state && ++ MALI_TRUE == mali_scheduler_job_next_is_virtual()) { ++ struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek(); ++ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job)) ++ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) { ++ enum mali_group_state state = ++ mali_group_activate(virtual_group); ++ if (MALI_GROUP_STATE_ACTIVE == state) { ++ /* Set virtual group state to idle */ ++ virtual_group_state = EXEC_STATE_IDLE; ++ } else { ++ trigger_pm_update = MALI_TRUE; ++ } ++ } ++ } + -+ return n; -+} -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h -new file mode 100644 -index 000000000..c471fc955 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* 6. To power up group asap, trigger pm update only when no need to swith the gpu mode. */ + -+#ifndef __MALI_KERNEL_CORE_H__ -+#define __MALI_KERNEL_CORE_H__ ++ is_gpu_secure_mode = _mali_osk_gpu_secure_mode_is_enabled(); + -+#include "mali_osk.h" ++ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == is_gpu_secure_mode) ++ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == is_gpu_secure_mode)) { ++ if (MALI_TRUE == trigger_pm_update) { ++ trigger_pm_update = MALI_FALSE; ++ mali_pm_update_async(); ++ } ++ } + -+typedef enum { -+ _MALI_PRODUCT_ID_UNKNOWN, -+ _MALI_PRODUCT_ID_MALI200, -+ _MALI_PRODUCT_ID_MALI300, -+ _MALI_PRODUCT_ID_MALI400, -+ _MALI_PRODUCT_ID_MALI450, -+ _MALI_PRODUCT_ID_MALI470, -+} _mali_product_id_t; ++ /* 7. 
Assign jobs to idle virtual group (or deactivate if no job) */ + -+extern mali_bool mali_gpu_class_is_mali450; -+extern mali_bool mali_gpu_class_is_mali470; ++ if (EXEC_STATE_IDLE == virtual_group_state) { ++ if (MALI_TRUE == mali_scheduler_job_next_is_virtual()) { ++ struct mali_pp_job *virtual_job = mali_scheduler_job_pp_virtual_peek(); ++ if ((MALI_FALSE == gpu_secure_mode_is_needed && MALI_FALSE == mali_pp_job_is_protected_job(virtual_job)) ++ || (MALI_TRUE == gpu_secure_mode_is_needed && MALI_TRUE == mali_pp_job_is_protected_job(virtual_job))) { ++ virtual_job_to_start = ++ mali_scheduler_job_pp_virtual_get(); ++ virtual_group_state = EXEC_STATE_WORKING; ++ } ++ } else if (!mali_timeline_has_virtual_pp_job()) { ++ virtual_group_state = EXEC_STATE_INACTIVE; + -+_mali_osk_errcode_t mali_initialize_subsystems(void); ++ if (mali_group_deactivate(virtual_group)) { ++ trigger_pm_update = MALI_TRUE; ++ } ++ } ++ } + -+void mali_terminate_subsystems(void); ++ /* 8. Assign job to idle GP group (or deactivate if no job) */ + -+_mali_product_id_t mali_kernel_core_get_product_id(void); ++ if (EXEC_STATE_IDLE == gp_group_state && MALI_FALSE == gpu_secure_mode_is_needed) { ++ if (0 < mali_scheduler_job_gp_count()) { ++ gp_job_to_start = mali_scheduler_job_gp_get(); ++ gp_group_state = EXEC_STATE_WORKING; ++ } else if (!mali_timeline_has_gp_job()) { ++ gp_group_state = EXEC_STATE_INACTIVE; ++ if (mali_group_deactivate(gp_group)) { ++ trigger_pm_update = MALI_TRUE; ++ } ++ } ++ } + -+u32 mali_kernel_core_get_gpu_major_version(void); ++ /* 9. We no longer need the schedule/queue lock */ + -+u32 mali_kernel_core_get_gpu_minor_version(void); ++ mali_scheduler_unlock(); + -+u32 _mali_kernel_core_dump_state(char *buf, u32 size); ++ /* 10. start jobs */ ++ if (NULL != virtual_job_to_start) { ++ MALI_DEBUG_ASSERT(!mali_group_pp_is_active(virtual_group)); ++ mali_group_start_pp_job(virtual_group, ++ virtual_job_to_start, 0, is_gpu_secure_mode); ++ } + -+MALI_STATIC_INLINE mali_bool mali_is_mali470(void) -+{ -+ return mali_gpu_class_is_mali470; ++ for (i = 0; i < num_jobs_to_start; i++) { ++ MALI_DEBUG_ASSERT(!mali_group_pp_is_active( ++ groups_to_start[i])); ++ mali_group_start_pp_job(groups_to_start[i], ++ jobs_to_start[i], ++ sub_jobs_to_start[i], is_gpu_secure_mode); ++ } ++ ++ MALI_DEBUG_ASSERT_POINTER(gp_group); ++ ++ if (NULL != gp_job_to_start) { ++ MALI_DEBUG_ASSERT(!mali_group_gp_is_active(gp_group)); ++ mali_group_start_gp_job(gp_group, gp_job_to_start, is_gpu_secure_mode); ++ } ++ ++ /* 11. 
Trigger any pending PM updates */ ++ if (MALI_TRUE == trigger_pm_update) { ++ mali_pm_update_async(); ++ } +} + -+MALI_STATIC_INLINE mali_bool mali_is_mali450(void) ++/* Handler for deferred schedule requests */ ++static void mali_executor_wq_schedule(void *arg) +{ -+ return mali_gpu_class_is_mali450; ++ MALI_IGNORE(arg); ++ mali_executor_lock(); ++ mali_executor_schedule(); ++ mali_executor_unlock(); +} + -+MALI_STATIC_INLINE mali_bool mali_is_mali400(void) ++static void mali_executor_send_gp_oom_to_user(struct mali_gp_job *job) +{ -+ if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470) -+ return MALI_FALSE; ++ _mali_uk_gp_job_suspended_s *jobres; ++ _mali_osk_notification_t *notification; + -+ return MALI_TRUE; -+} -+#endif /* __MALI_KERNEL_CORE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c -new file mode 100644 -index 000000000..d1b8dc3b0 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c -@@ -0,0 +1,440 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ notification = mali_gp_job_get_oom_notification(job); + -+#include "mali_kernel_utilization.h" -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_scheduler.h" ++ /* ++ * Remember the id we send to user space, so we have something to ++ * verify when we get a response ++ */ ++ gp_returned_cookie = mali_gp_job_get_id(job); + -+#include "mali_executor.h" -+#include "mali_dvfs_policy.h" -+#include "mali_control_timer.h" ++ jobres = (_mali_uk_gp_job_suspended_s *)notification->result_buffer; ++ jobres->user_job_ptr = mali_gp_job_get_user_id(job); ++ jobres->cookie = gp_returned_cookie; + -+/* Thresholds for GP bound detection. 
*/ -+#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240 -+#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250 ++ mali_session_send_notification(mali_gp_job_get_session(job), ++ notification); ++} ++static struct mali_gp_job *mali_executor_complete_gp(struct mali_group *group, ++ mali_bool success) ++{ ++ struct mali_gp_job *job; + -+static _mali_osk_spinlock_irq_t *utilization_data_lock; ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+static u32 num_running_gp_cores = 0; -+static u32 num_running_pp_cores = 0; ++ /* Extracts the needed HW status from core and reset */ ++ job = mali_group_complete_gp(group, success); + -+static u64 work_start_time_gpu = 0; -+static u64 work_start_time_gp = 0; -+static u64 work_start_time_pp = 0; -+static u64 accumulated_work_time_gpu = 0; -+static u64 accumulated_work_time_gp = 0; -+static u64 accumulated_work_time_pp = 0; ++ MALI_DEBUG_ASSERT_POINTER(job); + -+static u32 last_utilization_gpu = 0 ; -+static u32 last_utilization_gp = 0 ; -+static u32 last_utilization_pp = 0 ; ++ /* Core is now ready to go into idle list */ ++ gp_group_state = EXEC_STATE_IDLE; + -+void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL; ++ /* This will potentially queue more GP and PP jobs */ ++ mali_timeline_tracker_release(&job->tracker); + -+/* Define the first timer control timer timeout in milliseconds */ -+static u32 mali_control_first_timeout = 100; -+static struct mali_gpu_utilization_data mali_util_data = {0, }; ++ /* Signal PP job */ ++ mali_gp_job_signal_pp_tracker(job, success); + -+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer) ++ return job; ++} ++ ++static struct mali_pp_job *mali_executor_complete_pp(struct mali_group *group, ++ mali_bool success) +{ -+ u64 time_now; -+ u32 leading_zeroes; -+ u32 shift_val; -+ u32 work_normalized_gpu; -+ u32 work_normalized_gp; -+ u32 work_normalized_pp; -+ u32 period_normalized; -+ u32 utilization_gpu; -+ u32 utilization_gp; -+ u32 utilization_pp; ++ struct mali_pp_job *job; ++ u32 sub_job; ++ mali_bool job_is_done; + -+ mali_utilization_data_lock(); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ time_now = _mali_osk_time_get_ns(); ++ /* Extracts the needed HW status from core and reset */ ++ job = mali_group_complete_pp(group, success, &sub_job); + -+ *time_period = time_now - *start_time; ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) { -+ mali_control_timer_pause(); -+ /* -+ * No work done for this period -+ * - No need to reschedule timer -+ * - Report zero usage -+ */ -+ last_utilization_gpu = 0; -+ last_utilization_gp = 0; -+ last_utilization_pp = 0; ++ /* Core is now ready to go into idle list */ ++ if (mali_group_is_virtual(group)) { ++ virtual_group_state = EXEC_STATE_IDLE; ++ } else { ++ /* Move from working to idle state */ ++ mali_executor_change_state_pp_physical(group, ++ &group_list_working, ++ &group_list_working_count, ++ &group_list_idle, ++ &group_list_idle_count); ++ } + -+ mali_util_data.utilization_gpu = last_utilization_gpu; -+ mali_util_data.utilization_gp = last_utilization_gp; -+ mali_util_data.utilization_pp = last_utilization_pp; ++ /* It is the executor module which owns the jobs themselves by now */ ++ mali_pp_job_mark_sub_job_completed(job, success); ++ job_is_done = mali_pp_job_is_complete(job); + -+ mali_utilization_data_unlock(); ++ if (job_is_done) { ++ /* This will potentially queue more GP and PP jobs */ ++ 
mali_timeline_tracker_release(&job->tracker); ++ } + -+ *need_add_timer = MALI_FALSE; ++ return job; ++} + -+ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND); ++static void mali_executor_complete_group(struct mali_group *group, ++ mali_bool success, ++ struct mali_gp_job **gp_job_done, ++ struct mali_pp_job **pp_job_done) ++{ ++ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); ++ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); ++ struct mali_gp_job *gp_job = NULL; ++ struct mali_pp_job *pp_job = NULL; ++ mali_bool pp_job_is_done = MALI_TRUE; + -+ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu)); -+ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp)); -+ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp)); ++ if (NULL != gp_core) { ++ gp_job = mali_executor_complete_gp(group, success); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(pp_core); ++ MALI_IGNORE(pp_core); ++ pp_job = mali_executor_complete_pp(group, success); + -+ return &mali_util_data; ++ pp_job_is_done = mali_pp_job_is_complete(pp_job); + } + -+ /* If we are currently busy, update working period up to now */ -+ if (work_start_time_gpu != 0) { -+ accumulated_work_time_gpu += (time_now - work_start_time_gpu); -+ work_start_time_gpu = time_now; -+ -+ /* GP and/or PP will also be busy if the GPU is busy at this point */ ++ if (pause_count > 0) { ++ /* Execution has been suspended */ + -+ if (work_start_time_gp != 0) { -+ accumulated_work_time_gp += (time_now - work_start_time_gp); -+ work_start_time_gp = time_now; ++ if (!mali_executor_is_working()) { ++ /* Last job completed, wake up sleepers */ ++ _mali_osk_wait_queue_wake_up( ++ executor_working_wait_queue); + } ++ } else if (MALI_TRUE == mali_group_disable_requested(group)) { ++ mali_executor_core_scale_in_group_complete(group); + -+ if (work_start_time_pp != 0) { -+ accumulated_work_time_pp += (time_now - work_start_time_pp); -+ work_start_time_pp = time_now; -+ } ++ mali_executor_schedule(); ++ } else { ++ /* try to schedule new jobs */ ++ mali_executor_schedule(); ++ } ++ ++ if (NULL != gp_job) { ++ MALI_DEBUG_ASSERT_POINTER(gp_job_done); ++ *gp_job_done = gp_job; ++ } else if (pp_job_is_done) { ++ MALI_DEBUG_ASSERT_POINTER(pp_job); ++ MALI_DEBUG_ASSERT_POINTER(pp_job_done); ++ *pp_job_done = pp_job; + } ++} + ++static void mali_executor_change_state_pp_physical(struct mali_group *group, ++ _mali_osk_list_t *old_list, ++ u32 *old_count, ++ _mali_osk_list_t *new_list, ++ u32 *new_count) ++{ + /* -+ * We have two 64-bit values, a dividend and a divisor. -+ * To avoid dependencies to a 64-bit divider, we shift down the two values -+ * equally first. -+ * We shift the dividend up and possibly the divisor down, making the result X in 256. ++ * It's a bit more complicated to change the state for the physical PP ++ * groups since their state is determined by the list they are on. 
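++ * In DEBUG builds, the code below first checks that the group really is on the
++ * old list and that both list counters are consistent, before the group is
++ * moved and the counters are updated.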
+ */ ++#if defined(DEBUG) ++ mali_bool found = MALI_FALSE; ++ struct mali_group *group_iter; ++ struct mali_group *temp; ++ u32 old_counted = 0; ++ u32 new_counted = 0; + -+ /* Shift the 64-bit values down so they fit inside a 32-bit integer */ -+ leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32)); -+ shift_val = 32 - leading_zeroes; -+ work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val); -+ work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val); -+ work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val); -+ period_normalized = (u32)(*time_period >> shift_val); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(old_list); ++ MALI_DEBUG_ASSERT_POINTER(old_count); ++ MALI_DEBUG_ASSERT_POINTER(new_list); ++ MALI_DEBUG_ASSERT_POINTER(new_count); + + /* -+ * Now, we should report the usage in parts of 256 -+ * this means we must shift up the dividend or down the divisor by 8 -+ * (we could do a combination, but we just use one for simplicity, -+ * but the end result should be good enough anyway) ++ * Verify that group is present on old list, ++ * and that the count is correct + */ -+ if (period_normalized > 0x00FFFFFF) { -+ /* The divisor is so big that it is safe to shift it down */ -+ period_normalized >>= 8; -+ } else { -+ /* -+ * The divisor is so small that we can shift up the dividend, without loosing any data. -+ * (dividend is always smaller than the divisor) -+ */ -+ work_normalized_gpu <<= 8; -+ work_normalized_gp <<= 8; -+ work_normalized_pp <<= 8; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, old_list, ++ struct mali_group, executor_list) { ++ old_counted++; ++ if (group == group_iter) { ++ found = MALI_TRUE; ++ } + } + -+ utilization_gpu = work_normalized_gpu / period_normalized; -+ utilization_gp = work_normalized_gp / period_normalized; -+ utilization_pp = work_normalized_pp / period_normalized; ++ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, new_list, ++ struct mali_group, executor_list) { ++ new_counted++; ++ } + -+ last_utilization_gpu = utilization_gpu; -+ last_utilization_gp = utilization_gp; -+ last_utilization_pp = utilization_pp; ++ if (MALI_FALSE == found) { ++ if (old_list == &group_list_idle) { ++ MALI_DEBUG_PRINT(1, (" old Group list is idle,")); ++ } else if (old_list == &group_list_inactive) { ++ MALI_DEBUG_PRINT(1, (" old Group list is inactive,")); ++ } else if (old_list == &group_list_working) { ++ MALI_DEBUG_PRINT(1, (" old Group list is working,")); ++ } else if (old_list == &group_list_disabled) { ++ MALI_DEBUG_PRINT(1, (" old Group list is disable,")); ++ } + -+ if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) && -+ (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) { -+ mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND); ++ if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_WORKING)) { ++ MALI_DEBUG_PRINT(1, (" group in working \n")); ++ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_INACTIVE)) { ++ MALI_DEBUG_PRINT(1, (" group in inactive \n")); ++ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_IDLE)) { ++ MALI_DEBUG_PRINT(1, (" group in idle \n")); ++ } else if (MALI_TRUE == mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) { ++ MALI_DEBUG_PRINT(1, (" but group in disabled \n")); ++ } ++ } ++ ++ MALI_DEBUG_ASSERT(MALI_TRUE == found); ++ MALI_DEBUG_ASSERT(0 < (*old_count)); ++ MALI_DEBUG_ASSERT((*old_count) == old_counted); ++ MALI_DEBUG_ASSERT((*new_count) == new_counted); 
++#endif ++ ++ _mali_osk_list_move(&group->executor_list, new_list); ++ (*old_count)--; ++ (*new_count)++; ++} ++ ++static void mali_executor_set_state_pp_physical(struct mali_group *group, ++ _mali_osk_list_t *new_list, ++ u32 *new_count) ++{ ++ _mali_osk_list_add(&group->executor_list, new_list); ++ (*new_count)++; ++} ++ ++static mali_bool mali_executor_group_is_in_state(struct mali_group *group, ++ enum mali_executor_state_t state) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ if (gp_group == group) { ++ if (gp_group_state == state) { ++ return MALI_TRUE; ++ } ++ } else if (virtual_group == group || mali_group_is_in_virtual(group)) { ++ if (virtual_group_state == state) { ++ return MALI_TRUE; ++ } + } else { -+ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND); ++ /* Physical PP group */ ++ struct mali_group *group_iter; ++ struct mali_group *temp; ++ _mali_osk_list_t *list; ++ ++ if (EXEC_STATE_DISABLED == state) { ++ list = &group_list_disabled; ++ } else if (EXEC_STATE_INACTIVE == state) { ++ list = &group_list_inactive; ++ } else if (EXEC_STATE_IDLE == state) { ++ list = &group_list_idle; ++ } else { ++ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING == state); ++ list = &group_list_working; ++ } ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group_iter, temp, list, ++ struct mali_group, executor_list) { ++ if (group_iter == group) { ++ return MALI_TRUE; ++ } ++ } + } + -+ /* starting a new period */ -+ accumulated_work_time_gpu = 0; -+ accumulated_work_time_gp = 0; -+ accumulated_work_time_pp = 0; ++ /* group not in correct state */ ++ return MALI_FALSE; ++} + -+ *start_time = time_now; ++static void mali_executor_group_enable_internal(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)); + -+ mali_util_data.utilization_gp = last_utilization_gp; -+ mali_util_data.utilization_gpu = last_utilization_gpu; -+ mali_util_data.utilization_pp = last_utilization_pp; ++ /* Put into inactive state (== "lowest" enabled state) */ ++ if (group == gp_group) { ++ MALI_DEBUG_ASSERT(EXEC_STATE_DISABLED == gp_group_state); ++ gp_group_state = EXEC_STATE_INACTIVE; ++ } else { ++ mali_executor_change_state_pp_physical(group, ++ &group_list_disabled, ++ &group_list_disabled_count, ++ &group_list_inactive, ++ &group_list_inactive_count); + -+ mali_utilization_data_unlock(); ++ ++num_physical_pp_cores_enabled; ++ MALI_DEBUG_PRINT(4, ("Enabling group id %d \n", group->pp_core->core_id)); ++ } + -+ *need_add_timer = MALI_TRUE; ++ if (MALI_GROUP_STATE_ACTIVE == mali_group_activate(group)) { ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_power_is_on(group)); + -+ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu)); -+ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp)); -+ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp)); ++ /* Move from inactive to idle */ ++ if (group == gp_group) { ++ gp_group_state = EXEC_STATE_IDLE; ++ } else { ++ mali_executor_change_state_pp_physical(group, ++ &group_list_inactive, ++ &group_list_inactive_count, ++ &group_list_idle, ++ &group_list_idle_count); + -+ return &mali_util_data; ++ if (mali_executor_has_virtual_group()) { ++ if (mali_executor_physical_rejoin_virtual(group)) { ++ mali_pm_update_async(); ++ } ++ } ++ } ++ } else { ++ mali_pm_update_async(); ++ } +} + -+_mali_osk_errcode_t mali_utilization_init(void) ++static void 
mali_executor_group_disable_internal(struct mali_group *group) +{ -+#if USING_GPU_UTILIZATION -+ _mali_osk_device_data data; ++ mali_bool working; + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ if (NULL != data.utilization_callback) { -+ mali_utilization_callback = data.utilization_callback; -+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n")); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)); ++ ++ working = mali_executor_group_is_in_state(group, EXEC_STATE_WORKING); ++ if (MALI_TRUE == working) { ++ /** Group to be disabled once it completes current work, ++ * when virtual group completes, also check child groups for this flag */ ++ mali_group_set_disable_request(group, MALI_TRUE); ++ return; ++ } ++ ++ /* Put into disabled state */ ++ if (group == gp_group) { ++ /* GP group */ ++ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != gp_group_state); ++ gp_group_state = EXEC_STATE_DISABLED; ++ } else { ++ if (mali_group_is_in_virtual(group)) { ++ /* A child group of virtual group. move the specific group from virtual group */ ++ MALI_DEBUG_ASSERT(EXEC_STATE_WORKING != virtual_group_state); ++ ++ mali_executor_set_state_pp_physical(group, ++ &group_list_disabled, ++ &group_list_disabled_count); ++ ++ mali_group_remove_group(virtual_group, group); ++ mali_executor_disable_empty_virtual(); ++ } else { ++ mali_executor_change_group_status_disabled(group); + } ++ ++ --num_physical_pp_cores_enabled; ++ MALI_DEBUG_PRINT(4, ("Disabling group id %d \n", group->pp_core->core_id)); + } -+#endif /* defined(USING_GPU_UTILIZATION) */ + -+ if (NULL == mali_utilization_callback) { -+ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n")); ++ if (MALI_GROUP_STATE_INACTIVE != group->state) { ++ if (MALI_TRUE == mali_group_deactivate(group)) { ++ mali_pm_update_async(); ++ } + } ++} + -+ utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION); -+ if (NULL == utilization_data_lock) { -+ return _MALI_OSK_ERR_FAULT; ++static void mali_executor_notify_core_change(u32 num_cores) ++{ ++ mali_bool done = MALI_FALSE; ++ ++ if (mali_is_mali450() || mali_is_mali470()) { ++ return; + } + -+ num_running_gp_cores = 0; -+ num_running_pp_cores = 0; ++ /* ++ * This function gets a bit complicated because we can't hold the session lock while ++ * allocating notification objects. 
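++ * Notification objects are therefore allocated before the session lock is
++ * taken, and the outer loop retries if new sessions appeared in the meantime;
++ * any unused objects are deleted at the end of each pass.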
++ */ ++ while (!done) { ++ u32 i; ++ u32 num_sessions_alloc; ++ u32 num_sessions_with_lock; ++ u32 used_notification_objects = 0; ++ _mali_osk_notification_t **notobjs; + -+ return _MALI_OSK_ERR_OK; ++ /* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */ ++ num_sessions_alloc = mali_session_get_count(); ++ if (0 == num_sessions_alloc) { ++ /* No sessions to report to */ ++ return; ++ } ++ ++ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc); ++ if (NULL == notobjs) { ++ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n")); ++ /* there is probably no point in trying again, system must be really low on memory and probably unusable now anyway */ ++ return; ++ } ++ ++ for (i = 0; i < num_sessions_alloc; i++) { ++ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_NUM_CORE_CHANGE, sizeof(_mali_uk_pp_num_cores_changed_s)); ++ if (NULL != notobjs[i]) { ++ _mali_uk_pp_num_cores_changed_s *data = notobjs[i]->result_buffer; ++ data->number_of_enabled_cores = num_cores; ++ } else { ++ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure %u)\n", i)); ++ } ++ } ++ ++ mali_session_lock(); ++ ++ /* number of sessions will not change while we hold the lock */ ++ num_sessions_with_lock = mali_session_get_count(); ++ ++ if (num_sessions_alloc >= num_sessions_with_lock) { ++ /* We have allocated enough notification objects for all the sessions atm */ ++ struct mali_session_data *session, *tmp; ++ MALI_SESSION_FOREACH(session, tmp, link) { ++ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc); ++ if (NULL != notobjs[used_notification_objects]) { ++ mali_session_send_notification(session, notobjs[used_notification_objects]); ++ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */ ++ } ++ used_notification_objects++; ++ } ++ done = MALI_TRUE; ++ } ++ ++ mali_session_unlock(); ++ ++ /* Delete any remaining/unused notification objects */ ++ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) { ++ if (NULL != notobjs[used_notification_objects]) { ++ _mali_osk_notification_delete(notobjs[used_notification_objects]); ++ } ++ } ++ ++ _mali_osk_free(notobjs); ++ } +} + -+void mali_utilization_term(void) ++static mali_bool mali_executor_core_scaling_is_done(void *data) +{ -+ if (NULL != utilization_data_lock) { -+ _mali_osk_spinlock_irq_term(utilization_data_lock); ++ u32 i; ++ u32 num_groups; ++ mali_bool ret = MALI_TRUE; ++ ++ MALI_IGNORE(data); ++ ++ mali_executor_lock(); ++ ++ num_groups = mali_group_get_glob_num_groups(); ++ ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); ++ ++ if (NULL != group) { ++ if (MALI_TRUE == group->disable_requested && NULL != mali_group_get_pp_core(group)) { ++ ret = MALI_FALSE; ++ break; ++ } ++ } + } ++ mali_executor_unlock(); ++ ++ return ret; +} + -+void mali_utilization_gp_start(void) ++static void mali_executor_wq_notify_core_change(void *arg) +{ -+ mali_utilization_data_lock(); ++ MALI_IGNORE(arg); + -+ ++num_running_gp_cores; -+ if (1 == num_running_gp_cores) { -+ u64 time_now = _mali_osk_time_get_ns(); ++ if (mali_is_mali450() || mali_is_mali470()) { ++ return; ++ } + -+ /* First GP core started, consider GP busy from now and onwards */ -+ work_start_time_gp = time_now; ++ 
_mali_osk_wait_queue_wait_event(executor_notify_core_change_wait_queue, ++ mali_executor_core_scaling_is_done, NULL); + -+ if (0 == num_running_pp_cores) { -+ mali_bool is_resume = MALI_FALSE; -+ /* -+ * There are no PP cores running, so this is also the point -+ * at which we consider the GPU to be busy as well. -+ */ -+ work_start_time_gpu = time_now; ++ mali_executor_notify_core_change(num_physical_pp_cores_enabled); ++} + -+ is_resume = mali_control_timer_resume(time_now); ++/** ++ * Clear all disable request from the _last_ core scaling behavior. ++ */ ++static void mali_executor_core_scaling_reset(void) ++{ ++ u32 i; ++ u32 num_groups; + -+ mali_utilization_data_unlock(); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ if (is_resume) { -+ /* Do some policy in new period for performance consideration */ -+#if defined(CONFIG_MALI_DVFS) -+ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */ -+ mali_session_max_window_num(); -+ if (0 == last_utilization_gpu) { -+ /* -+ * for mali_dev_pause is called in set clock, -+ * so each time we change clock, we will set clock to -+ * highest step even if under down clock case, -+ * it is not nessesary, so we only set the clock under -+ * last time utilization equal 0, we stop the timer then -+ * start the GPU again case -+ */ -+ mali_dvfs_policy_new_period(); -+ } -+#endif -+ /* -+ * First timeout using short interval for power consideration -+ * because we give full power in the new period, but if the -+ * job loading is light, finish in 10ms, the other time all keep -+ * in high freq it will wast time. -+ */ -+ mali_control_timer_add(mali_control_first_timeout); -+ } -+ } else { -+ mali_utilization_data_unlock(); ++ num_groups = mali_group_get_glob_num_groups(); ++ ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); ++ ++ if (NULL != group) { ++ group->disable_requested = MALI_FALSE; + } ++ } + -+ } else { -+ /* Nothing to do */ -+ mali_utilization_data_unlock(); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ core_scaling_delay_up_mask[i] = 0; + } +} + -+void mali_utilization_pp_start(void) ++static void mali_executor_core_scale(unsigned int target_core_nr) +{ -+ mali_utilization_data_lock(); ++ int current_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; ++ int target_core_scaling_mask[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; ++ int i; + -+ ++num_running_pp_cores; -+ if (1 == num_running_pp_cores) { -+ u64 time_now = _mali_osk_time_get_ns(); ++ MALI_DEBUG_ASSERT(0 < target_core_nr); ++ MALI_DEBUG_ASSERT(num_physical_pp_cores_total >= target_core_nr); + -+ /* First PP core started, consider PP busy from now and onwards */ -+ work_start_time_pp = time_now; ++ mali_executor_lock(); + -+ if (0 == num_running_gp_cores) { -+ mali_bool is_resume = MALI_FALSE; -+ /* -+ * There are no GP cores running, so this is also the point -+ * at which we consider the GPU to be busy as well. -+ */ -+ work_start_time_gpu = time_now; ++ if (target_core_nr < num_physical_pp_cores_enabled) { ++ MALI_DEBUG_PRINT(2, ("Requesting %d cores: disabling %d cores\n", target_core_nr, num_physical_pp_cores_enabled - target_core_nr)); ++ } else { ++ MALI_DEBUG_PRINT(2, ("Requesting %d cores: enabling %d cores\n", target_core_nr, target_core_nr - num_physical_pp_cores_enabled)); ++ } + -+ /* Start a new period if stoped */ -+ is_resume = mali_control_timer_resume(time_now); ++ /* When a new core scaling request is comming, we should remove the un-doing ++ * part of the last core scaling request. 
It's safe because we have only one ++ * lock(executor lock) protection. */ ++ mali_executor_core_scaling_reset(); + -+ mali_utilization_data_unlock(); ++ mali_pm_get_best_power_cost_mask(num_physical_pp_cores_enabled, current_core_scaling_mask); ++ mali_pm_get_best_power_cost_mask(target_core_nr, target_core_scaling_mask); + -+ if (is_resume) { -+#if defined(CONFIG_MALI_DVFS) -+ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */ -+ mali_session_max_window_num(); -+ if (0 == last_utilization_gpu) { -+ /* -+ * for mali_dev_pause is called in set clock, -+ * so each time we change clock, we will set clock to -+ * highest step even if under down clock case, -+ * it is not nessesary, so we only set the clock under -+ * last time utilization equal 0, we stop the timer then -+ * start the GPU again case -+ */ -+ mali_dvfs_policy_new_period(); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ target_core_scaling_mask[i] = target_core_scaling_mask[i] - current_core_scaling_mask[i]; ++ MALI_DEBUG_PRINT(5, ("target_core_scaling_mask[%d] = %d\n", i, target_core_scaling_mask[i])); ++ } ++ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (0 > target_core_scaling_mask[i]) { ++ struct mali_pm_domain *domain; ++ ++ domain = mali_pm_domain_get_from_index(i); ++ ++ /* Domain is valid and has pp cores */ ++ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { ++ struct mali_group *group; ++ struct mali_group *temp; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) { ++ if (NULL != mali_group_get_pp_core(group) && (!mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED)) ++ && (!mali_group_is_virtual(group))) { ++ mali_executor_group_disable_internal(group); ++ target_core_scaling_mask[i]++; ++ if ((0 == target_core_scaling_mask[i])) { ++ break; ++ } ++ ++ } + } -+#endif ++ } ++ } ++ } + -+ /* -+ * First timeout using short interval for power consideration -+ * because we give full power in the new period, but if the -+ * job loading is light, finish in 10ms, the other time all keep -+ * in high freq it will wast time. -+ */ -+ mali_control_timer_add(mali_control_first_timeout); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ /** ++ * Target_core_scaling_mask[i] is bigger than 0, ++ * means we need to enable some pp cores in ++ * this domain whose domain index is i. ++ */ ++ if (0 < target_core_scaling_mask[i]) { ++ struct mali_pm_domain *domain; ++ ++ if (num_physical_pp_cores_enabled >= target_core_nr) { ++ break; ++ } ++ ++ domain = mali_pm_domain_get_from_index(i); ++ ++ /* Domain is valid and has pp cores */ ++ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { ++ struct mali_group *group; ++ struct mali_group *temp; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &domain->group_list, struct mali_group, pm_domain_list) { ++ if (NULL != mali_group_get_pp_core(group) && mali_executor_group_is_in_state(group, EXEC_STATE_DISABLED) ++ && (!mali_group_is_virtual(group))) { ++ mali_executor_group_enable_internal(group); ++ target_core_scaling_mask[i]--; ++ ++ if ((0 == target_core_scaling_mask[i]) || num_physical_pp_cores_enabled == target_core_nr) { ++ break; ++ } ++ } ++ } + } -+ } else { -+ mali_utilization_data_unlock(); + } -+ } else { -+ /* Nothing to do */ -+ mali_utilization_data_unlock(); + } ++ ++ /** ++ * Here, we may still have some pp cores not been enabled because of some ++ * pp cores need to be disabled are still in working state. 
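++ * The counts that could not be satisfied here are kept in
++ * core_scaling_delay_up_mask[], and mali_executor_core_scale_in_group_complete()
++ * enables those cores once the affected groups finish their current jobs.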
++ */ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (0 < target_core_scaling_mask[i]) { ++ core_scaling_delay_up_mask[i] = target_core_scaling_mask[i]; ++ } ++ } ++ ++ mali_executor_schedule(); ++ mali_executor_unlock(); +} + -+void mali_utilization_gp_end(void) ++static void mali_executor_core_scale_in_group_complete(struct mali_group *group) +{ -+ mali_utilization_data_lock(); ++ int num_pp_cores_disabled = 0; ++ int num_pp_cores_to_enable = 0; ++ int i; + -+ --num_running_gp_cores; -+ if (0 == num_running_gp_cores) { -+ u64 time_now = _mali_osk_time_get_ns(); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_group_disable_requested(group)); + -+ /* Last GP core ended, consider GP idle from now and onwards */ -+ accumulated_work_time_gp += (time_now - work_start_time_gp); -+ work_start_time_gp = 0; ++ /* Disable child group of virtual group */ ++ if (mali_group_is_virtual(group)) { ++ struct mali_group *child; ++ struct mali_group *temp; + -+ if (0 == num_running_pp_cores) { -+ /* -+ * There are no PP cores running, so this is also the point -+ * at which we consider the GPU to be idle as well. -+ */ -+ accumulated_work_time_gpu += (time_now - work_start_time_gpu); -+ work_start_time_gpu = 0; ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ if (MALI_TRUE == mali_group_disable_requested(child)) { ++ mali_group_set_disable_request(child, MALI_FALSE); ++ mali_executor_group_disable_internal(child); ++ num_pp_cores_disabled++; ++ } ++ } ++ mali_group_set_disable_request(group, MALI_FALSE); ++ } else { ++ mali_executor_group_disable_internal(group); ++ mali_group_set_disable_request(group, MALI_FALSE); ++ if (NULL != mali_group_get_pp_core(group)) { ++ num_pp_cores_disabled++; + } + } + -+ mali_utilization_data_unlock(); -+} ++ num_pp_cores_to_enable = num_pp_cores_disabled; + -+void mali_utilization_pp_end(void) -+{ -+ mali_utilization_data_lock(); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (0 < core_scaling_delay_up_mask[i]) { ++ struct mali_pm_domain *domain; + -+ --num_running_pp_cores; -+ if (0 == num_running_pp_cores) { -+ u64 time_now = _mali_osk_time_get_ns(); ++ if (0 == num_pp_cores_to_enable) { ++ break; ++ } + -+ /* Last PP core ended, consider PP idle from now and onwards */ -+ accumulated_work_time_pp += (time_now - work_start_time_pp); -+ work_start_time_pp = 0; ++ domain = mali_pm_domain_get_from_index(i); + -+ if (0 == num_running_gp_cores) { -+ /* -+ * There are no GP cores running, so this is also the point -+ * at which we consider the GPU to be idle as well. 
-+ */ -+ accumulated_work_time_gpu += (time_now - work_start_time_gpu); -+ work_start_time_gpu = 0; ++ /* Domain is valid and has pp cores */ ++ if ((NULL != domain) && !(_mali_osk_list_empty(&domain->group_list))) { ++ struct mali_group *disabled_group; ++ struct mali_group *temp; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(disabled_group, temp, &domain->group_list, struct mali_group, pm_domain_list) { ++ if (NULL != mali_group_get_pp_core(disabled_group) && mali_executor_group_is_in_state(disabled_group, EXEC_STATE_DISABLED)) { ++ mali_executor_group_enable_internal(disabled_group); ++ core_scaling_delay_up_mask[i]--; ++ num_pp_cores_to_enable--; ++ ++ if ((0 == core_scaling_delay_up_mask[i]) || 0 == num_pp_cores_to_enable) { ++ break; ++ } ++ } ++ } ++ } + } + } + -+ mali_utilization_data_unlock(); ++ _mali_osk_wait_queue_wake_up(executor_notify_core_change_wait_queue); +} + -+mali_bool mali_utilization_enabled(void) ++static void mali_executor_change_group_status_disabled(struct mali_group *group) +{ -+#if defined(CONFIG_MALI_DVFS) -+ return mali_dvfs_policy_enabled(); -+#else -+ return (NULL != mali_utilization_callback); -+#endif /* defined(CONFIG_MALI_DVFS) */ -+} ++ /* Physical PP group */ ++ mali_bool idle; + -+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data) -+{ -+ MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ mali_utilization_callback(util_data); ++ idle = mali_executor_group_is_in_state(group, EXEC_STATE_IDLE); ++ if (MALI_TRUE == idle) { ++ mali_executor_change_state_pp_physical(group, ++ &group_list_idle, ++ &group_list_idle_count, ++ &group_list_disabled, ++ &group_list_disabled_count); ++ } else { ++ mali_executor_change_state_pp_physical(group, ++ &group_list_inactive, ++ &group_list_inactive_count, ++ &group_list_disabled, ++ &group_list_disabled_count); ++ } +} + -+void mali_utilization_reset(void) ++static mali_bool mali_executor_deactivate_list_idle(mali_bool deactivate_idle_group) +{ -+ accumulated_work_time_gpu = 0; -+ accumulated_work_time_gp = 0; -+ accumulated_work_time_pp = 0; ++ mali_bool trigger_pm_update = MALI_FALSE; + -+ last_utilization_gpu = 0; -+ last_utilization_gp = 0; -+ last_utilization_pp = 0; -+} ++ if (group_list_idle_count > 0) { ++ if (mali_executor_has_virtual_group()) { + -+void mali_utilization_data_lock(void) -+{ -+ _mali_osk_spinlock_irq_lock(utilization_data_lock); -+} ++ /* Rejoin virtual group on Mali-450 */ + -+void mali_utilization_data_unlock(void) -+{ -+ _mali_osk_spinlock_irq_unlock(utilization_data_lock); -+} ++ struct mali_group *group; ++ struct mali_group *temp; + -+void mali_utilization_data_assert_locked(void) -+{ -+ MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock); -+} ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, ++ &group_list_idle, ++ struct mali_group, executor_list) { ++ if (mali_executor_physical_rejoin_virtual( ++ group)) { ++ trigger_pm_update = MALI_TRUE; ++ } ++ } ++ } else if (deactivate_idle_group) { ++ struct mali_group *group; ++ struct mali_group *temp; + -+u32 _mali_ukk_utilization_gp_pp(void) -+{ -+ return last_utilization_gpu; ++ /* Deactivate group on Mali-300/400 */ ++ ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, ++ &group_list_idle, ++ struct mali_group, executor_list) { ++ if (mali_group_deactivate(group)) { ++ trigger_pm_update = MALI_TRUE; ++ } ++ ++ /* Move from idle to inactive */ ++ mali_executor_change_state_pp_physical(group, ++ &group_list_idle, ++ &group_list_idle_count, ++ &group_list_inactive, ++ 
&group_list_inactive_count); ++ } ++ } ++ } ++ ++ return trigger_pm_update; +} + -+u32 _mali_ukk_utilization_gp(void) ++void mali_executor_running_status_print(void) +{ -+ return last_utilization_gp; ++ struct mali_group *group = NULL; ++ struct mali_group *temp = NULL; ++ ++ MALI_PRINT(("GP running job: %p\n", gp_group->gp_running_job)); ++ if ((gp_group->gp_core) && (gp_group->is_working)) { ++ mali_group_dump_status(gp_group); ++ } ++ MALI_PRINT(("Physical PP groups in WORKING state (count = %u):\n", group_list_working_count)); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_working, struct mali_group, executor_list) { ++ MALI_PRINT(("PP running job: %p, subjob %d \n", group->pp_running_job, group->pp_running_sub_job)); ++ mali_group_dump_status(group); ++ } ++ MALI_PRINT(("Physical PP groups in INACTIVE state (count = %u):\n", group_list_inactive_count)); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_inactive, struct mali_group, executor_list) { ++ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off")); ++ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); ++ } ++ MALI_PRINT(("Physical PP groups in IDLE state (count = %u):\n", group_list_idle_count)); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_idle, struct mali_group, executor_list) { ++ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off")); ++ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); ++ } ++ MALI_PRINT(("Physical PP groups in DISABLED state (count = %u):\n", group_list_disabled_count)); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &group_list_disabled, struct mali_group, executor_list) { ++ MALI_PRINT(("\tPP status %d, SW power: %s\n", group->state, group->power_is_on ? "On" : "Off")); ++ MALI_PRINT(("\tPP #%d: %s\n", group->pp_core->core_id, group->pp_core->hw_core.description)); ++ } ++ ++ if (mali_executor_has_virtual_group()) { ++ MALI_PRINT(("Virtual group running job: %p\n", virtual_group->pp_running_job)); ++ MALI_PRINT(("Virtual group status: %d\n", virtual_group_state)); ++ MALI_PRINT(("Virtual group->status: %d\n", virtual_group->state)); ++ MALI_PRINT(("\tSW power: %s\n", virtual_group->power_is_on ? "On" : "Off")); ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &virtual_group->group_list, ++ struct mali_group, group_list) { ++ int i = 0; ++ MALI_PRINT(("\tchild group(%s) running job: %p\n", group->pp_core->hw_core.description, group->pp_running_job)); ++ MALI_PRINT(("\tchild group(%s)->status: %d\n", group->pp_core->hw_core.description, group->state)); ++ MALI_PRINT(("\tchild group(%s) SW power: %s\n", group->pp_core->hw_core.description, group->power_is_on ? "On" : "Off")); ++ if (group->pm_domain) { ++ MALI_PRINT(("\tPower domain: id %u\n", mali_pm_domain_get_id(group->pm_domain))); ++ MALI_PRINT(("\tMask:0x%04x \n", mali_pm_domain_get_mask(group->pm_domain))); ++ MALI_PRINT(("\tUse-count:%u \n", mali_pm_domain_get_use_count(group->pm_domain))); ++ MALI_PRINT(("\tCurrent power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_current_mask()) ? "On" : "Off")); ++ MALI_PRINT(("\tWanted power status:%s \n", (mali_pm_domain_get_mask(group->pm_domain)& mali_pm_get_wanted_mask()) ? 
"On" : "Off")); ++ } ++ ++ for (i = 0; i < 2; i++) { ++ if (NULL != group->l2_cache_core[i]) { ++ struct mali_pm_domain *domain; ++ domain = mali_l2_cache_get_pm_domain(group->l2_cache_core[i]); ++ MALI_PRINT(("\t L2(index %d) group SW power: %s\n", i, group->l2_cache_core[i]->power_is_on ? "On" : "Off")); ++ if (domain) { ++ MALI_PRINT(("\tL2 Power domain: id %u\n", mali_pm_domain_get_id(domain))); ++ MALI_PRINT(("\tL2 Mask:0x%04x \n", mali_pm_domain_get_mask(domain))); ++ MALI_PRINT(("\tL2 Use-count:%u \n", mali_pm_domain_get_use_count(domain))); ++ MALI_PRINT(("\tL2 Current power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_current_mask()) ? "On" : "Off")); ++ MALI_PRINT(("\tL2 Wanted power status:%s \n", (mali_pm_domain_get_mask(domain) & mali_pm_get_wanted_mask()) ? "On" : "Off")); ++ } ++ } ++ } ++ } ++ if (EXEC_STATE_WORKING == virtual_group_state) { ++ mali_group_dump_status(virtual_group); ++ } ++ } +} + -+u32 _mali_ukk_utilization_pp(void) ++void mali_executor_status_dump(void) +{ -+ return last_utilization_pp; ++ mali_executor_lock(); ++ mali_scheduler_lock(); ++ ++ /* print schedule queue status */ ++ mali_scheduler_gp_pp_job_queue_print(); ++ ++ mali_scheduler_unlock(); ++ mali_executor_unlock(); +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_executor.h b/drivers/gpu/arm/mali400/mali/common/mali_executor.h new file mode 100644 -index 000000000..06f585dcb +index 000000000..4224d6a6c --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h -@@ -0,0 +1,72 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_executor.h +@@ -0,0 +1,102 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012, 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -288438,127 +291763,106 @@ index 000000000..06f585dcb + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_KERNEL_UTILIZATION_H__ -+#define __MALI_KERNEL_UTILIZATION_H__ ++#ifndef __MALI_EXECUTOR_H__ ++#define __MALI_EXECUTOR_H__ + -+#include +#include "mali_osk.h" ++#include "mali_scheduler_types.h" ++#include "mali_kernel_common.h" + -+/** -+ * Initialize/start the Mali GPU utilization metrics reporting. -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. 
-+ */ -+_mali_osk_errcode_t mali_utilization_init(void); -+ -+/** -+ * Terminate the Mali GPU utilization metrics reporting -+ */ -+void mali_utilization_term(void); ++typedef enum { ++ MALI_EXECUTOR_HINT_GP_BOUND = 0 ++#define MALI_EXECUTOR_HINT_MAX 1 ++} mali_executor_hint; + -+/** -+ * Check if Mali utilization is enabled -+ */ -+mali_bool mali_utilization_enabled(void); ++extern mali_bool mali_executor_hints[MALI_EXECUTOR_HINT_MAX]; + -+/** -+ * Should be called when a job is about to execute a GP job -+ */ -+void mali_utilization_gp_start(void); ++/* forward declare struct instead of using include */ ++struct mali_session_data; ++struct mali_group; ++struct mali_pp_core; + -+/** -+ * Should be called when a job has completed executing a GP job -+ */ -+void mali_utilization_gp_end(void); ++extern _mali_osk_spinlock_irq_t *mali_executor_lock_obj; + -+/** -+ * Should be called when a job is about to execute a PP job -+ */ -+void mali_utilization_pp_start(void); ++#define MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj); + -+/** -+ * Should be called when a job has completed executing a PP job -+ */ -+void mali_utilization_pp_end(void); ++_mali_osk_errcode_t mali_executor_initialize(void); ++void mali_executor_terminate(void); + -+/** -+ * Should be called to calcution the GPU utilization -+ */ -+struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer); ++void mali_executor_populate(void); ++void mali_executor_depopulate(void); + -+_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void); ++void mali_executor_suspend(void); ++void mali_executor_resume(void); + -+void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data); ++u32 mali_executor_get_num_cores_total(void); ++u32 mali_executor_get_num_cores_enabled(void); ++struct mali_pp_core *mali_executor_get_virtual_pp(void); ++struct mali_group *mali_executor_get_virtual_group(void); + -+void mali_utilization_data_lock(void); ++void mali_executor_zap_all_active(struct mali_session_data *session); + -+void mali_utilization_data_unlock(void); ++/** ++ * Schedule GP and PP according to bitmask. ++ * ++ * @param mask A scheduling bitmask. ++ * @param deferred_schedule MALI_TRUE if schedule should be deferred, MALI_FALSE if not. 
++ */ ++void mali_executor_schedule_from_mask(mali_scheduler_mask mask, mali_bool deferred_schedule); + -+void mali_utilization_data_assert_locked(void); ++_mali_osk_errcode_t mali_executor_interrupt_gp(struct mali_group *group, mali_bool in_upper_half); ++_mali_osk_errcode_t mali_executor_interrupt_pp(struct mali_group *group, mali_bool in_upper_half); ++_mali_osk_errcode_t mali_executor_interrupt_mmu(struct mali_group *group, mali_bool in_upper_half); ++void mali_executor_group_power_up(struct mali_group *groups[], u32 num_groups); ++void mali_executor_group_power_down(struct mali_group *groups[], u32 num_groups); + -+void mali_utilization_reset(void); ++void mali_executor_abort_session(struct mali_session_data *session); + ++void mali_executor_core_scaling_enable(void); ++void mali_executor_core_scaling_disable(void); ++mali_bool mali_executor_core_scaling_is_enabled(void); + -+#endif /* __MALI_KERNEL_UTILIZATION_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c -new file mode 100644 -index 000000000..dd44e5e7f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++void mali_executor_group_enable(struct mali_group *group); ++void mali_executor_group_disable(struct mali_group *group); ++mali_bool mali_executor_group_is_disabled(struct mali_group *group); + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_ukk.h" ++int mali_executor_set_perf_level(unsigned int target_core_nr, mali_bool override); + -+#include "mali_osk_profiling.h" ++#if MALI_STATE_TRACKING ++u32 mali_executor_dump_state(char *buf, u32 size); ++#endif + -+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args) ++MALI_STATIC_INLINE void mali_executor_hint_enable(mali_executor_hint hint) +{ -+ _mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event; -+ MALI_IGNORE(event); /* event is not used for release code, and that is OK */ -+ -+ /* -+ * Manually generate user space events in kernel space. -+ * This saves user space from calling kernel space twice in this case. -+ * We just need to remember to add pid and tid manually. 
-+ */ -+ if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC, -+ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0); -+ } -+ -+ if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) { -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC, -+ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0); -+ } ++ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); ++ mali_executor_hints[hint] = MALI_TRUE; ++} + ++MALI_STATIC_INLINE void mali_executor_hint_disable(mali_executor_hint hint) ++{ ++ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); ++ mali_executor_hints[hint] = MALI_FALSE; ++} + -+ MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event)); -+ MALI_SUCCESS; ++MALI_STATIC_INLINE mali_bool mali_executor_hint_is_enabled(mali_executor_hint hint) ++{ ++ MALI_DEBUG_ASSERT(hint < MALI_EXECUTOR_HINT_MAX); ++ return mali_executor_hints[hint]; +} + -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c ++void mali_executor_running_status_print(void); ++void mali_executor_status_dump(void); ++void mali_executor_lock(void); ++void mali_executor_unlock(void); ++#endif /* __MALI_EXECUTOR_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp.c b/drivers/gpu/arm/mali400/mali/common/mali_gp.c new file mode 100644 -index 000000000..fe33f561b +index 000000000..7d3d4aff7 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c -@@ -0,0 +1,534 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_gp.c +@@ -0,0 +1,357 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -288566,539 +291870,362 @@ index 000000000..fe33f561b + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_l2_cache.h" -+#include "mali_hw_core.h" -+#include "mali_scheduler.h" -+#include "mali_pm.h" -+#include "mali_pm_domain.h" -+ -+/** -+ * Size of the Mali L2 cache registers in bytes -+ */ -+#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30 + -+/** -+ * Mali L2 cache register numbers -+ * Used in the register read/write routines. 
-+ * See the hardware documentation for more information about each register -+ */ -+typedef enum mali_l2_cache_register { -+ MALI400_L2_CACHE_REGISTER_SIZE = 0x0004, -+ MALI400_L2_CACHE_REGISTER_STATUS = 0x0008, -+ /*unused = 0x000C */ -+ MALI400_L2_CACHE_REGISTER_COMMAND = 0x0010, -+ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE = 0x0014, -+ MALI400_L2_CACHE_REGISTER_MAX_READS = 0x0018, -+ MALI400_L2_CACHE_REGISTER_ENABLE = 0x001C, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C, -+} mali_l2_cache_register; ++#include "mali_gp.h" ++#include "mali_hw_core.h" ++#include "mali_group.h" ++#include "mali_osk.h" ++#include "regs/mali_gp_regs.h" ++#include "mali_kernel_common.h" ++#include "mali_kernel_core.h" ++#if defined(CONFIG_MALI400_PROFILING) ++#include "mali_osk_profiling.h" ++#endif + -+/** -+ * Mali L2 cache commands -+ * These are the commands that can be sent to the Mali L2 cache unit -+ */ -+typedef enum mali_l2_cache_command { -+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01, -+} mali_l2_cache_command; ++static struct mali_gp_core *mali_global_gp_core = NULL; + -+/** -+ * Mali L2 cache commands -+ * These are the commands that can be sent to the Mali L2 cache unit -+ */ -+typedef enum mali_l2_cache_enable { -+ MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */ -+ MALI400_L2_CACHE_ENABLE_ACCESS = 0x01, -+ MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02, -+} mali_l2_cache_enable; ++/* Interrupt handlers */ ++static void mali_gp_irq_probe_trigger(void *data); ++static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data); + -+/** -+ * Mali L2 cache status bits -+ */ -+typedef enum mali_l2_cache_status { -+ MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01, -+ MALI400_L2_CACHE_STATUS_DATA_BUSY = 0x02, -+} mali_l2_cache_status; ++struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group) ++{ ++ struct mali_gp_core *core = NULL; + -+#define MALI400_L2_MAX_READS_NOT_SET -1 ++ MALI_DEBUG_ASSERT(NULL == mali_global_gp_core); ++ MALI_DEBUG_PRINT(2, ("Mali GP: Creating Mali GP core: %s\n", resource->description)); + -+static struct mali_l2_cache_core * -+ mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, }; -+static u32 mali_global_num_l2s = 0; ++ core = _mali_osk_malloc(sizeof(struct mali_gp_core)); ++ if (NULL != core) { ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALIGP2_REGISTER_ADDRESS_SPACE_SIZE)) { ++ _mali_osk_errcode_t ret; + -+int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET; ++ ret = mali_gp_reset(core); + ++ if (_MALI_OSK_ERR_OK == ret) { ++ ret = mali_group_add_gp_core(group, core); ++ if (_MALI_OSK_ERR_OK == ret) { ++ /* Setup IRQ handlers (which will do IRQ probing if needed) */ ++ core->irq = _mali_osk_irq_init(resource->irq, ++ mali_group_upper_half_gp, ++ group, ++ mali_gp_irq_probe_trigger, ++ mali_gp_irq_probe_ack, ++ core, ++ resource->description); ++ if (NULL != core->irq) { ++ MALI_DEBUG_PRINT(4, ("Mali GP: set global gp core from 0x%08X to 0x%08X\n", mali_global_gp_core, core)); ++ mali_global_gp_core = core; + -+/* Local helper functions */ ++ return core; ++ } else { ++ MALI_PRINT_ERROR(("Mali GP: Failed to setup interrupt handlers for GP core %s\n", core->hw_core.description)); ++ } ++ mali_group_remove_gp_core(group); ++ } else { ++ MALI_PRINT_ERROR(("Mali GP: Failed to add core %s to group\n", core->hw_core.description)); ++ } ++ } ++ 
mali_hw_core_delete(&core->hw_core); ++ } + -+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache); ++ _mali_osk_free(core); ++ } else { ++ MALI_PRINT_ERROR(("Failed to allocate memory for GP core\n")); ++ } + -+static _mali_osk_errcode_t mali_l2_cache_send_command( -+ struct mali_l2_cache_core *cache, u32 reg, u32 val); ++ return NULL; ++} + -+static void mali_l2_cache_lock(struct mali_l2_cache_core *cache) ++void mali_gp_delete(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ _mali_osk_spinlock_irq_lock(cache->lock); ++ MALI_DEBUG_ASSERT_POINTER(core); ++ ++ _mali_osk_irq_term(core->irq); ++ mali_hw_core_delete(&core->hw_core); ++ mali_global_gp_core = NULL; ++ _mali_osk_free(core); +} + -+static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache) ++void mali_gp_stop_bus(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ _mali_osk_spinlock_irq_unlock(cache->lock); -+} ++ MALI_DEBUG_ASSERT_POINTER(core); + -+/* Implementation of the L2 cache interface */ ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_STOP_BUS); ++} + -+struct mali_l2_cache_core *mali_l2_cache_create( -+ _mali_osk_resource_t *resource, u32 domain_index) ++_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core) +{ -+ struct mali_l2_cache_core *cache = NULL; -+#if defined(DEBUG) -+ u32 cache_size; -+#endif ++ int i; + -+ MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n", -+ resource->description)); ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) { -+ MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n")); -+ return NULL; ++ /* Send the stop bus command. */ ++ mali_gp_stop_bus(core); ++ ++ /* Wait for bus to be stopped */ ++ for (i = 0; i < MALI_REG_POLL_COUNT_SLOW; i++) { ++ if (mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS) & MALIGP2_REG_VAL_STATUS_BUS_STOPPED) { ++ break; ++ } + } + -+ cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core)); -+ if (NULL == cache) { -+ MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n")); -+ return NULL; ++ if (MALI_REG_POLL_COUNT_SLOW == i) { ++ MALI_PRINT_ERROR(("Mali GP: Failed to stop bus on %s\n", core->hw_core.description)); ++ return _MALI_OSK_ERR_FAULT; + } ++ return _MALI_OSK_ERR_OK; ++} + -+ cache->core_id = mali_global_num_l2s; -+ cache->counter_src0 = MALI_HW_CORE_NO_COUNTER; -+ cache->counter_src1 = MALI_HW_CORE_NO_COUNTER; -+ cache->counter_value0_base = 0; -+ cache->counter_value1_base = 0; -+ cache->pm_domain = NULL; -+ cache->power_is_on = MALI_FALSE; -+ cache->last_invalidated_id = 0; ++void mali_gp_hard_reset(struct mali_gp_core *core) ++{ ++ const u32 reset_wait_target_register = MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT; ++ const u32 reset_invalid_value = 0xC0FFE000; ++ const u32 reset_check_value = 0xC01A0000; ++ const u32 reset_default_value = 0; ++ int i; + -+ if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core, -+ resource, MALI400_L2_CACHE_REGISTERS_SIZE)) { -+ _mali_osk_free(cache); -+ return NULL; -+ } ++ MALI_DEBUG_ASSERT_POINTER(core); ++ MALI_DEBUG_PRINT(4, ("Mali GP: Hard reset of core %s\n", core->hw_core.description)); + -+#if defined(DEBUG) -+ cache_size = mali_hw_core_register_read(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_SIZE); -+ MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n", -+ resource->description, -+ 1 << (((cache_size >> 16) 
& 0xff) - 10), -+ 1 << ((cache_size >> 8) & 0xff), -+ 1 << (cache_size & 0xff), -+ 1 << ((cache_size >> 24) & 0xff))); -+#endif ++ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_invalid_value); + -+ cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_L2); -+ if (NULL == cache->lock) { -+ MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n", -+ cache->hw_core.description)); -+ mali_hw_core_delete(&cache->hw_core); -+ _mali_osk_free(cache); -+ return NULL; ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_RESET); ++ ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { ++ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value); ++ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) { ++ break; ++ } + } + -+ /* register with correct power domain */ -+ cache->pm_domain = mali_pm_register_l2_cache( -+ domain_index, cache); ++ if (MALI_REG_POLL_COUNT_FAST == i) { ++ MALI_PRINT_ERROR(("Mali GP: The hard reset loop didn't work, unable to recover\n")); ++ } + -+ mali_global_l2s[mali_global_num_l2s] = cache; -+ mali_global_num_l2s++; ++ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_default_value); /* set it back to the default */ ++ /* Re-enable interrupts */ ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); + -+ return cache; +} + -+void mali_l2_cache_delete(struct mali_l2_cache_core *cache) ++void mali_gp_reset_async(struct mali_gp_core *core) +{ -+ u32 i; -+ for (i = 0; i < mali_global_num_l2s; i++) { -+ if (mali_global_l2s[i] != cache) { -+ continue; -+ } ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ mali_global_l2s[i] = NULL; -+ mali_global_num_l2s--; -+ -+ if (i == mali_global_num_l2s) { -+ /* Removed last element, nothing more to do */ -+ break; -+ } -+ -+ /* -+ * We removed a l2 cache from the middle of the array, -+ * so move the last l2 cache to current position -+ */ -+ mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s]; -+ mali_global_l2s[mali_global_num_l2s] = NULL; ++ MALI_DEBUG_PRINT(4, ("Mali GP: Reset of core %s\n", core->hw_core.description)); + -+ /* All good */ -+ break; -+ } ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */ ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALI400GP_REG_VAL_IRQ_RESET_COMPLETED); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALI400GP_REG_VAL_CMD_SOFT_RESET); + -+ _mali_osk_spinlock_irq_term(cache->lock); -+ mali_hw_core_delete(&cache->hw_core); -+ _mali_osk_free(cache); +} + -+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache) ++_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); ++ int i; ++ u32 rawstat = 0; + -+ mali_l2_cache_lock(cache); ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ mali_l2_cache_reset(cache); ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { ++ rawstat = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); ++ if (rawstat & MALI400GP_REG_VAL_IRQ_RESET_COMPLETED) { ++ break; ++ } ++ } + -+ if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask) -+ MALI_DEBUG_ASSERT(MALI_FALSE == 
cache->power_is_on); -+ cache->power_is_on = MALI_TRUE; ++ if (i == MALI_REG_POLL_COUNT_FAST) { ++ MALI_PRINT_ERROR(("Mali GP: Failed to reset core %s, rawstat: 0x%08x\n", ++ core->hw_core.description, rawstat)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ mali_l2_cache_unlock(cache); ++ /* Re-enable interrupts */ ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_MASK_ALL); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); ++ ++ return _MALI_OSK_ERR_OK; +} + -+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache) ++_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ -+ mali_l2_cache_lock(cache); ++ mali_gp_reset_async(core); ++ return mali_gp_reset_wait(core); ++} + -+ MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on); ++void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job) ++{ ++ u32 startcmd = 0; ++ u32 *frame_registers = mali_gp_job_get_frame_registers(job); ++ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job); ++ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job); + -+ /* -+ * The HW counters will start from zero again when we resume, -+ * but we should report counters as always increasing. -+ * Take a copy of the HW values now in order to add this to -+ * the values we report after being powered up. -+ * -+ * The physical power off of the L2 cache might be outside our -+ * own control (e.g. runtime PM). That is why we must manually -+ * set set the counter value to zero as well. -+ */ ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { -+ cache->counter_value0_base += mali_hw_core_register_read( -+ &cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0); -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0); ++ if (mali_gp_job_has_vs_job(job)) { ++ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_VS; + } + -+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { -+ cache->counter_value1_base += mali_hw_core_register_read( -+ &cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1); -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0); ++ if (mali_gp_job_has_plbu_job(job)) { ++ startcmd |= (u32) MALIGP2_REG_VAL_CMD_START_PLBU; + } + ++ MALI_DEBUG_ASSERT(0 != startcmd); + -+ cache->power_is_on = MALI_FALSE; -+ -+ mali_l2_cache_unlock(cache); -+} -+ -+void mali_l2_cache_core_set_counter_src( -+ struct mali_l2_cache_core *cache, u32 source_id, u32 counter) -+{ -+ u32 reg_offset_src; -+ u32 reg_offset_val; ++ mali_hw_core_register_write_array_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR, frame_registers, MALIGP2_NUM_REGS_FRAME); + -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1); ++ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE); ++ } ++ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALIGP2_REG_VAL_PERF_CNT_ENABLE); ++ } + -+ mali_l2_cache_lock(cache); ++ MALI_DEBUG_PRINT(3, ("Mali GP: Starting job 
(0x%08x) on core %s with command 0x%08X\n", job, core->hw_core.description, startcmd)); + -+ if (0 == source_id) { -+ /* start counting from 0 */ -+ cache->counter_value0_base = 0; -+ cache->counter_src0 = counter; -+ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0; -+ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0; -+ } else { -+ /* start counting from 0 */ -+ cache->counter_value1_base = 0; -+ cache->counter_src1 = counter; -+ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1; -+ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1; -+ } ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC); + -+ if (cache->power_is_on) { -+ u32 hw_src; ++ /* Barrier to make sure the previous register write is finished */ ++ _mali_osk_write_mem_barrier(); + -+ if (MALI_HW_CORE_NO_COUNTER != counter) { -+ hw_src = counter; -+ } else { -+ hw_src = 0; /* disable value for HW */ -+ } ++ /* This is the command that starts the core. ++ * ++ * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just ++ * force core to assert the completion interrupt. ++ */ ++#if !defined(PROFILING_SKIP_GP_JOBS) ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, startcmd); ++#else ++ { ++ u32 bits = 0; + -+ /* Set counter src */ -+ mali_hw_core_register_write(&cache->hw_core, -+ reg_offset_src, hw_src); ++ if (mali_gp_job_has_vs_job(job)) ++ bits = MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST; ++ if (mali_gp_job_has_plbu_job(job)) ++ bits |= MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST; + -+ /* Make sure the HW starts counting from 0 again */ -+ mali_hw_core_register_write(&cache->hw_core, -+ reg_offset_val, 0); ++ mali_hw_core_register_write_relaxed(&core->hw_core, ++ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, bits); + } ++#endif + -+ mali_l2_cache_unlock(cache); ++ /* Barrier to make sure the previous register write is finished */ ++ _mali_osk_write_mem_barrier(); +} + -+void mali_l2_cache_core_get_counter_values( -+ struct mali_l2_cache_core *cache, -+ u32 *src0, u32 *value0, u32 *src1, u32 *value1) ++void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ MALI_DEBUG_ASSERT(NULL != src0); -+ MALI_DEBUG_ASSERT(NULL != value0); -+ MALI_DEBUG_ASSERT(NULL != src1); -+ MALI_DEBUG_ASSERT(NULL != value1); -+ -+ mali_l2_cache_lock(cache); ++ u32 irq_readout; + -+ *src0 = cache->counter_src0; -+ *src1 = cache->counter_src1; ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { -+ if (MALI_TRUE == cache->power_is_on) { -+ *value0 = mali_hw_core_register_read(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0); -+ } else { -+ *value0 = 0; -+ } ++ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); + -+ /* Add base offset value (in case we have been power off) */ -+ *value0 += cache->counter_value0_base; -+ } ++ if (irq_readout & MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM) { ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | MALIGP2_REG_VAL_IRQ_HANG)); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); /* re-enable interrupts */ ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR, start_addr); ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR, end_addr); + 
-+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { -+ if (MALI_TRUE == cache->power_is_on) { -+ *value1 = mali_hw_core_register_read(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1); -+ } else { -+ *value1 = 0; -+ } ++ MALI_DEBUG_PRINT(3, ("Mali GP: Resuming job\n")); + -+ /* Add base offset value (in case we have been power off) */ -+ *value1 += cache->counter_value1_base; ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_CMD, MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC); ++ _mali_osk_write_mem_barrier(); + } -+ -+ mali_l2_cache_unlock(cache); ++ /* ++ * else: core has been reset between PLBU_OUT_OF_MEM interrupt and this new heap response. ++ * A timeout or a page fault on Mali-200 PP core can cause this behaviour. ++ */ +} + -+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index) ++u32 mali_gp_core_get_version(struct mali_gp_core *core) +{ -+ if (mali_global_num_l2s > index) { -+ return mali_global_l2s[index]; -+ } -+ -+ return NULL; ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_VERSION); +} + -+u32 mali_l2_cache_core_get_glob_num_l2_cores(void) ++struct mali_gp_core *mali_gp_get_global_gp_core(void) +{ -+ return mali_global_num_l2s; ++ return mali_global_gp_core; +} + -+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache) ++/* ------------- interrupt handling below ------------------ */ ++static void mali_gp_irq_probe_trigger(void *data) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ -+ if (NULL == cache) { -+ return; -+ } -+ -+ mali_l2_cache_lock(cache); -+ -+ cache->last_invalidated_id = mali_scheduler_get_new_cache_order(); -+ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND, -+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); ++ struct mali_gp_core *core = (struct mali_gp_core *)data; + -+ mali_l2_cache_unlock(cache); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_USED); ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR); ++ _mali_osk_mem_barrier(); +} + -+void mali_l2_cache_invalidate_conditional( -+ struct mali_l2_cache_core *cache, u32 id) ++static _mali_osk_errcode_t mali_gp_irq_probe_ack(void *data) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ -+ if (NULL == cache) { -+ return; -+ } -+ -+ /* -+ * If the last cache invalidation was done by a job with a higher id we -+ * don't have to flush. Since user space will store jobs w/ their -+ * corresponding memory in sequence (first job #0, then job #1, ...), -+ * we don't have to flush for job n-1 if job n has already invalidated -+ * the cache since we know for sure that job n-1's memory was already -+ * written when job n was started. 
-+ */ -+ -+ mali_l2_cache_lock(cache); ++ struct mali_gp_core *core = (struct mali_gp_core *)data; ++ u32 irq_readout; + -+ if (((s32)id) > ((s32)cache->last_invalidated_id)) { -+ /* Set latest invalidated id to current "point in time" */ -+ cache->last_invalidated_id = -+ mali_scheduler_get_new_cache_order(); -+ mali_l2_cache_send_command(cache, -+ MALI400_L2_CACHE_REGISTER_COMMAND, -+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); ++ irq_readout = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT); ++ if (MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR & irq_readout) { ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR); ++ _mali_osk_mem_barrier(); ++ return _MALI_OSK_ERR_OK; + } + -+ mali_l2_cache_unlock(cache); -+} -+ -+void mali_l2_cache_invalidate_all(void) -+{ -+ u32 i; -+ for (i = 0; i < mali_global_num_l2s; i++) { -+ struct mali_l2_cache_core *cache = mali_global_l2s[i]; -+ _mali_osk_errcode_t ret; -+ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ -+ mali_l2_cache_lock(cache); -+ -+ if (MALI_TRUE != cache->power_is_on) { -+ mali_l2_cache_unlock(cache); -+ continue; -+ } -+ -+ cache->last_invalidated_id = -+ mali_scheduler_get_new_cache_order(); -+ -+ ret = mali_l2_cache_send_command(cache, -+ MALI400_L2_CACHE_REGISTER_COMMAND, -+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to invalidate cache\n")); -+ } -+ -+ mali_l2_cache_unlock(cache); -+ } ++ return _MALI_OSK_ERR_FAULT; +} + -+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages) ++/* ------ local helper functions below --------- */ ++#if MALI_STATE_TRACKING ++u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size) +{ -+ u32 i; -+ for (i = 0; i < mali_global_num_l2s; i++) { -+ struct mali_l2_cache_core *cache = mali_global_l2s[i]; -+ u32 j; -+ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ -+ mali_l2_cache_lock(cache); -+ -+ if (MALI_TRUE != cache->power_is_on) { -+ mali_l2_cache_unlock(cache); -+ continue; -+ } -+ -+ for (j = 0; j < num_pages; j++) { -+ _mali_osk_errcode_t ret; ++ int n = 0; + -+ ret = mali_l2_cache_send_command(cache, -+ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE, -+ pages[j]); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n")); -+ } -+ } ++ n += _mali_osk_snprintf(buf + n, size - n, "\tGP: %s\n", core->hw_core.description); + -+ mali_l2_cache_unlock(cache); -+ } ++ return n; +} ++#endif + -+/* -------- local helper functions below -------- */ -+ -+static void mali_l2_cache_reset(struct mali_l2_cache_core *cache) ++void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock); -+ -+ /* Invalidate cache (just to keep it in a known state at startup) */ -+ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND, -+ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); -+ -+ /* Enable cache */ -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_ENABLE, -+ (u32)MALI400_L2_CACHE_ENABLE_ACCESS | -+ (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE); -+ -+ if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) { -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_MAX_READS, -+ (u32)mali_l2_max_reads); -+ } ++ u32 val0 = 0; ++ u32 val1 = 0; ++ u32 counter_src0 = mali_gp_job_get_perf_counter_src0(job); ++ u32 counter_src1 = mali_gp_job_get_perf_counter_src1(job); + -+ /* Restart any performance 
counters (if enabled) */ -+ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { ++ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { ++ val0 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE); ++ mali_gp_job_set_perf_counter_value0(job, val0); + -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0, -+ cache->counter_src0); -+ } ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C0, val0); ++ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C0, val0); ++#endif + -+ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { -+ mali_hw_core_register_write(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1, -+ cache->counter_src1); + } -+} -+ -+static _mali_osk_errcode_t mali_l2_cache_send_command( -+ struct mali_l2_cache_core *cache, u32 reg, u32 val) -+{ -+ int i = 0; -+ const int loop_count = 100000; + -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock); -+ -+ /* -+ * First, wait for L2 cache command handler to go idle. -+ * (Commands received while processing another command will be ignored) -+ */ -+ for (i = 0; i < loop_count; i++) { -+ if (!(mali_hw_core_register_read(&cache->hw_core, -+ MALI400_L2_CACHE_REGISTER_STATUS) & -+ (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) { -+ break; -+ } -+ } ++ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { ++ val1 = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE); ++ mali_gp_job_set_perf_counter_value1(job, val1); + -+ if (i == loop_count) { -+ MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n")); -+ return _MALI_OSK_ERR_FAULT; ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_report_hw_counter(COUNTER_VP_0_C1, val1); ++ _mali_osk_profiling_record_global_counters(COUNTER_VP_0_C1, val1); ++#endif + } -+ -+ /* then issue the command */ -+ mali_hw_core_register_write(&cache->hw_core, reg, val); -+ -+ return _MALI_OSK_ERR_OK; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp.h b/drivers/gpu/arm/mali400/mali/common/mali_gp.h new file mode 100644 -index 000000000..c48a88440 +index 000000000..3156310f2 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h -@@ -0,0 +1,124 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_gp.h +@@ -0,0 +1,127 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -289107,128 +292234,131 @@ index 000000000..c48a88440 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_KERNEL_L2_CACHE_H__ -+#define __MALI_KERNEL_L2_CACHE_H__ ++#ifndef __MALI_GP_H__ ++#define __MALI_GP_H__ + +#include "mali_osk.h" ++#include "mali_gp_job.h" +#include "mali_hw_core.h" ++#include "regs/mali_gp_regs.h" + -+#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES 3 -+/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */ -+#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5 ++struct mali_group; + +/** -+ * Definition of the L2 cache core struct -+ * Used to track a L2 cache unit in the system. -+ * Contains information about the mapping of the registers ++ * Definition of the GP core struct ++ * Used to track a GP core in the system. + */ -+struct mali_l2_cache_core { -+ /* Common HW core functionality */ -+ struct mali_hw_core hw_core; -+ -+ /* Synchronize L2 cache access */ -+ _mali_osk_spinlock_irq_t *lock; -+ -+ /* Unique core ID */ -+ u32 core_id; -+ -+ /* The power domain this L2 cache belongs to */ -+ struct mali_pm_domain *pm_domain; -+ -+ /* MALI_TRUE if power is on for this L2 cache */ -+ mali_bool power_is_on; ++struct mali_gp_core { ++ struct mali_hw_core hw_core; /**< Common for all HW cores */ ++ _mali_osk_irq_t *irq; /**< IRQ handler */ ++}; + -+ /* A "timestamp" to avoid unnecessary flushes */ -+ u32 last_invalidated_id; ++_mali_osk_errcode_t mali_gp_initialize(void); ++void mali_gp_terminate(void); + -+ /* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ -+ u32 counter_src0; ++struct mali_gp_core *mali_gp_create(const _mali_osk_resource_t *resource, struct mali_group *group); ++void mali_gp_delete(struct mali_gp_core *core); + -+ /* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ -+ u32 counter_src1; ++void mali_gp_stop_bus(struct mali_gp_core *core); ++_mali_osk_errcode_t mali_gp_stop_bus_wait(struct mali_gp_core *core); ++void mali_gp_reset_async(struct mali_gp_core *core); ++_mali_osk_errcode_t mali_gp_reset_wait(struct mali_gp_core *core); ++void mali_gp_hard_reset(struct mali_gp_core *core); ++_mali_osk_errcode_t mali_gp_reset(struct mali_gp_core *core); + -+ /* -+ * Performance counter 0 value base/offset -+ * (allows accumulative reporting even after power off) -+ */ -+ u32 counter_value0_base; ++void mali_gp_job_start(struct mali_gp_core *core, struct mali_gp_job *job); ++void mali_gp_resume_with_new_heap(struct mali_gp_core *core, u32 start_addr, u32 end_addr); + -+ /* -+ * Performance counter 0 value base/offset -+ * (allows accumulative reporting even after power off) -+ */ -+ u32 counter_value1_base; ++u32 mali_gp_core_get_version(struct mali_gp_core *core); + -+ /* Used by PM domains to link L2 caches of same domain */ -+ _mali_osk_list_t pm_domain_list; -+}; ++struct mali_gp_core *mali_gp_get_global_gp_core(void); + -+_mali_osk_errcode_t mali_l2_cache_initialize(void); -+void mali_l2_cache_terminate(void); ++#if MALI_STATE_TRACKING ++u32 mali_gp_dump_state(struct mali_gp_core *core, char *buf, u32 size); ++#endif + -+struct mali_l2_cache_core *mali_l2_cache_create( -+ _mali_osk_resource_t *resource, u32 domain_index); -+void mali_l2_cache_delete(struct mali_l2_cache_core *cache); ++void mali_gp_update_performance_counters(struct mali_gp_core *core, struct mali_gp_job *job); + -+MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache) ++MALI_STATIC_INLINE const char *mali_gp_core_description(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ return cache->core_id; ++ return core->hw_core.description; +} + -+MALI_STATIC_INLINE struct mali_pm_domain 
*mali_l2_cache_get_pm_domain( -+ struct mali_l2_cache_core *cache) ++MALI_STATIC_INLINE enum mali_interrupt_result mali_gp_get_interrupt_result(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ return cache->pm_domain; -+} ++ u32 stat_used = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_STAT) & ++ MALIGP2_REG_VAL_IRQ_MASK_USED; + -+void mali_l2_cache_power_up(struct mali_l2_cache_core *cache); -+void mali_l2_cache_power_down(struct mali_l2_cache_core *cache); ++ if (0 == stat_used) { ++ return MALI_INTERRUPT_RESULT_NONE; ++ } else if ((MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | ++ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST) == stat_used) { ++ return MALI_INTERRUPT_RESULT_SUCCESS; ++ } else if (MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST == stat_used) { ++ return MALI_INTERRUPT_RESULT_SUCCESS_VS; ++ } else if (MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST == stat_used) { ++ return MALI_INTERRUPT_RESULT_SUCCESS_PLBU; ++ } else if (MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM & stat_used) { ++ return MALI_INTERRUPT_RESULT_OOM; ++ } + -+void mali_l2_cache_core_set_counter_src( -+ struct mali_l2_cache_core *cache, u32 source_id, u32 counter); ++ return MALI_INTERRUPT_RESULT_ERROR; ++} + -+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0( -+ struct mali_l2_cache_core *cache) ++MALI_STATIC_INLINE u32 mali_gp_get_rawstat(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ return cache->counter_src0; ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return mali_hw_core_register_read(&core->hw_core, ++ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT); +} + -+MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1( -+ struct mali_l2_cache_core *cache) ++MALI_STATIC_INLINE u32 mali_gp_is_active(struct mali_gp_core *core) +{ -+ MALI_DEBUG_ASSERT_POINTER(cache); -+ return cache->counter_src1; ++ u32 status = mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_STATUS); ++ return (status & MALIGP2_REG_VAL_STATUS_MASK_ACTIVE) ? 
MALI_TRUE : MALI_FALSE; +} + -+void mali_l2_cache_core_get_counter_values( -+ struct mali_l2_cache_core *cache, -+ u32 *src0, u32 *value0, u32 *src1, u32 *value1); ++MALI_STATIC_INLINE void mali_gp_mask_all_interrupts(struct mali_gp_core *core) ++{ ++ mali_hw_core_register_write(&core->hw_core, MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_VAL_IRQ_MASK_NONE); ++} + -+struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index); -+u32 mali_l2_cache_core_get_glob_num_l2_cores(void); ++MALI_STATIC_INLINE void mali_gp_enable_interrupts(struct mali_gp_core *core, enum mali_interrupt_result exceptions) ++{ ++ /* Enable all interrupts, except those specified in exceptions */ ++ u32 value; + -+struct mali_group *mali_l2_cache_get_group( -+ struct mali_l2_cache_core *cache, u32 index); ++ if (MALI_INTERRUPT_RESULT_SUCCESS_VS == exceptions) { ++ /* Enable all used except VS complete */ ++ value = MALIGP2_REG_VAL_IRQ_MASK_USED & ++ ~MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST; ++ } else { ++ MALI_DEBUG_ASSERT(MALI_INTERRUPT_RESULT_SUCCESS_PLBU == ++ exceptions); ++ /* Enable all used except PLBU complete */ ++ value = MALIGP2_REG_VAL_IRQ_MASK_USED & ++ ~MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST; ++ } + -+void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache); -+void mali_l2_cache_invalidate_conditional( -+ struct mali_l2_cache_core *cache, u32 id); ++ mali_hw_core_register_write(&core->hw_core, ++ MALIGP2_REG_ADDR_MGMT_INT_MASK, ++ value); ++} + -+void mali_l2_cache_invalidate_all(void); -+void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages); ++MALI_STATIC_INLINE u32 mali_gp_read_plbu_alloc_start_addr(struct mali_gp_core *core) ++{ ++ return mali_hw_core_register_read(&core->hw_core, MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR); ++} + -+#endif /* __MALI_KERNEL_L2_CACHE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c ++#endif /* __MALI_GP_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c new file mode 100644 -index 000000000..eb95998f1 +index 000000000..5d4d9f253 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c -@@ -0,0 +1,68 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.c +@@ -0,0 +1,306 @@ +/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -289237,664 +292367,638 @@ index 000000000..eb95998f1 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_mem_validation.h" ++#include "mali_gp_job.h" +#include "mali_osk.h" -+#include "mali_kernel_common.h" -+ -+#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF ++#include "mali_osk_list.h" ++#include "mali_uk_types.h" ++#include "mali_memory_virtual.h" ++#include "mali_memory_defer_bind.h" + -+typedef struct { -+ u32 phys_base; /**< Mali physical base of the memory, page aligned */ -+ u32 size; /**< size in bytes of the memory, multiple of page size */ -+} _mali_mem_validation_t; ++static u32 gp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ ++static u32 gp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ ++static void _mali_gp_del_varying_allocations(struct mali_gp_job *job); + -+static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR }; + -+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size) ++static int _mali_gp_add_varying_allocations(struct mali_session_data *session, ++ struct mali_gp_job *job, ++ u32 *alloc, ++ u32 num) +{ -+ /* Check that no other MEM_VALIDATION resources exist */ -+ if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) { -+ MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ /* Check restrictions on page alignment */ -+ if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) || -+ (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) { -+ MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ mali_mem_validator.phys_base = start; -+ mali_mem_validator.size = size; -+ MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n", -+ mali_mem_validator.phys_base, mali_mem_validator.size)); ++ int i = 0; ++ struct mali_gp_allocation_node *alloc_node; ++ mali_mem_allocation *mali_alloc = NULL; ++ struct mali_vma_node *mali_vma_node = NULL; + -+ return _MALI_OSK_ERR_OK; -+} ++ for (i = 0 ; i < num ; i++) { ++ MALI_DEBUG_ASSERT(alloc[i]); ++ alloc_node = _mali_osk_calloc(1, sizeof(struct mali_gp_allocation_node)); ++ if (alloc_node) { ++ INIT_LIST_HEAD(&alloc_node->node); ++ /* find mali allocation structure by vaddress*/ ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, alloc[i], 0); + -+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size) -+{ -+#if 0 -+ if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */ -+ if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) && -+ (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) { -+ if ((phys_addr >= mali_mem_validator.phys_base) && -+ ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) && -+ (phys_addr <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) && -+ ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) { -+ MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1))); -+ return _MALI_OSK_ERR_OK; ++ if (likely(mali_vma_node)) { ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(alloc[i] == mali_vma_node->vm_node.start); ++ } else { ++ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,can't find allocation %d by address =0x%x, num=%d\n", i, alloc[i], num)); ++ _mali_osk_free(alloc_node); ++ goto fail; + } -+ } ++ 
alloc_node->alloc = mali_alloc; ++ /* add to gp job varying alloc list*/ ++ list_move(&alloc_node->node, &job->varying_alloc); ++ } else ++ goto fail; + } + -+ MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size)); -+ -+ return _MALI_OSK_ERR_FAULT; -+#endif -+ return _MALI_OSK_ERR_OK; ++ return 0; ++fail: ++ MALI_DEBUG_PRINT(1, ("ERROE!_mali_gp_add_varying_allocations,failed to alloc memory!\n")); ++ _mali_gp_del_varying_allocations(job); ++ return -1; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h -new file mode 100644 -index 000000000..05013f46f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h -@@ -0,0 +1,19 @@ -+/* -+ * Copyright (C) 2011-2013, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEM_VALIDATION_H__ -+#define __MALI_MEM_VALIDATION_H__ + -+#include "mali_osk.h" + -+_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size); -+_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size); ++static void _mali_gp_del_varying_allocations(struct mali_gp_job *job) ++{ ++ struct mali_gp_allocation_node *alloc_node, *tmp_node; + -+#endif /* __MALI_MEM_VALIDATION_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu.c b/drivers/gpu/arm/mali400/mali/common/mali_mmu.c -new file mode 100644 -index 000000000..b82486fa6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu.c -@@ -0,0 +1,433 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ list_for_each_entry_safe(alloc_node, tmp_node, &job->varying_alloc, node) { ++ list_del(&alloc_node->node); ++ kfree(alloc_node); ++ } ++ INIT_LIST_HEAD(&job->varying_alloc); ++} + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_ukk.h" ++struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker) ++{ ++ struct mali_gp_job *job; ++ u32 perf_counter_flag; ++ u32 __user *memory_list = NULL; ++ struct mali_gp_allocation_node *alloc_node, *tmp_node; ++ _mali_uk_gp_start_job_s copy_of_uargs; + -+#include "mali_mmu.h" -+#include "mali_hw_core.h" -+#include "mali_group.h" -+#include "mali_mmu_page_directory.h" ++ job = _mali_osk_calloc(1, sizeof(struct mali_gp_job)); ++ if (NULL != job) { ++ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_FINISHED, sizeof(_mali_uk_gp_job_finished_s)); ++ if (NULL == job->finished_notification) { ++ goto fail3; ++ } + -+/** -+ * Size of the MMU registers in bytes -+ */ -+#define MALI_MMU_REGISTERS_SIZE 0x24 ++ job->oom_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_GP_STALLED, sizeof(_mali_uk_gp_job_suspended_s)); ++ if (NULL == job->oom_notification) { ++ goto fail2; ++ } + -+/** -+ * MMU commands -+ * These are the commands that can be sent -+ * to the MMU unit. -+ */ -+typedef enum mali_mmu_command { -+ MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */ -+ MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */ -+ MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**< Enable stall on page fault */ -+ MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */ -+ MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */ -+ MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */ -+ MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */ -+} mali_mmu_command; ++ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) { ++ goto fail1; ++ } + -+static void mali_mmu_probe_trigger(void *data); -+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data); ++ perf_counter_flag = mali_gp_job_get_perf_counter_flag(job); + -+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu); ++ /* case when no counters came from user space ++ * so pass the debugfs / DS-5 provided global ones to the job object */ ++ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) || ++ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) { ++ mali_gp_job_set_perf_counter_src0(job, mali_gp_job_get_gp_counter_src0()); ++ mali_gp_job_set_perf_counter_src1(job, mali_gp_job_get_gp_counter_src1()); ++ } + -+/* page fault queue flush helper pages -+ * note that the mapping pointers are currently unused outside of the initialization functions */ -+static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE; -+static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL; -+static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE; -+static mali_io_address mali_page_fault_flush_page_table_mapping = NULL; -+static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE; -+static mali_io_address mali_page_fault_flush_data_page_mapping = NULL; ++ _mali_osk_list_init(&job->list); ++ job->session = session; ++ job->id = id; ++ 
job->heap_current_addr = job->uargs.frame_registers[4]; ++ job->perf_counter_value0 = 0; ++ job->perf_counter_value1 = 0; ++ job->pid = _mali_osk_get_pid(); ++ job->tid = _mali_osk_get_tid(); + -+/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */ -+static mali_dma_addr mali_empty_page_directory_phys = MALI_INVALID_PAGE; -+static mali_io_address mali_empty_page_directory_virt = NULL; + ++ INIT_LIST_HEAD(&job->varying_alloc); ++ INIT_LIST_HEAD(&job->vary_todo); ++ job->dmem = NULL; + -+_mali_osk_errcode_t mali_mmu_initialize(void) -+{ -+ /* allocate the helper pages */ -+ mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt); -+ if (0 == mali_empty_page_directory_phys) { -+ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n")); -+ mali_empty_page_directory_phys = MALI_INVALID_PAGE; -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ if (job->uargs.deferred_mem_num > session->allocation_mgr.mali_allocation_num) { ++ MALI_PRINT_ERROR(("Mali GP job: The number of varying buffer to defer bind is invalid !\n")); ++ goto fail1; ++ } + -+ if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory, -+ &mali_page_fault_flush_page_directory_mapping, -+ &mali_page_fault_flush_page_table, -+ &mali_page_fault_flush_page_table_mapping, -+ &mali_page_fault_flush_data_page, -+ &mali_page_fault_flush_data_page_mapping)) { -+ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n")); -+ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt); -+ mali_empty_page_directory_phys = MALI_INVALID_PAGE; -+ mali_empty_page_directory_virt = NULL; -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ /* add varying allocation list*/ ++ if (job->uargs.deferred_mem_num > 0) { ++ /* copy varying list from user space*/ ++ job->varying_list = _mali_osk_calloc(1, sizeof(u32) * job->uargs.deferred_mem_num); ++ if (!job->varying_list) { ++ MALI_PRINT_ERROR(("Mali GP job: allocate varying_list failed varying_alloc_num = %d !\n", job->uargs.deferred_mem_num)); ++ goto fail1; ++ } + -+ return _MALI_OSK_ERR_OK; -+} ++ if (0 != _mali_osk_copy_from_user(©_of_uargs, uargs, sizeof(_mali_uk_gp_start_job_s))) { ++ goto fail1; ++ } ++ memory_list = (u32 __user *)(uintptr_t)copy_of_uargs.deferred_mem_list; + -+void mali_mmu_terminate(void) -+{ -+ MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n")); ++ if (0 != _mali_osk_copy_from_user(job->varying_list, memory_list, sizeof(u32) * job->uargs.deferred_mem_num)) { ++ MALI_PRINT_ERROR(("Mali GP job: Failed to copy varying list from user space!\n")); ++ goto fail; ++ } + -+ /* Free global helper pages */ -+ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt); -+ mali_empty_page_directory_phys = MALI_INVALID_PAGE; -+ mali_empty_page_directory_virt = NULL; ++ if (unlikely(_mali_gp_add_varying_allocations(session, job, job->varying_list, ++ job->uargs.deferred_mem_num))) { ++ MALI_PRINT_ERROR(("Mali GP job: _mali_gp_add_varying_allocations failed!\n")); ++ goto fail; ++ } + -+ /* Free the page fault flush pages */ -+ mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory, -+ &mali_page_fault_flush_page_directory_mapping, -+ &mali_page_fault_flush_page_table, -+ &mali_page_fault_flush_page_table_mapping, -+ &mali_page_fault_flush_data_page, -+ &mali_page_fault_flush_data_page_mapping); -+} ++ /* do preparetion for each allocation */ ++ list_for_each_entry_safe(alloc_node, tmp_node, 
&job->varying_alloc, node) { ++ if (unlikely(_MALI_OSK_ERR_OK != mali_mem_defer_bind_allocation_prepare(alloc_node->alloc, &job->vary_todo, &job->required_varying_memsize))) { ++ MALI_PRINT_ERROR(("Mali GP job: mali_mem_defer_bind_allocation_prepare failed!\n")); ++ goto fail; ++ } ++ } + -+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual) -+{ -+ struct mali_mmu_core *mmu = NULL; ++ _mali_gp_del_varying_allocations(job); + -+ MALI_DEBUG_ASSERT_POINTER(resource); ++ /* bind varying here, to avoid memory latency issue. */ ++ { ++ struct mali_defer_mem_block dmem_block; + -+ MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description)); ++ INIT_LIST_HEAD(&dmem_block.free_pages); ++ atomic_set(&dmem_block.num_free_pages, 0); + -+ mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core)); -+ if (NULL != mmu) { -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) { -+ if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) { -+ if (is_virtual) { -+ /* Skip reset and IRQ setup for virtual MMU */ -+ return mmu; ++ if (mali_mem_prepare_mem_for_job(job, &dmem_block)) { ++ MALI_PRINT_ERROR(("Mali GP job: mali_mem_prepare_mem_for_job failed!\n")); ++ goto fail; + } -+ -+ if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) { -+ /* Setup IRQ handlers (which will do IRQ probing if needed) */ -+ mmu->irq = _mali_osk_irq_init(resource->irq, -+ mali_group_upper_half_mmu, -+ group, -+ mali_mmu_probe_trigger, -+ mali_mmu_probe_ack, -+ mmu, -+ resource->description); -+ if (NULL != mmu->irq) { -+ return mmu; -+ } else { -+ MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description)); -+ } ++ if (_MALI_OSK_ERR_OK != mali_mem_defer_bind(job, &dmem_block)) { ++ MALI_PRINT_ERROR(("gp job create, mali_mem_defer_bind failed! GP %x fail!", job)); ++ goto fail; + } -+ mali_group_remove_mmu_core(group); -+ } else { -+ MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description)); + } -+ mali_hw_core_delete(&mmu->hw_core); ++ ++ if (job->uargs.varying_memsize > MALI_UK_BIG_VARYING_SIZE) { ++ job->big_job = 1; ++ } ++ } ++ job->pp_tracker = pp_tracker; ++ if (NULL != job->pp_tracker) { ++ /* Take a reference on PP job's tracker that will be released when the GP ++ job is done. 
*/ ++ mali_timeline_system_tracker_get(session->timeline_system, pp_tracker); + } + -+ _mali_osk_free(mmu); ++ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_GP, NULL, job); ++ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence)); ++ ++ return job; + } else { -+ MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n")); ++ MALI_PRINT_ERROR(("Mali GP job: _mali_osk_calloc failed!\n")); ++ return NULL; + } + -+ return NULL; -+} + -+void mali_mmu_delete(struct mali_mmu_core *mmu) -+{ -+ if (NULL != mmu->irq) { -+ _mali_osk_irq_term(mmu->irq); ++fail: ++ _mali_osk_free(job->varying_list); ++ /* Handle allocate fail here, free all varying node */ ++ { ++ struct mali_backend_bind_list *bkn, *bkn_tmp; ++ list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) { ++ list_del(&bkn->node); ++ _mali_osk_free(bkn); ++ } + } -+ -+ mali_hw_core_delete(&mmu->hw_core); -+ _mali_osk_free(mmu); ++fail1: ++ _mali_osk_notification_delete(job->oom_notification); ++fail2: ++ _mali_osk_notification_delete(job->finished_notification); ++fail3: ++ _mali_osk_free(job); ++ return NULL; +} + -+static void mali_mmu_enable_paging(struct mali_mmu_core *mmu) ++void mali_gp_job_delete(struct mali_gp_job *job) +{ -+ int i; ++ struct mali_backend_bind_list *bkn, *bkn_tmp; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT(NULL == job->pp_tracker); ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list)); ++ _mali_osk_free(job->varying_list); + -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING); ++ /* Handle allocate fail here, free all varying node */ ++ list_for_each_entry_safe(bkn, bkn_tmp , &job->vary_todo, node) { ++ list_del(&bkn->node); ++ _mali_osk_free(bkn); ++ } + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { -+ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) { -+ break; -+ } ++ mali_mem_defer_dmem_free(job); ++ ++ /* de-allocate the pre-allocated oom notifications */ ++ if (NULL != job->oom_notification) { ++ _mali_osk_notification_delete(job->oom_notification); ++ job->oom_notification = NULL; + } -+ if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); ++ if (NULL != job->finished_notification) { ++ _mali_osk_notification_delete(job->finished_notification); ++ job->finished_notification = NULL; + } ++ ++ _mali_osk_free(job); +} + -+/** -+ * Issues the enable stall command to the MMU and waits for HW to complete the request -+ * @param mmu The MMU to enable paging for -+ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out) -+ */ -+static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu) ++void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list) +{ -+ int i; -+ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); ++ struct mali_gp_job *iter; ++ struct mali_gp_job *tmp; + -+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { -+ MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n")); -+ return MALI_TRUE; -+ } ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); + -+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { -+ MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n")); -+ return MALI_FALSE; -+ } ++ /* Find position in 
list/queue where job should be added. */ ++ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list, ++ struct mali_gp_job, list) { + -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL); ++ /* A span is used to handle job ID wrapping. */ ++ bool job_is_after = (mali_gp_job_get_id(job) - ++ mali_gp_job_get_id(iter)) < ++ MALI_SCHEDULER_JOB_ID_SPAN; + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { -+ mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); -+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { -+ break; -+ } -+ if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) { -+ break; -+ } -+ if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) { ++ if (job_is_after) { + break; + } + } -+ if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); -+ return MALI_FALSE; -+ } -+ -+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { -+ MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n")); -+ return MALI_FALSE; -+ } + -+ return MALI_TRUE; ++ _mali_osk_list_add(&job->list, &iter->list); +} + -+/** -+ * Issues the disable stall command to the MMU and waits for HW to complete the request -+ * @param mmu The MMU to enable paging for -+ */ -+static void mali_mmu_disable_stall(struct mali_mmu_core *mmu) ++u32 mali_gp_job_get_gp_counter_src0(void) +{ -+ int i; -+ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); -+ -+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { -+ MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n")); -+ return; -+ } -+ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { -+ MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n")); -+ return; -+ } ++ return gp_counter_src0; ++} + -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL); ++void mali_gp_job_set_gp_counter_src0(u32 counter) ++{ ++ gp_counter_src0 = counter; ++} + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { -+ u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); -+ if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) { -+ break; -+ } -+ if (status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { -+ break; -+ } -+ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { -+ break; -+ } -+ } -+ if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); ++u32 mali_gp_job_get_gp_counter_src1(void) ++{ ++ return gp_counter_src1; +} + -+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu) ++void mali_gp_job_set_gp_counter_src1(u32 counter) +{ -+ MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description)); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE); ++ gp_counter_src1 = counter; +} + -+MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu) ++mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success) +{ -+ int i; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+ mali_hw_core_register_write(&mmu->hw_core, 
MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE); -+ MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR)); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { -+ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) { -+ break; -+ } -+ } -+ if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); -+ return _MALI_OSK_ERR_FAULT; ++ if (NULL != job->pp_tracker) { ++ schedule_mask |= mali_timeline_system_tracker_put(job->session->timeline_system, job->pp_tracker, MALI_FALSE == success); ++ job->pp_tracker = NULL; + } + -+ return _MALI_OSK_ERR_OK; ++ return schedule_mask; +} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h +new file mode 100644 +index 000000000..b84333f9f +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_gp_job.h +@@ -0,0 +1,324 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu) -+{ -+ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT; -+ mali_bool stall_success; -+ MALI_DEBUG_ASSERT_POINTER(mmu); ++#ifndef __MALI_GP_JOB_H__ ++#define __MALI_GP_JOB_H__ + -+ stall_success = mali_mmu_enable_stall(mmu); -+ if (!stall_success) { -+ err = _MALI_OSK_ERR_BUSY; -+ } ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_uk_types.h" ++#include "mali_session.h" ++#include "mali_timeline.h" ++#include "mali_scheduler_types.h" ++#include "mali_scheduler.h" ++#include "mali_executor.h" ++#include "mali_timeline.h" + -+ MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description)); ++struct mali_defer_mem; ++/** ++ * This structure represents a GP job ++ * ++ * The GP job object itself is not protected by any single lock, ++ * but relies on other locks instead (scheduler, executor and timeline lock). ++ * Think of the job object as moving between these sub systems through-out ++ * its lifetime. Different part of the GP job struct is used by different ++ * subsystems. Accessor functions ensure that correct lock is taken. ++ * Do NOT access any data members directly from outside this module! ++ */ ++struct mali_gp_job { ++ /* ++ * These members are typically only set at creation, ++ * and only read later on. ++ * They do not require any lock protection. 
++ */ ++ _mali_uk_gp_start_job_s uargs; /**< Arguments from user space */ ++ struct mali_session_data *session; /**< Session which submitted this job */ ++ u32 pid; /**< Process ID of submitting process */ ++ u32 tid; /**< Thread ID of submitting thread */ ++ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */ ++ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */ ++ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */ ++ struct mali_timeline_tracker *pp_tracker; /**< Pointer to Timeline tracker for PP job that depends on this job. */ ++ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */ + -+ if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) { -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR); -+ /* no session is active, so just activate the empty page directory */ -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys); -+ mali_mmu_enable_paging(mmu); -+ err = _MALI_OSK_ERR_OK; -+ } -+ mali_mmu_disable_stall(mmu); ++ /* ++ * These members are used by the scheduler, ++ * protected by scheduler lock ++ */ ++ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */ + -+ return err; -+} ++ /* ++ * These members are used by the executor and/or group, ++ * protected by executor lock ++ */ ++ _mali_osk_notification_t *oom_notification; /**< Notification sent back to userspace on OOM */ + -+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu) -+{ -+ mali_bool stall_success = mali_mmu_enable_stall(mmu); ++ /* ++ * Set by executor/group on job completion, read by scheduler when ++ * returning job to user. Hold executor lock when setting, ++ * no lock needed when reading ++ */ ++ u32 heap_current_addr; /**< Holds the current HEAP address when the job has completed */ ++ u32 perf_counter_value0; /**< Value of performance counter 0 (to be returned to user space) */ ++ u32 perf_counter_value1; /**< Value of performance counter 1 (to be returned to user space) */ ++ struct mali_defer_mem *dmem; /** < used for defer bind to store dmem info */ ++ struct list_head varying_alloc; /**< hold the list of varying allocations */ ++ u32 bind_flag; /** < flag for deferbind*/ ++ u32 *varying_list; /**< varying memory list need to to defer bind*/ ++ struct list_head vary_todo; /**< list of backend list need to do defer bind*/ ++ u32 required_varying_memsize; /** < size of varying memory to reallocate*/ ++ u32 big_job; /** < if the gp job have large varying output and may take long time*/ ++}; + -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); ++#define MALI_DEFER_BIND_MEMORY_PREPARED (0x1 << 0) ++#define MALI_DEFER_BIND_MEMORY_BINDED (0x1 << 2) + -+ if (MALI_FALSE == stall_success) { -+ /* False means that it is in Pagefault state. 
Not possible to disable_stall then */ -+ return MALI_FALSE; -+ } ++struct mali_gp_allocation_node { ++ struct list_head node; ++ mali_mem_allocation *alloc; ++}; + -+ mali_mmu_disable_stall(mmu); -+ return MALI_TRUE; -+} ++struct mali_gp_job *mali_gp_job_create(struct mali_session_data *session, _mali_uk_gp_start_job_s *uargs, u32 id, struct mali_timeline_tracker *pp_tracker); ++void mali_gp_job_delete(struct mali_gp_job *job); + -+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu) ++u32 mali_gp_job_get_gp_counter_src0(void); ++void mali_gp_job_set_gp_counter_src0(u32 counter); ++u32 mali_gp_job_get_gp_counter_src1(void); ++void mali_gp_job_set_gp_counter_src1(u32 counter); ++ ++MALI_STATIC_INLINE u32 mali_gp_job_get_id(struct mali_gp_job *job) +{ -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (NULL == job) ? 0 : job->id; +} + -+ -+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address) ++MALI_STATIC_INLINE void mali_gp_job_set_cache_order(struct mali_gp_job *job, ++ u32 cache_order) +{ -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address)); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ job->cache_order = cache_order; +} + -+static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory) ++MALI_STATIC_INLINE u32 mali_gp_job_get_cache_order(struct mali_gp_job *job) +{ -+ /* The MMU must be in stalled or page fault mode, for this writing to work */ -+ MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) -+ & (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE))); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); -+ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (NULL == job) ? 0 : job->cache_order; +} + -+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir) ++MALI_STATIC_INLINE u64 mali_gp_job_get_user_id(struct mali_gp_job *job) +{ -+ mali_bool stall_success; -+ MALI_DEBUG_ASSERT_POINTER(mmu); -+ -+ MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description)); -+ -+ stall_success = mali_mmu_enable_stall(mmu); -+ MALI_DEBUG_ASSERT(stall_success); -+ MALI_IGNORE(stall_success); -+ mali_mmu_activate_address_space(mmu, pagedir->page_directory); -+ mali_mmu_disable_stall(mmu); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.user_job_ptr; +} + -+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE u32 mali_gp_job_get_frame_builder_id(struct mali_gp_job *job) +{ -+ mali_bool stall_success; -+ -+ MALI_DEBUG_ASSERT_POINTER(mmu); -+ MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description)); -+ -+ stall_success = mali_mmu_enable_stall(mmu); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.frame_builder_id; ++} + -+ /* This function can only be called when the core is idle, so it could not fail. 
*/ -+ MALI_DEBUG_ASSERT(stall_success); -+ MALI_IGNORE(stall_success); ++MALI_STATIC_INLINE u32 mali_gp_job_get_flush_id(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.flush_id; ++} + -+ mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys); -+ mali_mmu_disable_stall(mmu); ++MALI_STATIC_INLINE u32 mali_gp_job_get_pid(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->pid; +} + -+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE u32 mali_gp_job_get_tid(struct mali_gp_job *job) +{ -+ mali_bool stall_success; -+ MALI_DEBUG_ASSERT_POINTER(mmu); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->tid; ++} + -+ MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description)); -+ stall_success = mali_mmu_enable_stall(mmu); -+ /* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */ -+ mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory); -+ if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu); ++MALI_STATIC_INLINE u32 *mali_gp_job_get_frame_registers(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.frame_registers; +} + -+/* Is called when we want the mmu to give an interrupt */ -+static void mali_mmu_probe_trigger(void *data) ++MALI_STATIC_INLINE struct mali_session_data *mali_gp_job_get_session(struct mali_gp_job *job) +{ -+ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data; -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->session; +} + -+/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */ -+static _mali_osk_errcode_t mali_mmu_probe_ack(void *data) ++MALI_STATIC_INLINE mali_bool mali_gp_job_has_vs_job(struct mali_gp_job *job) +{ -+ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data; -+ u32 int_stat; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->uargs.frame_registers[0] != job->uargs.frame_registers[1]) ? MALI_TRUE : MALI_FALSE; ++} + -+ int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS); ++MALI_STATIC_INLINE mali_bool mali_gp_job_has_plbu_job(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->uargs.frame_registers[2] != job->uargs.frame_registers[3]) ? 
MALI_TRUE : MALI_FALSE; ++} + -+ MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat)); -+ if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) { -+ MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n")); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT); -+ } else { -+ MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n")); -+ } ++MALI_STATIC_INLINE u32 mali_gp_job_get_current_heap_addr(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->heap_current_addr; ++} + -+ if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) { -+ MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n")); -+ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR); -+ } else { -+ MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n")); -+ } ++MALI_STATIC_INLINE void mali_gp_job_set_current_heap_addr(struct mali_gp_job *job, u32 heap_addr) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ job->heap_current_addr = heap_addr; ++} + -+ if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) == -+ (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) { -+ return _MALI_OSK_ERR_OK; -+ } ++MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_flag(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.perf_counter_flag; ++} + -+ return _MALI_OSK_ERR_FAULT; ++MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src0(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.perf_counter_src0; +} + -+#if 0 -+void mali_mmu_print_state(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_src1(struct mali_gp_job *job) +{ -+ MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.perf_counter_src1; +} -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu.h b/drivers/gpu/arm/mali400/mali/common/mali_mmu.h -new file mode 100644 -index 000000000..6ed48585f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu.h -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ + -+#ifndef __MALI_MMU_H__ -+#define __MALI_MMU_H__ ++MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value0(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->perf_counter_value0; ++} + -+#include "mali_osk.h" -+#include "mali_mmu_page_directory.h" -+#include "mali_hw_core.h" ++MALI_STATIC_INLINE u32 mali_gp_job_get_perf_counter_value1(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->perf_counter_value1; ++} + -+/* Forward declaration from mali_group.h */ -+struct mali_group; ++MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src0(struct mali_gp_job *job, u32 src) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ job->uargs.perf_counter_src0 = src; ++} + -+/** -+ * MMU register numbers -+ * Used in the register read/write routines. -+ * See the hardware documentation for more information about each register -+ */ -+typedef enum mali_mmu_register { -+ MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */ -+ MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */ -+ MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */ -+ MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */ -+ MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */ -+ MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */ -+ MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */ -+ MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */ -+ MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */ -+} mali_mmu_register; ++MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_src1(struct mali_gp_job *job, u32 src) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ job->uargs.perf_counter_src1 = src; ++} + -+/** -+ * MMU interrupt register bits -+ * Each cause of the interrupt is reported -+ * through the (raw) interrupt status registers. -+ * Multiple interrupts can be pending, so multiple bits -+ * can be set at once. -+ */ -+typedef enum mali_mmu_interrupt { -+ MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */ -+ MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */ -+} mali_mmu_interrupt; ++MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value0(struct mali_gp_job *job, u32 value) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ job->perf_counter_value0 = value; ++} + -+typedef enum mali_mmu_status_bits { -+ MALI_MMU_STATUS_BIT_PAGING_ENABLED = 1 << 0, -+ MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE = 1 << 1, -+ MALI_MMU_STATUS_BIT_STALL_ACTIVE = 1 << 2, -+ MALI_MMU_STATUS_BIT_IDLE = 1 << 3, -+ MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4, -+ MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5, -+ MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE = 1 << 31, -+} mali_mmu_status_bits; ++MALI_STATIC_INLINE void mali_gp_job_set_perf_counter_value1(struct mali_gp_job *job, u32 value) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ job->perf_counter_value1 = value; ++} + -+/** -+ * Definition of the MMU struct -+ * Used to track a MMU unit in the system. 
-+ * Contains information about the mapping of the registers -+ */ -+struct mali_mmu_core { -+ struct mali_hw_core hw_core; /**< Common for all HW cores */ -+ _mali_osk_irq_t *irq; /**< IRQ handler */ -+}; ++void mali_gp_job_list_add(struct mali_gp_job *job, _mali_osk_list_t *list); + -+_mali_osk_errcode_t mali_mmu_initialize(void); ++MALI_STATIC_INLINE void mali_gp_job_list_move(struct mali_gp_job *job, ++ _mali_osk_list_t *list) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list)); ++ _mali_osk_list_move(&job->list, list); ++} + -+void mali_mmu_terminate(void); ++MALI_STATIC_INLINE void mali_gp_job_list_remove(struct mali_gp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ _mali_osk_list_delinit(&job->list); ++} + -+struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual); -+void mali_mmu_delete(struct mali_mmu_core *mmu); ++MALI_STATIC_INLINE _mali_osk_notification_t * ++mali_gp_job_get_finished_notification(struct mali_gp_job *job) ++{ ++ _mali_osk_notification_t *notification; + -+_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu); -+mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu); -+void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu); -+void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(job->finished_notification); + -+void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir); -+void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu); -+void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu); ++ notification = job->finished_notification; ++ job->finished_notification = NULL; + -+void mali_mmu_page_fault_done(struct mali_mmu_core *mmu); ++ return notification; ++} + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE _mali_osk_notification_t *mali_gp_job_get_oom_notification( ++ struct mali_gp_job *job) +{ -+ u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT); -+ if (0 == rawstat_used) { -+ return MALI_INTERRUPT_RESULT_NONE; -+ } ++ _mali_osk_notification_t *notification; + -+ return MALI_INTERRUPT_RESULT_ERROR; -+} ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(job->oom_notification); + ++ notification = job->oom_notification; ++ job->oom_notification = NULL; + -+MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu) -+{ -+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS); ++ return notification; +} + -+MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE void mali_gp_job_set_oom_notification( ++ struct mali_gp_job *job, ++ _mali_osk_notification_t *notification) +{ -+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(NULL == job->oom_notification); ++ job->oom_notification = notification; +} + -+MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE struct mali_timeline_tracker *mali_gp_job_get_tracker( ++ struct mali_gp_job *job) +{ -+ 
mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return &(job->tracker); +} + -+MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu) -+{ -+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); -+} + -+MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu) ++MALI_STATIC_INLINE u32 *mali_gp_job_get_timeline_point_ptr( ++ struct mali_gp_job *job) +{ -+ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr; +} + -+#endif /* __MALI_MMU_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c ++ ++/** ++ * Release reference on tracker for PP job that depends on this GP job. ++ * ++ * @note If GP job has a reference on tracker, this function MUST be called before the GP job is ++ * deleted. ++ * ++ * @param job GP job that is done. ++ * @param success MALI_TRUE if job completed successfully, MALI_FALSE if not. ++ * @return A scheduling bitmask indicating whether scheduling needs to be done. ++ */ ++mali_scheduler_mask mali_gp_job_signal_pp_tracker(struct mali_gp_job *job, mali_bool success); ++ ++#endif /* __MALI_GP_JOB_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_group.c b/drivers/gpu/arm/mali400/mali/common/mali_group.c new file mode 100644 -index 000000000..9ad3e8970 +index 000000000..47979a2f1 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c -@@ -0,0 +1,495 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_group.c +@@ -0,0 +1,1875 @@ +/* + * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * @@ -289904,2179 +293008,1880 @@ index 000000000..9ad3e8970 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+ +#include "mali_kernel_common.h" ++#include "mali_group.h" +#include "mali_osk.h" -+#include "mali_ukk.h" -+#include "mali_uk_types.h" -+#include "mali_mmu_page_directory.h" -+#include "mali_memory.h" +#include "mali_l2_cache.h" ++#include "mali_gp.h" ++#include "mali_pp.h" ++#include "mali_mmu.h" ++#include "mali_dlbu.h" ++#include "mali_broadcast.h" ++#include "mali_scheduler.h" ++#include "mali_osk_profiling.h" ++#include "mali_osk_mali.h" ++#include "mali_pm_domain.h" ++#include "mali_pm.h" ++#include "mali_executor.h" + -+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data); -+ -+u32 mali_allocate_empty_page(mali_io_address *virt_addr) -+{ -+ _mali_osk_errcode_t err; -+ mali_io_address mapping; -+ mali_dma_addr address; -+ -+ if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) { -+ /* Allocation failed */ -+ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n")); -+ return 0; -+ } -+ -+ MALI_DEBUG_ASSERT_POINTER(mapping); -+ -+ err = fill_page(mapping, 0); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_mmu_release_table_page(address, mapping); -+ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n")); -+ return 0; -+ } -+ -+ *virt_addr = mapping; -+ return address; -+} ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++#include ++#include ++#endif + -+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr) -+{ -+ if (MALI_INVALID_PAGE != address) { -+ mali_mmu_release_table_page(address, virt_addr); -+ } -+} ++#define MALI_MAX_NUM_DOMAIN_REFS (MALI_MAX_NUMBER_OF_GROUPS * 2) + -+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory, -+ mali_io_address *page_directory_mapping, -+ mali_dma_addr *page_table, mali_io_address *page_table_mapping, -+ mali_dma_addr *data_page, mali_io_address *data_page_mapping) -+{ -+ _mali_osk_errcode_t err; ++#if defined(CONFIG_MALI400_PROFILING) ++static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num); ++#endif /* #if defined(CONFIG_MALI400_PROFILING) */ + -+ err = mali_mmu_get_table_page(data_page, data_page_mapping); -+ if (_MALI_OSK_ERR_OK == err) { -+ err = mali_mmu_get_table_page(page_table, page_table_mapping); -+ if (_MALI_OSK_ERR_OK == err) { -+ err = mali_mmu_get_table_page(page_directory, page_directory_mapping); -+ if (_MALI_OSK_ERR_OK == err) { -+ fill_page(*data_page_mapping, 0); -+ fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT); -+ fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT); -+ MALI_SUCCESS; -+ } -+ mali_mmu_release_table_page(*page_table, *page_table_mapping); -+ *page_table = MALI_INVALID_PAGE; -+ } -+ mali_mmu_release_table_page(*data_page, *data_page_mapping); -+ *data_page = MALI_INVALID_PAGE; -+ } -+ return err; -+} ++static struct mali_group *mali_global_groups[MALI_MAX_NUMBER_OF_GROUPS] = { NULL, }; ++static u32 mali_global_num_groups = 0; + -+void mali_destroy_fault_flush_pages( -+ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping, -+ mali_dma_addr *page_table, mali_io_address *page_table_mapping, -+ mali_dma_addr *data_page, mali_io_address *data_page_mapping) -+{ -+ if (MALI_INVALID_PAGE != *page_directory) { -+ mali_mmu_release_table_page(*page_directory, *page_directory_mapping); -+ *page_directory = MALI_INVALID_PAGE; -+ *page_directory_mapping = NULL; -+ } ++/* SW timer for job execution */ ++int mali_max_job_runtime = MALI_MAX_JOB_RUNTIME_DEFAULT; + -+ if (MALI_INVALID_PAGE != 
*page_table) { -+ mali_mmu_release_table_page(*page_table, *page_table_mapping); -+ *page_table = MALI_INVALID_PAGE; -+ *page_table_mapping = NULL; -+ } ++/* local helper functions */ ++static void mali_group_bottom_half_mmu(void *data); ++static void mali_group_bottom_half_gp(void *data); ++static void mali_group_bottom_half_pp(void *data); ++static void mali_group_timeout(void *data); ++static void mali_group_reset_pp(struct mali_group *group); ++static void mali_group_reset_mmu(struct mali_group *group); + -+ if (MALI_INVALID_PAGE != *data_page) { -+ mali_mmu_release_table_page(*data_page, *data_page_mapping); -+ *data_page = MALI_INVALID_PAGE; -+ *data_page_mapping = NULL; -+ } -+} ++static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload); ++static void mali_group_recovery_reset(struct mali_group *group); + -+static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data) ++struct mali_group *mali_group_create(struct mali_l2_cache_core *core, ++ struct mali_dlbu_core *dlbu, ++ struct mali_bcast_unit *bcast, ++ u32 domain_index) +{ -+ int i; -+ MALI_DEBUG_ASSERT_POINTER(mapping); ++ struct mali_group *group = NULL; + -+ for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) { -+ _mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data); ++ if (mali_global_num_groups >= MALI_MAX_NUMBER_OF_GROUPS) { ++ MALI_PRINT_ERROR(("Mali group: Too many group objects created\n")); ++ return NULL; + } -+ _mali_osk_mem_barrier(); -+ MALI_SUCCESS; -+} -+ -+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size) -+{ -+ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address); -+ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1); -+ _mali_osk_errcode_t err; -+ mali_io_address pde_mapping; -+ mali_dma_addr pde_phys; -+ int i, page_count; -+ u32 start_address; -+ if (last_pde < first_pde) -+ return _MALI_OSK_ERR_INVALID_ARGS; + -+ for (i = first_pde; i <= last_pde; i++) { -+ if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped, -+ i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) { -+ /* Page table not present */ -+ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]); -+ MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]); ++ group = _mali_osk_calloc(1, sizeof(struct mali_group)); ++ if (NULL != group) { ++ group->timeout_timer = _mali_osk_timer_init(mali_group_timeout); ++ if (NULL != group->timeout_timer) { ++ _mali_osk_timer_setcallback(group->timeout_timer, mali_group_timeout, (void *)group); + -+ err = mali_mmu_get_table_page(&pde_phys, &pde_mapping); -+ if (_MALI_OSK_ERR_OK != err) { -+ MALI_PRINT_ERROR(("Failed to allocate page table page.\n")); -+ return err; -+ } -+ pagedir->page_entries_mapped[i] = pde_mapping; ++ group->l2_cache_core[0] = core; ++ _mali_osk_list_init(&group->group_list); ++ _mali_osk_list_init(&group->executor_list); ++ _mali_osk_list_init(&group->pm_domain_list); ++ group->bcast_core = bcast; ++ group->dlbu_core = dlbu; + -+ /* Update PDE, mark as present */ -+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), -+ pde_phys | MALI_MMU_FLAGS_PRESENT); ++ /* register this object as a part of the correct power domain */ ++ if ((NULL != core) || (NULL != dlbu) || (NULL != bcast)) ++ group->pm_domain = mali_pm_register_group(domain_index, group); + -+ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]); -+ } ++ mali_global_groups[mali_global_num_groups] = group; ++ 
mali_global_num_groups++; + -+ if (first_pde == last_pde) { -+ pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE; -+ } else if (i == first_pde) { -+ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE; -+ page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE; -+ pagedir->page_entries_usage_count[i] += page_count; -+ } else if (i == last_pde) { -+ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE; -+ page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE; -+ pagedir->page_entries_usage_count[i] += page_count; -+ } else { -+ pagedir->page_entries_usage_count[i] = 1024; ++ return group; + } ++ _mali_osk_free(group); + } -+ _mali_osk_write_mem_barrier(); + -+ return _MALI_OSK_ERR_OK; ++ return NULL; +} + -+MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size) ++void mali_group_delete(struct mali_group *group) +{ -+ int i; -+ const int first_pte = MALI_MMU_PTE_ENTRY(mali_address); -+ const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1); ++ u32 i; + -+ for (i = first_pte; i <= last_pte; i++) { -+ _mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0); -+ } -+} ++ MALI_DEBUG_PRINT(4, ("Deleting group %s\n", ++ mali_group_core_description(group))); + -+static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index) -+{ -+ return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped, -+ index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK); -+} ++ MALI_DEBUG_ASSERT(NULL == group->parent_group); ++ MALI_DEBUG_ASSERT((MALI_GROUP_STATE_INACTIVE == group->state) || ((MALI_GROUP_STATE_ACTIVATION_PENDING == group->state))); + ++ /* Delete the resources that this group owns */ ++ if (NULL != group->gp_core) { ++ mali_gp_delete(group->gp_core); ++ } + -+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size) -+{ -+ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address); -+ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1); -+ u32 left = size; -+ int i; -+ mali_bool pd_changed = MALI_FALSE; -+ u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */ -+ u32 num_pages_inv = 0; -+ mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */ ++ if (NULL != group->pp_core) { ++ mali_pp_delete(group->pp_core); ++ } + -+ /* For all page directory entries in range. 
*/ -+ for (i = first_pde; i <= last_pde; i++) { -+ u32 size_in_pde, offset; ++ if (NULL != group->mmu) { ++ mali_mmu_delete(group->mmu); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]); -+ MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]); ++ if (mali_group_is_virtual(group)) { ++ /* Remove all groups from virtual group */ ++ struct mali_group *child; ++ struct mali_group *temp; + -+ /* Offset into page table, 0 if mali_address is 4MiB aligned */ -+ offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1)); -+ if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) { -+ size_in_pde = left; -+ } else { -+ size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset; ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ child->parent_group = NULL; ++ mali_group_delete(child); + } + -+ pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE; ++ mali_dlbu_delete(group->dlbu_core); + -+ /* If entire page table is unused, free it */ -+ if (0 == pagedir->page_entries_usage_count[i]) { -+ u32 page_phys; -+ void *page_virt; -+ MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n")); -+ /* last reference removed, no need to zero out each PTE */ ++ if (NULL != group->bcast_core) { ++ mali_bcast_unit_delete(group->bcast_core); ++ } ++ } + -+ page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32))); -+ page_virt = pagedir->page_entries_mapped[i]; -+ pagedir->page_entries_mapped[i] = NULL; -+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0); ++ for (i = 0; i < mali_global_num_groups; i++) { ++ if (mali_global_groups[i] == group) { ++ mali_global_groups[i] = NULL; ++ mali_global_num_groups--; + -+ mali_mmu_release_table_page(page_phys, page_virt); -+ pd_changed = MALI_TRUE; -+ } else { -+ MALI_DEBUG_ASSERT(num_pages_inv < 2); -+ if (num_pages_inv < 2) { -+ pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i); -+ num_pages_inv++; -+ } else { -+ invalidate_all = MALI_TRUE; ++ if (i != mali_global_num_groups) { ++ /* We removed a group from the middle of the array -- move the last ++ * group to the current position to close the gap */ ++ mali_global_groups[i] = mali_global_groups[mali_global_num_groups]; ++ mali_global_groups[mali_global_num_groups] = NULL; + } + -+ /* If part of the page table is still in use, zero the relevant PTEs */ -+ mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde); ++ break; + } ++ } + -+ left -= size_in_pde; -+ mali_address += size_in_pde; ++ if (NULL != group->timeout_timer) { ++ _mali_osk_timer_del(group->timeout_timer); ++ _mali_osk_timer_term(group->timeout_timer); + } -+ _mali_osk_write_mem_barrier(); + -+ /* L2 pages invalidation */ -+ if (MALI_TRUE == pd_changed) { -+ MALI_DEBUG_ASSERT(num_pages_inv < 3); -+ if (num_pages_inv < 3) { -+ pages_to_invalidate[num_pages_inv] = pagedir->page_directory; -+ num_pages_inv++; -+ } else { -+ invalidate_all = MALI_TRUE; -+ } ++ if (NULL != group->bottom_half_work_mmu) { ++ _mali_osk_wq_delete_work(group->bottom_half_work_mmu); + } + -+ if (invalidate_all) { -+ mali_l2_cache_invalidate_all(); -+ } else { -+ mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv); ++ if (NULL != group->bottom_half_work_gp) { ++ _mali_osk_wq_delete_work(group->bottom_half_work_gp); + } + -+ MALI_SUCCESS; ++ if (NULL != group->bottom_half_work_pp) { ++ 
_mali_osk_wq_delete_work(group->bottom_half_work_pp); ++ } ++ ++ _mali_osk_free(group); +} + -+struct mali_page_directory *mali_mmu_pagedir_alloc(void) ++_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, struct mali_mmu_core *mmu_core) +{ -+ struct mali_page_directory *pagedir; -+ _mali_osk_errcode_t err; -+ mali_dma_addr phys; -+ -+ pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory)); -+ if (NULL == pagedir) { -+ return NULL; ++ /* This group object now owns the MMU core object */ ++ group->mmu = mmu_core; ++ group->bottom_half_work_mmu = _mali_osk_wq_create_work(mali_group_bottom_half_mmu, group); ++ if (NULL == group->bottom_half_work_mmu) { ++ return _MALI_OSK_ERR_FAULT; + } ++ return _MALI_OSK_ERR_OK; ++} + -+ err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped); -+ if (_MALI_OSK_ERR_OK != err) { -+ _mali_osk_free(pagedir); -+ return NULL; ++void mali_group_remove_mmu_core(struct mali_group *group) ++{ ++ /* This group object no longer owns the MMU core object */ ++ group->mmu = NULL; ++ if (NULL != group->bottom_half_work_mmu) { ++ _mali_osk_wq_delete_work(group->bottom_half_work_mmu); + } -+ -+ pagedir->page_directory = (u32)phys; -+ -+ /* Zero page directory */ -+ fill_page(pagedir->page_directory_mapped, 0); -+ -+ return pagedir; +} + -+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir) ++_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, struct mali_gp_core *gp_core) +{ -+ const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]); -+ int i; -+ -+ /* Free referenced page tables and zero PDEs. */ -+ for (i = 0; i < num_page_table_entries; i++) { -+ if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32( -+ pagedir->page_directory_mapped, -+ sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) { -+ mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped, -+ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK; -+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0); -+ mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]); -+ } ++ /* This group object now owns the GP core object */ ++ group->gp_core = gp_core; ++ group->bottom_half_work_gp = _mali_osk_wq_create_work(mali_group_bottom_half_gp, group); ++ if (NULL == group->bottom_half_work_gp) { ++ return _MALI_OSK_ERR_FAULT; + } -+ _mali_osk_write_mem_barrier(); + -+ /* Free the page directory page. 
*/ -+ mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped); -+ -+ _mali_osk_free(pagedir); ++ return _MALI_OSK_ERR_OK; +} + ++void mali_group_remove_gp_core(struct mali_group *group) ++{ ++ /* This group object no longer owns the GP core object */ ++ group->gp_core = NULL; ++ if (NULL != group->bottom_half_work_gp) { ++ _mali_osk_wq_delete_work(group->bottom_half_work_gp); ++ } ++} + -+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address, -+ mali_dma_addr phys_address, u32 size, u32 permission_bits) ++_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, struct mali_pp_core *pp_core) +{ -+ u32 end_address = mali_address + size; -+ u32 mali_phys = (u32)phys_address; ++ /* This group object now owns the PP core object */ ++ group->pp_core = pp_core; ++ group->bottom_half_work_pp = _mali_osk_wq_create_work(mali_group_bottom_half_pp, group); ++ if (NULL == group->bottom_half_work_pp) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Map physical pages into MMU page tables */ -+ for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) { -+ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]); -+ _mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)], -+ MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32), -+ mali_phys | permission_bits); ++void mali_group_remove_pp_core(struct mali_group *group) ++{ ++ /* This group object no longer owns the PP core object */ ++ group->pp_core = NULL; ++ if (NULL != group->bottom_half_work_pp) { ++ _mali_osk_wq_delete_work(group->bottom_half_work_pp); + } +} + -+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr) ++enum mali_group_state mali_group_activate(struct mali_group *group) +{ -+#if defined(DEBUG) -+ u32 pde_index, pte_index; -+ u32 pde, pte; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ pde_index = MALI_MMU_PDE_ENTRY(fault_addr); -+ pte_index = MALI_MMU_PTE_ENTRY(fault_addr); ++ MALI_DEBUG_PRINT(4, ("Group: Activating group %s\n", ++ mali_group_core_description(group))); + ++ if (MALI_GROUP_STATE_INACTIVE == group->state) { ++ /* Group is inactive, get PM refs in order to power up */ + -+ pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped, -+ pde_index * sizeof(u32)); ++ /* ++ * We'll take a maximum of 2 power domain references pr group, ++ * one for the group itself, and one for it's L2 cache. ++ */ ++ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS]; ++ struct mali_group *groups[MALI_MAX_NUM_DOMAIN_REFS]; ++ u32 num_domains = 0; ++ mali_bool all_groups_on; + ++ /* Deal with child groups first */ ++ if (mali_group_is_virtual(group)) { ++ /* ++ * The virtual group might have 0, 1 or 2 L2s in ++ * its l2_cache_core array, but we ignore these and ++ * let the child groups take the needed L2 cache ref ++ * on behalf of the virtual group. ++ * In other words; The L2 refs are taken in pair with ++ * the physical group which the L2 is attached to. ++ */ ++ struct mali_group *child; ++ struct mali_group *temp; + -+ if (pde & MALI_MMU_FLAGS_PRESENT) { -+ u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde); ++ /* ++ * Child group is inactive, get PM ++ * refs in order to power up. 
++ */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, ++ &group->group_list, ++ struct mali_group, group_list) { ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE ++ == child->state); + -+ pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index], -+ pte_index * sizeof(u32)); ++ child->state = MALI_GROUP_STATE_ACTIVATION_PENDING; + -+ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n" -+ "\t\tPTE: %08x, page %08x is %s\n", -+ fault_addr, pte_addr, pte, -+ MALI_MMU_ENTRY_ADDRESS(pte), -+ pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present")); ++ MALI_DEBUG_ASSERT_POINTER( ++ child->pm_domain); ++ domains[num_domains] = child->pm_domain; ++ groups[num_domains] = child; ++ num_domains++; ++ ++ /* ++ * Take L2 domain ref for child group. ++ */ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS ++ > num_domains); ++ domains[num_domains] = mali_l2_cache_get_pm_domain( ++ child->l2_cache_core[0]); ++ groups[num_domains] = NULL; ++ MALI_DEBUG_ASSERT(NULL == ++ child->l2_cache_core[1]); ++ num_domains++; ++ } ++ } else { ++ /* Take L2 domain ref for physical groups. */ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > ++ num_domains); ++ ++ domains[num_domains] = mali_l2_cache_get_pm_domain( ++ group->l2_cache_core[0]); ++ groups[num_domains] = NULL; ++ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]); ++ num_domains++; ++ } ++ ++ /* Do the group itself last (it's dependencies first) */ ++ ++ group->state = MALI_GROUP_STATE_ACTIVATION_PENDING; ++ ++ MALI_DEBUG_ASSERT_POINTER(group->pm_domain); ++ domains[num_domains] = group->pm_domain; ++ groups[num_domains] = group; ++ num_domains++; ++ ++ all_groups_on = mali_pm_get_domain_refs(domains, groups, ++ num_domains); ++ ++ /* ++ * Complete activation for group, include ++ * virtual group or physical group. ++ */ ++ if (MALI_TRUE == all_groups_on) { ++ ++ mali_group_set_active(group); ++ } ++ } else if (MALI_GROUP_STATE_ACTIVE == group->state) { ++ /* Already active */ ++ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); + } else { -+ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n", -+ fault_addr, pde)); ++ /* ++ * Activation already pending, group->power_is_on could ++ * be both true or false. We need to wait for power up ++ * notification anyway. ++ */ ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING ++ == group->state); + } -+#else -+ MALI_IGNORE(pagedir); -+ MALI_IGNORE(fault_addr); -+#endif -+} + -+/* For instrumented */ -+struct dump_info { -+ u32 buffer_left; -+ u32 register_writes_size; -+ u32 page_table_dump_size; -+ u32 *buffer; -+}; ++ MALI_DEBUG_PRINT(4, ("Group: group %s activation result: %s\n", ++ mali_group_core_description(group), ++ MALI_GROUP_STATE_ACTIVE == group->state ? 
++ "ACTIVE" : "PENDING")); + -+static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info) ++ return group->state; ++} ++ ++mali_bool mali_group_set_active(struct mali_group *group) +{ -+ if (NULL != info) { -+ info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVATION_PENDING == group->state); ++ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); + -+ if (NULL != info->buffer) { -+ /* check that we have enough space */ -+ if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM); ++ MALI_DEBUG_PRINT(4, ("Group: Activation completed for %s\n", ++ mali_group_core_description(group))); + -+ *info->buffer = where; -+ info->buffer++; ++ if (mali_group_is_virtual(group)) { ++ struct mali_group *child; ++ struct mali_group *temp; + -+ *info->buffer = what; -+ info->buffer++; ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, ++ struct mali_group, group_list) { ++ if (MALI_TRUE != child->power_is_on) { ++ return MALI_FALSE; ++ } + -+ info->buffer_left -= sizeof(u32) * 2; ++ child->state = MALI_GROUP_STATE_ACTIVE; + } ++ ++ mali_group_reset(group); + } + -+ MALI_SUCCESS; ++ /* Go to ACTIVE state */ ++ group->state = MALI_GROUP_STATE_ACTIVE; ++ ++ return MALI_TRUE; +} + -+static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info) ++mali_bool mali_group_deactivate(struct mali_group *group) +{ -+ if (NULL != info) { -+ /* 4096 for the page and 4 bytes for the address */ -+ const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4; -+ const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE; -+ const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4; ++ struct mali_pm_domain *domains[MALI_MAX_NUM_DOMAIN_REFS]; ++ u32 num_domains = 0; ++ mali_bool power_down = MALI_FALSE; + -+ info->page_table_dump_size += dump_size_in_bytes; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_INACTIVE != group->state); + -+ if (NULL != info->buffer) { -+ if (info->buffer_left < dump_size_in_bytes) MALI_ERROR(_MALI_OSK_ERR_NOMEM); ++ MALI_DEBUG_PRINT(3, ("Group: Deactivating group %s\n", ++ mali_group_core_description(group))); + -+ *info->buffer = phys_addr; -+ info->buffer++; ++ group->state = MALI_GROUP_STATE_INACTIVE; + -+ _mali_osk_memcpy(info->buffer, page, page_size_in_bytes); -+ info->buffer += page_size_in_elements; ++ MALI_DEBUG_ASSERT_POINTER(group->pm_domain); ++ domains[num_domains] = group->pm_domain; ++ num_domains++; + -+ info->buffer_left -= dump_size_in_bytes; ++ if (mali_group_is_virtual(group)) { ++ /* Release refs for all child groups */ ++ struct mali_group *child; ++ struct mali_group *temp; ++ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, ++ &group->group_list, ++ struct mali_group, group_list) { ++ child->state = MALI_GROUP_STATE_INACTIVE; ++ ++ MALI_DEBUG_ASSERT_POINTER(child->pm_domain); ++ domains[num_domains] = child->pm_domain; ++ num_domains++; ++ ++ /* Release L2 cache domain for child groups */ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > ++ num_domains); ++ domains[num_domains] = mali_l2_cache_get_pm_domain( ++ child->l2_cache_core[0]); ++ MALI_DEBUG_ASSERT(NULL == child->l2_cache_core[1]); ++ num_domains++; + } ++ ++ /* ++ * Must do mali_group_power_down() steps right here for ++ * virtual group, because virtual group itself is likely to ++ * stay powered on, however child 
groups are now very likely ++ * to be powered off (and thus lose their state). ++ */ ++ ++ mali_group_clear_session(group); ++ /* ++ * Disable the broadcast unit (clear it's mask). ++ * This is needed in case the GPU isn't actually ++ * powered down at this point and groups are ++ * removed from an inactive virtual group. ++ * If not, then the broadcast unit will intercept ++ * their interrupts! ++ */ ++ mali_bcast_disable(group->bcast_core); ++ } else { ++ /* Release L2 cache domain for physical groups */ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUM_DOMAIN_REFS > ++ num_domains); ++ domains[num_domains] = mali_l2_cache_get_pm_domain( ++ group->l2_cache_core[0]); ++ MALI_DEBUG_ASSERT(NULL == group->l2_cache_core[1]); ++ num_domains++; + } + -+ MALI_SUCCESS; ++ power_down = mali_pm_put_domain_refs(domains, num_domains); ++ ++ return power_down; +} + -+static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info) ++void mali_group_power_up(struct mali_group *group) +{ -+ MALI_DEBUG_ASSERT_POINTER(pagedir); -+ MALI_DEBUG_ASSERT_POINTER(info); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ if (NULL != pagedir->page_directory_mapped) { -+ int i; ++ MALI_DEBUG_PRINT(3, ("Group: Power up for %s\n", ++ mali_group_core_description(group))); + -+ MALI_CHECK_NO_ERROR( -+ mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info) -+ ); ++ group->power_is_on = MALI_TRUE; + -+ for (i = 0; i < 1024; i++) { -+ if (NULL != pagedir->page_entries_mapped[i]) { -+ MALI_CHECK_NO_ERROR( -+ mali_mmu_dump_page(pagedir->page_entries_mapped[i], -+ _mali_osk_mem_ioread32(pagedir->page_directory_mapped, -+ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info) -+ ); -+ } -+ } ++ if (MALI_FALSE == mali_group_is_virtual(group) ++ && MALI_FALSE == mali_group_is_in_virtual(group)) { ++ mali_group_reset(group); + } + -+ MALI_SUCCESS; ++ /* ++ * When we just acquire only one physical group form virt group, ++ * we should remove the bcast&dlbu mask from virt group and ++ * reset bcast and dlbu core, although part of pp cores in virt ++ * group maybe not be powered on. ++ */ ++ if (MALI_TRUE == mali_group_is_virtual(group)) { ++ mali_bcast_reset(group->bcast_core); ++ mali_dlbu_update_mask(group->dlbu_core); ++ } +} + -+static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info) ++void mali_group_power_down(struct mali_group *group) +{ -+ MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory, -+ "set the page directory address", info)); -+ MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info)); -+ MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info)); -+ MALI_SUCCESS; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT(MALI_TRUE == group->power_is_on); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ MALI_DEBUG_PRINT(3, ("Group: Power down for %s\n", ++ mali_group_core_description(group))); ++ ++ group->power_is_on = MALI_FALSE; ++ ++ if (mali_group_is_virtual(group)) { ++ /* ++ * What we do for physical jobs in this function should ++ * already have been done in mali_group_deactivate() ++ * for virtual group. 
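Aside (editorial): the dump helpers removed in this hunk (struct dump_info, writereg(), mali_mmu_dump_page()) rely on a two-pass scheme: a first pass with buffer == NULL only accumulates the size that will be needed, then the caller allocates that much and a second pass with a real buffer fills it, checking buffer_left before each write. A standalone sketch of the pattern with stand-in names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct dump_acc {
	uint32_t left;	/* bytes remaining in buf (pass 2 only) */
	uint32_t size;	/* total bytes required (accumulated in both passes) */
	uint32_t *buf;	/* NULL in pass 1, destination buffer in pass 2 */
};

/* Record one "register write" as a (where, what) pair of 32-bit words. */
static int dump_reg(struct dump_acc *a, uint32_t where, uint32_t what)
{
	a->size += 2 * sizeof(uint32_t);

	if (a->buf != NULL) {
		if (a->left < 2 * sizeof(uint32_t))
			return -1;	/* buffer too small */
		*a->buf++ = where;
		*a->buf++ = what;
		a->left -= 2 * sizeof(uint32_t);
	}
	return 0;
}

int main(void)
{
	struct dump_acc a = { 0, 0, NULL };
	uint32_t *out;

	/* Pass 1: size query only. */
	dump_reg(&a, 0x0000, 0x12345000);
	dump_reg(&a, 0x0008, 0);

	out = malloc(a.size);
	if (out == NULL)
		return 1;

	/* Pass 2: same calls again, now filling the buffer. */
	a.left = a.size;
	a.size = 0;
	a.buf = out;
	dump_reg(&a, 0x0000, 0x12345000);
	dump_reg(&a, 0x0008, 0);

	printf("dumped %u bytes\n", (unsigned)a.size);
	free(out);
	return 0;
}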
++ */ ++ MALI_DEBUG_ASSERT(NULL == group->session); ++ } else { ++ mali_group_clear_session(group); ++ } +} + -+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args) ++MALI_DEBUG_CODE(static void mali_group_print_virtual(struct mali_group *vgroup) +{ -+ struct dump_info info = { 0, 0, 0, NULL }; -+ struct mali_session_data *session_data; ++ u32 i; ++ struct mali_group *group; ++ struct mali_group *temp; + -+ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx); -+ MALI_DEBUG_ASSERT_POINTER(session_data); -+ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_PRINT(4, ("Virtual group %s (%p)\n", ++ mali_group_core_description(vgroup), ++ vgroup)); ++ MALI_DEBUG_PRINT(4, ("l2_cache_core[0] = %p, ref = %d\n", vgroup->l2_cache_core[0], vgroup->l2_cache_core_ref_count[0])); ++ MALI_DEBUG_PRINT(4, ("l2_cache_core[1] = %p, ref = %d\n", vgroup->l2_cache_core[1], vgroup->l2_cache_core_ref_count[1])); + -+ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info)); -+ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info)); -+ args->size = info.register_writes_size + info.page_table_dump_size; -+ MALI_SUCCESS; -+} ++ i = 0; ++ _MALI_OSK_LIST_FOREACHENTRY(group, temp, &vgroup->group_list, struct mali_group, group_list) { ++ MALI_DEBUG_PRINT(4, ("[%d] %s (%p), l2_cache_core[0] = %p\n", ++ i, mali_group_core_description(group), ++ group, group->l2_cache_core[0])); ++ i++; ++ } ++}) + -+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args) ++static void mali_group_dump_core_status(struct mali_group *group) +{ -+ struct dump_info info = { 0, 0, 0, NULL }; -+ struct mali_session_data *session_data; ++ u32 i; + -+ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT(NULL != group->gp_core || (NULL != group->pp_core && !mali_group_is_virtual(group))); + -+ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx); -+ MALI_DEBUG_ASSERT_POINTER(session_data); ++ if (NULL != group->gp_core) { ++ MALI_PRINT(("Dump Group %s\n", group->gp_core->hw_core.description)); + -+ info.buffer_left = args->size; -+ info.buffer = (u32 *)(uintptr_t)args->buffer; ++ for (i = 0; i < 0xA8; i += 0x10) { ++ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->gp_core->hw_core, i), ++ mali_hw_core_register_read(&group->gp_core->hw_core, i + 4), ++ mali_hw_core_register_read(&group->gp_core->hw_core, i + 8), ++ mali_hw_core_register_read(&group->gp_core->hw_core, i + 12))); ++ } + -+ args->register_writes = (uintptr_t)info.buffer; -+ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info)); + -+ args->page_table_dump = (uintptr_t)info.buffer; -+ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info)); ++ } else { ++ MALI_PRINT(("Dump Group %s\n", group->pp_core->hw_core.description)); + -+ args->register_writes_size = info.register_writes_size; -+ args->page_table_dump_size = info.page_table_dump_size; ++ for (i = 0; i < 0x5c; i += 0x10) { ++ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12))); ++ } + -+ MALI_SUCCESS; -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h 
b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h -new file mode 100644 -index 000000000..3fdf07210 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h -@@ -0,0 +1,110 @@ -+/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* Ignore some minor registers */ ++ for (i = 0x1000; i < 0x1068; i += 0x10) { ++ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->pp_core->hw_core, i), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 4), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 8), ++ mali_hw_core_register_read(&group->pp_core->hw_core, i + 12))); ++ } ++ } + -+#ifndef __MALI_MMU_PAGE_DIRECTORY_H__ -+#define __MALI_MMU_PAGE_DIRECTORY_H__ ++ MALI_PRINT(("Dump Group MMU\n")); ++ for (i = 0; i < 0x24; i += 0x10) { ++ MALI_PRINT(("0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i, mali_hw_core_register_read(&group->mmu->hw_core, i), ++ mali_hw_core_register_read(&group->mmu->hw_core, i + 4), ++ mali_hw_core_register_read(&group->mmu->hw_core, i + 8), ++ mali_hw_core_register_read(&group->mmu->hw_core, i + 12))); ++ } ++} + -+#include "mali_osk.h" + +/** -+ * Size of an MMU page in bytes ++ * @Dump group status + */ -+#define MALI_MMU_PAGE_SIZE 0x1000 ++void mali_group_dump_status(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); + -+/* -+ * Size of the address space referenced by a page table page -+ */ -+#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */ ++ if (mali_group_is_virtual(group)) { ++ struct mali_group *group_c; ++ struct mali_group *temp; ++ _MALI_OSK_LIST_FOREACHENTRY(group_c, temp, &group->group_list, struct mali_group, group_list) { ++ mali_group_dump_core_status(group_c); ++ } ++ } else { ++ mali_group_dump_core_status(group); ++ } ++} + +/** -+ * Page directory index from address -+ * Calculates the page directory index from the given address ++ * @brief Add child group to virtual group parent + */ -+#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF) ++void mali_group_add_group(struct mali_group *parent, struct mali_group *child) ++{ ++ mali_bool found; ++ u32 i; + -+/** -+ * Page table index from address -+ * Calculates the page table index from the given address -+ */ -+#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF) ++ MALI_DEBUG_PRINT(3, ("Adding group %s to virtual group %s\n", ++ mali_group_core_description(child), ++ mali_group_core_description(parent))); + -+/** -+ * Extract the memory address from an PDE/PTE entry -+ */ -+#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00) ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); ++ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child)); ++ MALI_DEBUG_ASSERT(NULL == child->parent_group); + -+#define MALI_INVALID_PAGE ((u32)(~0)) ++ _mali_osk_list_addtail(&child->group_list, &parent->group_list); + -+/** -+ * -+ */ -+typedef enum mali_mmu_entry_flags { -+ MALI_MMU_FLAGS_PRESENT = 0x01, -+ MALI_MMU_FLAGS_READ_PERMISSION = 0x02, -+ MALI_MMU_FLAGS_WRITE_PERMISSION = 
0x04, -+ MALI_MMU_FLAGS_OVERRIDE_CACHE = 0x8, -+ MALI_MMU_FLAGS_WRITE_CACHEABLE = 0x10, -+ MALI_MMU_FLAGS_WRITE_ALLOCATE = 0x20, -+ MALI_MMU_FLAGS_WRITE_BUFFERABLE = 0x40, -+ MALI_MMU_FLAGS_READ_CACHEABLE = 0x80, -+ MALI_MMU_FLAGS_READ_ALLOCATE = 0x100, -+ MALI_MMU_FLAGS_MASK = 0x1FF, -+} mali_mmu_entry_flags; ++ child->parent_group = parent; + ++ MALI_DEBUG_ASSERT_POINTER(child->l2_cache_core[0]); + -+#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \ -+ MALI_MMU_FLAGS_PRESENT | \ -+ MALI_MMU_FLAGS_READ_PERMISSION | \ -+ MALI_MMU_FLAGS_WRITE_PERMISSION | \ -+ MALI_MMU_FLAGS_OVERRIDE_CACHE | \ -+ MALI_MMU_FLAGS_WRITE_CACHEABLE | \ -+ MALI_MMU_FLAGS_WRITE_BUFFERABLE | \ -+ MALI_MMU_FLAGS_READ_CACHEABLE | \ -+ MALI_MMU_FLAGS_READ_ALLOCATE ) ++ MALI_DEBUG_PRINT(4, ("parent->l2_cache_core: [0] = %p, [1] = %p\n", parent->l2_cache_core[0], parent->l2_cache_core[1])); ++ MALI_DEBUG_PRINT(4, ("child->l2_cache_core: [0] = %p, [1] = %p\n", child->l2_cache_core[0], child->l2_cache_core[1])); + -+#define MALI_MMU_FLAGS_DEFAULT ( \ -+ MALI_MMU_FLAGS_PRESENT | \ -+ MALI_MMU_FLAGS_READ_PERMISSION | \ -+ MALI_MMU_FLAGS_WRITE_PERMISSION ) ++ /* Keep track of the L2 cache cores of child groups */ ++ found = MALI_FALSE; ++ for (i = 0; i < 2; i++) { ++ if (parent->l2_cache_core[i] == child->l2_cache_core[0]) { ++ MALI_DEBUG_ASSERT(parent->l2_cache_core_ref_count[i] > 0); ++ parent->l2_cache_core_ref_count[i]++; ++ found = MALI_TRUE; ++ } ++ } + ++ if (!found) { ++ /* First time we see this L2 cache, add it to our list */ ++ i = (NULL == parent->l2_cache_core[0]) ? 0 : 1; + -+struct mali_page_directory { -+ u32 page_directory; /**< Physical address of the memory session's page directory */ -+ mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */ ++ MALI_DEBUG_PRINT(4, ("First time we see l2_cache %p. Adding to [%d] = %p\n", child->l2_cache_core[0], i, parent->l2_cache_core[i])); + -+ mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */ -+ u32 page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */ -+}; ++ MALI_DEBUG_ASSERT(NULL == parent->l2_cache_core[i]); + -+/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range) */ -+_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size); -+_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size); ++ parent->l2_cache_core[i] = child->l2_cache_core[0]; ++ parent->l2_cache_core_ref_count[i]++; ++ } + -+/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. 
*/ -+void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address, -+ mali_dma_addr phys_address, u32 size, u32 permission_bits); ++ /* Update Broadcast Unit and DLBU */ ++ mali_bcast_add_group(parent->bcast_core, child); ++ mali_dlbu_add_group(parent->dlbu_core, child); + -+u32 mali_allocate_empty_page(mali_io_address *virtual); -+void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr); -+_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory, -+ mali_io_address *page_directory_mapping, -+ mali_dma_addr *page_table, mali_io_address *page_table_mapping, -+ mali_dma_addr *data_page, mali_io_address *data_page_mapping); -+void mali_destroy_fault_flush_pages( -+ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping, -+ mali_dma_addr *page_table, mali_io_address *page_table_mapping, -+ mali_dma_addr *data_page, mali_io_address *data_page_mapping); ++ if (MALI_TRUE == parent->power_is_on) { ++ mali_bcast_reset(parent->bcast_core); ++ mali_dlbu_update_mask(parent->dlbu_core); ++ } + -+struct mali_page_directory *mali_mmu_pagedir_alloc(void); -+void mali_mmu_pagedir_free(struct mali_page_directory *pagedir); ++ if (MALI_TRUE == child->power_is_on) { ++ if (NULL == parent->session) { ++ if (NULL != child->session) { ++ /* ++ * Parent has no session, so clear ++ * child session as well. ++ */ ++ mali_mmu_activate_empty_page_directory(child->mmu); ++ } ++ } else { ++ if (parent->session == child->session) { ++ /* We already have same session as parent, ++ * so a simple zap should be enough. ++ */ ++ mali_mmu_zap_tlb(child->mmu); ++ } else { ++ /* ++ * Parent has a different session, so we must ++ * switch to that sessions page table ++ */ ++ mali_mmu_activate_page_directory(child->mmu, mali_session_get_page_directory(parent->session)); ++ } + -+void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr); ++ /* It is the parent which keeps the session from now on */ ++ child->session = NULL; ++ } ++ } else { ++ /* should have been cleared when child was powered down */ ++ MALI_DEBUG_ASSERT(NULL == child->session); ++ } + -+#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk.h b/drivers/gpu/arm/mali400/mali/common/mali_osk.h -new file mode 100644 -index 000000000..9ade362d6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk.h -@@ -0,0 +1,1389 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
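Aside (editorial): a worked example of the address arithmetic behind MALI_MMU_PDE_ENTRY, MALI_MMU_PTE_ENTRY and MALI_MMU_ENTRY_ADDRESS from the removed header: bits 31..22 pick one of 1024 directory entries (4 MiB each), bits 21..12 pick one of 1024 table entries (4 KiB pages), and an entry's low bits carry the MALI_MMU_FLAGS_* bits that MALI_MMU_ENTRY_ADDRESS strips. The sample address and PTE value below are arbitrary.

#include <stdint.h>
#include <stdio.h>

#define PDE_ENTRY(addr)    (((addr) >> 22) & 0x03FF)	/* as in the header */
#define PTE_ENTRY(addr)    (((addr) >> 12) & 0x03FF)
#define ENTRY_ADDRESS(v)   ((v) & 0xFFFFFC00)

int main(void)
{
	uint32_t mali_addr = 0x12345678;	/* sample Mali virtual address */
	uint32_t pte = 0x10023C07;		/* sample PTE: address + flags */

	/* 0x12345678 -> PDE 0x048 (72), PTE 0x345 (837), page offset 0x678 */
	printf("PDE index:   0x%03x\n", (unsigned)PDE_ENTRY(mali_addr));
	printf("PTE index:   0x%03x\n", (unsigned)PTE_ENTRY(mali_addr));
	printf("page offset: 0x%03x\n", (unsigned)(mali_addr & 0xFFF));

	/* 0x10023C07 -> page address 0x10023C00, flag bits 0x007 */
	printf("PTE address: 0x%08x flags: 0x%03x\n",
	       (unsigned)ENTRY_ADDRESS(pte), (unsigned)(pte & 0x1FF));
	return 0;
}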
-+ */ ++ /* Start job on child when parent is active */ ++ if (NULL != parent->pp_running_job) { ++ struct mali_pp_job *job = parent->pp_running_job; + -+/** -+ * @file mali_osk.h -+ * Defines the OS abstraction layer for the kernel device driver (OSK) -+ */ ++ MALI_DEBUG_PRINT(3, ("Group %x joining running job %d on virtual group %x\n", ++ child, mali_pp_job_get_id(job), parent)); + -+#ifndef __MALI_OSK_H__ -+#define __MALI_OSK_H__ ++ /* Only allowed to add active child to an active parent */ ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == parent->state); ++ MALI_DEBUG_ASSERT(MALI_GROUP_STATE_ACTIVE == child->state); + -+#include -+#include "mali_osk_types.h" -+#include "mali_osk_specific.h" /* include any per-os specifics */ -+#include "mali_osk_locks.h" ++ mali_pp_job_start(child->pp_core, job, mali_pp_core_get_id(child->pp_core), MALI_TRUE); + -+#ifdef __cplusplus -+extern "C" { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); ++ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, ++ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ if (child->pp_core) { ++ trace_gpu_sched_switch( ++ mali_pp_core_description(child->pp_core), ++ sched_clock(), mali_pp_job_get_tid(job), ++ 0, mali_pp_job_get_id(job)); ++ } +#endif + -+/** -+ * @addtogroup uddapi Unified Device Driver (UDD) APIs -+ * -+ * @{ -+ */ ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); ++#endif ++ } ++ ++ MALI_DEBUG_CODE(mali_group_print_virtual(parent);) ++} + +/** -+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs -+ * -+ * @{ ++ * @brief Remove child group from virtual group parent + */ ++void mali_group_remove_group(struct mali_group *parent, struct mali_group *child) ++{ ++ u32 i; + -+/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks -+ * @{ */ ++ MALI_DEBUG_PRINT(3, ("Removing group %s from virtual group %s\n", ++ mali_group_core_description(child), ++ mali_group_core_description(parent))); + -+#ifdef DEBUG -+/** @brief Macro for asserting that the current thread holds a given lock -+ */ -+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid()); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); ++ MALI_DEBUG_ASSERT(!mali_group_is_virtual(child)); ++ MALI_DEBUG_ASSERT(parent == child->parent_group); + -+/** @brief returns a lock's owner (thread id) if debugging is enabled -+ */ -+#else -+#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0) -+#endif ++ /* Update Broadcast Unit and DLBU */ ++ mali_bcast_remove_group(parent->bcast_core, child); ++ mali_dlbu_remove_group(parent->dlbu_core, child); + -+#define _mali_osk_ctxprintf seq_printf ++ if (MALI_TRUE == parent->power_is_on) { ++ mali_bcast_reset(parent->bcast_core); ++ mali_dlbu_update_mask(parent->dlbu_core); ++ } + -+/** @} */ /* end group _mali_osk_lock */ ++ child->session = 
parent->session; ++ child->parent_group = NULL; + -+/** @addtogroup _mali_osk_miscellaneous -+ * @{ */ ++ _mali_osk_list_delinit(&child->group_list); ++ if (_mali_osk_list_empty(&parent->group_list)) { ++ parent->session = NULL; ++ } + -+/** @brief Find the containing structure of another structure -+ * -+ * This is the reverse of the operation 'offsetof'. This means that the -+ * following condition is satisfied: -+ * -+ * ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member ) -+ * -+ * When ptr is of type 'type'. -+ * -+ * Its purpose it to recover a larger structure that has wrapped a smaller one. -+ * -+ * @note no type or memory checking occurs to ensure that a wrapper structure -+ * does in fact exist, and that it is being recovered with respect to the -+ * correct member. -+ * -+ * @param ptr the pointer to the member that is contained within the larger -+ * structure -+ * @param type the type of the structure that contains the member -+ * @param member the name of the member in the structure that ptr points to. -+ * @return a pointer to a \a type object which contains \a member, as pointed -+ * to by \a ptr. -+ */ -+#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \ -+ ((type *)( ((char *)ptr) - offsetof(type,member) )) ++ /* Keep track of the L2 cache cores of child groups */ ++ i = (child->l2_cache_core[0] == parent->l2_cache_core[0]) ? 0 : 1; + -+/** @addtogroup _mali_osk_wq -+ * @{ */ ++ MALI_DEBUG_ASSERT(child->l2_cache_core[0] == parent->l2_cache_core[i]); + -+/** @brief Initialize work queues (for deferred work) -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_wq_init(void); ++ parent->l2_cache_core_ref_count[i]--; ++ if (parent->l2_cache_core_ref_count[i] == 0) { ++ parent->l2_cache_core[i] = NULL; ++ } + -+/** @brief Terminate work queues (for deferred work) -+ */ -+void _mali_osk_wq_term(void); ++ MALI_DEBUG_CODE(mali_group_print_virtual(parent)); ++} + -+/** @brief Create work in the work queue -+ * -+ * Creates a work object which can be scheduled in the work queue. When -+ * scheduled, \a handler will be called with \a data as the argument. -+ * -+ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work -+ * is scheduled in the queue. -+ * -+ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work() -+ * when no longer needed. -+ */ -+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data); ++struct mali_group *mali_group_acquire_group(struct mali_group *parent) ++{ ++ struct mali_group *child = NULL; + -+/** @brief A high priority version of \a _mali_osk_wq_create_work() -+ * -+ * Creates a work object which can be scheduled in the high priority work queue. -+ * -+ * This is unfortunately needed to get low latency scheduling of the Mali cores. Normally we would -+ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map -+ * and unmap shared memory when a job is connected to external fences (timelines). And this requires -+ * taking a mutex. -+ * -+ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we -+ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs -+ * random things instead of starting the next GPU job when the GPU is idle. So setting the gpu -+ * scheduler to high priority does give a visually more responsive system. 
-+ * -+ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri() -+ */ -+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(mali_group_is_virtual(parent)); + -+/** @brief Delete a work object -+ * -+ * This will flush the work queue to ensure that the work handler will not -+ * be called after deletion. -+ */ -+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work); ++ if (!_mali_osk_list_empty(&parent->group_list)) { ++ child = _MALI_OSK_LIST_ENTRY(parent->group_list.prev, struct mali_group, group_list); ++ mali_group_remove_group(parent, child); ++ } + -+/** @brief Delete a work object -+ * -+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will -+ * not be called after deletion. -+ */ -+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work); ++ if (NULL != child) { ++ if (MALI_GROUP_STATE_ACTIVE != parent->state ++ && MALI_TRUE == child->power_is_on) { ++ mali_group_reset(child); ++ } ++ } + -+/** @brief Cause a queued, deferred call of the work handler -+ * -+ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls -+ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the -+ * work handler will be scheduled to run at some point in the future. -+ * -+ * Typically this is called by the IRQ upper-half to defer further processing of -+ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work -+ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer -+ * callbacks also use this mechanism, because they are treated as though they -+ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more -+ * information. -+ * -+ * Code that operates in a kernel-process context (with no IRQ context -+ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The -+ * advantage over direct calling is that deferred calling allows the caller and -+ * IRQ bottom half to hold the same mutex, with a guarantee that they will not -+ * deadlock just by using this mechanism. -+ * -+ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to -+ * allow for more than one thread to make a deferred call. Therfore, if it is -+ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too. -+ * 'K' is a number that is implementation-specific. -+ * -+ * _mali_osk_wq_schedule_work() is guaranteed to not block on: -+ * - enqueuing a deferred call request. -+ * - the completion of the work handler. -+ * -+ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work() -+ * blocked, then it would cause a deadlock when the following two conditions -+ * hold: -+ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks -+ * a mutex -+ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also -+ * holds the same mutex -+ * -+ * @note care must be taken to not overflow the queue that -+ * _mali_osk_wq_schedule_work() operates on. Code must be structured to -+ * ensure that the number of requests made to the queue is bounded. Otherwise, -+ * work will be lost. -+ * -+ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer, -+ * 1-reader type. The writers are the callers of _mali_osk_wq_schedule_work -+ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers, -+ * callers from a Kernel-process context). 
The reader is a single thread that -+ * handles all OSK-registered work. -+ * -+ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the -+ * work to begin processing. -+ */ -+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work); ++ return child; ++} + -+/** @brief Cause a queued, deferred call of the high priority work handler -+ * -+ * Function is the same as \a _mali_osk_wq_schedule_work() with the only -+ * difference that it runs in a high (real time) priority on the system. -+ * -+ * Should only be used as a substitue for doing the same work in interrupts. -+ * -+ * This is allowed to sleep, but the work should be small since it will block -+ * all other applications. -+*/ -+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work); ++void mali_group_reset(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(NULL == group->gp_running_job); ++ MALI_DEBUG_ASSERT(NULL == group->pp_running_job); + -+/** @brief Flush the work queue -+ * -+ * This will flush the OSK work queue, ensuring all work in the queue has -+ * completed before returning. -+ * -+ * Since this blocks on the completion of work in the work-queue, the -+ * caller of this function \b must \b not hold any mutexes that are taken by -+ * any registered work handler. To do so may cause a deadlock. -+ * -+ */ -+void _mali_osk_wq_flush(void); ++ MALI_DEBUG_PRINT(3, ("Group: reset of %s\n", ++ mali_group_core_description(group))); + -+/** @brief Create work in the delayed work queue -+ * -+ * Creates a work object which can be scheduled in the work queue. When -+ * scheduled, a timer will be start and the \a handler will be called with -+ * \a data as the argument when timer out -+ * -+ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work -+ * is scheduled in the queue. -+ * -+ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush() -+ * when no longer needed. -+ */ -+_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data); ++ if (NULL != group->dlbu_core) { ++ mali_dlbu_reset(group->dlbu_core); ++ } + -+/** @brief Delete a work object -+ * -+ * This will NOT flush the work queue, so only call this if you are sure that the work handler will -+ * not be called after deletion. -+ */ -+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work); ++ if (NULL != group->bcast_core) { ++ mali_bcast_reset(group->bcast_core); ++ } + -+/** @brief Cancel a delayed work without waiting for it to finish -+ * -+ * Note that the \a work callback function may still be running on return from -+ * _mali_osk_wq_delayed_cancel_work_async(). -+ * -+ * @param work The delayed work to be cancelled -+ */ -+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work); ++ MALI_DEBUG_ASSERT(NULL != group->mmu); ++ mali_group_reset_mmu(group); + -+/** @brief Cancel a delayed work and wait for it to finish -+ * -+ * When this function returns, the \a work was either cancelled or it finished running. 
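Aside (editorial): a usage sketch for the deferred-work API documented above. It assumes the OSK headers added by this patch and that _mali_osk_wq_work_handler_t is a void callback taking the void *data argument passed at creation time; that typedef is not shown in this hunk, so treat it as an assumption. This is an in-driver sketch, not a standalone program.

#include "mali_osk.h"

/* Bottom half: runs later in process context, so it may take mutexes that
 * the IRQ upper half scheduling it cannot. */
static void my_bottom_half(void *data)
{
	(void)data;	/* process the deferred event here */
}

static _mali_osk_wq_work_t *my_work;

static _mali_osk_errcode_t my_setup(void *ctx)
{
	my_work = _mali_osk_wq_create_work(my_bottom_half, ctx);
	if (NULL == my_work)
		return _MALI_OSK_ERR_NOMEM;
	return _MALI_OSK_ERR_OK;
}

/* Called from the IRQ upper half: queues the work and never blocks. */
static void my_upper_half_event(void)
{
	_mali_osk_wq_schedule_work(my_work);
}

static void my_teardown(void)
{
	/* Per the documentation above, deletion flushes the queue so the
	 * handler cannot run after this returns. */
	_mali_osk_wq_delete_work(my_work);
}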
-+ * -+ * @param work The delayed work to be cancelled -+ */ -+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work); ++ if (NULL != group->gp_core) { ++ MALI_DEBUG_ASSERT(NULL == group->pp_core); ++ mali_gp_reset(group->gp_core); ++ } else { ++ MALI_DEBUG_ASSERT(NULL != group->pp_core); ++ mali_group_reset_pp(group); ++ } ++} + -+/** @brief Put \a work task in global workqueue after delay -+ * -+ * After waiting for a given time this puts a job in the kernel-global -+ * workqueue. -+ * -+ * If \a work was already on a queue, this function will return without doing anything -+ * -+ * @param work job to be done -+ * @param delay number of jiffies to wait or 0 for immediate execution -+ */ -+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay); ++void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled) ++{ ++ struct mali_session_data *session; + -+/** @} */ /* end group _mali_osk_wq */ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + ++ MALI_DEBUG_PRINT(3, ("Group: Starting GP job 0x%08X on group %s\n", ++ job, ++ mali_group_core_description(group))); + -+/** @addtogroup _mali_osk_irq -+ * @{ */ ++ session = mali_gp_job_get_session(job); + -+/** @brief Initialize IRQ handling for a resource -+ * -+ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum. -+ * \a data will be passed as argument to the handler when an interrupt occurs. -+ * -+ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using -+ * the supplied \a trigger_func and \a ack_func. These functions will also -+ * receive \a data as their argument. -+ * -+ * @param irqnum The IRQ number that the resource uses, as seen by the CPU. -+ * The value -1 has a special meaning which indicates the use of probing, and -+ * trigger_func and ack_func must be non-NULL. -+ * @param uhandler The interrupt handler, corresponding to a ISR handler for -+ * the resource -+ * @param int_data resource specific data, which will be passed to uhandler -+ * @param trigger_func Optional: a function to trigger the resource's irq, to -+ * probe for the interrupt. Use NULL if irqnum != -1. -+ * @param ack_func Optional: a function to acknowledge the resource's irq, to -+ * probe for the interrupt. Use NULL if irqnum != -1. -+ * @param probe_data resource-specific data, which will be passed to -+ * (if present) trigger_func and ack_func -+ * @param description textual description of the IRQ resource. -+ * @return on success, a pointer to a _mali_osk_irq_t object, which represents -+ * the IRQ handling on this resource. NULL on failure. -+ */ -+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description); ++ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]); ++ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_gp_job_get_cache_order(job)); + -+/** @brief Terminate IRQ handling on a resource. -+ * -+ * This will disable the interrupt from the device, and then waits for any -+ * currently executing IRQ handlers to complete. 
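Aside (editorial): a usage sketch for _mali_osk_irq_init() as documented above. The upper-half handler is assumed here to be a void callback receiving the int_data pointer (the _mali_osk_irq_uhandler_t typedef is not shown in this hunk), and _mali_osk_irq_term(), declared just below, is used for teardown. In-driver sketch, not standalone.

#include "mali_osk.h"

/* Upper half: acknowledge the core's interrupt and defer the real work,
 * e.g. via _mali_osk_wq_schedule_work() from the section above. */
static void my_irq_upper_half(void *int_data)
{
	(void)int_data;
}

static _mali_osk_irq_t *my_irq;

static void my_core_irq_setup(u32 irqnum, void *core)
{
	/* The IRQ number is known (not -1), so the probing trigger/ack
	 * callbacks and probe_data stay NULL, as the docs above require. */
	my_irq = _mali_osk_irq_init(irqnum, my_irq_upper_half, core,
				    NULL, NULL, NULL, "my core");
	MALI_DEBUG_ASSERT_POINTER(my_irq);
}

static void my_core_irq_teardown(void)
{
	/* Disables the interrupt and waits for running handlers. */
	_mali_osk_irq_term(my_irq);
}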
-+ * -+ * @note If work is deferred to an IRQ bottom-half handler through -+ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work -+ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work() -+ * -+ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the -+ * resource whose IRQ handling is to be terminated. -+ */ -+void _mali_osk_irq_term(_mali_osk_irq_t *irq); ++ /* Reset GPU and disable gpu secure mode if needed. */ ++ if (MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) { ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ _mali_osk_gpu_reset_and_secure_mode_disable(); ++ /* Need to disable the pmu interrupt mask register */ ++ if (NULL != pmu) { ++ mali_pmu_reset(pmu); ++ } ++ } + -+/** @} */ /* end group _mali_osk_irq */ ++ /* Reload mmu page table if needed */ ++ if (MALI_TRUE == gpu_secure_mode_pre_enabled) { ++ mali_group_reset(group); ++ mali_group_activate_page_directory(group, session, MALI_TRUE); ++ } else { ++ mali_group_activate_page_directory(group, session, MALI_FALSE); ++ } + ++ mali_gp_job_start(group->gp_core, job); + -+/** @addtogroup _mali_osk_atomic -+ * @{ */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0) | ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, ++ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job), 0, 0, 0); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), ++ mali_gp_job_get_pid(job), mali_gp_job_get_tid(job), 0, 0, 0); + -+/** @brief Decrement an atomic counter -+ * -+ * @note It is an error to decrement the counter beyond -(1<<23) -+ * -+ * @param atom pointer to an atomic counter */ -+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom); ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_gp_job_get_pid(job), 1 /* active */, 1 /* GP */, 0 /* core */, ++ mali_gp_job_get_frame_builder_id(job), mali_gp_job_get_flush_id(job)); ++#endif + -+/** @brief Decrement an atomic counter, return new value -+ * -+ * @param atom pointer to an atomic counter -+ * @return The new value, after decrement */ -+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom); ++#if defined(CONFIG_MALI400_PROFILING) ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { ++ mali_group_report_l2_cache_counters_per_core(group, 0); ++ } ++#endif /* #if defined(CONFIG_MALI400_PROFILING) */ + -+/** @brief Increment an atomic counter -+ * -+ * @note It is an error to increment the counter beyond (1<<23)-1 -+ * -+ * @param atom pointer to an atomic counter */ -+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ if (group->gp_core) { ++ trace_gpu_sched_switch(mali_gp_core_description(group->gp_core), ++ sched_clock(), mali_gp_job_get_tid(job), ++ 0, mali_gp_job_get_id(job)); ++ } ++#endif + -+/** @brief Increment an atomic counter, return new value -+ * -+ * @param atom pointer to an atomic counter */ -+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom); ++ group->gp_running_job = job; ++ group->is_working = MALI_TRUE; + -+/** @brief Initialize an atomic counter -+ * -+ * @note the parameter required is a u32, and so signed integers should be -+ * cast to u32. 
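Aside (editorial): a usage sketch for the atomic counter API above, here tracking the number of jobs in flight. _mali_osk_atomic_init() and _mali_osk_atomic_term() are declared immediately after this point in the header; the other calls are the ones documented above. In-driver sketch, not standalone.

#include "mali_osk.h"

static _mali_osk_atomic_t jobs_in_flight;

static void job_tracker_init(void)
{
	_mali_osk_atomic_init(&jobs_in_flight, 0);
}

static void job_submitted(void)
{
	_mali_osk_atomic_inc(&jobs_in_flight);
}

/* Returns MALI_TRUE when the completed job was the last one in flight. */
static mali_bool job_completed(void)
{
	return (0 == _mali_osk_atomic_dec_return(&jobs_in_flight)) ?
	       MALI_TRUE : MALI_FALSE;
}

static void job_tracker_term(void)
{
	_mali_osk_atomic_term(&jobs_in_flight);
}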
-+ * -+ * @param atom pointer to an atomic counter -+ * @param val the value to initialize the atomic counter. -+ */ -+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val); ++ /* Setup SW timer and record start time */ ++ group->start_time = _mali_osk_time_tickcount(); ++ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime)); + -+/** @brief Read a value from an atomic counter -+ * -+ * This can only be safely used to determine the value of the counter when it -+ * is guaranteed that other threads will not be modifying the counter. This -+ * makes its usefulness limited. -+ * -+ * @param atom pointer to an atomic counter -+ */ -+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom); ++ MALI_DEBUG_PRINT(4, ("Group: Started GP job 0x%08X on group %s at %u\n", ++ job, ++ mali_group_core_description(group), ++ group->start_time)); ++} + -+/** @brief Terminate an atomic counter -+ * -+ * @param atom pointer to an atomic counter ++/* Used to set all the registers except frame renderer list address and fragment shader stack address ++ * It means the caller must set these two registers properly before calling this function + */ -+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom); ++void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled) ++{ ++ struct mali_session_data *session; + -+/** @brief Assign a new val to atomic counter, and return the old atomic counter -+ * -+ * @param atom pointer to an atomic counter -+ * @param val the new value assign to the atomic counter -+ * @return the old value of the atomic counter -+ */ -+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val); -+/** @} */ /* end group _mali_osk_atomic */ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + ++ MALI_DEBUG_PRINT(3, ("Group: Starting PP job 0x%08X part %u/%u on group %s\n", ++ job, sub_job + 1, ++ mali_pp_job_get_sub_job_count(job), ++ mali_group_core_description(group))); + -+/** @defgroup _mali_osk_memory OSK Memory Allocation -+ * @{ */ ++ session = mali_pp_job_get_session(job); + -+/** @brief Allocate zero-initialized memory. -+ * -+ * Returns a buffer capable of containing at least \a n elements of \a size -+ * bytes each. The buffer is initialized to zero. -+ * -+ * If there is a need for a bigger block of memory (16KB or bigger), then -+ * consider to use _mali_osk_vmalloc() instead, as this function might -+ * map down to a OS function with size limitations. -+ * -+ * The buffer is suitably aligned for storage and subsequent access of every -+ * type that the compiler supports. Therefore, the pointer to the start of the -+ * buffer may be cast into any pointer type, and be subsequently accessed from -+ * such a pointer, without loss of information. -+ * -+ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). -+ * Failure to do so will cause a memory leak. -+ * -+ * @note Most toolchains supply memory allocation functions that meet the -+ * compiler's alignment requirements. -+ * -+ * @param n Number of elements to allocate -+ * @param size Size of each element -+ * @return On success, the zero-initialized buffer allocated. NULL on failure -+ */ -+void *_mali_osk_calloc(u32 n, u32 size); ++ if (NULL != group->l2_cache_core[0]) { ++ mali_l2_cache_invalidate_conditional(group->l2_cache_core[0], mali_pp_job_get_cache_order(job)); ++ } + -+/** @brief Allocate memory. -+ * -+ * Returns a buffer capable of containing at least \a size bytes. 
The -+ * contents of the buffer are undefined. -+ * -+ * If there is a need for a bigger block of memory (16KB or bigger), then -+ * consider to use _mali_osk_vmalloc() instead, as this function might -+ * map down to a OS function with size limitations. -+ * -+ * The buffer is suitably aligned for storage and subsequent access of every -+ * type that the compiler supports. Therefore, the pointer to the start of the -+ * buffer may be cast into any pointer type, and be subsequently accessed from -+ * such a pointer, without loss of information. -+ * -+ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). -+ * Failure to do so will cause a memory leak. -+ * -+ * @note Most toolchains supply memory allocation functions that meet the -+ * compiler's alignment requirements. -+ * -+ * Remember to free memory using _mali_osk_free(). -+ * @param size Number of bytes to allocate -+ * @return On success, the buffer allocated. NULL on failure. -+ */ -+void *_mali_osk_malloc(u32 size); ++ if (NULL != group->l2_cache_core[1]) { ++ mali_l2_cache_invalidate_conditional(group->l2_cache_core[1], mali_pp_job_get_cache_order(job)); ++ } + -+/** @brief Free memory. -+ * -+ * Reclaims the buffer pointed to by the parameter \a ptr for the system. -+ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc() -+ * must be freed before the application exits. Otherwise, -+ * a memory leak will occur. -+ * -+ * Memory must be freed once. It is an error to free the same non-NULL pointer -+ * more than once. -+ * -+ * It is legal to free the NULL pointer. -+ * -+ * @param ptr Pointer to buffer to free -+ */ -+void _mali_osk_free(void *ptr); ++ /* Reset GPU and change gpu secure mode if needed. */ ++ if (MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == _mali_osk_gpu_secure_mode_is_enabled()) { ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ _mali_osk_gpu_reset_and_secure_mode_enable(); ++ /* Need to disable the pmu interrupt mask register */ ++ if (NULL != pmu) { ++ mali_pmu_reset(pmu); ++ } ++ } else if (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == _mali_osk_gpu_secure_mode_is_enabled()) { ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ _mali_osk_gpu_reset_and_secure_mode_disable(); ++ /* Need to disable the pmu interrupt mask register */ ++ if (NULL != pmu) { ++ mali_pmu_reset(pmu); ++ } ++ } + -+/** @brief Allocate memory. -+ * -+ * Returns a buffer capable of containing at least \a size bytes. The -+ * contents of the buffer are undefined. -+ * -+ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(), -+ * but do support bigger sizes. -+ * -+ * The buffer is suitably aligned for storage and subsequent access of every -+ * type that the compiler supports. Therefore, the pointer to the start of the -+ * buffer may be cast into any pointer type, and be subsequently accessed from -+ * such a pointer, without loss of information. -+ * -+ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). -+ * Failure to do so will cause a memory leak. -+ * -+ * @note Most toolchains supply memory allocation functions that meet the -+ * compiler's alignment requirements. -+ * -+ * Remember to free memory using _mali_osk_free(). -+ * @param size Number of bytes to allocate -+ * @return On success, the buffer allocated. NULL on failure. 
-+ */ -+void *_mali_osk_valloc(u32 size); ++ /* Reload the mmu page table if needed */ ++ if ((MALI_TRUE == mali_pp_job_is_protected_job(job) && MALI_FALSE == gpu_secure_mode_pre_enabled) ++ || (MALI_FALSE == mali_pp_job_is_protected_job(job) && MALI_TRUE == gpu_secure_mode_pre_enabled)) { ++ mali_group_reset(group); ++ mali_group_activate_page_directory(group, session, MALI_TRUE); ++ } else { ++ mali_group_activate_page_directory(group, session, MALI_FALSE); ++ } + -+/** @brief Free memory. -+ * -+ * Reclaims the buffer pointed to by the parameter \a ptr for the system. -+ * All memory returned from _mali_osk_valloc() must be freed before the -+ * application exits. Otherwise a memory leak will occur. -+ * -+ * Memory must be freed once. It is an error to free the same non-NULL pointer -+ * more than once. -+ * -+ * It is legal to free the NULL pointer. -+ * -+ * @param ptr Pointer to buffer to free -+ */ -+void _mali_osk_vfree(void *ptr); ++ if (mali_group_is_virtual(group)) { ++ struct mali_group *child; ++ struct mali_group *temp; ++ u32 core_num = 0; + -+/** @brief Copies memory. -+ * -+ * Copies the \a len bytes from the buffer pointed by the parameter \a src -+ * directly to the buffer pointed by \a dst. -+ * -+ * It is an error for \a src to overlap \a dst anywhere in \a len bytes. -+ * -+ * @param dst Pointer to the destination array where the content is to be -+ * copied. -+ * @param src Pointer to the source of data to be copied. -+ * @param len Number of bytes to copy. -+ * @return \a dst is always passed through unmodified. -+ */ -+void *_mali_osk_memcpy(void *dst, const void *src, u32 len); ++ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job)); + -+/** @brief Fills memory. -+ * -+ * Sets the first \a n bytes of the block of memory pointed to by \a s to -+ * the specified value -+ * @param s Pointer to the block of memory to fill. -+ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB) -+ * are used. -+ * @param n Number of bytes to be set to the value. -+ * @return \a s is always passed through unmodified -+ */ -+void *_mali_osk_memset(void *s, u32 c, u32 n); -+/** @} */ /* end group _mali_osk_memory */ ++ /* Configure DLBU for the job */ ++ mali_dlbu_config_job(group->dlbu_core, job); + ++ /* Write stack address for each child group */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ mali_pp_write_addr_stack(child->pp_core, job); ++ core_num++; ++ } + -+/** @brief Checks the amount of memory allocated -+ * -+ * Checks that not more than \a max_allocated bytes are allocated. -+ * -+ * Some OS bring up an interactive out of memory dialogue when the -+ * system runs out of memory. This can stall non-interactive -+ * apps (e.g. automated test runs). This function can be used to -+ * not trigger the OOM dialogue by keeping allocations -+ * within a certain limit. -+ * -+ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE -+ * when at least \a max_allocated bytes are in use. 
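Aside (editorial): a usage sketch for the allocation helpers above. Note that the prose refers to _mali_osk_vmalloc() while the declared name is _mali_osk_valloc(); the sketch follows the declarations and the documented guidance to prefer the valloc/vfree pair for blocks of 16KB or more. In-driver sketch, not standalone.

#include "mali_osk.h"

#define SMALL_DESC_SIZE	64u
#define BIG_TABLE_SIZE	(64u * 1024u)	/* >= 16KB: use the valloc path */

static void alloc_example(void)
{
	void *desc;
	void *table;

	desc = _mali_osk_calloc(1, SMALL_DESC_SIZE);	/* zero-initialized */
	table = _mali_osk_valloc(BIG_TABLE_SIZE);	/* contents undefined */

	if (NULL != desc && NULL != table) {
		_mali_osk_memset(table, 0, BIG_TABLE_SIZE);
		/* Non-overlapping copy, as the memcpy contract requires. */
		_mali_osk_memcpy(table, desc, SMALL_DESC_SIZE);
	}

	/* Freeing NULL is legal for both helpers, so no extra checks here;
	 * each allocator is paired with its own free function. */
	_mali_osk_vfree(table);
	_mali_osk_free(desc);
}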
-+ */ -+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated); ++ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE); ++ } else { ++ mali_pp_job_start(group->pp_core, job, sub_job, MALI_FALSE); ++ } + ++ /* if the group is virtual, loop through physical groups which belong to this group ++ * and call profiling events for its cores as virtual */ ++ if (MALI_TRUE == mali_group_is_virtual(group)) { ++ struct mali_group *child; ++ struct mali_group *temp; + -+/** @addtogroup _mali_osk_low_level_memory -+ * @{ */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); + -+/** @brief Issue a memory barrier -+ * -+ * This defines an arbitrary memory barrier operation, which forces an ordering constraint -+ * on memory read and write operations. -+ */ -+void _mali_osk_mem_barrier(void); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, ++ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); + -+/** @brief Issue a write memory barrier -+ * -+ * This defines an write memory barrier operation which forces an ordering constraint -+ * on memory write operations. -+ */ -+void _mali_osk_write_mem_barrier(void); ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); ++#endif ++ } + -+/** @brief Map a physically contiguous region into kernel space -+ * -+ * This is primarily used for mapping in registers from resources, and Mali-MMU -+ * page tables. The mapping is only visable from kernel-space. -+ * -+ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32 -+ * -+ * @param phys CPU-physical base address of the memory to map in. This must -+ * be aligned to the system's page size, which is assumed to be 4K. -+ * @param size the number of bytes of physically contiguous address space to -+ * map in -+ * @param description A textual description of the memory being mapped in. -+ * @return On success, a Mali IO address through which the mapped-in -+ * memory/registers can be accessed. NULL on failure. 
-+ */ -+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description); ++#if defined(CONFIG_MALI400_PROFILING) ++ if (0 != group->l2_cache_core_ref_count[0]) { ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); ++ } ++ } ++ if (0 != group->l2_cache_core_ref_count[1]) { ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1])); ++ } ++ } ++#endif /* #if defined(CONFIG_MALI400_PROFILING) */ + -+/** @brief Unmap a physically contiguous address range from kernel space. -+ * -+ * The address range should be one previously mapped in through -+ * _mali_osk_mem_mapioregion. -+ * -+ * It is a programming error to do (but not limited to) the following: -+ * - attempt an unmap twice -+ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion -+ * - unmap more than the range obtained through _mali_osk_mem_mapioregion -+ * - unmap an address range that was not successfully mapped using -+ * _mali_osk_mem_mapioregion -+ * - provide a mapping that does not map to phys. -+ * -+ * @param phys CPU-physical base address of the memory that was originally -+ * mapped in. This must be aligned to the system's page size, which is assumed -+ * to be 4K -+ * @param size The number of bytes that were originally mapped in. -+ * @param mapping The Mali IO address through which the mapping is -+ * accessed. -+ */ -+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping); ++ } else { /* group is physical - call profiling events for physical cores */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH, ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job), 0, 0, 0); + -+/** @brief Allocate and Map a physically contiguous region into kernel space -+ * -+ * This is used for allocating physically contiguous regions (such as Mali-MMU -+ * page tables) and mapping them into kernel space. The mapping is only -+ * visible from kernel-space. -+ * -+ * The alignment of the returned memory is guaranteed to be at least -+ * _MALI_OSK_CPU_PAGE_SIZE. -+ * -+ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32 -+ * -+ * @note This function is primarily to provide support for OSs that are -+ * incapable of separating the tasks 'allocate physically contiguous memory' -+ * and 'map it into kernel space' -+ * -+ * @param[out] phys CPU-physical base address of memory that was allocated. -+ * (*phys) will be guaranteed to be aligned to at least -+ * _MALI_OSK_CPU_PAGE_SIZE on success. -+ * -+ * @param[in] size the number of bytes of physically contiguous memory to -+ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE. -+ * -+ * @return On success, a Mali IO address through which the mapped-in -+ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified. 
-+ */ -+mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL, ++ mali_pp_job_get_pid(job), mali_pp_job_get_tid(job), 0, 0, 0); + -+/** @brief Free a physically contiguous address range from kernel space. -+ * -+ * The address range should be one previously mapped in through -+ * _mali_osk_mem_allocioregion. -+ * -+ * It is a programming error to do (but not limited to) the following: -+ * - attempt a free twice on the same ioregion -+ * - free only part of a range obtained through _mali_osk_mem_allocioregion -+ * - free more than the range obtained through _mali_osk_mem_allocioregion -+ * - free an address range that was not successfully mapped using -+ * _mali_osk_mem_allocioregion -+ * - provide a mapping that does not map to phys. -+ * -+ * @param phys CPU-physical base address of the memory that was originally -+ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE. -+ * @param size The number of bytes that were originally mapped in, which was -+ * a multiple of _MALI_OSK_CPU_PAGE_SIZE. -+ * @param mapping The Mali IO address through which the mapping is -+ * accessed. -+ */ -+void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping); ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_pp_job_get_pid(job), 1 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core), ++ mali_pp_job_get_frame_builder_id(job), mali_pp_job_get_flush_id(job)); ++#endif + -+/** @brief Request a region of physically contiguous memory -+ * -+ * This is used to ensure exclusive access to a region of physically contigous -+ * memory. -+ * -+ * It is acceptable to implement this as a stub. However, it is then the job -+ * of the System Integrator to ensure that no other device driver will be using -+ * the physical address ranges used by Mali, while the Mali device driver is -+ * loaded. -+ * -+ * @param phys CPU-physical base address of the memory to request. This must -+ * be aligned to the system's page size, which is assumed to be 4K. -+ * @param size the number of bytes of physically contiguous address space to -+ * request. -+ * @param description A textual description of the memory being requested. -+ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable -+ * _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description); ++#if defined(CONFIG_MALI400_PROFILING) ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); ++ } ++#endif /* #if defined(CONFIG_MALI400_PROFILING) */ ++ } + -+/** @brief Un-request a region of physically contiguous memory -+ * -+ * This is used to release a regious of physically contiguous memory previously -+ * requested through _mali_osk_mem_reqregion, so that other device drivers may -+ * use it. This will be called at time of Mali device driver termination. 
-+ * -+ * It is a programming error to attempt to: -+ * - unrequest a region twice -+ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion -+ * - unrequest more than the range obtained through _mali_osk_mem_reqregion -+ * - unrequest an address range that was not successfully requested using -+ * _mali_osk_mem_reqregion -+ * -+ * @param phys CPU-physical base address of the memory to un-request. This must -+ * be aligned to the system's page size, which is assumed to be 4K -+ * @param size the number of bytes of physically contiguous address space to -+ * un-request. -+ */ -+void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ if (group->pp_core) { ++ trace_gpu_sched_switch(mali_pp_core_description(group->pp_core), ++ sched_clock(), mali_pp_job_get_tid(job), ++ 0, mali_pp_job_get_id(job)); ++ } ++#endif + -+/** @brief Read from a location currently mapped in through -+ * _mali_osk_mem_mapioregion -+ * -+ * This reads a 32-bit word from a 32-bit aligned location. It is a programming -+ * error to provide unaligned locations, or to read from memory that is not -+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or -+ * _mali_osk_mem_allocioregion(). -+ * -+ * @param mapping Mali IO address to read from -+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 -+ * @return the 32-bit word from the specified location. -+ */ -+u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset); ++ group->pp_running_job = job; ++ group->pp_running_sub_job = sub_job; ++ group->is_working = MALI_TRUE; + -+/** @brief Write to a location currently mapped in through -+ * _mali_osk_mem_mapioregion without memory barriers -+ * -+ * This write a 32-bit word to a 32-bit aligned location without using memory barrier. -+ * It is a programming error to provide unaligned locations, or to write to memory that is not -+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or -+ * _mali_osk_mem_allocioregion(). -+ * -+ * @param mapping Mali IO address to write to -+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 -+ * @param val the 32-bit word to write. -+ */ -+void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val); ++ /* Setup SW timer and record start time */ ++ group->start_time = _mali_osk_time_tickcount(); ++ _mali_osk_timer_mod(group->timeout_timer, _mali_osk_time_mstoticks(mali_max_job_runtime)); + -+/** @brief Write to a location currently mapped in through -+ * _mali_osk_mem_mapioregion with write memory barrier -+ * -+ * This write a 32-bit word to a 32-bit aligned location. It is a programming -+ * error to provide unaligned locations, or to write to memory that is not -+ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or -+ * _mali_osk_mem_allocioregion(). -+ * -+ * @param mapping Mali IO address to write to -+ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 -+ * @param val the 32-bit word to write. 
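Aside (editorial): a usage sketch tying together the register-window helpers above: request the physical range, map it, access it only through the 32-bit accessors at offsets that are a multiple of 4, then unmap and release on teardown. The register offsets are hypothetical, and the error check follows the _MALI_OSK_ERR_OK convention documented above. In-driver sketch, not standalone.

#include "mali_osk.h"

#define MY_REG_CTRL	0x00u	/* hypothetical control register offset */
#define MY_REG_STATUS	0x04u	/* hypothetical status register offset */

static mali_io_address my_map_regs(uintptr_t phys, u32 size)
{
	mali_io_address regs;

	if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(phys, size, "my core"))
		return NULL;

	regs = _mali_osk_mem_mapioregion(phys, size, "my core");
	if (NULL == regs) {
		_mali_osk_mem_unreqregion(phys, size);
		return NULL;
	}

	/* Relaxed write plus an explicit write barrier; the non-relaxed
	 * _mali_osk_mem_iowrite32() documented above does this implicitly. */
	_mali_osk_mem_iowrite32_relaxed(regs, MY_REG_CTRL, 0x1);
	_mali_osk_write_mem_barrier();
	(void)_mali_osk_mem_ioread32(regs, MY_REG_STATUS);

	return regs;
}

static void my_unmap_regs(uintptr_t phys, u32 size, mali_io_address regs)
{
	_mali_osk_mem_unmapioregion(phys, size, regs);
	_mali_osk_mem_unreqregion(phys, size);
}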
-+ */ -+void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val); ++ MALI_DEBUG_PRINT(4, ("Group: Started PP job 0x%08X part %u/%u on group %s at %u\n", ++ job, sub_job + 1, ++ mali_pp_job_get_sub_job_count(job), ++ mali_group_core_description(group), ++ group->start_time)); + -+/** @brief Flush all CPU caches -+ * -+ * This should only be implemented if flushing of the cache is required for -+ * memory mapped in through _mali_osk_mem_mapregion. -+ */ -+void _mali_osk_cache_flushall(void); ++} + -+/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory -+ * -+ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer) -+ * after allocating uncached mapped memory. -+ * -+ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory. -+ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches. -+ * This is required for MALI to have the correct view of the memory. -+ */ -+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size); ++void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr) ++{ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Safely copy as much data as possible from src to dest -+ * -+ * Do not crash if src or dest isn't available. -+ * -+ * @param dest Destination buffer (limited to user space mapped Mali memory) -+ * @param src Source buffer -+ * @param size Number of bytes to copy -+ * @return Number of bytes actually copied -+ */ -+u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size); ++ MALI_DEBUG_ASSERT_POINTER(group->l2_cache_core[0]); ++ mali_l2_cache_invalidate(group->l2_cache_core[0]); + -+/** @} */ /* end group _mali_osk_low_level_memory */ ++ mali_mmu_zap_tlb_without_stall(group->mmu); + ++ mali_gp_resume_with_new_heap(group->gp_core, start_addr, end_addr); + -+/** @addtogroup _mali_osk_notification -+ * -+ * User space notification framework -+ * -+ * Communication with user space of asynchronous events is performed through a -+ * synchronous call to the \ref u_k_api. -+ * -+ * Since the events are asynchronous, the events have to be queued until a -+ * synchronous U/K API call can be made by user-space. A U/K API call might also -+ * be received before any event has happened. Therefore the notifications the -+ * different subsystems wants to send to user space has to be queued for later -+ * reception, or a U/K API call has to be blocked until an event has occured. -+ * -+ * Typical uses of notifications are after running of jobs on the hardware or -+ * when changes to the system is detected that needs to be relayed to user -+ * space. -+ * -+ * After an event has occured user space has to be notified using some kind of -+ * message. The notification framework supports sending messages to waiting -+ * threads or queueing of messages until a U/K API call is made. -+ * -+ * The notification queue is a FIFO. There are no restrictions on the numbers -+ * of readers or writers in the queue. -+ * -+ * A message contains what user space needs to identifiy how to handle an -+ * event. This includes a type field and a possible type specific payload. -+ * -+ * A notification to user space is represented by a -+ * \ref _mali_osk_notification_t object. A sender gets hold of such an object -+ * using _mali_osk_notification_create(). 
The buffer given by the -+ * _mali_osk_notification_t::result_buffer field in the object is used to store -+ * any type specific data. The other fields are internal to the queue system -+ * and should not be touched. -+ * -+ * @{ */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), ++ 0, 0, 0, 0, 0); + -+/** @brief Create a notification object -+ * -+ * Returns a notification object which can be added to the queue of -+ * notifications pending for user space transfer. -+ * -+ * The implementation will initialize all members of the -+ * \ref _mali_osk_notification_t object. In particular, the -+ * _mali_osk_notification_t::result_buffer member will be initialized to point -+ * to \a size bytes of storage, and that storage will be suitably aligned for -+ * storage of any structure. That is, the created buffer meets the same -+ * requirements as _mali_osk_malloc(). -+ * -+ * The notification object must be deleted when not in use. Use -+ * _mali_osk_notification_delete() for deleting it. -+ * -+ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t, -+ * object, or on a _mali_osk_notification_t::result_buffer. You must only use -+ * _mali_osk_notification_delete() to free the resources assocaited with a -+ * \ref _mali_osk_notification_t object. -+ * -+ * @param type The notification type -+ * @param size The size of the type specific buffer to send -+ * @return Pointer to a notification object with a suitable buffer, or NULL on error. -+ */ -+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size); ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 1 /* active */, 1 /* GP */, 0 /* core */, ++ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job)); ++#endif ++} + -+/** @brief Delete a notification object -+ * -+ * This must be called to reclaim the resources of a notification object. This -+ * includes: -+ * - The _mali_osk_notification_t::result_buffer -+ * - The \ref _mali_osk_notification_t itself. -+ * -+ * A notification object \b must \b not be used after it has been deleted by -+ * _mali_osk_notification_delete(). -+ * -+ * In addition, the notification object may not be deleted while it is in a -+ * queue. That is, if it has been placed on a queue with -+ * _mali_osk_notification_queue_send(), then it must not be deleted until -+ * it has been received by a call to _mali_osk_notification_queue_receive(). -+ * Otherwise, the queue may be corrupted. -+ * -+ * @param object the notification object to delete. -+ */ -+void _mali_osk_notification_delete(_mali_osk_notification_t *object); ++static void mali_group_reset_mmu(struct mali_group *group) ++{ ++ struct mali_group *child; ++ struct mali_group *temp; ++ _mali_osk_errcode_t err; + -+/** @brief Create a notification queue -+ * -+ * Creates a notification queue which can be used to queue messages for user -+ * delivery and get queued messages from -+ * -+ * The queue is a FIFO, and has no restrictions on the numbers of readers or -+ * writers. -+ * -+ * When the queue is no longer in use, it must be terminated with -+ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a -+ * memory leak. -+ * -+ * @return Pointer to a new notification queue or NULL on error. 
-+ */ -+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Destroy a notification queue -+ * -+ * Destroys a notification queue and frees associated resources from the queue. -+ * -+ * A notification queue \b must \b not be destroyed in the following cases: -+ * - while there are \ref _mali_osk_notification_t objects in the queue. -+ * - while there are writers currently acting upon the queue. That is, while -+ * a thread is currently calling \ref _mali_osk_notification_queue_send() on -+ * the queue, or while a thread may call -+ * \ref _mali_osk_notification_queue_send() on the queue in the future. -+ * - while there are readers currently waiting upon the queue. That is, while -+ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on -+ * the queue, or while a thread may call -+ * \ref _mali_osk_notification_queue_receive() on the queue in the future. -+ * -+ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and -+ * deleted by the code that makes use of the notification queues, since only -+ * they know the structure of the _mali_osk_notification_t::result_buffer -+ * (even if it may only be a flat sturcture). -+ * -+ * @note Since the queue is a FIFO, the code using notification queues may -+ * create its own 'flush' type of notification, to assist in flushing the -+ * queue. -+ * -+ * Once the queue has been destroyed, it must not be used again. -+ * -+ * @param queue The queue to destroy -+ */ -+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue); ++ if (!mali_group_is_virtual(group)) { ++ /* This is a physical group or an idle virtual group -- simply wait for ++ * the reset to complete. */ ++ err = mali_mmu_reset(group->mmu); ++ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); ++ } else { /* virtual group */ ++ /* Loop through all members of this virtual group and wait ++ * until they are done resetting. ++ */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ err = mali_mmu_reset(child->mmu); ++ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); ++ } ++ } ++} + -+/** @brief Schedule notification for delivery -+ * -+ * When a \ref _mali_osk_notification_t object has been created successfully -+ * and set up, it may be added to the queue of objects waiting for user space -+ * transfer. -+ * -+ * The sending will not block if the queue is full. -+ * -+ * A \ref _mali_osk_notification_t object \b must \b not be put on two different -+ * queues at the same time, or enqueued twice onto a single queue before -+ * reception. However, it is acceptable for it to be requeued \em after reception -+ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue. -+ * -+ * Again, requeuing must also not enqueue onto two different queues at the same -+ * time, or enqueue onto the same queue twice before reception. -+ * -+ * @param queue The notification queue to add this notification to -+ * @param object The entry to add -+ */ -+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object); ++static void mali_group_reset_pp(struct mali_group *group) ++{ ++ struct mali_group *child; ++ struct mali_group *temp; + -+/** @brief Receive a notification from a queue -+ * -+ * Receives a single notification from the given queue. -+ * -+ * If no notifciations are ready the thread will sleep until one becomes ready. 
-+ * Therefore, notifications may not be received into an -+ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed). -+ * -+ * @param queue The queue to receive from -+ * @param result Pointer to storage of a pointer of type -+ * \ref _mali_osk_notification_t*. \a result will be written to such that the -+ * expression \a (*result) will evaluate to a pointer to a valid -+ * \ref _mali_osk_notification_t object, or NULL if none were received. -+ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted. -+ */ -+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Dequeues a notification from a queue -+ * -+ * Receives a single notification from the given queue. -+ * -+ * If no notifciations are ready the function call will return an error code. -+ * -+ * @param queue The queue to receive from -+ * @param result Pointer to storage of a pointer of type -+ * \ref _mali_osk_notification_t*. \a result will be written to such that the -+ * expression \a (*result) will evaluate to a pointer to a valid -+ * \ref _mali_osk_notification_t object, or NULL if none were received. -+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty. -+ */ -+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result); ++ mali_pp_reset_async(group->pp_core); + -+/** @} */ /* end group _mali_osk_notification */ ++ if (!mali_group_is_virtual(group) || NULL == group->pp_running_job) { ++ /* This is a physical group or an idle virtual group -- simply wait for ++ * the reset to complete. */ ++ mali_pp_reset_wait(group->pp_core); ++ } else { ++ /* Loop through all members of this virtual group and wait until they ++ * are done resetting. ++ */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ mali_pp_reset_wait(child->pp_core); ++ } ++ } ++} + ++struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job) ++{ ++ struct mali_pp_job *pp_job_to_return; + -+/** @addtogroup _mali_osk_timer -+ * -+ * Timers use the OS's representation of time, which are 'ticks'. This is to -+ * prevent aliasing problems between the internal timer time, and the time -+ * asked for. -+ * -+ * @{ */ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_running_job); ++ MALI_DEBUG_ASSERT_POINTER(sub_job); ++ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working); + -+/** @brief Initialize a timer -+ * -+ * Allocates resources for a new timer, and initializes them. This does not -+ * start the timer. -+ * -+ * @return a pointer to the allocated timer object, or NULL on failure. -+ */ -+_mali_osk_timer_t *_mali_osk_timer_init(_mali_osk_timer_callback_t callback); ++ /* Stop/clear the timeout timer. */ ++ _mali_osk_timer_del_async(group->timeout_timer); + -+/** @brief Start a timer -+ * -+ * It is an error to start a timer without setting the callback via -+ * _mali_osk_timer_setcallback(). -+ * -+ * It is an error to use this to start an already started timer. -+ * -+ * The timer will expire in \a ticks_to_expire ticks, at which point, the -+ * callback function will be invoked with the callback-specific data, -+ * as registered by _mali_osk_timer_setcallback(). 
-+ * -+ * @param tim the timer to start -+ * @param ticks_to_expire the amount of time in ticks for the timer to run -+ * before triggering. -+ */ -+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire); ++ if (NULL != group->pp_running_job) { + -+/** @brief Modify a timer -+ * -+ * Set the relative time at which a timer will expire, and start it if it is -+ * stopped. If \a ticks_to_expire 0 the timer fires immediately. -+ * -+ * It is an error to modify a timer without setting the callback via -+ * _mali_osk_timer_setcallback(). -+ * -+ * The timer will expire at \a ticks_to_expire from the time of the call, at -+ * which point, the callback function will be invoked with the -+ * callback-specific data, as set by _mali_osk_timer_setcallback(). -+ * -+ * @param tim the timer to modify, and start if necessary -+ * @param ticks_to_expire the \em absolute time in ticks at which this timer -+ * should trigger. -+ * -+ */ -+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire); ++ /* Deal with HW counters and profiling */ + -+/** @brief Stop a timer, and block on its completion. -+ * -+ * Stop the timer. When the function returns, it is guaranteed that the timer's -+ * callback will not be running on any CPU core. -+ * -+ * Since stoping the timer blocks on compeletion of the callback, the callback -+ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will -+ * occur. -+ * -+ * @note While the callback itself is guaranteed to not be running, work -+ * enqueued on the work-queue by the timer (with -+ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and -+ * work handler must take this into account. -+ * -+ * It is legal to stop an already stopped timer. -+ * -+ * @param tim the timer to stop. -+ * -+ */ -+void _mali_osk_timer_del(_mali_osk_timer_t *tim); ++ if (MALI_TRUE == mali_group_is_virtual(group)) { ++ struct mali_group *child; ++ struct mali_group *temp; + -+/** @brief Stop a timer. -+ * -+ * Stop the timer. When the function returns, the timer's callback may still be -+ * running on any CPU core. -+ * -+ * It is legal to stop an already stopped timer. -+ * -+ * @param tim the timer to stop. -+ */ -+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim); ++ /* update performance counters from each physical pp core within this virtual group */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ mali_pp_update_performance_counters(group->pp_core, child->pp_core, group->pp_running_job, mali_pp_core_get_id(child->pp_core)); ++ } + -+/** @brief Check if timer is pending. -+ * -+ * Check if timer is active. 
-+ * -+ * @param tim the timer to check -+ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active -+ */ -+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim); ++#if defined(CONFIG_MALI400_PROFILING) ++ /* send profiling data per physical core */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(child->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL, ++ mali_pp_job_get_perf_counter_value0(group->pp_running_job, mali_pp_core_get_id(child->pp_core)), ++ mali_pp_job_get_perf_counter_value1(group->pp_running_job, mali_pp_core_get_id(child->pp_core)), ++ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8), ++ 0, 0); + -+/** @brief Set a timer's callback parameters. -+ * -+ * This must be called at least once before a timer is started/modified. -+ * -+ * After a timer has been stopped or expires, the callback remains set. This -+ * means that restarting the timer will call the same function with the same -+ * parameters on expiry. -+ * -+ * @param tim the timer to set callback on. -+ * @param callback Function to call when timer expires -+ * @param data Function-specific data to supply to the function on expiry. -+ */ -+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data); ++ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job), ++ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(child->pp_core), ++ mali_pp_job_get_frame_builder_id(group->pp_running_job), ++ mali_pp_job_get_flush_id(group->pp_running_job)); ++ } ++ if (0 != group->l2_cache_core_ref_count[0]) { ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); ++ } ++ } ++ if (0 != group->l2_cache_core_ref_count[1]) { ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[1])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[1]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[1])); ++ } ++ } + -+/** @brief Terminate a timer, and deallocate resources. -+ * -+ * The timer must first be stopped by calling _mali_osk_timer_del(). -+ * -+ * It is a programming error for _mali_osk_timer_term() to be called on: -+ * - timer that is currently running -+ * - a timer that is currently executing its callback. -+ * -+ * @param tim the timer to deallocate. 
-+ */ -+void _mali_osk_timer_term(_mali_osk_timer_t *tim); -+/** @} */ /* end group _mali_osk_timer */ ++#endif ++ } else { ++ /* update performance counters for a physical group's pp core */ ++ mali_pp_update_performance_counters(group->pp_core, group->pp_core, group->pp_running_job, group->pp_running_sub_job); + ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(mali_pp_core_get_id(group->pp_core)) | ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL, ++ mali_pp_job_get_perf_counter_value0(group->pp_running_job, group->pp_running_sub_job), ++ mali_pp_job_get_perf_counter_value1(group->pp_running_job, group->pp_running_sub_job), ++ mali_pp_job_get_perf_counter_src0(group->pp_running_job, group->pp_running_sub_job) | (mali_pp_job_get_perf_counter_src1(group->pp_running_job, group->pp_running_sub_job) << 8), ++ 0, 0); + -+/** @defgroup _mali_osk_time OSK Time functions -+ * -+ * \ref _mali_osk_time use the OS's representation of time, which are -+ * 'ticks'. This is to prevent aliasing problems between the internal timer -+ * time, and the time asked for. -+ * -+ * OS tick time is measured as a u32. The time stored in a u32 may either be -+ * an absolute time, or a time delta between two events. Whilst it is valid to -+ * use math opeartors to \em change the tick value represented as a u32, it -+ * is often only meaningful to do such operations on time deltas, rather than -+ * on absolute time. However, it is meaningful to add/subtract time deltas to -+ * absolute times. -+ * -+ * Conversion between tick time and milliseconds (ms) may not be loss-less, -+ * and are \em implementation \em depenedant. -+ * -+ * Code use OS time must take this into account, since: -+ * - a small OS time may (or may not) be rounded -+ * - a large time may (or may not) overflow -+ * -+ * @{ */ ++ trace_mali_core_active(mali_pp_job_get_pid(group->pp_running_job), ++ 0 /* active */, 0 /* PP */, mali_pp_core_get_id(group->pp_core), ++ mali_pp_job_get_frame_builder_id(group->pp_running_job), ++ mali_pp_job_get_flush_id(group->pp_running_job)); + -+/** @brief Return whether ticka occurs after or at the same time as tickb -+ * -+ * Systems where ticks can wrap must handle that. -+ * -+ * @param ticka ticka -+ * @param tickb tickb -+ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb. -+ */ -+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb); ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) { ++ mali_group_report_l2_cache_counters_per_core(group, mali_l2_cache_get_id(group->l2_cache_core[0])); ++ } ++#endif ++ } + -+/** @brief Convert milliseconds to OS 'ticks' -+ * -+ * @param ms time interval in milliseconds -+ * @return the corresponding time interval in OS ticks. -+ */ -+unsigned long _mali_osk_time_mstoticks(u32 ms); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ if (group->gp_core) { ++ trace_gpu_sched_switch( ++ mali_gp_core_description(group->gp_core), ++ sched_clock(), 0, 0, 0); ++ } ++#endif + -+/** @brief Convert OS 'ticks' to milliseconds -+ * -+ * @param ticks time interval in OS ticks. 
-+ * @return the corresponding time interval in milliseconds -+ */ -+u32 _mali_osk_time_tickstoms(unsigned long ticks); ++ } + ++ if (success) { ++ /* Only do soft reset for successful jobs, a full recovery ++ * reset will be done for failed jobs. */ ++ mali_pp_reset_async(group->pp_core); ++ } + -+/** @brief Get the current time in OS 'ticks'. -+ * @return the current time in OS 'ticks'. -+ */ -+unsigned long _mali_osk_time_tickcount(void); ++ pp_job_to_return = group->pp_running_job; ++ group->pp_running_job = NULL; ++ group->is_working = MALI_FALSE; ++ *sub_job = group->pp_running_sub_job; + -+/** @brief Cause a microsecond delay -+ * -+ * The delay will have microsecond resolution, and is necessary for correct -+ * operation of the driver. At worst, the delay will be \b at least \a usecs -+ * microseconds, and so may be (significantly) more. -+ * -+ * This function may be implemented as a busy-wait, which is the most sensible -+ * implementation. On OSs where there are situations in which a thread must not -+ * sleep, this is definitely implemented as a busy-wait. -+ * -+ * @param usecs the number of microseconds to wait for. -+ */ -+void _mali_osk_time_ubusydelay(u32 usecs); ++ if (!success) { ++ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n")); ++ mali_group_recovery_reset(group); ++ } else if (_MALI_OSK_ERR_OK != mali_pp_reset_wait(group->pp_core)) { ++ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n")); ++ mali_group_recovery_reset(group); ++ } + -+/** @brief Return time in nano seconds, since any given reference. -+ * -+ * @return Time in nano seconds -+ */ -+u64 _mali_osk_time_get_ns(void); ++ return pp_job_to_return; ++} + -+/** @brief Return time in nano seconds, since boot time. -+ * -+ * @return Time in nano seconds -+ */ -+u64 _mali_osk_boot_time_get_ns(void); ++struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success) ++{ ++ struct mali_gp_job *gp_job_to_return; + -+/** @} */ /* end group _mali_osk_time */ ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_running_job); ++ MALI_DEBUG_ASSERT(MALI_TRUE == group->is_working); + -+/** @defgroup _mali_osk_math OSK Math -+ * @{ */ ++ /* Stop/clear the timeout timer. */ ++ _mali_osk_timer_del_async(group->timeout_timer); + -+/** @brief Count Leading Zeros (Little-endian) -+ * -+ * @note This function must be implemented to support the reference -+ * implementation of _mali_osk_find_first_zero_bit, as defined in -+ * mali_osk_bitops.h. -+ * -+ * @param val 32-bit words to count leading zeros on -+ * @return the number of leading zeros. -+ */ -+u32 _mali_osk_clz(u32 val); ++ if (NULL != group->gp_running_job) { ++ mali_gp_update_performance_counters(group->gp_core, group->gp_running_job); + -+/** @brief find last (most-significant) bit set -+ * -+ * @param val 32-bit words to count last bit set on -+ * @return last bit set. 
-+ */ -+u32 _mali_osk_fls(u32 val); ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), ++ mali_gp_job_get_perf_counter_value0(group->gp_running_job), ++ mali_gp_job_get_perf_counter_value1(group->gp_running_job), ++ mali_gp_job_get_perf_counter_src0(group->gp_running_job) | (mali_gp_job_get_perf_counter_src1(group->gp_running_job) << 8), ++ 0, 0); + -+/** @} */ /* end group _mali_osk_math */ ++ if ((MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src0(group->l2_cache_core[0])) && ++ (MALI_HW_CORE_NO_COUNTER != mali_l2_cache_core_get_counter_src1(group->l2_cache_core[0]))) ++ mali_group_report_l2_cache_counters_per_core(group, 0); ++#endif + -+/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality -+ * @{ */ -+ -+/** @brief Initialize an empty Wait Queue */ -+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ if (group->pp_core) { ++ trace_gpu_sched_switch( ++ mali_pp_core_description(group->pp_core), ++ sched_clock(), 0, 0, 0); ++ } ++#endif + -+/** @brief Sleep if condition is false -+ * -+ * @param queue the queue to use -+ * @param condition function pointer to a boolean function -+ * @param data data parameter for condition function -+ * -+ * Put thread to sleep if the given \a condition function returns false. When -+ * being asked to wake up again, the condition will be re-checked and the -+ * thread only woken up if the condition is now true. -+ */ -+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data); ++#if defined(CONFIG_MALI400_PROFILING) ++ trace_mali_core_active(mali_gp_job_get_pid(group->gp_running_job), 0 /* active */, 1 /* GP */, 0 /* core */, ++ mali_gp_job_get_frame_builder_id(group->gp_running_job), mali_gp_job_get_flush_id(group->gp_running_job)); ++#endif + -+/** @brief Sleep if condition is false -+ * -+ * @param queue the queue to use -+ * @param condition function pointer to a boolean function -+ * @param data data parameter for condition function -+ * @param timeout timeout in ms -+ * -+ * Put thread to sleep if the given \a condition function returns false. When -+ * being asked to wake up again, the condition will be re-checked and the -+ * thread only woken up if the condition is now true. Will return if time -+ * exceeds timeout. -+ */ -+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout); ++ mali_gp_job_set_current_heap_addr(group->gp_running_job, ++ mali_gp_read_plbu_alloc_start_addr(group->gp_core)); ++ } + -+/** @brief Wake up all threads in wait queue if their respective conditions are -+ * true -+ * -+ * @param queue the queue whose threads should be woken up -+ * -+ * Wake up all threads in wait queue \a queue whose condition is now true. -+ */ -+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue); ++ if (success) { ++ /* Only do soft reset for successful jobs, a full recovery ++ * reset will be done for failed jobs. */ ++ mali_gp_reset_async(group->gp_core); ++ } + -+/** @brief terminate a wait queue -+ * -+ * @param queue the queue to terminate. 
-+ */ -+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue); -+/** @} */ /* end group _mali_osk_wait_queue */ ++ gp_job_to_return = group->gp_running_job; ++ group->gp_running_job = NULL; ++ group->is_working = MALI_FALSE; + ++ if (!success) { ++ MALI_DEBUG_PRINT(2, ("Mali group: Executing recovery reset due to job failure\n")); ++ mali_group_recovery_reset(group); ++ } else if (_MALI_OSK_ERR_OK != mali_gp_reset_wait(group->gp_core)) { ++ MALI_PRINT_ERROR(("Mali group: Executing recovery reset due to reset failure\n")); ++ mali_group_recovery_reset(group); ++ } + -+/** @addtogroup _mali_osk_miscellaneous -+ * @{ */ ++ return gp_job_to_return; ++} + -+/** @brief Output a device driver debug message. -+ * -+ * The interpretation of \a fmt is the same as the \c format parameter in -+ * _mali_osu_vsnprintf(). -+ * -+ * @param fmt a _mali_osu_vsnprintf() style format string -+ * @param ... a variable-number of parameters suitable for \a fmt -+ */ -+void _mali_osk_dbgmsg(const char *fmt, ...); ++struct mali_group *mali_group_get_glob_group(u32 index) ++{ ++ if (mali_global_num_groups > index) { ++ return mali_global_groups[index]; ++ } + -+/** @brief Print fmt into buf. -+ * -+ * The interpretation of \a fmt is the same as the \c format parameter in -+ * _mali_osu_vsnprintf(). -+ * -+ * @param buf a pointer to the result buffer -+ * @param size the total number of bytes allowed to write to \a buf -+ * @param fmt a _mali_osu_vsnprintf() style format string -+ * @param ... a variable-number of parameters suitable for \a fmt -+ * @return The number of bytes written to \a buf -+ */ -+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...); ++ return NULL; ++} + -+/** @brief Abnormal process abort. -+ * -+ * Terminates the caller-process if this function is called. -+ * -+ * This function will be called from Debug assert-macros in mali_kernel_common.h. -+ * -+ * This function will never return - because to continue from a Debug assert -+ * could cause even more problems, and hinder debugging of the initial problem. -+ * -+ * This function is only used in Debug builds, and is not used in Release builds. -+ */ -+void _mali_osk_abort(void); ++u32 mali_group_get_glob_num_groups(void) ++{ ++ return mali_global_num_groups; ++} + -+/** @brief Sets breakpoint at point where function is called. -+ * -+ * This function will be called from Debug assert-macros in mali_kernel_common.h, -+ * to assist in debugging. If debugging at this level is not required, then this -+ * function may be implemented as a stub. -+ * -+ * This function is only used in Debug builds, and is not used in Release builds. -+ */ -+void _mali_osk_break(void); ++static void mali_group_activate_page_directory(struct mali_group *group, struct mali_session_data *session, mali_bool is_reload) ++{ ++ MALI_DEBUG_PRINT(5, ("Mali group: Activating page directory 0x%08X from session 0x%08X on group %s\n", ++ mali_session_get_page_directory(session), session, ++ mali_group_core_description(group))); + -+/** @brief Return an identificator for calling process. -+ * -+ * @return Identificator for calling process. -+ */ -+u32 _mali_osk_get_pid(void); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Return an name for calling process. -+ * -+ * @return name for calling process. 
-+ */ -+char *_mali_osk_get_comm(void); ++ if (group->session != session || MALI_TRUE == is_reload) { ++ /* Different session than last time, so we need to do some work */ ++ MALI_DEBUG_PRINT(5, ("Mali group: Activate session: %08x previous: %08x on group %s\n", ++ session, group->session, ++ mali_group_core_description(group))); ++ mali_mmu_activate_page_directory(group->mmu, mali_session_get_page_directory(session)); ++ group->session = session; ++ } else { ++ /* Same session as last time, so no work required */ ++ MALI_DEBUG_PRINT(4, ("Mali group: Activate existing session 0x%08X on group %s\n", ++ session->page_directory, ++ mali_group_core_description(group))); ++ mali_mmu_zap_tlb_without_stall(group->mmu); ++ } ++} + -+/** @brief Return an identificator for calling thread. -+ * -+ * @return Identificator for calling thread. -+ */ -+u32 _mali_osk_get_tid(void); ++static void mali_group_recovery_reset(struct mali_group *group) ++{ ++ _mali_osk_errcode_t err; + ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Take a reference to the power manager system for the Mali device (synchronously). -+ * -+ * When function returns successfully, Mali is ON. -+ * -+ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference. -+ */ -+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void); ++ /* Stop cores, bus stop */ ++ if (NULL != group->pp_core) { ++ mali_pp_stop_bus(group->pp_core); ++ } else { ++ mali_gp_stop_bus(group->gp_core); ++ } + -+/** @brief Take a reference to the external power manager system for the Mali device (asynchronously). -+ * -+ * Mali might not yet be on after this function as returned. -+ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync() -+ * to wait for Mali to be powered on. -+ * -+ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference. -+ */ -+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void); ++ /* Flush MMU and clear page fault (if any) */ ++ mali_mmu_activate_fault_flush_page_directory(group->mmu); ++ mali_mmu_page_fault_done(group->mmu); + -+/** @brief Release the reference to the external power manger system for the Mali device. -+ * -+ * When reference count reach zero, the cores can be off. -+ * -+ * @note This must be used to release references taken with -+ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync(). -+ */ -+void _mali_osk_pm_dev_ref_put(void); ++ /* Wait for cores to stop bus, then do a hard reset on them */ ++ if (NULL != group->pp_core) { ++ if (mali_group_is_virtual(group)) { ++ struct mali_group *child, *temp; + -+/** @brief Block until pending PM operations are done -+ */ -+void _mali_osk_pm_dev_barrier(void); ++ /* Disable the broadcast unit while we do reset directly on the member cores. */ ++ mali_bcast_disable(group->bcast_core); + -+/** @} */ /* end group _mali_osk_miscellaneous */ ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, struct mali_group, group_list) { ++ mali_pp_stop_bus_wait(child->pp_core); ++ mali_pp_hard_reset(child->pp_core); ++ } + -+/** @defgroup _mali_osk_bitmap OSK Bitmap -+ * @{ */ ++ mali_bcast_enable(group->bcast_core); ++ } else { ++ mali_pp_stop_bus_wait(group->pp_core); ++ mali_pp_hard_reset(group->pp_core); ++ } ++ } else { ++ mali_gp_stop_bus_wait(group->gp_core); ++ mali_gp_hard_reset(group->gp_core); ++ } + -+/** @brief Allocate a unique number from the bitmap object. -+ * -+ * @param bitmap Initialized bitmap object. -+ * @return An unique existence in the bitmap object. 
-+ */ -+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap); ++ /* Reset MMU */ ++ err = mali_mmu_reset(group->mmu); ++ MALI_DEBUG_ASSERT(_MALI_OSK_ERR_OK == err); ++ MALI_IGNORE(err); + -+/** @brief Free a interger to the bitmap object. -+ * -+ * @param bitmap Initialized bitmap object. -+ * @param obj An number allocated from bitmap object. -+ */ -+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj); ++ group->session = NULL; ++} + -+/** @brief Allocate continuous number from the bitmap object. -+ * -+ * @param bitmap Initialized bitmap object. -+ * @return start number of the continuous number block. -+ */ -+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt); ++#if MALI_STATE_TRACKING ++u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size) ++{ ++ int n = 0; ++ int i; ++ struct mali_group *child; ++ struct mali_group *temp; + -+/** @brief Free a block of continuous number block to the bitmap object. -+ * -+ * @param bitmap Initialized bitmap object. -+ * @param obj Start number. -+ * @param cnt The size of the continuous number block. -+ */ -+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt); ++ if (mali_group_is_virtual(group)) { ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Virtual PP Group: %p\n", group); ++ } else if (mali_group_is_in_virtual(group)) { ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Child PP Group: %p\n", group); ++ } else if (NULL != group->pp_core) { ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "Physical PP Group: %p\n", group); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "GP Group: %p\n", group); ++ } + -+/** @brief Available count could be used to allocate in the given bitmap object. -+ * -+ */ -+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap); ++ switch (group->state) { ++ case MALI_GROUP_STATE_INACTIVE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tstate: INACTIVE\n"); ++ break; ++ case MALI_GROUP_STATE_ACTIVATION_PENDING: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tstate: ACTIVATION_PENDING\n"); ++ break; ++ case MALI_GROUP_STATE_ACTIVE: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tstate: MALI_GROUP_STATE_ACTIVE\n"); ++ break; ++ default: ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tstate: UNKNOWN (%d)\n", group->state); ++ MALI_DEBUG_ASSERT(0); ++ break; ++ } + -+/** @brief Initialize an bitmap object.. -+ * -+ * @param bitmap An poiter of uninitialized bitmap object. -+ * @param num Size of thei bitmap object and decide the memory size allocated. -+ * @param reserve start number used to allocate. -+ */ -+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tSW power: %s\n", ++ group->power_is_on ? "On" : "Off"); + -+/** @brief Free the given bitmap object. -+ * -+ * @param bitmap Initialized bitmap object. 
-+ */ -+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap); -+/** @} */ /* end group _mali_osk_bitmap */ ++ n += mali_pm_dump_state_domain(group->pm_domain, buf + n, size - n); + -+/** @} */ /* end group osuapi */ ++ for (i = 0; i < 2; i++) { ++ if (NULL != group->l2_cache_core[i]) { ++ struct mali_pm_domain *domain; ++ domain = mali_l2_cache_get_pm_domain( ++ group->l2_cache_core[i]); ++ n += mali_pm_dump_state_domain(domain, ++ buf + n, size - n); ++ } ++ } + -+/** @} */ /* end group uddapi */ ++ if (group->gp_core) { ++ n += mali_gp_dump_state(group->gp_core, buf + n, size - n); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tGP running job: %p\n", group->gp_running_job); ++ } + ++ if (group->pp_core) { ++ n += mali_pp_dump_state(group->pp_core, buf + n, size - n); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tPP running job: %p, subjob %d \n", ++ group->pp_running_job, ++ group->pp_running_sub_job); ++ } + ++ _MALI_OSK_LIST_FOREACHENTRY(child, temp, &group->group_list, ++ struct mali_group, group_list) { ++ n += mali_group_dump_state(child, buf + n, size - n); ++ } + -+#ifdef __cplusplus ++ return n; +} +#endif + -+/* Check standard inlines */ -+#ifndef MALI_STATIC_INLINE -+#error MALI_STATIC_INLINE not defined on your OS -+#endif ++_mali_osk_errcode_t mali_group_upper_half_mmu(void *data) ++{ ++ struct mali_group *group = (struct mali_group *)data; ++ _mali_osk_errcode_t ret; + -+#ifndef MALI_NON_STATIC_INLINE -+#error MALI_NON_STATIC_INLINE not defined on your OS ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif ++ if (NULL != group->gp_core) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif +#endif + -+#endif /* __MALI_OSK_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h -new file mode 100644 -index 000000000..bb1831753 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h -@@ -0,0 +1,162 @@ -+/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ ret = mali_executor_interrupt_mmu(group, MALI_TRUE); + -+/** -+ * @file mali_osk_bitops.h -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { ++ /* group complete and on job shedule on it, it already power off */ ++ if (NULL != group->gp_core) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), ++ 0xFFFFFFFF, 0); ++ } else { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( ++ mali_pp_core_get_id(group->pp_core)), ++ 0xFFFFFFFF, 0); ++ } + -+#ifndef __MALI_OSK_BITOPS_H__ -+#define __MALI_OSK_BITOPS_H__ ++ mali_executor_unlock(); ++ return ret; ++ } ++#endif + -+#ifdef __cplusplus -+extern "C" { ++ if (NULL != group->gp_core) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } else { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif +#endif + -+MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr) ++ return ret; ++} ++ ++static void mali_group_bottom_half_mmu(void *data) +{ -+ MALI_DEBUG_ASSERT(bit < 32); -+ MALI_DEBUG_ASSERT(NULL != addr); ++ struct mali_group *group = (struct mali_group *)data; + -+ (*addr) &= ~(1 << bit); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ ++ if (NULL != group->gp_core) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } ++ ++ mali_executor_interrupt_mmu(group, MALI_FALSE); ++ ++ if (NULL != group->gp_core) { ++ 
_mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(0), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } else { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_mmu_get_rawstat(group->mmu), 0); ++ } +} + -+MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr) ++_mali_osk_errcode_t mali_group_upper_half_gp(void *data) +{ -+ MALI_DEBUG_ASSERT(bit < 32); -+ MALI_DEBUG_ASSERT(NULL != addr); ++ struct mali_group *group = (struct mali_group *)data; ++ _mali_osk_errcode_t ret; + -+ (*addr) |= (1 << bit); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), ++ mali_gp_get_rawstat(group->gp_core), 0); ++ ++ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n", ++ mali_gp_get_rawstat(group->gp_core), ++ mali_group_core_description(group))); ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif ++#endif ++ ret = mali_executor_interrupt_gp(group, MALI_TRUE); ++ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { ++ /* group complete and on job shedule on it, it already power off */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), ++ 0xFFFFFFFF, 0); ++ mali_executor_unlock(); ++ return ret; ++ } ++#endif ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), ++ mali_gp_get_rawstat(group->gp_core), 0); ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif ++#endif ++ return ret; +} + -+MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value) ++static void mali_group_bottom_half_gp(void *data) +{ -+ MALI_DEBUG_ASSERT(bit < 32); -+ return value & (1 << bit); ++ struct mali_group *group = (struct mali_group *)data; ++ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ 
MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), ++ mali_gp_get_rawstat(group->gp_core), 0); ++ ++ mali_executor_interrupt_gp(group, MALI_FALSE); ++ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(0), ++ mali_gp_get_rawstat(group->gp_core), 0); +} + -+MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value) ++_mali_osk_errcode_t mali_group_upper_half_pp(void *data) +{ -+ u32 inverted; -+ u32 negated; -+ u32 isolated; -+ u32 leading_zeros; ++ struct mali_group *group = (struct mali_group *)data; ++ _mali_osk_errcode_t ret; + -+ /* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range 0..31 */ -+ inverted = ~value; /* zzz...z1000...0 */ -+ /* Using count_trailing_zeros on inverted value - -+ * See ARM System Developers Guide for details of count_trailing_zeros */ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); + -+ /* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */ -+ negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */ -+ /* negated = xxx...x1000...0 */ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group)) { ++ /* Not working, so nothing to do */ ++ mali_executor_unlock(); ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif + -+ isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */ -+ /* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it -+ * Note that the output is zero if value was all 1s */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_pp_get_rawstat(group->pp_core), 0); + -+ leading_zeros = _mali_osk_clz(isolated); ++ MALI_DEBUG_PRINT(4, ("Group: Interrupt 0x%08X from %s\n", ++ mali_pp_get_rawstat(group->pp_core), ++ mali_group_core_description(group))); ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif ++#endif + -+ return 31 - leading_zeros; ++ ret = mali_executor_interrupt_pp(group, MALI_TRUE); ++ ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_lock(); ++ if (!mali_group_is_working(group) && (!mali_group_power_is_on(group))) { ++ /* group complete and on job shedule on it, it already power off */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( ++ mali_pp_core_get_id(group->pp_core)), ++ 0xFFFFFFFF, 0); ++ mali_executor_unlock(); ++ return ret; ++ } ++#endif ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF, ++ 0, 0, /* No pid and tid for interrupt handler */ 
++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_pp_get_rawstat(group->pp_core), 0); ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ mali_executor_unlock(); ++#endif ++#endif ++ return ret; +} + ++static void mali_group_bottom_half_pp(void *data) ++{ ++ struct mali_group *group = (struct mali_group *)data; + -+/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations -+ * @{ */ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); + -+/** -+ * These bit-operations do not work atomically, and so locks must be used if -+ * atomicity is required. -+ * -+ * Reference implementations for Little Endian are provided, and so it should -+ * not normally be necessary to re-implement these. Efficient bit-twiddling -+ * techniques are used where possible, implemented in portable C. -+ * -+ * Note that these reference implementations rely on _mali_osk_clz() being -+ * implemented. -+ */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_START | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_pp_get_rawstat(group->pp_core), 0); + -+/** @brief Clear a bit in a sequence of 32-bit words -+ * @param nr bit number to clear, starting from the (Little-endian) least -+ * significant bit -+ * @param addr starting point for counting. -+ */ -+MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr) -+{ -+ addr += nr >> 5; /* find the correct word */ -+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ ++ mali_executor_interrupt_pp(group, MALI_FALSE); + -+ _mali_internal_clear_bit(nr, addr); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_STOP | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF, ++ 0, _mali_osk_get_tid(), /* pid and tid */ ++ MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP( ++ mali_pp_core_get_id(group->pp_core)), ++ mali_pp_get_rawstat(group->pp_core), 0); +} + -+/** @brief Set a bit in a sequence of 32-bit words -+ * @param nr bit number to set, starting from the (Little-endian) least -+ * significant bit -+ * @param addr starting point for counting. -+ */ -+MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr) ++static void mali_group_timeout(void *data) +{ -+ addr += nr >> 5; /* find the correct word */ -+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ ++ struct mali_group *group = (struct mali_group *)data; ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ _mali_internal_set_bit(nr, addr); ++ MALI_DEBUG_PRINT(2, ("Group: timeout handler for %s at %u\n", ++ mali_group_core_description(group), ++ _mali_osk_time_tickcount())); ++ ++ if (NULL != group->gp_core) { ++ mali_group_schedule_bottom_half_gp(group); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ mali_group_schedule_bottom_half_pp(group); ++ } +} + -+/** @brief Test a bit in a sequence of 32-bit words -+ * @param nr bit number to test, starting from the (Little-endian) least -+ * significant bit -+ * @param addr starting point for counting. -+ * @return zero if bit was clear, non-zero if set. Do not rely on the return -+ * value being related to the actual word under test. 
-+ */ -+MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr) ++mali_bool mali_group_zap_session(struct mali_group *group, ++ struct mali_session_data *session) +{ -+ addr += nr >> 5; /* find the correct word */ -+ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ return _mali_internal_test_bit(nr, *addr); ++ if (group->session != session) { ++ /* not running from this session */ ++ return MALI_TRUE; /* success */ ++ } ++ ++ if (group->is_working) { ++ /* The Zap also does the stall and disable_stall */ ++ mali_bool zap_success = mali_mmu_zap_tlb(group->mmu); ++ return zap_success; ++ } else { ++ /* Just remove the session instead of zapping */ ++ mali_group_clear_session(group); ++ return MALI_TRUE; /* success */ ++ } +} + -+/* Return maxbit if not found */ -+/** @brief Find the first zero bit in a sequence of 32-bit words -+ * @param addr starting point for search. -+ * @param maxbit the maximum number of bits to search -+ * @return the number of the first zero bit found, or maxbit if none were found -+ * in the specified range. -+ */ -+MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit) ++#if defined(CONFIG_MALI400_PROFILING) ++static void mali_group_report_l2_cache_counters_per_core(struct mali_group *group, u32 core_num) +{ -+ u32 total; ++ u32 source0 = 0; ++ u32 value0 = 0; ++ u32 source1 = 0; ++ u32 value1 = 0; ++ u32 profiling_channel = 0; + -+ for (total = 0; total < maxbit; total += 32, ++addr) { -+ int result; -+ result = _mali_internal_find_first_zero_bit(*addr); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+ /* non-negative signifies the bit was found */ -+ if (result >= 0) { -+ total += (u32)result; -+ break; -+ } ++ switch (core_num) { ++ case 0: ++ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS; ++ break; ++ case 1: ++ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS; ++ break; ++ case 2: ++ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS; ++ break; ++ default: ++ profiling_channel = MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS; ++ break; + } + -+ /* Now check if we reached maxbit or above */ -+ if (total >= maxbit) { -+ total = maxbit; ++ if (0 == core_num) { ++ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); ++ } ++ if (1 == core_num) { ++ if (1 == mali_l2_cache_get_id(group->l2_cache_core[0])) { ++ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); ++ } else if (1 == mali_l2_cache_get_id(group->l2_cache_core[1])) { ++ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1); ++ } ++ } ++ if (2 == core_num) { ++ if (2 == mali_l2_cache_get_id(group->l2_cache_core[0])) { ++ mali_l2_cache_core_get_counter_values(group->l2_cache_core[0], &source0, &value0, &source1, &value1); ++ } else if (2 == mali_l2_cache_get_id(group->l2_cache_core[1])) { ++ mali_l2_cache_core_get_counter_values(group->l2_cache_core[1], &source0, &value0, &source1, &value1); ++ } + } + -+ return total; /* 
either the found bit nr, or maxbit if not found */ -+} -+/** @} */ /* end group _mali_osk_bitops */ -+ -+#ifdef __cplusplus ++ _mali_osk_profiling_add_event(profiling_channel, source1 << 8 | source0, value0, value1, 0, 0); +} -+#endif -+ -+#endif /* __MALI_OSK_BITOPS_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h ++#endif /* #if defined(CONFIG_MALI400_PROFILING) */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_group.h b/drivers/gpu/arm/mali400/mali/common/mali_group.h new file mode 100644 -index 000000000..9af2d7d4d +index 000000000..32481e4a6 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h -@@ -0,0 +1,273 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_group.h +@@ -0,0 +1,460 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -292085,440 +294890,464 @@ index 000000000..9af2d7d4d + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef __MALI_GROUP_H__ ++#define __MALI_GROUP_H__ ++ ++#include "mali_osk.h" ++#include "mali_l2_cache.h" ++#include "mali_mmu.h" ++#include "mali_gp.h" ++#include "mali_pp.h" ++#include "mali_session.h" ++#include "mali_osk_profiling.h" ++ +/** -+ * @file mali_osk_list.h -+ * Implementation of the OS abstraction layer for the kernel device driver ++ * @brief Default max runtime [ms] for a core job - used by timeout timers + */ ++#define MALI_MAX_JOB_RUNTIME_DEFAULT 5000 + -+#ifndef __MALI_OSK_LIST_H__ -+#define __MALI_OSK_LIST_H__ ++extern int mali_max_job_runtime; + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++#define MALI_MAX_NUMBER_OF_GROUPS 10 ++#define MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS 8 + -+#ifdef __cplusplus -+extern "C" { -+#endif ++enum mali_group_state { ++ MALI_GROUP_STATE_INACTIVE, ++ MALI_GROUP_STATE_ACTIVATION_PENDING, ++ MALI_GROUP_STATE_ACTIVE, ++}; + -+MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next) -+{ -+ next->prev = new_entry; -+ new_entry->next = next; -+ new_entry->prev = prev; -+ prev->next = new_entry; -+} ++/** ++ * The structure represents a render group ++ * A render group is defined by all the cores that share the same Mali MMU ++ */ + -+MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next) -+{ -+ next->prev = prev; -+ prev->next = next; -+} ++struct mali_group { ++ struct mali_mmu_core *mmu; ++ struct mali_session_data *session; + -+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists -+ * @{ */ ++ enum mali_group_state state; ++ mali_bool power_is_on; + -+/** Reference implementations of Doubly-linked Circular Lists are provided. -+ * There is often no need to re-implement these. -+ * -+ * @note The implementation may differ subtly from any lists the OS provides. -+ * For this reason, these lists should not be mixed with OS-specific lists -+ * inside the OSK/UKK implementation. */ ++ mali_bool is_working; ++ unsigned long start_time; /* in ticks */ + -+/** @brief Initialize a list to be a head of an empty list -+ * @param exp the list to initialize. 
*/ -+#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp) ++ struct mali_gp_core *gp_core; ++ struct mali_gp_job *gp_running_job; + -+/** @brief Define a list variable, which is uninitialized. -+ * @param exp the name of the variable that the list will be defined as. */ -+#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp ++ struct mali_pp_core *pp_core; ++ struct mali_pp_job *pp_running_job; ++ u32 pp_running_sub_job; + -+/** @brief Define a list variable, which is initialized. -+ * @param exp the name of the variable that the list will be defined as. */ -+#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp } ++ struct mali_pm_domain *pm_domain; + -+/** @brief Initialize a list element. -+ * -+ * All list elements must be initialized before use. -+ * -+ * Do not use on any list element that is present in a list without using -+ * _mali_osk_list_del first, otherwise this will break the list. ++ struct mali_l2_cache_core *l2_cache_core[2]; ++ u32 l2_cache_core_ref_count[2]; ++ ++ /* Parent virtual group (if any) */ ++ struct mali_group *parent_group; ++ ++ struct mali_dlbu_core *dlbu_core; ++ struct mali_bcast_unit *bcast_core; ++ ++ /* Used for working groups which needs to be disabled */ ++ mali_bool disable_requested; ++ ++ /* Used by group to link child groups (for virtual group) */ ++ _mali_osk_list_t group_list; ++ ++ /* Used by executor module in order to link groups of same state */ ++ _mali_osk_list_t executor_list; ++ ++ /* Used by PM domains to link groups of same domain */ ++ _mali_osk_list_t pm_domain_list; ++ ++ _mali_osk_wq_work_t *bottom_half_work_mmu; ++ _mali_osk_wq_work_t *bottom_half_work_gp; ++ _mali_osk_wq_work_t *bottom_half_work_pp; ++ ++ _mali_osk_timer_t *timeout_timer; ++}; ++ ++/** @brief Create a new Mali group object + * -+ * @param list the list element to initialize ++ * @return A pointer to a new group object + */ -+MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list) ++struct mali_group *mali_group_create(struct mali_l2_cache_core *core, ++ struct mali_dlbu_core *dlbu, ++ struct mali_bcast_unit *bcast, ++ u32 domain_index); ++ ++void mali_group_dump_status(struct mali_group *group); ++ ++void mali_group_delete(struct mali_group *group); ++ ++_mali_osk_errcode_t mali_group_add_mmu_core(struct mali_group *group, ++ struct mali_mmu_core *mmu_core); ++void mali_group_remove_mmu_core(struct mali_group *group); ++ ++_mali_osk_errcode_t mali_group_add_gp_core(struct mali_group *group, ++ struct mali_gp_core *gp_core); ++void mali_group_remove_gp_core(struct mali_group *group); ++ ++_mali_osk_errcode_t mali_group_add_pp_core(struct mali_group *group, ++ struct mali_pp_core *pp_core); ++void mali_group_remove_pp_core(struct mali_group *group); ++ ++MALI_STATIC_INLINE const char *mali_group_core_description( ++ struct mali_group *group) +{ -+ list->next = list; -+ list->prev = list; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ if (NULL != group->pp_core) { ++ return mali_pp_core_description(group->pp_core); ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ return mali_gp_core_description(group->gp_core); ++ } +} + -+/** @brief Insert a single list element after an entry in a list -+ * -+ * As an example, if this is inserted to the head of a list, then this becomes -+ * the first element of the list. -+ * -+ * Do not use to move list elements from one list to another, as it will break -+ * the originating list. 
-+ * -+ * -+ * @param newlist the list element to insert -+ * @param list the list in which to insert. The new element will be the next -+ * entry in this list -+ */ -+MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list) ++MALI_STATIC_INLINE mali_bool mali_group_is_virtual(struct mali_group *group) +{ -+ __mali_osk_list_add(new_entry, list, list->next); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ return (NULL != group->dlbu_core); ++#else ++ return MALI_FALSE; ++#endif +} + -+/** @brief Insert a single list element before an entry in a list -+ * -+ * As an example, if this is inserted to the head of a list, then this becomes -+ * the last element of the list. -+ * -+ * Do not use to move list elements from one list to another, as it will break -+ * the originating list. -+ * -+ * @param newlist the list element to insert -+ * @param list the list in which to insert. The new element will be the previous -+ * entry in this list ++/** @brief Check if a group is a part of a virtual group or not + */ -+MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list) ++MALI_STATIC_INLINE mali_bool mali_group_is_in_virtual(struct mali_group *group) +{ -+ __mali_osk_list_add(new_entry, list->prev, list); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ return (NULL != group->parent_group) ? MALI_TRUE : MALI_FALSE; ++#else ++ return MALI_FALSE; ++#endif +} + -+/** @brief Remove a single element from a list ++/** @brief Reset group + * -+ * The element will no longer be present in the list. The removed list element -+ * will be uninitialized, and so should not be traversed. It must be -+ * initialized before further use. ++ * This function will reset the entire group, ++ * including all the cores present in the group. + * -+ * @param list the list element to remove. ++ * @param group Pointer to the group to reset + */ -+MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list) ++void mali_group_reset(struct mali_group *group); ++ ++MALI_STATIC_INLINE struct mali_session_data *mali_group_get_session( ++ struct mali_group *group) +{ -+ __mali_osk_list_del(list->prev, list->next); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ return group->session; +} + -+/** @brief Remove a single element from a list, and re-initialize it -+ * -+ * The element will no longer be present in the list. The removed list element -+ * will initialized, and so can be used as normal. -+ * -+ * @param list the list element to remove and initialize. -+ */ -+MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list) ++MALI_STATIC_INLINE void mali_group_clear_session(struct mali_group *group) +{ -+ __mali_osk_list_del(list->prev, list->next); -+ _mali_osk_list_init(list); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ ++ if (NULL != group->session) { ++ mali_mmu_activate_empty_page_directory(group->mmu); ++ group->session = NULL; ++ } +} + -+/** @brief Determine whether a list is empty. -+ * -+ * An empty list is one that contains a single element that points to itself. -+ * -+ * @param list the list to check. -+ * @return non-zero if the list is empty, and zero otherwise. 
++enum mali_group_state mali_group_activate(struct mali_group *group); ++ ++/* ++ * Change state from ACTIVATION_PENDING to ACTIVE ++ * For virtual group, all childs need to be ACTIVE first + */ -+MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list) ++mali_bool mali_group_set_active(struct mali_group *group); ++ ++/* ++ * @return MALI_TRUE means one or more domains can now be powered off, ++ * and caller should call either mali_pm_update_async() or ++ * mali_pm_update_sync() in order to do so. ++ */ ++mali_bool mali_group_deactivate(struct mali_group *group); ++ ++MALI_STATIC_INLINE enum mali_group_state mali_group_get_state(struct mali_group *group) +{ -+ return list->next == list; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return group->state; +} + -+/** @brief Move a list element from one list to another. -+ * -+ * The list element must be initialized. -+ * -+ * As an example, moving a list item to the head of a new list causes this item -+ * to be the first element in the new list. -+ * -+ * @param move the list element to move -+ * @param list the new list into which the element will be inserted, as the next -+ * element in the list. -+ */ -+MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list) ++MALI_STATIC_INLINE mali_bool mali_group_power_is_on(struct mali_group *group) +{ -+ __mali_osk_list_del(move_entry->prev, move_entry->next); -+ _mali_osk_list_add(move_entry, list); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ return group->power_is_on; +} + -+/** @brief Move an entire list -+ * -+ * The list element must be initialized. -+ * -+ * Allows you to move a list from one list head to another list head -+ * -+ * @param old_list The existing list head -+ * @param new_list The new list head (must be an empty list) -+ */ -+MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list) ++void mali_group_power_up(struct mali_group *group); ++void mali_group_power_down(struct mali_group *group); ++ ++MALI_STATIC_INLINE void mali_group_set_disable_request( ++ struct mali_group *group, mali_bool disable) +{ -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list)); -+ if (!_mali_osk_list_empty(old_list)) { -+ new_list->next = old_list->next; -+ new_list->prev = old_list->prev; -+ new_list->next->prev = new_list; -+ new_list->prev->next = new_list; -+ old_list->next = old_list; -+ old_list->prev = old_list; ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ group->disable_requested = disable; ++ ++ /** ++ * When one of child group's disable_requeset is set TRUE, then ++ * the disable_request of parent group should also be set to TRUE. ++ * While, the disable_request of parent group should only be set to FALSE ++ * only when all of its child group's disable_request are set to FALSE. ++ */ ++ if (NULL != group->parent_group && MALI_TRUE == disable) { ++ group->parent_group->disable_requested = disable; + } +} + -+/** @brief Find the containing structure of a list -+ * -+ * When traversing a list, this is used to recover the containing structure, -+ * given that is contains a _mali_osk_list_t member. -+ * -+ * Each list must be of structures of one type, and must link the same members -+ * together, otherwise it will not be possible to correctly recover the -+ * sturctures that the lists link. 
-+ * -+ * @note no type or memory checking occurs to ensure that a structure does in -+ * fact exist for the list entry, and that it is being recovered with respect -+ * to the correct list member. -+ * -+ * @param ptr the pointer to the _mali_osk_list_t member in this structure -+ * @param type the type of the structure that contains the member -+ * @param member the member of the structure that ptr points to. -+ * @return a pointer to a \a type object which contains the _mali_osk_list_t -+ * \a member, as pointed to by the _mali_osk_list_t \a *ptr. -+ */ -+#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \ -+ _MALI_OSK_CONTAINER_OF(ptr, type, member) ++MALI_STATIC_INLINE mali_bool mali_group_disable_requested( ++ struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return group->disable_requested; ++} + -+/** @brief Enumerate a list safely -+ * -+ * With this macro, lists can be enumerated in a 'safe' manner. That is, -+ * entries can be deleted from the list without causing an error during -+ * enumeration. To achieve this, a 'temporary' pointer is required, which must -+ * be provided to the macro. -+ * -+ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be -+ * followed by a statement or compound-statement which will be executed for -+ * each list entry. -+ * -+ * Upon loop completion, providing that an early out was not taken in the -+ * loop body, then it is guaranteed that ptr->member == list, even if the loop -+ * body never executed. -+ * -+ * @param ptr a pointer to an object of type 'type', which points to the -+ * structure that contains the currently enumerated list entry. -+ * @param tmp a pointer to an object of type 'type', which must not be used -+ * inside the list-execution statement. -+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will -+ * begin -+ * @param type the type of the structure that contains the _mali_osk_list_t -+ * member that is part of the list to be enumerated. -+ * @param member the _mali_osk_list_t member of the structure that is part of -+ * the list to be enumerated. ++/** @brief Virtual groups */ ++void mali_group_add_group(struct mali_group *parent, struct mali_group *child); ++struct mali_group *mali_group_acquire_group(struct mali_group *parent); ++void mali_group_remove_group(struct mali_group *parent, struct mali_group *child); ++ ++/** @brief Checks if the group is working. + */ -+#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member) \ -+ for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member), \ -+ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member); \ -+ &ptr->member != (list); \ -+ ptr = tmp, \ -+ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member)) ++MALI_STATIC_INLINE mali_bool mali_group_is_working(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ if (mali_group_is_in_virtual(group)) { ++ struct mali_group *tmp_group = mali_executor_get_virtual_group(); ++ return tmp_group->is_working; ++ } ++ return group->is_working; ++} + -+/** @brief Enumerate a list in reverse order safely -+ * -+ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that -+ * entries are enumerated in reverse order. 
++MALI_STATIC_INLINE struct mali_gp_job *mali_group_get_running_gp_job(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return group->gp_running_job; ++} ++ ++/** @brief Zap MMU TLB on all groups + * -+ * @param ptr a pointer to an object of type 'type', which points to the -+ * structure that contains the currently enumerated list entry. -+ * @param tmp a pointer to an object of type 'type', which must not be used -+ * inside the list-execution statement. -+ * @param list a pointer to a _mali_osk_list_t, from which enumeration will -+ * begin -+ * @param type the type of the structure that contains the _mali_osk_list_t -+ * member that is part of the list to be enumerated. -+ * @param member the _mali_osk_list_t member of the structure that is part of -+ * the list to be enumerated. ++ * Zap TLB on group if \a session is active. + */ -+#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \ -+ for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member), \ -+ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member); \ -+ &ptr->member != (list); \ -+ ptr = tmp, \ -+ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member)) -+ -+/** @} */ /* end group _mali_osk_list */ ++mali_bool mali_group_zap_session(struct mali_group *group, ++ struct mali_session_data *session); + -+#ifdef __cplusplus ++/** @brief Get pointer to GP core object ++ */ ++MALI_STATIC_INLINE struct mali_gp_core *mali_group_get_gp_core(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ return group->gp_core; +} -+#endif + -+#endif /* __MALI_OSK_LIST_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h -new file mode 100644 -index 000000000..ebcc277fa ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h -@@ -0,0 +1,157 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++/** @brief Get pointer to PP core object + */ ++MALI_STATIC_INLINE struct mali_pp_core *mali_group_get_pp_core(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ return group->pp_core; ++} + -+/** -+ * @file mali_osk_mali.h -+ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK) ++/** @brief Start GP job + */ ++void mali_group_start_gp_job(struct mali_group *group, struct mali_gp_job *job, mali_bool gpu_secure_mode_pre_enabled); + -+#ifndef __MALI_OSK_MALI_H__ -+#define __MALI_OSK_MALI_H__ ++void mali_group_start_pp_job(struct mali_group *group, struct mali_pp_job *job, u32 sub_job, mali_bool gpu_secure_mode_pre_enabled); + -+#include -+#include -+#include ++/** @brief Start virtual group Job on a virtual group ++*/ ++void mali_group_start_job_on_virtual(struct mali_group *group, struct mali_pp_job *job, u32 first_subjob, u32 last_subjob); + -+#ifdef __cplusplus -+extern "C" { -+#endif + -+#ifdef CONFIG_MALI_DEVFREQ -+struct mali_device { -+ struct device *dev; -+#ifdef CONFIG_HAVE_CLK -+ struct clk *clock; -+ struct clk_bulk_data *clks; -+ int num_clks; -+#endif -+#ifdef CONFIG_REGULATOR -+ struct regulator *regulator; -+ struct opp_table *opp_table; -+#endif -+#ifdef CONFIG_PM_DEVFREQ -+ struct devfreq_dev_profile devfreq_profile; -+ struct devfreq *devfreq; -+ unsigned long current_freq; -+ unsigned long current_voltage; -+ struct monitor_dev_info *mdev_info; -+ struct rockchip_opp_info opp_info; -+#ifdef CONFIG_DEVFREQ_THERMAL -+ struct thermal_cooling_device *devfreq_cooling; -+#endif -+#endif -+ struct mali_pm_metrics_data mali_metrics; -+}; -+#endif ++/** @brief Start a subjob from a particular on a specific PP group ++*/ ++void mali_group_start_job_on_group(struct mali_group *group, struct mali_pp_job *job, u32 subjob); + -+/** @addtogroup _mali_osk_miscellaneous -+ * @{ */ + -+/** @brief Struct with device specific configuration data ++/** @brief remove all the unused groups in tmp_unused group list, so that the group is in consistent status. + */ -+typedef struct mali_gpu_device_data _mali_osk_device_data; ++void mali_group_non_dlbu_job_done_virtual(struct mali_group *group); + -+#ifdef CONFIG_MALI_DT -+/** @brief Initialize those device resources when we use device tree -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_resource_initialize(void); -+#endif + -+/** @brief Find Mali GPU HW resource -+ * -+ * @param addr Address of Mali GPU resource to find -+ * @param res Storage for resource information if resource is found. -+ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found ++/** @brief Resume GP job that suspended waiting for more heap memory + */ -+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res); ++void mali_group_resume_gp_with_new_heap(struct mali_group *group, u32 job_id, u32 start_addr, u32 end_addr); + ++MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_gp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_gp_get_interrupt_result(group->gp_core); ++} + -+/** @brief Find Mali GPU HW base address -+ * -+ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address. 
-+ */ -+uintptr_t _mali_osk_resource_base_address(void); ++MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_pp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_pp_get_interrupt_result(group->pp_core); ++} + -+/** @brief Find the specific GPU resource. -+ * -+ * @return value -+ * 0x400 if Mali 400 specific GPU resource identified -+ * 0x450 if Mali 450 specific GPU resource identified -+ * 0x470 if Mali 470 specific GPU resource identified -+ * -+ */ -+u32 _mali_osk_identify_gpu_resource(void); ++MALI_STATIC_INLINE enum mali_interrupt_result mali_group_get_interrupt_result_mmu(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_mmu_get_interrupt_result(group->mmu); ++} + -+/** @brief Retrieve the Mali GPU specific data -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data); ++MALI_STATIC_INLINE mali_bool mali_group_gp_is_active(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_gp_is_active(group->gp_core); ++} + -+/** @brief Find the pmu domain config from device data. -+ * -+ * @param domain_config_array used to store pmu domain config found in device data. -+ * @param array_size is the size of array domain_config_array. -+ */ -+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size); ++MALI_STATIC_INLINE mali_bool mali_group_pp_is_active(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_pp_is_active(group->pp_core); ++} + -+/** @brief Get Mali PMU switch delay -+ * -+ *@return pmu switch delay if it is configured -+ */ -+u32 _mali_osk_get_pmu_switch_delay(void); ++MALI_STATIC_INLINE mali_bool mali_group_has_timed_out(struct mali_group *group) ++{ ++ unsigned long time_cost; ++ struct mali_group *tmp_group = group; + -+/** @brief Determines if Mali GPU has been configured with shared interrupts. -+ * -+ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not. -+ */ -+mali_bool _mali_osk_shared_interrupts(void); ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); + -+/** @brief Initialize the gpu secure mode. -+ * The gpu secure mode will initially be in a disabled state. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void); ++ /* if the group is in virtual need to use virtual_group's start time */ ++ if (mali_group_is_in_virtual(group)) { ++ tmp_group = mali_executor_get_virtual_group(); ++ } + -+/** @brief Deinitialize the gpu secure mode. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void); ++ time_cost = _mali_osk_time_tickcount() - tmp_group->start_time; ++ if (_mali_osk_time_mstoticks(mali_max_job_runtime) <= time_cost) { ++ /* ++ * current tick is at or after timeout end time, ++ * so this is a valid timeout ++ */ ++ return MALI_TRUE; ++ } else { ++ /* ++ * Not a valid timeout. 
A HW interrupt probably beat ++ * us to it, and the timer wasn't properly deleted ++ * (async deletion used due to atomic context). ++ */ ++ return MALI_FALSE; ++ } ++} + -+/** @brief Reset GPU and enable the gpu secure mode. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void); ++MALI_STATIC_INLINE void mali_group_mask_all_interrupts_gp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_gp_mask_all_interrupts(group->gp_core); ++} + -+/** @brief Reset GPU and disable the gpu secure mode. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void); ++MALI_STATIC_INLINE void mali_group_mask_all_interrupts_pp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return mali_pp_mask_all_interrupts(group->pp_core); ++} + -+/** @brief Check if the gpu secure mode has been enabled. -+ * @return MALI_TRUE if enabled, otherwise MALI_FALSE. -+ */ -+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void); ++MALI_STATIC_INLINE void mali_group_enable_interrupts_gp( ++ struct mali_group *group, ++ enum mali_interrupt_result exceptions) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ mali_gp_enable_interrupts(group->gp_core, exceptions); ++} + -+/** @brief Check if the gpu secure mode is supported. -+ * @return MALI_TRUE if supported, otherwise MALI_FALSE. -+ */ -+mali_bool _mali_osk_gpu_secure_mode_is_supported(void); ++MALI_STATIC_INLINE void mali_group_schedule_bottom_half_gp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->gp_core); ++ _mali_osk_wq_schedule_work(group->bottom_half_work_gp); ++} + + -+/** @} */ /* end group _mali_osk_miscellaneous */ ++MALI_STATIC_INLINE void mali_group_schedule_bottom_half_pp(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->pp_core); ++ _mali_osk_wq_schedule_work(group->bottom_half_work_pp); ++} + -+#ifdef __cplusplus ++MALI_STATIC_INLINE void mali_group_schedule_bottom_half_mmu(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT_POINTER(group->mmu); ++ _mali_osk_wq_schedule_work(group->bottom_half_work_mmu); ++} ++ ++struct mali_pp_job *mali_group_complete_pp(struct mali_group *group, mali_bool success, u32 *sub_job); ++ ++struct mali_gp_job *mali_group_complete_gp(struct mali_group *group, mali_bool success); ++ ++#if defined(CONFIG_MALI400_PROFILING) ++MALI_STATIC_INLINE void mali_group_oom(struct mali_group *group) ++{ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND | ++ MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(0), ++ 0, 0, 0, 0, 0); +} +#endif + -+#endif /* __MALI_OSK_MALI_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h ++struct mali_group *mali_group_get_glob_group(u32 index); ++u32 mali_group_get_glob_num_groups(void); ++ ++u32 mali_group_dump_state(struct mali_group *group, char *buf, u32 size); ++ ++ ++_mali_osk_errcode_t mali_group_upper_half_mmu(void *data); ++_mali_osk_errcode_t mali_group_upper_half_gp(void *data); ++_mali_osk_errcode_t 
mali_group_upper_half_pp(void *data); ++ ++MALI_STATIC_INLINE mali_bool mali_group_is_empty(struct mali_group *group) ++{ ++ MALI_DEBUG_ASSERT_POINTER(group); ++ MALI_DEBUG_ASSERT(mali_group_is_virtual(group)); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ return _mali_osk_list_empty(&group->group_list); ++} ++ ++#endif /* __MALI_GROUP_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c new file mode 100644 -index 000000000..6e4583db1 +index 000000000..a813816e9 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h -@@ -0,0 +1,146 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.c +@@ -0,0 +1,47 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -292527,148 +295356,353 @@ index 000000000..6e4583db1 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_OSK_PROFILING_H__ -+#define __MALI_OSK_PROFILING_H__ ++#include "mali_hw_core.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_osk_mali.h" + -+#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) ++_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size) ++{ ++ core->phys_addr = resource->base; ++ core->phys_offset = resource->base - _mali_osk_resource_base_address(); ++ core->description = resource->description; ++ core->size = reg_size; + -+#include "mali_linux_trace.h" -+#include "mali_profiling_events.h" -+#include "mali_profiling_gator_api.h" ++ MALI_DEBUG_ASSERT(core->phys_offset < core->phys_addr); + -+#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576 ++ if (_MALI_OSK_ERR_OK == _mali_osk_mem_reqregion(core->phys_addr, core->size, core->description)) { ++ core->mapped_registers = _mali_osk_mem_mapioregion(core->phys_addr, core->size, core->description); ++ if (NULL != core->mapped_registers) { ++ return _MALI_OSK_ERR_OK; ++ } else { ++ MALI_PRINT_ERROR(("Failed to map memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr)); ++ } ++ _mali_osk_mem_unreqregion(core->phys_addr, core->size); ++ } else { ++ MALI_PRINT_ERROR(("Failed to request memory region for core %s at phys_addr 0x%08X\n", core->description, core->phys_addr)); ++ } + -+#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1) ++ return _MALI_OSK_ERR_FAULT; ++} + -+/** @defgroup _mali_osk_profiling External profiling connectivity -+ * @{ */ ++void mali_hw_core_delete(struct mali_hw_core *core) ++{ ++ if (NULL != core->mapped_registers) { ++ _mali_osk_mem_unmapioregion(core->phys_addr, core->size, core->mapped_registers); ++ core->mapped_registers = NULL; ++ } ++ _mali_osk_mem_unreqregion(core->phys_addr, core->size); ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h +new file mode 100644 +index 000000000..38d96e240 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_hw_core.h +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++#ifndef __MALI_HW_CORE_H__ ++#define __MALI_HW_CORE_H__ ++ ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + +/** -+ * Initialize the profiling module. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ * The common parts for all Mali HW cores (GP, PP, MMU, L2 and PMU) ++ * This struct is embedded inside all core specific structs. + */ -+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start); ++struct mali_hw_core { ++ uintptr_t phys_addr; /**< Physical address of the registers */ ++ u32 phys_offset; /**< Offset from start of Mali to registers */ ++ u32 size; /**< Size of registers */ ++ mali_io_address mapped_registers; /**< Virtual mapping of the registers */ ++ const char *description; /**< Name of unit (as specified in device configuration) */ ++}; ++ ++#define MALI_REG_POLL_COUNT_FAST 1000000 ++#define MALI_REG_POLL_COUNT_SLOW 1000000 + +/* -+ * Terminate the profiling module. ++ * GP and PP core translate their int_stat/rawstat into one of these + */ -+void _mali_osk_profiling_term(void); ++enum mali_interrupt_result { ++ MALI_INTERRUPT_RESULT_NONE, ++ MALI_INTERRUPT_RESULT_SUCCESS, ++ MALI_INTERRUPT_RESULT_SUCCESS_VS, ++ MALI_INTERRUPT_RESULT_SUCCESS_PLBU, ++ MALI_INTERRUPT_RESULT_OOM, ++ MALI_INTERRUPT_RESULT_ERROR ++}; + -+/** -+ * Stop the profile sampling operation. -+ */ -+void _mali_osk_profiling_stop_sampling(u32 pid); ++_mali_osk_errcode_t mali_hw_core_create(struct mali_hw_core *core, const _mali_osk_resource_t *resource, u32 reg_size); ++void mali_hw_core_delete(struct mali_hw_core *core); + -+/** -+ * Start recording profiling data -+ * -+ * The specified limit will determine how large the capture buffer is. -+ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver. -+ * -+ * @param limit The desired maximum number of events to record on input, the actual maximum on output. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit); ++MALI_STATIC_INLINE u32 mali_hw_core_register_read(struct mali_hw_core *core, u32 relative_address) ++{ ++ u32 read_val; ++ read_val = _mali_osk_mem_ioread32(core->mapped_registers, relative_address); ++ MALI_DEBUG_PRINT(6, ("register_read for core %s, relative addr=0x%04X, val=0x%08X\n", ++ core->description, relative_address, read_val)); ++ return read_val; ++} + -+/** -+ * Add an profiling event -+ * -+ * @param event_id The event identificator. -+ * @param data0 First data parameter, depending on event_id specified. -+ * @param data1 Second data parameter, depending on event_id specified. -+ * @param data2 Third data parameter, depending on event_id specified. -+ * @param data3 Fourth data parameter, depending on event_id specified. -+ * @param data4 Fifth data parameter, depending on event_id specified. 
-+ */ -+void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4); ++MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed(struct mali_hw_core *core, u32 relative_address, u32 new_val) ++{ ++ MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n", ++ core->description, relative_address, new_val)); ++ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val); ++} + -+/** -+ * Report a hardware counter event. -+ * -+ * @param counter_id The ID of the counter. -+ * @param value The value of the counter. ++/* Conditionally write a register. ++ * The register will only be written if the new value is different from the old_value. ++ * If the new value is different, the old value will also be updated */ ++MALI_STATIC_INLINE void mali_hw_core_register_write_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 new_val, const u32 old_val) ++{ ++ MALI_DEBUG_PRINT(6, ("register_write_relaxed for core %s, relative addr=0x%04X, val=0x%08X\n", ++ core->description, relative_address, new_val)); ++ if (old_val != new_val) { ++ _mali_osk_mem_iowrite32_relaxed(core->mapped_registers, relative_address, new_val); ++ } ++} ++ ++MALI_STATIC_INLINE void mali_hw_core_register_write(struct mali_hw_core *core, u32 relative_address, u32 new_val) ++{ ++ MALI_DEBUG_PRINT(6, ("register_write for core %s, relative addr=0x%04X, val=0x%08X\n", ++ core->description, relative_address, new_val)); ++ _mali_osk_mem_iowrite32(core->mapped_registers, relative_address, new_val); ++} ++ ++MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs) ++{ ++ u32 i; ++ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n", ++ core->description, relative_address, nr_of_regs)); ++ ++ /* Do not use burst writes against the registers */ ++ for (i = 0; i < nr_of_regs; i++) { ++ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]); ++ } ++} ++ ++/* Conditionally write a set of registers. ++ * The register will only be written if the new value is different from the old_value. ++ * If the new value is different, the old value will also be updated */ ++MALI_STATIC_INLINE void mali_hw_core_register_write_array_relaxed_conditional(struct mali_hw_core *core, u32 relative_address, u32 *write_array, u32 nr_of_regs, const u32 *old_array) ++{ ++ u32 i; ++ MALI_DEBUG_PRINT(6, ("register_write_array: for core %s, relative addr=0x%04X, nr of regs=%u\n", ++ core->description, relative_address, nr_of_regs)); ++ ++ /* Do not use burst writes against the registers */ ++ for (i = 0; i < nr_of_regs; i++) { ++ if (old_array[i] != write_array[i]) { ++ mali_hw_core_register_write_relaxed(core, relative_address + i * 4, write_array[i]); ++ } ++ } ++} ++ ++#endif /* __MALI_HW_CORE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h +new file mode 100644 +index 000000000..6a8f0f011 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_common.h +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/* Call Linux tracepoint directly */ -+#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value) ++#ifndef __MALI_KERNEL_COMMON_H__ ++#define __MALI_KERNEL_COMMON_H__ + -+/** -+ * Report SW counters ++#include "mali_osk.h" ++ ++/* Make sure debug is defined when it should be */ ++#ifndef DEBUG ++#if defined(_DEBUG) ++#define DEBUG ++#endif ++#endif ++ ++/* The file include several useful macros for error checking, debugging and printing. ++ * - MALI_PRINTF(...) Do not use this function: Will be included in Release builds. ++ * - MALI_DEBUG_PRINT(nr, (X) ) Prints the second argument if nr<=MALI_DEBUG_LEVEL. ++ * - MALI_DEBUG_ERROR( (X) ) Prints an errortext, a source trace, and the given error message. ++ * - MALI_DEBUG_ASSERT(exp,(X)) If the asserted expr is false, the program will exit. ++ * - MALI_DEBUG_ASSERT_POINTER(pointer) Triggers if the pointer is a zero pointer. ++ * - MALI_DEBUG_CODE( X ) The code inside the macro is only compiled in Debug builds. + * -+ * @param counters array of counter values ++ * The (X) means that you must add an extra parenthesis around the argumentlist. ++ * ++ * The printf function: MALI_PRINTF(...) is routed to _mali_osk_debugmsg ++ * ++ * Suggested range for the DEBUG-LEVEL is [1:6] where ++ * [1:2] Is messages with highest priority, indicate possible errors. ++ * [3:4] Is messages with medium priority, output important variables. ++ * [5:6] Is messages with low priority, used during extensive debugging. + */ -+void _mali_osk_profiling_report_sw_counters(u32 *counters); + -+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value); ++/** ++* Fundamental error macro. Reports an error code. This is abstracted to allow us to ++* easily switch to a different error reporting method if we want, and also to allow ++* us to search for error returns easily. ++* ++* Note no closing semicolon - this is supplied in typical usage: ++* ++* MALI_ERROR(MALI_ERROR_OUT_OF_MEMORY); ++*/ ++#define MALI_ERROR(error_code) return (error_code) + +/** -+ * Stop recording profiling data ++ * Basic error macro, to indicate success. ++ * Note no closing semicolon - this is supplied in typical usage: + * -+ * @param count Returns the number of recorded events. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ * MALI_SUCCESS; + */ -+_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count); ++#define MALI_SUCCESS MALI_ERROR(_MALI_OSK_ERR_OK) + +/** -+ * Retrieves the number of events that can be retrieved ++ * Basic error macro. This checks whether the given condition is true, and if not returns ++ * from this function with the supplied error code. This is a macro so that we can override it ++ * for stress testing. + * -+ * @return The number of recorded events that can be retrieved. ++ * Note that this uses the do-while-0 wrapping to ensure that we don't get problems with dangling ++ * else clauses. 
Note also no closing semicolon - this is supplied in typical usage: ++ * ++ * MALI_CHECK((p!=NULL), ERROR_NO_OBJECT); + */ -+u32 _mali_osk_profiling_get_count(void); ++#define MALI_CHECK(condition, error_code) do { if(!(condition)) MALI_ERROR(error_code); } while(0) + +/** -+ * Retrieve an event -+ * -+ * @param index Event index (start with 0 and continue until this function fails to retrieve all events) -+ * @param timestamp The timestamp for the retrieved event will be stored here. -+ * @param event_id The event ID for the retrieved event will be stored here. -+ * @param data The 5 data values for the retrieved event will be stored here. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ * Error propagation macro. If the expression given is anything other than ++ * _MALI_OSK_NO_ERROR, then the value is returned from the enclosing function ++ * as an error code. This effectively acts as a guard clause, and propagates ++ * error values up the call stack. This uses a temporary value to ensure that ++ * the error expression is not evaluated twice. ++ * If the counter for forcing a failure has been set using _mali_force_error, ++ * this error will be returned without evaluating the expression in ++ * MALI_CHECK_NO_ERROR + */ -+_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]); ++#define MALI_CHECK_NO_ERROR(expression) \ ++ do { _mali_osk_errcode_t _check_no_error_result=(expression); \ ++ if(_check_no_error_result != _MALI_OSK_ERR_OK) \ ++ MALI_ERROR(_check_no_error_result); \ ++ } while(0) + +/** -+ * Clear the recorded buffer. -+ * -+ * This is needed in order to start another recording. -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ * Pointer check macro. Checks non-null pointer. + */ -+_mali_osk_errcode_t _mali_osk_profiling_clear(void); ++#define MALI_CHECK_NON_NULL(pointer, error_code) MALI_CHECK( ((pointer)!=NULL), (error_code) ) + +/** -+ * Checks if a recording of profiling data is in progress ++ * Error macro with goto. This checks whether the given condition is true, and if not jumps ++ * to the specified label using a goto. The label must therefore be local to the function in ++ * which this macro appears. This is most usually used to execute some clean-up code before ++ * exiting with a call to ERROR. + * -+ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not ++ * Like the other macros, this is a macro to allow us to override the condition if we wish, ++ * e.g. to force an error during stress testing. + */ -+mali_bool _mali_osk_profiling_is_recording(void); ++#define MALI_CHECK_GOTO(condition, label) do { if(!(condition)) goto label; } while(0) + +/** -+ * Checks if profiling data is available for retrival -+ * -+ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not ++ * Explicitly ignore a parameter passed into a function, to suppress compiler warnings. ++ * Should only be used with parameter names. 
+ */ -+mali_bool _mali_osk_profiling_have_recording(void); ++#define MALI_IGNORE(x) x=x + -+/** @} */ /* end group _mali_osk_profiling */ ++#if defined(CONFIG_MALI_QUIET) ++#define MALI_PRINTF(args) ++#else ++#define MALI_PRINTF(args) _mali_osk_dbgmsg args; ++#endif + -+#else /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */ ++#define MALI_PRINT_ERROR(args) do{ \ ++ MALI_PRINTF(("Mali: ERR: %s\n" ,__FILE__)); \ ++ MALI_PRINTF((" %s()%4d\n ", __FUNCTION__, __LINE__)) ; \ ++ MALI_PRINTF(args); \ ++ MALI_PRINTF(("\n")); \ ++ } while(0) + -+/* Dummy add_event, for when profiling is disabled. */ ++#define MALI_PRINT(args) do{ \ ++ MALI_PRINTF(("Mali: ")); \ ++ MALI_PRINTF(args); \ ++ } while (0) + -+#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4) ++#ifdef DEBUG ++#ifndef mali_debug_level ++extern int mali_debug_level; ++#endif + -+#endif /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */ ++#define MALI_DEBUG_CODE(code) code ++#define MALI_DEBUG_PRINT(level, args) do { \ ++ if((level) <= mali_debug_level)\ ++ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } \ ++ } while (0) + -+#endif /* __MALI_OSK_PROFILING_H__ */ ++#define MALI_DEBUG_PRINT_ERROR(args) MALI_PRINT_ERROR(args) + ++#define MALI_DEBUG_PRINT_IF(level,condition,args) \ ++ if((condition)&&((level) <= mali_debug_level))\ ++ {MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } + -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h ++#define MALI_DEBUG_PRINT_ELSE(level, args)\ ++ else if((level) <= mali_debug_level)\ ++ { MALI_PRINTF(("Mali<" #level ">: ")); MALI_PRINTF(args); } ++ ++/** ++ * @note these variants of DEBUG ASSERTS will cause a debugger breakpoint ++ * to be entered (see _mali_osk_break() ). An alternative would be to call ++ * _mali_osk_abort(), on OSs that support it. ++ */ ++#define MALI_DEBUG_PRINT_ASSERT(condition, args) do {if( !(condition)) { MALI_PRINT_ERROR(args); _mali_osk_break(); } } while(0) ++#define MALI_DEBUG_ASSERT_POINTER(pointer) do {if( (pointer)== NULL) {MALI_PRINT_ERROR(("NULL pointer " #pointer)); _mali_osk_break();} } while(0) ++#define MALI_DEBUG_ASSERT(condition) do {if( !(condition)) {MALI_PRINT_ERROR(("ASSERT failed: " #condition )); _mali_osk_break();} } while(0) ++ ++#else /* DEBUG */ ++ ++#define MALI_DEBUG_CODE(code) ++#define MALI_DEBUG_PRINT(string,args) do {} while(0) ++#define MALI_DEBUG_PRINT_ERROR(args) do {} while(0) ++#define MALI_DEBUG_PRINT_IF(level,condition,args) do {} while(0) ++#define MALI_DEBUG_PRINT_ELSE(level,condition,args) do {} while(0) ++#define MALI_DEBUG_PRINT_ASSERT(condition,args) do {} while(0) ++#define MALI_DEBUG_ASSERT_POINTER(pointer) do {} while(0) ++#define MALI_DEBUG_ASSERT(condition) do {} while(0) ++ ++#endif /* DEBUG */ ++ ++/** ++ * variables from user space cannot be dereferenced from kernel space; tagging them ++ * with __user allows the GCC compiler to generate a warning. Other compilers may ++ * not support this so we define it here as an empty macro if the compiler doesn't ++ * define it. 
++ */ ++#ifndef __user ++#define __user ++#endif ++ ++#endif /* __MALI_KERNEL_COMMON_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c new file mode 100644 -index 000000000..b6fa94ce1 +index 000000000..87f97b710 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h -@@ -0,0 +1,471 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.c +@@ -0,0 +1,1349 @@ +/* + * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * @@ -292679,1843 +295713,1862 @@ index 000000000..b6fa94ce1 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_osk_types.h -+ * Defines types of the OS abstraction layer for the kernel device driver (OSK) -+ */ -+ -+#ifndef __MALI_OSK_TYPES_H__ -+#define __MALI_OSK_TYPES_H__ -+ -+#ifdef __cplusplus -+extern "C" { ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_ukk.h" ++#include "mali_kernel_core.h" ++#include "mali_memory.h" ++#include "mali_mem_validation.h" ++#include "mali_mmu.h" ++#include "mali_mmu_page_directory.h" ++#include "mali_dlbu.h" ++#include "mali_broadcast.h" ++#include "mali_gp.h" ++#include "mali_pp.h" ++#include "mali_executor.h" ++#include "mali_pp_job.h" ++#include "mali_group.h" ++#include "mali_pm.h" ++#include "mali_pmu.h" ++#include "mali_scheduler.h" ++#include "mali_kernel_utilization.h" ++#include "mali_l2_cache.h" ++#include "mali_timeline.h" ++#include "mali_soft_job.h" ++#include "mali_pm_domain.h" ++#if defined(CONFIG_MALI400_PROFILING) ++#include "mali_osk_profiling.h" +#endif -+ -+/** -+ * @addtogroup uddapi Unified Device Driver (UDD) APIs -+ * -+ * @{ -+ */ -+ -+/** -+ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs -+ * -+ * @{ -+ */ -+ -+/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types -+ * @{ */ -+ -+/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */ -+#ifndef __KERNEL__ -+typedef unsigned char u8; -+typedef signed char s8; -+typedef unsigned short u16; -+typedef signed short s16; -+typedef unsigned int u32; -+typedef signed int s32; -+typedef unsigned long long u64; -+#define BITS_PER_LONG (sizeof(long)*8) -+#else -+/* Ensure Linux types u32, etc. are defined */ -+#include ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++#include "mali_profiling_internal.h" +#endif -+ -+/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE -+ */ -+typedef unsigned long mali_bool; -+ -+#ifndef MALI_TRUE -+#define MALI_TRUE ((mali_bool)1) ++#include "mali_control_timer.h" ++#include "mali_dvfs_policy.h" ++#include ++#include ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#include ++#else ++#include +#endif -+ -+#ifndef MALI_FALSE -+#define MALI_FALSE ((mali_bool)0) +#endif + -+#define MALI_HW_CORE_NO_COUNTER ((u32)-1) ++#define MALI_SHARED_MEMORY_DEFAULT_SIZE 0xffffffff + ++/* Mali GPU memory. Real values come from module parameter or from device specific data */ ++unsigned int mali_dedicated_mem_start = 0; ++unsigned int mali_dedicated_mem_size = 0; + -+#define MALI_S32_MAX 0x7fffffff ++/* Default shared memory size is set to 4G. 
*/ ++unsigned int mali_shared_mem_size = MALI_SHARED_MEMORY_DEFAULT_SIZE; + -+/** -+ * @brief OSK Error codes -+ * -+ * Each OS may use its own set of error codes, and may require that the -+ * User/Kernel interface take certain error code. This means that the common -+ * error codes need to be sufficiently rich to pass the correct error code -+ * thorugh from the OSK to U/K layer, across all OSs. -+ * -+ * The result is that some error codes will appear redundant on some OSs. -+ * Under all OSs, the OSK layer must translate native OS error codes to -+ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from -+ * _mali_osk_errcode_t codes to native OS error codes. -+ */ -+typedef enum { -+ _MALI_OSK_ERR_OK = 0, /**< Success. */ -+ _MALI_OSK_ERR_FAULT = -1, /**< General non-success */ -+ _MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */ -+ _MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */ -+ _MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */ -+ _MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */ -+ _MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */ -+ _MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */ -+ _MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */ -+ _MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */ -+} _mali_osk_errcode_t; -+ -+/** @} */ /* end group _mali_osk_miscellaneous */ ++/* Frame buffer memory to be accessible by Mali GPU */ ++int mali_fb_start = 0; ++int mali_fb_size = 0; + -+/** @defgroup _mali_osk_wq OSK work queues -+ * @{ */ ++/* Mali max job runtime */ ++extern int mali_max_job_runtime; + -+/** @brief Private type for work objects */ -+typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t; -+typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t; ++/** Start profiling from module load? */ ++int mali_boot_profiling = 0; + -+/** @brief Work queue handler function -+ * -+ * This function type is called when the work is scheduled by the work queue, -+ * e.g. as an IRQ bottom-half handler. -+ * -+ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the -+ * work-queue and work handlers. -+ * -+ * @param arg resource-specific data -+ */ -+typedef void (*_mali_osk_wq_work_handler_t)(void *arg); ++/** Limits for the number of PP cores behind each L2 cache. */ ++int mali_max_pp_cores_group_1 = 0xFF; ++int mali_max_pp_cores_group_2 = 0xFF; + -+/* @} */ /* end group _mali_osk_wq */ ++int mali_inited_pp_cores_group_1 = 0; ++int mali_inited_pp_cores_group_2 = 0; + -+/** @defgroup _mali_osk_irq OSK IRQ handling -+ * @{ */ ++static _mali_product_id_t global_product_id = _MALI_PRODUCT_ID_UNKNOWN; ++static uintptr_t global_gpu_base_address = 0; ++static u32 global_gpu_major_version = 0; ++static u32 global_gpu_minor_version = 0; + -+/** @brief Private type for IRQ handling objects */ -+typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t; ++mali_bool mali_gpu_class_is_mali450 = MALI_FALSE; ++mali_bool mali_gpu_class_is_mali470 = MALI_FALSE; + -+/** @brief Optional function to trigger an irq from a resource -+ * -+ * This function is implemented by the common layer to allow probing of a resource's IRQ. 
-+ * @param arg resource-specific data */ -+typedef void (*_mali_osk_irq_trigger_t)(void *arg); ++static _mali_osk_errcode_t mali_set_global_gpu_base_address(void) ++{ ++ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; + -+/** @brief Optional function to acknowledge an irq from a resource -+ * -+ * This function is implemented by the common layer to allow probing of a resource's IRQ. -+ * @param arg resource-specific data -+ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */ -+typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg); ++ global_gpu_base_address = _mali_osk_resource_base_address(); ++ if (0 == global_gpu_base_address) { ++ err = _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + -+/** @brief IRQ 'upper-half' handler callback. -+ * -+ * This function is implemented by the common layer to do the initial handling of a -+ * resource's IRQ. This maps on to the concept of an ISR that does the minimum -+ * work necessary before handing off to an IST. -+ * -+ * The communication of the resource-specific data from the ISR to the IST is -+ * handled by the OSK implementation. -+ * -+ * On most systems, the IRQ upper-half handler executes in IRQ context. -+ * Therefore, the system may have restrictions about what can be done in this -+ * context -+ * -+ * If an IRQ upper-half handler requires more work to be done than can be -+ * acheived in an IRQ context, then it may defer the work with -+ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for -+ * more information. -+ * -+ * @param arg resource-specific data -+ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable -+ * _mali_osk_errcode_t otherwise. -+ */ -+typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg); ++ return err; ++} + ++static u32 mali_get_bcast_id(_mali_osk_resource_t *resource_pp) ++{ ++ switch (resource_pp->base - global_gpu_base_address) { ++ case 0x08000: ++ case 0x20000: /* fall-through for aliased mapping */ ++ return 0x01; ++ case 0x0A000: ++ case 0x22000: /* fall-through for aliased mapping */ ++ return 0x02; ++ case 0x0C000: ++ case 0x24000: /* fall-through for aliased mapping */ ++ return 0x04; ++ case 0x0E000: ++ case 0x26000: /* fall-through for aliased mapping */ ++ return 0x08; ++ case 0x28000: ++ return 0x10; ++ case 0x2A000: ++ return 0x20; ++ case 0x2C000: ++ return 0x40; ++ case 0x2E000: ++ return 0x80; ++ default: ++ return 0; ++ } ++} + -+/** @} */ /* end group _mali_osk_irq */ ++static _mali_osk_errcode_t mali_parse_product_info(void) ++{ ++ _mali_osk_resource_t first_pp_resource; + ++ /* Find the first PP core resource (again) */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PP0, &first_pp_resource)) { ++ /* Create a dummy PP object for this core so that we can read the version register */ ++ struct mali_group *group = mali_group_create(NULL, NULL, NULL, MALI_DOMAIN_INDEX_PP0); ++ if (NULL != group) { ++ struct mali_pp_core *pp_core = mali_pp_create(&first_pp_resource, group, MALI_FALSE, mali_get_bcast_id(&first_pp_resource)); ++ if (NULL != pp_core) { ++ u32 pp_version; + -+/** @defgroup _mali_osk_atomic OSK Atomic counters -+ * @{ */ ++ pp_version = mali_pp_core_get_version(pp_core); + -+/** @brief Public type of atomic counters -+ * -+ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value. -+ * On others, it could be encapsulating an object stored elsewhere. 
-+ * -+ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used -+ * for all accesses to the variable's value, even if atomicity is not required. -+ * Do not access u.val or u.obj directly. -+ */ -+typedef struct { -+ union { -+ u32 val; -+ void *obj; -+ } u; -+} _mali_osk_atomic_t; -+/** @} */ /* end group _mali_osk_atomic */ ++ mali_group_delete(group); + ++ global_gpu_major_version = (pp_version >> 8) & 0xFF; ++ global_gpu_minor_version = pp_version & 0xFF; + -+/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks -+ * @{ */ ++ switch (pp_version >> 16) { ++ case MALI200_PP_PRODUCT_ID: ++ global_product_id = _MALI_PRODUCT_ID_MALI200; ++ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-200 r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); ++ MALI_PRINT_ERROR(("Mali-200 is not supported by this driver.\n")); ++ _mali_osk_abort(); ++ break; ++ case MALI300_PP_PRODUCT_ID: ++ global_product_id = _MALI_PRODUCT_ID_MALI300; ++ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-300 r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); ++ break; ++ case MALI400_PP_PRODUCT_ID: ++ global_product_id = _MALI_PRODUCT_ID_MALI400; ++ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-400 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); ++ break; ++ case MALI450_PP_PRODUCT_ID: ++ global_product_id = _MALI_PRODUCT_ID_MALI450; ++ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-450 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); ++ break; ++ case MALI470_PP_PRODUCT_ID: ++ global_product_id = _MALI_PRODUCT_ID_MALI470; ++ MALI_DEBUG_PRINT(2, ("Found Mali GPU Mali-470 MP r%up%u\n", global_gpu_major_version, global_gpu_minor_version)); ++ break; ++ default: ++ MALI_DEBUG_PRINT(2, ("Found unknown Mali GPU (r%up%u)\n", global_gpu_major_version, global_gpu_minor_version)); ++ return _MALI_OSK_ERR_FAULT; ++ } + ++ return _MALI_OSK_ERR_OK; ++ } else { ++ MALI_PRINT_ERROR(("Failed to create initial PP object\n")); ++ } ++ } else { ++ MALI_PRINT_ERROR(("Failed to create initial group object\n")); ++ } ++ } else { ++ MALI_PRINT_ERROR(("First PP core not specified in config file\n")); ++ } + -+/** @brief OSK Mutual Exclusion Lock ordered list -+ * -+ * This lists the various types of locks in the system and is used to check -+ * that locks are taken in the correct order. -+ * -+ * - Holding more than one lock of the same order at the same time is not -+ * allowed. -+ * - Taking a lock of a lower order than the highest-order lock currently held -+ * is not allowed. -+ * -+ */ -+typedef enum { -+ /* || Locks || */ -+ /* || must be || */ -+ /* _||_ taken in _||_ */ -+ /* \ / this \ / */ -+ /* \/ order! 
\/ */ ++ return _MALI_OSK_ERR_FAULT; ++} + -+ _MALI_OSK_LOCK_ORDER_FIRST = 0, ++static void mali_delete_groups(void) ++{ ++ struct mali_group *group; + -+ _MALI_OSK_LOCK_ORDER_SESSIONS, -+ _MALI_OSK_LOCK_ORDER_MEM_SESSION, -+ _MALI_OSK_LOCK_ORDER_MEM_INFO, -+ _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE, -+ _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP, -+ _MALI_OSK_LOCK_ORDER_PM_EXECUTION, -+ _MALI_OSK_LOCK_ORDER_EXECUTOR, -+ _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM, -+ _MALI_OSK_LOCK_ORDER_SCHEDULER, -+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED, -+ _MALI_OSK_LOCK_ORDER_PROFILING, -+ _MALI_OSK_LOCK_ORDER_L2, -+ _MALI_OSK_LOCK_ORDER_L2_COMMAND, -+ _MALI_OSK_LOCK_ORDER_UTILIZATION, -+ _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS, -+ _MALI_OSK_LOCK_ORDER_PM_STATE, ++ group = mali_group_get_glob_group(0); ++ while (NULL != group) { ++ mali_group_delete(group); ++ group = mali_group_get_glob_group(0); ++ } + -+ _MALI_OSK_LOCK_ORDER_LAST, -+} _mali_osk_lock_order_t; ++ MALI_DEBUG_ASSERT(0 == mali_group_get_glob_num_groups()); ++} + ++static void mali_delete_l2_cache_cores(void) ++{ ++ struct mali_l2_cache_core *l2; + -+/** @brief OSK Mutual Exclusion Lock flags type -+ * -+ * - Any lock can use the order parameter. -+ */ -+typedef enum { -+ _MALI_OSK_LOCKFLAG_UNORDERED = 0x1, /**< Indicate that the order of this lock should not be checked */ -+ _MALI_OSK_LOCKFLAG_ORDERED = 0x2, -+ /** @enum _mali_osk_lock_flags_t -+ * -+ * Flags from 0x10000--0x80000000 are RESERVED for User-mode */ ++ l2 = mali_l2_cache_core_get_glob_l2_core(0); ++ while (NULL != l2) { ++ mali_l2_cache_delete(l2); ++ l2 = mali_l2_cache_core_get_glob_l2_core(0); ++ } + -+} _mali_osk_lock_flags_t; ++ MALI_DEBUG_ASSERT(0 == mali_l2_cache_core_get_glob_num_l2_cores()); ++} + -+/** @brief Mutual Exclusion Lock Mode Optimization hint -+ * -+ * The lock mode is used to implement the read/write locking of locks when we call -+ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can -+ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for -+ * writers, and so will wait for all readers to release the lock (if any present). -+ * Further readers and writers will wait until the writer releases the lock. -+ * -+ * The mode is purely an optimization hint: for example, it is permissible for -+ * all locks to behave in RW mode, regardless of that supplied. -+ * -+ * It is an error to attempt to use locks in anything other that RW mode when -+ * call functions _mali_osk_mutex_rw_wait/signal(). -+ * -+ */ -+typedef enum { -+ _MALI_OSK_LOCKMODE_UNDEF = -1, /**< Undefined lock mode. For internal use only */ -+ _MALI_OSK_LOCKMODE_RW = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */ -+ _MALI_OSK_LOCKMODE_RO, /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. 
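The ordered-lock list above encodes two rules: never hold two locks of the same order at once, and never take a lock whose order is not higher than the highest one already held. The tiny standalone checker below captures both rules with a single comparison; it is a simplified stand-in for the per-thread tracking a debug build would do (presumably via the _mali_osk_lock_debug_s type declared in this header), and the enum is only a subset of the real orders, kept in the same relative order.

#include <assert.h>
#include <stdio.h>

/* Subset of _mali_osk_lock_order_t, same relative ordering. */
enum lock_order {
        ORDER_FIRST = 0,
        ORDER_SESSIONS,
        ORDER_MEM_SESSION,
        ORDER_SCHEDULER,
        ORDER_PM_STATE,
        ORDER_LAST
};

/* Highest order currently held (would be per-thread in a real implementation). */
static int highest_held = -1;

/* Returns the previous value so the caller can restore it on unlock. */
static int debug_lock_taken(enum lock_order order)
{
        int prev = highest_held;

        /* Both rules collapse into one check: the new order must be strictly
         * greater than anything already held. */
        assert((int)order > highest_held && "lock order violation");
        highest_held = (int)order;
        return prev;
}

static void debug_lock_released(int prev)
{
        highest_held = prev;
}

int main(void)
{
        int a = debug_lock_taken(ORDER_SESSIONS);   /* ok: nothing held yet */
        int b = debug_lock_taken(ORDER_SCHEDULER);  /* ok: strictly higher order */
        /* debug_lock_taken(ORDER_MEM_SESSION); would trip the assert: lower order */
        debug_lock_released(b);
        debug_lock_released(a);
        printf("lock ordering respected\n");
        return 0;
}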
*/ -+ /** @enum _mali_osk_lock_mode_t -+ * -+ * Lock modes 0x40--0x7F are RESERVED for User-mode */ -+} _mali_osk_lock_mode_t; ++static struct mali_l2_cache_core *mali_create_l2_cache_core(_mali_osk_resource_t *resource, u32 domain_index) ++{ ++ struct mali_l2_cache_core *l2_cache = NULL; + -+/** @brief Private types for Mutual Exclusion lock objects */ -+typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t; -+typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t; -+typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t; -+typedef struct _mali_osk_mutex_s _mali_osk_mutex_t; -+typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t; ++ if (NULL != resource) { + -+/** @} */ /* end group _mali_osk_lock */ ++ MALI_DEBUG_PRINT(3, ("Found L2 cache %s\n", resource->description)); + -+/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations -+ * @{ */ ++ l2_cache = mali_l2_cache_create(resource, domain_index); ++ if (NULL == l2_cache) { ++ MALI_PRINT_ERROR(("Failed to create L2 cache object\n")); ++ return NULL; ++ } ++ } ++ MALI_DEBUG_PRINT(3, ("Created L2 cache core object\n")); + -+/** -+ * @brief Private data type for use in IO accesses to/from devices. -+ * -+ * This represents some range that is accessible from the device. Examples -+ * include: -+ * - Device Registers, which could be readable and/or writeable. -+ * - Memory that the device has access to, for storing configuration structures. -+ * -+ * Access to this range must be made through the _mali_osk_mem_ioread32() and -+ * _mali_osk_mem_iowrite32() functions. -+ */ -+typedef struct _mali_io_address *mali_io_address; ++ return l2_cache; ++} + -+/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros. -+ * -+ * The order of the page size is supplied for -+ * ease of use by algorithms that might require it, since it is easier to know -+ * it ahead of time rather than calculating it. -+ * -+ * The Mali Page Mask macro masks off the lower bits of a physical address to -+ * give the start address of the page for that physical address. -+ * -+ * @note The Mali device driver code is designed for systems with 4KB page size. -+ * Changing these macros will not make the entire Mali device driver work with -+ * page sizes other than 4KB. -+ * -+ * @note The CPU Physical Page Size has been assumed to be the same as the Mali -+ * Physical Page Size. -+ * -+ * @{ -+ */ ++static _mali_osk_errcode_t mali_parse_config_l2_cache(void) ++{ ++ struct mali_l2_cache_core *l2_cache = NULL; + -+/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */ -+#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12) -+/** CPU Page Size, in bytes. */ -+#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) -+/** CPU Page Mask, which masks off the offset within a page */ -+#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1))) -+/** @} */ /* end of group _MALI_OSK_CPU_PAGE */ ++ if (mali_is_mali400()) { ++ _mali_osk_resource_t l2_resource; ++ if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI400_OFFSET_L2_CACHE0, &l2_resource)) { ++ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache in config file\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros -+ * -+ * Mali Physical page size macros. The order of the page size is supplied for -+ * ease of use by algorithms that might require it, since it is easier to know -+ * it ahead of time rather than calculating it. 
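The CPU page macros above fix the page order at 12, so the page size is 1 << 12 = 4096 bytes and the mask clears the low 12 bits of an address to give the page start. A quick standalone check of that arithmetic follows; the local macro names stand in for the _MALI_OSK_CPU_PAGE_* ones so the sample is self-contained.

#include <stdint.h>
#include <stdio.h>

#define CPU_PAGE_ORDER ((uint32_t)12)                        /* log2(4096) */
#define CPU_PAGE_SIZE  (((uint32_t)1) << CPU_PAGE_ORDER)     /* 0x1000 */
#define CPU_PAGE_MASK  (~(CPU_PAGE_SIZE - ((uint32_t)1)))    /* 0xFFFFF000 */

int main(void)
{
        uint32_t phys = 0x12345ABC;

        /* Masking drops the in-page offset and yields the page start address. */
        printf("page size  = 0x%X\n", CPU_PAGE_SIZE);            /* 0x1000 */
        printf("page start = 0x%08X\n", phys & CPU_PAGE_MASK);   /* 0x12345000 */
        printf("offset     = 0x%03X\n", phys & ~CPU_PAGE_MASK);  /* 0xABC */
        return 0;
}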
-+ * -+ * The Mali Page Mask macro masks off the lower bits of a physical address to -+ * give the start address of the page for that physical address. -+ * -+ * @note The Mali device driver code is designed for systems with 4KB page size. -+ * Changing these macros will not make the entire Mali device driver work with -+ * page sizes other than 4KB. -+ * -+ * @note The Mali Physical Page Size has been assumed to be the same as the CPU -+ * Physical Page Size. -+ * -+ * @{ -+ */ ++ l2_cache = mali_create_l2_cache_core(&l2_resource, MALI_DOMAIN_INDEX_L20); ++ if (NULL == l2_cache) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else if (mali_is_mali450()) { ++ /* ++ * L2 for GP at 0x10000 ++ * L2 for PP0-3 at 0x01000 ++ * L2 for PP4-7 at 0x11000 (optional) ++ */ + -+/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */ -+#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT -+/** Mali Page Size, in bytes. */ -+#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE -+/** Mali Page Mask, which masks off the offset within a page */ -+#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK -+/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/ ++ _mali_osk_resource_t l2_gp_resource; ++ _mali_osk_resource_t l2_pp_grp0_resource; ++ _mali_osk_resource_t l2_pp_grp1_resource; + -+/** @brief flags for mapping a user-accessible memory range -+ * -+ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one -+ * of the function parameters, it will use one of these. These allow per-page -+ * control over mappings. Compare with the mali_memory_allocation_flag type, -+ * which acts over an entire range -+ * -+ * These may be OR'd together with bitwise OR (|), but must be cast back into -+ * the type after OR'ing. -+ */ -+typedef enum { -+ _MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */ -+} _mali_osk_mem_mapregion_flags_t; -+/** @} */ /* end group _mali_osk_low_level_memory */ ++ /* Make cluster for GP's L2 */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE0, &l2_gp_resource)) { ++ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for GP\n")); ++ l2_cache = mali_create_l2_cache_core(&l2_gp_resource, MALI_DOMAIN_INDEX_L20); ++ if (NULL == l2_cache) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for GP in config file\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/** @defgroup _mali_osk_notification OSK Notification Queues -+ * @{ */ ++ /* Find corresponding l2 domain */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE1, &l2_pp_grp0_resource)) { ++ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 0\n")); ++ l2_cache = mali_create_l2_cache_core(&l2_pp_grp0_resource, MALI_DOMAIN_INDEX_L21); ++ if (NULL == l2_cache) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for PP group 0 in config file\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/** @brief Private type for notification queue objects */ -+typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t; ++ /* Second PP core group is optional, don't fail if we don't find it */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI450_OFFSET_L2_CACHE2, &l2_pp_grp1_resource)) { ++ MALI_DEBUG_PRINT(3, ("Creating Mali-450 L2 cache core for PP group 1\n")); ++ l2_cache = mali_create_l2_cache_core(&l2_pp_grp1_resource, MALI_DOMAIN_INDEX_L22); ++ if (NULL == l2_cache) { ++ return 
_MALI_OSK_ERR_FAULT; ++ } ++ } ++ } else if (mali_is_mali470()) { ++ _mali_osk_resource_t l2c1_resource; + -+/** @brief Public notification data object type */ -+typedef struct _mali_osk_notification_t_struct { -+ u32 notification_type; /**< The notification type */ -+ u32 result_buffer_size; /**< Size of the result buffer to copy to user space */ -+ void *result_buffer; /**< Buffer containing any type specific data */ -+} _mali_osk_notification_t; ++ /* Make cluster for L2C1 */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI470_OFFSET_L2_CACHE1, &l2c1_resource)) { ++ MALI_DEBUG_PRINT(3, ("Creating Mali-470 L2 cache 1\n")); ++ l2_cache = mali_create_l2_cache_core(&l2c1_resource, MALI_DOMAIN_INDEX_L21); ++ if (NULL == l2_cache) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ MALI_DEBUG_PRINT(3, ("Did not find required Mali L2 cache for L2C1\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } + -+/** @} */ /* end group _mali_osk_notification */ ++ return _MALI_OSK_ERR_OK; ++} + ++static struct mali_group *mali_create_group(struct mali_l2_cache_core *cache, ++ _mali_osk_resource_t *resource_mmu, ++ _mali_osk_resource_t *resource_gp, ++ _mali_osk_resource_t *resource_pp, ++ u32 domain_index) ++{ ++ struct mali_mmu_core *mmu; ++ struct mali_group *group; + -+/** @defgroup _mali_osk_timer OSK Timer Callbacks -+ * @{ */ ++ MALI_DEBUG_PRINT(3, ("Starting new group for MMU %s\n", resource_mmu->description)); + -+/** @brief Function to call when a timer expires -+ * -+ * When a timer expires, this function is called. Note that on many systems, -+ * a timer callback will be executed in IRQ context. Therefore, restrictions -+ * may apply on what can be done inside the timer callback. -+ * -+ * If a timer requires more work to be done than can be acheived in an IRQ -+ * context, then it may defer the work with a work-queue. For example, it may -+ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler -+ * to carry out the remaining work. -+ * -+ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of -+ * the callback. Therefore, the callback may not obtain any mutexes also held -+ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur. 
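mali_create_group() above attaches the MMU, GP and PP cores to a freshly created group and, when any attach fails, only calls mali_group_delete(); the comments rely on the rule that whatever has already been linked into the group is released together with it. The toy sketch below shows that single-rollback-point ownership pattern; all types and names here are invented for the example.

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for group/core objects; once a core is attached,
 * the group frees it in group_delete(). */
struct core  { const char *name; };
struct group { struct core *mmu; struct core *pp; };

static struct core *core_create(const char *name, int fail)
{
        struct core *c;
        if (fail) return NULL;                 /* simulate an attach failure */
        c = malloc(sizeof(*c));
        if (c) c->name = name;
        return c;
}

static void group_delete(struct group *g)
{
        /* Deleting the group releases every core already linked into it. */
        free(g->mmu);
        free(g->pp);
        free(g);
}

static struct group *group_create(int fail_pp)
{
        struct group *g = calloc(1, sizeof(*g));
        if (!g) return NULL;

        g->mmu = core_create("mmu", 0);
        if (!g->mmu) { group_delete(g); return NULL; }

        g->pp = core_create("pp", fail_pp);
        if (!g->pp) { group_delete(g); return NULL; }   /* single rollback point */

        return g;
}

int main(void)
{
        struct group *g = group_create(0);
        printf("create ok: %s\n", g ? "yes" : "no");
        if (g) group_delete(g);
        printf("create with PP failure: %s\n",
               group_create(1) ? "unexpected" : "rolled back");
        return 0;
}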
-+ * -+ * @param arg Function-specific data */ -+typedef void (*_mali_osk_timer_callback_t)(void *arg); ++ /* Create the group object */ ++ group = mali_group_create(cache, NULL, NULL, domain_index); ++ if (NULL == group) { ++ MALI_PRINT_ERROR(("Failed to create group object for MMU %s\n", resource_mmu->description)); ++ return NULL; ++ } + -+/** @brief Private type for Timer Callback Objects */ -+typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t; -+/** @} */ /* end group _mali_osk_timer */ ++ /* Create the MMU object inside group */ ++ mmu = mali_mmu_create(resource_mmu, group, MALI_FALSE); ++ if (NULL == mmu) { ++ MALI_PRINT_ERROR(("Failed to create MMU object\n")); ++ mali_group_delete(group); ++ return NULL; ++ } + ++ if (NULL != resource_gp) { ++ /* Create the GP core object inside this group */ ++ struct mali_gp_core *gp_core = mali_gp_create(resource_gp, group); ++ if (NULL == gp_core) { ++ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */ ++ MALI_PRINT_ERROR(("Failed to create GP object\n")); ++ mali_group_delete(group); ++ return NULL; ++ } ++ } + -+/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists -+ * @{ */ ++ if (NULL != resource_pp) { ++ struct mali_pp_core *pp_core; + -+/** @brief Public List objects. -+ * -+ * To use, add a _mali_osk_list_t member to the structure that may become part -+ * of a list. When traversing the _mali_osk_list_t objects, use the -+ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its -+ *_mali_osk_list_t member -+ * -+ * Each structure may have multiple _mali_osk_list_t members, so that the -+ * structure is part of multiple lists. When traversing lists, ensure that the -+ * correct _mali_osk_list_t member is used, because type-checking will be -+ * lost by the compiler. -+ */ -+typedef struct _mali_osk_list_s { -+ struct _mali_osk_list_s *next; -+ struct _mali_osk_list_s *prev; -+} _mali_osk_list_t; -+/** @} */ /* end group _mali_osk_list */ ++ /* Create the PP core object inside this group */ ++ pp_core = mali_pp_create(resource_pp, group, MALI_FALSE, mali_get_bcast_id(resource_pp)); ++ if (NULL == pp_core) { ++ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */ ++ MALI_PRINT_ERROR(("Failed to create PP object\n")); ++ mali_group_delete(group); ++ return NULL; ++ } ++ } + -+/** @addtogroup _mali_osk_miscellaneous -+ * @{ */ ++ return group; ++} + -+/** @brief resource description struct -+ * -+ * Platform independent representation of a Mali HW resource -+ */ -+typedef struct _mali_osk_resource { -+ const char *description; /**< short description of the resource */ -+ uintptr_t base; /**< Physical base address of the resource, as seen by Mali resources. 
*/ -+ const char *irq_name; /**< Name of irq belong to this resource */ -+ u32 irq; /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */ -+} _mali_osk_resource_t; -+/** @} */ /* end group _mali_osk_miscellaneous */ ++static _mali_osk_errcode_t mali_create_virtual_group(_mali_osk_resource_t *resource_mmu_pp_bcast, ++ _mali_osk_resource_t *resource_pp_bcast, ++ _mali_osk_resource_t *resource_dlbu, ++ _mali_osk_resource_t *resource_bcast) ++{ ++ struct mali_mmu_core *mmu_pp_bcast_core; ++ struct mali_pp_core *pp_bcast_core; ++ struct mali_dlbu_core *dlbu_core; ++ struct mali_bcast_unit *bcast_core; ++ struct mali_group *group; + -+/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality -+ * @{ */ -+/** @brief Private type for wait queue objects */ -+typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t; -+/** @} */ /* end group _mali_osk_wait_queue */ ++ MALI_DEBUG_PRINT(2, ("Starting new virtual group for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description)); + -+/** @} */ /* end group osuapi */ ++ /* Create the DLBU core object */ ++ dlbu_core = mali_dlbu_create(resource_dlbu); ++ if (NULL == dlbu_core) { ++ MALI_PRINT_ERROR(("Failed to create DLBU object \n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/** @} */ /* end group uddapi */ ++ /* Create the Broadcast unit core */ ++ bcast_core = mali_bcast_unit_create(resource_bcast); ++ if (NULL == bcast_core) { ++ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n")); ++ mali_dlbu_delete(dlbu_core); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/** @brief Mali print ctx type which uses seq_file -+ */ -+typedef struct seq_file _mali_osk_print_ctx; ++ /* Create the group object */ ++#if defined(DEBUG) ++ /* Get a physical PP group to temporarily add to broadcast unit. IRQ ++ * verification needs a physical group in the broadcast unit to test ++ * the broadcast unit interrupt line. */ ++ { ++ struct mali_group *phys_group = NULL; ++ int i; ++ for (i = 0; i < mali_group_get_glob_num_groups(); i++) { ++ phys_group = mali_group_get_glob_group(i); ++ if (NULL != mali_group_get_pp_core(phys_group)) break; ++ } ++ MALI_DEBUG_ASSERT(NULL != mali_group_get_pp_core(phys_group)); + -+#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1 ++ /* Add the group temporarily to the broadcast, and update the ++ * broadcast HW. Since the HW is not updated when removing the ++ * group the IRQ check will work when the virtual PP is created ++ * later. ++ * ++ * When the virtual group gets populated, the actually used ++ * groups will be added to the broadcast unit and the HW will ++ * be updated. 
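_mali_osk_list_t above is an intrusive list: the node lives inside the owning structure and _MALI_OSK_CONTAINER_OF() recovers the owner from a pointer to its embedded node. The standalone illustration below shows the underlying offsetof() trick; the macro and struct names are local stand-ins, not the driver's.

#include <stddef.h>
#include <stdio.h>

/* Doubly-linked node embedded in the owning structure, as with _mali_osk_list_t. */
struct list_node { struct list_node *next, *prev; };

/* Recover the owning structure from a pointer to its embedded node,
 * the same arithmetic _MALI_OSK_CONTAINER_OF() relies on. */
#define CONTAINER_OF(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct job {
        int id;
        struct list_node queue_link;   /* this object can sit on a queue */
};

int main(void)
{
        struct job j = { .id = 42 };
        struct list_node *n = &j.queue_link;   /* what list traversal hands us */

        struct job *owner = CONTAINER_OF(n, struct job, queue_link);
        printf("job id recovered from list node: %d\n", owner->id);
        return 0;
}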
++ */ ++ mali_bcast_add_group(bcast_core, phys_group); ++ mali_bcast_reset(bcast_core); ++ mali_bcast_remove_group(bcast_core, phys_group); ++ } ++#endif /* DEBUG */ ++ group = mali_group_create(NULL, dlbu_core, bcast_core, MALI_DOMAIN_INDEX_DUMMY); ++ if (NULL == group) { ++ MALI_PRINT_ERROR(("Failed to create group object for MMU PP broadcast core %s\n", resource_mmu_pp_bcast->description)); ++ mali_bcast_unit_delete(bcast_core); ++ mali_dlbu_delete(dlbu_core); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+typedef struct _mali_osk_bitmap { -+ u32 reserve; -+ u32 last; -+ u32 max; -+ u32 avail; -+ _mali_osk_spinlock_t *lock; -+ unsigned long *table; -+} _mali_osk_bitmap_t; ++ /* Create the MMU object inside group */ ++ mmu_pp_bcast_core = mali_mmu_create(resource_mmu_pp_bcast, group, MALI_TRUE); ++ if (NULL == mmu_pp_bcast_core) { ++ MALI_PRINT_ERROR(("Failed to create MMU PP broadcast object\n")); ++ mali_group_delete(group); ++ return _MALI_OSK_ERR_FAULT; ++ } + ++ /* Create the PP core object inside this group */ ++ pp_bcast_core = mali_pp_create(resource_pp_bcast, group, MALI_TRUE, 0); ++ if (NULL == pp_bcast_core) { ++ /* No need to clean up now, as we will clean up everything linked in from the cluster when we fail this function */ ++ MALI_PRINT_ERROR(("Failed to create PP object\n")); ++ mali_group_delete(group); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+#ifdef __cplusplus ++ return _MALI_OSK_ERR_OK; +} -+#endif -+ -+#endif /* __MALI_OSK_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm.c b/drivers/gpu/arm/mali400/mali/common/mali_pm.c -new file mode 100644 -index 000000000..3989a33ae ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm.c -@@ -0,0 +1,1362 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#include "mali_pm.h" -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_scheduler.h" -+#include "mali_group.h" -+#include "mali_pm_domain.h" -+#include "mali_pmu.h" -+ -+#include "mali_executor.h" -+#include "mali_control_timer.h" -+ -+#if defined(DEBUG) -+u32 num_pm_runtime_resume = 0; -+u32 num_pm_updates = 0; -+u32 num_pm_updates_up = 0; -+u32 num_pm_updates_down = 0; -+#endif -+ -+#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY) -+ -+/* lock protecting power state (including pm_domains) */ -+static _mali_osk_spinlock_irq_t *pm_lock_state = NULL; + -+/* the wanted domain mask (protected by pm_lock_state) */ -+static u32 pd_mask_wanted = 0; ++static _mali_osk_errcode_t mali_parse_config_groups(void) ++{ ++ struct mali_group *group; ++ int cluster_id_gp = 0; ++ int cluster_id_pp_grp0 = 0; ++ int cluster_id_pp_grp1 = 0; ++ int i; + -+/* used to deferring the actual power changes */ -+static _mali_osk_wq_work_t *pm_work = NULL; ++ _mali_osk_resource_t resource_gp; ++ _mali_osk_resource_t resource_gp_mmu; ++ _mali_osk_resource_t resource_pp[8]; ++ _mali_osk_resource_t resource_pp_mmu[8]; ++ _mali_osk_resource_t resource_pp_mmu_bcast; ++ _mali_osk_resource_t resource_pp_bcast; ++ _mali_osk_resource_t resource_dlbu; ++ _mali_osk_resource_t resource_bcast; ++ _mali_osk_errcode_t resource_gp_found; ++ _mali_osk_errcode_t resource_gp_mmu_found; ++ _mali_osk_errcode_t resource_pp_found[8]; ++ _mali_osk_errcode_t resource_pp_mmu_found[8]; ++ _mali_osk_errcode_t resource_pp_mmu_bcast_found; ++ _mali_osk_errcode_t resource_pp_bcast_found; ++ _mali_osk_errcode_t resource_dlbu_found; ++ _mali_osk_errcode_t resource_bcast_found; + -+/* lock protecting power change execution */ -+static _mali_osk_mutex_t *pm_lock_exec = NULL; ++ if (!(mali_is_mali400() || mali_is_mali450() || mali_is_mali470())) { ++ /* No known HW core */ ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/* PMU domains which are actually powered on (protected by pm_lock_exec) */ -+static u32 pmu_mask_current = 0; ++ if (MALI_MAX_JOB_RUNTIME_DEFAULT == mali_max_job_runtime) { ++ /* Group settings are not overridden by module parameters, so use device settings */ ++ _mali_osk_device_data data = { 0, }; + -+/* -+ * domains which marked as powered on (protected by pm_lock_exec) -+ * This can be different from pmu_mask_current right after GPU power on -+ * if the PMU domains default to powered up. 
-+ */ -+static u32 pd_mask_current = 0; ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ /* Use device specific settings (if defined) */ ++ if (0 != data.max_job_runtime) { ++ mali_max_job_runtime = data.max_job_runtime; ++ } ++ } ++ } + -+static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = { -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -+ 1 << MALI_DOMAIN_INDEX_DUMMY -+}; ++ if (mali_is_mali450()) { ++ /* Mali-450 have separate L2s for GP, and PP core group(s) */ ++ cluster_id_pp_grp0 = 1; ++ cluster_id_pp_grp1 = 2; ++ } + -+/* The relative core power cost */ -+#define MALI_GP_COST 3 -+#define MALI_PP_COST 6 -+#define MALI_L2_COST 1 ++ resource_gp_found = _mali_osk_resource_find(MALI_OFFSET_GP, &resource_gp); ++ resource_gp_mmu_found = _mali_osk_resource_find(MALI_OFFSET_GP_MMU, &resource_gp_mmu); ++ resource_pp_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0, &(resource_pp[0])); ++ resource_pp_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1, &(resource_pp[1])); ++ resource_pp_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2, &(resource_pp[2])); ++ resource_pp_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3, &(resource_pp[3])); ++ resource_pp_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4, &(resource_pp[4])); ++ resource_pp_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5, &(resource_pp[5])); ++ resource_pp_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6, &(resource_pp[6])); ++ resource_pp_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7, &(resource_pp[7])); ++ resource_pp_mmu_found[0] = _mali_osk_resource_find(MALI_OFFSET_PP0_MMU, &(resource_pp_mmu[0])); ++ resource_pp_mmu_found[1] = _mali_osk_resource_find(MALI_OFFSET_PP1_MMU, &(resource_pp_mmu[1])); ++ resource_pp_mmu_found[2] = _mali_osk_resource_find(MALI_OFFSET_PP2_MMU, &(resource_pp_mmu[2])); ++ resource_pp_mmu_found[3] = _mali_osk_resource_find(MALI_OFFSET_PP3_MMU, &(resource_pp_mmu[3])); ++ resource_pp_mmu_found[4] = _mali_osk_resource_find(MALI_OFFSET_PP4_MMU, &(resource_pp_mmu[4])); ++ resource_pp_mmu_found[5] = _mali_osk_resource_find(MALI_OFFSET_PP5_MMU, &(resource_pp_mmu[5])); ++ resource_pp_mmu_found[6] = _mali_osk_resource_find(MALI_OFFSET_PP6_MMU, &(resource_pp_mmu[6])); ++ resource_pp_mmu_found[7] = _mali_osk_resource_find(MALI_OFFSET_PP7_MMU, &(resource_pp_mmu[7])); + -+/* -+ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix -+ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8. -+ */ -+static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS]; -+/* -+ * Keep track of runtime PM state, so that we know -+ * how to resume during OS resume. 
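pd_mask_wanted, pd_mask_current and pmu_mask_current above are bitmasks with one bit per PM domain, and domain_config[] holds each domain's bit. The sketch below shows how per-domain reference counts can feed such a wanted-mask, roughly what the mali_pm_domain_ref_get()/_put() calls later in this file appear to do; the exact semantics are assumed here for illustration.

#include <stdint.h>
#include <stdio.h>

#define NUM_DOMAINS 4

/* One bit per PM domain, as in domain_config[] above. */
static const uint32_t domain_bit[NUM_DOMAINS] = { 1u << 0, 1u << 1, 1u << 2, 1u << 3 };

static uint32_t use_count[NUM_DOMAINS];
static uint32_t wanted_mask;               /* analogue of pd_mask_wanted */

/* The first user of a domain turns its bit on in the wanted mask. */
static void domain_ref_get(int d)
{
        if (use_count[d]++ == 0)
                wanted_mask |= domain_bit[d];
}

/* The last user turns the bit off, signalling the domain may be powered down. */
static void domain_ref_put(int d)
{
        if (--use_count[d] == 0)
                wanted_mask &= ~domain_bit[d];
}

int main(void)
{
        domain_ref_get(0);                          /* e.g. the GP domain */
        domain_ref_get(2);                          /* e.g. one PP domain */
        printf("wanted = 0x%X\n", wanted_mask);     /* 0x5 */
        domain_ref_put(2);
        printf("wanted = 0x%X\n", wanted_mask);     /* 0x1 */
        return 0;
}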
-+ */ -+#ifdef CONFIG_PM_RUNTIME -+static mali_bool mali_pm_runtime_active = MALI_FALSE; -+#else -+/* when kernel don't enable PM_RUNTIME, set the flag always true, -+ * for GPU will not power off by runtime */ -+static mali_bool mali_pm_runtime_active = MALI_TRUE; -+#endif + -+static void mali_pm_state_lock(void); -+static void mali_pm_state_unlock(void); -+static _mali_osk_errcode_t mali_pm_create_pm_domains(void); -+static void mali_pm_set_pmu_domain_config(void); -+static u32 mali_pm_get_registered_cores_mask(void); -+static void mali_pm_update_sync_internal(void); -+static mali_bool mali_pm_common_suspend(void); -+static void mali_pm_update_work(void *data); -+#if defined(DEBUG) -+const char *mali_pm_mask_to_string(u32 mask); -+const char *mali_pm_group_stats_to_string(void); -+#endif ++ if (mali_is_mali450() || mali_is_mali470()) { ++ resource_bcast_found = _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast); ++ resource_dlbu_found = _mali_osk_resource_find(MALI_OFFSET_DLBU, &resource_dlbu); ++ resource_pp_mmu_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST_MMU, &resource_pp_mmu_bcast); ++ resource_pp_bcast_found = _mali_osk_resource_find(MALI_OFFSET_PP_BCAST, &resource_pp_bcast); + -+_mali_osk_errcode_t mali_pm_initialize(void) -+{ -+ _mali_osk_errcode_t err; -+ struct mali_pmu_core *pmu; ++ if (_MALI_OSK_ERR_OK != resource_bcast_found || ++ _MALI_OSK_ERR_OK != resource_dlbu_found || ++ _MALI_OSK_ERR_OK != resource_pp_mmu_bcast_found || ++ _MALI_OSK_ERR_OK != resource_pp_bcast_found) { ++ /* Missing mandatory core(s) for Mali-450 or Mali-470 */ ++ MALI_DEBUG_PRINT(2, ("Missing mandatory resources, Mali-450 needs DLBU, Broadcast unit, virtual PP core and virtual MMU\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } + -+ pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_PM_STATE); -+ if (NULL == pm_lock_state) { -+ mali_pm_terminate(); ++ if (_MALI_OSK_ERR_OK != resource_gp_found || ++ _MALI_OSK_ERR_OK != resource_gp_mmu_found || ++ _MALI_OSK_ERR_OK != resource_pp_found[0] || ++ _MALI_OSK_ERR_OK != resource_pp_mmu_found[0]) { ++ /* Missing mandatory core(s) */ ++ MALI_DEBUG_PRINT(2, ("Missing mandatory resource, need at least one GP and one PP, both with a separate MMU\n")); + return _MALI_OSK_ERR_FAULT; + } + -+ pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_PM_STATE); -+ if (NULL == pm_lock_exec) { -+ mali_pm_terminate(); ++ MALI_DEBUG_ASSERT(1 <= mali_l2_cache_core_get_glob_num_l2_cores()); ++ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_gp), &resource_gp_mmu, &resource_gp, NULL, MALI_DOMAIN_INDEX_GP); ++ if (NULL == group) { + return _MALI_OSK_ERR_FAULT; + } + -+ pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL); -+ if (NULL == pm_work) { -+ mali_pm_terminate(); ++ /* Create group for first (and mandatory) PP core */ ++ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= (cluster_id_pp_grp0 + 1)); /* >= 1 on Mali-300 and Mali-400, >= 2 on Mali-450 */ ++ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[0], NULL, &resource_pp[0], MALI_DOMAIN_INDEX_PP0); ++ if (NULL == group) { + return _MALI_OSK_ERR_FAULT; + } + -+ pmu = mali_pmu_get_global_pmu_core(); -+ if (NULL != pmu) { -+ /* -+ * We have a Mali PMU, set the correct domain -+ * configuration (default or custom) -+ */ ++ mali_inited_pp_cores_group_1++; + -+ u32 registered_cores_mask; ++ /* Create groups for rest of the cores in 
the first PP core group */ ++ for (i = 1; i < 4; i++) { /* First half of the PP cores belong to first core group */ ++ if (mali_inited_pp_cores_group_1 < mali_max_pp_cores_group_1) { ++ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) { ++ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp0), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i); ++ if (NULL == group) { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ mali_pm_set_pmu_domain_config(); ++ mali_inited_pp_cores_group_1++; ++ } ++ } ++ } + -+ registered_cores_mask = mali_pm_get_registered_cores_mask(); -+ mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask); ++ /* Create groups for cores in the second PP core group */ ++ for (i = 4; i < 8; i++) { /* Second half of the PP cores belong to second core group */ ++ if (mali_inited_pp_cores_group_2 < mali_max_pp_cores_group_2) { ++ if (_MALI_OSK_ERR_OK == resource_pp_found[i] && _MALI_OSK_ERR_OK == resource_pp_mmu_found[i]) { ++ MALI_DEBUG_ASSERT(mali_l2_cache_core_get_glob_num_l2_cores() >= 2); /* Only Mali-450 have a second core group */ ++ group = mali_create_group(mali_l2_cache_core_get_glob_l2_core(cluster_id_pp_grp1), &resource_pp_mmu[i], NULL, &resource_pp[i], MALI_DOMAIN_INDEX_PP0 + i); ++ if (NULL == group) { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ MALI_DEBUG_ASSERT(0 == pd_mask_wanted); ++ mali_inited_pp_cores_group_2++; ++ } ++ } + } + -+ /* Create all power domains needed (at least one dummy domain) */ -+ err = mali_pm_create_pm_domains(); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_pm_terminate(); -+ return err; ++ if (mali_is_mali450() || mali_is_mali470()) { ++ _mali_osk_errcode_t err = mali_create_virtual_group(&resource_pp_mmu_bcast, &resource_pp_bcast, &resource_dlbu, &resource_bcast); ++ if (_MALI_OSK_ERR_OK != err) { ++ return err; ++ } + } + ++ mali_max_pp_cores_group_1 = mali_inited_pp_cores_group_1; ++ mali_max_pp_cores_group_2 = mali_inited_pp_cores_group_2; ++ MALI_DEBUG_PRINT(2, ("%d+%d PP cores initialized\n", mali_inited_pp_cores_group_1, mali_inited_pp_cores_group_2)); ++ + return _MALI_OSK_ERR_OK; +} + -+void mali_pm_terminate(void) ++static _mali_osk_errcode_t mali_check_shared_interrupts(void) +{ -+ if (NULL != pm_work) { -+ _mali_osk_wq_delete_work(pm_work); -+ pm_work = NULL; -+ } -+ -+ mali_pm_domain_terminate(); -+ -+ if (NULL != pm_lock_exec) { -+ _mali_osk_mutex_term(pm_lock_exec); -+ pm_lock_exec = NULL; ++#if !defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ if (MALI_TRUE == _mali_osk_shared_interrupts()) { ++ MALI_PRINT_ERROR(("Shared interrupts detected, but driver support is not enabled\n")); ++ return _MALI_OSK_ERR_FAULT; + } ++#endif /* !defined(CONFIG_MALI_SHARED_INTERRUPTS) */ + -+ if (NULL != pm_lock_state) { -+ _mali_osk_spinlock_irq_term(pm_lock_state); -+ pm_lock_state = NULL; -+ } ++ /* It is OK to compile support for shared interrupts even if Mali is not using it. 
*/ ++ return _MALI_OSK_ERR_OK; +} + -+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index, -+ struct mali_l2_cache_core *l2_cache) ++static _mali_osk_errcode_t mali_parse_config_pmu(void) +{ -+ struct mali_pm_domain *domain; -+ -+ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]); -+ if (NULL == domain) { -+ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]); -+ domain = mali_pm_domain_get_from_index( -+ MALI_DOMAIN_INDEX_DUMMY); -+ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK; -+ } else { -+ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]); -+ } -+ -+ MALI_DEBUG_ASSERT(NULL != domain); -+ -+ mali_pm_domain_add_l2_cache(domain, l2_cache); ++ _mali_osk_resource_t resource_pmu; + -+ return domain; /* return the actual domain this was registered in */ -+} ++ MALI_DEBUG_ASSERT(0 != global_gpu_base_address); + -+struct mali_pm_domain *mali_pm_register_group(u32 domain_index, -+ struct mali_group *group) -+{ -+ struct mali_pm_domain *domain; ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_PMU, &resource_pmu)) { ++ struct mali_pmu_core *pmu; + -+ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]); -+ if (NULL == domain) { -+ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]); -+ domain = mali_pm_domain_get_from_index( -+ MALI_DOMAIN_INDEX_DUMMY); -+ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK; -+ } else { -+ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]); ++ pmu = mali_pmu_create(&resource_pmu); ++ if (NULL == pmu) { ++ MALI_PRINT_ERROR(("Failed to create PMU\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + } + -+ MALI_DEBUG_ASSERT(NULL != domain); -+ -+ mali_pm_domain_add_group(domain, group); -+ -+ return domain; /* return the actual domain this was registered in */ ++ /* It's ok if the PMU doesn't exist */ ++ return _MALI_OSK_ERR_OK; +} + -+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains, -+ struct mali_group **groups, -+ u32 num_domains) ++static _mali_osk_errcode_t mali_parse_config_memory(void) +{ -+ mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */ -+ u32 i; ++ _mali_osk_device_data data = { 0, }; ++ _mali_osk_errcode_t ret; + -+ mali_pm_state_lock(); ++ /* The priority of setting the value of mali_shared_mem_size, ++ * mali_dedicated_mem_start and mali_dedicated_mem_size: ++ * 1. module parameter; ++ * 2. platform data; ++ * 3. default value; ++ **/ ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ /* Memory settings are not overridden by module parameters, so use device settings */ ++ if (0 == mali_dedicated_mem_start && 0 == mali_dedicated_mem_size) { ++ /* Use device specific settings (if defined) */ ++ mali_dedicated_mem_start = data.dedicated_mem_start; ++ mali_dedicated_mem_size = data.dedicated_mem_size; ++ } + -+ for (i = 0; i < num_domains; i++) { -+ MALI_DEBUG_ASSERT_POINTER(domains[i]); -+ pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]); -+ if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) { -+ /* -+ * Tell caller that the corresponding group -+ * was not already powered on. -+ */ -+ ret = MALI_FALSE; -+ } else { -+ /* -+ * There is a time gap between we power on the domain and -+ * set the power state of the corresponding groups to be on. 
-+ */ -+ if (NULL != groups[i] && -+ MALI_FALSE == mali_group_power_is_on(groups[i])) { -+ ret = MALI_FALSE; -+ } ++ if (MALI_SHARED_MEMORY_DEFAULT_SIZE == mali_shared_mem_size && ++ 0 != data.shared_mem_size) { ++ mali_shared_mem_size = data.shared_mem_size; + } + } + -+ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted)); -+ -+ mali_pm_state_unlock(); -+ -+ return ret; -+} ++ if (0 < mali_dedicated_mem_size && 0 != mali_dedicated_mem_start) { ++ MALI_DEBUG_PRINT(2, ("Mali memory settings (dedicated: 0x%08X@0x%08X)\n", ++ mali_dedicated_mem_size, mali_dedicated_mem_start)); + -+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains, -+ u32 num_domains) -+{ -+ u32 mask = 0; -+ mali_bool ret; -+ u32 i; ++ /* Dedicated memory */ ++ ret = mali_memory_core_resource_dedicated_memory(mali_dedicated_mem_start, mali_dedicated_mem_size); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to register dedicated memory\n")); ++ mali_memory_terminate(); ++ return ret; ++ } ++ } + -+ mali_pm_state_lock(); ++ if (0 < mali_shared_mem_size) { ++ MALI_DEBUG_PRINT(2, ("Mali memory settings (shared: 0x%08X)\n", mali_shared_mem_size)); + -+ for (i = 0; i < num_domains; i++) { -+ MALI_DEBUG_ASSERT_POINTER(domains[i]); -+ mask |= mali_pm_domain_ref_put(domains[i]); ++ /* Shared OS memory */ ++ ret = mali_memory_core_resource_os_memory(mali_shared_mem_size); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to register shared OS memory\n")); ++ mali_memory_terminate(); ++ return ret; ++ } + } + -+ if (0 == mask) { -+ /* return false, all domains should still stay on */ -+ ret = MALI_FALSE; -+ } else { -+ /* Assert that we are dealing with a change */ -+ MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask); ++ if (0 == mali_fb_start && 0 == mali_fb_size) { ++ /* Frame buffer settings are not overridden by module parameters, so use device settings */ ++ _mali_osk_device_data data = { 0, }; + -+ /* Update our desired domain mask */ -+ pd_mask_wanted &= ~mask; ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ /* Use device specific settings (if defined) */ ++ mali_fb_start = data.fb_start; ++ mali_fb_size = data.fb_size; ++ } + -+ /* return true; one or more domains can now be powered down */ -+ ret = MALI_TRUE; ++ MALI_DEBUG_PRINT(2, ("Using device defined frame buffer settings (0x%08X@0x%08X)\n", ++ mali_fb_size, mali_fb_start)); ++ } else { ++ MALI_DEBUG_PRINT(2, ("Using module defined frame buffer settings (0x%08X@0x%08X)\n", ++ mali_fb_size, mali_fb_start)); + } + -+ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted)); -+ -+ mali_pm_state_unlock(); ++ if (0 != mali_fb_size) { ++ /* Register frame buffer */ ++ ret = mali_mem_validation_add_range(mali_fb_start, mali_fb_size); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to register frame buffer memory region\n")); ++ mali_memory_terminate(); ++ return ret; ++ } ++ } + -+ return ret; ++ return _MALI_OSK_ERR_OK; +} + -+void mali_pm_init_begin(void) ++static void mali_detect_gpu_class(void) +{ -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); -+ -+ _mali_osk_pm_dev_ref_get_sync(); ++ if (_mali_osk_identify_gpu_resource() == 0x450) ++ mali_gpu_class_is_mali450 = MALI_TRUE; + -+ /* Ensure all PMU domains are on */ -+ if (NULL != pmu) { -+ mali_pmu_power_up_all(pmu); -+ } ++ if (_mali_osk_identify_gpu_resource() == 0x470) ++ mali_gpu_class_is_mali470 = MALI_TRUE; +} + -+void mali_pm_init_end(void) ++static 
_mali_osk_errcode_t mali_init_hw_reset(void) +{ -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ _mali_osk_resource_t resource_bcast; + -+ /* Ensure all PMU domains are off */ -+ if (NULL != pmu) { -+ mali_pmu_power_down_all(pmu); ++ /* Ensure broadcast unit is in a good state before we start creating ++ * groups and cores. ++ */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_BCAST, &resource_bcast)) { ++ struct mali_bcast_unit *bcast_core; ++ ++ bcast_core = mali_bcast_unit_create(&resource_bcast); ++ if (NULL == bcast_core) { ++ MALI_PRINT_ERROR(("Failed to create Broadcast unit object!\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ mali_bcast_unit_delete(bcast_core); + } ++#endif /* (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) */ + -+ _mali_osk_pm_dev_ref_put(); ++ return _MALI_OSK_ERR_OK; +} + -+void mali_pm_update_sync(void) ++_mali_osk_errcode_t mali_initialize_subsystems(void) +{ -+ mali_pm_exec_lock(); ++ _mali_osk_errcode_t err; + -+ if (MALI_TRUE == mali_pm_runtime_active) { -+ /* -+ * Only update if GPU is powered on. -+ * Deactivation of the last group will result in both a -+ * deferred runtime PM suspend operation and -+ * deferred execution of this function. -+ * mali_pm_runtime_active will be false if runtime PM -+ * executed first and thus the GPU is now fully powered off. -+ */ -+ mali_pm_update_sync_internal(); ++#ifdef CONFIG_MALI_DT ++ err = _mali_osk_resource_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; + } ++#endif + -+ mali_pm_exec_unlock(); -+} ++ mali_pp_job_initialize(); + -+void mali_pm_update_async(void) -+{ -+ _mali_osk_wq_schedule_work(pm_work); -+} ++ err = mali_timeline_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } ++ ++ err = mali_session_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+void mali_pm_os_suspend(mali_bool os_suspend) -+{ -+ int ret; ++ /*Try to init gpu secure mode */ ++ _mali_osk_gpu_secure_mode_init(); + -+ MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n")); ++#if defined(CONFIG_MALI400_PROFILING) ++ err = _mali_osk_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE); ++ if (_MALI_OSK_ERR_OK != err) { ++ /* No biggie if we weren't able to initialize the profiling */ ++ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n")); ++ } ++#endif + -+ /* Suspend execution of all jobs, and go to inactive state */ -+ mali_executor_suspend(); ++ err = mali_memory_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ if (os_suspend) { -+ mali_control_timer_suspend(MALI_TRUE); ++ err = mali_executor_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; + } + -+ mali_pm_exec_lock(); ++ err = mali_scheduler_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ ret = mali_pm_common_suspend(); ++ /* Configure memory early, needed by mali_mmu_initialize. 
*/ ++ err = mali_parse_config_memory(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ MALI_DEBUG_ASSERT(MALI_TRUE == ret); -+ MALI_IGNORE(ret); ++ err = mali_set_global_gpu_base_address(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ mali_pm_exec_unlock(); -+} ++ /* Detect GPU class (uses L2 cache count) */ ++ mali_detect_gpu_class(); + -+void mali_pm_os_resume(void) -+{ -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ err = mali_check_shared_interrupts(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n")); ++ /* Initialize the MALI PMU (will not touch HW!) */ ++ err = mali_parse_config_pmu(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ mali_pm_exec_lock(); ++ /* Initialize the power management module */ ++ err = mali_pm_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+#if defined(DEBUG) -+ mali_pm_state_lock(); ++ /* Make sure the entire GPU stays on for the rest of this function */ ++ mali_pm_init_begin(); + -+ /* Assert that things are as we left them in os_suspend(). */ -+ MALI_DEBUG_ASSERT(0 == pd_mask_wanted); -+ MALI_DEBUG_ASSERT(0 == pd_mask_current); -+ MALI_DEBUG_ASSERT(0 == pmu_mask_current); ++ /* Ensure HW is in a good state before starting to access cores. */ ++ err = mali_init_hw_reset(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); ++ /* Detect which Mali GPU we are dealing with */ ++ err = mali_parse_product_info(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ mali_pm_state_unlock(); -+#endif ++ /* The global_product_id is now populated with the correct Mali GPU */ + -+ if (MALI_TRUE == mali_pm_runtime_active) { -+ /* Runtime PM was active, so reset PMU */ -+ if (NULL != pmu) { -+ mali_pmu_reset(pmu); -+ pmu_mask_current = mali_pmu_get_mask(pmu); ++ /* Start configuring the actual Mali hardware. 
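mali_initialize_subsystems() above follows a strict unwind discipline: every failing step calls mali_terminate_subsystems(), and once mali_pm_init_begin() has pinned the GPU on, error paths also call mali_pm_init_end() first so the "keep the GPU powered" reference is never leaked. The compact sketch below reproduces that idiom with dummy steps; everything except the pattern itself is invented.

#include <stdio.h>

typedef int err_t;                        /* 0 == OK, as with _MALI_OSK_ERR_OK */

static void pm_init_begin(void)  { printf("GPU held on for init\n"); }
static void pm_init_end(void)    { printf("GPU allowed to power down\n"); }
static void terminate_all(void)  { printf("tearing down partial init\n"); }

static err_t parse_product_info(int fail) { return fail ? -1 : 0; }

/* Same shape as mali_initialize_subsystems(): once pm_init_begin() has been
 * called, every error path must call pm_init_end() before terminate_all(). */
static err_t initialize(int fail_probe)
{
        err_t err;

        pm_init_begin();                       /* GPU stays powered from here on */

        err = parse_product_info(fail_probe);  /* e.g. reading the PP version register */
        if (err != 0) {
                pm_init_end();
                terminate_all();
                return err;
        }

        pm_init_end();                         /* init done, allow power management */
        return 0;
}

int main(void)
{
        printf("ok path: %d\n", initialize(0));
        printf("failure path: %d\n", initialize(1));
        return 0;
}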
*/ + -+ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current)); ++ err = mali_mmu_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; ++ } ++ ++ if (mali_is_mali450() || mali_is_mali470()) { ++ err = mali_dlbu_initialize(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; + } ++ } + -+ mali_pm_update_sync_internal(); ++ err = mali_parse_config_l2_cache(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; + } + -+ mali_pm_exec_unlock(); ++ err = mali_parse_config_groups(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ /* Start executing jobs again */ -+ mali_executor_resume(); -+} ++ /* Move groups into executor */ ++ mali_executor_populate(); + -+mali_bool mali_pm_runtime_suspend(void) -+{ -+ mali_bool ret; ++ /* Need call after all group has assigned a domain */ ++ mali_pm_power_cost_setup(); + -+ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n")); ++ /* Initialize the GPU timer */ ++ err = mali_control_timer_init(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ mali_pm_exec_lock(); ++ /* Initialize the GPU utilization tracking */ ++ err = mali_utilization_init(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; ++ } + -+ /* -+ * Put SW state directly into "off" state, and do not bother to power -+ * down each power domain, because entire GPU will be powered off -+ * when we return. -+ * For runtime PM suspend, in contrast to OS suspend, there is a race -+ * between this function and the mali_pm_update_sync_internal(), which -+ * is fine... 
-+ */ -+ ret = mali_pm_common_suspend(); -+ if (MALI_TRUE == ret) { -+ mali_pm_runtime_active = MALI_FALSE; -+ } else { -+ /* -+ * Process the "power up" instead, -+ * which could have been "lost" -+ */ -+ mali_pm_update_sync_internal(); ++#if defined(CONFIG_MALI_DVFS) ++ err = mali_dvfs_policy_init(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_init_end(); ++ mali_terminate_subsystems(); ++ return err; + } ++#endif + -+ mali_pm_exec_unlock(); ++ /* Allowing the system to be turned off */ ++ mali_pm_init_end(); + -+ return ret; ++ return _MALI_OSK_ERR_OK; /* all ok */ +} + -+void mali_pm_runtime_resume(void) ++void mali_terminate_subsystems(void) +{ + struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+ mali_pm_exec_lock(); ++ MALI_DEBUG_PRINT(2, ("terminate_subsystems() called\n")); + -+ mali_pm_runtime_active = MALI_TRUE; ++ mali_utilization_term(); ++ mali_control_timer_term(); + -+#if defined(DEBUG) -+ ++num_pm_runtime_resume; ++ mali_executor_depopulate(); ++ mali_delete_groups(); /* Delete groups not added to executor */ ++ mali_executor_terminate(); + -+ mali_pm_state_lock(); ++ mali_scheduler_terminate(); ++ mali_pp_job_terminate(); ++ mali_delete_l2_cache_cores(); ++ mali_mmu_terminate(); + -+ /* -+ * Assert that things are as we left them in runtime_suspend(), -+ * except for pd_mask_wanted which normally will be the reason we -+ * got here (job queued => domains wanted) -+ */ -+ MALI_DEBUG_ASSERT(0 == pd_mask_current); -+ MALI_DEBUG_ASSERT(0 == pmu_mask_current); ++ if (mali_is_mali450() || mali_is_mali470()) { ++ mali_dlbu_terminate(); ++ } + -+ mali_pm_state_unlock(); -+#endif ++ mali_pm_terminate(); + + if (NULL != pmu) { -+ mali_pmu_reset(pmu); -+ pmu_mask_current = mali_pmu_get_mask(pmu); -+ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current)); ++ mali_pmu_delete(pmu); + } + -+ /* -+ * Normally we are resumed because a job has just been queued. -+ * pd_mask_wanted should thus be != 0. -+ * It is however possible for others to take a Mali Runtime PM ref -+ * without having a job queued. -+ * We should however always call mali_pm_update_sync_internal(), -+ * because this will take care of any potential mismatch between -+ * pmu_mask_current and pd_mask_current. -+ */ -+ mali_pm_update_sync_internal(); -+ -+ mali_pm_exec_unlock(); -+} -+ -+#if MALI_STATE_TRACKING -+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain, -+ char *buf, u32 size) -+{ -+ int n = 0; -+ -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tPower domain: id %u\n", -+ mali_pm_domain_get_id(domain)); ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_term(); ++#endif + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\t\tMask: 0x%04x\n", -+ mali_pm_domain_get_mask(domain)); ++ _mali_osk_gpu_secure_mode_deinit(); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\t\tUse count: %u\n", -+ mali_pm_domain_get_use_count(domain)); ++ mali_memory_terminate(); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\t\tCurrent power state: %s\n", -+ (mali_pm_domain_get_mask(domain) & pd_mask_current) ? -+ "On" : "Off"); ++ mali_session_terminate(); + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\t\tWanted power state: %s\n", -+ (mali_pm_domain_get_mask(domain) & pd_mask_wanted) ? 
-+ "On" : "Off"); ++ mali_timeline_terminate(); + -+ return n; ++ global_gpu_base_address = 0; +} -+#endif + -+static void mali_pm_state_lock(void) ++_mali_product_id_t mali_kernel_core_get_product_id(void) +{ -+ _mali_osk_spinlock_irq_lock(pm_lock_state); ++ return global_product_id; +} + -+static void mali_pm_state_unlock(void) ++u32 mali_kernel_core_get_gpu_major_version(void) +{ -+ _mali_osk_spinlock_irq_unlock(pm_lock_state); ++ return global_gpu_major_version; +} + -+void mali_pm_exec_lock(void) ++u32 mali_kernel_core_get_gpu_minor_version(void) +{ -+ _mali_osk_mutex_wait(pm_lock_exec); ++ return global_gpu_minor_version; +} + -+void mali_pm_exec_unlock(void) ++_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args) +{ -+ _mali_osk_mutex_signal(pm_lock_exec); -+} ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+static void mali_pm_domain_power_up(u32 power_up_mask, -+ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS], -+ u32 *num_groups_up, -+ struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES], -+ u32 *num_l2_up) -+{ -+ u32 domain_bit; -+ u32 notify_mask = power_up_mask; ++ /* check compatability */ ++ if (args->version == _MALI_UK_API_VERSION) { ++ args->compatible = 1; ++ } else { ++ args->compatible = 0; ++ } + -+ MALI_DEBUG_ASSERT(0 != power_up_mask); -+ MALI_DEBUG_ASSERT_POINTER(groups_up); -+ MALI_DEBUG_ASSERT_POINTER(num_groups_up); -+ MALI_DEBUG_ASSERT(0 == *num_groups_up); -+ MALI_DEBUG_ASSERT_POINTER(l2_up); -+ MALI_DEBUG_ASSERT_POINTER(num_l2_up); -+ MALI_DEBUG_ASSERT(0 == *num_l2_up); ++ args->version = _MALI_UK_API_VERSION; /* report our version */ + -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state); ++ /* success regardless of being compatible or not */ ++ MALI_SUCCESS; ++} + -+ MALI_DEBUG_PRINT(5, -+ ("PM update: Powering up domains: . 
[%s]\n", -+ mali_pm_mask_to_string(power_up_mask))); ++_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args) ++{ ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+ pd_mask_current |= power_up_mask; ++ /* check compatability */ ++ if (args->version == _MALI_UK_API_VERSION) { ++ args->compatible = 1; ++ } else { ++ args->compatible = 0; ++ } + -+ domain_bit = _mali_osk_fls(notify_mask); -+ while (0 != domain_bit) { -+ u32 domain_id = domain_bit - 1; -+ struct mali_pm_domain *domain = -+ mali_pm_domain_get_from_index( -+ domain_id); -+ struct mali_l2_cache_core *l2_cache; -+ struct mali_l2_cache_core *l2_cache_tmp; -+ struct mali_group *group; -+ struct mali_group *group_tmp; ++ args->version = _MALI_UK_API_VERSION; /* report our version */ + -+ /* Mark domain as powered up */ -+ mali_pm_domain_set_power_on(domain, MALI_TRUE); ++ /* success regardless of being compatible or not */ ++ return _MALI_OSK_ERR_OK; ++} + -+ /* -+ * Make a note of the L2 and/or group(s) to notify -+ * (need to release the PM state lock before doing so) -+ */ ++_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args) ++{ ++ _mali_osk_errcode_t err; ++ _mali_osk_notification_t *notification; ++ _mali_osk_notification_queue_t *queue; ++ struct mali_session_data *session; + -+ _MALI_OSK_LIST_FOREACHENTRY(l2_cache, -+ l2_cache_tmp, -+ mali_pm_domain_get_l2_cache_list( -+ domain), -+ struct mali_l2_cache_core, -+ pm_domain_list) { -+ MALI_DEBUG_ASSERT(*num_l2_up < -+ MALI_MAX_NUMBER_OF_L2_CACHE_CORES); -+ l2_up[*num_l2_up] = l2_cache; -+ (*num_l2_up)++; -+ } ++ /* check input */ ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+ _MALI_OSK_LIST_FOREACHENTRY(group, -+ group_tmp, -+ mali_pm_domain_get_group_list(domain), -+ struct mali_group, -+ pm_domain_list) { -+ MALI_DEBUG_ASSERT(*num_groups_up < -+ MALI_MAX_NUMBER_OF_GROUPS); -+ groups_up[*num_groups_up] = group; ++ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ queue = session->ioctl_queue; + -+ (*num_groups_up)++; -+ } ++ /* if the queue does not exist we're currently shutting down */ ++ if (NULL == queue) { ++ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. 
Asking userspace to stop querying\n")); ++ args->type = _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS; ++ return _MALI_OSK_ERR_OK; ++ } + -+ /* Remove current bit and find next */ -+ notify_mask &= ~(1 << (domain_id)); -+ domain_bit = _mali_osk_fls(notify_mask); ++ /* receive a notification, might sleep */ ++ err = _mali_osk_notification_queue_receive(queue, ¬ification); ++ if (_MALI_OSK_ERR_OK != err) { ++ MALI_ERROR(err); /* errcode returned, pass on to caller */ + } -+} -+static void mali_pm_domain_power_down(u32 power_down_mask, -+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS], -+ u32 *num_groups_down, -+ struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES], -+ u32 *num_l2_down) -+{ -+ u32 domain_bit; -+ u32 notify_mask = power_down_mask; + -+ MALI_DEBUG_ASSERT(0 != power_down_mask); -+ MALI_DEBUG_ASSERT_POINTER(groups_down); -+ MALI_DEBUG_ASSERT_POINTER(num_groups_down); -+ MALI_DEBUG_ASSERT(0 == *num_groups_down); -+ MALI_DEBUG_ASSERT_POINTER(l2_down); -+ MALI_DEBUG_ASSERT_POINTER(num_l2_down); -+ MALI_DEBUG_ASSERT(0 == *num_l2_down); ++ /* copy the buffer to the user */ ++ args->type = (_mali_uk_notification_type)notification->notification_type; ++ _mali_osk_memcpy(&args->data, notification->result_buffer, notification->result_buffer_size); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state); ++ /* finished with the notification */ ++ _mali_osk_notification_delete(notification); + -+ MALI_DEBUG_PRINT(5, -+ ("PM update: Powering down domains: [%s]\n", -+ mali_pm_mask_to_string(power_down_mask))); ++ return _MALI_OSK_ERR_OK; /* all ok */ ++} + -+ pd_mask_current &= ~power_down_mask; ++_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args) ++{ ++ _mali_osk_notification_t *notification; ++ _mali_osk_notification_queue_t *queue; ++ struct mali_session_data *session; + -+ domain_bit = _mali_osk_fls(notify_mask); -+ while (0 != domain_bit) { -+ u32 domain_id = domain_bit - 1; -+ struct mali_pm_domain *domain = -+ mali_pm_domain_get_from_index(domain_id); -+ struct mali_l2_cache_core *l2_cache; -+ struct mali_l2_cache_core *l2_cache_tmp; -+ struct mali_group *group; -+ struct mali_group *group_tmp; ++ /* check input */ ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+ /* Mark domain as powered down */ -+ mali_pm_domain_set_power_on(domain, MALI_FALSE); ++ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ queue = session->ioctl_queue; + -+ /* -+ * Make a note of the L2s and/or groups to notify -+ * (need to release the PM state lock before doing so) -+ */ ++ /* if the queue does not exist we're currently shutting down */ ++ if (NULL == queue) { ++ MALI_DEBUG_PRINT(1, ("No notification queue registered with the session. 
Asking userspace to stop querying\n")); ++ return _MALI_OSK_ERR_OK; ++ } + -+ _MALI_OSK_LIST_FOREACHENTRY(l2_cache, -+ l2_cache_tmp, -+ mali_pm_domain_get_l2_cache_list(domain), -+ struct mali_l2_cache_core, -+ pm_domain_list) { -+ MALI_DEBUG_ASSERT(*num_l2_down < -+ MALI_MAX_NUMBER_OF_L2_CACHE_CORES); -+ l2_down[*num_l2_down] = l2_cache; -+ (*num_l2_down)++; -+ } ++ notification = _mali_osk_notification_create(args->type, 0); ++ if (NULL == notification) { ++ MALI_PRINT_ERROR(("Failed to create notification object\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ _MALI_OSK_LIST_FOREACHENTRY(group, -+ group_tmp, -+ mali_pm_domain_get_group_list(domain), -+ struct mali_group, -+ pm_domain_list) { -+ MALI_DEBUG_ASSERT(*num_groups_down < -+ MALI_MAX_NUMBER_OF_GROUPS); -+ groups_down[*num_groups_down] = group; -+ (*num_groups_down)++; -+ } ++ _mali_osk_notification_queue_send(queue, notification); + -+ /* Remove current bit and find next */ -+ notify_mask &= ~(1 << (domain_id)); -+ domain_bit = _mali_osk_fls(notify_mask); -+ } ++ return _MALI_OSK_ERR_OK; /* all ok */ +} + -+/* -+ * Execute pending power domain changes -+ * pm_lock_exec lock must be taken by caller. -+ */ -+static void mali_pm_update_sync_internal(void) ++_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args) +{ -+ /* -+ * This should only be called in non-atomic context -+ * (normally as deferred work) -+ * -+ * Look at the pending power domain changes, and execute these. -+ * Make sure group and schedulers are notified about changes. -+ */ -+ -+ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); ++ wait_queue_head_t *queue; + -+ u32 power_down_mask; -+ u32 power_up_mask; ++ /* check input */ ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ queue = mali_session_get_wait_queue(); + -+#if defined(DEBUG) -+ ++num_pm_updates; -+#endif ++ /* check pending big job number, might sleep if larger than MAX allowed number */ ++ if (wait_event_interruptible(*queue, MALI_MAX_PENDING_BIG_JOB > mali_scheduler_job_gp_big_job_count())) { ++ return _MALI_OSK_ERR_RESTARTSYSCALL; ++ } + -+ /* Hold PM state lock while we look at (and obey) the wanted state */ -+ mali_pm_state_lock(); ++ return _MALI_OSK_ERR_OK; /* all ok */ ++} + -+ MALI_DEBUG_PRINT(5, ("PM update pre: Wanted domain mask: .. [%s]\n", -+ mali_pm_mask_to_string(pd_mask_wanted))); -+ MALI_DEBUG_PRINT(5, ("PM update pre: Current domain mask: . [%s]\n", -+ mali_pm_mask_to_string(pd_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM update pre: Current PMU mask: .... [%s]\n", -+ mali_pm_mask_to_string(pmu_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM update pre: Group power stats: ... 
<%s>\n", -+ mali_pm_group_stats_to_string())); + -+ /* Figure out which cores we need to power on */ -+ power_up_mask = pd_mask_wanted & -+ (pd_mask_wanted ^ pd_mask_current); ++_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args) ++{ ++ struct mali_session_data *session; + -+ if (0 != power_up_mask) { -+ u32 power_up_mask_pmu; -+ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS]; -+ u32 num_groups_up = 0; -+ struct mali_l2_cache_core * -+ l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; -+ u32 num_l2_up = 0; -+ u32 i; ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+#if defined(DEBUG) -+ ++num_pm_updates_up; -+#endif ++ session = (struct mali_session_data *)(uintptr_t)args->ctx; + -+ /* -+ * Make sure dummy/global domain is always included when -+ * powering up, since this is controlled by runtime PM, -+ * and device power is on at this stage. -+ */ -+ power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK; ++ if (!session->use_high_priority_job_queue) { ++ session->use_high_priority_job_queue = MALI_TRUE; ++ MALI_DEBUG_PRINT(2, ("Session 0x%08X with pid %d was granted higher priority.\n", session, _mali_osk_get_pid())); ++ } + -+ /* Power up only real PMU domains */ -+ power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK; ++ return _MALI_OSK_ERR_OK; ++} + -+ /* But not those that happen to be powered on already */ -+ power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) & -+ power_up_mask; ++_mali_osk_errcode_t _mali_ukk_open(void **context) ++{ ++ u32 i; ++ struct mali_session_data *session; + -+ if (0 != power_up_mask_pmu) { -+ MALI_DEBUG_ASSERT(NULL != pmu); -+ pmu_mask_current |= power_up_mask_pmu; -+ mali_pmu_power_up(pmu, power_up_mask_pmu); -+ } ++ /* allocated struct to track this session */ ++ session = (struct mali_session_data *)_mali_osk_calloc(1, sizeof(struct mali_session_data)); ++ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_NOMEM); + -+ /* -+ * Put the domains themselves in power up state. -+ * We get the groups and L2s to notify in return. -+ */ -+ mali_pm_domain_power_up(power_up_mask, -+ groups_up, &num_groups_up, -+ l2_up, &num_l2_up); ++ MALI_DEBUG_PRINT(3, ("Session starting\n")); + -+ /* Need to unlock PM state lock before notifying L2 + groups */ -+ mali_pm_state_unlock(); ++ /* create a response queue for this session */ ++ session->ioctl_queue = _mali_osk_notification_queue_init(); ++ if (NULL == session->ioctl_queue) { ++ goto err; ++ } + -+ /* Notify each L2 cache that we have be powered up */ -+ for (i = 0; i < num_l2_up; i++) { -+ mali_l2_cache_power_up(l2_up[i]); -+ } ++ /*create a wait queue for this session */ ++ session->wait_queue = _mali_osk_wait_queue_init(); ++ if (NULL == session->wait_queue) { ++ goto err_wait_queue; ++ } + -+ /* -+ * Tell execution module about all the groups we have -+ * powered up. Groups will be notified as a result of this. 
-+ */ -+ mali_executor_group_power_up(groups_up, num_groups_up); ++ session->page_directory = mali_mmu_pagedir_alloc(); ++ if (NULL == session->page_directory) { ++ goto err_mmu; ++ } + -+ /* Lock state again before checking for power down */ -+ mali_pm_state_lock(); ++ if (_MALI_OSK_ERR_OK != mali_mmu_pagedir_map(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE)) { ++ MALI_PRINT_ERROR(("Failed to map DLBU page into session\n")); ++ goto err_mmu; + } + -+ /* Figure out which cores we need to power off */ -+ power_down_mask = pd_mask_current & -+ (pd_mask_wanted ^ pd_mask_current); ++ if (0 != mali_dlbu_phys_addr) { ++ mali_mmu_pagedir_update(session->page_directory, MALI_DLBU_VIRT_ADDR, mali_dlbu_phys_addr, ++ _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); ++ } + -+ /* -+ * Never power down the dummy/global domain here. This is to be done -+ * from a suspend request (since this domain is only physicall powered -+ * down at that point) -+ */ -+ power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK; ++ if (_MALI_OSK_ERR_OK != mali_memory_session_begin(session)) { ++ goto err_session; ++ } + -+ if (0 != power_down_mask) { -+ u32 power_down_mask_pmu; -+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS]; -+ u32 num_groups_down = 0; -+ struct mali_l2_cache_core * -+ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; -+ u32 num_l2_down = 0; -+ u32 i; ++ /* Create soft system. */ ++ session->soft_job_system = mali_soft_job_system_create(session); ++ if (NULL == session->soft_job_system) { ++ goto err_soft; ++ } + -+#if defined(DEBUG) -+ ++num_pm_updates_down; ++ /* Initialize the dma fence context.*/ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ session->fence_context = dma_fence_context_alloc(1); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) ++ session->fence_context = fence_context_alloc(1); ++ _mali_osk_atomic_init(&session->fence_seqno, 0); ++#else ++ MALI_PRINT_ERROR(("The kernel version not support dma fence!\n")); ++ goto err_time_line; ++#endif +#endif + -+ /* -+ * Put the domains themselves in power down state. -+ * We get the groups and L2s to notify in return. -+ */ -+ mali_pm_domain_power_down(power_down_mask, -+ groups_down, &num_groups_down, -+ l2_down, &num_l2_down); ++ /* Create timeline system. */ ++ session->timeline_system = mali_timeline_system_create(session); ++ if (NULL == session->timeline_system) { ++ goto err_time_line; ++ } + -+ /* Need to unlock PM state lock before notifying L2 + groups */ -+ mali_pm_state_unlock(); ++#if defined(CONFIG_MALI_DVFS) ++ _mali_osk_atomic_init(&session->number_of_window_jobs, 0); ++#endif + -+ /* -+ * Tell execution module about all the groups we will be -+ * powering down. Groups will be notified as a result of this. -+ */ -+ if (0 < num_groups_down) { -+ mali_executor_group_power_down(groups_down, num_groups_down); -+ } ++ _mali_osk_atomic_init(&session->number_of_pp_jobs, 0); + -+ /* Notify each L2 cache that we will be powering down */ -+ for (i = 0; i < num_l2_down; i++) { -+ mali_l2_cache_power_down(l2_down[i]); -+ } ++ session->use_high_priority_job_queue = MALI_FALSE; + -+ /* -+ * Power down only PMU domains which should not stay on -+ * Some domains might for instance currently be incorrectly -+ * powered up if default domain power state is all on. -+ */ -+ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current); ++ /* Initialize list of PP jobs on this session. 
*/ ++ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_list); + -+ if (0 != power_down_mask_pmu) { -+ MALI_DEBUG_ASSERT(NULL != pmu); -+ pmu_mask_current &= ~power_down_mask_pmu; -+ mali_pmu_power_down(pmu, power_down_mask_pmu); ++ /* Initialize the pp_job_fb_lookup_list array used to quickly lookup jobs from a given frame builder */ ++ for (i = 0; i < MALI_PP_JOB_FB_LOOKUP_LIST_SIZE; ++i) { ++ _MALI_OSK_INIT_LIST_HEAD(&session->pp_job_fb_lookup_list[i]); ++ } + -+ } -+ } else { -+ /* -+ * Power down only PMU domains which should not stay on -+ * Some domains might for instance currently be incorrectly -+ * powered up if default domain power state is all on. -+ */ -+ u32 power_down_mask_pmu; ++ session->pid = _mali_osk_get_pid(); ++ session->comm = _mali_osk_get_comm(); ++ session->max_mali_mem_allocated_size = 0; ++ for (i = 0; i < MALI_MEM_TYPE_MAX; i ++) { ++ atomic_set(&session->mali_mem_array[i], 0); ++ } ++ atomic_set(&session->mali_mem_allocated_pages, 0); ++ *context = (void *)session; + -+ /* No need for state lock since we'll only update PMU */ -+ mali_pm_state_unlock(); ++ /* Add session to the list of all sessions. */ ++ mali_session_add(session); + -+ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current); ++ MALI_DEBUG_PRINT(3, ("Session started\n")); ++ return _MALI_OSK_ERR_OK; + -+ if (0 != power_down_mask_pmu) { -+ MALI_DEBUG_ASSERT(NULL != pmu); -+ pmu_mask_current &= ~power_down_mask_pmu; -+ mali_pmu_power_down(pmu, power_down_mask_pmu); -+ } -+ } ++err_time_line: ++ mali_soft_job_system_destroy(session->soft_job_system); ++err_soft: ++ mali_memory_session_end(session); ++err_session: ++ mali_mmu_pagedir_free(session->page_directory); ++err_mmu: ++ _mali_osk_wait_queue_term(session->wait_queue); ++err_wait_queue: ++ _mali_osk_notification_queue_term(session->ioctl_queue); ++err: ++ _mali_osk_free(session); ++ MALI_ERROR(_MALI_OSK_ERR_NOMEM); + -+ MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n", -+ mali_pm_mask_to_string(pd_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n", -+ mali_pm_mask_to_string(pmu_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n", -+ mali_pm_group_stats_to_string())); +} + -+static mali_bool mali_pm_common_suspend(void) ++#if defined(DEBUG) ++/* parameter used for debug */ ++extern u32 num_pm_runtime_resume; ++extern u32 num_pm_updates; ++extern u32 num_pm_updates_up; ++extern u32 num_pm_updates_down; ++#endif ++ ++_mali_osk_errcode_t _mali_ukk_close(void **context) +{ -+ mali_pm_state_lock(); ++ struct mali_session_data *session; ++ MALI_CHECK_NON_NULL(context, _MALI_OSK_ERR_INVALID_ARGS); ++ session = (struct mali_session_data *)*context; + -+ if (0 != pd_mask_wanted) { -+ MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n")); -+ mali_pm_state_unlock(); -+ return MALI_FALSE; -+ } ++ MALI_DEBUG_PRINT(3, ("Session ending\n")); + -+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n", -+ mali_pm_mask_to_string(pd_mask_wanted))); -+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n", -+ mali_pm_mask_to_string(pd_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n", -+ mali_pm_mask_to_string(pmu_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... 
<%s>\n", -+ mali_pm_group_stats_to_string())); ++ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system); ++ MALI_DEBUG_ASSERT_POINTER(session->timeline_system); + -+ if (0 != pd_mask_current) { -+ /* -+ * We have still some domains powered on. -+ * It is for instance very normal that at least the -+ * dummy/global domain is marked as powered on at this point. -+ * (because it is physically powered on until this function -+ * returns) -+ */ ++ /* Remove session from list of all sessions. */ ++ mali_session_remove(session); + -+ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS]; -+ u32 num_groups_down = 0; -+ struct mali_l2_cache_core * -+ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; -+ u32 num_l2_down = 0; -+ u32 i; ++ /* This flag is used to prevent queueing of jobs due to activation. */ ++ session->is_aborting = MALI_TRUE; + -+ /* -+ * Put the domains themselves in power down state. -+ * We get the groups and L2s to notify in return. -+ */ -+ mali_pm_domain_power_down(pd_mask_current, -+ groups_down, -+ &num_groups_down, -+ l2_down, -+ &num_l2_down); ++ /* Stop the soft job timer. */ ++ mali_timeline_system_stop_timer(session->timeline_system); + -+ MALI_DEBUG_ASSERT(0 == pd_mask_current); -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); ++ /* Abort queued jobs */ ++ mali_scheduler_abort_session(session); + -+ /* Need to unlock PM state lock before notifying L2 + groups */ -+ mali_pm_state_unlock(); ++ /* Abort executing jobs */ ++ mali_executor_abort_session(session); + -+ /* -+ * Tell execution module about all the groups we will be -+ * powering down. Groups will be notified as a result of this. -+ */ -+ if (0 < num_groups_down) { -+ mali_executor_group_power_down(groups_down, num_groups_down); -+ } ++ /* Abort the soft job system. */ ++ mali_soft_job_system_abort(session->soft_job_system); + -+ /* Notify each L2 cache that we will be powering down */ -+ for (i = 0; i < num_l2_down; i++) { -+ mali_l2_cache_power_down(l2_down[i]); -+ } ++ /* Force execution of all pending bottom half processing for GP and PP. */ ++ _mali_osk_wq_flush(); + -+ pmu_mask_current = 0; -+ } else { -+ MALI_DEBUG_ASSERT(0 == pmu_mask_current); ++ /* The session PP list should now be empty. */ ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&session->pp_job_list)); + -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); ++ /* At this point the GP and PP scheduler no longer has any jobs queued or running from this ++ * session, and all soft jobs in the soft job system has been destroyed. */ + -+ mali_pm_state_unlock(); -+ } ++ /* Any trackers left in the timeline system are directly or indirectly waiting on external ++ * sync fences. Cancel all sync fence waiters to trigger activation of all remaining ++ * trackers. This call will sleep until all timelines are empty. */ ++ mali_timeline_system_abort(session->timeline_system); + -+ MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask: [%s]\n", -+ mali_pm_mask_to_string(pd_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n", -+ mali_pm_mask_to_string(pmu_mask_current))); -+ MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. <%s>\n", -+ mali_pm_group_stats_to_string())); ++ /* Flush pending work. ++ * Needed to make sure all bottom half processing related to this ++ * session has been completed, before we free internal data structures. ++ */ ++ _mali_osk_wq_flush(); + -+ return MALI_TRUE; -+} ++ /* Destroy timeline system. 
*/ ++ mali_timeline_system_destroy(session->timeline_system); ++ session->timeline_system = NULL; + -+static void mali_pm_update_work(void *data) -+{ -+ MALI_IGNORE(data); -+ mali_pm_update_sync(); -+} ++ /* Destroy soft system. */ ++ mali_soft_job_system_destroy(session->soft_job_system); ++ session->soft_job_system = NULL; + -+static _mali_osk_errcode_t mali_pm_create_pm_domains(void) -+{ -+ int i; ++ /*Wait for the session job lists become empty.*/ ++ _mali_osk_wait_queue_wait_event(session->wait_queue, mali_session_pp_job_is_empty, (void *) session); + -+ /* Create all domains (including dummy domain) */ -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (0x0 == domain_config[i]) continue; ++ /* Free remaining memory allocated to this session */ ++ mali_memory_session_end(session); + -+ if (NULL == mali_pm_domain_create(domain_config[i])) { -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ } ++#if defined(CONFIG_MALI_DVFS) ++ _mali_osk_atomic_term(&session->number_of_window_jobs); ++#endif + -+ return _MALI_OSK_ERR_OK; ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_stop_sampling(session->pid); ++#endif ++ ++ /* Free session data structures */ ++ mali_mmu_pagedir_unmap(session->page_directory, MALI_DLBU_VIRT_ADDR, _MALI_OSK_MALI_PAGE_SIZE); ++ mali_mmu_pagedir_free(session->page_directory); ++ _mali_osk_wait_queue_term(session->wait_queue); ++ _mali_osk_notification_queue_term(session->ioctl_queue); ++ _mali_osk_free(session); ++ ++ *context = NULL; ++ ++ MALI_DEBUG_PRINT(3, ("Session has ended\n")); ++ ++#if defined(DEBUG) ++ MALI_DEBUG_PRINT(3, ("Stats: # runtime resumes: %u\n", num_pm_runtime_resume)); ++ MALI_DEBUG_PRINT(3, (" # PM updates: .... %u (up %u, down %u)\n", num_pm_updates, num_pm_updates_up, num_pm_updates_down)); ++ ++ num_pm_runtime_resume = 0; ++ num_pm_updates = 0; ++ num_pm_updates_up = 0; ++ num_pm_updates_down = 0; ++#endif ++ ++ return _MALI_OSK_ERR_OK;; +} + -+static void mali_pm_set_default_pm_domain_config(void) ++#if MALI_STATE_TRACKING ++u32 _mali_kernel_core_dump_state(char *buf, u32 size) +{ -+ MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address()); ++ int n = 0; /* Number of bytes written to buf */ + -+ /* GP core */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_GP, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_GP] = 0x01; -+ } ++ n += mali_scheduler_dump_state(buf + n, size - n); ++ n += mali_executor_dump_state(buf + n, size - n); + -+ /* PP0 - PP3 core */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP0, NULL)) { -+ if (mali_is_mali400()) { -+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2; -+ } else if (mali_is_mali450()) { -+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1; -+ } else if (mali_is_mali470()) { -+ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0; -+ } -+ } ++ return n; ++} ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h +new file mode 100644 +index 000000000..c471fc955 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_core.h +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP1, NULL)) { -+ if (mali_is_mali400()) { -+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3; -+ } else if (mali_is_mali450()) { -+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2; -+ } else if (mali_is_mali470()) { -+ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1; -+ } -+ } ++#ifndef __MALI_KERNEL_CORE_H__ ++#define __MALI_KERNEL_CORE_H__ + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP2, NULL)) { -+ if (mali_is_mali400()) { -+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4; -+ } else if (mali_is_mali450()) { -+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2; -+ } else if (mali_is_mali470()) { -+ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1; -+ } -+ } ++#include "mali_osk.h" + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP3, NULL)) { -+ if (mali_is_mali400()) { -+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5; -+ } else if (mali_is_mali450()) { -+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2; -+ } else if (mali_is_mali470()) { -+ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1; -+ } -+ } ++typedef enum { ++ _MALI_PRODUCT_ID_UNKNOWN, ++ _MALI_PRODUCT_ID_MALI200, ++ _MALI_PRODUCT_ID_MALI300, ++ _MALI_PRODUCT_ID_MALI400, ++ _MALI_PRODUCT_ID_MALI450, ++ _MALI_PRODUCT_ID_MALI470, ++} _mali_product_id_t; + -+ /* PP4 - PP7 */ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP4, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3; -+ } ++extern mali_bool mali_gpu_class_is_mali450; ++extern mali_bool mali_gpu_class_is_mali470; + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP5, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3; -+ } ++_mali_osk_errcode_t mali_initialize_subsystems(void); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP6, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3; -+ } ++void mali_terminate_subsystems(void); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI_OFFSET_PP7, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3; -+ } ++_mali_product_id_t mali_kernel_core_get_product_id(void); + -+ /* L2gp/L2PP0/L2PP4 */ -+ if (mali_is_mali400()) { -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI400_OFFSET_L2_CACHE0, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1; -+ } -+ } else if (mali_is_mali450()) { -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI450_OFFSET_L2_CACHE0, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0; -+ } ++u32 mali_kernel_core_get_gpu_major_version(void); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI450_OFFSET_L2_CACHE1, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1; -+ } ++u32 mali_kernel_core_get_gpu_minor_version(void); + -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI450_OFFSET_L2_CACHE2, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3; -+ } -+ } else if (mali_is_mali470()) { -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( -+ MALI470_OFFSET_L2_CACHE1, NULL)) { -+ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0; -+ } -+ } ++u32 _mali_kernel_core_dump_state(char *buf, u32 size); ++ ++MALI_STATIC_INLINE mali_bool mali_is_mali470(void) ++{ ++ return mali_gpu_class_is_mali470; +} + -+static u32 
mali_pm_get_registered_cores_mask(void) ++MALI_STATIC_INLINE mali_bool mali_is_mali450(void) +{ -+ int i = 0; -+ u32 mask = 0; ++ return mali_gpu_class_is_mali450; ++} + -+ for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) { -+ mask |= domain_config[i]; -+ } ++MALI_STATIC_INLINE mali_bool mali_is_mali400(void) ++{ ++ if (mali_gpu_class_is_mali450 || mali_gpu_class_is_mali470) ++ return MALI_FALSE; + -+ return mask; ++ return MALI_TRUE; +} ++#endif /* __MALI_KERNEL_CORE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c +new file mode 100644 +index 000000000..d1b8dc3b0 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c +@@ -0,0 +1,440 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static void mali_pm_set_pmu_domain_config(void) ++#include "mali_kernel_utilization.h" ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_scheduler.h" ++ ++#include "mali_executor.h" ++#include "mali_dvfs_policy.h" ++#include "mali_control_timer.h" ++ ++/* Thresholds for GP bound detection. */ ++#define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240 ++#define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250 ++ ++static _mali_osk_spinlock_irq_t *utilization_data_lock; ++ ++static u32 num_running_gp_cores = 0; ++static u32 num_running_pp_cores = 0; ++ ++static u64 work_start_time_gpu = 0; ++static u64 work_start_time_gp = 0; ++static u64 work_start_time_pp = 0; ++static u64 accumulated_work_time_gpu = 0; ++static u64 accumulated_work_time_gp = 0; ++static u64 accumulated_work_time_pp = 0; ++ ++static u32 last_utilization_gpu = 0 ; ++static u32 last_utilization_gp = 0 ; ++static u32 last_utilization_pp = 0 ; ++ ++void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL; ++ ++/* Define the first timer control timer timeout in milliseconds */ ++static u32 mali_control_first_timeout = 100; ++static struct mali_gpu_utilization_data mali_util_data = {0, }; ++ ++struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer) +{ -+ int i = 0; ++ u64 time_now; ++ u32 leading_zeroes; ++ u32 shift_val; ++ u32 work_normalized_gpu; ++ u32 work_normalized_gp; ++ u32 work_normalized_pp; ++ u32 period_normalized; ++ u32 utilization_gpu; ++ u32 utilization_gp; ++ u32 utilization_pp; + -+ _mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1); ++ mali_utilization_data_lock(); + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) { -+ if (0 != domain_config[i]) { -+ MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n")); -+ break; -+ } -+ } ++ time_now = _mali_osk_time_get_ns(); + -+ if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) { -+ MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n")); -+ mali_pm_set_default_pm_domain_config(); ++ *time_period = time_now - *start_time; ++ ++ if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) { ++ 
mali_control_timer_pause(); ++ /* ++ * No work done for this period ++ * - No need to reschedule timer ++ * - Report zero usage ++ */ ++ last_utilization_gpu = 0; ++ last_utilization_gp = 0; ++ last_utilization_pp = 0; ++ ++ mali_util_data.utilization_gpu = last_utilization_gpu; ++ mali_util_data.utilization_gp = last_utilization_gp; ++ mali_util_data.utilization_pp = last_utilization_pp; ++ ++ mali_utilization_data_unlock(); ++ ++ *need_add_timer = MALI_FALSE; ++ ++ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND); ++ ++ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu)); ++ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp)); ++ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp)); ++ ++ return &mali_util_data; + } + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) { -+ if (domain_config[i]) { -+ MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i])); ++ /* If we are currently busy, update working period up to now */ ++ if (work_start_time_gpu != 0) { ++ accumulated_work_time_gpu += (time_now - work_start_time_gpu); ++ work_start_time_gpu = time_now; ++ ++ /* GP and/or PP will also be busy if the GPU is busy at this point */ ++ ++ if (work_start_time_gp != 0) { ++ accumulated_work_time_gp += (time_now - work_start_time_gp); ++ work_start_time_gp = time_now; ++ } ++ ++ if (work_start_time_pp != 0) { ++ accumulated_work_time_pp += (time_now - work_start_time_pp); ++ work_start_time_pp = time_now; + } + } -+ /* Can't override dummy domain mask */ -+ domain_config[MALI_DOMAIN_INDEX_DUMMY] = -+ 1 << MALI_DOMAIN_INDEX_DUMMY; -+} + -+#if defined(DEBUG) -+const char *mali_pm_mask_to_string(u32 mask) -+{ -+ static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1]; -+ int bit; -+ int str_pos = 0; ++ /* ++ * We have two 64-bit values, a dividend and a divisor. ++ * To avoid dependencies to a 64-bit divider, we shift down the two values ++ * equally first. ++ * We shift the dividend up and possibly the divisor down, making the result X in 256. ++ */ + -+ /* Must be protected by lock since we use shared string buffer */ -+ if (NULL != pm_lock_exec) { -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ /* Shift the 64-bit values down so they fit inside a 32-bit integer */ ++ leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32)); ++ shift_val = 32 - leading_zeroes; ++ work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val); ++ work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val); ++ work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val); ++ period_normalized = (u32)(*time_period >> shift_val); ++ ++ /* ++ * Now, we should report the usage in parts of 256 ++ * this means we must shift up the dividend or down the divisor by 8 ++ * (we could do a combination, but we just use one for simplicity, ++ * but the end result should be good enough anyway) ++ */ ++ if (period_normalized > 0x00FFFFFF) { ++ /* The divisor is so big that it is safe to shift it down */ ++ period_normalized >>= 8; ++ } else { ++ /* ++ * The divisor is so small that we can shift up the dividend, without loosing any data. 
++ * (dividend is always smaller than the divisor) ++ */ ++ work_normalized_gpu <<= 8; ++ work_normalized_gp <<= 8; ++ work_normalized_pp <<= 8; + } + -+ for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) { -+ if (mask & (1 << bit)) { -+ bit_str[str_pos] = 'X'; -+ } else { -+ bit_str[str_pos] = '-'; -+ } -+ str_pos++; ++ utilization_gpu = work_normalized_gpu / period_normalized; ++ utilization_gp = work_normalized_gp / period_normalized; ++ utilization_pp = work_normalized_pp / period_normalized; ++ ++ last_utilization_gpu = utilization_gpu; ++ last_utilization_gp = utilization_gp; ++ last_utilization_pp = utilization_pp; ++ ++ if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) && ++ (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) { ++ mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND); ++ } else { ++ mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND); + } + -+ bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0'; ++ /* starting a new period */ ++ accumulated_work_time_gpu = 0; ++ accumulated_work_time_gp = 0; ++ accumulated_work_time_pp = 0; + -+ return bit_str; ++ *start_time = time_now; ++ ++ mali_util_data.utilization_gp = last_utilization_gp; ++ mali_util_data.utilization_gpu = last_utilization_gpu; ++ mali_util_data.utilization_pp = last_utilization_pp; ++ ++ mali_utilization_data_unlock(); ++ ++ *need_add_timer = MALI_TRUE; ++ ++ MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu)); ++ MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp)); ++ MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp)); ++ ++ return &mali_util_data; +} + -+const char *mali_pm_group_stats_to_string(void) ++_mali_osk_errcode_t mali_utilization_init(void) +{ -+ static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1]; -+ u32 num_groups = mali_group_get_glob_num_groups(); -+ u32 i; ++#if USING_GPU_UTILIZATION ++ _mali_osk_device_data data; + -+ /* Must be protected by lock since we use shared string buffer */ -+ if (NULL != pm_lock_exec) { -+ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ if (NULL != data.utilization_callback) { ++ mali_utilization_callback = data.utilization_callback; ++ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n")); ++ } + } ++#endif /* defined(USING_GPU_UTILIZATION) */ + -+ for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) { -+ struct mali_group *group; -+ -+ group = mali_group_get_glob_group(i); ++ if (NULL == mali_utilization_callback) { ++ MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n")); ++ } + -+ if (MALI_TRUE == mali_group_power_is_on(group)) { -+ bit_str[i] = 'X'; -+ } else { -+ bit_str[i] = '-'; -+ } ++ utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION); ++ if (NULL == utilization_data_lock) { ++ return _MALI_OSK_ERR_FAULT; + } + -+ bit_str[i] = '\0'; ++ num_running_gp_cores = 0; ++ num_running_pp_cores = 0; + -+ return bit_str; ++ return _MALI_OSK_ERR_OK; +} -+#endif + -+/* -+ * num_pp is the number of PP cores which will be powered on given this mask -+ * cost is the total power cost of cores which will be powered on given this mask -+ */ -+static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost) ++void mali_utilization_term(void) +{ -+ u32 i; ++ if (NULL != utilization_data_lock) { ++ _mali_osk_spinlock_irq_term(utilization_data_lock); ++ } ++} + -+ /* loop 
through all cores */ -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (!(domain_config[i] & mask)) { -+ continue; -+ } ++void mali_utilization_gp_start(void) ++{ ++ mali_utilization_data_lock(); + -+ switch (i) { -+ case MALI_DOMAIN_INDEX_GP: -+ *cost += MALI_GP_COST; ++ ++num_running_gp_cores; ++ if (1 == num_running_gp_cores) { ++ u64 time_now = _mali_osk_time_get_ns(); + -+ break; -+ case MALI_DOMAIN_INDEX_PP0: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP1: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP2: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP3: -+ if (mali_is_mali400()) { -+ if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask) -+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] -+ == domain_config[MALI_DOMAIN_INDEX_L20])) { -+ *num_pp += 1; -+ } -+ } else { -+ if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask) -+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] -+ == domain_config[MALI_DOMAIN_INDEX_L21])) { -+ *num_pp += 1; -+ } -+ } ++ /* First GP core started, consider GP busy from now and onwards */ ++ work_start_time_gp = time_now; + -+ *cost += MALI_PP_COST; -+ break; -+ case MALI_DOMAIN_INDEX_PP4: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP5: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP6: /* Fall through */ -+ case MALI_DOMAIN_INDEX_PP7: -+ MALI_DEBUG_ASSERT(mali_is_mali450()); ++ if (0 == num_running_pp_cores) { ++ mali_bool is_resume = MALI_FALSE; ++ /* ++ * There are no PP cores running, so this is also the point ++ * at which we consider the GPU to be busy as well. ++ */ ++ work_start_time_gpu = time_now; + -+ if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask) -+ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] -+ == domain_config[MALI_DOMAIN_INDEX_L22])) { -+ *num_pp += 1; -+ } ++ is_resume = mali_control_timer_resume(time_now); + -+ *cost += MALI_PP_COST; -+ break; -+ case MALI_DOMAIN_INDEX_L20: /* Fall through */ -+ case MALI_DOMAIN_INDEX_L21: /* Fall through */ -+ case MALI_DOMAIN_INDEX_L22: -+ *cost += MALI_L2_COST; ++ mali_utilization_data_unlock(); + -+ break; ++ if (is_resume) { ++ /* Do some policy in new period for performance consideration */ ++#if defined(CONFIG_MALI_DVFS) ++ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */ ++ mali_session_max_window_num(); ++ if (0 == last_utilization_gpu) { ++ /* ++ * for mali_dev_pause is called in set clock, ++ * so each time we change clock, we will set clock to ++ * highest step even if under down clock case, ++ * it is not nessesary, so we only set the clock under ++ * last time utilization equal 0, we stop the timer then ++ * start the GPU again case ++ */ ++ mali_dvfs_policy_new_period(); ++ } ++#endif ++ /* ++ * First timeout using short interval for power consideration ++ * because we give full power in the new period, but if the ++ * job loading is light, finish in 10ms, the other time all keep ++ * in high freq it will wast time. ++ */ ++ mali_control_timer_add(mali_control_first_timeout); ++ } ++ } else { ++ mali_utilization_data_unlock(); + } ++ ++ } else { ++ /* Nothing to do */ ++ mali_utilization_data_unlock(); + } +} + -+void mali_pm_power_cost_setup(void) ++void mali_utilization_pp_start(void) +{ -+ /* -+ * Two parallel arrays which store the best domain mask and its cost -+ * The index is the number of PP cores, E.g. 
Index 0 is for 1 PP option, -+ * might have mask 0x2 and with cost of 1, lower cost is better -+ */ -+ u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 }; -+ u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 }; -+ /* Array cores_in_domain is used to store the total pp cores in each pm domain. */ -+ u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; -+ /* Domain_count is used to represent the max domain we have.*/ -+ u32 max_domain_mask = 0; -+ u32 max_domain_id = 0; -+ u32 always_on_pp_cores = 0; ++ mali_utilization_data_lock(); + -+ u32 num_pp, cost, mask; -+ u32 i, j , k; ++ ++num_running_pp_cores; ++ if (1 == num_running_pp_cores) { ++ u64 time_now = _mali_osk_time_get_ns(); + -+ /* Initialize statistics */ -+ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) { -+ best_mask[i] = 0; -+ best_cost[i] = 0xFFFFFFFF; /* lower cost is better */ -+ } ++ /* First PP core started, consider PP busy from now and onwards */ ++ work_start_time_pp = time_now; + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) { -+ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) { -+ mali_pm_domain_power_cost_result[i][j] = 0; -+ } -+ } ++ if (0 == num_running_gp_cores) { ++ mali_bool is_resume = MALI_FALSE; ++ /* ++ * There are no GP cores running, so this is also the point ++ * at which we consider the GPU to be busy as well. ++ */ ++ work_start_time_gpu = time_now; + -+ /* Caculate number of pp cores of a given domain config. */ -+ for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) { -+ if (0 < domain_config[i]) { -+ /* Get the max domain mask value used to caculate power cost -+ * and we don't count in always on pp cores. */ -+ if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i] -+ && max_domain_mask < domain_config[i]) { -+ max_domain_mask = domain_config[i]; -+ } ++ /* Start a new period if stoped */ ++ is_resume = mali_control_timer_resume(time_now); + -+ if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) { -+ always_on_pp_cores++; ++ mali_utilization_data_unlock(); ++ ++ if (is_resume) { ++#if defined(CONFIG_MALI_DVFS) ++ /* Clear session->number_of_window_jobs, prepare parameter for dvfs */ ++ mali_session_max_window_num(); ++ if (0 == last_utilization_gpu) { ++ /* ++ * for mali_dev_pause is called in set clock, ++ * so each time we change clock, we will set clock to ++ * highest step even if under down clock case, ++ * it is not nessesary, so we only set the clock under ++ * last time utilization equal 0, we stop the timer then ++ * start the GPU again case ++ */ ++ mali_dvfs_policy_new_period(); ++ } ++#endif ++ ++ /* ++ * First timeout using short interval for power consideration ++ * because we give full power in the new period, but if the ++ * job loading is light, finish in 10ms, the other time all keep ++ * in high freq it will wast time. ++ */ ++ mali_control_timer_add(mali_control_first_timeout); + } ++ } else { ++ mali_utilization_data_unlock(); + } ++ } else { ++ /* Nothing to do */ ++ mali_utilization_data_unlock(); + } -+ max_domain_id = _mali_osk_fls(max_domain_mask); ++} + -+ /* -+ * Try all combinations of power domains and check how many PP cores -+ * they have and their power cost. 
-+ */ -+ for (mask = 0; mask < (1 << max_domain_id); mask++) { -+ num_pp = 0; -+ cost = 0; ++void mali_utilization_gp_end(void) ++{ ++ mali_utilization_data_lock(); + -+ mali_pm_stat_from_mask(mask, &num_pp, &cost); ++ --num_running_gp_cores; ++ if (0 == num_running_gp_cores) { ++ u64 time_now = _mali_osk_time_get_ns(); + -+ /* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */ -+ for (i = 0; i < num_pp; i++) { -+ if (best_cost[i] >= cost) { -+ best_cost[i] = cost; -+ best_mask[i] = mask; -+ } -+ } -+ } ++ /* Last GP core ended, consider GP idle from now and onwards */ ++ accumulated_work_time_gp += (time_now - work_start_time_gp); ++ work_start_time_gp = 0; + -+ /* -+ * If we want to enable x pp cores, if x is less than number of always_on pp cores, -+ * all of pp cores we will enable must be always_on pp cores. -+ */ -+ for (i = 0; i < mali_executor_get_num_cores_total(); i++) { -+ if (i < always_on_pp_cores) { -+ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1] -+ = i + 1; -+ } else { -+ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1] -+ = always_on_pp_cores; ++ if (0 == num_running_pp_cores) { ++ /* ++ * There are no PP cores running, so this is also the point ++ * at which we consider the GPU to be idle as well. ++ */ ++ accumulated_work_time_gpu += (time_now - work_start_time_gpu); ++ work_start_time_gpu = 0; + } + } + -+ /* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */ -+ for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) { -+ if (best_mask[i] == 0) { -+ /* This MP variant is not available */ -+ continue; -+ } ++ mali_utilization_data_unlock(); ++} + -+ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) { -+ cores_in_domain[j] = 0; -+ } ++void mali_utilization_pp_end(void) ++{ ++ mali_utilization_data_lock(); + -+ for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) { -+ if (0 < domain_config[j] -+ && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) { -+ cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++; -+ } -+ } ++ --num_running_pp_cores; ++ if (0 == num_running_pp_cores) { ++ u64 time_now = _mali_osk_time_get_ns(); + -+ /* In this loop, j represent for the number we have already enabled.*/ -+ for (j = 0; j <= i;) { -+ /* j used to visit all of domain to get the number of pp cores remained in it. */ -+ for (k = 0; k < max_domain_id; k++) { -+ /* If domain k in best_mask[i] is enabled and this domain has extra pp cores, -+ * we know we must pick at least one pp core from this domain. -+ * And then we move to next enabled pm domain. */ -+ if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) { -+ cores_in_domain[k]--; -+ mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++; -+ j++; -+ if (j > i) { -+ break; -+ } -+ } -+ } ++ /* Last PP core ended, consider PP idle from now and onwards */ ++ accumulated_work_time_pp += (time_now - work_start_time_pp); ++ work_start_time_pp = 0; ++ ++ if (0 == num_running_gp_cores) { ++ /* ++ * There are no GP cores running, so this is also the point ++ * at which we consider the GPU to be idle as well. ++ */ ++ accumulated_work_time_gpu += (time_now - work_start_time_gpu); ++ work_start_time_gpu = 0; + } + } ++ ++ mali_utilization_data_unlock(); +} + -+/* -+ * When we are doing core scaling, -+ * this function is called to return the best mask to -+ * achieve the best pp group power cost. 
-+ */ -+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst) ++mali_bool mali_utilization_enabled(void) +{ -+ MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested)); ++#if defined(CONFIG_MALI_DVFS) ++ return mali_dvfs_policy_enabled(); ++#else ++ return (NULL != mali_utilization_callback); ++#endif /* defined(CONFIG_MALI_DVFS) */ ++} + -+ _mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int)); ++void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data) ++{ ++ MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback); ++ ++ mali_utilization_callback(util_data); +} + -+u32 mali_pm_get_current_mask(void) ++void mali_utilization_reset(void) +{ -+ return pd_mask_current; ++ accumulated_work_time_gpu = 0; ++ accumulated_work_time_gp = 0; ++ accumulated_work_time_pp = 0; ++ ++ last_utilization_gpu = 0; ++ last_utilization_gp = 0; ++ last_utilization_pp = 0; +} + -+u32 mali_pm_get_wanted_mask(void) ++void mali_utilization_data_lock(void) +{ -+ return pd_mask_wanted; ++ _mali_osk_spinlock_irq_lock(utilization_data_lock); +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm.h b/drivers/gpu/arm/mali400/mali/common/mali_pm.h ++ ++void mali_utilization_data_unlock(void) ++{ ++ _mali_osk_spinlock_irq_unlock(utilization_data_lock); ++} ++ ++void mali_utilization_data_assert_locked(void) ++{ ++ MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock); ++} ++ ++u32 _mali_ukk_utilization_gp_pp(void) ++{ ++ return last_utilization_gpu; ++} ++ ++u32 _mali_ukk_utilization_gp(void) ++{ ++ return last_utilization_gp; ++} ++ ++u32 _mali_ukk_utilization_pp(void) ++{ ++ return last_utilization_pp; ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h new file mode 100644 -index 000000000..dac69958e +index 000000000..06f585dcb --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm.h -@@ -0,0 +1,91 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.h +@@ -0,0 +1,72 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -294524,95 +297577,76 @@ index 000000000..dac69958e + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_PM_H__ -+#define __MALI_PM_H__ ++#ifndef __MALI_KERNEL_UTILIZATION_H__ ++#define __MALI_KERNEL_UTILIZATION_H__ + ++#include +#include "mali_osk.h" -+#include "mali_pm_domain.h" + -+#define MALI_DOMAIN_INDEX_GP 0 -+#define MALI_DOMAIN_INDEX_PP0 1 -+#define MALI_DOMAIN_INDEX_PP1 2 -+#define MALI_DOMAIN_INDEX_PP2 3 -+#define MALI_DOMAIN_INDEX_PP3 4 -+#define MALI_DOMAIN_INDEX_PP4 5 -+#define MALI_DOMAIN_INDEX_PP5 6 -+#define MALI_DOMAIN_INDEX_PP6 7 -+#define MALI_DOMAIN_INDEX_PP7 8 -+#define MALI_DOMAIN_INDEX_L20 9 -+#define MALI_DOMAIN_INDEX_L21 10 -+#define MALI_DOMAIN_INDEX_L22 11 -+/* -+ * The dummy domain is used when there is no physical power domain -+ * (e.g. no PMU or always on cores) ++/** ++ * Initialize/start the Mali GPU utilization metrics reporting. ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. 
+ */ -+#define MALI_DOMAIN_INDEX_DUMMY 12 -+#define MALI_MAX_NUMBER_OF_DOMAINS 13 ++_mali_osk_errcode_t mali_utilization_init(void); + +/** -+ * Initialize the Mali PM module -+ * -+ * PM module covers Mali PM core, PM domains and Mali PMU ++ * Terminate the Mali GPU utilization metrics reporting + */ -+_mali_osk_errcode_t mali_pm_initialize(void); ++void mali_utilization_term(void); + +/** -+ * Terminate the Mali PM module ++ * Check if Mali utilization is enabled + */ -+void mali_pm_terminate(void); ++mali_bool mali_utilization_enabled(void); + -+void mali_pm_exec_lock(void); -+void mali_pm_exec_unlock(void); ++/** ++ * Should be called when a job is about to execute a GP job ++ */ ++void mali_utilization_gp_start(void); + ++/** ++ * Should be called when a job has completed executing a GP job ++ */ ++void mali_utilization_gp_end(void); + -+struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index, -+ struct mali_l2_cache_core *l2_cache); -+struct mali_pm_domain *mali_pm_register_group(u32 domain_index, -+ struct mali_group *group); ++/** ++ * Should be called when a job is about to execute a PP job ++ */ ++void mali_utilization_pp_start(void); + -+mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains, -+ struct mali_group **groups, -+ u32 num_domains); -+mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains, -+ u32 num_domains); ++/** ++ * Should be called when a job has completed executing a PP job ++ */ ++void mali_utilization_pp_end(void); + -+void mali_pm_init_begin(void); -+void mali_pm_init_end(void); ++/** ++ * Should be called to calcution the GPU utilization ++ */ ++struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer); + -+void mali_pm_update_sync(void); -+void mali_pm_update_async(void); ++_mali_osk_spinlock_irq_t *mali_utilization_get_lock(void); + -+/* Callback functions for system power management */ -+void mali_pm_os_suspend(mali_bool os_suspend); -+void mali_pm_os_resume(void); ++void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data); + -+mali_bool mali_pm_runtime_suspend(void); -+void mali_pm_runtime_resume(void); ++void mali_utilization_data_lock(void); + -+#if MALI_STATE_TRACKING -+u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain, -+ char *buf, u32 size); -+#endif ++void mali_utilization_data_unlock(void); + -+void mali_pm_power_cost_setup(void); ++void mali_utilization_data_assert_locked(void); + -+void mali_pm_get_best_power_cost_mask(int num_requested, int *dst); ++void mali_utilization_reset(void); + -+#if defined(DEBUG) -+const char *mali_pm_mask_to_string(u32 mask); -+#endif + -+u32 mali_pm_get_current_mask(void); -+u32 mali_pm_get_wanted_mask(void); -+#endif /* __MALI_PM_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c ++#endif /* __MALI_KERNEL_UTILIZATION_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c b/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c new file mode 100644 -index 000000000..8290f7d88 +index 000000000..dd44e5e7f --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c -@@ -0,0 +1,209 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_kernel_vsync.c +@@ -0,0 +1,45 @@ +/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -294623,580 +297657,585 @@ index 000000000..8290f7d88 + +#include "mali_kernel_common.h" +#include "mali_osk.h" -+#include "mali_pm_domain.h" -+#include "mali_pmu.h" -+#include "mali_group.h" -+#include "mali_pm.h" ++#include "mali_ukk.h" + -+static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] = -+{ NULL, }; ++#include "mali_osk_profiling.h" + -+void mali_pm_domain_initialize(void) ++_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args) +{ -+ /* Domains will be initialized/created on demand */ -+} ++ _mali_uk_vsync_event event = (_mali_uk_vsync_event)args->event; ++ MALI_IGNORE(event); /* event is not used for release code, and that is OK */ + -+void mali_pm_domain_terminate(void) -+{ -+ int i; ++ /* ++ * Manually generate user space events in kernel space. ++ * This saves user space from calling kernel space twice in this case. ++ * We just need to remember to add pid and tid manually. ++ */ ++ if (event == _MALI_UK_VSYNC_EVENT_BEGIN_WAIT) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SUSPEND | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC, ++ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0); ++ } + -+ /* Delete all domains that has been created */ -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ mali_pm_domain_delete(mali_pm_domains[i]); -+ mali_pm_domains[i] = NULL; ++ if (event == _MALI_UK_VSYNC_EVENT_END_WAIT) { ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_RESUME | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC, ++ _mali_osk_get_pid(), _mali_osk_get_tid(), 0, 0, 0); + } ++ ++ ++ MALI_DEBUG_PRINT(4, ("Received VSYNC event: %d\n", event)); ++ MALI_SUCCESS; +} + -+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask) -+{ -+ struct mali_pm_domain *domain = NULL; -+ u32 domain_id = 0; +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c +new file mode 100644 +index 000000000..fe33f561b +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.c +@@ -0,0 +1,534 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_l2_cache.h" ++#include "mali_hw_core.h" ++#include "mali_scheduler.h" ++#include "mali_pm.h" ++#include "mali_pm_domain.h" + -+ domain = mali_pm_domain_get_from_mask(pmu_mask); -+ if (NULL != domain) return domain; ++/** ++ * Size of the Mali L2 cache registers in bytes ++ */ ++#define MALI400_L2_CACHE_REGISTERS_SIZE 0x30 + -+ MALI_DEBUG_PRINT(2, -+ ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n", -+ pmu_mask)); ++/** ++ * Mali L2 cache register numbers ++ * Used in the register read/write routines. 
++ * See the hardware documentation for more information about each register ++ */ ++typedef enum mali_l2_cache_register { ++ MALI400_L2_CACHE_REGISTER_SIZE = 0x0004, ++ MALI400_L2_CACHE_REGISTER_STATUS = 0x0008, ++ /*unused = 0x000C */ ++ MALI400_L2_CACHE_REGISTER_COMMAND = 0x0010, ++ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE = 0x0014, ++ MALI400_L2_CACHE_REGISTER_MAX_READS = 0x0018, ++ MALI400_L2_CACHE_REGISTER_ENABLE = 0x001C, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0 = 0x0020, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0 = 0x0024, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1 = 0x0028, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1 = 0x002C, ++} mali_l2_cache_register; + -+ domain = (struct mali_pm_domain *)_mali_osk_malloc( -+ sizeof(struct mali_pm_domain)); -+ if (NULL != domain) { -+ domain->power_is_on = MALI_FALSE; -+ domain->pmu_mask = pmu_mask; -+ domain->use_count = 0; -+ _mali_osk_list_init(&domain->group_list); -+ _mali_osk_list_init(&domain->l2_cache_list); ++/** ++ * Mali L2 cache commands ++ * These are the commands that can be sent to the Mali L2 cache unit ++ */ ++typedef enum mali_l2_cache_command { ++ MALI400_L2_CACHE_COMMAND_CLEAR_ALL = 0x01, ++} mali_l2_cache_command; + -+ domain_id = _mali_osk_fls(pmu_mask) - 1; -+ /* Verify the domain_id */ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id); -+ /* Verify that pmu_mask only one bit is set */ -+ MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask); -+ mali_pm_domains[domain_id] = domain; ++/** ++ * Mali L2 cache commands ++ * These are the commands that can be sent to the Mali L2 cache unit ++ */ ++typedef enum mali_l2_cache_enable { ++ MALI400_L2_CACHE_ENABLE_DEFAULT = 0x0, /* Default */ ++ MALI400_L2_CACHE_ENABLE_ACCESS = 0x01, ++ MALI400_L2_CACHE_ENABLE_READ_ALLOCATE = 0x02, ++} mali_l2_cache_enable; + -+ return domain; -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n")); -+ } ++/** ++ * Mali L2 cache status bits ++ */ ++typedef enum mali_l2_cache_status { ++ MALI400_L2_CACHE_STATUS_COMMAND_BUSY = 0x01, ++ MALI400_L2_CACHE_STATUS_DATA_BUSY = 0x02, ++} mali_l2_cache_status; + -+ return NULL; -+} ++#define MALI400_L2_MAX_READS_NOT_SET -1 + -+void mali_pm_domain_delete(struct mali_pm_domain *domain) -+{ -+ if (NULL == domain) { -+ return; -+ } ++static struct mali_l2_cache_core * ++ mali_global_l2s[MALI_MAX_NUMBER_OF_L2_CACHE_CORES] = { NULL, }; ++static u32 mali_global_num_l2s = 0; + -+ _mali_osk_list_delinit(&domain->group_list); -+ _mali_osk_list_delinit(&domain->l2_cache_list); ++int mali_l2_max_reads = MALI400_L2_MAX_READS_NOT_SET; + -+ _mali_osk_free(domain); -+} + -+void mali_pm_domain_add_group(struct mali_pm_domain *domain, -+ struct mali_group *group) -+{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ MALI_DEBUG_ASSERT_POINTER(group); ++/* Local helper functions */ + -+ /* -+ * Use addtail because virtual group is created last and it needs -+ * to be at the end of the list (in order to be activated after -+ * all children. 
-+ */ -+ _mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list); ++static void mali_l2_cache_reset(struct mali_l2_cache_core *cache); ++ ++static _mali_osk_errcode_t mali_l2_cache_send_command( ++ struct mali_l2_cache_core *cache, u32 reg, u32 val); ++ ++static void mali_l2_cache_lock(struct mali_l2_cache_core *cache) ++{ ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ _mali_osk_spinlock_irq_lock(cache->lock); +} + -+void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain, -+ struct mali_l2_cache_core *l2_cache) ++static void mali_l2_cache_unlock(struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ MALI_DEBUG_ASSERT_POINTER(l2_cache); -+ _mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list); ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ _mali_osk_spinlock_irq_unlock(cache->lock); +} + -+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask) ++/* Implementation of the L2 cache interface */ ++ ++struct mali_l2_cache_core *mali_l2_cache_create( ++ _mali_osk_resource_t *resource, u32 domain_index) +{ -+ u32 id = 0; ++ struct mali_l2_cache_core *cache = NULL; ++#if defined(DEBUG) ++ u32 cache_size; ++#endif + -+ if (0 == mask) { ++ MALI_DEBUG_PRINT(4, ("Mali L2 cache: Creating Mali L2 cache: %s\n", ++ resource->description)); ++ ++ if (mali_global_num_l2s >= MALI_MAX_NUMBER_OF_L2_CACHE_CORES) { ++ MALI_PRINT_ERROR(("Mali L2 cache: Too many L2 caches\n")); + return NULL; + } + -+ id = _mali_osk_fls(mask) - 1; ++ cache = _mali_osk_malloc(sizeof(struct mali_l2_cache_core)); ++ if (NULL == cache) { ++ MALI_PRINT_ERROR(("Mali L2 cache: Failed to allocate memory for L2 cache core\n")); ++ return NULL; ++ } + -+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); -+ /* Verify that pmu_mask only one bit is set */ -+ MALI_DEBUG_ASSERT((1 << id) == mask); ++ cache->core_id = mali_global_num_l2s; ++ cache->counter_src0 = MALI_HW_CORE_NO_COUNTER; ++ cache->counter_src1 = MALI_HW_CORE_NO_COUNTER; ++ cache->counter_value0_base = 0; ++ cache->counter_value1_base = 0; ++ cache->pm_domain = NULL; ++ cache->power_is_on = MALI_FALSE; ++ cache->last_invalidated_id = 0; + -+ return mali_pm_domains[id]; ++ if (_MALI_OSK_ERR_OK != mali_hw_core_create(&cache->hw_core, ++ resource, MALI400_L2_CACHE_REGISTERS_SIZE)) { ++ _mali_osk_free(cache); ++ return NULL; ++ } ++ ++#if defined(DEBUG) ++ cache_size = mali_hw_core_register_read(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_SIZE); ++ MALI_DEBUG_PRINT(2, ("Mali L2 cache: Created %s: % 3uK, %u-way, % 2ubyte cache line, % 3ubit external bus\n", ++ resource->description, ++ 1 << (((cache_size >> 16) & 0xff) - 10), ++ 1 << ((cache_size >> 8) & 0xff), ++ 1 << (cache_size & 0xff), ++ 1 << ((cache_size >> 24) & 0xff))); ++#endif ++ ++ cache->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_L2); ++ if (NULL == cache->lock) { ++ MALI_PRINT_ERROR(("Mali L2 cache: Failed to create counter lock for L2 cache core %s\n", ++ cache->hw_core.description)); ++ mali_hw_core_delete(&cache->hw_core); ++ _mali_osk_free(cache); ++ return NULL; ++ } ++ ++ /* register with correct power domain */ ++ cache->pm_domain = mali_pm_register_l2_cache( ++ domain_index, cache); ++ ++ mali_global_l2s[mali_global_num_l2s] = cache; ++ mali_global_num_l2s++; ++ ++ return cache; +} + -+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id) ++void mali_l2_cache_delete(struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); ++ u32 i; ++ for (i = 0; i < mali_global_num_l2s; i++) 
{ ++ if (mali_global_l2s[i] != cache) { ++ continue; ++ } + -+ return mali_pm_domains[id]; ++ mali_global_l2s[i] = NULL; ++ mali_global_num_l2s--; ++ ++ if (i == mali_global_num_l2s) { ++ /* Removed last element, nothing more to do */ ++ break; ++ } ++ ++ /* ++ * We removed a l2 cache from the middle of the array, ++ * so move the last l2 cache to current position ++ */ ++ mali_global_l2s[i] = mali_global_l2s[mali_global_num_l2s]; ++ mali_global_l2s[mali_global_num_l2s] = NULL; ++ ++ /* All good */ ++ break; ++ } ++ ++ _mali_osk_spinlock_irq_term(cache->lock); ++ mali_hw_core_delete(&cache->hw_core); ++ _mali_osk_free(cache); +} + -+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain) ++void mali_l2_cache_power_up(struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+ if (0 == domain->use_count) { -+ _mali_osk_pm_dev_ref_get_async(); -+ } ++ mali_l2_cache_lock(cache); + -+ ++domain->use_count; -+ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count)); ++ mali_l2_cache_reset(cache); + -+ /* Return our mask so caller can check this against wanted mask */ -+ return domain->pmu_mask; ++ if ((1 << MALI_DOMAIN_INDEX_DUMMY) != cache->pm_domain->pmu_mask) ++ MALI_DEBUG_ASSERT(MALI_FALSE == cache->power_is_on); ++ cache->power_is_on = MALI_TRUE; ++ ++ mali_l2_cache_unlock(cache); +} + -+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain) ++void mali_l2_cache_power_down(struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+ --domain->use_count; -+ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count)); ++ mali_l2_cache_lock(cache); + -+ if (0 == domain->use_count) { -+ _mali_osk_pm_dev_ref_put(); -+ } ++ MALI_DEBUG_ASSERT(MALI_TRUE == cache->power_is_on); + + /* -+ * Return the PMU mask which now could be be powered down -+ * (the bit for this domain). -+ * This is the responsibility of the caller (mali_pm) ++ * The HW counters will start from zero again when we resume, ++ * but we should report counters as always increasing. ++ * Take a copy of the HW values now in order to add this to ++ * the values we report after being powered up. ++ * ++ * The physical power off of the L2 cache might be outside our ++ * own control (e.g. runtime PM). That is why we must manually ++ * set set the counter value to zero as well. + */ -+ return (0 == domain->use_count ? 
domain->pmu_mask : 0); -+} + -+#if MALI_STATE_TRACKING -+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain) -+{ -+ u32 id = 0; ++ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { ++ cache->counter_value0_base += mali_hw_core_register_read( ++ &cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0); ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0, 0); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ MALI_DEBUG_ASSERT(0 != domain->pmu_mask); ++ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { ++ cache->counter_value1_base += mali_hw_core_register_read( ++ &cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1); ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1, 0); ++ } + -+ id = _mali_osk_fls(domain->pmu_mask) - 1; + -+ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); -+ /* Verify that pmu_mask only one bit is set */ -+ MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask); -+ /* Verify that we have stored the domain at right id/index */ -+ MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]); ++ cache->power_is_on = MALI_FALSE; + -+ return id; ++ mali_l2_cache_unlock(cache); +} -+#endif + -+#if defined(DEBUG) -+mali_bool mali_pm_domain_all_unused(void) ++void mali_l2_cache_core_set_counter_src( ++ struct mali_l2_cache_core *cache, u32 source_id, u32 counter) +{ -+ int i; ++ u32 reg_offset_src; ++ u32 reg_offset_val; + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { -+ if (NULL == mali_pm_domains[i]) { -+ /* Nothing to check */ -+ continue; -+ } ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ MALI_DEBUG_ASSERT(source_id >= 0 && source_id <= 1); + -+ if (MALI_TRUE == mali_pm_domains[i]->power_is_on) { -+ /* Not ready for suspend! */ -+ return MALI_FALSE; -+ } ++ mali_l2_cache_lock(cache); + -+ if (0 != mali_pm_domains[i]->use_count) { -+ /* Not ready for suspend! */ -+ return MALI_FALSE; -+ } ++ if (0 == source_id) { ++ /* start counting from 0 */ ++ cache->counter_value0_base = 0; ++ cache->counter_src0 = counter; ++ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0; ++ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0; ++ } else { ++ /* start counting from 0 */ ++ cache->counter_value1_base = 0; ++ cache->counter_src1 = counter; ++ reg_offset_src = MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1; ++ reg_offset_val = MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1; + } + -+ return MALI_TRUE; -+} -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h -new file mode 100644 -index 000000000..5776abe39 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h -@@ -0,0 +1,104 @@ -+/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#ifndef __MALI_PM_DOMAIN_H__ -+#define __MALI_PM_DOMAIN_H__ ++ if (cache->power_is_on) { ++ u32 hw_src; + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" ++ if (MALI_HW_CORE_NO_COUNTER != counter) { ++ hw_src = counter; ++ } else { ++ hw_src = 0; /* disable value for HW */ ++ } + -+#include "mali_l2_cache.h" -+#include "mali_group.h" -+#include "mali_pmu.h" ++ /* Set counter src */ ++ mali_hw_core_register_write(&cache->hw_core, ++ reg_offset_src, hw_src); + -+/* Instances are protected by PM state lock */ -+struct mali_pm_domain { -+ mali_bool power_is_on; -+ s32 use_count; -+ u32 pmu_mask; ++ /* Make sure the HW starts counting from 0 again */ ++ mali_hw_core_register_write(&cache->hw_core, ++ reg_offset_val, 0); ++ } + -+ /* Zero or more groups can belong to this domain */ -+ _mali_osk_list_t group_list; ++ mali_l2_cache_unlock(cache); ++} + -+ /* Zero or more L2 caches can belong to this domain */ -+ _mali_osk_list_t l2_cache_list; -+}; ++void mali_l2_cache_core_get_counter_values( ++ struct mali_l2_cache_core *cache, ++ u32 *src0, u32 *value0, u32 *src1, u32 *value1) ++{ ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ MALI_DEBUG_ASSERT(NULL != src0); ++ MALI_DEBUG_ASSERT(NULL != value0); ++ MALI_DEBUG_ASSERT(NULL != src1); ++ MALI_DEBUG_ASSERT(NULL != value1); + ++ mali_l2_cache_lock(cache); + -+void mali_pm_domain_initialize(void); -+void mali_pm_domain_terminate(void); ++ *src0 = cache->counter_src0; ++ *src1 = cache->counter_src1; + -+struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask); -+void mali_pm_domain_delete(struct mali_pm_domain *domain); ++ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { ++ if (MALI_TRUE == cache->power_is_on) { ++ *value0 = mali_hw_core_register_read(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL0); ++ } else { ++ *value0 = 0; ++ } + -+void mali_pm_domain_add_l2_cache( -+ struct mali_pm_domain *domain, -+ struct mali_l2_cache_core *l2_cache); -+void mali_pm_domain_add_group(struct mali_pm_domain *domain, -+ struct mali_group *group); ++ /* Add base offset value (in case we have been power off) */ ++ *value0 += cache->counter_value0_base; ++ } + -+struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask); -+struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id); ++ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { ++ if (MALI_TRUE == cache->power_is_on) { ++ *value1 = mali_hw_core_register_read(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_VAL1); ++ } else { ++ *value1 = 0; ++ } + -+/* Ref counting */ -+u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain); -+u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain); ++ /* Add base offset value (in case we have been power off) */ ++ *value1 += cache->counter_value1_base; ++ } + -+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list( -+ struct mali_pm_domain *domain) -+{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ return &domain->group_list; ++ mali_l2_cache_unlock(cache); +} + -+MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list( -+ struct mali_pm_domain *domain) ++struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ return &domain->l2_cache_list; -+} ++ if (mali_global_num_l2s > index) { ++ return mali_global_l2s[index]; ++ } + -+MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on( -+ struct mali_pm_domain *domain) -+{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ return domain->power_is_on; ++ return NULL; +} + -+MALI_STATIC_INLINE void 
mali_pm_domain_set_power_on( -+ struct mali_pm_domain *domain, -+ mali_bool power_is_on) ++u32 mali_l2_cache_core_get_glob_num_l2_cores(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ domain->power_is_on = power_is_on; ++ return mali_global_num_l2s; +} + -+MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count( -+ struct mali_pm_domain *domain) ++void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ return domain->use_count; -+} -+ -+#if MALI_STATE_TRACKING -+u32 mali_pm_domain_get_id(struct mali_pm_domain *domain); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain) -+{ -+ MALI_DEBUG_ASSERT_POINTER(domain); -+ return domain->pmu_mask; -+} -+#endif ++ if (NULL == cache) { ++ return; ++ } + -+#if defined(DEBUG) -+mali_bool mali_pm_domain_all_unused(void); -+#endif ++ mali_l2_cache_lock(cache); + -+#endif /* __MALI_PM_DOMAIN_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c -new file mode 100644 -index 000000000..cf7482323 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c -@@ -0,0 +1,255 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include "mali_pm_metrics.h" -+#include "mali_osk_locks.h" -+#include "mali_osk_mali.h" -+#include ++ cache->last_invalidated_id = mali_scheduler_get_new_cache_order(); ++ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND, ++ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); + -+#define MALI_PM_TIME_SHIFT 0 -+#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */ ++ mali_l2_cache_unlock(cache); ++} + -+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev) ++void mali_l2_cache_invalidate_conditional( ++ struct mali_l2_cache_core *cache, u32 id) +{ -+ int i = 0; -+ -+ MALI_DEBUG_ASSERT(mdev != NULL); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+ mdev->mali_metrics.time_period_start = ktime_get(); -+ mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start; -+ mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start; ++ if (NULL == cache) { ++ return; ++ } + -+ mdev->mali_metrics.time_busy = 0; -+ mdev->mali_metrics.time_idle = 0; -+ mdev->mali_metrics.prev_busy = 0; -+ mdev->mali_metrics.prev_idle = 0; -+ mdev->mali_metrics.num_running_gp_cores = 0; -+ mdev->mali_metrics.num_running_pp_cores = 0; -+ mdev->mali_metrics.time_busy_gp = 0; -+ mdev->mali_metrics.time_idle_gp = 0; ++ /* ++ * If the last cache invalidation was done by a job with a higher id we ++ * don't have to flush. Since user space will store jobs w/ their ++ * corresponding memory in sequence (first job #0, then job #1, ...), ++ * we don't have to flush for job n-1 if job n has already invalidated ++ * the cache since we know for sure that job n-1's memory was already ++ * written when job n was started. 
++ */ + -+ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) { -+ mdev->mali_metrics.time_busy_pp[i] = 0; -+ mdev->mali_metrics.time_idle_pp[i] = 0; -+ } -+ mdev->mali_metrics.gpu_active = MALI_FALSE; ++ mali_l2_cache_lock(cache); + -+ mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); -+ if (NULL == mdev->mali_metrics.lock) { -+ return _MALI_OSK_ERR_NOMEM; ++ if (((s32)id) > ((s32)cache->last_invalidated_id)) { ++ /* Set latest invalidated id to current "point in time" */ ++ cache->last_invalidated_id = ++ mali_scheduler_get_new_cache_order(); ++ mali_l2_cache_send_command(cache, ++ MALI400_L2_CACHE_REGISTER_COMMAND, ++ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); + } + -+ return _MALI_OSK_ERR_OK; -+} -+ -+void mali_pm_metrics_term(struct mali_device *mdev) -+{ -+ _mali_osk_spinlock_irq_term(mdev->mali_metrics.lock); ++ mali_l2_cache_unlock(cache); +} + -+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/ -+void mali_pm_record_job_status(struct mali_device *mdev) ++void mali_l2_cache_invalidate_all(void) +{ -+ ktime_t now; -+ ktime_t diff; -+ u64 ns_time; -+ -+ MALI_DEBUG_ASSERT(mdev != NULL); -+ -+ now = ktime_get(); -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); -+ -+ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_busy += ns_time; -+ mdev->mali_metrics.time_period_start = now; -+} ++ u32 i; ++ for (i = 0; i < mali_global_num_l2s; i++) { ++ struct mali_l2_cache_core *cache = mali_global_l2s[i]; ++ _mali_osk_errcode_t ret; + -+void mali_pm_record_gpu_idle(mali_bool is_gp) -+{ -+ ktime_t now; -+ ktime_t diff; -+ u64 ns_time; -+ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+ MALI_DEBUG_ASSERT(mdev != NULL); ++ mali_l2_cache_lock(cache); + -+ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); -+ now = ktime_get(); ++ if (MALI_TRUE != cache->power_is_on) { ++ mali_l2_cache_unlock(cache); ++ continue; ++ } + -+ if (MALI_TRUE == is_gp) { -+ --mdev->mali_metrics.num_running_gp_cores; -+ if (0 == mdev->mali_metrics.num_running_gp_cores) { -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp); -+ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_busy_gp += ns_time; -+ mdev->mali_metrics.time_period_start_gp = now; ++ cache->last_invalidated_id = ++ mali_scheduler_get_new_cache_order(); + -+ if (0 == mdev->mali_metrics.num_running_pp_cores) { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); -+ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_busy += ns_time; -+ mdev->mali_metrics.time_period_start = now; -+ mdev->mali_metrics.gpu_active = MALI_FALSE; -+ } ++ ret = mali_l2_cache_send_command(cache, ++ MALI400_L2_CACHE_REGISTER_COMMAND, ++ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to invalidate cache\n")); + } -+ } else { -+ --mdev->mali_metrics.num_running_pp_cores; -+ if (0 == mdev->mali_metrics.num_running_pp_cores) { -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp); -+ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_busy_pp[0] += ns_time; -+ mdev->mali_metrics.time_period_start_pp = now; + -+ if (0 == mdev->mali_metrics.num_running_gp_cores) { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); -+ 
diff = ktime_sub(now, mdev->mali_metrics.time_period_start); -+ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_busy += ns_time; -+ mdev->mali_metrics.time_period_start = now; -+ mdev->mali_metrics.gpu_active = MALI_FALSE; -+ } -+ } ++ mali_l2_cache_unlock(cache); + } -+ -+ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); +} + -+void mali_pm_record_gpu_active(mali_bool is_gp) ++void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages) +{ -+ ktime_t now; -+ ktime_t diff; -+ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); ++ u32 i; ++ for (i = 0; i < mali_global_num_l2s; i++) { ++ struct mali_l2_cache_core *cache = mali_global_l2s[i]; ++ u32 j; + -+ MALI_DEBUG_ASSERT(mdev != NULL); ++ MALI_DEBUG_ASSERT_POINTER(cache); + -+ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); -+ now = ktime_get(); ++ mali_l2_cache_lock(cache); + -+ if (MALI_TRUE == is_gp) { -+ mdev->mali_metrics.num_running_gp_cores++; -+ if (1 == mdev->mali_metrics.num_running_gp_cores) { -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp); -+ mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_period_start_gp = now; -+ if (0 == mdev->mali_metrics.num_running_pp_cores) { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE); -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); -+ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_period_start = now; -+ mdev->mali_metrics.gpu_active = MALI_TRUE; -+ } -+ } else { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); ++ if (MALI_TRUE != cache->power_is_on) { ++ mali_l2_cache_unlock(cache); ++ continue; + } -+ } else { -+ mdev->mali_metrics.num_running_pp_cores++; -+ if (1 == mdev->mali_metrics.num_running_pp_cores) { -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp); -+ mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_period_start_pp = now; -+ if (0 == mdev->mali_metrics.num_running_gp_cores) { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE); -+ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); -+ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ mdev->mali_metrics.time_period_start = now; -+ mdev->mali_metrics.gpu_active = MALI_TRUE; ++ ++ for (j = 0; j < num_pages; j++) { ++ _mali_osk_errcode_t ret; ++ ++ ret = mali_l2_cache_send_command(cache, ++ MALI400_L2_CACHE_REGISTER_CLEAR_PAGE, ++ pages[j]); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to invalidate cache (page)\n")); + } -+ } else { -+ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); + } -+ } + -+ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++ mali_l2_cache_unlock(cache); ++ } +} + ++/* -------- local helper functions below -------- */ + -+/*caller needs to hold mdev->mali_metrics.lock before calling this function*/ -+static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now) ++static void mali_l2_cache_reset(struct mali_l2_cache_core *cache) +{ -+ ktime_t diff; ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock); + -+ MALI_DEBUG_ASSERT(mdev != NULL); ++ /* Invalidate cache (just to keep it in a known state at startup) */ ++ mali_l2_cache_send_command(cache, MALI400_L2_CACHE_REGISTER_COMMAND, ++ MALI400_L2_CACHE_COMMAND_CLEAR_ALL); + -+ diff = 
ktime_sub(now, mdev->mali_metrics.time_period_start); ++ /* Enable cache */ ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_ENABLE, ++ (u32)MALI400_L2_CACHE_ENABLE_ACCESS | ++ (u32)MALI400_L2_CACHE_ENABLE_READ_ALLOCATE); + -+ if (mdev->mali_metrics.gpu_active) { -+ mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); -+ } else { -+ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ if (MALI400_L2_MAX_READS_NOT_SET != mali_l2_max_reads) { ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_MAX_READS, ++ (u32)mali_l2_max_reads); + } -+} + -+/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */ -+static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now) -+{ -+ /* Store previous value */ -+ mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle; -+ mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy; -+ -+ /* Reset current values */ -+ mdev->mali_metrics.time_period_start = now; -+ mdev->mali_metrics.time_period_start_gp = now; -+ mdev->mali_metrics.time_period_start_pp = now; -+ mdev->mali_metrics.time_idle = 0; -+ mdev->mali_metrics.time_busy = 0; ++ /* Restart any performance counters (if enabled) */ ++ if (cache->counter_src0 != MALI_HW_CORE_NO_COUNTER) { + -+ mdev->mali_metrics.time_busy_gp = 0; -+ mdev->mali_metrics.time_idle_gp = 0; -+ mdev->mali_metrics.time_busy_pp[0] = 0; -+ mdev->mali_metrics.time_idle_pp[0] = 0; -+} ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC0, ++ cache->counter_src0); ++ } + -+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev) -+{ -+ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); -+ mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get()); -+ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++ if (cache->counter_src1 != MALI_HW_CORE_NO_COUNTER) { ++ mali_hw_core_register_write(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_PERFCNT_SRC1, ++ cache->counter_src1); ++ } +} + -+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, -+ unsigned long *total_out, unsigned long *busy_out) ++static _mali_osk_errcode_t mali_l2_cache_send_command( ++ struct mali_l2_cache_core *cache, u32 reg, u32 val) +{ -+ ktime_t now = ktime_get(); -+ u64 busy = 0; -+ u64 total = 0; -+ -+ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); -+ -+ mali_pm_get_dvfs_utilisation_calc(mdev, now); ++ int i = 0; ++ const int loop_count = 100000; + -+ busy = mdev->mali_metrics.time_busy; -+ total = busy + mdev->mali_metrics.time_idle; ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ MALI_DEBUG_ASSERT_LOCK_HELD(cache->lock); + -+ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default -+ * 100ms) */ -+ if (total >= MALI_UTILIZATION_MAX_PERIOD) { -+ mali_pm_reset_dvfs_utilisation_unlocked(mdev, now); -+ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { -+ total += mdev->mali_metrics.prev_idle + -+ mdev->mali_metrics.prev_busy; -+ busy += mdev->mali_metrics.prev_busy; ++ /* ++ * First, wait for L2 cache command handler to go idle. 
++ * (Commands received while processing another command will be ignored) ++ */ ++ for (i = 0; i < loop_count; i++) { ++ if (!(mali_hw_core_register_read(&cache->hw_core, ++ MALI400_L2_CACHE_REGISTER_STATUS) & ++ (u32)MALI400_L2_CACHE_STATUS_COMMAND_BUSY)) { ++ break; ++ } + } + -+ *total_out = (unsigned long)total; -+ *busy_out = (unsigned long)busy; -+ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); -+} ++ if (i == loop_count) { ++ MALI_DEBUG_PRINT(1, ("Mali L2 cache: aborting wait for command interface to go idle\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+void mali_pm_metrics_spin_lock(void) -+{ -+ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); -+ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); -+} ++ /* then issue the command */ ++ mali_hw_core_register_write(&cache->hw_core, reg, val); + -+void mali_pm_metrics_spin_unlock(void) -+{ -+ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); -+ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++ return _MALI_OSK_ERR_OK; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h new file mode 100644 -index 000000000..2b136b0de +index 000000000..c48a88440 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h -@@ -0,0 +1,74 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_l2_cache.h +@@ -0,0 +1,124 @@ +/* + * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * @@ -295207,354 +298246,202 @@ index 000000000..2b136b0de + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_PM_METRICS_H__ -+#define __MALI_PM_METRICS_H__ ++#ifndef __MALI_KERNEL_L2_CACHE_H__ ++#define __MALI_KERNEL_L2_CACHE_H__ + -+#ifdef CONFIG_MALI_DEVFREQ -+#include "mali_osk_locks.h" -+#include "mali_group.h" ++#include "mali_osk.h" ++#include "mali_hw_core.h" + -+struct mali_device; ++#define MALI_MAX_NUMBER_OF_L2_CACHE_CORES 3 ++/* Maximum 1 GP and 4 PP for an L2 cache core (Mali-400 MP4) */ ++#define MALI_MAX_NUMBER_OF_GROUPS_PER_L2_CACHE 5 + +/** -+ * Metrics data collected for use by the power management framework. ++ * Definition of the L2 cache core struct ++ * Used to track a L2 cache unit in the system. ++ * Contains information about the mapping of the registers + */ -+struct mali_pm_metrics_data { -+ ktime_t time_period_start; -+ u64 time_busy; -+ u64 time_idle; -+ u64 prev_busy; -+ u64 prev_idle; -+ u32 num_running_gp_cores; -+ u32 num_running_pp_cores; -+ ktime_t time_period_start_gp; -+ u64 time_busy_gp; -+ u64 time_idle_gp; -+ ktime_t time_period_start_pp; -+ u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; -+ u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; -+ mali_bool gpu_active; ++struct mali_l2_cache_core { ++ /* Common HW core functionality */ ++ struct mali_hw_core hw_core; ++ ++ /* Synchronize L2 cache access */ + _mali_osk_spinlock_irq_t *lock; -+}; + -+/** -+ * Initialize/start the Mali GPU pm_metrics metrics reporting. -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. 
-+ */ -+_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev); ++ /* Unique core ID */ ++ u32 core_id; + -+/** -+ * Terminate the Mali GPU pm_metrics metrics reporting -+ */ -+void mali_pm_metrics_term(struct mali_device *mdev); ++ /* The power domain this L2 cache belongs to */ ++ struct mali_pm_domain *pm_domain; + -+/** -+ * Should be called when a job is about to execute a GPU job -+ */ -+void mali_pm_record_gpu_active(mali_bool is_gp); ++ /* MALI_TRUE if power is on for this L2 cache */ ++ mali_bool power_is_on; + -+/** -+ * Should be called when a job is finished -+ */ -+void mali_pm_record_gpu_idle(mali_bool is_gp); ++ /* A "timestamp" to avoid unnecessary flushes */ ++ u32 last_invalidated_id; + -+void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev); ++ /* Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ ++ u32 counter_src0; + -+void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out); ++ /* Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ ++ u32 counter_src1; + -+void mali_pm_metrics_spin_lock(void); ++ /* ++ * Performance counter 0 value base/offset ++ * (allows accumulative reporting even after power off) ++ */ ++ u32 counter_value0_base; + -+void mali_pm_metrics_spin_unlock(void); -+#else -+void mali_pm_record_gpu_idle(mali_bool is_gp) {} -+void mali_pm_record_gpu_active(mali_bool is_gp) {} -+#endif -+#endif /* __MALI_PM_METRICS_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pmu.c b/drivers/gpu/arm/mali400/mali/common/mali_pmu.c -new file mode 100644 -index 000000000..6f0af59f6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pmu.c -@@ -0,0 +1,270 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ /* ++ * Performance counter 0 value base/offset ++ * (allows accumulative reporting even after power off) ++ */ ++ u32 counter_value1_base; + -+/** -+ * @file mali_pmu.c -+ * Mali driver functions for Mali 400 PMU hardware -+ */ -+#include "mali_hw_core.h" -+#include "mali_pmu.h" -+#include "mali_pp.h" -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_pm.h" -+#include "mali_osk_mali.h" ++ /* Used by PM domains to link L2 caches of same domain */ ++ _mali_osk_list_t pm_domain_list; ++}; + -+struct mali_pmu_core *mali_global_pmu_core = NULL; ++_mali_osk_errcode_t mali_l2_cache_initialize(void); ++void mali_l2_cache_terminate(void); + -+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish( -+ struct mali_pmu_core *pmu); ++struct mali_l2_cache_core *mali_l2_cache_create( ++ _mali_osk_resource_t *resource, u32 domain_index); ++void mali_l2_cache_delete(struct mali_l2_cache_core *cache); + -+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource) ++MALI_STATIC_INLINE u32 mali_l2_cache_get_id(struct mali_l2_cache_core *cache) +{ -+ struct mali_pmu_core *pmu; -+ -+ MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core); -+ MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n")); -+ -+ pmu = (struct mali_pmu_core *)_mali_osk_malloc( -+ sizeof(struct mali_pmu_core)); -+ if (NULL != pmu) { -+ pmu->registered_cores_mask = 0; /* to be set later */ -+ -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core, -+ resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) { -+ -+ pmu->switch_delay = _mali_osk_get_pmu_switch_delay(); -+ -+ mali_global_pmu_core = pmu; -+ -+ return pmu; -+ } -+ _mali_osk_free(pmu); -+ } -+ -+ return NULL; ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ return cache->core_id; +} + -+void mali_pmu_delete(struct mali_pmu_core *pmu) ++MALI_STATIC_INLINE struct mali_pm_domain *mali_l2_cache_get_pm_domain( ++ struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core); -+ -+ MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n")); ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ return cache->pm_domain; ++} + -+ mali_global_pmu_core = NULL; ++void mali_l2_cache_power_up(struct mali_l2_cache_core *cache); ++void mali_l2_cache_power_down(struct mali_l2_cache_core *cache); + -+ mali_hw_core_delete(&pmu->hw_core); -+ _mali_osk_free(pmu); -+} ++void mali_l2_cache_core_set_counter_src( ++ struct mali_l2_cache_core *cache, u32 source_id, u32 counter); + -+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask) ++MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src0( ++ struct mali_l2_cache_core *cache) +{ -+ pmu->registered_cores_mask = mask; ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ return cache->counter_src0; +} + -+void mali_pmu_reset(struct mali_pmu_core *pmu) ++MALI_STATIC_INLINE u32 mali_l2_cache_core_get_counter_src1( ++ struct mali_l2_cache_core *cache) +{ -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); -+ -+ /* Setup the desired defaults */ -+ mali_hw_core_register_write_relaxed(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_MASK, 0); -+ mali_hw_core_register_write_relaxed(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay); ++ MALI_DEBUG_ASSERT_POINTER(cache); ++ return cache->counter_src1; +} + -+void mali_pmu_power_up_all(struct mali_pmu_core *pmu) -+{ -+ u32 stat; -+ -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); ++void mali_l2_cache_core_get_counter_values( ++ struct 
mali_l2_cache_core *cache, ++ u32 *src0, u32 *value0, u32 *src1, u32 *value1); + -+ mali_pm_exec_lock(); ++struct mali_l2_cache_core *mali_l2_cache_core_get_glob_l2_core(u32 index); ++u32 mali_l2_cache_core_get_glob_num_l2_cores(void); + -+ mali_pmu_reset(pmu); ++struct mali_group *mali_l2_cache_get_group( ++ struct mali_l2_cache_core *cache, u32 index); + -+ /* Now simply power up the domains which are marked as powered down */ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ mali_pmu_power_up(pmu, stat); ++void mali_l2_cache_invalidate(struct mali_l2_cache_core *cache); ++void mali_l2_cache_invalidate_conditional( ++ struct mali_l2_cache_core *cache, u32 id); + -+ mali_pm_exec_unlock(); -+} ++void mali_l2_cache_invalidate_all(void); ++void mali_l2_cache_invalidate_all_pages(u32 *pages, u32 num_pages); + -+void mali_pmu_power_down_all(struct mali_pmu_core *pmu) -+{ -+ u32 stat; ++#endif /* __MALI_KERNEL_L2_CACHE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c +new file mode 100644 +index 000000000..eb95998f1 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.c +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); ++#include "mali_mem_validation.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+ mali_pm_exec_lock(); ++#define MALI_INVALID_MEM_ADDR 0xFFFFFFFF + -+ /* Now simply power down the domains which are marked as powered up */ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask); ++typedef struct { ++ u32 phys_base; /**< Mali physical base of the memory, page aligned */ ++ u32 size; /**< size in bytes of the memory, multiple of page size */ ++} _mali_mem_validation_t; + -+ mali_pm_exec_unlock(); -+} ++static _mali_mem_validation_t mali_mem_validator = { MALI_INVALID_MEM_ADDR, MALI_INVALID_MEM_ADDR }; + -+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask) ++_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size) +{ -+ u32 stat; -+ _mali_osk_errcode_t err; ++ /* Check that no other MEM_VALIDATION resources exist */ ++ if (MALI_INVALID_MEM_ADDR != mali_mem_validator.phys_base) { ++ MALI_PRINT_ERROR(("Failed to add frame buffer memory; another range is already specified\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); -+ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask); -+ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_RAWSTAT) & -+ PMU_REG_VAL_IRQ)); ++ /* Check restrictions on page alignment */ ++ if ((0 != (start & (~_MALI_OSK_CPU_PAGE_MASK))) || ++ (0 != (size & (~_MALI_OSK_CPU_PAGE_MASK)))) { ++ MALI_PRINT_ERROR(("Failed to add frame buffer memory; incorrect alignment\n")); ++ 
return _MALI_OSK_ERR_FAULT; ++ } + -+ MALI_DEBUG_PRINT(3, -+ ("PMU power down: ...................... [%s]\n", -+ mali_pm_mask_to_string(mask))); -+ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ -+ /* -+ * Assert that we are not powering down domains which are already -+ * powered down. -+ */ -+ MALI_DEBUG_ASSERT(0 == (stat & mask)); -+ -+ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY); -+ -+ if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK; -+ -+ mali_hw_core_register_write(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_POWER_DOWN, mask); -+ -+ /* -+ * Do not wait for interrupt on Mali-300/400 if all domains are -+ * powered off by our power down command, because the HW will simply -+ * not generate an interrupt in this case. -+ */ -+ if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) { -+ err = mali_pmu_wait_for_command_finish(pmu); -+ if (_MALI_OSK_ERR_OK != err) { -+ return err; -+ } -+ } else { -+ mali_hw_core_register_write(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ); -+ } -+ -+#if defined(DEBUG) -+ /* Verify power status of domains after power down */ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ MALI_DEBUG_ASSERT(mask == (stat & mask)); -+#endif ++ mali_mem_validator.phys_base = start; ++ mali_mem_validator.size = size; ++ MALI_DEBUG_PRINT(2, ("Memory Validator installed for Mali physical address base=0x%08X, size=0x%08X\n", ++ mali_mem_validator.phys_base, mali_mem_validator.size)); + + return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask) ++_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size) +{ -+ u32 stat; -+ _mali_osk_errcode_t err; -+#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP) -+ u32 current_domain; -+#endif -+ -+ MALI_DEBUG_ASSERT_POINTER(pmu); -+ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); -+ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask); -+ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_RAWSTAT) & -+ PMU_REG_VAL_IRQ)); -+ -+ MALI_DEBUG_PRINT(3, -+ ("PMU power up: ........................ [%s]\n", -+ mali_pm_mask_to_string(mask))); -+ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ stat &= pmu->registered_cores_mask; -+ -+ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY); -+ if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK; -+ -+ /* -+ * Assert that we are only powering up domains which are currently -+ * powered down. 
-+ */ -+ MALI_DEBUG_ASSERT(mask == (stat & mask)); -+ -+#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP) -+ mali_hw_core_register_write(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_POWER_UP, mask); -+ -+ err = mali_pmu_wait_for_command_finish(pmu); -+ if (_MALI_OSK_ERR_OK != err) { -+ return err; -+ } -+#else -+ for (current_domain = 1; -+ current_domain <= pmu->registered_cores_mask; -+ current_domain <<= 1) { -+ if (current_domain & mask & stat) { -+ mali_hw_core_register_write(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_POWER_UP, -+ current_domain); -+ -+ err = mali_pmu_wait_for_command_finish(pmu); -+ if (_MALI_OSK_ERR_OK != err) { -+ return err; ++#if 0 ++ if (phys_addr < (phys_addr + size)) { /* Don't allow overflow (or zero size) */ ++ if ((0 == (phys_addr & (~_MALI_OSK_CPU_PAGE_MASK))) && ++ (0 == (size & (~_MALI_OSK_CPU_PAGE_MASK)))) { ++ if ((phys_addr >= mali_mem_validator.phys_base) && ++ ((phys_addr + (size - 1)) >= mali_mem_validator.phys_base) && ++ (phys_addr <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1))) && ++ ((phys_addr + (size - 1)) <= (mali_mem_validator.phys_base + (mali_mem_validator.size - 1)))) { ++ MALI_DEBUG_PRINT(3, ("Accepted range 0x%08X + size 0x%08X (= 0x%08X)\n", phys_addr, size, (phys_addr + size - 1))); ++ return _MALI_OSK_ERR_OK; + } + } + } -+#endif -+ -+#if defined(DEBUG) -+ /* Verify power status of domains after power up */ -+ stat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_STATUS); -+ MALI_DEBUG_ASSERT(0 == (stat & mask)); -+#endif /* defined(DEBUG) */ -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+static _mali_osk_errcode_t mali_pmu_wait_for_command_finish( -+ struct mali_pmu_core *pmu) -+{ -+ u32 rawstat; -+ u32 timeout = MALI_REG_POLL_COUNT_SLOW; -+ -+ MALI_DEBUG_ASSERT(pmu); -+ -+ /* Wait for the command to complete */ -+ do { -+ rawstat = mali_hw_core_register_read(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_RAWSTAT); -+ --timeout; -+ } while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout); -+ -+ MALI_DEBUG_ASSERT(0 < timeout); -+ -+ if (0 == timeout) { -+ return _MALI_OSK_ERR_TIMEOUT; -+ } + -+ mali_hw_core_register_write(&pmu->hw_core, -+ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ); ++ MALI_PRINT_ERROR(("MALI PHYSICAL RANGE VALIDATION ERROR: The range supplied was: phys_base=0x%08X, size=0x%08X\n", phys_addr, size)); + ++ return _MALI_OSK_ERR_FAULT; ++#endif + return _MALI_OSK_ERR_OK; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pmu.h b/drivers/gpu/arm/mali400/mali/common/mali_pmu.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h new file mode 100644 -index 000000000..5b856240f +index 000000000..05013f46f --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pmu.h -@@ -0,0 +1,123 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mem_validation.h +@@ -0,0 +1,19 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2013, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -295563,127 +298450,23 @@ index 000000000..5b856240f + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+/** -+ * @file mali_platform.h -+ * Platform specific Mali driver functions -+ */ -+ -+#ifndef __MALI_PMU_H__ -+#define __MALI_PMU_H__ ++#ifndef __MALI_MEM_VALIDATION_H__ ++#define __MALI_MEM_VALIDATION_H__ + +#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_hw_core.h" -+ -+/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask -+ */ -+struct mali_pmu_core { -+ struct mali_hw_core hw_core; -+ u32 registered_cores_mask; -+ u32 switch_delay; -+}; -+ -+/** @brief Register layout for hardware PMU -+ */ -+typedef enum { -+ PMU_REG_ADDR_MGMT_POWER_UP = 0x00, /*< Power up register */ -+ PMU_REG_ADDR_MGMT_POWER_DOWN = 0x04, /*< Power down register */ -+ PMU_REG_ADDR_MGMT_STATUS = 0x08, /*< Core sleep status register */ -+ PMU_REG_ADDR_MGMT_INT_MASK = 0x0C, /*< Interrupt mask register */ -+ PMU_REG_ADDR_MGMT_INT_RAWSTAT = 0x10, /*< Interrupt raw status register */ -+ PMU_REG_ADDR_MGMT_INT_CLEAR = 0x18, /*< Interrupt clear register */ -+ PMU_REG_ADDR_MGMT_SW_DELAY = 0x1C, /*< Switch delay register */ -+ PMU_REGISTER_ADDRESS_SPACE_SIZE = 0x28, /*< Size of register space */ -+} pmu_reg_addr_mgmt_addr; -+ -+#define PMU_REG_VAL_IRQ 1 -+ -+extern struct mali_pmu_core *mali_global_pmu_core; -+ -+/** @brief Initialisation of MALI PMU -+ * -+ * This is called from entry point of the driver in order to create and intialize the PMU resource -+ * -+ * @param resource it will be a pointer to a PMU resource -+ * @param number_of_pp_cores Number of found PP resources in configuration -+ * @param number_of_l2_caches Number of found L2 cache resources in configuration -+ * @return The created PMU object, or NULL in case of failure. -+ */ -+struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource); -+ -+/** @brief It deallocates the PMU resource -+ * -+ * This is called on the exit of the driver to terminate the PMU resource -+ * -+ * @param pmu Pointer to PMU core object to delete -+ */ -+void mali_pmu_delete(struct mali_pmu_core *pmu); -+ -+/** @brief Set registered cores mask -+ * -+ * @param pmu Pointer to PMU core object -+ * @param mask All available/valid domain bits -+ */ -+void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask); -+ -+/** @brief Retrieves the Mali PMU core object (if any) -+ * -+ * @return The Mali PMU object, or NULL if no PMU exists. -+ */ -+MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void) -+{ -+ return mali_global_pmu_core; -+} -+ -+/** @brief Reset PMU core -+ * -+ * @param pmu Pointer to PMU core object to reset -+ */ -+void mali_pmu_reset(struct mali_pmu_core *pmu); + -+void mali_pmu_power_up_all(struct mali_pmu_core *pmu); -+ -+void mali_pmu_power_down_all(struct mali_pmu_core *pmu); -+ -+/** @brief Returns a mask of the currently powered up domains -+ * -+ * @param pmu Pointer to PMU core object -+ */ -+MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu) -+{ -+ u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS); -+ return ((~stat) & pmu->registered_cores_mask); -+} -+ -+/** @brief MALI GPU power down using MALI in-built PMU -+ * -+ * Called to power down the specified cores. -+ * -+ * @param pmu Pointer to PMU core object to power down -+ * @param mask Mask specifying which power domains to power down -+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error. 
-+ */ -+_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask); -+ -+/** @brief MALI GPU power up using MALI in-built PMU -+ * -+ * Called to power up the specified cores. -+ * -+ * @param pmu Pointer to PMU core object to power up -+ * @param mask Mask specifying which power domains to power up -+ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error. -+ */ -+_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask); ++_mali_osk_errcode_t mali_mem_validation_add_range(u32 start, u32 size); ++_mali_osk_errcode_t mali_mem_validation_check(u32 phys_addr, u32 size); + -+#endif /* __MALI_PMU_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp.c b/drivers/gpu/arm/mali400/mali/common/mali_pp.c ++#endif /* __MALI_MEM_VALIDATION_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu.c b/drivers/gpu/arm/mali400/mali/common/mali_mmu.c new file mode 100644 -index 000000000..2dd8b8766 +index 000000000..b82486fa6 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pp.c -@@ -0,0 +1,502 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu.c +@@ -0,0 +1,433 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -295692,506 +298475,437 @@ index 000000000..2dd8b8766 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include "mali_pp_job.h" -+#include "mali_pp.h" ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_ukk.h" ++ ++#include "mali_mmu.h" +#include "mali_hw_core.h" +#include "mali_group.h" -+#include "regs/mali_200_regs.h" -+#include "mali_kernel_common.h" -+#include "mali_kernel_core.h" ++#include "mali_mmu_page_directory.h" + -+#if defined(CONFIG_MALI400_PROFILING) -+#include "mali_osk_profiling.h" -+#endif ++/** ++ * Size of the MMU registers in bytes ++ */ ++#define MALI_MMU_REGISTERS_SIZE 0x24 + -+/* Number of frame registers on Mali-200 */ -+#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1) -+/* Number of frame registers on Mali-300 and later */ -+#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1) ++/** ++ * MMU commands ++ * These are the commands that can be sent ++ * to the MMU unit. 
++ */ ++typedef enum mali_mmu_command { ++ MALI_MMU_COMMAND_ENABLE_PAGING = 0x00, /**< Enable paging (memory translation) */ ++ MALI_MMU_COMMAND_DISABLE_PAGING = 0x01, /**< Disable paging (memory translation) */ ++ MALI_MMU_COMMAND_ENABLE_STALL = 0x02, /**< Enable stall on page fault */ ++ MALI_MMU_COMMAND_DISABLE_STALL = 0x03, /**< Disable stall on page fault */ ++ MALI_MMU_COMMAND_ZAP_CACHE = 0x04, /**< Zap the entire page table cache */ ++ MALI_MMU_COMMAND_PAGE_FAULT_DONE = 0x05, /**< Page fault processed */ ++ MALI_MMU_COMMAND_HARD_RESET = 0x06 /**< Reset the MMU back to power-on settings */ ++} mali_mmu_command; + -+static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL }; -+static u32 mali_global_num_pp_cores = 0; ++static void mali_mmu_probe_trigger(void *data); ++static _mali_osk_errcode_t mali_mmu_probe_ack(void *data); + -+/* Interrupt handlers */ -+static void mali_pp_irq_probe_trigger(void *data); -+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data); ++MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu); + -+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id) -+{ -+ struct mali_pp_core *core = NULL; ++/* page fault queue flush helper pages ++ * note that the mapping pointers are currently unused outside of the initialization functions */ ++static mali_dma_addr mali_page_fault_flush_page_directory = MALI_INVALID_PAGE; ++static mali_io_address mali_page_fault_flush_page_directory_mapping = NULL; ++static mali_dma_addr mali_page_fault_flush_page_table = MALI_INVALID_PAGE; ++static mali_io_address mali_page_fault_flush_page_table_mapping = NULL; ++static mali_dma_addr mali_page_fault_flush_data_page = MALI_INVALID_PAGE; ++static mali_io_address mali_page_fault_flush_data_page_mapping = NULL; + -+ MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description)); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base)); ++/* an empty page directory (no address valid) which is active on any MMU not currently marked as in use */ ++static mali_dma_addr mali_empty_page_directory_phys = MALI_INVALID_PAGE; ++static mali_io_address mali_empty_page_directory_virt = NULL; + -+ if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) { -+ MALI_PRINT_ERROR(("Mali PP: Too many PP core objects created\n")); -+ return NULL; ++ ++_mali_osk_errcode_t mali_mmu_initialize(void) ++{ ++ /* allocate the helper pages */ ++ mali_empty_page_directory_phys = mali_allocate_empty_page(&mali_empty_page_directory_virt); ++ if (0 == mali_empty_page_directory_phys) { ++ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate empty page directory.\n")); ++ mali_empty_page_directory_phys = MALI_INVALID_PAGE; ++ return _MALI_OSK_ERR_NOMEM; + } + -+ core = _mali_osk_calloc(1, sizeof(struct mali_pp_core)); -+ if (NULL != core) { -+ core->core_id = mali_global_num_pp_cores; -+ core->bcast_id = bcast_id; ++ if (_MALI_OSK_ERR_OK != mali_create_fault_flush_pages(&mali_page_fault_flush_page_directory, ++ &mali_page_fault_flush_page_directory_mapping, ++ &mali_page_fault_flush_page_table, ++ &mali_page_fault_flush_page_table_mapping, ++ &mali_page_fault_flush_data_page, ++ &mali_page_fault_flush_data_page_mapping)) { ++ MALI_DEBUG_PRINT_ERROR(("Mali MMU: Could not allocate fault flush pages\n")); ++ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt); ++ mali_empty_page_directory_phys = 
MALI_INVALID_PAGE; ++ mali_empty_page_directory_virt = NULL; ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) { -+ _mali_osk_errcode_t ret; ++ return _MALI_OSK_ERR_OK; ++} + -+ if (!is_virtual) { -+ ret = mali_pp_reset(core); -+ } else { -+ ret = _MALI_OSK_ERR_OK; -+ } ++void mali_mmu_terminate(void) ++{ ++ MALI_DEBUG_PRINT(3, ("Mali MMU: terminating\n")); + -+ if (_MALI_OSK_ERR_OK == ret) { -+ ret = mali_group_add_pp_core(group, core); -+ if (_MALI_OSK_ERR_OK == ret) { -+ /* Setup IRQ handlers (which will do IRQ probing if needed) */ -+ MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq); ++ /* Free global helper pages */ ++ mali_free_empty_page(mali_empty_page_directory_phys, mali_empty_page_directory_virt); ++ mali_empty_page_directory_phys = MALI_INVALID_PAGE; ++ mali_empty_page_directory_virt = NULL; + -+ core->irq = _mali_osk_irq_init(resource->irq, -+ mali_group_upper_half_pp, -+ group, -+ mali_pp_irq_probe_trigger, -+ mali_pp_irq_probe_ack, -+ core, -+ resource->description); -+ if (NULL != core->irq) { -+ mali_global_pp_cores[mali_global_num_pp_cores] = core; -+ mali_global_num_pp_cores++; ++ /* Free the page fault flush pages */ ++ mali_destroy_fault_flush_pages(&mali_page_fault_flush_page_directory, ++ &mali_page_fault_flush_page_directory_mapping, ++ &mali_page_fault_flush_page_table, ++ &mali_page_fault_flush_page_table_mapping, ++ &mali_page_fault_flush_data_page, ++ &mali_page_fault_flush_data_page_mapping); ++} + -+ return core; ++struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual) ++{ ++ struct mali_mmu_core *mmu = NULL; ++ ++ MALI_DEBUG_ASSERT_POINTER(resource); ++ ++ MALI_DEBUG_PRINT(2, ("Mali MMU: Creating Mali MMU: %s\n", resource->description)); ++ ++ mmu = _mali_osk_calloc(1, sizeof(struct mali_mmu_core)); ++ if (NULL != mmu) { ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&mmu->hw_core, resource, MALI_MMU_REGISTERS_SIZE)) { ++ if (_MALI_OSK_ERR_OK == mali_group_add_mmu_core(group, mmu)) { ++ if (is_virtual) { ++ /* Skip reset and IRQ setup for virtual MMU */ ++ return mmu; ++ } ++ ++ if (_MALI_OSK_ERR_OK == mali_mmu_reset(mmu)) { ++ /* Setup IRQ handlers (which will do IRQ probing if needed) */ ++ mmu->irq = _mali_osk_irq_init(resource->irq, ++ mali_group_upper_half_mmu, ++ group, ++ mali_mmu_probe_trigger, ++ mali_mmu_probe_ack, ++ mmu, ++ resource->description); ++ if (NULL != mmu->irq) { ++ return mmu; + } else { -+ MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description)); ++ MALI_PRINT_ERROR(("Mali MMU: Failed to setup interrupt handlers for MMU %s\n", mmu->hw_core.description)); + } -+ mali_group_remove_pp_core(group); -+ } else { -+ MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description)); + } ++ mali_group_remove_mmu_core(group); ++ } else { ++ MALI_PRINT_ERROR(("Mali MMU: Failed to add core %s to group\n", mmu->hw_core.description)); + } -+ mali_hw_core_delete(&core->hw_core); ++ mali_hw_core_delete(&mmu->hw_core); + } + -+ _mali_osk_free(core); ++ _mali_osk_free(mmu); + } else { -+ MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n")); ++ MALI_PRINT_ERROR(("Failed to allocate memory for MMU\n")); + } + + return NULL; +} + -+void mali_pp_delete(struct mali_pp_core *core) ++void mali_mmu_delete(struct mali_mmu_core *mmu) +{ -+ u32 i; -+ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ -+ 
_mali_osk_irq_term(core->irq); -+ mali_hw_core_delete(&core->hw_core); -+ -+ /* Remove core from global list */ -+ for (i = 0; i < mali_global_num_pp_cores; i++) { -+ if (mali_global_pp_cores[i] == core) { -+ mali_global_pp_cores[i] = NULL; -+ mali_global_num_pp_cores--; -+ -+ if (i != mali_global_num_pp_cores) { -+ /* We removed a PP core from the middle of the array -- move the last -+ * PP core to the current position to close the gap */ -+ mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores]; -+ mali_global_pp_cores[mali_global_num_pp_cores] = NULL; -+ } -+ -+ break; -+ } ++ if (NULL != mmu->irq) { ++ _mali_osk_irq_term(mmu->irq); + } + -+ _mali_osk_free(core); -+} -+ -+void mali_pp_stop_bus(struct mali_pp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ /* Will only send the stop bus command, and not wait for it to complete */ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS); ++ mali_hw_core_delete(&mmu->hw_core); ++ _mali_osk_free(mmu); +} + -+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core) ++static void mali_mmu_enable_paging(struct mali_mmu_core *mmu) +{ + int i; + -+ MALI_DEBUG_ASSERT_POINTER(core); -+ -+ /* Send the stop bus command. */ -+ mali_pp_stop_bus(core); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_PAGING); + -+ /* Wait for bus to be stopped */ -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { -+ if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED) ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { ++ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) & MALI_MMU_STATUS_BIT_PAGING_ENABLED) { + break; ++ } + } -+ + if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); -+ return _MALI_OSK_ERR_FAULT; ++ MALI_PRINT_ERROR(("Enable paging request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); + } -+ return _MALI_OSK_ERR_OK; +} + -+/* Frame register reset values. -+ * Taken from the Mali400 TRM, 3.6. 
Pixel processor control register summary */ -+static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = { -+ 0x0, /* Renderer List Address Register */ -+ 0x0, /* Renderer State Word Base Address Register */ -+ 0x0, /* Renderer Vertex Base Register */ -+ 0x2, /* Feature Enable Register */ -+ 0x0, /* Z Clear Value Register */ -+ 0x0, /* Stencil Clear Value Register */ -+ 0x0, /* ABGR Clear Value 0 Register */ -+ 0x0, /* ABGR Clear Value 1 Register */ -+ 0x0, /* ABGR Clear Value 2 Register */ -+ 0x0, /* ABGR Clear Value 3 Register */ -+ 0x0, /* Bounding Box Left Right Register */ -+ 0x0, /* Bounding Box Bottom Register */ -+ 0x0, /* FS Stack Address Register */ -+ 0x0, /* FS Stack Size and Initial Value Register */ -+ 0x0, /* Reserved */ -+ 0x0, /* Reserved */ -+ 0x0, /* Origin Offset X Register */ -+ 0x0, /* Origin Offset Y Register */ -+ 0x75, /* Subpixel Specifier Register */ -+ 0x0, /* Tiebreak mode Register */ -+ 0x0, /* Polygon List Format Register */ -+ 0x0, /* Scaling Register */ -+ 0x0 /* Tilebuffer configuration Register */ -+}; -+ -+/* WBx register reset values */ -+static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = { -+ 0x0, /* WBx Source Select Register */ -+ 0x0, /* WBx Target Address Register */ -+ 0x0, /* WBx Target Pixel Format Register */ -+ 0x0, /* WBx Target AA Format Register */ -+ 0x0, /* WBx Target Layout */ -+ 0x0, /* WBx Target Scanline Length */ -+ 0x0, /* WBx Target Flags Register */ -+ 0x0, /* WBx MRT Enable Register */ -+ 0x0, /* WBx MRT Offset Register */ -+ 0x0, /* WBx Global Test Enable Register */ -+ 0x0, /* WBx Global Test Reference Value Register */ -+ 0x0 /* WBx Global Test Compare Function Register */ -+}; -+ -+/* Performance Counter 0 Enable Register reset value */ -+static const u32 mali_perf_cnt_enable_reset_value = 0; -+ -+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core) ++/** ++ * Issues the enable stall command to the MMU and waits for HW to complete the request ++ * @param mmu The MMU to enable paging for ++ * @return MALI_TRUE if HW stall was successfully engaged, otherwise MALI_FALSE (req timed out) ++ */ ++static mali_bool mali_mmu_enable_stall(struct mali_mmu_core *mmu) +{ -+ /* Bus must be stopped before calling this function */ -+ const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT; -+ const u32 reset_invalid_value = 0xC0FFE000; -+ const u32 reset_check_value = 0xC01A0000; + int i; ++ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); + -+ MALI_DEBUG_ASSERT_POINTER(core); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description)); ++ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { ++ MALI_DEBUG_PRINT(4, ("MMU stall is implicit when Paging is not enabled.\n")); ++ return MALI_TRUE; ++ } + -+ /* Set register to a bogus value. 
The register will be used to detect when reset is complete */ -+ mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value); -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE); ++ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { ++ MALI_DEBUG_PRINT(3, ("Aborting MMU stall request since it is in pagefault state.\n")); ++ return MALI_FALSE; ++ } + -+ /* Force core to reset */ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ENABLE_STALL); + -+ /* Wait for reset to be complete */ -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { -+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value); -+ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) { ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { ++ mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); ++ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { ++ break; ++ } ++ if ((mmu_status & MALI_MMU_STATUS_BIT_STALL_ACTIVE) && (0 == (mmu_status & MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE))) { ++ break; ++ } ++ if (0 == (mmu_status & (MALI_MMU_STATUS_BIT_PAGING_ENABLED))) { + break; + } + } -+ + if (MALI_REG_POLL_COUNT_FAST == i) { -+ MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n")); ++ MALI_DEBUG_PRINT(2, ("Enable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); ++ return MALI_FALSE; + } + -+ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */ -+ /* Re-enable interrupts */ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL); -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+void mali_pp_reset_async(struct mali_pp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ -+ MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description)); ++ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { ++ MALI_DEBUG_PRINT(2, ("Aborting MMU stall request since it has a pagefault.\n")); ++ return MALI_FALSE; ++ } + -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL); -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET); ++ return MALI_TRUE; +} + -+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core) ++/** ++ * Issues the disable stall command to the MMU and waits for HW to complete the request ++ * @param mmu The MMU to enable paging for ++ */ ++static void mali_mmu_disable_stall(struct mali_mmu_core *mmu) +{ + int i; -+ u32 rawstat = 0; ++ u32 mmu_status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); + -+ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { -+ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS); -+ if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) { -+ rawstat = mali_hw_core_register_read(&core->hw_core, 
MALI200_REG_ADDR_MGMT_INT_RAWSTAT); -+ if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) { -+ break; -+ } -+ } ++ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { ++ MALI_DEBUG_PRINT(3, ("MMU disable skipped since it was not enabled.\n")); ++ return; + } -+ -+ if (i == MALI_REG_POLL_COUNT_FAST) { -+ MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n", -+ core->hw_core.description, rawstat)); -+ return _MALI_OSK_ERR_FAULT; ++ if (mmu_status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { ++ MALI_DEBUG_PRINT(2, ("Aborting MMU disable stall request since it is in pagefault state.\n")); ++ return; + } + -+ /* Re-enable interrupts */ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL); -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_DISABLE_STALL); + -+ return _MALI_OSK_ERR_OK; ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { ++ u32 status = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); ++ if (0 == (status & MALI_MMU_STATUS_BIT_STALL_ACTIVE)) { ++ break; ++ } ++ if (status & MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE) { ++ break; ++ } ++ if (0 == (mmu_status & MALI_MMU_STATUS_BIT_PAGING_ENABLED)) { ++ break; ++ } ++ } ++ if (MALI_REG_POLL_COUNT_FAST == i) MALI_DEBUG_PRINT(1, ("Disable stall request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); +} + -+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core) ++void mali_mmu_page_fault_done(struct mali_mmu_core *mmu) +{ -+ mali_pp_reset_async(core); -+ return mali_pp_reset_wait(core); ++ MALI_DEBUG_PRINT(4, ("Mali MMU: %s: Leaving page fault mode\n", mmu->hw_core.description)); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_PAGE_FAULT_DONE); +} + -+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual) ++MALI_STATIC_INLINE _mali_osk_errcode_t mali_mmu_raw_reset(struct mali_mmu_core *mmu) +{ -+ u32 relative_address; -+ u32 start_index; -+ u32 nr_of_regs; -+ u32 *frame_registers = mali_pp_job_get_frame_registers(job); -+ u32 *wb0_registers = mali_pp_job_get_wb0_registers(job); -+ u32 *wb1_registers = mali_pp_job_get_wb1_registers(job); -+ u32 *wb2_registers = mali_pp_job_get_wb2_registers(job); -+ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job); -+ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job); -+ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ -+ /* Write frame registers */ ++ int i; + -+ /* -+ * There are two frame registers which are different for each sub job: -+ * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME) -+ * 2. 
The FS Stack Address Register (MALI200_REG_ADDR_STACK) -+ */ -+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, 0xCAFEBABE); ++ MALI_DEBUG_ASSERT(0xCAFEB000 == mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR)); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_HARD_RESET); + -+ /* For virtual jobs, the stack address shouldn't be broadcast but written individually */ -+ if (!mali_pp_job_is_virtual(job) || restart_virtual) { -+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]); ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; ++i) { ++ if (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR) == 0) { ++ break; ++ } ++ } ++ if (MALI_REG_POLL_COUNT_FAST == i) { ++ MALI_PRINT_ERROR(("Reset request failed, MMU status is 0x%08X\n", mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); ++ return _MALI_OSK_ERR_FAULT; + } + -+ /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */ -+ relative_address = MALI200_REG_ADDR_RSW; -+ start_index = MALI200_REG_ADDR_RSW / sizeof(u32); -+ nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32); -+ -+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, -+ relative_address, &frame_registers[start_index], -+ nr_of_regs, &mali_frame_registers_reset_values[start_index]); -+ -+ /* MALI200_REG_ADDR_STACK_SIZE */ -+ relative_address = MALI200_REG_ADDR_STACK_SIZE; -+ start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32); -+ -+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, -+ relative_address, frame_registers[start_index], -+ mali_frame_registers_reset_values[start_index]); -+ -+ /* Skip 2 reserved registers */ -+ -+ /* Write remaining registers */ -+ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X; -+ start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); -+ nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); -+ -+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, -+ relative_address, &frame_registers[start_index], -+ nr_of_regs, &mali_frame_registers_reset_values[start_index]); ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Write WBx registers */ -+ if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */ -+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); -+ } ++_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu) ++{ ++ _mali_osk_errcode_t err = _MALI_OSK_ERR_FAULT; ++ mali_bool stall_success; ++ MALI_DEBUG_ASSERT_POINTER(mmu); + -+ if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */ -+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); ++ stall_success = mali_mmu_enable_stall(mmu); ++ if (!stall_success) { ++ err = _MALI_OSK_ERR_BUSY; + } + -+ if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */ -+ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, 
MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); -+ } ++ MALI_DEBUG_PRINT(3, ("Mali MMU: mali_kernel_mmu_reset: %s\n", mmu->hw_core.description)); + -+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); -+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); -+ } -+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); -+ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); ++ if (_MALI_OSK_ERR_OK == mali_mmu_raw_reset(mmu)) { ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR); ++ /* no session is active, so just activate the empty page directory */ ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, mali_empty_page_directory_phys); ++ mali_mmu_enable_paging(mmu); ++ err = _MALI_OSK_ERR_OK; + } ++ mali_mmu_disable_stall(mmu); + -+#ifdef CONFIG_MALI400_HEATMAPS_ENABLED -+ if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) { -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1); -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8); -+ } -+#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */ ++ return err; ++} + -+ MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description)); ++mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu) ++{ ++ mali_bool stall_success = mali_mmu_enable_stall(mmu); + -+ /* Adding barrier to make sure all rester writes are finished */ -+ _mali_osk_write_mem_barrier(); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); + -+ /* This is the command that starts the core. -+ * -+ * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just -+ * force core to assert the completion interrupt. -+ */ -+#if !defined(PROFILING_SKIP_PP_JOBS) -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING); -+#else -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME); -+#endif ++ if (MALI_FALSE == stall_success) { ++ /* False means that it is in Pagefault state. 
Not possible to disable_stall then */ ++ return MALI_FALSE; ++ } + -+ /* Adding barrier to make sure previous rester writes is finished */ -+ _mali_osk_write_mem_barrier(); ++ mali_mmu_disable_stall(mmu); ++ return MALI_TRUE; +} + -+u32 mali_pp_core_get_version(struct mali_pp_core *core) ++void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu) +{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); +} + -+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index) -+{ -+ if (mali_global_num_pp_cores > index) { -+ return mali_global_pp_cores[index]; -+ } + -+ return NULL; ++void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address) ++{ ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_ZAP_ONE_LINE, MALI_MMU_PDE_ENTRY(mali_address)); +} + -+u32 mali_pp_get_glob_num_pp_cores(void) ++static void mali_mmu_activate_address_space(struct mali_mmu_core *mmu, u32 page_directory) +{ -+ return mali_global_num_pp_cores; ++ /* The MMU must be in stalled or page fault mode, for this writing to work */ ++ MALI_DEBUG_ASSERT(0 != (mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS) ++ & (MALI_MMU_STATUS_BIT_STALL_ACTIVE | MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE))); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_DTE_ADDR, page_directory); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_COMMAND, MALI_MMU_COMMAND_ZAP_CACHE); ++ +} + -+/* ------------- interrupt handling below ------------------ */ -+static void mali_pp_irq_probe_trigger(void *data) ++void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir) +{ -+ struct mali_pp_core *core = (struct mali_pp_core *)data; -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR); -+ _mali_osk_mem_barrier(); ++ mali_bool stall_success; ++ MALI_DEBUG_ASSERT_POINTER(mmu); ++ ++ MALI_DEBUG_PRINT(5, ("Asked to activate page directory 0x%x on MMU %s\n", pagedir, mmu->hw_core.description)); ++ ++ stall_success = mali_mmu_enable_stall(mmu); ++ MALI_DEBUG_ASSERT(stall_success); ++ MALI_IGNORE(stall_success); ++ mali_mmu_activate_address_space(mmu, pagedir->page_directory); ++ mali_mmu_disable_stall(mmu); +} + -+static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data) ++void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu) +{ -+ struct mali_pp_core *core = (struct mali_pp_core *)data; -+ u32 irq_readout; ++ mali_bool stall_success; + -+ irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS); -+ if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) { -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR); -+ _mali_osk_mem_barrier(); -+ return _MALI_OSK_ERR_OK; -+ } ++ MALI_DEBUG_ASSERT_POINTER(mmu); ++ MALI_DEBUG_PRINT(3, ("Activating the empty page directory on MMU %s\n", mmu->hw_core.description)); + -+ return _MALI_OSK_ERR_FAULT; -+} ++ stall_success = mali_mmu_enable_stall(mmu); ++ ++ /* This function can only be called when the core is idle, so it could not fail. 
*/ ++ MALI_DEBUG_ASSERT(stall_success); ++ MALI_IGNORE(stall_success); + ++ mali_mmu_activate_address_space(mmu, mali_empty_page_directory_phys); ++ mali_mmu_disable_stall(mmu); ++} + -+#if 0 -+static void mali_pp_print_registers(struct mali_pp_core *core) ++void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu) +{ -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC))); -+ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE))); ++ mali_bool stall_success; ++ MALI_DEBUG_ASSERT_POINTER(mmu); ++ ++ MALI_DEBUG_PRINT(3, ("Activating the page fault flush page directory on MMU %s\n", mmu->hw_core.description)); ++ stall_success = mali_mmu_enable_stall(mmu); ++ /* This function is expect to fail the stalling, since it might be in PageFault mode when it is called */ ++ mali_mmu_activate_address_space(mmu, mali_page_fault_flush_page_directory); ++ if (MALI_TRUE == stall_success) mali_mmu_disable_stall(mmu); +} -+#endif + -+#if 0 -+void mali_pp_print_state(struct mali_pp_core *core) ++/* Is called when we want the mmu to give an interrupt */ ++static void mali_mmu_probe_trigger(void *data) +{ -+ MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); ++ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data; ++ mali_hw_core_register_write(&mmu->hw_core, 
MALI_MMU_REGISTER_INT_RAWSTAT, MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR); +} -+#endif + -+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob) ++/* Is called when the irq probe wants the mmu to acknowledge an interrupt from the hw */ ++static _mali_osk_errcode_t mali_mmu_probe_ack(void *data) +{ -+ u32 val0 = 0; -+ u32 val1 = 0; -+ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob); -+ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob); -+#if defined(CONFIG_MALI400_PROFILING) -+ int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id); -+#endif ++ struct mali_mmu_core *mmu = (struct mali_mmu_core *)data; ++ u32 int_stat; + -+ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { -+ val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE); -+ mali_pp_job_set_perf_counter_value0(job, subjob, val0); ++ int_stat = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS); + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_report_hw_counter(counter_index, val0); -+ _mali_osk_profiling_record_global_counters(counter_index, val0); -+#endif ++ MALI_DEBUG_PRINT(2, ("mali_mmu_probe_irq_acknowledge: intstat 0x%x\n", int_stat)); ++ if (int_stat & MALI_MMU_INTERRUPT_PAGE_FAULT) { ++ MALI_DEBUG_PRINT(2, ("Probe: Page fault detect: PASSED\n")); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_PAGE_FAULT); ++ } else { ++ MALI_DEBUG_PRINT(1, ("Probe: Page fault detect: FAILED\n")); + } + -+ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { -+ val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE); -+ mali_pp_job_set_perf_counter_value1(job, subjob, val1); ++ if (int_stat & MALI_MMU_INTERRUPT_READ_BUS_ERROR) { ++ MALI_DEBUG_PRINT(2, ("Probe: Bus read error detect: PASSED\n")); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_CLEAR, MALI_MMU_INTERRUPT_READ_BUS_ERROR); ++ } else { ++ MALI_DEBUG_PRINT(1, ("Probe: Bus read error detect: FAILED\n")); ++ } + -+#if defined(CONFIG_MALI400_PROFILING) -+ _mali_osk_profiling_report_hw_counter(counter_index + 1, val1); -+ _mali_osk_profiling_record_global_counters(counter_index + 1, val1); -+#endif ++ if ((int_stat & (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) == ++ (MALI_MMU_INTERRUPT_PAGE_FAULT | MALI_MMU_INTERRUPT_READ_BUS_ERROR)) { ++ return _MALI_OSK_ERR_OK; + } ++ ++ return _MALI_OSK_ERR_FAULT; +} + -+#if MALI_STATE_TRACKING -+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size) ++#if 0 ++void mali_mmu_print_state(struct mali_mmu_core *mmu) +{ -+ int n = 0; -+ -+ n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description); -+ -+ return n; ++ MALI_DEBUG_PRINT(2, ("MMU: State of %s is 0x%08x\n", mmu->hw_core.description, mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS))); +} +#endif -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp.h b/drivers/gpu/arm/mali400/mali/common/mali_pp.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu.h b/drivers/gpu/arm/mali400/mali/common/mali_mmu.h new file mode 100644 -index 000000000..f98b29866 +index 000000000..6ed48585f --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pp.h -@@ -0,0 +1,138 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu.h +@@ -0,0 +1,124 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. 
All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -296200,140 +298914,126 @@ index 000000000..f98b29866 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_PP_H__ -+#define __MALI_PP_H__ ++#ifndef __MALI_MMU_H__ ++#define __MALI_MMU_H__ + +#include "mali_osk.h" -+#include "mali_pp_job.h" ++#include "mali_mmu_page_directory.h" +#include "mali_hw_core.h" + ++/* Forward declaration from mali_group.h */ +struct mali_group; + -+#define MALI_MAX_NUMBER_OF_PP_CORES 9 -+ +/** -+ * Definition of the PP core struct -+ * Used to track a PP core in the system. ++ * MMU register numbers ++ * Used in the register read/write routines. ++ * See the hardware documentation for more information about each register + */ -+struct mali_pp_core { -+ struct mali_hw_core hw_core; /**< Common for all HW cores */ -+ _mali_osk_irq_t *irq; /**< IRQ handler */ -+ u32 core_id; /**< Unique core ID */ -+ u32 bcast_id; /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */ -+}; -+ -+_mali_osk_errcode_t mali_pp_initialize(void); -+void mali_pp_terminate(void); -+ -+struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id); -+void mali_pp_delete(struct mali_pp_core *core); ++typedef enum mali_mmu_register { ++ MALI_MMU_REGISTER_DTE_ADDR = 0x0000, /**< Current Page Directory Pointer */ ++ MALI_MMU_REGISTER_STATUS = 0x0004, /**< Status of the MMU */ ++ MALI_MMU_REGISTER_COMMAND = 0x0008, /**< Command register, used to control the MMU */ ++ MALI_MMU_REGISTER_PAGE_FAULT_ADDR = 0x000C, /**< Logical address of the last page fault */ ++ MALI_MMU_REGISTER_ZAP_ONE_LINE = 0x010, /**< Used to invalidate the mapping of a single page from the MMU */ ++ MALI_MMU_REGISTER_INT_RAWSTAT = 0x0014, /**< Raw interrupt status, all interrupts visible */ ++ MALI_MMU_REGISTER_INT_CLEAR = 0x0018, /**< Indicate to the MMU that the interrupt has been received */ ++ MALI_MMU_REGISTER_INT_MASK = 0x001C, /**< Enable/disable types of interrupts */ ++ MALI_MMU_REGISTER_INT_STATUS = 0x0020 /**< Interrupt status based on the mask */ ++} mali_mmu_register; + -+void mali_pp_stop_bus(struct mali_pp_core *core); -+_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core); -+void mali_pp_reset_async(struct mali_pp_core *core); -+_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core); -+_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core); -+_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core); ++/** ++ * MMU interrupt register bits ++ * Each cause of the interrupt is reported ++ * through the (raw) interrupt status registers. ++ * Multiple interrupts can be pending, so multiple bits ++ * can be set at once. 
++ */ ++typedef enum mali_mmu_interrupt { ++ MALI_MMU_INTERRUPT_PAGE_FAULT = 0x01, /**< A page fault occured */ ++ MALI_MMU_INTERRUPT_READ_BUS_ERROR = 0x02 /**< A bus read error occured */ ++} mali_mmu_interrupt; + -+void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual); ++typedef enum mali_mmu_status_bits { ++ MALI_MMU_STATUS_BIT_PAGING_ENABLED = 1 << 0, ++ MALI_MMU_STATUS_BIT_PAGE_FAULT_ACTIVE = 1 << 1, ++ MALI_MMU_STATUS_BIT_STALL_ACTIVE = 1 << 2, ++ MALI_MMU_STATUS_BIT_IDLE = 1 << 3, ++ MALI_MMU_STATUS_BIT_REPLAY_BUFFER_EMPTY = 1 << 4, ++ MALI_MMU_STATUS_BIT_PAGE_FAULT_IS_WRITE = 1 << 5, ++ MALI_MMU_STATUS_BIT_STALL_NOT_ACTIVE = 1 << 31, ++} mali_mmu_status_bits; + -+u32 mali_pp_core_get_version(struct mali_pp_core *core); ++/** ++ * Definition of the MMU struct ++ * Used to track a MMU unit in the system. ++ * Contains information about the mapping of the registers ++ */ ++struct mali_mmu_core { ++ struct mali_hw_core hw_core; /**< Common for all HW cores */ ++ _mali_osk_irq_t *irq; /**< IRQ handler */ ++}; + -+MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return core->core_id; -+} ++_mali_osk_errcode_t mali_mmu_initialize(void); + -+MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return core->bcast_id; -+} ++void mali_mmu_terminate(void); + -+struct mali_pp_core *mali_pp_get_global_pp_core(u32 index); -+u32 mali_pp_get_glob_num_pp_cores(void); ++struct mali_mmu_core *mali_mmu_create(_mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual); ++void mali_mmu_delete(struct mali_mmu_core *mmu); + -+/* Debug */ -+u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size); ++_mali_osk_errcode_t mali_mmu_reset(struct mali_mmu_core *mmu); ++mali_bool mali_mmu_zap_tlb(struct mali_mmu_core *mmu); ++void mali_mmu_zap_tlb_without_stall(struct mali_mmu_core *mmu); ++void mali_mmu_invalidate_page(struct mali_mmu_core *mmu, u32 mali_address); + -+/** -+ * Put instrumented HW counters from the core(s) to the job object (if enabled) -+ * -+ * parent and child is always the same, except for virtual jobs on Mali-450. -+ * In this case, the counters will be enabled on the virtual core (parent), -+ * but values need to be read from the child cores. 
-+ * -+ * @param parent The core used to see if the counters was enabled -+ * @param child The core to actually read the values from -+ * @job Job object to update with counter values (if enabled) -+ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs) -+ */ -+void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob); ++void mali_mmu_activate_page_directory(struct mali_mmu_core *mmu, struct mali_page_directory *pagedir); ++void mali_mmu_activate_empty_page_directory(struct mali_mmu_core *mmu); ++void mali_mmu_activate_fault_flush_page_directory(struct mali_mmu_core *mmu); + -+MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core) -+{ -+ return core->hw_core.description; -+} ++void mali_mmu_page_fault_done(struct mali_mmu_core *mmu); + -+MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core) ++MALI_STATIC_INLINE enum mali_interrupt_result mali_mmu_get_interrupt_result(struct mali_mmu_core *mmu) +{ -+ u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) & -+ MALI200_REG_VAL_IRQ_MASK_USED; ++ u32 rawstat_used = mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT); + if (0 == rawstat_used) { + return MALI_INTERRUPT_RESULT_NONE; -+ } else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) { -+ return MALI_INTERRUPT_RESULT_SUCCESS; + } + + return MALI_INTERRUPT_RESULT_ERROR; +} + -+MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core) -+{ -+ MALI_DEBUG_ASSERT_POINTER(core); -+ return mali_hw_core_register_read(&core->hw_core, -+ MALI200_REG_ADDR_MGMT_INT_RAWSTAT); -+} -+ + -+MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core) ++MALI_STATIC_INLINE u32 mali_mmu_get_int_status(struct mali_mmu_core *mmu) +{ -+ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS); -+ return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? 
MALI_TRUE : MALI_FALSE; ++ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_STATUS); +} + -+MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core) ++MALI_STATIC_INLINE u32 mali_mmu_get_rawstat(struct mali_mmu_core *mmu) +{ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE); ++ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_INT_RAWSTAT); +} + -+MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core) ++MALI_STATIC_INLINE void mali_mmu_mask_all_interrupts(struct mali_mmu_core *mmu) +{ -+ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); ++ mali_hw_core_register_write(&mmu->hw_core, MALI_MMU_REGISTER_INT_MASK, 0); +} + -+MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core, -+ struct mali_pp_job *job, u32 subjob) ++MALI_STATIC_INLINE u32 mali_mmu_get_status(struct mali_mmu_core *mmu) +{ -+ u32 addr = mali_pp_job_get_addr_frame(job, subjob); -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr); ++ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_STATUS); +} + -+ -+MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job) ++MALI_STATIC_INLINE u32 mali_mmu_get_page_fault_addr(struct mali_mmu_core *mmu) +{ -+ u32 addr = mali_pp_job_get_addr_stack(job, core->core_id); -+ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr); ++ return mali_hw_core_register_read(&mmu->hw_core, MALI_MMU_REGISTER_PAGE_FAULT_ADDR); +} + -+#endif /* __MALI_PP_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c ++#endif /* __MALI_MMU_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c new file mode 100644 -index 000000000..b0216d4c1 +index 000000000..9ad3e8970 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c -@@ -0,0 +1,316 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.c +@@ -0,0 +1,495 @@ +/* + * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * @@ -296344,320 +299044,499 @@ index 000000000..b0216d4c1 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_pp.h" -+#include "mali_pp_job.h" -+#include "mali_osk.h" -+#include "mali_osk_list.h" +#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_ukk.h" +#include "mali_uk_types.h" -+#include "mali_executor.h" -+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+#include "linux/mali_memory_dma_buf.h" -+#endif -+#include "mali_memory_swap_alloc.h" -+#include "mali_scheduler.h" ++#include "mali_mmu_page_directory.h" ++#include "mali_memory.h" ++#include "mali_l2_cache.h" + -+static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ -+static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ -+static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */ -+static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER }; -+static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER }; ++static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data); + -+void mali_pp_job_initialize(void) ++u32 mali_allocate_empty_page(mali_io_address *virt_addr) +{ -+ _mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0); ++ _mali_osk_errcode_t err; ++ mali_io_address mapping; ++ mali_dma_addr address; ++ ++ if (_MALI_OSK_ERR_OK != mali_mmu_get_table_page(&address, &mapping)) { ++ /* Allocation failed */ ++ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to get table page for empty pgdir\n")); ++ return 0; ++ } ++ ++ MALI_DEBUG_ASSERT_POINTER(mapping); ++ ++ err = fill_page(mapping, 0); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_mmu_release_table_page(address, mapping); ++ MALI_DEBUG_PRINT(2, ("Mali MMU: Failed to zero page\n")); ++ return 0; ++ } ++ ++ *virt_addr = mapping; ++ return address; +} + -+void mali_pp_job_terminate(void) ++void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr) +{ -+ _mali_osk_atomic_term(&pp_counter_per_sub_job_count); ++ if (MALI_INVALID_PAGE != address) { ++ mali_mmu_release_table_page(address, virt_addr); ++ } +} + -+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, -+ _mali_uk_pp_start_job_s __user *uargs, u32 id) ++_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory, ++ mali_io_address *page_directory_mapping, ++ mali_dma_addr *page_table, mali_io_address *page_table_mapping, ++ mali_dma_addr *data_page, mali_io_address *data_page_mapping) +{ -+ struct mali_pp_job *job; -+ u32 perf_counter_flag; -+ -+ job = _mali_osk_calloc(1, sizeof(struct mali_pp_job)); -+ if (NULL != job) { -+ -+ _mali_osk_list_init(&job->list); -+ _mali_osk_list_init(&job->session_fb_lookup_list); -+ _mali_osk_atomic_inc(&session->number_of_pp_jobs); ++ _mali_osk_errcode_t err; + -+ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) { -+ goto fail; ++ err = mali_mmu_get_table_page(data_page, data_page_mapping); ++ if (_MALI_OSK_ERR_OK == err) { ++ err = mali_mmu_get_table_page(page_table, page_table_mapping); ++ if 
(_MALI_OSK_ERR_OK == err) { ++ err = mali_mmu_get_table_page(page_directory, page_directory_mapping); ++ if (_MALI_OSK_ERR_OK == err) { ++ fill_page(*data_page_mapping, 0); ++ fill_page(*page_table_mapping, *data_page | MALI_MMU_FLAGS_DEFAULT); ++ fill_page(*page_directory_mapping, *page_table | MALI_MMU_FLAGS_PRESENT); ++ MALI_SUCCESS; ++ } ++ mali_mmu_release_table_page(*page_table, *page_table_mapping); ++ *page_table = MALI_INVALID_PAGE; + } ++ mali_mmu_release_table_page(*data_page, *data_page_mapping); ++ *data_page = MALI_INVALID_PAGE; ++ } ++ return err; ++} + -+ if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) { -+ MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n")); -+ goto fail; -+ } ++void mali_destroy_fault_flush_pages( ++ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping, ++ mali_dma_addr *page_table, mali_io_address *page_table_mapping, ++ mali_dma_addr *data_page, mali_io_address *data_page_mapping) ++{ ++ if (MALI_INVALID_PAGE != *page_directory) { ++ mali_mmu_release_table_page(*page_directory, *page_directory_mapping); ++ *page_directory = MALI_INVALID_PAGE; ++ *page_directory_mapping = NULL; ++ } + -+ if (!mali_pp_job_use_no_notification(job)) { -+ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s)); -+ if (NULL == job->finished_notification) goto fail; -+ } ++ if (MALI_INVALID_PAGE != *page_table) { ++ mali_mmu_release_table_page(*page_table, *page_table_mapping); ++ *page_table = MALI_INVALID_PAGE; ++ *page_table_mapping = NULL; ++ } + -+ perf_counter_flag = mali_pp_job_get_perf_counter_flag(job); ++ if (MALI_INVALID_PAGE != *data_page) { ++ mali_mmu_release_table_page(*data_page, *data_page_mapping); ++ *data_page = MALI_INVALID_PAGE; ++ *data_page_mapping = NULL; ++ } ++} + -+ /* case when no counters came from user space -+ * so pass the debugfs / DS-5 provided global ones to the job object */ -+ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) || -+ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) { -+ u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count); ++static _mali_osk_errcode_t fill_page(mali_io_address mapping, u32 data) ++{ ++ int i; ++ MALI_DEBUG_ASSERT_POINTER(mapping); + -+ /* These counters apply for all virtual jobs, and where no per sub job counter is specified */ -+ job->uargs.perf_counter_src0 = pp_counter_src0; -+ job->uargs.perf_counter_src1 = pp_counter_src1; ++ for (i = 0; i < MALI_MMU_PAGE_SIZE / 4; i++) { ++ _mali_osk_mem_iowrite32_relaxed(mapping, i * sizeof(u32), data); ++ } ++ _mali_osk_mem_barrier(); ++ MALI_SUCCESS; ++} + -+ /* We only copy the per sub job array if it is enabled with at least one counter */ -+ if (0 < sub_job_count) { -+ job->perf_counter_per_sub_job_count = sub_job_count; -+ _mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0)); -+ _mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1)); ++_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size) ++{ ++ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address); ++ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1); ++ _mali_osk_errcode_t err; ++ mali_io_address pde_mapping; ++ mali_dma_addr pde_phys; ++ int i, page_count; ++ u32 start_address; ++ if (last_pde < first_pde) ++ return 
_MALI_OSK_ERR_INVALID_ARGS; ++ ++ for (i = first_pde; i <= last_pde; i++) { ++ if (0 == (_mali_osk_mem_ioread32(pagedir->page_directory_mapped, ++ i * sizeof(u32)) & MALI_MMU_FLAGS_PRESENT)) { ++ /* Page table not present */ ++ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]); ++ MALI_DEBUG_ASSERT(NULL == pagedir->page_entries_mapped[i]); ++ ++ err = mali_mmu_get_table_page(&pde_phys, &pde_mapping); ++ if (_MALI_OSK_ERR_OK != err) { ++ MALI_PRINT_ERROR(("Failed to allocate page table page.\n")); ++ return err; + } ++ pagedir->page_entries_mapped[i] = pde_mapping; ++ ++ /* Update PDE, mark as present */ ++ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), ++ pde_phys | MALI_MMU_FLAGS_PRESENT); ++ ++ MALI_DEBUG_ASSERT(0 == pagedir->page_entries_usage_count[i]); + } + -+ job->session = session; -+ job->id = id; ++ if (first_pde == last_pde) { ++ pagedir->page_entries_usage_count[i] += size / MALI_MMU_PAGE_SIZE; ++ } else if (i == first_pde) { ++ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE; ++ page_count = (start_address + MALI_MMU_VIRTUAL_PAGE_SIZE - mali_address) / MALI_MMU_PAGE_SIZE; ++ pagedir->page_entries_usage_count[i] += page_count; ++ } else if (i == last_pde) { ++ start_address = i * MALI_MMU_VIRTUAL_PAGE_SIZE; ++ page_count = (mali_address + size - start_address) / MALI_MMU_PAGE_SIZE; ++ pagedir->page_entries_usage_count[i] += page_count; ++ } else { ++ pagedir->page_entries_usage_count[i] = 1024; ++ } ++ } ++ _mali_osk_write_mem_barrier(); + -+ job->sub_jobs_num = job->uargs.num_cores ? job->uargs.num_cores : 1; -+ job->pid = _mali_osk_get_pid(); -+ job->tid = _mali_osk_get_tid(); ++ return _MALI_OSK_ERR_OK; ++} + -+ _mali_osk_atomic_init(&job->sub_jobs_completed, 0); -+ _mali_osk_atomic_init(&job->sub_job_errors, 0); -+ job->swap_status = MALI_NO_SWAP_IN; -+ job->user_notification = MALI_FALSE; -+ job->num_pp_cores_in_virtual = 0; ++MALI_STATIC_INLINE void mali_mmu_zero_pte(mali_io_address page_table, u32 mali_address, u32 size) ++{ ++ int i; ++ const int first_pte = MALI_MMU_PTE_ENTRY(mali_address); ++ const int last_pte = MALI_MMU_PTE_ENTRY(mali_address + size - 1); + -+ if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) { -+ MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n")); -+ goto fail; ++ for (i = first_pte; i <= last_pte; i++) { ++ _mali_osk_mem_iowrite32_relaxed(page_table, i * sizeof(u32), 0); ++ } ++} ++ ++static u32 mali_page_directory_get_phys_address(struct mali_page_directory *pagedir, u32 index) ++{ ++ return (_mali_osk_mem_ioread32(pagedir->page_directory_mapped, ++ index * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK); ++} ++ ++ ++_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size) ++{ ++ const int first_pde = MALI_MMU_PDE_ENTRY(mali_address); ++ const int last_pde = MALI_MMU_PDE_ENTRY(mali_address + size - 1); ++ u32 left = size; ++ int i; ++ mali_bool pd_changed = MALI_FALSE; ++ u32 pages_to_invalidate[3]; /* hard-coded to 3: max two pages from the PT level plus max one page from PD level */ ++ u32 num_pages_inv = 0; ++ mali_bool invalidate_all = MALI_FALSE; /* safety mechanism in case page_entries_usage_count is unreliable */ ++ ++ /* For all page directory entries in range. 
*/ ++ for (i = first_pde; i <= last_pde; i++) { ++ u32 size_in_pde, offset; ++ ++ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[i]); ++ MALI_DEBUG_ASSERT(0 != pagedir->page_entries_usage_count[i]); ++ ++ /* Offset into page table, 0 if mali_address is 4MiB aligned */ ++ offset = (mali_address & (MALI_MMU_VIRTUAL_PAGE_SIZE - 1)); ++ if (left < MALI_MMU_VIRTUAL_PAGE_SIZE - offset) { ++ size_in_pde = left; ++ } else { ++ size_in_pde = MALI_MMU_VIRTUAL_PAGE_SIZE - offset; + } + -+ if (job->uargs.num_memory_cookies > 0) { -+ u32 size; -+ u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies; ++ pagedir->page_entries_usage_count[i] -= size_in_pde / MALI_MMU_PAGE_SIZE; + -+ size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies); ++ /* If entire page table is unused, free it */ ++ if (0 == pagedir->page_entries_usage_count[i]) { ++ u32 page_phys; ++ void *page_virt; ++ MALI_DEBUG_PRINT(4, ("Releasing page table as this is the last reference\n")); ++ /* last reference removed, no need to zero out each PTE */ + -+ job->memory_cookies = _mali_osk_malloc(size); -+ if (NULL == job->memory_cookies) { -+ MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size)); -+ goto fail; -+ } ++ page_phys = MALI_MMU_ENTRY_ADDRESS(_mali_osk_mem_ioread32(pagedir->page_directory_mapped, i * sizeof(u32))); ++ page_virt = pagedir->page_entries_mapped[i]; ++ pagedir->page_entries_mapped[i] = NULL; ++ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0); + -+ if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) { -+ MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size)); -+ goto fail; ++ mali_mmu_release_table_page(page_phys, page_virt); ++ pd_changed = MALI_TRUE; ++ } else { ++ MALI_DEBUG_ASSERT(num_pages_inv < 2); ++ if (num_pages_inv < 2) { ++ pages_to_invalidate[num_pages_inv] = mali_page_directory_get_phys_address(pagedir, i); ++ num_pages_inv++; ++ } else { ++ invalidate_all = MALI_TRUE; + } -+ } + -+ if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) { -+ /* Not a valid job. 
*/ -+ goto fail; ++ /* If part of the page table is still in use, zero the relevant PTEs */ ++ mali_mmu_zero_pte(pagedir->page_entries_mapped[i], mali_address, size_in_pde); + } + -+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job); -+ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence)); -+ -+ mali_mem_swap_in_pages(job); ++ left -= size_in_pde; ++ mali_address += size_in_pde; ++ } ++ _mali_osk_write_mem_barrier(); + -+ return job; ++ /* L2 pages invalidation */ ++ if (MALI_TRUE == pd_changed) { ++ MALI_DEBUG_ASSERT(num_pages_inv < 3); ++ if (num_pages_inv < 3) { ++ pages_to_invalidate[num_pages_inv] = pagedir->page_directory; ++ num_pages_inv++; ++ } else { ++ invalidate_all = MALI_TRUE; ++ } + } + -+fail: -+ if (NULL != job) { -+ mali_pp_job_delete(job); ++ if (invalidate_all) { ++ mali_l2_cache_invalidate_all(); ++ } else { ++ mali_l2_cache_invalidate_all_pages(pages_to_invalidate, num_pages_inv); + } + -+ return NULL; ++ MALI_SUCCESS; +} + -+void mali_pp_job_delete(struct mali_pp_job *job) ++struct mali_page_directory *mali_mmu_pagedir_alloc(void) +{ -+ struct mali_session_data *session; -+ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list)); -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list)); -+ -+ session = mali_pp_job_get_session(job); -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (NULL != job->memory_cookies) { -+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+ /* Unmap buffers attached to job */ -+ mali_dma_buf_unmap_job(job); -+#endif -+ if (MALI_NO_SWAP_IN != job->swap_status) { -+ mali_mem_swap_out_pages(job); -+ } ++ struct mali_page_directory *pagedir; ++ _mali_osk_errcode_t err; ++ mali_dma_addr phys; + -+ _mali_osk_free(job->memory_cookies); ++ pagedir = _mali_osk_calloc(1, sizeof(struct mali_page_directory)); ++ if (NULL == pagedir) { ++ return NULL; + } + -+ if (job->user_notification) { -+ mali_scheduler_return_pp_job_to_user(job, -+ job->num_pp_cores_in_virtual); ++ err = mali_mmu_get_table_page(&phys, &pagedir->page_directory_mapped); ++ if (_MALI_OSK_ERR_OK != err) { ++ _mali_osk_free(pagedir); ++ return NULL; + } + -+ if (NULL != job->finished_notification) { -+ _mali_osk_notification_delete(job->finished_notification); -+ } ++ pagedir->page_directory = (u32)phys; + -+ _mali_osk_atomic_term(&job->sub_jobs_completed); -+ _mali_osk_atomic_term(&job->sub_job_errors); -+ _mali_osk_atomic_dec(&session->number_of_pp_jobs); -+ _mali_osk_free(job); ++ /* Zero page directory */ ++ fill_page(pagedir->page_directory_mapped, 0); + -+ _mali_osk_wait_queue_wake_up(session->wait_queue); ++ return pagedir; +} + -+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list) ++void mali_mmu_pagedir_free(struct mali_page_directory *pagedir) +{ -+ struct mali_pp_job *iter; -+ struct mali_pp_job *tmp; -+ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ -+ /* Find position in list/queue where job should be added. */ -+ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list, -+ struct mali_pp_job, list) { -+ /* job should be started after iter if iter is in progress. */ -+ if (0 < iter->sub_jobs_started) { -+ break; -+ } ++ const int num_page_table_entries = sizeof(pagedir->page_entries_mapped) / sizeof(pagedir->page_entries_mapped[0]); ++ int i; + -+ /* -+ * job should be started after iter if it has a higher -+ * job id. A span is used to handle job id wrapping. 
-+ */ -+ if ((mali_pp_job_get_id(job) - -+ mali_pp_job_get_id(iter)) < -+ MALI_SCHEDULER_JOB_ID_SPAN) { -+ break; ++ /* Free referenced page tables and zero PDEs. */ ++ for (i = 0; i < num_page_table_entries; i++) { ++ if (pagedir->page_directory_mapped && (_mali_osk_mem_ioread32( ++ pagedir->page_directory_mapped, ++ sizeof(u32)*i) & MALI_MMU_FLAGS_PRESENT)) { ++ mali_dma_addr phys = _mali_osk_mem_ioread32(pagedir->page_directory_mapped, ++ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK; ++ _mali_osk_mem_iowrite32_relaxed(pagedir->page_directory_mapped, i * sizeof(u32), 0); ++ mali_mmu_release_table_page(phys, pagedir->page_entries_mapped[i]); + } + } ++ _mali_osk_write_mem_barrier(); + -+ _mali_osk_list_add(&job->list, &iter->list); ++ /* Free the page directory page. */ ++ mali_mmu_release_table_page(pagedir->page_directory, pagedir->page_directory_mapped); ++ ++ _mali_osk_free(pagedir); +} + + -+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job) ++void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address, ++ mali_dma_addr phys_address, u32 size, u32 permission_bits) +{ -+ /* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */ -+ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) { -+ return job->uargs.perf_counter_src0; -+ } ++ u32 end_address = mali_address + size; ++ u32 mali_phys = (u32)phys_address; + -+ /* Use per sub job counter if enabled... */ -+ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) { -+ return job->perf_counter_per_sub_job_src0[sub_job]; ++ /* Map physical pages into MMU page tables */ ++ for (; mali_address < end_address; mali_address += MALI_MMU_PAGE_SIZE, mali_phys += MALI_MMU_PAGE_SIZE) { ++ MALI_DEBUG_ASSERT_POINTER(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)]); ++ _mali_osk_mem_iowrite32_relaxed(pagedir->page_entries_mapped[MALI_MMU_PDE_ENTRY(mali_address)], ++ MALI_MMU_PTE_ENTRY(mali_address) * sizeof(u32), ++ mali_phys | permission_bits); + } -+ -+ /* ...else default to global job counter */ -+ return job->uargs.perf_counter_src0; +} + -+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job) ++void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr) +{ -+ /* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */ -+ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) { -+ /* Virtual jobs always use the global job counter */ -+ return job->uargs.perf_counter_src1; -+ } ++#if defined(DEBUG) ++ u32 pde_index, pte_index; ++ u32 pde, pte; + -+ /* Use per sub job counter if enabled... 
*/ -+ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) { -+ return job->perf_counter_per_sub_job_src1[sub_job]; -+ } ++ pde_index = MALI_MMU_PDE_ENTRY(fault_addr); ++ pte_index = MALI_MMU_PTE_ENTRY(fault_addr); + -+ /* ...else default to global job counter */ -+ return job->uargs.perf_counter_src1; -+} + -+void mali_pp_job_set_pp_counter_global_src0(u32 counter) -+{ -+ pp_counter_src0 = counter; -+} ++ pde = _mali_osk_mem_ioread32(pagedir->page_directory_mapped, ++ pde_index * sizeof(u32)); + -+void mali_pp_job_set_pp_counter_global_src1(u32 counter) -+{ -+ pp_counter_src1 = counter; ++ ++ if (pde & MALI_MMU_FLAGS_PRESENT) { ++ u32 pte_addr = MALI_MMU_ENTRY_ADDRESS(pde); ++ ++ pte = _mali_osk_mem_ioread32(pagedir->page_entries_mapped[pde_index], ++ pte_index * sizeof(u32)); ++ ++ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table present: %08x\n" ++ "\t\tPTE: %08x, page %08x is %s\n", ++ fault_addr, pte_addr, pte, ++ MALI_MMU_ENTRY_ADDRESS(pte), ++ pte & MALI_MMU_FLAGS_DEFAULT ? "rw" : "not present")); ++ } else { ++ MALI_DEBUG_PRINT(2, ("\tMMU: %08x: Page table not present: %08x\n", ++ fault_addr, pde)); ++ } ++#else ++ MALI_IGNORE(pagedir); ++ MALI_IGNORE(fault_addr); ++#endif +} + -+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter) ++/* For instrumented */ ++struct dump_info { ++ u32 buffer_left; ++ u32 register_writes_size; ++ u32 page_table_dump_size; ++ u32 *buffer; ++}; ++ ++static _mali_osk_errcode_t writereg(u32 where, u32 what, const char *comment, struct dump_info *info) +{ -+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); ++ if (NULL != info) { ++ info->register_writes_size += sizeof(u32) * 2; /* two 32-bit words */ + -+ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) { -+ /* increment count since existing counter was disabled */ -+ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count); -+ } ++ if (NULL != info->buffer) { ++ /* check that we have enough space */ ++ if (info->buffer_left < sizeof(u32) * 2) MALI_ERROR(_MALI_OSK_ERR_NOMEM); + -+ if (MALI_HW_CORE_NO_COUNTER == counter) { -+ /* decrement count since new counter is disabled */ -+ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count); -+ } ++ *info->buffer = where; ++ info->buffer++; + -+ /* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */ ++ *info->buffer = what; ++ info->buffer++; + -+ pp_counter_per_sub_job_src0[sub_job] = counter; ++ info->buffer_left -= sizeof(u32) * 2; ++ } ++ } ++ ++ MALI_SUCCESS; +} + -+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter) ++static _mali_osk_errcode_t mali_mmu_dump_page(mali_io_address page, u32 phys_addr, struct dump_info *info) +{ -+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); ++ if (NULL != info) { ++ /* 4096 for the page and 4 bytes for the address */ ++ const u32 page_size_in_elements = MALI_MMU_PAGE_SIZE / 4; ++ const u32 page_size_in_bytes = MALI_MMU_PAGE_SIZE; ++ const u32 dump_size_in_bytes = MALI_MMU_PAGE_SIZE + 4; + -+ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) { -+ /* increment count since existing counter was disabled */ -+ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count); -+ } ++ info->page_table_dump_size += dump_size_in_bytes; + -+ if (MALI_HW_CORE_NO_COUNTER == counter) { -+ /* decrement count since new counter is disabled */ -+ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count); -+ } ++ if (NULL != info->buffer) { ++ if (info->buffer_left < dump_size_in_bytes) 
MALI_ERROR(_MALI_OSK_ERR_NOMEM); + -+ /* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */ ++ *info->buffer = phys_addr; ++ info->buffer++; + -+ pp_counter_per_sub_job_src1[sub_job] = counter; ++ _mali_osk_memcpy(info->buffer, page, page_size_in_bytes); ++ info->buffer += page_size_in_elements; ++ ++ info->buffer_left -= dump_size_in_bytes; ++ } ++ } ++ ++ MALI_SUCCESS; +} + -+u32 mali_pp_job_get_pp_counter_global_src0(void) ++static _mali_osk_errcode_t dump_mmu_page_table(struct mali_page_directory *pagedir, struct dump_info *info) +{ -+ return pp_counter_src0; ++ MALI_DEBUG_ASSERT_POINTER(pagedir); ++ MALI_DEBUG_ASSERT_POINTER(info); ++ ++ if (NULL != pagedir->page_directory_mapped) { ++ int i; ++ ++ MALI_CHECK_NO_ERROR( ++ mali_mmu_dump_page(pagedir->page_directory_mapped, pagedir->page_directory, info) ++ ); ++ ++ for (i = 0; i < 1024; i++) { ++ if (NULL != pagedir->page_entries_mapped[i]) { ++ MALI_CHECK_NO_ERROR( ++ mali_mmu_dump_page(pagedir->page_entries_mapped[i], ++ _mali_osk_mem_ioread32(pagedir->page_directory_mapped, ++ i * sizeof(u32)) & ~MALI_MMU_FLAGS_MASK, info) ++ ); ++ } ++ } ++ } ++ ++ MALI_SUCCESS; +} + -+u32 mali_pp_job_get_pp_counter_global_src1(void) ++static _mali_osk_errcode_t dump_mmu_registers(struct mali_page_directory *pagedir, struct dump_info *info) +{ -+ return pp_counter_src1; ++ MALI_CHECK_NO_ERROR(writereg(0x00000000, pagedir->page_directory, ++ "set the page directory address", info)); ++ MALI_CHECK_NO_ERROR(writereg(0x00000008, 4, "zap???", info)); ++ MALI_CHECK_NO_ERROR(writereg(0x00000008, 0, "enable paging", info)); ++ MALI_SUCCESS; +} + -+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job) ++_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args) +{ -+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); -+ return pp_counter_per_sub_job_src0[sub_job]; ++ struct dump_info info = { 0, 0, 0, NULL }; ++ struct mali_session_data *session_data; ++ ++ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx); ++ MALI_DEBUG_ASSERT_POINTER(session_data); ++ MALI_DEBUG_ASSERT_POINTER(args); ++ ++ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info)); ++ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info)); ++ args->size = info.register_writes_size + info.page_table_dump_size; ++ MALI_SUCCESS; +} + -+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job) ++_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args) +{ -+ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); -+ return pp_counter_per_sub_job_src1[sub_job]; ++ struct dump_info info = { 0, 0, 0, NULL }; ++ struct mali_session_data *session_data; ++ ++ MALI_DEBUG_ASSERT_POINTER(args); ++ ++ session_data = (struct mali_session_data *)(uintptr_t)(args->ctx); ++ MALI_DEBUG_ASSERT_POINTER(session_data); ++ ++ info.buffer_left = args->size; ++ info.buffer = (u32 *)(uintptr_t)args->buffer; ++ ++ args->register_writes = (uintptr_t)info.buffer; ++ MALI_CHECK_NO_ERROR(dump_mmu_registers(session_data->page_directory, &info)); ++ ++ args->page_table_dump = (uintptr_t)info.buffer; ++ MALI_CHECK_NO_ERROR(dump_mmu_page_table(session_data->page_directory, &info)); ++ ++ args->register_writes_size = info.register_writes_size; ++ args->page_table_dump_size = info.page_table_dump_size; ++ ++ MALI_SUCCESS; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h 
b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h new file mode 100644 -index 000000000..d0331f398 +index 000000000..3fdf07210 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h -@@ -0,0 +1,594 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_mmu_page_directory.h +@@ -0,0 +1,110 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -296666,3071 +299545,4116 @@ index 000000000..d0331f398 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_PP_JOB_H__ -+#define __MALI_PP_JOB_H__ ++#ifndef __MALI_MMU_PAGE_DIRECTORY_H__ ++#define __MALI_MMU_PAGE_DIRECTORY_H__ + +#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_uk_types.h" -+#include "mali_session.h" -+#include "mali_kernel_common.h" -+#include "regs/mali_200_regs.h" -+#include "mali_kernel_core.h" -+#include "mali_dlbu.h" -+#include "mali_timeline.h" -+#include "mali_scheduler.h" -+#include "mali_executor.h" -+#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+#include "linux/mali_memory_dma_buf.h" -+#endif -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+#include "linux/mali_dma_fence.h" -+#endif -+ -+typedef enum pp_job_status { -+ MALI_NO_SWAP_IN, -+ MALI_SWAP_IN_FAIL, -+ MALI_SWAP_IN_SUCC, -+} pp_job_status; + +/** -+ * This structure represents a PP job, including all sub jobs. -+ * -+ * The PP job object itself is not protected by any single lock, -+ * but relies on other locks instead (scheduler, executor and timeline lock). -+ * Think of the job object as moving between these sub systems through-out -+ * its lifetime. Different part of the PP job struct is used by different -+ * subsystems. Accessor functions ensure that correct lock is taken. -+ * Do NOT access any data members directly from outside this module! ++ * Size of an MMU page in bytes + */ -+struct mali_pp_job { -+ /* -+ * These members are typically only set at creation, -+ * and only read later on. -+ * They do not require any lock protection. 
-+ */ -+ _mali_uk_pp_start_job_s uargs; /**< Arguments from user space */ -+ struct mali_session_data *session; /**< Session which submitted this job */ -+ u32 pid; /**< Process ID of submitting process */ -+ u32 tid; /**< Thread ID of submitting thread */ -+ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */ -+ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */ -+ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */ -+ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */ -+ u32 perf_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */ -+ u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */ -+ u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */ -+ u32 sub_jobs_num; /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */ -+ -+ pp_job_status swap_status; /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */ -+ mali_bool user_notification; /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */ -+ u32 num_pp_cores_in_virtual; /**< How many PP cores we have when job finished */ ++#define MALI_MMU_PAGE_SIZE 0x1000 + -+ /* -+ * These members are used by both scheduler and executor. -+ * They are "protected" by atomic operations. -+ */ -+ _mali_osk_atomic_t sub_jobs_completed; /**< Number of completed sub-jobs in this superjob */ -+ _mali_osk_atomic_t sub_job_errors; /**< Bitfield with errors (errors for each single sub-job is or'ed together) */ ++/* ++ * Size of the address space referenced by a page table page ++ */ ++#define MALI_MMU_VIRTUAL_PAGE_SIZE 0x400000 /* 4 MiB */ + -+ /* -+ * These members are used by scheduler, but only when no one else -+ * knows about this job object but the working function. -+ * No lock is thus needed for these. -+ */ -+ u32 *memory_cookies; /**< Memory cookies attached to job */ ++/** ++ * Page directory index from address ++ * Calculates the page directory index from the given address ++ */ ++#define MALI_MMU_PDE_ENTRY(address) (((address)>>22) & 0x03FF) + -+ /* -+ * These members are used by the scheduler, -+ * protected by scheduler lock -+ */ -+ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */ -+ _mali_osk_list_t session_fb_lookup_list; /**< Used to link jobs together from the same frame builder in the session */ ++/** ++ * Page table index from address ++ * Calculates the page table index from the given address ++ */ ++#define MALI_MMU_PTE_ENTRY(address) (((address)>>12) & 0x03FF) + -+ u32 sub_jobs_started; /**< Total number of sub-jobs started (always started in ascending order) */ ++/** ++ * Extract the memory address from an PDE/PTE entry ++ */ ++#define MALI_MMU_ENTRY_ADDRESS(value) ((value) & 0xFFFFFC00) + -+ /* -+ * Set by executor/group on job completion, read by scheduler when -+ * returning job to user. 
Hold executor lock when setting, -+ * no lock needed when reading -+ */ -+ u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 0 (to be returned to user space), one for each sub job */ -+ u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 1 (to be returned to user space), one for each sub job */ ++#define MALI_INVALID_PAGE ((u32)(~0)) + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ struct mali_dma_fence_context dma_fence_context; /**< The mali dma fence context to record dma fence waiters that this job wait for */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ struct dma_fence *rendered_dma_fence; /**< the new dma fence link to this job */ -+#else -+ struct fence *rendered_dma_fence; /**< the new dma fence link to this job */ -+#endif -+#endif -+}; ++/** ++ * ++ */ ++typedef enum mali_mmu_entry_flags { ++ MALI_MMU_FLAGS_PRESENT = 0x01, ++ MALI_MMU_FLAGS_READ_PERMISSION = 0x02, ++ MALI_MMU_FLAGS_WRITE_PERMISSION = 0x04, ++ MALI_MMU_FLAGS_OVERRIDE_CACHE = 0x8, ++ MALI_MMU_FLAGS_WRITE_CACHEABLE = 0x10, ++ MALI_MMU_FLAGS_WRITE_ALLOCATE = 0x20, ++ MALI_MMU_FLAGS_WRITE_BUFFERABLE = 0x40, ++ MALI_MMU_FLAGS_READ_CACHEABLE = 0x80, ++ MALI_MMU_FLAGS_READ_ALLOCATE = 0x100, ++ MALI_MMU_FLAGS_MASK = 0x1FF, ++} mali_mmu_entry_flags; + -+void mali_pp_job_initialize(void); -+void mali_pp_job_terminate(void); + -+struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id); -+void mali_pp_job_delete(struct mali_pp_job *job); ++#define MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE ( \ ++ MALI_MMU_FLAGS_PRESENT | \ ++ MALI_MMU_FLAGS_READ_PERMISSION | \ ++ MALI_MMU_FLAGS_WRITE_PERMISSION | \ ++ MALI_MMU_FLAGS_OVERRIDE_CACHE | \ ++ MALI_MMU_FLAGS_WRITE_CACHEABLE | \ ++ MALI_MMU_FLAGS_WRITE_BUFFERABLE | \ ++ MALI_MMU_FLAGS_READ_CACHEABLE | \ ++ MALI_MMU_FLAGS_READ_ALLOCATE ) + -+u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job); -+u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job); ++#define MALI_MMU_FLAGS_DEFAULT ( \ ++ MALI_MMU_FLAGS_PRESENT | \ ++ MALI_MMU_FLAGS_READ_PERMISSION | \ ++ MALI_MMU_FLAGS_WRITE_PERMISSION ) + -+void mali_pp_job_set_pp_counter_global_src0(u32 counter); -+void mali_pp_job_set_pp_counter_global_src1(u32 counter); -+void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter); -+void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter); + -+u32 mali_pp_job_get_pp_counter_global_src0(void); -+u32 mali_pp_job_get_pp_counter_global_src1(void); -+u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job); -+u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job); ++struct mali_page_directory { ++ u32 page_directory; /**< Physical address of the memory session's page directory */ ++ mali_io_address page_directory_mapped; /**< Pointer to the mapped version of the page directory into the kernel's address space */ + -+MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (NULL == job) ? 
0 : job->id; -+} ++ mali_io_address page_entries_mapped[1024]; /**< Pointers to the page tables which exists in the page directory mapped into the kernel's address space */ ++ u32 page_entries_usage_count[1024]; /**< Tracks usage count of the page table pages, so they can be releases on the last reference */ ++}; + -+MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job, -+ u32 cache_order) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ job->cache_order = cache_order; -+} ++/* Map Mali virtual address space (i.e. ensure page tables exist for the virtual range) */ ++_mali_osk_errcode_t mali_mmu_pagedir_map(struct mali_page_directory *pagedir, u32 mali_address, u32 size); ++_mali_osk_errcode_t mali_mmu_pagedir_unmap(struct mali_page_directory *pagedir, u32 mali_address, u32 size); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (NULL == job) ? 0 : job->cache_order; -+} ++/* Back virtual address space with actual pages. Assumes input is contiguous and 4k aligned. */ ++void mali_mmu_pagedir_update(struct mali_page_directory *pagedir, u32 mali_address, ++ mali_dma_addr phys_address, u32 size, u32 permission_bits); + -+MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.user_job_ptr; -+} ++u32 mali_allocate_empty_page(mali_io_address *virtual); ++void mali_free_empty_page(mali_dma_addr address, mali_io_address virt_addr); ++_mali_osk_errcode_t mali_create_fault_flush_pages(mali_dma_addr *page_directory, ++ mali_io_address *page_directory_mapping, ++ mali_dma_addr *page_table, mali_io_address *page_table_mapping, ++ mali_dma_addr *data_page, mali_io_address *data_page_mapping); ++void mali_destroy_fault_flush_pages( ++ mali_dma_addr *page_directory, mali_io_address *page_directory_mapping, ++ mali_dma_addr *page_table, mali_io_address *page_table_mapping, ++ mali_dma_addr *data_page, mali_io_address *data_page_mapping); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.frame_builder_id; -+} ++struct mali_page_directory *mali_mmu_pagedir_alloc(void); ++void mali_mmu_pagedir_free(struct mali_page_directory *pagedir); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.flush_id; -+} ++void mali_mmu_pagedir_diag(struct mali_page_directory *pagedir, u32 fault_addr); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->pid; -+} ++#endif /* __MALI_MMU_PAGE_DIRECTORY_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk.h b/drivers/gpu/arm/mali400/mali/common/mali_osk.h +new file mode 100644 +index 000000000..9ade362d6 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk.h +@@ -0,0 +1,1389 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->tid; -+} ++/** ++ * @file mali_osk.h ++ * Defines the OS abstraction layer for the kernel device driver (OSK) ++ */ + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.frame_registers; -+} ++#ifndef __MALI_OSK_H__ ++#define __MALI_OSK_H__ + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.dlbu_registers; -+} ++#include ++#include "mali_osk_types.h" ++#include "mali_osk_specific.h" /* include any per-os specifics */ ++#include "mali_osk_locks.h" + -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job) -+{ -+#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE; -+#else -+ return MALI_FALSE; ++#ifdef __cplusplus ++extern "C" { +#endif -+} + -+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** ++ * @addtogroup uddapi Unified Device Driver (UDD) APIs ++ * ++ * @{ ++ */ + -+ if (mali_pp_job_is_virtual(job)) { -+ return MALI_DLBU_VIRT_ADDR; -+ } else if (0 == sub_job) { -+ return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)]; -+ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) { -+ return job->uargs.frame_registers_addr_frame[sub_job - 1]; -+ } ++/** ++ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs ++ * ++ * @{ ++ */ + -+ return 0; -+} ++/** @addtogroup _mali_osk_lock OSK Mutual Exclusion Locks ++ * @{ */ + -+MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++#ifdef DEBUG ++/** @brief Macro for asserting that the current thread holds a given lock ++ */ ++#define MALI_DEBUG_ASSERT_LOCK_HELD(l) MALI_DEBUG_ASSERT(_mali_osk_lock_get_owner((_mali_osk_lock_debug_t *)l) == _mali_osk_get_tid()); + -+ if (0 == sub_job) { -+ return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)]; -+ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) { -+ return job->uargs.frame_registers_addr_stack[sub_job - 1]; -+ } ++/** @brief returns a lock's owner (thread id) if debugging is enabled ++ */ ++#else ++#define MALI_DEBUG_ASSERT_LOCK_HELD(l) do {} while(0) ++#endif + -+ return 0; -+} ++#define _mali_osk_ctxprintf seq_printf + -+void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list); ++/** @} */ /* end group _mali_osk_lock */ + -+MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job, -+ _mali_osk_list_t *list) -+{ -+ _mali_osk_list_addtail(&job->list, list); -+} ++/** @addtogroup _mali_osk_miscellaneous ++ * @{ */ + -+MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job, -+ _mali_osk_list_t *list) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list)); -+ _mali_osk_list_move(&job->list, list); -+} ++/** @brief Find the containing structure of another structure ++ * ++ * This is the reverse of the operation 'offsetof'. This means that the ++ * following condition is satisfied: ++ * ++ * ptr == _MALI_OSK_CONTAINER_OF( &ptr->member, type, member ) ++ * ++ * When ptr is of type 'type'. 
++ * ++ * Its purpose it to recover a larger structure that has wrapped a smaller one. ++ * ++ * @note no type or memory checking occurs to ensure that a wrapper structure ++ * does in fact exist, and that it is being recovered with respect to the ++ * correct member. ++ * ++ * @param ptr the pointer to the member that is contained within the larger ++ * structure ++ * @param type the type of the structure that contains the member ++ * @param member the name of the member in the structure that ptr points to. ++ * @return a pointer to a \a type object which contains \a member, as pointed ++ * to by \a ptr. ++ */ ++#define _MALI_OSK_CONTAINER_OF(ptr, type, member) \ ++ ((type *)( ((char *)ptr) - offsetof(type,member) )) + -+MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ _mali_osk_list_delinit(&job->list); -+} ++/** @addtogroup _mali_osk_wq ++ * @{ */ + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb0_registers; -+} ++/** @brief Initialize work queues (for deferred work) ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_wq_init(void); + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb1_registers; -+} ++/** @brief Terminate work queues (for deferred work) ++ */ ++void _mali_osk_wq_term(void); + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb2_registers; -+} ++/** @brief Create work in the work queue ++ * ++ * Creates a work object which can be scheduled in the work queue. When ++ * scheduled, \a handler will be called with \a data as the argument. ++ * ++ * Refer to \ref _mali_osk_wq_schedule_work() for details on how work ++ * is scheduled in the queue. ++ * ++ * The returned pointer must be freed with \ref _mali_osk_wq_delete_work() ++ * when no longer needed. ++ */ ++_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; -+} ++/** @brief A high priority version of \a _mali_osk_wq_create_work() ++ * ++ * Creates a work object which can be scheduled in the high priority work queue. ++ * ++ * This is unfortunately needed to get low latency scheduling of the Mali cores. Normally we would ++ * schedule the next job in hw_irq or tasklet, but often we can't since we need to synchronously map ++ * and unmap shared memory when a job is connected to external fences (timelines). And this requires ++ * taking a mutex. ++ * ++ * We do signal a lot of other (low priority) work also as part of the job being finished, and if we ++ * don't set this Mali scheduling thread as high priority, we see that the CPU scheduler often runs ++ * random things instead of starting the next GPU job when the GPU is idle. So setting the gpu ++ * scheduler to high priority does give a visually more responsive system. 
++ * ++ * Start the high priority work with: \a _mali_osk_wq_schedule_work_high_pri() ++ */ ++_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; -+} ++/** @brief Delete a work object ++ * ++ * This will flush the work queue to ensure that the work handler will not ++ * be called after deletion. ++ */ ++void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; -+} ++/** @brief Delete a work object ++ * ++ * This will NOT flush the work queue, so only call this if you are sure that the work handler will ++ * not be called after deletion. ++ */ ++void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work); + -+MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; -+} ++/** @brief Cause a queued, deferred call of the work handler ++ * ++ * _mali_osk_wq_schedule_work provides a mechanism for enqueuing deferred calls ++ * to the work handler. After calling \ref _mali_osk_wq_schedule_work(), the ++ * work handler will be scheduled to run at some point in the future. ++ * ++ * Typically this is called by the IRQ upper-half to defer further processing of ++ * IRQ-related work to the IRQ bottom-half handler. This is necessary for work ++ * that cannot be done in an IRQ context by the IRQ upper-half handler. Timer ++ * callbacks also use this mechanism, because they are treated as though they ++ * operate in an IRQ context. Refer to \ref _mali_osk_timer_t for more ++ * information. ++ * ++ * Code that operates in a kernel-process context (with no IRQ context ++ * restrictions) may also enqueue deferred calls to the IRQ bottom-half. The ++ * advantage over direct calling is that deferred calling allows the caller and ++ * IRQ bottom half to hold the same mutex, with a guarantee that they will not ++ * deadlock just by using this mechanism. ++ * ++ * _mali_osk_wq_schedule_work() places deferred call requests on a queue, to ++ * allow for more than one thread to make a deferred call. Therfore, if it is ++ * called 'K' times, then the IRQ bottom-half will be scheduled 'K' times too. ++ * 'K' is a number that is implementation-specific. ++ * ++ * _mali_osk_wq_schedule_work() is guaranteed to not block on: ++ * - enqueuing a deferred call request. ++ * - the completion of the work handler. ++ * ++ * This is to prevent deadlock. For example, if _mali_osk_wq_schedule_work() ++ * blocked, then it would cause a deadlock when the following two conditions ++ * hold: ++ * - The work handler callback (of type _mali_osk_wq_work_handler_t) locks ++ * a mutex ++ * - And, at the same time, the caller of _mali_osk_wq_schedule_work() also ++ * holds the same mutex ++ * ++ * @note care must be taken to not overflow the queue that ++ * _mali_osk_wq_schedule_work() operates on. Code must be structured to ++ * ensure that the number of requests made to the queue is bounded. Otherwise, ++ * work will be lost. ++ * ++ * The queue that _mali_osk_wq_schedule_work implements is a FIFO of N-writer, ++ * 1-reader type. 
The writers are the callers of _mali_osk_wq_schedule_work ++ * (all OSK-registered IRQ upper-half handlers in the system, watchdog timers, ++ * callers from a Kernel-process context). The reader is a single thread that ++ * handles all OSK-registered work. ++ * ++ * @param work a pointer to the _mali_osk_wq_work_t object corresponding to the ++ * work to begin processing. ++ */ ++void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work); + -+MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; -+} ++/** @brief Cause a queued, deferred call of the high priority work handler ++ * ++ * Function is the same as \a _mali_osk_wq_schedule_work() with the only ++ * difference that it runs in a high (real time) priority on the system. ++ * ++ * Should only be used as a substitue for doing the same work in interrupts. ++ * ++ * This is allowed to sleep, but the work should be small since it will block ++ * all other applications. ++*/ ++void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work); + -+MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; -+} ++/** @brief Flush the work queue ++ * ++ * This will flush the OSK work queue, ensuring all work in the queue has ++ * completed before returning. ++ * ++ * Since this blocks on the completion of work in the work-queue, the ++ * caller of this function \b must \b not hold any mutexes that are taken by ++ * any registered work handler. To do so may cause a deadlock. ++ * ++ */ ++void _mali_osk_wq_flush(void); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @brief Create work in the delayed work queue ++ * ++ * Creates a work object which can be scheduled in the work queue. When ++ * scheduled, a timer will be start and the \a handler will be called with ++ * \a data as the argument when timer out ++ * ++ * Refer to \ref _mali_osk_wq_delayed_schedule_work() for details on how work ++ * is scheduled in the queue. ++ * ++ * The returned pointer must be freed with \ref _mali_osk_wq_delayed_delete_work_nonflush() ++ * when no longer needed. ++ */ ++_mali_osk_wq_delayed_work_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data); + -+ if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] || -+ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] || -+ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] -+ ) { -+ /* At least one output unit active */ -+ return MALI_FALSE; -+ } ++/** @brief Delete a work object ++ * ++ * This will NOT flush the work queue, so only call this if you are sure that the work handler will ++ * not be called after deletion. ++ */ ++void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work); + -+ /* All outputs are disabled - we can abort the job */ -+ return MALI_TRUE; -+} ++/** @brief Cancel a delayed work without waiting for it to finish ++ * ++ * Note that the \a work callback function may still be running on return from ++ * _mali_osk_wq_delayed_cancel_work_async(). 
++ * ++ * @param work The delayed work to be cancelled ++ */ ++void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work); + -+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job) -+{ -+ u32 fb_lookup_id; ++/** @brief Cancel a delayed work and wait for it to finish ++ * ++ * When this function returns, the \a work was either cancelled or it finished running. ++ * ++ * @param work The delayed work to be cancelled ++ */ ++void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work); + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++/** @brief Put \a work task in global workqueue after delay ++ * ++ * After waiting for a given time this puts a job in the kernel-global ++ * workqueue. ++ * ++ * If \a work was already on a queue, this function will return without doing anything ++ * ++ * @param work job to be done ++ * @param delay number of jiffies to wait or 0 for immediate execution ++ */ ++void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay); + -+ fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id; ++/** @} */ /* end group _mali_osk_wq */ + -+ MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id); + -+ _mali_osk_list_addtail(&job->session_fb_lookup_list, -+ &job->session->pp_job_fb_lookup_list[fb_lookup_id]); -+} ++/** @addtogroup _mali_osk_irq ++ * @{ */ + -+MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ _mali_osk_list_delinit(&job->session_fb_lookup_list); -+} ++/** @brief Initialize IRQ handling for a resource ++ * ++ * Registers an interrupt handler \a uhandler for the given IRQ number \a irqnum. ++ * \a data will be passed as argument to the handler when an interrupt occurs. ++ * ++ * If \a irqnum is -1, _mali_osk_irq_init will probe for the IRQ number using ++ * the supplied \a trigger_func and \a ack_func. These functions will also ++ * receive \a data as their argument. ++ * ++ * @param irqnum The IRQ number that the resource uses, as seen by the CPU. ++ * The value -1 has a special meaning which indicates the use of probing, and ++ * trigger_func and ack_func must be non-NULL. ++ * @param uhandler The interrupt handler, corresponding to a ISR handler for ++ * the resource ++ * @param int_data resource specific data, which will be passed to uhandler ++ * @param trigger_func Optional: a function to trigger the resource's irq, to ++ * probe for the interrupt. Use NULL if irqnum != -1. ++ * @param ack_func Optional: a function to acknowledge the resource's irq, to ++ * probe for the interrupt. Use NULL if irqnum != -1. ++ * @param probe_data resource-specific data, which will be passed to ++ * (if present) trigger_func and ack_func ++ * @param description textual description of the IRQ resource. ++ * @return on success, a pointer to a _mali_osk_irq_t object, which represents ++ * the IRQ handling on this resource. NULL on failure. ++ */ ++_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description); + -+MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->session; -+} ++/** @brief Terminate IRQ handling on a resource. 
++ * ++ * This will disable the interrupt from the device, and then waits for any ++ * currently executing IRQ handlers to complete. ++ * ++ * @note If work is deferred to an IRQ bottom-half handler through ++ * \ref _mali_osk_wq_schedule_work(), be sure to flush any remaining work ++ * with \ref _mali_osk_wq_flush() or (implicitly) with \ref _mali_osk_wq_delete_work() ++ * ++ * @param irq a pointer to the _mali_osk_irq_t object corresponding to the ++ * resource whose IRQ handling is to be terminated. ++ */ ++void _mali_osk_irq_term(_mali_osk_irq_t *irq); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE; -+} ++/** @} */ /* end group _mali_osk_irq */ + -+MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE; -+} + -+/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job. -+ Makes sure that no new subjobs are started. */ -+MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job) -+{ -+ u32 jobs_remaining; -+ u32 i; ++/** @addtogroup _mali_osk_atomic ++ * @{ */ + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++/** @brief Decrement an atomic counter ++ * ++ * @note It is an error to decrement the counter beyond -(1<<23) ++ * ++ * @param atom pointer to an atomic counter */ ++void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom); + -+ jobs_remaining = job->sub_jobs_num - job->sub_jobs_started; -+ job->sub_jobs_started += jobs_remaining; ++/** @brief Decrement an atomic counter, return new value ++ * ++ * @param atom pointer to an atomic counter ++ * @return The new value, after decrement */ ++u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom); + -+ /* Not the most optimal way, but this is only used in error cases */ -+ for (i = 0; i < jobs_remaining; i++) { -+ _mali_osk_atomic_inc(&job->sub_jobs_completed); -+ _mali_osk_atomic_inc(&job->sub_job_errors); -+ } -+} ++/** @brief Increment an atomic counter ++ * ++ * @note It is an error to increment the counter beyond (1<<23)-1 ++ * ++ * @param atom pointer to an atomic counter */ ++void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->sub_jobs_num == -+ _mali_osk_atomic_read(&job->sub_jobs_completed)) ? -+ MALI_TRUE : MALI_FALSE; -+} ++/** @brief Increment an atomic counter, return new value ++ * ++ * @param atom pointer to an atomic counter */ ++u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ return job->sub_jobs_started; -+} ++/** @brief Initialize an atomic counter ++ * ++ * @note the parameter required is a u32, and so signed integers should be ++ * cast to u32. ++ * ++ * @param atom pointer to an atomic counter ++ * @param val the value to initialize the atomic counter. 
++ */ ++void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->sub_jobs_num; -+} ++/** @brief Read a value from an atomic counter ++ * ++ * This can only be safely used to determine the value of the counter when it ++ * is guaranteed that other threads will not be modifying the counter. This ++ * makes its usefulness limited. ++ * ++ * @param atom pointer to an atomic counter ++ */ ++u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom); + -+MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started); -+ return (job->sub_jobs_num - job->sub_jobs_started); -+} ++/** @brief Terminate an atomic counter ++ * ++ * @param atom pointer to an atomic counter ++ */ ++void _mali_osk_atomic_term(_mali_osk_atomic_t *atom); + -+MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.num_memory_cookies; -+} ++/** @brief Assign a new val to atomic counter, and return the old atomic counter ++ * ++ * @param atom pointer to an atomic counter ++ * @param val the new value assign to the atomic counter ++ * @return the old value of the atomic counter ++ */ ++u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val); ++/** @} */ /* end group _mali_osk_atomic */ + -+MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie( -+ struct mali_pp_job *job, u32 index) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies); -+ MALI_DEBUG_ASSERT_POINTER(job->memory_cookies); -+ return job->memory_cookies[index]; -+} + -+MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @defgroup _mali_osk_memory OSK Memory Allocation ++ * @{ */ + -+ if (0 < job->uargs.num_memory_cookies) { -+ return MALI_TRUE; -+ } ++/** @brief Allocate zero-initialized memory. ++ * ++ * Returns a buffer capable of containing at least \a n elements of \a size ++ * bytes each. The buffer is initialized to zero. ++ * ++ * If there is a need for a bigger block of memory (16KB or bigger), then ++ * consider to use _mali_osk_vmalloc() instead, as this function might ++ * map down to a OS function with size limitations. ++ * ++ * The buffer is suitably aligned for storage and subsequent access of every ++ * type that the compiler supports. Therefore, the pointer to the start of the ++ * buffer may be cast into any pointer type, and be subsequently accessed from ++ * such a pointer, without loss of information. ++ * ++ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). ++ * Failure to do so will cause a memory leak. ++ * ++ * @note Most toolchains supply memory allocation functions that meet the ++ * compiler's alignment requirements. ++ * ++ * @param n Number of elements to allocate ++ * @param size Size of each element ++ * @return On success, the zero-initialized buffer allocated. NULL on failure ++ */ ++void *_mali_osk_calloc(u32 n, u32 size); + -+ return MALI_FALSE; -+} ++/** @brief Allocate memory. ++ * ++ * Returns a buffer capable of containing at least \a size bytes. The ++ * contents of the buffer are undefined. 
++ * ++ * If there is a need for a bigger block of memory (16KB or bigger), then ++ * consider to use _mali_osk_vmalloc() instead, as this function might ++ * map down to a OS function with size limitations. ++ * ++ * The buffer is suitably aligned for storage and subsequent access of every ++ * type that the compiler supports. Therefore, the pointer to the start of the ++ * buffer may be cast into any pointer type, and be subsequently accessed from ++ * such a pointer, without loss of information. ++ * ++ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). ++ * Failure to do so will cause a memory leak. ++ * ++ * @note Most toolchains supply memory allocation functions that meet the ++ * compiler's alignment requirements. ++ * ++ * Remember to free memory using _mali_osk_free(). ++ * @param size Number of bytes to allocate ++ * @return On success, the buffer allocated. NULL on failure. ++ */ ++void *_mali_osk_malloc(u32 size); + -+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++/** @brief Free memory. ++ * ++ * Reclaims the buffer pointed to by the parameter \a ptr for the system. ++ * All memory returned from _mali_osk_malloc() and _mali_osk_calloc() ++ * must be freed before the application exits. Otherwise, ++ * a memory leak will occur. ++ * ++ * Memory must be freed once. It is an error to free the same non-NULL pointer ++ * more than once. ++ * ++ * It is legal to free the NULL pointer. ++ * ++ * @param ptr Pointer to buffer to free ++ */ ++void _mali_osk_free(void *ptr); + -+ /* Assert that we are marking the "first unstarted sub job" as started */ -+ MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job); ++/** @brief Allocate memory. ++ * ++ * Returns a buffer capable of containing at least \a size bytes. The ++ * contents of the buffer are undefined. ++ * ++ * This function is potentially slower than _mali_osk_malloc() and _mali_osk_calloc(), ++ * but do support bigger sizes. ++ * ++ * The buffer is suitably aligned for storage and subsequent access of every ++ * type that the compiler supports. Therefore, the pointer to the start of the ++ * buffer may be cast into any pointer type, and be subsequently accessed from ++ * such a pointer, without loss of information. ++ * ++ * When the buffer is no longer in use, it must be freed with _mali_osk_free(). ++ * Failure to do so will cause a memory leak. ++ * ++ * @note Most toolchains supply memory allocation functions that meet the ++ * compiler's alignment requirements. ++ * ++ * Remember to free memory using _mali_osk_free(). ++ * @param size Number of bytes to allocate ++ * @return On success, the buffer allocated. NULL on failure. ++ */ ++void *_mali_osk_valloc(u32 size); + -+ job->sub_jobs_started++; -+} ++/** @brief Free memory. ++ * ++ * Reclaims the buffer pointed to by the parameter \a ptr for the system. ++ * All memory returned from _mali_osk_valloc() must be freed before the ++ * application exits. Otherwise a memory leak will occur. ++ * ++ * Memory must be freed once. It is an error to free the same non-NULL pointer ++ * more than once. ++ * ++ * It is legal to free the NULL pointer. ++ * ++ * @param ptr Pointer to buffer to free ++ */ ++void _mali_osk_vfree(void *ptr); + -+MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @brief Copies memory. 
++ * ++ * Copies the \a len bytes from the buffer pointed by the parameter \a src ++ * directly to the buffer pointed by \a dst. ++ * ++ * It is an error for \a src to overlap \a dst anywhere in \a len bytes. ++ * ++ * @param dst Pointer to the destination array where the content is to be ++ * copied. ++ * @param src Pointer to the source of data to be copied. ++ * @param len Number of bytes to copy. ++ * @return \a dst is always passed through unmodified. ++ */ ++void *_mali_osk_memcpy(void *dst, const void *src, u32 len); + -+ _mali_osk_atomic_inc(&job->sub_jobs_completed); -+ if (MALI_FALSE == success) { -+ _mali_osk_atomic_inc(&job->sub_job_errors); -+ } -+} ++/** @brief Fills memory. ++ * ++ * Sets the first \a n bytes of the block of memory pointed to by \a s to ++ * the specified value ++ * @param s Pointer to the block of memory to fill. ++ * @param c Value to be set, passed as u32. Only the 8 Least Significant Bits (LSB) ++ * are used. ++ * @param n Number of bytes to be set to the value. ++ * @return \a s is always passed through unmodified ++ */ ++void *_mali_osk_memset(void *s, u32 c, u32 n); ++/** @} */ /* end group _mali_osk_memory */ + -+MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) { -+ return MALI_TRUE; -+ } -+ return MALI_FALSE; -+} + -+MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification( -+ struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ? -+ MALI_TRUE : MALI_FALSE; -+} ++/** @brief Checks the amount of memory allocated ++ * ++ * Checks that not more than \a max_allocated bytes are allocated. ++ * ++ * Some OS bring up an interactive out of memory dialogue when the ++ * system runs out of memory. This can stall non-interactive ++ * apps (e.g. automated test runs). This function can be used to ++ * not trigger the OOM dialogue by keeping allocations ++ * within a certain limit. ++ * ++ * @return MALI_TRUE when \a max_allocated bytes are not in use yet. MALI_FALSE ++ * when at least \a max_allocated bytes are in use. ++ */ ++mali_bool _mali_osk_mem_check_allocated(u32 max_allocated); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job) -+{ -+ /* -+ * A pilot job is currently identified as jobs which -+ * require no callback notification. -+ */ -+ return mali_pp_job_use_no_notification(job); -+} + -+MALI_STATIC_INLINE _mali_osk_notification_t * -+mali_pp_job_get_finished_notification(struct mali_pp_job *job) -+{ -+ _mali_osk_notification_t *notification; ++/** @addtogroup _mali_osk_low_level_memory ++ * @{ */ + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(job->finished_notification); ++/** @brief Issue a memory barrier ++ * ++ * This defines an arbitrary memory barrier operation, which forces an ordering constraint ++ * on memory read and write operations. ++ */ ++void _mali_osk_mem_barrier(void); + -+ notification = job->finished_notification; -+ job->finished_notification = NULL; ++/** @brief Issue a write memory barrier ++ * ++ * This defines an write memory barrier operation which forces an ordering constraint ++ * on memory write operations. ++ */ ++void _mali_osk_write_mem_barrier(void); + -+ return notification; -+} ++/** @brief Map a physically contiguous region into kernel space ++ * ++ * This is primarily used for mapping in registers from resources, and Mali-MMU ++ * page tables. 
The mapping is only visable from kernel-space. ++ * ++ * Access has to go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32 ++ * ++ * @param phys CPU-physical base address of the memory to map in. This must ++ * be aligned to the system's page size, which is assumed to be 4K. ++ * @param size the number of bytes of physically contiguous address space to ++ * map in ++ * @param description A textual description of the memory being mapped in. ++ * @return On success, a Mali IO address through which the mapped-in ++ * memory/registers can be accessed. NULL on failure. ++ */ ++mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface( -+ struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE) -+ ? MALI_TRUE : MALI_FALSE; -+} ++/** @brief Unmap a physically contiguous address range from kernel space. ++ * ++ * The address range should be one previously mapped in through ++ * _mali_osk_mem_mapioregion. ++ * ++ * It is a programming error to do (but not limited to) the following: ++ * - attempt an unmap twice ++ * - unmap only part of a range obtained through _mali_osk_mem_mapioregion ++ * - unmap more than the range obtained through _mali_osk_mem_mapioregion ++ * - unmap an address range that was not successfully mapped using ++ * _mali_osk_mem_mapioregion ++ * - provide a mapping that does not map to phys. ++ * ++ * @param phys CPU-physical base address of the memory that was originally ++ * mapped in. This must be aligned to the system's page size, which is assumed ++ * to be 4K ++ * @param size The number of bytes that were originally mapped in. ++ * @param mapping The Mali IO address through which the mapping is ++ * accessed. ++ */ ++void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address mapping); + -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED) -+ ? MALI_TRUE : MALI_FALSE; -+} ++/** @brief Allocate and Map a physically contiguous region into kernel space ++ * ++ * This is used for allocating physically contiguous regions (such as Mali-MMU ++ * page tables) and mapping them into kernel space. The mapping is only ++ * visible from kernel-space. ++ * ++ * The alignment of the returned memory is guaranteed to be at least ++ * _MALI_OSK_CPU_PAGE_SIZE. ++ * ++ * Access must go through _mali_osk_mem_ioread32 and _mali_osk_mem_iowrite32 ++ * ++ * @note This function is primarily to provide support for OSs that are ++ * incapable of separating the tasks 'allocate physically contiguous memory' ++ * and 'map it into kernel space' ++ * ++ * @param[out] phys CPU-physical base address of memory that was allocated. ++ * (*phys) will be guaranteed to be aligned to at least ++ * _MALI_OSK_CPU_PAGE_SIZE on success. ++ * ++ * @param[in] size the number of bytes of physically contiguous memory to ++ * allocate. This must be a multiple of _MALI_OSK_CPU_PAGE_SIZE. ++ * ++ * @return On success, a Mali IO address through which the mapped-in ++ * memory/registers can be accessed. NULL on failure, and (*phys) is unmodified. 
++ */ ++mali_io_address _mali_osk_mem_allocioregion(u32 *phys, u32 size); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->uargs.perf_counter_flag; -+} ++/** @brief Free a physically contiguous address range from kernel space. ++ * ++ * The address range should be one previously mapped in through ++ * _mali_osk_mem_allocioregion. ++ * ++ * It is a programming error to do (but not limited to) the following: ++ * - attempt a free twice on the same ioregion ++ * - free only part of a range obtained through _mali_osk_mem_allocioregion ++ * - free more than the range obtained through _mali_osk_mem_allocioregion ++ * - free an address range that was not successfully mapped using ++ * _mali_osk_mem_allocioregion ++ * - provide a mapping that does not map to phys. ++ * ++ * @param phys CPU-physical base address of the memory that was originally ++ * mapped in, which was aligned to _MALI_OSK_CPU_PAGE_SIZE. ++ * @param size The number of bytes that were originally mapped in, which was ++ * a multiple of _MALI_OSK_CPU_PAGE_SIZE. ++ * @param mapping The Mali IO address through which the mapping is ++ * accessed. ++ */ ++void _mali_osk_mem_freeioregion(u32 phys, u32 size, mali_io_address mapping); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->perf_counter_value0[sub_job]; -+} ++/** @brief Request a region of physically contiguous memory ++ * ++ * This is used to ensure exclusive access to a region of physically contigous ++ * memory. ++ * ++ * It is acceptable to implement this as a stub. However, it is then the job ++ * of the System Integrator to ensure that no other device driver will be using ++ * the physical address ranges used by Mali, while the Mali device driver is ++ * loaded. ++ * ++ * @param phys CPU-physical base address of the memory to request. This must ++ * be aligned to the system's page size, which is assumed to be 4K. ++ * @param size the number of bytes of physically contiguous address space to ++ * request. ++ * @param description A textual description of the memory being requested. ++ * @return _MALI_OSK_ERR_OK on success. Otherwise, a suitable ++ * _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description); + -+MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return job->perf_counter_value1[sub_job]; -+} ++/** @brief Un-request a region of physically contiguous memory ++ * ++ * This is used to release a regious of physically contiguous memory previously ++ * requested through _mali_osk_mem_reqregion, so that other device drivers may ++ * use it. This will be called at time of Mali device driver termination. ++ * ++ * It is a programming error to attempt to: ++ * - unrequest a region twice ++ * - unrequest only part of a range obtained through _mali_osk_mem_reqregion ++ * - unrequest more than the range obtained through _mali_osk_mem_reqregion ++ * - unrequest an address range that was not successfully requested using ++ * _mali_osk_mem_reqregion ++ * ++ * @param phys CPU-physical base address of the memory to un-request. This must ++ * be aligned to the system's page size, which is assumed to be 4K ++ * @param size the number of bytes of physically contiguous address space to ++ * un-request. 
++ */ ++void _mali_osk_mem_unreqregion(uintptr_t phys, u32 size); + -+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ job->perf_counter_value0[sub_job] = value; -+} ++/** @brief Read from a location currently mapped in through ++ * _mali_osk_mem_mapioregion ++ * ++ * This reads a 32-bit word from a 32-bit aligned location. It is a programming ++ * error to provide unaligned locations, or to read from memory that is not ++ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or ++ * _mali_osk_mem_allocioregion(). ++ * ++ * @param mapping Mali IO address to read from ++ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 ++ * @return the 32-bit word from the specified location. ++ */ ++u32 _mali_osk_mem_ioread32(volatile mali_io_address mapping, u32 offset); + -+MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); -+ job->perf_counter_value1[sub_job] = value; -+} ++/** @brief Write to a location currently mapped in through ++ * _mali_osk_mem_mapioregion without memory barriers ++ * ++ * This write a 32-bit word to a 32-bit aligned location without using memory barrier. ++ * It is a programming error to provide unaligned locations, or to write to memory that is not ++ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or ++ * _mali_osk_mem_allocioregion(). ++ * ++ * @param mapping Mali IO address to write to ++ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 ++ * @param val the 32-bit word to write. ++ */ ++void _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val); + -+MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ return _MALI_OSK_ERR_OK; -+} ++/** @brief Write to a location currently mapped in through ++ * _mali_osk_mem_mapioregion with write memory barrier ++ * ++ * This write a 32-bit word to a 32-bit aligned location. It is a programming ++ * error to provide unaligned locations, or to write to memory that is not ++ * mapped in, or not mapped through either _mali_osk_mem_mapioregion() or ++ * _mali_osk_mem_allocioregion(). ++ * ++ * @param mapping Mali IO address to write to ++ * @param offset Byte offset from the given IO address to operate on, must be a multiple of 4 ++ * @param val the 32-bit word to write. ++ */ ++void _mali_osk_mem_iowrite32(volatile mali_io_address mapping, u32 offset, u32 val); + -+/** -+ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted. ++/** @brief Flush all CPU caches + * -+ * @param job Job to check. -+ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not. ++ * This should only be implemented if flushing of the cache is required for ++ * memory mapped in through _mali_osk_mem_mapregion. 
+ */ -+MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job)); ++void _mali_osk_cache_flushall(void); + -+ return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num); -+} ++/** @brief Flush any caches necessary for the CPU and MALI to have the same view of a range of uncached mapped memory ++ * ++ * This should only be implemented if your OS doesn't do a full cache flush (inner & outer) ++ * after allocating uncached mapped memory. ++ * ++ * Some OS do not perform a full cache flush (including all outer caches) for uncached mapped memory. ++ * They zero the memory through a cached mapping, then flush the inner caches but not the outer caches. ++ * This is required for MALI to have the correct view of the memory. ++ */ ++void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size); + -+/** -+ * Get PP job's Timeline tracker. ++/** @brief Safely copy as much data as possible from src to dest + * -+ * @param job PP job. -+ * @return Pointer to Timeline tracker for the job. ++ * Do not crash if src or dest isn't available. ++ * ++ * @param dest Destination buffer (limited to user space mapped Mali memory) ++ * @param src Source buffer ++ * @param size Number of bytes to copy ++ * @return Number of bytes actually copied + */ -+MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return &(job->tracker); -+} ++u32 _mali_osk_mem_write_safe(void *dest, const void *src, u32 size); + -+MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr( -+ struct mali_pp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr; -+} ++/** @} */ /* end group _mali_osk_low_level_memory */ + + -+#endif /* __MALI_PP_JOB_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c -new file mode 100644 -index 000000000..b5e6cfddb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c -@@ -0,0 +1,1548 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++/** @addtogroup _mali_osk_notification ++ * ++ * User space notification framework ++ * ++ * Communication with user space of asynchronous events is performed through a ++ * synchronous call to the \ref u_k_api. ++ * ++ * Since the events are asynchronous, the events have to be queued until a ++ * synchronous U/K API call can be made by user-space. A U/K API call might also ++ * be received before any event has happened. Therefore the notifications the ++ * different subsystems wants to send to user space has to be queued for later ++ * reception, or a U/K API call has to be blocked until an event has occured. ++ * ++ * Typical uses of notifications are after running of jobs on the hardware or ++ * when changes to the system is detected that needs to be relayed to user ++ * space. 
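++ *
++ * A minimal end-to-end sketch of the framework (illustrative; the
++ * notification type EXAMPLE_NOTIFICATION_TYPE and the u32 payload are
++ * assumptions, not part of this API):
++ *
++ *   _mali_osk_notification_queue_t *q = _mali_osk_notification_queue_init();
++ *   if (NULL != q) {
++ *       _mali_osk_notification_t *n =
++ *               _mali_osk_notification_create(EXAMPLE_NOTIFICATION_TYPE,
++ *                                             sizeof(u32));
++ *       if (NULL != n) {
++ *           *(u32 *)n->result_buffer = 42;
++ *           _mali_osk_notification_queue_send(q, n);
++ *           if (_MALI_OSK_ERR_OK ==
++ *               _mali_osk_notification_queue_receive(q, &n)) {
++ *               _mali_osk_notification_delete(n);
++ *           }
++ *       }
++ *       _mali_osk_notification_queue_term(q);
++ *   }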
++ * ++ * After an event has occured user space has to be notified using some kind of ++ * message. The notification framework supports sending messages to waiting ++ * threads or queueing of messages until a U/K API call is made. ++ * ++ * The notification queue is a FIFO. There are no restrictions on the numbers ++ * of readers or writers in the queue. ++ * ++ * A message contains what user space needs to identifiy how to handle an ++ * event. This includes a type field and a possible type specific payload. ++ * ++ * A notification to user space is represented by a ++ * \ref _mali_osk_notification_t object. A sender gets hold of such an object ++ * using _mali_osk_notification_create(). The buffer given by the ++ * _mali_osk_notification_t::result_buffer field in the object is used to store ++ * any type specific data. The other fields are internal to the queue system ++ * and should not be touched. ++ * ++ * @{ */ ++ ++/** @brief Create a notification object ++ * ++ * Returns a notification object which can be added to the queue of ++ * notifications pending for user space transfer. ++ * ++ * The implementation will initialize all members of the ++ * \ref _mali_osk_notification_t object. In particular, the ++ * _mali_osk_notification_t::result_buffer member will be initialized to point ++ * to \a size bytes of storage, and that storage will be suitably aligned for ++ * storage of any structure. That is, the created buffer meets the same ++ * requirements as _mali_osk_malloc(). ++ * ++ * The notification object must be deleted when not in use. Use ++ * _mali_osk_notification_delete() for deleting it. ++ * ++ * @note You \b must \b not call _mali_osk_free() on a \ref _mali_osk_notification_t, ++ * object, or on a _mali_osk_notification_t::result_buffer. You must only use ++ * _mali_osk_notification_delete() to free the resources assocaited with a ++ * \ref _mali_osk_notification_t object. ++ * ++ * @param type The notification type ++ * @param size The size of the type specific buffer to send ++ * @return Pointer to a notification object with a suitable buffer, or NULL on error. + */ ++_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size); + -+#include "mali_scheduler.h" -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_osk_profiling.h" -+#include "mali_kernel_utilization.h" -+#include "mali_timeline.h" -+#include "mali_gp_job.h" -+#include "mali_pp_job.h" -+#include "mali_executor.h" -+#include "mali_group.h" -+#include -+#include -+#include "mali_pm_metrics.h" ++/** @brief Delete a notification object ++ * ++ * This must be called to reclaim the resources of a notification object. This ++ * includes: ++ * - The _mali_osk_notification_t::result_buffer ++ * - The \ref _mali_osk_notification_t itself. ++ * ++ * A notification object \b must \b not be used after it has been deleted by ++ * _mali_osk_notification_delete(). ++ * ++ * In addition, the notification object may not be deleted while it is in a ++ * queue. That is, if it has been placed on a queue with ++ * _mali_osk_notification_queue_send(), then it must not be deleted until ++ * it has been received by a call to _mali_osk_notification_queue_receive(). ++ * Otherwise, the queue may be corrupted. ++ * ++ * @param object the notification object to delete. 
++ */ ++void _mali_osk_notification_delete(_mali_osk_notification_t *object); + -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include "mali_memory_dma_buf.h" -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+#include "mali_dma_fence.h" -+#include -+#endif -+#endif ++/** @brief Create a notification queue ++ * ++ * Creates a notification queue which can be used to queue messages for user ++ * delivery and get queued messages from ++ * ++ * The queue is a FIFO, and has no restrictions on the numbers of readers or ++ * writers. ++ * ++ * When the queue is no longer in use, it must be terminated with ++ * \ref _mali_osk_notification_queue_term(). Failure to do so will result in a ++ * memory leak. ++ * ++ * @return Pointer to a new notification queue or NULL on error. ++ */ ++_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void); + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+#include -+#include -+#endif -+/* -+ * ---------- static defines/constants ---------- ++/** @brief Destroy a notification queue ++ * ++ * Destroys a notification queue and frees associated resources from the queue. ++ * ++ * A notification queue \b must \b not be destroyed in the following cases: ++ * - while there are \ref _mali_osk_notification_t objects in the queue. ++ * - while there are writers currently acting upon the queue. That is, while ++ * a thread is currently calling \ref _mali_osk_notification_queue_send() on ++ * the queue, or while a thread may call ++ * \ref _mali_osk_notification_queue_send() on the queue in the future. ++ * - while there are readers currently waiting upon the queue. That is, while ++ * a thread is currently calling \ref _mali_osk_notification_queue_receive() on ++ * the queue, or while a thread may call ++ * \ref _mali_osk_notification_queue_receive() on the queue in the future. ++ * ++ * Therefore, all \ref _mali_osk_notification_t objects must be flushed and ++ * deleted by the code that makes use of the notification queues, since only ++ * they know the structure of the _mali_osk_notification_t::result_buffer ++ * (even if it may only be a flat sturcture). ++ * ++ * @note Since the queue is a FIFO, the code using notification queues may ++ * create its own 'flush' type of notification, to assist in flushing the ++ * queue. ++ * ++ * Once the queue has been destroyed, it must not be used again. ++ * ++ * @param queue The queue to destroy + */ ++void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue); + -+/* -+ * If dma_buf with map on demand is used, we defer job queue -+ * if in atomic context, since both might sleep. ++/** @brief Schedule notification for delivery ++ * ++ * When a \ref _mali_osk_notification_t object has been created successfully ++ * and set up, it may be added to the queue of objects waiting for user space ++ * transfer. ++ * ++ * The sending will not block if the queue is full. ++ * ++ * A \ref _mali_osk_notification_t object \b must \b not be put on two different ++ * queues at the same time, or enqueued twice onto a single queue before ++ * reception. However, it is acceptable for it to be requeued \em after reception ++ * from a call to _mali_osk_notification_queue_receive(), even onto the same queue. ++ * ++ * Again, requeuing must also not enqueue onto two different queues at the same ++ * time, or enqueue onto the same queue twice before reception. 
++ * ++ * @param queue The notification queue to add this notification to ++ * @param object The entry to add + */ -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1 -+#endif -+#endif ++void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object); + ++/** @brief Receive a notification from a queue ++ * ++ * Receives a single notification from the given queue. ++ * ++ * If no notifciations are ready the thread will sleep until one becomes ready. ++ * Therefore, notifications may not be received into an ++ * IRQ or 'atomic' context (that is, a context where sleeping is disallowed). ++ * ++ * @param queue The queue to receive from ++ * @param result Pointer to storage of a pointer of type ++ * \ref _mali_osk_notification_t*. \a result will be written to such that the ++ * expression \a (*result) will evaluate to a pointer to a valid ++ * \ref _mali_osk_notification_t object, or NULL if none were received. ++ * @return _MALI_OSK_ERR_OK on success. _MALI_OSK_ERR_RESTARTSYSCALL if the sleep was interrupted. ++ */ ++_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result); + -+/* -+ * ---------- global variables (exported due to inline functions) ---------- ++/** @brief Dequeues a notification from a queue ++ * ++ * Receives a single notification from the given queue. ++ * ++ * If no notifciations are ready the function call will return an error code. ++ * ++ * @param queue The queue to receive from ++ * @param result Pointer to storage of a pointer of type ++ * \ref _mali_osk_notification_t*. \a result will be written to such that the ++ * expression \a (*result) will evaluate to a pointer to a valid ++ * \ref _mali_osk_notification_t object, or NULL if none were received. ++ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if queue was empty. + */ ++_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result); + -+/* Lock protecting this module */ -+_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL; ++/** @} */ /* end group _mali_osk_notification */ + -+/* Queue of jobs to be executed on the GP group */ -+struct mali_scheduler_job_queue job_queue_gp; + -+/* Queue of PP jobs */ -+struct mali_scheduler_job_queue job_queue_pp; ++/** @addtogroup _mali_osk_timer ++ * ++ * Timers use the OS's representation of time, which are 'ticks'. This is to ++ * prevent aliasing problems between the internal timer time, and the time ++ * asked for. ++ * ++ * @{ */ + -+_mali_osk_atomic_t mali_job_id_autonumber; -+_mali_osk_atomic_t mali_job_cache_order_autonumber; -+/* -+ * ---------- static variables ---------- ++/** @brief Initialize a timer ++ * ++ * Allocates resources for a new timer, and initializes them. This does not ++ * start the timer. ++ * ++ * @return a pointer to the allocated timer object, or NULL on failure. + */ ++_mali_osk_timer_t *_mali_osk_timer_init(_mali_osk_timer_callback_t callback); + -+_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL; -+_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL; -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue); ++/** @brief Start a timer ++ * ++ * It is an error to start a timer without setting the callback via ++ * _mali_osk_timer_setcallback(). 
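++ *
++ * A minimal lifecycle sketch (illustrative; it assumes the timer callback
++ * type takes a single void * argument, and example_timeout is a hypothetical
++ * handler):
++ *
++ *   static void example_timeout(void *data)
++ *   {
++ *       (void)data;
++ *   }
++ *
++ *   _mali_osk_timer_t *t = _mali_osk_timer_init(example_timeout);
++ *   if (NULL != t) {
++ *       _mali_osk_timer_setcallback(t, example_timeout, NULL);
++ *       _mali_osk_timer_add(t, _mali_osk_time_mstoticks(100));
++ *       ...
++ *       _mali_osk_timer_del(t);
++ *       _mali_osk_timer_term(t);
++ *   }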
++ * ++ * It is an error to use this to start an already started timer. ++ * ++ * The timer will expire in \a ticks_to_expire ticks, at which point, the ++ * callback function will be invoked with the callback-specific data, ++ * as registered by _mali_osk_timer_setcallback(). ++ * ++ * @param tim the timer to start ++ * @param ticks_to_expire the amount of time in ticks for the timer to run ++ * before triggering. ++ */ ++void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire); + -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL; -+static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL; -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list); -+#endif ++/** @brief Modify a timer ++ * ++ * Set the relative time at which a timer will expire, and start it if it is ++ * stopped. If \a ticks_to_expire 0 the timer fires immediately. ++ * ++ * It is an error to modify a timer without setting the callback via ++ * _mali_osk_timer_setcallback(). ++ * ++ * The timer will expire at \a ticks_to_expire from the time of the call, at ++ * which point, the callback function will be invoked with the ++ * callback-specific data, as set by _mali_osk_timer_setcallback(). ++ * ++ * @param tim the timer to modify, and start if necessary ++ * @param ticks_to_expire the \em absolute time in ticks at which this timer ++ * should trigger. ++ * ++ */ ++void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire); + -+/* -+ * ---------- Forward declaration of static functions ---------- ++/** @brief Stop a timer, and block on its completion. ++ * ++ * Stop the timer. When the function returns, it is guaranteed that the timer's ++ * callback will not be running on any CPU core. ++ * ++ * Since stoping the timer blocks on compeletion of the callback, the callback ++ * may not obtain any mutexes that the caller holds. Otherwise, a deadlock will ++ * occur. ++ * ++ * @note While the callback itself is guaranteed to not be running, work ++ * enqueued on the work-queue by the timer (with ++ * \ref _mali_osk_wq_schedule_work()) may still run. The timer callback and ++ * work handler must take this into account. ++ * ++ * It is legal to stop an already stopped timer. ++ * ++ * @param tim the timer to stop. ++ * + */ ++void _mali_osk_timer_del(_mali_osk_timer_t *tim); + -+static mali_timeline_point mali_scheduler_submit_gp_job( -+ struct mali_session_data *session, struct mali_gp_job *job); -+static _mali_osk_errcode_t mali_scheduler_submit_pp_job( -+ struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point); ++/** @brief Stop a timer. ++ * ++ * Stop the timer. When the function returns, the timer's callback may still be ++ * running on any CPU core. ++ * ++ * It is legal to stop an already stopped timer. ++ * ++ * @param tim the timer to stop. ++ */ ++void _mali_osk_timer_del_async(_mali_osk_timer_t *tim); + -+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job); -+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job); ++/** @brief Check if timer is pending. ++ * ++ * Check if timer is active. ++ * ++ * @param tim the timer to check ++ * @return MALI_TRUE if time is active, MALI_FALSE if it is not active ++ */ ++mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim); + -+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job, -+ mali_bool success); ++/** @brief Set a timer's callback parameters. 
++ * ++ * This must be called at least once before a timer is started/modified. ++ * ++ * After a timer has been stopped or expires, the callback remains set. This ++ * means that restarting the timer will call the same function with the same ++ * parameters on expiry. ++ * ++ * @param tim the timer to set callback on. ++ * @param callback Function to call when timer expires ++ * @param data Function-specific data to supply to the function on expiry. ++ */ ++void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data); + -+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job); -+void mali_scheduler_do_pp_job_delete(void *arg); ++/** @brief Terminate a timer, and deallocate resources. ++ * ++ * The timer must first be stopped by calling _mali_osk_timer_del(). ++ * ++ * It is a programming error for _mali_osk_timer_term() to be called on: ++ * - timer that is currently running ++ * - a timer that is currently executing its callback. ++ * ++ * @param tim the timer to deallocate. ++ */ ++void _mali_osk_timer_term(_mali_osk_timer_t *tim); ++/** @} */ /* end group _mali_osk_timer */ + -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job); -+static void mali_scheduler_do_pp_job_queue(void *arg); -+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+/* -+ * ---------- Actual implementation ---------- ++/** @defgroup _mali_osk_time OSK Time functions ++ * ++ * \ref _mali_osk_time use the OS's representation of time, which are ++ * 'ticks'. This is to prevent aliasing problems between the internal timer ++ * time, and the time asked for. ++ * ++ * OS tick time is measured as a u32. The time stored in a u32 may either be ++ * an absolute time, or a time delta between two events. Whilst it is valid to ++ * use math opeartors to \em change the tick value represented as a u32, it ++ * is often only meaningful to do such operations on time deltas, rather than ++ * on absolute time. However, it is meaningful to add/subtract time deltas to ++ * absolute times. ++ * ++ * Conversion between tick time and milliseconds (ms) may not be loss-less, ++ * and are \em implementation \em depenedant. ++ * ++ * Code use OS time must take this into account, since: ++ * - a small OS time may (or may not) be rounded ++ * - a large time may (or may not) overflow ++ * ++ * @{ */ ++ ++/** @brief Return whether ticka occurs after or at the same time as tickb ++ * ++ * Systems where ticks can wrap must handle that. ++ * ++ * @param ticka ticka ++ * @param tickb tickb ++ * @return MALI_TRUE if ticka represents a time that occurs at or after tickb. + */ ++mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb); + -+_mali_osk_errcode_t mali_scheduler_initialize(void) -+{ -+ _mali_osk_atomic_init(&mali_job_id_autonumber, 0); -+ _mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0); ++/** @brief Convert milliseconds to OS 'ticks' ++ * ++ * @param ms time interval in milliseconds ++ * @return the corresponding time interval in OS ticks. ++ */ ++unsigned long _mali_osk_time_mstoticks(u32 ms); + -+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri); -+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri); -+ job_queue_gp.depth = 0; -+ job_queue_gp.big_job_num = 0; ++/** @brief Convert OS 'ticks' to milliseconds ++ * ++ * @param ticks time interval in OS ticks. 
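++ *
++ * Conversion and deadline sketch for this group (illustrative):
++ *
++ *   unsigned long deadline = _mali_osk_time_tickcount() +
++ *                            _mali_osk_time_mstoticks(500);
++ *   mali_bool expired = _mali_osk_time_after_eq(_mali_osk_time_tickcount(),
++ *                                               deadline);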
++ * @return the corresponding time interval in milliseconds ++ */ ++u32 _mali_osk_time_tickstoms(unsigned long ticks); + -+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri); -+ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri); -+ job_queue_pp.depth = 0; -+ job_queue_pp.big_job_num = 0; + -+ mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init( -+ _MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_SCHEDULER); -+ if (NULL == mali_scheduler_lock_obj) { -+ mali_scheduler_terminate(); -+ } ++/** @brief Get the current time in OS 'ticks'. ++ * @return the current time in OS 'ticks'. ++ */ ++unsigned long _mali_osk_time_tickcount(void); + -+ scheduler_wq_pp_job_delete = _mali_osk_wq_create_work( -+ mali_scheduler_do_pp_job_delete, NULL); -+ if (NULL == scheduler_wq_pp_job_delete) { -+ mali_scheduler_terminate(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** @brief Cause a microsecond delay ++ * ++ * The delay will have microsecond resolution, and is necessary for correct ++ * operation of the driver. At worst, the delay will be \b at least \a usecs ++ * microseconds, and so may be (significantly) more. ++ * ++ * This function may be implemented as a busy-wait, which is the most sensible ++ * implementation. On OSs where there are situations in which a thread must not ++ * sleep, this is definitely implemented as a busy-wait. ++ * ++ * @param usecs the number of microseconds to wait for. ++ */ ++void _mali_osk_time_ubusydelay(u32 usecs); + -+ scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init( -+ _MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED); -+ if (NULL == scheduler_pp_job_delete_lock) { -+ mali_scheduler_terminate(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** @brief Return time in nano seconds, since any given reference. ++ * ++ * @return Time in nano seconds ++ */ ++u64 _mali_osk_time_get_ns(void); + -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+ scheduler_wq_pp_job_queue = _mali_osk_wq_create_work( -+ mali_scheduler_do_pp_job_queue, NULL); -+ if (NULL == scheduler_wq_pp_job_queue) { -+ mali_scheduler_terminate(); -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** @brief Return time in nano seconds, since boot time. ++ * ++ * @return Time in nano seconds ++ */ ++u64 _mali_osk_boot_time_get_ns(void); + -+ scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init( -+ _MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED); -+ if (NULL == scheduler_pp_job_queue_lock) { -+ mali_scheduler_terminate(); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ ++/** @} */ /* end group _mali_osk_time */ + -+ return _MALI_OSK_ERR_OK; -+} ++/** @defgroup _mali_osk_math OSK Math ++ * @{ */ + -+void mali_scheduler_terminate(void) -+{ -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+ if (NULL != scheduler_pp_job_queue_lock) { -+ _mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock); -+ scheduler_pp_job_queue_lock = NULL; -+ } ++/** @brief Count Leading Zeros (Little-endian) ++ * ++ * @note This function must be implemented to support the reference ++ * implementation of _mali_osk_find_first_zero_bit, as defined in ++ * mali_osk_bitops.h. ++ * ++ * @param val 32-bit words to count leading zeros on ++ * @return the number of leading zeros. 
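++ *
++ * For example, _mali_osk_clz(0x80000000) == 0, _mali_osk_clz(0x00010000) == 15
++ * and _mali_osk_clz(0x00000001) == 31.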
++ */ ++u32 _mali_osk_clz(u32 val); + -+ if (NULL != scheduler_wq_pp_job_queue) { -+ _mali_osk_wq_delete_work(scheduler_wq_pp_job_queue); -+ scheduler_wq_pp_job_queue = NULL; -+ } -+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ ++/** @brief find last (most-significant) bit set ++ * ++ * @param val 32-bit words to count last bit set on ++ * @return last bit set. ++ */ ++u32 _mali_osk_fls(u32 val); + -+ if (NULL != scheduler_pp_job_delete_lock) { -+ _mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock); -+ scheduler_pp_job_delete_lock = NULL; -+ } ++/** @} */ /* end group _mali_osk_math */ + -+ if (NULL != scheduler_wq_pp_job_delete) { -+ _mali_osk_wq_delete_work(scheduler_wq_pp_job_delete); -+ scheduler_wq_pp_job_delete = NULL; -+ } ++/** @addtogroup _mali_osk_wait_queue OSK Wait Queue functionality ++ * @{ */ + -+ if (NULL != mali_scheduler_lock_obj) { -+ _mali_osk_spinlock_irq_term(mali_scheduler_lock_obj); -+ mali_scheduler_lock_obj = NULL; -+ } ++/** @brief Initialize an empty Wait Queue */ ++_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void); + -+ _mali_osk_atomic_term(&mali_job_cache_order_autonumber); -+ _mali_osk_atomic_term(&mali_job_id_autonumber); -+} ++/** @brief Sleep if condition is false ++ * ++ * @param queue the queue to use ++ * @param condition function pointer to a boolean function ++ * @param data data parameter for condition function ++ * ++ * Put thread to sleep if the given \a condition function returns false. When ++ * being asked to wake up again, the condition will be re-checked and the ++ * thread only woken up if the condition is now true. ++ */ ++void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data); + -+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure) -+{ -+ /* -+ * Count how many physical sub jobs are present from the head of queue -+ * until the first virtual job is present. -+ * Early out when we have reached maximum number of PP cores (8) -+ */ -+ u32 count = 0; -+ struct mali_pp_job *job; -+ struct mali_pp_job *temp; ++/** @brief Sleep if condition is false ++ * ++ * @param queue the queue to use ++ * @param condition function pointer to a boolean function ++ * @param data data parameter for condition function ++ * @param timeout timeout in ms ++ * ++ * Put thread to sleep if the given \a condition function returns false. When ++ * being asked to wake up again, the condition will be re-checked and the ++ * thread only woken up if the condition is now true. Will return if time ++ * exceeds timeout. ++ */ ++void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout); + -+ /* Check for partially started normal pri jobs */ -+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); ++/** @brief Wake up all threads in wait queue if their respective conditions are ++ * true ++ * ++ * @param queue the queue whose threads should be woken up ++ * ++ * Wake up all threads in wait queue \a queue whose condition is now true. ++ */ ++void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue); + -+ job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, -+ struct mali_pp_job, list); ++/** @brief terminate a wait queue ++ * ++ * @param queue the queue to terminate. 
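++ *
++ * A minimal usage sketch for this group (illustrative; example_ready and the
++ * ready flag are assumptions):
++ *
++ *   static mali_bool example_ready(void *data)
++ *   {
++ *       return *(mali_bool *)data;
++ *   }
++ *
++ *   _mali_osk_wait_queue_t *wq = _mali_osk_wait_queue_init();
++ *   mali_bool ready = MALI_FALSE;
++ *   if (NULL != wq) {
++ *       _mali_osk_wait_queue_wait_event(wq, example_ready, &ready);
++ *       _mali_osk_wait_queue_term(wq);
++ *   }
++ *
++ * Another thread sets ready to MALI_TRUE and then calls
++ * _mali_osk_wait_queue_wake_up(wq) to release the waiter.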
++ */ ++void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue); ++/** @} */ /* end group _mali_osk_wait_queue */ + -+ MALI_DEBUG_ASSERT_POINTER(job); + -+ if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) { -+ /* -+ * Remember; virtual jobs can't be queued and started -+ * at the same time, so this must be a physical job -+ */ -+ if ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) -+ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) { ++/** @addtogroup _mali_osk_miscellaneous ++ * @{ */ + -+ count += mali_pp_job_unstarted_sub_job_count(job); -+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { -+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; -+ } -+ } -+ } -+ } ++/** @brief Output a device driver debug message. ++ * ++ * The interpretation of \a fmt is the same as the \c format parameter in ++ * _mali_osu_vsnprintf(). ++ * ++ * @param fmt a _mali_osu_vsnprintf() style format string ++ * @param ... a variable-number of parameters suitable for \a fmt ++ */ ++void _mali_osk_dbgmsg(const char *fmt, ...); + -+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri, -+ struct mali_pp_job, list) { -+ if ((MALI_FALSE == mali_pp_job_is_virtual(job)) -+ && ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) -+ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) { ++/** @brief Print fmt into buf. ++ * ++ * The interpretation of \a fmt is the same as the \c format parameter in ++ * _mali_osu_vsnprintf(). ++ * ++ * @param buf a pointer to the result buffer ++ * @param size the total number of bytes allowed to write to \a buf ++ * @param fmt a _mali_osu_vsnprintf() style format string ++ * @param ... a variable-number of parameters suitable for \a fmt ++ * @return The number of bytes written to \a buf ++ */ ++u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...); + -+ count += mali_pp_job_unstarted_sub_job_count(job); -+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { -+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; -+ } -+ } else { -+ /* Came across a virtual job, so stop counting */ -+ return count; -+ } -+ } ++/** @brief Abnormal process abort. ++ * ++ * Terminates the caller-process if this function is called. ++ * ++ * This function will be called from Debug assert-macros in mali_kernel_common.h. ++ * ++ * This function will never return - because to continue from a Debug assert ++ * could cause even more problems, and hinder debugging of the initial problem. ++ * ++ * This function is only used in Debug builds, and is not used in Release builds. ++ */ ++void _mali_osk_abort(void); + -+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri, -+ struct mali_pp_job, list) { -+ if ((MALI_FALSE == mali_pp_job_is_virtual(job)) -+ && (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) -+ && ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) -+ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) { ++/** @brief Sets breakpoint at point where function is called. ++ * ++ * This function will be called from Debug assert-macros in mali_kernel_common.h, ++ * to assist in debugging. If debugging at this level is not required, then this ++ * function may be implemented as a stub. ++ * ++ * This function is only used in Debug builds, and is not used in Release builds. 
++ */ ++void _mali_osk_break(void); + -+ count += mali_pp_job_unstarted_sub_job_count(job); -+ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { -+ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; -+ } -+ } else { -+ /* Came across a virtual job, so stop counting */ -+ return count; -+ } -+ } -+ return count; -+} ++/** @brief Return an identificator for calling process. ++ * ++ * @return Identificator for calling process. ++ */ ++u32 _mali_osk_get_pid(void); + -+struct mali_pp_job *mali_scheduler_job_pp_next(void) -+{ -+ struct mali_pp_job *job; -+ struct mali_pp_job *temp; ++/** @brief Return an name for calling process. ++ * ++ * @return name for calling process. ++ */ ++char *_mali_osk_get_comm(void); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++/** @brief Return an identificator for calling thread. ++ * ++ * @return Identificator for calling thread. ++ */ ++u32 _mali_osk_get_tid(void); + -+ /* Check for partially started normal pri jobs */ -+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, -+ struct mali_pp_job, list); ++/** @brief Take a reference to the power manager system for the Mali device (synchronously). ++ * ++ * When function returns successfully, Mali is ON. ++ * ++ * @note Call \a _mali_osk_pm_dev_ref_put() to release this reference. ++ */ ++_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @brief Take a reference to the external power manager system for the Mali device (asynchronously). ++ * ++ * Mali might not yet be on after this function as returned. ++ * Please use \a _mali_osk_pm_dev_barrier() or \a _mali_osk_pm_dev_ref_get_sync() ++ * to wait for Mali to be powered on. ++ * ++ * @note Call \a _mali_osk_pm_dev_ref_dec() to release this reference. ++ */ ++_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void); + -+ if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) { -+ return job; -+ } -+ } ++/** @brief Release the reference to the external power manger system for the Mali device. ++ * ++ * When reference count reach zero, the cores can be off. ++ * ++ * @note This must be used to release references taken with ++ * \a _mali_osk_pm_dev_ref_get_sync() or \a _mali_osk_pm_dev_ref_get_sync(). ++ */ ++void _mali_osk_pm_dev_ref_put(void); + -+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri, -+ struct mali_pp_job, list) { -+ return job; -+ } ++/** @brief Block until pending PM operations are done ++ */ ++void _mali_osk_pm_dev_barrier(void); + -+ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri, -+ struct mali_pp_job, list) { -+ return job; -+ } ++/** @} */ /* end group _mali_osk_miscellaneous */ + -+ return NULL; -+} ++/** @defgroup _mali_osk_bitmap OSK Bitmap ++ * @{ */ + -+mali_bool mali_scheduler_job_next_is_virtual(void) -+{ -+ struct mali_pp_job *job; ++/** @brief Allocate a unique number from the bitmap object. ++ * ++ * @param bitmap Initialized bitmap object. ++ * @return An unique existence in the bitmap object. ++ */ ++u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap); + -+ job = mali_scheduler_job_pp_virtual_peek(); -+ if (NULL != job) { -+ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job)); ++/** @brief Free a interger to the bitmap object. ++ * ++ * @param bitmap Initialized bitmap object. ++ * @param obj An number allocated from bitmap object. 
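++ *
++ * A minimal allocate/free sketch (illustrative; it assumes a zero return from
++ * _mali_osk_bitmap_init() indicates success, and the sizes are arbitrary):
++ *
++ *   struct _mali_osk_bitmap ids;
++ *   if (0 == _mali_osk_bitmap_init(&ids, 64, 1)) {
++ *       u32 id = _mali_osk_bitmap_alloc(&ids);
++ *       _mali_osk_bitmap_free(&ids, id);
++ *       _mali_osk_bitmap_term(&ids);
++ *   }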
++ */ ++void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj); + -+ return MALI_TRUE; -+ } ++/** @brief Allocate continuous number from the bitmap object. ++ * ++ * @param bitmap Initialized bitmap object. ++ * @return start number of the continuous number block. ++ */ ++u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt); + -+ return MALI_FALSE; -+} ++/** @brief Free a block of continuous number block to the bitmap object. ++ * ++ * @param bitmap Initialized bitmap object. ++ * @param obj Start number. ++ * @param cnt The size of the continuous number block. ++ */ ++void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt); + -+struct mali_gp_job *mali_scheduler_job_gp_get(void) -+{ -+ _mali_osk_list_t *queue; -+ struct mali_gp_job *job = NULL; ++/** @brief Available count could be used to allocate in the given bitmap object. ++ * ++ */ ++u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); -+ MALI_DEBUG_ASSERT(0 < job_queue_gp.depth); -+ MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth); ++/** @brief Initialize an bitmap object.. ++ * ++ * @param bitmap An poiter of uninitialized bitmap object. ++ * @param num Size of thei bitmap object and decide the memory size allocated. ++ * @param reserve start number used to allocate. ++ */ ++int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve); + -+ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) { -+ queue = &job_queue_gp.high_pri; -+ } else { -+ queue = &job_queue_gp.normal_pri; -+ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue)); -+ } ++/** @brief Free the given bitmap object. ++ * ++ * @param bitmap Initialized bitmap object. ++ */ ++void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap); ++/** @} */ /* end group _mali_osk_bitmap */ + -+ job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list); ++/** @} */ /* end group osuapi */ + -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @} */ /* end group uddapi */ + -+ mali_gp_job_list_remove(job); -+ job_queue_gp.depth--; -+ if (job->big_job) { -+ job_queue_gp.big_job_num --; -+ if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) { -+ /* wake up process */ -+ wait_queue_head_t *queue = mali_session_get_wait_queue(); -+ wake_up(queue); -+ } -+ } -+ return job; -+} + -+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void) -+{ -+ struct mali_pp_job *job = NULL; -+ struct mali_pp_job *tmp_job = NULL; + -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++#ifdef __cplusplus ++} ++#endif + -+ /* -+ * For PP jobs we favour partially started jobs in normal -+ * priority queue over unstarted jobs in high priority queue -+ */ ++/* Check standard inlines */ ++#ifndef MALI_STATIC_INLINE ++#error MALI_STATIC_INLINE not defined on your OS ++#endif + -+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); ++#ifndef MALI_NON_STATIC_INLINE ++#error MALI_NON_STATIC_INLINE not defined on your OS ++#endif + -+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, -+ struct mali_pp_job, list); -+ MALI_DEBUG_ASSERT(NULL != tmp_job); ++#endif /* __MALI_OSK_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h +new file mode 100644 +index 000000000..bb1831753 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_bitops.h +@@ -0,0 +1,162 @@ ++/* ++ * Copyright (C) 2010, 2013-2014, 
2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) { -+ job = tmp_job; -+ } -+ } ++/** ++ * @file mali_osk_bitops.h ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+ if (NULL == job || -+ MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) { -+ /* -+ * There isn't a partially started job in normal queue, so -+ * look in high priority queue. -+ */ -+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); ++#ifndef __MALI_OSK_BITOPS_H__ ++#define __MALI_OSK_BITOPS_H__ + -+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next, -+ struct mali_pp_job, list); -+ MALI_DEBUG_ASSERT(NULL != tmp_job); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) { -+ job = tmp_job; -+ } -+ } -+ } ++MALI_STATIC_INLINE void _mali_internal_clear_bit(u32 bit, u32 *addr) ++{ ++ MALI_DEBUG_ASSERT(bit < 32); ++ MALI_DEBUG_ASSERT(NULL != addr); + -+ return job; ++ (*addr) &= ~(1 << bit); +} + -+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void) ++MALI_STATIC_INLINE void _mali_internal_set_bit(u32 bit, u32 *addr) +{ -+ struct mali_pp_job *job = NULL; -+ struct mali_pp_job *tmp_job = NULL; ++ MALI_DEBUG_ASSERT(bit < 32); ++ MALI_DEBUG_ASSERT(NULL != addr); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ (*addr) |= (1 << bit); ++} + -+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); ++MALI_STATIC_INLINE u32 _mali_internal_test_bit(u32 bit, u32 value) ++{ ++ MALI_DEBUG_ASSERT(bit < 32); ++ return value & (1 << bit); ++} + -+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next, -+ struct mali_pp_job, list); ++MALI_STATIC_INLINE int _mali_internal_find_first_zero_bit(u32 value) ++{ ++ u32 inverted; ++ u32 negated; ++ u32 isolated; ++ u32 leading_zeros; + -+ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) { -+ job = tmp_job; -+ } -+ } ++ /* Begin with xxx...x0yyy...y, where ys are 1, number of ys is in range 0..31 */ ++ inverted = ~value; /* zzz...z1000...0 */ ++ /* Using count_trailing_zeros on inverted value - ++ * See ARM System Developers Guide for details of count_trailing_zeros */ + -+ if (NULL == job) { -+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { -+ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); ++ /* Isolate the zero: it is preceeded by a run of 1s, so add 1 to it */ ++ negated = (u32) - inverted ; /* -a == ~a + 1 (mod 2^n) for n-bit numbers */ ++ /* negated = xxx...x1000...0 */ + -+ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, -+ struct mali_pp_job, list); ++ isolated = negated & inverted ; /* xxx...x1000...0 & zzz...z1000...0, zs are ~xs */ ++ /* And so the first zero bit is in the same position as the 1 == number of 1s that preceeded it ++ * Note that the output is zero if value was all 1s */ + -+ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) { -+ job = tmp_job; -+ } -+ } -+ } ++ leading_zeros = _mali_osk_clz(isolated); + -+ return job; ++ return 31 - leading_zeros; +} + 
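++/*
++ * Worked example of the bit-twiddling above (illustrative): for
++ * value == 0x0000FFFF the first zero bit is bit 16:
++ *   inverted = 0xFFFF0000
++ *   negated  = 0x00010000
++ *   isolated = negated & inverted = 0x00010000
++ * _mali_osk_clz(isolated) == 15, so 31 - 15 == 16 is returned.
++ */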
-+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job) -+{ -+ struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek(); + -+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job)); ++/** @defgroup _mali_osk_bitops OSK Non-atomic Bit-operations ++ * @{ */ + -+ if (NULL != job) { -+ *sub_job = mali_pp_job_get_first_unstarted_sub_job(job); ++/** ++ * These bit-operations do not work atomically, and so locks must be used if ++ * atomicity is required. ++ * ++ * Reference implementations for Little Endian are provided, and so it should ++ * not normally be necessary to re-implement these. Efficient bit-twiddling ++ * techniques are used where possible, implemented in portable C. ++ * ++ * Note that these reference implementations rely on _mali_osk_clz() being ++ * implemented. ++ */ + -+ mali_pp_job_mark_sub_job_started(job, *sub_job); -+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) { -+ /* Remove from queue when last sub job has been retrieved */ -+ mali_pp_job_list_remove(job); -+ } ++/** @brief Clear a bit in a sequence of 32-bit words ++ * @param nr bit number to clear, starting from the (Little-endian) least ++ * significant bit ++ * @param addr starting point for counting. ++ */ ++MALI_STATIC_INLINE void _mali_osk_clear_nonatomic_bit(u32 nr, u32 *addr) ++{ ++ addr += nr >> 5; /* find the correct word */ ++ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ + -+ job_queue_pp.depth--; ++ _mali_internal_clear_bit(nr, addr); ++} + -+ /* -+ * Job about to start so it is no longer be -+ * possible to discard WB -+ */ -+ mali_pp_job_fb_lookup_remove(job); -+ } ++/** @brief Set a bit in a sequence of 32-bit words ++ * @param nr bit number to set, starting from the (Little-endian) least ++ * significant bit ++ * @param addr starting point for counting. ++ */ ++MALI_STATIC_INLINE void _mali_osk_set_nonatomic_bit(u32 nr, u32 *addr) ++{ ++ addr += nr >> 5; /* find the correct word */ ++ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ + -+ return job; ++ _mali_internal_set_bit(nr, addr); +} + -+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void) ++/** @brief Test a bit in a sequence of 32-bit words ++ * @param nr bit number to test, starting from the (Little-endian) least ++ * significant bit ++ * @param addr starting point for counting. ++ * @return zero if bit was clear, non-zero if set. Do not rely on the return ++ * value being related to the actual word under test. ++ */ ++MALI_STATIC_INLINE u32 _mali_osk_test_bit(u32 nr, u32 *addr) +{ -+ struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek(); -+ -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job)); ++ addr += nr >> 5; /* find the correct word */ ++ nr = nr & ((1 << 5) - 1); /* The bit number within the word */ + -+ if (NULL != job) { -+ MALI_DEBUG_ASSERT(0 == -+ mali_pp_job_get_first_unstarted_sub_job(job)); -+ MALI_DEBUG_ASSERT(1 == -+ mali_pp_job_get_sub_job_count(job)); ++ return _mali_internal_test_bit(nr, *addr); ++} + -+ mali_pp_job_mark_sub_job_started(job, 0); ++/* Return maxbit if not found */ ++/** @brief Find the first zero bit in a sequence of 32-bit words ++ * @param addr starting point for search. ++ * @param maxbit the maximum number of bits to search ++ * @return the number of the first zero bit found, or maxbit if none were found ++ * in the specified range. 
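++ *
++ * Usage sketch (illustrative): with u32 bitmap[2] = { 0xFFFFFFFF, 0x00000003 },
++ * _mali_osk_find_first_zero_bit(bitmap, 64) returns 34, while
++ * _mali_osk_find_first_zero_bit(bitmap, 32) returns 32 (no zero bit in range).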
++ */ ++MALI_STATIC_INLINE u32 _mali_osk_find_first_zero_bit(const u32 *addr, u32 maxbit) ++{ ++ u32 total; + -+ mali_pp_job_list_remove(job); ++ for (total = 0; total < maxbit; total += 32, ++addr) { ++ int result; ++ result = _mali_internal_find_first_zero_bit(*addr); + -+ job_queue_pp.depth--; ++ /* non-negative signifies the bit was found */ ++ if (result >= 0) { ++ total += (u32)result; ++ break; ++ } ++ } + -+ /* -+ * Job about to start so it is no longer be -+ * possible to discard WB -+ */ -+ mali_pp_job_fb_lookup_remove(job); ++ /* Now check if we reached maxbit or above */ ++ if (total >= maxbit) { ++ total = maxbit; + } + -+ return job; ++ return total; /* either the found bit nr, or maxbit if not found */ +} ++/** @} */ /* end group _mali_osk_bitops */ + -+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ -+ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n", -+ mali_gp_job_get_id(job), job)); -+ -+ mali_scheduler_lock(); -+ -+ if (!mali_scheduler_queue_gp_job(job)) { -+ /* Failed to enqueue job, release job (with error) */ ++#ifdef __cplusplus ++} ++#endif + -+ mali_scheduler_unlock(); ++#endif /* __MALI_OSK_BITOPS_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h +new file mode 100644 +index 000000000..9af2d7d4d +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_list.h +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ mali_timeline_tracker_release(mali_gp_job_get_tracker(job)); -+ mali_gp_job_signal_pp_tracker(job, MALI_FALSE); ++/** ++ * @file mali_osk_list.h ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+ /* This will notify user space and close the job object */ -+ mali_scheduler_complete_gp_job(job, MALI_FALSE, -+ MALI_TRUE, MALI_FALSE); ++#ifndef __MALI_OSK_LIST_H__ ++#define __MALI_OSK_LIST_H__ + -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+ mali_scheduler_unlock(); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ return MALI_SCHEDULER_MASK_GP; ++MALI_STATIC_INLINE void __mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *prev, _mali_osk_list_t *next) ++{ ++ next->prev = new_entry; ++ new_entry->next = next; ++ new_entry->prev = prev; ++ prev->next = new_entry; +} + -+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job) ++MALI_STATIC_INLINE void __mali_osk_list_del(_mali_osk_list_t *prev, _mali_osk_list_t *next) +{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++ next->prev = prev; ++ prev->next = next; ++} + -+ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n", -+ mali_pp_job_get_id(job), job)); -+ -+ if (MALI_TRUE == mali_timeline_tracker_activation_error( -+ mali_pp_job_get_tracker(job))) { -+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n", -+ mali_pp_job_get_id(job), job)); ++/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists ++ * @{ */ + -+ mali_scheduler_lock(); -+ mali_pp_job_fb_lookup_remove(job); -+ mali_pp_job_mark_unstarted_failed(job); -+ mali_scheduler_unlock(); ++/** Reference implementations of Doubly-linked Circular Lists are provided. ++ * There is often no need to re-implement these. ++ * ++ * @note The implementation may differ subtly from any lists the OS provides. ++ * For this reason, these lists should not be mixed with OS-specific lists ++ * inside the OSK/UKK implementation. */ + -+ mali_timeline_tracker_release(mali_pp_job_get_tracker(job)); ++/** @brief Initialize a list to be a head of an empty list ++ * @param exp the list to initialize. */ ++#define _MALI_OSK_INIT_LIST_HEAD(exp) _mali_osk_list_init(exp) + -+ /* This will notify user space and close the job object */ -+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE); ++/** @brief Define a list variable, which is uninitialized. ++ * @param exp the name of the variable that the list will be defined as. */ ++#define _MALI_OSK_LIST_HEAD(exp) _mali_osk_list_t exp + -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } ++/** @brief Define a list variable, which is initialized. ++ * @param exp the name of the variable that the list will be defined as. */ ++#define _MALI_OSK_LIST_HEAD_STATIC_INIT(exp) _mali_osk_list_t exp = { &exp, &exp } + -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+ if (mali_pp_job_needs_dma_buf_mapping(job)) { -+ mali_scheduler_deferred_pp_job_queue(job); -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } -+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ ++/** @brief Initialize a list element. ++ * ++ * All list elements must be initialized before use. ++ * ++ * Do not use on any list element that is present in a list without using ++ * _mali_osk_list_del first, otherwise this will break the list. 
++ * ++ * @param list the list element to initialize ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_init(_mali_osk_list_t *list) ++{ ++ list->next = list; ++ list->prev = list; ++} + -+ mali_scheduler_lock(); ++/** @brief Insert a single list element after an entry in a list ++ * ++ * As an example, if this is inserted to the head of a list, then this becomes ++ * the first element of the list. ++ * ++ * Do not use to move list elements from one list to another, as it will break ++ * the originating list. ++ * ++ * ++ * @param newlist the list element to insert ++ * @param list the list in which to insert. The new element will be the next ++ * entry in this list ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_add(_mali_osk_list_t *new_entry, _mali_osk_list_t *list) ++{ ++ __mali_osk_list_add(new_entry, list, list->next); ++} + -+ if (!mali_scheduler_queue_pp_job(job)) { -+ /* Failed to enqueue job, release job (with error) */ -+ mali_pp_job_fb_lookup_remove(job); -+ mali_pp_job_mark_unstarted_failed(job); -+ mali_scheduler_unlock(); ++/** @brief Insert a single list element before an entry in a list ++ * ++ * As an example, if this is inserted to the head of a list, then this becomes ++ * the last element of the list. ++ * ++ * Do not use to move list elements from one list to another, as it will break ++ * the originating list. ++ * ++ * @param newlist the list element to insert ++ * @param list the list in which to insert. The new element will be the previous ++ * entry in this list ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_addtail(_mali_osk_list_t *new_entry, _mali_osk_list_t *list) ++{ ++ __mali_osk_list_add(new_entry, list->prev, list); ++} + -+ mali_timeline_tracker_release(mali_pp_job_get_tracker(job)); ++/** @brief Remove a single element from a list ++ * ++ * The element will no longer be present in the list. The removed list element ++ * will be uninitialized, and so should not be traversed. It must be ++ * initialized before further use. ++ * ++ * @param list the list element to remove. ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_del(_mali_osk_list_t *list) ++{ ++ __mali_osk_list_del(list->prev, list->next); ++} + -+ /* This will notify user space and close the job object */ -+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE); ++/** @brief Remove a single element from a list, and re-initialize it ++ * ++ * The element will no longer be present in the list. The removed list element ++ * will initialized, and so can be used as normal. ++ * ++ * @param list the list element to remove and initialize. ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_delinit(_mali_osk_list_t *list) ++{ ++ __mali_osk_list_del(list->prev, list->next); ++ _mali_osk_list_init(list); ++} + -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } ++/** @brief Determine whether a list is empty. ++ * ++ * An empty list is one that contains a single element that points to itself. ++ * ++ * @param list the list to check. ++ * @return non-zero if the list is empty, and zero otherwise. ++ */ ++MALI_STATIC_INLINE mali_bool _mali_osk_list_empty(_mali_osk_list_t *list) ++{ ++ return list->next == list; ++} + -+ mali_scheduler_unlock(); -+ return MALI_SCHEDULER_MASK_PP; ++/** @brief Move a list element from one list to another. ++ * ++ * The list element must be initialized. ++ * ++ * As an example, moving a list item to the head of a new list causes this item ++ * to be the first element in the new list. 
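++ *
++ * For example, moving a tracked item from an idle list to a working list
++ * (the names are illustrative):
++ *
++ *   _mali_osk_list_move(&item->list, &working_list);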
++ * ++ * @param move the list element to move ++ * @param list the new list into which the element will be inserted, as the next ++ * element in the list. ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_move(_mali_osk_list_t *move_entry, _mali_osk_list_t *list) ++{ ++ __mali_osk_list_del(move_entry->prev, move_entry->next); ++ _mali_osk_list_add(move_entry, list); +} + -+void mali_scheduler_complete_gp_job(struct mali_gp_job *job, -+ mali_bool success, -+ mali_bool user_notification, -+ mali_bool dequeued) ++/** @brief Move an entire list ++ * ++ * The list element must be initialized. ++ * ++ * Allows you to move a list from one list head to another list head ++ * ++ * @param old_list The existing list head ++ * @param new_list The new list head (must be an empty list) ++ */ ++MALI_STATIC_INLINE void _mali_osk_list_move_list(_mali_osk_list_t *old_list, _mali_osk_list_t *new_list) +{ -+ if (user_notification) { -+ mali_scheduler_return_gp_job_to_user(job, success); ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(new_list)); ++ if (!_mali_osk_list_empty(old_list)) { ++ new_list->next = old_list->next; ++ new_list->prev = old_list->prev; ++ new_list->next->prev = new_list; ++ new_list->prev->next = new_list; ++ old_list->next = old_list; ++ old_list->prev = old_list; + } ++} + -+ if (dequeued) { -+ _mali_osk_pm_dev_ref_put(); ++/** @brief Find the containing structure of a list ++ * ++ * When traversing a list, this is used to recover the containing structure, ++ * given that is contains a _mali_osk_list_t member. ++ * ++ * Each list must be of structures of one type, and must link the same members ++ * together, otherwise it will not be possible to correctly recover the ++ * sturctures that the lists link. ++ * ++ * @note no type or memory checking occurs to ensure that a structure does in ++ * fact exist for the list entry, and that it is being recovered with respect ++ * to the correct list member. ++ * ++ * @param ptr the pointer to the _mali_osk_list_t member in this structure ++ * @param type the type of the structure that contains the member ++ * @param member the member of the structure that ptr points to. ++ * @return a pointer to a \a type object which contains the _mali_osk_list_t ++ * \a member, as pointed to by the _mali_osk_list_t \a *ptr. ++ */ ++#define _MALI_OSK_LIST_ENTRY(ptr, type, member) \ ++ _MALI_OSK_CONTAINER_OF(ptr, type, member) + -+ if (mali_utilization_enabled()) { -+ mali_utilization_gp_end(); -+ } -+ mali_pm_record_gpu_idle(MALI_TRUE); -+ } ++/** @brief Enumerate a list safely ++ * ++ * With this macro, lists can be enumerated in a 'safe' manner. That is, ++ * entries can be deleted from the list without causing an error during ++ * enumeration. To achieve this, a 'temporary' pointer is required, which must ++ * be provided to the macro. ++ * ++ * Use it like a 'for()', 'while()' or 'do()' construct, and so it must be ++ * followed by a statement or compound-statement which will be executed for ++ * each list entry. ++ * ++ * Upon loop completion, providing that an early out was not taken in the ++ * loop body, then it is guaranteed that ptr->member == list, even if the loop ++ * body never executed. ++ * ++ * @param ptr a pointer to an object of type 'type', which points to the ++ * structure that contains the currently enumerated list entry. ++ * @param tmp a pointer to an object of type 'type', which must not be used ++ * inside the list-execution statement. 
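++ *
++ * A minimal safe-removal sketch (illustrative; struct example_item and
++ * pending_list are assumptions):
++ *
++ *   struct example_item {
++ *       int value;
++ *       _mali_osk_list_t list;
++ *   };
++ *
++ *   struct example_item *item, *tmp;
++ *   _MALI_OSK_LIST_FOREACHENTRY(item, tmp, &pending_list,
++ *                               struct example_item, list) {
++ *       _mali_osk_list_delinit(&item->list);
++ *   }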
++ * @param list a pointer to a _mali_osk_list_t, from which enumeration will ++ * begin ++ * @param type the type of the structure that contains the _mali_osk_list_t ++ * member that is part of the list to be enumerated. ++ * @param member the _mali_osk_list_t member of the structure that is part of ++ * the list to be enumerated. ++ */ ++#define _MALI_OSK_LIST_FOREACHENTRY(ptr, tmp, list, type, member) \ ++ for (ptr = _MALI_OSK_LIST_ENTRY((list)->next, type, member), \ ++ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.next, type, member); \ ++ &ptr->member != (list); \ ++ ptr = tmp, \ ++ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.next, type, member)) + -+ mali_gp_job_delete(job); -+} ++/** @brief Enumerate a list in reverse order safely ++ * ++ * This macro is identical to @ref _MALI_OSK_LIST_FOREACHENTRY, except that ++ * entries are enumerated in reverse order. ++ * ++ * @param ptr a pointer to an object of type 'type', which points to the ++ * structure that contains the currently enumerated list entry. ++ * @param tmp a pointer to an object of type 'type', which must not be used ++ * inside the list-execution statement. ++ * @param list a pointer to a _mali_osk_list_t, from which enumeration will ++ * begin ++ * @param type the type of the structure that contains the _mali_osk_list_t ++ * member that is part of the list to be enumerated. ++ * @param member the _mali_osk_list_t member of the structure that is part of ++ * the list to be enumerated. ++ */ ++#define _MALI_OSK_LIST_FOREACHENTRY_REVERSE(ptr, tmp, list, type, member) \ ++ for (ptr = _MALI_OSK_LIST_ENTRY((list)->prev, type, member), \ ++ tmp = _MALI_OSK_LIST_ENTRY(ptr->member.prev, type, member); \ ++ &ptr->member != (list); \ ++ ptr = tmp, \ ++ tmp = _MALI_OSK_LIST_ENTRY(tmp->member.prev, type, member)) + -+void mali_scheduler_complete_pp_job(struct mali_pp_job *job, -+ u32 num_cores_in_virtual, -+ mali_bool user_notification, -+ mali_bool dequeued) -+{ -+ job->user_notification = user_notification; -+ job->num_pp_cores_in_virtual = num_cores_in_virtual; ++/** @} */ /* end group _mali_osk_list */ + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ if (NULL != job->rendered_dma_fence) -+ mali_dma_fence_signal_and_put(&job->rendered_dma_fence); ++#ifdef __cplusplus ++} +#endif + -+ if (dequeued) { -+#if defined(CONFIG_MALI_DVFS) -+ if (mali_pp_job_is_window_surface(job)) { -+ struct mali_session_data *session; -+ session = mali_pp_job_get_session(job); -+ mali_session_inc_num_window_jobs(session); -+ } -+#endif -+ _mali_osk_pm_dev_ref_put(); ++#endif /* __MALI_OSK_LIST_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h +new file mode 100644 +index 000000000..ebcc277fa +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_mali.h +@@ -0,0 +1,157 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
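Why `_MALI_OSK_LIST_FOREACHENTRY` needs the extra `tmp` cursor is easiest to see in a delete-while-iterating loop: `tmp` already points at the next container before the current one is unlinked, so removal cannot break the traversal. A minimal sketch, reusing the hypothetical `struct demo_job` from the previous example:

/* Drop every entry whose id matches `victim`. */
static void demo_purge(_mali_osk_list_t *queue, u32 victim)
{
        struct demo_job *job;
        struct demo_job *tmp;

        _MALI_OSK_LIST_FOREACHENTRY(job, tmp, queue, struct demo_job, list) {
                if (victim == job->id) {
                        /* Safe: `tmp` was captured before this entry was unlinked. */
                        _mali_osk_list_delinit(&job->list);
                        /* free or recycle the job here */
                }
        }
}

This is the same tmp-cursor pattern the scheduler's queue-walking code uses (its loops pass `gp_tmp`/`pp_tmp` as the temporary).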
++ */ + -+ if (mali_utilization_enabled()) { -+ mali_utilization_pp_end(); -+ } -+ mali_pm_record_gpu_idle(MALI_FALSE); -+ } ++/** ++ * @file mali_osk_mali.h ++ * Defines the OS abstraction layer which is specific for the Mali kernel device driver (OSK) ++ */ + -+ /* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */ -+ mali_scheduler_deferred_pp_job_delete(job); -+} ++#ifndef __MALI_OSK_MALI_H__ ++#define __MALI_OSK_MALI_H__ + -+void mali_scheduler_abort_session(struct mali_session_data *session) -+{ -+ struct mali_gp_job *gp_job; -+ struct mali_gp_job *gp_tmp; -+ struct mali_pp_job *pp_job; -+ struct mali_pp_job *pp_tmp; -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp); -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp); ++#include ++#include ++#include + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT(session->is_aborting); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n", -+ session)); ++#ifdef CONFIG_MALI_DEVFREQ ++struct mali_device { ++ struct device *dev; ++#ifdef CONFIG_HAVE_CLK ++ struct clk *clock; ++ struct clk_bulk_data *clks; ++ int num_clks; ++#endif ++#ifdef CONFIG_REGULATOR ++ struct regulator *regulator; ++ struct opp_table *opp_table; ++#endif ++#ifdef CONFIG_PM_DEVFREQ ++ struct devfreq_dev_profile devfreq_profile; ++ struct devfreq *devfreq; ++ unsigned long current_freq; ++ unsigned long current_voltage; ++ struct monitor_dev_info *mdev_info; ++ struct rockchip_opp_info opp_info; ++#ifdef CONFIG_DEVFREQ_THERMAL ++ struct thermal_cooling_device *devfreq_cooling; ++#endif ++#endif ++ struct mali_pm_metrics_data mali_metrics; ++}; ++#endif + -+ mali_scheduler_lock(); ++/** @addtogroup _mali_osk_miscellaneous ++ * @{ */ + -+ /* Remove from GP normal priority queue */ -+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri, -+ struct mali_gp_job, list) { -+ if (mali_gp_job_get_session(gp_job) == session) { -+ mali_gp_job_list_move(gp_job, &removed_jobs_gp); -+ job_queue_gp.depth--; -+ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0; -+ } -+ } ++/** @brief Struct with device specific configuration data ++ */ ++typedef struct mali_gpu_device_data _mali_osk_device_data; + -+ /* Remove from GP high priority queue */ -+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri, -+ struct mali_gp_job, list) { -+ if (mali_gp_job_get_session(gp_job) == session) { -+ mali_gp_job_list_move(gp_job, &removed_jobs_gp); -+ job_queue_gp.depth--; -+ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0; -+ } -+ } ++#ifdef CONFIG_MALI_DT ++/** @brief Initialize those device resources when we use device tree ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_resource_initialize(void); ++#endif + -+ /* Remove from PP normal priority queue */ -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, -+ &job_queue_pp.normal_pri, -+ struct mali_pp_job, list) { -+ if (mali_pp_job_get_session(pp_job) == session) { -+ mali_pp_job_fb_lookup_remove(pp_job); ++/** @brief Find Mali GPU HW resource ++ * ++ * @param addr Address of Mali GPU resource to find ++ * @param res Storage for resource information if resource is found. 
++ * @return _MALI_OSK_ERR_OK on success, _MALI_OSK_ERR_ITEM_NOT_FOUND if resource is not found ++ */ ++_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res); + -+ job_queue_pp.depth -= -+ mali_pp_job_unstarted_sub_job_count( -+ pp_job); -+ mali_pp_job_mark_unstarted_failed(pp_job); + -+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) { -+ if (mali_pp_job_is_complete(pp_job)) { -+ mali_pp_job_list_move(pp_job, -+ &removed_jobs_pp); -+ } else { -+ mali_pp_job_list_remove(pp_job); -+ } -+ } -+ } -+ } ++/** @brief Find Mali GPU HW base address ++ * ++ * @return 0 if resources are found, otherwise the Mali GPU component with lowest address. ++ */ ++uintptr_t _mali_osk_resource_base_address(void); + -+ /* Remove from PP high priority queue */ -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, -+ &job_queue_pp.high_pri, -+ struct mali_pp_job, list) { -+ if (mali_pp_job_get_session(pp_job) == session) { -+ mali_pp_job_fb_lookup_remove(pp_job); ++/** @brief Find the specific GPU resource. ++ * ++ * @return value ++ * 0x400 if Mali 400 specific GPU resource identified ++ * 0x450 if Mali 450 specific GPU resource identified ++ * 0x470 if Mali 470 specific GPU resource identified ++ * ++ */ ++u32 _mali_osk_identify_gpu_resource(void); + -+ job_queue_pp.depth -= -+ mali_pp_job_unstarted_sub_job_count( -+ pp_job); -+ mali_pp_job_mark_unstarted_failed(pp_job); ++/** @brief Retrieve the Mali GPU specific data ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data); + -+ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) { -+ if (mali_pp_job_is_complete(pp_job)) { -+ mali_pp_job_list_move(pp_job, -+ &removed_jobs_pp); -+ } else { -+ mali_pp_job_list_remove(pp_job); -+ } -+ } -+ } -+ } ++/** @brief Find the pmu domain config from device data. ++ * ++ * @param domain_config_array used to store pmu domain config found in device data. ++ * @param array_size is the size of array domain_config_array. ++ */ ++void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size); + -+ /* -+ * Release scheduler lock so we can release trackers -+ * (which will potentially queue new jobs) -+ */ -+ mali_scheduler_unlock(); ++/** @brief Get Mali PMU switch delay ++ * ++ *@return pmu switch delay if it is configured ++ */ ++u32 _mali_osk_get_pmu_switch_delay(void); + -+ /* Release and complete all (non-running) found GP jobs */ -+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp, -+ struct mali_gp_job, list) { -+ mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job)); -+ mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE); -+ _mali_osk_list_delinit(&gp_job->list); -+ mali_scheduler_complete_gp_job(gp_job, -+ MALI_FALSE, MALI_FALSE, MALI_TRUE); -+ } ++/** @brief Determines if Mali GPU has been configured with shared interrupts. ++ * ++ * @return MALI_TRUE if shared interrupts, MALI_FALSE if not. ++ */ ++mali_bool _mali_osk_shared_interrupts(void); + -+ /* Release and complete non-running PP jobs */ -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp, -+ struct mali_pp_job, list) { -+ mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job)); -+ _mali_osk_list_delinit(&pp_job->list); -+ mali_scheduler_complete_pp_job(pp_job, 0, -+ MALI_FALSE, MALI_TRUE); -+ } -+} ++/** @brief Initialize the gpu secure mode. ++ * The gpu secure mode will initially be in a disabled state. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. 
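One plausible use of the resource helpers declared above is probing for an optional core, where a miss is reported as `_MALI_OSK_ERR_ITEM_NOT_FOUND` rather than treated as fatal. The function name and the 0x8000 offset below are made up for illustration; only the contracts of `_mali_osk_resource_base_address()` and `_mali_osk_resource_find()` stated above are assumed.

static _mali_osk_errcode_t demo_probe_optional_core(void)
{
        _mali_osk_resource_t res;
        uintptr_t base = _mali_osk_resource_base_address();

        if (_MALI_OSK_ERR_OK != _mali_osk_resource_find((u32)(base + 0x8000), &res)) {
                /* Not fatal: the core is simply not described for this SoC. */
                return _MALI_OSK_ERR_ITEM_NOT_FOUND;
        }

        MALI_DEBUG_PRINT(2, ("Mali demo: found %s at 0x%08X, IRQ %u\n",
                             res.description, (u32)res.base, res.irq));
        return _MALI_OSK_ERR_OK;
}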
++ */ ++_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void); + -+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, -+ _mali_uk_gp_start_job_s *uargs) -+{ -+ struct mali_session_data *session; -+ struct mali_gp_job *job; -+ mali_timeline_point point; -+ u32 __user *point_ptr = NULL; ++/** @brief Deinitialize the gpu secure mode. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void); + -+ MALI_DEBUG_ASSERT_POINTER(uargs); -+ MALI_DEBUG_ASSERT_POINTER(ctx); ++/** @brief Reset GPU and enable the gpu secure mode. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void); + -+ session = (struct mali_session_data *)(uintptr_t)ctx; ++/** @brief Reset GPU and disable the gpu secure mode. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void); + -+ job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(), -+ NULL); -+ if (NULL == job) { -+ MALI_PRINT_ERROR(("Failed to create GP job.\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++/** @brief Check if the gpu secure mode has been enabled. ++ * @return MALI_TRUE if enabled, otherwise MALI_FALSE. ++ */ ++mali_bool _mali_osk_gpu_secure_mode_is_enabled(void); + -+ point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job); ++/** @brief Check if the gpu secure mode is supported. ++ * @return MALI_TRUE if supported, otherwise MALI_FALSE. ++ */ ++mali_bool _mali_osk_gpu_secure_mode_is_supported(void); + -+ point = mali_scheduler_submit_gp_job(session, job); + -+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { -+ /* -+ * Let user space know that something failed -+ * after the job was started. -+ */ -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; -+ } ++/** @} */ /* end group _mali_osk_miscellaneous */ + -+ return _MALI_OSK_ERR_OK; ++#ifdef __cplusplus +} ++#endif + -+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, -+ _mali_uk_pp_start_job_s *uargs) -+{ -+ _mali_osk_errcode_t ret; -+ struct mali_session_data *session; -+ struct mali_pp_job *job; -+ mali_timeline_point point; -+ u32 __user *point_ptr = NULL; ++#endif /* __MALI_OSK_MALI_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h +new file mode 100644 +index 000000000..6e4583db1 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_profiling.h +@@ -0,0 +1,146 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
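The secure-mode API above is a small state machine: `_mali_osk_gpu_secure_mode_init()` leaves the mode disabled, and enabling or disabling always goes through a GPU reset. A hedged sketch of the check-then-enable order a caller might use, with an invented helper name:

static _mali_osk_errcode_t demo_enter_secure_rendering(void)
{
        if (MALI_FALSE == _mali_osk_gpu_secure_mode_is_supported())
                return _MALI_OSK_ERR_UNSUPPORTED;

        if (_mali_osk_gpu_secure_mode_is_enabled())
                return _MALI_OSK_ERR_OK;        /* already in secure mode */

        /* Enabling always resets the GPU first, per the contract above. */
        return _mali_osk_gpu_reset_and_secure_mode_enable();
}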
++ */ + -+ MALI_DEBUG_ASSERT_POINTER(uargs); -+ MALI_DEBUG_ASSERT_POINTER(ctx); ++#ifndef __MALI_OSK_PROFILING_H__ ++#define __MALI_OSK_PROFILING_H__ + -+ session = (struct mali_session_data *)(uintptr_t)ctx; ++#if defined(CONFIG_MALI400_PROFILING) && defined (CONFIG_TRACEPOINTS) + -+ job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id()); -+ if (NULL == job) { -+ MALI_PRINT_ERROR(("Failed to create PP job.\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++#include "mali_linux_trace.h" ++#include "mali_profiling_events.h" ++#include "mali_profiling_gator_api.h" + -+ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job); ++#define MALI_PROFILING_MAX_BUFFER_ENTRIES 1048576 + -+ /* Submit PP job. */ -+ ret = mali_scheduler_submit_pp_job(session, job, &point); -+ job = NULL; ++#define MALI_PROFILING_NO_HW_COUNTER = ((u32)-1) + -+ if (_MALI_OSK_ERR_OK == ret) { -+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { -+ /* -+ * Let user space know that something failed -+ * after the jobs were started. -+ */ -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; -+ } -+ } ++/** @defgroup _mali_osk_profiling External profiling connectivity ++ * @{ */ + -+ return ret; -+} ++/** ++ * Initialize the profiling module. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start); + -+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, -+ _mali_uk_pp_and_gp_start_job_s *uargs) -+{ -+ _mali_osk_errcode_t ret; -+ struct mali_session_data *session; -+ _mali_uk_pp_and_gp_start_job_s kargs; -+ struct mali_pp_job *pp_job; -+ struct mali_gp_job *gp_job; -+ u32 __user *point_ptr = NULL; -+ mali_timeline_point point; -+ _mali_uk_pp_start_job_s __user *pp_args; -+ _mali_uk_gp_start_job_s __user *gp_args; ++/* ++ * Terminate the profiling module. ++ */ ++void _mali_osk_profiling_term(void); + -+ MALI_DEBUG_ASSERT_POINTER(ctx); -+ MALI_DEBUG_ASSERT_POINTER(uargs); ++/** ++ * Stop the profile sampling operation. ++ */ ++void _mali_osk_profiling_stop_sampling(u32 pid); + -+ session = (struct mali_session_data *) ctx; ++/** ++ * Start recording profiling data ++ * ++ * The specified limit will determine how large the capture buffer is. ++ * MALI_PROFILING_MAX_BUFFER_ENTRIES determines the maximum size allowed by the device driver. ++ * ++ * @param limit The desired maximum number of events to record on input, the actual maximum on output. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_profiling_start(u32 *limit); + -+ if (0 != _mali_osk_copy_from_user(&kargs, uargs, -+ sizeof(_mali_uk_pp_and_gp_start_job_s))) { -+ return _MALI_OSK_ERR_NOMEM; -+ } ++/** ++ * Add an profiling event ++ * ++ * @param event_id The event identificator. ++ * @param data0 First data parameter, depending on event_id specified. ++ * @param data1 Second data parameter, depending on event_id specified. ++ * @param data2 Third data parameter, depending on event_id specified. ++ * @param data3 Fourth data parameter, depending on event_id specified. ++ * @param data4 Fifth data parameter, depending on event_id specified. ++ */ ++void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4); + -+ pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args; -+ gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args; ++/** ++ * Report a hardware counter event. ++ * ++ * @param counter_id The ID of the counter. 
++ * @param value The value of the counter. ++ */ + -+ pp_job = mali_pp_job_create(session, pp_args, -+ mali_scheduler_get_new_id()); -+ if (NULL == pp_job) { -+ MALI_PRINT_ERROR(("Failed to create PP job.\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++/* Call Linux tracepoint directly */ ++#define _mali_osk_profiling_report_hw_counter(counter_id, value) trace_mali_hw_counter(counter_id, value) + -+ gp_job = mali_gp_job_create(session, gp_args, -+ mali_scheduler_get_new_id(), -+ mali_pp_job_get_tracker(pp_job)); -+ if (NULL == gp_job) { -+ MALI_PRINT_ERROR(("Failed to create GP job.\n")); -+ mali_pp_job_delete(pp_job); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++/** ++ * Report SW counters ++ * ++ * @param counters array of counter values ++ */ ++void _mali_osk_profiling_report_sw_counters(u32 *counters); + -+ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job); ++void _mali_osk_profiling_record_global_counters(int counter_id, u32 value); + -+ /* Submit GP job. */ -+ mali_scheduler_submit_gp_job(session, gp_job); -+ gp_job = NULL; ++/** ++ * Stop recording profiling data ++ * ++ * @param count Returns the number of recorded events. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_profiling_stop(u32 *count); + -+ /* Submit PP job. */ -+ ret = mali_scheduler_submit_pp_job(session, pp_job, &point); -+ pp_job = NULL; ++/** ++ * Retrieves the number of events that can be retrieved ++ * ++ * @return The number of recorded events that can be retrieved. ++ */ ++u32 _mali_osk_profiling_get_count(void); + -+ if (_MALI_OSK_ERR_OK == ret) { -+ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { -+ /* -+ * Let user space know that something failed -+ * after the jobs were started. -+ */ -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; -+ } -+ } ++/** ++ * Retrieve an event ++ * ++ * @param index Event index (start with 0 and continue until this function fails to retrieve all events) ++ * @param timestamp The timestamp for the retrieved event will be stored here. ++ * @param event_id The event ID for the retrieved event will be stored here. ++ * @param data The 5 data values for the retrieved event will be stored here. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t _mali_osk_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]); + -+ return ret; -+} ++/** ++ * Clear the recorded buffer. ++ * ++ * This is needed in order to start another recording. ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. 
++ */ ++_mali_osk_errcode_t _mali_osk_profiling_clear(void); + -+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args) -+{ -+ struct mali_session_data *session; -+ struct mali_pp_job *job; -+ struct mali_pp_job *tmp; -+ u32 fb_lookup_id; ++/** ++ * Checks if a recording of profiling data is in progress ++ * ++ * @return MALI_TRUE if recording of profiling data is in progress, MALI_FALSE if not ++ */ ++mali_bool _mali_osk_profiling_is_recording(void); + -+ MALI_DEBUG_ASSERT_POINTER(args); -+ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); ++/** ++ * Checks if profiling data is available for retrival ++ * ++ * @return MALI_TRUE if profiling data is avaiable, MALI_FALSE if not ++ */ ++mali_bool _mali_osk_profiling_have_recording(void); + -+ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++/** @} */ /* end group _mali_osk_profiling */ + -+ fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK; ++#else /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */ + -+ mali_scheduler_lock(); ++/* Dummy add_event, for when profiling is disabled. */ + -+ /* Iterate over all jobs for given frame builder_id. */ -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, -+ &session->pp_job_fb_lookup_list[fb_lookup_id], -+ struct mali_pp_job, session_fb_lookup_list) { -+ MALI_DEBUG_CODE(u32 disable_mask = 0); ++#define _mali_osk_profiling_add_event(event_id, data0, data1, data2, data3, data4) + -+ if (mali_pp_job_get_frame_builder_id(job) != -+ (u32) args->fb_id) { -+ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n")); -+ continue; -+ } ++#endif /* defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_TRACEPOINTS) */ + -+ MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3)); ++#endif /* __MALI_OSK_PROFILING_H__ */ + -+ if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) { -+ MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1)); -+ mali_pp_job_disable_wb0(job); -+ } + -+ if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) { -+ MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2)); -+ mali_pp_job_disable_wb1(job); -+ } +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h b/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h +new file mode 100644 +index 000000000..b6fa94ce1 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_osk_types.h +@@ -0,0 +1,471 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
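Putting the profiling declarations together, a capture session is: start with a requested limit, record, stop, walk the events by index, then clear the buffer before the next recording. The sketch below is illustrative only; it assumes the declarations above and invents the wrapper name.

static void demo_profiling_capture(void)
{
        u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES;  /* driver may clamp this */
        u32 count = 0;
        u32 i;

        if (_MALI_OSK_ERR_OK != _mali_osk_profiling_start(&limit))
                return;

        /* ... workload runs; events arrive via _mali_osk_profiling_add_event() ... */

        if (_MALI_OSK_ERR_OK != _mali_osk_profiling_stop(&count))
                return;

        for (i = 0; i < count; i++) {
                u64 timestamp;
                u32 event_id;
                u32 data[5];

                if (_MALI_OSK_ERR_OK == _mali_osk_profiling_get_event(i, &timestamp,
                                                                      &event_id, data)) {
                        /* consume the event */
                }
        }

        _mali_osk_profiling_clear();    /* required before starting a new recording */
}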
++ */ + -+ if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) { -+ MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3)); -+ mali_pp_job_disable_wb2(job); -+ } -+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n", -+ disable_mask)); -+ } ++/** ++ * @file mali_osk_types.h ++ * Defines types of the OS abstraction layer for the kernel device driver (OSK) ++ */ + -+ mali_scheduler_unlock(); -+} ++#ifndef __MALI_OSK_TYPES_H__ ++#define __MALI_OSK_TYPES_H__ + -+#if MALI_STATE_TRACKING -+u32 mali_scheduler_dump_state(char *buf, u32 size) -+{ -+ int n = 0; ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n"); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tQueue depth: %u\n", job_queue_gp.depth); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tNormal priority queue is %s\n", -+ _mali_osk_list_empty(&job_queue_gp.normal_pri) ? -+ "empty" : "not empty"); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tHigh priority queue is %s\n", -+ _mali_osk_list_empty(&job_queue_gp.high_pri) ? -+ "empty" : "not empty"); ++/** ++ * @addtogroup uddapi Unified Device Driver (UDD) APIs ++ * ++ * @{ ++ */ + -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "PP queues\n"); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tQueue depth: %u\n", job_queue_pp.depth); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tNormal priority queue is %s\n", -+ _mali_osk_list_empty(&job_queue_pp.normal_pri) -+ ? "empty" : "not empty"); -+ n += _mali_osk_snprintf(buf + n, size - n, -+ "\tHigh priority queue is %s\n", -+ _mali_osk_list_empty(&job_queue_pp.high_pri) -+ ? "empty" : "not empty"); ++/** ++ * @addtogroup oskapi UDD OS Abstraction for Kernel-side (OSK) APIs ++ * ++ * @{ ++ */ + -+ n += _mali_osk_snprintf(buf + n, size - n, "\n"); ++/** @defgroup _mali_osk_miscellaneous OSK Miscellaneous functions, constants and types ++ * @{ */ + -+ return n; -+} ++/* Define integer types used by OSK. Note: these currently clash with Linux so we only define them if not defined already */ ++#ifndef __KERNEL__ ++typedef unsigned char u8; ++typedef signed char s8; ++typedef unsigned short u16; ++typedef signed short s16; ++typedef unsigned int u32; ++typedef signed int s32; ++typedef unsigned long long u64; ++#define BITS_PER_LONG (sizeof(long)*8) ++#else ++/* Ensure Linux types u32, etc. are defined */ ++#include +#endif + -+/* -+ * ---------- Implementation of static functions ---------- ++/** @brief Mali Boolean type which uses MALI_TRUE and MALI_FALSE ++ */ ++typedef unsigned long mali_bool; ++ ++#ifndef MALI_TRUE ++#define MALI_TRUE ((mali_bool)1) ++#endif ++ ++#ifndef MALI_FALSE ++#define MALI_FALSE ((mali_bool)0) ++#endif ++ ++#define MALI_HW_CORE_NO_COUNTER ((u32)-1) ++ ++ ++#define MALI_S32_MAX 0x7fffffff ++ ++/** ++ * @brief OSK Error codes ++ * ++ * Each OS may use its own set of error codes, and may require that the ++ * User/Kernel interface take certain error code. This means that the common ++ * error codes need to be sufficiently rich to pass the correct error code ++ * thorugh from the OSK to U/K layer, across all OSs. ++ * ++ * The result is that some error codes will appear redundant on some OSs. ++ * Under all OSs, the OSK layer must translate native OS error codes to ++ * _mali_osk_errcode_t codes. Similarly, the U/K layer must translate from ++ * _mali_osk_errcode_t codes to native OS error codes. + */ ++typedef enum { ++ _MALI_OSK_ERR_OK = 0, /**< Success. 
*/ ++ _MALI_OSK_ERR_FAULT = -1, /**< General non-success */ ++ _MALI_OSK_ERR_INVALID_FUNC = -2, /**< Invalid function requested through User/Kernel interface (e.g. bad IOCTL number) */ ++ _MALI_OSK_ERR_INVALID_ARGS = -3, /**< Invalid arguments passed through User/Kernel interface */ ++ _MALI_OSK_ERR_NOMEM = -4, /**< Insufficient memory */ ++ _MALI_OSK_ERR_TIMEOUT = -5, /**< Timeout occurred */ ++ _MALI_OSK_ERR_RESTARTSYSCALL = -6, /**< Special: On certain OSs, must report when an interruptable mutex is interrupted. Ignore otherwise. */ ++ _MALI_OSK_ERR_ITEM_NOT_FOUND = -7, /**< Table Lookup failed */ ++ _MALI_OSK_ERR_BUSY = -8, /**< Device/operation is busy. Try again later */ ++ _MALI_OSK_ERR_UNSUPPORTED = -9, /**< Optional part of the interface used, and is unsupported */ ++} _mali_osk_errcode_t; + -+static mali_timeline_point mali_scheduler_submit_gp_job( -+ struct mali_session_data *session, struct mali_gp_job *job) -+{ -+ mali_timeline_point point; ++/** @} */ /* end group _mali_osk_miscellaneous */ + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @defgroup _mali_osk_wq OSK work queues ++ * @{ */ + -+ /* Add job to Timeline system. */ -+ point = mali_timeline_system_add_tracker(session->timeline_system, -+ mali_gp_job_get_tracker(job), MALI_TIMELINE_GP); ++/** @brief Private type for work objects */ ++typedef struct _mali_osk_wq_work_s _mali_osk_wq_work_t; ++typedef struct _mali_osk_wq_delayed_work_s _mali_osk_wq_delayed_work_t; + -+ return point; -+} ++/** @brief Work queue handler function ++ * ++ * This function type is called when the work is scheduled by the work queue, ++ * e.g. as an IRQ bottom-half handler. ++ * ++ * Refer to \ref _mali_osk_wq_schedule_work() for more information on the ++ * work-queue and work handlers. ++ * ++ * @param arg resource-specific data ++ */ ++typedef void (*_mali_osk_wq_work_handler_t)(void *arg); + -+static _mali_osk_errcode_t mali_scheduler_submit_pp_job( -+ struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point) ++/* @} */ /* end group _mali_osk_wq */ + -+{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; ++/** @defgroup _mali_osk_irq OSK IRQ handling ++ * @{ */ + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ struct ww_acquire_ctx ww_actx; -+ u32 i; -+ u32 num_memory_cookies = 0; -+ struct reservation_object **reservation_object_list = NULL; -+ unsigned int num_reservation_object = 0; -+#endif ++/** @brief Private type for IRQ handling objects */ ++typedef struct _mali_osk_irq_t_struct _mali_osk_irq_t; + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @brief Optional function to trigger an irq from a resource ++ * ++ * This function is implemented by the common layer to allow probing of a resource's IRQ. ++ * @param arg resource-specific data */ ++typedef void (*_mali_osk_irq_trigger_t)(void *arg); + -+ mali_scheduler_lock(); -+ /* -+ * Adding job to the lookup list used to quickly discard -+ * writeback units of queued jobs. -+ */ -+ mali_pp_job_fb_lookup_add(job); -+ mali_scheduler_unlock(); ++/** @brief Optional function to acknowledge an irq from a resource ++ * ++ * This function is implemented by the common layer to allow probing of a resource's IRQ. ++ * @param arg resource-specific data ++ * @return _MALI_OSK_ERR_OK if the IRQ was successful, or a suitable _mali_osk_errcode_t on failure. */ ++typedef _mali_osk_errcode_t (*_mali_osk_irq_ack_t)(void *arg); + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++/** @brief IRQ 'upper-half' handler callback. 
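The comment on `_mali_osk_errcode_t` notes that the U/K layer must translate these codes into native OS error codes. A hypothetical Linux-side mapping could look like the following; the helper and the exact errno choices are illustrative, not part of the header.

#include <linux/errno.h>

static int demo_errcode_to_errno(_mali_osk_errcode_t err)
{
        switch (err) {
        case _MALI_OSK_ERR_OK:             return 0;
        case _MALI_OSK_ERR_INVALID_FUNC:   return -ENOTTY;
        case _MALI_OSK_ERR_INVALID_ARGS:   return -EINVAL;
        case _MALI_OSK_ERR_NOMEM:          return -ENOMEM;
        case _MALI_OSK_ERR_TIMEOUT:        return -ETIMEDOUT;
        case _MALI_OSK_ERR_RESTARTSYSCALL: return -ERESTARTSYS;
        case _MALI_OSK_ERR_ITEM_NOT_FOUND: return -ENOENT;
        case _MALI_OSK_ERR_BUSY:           return -EBUSY;
        case _MALI_OSK_ERR_UNSUPPORTED:    return -EOPNOTSUPP;
        case _MALI_OSK_ERR_FAULT:
        default:                           return -EFAULT;
        }
}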
++ * ++ * This function is implemented by the common layer to do the initial handling of a ++ * resource's IRQ. This maps on to the concept of an ISR that does the minimum ++ * work necessary before handing off to an IST. ++ * ++ * The communication of the resource-specific data from the ISR to the IST is ++ * handled by the OSK implementation. ++ * ++ * On most systems, the IRQ upper-half handler executes in IRQ context. ++ * Therefore, the system may have restrictions about what can be done in this ++ * context ++ * ++ * If an IRQ upper-half handler requires more work to be done than can be ++ * acheived in an IRQ context, then it may defer the work with ++ * _mali_osk_wq_schedule_work(). Refer to \ref _mali_osk_wq_create_work() for ++ * more information. ++ * ++ * @param arg resource-specific data ++ * @return _MALI_OSK_ERR_OK if the IRQ was correctly handled, or a suitable ++ * _mali_osk_errcode_t otherwise. ++ */ ++typedef _mali_osk_errcode_t (*_mali_osk_irq_uhandler_t)(void *arg); + -+ /* Allocate the reservation_object_list to list the dma reservation object of dependent dma buffer */ -+ num_memory_cookies = mali_pp_job_num_memory_cookies(job); -+ if (0 < num_memory_cookies) { -+ reservation_object_list = kzalloc(sizeof(struct reservation_object *) * num_memory_cookies, GFP_KERNEL); -+ if (NULL == reservation_object_list) { -+ MALI_PRINT_ERROR(("Failed to alloc the reservation object list.\n")); -+ ret = _MALI_OSK_ERR_NOMEM; -+ goto failed_to_alloc_reservation_object_list; -+ } -+ } + -+ /* Add the dma reservation object into reservation_object_list*/ -+ for (i = 0; i < num_memory_cookies; i++) { -+ mali_mem_backend *mem_backend = NULL; -+ struct reservation_object *tmp_reservation_object = NULL; -+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); ++/** @} */ /* end group _mali_osk_irq */ + -+ mem_backend = mali_mem_backend_struct_search(session, mali_addr); + -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++/** @defgroup _mali_osk_atomic OSK Atomic counters ++ * @{ */ + -+ if (NULL == mem_backend) { -+ MALI_PRINT_ERROR(("Failed to find the memory backend for memory cookie[%d].\n", i)); -+ goto failed_to_find_mem_backend; -+ } ++/** @brief Public type of atomic counters ++ * ++ * This is public for allocation on stack. On systems that support it, this is just a single 32-bit value. ++ * On others, it could be encapsulating an object stored elsewhere. ++ * ++ * Regardless of implementation, the \ref _mali_osk_atomic functions \b must be used ++ * for all accesses to the variable's value, even if atomicity is not required. ++ * Do not access u.val or u.obj directly. ++ */ ++typedef struct { ++ union { ++ u32 val; ++ void *obj; ++ } u; ++} _mali_osk_atomic_t; ++/** @} */ /* end group _mali_osk_atomic */ + -+ if (MALI_MEM_DMA_BUF != mem_backend->type) -+ continue; + -+ tmp_reservation_object = mem_backend->dma_buf.attachment->buf->resv; ++/** @defgroup _mali_osk_lock OSK Mutual Exclusion Locks ++ * @{ */ + -+ if (NULL != tmp_reservation_object) { -+ mali_dma_fence_add_reservation_object_list(tmp_reservation_object, -+ reservation_object_list, &num_reservation_object); -+ } -+ } + -+ /* -+ * Add the mali dma fence callback to wait for all dependent dma buf, -+ * and extend the timeline system to support dma fence, -+ * then create the new internal dma fence to replace all last dma fence for dependent dma buf. 
-+ */ -+ if (0 < num_reservation_object) { -+ int error; -+ int num_dma_fence_waiter = 0; -+ /* Create one new dma fence.*/ -+ job->rendered_dma_fence = mali_dma_fence_new(job->session->fence_context, -+ _mali_osk_atomic_inc_return(&job->session->fence_seqno)); ++/** @brief OSK Mutual Exclusion Lock ordered list ++ * ++ * This lists the various types of locks in the system and is used to check ++ * that locks are taken in the correct order. ++ * ++ * - Holding more than one lock of the same order at the same time is not ++ * allowed. ++ * - Taking a lock of a lower order than the highest-order lock currently held ++ * is not allowed. ++ * ++ */ ++typedef enum { ++ /* || Locks || */ ++ /* || must be || */ ++ /* _||_ taken in _||_ */ ++ /* \ / this \ / */ ++ /* \/ order! \/ */ + -+ if (NULL == job->rendered_dma_fence) { -+ MALI_PRINT_ERROR(("Failed to creat one new dma fence.\n")); -+ ret = _MALI_OSK_ERR_FAULT; -+ goto failed_to_create_dma_fence; -+ } ++ _MALI_OSK_LOCK_ORDER_FIRST = 0, + -+ /* In order to avoid deadlock, wait/wound mutex lock to lock all dma buffers*/ ++ _MALI_OSK_LOCK_ORDER_SESSIONS, ++ _MALI_OSK_LOCK_ORDER_MEM_SESSION, ++ _MALI_OSK_LOCK_ORDER_MEM_INFO, ++ _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE, ++ _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP, ++ _MALI_OSK_LOCK_ORDER_PM_EXECUTION, ++ _MALI_OSK_LOCK_ORDER_EXECUTOR, ++ _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM, ++ _MALI_OSK_LOCK_ORDER_SCHEDULER, ++ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED, ++ _MALI_OSK_LOCK_ORDER_PROFILING, ++ _MALI_OSK_LOCK_ORDER_L2, ++ _MALI_OSK_LOCK_ORDER_L2_COMMAND, ++ _MALI_OSK_LOCK_ORDER_UTILIZATION, ++ _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS, ++ _MALI_OSK_LOCK_ORDER_PM_STATE, + -+ error = mali_dma_fence_lock_reservation_object_list(reservation_object_list, -+ num_reservation_object, &ww_actx); ++ _MALI_OSK_LOCK_ORDER_LAST, ++} _mali_osk_lock_order_t; + -+ if (0 != error) { -+ MALI_PRINT_ERROR(("Failed to lock all reservation objects.\n")); -+ ret = _MALI_OSK_ERR_FAULT; -+ goto failed_to_lock_reservation_object_list; -+ } + -+ mali_dma_fence_context_init(&job->dma_fence_context, -+ mali_timeline_dma_fence_callback, (void *)job); ++/** @brief OSK Mutual Exclusion Lock flags type ++ * ++ * - Any lock can use the order parameter. ++ */ ++typedef enum { ++ _MALI_OSK_LOCKFLAG_UNORDERED = 0x1, /**< Indicate that the order of this lock should not be checked */ ++ _MALI_OSK_LOCKFLAG_ORDERED = 0x2, ++ /** @enum _mali_osk_lock_flags_t ++ * ++ * Flags from 0x10000--0x80000000 are RESERVED for User-mode */ + -+ /* Add dma fence waiters and dma fence callback. */ -+ for (i = 0; i < num_reservation_object; i++) { -+ ret = mali_dma_fence_context_add_waiters(&job->dma_fence_context, reservation_object_list[i]); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_PRINT_ERROR(("Failed to add waiter into mali dma fence context.\n")); -+ goto failed_to_add_dma_fence_waiter; -+ } -+ } ++} _mali_osk_lock_flags_t; + -+ for (i = 0; i < num_reservation_object; i++) { -+ reservation_object_add_excl_fence(reservation_object_list[i], job->rendered_dma_fence); -+ } ++/** @brief Mutual Exclusion Lock Mode Optimization hint ++ * ++ * The lock mode is used to implement the read/write locking of locks when we call ++ * functions _mali_osk_mutex_rw_init/wait/signal/term/. In this case, the RO mode can ++ * be used to allow multiple concurrent readers, but no writers. The RW mode is used for ++ * writers, and so will wait for all readers to release the lock (if any present). ++ * Further readers and writers will wait until the writer releases the lock. 
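The two ordering rules above (never hold two locks of the same order, never take a lower order than the highest currently held) reduce to requiring that each newly taken lock has a strictly higher order than anything already held. The deliberately simplified check below only illustrates that rule; the real driver keeps this bookkeeping per thread behind its private debug lock types, and the global variable and helper name here are invented.

static _mali_osk_lock_order_t demo_highest_held = _MALI_OSK_LOCK_ORDER_FIRST;

static void demo_check_lock_order(_mali_osk_lock_order_t new_order)
{
        /* Strictly increasing: rules out same-order and lower-order locks.
         * Real code must also track release and keep this state per thread. */
        MALI_DEBUG_ASSERT(new_order > demo_highest_held);
        demo_highest_held = new_order;
}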
++ * ++ * The mode is purely an optimization hint: for example, it is permissible for ++ * all locks to behave in RW mode, regardless of that supplied. ++ * ++ * It is an error to attempt to use locks in anything other that RW mode when ++ * call functions _mali_osk_mutex_rw_wait/signal(). ++ * ++ */ ++typedef enum { ++ _MALI_OSK_LOCKMODE_UNDEF = -1, /**< Undefined lock mode. For internal use only */ ++ _MALI_OSK_LOCKMODE_RW = 0x0, /**< Read-write mode, default. All readers and writers are mutually-exclusive */ ++ _MALI_OSK_LOCKMODE_RO, /**< Read-only mode, to support multiple concurrent readers, but mutual exclusion in the presence of writers. */ ++ /** @enum _mali_osk_lock_mode_t ++ * ++ * Lock modes 0x40--0x7F are RESERVED for User-mode */ ++} _mali_osk_lock_mode_t; + -+ num_dma_fence_waiter = job->dma_fence_context.num_dma_fence_waiter; ++/** @brief Private types for Mutual Exclusion lock objects */ ++typedef struct _mali_osk_lock_debug_s _mali_osk_lock_debug_t; ++typedef struct _mali_osk_spinlock_s _mali_osk_spinlock_t; ++typedef struct _mali_osk_spinlock_irq_s _mali_osk_spinlock_irq_t; ++typedef struct _mali_osk_mutex_s _mali_osk_mutex_t; ++typedef struct _mali_osk_mutex_rw_s _mali_osk_mutex_rw_t; + -+ /* Add job to Timeline system. */ -+ (*point) = mali_timeline_system_add_tracker(session->timeline_system, -+ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); ++/** @} */ /* end group _mali_osk_lock */ + -+ if (0 != num_dma_fence_waiter) { -+ mali_dma_fence_context_dec_count(&job->dma_fence_context); -+ } ++/** @defgroup _mali_osk_low_level_memory OSK Low-level Memory Operations ++ * @{ */ + -+ /* Unlock all wait/wound mutex lock. */ -+ mali_dma_fence_unlock_reservation_object_list(reservation_object_list, -+ num_reservation_object, &ww_actx); -+ } else { -+ /* Add job to Timeline system. */ -+ (*point) = mali_timeline_system_add_tracker(session->timeline_system, -+ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); -+ } ++/** ++ * @brief Private data type for use in IO accesses to/from devices. ++ * ++ * This represents some range that is accessible from the device. Examples ++ * include: ++ * - Device Registers, which could be readable and/or writeable. ++ * - Memory that the device has access to, for storing configuration structures. ++ * ++ * Access to this range must be made through the _mali_osk_mem_ioread32() and ++ * _mali_osk_mem_iowrite32() functions. ++ */ ++typedef struct _mali_io_address *mali_io_address; + -+ kfree(reservation_object_list); -+ return ret; -+#else -+ /* Add job to Timeline system. */ -+ (*point) = mali_timeline_system_add_tracker(session->timeline_system, -+ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); -+#endif ++/** @defgroup _MALI_OSK_CPU_PAGE CPU Physical page size macros. ++ * ++ * The order of the page size is supplied for ++ * ease of use by algorithms that might require it, since it is easier to know ++ * it ahead of time rather than calculating it. ++ * ++ * The Mali Page Mask macro masks off the lower bits of a physical address to ++ * give the start address of the page for that physical address. ++ * ++ * @note The Mali device driver code is designed for systems with 4KB page size. ++ * Changing these macros will not make the entire Mali device driver work with ++ * page sizes other than 4KB. ++ * ++ * @note The CPU Physical Page Size has been assumed to be the same as the Mali ++ * Physical Page Size. 
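In practice the RO/RW hint maps onto the `_mali_osk_mutex_rw_*` functions referred to above: readers pass `_MALI_OSK_LOCKMODE_RO`, the single writer passes `_MALI_OSK_LOCKMODE_RW`. The wait/signal signatures below (lock pointer plus mode) are assumed from the full mali_osk.h, which is not part of this hunk, and the demo variables are invented.

static _mali_osk_mutex_rw_t *demo_rw_lock;      /* from _mali_osk_mutex_rw_init() */
static u32 demo_shared_value;

static u32 demo_read(void)
{
        u32 v;

        _mali_osk_mutex_rw_wait(demo_rw_lock, _MALI_OSK_LOCKMODE_RO);   /* many readers */
        v = demo_shared_value;
        _mali_osk_mutex_rw_signal(demo_rw_lock, _MALI_OSK_LOCKMODE_RO);
        return v;
}

static void demo_write(u32 v)
{
        _mali_osk_mutex_rw_wait(demo_rw_lock, _MALI_OSK_LOCKMODE_RW);   /* exclusive */
        demo_shared_value = v;
        _mali_osk_mutex_rw_signal(demo_rw_lock, _MALI_OSK_LOCKMODE_RW);
}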
++ * ++ * @{ ++ */ + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+failed_to_add_dma_fence_waiter: -+ mali_dma_fence_context_term(&job->dma_fence_context); -+ mali_dma_fence_unlock_reservation_object_list(reservation_object_list, -+ num_reservation_object, &ww_actx); -+failed_to_lock_reservation_object_list: -+ mali_dma_fence_signal_and_put(&job->rendered_dma_fence); -+failed_to_create_dma_fence: -+failed_to_find_mem_backend: -+ if (NULL != reservation_object_list) -+ kfree(reservation_object_list); -+failed_to_alloc_reservation_object_list: -+ mali_pp_job_fb_lookup_remove(job); -+#endif -+ return ret; -+} ++/** CPU Page Order, as log to base 2 of the Page size. @see _MALI_OSK_CPU_PAGE_SIZE */ ++#define _MALI_OSK_CPU_PAGE_ORDER ((u32)12) ++/** CPU Page Size, in bytes. */ ++#define _MALI_OSK_CPU_PAGE_SIZE (((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) ++/** CPU Page Mask, which masks off the offset within a page */ ++#define _MALI_OSK_CPU_PAGE_MASK (~((((u32)1) << (_MALI_OSK_CPU_PAGE_ORDER)) - ((u32)1))) ++/** @} */ /* end of group _MALI_OSK_CPU_PAGE */ + -+static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job) -+{ -+ struct mali_session_data *session; -+ _mali_osk_list_t *queue; ++/** @defgroup _MALI_OSK_MALI_PAGE Mali Physical Page size macros ++ * ++ * Mali Physical page size macros. The order of the page size is supplied for ++ * ease of use by algorithms that might require it, since it is easier to know ++ * it ahead of time rather than calculating it. ++ * ++ * The Mali Page Mask macro masks off the lower bits of a physical address to ++ * give the start address of the page for that physical address. ++ * ++ * @note The Mali device driver code is designed for systems with 4KB page size. ++ * Changing these macros will not make the entire Mali device driver work with ++ * page sizes other than 4KB. ++ * ++ * @note The Mali Physical Page Size has been assumed to be the same as the CPU ++ * Physical Page Size. ++ * ++ * @{ ++ */ + -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** Mali Page Order, as log to base 2 of the Page size. @see _MALI_OSK_MALI_PAGE_SIZE */ ++#define _MALI_OSK_MALI_PAGE_ORDER PAGE_SHIFT ++/** Mali Page Size, in bytes. */ ++#define _MALI_OSK_MALI_PAGE_SIZE PAGE_SIZE ++/** Mali Page Mask, which masks off the offset within a page */ ++#define _MALI_OSK_MALI_PAGE_MASK PAGE_MASK ++/** @} */ /* end of group _MALI_OSK_MALI_PAGE*/ + -+ session = mali_gp_job_get_session(job); -+ MALI_DEBUG_ASSERT_POINTER(session); ++/** @brief flags for mapping a user-accessible memory range ++ * ++ * Where a function with prefix '_mali_osk_mem_mapregion' accepts flags as one ++ * of the function parameters, it will use one of these. These allow per-page ++ * control over mappings. Compare with the mali_memory_allocation_flag type, ++ * which acts over an entire range ++ * ++ * These may be OR'd together with bitwise OR (|), but must be cast back into ++ * the type after OR'ing. 
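The page macros are plain mask arithmetic: for the 4 KB case above, `_MALI_OSK_CPU_PAGE_MASK` is 0xFFFFF000, so ANDing with it yields the page start and ANDing with its complement yields the offset within the page. A short worked example (the helper names are illustrative):

/* address        = 0x12345678
 * page start     = address &  _MALI_OSK_CPU_PAGE_MASK   -> 0x12345000
 * offset in page = address & ~_MALI_OSK_CPU_PAGE_MASK   -> 0x00000678
 */
static u32 demo_page_start(u32 address)
{
        return address & _MALI_OSK_CPU_PAGE_MASK;
}

static u32 demo_page_offset(u32 address)
{
        return address & ~_MALI_OSK_CPU_PAGE_MASK;
}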
++ */ ++typedef enum { ++ _MALI_OSK_MEM_MAPREGION_FLAG_OS_ALLOCATED_PHYSADDR = 0x1, /**< Physical address is OS Allocated */ ++} _mali_osk_mem_mapregion_flags_t; ++/** @} */ /* end group _mali_osk_low_level_memory */ + -+ if (unlikely(session->is_aborting)) { -+ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n", -+ mali_gp_job_get_id(job), job)); -+ return MALI_FALSE; /* job not queued */ -+ } ++/** @defgroup _mali_osk_notification OSK Notification Queues ++ * @{ */ + -+ mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order()); ++/** @brief Private type for notification queue objects */ ++typedef struct _mali_osk_notification_queue_t_struct _mali_osk_notification_queue_t; + -+ /* Determine which queue the job should be added to. */ -+ if (session->use_high_priority_job_queue) { -+ queue = &job_queue_gp.high_pri; -+ } else { -+ queue = &job_queue_gp.normal_pri; -+ } ++/** @brief Public notification data object type */ ++typedef struct _mali_osk_notification_t_struct { ++ u32 notification_type; /**< The notification type */ ++ u32 result_buffer_size; /**< Size of the result buffer to copy to user space */ ++ void *result_buffer; /**< Buffer containing any type specific data */ ++} _mali_osk_notification_t; + -+ job_queue_gp.depth += 1; -+ job_queue_gp.big_job_num += (job->big_job) ? 1 : 0; ++/** @} */ /* end group _mali_osk_notification */ + -+ /* Add job to queue (mali_gp_job_queue_add find correct place). */ -+ mali_gp_job_list_add(job, queue); + -+ /* -+ * We hold a PM reference for every job we hold queued (and running) -+ * It is important that we take this reference after job has been -+ * added the the queue so that any runtime resume could schedule this -+ * job right there and then. -+ */ -+ _mali_osk_pm_dev_ref_get_async(); ++/** @defgroup _mali_osk_timer OSK Timer Callbacks ++ * @{ */ + -+ if (mali_utilization_enabled()) { -+ /* -+ * We cheat a little bit by counting the GP as busy from the -+ * time a GP job is queued. This will be fine because we only -+ * loose the tiny idle gap between jobs, but we will instead -+ * get less utilization work to do (less locks taken) -+ */ -+ mali_utilization_gp_start(); -+ } ++/** @brief Function to call when a timer expires ++ * ++ * When a timer expires, this function is called. Note that on many systems, ++ * a timer callback will be executed in IRQ context. Therefore, restrictions ++ * may apply on what can be done inside the timer callback. ++ * ++ * If a timer requires more work to be done than can be acheived in an IRQ ++ * context, then it may defer the work with a work-queue. For example, it may ++ * use \ref _mali_osk_wq_schedule_work() to make use of a bottom-half handler ++ * to carry out the remaining work. ++ * ++ * Stopping the timer with \ref _mali_osk_timer_del() blocks on compeletion of ++ * the callback. Therefore, the callback may not obtain any mutexes also held ++ * by any callers of _mali_osk_timer_del(). Otherwise, a deadlock may occur. 
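The timer documentation above describes a standard split: because the timer callback may run in IRQ context, it should only schedule a pre-created work object and let the work handler, which runs in process context, do anything that can sleep. A minimal sketch using the work-queue calls that appear elsewhere in this patch (`_mali_osk_wq_create_work()` / `_mali_osk_wq_schedule_work()`); the demo names are invented.

static _mali_osk_wq_work_t *demo_work;  /* created once with _mali_osk_wq_create_work() */

static void demo_work_handler(void *arg)        /* _mali_osk_wq_work_handler_t */
{
        /* Process context: safe to sleep or take mutexes here. */
}

static void demo_timer_callback(void *arg)      /* _mali_osk_timer_callback_t */
{
        /* Possibly IRQ context: do the minimum and defer the rest. */
        _mali_osk_wq_schedule_work(demo_work);
}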
++ * ++ * @param arg Function-specific data */ ++typedef void (*_mali_osk_timer_callback_t)(void *arg); + -+ mali_pm_record_gpu_active(MALI_TRUE); ++/** @brief Private type for Timer Callback Objects */ ++typedef struct _mali_osk_timer_t_struct _mali_osk_timer_t; ++/** @} */ /* end group _mali_osk_timer */ + -+ /* Add profiling events for job enqueued */ -+ _mali_osk_profiling_add_event( -+ MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE, -+ mali_gp_job_get_pid(job), -+ mali_gp_job_get_tid(job), -+ mali_gp_job_get_frame_builder_id(job), -+ mali_gp_job_get_flush_id(job), -+ 0); + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ trace_gpu_job_enqueue(mali_gp_job_get_tid(job), -+ mali_gp_job_get_id(job), "GP"); -+#endif ++/** @addtogroup _mali_osk_list OSK Doubly-Linked Circular Lists ++ * @{ */ + -+ MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n", -+ mali_gp_job_get_id(job), job)); ++/** @brief Public List objects. ++ * ++ * To use, add a _mali_osk_list_t member to the structure that may become part ++ * of a list. When traversing the _mali_osk_list_t objects, use the ++ * _MALI_OSK_CONTAINER_OF() macro to recover the structure from its ++ *_mali_osk_list_t member ++ * ++ * Each structure may have multiple _mali_osk_list_t members, so that the ++ * structure is part of multiple lists. When traversing lists, ensure that the ++ * correct _mali_osk_list_t member is used, because type-checking will be ++ * lost by the compiler. ++ */ ++typedef struct _mali_osk_list_s { ++ struct _mali_osk_list_s *next; ++ struct _mali_osk_list_s *prev; ++} _mali_osk_list_t; ++/** @} */ /* end group _mali_osk_list */ + -+ return MALI_TRUE; /* job queued */ -+} ++/** @addtogroup _mali_osk_miscellaneous ++ * @{ */ + -+static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job) -+{ -+ struct mali_session_data *session; -+ _mali_osk_list_t *queue = NULL; ++/** @brief resource description struct ++ * ++ * Platform independent representation of a Mali HW resource ++ */ ++typedef struct _mali_osk_resource { ++ const char *description; /**< short description of the resource */ ++ uintptr_t base; /**< Physical base address of the resource, as seen by Mali resources. 
*/ ++ const char *irq_name; /**< Name of irq belong to this resource */ ++ u32 irq; /**< IRQ number delivered to the CPU, or -1 to tell the driver to probe for it (if possible) */ ++} _mali_osk_resource_t; ++/** @} */ /* end group _mali_osk_miscellaneous */ + -+ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); -+ MALI_DEBUG_ASSERT_POINTER(job); ++/** @defgroup _mali_osk_wait_queue OSK Wait Queue functionality ++ * @{ */ ++/** @brief Private type for wait queue objects */ ++typedef struct _mali_osk_wait_queue_t_struct _mali_osk_wait_queue_t; ++/** @} */ /* end group _mali_osk_wait_queue */ + -+ session = mali_pp_job_get_session(job); -+ MALI_DEBUG_ASSERT_POINTER(session); ++/** @} */ /* end group osuapi */ + -+ if (unlikely(session->is_aborting)) { -+ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n", -+ mali_pp_job_get_id(job), job)); -+ return MALI_FALSE; /* job not queued */ -+ } else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) { -+ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n", -+ mali_pp_job_get_id(job), job)); -+ return MALI_FALSE; -+ } ++/** @} */ /* end group uddapi */ + -+ mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order()); ++/** @brief Mali print ctx type which uses seq_file ++ */ ++typedef struct seq_file _mali_osk_print_ctx; + -+ if (session->use_high_priority_job_queue) { -+ queue = &job_queue_pp.high_pri; -+ } else { -+ queue = &job_queue_pp.normal_pri; -+ } ++#define _MALI_OSK_BITMAP_INVALIDATE_INDEX -1 + -+ job_queue_pp.depth += -+ mali_pp_job_get_sub_job_count(job); ++typedef struct _mali_osk_bitmap { ++ u32 reserve; ++ u32 last; ++ u32 max; ++ u32 avail; ++ _mali_osk_spinlock_t *lock; ++ unsigned long *table; ++} _mali_osk_bitmap_t; + -+ /* Add job to queue (mali_gp_job_queue_add find correct place). */ -+ mali_pp_job_list_add(job, queue); + -+ /* -+ * We hold a PM reference for every job we hold queued (and running) -+ * It is important that we take this reference after job has been -+ * added the the queue so that any runtime resume could schedule this -+ * job right there and then. -+ */ -+ _mali_osk_pm_dev_ref_get_async(); ++#ifdef __cplusplus ++} ++#endif + -+ if (mali_utilization_enabled()) { -+ /* -+ * We cheat a little bit by counting the PP as busy from the -+ * time a PP job is queued. This will be fine because we only -+ * loose the tiny idle gap between jobs, but we will instead -+ * get less utilization work to do (less locks taken) -+ */ -+ mali_utilization_pp_start(); -+ } ++#endif /* __MALI_OSK_TYPES_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm.c b/drivers/gpu/arm/mali400/mali/common/mali_pm.c +new file mode 100644 +index 000000000..3989a33ae +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm.c +@@ -0,0 +1,1362 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ mali_pm_record_gpu_active(MALI_FALSE); ++#include "mali_pm.h" ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_scheduler.h" ++#include "mali_group.h" ++#include "mali_pm_domain.h" ++#include "mali_pmu.h" + -+ /* Add profiling events for job enqueued */ -+ _mali_osk_profiling_add_event( -+ MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE, -+ mali_pp_job_get_pid(job), -+ mali_pp_job_get_tid(job), -+ mali_pp_job_get_frame_builder_id(job), -+ mali_pp_job_get_flush_id(job), -+ 0); ++#include "mali_executor.h" ++#include "mali_control_timer.h" + -+#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) -+ trace_gpu_job_enqueue(mali_pp_job_get_tid(job), -+ mali_pp_job_get_id(job), "PP"); ++#if defined(DEBUG) ++u32 num_pm_runtime_resume = 0; ++u32 num_pm_updates = 0; ++u32 num_pm_updates_up = 0; ++u32 num_pm_updates_down = 0; +#endif + -+ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n", -+ mali_pp_job_is_virtual(job) -+ ? "Virtual" : "Physical", -+ mali_pp_job_get_id(job), job, -+ mali_pp_job_get_sub_job_count(job))); ++#define MALI_PM_DOMAIN_DUMMY_MASK (1 << MALI_DOMAIN_INDEX_DUMMY) + -+ return MALI_TRUE; /* job queued */ -+} ++/* lock protecting power state (including pm_domains) */ ++static _mali_osk_spinlock_irq_t *pm_lock_state = NULL; + -+static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job, -+ mali_bool success) -+{ -+ _mali_uk_gp_job_finished_s *jobres; -+ struct mali_session_data *session; -+ _mali_osk_notification_t *notification; ++/* the wanted domain mask (protected by pm_lock_state) */ ++static u32 pd_mask_wanted = 0; + -+ MALI_DEBUG_ASSERT_POINTER(job); ++/* used to deferring the actual power changes */ ++static _mali_osk_wq_work_t *pm_work = NULL; + -+ session = mali_gp_job_get_session(job); -+ MALI_DEBUG_ASSERT_POINTER(session); ++/* lock protecting power change execution */ ++static _mali_osk_mutex_t *pm_lock_exec = NULL; + -+ notification = mali_gp_job_get_finished_notification(job); -+ MALI_DEBUG_ASSERT_POINTER(notification); ++/* PMU domains which are actually powered on (protected by pm_lock_exec) */ ++static u32 pmu_mask_current = 0; + -+ jobres = notification->result_buffer; -+ MALI_DEBUG_ASSERT_POINTER(jobres); ++/* ++ * domains which marked as powered on (protected by pm_lock_exec) ++ * This can be different from pmu_mask_current right after GPU power on ++ * if the PMU domains default to powered up. 
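mali_pm.c keeps three masks: the wanted domain mask (`pd_mask_wanted`), the domains marked as powered on, and the PMU domains actually powered (`pmu_mask_current`), and it defers the actual power transition to a work item. The helper below only illustrates the mask arithmetic such an update has to perform (which domains to switch on and off); the name is invented, and the real sequencing presumably lives in mali_pm_update_sync_internal(), whose body is not in this excerpt.

static void demo_compute_power_transition(u32 wanted, u32 current_mask,
                                          u32 *power_up, u32 *power_down)
{
        *power_up   = wanted & ~current_mask;   /* domains to switch on  */
        *power_down = current_mask & ~wanted;   /* domains to switch off */
}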
++ */ ++static u32 pd_mask_current = 0; + -+ jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count(); ++static u16 domain_config[MALI_MAX_NUMBER_OF_DOMAINS] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 1 << MALI_DOMAIN_INDEX_DUMMY ++}; + -+ jobres->user_job_ptr = mali_gp_job_get_user_id(job); -+ if (MALI_TRUE == success) { -+ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS; -+ } else { -+ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR; -+ } -+ jobres->heap_current_addr = mali_gp_job_get_current_heap_addr(job); -+ jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job); -+ jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job); ++/* The relative core power cost */ ++#define MALI_GP_COST 3 ++#define MALI_PP_COST 6 ++#define MALI_L2_COST 1 + -+ mali_session_send_notification(session, notification); -+} ++/* ++ *We have MALI_MAX_NUMBER_OF_PP_PHYSICAL_CORES + 1 rows in this matrix ++ *because we mush store the mask of different pp cores: 0, 1, 2, 3, 4, 5, 6, 7, 8. ++ */ ++static int mali_pm_domain_power_cost_result[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1][MALI_MAX_NUMBER_OF_DOMAINS]; ++/* ++ * Keep track of runtime PM state, so that we know ++ * how to resume during OS resume. ++ */ ++#ifdef CONFIG_PM_RUNTIME ++static mali_bool mali_pm_runtime_active = MALI_FALSE; ++#else ++/* when kernel don't enable PM_RUNTIME, set the flag always true, ++ * for GPU will not power off by runtime */ ++static mali_bool mali_pm_runtime_active = MALI_TRUE; ++#endif + -+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job, -+ u32 num_cores_in_virtual) ++static void mali_pm_state_lock(void); ++static void mali_pm_state_unlock(void); ++static _mali_osk_errcode_t mali_pm_create_pm_domains(void); ++static void mali_pm_set_pmu_domain_config(void); ++static u32 mali_pm_get_registered_cores_mask(void); ++static void mali_pm_update_sync_internal(void); ++static mali_bool mali_pm_common_suspend(void); ++static void mali_pm_update_work(void *data); ++#if defined(DEBUG) ++const char *mali_pm_mask_to_string(u32 mask); ++const char *mali_pm_group_stats_to_string(void); ++#endif ++ ++_mali_osk_errcode_t mali_pm_initialize(void) +{ -+ u32 i; -+ u32 num_counters_to_copy; -+ _mali_uk_pp_job_finished_s *jobres; -+ struct mali_session_data *session; -+ _mali_osk_notification_t *notification; ++ _mali_osk_errcode_t err; ++ struct mali_pmu_core *pmu; + -+ if (MALI_TRUE == mali_pp_job_use_no_notification(job)) { -+ return; ++ pm_lock_state = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_PM_STATE); ++ if (NULL == pm_lock_state) { ++ mali_pm_terminate(); ++ return _MALI_OSK_ERR_FAULT; + } + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ pm_lock_exec = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_PM_STATE); ++ if (NULL == pm_lock_exec) { ++ mali_pm_terminate(); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ session = mali_pp_job_get_session(job); -+ MALI_DEBUG_ASSERT_POINTER(session); ++ pm_work = _mali_osk_wq_create_work(mali_pm_update_work, NULL); ++ if (NULL == pm_work) { ++ mali_pm_terminate(); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ notification = mali_pp_job_get_finished_notification(job); -+ MALI_DEBUG_ASSERT_POINTER(notification); ++ pmu = mali_pmu_get_global_pmu_core(); ++ if (NULL != pmu) { ++ /* ++ * We have a Mali PMU, set the correct domain ++ * configuration (default or custom) ++ */ + -+ jobres = notification->result_buffer; -+ MALI_DEBUG_ASSERT_POINTER(jobres); ++ u32 registered_cores_mask; + -+ jobres->user_job_ptr = 
mali_pp_job_get_user_id(job); -+ if (MALI_TRUE == mali_pp_job_was_success(job)) { -+ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS; -+ } else { -+ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR; -+ } ++ mali_pm_set_pmu_domain_config(); + -+ if (mali_pp_job_is_virtual(job)) { -+ num_counters_to_copy = num_cores_in_virtual; -+ } else { -+ num_counters_to_copy = mali_pp_job_get_sub_job_count(job); ++ registered_cores_mask = mali_pm_get_registered_cores_mask(); ++ mali_pmu_set_registered_cores_mask(pmu, registered_cores_mask); ++ ++ MALI_DEBUG_ASSERT(0 == pd_mask_wanted); + } + -+ for (i = 0; i < num_counters_to_copy; i++) { -+ jobres->perf_counter0[i] = -+ mali_pp_job_get_perf_counter_value0(job, i); -+ jobres->perf_counter1[i] = -+ mali_pp_job_get_perf_counter_value1(job, i); -+ jobres->perf_counter_src0 = -+ mali_pp_job_get_pp_counter_global_src0(); -+ jobres->perf_counter_src1 = -+ mali_pp_job_get_pp_counter_global_src1(); ++ /* Create all power domains needed (at least one dummy domain) */ ++ err = mali_pm_create_pm_domains(); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_pm_terminate(); ++ return err; + } + -+ mali_session_send_notification(session, notification); ++ return _MALI_OSK_ERR_OK; +} + -+static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job) ++void mali_pm_terminate(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++ if (NULL != pm_work) { ++ _mali_osk_wq_delete_work(pm_work); ++ pm_work = NULL; ++ } + -+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock); -+ mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue); -+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock); ++ mali_pm_domain_terminate(); + -+ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete); ++ if (NULL != pm_lock_exec) { ++ _mali_osk_mutex_term(pm_lock_exec); ++ pm_lock_exec = NULL; ++ } ++ ++ if (NULL != pm_lock_state) { ++ _mali_osk_spinlock_irq_term(pm_lock_state); ++ pm_lock_state = NULL; ++ } +} + -+void mali_scheduler_do_pp_job_delete(void *arg) ++struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index, ++ struct mali_l2_cache_core *l2_cache) +{ -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); -+ struct mali_pp_job *job; -+ struct mali_pp_job *tmp; -+ -+ MALI_IGNORE(arg); ++ struct mali_pm_domain *domain; + -+ /* -+ * Quickly "unhook" the jobs pending to be deleted, so we can release -+ * the lock before we start deleting the job objects -+ * (without any locks held) -+ */ -+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock); -+ _mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list); -+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock); ++ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]); ++ if (NULL == domain) { ++ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]); ++ domain = mali_pm_domain_get_from_index( ++ MALI_DOMAIN_INDEX_DUMMY); ++ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK; ++ } else { ++ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]); ++ } + -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, -+ struct mali_pp_job, list) { -+ _mali_osk_list_delinit(&job->list); ++ MALI_DEBUG_ASSERT(NULL != domain); + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ mali_dma_fence_context_term(&job->dma_fence_context); -+#endif ++ mali_pm_domain_add_l2_cache(domain, l2_cache); + -+ mali_pp_job_delete(job); /* delete the job object itself */ -+ } ++ return domain; /* return the actual domain this was registered in */ +} + -+#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) -+ -+static void 
mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job) ++struct mali_pm_domain *mali_pm_register_group(u32 domain_index, ++ struct mali_group *group) +{ -+ MALI_DEBUG_ASSERT_POINTER(job); ++ struct mali_pm_domain *domain; + -+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock); -+ mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list); -+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock); ++ domain = mali_pm_domain_get_from_mask(domain_config[domain_index]); ++ if (NULL == domain) { ++ MALI_DEBUG_ASSERT(0 == domain_config[domain_index]); ++ domain = mali_pm_domain_get_from_index( ++ MALI_DOMAIN_INDEX_DUMMY); ++ domain_config[domain_index] = MALI_PM_DOMAIN_DUMMY_MASK; ++ } else { ++ MALI_DEBUG_ASSERT(0 != domain_config[domain_index]); ++ } + -+ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue); ++ MALI_DEBUG_ASSERT(NULL != domain); ++ ++ mali_pm_domain_add_group(domain, group); ++ ++ return domain; /* return the actual domain this was registered in */ +} + -+static void mali_scheduler_do_pp_job_queue(void *arg) ++mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains, ++ struct mali_group **groups, ++ u32 num_domains) +{ -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); -+ struct mali_pp_job *job; -+ struct mali_pp_job *tmp; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ -+ MALI_IGNORE(arg); ++ mali_bool ret = MALI_TRUE; /* Assume all is powered on instantly */ ++ u32 i; + -+ /* -+ * Quickly "unhook" the jobs pending to be queued, so we can release -+ * the lock before we start queueing the job objects -+ * (without any locks held) -+ */ -+ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock); -+ _mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list); -+ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock); ++ mali_pm_state_lock(); + -+ /* First loop through all jobs and do the pre-work (no locks needed) */ -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, -+ struct mali_pp_job, list) { -+ if (mali_pp_job_needs_dma_buf_mapping(job)) { ++ for (i = 0; i < num_domains; i++) { ++ MALI_DEBUG_ASSERT_POINTER(domains[i]); ++ pd_mask_wanted |= mali_pm_domain_ref_get(domains[i]); ++ if (MALI_FALSE == mali_pm_domain_power_is_on(domains[i])) { + /* -+ * This operation could fail, but we continue anyway, -+ * because the worst that could happen is that this -+ * job will fail due to a Mali page fault. ++ * Tell caller that the corresponding group ++ * was not already powered on. + */ -+ mali_dma_buf_map_job(job); ++ ret = MALI_FALSE; ++ } else { ++ /* ++ * There is a time gap between we power on the domain and ++ * set the power state of the corresponding groups to be on. 
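++ * Report MALI_FALSE in that window as well, so the caller treats the
++ * group as not yet powered on.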
++ */ ++ if (NULL != groups[i] && ++ MALI_FALSE == mali_group_power_is_on(groups[i])) { ++ ret = MALI_FALSE; ++ } + } + } + -+ mali_scheduler_lock(); ++ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (get refs)\n", pd_mask_wanted)); + -+ /* Then loop through all jobs again to queue them (lock needed) */ -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, -+ struct mali_pp_job, list) { ++ mali_pm_state_unlock(); + -+ /* Remove from scheduler_pp_job_queue_list before queueing */ -+ mali_pp_job_list_remove(job); ++ return ret; ++} + -+ if (mali_scheduler_queue_pp_job(job)) { -+ /* Job queued successfully */ -+ schedule_mask |= MALI_SCHEDULER_MASK_PP; -+ } else { -+ /* Failed to enqueue job, release job (with error) */ -+ mali_pp_job_fb_lookup_remove(job); -+ mali_pp_job_mark_unstarted_failed(job); ++mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains, ++ u32 num_domains) ++{ ++ u32 mask = 0; ++ mali_bool ret; ++ u32 i; + -+ /* unlock scheduler in this uncommon case */ -+ mali_scheduler_unlock(); ++ mali_pm_state_lock(); + -+ schedule_mask |= mali_timeline_tracker_release( -+ mali_pp_job_get_tracker(job)); ++ for (i = 0; i < num_domains; i++) { ++ MALI_DEBUG_ASSERT_POINTER(domains[i]); ++ mask |= mali_pm_domain_ref_put(domains[i]); ++ } + -+ /* Notify user space and close the job object */ -+ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, -+ MALI_FALSE); ++ if (0 == mask) { ++ /* return false, all domains should still stay on */ ++ ret = MALI_FALSE; ++ } else { ++ /* Assert that we are dealing with a change */ ++ MALI_DEBUG_ASSERT((pd_mask_wanted & mask) == mask); + -+ mali_scheduler_lock(); -+ } ++ /* Update our desired domain mask */ ++ pd_mask_wanted &= ~mask; ++ ++ /* return true; one or more domains can now be powered down */ ++ ret = MALI_TRUE; + } + -+ mali_scheduler_unlock(); ++ MALI_DEBUG_PRINT(3, ("PM: wanted domain mask = 0x%08X (put refs)\n", pd_mask_wanted)); + -+ /* Trigger scheduling of jobs */ -+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); -+} ++ mali_pm_state_unlock(); + -+#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ ++ return ret; ++} + -+void mali_scheduler_gp_pp_job_queue_print(void) ++void mali_pm_init_begin(void) +{ -+ struct mali_gp_job *gp_job = NULL; -+ struct mali_gp_job *tmp_gp_job = NULL; -+ struct mali_pp_job *pp_job = NULL; -+ struct mali_pp_job *tmp_pp_job = NULL; ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); -+ MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj); ++ _mali_osk_pm_dev_ref_get_sync(); + -+ /* dump job queup status */ -+ if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) { -+ MALI_PRINT(("No GP&PP job in the job queue.\n")); -+ return; ++ /* Ensure all PMU domains are on */ ++ if (NULL != pmu) { ++ mali_pmu_power_up_all(pmu); + } ++} + -+ MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth)); -+ if (job_queue_gp.depth > 0) { -+ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) { -+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri, -+ struct mali_gp_job, list) { -+ MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid)); -+ } -+ } ++void mali_pm_init_end(void) ++{ ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+ if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) { -+ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri, -+ struct mali_gp_job, list) { -+ 
MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid)); -+ } -+ } ++ /* Ensure all PMU domains are off */ ++ if (NULL != pmu) { ++ mali_pmu_power_down_all(pmu); + } + -+ MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth)); -+ if (job_queue_pp.depth > 0) { -+ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri, -+ struct mali_pp_job, list) { -+ if (mali_pp_job_is_virtual(pp_job)) { -+ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); -+ } else { -+ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); -+ } -+ } -+ } ++ _mali_osk_pm_dev_ref_put(); ++} + -+ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri, -+ struct mali_pp_job, list) { -+ if (mali_pp_job_is_virtual(pp_job)) { -+ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); -+ } else { -+ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); -+ } -+ } -+ } ++void mali_pm_update_sync(void) ++{ ++ mali_pm_exec_lock(); ++ ++ if (MALI_TRUE == mali_pm_runtime_active) { ++ /* ++ * Only update if GPU is powered on. ++ * Deactivation of the last group will result in both a ++ * deferred runtime PM suspend operation and ++ * deferred execution of this function. ++ * mali_pm_runtime_active will be false if runtime PM ++ * executed first and thus the GPU is now fully powered off. ++ */ ++ mali_pm_update_sync_internal(); + } + -+ /* dump group running job status */ -+ mali_executor_running_status_print(); ++ mali_pm_exec_unlock(); +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h -new file mode 100644 -index 000000000..de81a421e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h -@@ -0,0 +1,131 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_SCHEDULER_H__ -+#define __MALI_SCHEDULER_H__ + -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_scheduler_types.h" -+#include "mali_session.h" ++void mali_pm_update_async(void) ++{ ++ _mali_osk_wq_schedule_work(pm_work); ++} + -+struct mali_scheduler_job_queue { -+ _MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */ -+ _MALI_OSK_LIST_HEAD(high_pri); /* Queued jobs with high priority */ -+ u32 depth; /* Depth of combined queues. 
*/ -+ u32 big_job_num; -+}; ++void mali_pm_os_suspend(mali_bool os_suspend) ++{ ++ int ret; + -+extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj; ++ MALI_DEBUG_PRINT(3, ("Mali PM: OS suspend\n")); + -+/* Queue of jobs to be executed on the GP group */ -+extern struct mali_scheduler_job_queue job_queue_gp; ++ /* Suspend execution of all jobs, and go to inactive state */ ++ mali_executor_suspend(); + -+/* Queue of PP jobs */ -+extern struct mali_scheduler_job_queue job_queue_pp; ++ if (os_suspend) { ++ mali_control_timer_suspend(MALI_TRUE); ++ } + -+extern _mali_osk_atomic_t mali_job_id_autonumber; -+extern _mali_osk_atomic_t mali_job_cache_order_autonumber; ++ mali_pm_exec_lock(); + -+#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ ret = mali_pm_common_suspend(); + -+_mali_osk_errcode_t mali_scheduler_initialize(void); -+void mali_scheduler_terminate(void); ++ MALI_DEBUG_ASSERT(MALI_TRUE == ret); ++ MALI_IGNORE(ret); + -+MALI_STATIC_INLINE void mali_scheduler_lock(void) -+{ -+ _mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj); -+ MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n")); ++ mali_pm_exec_unlock(); +} + -+MALI_STATIC_INLINE void mali_scheduler_unlock(void) ++void mali_pm_os_resume(void) +{ -+ MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n")); -+ _mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj); -+} ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void) -+{ -+ return job_queue_gp.depth; -+} -+MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void) -+{ -+ return job_queue_gp.big_job_num; -+} ++ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume\n")); + -+u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure); ++ mali_pm_exec_lock(); + -+mali_bool mali_scheduler_job_next_is_virtual(void); -+struct mali_pp_job *mali_scheduler_job_pp_next(void); ++#if defined(DEBUG) ++ mali_pm_state_lock(); + -+struct mali_gp_job *mali_scheduler_job_gp_get(void); -+struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void); -+struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void); -+struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job); -+struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void); ++ /* Assert that things are as we left them in os_suspend(). */ ++ MALI_DEBUG_ASSERT(0 == pd_mask_wanted); ++ MALI_DEBUG_ASSERT(0 == pd_mask_current); ++ MALI_DEBUG_ASSERT(0 == pmu_mask_current); + -+MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void) -+{ -+ return _mali_osk_atomic_inc_return(&mali_job_id_autonumber); -+} ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); + -+MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void) -+{ -+ return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber); -+} ++ mali_pm_state_unlock(); ++#endif + -+/** -+ * @brief Used by the Timeline system to queue a GP job. -+ * -+ * @note @ref mali_executor_schedule_from_mask() should be called if this -+ * function returns non-zero. -+ * -+ * @param job The GP job that is being activated. -+ * -+ * @return A scheduling bitmask that can be used to decide if scheduling is -+ * necessary after this call. 
-+ */ -+mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job); ++ if (MALI_TRUE == mali_pm_runtime_active) { ++ /* Runtime PM was active, so reset PMU */ ++ if (NULL != pmu) { ++ mali_pmu_reset(pmu); ++ pmu_mask_current = mali_pmu_get_mask(pmu); + -+/** -+ * @brief Used by the Timeline system to queue a PP job. -+ * -+ * @note @ref mali_executor_schedule_from_mask() should be called if this -+ * function returns non-zero. -+ * -+ * @param job The PP job that is being activated. -+ * -+ * @return A scheduling bitmask that can be used to decide if scheduling is -+ * necessary after this call. -+ */ -+mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job); ++ MALI_DEBUG_PRINT(3, ("Mali PM: OS resume 0x%x \n", pmu_mask_current)); ++ } + -+void mali_scheduler_complete_gp_job(struct mali_gp_job *job, -+ mali_bool success, -+ mali_bool user_notification, -+ mali_bool dequeued); ++ mali_pm_update_sync_internal(); ++ } + -+void mali_scheduler_complete_pp_job(struct mali_pp_job *job, -+ u32 num_cores_in_virtual, -+ mali_bool user_notification, -+ mali_bool dequeued); ++ mali_pm_exec_unlock(); + -+void mali_scheduler_abort_session(struct mali_session_data *session); ++ /* Start executing jobs again */ ++ mali_executor_resume(); ++} + -+void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job, -+ u32 num_cores_in_virtual); ++mali_bool mali_pm_runtime_suspend(void) ++{ ++ mali_bool ret; + -+#if MALI_STATE_TRACKING -+u32 mali_scheduler_dump_state(char *buf, u32 size); -+#endif ++ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime suspend\n")); + -+void mali_scheduler_gp_pp_job_queue_print(void); ++ mali_pm_exec_lock(); + -+#endif /* __MALI_SCHEDULER_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h b/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h -new file mode 100644 -index 000000000..ba1d71d01 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* ++ * Put SW state directly into "off" state, and do not bother to power ++ * down each power domain, because entire GPU will be powered off ++ * when we return. ++ * For runtime PM suspend, in contrast to OS suspend, there is a race ++ * between this function and the mali_pm_update_sync_internal(), which ++ * is fine... ++ */ ++ ret = mali_pm_common_suspend(); ++ if (MALI_TRUE == ret) { ++ mali_pm_runtime_active = MALI_FALSE; ++ } else { ++ /* ++ * Process the "power up" instead, ++ * which could have been "lost" ++ */ ++ mali_pm_update_sync_internal(); ++ } + -+#ifndef __MALI_SCHEDULER_TYPES_H__ -+#define __MALI_SCHEDULER_TYPES_H__ ++ mali_pm_exec_unlock(); + -+#include "mali_osk.h" ++ return ret; ++} + -+#define MALI_SCHEDULER_JOB_ID_SPAN 65535 ++void mali_pm_runtime_resume(void) ++{ ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+/** -+ * Bitmask used for defered scheduling of subsystems. 
-+ */ -+typedef u32 mali_scheduler_mask; ++ mali_pm_exec_lock(); + -+#define MALI_SCHEDULER_MASK_GP (1<<0) -+#define MALI_SCHEDULER_MASK_PP (1<<1) ++ mali_pm_runtime_active = MALI_TRUE; + -+#define MALI_SCHEDULER_MASK_EMPTY 0 -+#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP) ++#if defined(DEBUG) ++ ++num_pm_runtime_resume; + -+#endif /* __MALI_SCHEDULER_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_session.c b/drivers/gpu/arm/mali400/mali/common/mali_session.c -new file mode 100644 -index 000000000..7504fb108 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_session.c -@@ -0,0 +1,155 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ mali_pm_state_lock(); + -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_session.h" -+#include "mali_ukk.h" -+#ifdef MALI_MEM_SWAP_TRACKING -+#include "mali_memory_swap_alloc.h" ++ /* ++ * Assert that things are as we left them in runtime_suspend(), ++ * except for pd_mask_wanted which normally will be the reason we ++ * got here (job queued => domains wanted) ++ */ ++ MALI_DEBUG_ASSERT(0 == pd_mask_current); ++ MALI_DEBUG_ASSERT(0 == pmu_mask_current); ++ ++ mali_pm_state_unlock(); +#endif + -+_MALI_OSK_LIST_HEAD(mali_sessions); -+static u32 mali_session_count = 0; ++ if (NULL != pmu) { ++ mali_pmu_reset(pmu); ++ pmu_mask_current = mali_pmu_get_mask(pmu); ++ MALI_DEBUG_PRINT(3, ("Mali PM: Runtime resume 0x%x \n", pmu_mask_current)); ++ } + -+_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL; -+wait_queue_head_t pending_queue; ++ /* ++ * Normally we are resumed because a job has just been queued. ++ * pd_mask_wanted should thus be != 0. ++ * It is however possible for others to take a Mali Runtime PM ref ++ * without having a job queued. ++ * We should however always call mali_pm_update_sync_internal(), ++ * because this will take care of any potential mismatch between ++ * pmu_mask_current and pd_mask_current. 
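++ * (After the PMU reset above, pmu_mask_current reflects the actual PMU
++ * state, while pd_mask_current is still expected to be 0 here.)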
++ */ ++ mali_pm_update_sync_internal(); + -+_mali_osk_errcode_t mali_session_initialize(void) ++ mali_pm_exec_unlock(); ++} ++ ++#if MALI_STATE_TRACKING ++u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain, ++ char *buf, u32 size) +{ -+ _MALI_OSK_INIT_LIST_HEAD(&mali_sessions); -+ /* init wait queue for big varying job */ -+ init_waitqueue_head(&pending_queue); ++ int n = 0; + -+ mali_sessions_lock = _mali_osk_spinlock_irq_init( -+ _MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_SESSIONS); -+ if (NULL == mali_sessions_lock) { -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tPower domain: id %u\n", ++ mali_pm_domain_get_id(domain)); + -+ return _MALI_OSK_ERR_OK; -+} ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\t\tMask: 0x%04x\n", ++ mali_pm_domain_get_mask(domain)); + -+void mali_session_terminate(void) -+{ -+ if (NULL != mali_sessions_lock) { -+ _mali_osk_spinlock_irq_term(mali_sessions_lock); -+ mali_sessions_lock = NULL; -+ } -+} ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\t\tUse count: %u\n", ++ mali_pm_domain_get_use_count(domain)); + -+void mali_session_add(struct mali_session_data *session) -+{ -+ mali_session_lock(); -+ _mali_osk_list_add(&session->link, &mali_sessions); -+ mali_session_count++; -+ mali_session_unlock(); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\t\tCurrent power state: %s\n", ++ (mali_pm_domain_get_mask(domain) & pd_mask_current) ? ++ "On" : "Off"); ++ ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\t\tWanted power state: %s\n", ++ (mali_pm_domain_get_mask(domain) & pd_mask_wanted) ? ++ "On" : "Off"); ++ ++ return n; +} ++#endif + -+void mali_session_remove(struct mali_session_data *session) ++static void mali_pm_state_lock(void) +{ -+ mali_session_lock(); -+ _mali_osk_list_delinit(&session->link); -+ mali_session_count--; -+ mali_session_unlock(); ++ _mali_osk_spinlock_irq_lock(pm_lock_state); +} + -+u32 mali_session_get_count(void) ++static void mali_pm_state_unlock(void) +{ -+ return mali_session_count; ++ _mali_osk_spinlock_irq_unlock(pm_lock_state); +} + -+mali_bool mali_session_pp_job_is_empty(void *data) ++void mali_pm_exec_lock(void) +{ -+ struct mali_session_data *session = (struct mali_session_data *)data; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if ( 0 == _mali_osk_atomic_read(&session->number_of_pp_jobs)) { -+ return MALI_TRUE; -+ } -+ return MALI_FALSE; ++ _mali_osk_mutex_wait(pm_lock_exec); +} + -+wait_queue_head_t *mali_session_get_wait_queue(void) ++void mali_pm_exec_unlock(void) +{ -+ return &pending_queue; ++ _mali_osk_mutex_signal(pm_lock_exec); +} + -+/* -+ * Get the max completed window jobs from all active session, -+ * which will be used in window render frame per sec calculate -+ */ -+#if defined(CONFIG_MALI_DVFS) -+u32 mali_session_max_window_num(void) ++static void mali_pm_domain_power_up(u32 power_up_mask, ++ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS], ++ u32 *num_groups_up, ++ struct mali_l2_cache_core *l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES], ++ u32 *num_l2_up) +{ -+ struct mali_session_data *session, *tmp; -+ u32 max_window_num = 0; -+ u32 tmp_number = 0; ++ u32 domain_bit; ++ u32 notify_mask = power_up_mask; + -+ mali_session_lock(); ++ MALI_DEBUG_ASSERT(0 != power_up_mask); ++ MALI_DEBUG_ASSERT_POINTER(groups_up); ++ MALI_DEBUG_ASSERT_POINTER(num_groups_up); ++ MALI_DEBUG_ASSERT(0 == *num_groups_up); ++ MALI_DEBUG_ASSERT_POINTER(l2_up); ++ MALI_DEBUG_ASSERT_POINTER(num_l2_up); ++ MALI_DEBUG_ASSERT(0 == *num_l2_up); + -+ 
MALI_SESSION_FOREACH(session, tmp, link) { -+ tmp_number = _mali_osk_atomic_xchg( -+ &session->number_of_window_jobs, 0); -+ if (max_window_num < tmp_number) { -+ max_window_num = tmp_number; -+ } -+ } ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state); + -+ mali_session_unlock(); ++ MALI_DEBUG_PRINT(5, ++ ("PM update: Powering up domains: . [%s]\n", ++ mali_pm_mask_to_string(power_up_mask))); + -+ return max_window_num; -+} -+#endif ++ pd_mask_current |= power_up_mask; + -+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx) -+{ -+ struct mali_session_data *session, *tmp; -+ u32 mali_mem_usage; -+ u32 total_mali_mem_size; -+#ifdef MALI_MEM_SWAP_TRACKING -+ u32 swap_pool_size; -+ u32 swap_unlock_size; -+#endif ++ domain_bit = _mali_osk_fls(notify_mask); ++ while (0 != domain_bit) { ++ u32 domain_id = domain_bit - 1; ++ struct mali_pm_domain *domain = ++ mali_pm_domain_get_from_index( ++ domain_id); ++ struct mali_l2_cache_core *l2_cache; ++ struct mali_l2_cache_core *l2_cache_tmp; ++ struct mali_group *group; ++ struct mali_group *group_tmp; + -+ MALI_DEBUG_ASSERT_POINTER(print_ctx); -+ mali_session_lock(); -+ MALI_SESSION_FOREACH(session, tmp, link) { -+#ifdef MALI_MEM_SWAP_TRACKING -+ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u %-10u\n", -+ session->comm, session->pid, -+ (atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE, -+ (unsigned int)session->max_mali_mem_allocated_size, -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE) -+ ); -+#else -+ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u \n", -+ session->comm, session->pid, -+ (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)session->max_mali_mem_allocated_size, -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE), -+ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE) -+ ); -+#endif -+ } -+ mali_session_unlock(); -+ mali_mem_usage = _mali_ukk_report_memory_usage(); -+ total_mali_mem_size = _mali_ukk_report_total_memory_size(); -+ _mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size); -+#ifdef MALI_MEM_SWAP_TRACKING -+ mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size); -+ _mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size); -+#endif -+} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_session.h b/drivers/gpu/arm/mali400/mali/common/mali_session.h -new file mode 100644 -index 000000000..da8b9927e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_session.h -@@ -0,0 +1,136 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* Mark domain as powered up */ ++ mali_pm_domain_set_power_on(domain, MALI_TRUE); + -+#ifndef __MALI_SESSION_H__ -+#define __MALI_SESSION_H__ ++ /* ++ * Make a note of the L2 and/or group(s) to notify ++ * (need to release the PM state lock before doing so) ++ */ + -+#include "mali_mmu_page_directory.h" -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "mali_memory_types.h" -+#include "mali_memory_manager.h" ++ _MALI_OSK_LIST_FOREACHENTRY(l2_cache, ++ l2_cache_tmp, ++ mali_pm_domain_get_l2_cache_list( ++ domain), ++ struct mali_l2_cache_core, ++ pm_domain_list) { ++ MALI_DEBUG_ASSERT(*num_l2_up < ++ MALI_MAX_NUMBER_OF_L2_CACHE_CORES); ++ l2_up[*num_l2_up] = l2_cache; ++ (*num_l2_up)++; ++ } + -+struct mali_timeline_system; -+struct mali_soft_system; ++ _MALI_OSK_LIST_FOREACHENTRY(group, ++ group_tmp, ++ mali_pm_domain_get_group_list(domain), ++ struct mali_group, ++ pm_domain_list) { ++ MALI_DEBUG_ASSERT(*num_groups_up < ++ MALI_MAX_NUMBER_OF_GROUPS); ++ groups_up[*num_groups_up] = group; + -+/* Number of frame builder job lists per session. */ -+#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16 -+#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1) -+/*Max pending big job allowed in kernel*/ -+#define MALI_MAX_PENDING_BIG_JOB (2) ++ (*num_groups_up)++; ++ } + -+struct mali_session_data { -+ _mali_osk_notification_queue_t *ioctl_queue; ++ /* Remove current bit and find next */ ++ notify_mask &= ~(1 << (domain_id)); ++ domain_bit = _mali_osk_fls(notify_mask); ++ } ++} ++static void mali_pm_domain_power_down(u32 power_down_mask, ++ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS], ++ u32 *num_groups_down, ++ struct mali_l2_cache_core *l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES], ++ u32 *num_l2_down) ++{ ++ u32 domain_bit; ++ u32 notify_mask = power_down_mask; + -+ _mali_osk_wait_queue_t *wait_queue; /**The wait queue to wait for the number of pp job become 0.*/ ++ MALI_DEBUG_ASSERT(0 != power_down_mask); ++ MALI_DEBUG_ASSERT_POINTER(groups_down); ++ MALI_DEBUG_ASSERT_POINTER(num_groups_down); ++ MALI_DEBUG_ASSERT(0 == *num_groups_down); ++ MALI_DEBUG_ASSERT_POINTER(l2_down); ++ MALI_DEBUG_ASSERT_POINTER(num_l2_down); ++ MALI_DEBUG_ASSERT(0 == *num_l2_down); + -+ _mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */ -+ _mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */ -+#if 0 -+ _mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */ -+#endif -+ struct mali_page_directory *page_directory; /**< MMU page directory for this session */ ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_state); + -+ _MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */ -+ _MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */ ++ MALI_DEBUG_PRINT(5, ++ ("PM update: Powering down domains: [%s]\n", ++ mali_pm_mask_to_string(power_down_mask))); + -+#if defined(CONFIG_MALI_DVFS) -+ _mali_osk_atomic_t number_of_window_jobs; /**< Record the window 
jobs completed on this session in a period */ -+#endif -+ _mali_osk_atomic_t number_of_pp_jobs; /** < Record the pp jobs on this session */ ++ pd_mask_current &= ~power_down_mask; + -+ _mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id. Used to link jobs from same frame builder. */ -+ struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */ -+ struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */ ++ domain_bit = _mali_osk_fls(notify_mask); ++ while (0 != domain_bit) { ++ u32 domain_id = domain_bit - 1; ++ struct mali_pm_domain *domain = ++ mali_pm_domain_get_from_index(domain_id); ++ struct mali_l2_cache_core *l2_cache; ++ struct mali_l2_cache_core *l2_cache_tmp; ++ struct mali_group *group; ++ struct mali_group *group_tmp; + -+ mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */ -+ mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. */ -+ u32 pid; -+ char *comm; -+ atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */ -+ atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/ -+ size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */ -+ /* Added for new memroy system */ -+ struct mali_allocation_manager allocation_mgr; ++ /* Mark domain as powered down */ ++ mali_pm_domain_set_power_on(domain, MALI_FALSE); + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ u32 fence_context; /** < The execution dma fence context this fence is run on. */ -+ _mali_osk_atomic_t fence_seqno; /** < Alinear increasing sequence number for this dma fence context. */ -+#endif -+}; ++ /* ++ * Make a note of the L2s and/or groups to notify ++ * (need to release the PM state lock before doing so) ++ */ + -+_mali_osk_errcode_t mali_session_initialize(void); -+void mali_session_terminate(void); ++ _MALI_OSK_LIST_FOREACHENTRY(l2_cache, ++ l2_cache_tmp, ++ mali_pm_domain_get_l2_cache_list(domain), ++ struct mali_l2_cache_core, ++ pm_domain_list) { ++ MALI_DEBUG_ASSERT(*num_l2_down < ++ MALI_MAX_NUMBER_OF_L2_CACHE_CORES); ++ l2_down[*num_l2_down] = l2_cache; ++ (*num_l2_down)++; ++ } + -+/* List of all sessions. Actual list head in mali_kernel_core.c */ -+extern _mali_osk_list_t mali_sessions; -+/* Lock to protect modification and access to the mali_sessions list */ -+extern _mali_osk_spinlock_irq_t *mali_sessions_lock; ++ _MALI_OSK_LIST_FOREACHENTRY(group, ++ group_tmp, ++ mali_pm_domain_get_group_list(domain), ++ struct mali_group, ++ pm_domain_list) { ++ MALI_DEBUG_ASSERT(*num_groups_down < ++ MALI_MAX_NUMBER_OF_GROUPS); ++ groups_down[*num_groups_down] = group; ++ (*num_groups_down)++; ++ } + -+MALI_STATIC_INLINE void mali_session_lock(void) -+{ -+ _mali_osk_spinlock_irq_lock(mali_sessions_lock); ++ /* Remove current bit and find next */ ++ notify_mask &= ~(1 << (domain_id)); ++ domain_bit = _mali_osk_fls(notify_mask); ++ } +} + -+MALI_STATIC_INLINE void mali_session_unlock(void) ++/* ++ * Execute pending power domain changes ++ * pm_lock_exec lock must be taken by caller. 
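++ * (pm_lock_state is taken and released internally as needed.)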
++ */ ++static void mali_pm_update_sync_internal(void) +{ -+ _mali_osk_spinlock_irq_unlock(mali_sessions_lock); -+} -+ -+void mali_session_add(struct mali_session_data *session); -+void mali_session_remove(struct mali_session_data *session); -+u32 mali_session_get_count(void); -+mali_bool mali_session_pp_job_is_empty(void *data); -+wait_queue_head_t *mali_session_get_wait_queue(void); ++ /* ++ * This should only be called in non-atomic context ++ * (normally as deferred work) ++ * ++ * Look at the pending power domain changes, and execute these. ++ * Make sure group and schedulers are notified about changes. ++ */ + -+#define MALI_SESSION_FOREACH(session, tmp, link) \ -+ _MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link) ++ struct mali_pmu_core *pmu = mali_pmu_get_global_pmu_core(); + -+MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session) -+{ -+ return session->page_directory; -+} ++ u32 power_down_mask; ++ u32 power_up_mask; + -+MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session) -+{ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ _mali_osk_mutex_wait(session->memory_lock); -+} ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); + -+MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session) -+{ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ _mali_osk_mutex_signal(session->memory_lock); -+} ++#if defined(DEBUG) ++ ++num_pm_updates; ++#endif + -+MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object) -+{ -+ _mali_osk_notification_queue_send(session->ioctl_queue, object); -+} ++ /* Hold PM state lock while we look at (and obey) the wanted state */ ++ mali_pm_state_lock(); + -+#if defined(CONFIG_MALI_DVFS) ++ MALI_DEBUG_PRINT(5, ("PM update pre: Wanted domain mask: .. [%s]\n", ++ mali_pm_mask_to_string(pd_mask_wanted))); ++ MALI_DEBUG_PRINT(5, ("PM update pre: Current domain mask: . [%s]\n", ++ mali_pm_mask_to_string(pd_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM update pre: Current PMU mask: .... [%s]\n", ++ mali_pm_mask_to_string(pmu_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM update pre: Group power stats: ... <%s>\n", ++ mali_pm_group_stats_to_string())); + -+MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session) -+{ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ _mali_osk_atomic_inc(&session->number_of_window_jobs); -+} ++ /* Figure out which cores we need to power on */ ++ power_up_mask = pd_mask_wanted & ++ (pd_mask_wanted ^ pd_mask_current); + -+/* -+ * Get the max completed window jobs from all active session, -+ * which will be used in window render frame per sec calculate -+ */ -+u32 mali_session_max_window_num(void); ++ if (0 != power_up_mask) { ++ u32 power_up_mask_pmu; ++ struct mali_group *groups_up[MALI_MAX_NUMBER_OF_GROUPS]; ++ u32 num_groups_up = 0; ++ struct mali_l2_cache_core * ++ l2_up[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; ++ u32 num_l2_up = 0; ++ u32 i; + ++#if defined(DEBUG) ++ ++num_pm_updates_up; +#endif + -+void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx); ++ /* ++ * Make sure dummy/global domain is always included when ++ * powering up, since this is controlled by runtime PM, ++ * and device power is on at this stage. 
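++ * The dummy domain is not a real PMU domain; it is masked out again
++ * below before the PMU is programmed.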
++ */ ++ power_up_mask |= MALI_PM_DOMAIN_DUMMY_MASK; + -+#endif /* __MALI_SESSION_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c -new file mode 100644 -index 000000000..35cd830bc ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c -@@ -0,0 +1,438 @@ -+/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* Power up only real PMU domains */ ++ power_up_mask_pmu = power_up_mask & ~MALI_PM_DOMAIN_DUMMY_MASK; + -+#include "mali_soft_job.h" -+#include "mali_osk.h" -+#include "mali_timeline.h" -+#include "mali_session.h" -+#include "mali_kernel_common.h" -+#include "mali_uk_types.h" -+#include "mali_scheduler.h" -+#include "mali_executor.h" ++ /* But not those that happen to be powered on already */ ++ power_up_mask_pmu &= (power_up_mask ^ pmu_mask_current) & ++ power_up_mask; + -+MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system) -+{ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ _mali_osk_spinlock_irq_lock(system->lock); -+ MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system)); -+ MALI_DEBUG_ASSERT(0 == system->lock_owner); -+ MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid()); -+} ++ if (0 != power_up_mask_pmu) { ++ MALI_DEBUG_ASSERT(NULL != pmu); ++ pmu_mask_current |= power_up_mask_pmu; ++ mali_pmu_power_up(pmu, power_up_mask_pmu); ++ } + -+MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system) -+{ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system)); -+ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner); -+ MALI_DEBUG_CODE(system->lock_owner = 0); -+ _mali_osk_spinlock_irq_unlock(system->lock); -+} ++ /* ++ * Put the domains themselves in power up state. ++ * We get the groups and L2s to notify in return. ++ */ ++ mali_pm_domain_power_up(power_up_mask, ++ groups_up, &num_groups_up, ++ l2_up, &num_l2_up); + -+#if defined(DEBUG) -+MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system) -+{ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner); -+} -+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system) -+#else -+#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) -+#endif /* defined(DEBUG) */ ++ /* Need to unlock PM state lock before notifying L2 + groups */ ++ mali_pm_state_unlock(); + -+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session) -+{ -+ struct mali_soft_job_system *system; ++ /* Notify each L2 cache that we have be powered up */ ++ for (i = 0; i < num_l2_up; i++) { ++ mali_l2_cache_power_up(l2_up[i]); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ /* ++ * Tell execution module about all the groups we have ++ * powered up. Groups will be notified as a result of this. 
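++ * This happens outside the PM state lock, which was released above.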
++ */ ++ mali_executor_group_power_up(groups_up, num_groups_up); + -+ system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system)); -+ if (NULL == system) { -+ return NULL; ++ /* Lock state again before checking for power down */ ++ mali_pm_state_lock(); + } + -+ system->session = session; ++ /* Figure out which cores we need to power off */ ++ power_down_mask = pd_mask_current & ++ (pd_mask_wanted ^ pd_mask_current); + -+ system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER); -+ if (NULL == system->lock) { -+ mali_soft_job_system_destroy(system); -+ return NULL; -+ } -+ system->lock_owner = 0; -+ system->last_job_id = 0; ++ /* ++ * Never power down the dummy/global domain here. This is to be done ++ * from a suspend request (since this domain is only physicall powered ++ * down at that point) ++ */ ++ power_down_mask &= ~MALI_PM_DOMAIN_DUMMY_MASK; + -+ _MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used)); ++ if (0 != power_down_mask) { ++ u32 power_down_mask_pmu; ++ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS]; ++ u32 num_groups_down = 0; ++ struct mali_l2_cache_core * ++ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; ++ u32 num_l2_down = 0; ++ u32 i; + -+ return system; -+} ++#if defined(DEBUG) ++ ++num_pm_updates_down; ++#endif + -+void mali_soft_job_system_destroy(struct mali_soft_job_system *system) -+{ -+ MALI_DEBUG_ASSERT_POINTER(system); ++ /* ++ * Put the domains themselves in power down state. ++ * We get the groups and L2s to notify in return. ++ */ ++ mali_pm_domain_power_down(power_down_mask, ++ groups_down, &num_groups_down, ++ l2_down, &num_l2_down); + -+ /* All jobs should be free at this point. */ -+ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used))); ++ /* Need to unlock PM state lock before notifying L2 + groups */ ++ mali_pm_state_unlock(); + -+ if (NULL != system) { -+ if (NULL != system->lock) { -+ _mali_osk_spinlock_irq_term(system->lock); ++ /* ++ * Tell execution module about all the groups we will be ++ * powering down. Groups will be notified as a result of this. ++ */ ++ if (0 < num_groups_down) { ++ mali_executor_group_power_down(groups_down, num_groups_down); + } -+ _mali_osk_free(system); -+ } -+} -+ -+static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job) -+{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(system); + -+ mali_soft_job_system_lock(job->system); -+ -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id); -+ MALI_DEBUG_ASSERT(system == job->system); ++ /* Notify each L2 cache that we will be powering down */ ++ for (i = 0; i < num_l2_down; i++) { ++ mali_l2_cache_power_down(l2_down[i]); ++ } + -+ _mali_osk_list_del(&(job->system_list)); ++ /* ++ * Power down only PMU domains which should not stay on ++ * Some domains might for instance currently be incorrectly ++ * powered up if default domain power state is all on. ++ */ ++ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current); + -+ mali_soft_job_system_unlock(job->system); ++ if (0 != power_down_mask_pmu) { ++ MALI_DEBUG_ASSERT(NULL != pmu); ++ pmu_mask_current &= ~power_down_mask_pmu; ++ mali_pmu_power_down(pmu, power_down_mask_pmu); + -+ _mali_osk_free(job); -+} ++ } ++ } else { ++ /* ++ * Power down only PMU domains which should not stay on ++ * Some domains might for instance currently be incorrectly ++ * powered up if default domain power state is all on. 
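++ * This is why the mask is computed from pmu_mask_current below,
++ * rather than from power_down_mask alone.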
++ */ ++ u32 power_down_mask_pmu; + -+MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id) -+{ -+ struct mali_soft_job *job, *tmp; ++ /* No need for state lock since we'll only update PMU */ ++ mali_pm_state_unlock(); + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system); ++ power_down_mask_pmu = pmu_mask_current & (~pd_mask_current); + -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) { -+ if (job->id == job_id) -+ return job; ++ if (0 != power_down_mask_pmu) { ++ MALI_DEBUG_ASSERT(NULL != pmu); ++ pmu_mask_current &= ~power_down_mask_pmu; ++ mali_pmu_power_down(pmu, power_down_mask_pmu); ++ } + } + -+ return NULL; ++ MALI_DEBUG_PRINT(5, ("PM update post: Current domain mask: . [%s]\n", ++ mali_pm_mask_to_string(pd_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM update post: Current PMU mask: .... [%s]\n", ++ mali_pm_mask_to_string(pmu_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM update post: Group power stats: ... <%s>\n", ++ mali_pm_group_stats_to_string())); +} + -+void mali_soft_job_destroy(struct mali_soft_job *job) ++static mali_bool mali_pm_common_suspend(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(job->system); -+ -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job)); -+ -+ if (NULL != job) { -+ if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return; -+ -+ _mali_osk_atomic_term(&job->refcount); -+ -+ if (NULL != job->activated_notification) { -+ _mali_osk_notification_delete(job->activated_notification); -+ job->activated_notification = NULL; -+ } ++ mali_pm_state_lock(); + -+ mali_soft_job_system_free_job(job->system, job); ++ if (0 != pd_mask_wanted) { ++ MALI_DEBUG_PRINT(5, ("PM: Aborting suspend operation\n\n\n")); ++ mali_pm_state_unlock(); ++ return MALI_FALSE; + } -+} + -+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job) -+{ -+ struct mali_soft_job *job; -+ _mali_osk_notification_t *notification = NULL; ++ MALI_DEBUG_PRINT(5, ("PM suspend pre: Wanted domain mask: .. [%s]\n", ++ mali_pm_mask_to_string(pd_mask_wanted))); ++ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current domain mask: . [%s]\n", ++ mali_pm_mask_to_string(pd_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM suspend pre: Current PMU mask: .... [%s]\n", ++ mali_pm_mask_to_string(pmu_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM suspend pre: Group power stats: ... <%s>\n", ++ mali_pm_group_stats_to_string())); + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) || -+ (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type)); ++ if (0 != pd_mask_current) { ++ /* ++ * We have still some domains powered on. ++ * It is for instance very normal that at least the ++ * dummy/global domain is marked as powered on at this point. 
++ * (because it is physically powered on until this function ++ * returns) ++ */ + -+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s)); -+ if (unlikely(NULL == notification)) { -+ MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification")); -+ return NULL; -+ } ++ struct mali_group *groups_down[MALI_MAX_NUMBER_OF_GROUPS]; ++ u32 num_groups_down = 0; ++ struct mali_l2_cache_core * ++ l2_down[MALI_MAX_NUMBER_OF_L2_CACHE_CORES]; ++ u32 num_l2_down = 0; ++ u32 i; + -+ job = _mali_osk_malloc(sizeof(struct mali_soft_job)); -+ if (unlikely(NULL == job)) { -+ MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n")); -+ return NULL; -+ } ++ /* ++ * Put the domains themselves in power down state. ++ * We get the groups and L2s to notify in return. ++ */ ++ mali_pm_domain_power_down(pd_mask_current, ++ groups_down, ++ &num_groups_down, ++ l2_down, ++ &num_l2_down); + -+ mali_soft_job_system_lock(system); ++ MALI_DEBUG_ASSERT(0 == pd_mask_current); ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); + -+ job->system = system; -+ job->id = system->last_job_id++; -+ job->state = MALI_SOFT_JOB_STATE_ALLOCATED; ++ /* Need to unlock PM state lock before notifying L2 + groups */ ++ mali_pm_state_unlock(); + -+ _mali_osk_list_add(&(job->system_list), &(system->jobs_used)); ++ /* ++ * Tell execution module about all the groups we will be ++ * powering down. Groups will be notified as a result of this. ++ */ ++ if (0 < num_groups_down) { ++ mali_executor_group_power_down(groups_down, num_groups_down); ++ } + -+ job->type = type; -+ job->user_job = user_job; -+ job->activated = MALI_FALSE; ++ /* Notify each L2 cache that we will be powering down */ ++ for (i = 0; i < num_l2_down; i++) { ++ mali_l2_cache_power_down(l2_down[i]); ++ } + -+ job->activated_notification = notification; ++ pmu_mask_current = 0; ++ } else { ++ MALI_DEBUG_ASSERT(0 == pmu_mask_current); + -+ _mali_osk_atomic_init(&job->refcount, 1); ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pm_domain_all_unused()); + -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state); -+ MALI_DEBUG_ASSERT(system == job->system); -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id); ++ mali_pm_state_unlock(); ++ } + -+ mali_soft_job_system_unlock(system); ++ MALI_DEBUG_PRINT(5, ("PM suspend post: Current domain mask: [%s]\n", ++ mali_pm_mask_to_string(pd_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM suspend post: Current PMU mask: ... [%s]\n", ++ mali_pm_mask_to_string(pmu_mask_current))); ++ MALI_DEBUG_PRINT(5, ("PM suspend post: Group power stats: .. 
<%s>\n", ++ mali_pm_group_stats_to_string())); + -+ return job; ++ return MALI_TRUE; +} + -+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence) ++static void mali_pm_update_work(void *data) +{ -+ mali_timeline_point point; -+ struct mali_soft_job_system *system; -+ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ -+ MALI_DEBUG_ASSERT_POINTER(job->system); -+ system = job->system; -+ -+ MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system); -+ -+ mali_soft_job_system_lock(system); -+ -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state); -+ job->state = MALI_SOFT_JOB_STATE_STARTED; -+ -+ mali_soft_job_system_unlock(system); -+ -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job)); -+ -+ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job); -+ point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT); -+ -+ return point; ++ MALI_IGNORE(data); ++ mali_pm_update_sync(); +} + -+static mali_bool mali_soft_job_is_activated(void *data) ++static _mali_osk_errcode_t mali_pm_create_pm_domains(void) +{ -+ struct mali_soft_job *job; ++ int i; + -+ job = (struct mali_soft_job *) data; -+ MALI_DEBUG_ASSERT_POINTER(job); ++ /* Create all domains (including dummy domain) */ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (0x0 == domain_config[i]) continue; + -+ return job->activated; ++ if (NULL == mali_pm_domain_create(domain_config[i])) { ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ } ++ ++ return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id) ++static void mali_pm_set_default_pm_domain_config(void) +{ -+ struct mali_soft_job *job; -+ struct mali_timeline_system *timeline_system; -+ mali_scheduler_mask schedule_mask; -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ -+ mali_soft_job_system_lock(system); -+ -+ job = mali_soft_job_system_lookup_job(system, job_id); ++ MALI_DEBUG_ASSERT(0 != _mali_osk_resource_base_address()); + -+ if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type) -+ || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) { -+ mali_soft_job_system_unlock(system); -+ MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id)); -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ /* GP core */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_GP, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_GP] = 0x01; + } + -+ if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) { -+ job->state = MALI_SOFT_JOB_STATE_SIGNALED; -+ mali_soft_job_system_unlock(system); -+ -+ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated); -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job)); -+ mali_soft_job_destroy(job); ++ /* PP0 - PP3 core */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP0, NULL)) { ++ if (mali_is_mali400()) { ++ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 2; ++ } else if (mali_is_mali450()) { ++ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 1; ++ } else if (mali_is_mali470()) { ++ domain_config[MALI_DOMAIN_INDEX_PP0] = 0x01 << 0; ++ } ++ } + -+ return _MALI_OSK_ERR_TIMEOUT; ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP1, NULL)) { ++ if (mali_is_mali400()) { ++ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 3; ++ } else 
if (mali_is_mali450()) { ++ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 2; ++ } else if (mali_is_mali470()) { ++ domain_config[MALI_DOMAIN_INDEX_PP1] = 0x01 << 1; ++ } + } + -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP2, NULL)) { ++ if (mali_is_mali400()) { ++ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 4; ++ } else if (mali_is_mali450()) { ++ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 2; ++ } else if (mali_is_mali470()) { ++ domain_config[MALI_DOMAIN_INDEX_PP2] = 0x01 << 1; ++ } ++ } + -+ job->state = MALI_SOFT_JOB_STATE_SIGNALED; -+ mali_soft_job_system_unlock(system); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP3, NULL)) { ++ if (mali_is_mali400()) { ++ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 5; ++ } else if (mali_is_mali450()) { ++ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 2; ++ } else if (mali_is_mali470()) { ++ domain_config[MALI_DOMAIN_INDEX_PP3] = 0x01 << 1; ++ } ++ } + -+ /* Since the job now is in signaled state, timeouts from the timeline system will be -+ * ignored, and it is not possible to signal this job again. */ ++ /* PP4 - PP7 */ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP4, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_PP4] = 0x01 << 3; ++ } + -+ timeline_system = system->session->timeline_system; -+ MALI_DEBUG_ASSERT_POINTER(timeline_system); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP5, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_PP5] = 0x01 << 3; ++ } + -+ /* Wait until activated. */ -+ _mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP6, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_PP6] = 0x01 << 3; ++ } + -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job)); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI_OFFSET_PP7, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_PP7] = 0x01 << 3; ++ } + -+ schedule_mask = mali_timeline_tracker_release(&job->tracker); -+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); ++ /* L2gp/L2PP0/L2PP4 */ ++ if (mali_is_mali400()) { ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI400_OFFSET_L2_CACHE0, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 1; ++ } ++ } else if (mali_is_mali450()) { ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI450_OFFSET_L2_CACHE0, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_L20] = 0x01 << 0; ++ } + -+ mali_soft_job_destroy(job); ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI450_OFFSET_L2_CACHE1, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 1; ++ } + -+ return _MALI_OSK_ERR_OK; ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI450_OFFSET_L2_CACHE2, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_L22] = 0x01 << 3; ++ } ++ } else if (mali_is_mali470()) { ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find( ++ MALI470_OFFSET_L2_CACHE1, NULL)) { ++ domain_config[MALI_DOMAIN_INDEX_L21] = 0x01 << 0; ++ } ++ } +} + -+static void mali_soft_job_send_activated_notification(struct mali_soft_job *job) ++static u32 mali_pm_get_registered_cores_mask(void) +{ -+ if (NULL != job->activated_notification) { -+ _mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer; -+ res->user_job = job->user_job; -+ mali_session_send_notification(job->system->session, job->activated_notification); ++ 
int i = 0; ++ u32 mask = 0; ++ ++ for (i = 0; i < MALI_DOMAIN_INDEX_DUMMY; i++) { ++ mask |= domain_config[i]; + } -+ job->activated_notification = NULL; ++ ++ return mask; +} + -+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job) ++static void mali_pm_set_pmu_domain_config(void) +{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ int i = 0; + -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(job->system); -+ MALI_DEBUG_ASSERT_POINTER(job->system->session); ++ _mali_osk_device_data_pmu_config_get(domain_config, MALI_MAX_NUMBER_OF_DOMAINS - 1); + -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job)); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) { ++ if (0 != domain_config[i]) { ++ MALI_DEBUG_PRINT(2, ("Using customer pmu config:\n")); ++ break; ++ } ++ } + -+ mali_soft_job_system_lock(job->system); ++ if (MALI_MAX_NUMBER_OF_DOMAINS - 1 == i) { ++ MALI_DEBUG_PRINT(2, ("Using hw detect pmu config:\n")); ++ mali_pm_set_default_pm_domain_config(); ++ } + -+ if (unlikely(job->system->session->is_aborting)) { -+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job)); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS - 1; i++) { ++ if (domain_config[i]) { ++ MALI_DEBUG_PRINT(2, ("domain_config[%d] = 0x%x \n", i, domain_config[i])); ++ } ++ } ++ /* Can't override dummy domain mask */ ++ domain_config[MALI_DOMAIN_INDEX_DUMMY] = ++ 1 << MALI_DOMAIN_INDEX_DUMMY; ++} + -+ mali_soft_job_system_unlock(job->system); ++#if defined(DEBUG) ++const char *mali_pm_mask_to_string(u32 mask) ++{ ++ static char bit_str[MALI_MAX_NUMBER_OF_DOMAINS + 1]; ++ int bit; ++ int str_pos = 0; + -+ /* Since we are in shutdown, we can ignore the scheduling bitmask. */ -+ mali_timeline_tracker_release(&job->tracker); -+ mali_soft_job_destroy(job); -+ return schedule_mask; ++ /* Must be protected by lock since we use shared string buffer */ ++ if (NULL != pm_lock_exec) { ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); + } + -+ /* Send activated notification. */ -+ mali_soft_job_send_activated_notification(job); ++ for (bit = MALI_MAX_NUMBER_OF_DOMAINS - 1; bit >= 0; bit--) { ++ if (mask & (1 << bit)) { ++ bit_str[str_pos] = 'X'; ++ } else { ++ bit_str[str_pos] = '-'; ++ } ++ str_pos++; ++ } + -+ /* Wake up sleeping signaler. 
*/ -+ job->activated = MALI_TRUE; ++ bit_str[MALI_MAX_NUMBER_OF_DOMAINS] = '\0'; + -+ /* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */ -+ if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) { -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); ++ return bit_str; ++} + -+ job->state = MALI_SOFT_JOB_STATE_SIGNALED; -+ mali_soft_job_system_unlock(job->system); ++const char *mali_pm_group_stats_to_string(void) ++{ ++ static char bit_str[MALI_MAX_NUMBER_OF_GROUPS + 1]; ++ u32 num_groups = mali_group_get_glob_num_groups(); ++ u32 i; + -+ schedule_mask |= mali_timeline_tracker_release(&job->tracker); ++ /* Must be protected by lock since we use shared string buffer */ ++ if (NULL != pm_lock_exec) { ++ MALI_DEBUG_ASSERT_LOCK_HELD(pm_lock_exec); ++ } + -+ mali_soft_job_destroy(job); -+ } else { -+ _mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue); ++ for (i = 0; i < num_groups && i < MALI_MAX_NUMBER_OF_GROUPS; i++) { ++ struct mali_group *group; + -+ mali_soft_job_system_unlock(job->system); ++ group = mali_group_get_glob_group(i); ++ ++ if (MALI_TRUE == mali_group_power_is_on(group)) { ++ bit_str[i] = 'X'; ++ } else { ++ bit_str[i] = '-'; ++ } + } + -+ return schedule_mask; ++ bit_str[i] = '\0'; ++ ++ return bit_str; +} ++#endif + -+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job) ++/* ++ * num_pp is the number of PP cores which will be powered on given this mask ++ * cost is the total power cost of cores which will be powered on given this mask ++ */ ++static void mali_pm_stat_from_mask(u32 mask, u32 *num_pp, u32 *cost) +{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ -+ MALI_DEBUG_ASSERT_POINTER(job); -+ MALI_DEBUG_ASSERT_POINTER(job->system); -+ MALI_DEBUG_ASSERT_POINTER(job->system->session); -+ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated); ++ u32 i; + -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job)); ++ /* loop through all cores */ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (!(domain_config[i] & mask)) { ++ continue; ++ } + -+ mali_soft_job_system_lock(job->system); ++ switch (i) { ++ case MALI_DOMAIN_INDEX_GP: ++ *cost += MALI_GP_COST; + -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state || -+ MALI_SOFT_JOB_STATE_SIGNALED == job->state); ++ break; ++ case MALI_DOMAIN_INDEX_PP0: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP1: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP2: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP3: ++ if (mali_is_mali400()) { ++ if ((domain_config[MALI_DOMAIN_INDEX_L20] & mask) ++ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] ++ == domain_config[MALI_DOMAIN_INDEX_L20])) { ++ *num_pp += 1; ++ } ++ } else { ++ if ((domain_config[MALI_DOMAIN_INDEX_L21] & mask) ++ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] ++ == domain_config[MALI_DOMAIN_INDEX_L21])) { ++ *num_pp += 1; ++ } ++ } + -+ if (unlikely(job->system->session->is_aborting)) { -+ /* The session is aborting. This job will be released and destroyed by @ref -+ * mali_soft_job_system_abort(). 
*/ -+ mali_soft_job_system_unlock(job->system); ++ *cost += MALI_PP_COST; ++ break; ++ case MALI_DOMAIN_INDEX_PP4: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP5: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP6: /* Fall through */ ++ case MALI_DOMAIN_INDEX_PP7: ++ MALI_DEBUG_ASSERT(mali_is_mali450()); + -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } ++ if ((domain_config[MALI_DOMAIN_INDEX_L22] & mask) ++ || (domain_config[MALI_DOMAIN_INDEX_DUMMY] ++ == domain_config[MALI_DOMAIN_INDEX_L22])) { ++ *num_pp += 1; ++ } + -+ if (MALI_SOFT_JOB_STATE_STARTED != job->state) { -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state); ++ *cost += MALI_PP_COST; ++ break; ++ case MALI_DOMAIN_INDEX_L20: /* Fall through */ ++ case MALI_DOMAIN_INDEX_L21: /* Fall through */ ++ case MALI_DOMAIN_INDEX_L22: ++ *cost += MALI_L2_COST; + -+ /* The job is about to be signaled, ignore timeout. */ -+ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job)); -+ mali_soft_job_system_unlock(job->system); -+ return schedule_mask; ++ break; ++ } + } ++} + -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); ++void mali_pm_power_cost_setup(void) ++{ ++ /* ++ * Two parallel arrays which store the best domain mask and its cost ++ * The index is the number of PP cores, E.g. Index 0 is for 1 PP option, ++ * might have mask 0x2 and with cost of 1, lower cost is better ++ */ ++ u32 best_mask[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 }; ++ u32 best_cost[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS] = { 0 }; ++ /* Array cores_in_domain is used to store the total pp cores in each pm domain. */ ++ u32 cores_in_domain[MALI_MAX_NUMBER_OF_DOMAINS] = { 0 }; ++ /* Domain_count is used to represent the max domain we have.*/ ++ u32 max_domain_mask = 0; ++ u32 max_domain_id = 0; ++ u32 always_on_pp_cores = 0; + -+ job->state = MALI_SOFT_JOB_STATE_TIMED_OUT; -+ _mali_osk_atomic_inc(&job->refcount); ++ u32 num_pp, cost, mask; ++ u32 i, j , k; + -+ mali_soft_job_system_unlock(job->system); ++ /* Initialize statistics */ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) { ++ best_mask[i] = 0; ++ best_cost[i] = 0xFFFFFFFF; /* lower cost is better */ ++ } + -+ schedule_mask = mali_timeline_tracker_release(&job->tracker); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS + 1; i++) { ++ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) { ++ mali_pm_domain_power_cost_result[i][j] = 0; ++ } ++ } + -+ mali_soft_job_destroy(job); ++ /* Caculate number of pp cores of a given domain config. */ ++ for (i = MALI_DOMAIN_INDEX_PP0; i <= MALI_DOMAIN_INDEX_PP7; i++) { ++ if (0 < domain_config[i]) { ++ /* Get the max domain mask value used to caculate power cost ++ * and we don't count in always on pp cores. */ ++ if (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i] ++ && max_domain_mask < domain_config[i]) { ++ max_domain_mask = domain_config[i]; ++ } + -+ return schedule_mask; -+} ++ if (MALI_PM_DOMAIN_DUMMY_MASK == domain_config[i]) { ++ always_on_pp_cores++; ++ } ++ } ++ } ++ max_domain_id = _mali_osk_fls(max_domain_mask); + -+void mali_soft_job_system_abort(struct mali_soft_job_system *system) -+{ -+ struct mali_soft_job *job, *tmp; -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(jobs); ++ /* ++ * Try all combinations of power domains and check how many PP cores ++ * they have and their power cost. 
++ */ ++ for (mask = 0; mask < (1 << max_domain_id); mask++) { ++ num_pp = 0; ++ cost = 0; + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT(system->session->is_aborting); ++ mali_pm_stat_from_mask(mask, &num_pp, &cost); + -+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session)); ++ /* This mask is usable for all MP1 up to num_pp PP cores, check statistics for all */ ++ for (i = 0; i < num_pp; i++) { ++ if (best_cost[i] >= cost) { ++ best_cost[i] = cost; ++ best_mask[i] = mask; ++ } ++ } ++ } + -+ mali_soft_job_system_lock(system); ++ /* ++ * If we want to enable x pp cores, if x is less than number of always_on pp cores, ++ * all of pp cores we will enable must be always_on pp cores. ++ */ ++ for (i = 0; i < mali_executor_get_num_cores_total(); i++) { ++ if (i < always_on_pp_cores) { ++ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1] ++ = i + 1; ++ } else { ++ mali_pm_domain_power_cost_result[i + 1][MALI_MAX_NUMBER_OF_DOMAINS - 1] ++ = always_on_pp_cores; ++ } ++ } + -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) { -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state || -+ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state); ++ /* In this loop, variable i represent for the number of non-always on pp cores we want to enabled. */ ++ for (i = 0; i < (mali_executor_get_num_cores_total() - always_on_pp_cores); i++) { ++ if (best_mask[i] == 0) { ++ /* This MP variant is not available */ ++ continue; ++ } + -+ if (MALI_SOFT_JOB_STATE_STARTED == job->state) { -+ /* If the job has been activated, we have to release the tracker and destroy -+ * the job. If not, the tracker will be released and the job destroyed when -+ * it is activated. */ -+ if (MALI_TRUE == job->activated) { -+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job)); ++ for (j = 0; j < MALI_MAX_NUMBER_OF_DOMAINS; j++) { ++ cores_in_domain[j] = 0; ++ } + -+ job->state = MALI_SOFT_JOB_STATE_SIGNALED; -+ _mali_osk_list_move(&job->system_list, &jobs); ++ for (j = MALI_DOMAIN_INDEX_PP0; j <= MALI_DOMAIN_INDEX_PP7; j++) { ++ if (0 < domain_config[j] ++ && (MALI_PM_DOMAIN_DUMMY_MASK != domain_config[i])) { ++ cores_in_domain[_mali_osk_fls(domain_config[j]) - 1]++; + } -+ } else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) { -+ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job)); ++ } + -+ /* We need to destroy this soft job. */ -+ _mali_osk_list_move(&job->system_list, &jobs); ++ /* In this loop, j represent for the number we have already enabled.*/ ++ for (j = 0; j <= i;) { ++ /* j used to visit all of domain to get the number of pp cores remained in it. */ ++ for (k = 0; k < max_domain_id; k++) { ++ /* If domain k in best_mask[i] is enabled and this domain has extra pp cores, ++ * we know we must pick at least one pp core from this domain. ++ * And then we move to next enabled pm domain. */ ++ if ((best_mask[i] & (0x1 << k)) && (0 < cores_in_domain[k])) { ++ cores_in_domain[k]--; ++ mali_pm_domain_power_cost_result[always_on_pp_cores + i + 1][k]++; ++ j++; ++ if (j > i) { ++ break; ++ } ++ } ++ } + } + } ++} + -+ mali_soft_job_system_unlock(system); -+ -+ /* Release and destroy jobs. 
*/ -+ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) { -+ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state || -+ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state); ++/* ++ * When we are doing core scaling, ++ * this function is called to return the best mask to ++ * achieve the best pp group power cost. ++ */ ++void mali_pm_get_best_power_cost_mask(int num_requested, int *dst) ++{ ++ MALI_DEBUG_ASSERT((mali_executor_get_num_cores_total() >= num_requested) && (0 <= num_requested)); + -+ if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) { -+ mali_timeline_tracker_release(&job->tracker); -+ } ++ _mali_osk_memcpy(dst, mali_pm_domain_power_cost_result[num_requested], MALI_MAX_NUMBER_OF_DOMAINS * sizeof(int)); ++} + -+ /* Move job back to used list before destroying. */ -+ _mali_osk_list_move(&job->system_list, &system->jobs_used); ++u32 mali_pm_get_current_mask(void) ++{ ++ return pd_mask_current; ++} + -+ mali_soft_job_destroy(job); -+ } ++u32 mali_pm_get_wanted_mask(void) ++{ ++ return pd_mask_wanted; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm.h b/drivers/gpu/arm/mali400/mali/common/mali_pm.h new file mode 100644 -index 000000000..018ef4c52 +index 000000000..dac69958e --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h -@@ -0,0 +1,190 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm.h +@@ -0,0 +1,91 @@ +/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -299739,194 +303663,95 @@ index 000000000..018ef4c52 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_SOFT_JOB_H__ -+#define __MALI_SOFT_JOB_H__ ++#ifndef __MALI_PM_H__ ++#define __MALI_PM_H__ + +#include "mali_osk.h" ++#include "mali_pm_domain.h" + -+#include "mali_timeline.h" -+ -+struct mali_timeline_fence; -+struct mali_session_data; -+struct mali_soft_job; -+struct mali_soft_job_system; -+ -+/** -+ * Soft job types. -+ * -+ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either -+ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out -+ * by the Timeline system. -+ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically -+ * in kernel when the job is activated. ++#define MALI_DOMAIN_INDEX_GP 0 ++#define MALI_DOMAIN_INDEX_PP0 1 ++#define MALI_DOMAIN_INDEX_PP1 2 ++#define MALI_DOMAIN_INDEX_PP2 3 ++#define MALI_DOMAIN_INDEX_PP3 4 ++#define MALI_DOMAIN_INDEX_PP4 5 ++#define MALI_DOMAIN_INDEX_PP5 6 ++#define MALI_DOMAIN_INDEX_PP6 7 ++#define MALI_DOMAIN_INDEX_PP7 8 ++#define MALI_DOMAIN_INDEX_L20 9 ++#define MALI_DOMAIN_INDEX_L21 10 ++#define MALI_DOMAIN_INDEX_L22 11 ++/* ++ * The dummy domain is used when there is no physical power domain ++ * (e.g. no PMU or always on cores) + */ -+typedef enum mali_soft_job_type { -+ MALI_SOFT_JOB_TYPE_SELF_SIGNALED, -+ MALI_SOFT_JOB_TYPE_USER_SIGNALED, -+} mali_soft_job_type; ++#define MALI_DOMAIN_INDEX_DUMMY 12 ++#define MALI_MAX_NUMBER_OF_DOMAINS 13 + +/** -+ * Soft job state. 
-+ * -+ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED. -+ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED. -+ * -+ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to -+ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft -+ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT. -+ * -+ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the -+ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT. This can only happen to soft jobs in state -+ * MALI_SOFT_JOB_STATE_STARTED. ++ * Initialize the Mali PM module + * ++ * PM module covers Mali PM core, PM domains and Mali PMU + */ -+typedef enum mali_soft_job_state { -+ MALI_SOFT_JOB_STATE_ALLOCATED, -+ MALI_SOFT_JOB_STATE_STARTED, -+ MALI_SOFT_JOB_STATE_SIGNALED, -+ MALI_SOFT_JOB_STATE_TIMED_OUT, -+} mali_soft_job_state; -+ -+#define MALI_SOFT_JOB_INVALID_ID ((u32) -1) ++_mali_osk_errcode_t mali_pm_initialize(void); + +/** -+ * Soft job struct. -+ * -+ * Soft job can be used to represent any kind of CPU work done in kernel-space. ++ * Terminate the Mali PM module + */ -+typedef struct mali_soft_job { -+ mali_soft_job_type type; /**< Soft job type. Must be one of MALI_SOFT_JOB_TYPE_*. */ -+ u64 user_job; /**< Identifier for soft job in user space. */ -+ _mali_osk_atomic_t refcount; /**< Soft jobs are reference counted to prevent premature deletion. */ -+ struct mali_timeline_tracker tracker; /**< Timeline tracker for soft job. */ -+ mali_bool activated; /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */ -+ _mali_osk_notification_t *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */ ++void mali_pm_terminate(void); + -+ /* Protected by soft job system lock. */ -+ u32 id; /**< Used by user-space to find corresponding soft job in kernel-space. */ -+ mali_soft_job_state state; /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */ -+ struct mali_soft_job_system *system; /**< The soft job system this job is in. */ -+ _mali_osk_list_t system_list; /**< List element used by soft job system. */ -+} mali_soft_job; ++void mali_pm_exec_lock(void); ++void mali_pm_exec_unlock(void); + -+/** -+ * Per-session soft job system. -+ * -+ * The soft job system is used to manage all soft jobs that belongs to a session. -+ */ -+typedef struct mali_soft_job_system { -+ struct mali_session_data *session; /**< The session this soft job system belongs to. */ -+ _MALI_OSK_LIST_HEAD(jobs_used); /**< List of all allocated soft jobs. */ + -+ _mali_osk_spinlock_irq_t *lock; /**< Lock used to protect soft job system and its soft jobs. */ -+ u32 lock_owner; /**< Contains tid of thread that locked the system or 0, if not locked. */ -+ u32 last_job_id; /**< Recored the last job id protected by lock. */ -+} mali_soft_job_system; ++struct mali_pm_domain *mali_pm_register_l2_cache(u32 domain_index, ++ struct mali_l2_cache_core *l2_cache); ++struct mali_pm_domain *mali_pm_register_group(u32 domain_index, ++ struct mali_group *group); + -+/** -+ * Create a soft job system. -+ * -+ * @param session The session this soft job system will belong to. -+ * @return The new soft job system, or NULL if unsuccessful. 
-+ */ -+struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session); ++mali_bool mali_pm_get_domain_refs(struct mali_pm_domain **domains, ++ struct mali_group **groups, ++ u32 num_domains); ++mali_bool mali_pm_put_domain_refs(struct mali_pm_domain **domains, ++ u32 num_domains); + -+/** -+ * Destroy a soft job system. -+ * -+ * @note The soft job must not have any started or activated jobs. Call @ref -+ * mali_soft_job_system_abort first. -+ * -+ * @param system The soft job system we are destroying. -+ */ -+void mali_soft_job_system_destroy(struct mali_soft_job_system *system); ++void mali_pm_init_begin(void); ++void mali_pm_init_end(void); + -+/** -+ * Create a soft job. -+ * -+ * @param system Soft job system to create soft job from. -+ * @param type Type of the soft job. -+ * @param user_job Identifier for soft job in user space. -+ * @return New soft job if successful, NULL if not. -+ */ -+struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job); ++void mali_pm_update_sync(void); ++void mali_pm_update_async(void); + -+/** -+ * Destroy soft job. -+ * -+ * @param job Soft job to destroy. -+ */ -+void mali_soft_job_destroy(struct mali_soft_job *job); ++/* Callback functions for system power management */ ++void mali_pm_os_suspend(mali_bool os_suspend); ++void mali_pm_os_resume(void); + -+/** -+ * Start a soft job. -+ * -+ * The soft job will be added to the Timeline system which will then activate it after all -+ * dependencies have been resolved. -+ * -+ * Create soft jobs with @ref mali_soft_job_create before starting them. -+ * -+ * @param job Soft job to start. -+ * @param fence Fence representing dependencies for this soft job. -+ * @return Point on soft job timeline. -+ */ -+mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence); ++mali_bool mali_pm_runtime_suspend(void); ++void mali_pm_runtime_resume(void); + -+/** -+ * Use by user-space to signal that a soft job has completed. -+ * -+ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED. -+ * -+ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful. -+ * -+ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be -+ * destroyed after this call and should no longer be used. -+ * -+ * @note This function will block until the soft job has been activated. -+ * -+ * @param system The soft job system the job was started in. -+ * @param job_id ID of soft job we are signaling. -+ * -+ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the -+ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job. -+ */ -+_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id); ++#if MALI_STATE_TRACKING ++u32 mali_pm_dump_state_domain(struct mali_pm_domain *domain, ++ char *buf, u32 size); ++#endif + -+/** -+ * Used by the Timeline system to activate a soft job. -+ * -+ * @param job The soft job that is being activated. -+ * @return A scheduling bitmask. -+ */ -+mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job); ++void mali_pm_power_cost_setup(void); + -+/** -+ * Used by the Timeline system to timeout a soft job. -+ * -+ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after -+ * activation. 
-+ * -+ * @param job The soft job that is being timed out. -+ * @return A scheduling bitmask. -+ */ -+mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job); ++void mali_pm_get_best_power_cost_mask(int num_requested, int *dst); + -+/** -+ * Used to cleanup activated soft jobs in the soft job system on session abort. -+ * -+ * @param system The soft job system that is being aborted. -+ */ -+void mali_soft_job_system_abort(struct mali_soft_job_system *system); ++#if defined(DEBUG) ++const char *mali_pm_mask_to_string(u32 mask); ++#endif + -+#endif /* __MALI_SOFT_JOB_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c ++u32 mali_pm_get_current_mask(void); ++u32 mali_pm_get_wanted_mask(void); ++#endif /* __MALI_PM_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c new file mode 100644 -index 000000000..f829e99f0 +index 000000000..8290f7d88 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c -@@ -0,0 +1,77 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.c +@@ -0,0 +1,209 @@ +/* -+ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -299935,157 +303760,213 @@ index 000000000..f829e99f0 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include "mali_spinlock_reentrant.h" -+ -+#include "mali_osk.h" +#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_pm_domain.h" ++#include "mali_pmu.h" ++#include "mali_group.h" ++#include "mali_pm.h" + -+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order) ++static struct mali_pm_domain *mali_pm_domains[MALI_MAX_NUMBER_OF_DOMAINS] = ++{ NULL, }; ++ ++void mali_pm_domain_initialize(void) +{ -+ struct mali_spinlock_reentrant *spinlock; ++ /* Domains will be initialized/created on demand */ ++} + -+ spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant)); -+ if (NULL == spinlock) { -+ return NULL; ++void mali_pm_domain_terminate(void) ++{ ++ int i; ++ ++ /* Delete all domains that has been created */ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ mali_pm_domain_delete(mali_pm_domains[i]); ++ mali_pm_domains[i] = NULL; + } ++} + -+ spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order); -+ if (NULL == spinlock->lock) { -+ mali_spinlock_reentrant_term(spinlock); -+ return NULL; ++struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask) ++{ ++ struct mali_pm_domain *domain = NULL; ++ u32 domain_id = 0; ++ ++ domain = mali_pm_domain_get_from_mask(pmu_mask); ++ if (NULL != domain) return domain; ++ ++ MALI_DEBUG_PRINT(2, ++ ("Mali PM domain: Creating Mali PM domain (mask=0x%08X)\n", ++ pmu_mask)); ++ ++ domain = (struct mali_pm_domain *)_mali_osk_malloc( ++ sizeof(struct mali_pm_domain)); ++ if (NULL != domain) { ++ domain->power_is_on = MALI_FALSE; ++ domain->pmu_mask = pmu_mask; ++ domain->use_count = 0; ++ _mali_osk_list_init(&domain->group_list); ++ _mali_osk_list_init(&domain->l2_cache_list); ++ ++ domain_id = 
_mali_osk_fls(pmu_mask) - 1; ++ /* Verify the domain_id */ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > domain_id); ++ /* Verify that pmu_mask only one bit is set */ ++ MALI_DEBUG_ASSERT((1 << domain_id) == pmu_mask); ++ mali_pm_domains[domain_id] = domain; ++ ++ return domain; ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Unable to create PM domain\n")); + } + -+ return spinlock; ++ return NULL; +} + -+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock) ++void mali_pm_domain_delete(struct mali_pm_domain *domain) +{ -+ MALI_DEBUG_ASSERT_POINTER(spinlock); -+ MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner); -+ -+ if (NULL != spinlock->lock) { -+ _mali_osk_spinlock_irq_term(spinlock->lock); ++ if (NULL == domain) { ++ return; + } + -+ _mali_osk_free(spinlock); ++ _mali_osk_list_delinit(&domain->group_list); ++ _mali_osk_list_delinit(&domain->l2_cache_list); ++ ++ _mali_osk_free(domain); +} + -+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid) ++void mali_pm_domain_add_group(struct mali_pm_domain *domain, ++ struct mali_group *group) +{ -+ MALI_DEBUG_ASSERT_POINTER(spinlock); -+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); -+ MALI_DEBUG_ASSERT(0 != tid); ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__)); ++ /* ++ * Use addtail because virtual group is created last and it needs ++ * to be at the end of the list (in order to be activated after ++ * all children. ++ */ ++ _mali_osk_list_addtail(&group->pm_domain_list, &domain->group_list); ++} + -+ if (tid != spinlock->owner) { -+ _mali_osk_spinlock_irq_lock(spinlock->lock); -+ MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter); -+ spinlock->owner = tid; ++void mali_pm_domain_add_l2_cache(struct mali_pm_domain *domain, ++ struct mali_l2_cache_core *l2_cache) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ MALI_DEBUG_ASSERT_POINTER(l2_cache); ++ _mali_osk_list_add(&l2_cache->pm_domain_list, &domain->l2_cache_list); ++} ++ ++struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask) ++{ ++ u32 id = 0; ++ ++ if (0 == mask) { ++ return NULL; + } + -+ MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__)); ++ id = _mali_osk_fls(mask) - 1; + -+ ++spinlock->counter; ++ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); ++ /* Verify that pmu_mask only one bit is set */ ++ MALI_DEBUG_ASSERT((1 << id) == mask); ++ ++ return mali_pm_domains[id]; +} + -+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid) ++struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id) +{ -+ MALI_DEBUG_ASSERT_POINTER(spinlock); -+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); -+ MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner); ++ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); + -+ --spinlock->counter; -+ if (0 == spinlock->counter) { -+ spinlock->owner = 0; -+ MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__)); -+ _mali_osk_spinlock_irq_unlock(spinlock->lock); -+ } ++ return mali_pm_domains[id]; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h -new file mode 100644 -index 000000000..4d788ec1b ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h -@@ -0,0 +1,70 @@ -+/* -+ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+#ifndef __MALI_SPINLOCK_REENTRANT_H__ -+#define __MALI_SPINLOCK_REENTRANT_H__ ++u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ if (0 == domain->use_count) { ++ _mali_osk_pm_dev_ref_get_async(); ++ } + -+/** -+ * Reentrant spinlock. -+ */ -+struct mali_spinlock_reentrant { -+ _mali_osk_spinlock_irq_t *lock; -+ u32 owner; -+ u32 counter; -+}; ++ ++domain->use_count; ++ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_get, use_count => %u\n", domain, domain->use_count)); + -+/** -+ * Create a new reentrant spinlock. -+ * -+ * @param lock_order Lock order. -+ * @return New reentrant spinlock. -+ */ -+struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order); ++ /* Return our mask so caller can check this against wanted mask */ ++ return domain->pmu_mask; ++} + -+/** -+ * Terminate reentrant spinlock and free any associated resources. -+ * -+ * @param spinlock Reentrant spinlock to terminate. -+ */ -+void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock); ++u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); + -+/** -+ * Wait for reentrant spinlock to be signaled. -+ * -+ * @param spinlock Reentrant spinlock. -+ * @param tid Thread ID. -+ */ -+void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid); ++ --domain->use_count; ++ MALI_DEBUG_PRINT(4, ("PM domain %p: ref_put, use_count => %u\n", domain, domain->use_count)); + -+/** -+ * Signal reentrant spinlock. -+ * -+ * @param spinlock Reentrant spinlock. -+ * @param tid Thread ID. -+ */ -+void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid); ++ if (0 == domain->use_count) { ++ _mali_osk_pm_dev_ref_put(); ++ } + -+/** -+ * Check if thread is holding reentrant spinlock. -+ * -+ * @param spinlock Reentrant spinlock. -+ * @param tid Thread ID. -+ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid) ++ /* ++ * Return the PMU mask which now could be be powered down ++ * (the bit for this domain). ++ * This is the responsibility of the caller (mali_pm) ++ */ ++ return (0 == domain->use_count ? 
domain->pmu_mask : 0); ++} ++ ++#if MALI_STATE_TRACKING ++u32 mali_pm_domain_get_id(struct mali_pm_domain *domain) +{ -+ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); -+ return (tid == spinlock->owner && 0 < spinlock->counter); ++ u32 id = 0; ++ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ MALI_DEBUG_ASSERT(0 != domain->pmu_mask); ++ ++ id = _mali_osk_fls(domain->pmu_mask) - 1; ++ ++ MALI_DEBUG_ASSERT(MALI_MAX_NUMBER_OF_DOMAINS > id); ++ /* Verify that pmu_mask only one bit is set */ ++ MALI_DEBUG_ASSERT((1 << id) == domain->pmu_mask); ++ /* Verify that we have stored the domain at right id/index */ ++ MALI_DEBUG_ASSERT(domain == mali_pm_domains[id]); ++ ++ return id; +} ++#endif + -+#endif /* __MALI_SPINLOCK_REENTRANT_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline.c ++#if defined(DEBUG) ++mali_bool mali_pm_domain_all_unused(void) ++{ ++ int i; ++ ++ for (i = 0; i < MALI_MAX_NUMBER_OF_DOMAINS; i++) { ++ if (NULL == mali_pm_domains[i]) { ++ /* Nothing to check */ ++ continue; ++ } ++ ++ if (MALI_TRUE == mali_pm_domains[i]->power_is_on) { ++ /* Not ready for suspend! */ ++ return MALI_FALSE; ++ } ++ ++ if (0 != mali_pm_domains[i]->use_count) { ++ /* Not ready for suspend! */ ++ return MALI_FALSE; ++ } ++ } ++ ++ return MALI_TRUE; ++} ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h new file mode 100644 -index 000000000..ffffee930 +index 000000000..5776abe39 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline.c -@@ -0,0 +1,1964 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_domain.h +@@ -0,0 +1,104 @@ +/* -+ * Copyright (C) 2013-2018 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -300093,1969 +303974,1829 @@ index 000000000..ffffee930 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+#include -+#include "mali_timeline.h" -+#include "mali_kernel_common.h" -+#include "mali_scheduler.h" -+#include "mali_soft_job.h" -+#include "mali_timeline_fence_wait.h" -+#include "mali_timeline_sync_fence.h" -+#include "mali_executor.h" -+#include "mali_pp_job.h" + -+#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid())) ++#ifndef __MALI_PM_DOMAIN_H__ ++#define __MALI_PM_DOMAIN_H__ + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+_mali_osk_wq_work_t *sync_fence_callback_work_t = NULL; -+_mali_osk_spinlock_irq_t *sync_fence_callback_list_lock = NULL; -+static _MALI_OSK_LIST_HEAD_STATIC_INIT(sync_fence_callback_queue); -+#endif ++#include "mali_kernel_common.h" ++#include "mali_osk.h" + -+/* -+ * Following three elements are used to record how many -+ * gp, physical pp or virtual pp jobs are delayed in the whole -+ * timeline system, we can use these three value to decide -+ * if need to deactivate idle group. 
-+ */ -+_mali_osk_atomic_t gp_tracker_count; -+_mali_osk_atomic_t phy_pp_tracker_count; -+_mali_osk_atomic_t virt_pp_tracker_count; ++#include "mali_l2_cache.h" ++#include "mali_group.h" ++#include "mali_pmu.h" + -+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system, -+ struct mali_timeline_waiter *waiter); ++/* Instances are protected by PM state lock */ ++struct mali_pm_domain { ++ mali_bool power_is_on; ++ s32 use_count; ++ u32 pmu_mask; + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) -+#include -+#include -+#include ++ /* Zero or more groups can belong to this domain */ ++ _mali_osk_list_t group_list; + -+struct mali_deferred_fence_put_entry { -+ struct hlist_node list; -+ struct sync_fence *fence; ++ /* Zero or more L2 caches can belong to this domain */ ++ _mali_osk_list_t l2_cache_list; +}; + -+static HLIST_HEAD(mali_timeline_sync_fence_to_free_list); -+static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock); -+ -+static void put_sync_fences(struct work_struct *ignore) -+{ -+ struct hlist_head list; -+ struct hlist_node *tmp, *pos; -+ unsigned long flags; -+ struct mali_deferred_fence_put_entry *o; -+ -+ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags); -+ hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list); -+ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags); -+ -+ hlist_for_each_entry_safe(o, pos, tmp, &list, list) { -+ sync_fence_put(o->fence); -+ kfree(o); -+ } -+} + -+static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences); -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */ ++void mali_pm_domain_initialize(void); ++void mali_pm_domain_terminate(void); + -+/* Callback that is called when a sync fence a tracker is waiting on is signaled. 
*/ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter) -+#else -+static void mali_timeline_sync_fence_callback(struct mali_internal_sync_fence *sync_fence, struct mali_internal_sync_fence_waiter *sync_fence_waiter) -+#endif -+{ -+ struct mali_timeline_tracker *tracker; ++struct mali_pm_domain *mali_pm_domain_create(u32 pmu_mask); ++void mali_pm_domain_delete(struct mali_pm_domain *domain); + -+ MALI_IGNORE(sync_fence); -+ MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter); ++void mali_pm_domain_add_l2_cache( ++ struct mali_pm_domain *domain, ++ struct mali_l2_cache_core *l2_cache); ++void mali_pm_domain_add_group(struct mali_pm_domain *domain, ++ struct mali_group *group); + -+ tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter); -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++struct mali_pm_domain *mali_pm_domain_get_from_mask(u32 mask); ++struct mali_pm_domain *mali_pm_domain_get_from_index(u32 id); + -+ _mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock); -+ _mali_osk_list_addtail(&tracker->sync_fence_signal_list, &sync_fence_callback_queue); -+ _mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock); ++/* Ref counting */ ++u32 mali_pm_domain_ref_get(struct mali_pm_domain *domain); ++u32 mali_pm_domain_ref_put(struct mali_pm_domain *domain); + -+ _mali_osk_wq_schedule_work(sync_fence_callback_work_t); ++MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_group_list( ++ struct mali_pm_domain *domain) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ return &domain->group_list; +} -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker) ++MALI_STATIC_INLINE _mali_osk_list_t *mali_pm_domain_get_l2_cache_list( ++ struct mali_pm_domain *domain) +{ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type); -+ -+ return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job); ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ return &domain->l2_cache_list; +} + -+static void mali_timeline_timer_callback(void *data) ++MALI_STATIC_INLINE mali_bool mali_pm_domain_power_is_on( ++ struct mali_pm_domain *domain) +{ -+ struct mali_timeline_system *system; -+ struct mali_timeline_tracker *tracker; -+ struct mali_timeline *timeline; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ u32 tid = _mali_osk_get_tid(); ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ return domain->power_is_on; ++} + -+ timeline = (struct mali_timeline *) data; -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ -+ system = timeline->system; -+ MALI_DEBUG_ASSERT_POINTER(system); -+ -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++MALI_STATIC_INLINE void mali_pm_domain_set_power_on( ++ struct mali_pm_domain *domain, ++ mali_bool power_is_on) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ domain->power_is_on = power_is_on; ++} + -+ if (!system->timer_enabled) { -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ return; -+ } ++MALI_STATIC_INLINE u32 mali_pm_domain_get_use_count( ++ struct mali_pm_domain *domain) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ return domain->use_count; ++} + -+ tracker = timeline->tracker_tail; -+ timeline->timer_active = MALI_FALSE; ++#if MALI_STATE_TRACKING ++u32 mali_pm_domain_get_id(struct mali_pm_domain *domain); + -+ if (NULL != tracker && MALI_TRUE 
== tracker->timer_active) { -+ /* This is likely the delayed work that has been schedule out before cancelled. */ -+ if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) { -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ return; -+ } ++MALI_STATIC_INLINE u32 mali_pm_domain_get_mask(struct mali_pm_domain *domain) ++{ ++ MALI_DEBUG_ASSERT_POINTER(domain); ++ return domain->pmu_mask; ++} ++#endif + -+ schedule_mask = mali_timeline_tracker_time_out(tracker); -+ tracker->timer_active = MALI_FALSE; -+ } else { -+ MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n")); -+ } ++#if defined(DEBUG) ++mali_bool mali_pm_domain_all_unused(void); ++#endif + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++#endif /* __MALI_PM_DOMAIN_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c +new file mode 100644 +index 000000000..cf7482323 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.c +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++#include "mali_pm_metrics.h" ++#include "mali_osk_locks.h" ++#include "mali_osk_mali.h" ++#include + -+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); -+} ++#define MALI_PM_TIME_SHIFT 0 ++#define MALI_UTILIZATION_MAX_PERIOD 80000000/* ns = 100ms */ + -+void mali_timeline_system_stop_timer(struct mali_timeline_system *system) ++_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev) +{ -+ u32 i; -+ u32 tid = _mali_osk_get_tid(); ++ int i = 0; + -+ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT(mdev != NULL); + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ system->timer_enabled = MALI_FALSE; -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ mdev->mali_metrics.time_period_start = ktime_get(); ++ mdev->mali_metrics.time_period_start_gp = mdev->mali_metrics.time_period_start; ++ mdev->mali_metrics.time_period_start_pp = mdev->mali_metrics.time_period_start; + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline = system->timelines[i]; ++ mdev->mali_metrics.time_busy = 0; ++ mdev->mali_metrics.time_idle = 0; ++ mdev->mali_metrics.prev_busy = 0; ++ mdev->mali_metrics.prev_idle = 0; ++ mdev->mali_metrics.num_running_gp_cores = 0; ++ mdev->mali_metrics.num_running_pp_cores = 0; ++ mdev->mali_metrics.time_busy_gp = 0; ++ mdev->mali_metrics.time_idle_gp = 0; + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ for (i = 0; i < MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; i++) { ++ mdev->mali_metrics.time_busy_pp[i] = 0; ++ mdev->mali_metrics.time_idle_pp[i] = 0; ++ } ++ mdev->mali_metrics.gpu_active = MALI_FALSE; + -+ if (NULL != timeline->delayed_work) { -+ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work); -+ timeline->timer_active = MALI_FALSE; -+ } ++ mdev->mali_metrics.lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); ++ if (NULL == mdev->mali_metrics.lock) { ++ return 
_MALI_OSK_ERR_NOMEM; + } ++ ++ return _MALI_OSK_ERR_OK; +} + -+static void mali_timeline_destroy(struct mali_timeline *timeline) ++void mali_pm_metrics_term(struct mali_device *mdev) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ if (NULL != timeline) { -+ /* Assert that the timeline object has been properly cleaned up before destroying it. */ -+ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next); -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head); -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); -+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head); -+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail); -+ MALI_DEBUG_ASSERT(NULL != timeline->system); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id); -+ -+ if (NULL != timeline->delayed_work) { -+ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work); -+ _mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work); -+ } -+ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (NULL != timeline->sync_tl) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_timeline_destroy(timeline->sync_tl); -+#else -+ mali_internal_sync_timeline_destroy(timeline->sync_tl); -+#endif -+ } -+#else -+ _mali_osk_free(timeline); -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -+ } ++ _mali_osk_spinlock_irq_term(mdev->mali_metrics.lock); +} + -+static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id) ++/*caller needs to hold mdev->mali_metrics.lock before calling this function*/ ++void mali_pm_record_job_status(struct mali_device *mdev) +{ -+ struct mali_timeline *timeline; -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX); -+ -+ timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline)); -+ if (NULL == timeline) { -+ return NULL; -+ } -+ -+ /* Initially the timeline is empty. */ -+#if defined(MALI_TIMELINE_DEBUG_START_POINT) -+ /* Start the timeline a bit before wrapping when debugging. */ -+ timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128; -+#else -+ timeline->point_next = 1; -+#endif -+ timeline->point_oldest = timeline->point_next; ++ ktime_t now; ++ ktime_t diff; ++ u64 ns_time; + -+ /* The tracker and waiter lists will initially be empty. 
*/ ++ MALI_DEBUG_ASSERT(mdev != NULL); + -+ timeline->system = system; -+ timeline->id = id; ++ now = ktime_get(); ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); + -+ timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline); -+ if (NULL == timeline->delayed_work) { -+ mali_timeline_destroy(timeline); -+ return NULL; -+ } ++ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_busy += ns_time; ++ mdev->mali_metrics.time_period_start = now; ++} + -+ timeline->timer_active = MALI_FALSE; ++void mali_pm_record_gpu_idle(mali_bool is_gp) ++{ ++ ktime_t now; ++ ktime_t diff; ++ u64 ns_time; ++ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ { -+ char timeline_name[32]; ++ MALI_DEBUG_ASSERT(mdev != NULL); + -+ switch (id) { -+ case MALI_TIMELINE_GP: -+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid()); -+ break; -+ case MALI_TIMELINE_PP: -+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid()); -+ break; -+ case MALI_TIMELINE_SOFT: -+ _mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid()); -+ break; -+ default: -+ MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id)); -+ mali_timeline_destroy(timeline); -+ return NULL; -+ } ++ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); ++ now = ktime_get(); + -+ timeline->destroyed = MALI_FALSE; ++ if (MALI_TRUE == is_gp) { ++ --mdev->mali_metrics.num_running_gp_cores; ++ if (0 == mdev->mali_metrics.num_running_gp_cores) { ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp); ++ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_busy_gp += ns_time; ++ mdev->mali_metrics.time_period_start_gp = now; + -+ timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name); -+ if (NULL == timeline->sync_tl) { -+ mali_timeline_destroy(timeline); -+ return NULL; ++ if (0 == mdev->mali_metrics.num_running_pp_cores) { ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); ++ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_busy += ns_time; ++ mdev->mali_metrics.time_period_start = now; ++ mdev->mali_metrics.gpu_active = MALI_FALSE; ++ } + } ++ } else { ++ --mdev->mali_metrics.num_running_pp_cores; ++ if (0 == mdev->mali_metrics.num_running_pp_cores) { ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp); ++ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_busy_pp[0] += ns_time; ++ mdev->mali_metrics.time_period_start_pp = now; + -+ timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM); -+ if (NULL == timeline->spinlock) { -+ mali_timeline_destroy(timeline); -+ return NULL; ++ if (0 == mdev->mali_metrics.num_running_gp_cores) { ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); ++ ns_time = (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_busy += ns_time; ++ mdev->mali_metrics.time_period_start = now; ++ mdev->mali_metrics.gpu_active = MALI_FALSE; ++ } + } + } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+ return timeline; ++ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); +} + -+static void mali_timeline_insert_tracker(struct mali_timeline 
*timeline, struct mali_timeline_tracker *tracker) ++void mali_pm_record_gpu_active(mali_bool is_gp) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ -+ if (mali_timeline_is_full(timeline)) { -+ /* Don't add tracker if timeline is full. */ -+ tracker->point = MALI_TIMELINE_NO_POINT; -+ return; -+ } -+ -+ tracker->timeline = timeline; -+ tracker->point = timeline->point_next; ++ ktime_t now; ++ ktime_t diff; ++ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); + -+ /* Find next available point. */ -+ timeline->point_next++; -+ if (MALI_TIMELINE_NO_POINT == timeline->point_next) { -+ timeline->point_next++; -+ } ++ MALI_DEBUG_ASSERT(mdev != NULL); + -+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); ++ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); ++ now = ktime_get(); + -+ if (MALI_TIMELINE_TRACKER_GP == tracker->type) { -+ _mali_osk_atomic_inc(&gp_tracker_count); -+ } else if (MALI_TIMELINE_TRACKER_PP == tracker->type) { -+ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) { -+ _mali_osk_atomic_inc(&virt_pp_tracker_count); ++ if (MALI_TRUE == is_gp) { ++ mdev->mali_metrics.num_running_gp_cores++; ++ if (1 == mdev->mali_metrics.num_running_gp_cores) { ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_gp); ++ mdev->mali_metrics.time_idle_gp += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_period_start_gp = now; ++ if (0 == mdev->mali_metrics.num_running_pp_cores) { ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE); ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); ++ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_period_start = now; ++ mdev->mali_metrics.gpu_active = MALI_TRUE; ++ } + } else { -+ _mali_osk_atomic_inc(&phy_pp_tracker_count); ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); ++ } ++ } else { ++ mdev->mali_metrics.num_running_pp_cores++; ++ if (1 == mdev->mali_metrics.num_running_pp_cores) { ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start_pp); ++ mdev->mali_metrics.time_idle_pp[0] += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_period_start_pp = now; ++ if (0 == mdev->mali_metrics.num_running_gp_cores) { ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_FALSE); ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); ++ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ mdev->mali_metrics.time_period_start = now; ++ mdev->mali_metrics.gpu_active = MALI_TRUE; ++ } ++ } else { ++ MALI_DEBUG_ASSERT(mdev->mali_metrics.gpu_active == MALI_TRUE); + } + } + -+ /* Add tracker as new head on timeline's tracker list. */ -+ if (NULL == timeline->tracker_head) { -+ /* Tracker list is empty. 
*/ -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); ++ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++} + -+ timeline->tracker_tail = tracker; + -+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next); -+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev); -+ } else { -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next); ++/*caller needs to hold mdev->mali_metrics.lock before calling this function*/ ++static void mali_pm_get_dvfs_utilisation_calc(struct mali_device *mdev, ktime_t now) ++{ ++ ktime_t diff; + -+ tracker->timeline_prev = timeline->tracker_head; -+ timeline->tracker_head->timeline_next = tracker; ++ MALI_DEBUG_ASSERT(mdev != NULL); + -+ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next); -+ } -+ timeline->tracker_head = tracker; ++ diff = ktime_sub(now, mdev->mali_metrics.time_period_start); + -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next); -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev); ++ if (mdev->mali_metrics.gpu_active) { ++ mdev->mali_metrics.time_busy += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ } else { ++ mdev->mali_metrics.time_idle += (u64)(ktime_to_ns(diff) >> MALI_PM_TIME_SHIFT); ++ } +} + -+/* Inserting the waiter object into the given timeline */ -+static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new) ++/* Caller needs to hold mdev->mali_metrics.lock before calling this function. */ ++static void mali_pm_reset_dvfs_utilisation_unlocked(struct mali_device *mdev, ktime_t now) +{ -+ struct mali_timeline_waiter *waiter_prev; -+ struct mali_timeline_waiter *waiter_next; -+ -+ /* Waiter time must be between timeline head and tail, and there must -+ * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */ -+ MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN); -+ MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN); -+ -+ /* Finding out where to put this waiter, in the linked waiter list of the given timeline **/ -+ waiter_prev = timeline->waiter_head; /* Insert new after waiter_prev */ -+ waiter_next = NULL; /* Insert new before waiter_next */ ++ /* Store previous value */ ++ mdev->mali_metrics.prev_idle = mdev->mali_metrics.time_idle; ++ mdev->mali_metrics.prev_busy = mdev->mali_metrics.time_busy; + -+ /* Iterating backwards from head (newest) to tail (oldest) until we -+ * find the correct spot to insert the new waiter */ -+ while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) { -+ waiter_next = waiter_prev; -+ waiter_prev = waiter_prev->timeline_prev; -+ } ++ /* Reset current values */ ++ mdev->mali_metrics.time_period_start = now; ++ mdev->mali_metrics.time_period_start_gp = now; ++ mdev->mali_metrics.time_period_start_pp = now; ++ mdev->mali_metrics.time_idle = 0; ++ mdev->mali_metrics.time_busy = 0; + -+ if (NULL == waiter_prev && NULL == waiter_next) { -+ /* list is empty */ -+ timeline->waiter_head = waiter_new; -+ timeline->waiter_tail = waiter_new; -+ } else if (NULL == waiter_next) { -+ /* insert at head */ -+ waiter_new->timeline_prev = timeline->waiter_head; -+ timeline->waiter_head->timeline_next = waiter_new; -+ timeline->waiter_head = waiter_new; -+ } else if (NULL == waiter_prev) { -+ /* insert at tail */ -+ waiter_new->timeline_next = timeline->waiter_tail; -+ timeline->waiter_tail->timeline_prev = waiter_new; -+ timeline->waiter_tail = waiter_new; -+ } else { -+ /* insert between */ -+ 
waiter_new->timeline_next = waiter_next; -+ waiter_new->timeline_prev = waiter_prev; -+ waiter_next->timeline_prev = waiter_new; -+ waiter_prev->timeline_next = waiter_new; -+ } ++ mdev->mali_metrics.time_busy_gp = 0; ++ mdev->mali_metrics.time_idle_gp = 0; ++ mdev->mali_metrics.time_busy_pp[0] = 0; ++ mdev->mali_metrics.time_idle_pp[0] = 0; +} + -+static void mali_timeline_update_delayed_work(struct mali_timeline *timeline) ++void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev) +{ -+ struct mali_timeline_system *system; -+ struct mali_timeline_tracker *oldest_tracker; ++ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); ++ mali_pm_reset_dvfs_utilisation_unlocked(mdev, ktime_get()); ++ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++} + -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id); ++void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, ++ unsigned long *total_out, unsigned long *busy_out) ++{ ++ ktime_t now = ktime_get(); ++ u64 busy = 0; ++ u64 total = 0; + -+ system = timeline->system; -+ MALI_DEBUG_ASSERT_POINTER(system); ++ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ mali_pm_get_dvfs_utilisation_calc(mdev, now); + -+ /* Timer is disabled, early out. */ -+ if (!system->timer_enabled) return; ++ busy = mdev->mali_metrics.time_busy; ++ total = busy + mdev->mali_metrics.time_idle; + -+ oldest_tracker = timeline->tracker_tail; -+ if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) { -+ if (MALI_FALSE == oldest_tracker->timer_active) { -+ if (MALI_TRUE == timeline->timer_active) { -+ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work); -+ } -+ _mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ); -+ oldest_tracker->timer_active = MALI_TRUE; -+ timeline->timer_active = MALI_TRUE; -+ } -+ } else if (MALI_TRUE == timeline->timer_active) { -+ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work); -+ timeline->timer_active = MALI_FALSE; ++ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default ++ * 100ms) */ ++ if (total >= MALI_UTILIZATION_MAX_PERIOD) { ++ mali_pm_reset_dvfs_utilisation_unlocked(mdev, now); ++ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { ++ total += mdev->mali_metrics.prev_idle + ++ mdev->mali_metrics.prev_busy; ++ busy += mdev->mali_metrics.prev_busy; + } ++ ++ *total_out = (unsigned long)total; ++ *busy_out = (unsigned long)busy; ++ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); +} + -+static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline) ++void mali_pm_metrics_spin_lock(void) +{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); ++ _mali_osk_spinlock_irq_lock(mdev->mali_metrics.lock); ++} + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++void mali_pm_metrics_spin_unlock(void) ++{ ++ struct mali_device *mdev = dev_get_drvdata(&mali_platform_device->dev); ++ _mali_osk_spinlock_irq_unlock(mdev->mali_metrics.lock); ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h +new file mode 100644 +index 000000000..2b136b0de +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pm_metrics.h +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ MALI_DEBUG_CODE({ -+ struct mali_timeline_system *system = timeline->system; -+ MALI_DEBUG_ASSERT_POINTER(system); ++#ifndef __MALI_PM_METRICS_H__ ++#define __MALI_PM_METRICS_H__ + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ }); ++#ifdef CONFIG_MALI_DEVFREQ ++#include "mali_osk_locks.h" ++#include "mali_group.h" + -+ if (NULL != timeline->tracker_tail) { -+ /* Set oldest point to oldest tracker's point */ -+ timeline->point_oldest = timeline->tracker_tail->point; -+ } else { -+ /* No trackers, mark point list as empty */ -+ timeline->point_oldest = timeline->point_next; -+ } ++struct mali_device; + -+ /* Release all waiters no longer on the timeline's point list. -+ * Releasing a waiter can trigger this function to be called again, so -+ * we do not store any pointers on stack. */ -+ while (NULL != timeline->waiter_tail) { -+ u32 waiter_time_relative; -+ u32 time_head_relative; -+ struct mali_timeline_waiter *waiter = timeline->waiter_tail; ++/** ++ * Metrics data collected for use by the power management framework. ++ */ ++struct mali_pm_metrics_data { ++ ktime_t time_period_start; ++ u64 time_busy; ++ u64 time_idle; ++ u64 prev_busy; ++ u64 prev_idle; ++ u32 num_running_gp_cores; ++ u32 num_running_pp_cores; ++ ktime_t time_period_start_gp; ++ u64 time_busy_gp; ++ u64 time_idle_gp; ++ ktime_t time_period_start_pp; ++ u64 time_busy_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; ++ u64 time_idle_pp[MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS]; ++ mali_bool gpu_active; ++ _mali_osk_spinlock_irq_t *lock; ++}; + -+ time_head_relative = timeline->point_next - timeline->point_oldest; -+ waiter_time_relative = waiter->point - timeline->point_oldest; ++/** ++ * Initialize/start the Mali GPU pm_metrics metrics reporting. ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t mali_pm_metrics_init(struct mali_device *mdev); + -+ if (waiter_time_relative < time_head_relative) { -+ /* This and all following waiters are on the point list, so we are done. */ -+ break; -+ } ++/** ++ * Terminate the Mali GPU pm_metrics metrics reporting ++ */ ++void mali_pm_metrics_term(struct mali_device *mdev); + -+ /* Remove waiter from timeline's waiter list. */ -+ if (NULL != waiter->timeline_next) { -+ waiter->timeline_next->timeline_prev = NULL; -+ } else { -+ /* This was the last waiter */ -+ timeline->waiter_head = NULL; -+ } -+ timeline->waiter_tail = waiter->timeline_next; ++/** ++ * Should be called when a job is about to execute a GPU job ++ */ ++void mali_pm_record_gpu_active(mali_bool is_gp); + -+ /* Release waiter. This could activate a tracker, if this was -+ * the last waiter for the tracker. 
*/ -+ schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter); -+ } ++/** ++ * Should be called when a job is finished ++ */ ++void mali_pm_record_gpu_idle(mali_bool is_gp); + -+ return schedule_mask; -+} ++void mali_pm_reset_dvfs_utilisation(struct mali_device *mdev); + -+static mali_scheduler_mask mali_timeline_release_with_depended_point(struct mali_timeline_tracker *tracker) -+{ -+ struct mali_timeline *timeline; -+ struct mali_timeline_waiter *waiter; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++void mali_pm_get_dvfs_utilisation(struct mali_device *mdev, unsigned long *total_out, unsigned long *busy_out); + -+ timeline = tracker->timeline; -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id); ++void mali_pm_metrics_spin_lock(void); + -+ MALI_DEBUG_CODE({ -+ struct mali_timeline_system *system = timeline->system; -+ MALI_DEBUG_ASSERT_POINTER(system); ++void mali_pm_metrics_spin_unlock(void); ++#else ++void mali_pm_record_gpu_idle(mali_bool is_gp) {} ++void mali_pm_record_gpu_active(mali_bool is_gp) {} ++#endif ++#endif /* __MALI_PM_METRICS_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pmu.c b/drivers/gpu/arm/mali400/mali/common/mali_pmu.c +new file mode 100644 +index 000000000..6f0af59f6 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pmu.c +@@ -0,0 +1,270 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ }); ++/** ++ * @file mali_pmu.c ++ * Mali driver functions for Mali 400 PMU hardware ++ */ ++#include "mali_hw_core.h" ++#include "mali_pmu.h" ++#include "mali_pp.h" ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_pm.h" ++#include "mali_osk_mali.h" + -+ /* Only release the waiter that wait for the tracker. 
*/ -+ waiter = timeline->waiter_tail; -+ while (NULL != waiter) { -+ if (waiter->point == tracker->point) { ++struct mali_pmu_core *mali_global_pmu_core = NULL; + -+ struct mali_timeline_waiter *waiter_next; -+ struct mali_timeline_waiter *waiter_prev; ++static _mali_osk_errcode_t mali_pmu_wait_for_command_finish( ++ struct mali_pmu_core *pmu); + -+ waiter_next = waiter->timeline_next; -+ waiter_prev = waiter->timeline_prev; -+ waiter->timeline_next = NULL; -+ waiter->timeline_prev = NULL; ++struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource) ++{ ++ struct mali_pmu_core *pmu; + -+ if (NULL != waiter_prev) { -+ waiter_prev->timeline_next = waiter_next; -+ } ++ MALI_DEBUG_ASSERT(NULL == mali_global_pmu_core); ++ MALI_DEBUG_PRINT(2, ("Mali PMU: Creating Mali PMU core\n")); + -+ if (NULL != waiter_next) { -+ waiter_next->timeline_prev = waiter_prev; -+ } ++ pmu = (struct mali_pmu_core *)_mali_osk_malloc( ++ sizeof(struct mali_pmu_core)); ++ if (NULL != pmu) { ++ pmu->registered_cores_mask = 0; /* to be set later */ + -+ if (waiter == timeline->waiter_tail) -+ timeline->waiter_tail = waiter_next; ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&pmu->hw_core, ++ resource, PMU_REGISTER_ADDRESS_SPACE_SIZE)) { + -+ if (waiter == timeline->waiter_head) -+ timeline->waiter_head = NULL; ++ pmu->switch_delay = _mali_osk_get_pmu_switch_delay(); + -+ schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter); -+ waiter = waiter_next; -+ }else { ++ mali_global_pmu_core = pmu; + -+ waiter = waiter->timeline_next; ++ return pmu; + } ++ _mali_osk_free(pmu); + } + -+ return schedule_mask; ++ return NULL; +} + -+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker, -+ mali_timeline_tracker_type type, -+ struct mali_timeline_fence *fence, -+ void *job) ++void mali_pmu_delete(struct mali_pmu_core *pmu) +{ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT_POINTER(job); -+ -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type); -+ -+ /* Zero out all tracker members. */ -+ _mali_osk_memset(tracker, 0, sizeof(*tracker)); ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu == mali_global_pmu_core); + -+ tracker->type = type; -+ tracker->job = job; -+ tracker->trigger_ref_count = 1; /* Prevents any callback from trigging while adding it */ -+ tracker->os_tick_create = _mali_osk_time_tickcount(); -+ MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC); ++ MALI_DEBUG_PRINT(2, ("Mali PMU: Deleting Mali PMU core\n")); + -+ tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE; ++ mali_global_pmu_core = NULL; + -+ /* Copy fence. */ -+ if (NULL != fence) { -+ _mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence)); -+ } ++ mali_hw_core_delete(&pmu->hw_core); ++ _mali_osk_free(pmu); +} + -+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker) ++void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask) +{ -+ struct mali_timeline *timeline; -+ struct mali_timeline_system *system; -+ struct mali_timeline_tracker *tracker_next, *tracker_prev; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ u32 tid = _mali_osk_get_tid(); -+ -+ /* Upon entry a group lock will be held, but not a scheduler lock. 
*/ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); -+ -+ /* Tracker should have been triggered */ -+ MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count); -+ -+ /* All waiters should have been released at this point */ -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head); -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ pmu->registered_cores_mask = mask; ++} + -+ MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job)); ++void mali_pmu_reset(struct mali_pmu_core *pmu) ++{ ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); + -+ timeline = tracker->timeline; -+ if (NULL == timeline) { -+ /* Tracker was not on a timeline, there is nothing to release. */ -+ return MALI_SCHEDULER_MASK_EMPTY; -+ } ++ /* Setup the desired defaults */ ++ mali_hw_core_register_write_relaxed(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_MASK, 0); ++ mali_hw_core_register_write_relaxed(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_SW_DELAY, pmu->switch_delay); ++} + -+ system = timeline->system; -+ MALI_DEBUG_ASSERT_POINTER(system); ++void mali_pmu_power_up_all(struct mali_pmu_core *pmu) ++{ ++ u32 stat; + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); + -+ /* Tracker should still be on timeline */ -+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); -+ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point)); ++ mali_pm_exec_lock(); + -+ /* Tracker is no longer valid. */ -+ MALI_DEBUG_CODE(tracker->magic = 0); ++ mali_pmu_reset(pmu); + -+ tracker_next = tracker->timeline_next; -+ tracker_prev = tracker->timeline_prev; -+ tracker->timeline_next = NULL; -+ tracker->timeline_prev = NULL; ++ /* Now simply power up the domains which are marked as powered down */ ++ stat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); ++ mali_pmu_power_up(pmu, stat); + -+ /* Removing tracker from timeline's tracker list */ -+ if (NULL == tracker_next) { -+ /* This tracker was the head */ -+ timeline->tracker_head = tracker_prev; -+ } else { -+ tracker_next->timeline_prev = tracker_prev; -+ } ++ mali_pm_exec_unlock(); ++} + -+ if (NULL == tracker_prev) { -+ /* This tracker was the tail */ -+ timeline->tracker_tail = tracker_next; -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ /* Update the timeline's oldest time and release any waiters */ -+ schedule_mask |= mali_timeline_update_oldest_point(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ } else { -+ tracker_prev->timeline_next = tracker_next; -+ if (MALI_TIMELINE_SOFT == tracker->timeline->id) { -+ /* Use the signaled soft tracker to release the depended soft waiter */ -+ schedule_mask |= mali_timeline_release_with_depended_point(tracker); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ } -+ } ++void mali_pmu_power_down_all(struct mali_pmu_core *pmu) ++{ ++ u32 stat; + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); + -+ /* Update delayed work only when it is the soft job timeline */ -+ if (MALI_TIMELINE_SOFT == tracker->timeline->id) { -+ mali_timeline_update_delayed_work(tracker->timeline); -+ } ++ mali_pm_exec_lock(); + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ /* Now simply power down the domains which are marked as powered up */ ++ stat = 
mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); ++ mali_pmu_power_down(pmu, (~stat) & pmu->registered_cores_mask); + -+ return schedule_mask; ++ mali_pm_exec_unlock(); +} + -+void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system, -+ struct mali_timeline_waiter *tail, -+ struct mali_timeline_waiter *head) ++_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask) +{ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(head); -+ MALI_DEBUG_ASSERT_POINTER(tail); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ u32 stat; ++ _mali_osk_errcode_t err; + -+ head->tracker_next = system->waiter_empty_list; -+ system->waiter_empty_list = tail; -+} ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); ++ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask); ++ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_RAWSTAT) & ++ PMU_REG_VAL_IRQ)); + -+static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker) -+{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ struct mali_timeline_system *system; -+ struct mali_timeline *timeline; -+ u32 tid = _mali_osk_get_tid(); ++ MALI_DEBUG_PRINT(3, ++ ("PMU power down: ...................... [%s]\n", ++ mali_pm_mask_to_string(mask))); + -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); ++ stat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); + -+ system = tracker->system; -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ /* ++ * Assert that we are not powering down domains which are already ++ * powered down. ++ */ ++ MALI_DEBUG_ASSERT(0 == (stat & mask)); + -+ tracker->os_tick_activate = _mali_osk_time_tickcount(); ++ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY); + -+ if (NULL != tracker->waiter_head) { -+ mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head); -+ tracker->waiter_head = NULL; -+ tracker->waiter_tail = NULL; -+ } ++ if (0 == mask || 0 == ((~stat) & mask)) return _MALI_OSK_ERR_OK; + -+ switch (tracker->type) { -+ case MALI_TIMELINE_TRACKER_GP: -+ schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) tracker->job); ++ mali_hw_core_register_write(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_POWER_DOWN, mask); + -+ _mali_osk_atomic_dec(&gp_tracker_count); -+ break; -+ case MALI_TIMELINE_TRACKER_PP: -+ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) { -+ _mali_osk_atomic_dec(&virt_pp_tracker_count); -+ } else { -+ _mali_osk_atomic_dec(&phy_pp_tracker_count); ++ /* ++ * Do not wait for interrupt on Mali-300/400 if all domains are ++ * powered off by our power down command, because the HW will simply ++ * not generate an interrupt in this case. 
++ */ ++ if (mali_is_mali450() || mali_is_mali470() || pmu->registered_cores_mask != (mask | stat)) { ++ err = mali_pmu_wait_for_command_finish(pmu); ++ if (_MALI_OSK_ERR_OK != err) { ++ return err; + } -+ schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job); -+ break; -+ case MALI_TIMELINE_TRACKER_SOFT: -+ timeline = tracker->timeline; -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ -+ schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job); -+ -+ /* Start a soft timer to make sure the soft job be released in a limited time */ -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ mali_timeline_update_delayed_work(timeline); -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ break; -+ case MALI_TIMELINE_TRACKER_WAIT: -+ mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job); -+ break; -+ case MALI_TIMELINE_TRACKER_SYNC: -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job); -+#else -+ MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type)); -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -+ break; -+ default: -+ MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type)); -+ break; ++ } else { ++ mali_hw_core_register_write(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ); + } + -+ return schedule_mask; ++#if defined(DEBUG) ++ /* Verify power status of domains after power down */ ++ stat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); ++ MALI_DEBUG_ASSERT(mask == (stat & mask)); ++#endif ++ ++ return _MALI_OSK_ERR_OK; +} + -+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker) ++_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask) +{ -+ u32 tid = _mali_osk_get_tid(); -+ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT_POINTER(system); -+ -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ u32 stat; ++ _mali_osk_errcode_t err; ++#if !defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP) ++ u32 current_domain; ++#endif + -+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); -+ tracker->trigger_ref_count++; ++ MALI_DEBUG_ASSERT_POINTER(pmu); ++ MALI_DEBUG_ASSERT(pmu->registered_cores_mask != 0); ++ MALI_DEBUG_ASSERT(mask <= pmu->registered_cores_mask); ++ MALI_DEBUG_ASSERT(0 == (mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_RAWSTAT) & ++ PMU_REG_VAL_IRQ)); + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+} ++ MALI_DEBUG_PRINT(3, ++ ("PMU power up: ........................ [%s]\n", ++ mali_pm_mask_to_string(mask))); + -+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error) -+{ -+ u32 tid = _mali_osk_get_tid(); -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ stat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); ++ stat &= pmu->registered_cores_mask; + -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ MALI_DEBUG_ASSERT_POINTER(system); ++ mask &= ~(0x1 << MALI_DOMAIN_INDEX_DUMMY); ++ if (0 == mask || 0 == (stat & mask)) return _MALI_OSK_ERR_OK; + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ /* ++ * Assert that we are only powering up domains which are currently ++ * powered down. 
++ */ ++ MALI_DEBUG_ASSERT(mask == (stat & mask)); + -+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); -+ tracker->trigger_ref_count--; ++#if defined(CONFIG_MALI_PMU_PARALLEL_POWER_UP) ++ mali_hw_core_register_write(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_POWER_UP, mask); + -+ tracker->activation_error |= activation_error; ++ err = mali_pmu_wait_for_command_finish(pmu); ++ if (_MALI_OSK_ERR_OK != err) { ++ return err; ++ } ++#else ++ for (current_domain = 1; ++ current_domain <= pmu->registered_cores_mask; ++ current_domain <<= 1) { ++ if (current_domain & mask & stat) { ++ mali_hw_core_register_write(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_POWER_UP, ++ current_domain); + -+ if (0 == tracker->trigger_ref_count) { -+ schedule_mask |= mali_timeline_tracker_activate(tracker); -+ tracker = NULL; ++ err = mali_pmu_wait_for_command_finish(pmu); ++ if (_MALI_OSK_ERR_OK != err) { ++ return err; ++ } ++ } + } ++#endif + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++#if defined(DEBUG) ++ /* Verify power status of domains after power up */ ++ stat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_STATUS); ++ MALI_DEBUG_ASSERT(0 == (stat & mask)); ++#endif /* defined(DEBUG) */ + -+ return schedule_mask; ++ return _MALI_OSK_ERR_OK; +} + -+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence) ++static _mali_osk_errcode_t mali_pmu_wait_for_command_finish( ++ struct mali_pmu_core *pmu) +{ -+ u32 i; ++ u32 rawstat; ++ u32 timeout = MALI_REG_POLL_COUNT_SLOW; + -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ MALI_DEBUG_ASSERT_POINTER(uk_fence); ++ MALI_DEBUG_ASSERT(pmu); + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ fence->points[i] = uk_fence->points[i]; -+ } ++ /* Wait for the command to complete */ ++ do { ++ rawstat = mali_hw_core_register_read(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_RAWSTAT); ++ --timeout; ++ } while (0 == (rawstat & PMU_REG_VAL_IRQ) && 0 < timeout); + -+ fence->sync_fd = uk_fence->sync_fd; -+} ++ MALI_DEBUG_ASSERT(0 < timeout); + -+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session) -+{ -+ u32 i; -+ struct mali_timeline_system *system; ++ if (0 == timeout) { ++ return _MALI_OSK_ERR_TIMEOUT; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n")); ++ mali_hw_core_register_write(&pmu->hw_core, ++ PMU_REG_ADDR_MGMT_INT_CLEAR, PMU_REG_VAL_IRQ); + -+ system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system)); -+ if (NULL == system) { -+ return NULL; -+ } ++ return _MALI_OSK_ERR_OK; ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pmu.h b/drivers/gpu/arm/mali400/mali/common/mali_pmu.h +new file mode 100644 +index 000000000..5b856240f +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pmu.h +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
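
mali_pmu_wait_for_command_finish() above is a bounded busy-poll: it reads the raw interrupt status until the command-done bit appears or a retry budget runs out, then acknowledges it through the clear register. A generic sketch of that pattern with hypothetical read_reg()/write_reg() accessors and an invented retry budget standing in for MALI_REG_POLL_COUNT_SLOW:

#include <stdint.h>

#define REG_INT_RAWSTAT 0x10   /* offsets follow the PMU layout in mali_pmu.h */
#define REG_INT_CLEAR   0x18
#define REG_VAL_IRQ     0x1
#define POLL_BUDGET     100000 /* illustrative retry budget */

/* Hypothetical MMIO accessors over a mapped register window. */
static uint32_t read_reg(volatile uint32_t *base, uint32_t offset)
{
	return base[offset / sizeof(uint32_t)];
}

static void write_reg(volatile uint32_t *base, uint32_t offset, uint32_t value)
{
	base[offset / sizeof(uint32_t)] = value;
}

/* Returns 0 on success, -1 if the command did not complete in time. */
int wait_for_command_finish(volatile uint32_t *base)
{
	uint32_t rawstat;
	unsigned budget = POLL_BUDGET;

	do {
		rawstat = read_reg(base, REG_INT_RAWSTAT);
	} while (!(rawstat & REG_VAL_IRQ) && --budget);

	if (!budget)
		return -1; /* timed out, caller decides how to recover */

	/* Acknowledge the interrupt so the next command starts clean. */
	write_reg(base, REG_INT_CLEAR, REG_VAL_IRQ);
	return 0;
}
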
++ */ + -+ system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM); -+ if (NULL == system->spinlock) { -+ mali_timeline_system_destroy(system); -+ return NULL; -+ } ++/** ++ * @file mali_platform.h ++ * Platform specific Mali driver functions ++ */ + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i); -+ if (NULL == system->timelines[i]) { -+ mali_timeline_system_destroy(system); -+ return NULL; -+ } -+ } ++#ifndef __MALI_PMU_H__ ++#define __MALI_PMU_H__ + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled"); -+ if (NULL == system->signaled_sync_tl) { -+ mali_timeline_system_destroy(system); -+ return NULL; -+ } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_hw_core.h" + -+ system->waiter_empty_list = NULL; -+ system->session = session; -+ system->timer_enabled = MALI_TRUE; ++/** @brief MALI inbuilt PMU hardware info and PMU hardware has knowledge of cores power mask ++ */ ++struct mali_pmu_core { ++ struct mali_hw_core hw_core; ++ u32 registered_cores_mask; ++ u32 switch_delay; ++}; + -+ system->wait_queue = _mali_osk_wait_queue_init(); -+ if (NULL == system->wait_queue) { -+ mali_timeline_system_destroy(system); -+ return NULL; -+ } ++/** @brief Register layout for hardware PMU ++ */ ++typedef enum { ++ PMU_REG_ADDR_MGMT_POWER_UP = 0x00, /*< Power up register */ ++ PMU_REG_ADDR_MGMT_POWER_DOWN = 0x04, /*< Power down register */ ++ PMU_REG_ADDR_MGMT_STATUS = 0x08, /*< Core sleep status register */ ++ PMU_REG_ADDR_MGMT_INT_MASK = 0x0C, /*< Interrupt mask register */ ++ PMU_REG_ADDR_MGMT_INT_RAWSTAT = 0x10, /*< Interrupt raw status register */ ++ PMU_REG_ADDR_MGMT_INT_CLEAR = 0x18, /*< Interrupt clear register */ ++ PMU_REG_ADDR_MGMT_SW_DELAY = 0x1C, /*< Switch delay register */ ++ PMU_REGISTER_ADDRESS_SPACE_SIZE = 0x28, /*< Size of register space */ ++} pmu_reg_addr_mgmt_addr; + -+ return system; -+} ++#define PMU_REG_VAL_IRQ 1 + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE) -+/** -+ * Check if there are any trackers left on timeline. ++extern struct mali_pmu_core *mali_global_pmu_core; ++ ++/** @brief Initialisation of MALI PMU + * -+ * Used as a wait queue conditional. ++ * This is called from entry point of the driver in order to create and intialize the PMU resource + * -+ * @param data Timeline. -+ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not. ++ * @param resource it will be a pointer to a PMU resource ++ * @param number_of_pp_cores Number of found PP resources in configuration ++ * @param number_of_l2_caches Number of found L2 cache resources in configuration ++ * @return The created PMU object, or NULL in case of failure. + */ -+static mali_bool mali_timeline_has_no_trackers(void *data) -+{ -+ struct mali_timeline *timeline = (struct mali_timeline *) data; -+ -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++struct mali_pmu_core *mali_pmu_create(_mali_osk_resource_t *resource); + -+ return mali_timeline_is_empty(timeline); -+} -+#if defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE) -+/** -+ * Cancel sync fence waiters waited upon by trackers on all timelines. ++/** @brief It deallocates the PMU resource + * -+ * Will return after all timelines have no trackers left. 
++ * This is called on the exit of the driver to terminate the PMU resource + * -+ * @param system Timeline system. ++ * @param pmu Pointer to PMU core object to delete + */ -+static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system) -+{ -+ u32 i; -+ u32 tid = _mali_osk_get_tid(); -+ struct mali_timeline_tracker *tracker, *tracker_next; -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list); ++void mali_pmu_delete(struct mali_pmu_core *pmu); + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT(system->session->is_aborting); ++/** @brief Set registered cores mask ++ * ++ * @param pmu Pointer to PMU core object ++ * @param mask All available/valid domain bits ++ */ ++void mali_pmu_set_registered_cores_mask(struct mali_pmu_core *pmu, u32 mask); + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++/** @brief Retrieves the Mali PMU core object (if any) ++ * ++ * @return The Mali PMU object, or NULL if no PMU exists. ++ */ ++MALI_STATIC_INLINE struct mali_pmu_core *mali_pmu_get_global_pmu_core(void) ++{ ++ return mali_global_pmu_core; ++} + -+ /* Cancel sync fence waiters. */ -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline = system->timelines[i]; ++/** @brief Reset PMU core ++ * ++ * @param pmu Pointer to PMU core object to reset ++ */ ++void mali_pmu_reset(struct mali_pmu_core *pmu); + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++void mali_pmu_power_up_all(struct mali_pmu_core *pmu); + -+ tracker_next = timeline->tracker_tail; -+ while (NULL != tracker_next) { -+ tracker = tracker_next; -+ tracker_next = tracker->timeline_next; ++void mali_pmu_power_down_all(struct mali_pmu_core *pmu); + -+ if (NULL == tracker->sync_fence) continue; ++/** @brief Returns a mask of the currently powered up domains ++ * ++ * @param pmu Pointer to PMU core object ++ */ ++MALI_STATIC_INLINE u32 mali_pmu_get_mask(struct mali_pmu_core *pmu) ++{ ++ u32 stat = mali_hw_core_register_read(&pmu->hw_core, PMU_REG_ADDR_MGMT_STATUS); ++ return ((~stat) & pmu->registered_cores_mask); ++} + -+ MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker)); ++/** @brief MALI GPU power down using MALI in-built PMU ++ * ++ * Called to power down the specified cores. ++ * ++ * @param pmu Pointer to PMU core object to power down ++ * @param mask Mask specifying which power domains to power down ++ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error. ++ */ ++_mali_osk_errcode_t mali_pmu_power_down(struct mali_pmu_core *pmu, u32 mask); + -+ /* Cancel sync fence waiter. */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) { -+#else -+ if (0 == mali_internal_sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) { -+#endif -+ /* Callback was not called, move tracker to local list. */ -+ _mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list); -+ } -+ } -+ } ++/** @brief MALI GPU power up using MALI in-built PMU ++ * ++ * Called to power up the specified cores. ++ * ++ * @param pmu Pointer to PMU core object to power up ++ * @param mask Mask specifying which power domains to power up ++ * @return _MALI_OSK_ERR_OK on success otherwise, a suitable _mali_osk_errcode_t error. 
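
Since the PMU status register is a sleep mask (a set bit means the domain is powered down), mali_pmu_get_mask() in the header above derives the powered-up set by inverting the status and masking with the domains that actually exist. A small worked example of that bit manipulation; the values are invented for illustration:

#include <assert.h>
#include <stdint.h>

/* STATUS is a sleep mask: bit set => domain powered down. */
static uint32_t powered_up_mask(uint32_t status, uint32_t registered_mask)
{
	return (~status) & registered_mask;
}

int main(void)
{
	/* Invented example: three domains exist (bits 0..2), domain 1 is asleep. */
	uint32_t registered = 0x7;
	uint32_t status = 0x2;

	assert(powered_up_mask(status, registered) == 0x5); /* domains 0 and 2 are up */
	return 0;
}
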
++ */ ++_mali_osk_errcode_t mali_pmu_power_up(struct mali_pmu_core *pmu, u32 mask); + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++#endif /* __MALI_PMU_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp.c b/drivers/gpu/arm/mali400/mali/common/mali_pp.c +new file mode 100644 +index 000000000..2dd8b8766 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pp.c +@@ -0,0 +1,502 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ /* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */ -+ _MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) { -+ mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter); -+ } ++#include "mali_pp_job.h" ++#include "mali_pp.h" ++#include "mali_hw_core.h" ++#include "mali_group.h" ++#include "regs/mali_200_regs.h" ++#include "mali_kernel_common.h" ++#include "mali_kernel_core.h" + -+ /* Sleep until all sync fence callbacks are done and all timelines are empty. */ -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline = system->timelines[i]; ++#if defined(CONFIG_MALI400_PROFILING) ++#include "mali_osk_profiling.h" ++#endif + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++/* Number of frame registers on Mali-200 */ ++#define MALI_PP_MALI200_NUM_FRAME_REGISTERS ((0x04C/4)+1) ++/* Number of frame registers on Mali-300 and later */ ++#define MALI_PP_MALI400_NUM_FRAME_REGISTERS ((0x058/4)+1) + -+ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline); -+ } -+} ++static struct mali_pp_core *mali_global_pp_cores[MALI_MAX_NUMBER_OF_PP_CORES] = { NULL }; ++static u32 mali_global_num_pp_cores = 0; + -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++/* Interrupt handlers */ ++static void mali_pp_irq_probe_trigger(void *data); ++static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data); + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+static void mali_timeline_cancel_dma_fence_waiters(struct mali_timeline_system *system) ++struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id) +{ -+ u32 i, j; -+ u32 tid = _mali_osk_get_tid(); -+ struct mali_pp_job *pp_job = NULL; -+ struct mali_pp_job *next_pp_job = NULL; -+ struct mali_timeline *timeline = NULL; -+ struct mali_timeline_tracker *tracker, *tracker_next; -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(pp_job_list); ++ struct mali_pp_core *core = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT(system->session->is_aborting); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Creating Mali PP core: %s\n", resource->description)); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Base address of PP core: 0x%x\n", resource->base)); + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ if (mali_global_num_pp_cores >= MALI_MAX_NUMBER_OF_PP_CORES) { ++ MALI_PRINT_ERROR(("Mali PP: Too many PP core objects 
created\n")); ++ return NULL; ++ } + -+ /* Cancel dma fence waiters. */ -+ timeline = system->timelines[MALI_TIMELINE_PP]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ core = _mali_osk_calloc(1, sizeof(struct mali_pp_core)); ++ if (NULL != core) { ++ core->core_id = mali_global_num_pp_cores; ++ core->bcast_id = bcast_id; + -+ tracker_next = timeline->tracker_tail; -+ while (NULL != tracker_next) { -+ mali_bool fence_is_signaled = MALI_TRUE; -+ tracker = tracker_next; -+ tracker_next = tracker->timeline_next; ++ if (_MALI_OSK_ERR_OK == mali_hw_core_create(&core->hw_core, resource, MALI200_REG_SIZEOF_REGISTER_BANK)) { ++ _mali_osk_errcode_t ret; + -+ if (NULL == tracker->waiter_dma_fence) continue; -+ pp_job = (struct mali_pp_job *)tracker->job; -+ MALI_DEBUG_ASSERT_POINTER(pp_job); -+ MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling dma fence waiter for tracker 0x%08X.\n", tracker)); ++ if (!is_virtual) { ++ ret = mali_pp_reset(core); ++ } else { ++ ret = _MALI_OSK_ERR_OK; ++ } + -+ for (j = 0; j < pp_job->dma_fence_context.num_dma_fence_waiter; j++) { -+ if (pp_job->dma_fence_context.mali_dma_fence_waiters[j]) { -+ /* Cancel a previously callback from the fence. -+ * This function returns true if the callback is successfully removed, -+ * or false if the fence has already been signaled. -+ */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ bool ret = dma_fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence, -+ &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base); ++ if (_MALI_OSK_ERR_OK == ret) { ++ ret = mali_group_add_pp_core(group, core); ++ if (_MALI_OSK_ERR_OK == ret) { ++ /* Setup IRQ handlers (which will do IRQ probing if needed) */ ++ MALI_DEBUG_ASSERT(!is_virtual || -1 != resource->irq); + -+#else -+ bool ret = fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence, -+ &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base); -+#endif -+ if (ret) { -+ fence_is_signaled = MALI_FALSE; ++ core->irq = _mali_osk_irq_init(resource->irq, ++ mali_group_upper_half_pp, ++ group, ++ mali_pp_irq_probe_trigger, ++ mali_pp_irq_probe_ack, ++ core, ++ resource->description); ++ if (NULL != core->irq) { ++ mali_global_pp_cores[mali_global_num_pp_cores] = core; ++ mali_global_num_pp_cores++; ++ ++ return core; ++ } else { ++ MALI_PRINT_ERROR(("Mali PP: Failed to setup interrupt handlers for PP core %s\n", core->hw_core.description)); ++ } ++ mali_group_remove_pp_core(group); ++ } else { ++ MALI_PRINT_ERROR(("Mali PP: Failed to add core %s to group\n", core->hw_core.description)); + } + } ++ mali_hw_core_delete(&core->hw_core); + } + -+ /* Callbacks were not called, move pp job to local list. */ -+ if (MALI_FALSE == fence_is_signaled) -+ _mali_osk_list_add(&pp_job->list, &pp_job_list); -+ } -+ -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ -+ /* Manually call dma fence callback in order to release waiter and trigger activation of tracker. */ -+ _MALI_OSK_LIST_FOREACHENTRY(pp_job, next_pp_job, &pp_job_list, struct mali_pp_job, list) { -+ mali_timeline_dma_fence_callback((void *)pp_job); ++ _mali_osk_free(core); ++ } else { ++ MALI_PRINT_ERROR(("Mali PP: Failed to allocate memory for PP core\n")); + } + -+ /* Sleep until all dma fence callbacks are done and all timelines are empty. 
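
mali_pp_delete() above keeps the global PP core array dense by moving the last entry into the slot being vacated instead of shifting the whole tail. A generic sketch of that swap-remove idiom; the array and element types are placeholders:

#include <stddef.h>

/* Remove items[index] from a dense array of 'count' pointers by moving the
 * last element into the gap. Returns the new count. */
size_t swap_remove(void **items, size_t count, size_t index)
{
	if (index >= count)
		return count; /* nothing to do */

	count--;
	if (index != count)
		items[index] = items[count]; /* close the gap with the last entry */
	items[count] = NULL;
	return count;
}
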
*/ -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline = system->timelines[i]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline); -+ } ++ return NULL; +} -+#endif -+#endif -+void mali_timeline_system_abort(struct mali_timeline_system *system) ++ ++void mali_pp_delete(struct mali_pp_core *core) +{ -+ MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid();); ++ u32 i; + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT(system->session->is_aborting); ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session)); ++ _mali_osk_irq_term(core->irq); ++ mali_hw_core_delete(&core->hw_core); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ mali_timeline_cancel_sync_fence_waiters(system); -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ /* Remove core from global list */ ++ for (i = 0; i < mali_global_num_pp_cores; i++) { ++ if (mali_global_pp_cores[i] == core) { ++ mali_global_pp_cores[i] = NULL; ++ mali_global_num_pp_cores--; + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ mali_timeline_cancel_dma_fence_waiters(system); -+#endif ++ if (i != mali_global_num_pp_cores) { ++ /* We removed a PP core from the middle of the array -- move the last ++ * PP core to the current position to close the gap */ ++ mali_global_pp_cores[i] = mali_global_pp_cores[mali_global_num_pp_cores]; ++ mali_global_pp_cores[mali_global_num_pp_cores] = NULL; ++ } + -+ /* Should not be any waiters or trackers left at this point. */ -+ MALI_DEBUG_CODE({ -+ u32 i; -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) -+ { -+ struct mali_timeline *timeline = system->timelines[i]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next); -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head); -+ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); -+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head); -+ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail); ++ break; + } -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ }); ++ } ++ ++ _mali_osk_free(core); +} + -+void mali_timeline_system_destroy(struct mali_timeline_system *system) ++void mali_pp_stop_bus(struct mali_pp_core *core) +{ -+ u32 i; -+ struct mali_timeline_waiter *waiter, *next; -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ u32 tid = _mali_osk_get_tid(); -+#endif ++ MALI_DEBUG_ASSERT_POINTER(core); ++ /* Will only send the stop bus command, and not wait for it to complete */ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_STOP_BUS); ++} + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); ++_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core) ++{ ++ int i; + -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n")); ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ if (NULL != system) { ++ /* Send the stop bus command. */ ++ mali_pp_stop_bus(core); + -+ /* There should be no waiters left on this queue. 
*/ -+ if (NULL != system->wait_queue) { -+ _mali_osk_wait_queue_term(system->wait_queue); -+ system->wait_queue = NULL; -+ } ++ /* Wait for bus to be stopped */ ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { ++ if (mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS) & MALI200_REG_VAL_STATUS_BUS_STOPPED) ++ break; ++ } + -+ /* Free all waiters in empty list */ -+ waiter = system->waiter_empty_list; -+ while (NULL != waiter) { -+ next = waiter->tracker_next; -+ _mali_osk_free(waiter); -+ waiter = next; -+ } ++ if (MALI_REG_POLL_COUNT_FAST == i) { ++ MALI_PRINT_ERROR(("Mali PP: Failed to stop bus on %s. Status: 0x%08x\n", core->hw_core.description, mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ return _MALI_OSK_ERR_OK; ++} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (NULL != system->signaled_sync_tl) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_timeline_destroy(system->signaled_sync_tl); -+#else -+ mali_internal_sync_timeline_destroy(system->signaled_sync_tl); -+#endif -+ } ++/* Frame register reset values. ++ * Taken from the Mali400 TRM, 3.6. Pixel processor control register summary */ ++static const u32 mali_frame_registers_reset_values[_MALI_PP_MAX_FRAME_REGISTERS] = { ++ 0x0, /* Renderer List Address Register */ ++ 0x0, /* Renderer State Word Base Address Register */ ++ 0x0, /* Renderer Vertex Base Register */ ++ 0x2, /* Feature Enable Register */ ++ 0x0, /* Z Clear Value Register */ ++ 0x0, /* Stencil Clear Value Register */ ++ 0x0, /* ABGR Clear Value 0 Register */ ++ 0x0, /* ABGR Clear Value 1 Register */ ++ 0x0, /* ABGR Clear Value 2 Register */ ++ 0x0, /* ABGR Clear Value 3 Register */ ++ 0x0, /* Bounding Box Left Right Register */ ++ 0x0, /* Bounding Box Bottom Register */ ++ 0x0, /* FS Stack Address Register */ ++ 0x0, /* FS Stack Size and Initial Value Register */ ++ 0x0, /* Reserved */ ++ 0x0, /* Reserved */ ++ 0x0, /* Origin Offset X Register */ ++ 0x0, /* Origin Offset Y Register */ ++ 0x75, /* Subpixel Specifier Register */ ++ 0x0, /* Tiebreak mode Register */ ++ 0x0, /* Polygon List Format Register */ ++ 0x0, /* Scaling Register */ ++ 0x0 /* Tilebuffer configuration Register */ ++}; + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) { -+ mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid); -+ system->timelines[i]->destroyed = MALI_TRUE; -+ mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid); -+ } -+ } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++/* WBx register reset values */ ++static const u32 mali_wb_registers_reset_values[_MALI_PP_MAX_WB_REGISTERS] = { ++ 0x0, /* WBx Source Select Register */ ++ 0x0, /* WBx Target Address Register */ ++ 0x0, /* WBx Target Pixel Format Register */ ++ 0x0, /* WBx Target AA Format Register */ ++ 0x0, /* WBx Target Layout */ ++ 0x0, /* WBx Target Scanline Length */ ++ 0x0, /* WBx Target Flags Register */ ++ 0x0, /* WBx MRT Enable Register */ ++ 0x0, /* WBx MRT Offset Register */ ++ 0x0, /* WBx Global Test Enable Register */ ++ 0x0, /* WBx Global Test Reference Value Register */ ++ 0x0 /* WBx Global Test Compare Function Register */ ++}; + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ if (NULL != system->timelines[i]) { -+ mali_timeline_destroy(system->timelines[i]); -+ } -+ } ++/* Performance Counter 0 Enable Register reset value */ ++static const u32 mali_perf_cnt_enable_reset_value 
= 0; + -+ if (NULL != system->spinlock) { -+ mali_spinlock_reentrant_term(system->spinlock); -+ } ++_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core) ++{ ++ /* Bus must be stopped before calling this function */ ++ const u32 reset_wait_target_register = MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT; ++ const u32 reset_invalid_value = 0xC0FFE000; ++ const u32 reset_check_value = 0xC01A0000; ++ int i; + -+ _mali_osk_free(system); -+ } -+} ++ MALI_DEBUG_ASSERT_POINTER(core); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Hard reset of core %s\n", core->hw_core.description)); + -+/** -+ * Find how many waiters are needed for a given fence. -+ * -+ * @param fence The fence to check. -+ * @return Number of waiters needed for fence. -+ */ -+static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence) -+{ -+ u32 i, num_waiters = 0; ++ /* Set register to a bogus value. The register will be used to detect when reset is complete */ ++ mali_hw_core_register_write_relaxed(&core->hw_core, reset_wait_target_register, reset_invalid_value); ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE); + -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ /* Force core to reset */ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET); + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ if (MALI_TIMELINE_NO_POINT != fence->points[i]) { -+ ++num_waiters; ++ /* Wait for reset to be complete */ ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { ++ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, reset_check_value); ++ if (reset_check_value == mali_hw_core_register_read(&core->hw_core, reset_wait_target_register)) { ++ break; + } + } + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (-1 != fence->sync_fd) ++num_waiters; -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ if (MALI_REG_POLL_COUNT_FAST == i) { ++ MALI_PRINT_ERROR(("Mali PP: The hard reset loop didn't work, unable to recover\n")); ++ } + -+ return num_waiters; ++ mali_hw_core_register_write(&core->hw_core, reset_wait_target_register, 0x00000000); /* set it back to the default */ ++ /* Re-enable interrupts */ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL); ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); ++ ++ return _MALI_OSK_ERR_OK; +} + -+static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system) ++void mali_pp_reset_async(struct mali_pp_core *core) +{ -+ struct mali_timeline_waiter *waiter; -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ waiter = system->waiter_empty_list; -+ if (NULL != waiter) { -+ /* Remove waiter from empty list and zero it */ -+ system->waiter_empty_list = waiter->tracker_next; -+ _mali_osk_memset(waiter, 0, sizeof(*waiter)); -+ } ++ MALI_DEBUG_PRINT(4, ("Mali PP: Reset of core %s\n", core->hw_core.description)); + -+ /* Return NULL if list was empty. 
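
mali_pp_hard_reset() above detects the end of the reset with a sentinel: it parks a bogus value in a spare register, forces the reset, and then repeatedly writes a check value and reads it back, treating a successful read-back as proof that the core is accepting register writes again. A hedged sketch of the same technique; the check constants echo the ones above, but the offsets, control bit and accessors are invented, not the Mali register map:

#include <stdint.h>

#define SCRATCH_REG       0x14C     /* illustrative spare register offset */
#define CTRL_REG          0x100C    /* illustrative control register offset */
#define CTRL_FORCE_RESET  (1u << 5) /* illustrative reset bit */
#define RESET_INVALID     0xC0FFE000u
#define RESET_CHECK       0xC01A0000u
#define RESET_POLL_BUDGET 10000

/* Hypothetical MMIO helpers over a mapped register window. */
static uint32_t rd(volatile uint32_t *base, uint32_t off) { return base[off / 4]; }
static void wr(volatile uint32_t *base, uint32_t off, uint32_t v) { base[off / 4] = v; }

/* Returns 0 once the scratch register accepts writes again, -1 on timeout. */
int hard_reset(volatile uint32_t *base)
{
	int i;

	wr(base, SCRATCH_REG, RESET_INVALID); /* park a bogus value before resetting */
	wr(base, CTRL_REG, CTRL_FORCE_RESET);

	for (i = 0; i < RESET_POLL_BUDGET; i++) {
		wr(base, SCRATCH_REG, RESET_CHECK);
		if (rd(base, SCRATCH_REG) == RESET_CHECK)
			return 0; /* the write stuck, so the core is alive again */
	}
	return -1;
}
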
*/ -+ return waiter; ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, 0); /* disable the IRQs */ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_MASK_ALL); ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET); +} + -+static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system, -+ struct mali_timeline_waiter **tail, -+ struct mali_timeline_waiter **head, -+ int max_num_waiters) ++_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core) +{ -+ u32 i, tid = _mali_osk_get_tid(); -+ mali_bool do_alloc; -+ struct mali_timeline_waiter *waiter; -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(tail); -+ MALI_DEBUG_ASSERT_POINTER(head); -+ -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ int i; ++ u32 rawstat = 0; + -+ *head = *tail = NULL; -+ do_alloc = MALI_FALSE; -+ i = 0; -+ while (i < max_num_waiters) { -+ if (MALI_FALSE == do_alloc) { -+ waiter = mali_timeline_system_get_zeroed_waiter(system); -+ if (NULL == waiter) { -+ do_alloc = MALI_TRUE; -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+ continue; ++ for (i = 0; i < MALI_REG_POLL_COUNT_FAST; i++) { ++ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS); ++ if (!(status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE)) { ++ rawstat = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT); ++ if (rawstat == MALI400PP_REG_VAL_IRQ_RESET_COMPLETED) { ++ break; + } -+ } else { -+ waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter)); -+ if (NULL == waiter) break; -+ } -+ ++i; -+ if (NULL == *tail) { -+ *tail = waiter; -+ *head = waiter; -+ } else { -+ (*head)->tracker_next = waiter; -+ *head = waiter; + } + } -+ if (MALI_TRUE == do_alloc) { -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ ++ if (i == MALI_REG_POLL_COUNT_FAST) { ++ MALI_PRINT_ERROR(("Mali PP: Failed to reset core %s, rawstat: 0x%08x\n", ++ core->hw_core.description, rawstat)); ++ return _MALI_OSK_ERR_FAULT; + } -+} + -+/** -+ * Create waiters for the given tracker. The tracker is activated when all waiters are release. -+ * -+ * @note Tracker can potentially be activated before this function returns. -+ * -+ * @param system Timeline system. -+ * @param tracker Tracker we will create waiters for. -+ * @param waiter_tail List of pre-allocated waiters. -+ * @param waiter_head List of pre-allocated waiters. 
-+ */ -+static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system, -+ struct mali_timeline_tracker *tracker, -+ struct mali_timeline_waiter *waiter_tail, -+ struct mali_timeline_waiter *waiter_head) -+{ -+ int i; -+ u32 tid = _mali_osk_get_tid(); -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence = NULL; -+#else -+ struct mali_internal_sync_fence *sync_fence = NULL; -+#endif -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ /* Re-enable interrupts */ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_MASK_ALL); ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++ return _MALI_OSK_ERR_OK; ++} + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core) ++{ ++ mali_pp_reset_async(core); ++ return mali_pp_reset_wait(core); ++} + -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head); -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); -+ MALI_DEBUG_ASSERT(NULL != tracker->job); ++void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual) ++{ ++ u32 relative_address; ++ u32 start_index; ++ u32 nr_of_regs; ++ u32 *frame_registers = mali_pp_job_get_frame_registers(job); ++ u32 *wb0_registers = mali_pp_job_get_wb0_registers(job); ++ u32 *wb1_registers = mali_pp_job_get_wb1_registers(job); ++ u32 *wb2_registers = mali_pp_job_get_wb2_registers(job); ++ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, sub_job); ++ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, sub_job); + -+ /* Creating waiter object for all the timelines the fence is put on. Inserting this waiter -+ * into the timelines sorted list of waiters */ -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ mali_timeline_point point; -+ struct mali_timeline *timeline; -+ struct mali_timeline_waiter *waiter; ++ MALI_DEBUG_ASSERT_POINTER(core); + -+ /* Get point on current timeline from tracker's fence. */ -+ point = tracker->fence.points[i]; ++ /* Write frame registers */ + -+ if (likely(MALI_TIMELINE_NO_POINT == point)) { -+ /* Fence contains no point on this timeline so we don't need a waiter. */ -+ continue; -+ } ++ /* ++ * There are two frame registers which are different for each sub job: ++ * 1. The Renderer List Address Register (MALI200_REG_ADDR_FRAME) ++ * 2. 
The FS Stack Address Register (MALI200_REG_ADDR_STACK) ++ */ ++ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_FRAME, mali_pp_job_get_addr_frame(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_FRAME / sizeof(u32)]); + -+ timeline = system->timelines[i]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ /* For virtual jobs, the stack address shouldn't be broadcast but written individually */ ++ if (!mali_pp_job_is_virtual(job) || restart_virtual) { ++ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_STACK, mali_pp_job_get_addr_stack(job, sub_job), mali_frame_registers_reset_values[MALI200_REG_ADDR_STACK / sizeof(u32)]); ++ } + -+ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) { -+ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", -+ point, timeline->point_oldest, timeline->point_next)); -+ continue; -+ } ++ /* Write registers between MALI200_REG_ADDR_FRAME and MALI200_REG_ADDR_STACK */ ++ relative_address = MALI200_REG_ADDR_RSW; ++ start_index = MALI200_REG_ADDR_RSW / sizeof(u32); ++ nr_of_regs = (MALI200_REG_ADDR_STACK - MALI200_REG_ADDR_RSW) / sizeof(u32); + -+ if (likely(mali_timeline_is_point_released(timeline, point))) { -+ /* Tracker representing the point has been released so we don't need a -+ * waiter. */ -+ continue; -+ } ++ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, ++ relative_address, &frame_registers[start_index], ++ nr_of_regs, &mali_frame_registers_reset_values[start_index]); + -+ if ((MALI_TIMELINE_SOFT == timeline->id) && mali_timeline_is_tracker_released(timeline, point)) { -+ /* The tracker that the point related to has already been released, so no need to a waiter. */ -+ continue; -+ } ++ /* MALI200_REG_ADDR_STACK_SIZE */ ++ relative_address = MALI200_REG_ADDR_STACK_SIZE; ++ start_index = MALI200_REG_ADDR_STACK_SIZE / sizeof(u32); + -+ /* The point is on timeline. */ -+ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point)); ++ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, ++ relative_address, frame_registers[start_index], ++ mali_frame_registers_reset_values[start_index]); + -+ /* Get a new zeroed waiter object. */ -+ if (likely(NULL != waiter_tail)) { -+ waiter = waiter_tail; -+ waiter_tail = waiter_tail->tracker_next; -+ } else { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); -+ continue; -+ } ++ /* Skip 2 reserved registers */ + -+ /* Yanking the trigger ref count of the tracker. */ -+ tracker->trigger_ref_count++; ++ /* Write remaining registers */ ++ relative_address = MALI200_REG_ADDR_ORIGIN_OFFSET_X; ++ start_index = MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); ++ nr_of_regs = MALI_PP_MALI400_NUM_FRAME_REGISTERS - MALI200_REG_ADDR_ORIGIN_OFFSET_X / sizeof(u32); + -+ waiter->point = point; -+ waiter->tracker = tracker; ++ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, ++ relative_address, &frame_registers[start_index], ++ nr_of_regs, &mali_frame_registers_reset_values[start_index]); + -+ /* Insert waiter on tracker's singly-linked waiter list. 
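
The *_write_relaxed_conditional() helpers used throughout mali_pp_job_start() above take the register's post-reset default as an extra argument, which suggests they can skip writes that would merely restate that default and so trim register traffic on the job-start path. A small sketch of that idea, under that assumption and with placeholder accessor names:

#include <stdint.h>

/* Hypothetical MMIO write helper. */
static void write_reg(volatile uint32_t *base, uint32_t off, uint32_t v)
{
	base[off / 4] = v;
}

/* Write 'value' only when it differs from the register's known reset default;
 * right after a reset the register already holds 'reset_value', so a matching
 * write would be redundant. */
void write_conditional(volatile uint32_t *base, uint32_t off,
		       uint32_t value, uint32_t reset_value)
{
	if (value != reset_value)
		write_reg(base, off, value);
}
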
*/ -+ if (NULL == tracker->waiter_head) { -+ /* list is empty */ -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); -+ tracker->waiter_tail = waiter; -+ } else { -+ tracker->waiter_head->tracker_next = waiter; -+ } -+ tracker->waiter_head = waiter; ++ /* Write WBx registers */ ++ if (wb0_registers[0]) { /* M200_WB0_REG_SOURCE_SELECT register */ ++ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB0, wb0_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); ++ } + -+ /* Add waiter to timeline. */ -+ mali_timeline_insert_waiter(timeline, waiter); ++ if (wb1_registers[0]) { /* M200_WB1_REG_SOURCE_SELECT register */ ++ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB1, wb1_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); + } -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (-1 != tracker->fence.sync_fd) { -+ int ret; -+ struct mali_timeline_waiter *waiter; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence = sync_fence_fdget(tracker->fence.sync_fd); -+#else -+ sync_fence = mali_internal_sync_fence_fdget(tracker->fence.sync_fd); -+#endif -+ if (unlikely(NULL == sync_fence)) { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd)); -+ goto exit; -+ } + -+ /* Check if we have a zeroed waiter object available. */ -+ if (unlikely(NULL == waiter_tail)) { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); -+ goto exit; -+ } ++ if (wb2_registers[0]) { /* M200_WB2_REG_SOURCE_SELECT register */ ++ mali_hw_core_register_write_array_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_WB2, wb2_registers, _MALI_PP_MAX_WB_REGISTERS, mali_wb_registers_reset_values); ++ } + -+ /* Start asynchronous wait that will release waiter when the fence is signaled. */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback); -+ ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter); -+#else -+ mali_internal_sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback); -+ ret = mali_internal_sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter); -+#endif -+ if (1 == ret) { -+ /* Fence already signaled, no waiter needed. */ -+ tracker->fence.sync_fd = -1; -+ goto exit; -+ } else if (0 != ret) { -+ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret)); -+ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT; -+ goto exit; -+ } ++ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC, counter_src0); ++ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); ++ } ++ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC, counter_src1); ++ mali_hw_core_register_write_relaxed_conditional(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE, MALI200_REG_VAL_PERF_CNT_ENABLE, mali_perf_cnt_enable_reset_value); ++ } + -+ /* Grab new zeroed waiter object. 
*/ -+ waiter = waiter_tail; -+ waiter_tail = waiter_tail->tracker_next; ++#ifdef CONFIG_MALI400_HEATMAPS_ENABLED ++ if (job->uargs.perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE) { ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_CONTR, ((job->uargs.tilesx & 0x3FF) << 16) | 1); ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_PERFMON_BASE, job->uargs.heatmap_mem & 0xFFFFFFF8); ++ } ++#endif /* CONFIG_MALI400_HEATMAPS_ENABLED */ + -+ /* Increase the trigger ref count of the tracker. */ -+ tracker->trigger_ref_count++; ++ MALI_DEBUG_PRINT(3, ("Mali PP: Starting job 0x%08X part %u/%u on PP core %s\n", job, sub_job + 1, mali_pp_job_get_sub_job_count(job), core->hw_core.description)); + -+ waiter->point = MALI_TIMELINE_NO_POINT; -+ waiter->tracker = tracker; ++ /* Adding barrier to make sure all rester writes are finished */ ++ _mali_osk_write_mem_barrier(); + -+ /* Insert waiter on tracker's singly-linked waiter list. */ -+ if (NULL == tracker->waiter_head) { -+ /* list is empty */ -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); -+ tracker->waiter_tail = waiter; -+ } else { -+ tracker->waiter_head->tracker_next = waiter; -+ } -+ tracker->waiter_head = waiter; ++ /* This is the command that starts the core. ++ * ++ * Don't actually run the job if PROFILING_SKIP_PP_JOBS are set, just ++ * force core to assert the completion interrupt. ++ */ ++#if !defined(PROFILING_SKIP_PP_JOBS) ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_CTRL_MGMT, MALI200_REG_VAL_CTRL_MGMT_START_RENDERING); ++#else ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_END_OF_FRAME); ++#endif + -+ /* Also store waiter in separate field for easy access by sync callback. */ -+ tracker->waiter_sync = waiter; ++ /* Adding barrier to make sure previous rester writes is finished */ ++ _mali_osk_write_mem_barrier(); ++} + -+ /* Store the sync fence in tracker so we can retrieve in abort session, if needed. */ -+ tracker->sync_fence = sync_fence; ++u32 mali_pp_core_get_version(struct mali_pp_core *core) ++{ ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION); ++} + -+ sync_fence = NULL; ++struct mali_pp_core *mali_pp_get_global_pp_core(u32 index) ++{ ++ if (mali_global_num_pp_cores > index) { ++ return mali_global_pp_cores[index]; + } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)*/ -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ if ((NULL != tracker->timeline) && (MALI_TIMELINE_PP == tracker->timeline->id)) { + -+ struct mali_pp_job *job = (struct mali_pp_job *)tracker->job; ++ return NULL; ++} + -+ if (0 < job->dma_fence_context.num_dma_fence_waiter) { -+ struct mali_timeline_waiter *waiter; -+ /* Check if we have a zeroed waiter object available. */ -+ if (unlikely(NULL == waiter_tail)) { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); -+ goto exit; -+ } ++u32 mali_pp_get_glob_num_pp_cores(void) ++{ ++ return mali_global_num_pp_cores; ++} + -+ /* Grab new zeroed waiter object. 
*/ -+ waiter = waiter_tail; -+ waiter_tail = waiter_tail->tracker_next; ++/* ------------- interrupt handling below ------------------ */ ++static void mali_pp_irq_probe_trigger(void *data) ++{ ++ struct mali_pp_core *core = (struct mali_pp_core *)data; ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT, MALI200_REG_VAL_IRQ_BUS_ERROR); ++ _mali_osk_mem_barrier(); ++} + -+ /* Increase the trigger ref count of the tracker. */ -+ tracker->trigger_ref_count++; ++static _mali_osk_errcode_t mali_pp_irq_probe_ack(void *data) ++{ ++ struct mali_pp_core *core = (struct mali_pp_core *)data; ++ u32 irq_readout; + -+ waiter->point = MALI_TIMELINE_NO_POINT; -+ waiter->tracker = tracker; ++ irq_readout = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS); ++ if (MALI200_REG_VAL_IRQ_BUS_ERROR & irq_readout) { ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_CLEAR, MALI200_REG_VAL_IRQ_BUS_ERROR); ++ _mali_osk_mem_barrier(); ++ return _MALI_OSK_ERR_OK; ++ } + -+ /* Insert waiter on tracker's singly-linked waiter list. */ -+ if (NULL == tracker->waiter_head) { -+ /* list is empty */ -+ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); -+ tracker->waiter_tail = waiter; -+ } else { -+ tracker->waiter_head->tracker_next = waiter; -+ } -+ tracker->waiter_head = waiter; ++ return _MALI_OSK_ERR_FAULT; ++} + -+ /* Also store waiter in separate field for easy access by sync callback. */ -+ tracker->waiter_dma_fence = waiter; -+ } -+ } -+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE)*/ + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+exit: -+#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++#if 0 ++static void mali_pp_print_registers(struct mali_pp_core *core) ++{ ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_VERSION = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_VERSION))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_MASK = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_INT_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_STATUS))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: 
Register MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC))); ++ MALI_DEBUG_PRINT(2, ("Mali PP: Register MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x%08X\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE))); ++} ++#endif + -+ if (NULL != waiter_tail) { -+ mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head); -+ } ++#if 0 ++void mali_pp_print_state(struct mali_pp_core *core) ++{ ++ MALI_DEBUG_PRINT(2, ("Mali PP: State: 0x%08x\n", mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS))); ++} ++#endif + -+ /* Release the initial trigger ref count. */ -+ tracker->trigger_ref_count--; ++void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob) ++{ ++ u32 val0 = 0; ++ u32 val1 = 0; ++ u32 counter_src0 = mali_pp_job_get_perf_counter_src0(job, subjob); ++ u32 counter_src1 = mali_pp_job_get_perf_counter_src1(job, subjob); ++#if defined(CONFIG_MALI400_PROFILING) ++ int counter_index = COUNTER_FP_0_C0 + (2 * child->core_id); ++#endif + -+ /* If there were no waiters added to this tracker we activate immediately. */ -+ if (0 == tracker->trigger_ref_count) { -+ schedule_mask |= mali_timeline_tracker_activate(tracker); ++ if (MALI_HW_CORE_NO_COUNTER != counter_src0) { ++ val0 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE); ++ mali_pp_job_set_perf_counter_value0(job, subjob, val0); ++ ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_report_hw_counter(counter_index, val0); ++ _mali_osk_profiling_record_global_counters(counter_index, val0); ++#endif + } + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ if (MALI_HW_CORE_NO_COUNTER != counter_src1) { ++ val1 = mali_hw_core_register_read(&child->hw_core, MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE); ++ mali_pp_job_set_perf_counter_value1(job, subjob, val1); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (NULL != sync_fence) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence_put(sync_fence); -+#else -+ fput(sync_fence->file); ++#if defined(CONFIG_MALI400_PROFILING) ++ _mali_osk_profiling_report_hw_counter(counter_index + 1, val1); ++ _mali_osk_profiling_record_global_counters(counter_index + 1, val1); +#endif + } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -+ -+ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); +} + -+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system, -+ struct mali_timeline_tracker *tracker, -+ enum mali_timeline_id timeline_id) ++#if MALI_STATE_TRACKING ++u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size) +{ -+ int num_waiters = 0; -+ struct mali_timeline_waiter *waiter_tail, *waiter_head; -+ u32 tid = _mali_osk_get_tid(); ++ int n = 0; + -+ mali_timeline_point point = MALI_TIMELINE_NO_POINT; ++ n += _mali_osk_snprintf(buf + n, size - n, "\tPP #%d: %s\n", core->core_id, core->hw_core.description); + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ 
MALI_DEBUG_ASSERT_POINTER(system->session); -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++ return n; ++} ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp.h b/drivers/gpu/arm/mali400/mali/common/mali_pp.h +new file mode 100644 +index 000000000..f98b29866 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pp.h +@@ -0,0 +1,138 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); ++#ifndef __MALI_PP_H__ ++#define __MALI_PP_H__ + -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id)); ++#include "mali_osk.h" ++#include "mali_pp_job.h" ++#include "mali_hw_core.h" + -+ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); -+ tracker->system = system; ++struct mali_group; + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++#define MALI_MAX_NUMBER_OF_PP_CORES 9 + -+ num_waiters = mali_timeline_fence_num_waiters(&tracker->fence); ++/** ++ * Definition of the PP core struct ++ * Used to track a PP core in the system. ++ */ ++struct mali_pp_core { ++ struct mali_hw_core hw_core; /**< Common for all HW cores */ ++ _mali_osk_irq_t *irq; /**< IRQ handler */ ++ u32 core_id; /**< Unique core ID */ ++ u32 bcast_id; /**< The "flag" value used by the Mali-450 broadcast and DLBU unit */ ++}; + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ if (MALI_TIMELINE_PP == timeline_id) { -+ struct mali_pp_job *job = (struct mali_pp_job *)tracker->job; -+ if (0 < job->dma_fence_context.num_dma_fence_waiter) -+ num_waiters++; -+ } -+#endif ++_mali_osk_errcode_t mali_pp_initialize(void); ++void mali_pp_terminate(void); + -+ /* Allocate waiters. */ -+ mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); -+ -+ /* Add tracker to timeline. This will allocate a point for the tracker on the timeline. If -+ * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the -+ * point will be MALI_TIMELINE_NO_POINT. -+ * -+ * NOTE: the tracker can fail to be added if the timeline is full. If this happens, the -+ * point will be MALI_TIMELINE_NO_POINT. */ -+ MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE); -+ if (likely(timeline_id < MALI_TIMELINE_MAX)) { -+ struct mali_timeline *timeline = system->timelines[timeline_id]; -+ mali_timeline_insert_tracker(timeline, tracker); -+ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); -+ } -+ -+ point = tracker->point; ++struct mali_pp_core *mali_pp_create(const _mali_osk_resource_t *resource, struct mali_group *group, mali_bool is_virtual, u32 bcast_id); ++void mali_pp_delete(struct mali_pp_core *core); + -+ /* Create waiters for tracker based on supplied fence. Each waiter will increase the -+ * trigger ref count. 
*/ -+ mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head); -+ tracker = NULL; ++void mali_pp_stop_bus(struct mali_pp_core *core); ++_mali_osk_errcode_t mali_pp_stop_bus_wait(struct mali_pp_core *core); ++void mali_pp_reset_async(struct mali_pp_core *core); ++_mali_osk_errcode_t mali_pp_reset_wait(struct mali_pp_core *core); ++_mali_osk_errcode_t mali_pp_reset(struct mali_pp_core *core); ++_mali_osk_errcode_t mali_pp_hard_reset(struct mali_pp_core *core); + -+ /* At this point the tracker object might have been freed so we should no longer -+ * access it. */ ++void mali_pp_job_start(struct mali_pp_core *core, struct mali_pp_job *job, u32 sub_job, mali_bool restart_virtual); + ++u32 mali_pp_core_get_version(struct mali_pp_core *core); + -+ /* The tracker will always be activated after calling add_tracker, even if NO_POINT is -+ * returned. */ -+ return point; ++MALI_STATIC_INLINE u32 mali_pp_core_get_id(struct mali_pp_core *core) ++{ ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return core->core_id; +} + -+static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system, -+ struct mali_timeline_waiter *waiter) ++MALI_STATIC_INLINE u32 mali_pp_core_get_bcast_id(struct mali_pp_core *core) +{ -+ struct mali_timeline_tracker *tracker; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(waiter); ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return core->bcast_id; ++} + -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++struct mali_pp_core *mali_pp_get_global_pp_core(u32 index); ++u32 mali_pp_get_glob_num_pp_cores(void); + -+ tracker = waiter->tracker; -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++/* Debug */ ++u32 mali_pp_dump_state(struct mali_pp_core *core, char *buf, u32 size); + -+ /* At this point the waiter has been removed from the timeline's waiter list, but it is -+ * still on the tracker's waiter list. All of the tracker's waiters will be released when -+ * the tracker is activated. */ ++/** ++ * Put instrumented HW counters from the core(s) to the job object (if enabled) ++ * ++ * parent and child is always the same, except for virtual jobs on Mali-450. ++ * In this case, the counters will be enabled on the virtual core (parent), ++ * but values need to be read from the child cores. 
++ * ++ * @param parent The core used to see if the counters was enabled ++ * @param child The core to actually read the values from ++ * @job Job object to update with counter values (if enabled) ++ * @subjob Which subjob the counters are applicable for (core ID for virtual jobs) ++ */ ++void mali_pp_update_performance_counters(struct mali_pp_core *parent, struct mali_pp_core *child, struct mali_pp_job *job, u32 subjob); + -+ waiter->point = MALI_TIMELINE_NO_POINT; -+ waiter->tracker = NULL; ++MALI_STATIC_INLINE const char *mali_pp_core_description(struct mali_pp_core *core) ++{ ++ return core->hw_core.description; ++} + -+ tracker->trigger_ref_count--; -+ if (0 == tracker->trigger_ref_count) { -+ /* This was the last waiter; activate tracker */ -+ schedule_mask |= mali_timeline_tracker_activate(tracker); -+ tracker = NULL; ++MALI_STATIC_INLINE enum mali_interrupt_result mali_pp_get_interrupt_result(struct mali_pp_core *core) ++{ ++ u32 rawstat_used = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_RAWSTAT) & ++ MALI200_REG_VAL_IRQ_MASK_USED; ++ if (0 == rawstat_used) { ++ return MALI_INTERRUPT_RESULT_NONE; ++ } else if (MALI200_REG_VAL_IRQ_END_OF_FRAME == rawstat_used) { ++ return MALI_INTERRUPT_RESULT_SUCCESS; + } + -+ return schedule_mask; ++ return MALI_INTERRUPT_RESULT_ERROR; +} + -+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system, -+ enum mali_timeline_id timeline_id) ++MALI_STATIC_INLINE u32 mali_pp_get_rawstat(struct mali_pp_core *core) +{ -+ mali_timeline_point point; -+ struct mali_timeline *timeline; -+ u32 tid = _mali_osk_get_tid(); ++ MALI_DEBUG_ASSERT_POINTER(core); ++ return mali_hw_core_register_read(&core->hw_core, ++ MALI200_REG_ADDR_MGMT_INT_RAWSTAT); ++} + -+ MALI_DEBUG_ASSERT_POINTER(system); + -+ if (MALI_TIMELINE_MAX <= timeline_id) { -+ return MALI_TIMELINE_NO_POINT; -+ } ++MALI_STATIC_INLINE u32 mali_pp_is_active(struct mali_pp_core *core) ++{ ++ u32 status = mali_hw_core_register_read(&core->hw_core, MALI200_REG_ADDR_MGMT_STATUS); ++ return (status & MALI200_REG_VAL_STATUS_RENDERING_ACTIVE) ? 
MALI_TRUE : MALI_FALSE; ++} + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++MALI_STATIC_INLINE void mali_pp_mask_all_interrupts(struct mali_pp_core *core) ++{ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_NONE); ++} + -+ timeline = system->timelines[timeline_id]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++MALI_STATIC_INLINE void mali_pp_enable_interrupts(struct mali_pp_core *core) ++{ ++ mali_hw_core_register_write(&core->hw_core, MALI200_REG_ADDR_MGMT_INT_MASK, MALI200_REG_VAL_IRQ_MASK_USED); ++} + -+ point = MALI_TIMELINE_NO_POINT; -+ if (timeline->point_oldest != timeline->point_next) { -+ point = timeline->point_next - 1; -+ if (MALI_TIMELINE_NO_POINT == point) point--; -+ } ++MALI_STATIC_INLINE void mali_pp_write_addr_renderer_list(struct mali_pp_core *core, ++ struct mali_pp_job *job, u32 subjob) ++{ ++ u32 addr = mali_pp_job_get_addr_frame(job, subjob); ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_FRAME, addr); ++} + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+ return point; ++MALI_STATIC_INLINE void mali_pp_write_addr_stack(struct mali_pp_core *core, struct mali_pp_job *job) ++{ ++ u32 addr = mali_pp_job_get_addr_stack(job, core->core_id); ++ mali_hw_core_register_write_relaxed(&core->hw_core, MALI200_REG_ADDR_STACK, addr); +} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+static void mali_timeline_do_sync_fence_callback(void *arg) -+{ -+ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); -+ struct mali_timeline_tracker *tracker; -+ struct mali_timeline_tracker *tmp_tracker; -+ u32 tid = _mali_osk_get_tid(); ++#endif /* __MALI_PP_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c +new file mode 100644 +index 000000000..b0216d4c1 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.c +@@ -0,0 +1,316 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ MALI_IGNORE(arg); ++#include "mali_pp.h" ++#include "mali_pp_job.h" ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_kernel_common.h" ++#include "mali_uk_types.h" ++#include "mali_executor.h" ++#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++#include "linux/mali_memory_dma_buf.h" ++#endif ++#include "mali_memory_swap_alloc.h" ++#include "mali_scheduler.h" + -+ /* -+ * Quickly "unhook" the jobs pending to be deleted, so we can release -+ * the lock before we start deleting the job objects -+ * (without any locks held) -+ */ -+ _mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock); -+ _mali_osk_list_move_list(&sync_fence_callback_queue, &list); -+ _mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock); ++static u32 pp_counter_src0 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 0, MALI_HW_CORE_NO_COUNTER for disabled */ ++static u32 pp_counter_src1 = MALI_HW_CORE_NO_COUNTER; /**< Performance counter 1, MALI_HW_CORE_NO_COUNTER for disabled */ ++static _mali_osk_atomic_t pp_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */ ++static u32 pp_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER }; ++static u32 pp_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS] = { MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER, MALI_HW_CORE_NO_COUNTER }; + -+ _MALI_OSK_LIST_FOREACHENTRY(tracker, tmp_tracker, &list, -+ struct mali_timeline_tracker, sync_fence_signal_list) { -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ mali_bool is_aborting = MALI_FALSE; -+ int fence_status = 0; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence = NULL; -+#else -+ struct mali_internal_sync_fence *sync_fence = NULL; -+#endif -+ struct mali_timeline_system *system = NULL; -+ struct mali_timeline_waiter *waiter = NULL; ++void mali_pp_job_initialize(void) ++{ ++ _mali_osk_atomic_init(&pp_counter_per_sub_job_count, 0); ++} + -+ _mali_osk_list_delinit(&tracker->sync_fence_signal_list); ++void mali_pp_job_terminate(void) ++{ ++ _mali_osk_atomic_term(&pp_counter_per_sub_job_count); ++} + -+ sync_fence = tracker->sync_fence; -+ MALI_DEBUG_ASSERT_POINTER(sync_fence); ++struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, ++ _mali_uk_pp_start_job_s __user *uargs, u32 id) ++{ ++ struct mali_pp_job *job; ++ u32 perf_counter_flag; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ fence_status = sync_fence->status; -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ fence_status = atomic_read(&sync_fence->status); -+#else -+ fence_status = sync_fence->fence->ops->signaled(sync_fence->fence); -+#endif ++ job = _mali_osk_calloc(1, sizeof(struct mali_pp_job)); ++ if (NULL != job) { ++ ++ _mali_osk_list_init(&job->list); ++ _mali_osk_list_init(&job->session_fb_lookup_list); ++ _mali_osk_atomic_inc(&session->number_of_pp_jobs); + -+ system = tracker->system; -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); ++ if (0 != _mali_osk_copy_from_user(&job->uargs, uargs, sizeof(_mali_uk_pp_start_job_s))) { ++ goto fail; ++ } + -+ mali_spinlock_reentrant_wait(system->spinlock, 
tid); ++ if (job->uargs.num_cores > _MALI_PP_MAX_SUB_JOBS) { ++ MALI_PRINT_ERROR(("Mali PP job: Too many sub jobs specified in job object\n")); ++ goto fail; ++ } + -+ is_aborting = system->session->is_aborting; -+ if (!is_aborting && (0 > fence_status)) { -+ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status)); -+ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT; ++ if (!mali_pp_job_use_no_notification(job)) { ++ job->finished_notification = _mali_osk_notification_create(_MALI_NOTIFICATION_PP_FINISHED, sizeof(_mali_uk_pp_job_finished_s)); ++ if (NULL == job->finished_notification) goto fail; + } + -+ waiter = tracker->waiter_sync; -+ MALI_DEBUG_ASSERT_POINTER(waiter); ++ perf_counter_flag = mali_pp_job_get_perf_counter_flag(job); + -+ tracker->sync_fence = NULL; -+ tracker->fence.sync_fd = -1; ++ /* case when no counters came from user space ++ * so pass the debugfs / DS-5 provided global ones to the job object */ ++ if (!((perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE) || ++ (perf_counter_flag & _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE))) { ++ u32 sub_job_count = _mali_osk_atomic_read(&pp_counter_per_sub_job_count); + -+ schedule_mask |= mali_timeline_system_release_waiter(system, waiter); ++ /* These counters apply for all virtual jobs, and where no per sub job counter is specified */ ++ job->uargs.perf_counter_src0 = pp_counter_src0; ++ job->uargs.perf_counter_src1 = pp_counter_src1; + -+ /* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. */ -+ if (is_aborting) { -+ _mali_osk_wait_queue_wake_up(system->wait_queue); ++ /* We only copy the per sub job array if it is enabled with at least one counter */ ++ if (0 < sub_job_count) { ++ job->perf_counter_per_sub_job_count = sub_job_count; ++ _mali_osk_memcpy(job->perf_counter_per_sub_job_src0, pp_counter_per_sub_job_src0, sizeof(pp_counter_per_sub_job_src0)); ++ _mali_osk_memcpy(job->perf_counter_per_sub_job_src1, pp_counter_per_sub_job_src1, sizeof(pp_counter_per_sub_job_src1)); ++ } + } + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ job->session = session; ++ job->id = id; + -+ /* -+ * Older versions of Linux, before 3.5, doesn't support fput() in interrupt -+ * context. For those older kernels, allocate a list object and put the -+ * fence object on that and defer the call to sync_fence_put() to a workqueue. -+ */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) -+ { -+ struct mali_deferred_fence_put_entry *obj; ++ job->sub_jobs_num = job->uargs.num_cores ? 
job->uargs.num_cores : 1; ++ job->pid = _mali_osk_get_pid(); ++ job->tid = _mali_osk_get_tid(); + -+ obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC); -+ if (obj) { -+ unsigned long flags; -+ mali_bool schedule = MALI_FALSE; ++ _mali_osk_atomic_init(&job->sub_jobs_completed, 0); ++ _mali_osk_atomic_init(&job->sub_job_errors, 0); ++ job->swap_status = MALI_NO_SWAP_IN; ++ job->user_notification = MALI_FALSE; ++ job->num_pp_cores_in_virtual = 0; + -+ obj->fence = sync_fence; ++ if (job->uargs.num_memory_cookies > session->allocation_mgr.mali_allocation_num) { ++ MALI_PRINT_ERROR(("Mali PP job: The number of memory cookies is invalid !\n")); ++ goto fail; ++ } + -+ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags); -+ if (hlist_empty(&mali_timeline_sync_fence_to_free_list)) -+ schedule = MALI_TRUE; -+ hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list); -+ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags); ++ if (job->uargs.num_memory_cookies > 0) { ++ u32 size; ++ u32 __user *memory_cookies = (u32 __user *)(uintptr_t)job->uargs.memory_cookies; + -+ if (schedule) -+ schedule_delayed_work(&delayed_sync_fence_put, 0); ++ size = sizeof(*memory_cookies) * (job->uargs.num_memory_cookies); ++ ++ job->memory_cookies = _mali_osk_malloc(size); ++ if (NULL == job->memory_cookies) { ++ MALI_PRINT_ERROR(("Mali PP job: Failed to allocate %d bytes of memory cookies!\n", size)); ++ goto fail; ++ } ++ ++ if (0 != _mali_osk_copy_from_user(job->memory_cookies, memory_cookies, size)) { ++ MALI_PRINT_ERROR(("Mali PP job: Failed to copy %d bytes of memory cookies from user!\n", size)); ++ goto fail; + } + } -+#else -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence_put(sync_fence); -+#else -+ fput(sync_fence->file); -+#endif -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */ + -+ if (!is_aborting) { -+ mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE); ++ if (_MALI_OSK_ERR_OK != mali_pp_job_check(job)) { ++ /* Not a valid job. 
*/ ++ goto fail; + } -+ } -+} -+#endif -+_mali_osk_errcode_t mali_timeline_initialize(void) -+{ -+ _mali_osk_atomic_init(&gp_tracker_count, 0); -+ _mali_osk_atomic_init(&phy_pp_tracker_count, 0); -+ _mali_osk_atomic_init(&virt_pp_tracker_count, 0); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ sync_fence_callback_list_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); -+ if (NULL == sync_fence_callback_list_lock) { -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_PP, NULL, job); ++ mali_timeline_fence_copy_uk_fence(&(job->tracker.fence), &(job->uargs.fence)); + -+ sync_fence_callback_work_t = _mali_osk_wq_create_work( -+ mali_timeline_do_sync_fence_callback, NULL); ++ mali_mem_swap_in_pages(job); + -+ if (NULL == sync_fence_callback_work_t) { -+ return _MALI_OSK_ERR_FAULT; ++ return job; + } -+#endif -+ return _MALI_OSK_ERR_OK; -+} -+ + -+void mali_timeline_terminate(void) -+{ -+ _mali_osk_atomic_term(&gp_tracker_count); -+ _mali_osk_atomic_term(&phy_pp_tracker_count); -+ _mali_osk_atomic_term(&virt_pp_tracker_count); -+ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (NULL != sync_fence_callback_list_lock) { -+ _mali_osk_spinlock_irq_term(sync_fence_callback_list_lock); -+ sync_fence_callback_list_lock = NULL; ++fail: ++ if (NULL != job) { ++ mali_pp_job_delete(job); + } + -+ if (NULL != sync_fence_callback_work_t) { -+ _mali_osk_wq_delete_work(sync_fence_callback_work_t); -+ sync_fence_callback_work_t = NULL; -+ } -+#endif ++ return NULL; +} + -+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) -+ -+static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id) ++void mali_pp_job_delete(struct mali_pp_job *job) +{ -+ struct mali_timeline *timeline; -+ struct mali_timeline_system *system; -+ -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++ struct mali_session_data *session; + -+ MALI_DEBUG_ASSERT_POINTER(tracker->timeline); -+ timeline = tracker->timeline; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->list)); ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&job->session_fb_lookup_list)); + -+ MALI_DEBUG_ASSERT_POINTER(timeline->system); -+ system = timeline->system; ++ session = mali_pp_job_get_session(job); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ if (MALI_TIMELINE_MAX > id) { -+ if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) { -+ return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]); -+ } else { -+ return MALI_FALSE; ++ if (NULL != job->memory_cookies) { ++#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++ /* Unmap buffers attached to job */ ++ mali_dma_buf_unmap_job(job); ++#endif ++ if (MALI_NO_SWAP_IN != job->swap_status) { ++ mali_mem_swap_out_pages(job); + } -+ } else { -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id); -+ return MALI_FALSE; -+ } -+} -+ -+static const char *timeline_id_to_string(enum mali_timeline_id id) -+{ -+ switch (id) { -+ case MALI_TIMELINE_GP: -+ return "GP"; -+ case MALI_TIMELINE_PP: -+ return "PP"; -+ case MALI_TIMELINE_SOFT: -+ return "SOFT"; -+ default: -+ return "NONE"; -+ } -+} + -+static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type) -+{ -+ switch (type) { -+ case MALI_TIMELINE_TRACKER_GP: -+ return "GP"; -+ case MALI_TIMELINE_TRACKER_PP: -+ return "PP"; -+ case MALI_TIMELINE_TRACKER_SOFT: -+ return "SOFT"; -+ case MALI_TIMELINE_TRACKER_WAIT: -+ return "WAIT"; 
-+ case MALI_TIMELINE_TRACKER_SYNC: -+ return "SYNC"; -+ default: -+ return "INVALID"; ++ _mali_osk_free(job->memory_cookies); + } -+} -+ -+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker) -+{ -+ struct mali_timeline *timeline = NULL; -+ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ timeline = tracker->timeline; + -+ if (0 != tracker->trigger_ref_count) { -+ return MALI_TIMELINE_TS_WAITING; ++ if (job->user_notification) { ++ mali_scheduler_return_pp_job_to_user(job, ++ job->num_pp_cores_in_virtual); + } + -+ if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) { -+ return MALI_TIMELINE_TS_ACTIVE; ++ if (NULL != job->finished_notification) { ++ _mali_osk_notification_delete(job->finished_notification); + } + -+ if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) { -+ return MALI_TIMELINE_TS_INIT; -+ } ++ _mali_osk_atomic_term(&job->sub_jobs_completed); ++ _mali_osk_atomic_term(&job->sub_job_errors); ++ _mali_osk_atomic_dec(&session->number_of_pp_jobs); ++ _mali_osk_free(job); + -+ return MALI_TIMELINE_TS_FINISH; ++ _mali_osk_wait_queue_wake_up(session->wait_queue); +} + -+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx) ++void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list) +{ -+ const char *tracker_state = "IWAF"; -+ char state_char = 'I'; -+ char tracker_type[32] = {0}; -+ -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++ struct mali_pp_job *iter; ++ struct mali_pp_job *tmp; + -+ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker)); -+ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type)); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (0 != tracker->trigger_ref_count) { -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], -+ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? 
"WaitSOFT" : " ", tracker->fence.points[2], -+ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job))); -+ } else { -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job))); ++ /* Find position in list/queue where job should be added. */ ++ _MALI_OSK_LIST_FOREACHENTRY_REVERSE(iter, tmp, list, ++ struct mali_pp_job, list) { ++ /* job should be started after iter if iter is in progress. */ ++ if (0 < iter->sub_jobs_started) { ++ break; ++ } + ++ /* ++ * job should be started after iter if it has a higher ++ * job id. A span is used to handle job id wrapping. ++ */ ++ if ((mali_pp_job_get_id(job) - ++ mali_pp_job_get_id(iter)) < ++ MALI_SCHEDULER_JOB_ID_SPAN) { ++ break; ++ } + } -+#else -+ if (0 != tracker->trigger_ref_count) { -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], -+ (unsigned int)(uintptr_t)(tracker->job)); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], -+ (unsigned int)(uintptr_t)(tracker->job))); -+ } else { -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ (unsigned int)(uintptr_t)(tracker->job)); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: %s %u %c job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ (unsigned int)(uintptr_t)(tracker->job))); + -+ } -+#endif ++ _mali_osk_list_add(&job->list, &iter->list); +} + -+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx) -+{ -+ struct mali_timeline_tracker *tracker = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job) ++{ ++ /* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */ ++ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) { ++ return job->uargs.perf_counter_src0; ++ } + -+ tracker = timeline->tracker_tail; -+ while (NULL != tracker) { -+ mali_timeline_debug_print_tracker(tracker, print_ctx); -+ tracker = tracker->timeline_next; ++ /* Use per sub job counter if enabled... 
*/ ++ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src0[sub_job]) { ++ return job->perf_counter_per_sub_job_src0[sub_job]; + } ++ ++ /* ...else default to global job counter */ ++ return job->uargs.perf_counter_src0; +} + -+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)) -+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker) ++u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job) +{ -+ const char *tracker_state = "IWAF"; -+ char state_char = 'I'; -+ char tracker_type[32] = {0}; -+ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ -+ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker)); -+ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type)); -+ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (0 != tracker->trigger_ref_count) { -+ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], -+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job)); -+ } else { -+ MALI_PRINT(("TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ tracker->fence.sync_fd, tracker->sync_fence, tracker->job)); ++ /* Virtual jobs always use the global job counter (or if there are per sub job counters at all) */ ++ if (mali_pp_job_is_virtual(job) || 0 == job->perf_counter_per_sub_job_count) { ++ /* Virtual jobs always use the global job counter */ ++ return job->uargs.perf_counter_src1; + } -+#else -+ if (0 != tracker->trigger_ref_count) { -+ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], -+ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], -+ tracker->job)); -+ } else { -+ MALI_PRINT(("TL: %s %u %c job:(0x%08X)\n", -+ tracker_type, tracker->point, state_char, -+ tracker->job)); ++ ++ /* Use per sub job counter if enabled... 
*/ ++ if (MALI_HW_CORE_NO_COUNTER != job->perf_counter_per_sub_job_src1[sub_job]) { ++ return job->perf_counter_per_sub_job_src1[sub_job]; + } -+#endif ++ ++ /* ...else default to global job counter */ ++ return job->uargs.perf_counter_src1; +} + -+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline) ++void mali_pp_job_set_pp_counter_global_src0(u32 counter) +{ -+ struct mali_timeline_tracker *tracker = NULL; -+ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ -+ tracker = timeline->tracker_tail; -+ while (NULL != tracker) { -+ mali_timeline_debug_direct_print_tracker(tracker); -+ tracker = tracker->timeline_next; -+ } ++ pp_counter_src0 = counter; +} + -+#endif -+ -+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx) ++void mali_pp_job_set_pp_counter_global_src1(u32 counter) +{ -+ int i; -+ int num_printed = 0; -+ u32 tid = _mali_osk_get_tid(); -+ -+ MALI_DEBUG_ASSERT_POINTER(system); -+ -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ -+ /* Print all timelines */ -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline = system->timelines[i]; -+ -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ pp_counter_src1 = counter; ++} + -+ if (NULL == timeline->tracker_head) continue; -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n", -+ timeline_id_to_string((enum mali_timeline_id)i)); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: Timeline %s: oldest (%u) next(%u)\n", -+ timeline_id_to_string((enum mali_timeline_id)i), timeline->point_oldest, timeline->point_next)); ++void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter) ++{ ++ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); + -+ mali_timeline_debug_print_timeline(timeline, print_ctx); -+ num_printed++; ++ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src0[sub_job]) { ++ /* increment count since existing counter was disabled */ ++ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count); + } + -+ if (0 == num_printed) { -+ if (print_ctx) -+ _mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n"); -+ else -+ MALI_DEBUG_PRINT(2, ("TL: All timelines empty\n")); ++ if (MALI_HW_CORE_NO_COUNTER == counter) { ++ /* decrement count since new counter is disabled */ ++ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count); + } + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); -+} ++ /* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */ + -+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */ ++ pp_counter_per_sub_job_src0[sub_job] = counter; ++} + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+void mali_timeline_dma_fence_callback(void *pp_job_ptr) ++void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter) +{ -+ struct mali_timeline_system *system; -+ struct mali_timeline_waiter *waiter; -+ struct mali_timeline_tracker *tracker; -+ struct mali_pp_job *pp_job = (struct mali_pp_job *)pp_job_ptr; -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; -+ u32 tid = _mali_osk_get_tid(); -+ mali_bool is_aborting = MALI_FALSE; -+ -+ MALI_DEBUG_ASSERT_POINTER(pp_job); -+ -+ tracker = &pp_job->tracker; -+ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); + -+ system = tracker->system; -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(system->session); ++ if (MALI_HW_CORE_NO_COUNTER == pp_counter_per_sub_job_src1[sub_job]) { ++ /* increment count since existing counter was disabled 
*/ ++ _mali_osk_atomic_inc(&pp_counter_per_sub_job_count); ++ } + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ if (MALI_HW_CORE_NO_COUNTER == counter) { ++ /* decrement count since new counter is disabled */ ++ _mali_osk_atomic_dec(&pp_counter_per_sub_job_count); ++ } + -+ waiter = tracker->waiter_dma_fence; -+ MALI_DEBUG_ASSERT_POINTER(waiter); ++ /* PS: A change from MALI_HW_CORE_NO_COUNTER to MALI_HW_CORE_NO_COUNTER will inc and dec, result will be 0 change */ + -+ schedule_mask |= mali_timeline_system_release_waiter(system, waiter); ++ pp_counter_per_sub_job_src1[sub_job] = counter; ++} + -+ is_aborting = system->session->is_aborting; ++u32 mali_pp_job_get_pp_counter_global_src0(void) ++{ ++ return pp_counter_src0; ++} + -+ /* If aborting, wake up sleepers that are waiting for dma fence callbacks to complete. */ -+ if (is_aborting) { -+ _mali_osk_wait_queue_wake_up(system->wait_queue); -+ } ++u32 mali_pp_job_get_pp_counter_global_src1(void) ++{ ++ return pp_counter_src1; ++} + -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); ++ return pp_counter_per_sub_job_src0[sub_job]; ++} + -+ if (!is_aborting) { -+ mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE); -+ } ++u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT(sub_job < _MALI_PP_MAX_SUB_JOBS); ++ return pp_counter_per_sub_job_src1[sub_job]; +} -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h new file mode 100644 -index 000000000..3e8bfc8fb +index 000000000..d0331f398 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline.h -@@ -0,0 +1,587 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_pp_job.h +@@ -0,0 +1,594 @@ +/* -+ * Copyright (C) 2013-2018 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -302064,591 +305805,598 @@ index 000000000..3e8bfc8fb + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_TIMELINE_H__ -+#define __MALI_TIMELINE_H__ ++#ifndef __MALI_PP_JOB_H__ ++#define __MALI_PP_JOB_H__ + +#include "mali_osk.h" -+#include "mali_ukk.h" ++#include "mali_osk_list.h" ++#include "mali_uk_types.h" +#include "mali_session.h" +#include "mali_kernel_common.h" -+#include "mali_spinlock_reentrant.h" -+#include "mali_sync.h" -+#include "mali_scheduler_types.h" -+#include ++#include "regs/mali_200_regs.h" ++#include "mali_kernel_core.h" ++#include "mali_dlbu.h" ++#include "mali_timeline.h" ++#include "mali_scheduler.h" ++#include "mali_executor.h" ++#if defined(CONFIG_DMA_SHARED_BUFFER) && !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++#include "linux/mali_memory_dma_buf.h" ++#endif ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++#include "linux/mali_dma_fence.h" ++#endif ++ ++typedef enum pp_job_status { ++ MALI_NO_SWAP_IN, ++ MALI_SWAP_IN_FAIL, ++ MALI_SWAP_IN_SUCC, ++} pp_job_status; + +/** -+ * Soft job timeout. ++ * This structure represents a PP job, including all sub jobs. 
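++ * (Editor's note, not part of the original ARM driver source: on a physical
++ * multi-core configuration each sub job appears to map to one PP core, so
++ * uargs.num_cores sub jobs are queued; a Mali-450/470 "virtual" job is
++ * submitted with num_cores == 0 and the DLBU splits the frame across the
++ * cores, which is why sub_jobs_num falls back to 1 in that case.)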
+ * -+ * Soft jobs have to be signaled as complete after activation. Normally this is done by user space, -+ * but in order to guarantee that every soft job is completed, we also have a timer. ++ * The PP job object itself is not protected by any single lock, ++ * but relies on other locks instead (scheduler, executor and timeline lock). ++ * Think of the job object as moving between these sub systems through-out ++ * its lifetime. Different part of the PP job struct is used by different ++ * subsystems. Accessor functions ensure that correct lock is taken. ++ * Do NOT access any data members directly from outside this module! + */ -+#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */ ++struct mali_pp_job { ++ /* ++ * These members are typically only set at creation, ++ * and only read later on. ++ * They do not require any lock protection. ++ */ ++ _mali_uk_pp_start_job_s uargs; /**< Arguments from user space */ ++ struct mali_session_data *session; /**< Session which submitted this job */ ++ u32 pid; /**< Process ID of submitting process */ ++ u32 tid; /**< Thread ID of submitting thread */ ++ u32 id; /**< Identifier for this job in kernel space (sequential numbering) */ ++ u32 cache_order; /**< Cache order used for L2 cache flushing (sequential numbering) */ ++ struct mali_timeline_tracker tracker; /**< Timeline tracker for this job */ ++ _mali_osk_notification_t *finished_notification; /**< Notification sent back to userspace on job complete */ ++ u32 perf_counter_per_sub_job_count; /**< Number of values in the two arrays which is != MALI_HW_CORE_NO_COUNTER */ ++ u32 perf_counter_per_sub_job_src0[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src0 */ ++ u32 perf_counter_per_sub_job_src1[_MALI_PP_MAX_SUB_JOBS]; /**< Per sub job counters src1 */ ++ u32 sub_jobs_num; /**< Number of subjobs; set to 1 for Mali-450 if DLBU is used, otherwise equals number of PP cores */ + -+/** -+ * Timeline type. -+ */ -+typedef enum mali_timeline_id { -+ MALI_TIMELINE_GP = MALI_UK_TIMELINE_GP, /**< GP job timeline. */ -+ MALI_TIMELINE_PP = MALI_UK_TIMELINE_PP, /**< PP job timeline. */ -+ MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */ -+ MALI_TIMELINE_MAX = MALI_UK_TIMELINE_MAX -+} mali_timeline_id; ++ pp_job_status swap_status; /**< Used to track each PP job swap status, if fail, we need to drop them in scheduler part */ ++ mali_bool user_notification; /**< When we deferred delete PP job, we need to judge if we need to send job finish notification to user space */ ++ u32 num_pp_cores_in_virtual; /**< How many PP cores we have when job finished */ + -+/** -+ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker). -+ */ -+#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX ++ /* ++ * These members are used by both scheduler and executor. ++ * They are "protected" by atomic operations. ++ */ ++ _mali_osk_atomic_t sub_jobs_completed; /**< Number of completed sub-jobs in this superjob */ ++ _mali_osk_atomic_t sub_job_errors; /**< Bitfield with errors (errors for each single sub-job is or'ed together) */ + -+/** -+ * Tracker type. -+ */ -+typedef enum mali_timeline_tracker_type { -+ MALI_TIMELINE_TRACKER_GP = 0, /**< Tracker used by GP jobs. */ -+ MALI_TIMELINE_TRACKER_PP = 1, /**< Tracker used by PP jobs. */ -+ MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */ -+ MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */ -+ MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. 
*/ -+ MALI_TIMELINE_TRACKER_MAX = 5, -+} mali_timeline_tracker_type; ++ /* ++ * These members are used by scheduler, but only when no one else ++ * knows about this job object but the working function. ++ * No lock is thus needed for these. ++ */ ++ u32 *memory_cookies; /**< Memory cookies attached to job */ + -+/** -+ * Tracker activation error. -+ */ -+typedef u32 mali_timeline_activation_error; -+#define MALI_TIMELINE_ACTIVATION_ERROR_NONE 0 -+#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT (1<<1) -+#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0) ++ /* ++ * These members are used by the scheduler, ++ * protected by scheduler lock ++ */ ++ _mali_osk_list_t list; /**< Used to link jobs together in the scheduler queue */ ++ _mali_osk_list_t session_fb_lookup_list; /**< Used to link jobs together from the same frame builder in the session */ + -+/** -+ * Type used to represent a point on a timeline. -+ */ -+typedef u32 mali_timeline_point; ++ u32 sub_jobs_started; /**< Total number of sub-jobs started (always started in ascending order) */ + -+/** -+ * Used to represent that no point on a timeline. -+ */ -+#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0) ++ /* ++ * Set by executor/group on job completion, read by scheduler when ++ * returning job to user. Hold executor lock when setting, ++ * no lock needed when reading ++ */ ++ u32 perf_counter_value0[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 0 (to be returned to user space), one for each sub job */ ++ u32 perf_counter_value1[_MALI_PP_MAX_SUB_JOBS]; /**< Value of performance counter 1 (to be returned to user space), one for each sub job */ + -+/** -+ * The maximum span of points on a timeline. A timeline will be considered full if the difference -+ * between the oldest and newest points is equal or larger to this value. -+ */ -+#define MALI_TIMELINE_MAX_POINT_SPAN 65536 ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ struct mali_dma_fence_context dma_fence_context; /**< The mali dma fence context to record dma fence waiters that this job wait for */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ struct dma_fence *rendered_dma_fence; /**< the new dma fence link to this job */ ++#else ++ struct fence *rendered_dma_fence; /**< the new dma fence link to this job */ ++#endif ++#endif ++}; + -+/** -+ * Magic value used to assert on validity of trackers. -+ */ -+#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd ++void mali_pp_job_initialize(void); ++void mali_pp_job_terminate(void); + -+struct mali_timeline; -+struct mali_timeline_waiter; -+struct mali_timeline_tracker; ++struct mali_pp_job *mali_pp_job_create(struct mali_session_data *session, _mali_uk_pp_start_job_s *uargs, u32 id); ++void mali_pp_job_delete(struct mali_pp_job *job); + -+/** -+ * Timeline fence. -+ */ -+struct mali_timeline_fence { -+ mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */ -+ s32 sync_fd; /**< A file descriptor representing a sync fence, or -1. */ -+}; ++u32 mali_pp_job_get_perf_counter_src0(struct mali_pp_job *job, u32 sub_job); ++u32 mali_pp_job_get_perf_counter_src1(struct mali_pp_job *job, u32 sub_job); + -+/** -+ * Timeline system. -+ * -+ * The Timeline system has a set of timelines associated with a session. 
-+ */ -+struct mali_timeline_system { -+ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */ -+ struct mali_timeline *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */ ++void mali_pp_job_set_pp_counter_global_src0(u32 counter); ++void mali_pp_job_set_pp_counter_global_src1(u32 counter); ++void mali_pp_job_set_pp_counter_sub_job_src0(u32 sub_job, u32 counter); ++void mali_pp_job_set_pp_counter_sub_job_src1(u32 sub_job, u32 counter); + -+ /* Single-linked list of unused waiter objects. Uses the tracker_next field in tracker. */ -+ struct mali_timeline_waiter *waiter_empty_list; ++u32 mali_pp_job_get_pp_counter_global_src0(void); ++u32 mali_pp_job_get_pp_counter_global_src1(void); ++u32 mali_pp_job_get_pp_counter_sub_job_src0(u32 sub_job); ++u32 mali_pp_job_get_pp_counter_sub_job_src1(u32 sub_job); + -+ struct mali_session_data *session; /**< Session that owns this system. */ ++MALI_STATIC_INLINE u32 mali_pp_job_get_id(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (NULL == job) ? 0 : job->id; ++} + -+ mali_bool timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */ ++MALI_STATIC_INLINE void mali_pp_job_set_cache_order(struct mali_pp_job *job, ++ u32 cache_order) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ job->cache_order = cache_order; ++} + -+ _mali_osk_wait_queue_t *wait_queue; /**< Wait queue. */ ++MALI_STATIC_INLINE u32 mali_pp_job_get_cache_order(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (NULL == job) ? 0 : job->cache_order; ++} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_timeline *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */ -+#else -+ struct mali_internal_sync_timeline *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */ -+#endif -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -+}; ++MALI_STATIC_INLINE u64 mali_pp_job_get_user_id(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.user_job_ptr; ++} + -+/** -+ * Timeline. Each Timeline system will have MALI_TIMELINE_MAX timelines. -+ */ -+struct mali_timeline { -+ mali_timeline_point point_next; /**< The next available point. */ -+ mali_timeline_point point_oldest; /**< The oldest point not released. */ ++MALI_STATIC_INLINE u32 mali_pp_job_get_frame_builder_id(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.frame_builder_id; ++} + -+ /* Double-linked list of trackers. Sorted in ascending order by tracker->time_number with -+ * tail pointing to the tracker with the oldest time. */ -+ struct mali_timeline_tracker *tracker_head; -+ struct mali_timeline_tracker *tracker_tail; ++MALI_STATIC_INLINE u32 mali_pp_job_get_flush_id(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.flush_id; ++} + -+ /* Double-linked list of waiters. Sorted in ascending order by waiter->time_number_wait -+ * with tail pointing to the waiter with oldest wait time. */ -+ struct mali_timeline_waiter *waiter_head; -+ struct mali_timeline_waiter *waiter_tail; ++MALI_STATIC_INLINE u32 mali_pp_job_get_pid(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->pid; ++} + -+ struct mali_timeline_system *system; /**< Timeline system this timeline belongs to. 
*/ -+ enum mali_timeline_id id; /**< Timeline type. */ ++MALI_STATIC_INLINE u32 mali_pp_job_get_tid(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->tid; ++} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_timeline *sync_tl; /**< Sync timeline that corresponds to this timeline. */ ++MALI_STATIC_INLINE u32 *mali_pp_job_get_frame_registers(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.frame_registers; ++} ++ ++MALI_STATIC_INLINE u32 *mali_pp_job_get_dlbu_registers(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.dlbu_registers; ++} ++ ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_virtual(struct mali_pp_job *job) ++{ ++#if (defined(CONFIG_MALI450) || defined(CONFIG_MALI470)) ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (0 == job->uargs.num_cores) ? MALI_TRUE : MALI_FALSE; +#else -+ struct mali_internal_sync_timeline *sync_tl; ++ return MALI_FALSE; +#endif -+ mali_bool destroyed; -+ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */ -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++} + -+ /* The following fields are used to time out soft job trackers. */ -+ _mali_osk_wq_delayed_work_t *delayed_work; -+ mali_bool timer_active; -+}; ++MALI_STATIC_INLINE u32 mali_pp_job_get_addr_frame(struct mali_pp_job *job, u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+/** -+ * Timeline waiter. -+ */ -+struct mali_timeline_waiter { -+ mali_timeline_point point; /**< Point on timeline we are waiting for to be released. */ -+ struct mali_timeline_tracker *tracker; /**< Tracker that is waiting. */ ++ if (mali_pp_job_is_virtual(job)) { ++ return MALI_DLBU_VIRT_ADDR; ++ } else if (0 == sub_job) { ++ return job->uargs.frame_registers[MALI200_REG_ADDR_FRAME / sizeof(u32)]; ++ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) { ++ return job->uargs.frame_registers_addr_frame[sub_job - 1]; ++ } + -+ struct mali_timeline_waiter *timeline_next; /**< Next waiter on timeline's waiter list. */ -+ struct mali_timeline_waiter *timeline_prev; /**< Previous waiter on timeline's waiter list. */ ++ return 0; ++} + -+ struct mali_timeline_waiter *tracker_next; /**< Next waiter on tracker's waiter list. */ -+}; ++MALI_STATIC_INLINE u32 mali_pp_job_get_addr_stack(struct mali_pp_job *job, u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+/** -+ * Timeline tracker. -+ */ -+struct mali_timeline_tracker { -+ MALI_DEBUG_CODE(u32 magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */ ++ if (0 == sub_job) { ++ return job->uargs.frame_registers[MALI200_REG_ADDR_STACK / sizeof(u32)]; ++ } else if (sub_job < _MALI_PP_MAX_SUB_JOBS) { ++ return job->uargs.frame_registers_addr_stack[sub_job - 1]; ++ } + -+ mali_timeline_point point; /**< Point on timeline for this tracker */ ++ return 0; ++} + -+ struct mali_timeline_tracker *timeline_next; /**< Next tracker on timeline's tracker list */ -+ struct mali_timeline_tracker *timeline_prev; /**< Previous tracker on timeline's tracker list */ ++void mali_pp_job_list_add(struct mali_pp_job *job, _mali_osk_list_t *list); + -+ u32 trigger_ref_count; /**< When zero tracker will be activated */ -+ mali_timeline_activation_error activation_error; /**< Activation error. 
*/ -+ struct mali_timeline_fence fence; /**< Fence used to create this tracker */ ++MALI_STATIC_INLINE void mali_pp_job_list_addtail(struct mali_pp_job *job, ++ _mali_osk_list_t *list) ++{ ++ _mali_osk_list_addtail(&job->list, list); ++} + -+ /* Single-linked list of waiters. Sorted in order of insertions with -+ * tail pointing to first waiter. */ -+ struct mali_timeline_waiter *waiter_head; -+ struct mali_timeline_waiter *waiter_tail; ++MALI_STATIC_INLINE void mali_pp_job_list_move(struct mali_pp_job *job, ++ _mali_osk_list_t *list) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(&job->list)); ++ _mali_osk_list_move(&job->list, list); ++} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ /* These are only used if the tracker is waiting on a sync fence. */ -+ struct mali_timeline_waiter *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence_waiter sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */ -+ struct sync_fence *sync_fence; /**< The sync fence this tracker is waiting on. */ -+#else -+ struct mali_internal_sync_fence_waiter sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */ -+ struct mali_internal_sync_fence *sync_fence; /**< The sync fence this tracker is waiting on. */ -+#endif -+ _mali_osk_list_t sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */ -+ _mali_osk_list_t sync_fence_signal_list; /** < List node used to singal sync fence callback function. */ ++MALI_STATIC_INLINE void mali_pp_job_list_remove(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ _mali_osk_list_delinit(&job->list); ++} + -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++MALI_STATIC_INLINE u32 *mali_pp_job_get_wb0_registers(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb0_registers; ++} + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+ struct mali_timeline_waiter *waiter_dma_fence; /**< A direct pointer to timeline waiter representing dma fence. */ -+#endif ++MALI_STATIC_INLINE u32 *mali_pp_job_get_wb1_registers(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb1_registers; ++} + -+ struct mali_timeline_system *system; /**< Timeline system. */ -+ struct mali_timeline *timeline; /**< Timeline, or NULL if not on a timeline. */ -+ enum mali_timeline_tracker_type type; /**< Type of tracker. */ -+ void *job; /**< Owner of tracker. */ ++MALI_STATIC_INLINE u32 *mali_pp_job_get_wb2_registers(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb2_registers; ++} + -+ /* The following fields are used to time out soft job trackers. 
*/ -+ unsigned long os_tick_create; -+ unsigned long os_tick_activate; -+ mali_bool timer_active; -+}; ++MALI_STATIC_INLINE u32 mali_pp_job_get_wb0_source_addr(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; ++} + -+extern _mali_osk_atomic_t gp_tracker_count; -+extern _mali_osk_atomic_t phy_pp_tracker_count; -+extern _mali_osk_atomic_t virt_pp_tracker_count; ++MALI_STATIC_INLINE u32 mali_pp_job_get_wb1_source_addr(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; ++} + -+/** -+ * What follows is a set of functions to check the state of a timeline and to determine where on a -+ * timeline a given point is. Most of these checks will translate the timeline so the oldest point -+ * on the timeline is aligned with zero. Remember that all of these calculation are done on -+ * unsigned integers. -+ * -+ * The following example illustrates the three different states a point can be in. The timeline has -+ * been translated to put the oldest point at zero: -+ * -+ * -+ * -+ * [ point is in forbidden zone ] -+ * 64k wide -+ * MALI_TIMELINE_MAX_POINT_SPAN -+ * -+ * [ point is on timeline ) ( point is released ] -+ * -+ * 0--------------------------##############################--------------------2^32 - 1 -+ * ^ ^ -+ * \ | -+ * oldest point on timeline | -+ * \ -+ * next point on timeline -+ */ ++MALI_STATIC_INLINE u32 mali_pp_job_get_wb2_source_addr(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_ADDR / sizeof(u32)]; ++} + -+/** -+ * Compare two timeline points -+ * -+ * Returns true if a is after b, false if a is before or equal to b. -+ * -+ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and -+ * the result will be correct if the points is less then UINT_MAX/2 apart. -+ * -+ * @param a Point on timeline -+ * @param b Point on timeline -+ * @return MALI_TRUE if a is after b -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b) ++MALI_STATIC_INLINE void mali_pp_job_disable_wb0(struct mali_pp_job *job) +{ -+ return 0 > ((s32)b) - ((s32)a); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; +} + -+/** -+ * Check if a point is on timeline. A point is on a timeline if it is greater than, or equal to, -+ * the oldest point, and less than the next point. -+ * -+ * @param timeline Timeline. -+ * @param point Point on timeline. -+ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point) ++MALI_STATIC_INLINE void mali_pp_job_disable_wb1(struct mali_pp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; ++} + -+ return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest); ++MALI_STATIC_INLINE void mali_pp_job_disable_wb2(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] = 0; +} + -+/** -+ * Check if a point has been released. 
A point is released if it is older than the oldest point on -+ * the timeline, newer than the next point, and also not in the forbidden zone. -+ * -+ * @param timeline Timeline. -+ * @param point Point on timeline. -+ * @return MALI_TRUE if point has been release, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point) ++MALI_STATIC_INLINE mali_bool mali_pp_job_all_writeback_unit_disabled(struct mali_pp_job *job) +{ -+ mali_timeline_point point_normalized; -+ mali_timeline_point next_normalized; ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); ++ if (job->uargs.wb0_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] || ++ job->uargs.wb1_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] || ++ job->uargs.wb2_registers[MALI200_REG_ADDR_WB_SOURCE_SELECT] ++ ) { ++ /* At least one output unit active */ ++ return MALI_FALSE; ++ } + -+ point_normalized = point - timeline->point_oldest; -+ next_normalized = timeline->point_next - timeline->point_oldest; ++ /* All outputs are disabled - we can abort the job */ ++ return MALI_TRUE; ++} + -+ return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN); ++MALI_STATIC_INLINE void mali_pp_job_fb_lookup_add(struct mali_pp_job *job) ++{ ++ u32 fb_lookup_id; ++ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ ++ fb_lookup_id = MALI_PP_JOB_FB_LOOKUP_LIST_MASK & job->uargs.frame_builder_id; ++ ++ MALI_DEBUG_ASSERT(MALI_PP_JOB_FB_LOOKUP_LIST_SIZE > fb_lookup_id); ++ ++ _mali_osk_list_addtail(&job->session_fb_lookup_list, ++ &job->session->pp_job_fb_lookup_list[fb_lookup_id]); +} + -+/** -+ * Check if the tracker that the point relate to has been released. A point is released if the tracker is not on the timeline. -+ * @param timeline Timeline. -+ * @param point Point on timeline. -+ * @return MALI_TRUE if the tracker has been release, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_tracker_released(struct mali_timeline *timeline, mali_timeline_point point) ++MALI_STATIC_INLINE void mali_pp_job_fb_lookup_remove(struct mali_pp_job *job) +{ -+ struct mali_timeline_tracker *tracker; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ _mali_osk_list_delinit(&job->session_fb_lookup_list); ++} + -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); ++MALI_STATIC_INLINE struct mali_session_data *mali_pp_job_get_session(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->session; ++} + -+ tracker = timeline->tracker_tail; ++MALI_STATIC_INLINE mali_bool mali_pp_job_has_started_sub_jobs(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ return (0 < job->sub_jobs_started) ? MALI_TRUE : MALI_FALSE; ++} + -+ while (NULL != tracker) { -+ if (point == tracker->point) -+ return MALI_FALSE; -+ tracker = tracker->timeline_next; ++MALI_STATIC_INLINE mali_bool mali_pp_job_has_unstarted_sub_jobs(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ return (job->sub_jobs_started < job->sub_jobs_num) ? MALI_TRUE : MALI_FALSE; ++} ++ ++/* Function used when we are terminating a session with jobs. Return TRUE if it has a rendering job. ++ Makes sure that no new subjobs are started. 
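++   The helper below returns void; it marks all remaining unstarted sub-jobs as
++   started and counts them as completed with errors, so no further sub-jobs are
++   handed out and the job can finish (as failed) once any running sub-jobs complete.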
*/ ++MALI_STATIC_INLINE void mali_pp_job_mark_unstarted_failed(struct mali_pp_job *job) ++{ ++ u32 jobs_remaining; ++ u32 i; ++ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ ++ jobs_remaining = job->sub_jobs_num - job->sub_jobs_started; ++ job->sub_jobs_started += jobs_remaining; ++ ++ /* Not the most optimal way, but this is only used in error cases */ ++ for (i = 0; i < jobs_remaining; i++) { ++ _mali_osk_atomic_inc(&job->sub_jobs_completed); ++ _mali_osk_atomic_inc(&job->sub_job_errors); + } ++} + -+ return MALI_TRUE; ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_complete(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->sub_jobs_num == ++ _mali_osk_atomic_read(&job->sub_jobs_completed)) ? ++ MALI_TRUE : MALI_FALSE; +} + -+/** -+ * Check if a point is valid. A point is valid if is on the timeline or has been released. -+ * -+ * @param timeline Timeline. -+ * @param point Point on timeline. -+ * @return MALI_TRUE if point is valid, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point) ++MALI_STATIC_INLINE u32 mali_pp_job_get_first_unstarted_sub_job(struct mali_pp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ return job->sub_jobs_started; +} + -+/** -+ * Check if timeline is empty (has no points on it). A timeline is empty if next == oldest. -+ * -+ * @param timeline Timeline. -+ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline) ++MALI_STATIC_INLINE u32 mali_pp_job_get_sub_job_count(struct mali_pp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ return timeline->point_next == timeline->point_oldest; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->sub_jobs_num; +} + -+/** -+ * Check if timeline is full. A valid timeline cannot span more than 64k points (@ref -+ * MALI_TIMELINE_MAX_POINT_SPAN). -+ * -+ * @param timeline Timeline. -+ * @return MALI_TRUE if timeline is full, MALI_FALSE if not. -+ */ -+MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline) ++MALI_STATIC_INLINE u32 mali_pp_job_unstarted_sub_job_count(struct mali_pp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(job->sub_jobs_num >= job->sub_jobs_started); ++ return (job->sub_jobs_num - job->sub_jobs_started); +} + -+/** -+ * Create a new timeline system. -+ * -+ * @param session The session this timeline system will belong to. -+ * @return New timeline system. -+ */ -+struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session); ++MALI_STATIC_INLINE u32 mali_pp_job_num_memory_cookies(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.num_memory_cookies; ++} + -+/** -+ * Abort timeline system. -+ * -+ * This will release all pending waiters in the timeline system causing all trackers to be -+ * activated. -+ * -+ * @param system Timeline system to abort all jobs from. 
-+ */ -+void mali_timeline_system_abort(struct mali_timeline_system *system); ++MALI_STATIC_INLINE u32 mali_pp_job_get_memory_cookie( ++ struct mali_pp_job *job, u32 index) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT(index < job->uargs.num_memory_cookies); ++ MALI_DEBUG_ASSERT_POINTER(job->memory_cookies); ++ return job->memory_cookies[index]; ++} + -+/** -+ * Destroy an empty timeline system. -+ * -+ * @note @ref mali_timeline_system_abort() should be called prior to this function. -+ * -+ * @param system Timeline system to destroy. -+ */ -+void mali_timeline_system_destroy(struct mali_timeline_system *system); ++MALI_STATIC_INLINE mali_bool mali_pp_job_needs_dma_buf_mapping(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+/** -+ * Stop the soft job timer. -+ * -+ * @param system Timeline system -+ */ -+void mali_timeline_system_stop_timer(struct mali_timeline_system *system); ++ if (0 < job->uargs.num_memory_cookies) { ++ return MALI_TRUE; ++ } + -+/** -+ * Add a tracker to a timeline system and optionally also on a timeline. -+ * -+ * Once added to the timeline system, the tracker is guaranteed to be activated. The tracker can be -+ * activated before this function returns. Thus, it is also possible that the tracker is released -+ * before this function returns, depending on the tracker type. -+ * -+ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the -+ * timeline system. -+ * -+ * @param system Timeline system the tracker will be added to. -+ * @param tracker The tracker to be added. -+ * @param timeline_id Id of the timeline the tracker will be added to, or -+ * MALI_TIMELINE_NONE if it should not be added on a timeline. -+ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline. -+ */ -+mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system, -+ struct mali_timeline_tracker *tracker, -+ enum mali_timeline_id timeline_id); ++ return MALI_FALSE; ++} + -+/** -+ * Get latest point on timeline. -+ * -+ * @param system Timeline system. -+ * @param timeline_id Id of timeline to get latest point from. -+ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty. -+ */ -+mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system, -+ enum mali_timeline_id timeline_id); ++MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_started(struct mali_pp_job *job, u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); + -+/** -+ * Initialize tracker. -+ * -+ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker). -+ * -+ * @param tracker Tracker to initialize. -+ * @param type Type of tracker. -+ * @param fence Fence used to set up dependencies for tracker. -+ * @param job Pointer to job struct this tracker is associated with. -+ */ -+void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker, -+ mali_timeline_tracker_type type, -+ struct mali_timeline_fence *fence, -+ void *job); ++ /* Assert that we are marking the "first unstarted sub job" as started */ ++ MALI_DEBUG_ASSERT(job->sub_jobs_started == sub_job); + -+/** -+ * Grab trigger ref count on tracker. -+ * -+ * This will prevent tracker from being activated until the trigger ref count reaches zero. -+ * -+ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init). -+ * -+ * @param system Timeline system. 
-+ * @param tracker Tracker. -+ */ -+void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker); ++ job->sub_jobs_started++; ++} + -+/** -+ * Release trigger ref count on tracker. -+ * -+ * If the trigger ref count reaches zero, the tracker will be activated. -+ * -+ * @param system Timeline system. -+ * @param tracker Tracker. -+ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error. -+ * @return Scheduling bitmask. -+ */ -+mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error); ++MALI_STATIC_INLINE void mali_pp_job_mark_sub_job_completed(struct mali_pp_job *job, mali_bool success) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+/** -+ * Release a tracker from the timeline system. -+ * -+ * This is used to signal that the job being tracker is finished, either due to normal circumstances -+ * (job complete/abort) or due to a timeout. -+ * -+ * We may need to schedule some subsystems after a tracker has been released and the returned -+ * bitmask will tell us if it is necessary. If the return value is non-zero, this value needs to be -+ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling. -+ * -+ * @note Tracker must have been activated before being released. -+ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead -+ * to a deadlock. -+ * -+ * @param tracker Tracker being released. -+ * @return Scheduling bitmask. -+ */ -+mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker); ++ _mali_osk_atomic_inc(&job->sub_jobs_completed); ++ if (MALI_FALSE == success) { ++ _mali_osk_atomic_inc(&job->sub_job_errors); ++ } ++} + -+MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error( -+ struct mali_timeline_tracker *tracker) ++MALI_STATIC_INLINE mali_bool mali_pp_job_was_success(struct mali_pp_job *job) +{ -+ MALI_DEBUG_ASSERT_POINTER(tracker); -+ return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT & -+ tracker->activation_error) ? MALI_TRUE : MALI_FALSE; ++ MALI_DEBUG_ASSERT_POINTER(job); ++ if (0 == _mali_osk_atomic_read(&job->sub_job_errors)) { ++ return MALI_TRUE; ++ } ++ return MALI_FALSE; +} + -+/** -+ * Copy data from a UK fence to a Timeline fence. -+ * -+ * @param fence Timeline fence. -+ * @param uk_fence UK fence. -+ */ -+void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence); ++MALI_STATIC_INLINE mali_bool mali_pp_job_use_no_notification( ++ struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->uargs.flags & _MALI_PP_JOB_FLAG_NO_NOTIFICATION) ? ++ MALI_TRUE : MALI_FALSE; ++} + -+_mali_osk_errcode_t mali_timeline_initialize(void); ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_pilot_job(struct mali_pp_job *job) ++{ ++ /* ++ * A pilot job is currently identified as jobs which ++ * require no callback notification. 
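++	 * In practice this means a job whose uargs.flags has
++	 * _MALI_PP_JOB_FLAG_NO_NOTIFICATION set (see
++	 * mali_pp_job_use_no_notification() above) is treated as a pilot job.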
++ */ ++ return mali_pp_job_use_no_notification(job); ++} + -+void mali_timeline_terminate(void); ++MALI_STATIC_INLINE _mali_osk_notification_t * ++mali_pp_job_get_finished_notification(struct mali_pp_job *job) ++{ ++ _mali_osk_notification_t *notification; + -+MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void) ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(job->finished_notification); ++ ++ notification = job->finished_notification; ++ job->finished_notification = NULL; ++ ++ return notification; ++} ++ ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_window_surface( ++ struct mali_pp_job *job) +{ -+ return 0 < _mali_osk_atomic_read(&gp_tracker_count); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->uargs.flags & _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE) ++ ? MALI_TRUE : MALI_FALSE; +} + -+MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void) ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_protected_job(struct mali_pp_job *job) +{ -+ return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (job->uargs.flags & _MALI_PP_JOB_FLAG_PROTECTED) ++ ? MALI_TRUE : MALI_FALSE; +} + -+MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void) ++MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_flag(struct mali_pp_job *job) +{ -+ return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->uargs.perf_counter_flag; +} + -+#if defined(DEBUG) -+#define MALI_TIMELINE_DEBUG_FUNCTIONS -+#endif /* DEBUG */ -+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) ++MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value0(struct mali_pp_job *job, u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->perf_counter_value0[sub_job]; ++} + -+/** -+ * Tracker state. Used for debug printing. -+ */ -+typedef enum mali_timeline_tracker_state { -+ MALI_TIMELINE_TS_INIT = 0, -+ MALI_TIMELINE_TS_WAITING = 1, -+ MALI_TIMELINE_TS_ACTIVE = 2, -+ MALI_TIMELINE_TS_FINISH = 3, -+} mali_timeline_tracker_state; ++MALI_STATIC_INLINE u32 mali_pp_job_get_perf_counter_value1(struct mali_pp_job *job, u32 sub_job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return job->perf_counter_value1[sub_job]; ++} + -+/** -+ * Get tracker state. -+ * -+ * @param tracker Tracker to check. -+ * @return State of tracker. -+ */ -+mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker); ++MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value0(struct mali_pp_job *job, u32 sub_job, u32 value) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ job->perf_counter_value0[sub_job] = value; ++} + -+/** -+ * Print debug information about tracker. -+ * -+ * @param tracker Tracker to print. -+ */ -+void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx); ++MALI_STATIC_INLINE void mali_pp_job_set_perf_counter_value1(struct mali_pp_job *job, u32 sub_job, u32 value) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_EXECUTOR_LOCK_HELD(); ++ job->perf_counter_value1[sub_job] = value; ++} ++ ++MALI_STATIC_INLINE _mali_osk_errcode_t mali_pp_job_check(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ if (mali_pp_job_is_virtual(job) && job->sub_jobs_num != 1) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ return _MALI_OSK_ERR_OK; ++} + +/** -+ * Print debug information about timeline. ++ * Returns MALI_TRUE if this job has more than two sub jobs and all sub jobs are unstarted. 
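++ * For example, a physical job split into four sub-jobs is considered large and
++ * unstarted until its first sub-job is marked started, while a job with only
++ * one or two sub-jobs never is.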
+ * -+ * @param timeline Timeline to print. ++ * @param job Job to check. ++ * @return MALI_TRUE if job has more than two sub jobs and all sub jobs are unstarted, MALI_FALSE if not. + */ -+void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx); ++MALI_STATIC_INLINE mali_bool mali_pp_job_is_large_and_unstarted(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT(!mali_pp_job_is_virtual(job)); + -+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)) -+void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker); -+void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline); -+#endif ++ return (0 == job->sub_jobs_started && 2 < job->sub_jobs_num); ++} + +/** -+ * Print debug information about timeline system. ++ * Get PP job's Timeline tracker. + * -+ * @param system Timeline system to print. ++ * @param job PP job. ++ * @return Pointer to Timeline tracker for the job. + */ -+void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx); ++MALI_STATIC_INLINE struct mali_timeline_tracker *mali_pp_job_get_tracker(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return &(job->tracker); ++} + -+#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */ ++MALI_STATIC_INLINE u32 *mali_pp_job_get_timeline_point_ptr( ++ struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ return (u32 __user *)(uintptr_t)job->uargs.timeline_point_ptr; ++} + -+#if defined(CONFIG_MALI_DMA_BUF_FENCE) -+/** -+ * The timeline dma fence callback when dma fence signal. -+ * -+ * @param pp_job_ptr The pointer to pp job that link to the signaled dma fence. -+ */ -+void mali_timeline_dma_fence_callback(void *pp_job_ptr); -+#endif + -+#endif /* __MALI_TIMELINE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c ++#endif /* __MALI_PP_JOB_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c new file mode 100644 -index 000000000..1ab13f509 +index 000000000..b5e6cfddb --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c -@@ -0,0 +1,218 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.c +@@ -0,0 +1,1548 @@ +/* -+ * Copyright (C) 2013-2014, 2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -302656,1825 +306404,1553 @@ index 000000000..1ab13f509 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+#include -+#include "mali_timeline_fence_wait.h" -+#include "mali_osk.h" ++ ++#include "mali_scheduler.h" +#include "mali_kernel_common.h" -+#include "mali_spinlock_reentrant.h" ++#include "mali_osk.h" ++#include "mali_osk_profiling.h" ++#include "mali_kernel_utilization.h" ++#include "mali_timeline.h" ++#include "mali_gp_job.h" ++#include "mali_pp_job.h" ++#include "mali_executor.h" ++#include "mali_group.h" ++#include ++#include ++#include "mali_pm_metrics.h" + -+/** -+ * Allocate a fence waiter tracker. -+ * -+ * @return New fence waiter if successful, NULL if not. ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include "mali_memory_dma_buf.h" ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++#include "mali_dma_fence.h" ++#include ++#endif ++#endif ++ ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++#include ++#include ++#endif ++/* ++ * ---------- static defines/constants ---------- + */ -+static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void) -+{ -+ return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker)); -+} + -+/** -+ * Free fence waiter tracker. -+ * -+ * @param wait Fence wait tracker to free. ++/* ++ * If dma_buf with map on demand is used, we defer job queue ++ * if in atomic context, since both might sleep. + */ -+static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait) -+{ -+ MALI_DEBUG_ASSERT_POINTER(wait); -+ _mali_osk_atomic_term(&wait->refcount); -+ _mali_osk_free(wait); -+} ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++#define MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE 1 ++#endif ++#endif + -+/** -+ * Check if fence wait tracker has been activated. Used as a wait queue condition. -+ * -+ * @param data Fence waiter. -+ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not. ++ ++/* ++ * ---------- global variables (exported due to inline functions) ---------- + */ -+static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data) -+{ -+ struct mali_timeline_fence_wait_tracker *wait; + -+ wait = (struct mali_timeline_fence_wait_tracker *) data; -+ MALI_DEBUG_ASSERT_POINTER(wait); ++/* Lock protecting this module */ ++_mali_osk_spinlock_irq_t *mali_scheduler_lock_obj = NULL; + -+ return wait->activated; -+} ++/* Queue of jobs to be executed on the GP group */ ++struct mali_scheduler_job_queue job_queue_gp; + -+/** -+ * Check if fence has been signaled. -+ * -+ * @param system Timeline system. -+ * @param fence Timeline fence. -+ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not. 
++/* Queue of PP jobs */ ++struct mali_scheduler_job_queue job_queue_pp; ++ ++_mali_osk_atomic_t mali_job_id_autonumber; ++_mali_osk_atomic_t mali_job_cache_order_autonumber; ++/* ++ * ---------- static variables ---------- + */ -+static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence) -+{ -+ int i; -+ u32 tid = _mali_osk_get_tid(); -+ mali_bool ret = MALI_TRUE; -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence = NULL; -+#else -+ struct mali_internal_sync_fence *sync_fence = NULL; -+#endif ++ ++_mali_osk_wq_work_t *scheduler_wq_pp_job_delete = NULL; ++_mali_osk_spinlock_irq_t *scheduler_pp_job_delete_lock = NULL; ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_deletion_queue); ++ ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) ++static _mali_osk_wq_work_t *scheduler_wq_pp_job_queue = NULL; ++static _mali_osk_spinlock_irq_t *scheduler_pp_job_queue_lock = NULL; ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(scheduler_pp_job_queue_list); +#endif + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(fence); ++/* ++ * ---------- Forward declaration of static functions ---------- ++ */ + -+ mali_spinlock_reentrant_wait(system->spinlock, tid); ++static mali_timeline_point mali_scheduler_submit_gp_job( ++ struct mali_session_data *session, struct mali_gp_job *job); ++static _mali_osk_errcode_t mali_scheduler_submit_pp_job( ++ struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point); + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline; -+ mali_timeline_point point; ++static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job); ++static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job); + -+ point = fence->points[i]; ++static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job, ++ mali_bool success); + -+ if (likely(MALI_TIMELINE_NO_POINT == point)) { -+ /* Fence contains no point on this timeline. 
*/ -+ continue; -+ } ++static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job); ++void mali_scheduler_do_pp_job_delete(void *arg); + -+ timeline = system->timelines[i]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) ++static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job); ++static void mali_scheduler_do_pp_job_queue(void *arg); ++#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) { -+ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next)); -+ } ++/* ++ * ---------- Actual implementation ---------- ++ */ + -+ if (!mali_timeline_is_point_released(timeline, point)) { -+ ret = MALI_FALSE; -+ goto exit; -+ } ++_mali_osk_errcode_t mali_scheduler_initialize(void) ++{ ++ _mali_osk_atomic_init(&mali_job_id_autonumber, 0); ++ _mali_osk_atomic_init(&mali_job_cache_order_autonumber, 0); ++ ++ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.normal_pri); ++ _MALI_OSK_INIT_LIST_HEAD(&job_queue_gp.high_pri); ++ job_queue_gp.depth = 0; ++ job_queue_gp.big_job_num = 0; ++ ++ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.normal_pri); ++ _MALI_OSK_INIT_LIST_HEAD(&job_queue_pp.high_pri); ++ job_queue_pp.depth = 0; ++ job_queue_pp.big_job_num = 0; ++ ++ mali_scheduler_lock_obj = _mali_osk_spinlock_irq_init( ++ _MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_SCHEDULER); ++ if (NULL == mali_scheduler_lock_obj) { ++ mali_scheduler_terminate(); + } + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (-1 != fence->sync_fd) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence = sync_fence_fdget(fence->sync_fd); -+#else -+ sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd); -+#endif -+ if (likely(NULL != sync_fence)) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ if (0 == sync_fence->status) { -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ if (0 < atomic_read(&sync_fence->status)) { -+#else -+ if (0 == sync_fence->fence->ops->signaled(sync_fence->fence)) { -+#endif -+ ret = MALI_FALSE; ++ scheduler_wq_pp_job_delete = _mali_osk_wq_create_work( ++ mali_scheduler_do_pp_job_delete, NULL); ++ if (NULL == scheduler_wq_pp_job_delete) { ++ mali_scheduler_terminate(); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ } else { -+ ret = MALI_TRUE; -+ } -+ } else { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", fence->sync_fd)); -+ } ++ scheduler_pp_job_delete_lock = _mali_osk_spinlock_irq_init( ++ _MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED); ++ if (NULL == scheduler_pp_job_delete_lock) { ++ mali_scheduler_terminate(); ++ return _MALI_OSK_ERR_FAULT; + } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+exit: -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) ++ scheduler_wq_pp_job_queue = _mali_osk_wq_create_work( ++ mali_scheduler_do_pp_job_queue, NULL); ++ if (NULL == scheduler_wq_pp_job_queue) { ++ mali_scheduler_terminate(); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ if (NULL != sync_fence) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence_put(sync_fence); -+#else -+ fput(sync_fence->file); -+#endif ++ scheduler_pp_job_queue_lock = _mali_osk_spinlock_irq_init( ++ _MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED); ++ if 
(NULL == scheduler_pp_job_queue_lock) { ++ mali_scheduler_terminate(); ++ return _MALI_OSK_ERR_FAULT; + } -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+ return ret; ++ return _MALI_OSK_ERR_OK; +} + -+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout) ++void mali_scheduler_terminate(void) +{ -+ struct mali_timeline_fence_wait_tracker *wait; -+ mali_timeline_point point; -+ mali_bool ret; ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) ++ if (NULL != scheduler_pp_job_queue_lock) { ++ _mali_osk_spinlock_irq_term(scheduler_pp_job_queue_lock); ++ scheduler_pp_job_queue_lock = NULL; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ if (NULL != scheduler_wq_pp_job_queue) { ++ _mali_osk_wq_delete_work(scheduler_wq_pp_job_queue); ++ scheduler_wq_pp_job_queue = NULL; ++ } ++#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n")); ++ if (NULL != scheduler_pp_job_delete_lock) { ++ _mali_osk_spinlock_irq_term(scheduler_pp_job_delete_lock); ++ scheduler_pp_job_delete_lock = NULL; ++ } + -+ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) { -+ return mali_timeline_fence_wait_check_status(system, fence); ++ if (NULL != scheduler_wq_pp_job_delete) { ++ _mali_osk_wq_delete_work(scheduler_wq_pp_job_delete); ++ scheduler_wq_pp_job_delete = NULL; + } + -+ wait = mali_timeline_fence_wait_tracker_alloc(); -+ if (unlikely(NULL == wait)) { -+ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n")); -+ return MALI_FALSE; ++ if (NULL != mali_scheduler_lock_obj) { ++ _mali_osk_spinlock_irq_term(mali_scheduler_lock_obj); ++ mali_scheduler_lock_obj = NULL; + } + -+ wait->activated = MALI_FALSE; -+ wait->system = system; ++ _mali_osk_atomic_term(&mali_job_cache_order_autonumber); ++ _mali_osk_atomic_term(&mali_job_id_autonumber); ++} + -+ /* Initialize refcount to two references. The reference first will be released by this -+ * function after the wait is over. The second reference will be released when the tracker -+ * is activated. */ -+ _mali_osk_atomic_init(&wait->refcount, 2); ++u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure) ++{ ++ /* ++ * Count how many physical sub jobs are present from the head of queue ++ * until the first virtual job is present. ++ * Early out when we have reached maximum number of PP cores (8) ++ */ ++ u32 count = 0; ++ struct mali_pp_job *job; ++ struct mali_pp_job *temp; + -+ /* Add tracker to timeline system, but not to a timeline. */ -+ mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait); -+ point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point); -+ MALI_IGNORE(point); ++ /* Check for partially started normal pri jobs */ ++ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ /* Wait for the tracker to be activated or time out. 
*/ -+ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) { -+ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait); -+ } else { -+ _mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout); ++ job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, ++ struct mali_pp_job, list); ++ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ ++ if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) { ++ /* ++ * Remember; virtual jobs can't be queued and started ++ * at the same time, so this must be a physical job ++ */ ++ if ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) ++ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job))) { ++ ++ count += mali_pp_job_unstarted_sub_job_count(job); ++ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { ++ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; ++ } ++ } ++ } + } + -+ ret = wait->activated; ++ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri, ++ struct mali_pp_job, list) { ++ if ((MALI_FALSE == mali_pp_job_is_virtual(job)) ++ && ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) ++ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) { + -+ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) { -+ mali_timeline_fence_wait_tracker_free(wait); ++ count += mali_pp_job_unstarted_sub_job_count(job); ++ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { ++ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; ++ } ++ } else { ++ /* Came across a virtual job, so stop counting */ ++ return count; ++ } + } + -+ return ret; ++ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri, ++ struct mali_pp_job, list) { ++ if ((MALI_FALSE == mali_pp_job_is_virtual(job)) ++ && (MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) ++ && ((MALI_FALSE == gpu_mode_is_secure && MALI_FALSE == mali_pp_job_is_protected_job(job)) ++ || (MALI_TRUE == gpu_mode_is_secure && MALI_TRUE == mali_pp_job_is_protected_job(job)))) { ++ ++ count += mali_pp_job_unstarted_sub_job_count(job); ++ if (MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS <= count) { ++ return MALI_MAX_NUMBER_OF_PHYSICAL_PP_GROUPS; ++ } ++ } else { ++ /* Came across a virtual job, so stop counting */ ++ return count; ++ } ++ } ++ return count; +} + -+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait) ++struct mali_pp_job *mali_scheduler_job_pp_next(void) +{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ struct mali_pp_job *job; ++ struct mali_pp_job *temp; + -+ MALI_DEBUG_ASSERT_POINTER(wait); -+ MALI_DEBUG_ASSERT_POINTER(wait->system); ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); + -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n")); ++ /* Check for partially started normal pri jobs */ ++ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated); -+ wait->activated = MALI_TRUE; ++ job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, ++ struct mali_pp_job, list); + -+ _mali_osk_wait_queue_wake_up(wait->system->wait_queue); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ /* Nothing can wait on this tracker, so nothing to schedule after release. 
*/ -+ schedule_mask = mali_timeline_tracker_release(&wait->tracker); -+ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask); -+ MALI_IGNORE(schedule_mask); ++ if (MALI_TRUE == mali_pp_job_has_started_sub_jobs(job)) { ++ return job; ++ } ++ } + -+ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) { -+ mali_timeline_fence_wait_tracker_free(wait); ++ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.high_pri, ++ struct mali_pp_job, list) { ++ return job; ++ } ++ ++ _MALI_OSK_LIST_FOREACHENTRY(job, temp, &job_queue_pp.normal_pri, ++ struct mali_pp_job, list) { ++ return job; + } ++ ++ return NULL; +} -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h -new file mode 100644 -index 000000000..9da12baee ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h -@@ -0,0 +1,67 @@ -+/* -+ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+/** -+ * @file mali_timeline_fence_wait.h -+ * -+ * This file contains functions used to wait until a Timeline fence is signaled. -+ */ ++mali_bool mali_scheduler_job_next_is_virtual(void) ++{ ++ struct mali_pp_job *job; + -+#ifndef __MALI_TIMELINE_FENCE_WAIT_H__ -+#define __MALI_TIMELINE_FENCE_WAIT_H__ ++ job = mali_scheduler_job_pp_virtual_peek(); ++ if (NULL != job) { ++ MALI_DEBUG_ASSERT(mali_pp_job_is_virtual(job)); + -+#include "mali_osk.h" -+#include "mali_timeline.h" ++ return MALI_TRUE; ++ } + -+/** -+ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the -+ * function only returns when the fence is signaled. -+ */ -+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1) ++ return MALI_FALSE; ++} + -+/** -+ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return -+ * immediately with the current state of the fence. -+ */ -+#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0 ++struct mali_gp_job *mali_scheduler_job_gp_get(void) ++{ ++ _mali_osk_list_t *queue; ++ struct mali_gp_job *job = NULL; + -+/** -+ * Fence wait tracker. -+ * -+ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a -+ * dependency. We will then perform a blocking wait, possibly with a timeout, until the tracker is -+ * activated, which happens when the fence is signaled. -+ */ -+struct mali_timeline_fence_wait_tracker { -+ mali_bool activated; /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */ -+ _mali_osk_atomic_t refcount; /**< Reference count. */ -+ struct mali_timeline_system *system; /**< Timeline system. */ -+ struct mali_timeline_tracker tracker; /**< Timeline tracker. */ -+}; ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ MALI_DEBUG_ASSERT(0 < job_queue_gp.depth); ++ MALI_DEBUG_ASSERT(job_queue_gp.big_job_num <= job_queue_gp.depth); + -+/** -+ * Wait for a fence to be signaled, or timeout is reached. -+ * -+ * @param system Timeline system. -+ * @param fence Fence to wait on. 
-+ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or -+ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY. -+ * @return MALI_TRUE if signaled, MALI_FALSE if timed out. -+ */ -+mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout); ++ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) { ++ queue = &job_queue_gp.high_pri; ++ } else { ++ queue = &job_queue_gp.normal_pri; ++ MALI_DEBUG_ASSERT(!_mali_osk_list_empty(queue)); ++ } + -+/** -+ * Used by the Timeline system to activate a fence wait tracker. -+ * -+ * @param fence_wait_tracker Fence waiter tracker. -+ */ -+void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker); ++ job = _MALI_OSK_LIST_ENTRY(queue->next, struct mali_gp_job, list); + -+#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c -new file mode 100644 -index 000000000..bb7f6a04e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include -+#include "mali_timeline_sync_fence.h" ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_sync.h" ++ mali_gp_job_list_remove(job); ++ job_queue_gp.depth--; ++ if (job->big_job) { ++ job_queue_gp.big_job_num --; ++ if (job_queue_gp.big_job_num < MALI_MAX_PENDING_BIG_JOB) { ++ /* wake up process */ ++ wait_queue_head_t *queue = mali_session_get_wait_queue(); ++ wake_up(queue); ++ } ++ } ++ return job; ++} + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+/** -+ * Creates a sync fence tracker and a sync fence. Adds sync fence tracker to Timeline system and -+ * returns sync fence. The sync fence will be signaled when the sync fence tracker is activated. -+ * -+ * @param timeline Timeline. -+ * @param point Point on timeline. -+ * @return Sync fence that will be signaled when tracker is activated. -+ */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point) -+#else -+static struct mali_internal_sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point) -+#endif ++struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void) +{ -+ struct mali_timeline_sync_fence_tracker *sync_fence_tracker; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence; -+#else -+ struct mali_internal_sync_fence *sync_fence; -+#endif -+ struct mali_timeline_fence fence; ++ struct mali_pp_job *job = NULL; ++ struct mali_pp_job *tmp_job = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(timeline); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); + -+ /* Allocate sync fence tracker. 
*/ -+ sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker)); -+ if (NULL == sync_fence_tracker) { -+ MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n")); -+ return NULL; -+ } ++ /* ++ * For PP jobs we favour partially started jobs in normal ++ * priority queue over unstarted jobs in high priority queue ++ */ + -+ /* Create sync flag. */ -+ MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl); -+ sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point); -+ if (NULL == sync_fence_tracker->flag) { -+ MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n")); -+ _mali_osk_free(sync_fence_tracker); -+ return NULL; -+ } ++ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ /* Create sync fence from sync flag. */ -+ sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag); -+ if (NULL == sync_fence) { -+ MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n")); -+ mali_sync_flag_put(sync_fence_tracker->flag); -+ _mali_osk_free(sync_fence_tracker); -+ return NULL; ++ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, ++ struct mali_pp_job, list); ++ MALI_DEBUG_ASSERT(NULL != tmp_job); ++ ++ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) { ++ job = tmp_job; ++ } + } + -+ /* Setup fence for tracker. */ -+ _mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence)); -+ fence.sync_fd = -1; -+ fence.points[timeline->id] = point; ++ if (NULL == job || ++ MALI_FALSE == mali_pp_job_has_started_sub_jobs(job)) { ++ /* ++ * There isn't a partially started job in normal queue, so ++ * look in high priority queue. ++ */ ++ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ /* Finally, add the tracker to Timeline system. 
*/ -+ mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker); -+ point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE); -+ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point); ++ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next, ++ struct mali_pp_job, list); ++ MALI_DEBUG_ASSERT(NULL != tmp_job); + -+ return sync_fence; ++ if (MALI_FALSE == mali_pp_job_is_virtual(tmp_job)) { ++ job = tmp_job; ++ } ++ } ++ } ++ ++ return job; +} + -+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence) ++struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void) +{ -+ u32 i; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence_acc = NULL; -+#else -+ struct mali_internal_sync_fence *sync_fence_acc = NULL; -+#endif -+ MALI_DEBUG_ASSERT_POINTER(system); -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ struct mali_pp_job *job = NULL; ++ struct mali_pp_job *tmp_job = NULL; + -+ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { -+ struct mali_timeline *timeline; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence; -+#else -+ struct mali_internal_sync_fence *sync_fence; -+#endif -+ if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue; ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); + -+ timeline = system->timelines[i]; -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]); -+ if (NULL == sync_fence) goto error; ++ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.high_pri.next, ++ struct mali_pp_job, list); + -+ if (NULL != sync_fence_acc) { -+ /* Merge sync fences. */ -+ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence); -+ if (NULL == sync_fence_acc) goto error; -+ } else { -+ /* This was the first sync fence created. */ -+ sync_fence_acc = sync_fence; ++ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) { ++ job = tmp_job; + } + } + -+ if (-1 != fence->sync_fd) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_fence *sync_fence; -+ sync_fence = sync_fence_fdget(fence->sync_fd); -+#else -+ struct mali_internal_sync_fence *sync_fence; -+ sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd); -+#endif ++ if (NULL == job) { ++ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { ++ MALI_DEBUG_ASSERT(0 < job_queue_pp.depth); + -+ if (NULL == sync_fence) goto error; ++ tmp_job = _MALI_OSK_LIST_ENTRY(job_queue_pp.normal_pri.next, ++ struct mali_pp_job, list); + -+ if (NULL != sync_fence_acc) { -+ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence); -+ if (NULL == sync_fence_acc) goto error; -+ } else { -+ sync_fence_acc = sync_fence; ++ if (MALI_TRUE == mali_pp_job_is_virtual(tmp_job)) { ++ job = tmp_job; ++ } + } + } + -+ if (NULL == sync_fence_acc) { -+ MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl); ++ return job; ++} + -+ /* There was nothing to wait on, so return an already signaled fence. 
*/ ++struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job) ++{ ++ struct mali_pp_job *job = mali_scheduler_job_pp_physical_peek(); + -+ sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl); -+ if (NULL == sync_fence_acc) goto error; ++ MALI_DEBUG_ASSERT(MALI_FALSE == mali_pp_job_is_virtual(job)); ++ ++ if (NULL != job) { ++ *sub_job = mali_pp_job_get_first_unstarted_sub_job(job); ++ ++ mali_pp_job_mark_sub_job_started(job, *sub_job); ++ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(job)) { ++ /* Remove from queue when last sub job has been retrieved */ ++ mali_pp_job_list_remove(job); ++ } ++ ++ job_queue_pp.depth--; ++ ++ /* ++ * Job about to start so it is no longer be ++ * possible to discard WB ++ */ ++ mali_pp_job_fb_lookup_remove(job); + } + -+ /* Return file descriptor for the accumulated sync fence. */ -+ return mali_sync_fence_fd_alloc(sync_fence_acc); ++ return job; ++} + -+error: -+ if (NULL != sync_fence_acc) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_fence_put(sync_fence_acc); -+#else -+ fput(sync_fence_acc->file); -+#endif ++struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void) ++{ ++ struct mali_pp_job *job = mali_scheduler_job_pp_virtual_peek(); ++ ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_pp_job_is_virtual(job)); ++ ++ if (NULL != job) { ++ MALI_DEBUG_ASSERT(0 == ++ mali_pp_job_get_first_unstarted_sub_job(job)); ++ MALI_DEBUG_ASSERT(1 == ++ mali_pp_job_get_sub_job_count(job)); ++ ++ mali_pp_job_mark_sub_job_started(job, 0); ++ ++ mali_pp_job_list_remove(job); ++ ++ job_queue_pp.depth--; ++ ++ /* ++ * Job about to start so it is no longer be ++ * possible to discard WB ++ */ ++ mali_pp_job_fb_lookup_remove(job); + } + -+ return -1; ++ return job; +} + -+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker) ++mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job) +{ -+ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker); -+ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag); ++ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Timeline activation for job %u (0x%08X).\n", ++ mali_gp_job_get_id(job), job)); + -+ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n")); ++ mali_scheduler_lock(); + -+ /* Signal flag and release reference. */ -+ mali_sync_flag_signal(sync_fence_tracker->flag, 0); -+ mali_sync_flag_put(sync_fence_tracker->flag); ++ if (!mali_scheduler_queue_gp_job(job)) { ++ /* Failed to enqueue job, release job (with error) */ + -+ /* Nothing can wait on this tracker, so nothing to schedule after release. 
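The PP peek/get helpers above favour a partially started job at the head of the normal-priority queue over anything unstarted in the high-priority queue, and hand a physical job out one sub-job at a time, removing it from the queue only once the last sub-job has been taken. The sketch below models just that bookkeeping with a bitmask of started sub-jobs; fake_pp_job and the helper names are hypothetical, and virtual-job handling is left out.

#include <stdbool.h>
#include <stdio.h>

struct fake_pp_job {
	unsigned int sub_job_count;
	unsigned int started_mask;   /* bit i set once sub-job i was handed out */
};

static bool job_has_started_sub_jobs(const struct fake_pp_job *j)
{
	return j->started_mask != 0;
}

static bool job_has_unstarted_sub_jobs(const struct fake_pp_job *j)
{
	return j->started_mask != (1u << j->sub_job_count) - 1u;
}

/* Mirrors the "favour partially started normal-priority jobs" rule above. */
static struct fake_pp_job *peek(struct fake_pp_job *normal_head,
				struct fake_pp_job *high_head)
{
	if (normal_head && job_has_started_sub_jobs(normal_head))
		return normal_head;   /* finish what already occupies cores */
	return high_head ? high_head : normal_head;
}

/* Hands out the first unstarted sub-job; caller removes the job when none remain. */
static int get_sub_job(struct fake_pp_job *j)
{
	unsigned int i;

	for (i = 0; i < j->sub_job_count; i++) {
		if (!(j->started_mask & (1u << i))) {
			j->started_mask |= 1u << i;
			return (int)i;
		}
	}
	return -1;
}

int main(void)
{
	struct fake_pp_job normal = { 4, 0x1 };   /* sub-job 0 already running */
	struct fake_pp_job high = { 4, 0x0 };
	struct fake_pp_job *next = peek(&normal, &high);

	printf("next sub-job: job=%s sub=%d\n",
	       next == &normal ? "normal" : "high", get_sub_job(next));
	printf("normal job %s unstarted sub-jobs\n",
	       job_has_unstarted_sub_jobs(&normal) ? "still has" : "has no");
	return 0;
}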
*/ -+ schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker); -+ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask); ++ mali_scheduler_unlock(); + -+ _mali_osk_free(sync_fence_tracker); ++ mali_timeline_tracker_release(mali_gp_job_get_tracker(job)); ++ mali_gp_job_signal_pp_tracker(job, MALI_FALSE); ++ ++ /* This will notify user space and close the job object */ ++ mali_scheduler_complete_gp_job(job, MALI_FALSE, ++ MALI_TRUE, MALI_FALSE); ++ ++ return MALI_SCHEDULER_MASK_EMPTY; ++ } ++ ++ mali_scheduler_unlock(); ++ ++ return MALI_SCHEDULER_MASK_GP; +} -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h -new file mode 100644 -index 000000000..65e368ae7 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+/** -+ * @file mali_timeline_sync_fence.h -+ * -+ * This file contains code related to creating sync fences from timeline fences. -+ */ ++mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#ifndef __MALI_TIMELINE_SYNC_FENCE_H__ -+#define __MALI_TIMELINE_SYNC_FENCE_H__ ++ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Timeline activation for job %u (0x%08X).\n", ++ mali_pp_job_get_id(job), job)); + -+#include "mali_timeline.h" ++ if (MALI_TRUE == mali_timeline_tracker_activation_error( ++ mali_pp_job_get_tracker(job))) { ++ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Job %u (0x%08X) activated with error, aborting.\n", ++ mali_pp_job_get_id(job), job)); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ mali_scheduler_lock(); ++ mali_pp_job_fb_lookup_remove(job); ++ mali_pp_job_mark_unstarted_failed(job); ++ mali_scheduler_unlock(); + -+/** -+ * Sync fence tracker. -+ */ -+struct mali_timeline_sync_fence_tracker { -+ struct mali_sync_flag *flag; /**< Sync flag used to connect tracker and sync fence. */ -+ struct mali_timeline_tracker tracker; /**< Timeline tracker. */ -+}; ++ mali_timeline_tracker_release(mali_pp_job_get_tracker(job)); + -+/** -+ * Create a sync fence that will be signaled when @ref fence is signaled. -+ * -+ * @param system Timeline system. -+ * @param fence Fence to create sync fence from. -+ * @return File descriptor for new sync fence, or -1 on error. -+ */ -+s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence); ++ /* This will notify user space and close the job object */ ++ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE); + -+/** -+ * Used by the Timeline system to activate a sync fence tracker. -+ * -+ * @param sync_fence_tracker Sync fence tracker. 
-+ * -+ */ -+void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker); ++ return MALI_SCHEDULER_MASK_EMPTY; ++ } + -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) ++ if (mali_pp_job_needs_dma_buf_mapping(job)) { ++ mali_scheduler_deferred_pp_job_queue(job); ++ return MALI_SCHEDULER_MASK_EMPTY; ++ } ++#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_ukk.h b/drivers/gpu/arm/mali400/mali/common/mali_ukk.h -new file mode 100644 -index 000000000..55a05c504 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_ukk.h -@@ -0,0 +1,551 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ mali_scheduler_lock(); + -+/** -+ * @file mali_ukk.h -+ * Defines the kernel-side interface of the user-kernel interface -+ */ ++ if (!mali_scheduler_queue_pp_job(job)) { ++ /* Failed to enqueue job, release job (with error) */ ++ mali_pp_job_fb_lookup_remove(job); ++ mali_pp_job_mark_unstarted_failed(job); ++ mali_scheduler_unlock(); + -+#ifndef __MALI_UKK_H__ -+#define __MALI_UKK_H__ ++ mali_timeline_tracker_release(mali_pp_job_get_tracker(job)); + -+#include "mali_osk.h" -+#include "mali_uk_types.h" ++ /* This will notify user space and close the job object */ ++ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, MALI_FALSE); + -+#ifdef __cplusplus -+extern "C" { ++ return MALI_SCHEDULER_MASK_EMPTY; ++ } ++ ++ mali_scheduler_unlock(); ++ return MALI_SCHEDULER_MASK_PP; ++} ++ ++void mali_scheduler_complete_gp_job(struct mali_gp_job *job, ++ mali_bool success, ++ mali_bool user_notification, ++ mali_bool dequeued) ++{ ++ if (user_notification) { ++ mali_scheduler_return_gp_job_to_user(job, success); ++ } ++ ++ if (dequeued) { ++ _mali_osk_pm_dev_ref_put(); ++ ++ if (mali_utilization_enabled()) { ++ mali_utilization_gp_end(); ++ } ++ mali_pm_record_gpu_idle(MALI_TRUE); ++ } ++ ++ mali_gp_job_delete(job); ++} ++ ++void mali_scheduler_complete_pp_job(struct mali_pp_job *job, ++ u32 num_cores_in_virtual, ++ mali_bool user_notification, ++ mali_bool dequeued) ++{ ++ job->user_notification = user_notification; ++ job->num_pp_cores_in_virtual = num_cores_in_virtual; ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ if (NULL != job->rendered_dma_fence) ++ mali_dma_fence_signal_and_put(&job->rendered_dma_fence); +#endif + -+/** -+ * @addtogroup uddapi Unified Device Driver (UDD) APIs -+ * -+ * @{ -+ */ ++ if (dequeued) { ++#if defined(CONFIG_MALI_DVFS) ++ if (mali_pp_job_is_window_surface(job)) { ++ struct mali_session_data *session; ++ session = mali_pp_job_get_session(job); ++ mali_session_inc_num_window_jobs(session); ++ } ++#endif ++ _mali_osk_pm_dev_ref_put(); + -+/** -+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs -+ * -+ * - The _mali_uk functions are an abstraction of the interface to the device -+ * driver. On certain OSs, this would be implemented via the IOCTL interface. 
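The completion helpers here release the power-management reference and close the GPU-utilization window only when the job is actually being dequeued; the matching reference is taken when the job enters a queue (see mali_scheduler_queue_gp_job further down). The toy counter below, with hypothetical pm_ref_* helpers rather than the _mali_osk_pm API, shows why that pairing stays balanced.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static int pm_refcount;

static void pm_ref_get(void) { pm_refcount++; }
static void pm_ref_put(void) { assert(pm_refcount > 0); pm_refcount--; }

/* Returns true when the job really entered a queue (and so took a PM reference). */
static bool queue_job(bool session_is_aborting)
{
	if (session_is_aborting)
		return false;         /* rejected: no reference was taken */
	pm_ref_get();                 /* taken once the job sits in a queue */
	return true;
}

static void complete_job(bool dequeued)
{
	if (dequeued)
		pm_ref_put();         /* dropped only when leaving the queue */
}

int main(void)
{
	if (queue_job(false))
		complete_job(true);   /* normal run: get/put pair up */
	if (!queue_job(true))
		complete_job(false);  /* rejected job: neither get nor put */
	printf("outstanding PM references: %d\n", pm_refcount);   /* 0 */
	return 0;
}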
-+ * On other OSs, it could be via extension of some Device Driver Class, or -+ * direct function call for Bare metal/RTOSs. -+ * - It is important to note that: -+ * - The Device Driver has implemented the _mali_ukk set of functions -+ * - The Base Driver calls the corresponding set of _mali_uku functions. -+ * - What requires porting is solely the calling mechanism from User-side to -+ * Kernel-side, and propagating back the results. -+ * - Each U/K function is associated with a (group, number) pair from -+ * \ref _mali_uk_functions to make it possible for a common function in the -+ * Base Driver and Device Driver to route User/Kernel calls from/to the -+ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number -+ * would be formed based on the group and number assigned to the _mali_uk -+ * function, as listed in \ref _mali_uk_functions. On the user-side, each -+ * _mali_uku function would just make an IOCTL with the IOCTL-code being an -+ * encoded form of the (group, number) pair. On the kernel-side, the Device -+ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number) -+ * pair, and uses this to determine which corresponding _mali_ukk should be -+ * called. -+ * - Refer to \ref _mali_uk_functions for more information about this -+ * (group, number) pairing. -+ * - In a system where there is no distinction between user and kernel-side, -+ * the U/K interface may be implemented as:@code -+ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args ) -+ * { -+ * return mali_ukk_examplefunction( args ); -+ * } -+ * @endcode -+ * - Therefore, all U/K calls behave \em as \em though they were direct -+ * function calls (but the \b implementation \em need \em not be a direct -+ * function calls) -+ * -+ * @note Naming the _mali_uk functions the same on both User and Kernel sides -+ * on non-RTOS systems causes debugging issues when setting breakpoints. In -+ * this case, it is not clear which function the breakpoint is put on. -+ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku -+ * and in kernel space with \c _mali_ukk. The naming for the argument -+ * structures is unaffected. -+ * -+ * - The _mali_uk functions are synchronous. -+ * - Arguments to the _mali_uk functions are passed in a structure. The only -+ * parameter passed to the _mali_uk functions is a pointer to this structure. -+ * This first member of this structure, ctx, is a pointer to a context returned -+ * by _mali_uku_open(). For example:@code -+ * typedef struct -+ * { -+ * void *ctx; -+ * u32 number_of_cores; -+ * } _mali_uk_get_gp_number_of_cores_s; -+ * @endcode -+ * -+ * - Each _mali_uk function has its own argument structure named after the -+ * function. The argument is distinguished by the _s suffix. -+ * - The argument types are defined by the base driver and user-kernel -+ * interface. -+ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t. -+ * - Only arguments of type input or input/output need be initialized before -+ * calling a _mali_uk function. -+ * - Arguments of type output and input/output are only valid when the -+ * _mali_uk function returns \ref _MALI_OSK_ERR_OK. -+ * - The \c ctx member is always invalid after it has been used by a -+ * _mali_uk function, except for the context management functions -+ * -+ * -+ * \b Interface \b restrictions -+ * -+ * The requirements of the interface mean that an implementation of the -+ * User-kernel interface may do no 'real' work. 
For example, the following are -+ * illegal in the User-kernel implementation: -+ * - Calling functions necessary for operation on all systems, which would -+ * not otherwise get called on RTOS systems. -+ * - For example, a U/K interface that calls multiple _mali_ukk functions -+ * during one particular U/K call. This could not be achieved by the same code -+ * which uses direct function calls for the U/K interface. -+ * - Writing in values to the args members, when otherwise these members would -+ * not hold a useful value for a direct function call U/K interface. -+ * - For example, U/K interface implementation that take NULL members in -+ * their arguments structure from the user side, but those members are -+ * replaced with non-NULL values in the kernel-side of the U/K interface -+ * implementation. A scratch area for writing data is one such example. In this -+ * case, a direct function call U/K interface would segfault, because no code -+ * would be present to replace the NULL pointer with a meaningful pointer. -+ * - Note that we discourage the case where the U/K implementation changes -+ * a NULL argument member to non-NULL, and then the Device Driver code (outside -+ * of the U/K layer) re-checks this member for NULL, and corrects it when -+ * necessary. Whilst such code works even on direct function call U/K -+ * intefaces, it reduces the testing coverage of the Device Driver code. This -+ * is because we have no way of testing the NULL == value path on an OS -+ * implementation. -+ * -+ * A number of allowable examples exist where U/K interfaces do 'real' work: -+ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info -+ * - In this case, without the pointer switching on direct function call -+ * U/K interface, the Device Driver code still sees the same thing: a pointer -+ * to which it can write memory. This is because such a system has no -+ * distinction between a user and kernel pointer. -+ * - Writing an OS-specific value into the ukk_private member for -+ * _mali_ukk_mem_mmap(). -+ * - In this case, this value is passed around by Device Driver code, but -+ * its actual value is never checked. Device Driver code simply passes it from -+ * the U/K layer to the OSK layer, where it can be acted upon. In this case, -+ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK -+ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the -+ * meaning of ukk_private member. On other OSs, it may be unused by both -+ * U/K and OSK layers -+ * - Therefore, on error inside the U/K interface implementation itself, -+ * it will be as though the _mali_ukk function itself had failed, and cleaned -+ * up after itself. -+ * - Compare this to a direct function call U/K implementation, where all -+ * error cleanup is handled by the _mali_ukk function itself. The direct -+ * function call U/K interface implementation is automatically atomic. -+ * -+ * The last example highlights a consequence of all U/K interface -+ * implementations: they must be atomic with respect to the Device Driver code. -+ * And therefore, should Device Driver code succeed but the U/K implementation -+ * fail afterwards (but before return to user-space), then the U/K -+ * implementation must cause appropriate cleanup actions to preserve the -+ * atomicity of the interface. 
-+ * -+ * @{ -+ */ ++ if (mali_utilization_enabled()) { ++ mali_utilization_pp_end(); ++ } ++ mali_pm_record_gpu_idle(MALI_FALSE); ++ } + ++ /* With ZRAM feature enabled, all pp jobs will be force to use deferred delete. */ ++ mali_scheduler_deferred_pp_job_delete(job); ++} + -+/** @defgroup _mali_uk_context U/K Context management -+ * -+ * These functions allow for initialisation of the user-kernel interface once per process. -+ * -+ * Generally the context will store the OS specific object to communicate with the kernel device driver and further -+ * state information required by the specific implementation. The context is shareable among all threads in the caller process. -+ * -+ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver. -+ * -+ * On a bare-metal/RTOS system with no distinction between kernel and -+ * user-space, the U/K interface simply calls the _mali_ukk variant of the -+ * function by direct function call. In this case, the context returned is the -+ * mali_session_data from _mali_ukk_open(). -+ * -+ * The kernel side implementations of the U/K interface expect the first member of the argument structure to -+ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context -+ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context -+ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter -+ * in the argument structure therefore has to be of type input/output. -+ * -+ * It should be noted that the caller cannot reuse the \c ctx member of U/K -+ * argument structure after a U/K call, because it may be overwritten. Instead, -+ * the context handle must always be stored elsewhere, and copied into -+ * the appropriate U/K argument structure for each user-side call to -+ * the U/K interface. This is not usually a problem, since U/K argument -+ * structures are usually placed on the stack. -+ * -+ * @{ */ ++void mali_scheduler_abort_session(struct mali_session_data *session) ++{ ++ struct mali_gp_job *gp_job; ++ struct mali_gp_job *gp_tmp; ++ struct mali_pp_job *pp_job; ++ struct mali_pp_job *pp_tmp; ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_gp); ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(removed_jobs_pp); + -+/** @brief Begin a new Mali Device Driver session -+ * -+ * This is used to obtain a per-process context handle for all future U/K calls. -+ * -+ * @param context pointer to storage to return a (void*)context handle. -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_open(void **context); ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT(session->is_aborting); + -+/** @brief End a Mali Device Driver session -+ * -+ * This should be called when the process no longer requires use of the Mali Device Driver. -+ * -+ * The context handle must not be used after it has been closed. -+ * -+ * @param context pointer to a stored (void*)context handle. -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. 
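mali_scheduler_abort_session() above works in two phases: matching jobs are unlinked onto local lists while the scheduler lock is held, and the trackers are released (and the jobs completed) only after the lock has been dropped, because releasing a tracker can re-enter the scheduler and queue new work. A compact userspace sketch of that collect-then-complete pattern follows; the fake_job type and the pthread mutex are stand-ins for the driver's job object and scheduler lock.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_job {
	int session_id;
	struct fake_job *next;
};

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
static struct fake_job *queue_head;

static void abort_session(int session_id)
{
	struct fake_job *removed = NULL;
	struct fake_job **pp, *job;

	/* Phase 1: unlink matching jobs while the queue is locked. */
	pthread_mutex_lock(&sched_lock);
	pp = &queue_head;
	while ((job = *pp) != NULL) {
		if (job->session_id == session_id) {
			*pp = job->next;
			job->next = removed;
			removed = job;
		} else {
			pp = &job->next;
		}
	}
	pthread_mutex_unlock(&sched_lock);

	/* Phase 2: complete them without the lock; this may queue new jobs. */
	while ((job = removed) != NULL) {
		removed = job->next;
		printf("completing aborted job of session %d\n", job->session_id);
		free(job);
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		struct fake_job *j = malloc(sizeof(*j));
		j->session_id = i % 2;
		j->next = queue_head;
		queue_head = j;
	}
	abort_session(1);
	return 0;
}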
-+ */ -+_mali_osk_errcode_t _mali_ukk_close(void **context); ++ MALI_DEBUG_PRINT(3, ("Mali scheduler: Aborting all queued jobs from session 0x%08X.\n", ++ session)); + -+/** @} */ /* end group _mali_uk_context */ ++ mali_scheduler_lock(); + ++ /* Remove from GP normal priority queue */ ++ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.normal_pri, ++ struct mali_gp_job, list) { ++ if (mali_gp_job_get_session(gp_job) == session) { ++ mali_gp_job_list_move(gp_job, &removed_jobs_gp); ++ job_queue_gp.depth--; ++ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0; ++ } ++ } + -+/** @addtogroup _mali_uk_core U/K Core -+ * -+ * The core functions provide the following functionality: -+ * - verify that the user and kernel API are compatible -+ * - retrieve information about the cores and memory banks in the system -+ * - wait for the result of jobs started on a core -+ * -+ * @{ */ ++ /* Remove from GP high priority queue */ ++ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &job_queue_gp.high_pri, ++ struct mali_gp_job, list) { ++ if (mali_gp_job_get_session(gp_job) == session) { ++ mali_gp_job_list_move(gp_job, &removed_jobs_gp); ++ job_queue_gp.depth--; ++ job_queue_gp.big_job_num -= gp_job->big_job ? 1 : 0; ++ } ++ } + -+/** @brief Waits for a job notification. -+ * -+ * Sleeps until notified or a timeout occurs. Returns information about the notification. -+ * -+ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args); ++ /* Remove from PP normal priority queue */ ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, ++ &job_queue_pp.normal_pri, ++ struct mali_pp_job, list) { ++ if (mali_pp_job_get_session(pp_job) == session) { ++ mali_pp_job_fb_lookup_remove(pp_job); + -+/** @brief Post a notification to the notification queue of this application. -+ * -+ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args); ++ job_queue_pp.depth -= ++ mali_pp_job_unstarted_sub_job_count( ++ pp_job); ++ mali_pp_job_mark_unstarted_failed(pp_job); + -+/** @brief Verifies if the user and kernel side of this API are compatible. -+ * -+ * This function is obsolete, but kept to allow old, incompatible user space -+ * clients to robustly detect the incompatibility. -+ * -+ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args); ++ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) { ++ if (mali_pp_job_is_complete(pp_job)) { ++ mali_pp_job_list_move(pp_job, ++ &removed_jobs_pp); ++ } else { ++ mali_pp_job_list_remove(pp_job); ++ } ++ } ++ } ++ } + -+/** @brief Verifies if the user and kernel side of this API are compatible. -+ * -+ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. 
-+ */ -+_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args); ++ /* Remove from PP high priority queue */ ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, ++ &job_queue_pp.high_pri, ++ struct mali_pp_job, list) { ++ if (mali_pp_job_get_session(pp_job) == session) { ++ mali_pp_job_fb_lookup_remove(pp_job); + -+/** @brief Get the user space settings applicable for calling process. -+ * -+ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args); ++ job_queue_pp.depth -= ++ mali_pp_job_unstarted_sub_job_count( ++ pp_job); ++ mali_pp_job_mark_unstarted_failed(pp_job); + -+/** @brief Get a user space setting applicable for calling process. -+ * -+ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args); ++ if (MALI_FALSE == mali_pp_job_has_unstarted_sub_jobs(pp_job)) { ++ if (mali_pp_job_is_complete(pp_job)) { ++ mali_pp_job_list_move(pp_job, ++ &removed_jobs_pp); ++ } else { ++ mali_pp_job_list_remove(pp_job); ++ } ++ } ++ } ++ } + -+/* @brief Grant or deny high priority scheduling for this session. -+ * -+ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args); ++ /* ++ * Release scheduler lock so we can release trackers ++ * (which will potentially queue new jobs) ++ */ ++ mali_scheduler_unlock(); + -+/** @brief Make process sleep if the pending big job in kernel >= MALI_MAX_PENDING_BIG_JOB -+ * -+ */ -+_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args); ++ /* Release and complete all (non-running) found GP jobs */ ++ _MALI_OSK_LIST_FOREACHENTRY(gp_job, gp_tmp, &removed_jobs_gp, ++ struct mali_gp_job, list) { ++ mali_timeline_tracker_release(mali_gp_job_get_tracker(gp_job)); ++ mali_gp_job_signal_pp_tracker(gp_job, MALI_FALSE); ++ _mali_osk_list_delinit(&gp_job->list); ++ mali_scheduler_complete_gp_job(gp_job, ++ MALI_FALSE, MALI_FALSE, MALI_TRUE); ++ } + -+/** @} */ /* end group _mali_uk_core */ ++ /* Release and complete non-running PP jobs */ ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, pp_tmp, &removed_jobs_pp, ++ struct mali_pp_job, list) { ++ mali_timeline_tracker_release(mali_pp_job_get_tracker(pp_job)); ++ _mali_osk_list_delinit(&pp_job->list); ++ mali_scheduler_complete_pp_job(pp_job, 0, ++ MALI_FALSE, MALI_TRUE); ++ } ++} + ++_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, ++ _mali_uk_gp_start_job_s *uargs) ++{ ++ struct mali_session_data *session; ++ struct mali_gp_job *job; ++ mali_timeline_point point; ++ u32 __user *point_ptr = NULL; + -+/** @addtogroup _mali_uk_memory U/K Memory -+ * -+ * The memory functions provide functionality with and without a Mali-MMU present. 
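Both start-job entry points above follow the same shape: build the job from the copied-in user arguments, submit it to the timeline system (which yields a timeline point), and only then write that point back through the user-space pointer; if the write-back fails, the call reports an error even though the job has already been queued. The sketch below imitates that ordering with a plain out-pointer standing in for the __user pointer and _mali_osk_put_user(); the names and sequence numbers are hypothetical.

#include <stdint.h>
#include <stdio.h>

static uint32_t next_point = 1;

static uint32_t submit_job(void)
{
	return next_point++;   /* the job is irrevocably queued at this step */
}

/* Returns 0 on success, -1 if the point could not be written back. */
static int start_job(uint32_t *user_point_ptr)
{
	uint32_t point = submit_job();

	if (user_point_ptr == NULL)
		return -1;     /* job still runs; the caller just lost the point */

	*user_point_ptr = point;
	return 0;
}

int main(void)
{
	uint32_t point = 0;

	if (start_job(&point) == 0)
		printf("job queued, timeline point %u\n", (unsigned)point);
	if (start_job(NULL) != 0)
		printf("write-back failed after the job was already queued\n");
	return 0;
}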
-+ * -+ * For Mali-MMU based systems, the following functionality is provided: -+ * - Initialize and terminate MALI virtual address space -+ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the -+ * current process address space -+ * - Map/unmap external physical memory into the MALI virtual address range -+ * -+ * For Mali-nonMMU based systems: -+ * - Allocate/deallocate MALI memory -+ * -+ * @{ */ ++ MALI_DEBUG_ASSERT_POINTER(uargs); ++ MALI_DEBUG_ASSERT_POINTER(ctx); + -+/** @brief Map Mali Memory into the current user process -+ * -+ * Maps Mali memory into the current user process in a generic way. -+ * -+ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes, -+ * but should not be called by a user process in Mali-nonMMU mode. -+ * -+ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU -+ * or Mali-nonMMU: -+ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K -+ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are -+ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired. -+ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr -+ * member is used for the \em Mali-virtual address desired for the mapping. The -+ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual -+ * and CPU-physical addresses, and can cope with mapping a contiguous virtual -+ * address range to a sequence of non-contiguous physical pages. In this case, -+ * the CPU-physical addresses are not communicated back to the user-side, as -+ * they are unnecsessary; the \em Mali-virtual address range must be used for -+ * programming Mali structures. -+ * -+ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of -+ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the -+ * \em Mali-virtual address range from the user-side. -+ * -+ * @note Mali-virtual address ranges are entirely separate between processes. -+ * It is not possible for a process to accidentally corrupt another process' -+ * \em Mali-virtual address space. -+ * -+ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args); ++ session = (struct mali_session_data *)(uintptr_t)ctx; + -+/** @brief Unmap Mali Memory from the current user process -+ * -+ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied -+ * from _mali_ukk_mem_mmap(). -+ * -+ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args); ++ job = mali_gp_job_create(session, uargs, mali_scheduler_get_new_id(), ++ NULL); ++ if (NULL == job) { ++ MALI_PRINT_ERROR(("Failed to create GP job.\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/** @brief Determine the buffer size necessary for an MMU page table dump. 
-+ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args); -+/** @brief Dump MMU Page tables. -+ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args); ++ point_ptr = (u32 __user *)(uintptr_t)mali_gp_job_get_timeline_point_ptr(job); + -+/** @brief Write user data to specified Mali memory without causing segfaults. -+ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args); ++ point = mali_scheduler_submit_gp_job(session, job); + -+/** @} */ /* end group _mali_uk_memory */ ++ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { ++ /* ++ * Let user space know that something failed ++ * after the job was started. ++ */ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + ++ return _MALI_OSK_ERR_OK; ++} + -+/** @addtogroup _mali_uk_pp U/K Fragment Processor -+ * -+ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality: -+ * - retrieving version of the fragment processors -+ * - determine number of fragment processors -+ * - starting a job on a fragment processor -+ * -+ * @{ */ ++_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, ++ _mali_uk_pp_start_job_s *uargs) ++{ ++ _mali_osk_errcode_t ret; ++ struct mali_session_data *session; ++ struct mali_pp_job *job; ++ mali_timeline_point point; ++ u32 __user *point_ptr = NULL; + -+/** @brief Issue a request to start a new job on a Fragment Processor. -+ * -+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can -+ * try to start the job again. -+ * -+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job -+ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the -+ * existing one returned, otherwise the new job is started and the status field args->status is set to -+ * _MALI_UK_START_JOB_STARTED. -+ * -+ * Job completion can be awaited with _mali_ukk_wait_for_notification(). -+ * -+ * @param ctx user-kernel context (mali_session) -+ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data! -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs); ++ MALI_DEBUG_ASSERT_POINTER(uargs); ++ MALI_DEBUG_ASSERT_POINTER(ctx); + -+/** -+ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor. -+ * -+ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job. -+ * -+ * @param ctx user-kernel context (mali_session) -+ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data! -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. 
-+ */ -+_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs); ++ session = (struct mali_session_data *)(uintptr_t)ctx; + -+/** @brief Returns the number of Fragment Processors in the system -+ * -+ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args); ++ job = mali_pp_job_create(session, uargs, mali_scheduler_get_new_id()); ++ if (NULL == job) { ++ MALI_PRINT_ERROR(("Failed to create PP job.\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/** @brief Returns the version that all Fragment Processor cores are compatible with. -+ * -+ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment -+ * Processor core is available. -+ * -+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args); ++ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(job); + -+/** @brief Disable Write-back unit(s) on specified job -+ * -+ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h" -+ */ -+void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args); ++ /* Submit PP job. */ ++ ret = mali_scheduler_submit_pp_job(session, job, &point); ++ job = NULL; + ++ if (_MALI_OSK_ERR_OK == ret) { ++ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { ++ /* ++ * Let user space know that something failed ++ * after the jobs were started. ++ */ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } ++ } + -+/** @} */ /* end group _mali_uk_pp */ ++ return ret; ++} + ++_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, ++ _mali_uk_pp_and_gp_start_job_s *uargs) ++{ ++ _mali_osk_errcode_t ret; ++ struct mali_session_data *session; ++ _mali_uk_pp_and_gp_start_job_s kargs; ++ struct mali_pp_job *pp_job; ++ struct mali_gp_job *gp_job; ++ u32 __user *point_ptr = NULL; ++ mali_timeline_point point; ++ _mali_uk_pp_start_job_s __user *pp_args; ++ _mali_uk_gp_start_job_s __user *gp_args; + -+/** @addtogroup _mali_uk_gp U/K Vertex Processor -+ * -+ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality: -+ * - retrieving version of the Vertex Processors -+ * - determine number of Vertex Processors available -+ * - starting a job on a Vertex Processor -+ * -+ * @{ */ ++ MALI_DEBUG_ASSERT_POINTER(ctx); ++ MALI_DEBUG_ASSERT_POINTER(uargs); + -+/** @brief Issue a request to start a new job on a Vertex Processor. -+ * -+ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can -+ * try to start the job again. -+ * -+ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job -+ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the -+ * existing one returned, otherwise the new job is started and the status field args->status is set to -+ * _MALI_UK_START_JOB_STARTED. -+ * -+ * Job completion can be awaited with _mali_ukk_wait_for_notification(). -+ * -+ * @param ctx user-kernel context (mali_session) -+ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". 
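In the combined path above the PP job is created first, the GP job is created with a reference to the PP job's tracker so the fragment work waits for the geometry work, the GP job is submitted, then the PP job, and only the PP timeline point is returned to user space. The sketch below reproduces that ordering with hypothetical fake_stage records and simple sequence numbers in place of timeline trackers.

#include <stdint.h>
#include <stdio.h>

struct fake_stage {
	const char *name;
	uint32_t depends_on;   /* 0 means "no dependency" */
	uint32_t point;
};

static uint32_t next_point = 1;

static uint32_t submit(struct fake_stage *s)
{
	s->point = next_point++;
	printf("%s submitted as point %u (waits for %u)\n",
	       s->name, (unsigned)s->point, (unsigned)s->depends_on);
	return s->point;
}

int main(void)
{
	struct fake_stage gp = { "GP (geometry)", 0, 0 };
	struct fake_stage pp = { "PP (fragment)", 0, 0 };
	uint32_t returned;

	/* GP first; PP records a dependency on its completion. */
	pp.depends_on = submit(&gp);
	returned = submit(&pp);

	printf("point returned to user space: %u\n", (unsigned)returned);
	return 0;
}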
Use _mali_osk_copy_from_user to retrieve data! -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs); ++ session = (struct mali_session_data *) ctx; + -+/** @brief Returns the number of Vertex Processors in the system. -+ * -+ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args); ++ if (0 != _mali_osk_copy_from_user(&kargs, uargs, ++ sizeof(_mali_uk_pp_and_gp_start_job_s))) { ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/** @brief Returns the version that all Vertex Processor cores are compatible with. -+ * -+ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex -+ * Processor core is available. -+ * -+ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args); ++ pp_args = (_mali_uk_pp_start_job_s __user *)(uintptr_t)kargs.pp_args; ++ gp_args = (_mali_uk_gp_start_job_s __user *)(uintptr_t)kargs.gp_args; + -+/** @brief Resume or abort suspended Vertex Processor jobs. -+ * -+ * After receiving notification that a Vertex Processor job was suspended from -+ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job. -+ * -+ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h" -+ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. -+ */ -+_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args); ++ pp_job = mali_pp_job_create(session, pp_args, ++ mali_scheduler_get_new_id()); ++ if (NULL == pp_job) { ++ MALI_PRINT_ERROR(("Failed to create PP job.\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/** @} */ /* end group _mali_uk_gp */ ++ gp_job = mali_gp_job_create(session, gp_args, ++ mali_scheduler_get_new_id(), ++ mali_pp_job_get_tracker(pp_job)); ++ if (NULL == gp_job) { ++ MALI_PRINT_ERROR(("Failed to create GP job.\n")); ++ mali_pp_job_delete(pp_job); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+#if defined(CONFIG_MALI400_PROFILING) -+/** @addtogroup _mali_uk_profiling U/K Timeline profiling module -+ * @{ */ ++ point_ptr = (u32 __user *)(uintptr_t)mali_pp_job_get_timeline_point_ptr(pp_job); + -+/** @brief Add event to profiling buffer. -+ * -+ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h" -+ */ -+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args); ++ /* Submit GP job. */ ++ mali_scheduler_submit_gp_job(session, gp_job); ++ gp_job = NULL; + -+/** @brief Get profiling stream fd. -+ * -+ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h" -+ */ -+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args); ++ /* Submit PP job. */ ++ ret = mali_scheduler_submit_pp_job(session, pp_job, &point); ++ pp_job = NULL; + -+/** @brief Profiling control set. 
-+ * -+ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h" -+ */ -+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args); ++ if (_MALI_OSK_ERR_OK == ret) { ++ if (0 != _mali_osk_put_user(((u32) point), point_ptr)) { ++ /* ++ * Let user space know that something failed ++ * after the jobs were started. ++ */ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } ++ } + -+/** @} */ /* end group _mali_uk_profiling */ -+#endif ++ return ret; ++} + -+/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module -+ * @{ */ ++void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args) ++{ ++ struct mali_session_data *session; ++ struct mali_pp_job *job; ++ struct mali_pp_job *tmp; ++ u32 fb_lookup_id; + -+/** @brief Report events related to vsync. -+ * -+ * @note Events should be reported when starting to wait for vsync and when the -+ * waiting is finished. This information can then be used in kernel space to -+ * complement the GPU utilization metric. -+ * -+ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h" -+ */ -+_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args); ++ MALI_DEBUG_ASSERT_POINTER(args); ++ MALI_DEBUG_ASSERT(NULL != (void *)(uintptr_t)args->ctx); + -+/** @} */ /* end group _mali_uk_vsync */ ++ session = (struct mali_session_data *)(uintptr_t)args->ctx; + -+/** @addtogroup _mali_sw_counters_report U/K Software counter reporting -+ * @{ */ ++ fb_lookup_id = args->fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK; + -+/** @brief Report software counters. -+ * -+ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h" -+ */ -+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args); ++ mali_scheduler_lock(); + -+/** @} */ /* end group _mali_sw_counters_report */ ++ /* Iterate over all jobs for given frame builder_id. 
*/ ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, ++ &session->pp_job_fb_lookup_list[fb_lookup_id], ++ struct mali_pp_job, session_fb_lookup_list) { ++ MALI_DEBUG_CODE(u32 disable_mask = 0); + -+/** @} */ /* end group u_k_api */ ++ if (mali_pp_job_get_frame_builder_id(job) != ++ (u32) args->fb_id) { ++ MALI_DEBUG_PRINT(4, ("Mali PP scheduler: Disable WB mismatching FB.\n")); ++ continue; ++ } + -+/** @} */ /* end group uddapi */ ++ MALI_DEBUG_CODE(disable_mask |= 0xD << (4 * 3)); + -+u32 _mali_ukk_report_memory_usage(void); ++ if (mali_pp_job_get_wb0_source_addr(job) == args->wb0_memory) { ++ MALI_DEBUG_CODE(disable_mask |= 0x1 << (4 * 1)); ++ mali_pp_job_disable_wb0(job); ++ } + -+u32 _mali_ukk_report_total_memory_size(void); ++ if (mali_pp_job_get_wb1_source_addr(job) == args->wb1_memory) { ++ MALI_DEBUG_CODE(disable_mask |= 0x2 << (4 * 2)); ++ mali_pp_job_disable_wb1(job); ++ } + -+u32 _mali_ukk_utilization_gp_pp(void); ++ if (mali_pp_job_get_wb2_source_addr(job) == args->wb2_memory) { ++ MALI_DEBUG_CODE(disable_mask |= 0x3 << (4 * 3)); ++ mali_pp_job_disable_wb2(job); ++ } ++ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: Disable WB: 0x%X.\n", ++ disable_mask)); ++ } + -+u32 _mali_ukk_utilization_gp(void); ++ mali_scheduler_unlock(); ++} + -+u32 _mali_ukk_utilization_pp(void); ++#if MALI_STATE_TRACKING ++u32 mali_scheduler_dump_state(char *buf, u32 size) ++{ ++ int n = 0; + -+#ifdef __cplusplus ++ n += _mali_osk_snprintf(buf + n, size - n, "GP queues\n"); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tQueue depth: %u\n", job_queue_gp.depth); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tNormal priority queue is %s\n", ++ _mali_osk_list_empty(&job_queue_gp.normal_pri) ? ++ "empty" : "not empty"); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tHigh priority queue is %s\n", ++ _mali_osk_list_empty(&job_queue_gp.high_pri) ? ++ "empty" : "not empty"); ++ ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "PP queues\n"); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tQueue depth: %u\n", job_queue_pp.depth); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tNormal priority queue is %s\n", ++ _mali_osk_list_empty(&job_queue_pp.normal_pri) ++ ? "empty" : "not empty"); ++ n += _mali_osk_snprintf(buf + n, size - n, ++ "\tHigh priority queue is %s\n", ++ _mali_osk_list_empty(&job_queue_pp.high_pri) ++ ? "empty" : "not empty"); ++ ++ n += _mali_osk_snprintf(buf + n, size - n, "\n"); ++ ++ return n; +} +#endif + -+#endif /* __MALI_UKK_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c -new file mode 100644 -index 000000000..1911eff87 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c -@@ -0,0 +1,147 @@ -+/** -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
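_mali_ukk_pp_job_disable_wb() above walks only the frame-builder lookup bucket selected by fb_id & MALI_PP_JOB_FB_LOOKUP_LIST_MASK and, for every queued job of that frame builder, disables exactly the write-back units whose source address matches the buffer user space wants to discard. A self-contained sketch of that bucket-plus-address match follows; the fixed-size table, fake_pp_job and WB_UNITS are hypothetical stand-ins.

#include <stdint.h>
#include <stdio.h>

#define FB_BUCKETS 16u   /* stand-in for the lookup-list mask + 1 */
#define WB_UNITS   3u

struct fake_pp_job {
	uint32_t fb_id;
	uint32_t wb_source[WB_UNITS];
	uint32_t wb_disabled[WB_UNITS];
	struct fake_pp_job *next;   /* next job in the same bucket */
};

static struct fake_pp_job *buckets[FB_BUCKETS];

static void disable_wb(uint32_t fb_id, uint32_t discarded_addr)
{
	struct fake_pp_job *job = buckets[fb_id & (FB_BUCKETS - 1u)];

	for (; job != NULL; job = job->next) {
		uint32_t unit;

		if (job->fb_id != fb_id)
			continue;   /* different frame builder sharing the bucket */

		for (unit = 0; unit < WB_UNITS; unit++) {
			if (job->wb_source[unit] == discarded_addr) {
				job->wb_disabled[unit] = 1;
				printf("fb %u: disabled WB%u\n",
				       (unsigned)fb_id, (unsigned)unit);
			}
		}
	}
}

int main(void)
{
	struct fake_pp_job job = { 33, { 0x1000, 0x2000, 0x3000 }, { 0 }, NULL };

	buckets[job.fb_id & (FB_BUCKETS - 1u)] = &job;
	disable_wb(33, 0x2000);   /* disables WB1 only */
	return 0;
}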
++/* ++ * ---------- Implementation of static functions ---------- + */ + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_ukk.h" -+#include "mali_uk_types.h" -+#include "mali_user_settings_db.h" -+#include "mali_session.h" ++static mali_timeline_point mali_scheduler_submit_gp_job( ++ struct mali_session_data *session, struct mali_gp_job *job) ++{ ++ mali_timeline_point point; + -+static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX]; -+const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(job); ++ ++ /* Add job to Timeline system. */ ++ point = mali_timeline_system_add_tracker(session->timeline_system, ++ mali_gp_job_get_tracker(job), MALI_TIMELINE_GP); ++ ++ return point; ++} ++ ++static _mali_osk_errcode_t mali_scheduler_submit_pp_job( ++ struct mali_session_data *session, struct mali_pp_job *job, mali_timeline_point *point) + -+static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value) +{ -+ mali_bool done = MALI_FALSE; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ struct ww_acquire_ctx ww_actx; ++ u32 i; ++ u32 num_memory_cookies = 0; ++ struct reservation_object **reservation_object_list = NULL; ++ unsigned int num_reservation_object = 0; ++#endif ++ ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(job); + ++ mali_scheduler_lock(); + /* -+ * This function gets a bit complicated because we can't hold the session lock while -+ * allocating notification objects. ++ * Adding job to the lookup list used to quickly discard ++ * writeback units of queued jobs. + */ ++ mali_pp_job_fb_lookup_add(job); ++ mali_scheduler_unlock(); + -+ while (!done) { -+ u32 i; -+ u32 num_sessions_alloc; -+ u32 num_sessions_with_lock; -+ u32 used_notification_objects = 0; -+ _mali_osk_notification_t **notobjs; ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) + -+ /* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */ -+ num_sessions_alloc = mali_session_get_count(); -+ if (0 == num_sessions_alloc) { -+ /* No sessions to report to */ -+ return; ++ /* Allocate the reservation_object_list to list the dma reservation object of dependent dma buffer */ ++ num_memory_cookies = mali_pp_job_num_memory_cookies(job); ++ if (0 < num_memory_cookies) { ++ reservation_object_list = kzalloc(sizeof(struct reservation_object *) * num_memory_cookies, GFP_KERNEL); ++ if (NULL == reservation_object_list) { ++ MALI_PRINT_ERROR(("Failed to alloc the reservation object list.\n")); ++ ret = _MALI_OSK_ERR_NOMEM; ++ goto failed_to_alloc_reservation_object_list; + } ++ } + -+ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc); -+ if (NULL == notobjs) { -+ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n")); -+ return; ++ /* Add the dma reservation object into reservation_object_list*/ ++ for (i = 0; i < num_memory_cookies; i++) { ++ mali_mem_backend *mem_backend = NULL; ++ struct reservation_object *tmp_reservation_object = NULL; ++ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); ++ ++ mem_backend = mali_mem_backend_struct_search(session, mali_addr); ++ ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ ++ if (NULL == mem_backend) { ++ MALI_PRINT_ERROR(("Failed to find the memory backend for memory cookie[%d].\n", i)); ++ goto failed_to_find_mem_backend; + } + 
-+ for (i = 0; i < num_sessions_alloc; i++) { -+ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED, -+ sizeof(_mali_uk_settings_changed_s)); -+ if (NULL != notobjs[i]) { -+ _mali_uk_settings_changed_s *data; -+ data = notobjs[i]->result_buffer; ++ if (MALI_MEM_DMA_BUF != mem_backend->type) ++ continue; + -+ data->setting = setting; -+ data->value = value; -+ } else { -+ MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i)); -+ } ++ tmp_reservation_object = mem_backend->dma_buf.attachment->buf->resv; ++ ++ if (NULL != tmp_reservation_object) { ++ mali_dma_fence_add_reservation_object_list(tmp_reservation_object, ++ reservation_object_list, &num_reservation_object); + } ++ } + -+ mali_session_lock(); ++ /* ++ * Add the mali dma fence callback to wait for all dependent dma buf, ++ * and extend the timeline system to support dma fence, ++ * then create the new internal dma fence to replace all last dma fence for dependent dma buf. ++ */ ++ if (0 < num_reservation_object) { ++ int error; ++ int num_dma_fence_waiter = 0; ++ /* Create one new dma fence.*/ ++ job->rendered_dma_fence = mali_dma_fence_new(job->session->fence_context, ++ _mali_osk_atomic_inc_return(&job->session->fence_seqno)); + -+ /* number of sessions will not change while we hold the lock */ -+ num_sessions_with_lock = mali_session_get_count(); ++ if (NULL == job->rendered_dma_fence) { ++ MALI_PRINT_ERROR(("Failed to creat one new dma fence.\n")); ++ ret = _MALI_OSK_ERR_FAULT; ++ goto failed_to_create_dma_fence; ++ } + -+ if (num_sessions_alloc >= num_sessions_with_lock) { -+ /* We have allocated enough notification objects for all the sessions atm */ -+ struct mali_session_data *session, *tmp; -+ MALI_SESSION_FOREACH(session, tmp, link) { -+ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc); -+ if (NULL != notobjs[used_notification_objects]) { -+ mali_session_send_notification(session, notobjs[used_notification_objects]); -+ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */ -+ } -+ used_notification_objects++; -+ } -+ done = MALI_TRUE; ++ /* In order to avoid deadlock, wait/wound mutex lock to lock all dma buffers*/ ++ ++ error = mali_dma_fence_lock_reservation_object_list(reservation_object_list, ++ num_reservation_object, &ww_actx); ++ ++ if (0 != error) { ++ MALI_PRINT_ERROR(("Failed to lock all reservation objects.\n")); ++ ret = _MALI_OSK_ERR_FAULT; ++ goto failed_to_lock_reservation_object_list; + } + -+ mali_session_unlock(); ++ mali_dma_fence_context_init(&job->dma_fence_context, ++ mali_timeline_dma_fence_callback, (void *)job); + -+ /* Delete any remaining/unused notification objects */ -+ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) { -+ if (NULL != notobjs[used_notification_objects]) { -+ _mali_osk_notification_delete(notobjs[used_notification_objects]); ++ /* Add dma fence waiters and dma fence callback. 
*/ ++ for (i = 0; i < num_reservation_object; i++) { ++ ret = mali_dma_fence_context_add_waiters(&job->dma_fence_context, reservation_object_list[i]); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_PRINT_ERROR(("Failed to add waiter into mali dma fence context.\n")); ++ goto failed_to_add_dma_fence_waiter; + } + } + -+ _mali_osk_free(notobjs); -+ } -+} ++ for (i = 0; i < num_reservation_object; i++) { ++ reservation_object_add_excl_fence(reservation_object_list[i], job->rendered_dma_fence); ++ } + -+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value) -+{ -+ mali_bool notify = MALI_FALSE; ++ num_dma_fence_waiter = job->dma_fence_context.num_dma_fence_waiter; + -+ if (setting >= _MALI_UK_USER_SETTING_MAX) { -+ MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n")); -+ return; -+ } ++ /* Add job to Timeline system. */ ++ (*point) = mali_timeline_system_add_tracker(session->timeline_system, ++ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); + -+ if (mali_user_settings[setting] != value) { -+ notify = MALI_TRUE; ++ if (0 != num_dma_fence_waiter) { ++ mali_dma_fence_context_dec_count(&job->dma_fence_context); ++ } ++ ++ /* Unlock all wait/wound mutex lock. */ ++ mali_dma_fence_unlock_reservation_object_list(reservation_object_list, ++ num_reservation_object, &ww_actx); ++ } else { ++ /* Add job to Timeline system. */ ++ (*point) = mali_timeline_system_add_tracker(session->timeline_system, ++ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); + } + -+ mali_user_settings[setting] = value; ++ kfree(reservation_object_list); ++ return ret; ++#else ++ /* Add job to Timeline system. */ ++ (*point) = mali_timeline_system_add_tracker(session->timeline_system, ++ mali_pp_job_get_tracker(job), MALI_TIMELINE_PP); ++#endif + -+ if (notify) { -+ mali_user_settings_notify(setting, value); -+ } ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++failed_to_add_dma_fence_waiter: ++ mali_dma_fence_context_term(&job->dma_fence_context); ++ mali_dma_fence_unlock_reservation_object_list(reservation_object_list, ++ num_reservation_object, &ww_actx); ++failed_to_lock_reservation_object_list: ++ mali_dma_fence_signal_and_put(&job->rendered_dma_fence); ++failed_to_create_dma_fence: ++failed_to_find_mem_backend: ++ if (NULL != reservation_object_list) ++ kfree(reservation_object_list); ++failed_to_alloc_reservation_object_list: ++ mali_pp_job_fb_lookup_remove(job); ++#endif ++ return ret; +} + -+u32 mali_get_user_setting(_mali_uk_user_setting_t setting) ++static mali_bool mali_scheduler_queue_gp_job(struct mali_gp_job *job) +{ -+ if (setting >= _MALI_UK_USER_SETTING_MAX) { -+ return 0; -+ } ++ struct mali_session_data *session; ++ _mali_osk_list_t *queue; + -+ return mali_user_settings[setting]; -+} ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args) -+{ -+ _mali_uk_user_setting_t setting; -+ MALI_DEBUG_ASSERT_POINTER(args); ++ session = mali_gp_job_get_session(job); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ setting = args->setting; ++ if (unlikely(session->is_aborting)) { ++ MALI_DEBUG_PRINT(4, ("Mali GP scheduler: Job %u (0x%08X) queued while session is aborting.\n", ++ mali_gp_job_get_id(job), job)); ++ return MALI_FALSE; /* job not queued */ ++ } + -+ if (_MALI_UK_USER_SETTING_MAX > setting) { -+ args->value = mali_user_settings[setting]; -+ return _MALI_OSK_ERR_OK; ++ mali_gp_job_set_cache_order(job, mali_scheduler_get_new_cache_order()); ++ ++ /* Determine which queue the job should be added to. 
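The CONFIG_MALI_DMA_BUF_FENCE path above gathers the reservation objects of every dma-buf the job references, locks them all as one group (with a ww_mutex acquire context in the real code), registers waiters on the fences already attached, installs the job's own fence as the new exclusive fence and only then unlocks. The userspace analogy below keeps just the "lock the whole group before touching any fence state" idea, using address-ordered pthread mutexes instead of ww_mutex; it illustrates the locking discipline, not the kernel API.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_buffer {
	pthread_mutex_t lock;
	int exclusive_fence;   /* 0 = none */
};

static int cmp_buffer_ptr(const void *a, const void *b)
{
	uintptr_t pa = (uintptr_t)(*(struct fake_buffer *const *)a);
	uintptr_t pb = (uintptr_t)(*(struct fake_buffer *const *)b);
	return (pa > pb) - (pa < pb);
}

static void attach_fence(struct fake_buffer **bufs, size_t n, int fence)
{
	size_t i;

	/* Lock the whole group in one globally consistent (address) order. */
	qsort(bufs, n, sizeof(*bufs), cmp_buffer_ptr);
	for (i = 0; i < n; i++)
		pthread_mutex_lock(&bufs[i]->lock);

	/* With everything held: note old fences, attach the new one. */
	for (i = 0; i < n; i++) {
		printf("buffer %zu: previous fence %d, attaching %d\n",
		       i, bufs[i]->exclusive_fence, fence);
		bufs[i]->exclusive_fence = fence;
	}

	for (i = 0; i < n; i++)
		pthread_mutex_unlock(&bufs[i]->lock);
}

int main(void)
{
	static struct fake_buffer a = { PTHREAD_MUTEX_INITIALIZER, 7 };
	static struct fake_buffer b = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct fake_buffer *group[] = { &b, &a };

	attach_fence(group, 2, 42);
	return 0;
}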
*/
++ if (session->use_high_priority_job_queue) {
++ queue = &job_queue_gp.high_pri;
+ } else {
-+ return _MALI_OSK_ERR_INVALID_ARGS;
++ queue = &job_queue_gp.normal_pri;
+ }
+
-+_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args)
-+{
-+ MALI_DEBUG_ASSERT_POINTER(args);
++ job_queue_gp.depth += 1;
++ job_queue_gp.big_job_num += (job->big_job) ? 1 : 0;
+
-+ _mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings));
++ /* Add job to queue (mali_gp_job_list_add finds the correct place). */
++ mali_gp_job_list_add(job, queue);
+
-+ return _MALI_OSK_ERR_OK;
-+}
-diff --git a/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h
-new file mode 100644
-index 000000000..da9c0630e
---- /dev/null
-+++ b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h
-@@ -0,0 +1,39 @@
-+/**
-+ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved.
-+ *
-+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
-+ *
-+ * A copy of the licence is included with the program, and can also be obtained from Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-+ */
++ /*
++ * We hold a PM reference for every job we hold queued (and running).
++ * It is important that we take this reference after the job has been
++ * added to the queue so that any runtime resume could schedule this
++ * job right there and then.
++ */
++ _mali_osk_pm_dev_ref_get_async();
+
-+#ifndef __MALI_USER_SETTINGS_DB_H__
-+#define __MALI_USER_SETTINGS_DB_H__
++ if (mali_utilization_enabled()) {
++ /*
++ * We cheat a little bit by counting the GP as busy from the
++ * time a GP job is queued. This will be fine because we only
++ * lose the tiny idle gap between jobs, but we will instead
++ * get less utilization work to do (fewer locks taken)
++ */
++ mali_utilization_gp_start();
++ }
+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
++ mali_pm_record_gpu_active(MALI_TRUE);
+
-+#include "mali_uk_types.h"
++ /* Add profiling events for job enqueued */
++ _mali_osk_profiling_add_event(
++ MALI_PROFILING_EVENT_TYPE_SINGLE |
++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE |
++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE,
++ mali_gp_job_get_pid(job),
++ mali_gp_job_get_tid(job),
++ mali_gp_job_get_frame_builder_id(job),
++ mali_gp_job_get_flush_id(job),
++ 0);
+
-+/** @brief Set Mali user setting in DB
-+ *
-+ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change. 
-+ * -+ * @param setting the setting to be changed -+ * @param value the new value to set -+ */ -+void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value); ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ trace_gpu_job_enqueue(mali_gp_job_get_tid(job), ++ mali_gp_job_get_id(job), "GP"); ++#endif + -+/** @brief Get current Mali user setting value from DB -+ * -+ * @param setting the setting to extract -+ * @return the value of the selected setting -+ */ -+u32 mali_get_user_setting(_mali_uk_user_setting_t setting); ++ MALI_DEBUG_PRINT(3, ("Mali GP scheduler: Job %u (0x%08X) queued\n", ++ mali_gp_job_get_id(job), job)); + -+#ifdef __cplusplus ++ return MALI_TRUE; /* job queued */ +} -+#endif -+#endif /* __MALI_KERNEL_USER_SETTING__ */ -diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h -new file mode 100644 -index 000000000..7df55c951 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h -@@ -0,0 +1,526 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+/** -+ * @file mali_utgard.h -+ * Defines types and interface exposed by the Mali Utgard device driver -+ */ ++static mali_bool mali_scheduler_queue_pp_job(struct mali_pp_job *job) ++{ ++ struct mali_session_data *session; ++ _mali_osk_list_t *queue = NULL; + -+#ifndef __MALI_UTGARD_H__ -+#define __MALI_UTGARD_H__ ++ MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD(); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#include "mali_osk_types.h" -+#ifdef CONFIG_MALI_DEVFREQ -+#include -+#include "mali_pm_metrics.h" -+#ifdef CONFIG_DEVFREQ_THERMAL -+#include -+#endif -+#endif ++ session = mali_pp_job_get_session(job); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+#define MALI_GPU_NAME_UTGARD "mali-utgard" ++ if (unlikely(session->is_aborting)) { ++ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while session is aborting.\n", ++ mali_pp_job_get_id(job), job)); ++ return MALI_FALSE; /* job not queued */ ++ } else if (unlikely(MALI_SWAP_IN_FAIL == job->swap_status)) { ++ MALI_DEBUG_PRINT(2, ("Mali PP scheduler: Job %u (0x%08X) queued while swap in failed.\n", ++ mali_pp_job_get_id(job), job)); ++ return MALI_FALSE; ++ } + ++ mali_pp_job_set_cache_order(job, mali_scheduler_get_new_cache_order()); + -+#define MALI_OFFSET_GP 0x00000 -+#define MALI_OFFSET_GP_MMU 0x03000 ++ if (session->use_high_priority_job_queue) { ++ queue = &job_queue_pp.high_pri; ++ } else { ++ queue = &job_queue_pp.normal_pri; ++ } + -+#define MALI_OFFSET_PP0 0x08000 -+#define MALI_OFFSET_PP0_MMU 0x04000 -+#define MALI_OFFSET_PP1 0x0A000 -+#define MALI_OFFSET_PP1_MMU 0x05000 -+#define MALI_OFFSET_PP2 0x0C000 -+#define MALI_OFFSET_PP2_MMU 0x06000 -+#define MALI_OFFSET_PP3 0x0E000 -+#define MALI_OFFSET_PP3_MMU 0x07000 ++ job_queue_pp.depth += ++ mali_pp_job_get_sub_job_count(job); + -+#define MALI_OFFSET_PP4 0x28000 -+#define MALI_OFFSET_PP4_MMU 0x1C000 -+#define MALI_OFFSET_PP5 0x2A000 -+#define MALI_OFFSET_PP5_MMU 0x1D000 -+#define MALI_OFFSET_PP6 0x2C000 -+#define 
MALI_OFFSET_PP6_MMU 0x1E000 -+#define MALI_OFFSET_PP7 0x2E000 -+#define MALI_OFFSET_PP7_MMU 0x1F000 ++ /* Add job to queue (mali_gp_job_queue_add find correct place). */ ++ mali_pp_job_list_add(job, queue); + -+#define MALI_OFFSET_L2_RESOURCE0 0x01000 -+#define MALI_OFFSET_L2_RESOURCE1 0x10000 -+#define MALI_OFFSET_L2_RESOURCE2 0x11000 ++ /* ++ * We hold a PM reference for every job we hold queued (and running) ++ * It is important that we take this reference after job has been ++ * added the the queue so that any runtime resume could schedule this ++ * job right there and then. ++ */ ++ _mali_osk_pm_dev_ref_get_async(); + -+#define MALI400_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE0 -+#define MALI450_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE1 -+#define MALI450_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0 -+#define MALI450_OFFSET_L2_CACHE2 MALI_OFFSET_L2_RESOURCE2 -+#define MALI470_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0 ++ if (mali_utilization_enabled()) { ++ /* ++ * We cheat a little bit by counting the PP as busy from the ++ * time a PP job is queued. This will be fine because we only ++ * loose the tiny idle gap between jobs, but we will instead ++ * get less utilization work to do (less locks taken) ++ */ ++ mali_utilization_pp_start(); ++ } + -+#define MALI_OFFSET_BCAST 0x13000 -+#define MALI_OFFSET_DLBU 0x14000 ++ mali_pm_record_gpu_active(MALI_FALSE); + -+#define MALI_OFFSET_PP_BCAST 0x16000 -+#define MALI_OFFSET_PP_BCAST_MMU 0x15000 ++ /* Add profiling events for job enqueued */ ++ _mali_osk_profiling_add_event( ++ MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE, ++ mali_pp_job_get_pid(job), ++ mali_pp_job_get_tid(job), ++ mali_pp_job_get_frame_builder_id(job), ++ mali_pp_job_get_flush_id(job), ++ 0); + -+#define MALI_OFFSET_PMU 0x02000 -+#define MALI_OFFSET_DMA 0x12000 ++#if defined(CONFIG_GPU_TRACEPOINTS) && defined(CONFIG_TRACEPOINTS) ++ trace_gpu_job_enqueue(mali_pp_job_get_tid(job), ++ mali_pp_job_get_id(job), "PP"); ++#endif + -+/* Mali-300 */ ++ MALI_DEBUG_PRINT(3, ("Mali PP scheduler: %s job %u (0x%08X) with %u parts queued.\n", ++ mali_pp_job_is_virtual(job) ++ ? 
"Virtual" : "Physical", ++ mali_pp_job_get_id(job), job, ++ mali_pp_job_get_sub_job_count(job))); + -+#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) ++ return MALI_TRUE; /* job queued */ ++} + -+#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) ++static void mali_scheduler_return_gp_job_to_user(struct mali_gp_job *job, ++ mali_bool success) ++{ ++ _mali_uk_gp_job_finished_s *jobres; ++ struct mali_session_data *session; ++ _mali_osk_notification_t *notification; + -+/* Mali-400 */ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) ++ session = mali_gp_job_get_session(job); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) ++ notification = mali_gp_job_get_finished_notification(job); ++ MALI_DEBUG_ASSERT_POINTER(notification); + -+#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) ++ jobres = notification->result_buffer; ++ MALI_DEBUG_ASSERT_POINTER(jobres); + -+#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) ++ jobres->pending_big_job_num = mali_scheduler_job_gp_big_job_count(); + -+#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) ++ jobres->user_job_ptr = mali_gp_job_get_user_id(job); ++ if (MALI_TRUE == success) { ++ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS; ++ } else { ++ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR; ++ } ++ jobres->heap_current_addr = 
mali_gp_job_get_current_heap_addr(job); ++ jobres->perf_counter0 = mali_gp_job_get_perf_counter_value0(job); ++ jobres->perf_counter1 = mali_gp_job_get_perf_counter_value1(job); + -+#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) ++ mali_session_send_notification(session, notification); ++} + -+#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) ++void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job, ++ u32 num_cores_in_virtual) ++{ ++ u32 i; ++ u32 num_counters_to_copy; ++ _mali_uk_pp_job_finished_s *jobres; ++ struct mali_session_data *session; ++ _mali_osk_notification_t *notification; + -+#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ if (MALI_TRUE == mali_pp_job_use_no_notification(job)) { ++ return; ++ } + -+ /* Mali-450 */ -+#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ -+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ session = mali_pp_job_get_session(job); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+#define 
MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) ++ notification = mali_pp_job_get_finished_notification(job); ++ MALI_DEBUG_ASSERT_POINTER(notification); + -+#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ jobres = notification->result_buffer; ++ MALI_DEBUG_ASSERT_POINTER(jobres); + -+#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ -+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) ++ jobres->user_job_ptr = mali_pp_job_get_user_id(job); ++ if (MALI_TRUE == mali_pp_job_was_success(job)) { ++ jobres->status = _MALI_UK_JOB_STATUS_END_SUCCESS; ++ } else { ++ jobres->status = _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR; ++ } + -+#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ if (mali_pp_job_is_virtual(job)) { ++ num_counters_to_copy = num_cores_in_virtual; ++ } else { ++ 
num_counters_to_copy = mali_pp_job_get_sub_job_count(job); ++ } + -+#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ -+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) ++ for (i = 0; i < num_counters_to_copy; i++) { ++ jobres->perf_counter0[i] = ++ mali_pp_job_get_perf_counter_value0(job, i); ++ jobres->perf_counter1[i] = ++ mali_pp_job_get_perf_counter_value1(job, i); ++ jobres->perf_counter_src0 = ++ mali_pp_job_get_pp_counter_global_src0(); ++ jobres->perf_counter_src1 = ++ mali_pp_job_get_pp_counter_global_src1(); ++ } + -+#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ mali_session_send_notification(session, notification); ++} + -+#define MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, 
pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ -+ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) ++static void mali_scheduler_deferred_pp_job_delete(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock); ++ mali_pp_job_list_addtail(job, &scheduler_pp_job_deletion_queue); ++ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock); + -+ /* Mali - 470 */ -+#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) ++ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_delete); ++} + -+#define MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++void mali_scheduler_do_pp_job_delete(void *arg) ++{ ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); ++ struct mali_pp_job *job; ++ struct mali_pp_job *tmp; + -+#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ 
MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) ++ MALI_IGNORE(arg); + -+#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++ /* ++ * Quickly "unhook" the jobs pending to be deleted, so we can release ++ * the lock before we start deleting the job objects ++ * (without any locks held) ++ */ ++ _mali_osk_spinlock_irq_lock(scheduler_pp_job_delete_lock); ++ _mali_osk_list_move_list(&scheduler_pp_job_deletion_queue, &list); ++ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_delete_lock); + -+#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, ++ struct mali_pp_job, list) { ++ _mali_osk_list_delinit(&job->list); + -+#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ mali_dma_fence_context_term(&job->dma_fence_context); ++#endif + -+#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ -+ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ -+ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \ -+ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ -+ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ -+ MALI_GPU_RESOURCE_PP_BCAST(base_addr + 
MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) ++ mali_pp_job_delete(job); /* delete the job object itself */ ++ } ++} + -+#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ -+ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ ++#if defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) + -+#define MALI_GPU_RESOURCE_L2(addr) \ -+ { \ -+ .name = "Mali_L2", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = addr, \ -+ .end = addr + 0x200, \ -+ }, ++static void mali_scheduler_deferred_pp_job_queue(struct mali_pp_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); + -+#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \ -+ { \ -+ .name = "Mali_GP", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = gp_addr, \ -+ .end = gp_addr + 0x100, \ -+ }, \ -+ { \ -+ .name = "Mali_GP_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = gp_irq, \ -+ .end = gp_irq, \ -+ }, \ -+ -+#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \ -+ { \ -+ .name = "Mali_GP", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = gp_addr, \ -+ .end = gp_addr + 0x100, \ -+ }, \ -+ { \ -+ .name = "Mali_GP_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = gp_irq, \ -+ .end = gp_irq, \ -+ }, \ -+ { \ -+ .name = "Mali_GP_MMU", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = gp_mmu_addr, \ -+ .end = gp_mmu_addr + 0x100, \ -+ }, \ -+ { \ -+ .name = "Mali_GP_MMU_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = gp_mmu_irq, \ -+ .end = gp_mmu_irq, \ -+ }, -+ -+#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \ -+ { \ -+ .name = "Mali_PP", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = pp_addr, \ -+ .end = pp_addr + 0x1100, \ -+ }, \ -+ { \ -+ .name = "Mali_PP_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = pp_irq, \ -+ .end = pp_irq, \ -+ }, \ ++ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock); ++ mali_pp_job_list_addtail(job, &scheduler_pp_job_queue_list); ++ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock); + -+#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \ -+ { \ -+ .name = "Mali_PP" #id, \ -+ .flags = IORESOURCE_MEM, \ -+ .start = pp_addr, \ -+ .end = pp_addr + 0x1100, \ -+ }, \ -+ { \ -+ .name = "Mali_PP" #id "_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = pp_irq, \ -+ .end = pp_irq, \ -+ }, \ -+ { \ -+ .name = "Mali_PP" #id "_MMU", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = pp_mmu_addr, \ -+ .end = pp_mmu_addr + 0x100, \ -+ }, \ -+ { \ -+ .name = "Mali_PP" #id "_MMU_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = pp_mmu_irq, \ -+ .end = pp_mmu_irq, \ -+ }, ++ _mali_osk_wq_schedule_work(scheduler_wq_pp_job_queue); ++} + -+#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \ -+ { \ -+ .name = "Mali_MMU", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = mmu_addr, \ -+ .end = mmu_addr + 0x100, \ -+ }, \ -+ { \ -+ .name = "Mali_MMU_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = mmu_irq, \ -+ .end = mmu_irq, \ -+ }, ++static void mali_scheduler_do_pp_job_queue(void *arg) ++{ ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); ++ struct mali_pp_job *job; ++ struct mali_pp_job *tmp; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+#define MALI_GPU_RESOURCE_PMU(pmu_addr) \ -+ { \ -+ .name = "Mali_PMU", \ -+ .flags = 
IORESOURCE_MEM, \ -+ .start = pmu_addr, \ -+ .end = pmu_addr + 0x100, \ -+ }, ++ MALI_IGNORE(arg); + -+#define MALI_GPU_RESOURCE_DMA(dma_addr) \ -+ { \ -+ .name = "Mali_DMA", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = dma_addr, \ -+ .end = dma_addr + 0x100, \ -+ }, ++ /* ++ * Quickly "unhook" the jobs pending to be queued, so we can release ++ * the lock before we start queueing the job objects ++ * (without any locks held) ++ */ ++ _mali_osk_spinlock_irq_lock(scheduler_pp_job_queue_lock); ++ _mali_osk_list_move_list(&scheduler_pp_job_queue_list, &list); ++ _mali_osk_spinlock_irq_unlock(scheduler_pp_job_queue_lock); + -+#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \ -+ { \ -+ .name = "Mali_DLBU", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = dlbu_addr, \ -+ .end = dlbu_addr + 0x100, \ -+ }, ++ /* First loop through all jobs and do the pre-work (no locks needed) */ ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, ++ struct mali_pp_job, list) { ++ if (mali_pp_job_needs_dma_buf_mapping(job)) { ++ /* ++ * This operation could fail, but we continue anyway, ++ * because the worst that could happen is that this ++ * job will fail due to a Mali page fault. ++ */ ++ mali_dma_buf_map_job(job); ++ } ++ } + -+#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \ -+ { \ -+ .name = "Mali_Broadcast", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = bcast_addr, \ -+ .end = bcast_addr + 0x100, \ -+ }, ++ mali_scheduler_lock(); + -+#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \ -+ { \ -+ .name = "Mali_PP_Broadcast", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = pp_addr, \ -+ .end = pp_addr + 0x1100, \ -+ }, \ -+ { \ -+ .name = "Mali_PP_Broadcast_IRQ", \ -+ .flags = IORESOURCE_IRQ, \ -+ .start = pp_irq, \ -+ .end = pp_irq, \ -+ }, \ ++ /* Then loop through all jobs again to queue them (lock needed) */ ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &list, ++ struct mali_pp_job, list) { + -+#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \ -+ { \ -+ .name = "Mali_PP_MMU_Broadcast", \ -+ .flags = IORESOURCE_MEM, \ -+ .start = pp_mmu_bcast_addr, \ -+ .end = pp_mmu_bcast_addr + 0x100, \ -+ }, ++ /* Remove from scheduler_pp_job_queue_list before queueing */ ++ mali_pp_job_list_remove(job); + -+ struct mali_gpu_utilization_data { -+ unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */ -+ unsigned int utilization_gp; /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */ -+ unsigned int utilization_pp; /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */ -+ }; ++ if (mali_scheduler_queue_pp_job(job)) { ++ /* Job queued successfully */ ++ schedule_mask |= MALI_SCHEDULER_MASK_PP; ++ } else { ++ /* Failed to enqueue job, release job (with error) */ ++ mali_pp_job_fb_lookup_remove(job); ++ mali_pp_job_mark_unstarted_failed(job); + -+ struct mali_gpu_clk_item { -+ unsigned int clock; /* unit(MHz) */ -+ unsigned int vol; -+ }; ++ /* unlock scheduler in this uncommon case */ ++ mali_scheduler_unlock(); + -+ struct mali_gpu_clock { -+ struct mali_gpu_clk_item *item; -+ unsigned int num_of_steps; -+ }; ++ schedule_mask |= mali_timeline_tracker_release( ++ mali_pp_job_get_tracker(job)); + -+ struct mali_gpu_device_data { -+ /* Shared GPU memory */ -+ unsigned long shared_mem_size; ++ /* Notify user space and close the job object */ ++ mali_scheduler_complete_pp_job(job, 0, MALI_TRUE, ++ MALI_FALSE); + -+ /* -+ * Mali PMU switch delay. 
-+ * Only needed if the power gates are connected to the PMU in a high fanout -+ * network. This value is the number of Mali clock cycles it takes to -+ * enable the power gates and turn on the power mesh. -+ * This value will have no effect if a daisy chain implementation is used. -+ */ -+ u32 pmu_switch_delay; ++ mali_scheduler_lock(); ++ } ++ } + -+ /* Mali Dynamic power domain configuration in sequence from 0-11 -+ * GP PP0 PP1 PP2 PP3 PP4 PP5 PP6 PP7, L2$0 L2$1 L2$2 -+ */ -+ u16 pmu_domain_config[12]; ++ mali_scheduler_unlock(); + -+ /* Dedicated GPU memory range (physical). */ -+ unsigned long dedicated_mem_start; -+ unsigned long dedicated_mem_size; ++ /* Trigger scheduling of jobs */ ++ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); ++} + -+ /* Frame buffer memory to be accessible by Mali GPU (physical) */ -+ unsigned long fb_start; -+ unsigned long fb_size; ++#endif /* defined(MALI_SCHEDULER_USE_DEFERRED_PP_JOB_QUEUE) */ + -+ /* Max runtime [ms] for jobs */ -+ int max_job_runtime; ++void mali_scheduler_gp_pp_job_queue_print(void) ++{ ++ struct mali_gp_job *gp_job = NULL; ++ struct mali_gp_job *tmp_gp_job = NULL; ++ struct mali_pp_job *pp_job = NULL; ++ struct mali_pp_job *tmp_pp_job = NULL; + -+ /* Report GPU utilization and related control in this interval (specified in ms) */ -+ unsigned long control_interval; ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); ++ MALI_DEBUG_ASSERT_LOCK_HELD(mali_executor_lock_obj); + -+ /* Function that will receive periodic GPU utilization numbers */ -+ void (*utilization_callback)(struct mali_gpu_utilization_data *data); ++ /* dump job queup status */ ++ if ((0 == job_queue_gp.depth) && (0 == job_queue_pp.depth)) { ++ MALI_PRINT(("No GP&PP job in the job queue.\n")); ++ return; ++ } + -+ /* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */ -+ int (*set_freq)(int setting_clock_step); -+ /* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */ -+ void (*get_clock_info)(struct mali_gpu_clock **data); -+ /* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */ -+ int (*get_freq)(void); -+ /* Function that init the mali gpu secure mode */ -+ int (*secure_mode_init)(void); -+ /* Function that deinit the mali gpu secure mode */ -+ void (*secure_mode_deinit)(void); -+ /* Function that reset GPU and enable gpu secure mode */ -+ int (*gpu_reset_and_secure_mode_enable)(void); -+ /* Function that Reset GPU and disable gpu secure mode */ -+ int (*gpu_reset_and_secure_mode_disable)(void); -+ /* ipa related interface customer need register */ -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+ struct devfreq_cooling_power *gpu_cooling_ops; -+#endif -+ }; ++ MALI_PRINT(("Total (%d) GP job in the job queue.\n", job_queue_gp.depth)); ++ if (job_queue_gp.depth > 0) { ++ if (!_mali_osk_list_empty(&job_queue_gp.high_pri)) { ++ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.high_pri, ++ struct mali_gp_job, list) { ++ MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job high_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid)); ++ } ++ } + -+ /** -+ * Pause the scheduling and power state changes of Mali device driver. -+ * mali_dev_resume() must always be called as soon as possible after this function -+ * in order to resume normal operation of the Mali driver. 
-+ */ -+ void mali_dev_pause(void); ++ if (!_mali_osk_list_empty(&job_queue_gp.normal_pri)) { ++ _MALI_OSK_LIST_FOREACHENTRY(gp_job, tmp_gp_job, &job_queue_gp.normal_pri, ++ struct mali_gp_job, list) { ++ MALI_PRINT(("GP job(%p) id = %d tid = %d pid = %d in the gp job normal_pri queue\n", gp_job, gp_job->id, gp_job->tid, gp_job->pid)); ++ } ++ } ++ } + -+ /** -+ * Resume scheduling and allow power changes in Mali device driver. -+ * This must always be called after mali_dev_pause(). -+ */ -+ void mali_dev_resume(void); ++ MALI_PRINT(("Total (%d) PP job in the job queue.\n", job_queue_pp.depth)); ++ if (job_queue_pp.depth > 0) { ++ if (!_mali_osk_list_empty(&job_queue_pp.high_pri)) { ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.high_pri, ++ struct mali_pp_job, list) { ++ if (mali_pp_job_is_virtual(pp_job)) { ++ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); ++ } else { ++ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job high_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); ++ } ++ } ++ } + -+ /** @brief Set the desired number of PP cores to use. -+ * -+ * The internal Mali PMU will be used, if present, to physically power off the PP cores. -+ * -+ * @param num_cores The number of desired cores -+ * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified. -+ */ -+ int mali_perf_set_num_pp_cores(unsigned int num_cores); ++ if (!_mali_osk_list_empty(&job_queue_pp.normal_pri)) { ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, tmp_pp_job, &job_queue_pp.normal_pri, ++ struct mali_pp_job, list) { ++ if (mali_pp_job_is_virtual(pp_job)) { ++ MALI_PRINT(("PP Virtual job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); ++ } else { ++ MALI_PRINT(("PP Physical job(%p) id = %d tid = %d pid = %d in the pp job normal_pri queue\n", pp_job, pp_job->id, pp_job->tid, pp_job->pid)); ++ } ++ } ++ } ++ } + -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h ++ /* dump group running job status */ ++ mali_executor_running_status_print(); ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h new file mode 100644 -index 000000000..686708eae +index 000000000..de81a421e --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h -@@ -0,0 +1,97 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler.h +@@ -0,0 +1,131 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -304483,297 +307959,170 @@ index 000000000..686708eae + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_UTGARD_IOCTL_H__ -+#define __MALI_UTGARD_IOCTL_H__ ++#ifndef __MALI_SCHEDULER_H__ ++#define __MALI_SCHEDULER_H__ + -+#include -+#include -+#include /* file system operations */ ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_scheduler_types.h" ++#include "mali_session.h" + -+#ifdef __cplusplus -+extern "C" { -+#endif ++struct mali_scheduler_job_queue { ++ _MALI_OSK_LIST_HEAD(normal_pri); /* Queued jobs with normal priority */ ++ _MALI_OSK_LIST_HEAD(high_pri); /* Queued jobs with high priority */ ++ u32 depth; /* Depth of combined queues. */ ++ u32 big_job_num; ++}; + -+/** -+ * @file mali_kernel_ioctl.h -+ * Interface to the Linux device driver. -+ * This file describes the interface needed to use the Linux device driver. -+ * Its interface is designed to used by the HAL implementation through a thin arch layer. -+ */ ++extern _mali_osk_spinlock_irq_t *mali_scheduler_lock_obj; + -+/** -+ * ioctl commands -+ */ ++/* Queue of jobs to be executed on the GP group */ ++extern struct mali_scheduler_job_queue job_queue_gp; + -+#define MALI_IOC_BASE 0x82 -+#define MALI_IOC_CORE_BASE (_MALI_UK_CORE_SUBSYSTEM + MALI_IOC_BASE) -+#define MALI_IOC_MEMORY_BASE (_MALI_UK_MEMORY_SUBSYSTEM + MALI_IOC_BASE) -+#define MALI_IOC_PP_BASE (_MALI_UK_PP_SUBSYSTEM + MALI_IOC_BASE) -+#define MALI_IOC_GP_BASE (_MALI_UK_GP_SUBSYSTEM + MALI_IOC_BASE) -+#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE) -+#define MALI_IOC_VSYNC_BASE (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE) ++/* Queue of PP jobs */ ++extern struct mali_scheduler_job_queue job_queue_pp; + -+#define MALI_IOC_WAIT_FOR_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s) -+#define MALI_IOC_GET_API_VERSION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32) -+#define MALI_IOC_GET_API_VERSION_V2 _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s) -+/* rk_ext. 
*/ -+#define MALI_IOC_GET_RK_KO_VERSION _IOWR(MALI_IOC_CORE_BASE, _MALI_GET_RK_KO_VERSION, _mali_rk_ko_version_s) -+#define MALI_IOC_POST_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s) -+#define MALI_IOC_GET_USER_SETTING _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s) -+#define MALI_IOC_GET_USER_SETTINGS _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s) -+#define MALI_IOC_REQUEST_HIGH_PRIORITY _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s) -+#define MALI_IOC_TIMELINE_GET_LATEST_POINT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s) -+#define MALI_IOC_TIMELINE_WAIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s) -+#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s) -+#define MALI_IOC_SOFT_JOB_START _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s) -+#define MALI_IOC_SOFT_JOB_SIGNAL _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s) -+#define MALI_IOC_PENDING_SUBMIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s) ++extern _mali_osk_atomic_t mali_job_id_autonumber; ++extern _mali_osk_atomic_t mali_job_cache_order_autonumber; + -+#define MALI_IOC_MEM_ALLOC _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s) -+#define MALI_IOC_MEM_FREE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s) -+#define MALI_IOC_MEM_BIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s) -+#define MALI_IOC_MEM_UNBIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s) -+#define MALI_IOC_MEM_COW _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s) -+#define MALI_IOC_MEM_COW_MODIFY_RANGE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s) -+#define MALI_IOC_MEM_RESIZE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s) -+#define MALI_IOC_MEM_DMA_BUF_GET_SIZE _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s) -+#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s) -+#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s) -+#define MALI_IOC_MEM_WRITE_SAFE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s) ++#define MALI_DEBUG_ASSERT_SCHEDULER_LOCK_HELD() MALI_DEBUG_ASSERT_LOCK_HELD(mali_scheduler_lock_obj); + -+#define MALI_IOC_PP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s) -+#define MALI_IOC_PP_AND_GP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s) -+#define MALI_IOC_PP_NUMBER_OF_CORES_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s) -+#define MALI_IOC_PP_CORE_VERSION_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s) -+#define MALI_IOC_PP_DISABLE_WB _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s) ++_mali_osk_errcode_t mali_scheduler_initialize(void); ++void mali_scheduler_terminate(void); + -+#define MALI_IOC_GP2_START_JOB 
_IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s) -+#define MALI_IOC_GP2_NUMBER_OF_CORES_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s) -+#define MALI_IOC_GP2_CORE_VERSION_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, _mali_uk_get_gp_core_version_s) -+#define MALI_IOC_GP2_SUSPEND_RESPONSE _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s) ++MALI_STATIC_INLINE void mali_scheduler_lock(void) ++{ ++ _mali_osk_spinlock_irq_lock(mali_scheduler_lock_obj); ++ MALI_DEBUG_PRINT(5, ("Mali scheduler: scheduler lock taken.\n")); ++} + -+#define MALI_IOC_PROFILING_ADD_EVENT _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s) -+#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s) -+#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s) -+#define MALI_IOC_PROFILING_STREAM_FD_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s) -+#define MALI_IOC_PROILING_CONTROL_SET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s) ++MALI_STATIC_INLINE void mali_scheduler_unlock(void) ++{ ++ MALI_DEBUG_PRINT(5, ("Mali scheduler: Releasing scheduler lock.\n")); ++ _mali_osk_spinlock_irq_unlock(mali_scheduler_lock_obj); ++} + -+#define MALI_IOC_VSYNC_EVENT_REPORT _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s) ++MALI_STATIC_INLINE u32 mali_scheduler_job_gp_count(void) ++{ ++ return job_queue_gp.depth; ++} ++MALI_STATIC_INLINE u32 mali_scheduler_job_gp_big_job_count(void) ++{ ++ return job_queue_gp.big_job_num; ++} + -+/* rk_ext : 对 r5p0 集æˆä¹‹åŽ, mali_so ä¸å†ä½¿ç”¨ä¸‹é¢çš„ ioctl, 而使用 MALI_IOC_GET_RK_KO_VERSION. */ -+#if 0 -+#define MALI_IOC_GET_MALI_VERSION_IN_RK30 _IOWR(MALI_IOC_CORE_BASE,_MALI_UK_GET_MALI_VERSION_IN_RK30,_mali_uk_get_mali_version_in_rk30_s *) -+#endif ++u32 mali_scheduler_job_physical_head_count(mali_bool gpu_mode_is_secure); + -+#ifdef __cplusplus -+} -+#endif ++mali_bool mali_scheduler_job_next_is_virtual(void); ++struct mali_pp_job *mali_scheduler_job_pp_next(void); + -+#endif /* __MALI_UTGARD_IOCTL_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h -new file mode 100644 -index 000000000..17d31de93 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h -@@ -0,0 +1,190 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++struct mali_gp_job *mali_scheduler_job_gp_get(void); ++struct mali_pp_job *mali_scheduler_job_pp_physical_peek(void); ++struct mali_pp_job *mali_scheduler_job_pp_virtual_peek(void); ++struct mali_pp_job *mali_scheduler_job_pp_physical_get(u32 *sub_job); ++struct mali_pp_job *mali_scheduler_job_pp_virtual_get(void); + -+#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_ -+#define _MALI_UTGARD_PROFILING_EVENTS_H_ ++MALI_STATIC_INLINE u32 mali_scheduler_get_new_id(void) ++{ ++ return _mali_osk_atomic_inc_return(&mali_job_id_autonumber); ++} + -+/* -+ * The event ID is a 32 bit value consisting of different fields -+ * reserved, 4 bits, for future use -+ * event type, 4 bits, cinstr_profiling_event_type_t -+ * event channel, 8 bits, the source of the event. -+ * event data, 16 bit field, data depending on event type -+ */ ++MALI_STATIC_INLINE u32 mali_scheduler_get_new_cache_order(void) ++{ ++ return _mali_osk_atomic_inc_return(&mali_job_cache_order_autonumber); ++} + +/** -+ * Specifies what kind of event this is ++ * @brief Used by the Timeline system to queue a GP job. ++ * ++ * @note @ref mali_executor_schedule_from_mask() should be called if this ++ * function returns non-zero. ++ * ++ * @param job The GP job that is being activated. ++ * ++ * @return A scheduling bitmask that can be used to decide if scheduling is ++ * necessary after this call. + */ -+typedef enum { -+ MALI_PROFILING_EVENT_TYPE_SINGLE = 0 << 24, -+ MALI_PROFILING_EVENT_TYPE_START = 1 << 24, -+ MALI_PROFILING_EVENT_TYPE_STOP = 2 << 24, -+ MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24, -+ MALI_PROFILING_EVENT_TYPE_RESUME = 4 << 24, -+} cinstr_profiling_event_type_t; -+ ++mali_scheduler_mask mali_scheduler_activate_gp_job(struct mali_gp_job *job); + +/** -+ * Secifies the channel/source of the event ++ * @brief Used by the Timeline system to queue a PP job. ++ * ++ * @note @ref mali_executor_schedule_from_mask() should be called if this ++ * function returns non-zero. ++ * ++ * @param job The PP job that is being activated. ++ * ++ * @return A scheduling bitmask that can be used to decide if scheduling is ++ * necessary after this call. 
+ */ -+typedef enum { -+ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE = 0 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_GP0 = 1 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP0 = 5 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP1 = 6 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP2 = 7 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP3 = 8 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP4 = 9 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP5 = 10 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP6 = 11 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_PP7 = 12 << 16, -+ MALI_PROFILING_EVENT_CHANNEL_GPU = 21 << 16, -+} cinstr_profiling_event_channel_t; ++mali_scheduler_mask mali_scheduler_activate_pp_job(struct mali_pp_job *job); + ++void mali_scheduler_complete_gp_job(struct mali_gp_job *job, ++ mali_bool success, ++ mali_bool user_notification, ++ mali_bool dequeued); + -+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16) -+#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16) ++void mali_scheduler_complete_pp_job(struct mali_pp_job *job, ++ u32 num_cores_in_virtual, ++ mali_bool user_notification, ++ mali_bool dequeued); + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel -+ */ -+typedef enum { -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE = 0, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME = 1, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH = 2, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS = 3, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT = 4, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE = 5, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE = 6, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK = 7, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK = 8, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC = 10, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC = 11, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS = 13, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK = 53, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK = 54, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK = 55, -+ MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED = 56, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP = 57, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE = 58, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC = 59, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH = 62, -+ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63, -+} cinstr_profiling_event_reason_single_sw_t; ++void mali_scheduler_abort_session(struct mali_session_data *session); + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel -+ * to inform whether the core is physical or virtual -+ */ -+typedef enum { -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL = 0, -+ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL = 1, -+} cinstr_profiling_event_reason_start_stop_hw_t; ++void mali_scheduler_return_pp_job_to_user(struct mali_pp_job *job, ++ u32 num_cores_in_virtual); + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel -+ */ -+typedef enum { -+ /*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE = 0,*/ -+ 
MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI = 1, -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2, -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD = 3, -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF = 4, -+ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF = 5, -+} cinstr_profiling_event_reason_start_stop_sw_t; ++#if MALI_STATE_TRACKING ++u32 mali_scheduler_dump_state(char *buf, u32 size); ++#endif + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel -+ */ -+typedef enum { -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE = 0, /* used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL = 1, /* NOT used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC = 26, /* used in some build configurations */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT = 27, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC = 28, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP = 29, /* used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE = 30, /* used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL = 31, /* used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS = 32, /* used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE = 33, /* NOT used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER = 34, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER = 35, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK = 36, /* Not currently used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK = 37, /* Not currently used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP = 38, /* Not currently used */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING = 40, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING = 41, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE = 42, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP = 43, -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS = 44, -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC = 45, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT = 46, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT = 47, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT = 48, /* USED */ -+ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT = 49, /* USED */ -+} cinstr_profiling_event_reason_suspend_resume_sw_t; ++void mali_scheduler_gp_pp_job_queue_print(void); + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx) ++#endif /* __MALI_SCHEDULER_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h b/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h +new file mode 100644 +index 000000000..ba1d71d01 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_scheduler_types.h +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+typedef enum { -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE = 0, -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT = 1, -+ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH = 2, -+} cinstr_profiling_event_reason_single_hw_t; + -+/** -+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel -+ */ -+typedef enum { -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE = 0, -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1, -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS = 2, -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS = 3, -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS = 4, -+} cinstr_profiling_event_reason_single_gpu_t; ++#ifndef __MALI_SCHEDULER_TYPES_H__ ++#define __MALI_SCHEDULER_TYPES_H__ ++ ++#include "mali_osk.h" ++ ++#define MALI_SCHEDULER_JOB_ID_SPAN 65535 + +/** -+ * These values are applicable for the 3rd data parameter when -+ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel -+ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason. ++ * Bitmask used for defered scheduling of subsystems. + */ -+typedef enum { -+ MALI_PROFILING_EVENT_DATA_CORE_GP0 = 1, -+ MALI_PROFILING_EVENT_DATA_CORE_PP0 = 5, -+ MALI_PROFILING_EVENT_DATA_CORE_PP1 = 6, -+ MALI_PROFILING_EVENT_DATA_CORE_PP2 = 7, -+ MALI_PROFILING_EVENT_DATA_CORE_PP3 = 8, -+ MALI_PROFILING_EVENT_DATA_CORE_PP4 = 9, -+ MALI_PROFILING_EVENT_DATA_CORE_PP5 = 10, -+ MALI_PROFILING_EVENT_DATA_CORE_PP6 = 11, -+ MALI_PROFILING_EVENT_DATA_CORE_PP7 = 12, -+ MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU = 22, /* GP0 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU = 26, /* PP0 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU = 27, /* PP1 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU = 28, /* PP2 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU = 29, /* PP3 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU = 30, /* PP4 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU = 31, /* PP5 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU = 32, /* PP6 + 21 */ -+ MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU = 33, /* PP7 + 21 */ -+ -+} cinstr_profiling_event_data_core_t; ++typedef u32 mali_scheduler_mask; + -+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num)) -+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num)) -+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num)) -+#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num)) ++#define MALI_SCHEDULER_MASK_GP (1<<0) ++#define MALI_SCHEDULER_MASK_PP (1<<1) + ++#define MALI_SCHEDULER_MASK_EMPTY 0 ++#define MALI_SCHEDULER_MASK_ALL (MALI_SCHEDULER_MASK_GP | MALI_SCHEDULER_MASK_PP) + -+#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/ -diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h ++#endif /* __MALI_SCHEDULER_TYPES_H__ */ +diff --git 
a/drivers/gpu/arm/mali400/mali/common/mali_session.c b/drivers/gpu/arm/mali400/mali/common/mali_session.c new file mode 100644 -index 000000000..c1927d145 +index 000000000..7504fb108 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h -@@ -0,0 +1,305 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_session.c +@@ -0,0 +1,155 @@ +/* -+ * Copyright (C) 2013, 2015-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -304782,309 +308131,301 @@ index 000000000..c1927d145 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__ -+#define __MALI_UTGARD_PROFILING_GATOR_API_H__ -+ -+#ifdef __cplusplus -+extern "C" { ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_session.h" ++#include "mali_ukk.h" ++#ifdef MALI_MEM_SWAP_TRACKING ++#include "mali_memory_swap_alloc.h" +#endif + -+#define MALI_PROFILING_API_VERSION 4 ++_MALI_OSK_LIST_HEAD(mali_sessions); ++static u32 mali_session_count = 0; + -+#define MAX_NUM_L2_CACHE_CORES 3 -+#define MAX_NUM_FP_CORES 8 -+#define MAX_NUM_VP_CORES 1 ++_mali_osk_spinlock_irq_t *mali_sessions_lock = NULL; ++wait_queue_head_t pending_queue; + -+#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \ -+ { \ -+ "Filmstrip_cnt0", \ -+ "Frequency", \ -+ "Voltage", \ -+ "vertex", \ -+ "fragment", \ -+ "Total_alloc_pages", \ -+ }; ++_mali_osk_errcode_t mali_session_initialize(void) ++{ ++ _MALI_OSK_INIT_LIST_HEAD(&mali_sessions); ++ /* init wait queue for big varying job */ ++ init_waitqueue_head(&pending_queue); + -+#define _MALI_MEM_COUTNER_DESCRIPTIONS \ -+ { \ -+ "untyped_memory", \ -+ "vertex_index_buffer", \ -+ "texture_buffer", \ -+ "varying_buffer", \ -+ "render_target", \ -+ "pbuffer_buffer", \ -+ "plbu_heap", \ -+ "pointer_array_buffer", \ -+ "slave_tilelist", \ -+ "untyped_gp_cmdlist", \ -+ "polygon_cmdlist", \ -+ "texture_descriptor", \ -+ "render_state_word", \ -+ "shader", \ -+ "stream_buffer", \ -+ "fragment_stack", \ -+ "uniform", \ -+ "untyped_frame_pool", \ -+ "untyped_surface", \ -+ }; ++ mali_sessions_lock = _mali_osk_spinlock_irq_init( ++ _MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_SESSIONS); ++ if (NULL == mali_sessions_lock) { ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/** The list of events supported by the Mali DDK. 
*/ -+typedef enum { -+ /* Vertex processor activity */ -+ ACTIVITY_VP_0 = 0, ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Fragment processor activity */ -+ ACTIVITY_FP_0, -+ ACTIVITY_FP_1, -+ ACTIVITY_FP_2, -+ ACTIVITY_FP_3, -+ ACTIVITY_FP_4, -+ ACTIVITY_FP_5, -+ ACTIVITY_FP_6, -+ ACTIVITY_FP_7, ++void mali_session_terminate(void) ++{ ++ if (NULL != mali_sessions_lock) { ++ _mali_osk_spinlock_irq_term(mali_sessions_lock); ++ mali_sessions_lock = NULL; ++ } ++} + -+ /* L2 cache counters */ -+ COUNTER_L2_0_C0, -+ COUNTER_L2_0_C1, -+ COUNTER_L2_1_C0, -+ COUNTER_L2_1_C1, -+ COUNTER_L2_2_C0, -+ COUNTER_L2_2_C1, ++void mali_session_add(struct mali_session_data *session) ++{ ++ mali_session_lock(); ++ _mali_osk_list_add(&session->link, &mali_sessions); ++ mali_session_count++; ++ mali_session_unlock(); ++} + -+ /* Vertex processor counters */ -+ COUNTER_VP_0_C0, -+ COUNTER_VP_0_C1, ++void mali_session_remove(struct mali_session_data *session) ++{ ++ mali_session_lock(); ++ _mali_osk_list_delinit(&session->link); ++ mali_session_count--; ++ mali_session_unlock(); ++} + -+ /* Fragment processor counters */ -+ COUNTER_FP_0_C0, -+ COUNTER_FP_0_C1, -+ COUNTER_FP_1_C0, -+ COUNTER_FP_1_C1, -+ COUNTER_FP_2_C0, -+ COUNTER_FP_2_C1, -+ COUNTER_FP_3_C0, -+ COUNTER_FP_3_C1, -+ COUNTER_FP_4_C0, -+ COUNTER_FP_4_C1, -+ COUNTER_FP_5_C0, -+ COUNTER_FP_5_C1, -+ COUNTER_FP_6_C0, -+ COUNTER_FP_6_C1, -+ COUNTER_FP_7_C0, -+ COUNTER_FP_7_C1, ++u32 mali_session_get_count(void) ++{ ++ return mali_session_count; ++} + -+ /* -+ * If more hardware counters are added, the _mali_osk_hw_counter_table -+ * below should also be updated. -+ */ ++mali_bool mali_session_pp_job_is_empty(void *data) ++{ ++ struct mali_session_data *session = (struct mali_session_data *)data; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ /* EGL software counters */ -+ COUNTER_EGL_BLIT_TIME, ++ if ( 0 == _mali_osk_atomic_read(&session->number_of_pp_jobs)) { ++ return MALI_TRUE; ++ } ++ return MALI_FALSE; ++} + -+ /* GLES software counters */ -+ COUNTER_GLES_DRAW_ELEMENTS_CALLS, -+ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES, -+ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED, -+ COUNTER_GLES_DRAW_ARRAYS_CALLS, -+ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED, -+ COUNTER_GLES_DRAW_POINTS, -+ COUNTER_GLES_DRAW_LINES, -+ COUNTER_GLES_DRAW_LINE_LOOP, -+ COUNTER_GLES_DRAW_LINE_STRIP, -+ COUNTER_GLES_DRAW_TRIANGLES, -+ COUNTER_GLES_DRAW_TRIANGLE_STRIP, -+ COUNTER_GLES_DRAW_TRIANGLE_FAN, -+ COUNTER_GLES_NON_VBO_DATA_COPY_TIME, -+ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI, -+ COUNTER_GLES_UPLOAD_TEXTURE_TIME, -+ COUNTER_GLES_UPLOAD_VBO_TIME, -+ COUNTER_GLES_NUM_FLUSHES, -+ COUNTER_GLES_NUM_VSHADERS_GENERATED, -+ COUNTER_GLES_NUM_FSHADERS_GENERATED, -+ COUNTER_GLES_VSHADER_GEN_TIME, -+ COUNTER_GLES_FSHADER_GEN_TIME, -+ COUNTER_GLES_INPUT_TRIANGLES, -+ COUNTER_GLES_VXCACHE_HIT, -+ COUNTER_GLES_VXCACHE_MISS, -+ COUNTER_GLES_VXCACHE_COLLISION, -+ COUNTER_GLES_CULLED_TRIANGLES, -+ COUNTER_GLES_CULLED_LINES, -+ COUNTER_GLES_BACKFACE_TRIANGLES, -+ COUNTER_GLES_GBCLIP_TRIANGLES, -+ COUNTER_GLES_GBCLIP_LINES, -+ COUNTER_GLES_TRIANGLES_DRAWN, -+ COUNTER_GLES_DRAWCALL_TIME, -+ COUNTER_GLES_TRIANGLES_COUNT, -+ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT, -+ COUNTER_GLES_STRIP_TRIANGLES_COUNT, -+ COUNTER_GLES_FAN_TRIANGLES_COUNT, -+ COUNTER_GLES_LINES_COUNT, -+ COUNTER_GLES_INDEPENDENT_LINES_COUNT, -+ COUNTER_GLES_STRIP_LINES_COUNT, -+ COUNTER_GLES_LOOP_LINES_COUNT, ++wait_queue_head_t *mali_session_get_wait_queue(void) ++{ ++ return &pending_queue; ++} + -+ /* Special counter */ ++/* ++ * Get the 
max completed window jobs from all active session, ++ * which will be used in window render frame per sec calculate ++ */ ++#if defined(CONFIG_MALI_DVFS) ++u32 mali_session_max_window_num(void) ++{ ++ struct mali_session_data *session, *tmp; ++ u32 max_window_num = 0; ++ u32 tmp_number = 0; + -+ /* Framebuffer capture pseudo-counter */ -+ COUNTER_FILMSTRIP, -+ COUNTER_FREQUENCY, -+ COUNTER_VOLTAGE, -+ COUNTER_VP_ACTIVITY, -+ COUNTER_FP_ACTIVITY, -+ COUNTER_TOTAL_ALLOC_PAGES, ++ mali_session_lock(); + -+ /* Memory usage counter */ -+ COUNTER_MEM_UNTYPED, -+ COUNTER_MEM_VB_IB, -+ COUNTER_MEM_TEXTURE, -+ COUNTER_MEM_VARYING, -+ COUNTER_MEM_RT, -+ COUNTER_MEM_PBUFFER, -+ /* memory usages for gp command */ -+ COUNTER_MEM_PLBU_HEAP, -+ COUNTER_MEM_POINTER_ARRAY, -+ COUNTER_MEM_SLAVE_TILELIST, -+ COUNTER_MEM_UNTYPE_GP_CMDLIST, -+ /* memory usages for polygon list command */ -+ COUNTER_MEM_POLYGON_CMDLIST, -+ /* memory usages for pp command */ -+ COUNTER_MEM_TD, -+ COUNTER_MEM_RSW, -+ /* other memory usages */ -+ COUNTER_MEM_SHADER, -+ COUNTER_MEM_STREAMS, -+ COUNTER_MEM_FRAGMENT_STACK, -+ COUNTER_MEM_UNIFORM, -+ /* Special mem usage, which is used for mem pool allocation */ -+ COUNTER_MEM_UNTYPE_MEM_POOL, -+ COUNTER_MEM_UNTYPE_SURFACE, ++ MALI_SESSION_FOREACH(session, tmp, link) { ++ tmp_number = _mali_osk_atomic_xchg( ++ &session->number_of_window_jobs, 0); ++ if (max_window_num < tmp_number) { ++ max_window_num = tmp_number; ++ } ++ } + -+ NUMBER_OF_EVENTS -+} _mali_osk_counter_id; ++ mali_session_unlock(); + -+#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0 -+#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7 ++ return max_window_num; ++} ++#endif + -+#define FIRST_HW_COUNTER COUNTER_L2_0_C0 -+#define LAST_HW_COUNTER COUNTER_FP_7_C1 ++void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx) ++{ ++ struct mali_session_data *session, *tmp; ++ u32 mali_mem_usage; ++ u32 total_mali_mem_size; ++#ifdef MALI_MEM_SWAP_TRACKING ++ u32 swap_pool_size; ++ u32 swap_unlock_size; ++#endif + -+#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME -+#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT ++ MALI_DEBUG_ASSERT_POINTER(print_ctx); ++ mali_session_lock(); ++ MALI_SESSION_FOREACH(session, tmp, link) { ++#ifdef MALI_MEM_SWAP_TRACKING ++ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u %-10u\n", ++ session->comm, session->pid, ++ (atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE, ++ (unsigned int)session->max_mali_mem_allocated_size, ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_SWAP])) * _MALI_OSK_MALI_PAGE_SIZE) ++ ); ++#else ++ _mali_osk_ctxprintf(print_ctx, " %-25s %-10u %-10u %-15u %-15u %-10u %-10u \n", ++ session->comm, session->pid, ++ (unsigned int)((atomic_read(&session->mali_mem_allocated_pages)) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)session->max_mali_mem_allocated_size, ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_EXTERNAL])) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_UMP])) * _MALI_OSK_MALI_PAGE_SIZE), ++ (unsigned int)((atomic_read(&session->mali_mem_array[MALI_MEM_DMA_BUF])) * _MALI_OSK_MALI_PAGE_SIZE) ++ ); ++#endif ++ } ++ 
mali_session_unlock(); ++ mali_mem_usage = _mali_ukk_report_memory_usage(); ++ total_mali_mem_size = _mali_ukk_report_total_memory_size(); ++ _mali_osk_ctxprintf(print_ctx, "Mali mem usage: %u\nMali mem limit: %u\n", mali_mem_usage, total_mali_mem_size); ++#ifdef MALI_MEM_SWAP_TRACKING ++ mali_mem_swap_tracking(&swap_pool_size, &swap_unlock_size); ++ _mali_osk_ctxprintf(print_ctx, "Mali swap mem pool : %u\nMali swap mem unlock: %u\n", swap_pool_size, swap_unlock_size); ++#endif ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_session.h b/drivers/gpu/arm/mali400/mali/common/mali_session.h +new file mode 100644 +index 000000000..da8b9927e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_session.h +@@ -0,0 +1,136 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP -+#define LAST_SPECIAL_COUNTER COUNTER_TOTAL_ALLOC_PAGES ++#ifndef __MALI_SESSION_H__ ++#define __MALI_SESSION_H__ + -+#define FIRST_MEM_COUNTER COUNTER_MEM_UNTYPED -+#define LAST_MEM_COUNTER COUNTER_MEM_UNTYPE_SURFACE ++#include "mali_mmu_page_directory.h" ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "mali_memory_types.h" ++#include "mali_memory_manager.h" + -+#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1) -+#define MALI_PROFILING_SPECIAL_COUNTERS_NUM (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1) -+#define MALI_PROFILING_SW_COUNTERS_NUM (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1) ++struct mali_timeline_system; ++struct mali_soft_system; + -+/** -+ * Define the stream header type for porfiling stream. -+ */ -+#define STREAM_HEADER_FRAMEBUFFER 0x05 /* The stream packet header type for framebuffer dumping. */ -+#define STREAM_HEADER_COUNTER_VALUE 0x09 /* The stream packet header type for hw/sw/memory counter sampling. */ -+#define STREAM_HEADER_CORE_ACTIVITY 0x0a /* The stream packet header type for activity counter sampling. */ -+#define STREAM_HEADER_SIZE 5 ++/* Number of frame builder job lists per session. */ ++#define MALI_PP_JOB_FB_LOOKUP_LIST_SIZE 16 ++#define MALI_PP_JOB_FB_LOOKUP_LIST_MASK (MALI_PP_JOB_FB_LOOKUP_LIST_SIZE - 1) ++/*Max pending big job allowed in kernel*/ ++#define MALI_MAX_PENDING_BIG_JOB (2) + -+/** -+ * Define the packet header type of profiling control packet. -+ */ -+#define PACKET_HEADER_ERROR 0x80 /* The response packet header type if error. */ -+#define PACKET_HEADER_ACK 0x81 /* The response packet header type if OK. */ -+#define PACKET_HEADER_COUNTERS_REQUEST 0x82 /* The control packet header type to request counter information from ddk. */ -+#define PACKET_HEADER_COUNTERS_ACK 0x83 /* The response packet header type to send out counter information. */ -+#define PACKET_HEADER_COUNTERS_ENABLE 0x84 /* The control packet header type to enable counters. */ -+#define PACKET_HEADER_START_CAPTURE_VALUE 0x85 /* The control packet header type to start capture values. 
*/ ++struct mali_session_data { ++ _mali_osk_notification_queue_t *ioctl_queue; + -+#define PACKET_HEADER_SIZE 5 ++ _mali_osk_wait_queue_t *wait_queue; /**The wait queue to wait for the number of pp job become 0.*/ + -+/** -+ * Structure to pass performance counter data of a Mali core -+ */ -+typedef struct _mali_profiling_core_counters { -+ u32 source0; -+ u32 value0; -+ u32 source1; -+ u32 value1; -+} _mali_profiling_core_counters; ++ _mali_osk_mutex_t *memory_lock; /**< Lock protecting the vm manipulation */ ++ _mali_osk_mutex_t *cow_lock; /** < Lock protecting the cow memory free manipulation */ ++#if 0 ++ _mali_osk_list_t memory_head; /**< Track all the memory allocated in this session, for freeing on abnormal termination */ ++#endif ++ struct mali_page_directory *page_directory; /**< MMU page directory for this session */ + -+/** -+ * Structure to pass performance counter data of Mali L2 cache cores -+ */ -+typedef struct _mali_profiling_l2_counter_values { -+ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES]; -+} _mali_profiling_l2_counter_values; ++ _MALI_OSK_LIST_HEAD(link); /**< Link for list of all sessions */ ++ _MALI_OSK_LIST_HEAD(pp_job_list); /**< List of all PP jobs on this session */ + -+/** -+ * Structure to pass data defining Mali instance in use: -+ * -+ * mali_product_id - Mali product id -+ * mali_version_major - Mali version major number -+ * mali_version_minor - Mali version minor number -+ * num_of_l2_cores - number of L2 cache cores -+ * num_of_fp_cores - number of fragment processor cores -+ * num_of_vp_cores - number of vertex processor cores -+ */ -+typedef struct _mali_profiling_mali_version { -+ u32 mali_product_id; -+ u32 mali_version_major; -+ u32 mali_version_minor; -+ u32 num_of_l2_cores; -+ u32 num_of_fp_cores; -+ u32 num_of_vp_cores; -+} _mali_profiling_mali_version; ++#if defined(CONFIG_MALI_DVFS) ++ _mali_osk_atomic_t number_of_window_jobs; /**< Record the window jobs completed on this session in a period */ ++#endif ++ _mali_osk_atomic_t number_of_pp_jobs; /** < Record the pp jobs on this session */ + -+/** -+ * Structure to define the mali profiling counter struct. -+ */ -+typedef struct mali_profiling_counter { -+ char counter_name[40]; -+ u32 counter_id; -+ u32 counter_event; -+ u32 prev_counter_value; -+ u32 current_counter_value; -+ u32 key; -+ int enabled; -+} mali_profiling_counter; ++ _mali_osk_list_t pp_job_fb_lookup_list[MALI_PP_JOB_FB_LOOKUP_LIST_SIZE]; /**< List of PP job lists per frame builder id. Used to link jobs from same frame builder. */ ++ struct mali_soft_job_system *soft_job_system; /**< Soft job system for this session. */ ++ struct mali_timeline_system *timeline_system; /**< Timeline system for this session. */ + -+/* -+ * List of possible actions to be controlled by Streamline. -+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting. -+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator. -+ */ -+#define FBDUMP_CONTROL_ENABLE (1) -+#define FBDUMP_CONTROL_RATE (2) -+#define SW_COUNTER_ENABLE (3) -+#define FBDUMP_CONTROL_RESIZE_FACTOR (4) -+#define MEM_COUNTER_ENABLE (5) -+#define ANNOTATE_PROFILING_ENABLE (6) ++ mali_bool is_aborting; /**< MALI_TRUE if the session is aborting, MALI_FALSE if not. */ ++ mali_bool use_high_priority_job_queue; /**< If MALI_TRUE, jobs added from this session will use the high priority job queues. 
*/ ++ u32 pid; ++ char *comm; ++ atomic_t mali_mem_array[MALI_MEM_TYPE_MAX]; /**< The array to record mem types' usage for this session. */ ++ atomic_t mali_mem_allocated_pages; /** The current allocated mali memory pages, which include mali os memory and mali dedicated memory.*/ ++ size_t max_mali_mem_allocated_size; /**< The past max mali memory allocated size, which include mali os memory and mali dedicated memory. */ ++ /* Added for new memroy system */ ++ struct mali_allocation_manager allocation_mgr; + -+void _mali_profiling_control(u32 action, u32 value); ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ u32 fence_context; /** < The execution dma fence context this fence is run on. */ ++ _mali_osk_atomic_t fence_seqno; /** < Alinear increasing sequence number for this dma fence context. */ ++#endif ++}; + -+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values); ++_mali_osk_errcode_t mali_session_initialize(void); ++void mali_session_terminate(void); + -+int _mali_profiling_set_event(u32 counter_id, s32 event_id); ++/* List of all sessions. Actual list head in mali_kernel_core.c */ ++extern _mali_osk_list_t mali_sessions; ++/* Lock to protect modification and access to the mali_sessions list */ ++extern _mali_osk_spinlock_irq_t *mali_sessions_lock; + -+u32 _mali_profiling_get_api_version(void); ++MALI_STATIC_INLINE void mali_session_lock(void) ++{ ++ _mali_osk_spinlock_irq_lock(mali_sessions_lock); ++} + -+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values); ++MALI_STATIC_INLINE void mali_session_unlock(void) ++{ ++ _mali_osk_spinlock_irq_unlock(mali_sessions_lock); ++} + -+#ifdef __cplusplus ++void mali_session_add(struct mali_session_data *session); ++void mali_session_remove(struct mali_session_data *session); ++u32 mali_session_get_count(void); ++mali_bool mali_session_pp_job_is_empty(void *data); ++wait_queue_head_t *mali_session_get_wait_queue(void); ++ ++#define MALI_SESSION_FOREACH(session, tmp, link) \ ++ _MALI_OSK_LIST_FOREACHENTRY(session, tmp, &mali_sessions, struct mali_session_data, link) ++ ++MALI_STATIC_INLINE struct mali_page_directory *mali_session_get_page_directory(struct mali_session_data *session) ++{ ++ return session->page_directory; ++} ++ ++MALI_STATIC_INLINE void mali_session_memory_lock(struct mali_session_data *session) ++{ ++ MALI_DEBUG_ASSERT_POINTER(session); ++ _mali_osk_mutex_wait(session->memory_lock); ++} ++ ++MALI_STATIC_INLINE void mali_session_memory_unlock(struct mali_session_data *session) ++{ ++ MALI_DEBUG_ASSERT_POINTER(session); ++ _mali_osk_mutex_signal(session->memory_lock); ++} ++ ++MALI_STATIC_INLINE void mali_session_send_notification(struct mali_session_data *session, _mali_osk_notification_t *object) ++{ ++ _mali_osk_notification_queue_send(session->ioctl_queue, object); +} ++ ++#if defined(CONFIG_MALI_DVFS) ++ ++MALI_STATIC_INLINE void mali_session_inc_num_window_jobs(struct mali_session_data *session) ++{ ++ MALI_DEBUG_ASSERT_POINTER(session); ++ _mali_osk_atomic_inc(&session->number_of_window_jobs); ++} ++ ++/* ++ * Get the max completed window jobs from all active session, ++ * which will be used in window render frame per sec calculate ++ */ ++u32 mali_session_max_window_num(void); ++ +#endif + -+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h ++void mali_session_memory_tracking(_mali_osk_print_ctx *print_ctx); ++ 
++#endif /* __MALI_SESSION_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c new file mode 100644 -index 000000000..34656f09b +index 000000000..35cd830bc --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h -@@ -0,0 +1,1108 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.c +@@ -0,0 +1,438 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -305093,1112 +308434,721 @@ index 000000000..34656f09b + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_uk_types.h -+ * Defines the types and constants used in the user-kernel interface -+ */ ++#include "mali_soft_job.h" ++#include "mali_osk.h" ++#include "mali_timeline.h" ++#include "mali_session.h" ++#include "mali_kernel_common.h" ++#include "mali_uk_types.h" ++#include "mali_scheduler.h" ++#include "mali_executor.h" + -+#ifndef __MALI_UTGARD_UK_TYPES_H__ -+#define __MALI_UTGARD_UK_TYPES_H__ ++MALI_STATIC_INLINE void mali_soft_job_system_lock(struct mali_soft_job_system *system) ++{ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ _mali_osk_spinlock_irq_lock(system->lock); ++ MALI_DEBUG_PRINT(5, ("Mali Soft Job: soft system %p lock taken\n", system)); ++ MALI_DEBUG_ASSERT(0 == system->lock_owner); ++ MALI_DEBUG_CODE(system->lock_owner = _mali_osk_get_tid()); ++} + -+#ifdef __cplusplus -+extern "C" { -+#endif ++MALI_STATIC_INLINE void mali_soft_job_system_unlock(struct mali_soft_job_system *system) ++{ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_PRINT(5, ("Mali Soft Job: releasing soft system %p lock\n", system)); ++ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner); ++ MALI_DEBUG_CODE(system->lock_owner = 0); ++ _mali_osk_spinlock_irq_unlock(system->lock); ++} + -+/* Iteration functions depend on these values being consecutive. 
*/ -+#define MALI_UK_TIMELINE_GP 0 -+#define MALI_UK_TIMELINE_PP 1 -+#define MALI_UK_TIMELINE_SOFT 2 -+#define MALI_UK_TIMELINE_MAX 3 ++#if defined(DEBUG) ++MALI_STATIC_INLINE void mali_soft_job_system_assert_locked(struct mali_soft_job_system *system) ++{ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT(_mali_osk_get_tid() == system->lock_owner); ++} ++#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) mali_soft_job_system_assert_locked(system) ++#else ++#define MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system) ++#endif /* defined(DEBUG) */ + -+#define MALI_UK_BIG_VARYING_SIZE (1024*1024*2) ++struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session) ++{ ++ struct mali_soft_job_system *system; + -+typedef struct { -+ u32 points[MALI_UK_TIMELINE_MAX]; -+ s32 sync_fd; -+} _mali_uk_fence_t; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+/** -+ * @addtogroup uddapi Unified Device Driver (UDD) APIs -+ * -+ * @{ -+ */ ++ system = (struct mali_soft_job_system *) _mali_osk_calloc(1, sizeof(struct mali_soft_job_system)); ++ if (NULL == system) { ++ return NULL; ++ } + -+/** -+ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs -+ * -+ * @{ -+ */ ++ system->session = session; + -+/** @defgroup _mali_uk_core U/K Core -+ * @{ */ ++ system->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_SCHEDULER); ++ if (NULL == system->lock) { ++ mali_soft_job_system_destroy(system); ++ return NULL; ++ } ++ system->lock_owner = 0; ++ system->last_job_id = 0; + -+/** Definition of subsystem numbers, to assist in creating a unique identifier -+ * for each U/K call. -+ * -+ * @see _mali_uk_functions */ -+typedef enum { -+ _MALI_UK_CORE_SUBSYSTEM, /**< Core Group of U/K calls */ -+ _MALI_UK_MEMORY_SUBSYSTEM, /**< Memory Group of U/K calls */ -+ _MALI_UK_PP_SUBSYSTEM, /**< Fragment Processor Group of U/K calls */ -+ _MALI_UK_GP_SUBSYSTEM, /**< Vertex Processor Group of U/K calls */ -+ _MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */ -+ _MALI_UK_VSYNC_SUBSYSTEM, /**< VSYNC Group of U/K calls */ -+} _mali_uk_subsystem_t; ++ _MALI_OSK_INIT_LIST_HEAD(&(system->jobs_used)); + -+/** Within a function group each function has its unique sequence number -+ * to assist in creating a unique identifier for each U/K call. -+ * -+ * An ordered pair of numbers selected from -+ * ( \ref _mali_uk_subsystem_t,\ref _mali_uk_functions) will uniquely identify the -+ * U/K call across all groups of functions, and all functions. 
*/ -+typedef enum { -+ /** Core functions */ ++ return system; ++} + -+ _MALI_UK_OPEN = 0, /**< _mali_ukk_open() */ -+ _MALI_UK_CLOSE, /**< _mali_ukk_close() */ -+ _MALI_UK_WAIT_FOR_NOTIFICATION, /**< _mali_ukk_wait_for_notification() */ -+ _MALI_UK_GET_API_VERSION, /**< _mali_ukk_get_api_version() */ -+ _MALI_UK_POST_NOTIFICATION, /**< _mali_ukk_post_notification() */ -+ _MALI_UK_GET_USER_SETTING, /**< _mali_ukk_get_user_setting() *//**< [out] */ -+ _MALI_UK_GET_USER_SETTINGS, /**< _mali_ukk_get_user_settings() *//**< [out] */ -+ _MALI_UK_REQUEST_HIGH_PRIORITY, /**< _mali_ukk_request_high_priority() */ -+ _MALI_UK_TIMELINE_GET_LATEST_POINT, /**< _mali_ukk_timeline_get_latest_point() */ -+ _MALI_UK_TIMELINE_WAIT, /**< _mali_ukk_timeline_wait() */ -+ _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, /**< _mali_ukk_timeline_create_sync_fence() */ -+ _MALI_UK_SOFT_JOB_START, /**< _mali_ukk_soft_job_start() */ -+ _MALI_UK_SOFT_JOB_SIGNAL, /**< _mali_ukk_soft_job_signal() */ -+ _MALI_UK_PENDING_SUBMIT, /**< _mali_ukk_pending_submit() */ ++void mali_soft_job_system_destroy(struct mali_soft_job_system *system) ++{ ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ _MALI_GET_RK_KO_VERSION, /* rk_ext */ -+ _MALI_UK_GET_MALI_VERSION_IN_RK30, ++ /* All jobs should be free at this point. */ ++ MALI_DEBUG_ASSERT(_mali_osk_list_empty(&(system->jobs_used))); + -+ /** Memory functions */ ++ if (NULL != system) { ++ if (NULL != system->lock) { ++ _mali_osk_spinlock_irq_term(system->lock); ++ } ++ _mali_osk_free(system); ++ } ++} + -+ _MALI_UK_ALLOC_MEM = 0, /**< _mali_ukk_alloc_mem() */ -+ _MALI_UK_FREE_MEM, /**< _mali_ukk_free_mem() */ -+ _MALI_UK_BIND_MEM, /**< _mali_ukk_mem_bind() */ -+ _MALI_UK_UNBIND_MEM, /**< _mali_ukk_mem_unbind() */ -+ _MALI_UK_COW_MEM, /**< _mali_ukk_mem_cow() */ -+ _MALI_UK_COW_MODIFY_RANGE, /**< _mali_ukk_mem_cow_modify_range() */ -+ _MALI_UK_RESIZE_MEM, /**<._mali_ukk_mem_resize() */ -+ _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */ -+ _MALI_UK_DUMP_MMU_PAGE_TABLE, /**< _mali_ukk_mem_dump_mmu_page_table() */ -+ _MALI_UK_DMA_BUF_GET_SIZE, /**< _mali_ukk_dma_buf_get_size() */ -+ _MALI_UK_MEM_WRITE_SAFE, /**< _mali_uku_mem_write_safe() */ ++static void mali_soft_job_system_free_job(struct mali_soft_job_system *system, struct mali_soft_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ /** Common functions for each core */ ++ mali_soft_job_system_lock(job->system); + -+ _MALI_UK_START_JOB = 0, /**< Start a Fragment/Vertex Processor Job on a core */ -+ _MALI_UK_GET_NUMBER_OF_CORES, /**< Get the number of Fragment/Vertex Processor cores */ -+ _MALI_UK_GET_CORE_VERSION, /**< Get the Fragment/Vertex Processor version compatible with all cores */ ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id); ++ MALI_DEBUG_ASSERT(system == job->system); + -+ /** Fragment Processor Functions */ ++ _mali_osk_list_del(&(job->system_list)); + -+ _MALI_UK_PP_START_JOB = _MALI_UK_START_JOB, /**< _mali_ukk_pp_start_job() */ -+ _MALI_UK_GET_PP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_pp_number_of_cores() */ -+ _MALI_UK_GET_PP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_pp_core_version() */ -+ _MALI_UK_PP_DISABLE_WB, /**< _mali_ukk_pp_job_disable_wb() */ -+ _MALI_UK_PP_AND_GP_START_JOB, /**< _mali_ukk_pp_and_gp_start_job() */ ++ mali_soft_job_system_unlock(job->system); + -+ /** Vertex Processor Functions */ ++ _mali_osk_free(job); ++} + -+ _MALI_UK_GP_START_JOB = _MALI_UK_START_JOB, /**< 
_mali_ukk_gp_start_job() */ -+ _MALI_UK_GET_GP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_gp_number_of_cores() */ -+ _MALI_UK_GET_GP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_gp_core_version() */ -+ _MALI_UK_GP_SUSPEND_RESPONSE, /**< _mali_ukk_gp_suspend_response() */ ++MALI_STATIC_INLINE struct mali_soft_job *mali_soft_job_system_lookup_job(struct mali_soft_job_system *system, u32 job_id) ++{ ++ struct mali_soft_job *job, *tmp; + -+ /** Profiling functions */ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_ASSERT_SOFT_JOB_SYSTEM_LOCKED(system); + -+ _MALI_UK_PROFILING_ADD_EVENT = 0, /**< __mali_uku_profiling_add_event() */ -+ _MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */ -+ _MALI_UK_PROFILING_MEMORY_USAGE_GET, /**< __mali_uku_profiling_memory_usage_get() */ -+ _MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */ -+ _MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */ ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) { ++ if (job->id == job_id) ++ return job; ++ } + -+ /** VSYNC reporting fuctions */ -+ _MALI_UK_VSYNC_EVENT_REPORT = 0, /**< _mali_ukk_vsync_event_report() */ -+} _mali_uk_functions; ++ return NULL; ++} + -+/** @defgroup _mali_uk_getsysteminfo U/K Get System Info -+ * @{ */ ++void mali_soft_job_destroy(struct mali_soft_job *job) ++{ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(job->system); + -+/** -+ * Type definition for the core version number. -+ * Used when returning the version number read from a core -+ * -+ * Its format is that of the 32-bit Version register for a particular core. -+ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference -+ * Manual", ARM DDI 0415C, for more information. -+ */ -+typedef u32 _mali_core_version; ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: destroying soft job %u (0x%08X)\n", job->id, job)); + -+/** @} */ /* end group _mali_uk_core */ ++ if (NULL != job) { ++ if (0 < _mali_osk_atomic_dec_return(&job->refcount)) return; + ++ _mali_osk_atomic_term(&job->refcount); + -+/** @defgroup _mali_uk_gp U/K Vertex Processor -+ * @{ */ ++ if (NULL != job->activated_notification) { ++ _mali_osk_notification_delete(job->activated_notification); ++ job->activated_notification = NULL; ++ } + -+/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response -+ * @{ */ ++ mali_soft_job_system_free_job(job->system, job); ++ } ++} + -+/** @brief Arguments for _mali_ukk_gp_suspend_response() -+ * -+ * When _mali_wait_for_notification() receives notification that a -+ * Vertex Processor job was suspended, you need to send a response to indicate -+ * what needs to happen with this job. You can either abort or resume the job. -+ * -+ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or -+ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap -+ * for the job that will resolve the out of memory condition for the job. -+ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification; -+ * this is an identifier for the suspended job -+ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If -+ * you resume it, @c argument[0] should specify the Mali start address for the new -+ * heap and @c argument[1] the Mali end address of the heap. 
-+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * -+ */ -+typedef enum _maligp_job_suspended_response_code { -+ _MALIGP_JOB_ABORT, /**< Abort the Vertex Processor job */ -+ _MALIGP_JOB_RESUME_WITH_NEW_HEAP /**< Resume the Vertex Processor job with a new heap */ -+} _maligp_job_suspended_response_code; ++struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job) ++{ ++ struct mali_soft_job *job; ++ _mali_osk_notification_t *notification = NULL; + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 cookie; /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */ -+ _maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */ -+ u32 arguments[2]; /**< [in] 0 when aborting a job. When resuming a job, the Mali start and end address for a new heap to resume the job with */ -+} _mali_uk_gp_suspend_response_s; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT((MALI_SOFT_JOB_TYPE_USER_SIGNALED == type) || ++ (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == type)); + -+/** @} */ /* end group _mali_uk_gp_suspend_response_s */ ++ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_SOFT_ACTIVATED, sizeof(_mali_uk_soft_job_activated_s)); ++ if (unlikely(NULL == notification)) { ++ MALI_PRINT_ERROR(("Mali Soft Job: failed to allocate notification")); ++ return NULL; ++ } + -+/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job -+ * @{ */ ++ job = _mali_osk_malloc(sizeof(struct mali_soft_job)); ++ if (unlikely(NULL == job)) { ++ MALI_DEBUG_PRINT(2, ("Mali Soft Job: system alloc job failed. \n")); ++ return NULL; ++ } + -+/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job */ -+typedef enum { -+ _MALI_UK_JOB_STATUS_END_SUCCESS = 1 << (16 + 0), -+ _MALI_UK_JOB_STATUS_END_OOM = 1 << (16 + 1), -+ _MALI_UK_JOB_STATUS_END_ABORT = 1 << (16 + 2), -+ _MALI_UK_JOB_STATUS_END_TIMEOUT_SW = 1 << (16 + 3), -+ _MALI_UK_JOB_STATUS_END_HANG = 1 << (16 + 4), -+ _MALI_UK_JOB_STATUS_END_SEG_FAULT = 1 << (16 + 5), -+ _MALI_UK_JOB_STATUS_END_ILLEGAL_JOB = 1 << (16 + 6), -+ _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR = 1 << (16 + 7), -+ _MALI_UK_JOB_STATUS_END_SHUTDOWN = 1 << (16 + 8), -+ _MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9) -+} _mali_uk_job_status; ++ mali_soft_job_system_lock(system); + -+#define MALIGP2_NUM_REGS_FRAME (6) ++ job->system = system; ++ job->id = system->last_job_id++; ++ job->state = MALI_SOFT_JOB_STATE_ALLOCATED; + -+/** @brief Arguments for _mali_ukk_gp_start_job() -+ * -+ * To start a Vertex Processor job -+ * - associate the request with a reference to a @c mali_gp_job_info by setting -+ * user_job_ptr to the address of the @c mali_gp_job_info of the job. -+ * - set @c priority to the priority of the @c mali_gp_job_info -+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of -+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the -+ * default timeout in use by the device driver. -+ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers. -+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero -+ * for a non-instrumented build. For an instrumented build you can use up -+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag -+ * to enable them. 
@c perf_counter_src0 and @c perf_counter_src1 specify -+ * the source of what needs to get counted (e.g. number of vertex loader -+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60. -+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * -+ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the -+ * result of the request (see \ref _mali_uk_start_job_status). If the job could -+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be -+ * tried again. -+ * -+ * After the job has started, @c _mali_wait_for_notification() will be notified -+ * that the job finished or got suspended. It may get suspended due to -+ * resource shortage. If it finished (see _mali_ukk_wait_for_notification()) -+ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If -+ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s -+ * result. -+ * -+ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status), -+ * the number of milliseconds the job took to render, and values of core registers -+ * when the job finished (irq status, performance counters, renderer list -+ * address). A job has finished succesfully when its status is -+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering -+ * the job, or software detected the job is taking more than watchdog_msecs to -+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG. -+ * If the hardware detected a bus error while accessing memory associated with the -+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT. -+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to -+ * stop the job but the job didn't start on the hardware yet, e.g. when the -+ * driver shutdown. -+ * -+ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains -+ * the @c user_job_ptr identifier used to start the job with, the @c reason -+ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie -+ * to identify the core on which the job stalled. This @c cookie will be needed -+ * when responding to this nofication by means of _mali_ukk_gp_suspend_response(). -+ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or -+ * resume the job. If the job got suspended due to an out of memory condition -+ * you may be able to resolve this by providing more memory and resuming the job. -+ * -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 user_job_ptr; /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */ -+ u32 priority; /**< [in] job priority. 
A lower number means higher priority */ -+ u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */ -+ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */ -+ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */ -+ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */ -+ u32 frame_builder_id; /**< [in] id of the originating frame builder */ -+ u32 flush_id; /**< [in] flush id within the originating frame builder */ -+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ -+ u64 timeline_point_ptr; /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */ -+ u32 varying_memsize; /** < [in] size of varying memory to use deffer bind*/ -+ u32 deferred_mem_num; -+ u64 deferred_mem_list; /** < [in] memory hanlde list of varying buffer to use deffer bind */ -+} _mali_uk_gp_start_job_s; ++ _mali_osk_list_add(&(job->system_list), &(system->jobs_used)); + -+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */ -+#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */ -+#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */ ++ job->type = type; ++ job->user_job = user_job; ++ job->activated = MALI_FALSE; + -+/** @} */ /* end group _mali_uk_gpstartjob_s */ ++ job->activated_notification = notification; + -+typedef struct { -+ u64 user_job_ptr; /**< [out] identifier for the job in user space */ -+ _mali_uk_job_status status; /**< [out] status of finished job */ -+ u32 heap_current_addr; /**< [out] value of the GP PLB PL heap start address register */ -+ u32 perf_counter0; /**< [out] value of performance counter 0 (see ARM DDI0415A) */ -+ u32 perf_counter1; /**< [out] value of performance counter 1 (see ARM DDI0415A) */ -+ u32 pending_big_job_num; -+} _mali_uk_gp_job_finished_s; ++ _mali_osk_atomic_init(&job->refcount, 1); + -+typedef struct { -+ u64 user_job_ptr; /**< [out] identifier for the job in user space */ -+ u32 cookie; /**< [out] identifier for the core in kernel space on which the job stalled */ -+} _mali_uk_gp_job_suspended_s; ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state); ++ MALI_DEBUG_ASSERT(system == job->system); ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_INVALID_ID != job->id); + -+/** @} */ /* end group _mali_uk_gp */ ++ mali_soft_job_system_unlock(system); + ++ return job; ++} + -+/** @defgroup _mali_uk_pp U/K Fragment Processor -+ * @{ */ ++mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence) ++{ ++ mali_timeline_point point; ++ struct mali_soft_job_system *system; + -+#define _MALI_PP_MAX_SUB_JOBS 8 ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1) ++ MALI_DEBUG_ASSERT_POINTER(job->system); ++ system = job->system; + -+#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1) ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT_POINTER(system->session->timeline_system); + -+#define _MALI_DLBU_MAX_REGISTERS 4 ++ mali_soft_job_system_lock(system); + -+/** Flag for _mali_uk_pp_start_job_s */ -+#define 
_MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0) -+#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1) -+#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2) ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_ALLOCATED == job->state); ++ job->state = MALI_SOFT_JOB_STATE_STARTED; + -+/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job -+ * @{ */ ++ mali_soft_job_system_unlock(system); + -+/** @brief Arguments for _mali_ukk_pp_start_job() -+ * -+ * To start a Fragment Processor job -+ * - associate the request with a reference to a mali_pp_job by setting -+ * @c user_job_ptr to the address of the @c mali_pp_job of the job. -+ * - set @c priority to the priority of the mali_pp_job -+ * - specify a timeout for the job by setting @c watchdog_msecs to the number of -+ * milliseconds the job is allowed to run. Specifying a value of 0 selects the -+ * default timeout in use by the device driver. -+ * - copy the frame registers from the @c mali_pp_job into @c frame_registers. -+ * For MALI200 you also need to copy the write back 0,1 and 2 registers. -+ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero -+ * for a non-instrumented build. For an instrumented build you can use up -+ * to two performance counters. Set the corresponding bit in @c perf_counter_flag -+ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify -+ * the source of what needs to get counted (e.g. number of vertex loader -+ * cache hits). For source id values, see ARM DDI0415A, Table 3-60. -+ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open() -+ * -+ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the -+ * result of the request (see \ref _mali_uk_start_job_status). If the job could -+ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be -+ * tried again. -+ * -+ * After the job has started, _mali_wait_for_notification() will be notified -+ * when the job finished. The notification will contain a -+ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr -+ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status), -+ * the number of milliseconds the job took to render, and values of core registers -+ * when the job finished (irq status, performance counters, renderer list -+ * address). A job has finished succesfully when its status is -+ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering -+ * the job, or software detected the job is taking more than @c watchdog_msecs to -+ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG. -+ * If the hardware detected a bus error while accessing memory associated with the -+ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT. -+ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to -+ * stop the job but the job didn't start on the hardware yet, e.g. when the -+ * driver shutdown. -+ * -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 user_job_ptr; /**< [in] identifier for the job in user space */ -+ u32 priority; /**< [in] job priority. 
A lower number means higher priority */ -+ u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS]; /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */ -+ u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */ -+ u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */ -+ u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS]; -+ u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS]; -+ u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS]; -+ u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */ -+ u32 num_cores; /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */ -+ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */ -+ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */ -+ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */ -+ u32 frame_builder_id; /**< [in] id of the originating frame builder */ -+ u32 flush_id; /**< [in] flush id within the originating frame builder */ -+ u32 flags; /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */ -+ u32 tilesx; /**< [in] number of tiles in the x direction (needed for heatmap generation */ -+ u32 tilesy; /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */ -+ u32 heatmap_mem; /**< [in] memory address to store counter values per tile (aka heatmap) */ -+ u32 num_memory_cookies; /**< [in] number of memory cookies attached to job */ -+ u64 memory_cookies; /**< [in] pointer to array of u32 memory cookies attached to job */ -+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ -+ u64 timeline_point_ptr; /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */ -+} _mali_uk_pp_start_job_s; ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: starting soft job %u (0x%08X)\n", job->id, job)); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 gp_args; /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */ -+ u64 pp_args; /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */ -+} _mali_uk_pp_and_gp_start_job_s; ++ mali_timeline_tracker_init(&job->tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job); ++ point = mali_timeline_system_add_tracker(system->session->timeline_system, &job->tracker, MALI_TIMELINE_SOFT); + -+/** @} */ /* end group _mali_uk_ppstartjob_s */ ++ return point; ++} + -+typedef struct { -+ u64 user_job_ptr; /**< [out] identifier for the job in user space */ -+ _mali_uk_job_status status; /**< [out] status of finished job */ -+ u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */ -+ u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */ -+ u32 perf_counter_src0; -+ u32 perf_counter_src1; -+} _mali_uk_pp_job_finished_s; ++static mali_bool mali_soft_job_is_activated(void *data) ++{ ++ struct mali_soft_job *job; + -+typedef struct { -+ u32 number_of_enabled_cores; /**< [out] the new number of enabled cores */ -+} _mali_uk_pp_num_cores_changed_s; ++ job = (struct mali_soft_job *) data; ++ MALI_DEBUG_ASSERT_POINTER(job); + ++ return job->activated; ++} + 
++_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id) ++{ ++ struct mali_soft_job *job; ++ struct mali_timeline_system *timeline_system; ++ mali_scheduler_mask schedule_mask; + -+/** -+ * Flags to indicate write-back units -+ */ -+typedef enum { -+ _MALI_UK_PP_JOB_WB0 = 1, -+ _MALI_UK_PP_JOB_WB1 = 2, -+ _MALI_UK_PP_JOB_WB2 = 4, -+} _mali_uk_pp_job_wbx_flag; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 fb_id; /**< [in] Frame builder ID of job to disable WB units for */ -+ u32 wb0_memory; -+ u32 wb1_memory; -+ u32 wb2_memory; -+} _mali_uk_pp_disable_wb_s; ++ mali_soft_job_system_lock(system); + ++ job = mali_soft_job_system_lookup_job(system, job_id); + -+/** @} */ /* end group _mali_uk_pp */ ++ if ((NULL == job) || (MALI_SOFT_JOB_TYPE_USER_SIGNALED != job->type) ++ || !(MALI_SOFT_JOB_STATE_STARTED == job->state || MALI_SOFT_JOB_STATE_TIMED_OUT == job->state)) { ++ mali_soft_job_system_unlock(system); ++ MALI_PRINT_ERROR(("Mali Soft Job: invalid soft job id %u", job_id)); ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + -+/** @defgroup _mali_uk_soft_job U/K Soft Job -+ * @{ */ ++ if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) { ++ job->state = MALI_SOFT_JOB_STATE_SIGNALED; ++ mali_soft_job_system_unlock(system); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 user_job; /**< [in] identifier for the job in user space */ -+ u64 job_id_ptr; /**< [in,out] pointer to location of u32 where job id will be written */ -+ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ -+ u32 point; /**< [out] point on soft timeline for this job */ -+ u32 type; /**< [in] type of soft job */ -+} _mali_uk_soft_job_start_s; ++ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated); ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: soft job %u (0x%08X) was timed out\n", job->id, job)); ++ mali_soft_job_destroy(job); + -+typedef struct { -+ u64 user_job; /**< [out] identifier for the job in user space */ -+} _mali_uk_soft_job_activated_s; ++ return _MALI_OSK_ERR_TIMEOUT; ++ } + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 job_id; /**< [in] id for soft job */ -+} _mali_uk_soft_job_signal_s; ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); + -+/** @} */ /* end group _mali_uk_soft_job */ ++ job->state = MALI_SOFT_JOB_STATE_SIGNALED; ++ mali_soft_job_system_unlock(system); + -+typedef struct { -+ u32 counter_id; -+ u32 key; -+ int enable; -+} _mali_uk_annotate_profiling_mem_counter_s; ++ /* Since the job now is in signaled state, timeouts from the timeline system will be ++ * ignored, and it is not possible to signal this job again. */ + -+typedef struct { -+ u32 sampling_rate; -+ int enable; -+} _mali_uk_annotate_profiling_enable_s; ++ timeline_system = system->session->timeline_system; ++ MALI_DEBUG_ASSERT_POINTER(timeline_system); + ++ /* Wait until activated. 
*/ ++ _mali_osk_wait_queue_wait_event(timeline_system->wait_queue, mali_soft_job_is_activated, (void *) job); + -+/** @addtogroup _mali_uk_core U/K Core -+ * @{ */ ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: signaling soft job %u (0x%08X)\n", job->id, job)); + -+/** @defgroup _mali_uk_waitfornotification_s Wait For Notification -+ * @{ */ ++ schedule_mask = mali_timeline_tracker_release(&job->tracker); ++ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); + -+/** @brief Notification type encodings -+ * -+ * Each Notification type is an ordered pair of (subsystem,id), and is unique. -+ * -+ * The encoding of subsystem,id into a 32-bit word is: -+ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK) -+ * | (( id << _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK) -+ * -+ * @see _mali_uk_wait_for_notification_s -+ */ -+typedef enum { -+ /** core notifications */ ++ mali_soft_job_destroy(job); + -+ _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20, -+ _MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40, -+ _MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80, -+ _MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100, ++ return _MALI_OSK_ERR_OK; ++} + -+ /** Fragment Processor notifications */ ++static void mali_soft_job_send_activated_notification(struct mali_soft_job *job) ++{ ++ if (NULL != job->activated_notification) { ++ _mali_uk_soft_job_activated_s *res = job->activated_notification->result_buffer; ++ res->user_job = job->user_job; ++ mali_session_send_notification(job->system->session, job->activated_notification); ++ } ++ job->activated_notification = NULL; ++} + -+ _MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10, -+ _MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20, ++mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job) ++{ ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+ /** Vertex Processor notifications */ ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(job->system); ++ MALI_DEBUG_ASSERT_POINTER(job->system->session); + -+ _MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10, -+ _MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20, ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline activation for soft job %u (0x%08X).\n", job->id, job)); + -+ /** Profiling notifications */ -+ _MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10, -+ _MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20, -+} _mali_uk_notification_type; ++ mali_soft_job_system_lock(job->system); + -+/** to assist in splitting up 32-bit notification value in subsystem and id value */ -+#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000 -+#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16 -+#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF -+#define _MALI_NOTIFICATION_ID_SHIFT 0 ++ if (unlikely(job->system->session->is_aborting)) { ++ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Soft job %u (0x%08X) activated while session is aborting.\n", job->id, job)); + ++ mali_soft_job_system_unlock(job->system); + -+/** @brief Enumeration of possible settings which match mali_setting_t in user space -+ * -+ * -+ */ -+typedef enum { -+ _MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0, -+ _MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, -+ 
_MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED, -+ _MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED, -+ _MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED, -+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR, -+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW, -+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER, -+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, -+ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, -+ _MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, -+ _MALI_UK_USER_SETTING_MAX, -+} _mali_uk_user_setting_t; ++ /* Since we are in shutdown, we can ignore the scheduling bitmask. */ ++ mali_timeline_tracker_release(&job->tracker); ++ mali_soft_job_destroy(job); ++ return schedule_mask; ++ } + -+/* See mali_user_settings_db.c */ -+extern const char *_mali_uk_user_setting_descriptions[]; -+#define _MALI_UK_USER_SETTING_DESCRIPTIONS \ -+ { \ -+ "sw_events_enable", \ -+ "colorbuffer_capture_enable", \ -+ "depthbuffer_capture_enable", \ -+ "stencilbuffer_capture_enable", \ -+ "per_tile_counters_enable", \ -+ "buffer_capture_compositor", \ -+ "buffer_capture_window", \ -+ "buffer_capture_other", \ -+ "buffer_capture_n_frames", \ -+ "buffer_capture_resize_factor", \ -+ "sw_counters_enable", \ -+ }; ++ /* Send activated notification. */ ++ mali_soft_job_send_activated_notification(job); + -+/** @brief struct to hold the value to a particular setting as seen in the kernel space -+ */ -+typedef struct { -+ _mali_uk_user_setting_t setting; -+ u32 value; -+} _mali_uk_settings_changed_s; ++ /* Wake up sleeping signaler. */ ++ job->activated = MALI_TRUE; + -+/** @brief Arguments for _mali_ukk_wait_for_notification() -+ * -+ * On successful return from _mali_ukk_wait_for_notification(), the members of -+ * this structure will indicate the reason for notification. -+ * -+ * Specifically, the source of the notification can be identified by the -+ * subsystem and id fields of the mali_uk_notification_type in the code.type -+ * member. The type member is encoded in a way to divide up the types into a -+ * subsystem field, and a per-subsystem ID field. See -+ * _mali_uk_notification_type for more information. -+ * -+ * Interpreting the data union member depends on the notification type: -+ * -+ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS -+ * - The kernel side is shutting down. No further -+ * _mali_uk_wait_for_notification() calls should be made. -+ * - In this case, the value of the data union member is undefined. -+ * - This is used to indicate to the user space client that it should close -+ * the connection to the Mali Device Driver. -+ * - type == _MALI_NOTIFICATION_PP_FINISHED -+ * - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr -+ * identifier used to start the job with, the job status, the number of milliseconds the job took to render, -+ * and values of core registers when the job finished (irq status, performance counters, renderer list -+ * address). -+ * - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED. -+ * - If the hardware detected a timeout while rendering the job, or software detected the job is -+ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will -+ * indicate _MALI_UK_JOB_STATUS_HANG. -+ * - If the hardware detected a bus error while accessing memory associated with the job, status will -+ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT. 
-+ * - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job -+ * didn't start the hardware yet, e.g. when the driver closes. -+ * - type == _MALI_NOTIFICATION_GP_FINISHED -+ * - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of -+ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned. -+ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED. -+ * - type == _MALI_NOTIFICATION_GP_STALLED -+ * - The nofication data is of type _mali_uk_gp_job_suspended_s. It contains the user_job_ptr -+ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on -+ * which the job stalled. -+ * - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY -+ * when the polygon list builder unit has run out of memory. -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_notification_type type; /**< [out] Type of notification available */ -+ union { -+ _mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */ -+ _mali_uk_gp_job_finished_s gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */ -+ _mali_uk_pp_job_finished_s pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */ -+ _mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */ -+ _mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */ -+ _mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter; -+ _mali_uk_annotate_profiling_enable_s profiling_enable; -+ } data; -+} _mali_uk_wait_for_notification_s; ++ /* If job type is self signaled, release tracker, move soft job to free list, and scheduler at once */ ++ if (MALI_SOFT_JOB_TYPE_SELF_SIGNALED == job->type) { ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); + -+/** @brief Arguments for _mali_ukk_post_notification() -+ * -+ * Posts the specified notification to the notification queue for this application. -+ * This is used to send a quit message to the callback thread. -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_notification_type type; /**< [in] Type of notification to post */ -+} _mali_uk_post_notification_s; ++ job->state = MALI_SOFT_JOB_STATE_SIGNALED; ++ mali_soft_job_system_unlock(job->system); + -+/** @} */ /* end group _mali_uk_waitfornotification_s */ ++ schedule_mask |= mali_timeline_tracker_release(&job->tracker); + -+/** @defgroup _mali_uk_getapiversion_s Get API Version -+ * @{ */ ++ mali_soft_job_destroy(job); ++ } else { ++ _mali_osk_wait_queue_wake_up(job->tracker.system->wait_queue); + -+/** helpers for Device Driver API version handling */ ++ mali_soft_job_system_unlock(job->system); ++ } + -+/** @brief Encode a version ID from a 16-bit input -+ * -+ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */ -+#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x)) ++ return schedule_mask; ++} + -+/** @brief Check whether a 32-bit value is likely to be Device Driver API -+ * version ID. 
*/ -+#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF)) ++mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job) ++{ ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version -+ * ID */ -+#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF) ++ MALI_DEBUG_ASSERT_POINTER(job); ++ MALI_DEBUG_ASSERT_POINTER(job->system); ++ MALI_DEBUG_ASSERT_POINTER(job->system->session); ++ MALI_DEBUG_ASSERT(MALI_TRUE == job->activated); + -+/** @brief Determine whether two 32-bit encoded version IDs match */ -+#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y)))) -+ /** -+ * RK MALI version code -+ */ -+#define _MALI_RK_LIBS_VERSION 1 ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeline timeout for soft job %u (0x%08X).\n", job->id, job)); + -+/** -+ * API version define. -+ * Indicates the version of the kernel API -+ * The version is a 16bit integer incremented on each API change. -+ * The 16bit integer is stored twice in a 32bit integer -+ * For example, for version 1 the value would be 0x00010001 -+ */ -+#define _MALI_API_VERSION 900 -+#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION) ++ mali_soft_job_system_lock(job->system); + -+/** -+ * The API version is a 16-bit integer stored in both the lower and upper 16-bits -+ * of a 32-bit value. The 16-bit API version value is incremented on each API -+ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s. -+ */ -+typedef u32 _mali_uk_api_version; ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state || ++ MALI_SOFT_JOB_STATE_SIGNALED == job->state); + -+/** @brief Arguments for _mali_uk_get_api_version() -+ * -+ * The user-side interface version must be written into the version member, -+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of -+ * the kernel-side interface. -+ * -+ * On successful return, the version member will be the API version of the -+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version -+ * of the API. -+ * -+ * The compatible member must be checked to see if the version of the user-side -+ * interface is compatible with the kernel-side interface, since future versions -+ * of the interface may be backwards compatible. -+ */ -+typedef struct { -+ u32 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ -+ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */ -+} _mali_uk_get_api_version_s; -+ -+/** @brief Arguments for _mali_uk_get_api_version_v2() -+ * -+ * The user-side interface version must be written into the version member, -+ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of -+ * the kernel-side interface. -+ * -+ * On successful return, the version member will be the API version of the -+ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version -+ * of the API. -+ * -+ * The compatible member must be checked to see if the version of the user-side -+ * interface is compatible with the kernel-side interface, since future versions -+ * of the interface may be backwards compatible. -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. 
*/ -+ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */ -+} _mali_uk_get_api_version_v2_s; -+ -+typedef struct -+{ -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ -+} _mali_uk_get_mali_version_in_rk30_s; -+ -+/* rk_ext : rk_ko_ver_t. */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ -+} _mali_rk_ko_version_s; -+/** @} */ /* end group _mali_uk_getapiversion_s */ -+ -+/** @defgroup _mali_uk_get_user_settings_s Get user space settings */ -+ -+/** @brief struct to keep the matching values of the user space settings within certain context -+ * -+ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value -+ * of that particular setting. -+ * -+ * All settings are given reference to the context pointed to by the ctx pointer. -+ * -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */ -+} _mali_uk_get_user_settings_s; ++ if (unlikely(job->system->session->is_aborting)) { ++ /* The session is aborting. This job will be released and destroyed by @ref ++ * mali_soft_job_system_abort(). */ ++ mali_soft_job_system_unlock(job->system); + -+/** @brief struct to hold the value of a particular setting from the user space within a given context -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_user_setting_t setting; /**< [in] setting to get */ -+ u32 value; /**< [out] value of setting */ -+} _mali_uk_get_user_setting_s; ++ return MALI_SCHEDULER_MASK_EMPTY; ++ } + -+/** @brief Arguments for _mali_ukk_request_high_priority() */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+} _mali_uk_request_high_priority_s; ++ if (MALI_SOFT_JOB_STATE_STARTED != job->state) { ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state); + -+/** @brief Arguments for _mali_ukk_pending_submit() */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+} _mali_uk_pending_submit_s; ++ /* The job is about to be signaled, ignore timeout. */ ++ MALI_DEBUG_PRINT(4, ("Mali Soft Job: Timeout on soft job %u (0x%08X) in signaled state.\n", job->id, job)); ++ mali_soft_job_system_unlock(job->system); ++ return schedule_mask; ++ } + -+/** @} */ /* end group _mali_uk_core */ ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state); + ++ job->state = MALI_SOFT_JOB_STATE_TIMED_OUT; ++ _mali_osk_atomic_inc(&job->refcount); + -+/** @defgroup _mali_uk_memory U/K Memory -+ * @{ */ ++ mali_soft_job_system_unlock(job->system); + -+#define _MALI_MEMORY_ALLOCATE_RESIZEABLE (1<<4) /* BUFFER can trim dow/grow*/ -+#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/ -+#define _MALI_MEMORY_ALLOCATE_SWAPPABLE (1<<6) /* Allocate swappale memory. */ -+#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/ -+#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. 
*/ ++ schedule_mask = mali_timeline_tracker_release(&job->tracker); + ++ mali_soft_job_destroy(job); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 gpu_vaddr; /**< [in] GPU virtual address */ -+ u32 vsize; /**< [in] vitrual size of the allocation */ -+ u32 psize; /**< [in] physical size of the allocation */ -+ u32 flags; -+ u64 backend_handle; /**< [out] backend handle */ -+ s32 secure_shared_fd; /** < [in] the mem handle for secure mem */ -+} _mali_uk_alloc_mem_s; ++ return schedule_mask; ++} + ++void mali_soft_job_system_abort(struct mali_soft_job_system *system) ++{ ++ struct mali_soft_job *job, *tmp; ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(jobs); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 gpu_vaddr; /**< [in] use as handle to free allocation */ -+ u32 free_pages_nr; /** < [out] record the number of free pages */ -+} _mali_uk_free_mem_s; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT(system->session->is_aborting); + ++ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting soft job system for session 0x%08X.\n", system->session)); + -+#define _MALI_MEMORY_BIND_BACKEND_UMP (1<<8) -+#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF (1<<9) -+#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY (1<<10) -+#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11) -+#define _MALI_MEMORY_BIND_BACKEND_EXT_COW (1<<12) -+#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13) ++ mali_soft_job_system_lock(system); + ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &system->jobs_used, struct mali_soft_job, system_list) { ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_STARTED == job->state || ++ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state); + -+#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \ -+ _MALI_MEMORY_BIND_BACKEND_DMA_BUF |\ -+ _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\ -+ _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\ -+ _MALI_MEMORY_BIND_BACKEND_EXT_COW |\ -+ _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION) ++ if (MALI_SOFT_JOB_STATE_STARTED == job->state) { ++ /* If the job has been activated, we have to release the tracker and destroy ++ * the job. If not, the tracker will be released and the job destroyed when ++ * it is activated. */ ++ if (MALI_TRUE == job->activated) { ++ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting unsignaled soft job %u (0x%08X).\n", job->id, job)); + ++ job->state = MALI_SOFT_JOB_STATE_SIGNALED; ++ _mali_osk_list_move(&job->system_list, &jobs); ++ } ++ } else if (MALI_SOFT_JOB_STATE_TIMED_OUT == job->state) { ++ MALI_DEBUG_PRINT(3, ("Mali Soft Job: Aborting timed out soft job %u (0x%08X).\n", job->id, job)); + -+#define _MALI_MEMORY_GPU_READ_ALLOCATE (1<<16) ++ /* We need to destroy this soft job. 
*/ ++ _mali_osk_list_move(&job->system_list, &jobs); ++ } ++ } + ++ mali_soft_job_system_unlock(system); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 vaddr; /**< [in] mali address to map the physical memory to */ -+ u32 size; /**< [in] size */ -+ u32 flags; /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */ -+ u32 padding; /** padding for 32/64 struct alignment */ -+ union { -+ struct { -+ u32 secure_id; /**< [in] secure id */ -+ u32 rights; /**< [in] rights necessary for accessing memory */ -+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ -+ } bind_ump; -+ struct { -+ u32 mem_fd; /**< [in] Memory descriptor */ -+ u32 rights; /**< [in] rights necessary for accessing memory */ -+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ -+ } bind_dma_buf; -+ struct { -+ u32 phys_addr; /**< [in] physical address */ -+ u32 rights; /**< [in] rights necessary for accessing memory */ -+ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ -+ } bind_ext_memory; -+ } mem_union; -+} _mali_uk_bind_mem_s; ++ /* Release and destroy jobs. */ ++ _MALI_OSK_LIST_FOREACHENTRY(job, tmp, &jobs, struct mali_soft_job, system_list) { ++ MALI_DEBUG_ASSERT(MALI_SOFT_JOB_STATE_SIGNALED == job->state || ++ MALI_SOFT_JOB_STATE_TIMED_OUT == job->state); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 flags; /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */ -+ u32 vaddr; /**< [in] identifier for mapped memory object in kernel space */ -+} _mali_uk_unbind_mem_s; ++ if (MALI_SOFT_JOB_STATE_SIGNALED == job->state) { ++ mali_timeline_tracker_release(&job->tracker); ++ } + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 target_handle; /**< [in] handle of allocation need to do COW */ -+ u32 target_offset; /**< [in] offset in target allocation to do COW(for support COW a memory allocated from memory_bank, PAGE_SIZE align)*/ -+ u32 target_size; /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */ -+ u32 range_start; /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/ -+ u32 range_size; /**< [in] re allocate size (PAGE_SIZE align)*/ -+ u32 vaddr; /**< [in] mali address for the new allocaiton */ -+ u32 backend_handle; /**< [out] backend handle */ -+ u32 flags; -+} _mali_uk_cow_mem_s; ++ /* Move job back to used list before destroying. */ ++ _mali_osk_list_move(&job->system_list, &system->jobs_used); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 range_start; /**< [in] re allocate range start offset, offset from the start of allocation */ -+ u32 size; /**< [in] re allocate size*/ -+ u32 vaddr; /**< [in] mali address for the new allocaiton */ -+ s32 change_pages_nr; /**< [out] record the page number change for cow operation */ -+} _mali_uk_cow_modify_range_s; ++ mali_soft_job_destroy(job); ++ } ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h +new file mode 100644 +index 000000000..018ef4c52 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_soft_job.h +@@ -0,0 +1,190 @@ ++/* ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + ++#ifndef __MALI_SOFT_JOB_H__ ++#define __MALI_SOFT_JOB_H__ + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 mem_fd; /**< [in] Memory descriptor */ -+ u32 size; /**< [out] size */ -+} _mali_uk_dma_buf_get_size_s; ++#include "mali_osk.h" + -+/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */ -+#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0) ++#include "mali_timeline.h" + ++struct mali_timeline_fence; ++struct mali_session_data; ++struct mali_soft_job; ++struct mali_soft_job_system; + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 vaddr; /* the buffer to do resize*/ -+ u32 psize; /* wanted physical size of this memory */ -+} _mali_uk_mem_resize_s; ++/** ++ * Soft job types. ++ * ++ * Soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED will only complete after activation if either ++ * they are signaled by user-space (@ref mali_soft_job_system_signaled_job) or if they are timed out ++ * by the Timeline system. ++ * Soft jobs of type MALI_SOFT_JOB_TYPE_SELF_SIGNALED will release job resource automatically ++ * in kernel when the job is activated. ++ */ ++typedef enum mali_soft_job_type { ++ MALI_SOFT_JOB_TYPE_SELF_SIGNALED, ++ MALI_SOFT_JOB_TYPE_USER_SIGNALED, ++} mali_soft_job_type; + +/** -+ * @brief Arguments for _mali_uk[uk]_mem_write_safe() ++ * Soft job state. ++ * ++ * mali_soft_job_system_start_job a job will first be allocated.The job's state set to MALI_SOFT_JOB_STATE_ALLOCATED. ++ * Once the job is added to the timeline system, the state changes to MALI_SOFT_JOB_STATE_STARTED. ++ * ++ * For soft jobs of type MALI_SOFT_JOB_TYPE_USER_SIGNALED the state is changed to ++ * MALI_SOFT_JOB_STATE_SIGNALED when @ref mali_soft_job_system_signal_job is called and the soft ++ * job's state is MALI_SOFT_JOB_STATE_STARTED or MALI_SOFT_JOB_STATE_TIMED_OUT. ++ * ++ * If a soft job of type MALI_SOFT_JOB_TYPE_USER_SIGNALED is timed out before being signaled, the ++ * state is changed to MALI_SOFT_JOB_STATE_TIMED_OUT. This can only happen to soft jobs in state ++ * MALI_SOFT_JOB_STATE_STARTED. 
++ * + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 src; /**< [in] Pointer to source data */ -+ u64 dest; /**< [in] Destination Mali buffer */ -+ u32 size; /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */ -+} _mali_uk_mem_write_safe_s; ++typedef enum mali_soft_job_state { ++ MALI_SOFT_JOB_STATE_ALLOCATED, ++ MALI_SOFT_JOB_STATE_STARTED, ++ MALI_SOFT_JOB_STATE_SIGNALED, ++ MALI_SOFT_JOB_STATE_TIMED_OUT, ++} mali_soft_job_state; + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 size; /**< [out] size of MMU page table information (registers + page tables) */ -+} _mali_uk_query_mmu_page_table_dump_size_s; ++#define MALI_SOFT_JOB_INVALID_ID ((u32) -1) + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 size; /**< [in] size of buffer to receive mmu page table information */ -+ u64 buffer; /**< [in,out] buffer to receive mmu page table information */ -+ u32 register_writes_size; /**< [out] size of MMU register dump */ -+ u64 register_writes; /**< [out] pointer within buffer where MMU register dump is stored */ -+ u32 page_table_dump_size; /**< [out] size of MMU page table dump */ -+ u64 page_table_dump; /**< [out] pointer within buffer where MMU page table dump is stored */ -+} _mali_uk_dump_mmu_page_table_s; ++/** ++ * Soft job struct. ++ * ++ * Soft job can be used to represent any kind of CPU work done in kernel-space. ++ */ ++typedef struct mali_soft_job { ++ mali_soft_job_type type; /**< Soft job type. Must be one of MALI_SOFT_JOB_TYPE_*. */ ++ u64 user_job; /**< Identifier for soft job in user space. */ ++ _mali_osk_atomic_t refcount; /**< Soft jobs are reference counted to prevent premature deletion. */ ++ struct mali_timeline_tracker tracker; /**< Timeline tracker for soft job. */ ++ mali_bool activated; /**< MALI_TRUE if the job has been activated, MALI_FALSE if not. */ ++ _mali_osk_notification_t *activated_notification; /**< Pre-allocated notification object for ACTIVATED_NOTIFICATION. */ + -+/** @} */ /* end group _mali_uk_memory */ ++ /* Protected by soft job system lock. */ ++ u32 id; /**< Used by user-space to find corresponding soft job in kernel-space. */ ++ mali_soft_job_state state; /**< State of soft job, must be one of MALI_SOFT_JOB_STATE_*. */ ++ struct mali_soft_job_system *system; /**< The soft job system this job is in. */ ++ _mali_osk_list_t system_list; /**< List element used by soft job system. */ ++} mali_soft_job; + ++/** ++ * Per-session soft job system. ++ * ++ * The soft job system is used to manage all soft jobs that belongs to a session. ++ */ ++typedef struct mali_soft_job_system { ++ struct mali_session_data *session; /**< The session this soft job system belongs to. */ ++ _MALI_OSK_LIST_HEAD(jobs_used); /**< List of all allocated soft jobs. */ + -+/** @addtogroup _mali_uk_pp U/K Fragment Processor -+ * @{ */ ++ _mali_osk_spinlock_irq_t *lock; /**< Lock used to protect soft job system and its soft jobs. */ ++ u32 lock_owner; /**< Contains tid of thread that locked the system or 0, if not locked. */ ++ u32 last_job_id; /**< Recored the last job id protected by lock. */ ++} mali_soft_job_system; + -+/** @brief Arguments for _mali_ukk_get_pp_number_of_cores() ++/** ++ * Create a soft job system. 
+ * -+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores -+ * will contain the number of Fragment Processor cores in the system. ++ * @param session The session this soft job system will belong to. ++ * @return The new soft job system, or NULL if unsuccessful. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 number_of_total_cores; /**< [out] Total number of Fragment Processor cores in the system */ -+ u32 number_of_enabled_cores; /**< [out] Number of enabled Fragment Processor cores */ -+} _mali_uk_get_pp_number_of_cores_s; ++struct mali_soft_job_system *mali_soft_job_system_create(struct mali_session_data *session); + -+/** @brief Arguments for _mali_ukk_get_pp_core_version() ++/** ++ * Destroy a soft job system. + * -+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains -+ * the version that all Fragment Processor cores are compatible with. ++ * @note The soft job must not have any started or activated jobs. Call @ref ++ * mali_soft_job_system_abort first. ++ * ++ * @param system The soft job system we are destroying. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */ -+ u32 padding; -+} _mali_uk_get_pp_core_version_s; -+ -+/** @} */ /* end group _mali_uk_pp */ -+ -+ -+/** @addtogroup _mali_uk_gp U/K Vertex Processor -+ * @{ */ ++void mali_soft_job_system_destroy(struct mali_soft_job_system *system); + -+/** @brief Arguments for _mali_ukk_get_gp_number_of_cores() ++/** ++ * Create a soft job. + * -+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores -+ * will contain the number of Vertex Processor cores in the system. ++ * @param system Soft job system to create soft job from. ++ * @param type Type of the soft job. ++ * @param user_job Identifier for soft job in user space. ++ * @return New soft job if successful, NULL if not. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 number_of_cores; /**< [out] number of Vertex Processor cores in the system */ -+} _mali_uk_get_gp_number_of_cores_s; ++struct mali_soft_job *mali_soft_job_create(struct mali_soft_job_system *system, mali_soft_job_type type, u64 user_job); + -+/** @brief Arguments for _mali_ukk_get_gp_core_version() ++/** ++ * Destroy soft job. + * -+ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() -+ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains -+ * the version that all Vertex Processor cores are compatible with. ++ * @param job Soft job to destroy. 
+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */ -+} _mali_uk_get_gp_core_version_s; -+ -+/** @} */ /* end group _mali_uk_gp */ -+ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 event_id; /**< [in] event id to register (see enum mali_profiling_events for values) */ -+ u32 data[5]; /**< [in] event specific data */ -+} _mali_uk_profiling_add_event_s; -+ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 memory_usage; /**< [out] total memory usage */ -+ u32 vaddr; /**< [in] mali address for the cow allocaiton */ -+ s32 change_pages_nr; /**< [out] record the page number change for cow operation */ -+} _mali_uk_profiling_memory_usage_get_s; -+ -+ -+/** @addtogroup _mali_uk_memory U/K Memory -+ * @{ */ ++void mali_soft_job_destroy(struct mali_soft_job *job); + -+/** @brief Arguments to _mali_ukk_mem_mmap() -+ * -+ * Use of the phys_addr member depends on whether the driver is compiled for -+ * Mali-MMU or nonMMU: -+ * - in the nonMMU case, this is the physical address of the memory as seen by -+ * the CPU (which may be a constant offset from that used by Mali) -+ * - in the MMU case, this is the Mali Virtual base address of the memory to -+ * allocate, and the particular physical pages used to back the memory are -+ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages -+ * are not reported to user-space for security reasons. ++/** ++ * Start a soft job. + * -+ * The cookie member must be stored for use later when freeing the memory by -+ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure. ++ * The soft job will be added to the Timeline system which will then activate it after all ++ * dependencies have been resolved. + * -+ * The ukk_private word must be set to zero when calling from user-space. On -+ * Kernel-side, the OS implementation of the U/K interface can use it to -+ * communicate data to the OS implementation of the OSK layer. In particular, -+ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and -+ * will communicate its own ukk_private word through the ukk_private member -+ * here. The common code itself will not inspect or modify the ukk_private -+ * word, and so it may be safely used for whatever purposes necessary to -+ * integrate Mali Memory handling into the OS. ++ * Create soft jobs with @ref mali_soft_job_create before starting them. + * -+ * The uku_private member is currently reserved for use by the user-side -+ * implementation of the U/K interface. Its value must be zero. ++ * @param job Soft job to start. ++ * @param fence Fence representing dependencies for this soft job. ++ * @return Point on soft job timeline. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ void *mapping; /**< [out] Returns user-space virtual address for the mapping */ -+ u32 size; /**< [in] Size of the requested mapping */ -+ u32 phys_addr; /**< [in] Physical address - could be offset, depending on caller+callee convention */ -+ mali_bool writeable; -+} _mali_uk_mem_mmap_s; ++mali_timeline_point mali_soft_job_start(struct mali_soft_job *job, struct mali_timeline_fence *fence); + -+/** @brief Arguments to _mali_ukk_mem_munmap() ++/** ++ * Use by user-space to signal that a soft job has completed. 
+ * -+ * The cookie and mapping members must be that returned from the same previous -+ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie -+ * and mapping - that is, it must be the value originally supplied to a call to -+ * _mali_ukk_mem_mmap that returned the values of mapping and cookie. ++ * @note Only valid for soft jobs with type MALI_SOFT_JOB_TYPE_USER_SIGNALED. + * -+ * An error will be returned if an attempt is made to unmap only part of the -+ * originally obtained range, or to unmap more than was originally obtained. ++ * @note The soft job must be in state MALI_SOFT_JOB_STATE_STARTED for the signal to be successful. ++ * ++ * @note If the soft job was signaled successfully, or it received a time out, the soft job will be ++ * destroyed after this call and should no longer be used. ++ * ++ * @note This function will block until the soft job has been activated. ++ * ++ * @param system The soft job system the job was started in. ++ * @param job_id ID of soft job we are signaling. ++ * ++ * @return _MALI_OSK_ERR_ITEM_NOT_FOUND if the soft job ID was invalid, _MALI_OSK_ERR_TIMEOUT if the ++ * soft job was timed out or _MALI_OSK_ERR_OK if we successfully signaled the soft job. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ void *mapping; /**< [in] The mapping returned from mmap call */ -+ u32 size; /**< [in] The size passed to mmap call */ -+} _mali_uk_mem_munmap_s; -+/** @} */ /* end group _mali_uk_memory */ ++_mali_osk_errcode_t mali_soft_job_system_signal_job(struct mali_soft_job_system *system, u32 job_id); + -+/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module -+ * @{ */ ++/** ++ * Used by the Timeline system to activate a soft job. ++ * ++ * @param job The soft job that is being activated. ++ * @return A scheduling bitmask. ++ */ ++mali_scheduler_mask mali_soft_job_system_activate_job(struct mali_soft_job *job); + -+/** @brief VSYNC events ++/** ++ * Used by the Timeline system to timeout a soft job. + * -+ * These events are reported when DDK starts to wait for vsync and when the -+ * vsync has occured and the DDK can continue on the next frame. ++ * A soft job is timed out if it completes or is signaled later than MALI_TIMELINE_TIMEOUT_HZ after ++ * activation. ++ * ++ * @param job The soft job that is being timed out. ++ * @return A scheduling bitmask. + */ -+typedef enum _mali_uk_vsync_event { -+ _MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0, -+ _MALI_UK_VSYNC_EVENT_END_WAIT -+} _mali_uk_vsync_event; ++mali_scheduler_mask mali_soft_job_system_timeout_job(struct mali_soft_job *job); + -+/** @brief Arguments to _mali_ukk_vsync_event() ++/** ++ * Used to cleanup activated soft jobs in the soft job system on session abort. + * ++ * @param system The soft job system that is being aborted. + */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_vsync_event event; /**< [in] VSYNCH event type */ -+} _mali_uk_vsync_event_report_s; ++void mali_soft_job_system_abort(struct mali_soft_job_system *system); + -+/** @} */ /* end group _mali_uk_vsync */ ++#endif /* __MALI_SOFT_JOB_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c +new file mode 100644 +index 000000000..f829e99f0 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.c +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting -+ * @{ */ ++#include "mali_spinlock_reentrant.h" + -+/** @brief Software counter values -+ * -+ * Values recorded for each of the software counters during a single renderpass. -+ */ -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 counters; /**< [in] The array of u32 counter values */ -+ u32 num_counters; /**< [in] The number of elements in counters array */ -+} _mali_uk_sw_counters_report_s; ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+/** @} */ /* end group _mali_uk_sw_counters_report */ ++struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order) ++{ ++ struct mali_spinlock_reentrant *spinlock; + -+/** @defgroup _mali_uk_timeline U/K Mali Timeline -+ * @{ */ ++ spinlock = _mali_osk_calloc(1, sizeof(struct mali_spinlock_reentrant)); ++ if (NULL == spinlock) { ++ return NULL; ++ } + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 timeline; /**< [in] timeline id */ -+ u32 point; /**< [out] latest point on timeline */ -+} _mali_uk_timeline_get_latest_point_s; ++ spinlock->lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, lock_order); ++ if (NULL == spinlock->lock) { ++ mali_spinlock_reentrant_term(spinlock); ++ return NULL; ++ } + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_fence_t fence; /**< [in] fence */ -+ u32 timeout; /**< [in] timeout (0 for no wait, -1 for blocking) */ -+ u32 status; /**< [out] status of fence (1 if signaled, 0 if timeout) */ -+} _mali_uk_timeline_wait_s; ++ return spinlock; ++} + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ _mali_uk_fence_t fence; /**< [in] mali fence to create linux sync fence from */ -+ s32 sync_fd; /**< [out] file descriptor for new linux sync fence */ -+} _mali_uk_timeline_create_sync_fence_s; ++void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock) ++{ ++ MALI_DEBUG_ASSERT_POINTER(spinlock); ++ MALI_DEBUG_ASSERT(0 == spinlock->counter && 0 == spinlock->owner); + -+/** @} */ /* end group _mali_uk_timeline */ ++ if (NULL != spinlock->lock) { ++ _mali_osk_spinlock_irq_term(spinlock->lock); ++ } + -+/** @} */ /* end group u_k_api */ ++ _mali_osk_free(spinlock); ++} + -+/** @} */ /* end group uddapi */ ++void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid) ++{ ++ MALI_DEBUG_ASSERT_POINTER(spinlock); ++ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); ++ MALI_DEBUG_ASSERT(0 != tid); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ s32 stream_fd; /**< [in] The profiling kernel base stream fd handle */ -+} _mali_uk_profiling_stream_fd_get_s; ++ MALI_DEBUG_PRINT(5, ("%s ^\n", __FUNCTION__)); + -+typedef struct { -+ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u64 control_packet_data; /**< [in] the control packet data for control settings */ -+ u32 
control_packet_size; /**< [in] The control packet size */ -+ u64 response_packet_data; /** < [out] The response packet data */ -+ u32 response_packet_size; /** < [in,out] The response packet data */ -+} _mali_uk_profiling_control_set_s; ++ if (tid != spinlock->owner) { ++ _mali_osk_spinlock_irq_lock(spinlock->lock); ++ MALI_DEBUG_ASSERT(0 == spinlock->owner && 0 == spinlock->counter); ++ spinlock->owner = tid; ++ } + -+#ifdef __cplusplus ++ MALI_DEBUG_PRINT(5, ("%s v\n", __FUNCTION__)); ++ ++ ++spinlock->counter; +} -+#endif + -+#endif /* __MALI_UTGARD_UK_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h b/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h ++void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid) ++{ ++ MALI_DEBUG_ASSERT_POINTER(spinlock); ++ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); ++ MALI_DEBUG_ASSERT(0 != tid && tid == spinlock->owner); ++ ++ --spinlock->counter; ++ if (0 == spinlock->counter) { ++ spinlock->owner = 0; ++ MALI_DEBUG_PRINT(5, ("%s release last\n", __FUNCTION__)); ++ _mali_osk_spinlock_irq_unlock(spinlock->lock); ++ } ++} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h new file mode 100644 -index 000000000..6fafc6777 +index 000000000..4d788ec1b --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h -@@ -0,0 +1,30 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_spinlock_reentrant.h +@@ -0,0 +1,70 @@ +/* -+ * Copyright (C) 2010, 2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -306207,34 +309157,74 @@ index 000000000..6fafc6777 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef __MALI_SPINLOCK_REENTRANT_H__ ++#define __MALI_SPINLOCK_REENTRANT_H__ ++ ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++ +/** -+ * @file mali_kernel_license.h -+ * Defines for the macro MODULE_LICENSE. ++ * Reentrant spinlock. + */ ++struct mali_spinlock_reentrant { ++ _mali_osk_spinlock_irq_t *lock; ++ u32 owner; ++ u32 counter; ++}; + -+#ifndef __MALI_KERNEL_LICENSE_H__ -+#define __MALI_KERNEL_LICENSE_H__ ++/** ++ * Create a new reentrant spinlock. ++ * ++ * @param lock_order Lock order. ++ * @return New reentrant spinlock. ++ */ ++struct mali_spinlock_reentrant *mali_spinlock_reentrant_init(_mali_osk_lock_order_t lock_order); + -+#ifdef __cplusplus -+extern "C" { -+#endif ++/** ++ * Terminate reentrant spinlock and free any associated resources. ++ * ++ * @param spinlock Reentrant spinlock to terminate. ++ */ ++void mali_spinlock_reentrant_term(struct mali_spinlock_reentrant *spinlock); + -+#define MALI_KERNEL_LINUX_LICENSE "GPL" -+#define MALI_LICENSE_IS_GPL 1 ++/** ++ * Wait for reentrant spinlock to be signaled. ++ * ++ * @param spinlock Reentrant spinlock. ++ * @param tid Thread ID. ++ */ ++void mali_spinlock_reentrant_wait(struct mali_spinlock_reentrant *spinlock, u32 tid); + -+#ifdef __cplusplus ++/** ++ * Signal reentrant spinlock. ++ * ++ * @param spinlock Reentrant spinlock. ++ * @param tid Thread ID. 
++ */ ++void mali_spinlock_reentrant_signal(struct mali_spinlock_reentrant *spinlock, u32 tid); ++ ++/** ++ * Check if thread is holding reentrant spinlock. ++ * ++ * @param spinlock Reentrant spinlock. ++ * @param tid Thread ID. ++ * @return MALI_TRUE if thread is holding spinlock, MALI_FALSE if not. ++ */ ++MALI_STATIC_INLINE mali_bool mali_spinlock_reentrant_is_held(struct mali_spinlock_reentrant *spinlock, u32 tid) ++{ ++ MALI_DEBUG_ASSERT_POINTER(spinlock->lock); ++ return (tid == spinlock->owner && 0 < spinlock->counter); +} -+#endif + -+#endif /* __MALI_KERNEL_LICENSE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c ++#endif /* __MALI_SPINLOCK_REENTRANT_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline.c new file mode 100644 -index 000000000..7abd1532a +index 000000000..ffffee930 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c -@@ -0,0 +1,368 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline.c +@@ -0,0 +1,1964 @@ +/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -306242,1802 +309232,1969 @@ index 000000000..7abd1532a + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+ -+#include "mali_osk_mali.h" ++#include ++#include "mali_timeline.h" +#include "mali_kernel_common.h" ++#include "mali_scheduler.h" ++#include "mali_soft_job.h" ++#include "mali_timeline_fence_wait.h" ++#include "mali_timeline_sync_fence.h" ++#include "mali_executor.h" ++#include "mali_pp_job.h" + -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_DEVFREQ_THERMAL -+#include ++#define MALI_TIMELINE_SYSTEM_LOCKED(system) (mali_spinlock_reentrant_is_held((system)->spinlock, _mali_osk_get_tid())) ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++_mali_osk_wq_work_t *sync_fence_callback_work_t = NULL; ++_mali_osk_spinlock_irq_t *sync_fence_callback_list_lock = NULL; ++static _MALI_OSK_LIST_HEAD_STATIC_INIT(sync_fence_callback_queue); +#endif + -+#include -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -+#include -+#else /* Linux >= 3.13 */ -+/* In 3.13 the OPP include header file, types, and functions were all -+ * renamed. Use the old filename for the include, and define the new names to -+ * the old, when an old kernel is detected. ++/* ++ * Following three elements are used to record how many ++ * gp, physical pp or virtual pp jobs are delayed in the whole ++ * timeline system, we can use these three value to decide ++ * if need to deactivate idle group. 
+ */ -+#include -+#define dev_pm_opp opp -+#define dev_pm_opp_get_voltage opp_get_voltage -+#define dev_pm_opp_get_opp_count opp_get_opp_count -+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil -+#endif /* Linux >= 3.13 */ ++_mali_osk_atomic_t gp_tracker_count; ++_mali_osk_atomic_t phy_pp_tracker_count; ++_mali_osk_atomic_t virt_pp_tracker_count; + -+#include "mali_pm_metrics.h" ++static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system, ++ struct mali_timeline_waiter *waiter); + -+#include -+#include ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#include ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ++#include ++#include ++#include + -+static struct monitor_dev_profile mali_mdevp = { -+ .type = MONITOR_TYPE_DEV, -+ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, -+ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, ++struct mali_deferred_fence_put_entry { ++ struct hlist_node list; ++ struct sync_fence *fence; +}; + -+static struct devfreq_simple_ondemand_data ondemand_data; ++static HLIST_HEAD(mali_timeline_sync_fence_to_free_list); ++static DEFINE_SPINLOCK(mali_timeline_sync_fence_to_free_lock); + -+static int -+mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) ++static void put_sync_fences(struct work_struct *ignore) +{ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ struct dev_pm_opp *opp; -+ unsigned long freq = 0; -+ unsigned long old_freq = mdev->current_freq; -+ unsigned long voltage; -+ int err; -+ -+ freq = *target_freq; ++ struct hlist_head list; ++ struct hlist_node *tmp, *pos; ++ unsigned long flags; ++ struct mali_deferred_fence_put_entry *o; + -+ opp = devfreq_recommended_opp(dev, &freq, flags); -+ if (IS_ERR(opp)) { -+ MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp))); -+ return PTR_ERR(opp); -+ } -+ voltage = dev_pm_opp_get_voltage(opp); -+ dev_pm_opp_put(opp); ++ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags); ++ hlist_move_list(&mali_timeline_sync_fence_to_free_list, &list); ++ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags); + -+ MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags)); -+ /* -+ * Only update if there is a change of frequency -+ */ -+ if (old_freq == freq) { -+ *target_freq = freq; -+ mali_pm_reset_dvfs_utilisation(mdev); -+#ifdef CONFIG_REGULATOR -+ if (mdev->current_voltage == voltage) -+ return 0; -+ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); -+ if (err) { -+ dev_err(dev, "Failed to set voltage (%d)\n", err); -+ return err; -+ } -+ mdev->current_voltage = voltage; -+#endif -+ return 0; ++ hlist_for_each_entry_safe(o, pos, tmp, &list, list) { ++ sync_fence_put(o->fence); ++ kfree(o); + } ++} + -+ err = clk_bulk_enable(mdev->num_clks, mdev->clks); -+ if (err) -+ return err; ++static DECLARE_DELAYED_WORK(delayed_sync_fence_put, put_sync_fences); ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */ + -+#ifdef CONFIG_REGULATOR -+ if (mdev->regulator && mdev->current_voltage != voltage && -+ old_freq < freq) { -+ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); -+ if (err) { -+ MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err)); -+ goto err; -+ } -+ } ++/* Callback that is called when a sync fence a tracker is waiting on is signaled. 
*/ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static void mali_timeline_sync_fence_callback(struct sync_fence *sync_fence, struct sync_fence_waiter *sync_fence_waiter) ++#else ++static void mali_timeline_sync_fence_callback(struct mali_internal_sync_fence *sync_fence, struct mali_internal_sync_fence_waiter *sync_fence_waiter) +#endif ++{ ++ struct mali_timeline_tracker *tracker; + -+ err = clk_set_rate(mdev->clock, freq); -+ if (err) { -+ MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq)); -+ goto err; -+ } -+ -+ *target_freq = freq; -+ mdev->current_freq = freq; -+ if (mdev->devfreq) -+ mdev->devfreq->last_status.current_frequency = freq; -+ -+#ifdef CONFIG_REGULATOR -+ if (mdev->regulator && mdev->current_voltage != voltage && -+ old_freq > freq) { -+ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); -+ if (err) { -+ MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err)); -+ goto err; -+ } -+ } -+#endif ++ MALI_IGNORE(sync_fence); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence_waiter); + -+ mdev->current_voltage = voltage; ++ tracker = _MALI_OSK_CONTAINER_OF(sync_fence_waiter, struct mali_timeline_tracker, sync_fence_waiter); ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ mali_pm_reset_dvfs_utilisation(mdev); -+err: -+ clk_bulk_disable(mdev->num_clks, mdev->clks); ++ _mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock); ++ _mali_osk_list_addtail(&tracker->sync_fence_signal_list, &sync_fence_callback_queue); ++ _mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock); + -+ return err; ++ _mali_osk_wq_schedule_work(sync_fence_callback_work_t); +} ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+static int -+mali_devfreq_cur_freq(struct device *dev, unsigned long *freq) ++static mali_scheduler_mask mali_timeline_tracker_time_out(struct mali_timeline_tracker *tracker) +{ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ -+ *freq = mdev->current_freq; ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_SOFT == tracker->type); + -+ MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d \n", *freq)); -+ return 0; ++ return mali_soft_job_system_timeout_job((struct mali_soft_job *) tracker->job); +} + -+static int -+mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) ++static void mali_timeline_timer_callback(void *data) +{ -+ struct mali_device *mdev = dev_get_drvdata(dev); ++ struct mali_timeline_system *system; ++ struct mali_timeline_tracker *tracker; ++ struct mali_timeline *timeline; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ u32 tid = _mali_osk_get_tid(); + -+ stat->current_frequency = mdev->current_freq; ++ timeline = (struct mali_timeline *) data; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ mali_pm_get_dvfs_utilisation(mdev, -+ &stat->total_time, &stat->busy_time); ++ system = timeline->system; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ stat->private_data = NULL; ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+#ifdef CONFIG_DEVFREQ_THERMAL -+ memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat)); -+#endif ++ if (!system->timer_enabled) { ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ return; ++ } + -+ return 0; -+} ++ tracker = timeline->tracker_tail; ++ timeline->timer_active = MALI_FALSE; + -+/* setup platform specific opp in platform.c*/ -+int __weak setup_opps(void) -+{ -+ return 0; -+} ++ if (NULL != tracker && MALI_TRUE == tracker->timer_active) { ++ /* This is likely the delayed work that has 
been schedule out before cancelled. */ ++ if (MALI_TIMELINE_TIMEOUT_HZ > (_mali_osk_time_tickcount() - tracker->os_tick_activate)) { ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ return; ++ } + -+/* term platform specific opp in platform.c*/ -+int __weak term_opps(struct device *dev) -+{ -+ return 0; ++ schedule_mask = mali_timeline_tracker_time_out(tracker); ++ tracker->timer_active = MALI_FALSE; ++ } else { ++ MALI_PRINT_ERROR(("Mali Timeline: Soft job timer callback without a waiting tracker.\n")); ++ } ++ ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ ++ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); +} + -+static int mali_devfreq_init_freq_table(struct mali_device *mdev, -+ struct devfreq_dev_profile *dp) ++void mali_timeline_system_stop_timer(struct mali_timeline_system *system) +{ -+ int err, count; -+ int i = 0; -+ unsigned long freq = 0; -+ struct dev_pm_opp *opp; -+ -+ err = setup_opps(); -+ if (err) -+ return err; ++ u32 i; ++ u32 tid = _mali_osk_get_tid(); + -+ count = dev_pm_opp_get_opp_count(mdev->dev); -+ if (count < 0) { -+ return count; -+ } ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count)); ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ system->timer_enabled = MALI_FALSE; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), -+ GFP_KERNEL); -+ if (!dp->freq_table) -+ return -ENOMEM; ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline = system->timelines[i]; + -+ for (i = 0; i < count; i++, freq++) { -+ opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq); -+ if (IS_ERR(opp)) -+ break; -+ dev_pm_opp_put(opp); ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ dp->freq_table[i] = freq; -+ MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq)); ++ if (NULL != timeline->delayed_work) { ++ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work); ++ timeline->timer_active = MALI_FALSE; ++ } + } -+ -+ if (count != i) -+ MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n", -+ count, i)); -+ -+ dp->max_state = i; -+ -+ return 0; +} + -+static void mali_devfreq_term_freq_table(struct mali_device *mdev) ++static void mali_timeline_destroy(struct mali_timeline *timeline) +{ -+ struct devfreq_dev_profile *dp = mdev->devfreq->profile; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ if (NULL != timeline) { ++ /* Assert that the timeline object has been properly cleaned up before destroying it. 
*/ ++ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next); ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head); ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); ++ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head); ++ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail); ++ MALI_DEBUG_ASSERT(NULL != timeline->system); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_MAX > timeline->id); + -+ kfree(dp->freq_table); -+ term_opps(mdev->dev); ++ if (NULL != timeline->delayed_work) { ++ _mali_osk_wq_delayed_cancel_work_sync(timeline->delayed_work); ++ _mali_osk_wq_delayed_delete_work_nonflush(timeline->delayed_work); ++ } ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (NULL != timeline->sync_tl) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_timeline_destroy(timeline->sync_tl); ++#else ++ mali_internal_sync_timeline_destroy(timeline->sync_tl); ++#endif ++ } ++#else ++ _mali_osk_free(timeline); ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ } +} + -+static void mali_devfreq_exit(struct device *dev) ++static struct mali_timeline *mali_timeline_create(struct mali_timeline_system *system, enum mali_timeline_id id) +{ -+ struct mali_device *mdev = dev_get_drvdata(dev); ++ struct mali_timeline *timeline; + -+ mali_devfreq_term_freq_table(mdev); -+} ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT(id < MALI_TIMELINE_MAX); + -+int mali_devfreq_init(struct mali_device *mdev) -+{ -+ struct device_node *np = mdev->dev->of_node; -+#ifdef CONFIG_DEVFREQ_THERMAL -+ struct devfreq_cooling_power *callbacks = NULL; -+ _mali_osk_device_data data; ++ timeline = (struct mali_timeline *) _mali_osk_calloc(1, sizeof(struct mali_timeline)); ++ if (NULL == timeline) { ++ return NULL; ++ } ++ ++ /* Initially the timeline is empty. */ ++#if defined(MALI_TIMELINE_DEBUG_START_POINT) ++ /* Start the timeline a bit before wrapping when debugging. */ ++ timeline->point_next = UINT_MAX - MALI_TIMELINE_MAX_POINT_SPAN - 128; ++#else ++ timeline->point_next = 1; +#endif -+ struct devfreq_dev_profile *dp; -+ struct dev_pm_opp *opp; -+ unsigned long opp_rate; -+ unsigned int dyn_power_coeff = 0; -+ int err; ++ timeline->point_oldest = timeline->point_next; + -+ MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n")); ++ /* The tracker and waiter lists will initially be empty. 
*/ + -+ if (!mdev->clock) -+ return -ENODEV; ++ timeline->system = system; ++ timeline->id = id; + -+ mdev->current_freq = clk_get_rate(mdev->clock); ++ timeline->delayed_work = _mali_osk_wq_delayed_create_work(mali_timeline_timer_callback, timeline); ++ if (NULL == timeline->delayed_work) { ++ mali_timeline_destroy(timeline); ++ return NULL; ++ } + -+ dp = &mdev->devfreq_profile; ++ timeline->timer_active = MALI_FALSE; + -+ dp->initial_freq = mdev->current_freq; -+ dp->polling_ms = 100; -+ dp->target = mali_devfreq_target; -+ dp->get_dev_status = mali_devfreq_status; -+ dp->get_cur_freq = mali_devfreq_cur_freq; -+ dp->exit = mali_devfreq_exit; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ { ++ char timeline_name[32]; + -+ if (mali_devfreq_init_freq_table(mdev, dp)) -+ return -EFAULT; ++ switch (id) { ++ case MALI_TIMELINE_GP: ++ _mali_osk_snprintf(timeline_name, 32, "mali-%u-gp", _mali_osk_get_pid()); ++ break; ++ case MALI_TIMELINE_PP: ++ _mali_osk_snprintf(timeline_name, 32, "mali-%u-pp", _mali_osk_get_pid()); ++ break; ++ case MALI_TIMELINE_SOFT: ++ _mali_osk_snprintf(timeline_name, 32, "mali-%u-soft", _mali_osk_get_pid()); ++ break; ++ default: ++ MALI_PRINT_ERROR(("Mali Timeline: Invalid timeline id %d\n", id)); ++ mali_timeline_destroy(timeline); ++ return NULL; ++ } + -+ of_property_read_u32(np, "upthreshold", -+ &ondemand_data.upthreshold); -+ of_property_read_u32(np, "downdifferential", -+ &ondemand_data.downdifferential); -+ of_property_read_u32(np, "dynamic-power-coefficient", -+ &dyn_power_coeff); -+ if (dyn_power_coeff) -+ dp->is_cooling_device = true; ++ timeline->destroyed = MALI_FALSE; + -+ mdev->devfreq = devfreq_add_device(mdev->dev, dp, -+ "simple_ondemand", &ondemand_data); -+ if (IS_ERR(mdev->devfreq)) { -+ mali_devfreq_term_freq_table(mdev); -+ return PTR_ERR(mdev->devfreq); -+ } ++ timeline->sync_tl = mali_sync_timeline_create(timeline, timeline_name); ++ if (NULL == timeline->sync_tl) { ++ mali_timeline_destroy(timeline); ++ return NULL; ++ } + -+ err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq); -+ if (err) { -+ MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err)); -+ goto opp_notifier_failed; ++ timeline->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM); ++ if (NULL == timeline->spinlock) { ++ mali_timeline_destroy(timeline); ++ return NULL; ++ } + } ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+ opp_rate = mdev->current_freq; -+ opp = devfreq_recommended_opp(mdev->dev, &opp_rate, 0); -+ if (!IS_ERR(opp)) -+ dev_pm_opp_put(opp); -+ mdev->devfreq->last_status.current_frequency = opp_rate; ++ return timeline; ++} + -+ mali_mdevp.data = mdev->devfreq; -+ mali_mdevp.opp_info = &mdev->opp_info; -+ mdev->mdev_info = rockchip_system_monitor_register(mdev->dev, -+ &mali_mdevp); -+ if (IS_ERR(mdev->mdev_info)) { -+ dev_dbg(mdev->dev, "without system monitor\n"); -+ mdev->mdev_info = NULL; ++static void mali_timeline_insert_tracker(struct mali_timeline *timeline, struct mali_timeline_tracker *tracker) ++{ ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ ++ if (mali_timeline_is_full(timeline)) { ++ /* Don't add tracker if timeline is full. 
*/ ++ tracker->point = MALI_TIMELINE_NO_POINT; ++ return; + } -+#ifdef CONFIG_DEVFREQ_THERMAL -+ if (of_machine_is_compatible("rockchip,rk3036")) -+ return 0; + -+ /* Initilization last_status it will be used when first power allocate called */ -+ mdev->devfreq->last_status.current_frequency = mdev->current_freq; ++ tracker->timeline = timeline; ++ tracker->point = timeline->point_next; + -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ if (NULL != data.gpu_cooling_ops) { -+ callbacks = data.gpu_cooling_ops; -+ MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed \n")); -+ } ++ /* Find next available point. */ ++ timeline->point_next++; ++ if (MALI_TIMELINE_NO_POINT == timeline->point_next) { ++ timeline->point_next++; + } + -+ if (callbacks && !dp->is_cooling_device) { -+ mdev->devfreq_cooling = devfreq_cooling_em_register( -+ mdev->devfreq, -+ callbacks); -+ if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) { -+ err = PTR_ERR(mdev->devfreq_cooling); -+ MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err)); -+ goto cooling_failed; ++ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); ++ ++ if (MALI_TIMELINE_TRACKER_GP == tracker->type) { ++ _mali_osk_atomic_inc(&gp_tracker_count); ++ } else if (MALI_TIMELINE_TRACKER_PP == tracker->type) { ++ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) { ++ _mali_osk_atomic_inc(&virt_pp_tracker_count); + } else { -+ MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed \n")); ++ _mali_osk_atomic_inc(&phy_pp_tracker_count); + } + } -+#endif + -+ return 0; ++ /* Add tracker as new head on timeline's tracker list. */ ++ if (NULL == timeline->tracker_head) { ++ /* Tracker list is empty. */ ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); + -+#ifdef CONFIG_DEVFREQ_THERMAL -+cooling_failed: -+ devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq); -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+opp_notifier_failed: -+ err = devfreq_remove_device(mdev->devfreq); -+ if (err) -+ MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err)); -+ else -+ mdev->devfreq = NULL; ++ timeline->tracker_tail = tracker; + -+ return err; ++ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next); ++ MALI_DEBUG_ASSERT(NULL == tracker->timeline_prev); ++ } else { ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next); ++ ++ tracker->timeline_prev = timeline->tracker_head; ++ timeline->tracker_head->timeline_next = tracker; ++ ++ MALI_DEBUG_ASSERT(NULL == tracker->timeline_next); ++ } ++ timeline->tracker_head = tracker; ++ ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head->timeline_next); ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail->timeline_prev); +} + -+void mali_devfreq_term(struct mali_device *mdev) ++/* Inserting the waiter object into the given timeline */ ++static void mali_timeline_insert_waiter(struct mali_timeline *timeline, struct mali_timeline_waiter *waiter_new) +{ -+ int err; ++ struct mali_timeline_waiter *waiter_prev; ++ struct mali_timeline_waiter *waiter_next; + -+ MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n")); ++ /* Waiter time must be between timeline head and tail, and there must ++ * be less than MALI_TIMELINE_MAX_POINT_SPAN elements between */ ++ MALI_DEBUG_ASSERT((waiter_new->point - timeline->point_oldest) < MALI_TIMELINE_MAX_POINT_SPAN); ++ MALI_DEBUG_ASSERT((-waiter_new->point + timeline->point_next) < MALI_TIMELINE_MAX_POINT_SPAN); + -+ rockchip_system_monitor_unregister(mdev->mdev_info); -+#ifdef CONFIG_DEVFREQ_THERMAL -+ if (!IS_ERR_OR_NULL(mdev->devfreq_cooling)) -+ 
devfreq_cooling_unregister(mdev->devfreq_cooling); -+#endif ++ /* Finding out where to put this waiter, in the linked waiter list of the given timeline **/ ++ waiter_prev = timeline->waiter_head; /* Insert new after waiter_prev */ ++ waiter_next = NULL; /* Insert new before waiter_next */ + -+ devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq); ++ /* Iterating backwards from head (newest) to tail (oldest) until we ++ * find the correct spot to insert the new waiter */ ++ while (waiter_prev && mali_timeline_point_after(waiter_prev->point, waiter_new->point)) { ++ waiter_next = waiter_prev; ++ waiter_prev = waiter_prev->timeline_prev; ++ } + -+ err = devfreq_remove_device(mdev->devfreq); -+ if (err) -+ MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err)); -+ else -+ mdev->devfreq = NULL; ++ if (NULL == waiter_prev && NULL == waiter_next) { ++ /* list is empty */ ++ timeline->waiter_head = waiter_new; ++ timeline->waiter_tail = waiter_new; ++ } else if (NULL == waiter_next) { ++ /* insert at head */ ++ waiter_new->timeline_prev = timeline->waiter_head; ++ timeline->waiter_head->timeline_next = waiter_new; ++ timeline->waiter_head = waiter_new; ++ } else if (NULL == waiter_prev) { ++ /* insert at tail */ ++ waiter_new->timeline_next = timeline->waiter_tail; ++ timeline->waiter_tail->timeline_prev = waiter_new; ++ timeline->waiter_tail = waiter_new; ++ } else { ++ /* insert between */ ++ waiter_new->timeline_next = waiter_next; ++ waiter_new->timeline_prev = waiter_prev; ++ waiter_next->timeline_prev = waiter_new; ++ waiter_prev->timeline_next = waiter_new; ++ } +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h -new file mode 100644 -index 000000000..ba7c017d8 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h -@@ -0,0 +1,17 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#ifndef _MALI_DEVFREQ_H_ -+#define _MALI_DEVFREQ_H_ + -+int mali_devfreq_init(struct mali_device *mdev); ++static void mali_timeline_update_delayed_work(struct mali_timeline *timeline) ++{ ++ struct mali_timeline_system *system; ++ struct mali_timeline_tracker *oldest_tracker; + -+void mali_devfreq_term(struct mali_device *mdev); ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id); + -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c b/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c -new file mode 100644 -index 000000000..95c3ea12d ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c -@@ -0,0 +1,36 @@ -+/** -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ system = timeline->system; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+/** -+ * @file mali_device_pause_resume.c -+ * Implementation of the Mali pause/resume functionality -+ */ ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+#include -+#include -+#include "mali_pm.h" ++ /* Timer is disabled, early out. */ ++ if (!system->timer_enabled) return; + -+void mali_dev_pause(void) -+{ -+ /* -+ * Deactive all groups to prevent hardware being touched -+ * during the period of mali device pausing -+ */ -+ mali_pm_os_suspend(MALI_FALSE); ++ oldest_tracker = timeline->tracker_tail; ++ if (NULL != oldest_tracker && 0 == oldest_tracker->trigger_ref_count) { ++ if (MALI_FALSE == oldest_tracker->timer_active) { ++ if (MALI_TRUE == timeline->timer_active) { ++ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work); ++ } ++ _mali_osk_wq_delayed_schedule_work(timeline->delayed_work, MALI_TIMELINE_TIMEOUT_HZ); ++ oldest_tracker->timer_active = MALI_TRUE; ++ timeline->timer_active = MALI_TRUE; ++ } ++ } else if (MALI_TRUE == timeline->timer_active) { ++ _mali_osk_wq_delayed_cancel_work_async(timeline->delayed_work); ++ timeline->timer_active = MALI_FALSE; ++ } +} + -+EXPORT_SYMBOL(mali_dev_pause); -+ -+void mali_dev_resume(void) ++static mali_scheduler_mask mali_timeline_update_oldest_point(struct mali_timeline *timeline) +{ -+ mali_pm_os_resume(); -+} ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+EXPORT_SYMBOL(mali_dev_resume); -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c -new file mode 100644 -index 000000000..e026e11e4 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c -@@ -0,0 +1,439 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) -+#include "mali_dma_fence.h" -+#include -+#include -+#endif ++ MALI_DEBUG_CODE({ ++ struct mali_timeline_system *system = timeline->system; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+static DEFINE_SPINLOCK(mali_dma_fence_lock); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ }); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static bool mali_dma_fence_enable_signaling(struct dma_fence *fence) -+{ -+ MALI_IGNORE(fence); -+ return true; -+} ++ if (NULL != timeline->tracker_tail) { ++ /* Set oldest point to oldest tracker's point */ ++ timeline->point_oldest = timeline->tracker_tail->point; ++ } else { ++ /* No trackers, mark point list as empty */ ++ timeline->point_oldest = timeline->point_next; ++ } + -+static const char *mali_dma_fence_get_driver_name(struct dma_fence *fence) -+{ -+ MALI_IGNORE(fence); -+ return "mali"; -+} ++ /* Release all waiters no longer on the timeline's point list. 
++ * Releasing a waiter can trigger this function to be called again, so ++ * we do not store any pointers on stack. */ ++ while (NULL != timeline->waiter_tail) { ++ u32 waiter_time_relative; ++ u32 time_head_relative; ++ struct mali_timeline_waiter *waiter = timeline->waiter_tail; + -+static const char *mali_dma_fence_get_timeline_name(struct dma_fence *fence) -+{ -+ MALI_IGNORE(fence); -+ return "mali_dma_fence"; -+} ++ time_head_relative = timeline->point_next - timeline->point_oldest; ++ waiter_time_relative = waiter->point - timeline->point_oldest; + -+static const struct dma_fence_ops mali_dma_fence_ops = { -+ .get_driver_name = mali_dma_fence_get_driver_name, -+ .get_timeline_name = mali_dma_fence_get_timeline_name, -+ .enable_signaling = mali_dma_fence_enable_signaling, -+ .signaled = NULL, -+ .wait = dma_fence_default_wait, -+ .release = NULL -+}; -+#else -+static bool mali_dma_fence_enable_signaling(struct fence *fence) -+{ -+ MALI_IGNORE(fence); -+ return true; -+} ++ if (waiter_time_relative < time_head_relative) { ++ /* This and all following waiters are on the point list, so we are done. */ ++ break; ++ } + -+static const char *mali_dma_fence_get_driver_name(struct fence *fence) -+{ -+ MALI_IGNORE(fence); -+ return "mali"; ++ /* Remove waiter from timeline's waiter list. */ ++ if (NULL != waiter->timeline_next) { ++ waiter->timeline_next->timeline_prev = NULL; ++ } else { ++ /* This was the last waiter */ ++ timeline->waiter_head = NULL; ++ } ++ timeline->waiter_tail = waiter->timeline_next; ++ ++ /* Release waiter. This could activate a tracker, if this was ++ * the last waiter for the tracker. */ ++ schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter); ++ } ++ ++ return schedule_mask; +} + -+static const char *mali_dma_fence_get_timeline_name(struct fence *fence) ++static mali_scheduler_mask mali_timeline_release_with_depended_point(struct mali_timeline_tracker *tracker) +{ -+ MALI_IGNORE(fence); -+ return "mali_dma_fence"; -+} ++ struct mali_timeline *timeline; ++ struct mali_timeline_waiter *waiter; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+static const struct fence_ops mali_dma_fence_ops = { -+ .get_driver_name = mali_dma_fence_get_driver_name, -+ .get_timeline_name = mali_dma_fence_get_timeline_name, -+ .enable_signaling = mali_dma_fence_enable_signaling, -+ .signaled = NULL, -+ .wait = fence_default_wait, -+ .release = NULL -+}; -+#endif ++ timeline = tracker->timeline; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SOFT == timeline->id); + -+static void mali_dma_fence_context_cleanup(struct mali_dma_fence_context *dma_fence_context) -+{ -+ u32 i; ++ MALI_DEBUG_CODE({ ++ struct mali_timeline_system *system = timeline->system; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ }); + -+ for (i = 0; i < dma_fence_context->num_dma_fence_waiter; i++) { -+ if (dma_fence_context->mali_dma_fence_waiters[i]) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence, -+ &dma_fence_context->mali_dma_fence_waiters[i]->base); -+ dma_fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence); ++ /* Only release the waiter that wait for the tracker. 
*/ ++ waiter = timeline->waiter_tail; ++ while (NULL != waiter) { ++ if (waiter->point == tracker->point) { + -+#else -+ fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence, -+ &dma_fence_context->mali_dma_fence_waiters[i]->base); -+ fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence); -+#endif -+ kfree(dma_fence_context->mali_dma_fence_waiters[i]); -+ dma_fence_context->mali_dma_fence_waiters[i] = NULL; -+ } -+ } ++ struct mali_timeline_waiter *waiter_next; ++ struct mali_timeline_waiter *waiter_prev; + -+ if (NULL != dma_fence_context->mali_dma_fence_waiters) -+ kfree(dma_fence_context->mali_dma_fence_waiters); ++ waiter_next = waiter->timeline_next; ++ waiter_prev = waiter->timeline_prev; ++ waiter->timeline_next = NULL; ++ waiter->timeline_prev = NULL; + -+ dma_fence_context->mali_dma_fence_waiters = NULL; -+ dma_fence_context->num_dma_fence_waiter = 0; -+} ++ if (NULL != waiter_prev) { ++ waiter_prev->timeline_next = waiter_next; ++ } + -+static void mali_dma_fence_context_work_func(struct work_struct *work_handle) -+{ -+ struct mali_dma_fence_context *dma_fence_context; ++ if (NULL != waiter_next) { ++ waiter_next->timeline_prev = waiter_prev; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(work_handle); ++ if (waiter == timeline->waiter_tail) ++ timeline->waiter_tail = waiter_next; + -+ dma_fence_context = container_of(work_handle, struct mali_dma_fence_context, work_handle); ++ if (waiter == timeline->waiter_head) ++ timeline->waiter_head = NULL; + -+ dma_fence_context->cb_func(dma_fence_context->pp_job_ptr); ++ schedule_mask |= mali_timeline_system_release_waiter(timeline->system, waiter); ++ waiter = waiter_next; ++ }else { ++ ++ waiter = waiter->timeline_next; ++ } ++ } ++ ++ return schedule_mask; +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static void mali_dma_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) -+#else -+static void mali_dma_fence_callback(struct fence *fence, struct fence_cb *cb) -+#endif ++void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker, ++ mali_timeline_tracker_type type, ++ struct mali_timeline_fence *fence, ++ void *job) +{ -+ struct mali_dma_fence_waiter *dma_fence_waiter = NULL; -+ struct mali_dma_fence_context *dma_fence_context = NULL; ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ MALI_DEBUG_ASSERT_POINTER(cb); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > type); + -+ MALI_IGNORE(fence); ++ /* Zero out all tracker members. */ ++ _mali_osk_memset(tracker, 0, sizeof(*tracker)); + -+ dma_fence_waiter = container_of(cb, struct mali_dma_fence_waiter, base); -+ dma_fence_context = dma_fence_waiter->parent; ++ tracker->type = type; ++ tracker->job = job; ++ tracker->trigger_ref_count = 1; /* Prevents any callback from trigging while adding it */ ++ tracker->os_tick_create = _mali_osk_time_tickcount(); ++ MALI_DEBUG_CODE(tracker->magic = MALI_TIMELINE_TRACKER_MAGIC); + -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ tracker->activation_error = MALI_TIMELINE_ACTIVATION_ERROR_NONE; + -+ if (atomic_dec_and_test(&dma_fence_context->count)) -+ schedule_work(&dma_fence_context->work_handle); ++ /* Copy fence. 
*/ ++ if (NULL != fence) { ++ _mali_osk_memcpy(&tracker->fence, fence, sizeof(struct mali_timeline_fence)); ++ } +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct dma_fence *fence) -+#else -+static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct fence *fence) -+#endif ++mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker) +{ -+ int ret = 0; -+ struct mali_dma_fence_waiter *dma_fence_waiter; -+ struct mali_dma_fence_waiter **dma_fence_waiters; ++ struct mali_timeline *timeline; ++ struct mali_timeline_system *system; ++ struct mali_timeline_tracker *tracker_next, *tracker_prev; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ u32 tid = _mali_osk_get_tid(); + -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ /* Upon entry a group lock will be held, but not a scheduler lock. */ ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); + -+ dma_fence_waiters = krealloc(dma_fence_context->mali_dma_fence_waiters, -+ (dma_fence_context->num_dma_fence_waiter + 1) -+ * sizeof(struct mali_dma_fence_waiter *), -+ GFP_KERNEL); ++ /* Tracker should have been triggered */ ++ MALI_DEBUG_ASSERT(0 == tracker->trigger_ref_count); + -+ if (NULL == dma_fence_waiters) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to realloc the dma fence waiters.\n")); -+ return _MALI_OSK_ERR_NOMEM; ++ /* All waiters should have been released at this point */ ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head); ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ ++ MALI_DEBUG_PRINT(3, ("Mali Timeline: releasing tracker for job 0x%08X\n", tracker->job)); ++ ++ timeline = tracker->timeline; ++ if (NULL == timeline) { ++ /* Tracker was not on a timeline, there is nothing to release. */ ++ return MALI_SCHEDULER_MASK_EMPTY; + } + -+ dma_fence_context->mali_dma_fence_waiters = dma_fence_waiters; ++ system = timeline->system; ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ dma_fence_waiter = kzalloc(sizeof(struct mali_dma_fence_waiter), GFP_KERNEL); ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ if (NULL == dma_fence_waiter) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create mali dma fence waiter.\n")); -+ return _MALI_OSK_ERR_NOMEM; ++ /* Tracker should still be on timeline */ ++ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); ++ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, tracker->point)); ++ ++ /* Tracker is no longer valid. 
*/ ++ MALI_DEBUG_CODE(tracker->magic = 0); ++ ++ tracker_next = tracker->timeline_next; ++ tracker_prev = tracker->timeline_prev; ++ tracker->timeline_next = NULL; ++ tracker->timeline_prev = NULL; ++ ++ /* Removing tracker from timeline's tracker list */ ++ if (NULL == tracker_next) { ++ /* This tracker was the head */ ++ timeline->tracker_head = tracker_prev; ++ } else { ++ tracker_next->timeline_prev = tracker_prev; + } + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_get(fence); -+#else -+ fence_get(fence); -+#endif -+ dma_fence_waiter->fence = fence; -+ dma_fence_waiter->parent = dma_fence_context; -+ atomic_inc(&dma_fence_context->count); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ ret = dma_fence_add_callback(fence, &dma_fence_waiter->base, -+ mali_dma_fence_callback); -+#else -+ ret = fence_add_callback(fence, &dma_fence_waiter->base, -+ mali_dma_fence_callback); -+#endif -+ if (0 > ret) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_put(fence); -+#else -+ fence_put(fence); -+#endif -+ kfree(dma_fence_waiter); -+ atomic_dec(&dma_fence_context->count); -+ if (-ENOENT == ret) { -+ /*-ENOENT if fence has already been signaled, return _MALI_OSK_ERR_OK*/ -+ return _MALI_OSK_ERR_OK; ++ if (NULL == tracker_prev) { ++ /* This tracker was the tail */ ++ timeline->tracker_tail = tracker_next; ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ /* Update the timeline's oldest time and release any waiters */ ++ schedule_mask |= mali_timeline_update_oldest_point(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ } else { ++ tracker_prev->timeline_next = tracker_next; ++ if (MALI_TIMELINE_SOFT == tracker->timeline->id) { ++ /* Use the signaled soft tracker to release the depended soft waiter */ ++ schedule_mask |= mali_timeline_release_with_depended_point(tracker); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + } -+ /* Failed to add the fence callback into fence, return _MALI_OSK_ERR_FAULT*/ -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into fence.\n")); -+ return _MALI_OSK_ERR_FAULT; + } + -+ dma_fence_context->mali_dma_fence_waiters[dma_fence_context->num_dma_fence_waiter] = dma_fence_waiter; -+ dma_fence_context->num_dma_fence_waiter++; ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+ return _MALI_OSK_ERR_OK; -+} ++ /* Update delayed work only when it is the soft job timeline */ ++ if (MALI_TIMELINE_SOFT == tracker->timeline->id) { ++ mali_timeline_update_delayed_work(tracker->timeline); ++ } + ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+struct dma_fence *mali_dma_fence_new(u32 context, u32 seqno) -+#else -+struct fence *mali_dma_fence_new(u32 context, u32 seqno) -+#endif -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ struct dma_fence *fence = NULL; -+ fence = kzalloc(sizeof(struct dma_fence), GFP_KERNEL); -+#else -+ struct fence *fence = NULL; -+ fence = kzalloc(sizeof(struct fence), GFP_KERNEL); -+#endif -+ if (NULL == fence) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create dma fence.\n")); -+ return fence; -+ } -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_init(fence, -+ &mali_dma_fence_ops, -+ &mali_dma_fence_lock, -+ context, seqno); -+#else -+ fence_init(fence, -+ &mali_dma_fence_ops, -+ &mali_dma_fence_lock, -+ context, seqno); -+#endif -+ return fence; ++ return schedule_mask; +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+void 
mali_dma_fence_signal_and_put(struct dma_fence **fence) -+#else -+void mali_dma_fence_signal_and_put(struct fence **fence) -+#endif ++void mali_timeline_system_release_waiter_list(struct mali_timeline_system *system, ++ struct mali_timeline_waiter *tail, ++ struct mali_timeline_waiter *head) +{ -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ MALI_DEBUG_ASSERT_POINTER(*fence); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_signal(*fence); -+ dma_fence_put(*fence); -+#else -+ fence_signal(*fence); -+ fence_put(*fence); -+#endif -+ *fence = NULL; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(head); ++ MALI_DEBUG_ASSERT_POINTER(tail); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ ++ head->tracker_next = system->waiter_empty_list; ++ system->waiter_empty_list = tail; +} + -+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context, -+ mali_dma_fence_context_callback_func_t cb_func, -+ void *pp_job_ptr) ++static mali_scheduler_mask mali_timeline_tracker_activate(struct mali_timeline_tracker *tracker) +{ -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ struct mali_timeline_system *system; ++ struct mali_timeline *timeline; ++ u32 tid = _mali_osk_get_tid(); + -+ INIT_WORK(&dma_fence_context->work_handle, mali_dma_fence_context_work_func); -+ atomic_set(&dma_fence_context->count, 1); -+ dma_fence_context->num_dma_fence_waiter = 0; -+ dma_fence_context->mali_dma_fence_waiters = NULL; -+ dma_fence_context->cb_func = cb_func; -+ dma_fence_context->pp_job_ptr = pp_job_ptr; -+} ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); + -+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context, -+ struct reservation_object *dma_reservation_object) -+{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; -+ u32 shared_count = 0, i; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ struct dma_fence *exclusive_fence = NULL; -+ struct dma_fence **shared_fences = NULL; -+#else -+ struct fence *exclusive_fence = NULL; -+ struct fence **shared_fences = NULL; -+#endif -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); -+ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object); ++ system = tracker->system; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+ /* Get all the shared/exclusive fences in the reservation object of dma buf*/ -+ ret = reservation_object_get_fences_rcu(dma_reservation_object, &exclusive_fence, -+ &shared_count, &shared_fences); -+ if (ret < 0) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to get shared or exclusive_fence dma fences from the reservation object of dma buf.\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ tracker->os_tick_activate = _mali_osk_time_tickcount(); + -+ if (exclusive_fence) { -+ ret = mali_dma_fence_add_callback(dma_fence_context, exclusive_fence); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into exclusive fence.\n")); -+ mali_dma_fence_context_cleanup(dma_fence_context); -+ goto ended; -+ } ++ if (NULL != tracker->waiter_head) { ++ mali_timeline_system_release_waiter_list(system, tracker->waiter_tail, tracker->waiter_head); ++ tracker->waiter_head = NULL; ++ tracker->waiter_tail = NULL; + } + ++ switch (tracker->type) { ++ case MALI_TIMELINE_TRACKER_GP: ++ schedule_mask = mali_scheduler_activate_gp_job((struct mali_gp_job *) 
tracker->job); + -+ for (i = 0; i < shared_count; i++) { -+ ret = mali_dma_fence_add_callback(dma_fence_context, shared_fences[i]); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into shared fence [%d].\n", i)); -+ mali_dma_fence_context_cleanup(dma_fence_context); -+ break; ++ _mali_osk_atomic_dec(&gp_tracker_count); ++ break; ++ case MALI_TIMELINE_TRACKER_PP: ++ if (mali_pp_job_is_virtual((struct mali_pp_job *)tracker->job)) { ++ _mali_osk_atomic_dec(&virt_pp_tracker_count); ++ } else { ++ _mali_osk_atomic_dec(&phy_pp_tracker_count); + } -+ } -+ -+ended: ++ schedule_mask = mali_scheduler_activate_pp_job((struct mali_pp_job *) tracker->job); ++ break; ++ case MALI_TIMELINE_TRACKER_SOFT: ++ timeline = tracker->timeline; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ if (exclusive_fence) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_put(exclusive_fence); -+#else -+ fence_put(exclusive_fence); -+#endif ++ schedule_mask |= mali_soft_job_system_activate_job((struct mali_soft_job *) tracker->job); + -+ if (shared_fences) { -+ for (i = 0; i < shared_count; i++) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_put(shared_fences[i]); ++ /* Start a soft timer to make sure the soft job be released in a limited time */ ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ mali_timeline_update_delayed_work(timeline); ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ break; ++ case MALI_TIMELINE_TRACKER_WAIT: ++ mali_timeline_fence_wait_activate((struct mali_timeline_fence_wait_tracker *) tracker->job); ++ break; ++ case MALI_TIMELINE_TRACKER_SYNC: ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ mali_timeline_sync_fence_activate((struct mali_timeline_sync_fence_tracker *) tracker->job); +#else -+ fence_put(shared_fences[i]); -+#endif -+ } -+ kfree(shared_fences); ++ MALI_PRINT_ERROR(("Mali Timeline: sync tracker not supported\n", tracker->type)); ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ break; ++ default: ++ MALI_PRINT_ERROR(("Mali Timeline - Illegal tracker type: %d\n", tracker->type)); ++ break; + } + -+ return ret; ++ return schedule_mask; +} + -+ -+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context) ++void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker) +{ -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); -+ atomic_set(&dma_fence_context->count, 0); -+ if (dma_fence_context->work_handle.func) { -+ cancel_work_sync(&dma_fence_context->work_handle); -+ } -+ mali_dma_fence_context_cleanup(dma_fence_context); ++ u32 tid = _mali_osk_get_tid(); ++ ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ ++ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); ++ tracker->trigger_ref_count++; ++ ++ mali_spinlock_reentrant_signal(system->spinlock, tid); +} + -+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context) ++mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error) +{ -+ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ u32 tid = _mali_osk_get_tid(); ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+ if (atomic_dec_and_test(&dma_fence_context->count)) -+ schedule_work(&dma_fence_context->work_handle); -+} ++ 
MALI_DEBUG_ASSERT_POINTER(tracker); ++ MALI_DEBUG_ASSERT_POINTER(system); + ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object, -+ struct reservation_object **dma_reservation_object_list, -+ u32 *num_dma_reservation_object) -+{ -+ u32 i; ++ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); ++ tracker->trigger_ref_count--; + -+ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object); -+ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list); -+ MALI_DEBUG_ASSERT_POINTER(num_dma_reservation_object); ++ tracker->activation_error |= activation_error; + -+ for (i = 0; i < *num_dma_reservation_object; i++) { -+ if (dma_reservation_object_list[i] == dma_reservation_object) -+ return; ++ if (0 == tracker->trigger_ref_count) { ++ schedule_mask |= mali_timeline_tracker_activate(tracker); ++ tracker = NULL; + } + -+ dma_reservation_object_list[*num_dma_reservation_object] = dma_reservation_object; -+ (*num_dma_reservation_object)++; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ ++ return schedule_mask; +} + -+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list, -+ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx) ++void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence) +{ + u32 i; + -+ struct reservation_object *reservation_object_to_slow_lock = NULL; ++ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(uk_fence); + -+ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list); -+ MALI_DEBUG_ASSERT_POINTER(ww_actx); ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ fence->points[i] = uk_fence->points[i]; ++ } + -+ ww_acquire_init(ww_actx, &reservation_ww_class); ++ fence->sync_fd = uk_fence->sync_fd; ++} + -+again: -+ for (i = 0; i < num_dma_reservation_object; i++) { -+ int ret; ++struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session) ++{ ++ u32 i; ++ struct mali_timeline_system *system; + -+ if (dma_reservation_object_list[i] == reservation_object_to_slow_lock) { -+ reservation_object_to_slow_lock = NULL; -+ continue; -+ } ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: creating timeline system\n")); + -+ ret = ww_mutex_lock(&dma_reservation_object_list[i]->lock, ww_actx); ++ system = (struct mali_timeline_system *) _mali_osk_calloc(1, sizeof(struct mali_timeline_system)); ++ if (NULL == system) { ++ return NULL; ++ } + -+ if (ret < 0) { -+ u32 slow_lock_index = i; ++ system->spinlock = mali_spinlock_reentrant_init(_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM); ++ if (NULL == system->spinlock) { ++ mali_timeline_system_destroy(system); ++ return NULL; ++ } + -+ /* unlock all pre locks we have already locked.*/ -+ while (i > 0) { -+ i--; -+ ww_mutex_unlock(&dma_reservation_object_list[i]->lock); -+ } ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ system->timelines[i] = mali_timeline_create(system, (enum mali_timeline_id)i); ++ if (NULL == system->timelines[i]) { ++ mali_timeline_system_destroy(system); ++ return NULL; ++ } ++ } + -+ if (NULL != reservation_object_to_slow_lock) -+ ww_mutex_unlock(&reservation_object_to_slow_lock->lock); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ system->signaled_sync_tl = mali_sync_timeline_create(NULL, "mali-always-signaled"); ++ if (NULL == system->signaled_sync_tl) { ++ mali_timeline_system_destroy(system); ++ return NULL; ++ } ++#endif /* 
defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+ if (ret == -EDEADLK) { -+ reservation_object_to_slow_lock = dma_reservation_object_list[slow_lock_index]; -+ ww_mutex_lock_slow(&reservation_object_to_slow_lock->lock, ww_actx); -+ goto again; -+ } -+ ww_acquire_fini(ww_actx); -+ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to lock all dma reservation objects.\n", i)); -+ return ret; -+ } ++ system->waiter_empty_list = NULL; ++ system->session = session; ++ system->timer_enabled = MALI_TRUE; ++ ++ system->wait_queue = _mali_osk_wait_queue_init(); ++ if (NULL == system->wait_queue) { ++ mali_timeline_system_destroy(system); ++ return NULL; + } + -+ ww_acquire_done(ww_actx); -+ return 0; ++ return system; +} + -+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list, -+ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx) ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE) ++/** ++ * Check if there are any trackers left on timeline. ++ * ++ * Used as a wait queue conditional. ++ * ++ * @param data Timeline. ++ * @return MALI_TRUE if there are no trackers on timeline, MALI_FALSE if not. ++ */ ++static mali_bool mali_timeline_has_no_trackers(void *data) +{ -+ u32 i; ++ struct mali_timeline *timeline = (struct mali_timeline *) data; + -+ for (i = 0; i < num_dma_reservation_object; i++) -+ ww_mutex_unlock(&dma_reservation_object_list[i]->lock); ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ ww_acquire_fini(ww_actx); ++ return mali_timeline_is_empty(timeline); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h -new file mode 100644 -index 000000000..d44f6d1a8 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ ++#if defined(CONFIG_SYNC) ||defined(CONFIG_SYNC_FILE) +/** -+ * @file mali_dma_fence.h ++ * Cancel sync fence waiters waited upon by trackers on all timelines. + * -+ * Mali interface for Linux dma buf fence objects. ++ * Will return after all timelines have no trackers left. ++ * ++ * @param system Timeline system. + */ ++static void mali_timeline_cancel_sync_fence_waiters(struct mali_timeline_system *system) ++{ ++ u32 i; ++ u32 tid = _mali_osk_get_tid(); ++ struct mali_timeline_tracker *tracker, *tracker_next; ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(tracker_list); + -+#ifndef _MALI_DMA_FENCE_H_ -+#define _MALI_DMA_FENCE_H_ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT(system->session->is_aborting); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+#include -+#else -+#include -+#endif -+#include -+#endif ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+struct mali_dma_fence_context; ++ /* Cancel sync fence waiters. 
*/ ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline = system->timelines[i]; + -+/* The mali dma fence context callback function */ -+typedef void (*mali_dma_fence_context_callback_func_t)(void *pp_job_ptr); ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+struct mali_dma_fence_waiter { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ struct dma_fence *fence; -+ struct dma_fence_cb base; -+#else -+ struct fence_cb base; -+ struct fence *fence; -+#endif -+ struct mali_dma_fence_context *parent; -+}; ++ tracker_next = timeline->tracker_tail; ++ while (NULL != tracker_next) { ++ tracker = tracker_next; ++ tracker_next = tracker->timeline_next; + -+struct mali_dma_fence_context { -+ struct work_struct work_handle; -+ struct mali_dma_fence_waiter **mali_dma_fence_waiters; -+ u32 num_dma_fence_waiter; -+ atomic_t count; -+ void *pp_job_ptr; /* the mali pp job pointer */; -+ mali_dma_fence_context_callback_func_t cb_func; -+}; ++ if (NULL == tracker->sync_fence) continue; + -+/* Create a dma fence -+ * @param context The execution context this fence is run on -+ * @param seqno A linearly increasing sequence number for this context -+ * @return the new dma fence if success, or NULL on failure. -+ */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+struct dma_fence *mali_dma_fence_new(u32 context, u32 seqno); -+#else -+struct fence *mali_dma_fence_new(u32 context, u32 seqno); -+#endif -+/* Signal and put dma fence -+ * @param fence The dma fence to signal and put -+ */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+void mali_dma_fence_signal_and_put(struct dma_fence **fence); ++ MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling sync fence wait for tracker 0x%08X.\n", tracker)); ++ ++ /* Cancel sync fence waiter. */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ if (0 == sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) { +#else -+void mali_dma_fence_signal_and_put(struct fence **fence); ++ if (0 == mali_internal_sync_fence_cancel_async(tracker->sync_fence, &tracker->sync_fence_waiter)) { +#endif -+/** -+ * Initialize a mali dma fence context for pp job. -+ * @param dma_fence_context The mali dma fence context to initialize. -+ * @param cb_func The dma fence context callback function to call when all dma fence release. -+ * @param pp_job_ptr The pp_job to call function with. -+ */ -+void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context, -+ mali_dma_fence_context_callback_func_t cb_func, -+ void *pp_job_ptr); -+ -+/** -+ * Add new mali dma fence waiter into mali dma fence context -+ * @param dma_fence_context The mali dma fence context -+ * @param dma_reservation_object the reservation object to create new mali dma fence waiters -+ * @return _MALI_OSK_ERR_OK if success, or not. -+ */ -+_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context, -+ struct reservation_object *dma_reservation_object); -+ -+/** -+ * Release the dma fence context -+ * @param dma_fence_text The mali dma fence context. -+ */ -+void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context); -+ -+/** -+ * Decrease the dma fence context atomic count -+ * @param dma_fence_text The mali dma fence context. -+ */ -+void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context); ++ /* Callback was not called, move tracker to local list. 
*/ ++ _mali_osk_list_add(&tracker->sync_fence_cancel_list, &tracker_list); ++ } ++ } ++ } + -+/** -+ * Get all reservation object -+ * @param dma_reservation_object The reservation object to add into the reservation object list -+ * @param dma_reservation_object_list The reservation object list to store all reservation object -+ * @param num_dma_reservation_object The number of all reservation object -+ */ -+void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object, -+ struct reservation_object **dma_reservation_object_list, -+ u32 *num_dma_reservation_object); ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+/** -+ * Wait/wound mutex lock to lock all reservation object. -+ */ -+int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list, -+ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx); ++ /* Manually call sync fence callback in order to release waiter and trigger activation of tracker. */ ++ _MALI_OSK_LIST_FOREACHENTRY(tracker, tracker_next, &tracker_list, struct mali_timeline_tracker, sync_fence_cancel_list) { ++ mali_timeline_sync_fence_callback(tracker->sync_fence, &tracker->sync_fence_waiter); ++ } + -+/** -+ * Wait/wound mutex lock to unlock all reservation object. -+ */ -+void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list, -+ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx); -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c -new file mode 100644 -index 000000000..e13cbad3e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c -@@ -0,0 +1,783 @@ -+/* -+ * Copyright (C) 2012-2018 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ /* Sleep until all sync fence callbacks are done and all timelines are empty. 
*/ ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline = system->timelines[i]; + -+#include "mali_internal_sync.h" -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#if defined(DEBUG) -+#include "mali_session.h" -+#include "mali_timeline.h" -+#endif ++ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline); ++ } ++} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static const struct dma_fence_ops fence_ops; -+#else -+static const struct fence_ops fence_ops; -+#endif ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct dma_fence *fence) -+#else -+static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct fence *fence) -+#endif ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++static void mali_timeline_cancel_dma_fence_waiters(struct mali_timeline_system *system) +{ -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ return container_of(fence, struct mali_internal_sync_point, base); -+} ++ u32 i, j; ++ u32 tid = _mali_osk_get_tid(); ++ struct mali_pp_job *pp_job = NULL; ++ struct mali_pp_job *next_pp_job = NULL; ++ struct mali_timeline *timeline = NULL; ++ struct mali_timeline_tracker *tracker, *tracker_next; ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(pp_job_list); + -+static inline struct mali_internal_sync_timeline *mali_internal_sync_pt_to_sync_timeline(struct mali_internal_sync_point *sync_pt) -+{ -+ MALI_DEBUG_ASSERT_POINTER(sync_pt); -+ return container_of(sync_pt->base.lock, struct mali_internal_sync_timeline, sync_pt_list_lock); -+} ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT(system->session->is_aborting); + -+static void mali_internal_sync_timeline_free(struct kref *kref_count) -+{ -+ struct mali_internal_sync_timeline *sync_timeline; ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ MALI_DEBUG_ASSERT_POINTER(kref_count); ++ /* Cancel dma fence waiters. */ ++ timeline = system->timelines[MALI_TIMELINE_PP]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ sync_timeline = container_of(kref_count, struct mali_internal_sync_timeline, kref_count); ++ tracker_next = timeline->tracker_tail; ++ while (NULL != tracker_next) { ++ mali_bool fence_is_signaled = MALI_TRUE; ++ tracker = tracker_next; ++ tracker_next = tracker->timeline_next; + -+ if (sync_timeline->ops->release_obj) -+ sync_timeline->ops->release_obj(sync_timeline); ++ if (NULL == tracker->waiter_dma_fence) continue; ++ pp_job = (struct mali_pp_job *)tracker->job; ++ MALI_DEBUG_ASSERT_POINTER(pp_job); ++ MALI_DEBUG_PRINT(3, ("Mali Timeline: Cancelling dma fence waiter for tracker 0x%08X.\n", tracker)); + -+ kfree(sync_timeline); -+} ++ for (j = 0; j < pp_job->dma_fence_context.num_dma_fence_waiter; j++) { ++ if (pp_job->dma_fence_context.mali_dma_fence_waiters[j]) { ++ /* Cancel a previously callback from the fence. ++ * This function returns true if the callback is successfully removed, ++ * or false if the fence has already been signaled. 
++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ bool ret = dma_fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence, ++ &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+static void mali_internal_fence_check_cb_func(struct fence *fence, struct fence_cb *cb) -+#else -+static void mali_internal_fence_check_cb_func(struct dma_fence *fence, struct dma_fence_cb *cb) -+#endif -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ struct mali_internal_sync_fence_cb *check; -+#else -+ struct mali_internal_sync_fence_waiter *waiter; -+#endif -+ struct mali_internal_sync_fence *sync_fence; -+ int ret; -+ MALI_DEBUG_ASSERT_POINTER(cb); -+ MALI_IGNORE(fence); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ check = container_of(cb, struct mali_internal_sync_fence_cb, cb); -+ sync_fence = check->sync_file; -+#else -+ waiter = container_of(cb, struct mali_internal_sync_fence_waiter, cb); -+ sync_fence = (struct mali_internal_sync_fence *)waiter->work.private; -+#endif -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ ret = atomic_dec_and_test(&sync_fence->status); -+ if (ret) -+ wake_up_all(&sync_fence->wq); +#else -+ ret = sync_fence->fence->ops->signaled(sync_fence->fence); -+ -+ if (0 > ret) -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to wait fence 0x%x for sync_fence 0x%x.\n", fence, sync_fence)); -+ if (1 == ret) -+ wake_up_all(&sync_fence->wq); ++ bool ret = fence_remove_callback(pp_job->dma_fence_context.mali_dma_fence_waiters[j]->fence, ++ &pp_job->dma_fence_context.mali_dma_fence_waiters[j]->base); +#endif -+} ++ if (ret) { ++ fence_is_signaled = MALI_FALSE; ++ } ++ } ++ } + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+static void mali_internal_sync_fence_add_fence(struct mali_internal_sync_fence *sync_fence, struct fence *sync_pt) -+{ -+ int fence_num = 0; -+ MALI_DEBUG_ASSERT_POINTER(sync_fence); -+ MALI_DEBUG_ASSERT_POINTER(sync_pt); ++ /* Callbacks were not called, move pp job to local list. */ ++ if (MALI_FALSE == fence_is_signaled) ++ _mali_osk_list_add(&pp_job->list, &pp_job_list); ++ } + -+ fence_num = sync_fence->num_fences; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+ sync_fence->cbs[fence_num].fence = sync_pt; -+ sync_fence->cbs[fence_num].sync_file = sync_fence; ++ /* Manually call dma fence callback in order to release waiter and trigger activation of tracker. */ ++ _MALI_OSK_LIST_FOREACHENTRY(pp_job, next_pp_job, &pp_job_list, struct mali_pp_job, list) { ++ mali_timeline_dma_fence_callback((void *)pp_job); ++ } + -+ if (!fence_add_callback(sync_pt, &sync_fence->cbs[fence_num].cb, mali_internal_fence_check_cb_func)) { -+ fence_get(sync_pt); -+ sync_fence->num_fences++; -+ atomic_inc(&sync_fence->status); ++ /* Sleep until all dma fence callbacks are done and all timelines are empty. 
*/ ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline = system->timelines[i]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_has_no_trackers, (void *) timeline); + } +} +#endif -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+static int mali_internal_sync_fence_wake_up_wq(wait_queue_entry_t *curr, unsigned mode, -+ int wake_flags, void *key) -+#else -+static int mali_internal_sync_fence_wake_up_wq(wait_queue_t *curr, unsigned mode, -+ int wake_flags, void *key) +#endif ++void mali_timeline_system_abort(struct mali_timeline_system *system) +{ -+ struct mali_internal_sync_fence_waiter *wait; -+ MALI_IGNORE(mode); -+ MALI_IGNORE(wake_flags); -+ MALI_IGNORE(key); ++ MALI_DEBUG_CODE(u32 tid = _mali_osk_get_tid();); + -+ wait = container_of(curr, struct mali_internal_sync_fence_waiter, work); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+ list_del_init(&wait->work.entry); -+#else -+ list_del_init(&wait->work.task_list); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT(system->session->is_aborting); ++ ++ MALI_DEBUG_PRINT(3, ("Mali Timeline: Aborting timeline system for session 0x%08X.\n", system->session)); ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ mali_timeline_cancel_sync_fence_waiters(system); ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ mali_timeline_cancel_dma_fence_waiters(system); +#endif -+ wait->callback(wait->work.private, wait); -+ return 1; ++ ++ /* Should not be any waiters or trackers left at this point. */ ++ MALI_DEBUG_CODE({ ++ u32 i; ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) ++ { ++ struct mali_timeline *timeline = system->timelines[i]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(timeline->point_oldest == timeline->point_next); ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_head); ++ MALI_DEBUG_ASSERT(NULL == timeline->tracker_tail); ++ MALI_DEBUG_ASSERT(NULL == timeline->waiter_head); ++ MALI_DEBUG_ASSERT(NULL == timeline->waiter_tail); ++ } ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ }); +} + -+struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops, -+ int size, const char *name) ++void mali_timeline_system_destroy(struct mali_timeline_system *system) +{ -+ struct mali_internal_sync_timeline *sync_timeline = NULL; ++ u32 i; ++ struct mali_timeline_waiter *waiter, *next; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ u32 tid = _mali_osk_get_tid(); ++#endif + -+ MALI_DEBUG_ASSERT_POINTER(ops); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); + -+ if (size < sizeof(struct mali_internal_sync_timeline)) { -+ MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync timeline.\n")); -+ goto err; -+ } ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: destroying timeline system\n")); + -+ sync_timeline = kzalloc(size, GFP_KERNEL); -+ if (NULL == sync_timeline) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to allocate buffer for the mali internal sync timeline.\n")); -+ goto err; -+ } -+ kref_init(&sync_timeline->kref_count); -+ sync_timeline->ops = ops; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ sync_timeline->fence_context = dma_fence_context_alloc(1); ++ if (NULL != system) { ++ ++ /* There should be no 
waiters left on this queue. */ ++ if (NULL != system->wait_queue) { ++ _mali_osk_wait_queue_term(system->wait_queue); ++ system->wait_queue = NULL; ++ } ++ ++ /* Free all waiters in empty list */ ++ waiter = system->waiter_empty_list; ++ while (NULL != waiter) { ++ next = waiter->tracker_next; ++ _mali_osk_free(waiter); ++ waiter = next; ++ } ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (NULL != system->signaled_sync_tl) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_timeline_destroy(system->signaled_sync_tl); +#else -+ sync_timeline->fence_context = fence_context_alloc(1); ++ mali_internal_sync_timeline_destroy(system->signaled_sync_tl); +#endif -+ strlcpy(sync_timeline->name, name, sizeof(sync_timeline->name)); ++ } + -+ INIT_LIST_HEAD(&sync_timeline->sync_pt_list_head); -+ spin_lock_init(&sync_timeline->sync_pt_list_lock); ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ if ((NULL != system->timelines[i]) && (NULL != system->timelines[i]->spinlock)) { ++ mali_spinlock_reentrant_wait(system->timelines[i]->spinlock, tid); ++ system->timelines[i]->destroyed = MALI_TRUE; ++ mali_spinlock_reentrant_signal(system->timelines[i]->spinlock, tid); ++ } ++ } ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+ return sync_timeline; -+err: -+ if (NULL != sync_timeline) { -+ kfree(sync_timeline); ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ if (NULL != system->timelines[i]) { ++ mali_timeline_destroy(system->timelines[i]); ++ } ++ } ++ ++ if (NULL != system->spinlock) { ++ mali_spinlock_reentrant_term(system->spinlock); ++ } ++ ++ _mali_osk_free(system); + } -+ return NULL; +} + -+void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline) ++/** ++ * Find how many waiters are needed for a given fence. ++ * ++ * @param fence The fence to check. ++ * @return Number of waiters needed for fence. 
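++ *
++ * Illustrative note (annotation, not from the original ARM sources): with
++ * the rule below, a fence carrying points on the GP and PP timelines plus a
++ * valid sync_fd needs three waiters when CONFIG_SYNC or CONFIG_SYNC_FILE is
++ * enabled; a fence with no points and sync_fd == -1 needs none, and such a
++ * tracker is activated as soon as it is added. The extra dma-buf fence
++ * waiter used by PP jobs is counted separately by
++ * mali_timeline_system_add_tracker().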
++ */ ++static u32 mali_timeline_fence_num_waiters(struct mali_timeline_fence *fence) +{ -+ MALI_DEBUG_ASSERT_POINTER(sync_timeline); ++ u32 i, num_waiters = 0; + -+ sync_timeline->destroyed = MALI_TRUE; ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ smp_wmb(); ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ if (MALI_TIMELINE_NO_POINT != fence->points[i]) { ++ ++num_waiters; ++ } ++ } + -+ mali_internal_sync_timeline_signal(sync_timeline); -+ kref_put(&sync_timeline->kref_count, mali_internal_sync_timeline_free); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (-1 != fence->sync_fd) ++num_waiters; ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++ return num_waiters; +} + -+void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline) ++static struct mali_timeline_waiter *mali_timeline_system_get_zeroed_waiter(struct mali_timeline_system *system) +{ -+ unsigned long flags; -+ struct mali_internal_sync_point *sync_pt, *next; -+ -+ MALI_DEBUG_ASSERT_POINTER(sync_timeline); ++ struct mali_timeline_waiter *waiter; + -+ spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+ list_for_each_entry_safe(sync_pt, next, &sync_timeline->sync_pt_list_head, -+ sync_pt_list) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ if (dma_fence_is_signaled_locked(&sync_pt->base)) -+#else -+ if (fence_is_signaled_locked(&sync_pt->base)) -+#endif -+ list_del_init(&sync_pt->sync_pt_list); ++ waiter = system->waiter_empty_list; ++ if (NULL != waiter) { ++ /* Remove waiter from empty list and zero it */ ++ system->waiter_empty_list = waiter->tracker_next; ++ _mali_osk_memset(waiter, 0, sizeof(*waiter)); + } + -+ spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags); ++ /* Return NULL if list was empty. 
*/ ++ return waiter; +} + -+struct mali_internal_sync_point *mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size) ++static void mali_timeline_system_allocate_waiters(struct mali_timeline_system *system, ++ struct mali_timeline_waiter **tail, ++ struct mali_timeline_waiter **head, ++ int max_num_waiters) +{ -+ unsigned long flags; -+ struct mali_internal_sync_point *sync_pt = NULL; ++ u32 i, tid = _mali_osk_get_tid(); ++ mali_bool do_alloc; ++ struct mali_timeline_waiter *waiter; + -+ MALI_DEBUG_ASSERT_POINTER(sync_timeline); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(tail); ++ MALI_DEBUG_ASSERT_POINTER(head); + -+ if (size < sizeof(struct mali_internal_sync_point)) { -+ MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync point.\n")); -+ goto err; -+ } ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+ sync_pt = kzalloc(size, GFP_KERNEL); -+ if (NULL == sync_pt) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to allocate buffer for the mali internal sync point.\n")); -+ goto err; ++ *head = *tail = NULL; ++ do_alloc = MALI_FALSE; ++ i = 0; ++ while (i < max_num_waiters) { ++ if (MALI_FALSE == do_alloc) { ++ waiter = mali_timeline_system_get_zeroed_waiter(system); ++ if (NULL == waiter) { ++ do_alloc = MALI_TRUE; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ continue; ++ } ++ } else { ++ waiter = _mali_osk_calloc(1, sizeof(struct mali_timeline_waiter)); ++ if (NULL == waiter) break; ++ } ++ ++i; ++ if (NULL == *tail) { ++ *tail = waiter; ++ *head = waiter; ++ } else { ++ (*head)->tracker_next = waiter; ++ *head = waiter; ++ } + } -+ spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags); -+ kref_get(&sync_timeline->kref_count); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock, -+ sync_timeline->fence_context, ++sync_timeline->value); -+#else -+ fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock, -+ sync_timeline->fence_context, ++sync_timeline->value); -+#endif -+ INIT_LIST_HEAD(&sync_pt->sync_pt_list); -+ spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags); -+ -+ return sync_pt; -+err: -+ if (NULL != sync_pt) { -+ kfree(sync_pt); ++ if (MALI_TRUE == do_alloc) { ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + } -+ return NULL; +} + -+struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd) ++/** ++ * Create waiters for the given tracker. The tracker is activated when all waiters are release. ++ * ++ * @note Tracker can potentially be activated before this function returns. ++ * ++ * @param system Timeline system. ++ * @param tracker Tracker we will create waiters for. ++ * @param waiter_tail List of pre-allocated waiters. ++ * @param waiter_head List of pre-allocated waiters. 
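++ *
++ * Illustrative note (annotation, not from the original ARM sources):
++ * waiter_tail and waiter_head delimit the singly linked list built by
++ * mali_timeline_system_allocate_waiters(). Waiters are consumed from the
++ * tail; any left unused are handed back through
++ * mali_timeline_system_release_waiter_list(), and the system spinlock held
++ * by the caller is released before this function returns.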
++ */ ++static void mali_timeline_system_create_waiters_and_unlock(struct mali_timeline_system *system, ++ struct mali_timeline_tracker *tracker, ++ struct mali_timeline_waiter *waiter_tail, ++ struct mali_timeline_waiter *waiter_head) +{ -+ struct file *file = fget(fd); ++ int i; ++ u32 tid = _mali_osk_get_tid(); ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence = NULL; ++#else ++ struct mali_internal_sync_fence *sync_fence = NULL; ++#endif ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+ if (NULL == file) { -+ return NULL; -+ } ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ return file->private_data; -+} ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+struct mali_internal_sync_fence *mali_internal_sync_fence_merge( -+ struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) -+{ -+ struct mali_internal_sync_fence *new_sync_fence; -+ int i, j, num_fence1, num_fence2, total_fences; -+ struct fence *fence0 = NULL; ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_head); ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ MALI_DEBUG_ASSERT(NULL != tracker->job); + -+ MALI_DEBUG_ASSERT_POINTER(sync_fence1); -+ MALI_DEBUG_ASSERT_POINTER(sync_fence2); ++ /* Creating waiter object for all the timelines the fence is put on. Inserting this waiter ++ * into the timelines sorted list of waiters */ ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ mali_timeline_point point; ++ struct mali_timeline *timeline; ++ struct mali_timeline_waiter *waiter; + -+ num_fence1 = sync_fence1->num_fences; -+ num_fence2 = sync_fence2->num_fences; ++ /* Get point on current timeline from tracker's fence. */ ++ point = tracker->fence.points[i]; + -+ total_fences = num_fence1 + num_fence2; ++ if (likely(MALI_TIMELINE_NO_POINT == point)) { ++ /* Fence contains no point on this timeline so we don't need a waiter. */ ++ continue; ++ } + -+ i = 0; -+ j = 0; ++ timeline = system->timelines[i]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ if (num_fence1 > 0) { -+ fence0 = sync_fence1->cbs[i].fence; -+ i = 1; -+ } else if (num_fence2 > 0) { -+ fence0 = sync_fence2->cbs[i].fence; -+ j = 1; -+ } ++ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) { ++ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", ++ point, timeline->point_oldest, timeline->point_next)); ++ continue; ++ } + -+ new_sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fence0); -+ if (NULL == new_sync_fence) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to create the mali internal sync fence when merging sync fence.\n")); -+ return NULL; -+ } ++ if (likely(mali_timeline_is_point_released(timeline, point))) { ++ /* Tracker representing the point has been released so we don't need a ++ * waiter. */ ++ continue; ++ } + -+ fence_remove_callback(new_sync_fence->cb[0].fence, &new_sync_fence->cb[0].cb); -+ new_sync_fence->num_fences = 0; -+ atomic_dec(&new_sync_fence->status); ++ if ((MALI_TIMELINE_SOFT == timeline->id) && mali_timeline_is_tracker_released(timeline, point)) { ++ /* The tracker that the point related to has already been released, so no need to a waiter. 
*/ ++ continue; ++ } + -+ for (; i < num_fence1 && j < num_fence2;) { -+ struct fence *fence1 = sync_fence1->cbs[i].fence; -+ struct fence *fence2 = sync_fence2->cbs[j].fence; ++ /* The point is on timeline. */ ++ MALI_DEBUG_ASSERT(mali_timeline_is_point_on(timeline, point)); + -+ if (fence1->context < fence2->context) { -+ mali_internal_sync_fence_add_fence(new_sync_fence, fence1); ++ /* Get a new zeroed waiter object. */ ++ if (likely(NULL != waiter_tail)) { ++ waiter = waiter_tail; ++ waiter_tail = waiter_tail->tracker_next; ++ } else { ++ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); ++ continue; ++ } + -+ i++; -+ } else if (fence1->context > fence2->context) { -+ mali_internal_sync_fence_add_fence(new_sync_fence, fence2); ++ /* Yanking the trigger ref count of the tracker. */ ++ tracker->trigger_ref_count++; + -+ j++; ++ waiter->point = point; ++ waiter->tracker = tracker; ++ ++ /* Insert waiter on tracker's singly-linked waiter list. */ ++ if (NULL == tracker->waiter_head) { ++ /* list is empty */ ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ tracker->waiter_tail = waiter; + } else { -+ if (fence1->seqno - fence2->seqno <= INT_MAX) -+ mali_internal_sync_fence_add_fence(new_sync_fence, fence1); -+ else -+ mali_internal_sync_fence_add_fence(new_sync_fence, fence2); -+ i++; -+ j++; ++ tracker->waiter_head->tracker_next = waiter; + } -+ } ++ tracker->waiter_head = waiter; + -+ for (; i < num_fence1; i++) -+ mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence1->cbs[i].fence); ++ /* Add waiter to timeline. */ ++ mali_timeline_insert_waiter(timeline, waiter); ++ } ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (-1 != tracker->fence.sync_fd) { ++ int ret; ++ struct mali_timeline_waiter *waiter; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence = sync_fence_fdget(tracker->fence.sync_fd); ++#else ++ sync_fence = mali_internal_sync_fence_fdget(tracker->fence.sync_fd); ++#endif ++ if (unlikely(NULL == sync_fence)) { ++ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync fence from fd %d\n", tracker->fence.sync_fd)); ++ goto exit; ++ } + -+ for (; j < num_fence2; j++) -+ mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence2->cbs[j].fence); ++ /* Check if we have a zeroed waiter object available. */ ++ if (unlikely(NULL == waiter_tail)) { ++ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); ++ goto exit; ++ } + -+ return new_sync_fence; -+} -+#else -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+static struct fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences) ++ /* Start asynchronous wait that will release waiter when the fence is signaled. 
*/ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback); ++ ret = sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter); +#else -+static struct dma_fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences) ++ mali_internal_sync_fence_waiter_init(&tracker->sync_fence_waiter, mali_timeline_sync_fence_callback); ++ ret = mali_internal_sync_fence_wait_async(sync_fence, &tracker->sync_fence_waiter); +#endif -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ if (sync_fence->fence->ops == &fence_array_ops) { -+ struct fence_array *fence_array = container_of(sync_fence->fence, struct fence_array, base); -+ *num_fences = fence_array->num_fences; -+ return fence_array->fences; ++ if (1 == ret) { ++ /* Fence already signaled, no waiter needed. */ ++ tracker->fence.sync_fd = -1; ++ goto exit; ++ } else if (0 != ret) { ++ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, ret)); ++ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT; ++ goto exit; ++ } ++ ++ /* Grab new zeroed waiter object. */ ++ waiter = waiter_tail; ++ waiter_tail = waiter_tail->tracker_next; ++ ++ /* Increase the trigger ref count of the tracker. */ ++ tracker->trigger_ref_count++; ++ ++ waiter->point = MALI_TIMELINE_NO_POINT; ++ waiter->tracker = tracker; ++ ++ /* Insert waiter on tracker's singly-linked waiter list. */ ++ if (NULL == tracker->waiter_head) { ++ /* list is empty */ ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ tracker->waiter_tail = waiter; ++ } else { ++ tracker->waiter_head->tracker_next = waiter; ++ } ++ tracker->waiter_head = waiter; ++ ++ /* Also store waiter in separate field for easy access by sync callback. */ ++ tracker->waiter_sync = waiter; ++ ++ /* Store the sync fence in tracker so we can retrieve in abort session, if needed. */ ++ tracker->sync_fence = sync_fence; ++ ++ sync_fence = NULL; + } -+#else -+ if (sync_fence->fence->ops == &dma_fence_array_ops) { -+ struct dma_fence_array *fence_array = container_of(sync_fence->fence, struct dma_fence_array, base); -+ *num_fences = fence_array->num_fences; -+ return fence_array->fences; ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)*/ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ if ((NULL != tracker->timeline) && (MALI_TIMELINE_PP == tracker->timeline->id)) { ++ ++ struct mali_pp_job *job = (struct mali_pp_job *)tracker->job; ++ ++ if (0 < job->dma_fence_context.num_dma_fence_waiter) { ++ struct mali_timeline_waiter *waiter; ++ /* Check if we have a zeroed waiter object available. */ ++ if (unlikely(NULL == waiter_tail)) { ++ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate memory for waiter\n")); ++ goto exit; ++ } ++ ++ /* Grab new zeroed waiter object. */ ++ waiter = waiter_tail; ++ waiter_tail = waiter_tail->tracker_next; ++ ++ /* Increase the trigger ref count of the tracker. */ ++ tracker->trigger_ref_count++; ++ ++ waiter->point = MALI_TIMELINE_NO_POINT; ++ waiter->tracker = tracker; ++ ++ /* Insert waiter on tracker's singly-linked waiter list. */ ++ if (NULL == tracker->waiter_head) { ++ /* list is empty */ ++ MALI_DEBUG_ASSERT(NULL == tracker->waiter_tail); ++ tracker->waiter_tail = waiter; ++ } else { ++ tracker->waiter_head->tracker_next = waiter; ++ } ++ tracker->waiter_head = waiter; ++ ++ /* Also store waiter in separate field for easy access by sync callback. 
*/ ++ tracker->waiter_dma_fence = waiter; ++ } + } -+#endif -+ *num_fences = 1; -+ return &sync_fence->fence; -+} ++#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE)*/ + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+static void mali_internal_add_fence_array(struct fence **fences, int *num_fences, struct fence *fence) -+#else -+static void mali_internal_add_fence_array(struct dma_fence **fences, int *num_fences, struct dma_fence *fence) -+#endif -+{ -+ fences[*num_fences] = fence; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ if (!fence_is_signaled(fence)) { -+ fence_get(fence); -+ (*num_fences)++; ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ||defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++exit: ++#endif /* defined(CONFIG_MALI_DMA_BUF_FENCE) || defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++ if (NULL != waiter_tail) { ++ mali_timeline_system_release_waiter_list(system, waiter_tail, waiter_head); + } -+#else -+ if (!dma_fence_is_signaled(fence)) { -+ dma_fence_get(fence); -+ (*num_fences)++; ++ ++ /* Release the initial trigger ref count. */ ++ tracker->trigger_ref_count--; ++ ++ /* If there were no waiters added to this tracker we activate immediately. */ ++ if (0 == tracker->trigger_ref_count) { ++ schedule_mask |= mali_timeline_tracker_activate(tracker); + } -+#endif -+} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence, -+ struct fence **fences, int num_fences) -+#else -+static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence, -+ struct dma_fence **fences, int num_fences) -+#endif -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ struct fence_array *array; -+#else -+ struct dma_fence_array *array; -+#endif -+ if(num_fences == 1) { -+ sync_fence->fence =fences[0]; -+ kfree(fences); -+ } else { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ array = fence_array_create(num_fences, fences, -+ fence_context_alloc(1), 1, false); ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (NULL != sync_fence) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence_put(sync_fence); +#else -+ array = dma_fence_array_create(num_fences, fences, -+ dma_fence_context_alloc(1), 1, false); ++ fput(sync_fence->file); +#endif -+ if (!array){ -+ return -ENOMEM; -+ } -+ sync_fence->fence = &array->base; + } -+ return 0; ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++ mali_executor_schedule_from_mask(schedule_mask, MALI_FALSE); +} + -+struct mali_internal_sync_fence *mali_internal_sync_fence_merge( -+ struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) ++mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system, ++ struct mali_timeline_tracker *tracker, ++ enum mali_timeline_id timeline_id) +{ -+ struct mali_internal_sync_fence *sync_fence; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ struct fence **fences, **nfences, **fences1, **fences2; -+#else -+ struct dma_fence **fences, **nfences, **fences1, **fences2; -+#endif -+ int real_num_fences, i, j, num_fences, num_fences1, num_fences2; ++ int num_waiters = 0; ++ struct mali_timeline_waiter *waiter_tail, *waiter_head; ++ u32 tid = _mali_osk_get_tid(); + -+ fences1 = mali_internal_get_fences(sync_fence1, &num_fences1); -+ fences2 = mali_internal_get_fences(sync_fence2, &num_fences2); ++ mali_timeline_point 
point = MALI_TIMELINE_NO_POINT; + -+ num_fences = num_fences1 + num_fences2; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); -+ if (!fences) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to alloc buffer for fences.\n")); -+ goto fences_alloc_failed; -+ } ++ MALI_DEBUG_ASSERT(MALI_FALSE == system->session->is_aborting); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAX > tracker->type); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_TRACKER_MAGIC == tracker->magic); + -+ for (real_num_fences = i = j = 0; i < num_fences1 && j < num_fences2;) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ struct fence *fence1 = fences1[i]; -+ struct fence *fence2 = fences2[j]; -+#else -+ struct dma_fence *fence1 = fences1[i]; -+ struct dma_fence *fence2 = fences2[j]; -+#endif -+ if (fence1->context < fence2->context) { -+ mali_internal_add_fence_array(fences, &real_num_fences, fence1); ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: adding tracker for job %p, timeline: %d\n", tracker->job, timeline_id)); + -+ i++; -+ } else if (fence1->context > fence2->context) { -+ mali_internal_add_fence_array(fences, &real_num_fences, fence2); ++ MALI_DEBUG_ASSERT(0 < tracker->trigger_ref_count); ++ tracker->system = system; + -+ j++; -+ } else { -+ if (fence1->seqno - fence2->seqno <= INT_MAX) -+ mali_internal_add_fence_array(fences, &real_num_fences, fence1); -+ else -+ mali_internal_add_fence_array(fences, &real_num_fences, fence2); ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ i++; -+ j++; -+ } ++ num_waiters = mali_timeline_fence_num_waiters(&tracker->fence); ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++ if (MALI_TIMELINE_PP == timeline_id) { ++ struct mali_pp_job *job = (struct mali_pp_job *)tracker->job; ++ if (0 < job->dma_fence_context.num_dma_fence_waiter) ++ num_waiters++; + } ++#endif + -+ for (; i < num_fences1; i++) -+ mali_internal_add_fence_array(fences, &real_num_fences, fences1[i]); ++ /* Allocate waiters. */ ++ mali_timeline_system_allocate_waiters(system, &waiter_tail, &waiter_head, num_waiters); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); + -+ for (; j < num_fences2; j++) -+ mali_internal_add_fence_array(fences, &real_num_fences, fences2[j]); ++ /* Add tracker to timeline. This will allocate a point for the tracker on the timeline. If ++ * timeline ID is MALI_TIMELINE_NONE the tracker will NOT be added to a timeline and the ++ * point will be MALI_TIMELINE_NO_POINT. ++ * ++ * NOTE: the tracker can fail to be added if the timeline is full. If this happens, the ++ * point will be MALI_TIMELINE_NO_POINT. */ ++ MALI_DEBUG_ASSERT(timeline_id < MALI_TIMELINE_MAX || timeline_id == MALI_TIMELINE_NONE); ++ if (likely(timeline_id < MALI_TIMELINE_MAX)) { ++ struct mali_timeline *timeline = system->timelines[timeline_id]; ++ mali_timeline_insert_tracker(timeline, tracker); ++ MALI_DEBUG_ASSERT(!mali_timeline_is_empty(timeline)); ++ } + -+ if (0 == real_num_fences) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ fences[real_num_fences++] = fence_get(fences1[0]); -+#else -+ fences[real_num_fences++] = dma_fence_get(fences1[0]); -+#endif ++ point = tracker->point; + -+ if (num_fences > real_num_fences) { -+ nfences = krealloc(fences, real_num_fences * sizeof(*fences), -+ GFP_KERNEL); -+ if (!nfences) -+ goto nfences_alloc_failed; ++ /* Create waiters for tracker based on supplied fence. Each waiter will increase the ++ * trigger ref count. 
*/ ++ mali_timeline_system_create_waiters_and_unlock(system, tracker, waiter_tail, waiter_head); ++ tracker = NULL; + -+ fences = nfences; ++ /* At this point the tracker object might have been freed so we should no longer ++ * access it. */ ++ ++ ++ /* The tracker will always be activated after calling add_tracker, even if NO_POINT is ++ * returned. */ ++ return point; ++} ++ ++static mali_scheduler_mask mali_timeline_system_release_waiter(struct mali_timeline_system *system, ++ struct mali_timeline_waiter *waiter) ++{ ++ struct mali_timeline_tracker *tracker; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(waiter); ++ ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_SYSTEM_LOCKED(system)); ++ ++ tracker = waiter->tracker; ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ ++ /* At this point the waiter has been removed from the timeline's waiter list, but it is ++ * still on the tracker's waiter list. All of the tracker's waiters will be released when ++ * the tracker is activated. */ ++ ++ waiter->point = MALI_TIMELINE_NO_POINT; ++ waiter->tracker = NULL; ++ ++ tracker->trigger_ref_count--; ++ if (0 == tracker->trigger_ref_count) { ++ /* This was the last waiter; activate tracker */ ++ schedule_mask |= mali_timeline_tracker_activate(tracker); ++ tracker = NULL; + } + -+ sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fences[0]); -+ if (NULL == sync_fence) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to create the mali internal sync fence when merging sync fence.\n")); -+ goto sync_fence_alloc_failed; ++ return schedule_mask; ++} ++ ++mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system, ++ enum mali_timeline_id timeline_id) ++{ ++ mali_timeline_point point; ++ struct mali_timeline *timeline; ++ u32 tid = _mali_osk_get_tid(); ++ ++ MALI_DEBUG_ASSERT_POINTER(system); ++ ++ if (MALI_TIMELINE_MAX <= timeline_id) { ++ return MALI_TIMELINE_NO_POINT; + } + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ fence_put(fences[0]); -+#else -+ dma_fence_put(fences[0]); -+#endif ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ if (mali_internal_sync_fence_set_fence_array(sync_fence, fences, real_num_fences) < 0) { -+ MALI_PRINT_ERROR(("Mali internal sync:Failed to set fence for sync fence.\n")); -+ goto sync_fence_set_failed; ++ timeline = system->timelines[timeline_id]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ ++ point = MALI_TIMELINE_NO_POINT; ++ if (timeline->point_oldest != timeline->point_next) { ++ point = timeline->point_next - 1; ++ if (MALI_TIMELINE_NO_POINT == point) point--; + } + -+ return sync_fence; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+sync_fence_set_failed: -+ fput(sync_fence->file); -+sync_fence_alloc_failed: -+ for (i = 0; i < real_num_fences; i++) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ fence_put(fences[i]); -+#else -+ dma_fence_put(fences[i]); -+#endif -+nfences_alloc_failed: -+ kfree(fences); -+fences_alloc_failed: -+ return NULL; ++ return point; +} -+#endif + -+void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter, -+ mali_internal_sync_callback_t callback) ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++static void mali_timeline_do_sync_fence_callback(void *arg) +{ -+ MALI_DEBUG_ASSERT_POINTER(waiter); -+ MALI_DEBUG_ASSERT_POINTER(callback); ++ _MALI_OSK_LIST_HEAD_STATIC_INIT(list); ++ struct mali_timeline_tracker *tracker; ++ struct 
mali_timeline_tracker *tmp_tracker; ++ u32 tid = _mali_osk_get_tid(); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+ INIT_LIST_HEAD(&waiter->work.entry); ++ MALI_IGNORE(arg); ++ ++ /* ++ * Quickly "unhook" the jobs pending to be deleted, so we can release ++ * the lock before we start deleting the job objects ++ * (without any locks held) ++ */ ++ _mali_osk_spinlock_irq_lock(sync_fence_callback_list_lock); ++ _mali_osk_list_move_list(&sync_fence_callback_queue, &list); ++ _mali_osk_spinlock_irq_unlock(sync_fence_callback_list_lock); ++ ++ _MALI_OSK_LIST_FOREACHENTRY(tracker, tmp_tracker, &list, ++ struct mali_timeline_tracker, sync_fence_signal_list) { ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ mali_bool is_aborting = MALI_FALSE; ++ int fence_status = 0; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence = NULL; +#else -+ INIT_LIST_HEAD(&waiter->work.task_list); ++ struct mali_internal_sync_fence *sync_fence = NULL; +#endif -+ waiter->callback = callback; -+} ++ struct mali_timeline_system *system = NULL; ++ struct mali_timeline_waiter *waiter = NULL; + -+int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence, -+ struct mali_internal_sync_fence_waiter *waiter) -+{ -+ int err; -+ unsigned long flags; ++ _mali_osk_list_delinit(&tracker->sync_fence_signal_list); + -+ MALI_DEBUG_ASSERT_POINTER(sync_fence); -+ MALI_DEBUG_ASSERT_POINTER(waiter); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ err = atomic_read(&sync_fence->status); ++ sync_fence = tracker->sync_fence; ++ MALI_DEBUG_ASSERT_POINTER(sync_fence); + -+ if (0 > err) -+ return err; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ fence_status = sync_fence->status; ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ fence_status = atomic_read(&sync_fence->status); ++#else ++ fence_status = sync_fence->fence->ops->signaled(sync_fence->fence); ++#endif + -+ if (!err) -+ return 1; ++ system = tracker->system; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); + -+ init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq); -+ waiter->work.private = sync_fence; ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ spin_lock_irqsave(&sync_fence->wq.lock, flags); -+ err = atomic_read(&sync_fence->status); ++ is_aborting = system->session->is_aborting; ++ if (!is_aborting && (0 > fence_status)) { ++ MALI_PRINT_ERROR(("Mali Timeline: sync fence fd %d signaled with error %d\n", tracker->fence.sync_fd, fence_status)); ++ tracker->activation_error |= MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT; ++ } + -+ if (0 < err) -+ __add_wait_queue_tail(&sync_fence->wq, &waiter->work); -+ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); ++ waiter = tracker->waiter_sync; ++ MALI_DEBUG_ASSERT_POINTER(waiter); + -+ if (0 > err) -+ return err; ++ tracker->sync_fence = NULL; ++ tracker->fence.sync_fd = -1; + -+ return !err; -+#else -+ if ((sync_fence->fence) && (sync_fence->fence->ops) && (sync_fence->fence->ops->signaled)) -+ err = sync_fence->fence->ops->signaled(sync_fence->fence); -+ else -+ err = -1; ++ schedule_mask |= mali_timeline_system_release_waiter(system, waiter); + -+ if (0 > err) -+ return err; ++ /* If aborting, wake up sleepers that are waiting for sync fence callbacks to complete. 
*/ ++ if (is_aborting) { ++ _mali_osk_wait_queue_wake_up(system->wait_queue); ++ } + -+ if (1 == err) -+ return err; ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ err = dma_fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func); -+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -+ err = fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func); -+#endif ++ /* ++ * Older versions of Linux, before 3.5, doesn't support fput() in interrupt ++ * context. For those older kernels, allocate a list object and put the ++ * fence object on that and defer the call to sync_fence_put() to a workqueue. ++ */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ++ { ++ struct mali_deferred_fence_put_entry *obj; + -+ if (0 != err) { -+ if (-ENOENT == err) -+ err = 1; -+ return err; -+ } -+ init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq); -+ waiter->work.private = sync_fence; ++ obj = kzalloc(sizeof(struct mali_deferred_fence_put_entry), GFP_ATOMIC); ++ if (obj) { ++ unsigned long flags; ++ mali_bool schedule = MALI_FALSE; + -+ spin_lock_irqsave(&sync_fence->wq.lock, flags); -+ err = sync_fence->fence->ops->signaled(sync_fence->fence); ++ obj->fence = sync_fence; + -+ if (0 == err){ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+ __add_wait_queue_entry_tail(&sync_fence->wq, &waiter->work); ++ spin_lock_irqsave(&mali_timeline_sync_fence_to_free_lock, flags); ++ if (hlist_empty(&mali_timeline_sync_fence_to_free_list)) ++ schedule = MALI_TRUE; ++ hlist_add_head(&obj->list, &mali_timeline_sync_fence_to_free_list); ++ spin_unlock_irqrestore(&mali_timeline_sync_fence_to_free_lock, flags); ++ ++ if (schedule) ++ schedule_delayed_work(&delayed_sync_fence_put, 0); ++ } ++ } +#else -+ __add_wait_queue_tail(&sync_fence->wq, &waiter->work); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence_put(sync_fence); ++#else ++ fput(sync_fence->file); +#endif -+ } -+ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) */ + -+ return err; ++ if (!is_aborting) { ++ mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE); ++ } ++ } ++} ++#endif ++_mali_osk_errcode_t mali_timeline_initialize(void) ++{ ++ _mali_osk_atomic_init(&gp_tracker_count, 0); ++ _mali_osk_atomic_init(&phy_pp_tracker_count, 0); ++ _mali_osk_atomic_init(&virt_pp_tracker_count, 0); ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ sync_fence_callback_list_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); ++ if (NULL == sync_fence_callback_list_lock) { ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ ++ sync_fence_callback_work_t = _mali_osk_wq_create_work( ++ mali_timeline_do_sync_fence_callback, NULL); ++ ++ if (NULL == sync_fence_callback_work_t) { ++ return _MALI_OSK_ERR_FAULT; ++ } +#endif ++ return _MALI_OSK_ERR_OK; +} + -+int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence, -+ struct mali_internal_sync_fence_waiter *waiter) ++ ++void mali_timeline_terminate(void) +{ -+ unsigned long flags; -+ int ret = 0; ++ _mali_osk_atomic_term(&gp_tracker_count); ++ _mali_osk_atomic_term(&phy_pp_tracker_count); ++ _mali_osk_atomic_term(&virt_pp_tracker_count); + -+ MALI_DEBUG_ASSERT_POINTER(sync_fence); -+ MALI_DEBUG_ASSERT_POINTER(waiter); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (NULL != sync_fence_callback_list_lock) { ++ 
_mali_osk_spinlock_irq_term(sync_fence_callback_list_lock); ++ sync_fence_callback_list_lock = NULL; ++ } + -+ spin_lock_irqsave(&sync_fence->wq.lock, flags); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+ if (!list_empty(&waiter->work.entry)) -+ list_del_init(&waiter->work.entry); -+#else -+ if (!list_empty(&waiter->work.task_list)) -+ list_del_init(&waiter->work.task_list); ++ if (NULL != sync_fence_callback_work_t) { ++ _mali_osk_wq_delete_work(sync_fence_callback_work_t); ++ sync_fence_callback_work_t = NULL; ++ } +#endif -+ else -+ ret = -ENOENT; -+ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); ++} + -+ if (0 == ret) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_remove_callback(sync_fence->fence, &waiter->cb); -+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -+ fence_remove_callback(sync_fence->fence, &waiter->cb); -+#endif ++#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) ++ ++static mali_bool is_waiting_on_timeline(struct mali_timeline_tracker *tracker, enum mali_timeline_id id) ++{ ++ struct mali_timeline *timeline; ++ struct mali_timeline_system *system; ++ ++ MALI_DEBUG_ASSERT_POINTER(tracker); + ++ MALI_DEBUG_ASSERT_POINTER(tracker->timeline); ++ timeline = tracker->timeline; ++ ++ MALI_DEBUG_ASSERT_POINTER(timeline->system); ++ system = timeline->system; ++ ++ if (MALI_TIMELINE_MAX > id) { ++ if (MALI_TIMELINE_NO_POINT != tracker->fence.points[id]) { ++ return mali_timeline_is_point_on(system->timelines[id], tracker->fence.points[id]); ++ } else { ++ return MALI_FALSE; ++ } ++ } else { ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NONE == id); ++ return MALI_FALSE; + } ++} + -+ return ret; ++static const char *timeline_id_to_string(enum mali_timeline_id id) ++{ ++ switch (id) { ++ case MALI_TIMELINE_GP: ++ return "GP"; ++ case MALI_TIMELINE_PP: ++ return "PP"; ++ case MALI_TIMELINE_SOFT: ++ return "SOFT"; ++ default: ++ return "NONE"; ++ } +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static const char *mali_internal_fence_get_driver_name(struct dma_fence *fence) -+#else -+static const char *mali_internal_fence_get_driver_name(struct fence *fence) -+#endif ++static const char *timeline_tracker_type_to_string(enum mali_timeline_tracker_type type) +{ -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; ++ switch (type) { ++ case MALI_TIMELINE_TRACKER_GP: ++ return "GP"; ++ case MALI_TIMELINE_TRACKER_PP: ++ return "PP"; ++ case MALI_TIMELINE_TRACKER_SOFT: ++ return "SOFT"; ++ case MALI_TIMELINE_TRACKER_WAIT: ++ return "WAIT"; ++ case MALI_TIMELINE_TRACKER_SYNC: ++ return "SYNC"; ++ default: ++ return "INVALID"; ++ } ++} + -+ MALI_DEBUG_ASSERT_POINTER(fence); ++mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker) ++{ ++ struct mali_timeline *timeline = NULL; + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ timeline = tracker->timeline; + -+ return parent->ops->driver_name; ++ if (0 != tracker->trigger_ref_count) { ++ return MALI_TIMELINE_TS_WAITING; ++ } ++ ++ if (timeline && (timeline->tracker_tail == tracker || NULL != tracker->timeline_prev)) { ++ return MALI_TIMELINE_TS_ACTIVE; ++ } ++ ++ if (timeline && (MALI_TIMELINE_NO_POINT == tracker->point)) { ++ return MALI_TIMELINE_TS_INIT; ++ } ++ ++ return MALI_TIMELINE_TS_FINISH; +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static const char 
*mali_internal_fence_get_timeline_name(struct dma_fence *fence) -+#else -+static const char *mali_internal_fence_get_timeline_name(struct fence *fence) -+#endif ++void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx) +{ -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; ++ const char *tracker_state = "IWAF"; ++ char state_char = 'I'; ++ char tracker_type[32] = {0}; + -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker)); ++ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type)); + -+ return parent->name; -+} ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (0 != tracker->trigger_ref_count) { ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], ++ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], ++ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job))); ++ } else { ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job)); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ tracker->fence.sync_fd, (unsigned int)(uintptr_t)(tracker->sync_fence), (unsigned int)(uintptr_t)(tracker->job))); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static void mali_internal_fence_release(struct dma_fence *fence) ++ } +#else -+static void mali_internal_fence_release(struct fence *fence) ++ if (0 != tracker->trigger_ref_count) { ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? 
"WaitSOFT" : " ", tracker->fence.points[2], ++ (unsigned int)(uintptr_t)(tracker->job)); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], ++ (unsigned int)(uintptr_t)(tracker->job))); ++ } else { ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: %s %u %c job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ (unsigned int)(uintptr_t)(tracker->job)); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: %s %u %c job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ (unsigned int)(uintptr_t)(tracker->job))); ++ ++ } +#endif ++} ++ ++void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx) +{ -+ unsigned long flags; -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; ++ struct mali_timeline_tracker *tracker = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ tracker = timeline->tracker_tail; ++ while (NULL != tracker) { ++ mali_timeline_debug_print_tracker(tracker, print_ctx); ++ tracker = tracker->timeline_next; ++ } ++} + ++#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)) ++void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker) ++{ ++ const char *tracker_state = "IWAF"; ++ char state_char = 'I'; ++ char tracker_type[32] = {0}; + -+ spin_lock_irqsave(fence->lock, flags); -+ if (WARN_ON_ONCE(!list_empty(&sync_pt->sync_pt_list))) -+ list_del(&sync_pt->sync_pt_list); -+ spin_unlock_irqrestore(fence->lock, flags); ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ if (parent->ops->free_pt) -+ parent->ops->free_pt(sync_pt); ++ state_char = *(tracker_state + mali_timeline_debug_get_tracker_state(tracker)); ++ _mali_osk_snprintf(tracker_type, sizeof(tracker_type), "%s", timeline_tracker_type_to_string(tracker->type)); + -+ kref_put(&parent->kref_count, mali_internal_sync_timeline_free); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_free(&sync_pt->base); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (0 != tracker->trigger_ref_count) { ++ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u), fd:%d, fence:(0x%08X)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? 
"WaitSOFT" : " ", tracker->fence.points[2], ++ tracker->fence.sync_fd, tracker->sync_fence, tracker->job)); ++ } else { ++ MALI_PRINT(("TL: %s %u %c fd:%d fence:(0x%08X) job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ tracker->fence.sync_fd, tracker->sync_fence, tracker->job)); ++ } +#else -+ fence_free(&sync_pt->base); ++ if (0 != tracker->trigger_ref_count) { ++ MALI_PRINT(("TL: %s %u %c - ref_wait:%u [%s(%u),%s(%u),%s(%u)] job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, tracker->trigger_ref_count, ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_GP) ? "WaitGP" : " ", tracker->fence.points[0], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_PP) ? "WaitPP" : " ", tracker->fence.points[1], ++ is_waiting_on_timeline(tracker, MALI_TIMELINE_SOFT) ? "WaitSOFT" : " ", tracker->fence.points[2], ++ tracker->job)); ++ } else { ++ MALI_PRINT(("TL: %s %u %c job:(0x%08X)\n", ++ tracker_type, tracker->point, state_char, ++ tracker->job)); ++ } +#endif +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static bool mali_internal_fence_signaled(struct dma_fence *fence) -+#else -+static bool mali_internal_fence_signaled(struct fence *fence) -+#endif ++void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline) +{ -+ int ret; -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; -+ -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ struct mali_timeline_tracker *tracker = NULL; + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ ret = parent->ops->has_signaled(sync_pt); -+ if (0 > ret) -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) \ -+ || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))) -+ fence->error = ret; -+#else -+ fence->status = ret; -+#endif -+ return ret; ++ tracker = timeline->tracker_tail; ++ while (NULL != tracker) { ++ mali_timeline_debug_direct_print_tracker(tracker); ++ tracker = tracker->timeline_next; ++ } +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static bool mali_internal_fence_enable_signaling(struct dma_fence *fence) -+#else -+static bool mali_internal_fence_enable_signaling(struct fence *fence) +#endif ++ ++void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx) +{ -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; ++ int i; ++ int num_printed = 0; ++ u32 tid = _mali_osk_get_tid(); + -+ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(system); + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+ if (mali_internal_fence_signaled(fence)) -+ return false; ++ /* Print all timelines */ ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline = system->timelines[i]; + -+ list_add_tail(&sync_pt->sync_pt_list, &parent->sync_pt_list_head); -+ return true; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ ++ if (NULL == timeline->tracker_head) continue; ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: Timeline %s:\n", ++ timeline_id_to_string((enum mali_timeline_id)i)); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: Timeline %s: oldest (%u) next(%u)\n", ++ timeline_id_to_string((enum mali_timeline_id)i), timeline->point_oldest, timeline->point_next)); ++ ++ 
mali_timeline_debug_print_timeline(timeline, print_ctx); ++ num_printed++; ++ } ++ ++ if (0 == num_printed) { ++ if (print_ctx) ++ _mali_osk_ctxprintf(print_ctx, "TL: All timelines empty\n"); ++ else ++ MALI_DEBUG_PRINT(2, ("TL: All timelines empty\n")); ++ } ++ ++ mali_spinlock_reentrant_signal(system->spinlock, tid); +} + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static void mali_internal_fence_value_str(struct dma_fence *fence, char *str, int size) -+#else -+static void mali_internal_fence_value_str(struct fence *fence, char *str, int size) -+#endif ++#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */ ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++void mali_timeline_dma_fence_callback(void *pp_job_ptr) +{ -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_timeline *parent; ++ struct mali_timeline_system *system; ++ struct mali_timeline_waiter *waiter; ++ struct mali_timeline_tracker *tracker; ++ struct mali_pp_job *pp_job = (struct mali_pp_job *)pp_job_ptr; ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; ++ u32 tid = _mali_osk_get_tid(); ++ mali_bool is_aborting = MALI_FALSE; + -+ MALI_DEBUG_ASSERT_POINTER(fence); -+ MALI_IGNORE(str); -+ MALI_IGNORE(size); ++ MALI_DEBUG_ASSERT_POINTER(pp_job); + -+ sync_pt = mali_internal_fence_to_sync_pt(fence); -+ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ tracker = &pp_job->tracker; ++ MALI_DEBUG_ASSERT_POINTER(tracker); + -+ parent->ops->print_sync_pt(sync_pt); -+} ++ system = tracker->system; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(system->session); + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+static const struct dma_fence_ops fence_ops = { -+#else -+static const struct fence_ops fence_ops = { -+#endif -+ .get_driver_name = mali_internal_fence_get_driver_name, -+ .get_timeline_name = mali_internal_fence_get_timeline_name, -+ .enable_signaling = mali_internal_fence_enable_signaling, -+ .signaled = mali_internal_fence_signaled, -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ .wait = dma_fence_default_wait, -+#else -+ .wait = fence_default_wait, -+#endif -+ .release = mali_internal_fence_release, -+ .fence_value_str = mali_internal_fence_value_str, -+}; ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ ++ waiter = tracker->waiter_dma_fence; ++ MALI_DEBUG_ASSERT_POINTER(waiter); ++ ++ schedule_mask |= mali_timeline_system_release_waiter(system, waiter); ++ ++ is_aborting = system->session->is_aborting; ++ ++ /* If aborting, wake up sleepers that are waiting for dma fence callbacks to complete. */ ++ if (is_aborting) { ++ _mali_osk_wait_queue_wake_up(system->wait_queue); ++ } ++ ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ ++ if (!is_aborting) { ++ mali_executor_schedule_from_mask(schedule_mask, MALI_TRUE); ++ } ++} +#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline.h new file mode 100644 -index 000000000..dbb29222b +index 000000000..3e8bfc8fb --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h -@@ -0,0 +1,191 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline.h +@@ -0,0 +1,587 @@ +/* -+ * Copyright (C) 2012-2015, 2017-2018 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -308046,1369 +311203,1073 @@ index 000000000..dbb29222b + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_internal_sync.h -+ * -+ * Mali internal structure/interface for sync. -+ */ ++#ifndef __MALI_TIMELINE_H__ ++#define __MALI_TIMELINE_H__ + -+#ifndef _MALI_INTERNAL_SYNC_H -+#define _MALI_INTERNAL_SYNC_H ++#include "mali_osk.h" ++#include "mali_ukk.h" ++#include "mali_session.h" ++#include "mali_kernel_common.h" ++#include "mali_spinlock_reentrant.h" ++#include "mali_sync.h" ++#include "mali_scheduler_types.h" +#include -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) -+#include -+#include -+#include -+#include -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) -+#include -+#else -+#include -+#endif -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+#include -+#else -+#include -+#endif -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+#include -+#else -+#include -+#endif -+#endif -+ -+struct mali_internal_sync_timeline; -+struct mali_internal_sync_point; -+struct mali_internal_sync_fence; -+ -+struct mali_internal_sync_timeline_ops { -+ const char *driver_name; -+ int (*has_signaled)(struct mali_internal_sync_point *pt); -+ void (*free_pt)(struct mali_internal_sync_point *sync_pt); -+ void (*release_obj)(struct mali_internal_sync_timeline *sync_timeline); -+ void (*print_sync_pt)(struct mali_internal_sync_point *sync_pt); -+}; -+ -+struct mali_internal_sync_timeline { -+ struct kref kref_count; -+ const struct mali_internal_sync_timeline_ops *ops; -+ char name[32]; -+ bool destroyed; -+ int fence_context; -+ int value; -+ spinlock_t sync_pt_list_lock; -+ struct list_head sync_pt_list_head; -+}; -+ -+struct mali_internal_sync_point { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ struct dma_fence base; -+#else -+ struct fence base; -+#endif -+ struct list_head sync_pt_list; -+}; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+struct mali_internal_sync_fence_cb { -+ struct fence_cb cb; -+ struct fence *fence; -+ struct mali_internal_sync_fence *sync_file; -+}; -+#endif ++/** ++ * Soft job timeout. ++ * ++ * Soft jobs have to be signaled as complete after activation. Normally this is done by user space, ++ * but in order to guarantee that every soft job is completed, we also have a timer. ++ */ ++#define MALI_TIMELINE_TIMEOUT_HZ ((unsigned long) (HZ * 3 / 2)) /* 1500 ms. */ + -+struct mali_internal_sync_fence { -+ struct file *file; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) -+ struct kref kref; -+#endif -+ char name[32]; -+#ifdef CONFIG_DEBUG_FS -+ struct list_head sync_file_list; -+#endif -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ int num_fences; -+#endif -+ wait_queue_head_t wq; -+#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 12, 0) -+ unsigned long flags; -+#endif -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ atomic_t status; -+ struct mali_internal_sync_fence_cb cbs[]; -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ struct fence *fence; -+ struct fence_cb cb; -+#else -+ struct dma_fence *fence; -+ struct dma_fence_cb cb; -+#endif -+}; ++/** ++ * Timeline type. 
++ */ ++typedef enum mali_timeline_id { ++ MALI_TIMELINE_GP = MALI_UK_TIMELINE_GP, /**< GP job timeline. */ ++ MALI_TIMELINE_PP = MALI_UK_TIMELINE_PP, /**< PP job timeline. */ ++ MALI_TIMELINE_SOFT = MALI_UK_TIMELINE_SOFT, /**< Soft job timeline. */ ++ MALI_TIMELINE_MAX = MALI_UK_TIMELINE_MAX ++} mali_timeline_id; + -+struct mali_internal_sync_fence_waiter; ++/** ++ * Used by trackers that should not be added to a timeline (@ref mali_timeline_system_add_tracker). ++ */ ++#define MALI_TIMELINE_NONE MALI_TIMELINE_MAX + -+typedef void (*mali_internal_sync_callback_t)(struct mali_internal_sync_fence *sync_fence, -+ struct mali_internal_sync_fence_waiter *waiter); ++/** ++ * Tracker type. ++ */ ++typedef enum mali_timeline_tracker_type { ++ MALI_TIMELINE_TRACKER_GP = 0, /**< Tracker used by GP jobs. */ ++ MALI_TIMELINE_TRACKER_PP = 1, /**< Tracker used by PP jobs. */ ++ MALI_TIMELINE_TRACKER_SOFT = 2, /**< Tracker used by soft jobs. */ ++ MALI_TIMELINE_TRACKER_WAIT = 3, /**< Tracker used for fence wait. */ ++ MALI_TIMELINE_TRACKER_SYNC = 4, /**< Tracker used for sync fence. */ ++ MALI_TIMELINE_TRACKER_MAX = 5, ++} mali_timeline_tracker_type; + -+struct mali_internal_sync_fence_waiter { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) -+ wait_queue_entry_t work; -+#else -+ wait_queue_t work; -+#endif -+ mali_internal_sync_callback_t callback; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ struct fence_cb cb; -+#else -+ struct dma_fence_cb cb; -+#endif -+#endif -+}; ++/** ++ * Tracker activation error. ++ */ ++typedef u32 mali_timeline_activation_error; ++#define MALI_TIMELINE_ACTIVATION_ERROR_NONE 0 ++#define MALI_TIMELINE_ACTIVATION_ERROR_SYNC_BIT (1<<1) ++#define MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT (1<<0) + +/** -+ * Create a mali internal sync timeline. -+ * @param ops The implementation ops for the mali internal sync timeline -+ * @param size The size to allocate -+ * @param name The sync_timeline name -+ * @return The new mali internal sync timeline if successful, NULL if not. ++ * Type used to represent a point on a timeline. + */ -+struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops, -+ int size, const char *name); ++typedef u32 mali_timeline_point; + +/** -+ * Destroy one mali internal sync timeline. -+ * @param sync_timeline The mali internal sync timeline to destroy. ++ * Used to represent that no point on a timeline. + */ -+void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline); ++#define MALI_TIMELINE_NO_POINT ((mali_timeline_point) 0) + +/** -+ * Signal one mali internal sync timeline. -+ * @param sync_timeline The mali internal sync timeline to signal. ++ * The maximum span of points on a timeline. A timeline will be considered full if the difference ++ * between the oldest and newest points is equal or larger to this value. + */ -+void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline); ++#define MALI_TIMELINE_MAX_POINT_SPAN 65536 + +/** -+ * Create one mali internal sync point. -+ * @param sync_timeline The mali internal sync timeline to add this mali internal sync point. -+ * @return the new mali internal sync point if successful, NULL if not. ++ * Magic value used to assert on validity of trackers. 
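++ * (Illustrative note, annotation: trackers carry this value in their magic
++ * field, and mali_timeline_system_add_tracker() asserts that
++ * MALI_TIMELINE_TRACKER_MAGIC == tracker->magic, helping to catch freed or
++ * corrupted tracker objects in debug builds.)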
+ */ -+struct mali_internal_sync_point *mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size); ++#define MALI_TIMELINE_TRACKER_MAGIC 0xabcdabcd ++ ++struct mali_timeline; ++struct mali_timeline_waiter; ++struct mali_timeline_tracker; + +/** -+ * Merge mali internal sync fences -+ * @param sync_fence1 The mali internal sync fence to merge -+ * @param sync_fence2 The mali internal sync fence to merge -+ * @return the new mali internal sync fence if successful, NULL if not. ++ * Timeline fence. + */ -+struct mali_internal_sync_fence *mali_internal_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, -+ struct mali_internal_sync_fence *sync_fence2); ++struct mali_timeline_fence { ++ mali_timeline_point points[MALI_TIMELINE_MAX]; /**< For each timeline, a point or MALI_TIMELINE_NO_POINT. */ ++ s32 sync_fd; /**< A file descriptor representing a sync fence, or -1. */ ++}; + +/** -+ * Get the mali internal sync fence from sync fd -+ * @param fd The sync handle to get the mali internal sync fence -+ * @return the mali internal sync fence if successful, NULL if not. ++ * Timeline system. ++ * ++ * The Timeline system has a set of timelines associated with a session. + */ -+struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd); ++struct mali_timeline_system { ++ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */ ++ struct mali_timeline *timelines[MALI_TIMELINE_MAX]; /**< The timelines in this system */ + ++ /* Single-linked list of unused waiter objects. Uses the tracker_next field in tracker. */ ++ struct mali_timeline_waiter *waiter_empty_list; + -+void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter, -+ mali_internal_sync_callback_t callback); ++ struct mali_session_data *session; /**< Session that owns this system. */ + -+int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence, -+ struct mali_internal_sync_fence_waiter *waiter); ++ mali_bool timer_enabled; /**< Set to MALI_TRUE if soft job timer should be enabled, MALI_FALSE if not. */ + -+int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence, -+ struct mali_internal_sync_fence_waiter *waiter); ++ _mali_osk_wait_queue_t *wait_queue; /**< Wait queue. */ ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_timeline *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */ ++#else ++ struct mali_internal_sync_timeline *signaled_sync_tl; /**< Special sync timeline used to create pre-signaled sync fences */ ++#endif ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++}; + -+#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)*/ -+#endif /* _MALI_INTERNAL_SYNC_H */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c -new file mode 100644 -index 000000000..10ee22028 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c -@@ -0,0 +1,1168 @@ +/** -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * Timeline. Each Timeline system will have MALI_TIMELINE_MAX timelines. + */ ++struct mali_timeline { ++ mali_timeline_point point_next; /**< The next available point. */ ++ mali_timeline_point point_oldest; /**< The oldest point not released. */ + ++ /* Double-linked list of trackers. Sorted in ascending order by tracker->time_number with ++ * tail pointing to the tracker with the oldest time. */ ++ struct mali_timeline_tracker *tracker_head; ++ struct mali_timeline_tracker *tracker_tail; + -+/** -+ * @file mali_kernel_linux.c -+ * Implementation of the Linux device driver entrypoints -+ */ -+#include "../platform/rk/custom_log.h" -+#include "../platform/rk/rk_ext.h" ++ /* Double-linked list of waiters. Sorted in ascending order by waiter->time_number_wait ++ * with tail pointing to the waiter with oldest wait time. */ ++ struct mali_timeline_waiter *waiter_head; ++ struct mali_timeline_waiter *waiter_tail; + -+#include /* kernel module definitions */ -+#include /* file system operations */ -+#include /* character device definitions */ -+#include /* memory manager definitions */ -+#include -+#include -+#include -+#include "mali_kernel_license.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ struct mali_timeline_system *system; /**< Timeline system this timeline belongs to. */ ++ enum mali_timeline_id id; /**< Timeline type. */ + -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_kernel_core.h" -+#include "mali_osk.h" -+#include "mali_kernel_linux.h" -+#include "mali_ukk.h" -+#include "mali_ukk_wrappers.h" -+#include "mali_kernel_sysfs.h" -+#include "mali_pm.h" -+#include "mali_kernel_license.h" -+#include "mali_memory.h" -+#include "mali_memory_dma_buf.h" -+#include "mali_memory_manager.h" -+#include "mali_memory_swap_alloc.h" -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+#include "mali_profiling_internal.h" ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_timeline *sync_tl; /**< Sync timeline that corresponds to this timeline. */ ++#else ++ struct mali_internal_sync_timeline *sync_tl; +#endif -+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) -+#include "mali_osk_profiling.h" -+#include "mali_dvfs_policy.h" ++ mali_bool destroyed; ++ struct mali_spinlock_reentrant *spinlock; /**< Spin lock protecting the timeline system */ ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+static int is_first_resume = 1; -+/*Store the clk and vol for boot/insmod and mali_resume*/ -+static struct mali_gpu_clk_item mali_gpu_clk[2]; -+#endif ++ /* The following fields are used to time out soft job trackers. */ ++ _mali_osk_wq_delayed_work_t *delayed_work; ++ mali_bool timer_active; ++}; + -+/* Streamline support for the Mali driver */ -+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING) -+/* Ask Linux to create the tracepoints */ -+#define CREATE_TRACE_POINTS -+#include "mali_linux_trace.h" ++/** ++ * Timeline waiter. ++ */ ++struct mali_timeline_waiter { ++ mali_timeline_point point; /**< Point on timeline we are waiting for to be released. */ ++ struct mali_timeline_tracker *tracker; /**< Tracker that is waiting. 
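For readers of the new mali_timeline.h above, the following standalone sketch (not part of the patch) shows how a timeline fence with a single dependency is typically filled in; it mirrors the memset / sync_fd = -1 / points[] pattern used later in mali_timeline_sync_fence.c. The example_* names and the re-declared types exist only so the snippet compiles on its own; the real driver uses the struct mali_timeline_fence definition above.

#include <string.h>

typedef unsigned int example_timeline_point;
#define EXAMPLE_TIMELINE_NO_POINT ((example_timeline_point) 0)
#define EXAMPLE_TIMELINE_MAX 3 /* stands in for MALI_TIMELINE_MAX */

struct example_timeline_fence {
	example_timeline_point points[EXAMPLE_TIMELINE_MAX]; /* a point per timeline, or no point */
	int sync_fd;                                          /* sync fence fd, or -1 for none */
};

/* Build a fence that depends on a single point on one timeline. */
static void example_fence_single_dep(struct example_timeline_fence *fence,
				     int timeline_id, example_timeline_point point)
{
	memset(fence, 0, sizeof(*fence)); /* zero == EXAMPLE_TIMELINE_NO_POINT everywhere */
	fence->sync_fd = -1;              /* no external sync fence dependency */
	fence->points[timeline_id] = point;
}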
*/ + -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters); -+#endif /* CONFIG_TRACEPOINTS */ ++ struct mali_timeline_waiter *timeline_next; /**< Next waiter on timeline's waiter list. */ ++ struct mali_timeline_waiter *timeline_prev; /**< Previous waiter on timeline's waiter list. */ + -+#ifdef CONFIG_MALI_DEVFREQ -+#include "mali_devfreq.h" -+#include "mali_osk_mali.h" ++ struct mali_timeline_waiter *tracker_next; /**< Next waiter on tracker's waiter list. */ ++}; + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) -+#include -+#else -+/* In 3.13 the OPP include header file, types, and functions were all -+ * renamed. Use the old filename for the include, and define the new names to -+ * the old, when an old kernel is detected. ++/** ++ * Timeline tracker. + */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -+#include -+#else -+#include -+#endif /* Linux >= 3.13*/ -+#define dev_pm_opp_of_add_table of_init_opp_table -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) -+#define dev_pm_opp_of_remove_table of_free_opp_table -+#endif /* Linux >= 3.19 */ -+#endif /* Linux >= 4.4.0 */ -+#endif -+ -+/* from the __malidrv_build_info.c file that is generated during build */ -+extern const char *__malidrv_build_info(void); ++struct mali_timeline_tracker { ++ MALI_DEBUG_CODE(u32 magic); /**< Should always be MALI_TIMELINE_TRACKER_MAGIC for a valid tracker. */ + -+/* Module parameter to control log level */ -+int mali_debug_level = 2; -+module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ -+MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output"); ++ mali_timeline_point point; /**< Point on timeline for this tracker */ + -+extern int mali_max_job_runtime; -+module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what"); ++ struct mali_timeline_tracker *timeline_next; /**< Next tracker on timeline's tracker list */ ++ struct mali_timeline_tracker *timeline_prev; /**< Previous tracker on timeline's tracker list */ + -+extern int mali_l2_max_reads; -+module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache"); ++ u32 trigger_ref_count; /**< When zero tracker will be activated */ ++ mali_timeline_activation_error activation_error; /**< Activation error. */ ++ struct mali_timeline_fence fence; /**< Fence used to create this tracker */ + -+extern unsigned int mali_dedicated_mem_start; -+module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory."); ++ /* Single-linked list of waiters. Sorted in order of insertions with ++ * tail pointing to first waiter. */ ++ struct mali_timeline_waiter *waiter_head; ++ struct mali_timeline_waiter *waiter_tail; + -+extern unsigned int mali_dedicated_mem_size; -+module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory."); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ /* These are only used if the tracker is waiting on a sync fence. */ ++ struct mali_timeline_waiter *waiter_sync; /**< A direct pointer to timeline waiter representing sync fence. 
*/
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
++	struct sync_fence_waiter sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
++	struct sync_fence *sync_fence; /**< The sync fence this tracker is waiting on. */
++#else
++	struct mali_internal_sync_fence_waiter sync_fence_waiter; /**< Used to connect sync fence and tracker in sync fence wait callback. */
++	struct mali_internal_sync_fence *sync_fence; /**< The sync fence this tracker is waiting on. */
++#endif
++	_mali_osk_list_t sync_fence_cancel_list; /**< List node used to cancel sync fence waiters. */
++	_mali_osk_list_t sync_fence_signal_list; /**< List node used to signal sync fence callback function. */
+
++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */
+
++#if defined(CONFIG_MALI_DMA_BUF_FENCE)
++	struct mali_timeline_waiter *waiter_dma_fence; /**< A direct pointer to timeline waiter representing dma fence. */
++#endif
+
++	struct mali_timeline_system *system; /**< Timeline system. */
++	struct mali_timeline *timeline; /**< Timeline, or NULL if not on a timeline. */
++	enum mali_timeline_tracker_type type; /**< Type of tracker. */
++	void *job; /**< Owner of tracker. */
+
++	/* The following fields are used to time out soft job trackers. 
*/ ++ unsigned long os_tick_create; ++ unsigned long os_tick_activate; ++ mali_bool timer_active; ++}; + -+extern unsigned int mali_mem_swap_out_threshold_value; -+module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver."); -+ -+#if defined(CONFIG_MALI_DVFS) -+/** the max fps the same as display vsync default 60, can set by module insert parameter */ -+extern int mali_max_system_fps; -+module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC."); -+ -+/** a lower limit on their desired FPS default 58, can set by module insert parameter*/ -+extern int mali_desired_fps; -+module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); -+MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps"); -+#endif -+ -+#if MALI_ENABLE_CPU_CYCLES -+#include -+#include -+#include -+static struct timer_list mali_init_cpu_clock_timers[8]; -+static u32 mali_cpu_clock_last_value[8] = {0,}; -+#endif -+ -+/* Export symbols from common code: mali_user_settings.c */ -+#include "mali_user_settings_db.h" -+EXPORT_SYMBOL(mali_set_user_setting); -+EXPORT_SYMBOL(mali_get_user_setting); -+ -+static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */ -+ -+/* This driver only supports one Mali device, and this variable stores this single platform device */ -+struct platform_device *mali_platform_device = NULL; -+ -+/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */ -+static struct miscdevice mali_miscdevice = { 0, }; -+ -+static int mali_miscdevice_register(struct platform_device *pdev); -+static void mali_miscdevice_unregister(void); -+ -+static int mali_open(struct inode *inode, struct file *filp); -+static int mali_release(struct inode *inode, struct file *filp); -+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -+ -+static int mali_probe(struct platform_device *pdev); -+static int mali_remove(struct platform_device *pdev); -+ -+static int mali_driver_suspend_scheduler(struct device *dev); -+static int mali_driver_resume_scheduler(struct device *dev); ++extern _mali_osk_atomic_t gp_tracker_count; ++extern _mali_osk_atomic_t phy_pp_tracker_count; ++extern _mali_osk_atomic_t virt_pp_tracker_count; + -+#ifdef CONFIG_PM_RUNTIME -+static int mali_driver_runtime_suspend(struct device *dev); -+static int mali_driver_runtime_resume(struct device *dev); -+static int mali_driver_runtime_idle(struct device *dev); -+#endif ++/** ++ * What follows is a set of functions to check the state of a timeline and to determine where on a ++ * timeline a given point is. Most of these checks will translate the timeline so the oldest point ++ * on the timeline is aligned with zero. Remember that all of these calculation are done on ++ * unsigned integers. ++ * ++ * The following example illustrates the three different states a point can be in. 
The timeline has ++ * been translated to put the oldest point at zero: ++ * ++ * ++ * ++ * [ point is in forbidden zone ] ++ * 64k wide ++ * MALI_TIMELINE_MAX_POINT_SPAN ++ * ++ * [ point is on timeline ) ( point is released ] ++ * ++ * 0--------------------------##############################--------------------2^32 - 1 ++ * ^ ^ ++ * \ | ++ * oldest point on timeline | ++ * \ ++ * next point on timeline ++ */ + -+#if defined(MALI_FAKE_PLATFORM_DEVICE) -+#if defined(CONFIG_MALI_DT) -+extern int mali_platform_device_init(struct platform_device *device); -+extern int mali_platform_device_deinit(struct platform_device *device); -+#else -+extern int mali_platform_device_register(void); -+extern int mali_platform_device_unregister(void); -+#endif -+#endif ++/** ++ * Compare two timeline points ++ * ++ * Returns true if a is after b, false if a is before or equal to b. ++ * ++ * This funcion ignores MALI_TIMELINE_MAX_POINT_SPAN. Wrapping is supported and ++ * the result will be correct if the points is less then UINT_MAX/2 apart. ++ * ++ * @param a Point on timeline ++ * @param b Point on timeline ++ * @return MALI_TRUE if a is after b ++ */ ++MALI_STATIC_INLINE mali_bool mali_timeline_point_after(mali_timeline_point a, mali_timeline_point b) ++{ ++ return 0 > ((s32)b) - ((s32)a); ++} + -+extern int rk_platform_init_opp_table(struct mali_device *mdev); -+extern void rk_platform_uninit_opp_table(struct mali_device *mdev); ++/** ++ * Check if a point is on timeline. A point is on a timeline if it is greater than, or equal to, ++ * the oldest point, and less than the next point. ++ * ++ * @param timeline Timeline. ++ * @param point Point on timeline. ++ * @return MALI_TRUE if point is on timeline, MALI_FALSE if not. ++ */ ++MALI_STATIC_INLINE mali_bool mali_timeline_is_point_on(struct mali_timeline *timeline, mali_timeline_point point) ++{ ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); + -+/* Linux power management operations provided by the Mali device driver */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)) -+struct pm_ext_ops mali_dev_ext_pm_ops = { -+ .base = -+ { -+ .suspend = mali_driver_suspend_scheduler, -+ .resume = mali_driver_resume_scheduler, -+ .freeze = mali_driver_suspend_scheduler, -+ .thaw = mali_driver_resume_scheduler, -+ }, -+}; -+#else -+static const struct dev_pm_ops mali_dev_pm_ops = { -+#ifdef CONFIG_PM_RUNTIME -+ .runtime_suspend = mali_driver_runtime_suspend, -+ .runtime_resume = mali_driver_runtime_resume, -+ .runtime_idle = mali_driver_runtime_idle, -+#endif -+ .suspend = mali_driver_suspend_scheduler, -+ .resume = mali_driver_resume_scheduler, -+ .freeze = mali_driver_suspend_scheduler, -+ .thaw = mali_driver_resume_scheduler, -+ .poweroff = mali_driver_suspend_scheduler, -+}; -+#endif ++ return (point - timeline->point_oldest) < (timeline->point_next - timeline->point_oldest); ++} + -+#ifdef CONFIG_MALI_DT -+static struct of_device_id base_dt_ids[] = { -+ {.compatible = "arm,mali-300"}, -+ /*-------------------------------------------------------*/ -+ /* rk_ext : to use dts_for_mali_ko_befor_r5p0-01rel0. */ -+ // {.compatible = "arm,mali-400"}, -+ {.compatible = "arm,mali400"}, -+ /*-------------------------------------------------------*/ -+ {.compatible = "arm,mali-450"}, -+ {.compatible = "arm,mali-470"}, -+ {}, -+}; ++/** ++ * Check if a point has been released. A point is released if it is older than the oldest point on ++ * the timeline, newer than the next point, and also not in the forbidden zone. 
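The point arithmetic above relies on 32-bit unsigned wrap-around; the signed reinterpretation in mali_timeline_point_after() is what keeps the ordering test correct across the 2^32 boundary. A minimal standalone sketch (not part of the patch; the local point_after() below mirrors the same idea rather than reusing the driver's types) illustrates the behaviour near the wrap:

#include <assert.h>
#include <stdint.h>

/* Wrap-safe "a is after b" test, same idea as mali_timeline_point_after():
 * compute the forward distance from a to b and look at its sign. */
static int point_after(uint32_t a, uint32_t b)
{
	return 0 > (int32_t)(b - a);
}

int main(void)
{
	assert(point_after(10, 5));            /* plain case, no wrap */
	assert(point_after(5, 0xFFFFFFF0u));   /* 5 came after 2^32 - 16: the counter wrapped */
	assert(!point_after(0xFFFFFFF0u, 5));  /* ...and not the other way around */
	assert(!point_after(7, 7));            /* equal points are not "after" each other */
	return 0;
}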
++ * ++ * @param timeline Timeline. ++ * @param point Point on timeline. ++ * @return MALI_TRUE if point has been release, MALI_FALSE if not. ++ */ ++MALI_STATIC_INLINE mali_bool mali_timeline_is_point_released(struct mali_timeline *timeline, mali_timeline_point point) ++{ ++ mali_timeline_point point_normalized; ++ mali_timeline_point next_normalized; + -+MODULE_DEVICE_TABLE(of, base_dt_ids); -+#endif ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); + -+/* The Mali device driver struct */ -+static struct platform_driver mali_platform_driver = { -+ .probe = mali_probe, -+ .remove = mali_remove, -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)) -+ .pm = &mali_dev_ext_pm_ops, -+#endif -+ .driver = -+ { -+ .name = MALI_GPU_NAME_UTGARD, -+ .owner = THIS_MODULE, -+ .bus = &platform_bus_type, -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)) -+ .pm = &mali_dev_pm_ops, -+#endif -+#ifdef CONFIG_MALI_DT -+ .of_match_table = of_match_ptr(base_dt_ids), -+#endif -+ }, -+}; ++ point_normalized = point - timeline->point_oldest; ++ next_normalized = timeline->point_next - timeline->point_oldest; + -+/* Linux misc device operations (/dev/mali) */ -+struct file_operations mali_fops = { -+ .owner = THIS_MODULE, -+ .open = mali_open, -+ .release = mali_release, -+ .unlocked_ioctl = mali_ioctl, -+ .compat_ioctl = mali_ioctl, -+ .mmap = mali_mmap -+}; ++ return point_normalized > (next_normalized + MALI_TIMELINE_MAX_POINT_SPAN); ++} + -+#if MALI_ENABLE_CPU_CYCLES -+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64) ++/** ++ * Check if the tracker that the point relate to has been released. A point is released if the tracker is not on the timeline. ++ * @param timeline Timeline. ++ * @param point Point on timeline. ++ * @return MALI_TRUE if the tracker has been release, MALI_FALSE if not. ++ */ ++MALI_STATIC_INLINE mali_bool mali_timeline_is_tracker_released(struct mali_timeline *timeline, mali_timeline_point point) +{ -+ /* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */ -+ u32 write_value; ++ struct mali_timeline_tracker *tracker; + -+ /* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */ -+ /* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */ -+ asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f)); ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); + ++ tracker = timeline->tracker_tail; + -+ /* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */ -+ write_value = 1 << 0; /* Bit 0 set. Enable counters */ -+ if (reset) { -+ write_value |= 1 << 1; /* Reset event counters */ -+ write_value |= 1 << 2; /* Reset cycle counter */ -+ } -+ if (enable_divide_by_64) { -+ write_value |= 1 << 3; /* Enable the Clock divider by 64 */ ++ while (NULL != tracker) { ++ if (point == tracker->point) ++ return MALI_FALSE; ++ tracker = tracker->timeline_next; + } -+ write_value |= 1 << 4; /* Export enable. Not needed */ -+ asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value)); -+ -+ /* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */ -+ asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f)); + -+ -+ /* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */ -+ /* User mode access to the Performance Monitors enabled. 
*/ -+ /* Lets User space read cpu clock cycles */ -+ asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1)); ++ return MALI_TRUE; +} + -+/** A timer function that configures the cycle clock counter on current CPU. -+ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this -+ * function to trigger on all Cpus during module load. ++/** ++ * Check if a point is valid. A point is valid if is on the timeline or has been released. ++ * ++ * @param timeline Timeline. ++ * @param point Point on timeline. ++ * @return MALI_TRUE if point is valid, MALI_FALSE if not. + */ -+static void mali_init_cpu_clock_timer_func(unsigned long data) ++MALI_STATIC_INLINE mali_bool mali_timeline_is_point_valid(struct mali_timeline *timeline, mali_timeline_point point) +{ -+ int reset_counters, enable_divide_clock_counter_by_64; -+ int current_cpu = raw_smp_processor_id(); -+ unsigned int sample0; -+ unsigned int sample1; -+ -+ MALI_IGNORE(data); -+ -+ reset_counters = 1; -+ enable_divide_clock_counter_by_64 = 0; -+ mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64); -+ -+ sample0 = mali_get_cpu_cyclecount(); -+ sample1 = mali_get_cpu_cyclecount(); -+ -+ MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1)); ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ return mali_timeline_is_point_on(timeline, point) || mali_timeline_is_point_released(timeline, point); +} + -+/** A timer functions for storing current time on all cpus. -+ * Used for checking if the clocks have similar values or if they are drifting. ++/** ++ * Check if timeline is empty (has no points on it). A timeline is empty if next == oldest. ++ * ++ * @param timeline Timeline. ++ * @return MALI_TRUE if timeline is empty, MALI_FALSE if not. + */ -+static void mali_print_cpu_clock_timer_func(unsigned long data) ++MALI_STATIC_INLINE mali_bool mali_timeline_is_empty(struct mali_timeline *timeline) +{ -+ int current_cpu = raw_smp_processor_id(); -+ unsigned int sample0; -+ -+ MALI_IGNORE(data); -+ sample0 = mali_get_cpu_cyclecount(); -+ if (current_cpu < 8) { -+ mali_cpu_clock_last_value[current_cpu] = sample0; -+ } ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ return timeline->point_next == timeline->point_oldest; +} + -+/** Init the performance registers on all CPUs to count clock cycles. -+ * For init \a print_only should be 0. -+ * If \a print_only is 1, it will intead print the current clock value of all CPUs. ++/** ++ * Check if timeline is full. A valid timeline cannot span more than 64k points (@ref ++ * MALI_TIMELINE_MAX_POINT_SPAN). ++ * ++ * @param timeline Timeline. ++ * @return MALI_TRUE if timeline is full, MALI_FALSE if not. + */ -+void mali_init_cpu_time_counters_on_all_cpus(int print_only) ++MALI_STATIC_INLINE mali_bool mali_timeline_is_full(struct mali_timeline *timeline) +{ -+ int i = 0; -+ int cpu_number; -+ int jiffies_trigger; -+ int jiffies_wait; ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ return MALI_TIMELINE_MAX_POINT_SPAN <= (timeline->point_next - timeline->point_oldest); ++} + -+ jiffies_wait = 2; -+ jiffies_trigger = jiffies + jiffies_wait; ++/** ++ * Create a new timeline system. ++ * ++ * @param session The session this timeline system will belong to. ++ * @return New timeline system. 
++ */ ++struct mali_timeline_system *mali_timeline_system_create(struct mali_session_data *session); + -+ for (i = 0 ; i < 8 ; i++) { -+ init_timer(&mali_init_cpu_clock_timers[i]); -+ if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func; -+ else mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func; -+ mali_init_cpu_clock_timers[i].expires = jiffies_trigger ; -+ } -+ cpu_number = cpumask_first(cpu_online_mask); -+ for (i = 0 ; i < 8 ; i++) { -+ int next_cpu; -+ add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number); -+ next_cpu = cpumask_next(cpu_number, cpu_online_mask); -+ if (next_cpu >= nr_cpu_ids) break; -+ cpu_number = next_cpu; -+ } ++/** ++ * Abort timeline system. ++ * ++ * This will release all pending waiters in the timeline system causing all trackers to be ++ * activated. ++ * ++ * @param system Timeline system to abort all jobs from. ++ */ ++void mali_timeline_system_abort(struct mali_timeline_system *system); + -+ while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait); ++/** ++ * Destroy an empty timeline system. ++ * ++ * @note @ref mali_timeline_system_abort() should be called prior to this function. ++ * ++ * @param system Timeline system to destroy. ++ */ ++void mali_timeline_system_destroy(struct mali_timeline_system *system); + -+ for (i = 0 ; i < 8 ; i++) { -+ del_timer_sync(&mali_init_cpu_clock_timers[i]); -+ } ++/** ++ * Stop the soft job timer. ++ * ++ * @param system Timeline system ++ */ ++void mali_timeline_system_stop_timer(struct mali_timeline_system *system); + -+ if (print_only) { -+ if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) { -+ /* Diff can be printed if we want to check if the clocks are in sync -+ int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/ -+ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1])); -+ } else { -+ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3])); -+ } -+ } -+} -+#endif ++/** ++ * Add a tracker to a timeline system and optionally also on a timeline. ++ * ++ * Once added to the timeline system, the tracker is guaranteed to be activated. The tracker can be ++ * activated before this function returns. Thus, it is also possible that the tracker is released ++ * before this function returns, depending on the tracker type. ++ * ++ * @note Tracker must be initialized (@ref mali_timeline_tracker_init) before being added to the ++ * timeline system. ++ * ++ * @param system Timeline system the tracker will be added to. ++ * @param tracker The tracker to be added. ++ * @param timeline_id Id of the timeline the tracker will be added to, or ++ * MALI_TIMELINE_NONE if it should not be added on a timeline. ++ * @return Point on timeline identifying this tracker, or MALI_TIMELINE_NO_POINT if not on timeline. ++ */ ++mali_timeline_point mali_timeline_system_add_tracker(struct mali_timeline_system *system, ++ struct mali_timeline_tracker *tracker, ++ enum mali_timeline_id timeline_id); + -+int mali_module_init(void) -+{ -+ int err = 0; ++/** ++ * Get latest point on timeline. ++ * ++ * @param system Timeline system. ++ * @param timeline_id Id of timeline to get latest point from. ++ * @return Latest point on timeline, or MALI_TIMELINE_NO_POINT if the timeline is empty. 
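As a usage note, the comments above pin down the tear-down order: the system must be aborted, so that every pending waiter is released and every tracker activated, before it may be destroyed. A short sketch (not part of the patch, assuming the declarations from this header are in scope; the example_* name is illustrative):

static void example_timeline_system_teardown(struct mali_timeline_system *system)
{
	/* Release all pending waiters so all trackers get activated... */
	mali_timeline_system_abort(system);
	/* ...only then is it safe to destroy the (now empty) system. */
	mali_timeline_system_destroy(system);
}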
++ */ ++mali_timeline_point mali_timeline_system_get_latest_point(struct mali_timeline_system *system, ++ enum mali_timeline_id timeline_id); + -+ MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION)); -+ MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__)); -+ MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING)); -+ -+ I("svn_rev_string_from_arm of this mali_ko is '%s', rk_ko_ver is '%d', built at '%s', on '%s'.", -+ SVN_REV_STRING, -+ RK_KO_VER, -+ __TIME__, -+ __DATE__); ++/** ++ * Initialize tracker. ++ * ++ * Must be called before tracker is added to timeline system (@ref mali_timeline_system_add_tracker). ++ * ++ * @param tracker Tracker to initialize. ++ * @param type Type of tracker. ++ * @param fence Fence used to set up dependencies for tracker. ++ * @param job Pointer to job struct this tracker is associated with. ++ */ ++void mali_timeline_tracker_init(struct mali_timeline_tracker *tracker, ++ mali_timeline_tracker_type type, ++ struct mali_timeline_fence *fence, ++ void *job); + -+#if MALI_ENABLE_CPU_CYCLES -+ mali_init_cpu_time_counters_on_all_cpus(0); -+ MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n")); -+ /* Printing the current cpu counters */ -+ mali_init_cpu_time_counters_on_all_cpus(1); -+#endif ++/** ++ * Grab trigger ref count on tracker. ++ * ++ * This will prevent tracker from being activated until the trigger ref count reaches zero. ++ * ++ * @note Tracker must have been initialized (@ref mali_timeline_tracker_init). ++ * ++ * @param system Timeline system. ++ * @param tracker Tracker. ++ */ ++void mali_timeline_system_tracker_get(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker); + -+ /* Initialize module wide settings */ -+#ifdef MALI_FAKE_PLATFORM_DEVICE -+#ifndef CONFIG_MALI_DT -+ MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n")); -+ err = mali_platform_device_register(); -+ if (0 != err) { -+ return err; -+ } -+#endif -+#endif ++/** ++ * Release trigger ref count on tracker. ++ * ++ * If the trigger ref count reaches zero, the tracker will be activated. ++ * ++ * @param system Timeline system. ++ * @param tracker Tracker. ++ * @param activation_error Error bitmask if activated with error, or MALI_TIMELINE_ACTIVATION_ERROR_NONE if no error. ++ * @return Scheduling bitmask. ++ */ ++mali_scheduler_mask mali_timeline_system_tracker_put(struct mali_timeline_system *system, struct mali_timeline_tracker *tracker, mali_timeline_activation_error activation_error); + -+ MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n")); ++/** ++ * Release a tracker from the timeline system. ++ * ++ * This is used to signal that the job being tracker is finished, either due to normal circumstances ++ * (job complete/abort) or due to a timeout. ++ * ++ * We may need to schedule some subsystems after a tracker has been released and the returned ++ * bitmask will tell us if it is necessary. If the return value is non-zero, this value needs to be ++ * sent as an input parameter to @ref mali_scheduler_schedule_from_mask() to do the scheduling. ++ * ++ * @note Tracker must have been activated before being released. ++ * @warning Not calling @ref mali_scheduler_schedule_from_mask() after releasing a tracker can lead ++ * to a deadlock. ++ * ++ * @param tracker Tracker being released. ++ * @return Scheduling bitmask. 
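Putting the above together, a tracker goes through init, add, and release. The sketch below (not part of the patch) assumes the declarations from this header are in scope; example_complete_soft_job() is an illustrative name, the tracker would normally be embedded in the job object, and the resulting scheduling mask is meant to be fed to mali_scheduler_schedule_from_mask() as described above (its exact signature is not shown here, so that call is left as a comment).

static void example_complete_soft_job(struct mali_timeline_system *system,
				      struct mali_timeline_tracker *tracker,
				      struct mali_timeline_fence *fence,
				      void *job)
{
	mali_timeline_point point;
	mali_scheduler_mask mask;

	/* 1. Initialize the tracker before handing it to the timeline system. */
	mali_timeline_tracker_init(tracker, MALI_TIMELINE_TRACKER_SOFT, fence, job);

	/* 2. Add it to the soft job timeline; the returned point identifies it. */
	point = mali_timeline_system_add_tracker(system, tracker, MALI_TIMELINE_SOFT);
	MALI_IGNORE(point);

	/* ... the job runs and eventually completes ... */

	/* 3. Release the tracker and schedule whatever was waiting on it. */
	mask = mali_timeline_tracker_release(tracker);
	if (MALI_SCHEDULER_MASK_EMPTY != mask) {
		/* pass "mask" to mali_scheduler_schedule_from_mask(), see above */
	}
}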
++ */ ++mali_scheduler_mask mali_timeline_tracker_release(struct mali_timeline_tracker *tracker); + -+ err = platform_driver_register(&mali_platform_driver); ++MALI_STATIC_INLINE mali_bool mali_timeline_tracker_activation_error( ++ struct mali_timeline_tracker *tracker) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tracker); ++ return (MALI_TIMELINE_ACTIVATION_ERROR_FATAL_BIT & ++ tracker->activation_error) ? MALI_TRUE : MALI_FALSE; ++} + -+ if (0 != err) { -+ MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err)); -+#ifdef MALI_FAKE_PLATFORM_DEVICE -+#ifndef CONFIG_MALI_DT -+ mali_platform_device_unregister(); -+#endif -+#endif -+ mali_platform_device = NULL; -+ return err; -+ } ++/** ++ * Copy data from a UK fence to a Timeline fence. ++ * ++ * @param fence Timeline fence. ++ * @param uk_fence UK fence. ++ */ ++void mali_timeline_fence_copy_uk_fence(struct mali_timeline_fence *fence, _mali_uk_fence_t *uk_fence); + -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+ err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE); -+ if (0 != err) { -+ /* No biggie if we wheren't able to initialize the profiling */ -+ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n")); -+ } -+#endif ++_mali_osk_errcode_t mali_timeline_initialize(void); + -+ /* Tracing the current frequency and voltage from boot/insmod*/ -+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) -+ /* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/ -+ mali_get_current_gpu_clk_item(&mali_gpu_clk[0]); -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ mali_gpu_clk[0].clock, -+ mali_gpu_clk[0].vol / 1000, -+ 0, 0, 0); -+#endif ++void mali_timeline_terminate(void); + -+ MALI_PRINT(("Mali device driver loaded\n")); ++MALI_STATIC_INLINE mali_bool mali_timeline_has_gp_job(void) ++{ ++ return 0 < _mali_osk_atomic_read(&gp_tracker_count); ++} + -+ return 0; /* Success */ ++MALI_STATIC_INLINE mali_bool mali_timeline_has_physical_pp_job(void) ++{ ++ return 0 < _mali_osk_atomic_read(&phy_pp_tracker_count); +} + -+void mali_module_exit(void) ++MALI_STATIC_INLINE mali_bool mali_timeline_has_virtual_pp_job(void) +{ -+ MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION)); ++ return 0 < _mali_osk_atomic_read(&virt_pp_tracker_count); ++} + -+ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n")); ++#if defined(DEBUG) ++#define MALI_TIMELINE_DEBUG_FUNCTIONS ++#endif /* DEBUG */ ++#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) + -+ platform_driver_unregister(&mali_platform_driver); ++/** ++ * Tracker state. Used for debug printing. ++ */ ++typedef enum mali_timeline_tracker_state { ++ MALI_TIMELINE_TS_INIT = 0, ++ MALI_TIMELINE_TS_WAITING = 1, ++ MALI_TIMELINE_TS_ACTIVE = 2, ++ MALI_TIMELINE_TS_FINISH = 3, ++} mali_timeline_tracker_state; + -+#if defined(MALI_FAKE_PLATFORM_DEVICE) -+#ifndef CONFIG_MALI_DT -+ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n")); -+ mali_platform_device_unregister(); -+#endif ++/** ++ * Get tracker state. ++ * ++ * @param tracker Tracker to check. ++ * @return State of tracker. ++ */ ++mali_timeline_tracker_state mali_timeline_debug_get_tracker_state(struct mali_timeline_tracker *tracker); ++ ++/** ++ * Print debug information about tracker. ++ * ++ * @param tracker Tracker to print. 
++ */ ++void mali_timeline_debug_print_tracker(struct mali_timeline_tracker *tracker, _mali_osk_print_ctx *print_ctx); ++ ++/** ++ * Print debug information about timeline. ++ * ++ * @param timeline Timeline to print. ++ */ ++void mali_timeline_debug_print_timeline(struct mali_timeline *timeline, _mali_osk_print_ctx *print_ctx); ++ ++#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)) ++void mali_timeline_debug_direct_print_tracker(struct mali_timeline_tracker *tracker); ++void mali_timeline_debug_direct_print_timeline(struct mali_timeline *timeline); +#endif + -+ /* Tracing the current frequency and voltage from rmmod*/ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ 0, -+ 0, -+ 0, 0, 0); ++/** ++ * Print debug information about timeline system. ++ * ++ * @param system Timeline system to print. ++ */ ++void mali_timeline_debug_print_system(struct mali_timeline_system *system, _mali_osk_print_ctx *print_ctx); + -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+ _mali_internal_profiling_term(); ++#endif /* defined(MALI_TIMELINE_DEBUG_FUNCTIONS) */ ++ ++#if defined(CONFIG_MALI_DMA_BUF_FENCE) ++/** ++ * The timeline dma fence callback when dma fence signal. ++ * ++ * @param pp_job_ptr The pointer to pp job that link to the signaled dma fence. ++ */ ++void mali_timeline_dma_fence_callback(void *pp_job_ptr); +#endif + -+ MALI_PRINT(("Mali device driver unloaded\n")); -+} ++#endif /* __MALI_TIMELINE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c +new file mode 100644 +index 000000000..1ab13f509 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.c +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (C) 2013-2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++#include ++#include "mali_timeline_fence_wait.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_spinlock_reentrant.h" + -+#ifdef CONFIG_MALI_DEVFREQ -+struct mali_device *mali_device_alloc(void) ++/** ++ * Allocate a fence waiter tracker. ++ * ++ * @return New fence waiter if successful, NULL if not. ++ */ ++static struct mali_timeline_fence_wait_tracker *mali_timeline_fence_wait_tracker_alloc(void) +{ -+ return kzalloc(sizeof(struct mali_device), GFP_KERNEL); ++ return (struct mali_timeline_fence_wait_tracker *) _mali_osk_calloc(1, sizeof(struct mali_timeline_fence_wait_tracker)); +} + -+void mali_device_free(struct mali_device *mdev) ++/** ++ * Free fence waiter tracker. ++ * ++ * @param wait Fence wait tracker to free. ++ */ ++static void mali_timeline_fence_wait_tracker_free(struct mali_timeline_fence_wait_tracker *wait) +{ -+ kfree(mdev); ++ MALI_DEBUG_ASSERT_POINTER(wait); ++ _mali_osk_atomic_term(&wait->refcount); ++ _mali_osk_free(wait); +} -+#endif + -+static int mali_probe(struct platform_device *pdev) ++/** ++ * Check if fence wait tracker has been activated. Used as a wait queue condition. ++ * ++ * @param data Fence waiter. 
++ * @return MALI_TRUE if tracker has been activated, MALI_FALSE if not. ++ */ ++static mali_bool mali_timeline_fence_wait_tracker_is_activated(void *data) +{ -+ int err; -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev; -+#endif ++ struct mali_timeline_fence_wait_tracker *wait; + -+ MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name)); ++ wait = (struct mali_timeline_fence_wait_tracker *) data; ++ MALI_DEBUG_ASSERT_POINTER(wait); + -+ if (NULL != mali_platform_device) { -+ /* Already connected to a device, return error */ -+ MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device.")); -+ return -EEXIST; -+ } ++ return wait->activated; ++} + -+ mali_platform_device = pdev; ++/** ++ * Check if fence has been signaled. ++ * ++ * @param system Timeline system. ++ * @param fence Timeline fence. ++ * @return MALI_TRUE if fence is signaled, MALI_FALSE if not. ++ */ ++static mali_bool mali_timeline_fence_wait_check_status(struct mali_timeline_system *system, struct mali_timeline_fence *fence) ++{ ++ int i; ++ u32 tid = _mali_osk_get_tid(); ++ mali_bool ret = MALI_TRUE; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence = NULL; ++#else ++ struct mali_internal_sync_fence *sync_fence = NULL; ++#endif ++#endif + -+ dev_info(&pdev->dev, "mali_platform_device->num_resources = %d\n", -+ mali_platform_device->num_resources); -+ -+ { -+ int i = 0; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ for(i = 0; i < mali_platform_device->num_resources; i++) -+ dev_info(&pdev->dev, -+ "resource[%d].start = 0x%pa\n", -+ i, -+ &mali_platform_device->resource[i].start); -+ } ++ mali_spinlock_reentrant_wait(system->spinlock, tid); + -+#ifdef CONFIG_MALI_DT -+ /* If we use DT to initialize our DDK, we have to prepare somethings. */ -+ err = mali_platform_device_init(mali_platform_device); -+ if (0 != err) { -+ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device.")); -+ mali_platform_device = NULL; -+ return -EFAULT; -+ } -+#endif ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline; ++ mali_timeline_point point; + -+#ifdef CONFIG_MALI_DEVFREQ -+ mdev = mali_device_alloc(); -+ if (!mdev) { -+ MALI_PRINT_ERROR(("Can't allocate mali device private data\n")); -+ return -ENOMEM; -+ } ++ point = fence->points[i]; + -+ mdev->dev = &pdev->dev; -+ dev_set_drvdata(mdev->dev, mdev); ++ if (likely(MALI_TIMELINE_NO_POINT == point)) { ++ /* Fence contains no point on this timeline. 
*/ ++ continue; ++ } + -+ /*Initilization clock and regulator*/ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_REGULATOR) -+ mdev->regulator = regulator_get_optional(mdev->dev, "mali"); -+ if (IS_ERR_OR_NULL(mdev->regulator)) { -+ MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n")); -+ mdev->regulator = NULL; -+ /* Allow probe to continue without regulator */ -+ } -+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++ timeline = system->timelines[i]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ mdev->num_clks = devm_clk_bulk_get_all(mdev->dev, &mdev->clks); -+ if (mdev->num_clks < 1) { -+ MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n")); -+ mdev->num_clks = 0; -+ mdev->clock = NULL; -+ } else { -+ /* Get "clk_mali" in the device tree for gpu dvfs */ -+ mdev->clock = clk_get(mdev->dev, "clk_mali"); -+ if (IS_ERR_OR_NULL(mdev->clock)) { -+ MALI_DEBUG_PRINT(2, ("Continuing without Mali dvfs clock\n")); -+ /* Allow probe to continue without clock. */ -+ mdev->clock = NULL; ++ if (unlikely(!mali_timeline_is_point_valid(timeline, point))) { ++ MALI_PRINT_ERROR(("Mali Timeline: point %d is not valid (oldest=%d, next=%d)\n", point, timeline->point_oldest, timeline->point_next)); + } -+ } -+ err = clk_bulk_prepare_enable(mdev->num_clks, mdev->clks); -+ if (err) { -+ MALI_PRINT_ERROR(("Failed to prepare clock (%d)\n", err)); -+ goto clock_prepare_failed; -+ } -+ -+ err = rk_platform_init_opp_table(mdev); -+ if (err) -+ MALI_DEBUG_PRINT(3, ("Failed to init_opp_table\n")); + -+ /* initilize pm metrics related */ -+ if (mali_pm_metrics_init(mdev) < 0) { -+ MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n")); -+ goto pm_metrics_init_failed; ++ if (!mali_timeline_is_point_released(timeline, point)) { ++ ret = MALI_FALSE; ++ goto exit; ++ } + } + -+ if (mali_devfreq_init(mdev) < 0) { -+ MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n")); -+ goto devfreq_init_failed; -+ } -+ clk_bulk_disable(mdev->num_clks, mdev->clks); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (-1 != fence->sync_fd) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence = sync_fence_fdget(fence->sync_fd); ++#else ++ sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd); +#endif ++ if (likely(NULL != sync_fence)) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ if (0 == sync_fence->status) { ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ if (0 < atomic_read(&sync_fence->status)) { ++#else ++ if (0 == sync_fence->fence->ops->signaled(sync_fence->fence)) { ++#endif ++ ret = MALI_FALSE; + -+ -+ if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) { -+ /* Initialize the Mali GPU HW specified by pdev */ -+ if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) { -+ /* Register a misc device (so we are accessible from user space) */ -+ err = mali_miscdevice_register(pdev); -+ if (0 == err) { -+ /* Setup sysfs entries */ -+ err = mali_sysfs_register(mali_dev_name); -+ -+ if (0 == err) { -+ MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name)); -+ -+ return 0; -+ } else { -+ MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries")); -+ } -+ mali_miscdevice_unregister(); + } else { -+ MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device.")); ++ ret = MALI_TRUE; + } -+ mali_terminate_subsystems(); + } else { -+ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver.")); ++ MALI_PRINT_ERROR(("Mali Timeline: failed to get sync 
fence from fd %d\n", fence->sync_fd)); + } -+ _mali_osk_wq_term(); + } ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+#ifdef CONFIG_MALI_DEVFREQ -+ mali_devfreq_term(mdev); -+devfreq_init_failed: -+ mali_pm_metrics_term(mdev); -+pm_metrics_init_failed: -+ clk_bulk_disable_unprepare(mdev->num_clks, mdev->clks); -+clock_prepare_failed: -+ clk_bulk_put(mdev->num_clks, mdev->clks); -+ clk_put(mdev->clock); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_PM_OPP) -+ rk_platform_uninit_opp_table(mdev); -+#endif ++exit: ++ mali_spinlock_reentrant_signal(system->spinlock, tid); + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_REGULATOR) -+ regulator_put(mdev->regulator); -+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -+ mali_device_free(mdev); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ if (NULL != sync_fence) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence_put(sync_fence); ++#else ++ fput(sync_fence->file); +#endif ++ } ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+#ifdef CONFIG_MALI_DT -+ mali_platform_device_deinit(mali_platform_device); -+#endif -+ mali_platform_device = NULL; -+ return -EFAULT; ++ return ret; +} + -+static int mali_remove(struct platform_device *pdev) ++mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout) +{ -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev = dev_get_drvdata(&pdev->dev); -+#endif -+ -+ MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name)); -+ mali_sysfs_unregister(); -+ mali_miscdevice_unregister(); -+ mali_terminate_subsystems(); -+ _mali_osk_wq_term(); ++ struct mali_timeline_fence_wait_tracker *wait; ++ mali_timeline_point point; ++ mali_bool ret; + -+#ifdef CONFIG_MALI_DEVFREQ -+ mali_devfreq_term(mdev); ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ mali_pm_metrics_term(mdev); ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: wait on fence\n")); + -+ if (mdev->clock) { -+ clk_put(mdev->clock); -+ mdev->clock = NULL; ++ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY == timeout) { ++ return mali_timeline_fence_wait_check_status(system, fence); + } -+ clk_bulk_unprepare(mdev->num_clks, mdev->clks); -+ clk_bulk_put(mdev->num_clks, mdev->clks); -+ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_PM_OPP) -+ rk_platform_uninit_opp_table(mdev); -+#endif + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_REGULATOR) -+ regulator_put(mdev->regulator); -+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -+ mali_device_free(mdev); -+#endif ++ wait = mali_timeline_fence_wait_tracker_alloc(); ++ if (unlikely(NULL == wait)) { ++ MALI_PRINT_ERROR(("Mali Timeline: failed to allocate data for fence wait\n")); ++ return MALI_FALSE; ++ } + -+#ifdef CONFIG_MALI_DT -+ mali_platform_device_deinit(mali_platform_device); -+#endif -+ mali_platform_device = NULL; -+ return 0; -+} ++ wait->activated = MALI_FALSE; ++ wait->system = system; + -+static int mali_miscdevice_register(struct platform_device *pdev) -+{ -+ int err; ++ /* Initialize refcount to two references. The reference first will be released by this ++ * function after the wait is over. The second reference will be released when the tracker ++ * is activated. 
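The two-reference scheme described in the comment above is the classic "last owner frees" pattern: one reference for the waiting thread, one for the activation callback. A standalone sketch (not part of the patch, written with C11 atomics purely for illustration; the driver itself uses the _mali_osk_atomic_* calls shown here):

#include <stdatomic.h>
#include <stdlib.h>

struct example_two_owner {
	atomic_int refcount;
	/* ... payload ... */
};

static struct example_two_owner *example_two_owner_create(void)
{
	struct example_two_owner *obj = calloc(1, sizeof(*obj));
	if (obj)
		atomic_init(&obj->refcount, 2); /* one ref for the waiter, one for the callback */
	return obj;
}

static void example_two_owner_put(struct example_two_owner *obj)
{
	/* atomic_fetch_sub returns the previous value: 1 means we were the last owner. */
	if (1 == atomic_fetch_sub(&obj->refcount, 1))
		free(obj);
}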
*/ ++ _mali_osk_atomic_init(&wait->refcount, 2); + -+ mali_miscdevice.minor = MISC_DYNAMIC_MINOR; -+ mali_miscdevice.name = mali_dev_name; -+ mali_miscdevice.fops = &mali_fops; -+ mali_miscdevice.parent = get_device(&pdev->dev); ++ /* Add tracker to timeline system, but not to a timeline. */ ++ mali_timeline_tracker_init(&wait->tracker, MALI_TIMELINE_TRACKER_WAIT, fence, wait); ++ point = mali_timeline_system_add_tracker(system, &wait->tracker, MALI_TIMELINE_NONE); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point); ++ MALI_IGNORE(point); + -+ err = misc_register(&mali_miscdevice); -+ if (0 != err) { -+ MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err)); ++ /* Wait for the tracker to be activated or time out. */ ++ if (MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER == timeout) { ++ _mali_osk_wait_queue_wait_event(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait); ++ } else { ++ _mali_osk_wait_queue_wait_event_timeout(system->wait_queue, mali_timeline_fence_wait_tracker_is_activated, (void *) wait, timeout); + } + -+ return err; -+} -+ -+static void mali_miscdevice_unregister(void) -+{ -+ misc_deregister(&mali_miscdevice); -+} -+ -+static int mali_driver_suspend_scheduler(struct device *dev) -+{ -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ if (!mdev) -+ return -ENODEV; -+#endif ++ ret = wait->activated; + -+ pm_runtime_force_suspend(dev); -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ devfreq_suspend_device(mdev->devfreq); -+#endif ++ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) { ++ mali_timeline_fence_wait_tracker_free(wait); ++ } + -+ mali_pm_os_suspend(MALI_TRUE); -+ /* Tracing the frequency and voltage after mali is suspended */ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ 0, -+ 0, -+ 0, 0, 0); -+ return 0; ++ return ret; +} + -+static int mali_driver_resume_scheduler(struct device *dev) ++void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *wait) +{ -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ if (!mdev) -+ return -ENODEV; -+#endif -+ -+ /* Tracing the frequency and voltage after mali is resumed */ -+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) -+ /* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/ -+ if (is_first_resume == 1) { -+ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]); -+ is_first_resume = 0; -+ } -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ mali_gpu_clk[1].clock, -+ mali_gpu_clk[1].vol / 1000, -+ 0, 0, 0); -+#endif -+ mali_pm_os_resume(); ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ devfreq_resume_device(mdev->devfreq); -+#endif -+ pm_runtime_force_resume(dev); ++ MALI_DEBUG_ASSERT_POINTER(wait); ++ MALI_DEBUG_ASSERT_POINTER(wait->system); + -+ return 0; -+} ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for fence wait tracker\n")); + -+#ifdef CONFIG_PM_RUNTIME -+static int mali_driver_runtime_suspend(struct device *dev) -+{ -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ if (!mdev) -+ return -ENODEV; 
-+#endif ++ MALI_DEBUG_ASSERT(MALI_FALSE == wait->activated); ++ wait->activated = MALI_TRUE; + -+ if (MALI_TRUE == mali_pm_runtime_suspend()) { -+ /* Tracing the frequency and voltage after mali is suspended */ -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ 0, -+ 0, -+ 0, 0, 0); ++ _mali_osk_wait_queue_wake_up(wait->system->wait_queue); + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n")); -+ devfreq_suspend_device(mdev->devfreq); -+#endif ++ /* Nothing can wait on this tracker, so nothing to schedule after release. */ ++ schedule_mask = mali_timeline_tracker_release(&wait->tracker); ++ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask); ++ MALI_IGNORE(schedule_mask); + -+ return 0; -+ } else { -+ return -EBUSY; ++ if (0 == _mali_osk_atomic_dec_return(&wait->refcount)) { ++ mali_timeline_fence_wait_tracker_free(wait); + } +} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h +new file mode 100644 +index 000000000..9da12baee +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_fence_wait.h +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static int mali_driver_runtime_resume(struct device *dev) -+{ -+#ifdef CONFIG_MALI_DEVFREQ -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ if (!mdev) -+ return -ENODEV; -+#endif -+ -+ /* Tracing the frequency and voltage after mali is resumed */ -+#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) -+ /* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/ -+ if (is_first_resume == 1) { -+ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]); -+ is_first_resume = 0; -+ } -+ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | -+ MALI_PROFILING_EVENT_CHANNEL_GPU | -+ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, -+ mali_gpu_clk[1].clock, -+ mali_gpu_clk[1].vol / 1000, -+ 0, 0, 0); -+#endif -+ -+ mali_pm_runtime_resume(); ++/** ++ * @file mali_timeline_fence_wait.h ++ * ++ * This file contains functions used to wait until a Timeline fence is signaled. 
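To make the timeout semantics of this header concrete, a usage sketch (not part of the patch) follows; it assumes the declarations later in this file are in scope, and example_fence_wait_modes() is an illustrative name. The timeout is in milliseconds, and the return value is MALI_FALSE when the wait timed out.

static void example_fence_wait_modes(struct mali_timeline_system *system,
				     struct mali_timeline_fence *fence)
{
	mali_bool signaled;

	/* Poll: report the current state of the fence without blocking. */
	signaled = mali_timeline_fence_wait(system, fence,
					    MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY);

	/* Block for at most 100 ms. */
	signaled = mali_timeline_fence_wait(system, fence, 100);

	/* Block until the fence is signaled, with no timeout. */
	signaled = mali_timeline_fence_wait(system, fence,
					    MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER);
	(void)signaled;
}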
++ */ + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n")); -+ devfreq_resume_device(mdev->devfreq); -+#endif -+ return 0; -+} ++#ifndef __MALI_TIMELINE_FENCE_WAIT_H__ ++#define __MALI_TIMELINE_FENCE_WAIT_H__ + -+static int mali_driver_runtime_idle(struct device *dev) -+{ -+ /* Nothing to do */ -+ return 0; -+} -+#endif ++#include "mali_osk.h" ++#include "mali_timeline.h" + -+static int mali_open(struct inode *inode, struct file *filp) -+{ -+ struct mali_session_data *session_data; -+ _mali_osk_errcode_t err; ++/** ++ * If used as the timeout argument in @ref mali_timeline_fence_wait, a timer is not used and the ++ * function only returns when the fence is signaled. ++ */ ++#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER ((u32) -1) + -+ /* input validation */ -+ if (mali_miscdevice.minor != iminor(inode)) { -+ MALI_PRINT_ERROR(("mali_open() Minor does not match\n")); -+ return -ENODEV; -+ } ++/** ++ * If used as the timeout argument in @ref mali_timeline_fence_wait, the function will return ++ * immediately with the current state of the fence. ++ */ ++#define MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY 0 + -+ /* allocated struct to track this session */ -+ err = _mali_ukk_open((void **)&session_data); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++/** ++ * Fence wait tracker. ++ * ++ * The fence wait tracker is added to the Timeline system with the fence we are waiting on as a ++ * dependency. We will then perform a blocking wait, possibly with a timeout, until the tracker is ++ * activated, which happens when the fence is signaled. ++ */ ++struct mali_timeline_fence_wait_tracker { ++ mali_bool activated; /**< MALI_TRUE if the tracker has been activated, MALI_FALSE if not. */ ++ _mali_osk_atomic_t refcount; /**< Reference count. */ ++ struct mali_timeline_system *system; /**< Timeline system. */ ++ struct mali_timeline_tracker tracker; /**< Timeline tracker. */ ++}; + -+ /* initialize file pointer */ -+ filp->f_pos = 0; ++/** ++ * Wait for a fence to be signaled, or timeout is reached. ++ * ++ * @param system Timeline system. ++ * @param fence Fence to wait on. ++ * @param timeout Timeout in ms, or MALI_TIMELINE_FENCE_WAIT_TIMEOUT_NEVER or ++ * MALI_TIMELINE_FENCE_WAIT_TIMEOUT_IMMEDIATELY. ++ * @return MALI_TRUE if signaled, MALI_FALSE if timed out. ++ */ ++mali_bool mali_timeline_fence_wait(struct mali_timeline_system *system, struct mali_timeline_fence *fence, u32 timeout); + -+ /* link in our session data */ -+ filp->private_data = (void *)session_data; ++/** ++ * Used by the Timeline system to activate a fence wait tracker. ++ * ++ * @param fence_wait_tracker Fence waiter tracker. ++ */ ++void mali_timeline_fence_wait_activate(struct mali_timeline_fence_wait_tracker *fence_wait_tracker); + -+ filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping; ++#endif /* __MALI_TIMELINE_FENCE_WAIT_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c +new file mode 100644 +index 000000000..bb7f6a04e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.c +@@ -0,0 +1,179 @@ ++/* ++ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++#include ++#include "mali_timeline_sync_fence.h" + -+ return 0; -+} ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_sync.h" + -+static int mali_release(struct inode *inode, struct file *filp) ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++/** ++ * Creates a sync fence tracker and a sync fence. Adds sync fence tracker to Timeline system and ++ * returns sync fence. The sync fence will be signaled when the sync fence tracker is activated. ++ * ++ * @param timeline Timeline. ++ * @param point Point on timeline. ++ * @return Sync fence that will be signaled when tracker is activated. ++ */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static struct sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point) ++#else ++static struct mali_internal_sync_fence *mali_timeline_sync_fence_create_and_add_tracker(struct mali_timeline *timeline, mali_timeline_point point) ++#endif +{ -+ _mali_osk_errcode_t err; -+ -+ /* input validation */ -+ if (mali_miscdevice.minor != iminor(inode)) { -+ MALI_PRINT_ERROR(("mali_release() Minor does not match\n")); -+ return -ENODEV; -+ } -+ -+ err = _mali_ukk_close((void **)&filp->private_data); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ struct mali_timeline_sync_fence_tracker *sync_fence_tracker; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence; ++#else ++ struct mali_internal_sync_fence *sync_fence; ++#endif ++ struct mali_timeline_fence fence; + -+ return 0; -+} ++ MALI_DEBUG_ASSERT_POINTER(timeline); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT != point); + -+int map_errcode(_mali_osk_errcode_t err) -+{ -+ switch (err) { -+ case _MALI_OSK_ERR_OK : -+ return 0; -+ case _MALI_OSK_ERR_FAULT: -+ return -EFAULT; -+ case _MALI_OSK_ERR_INVALID_FUNC: -+ return -ENOTTY; -+ case _MALI_OSK_ERR_INVALID_ARGS: -+ return -EINVAL; -+ case _MALI_OSK_ERR_NOMEM: -+ return -ENOMEM; -+ case _MALI_OSK_ERR_TIMEOUT: -+ return -ETIMEDOUT; -+ case _MALI_OSK_ERR_RESTARTSYSCALL: -+ return -ERESTARTSYS; -+ case _MALI_OSK_ERR_ITEM_NOT_FOUND: -+ return -ENOENT; -+ default: -+ return -EFAULT; ++ /* Allocate sync fence tracker. */ ++ sync_fence_tracker = _mali_osk_calloc(1, sizeof(struct mali_timeline_sync_fence_tracker)); ++ if (NULL == sync_fence_tracker) { ++ MALI_PRINT_ERROR(("Mali Timeline: sync_fence_tracker allocation failed\n")); ++ return NULL; + } -+} -+ -+static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ struct mali_session_data *session_data; -+ -+ MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg)); + -+ session_data = (struct mali_session_data *)filp->private_data; -+ if (NULL == session_data) { -+ MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n")); -+ return -ENOTTY; ++ /* Create sync flag. 
*/ ++ MALI_DEBUG_ASSERT_POINTER(timeline->sync_tl); ++ sync_fence_tracker->flag = mali_sync_flag_create(timeline->sync_tl, point); ++ if (NULL == sync_fence_tracker->flag) { ++ MALI_PRINT_ERROR(("Mali Timeline: sync_flag creation failed\n")); ++ _mali_osk_free(sync_fence_tracker); ++ return NULL; + } + -+ if (NULL == (void *)arg) { -+ MALI_DEBUG_PRINT(7, ("arg was NULL\n")); -+ return -ENOTTY; ++ /* Create sync fence from sync flag. */ ++ sync_fence = mali_sync_flag_create_fence(sync_fence_tracker->flag); ++ if (NULL == sync_fence) { ++ MALI_PRINT_ERROR(("Mali Timeline: sync_fence creation failed\n")); ++ mali_sync_flag_put(sync_fence_tracker->flag); ++ _mali_osk_free(sync_fence_tracker); ++ return NULL; + } + -+ switch (cmd) { -+ case MALI_IOC_WAIT_FOR_NOTIFICATION: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64))); -+ err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg); -+ break; -+ -+ case MALI_IOC_GET_API_VERSION_V2: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64))); -+ err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg); -+ break; ++ /* Setup fence for tracker. */ ++ _mali_osk_memset(&fence, 0, sizeof(struct mali_timeline_fence)); ++ fence.sync_fd = -1; ++ fence.points[timeline->id] = point; + -+ case MALI_IOC_GET_API_VERSION: -+ err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg); -+ break; ++ /* Finally, add the tracker to Timeline system. */ ++ mali_timeline_tracker_init(&sync_fence_tracker->tracker, MALI_TIMELINE_TRACKER_SYNC, &fence, sync_fence_tracker); ++ point = mali_timeline_system_add_tracker(timeline->system, &sync_fence_tracker->tracker, MALI_TIMELINE_NONE); ++ MALI_DEBUG_ASSERT(MALI_TIMELINE_NO_POINT == point); + -+ case MALI_IOC_POST_NOTIFICATION: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64))); -+ err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg); -+ break; ++ return sync_fence; ++} + -+ /* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. 
*/ -+#if 0 -+ case MALI_IOC_GET_MALI_VERSION_IN_RK30: -+ err = get_mali_version_in_rk30_wrapper(session_data, (_mali_uk_get_mali_version_in_rk30_s __user *)arg); -+ break; ++s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence) ++{ ++ u32 i; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence_acc = NULL; +#else -+ case MALI_IOC_GET_RK_KO_VERSION: -+ err = get_rk_ko_version_wrapper(session_data, (_mali_rk_ko_version_s __user *)arg); -+ break; ++ struct mali_internal_sync_fence *sync_fence_acc = NULL; +#endif -+ -+ case MALI_IOC_GET_USER_SETTINGS: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64))); -+ err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg); -+ break; -+ -+ case MALI_IOC_REQUEST_HIGH_PRIORITY: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64))); -+ err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PENDING_SUBMIT: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64))); -+ err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg); -+ break; -+ -+#if defined(CONFIG_MALI400_PROFILING) -+ case MALI_IOC_PROFILING_ADD_EVENT: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64))); -+ err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64))); -+ err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PROFILING_STREAM_FD_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64))); -+ err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg); -+ break; ++ MALI_DEBUG_ASSERT_POINTER(system); ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ case MALI_IOC_PROILING_CONTROL_SET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64))); -+ err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg); -+ break; ++ for (i = 0; i < MALI_TIMELINE_MAX; ++i) { ++ struct mali_timeline *timeline; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence; +#else -+ -+ case MALI_IOC_PROFILING_ADD_EVENT: /* FALL-THROUGH */ -+ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */ -+ MALI_DEBUG_PRINT(2, ("Profiling not supported\n")); -+ err = -ENOTTY; -+ break; ++ struct mali_internal_sync_fence *sync_fence; +#endif ++ if (MALI_TIMELINE_NO_POINT == fence->points[i]) continue; + -+ case MALI_IOC_PROFILING_MEMORY_USAGE_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64))); -+ err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg); -+ break; -+ -+ case MALI_IOC_MEM_ALLOC: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64))); -+ err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg); -+ break; ++ timeline = system->timelines[i]; ++ MALI_DEBUG_ASSERT_POINTER(timeline); + -+ case MALI_IOC_MEM_FREE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64))); -+ err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg); -+ break; 
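/*
 * Illustrative sketch only (not the patch's own wrapper code): the convention
 * the ioctl cases in the dispatcher hunk above rely on. Each case asserts at
 * build time that its argument struct keeps 64-bit alignment, so 32-bit and
 * 64-bit user space share one layout, then forwards to a *_wrapper() helper
 * that copies the struct in, calls the matching _mali_ukk_ function and turns
 * the OSK error code into a negative errno via map_errcode(). The wrapper
 * name below and the exact type of the ctx member are assumptions.
 */
#include <linux/uaccess.h>

static int example_get_api_version_v2_wrapper(struct mali_session_data *session_data,
					      _mali_uk_get_api_version_v2_s __user *uargs)
{
	_mali_uk_get_api_version_v2_s kargs;
	_mali_osk_errcode_t err;

	if (0 != copy_from_user(&kargs, uargs, sizeof(kargs)))
		return -EFAULT;

	/* The kernel side substitutes its own session handle for the ctx member. */
	kargs.ctx = (uintptr_t)session_data;

	err = _mali_ukk_get_api_version_v2(&kargs);
	if (_MALI_OSK_ERR_OK != err)
		return map_errcode(err);

	/* Hand the output members back to user space. */
	if (0 != copy_to_user(uargs, &kargs, sizeof(kargs)))
		return -EFAULT;

	return 0;
}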
++ sync_fence = mali_timeline_sync_fence_create_and_add_tracker(timeline, fence->points[i]); ++ if (NULL == sync_fence) goto error; + -+ case MALI_IOC_MEM_BIND: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64))); -+ err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg); -+ break; ++ if (NULL != sync_fence_acc) { ++ /* Merge sync fences. */ ++ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence); ++ if (NULL == sync_fence_acc) goto error; ++ } else { ++ /* This was the first sync fence created. */ ++ sync_fence_acc = sync_fence; ++ } ++ } + -+ case MALI_IOC_MEM_UNBIND: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64))); -+ err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg); -+ break; ++ if (-1 != fence->sync_fd) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_fence *sync_fence; ++ sync_fence = sync_fence_fdget(fence->sync_fd); ++#else ++ struct mali_internal_sync_fence *sync_fence; ++ sync_fence = mali_internal_sync_fence_fdget(fence->sync_fd); ++#endif + -+ case MALI_IOC_MEM_COW: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64))); -+ err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg); -+ break; ++ if (NULL == sync_fence) goto error; + -+ case MALI_IOC_MEM_COW_MODIFY_RANGE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64))); -+ err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg); -+ break; ++ if (NULL != sync_fence_acc) { ++ sync_fence_acc = mali_sync_fence_merge(sync_fence_acc, sync_fence); ++ if (NULL == sync_fence_acc) goto error; ++ } else { ++ sync_fence_acc = sync_fence; ++ } ++ } + -+ case MALI_IOC_MEM_RESIZE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64))); -+ err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg); -+ break; ++ if (NULL == sync_fence_acc) { ++ MALI_DEBUG_ASSERT_POINTER(system->signaled_sync_tl); + -+ case MALI_IOC_MEM_WRITE_SAFE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64))); -+ err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg); -+ break; ++ /* There was nothing to wait on, so return an already signaled fence. */ + -+ case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64))); -+ err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg); -+ break; ++ sync_fence_acc = mali_sync_timeline_create_signaled_fence(system->signaled_sync_tl); ++ if (NULL == sync_fence_acc) goto error; ++ } + -+ case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64))); -+ err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg); -+ break; ++ /* Return file descriptor for the accumulated sync fence. 
*/ ++ return mali_sync_fence_fd_alloc(sync_fence_acc); + -+ case MALI_IOC_MEM_DMA_BUF_GET_SIZE: -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64))); -+ err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg); ++error: ++ if (NULL != sync_fence_acc) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_fence_put(sync_fence_acc); +#else -+ MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n")); -+ err = -ENOTTY; ++ fput(sync_fence_acc->file); +#endif -+ break; -+ -+ case MALI_IOC_PP_START_JOB: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64))); -+ err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PP_AND_GP_START_JOB: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64))); -+ err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PP_NUMBER_OF_CORES_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64))); -+ err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PP_CORE_VERSION_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64))); -+ err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg); -+ break; -+ -+ case MALI_IOC_PP_DISABLE_WB: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64))); -+ err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg); -+ break; -+ -+ case MALI_IOC_GP2_START_JOB: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64))); -+ err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg); -+ break; ++ } + -+ case MALI_IOC_GP2_NUMBER_OF_CORES_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64))); -+ err = gp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_gp_number_of_cores_s __user *)arg); -+ break; ++ return -1; ++} + -+ case MALI_IOC_GP2_CORE_VERSION_GET: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64))); -+ err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg); -+ break; ++void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker) ++{ ++ mali_scheduler_mask schedule_mask = MALI_SCHEDULER_MASK_EMPTY; + -+ case MALI_IOC_GP2_SUSPEND_RESPONSE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64))); -+ err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg); -+ break; ++ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence_tracker->flag); + -+ case MALI_IOC_VSYNC_EVENT_REPORT: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64))); -+ err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg); -+ break; ++ MALI_DEBUG_PRINT(4, ("Mali Timeline: activation for sync fence tracker\n")); + -+ case MALI_IOC_TIMELINE_GET_LATEST_POINT: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64))); -+ err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg); -+ break; -+ case MALI_IOC_TIMELINE_WAIT: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), 
sizeof(u64))); -+ err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg); -+ break; -+ case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64))); -+ err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg); -+ break; -+ case MALI_IOC_SOFT_JOB_START: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64))); -+ err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg); -+ break; -+ case MALI_IOC_SOFT_JOB_SIGNAL: -+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64))); -+ err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg); -+ break; ++ /* Signal flag and release reference. */ ++ mali_sync_flag_signal(sync_fence_tracker->flag, 0); ++ mali_sync_flag_put(sync_fence_tracker->flag); + -+ default: -+ MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg)); -+ err = -ENOTTY; -+ }; ++ /* Nothing can wait on this tracker, so nothing to schedule after release. */ ++ schedule_mask = mali_timeline_tracker_release(&sync_fence_tracker->tracker); ++ MALI_DEBUG_ASSERT(MALI_SCHEDULER_MASK_EMPTY == schedule_mask); + -+ return err; ++ _mali_osk_free(sync_fence_tracker); +} -+ -+late_initcall_sync(mali_module_init); -+module_exit(mali_module_exit); -+ -+MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_VERSION(SVN_REV_STRING); -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h new file mode 100644 -index 000000000..be754cb15 +index 000000000..65e368ae7 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h -@@ -0,0 +1,36 @@ ++++ b/drivers/gpu/arm/mali400/mali/common/mali_timeline_sync_fence.h +@@ -0,0 +1,51 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -309417,40 +312278,55 @@ index 000000000..be754cb15 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_KERNEL_LINUX_H__ -+#define __MALI_KERNEL_LINUX_H__ ++/** ++ * @file mali_timeline_sync_fence.h ++ * ++ * This file contains code related to creating sync fences from timeline fences. ++ */ + -+#ifdef __cplusplus -+extern "C" { -+#endif ++#ifndef __MALI_TIMELINE_SYNC_FENCE_H__ ++#define __MALI_TIMELINE_SYNC_FENCE_H__ + -+#include /* character device definitions */ -+#include -+#include -+#include "mali_kernel_license.h" -+#include "mali_osk_types.h" -+#include ++#include "mali_timeline.h" + -+extern struct platform_device *mali_platform_device; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + -+/* After 3.19.0 kenrel droped CONFIG_PM_RUNTIME define,define by ourself */ -+#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) -+#define CONFIG_PM_RUNTIME 1 -+#endif ++/** ++ * Sync fence tracker. 
++ */ ++struct mali_timeline_sync_fence_tracker { ++ struct mali_sync_flag *flag; /**< Sync flag used to connect tracker and sync fence. */ ++ struct mali_timeline_tracker tracker; /**< Timeline tracker. */ ++}; + -+#ifdef __cplusplus -+} -+#endif ++/** ++ * Create a sync fence that will be signaled when @ref fence is signaled. ++ * ++ * @param system Timeline system. ++ * @param fence Fence to create sync fence from. ++ * @return File descriptor for new sync fence, or -1 on error. ++ */ ++s32 mali_timeline_sync_fence_create(struct mali_timeline_system *system, struct mali_timeline_fence *fence); + -+#endif /* __MALI_KERNEL_LINUX_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c ++/** ++ * Used by the Timeline system to activate a sync fence tracker. ++ * ++ * @param sync_fence_tracker Sync fence tracker. ++ * ++ */ ++void mali_timeline_sync_fence_activate(struct mali_timeline_sync_fence_tracker *sync_fence_tracker); ++ ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++#endif /* __MALI_TIMELINE_SYNC_FENCE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_ukk.h b/drivers/gpu/arm/mali400/mali/common/mali_ukk.h new file mode 100644 -index 000000000..7bda438fe +index 000000000..55a05c504 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c -@@ -0,0 +1,1410 @@ -+/** -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++++ b/drivers/gpu/arm/mali400/mali/common/mali_ukk.h +@@ -0,0 +1,551 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -309459,1414 +312335,1285 @@ index 000000000..7bda438fe + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+ +/** -+ * @file mali_kernel_sysfs.c -+ * Implementation of some sysfs data exports ++ * @file mali_ukk.h ++ * Defines the kernel-side interface of the user-kernel interface + */ + -+#include -+#include -+#include -+#include -+#include "mali_kernel_license.h" -+#include "mali_kernel_common.h" -+#include "mali_ukk.h" ++#ifndef __MALI_UKK_H__ ++#define __MALI_UKK_H__ + -+#if MALI_LICENSE_IS_GPL ++#include "mali_osk.h" ++#include "mali_uk_types.h" + -+#include -+#include -+#include -+#include -+#include -+#include "mali_kernel_sysfs.h" -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+#include -+#include "mali_osk_profiling.h" ++#ifdef __cplusplus ++extern "C" { +#endif + -+#include -+#include "mali_pm.h" -+#include "mali_pmu.h" -+#include "mali_group.h" -+#include "mali_gp.h" -+#include "mali_pp.h" -+#include "mali_l2_cache.h" -+#include "mali_hw_core.h" -+#include "mali_kernel_core.h" -+#include "mali_user_settings_db.h" -+#include "mali_profiling_internal.h" -+#include "mali_gp_job.h" -+#include "mali_pp_job.h" -+#include "mali_executor.h" -+ -+#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src) -+#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src) -+#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src) -+#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE) -+#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF) -+#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? 
MALI_TRUE : MALI_FALSE) -+#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF) ++/** ++ * @addtogroup uddapi Unified Device Driver (UDD) APIs ++ * ++ * @{ ++ */ + -+#define POWER_BUFFER_SIZE 3 ++/** ++ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs ++ * ++ * - The _mali_uk functions are an abstraction of the interface to the device ++ * driver. On certain OSs, this would be implemented via the IOCTL interface. ++ * On other OSs, it could be via extension of some Device Driver Class, or ++ * direct function call for Bare metal/RTOSs. ++ * - It is important to note that: ++ * - The Device Driver has implemented the _mali_ukk set of functions ++ * - The Base Driver calls the corresponding set of _mali_uku functions. ++ * - What requires porting is solely the calling mechanism from User-side to ++ * Kernel-side, and propagating back the results. ++ * - Each U/K function is associated with a (group, number) pair from ++ * \ref _mali_uk_functions to make it possible for a common function in the ++ * Base Driver and Device Driver to route User/Kernel calls from/to the ++ * correct _mali_uk function. For example, in an IOCTL system, the IOCTL number ++ * would be formed based on the group and number assigned to the _mali_uk ++ * function, as listed in \ref _mali_uk_functions. On the user-side, each ++ * _mali_uku function would just make an IOCTL with the IOCTL-code being an ++ * encoded form of the (group, number) pair. On the kernel-side, the Device ++ * Driver's IOCTL handler decodes the IOCTL-code back into a (group, number) ++ * pair, and uses this to determine which corresponding _mali_ukk should be ++ * called. ++ * - Refer to \ref _mali_uk_functions for more information about this ++ * (group, number) pairing. ++ * - In a system where there is no distinction between user and kernel-side, ++ * the U/K interface may be implemented as:@code ++ * MALI_STATIC_INLINE _mali_osk_errcode_t _mali_uku_examplefunction( _mali_uk_examplefunction_s *args ) ++ * { ++ * return mali_ukk_examplefunction( args ); ++ * } ++ * @endcode ++ * - Therefore, all U/K calls behave \em as \em though they were direct ++ * function calls (but the \b implementation \em need \em not be a direct ++ * function calls) ++ * ++ * @note Naming the _mali_uk functions the same on both User and Kernel sides ++ * on non-RTOS systems causes debugging issues when setting breakpoints. In ++ * this case, it is not clear which function the breakpoint is put on. ++ * Therefore the _mali_uk functions in user space are prefixed with \c _mali_uku ++ * and in kernel space with \c _mali_ukk. The naming for the argument ++ * structures is unaffected. ++ * ++ * - The _mali_uk functions are synchronous. ++ * - Arguments to the _mali_uk functions are passed in a structure. The only ++ * parameter passed to the _mali_uk functions is a pointer to this structure. ++ * This first member of this structure, ctx, is a pointer to a context returned ++ * by _mali_uku_open(). For example:@code ++ * typedef struct ++ * { ++ * void *ctx; ++ * u32 number_of_cores; ++ * } _mali_uk_get_gp_number_of_cores_s; ++ * @endcode ++ * ++ * - Each _mali_uk function has its own argument structure named after the ++ * function. The argument is distinguished by the _s suffix. ++ * - The argument types are defined by the base driver and user-kernel ++ * interface. ++ * - All _mali_uk functions return a standard \ref _mali_osk_errcode_t. ++ * - Only arguments of type input or input/output need be initialized before ++ * calling a _mali_uk function. 
++ * - Arguments of type output and input/output are only valid when the ++ * _mali_uk function returns \ref _MALI_OSK_ERR_OK. ++ * - The \c ctx member is always invalid after it has been used by a ++ * _mali_uk function, except for the context management functions ++ * ++ * ++ * \b Interface \b restrictions ++ * ++ * The requirements of the interface mean that an implementation of the ++ * User-kernel interface may do no 'real' work. For example, the following are ++ * illegal in the User-kernel implementation: ++ * - Calling functions necessary for operation on all systems, which would ++ * not otherwise get called on RTOS systems. ++ * - For example, a U/K interface that calls multiple _mali_ukk functions ++ * during one particular U/K call. This could not be achieved by the same code ++ * which uses direct function calls for the U/K interface. ++ * - Writing in values to the args members, when otherwise these members would ++ * not hold a useful value for a direct function call U/K interface. ++ * - For example, U/K interface implementation that take NULL members in ++ * their arguments structure from the user side, but those members are ++ * replaced with non-NULL values in the kernel-side of the U/K interface ++ * implementation. A scratch area for writing data is one such example. In this ++ * case, a direct function call U/K interface would segfault, because no code ++ * would be present to replace the NULL pointer with a meaningful pointer. ++ * - Note that we discourage the case where the U/K implementation changes ++ * a NULL argument member to non-NULL, and then the Device Driver code (outside ++ * of the U/K layer) re-checks this member for NULL, and corrects it when ++ * necessary. Whilst such code works even on direct function call U/K ++ * intefaces, it reduces the testing coverage of the Device Driver code. This ++ * is because we have no way of testing the NULL == value path on an OS ++ * implementation. ++ * ++ * A number of allowable examples exist where U/K interfaces do 'real' work: ++ * - The 'pointer switching' technique for \ref _mali_ukk_get_system_info ++ * - In this case, without the pointer switching on direct function call ++ * U/K interface, the Device Driver code still sees the same thing: a pointer ++ * to which it can write memory. This is because such a system has no ++ * distinction between a user and kernel pointer. ++ * - Writing an OS-specific value into the ukk_private member for ++ * _mali_ukk_mem_mmap(). ++ * - In this case, this value is passed around by Device Driver code, but ++ * its actual value is never checked. Device Driver code simply passes it from ++ * the U/K layer to the OSK layer, where it can be acted upon. In this case, ++ * \em some OS implementations of the U/K (_mali_ukk_mem_mmap()) and OSK ++ * (_mali_osk_mem_mapregion_init()) functions will collaborate on the ++ * meaning of ukk_private member. On other OSs, it may be unused by both ++ * U/K and OSK layers ++ * - Therefore, on error inside the U/K interface implementation itself, ++ * it will be as though the _mali_ukk function itself had failed, and cleaned ++ * up after itself. ++ * - Compare this to a direct function call U/K implementation, where all ++ * error cleanup is handled by the _mali_ukk function itself. The direct ++ * function call U/K interface implementation is automatically atomic. ++ * ++ * The last example highlights a consequence of all U/K interface ++ * implementations: they must be atomic with respect to the Device Driver code. 
++ * And therefore, should Device Driver code succeed but the U/K implementation ++ * fail afterwards (but before return to user-space), then the U/K ++ * implementation must cause appropriate cleanup actions to preserve the ++ * atomicity of the interface. ++ * ++ * @{ ++ */ + -+static struct dentry *mali_debugfs_dir = NULL; + -+typedef enum { -+ _MALI_DEVICE_SUSPEND, -+ _MALI_DEVICE_RESUME, -+ _MALI_DEVICE_DVFS_PAUSE, -+ _MALI_DEVICE_DVFS_RESUME, -+ _MALI_MAX_EVENTS -+} _mali_device_debug_power_events; ++/** @defgroup _mali_uk_context U/K Context management ++ * ++ * These functions allow for initialisation of the user-kernel interface once per process. ++ * ++ * Generally the context will store the OS specific object to communicate with the kernel device driver and further ++ * state information required by the specific implementation. The context is shareable among all threads in the caller process. ++ * ++ * On IOCTL systems, this is likely to be a file descriptor as a result of opening the kernel device driver. ++ * ++ * On a bare-metal/RTOS system with no distinction between kernel and ++ * user-space, the U/K interface simply calls the _mali_ukk variant of the ++ * function by direct function call. In this case, the context returned is the ++ * mali_session_data from _mali_ukk_open(). ++ * ++ * The kernel side implementations of the U/K interface expect the first member of the argument structure to ++ * be the context created by _mali_uku_open(). On some OS implementations, the meaning of this context ++ * will be different between user-side and kernel-side. In which case, the kernel-side will need to replace this context ++ * with the kernel-side equivalent, because user-side will not have access to kernel-side data. The context parameter ++ * in the argument structure therefore has to be of type input/output. ++ * ++ * It should be noted that the caller cannot reuse the \c ctx member of U/K ++ * argument structure after a U/K call, because it may be overwritten. Instead, ++ * the context handle must always be stored elsewhere, and copied into ++ * the appropriate U/K argument structure for each user-side call to ++ * the U/K interface. This is not usually a problem, since U/K argument ++ * structures are usually placed on the stack. ++ * ++ * @{ */ + -+static const char *const mali_power_events[_MALI_MAX_EVENTS] = { -+ [_MALI_DEVICE_SUSPEND] = "suspend", -+ [_MALI_DEVICE_RESUME] = "resume", -+ [_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause", -+ [_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume", -+}; ++/** @brief Begin a new Mali Device Driver session ++ * ++ * This is used to obtain a per-process context handle for all future U/K calls. ++ * ++ * @param context pointer to storage to return a (void*)context handle. ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_open(void **context); + -+static mali_bool power_always_on_enabled = MALI_FALSE; ++/** @brief End a Mali Device Driver session ++ * ++ * This should be called when the process no longer requires use of the Mali Device Driver. ++ * ++ * The context handle must not be used after it has been closed. ++ * ++ * @param context pointer to a stored (void*)context handle. ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. 
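/*
 * Illustrative sketch only: the per-process session lifecycle described
 * above, in the direct-function-call configuration of the U/K interface.
 * _mali_ukk_open() hands back the context that later calls carry in their
 * ctx member; _mali_ukk_close(), declared just below, ends the session.
 * The helper name is an assumption made for the example.
 */
static _mali_osk_errcode_t example_session_roundtrip(void)
{
	void *context = NULL;
	_mali_osk_errcode_t err;

	err = _mali_ukk_open(&context);
	if (_MALI_OSK_ERR_OK != err)
		return err;

	/*
	 * ... copy 'context' into the ctx member of each argument struct
	 * before issuing other _mali_ukk_ calls ...
	 */

	return _mali_ukk_close(&context);
}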
++ */ ++_mali_osk_errcode_t _mali_ukk_close(void **context); + -+static int open_copy_private_data(struct inode *inode, struct file *filp) -+{ -+ filp->private_data = inode->i_private; -+ return 0; -+} ++/** @} */ /* end group _mali_uk_context */ + -+static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ int r; -+ char buffer[64]; -+ struct mali_group *group; + -+ group = (struct mali_group *)filp->private_data; -+ MALI_DEBUG_ASSERT_POINTER(group); ++/** @addtogroup _mali_uk_core U/K Core ++ * ++ * The core functions provide the following functionality: ++ * - verify that the user and kernel API are compatible ++ * - retrieve information about the cores and memory banks in the system ++ * - wait for the result of jobs started on a core ++ * ++ * @{ */ + -+ r = snprintf(buffer, 64, "%u\n", -+ mali_executor_group_is_disabled(group) ? 0 : 1); ++/** @brief Waits for a job notification. ++ * ++ * Sleeps until notified or a timeout occurs. Returns information about the notification. ++ * ++ * @param args see _mali_uk_wait_for_notification_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_wait_for_notification(_mali_uk_wait_for_notification_s *args); + -+ return simple_read_from_buffer(buf, count, offp, buffer, r); -+} ++/** @brief Post a notification to the notification queue of this application. ++ * ++ * @param args see _mali_uk_post_notification_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_post_notification(_mali_uk_post_notification_s *args); + -+static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) -+{ -+ int r; -+ char buffer[64]; -+ unsigned long val; -+ struct mali_group *group; ++/** @brief Verifies if the user and kernel side of this API are compatible. ++ * ++ * This function is obsolete, but kept to allow old, incompatible user space ++ * clients to robustly detect the incompatibility. ++ * ++ * @param args see _mali_uk_get_api_version_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_api_version(_mali_uk_get_api_version_s *args); + -+ group = (struct mali_group *)filp->private_data; -+ MALI_DEBUG_ASSERT_POINTER(group); ++/** @brief Verifies if the user and kernel side of this API are compatible. ++ * ++ * @param args see _mali_uk_get_api_version_v2_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_api_version_v2(_mali_uk_get_api_version_v2_s *args); + -+ if (count >= sizeof(buffer)) { -+ return -ENOMEM; -+ } ++/** @brief Get the user space settings applicable for calling process. ++ * ++ * @param args see _mali_uk_get_user_settings_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args); + -+ if (copy_from_user(&buffer[0], buf, count)) { -+ return -EFAULT; -+ } -+ buffer[count] = '\0'; ++/** @brief Get a user space setting applicable for calling process. 
++ * ++ * @param args see _mali_uk_get_user_setting_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args); + -+ r = kstrtoul(&buffer[0], 10, &val); -+ if (0 != r) { -+ return -EINVAL; -+ } ++/* @brief Grant or deny high priority scheduling for this session. ++ * ++ * @param args see _mali_uk_request_high_priority_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_request_high_priority(_mali_uk_request_high_priority_s *args); + -+ switch (val) { -+ case 1: -+ mali_executor_group_enable(group); -+ break; -+ case 0: -+ mali_executor_group_disable(group); -+ break; -+ default: -+ return -EINVAL; -+ break; -+ } ++/** @brief Make process sleep if the pending big job in kernel >= MALI_MAX_PENDING_BIG_JOB ++ * ++ */ ++_mali_osk_errcode_t _mali_ukk_pending_submit(_mali_uk_pending_submit_s *args); + -+ *offp += count; -+ return count; -+} ++/** @} */ /* end group _mali_uk_core */ + -+static const struct file_operations group_enabled_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = group_enabled_read, -+ .write = group_enabled_write, -+}; + -+static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ int r; -+ char buffer[64]; -+ struct mali_hw_core *hw_core; ++/** @addtogroup _mali_uk_memory U/K Memory ++ * ++ * The memory functions provide functionality with and without a Mali-MMU present. ++ * ++ * For Mali-MMU based systems, the following functionality is provided: ++ * - Initialize and terminate MALI virtual address space ++ * - Allocate/deallocate physical memory to a MALI virtual address range and map into/unmap from the ++ * current process address space ++ * - Map/unmap external physical memory into the MALI virtual address range ++ * ++ * For Mali-nonMMU based systems: ++ * - Allocate/deallocate MALI memory ++ * ++ * @{ */ + -+ hw_core = (struct mali_hw_core *)filp->private_data; -+ MALI_DEBUG_ASSERT_POINTER(hw_core); ++/** @brief Map Mali Memory into the current user process ++ * ++ * Maps Mali memory into the current user process in a generic way. ++ * ++ * This function is to be used for Mali-MMU mode. The function is available in both Mali-MMU and Mali-nonMMU modes, ++ * but should not be called by a user process in Mali-nonMMU mode. ++ * ++ * The implementation and operation of _mali_ukk_mem_mmap() is dependant on whether the driver is built for Mali-MMU ++ * or Mali-nonMMU: ++ * - In the nonMMU case, _mali_ukk_mem_mmap() requires a physical address to be specified. For this reason, an OS U/K ++ * implementation should not allow this to be called from user-space. In any case, nonMMU implementations are ++ * inherently insecure, and so the overall impact is minimal. Mali-MMU mode should be used if security is desired. ++ * - In the MMU case, _mali_ukk_mem_mmap() the _mali_uk_mem_mmap_s::phys_addr ++ * member is used for the \em Mali-virtual address desired for the mapping. The ++ * implementation of _mali_ukk_mem_mmap() will allocate both the CPU-virtual ++ * and CPU-physical addresses, and can cope with mapping a contiguous virtual ++ * address range to a sequence of non-contiguous physical pages. 
In this case, ++ * the CPU-physical addresses are not communicated back to the user-side, as ++ * they are unnecsessary; the \em Mali-virtual address range must be used for ++ * programming Mali structures. ++ * ++ * In the second (MMU) case, _mali_ukk_mem_mmap() handles management of ++ * CPU-virtual and CPU-physical ranges, but the \em caller must manage the ++ * \em Mali-virtual address range from the user-side. ++ * ++ * @note Mali-virtual address ranges are entirely separate between processes. ++ * It is not possible for a process to accidentally corrupt another process' ++ * \em Mali-virtual address space. ++ * ++ * @param args see _mali_uk_mem_mmap_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_mem_mmap(_mali_uk_mem_mmap_s *args); + -+ r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr); ++/** @brief Unmap Mali Memory from the current user process ++ * ++ * Unmaps Mali memory from the current user process in a generic way. This only operates on Mali memory supplied ++ * from _mali_ukk_mem_mmap(). ++ * ++ * @param args see _mali_uk_mem_munmap_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_mem_munmap(_mali_uk_mem_munmap_s *args); + -+ return simple_read_from_buffer(buf, count, offp, buffer, r); -+} -+ -+static const struct file_operations hw_core_base_addr_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = hw_core_base_addr_read, -+}; -+ -+static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data); -+ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data); -+ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data); -+ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data); -+ char buf[64]; -+ int r; -+ u32 val; -+ -+ if (MALI_TRUE == is_pp) { -+ /* PP counter */ -+ if (MALI_TRUE == is_sub_job) { -+ /* Get counter for a particular sub job */ -+ if (0 == src_id) { -+ val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job); -+ } else { -+ val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job); -+ } -+ } else { -+ /* Get default counter for all PP sub jobs */ -+ if (0 == src_id) { -+ val = mali_pp_job_get_pp_counter_global_src0(); -+ } else { -+ val = mali_pp_job_get_pp_counter_global_src1(); -+ } -+ } -+ } else { -+ /* GP counter */ -+ if (0 == src_id) { -+ val = mali_gp_job_get_gp_counter_src0(); -+ } else { -+ val = mali_gp_job_get_gp_counter_src1(); -+ } -+ } -+ -+ if (MALI_HW_CORE_NO_COUNTER == val) { -+ r = snprintf(buf, 64, "-1\n"); -+ } else { -+ r = snprintf(buf, 64, "%u\n", val); -+ } -+ -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} -+ -+static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data); -+ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data); -+ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data); -+ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data); -+ char buf[64]; -+ long val; -+ int ret; -+ -+ if (cnt >= sizeof(buf)) { -+ return -EINVAL; -+ } -+ -+ if (copy_from_user(&buf, ubuf, cnt)) { 
-+ return -EFAULT; -+ } -+ -+ buf[cnt] = 0; -+ -+ ret = kstrtol(buf, 10, &val); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (val < 0) { -+ /* any negative input will disable counter */ -+ val = MALI_HW_CORE_NO_COUNTER; -+ } -+ -+ if (MALI_TRUE == is_pp) { -+ /* PP counter */ -+ if (MALI_TRUE == is_sub_job) { -+ /* Set counter for a particular sub job */ -+ if (0 == src_id) { -+ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val); -+ } else { -+ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val); -+ } -+ } else { -+ /* Set default counter for all PP sub jobs */ -+ if (0 == src_id) { -+ mali_pp_job_set_pp_counter_global_src0((u32)val); -+ } else { -+ mali_pp_job_set_pp_counter_global_src1((u32)val); -+ } -+ } -+ } else { -+ /* GP counter */ -+ if (0 == src_id) { -+ mali_gp_job_set_gp_counter_src0((u32)val); -+ } else { -+ mali_gp_job_set_gp_counter_src1((u32)val); -+ } -+ } -+ -+ *ppos += cnt; -+ return cnt; -+} -+ -+static const struct file_operations profiling_counter_src_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = profiling_counter_src_read, -+ .write = profiling_counter_src_write, -+}; -+ -+static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) -+{ -+ char buf[64]; -+ int r; -+ u32 val; -+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; -+ -+ if (0 == src_id) { -+ val = mali_l2_cache_core_get_counter_src0(l2_core); -+ } else { -+ val = mali_l2_cache_core_get_counter_src1(l2_core); -+ } -+ -+ if (MALI_HW_CORE_NO_COUNTER == val) { -+ r = snprintf(buf, 64, "-1\n"); -+ } else { -+ r = snprintf(buf, 64, "%u\n", val); -+ } -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} -+ -+static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) -+{ -+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; -+ char buf[64]; -+ long val; -+ int ret; -+ -+ if (cnt >= sizeof(buf)) { -+ return -EINVAL; -+ } -+ -+ if (copy_from_user(&buf, ubuf, cnt)) { -+ return -EFAULT; -+ } -+ -+ buf[cnt] = 0; -+ -+ ret = kstrtol(buf, 10, &val); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (val < 0) { -+ /* any negative input will disable counter */ -+ val = MALI_HW_CORE_NO_COUNTER; -+ } -+ -+ mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val); -+ -+ *ppos += cnt; -+ return cnt; -+} ++/** @brief Determine the buffer size necessary for an MMU page table dump. ++ * @param args see _mali_uk_query_mmu_page_table_dump_size_s in mali_utgard_uk_types.h ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_query_mmu_page_table_dump_size(_mali_uk_query_mmu_page_table_dump_size_s *args); ++/** @brief Dump MMU Page tables. ++ * @param args see _mali_uk_dump_mmu_page_table_s in mali_utgard_uk_types.h ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_dump_mmu_page_table(_mali_uk_dump_mmu_page_table_s *args); + -+static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) -+{ -+ char buf[64]; -+ long val; -+ int ret; -+ u32 l2_id; -+ struct mali_l2_cache_core *l2_cache; ++/** @brief Write user data to specified Mali memory without causing segfaults. 
++ * @param args see _mali_uk_mem_write_safe_s in mali_utgard_uk_types.h ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args); + -+ if (cnt >= sizeof(buf)) { -+ return -EINVAL; -+ } ++/** @} */ /* end group _mali_uk_memory */ + -+ if (copy_from_user(&buf, ubuf, cnt)) { -+ return -EFAULT; -+ } + -+ buf[cnt] = 0; ++/** @addtogroup _mali_uk_pp U/K Fragment Processor ++ * ++ * The Fragment Processor (aka PP (Pixel Processor)) functions provide the following functionality: ++ * - retrieving version of the fragment processors ++ * - determine number of fragment processors ++ * - starting a job on a fragment processor ++ * ++ * @{ */ + -+ ret = kstrtol(buf, 10, &val); -+ if (ret < 0) { -+ return ret; -+ } ++/** @brief Issue a request to start a new job on a Fragment Processor. ++ * ++ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can ++ * try to start the job again. ++ * ++ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job ++ * which the hardware hasn't actually started processing yet. In this case the new job will be started instead and the ++ * existing one returned, otherwise the new job is started and the status field args->status is set to ++ * _MALI_UK_START_JOB_STARTED. ++ * ++ * Job completion can be awaited with _mali_ukk_wait_for_notification(). ++ * ++ * @param ctx user-kernel context (mali_session) ++ * @param uargs see _mali_uk_pp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data! ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_pp_start_job(void *ctx, _mali_uk_pp_start_job_s *uargs); + -+ if (val < 0) { -+ /* any negative input will disable counter */ -+ val = MALI_HW_CORE_NO_COUNTER; -+ } ++/** ++ * @brief Issue a request to start new jobs on both Vertex Processor and Fragment Processor. ++ * ++ * @note Will call into @ref _mali_ukk_pp_start_job and @ref _mali_ukk_gp_start_job. ++ * ++ * @param ctx user-kernel context (mali_session) ++ * @param uargs see _mali_uk_pp_and_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data! ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_pp_and_gp_start_job(void *ctx, _mali_uk_pp_and_gp_start_job_s *uargs); + -+ l2_id = 0; -+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); -+ while (NULL != l2_cache) { -+ mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val); ++/** @brief Returns the number of Fragment Processors in the system ++ * ++ * @param args see _mali_uk_get_pp_number_of_cores_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_pp_number_of_cores(_mali_uk_get_pp_number_of_cores_s *args); + -+ /* try next L2 */ -+ l2_id++; -+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); -+ } ++/** @brief Returns the version that all Fragment Processor cores are compatible with. ++ * ++ * This function may only be called when _mali_ukk_get_pp_number_of_cores() indicated at least one Fragment ++ * Processor core is available. 
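/*
 * Illustrative sketch only: the submit/requeue/await pattern the Fragment
 * Processor documentation above describes, written as if for the
 * direct-function-call configuration where uargs is an ordinary kernel
 * pointer. The status field name comes from the documentation above; the
 * helper name is an assumption.
 */
static void example_handle_pp_submit(void *session_ctx,
				     _mali_uk_pp_start_job_s *job_args)
{
	if (_MALI_OSK_ERR_OK != _mali_ukk_pp_start_job(session_ctx, job_args))
		return; /* request failed outright */

	if (_MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE == job_args->status) {
		/* Not accepted this time round; the caller may submit the job again. */
		return;
	}

	/* _MALI_UK_START_JOB_STARTED: completion is reported later as a
	 * notification and picked up with _mali_ukk_wait_for_notification(). */
}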
++ * ++ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_pp_core_version(_mali_uk_get_pp_core_version_s *args); + -+ *ppos += cnt; -+ return cnt; -+} ++/** @brief Disable Write-back unit(s) on specified job ++ * ++ * @param args see _mali_uk_get_pp_core_version_s in "mali_utgard_uk_types.h" ++ */ ++void _mali_ukk_pp_job_disable_wb(_mali_uk_pp_disable_wb_s *args); + -+static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0); -+} + -+static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1); -+} ++/** @} */ /* end group _mali_uk_pp */ + -+static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0); -+} + -+static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1); -+} ++/** @addtogroup _mali_uk_gp U/K Vertex Processor ++ * ++ * The Vertex Processor (aka GP (Geometry Processor)) functions provide the following functionality: ++ * - retrieving version of the Vertex Processors ++ * - determine number of Vertex Processors available ++ * - starting a job on a Vertex Processor ++ * ++ * @{ */ + -+static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0); -+} ++/** @brief Issue a request to start a new job on a Vertex Processor. ++ * ++ * If the request fails args->status is set to _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE and you can ++ * try to start the job again. ++ * ++ * An existing job could be returned for requeueing if the new job has a higher priority than a previously started job ++ * which the hardware hasn't actually started processing yet. In this case the new job will be started and the ++ * existing one returned, otherwise the new job is started and the status field args->status is set to ++ * _MALI_UK_START_JOB_STARTED. ++ * ++ * Job completion can be awaited with _mali_ukk_wait_for_notification(). ++ * ++ * @param ctx user-kernel context (mali_session) ++ * @param uargs see _mali_uk_gp_start_job_s in "mali_utgard_uk_types.h". Use _mali_osk_copy_from_user to retrieve data! ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_gp_start_job(void *ctx, _mali_uk_gp_start_job_s *uargs); + -+static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1); -+} ++/** @brief Returns the number of Vertex Processors in the system. ++ * ++ * @param args see _mali_uk_get_gp_number_of_cores_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. 
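/*
 * Illustrative sketch only: querying the Vertex Processor configuration with
 * the call declared just below, using the argument layout shown in the
 * example structure earlier in this header (a ctx member followed by
 * number_of_cores). The helper name is an assumption; a zero return folds
 * the error path and the "no cores" case together.
 */
static u32 example_count_gp_cores(void *session_ctx)
{
	_mali_uk_get_gp_number_of_cores_s args = { 0 };

	args.ctx = session_ctx;
	if (_MALI_OSK_ERR_OK != _mali_ukk_get_gp_number_of_cores(&args))
		return 0;

	/* Core-version queries are only meaningful when this is non-zero. */
	return args.number_of_cores;
}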
++ */ ++_mali_osk_errcode_t _mali_ukk_get_gp_number_of_cores(_mali_uk_get_gp_number_of_cores_s *args); + -+static const struct file_operations l2_l2x_counter_src0_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = l2_l2x_counter_src0_read, -+ .write = l2_l2x_counter_src0_write, -+}; ++/** @brief Returns the version that all Vertex Processor cores are compatible with. ++ * ++ * This function may only be called when _mali_uk_get_gp_number_of_cores() indicated at least one Vertex ++ * Processor core is available. ++ * ++ * @param args see _mali_uk_get_gp_core_version_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_get_gp_core_version(_mali_uk_get_gp_core_version_s *args); + -+static const struct file_operations l2_l2x_counter_src1_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = l2_l2x_counter_src1_read, -+ .write = l2_l2x_counter_src1_write, -+}; ++/** @brief Resume or abort suspended Vertex Processor jobs. ++ * ++ * After receiving notification that a Vertex Processor job was suspended from ++ * _mali_ukk_wait_for_notification() you can use this function to resume or abort the job. ++ * ++ * @param args see _mali_uk_gp_suspend_response_s in "mali_utgard_uk_types.h" ++ * @return _MALI_OSK_ERR_OK on success, otherwise a suitable _mali_osk_errcode_t on failure. ++ */ ++_mali_osk_errcode_t _mali_ukk_gp_suspend_response(_mali_uk_gp_suspend_response_s *args); + -+static const struct file_operations l2_all_counter_src0_fops = { -+ .owner = THIS_MODULE, -+ .write = l2_all_counter_src0_write, -+}; ++/** @} */ /* end group _mali_uk_gp */ + -+static const struct file_operations l2_all_counter_src1_fops = { -+ .owner = THIS_MODULE, -+ .write = l2_all_counter_src1_write, -+}; ++#if defined(CONFIG_MALI400_PROFILING) ++/** @addtogroup _mali_uk_profiling U/K Timeline profiling module ++ * @{ */ + -+static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) -+{ -+ char buf[64]; -+ int r; -+ u32 src0 = 0; -+ u32 val0 = 0; -+ u32 src1 = 0; -+ u32 val1 = 0; -+ u32 val = -1; -+ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; ++/** @brief Add event to profiling buffer. ++ * ++ * @param args see _mali_uk_profiling_add_event_s in "mali_utgard_uk_types.h" ++ */ ++_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args); + -+ mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1); ++/** @brief Get profiling stream fd. ++ * ++ * @param args see _mali_uk_profiling_stream_fd_get_s in "mali_utgard_uk_types.h" ++ */ ++_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args); + -+ if (0 == src_id) { -+ if (MALI_HW_CORE_NO_COUNTER != val0) { -+ val = val0; -+ } -+ } else { -+ if (MALI_HW_CORE_NO_COUNTER != val1) { -+ val = val1; -+ } -+ } ++/** @brief Profiling control set. 
++ * ++ * @param args see _mali_uk_profiling_control_set_s in "mali_utgard_uk_types.h" ++ */ ++_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args); + -+ r = snprintf(buf, 64, "%u\n", val); ++/** @} */ /* end group _mali_uk_profiling */ ++#endif + -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++/** @addtogroup _mali_uk_vsync U/K VSYNC reporting module ++ * @{ */ + -+static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0); -+} ++/** @brief Report events related to vsync. ++ * ++ * @note Events should be reported when starting to wait for vsync and when the ++ * waiting is finished. This information can then be used in kernel space to ++ * complement the GPU utilization metric. ++ * ++ * @param args see _mali_uk_vsync_event_report_s in "mali_utgard_uk_types.h" ++ */ ++_mali_osk_errcode_t _mali_ukk_vsync_event_report(_mali_uk_vsync_event_report_s *args); + -+static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1); -+} ++/** @} */ /* end group _mali_uk_vsync */ + -+static const struct file_operations l2_l2x_counter_val0_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = l2_l2x_counter_val0_read, -+}; ++/** @addtogroup _mali_sw_counters_report U/K Software counter reporting ++ * @{ */ + -+static const struct file_operations l2_l2x_counter_val1_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = l2_l2x_counter_val1_read, -+}; ++/** @brief Report software counters. ++ * ++ * @param args see _mali_uk_sw_counters_report_s in "mali_uk_types.h" ++ */ ++_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args); + -+static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ unsigned long val; -+ int ret; -+ char buf[32]; ++/** @} */ /* end group _mali_sw_counters_report */ + -+ cnt = min(cnt, sizeof(buf) - 1); -+ if (copy_from_user(buf, ubuf, cnt)) { -+ return -EFAULT; -+ } -+ buf[cnt] = '\0'; ++/** @} */ /* end group u_k_api */ + -+ ret = kstrtoul(buf, 10, &val); -+ if (0 != ret) { -+ return ret; -+ } ++/** @} */ /* end group uddapi */ + -+ /* Update setting (not exactly thread safe) */ -+ if (1 == val && MALI_FALSE == power_always_on_enabled) { -+ power_always_on_enabled = MALI_TRUE; -+ _mali_osk_pm_dev_ref_get_sync(); -+ } else if (0 == val && MALI_TRUE == power_always_on_enabled) { -+ power_always_on_enabled = MALI_FALSE; -+ _mali_osk_pm_dev_ref_put(); -+ } ++u32 _mali_ukk_report_memory_usage(void); + -+ *ppos += cnt; -+ return cnt; -+} ++u32 _mali_ukk_report_total_memory_size(void); + -+static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ if (MALI_TRUE == power_always_on_enabled) { -+ return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2); -+ } else { -+ return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2); -+ } -+} ++u32 _mali_ukk_utilization_gp_pp(void); + -+static const struct file_operations power_always_on_fops = { -+ .owner = THIS_MODULE, -+ .read = power_always_on_read, -+ .write = power_always_on_write, -+}; ++u32 _mali_ukk_utilization_gp(void); + -+static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ if (!strncmp(ubuf, 
mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) { -+ mali_pm_os_suspend(MALI_TRUE); -+ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) { -+ mali_pm_os_resume(); -+ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) { -+ mali_dev_pause(); -+ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) { -+ mali_dev_resume(); -+ } -+ *ppos += cnt; -+ return cnt; -+} ++u32 _mali_ukk_utilization_pp(void); + -+static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig) -+{ -+ file->f_pos = offset; -+ return 0; ++#ifdef __cplusplus +} ++#endif + -+static const struct file_operations power_power_events_fops = { -+ .owner = THIS_MODULE, -+ .write = power_power_events_write, -+ .llseek = power_power_events_seek, -+}; -+ -+#if MALI_STATE_TRACKING -+static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v) -+{ -+ u32 len = 0; -+ u32 size; -+ char *buf; -+ -+ size = seq_get_buf(seq_file, &buf); -+ -+ if (!size) { -+ return -ENOMEM; -+ } -+ -+ /* Create the internal state dump. */ -+ len = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING); -+ len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE); -+ -+ len += _mali_kernel_core_dump_state(buf + len, size - len); -+ -+ seq_commit(seq_file, len); -+ -+ return 0; -+} ++#endif /* __MALI_UKK_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c +new file mode 100644 +index 000000000..1911eff87 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.c +@@ -0,0 +1,147 @@ ++/** ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static int mali_seq_internal_state_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, mali_seq_internal_state_show, NULL); -+} ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_ukk.h" ++#include "mali_uk_types.h" ++#include "mali_user_settings_db.h" ++#include "mali_session.h" + -+static const struct file_operations mali_seq_internal_state_fops = { -+ .owner = THIS_MODULE, -+ .open = mali_seq_internal_state_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+#endif /* MALI_STATE_TRACKING */ ++static u32 mali_user_settings[_MALI_UK_USER_SETTING_MAX]; ++const char *_mali_uk_user_setting_descriptions[] = _MALI_UK_USER_SETTING_DESCRIPTIONS; + -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++static void mali_user_settings_notify(_mali_uk_user_setting_t setting, u32 value) +{ -+ char buf[64]; -+ int r; ++ mali_bool done = MALI_FALSE; + -+ r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 
1 : 0); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++ /* ++ * This function gets a bit complicated because we can't hold the session lock while ++ * allocating notification objects. ++ */ + -+static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ unsigned long val; -+ int ret; ++ while (!done) { ++ u32 i; ++ u32 num_sessions_alloc; ++ u32 num_sessions_with_lock; ++ u32 used_notification_objects = 0; ++ _mali_osk_notification_t **notobjs; + -+ if (cnt >= sizeof(buf)) { -+ return -EINVAL; -+ } ++ /* Pre allocate the number of notifications objects we need right now (might change after lock has been taken) */ ++ num_sessions_alloc = mali_session_get_count(); ++ if (0 == num_sessions_alloc) { ++ /* No sessions to report to */ ++ return; ++ } + -+ if (copy_from_user(&buf, ubuf, cnt)) { -+ return -EFAULT; -+ } ++ notobjs = (_mali_osk_notification_t **)_mali_osk_malloc(sizeof(_mali_osk_notification_t *) * num_sessions_alloc); ++ if (NULL == notobjs) { ++ MALI_PRINT_ERROR(("Failed to notify user space session about num PP core change (alloc failure)\n")); ++ return; ++ } + -+ buf[cnt] = 0; ++ for (i = 0; i < num_sessions_alloc; i++) { ++ notobjs[i] = _mali_osk_notification_create(_MALI_NOTIFICATION_SETTINGS_CHANGED, ++ sizeof(_mali_uk_settings_changed_s)); ++ if (NULL != notobjs[i]) { ++ _mali_uk_settings_changed_s *data; ++ data = notobjs[i]->result_buffer; + -+ ret = kstrtoul(buf, 10, &val); -+ if (ret < 0) { -+ return ret; -+ } ++ data->setting = setting; ++ data->value = value; ++ } else { ++ MALI_PRINT_ERROR(("Failed to notify user space session about setting change (alloc failure %u)\n", i)); ++ } ++ } + -+ if (val != 0) { -+ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */ ++ mali_session_lock(); + -+ /* check if we are already recording */ -+ if (MALI_TRUE == _mali_internal_profiling_is_recording()) { -+ MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n")); -+ return -EFAULT; -+ } ++ /* number of sessions will not change while we hold the lock */ ++ num_sessions_with_lock = mali_session_get_count(); + -+ /* check if we need to clear out an old recording first */ -+ if (MALI_TRUE == _mali_internal_profiling_have_recording()) { -+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) { -+ MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n")); -+ return -EFAULT; ++ if (num_sessions_alloc >= num_sessions_with_lock) { ++ /* We have allocated enough notification objects for all the sessions atm */ ++ struct mali_session_data *session, *tmp; ++ MALI_SESSION_FOREACH(session, tmp, link) { ++ MALI_DEBUG_ASSERT(used_notification_objects < num_sessions_alloc); ++ if (NULL != notobjs[used_notification_objects]) { ++ mali_session_send_notification(session, notobjs[used_notification_objects]); ++ notobjs[used_notification_objects] = NULL; /* Don't track this notification object any more */ ++ } ++ used_notification_objects++; + } ++ done = MALI_TRUE; + } + -+ /* start recording profiling data */ -+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) { -+ MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n")); -+ return -EFAULT; -+ } ++ mali_session_unlock(); + -+ MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit)); -+ } else { -+ /* stop recording profiling data */ -+ u32 count = 0; -+ if (_MALI_OSK_ERR_OK != 
_mali_internal_profiling_stop(&count)) { -+ MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n")); -+ return -EFAULT; ++ /* Delete any remaining/unused notification objects */ ++ for (; used_notification_objects < num_sessions_alloc; used_notification_objects++) { ++ if (NULL != notobjs[used_notification_objects]) { ++ _mali_osk_notification_delete(notobjs[used_notification_objects]); ++ } + } + -+ MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count)); ++ _mali_osk_free(notobjs); + } -+ -+ *ppos += cnt; -+ return cnt; +} + -+static const struct file_operations profiling_record_fops = { -+ .owner = THIS_MODULE, -+ .read = profiling_record_read, -+ .write = profiling_record_write, -+}; -+ -+static void *profiling_events_start(struct seq_file *s, loff_t *pos) ++void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value) +{ -+ loff_t *spos; ++ mali_bool notify = MALI_FALSE; + -+ /* check if we have data avaiable */ -+ if (MALI_TRUE != _mali_internal_profiling_have_recording()) { -+ return NULL; ++ if (setting >= _MALI_UK_USER_SETTING_MAX) { ++ MALI_DEBUG_PRINT_ERROR(("Invalid user setting %ud\n")); ++ return; + } + -+ spos = kmalloc(sizeof(loff_t), GFP_KERNEL); -+ if (NULL == spos) { -+ return NULL; ++ if (mali_user_settings[setting] != value) { ++ notify = MALI_TRUE; + } + -+ *spos = *pos; -+ return spos; -+} -+ -+static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos) -+{ -+ loff_t *spos = v; -+ -+ /* check if we have data avaiable */ -+ if (MALI_TRUE != _mali_internal_profiling_have_recording()) { -+ return NULL; -+ } ++ mali_user_settings[setting] = value; + -+ /* check if the next entry actually is avaiable */ -+ if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) { -+ return NULL; ++ if (notify) { ++ mali_user_settings_notify(setting, value); + } -+ -+ *pos = ++*spos; -+ return spos; -+} -+ -+static void profiling_events_stop(struct seq_file *s, void *v) -+{ -+ kfree(v); +} + -+static int profiling_events_show(struct seq_file *seq_file, void *v) ++u32 mali_get_user_setting(_mali_uk_user_setting_t setting) +{ -+ loff_t *spos = v; -+ u32 index; -+ u64 timestamp; -+ u32 event_id; -+ u32 data[5]; -+ -+ index = (u32) * spos; -+ -+ /* Retrieve all events */ -+ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, ×tamp, &event_id, data)) { -+ seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]); ++ if (setting >= _MALI_UK_USER_SETTING_MAX) { + return 0; + } + -+ return 0; ++ return mali_user_settings[setting]; +} + -+static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v) ++_mali_osk_errcode_t _mali_ukk_get_user_setting(_mali_uk_get_user_setting_s *args) +{ -+#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7)) -+ -+ static u64 start_time = 0; -+ loff_t *spos = v; -+ u32 index; -+ u64 timestamp; -+ u32 event_id; -+ u32 data[5]; -+ -+ index = (u32) * spos; -+ -+ /* Retrieve all events */ -+ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, ×tamp, &event_id, data)) { -+ seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]); -+ -+ if (0 == index) { -+ start_time = timestamp; -+ } -+ -+ seq_printf(seq_file, "[%06u] ", index); -+ -+ switch (event_id & 0x0F000000) { -+ case MALI_PROFILING_EVENT_TYPE_SINGLE: -+ 
seq_printf(seq_file, "SINGLE | "); -+ break; -+ case MALI_PROFILING_EVENT_TYPE_START: -+ seq_printf(seq_file, "START | "); -+ break; -+ case MALI_PROFILING_EVENT_TYPE_STOP: -+ seq_printf(seq_file, "STOP | "); -+ break; -+ case MALI_PROFILING_EVENT_TYPE_SUSPEND: -+ seq_printf(seq_file, "SUSPEND | "); -+ break; -+ case MALI_PROFILING_EVENT_TYPE_RESUME: -+ seq_printf(seq_file, "RESUME | "); -+ break; -+ default: -+ seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24); -+ break; -+ } -+ -+ switch (event_id & 0x00FF0000) { -+ case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE: -+ seq_printf(seq_file, "SW | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_GP0: -+ seq_printf(seq_file, "GP0 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP0: -+ seq_printf(seq_file, "PP0 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP1: -+ seq_printf(seq_file, "PP1 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP2: -+ seq_printf(seq_file, "PP2 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP3: -+ seq_printf(seq_file, "PP3 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP4: -+ seq_printf(seq_file, "PP4 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP5: -+ seq_printf(seq_file, "PP5 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP6: -+ seq_printf(seq_file, "PP6 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_PP7: -+ seq_printf(seq_file, "PP7 | "); -+ break; -+ case MALI_PROFILING_EVENT_CHANNEL_GPU: -+ seq_printf(seq_file, "GPU | "); -+ break; -+ default: -+ seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16); -+ break; -+ } -+ -+ if (MALI_EVENT_ID_IS_HW(event_id)) { -+ if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) { -+ switch (event_id & 0x0000FFFF) { -+ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL: -+ seq_printf(seq_file, "PHYSICAL | "); -+ break; -+ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL: -+ seq_printf(seq_file, "VIRTUAL | "); -+ break; -+ default: -+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); -+ break; -+ } -+ } else { -+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); -+ } -+ } else { -+ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); -+ } ++ _mali_uk_user_setting_t setting; ++ MALI_DEBUG_ASSERT_POINTER(args); + -+ seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time); ++ setting = args->setting; + -+ return 0; ++ if (_MALI_UK_USER_SETTING_MAX > setting) { ++ args->value = mali_user_settings[setting]; ++ return _MALI_OSK_ERR_OK; ++ } else { ++ return _MALI_OSK_ERR_INVALID_ARGS; + } -+ -+ return 0; +} + -+static const struct seq_operations profiling_events_seq_ops = { -+ .start = profiling_events_start, -+ .next = profiling_events_next, -+ .stop = profiling_events_stop, -+ .show = profiling_events_show -+}; -+ -+static int profiling_events_open(struct inode *inode, struct file *file) ++_mali_osk_errcode_t _mali_ukk_get_user_settings(_mali_uk_get_user_settings_s *args) +{ -+ return seq_open(file, &profiling_events_seq_ops); -+} -+ -+static const struct file_operations profiling_events_fops = { -+ .owner = THIS_MODULE, -+ .open = profiling_events_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; ++ MALI_DEBUG_ASSERT_POINTER(args); + -+static const struct seq_operations profiling_events_human_readable_seq_ops = { -+ .start = profiling_events_start, -+ .next = profiling_events_next, -+ .stop = profiling_events_stop, -+ .show = profiling_events_show_human_readable 
-+}; ++ _mali_osk_memcpy(args->settings, mali_user_settings, sizeof(mali_user_settings)); + -+static int profiling_events_human_readable_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &profiling_events_human_readable_seq_ops); ++ return _MALI_OSK_ERR_OK; +} +diff --git a/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h +new file mode 100644 +index 000000000..da9c0630e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/common/mali_user_settings_db.h +@@ -0,0 +1,39 @@ ++/** ++ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static const struct file_operations profiling_events_human_readable_fops = { -+ .owner = THIS_MODULE, -+ .open = profiling_events_human_readable_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+#endif ++#ifndef __MALI_USER_SETTINGS_DB_H__ ++#define __MALI_USER_SETTINGS_DB_H__ + -+static int memory_debugfs_show(struct seq_file *s, void *private_data) -+{ -+#ifdef MALI_MEM_SWAP_TRACKING -+ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s %-10s \n"\ -+ "=================================================================================================================================\n", -+ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem", -+ "external_mem", "ump_mem", "dma_mem", "swap_mem"); -+#else -+ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s \n"\ -+ "========================================================================================================================\n", -+ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem", -+ "external_mem", "ump_mem", "dma_mem"); ++#ifdef __cplusplus ++extern "C" { +#endif -+ mali_session_memory_tracking(s); -+ return 0; -+} -+ -+static int memory_debugfs_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, memory_debugfs_show, inode->i_private); -+} -+ -+static const struct file_operations memory_usage_fops = { -+ .owner = THIS_MODULE, -+ .open = memory_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; + -+static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ size_t r; -+ u32 uval = _mali_ukk_utilization_gp_pp(); ++#include "mali_uk_types.h" + -+ r = snprintf(buf, 64, "%u\n", uval); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++/** @brief Set Mali user setting in DB ++ * ++ * Update the DB with a new value for \a setting. If the value is different from theprevious set value running sessions will be notified of the change. 
++ * ++ * @param setting the setting to be changed ++ * @param value the new value to set ++ */ ++void mali_set_user_setting(_mali_uk_user_setting_t setting, u32 value); + -+static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ size_t r; -+ u32 uval = _mali_ukk_utilization_gp(); ++/** @brief Get current Mali user setting value from DB ++ * ++ * @param setting the setting to extract ++ * @return the value of the selected setting ++ */ ++u32 mali_get_user_setting(_mali_uk_user_setting_t setting); + -+ r = snprintf(buf, 64, "%u\n", uval); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++#ifdef __cplusplus +} ++#endif ++#endif /* __MALI_KERNEL_USER_SETTING__ */ +diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h +new file mode 100644 +index 000000000..7df55c951 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard.h +@@ -0,0 +1,526 @@ ++/* ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ size_t r; -+ u32 uval = _mali_ukk_utilization_pp(); ++/** ++ * @file mali_utgard.h ++ * Defines types and interface exposed by the Mali Utgard device driver ++ */ + -+ r = snprintf(buf, 64, "%u\n", uval); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++#ifndef __MALI_UTGARD_H__ ++#define __MALI_UTGARD_H__ + ++#include "mali_osk_types.h" ++#ifdef CONFIG_MALI_DEVFREQ ++#include ++#include "mali_pm_metrics.h" ++#ifdef CONFIG_DEVFREQ_THERMAL ++#include ++#endif ++#endif + -+static const struct file_operations utilization_gp_pp_fops = { -+ .owner = THIS_MODULE, -+ .read = utilization_gp_pp_read, -+}; ++#define MALI_GPU_NAME_UTGARD "mali-utgard" + -+static const struct file_operations utilization_gp_fops = { -+ .owner = THIS_MODULE, -+ .read = utilization_gp_read, -+}; + -+static const struct file_operations utilization_pp_fops = { -+ .owner = THIS_MODULE, -+ .read = utilization_pp_read, -+}; ++#define MALI_OFFSET_GP 0x00000 ++#define MALI_OFFSET_GP_MMU 0x03000 + -+static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ unsigned long val; -+ int ret; -+ _mali_uk_user_setting_t setting; -+ char buf[32]; ++#define MALI_OFFSET_PP0 0x08000 ++#define MALI_OFFSET_PP0_MMU 0x04000 ++#define MALI_OFFSET_PP1 0x0A000 ++#define MALI_OFFSET_PP1_MMU 0x05000 ++#define MALI_OFFSET_PP2 0x0C000 ++#define MALI_OFFSET_PP2_MMU 0x06000 ++#define MALI_OFFSET_PP3 0x0E000 ++#define MALI_OFFSET_PP3_MMU 0x07000 + -+ cnt = min(cnt, sizeof(buf) - 1); -+ if (copy_from_user(buf, ubuf, cnt)) { -+ return -EFAULT; -+ } -+ buf[cnt] = '\0'; ++#define MALI_OFFSET_PP4 0x28000 ++#define MALI_OFFSET_PP4_MMU 0x1C000 ++#define MALI_OFFSET_PP5 0x2A000 ++#define MALI_OFFSET_PP5_MMU 0x1D000 ++#define MALI_OFFSET_PP6 0x2C000 ++#define MALI_OFFSET_PP6_MMU 0x1E000 ++#define MALI_OFFSET_PP7 0x2E000 ++#define 
MALI_OFFSET_PP7_MMU 0x1F000 + -+ ret = kstrtoul(buf, 10, &val); -+ if (0 != ret) { -+ return ret; -+ } ++#define MALI_OFFSET_L2_RESOURCE0 0x01000 ++#define MALI_OFFSET_L2_RESOURCE1 0x10000 ++#define MALI_OFFSET_L2_RESOURCE2 0x11000 + -+ /* Update setting */ -+ setting = (_mali_uk_user_setting_t)(filp->private_data); -+ mali_set_user_setting(setting, val); ++#define MALI400_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE0 ++#define MALI450_OFFSET_L2_CACHE0 MALI_OFFSET_L2_RESOURCE1 ++#define MALI450_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0 ++#define MALI450_OFFSET_L2_CACHE2 MALI_OFFSET_L2_RESOURCE2 ++#define MALI470_OFFSET_L2_CACHE1 MALI_OFFSET_L2_RESOURCE0 + -+ *ppos += cnt; -+ return cnt; -+} ++#define MALI_OFFSET_BCAST 0x13000 ++#define MALI_OFFSET_DLBU 0x14000 + -+static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ size_t r; -+ u32 value; -+ _mali_uk_user_setting_t setting; ++#define MALI_OFFSET_PP_BCAST 0x16000 ++#define MALI_OFFSET_PP_BCAST_MMU 0x15000 + -+ setting = (_mali_uk_user_setting_t)(filp->private_data); -+ value = mali_get_user_setting(setting); ++#define MALI_OFFSET_PMU 0x02000 ++#define MALI_OFFSET_DMA 0x12000 + -+ r = snprintf(buf, 64, "%u\n", value); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++/* Mali-300 */ + -+static const struct file_operations user_settings_fops = { -+ .owner = THIS_MODULE, -+ .open = open_copy_private_data, -+ .read = user_settings_read, -+ .write = user_settings_write, -+}; ++#define MALI_GPU_RESOURCES_MALI300(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) + -+static int mali_sysfs_user_settings_register(void) -+{ -+ struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir); ++#define MALI_GPU_RESOURCES_MALI300_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp_irq, pp_mmu_irq) + -+ if (mali_user_settings_dir != NULL) { -+ long i; -+ for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) { -+ debugfs_create_file(_mali_uk_user_setting_descriptions[i], -+ 0600, mali_user_settings_dir, (void *)i, -+ &user_settings_fops); -+ } -+ } ++/* Mali-400 */ + -+ return 0; -+} ++#define MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) + -+static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) -+{ -+ int ret; -+ char buffer[32]; -+ unsigned long val; ++#define MALI_GPU_RESOURCES_MALI400_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) + -+ if (count >= sizeof(buffer)) { -+ return -ENOMEM; -+ } ++#define MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + 
MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) + -+ if (copy_from_user(&buffer[0], buf, count)) { -+ return -EFAULT; -+ } -+ buffer[count] = '\0'; ++#define MALI_GPU_RESOURCES_MALI400_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) + -+ ret = kstrtoul(&buffer[0], 10, &val); -+ if (0 != ret) { -+ return -EINVAL; -+ } ++#define MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) + -+ ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */ -+ if (ret) { -+ return ret; -+ } ++#define MALI_GPU_RESOURCES_MALI400_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) + -+ *offp += count; -+ return count; -+} ++#define MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI400_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) + -+static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ int r; -+ char buffer[64]; ++#define MALI_GPU_RESOURCES_MALI400_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCES_MALI400_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled()); ++ /* Mali-450 */ ++#define MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + 
MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ ++ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) + -+ return simple_read_from_buffer(buf, count, offp, buffer, r); -+} ++#define MALI_GPU_RESOURCES_MALI450_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI450_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+static const struct file_operations pp_num_cores_enabled_fops = { -+ .owner = THIS_MODULE, -+ .write = pp_num_cores_enabled_write, -+ .read = pp_num_cores_enabled_read, -+ .llseek = default_llseek, -+}; ++#define MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) + -+static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ int r; -+ char buffer[64]; ++#define MALI_GPU_RESOURCES_MALI450_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI450_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total()); ++#define MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + 
MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ ++ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) + -+ return simple_read_from_buffer(buf, count, offp, buffer, r); -+} ++#define MALI_GPU_RESOURCES_MALI450_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI450_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+static const struct file_operations pp_num_cores_total_fops = { -+ .owner = THIS_MODULE, -+ .read = pp_num_cores_total_read, -+}; ++#define MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP4, pp3_irq, base_addr + MALI_OFFSET_PP4_MMU, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP5, pp4_irq, base_addr + MALI_OFFSET_PP5_MMU, pp4_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP6, pp5_irq, base_addr + MALI_OFFSET_PP6_MMU, pp5_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ ++ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) + -+static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) -+{ -+ int ret; -+ char buffer[32]; -+ unsigned long val; ++#define MALI_GPU_RESOURCES_MALI450_MP6_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI450_MP6(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ if (count >= sizeof(buffer)) { -+ return -ENOMEM; -+ } ++#define 
MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE0) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI450_OFFSET_L2_CACHE2) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(4, base_addr + MALI_OFFSET_PP4, pp4_irq, base_addr + MALI_OFFSET_PP4_MMU, pp4_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(5, base_addr + MALI_OFFSET_PP5, pp5_irq, base_addr + MALI_OFFSET_PP5_MMU, pp5_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(6, base_addr + MALI_OFFSET_PP6, pp6_irq, base_addr + MALI_OFFSET_PP6_MMU, pp6_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(7, base_addr + MALI_OFFSET_PP7, pp7_irq, base_addr + MALI_OFFSET_PP7_MMU, pp7_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) \ ++ MALI_GPU_RESOURCE_DMA(base_addr + MALI_OFFSET_DMA) + -+ if (copy_from_user(&buffer[0], buf, count)) { -+ return -EFAULT; -+ } -+ buffer[count] = '\0'; ++#define MALI_GPU_RESOURCES_MALI450_MP8_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI450_MP8(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp4_irq, pp4_mmu_irq, pp5_irq, pp5_mmu_irq, pp6_irq, pp6_mmu_irq, pp7_irq, pp7_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ ret = kstrtoul(&buffer[0], 10, &val); -+ if (0 != ret) { -+ return -EINVAL; -+ } ++ /* Mali - 470 */ ++#define MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) + -+ switch (val) { -+ case 1: -+ mali_executor_core_scaling_enable(); -+ break; -+ case 0: -+ mali_executor_core_scaling_disable(); -+ break; -+ default: -+ return -EINVAL; -+ break; -+ } ++#define 
MALI_GPU_RESOURCES_MALI470_MP1_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI470_MP1(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ *offp += count; -+ return count; -+} ++#define MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) + -+static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2); -+} -+static const struct file_operations pp_core_scaling_enabled_fops = { -+ .owner = THIS_MODULE, -+ .write = pp_core_scaling_enabled_write, -+ .read = pp_core_scaling_enabled_read, -+ .llseek = default_llseek, -+}; ++#define MALI_GPU_RESOURCES_MALI470_MP2_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI470_MP2(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) -+{ -+ int r = 0; -+ char buffer[64]; ++#define MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) + -+ switch (mali_kernel_core_get_product_id()) { -+ case _MALI_PRODUCT_ID_MALI200: -+ r = snprintf(buffer, 64, "Mali-200\n"); -+ break; -+ case _MALI_PRODUCT_ID_MALI300: -+ r = snprintf(buffer, 64, "Mali-300\n"); -+ break; -+ case _MALI_PRODUCT_ID_MALI400: -+ r = snprintf(buffer, 64, "Mali-400 MP\n"); -+ break; -+ case _MALI_PRODUCT_ID_MALI450: -+ r = snprintf(buffer, 64, "Mali-450 MP\n"); -+ break; -+ case _MALI_PRODUCT_ID_MALI470: -+ r = snprintf(buffer, 64, "Mali-470 MP\n"); -+ break; -+ case _MALI_PRODUCT_ID_UNKNOWN: -+ return -EINVAL; -+ 
break; -+ }; ++#define MALI_GPU_RESOURCES_MALI470_MP3_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI470_MP3(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+ return simple_read_from_buffer(buf, count, offp, buffer, r); -+} ++#define MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_L2(base_addr + MALI470_OFFSET_L2_CACHE1) \ ++ MALI_GPU_RESOURCE_GP_WITH_MMU(base_addr + MALI_OFFSET_GP, gp_irq, base_addr + MALI_OFFSET_GP_MMU, gp_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(0, base_addr + MALI_OFFSET_PP0, pp0_irq, base_addr + MALI_OFFSET_PP0_MMU, pp0_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(1, base_addr + MALI_OFFSET_PP1, pp1_irq, base_addr + MALI_OFFSET_PP1_MMU, pp1_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(2, base_addr + MALI_OFFSET_PP2, pp2_irq, base_addr + MALI_OFFSET_PP2_MMU, pp2_mmu_irq) \ ++ MALI_GPU_RESOURCE_PP_WITH_MMU(3, base_addr + MALI_OFFSET_PP3, pp3_irq, base_addr + MALI_OFFSET_PP3_MMU, pp3_mmu_irq) \ ++ MALI_GPU_RESOURCE_BCAST(base_addr + MALI_OFFSET_BCAST) \ ++ MALI_GPU_RESOURCE_DLBU(base_addr + MALI_OFFSET_DLBU) \ ++ MALI_GPU_RESOURCE_PP_BCAST(base_addr + MALI_OFFSET_PP_BCAST, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PP_MMU_BCAST(base_addr + MALI_OFFSET_PP_BCAST_MMU) + -+static const struct file_operations version_fops = { -+ .owner = THIS_MODULE, -+ .read = version_read, -+}; ++#define MALI_GPU_RESOURCES_MALI470_MP4_PMU(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCES_MALI470_MP4(base_addr, gp_irq, gp_mmu_irq, pp0_irq, pp0_mmu_irq, pp1_irq, pp1_mmu_irq, pp2_irq, pp2_mmu_irq, pp3_irq, pp3_mmu_irq, pp_bcast_irq) \ ++ MALI_GPU_RESOURCE_PMU(base_addr + MALI_OFFSET_PMU) \ + -+#if defined(DEBUG) -+static int timeline_debugfs_show(struct seq_file *s, void *private_data) -+{ -+ struct mali_session_data *session, *tmp; -+ u32 session_seq = 1; ++#define MALI_GPU_RESOURCE_L2(addr) \ ++ { \ ++ .name = "Mali_L2", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = addr, \ ++ .end = addr + 0x200, \ ++ }, + -+ seq_printf(s, "timeline system info: \n=================\n\n"); ++#define MALI_GPU_RESOURCE_GP(gp_addr, gp_irq) \ ++ { \ ++ .name = "Mali_GP", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = gp_addr, \ ++ .end = gp_addr + 0x100, \ ++ }, \ ++ { \ ++ .name = "Mali_GP_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = gp_irq, \ ++ .end = gp_irq, \ ++ }, \ + -+ mali_session_lock(); -+ MALI_SESSION_FOREACH(session, tmp, link) { -+ seq_printf(s, "session %d <%p> start:\n", session_seq, session); -+ mali_timeline_debug_print_system(session->timeline_system, s); -+ seq_printf(s, "session %d end\n\n\n", session_seq++); -+ } -+ mali_session_unlock(); ++#define MALI_GPU_RESOURCE_GP_WITH_MMU(gp_addr, gp_irq, gp_mmu_addr, gp_mmu_irq) \ ++ { \ ++ .name = "Mali_GP", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = gp_addr, \ ++ .end = gp_addr + 0x100, \ ++ }, \ ++ { \ ++ .name = "Mali_GP_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = gp_irq, \ ++ .end = gp_irq, \ ++ }, \ ++ { \ ++ .name = "Mali_GP_MMU", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = gp_mmu_addr, \ ++ .end = gp_mmu_addr + 0x100, \ ++ }, \ ++ { \ ++ .name = "Mali_GP_MMU_IRQ", \ ++ .flags = 
IORESOURCE_IRQ, \ ++ .start = gp_mmu_irq, \ ++ .end = gp_mmu_irq, \ ++ }, + -+ return 0; -+} ++#define MALI_GPU_RESOURCE_PP(pp_addr, pp_irq) \ ++ { \ ++ .name = "Mali_PP", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pp_addr, \ ++ .end = pp_addr + 0x1100, \ ++ }, \ ++ { \ ++ .name = "Mali_PP_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = pp_irq, \ ++ .end = pp_irq, \ ++ }, \ + -+static int timeline_debugfs_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, timeline_debugfs_show, inode->i_private); -+} ++#define MALI_GPU_RESOURCE_PP_WITH_MMU(id, pp_addr, pp_irq, pp_mmu_addr, pp_mmu_irq) \ ++ { \ ++ .name = "Mali_PP" #id, \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pp_addr, \ ++ .end = pp_addr + 0x1100, \ ++ }, \ ++ { \ ++ .name = "Mali_PP" #id "_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = pp_irq, \ ++ .end = pp_irq, \ ++ }, \ ++ { \ ++ .name = "Mali_PP" #id "_MMU", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pp_mmu_addr, \ ++ .end = pp_mmu_addr + 0x100, \ ++ }, \ ++ { \ ++ .name = "Mali_PP" #id "_MMU_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = pp_mmu_irq, \ ++ .end = pp_mmu_irq, \ ++ }, + -+static const struct file_operations timeline_dump_fops = { -+ .owner = THIS_MODULE, -+ .open = timeline_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release -+}; -+#endif ++#define MALI_GPU_RESOURCE_MMU(mmu_addr, mmu_irq) \ ++ { \ ++ .name = "Mali_MMU", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = mmu_addr, \ ++ .end = mmu_addr + 0x100, \ ++ }, \ ++ { \ ++ .name = "Mali_MMU_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = mmu_irq, \ ++ .end = mmu_irq, \ ++ }, + -+int mali_sysfs_register(const char *mali_dev_name) -+{ -+ mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL); -+ if (ERR_PTR(-ENODEV) == mali_debugfs_dir) { -+ /* Debugfs not supported. 
*/ -+ mali_debugfs_dir = NULL; -+ } else { -+ if (NULL != mali_debugfs_dir) { -+ /* Debugfs directory created successfully; create files now */ -+ struct dentry *mali_power_dir; -+ struct dentry *mali_gp_dir; -+ struct dentry *mali_pp_dir; -+ struct dentry *mali_l2_dir; -+ struct dentry *mali_profiling_dir; ++#define MALI_GPU_RESOURCE_PMU(pmu_addr) \ ++ { \ ++ .name = "Mali_PMU", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pmu_addr, \ ++ .end = pmu_addr + 0x100, \ ++ }, + -+ debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops); ++#define MALI_GPU_RESOURCE_DMA(dma_addr) \ ++ { \ ++ .name = "Mali_DMA", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = dma_addr, \ ++ .end = dma_addr + 0x100, \ ++ }, + -+ mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir); -+ if (mali_power_dir != NULL) { -+ debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops); -+ debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops); -+ } ++#define MALI_GPU_RESOURCE_DLBU(dlbu_addr) \ ++ { \ ++ .name = "Mali_DLBU", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = dlbu_addr, \ ++ .end = dlbu_addr + 0x100, \ ++ }, + -+ mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir); -+ if (mali_gp_dir != NULL) { -+ u32 num_groups; -+ long i; ++#define MALI_GPU_RESOURCE_BCAST(bcast_addr) \ ++ { \ ++ .name = "Mali_Broadcast", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = bcast_addr, \ ++ .end = bcast_addr + 0x100, \ ++ }, + -+ num_groups = mali_group_get_glob_num_groups(); -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++#define MALI_GPU_RESOURCE_PP_BCAST(pp_addr, pp_irq) \ ++ { \ ++ .name = "Mali_PP_Broadcast", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pp_addr, \ ++ .end = pp_addr + 0x1100, \ ++ }, \ ++ { \ ++ .name = "Mali_PP_Broadcast_IRQ", \ ++ .flags = IORESOURCE_IRQ, \ ++ .start = pp_irq, \ ++ .end = pp_irq, \ ++ }, \ + -+ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); -+ if (NULL != gp_core) { -+ struct dentry *mali_gp_gpx_dir; -+ mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir); -+ if (NULL != mali_gp_gpx_dir) { -+ debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops); -+ debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops); -+ } -+ break; /* no need to look for any other GP cores */ -+ } ++#define MALI_GPU_RESOURCE_PP_MMU_BCAST(pp_mmu_bcast_addr) \ ++ { \ ++ .name = "Mali_PP_MMU_Broadcast", \ ++ .flags = IORESOURCE_MEM, \ ++ .start = pp_mmu_bcast_addr, \ ++ .end = pp_mmu_bcast_addr + 0x100, \ ++ }, + -+ } -+ } ++ struct mali_gpu_utilization_data { ++ unsigned int utilization_gpu; /* Utilization for GP and all PP cores combined, 0 = no utilization, 256 = full utilization */ ++ unsigned int utilization_gp; /* Utilization for GP core only, 0 = no utilization, 256 = full utilization */ ++ unsigned int utilization_pp; /* Utilization for all PP cores combined, 0 = no utilization, 256 = full utilization */ ++ }; + -+ mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir); -+ if (mali_pp_dir != NULL) { -+ u32 num_groups; -+ long i; ++ struct mali_gpu_clk_item { ++ unsigned int clock; /* unit(MHz) */ ++ unsigned int vol; ++ }; + -+ debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops); -+ debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops); -+ debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, 
&pp_core_scaling_enabled_fops); ++ struct mali_gpu_clock { ++ struct mali_gpu_clk_item *item; ++ unsigned int num_of_steps; ++ }; + -+ num_groups = mali_group_get_glob_num_groups(); -+ for (i = 0; i < num_groups; i++) { -+ struct mali_group *group = mali_group_get_glob_group(i); ++ struct mali_gpu_device_data { ++ /* Shared GPU memory */ ++ unsigned long shared_mem_size; + -+ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); -+ if (NULL != pp_core) { -+ char buf[16]; -+ struct dentry *mali_pp_ppx_dir; -+ _mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core)); -+ mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir); -+ if (NULL != mali_pp_ppx_dir) { -+ debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops); -+ if (!mali_group_is_virtual(group)) { -+ debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops); -+ } -+ } -+ } -+ } -+ } ++ /* ++ * Mali PMU switch delay. ++ * Only needed if the power gates are connected to the PMU in a high fanout ++ * network. This value is the number of Mali clock cycles it takes to ++ * enable the power gates and turn on the power mesh. ++ * This value will have no effect if a daisy chain implementation is used. ++ */ ++ u32 pmu_switch_delay; + -+ mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir); -+ if (mali_l2_dir != NULL) { -+ struct dentry *mali_l2_all_dir; -+ u32 l2_id; -+ struct mali_l2_cache_core *l2_cache; ++ /* Mali Dynamic power domain configuration in sequence from 0-11 ++ * GP PP0 PP1 PP2 PP3 PP4 PP5 PP6 PP7, L2$0 L2$1 L2$2 ++ */ ++ u16 pmu_domain_config[12]; + -+ mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir); -+ if (mali_l2_all_dir != NULL) { -+ debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops); -+ debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops); -+ } ++ /* Dedicated GPU memory range (physical). 
*/ ++ unsigned long dedicated_mem_start; ++ unsigned long dedicated_mem_size; + -+ l2_id = 0; -+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); -+ while (NULL != l2_cache) { -+ char buf[16]; -+ struct dentry *mali_l2_l2x_dir; -+ _mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id); -+ mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir); -+ if (NULL != mali_l2_l2x_dir) { -+ debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops); -+ debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops); -+ debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops); -+ debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops); -+ debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops); -+ } ++ /* Frame buffer memory to be accessible by Mali GPU (physical) */ ++ unsigned long fb_start; ++ unsigned long fb_size; + -+ /* try next L2 */ -+ l2_id++; -+ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); -+ } -+ } ++ /* Max runtime [ms] for jobs */ ++ int max_job_runtime; + -+ debugfs_create_file("gpu_memory", 0444, mali_debugfs_dir, NULL, &memory_usage_fops); ++ /* Report GPU utilization and related control in this interval (specified in ms) */ ++ unsigned long control_interval; + -+ debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops); -+ debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops); -+ debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops); ++ /* Function that will receive periodic GPU utilization numbers */ ++ void (*utilization_callback)(struct mali_gpu_utilization_data *data); + -+ mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir); -+ if (mali_profiling_dir != NULL) { -+ u32 max_sub_jobs; -+ long i; -+ struct dentry *mali_profiling_gp_dir; -+ struct dentry *mali_profiling_pp_dir; -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+ struct dentry *mali_profiling_proc_dir; ++ /* Fuction that platform callback for freq setting, needed when CONFIG_MALI_DVFS enabled */ ++ int (*set_freq)(int setting_clock_step); ++ /* Function that platfrom report it's clock info which driver can set, needed when CONFIG_MALI_DVFS enabled */ ++ void (*get_clock_info)(struct mali_gpu_clock **data); ++ /* Function that get the current clock info, needed when CONFIG_MALI_DVFS enabled */ ++ int (*get_freq)(void); ++ /* Function that init the mali gpu secure mode */ ++ int (*secure_mode_init)(void); ++ /* Function that deinit the mali gpu secure mode */ ++ void (*secure_mode_deinit)(void); ++ /* Function that reset GPU and enable gpu secure mode */ ++ int (*gpu_reset_and_secure_mode_enable)(void); ++ /* Function that Reset GPU and disable gpu secure mode */ ++ int (*gpu_reset_and_secure_mode_disable)(void); ++ /* ipa related interface customer need register */ ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++ struct devfreq_cooling_power *gpu_cooling_ops; +#endif -+ /* -+ * Create directory where we can set GP HW counters. 
-+ */ -+ mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir); -+ if (mali_profiling_gp_dir != NULL) { -+ debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops); -+ debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops); -+ } -+ -+ /* -+ * Create directory where we can set PP HW counters. -+ * Possible override with specific HW counters for a particular sub job -+ * (Disable core scaling before using the override!) -+ */ -+ mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir); -+ if (mali_profiling_pp_dir != NULL) { -+ debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops); -+ debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops); -+ } ++ }; + -+ max_sub_jobs = mali_executor_get_num_cores_total(); -+ for (i = 0; i < max_sub_jobs; i++) { -+ char buf[16]; -+ struct dentry *mali_profiling_pp_x_dir; -+ _mali_osk_snprintf(buf, sizeof(buf), "%u", i); -+ mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir); -+ if (NULL != mali_profiling_pp_x_dir) { -+ debugfs_create_file("counter_src0", -+ 0600, mali_profiling_pp_x_dir, -+ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i), -+ &profiling_counter_src_fops); -+ debugfs_create_file("counter_src1", -+ 0600, mali_profiling_pp_x_dir, -+ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i), -+ &profiling_counter_src_fops); -+ } -+ } ++ /** ++ * Pause the scheduling and power state changes of Mali device driver. ++ * mali_dev_resume() must always be called as soon as possible after this function ++ * in order to resume normal operation of the Mali driver. ++ */ ++ void mali_dev_pause(void); + -+#if defined(CONFIG_MALI400_INTERNAL_PROFILING) -+ mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir); -+ if (mali_profiling_proc_dir != NULL) { -+ struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir); -+ if (mali_profiling_proc_default_dir != NULL) { -+ debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops); -+ } -+ } -+ debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops); -+ debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops); -+ debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops); -+#endif -+ } ++ /** ++ * Resume scheduling and allow power changes in Mali device driver. ++ * This must always be called after mali_dev_pause(). ++ */ ++ void mali_dev_resume(void); + -+#if MALI_STATE_TRACKING -+ debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops); -+#endif ++ /** @brief Set the desired number of PP cores to use. ++ * ++ * The internal Mali PMU will be used, if present, to physically power off the PP cores. ++ * ++ * @param num_cores The number of desired cores ++ * @return 0 on success, otherwise error. -EINVAL means an invalid number of cores was specified. 
++ */ ++ int mali_perf_set_num_pp_cores(unsigned int num_cores); + -+#if defined(DEBUG) -+ debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops); +#endif -+ if (mali_sysfs_user_settings_register()) { -+ /* Failed to create the debugfs entries for the user settings DB. */ -+ MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n")); -+ } -+ } -+ } -+ -+ /* Success! */ -+ return 0; -+} -+ -+int mali_sysfs_unregister(void) -+{ -+ if (NULL != mali_debugfs_dir) { -+ debugfs_remove_recursive(mali_debugfs_dir); -+ } -+ return 0; -+} -+ -+#else /* MALI_LICENSE_IS_GPL */ -+ -+/* Dummy implementations for non-GPL */ -+ -+int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name) -+{ -+ return 0; -+} -+ -+int mali_sysfs_unregister(void) -+{ -+ return 0; -+} -+ -+#endif /* MALI_LICENSE_IS_GPL */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h +diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h new file mode 100644 -index 000000000..91580a87c +index 000000000..686708eae --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h -@@ -0,0 +1,29 @@ ++++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_ioctl.h +@@ -0,0 +1,97 @@ +/* -+ * Copyright (C) 2011-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -310875,33 +313622,101 @@ index 000000000..91580a87c + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_KERNEL_SYSFS_H__ -+#define __MALI_KERNEL_SYSFS_H__ ++#ifndef __MALI_UTGARD_IOCTL_H__ ++#define __MALI_UTGARD_IOCTL_H__ ++ ++#include ++#include ++#include /* file system operations */ + +#ifdef __cplusplus +extern "C" { +#endif + -+#include ++/** ++ * @file mali_kernel_ioctl.h ++ * Interface to the Linux device driver. ++ * This file describes the interface needed to use the Linux device driver. ++ * Its interface is designed to used by the HAL implementation through a thin arch layer. ++ */ + -+#define MALI_PROC_DIR "driver/mali" ++/** ++ * ioctl commands ++ */ + -+int mali_sysfs_register(const char *mali_dev_name); -+int mali_sysfs_unregister(void); ++#define MALI_IOC_BASE 0x82 ++#define MALI_IOC_CORE_BASE (_MALI_UK_CORE_SUBSYSTEM + MALI_IOC_BASE) ++#define MALI_IOC_MEMORY_BASE (_MALI_UK_MEMORY_SUBSYSTEM + MALI_IOC_BASE) ++#define MALI_IOC_PP_BASE (_MALI_UK_PP_SUBSYSTEM + MALI_IOC_BASE) ++#define MALI_IOC_GP_BASE (_MALI_UK_GP_SUBSYSTEM + MALI_IOC_BASE) ++#define MALI_IOC_PROFILING_BASE (_MALI_UK_PROFILING_SUBSYSTEM + MALI_IOC_BASE) ++#define MALI_IOC_VSYNC_BASE (_MALI_UK_VSYNC_SUBSYSTEM + MALI_IOC_BASE) ++ ++#define MALI_IOC_WAIT_FOR_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_WAIT_FOR_NOTIFICATION, _mali_uk_wait_for_notification_s) ++#define MALI_IOC_GET_API_VERSION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, u32) ++#define MALI_IOC_GET_API_VERSION_V2 _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_API_VERSION, _mali_uk_get_api_version_v2_s) ++/* rk_ext. 
*/ ++#define MALI_IOC_GET_RK_KO_VERSION _IOWR(MALI_IOC_CORE_BASE, _MALI_GET_RK_KO_VERSION, _mali_rk_ko_version_s) ++#define MALI_IOC_POST_NOTIFICATION _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_POST_NOTIFICATION, _mali_uk_post_notification_s) ++#define MALI_IOC_GET_USER_SETTING _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTING, _mali_uk_get_user_setting_s) ++#define MALI_IOC_GET_USER_SETTINGS _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_GET_USER_SETTINGS, _mali_uk_get_user_settings_s) ++#define MALI_IOC_REQUEST_HIGH_PRIORITY _IOW (MALI_IOC_CORE_BASE, _MALI_UK_REQUEST_HIGH_PRIORITY, _mali_uk_request_high_priority_s) ++#define MALI_IOC_TIMELINE_GET_LATEST_POINT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_GET_LATEST_POINT, _mali_uk_timeline_get_latest_point_s) ++#define MALI_IOC_TIMELINE_WAIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_WAIT, _mali_uk_timeline_wait_s) ++#define MALI_IOC_TIMELINE_CREATE_SYNC_FENCE _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, _mali_uk_timeline_create_sync_fence_s) ++#define MALI_IOC_SOFT_JOB_START _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_START, _mali_uk_soft_job_start_s) ++#define MALI_IOC_SOFT_JOB_SIGNAL _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_SOFT_JOB_SIGNAL, _mali_uk_soft_job_signal_s) ++#define MALI_IOC_PENDING_SUBMIT _IOWR(MALI_IOC_CORE_BASE, _MALI_UK_PENDING_SUBMIT, _mali_uk_pending_submit_s) ++ ++#define MALI_IOC_MEM_ALLOC _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_ALLOC_MEM, _mali_uk_alloc_mem_s) ++#define MALI_IOC_MEM_FREE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_FREE_MEM, _mali_uk_free_mem_s) ++#define MALI_IOC_MEM_BIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_BIND_MEM, _mali_uk_bind_mem_s) ++#define MALI_IOC_MEM_UNBIND _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_UNBIND_MEM, _mali_uk_unbind_mem_s) ++#define MALI_IOC_MEM_COW _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MEM, _mali_uk_cow_mem_s) ++#define MALI_IOC_MEM_COW_MODIFY_RANGE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_COW_MODIFY_RANGE, _mali_uk_cow_modify_range_s) ++#define MALI_IOC_MEM_RESIZE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_RESIZE_MEM, _mali_uk_mem_resize_s) ++#define MALI_IOC_MEM_DMA_BUF_GET_SIZE _IOR(MALI_IOC_MEMORY_BASE, _MALI_UK_DMA_BUF_GET_SIZE, _mali_uk_dma_buf_get_size_s) ++#define MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE _IOR (MALI_IOC_MEMORY_BASE, _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, _mali_uk_query_mmu_page_table_dump_size_s) ++#define MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_DUMP_MMU_PAGE_TABLE, _mali_uk_dump_mmu_page_table_s) ++#define MALI_IOC_MEM_WRITE_SAFE _IOWR(MALI_IOC_MEMORY_BASE, _MALI_UK_MEM_WRITE_SAFE, _mali_uk_mem_write_safe_s) ++ ++#define MALI_IOC_PP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_START_JOB, _mali_uk_pp_start_job_s) ++#define MALI_IOC_PP_AND_GP_START_JOB _IOWR(MALI_IOC_PP_BASE, _MALI_UK_PP_AND_GP_START_JOB, _mali_uk_pp_and_gp_start_job_s) ++#define MALI_IOC_PP_NUMBER_OF_CORES_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_NUMBER_OF_CORES, _mali_uk_get_pp_number_of_cores_s) ++#define MALI_IOC_PP_CORE_VERSION_GET _IOR (MALI_IOC_PP_BASE, _MALI_UK_GET_PP_CORE_VERSION, _mali_uk_get_pp_core_version_s) ++#define MALI_IOC_PP_DISABLE_WB _IOW (MALI_IOC_PP_BASE, _MALI_UK_PP_DISABLE_WB, _mali_uk_pp_disable_wb_s) ++ ++#define MALI_IOC_GP2_START_JOB _IOWR(MALI_IOC_GP_BASE, _MALI_UK_GP_START_JOB, _mali_uk_gp_start_job_s) ++#define MALI_IOC_GP2_NUMBER_OF_CORES_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_NUMBER_OF_CORES, _mali_uk_get_gp_number_of_cores_s) ++#define MALI_IOC_GP2_CORE_VERSION_GET _IOR (MALI_IOC_GP_BASE, _MALI_UK_GET_GP_CORE_VERSION, 
_mali_uk_get_gp_core_version_s) ++#define MALI_IOC_GP2_SUSPEND_RESPONSE _IOW (MALI_IOC_GP_BASE, _MALI_UK_GP_SUSPEND_RESPONSE,_mali_uk_gp_suspend_response_s) ++ ++#define MALI_IOC_PROFILING_ADD_EVENT _IOWR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_ADD_EVENT, _mali_uk_profiling_add_event_s) ++#define MALI_IOC_PROFILING_REPORT_SW_COUNTERS _IOW (MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_REPORT_SW_COUNTERS, _mali_uk_sw_counters_report_s) ++#define MALI_IOC_PROFILING_MEMORY_USAGE_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_MEMORY_USAGE_GET, _mali_uk_profiling_memory_usage_get_s) ++#define MALI_IOC_PROFILING_STREAM_FD_GET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_STREAM_FD_GET, _mali_uk_profiling_stream_fd_get_s) ++#define MALI_IOC_PROILING_CONTROL_SET _IOR(MALI_IOC_PROFILING_BASE, _MALI_UK_PROFILING_CONTROL_SET, _mali_uk_profiling_control_set_s) ++ ++#define MALI_IOC_VSYNC_EVENT_REPORT _IOW (MALI_IOC_VSYNC_BASE, _MALI_UK_VSYNC_EVENT_REPORT, _mali_uk_vsync_event_report_s) ++ ++/* rk_ext : 对 r5p0 集æˆä¹‹åŽ, mali_so ä¸å†ä½¿ç”¨ä¸‹é¢çš„ ioctl, 而使用 MALI_IOC_GET_RK_KO_VERSION. */ ++#if 0 ++#define MALI_IOC_GET_MALI_VERSION_IN_RK30 _IOWR(MALI_IOC_CORE_BASE,_MALI_UK_GET_MALI_VERSION_IN_RK30,_mali_uk_get_mali_version_in_rk30_s *) ++#endif + +#ifdef __cplusplus +} +#endif + -+#endif /* __MALI_KERNEL_LINUX_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h b/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h ++#endif /* __MALI_UTGARD_IOCTL_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h new file mode 100644 -index 000000000..222260823 +index 000000000..17d31de93 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h -@@ -0,0 +1,161 @@ ++++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_events.h +@@ -0,0 +1,190 @@ +/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -310910,165 +313725,194 @@ index 000000000..222260823 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ) -+#define MALI_LINUX_TRACE_H -+ -+#include -+ -+#include -+#include -+ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM mali ++#ifndef _MALI_UTGARD_PROFILING_EVENTS_H_ ++#define _MALI_UTGARD_PROFILING_EVENTS_H_ + -+#define TRACE_INCLUDE_PATH . -+#define TRACE_INCLUDE_FILE mali_linux_trace ++/* ++ * The event ID is a 32 bit value consisting of different fields ++ * reserved, 4 bits, for future use ++ * event type, 4 bits, cinstr_profiling_event_type_t ++ * event channel, 8 bits, the source of the event. ++ * event data, 16 bit field, data depending on event type ++ */ + +/** -+ * Define the tracepoint used to communicate the status of a GPU. Called -+ * when a GPU turns on or turns off. -+ * -+ * @param event_id The type of the event. This parameter is a bitfield -+ * encoding the type of the event. -+ * -+ * @param d0 First data parameter. -+ * @param d1 Second data parameter. -+ * @param d2 Third data parameter. -+ * @param d3 Fourth data parameter. 
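From user space these command numbers are issued with plain ioctl() calls on the Mali character device. A minimal sketch, assuming the device node is /dev/mali (the node name is not defined in this header) and using MALI_IOC_GET_API_VERSION, which is declared above to carry a u32:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
/* plus this header and the uk-types header it depends on */

int main(void)
{
	unsigned int version = 0;
	int fd = open("/dev/mali", O_RDWR);   /* assumed device node name */

	if (fd < 0)
		return 1;
	if (ioctl(fd, MALI_IOC_GET_API_VERSION, &version) == 0)
		printf("Mali u/k API version: 0x%x\n", version);
	close(fd);
	return 0;
}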
-+ * @param d4 Fifth data parameter. ++ * Specifies what kind of event this is + */ -+TRACE_EVENT(mali_timeline_event, -+ -+ TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, -+ unsigned int d2, unsigned int d3, unsigned int d4), ++typedef enum { ++ MALI_PROFILING_EVENT_TYPE_SINGLE = 0 << 24, ++ MALI_PROFILING_EVENT_TYPE_START = 1 << 24, ++ MALI_PROFILING_EVENT_TYPE_STOP = 2 << 24, ++ MALI_PROFILING_EVENT_TYPE_SUSPEND = 3 << 24, ++ MALI_PROFILING_EVENT_TYPE_RESUME = 4 << 24, ++} cinstr_profiling_event_type_t; + -+ TP_ARGS(event_id, d0, d1, d2, d3, d4), + -+ TP_STRUCT__entry( -+ __field(unsigned int, event_id) -+ __field(unsigned int, d0) -+ __field(unsigned int, d1) -+ __field(unsigned int, d2) -+ __field(unsigned int, d3) -+ __field(unsigned int, d4) -+ ), ++/** ++ * Secifies the channel/source of the event ++ */ ++typedef enum { ++ MALI_PROFILING_EVENT_CHANNEL_SOFTWARE = 0 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_GP0 = 1 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP0 = 5 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP1 = 6 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP2 = 7 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP3 = 8 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP4 = 9 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP5 = 10 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP6 = 11 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_PP7 = 12 << 16, ++ MALI_PROFILING_EVENT_CHANNEL_GPU = 21 << 16, ++} cinstr_profiling_event_channel_t; + -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->d0 = d0; -+ __entry->d1 = d1; -+ __entry->d2 = d2; -+ __entry->d3 = d3; -+ __entry->d4 = d4; -+ ), + -+ TP_printk("event=%d", __entry->event_id) -+ ); ++#define MALI_PROFILING_MAKE_EVENT_CHANNEL_GP(num) (((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) + (num)) << 16) ++#define MALI_PROFILING_MAKE_EVENT_CHANNEL_PP(num) (((MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) + (num)) << 16) + +/** -+ * Define a tracepoint used to regsiter the value of a hardware counter. -+ * Hardware counters belonging to the vertex or fragment processor are -+ * reported via this tracepoint each frame, whilst L2 cache hardware -+ * counters are reported continuously. -+ * -+ * @param counter_id The counter ID. -+ * @param value The value of the counter. 
++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from software channel + */ -+TRACE_EVENT(mali_hw_counter, -+ -+ TP_PROTO(unsigned int counter_id, unsigned int value), -+ -+ TP_ARGS(counter_id, value), -+ -+ TP_STRUCT__entry( -+ __field(unsigned int, counter_id) -+ __field(unsigned int, value) -+ ), -+ -+ TP_fast_assign( -+ __entry->counter_id = counter_id; -+ ), -+ -+ TP_printk("event %d = %d", __entry->counter_id, __entry->value) -+ ); ++typedef enum { ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_NONE = 0, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_NEW_FRAME = 1, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FLUSH = 2, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SWAP_BUFFERS = 3, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_FB_EVENT = 4, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_GP_ENQUEUE = 5, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_PP_ENQUEUE = 6, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_READBACK = 7, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_WRITEBACK = 8, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_ENTER_API_FUNC = 10, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC = 11, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_DISCARD_ATTACHMENTS = 13, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_TRY_LOCK = 53, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_LOCK = 54, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_UMP_UNLOCK = 55, ++ MALI_PROFILING_EVENT_REASON_SINGLE_LOCK_CONTENDED = 56, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_MALI_FENCE_DUP = 57, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_SET_PP_JOB_FENCE = 58, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_WAIT_SYNC = 59, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_FENCE_SYNC = 60, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_CREATE_NATIVE_FENCE_SYNC = 61, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FENCE_FLUSH = 62, ++ MALI_PROFILING_EVENT_REASON_SINGLE_SW_EGL_FLUSH_SERVER_WAITS = 63, ++} cinstr_profiling_event_reason_single_sw_t; + +/** -+ * Define a tracepoint used to send a bundle of software counters. -+ * -+ * @param counters The bundle of counters. ++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel ++ * to inform whether the core is physical or virtual + */ -+TRACE_EVENT(mali_sw_counters, -+ -+ TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters), -+ -+ TP_ARGS(pid, tid, surface_id, counters), -+ -+ TP_STRUCT__entry( -+ __field(pid_t, pid) -+ __field(pid_t, tid) -+ __field(void *, surface_id) -+ __field(unsigned int *, counters) -+ ), -+ -+ TP_fast_assign( -+ __entry->pid = pid; -+ __entry->tid = tid; -+ __entry->surface_id = surface_id; -+ __entry->counters = counters; -+ ), -+ -+ TP_printk("counters were %s", __entry->counters == NULL ? 
"NULL" : "not NULL") -+ ); ++typedef enum { ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL = 0, ++ MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL = 1, ++} cinstr_profiling_event_reason_start_stop_hw_t; + +/** -+ * Define a tracepoint used to gather core activity for systrace -+ * @param pid The process id for which the core activity originates from -+ * @param active If the core is active (1) or not (0) -+ * @param core_type The type of core active, either GP (1) or PP (0) -+ * @param core_id The core id that is active for the core_type -+ * @param frame_builder_id The frame builder id associated with this core activity -+ * @param flush_id The flush id associated with this core activity ++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_START/STOP is used from software channel + */ -+TRACE_EVENT(mali_core_active, ++typedef enum { ++ /*MALI_PROFILING_EVENT_REASON_START_STOP_SW_NONE = 0,*/ ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_MALI = 1, ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_CALLBACK_THREAD = 2, ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_WORKER_THREAD = 3, ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_BOTTOM_HALF = 4, ++ MALI_PROFILING_EVENT_REASON_START_STOP_SW_UPPER_HALF = 5, ++} cinstr_profiling_event_reason_start_stop_sw_t; + -+ TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id), ++/** ++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SUSPEND/RESUME is used from software channel ++ */ ++typedef enum { ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_NONE = 0, /* used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PIPELINE_FULL = 1, /* NOT used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VSYNC = 26, /* used in some build configurations */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_WAIT = 27, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_IFRAME_SYNC = 28, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_FILTER_CLEANUP = 29, /* used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_VG_WAIT_TEXTURE = 30, /* used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_MIPLEVEL = 31, /* used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GLES_WAIT_READPIXELS = 32, /* used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SWAP_IMMEDIATE = 33, /* NOT used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_QUEUE_BUFFER = 34, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_ICS_DEQUEUE_BUFFER = 35, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_UMP_LOCK = 36, /* Not currently used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_GLOBAL_LOCK = 37, /* Not currently used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_X11_SWAP = 38, /* Not currently used */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_MALI_EGL_IMAGE_SYNC_WAIT = 39, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_GP_JOB_HANDLING = 40, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_PP_JOB_HANDLING = 41, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_MERGE = 42, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_MALI_FENCE_DUP = 43, ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_FLUSH_SERVER_WAITS = 44, ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_EGL_WAIT_SYNC = 45, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_JOBS_WAIT = 46, /* USED */ ++ 
MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOFRAMES_WAIT = 47, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_FB_NOJOBS_WAIT = 48, /* USED */ ++ MALI_PROFILING_EVENT_REASON_SUSPEND_RESUME_SW_SUBMIT_LIMITER_WAIT = 49, /* USED */ ++} cinstr_profiling_event_reason_suspend_resume_sw_t; + -+ TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id), ++/** ++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from a HW channel (GPx+PPx) ++ */ ++typedef enum { ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_NONE = 0, ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_INTERRUPT = 1, ++ MALI_PROFILING_EVENT_REASON_SINGLE_HW_FLUSH = 2, ++} cinstr_profiling_event_reason_single_hw_t; + -+ TP_STRUCT__entry( -+ __field(pid_t, pid) -+ __field(unsigned int, active) -+ __field(unsigned int, core_type) -+ __field(unsigned int, core_id) -+ __field(unsigned int, frame_builder_id) -+ __field(unsigned int, flush_id) -+ ), ++/** ++ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel ++ */ ++typedef enum { ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_NONE = 0, ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1, ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L20_COUNTERS = 2, ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L21_COUNTERS = 3, ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_L22_COUNTERS = 4, ++} cinstr_profiling_event_reason_single_gpu_t; + -+ TP_fast_assign( -+ __entry->pid = pid; -+ __entry->active = active; -+ __entry->core_type = core_type; -+ __entry->core_id = core_id; -+ __entry->frame_builder_id = frame_builder_id; -+ __entry->flush_id = flush_id; -+ ), ++/** ++ * These values are applicable for the 3rd data parameter when ++ * the type MALI_PROFILING_EVENT_TYPE_START is used from the software channel ++ * with the MALI_PROFILING_EVENT_REASON_START_STOP_BOTTOM_HALF reason. ++ */ ++typedef enum { ++ MALI_PROFILING_EVENT_DATA_CORE_GP0 = 1, ++ MALI_PROFILING_EVENT_DATA_CORE_PP0 = 5, ++ MALI_PROFILING_EVENT_DATA_CORE_PP1 = 6, ++ MALI_PROFILING_EVENT_DATA_CORE_PP2 = 7, ++ MALI_PROFILING_EVENT_DATA_CORE_PP3 = 8, ++ MALI_PROFILING_EVENT_DATA_CORE_PP4 = 9, ++ MALI_PROFILING_EVENT_DATA_CORE_PP5 = 10, ++ MALI_PROFILING_EVENT_DATA_CORE_PP6 = 11, ++ MALI_PROFILING_EVENT_DATA_CORE_PP7 = 12, ++ MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU = 22, /* GP0 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU = 26, /* PP0 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP1_MMU = 27, /* PP1 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP2_MMU = 28, /* PP2 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP3_MMU = 29, /* PP3 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP4_MMU = 30, /* PP4 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP5_MMU = 31, /* PP5 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP6_MMU = 32, /* PP6 + 21 */ ++ MALI_PROFILING_EVENT_DATA_CORE_PP7_MMU = 33, /* PP7 + 21 */ + -+ TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? 
"GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id) -+ ); ++} cinstr_profiling_event_data_core_t; + -+#endif /* MALI_LINUX_TRACE_H */ ++#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0 + (num)) ++#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_GP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_GP0_MMU + (num)) ++#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0 + (num)) ++#define MALI_PROFILING_MAKE_EVENT_DATA_CORE_PP_MMU(num) (MALI_PROFILING_EVENT_DATA_CORE_PP0_MMU + (num)) + -+/* This part must exist outside the header guard. */ -+#include + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory.c ++#endif /*_MALI_UTGARD_PROFILING_EVENTS_H_*/ +diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h new file mode 100644 -index 000000000..cf3851490 +index 000000000..c1927d145 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory.c -@@ -0,0 +1,528 @@ ++++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_profiling_gator_api.h +@@ -0,0 +1,305 @@ +/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013, 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -311077,1049 +313921,1423 @@ index 000000000..cf3851490 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_osk.h" -+#include "mali_executor.h" ++#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__ ++#define __MALI_UTGARD_PROFILING_GATOR_API_H__ + -+#include "mali_memory.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_memory_block_alloc.h" -+#include "mali_memory_util.h" -+#include "mali_memory_virtual.h" -+#include "mali_memory_manager.h" -+#include "mali_memory_cow.h" -+#include "mali_memory_swap_alloc.h" -+#include "mali_memory_defer_bind.h" -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include "mali_memory_secure.h" ++#ifdef __cplusplus ++extern "C" { +#endif + -+extern unsigned int mali_dedicated_mem_size; -+extern unsigned int mali_shared_mem_size; ++#define MALI_PROFILING_API_VERSION 4 + -+#define MALI_VM_NUM_FAULT_PREFETCH (0x8) ++#define MAX_NUM_L2_CACHE_CORES 3 ++#define MAX_NUM_FP_CORES 8 ++#define MAX_NUM_VP_CORES 1 + -+static void mali_mem_vma_open(struct vm_area_struct *vma) -+{ -+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; -+ MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma)); ++#define _MALI_SPCIAL_COUNTER_DESCRIPTIONS \ ++ { \ ++ "Filmstrip_cnt0", \ ++ "Frequency", \ ++ "Voltage", \ ++ "vertex", \ ++ "fragment", \ ++ "Total_alloc_pages", \ ++ }; + -+ /* If need to share the allocation, add ref_count here */ -+ mali_allocation_ref(alloc); -+ return; -+} -+static void mali_mem_vma_close(struct vm_area_struct *vma) -+{ -+ /* If need to share the allocation, unref ref_count here */ -+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; ++#define _MALI_MEM_COUTNER_DESCRIPTIONS \ ++ { \ ++ "untyped_memory", \ ++ "vertex_index_buffer", \ ++ "texture_buffer", \ ++ "varying_buffer", \ ++ "render_target", \ ++ "pbuffer_buffer", \ ++ "plbu_heap", \ ++ "pointer_array_buffer", \ ++ "slave_tilelist", \ ++ "untyped_gp_cmdlist", \ ++ "polygon_cmdlist", \ ++ "texture_descriptor", \ ++ "render_state_word", \ ++ "shader", \ ++ "stream_buffer", \ ++ "fragment_stack", \ ++ "uniform", \ ++ "untyped_frame_pool", \ ++ "untyped_surface", \ ++ }; + -+ mali_allocation_unref(&alloc); -+ vma->vm_private_data = NULL; -+} ++/** The list of events supported by the Mali DDK. 
*/ ++typedef enum { ++ /* Vertex processor activity */ ++ ACTIVITY_VP_0 = 0, + -+static vm_fault_t mali_mem_vma_fault(struct vm_fault *vmf) -+{ -+ struct vm_area_struct *vma = vmf->vma; -+ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; -+ mali_mem_backend *mem_bkend = NULL; -+ int ret; -+ int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH; ++ /* Fragment processor activity */ ++ ACTIVITY_FP_0, ++ ACTIVITY_FP_1, ++ ACTIVITY_FP_2, ++ ACTIVITY_FP_3, ++ ACTIVITY_FP_4, ++ ACTIVITY_FP_5, ++ ACTIVITY_FP_6, ++ ACTIVITY_FP_7, + -+ unsigned long address = (unsigned long)vmf->address; -+ MALI_DEBUG_ASSERT(alloc->backend_handle); -+ MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address); ++ /* L2 cache counters */ ++ COUNTER_L2_0_C0, ++ COUNTER_L2_0_C1, ++ COUNTER_L2_1_C0, ++ COUNTER_L2_1_C1, ++ COUNTER_L2_2_C0, ++ COUNTER_L2_2_C1, + -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) { -+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n")); -+ mutex_unlock(&mali_idr_mutex); -+ return VM_FAULT_SIGBUS; -+ } -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type); ++ /* Vertex processor counters */ ++ COUNTER_VP_0_C0, ++ COUNTER_VP_0_C1, + -+ if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED != -+ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) && -+ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) { -+ /*check if use page fault to do COW*/ -+ MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address)); -+ mutex_lock(&mem_bkend->mutex); -+ ret = mali_mem_cow_allocate_on_demand(mem_bkend, -+ (address - vma->vm_start) / PAGE_SIZE); -+ mutex_unlock(&mem_bkend->mutex); ++ /* Fragment processor counters */ ++ COUNTER_FP_0_C0, ++ COUNTER_FP_0_C1, ++ COUNTER_FP_1_C0, ++ COUNTER_FP_1_C1, ++ COUNTER_FP_2_C0, ++ COUNTER_FP_2_C1, ++ COUNTER_FP_3_C0, ++ COUNTER_FP_3_C1, ++ COUNTER_FP_4_C0, ++ COUNTER_FP_4_C1, ++ COUNTER_FP_5_C0, ++ COUNTER_FP_5_C1, ++ COUNTER_FP_6_C0, ++ COUNTER_FP_6_C1, ++ COUNTER_FP_7_C0, ++ COUNTER_FP_7_C1, + -+ if (ret != _MALI_OSK_ERR_OK) { -+ return VM_FAULT_OOM; -+ } -+ prefetch_num = 1; ++ /* ++ * If more hardware counters are added, the _mali_osk_hw_counter_table ++ * below should also be updated. 
++ */ + -+ /* handle COW modified range cpu mapping -+ we zap the mapping in cow_modify_range, it will trigger page fault -+ when CPU access it, so here we map it to CPU*/ -+ mutex_lock(&mem_bkend->mutex); -+ ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num); -+ mutex_unlock(&mem_bkend->mutex); ++ /* EGL software counters */ ++ COUNTER_EGL_BLIT_TIME, + -+ if (unlikely(ret != _MALI_OSK_ERR_OK)) { -+ return VM_FAULT_SIGBUS; -+ } -+ } else if ((mem_bkend->type == MALI_MEM_SWAP) || -+ (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { -+ u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE; -+ int ret = _MALI_OSK_ERR_OK; ++ /* GLES software counters */ ++ COUNTER_GLES_DRAW_ELEMENTS_CALLS, ++ COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES, ++ COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED, ++ COUNTER_GLES_DRAW_ARRAYS_CALLS, ++ COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED, ++ COUNTER_GLES_DRAW_POINTS, ++ COUNTER_GLES_DRAW_LINES, ++ COUNTER_GLES_DRAW_LINE_LOOP, ++ COUNTER_GLES_DRAW_LINE_STRIP, ++ COUNTER_GLES_DRAW_TRIANGLES, ++ COUNTER_GLES_DRAW_TRIANGLE_STRIP, ++ COUNTER_GLES_DRAW_TRIANGLE_FAN, ++ COUNTER_GLES_NON_VBO_DATA_COPY_TIME, ++ COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI, ++ COUNTER_GLES_UPLOAD_TEXTURE_TIME, ++ COUNTER_GLES_UPLOAD_VBO_TIME, ++ COUNTER_GLES_NUM_FLUSHES, ++ COUNTER_GLES_NUM_VSHADERS_GENERATED, ++ COUNTER_GLES_NUM_FSHADERS_GENERATED, ++ COUNTER_GLES_VSHADER_GEN_TIME, ++ COUNTER_GLES_FSHADER_GEN_TIME, ++ COUNTER_GLES_INPUT_TRIANGLES, ++ COUNTER_GLES_VXCACHE_HIT, ++ COUNTER_GLES_VXCACHE_MISS, ++ COUNTER_GLES_VXCACHE_COLLISION, ++ COUNTER_GLES_CULLED_TRIANGLES, ++ COUNTER_GLES_CULLED_LINES, ++ COUNTER_GLES_BACKFACE_TRIANGLES, ++ COUNTER_GLES_GBCLIP_TRIANGLES, ++ COUNTER_GLES_GBCLIP_LINES, ++ COUNTER_GLES_TRIANGLES_DRAWN, ++ COUNTER_GLES_DRAWCALL_TIME, ++ COUNTER_GLES_TRIANGLES_COUNT, ++ COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT, ++ COUNTER_GLES_STRIP_TRIANGLES_COUNT, ++ COUNTER_GLES_FAN_TRIANGLES_COUNT, ++ COUNTER_GLES_LINES_COUNT, ++ COUNTER_GLES_INDEPENDENT_LINES_COUNT, ++ COUNTER_GLES_STRIP_LINES_COUNT, ++ COUNTER_GLES_LOOP_LINES_COUNT, + -+ mutex_lock(&mem_bkend->mutex); -+ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) { -+ ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page); -+ } else { -+ ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page); -+ } -+ mutex_unlock(&mem_bkend->mutex); ++ /* Special counter */ + -+ if (ret != _MALI_OSK_ERR_OK) { -+ MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address)); -+ return VM_FAULT_OOM; -+ } else { -+ return VM_FAULT_LOCKED; -+ } -+ } else { -+ MALI_PRINT_ERROR(("Mali vma fault! 
It never happen, indicating some logic errors in caller.\n")); -+ /*NOT support yet or OOM*/ -+ return VM_FAULT_OOM; -+ } -+ return VM_FAULT_NOPAGE; -+} ++ /* Framebuffer capture pseudo-counter */ ++ COUNTER_FILMSTRIP, ++ COUNTER_FREQUENCY, ++ COUNTER_VOLTAGE, ++ COUNTER_VP_ACTIVITY, ++ COUNTER_FP_ACTIVITY, ++ COUNTER_TOTAL_ALLOC_PAGES, + -+static struct vm_operations_struct mali_kernel_vm_ops = { -+ .open = mali_mem_vma_open, -+ .close = mali_mem_vma_close, -+ .fault = mali_mem_vma_fault, -+}; ++ /* Memory usage counter */ ++ COUNTER_MEM_UNTYPED, ++ COUNTER_MEM_VB_IB, ++ COUNTER_MEM_TEXTURE, ++ COUNTER_MEM_VARYING, ++ COUNTER_MEM_RT, ++ COUNTER_MEM_PBUFFER, ++ /* memory usages for gp command */ ++ COUNTER_MEM_PLBU_HEAP, ++ COUNTER_MEM_POINTER_ARRAY, ++ COUNTER_MEM_SLAVE_TILELIST, ++ COUNTER_MEM_UNTYPE_GP_CMDLIST, ++ /* memory usages for polygon list command */ ++ COUNTER_MEM_POLYGON_CMDLIST, ++ /* memory usages for pp command */ ++ COUNTER_MEM_TD, ++ COUNTER_MEM_RSW, ++ /* other memory usages */ ++ COUNTER_MEM_SHADER, ++ COUNTER_MEM_STREAMS, ++ COUNTER_MEM_FRAGMENT_STACK, ++ COUNTER_MEM_UNIFORM, ++ /* Special mem usage, which is used for mem pool allocation */ ++ COUNTER_MEM_UNTYPE_MEM_POOL, ++ COUNTER_MEM_UNTYPE_SURFACE, + ++ NUMBER_OF_EVENTS ++} _mali_osk_counter_id; + -+/** @ map mali allocation to CPU address -+* -+* Supported backend types: -+* --MALI_MEM_OS -+* -- need to add COW? -+ *Not supported backend types: -+* -_MALI_MEMORY_BIND_BACKEND_UMP -+* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF -+* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY -+* -+*/ -+int mali_mmap(struct file *filp, struct vm_area_struct *vma) -+{ -+ struct mali_session_data *session; -+ mali_mem_allocation *mali_alloc = NULL; -+ u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_backend *mem_bkend = NULL; -+ int ret = -EFAULT; ++#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0 ++#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7 + -+ session = (struct mali_session_data *)filp->private_data; -+ if (NULL == session) { -+ MALI_PRINT_ERROR(("mmap called without any session data available\n")); -+ return -EFAULT; -+ } ++#define FIRST_HW_COUNTER COUNTER_L2_0_C0 ++#define LAST_HW_COUNTER COUNTER_FP_7_C1 + -+ MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n", -+ (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT), -+ (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags)); ++#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME ++#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT + -+ /* Operations used on any memory system */ -+ /* do not need to anything in vm open/close now */ ++#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP ++#define LAST_SPECIAL_COUNTER COUNTER_TOTAL_ALLOC_PAGES + -+ /* find mali allocation structure by vaddress*/ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); -+ if (likely(mali_vma_node)) { -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start); -+ if (unlikely(mali_addr != mali_vma_node->vm_node.start)) { -+ /* only allow to use start address for mmap */ -+ MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n")); -+ return -EFAULT; -+ } -+ } else { -+ MALI_DEBUG_ASSERT(NULL == mali_vma_node); -+ return -EFAULT; -+ } ++#define FIRST_MEM_COUNTER COUNTER_MEM_UNTYPED ++#define LAST_MEM_COUNTER COUNTER_MEM_UNTYPE_SURFACE + -+ mali_alloc->cpu_mapping.addr = (void 
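The FIRST_*/LAST_* macros in mali_utgard_profiling_gator_api.h partition the single _mali_osk_counter_id space into activity, hardware, software, special and memory ranges, and the MALI_PROFILING_*_COUNTERS_NUM macros are simply the sizes of those ranges. A small illustrative helper (not part of the driver) showing how a consumer can classify a counter id:

static const char *example_counter_class(u32 id)
{
	if (id >= FIRST_ACTIVITY_EVENT && id <= LAST_ACTIVITY_EVENT)
		return "core activity";
	if (id >= FIRST_HW_COUNTER && id <= LAST_HW_COUNTER)
		return "hardware counter";
	if (id >= FIRST_SW_COUNTER && id <= LAST_SW_COUNTER)
		return "software counter";
	if (id >= FIRST_SPECIAL_COUNTER && id <= LAST_SPECIAL_COUNTER)
		return "special counter";
	if (id >= FIRST_MEM_COUNTER && id <= LAST_MEM_COUNTER)
		return "memory usage counter";
	return "unknown";
}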
__user *)vma->vm_start; ++#define MALI_PROFILING_MEM_COUNTERS_NUM (LAST_MEM_COUNTER - FIRST_MEM_COUNTER + 1) ++#define MALI_PROFILING_SPECIAL_COUNTERS_NUM (LAST_SPECIAL_COUNTER - FIRST_SPECIAL_COUNTER + 1) ++#define MALI_PROFILING_SW_COUNTERS_NUM (LAST_SW_COUNTER - FIRST_SW_COUNTER + 1) + -+ if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) { -+ MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n")); -+ return -EFAULT; -+ } ++/** ++ * Define the stream header type for porfiling stream. ++ */ ++#define STREAM_HEADER_FRAMEBUFFER 0x05 /* The stream packet header type for framebuffer dumping. */ ++#define STREAM_HEADER_COUNTER_VALUE 0x09 /* The stream packet header type for hw/sw/memory counter sampling. */ ++#define STREAM_HEADER_CORE_ACTIVITY 0x0a /* The stream packet header type for activity counter sampling. */ ++#define STREAM_HEADER_SIZE 5 + -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) { -+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n")); -+ mutex_unlock(&mali_idr_mutex); -+ return -EFAULT; -+ } -+ mutex_unlock(&mali_idr_mutex); ++/** ++ * Define the packet header type of profiling control packet. ++ */ ++#define PACKET_HEADER_ERROR 0x80 /* The response packet header type if error. */ ++#define PACKET_HEADER_ACK 0x81 /* The response packet header type if OK. */ ++#define PACKET_HEADER_COUNTERS_REQUEST 0x82 /* The control packet header type to request counter information from ddk. */ ++#define PACKET_HEADER_COUNTERS_ACK 0x83 /* The response packet header type to send out counter information. */ ++#define PACKET_HEADER_COUNTERS_ENABLE 0x84 /* The control packet header type to enable counters. */ ++#define PACKET_HEADER_START_CAPTURE_VALUE 0x85 /* The control packet header type to start capture values. */ + -+ if (!(MALI_MEM_SWAP == mali_alloc->type || -+ (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) { -+ /* Set some bits which indicate that, the memory is IO memory, meaning -+ * that no paging is to be performed and the memory should not be -+ * included in crash dumps. 
And that the memory is reserved, meaning -+ * that it's present and can never be paged out (see also previous -+ * entry) -+ */ -+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_PFNMAP); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) -+ vma->vm_flags |= VM_RESERVED; -+#else -+ vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND); ++#define PACKET_HEADER_SIZE 5 ++ ++/** ++ * Structure to pass performance counter data of a Mali core ++ */ ++typedef struct _mali_profiling_core_counters { ++ u32 source0; ++ u32 value0; ++ u32 source1; ++ u32 value1; ++} _mali_profiling_core_counters; ++ ++/** ++ * Structure to pass performance counter data of Mali L2 cache cores ++ */ ++typedef struct _mali_profiling_l2_counter_values { ++ struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES]; ++} _mali_profiling_l2_counter_values; ++ ++/** ++ * Structure to pass data defining Mali instance in use: ++ * ++ * mali_product_id - Mali product id ++ * mali_version_major - Mali version major number ++ * mali_version_minor - Mali version minor number ++ * num_of_l2_cores - number of L2 cache cores ++ * num_of_fp_cores - number of fragment processor cores ++ * num_of_vp_cores - number of vertex processor cores ++ */ ++typedef struct _mali_profiling_mali_version { ++ u32 mali_product_id; ++ u32 mali_version_major; ++ u32 mali_version_minor; ++ u32 num_of_l2_cores; ++ u32 num_of_fp_cores; ++ u32 num_of_vp_cores; ++} _mali_profiling_mali_version; ++ ++/** ++ * Structure to define the mali profiling counter struct. ++ */ ++typedef struct mali_profiling_counter { ++ char counter_name[40]; ++ u32 counter_id; ++ u32 counter_event; ++ u32 prev_counter_value; ++ u32 current_counter_value; ++ u32 key; ++ int enabled; ++} mali_profiling_counter; ++ ++/* ++ * List of possible actions to be controlled by Streamline. ++ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting. ++ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator. ++ */ ++#define FBDUMP_CONTROL_ENABLE (1) ++#define FBDUMP_CONTROL_RATE (2) ++#define SW_COUNTER_ENABLE (3) ++#define FBDUMP_CONTROL_RESIZE_FACTOR (4) ++#define MEM_COUNTER_ENABLE (5) ++#define ANNOTATE_PROFILING_ENABLE (6) ++ ++void _mali_profiling_control(u32 action, u32 value); ++ ++u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values); ++ ++int _mali_profiling_set_event(u32 counter_id, s32 event_id); ++ ++u32 _mali_profiling_get_api_version(void); ++ ++void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values); ++ ++#ifdef __cplusplus ++} +#endif -+ } else if (MALI_MEM_SWAP == mali_alloc->type) { -+ vma->vm_pgoff = mem_bkend->start_idx; -+ } + -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ vma->vm_ops = &mali_kernel_vm_ops; ++#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h +new file mode 100644 +index 000000000..34656f09b +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/include/linux/mali/mali_utgard_uk_types.h +@@ -0,0 +1,1108 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
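The FBDUMP_CONTROL_* / SW_COUNTER_ENABLE action codes and the function prototypes at the end of mali_utgard_profiling_gator_api.h are the entry points a gator-style profiler uses to drive the driver. A hedged sketch of a consumer enabling software counters and querying the GPU topology; the call order and the rate value are illustrative, not mandated by the header.

static void example_enable_profiling(void)
{
	struct _mali_profiling_mali_version info;

	_mali_profiling_control(SW_COUNTER_ENABLE, 1);      /* turn on s/w counter reporting */
	_mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);  /* turn on frame-buffer dumping */
	_mali_profiling_control(FBDUMP_CONTROL_RATE, 30);   /* illustrative rate value */

	/* Product id, version and core counts of the running DDK instance. */
	_mali_profiling_get_mali_version(&info);
}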
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start; ++/** ++ * @file mali_uk_types.h ++ * Defines the types and constants used in the user-kernel interface ++ */ + -+ /* If it's a copy-on-write mapping, map to read only */ -+ if (!(vma->vm_flags & VM_WRITE)) { -+ MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n")); -+ /* add VM_WRITE for do_page_fault will check this when a write fault */ -+ vm_flags_set(vma, VM_WRITE | VM_READ); -+ vma->vm_page_prot = PAGE_READONLY; -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE; -+ goto out; -+ } ++#ifndef __MALI_UTGARD_UK_TYPES_H__ ++#define __MALI_UTGARD_UK_TYPES_H__ + -+ if (mem_bkend->type == MALI_MEM_OS) { -+ ret = mali_mem_os_cpu_map(mem_bkend, vma); -+ } else if (mem_bkend->type == MALI_MEM_COW && -+ (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { -+ ret = mali_mem_cow_cpu_map(mem_bkend, vma); -+ } else if (mem_bkend->type == MALI_MEM_BLOCK) { -+ ret = mali_mem_block_cpu_map(mem_bkend, vma); -+ } else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW && -+ (MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) { -+ /*For swappable memory, CPU page table will be created by page fault handler. */ -+ ret = 0; -+ } else if (mem_bkend->type == MALI_MEM_SECURE) { -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ ret = mali_mem_secure_cpu_map(mem_bkend, vma); -+#else -+ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n")); -+ return -EFAULT; ++#ifdef __cplusplus ++extern "C" { +#endif -+ } else { -+ /* Not support yet*/ -+ MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory! \n")); -+ return -EFAULT; -+ } + -+ if (ret != 0) { -+ MALI_DEBUG_PRINT(1, ("ret != 0\n")); -+ return -EFAULT; -+ } -+out: -+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic); ++/* Iteration functions depend on these values being consecutive. */ ++#define MALI_UK_TIMELINE_GP 0 ++#define MALI_UK_TIMELINE_PP 1 ++#define MALI_UK_TIMELINE_SOFT 2 ++#define MALI_UK_TIMELINE_MAX 3 + -+ vma->vm_private_data = (void *)mali_alloc; -+ mali_alloc->cpu_mapping.vma = vma; ++#define MALI_UK_BIG_VARYING_SIZE (1024*1024*2) + -+ mali_allocation_ref(mali_alloc); ++typedef struct { ++ u32 points[MALI_UK_TIMELINE_MAX]; ++ s32 sync_fd; ++} _mali_uk_fence_t; + -+ return 0; -+} ++/** ++ * @addtogroup uddapi Unified Device Driver (UDD) APIs ++ * ++ * @{ ++ */ + -+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor) -+{ -+ u32 size = descriptor->psize; -+ struct mali_session_data *session = descriptor->session; ++/** ++ * @addtogroup u_k_api UDD User/Kernel Interface (U/K) APIs ++ * ++ * @{ ++ */ + -+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic); ++/** @defgroup _mali_uk_core U/K Core ++ * @{ */ + -+ /* Map dma-buf into this session's page tables */ ++/** Definition of subsystem numbers, to assist in creating a unique identifier ++ * for each U/K call. 
++ * ++ * @see _mali_uk_functions */ ++typedef enum { ++ _MALI_UK_CORE_SUBSYSTEM, /**< Core Group of U/K calls */ ++ _MALI_UK_MEMORY_SUBSYSTEM, /**< Memory Group of U/K calls */ ++ _MALI_UK_PP_SUBSYSTEM, /**< Fragment Processor Group of U/K calls */ ++ _MALI_UK_GP_SUBSYSTEM, /**< Vertex Processor Group of U/K calls */ ++ _MALI_UK_PROFILING_SUBSYSTEM, /**< Profiling Group of U/K calls */ ++ _MALI_UK_VSYNC_SUBSYSTEM, /**< VSYNC Group of U/K calls */ ++} _mali_uk_subsystem_t; + -+ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { -+ size += MALI_MMU_PAGE_SIZE; -+ } ++/** Within a function group each function has its unique sequence number ++ * to assist in creating a unique identifier for each U/K call. ++ * ++ * An ordered pair of numbers selected from ++ * ( \ref _mali_uk_subsystem_t,\ref _mali_uk_functions) will uniquely identify the ++ * U/K call across all groups of functions, and all functions. */ ++typedef enum { ++ /** Core functions */ + -+ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size); -+} ++ _MALI_UK_OPEN = 0, /**< _mali_ukk_open() */ ++ _MALI_UK_CLOSE, /**< _mali_ukk_close() */ ++ _MALI_UK_WAIT_FOR_NOTIFICATION, /**< _mali_ukk_wait_for_notification() */ ++ _MALI_UK_GET_API_VERSION, /**< _mali_ukk_get_api_version() */ ++ _MALI_UK_POST_NOTIFICATION, /**< _mali_ukk_post_notification() */ ++ _MALI_UK_GET_USER_SETTING, /**< _mali_ukk_get_user_setting() *//**< [out] */ ++ _MALI_UK_GET_USER_SETTINGS, /**< _mali_ukk_get_user_settings() *//**< [out] */ ++ _MALI_UK_REQUEST_HIGH_PRIORITY, /**< _mali_ukk_request_high_priority() */ ++ _MALI_UK_TIMELINE_GET_LATEST_POINT, /**< _mali_ukk_timeline_get_latest_point() */ ++ _MALI_UK_TIMELINE_WAIT, /**< _mali_ukk_timeline_wait() */ ++ _MALI_UK_TIMELINE_CREATE_SYNC_FENCE, /**< _mali_ukk_timeline_create_sync_fence() */ ++ _MALI_UK_SOFT_JOB_START, /**< _mali_ukk_soft_job_start() */ ++ _MALI_UK_SOFT_JOB_SIGNAL, /**< _mali_ukk_soft_job_signal() */ ++ _MALI_UK_PENDING_SUBMIT, /**< _mali_ukk_pending_submit() */ + -+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size) -+{ -+ u32 old_size = descriptor->psize; -+ struct mali_session_data *session = descriptor->session; ++ _MALI_GET_RK_KO_VERSION, /* rk_ext */ ++ _MALI_UK_GET_MALI_VERSION_IN_RK30, + -+ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic); ++ /** Memory functions */ + -+ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { -+ new_size += MALI_MMU_PAGE_SIZE; -+ } ++ _MALI_UK_ALLOC_MEM = 0, /**< _mali_ukk_alloc_mem() */ ++ _MALI_UK_FREE_MEM, /**< _mali_ukk_free_mem() */ ++ _MALI_UK_BIND_MEM, /**< _mali_ukk_mem_bind() */ ++ _MALI_UK_UNBIND_MEM, /**< _mali_ukk_mem_unbind() */ ++ _MALI_UK_COW_MEM, /**< _mali_ukk_mem_cow() */ ++ _MALI_UK_COW_MODIFY_RANGE, /**< _mali_ukk_mem_cow_modify_range() */ ++ _MALI_UK_RESIZE_MEM, /**<._mali_ukk_mem_resize() */ ++ _MALI_UK_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, /**< _mali_ukk_mem_get_mmu_page_table_dump_size() */ ++ _MALI_UK_DUMP_MMU_PAGE_TABLE, /**< _mali_ukk_mem_dump_mmu_page_table() */ ++ _MALI_UK_DMA_BUF_GET_SIZE, /**< _mali_ukk_dma_buf_get_size() */ ++ _MALI_UK_MEM_WRITE_SAFE, /**< _mali_uku_mem_write_safe() */ + -+ if (new_size > old_size) { -+ MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size); -+ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size); -+ } -+ return _MALI_OSK_ERR_OK; -+} ++ /** Common functions for each core */ + -+void 
mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags) -+{ -+ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { -+ size += MALI_MMU_PAGE_SIZE; -+ } ++ _MALI_UK_START_JOB = 0, /**< Start a Fragment/Vertex Processor Job on a core */ ++ _MALI_UK_GET_NUMBER_OF_CORES, /**< Get the number of Fragment/Vertex Processor cores */ ++ _MALI_UK_GET_CORE_VERSION, /**< Get the Fragment/Vertex Processor version compatible with all cores */ + -+ /* Umap and flush L2 */ -+ mali_mmu_pagedir_unmap(session->page_directory, vaddr, size); -+ mali_executor_zap_all_active(session); -+} ++ /** Fragment Processor Functions */ + -+u32 _mali_ukk_report_memory_usage(void) -+{ -+ u32 sum = 0; ++ _MALI_UK_PP_START_JOB = _MALI_UK_START_JOB, /**< _mali_ukk_pp_start_job() */ ++ _MALI_UK_GET_PP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_pp_number_of_cores() */ ++ _MALI_UK_GET_PP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_pp_core_version() */ ++ _MALI_UK_PP_DISABLE_WB, /**< _mali_ukk_pp_job_disable_wb() */ ++ _MALI_UK_PP_AND_GP_START_JOB, /**< _mali_ukk_pp_and_gp_start_job() */ + -+ if (MALI_TRUE == mali_memory_have_dedicated_memory()) { -+ sum += mali_mem_block_allocator_stat(); -+ } ++ /** Vertex Processor Functions */ + -+ sum += mali_mem_os_stat(); ++ _MALI_UK_GP_START_JOB = _MALI_UK_START_JOB, /**< _mali_ukk_gp_start_job() */ ++ _MALI_UK_GET_GP_NUMBER_OF_CORES = _MALI_UK_GET_NUMBER_OF_CORES, /**< _mali_ukk_get_gp_number_of_cores() */ ++ _MALI_UK_GET_GP_CORE_VERSION = _MALI_UK_GET_CORE_VERSION, /**< _mali_ukk_get_gp_core_version() */ ++ _MALI_UK_GP_SUSPEND_RESPONSE, /**< _mali_ukk_gp_suspend_response() */ + -+ return sum; -+} ++ /** Profiling functions */ + -+u32 _mali_ukk_report_total_memory_size(void) -+{ -+ return mali_dedicated_mem_size + mali_shared_mem_size; -+} ++ _MALI_UK_PROFILING_ADD_EVENT = 0, /**< __mali_uku_profiling_add_event() */ ++ _MALI_UK_PROFILING_REPORT_SW_COUNTERS,/**< __mali_uku_profiling_report_sw_counters() */ ++ _MALI_UK_PROFILING_MEMORY_USAGE_GET, /**< __mali_uku_profiling_memory_usage_get() */ ++ _MALI_UK_PROFILING_STREAM_FD_GET, /** < __mali_uku_profiling_stream_fd_get() */ ++ _MALI_UK_PROFILING_CONTROL_SET, /** < __mali_uku_profiling_control_set() */ + ++ /** VSYNC reporting fuctions */ ++ _MALI_UK_VSYNC_EVENT_REPORT = 0, /**< _mali_ukk_vsync_event_report() */ ++} _mali_uk_functions; ++ ++/** @defgroup _mali_uk_getsysteminfo U/K Get System Info ++ * @{ */ + +/** -+ * Per-session memory descriptor mapping table sizes ++ * Type definition for the core version number. ++ * Used when returning the version number read from a core ++ * ++ * Its format is that of the 32-bit Version register for a particular core. ++ * Refer to the "Mali200 and MaliGP2 3D Graphics Processor Technical Reference ++ * Manual", ARM DDI 0415C, for more information. 
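Together with the MALI_IOC_* macros in mali_utgard_ioctl.h, these two enums are what keep every ioctl number unique: the subsystem index offsets the 8-bit ioctl type from MALI_IOC_BASE (0x82), and a function's position inside its subsystem enum becomes the ioctl nr. A worked example that follows directly from the enum order shown here:

/* MALI_IOC_CORE_BASE   == _MALI_UK_CORE_SUBSYSTEM   + MALI_IOC_BASE == 0 + 0x82 == 0x82
 * MALI_IOC_MEMORY_BASE == _MALI_UK_MEMORY_SUBSYSTEM + MALI_IOC_BASE == 1 + 0x82 == 0x83
 * _MALI_UK_OPEN == 0, _MALI_UK_CLOSE == 1, _MALI_UK_WAIT_FOR_NOTIFICATION == 2, so
 * MALI_IOC_WAIT_FOR_NOTIFICATION expands to the same bits as:
 */
#define EXAMPLE_WAIT_FOR_NOTIFICATION \
	_IOWR(0x82, 2, _mali_uk_wait_for_notification_s)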
+ */ -+#define MALI_MEM_DESCRIPTORS_INIT 64 -+#define MALI_MEM_DESCRIPTORS_MAX 65536 ++typedef u32 _mali_core_version; + -+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session_data) -+{ -+ MALI_DEBUG_PRINT(5, ("Memory session begin\n")); ++/** @} */ /* end group _mali_uk_core */ + -+ session_data->memory_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_MEM_SESSION); + -+ if (NULL == session_data->memory_lock) { -+ MALI_ERROR(_MALI_OSK_ERR_FAULT); -+ } ++/** @defgroup _mali_uk_gp U/K Vertex Processor ++ * @{ */ + -+ session_data->cow_lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0); -+ if (NULL == session_data->cow_lock) { -+ _mali_osk_mutex_term(session_data->memory_lock); -+ MALI_ERROR(_MALI_OSK_ERR_FAULT); -+ } ++/** @defgroup _mali_uk_gp_suspend_response_s Vertex Processor Suspend Response ++ * @{ */ + -+ mali_memory_manager_init(&session_data->allocation_mgr); ++/** @brief Arguments for _mali_ukk_gp_suspend_response() ++ * ++ * When _mali_wait_for_notification() receives notification that a ++ * Vertex Processor job was suspended, you need to send a response to indicate ++ * what needs to happen with this job. You can either abort or resume the job. ++ * ++ * - set @c code to indicate response code. This is either @c _MALIGP_JOB_ABORT or ++ * @c _MALIGP_JOB_RESUME_WITH_NEW_HEAP to indicate you will provide a new heap ++ * for the job that will resolve the out of memory condition for the job. ++ * - copy the @c cookie value from the @c _mali_uk_gp_job_suspended_s notification; ++ * this is an identifier for the suspended job ++ * - set @c arguments[0] and @c arguments[1] to zero if you abort the job. If ++ * you resume it, @c argument[0] should specify the Mali start address for the new ++ * heap and @c argument[1] the Mali end address of the heap. ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * ++ */ ++typedef enum _maligp_job_suspended_response_code { ++ _MALIGP_JOB_ABORT, /**< Abort the Vertex Processor job */ ++ _MALIGP_JOB_RESUME_WITH_NEW_HEAP /**< Resume the Vertex Processor job with a new heap */ ++} _maligp_job_suspended_response_code; + -+ MALI_DEBUG_PRINT(5, ("MMU session begin: success\n")); -+ MALI_SUCCESS; -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 cookie; /**< [in] cookie from the _mali_uk_gp_job_suspended_s notification */ ++ _maligp_job_suspended_response_code code; /**< [in] abort or resume response code, see \ref _maligp_job_suspended_response_code */ ++ u32 arguments[2]; /**< [in] 0 when aborting a job. 
When resuming a job, the Mali start and end address for a new heap to resume the job with */ ++} _mali_uk_gp_suspend_response_s; + -+void mali_memory_session_end(struct mali_session_data *session) -+{ -+ MALI_DEBUG_PRINT(3, ("MMU session end\n")); ++/** @} */ /* end group _mali_uk_gp_suspend_response_s */ + -+ if (NULL == session) { -+ MALI_DEBUG_PRINT(1, ("No session data found during session end\n")); -+ return; -+ } -+ /* free allocation */ -+ mali_free_session_allocations(session); -+ /* do some check in unint*/ -+ mali_memory_manager_uninit(&session->allocation_mgr); ++/** @defgroup _mali_uk_gpstartjob_s Vertex Processor Start Job ++ * @{ */ + -+ /* Free the lock */ -+ _mali_osk_mutex_term(session->memory_lock); -+ _mali_osk_mutex_term(session->cow_lock); -+ return; -+} ++/** @brief Status indicating the result of the execution of a Vertex or Fragment processor job */ ++typedef enum { ++ _MALI_UK_JOB_STATUS_END_SUCCESS = 1 << (16 + 0), ++ _MALI_UK_JOB_STATUS_END_OOM = 1 << (16 + 1), ++ _MALI_UK_JOB_STATUS_END_ABORT = 1 << (16 + 2), ++ _MALI_UK_JOB_STATUS_END_TIMEOUT_SW = 1 << (16 + 3), ++ _MALI_UK_JOB_STATUS_END_HANG = 1 << (16 + 4), ++ _MALI_UK_JOB_STATUS_END_SEG_FAULT = 1 << (16 + 5), ++ _MALI_UK_JOB_STATUS_END_ILLEGAL_JOB = 1 << (16 + 6), ++ _MALI_UK_JOB_STATUS_END_UNKNOWN_ERR = 1 << (16 + 7), ++ _MALI_UK_JOB_STATUS_END_SHUTDOWN = 1 << (16 + 8), ++ _MALI_UK_JOB_STATUS_END_SYSTEM_UNUSABLE = 1 << (16 + 9) ++} _mali_uk_job_status; + -+_mali_osk_errcode_t mali_memory_initialize(void) -+{ -+ _mali_osk_errcode_t err; ++#define MALIGP2_NUM_REGS_FRAME (6) + -+ idr_init(&mali_backend_idr); -+ mutex_init(&mali_idr_mutex); ++/** @brief Arguments for _mali_ukk_gp_start_job() ++ * ++ * To start a Vertex Processor job ++ * - associate the request with a reference to a @c mali_gp_job_info by setting ++ * user_job_ptr to the address of the @c mali_gp_job_info of the job. ++ * - set @c priority to the priority of the @c mali_gp_job_info ++ * - specify a timeout for the job by setting @c watchdog_msecs to the number of ++ * milliseconds the job is allowed to run. Specifying a value of 0 selects the ++ * default timeout in use by the device driver. ++ * - copy the frame registers from the @c mali_gp_job_info into @c frame_registers. ++ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero ++ * for a non-instrumented build. For an instrumented build you can use up ++ * to two performance counters. Set the corresponding bit in @c perf_counter_flag ++ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify ++ * the source of what needs to get counted (e.g. number of vertex loader ++ * cache hits). For source id values, see ARM DDI0415A, Table 3-60. ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * ++ * When @c _mali_ukk_gp_start_job() returns @c _MALI_OSK_ERR_OK, status contains the ++ * result of the request (see \ref _mali_uk_start_job_status). If the job could ++ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be ++ * tried again. ++ * ++ * After the job has started, @c _mali_wait_for_notification() will be notified ++ * that the job finished or got suspended. It may get suspended due to ++ * resource shortage. If it finished (see _mali_ukk_wait_for_notification()) ++ * the notification will contain a @c _mali_uk_gp_job_finished_s result. If ++ * it got suspended the notification will contain a @c _mali_uk_gp_job_suspended_s ++ * result. 
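To make the recovery path described above concrete (the start-job description continues below): a sketch of filling in _mali_uk_gp_suspend_response_s to resume a suspended job with a new heap. The suspended parameter stands for the _mali_uk_gp_job_suspended_s notification payload, the heap addresses are placeholders, and the filled structure is then passed back to the driver (MALI_IOC_GP2_SUSPEND_RESPONSE carries this type).

static void example_resume_with_new_heap(const _mali_uk_gp_job_suspended_s *suspended,
					 u32 heap_start, u32 heap_end)
{
	_mali_uk_gp_suspend_response_s resp = { 0 };

	/* resp.ctx must carry the user-kernel context obtained from _mali_ukk_open(). */
	resp.cookie       = suspended->cookie;                /* identifies the stalled job/core */
	resp.code         = _MALIGP_JOB_RESUME_WITH_NEW_HEAP;
	resp.arguments[0] = heap_start;                       /* Mali start address of the new heap */
	resp.arguments[1] = heap_end;                         /* Mali end address of the new heap */

	/* ... hand resp back to the driver ... */
	(void)resp;
}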
++ * ++ * The @c _mali_uk_gp_job_finished_s contains the job status (see \ref _mali_uk_job_status), ++ * the number of milliseconds the job took to render, and values of core registers ++ * when the job finished (irq status, performance counters, renderer list ++ * address). A job has finished succesfully when its status is ++ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering ++ * the job, or software detected the job is taking more than watchdog_msecs to ++ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG. ++ * If the hardware detected a bus error while accessing memory associated with the ++ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT. ++ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to ++ * stop the job but the job didn't start on the hardware yet, e.g. when the ++ * driver shutdown. ++ * ++ * In case the job got suspended, @c _mali_uk_gp_job_suspended_s contains ++ * the @c user_job_ptr identifier used to start the job with, the @c reason ++ * why the job stalled (see \ref _maligp_job_suspended_reason) and a @c cookie ++ * to identify the core on which the job stalled. This @c cookie will be needed ++ * when responding to this nofication by means of _mali_ukk_gp_suspend_response(). ++ * (see _mali_ukk_gp_suspend_response()). The response is either to abort or ++ * resume the job. If the job got suspended due to an out of memory condition ++ * you may be able to resolve this by providing more memory and resuming the job. ++ * ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 user_job_ptr; /**< [in] identifier for the job in user space, a @c mali_gp_job_info* */ ++ u32 priority; /**< [in] job priority. 
A lower number means higher priority */ ++ u32 frame_registers[MALIGP2_NUM_REGS_FRAME]; /**< [in] core specific registers associated with this job */ ++ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */ ++ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */ ++ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */ ++ u32 frame_builder_id; /**< [in] id of the originating frame builder */ ++ u32 flush_id; /**< [in] flush id within the originating frame builder */ ++ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ ++ u64 timeline_point_ptr; /**< [in,out] pointer to u32: location where point on gp timeline for this job will be written */ ++ u32 varying_memsize; /** < [in] size of varying memory to use deffer bind*/ ++ u32 deferred_mem_num; ++ u64 deferred_mem_list; /** < [in] memory hanlde list of varying buffer to use deffer bind */ ++} _mali_uk_gp_start_job_s; + -+ err = mali_mem_swap_init(); -+ if (err != _MALI_OSK_ERR_OK) { -+ return err; -+ } -+ err = mali_mem_os_init(); -+ if (_MALI_OSK_ERR_OK == err) { -+ err = mali_mem_defer_bind_manager_init(); -+ } ++#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE (1<<0) /**< Enable performance counter SRC0 for a job */ ++#define _MALI_PERFORMANCE_COUNTER_FLAG_SRC1_ENABLE (1<<1) /**< Enable performance counter SRC1 for a job */ ++#define _MALI_PERFORMANCE_COUNTER_FLAG_HEATMAP_ENABLE (1<<2) /**< Enable per tile (aka heatmap) generation with for a job (using the enabled counter sources) */ + -+ return err; -+} ++/** @} */ /* end group _mali_uk_gpstartjob_s */ + -+void mali_memory_terminate(void) -+{ -+ mali_mem_swap_term(); -+ mali_mem_defer_bind_manager_destory(); -+ mali_mem_os_term(); -+ if (mali_memory_have_dedicated_memory()) { -+ mali_mem_block_allocator_destroy(); -+ } -+} ++typedef struct { ++ u64 user_job_ptr; /**< [out] identifier for the job in user space */ ++ _mali_uk_job_status status; /**< [out] status of finished job */ ++ u32 heap_current_addr; /**< [out] value of the GP PLB PL heap start address register */ ++ u32 perf_counter0; /**< [out] value of performance counter 0 (see ARM DDI0415A) */ ++ u32 perf_counter1; /**< [out] value of performance counter 1 (see ARM DDI0415A) */ ++ u32 pending_big_job_num; ++} _mali_uk_gp_job_finished_s; + ++typedef struct { ++ u64 user_job_ptr; /**< [out] identifier for the job in user space */ ++ u32 cookie; /**< [out] identifier for the core in kernel space on which the job stalled */ ++} _mali_uk_gp_job_suspended_s; + -+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type) -+{ -+ mali_page_node *page_node = NULL; ++/** @} */ /* end group _mali_uk_gp */ + -+ page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL); -+ MALI_DEBUG_ASSERT(NULL != page_node); + -+ if (page_node) { -+ page_node->type = type; -+ INIT_LIST_HEAD(&page_node->list); -+ } ++/** @defgroup _mali_uk_pp U/K Fragment Processor ++ * @{ */ + -+ return page_node; -+} ++#define _MALI_PP_MAX_SUB_JOBS 8 + -+void _mali_page_node_ref(struct mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ /* add ref to this page */ -+ get_page(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ mali_mem_block_add_ref(node); -+ } else if (node->type == MALI_PAGE_NODE_SWAP) { -+ atomic_inc(&node->swap_it->ref_count); -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Invalid 
type of mali page node! \n")); -+ } -+} ++#define _MALI_PP_MAX_FRAME_REGISTERS ((0x058/4)+1) + -+void _mali_page_node_unref(struct mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ /* unref to this page */ -+ put_page(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ mali_mem_block_dec_ref(node); -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); -+ } -+} ++#define _MALI_PP_MAX_WB_REGISTERS ((0x02C/4)+1) + ++#define _MALI_DLBU_MAX_REGISTERS 4 + -+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page) -+{ -+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type); -+ node->page = page; -+} ++/** Flag for _mali_uk_pp_start_job_s */ ++#define _MALI_PP_JOB_FLAG_NO_NOTIFICATION (1<<0) ++#define _MALI_PP_JOB_FLAG_IS_WINDOW_SURFACE (1<<1) ++#define _MALI_PP_JOB_FLAG_PROTECTED (1<<2) + ++/** @defgroup _mali_uk_ppstartjob_s Fragment Processor Start Job ++ * @{ */ + -+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item) -+{ -+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type); -+ node->swap_it = item; -+} ++/** @brief Arguments for _mali_ukk_pp_start_job() ++ * ++ * To start a Fragment Processor job ++ * - associate the request with a reference to a mali_pp_job by setting ++ * @c user_job_ptr to the address of the @c mali_pp_job of the job. ++ * - set @c priority to the priority of the mali_pp_job ++ * - specify a timeout for the job by setting @c watchdog_msecs to the number of ++ * milliseconds the job is allowed to run. Specifying a value of 0 selects the ++ * default timeout in use by the device driver. ++ * - copy the frame registers from the @c mali_pp_job into @c frame_registers. ++ * For MALI200 you also need to copy the write back 0,1 and 2 registers. ++ * - set the @c perf_counter_flag, @c perf_counter_src0 and @c perf_counter_src1 to zero ++ * for a non-instrumented build. For an instrumented build you can use up ++ * to two performance counters. Set the corresponding bit in @c perf_counter_flag ++ * to enable them. @c perf_counter_src0 and @c perf_counter_src1 specify ++ * the source of what needs to get counted (e.g. number of vertex loader ++ * cache hits). For source id values, see ARM DDI0415A, Table 3-60. ++ * - pass in the user-kernel context in @c ctx that was returned from _mali_ukk_open() ++ * ++ * When _mali_ukk_pp_start_job() returns @c _MALI_OSK_ERR_OK, @c status contains the ++ * result of the request (see \ref _mali_uk_start_job_status). If the job could ++ * not get started (@c _MALI_UK_START_JOB_NOT_STARTED_DO_REQUEUE) it should be ++ * tried again. ++ * ++ * After the job has started, _mali_wait_for_notification() will be notified ++ * when the job finished. The notification will contain a ++ * @c _mali_uk_pp_job_finished_s result. It contains the @c user_job_ptr ++ * identifier used to start the job with, the job @c status (see \ref _mali_uk_job_status), ++ * the number of milliseconds the job took to render, and values of core registers ++ * when the job finished (irq status, performance counters, renderer list ++ * address). A job has finished succesfully when its status is ++ * @c _MALI_UK_JOB_STATUS_FINISHED. If the hardware detected a timeout while rendering ++ * the job, or software detected the job is taking more than @c watchdog_msecs to ++ * complete, the status will indicate @c _MALI_UK_JOB_STATUS_HANG. 
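Both the GP and PP start-job descriptions use the same perf_counter_flag / perf_counter_src0 / perf_counter_src1 convention. A minimal sketch of filling the three fields for an instrumented build, assuming only the SRC0/SRC1 enable bits defined earlier (the two source ids below are placeholders; real ids come from ARM DDI0415A, Table 3-60):

#include <stdio.h>

/* Mirrors of _MALI_PERFORMANCE_COUNTER_FLAG_SRC0/SRC1_ENABLE (sketch names). */
#define SKETCH_PERF_FLAG_SRC0_ENABLE (1 << 0)
#define SKETCH_PERF_FLAG_SRC1_ENABLE (1 << 1)

int main(void)
{
	unsigned int perf_counter_flag = 0;
	unsigned int perf_counter_src0 = 0x07; /* placeholder source id */
	unsigned int perf_counter_src1 = 0x0a; /* placeholder source id */

	/* Instrumented build: enable both hardware counters for the job. */
	perf_counter_flag |= SKETCH_PERF_FLAG_SRC0_ENABLE | SKETCH_PERF_FLAG_SRC1_ENABLE;

	printf("flag=0x%x src0=0x%x src1=0x%x\n",
	       perf_counter_flag, perf_counter_src0, perf_counter_src1);
	return 0;
}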
++ * If the hardware detected a bus error while accessing memory associated with the ++ * job, status will indicate @c _MALI_UK_JOB_STATUS_SEG_FAULT. ++ * status will indicate @c _MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to ++ * stop the job but the job didn't start on the hardware yet, e.g. when the ++ * driver shutdown. ++ * ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 user_job_ptr; /**< [in] identifier for the job in user space */ ++ u32 priority; /**< [in] job priority. A lower number means higher priority */ ++ u32 frame_registers[_MALI_PP_MAX_FRAME_REGISTERS]; /**< [in] core specific registers associated with first sub job, see ARM DDI0415A */ ++ u32 frame_registers_addr_frame[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_FRAME registers for sub job 1-7 */ ++ u32 frame_registers_addr_stack[_MALI_PP_MAX_SUB_JOBS - 1]; /**< [in] ADDR_STACK registers for sub job 1-7 */ ++ u32 wb0_registers[_MALI_PP_MAX_WB_REGISTERS]; ++ u32 wb1_registers[_MALI_PP_MAX_WB_REGISTERS]; ++ u32 wb2_registers[_MALI_PP_MAX_WB_REGISTERS]; ++ u32 dlbu_registers[_MALI_DLBU_MAX_REGISTERS]; /**< [in] Dynamic load balancing unit registers */ ++ u32 num_cores; /**< [in] Number of cores to set up (valid range: 1-8(M450) or 4(M400)) */ ++ u32 perf_counter_flag; /**< [in] bitmask indicating which performance counters to enable, see \ref _MALI_PERFORMANCE_COUNTER_FLAG_SRC0_ENABLE and related macro definitions */ ++ u32 perf_counter_src0; /**< [in] source id for performance counter 0 (see ARM DDI0415A, Table 3-60) */ ++ u32 perf_counter_src1; /**< [in] source id for performance counter 1 (see ARM DDI0415A, Table 3-60) */ ++ u32 frame_builder_id; /**< [in] id of the originating frame builder */ ++ u32 flush_id; /**< [in] flush id within the originating frame builder */ ++ u32 flags; /**< [in] See _MALI_PP_JOB_FLAG_* for a list of avaiable flags */ ++ u32 tilesx; /**< [in] number of tiles in the x direction (needed for heatmap generation */ ++ u32 tilesy; /**< [in] number of tiles in y direction (needed for reading the heatmap memory) */ ++ u32 heatmap_mem; /**< [in] memory address to store counter values per tile (aka heatmap) */ ++ u32 num_memory_cookies; /**< [in] number of memory cookies attached to job */ ++ u64 memory_cookies; /**< [in] pointer to array of u32 memory cookies attached to job */ ++ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ ++ u64 timeline_point_ptr; /**< [in,out] pointer to location of u32 where point on pp timeline for this job will be written */ ++} _mali_uk_pp_start_job_s; + -+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item) -+{ -+ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type); -+ node->blk_it = item; -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 gp_args; /**< [in,out] GP uk arguments (see _mali_uk_gp_start_job_s) */ ++ u64 pp_args; /**< [in,out] PP uk arguments (see _mali_uk_pp_start_job_s) */ ++} _mali_uk_pp_and_gp_start_job_s; + ++/** @} */ /* end group _mali_uk_ppstartjob_s */ + -+int _mali_page_node_get_ref_count(struct mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ /* get ref count of this page */ -+ return page_count(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ return mali_mem_block_get_ref_count(node); -+ } else if (node->type == MALI_PAGE_NODE_SWAP) { -+ return atomic_read(&node->swap_it->ref_count); -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page 
node! \n")); -+ } -+ return -1; -+} ++typedef struct { ++ u64 user_job_ptr; /**< [out] identifier for the job in user space */ ++ _mali_uk_job_status status; /**< [out] status of finished job */ ++ u32 perf_counter0[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of perfomance counter 0 (see ARM DDI0415A), one for each sub job */ ++ u32 perf_counter1[_MALI_PP_MAX_SUB_JOBS]; /**< [out] value of perfomance counter 1 (see ARM DDI0415A), one for each sub job */ ++ u32 perf_counter_src0; ++ u32 perf_counter_src1; ++} _mali_uk_pp_job_finished_s; + ++typedef struct { ++ u32 number_of_enabled_cores; /**< [out] the new number of enabled cores */ ++} _mali_uk_pp_num_cores_changed_s; + -+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ return page_private(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ return _mali_blk_item_get_phy_addr(node->blk_it); -+ } else if (node->type == MALI_PAGE_NODE_SWAP) { -+ return node->swap_it->dma_addr; -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); -+ } -+ return 0; -+} + + -+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ return page_to_pfn(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ /* get phy addr for BLOCK page*/ -+ return _mali_blk_item_get_pfn(node->blk_it); -+ } else if (node->type == MALI_PAGE_NODE_SWAP) { -+ return page_to_pfn(node->swap_it->page); -+ } else { -+ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); -+ } -+ return 0; -+} ++/** ++ * Flags to indicate write-back units ++ */ ++typedef enum { ++ _MALI_UK_PP_JOB_WB0 = 1, ++ _MALI_UK_PP_JOB_WB1 = 2, ++ _MALI_UK_PP_JOB_WB2 = 4, ++} _mali_uk_pp_job_wbx_flag; + ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 fb_id; /**< [in] Frame builder ID of job to disable WB units for */ ++ u32 wb0_memory; ++ u32 wb1_memory; ++ u32 wb2_memory; ++} _mali_uk_pp_disable_wb_s; + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory.h -new file mode 100644 -index 000000000..efebbef23 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory.h -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ + -+#ifndef __MALI_MEMORY_H__ -+#define __MALI_MEMORY_H__ ++/** @} */ /* end group _mali_uk_pp */ + -+#include "mali_osk.h" -+#include "mali_session.h" ++/** @defgroup _mali_uk_soft_job U/K Soft Job ++ * @{ */ + -+#include -+#include ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 user_job; /**< [in] identifier for the job in user space */ ++ u64 job_id_ptr; /**< [in,out] pointer to location of u32 where job id will be written */ ++ _mali_uk_fence_t fence; /**< [in] fence this job must wait on */ ++ u32 point; /**< [out] point on soft timeline for this job */ ++ u32 type; /**< [in] type of soft job */ ++} _mali_uk_soft_job_start_s; + -+#include "mali_memory_types.h" -+#include "mali_memory_os_alloc.h" ++typedef struct { ++ u64 user_job; /**< [out] identifier for the job in user space */ ++} _mali_uk_soft_job_activated_s; + -+_mali_osk_errcode_t mali_memory_initialize(void); -+void mali_memory_terminate(void); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 job_id; /**< [in] id for soft job */ ++} _mali_uk_soft_job_signal_s; + -+/** @brief Allocate a page table page -+ * -+ * Allocate a page for use as a page directory or page table. The page is -+ * mapped into kernel space. -+ * -+ * @return _MALI_OSK_ERR_OK on success, otherwise an error code -+ * @param table_page GPU pointer to the allocated page -+ * @param mapping CPU pointer to the mapping of the allocated page -+ */ -+MALI_STATIC_INLINE _mali_osk_errcode_t -+mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping) -+{ -+ return mali_mem_os_get_table_page(table_page, mapping); -+} ++/** @} */ /* end group _mali_uk_soft_job */ + -+/** @brief Release a page table page ++typedef struct { ++ u32 counter_id; ++ u32 key; ++ int enable; ++} _mali_uk_annotate_profiling_mem_counter_s; ++ ++typedef struct { ++ u32 sampling_rate; ++ int enable; ++} _mali_uk_annotate_profiling_enable_s; ++ ++ ++/** @addtogroup _mali_uk_core U/K Core ++ * @{ */ ++ ++/** @defgroup _mali_uk_waitfornotification_s Wait For Notification ++ * @{ */ ++ ++/** @brief Notification type encodings + * -+ * Release a page table page allocated through \a mali_mmu_get_table_page ++ * Each Notification type is an ordered pair of (subsystem,id), and is unique. 
+ * -+ * @param pa the GPU address of the page to release ++ * The encoding of subsystem,id into a 32-bit word is: ++ * encoding = (( subsystem << _MALI_NOTIFICATION_SUBSYSTEM_SHIFT ) & _MALI_NOTIFICATION_SUBSYSTEM_MASK) ++ * | (( id << _MALI_NOTIFICATION_ID_SHIFT ) & _MALI_NOTIFICATION_ID_MASK) ++ * ++ * @see _mali_uk_wait_for_notification_s + */ -+MALI_STATIC_INLINE void -+mali_mmu_release_table_page(mali_dma_addr phys, void *virt) -+{ -+ mali_mem_os_release_table_page(phys, virt); -+} ++typedef enum { ++ /** core notifications */ + -+/** @brief mmap function ++ _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x20, ++ _MALI_NOTIFICATION_APPLICATION_QUIT = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x40, ++ _MALI_NOTIFICATION_SETTINGS_CHANGED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x80, ++ _MALI_NOTIFICATION_SOFT_ACTIVATED = (_MALI_UK_CORE_SUBSYSTEM << 16) | 0x100, ++ ++ /** Fragment Processor notifications */ ++ ++ _MALI_NOTIFICATION_PP_FINISHED = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x10, ++ _MALI_NOTIFICATION_PP_NUM_CORE_CHANGE = (_MALI_UK_PP_SUBSYSTEM << 16) | 0x20, ++ ++ /** Vertex Processor notifications */ ++ ++ _MALI_NOTIFICATION_GP_FINISHED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x10, ++ _MALI_NOTIFICATION_GP_STALLED = (_MALI_UK_GP_SUBSYSTEM << 16) | 0x20, ++ ++ /** Profiling notifications */ ++ _MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x10, ++ _MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE = (_MALI_UK_PROFILING_SUBSYSTEM << 16) | 0x20, ++} _mali_uk_notification_type; ++ ++/** to assist in splitting up 32-bit notification value in subsystem and id value */ ++#define _MALI_NOTIFICATION_SUBSYSTEM_MASK 0xFFFF0000 ++#define _MALI_NOTIFICATION_SUBSYSTEM_SHIFT 16 ++#define _MALI_NOTIFICATION_ID_MASK 0x0000FFFF ++#define _MALI_NOTIFICATION_ID_SHIFT 0 ++ ++ ++/** @brief Enumeration of possible settings which match mali_setting_t in user space + * -+ * mmap syscalls on the Mali device node will end up here. + * -+ * This function allocates Mali memory and maps it on CPU and Mali. + */ -+int mali_mmap(struct file *filp, struct vm_area_struct *vma); ++typedef enum { ++ _MALI_UK_USER_SETTING_SW_EVENTS_ENABLE = 0, ++ _MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, ++ _MALI_UK_USER_SETTING_DEPTHBUFFER_CAPTURE_ENABLED, ++ _MALI_UK_USER_SETTING_STENCILBUFFER_CAPTURE_ENABLED, ++ _MALI_UK_USER_SETTING_PER_TILE_COUNTERS_CAPTURE_ENABLED, ++ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_COMPOSITOR, ++ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_WINDOW, ++ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_OTHER, ++ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, ++ _MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, ++ _MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, ++ _MALI_UK_USER_SETTING_MAX, ++} _mali_uk_user_setting_t; + -+/** @brief Start a new memory session -+ * -+ * Called when a process opens the Mali device node. 
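The subsystem/id packing described above can be exercised directly with the mask and shift values just defined. A minimal sketch, assuming only those four defines (the subsystem value 2 and the helper are hypothetical; the real _MALI_UK_*_SUBSYSTEM codes live elsewhere in this header):

#include <stdio.h>

/* Mirrors of the _MALI_NOTIFICATION_* mask/shift defines above (sketch names). */
#define SKETCH_SUBSYSTEM_MASK  0xFFFF0000u
#define SKETCH_SUBSYSTEM_SHIFT 16
#define SKETCH_ID_MASK         0x0000FFFFu
#define SKETCH_ID_SHIFT        0

static unsigned int encode_notification(unsigned int subsystem, unsigned int id)
{
	return ((subsystem << SKETCH_SUBSYSTEM_SHIFT) & SKETCH_SUBSYSTEM_MASK) |
	       ((id << SKETCH_ID_SHIFT) & SKETCH_ID_MASK);
}

int main(void)
{
	unsigned int type = encode_notification(2, 0x20); /* hypothetical subsystem 2, id 0x20 */
	unsigned int subsystem = (type & SKETCH_SUBSYSTEM_MASK) >> SKETCH_SUBSYSTEM_SHIFT;
	unsigned int id = (type & SKETCH_ID_MASK) >> SKETCH_ID_SHIFT;

	printf("type 0x%08x -> subsystem %u, id 0x%x\n", type, subsystem, id);
	return 0;
}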
-+ * -+ * @param session Pointer to session to initialize ++/* See mali_user_settings_db.c */ ++extern const char *_mali_uk_user_setting_descriptions[]; ++#define _MALI_UK_USER_SETTING_DESCRIPTIONS \ ++ { \ ++ "sw_events_enable", \ ++ "colorbuffer_capture_enable", \ ++ "depthbuffer_capture_enable", \ ++ "stencilbuffer_capture_enable", \ ++ "per_tile_counters_enable", \ ++ "buffer_capture_compositor", \ ++ "buffer_capture_window", \ ++ "buffer_capture_other", \ ++ "buffer_capture_n_frames", \ ++ "buffer_capture_resize_factor", \ ++ "sw_counters_enable", \ ++ }; ++ ++/** @brief struct to hold the value to a particular setting as seen in the kernel space + */ -+_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session); ++typedef struct { ++ _mali_uk_user_setting_t setting; ++ u32 value; ++} _mali_uk_settings_changed_s; + -+/** @brief Close a memory session ++/** @brief Arguments for _mali_ukk_wait_for_notification() + * -+ * Called when a process closes the Mali device node. ++ * On successful return from _mali_ukk_wait_for_notification(), the members of ++ * this structure will indicate the reason for notification. + * -+ * Memory allocated by the session will be freed ++ * Specifically, the source of the notification can be identified by the ++ * subsystem and id fields of the mali_uk_notification_type in the code.type ++ * member. The type member is encoded in a way to divide up the types into a ++ * subsystem field, and a per-subsystem ID field. See ++ * _mali_uk_notification_type for more information. + * -+ * @param session Pointer to the session to terminate ++ * Interpreting the data union member depends on the notification type: ++ * ++ * - type == _MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS ++ * - The kernel side is shutting down. No further ++ * _mali_uk_wait_for_notification() calls should be made. ++ * - In this case, the value of the data union member is undefined. ++ * - This is used to indicate to the user space client that it should close ++ * the connection to the Mali Device Driver. ++ * - type == _MALI_NOTIFICATION_PP_FINISHED ++ * - The notification data is of type _mali_uk_pp_job_finished_s. It contains the user_job_ptr ++ * identifier used to start the job with, the job status, the number of milliseconds the job took to render, ++ * and values of core registers when the job finished (irq status, performance counters, renderer list ++ * address). ++ * - A job has finished succesfully when its status member is _MALI_UK_JOB_STATUS_FINISHED. ++ * - If the hardware detected a timeout while rendering the job, or software detected the job is ++ * taking more than watchdog_msecs (see _mali_ukk_pp_start_job()) to complete, the status member will ++ * indicate _MALI_UK_JOB_STATUS_HANG. ++ * - If the hardware detected a bus error while accessing memory associated with the job, status will ++ * indicate _MALI_UK_JOB_STATUS_SEG_FAULT. ++ * - Status will indicate MALI_UK_JOB_STATUS_NOT_STARTED if the driver had to stop the job but the job ++ * didn't start the hardware yet, e.g. when the driver closes. ++ * - type == _MALI_NOTIFICATION_GP_FINISHED ++ * - The notification data is of type _mali_uk_gp_job_finished_s. The notification is similar to that of ++ * type == _MALI_NOTIFICATION_PP_FINISHED, except that several other GP core register values are returned. ++ * The status values have the same meaning for type == _MALI_NOTIFICATION_PP_FINISHED. ++ * - type == _MALI_NOTIFICATION_GP_STALLED ++ * - The nofication data is of type _mali_uk_gp_job_suspended_s. 
It contains the user_job_ptr ++ * identifier used to start the job with, the reason why the job stalled and a cookie to identify the core on ++ * which the job stalled. ++ * - The reason member of gp_job_suspended is set to _MALIGP_JOB_SUSPENDED_OUT_OF_MEMORY ++ * when the polygon list builder unit has run out of memory. + */ -+void mali_memory_session_end(struct mali_session_data *session); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_notification_type type; /**< [out] Type of notification available */ ++ union { ++ _mali_uk_gp_job_suspended_s gp_job_suspended;/**< [out] Notification data for _MALI_NOTIFICATION_GP_STALLED notification type */ ++ _mali_uk_gp_job_finished_s gp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_GP_FINISHED notification type */ ++ _mali_uk_pp_job_finished_s pp_job_finished; /**< [out] Notification data for _MALI_NOTIFICATION_PP_FINISHED notification type */ ++ _mali_uk_settings_changed_s setting_changed;/**< [out] Notification data for _MALI_NOTIFICAATION_SETTINGS_CHANGED notification type */ ++ _mali_uk_soft_job_activated_s soft_job_activated; /**< [out] Notification data for _MALI_NOTIFICATION_SOFT_ACTIVATED notification type */ ++ _mali_uk_annotate_profiling_mem_counter_s profiling_mem_counter; ++ _mali_uk_annotate_profiling_enable_s profiling_enable; ++ } data; ++} _mali_uk_wait_for_notification_s; + -+/** @brief Prepare Mali page tables for mapping -+ * -+ * This function will prepare the Mali page tables for mapping the memory -+ * described by \a descriptor. ++/** @brief Arguments for _mali_ukk_post_notification() + * -+ * Page tables will be reference counted and allocated, if not yet present. ++ * Posts the specified notification to the notification queue for this application. ++ * This is used to send a quit message to the callback thread. ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_notification_type type; /**< [in] Type of notification to post */ ++} _mali_uk_post_notification_s; ++ ++/** @} */ /* end group _mali_uk_waitfornotification_s */ ++ ++/** @defgroup _mali_uk_getapiversion_s Get API Version ++ * @{ */ ++ ++/** helpers for Device Driver API version handling */ ++ ++/** @brief Encode a version ID from a 16-bit input + * -+ * @param descriptor Pointer to the memory descriptor to the mapping ++ * @note the input is assumed to be 16 bits. It must not exceed 16 bits. */ ++#define _MAKE_VERSION_ID(x) (((x) << 16UL) | (x)) ++ ++/** @brief Check whether a 32-bit value is likely to be Device Driver API ++ * version ID. */ ++#define _IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF)) ++ ++/** @brief Decode a 16-bit version number from a 32-bit Device Driver API version ++ * ID */ ++#define _GET_VERSION(x) (((x) >> 16UL) & 0xFFFF) ++ ++/** @brief Determine whether two 32-bit encoded version IDs match */ ++#define _IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y)))) ++ /** ++ * RK MALI version code ++ */ ++#define _MALI_RK_LIBS_VERSION 1 ++ ++/** ++ * API version define. ++ * Indicates the version of the kernel API ++ * The version is a 16bit integer incremented on each API change. 
++ * The 16bit integer is stored twice in a 32bit integer ++ * For example, for version 1 the value would be 0x00010001 + */ -+_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor); ++#define _MALI_API_VERSION 900 ++#define _MALI_UK_API_VERSION _MAKE_VERSION_ID(_MALI_API_VERSION) + -+/** @brief Resize Mali page tables for mapping ++/** ++ * The API version is a 16-bit integer stored in both the lower and upper 16-bits ++ * of a 32-bit value. The 16-bit API version value is incremented on each API ++ * change. Version 1 would be 0x00010001. Used in _mali_uk_get_api_version_s. ++ */ ++typedef u32 _mali_uk_api_version; ++ ++/** @brief Arguments for _mali_uk_get_api_version() + * -+ * This function will Resize the Mali page tables for mapping the memory -+ * described by \a descriptor. ++ * The user-side interface version must be written into the version member, ++ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of ++ * the kernel-side interface. + * -+ * Page tables will be reference counted and allocated, if not yet present. ++ * On successful return, the version member will be the API version of the ++ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version ++ * of the API. + * -+ * @param descriptor Pointer to the memory descriptor to the mapping -+ * @param new_size The new size of descriptor ++ * The compatible member must be checked to see if the version of the user-side ++ * interface is compatible with the kernel-side interface, since future versions ++ * of the interface may be backwards compatible. + */ -+_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size); ++typedef struct { ++ u32 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ ++ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */ ++} _mali_uk_get_api_version_s; + -+/** @brief Free Mali page tables for mapping ++/** @brief Arguments for _mali_uk_get_api_version_v2() + * -+ * This function will unmap pages from Mali memory and free the page tables -+ * that are now unused. ++ * The user-side interface version must be written into the version member, ++ * encoded using _MAKE_VERSION_ID(). It will be compared to the API version of ++ * the kernel-side interface. + * -+ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary. ++ * On successful return, the version member will be the API version of the ++ * kernel-side interface. _MALI_UK_API_VERSION macro defines the current version ++ * of the API. + * -+ * @param descriptor Pointer to the memory descriptor to unmap ++ * The compatible member must be checked to see if the version of the user-side ++ * interface is compatible with the kernel-side interface, since future versions ++ * of the interface may be backwards compatible. + */ -+void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ ++ int compatible; /**< [out] @c 1 when @version is compatible, @c 0 otherwise */ ++} _mali_uk_get_api_version_v2_s; + -+/** @brief Parse resource and prepare the OS memory allocator -+ * -+ * @param size Maximum size to allocate for Mali GPU. 
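To make the double-encoding of the 16-bit version concrete, here is a minimal sketch mirroring _MAKE_VERSION_ID, _IS_VERSION_ID and _GET_VERSION from above (the leading underscores are dropped so the sketch stays clearly separate from the real macros; 900 matches _MALI_API_VERSION as defined above):

#include <stdio.h>

/* Mirrors of _MAKE_VERSION_ID / _IS_VERSION_ID / _GET_VERSION above. */
#define MAKE_VERSION_ID(x) (((x) << 16UL) | (x))
#define IS_VERSION_ID(x)   (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF))
#define GET_VERSION(x)     (((x) >> 16UL) & 0xFFFF)

int main(void)
{
	unsigned long id = MAKE_VERSION_ID(900); /* 900 is _MALI_API_VERSION here */

	printf("encoded id      = 0x%08lx\n", id);                   /* 0x03840384 */
	printf("looks valid     = %d\n", IS_VERSION_ID(id) ? 1 : 0); /* 1 */
	printf("decoded version = %lu\n", GET_VERSION(id));          /* 900 */
	return 0;
}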
-+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. -+ */ -+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size); ++typedef struct ++{ ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ ++} _mali_uk_get_mali_version_in_rk30_s; + -+/** @brief Parse resource and prepare the dedicated memory allocator ++/* rk_ext : rk_ko_ver_t. */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_api_version version; /**< [in,out] API version of user-side interface. */ ++} _mali_rk_ko_version_s; ++/** @} */ /* end group _mali_uk_getapiversion_s */ ++ ++/** @defgroup _mali_uk_get_user_settings_s Get user space settings */ ++ ++/** @brief struct to keep the matching values of the user space settings within certain context ++ * ++ * Each member of the settings array corresponds to a matching setting in the user space and its value is the value ++ * of that particular setting. ++ * ++ * All settings are given reference to the context pointed to by the ctx pointer. + * -+ * @param start Physical start address of dedicated Mali GPU memory. -+ * @param size Size of dedicated Mali GPU memory. -+ * @return _MALI_OSK_ERR_OK on success, otherwise failure. + */ -+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 settings[_MALI_UK_USER_SETTING_MAX]; /**< [out] The values for all settings */ ++} _mali_uk_get_user_settings_s; + ++/** @brief struct to hold the value of a particular setting from the user space within a given context ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_user_setting_t setting; /**< [in] setting to get */ ++ u32 value; /**< [out] value of setting */ ++} _mali_uk_get_user_setting_s; + -+struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type); ++/** @brief Arguments for _mali_ukk_request_high_priority() */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++} _mali_uk_request_high_priority_s; + -+void _mali_page_node_ref(struct mali_page_node *node); -+void _mali_page_node_unref(struct mali_page_node *node); -+void _mali_page_node_add_page(struct mali_page_node *node, struct page *page); ++/** @brief Arguments for _mali_ukk_pending_submit() */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++} _mali_uk_pending_submit_s; + -+void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item); ++/** @} */ /* end group _mali_uk_core */ + -+void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item); + -+int _mali_page_node_get_ref_count(struct mali_page_node *node); -+dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node); -+unsigned long _mali_page_node_get_pfn(struct mali_page_node *node); ++/** @defgroup _mali_uk_memory U/K Memory ++ * @{ */ + -+#endif /* __MALI_MEMORY_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c -new file mode 100644 -index 000000000..bccef3576 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c -@@ -0,0 +1,362 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++#define _MALI_MEMORY_ALLOCATE_RESIZEABLE (1<<4) /* BUFFER can trim dow/grow*/ ++#define _MALI_MEMORY_ALLOCATE_NO_BIND_GPU (1<<5) /*Not map to GPU when allocate, must call bind later*/ ++#define _MALI_MEMORY_ALLOCATE_SWAPPABLE (1<<6) /* Allocate swappale memory. */ ++#define _MALI_MEMORY_ALLOCATE_DEFER_BIND (1<<7) /*Not map to GPU when allocate, must call bind later*/ ++#define _MALI_MEMORY_ALLOCATE_SECURE (1<<8) /* Allocate secure memory. */ + -+#include "mali_kernel_common.h" -+#include "mali_memory.h" -+#include "mali_memory_block_alloc.h" -+#include "mali_osk.h" -+#include + ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 gpu_vaddr; /**< [in] GPU virtual address */ ++ u32 vsize; /**< [in] vitrual size of the allocation */ ++ u32 psize; /**< [in] physical size of the allocation */ ++ u32 flags; ++ u64 backend_handle; /**< [out] backend handle */ ++ s32 secure_shared_fd; /** < [in] the mem handle for secure mem */ ++} _mali_uk_alloc_mem_s; + -+static mali_block_allocator *mali_mem_block_gobal_allocator = NULL; + -+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item) -+{ -+ return (item->phy_addr & ~(MALI_BLOCK_REF_MASK)); -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 gpu_vaddr; /**< [in] use as handle to free allocation */ ++ u32 free_pages_nr; /** < [out] record the number of free pages */ ++} _mali_uk_free_mem_s; + + -+unsigned long _mali_blk_item_get_pfn(mali_block_item *item) -+{ -+ return (item->phy_addr / MALI_BLOCK_SIZE); -+} ++#define _MALI_MEMORY_BIND_BACKEND_UMP (1<<8) ++#define _MALI_MEMORY_BIND_BACKEND_DMA_BUF (1<<9) ++#define _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY (1<<10) ++#define _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY (1<<11) ++#define _MALI_MEMORY_BIND_BACKEND_EXT_COW (1<<12) ++#define _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION (1<<13) + + -+u32 mali_mem_block_get_ref_count(mali_page_node *node) -+{ -+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); -+ return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK); -+} ++#define _MALI_MEMORY_BIND_BACKEND_MASK (_MALI_MEMORY_BIND_BACKEND_UMP| \ ++ _MALI_MEMORY_BIND_BACKEND_DMA_BUF |\ ++ _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY |\ ++ _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY |\ ++ _MALI_MEMORY_BIND_BACKEND_EXT_COW |\ ++ _MALI_MEMORY_BIND_BACKEND_HAVE_ALLOCATION) + + -+/* Increase the refence count -+* It not atomic, so it need to get sp_lock before call this function -+*/ ++#define _MALI_MEMORY_GPU_READ_ALLOCATE (1<<16) + -+u32 mali_mem_block_add_ref(mali_page_node *node) -+{ -+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); -+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT); -+ return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK); -+} + -+/* Decase the refence count -+* It not atomic, so it need to get sp_lock before call this function -+*/ -+u32 mali_mem_block_dec_ref(mali_page_node *node) -+{ -+ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); -+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0); -+ return 
(node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK); -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 vaddr; /**< [in] mali address to map the physical memory to */ ++ u32 size; /**< [in] size */ ++ u32 flags; /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */ ++ u32 padding; /** padding for 32/64 struct alignment */ ++ union { ++ struct { ++ u32 secure_id; /**< [in] secure id */ ++ u32 rights; /**< [in] rights necessary for accessing memory */ ++ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ ++ } bind_ump; ++ struct { ++ u32 mem_fd; /**< [in] Memory descriptor */ ++ u32 rights; /**< [in] rights necessary for accessing memory */ ++ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ ++ } bind_dma_buf; ++ struct { ++ u32 phys_addr; /**< [in] physical address */ ++ u32 rights; /**< [in] rights necessary for accessing memory */ ++ u32 flags; /**< [in] flags, see \ref _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE */ ++ } bind_ext_memory; ++ } mem_union; ++} _mali_uk_bind_mem_s; + ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 flags; /**< [in] see_MALI_MEMORY_BIND_BACKEND_* */ ++ u32 vaddr; /**< [in] identifier for mapped memory object in kernel space */ ++} _mali_uk_unbind_mem_s; + -+static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size) -+{ -+ mali_block_allocator *info; -+ u32 usable_size; -+ u32 num_blocks; -+ mali_page_node *m_node; -+ mali_block_item *mali_blk_items = NULL; -+ int i = 0; ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 target_handle; /**< [in] handle of allocation need to do COW */ ++ u32 target_offset; /**< [in] offset in target allocation to do COW(for support COW a memory allocated from memory_bank, PAGE_SIZE align)*/ ++ u32 target_size; /**< [in] size of target allocation to do COW (for support memory bank, PAGE_SIZE align)(in byte) */ ++ u32 range_start; /**< [in] re allocate range start offset, offset from the start of allocation (PAGE_SIZE align)*/ ++ u32 range_size; /**< [in] re allocate size (PAGE_SIZE align)*/ ++ u32 vaddr; /**< [in] mali address for the new allocaiton */ ++ u32 backend_handle; /**< [out] backend handle */ ++ u32 flags; ++} _mali_uk_cow_mem_s; + -+ usable_size = size & ~(MALI_BLOCK_SIZE - 1); -+ MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size)); -+ MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size)); -+ num_blocks = usable_size / MALI_BLOCK_SIZE; -+ MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks)); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 range_start; /**< [in] re allocate range start offset, offset from the start of allocation */ ++ u32 size; /**< [in] re allocate size*/ ++ u32 vaddr; /**< [in] mali address for the new allocaiton */ ++ s32 change_pages_nr; /**< [out] record the page number change for cow operation */ ++} _mali_uk_cow_modify_range_s; + -+ if (usable_size == 0) { -+ MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size)); -+ return NULL; -+ } + -+ info = _mali_osk_calloc(1, sizeof(mali_block_allocator)); -+ if (NULL != info) { -+ INIT_LIST_HEAD(&info->free); -+ spin_lock_init(&info->sp_lock); -+ info->total_num = num_blocks; -+ mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed 
on output) */ ++ u32 mem_fd; /**< [in] Memory descriptor */ ++ u32 size; /**< [out] size */ ++} _mali_uk_dma_buf_get_size_s; + -+ if (mali_blk_items) { -+ info->items = mali_blk_items; -+ /* add blocks(4k size) to free list*/ -+ for (i = 0 ; i < num_blocks ; i++) { -+ /* add block information*/ -+ mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE); -+ /* add to free list */ -+ m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK); -+ if (m_node == NULL) -+ goto fail; -+ _mali_page_node_add_block_item(m_node, &(mali_blk_items[i])); -+ list_add_tail(&m_node->list, &info->free); -+ atomic_add(1, &info->free_num); -+ } -+ return info; -+ } -+ } -+fail: -+ mali_mem_block_allocator_destroy(); -+ return NULL; -+} ++/** Flag for _mali_uk_map_external_mem_s, _mali_uk_attach_ump_mem_s and _mali_uk_attach_dma_buf_s */ ++#define _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE (1<<0) + -+void mali_mem_block_allocator_destroy(void) -+{ -+ struct mali_page_node *m_page, *m_tmp; -+ mali_block_allocator *info = mali_mem_block_gobal_allocator; -+ MALI_DEBUG_ASSERT_POINTER(info); -+ MALI_DEBUG_PRINT(4, ("Memory block destroy !\n")); + -+ if (NULL == info) -+ return; ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 vaddr; /* the buffer to do resize*/ ++ u32 psize; /* wanted physical size of this memory */ ++} _mali_uk_mem_resize_s; + -+ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); -+ list_del(&m_page->list); -+ kfree(m_page); -+ } ++/** ++ * @brief Arguments for _mali_uk[uk]_mem_write_safe() ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 src; /**< [in] Pointer to source data */ ++ u64 dest; /**< [in] Destination Mali buffer */ ++ u32 size; /**< [in,out] Number of bytes to write/copy on input, number of bytes actually written/copied on output */ ++} _mali_uk_mem_write_safe_s; + -+ _mali_osk_free(info->items); -+ _mali_osk_free(info); -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 size; /**< [out] size of MMU page table information (registers + page tables) */ ++} _mali_uk_query_mmu_page_table_dump_size_s; + -+u32 mali_mem_block_release(mali_mem_backend *mem_bkend) -+{ -+ mali_mem_allocation *alloc = mem_bkend->mali_allocation; -+ u32 free_pages_nr = 0; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK); ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 size; /**< [in] size of buffer to receive mmu page table information */ ++ u64 buffer; /**< [in,out] buffer to receive mmu page table information */ ++ u32 register_writes_size; /**< [out] size of MMU register dump */ ++ u64 register_writes; /**< [out] pointer within buffer where MMU register dump is stored */ ++ u32 page_table_dump_size; /**< [out] size of MMU page table dump */ ++ u64 page_table_dump; /**< [out] pointer within buffer where MMU page table dump is stored */ ++} _mali_uk_dump_mmu_page_table_s; + -+ /* Unmap the memory from the mali virtual address space. 
*/ -+ mali_mem_block_mali_unmap(alloc); -+ mutex_lock(&mem_bkend->mutex); -+ free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem); -+ mutex_unlock(&mem_bkend->mutex); -+ return free_pages_nr; -+} ++/** @} */ /* end group _mali_uk_memory */ + + -+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size) -+{ -+ struct mali_page_node *m_page, *m_tmp; -+ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE; -+ mali_block_allocator *info = mali_mem_block_gobal_allocator; -+ MALI_DEBUG_ASSERT_POINTER(info); ++/** @addtogroup _mali_uk_pp U/K Fragment Processor ++ * @{ */ + -+ MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size)); -+ /*do some init */ -+ INIT_LIST_HEAD(&block_mem->pfns); ++/** @brief Arguments for _mali_ukk_get_pp_number_of_cores() ++ * ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * - Upon successful return from _mali_ukk_get_pp_number_of_cores(), @c number_of_cores ++ * will contain the number of Fragment Processor cores in the system. ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 number_of_total_cores; /**< [out] Total number of Fragment Processor cores in the system */ ++ u32 number_of_enabled_cores; /**< [out] Number of enabled Fragment Processor cores */ ++} _mali_uk_get_pp_number_of_cores_s; + -+ spin_lock(&info->sp_lock); -+ /*check if have enough space*/ -+ if (atomic_read(&info->free_num) > page_count) { -+ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) { -+ if (page_count > 0) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); -+ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0); -+ list_move(&m_page->list, &block_mem->pfns); -+ block_mem->count++; -+ atomic_dec(&info->free_num); -+ _mali_page_node_ref(m_page); -+ } else { -+ break; -+ } -+ page_count--; -+ } -+ } else { -+ /* can't allocate from BLOCK memory*/ -+ spin_unlock(&info->sp_lock); -+ return -1; -+ } ++/** @brief Arguments for _mali_ukk_get_pp_core_version() ++ * ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * - Upon successful return from _mali_ukk_get_pp_core_version(), @c version contains ++ * the version that all Fragment Processor cores are compatible with. ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */ ++ u32 padding; ++} _mali_uk_get_pp_core_version_s; + -+ spin_unlock(&info->sp_lock); -+ return 0; -+} ++/** @} */ /* end group _mali_uk_pp */ + -+u32 mali_mem_block_free(mali_mem_block_mem *block_mem) -+{ -+ u32 free_pages_nr = 0; + -+ free_pages_nr = mali_mem_block_free_list(&block_mem->pfns); -+ MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE, -+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); -+ block_mem->count = 0; -+ MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns)); ++/** @addtogroup _mali_uk_gp U/K Vertex Processor ++ * @{ */ + -+ return free_pages_nr; -+} ++/** @brief Arguments for _mali_ukk_get_gp_number_of_cores() ++ * ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * - Upon successful return from _mali_ukk_get_gp_number_of_cores(), @c number_of_cores ++ * will contain the number of Vertex Processor cores in the system. 
++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 number_of_cores; /**< [out] number of Vertex Processor cores in the system */ ++} _mali_uk_get_gp_number_of_cores_s; + ++/** @brief Arguments for _mali_ukk_get_gp_core_version() ++ * ++ * - pass in the user-kernel context @c ctx that was returned from _mali_ukk_open() ++ * - Upon successful return from _mali_ukk_get_gp_core_version(), @c version contains ++ * the version that all Vertex Processor cores are compatible with. ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_core_version version; /**< [out] version returned from core, see \ref _mali_core_version */ ++} _mali_uk_get_gp_core_version_s; + -+u32 mali_mem_block_free_list(struct list_head *list) -+{ -+ struct mali_page_node *m_page, *m_tmp; -+ mali_block_allocator *info = mali_mem_block_gobal_allocator; -+ u32 free_pages_nr = 0; ++/** @} */ /* end group _mali_uk_gp */ + -+ if (info) { -+ spin_lock(&info->sp_lock); -+ list_for_each_entry_safe(m_page, m_tmp , list, list) { -+ if (1 == _mali_page_node_get_ref_count(m_page)) { -+ free_pages_nr++; -+ } -+ mali_mem_block_free_node(m_page); -+ } -+ spin_unlock(&info->sp_lock); -+ } -+ return free_pages_nr; -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 event_id; /**< [in] event id to register (see enum mali_profiling_events for values) */ ++ u32 data[5]; /**< [in] event specific data */ ++} _mali_uk_profiling_add_event_s; + -+/* free the node,*/ -+void mali_mem_block_free_node(struct mali_page_node *node) -+{ -+ mali_block_allocator *info = mali_mem_block_gobal_allocator; ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 memory_usage; /**< [out] total memory usage */ ++ u32 vaddr; /**< [in] mali address for the cow allocaiton */ ++ s32 change_pages_nr; /**< [out] record the page number change for cow operation */ ++} _mali_uk_profiling_memory_usage_get_s; + -+ /* only handle BLOCK node */ -+ if (node->type == MALI_PAGE_NODE_BLOCK && info) { -+ /*Need to make this atomic?*/ -+ if (1 == _mali_page_node_get_ref_count(node)) { -+ /*Move to free list*/ -+ _mali_page_node_unref(node); -+ list_move_tail(&node->list, &info->free); -+ atomic_add(1, &info->free_num); -+ } else { -+ _mali_page_node_unref(node); -+ list_del(&node->list); -+ kfree(node); -+ } -+ } -+} + -+/* unref the node, but not free it */ -+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node) -+{ -+ mali_block_allocator *info = mali_mem_block_gobal_allocator; -+ mali_page_node *new_node; ++/** @addtogroup _mali_uk_memory U/K Memory ++ * @{ */ + -+ /* only handle BLOCK node */ -+ if (node->type == MALI_PAGE_NODE_BLOCK && info) { -+ /*Need to make this atomic?*/ -+ if (1 == _mali_page_node_get_ref_count(node)) { -+ /* allocate a new node, Add to free list, keep the old node*/ -+ _mali_page_node_unref(node); -+ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK); -+ if (new_node) { -+ memcpy(new_node, node, sizeof(mali_page_node)); -+ list_add(&new_node->list, &info->free); -+ atomic_add(1, &info->free_num); -+ } else -+ return _MALI_OSK_ERR_FAULT; -+ -+ } else { -+ _mali_page_node_unref(node); -+ } -+ } -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props) -+{ -+ struct mali_page_directory *pagedir = session->page_directory; -+ struct mali_page_node *m_page; 
-+ dma_addr_t phys; -+ u32 virt = vaddr; -+ u32 prop = props; -+ -+ list_for_each_entry(m_page, &block_mem->pfns, list) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); -+ phys = _mali_page_node_get_dma_addr(m_page); -+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) -+ /* Verify that the "physical" address is 32-bit and -+ * usable for Mali, when on a system with bus addresses -+ * wider than 32-bit. */ -+ MALI_DEBUG_ASSERT(0 == (phys >> 32)); -+#endif -+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); -+ virt += MALI_MMU_PAGE_SIZE; -+ } ++/** @brief Arguments to _mali_ukk_mem_mmap() ++ * ++ * Use of the phys_addr member depends on whether the driver is compiled for ++ * Mali-MMU or nonMMU: ++ * - in the nonMMU case, this is the physical address of the memory as seen by ++ * the CPU (which may be a constant offset from that used by Mali) ++ * - in the MMU case, this is the Mali Virtual base address of the memory to ++ * allocate, and the particular physical pages used to back the memory are ++ * entirely determined by _mali_ukk_mem_mmap(). The details of the physical pages ++ * are not reported to user-space for security reasons. ++ * ++ * The cookie member must be stored for use later when freeing the memory by ++ * calling _mali_ukk_mem_munmap(). In the Mali-MMU case, the cookie is secure. ++ * ++ * The ukk_private word must be set to zero when calling from user-space. On ++ * Kernel-side, the OS implementation of the U/K interface can use it to ++ * communicate data to the OS implementation of the OSK layer. In particular, ++ * _mali_ukk_get_big_block() directly calls _mali_ukk_mem_mmap directly, and ++ * will communicate its own ukk_private word through the ukk_private member ++ * here. The common code itself will not inspect or modify the ukk_private ++ * word, and so it may be safely used for whatever purposes necessary to ++ * integrate Mali Memory handling into the OS. ++ * ++ * The uku_private member is currently reserved for use by the user-side ++ * implementation of the U/K interface. Its value must be zero. ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ void *mapping; /**< [out] Returns user-space virtual address for the mapping */ ++ u32 size; /**< [in] Size of the requested mapping */ ++ u32 phys_addr; /**< [in] Physical address - could be offset, depending on caller+callee convention */ ++ mali_bool writeable; ++} _mali_uk_mem_mmap_s; + -+ return 0; -+} ++/** @brief Arguments to _mali_ukk_mem_munmap() ++ * ++ * The cookie and mapping members must be that returned from the same previous ++ * call to _mali_ukk_mem_mmap(). The size member must correspond to cookie ++ * and mapping - that is, it must be the value originally supplied to a call to ++ * _mali_ukk_mem_mmap that returned the values of mapping and cookie. ++ * ++ * An error will be returned if an attempt is made to unmap only part of the ++ * originally obtained range, or to unmap more than was originally obtained. 
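The pairing rule spelled out above (the unmap request must carry exactly the mapping and size that the mmap call produced) is easy to get wrong, so a minimal sketch follows; the struct layouts are cut-down mirrors of _mali_uk_mem_mmap_s / _mali_uk_mem_munmap_s, and the backing array plus main() are hypothetical stand-ins for the real mapping and ioctl path:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;
typedef uint32_t u32;

/* Cut-down mirrors of _mali_uk_mem_mmap_s / _mali_uk_mem_munmap_s (sketch only). */
struct sketch_mmap_args   { u64 ctx; void *mapping; u32 size; u32 phys_addr; };
struct sketch_munmap_args { u64 ctx; void *mapping; u32 size; };

int main(void)
{
	static char backing[4096]; /* stands in for the mapping the driver would return */
	struct sketch_mmap_args map = { 0, backing, sizeof(backing), 0 };

	/* ... use map.mapping / map.size for the lifetime of the allocation ... */

	/* The unmap request reuses exactly the mapping and size from the mmap
	 * call; unmapping part of the range, or more than it, is an error. */
	struct sketch_munmap_args unmap = { map.ctx, map.mapping, map.size };

	printf("unmapping %u bytes at %p\n", (unsigned)unmap.size, unmap.mapping);
	return 0;
}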
++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ void *mapping; /**< [in] The mapping returned from mmap call */ ++ u32 size; /**< [in] The size passed to mmap call */ ++} _mali_uk_mem_munmap_s; ++/** @} */ /* end group _mali_uk_memory */ + -+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc) -+{ -+ struct mali_session_data *session; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++/** @defgroup _mali_uk_vsync U/K VSYNC Wait Reporting Module ++ * @{ */ + -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); -+} ++/** @brief VSYNC events ++ * ++ * These events are reported when DDK starts to wait for vsync and when the ++ * vsync has occured and the DDK can continue on the next frame. ++ */ ++typedef enum _mali_uk_vsync_event { ++ _MALI_UK_VSYNC_EVENT_BEGIN_WAIT = 0, ++ _MALI_UK_VSYNC_EVENT_END_WAIT ++} _mali_uk_vsync_event; + ++/** @brief Arguments to _mali_ukk_vsync_event() ++ * ++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_vsync_event event; /**< [in] VSYNCH event type */ ++} _mali_uk_vsync_event_report_s; + -+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) -+{ -+ int ret; -+ mali_mem_block_mem *block_mem = &mem_bkend->block_mem; -+ unsigned long addr = vma->vm_start; -+ struct mali_page_node *m_page; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK); ++/** @} */ /* end group _mali_uk_vsync */ + -+ list_for_each_entry(m_page, &block_mem->pfns, list) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); -+ ret = vmf_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page)); ++/** @defgroup _mali_uk_sw_counters_report U/K Software Counter Reporting ++ * @{ */ + -+ if (unlikely(0 != ret)) { -+ return -EFAULT; -+ } -+ addr += _MALI_OSK_MALI_PAGE_SIZE; ++/** @brief Software counter values ++ * ++ * Values recorded for each of the software counters during a single renderpass. 
++ */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 counters; /**< [in] The array of u32 counter values */ ++ u32 num_counters; /**< [in] The number of elements in counters array */ ++} _mali_uk_sw_counters_report_s; + -+ } ++/** @} */ /* end group _mali_uk_sw_counters_report */ + -+ return 0; -+} ++/** @defgroup _mali_uk_timeline U/K Mali Timeline ++ * @{ */ + ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 timeline; /**< [in] timeline id */ ++ u32 point; /**< [out] latest point on timeline */ ++} _mali_uk_timeline_get_latest_point_s; + -+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size) -+{ -+ mali_block_allocator *allocator; ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_fence_t fence; /**< [in] fence */ ++ u32 timeout; /**< [in] timeout (0 for no wait, -1 for blocking) */ ++ u32 status; /**< [out] status of fence (1 if signaled, 0 if timeout) */ ++} _mali_uk_timeline_wait_s; + -+ /* Do the low level linux operation first */ ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ _mali_uk_fence_t fence; /**< [in] mali fence to create linux sync fence from */ ++ s32 sync_fd; /**< [out] file descriptor for new linux sync fence */ ++} _mali_uk_timeline_create_sync_fence_s; + -+ /* Request ownership of the memory */ -+ if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) { -+ MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1)); -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** @} */ /* end group _mali_uk_timeline */ + -+ /* Create generic block allocator object to handle it */ -+ allocator = mali_mem_block_allocator_create(start, size); ++/** @} */ /* end group u_k_api */ + -+ if (NULL == allocator) { -+ MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n")); -+ _mali_osk_mem_unreqregion(start, size); -+ MALI_ERROR(_MALI_OSK_ERR_FAULT); -+ } ++/** @} */ /* end group uddapi */ + -+ mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator; ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ s32 stream_fd; /**< [in] The profiling kernel base stream fd handle */ ++} _mali_uk_profiling_stream_fd_get_s; + -+ return _MALI_OSK_ERR_OK; -+} ++typedef struct { ++ u64 ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u64 control_packet_data; /**< [in] the control packet data for control settings */ ++ u32 control_packet_size; /**< [in] The control packet size */ ++ u64 response_packet_data; /** < [out] The response packet data */ ++ u32 response_packet_size; /** < [in,out] The response packet data */ ++} _mali_uk_profiling_control_set_s; + -+mali_bool mali_memory_have_dedicated_memory(void) -+{ -+ return mali_mem_block_gobal_allocator ? 
MALI_TRUE : MALI_FALSE; ++#ifdef __cplusplus +} ++#endif + -+u32 mali_mem_block_allocator_stat(void) -+{ -+ mali_block_allocator *allocator = mali_mem_block_gobal_allocator; -+ MALI_DEBUG_ASSERT_POINTER(allocator); -+ -+ return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h ++#endif /* __MALI_UTGARD_UK_TYPES_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h b/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h new file mode 100644 -index 000000000..70fd9ec25 +index 000000000..6fafc6777 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h -@@ -0,0 +1,58 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/license/gpl/mali_kernel_license.h +@@ -0,0 +1,30 @@ +/* -+ * Copyright (C) 2010, 2013, 2015-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2013, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -312128,1236 +315346,34 @@ index 000000000..70fd9ec25 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_BLOCK_ALLOCATOR_H__ -+#define __MALI_BLOCK_ALLOCATOR_H__ -+ -+#include "mali_session.h" -+#include "mali_memory.h" -+#include -+ -+#include "mali_memory_types.h" -+ -+#define MALI_BLOCK_SIZE (PAGE_SIZE) /* 4 kB, manage BLOCK memory as page size */ -+#define MALI_BLOCK_REF_MASK (0xFFF) -+#define MALI_BLOCK_MAX_REF_COUNT (0xFFF) -+ ++/** ++ * @file mali_kernel_license.h ++ * Defines for the macro MODULE_LICENSE. 
++ */ + ++#ifndef __MALI_KERNEL_LICENSE_H__ ++#define __MALI_KERNEL_LICENSE_H__ + -+typedef struct mali_block_allocator { -+ /* -+ * In free list, each node's ref_count is 0, -+ * ref_count added when allocated or referenced in COW -+ */ -+ mali_block_item *items; /* information for each block item*/ -+ struct list_head free; /*free list of mali_memory_node*/ -+ spinlock_t sp_lock; /*lock for reference count & free list opertion*/ -+ u32 total_num; /* Number of total pages*/ -+ atomic_t free_num; /*number of free pages*/ -+} mali_block_allocator; ++#ifdef __cplusplus ++extern "C" { ++#endif + -+unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item); -+unsigned long _mali_blk_item_get_pfn(mali_block_item *item); -+u32 mali_mem_block_get_ref_count(mali_page_node *node); -+u32 mali_mem_block_add_ref(mali_page_node *node); -+u32 mali_mem_block_dec_ref(mali_page_node *node); -+u32 mali_mem_block_release(mali_mem_backend *mem_bkend); -+int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size); -+int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props); -+void mali_mem_block_mali_unmap(mali_mem_allocation *alloc); ++#define MALI_KERNEL_LINUX_LICENSE "GPL" ++#define MALI_LICENSE_IS_GPL 1 + -+int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); -+_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size); -+mali_bool mali_memory_have_dedicated_memory(void); -+u32 mali_mem_block_free(mali_mem_block_mem *block_mem); -+u32 mali_mem_block_free_list(struct list_head *list); -+void mali_mem_block_free_node(struct mali_page_node *node); -+void mali_mem_block_allocator_destroy(void); -+_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node); -+u32 mali_mem_block_allocator_stat(void); ++#ifdef __cplusplus ++} ++#endif + -+#endif /* __MALI_BLOCK_ALLOCATOR_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c -new file mode 100644 -index 000000000..cd9b6d2f7 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c -@@ -0,0 +1,776 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_ARM -+#include -+#endif -+#include -+ -+#include "mali_memory.h" -+#include "mali_kernel_common.h" -+#include "mali_uk_types.h" -+#include "mali_osk.h" -+#include "mali_kernel_linux.h" -+#include "mali_memory_cow.h" -+#include "mali_memory_block_alloc.h" -+#include "mali_memory_swap_alloc.h" -+ -+/** -+* allocate pages for COW backend and flush cache -+*/ -+static struct page *mali_mem_cow_alloc_page(void) -+ -+{ -+ mali_mem_os_mem os_mem; -+ struct mali_page_node *node; -+ struct page *new_page; -+ -+ int ret = 0; -+ /* allocate pages from os mem */ -+ ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE); -+ -+ if (ret) { -+ return NULL; -+ } -+ -+ MALI_DEBUG_ASSERT(1 == os_mem.count); -+ -+ node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list); -+ new_page = node->page; -+ node->page = NULL; -+ list_del(&node->list); -+ kfree(node); -+ -+ return new_page; -+} -+ -+ -+static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size) -+{ -+ MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type || -+ MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type); -+ -+ if (MALI_MEM_OS == target_bk->type) { -+ MALI_DEBUG_ASSERT(&target_bk->os_mem); -+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count); -+ return &target_bk->os_mem.pages; -+ } else if (MALI_MEM_COW == target_bk->type) { -+ MALI_DEBUG_ASSERT(&target_bk->cow_mem); -+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count); -+ return &target_bk->cow_mem.pages; -+ } else if (MALI_MEM_BLOCK == target_bk->type) { -+ MALI_DEBUG_ASSERT(&target_bk->block_mem); -+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count); -+ return &target_bk->block_mem.pfns; -+ } else if (MALI_MEM_SWAP == target_bk->type) { -+ MALI_DEBUG_ASSERT(&target_bk->swap_mem); -+ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count); -+ return &target_bk->swap_mem.pages; -+ } -+ -+ return NULL; -+} -+ -+/** -+* Do COW for os memory - support do COW for memory from bank memory -+* The range_start/size can be zero, which means it will call cow_modify_range -+* latter. 
-+* This function allocate new pages for COW backend from os mem for a modified range -+* It will keep the page which not in the modified range and Add ref to it -+* -+* @target_bk - target allocation's backend(the allocation need to do COW) -+* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align) -+* @target_size - size of target allocation to do COW (for support memory bank) -+* @backend -COW backend -+* @range_start - offset of modified range (4K align) -+* @range_size - size of modified range -+*/ -+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size, -+ mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size) -+{ -+ mali_mem_cow *cow = &backend->cow_mem; -+ struct mali_page_node *m_page, *m_tmp, *page_node; -+ int target_page = 0; -+ struct page *new_page; -+ struct list_head *pages = NULL; -+ -+ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size); -+ -+ if (NULL == pages) { -+ MALI_DEBUG_PRINT_ERROR(("No memory page need to cow ! \n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ MALI_DEBUG_ASSERT(0 == cow->count); -+ -+ INIT_LIST_HEAD(&cow->pages); -+ mutex_lock(&target_bk->mutex); -+ list_for_each_entry_safe(m_page, m_tmp, pages, list) { -+ /* add page from (target_offset,target_offset+size) to cow backend */ -+ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) && -+ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) { -+ -+ /* allocate a new page node, alway use OS memory for COW */ -+ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS); -+ -+ if (NULL == page_node) { -+ mutex_unlock(&target_bk->mutex); -+ goto error; -+ } -+ -+ INIT_LIST_HEAD(&page_node->list); -+ -+ /* check if in the modified range*/ -+ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && -+ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { -+ /* need to allocate a new page */ -+ /* To simplify the case, All COW memory is allocated from os memory ?*/ -+ new_page = mali_mem_cow_alloc_page(); -+ -+ if (NULL == new_page) { -+ kfree(page_node); -+ mutex_unlock(&target_bk->mutex); -+ goto error; -+ } -+ -+ _mali_page_node_add_page(page_node, new_page); -+ } else { -+ /*Add Block memory case*/ -+ if (m_page->type != MALI_PAGE_NODE_BLOCK) { -+ _mali_page_node_add_page(page_node, m_page->page); -+ } else { -+ page_node->type = MALI_PAGE_NODE_BLOCK; -+ _mali_page_node_add_block_item(page_node, m_page->blk_it); -+ } -+ -+ /* add ref to this page */ -+ _mali_page_node_ref(m_page); -+ } -+ -+ /* add it to COW backend page list */ -+ list_add_tail(&page_node->list, &cow->pages); -+ cow->count++; -+ } -+ target_page++; -+ } -+ mutex_unlock(&target_bk->mutex); -+ return _MALI_OSK_ERR_OK; -+error: -+ mali_mem_cow_release(backend, MALI_FALSE); -+ return _MALI_OSK_ERR_FAULT; -+} -+ -+_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size, -+ mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size) -+{ -+ mali_mem_cow *cow = &backend->cow_mem; -+ struct mali_page_node *m_page, *m_tmp, *page_node; -+ int target_page = 0; -+ struct mali_swap_item *swap_item; -+ struct list_head *pages = NULL; -+ -+ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size); -+ if (NULL == pages) { -+ MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow ! 
\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ MALI_DEBUG_ASSERT(0 == cow->count); -+ -+ INIT_LIST_HEAD(&cow->pages); -+ mutex_lock(&target_bk->mutex); -+ -+ backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN; -+ -+ list_for_each_entry_safe(m_page, m_tmp, pages, list) { -+ /* add page from (target_offset,target_offset+size) to cow backend */ -+ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) && -+ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) { -+ -+ /* allocate a new page node, use swap memory for COW memory swap cowed flag. */ -+ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP); -+ -+ if (NULL == page_node) { -+ mutex_unlock(&target_bk->mutex); -+ goto error; -+ } -+ -+ /* check if in the modified range*/ -+ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && -+ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { -+ /* need to allocate a new page */ -+ /* To simplify the case, All COW memory is allocated from os memory ?*/ -+ swap_item = mali_mem_swap_alloc_swap_item(); -+ -+ if (NULL == swap_item) { -+ kfree(page_node); -+ mutex_unlock(&target_bk->mutex); -+ goto error; -+ } -+ -+ swap_item->idx = mali_mem_swap_idx_alloc(); -+ -+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) { -+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n")); -+ kfree(page_node); -+ kfree(swap_item); -+ mutex_unlock(&target_bk->mutex); -+ goto error; -+ } -+ -+ _mali_page_node_add_swap_item(page_node, swap_item); -+ } else { -+ _mali_page_node_add_swap_item(page_node, m_page->swap_it); -+ -+ /* add ref to this page */ -+ _mali_page_node_ref(m_page); -+ } -+ -+ list_add_tail(&page_node->list, &cow->pages); -+ cow->count++; -+ } -+ target_page++; -+ } -+ mutex_unlock(&target_bk->mutex); -+ -+ return _MALI_OSK_ERR_OK; -+error: -+ mali_mem_swap_release(backend, MALI_FALSE); -+ return _MALI_OSK_ERR_FAULT; -+ -+} -+ -+ -+_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node) -+{ -+ if (node->type == MALI_PAGE_NODE_OS) { -+ return mali_mem_os_put_page(node->page); -+ } else if (node->type == MALI_PAGE_NODE_BLOCK) { -+ return mali_mem_block_unref_node(node); -+ } else if (node->type == MALI_PAGE_NODE_SWAP) { -+ return _mali_mem_swap_put_page_node(node); -+ } else -+ MALI_DEBUG_ASSERT(0); -+ return _MALI_OSK_ERR_FAULT; -+} -+ -+ -+/** -+* Modify a range of a exist COW backend -+* @backend -COW backend -+* @range_start - offset of modified range (4K align) -+* @range_size - size of modified range(in byte) -+*/ -+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size) -+{ -+ mali_mem_allocation *alloc = NULL; -+ struct mali_session_data *session; -+ mali_mem_cow *cow = &backend->cow_mem; -+ struct mali_page_node *m_page, *m_tmp; -+ LIST_HEAD(pages); -+ struct page *new_page; -+ u32 count = 0; -+ s32 change_pages_nr = 0; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; -+ -+ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ -+ alloc = backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type); -+ MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count); -+ -+ mutex_lock(&backend->mutex); -+ -+ /* free pages*/ -+ list_for_each_entry_safe(m_page, m_tmp, &cow->pages, 
list) { -+ -+ /* check if in the modified range*/ -+ if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && -+ (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { -+ if (MALI_PAGE_NODE_SWAP != m_page->type) { -+ new_page = mali_mem_cow_alloc_page(); -+ -+ if (NULL == new_page) { -+ goto error; -+ } -+ if (1 != _mali_page_node_get_ref_count(m_page)) -+ change_pages_nr++; -+ /* unref old page*/ -+ _mali_osk_mutex_wait(session->cow_lock); -+ if (_mali_mem_put_page_node(m_page)) { -+ __free_page(new_page); -+ _mali_osk_mutex_signal(session->cow_lock); -+ goto error; -+ } -+ _mali_osk_mutex_signal(session->cow_lock); -+ /* add new page*/ -+ /* always use OS for COW*/ -+ m_page->type = MALI_PAGE_NODE_OS; -+ _mali_page_node_add_page(m_page, new_page); -+ } else { -+ struct mali_swap_item *swap_item; -+ -+ swap_item = mali_mem_swap_alloc_swap_item(); -+ -+ if (NULL == swap_item) { -+ goto error; -+ } -+ -+ swap_item->idx = mali_mem_swap_idx_alloc(); -+ -+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) { -+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n")); -+ kfree(swap_item); -+ goto error; -+ } -+ -+ if (1 != _mali_page_node_get_ref_count(m_page)) { -+ change_pages_nr++; -+ } -+ -+ if (_mali_mem_put_page_node(m_page)) { -+ mali_mem_swap_free_swap_item(swap_item); -+ goto error; -+ } -+ -+ _mali_page_node_add_swap_item(m_page, swap_item); -+ } -+ } -+ count++; -+ } -+ cow->change_pages_nr = change_pages_nr; -+ -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type); -+ -+ /* ZAP cpu mapping(modified range), and do cpu mapping here if need */ -+ if (NULL != alloc->cpu_mapping.vma) { -+ MALI_DEBUG_ASSERT(0 != alloc->backend_handle); -+ MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma); -+ MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size); -+ -+ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { -+ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size); -+ -+ ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE); -+ -+ if (unlikely(ret != _MALI_OSK_ERR_OK)) { -+ MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n")); -+ ret = _MALI_OSK_ERR_FAULT; -+ } -+ } else { -+ /* used to trigger page fault for swappable cowed memory. */ -+ vm_flags_set(alloc->cpu_mapping.vma, VM_PFNMAP); -+ vm_flags_set(alloc->cpu_mapping.vma, VM_MIXEDMAP); -+ -+ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size); -+ /* delete this flag to let swappble is ummapped regard to stauct page not page frame. 
*/ -+ vm_flags_clear(alloc->cpu_mapping.vma, VM_PFNMAP); -+ vm_flags_clear(alloc->cpu_mapping.vma, VM_MIXEDMAP); -+ } -+ } -+ -+error: -+ mutex_unlock(&backend->mutex); -+ return ret; -+ -+} -+ -+ -+/** -+* Allocate pages for COW backend -+* @alloc -allocation for COW allocation -+* @target_bk - target allocation's backend(the allocation need to do COW) -+* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align) -+* @target_size - size of target allocation to do COW (for support memory bank)(in byte) -+* @backend -COW backend -+* @range_start - offset of modified range (4K align) -+* @range_size - size of modified range(in byte) -+*/ -+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size, -+ mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size) -+{ -+ struct mali_session_data *session = backend->mali_allocation->session; -+ -+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); -+ -+ /* size & offset must be a multiple of the system page size */ -+ if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ -+ /* check backend type */ -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type); -+ -+ switch (target_bk->type) { -+ case MALI_MEM_OS: -+ case MALI_MEM_BLOCK: -+ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size); -+ break; -+ case MALI_MEM_COW: -+ if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) { -+ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size); -+ } else { -+ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size); -+ } -+ break; -+ case MALI_MEM_SWAP: -+ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size); -+ break; -+ case MALI_MEM_EXTERNAL: -+ /*NOT support yet*/ -+ MALI_DEBUG_PRINT_ERROR(("External physical memory not supported ! \n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ break; -+ case MALI_MEM_DMA_BUF: -+ /*NOT support yet*/ -+ MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported ! \n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ break; -+ case MALI_MEM_UMP: -+ /*NOT support yet*/ -+ MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported ! \n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ break; -+ default: -+ /*Not support yet*/ -+ MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported ! 
\n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ break; -+ } -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+/** -+* Map COW backend memory to mali -+* Support OS/BLOCK for mali_page_node -+*/ -+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size) -+{ -+ mali_mem_allocation *cow_alloc; -+ struct mali_page_node *m_page; -+ struct mali_session_data *session; -+ struct mali_page_directory *pagedir; -+ u32 virt, start; -+ -+ cow_alloc = mem_bkend->mali_allocation; -+ virt = cow_alloc->mali_vma_node.vm_node.start; -+ start = virt; -+ -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); -+ MALI_DEBUG_ASSERT_POINTER(cow_alloc); -+ -+ session = cow_alloc->session; -+ pagedir = session->page_directory; -+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); -+ list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) { -+ if ((virt - start >= range_start) && (virt - start < range_start + range_size)) { -+ dma_addr_t phys = _mali_page_node_get_dma_addr(m_page); -+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) -+ MALI_DEBUG_ASSERT(0 == (phys >> 32)); -+#endif -+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, -+ MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); -+ } -+ virt += MALI_MMU_PAGE_SIZE; -+ } -+ return 0; -+} -+ -+/** -+* Map COW backend to cpu -+* support OS/BLOCK memory -+*/ -+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) -+{ -+ mali_mem_cow *cow = &mem_bkend->cow_mem; -+ struct mali_page_node *m_page; -+ int ret; -+ unsigned long addr = vma->vm_start; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW); -+ -+ list_for_each_entry(m_page, &cow->pages, list) { -+ /* We should use vm_insert_page, but it does a dcache -+ * flush which makes it way slower than remap_pfn_range or vmf_insert_pfn. 
-+ ret = vm_insert_page(vma, addr, page); -+ */ -+ ret = vmf_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page)); -+ -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ return -EFAULT; -+ } -+ addr += _MALI_OSK_MALI_PAGE_SIZE; -+ } -+ -+ return 0; -+} -+ -+/** -+* Map some pages(COW backend) to CPU vma@vaddr -+*@ mem_bkend - COW backend -+*@ vma -+*@ vaddr -start CPU vaddr mapped to -+*@ num - max number of pages to map to CPU vaddr -+*/ -+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend, -+ struct vm_area_struct *vma, -+ unsigned long vaddr, -+ int num) -+{ -+ mali_mem_cow *cow = &mem_bkend->cow_mem; -+ struct mali_page_node *m_page; -+ int ret; -+ int offset; -+ int count ; -+ unsigned long vstart = vma->vm_start; -+ count = 0; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW); -+ MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE); -+ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE); -+ offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE; -+ -+ list_for_each_entry(m_page, &cow->pages, list) { -+ if ((count >= offset) && (count < offset + num)) { -+ ret = vmf_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page)); -+ -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ if (count == offset) { -+ return _MALI_OSK_ERR_FAULT; -+ } else { -+ /* ret is EBUSY when page isn't in modify range, but now it's OK*/ -+ return _MALI_OSK_ERR_OK; -+ } -+ } -+ vaddr += _MALI_OSK_MALI_PAGE_SIZE; -+ } -+ count++; -+ } -+ return _MALI_OSK_ERR_OK; -+} -+ -+/** -+* Release COW backend memory -+* free it directly(put_page--unref page), not put into pool -+*/ -+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped) -+{ -+ mali_mem_allocation *alloc; -+ struct mali_session_data *session; -+ u32 free_pages_nr = 0; -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); -+ alloc = mem_bkend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) { -+ /* Unmap the memory from the mali virtual address space. */ -+ if (MALI_TRUE == is_mali_mapped) -+ mali_mem_os_mali_unmap(alloc); -+ /* free cow backend list*/ -+ _mali_osk_mutex_wait(session->cow_lock); -+ free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE); -+ _mali_osk_mutex_signal(session->cow_lock); -+ -+ free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages); -+ -+ MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages)); -+ } else { -+ free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped); -+ } -+ -+ -+ MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE, -+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); -+ -+ mem_bkend->cow_mem.count = 0; -+ return free_pages_nr; -+} -+ -+ -+/* Dst node could os node or swap node. 
*/ -+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node) -+{ -+ void *dst, *src; -+ struct page *dst_page; -+ dma_addr_t dma_addr; -+ -+ MALI_DEBUG_ASSERT(src_node != NULL); -+ MALI_DEBUG_ASSERT(dst_node != NULL); -+ MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS -+ || dst_node->type == MALI_PAGE_NODE_SWAP); -+ -+ if (dst_node->type == MALI_PAGE_NODE_OS) { -+ dst_page = dst_node->page; -+ } else { -+ dst_page = dst_node->swap_it->page; -+ } -+ -+ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node), -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ /* map it , and copy the content*/ -+ dst = kmap_atomic(dst_page); -+ -+ if (src_node->type == MALI_PAGE_NODE_OS || -+ src_node->type == MALI_PAGE_NODE_SWAP) { -+ struct page *src_page; -+ -+ if (src_node->type == MALI_PAGE_NODE_OS) { -+ src_page = src_node->page; -+ } else { -+ src_page = src_node->swap_it->page; -+ } -+ -+ /* Clear and invaliate cache */ -+ /* In ARM architecture, speculative read may pull stale data into L1 cache -+ * for kernel linear mapping page table. DMA_BIDIRECTIONAL could -+ * invalidate the L1 cache so that following read get the latest data -+ */ -+ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node), -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ src = kmap_atomic(src_page); -+ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE); -+ kunmap_atomic(src); -+ dma_addr = dma_map_page(&mali_platform_device->dev, src_page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ if (src_node->type == MALI_PAGE_NODE_SWAP) { -+ src_node->swap_it->dma_addr = dma_addr; -+ } -+ } else if (src_node->type == MALI_PAGE_NODE_BLOCK) { -+ /* -+ * use ioremap to map src for BLOCK memory -+ */ -+ src = ioremap(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE); -+ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE); -+ iounmap(src); -+ } -+ kunmap_atomic(dst); -+ dma_addr = dma_map_page(&mali_platform_device->dev, dst_page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ if (dst_node->type == MALI_PAGE_NODE_SWAP) { -+ dst_node->swap_it->dma_addr = dma_addr; -+ } -+} -+ -+ -+/* -+* allocate page on demand when CPU access it, -+* THis used in page fault handler -+*/ -+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page) -+{ -+ struct page *new_page = NULL; -+ struct mali_page_node *new_node = NULL; -+ int i = 0; -+ struct mali_page_node *m_page, *found_node = NULL; -+ struct mali_session_data *session = NULL; -+ mali_mem_cow *cow = &mem_bkend->cow_mem; -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); -+ MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE); -+ MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page)); -+ -+ /* allocate new page here */ -+ new_page = mali_mem_cow_alloc_page(); -+ if (!new_page) -+ return _MALI_OSK_ERR_NOMEM; -+ -+ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS); -+ if (!new_node) { -+ __free_page(new_page); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ /* find the page in backend*/ -+ list_for_each_entry(m_page, &cow->pages, list) { -+ if (i == offset_page) { -+ found_node = m_page; -+ break; -+ } -+ i++; -+ } -+ MALI_DEBUG_ASSERT(found_node); -+ if (NULL == found_node) { -+ __free_page(new_page); -+ kfree(new_node); -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; -+ } -+ -+ _mali_page_node_add_page(new_node, new_page); -+ -+ /* Copy the src page's content to new page */ -+ 
_mali_mem_cow_copy_page(found_node, new_node); -+ -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation); -+ session = mem_bkend->mali_allocation->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ if (1 != _mali_page_node_get_ref_count(found_node)) { -+ atomic_add(1, &session->mali_mem_allocated_pages); -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; -+ } -+ mem_bkend->cow_mem.change_pages_nr++; -+ } -+ -+ _mali_osk_mutex_wait(session->cow_lock); -+ if (_mali_mem_put_page_node(found_node)) { -+ __free_page(new_page); -+ kfree(new_node); -+ _mali_osk_mutex_signal(session->cow_lock); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ _mali_osk_mutex_signal(session->cow_lock); -+ -+ list_replace(&found_node->list, &new_node->list); -+ -+ kfree(found_node); -+ -+ /* map to GPU side*/ -+ _mali_osk_mutex_wait(session->memory_lock); -+ mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE); -+ _mali_osk_mutex_signal(session->memory_lock); -+ return _MALI_OSK_ERR_OK; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h -new file mode 100644 -index 000000000..5f83a37fc ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#ifndef __MALI_MEMORY_COW_H__ -+#define __MALI_MEMORY_COW_H__ -+ -+#include "mali_osk.h" -+#include "mali_session.h" -+#include "mali_memory_types.h" -+ -+int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); -+_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend, -+ struct vm_area_struct *vma, -+ unsigned long vaddr, -+ int num); -+ -+_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size, -+ mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size); -+ -+_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size); -+ -+_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk, -+ u32 target_offset, -+ u32 target_size, -+ mali_mem_backend *backend, -+ u32 range_start, -+ u32 range_size); -+ -+void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node); -+ -+int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size); -+u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped); -+_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page); -+#endif -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c -new file mode 100644 -index 000000000..a9db577cb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_ARM -+#include -+#endif -+#include -+ -+#include "mali_memory.h" -+#include "mali_kernel_common.h" -+#include "mali_uk_types.h" -+#include "mali_osk.h" -+#include "mali_kernel_linux.h" -+#include "mali_memory_defer_bind.h" -+#include "mali_executor.h" -+#include "mali_osk.h" -+#include "mali_scheduler.h" -+#include "mali_gp_job.h" -+ -+mali_defer_bind_manager *mali_dmem_man = NULL; -+ -+static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job) -+{ -+ return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE; -+} -+ -+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void) -+{ -+ mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager)); -+ if (!mali_dmem_man) -+ return _MALI_OSK_ERR_NOMEM; -+ -+ atomic_set(&mali_dmem_man->num_used_pages, 0); -+ atomic_set(&mali_dmem_man->num_dmem, 0); -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+void mali_mem_defer_bind_manager_destory(void) -+{ -+ if (mali_dmem_man) { -+ MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem)); -+ kfree(mali_dmem_man); -+ } -+ mali_dmem_man = NULL; -+} -+ -+ -+/*allocate pages from OS memory*/ -+_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock) -+{ -+ int retval = 0; -+ u32 num_pages = require; -+ mali_mem_os_mem os_mem; -+ -+ retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE); -+ -+ /* add to free pages list */ -+ if (0 == retval) { -+ MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x \n", num_pages)); -+ list_splice(&os_mem.pages, &dblock->free_pages); -+ atomic_add(os_mem.count, &dblock->num_free_pages); -+ atomic_add(os_mem.count, &session->mali_mem_allocated_pages); -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; -+ } -+ return _MALI_OSK_ERR_OK; -+ } else -+ return _MALI_OSK_ERR_FAULT; -+} -+ -+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock) -+{ -+ u32 require_page; -+ -+ if (!next_gp_job) -+ return _MALI_OSK_ERR_FAULT; -+ -+ require_page = mali_dmem_get_gp_varying_size(next_gp_job); -+ -+ MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n", -+ require_page)); -+ /* allocate more pages from OS */ -+ if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) { -+ MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!")); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED; -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+/* do preparetion for allocation before defer bind */ -+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize) -+{ -+ mali_mem_backend *mem_bkend = NULL; -+ struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list)); -+ if (NULL == bk_list) -+ return _MALI_OSK_ERR_FAULT; -+ -+ INIT_LIST_HEAD(&bk_list->node); -+ /* Get backend memory */ -+ mutex_lock(&mali_idr_mutex); -+ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) { -+ MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n")); -+ 
mutex_unlock(&mali_idr_mutex); -+ _mali_osk_free(bk_list); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ mutex_unlock(&mali_idr_mutex); -+ -+ /* If the mem backend has already been bound, no need to bind again.*/ -+ if (mem_bkend->os_mem.count > 0) { -+ _mali_osk_free(bk_list); -+ return _MALI_OSK_ERR_OK; -+ } -+ -+ MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, alloc->mali_vma_node.vm_node.start)); -+ -+ INIT_LIST_HEAD(&mem_bkend->os_mem.pages); -+ -+ bk_list->bkend = mem_bkend; -+ bk_list->vaddr = alloc->mali_vma_node.vm_node.start; -+ bk_list->session = alloc->session; -+ bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE; -+ *required_varying_memsize += mem_bkend->size; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); -+ -+ /* add to job to do list */ -+ list_add(&bk_list->node, list); -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+ -+/* bind phyiscal memory to allocation -+This function will be called in IRQ handler*/ -+static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node, -+ struct list_head *pages) -+{ -+ struct mali_session_data *session = bk_node->session; -+ mali_mem_backend *mem_bkend = bk_node->bkend; -+ MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session)); -+ -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); -+ list_splice(pages, &mem_bkend->os_mem.pages); -+ mem_bkend->os_mem.count = bk_node->page_num; -+ -+ if (mem_bkend->type == MALI_MEM_OS) { -+ mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0, -+ mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT); -+ } -+ smp_wmb(); -+ bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED; -+ mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED; -+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED; -+ return _MALI_OSK_ERR_OK; -+} -+ -+ -+static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock) -+{ -+ int i = 0; -+ struct mali_page_node *m_page, *m_tmp; -+ -+ if (atomic_read(&dblock->num_free_pages) < count) { -+ return NULL; -+ } else { -+ list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) { -+ if (i < count) { -+ list_move_tail(&m_page->list, pages); -+ } else { -+ break; -+ } -+ i++; -+ } -+ MALI_DEBUG_ASSERT(i == count); -+ atomic_sub(count, &dblock->num_free_pages); -+ return pages; -+ } -+} -+ -+ -+/* called in job start IOCTL to bind physical memory for each allocations -+@ bk_list backend list to do defer bind -+@ pages page list to do this bind -+@ count number of pages -+*/ -+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, -+ struct mali_defer_mem_block *dmem_block) -+{ -+ struct mali_defer_mem *dmem = NULL; -+ struct mali_backend_bind_list *bkn, *bkn_tmp; -+ LIST_HEAD(pages); -+ -+ if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) { -+ MALI_DEBUG_PRINT_ERROR(("#BIND: The memsize of varying buffer not match to the pagesize of the dmem_block!!## \n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x## \n", gp)); -+ dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem)); -+ if (dmem) { -+ INIT_LIST_HEAD(&dmem->node); -+ gp->dmem = dmem; -+ } else { -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ atomic_add(1, &mali_dmem_man->num_dmem); -+ /* for each bk_list backend, do bind */ -+ list_for_each_entry_safe(bkn, bkn_tmp , 
&gp->vary_todo, node) { -+ INIT_LIST_HEAD(&pages); -+ if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) { -+ list_del(&bkn->node); -+ mali_mem_defer_bind_allocation(bkn, &pages); -+ _mali_osk_free(bkn); -+ } else { -+ /* not enough memory will not happen */ -+ MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!## \n")); -+ _mali_osk_free(gp->dmem); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ } -+ -+ if (!list_empty(&gp->vary_todo)) { -+ MALI_DEBUG_PRINT_ERROR(("#BIND: The deferbind backend list isn't empty !!## \n")); -+ _mali_osk_free(gp->dmem); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED; -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+void mali_mem_defer_dmem_free(struct mali_gp_job *gp) -+{ -+ if (gp->dmem) { -+ atomic_dec(&mali_dmem_man->num_dmem); -+ _mali_osk_free(gp->dmem); -+ } -+} -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h -new file mode 100644 -index 000000000..defa08d52 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#ifndef __MALI_MEMORY_DEFER_BIND_H_ -+#define __MALI_MEMORY_DEFER_BIND_H_ -+ -+ -+#include "mali_osk.h" -+#include "mali_session.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+ -+#include "mali_memory_types.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_uk_types.h" -+ -+struct mali_gp_job; -+ -+typedef struct mali_defer_mem { -+ struct list_head node; /*dlist node in bind manager */ -+ u32 flag; -+} mali_defer_mem; -+ -+ -+typedef struct mali_defer_mem_block { -+ struct list_head free_pages; /* page pool */ -+ atomic_t num_free_pages; -+} mali_defer_mem_block; -+ -+/* varying memory list need to bind */ -+typedef struct mali_backend_bind_list { -+ struct list_head node; -+ struct mali_mem_backend *bkend; -+ u32 vaddr; -+ u32 page_num; -+ struct mali_session_data *session; -+ u32 flag; -+} mali_backend_bind_lists; -+ -+ -+typedef struct mali_defer_bind_manager { -+ atomic_t num_used_pages; -+ atomic_t num_dmem; -+} mali_defer_bind_manager; -+ -+_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void); -+void mali_mem_defer_bind_manager_destory(void); -+_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block); -+_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize); -+_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock); -+void mali_mem_defer_dmem_free(struct mali_gp_job *gp); -+ -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c ++#endif /* __MALI_KERNEL_LICENSE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c new file mode 100644 -index 000000000..1f4565127 
+index 000000000..7abd1532a --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c -@@ -0,0 +1,369 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.c +@@ -0,0 +1,368 @@ +/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -313366,371 +315382,370 @@ index 000000000..1f4565127 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include /* file system operations */ -+#include /* user space access */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_ukk.h" -+#include "mali_osk.h" ++#include "mali_osk_mali.h" +#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_kernel_linux.h" + -+#include "mali_memory.h" -+#include "mali_memory_dma_buf.h" -+#include "mali_memory_virtual.h" -+#include "mali_pp_job.h" ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_DEVFREQ_THERMAL ++#include ++#endif + -+/* -+ * Map DMA buf attachment \a mem into \a session at virtual address \a virt. ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) ++#include ++#else /* Linux >= 3.13 */ ++/* In 3.13 the OPP include header file, types, and functions were all ++ * renamed. Use the old filename for the include, and define the new names to ++ * the old, when an old kernel is detected. + */ -+static int mali_dma_buf_map(mali_mem_backend *mem_backend) -+{ -+ mali_mem_allocation *alloc; -+ struct mali_dma_buf_attachment *mem; -+ struct mali_session_data *session; -+ struct mali_page_directory *pagedir; -+ _mali_osk_errcode_t err; -+ struct scatterlist *sg; -+ u32 virt, flags; -+ int i; -+ -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ -+ alloc = mem_backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ -+ mem = mem_backend->dma_buf.attachment; -+ MALI_DEBUG_ASSERT_POINTER(mem); ++#include ++#define dev_pm_opp opp ++#define dev_pm_opp_get_voltage opp_get_voltage ++#define dev_pm_opp_get_opp_count opp_get_opp_count ++#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil ++#endif /* Linux >= 3.13 */ + -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT(mem->session == session); ++#include "mali_pm_metrics.h" + -+ virt = alloc->mali_vma_node.vm_node.start; -+ flags = alloc->flags; ++#include ++#include + -+ mali_session_memory_lock(session); -+ mem->map_ref++; ++static struct monitor_dev_profile mali_mdevp = { ++ .type = MONITOR_TYPE_DEV, ++ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, ++ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, ++}; + -+ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref)); ++static struct devfreq_simple_ondemand_data ondemand_data; + -+ if (1 == mem->map_ref) { ++static int ++mali_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) ++{ ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ struct dev_pm_opp *opp; ++ unsigned long freq = 0; ++ unsigned long old_freq = mdev->current_freq; ++ unsigned long voltage; ++ int err; + -+ /* First reference taken, so we need to map the dma buf */ -+ MALI_DEBUG_ASSERT(!mem->is_mapped); ++ freq = *target_freq; + -+ mem->sgt = 
dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL); -+ if (IS_ERR_OR_NULL(mem->sgt)) { -+ MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n")); -+ mem->map_ref--; -+ mali_session_memory_unlock(session); -+ return -EFAULT; -+ } ++ opp = devfreq_recommended_opp(dev, &freq, flags); ++ if (IS_ERR(opp)) { ++ MALI_PRINT_ERROR(("Failed to get opp (%ld)\n", PTR_ERR(opp))); ++ return PTR_ERR(opp); ++ } ++ voltage = dev_pm_opp_get_voltage(opp); ++ dev_pm_opp_put(opp); + -+ err = mali_mem_mali_map_prepare(alloc); -+ if (_MALI_OSK_ERR_OK != err) { -+ MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n")); -+ mem->map_ref--; -+ mali_session_memory_unlock(session); -+ return -ENOMEM; ++ MALI_DEBUG_PRINT(2, ("mali_devfreq_target:set_freq = %lld flags = 0x%x\n", freq, flags)); ++ /* ++ * Only update if there is a change of frequency ++ */ ++ if (old_freq == freq) { ++ *target_freq = freq; ++ mali_pm_reset_dvfs_utilisation(mdev); ++#ifdef CONFIG_REGULATOR ++ if (mdev->current_voltage == voltage) ++ return 0; ++ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); ++ if (err) { ++ dev_err(dev, "Failed to set voltage (%d)\n", err); ++ return err; + } ++ mdev->current_voltage = voltage; ++#endif ++ return 0; ++ } + -+ pagedir = mali_session_get_page_directory(session); -+ MALI_DEBUG_ASSERT_POINTER(pagedir); ++ err = clk_bulk_enable(mdev->num_clks, mdev->clks); ++ if (err) ++ return err; + -+ for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) { -+ u32 size = sg_dma_len(sg); -+ dma_addr_t phys = sg_dma_address(sg); ++#ifdef CONFIG_REGULATOR ++ if (mdev->regulator && mdev->current_voltage != voltage && ++ old_freq < freq) { ++ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); ++ if (err) { ++ MALI_PRINT_ERROR(("Failed to increase voltage (%d)\n", err)); ++ goto err; ++ } ++ } ++#endif + -+ /* sg must be page aligned. 
*/ -+ MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE); -+ MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF)); ++ err = clk_set_rate(mdev->clock, freq); ++ if (err) { ++ MALI_PRINT_ERROR(("Failed to set clock %lu (target %lu)\n", freq, *target_freq)); ++ goto err; ++ } + -+ mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT); ++ *target_freq = freq; ++ mdev->current_freq = freq; ++ if (mdev->devfreq) ++ mdev->devfreq->last_status.current_frequency = freq; + -+ virt += size; ++#ifdef CONFIG_REGULATOR ++ if (mdev->regulator && mdev->current_voltage != voltage && ++ old_freq > freq) { ++ err = regulator_set_voltage(mdev->regulator, voltage, INT_MAX); ++ if (err) { ++ MALI_PRINT_ERROR(("Failed to decrease voltage (%d)\n", err)); ++ goto err; + } ++ } ++#endif + -+ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { -+ u32 guard_phys; -+ MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n")); -+ -+ guard_phys = sg_dma_address(mem->sgt->sgl); -+ mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); -+ } ++ mdev->current_voltage = voltage; + -+ mem->is_mapped = MALI_TRUE; -+ mali_session_memory_unlock(session); -+ /* Wake up any thread waiting for buffer to become mapped */ -+ wake_up_all(&mem->wait_queue); -+ } else { -+ MALI_DEBUG_ASSERT(mem->is_mapped); -+ mali_session_memory_unlock(session); -+ } ++ mali_pm_reset_dvfs_utilisation(mdev); ++err: ++ clk_bulk_disable(mdev->num_clks, mdev->clks); + -+ return 0; ++ return err; +} + -+static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem) ++static int ++mali_devfreq_cur_freq(struct device *dev, unsigned long *freq) +{ -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ MALI_DEBUG_ASSERT_POINTER(mem); -+ MALI_DEBUG_ASSERT_POINTER(mem->attachment); -+ MALI_DEBUG_ASSERT_POINTER(mem->buf); -+ MALI_DEBUG_ASSERT_POINTER(alloc->session); -+ -+ mali_session_memory_lock(alloc->session); -+ mem->map_ref--; ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref)); ++ *freq = mdev->current_freq; + -+ if (0 == mem->map_ref) { -+ dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL); -+ if (MALI_TRUE == mem->is_mapped) { -+ mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ } -+ mem->is_mapped = MALI_FALSE; -+ } -+ mali_session_memory_unlock(alloc->session); -+ /* Wake up any thread waiting for buffer to become unmapped */ -+ wake_up_all(&mem->wait_queue); ++ MALI_DEBUG_PRINT(2, ("mali_devfreq_cur_freq: freq = %d \n", *freq)); ++ return 0; +} + -+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+int mali_dma_buf_map_job(struct mali_pp_job *job) ++static int ++mali_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +{ -+ struct mali_dma_buf_attachment *mem; -+ _mali_osk_errcode_t err; -+ int i; -+ int ret = 0; -+ u32 num_memory_cookies; -+ struct mali_session_data *session; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_allocation *mali_alloc = NULL; -+ mali_mem_backend *mem_bkend = NULL; ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ stat->current_frequency = mdev->current_freq; + -+ num_memory_cookies = mali_pp_job_num_memory_cookies(job); ++ mali_pm_get_dvfs_utilisation(mdev, ++ &stat->total_time, &stat->busy_time); + -+ session = mali_pp_job_get_session(job); ++ stat->private_data = NULL; + -+ 
MALI_DEBUG_ASSERT_POINTER(session); ++#ifdef CONFIG_DEVFREQ_THERMAL ++ memcpy(&mdev->devfreq->last_status, stat, sizeof(*stat)); ++#endif + -+ for (i = 0; i < num_memory_cookies; i++) { -+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); -+ MALI_DEBUG_ASSERT(NULL != mali_vma_node); -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ MALI_DEBUG_ASSERT(NULL != mali_alloc); -+ if (MALI_MEM_DMA_BUF != mali_alloc->type) { -+ continue; -+ } ++ return 0; ++} + -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); ++/* setup platform specific opp in platform.c*/ ++int __weak setup_opps(void) ++{ ++ return 0; ++} + -+ mem = mem_bkend->dma_buf.attachment; ++/* term platform specific opp in platform.c*/ ++int __weak term_opps(struct device *dev) ++{ ++ return 0; ++} + -+ MALI_DEBUG_ASSERT_POINTER(mem); -+ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job)); ++static int mali_devfreq_init_freq_table(struct mali_device *mdev, ++ struct devfreq_dev_profile *dp) ++{ ++ int err, count; ++ int i = 0; ++ unsigned long freq = 0; ++ struct dev_pm_opp *opp; + -+ err = mali_dma_buf_map(mem_bkend); -+ if (0 != err) { -+ MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr)); -+ ret = -EFAULT; -+ continue; -+ } ++ err = setup_opps(); ++ if (err) ++ return err; ++ ++ count = dev_pm_opp_get_opp_count(mdev->dev); ++ if (count < 0) { ++ return count; + } -+ return ret; -+} + -+void mali_dma_buf_unmap_job(struct mali_pp_job *job) -+{ -+ struct mali_dma_buf_attachment *mem; -+ int i; -+ u32 num_memory_cookies; -+ struct mali_session_data *session; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_allocation *mali_alloc = NULL; -+ mali_mem_backend *mem_bkend = NULL; ++ MALI_DEBUG_PRINT(2, ("mali devfreq table count %d\n", count)); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), ++ GFP_KERNEL); ++ if (!dp->freq_table) ++ return -ENOMEM; + -+ num_memory_cookies = mali_pp_job_num_memory_cookies(job); ++ for (i = 0; i < count; i++, freq++) { ++ opp = dev_pm_opp_find_freq_ceil(mdev->dev, &freq); ++ if (IS_ERR(opp)) ++ break; ++ dev_pm_opp_put(opp); + -+ session = mali_pp_job_get_session(job); ++ dp->freq_table[i] = freq; ++ MALI_DEBUG_PRINT(2, ("mali devfreq table array[%d] = %d\n", i, freq)); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ if (count != i) ++ MALI_PRINT_ERROR(("Unable to enumerate all OPPs (%d!=%d)\n", ++ count, i)); + -+ for (i = 0; i < num_memory_cookies; i++) { -+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); -+ MALI_DEBUG_ASSERT(NULL != mali_vma_node); -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ MALI_DEBUG_ASSERT(NULL != mali_alloc); -+ if (MALI_MEM_DMA_BUF != mali_alloc->type) { -+ continue; -+ } ++ dp->max_state = i; + -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); ++ return 0; ++} + -+ mem = mem_bkend->dma_buf.attachment; ++static void mali_devfreq_term_freq_table(struct mali_device 
*mdev) ++{ ++ struct devfreq_dev_profile *dp = mdev->devfreq->profile; + -+ MALI_DEBUG_ASSERT_POINTER(mem); -+ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job)); -+ mali_dma_buf_unmap(mem_bkend->mali_allocation, mem); -+ } ++ kfree(dp->freq_table); ++ term_opps(mdev->dev); +} -+#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */ + -+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg) ++static void mali_devfreq_exit(struct device *dev) +{ -+ _mali_uk_dma_buf_get_size_s args; -+ int fd; -+ struct dma_buf *buf; ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ /* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */ -+ if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) { -+ return -EFAULT; -+ } ++ mali_devfreq_term_freq_table(mdev); ++} + -+ /* Do DMA-BUF stuff */ -+ fd = args.mem_fd; ++int mali_devfreq_init(struct mali_device *mdev) ++{ ++ struct device_node *np = mdev->dev->of_node; ++#ifdef CONFIG_DEVFREQ_THERMAL ++ struct devfreq_cooling_power *callbacks = NULL; ++ _mali_osk_device_data data; ++#endif ++ struct devfreq_dev_profile *dp; ++ struct dev_pm_opp *opp; ++ unsigned long opp_rate; ++ unsigned int dyn_power_coeff = 0; ++ int err; + -+ buf = dma_buf_get(fd); -+ if (IS_ERR_OR_NULL(buf)) { -+ MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd)); -+ return PTR_ERR_OR_ZERO(buf); -+ } ++ MALI_DEBUG_PRINT(2, ("Init Mali devfreq\n")); + -+ if (0 != put_user(buf->size, &user_arg->size)) { -+ dma_buf_put(buf); -+ return -EFAULT; -+ } ++ if (!mdev->clock) ++ return -ENODEV; + -+ dma_buf_put(buf); ++ mdev->current_freq = clk_get_rate(mdev->clock); + -+ return 0; -+} ++ dp = &mdev->devfreq_profile; + -+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc, -+ mali_mem_backend *mem_backend, -+ int fd, u32 flags) -+{ -+ struct dma_buf *buf; -+ struct mali_dma_buf_attachment *dma_mem; -+ struct mali_session_data *session = alloc->session; ++ dp->initial_freq = mdev->current_freq; ++ dp->polling_ms = 100; ++ dp->target = mali_devfreq_target; ++ dp->get_dev_status = mali_devfreq_status; ++ dp->get_cur_freq = mali_devfreq_cur_freq; ++ dp->exit = mali_devfreq_exit; + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT_POINTER(alloc); ++ if (mali_devfreq_init_freq_table(mdev, dp)) ++ return -EFAULT; + -+ /* get dma buffer */ -+ buf = dma_buf_get(fd); -+ if (IS_ERR_OR_NULL(buf)) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ of_property_read_u32(np, "upthreshold", ++ &ondemand_data.upthreshold); ++ of_property_read_u32(np, "downdifferential", ++ &ondemand_data.downdifferential); ++ of_property_read_u32(np, "dynamic-power-coefficient", ++ &dyn_power_coeff); ++ if (dyn_power_coeff) ++ dp->is_cooling_device = true; + -+ /* Currently, mapping of the full buffer are supported. 
*/ -+ if (alloc->psize != buf->size) { -+ goto failed_alloc_mem; ++ mdev->devfreq = devfreq_add_device(mdev->dev, dp, ++ "simple_ondemand", &ondemand_data); ++ if (IS_ERR(mdev->devfreq)) { ++ mali_devfreq_term_freq_table(mdev); ++ return PTR_ERR(mdev->devfreq); + } + -+ dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment)); -+ if (NULL == dma_mem) { -+ goto failed_alloc_mem; ++ err = devfreq_register_opp_notifier(mdev->dev, mdev->devfreq); ++ if (err) { ++ MALI_PRINT_ERROR(("Failed to register OPP notifier (%d)\n", err)); ++ goto opp_notifier_failed; + } + -+ dma_mem->buf = buf; -+ dma_mem->session = session; -+ dma_mem->map_ref = 0; -+ init_waitqueue_head(&dma_mem->wait_queue); ++ opp_rate = mdev->current_freq; ++ opp = devfreq_recommended_opp(mdev->dev, &opp_rate, 0); ++ if (!IS_ERR(opp)) ++ dev_pm_opp_put(opp); ++ mdev->devfreq->last_status.current_frequency = opp_rate; + -+ dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev); -+ if (NULL == dma_mem->attachment) { -+ goto failed_dma_attach; ++ mali_mdevp.data = mdev->devfreq; ++ mali_mdevp.opp_info = &mdev->opp_info; ++ mdev->mdev_info = rockchip_system_monitor_register(mdev->dev, ++ &mali_mdevp); ++ if (IS_ERR(mdev->mdev_info)) { ++ dev_dbg(mdev->dev, "without system monitor\n"); ++ mdev->mdev_info = NULL; + } ++#ifdef CONFIG_DEVFREQ_THERMAL ++ if (of_machine_is_compatible("rockchip,rk3036")) ++ return 0; + -+ mem_backend->dma_buf.attachment = dma_mem; ++ /* Initilization last_status it will be used when first power allocate called */ ++ mdev->devfreq->last_status.current_frequency = mdev->current_freq; + -+ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP; -+ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { -+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ if (NULL != data.gpu_cooling_ops) { ++ callbacks = data.gpu_cooling_ops; ++ MALI_DEBUG_PRINT(2, ("Mali GPU Thermal: Callback handler installed \n")); ++ } + } + -+ -+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+ /* Map memory into session's Mali virtual address space. 
*/ -+ if (0 != mali_dma_buf_map(mem_backend)) { -+ goto Failed_dma_map; ++ if (callbacks && !dp->is_cooling_device) { ++ mdev->devfreq_cooling = devfreq_cooling_em_register( ++ mdev->devfreq, ++ callbacks); ++ if (IS_ERR_OR_NULL(mdev->devfreq_cooling)) { ++ err = PTR_ERR(mdev->devfreq_cooling); ++ MALI_PRINT_ERROR(("Failed to register cooling device (%d)\n", err)); ++ goto cooling_failed; ++ } else { ++ MALI_DEBUG_PRINT(2, ("Mali GPU Thermal Cooling installed \n")); ++ } + } +#endif + -+ return _MALI_OSK_ERR_OK; ++ return 0; + -+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+Failed_dma_map: -+ mali_dma_buf_unmap(alloc, dma_mem); -+#endif -+ /* Wait for buffer to become unmapped */ -+ wait_event(dma_mem->wait_queue, !dma_mem->is_mapped); -+ MALI_DEBUG_ASSERT(!dma_mem->is_mapped); -+ dma_buf_detach(dma_mem->buf, dma_mem->attachment); -+failed_dma_attach: -+ _mali_osk_free(dma_mem); -+failed_alloc_mem: -+ dma_buf_put(buf); -+ return _MALI_OSK_ERR_FAULT; ++#ifdef CONFIG_DEVFREQ_THERMAL ++cooling_failed: ++ devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq); ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++opp_notifier_failed: ++ err = devfreq_remove_device(mdev->devfreq); ++ if (err) ++ MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err)); ++ else ++ mdev->devfreq = NULL; ++ ++ return err; +} + -+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend) ++void mali_devfreq_term(struct mali_device *mdev) +{ -+ struct mali_dma_buf_attachment *mem; -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type); ++ int err; + -+ mem = mem_backend->dma_buf.attachment; -+ MALI_DEBUG_ASSERT_POINTER(mem); -+ MALI_DEBUG_ASSERT_POINTER(mem->attachment); -+ MALI_DEBUG_ASSERT_POINTER(mem->buf); -+ MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem)); ++ MALI_DEBUG_PRINT(2, ("Term Mali devfreq\n")); + -+#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+ MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation); -+ /* We mapped implicitly on attach, so we need to unmap on release */ -+ mali_dma_buf_unmap(mem_backend->mali_allocation, mem); ++ rockchip_system_monitor_unregister(mdev->mdev_info); ++#ifdef CONFIG_DEVFREQ_THERMAL ++ if (!IS_ERR_OR_NULL(mdev->devfreq_cooling)) ++ devfreq_cooling_unregister(mdev->devfreq_cooling); +#endif -+ /* Wait for buffer to become unmapped */ -+ wait_event(mem->wait_queue, !mem->is_mapped); -+ MALI_DEBUG_ASSERT(!mem->is_mapped); + -+ dma_buf_detach(mem->buf, mem->attachment); -+ dma_buf_put(mem->buf); ++ devfreq_unregister_opp_notifier(mdev->dev, mdev->devfreq); + -+ _mali_osk_free(mem); ++ err = devfreq_remove_device(mdev->devfreq); ++ if (err) ++ MALI_PRINT_ERROR(("Failed to terminate devfreq (%d)\n", err)); ++ else ++ mdev->devfreq = NULL; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h new file mode 100644 -index 000000000..a9b287038 +index 000000000..ba7c017d8 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h -@@ -0,0 +1,53 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_devfreq.h +@@ -0,0 +1,17 @@ +/* + * Copyright (C) 2011-2017 ARM Limited. All rights reserved. 
+ * @@ -313740,58 +315755,22 @@ index 000000000..a9b287038 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ ++#ifndef _MALI_DEVFREQ_H_ ++#define _MALI_DEVFREQ_H_ + -+#ifndef __MALI_MEMORY_DMA_BUF_H__ -+#define __MALI_MEMORY_DMA_BUF_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include "mali_uk_types.h" -+#include "mali_osk.h" -+#include "mali_memory.h" -+ -+struct mali_pp_job; -+ -+struct mali_dma_buf_attachment; -+struct mali_dma_buf_attachment { -+ struct dma_buf *buf; -+ struct dma_buf_attachment *attachment; -+ struct sg_table *sgt; -+ struct mali_session_data *session; -+ int map_ref; -+ struct mutex map_lock; -+ mali_bool is_mapped; -+ wait_queue_head_t wait_queue; -+}; -+ -+int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg); -+ -+void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend); -+ -+_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc, -+ mali_mem_backend *mem_backend, -+ int fd, u32 flags); ++int mali_devfreq_init(struct mali_device *mdev); + -+#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) -+int mali_dma_buf_map_job(struct mali_pp_job *job); -+void mali_dma_buf_unmap_job(struct mali_pp_job *job); -+#endif ++void mali_devfreq_term(struct mali_device *mdev); + -+#ifdef __cplusplus -+} +#endif -+ -+#endif /* __MALI_MEMORY_DMA_BUF_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c b/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c new file mode 100644 -index 000000000..76018b7ab +index 000000000..95c3ea12d --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c -@@ -0,0 +1,89 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_device_pause_resume.c +@@ -0,0 +1,36 @@ ++/** ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -313800,128 +315779,40 @@ index 000000000..76018b7ab + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_ukk.h" -+#include "mali_memory.h" -+#include "mali_mem_validation.h" -+#include "mali_uk_types.h" -+ -+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend) -+{ -+ mali_mem_allocation *alloc; -+ struct mali_session_data *session; -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ alloc = mem_backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type); ++/** ++ * @file mali_device_pause_resume.c ++ * Implementation of the Mali pause/resume functionality ++ */ + -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); -+} ++#include ++#include ++#include "mali_pm.h" + -+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc, -+ mali_mem_backend *mem_backend, -+ u32 phys_addr, -+ u32 flag) ++void mali_dev_pause(void) +{ -+ struct mali_session_data *session; -+ _mali_osk_errcode_t err; -+ u32 virt, phys, size; -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ size = alloc->psize; -+ session = (struct mali_session_data *)(uintptr_t)alloc->session; -+ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); -+ -+ /* check arguments */ -+ /* NULL might be a valid Mali address */ -+ if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ -+ /* size must be a multiple of the system page size */ -+ if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); -+ -+ /* Validate the mali physical range */ -+ if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { -+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; -+ } -+ -+ mali_session_memory_lock(session); -+ -+ virt = alloc->mali_vma_node.vm_node.start; -+ phys = phys_addr; -+ -+ err = mali_mem_mali_map_prepare(alloc); -+ if (_MALI_OSK_ERR_OK != err) { -+ mali_session_memory_unlock(session); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT); -+ -+ if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { -+ mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); -+ } -+ MALI_DEBUG_PRINT(3, -+ ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n", -+ phys_addr, (phys_addr + size - 1), -+ virt)); -+ mali_session_memory_unlock(session); -+ -+ MALI_SUCCESS; ++ /* ++ * Deactive all groups to prevent hardware being touched ++ * during the period of mali device pausing ++ */ ++ mali_pm_os_suspend(MALI_FALSE); +} + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h -new file mode 100644 -index 000000000..2db178d96 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h -@@ -0,0 +1,29 @@ -+ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEMORY_EXTERNAL_H__ -+#define __MALI_MEMORY_EXTERNAL_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc, -+ mali_mem_backend *mem_backend, -+ u32 phys_addr, -+ u32 flag); -+void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend); ++EXPORT_SYMBOL(mali_dev_pause); + -+#ifdef __cplusplus ++void mali_dev_resume(void) ++{ ++ mali_pm_os_resume(); +} -+#endif + -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c ++EXPORT_SYMBOL(mali_dev_resume); +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c new file mode 100644 -index 000000000..6c0a2e9a8 +index 000000000..e026e11e4 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c -@@ -0,0 +1,992 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.c +@@ -0,0 +1,439 @@ +/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -313929,1870 +315820,1363 @@ index 000000000..6c0a2e9a8 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include +#include -+#include -+ -+#include -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include -+#endif -+#include -+ +#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_kernel_linux.h" -+#include "mali_scheduler.h" -+#include "mali_memory.h" -+#include "mali_memory_os_alloc.h" -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include "mali_memory_dma_buf.h" -+#include "mali_memory_secure.h" -+#endif -+#if defined(CONFIG_MALI400_UMP) -+#include "mali_memory_ump.h" -+#endif -+#include "mali_memory_manager.h" -+#include "mali_memory_virtual.h" -+#include "mali_memory_util.h" -+#include "mali_memory_external.h" -+#include "mali_memory_cow.h" -+#include "mali_memory_block_alloc.h" -+#include "mali_ukk.h" -+#include "mali_memory_swap_alloc.h" ++#include "mali_kernel_common.h" + -+/* -+* New memory system interface -+*/ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) ++#include "mali_dma_fence.h" ++#include ++#include ++#endif + -+/*inti idr for backend memory */ -+struct idr mali_backend_idr; -+struct mutex mali_idr_mutex; ++static DEFINE_SPINLOCK(mali_dma_fence_lock); + -+/* init allocation manager */ -+int mali_memory_manager_init(struct mali_allocation_manager *mgr) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static bool mali_dma_fence_enable_signaling(struct dma_fence *fence) +{ -+ /* init Locks */ -+ rwlock_init(&mgr->vm_lock); -+ mutex_init(&mgr->list_mutex); -+ -+ /* init link */ -+ INIT_LIST_HEAD(&mgr->head); -+ -+ /* init RB tree */ -+ mgr->allocation_mgr_rb = RB_ROOT; -+ mgr->mali_allocation_num = 0; -+ return 0; ++ MALI_IGNORE(fence); ++ return true; +} + -+/* Deinit allocation manager -+* Do some check for debug -+*/ -+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr) ++static const char *mali_dma_fence_get_driver_name(struct dma_fence *fence) +{ -+ /* check RB tree is empty */ -+ MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb))); -+ /* check allocation List */ -+ MALI_DEBUG_ASSERT(list_empty(&mgr->head)); ++ MALI_IGNORE(fence); ++ return "mali"; +} + -+/* Prepare memory descriptor */ -+static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session) ++static const char *mali_dma_fence_get_timeline_name(struct dma_fence *fence) +{ -+ mali_mem_allocation *mali_allocation; -+ -+ /* Allocate memory */ -+ mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL); -+ if (NULL == mali_allocation) { -+ MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: descriptor was NULL\n")); -+ return NULL; -+ } -+ -+ MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC); -+ -+ /* do init */ -+ mali_allocation->flags = 0; -+ mali_allocation->session = session; -+ -+ INIT_LIST_HEAD(&mali_allocation->list); -+ _mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1); -+ -+ /** -+ *add to session list -+ */ -+ mutex_lock(&session->allocation_mgr.list_mutex); -+ list_add_tail(&mali_allocation->list, &session->allocation_mgr.head); -+ session->allocation_mgr.mali_allocation_num++; -+ mutex_unlock(&session->allocation_mgr.list_mutex); ++ MALI_IGNORE(fence); ++ return "mali_dma_fence"; ++} + -+ return mali_allocation; ++static const struct dma_fence_ops mali_dma_fence_ops = { ++ .get_driver_name = mali_dma_fence_get_driver_name, ++ .get_timeline_name = mali_dma_fence_get_timeline_name, ++ .enable_signaling = mali_dma_fence_enable_signaling, ++ .signaled 
= NULL, ++ .wait = dma_fence_default_wait, ++ .release = NULL ++}; ++#else ++static bool mali_dma_fence_enable_signaling(struct fence *fence) ++{ ++ MALI_IGNORE(fence); ++ return true; +} + -+void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc) ++static const char *mali_dma_fence_get_driver_name(struct fence *fence) +{ -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ MALI_DEBUG_ASSERT_POINTER(alloc->session); -+ mutex_lock(&alloc->session->allocation_mgr.list_mutex); -+ list_del(&alloc->list); -+ alloc->session->allocation_mgr.mali_allocation_num--; -+ mutex_unlock(&alloc->session->allocation_mgr.list_mutex); ++ MALI_IGNORE(fence); ++ return "mali"; ++} + -+ kfree(alloc); ++static const char *mali_dma_fence_get_timeline_name(struct fence *fence) ++{ ++ MALI_IGNORE(fence); ++ return "mali_dma_fence"; +} + -+int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize) ++static const struct fence_ops mali_dma_fence_ops = { ++ .get_driver_name = mali_dma_fence_get_driver_name, ++ .get_timeline_name = mali_dma_fence_get_timeline_name, ++ .enable_signaling = mali_dma_fence_enable_signaling, ++ .signaled = NULL, ++ .wait = fence_default_wait, ++ .release = NULL ++}; ++#endif ++ ++static void mali_dma_fence_context_cleanup(struct mali_dma_fence_context *dma_fence_context) +{ -+ mali_mem_backend *mem_backend = NULL; -+ s32 ret = -ENOSPC; -+ s32 index = -1; -+ *backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL); -+ if (NULL == *backend) { -+ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: backend descriptor was NULL\n")); -+ return -1; -+ } -+ mem_backend = *backend; -+ mem_backend->size = psize; -+ mutex_init(&mem_backend->mutex); -+ INIT_LIST_HEAD(&mem_backend->list); -+ mem_backend->using_count = 0; ++ u32 i; + ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); + -+ /* link backend with id */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) -+again: -+ if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) { -+ kfree(mem_backend); -+ return -ENOMEM; -+ } -+ mutex_lock(&mali_idr_mutex); -+ ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index); -+ mutex_unlock(&mali_idr_mutex); ++ for (i = 0; i < dma_fence_context->num_dma_fence_waiter; i++) { ++ if (dma_fence_context->mali_dma_fence_waiters[i]) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence, ++ &dma_fence_context->mali_dma_fence_waiters[i]->base); ++ dma_fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence); + -+ if (-ENOSPC == ret) { -+ kfree(mem_backend); -+ return -ENOSPC; -+ } -+ if (-EAGAIN == ret) -+ goto again; +#else -+ mutex_lock(&mali_idr_mutex); -+ ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL); -+ mutex_unlock(&mali_idr_mutex); -+ index = ret; -+ if (ret < 0) { -+ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! 
\n")); -+ kfree(mem_backend); -+ return -ENOSPC; -+ } ++ fence_remove_callback(dma_fence_context->mali_dma_fence_waiters[i]->fence, ++ &dma_fence_context->mali_dma_fence_waiters[i]->base); ++ fence_put(dma_fence_context->mali_dma_fence_waiters[i]->fence); +#endif -+ return index; -+} -+ ++ kfree(dma_fence_context->mali_dma_fence_waiters[i]); ++ dma_fence_context->mali_dma_fence_waiters[i] = NULL; ++ } ++ } + -+static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle) -+{ -+ mali_mem_backend *mem_backend = *backend; ++ if (NULL != dma_fence_context->mali_dma_fence_waiters) ++ kfree(dma_fence_context->mali_dma_fence_waiters); + -+ mutex_lock(&mali_idr_mutex); -+ idr_remove(&mali_backend_idr, backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ kfree(mem_backend); -+ *backend = NULL; ++ dma_fence_context->mali_dma_fence_waiters = NULL; ++ dma_fence_context->num_dma_fence_waiter = 0; +} + -+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address) ++static void mali_dma_fence_context_work_func(struct work_struct *work_handle) +{ -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_backend *mem_bkend = NULL; -+ mali_mem_allocation *mali_alloc = NULL; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0); -+ if (NULL == mali_vma_node) { -+ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n")); -+ return NULL; -+ } -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); -+ return mem_bkend; ++ struct mali_dma_fence_context *dma_fence_context; ++ ++ MALI_DEBUG_ASSERT_POINTER(work_handle); ++ ++ dma_fence_context = container_of(work_handle, struct mali_dma_fence_context, work_handle); ++ ++ dma_fence_context->cb_func(dma_fence_context->pp_job_ptr); +} + -+static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static void mali_dma_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) ++#else ++static void mali_dma_fence_callback(struct fence *fence, struct fence_cb *cb) ++#endif +{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ int retval = 0; -+ mali_mem_allocation *mali_allocation = NULL; -+ mali_mem_os_mem tmp_os_mem; -+ s32 change_page_count; ++ struct mali_dma_fence_waiter *dma_fence_waiter = NULL; ++ struct mali_dma_fence_context *dma_fence_context = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! 
\n")); -+ MALI_DEBUG_ASSERT(0 == physical_size % MALI_MMU_PAGE_SIZE); ++ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(cb); + -+ mali_allocation = mem_backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(mali_allocation); ++ MALI_IGNORE(fence); + -+ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags); -+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type); ++ dma_fence_waiter = container_of(cb, struct mali_dma_fence_waiter, base); ++ dma_fence_context = dma_fence_waiter->parent; + -+ mutex_lock(&mem_backend->mutex); ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); + -+ /* Do resize*/ -+ if (physical_size > mem_backend->size) { -+ u32 add_size = physical_size - mem_backend->size; ++ if (atomic_dec_and_test(&dma_fence_context->count)) ++ schedule_work(&dma_fence_context->work_handle); ++} + -+ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct dma_fence *fence) ++#else ++static _mali_osk_errcode_t mali_dma_fence_add_callback(struct mali_dma_fence_context *dma_fence_context, struct fence *fence) ++#endif ++{ ++ int ret = 0; ++ struct mali_dma_fence_waiter *dma_fence_waiter; ++ struct mali_dma_fence_waiter **dma_fence_waiters; + -+ /* Allocate new pages from os mem */ -+ retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size); ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ if (retval) { -+ if (-ENOMEM == retval) { -+ ret = _MALI_OSK_ERR_NOMEM; -+ } else { -+ ret = _MALI_OSK_ERR_FAULT; -+ } -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n")); -+ goto failed_alloc_memory; -+ } ++ dma_fence_waiters = krealloc(dma_fence_context->mali_dma_fence_waiters, ++ (dma_fence_context->num_dma_fence_waiter + 1) ++ * sizeof(struct mali_dma_fence_waiter *), ++ GFP_KERNEL); + -+ MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE); ++ if (NULL == dma_fence_waiters) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to realloc the dma fence waiters.\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ /* Resize the memory of the backend */ -+ ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count); ++ dma_fence_context->mali_dma_fence_waiters = dma_fence_waiters; + -+ if (ret) { -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory resizing failed !\n")); -+ goto failed_resize_pages; -+ } ++ dma_fence_waiter = kzalloc(sizeof(struct mali_dma_fence_waiter), GFP_KERNEL); + -+ /*Resize cpu mapping */ -+ if (NULL != mali_allocation->cpu_mapping.vma) { -+ ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + mem_backend->size, add_size); -+ if (unlikely(ret != _MALI_OSK_ERR_OK)) { -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n")); -+ goto failed_cpu_map; -+ } ++ if (NULL == dma_fence_waiter) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create mali dma fence waiter.\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_get(fence); ++#else ++ fence_get(fence); ++#endif ++ dma_fence_waiter->fence = fence; ++ dma_fence_waiter->parent = dma_fence_context; ++ atomic_inc(&dma_fence_context->count); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ ret = dma_fence_add_callback(fence, &dma_fence_waiter->base, ++ mali_dma_fence_callback); 
++#else ++ ret = fence_add_callback(fence, &dma_fence_waiter->base, ++ mali_dma_fence_callback); ++#endif ++ if (0 > ret) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_put(fence); ++#else ++ fence_put(fence); ++#endif ++ kfree(dma_fence_waiter); ++ atomic_dec(&dma_fence_context->count); ++ if (-ENOENT == ret) { ++ /*-ENOENT if fence has already been signaled, return _MALI_OSK_ERR_OK*/ ++ return _MALI_OSK_ERR_OK; + } ++ /* Failed to add the fence callback into fence, return _MALI_OSK_ERR_FAULT*/ ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into fence.\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ /* Resize mali mapping */ -+ _mali_osk_mutex_wait(session->memory_lock); -+ ret = mali_mem_mali_map_resize(mali_allocation, physical_size); ++ dma_fence_context->mali_dma_fence_waiters[dma_fence_context->num_dma_fence_waiter] = dma_fence_waiter; ++ dma_fence_context->num_dma_fence_waiter++; + -+ if (ret) { -+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n")); -+ goto failed_gpu_map; -+ } ++ return _MALI_OSK_ERR_OK; ++} + -+ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start, -+ mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties); -+ if (ret) { -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n")); -+ goto failed_gpu_map; -+ } + -+ _mali_osk_mutex_signal(session->memory_lock); -+ } else { -+ u32 dec_size, page_count; -+ u32 vaddr = 0; -+ INIT_LIST_HEAD(&tmp_os_mem.pages); -+ tmp_os_mem.count = 0; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++struct dma_fence *mali_dma_fence_new(u32 context, u32 seqno) ++#else ++struct fence *mali_dma_fence_new(u32 context, u32 seqno) ++#endif ++{ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ struct dma_fence *fence = NULL; ++ fence = kzalloc(sizeof(struct dma_fence), GFP_KERNEL); ++#else ++ struct fence *fence = NULL; ++ fence = kzalloc(sizeof(struct fence), GFP_KERNEL); ++#endif ++ if (NULL == fence) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to create dma fence.\n")); ++ return fence; ++ } ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_init(fence, ++ &mali_dma_fence_ops, ++ &mali_dma_fence_lock, ++ context, seqno); ++#else ++ fence_init(fence, ++ &mali_dma_fence_ops, ++ &mali_dma_fence_lock, ++ context, seqno); ++#endif ++ return fence; ++} + -+ dec_size = mem_backend->size - physical_size; -+ MALI_DEBUG_ASSERT(0 == dec_size % MALI_MMU_PAGE_SIZE); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++void mali_dma_fence_signal_and_put(struct dma_fence **fence) ++#else ++void mali_dma_fence_signal_and_put(struct fence **fence) ++#endif ++{ ++ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_DEBUG_ASSERT_POINTER(*fence); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_signal(*fence); ++ dma_fence_put(*fence); ++#else ++ fence_signal(*fence); ++ fence_put(*fence); ++#endif ++ *fence = NULL; ++} + -+ page_count = dec_size / MALI_MMU_PAGE_SIZE; -+ vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size; ++void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context, ++ mali_dma_fence_context_callback_func_t cb_func, ++ void *pp_job_ptr) ++{ ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); + -+ /* Resize the memory of the backend */ -+ ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count); ++ 
INIT_WORK(&dma_fence_context->work_handle, mali_dma_fence_context_work_func); ++ atomic_set(&dma_fence_context->count, 1); ++ dma_fence_context->num_dma_fence_waiter = 0; ++ dma_fence_context->mali_dma_fence_waiters = NULL; ++ dma_fence_context->cb_func = cb_func; ++ dma_fence_context->pp_job_ptr = pp_job_ptr; ++} + -+ if (ret) { -+ MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n")); -+ goto failed_resize_pages; -+ } ++_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context, ++ struct reservation_object *dma_reservation_object) ++{ ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; ++ u32 shared_count = 0, i; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ struct dma_fence *exclusive_fence = NULL; ++ struct dma_fence **shared_fences = NULL; ++#else ++ struct fence *exclusive_fence = NULL; ++ struct fence **shared_fences = NULL; ++#endif ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object); + -+ /* Resize mali map */ -+ _mali_osk_mutex_wait(session->memory_lock); -+ mali_mem_mali_map_free(session, dec_size, vaddr, mali_allocation->flags); -+ _mali_osk_mutex_signal(session->memory_lock); ++ /* Get all the shared/exclusive fences in the reservation object of dma buf*/ ++ ret = reservation_object_get_fences_rcu(dma_reservation_object, &exclusive_fence, ++ &shared_count, &shared_fences); ++ if (ret < 0) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to get shared or exclusive_fence dma fences from the reservation object of dma buf.\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ /* Zap cpu mapping */ -+ if (0 != mali_allocation->cpu_mapping.addr) { -+ MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma); -+ zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size); ++ if (exclusive_fence) { ++ ret = mali_dma_fence_add_callback(dma_fence_context, exclusive_fence); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into exclusive fence.\n")); ++ mali_dma_fence_context_cleanup(dma_fence_context); ++ goto ended; + } -+ -+ /* Free those extra pages */ -+ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE); + } + -+ /* Resize memory allocation and memory backend */ -+ change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE; -+ mali_allocation->psize = physical_size; -+ mem_backend->size = physical_size; -+ mutex_unlock(&mem_backend->mutex); + -+ if (change_page_count > 0) { -+ atomic_add(change_page_count, &session->mali_mem_allocated_pages); -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ for (i = 0; i < shared_count; i++) { ++ ret = mali_dma_fence_add_callback(dma_fence_context, shared_fences[i]); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to add callback into shared fence [%d].\n", i)); ++ mali_dma_fence_context_cleanup(dma_fence_context); ++ break; + } -+ -+ } else { -+ atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages); + } + -+ return _MALI_OSK_ERR_OK; ++ended: + -+failed_gpu_map: -+ _mali_osk_mutex_signal(session->memory_lock); -+failed_cpu_map: -+ if (physical_size > mem_backend->size) { -+ mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / 
MALI_MMU_PAGE_SIZE, -+ (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE); -+ } else { -+ mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count); ++ if (exclusive_fence) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_put(exclusive_fence); ++#else ++ fence_put(exclusive_fence); ++#endif ++ ++ if (shared_fences) { ++ for (i = 0; i < shared_count; i++) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_put(shared_fences[i]); ++#else ++ fence_put(shared_fences[i]); ++#endif ++ } ++ kfree(shared_fences); + } -+failed_resize_pages: -+ if (0 != tmp_os_mem.count) -+ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE); -+failed_alloc_memory: + -+ mutex_unlock(&mem_backend->mutex); + return ret; +} + + -+/* Set GPU MMU properties */ -+static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags) ++void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context) +{ -+ if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) { -+ *properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE; -+ } else { -+ *properties = MALI_MMU_FLAGS_DEFAULT; ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); ++ atomic_set(&dma_fence_context->count, 0); ++ if (dma_fence_context->work_handle.func) { ++ cancel_work_sync(&dma_fence_context->work_handle); + } ++ mali_dma_fence_context_cleanup(dma_fence_context); +} + -+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size) ++void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context) +{ -+ mali_mem_backend *mem_backend = NULL; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ mali_mem_allocation *mali_allocation = NULL; -+ u32 new_physical_size; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE); -+ -+ /* Get the memory backend that need to be resize. 
*/ -+ mem_backend = mali_mem_backend_struct_search(session, mali_addr); -+ -+ if (NULL == mem_backend) { -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n")); -+ return ret; -+ } -+ -+ mali_allocation = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(dma_fence_context); + -+ MALI_DEBUG_ASSERT_POINTER(mali_allocation); ++ if (atomic_dec_and_test(&dma_fence_context->count)) ++ schedule_work(&dma_fence_context->work_handle); ++} + -+ new_physical_size = add_size + mem_backend->size; + -+ if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size)) -+ return ret; ++void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object, ++ struct reservation_object **dma_reservation_object_list, ++ u32 *num_dma_reservation_object) ++{ ++ u32 i; + -+ MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size); ++ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object); ++ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list); ++ MALI_DEBUG_ASSERT_POINTER(num_dma_reservation_object); + -+ ret = mali_mem_resize(session, mem_backend, new_physical_size); ++ for (i = 0; i < *num_dma_reservation_object; i++) { ++ if (dma_reservation_object_list[i] == dma_reservation_object) ++ return; ++ } + -+ return ret; ++ dma_reservation_object_list[*num_dma_reservation_object] = dma_reservation_object; ++ (*num_dma_reservation_object)++; +} + -+/** -+* function@_mali_ukk_mem_allocate - allocate mali memory -+*/ -+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args) ++int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list, ++ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx) +{ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ mali_mem_backend *mem_backend = NULL; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ int retval = 0; -+ mali_mem_allocation *mali_allocation = NULL; -+ struct mali_vma_node *mali_vma_node = NULL; ++ u32 i; + -+ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize)); ++ struct reservation_object *reservation_object_to_slow_lock = NULL; + -+ /* Check if the address is allocated -+ */ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0); ++ MALI_DEBUG_ASSERT_POINTER(dma_reservation_object_list); ++ MALI_DEBUG_ASSERT_POINTER(ww_actx); + -+ if (unlikely(mali_vma_node)) { -+ MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ /** -+ *create mali memory allocation -+ */ ++ ww_acquire_init(ww_actx, &reservation_ww_class); + -+ mali_allocation = mali_mem_allocation_struct_create(session); ++again: ++ for (i = 0; i < num_dma_reservation_object; i++) { ++ int ret; + -+ if (mali_allocation == NULL) { -+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ mali_allocation->psize = args->psize; -+ mali_allocation->vsize = args->vsize; ++ if (dma_reservation_object_list[i] == reservation_object_to_slow_lock) { ++ reservation_object_to_slow_lock = NULL; ++ continue; ++ } + -+ /* MALI_MEM_OS if need to support mem resize, -+ * or MALI_MEM_BLOCK if have dedicated memory, -+ * or MALI_MEM_OS, -+ * or MALI_MEM_SWAP. 
-+ */ -+ if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) { -+ mali_allocation->type = MALI_MEM_SWAP; -+ } else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) { -+ mali_allocation->type = MALI_MEM_OS; -+ mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE; -+ } else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) { -+ mali_allocation->type = MALI_MEM_SECURE; -+ } else if (MALI_TRUE == mali_memory_have_dedicated_memory()) { -+ mali_allocation->type = MALI_MEM_BLOCK; -+ } else { -+ mali_allocation->type = MALI_MEM_OS; -+ } ++ ret = ww_mutex_lock(&dma_reservation_object_list[i]->lock, ww_actx); + -+ /** -+ *add allocation node to RB tree for index -+ */ -+ mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr; -+ mali_allocation->mali_vma_node.vm_node.size = args->vsize; ++ if (ret < 0) { ++ u32 slow_lock_index = i; + -+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ /* unlock all pre locks we have already locked.*/ ++ while (i > 0) { ++ i--; ++ ww_mutex_unlock(&dma_reservation_object_list[i]->lock); ++ } + -+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize); -+ if (mali_allocation->backend_handle < 0) { -+ ret = _MALI_OSK_ERR_NOMEM; -+ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n")); -+ goto failed_alloc_backend; ++ if (NULL != reservation_object_to_slow_lock) ++ ww_mutex_unlock(&reservation_object_to_slow_lock->lock); ++ ++ if (ret == -EDEADLK) { ++ reservation_object_to_slow_lock = dma_reservation_object_list[slow_lock_index]; ++ ww_mutex_lock_slow(&reservation_object_to_slow_lock->lock, ww_actx); ++ goto again; ++ } ++ ww_acquire_fini(ww_actx); ++ MALI_DEBUG_PRINT(1, ("Mali dma fence: failed to lock all dma reservation objects.\n", i)); ++ return ret; ++ } + } + ++ ww_acquire_done(ww_actx); ++ return 0; ++} + -+ mem_backend->mali_allocation = mali_allocation; -+ mem_backend->type = mali_allocation->type; ++void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list, ++ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx) ++{ ++ u32 i; + -+ mali_allocation->mali_mapping.addr = args->gpu_vaddr; ++ for (i = 0; i < num_dma_reservation_object; i++) ++ ww_mutex_unlock(&dma_reservation_object_list[i]->lock); + -+ /* set gpu mmu propery */ -+ _mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags); -+ /* do prepare for MALI mapping */ -+ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) { -+ _mali_osk_mutex_wait(session->memory_lock); ++ ww_acquire_fini(ww_actx); ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h +new file mode 100644 +index 000000000..d44f6d1a8 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_dma_fence.h +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ ret = mali_mem_mali_map_prepare(mali_allocation); -+ if (0 != ret) { -+ _mali_osk_mutex_signal(session->memory_lock); -+ goto failed_prepare_map; -+ } -+ _mali_osk_mutex_signal(session->memory_lock); -+ } ++/** ++ * @file mali_dma_fence.h ++ * ++ * Mali interface for Linux dma buf fence objects. ++ */ + -+ if (mali_allocation->psize == 0) { -+ mem_backend->os_mem.count = 0; -+ INIT_LIST_HEAD(&mem_backend->os_mem.pages); -+ goto done; -+ } ++#ifndef _MALI_DMA_FENCE_H_ ++#define _MALI_DMA_FENCE_H_ + -+ if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) { -+ mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND; -+ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED; -+ /* init for defer bind backend*/ -+ mem_backend->os_mem.count = 0; -+ INIT_LIST_HEAD(&mem_backend->os_mem.pages); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#include ++#else ++#include ++#endif ++#include ++#endif + -+ goto done; -+ } ++struct mali_dma_fence_context; + -+ if (likely(mali_allocation->psize > 0)) { ++/* The mali dma fence context callback function */ ++typedef void (*mali_dma_fence_context_callback_func_t)(void *pp_job_ptr); + -+ if (MALI_MEM_SECURE == mem_backend->type) { -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory! \n")); -+ goto failed_alloc_pages; -+ } ++struct mali_dma_fence_waiter { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ struct dma_fence *fence; ++ struct dma_fence_cb base; +#else -+ ret = _MALI_OSK_ERR_UNSUPPORTED; -+ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory! \n")); -+ goto failed_alloc_pages; ++ struct fence_cb base; ++ struct fence *fence; +#endif -+ } else { ++ struct mali_dma_fence_context *parent; ++}; + -+ /** -+ *allocate physical memory -+ */ -+ if (mem_backend->type == MALI_MEM_OS) { -+ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size); -+ } else if (mem_backend->type == MALI_MEM_BLOCK) { -+ /* try to allocated from BLOCK memory first, then try OS memory if failed.*/ -+ if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) { -+ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size); -+ mem_backend->type = MALI_MEM_OS; -+ mali_allocation->type = MALI_MEM_OS; -+ } -+ } else if (MALI_MEM_SWAP == mem_backend->type) { -+ retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx); -+ } else { -+ /* ONLY support mem_os type */ -+ MALI_DEBUG_ASSERT(0); -+ } ++struct mali_dma_fence_context { ++ struct work_struct work_handle; ++ struct mali_dma_fence_waiter **mali_dma_fence_waiters; ++ u32 num_dma_fence_waiter; ++ atomic_t count; ++ void *pp_job_ptr; /* the mali pp job pointer */; ++ mali_dma_fence_context_callback_func_t cb_func; ++}; + -+ if (retval) { -+ ret = _MALI_OSK_ERR_NOMEM; -+ MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n")); -+ goto failed_alloc_pages; -+ } -+ } -+ } ++/* Create a dma fence ++ * @param context The execution context this fence is run on ++ * @param seqno A linearly increasing sequence number for this context ++ * @return the new dma fence if success, or NULL on failure. 
++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++struct dma_fence *mali_dma_fence_new(u32 context, u32 seqno); ++#else ++struct fence *mali_dma_fence_new(u32 context, u32 seqno); ++#endif ++/* Signal and put dma fence ++ * @param fence The dma fence to signal and put ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++void mali_dma_fence_signal_and_put(struct dma_fence **fence); ++#else ++void mali_dma_fence_signal_and_put(struct fence **fence); ++#endif ++/** ++ * Initialize a mali dma fence context for pp job. ++ * @param dma_fence_context The mali dma fence context to initialize. ++ * @param cb_func The dma fence context callback function to call when all dma fence release. ++ * @param pp_job_ptr The pp_job to call function with. ++ */ ++void mali_dma_fence_context_init(struct mali_dma_fence_context *dma_fence_context, ++ mali_dma_fence_context_callback_func_t cb_func, ++ void *pp_job_ptr); + -+ /** -+ *map to GPU side -+ */ -+ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) { -+ _mali_osk_mutex_wait(session->memory_lock); -+ /* Map on Mali */ ++/** ++ * Add new mali dma fence waiter into mali dma fence context ++ * @param dma_fence_context The mali dma fence context ++ * @param dma_reservation_object the reservation object to create new mali dma fence waiters ++ * @return _MALI_OSK_ERR_OK if success, or not. ++ */ ++_mali_osk_errcode_t mali_dma_fence_context_add_waiters(struct mali_dma_fence_context *dma_fence_context, ++ struct reservation_object *dma_reservation_object); + -+ if (mem_backend->type == MALI_MEM_OS) { -+ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0, -+ mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties); ++/** ++ * Release the dma fence context ++ * @param dma_fence_text The mali dma fence context. ++ */ ++void mali_dma_fence_context_term(struct mali_dma_fence_context *dma_fence_context); + -+ } else if (mem_backend->type == MALI_MEM_BLOCK) { -+ mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr, -+ mali_allocation->mali_mapping.properties); -+ } else if (mem_backend->type == MALI_MEM_SWAP) { -+ ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr, -+ mali_allocation->mali_mapping.properties); -+ } else if (mem_backend->type == MALI_MEM_SECURE) { -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties); ++/** ++ * Decrease the dma fence context atomic count ++ * @param dma_fence_text The mali dma fence context. ++ */ ++void mali_dma_fence_context_dec_count(struct mali_dma_fence_context *dma_fence_context); ++ ++/** ++ * Get all reservation object ++ * @param dma_reservation_object The reservation object to add into the reservation object list ++ * @param dma_reservation_object_list The reservation object list to store all reservation object ++ * @param num_dma_reservation_object The number of all reservation object ++ */ ++void mali_dma_fence_add_reservation_object_list(struct reservation_object *dma_reservation_object, ++ struct reservation_object **dma_reservation_object_list, ++ u32 *num_dma_reservation_object); ++ ++/** ++ * Wait/wound mutex lock to lock all reservation object. 
++ */ ++int mali_dma_fence_lock_reservation_object_list(struct reservation_object **dma_reservation_object_list, ++ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx); ++ ++/** ++ * Wait/wound mutex lock to unlock all reservation object. ++ */ ++void mali_dma_fence_unlock_reservation_object_list(struct reservation_object **dma_reservation_object_list, ++ u32 num_dma_reservation_object, struct ww_acquire_ctx *ww_actx); +#endif -+ } else { /* unsupport type */ -+ MALI_DEBUG_ASSERT(0); -+ } +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c +new file mode 100644 +index 000000000..e13cbad3e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.c +@@ -0,0 +1,783 @@ ++/* ++ * Copyright (C) 2012-2018 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ _mali_osk_mutex_signal(session->memory_lock); -+ } -+done: -+ if (MALI_MEM_OS == mem_backend->type) { -+ atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages); -+ } else if (MALI_MEM_BLOCK == mem_backend->type) { -+ atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages); -+ } else if (MALI_MEM_SECURE == mem_backend->type) { -+ atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages); -+ } else { -+ MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type); -+ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages); -+ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]); -+ } ++#include "mali_internal_sync.h" ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; -+ } -+ return _MALI_OSK_ERR_OK; ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#if defined(DEBUG) ++#include "mali_session.h" ++#include "mali_timeline.h" ++#endif + -+failed_alloc_pages: -+ mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags); -+failed_prepare_map: -+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); -+failed_alloc_backend: -+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); -+ mali_mem_allocation_struct_destory(mali_allocation); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static const struct dma_fence_ops fence_ops; ++#else ++static const struct fence_ops fence_ops; ++#endif + -+ return ret; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct dma_fence *fence) ++#else ++static struct mali_internal_sync_point *mali_internal_fence_to_sync_pt(struct fence *fence) ++#endif ++{ ++ MALI_DEBUG_ASSERT_POINTER(fence); ++ return container_of(fence, 
struct mali_internal_sync_point, base); +} + ++static inline struct mali_internal_sync_timeline *mali_internal_sync_pt_to_sync_timeline(struct mali_internal_sync_point *sync_pt) ++{ ++ MALI_DEBUG_ASSERT_POINTER(sync_pt); ++ return container_of(sync_pt->base.lock, struct mali_internal_sync_timeline, sync_pt_list_lock); ++} + -+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args) ++static void mali_internal_sync_timeline_free(struct kref *kref_count) +{ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ u32 vaddr = args->gpu_vaddr; -+ mali_mem_allocation *mali_alloc = NULL; -+ struct mali_vma_node *mali_vma_node = NULL; ++ struct mali_internal_sync_timeline *sync_timeline; + -+ /* find mali allocation structure by vaddress*/ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0); -+ if (NULL == mali_vma_node) { -+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr)); -+ return _MALI_OSK_ERR_INVALID_ARGS; -+ } -+ MALI_DEBUG_ASSERT(NULL != mali_vma_node); -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT_POINTER(kref_count); + -+ if (mali_alloc) -+ /* check ref_count */ -+ args->free_pages_nr = mali_allocation_unref(&mali_alloc); ++ sync_timeline = container_of(kref_count, struct mali_internal_sync_timeline, kref_count); + -+ return _MALI_OSK_ERR_OK; -+} ++ if (sync_timeline->ops->release_obj) ++ sync_timeline->ops->release_obj(sync_timeline); + ++ kfree(sync_timeline); ++} + -+/** -+* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address -+* It will allocate a new mem allocation and bind external memory to it. -+* Supported backend type are: -+* _MALI_MEMORY_BIND_BACKEND_UMP -+* _MALI_MEMORY_BIND_BACKEND_DMA_BUF -+* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY -+* CPU access is not supported yet -+*/ -+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++static void mali_internal_fence_check_cb_func(struct fence *fence, struct fence_cb *cb) ++#else ++static void mali_internal_fence_check_cb_func(struct dma_fence *fence, struct dma_fence_cb *cb) ++#endif +{ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ mali_mem_backend *mem_backend = NULL; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ mali_mem_allocation *mali_allocation = NULL; -+ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size)); -+ -+ /** -+ * allocate mali allocation. 
-+ */ -+ mali_allocation = mali_mem_allocation_struct_create(session); -+ -+ if (mali_allocation == NULL) { -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ mali_allocation->psize = args->size; -+ mali_allocation->vsize = args->size; -+ mali_allocation->mali_mapping.addr = args->vaddr; -+ -+ /* add allocation node to RB tree for index */ -+ mali_allocation->mali_vma_node.vm_node.start = args->vaddr; -+ mali_allocation->mali_vma_node.vm_node.size = args->size; -+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); -+ -+ /* allocate backend*/ -+ if (mali_allocation->psize > 0) { -+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize); -+ if (mali_allocation->backend_handle < 0) { -+ goto Failed_alloc_backend; -+ } -+ -+ } else { -+ goto Failed_alloc_backend; -+ } -+ -+ mem_backend->size = mali_allocation->psize; -+ mem_backend->mali_allocation = mali_allocation; -+ -+ switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) { -+ case _MALI_MEMORY_BIND_BACKEND_UMP: -+#if defined(CONFIG_MALI400_UMP) -+ mali_allocation->type = MALI_MEM_UMP; -+ mem_backend->type = MALI_MEM_UMP; -+ ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend, -+ args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n")); -+ goto Failed_bind_backend; -+ } ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ struct mali_internal_sync_fence_cb *check; +#else -+ MALI_DEBUG_PRINT(1, ("UMP not supported\n")); -+ goto Failed_bind_backend; ++ struct mali_internal_sync_fence_waiter *waiter; +#endif -+ break; -+ case _MALI_MEMORY_BIND_BACKEND_DMA_BUF: -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ mali_allocation->type = MALI_MEM_DMA_BUF; -+ mem_backend->type = MALI_MEM_DMA_BUF; -+ ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend, -+ args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n")); -+ goto Failed_bind_backend; -+ } ++ struct mali_internal_sync_fence *sync_fence; ++ int ret; ++ MALI_DEBUG_ASSERT_POINTER(cb); ++ MALI_IGNORE(fence); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ check = container_of(cb, struct mali_internal_sync_fence_cb, cb); ++ sync_fence = check->sync_file; +#else -+ MALI_DEBUG_PRINT(1, ("DMA not supported\n")); -+ goto Failed_bind_backend; ++ waiter = container_of(cb, struct mali_internal_sync_fence_waiter, cb); ++ sync_fence = (struct mali_internal_sync_fence *)waiter->work.private; +#endif -+ break; -+ case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY: -+ /* not allowed */ -+ MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n")); -+ goto Failed_bind_backend; -+ break; -+ -+ case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY: -+ mali_allocation->type = MALI_MEM_EXTERNAL; -+ mem_backend->type = MALI_MEM_EXTERNAL; -+ ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr, -+ args->mem_union.bind_ext_memory.flags); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("Bind external buf failed\n")); -+ goto Failed_bind_backend; -+ } -+ break; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ ret = atomic_dec_and_test(&sync_fence->status); ++ if (ret) ++ wake_up_all(&sync_fence->wq); ++#else ++ ret = sync_fence->fence->ops->signaled(sync_fence->fence); + -+ case _MALI_MEMORY_BIND_BACKEND_EXT_COW: -+ /* not allowed */ -+ MALI_DEBUG_PRINT_ERROR(("External cow memory type not 
supported !\n")); -+ goto Failed_bind_backend; -+ break; ++ if (0 > ret) ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to wait fence 0x%x for sync_fence 0x%x.\n", fence, sync_fence)); ++ if (1 == ret) ++ wake_up_all(&sync_fence->wq); ++#endif ++} + -+ default: -+ MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported !\n")); -+ goto Failed_bind_backend; -+ break; -+ } -+ MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE); -+ atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]); -+ return _MALI_OSK_ERR_OK; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++static void mali_internal_sync_fence_add_fence(struct mali_internal_sync_fence *sync_fence, struct fence *sync_pt) ++{ ++ int fence_num = 0; ++ MALI_DEBUG_ASSERT_POINTER(sync_fence); ++ MALI_DEBUG_ASSERT_POINTER(sync_pt); + -+Failed_bind_backend: -+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); ++ fence_num = sync_fence->num_fences; + -+Failed_alloc_backend: -+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); -+ mali_mem_allocation_struct_destory(mali_allocation); ++ sync_fence->cbs[fence_num].fence = sync_pt; ++ sync_fence->cbs[fence_num].sync_file = sync_fence; + -+ MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n")); -+ return ret; ++ if (!fence_add_callback(sync_pt, &sync_fence->cbs[fence_num].cb, mali_internal_fence_check_cb_func)) { ++ fence_get(sync_pt); ++ sync_fence->num_fences++; ++ atomic_inc(&sync_fence->status); ++ } +} -+ -+ -+/* -+* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address -+* This function unbind the backend memory and free the allocation -+* no ref_count for this type of memory -+*/ -+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args) ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++static int mali_internal_sync_fence_wake_up_wq(wait_queue_entry_t *curr, unsigned mode, ++ int wake_flags, void *key) ++#else ++static int mali_internal_sync_fence_wake_up_wq(wait_queue_t *curr, unsigned mode, ++ int wake_flags, void *key) ++#endif +{ -+ /**/ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ mali_mem_allocation *mali_allocation = NULL; -+ struct mali_vma_node *mali_vma_node = NULL; -+ u32 mali_addr = args->vaddr; -+ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! 
\n", args->vaddr)); -+ -+ /* find the allocation by vaddr */ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); -+ if (likely(mali_vma_node)) { -+ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start); -+ mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ } else { -+ MALI_DEBUG_ASSERT(NULL != mali_vma_node); -+ return _MALI_OSK_ERR_INVALID_ARGS; -+ } ++ struct mali_internal_sync_fence_waiter *wait; ++ MALI_IGNORE(mode); ++ MALI_IGNORE(wake_flags); ++ MALI_IGNORE(key); + -+ if (NULL != mali_allocation) -+ /* check ref_count */ -+ mali_allocation_unref(&mali_allocation); -+ return _MALI_OSK_ERR_OK; ++ wait = container_of(curr, struct mali_internal_sync_fence_waiter, work); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ list_del_init(&wait->work.entry); ++#else ++ list_del_init(&wait->work.task_list); ++#endif ++ wait->callback(wait->work.private, wait); ++ return 1; +} + -+/* -+* Function _mali_ukk_mem_cow -- COW for an allocation -+* This function allocate new pages for a range (range, range+size) of allocation -+* And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr -+*/ -+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args) ++struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops, ++ int size, const char *name) +{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ mali_mem_backend *target_backend = NULL; -+ mali_mem_backend *mem_backend = NULL; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_allocation *mali_allocation = NULL; -+ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ /* Get the target backend for cow */ -+ target_backend = mali_mem_backend_struct_search(session, args->target_handle); -+ -+ if (NULL == target_backend || 0 == target_backend->size) { -+ MALI_DEBUG_ASSERT_POINTER(target_backend); -+ MALI_DEBUG_ASSERT(0 != target_backend->size); -+ return ret; -+ } -+ -+ /*Cow not support resized mem */ -+ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags)); ++ struct mali_internal_sync_timeline *sync_timeline = NULL; + -+ /* Check if the new mali address is allocated */ -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0); ++ MALI_DEBUG_ASSERT_POINTER(ops); + -+ if (unlikely(mali_vma_node)) { -+ MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! 
\n")); -+ return ret; ++ if (size < sizeof(struct mali_internal_sync_timeline)) { ++ MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync timeline.\n")); ++ goto err; + } + -+ /* create new alloction for COW*/ -+ mali_allocation = mali_mem_allocation_struct_create(session); -+ if (mali_allocation == NULL) { -+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n")); -+ return _MALI_OSK_ERR_NOMEM; ++ sync_timeline = kzalloc(size, GFP_KERNEL); ++ if (NULL == sync_timeline) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to allocate buffer for the mali internal sync timeline.\n")); ++ goto err; + } -+ mali_allocation->psize = args->target_size; -+ mali_allocation->vsize = args->target_size; -+ mali_allocation->type = MALI_MEM_COW; -+ -+ /*add allocation node to RB tree for index*/ -+ mali_allocation->mali_vma_node.vm_node.start = args->vaddr; -+ mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize; -+ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ kref_init(&sync_timeline->kref_count); ++ sync_timeline->ops = ops; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ sync_timeline->fence_context = dma_fence_context_alloc(1); ++#else ++ sync_timeline->fence_context = fence_context_alloc(1); ++#endif ++ strlcpy(sync_timeline->name, name, sizeof(sync_timeline->name)); + -+ /* create new backend for COW memory */ -+ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize); -+ if (mali_allocation->backend_handle < 0) { -+ ret = _MALI_OSK_ERR_NOMEM; -+ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n")); -+ goto failed_alloc_backend; -+ } -+ mem_backend->mali_allocation = mali_allocation; -+ mem_backend->type = mali_allocation->type; ++ INIT_LIST_HEAD(&sync_timeline->sync_pt_list_head); ++ spin_lock_init(&sync_timeline->sync_pt_list_lock); + -+ if (target_backend->type == MALI_MEM_SWAP || -+ (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) { -+ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED; -+ /** -+ * CoWed swap backends couldn't be mapped as non-linear vma, because if one -+ * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel, -+ * while in mali driver, we use this variable to store the pointer of mali_allocation, so there -+ * is a conflict. -+ * To resolve this problem, we have to do some fake things, we reserved about 64MB -+ * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM), -+ * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these -+ * backends will be mapped as linear and will add to priority tree of global swap file, while -+ * these vmas will never be found by using normal page->index, these pages in those vma -+ * also couldn't be swapped out. 
-+ */ -+ mem_backend->start_idx = 0; ++ return sync_timeline; ++err: ++ if (NULL != sync_timeline) { ++ kfree(sync_timeline); + } ++ return NULL; ++} + -+ /* Add the target backend's cow count, also allocate new pages for COW backend from os mem -+ *for a modified range and keep the page which not in the modified range and Add ref to it -+ */ -+ MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x; cow_addr: 0x%x, size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start, -+ mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size)); ++void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline) ++{ ++ MALI_DEBUG_ASSERT_POINTER(sync_timeline); + -+ ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size); -+ if (_MALI_OSK_ERR_OK != ret) { -+ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n")); -+ goto failed_do_cow; -+ } ++ sync_timeline->destroyed = MALI_TRUE; + -+ /** -+ *map to GPU side -+ */ -+ mali_allocation->mali_mapping.addr = args->vaddr; -+ /* set gpu mmu propery */ -+ _mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags); ++ smp_wmb(); + -+ _mali_osk_mutex_wait(session->memory_lock); -+ /* Map on Mali */ -+ ret = mali_mem_mali_map_prepare(mali_allocation); -+ if (0 != ret) { -+ MALI_DEBUG_PRINT(1, (" prepare map fail! \n")); -+ goto failed_gpu_map; -+ } ++ mali_internal_sync_timeline_signal(sync_timeline); ++ kref_put(&sync_timeline->kref_count, mali_internal_sync_timeline_free); ++} + -+ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { -+ mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size); -+ } ++void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline) ++{ ++ unsigned long flags; ++ struct mali_internal_sync_point *sync_pt, *next; + -+ _mali_osk_mutex_signal(session->memory_lock); ++ MALI_DEBUG_ASSERT_POINTER(sync_timeline); + -+ mutex_lock(&target_backend->mutex); -+ target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED; -+ mutex_unlock(&target_backend->mutex); ++ spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags); + -+ atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages); -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ list_for_each_entry_safe(sync_pt, next, &sync_timeline->sync_pt_list_head, ++ sync_pt_list) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ if (dma_fence_is_signaled_locked(&sync_pt->base)) ++#else ++ if (fence_is_signaled_locked(&sync_pt->base)) ++#endif ++ list_del_init(&sync_pt->sync_pt_list); + } -+ return _MALI_OSK_ERR_OK; -+ -+failed_gpu_map: -+ _mali_osk_mutex_signal(session->memory_lock); -+ mali_mem_cow_release(mem_backend, MALI_FALSE); -+ mem_backend->cow_mem.count = 0; -+failed_do_cow: -+ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); -+failed_alloc_backend: -+ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); -+ mali_mem_allocation_struct_destory(mali_allocation); + -+ return ret; ++ spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags); +} + -+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args) ++struct mali_internal_sync_point 
*mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size) +{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ mali_mem_backend *mem_backend = NULL; -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ unsigned long flags; ++ struct mali_internal_sync_point *sync_pt = NULL; + -+ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n")); -+ /* Get the backend that need to be modified. */ -+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); ++ MALI_DEBUG_ASSERT_POINTER(sync_timeline); + -+ if (NULL == mem_backend || 0 == mem_backend->size) { -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT(0 != mem_backend->size); -+ return ret; ++ if (size < sizeof(struct mali_internal_sync_point)) { ++ MALI_PRINT_ERROR(("Mali internal sync:Invalid size to create the mali internal sync point.\n")); ++ goto err; + } + -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_backend->type); -+ -+ ret = mali_memory_cow_modify_range(mem_backend, args->range_start, args->size); -+ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr; -+ if (_MALI_OSK_ERR_OK != ret) -+ return ret; -+ _mali_osk_mutex_wait(session->memory_lock); -+ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { -+ mali_mem_cow_mali_map(mem_backend, args->range_start, args->size); ++ sync_pt = kzalloc(size, GFP_KERNEL); ++ if (NULL == sync_pt) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to allocate buffer for the mali internal sync point.\n")); ++ goto err; + } -+ _mali_osk_mutex_signal(session->memory_lock); ++ spin_lock_irqsave(&sync_timeline->sync_pt_list_lock, flags); ++ kref_get(&sync_timeline->kref_count); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock, ++ sync_timeline->fence_context, ++sync_timeline->value); ++#else ++ fence_init(&sync_pt->base, &fence_ops, &sync_timeline->sync_pt_list_lock, ++ sync_timeline->fence_context, ++sync_timeline->value); ++#endif ++ INIT_LIST_HEAD(&sync_pt->sync_pt_list); ++ spin_unlock_irqrestore(&sync_timeline->sync_pt_list_lock, flags); + -+ atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages); -+ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { -+ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ return sync_pt; ++err: ++ if (NULL != sync_pt) { ++ kfree(sync_pt); + } -+ -+ return _MALI_OSK_ERR_OK; ++ return NULL; +} + -+ -+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args) ++struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd) +{ -+ mali_mem_backend *mem_backend = NULL; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; -+ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n")); -+ MALI_DEBUG_ASSERT(0 == args->psize % MALI_MMU_PAGE_SIZE); -+ -+ /* Get the memory backend that need to be resize. 
*/ -+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); ++ struct file *file = fget(fd); + -+ if (NULL == mem_backend) { -+ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n")); -+ return ret; ++ if (NULL == file) { ++ return NULL; + } + -+ MALI_DEBUG_ASSERT(args->psize != mem_backend->size); -+ -+ ret = mali_mem_resize(session, mem_backend, args->psize); -+ -+ return ret; ++ return file->private_data; +} + -+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++struct mali_internal_sync_fence *mali_internal_sync_fence_merge( ++ struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) +{ -+ args->memory_usage = _mali_ukk_report_memory_usage(); -+ if (0 != args->vaddr) { -+ mali_mem_backend *mem_backend = NULL; -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ /* Get the backend that need to be modified. */ -+ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); -+ if (NULL == mem_backend) { -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ struct mali_internal_sync_fence *new_sync_fence; ++ int i, j, num_fence1, num_fence2, total_fences; ++ struct fence *fence0 = NULL; + -+ if (MALI_MEM_COW == mem_backend->type) -+ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr; -+ } -+ return _MALI_OSK_ERR_OK; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h -new file mode 100644 -index 000000000..23d8cde75 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#ifndef __MALI_MEMORY_MANAGER_H__ -+#define __MALI_MEMORY_MANAGER_H__ -+ -+#include "mali_osk.h" -+#include -+#include -+#include -+#include -+#include -+#include "mali_memory_types.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_uk_types.h" -+ -+struct mali_allocation_manager { -+ rwlock_t vm_lock; -+ struct rb_root allocation_mgr_rb; -+ struct list_head head; -+ struct mutex list_mutex; -+ u32 mali_allocation_num; -+}; -+ -+extern struct idr mali_backend_idr; -+extern struct mutex mali_idr_mutex; -+ -+int mali_memory_manager_init(struct mali_allocation_manager *mgr); -+void mali_memory_manager_uninit(struct mali_allocation_manager *mgr); -+ -+void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc); -+_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size); -+mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address); -+_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args); -+_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args); -+ -+#endif -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c -new file mode 100644 -index 000000000..277534fc1 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c -@@ -0,0 +1,810 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ MALI_DEBUG_ASSERT_POINTER(sync_fence1); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence2); + -+#include "../platform/rk/custom_log.h" ++ num_fence1 = sync_fence1->num_fences; ++ num_fence2 = sync_fence2->num_fences; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ total_fences = num_fence1 + num_fence2; + -+#include "mali_osk.h" -+#include "mali_memory.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_kernel_linux.h" ++ i = 0; ++ j = 0; + -+/* Minimum size of allocator page pool */ -+#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256) -+#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */ ++ if (num_fence1 > 0) { ++ fence0 = sync_fence1->cbs[i].fence; ++ i = 1; ++ } else if (num_fence2 > 0) { ++ fence0 = sync_fence2->cbs[i].fence; ++ j = 1; ++ } + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask); -+#else -+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask); -+#endif -+#else -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc); -+#else -+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc); -+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc); -+#endif -+#endif -+static void mali_mem_os_trim_pool(struct work_struct *work); ++ new_sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fence0); ++ if (NULL == new_sync_fence) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to create the mali internal sync fence when merging sync fence.\n")); ++ return NULL; ++ } + -+struct mali_mem_os_allocator mali_mem_os_allocator = { -+ .pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock), -+ .pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages), -+ .pool_count = 0, ++ fence_remove_callback(new_sync_fence->cb[0].fence, &new_sync_fence->cb[0].cb); ++ new_sync_fence->num_fences = 0; ++ atomic_dec(&new_sync_fence->status); + -+ .allocated_pages = ATOMIC_INIT(0), -+ .allocation_limit = 0, ++ for (; i < num_fence1 && j < num_fence2;) { ++ struct fence *fence1 = sync_fence1->cbs[i].fence; ++ struct fence *fence2 = sync_fence2->cbs[j].fence; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+ .shrinker.shrink = mali_mem_os_shrink, -+#else -+ .shrinker.count_objects = mali_mem_os_shrink_count, -+ .shrinker.scan_objects = mali_mem_os_shrink, -+#endif -+ .shrinker.seeks = DEFAULT_SEEKS, -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) -+ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE), -+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38) -+ .timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool), -+#else -+ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool), -+#endif -+}; ++ if (fence1->context < fence2->context) { ++ mali_internal_sync_fence_add_fence(new_sync_fence, fence1); + -+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag) -+{ -+ LIST_HEAD(pages); -+ struct mali_page_node *m_page, *m_tmp; -+ u32 free_pages_nr = 0; ++ i++; ++ } else if (fence1->context > fence2->context) { ++ mali_internal_sync_fence_add_fence(new_sync_fence, 
fence2); + -+ if (MALI_TRUE == cow_flag) { -+ list_for_each_entry_safe(m_page, m_tmp, os_pages, list) { -+ /*only handle OS node here */ -+ if (m_page->type == MALI_PAGE_NODE_OS) { -+ if (1 == _mali_page_node_get_ref_count(m_page)) { -+ list_move(&m_page->list, &pages); -+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); -+ free_pages_nr ++; -+ } else { -+ _mali_page_node_unref(m_page); -+ m_page->page = NULL; -+ list_del(&m_page->list); -+ kfree(m_page); -+ } -+ } ++ j++; ++ } else { ++ if (fence1->seqno - fence2->seqno <= INT_MAX) ++ mali_internal_sync_fence_add_fence(new_sync_fence, fence1); ++ else ++ mali_internal_sync_fence_add_fence(new_sync_fence, fence2); ++ i++; ++ j++; + } -+ } else { -+ list_cut_position(&pages, os_pages, os_pages->prev); -+ atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages); -+ free_pages_nr = pages_count; + } + -+ /* Put pages on pool. */ -+ spin_lock(&mali_mem_os_allocator.pool_lock); -+ list_splice(&pages, &mali_mem_os_allocator.pool_pages); -+ mali_mem_os_allocator.pool_count += free_pages_nr; -+ spin_unlock(&mali_mem_os_allocator.pool_lock); ++ for (; i < num_fence1; i++) ++ mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence1->cbs[i].fence); + -+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { -+ MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count)); -+ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES); -+ } -+ return free_pages_nr; -+} ++ for (; j < num_fence2; j++) ++ mali_internal_sync_fence_add_fence(new_sync_fence, sync_fence2->cbs[j].fence); + -+/** -+* put page without put it into page pool -+*/ -+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page) ++ return new_sync_fence; ++} ++#else ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++static struct fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences) ++#else ++static struct dma_fence **mali_internal_get_fences(struct mali_internal_sync_fence *sync_fence, int *num_fences) ++#endif +{ -+ MALI_DEBUG_ASSERT_POINTER(page); -+ if (1 == page_count(page)) { -+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); -+ dma_unmap_page(&mali_platform_device->dev, page_private(page), -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ ClearPagePrivate(page); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ if (sync_fence->fence->ops == &fence_array_ops) { ++ struct fence_array *fence_array = container_of(sync_fence->fence, struct fence_array, base); ++ *num_fences = fence_array->num_fences; ++ return fence_array->fences; + } -+ put_page(page); -+ return _MALI_OSK_ERR_OK; ++#else ++ if (sync_fence->fence->ops == &dma_fence_array_ops) { ++ struct dma_fence_array *fence_array = container_of(sync_fence->fence, struct dma_fence_array, base); ++ *num_fences = fence_array->num_fences; ++ return fence_array->fences; ++ } ++#endif ++ *num_fences = 1; ++ return &sync_fence->fence; +} + -+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++static void mali_internal_add_fence_array(struct fence **fences, int *num_fences, struct fence *fence) ++#else ++static void mali_internal_add_fence_array(struct dma_fence **fences, int *num_fences, struct dma_fence *fence) ++#endif +{ -+ struct mali_page_node *m_page, *m_tmp; -+ u32 i = 0; -+ -+ 
MALI_DEBUG_ASSERT_POINTER(mem_from); -+ MALI_DEBUG_ASSERT_POINTER(mem_to); -+ -+ if (mem_from->count < start_page + page_count) { -+ return _MALI_OSK_ERR_INVALID_ARGS; ++ fences[*num_fences] = fence; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ if (!fence_is_signaled(fence)) { ++ fence_get(fence); ++ (*num_fences)++; + } -+ -+ list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) { -+ if (i >= start_page && i < start_page + page_count) { -+ list_move_tail(&m_page->list, &mem_to->pages); -+ mem_from->count--; -+ mem_to->count++; -+ } -+ i++; ++#else ++ if (!dma_fence_is_signaled(fence)) { ++ dma_fence_get(fence); ++ (*num_fences)++; + } -+ -+ return _MALI_OSK_ERR_OK; ++#endif +} + -+ -+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence, ++ struct fence **fences, int num_fences) ++#else ++static int mali_internal_sync_fence_set_fence_array(struct mali_internal_sync_fence *sync_fence, ++ struct dma_fence **fences, int num_fences) ++#endif +{ -+ struct page *new_page; -+ LIST_HEAD(pages_list); -+ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE; -+ size_t remaining = page_count; -+ struct mali_page_node *m_page, *m_tmp; -+ u32 i; -+ -+ MALI_DEBUG_ASSERT_POINTER(os_mem); -+ -+ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) { -+ MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n", -+ size, -+ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE, -+ mali_mem_os_allocator.allocation_limit)); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ struct fence_array *array; ++#else ++ struct dma_fence_array *array; ++#endif ++ if(num_fences == 1) { ++ sync_fence->fence =fences[0]; ++ kfree(fences); ++ } else { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ array = fence_array_create(num_fences, fences, ++ fence_context_alloc(1), 1, false); ++#else ++ array = dma_fence_array_create(num_fences, fences, ++ dma_fence_context_alloc(1), 1, false); ++#endif ++ if (!array){ + return -ENOMEM; ++ } ++ sync_fence->fence = &array->base; + } ++ return 0; ++} + -+ INIT_LIST_HEAD(&os_mem->pages); -+ os_mem->count = page_count; ++struct mali_internal_sync_fence *mali_internal_sync_fence_merge( ++ struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) ++{ ++ struct mali_internal_sync_fence *sync_fence; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ struct fence **fences, **nfences, **fences1, **fences2; ++#else ++ struct dma_fence **fences, **nfences, **fences1, **fences2; ++#endif ++ int real_num_fences, i, j, num_fences, num_fences1, num_fences2; + -+ /* Grab pages from pool. */ -+ { -+ size_t pool_pages; -+ spin_lock(&mali_mem_os_allocator.pool_lock); -+ pool_pages = min(remaining, mali_mem_os_allocator.pool_count); -+ for (i = pool_pages; i > 0; i--) { -+ BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages)); -+ list_move(mali_mem_os_allocator.pool_pages.next, &pages_list); -+ } -+ mali_mem_os_allocator.pool_count -= pool_pages; -+ remaining -= pool_pages; -+ spin_unlock(&mali_mem_os_allocator.pool_lock); -+ } ++ fences1 = mali_internal_get_fences(sync_fence1, &num_fences1); ++ fences2 = mali_internal_get_fences(sync_fence2, &num_fences2); + -+ /* Process pages from pool. 
*/ -+ i = 0; -+ list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) { -+ BUG_ON(NULL == m_page); ++ num_fences = num_fences1 + num_fences2; + -+ list_move_tail(&m_page->list, &os_mem->pages); ++ fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); ++ if (!fences) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to alloc buffer for fences.\n")); ++ goto fences_alloc_failed; + } + -+ /* Allocate new pages, if needed. */ -+ for (i = 0; i < remaining; i++) { -+ dma_addr_t dma_addr; -+ gfp_t flags = __GFP_ZERO | GFP_HIGHUSER; -+ int err; -+ -+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE) -+ flags |= GFP_HIGHUSER; -+#else -+#ifdef CONFIG_ZONE_DMA32 -+ flags |= GFP_DMA32; -+#else -+#ifdef CONFIG_ZONE_DMA ++ for (real_num_fences = i = j = 0; i < num_fences1 && j < num_fences2;) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ struct fence *fence1 = fences1[i]; ++ struct fence *fence2 = fences2[j]; +#else -+ /* arm64 utgard only work on < 4G, but the kernel -+ * didn't provide method to allocte memory < 4G -+ */ -+ MALI_DEBUG_ASSERT(0); -+#endif -+#endif ++ struct dma_fence *fence1 = fences1[i]; ++ struct dma_fence *fence2 = fences2[j]; +#endif ++ if (fence1->context < fence2->context) { ++ mali_internal_add_fence_array(fences, &real_num_fences, fence1); + -+ new_page = alloc_page(flags); -+ -+ if (unlikely(NULL == new_page)) { -+ E("err."); -+ /* Calculate the number of pages actually allocated, and free them. */ -+ os_mem->count = (page_count - remaining) + i; -+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); -+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); -+ return -ENOMEM; -+ } -+ -+ /* Ensure page is flushed from CPU caches. */ -+ dma_addr = dma_map_page(&mali_platform_device->dev, new_page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ dma_unmap_page(&mali_platform_device->dev, dma_addr, -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ dma_addr = dma_map_page(&mali_platform_device->dev, new_page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ -+ err = dma_mapping_error(&mali_platform_device->dev, dma_addr); -+ if (unlikely(err)) { -+ MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u", -+ new_page, err)); -+ __free_page(new_page); -+ os_mem->count = (page_count - remaining) + i; -+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); -+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); -+ return -EFAULT; -+ } ++ i++; ++ } else if (fence1->context > fence2->context) { ++ mali_internal_add_fence_array(fences, &real_num_fences, fence2); + -+ /* Store page phys addr */ -+ SetPagePrivate(new_page); -+ set_page_private(new_page, dma_addr); ++ j++; ++ } else { ++ if (fence1->seqno - fence2->seqno <= INT_MAX) ++ mali_internal_add_fence_array(fences, &real_num_fences, fence1); ++ else ++ mali_internal_add_fence_array(fences, &real_num_fences, fence2); + -+ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS); -+ if (unlikely(NULL == m_page)) { -+ MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! 
\n")); -+ dma_unmap_page(&mali_platform_device->dev, page_private(new_page), -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ ClearPagePrivate(new_page); -+ __free_page(new_page); -+ os_mem->count = (page_count - remaining) + i; -+ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); -+ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); -+ return -EFAULT; ++ i++; ++ j++; + } -+ m_page->page = new_page; -+ -+ list_add_tail(&m_page->list, &os_mem->pages); + } + -+ atomic_add(page_count, &mali_mem_os_allocator.allocated_pages); -+ -+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) { -+ MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count)); -+ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker); -+ } -+ -+ return 0; -+} -+ -+ -+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props) -+{ -+ struct mali_page_directory *pagedir = session->page_directory; -+ struct mali_page_node *m_page; -+ u32 virt; -+ u32 prop = props; ++ for (; i < num_fences1; i++) ++ mali_internal_add_fence_array(fences, &real_num_fences, fences1[i]); + -+ MALI_DEBUG_ASSERT_POINTER(session); -+ MALI_DEBUG_ASSERT_POINTER(os_mem); ++ for (; j < num_fences2; j++) ++ mali_internal_add_fence_array(fences, &real_num_fences, fences2[j]); + -+ MALI_DEBUG_ASSERT(start_page <= os_mem->count); -+ MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count); ++ if (0 == real_num_fences) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ fences[real_num_fences++] = fence_get(fences1[0]); ++#else ++ fences[real_num_fences++] = dma_fence_get(fences1[0]); ++#endif + -+ if ((start_page + mapping_pgae_num) == os_mem->count) { ++ if (num_fences > real_num_fences) { ++ nfences = krealloc(fences, real_num_fences * sizeof(*fences), ++ GFP_KERNEL); ++ if (!nfences) ++ goto nfences_alloc_failed; + -+ virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num); ++ fences = nfences; ++ } + -+ list_for_each_entry_reverse(m_page, &os_mem->pages, list) { ++ sync_fence = (struct mali_internal_sync_fence *)sync_file_create(fences[0]); ++ if (NULL == sync_fence) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to create the mali internal sync fence when merging sync fence.\n")); ++ goto sync_fence_alloc_failed; ++ } + -+ virt -= MALI_MMU_PAGE_SIZE; -+ if (mapping_pgae_num > 0) { -+ dma_addr_t phys = page_private(m_page->page); -+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) -+ /* Verify that the "physical" address is 32-bit and -+ * usable for Mali, when on a system with bus addresses -+ * wider than 32-bit. 
*/ -+ MALI_DEBUG_ASSERT(0 == (phys >> 32)); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ fence_put(fences[0]); ++#else ++ dma_fence_put(fences[0]); +#endif -+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); -+ } else { -+ break; -+ } -+ mapping_pgae_num--; -+ } + -+ } else { -+ u32 i = 0; -+ virt = vaddr; -+ list_for_each_entry(m_page, &os_mem->pages, list) { ++ if (mali_internal_sync_fence_set_fence_array(sync_fence, fences, real_num_fences) < 0) { ++ MALI_PRINT_ERROR(("Mali internal sync:Failed to set fence for sync fence.\n")); ++ goto sync_fence_set_failed; ++ } + -+ if (i >= start_page) { -+ dma_addr_t phys = page_private(m_page->page); ++ return sync_fence; + -+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) -+ /* Verify that the "physical" address is 32-bit and -+ * usable for Mali, when on a system with bus addresses -+ * wider than 32-bit. */ -+ MALI_DEBUG_ASSERT(0 == (phys >> 32)); ++sync_fence_set_failed: ++ fput(sync_fence->file); ++sync_fence_alloc_failed: ++ for (i = 0; i < real_num_fences; i++) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ fence_put(fences[i]); ++#else ++ dma_fence_put(fences[i]); +#endif -+ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); -+ } -+ i++; -+ virt += MALI_MMU_PAGE_SIZE; -+ } -+ } -+ return _MALI_OSK_ERR_OK; ++nfences_alloc_failed: ++ kfree(fences); ++fences_alloc_failed: ++ return NULL; +} ++#endif + -+ -+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc) ++void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter, ++ mali_internal_sync_callback_t callback) +{ -+ struct mali_session_data *session; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(waiter); ++ MALI_DEBUG_ASSERT_POINTER(callback); + -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ INIT_LIST_HEAD(&waiter->work.entry); ++#else ++ INIT_LIST_HEAD(&waiter->work.task_list); ++#endif ++ waiter->callback = callback; +} + -+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) ++int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence, ++ struct mali_internal_sync_fence_waiter *waiter) +{ -+ mali_mem_os_mem *os_mem = &mem_bkend->os_mem; -+ struct mali_page_node *m_page; -+ struct page *page; -+ int ret; -+ unsigned long addr = vma->vm_start; -+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type); -+ -+ list_for_each_entry(m_page, &os_mem->pages, list) { -+ /* We should use vm_insert_page, but it does a dcache -+ * flush which makes it way slower than remap_pfn_range or vmf_insert_pfn. 
-+ ret = vm_insert_page(vma, addr, page); -+ */ -+ page = m_page->page; -+ ret = vmf_insert_pfn(vma, addr, page_to_pfn(page)); -+ -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ return -EFAULT; -+ } -+ addr += _MALI_OSK_MALI_PAGE_SIZE; -+ } ++ int err; ++ unsigned long flags; + -+ return 0; -+} ++ MALI_DEBUG_ASSERT_POINTER(sync_fence); ++ MALI_DEBUG_ASSERT_POINTER(waiter); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ err = atomic_read(&sync_fence->status); + -+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size) -+{ -+ mali_mem_os_mem *os_mem = &mem_bkend->os_mem; -+ struct mali_page_node *m_page; -+ int ret; -+ int offset; -+ int mapping_page_num; -+ int count ; ++ if (0 > err) ++ return err; + -+ unsigned long vstart = vma->vm_start; -+ count = 0; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); -+ MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE); -+ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE); -+ offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE; -+ MALI_DEBUG_ASSERT(offset <= os_mem->count); -+ mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE; -+ MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count); ++ if (!err) ++ return 1; + -+ if ((offset + mapping_page_num) == os_mem->count) { ++ init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq); ++ waiter->work.private = sync_fence; + -+ unsigned long vm_end = start_vaddr + mappig_size; ++ spin_lock_irqsave(&sync_fence->wq.lock, flags); ++ err = atomic_read(&sync_fence->status); + -+ list_for_each_entry_reverse(m_page, &os_mem->pages, list) { ++ if (0 < err) ++ __add_wait_queue_tail(&sync_fence->wq, &waiter->work); ++ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); + -+ vm_end -= _MALI_OSK_MALI_PAGE_SIZE; -+ if (mapping_page_num > 0) { -+ ret = vmf_insert_pfn(vma, vm_end, page_to_pfn(m_page->page)); ++ if (0 > err) ++ return err; + -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ /*will return -EBUSY If the page has already been mapped into table, but it's OK*/ -+ if (-EBUSY == ret) { -+ break; -+ } else { -+ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n", -+ ret, offset + mapping_page_num, os_mem->count)); -+ } -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } else { -+ break; -+ } -+ mapping_page_num--; ++ return !err; ++#else ++ if ((sync_fence->fence) && (sync_fence->fence->ops) && (sync_fence->fence->ops->signaled)) ++ err = sync_fence->fence->ops->signaled(sync_fence->fence); ++ else ++ err = -1; + -+ } -+ } else { ++ if (0 > err) ++ return err; + -+ list_for_each_entry(m_page, &os_mem->pages, list) { -+ if (count >= offset) { ++ if (1 == err) ++ return err; + -+ ret = vmf_insert_pfn(vma, vstart, page_to_pfn(m_page->page)); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ err = dma_fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) ++ err = fence_add_callback(sync_fence->fence, &waiter->cb, mali_internal_fence_check_cb_func); ++#endif + -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ /*will return -EBUSY If the page has already been mapped into table, but it's OK*/ -+ if (-EBUSY == ret) { -+ break; -+ } else { -+ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n", -+ ret, count, offset, os_mem->count)); -+ } -+ return 
_MALI_OSK_ERR_FAULT; -+ } -+ } -+ count++; -+ vstart += _MALI_OSK_MALI_PAGE_SIZE; -+ } ++ if (0 != err) { ++ if (-ENOENT == err) ++ err = 1; ++ return err; + } -+ return _MALI_OSK_ERR_OK; -+} -+ -+u32 mali_mem_os_release(mali_mem_backend *mem_bkend) -+{ -+ -+ mali_mem_allocation *alloc; -+ struct mali_session_data *session; -+ u32 free_pages_nr = 0; -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); -+ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type); -+ -+ alloc = mem_bkend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++ init_waitqueue_func_entry(&waiter->work, mali_internal_sync_fence_wake_up_wq); ++ waiter->work.private = sync_fence; + -+ /* Unmap the memory from the mali virtual address space. */ -+ mali_mem_os_mali_unmap(alloc); -+ mutex_lock(&mem_bkend->mutex); -+ /* Free pages */ -+ if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) { -+ /* Lock to avoid the free race condition for the cow shared memory page node. */ -+ _mali_osk_mutex_wait(session->cow_lock); -+ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE); -+ _mali_osk_mutex_signal(session->cow_lock); -+ } else { -+ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE); -+ } -+ mutex_unlock(&mem_bkend->mutex); ++ spin_lock_irqsave(&sync_fence->wq.lock, flags); ++ err = sync_fence->fence->ops->signaled(sync_fence->fence); + -+ MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE, -+ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); ++ if (0 == err){ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ __add_wait_queue_entry_tail(&sync_fence->wq, &waiter->work); ++#else ++ __add_wait_queue_tail(&sync_fence->wq, &waiter->work); ++#endif ++ } ++ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); + -+ mem_bkend->os_mem.count = 0; -+ return free_pages_nr; ++ return err; ++#endif +} + -+ -+#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128 -+static struct { -+ struct { -+ mali_dma_addr phys; -+ mali_io_address mapping; -+ } page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE]; -+ size_t count; -+ spinlock_t lock; -+} mali_mem_page_table_page_pool = { -+ .count = 0, -+ .lock = __SPIN_LOCK_UNLOCKED(pool_lock), -+}; -+ -+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping) ++int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence, ++ struct mali_internal_sync_fence_waiter *waiter) +{ -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM; -+ dma_addr_t tmp_phys; -+ -+ spin_lock(&mali_mem_page_table_page_pool.lock); -+ if (0 < mali_mem_page_table_page_pool.count) { -+ u32 i = --mali_mem_page_table_page_pool.count; -+ *phys = mali_mem_page_table_page_pool.page[i].phys; -+ *mapping = mali_mem_page_table_page_pool.page[i].mapping; ++ unsigned long flags; ++ int ret = 0; + -+ ret = _MALI_OSK_ERR_OK; -+ } -+ spin_unlock(&mali_mem_page_table_page_pool.lock); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence); ++ MALI_DEBUG_ASSERT_POINTER(waiter); + -+ if (_MALI_OSK_ERR_OK != ret) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) -+ *mapping = dma_alloc_attrs(&mali_platform_device->dev, -+ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, -+ GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); ++ spin_lock_irqsave(&sync_fence->wq.lock, flags); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ if (!list_empty(&waiter->work.entry)) ++ list_del_init(&waiter->work.entry); +#else -+ 
*mapping = dma_alloc_writecombine(&mali_platform_device->dev, -+ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL); ++ if (!list_empty(&waiter->work.task_list)) ++ list_del_init(&waiter->work.task_list); +#endif -+ if (NULL != *mapping) { -+ ret = _MALI_OSK_ERR_OK; ++ else ++ ret = -ENOENT; ++ spin_unlock_irqrestore(&sync_fence->wq.lock, flags); + -+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) -+ /* Verify that the "physical" address is 32-bit and -+ * usable for Mali, when on a system with bus addresses -+ * wider than 32-bit. */ -+ MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32)); ++ if (0 == ret) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_remove_callback(sync_fence->fence, &waiter->cb); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) ++ fence_remove_callback(sync_fence->fence, &waiter->cb); +#endif + -+ *phys = (mali_dma_addr)tmp_phys; -+ } + } + + return ret; +} + -+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt) -+{ -+ spin_lock(&mali_mem_page_table_page_pool.lock); -+ if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) { -+ u32 i = mali_mem_page_table_page_pool.count; -+ mali_mem_page_table_page_pool.page[i].phys = phys; -+ mali_mem_page_table_page_pool.page[i].mapping = virt; -+ -+ ++mali_mem_page_table_page_pool.count; -+ -+ spin_unlock(&mali_mem_page_table_page_pool.lock); -+ } else { -+ spin_unlock(&mali_mem_page_table_page_pool.lock); -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) -+ dma_free_attrs(&mali_platform_device->dev, -+ _MALI_OSK_MALI_PAGE_SIZE, virt, phys, -+ DMA_ATTR_WRITE_COMBINE); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static const char *mali_internal_fence_get_driver_name(struct dma_fence *fence) +#else -+ dma_free_writecombine(&mali_platform_device->dev, -+ _MALI_OSK_MALI_PAGE_SIZE, virt, phys); ++static const char *mali_internal_fence_get_driver_name(struct fence *fence) +#endif -+ } -+} -+ -+void mali_mem_os_free_page_node(struct mali_page_node *m_page) +{ -+ struct page *page = m_page->page; -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS); -+ -+ if (1 == page_count(page)) { -+ dma_unmap_page(&mali_platform_device->dev, page_private(page), -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); -+ ClearPagePrivate(page); -+ } -+ __free_page(page); -+ m_page->page = NULL; -+ list_del(&m_page->list); -+ kfree(m_page); -+} -+ -+/* The maximum number of page table pool pages to free in one go. */ -+#define MALI_MEM_OS_CHUNK_TO_FREE 64UL -+ -+/* Free a certain number of pages from the page table page pool. -+ * The pool lock must be held when calling the function, and the lock will be -+ * released before returning. -+ */ -+static void mali_mem_os_page_table_pool_free(size_t nr_to_free) -+{ -+ mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE]; -+ void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE]; -+ u32 i; -+ -+ MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE); -+ -+ /* Remove nr_to_free pages from the pool and store them locally on stack. 
*/ -+ for (i = 0; i < nr_to_free; i++) { -+ u32 pool_index = mali_mem_page_table_page_pool.count - i - 1; ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+ phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys; -+ virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping; -+ } ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ mali_mem_page_table_page_pool.count -= nr_to_free; ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); + -+ spin_unlock(&mali_mem_page_table_page_pool.lock); ++ return parent->ops->driver_name; ++} + -+ /* After releasing the spinlock: free the pages we removed from the pool. */ -+ for (i = 0; i < nr_to_free; i++) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) -+ dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE, -+ virt_arr[i], (dma_addr_t)phys_arr[i], -+ DMA_ATTR_WRITE_COMBINE); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static const char *mali_internal_fence_get_timeline_name(struct dma_fence *fence) +#else -+ dma_free_writecombine(&mali_platform_device->dev, -+ _MALI_OSK_MALI_PAGE_SIZE, -+ virt_arr[i], (dma_addr_t)phys_arr[i]); ++static const char *mali_internal_fence_get_timeline_name(struct fence *fence) +#endif -+ } -+} -+ -+static void mali_mem_os_trim_page_table_page_pool(void) +{ -+ size_t nr_to_free = 0; -+ size_t nr_to_keep; -+ -+ /* Keep 2 page table pages for each 1024 pages in the page cache. */ -+ nr_to_keep = mali_mem_os_allocator.pool_count / 512; -+ /* And a minimum of eight pages, to accomodate new sessions. */ -+ nr_to_keep += 8; -+ -+ if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return; ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+ if (nr_to_keep < mali_mem_page_table_page_pool.count) { -+ nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep; -+ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free); -+ } ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ /* Pool lock will be released by the callee. 
*/ -+ mali_mem_os_page_table_pool_free(nr_to_free); -+} ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); + -+static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc) -+{ -+ return mali_mem_os_allocator.pool_count; ++ return parent->name; +} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -+static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask) -+#else -+static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask) -+#endif /* Linux < 2.6.35 */ -+#else -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static void mali_internal_fence_release(struct dma_fence *fence) +#else -+static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc) -+#endif /* Linux < 3.12.0 */ -+#endif /* Linux < 3.0.0 */ ++static void mali_internal_fence_release(struct fence *fence) ++#endif +{ -+ struct mali_page_node *m_page, *m_tmp; + unsigned long flags; -+ struct list_head *le, pages; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) -+ int nr = nr_to_scan; -+#else -+ int nr = sc->nr_to_scan; -+#endif -+ -+ if (0 == nr) { -+ return mali_mem_os_shrink_count(shrinker, sc); -+ } ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+ if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) { -+ /* Not able to lock. */ -+ return -1; -+ } ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ if (0 == mali_mem_os_allocator.pool_count) { -+ /* No pages availble */ -+ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags); -+ return 0; -+ } ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); + -+ /* Release from general page pool */ -+ nr = min((size_t)nr, mali_mem_os_allocator.pool_count); -+ mali_mem_os_allocator.pool_count -= nr; -+ list_for_each(le, &mali_mem_os_allocator.pool_pages) { -+ --nr; -+ if (0 == nr) break; -+ } -+ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le); -+ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags); + -+ list_for_each_entry_safe(m_page, m_tmp, &pages, list) { -+ mali_mem_os_free_page_node(m_page); -+ } ++ spin_lock_irqsave(fence->lock, flags); ++ if (WARN_ON_ONCE(!list_empty(&sync_pt->sync_pt_list))) ++ list_del(&sync_pt->sync_pt_list); ++ spin_unlock_irqrestore(fence->lock, flags); + -+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) { -+ /* Pools are empty, stop timer */ -+ MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count)); -+ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker); -+ } ++ if (parent->ops->free_pt) ++ parent->ops->free_pt(sync_pt); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+ return mali_mem_os_shrink_count(shrinker, sc); ++ kref_put(&parent->kref_count, mali_internal_sync_timeline_free); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_free(&sync_pt->base); +#else -+ return nr; ++ fence_free(&sync_pt->base); +#endif +} + -+static void mali_mem_os_trim_pool(struct work_struct *data) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static bool mali_internal_fence_signaled(struct dma_fence *fence) ++#else ++static bool 
mali_internal_fence_signaled(struct fence *fence) ++#endif +{ -+ struct mali_page_node *m_page, *m_tmp; -+ struct list_head *le; -+ LIST_HEAD(pages); -+ size_t nr_to_free; -+ -+ MALI_IGNORE(data); -+ -+ MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count)); -+ -+ /* Release from general page pool */ -+ spin_lock(&mali_mem_os_allocator.pool_lock); -+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { -+ size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES; -+ const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES); -+ -+ /* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */ -+ nr_to_free = max(count / 2, min_to_free); -+ -+ mali_mem_os_allocator.pool_count -= nr_to_free; -+ list_for_each(le, &mali_mem_os_allocator.pool_pages) { -+ --nr_to_free; -+ if (0 == nr_to_free) break; -+ } -+ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le); -+ } -+ spin_unlock(&mali_mem_os_allocator.pool_lock); -+ -+ list_for_each_entry_safe(m_page, m_tmp, &pages, list) { -+ mali_mem_os_free_page_node(m_page); -+ } -+ -+ /* Release some pages from page table page pool */ -+ mali_mem_os_trim_page_table_page_pool(); -+ -+ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { -+ MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count)); -+ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES); -+ } -+} ++ int ret; ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+_mali_osk_errcode_t mali_mem_os_init(void) -+{ -+ mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1); -+ if (NULL == mali_mem_os_allocator.wq) { -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ register_shrinker(&mali_mem_os_allocator.shrinker, "mali-mem"); ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); + -+ return _MALI_OSK_ERR_OK; ++ ret = parent->ops->has_signaled(sync_pt); ++ if (0 > ret) ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) \ ++ || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68))) ++ fence->error = ret; ++#else ++ fence->status = ret; ++#endif ++ return ret; +} + -+void mali_mem_os_term(void) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static bool mali_internal_fence_enable_signaling(struct dma_fence *fence) ++#else ++static bool mali_internal_fence_enable_signaling(struct fence *fence) ++#endif +{ -+ struct mali_page_node *m_page, *m_tmp; -+ unregister_shrinker(&mali_mem_os_allocator.shrinker); -+ cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker); -+ -+ if (NULL != mali_mem_os_allocator.wq) { -+ destroy_workqueue(mali_mem_os_allocator.wq); -+ mali_mem_os_allocator.wq = NULL; -+ } -+ -+ spin_lock(&mali_mem_os_allocator.pool_lock); -+ list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) { -+ mali_mem_os_free_page_node(m_page); -+ -+ --mali_mem_os_allocator.pool_count; -+ } -+ BUG_ON(mali_mem_os_allocator.pool_count); -+ spin_unlock(&mali_mem_os_allocator.pool_lock); ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+ /* Release from page table page pool */ -+ do { -+ u32 nr_to_free; ++ MALI_DEBUG_ASSERT_POINTER(fence); + -+ 
spin_lock(&mali_mem_page_table_page_pool.lock); ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); + -+ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count); ++ if (mali_internal_fence_signaled(fence)) ++ return false; + -+ /* Pool lock will be released by the callee. */ -+ mali_mem_os_page_table_pool_free(nr_to_free); -+ } while (0 != mali_mem_page_table_page_pool.count); ++ list_add_tail(&sync_pt->sync_pt_list, &parent->sync_pt_list_head); ++ return true; +} + -+_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static void mali_internal_fence_value_str(struct dma_fence *fence, char *str, int size) ++#else ++static void mali_internal_fence_value_str(struct fence *fence, char *str, int size) ++#endif +{ -+ mali_mem_os_allocator.allocation_limit = size; ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_timeline *parent; + -+ MALI_SUCCESS; -+} ++ MALI_DEBUG_ASSERT_POINTER(fence); ++ MALI_IGNORE(str); ++ MALI_IGNORE(size); + -+u32 mali_mem_os_stat(void) -+{ -+ return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE; ++ sync_pt = mali_internal_fence_to_sync_pt(fence); ++ parent = mali_internal_sync_pt_to_sync_timeline(sync_pt); ++ ++ parent->ops->print_sync_pt(sync_pt); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++static const struct dma_fence_ops fence_ops = { ++#else ++static const struct fence_ops fence_ops = { ++#endif ++ .get_driver_name = mali_internal_fence_get_driver_name, ++ .get_timeline_name = mali_internal_fence_get_timeline_name, ++ .enable_signaling = mali_internal_fence_enable_signaling, ++ .signaled = mali_internal_fence_signaled, ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ .wait = dma_fence_default_wait, ++#else ++ .wait = fence_default_wait, ++#endif ++ .release = mali_internal_fence_release, ++ .fence_value_str = mali_internal_fence_value_str, ++}; ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h new file mode 100644 -index 000000000..8c9b35d0b +index 000000000..dbb29222b --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h -@@ -0,0 +1,54 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_internal_sync.h +@@ -0,0 +1,191 @@ +/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2015, 2017-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -315801,57 +317185,194 @@ index 000000000..8c9b35d0b + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_MEMORY_OS_ALLOC_H__ -+#define __MALI_MEMORY_OS_ALLOC_H__ ++/** ++ * @file mali_internal_sync.h ++ * ++ * Mali internal structure/interface for sync. 
++ */ + -+#include "mali_osk.h" -+#include "mali_memory_types.h" ++#ifndef _MALI_INTERNAL_SYNC_H ++#define _MALI_INTERNAL_SYNC_H ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) ++#include ++#include ++#include ++#include ++#include ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) ++#include ++#else ++#include ++#endif + ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#include ++#else ++#include ++#endif + -+/** @brief Release Mali OS memory -+ * -+ * The session memory_lock must be held when calling this function. -+ * -+ * @param mem_bkend Pointer to the mali_mem_backend to release -+ */ -+u32 mali_mem_os_release(mali_mem_backend *mem_bkend); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++#include ++#else ++#include ++#endif ++#endif + -+_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping); ++struct mali_internal_sync_timeline; ++struct mali_internal_sync_point; ++struct mali_internal_sync_fence; + -+void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt); ++struct mali_internal_sync_timeline_ops { ++ const char *driver_name; ++ int (*has_signaled)(struct mali_internal_sync_point *pt); ++ void (*free_pt)(struct mali_internal_sync_point *sync_pt); ++ void (*release_obj)(struct mali_internal_sync_timeline *sync_timeline); ++ void (*print_sync_pt)(struct mali_internal_sync_point *sync_pt); ++}; + -+_mali_osk_errcode_t mali_mem_os_init(void); ++struct mali_internal_sync_timeline { ++ struct kref kref_count; ++ const struct mali_internal_sync_timeline_ops *ops; ++ char name[32]; ++ bool destroyed; ++ int fence_context; ++ int value; ++ spinlock_t sync_pt_list_lock; ++ struct list_head sync_pt_list_head; ++}; + -+void mali_mem_os_term(void); ++struct mali_internal_sync_point { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ struct dma_fence base; ++#else ++ struct fence base; ++#endif ++ struct list_head sync_pt_list; ++}; + -+u32 mali_mem_os_stat(void); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++struct mali_internal_sync_fence_cb { ++ struct fence_cb cb; ++ struct fence *fence; ++ struct mali_internal_sync_fence *sync_file; ++}; ++#endif + -+void mali_mem_os_free_page_node(struct mali_page_node *m_page); ++struct mali_internal_sync_fence { ++ struct file *file; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) ++ struct kref kref; ++#endif ++ char name[32]; ++#ifdef CONFIG_DEBUG_FS ++ struct list_head sync_file_list; ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ int num_fences; ++#endif ++ wait_queue_head_t wq; ++#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 12, 0) ++ unsigned long flags; ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ atomic_t status; ++ struct mali_internal_sync_fence_cb cbs[]; ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ struct fence *fence; ++ struct fence_cb cb; ++#else ++ struct dma_fence *fence; ++ struct dma_fence_cb cb; ++#endif ++}; + -+int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size); ++struct mali_internal_sync_fence_waiter; + -+u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag); ++typedef void (*mali_internal_sync_callback_t)(struct mali_internal_sync_fence *sync_fence, ++ struct mali_internal_sync_fence_waiter *waiter); + -+_mali_osk_errcode_t mali_mem_os_put_page(struct page *page); ++struct mali_internal_sync_fence_waiter { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) ++ wait_queue_entry_t work; ++#else ++ 
wait_queue_t work; ++#endif ++ mali_internal_sync_callback_t callback; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ struct fence_cb cb; ++#else ++ struct dma_fence_cb cb; ++#endif ++#endif ++}; + -+_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count); ++/** ++ * Create a mali internal sync timeline. ++ * @param ops The implementation ops for the mali internal sync timeline ++ * @param size The size to allocate ++ * @param name The sync_timeline name ++ * @return The new mali internal sync timeline if successful, NULL if not. ++ */ ++struct mali_internal_sync_timeline *mali_internal_sync_timeline_create(const struct mali_internal_sync_timeline_ops *ops, ++ int size, const char *name); + -+_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props); ++/** ++ * Destroy one mali internal sync timeline. ++ * @param sync_timeline The mali internal sync timeline to destroy. ++ */ ++void mali_internal_sync_timeline_destroy(struct mali_internal_sync_timeline *sync_timeline); + -+void mali_mem_os_mali_unmap(mali_mem_allocation *alloc); ++/** ++ * Signal one mali internal sync timeline. ++ * @param sync_timeline The mali internal sync timeline to signal. ++ */ ++void mali_internal_sync_timeline_signal(struct mali_internal_sync_timeline *sync_timeline); + -+int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); ++/** ++ * Create one mali internal sync point. ++ * @param sync_timeline The mali internal sync timeline to add this mali internal sync point. ++ * @return the new mali internal sync point if successful, NULL if not. ++ */ ++struct mali_internal_sync_point *mali_internal_sync_point_create(struct mali_internal_sync_timeline *sync_timeline, int size); + -+_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size); ++/** ++ * Merge mali internal sync fences ++ * @param sync_fence1 The mali internal sync fence to merge ++ * @param sync_fence2 The mali internal sync fence to merge ++ * @return the new mali internal sync fence if successful, NULL if not. ++ */ ++struct mali_internal_sync_fence *mali_internal_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, ++ struct mali_internal_sync_fence *sync_fence2); + -+#endif /* __MALI_MEMORY_OS_ALLOC_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c ++/** ++ * Get the mali internal sync fence from sync fd ++ * @param fd The sync handle to get the mali internal sync fence ++ * @return the mali internal sync fence if successful, NULL if not. 
++ */ ++struct mali_internal_sync_fence *mali_internal_sync_fence_fdget(int fd); ++ ++ ++void mali_internal_sync_fence_waiter_init(struct mali_internal_sync_fence_waiter *waiter, ++ mali_internal_sync_callback_t callback); ++ ++int mali_internal_sync_fence_wait_async(struct mali_internal_sync_fence *sync_fence, ++ struct mali_internal_sync_fence_waiter *waiter); ++ ++int mali_internal_sync_fence_cancel_async(struct mali_internal_sync_fence *sync_fence, ++ struct mali_internal_sync_fence_waiter *waiter); ++ ++#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)*/ ++#endif /* _MALI_INTERNAL_SYNC_H */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c new file mode 100644 -index 000000000..63506bfbe +index 000000000..10ee22028 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c -@@ -0,0 +1,170 @@ -+/* ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.c +@@ -0,0 +1,1168 @@ ++/** + * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 @@ -315861,2792 +317382,2630 @@ index 000000000..63506bfbe + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include "mali_kernel_common.h" -+#include "mali_memory.h" -+#include "mali_memory_secure.h" -+#include "mali_osk.h" -+#include -+#include -+#include -+#include -+ -+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd) -+{ -+ struct dma_buf *buf; -+ MALI_DEBUG_ASSERT_POINTER(secure_mem); + -+ /* get dma buffer */ -+ buf = dma_buf_get(mem_fd); -+ if (IS_ERR_OR_NULL(buf)) { -+ MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++/** ++ * @file mali_kernel_linux.c ++ * Implementation of the Linux device driver entrypoints ++ */ ++#include "../platform/rk/custom_log.h" ++#include "../platform/rk/rk_ext.h" + -+ if (size != buf->size) { -+ MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n")); -+ goto failed_alloc_mem; -+ } ++#include /* kernel module definitions */ ++#include /* file system operations */ ++#include /* character device definitions */ ++#include /* memory manager definitions */ ++#include ++#include ++#include ++#include "mali_kernel_license.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ secure_mem->buf = buf; -+ secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev); -+ if (NULL == secure_mem->attachment) { -+ MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n")); -+ goto failed_dma_attach; -+ } ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_kernel_core.h" ++#include "mali_osk.h" ++#include "mali_kernel_linux.h" ++#include "mali_ukk.h" ++#include "mali_ukk_wrappers.h" ++#include "mali_kernel_sysfs.h" ++#include "mali_pm.h" ++#include "mali_kernel_license.h" ++#include "mali_memory.h" ++#include "mali_memory_dma_buf.h" ++#include "mali_memory_manager.h" ++#include "mali_memory_swap_alloc.h" ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++#include "mali_profiling_internal.h" ++#endif ++#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) ++#include "mali_osk_profiling.h" ++#include "mali_dvfs_policy.h" + -+ secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL); -+ if 
(IS_ERR_OR_NULL(secure_mem->sgt)) { -+ MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n")); -+ goto failed_dma_map; -+ } ++static int is_first_resume = 1; ++/*Store the clk and vol for boot/insmod and mali_resume*/ ++static struct mali_gpu_clk_item mali_gpu_clk[2]; ++#endif + -+ secure_mem->count = size / MALI_MMU_PAGE_SIZE; ++/* Streamline support for the Mali driver */ ++#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_MALI400_PROFILING) ++/* Ask Linux to create the tracepoints */ ++#define CREATE_TRACE_POINTS ++#include "mali_linux_trace.h" + -+ return _MALI_OSK_ERR_OK; ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_event); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_hw_counter); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counters); ++#endif /* CONFIG_TRACEPOINTS */ + -+failed_dma_map: -+ dma_buf_detach(secure_mem->buf, secure_mem->attachment); -+failed_dma_attach: -+failed_alloc_mem: -+ dma_buf_put(buf); -+ return _MALI_OSK_ERR_FAULT; -+} ++#ifdef CONFIG_MALI_DEVFREQ ++#include "mali_devfreq.h" ++#include "mali_osk_mali.h" + -+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props) -+{ -+ struct mali_page_directory *pagedir; -+ struct scatterlist *sg; -+ u32 virt = vaddr; -+ u32 prop = props; -+ int i; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) ++#include ++#else ++/* In 3.13 the OPP include header file, types, and functions were all ++ * renamed. Use the old filename for the include, and define the new names to ++ * the old, when an old kernel is detected. ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) ++#include ++#else ++#include ++#endif /* Linux >= 3.13*/ ++#define dev_pm_opp_of_add_table of_init_opp_table ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) ++#define dev_pm_opp_of_remove_table of_free_opp_table ++#endif /* Linux >= 3.19 */ ++#endif /* Linux >= 4.4.0 */ ++#endif + -+ MALI_DEBUG_ASSERT_POINTER(secure_mem); -+ MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt); -+ MALI_DEBUG_ASSERT_POINTER(session); ++/* from the __malidrv_build_info.c file that is generated during build */ ++extern const char *__malidrv_build_info(void); + -+ pagedir = session->page_directory; ++/* Module parameter to control log level */ ++int mali_debug_level = 2; ++module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ ++MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output"); + -+ for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) { -+ u32 size = sg_dma_len(sg); -+ dma_addr_t phys = sg_dma_address(sg); ++extern int mali_max_job_runtime; ++module_param(mali_max_job_runtime, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_max_job_runtime, "Maximum allowed job runtime in msecs.\nJobs will be killed after this no matter what"); + -+ /* sg must be page aligned. 
*/ -+ MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE); -+ MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF)); ++extern int mali_l2_max_reads; ++module_param(mali_l2_max_reads, int, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_l2_max_reads, "Maximum reads for Mali L2 cache"); + -+ mali_mmu_pagedir_update(pagedir, virt, phys, size, prop); ++extern unsigned int mali_dedicated_mem_start; ++module_param(mali_dedicated_mem_start, uint, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_dedicated_mem_start, "Physical start address of dedicated Mali GPU memory."); + -+ MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x! \n", phys, virt)); -+ virt += size; -+ } ++extern unsigned int mali_dedicated_mem_size; ++module_param(mali_dedicated_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_dedicated_mem_size, "Size of dedicated Mali GPU memory."); + -+ return _MALI_OSK_ERR_OK; -+} ++extern unsigned int mali_shared_mem_size; ++module_param(mali_shared_mem_size, uint, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_shared_mem_size, "Size of shared Mali GPU memory."); + -+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc) -+{ -+ struct mali_session_data *session; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++#if defined(CONFIG_MALI400_PROFILING) ++extern int mali_boot_profiling; ++module_param(mali_boot_profiling, int, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_boot_profiling, "Start profiling as a part of Mali driver initialization"); ++#endif + -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); -+} ++extern int mali_max_pp_cores_group_1; ++module_param(mali_max_pp_cores_group_1, int, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_max_pp_cores_group_1, "Limit the number of PP cores to use from first PP group."); + ++extern int mali_max_pp_cores_group_2; ++module_param(mali_max_pp_cores_group_2, int, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_max_pp_cores_group_2, "Limit the number of PP cores to use from second PP group (Mali-450 only)."); + -+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) -+{ ++extern unsigned int mali_mem_swap_out_threshold_value; ++module_param(mali_mem_swap_out_threshold_value, uint, S_IRUSR | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_mem_swap_out_threshold_value, "Threshold value used to limit how much swappable memory cached in Mali driver."); + -+ int ret = 0; -+ struct scatterlist *sg; -+ mali_mem_secure *secure_mem = &mem_bkend->secure_mem; -+ unsigned long addr = vma->vm_start; -+ int i; ++#if defined(CONFIG_MALI_DVFS) ++/** the max fps the same as display vsync default 60, can set by module insert parameter */ ++extern int mali_max_system_fps; ++module_param(mali_max_system_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_max_system_fps, "Max system fps the same as display VSYNC."); + -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE); ++/** a lower limit on their desired FPS default 58, can set by module insert parameter*/ ++extern int mali_desired_fps; ++module_param(mali_desired_fps, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); ++MODULE_PARM_DESC(mali_desired_fps, "A bit lower than max_system_fps which user desired fps"); ++#endif + -+ for_each_sg(secure_mem->sgt->sgl, sg, 
secure_mem->sgt->nents, i) { -+ phys_addr_t phys; -+ dma_addr_t dev_addr; -+ u32 size, j; -+ dev_addr = sg_dma_address(sg); -+#if defined(CONFIG_ARM64) ||LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -+ phys = dma_to_phys(&mali_platform_device->dev, dev_addr); -+#else -+ phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr))); ++#if MALI_ENABLE_CPU_CYCLES ++#include ++#include ++#include ++static struct timer_list mali_init_cpu_clock_timers[8]; ++static u32 mali_cpu_clock_last_value[8] = {0,}; +#endif -+ size = sg_dma_len(sg); -+ MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE); + -+ for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) { -+ ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys)); ++/* Export symbols from common code: mali_user_settings.c */ ++#include "mali_user_settings_db.h" ++EXPORT_SYMBOL(mali_set_user_setting); ++EXPORT_SYMBOL(mali_get_user_setting); + -+ if (unlikely(VM_FAULT_NOPAGE != ret)) { -+ return -EFAULT; -+ } -+ addr += _MALI_OSK_MALI_PAGE_SIZE; -+ phys += _MALI_OSK_MALI_PAGE_SIZE; ++static char mali_dev_name[] = "mali"; /* should be const, but the functions we call requires non-cost */ + -+ MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x! \n", phys, addr)); -+ } -+ } -+ return ret; -+} ++/* This driver only supports one Mali device, and this variable stores this single platform device */ ++struct platform_device *mali_platform_device = NULL; + -+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend) -+{ -+ struct mali_mem_secure *mem; -+ mali_mem_allocation *alloc = mem_bkend->mali_allocation; -+ u32 free_pages_nr = 0; -+ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE); ++/* This driver only supports one Mali device, and this variable stores the exposed misc device (/dev/mali) */ ++static struct miscdevice mali_miscdevice = { 0, }; + -+ mem = &mem_bkend->secure_mem; -+ MALI_DEBUG_ASSERT_POINTER(mem->attachment); -+ MALI_DEBUG_ASSERT_POINTER(mem->buf); -+ MALI_DEBUG_ASSERT_POINTER(mem->sgt); -+ /* Unmap the memory from the mali virtual address space. */ -+ mali_mem_secure_mali_unmap(alloc); -+ mutex_lock(&mem_bkend->mutex); -+ dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL); -+ dma_buf_detach(mem->buf, mem->attachment); -+ dma_buf_put(mem->buf); -+ mutex_unlock(&mem_bkend->mutex); ++static int mali_miscdevice_register(struct platform_device *pdev); ++static void mali_miscdevice_unregister(void); + -+ free_pages_nr = mem->count; ++static int mali_open(struct inode *inode, struct file *filp); ++static int mali_release(struct inode *inode, struct file *filp); ++static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + -+ return free_pages_nr; -+} ++static int mali_probe(struct platform_device *pdev); ++static int mali_remove(struct platform_device *pdev); + ++static int mali_driver_suspend_scheduler(struct device *dev); ++static int mali_driver_resume_scheduler(struct device *dev); + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h -new file mode 100644 -index 000000000..48691d479 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2010, 2013, 2015-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEMORY_SECURE_H__ -+#define __MALI_MEMORY_SECURE_H__ ++#ifdef CONFIG_PM_RUNTIME ++static int mali_driver_runtime_suspend(struct device *dev); ++static int mali_driver_runtime_resume(struct device *dev); ++static int mali_driver_runtime_idle(struct device *dev); ++#endif + -+#include "mali_session.h" -+#include "mali_memory.h" -+#include ++#if defined(MALI_FAKE_PLATFORM_DEVICE) ++#if defined(CONFIG_MALI_DT) ++extern int mali_platform_device_init(struct platform_device *device); ++extern int mali_platform_device_deinit(struct platform_device *device); ++#else ++extern int mali_platform_device_register(void); ++extern int mali_platform_device_unregister(void); ++#endif ++#endif + -+#include "mali_memory_types.h" ++extern int rk_platform_init_opp_table(struct mali_device *mdev); ++extern void rk_platform_uninit_opp_table(struct mali_device *mdev); + -+_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd); ++/* Linux power management operations provided by the Mali device driver */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)) ++struct pm_ext_ops mali_dev_ext_pm_ops = { ++ .base = ++ { ++ .suspend = mali_driver_suspend_scheduler, ++ .resume = mali_driver_resume_scheduler, ++ .freeze = mali_driver_suspend_scheduler, ++ .thaw = mali_driver_resume_scheduler, ++ }, ++}; ++#else ++static const struct dev_pm_ops mali_dev_pm_ops = { ++#ifdef CONFIG_PM_RUNTIME ++ .runtime_suspend = mali_driver_runtime_suspend, ++ .runtime_resume = mali_driver_runtime_resume, ++ .runtime_idle = mali_driver_runtime_idle, ++#endif ++ .suspend = mali_driver_suspend_scheduler, ++ .resume = mali_driver_resume_scheduler, ++ .freeze = mali_driver_suspend_scheduler, ++ .thaw = mali_driver_resume_scheduler, ++ .poweroff = mali_driver_suspend_scheduler, ++}; ++#endif + -+_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props); ++#ifdef CONFIG_MALI_DT ++static struct of_device_id base_dt_ids[] = { ++ {.compatible = "arm,mali-300"}, ++ /*-------------------------------------------------------*/ ++ /* rk_ext : to use dts_for_mali_ko_befor_r5p0-01rel0. 
*/ ++ // {.compatible = "arm,mali-400"}, ++ {.compatible = "arm,mali400"}, ++ /*-------------------------------------------------------*/ ++ {.compatible = "arm,mali-450"}, ++ {.compatible = "arm,mali-470"}, ++ {}, ++}; + -+void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc); ++MODULE_DEVICE_TABLE(of, base_dt_ids); ++#endif + -+int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); ++/* The Mali device driver struct */ ++static struct platform_driver mali_platform_driver = { ++ .probe = mali_probe, ++ .remove = mali_remove, ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)) ++ .pm = &mali_dev_ext_pm_ops, ++#endif ++ .driver = ++ { ++ .name = MALI_GPU_NAME_UTGARD, ++ .owner = THIS_MODULE, ++ .bus = &platform_bus_type, ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)) ++ .pm = &mali_dev_pm_ops, ++#endif ++#ifdef CONFIG_MALI_DT ++ .of_match_table = of_match_ptr(base_dt_ids), ++#endif ++ }, ++}; + -+u32 mali_mem_secure_release(mali_mem_backend *mem_bkend); ++/* Linux misc device operations (/dev/mali) */ ++struct file_operations mali_fops = { ++ .owner = THIS_MODULE, ++ .open = mali_open, ++ .release = mali_release, ++ .unlocked_ioctl = mali_ioctl, ++ .compat_ioctl = mali_ioctl, ++ .mmap = mali_mmap ++}; + -+#endif /* __MALI_MEMORY_SECURE_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c -new file mode 100644 -index 000000000..d682785b9 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c -@@ -0,0 +1,943 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++#if MALI_ENABLE_CPU_CYCLES ++void mali_init_cpu_time_counters(int reset, int enable_divide_by_64) ++{ ++ /* The CPU assembly reference used is: ARM Architecture Reference Manual ARMv7-AR C.b */ ++ u32 write_value; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_memory.h" -+#include "mali_memory_manager.h" -+#include "mali_memory_virtual.h" -+#include "mali_memory_cow.h" -+#include "mali_ukk.h" -+#include "mali_kernel_utilization.h" -+#include "mali_memory_swap_alloc.h" ++ /* See B4.1.116 PMCNTENSET, Performance Monitors Count Enable Set register, VMSA */ ++ /* setting p15 c9 c12 1 to 0x8000000f==CPU_CYCLE_ENABLE |EVENT_3_ENABLE|EVENT_2_ENABLE|EVENT_1_ENABLE|EVENT_0_ENABLE */ ++ asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f)); + + -+static struct _mali_osk_bitmap idx_mgr; -+static struct file *global_swap_file; -+static struct address_space *global_swap_space; -+static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL; -+static u32 mem_backend_swapped_pool_size; -+#ifdef MALI_MEM_SWAP_TRACKING -+static u32 mem_backend_swapped_unlock_size; -+#endif -+/* Lock order: mem_backend_swapped_pool_lock > each memory backend's mutex lock. 
-+ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */ -+static struct mutex mem_backend_swapped_pool_lock; -+static struct list_head mem_backend_swapped_pool; ++ /* See B4.1.117 PMCR, Performance Monitors Control Register. Writing to p15, c9, c12, 0 */ ++ write_value = 1 << 0; /* Bit 0 set. Enable counters */ ++ if (reset) { ++ write_value |= 1 << 1; /* Reset event counters */ ++ write_value |= 1 << 2; /* Reset cycle counter */ ++ } ++ if (enable_divide_by_64) { ++ write_value |= 1 << 3; /* Enable the Clock divider by 64 */ ++ } ++ write_value |= 1 << 4; /* Export enable. Not needed */ ++ asm volatile("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(write_value)); + -+extern struct mali_mem_os_allocator mali_mem_os_allocator; ++ /* PMOVSR Overflow Flag Status Register - Clear Clock and Event overflows */ ++ asm volatile("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f)); + -+#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024) -+#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0) /* Used to mark the given memory cookie is invalidate. */ -+#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF) -+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX \ -+ ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT) -+#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */ + -+unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE; ++ /* See B4.1.124 PMUSERENR - setting p15 c9 c14 to 1" */ ++ /* User mode access to the Performance Monitors enabled. */ ++ /* Lets User space read cpu clock cycles */ ++ asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1)); ++} + -+/** -+ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too -+ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed -+ * the threshold value of the swapped pool size. ++/** A timer function that configures the cycle clock counter on current CPU. ++ * The function \a mali_init_cpu_time_counters_on_all_cpus sets up this ++ * function to trigger on all Cpus during module load. + */ -+typedef enum { -+ MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100, -+ MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257, -+} _mali_mem_swap_pool_shrink_type_t; ++static void mali_init_cpu_clock_timer_func(unsigned long data) ++{ ++ int reset_counters, enable_divide_clock_counter_by_64; ++ int current_cpu = raw_smp_processor_id(); ++ unsigned int sample0; ++ unsigned int sample1; + -+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg); ++ MALI_IGNORE(data); + -+_mali_osk_errcode_t mali_mem_swap_init(void) ++ reset_counters = 1; ++ enable_divide_clock_counter_by_64 = 0; ++ mali_init_cpu_time_counters(reset_counters, enable_divide_clock_counter_by_64); ++ ++ sample0 = mali_get_cpu_cyclecount(); ++ sample1 = mali_get_cpu_cyclecount(); ++ ++ MALI_DEBUG_PRINT(3, ("Init Cpu %d cycle counter- First two samples: %08x %08x \n", current_cpu, sample0, sample1)); ++} ++ ++/** A timer functions for storing current time on all cpus. ++ * Used for checking if the clocks have similar values or if they are drifting. 
++ */ ++static void mali_print_cpu_clock_timer_func(unsigned long data) +{ -+ gfp_t flags = __GFP_NORETRY | __GFP_NOWARN; ++ int current_cpu = raw_smp_processor_id(); ++ unsigned int sample0; + -+ if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) { -+ return _MALI_OSK_ERR_NOMEM; ++ MALI_IGNORE(data); ++ sample0 = mali_get_cpu_cyclecount(); ++ if (current_cpu < 8) { ++ mali_cpu_clock_last_value[current_cpu] = sample0; + } ++} + -+ global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE); -+ if (IS_ERR(global_swap_file)) { -+ _mali_osk_bitmap_term(&idx_mgr); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++/** Init the performance registers on all CPUs to count clock cycles. ++ * For init \a print_only should be 0. ++ * If \a print_only is 1, it will intead print the current clock value of all CPUs. ++ */ ++void mali_init_cpu_time_counters_on_all_cpus(int print_only) ++{ ++ int i = 0; ++ int cpu_number; ++ int jiffies_trigger; ++ int jiffies_wait; + -+ global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping; ++ jiffies_wait = 2; ++ jiffies_trigger = jiffies + jiffies_wait; + -+ mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL); -+ if (NULL == mali_mem_swap_out_workq) { -+ _mali_osk_bitmap_term(&idx_mgr); -+ fput(global_swap_file); -+ return _MALI_OSK_ERR_NOMEM; ++ for (i = 0 ; i < 8 ; i++) { ++ init_timer(&mali_init_cpu_clock_timers[i]); ++ if (print_only) mali_init_cpu_clock_timers[i].function = mali_print_cpu_clock_timer_func; ++ else mali_init_cpu_clock_timers[i].function = mali_init_cpu_clock_timer_func; ++ mali_init_cpu_clock_timers[i].expires = jiffies_trigger ; ++ } ++ cpu_number = cpumask_first(cpu_online_mask); ++ for (i = 0 ; i < 8 ; i++) { ++ int next_cpu; ++ add_timer_on(&mali_init_cpu_clock_timers[i], cpu_number); ++ next_cpu = cpumask_next(cpu_number, cpu_online_mask); ++ if (next_cpu >= nr_cpu_ids) break; ++ cpu_number = next_cpu; + } + -+#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE) -+ flags |= GFP_HIGHUSER; -+#else -+#ifdef CONFIG_ZONE_DMA32 -+ flags |= GFP_DMA32; -+#else -+#ifdef CONFIG_ZONE_DMA -+ flags |= GFP_DMA; -+#else -+ /* arm64 utgard only work on < 4G, but the kernel -+ * didn't provide method to allocte memory < 4G -+ */ -+ MALI_DEBUG_ASSERT(0); -+#endif -+#endif -+#endif ++ while (jiffies_wait) jiffies_wait = schedule_timeout_uninterruptible(jiffies_wait); + -+ /* When we use shmem_read_mapping_page to allocate/swap-in, it will -+ * use these flags to allocate new page if need.*/ -+ mapping_set_gfp_mask(global_swap_space, flags); ++ for (i = 0 ; i < 8 ; i++) { ++ del_timer_sync(&mali_init_cpu_clock_timers[i]); ++ } + -+ mem_backend_swapped_pool_size = 0; -+#ifdef MALI_MEM_SWAP_TRACKING -+ mem_backend_swapped_unlock_size = 0; ++ if (print_only) { ++ if ((0 == mali_cpu_clock_last_value[2]) && (0 == mali_cpu_clock_last_value[3])) { ++ /* Diff can be printed if we want to check if the clocks are in sync ++ int diff = mali_cpu_clock_last_value[0] - mali_cpu_clock_last_value[1];*/ ++ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1])); ++ } else { ++ MALI_DEBUG_PRINT(2, ("CPU cycle counters readout all: %08x %08x %08x %08x\n", mali_cpu_clock_last_value[0], mali_cpu_clock_last_value[1], mali_cpu_clock_last_value[2], mali_cpu_clock_last_value[3])); ++ } ++ } ++} +#endif -+ 
mutex_init(&mem_backend_swapped_pool_lock); -+ INIT_LIST_HEAD(&mem_backend_swapped_pool); + -+ MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20)); ++int mali_module_init(void) ++{ ++ int err = 0; + -+ return _MALI_OSK_ERR_OK; -+} ++ MALI_DEBUG_PRINT(2, ("Inserting Mali v%d device driver. \n", _MALI_API_VERSION)); ++ MALI_DEBUG_PRINT(2, ("Compiled: %s, time: %s.\n", __DATE__, __TIME__)); ++ MALI_DEBUG_PRINT(2, ("Driver revision: %s\n", SVN_REV_STRING)); ++ ++ I("svn_rev_string_from_arm of this mali_ko is '%s', rk_ko_ver is '%d', built at '%s', on '%s'.", ++ SVN_REV_STRING, ++ RK_KO_VER, ++ __TIME__, ++ __DATE__); + -+void mali_mem_swap_term(void) -+{ -+ _mali_osk_bitmap_term(&idx_mgr); ++#if MALI_ENABLE_CPU_CYCLES ++ mali_init_cpu_time_counters_on_all_cpus(0); ++ MALI_DEBUG_PRINT(2, ("CPU cycle counter setup complete\n")); ++ /* Printing the current cpu counters */ ++ mali_init_cpu_time_counters_on_all_cpus(1); ++#endif + -+ fput(global_swap_file); ++ /* Initialize module wide settings */ ++#ifdef MALI_FAKE_PLATFORM_DEVICE ++#ifndef CONFIG_MALI_DT ++ MALI_DEBUG_PRINT(2, ("mali_module_init() registering device\n")); ++ err = mali_platform_device_register(); ++ if (0 != err) { ++ return err; ++ } ++#endif ++#endif + -+ _mali_osk_wq_delete_work(mali_mem_swap_out_workq); ++ MALI_DEBUG_PRINT(2, ("mali_module_init() registering driver\n")); + -+ MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool)); -+ MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size); ++ err = platform_driver_register(&mali_platform_driver); + -+ return; -+} ++ if (0 != err) { ++ MALI_DEBUG_PRINT(2, ("mali_module_init() Failed to register driver (%d)\n", err)); ++#ifdef MALI_FAKE_PLATFORM_DEVICE ++#ifndef CONFIG_MALI_DT ++ mali_platform_device_unregister(); ++#endif ++#endif ++ mali_platform_device = NULL; ++ return err; ++ } + -+struct file *mali_mem_swap_get_global_swap_file(void) -+{ -+ return global_swap_file; -+} ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++ err = _mali_internal_profiling_init(mali_boot_profiling ? MALI_TRUE : MALI_FALSE); ++ if (0 != err) { ++ /* No biggie if we wheren't able to initialize the profiling */ ++ MALI_PRINT_ERROR(("Failed to initialize profiling, feature will be unavailable\n")); ++ } ++#endif + -+/* Judge if swappable backend in swapped pool. 
*/ -+static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend) -+{ -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); ++ /* Tracing the current frequency and voltage from boot/insmod*/ ++#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) ++ /* Just call mali_get_current_gpu_clk_item(),to record current clk info.*/ ++ mali_get_current_gpu_clk_item(&mali_gpu_clk[0]); ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ mali_gpu_clk[0].clock, ++ mali_gpu_clk[0].vol / 1000, ++ 0, 0, 0); ++#endif + -+ return !list_empty(&mem_bkend->list); ++ MALI_PRINT(("Mali device driver loaded\n")); ++ ++ return 0; /* Success */ +} + -+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend) ++void mali_module_exit(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); -+ -+ mutex_lock(&mem_backend_swapped_pool_lock); -+ mutex_lock(&mem_bkend->mutex); ++ MALI_DEBUG_PRINT(2, ("Unloading Mali v%d device driver.\n", _MALI_API_VERSION)); + -+ if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) { -+ mutex_unlock(&mem_bkend->mutex); -+ mutex_unlock(&mem_backend_swapped_pool_lock); -+ return; -+ } ++ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering driver\n")); + -+ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list)); ++ platform_driver_unregister(&mali_platform_driver); + -+ list_del_init(&mem_bkend->list); ++#if defined(MALI_FAKE_PLATFORM_DEVICE) ++#ifndef CONFIG_MALI_DT ++ MALI_DEBUG_PRINT(2, ("mali_module_exit() unregistering device\n")); ++ mali_platform_device_unregister(); ++#endif ++#endif + -+ mutex_unlock(&mem_bkend->mutex); ++ /* Tracing the current frequency and voltage from rmmod*/ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ 0, ++ 0, ++ 0, 0, 0); + -+ mem_backend_swapped_pool_size -= mem_bkend->size; ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++ _mali_internal_profiling_term(); ++#endif + -+ mutex_unlock(&mem_backend_swapped_pool_lock); ++ MALI_PRINT(("Mali device driver unloaded\n")); +} + -+static void mali_mem_swap_out_page_node(mali_page_node *page_node) ++#ifdef CONFIG_MALI_DEVFREQ ++struct mali_device *mali_device_alloc(void) +{ -+ MALI_DEBUG_ASSERT(page_node); ++ return kzalloc(sizeof(struct mali_device), GFP_KERNEL); ++} + -+ dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr, -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); -+ set_page_dirty(page_node->swap_it->page); -+ put_page(page_node->swap_it->page); ++void mali_device_free(struct mali_device *mdev) ++{ ++ kfree(mdev); +} ++#endif + -+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend) ++static int mali_probe(struct platform_device *pdev) +{ -+ mali_page_node *m_page; ++ int err; ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev; ++#endif + -+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex)); ++ MALI_DEBUG_PRINT(2, ("mali_probe(): Called for platform device %s\n", pdev->name)); + -+ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) { -+ return; ++ if (NULL != mali_platform_device) { ++ /* Already connected to a device, return error */ ++ MALI_PRINT_ERROR(("mali_probe(): The Mali driver is already connected with a Mali device.")); ++ return -EEXIST; + } + -+ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN; ++ 
mali_platform_device = pdev; + -+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { -+ mali_mem_swap_out_page_node(m_page); ++ dev_info(&pdev->dev, "mali_platform_device->num_resources = %d\n", ++ mali_platform_device->num_resources); ++ ++ { ++ int i = 0; ++ ++ for(i = 0; i < mali_platform_device->num_resources; i++) ++ dev_info(&pdev->dev, ++ "resource[%d].start = 0x%pa\n", ++ i, ++ &mali_platform_device->resource[i].start); + } + -+ return; -+} ++#ifdef CONFIG_MALI_DT ++ /* If we use DT to initialize our DDK, we have to prepare somethings. */ ++ err = mali_platform_device_init(mali_platform_device); ++ if (0 != err) { ++ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize platform device.")); ++ mali_platform_device = NULL; ++ return -EFAULT; ++ } ++#endif + -+static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node) -+{ -+ mali_page_node *m_page; ++#ifdef CONFIG_MALI_DEVFREQ ++ mdev = mali_device_alloc(); ++ if (!mdev) { ++ MALI_PRINT_ERROR(("Can't allocate mali device private data\n")); ++ return -ENOMEM; ++ } + -+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex)); ++ mdev->dev = &pdev->dev; ++ dev_set_drvdata(mdev->dev, mdev); + -+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { -+ if (m_page == page_node) { -+ break; ++ /*Initilization clock and regulator*/ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_REGULATOR) ++ mdev->regulator = regulator_get_optional(mdev->dev, "mali"); ++ if (IS_ERR_OR_NULL(mdev->regulator)) { ++ MALI_DEBUG_PRINT(2, ("Continuing without Mali regulator control\n")); ++ mdev->regulator = NULL; ++ /* Allow probe to continue without regulator */ ++ } ++#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++ ++ mdev->num_clks = devm_clk_bulk_get_all(mdev->dev, &mdev->clks); ++ if (mdev->num_clks < 1) { ++ MALI_DEBUG_PRINT(2, ("Continuing without Mali clock control\n")); ++ mdev->num_clks = 0; ++ mdev->clock = NULL; ++ } else { ++ /* Get "clk_mali" in the device tree for gpu dvfs */ ++ mdev->clock = clk_get(mdev->dev, "clk_mali"); ++ if (IS_ERR_OR_NULL(mdev->clock)) { ++ MALI_DEBUG_PRINT(2, ("Continuing without Mali dvfs clock\n")); ++ /* Allow probe to continue without clock. */ ++ mdev->clock = NULL; + } -+ mali_mem_swap_out_page_node(m_page); + } -+} ++ err = clk_bulk_prepare_enable(mdev->num_clks, mdev->clks); ++ if (err) { ++ MALI_PRINT_ERROR(("Failed to prepare clock (%d)\n", err)); ++ goto clock_prepare_failed; ++ } + -+static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type) -+{ -+ mali_mem_backend *bkend, *tmp_bkend; -+ long system_free_size; -+ u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value; ++ err = rk_platform_init_opp_table(mdev); ++ if (err) ++ MALI_DEBUG_PRINT(3, ("Failed to init_opp_table\n")); + -+ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock)); ++ /* initilize pm metrics related */ ++ if (mali_pm_metrics_init(mdev) < 0) { ++ MALI_DEBUG_PRINT(2, ("mali pm metrics init failed\n")); ++ goto pm_metrics_init_failed; ++ } + -+ if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) { -+ /** -+ * When we met that system memory is very low and Mali locked swappable memory size is less than -+ * threshold value, and at the same time, GPU load is very low and don't need high performance, -+ * at this condition, we can unlock more swap memory backend from swapped backends pool. 
-+ */ -+ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION; -+ temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2); -+ } else { -+ /* When we add swappable memory backends to swapped pool, we need to think that we couldn't -+ * hold too much swappable backends in Mali driver, and also we need considering performance. -+ * So there is a balance for swapping out memory backend, we should follow the following conditions: -+ * 1. Total memory size in global mem backend swapped pool is more than the defined threshold value. -+ * 2. System level free memory size is less than the defined threshold value. -+ * 3. Please note that GPU utilization problem isn't considered in this condition. -+ */ -+ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS; -+ temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value; ++ if (mali_devfreq_init(mdev) < 0) { ++ MALI_DEBUG_PRINT(2, ("mali devfreq init failed\n")); ++ goto devfreq_init_failed; + } ++ clk_bulk_disable(mdev->num_clks, mdev->clks); ++#endif + -+ /* Get system free pages number. */ -+ system_free_size = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE; -+ last_gpu_utilization = _mali_ukk_utilization_gp_pp(); + -+ if ((last_gpu_utilization < gpu_utilization_threshold_value) -+ && (system_free_size < mali_mem_swap_out_threshold_value) -+ && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) { -+ list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) { -+ if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) { -+ break; -+ } ++ if (_MALI_OSK_ERR_OK == _mali_osk_wq_init()) { ++ /* Initialize the Mali GPU HW specified by pdev */ ++ if (_MALI_OSK_ERR_OK == mali_initialize_subsystems()) { ++ /* Register a misc device (so we are accessible from user space) */ ++ err = mali_miscdevice_register(pdev); ++ if (0 == err) { ++ /* Setup sysfs entries */ ++ err = mali_sysfs_register(mali_dev_name); + -+ mutex_lock(&bkend->mutex); ++ if (0 == err) { ++ MALI_DEBUG_PRINT(2, ("mali_probe(): Successfully initialized driver for platform device %s\n", pdev->name)); + -+ /* check if backend is in use. 
*/ -+ if (0 < bkend->using_count) { -+ mutex_unlock(&bkend->mutex); -+ continue; ++ return 0; ++ } else { ++ MALI_PRINT_ERROR(("mali_probe(): failed to register sysfs entries")); ++ } ++ mali_miscdevice_unregister(); ++ } else { ++ MALI_PRINT_ERROR(("mali_probe(): failed to register Mali misc device.")); + } -+ -+ mali_mem_swap_unlock_single_mem_backend(bkend); -+ list_del_init(&bkend->list); -+ mem_backend_swapped_pool_size -= bkend->size; -+#ifdef MALI_MEM_SWAP_TRACKING -+ mem_backend_swapped_unlock_size += bkend->size; -+#endif -+ mutex_unlock(&bkend->mutex); ++ mali_terminate_subsystems(); ++ } else { ++ MALI_PRINT_ERROR(("mali_probe(): Failed to initialize Mali device driver.")); + } ++ _mali_osk_wq_term(); + } + -+ return; -+} -+ -+static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg) -+{ -+ MALI_IGNORE(arg); -+ -+ mutex_lock(&mem_backend_swapped_pool_lock); ++#ifdef CONFIG_MALI_DEVFREQ ++ mali_devfreq_term(mdev); ++devfreq_init_failed: ++ mali_pm_metrics_term(mdev); ++pm_metrics_init_failed: ++ clk_bulk_disable_unprepare(mdev->num_clks, mdev->clks); ++clock_prepare_failed: ++ clk_bulk_put(mdev->num_clks, mdev->clks); ++ clk_put(mdev->clock); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_PM_OPP) ++ rk_platform_uninit_opp_table(mdev); ++#endif + -+ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_REGULATOR) ++ regulator_put(mdev->regulator); ++#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++ mali_device_free(mdev); ++#endif + -+ mutex_unlock(&mem_backend_swapped_pool_lock); ++#ifdef CONFIG_MALI_DT ++ mali_platform_device_deinit(mali_platform_device); ++#endif ++ mali_platform_device = NULL; ++ return -EFAULT; +} + -+/** -+ * After PP job finished, we add all of swappable memory backend used by this PP -+ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold -+ * value, we also need to shrink the swapped pool start from the head of the list. 
-+ */ -+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend) ++static int mali_remove(struct platform_device *pdev) +{ -+ mutex_lock(&mem_backend_swapped_pool_lock); -+ mutex_lock(&mem_bkend->mutex); -+ -+ if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) { -+ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list)); ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev = dev_get_drvdata(&pdev->dev); ++#endif + -+ list_del_init(&mem_bkend->list); -+ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool); -+ mutex_unlock(&mem_bkend->mutex); -+ mutex_unlock(&mem_backend_swapped_pool_lock); -+ return; -+ } ++ MALI_DEBUG_PRINT(2, ("mali_remove() called for platform device %s\n", pdev->name)); ++ mali_sysfs_unregister(); ++ mali_miscdevice_unregister(); ++ mali_terminate_subsystems(); ++ _mali_osk_wq_term(); + -+ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool); ++#ifdef CONFIG_MALI_DEVFREQ ++ mali_devfreq_term(mdev); + -+ mutex_unlock(&mem_bkend->mutex); -+ mem_backend_swapped_pool_size += mem_bkend->size; ++ mali_pm_metrics_term(mdev); + -+ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS); ++ if (mdev->clock) { ++ clk_put(mdev->clock); ++ mdev->clock = NULL; ++ } ++ clk_bulk_unprepare(mdev->num_clks, mdev->clks); ++ clk_bulk_put(mdev->num_clks, mdev->clks); + -+ mutex_unlock(&mem_backend_swapped_pool_lock); -+ return; -+} ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_PM_OPP) ++ rk_platform_uninit_opp_table(mdev); ++#endif + ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_REGULATOR) ++ regulator_put(mdev->regulator); ++#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++ mali_device_free(mdev); ++#endif + -+u32 mali_mem_swap_idx_alloc(void) -+{ -+ return _mali_osk_bitmap_alloc(&idx_mgr); ++#ifdef CONFIG_MALI_DT ++ mali_platform_device_deinit(mali_platform_device); ++#endif ++ mali_platform_device = NULL; ++ return 0; +} + -+void mali_mem_swap_idx_free(u32 idx) ++static int mali_miscdevice_register(struct platform_device *pdev) +{ -+ _mali_osk_bitmap_free(&idx_mgr, idx); -+} ++ int err; + -+static u32 mali_mem_swap_idx_range_alloc(u32 count) -+{ -+ u32 index; ++ mali_miscdevice.minor = MISC_DYNAMIC_MINOR; ++ mali_miscdevice.name = mali_dev_name; ++ mali_miscdevice.fops = &mali_fops; ++ mali_miscdevice.parent = get_device(&pdev->dev); + -+ index = _mali_osk_bitmap_alloc_range(&idx_mgr, count); ++ err = misc_register(&mali_miscdevice); ++ if (0 != err) { ++ MALI_PRINT_ERROR(("Failed to register misc device, misc_register() returned %d\n", err)); ++ } + -+ return index; ++ return err; +} + -+static void mali_mem_swap_idx_range_free(u32 idx, int num) ++static void mali_miscdevice_unregister(void) +{ -+ _mali_osk_bitmap_free_range(&idx_mgr, idx, num); ++ misc_deregister(&mali_miscdevice); +} + -+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void) ++static int mali_driver_suspend_scheduler(struct device *dev) +{ -+ mali_swap_item *swap_item; ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ if (!mdev) ++ return -ENODEV; ++#endif + -+ swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL); ++ pm_runtime_force_suspend(dev); ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ devfreq_suspend_device(mdev->devfreq); ++#endif + -+ if (NULL == swap_item) { -+ return NULL; ++ mali_pm_os_suspend(MALI_TRUE); ++ /* Tracing the frequency and voltage after mali is 
suspended */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ 0, ++ 0, ++ 0, 0, 0); ++ return 0; ++} ++ ++static int mali_driver_resume_scheduler(struct device *dev) ++{ ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ if (!mdev) ++ return -ENODEV; ++#endif ++ ++ /* Tracing the frequency and voltage after mali is resumed */ ++#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) ++ /* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/ ++ if (is_first_resume == 1) { ++ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]); ++ is_first_resume = 0; + } ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ mali_gpu_clk[1].clock, ++ mali_gpu_clk[1].vol / 1000, ++ 0, 0, 0); ++#endif ++ mali_pm_os_resume(); + -+ atomic_set(&swap_item->ref_count, 1); -+ swap_item->page = NULL; -+ atomic_add(1, &mali_mem_os_allocator.allocated_pages); ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ devfreq_resume_device(mdev->devfreq); ++#endif ++ pm_runtime_force_resume(dev); + -+ return swap_item; ++ return 0; +} + -+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item) ++#ifdef CONFIG_PM_RUNTIME ++static int mali_driver_runtime_suspend(struct device *dev) +{ -+ struct inode *file_node; -+ long long start, end; -+ -+ /* If this swap item is shared, we just reduce the reference counter. */ -+ if (0 == atomic_dec_return(&swap_item->ref_count)) { -+ file_node = global_swap_file->f_path.dentry->d_inode; -+ start = swap_item->idx; -+ start = start << 12; -+ end = start + PAGE_SIZE; -+ -+ shmem_truncate_range(file_node, start, (end - 1)); ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ if (!mdev) ++ return -ENODEV; ++#endif + -+ mali_mem_swap_idx_free(swap_item->idx); ++ if (MALI_TRUE == mali_pm_runtime_suspend()) { ++ /* Tracing the frequency and voltage after mali is suspended */ ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ 0, ++ 0, ++ 0, 0, 0); + -+ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ MALI_DEBUG_PRINT(4, ("devfreq_suspend_device: stop devfreq monitor\n")); ++ devfreq_suspend_device(mdev->devfreq); ++#endif + -+ kfree(swap_item); ++ return 0; ++ } else { ++ return -EBUSY; + } +} + -+/* Used to allocate new swap item for new memory allocation and cow page for write. 
*/ -+struct mali_page_node *_mali_mem_swap_page_node_allocate(void) ++static int mali_driver_runtime_resume(struct device *dev) +{ -+ struct mali_page_node *m_page; -+ -+ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP); ++#ifdef CONFIG_MALI_DEVFREQ ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ if (!mdev) ++ return -ENODEV; ++#endif + -+ if (NULL == m_page) { -+ return NULL; ++ /* Tracing the frequency and voltage after mali is resumed */ ++#if defined(CONFIG_MALI400_PROFILING) && defined(CONFIG_MALI_DVFS) ++ /* Just call mali_get_current_gpu_clk_item() once,to record current clk info.*/ ++ if (is_first_resume == 1) { ++ mali_get_current_gpu_clk_item(&mali_gpu_clk[1]); ++ is_first_resume = 0; + } ++ _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | ++ MALI_PROFILING_EVENT_CHANNEL_GPU | ++ MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, ++ mali_gpu_clk[1].clock, ++ mali_gpu_clk[1].vol / 1000, ++ 0, 0, 0); ++#endif + -+ m_page->swap_it = mali_mem_swap_alloc_swap_item(); ++ mali_pm_runtime_resume(); + -+ if (NULL == m_page->swap_it) { -+ kfree(m_page); -+ return NULL; -+ } ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ MALI_DEBUG_PRINT(4, ("devfreq_resume_device: start devfreq monitor\n")); ++ devfreq_resume_device(mdev->devfreq); ++#endif ++ return 0; ++} + -+ return m_page; ++static int mali_driver_runtime_idle(struct device *dev) ++{ ++ /* Nothing to do */ ++ return 0; +} ++#endif + -+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page) ++static int mali_open(struct inode *inode, struct file *filp) +{ ++ struct mali_session_data *session_data; ++ _mali_osk_errcode_t err; + -+ mali_mem_swap_free_swap_item(m_page->swap_it); ++ /* input validation */ ++ if (mali_miscdevice.minor != iminor(inode)) { ++ MALI_PRINT_ERROR(("mali_open() Minor does not match\n")); ++ return -ENODEV; ++ } + -+ return _MALI_OSK_ERR_OK; -+} ++ /* allocated struct to track this session */ ++ err = _mali_ukk_open((void **)&session_data); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page) -+{ -+ _mali_mem_swap_put_page_node(m_page); ++ /* initialize file pointer */ ++ filp->f_pos = 0; + -+ kfree(m_page); ++ /* link in our session data */ ++ filp->private_data = (void *)session_data; + -+ return; ++ filp->f_mapping = mali_mem_swap_get_global_swap_file()->f_mapping; ++ ++ return 0; +} + -+u32 mali_mem_swap_free(mali_mem_swap *swap_mem) ++static int mali_release(struct inode *inode, struct file *filp) +{ -+ struct mali_page_node *m_page, *m_tmp; -+ u32 free_pages_nr = 0; ++ _mali_osk_errcode_t err; + -+ MALI_DEBUG_ASSERT_POINTER(swap_mem); ++ /* input validation */ ++ if (mali_miscdevice.minor != iminor(inode)) { ++ MALI_PRINT_ERROR(("mali_release() Minor does not match\n")); ++ return -ENODEV; ++ } + -+ list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP); ++ err = _mali_ukk_close((void **)&filp->private_data); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ /* free the page node and release the swap item, if the ref count is 1, -+ * then need also free the swap item. 
*/ -+ list_del(&m_page->list); -+ if (1 == _mali_page_node_get_ref_count(m_page)) { -+ free_pages_nr++; -+ } ++ return 0; ++} + -+ _mali_mem_swap_page_node_free(m_page); ++int map_errcode(_mali_osk_errcode_t err) ++{ ++ switch (err) { ++ case _MALI_OSK_ERR_OK : ++ return 0; ++ case _MALI_OSK_ERR_FAULT: ++ return -EFAULT; ++ case _MALI_OSK_ERR_INVALID_FUNC: ++ return -ENOTTY; ++ case _MALI_OSK_ERR_INVALID_ARGS: ++ return -EINVAL; ++ case _MALI_OSK_ERR_NOMEM: ++ return -ENOMEM; ++ case _MALI_OSK_ERR_TIMEOUT: ++ return -ETIMEDOUT; ++ case _MALI_OSK_ERR_RESTARTSYSCALL: ++ return -ERESTARTSYS; ++ case _MALI_OSK_ERR_ITEM_NOT_FOUND: ++ return -ENOENT; ++ default: ++ return -EFAULT; + } -+ -+ return free_pages_nr; +} + -+static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem) ++static long mali_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ -+ struct mali_page_node *m_page, *m_tmp; -+ u32 free_pages_nr = 0; -+ -+ MALI_DEBUG_ASSERT_POINTER(cow_mem); ++ int err; ++ struct mali_session_data *session_data; + -+ list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) { -+ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP); ++ MALI_DEBUG_PRINT(7, ("Ioctl received 0x%08X 0x%08lX\n", cmd, arg)); + -+ /* free the page node and release the swap item, if the ref count is 1, -+ * then need also free the swap item. */ -+ list_del(&m_page->list); -+ if (1 == _mali_page_node_get_ref_count(m_page)) { -+ free_pages_nr++; -+ } ++ session_data = (struct mali_session_data *)filp->private_data; ++ if (NULL == session_data) { ++ MALI_DEBUG_PRINT(7, ("filp->private_data was NULL\n")); ++ return -ENOTTY; ++ } + -+ _mali_mem_swap_page_node_free(m_page); ++ if (NULL == (void *)arg) { ++ MALI_DEBUG_PRINT(7, ("arg was NULL\n")); ++ return -ENOTTY; + } + -+ return free_pages_nr; -+} ++ switch (cmd) { ++ case MALI_IOC_WAIT_FOR_NOTIFICATION: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_wait_for_notification_s), sizeof(u64))); ++ err = wait_for_notification_wrapper(session_data, (_mali_uk_wait_for_notification_s __user *)arg); ++ break; + -+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped) -+{ -+ mali_mem_allocation *alloc; -+ u32 free_pages_nr = 0; ++ case MALI_IOC_GET_API_VERSION_V2: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_api_version_v2_s), sizeof(u64))); ++ err = get_api_version_v2_wrapper(session_data, (_mali_uk_get_api_version_v2_s __user *)arg); ++ break; + -+ MALI_DEBUG_ASSERT_POINTER(mem_bkend); -+ alloc = mem_bkend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); ++ case MALI_IOC_GET_API_VERSION: ++ err = get_api_version_wrapper(session_data, (_mali_uk_get_api_version_s __user *)arg); ++ break; + -+ if (is_mali_mapped) { -+ mali_mem_swap_mali_unmap(alloc); -+ } ++ case MALI_IOC_POST_NOTIFICATION: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_post_notification_s), sizeof(u64))); ++ err = post_notification_wrapper(session_data, (_mali_uk_post_notification_s __user *)arg); ++ break; + -+ mali_memory_swap_list_backend_delete(mem_bkend); ++ /* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. 
*/ ++#if 0 ++ case MALI_IOC_GET_MALI_VERSION_IN_RK30: ++ err = get_mali_version_in_rk30_wrapper(session_data, (_mali_uk_get_mali_version_in_rk30_s __user *)arg); ++ break; ++#else ++ case MALI_IOC_GET_RK_KO_VERSION: ++ err = get_rk_ko_version_wrapper(session_data, (_mali_rk_ko_version_s __user *)arg); ++ break; ++#endif ++ ++ case MALI_IOC_GET_USER_SETTINGS: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_user_settings_s), sizeof(u64))); ++ err = get_user_settings_wrapper(session_data, (_mali_uk_get_user_settings_s __user *)arg); ++ break; + -+ mutex_lock(&mem_bkend->mutex); -+ /* To make sure the given memory backend was unlocked from Mali side, -+ * and then free this memory block. */ -+ mali_mem_swap_unlock_single_mem_backend(mem_bkend); -+ mutex_unlock(&mem_bkend->mutex); ++ case MALI_IOC_REQUEST_HIGH_PRIORITY: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_request_high_priority_s), sizeof(u64))); ++ err = request_high_priority_wrapper(session_data, (_mali_uk_request_high_priority_s __user *)arg); ++ break; + -+ if (MALI_MEM_SWAP == mem_bkend->type) { -+ free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem); -+ } else { -+ free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem); -+ } ++ case MALI_IOC_PENDING_SUBMIT: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pending_submit_s), sizeof(u64))); ++ err = pending_submit_wrapper(session_data, (_mali_uk_pending_submit_s __user *)arg); ++ break; + -+ return free_pages_nr; -+} ++#if defined(CONFIG_MALI400_PROFILING) ++ case MALI_IOC_PROFILING_ADD_EVENT: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_add_event_s), sizeof(u64))); ++ err = profiling_add_event_wrapper(session_data, (_mali_uk_profiling_add_event_s __user *)arg); ++ break; + -+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node) -+{ -+ MALI_DEBUG_ASSERT(NULL != page_node); ++ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_sw_counters_report_s), sizeof(u64))); ++ err = profiling_report_sw_counters_wrapper(session_data, (_mali_uk_sw_counters_report_s __user *)arg); ++ break; + -+ page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx); ++ case MALI_IOC_PROFILING_STREAM_FD_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_stream_fd_get_s), sizeof(u64))); ++ err = profiling_get_stream_fd_wrapper(session_data, (_mali_uk_profiling_stream_fd_get_s __user *)arg); ++ break; + -+ if (IS_ERR(page_node->swap_it->page)) { -+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx)); -+ return MALI_FALSE; -+ } ++ case MALI_IOC_PROILING_CONTROL_SET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_control_set_s), sizeof(u64))); ++ err = profiling_control_set_wrapper(session_data, (_mali_uk_profiling_control_set_s __user *)arg); ++ break; ++#else + -+ /* Ensure page is flushed from CPU caches. 
*/ -+ page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); ++ case MALI_IOC_PROFILING_ADD_EVENT: /* FALL-THROUGH */ ++ case MALI_IOC_PROFILING_REPORT_SW_COUNTERS: /* FALL-THROUGH */ ++ MALI_DEBUG_PRINT(2, ("Profiling not supported\n")); ++ err = -ENOTTY; ++ break; ++#endif + -+ return MALI_TRUE; -+} ++ case MALI_IOC_PROFILING_MEMORY_USAGE_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_profiling_memory_usage_get_s), sizeof(u64))); ++ err = mem_usage_get_wrapper(session_data, (_mali_uk_profiling_memory_usage_get_s __user *)arg); ++ break; + -+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx) -+{ -+ size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE; -+ struct mali_page_node *m_page; -+ long system_free_size; -+ u32 i, index; -+ mali_bool ret; ++ case MALI_IOC_MEM_ALLOC: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_alloc_mem_s), sizeof(u64))); ++ err = mem_alloc_wrapper(session_data, (_mali_uk_alloc_mem_s __user *)arg); ++ break; + -+ MALI_DEBUG_ASSERT(NULL != swap_mem); -+ MALI_DEBUG_ASSERT(NULL != bkend_idx); -+ MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE); ++ case MALI_IOC_MEM_FREE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_free_mem_s), sizeof(u64))); ++ err = mem_free_wrapper(session_data, (_mali_uk_free_mem_s __user *)arg); ++ break; + -+ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) { -+ MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n", -+ size, -+ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE, -+ mali_mem_os_allocator.allocation_limit)); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ case MALI_IOC_MEM_BIND: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_bind_mem_s), sizeof(u64))); ++ err = mem_bind_wrapper(session_data, (_mali_uk_bind_mem_s __user *)arg); ++ break; + -+ INIT_LIST_HEAD(&swap_mem->pages); -+ swap_mem->count = page_count; -+ index = mali_mem_swap_idx_range_alloc(page_count); ++ case MALI_IOC_MEM_UNBIND: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_unbind_mem_s), sizeof(u64))); ++ err = mem_unbind_wrapper(session_data, (_mali_uk_unbind_mem_s __user *)arg); ++ break; + -+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) { -+ MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory.")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ case MALI_IOC_MEM_COW: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_mem_s), sizeof(u64))); ++ err = mem_cow_wrapper(session_data, (_mali_uk_cow_mem_s __user *)arg); ++ break; + -+ for (i = 0; i < page_count; i++) { -+ m_page = _mali_mem_swap_page_node_allocate(); ++ case MALI_IOC_MEM_COW_MODIFY_RANGE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_cow_modify_range_s), sizeof(u64))); ++ err = mem_cow_modify_range_wrapper(session_data, (_mali_uk_cow_modify_range_s __user *)arg); ++ break; + -+ if (NULL == m_page) { -+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node.")); -+ swap_mem->count = i; ++ case MALI_IOC_MEM_RESIZE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_resize_s), sizeof(u64))); ++ err = mem_resize_mem_wrapper(session_data, (_mali_uk_mem_resize_s __user *)arg); ++ break; + -+ mali_mem_swap_free(swap_mem); -+ mali_mem_swap_idx_range_free(index + i, page_count - i); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ case MALI_IOC_MEM_WRITE_SAFE: ++ 
BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_mem_write_safe_s), sizeof(u64))); ++ err = mem_write_safe_wrapper(session_data, (_mali_uk_mem_write_safe_s __user *)arg); ++ break; + -+ m_page->swap_it->idx = index + i; ++ case MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_query_mmu_page_table_dump_size_s), sizeof(u64))); ++ err = mem_query_mmu_page_table_dump_size_wrapper(session_data, (_mali_uk_query_mmu_page_table_dump_size_s __user *)arg); ++ break; + -+ ret = mali_mem_swap_in_page_node(m_page); ++ case MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dump_mmu_page_table_s), sizeof(u64))); ++ err = mem_dump_mmu_page_table_wrapper(session_data, (_mali_uk_dump_mmu_page_table_s __user *)arg); ++ break; + -+ if (MALI_FALSE == ret) { -+ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed.")); -+ _mali_mem_swap_page_node_free(m_page); -+ mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1); ++ case MALI_IOC_MEM_DMA_BUF_GET_SIZE: ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_dma_buf_get_size_s), sizeof(u64))); ++ err = mali_dma_buf_get_size(session_data, (_mali_uk_dma_buf_get_size_s __user *)arg); ++#else ++ MALI_DEBUG_PRINT(2, ("DMA-BUF not supported\n")); ++ err = -ENOTTY; ++#endif ++ break; + -+ swap_mem->count = i; -+ mali_mem_swap_free(swap_mem); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ case MALI_IOC_PP_START_JOB: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_start_job_s), sizeof(u64))); ++ err = pp_start_job_wrapper(session_data, (_mali_uk_pp_start_job_s __user *)arg); ++ break; + -+ list_add_tail(&m_page->list, &swap_mem->pages); -+ } ++ case MALI_IOC_PP_AND_GP_START_JOB: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_and_gp_start_job_s), sizeof(u64))); ++ err = pp_and_gp_start_job_wrapper(session_data, (_mali_uk_pp_and_gp_start_job_s __user *)arg); ++ break; + -+ system_free_size = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE; ++ case MALI_IOC_PP_NUMBER_OF_CORES_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_number_of_cores_s), sizeof(u64))); ++ err = pp_get_number_of_cores_wrapper(session_data, (_mali_uk_get_pp_number_of_cores_s __user *)arg); ++ break; + -+ if ((system_free_size < mali_mem_swap_out_threshold_value) -+ && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2)) -+ && mali_utilization_enabled()) { -+ _mali_osk_wq_schedule_work(mali_mem_swap_out_workq); -+ } ++ case MALI_IOC_PP_CORE_VERSION_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_pp_core_version_s), sizeof(u64))); ++ err = pp_get_core_version_wrapper(session_data, (_mali_uk_get_pp_core_version_s __user *)arg); ++ break; + -+ *bkend_idx = index; -+ return 0; -+} ++ case MALI_IOC_PP_DISABLE_WB: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_pp_disable_wb_s), sizeof(u64))); ++ err = pp_disable_wb_wrapper(session_data, (_mali_uk_pp_disable_wb_s __user *)arg); ++ break; + -+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc) -+{ -+ struct mali_session_data *session; ++ case MALI_IOC_GP2_START_JOB: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_start_job_s), sizeof(u64))); ++ err = gp_start_job_wrapper(session_data, (_mali_uk_gp_start_job_s __user *)arg); ++ break; + -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++ case MALI_IOC_GP2_NUMBER_OF_CORES_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_number_of_cores_s), sizeof(u64))); ++ err = gp_get_number_of_cores_wrapper(session_data, 
(_mali_uk_get_gp_number_of_cores_s __user *)arg); ++ break; + -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); -+} ++ case MALI_IOC_GP2_CORE_VERSION_GET: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_get_gp_core_version_s), sizeof(u64))); ++ err = gp_get_core_version_wrapper(session_data, (_mali_uk_get_gp_core_version_s __user *)arg); ++ break; + ++ case MALI_IOC_GP2_SUSPEND_RESPONSE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_gp_suspend_response_s), sizeof(u64))); ++ err = gp_suspend_response_wrapper(session_data, (_mali_uk_gp_suspend_response_s __user *)arg); ++ break; + -+/* Insert these pages from shmem to mali page table*/ -+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props) -+{ -+ struct mali_page_directory *pagedir = session->page_directory; -+ struct mali_page_node *m_page; -+ dma_addr_t phys; -+ u32 virt = vaddr; -+ u32 prop = props; ++ case MALI_IOC_VSYNC_EVENT_REPORT: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_vsync_event_report_s), sizeof(u64))); ++ err = vsync_event_report_wrapper(session_data, (_mali_uk_vsync_event_report_s __user *)arg); ++ break; + -+ list_for_each_entry(m_page, &swap_mem->pages, list) { -+ MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page); -+ phys = m_page->swap_it->dma_addr; ++ case MALI_IOC_TIMELINE_GET_LATEST_POINT: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_get_latest_point_s), sizeof(u64))); ++ err = timeline_get_latest_point_wrapper(session_data, (_mali_uk_timeline_get_latest_point_s __user *)arg); ++ break; ++ case MALI_IOC_TIMELINE_WAIT: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_wait_s), sizeof(u64))); ++ err = timeline_wait_wrapper(session_data, (_mali_uk_timeline_wait_s __user *)arg); ++ break; ++ case MALI_IOC_TIMELINE_CREATE_SYNC_FENCE: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_timeline_create_sync_fence_s), sizeof(u64))); ++ err = timeline_create_sync_fence_wrapper(session_data, (_mali_uk_timeline_create_sync_fence_s __user *)arg); ++ break; ++ case MALI_IOC_SOFT_JOB_START: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_start_s), sizeof(u64))); ++ err = soft_job_start_wrapper(session_data, (_mali_uk_soft_job_start_s __user *)arg); ++ break; ++ case MALI_IOC_SOFT_JOB_SIGNAL: ++ BUILD_BUG_ON(!IS_ALIGNED(sizeof(_mali_uk_soft_job_signal_s), sizeof(u64))); ++ err = soft_job_signal_wrapper(session_data, (_mali_uk_soft_job_signal_s __user *)arg); ++ break; + -+ mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop); -+ virt += MALI_MMU_PAGE_SIZE; -+ } ++ default: ++ MALI_DEBUG_PRINT(2, ("No handler for ioctl 0x%08X 0x%08lX\n", cmd, arg)); ++ err = -ENOTTY; ++ }; + -+ return _MALI_OSK_ERR_OK; ++ return err; +} + -+int mali_mem_swap_in_pages(struct mali_pp_job *job) -+{ -+ u32 num_memory_cookies; -+ struct mali_session_data *session; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_allocation *mali_alloc = NULL; -+ mali_mem_backend *mem_bkend = NULL; -+ struct mali_page_node *m_page; -+ mali_bool swap_in_success = MALI_TRUE; -+ int i; ++late_initcall_sync(mali_module_init); ++module_exit(mali_module_exit); + -+ MALI_DEBUG_ASSERT_POINTER(job); ++MODULE_LICENSE(MALI_KERNEL_LINUX_LICENSE); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION(SVN_REV_STRING); +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h +new file mode 100644 
+index 000000000..be754cb15
+--- /dev/null
++++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_linux.h
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved.
++ *
++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
++ *
++ * A copy of the licence is included with the program, and can also be obtained from Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
++ */
+
++#ifndef __MALI_KERNEL_LINUX_H__
++#define __MALI_KERNEL_LINUX_H__
+
++#ifdef __cplusplus
++extern "C" {
++#endif
+
++#include /* character device definitions */
++#include
++#include
++#include "mali_kernel_license.h"
++#include "mali_osk_types.h"
++#include
+
++extern struct platform_device *mali_platform_device;
+
++/* Kernels from 3.19.0 onwards dropped the CONFIG_PM_RUNTIME define, so define it ourselves. */
++#if defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
++#define CONFIG_PM_RUNTIME 1
++#endif
+
++#ifdef __cplusplus
++}
++#endif
+
++#endif /* __MALI_KERNEL_LINUX_H__ */
+diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c
+new file mode 100644
+index 000000000..7bda438fe
+--- /dev/null
++++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.c
+@@ -0,0 +1,1410 @@
++/**
++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved.
++ *
++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
++ *
++ * A copy of the licence is included with the program, and can also be obtained from Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
++ */ + -+ /* Get backend memory & Map on GPU */ -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+ /* We neednot hold backend's lock here, race safe.*/ -+ if ((MALI_MEM_COW == mem_bkend->type) && -+ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { -+ continue; -+ } ++/** ++ * @file mali_kernel_sysfs.c ++ * Implementation of some sysfs data exports ++ */ + -+ mutex_lock(&mem_bkend->mutex); ++#include ++#include ++#include ++#include ++#include "mali_kernel_license.h" ++#include "mali_kernel_common.h" ++#include "mali_ukk.h" + -+ /* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in, -+ * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which -+ * should not be swapped out when delete job to invalide */ -+ if (MALI_FALSE == swap_in_success) { -+ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS; -+ mutex_unlock(&mem_bkend->mutex); -+ continue; -+ } ++#if MALI_LICENSE_IS_GPL + -+ /* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */ -+ ++mem_bkend->using_count; ++#include ++#include ++#include ++#include ++#include ++#include "mali_kernel_sysfs.h" ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++#include ++#include "mali_osk_profiling.h" ++#endif + -+ if (1 < mem_bkend->using_count) { -+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)); -+ mutex_unlock(&mem_bkend->mutex); -+ continue; -+ } ++#include ++#include "mali_pm.h" ++#include "mali_pmu.h" ++#include "mali_group.h" ++#include "mali_gp.h" ++#include "mali_pp.h" ++#include "mali_l2_cache.h" ++#include "mali_hw_core.h" ++#include "mali_kernel_core.h" ++#include "mali_user_settings_db.h" ++#include "mali_profiling_internal.h" ++#include "mali_gp_job.h" ++#include "mali_pp_job.h" ++#include "mali_executor.h" + -+ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) { -+ mutex_unlock(&mem_bkend->mutex); -+ continue; -+ } ++#define PRIVATE_DATA_COUNTER_MAKE_GP(src) (src) ++#define PRIVATE_DATA_COUNTER_MAKE_PP(src) ((1 << 24) | src) ++#define PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(src, sub_job) ((1 << 24) | (1 << 16) | (sub_job << 8) | src) ++#define PRIVATE_DATA_COUNTER_IS_PP(a) ((((a) >> 24) & 0xFF) ? MALI_TRUE : MALI_FALSE) ++#define PRIVATE_DATA_COUNTER_GET_SRC(a) (a & 0xFF) ++#define PRIVATE_DATA_COUNTER_IS_SUB_JOB(a) ((((a) >> 16) & 0xFF) ? MALI_TRUE : MALI_FALSE) ++#define PRIVATE_DATA_COUNTER_GET_SUB_JOB(a) (((a) >> 8) & 0xFF) + ++#define POWER_BUFFER_SIZE 3 + -+ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { -+ if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) { -+ /* Don't have enough memory to swap in page, so release pages have already been swapped -+ * in and then mark this pp job to be fail. 
*/ -+ mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page); -+ swap_in_success = MALI_FALSE; -+ break; -+ } -+ } ++static struct dentry *mali_debugfs_dir = NULL; + -+ if (swap_in_success) { -+#ifdef MALI_MEM_SWAP_TRACKING -+ mem_backend_swapped_unlock_size -= mem_bkend->size; -+#endif -+ _mali_osk_mutex_wait(session->memory_lock); -+ mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties); -+ _mali_osk_mutex_signal(session->memory_lock); ++typedef enum { ++ _MALI_DEVICE_SUSPEND, ++ _MALI_DEVICE_RESUME, ++ _MALI_DEVICE_DVFS_PAUSE, ++ _MALI_DEVICE_DVFS_RESUME, ++ _MALI_MAX_EVENTS ++} _mali_device_debug_power_events; + -+ /* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */ -+ mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN); -+ mutex_unlock(&mem_bkend->mutex); -+ } else { -+ --mem_bkend->using_count; -+ /* Marking that this backend is not swapped in, need not to be processed anymore. */ -+ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS; -+ mutex_unlock(&mem_bkend->mutex); -+ } -+ } ++static const char *const mali_power_events[_MALI_MAX_EVENTS] = { ++ [_MALI_DEVICE_SUSPEND] = "suspend", ++ [_MALI_DEVICE_RESUME] = "resume", ++ [_MALI_DEVICE_DVFS_PAUSE] = "dvfs_pause", ++ [_MALI_DEVICE_DVFS_RESUME] = "dvfs_resume", ++}; + -+ job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL; ++static mali_bool power_always_on_enabled = MALI_FALSE; + -+ return _MALI_OSK_ERR_OK; ++static int open_copy_private_data(struct inode *inode, struct file *filp) ++{ ++ filp->private_data = inode->i_private; ++ return 0; +} + -+int mali_mem_swap_out_pages(struct mali_pp_job *job) ++static ssize_t group_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) +{ -+ u32 num_memory_cookies; -+ struct mali_session_data *session; -+ struct mali_vma_node *mali_vma_node = NULL; -+ mali_mem_allocation *mali_alloc = NULL; -+ mali_mem_backend *mem_bkend = NULL; -+ int i; ++ int r; ++ char buffer[64]; ++ struct mali_group *group; + -+ MALI_DEBUG_ASSERT_POINTER(job); ++ group = (struct mali_group *)filp->private_data; ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ num_memory_cookies = mali_pp_job_num_memory_cookies(job); -+ session = mali_pp_job_get_session(job); ++ r = snprintf(buffer, 64, "%u\n", ++ mali_executor_group_is_disabled(group) ? 
0 : 1); + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ return simple_read_from_buffer(buf, count, offp, buffer, r); ++} + ++static ssize_t group_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) ++{ ++ int r; ++ char buffer[64]; ++ unsigned long val; ++ struct mali_group *group; + -+ for (i = 0; i < num_memory_cookies; i++) { -+ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); ++ group = (struct mali_group *)filp->private_data; ++ MALI_DEBUG_ASSERT_POINTER(group); + -+ if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) { -+ continue; -+ } ++ if (count >= sizeof(buffer)) { ++ return -ENOMEM; ++ } + -+ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ if (copy_from_user(&buffer[0], buf, count)) { ++ return -EFAULT; ++ } ++ buffer[count] = '\0'; + -+ if (NULL == mali_vma_node) { -+ MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr)); -+ continue; -+ } ++ r = kstrtoul(&buffer[0], 10, &val); ++ if (0 != r) { ++ return -EINVAL; ++ } + -+ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); -+ MALI_DEBUG_ASSERT(NULL != mali_alloc); ++ switch (val) { ++ case 1: ++ mali_executor_group_enable(group); ++ break; ++ case 0: ++ mali_executor_group_disable(group); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } + -+ if (MALI_MEM_SWAP != mali_alloc->type && -+ MALI_MEM_COW != mali_alloc->type) { -+ continue; -+ } ++ *offp += count; ++ return count; ++} + -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); ++static const struct file_operations group_enabled_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = group_enabled_read, ++ .write = group_enabled_write, ++}; + -+ /* We neednot hold backend's lock here, race safe.*/ -+ if ((MALI_MEM_COW == mem_bkend->type) && -+ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { -+ continue; -+ } ++static ssize_t hw_core_base_addr_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) ++{ ++ int r; ++ char buffer[64]; ++ struct mali_hw_core *hw_core; + -+ mutex_lock(&mem_bkend->mutex); ++ hw_core = (struct mali_hw_core *)filp->private_data; ++ MALI_DEBUG_ASSERT_POINTER(hw_core); + -+ MALI_DEBUG_ASSERT(0 < mem_bkend->using_count); ++ r = snprintf(buffer, 64, "0x%lX\n", hw_core->phys_addr); + -+ /* Reducing the using_count of mem backend means less pp job are using this memory backend, -+ * if this count get to zero, it means no pp job is using it now, could put it to swap out list. 
*/ -+ --mem_bkend->using_count; ++ return simple_read_from_buffer(buf, count, offp, buffer, r); ++} + -+ if (0 < mem_bkend->using_count) { -+ mutex_unlock(&mem_bkend->mutex); -+ continue; ++static const struct file_operations hw_core_base_addr_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = hw_core_base_addr_read, ++}; ++ ++static ssize_t profiling_counter_src_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data); ++ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data); ++ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data); ++ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data); ++ char buf[64]; ++ int r; ++ u32 val; ++ ++ if (MALI_TRUE == is_pp) { ++ /* PP counter */ ++ if (MALI_TRUE == is_sub_job) { ++ /* Get counter for a particular sub job */ ++ if (0 == src_id) { ++ val = mali_pp_job_get_pp_counter_sub_job_src0(sub_job); ++ } else { ++ val = mali_pp_job_get_pp_counter_sub_job_src1(sub_job); ++ } ++ } else { ++ /* Get default counter for all PP sub jobs */ ++ if (0 == src_id) { ++ val = mali_pp_job_get_pp_counter_global_src0(); ++ } else { ++ val = mali_pp_job_get_pp_counter_global_src1(); ++ } + } -+ mutex_unlock(&mem_bkend->mutex); ++ } else { ++ /* GP counter */ ++ if (0 == src_id) { ++ val = mali_gp_job_get_gp_counter_src0(); ++ } else { ++ val = mali_gp_job_get_gp_counter_src1(); ++ } ++ } + -+ mali_memory_swap_list_backend_add(mem_bkend); ++ if (MALI_HW_CORE_NO_COUNTER == val) { ++ r = snprintf(buf, 64, "-1\n"); ++ } else { ++ r = snprintf(buf, 64, "%u\n", val); + } + -+ return _MALI_OSK_ERR_OK; ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + -+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ++static ssize_t profiling_counter_src_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ struct mali_page_node *m_page, *found_node = NULL; -+ struct page *found_page; -+ mali_mem_swap *swap = NULL; -+ mali_mem_cow *cow = NULL; -+ dma_addr_t dma_addr; -+ u32 i = 0; ++ u32 is_pp = PRIVATE_DATA_COUNTER_IS_PP((uintptr_t)filp->private_data); ++ u32 src_id = PRIVATE_DATA_COUNTER_GET_SRC((uintptr_t)filp->private_data); ++ mali_bool is_sub_job = PRIVATE_DATA_COUNTER_IS_SUB_JOB((uintptr_t)filp->private_data); ++ u32 sub_job = PRIVATE_DATA_COUNTER_GET_SUB_JOB((uintptr_t)filp->private_data); ++ char buf[64]; ++ long val; ++ int ret; + -+ if (MALI_MEM_SWAP == mem_bkend->type) { -+ swap = &mem_bkend->swap_mem; -+ list_for_each_entry(m_page, &swap->pages, list) { -+ if (i == offset) { -+ found_node = m_page; -+ break; ++ if (cnt >= sizeof(buf)) { ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(&buf, ubuf, cnt)) { ++ return -EFAULT; ++ } ++ ++ buf[cnt] = 0; ++ ++ ret = kstrtol(buf, 10, &val); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (val < 0) { ++ /* any negative input will disable counter */ ++ val = MALI_HW_CORE_NO_COUNTER; ++ } ++ ++ if (MALI_TRUE == is_pp) { ++ /* PP counter */ ++ if (MALI_TRUE == is_sub_job) { ++ /* Set counter for a particular sub job */ ++ if (0 == src_id) { ++ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, (u32)val); ++ } else { ++ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, (u32)val); ++ } ++ } else { ++ /* Set default counter for all PP sub jobs */ ++ if (0 == src_id) { ++ mali_pp_job_set_pp_counter_global_src0((u32)val); ++ } else { ++ 
mali_pp_job_set_pp_counter_global_src1((u32)val); + } -+ i++; + } + } else { -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); -+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)); -+ -+ cow = &mem_bkend->cow_mem; -+ list_for_each_entry(m_page, &cow->pages, list) { -+ if (i == offset) { -+ found_node = m_page; -+ break; -+ } -+ i++; ++ /* GP counter */ ++ if (0 == src_id) { ++ mali_gp_job_set_gp_counter_src0((u32)val); ++ } else { ++ mali_gp_job_set_gp_counter_src1((u32)val); + } + } + -+ if (NULL == found_node) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ *ppos += cnt; ++ return cnt; ++} + -+ found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx); ++static const struct file_operations profiling_counter_src_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = profiling_counter_src_read, ++ .write = profiling_counter_src_write, ++}; + -+ if (!IS_ERR(found_page)) { -+ lock_page(found_page); -+ dma_addr = dma_map_page(&mali_platform_device->dev, found_page, -+ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); -+ dma_unmap_page(&mali_platform_device->dev, dma_addr, -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); ++static ssize_t l2_l2x_counter_srcx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) ++{ ++ char buf[64]; ++ int r; ++ u32 val; ++ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; + -+ *pagep = found_page; ++ if (0 == src_id) { ++ val = mali_l2_cache_core_get_counter_src0(l2_core); + } else { -+ return _MALI_OSK_ERR_NOMEM; ++ val = mali_l2_cache_core_get_counter_src1(l2_core); + } + -+ return _MALI_OSK_ERR_OK; ++ if (MALI_HW_CORE_NO_COUNTER == val) { ++ r = snprintf(buf, 64, "-1\n"); ++ } else { ++ r = snprintf(buf, 64, "%u\n", val); ++ } ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + -+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ++static ssize_t l2_l2x_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) +{ -+ struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL; -+ mali_mem_cow *cow = NULL; -+ u32 i = 0; -+ -+ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); -+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)); -+ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)); -+ MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend)); ++ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; ++ char buf[64]; ++ long val; ++ int ret; + -+ cow = &mem_bkend->cow_mem; -+ list_for_each_entry(m_page, &cow->pages, list) { -+ if (i == offset) { -+ found_node = m_page; -+ break; -+ } -+ i++; ++ if (cnt >= sizeof(buf)) { ++ return -EINVAL; + } + -+ if (NULL == found_node) { -+ return _MALI_OSK_ERR_FAULT; ++ if (copy_from_user(&buf, ubuf, cnt)) { ++ return -EFAULT; + } + -+ new_node = _mali_mem_swap_page_node_allocate(); ++ buf[cnt] = 0; + -+ if (NULL == new_node) { -+ return _MALI_OSK_ERR_FAULT; ++ ret = kstrtol(buf, 10, &val); ++ if (ret < 0) { ++ return ret; + } + -+ new_node->swap_it->idx = mali_mem_swap_idx_alloc(); -+ -+ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) { -+ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n")); -+ kfree(new_node->swap_it); -+ kfree(new_node); -+ 
return _MALI_OSK_ERR_FAULT; ++ if (val < 0) { ++ /* any negative input will disable counter */ ++ val = MALI_HW_CORE_NO_COUNTER; + } + -+ if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) { -+ _mali_mem_swap_page_node_free(new_node); -+ return _MALI_OSK_ERR_FAULT; ++ mali_l2_cache_core_set_counter_src(l2_core, src_id, (u32)val); ++ ++ *ppos += cnt; ++ return cnt; ++} ++ ++static ssize_t l2_all_counter_srcx_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) ++{ ++ char buf[64]; ++ long val; ++ int ret; ++ u32 l2_id; ++ struct mali_l2_cache_core *l2_cache; ++ ++ if (cnt >= sizeof(buf)) { ++ return -EINVAL; + } + -+ /* swap in found node for copy in kernel. */ -+ if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) { -+ mali_mem_swap_out_page_node(new_node); -+ _mali_mem_swap_page_node_free(new_node); -+ return _MALI_OSK_ERR_FAULT; ++ if (copy_from_user(&buf, ubuf, cnt)) { ++ return -EFAULT; + } + -+ _mali_mem_cow_copy_page(found_node, new_node); ++ buf[cnt] = 0; + -+ list_replace(&found_node->list, &new_node->list); ++ ret = kstrtol(buf, 10, &val); ++ if (ret < 0) { ++ return ret; ++ } + -+ if (1 != _mali_page_node_get_ref_count(found_node)) { -+ atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages); -+ if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) { -+ mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; -+ } -+ mem_bkend->cow_mem.change_pages_nr++; ++ if (val < 0) { ++ /* any negative input will disable counter */ ++ val = MALI_HW_CORE_NO_COUNTER; + } + -+ mali_mem_swap_out_page_node(found_node); -+ _mali_mem_swap_page_node_free(found_node); ++ l2_id = 0; ++ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); ++ while (NULL != l2_cache) { ++ mali_l2_cache_core_set_counter_src(l2_cache, src_id, (u32)val); + -+ /* When swap in the new page node, we have called dma_map_page for this page.\n */ -+ dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr, -+ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); ++ /* try next L2 */ ++ l2_id++; ++ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); ++ } + -+ lock_page(new_node->swap_it->page); ++ *ppos += cnt; ++ return cnt; ++} + -+ *pagep = new_node->swap_it->page; ++static ssize_t l2_l2x_counter_src0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 0); ++} + -+ return _MALI_OSK_ERR_OK; ++static ssize_t l2_l2x_counter_src1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_l2x_counter_srcx_read(filp, ubuf, cnt, ppos, 1); +} + -+#ifdef MALI_MEM_SWAP_TRACKING -+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size) ++static ssize_t l2_l2x_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ *swap_pool_size = mem_backend_swapped_pool_size; -+ *unlock_size = mem_backend_swapped_unlock_size; ++ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 0); +} -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h -new file mode 100644 -index 000000000..5810960e2 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (C) 
2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEMORY_SWAP_ALLOC_H__ -+#define __MALI_MEMORY_SWAP_ALLOC_H__ -+ -+#include "mali_osk.h" -+#include "mali_session.h" -+ -+#include "mali_memory_types.h" -+#include "mali_pp_job.h" -+ -+/** -+ * Initialize memory swapping module. -+ */ -+_mali_osk_errcode_t mali_mem_swap_init(void); -+ -+void mali_mem_swap_term(void); -+ -+/** -+ * Return global share memory file to other modules. -+ */ -+struct file *mali_mem_swap_get_global_swap_file(void); -+ -+/** -+ * Unlock the given memory backend and pages in it could be swapped out by kernel. -+ */ -+void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend); -+ -+/** -+ * Remove the given memory backend from global swap list. -+ */ -+void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend); -+ -+/** -+ * Add the given memory backend to global swap list. -+ */ -+void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend); -+ -+/** -+ * Allocate 1 index from bitmap used as page index in global swap file. -+ */ -+u32 mali_mem_swap_idx_alloc(void); -+ -+void mali_mem_swap_idx_free(u32 idx); -+ -+/** -+ * Allocate a new swap item without page index. -+ */ -+struct mali_swap_item *mali_mem_swap_alloc_swap_item(void); -+ -+/** -+ * Free a swap item, truncate the corresponding space in page cache and free index of page. -+ */ -+void mali_mem_swap_free_swap_item(mali_swap_item *swap_item); -+ -+/** -+ * Allocate a page node with swap item. -+ */ -+struct mali_page_node *_mali_mem_swap_page_node_allocate(void); -+ -+/** -+ * Reduce the reference count of given page node and if return 0, just free this page node. -+ */ -+_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page); -+ -+void _mali_mem_swap_page_node_free(struct mali_page_node *m_page); -+ -+/** -+ * Free a swappable memory backend. -+ */ -+u32 mali_mem_swap_free(mali_mem_swap *swap_mem); -+ -+/** -+ * Ummap and free. -+ */ -+u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped); -+ -+/** -+ * Read in a page from global swap file with the pre-allcated page index. -+ */ -+mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node); -+ -+int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx); -+ -+_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props); -+ -+void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc); -+ -+/** -+ * When pp job created, we need swap in all of memory backend needed by this pp job. -+ */ -+int mali_mem_swap_in_pages(struct mali_pp_job *job); -+ -+/** -+ * Put all of memory backends used this pp job to the global swap list. -+ */ -+int mali_mem_swap_out_pages(struct mali_pp_job *job); -+ -+/** -+ * This will be called in page fault to process CPU read&write. -+ */ -+int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ; -+ -+/** -+ * Used to process cow on demand for swappable memory backend. 
-+ */ -+int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep); -+ -+#ifdef MALI_MEM_SWAP_TRACKING -+void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size); -+#endif -+#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */ -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h -new file mode 100644 -index 000000000..33db40929 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h -@@ -0,0 +1,219 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+#ifndef __MALI_MEMORY_TYPES_H__ -+#define __MALI_MEMORY_TYPES_H__ ++static ssize_t l2_l2x_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_l2x_counter_srcx_write(filp, ubuf, cnt, ppos, 1); ++} + -+#include ++static ssize_t l2_all_counter_src0_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 0); ++} + -+#if defined(CONFIG_MALI400_UMP) -+#include "ump_kernel_interface.h" -+#endif ++static ssize_t l2_all_counter_src1_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_all_counter_srcx_write(filp, ubuf, cnt, ppos, 1); ++} + -+typedef u32 mali_address_t; ++static const struct file_operations l2_l2x_counter_src0_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = l2_l2x_counter_src0_read, ++ .write = l2_l2x_counter_src0_write, ++}; + -+typedef enum mali_mem_type { -+ MALI_MEM_OS, -+ MALI_MEM_EXTERNAL, -+ MALI_MEM_SWAP, -+ MALI_MEM_DMA_BUF, -+ MALI_MEM_UMP, -+ MALI_MEM_BLOCK, -+ MALI_MEM_COW, -+ MALI_MEM_SECURE, -+ MALI_MEM_TYPE_MAX, -+} mali_mem_type; ++static const struct file_operations l2_l2x_counter_src1_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = l2_l2x_counter_src1_read, ++ .write = l2_l2x_counter_src1_write, ++}; + -+typedef struct mali_block_item { -+ /* for block type, the block_phy is alway page size align -+ * so use low 12bit used for ref_cout. -+ */ -+ unsigned long phy_addr; -+} mali_block_item; ++static const struct file_operations l2_all_counter_src0_fops = { ++ .owner = THIS_MODULE, ++ .write = l2_all_counter_src0_write, ++}; + -+/** -+ * idx is used to locate the given page in the address space of swap file. -+ * ref_count is used to mark how many memory backends are using this item. 
-+ */ -+typedef struct mali_swap_item { -+ u32 idx; -+ atomic_t ref_count; -+ struct page *page; -+ dma_addr_t dma_addr; -+} mali_swap_item; ++static const struct file_operations l2_all_counter_src1_fops = { ++ .owner = THIS_MODULE, ++ .write = l2_all_counter_src1_write, ++}; + -+typedef enum mali_page_node_type { -+ MALI_PAGE_NODE_OS, -+ MALI_PAGE_NODE_BLOCK, -+ MALI_PAGE_NODE_SWAP, -+} mali_page_node_type; ++static ssize_t l2_l2x_counter_valx_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos, u32 src_id) ++{ ++ char buf[64]; ++ int r; ++ u32 src0 = 0; ++ u32 val0 = 0; ++ u32 src1 = 0; ++ u32 val1 = 0; ++ u32 val = -1; ++ struct mali_l2_cache_core *l2_core = (struct mali_l2_cache_core *)filp->private_data; + -+typedef struct mali_page_node { -+ struct list_head list; -+ union { -+ struct page *page; -+ mali_block_item *blk_it; /*pointer to block item*/ -+ mali_swap_item *swap_it; -+ }; ++ mali_l2_cache_core_get_counter_values(l2_core, &src0, &val0, &src1, &val1); + -+ u32 type; -+} mali_page_node; ++ if (0 == src_id) { ++ if (MALI_HW_CORE_NO_COUNTER != val0) { ++ val = val0; ++ } ++ } else { ++ if (MALI_HW_CORE_NO_COUNTER != val1) { ++ val = val1; ++ } ++ } + -+typedef struct mali_mem_os_mem { -+ struct list_head pages; -+ u32 count; -+} mali_mem_os_mem; ++ r = snprintf(buf, 64, "%u\n", val); + -+typedef struct mali_mem_dma_buf { -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ struct mali_dma_buf_attachment *attachment; -+#endif -+} mali_mem_dma_buf; ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} + -+typedef struct mali_mem_external { -+ dma_addr_t phys; -+ u32 size; -+} mali_mem_external; ++static ssize_t l2_l2x_counter_val0_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 0); ++} + -+typedef struct mali_mem_ump { -+#if defined(CONFIG_MALI400_UMP) -+ ump_dd_handle handle; -+#endif -+} mali_mem_ump; ++static ssize_t l2_l2x_counter_val1_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ return l2_l2x_counter_valx_read(filp, ubuf, cnt, ppos, 1); ++} + -+typedef struct block_allocator_allocation { -+ /* The list will be released in reverse order */ -+ struct block_info *last_allocated; -+ u32 mapping_length; -+ struct block_allocator *info; -+} block_allocator_allocation; ++static const struct file_operations l2_l2x_counter_val0_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = l2_l2x_counter_val0_read, ++}; + -+typedef struct mali_mem_block_mem { -+ struct list_head pfns; -+ u32 count; -+} mali_mem_block_mem; ++static const struct file_operations l2_l2x_counter_val1_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = l2_l2x_counter_val1_read, ++}; + -+typedef struct mali_mem_virt_mali_mapping { -+ mali_address_t addr; /* Virtual Mali address */ -+ u32 properties; /* MMU Permissions + cache, must match MMU HW */ -+} mali_mem_virt_mali_mapping; ++static ssize_t power_always_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ unsigned long val; ++ int ret; ++ char buf[32]; + -+typedef struct mali_mem_virt_cpu_mapping { -+ void __user *addr; -+ struct vm_area_struct *vma; -+} mali_mem_virt_cpu_mapping; ++ cnt = min(cnt, sizeof(buf) - 1); ++ if (copy_from_user(buf, ubuf, cnt)) { ++ return -EFAULT; ++ } ++ buf[cnt] = '\0'; + -+#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c -+#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010 ++ ret = kstrtoul(buf, 10, &val); ++ if (0 
!= ret) { ++ return ret; ++ } + -+typedef struct mali_mm_node { -+ /* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/ -+ uint32_t start; /* GPU vaddr */ -+ uint32_t size; /* GPU allocation virtual size */ -+ unsigned allocated : 1; -+} mali_mm_node; -+ -+typedef struct mali_vma_node { -+ struct mali_mm_node vm_node; -+ struct rb_node vm_rb; -+} mali_vma_node; -+ -+ -+typedef struct mali_mem_allocation { -+ MALI_DEBUG_CODE(u32 magic); -+ mali_mem_type type; /**< Type of memory */ -+ u32 flags; /**< Flags for this allocation */ ++ /* Update setting (not exactly thread safe) */ ++ if (1 == val && MALI_FALSE == power_always_on_enabled) { ++ power_always_on_enabled = MALI_TRUE; ++ _mali_osk_pm_dev_ref_get_sync(); ++ } else if (0 == val && MALI_TRUE == power_always_on_enabled) { ++ power_always_on_enabled = MALI_FALSE; ++ _mali_osk_pm_dev_ref_put(); ++ } + -+ struct mali_session_data *session; /**< Pointer to session that owns the allocation */ ++ *ppos += cnt; ++ return cnt; ++} + -+ mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */ -+ mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */ ++static ssize_t power_always_on_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ if (MALI_TRUE == power_always_on_enabled) { ++ return simple_read_from_buffer(ubuf, cnt, ppos, "1\n", 2); ++ } else { ++ return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2); ++ } ++} + -+ /* add for new memory system */ -+ struct mali_vma_node mali_vma_node; -+ u32 vsize; /* virtual size*/ -+ u32 psize; /* physical backend memory size*/ -+ struct list_head list; -+ s32 backend_handle; /* idr for mem_backend */ -+ _mali_osk_atomic_t mem_alloc_refcount; -+} mali_mem_allocation; ++static const struct file_operations power_always_on_fops = { ++ .owner = THIS_MODULE, ++ .read = power_always_on_read, ++ .write = power_always_on_write, ++}; + -+struct mali_mem_os_allocator { -+ spinlock_t pool_lock; -+ struct list_head pool_pages; -+ size_t pool_count; ++static ssize_t power_power_events_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_SUSPEND], strlen(mali_power_events[_MALI_DEVICE_SUSPEND]) - 1)) { ++ mali_pm_os_suspend(MALI_TRUE); ++ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_RESUME], strlen(mali_power_events[_MALI_DEVICE_RESUME]) - 1)) { ++ mali_pm_os_resume(); ++ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_PAUSE], strlen(mali_power_events[_MALI_DEVICE_DVFS_PAUSE]) - 1)) { ++ mali_dev_pause(); ++ } else if (!strncmp(ubuf, mali_power_events[_MALI_DEVICE_DVFS_RESUME], strlen(mali_power_events[_MALI_DEVICE_DVFS_RESUME]) - 1)) { ++ mali_dev_resume(); ++ } ++ *ppos += cnt; ++ return cnt; ++} + -+ atomic_t allocated_pages; -+ size_t allocation_limit; ++static loff_t power_power_events_seek(struct file *file, loff_t offset, int orig) ++{ ++ file->f_pos = offset; ++ return 0; ++} + -+ struct shrinker shrinker; -+ struct delayed_work timed_shrinker; -+ struct workqueue_struct *wq; ++static const struct file_operations power_power_events_fops = { ++ .owner = THIS_MODULE, ++ .write = power_power_events_write, ++ .llseek = power_power_events_seek, +}; + -+/* COW backend memory type */ -+typedef struct mali_mem_cow { -+ struct list_head pages; /**< all pages for this cow backend allocation, -+ including new allocated pages for modified range*/ -+ u32 count; /**< number of pages */ -+ s32 change_pages_nr; -+} mali_mem_cow; -+ -+typedef struct mali_mem_swap { -+ 
struct list_head pages; -+ u32 count; -+} mali_mem_swap; ++#if MALI_STATE_TRACKING ++static int mali_seq_internal_state_show(struct seq_file *seq_file, void *v) ++{ ++ u32 len = 0; ++ u32 size; ++ char *buf; + -+typedef struct mali_mem_secure { -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ struct dma_buf *buf; -+ struct dma_buf_attachment *attachment; -+ struct sg_table *sgt; -+#endif -+ u32 count; -+} mali_mem_secure; ++ size = seq_get_buf(seq_file, &buf); + -+#define MALI_MEM_BACKEND_FLAG_COWED (0x1) /* COW has happen on this backend */ -+#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE (0x2) /* This is an COW backend, mapped as not allowed cpu to write */ -+#define MALI_MEM_BACKEND_FLAG_SWAP_COWED (0x4) /* Mark the given backend is cowed from swappable memory. */ -+/* Mark this backend is not swapped_in in MALI driver, and before using it, -+ * we should swap it in and set up corresponding page table. */ -+#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN (0x8) -+#define MALI_MEM_BACKEND_FLAG_NOT_BINDED (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */ -+#define MALI_MEM_BACKEND_FLAG_BINDED (0x1 << 6) /* this backend it back with physical memory, used for defer bind */ ++ if (!size) { ++ return -ENOMEM; ++ } + -+typedef struct mali_mem_backend { -+ mali_mem_type type; /**< Type of backend memory */ -+ u32 flags; /**< Flags for this allocation */ -+ u32 size; -+ /* Union selected by type. */ -+ union { -+ mali_mem_os_mem os_mem; /**< MALI_MEM_OS */ -+ mali_mem_external ext_mem; /**< MALI_MEM_EXTERNAL */ -+ mali_mem_dma_buf dma_buf; /**< MALI_MEM_DMA_BUF */ -+ mali_mem_ump ump_mem; /**< MALI_MEM_UMP */ -+ mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */ -+ mali_mem_cow cow_mem; -+ mali_mem_swap swap_mem; -+ mali_mem_secure secure_mem; -+ }; -+ mali_mem_allocation *mali_allocation; -+ struct mutex mutex; -+ mali_mem_type cow_type; ++ /* Create the internal state dump. */ ++ len = snprintf(buf + len, size - len, "Mali device driver %s\n", SVN_REV_STRING); ++ len += snprintf(buf + len, size - len, "License: %s\n\n", MALI_KERNEL_LINUX_LICENSE); + -+ struct list_head list; /**< Used to link swappable memory backend to the global swappable list */ -+ int using_count; /**< Mark how many PP jobs are using this memory backend */ -+ u32 start_idx; /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */ -+} mali_mem_backend; ++ len += _mali_kernel_core_dump_state(buf + len, size - len); + -+#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) -+#define MALI_MEM_FLAG_DONT_CPU_MAP (1 << 1) -+#define MALI_MEM_FLAG_CAN_RESIZE (_MALI_MEMORY_ALLOCATE_RESIZEABLE) -+#endif /* __MALI_MEMORY_TYPES__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c -new file mode 100644 -index 000000000..666d4b0fb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c -@@ -0,0 +1,154 @@ -+/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ seq_commit(seq_file, len); + -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_kernel_linux.h" -+#include "mali_memory.h" -+#include "ump_kernel_interface.h" ++ return 0; ++} + -+static int mali_mem_ump_map(mali_mem_backend *mem_backend) ++static int mali_seq_internal_state_open(struct inode *inode, struct file *file) +{ -+ ump_dd_handle ump_mem; -+ mali_mem_allocation *alloc; -+ struct mali_session_data *session; -+ u32 nr_blocks; -+ u32 i; -+ ump_dd_physical_block *ump_blocks; -+ struct mali_page_directory *pagedir; -+ u32 offset = 0; -+ _mali_osk_errcode_t err; ++ return single_open(file, mali_seq_internal_state_show, NULL); ++} + -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); ++static const struct file_operations mali_seq_internal_state_fops = { ++ .owner = THIS_MODULE, ++ .open = mali_seq_internal_state_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++#endif /* MALI_STATE_TRACKING */ + -+ alloc = mem_backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++static ssize_t profiling_record_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; ++ int r; + -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); ++ r = snprintf(buf, 64, "%u\n", _mali_internal_profiling_is_recording() ? 1 : 0); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} + -+ ump_mem = mem_backend->ump_mem.handle; -+ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem); ++static ssize_t profiling_record_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; ++ unsigned long val; ++ int ret; + -+ nr_blocks = ump_dd_phys_block_count_get(ump_mem); -+ if (nr_blocks == 0) { -+ MALI_DEBUG_PRINT(1, ("No block count\n")); ++ if (cnt >= sizeof(buf)) { + return -EINVAL; + } + -+ ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks); -+ if (NULL == ump_blocks) { -+ return -ENOMEM; -+ } -+ -+ if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) { -+ _mali_osk_free(ump_blocks); ++ if (copy_from_user(&buf, ubuf, cnt)) { + return -EFAULT; + } + -+ pagedir = session->page_directory; ++ buf[cnt] = 0; + -+ mali_session_memory_lock(session); ++ ret = kstrtoul(buf, 10, &val); ++ if (ret < 0) { ++ return ret; ++ } + -+ err = mali_mem_mali_map_prepare(alloc); -+ if (_MALI_OSK_ERR_OK != err) { -+ MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n")); ++ if (val != 0) { ++ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* This can be made configurable at a later stage if we need to */ + -+ _mali_osk_free(ump_blocks); -+ mali_session_memory_unlock(session); -+ return -ENOMEM; -+ } ++ /* check if we are already recording */ ++ if (MALI_TRUE == _mali_internal_profiling_is_recording()) { ++ MALI_DEBUG_PRINT(3, ("Recording of profiling events already in progress\n")); ++ return -EFAULT; ++ } + -+ for (i = 0; i < nr_blocks; ++i) { -+ u32 virt = alloc->mali_vma_node.vm_node.start + offset; ++ /* check if we need to clear out an old recording first */ ++ if (MALI_TRUE == _mali_internal_profiling_have_recording()) { ++ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_clear()) { ++ MALI_DEBUG_PRINT(3, ("Failed to clear existing recording of profiling events\n")); ++ return -EFAULT; ++ } ++ } + -+ MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , 
ump_blocks[i].size)); ++ /* start recording profiling data */ ++ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) { ++ MALI_DEBUG_PRINT(3, ("Failed to start recording of profiling events\n")); ++ return -EFAULT; ++ } + -+ mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr, -+ ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT); ++ MALI_DEBUG_PRINT(3, ("Profiling recording started (max %u events)\n", limit)); ++ } else { ++ /* stop recording profiling data */ ++ u32 count = 0; ++ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_stop(&count)) { ++ MALI_DEBUG_PRINT(2, ("Failed to stop recording of profiling events\n")); ++ return -EFAULT; ++ } + -+ offset += ump_blocks[i].size; ++ MALI_DEBUG_PRINT(2, ("Profiling recording stopped (recorded %u events)\n", count)); + } + -+ if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { -+ u32 virt = alloc->mali_vma_node.vm_node.start + offset; ++ *ppos += cnt; ++ return cnt; ++} + -+ /* Map in an extra virtual guard page at the end of the VMA */ -+ MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n")); ++static const struct file_operations profiling_record_fops = { ++ .owner = THIS_MODULE, ++ .read = profiling_record_read, ++ .write = profiling_record_write, ++}; + -+ mali_mmu_pagedir_update(pagedir, virt, ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); ++static void *profiling_events_start(struct seq_file *s, loff_t *pos) ++{ ++ loff_t *spos; + -+ offset += _MALI_OSK_MALI_PAGE_SIZE; ++ /* check if we have data avaiable */ ++ if (MALI_TRUE != _mali_internal_profiling_have_recording()) { ++ return NULL; + } -+ mali_session_memory_unlock(session); -+ _mali_osk_free(ump_blocks); -+ return 0; -+} + -+static void mali_mem_ump_unmap(mali_mem_allocation *alloc) -+{ -+ struct mali_session_data *session; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ session = alloc->session; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ mali_session_memory_lock(session); -+ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, -+ alloc->flags); -+ mali_session_memory_unlock(session); ++ spos = kmalloc(sizeof(loff_t), GFP_KERNEL); ++ if (NULL == spos) { ++ return NULL; ++ } ++ ++ *spos = *pos; ++ return spos; +} + -+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags) ++static void *profiling_events_next(struct seq_file *s, void *v, loff_t *pos) +{ -+ ump_dd_handle ump_mem; -+ int ret; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); -+ -+ MALI_DEBUG_PRINT(3, -+ ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n", -+ secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size)); ++ loff_t *spos = v; + -+ ump_mem = ump_dd_handle_create_from_secure_id(secure_id); -+ if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT); -+ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP; -+ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { -+ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; ++ /* check if we have data avaiable */ ++ if (MALI_TRUE != _mali_internal_profiling_have_recording()) { ++ return NULL; + } + -+ mem_backend->ump_mem.handle = ump_mem; -+ -+ ret = mali_mem_ump_map(mem_backend); -+ if (0 != ret) { -+ ump_dd_reference_release(ump_mem); -+ return _MALI_OSK_ERR_FAULT; ++ /* check if the next entry actually is avaiable */ ++ if (_mali_internal_profiling_get_count() <= (u32)(*spos + 1)) { ++ return NULL; + } -+ 
MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n")); -+ return _MALI_OSK_ERR_OK; ++ ++ *pos = ++*spos; ++ return spos; +} + -+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend) ++static void profiling_events_stop(struct seq_file *s, void *v) +{ -+ ump_dd_handle ump_mem; -+ mali_mem_allocation *alloc; -+ MALI_DEBUG_ASSERT_POINTER(mem_backend); -+ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); -+ ump_mem = mem_backend->ump_mem.handle; -+ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem); -+ -+ alloc = mem_backend->mali_allocation; -+ MALI_DEBUG_ASSERT_POINTER(alloc); -+ mali_mem_ump_unmap(alloc); -+ ump_dd_reference_release(ump_mem); ++ kfree(v); +} + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h -new file mode 100644 -index 000000000..c314c8dcb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEMORY_UMP_BUF_H__ -+#define __MALI_MEMORY_UMP_BUF_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif ++static int profiling_events_show(struct seq_file *seq_file, void *v) ++{ ++ loff_t *spos = v; ++ u32 index; ++ u64 timestamp; ++ u32 event_id; ++ u32 data[5]; + -+#include "mali_uk_types.h" -+#include "mali_osk.h" -+#include "mali_memory.h" ++ index = (u32) * spos; + -+int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags); -+void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend); ++ /* Retrieve all events */ ++ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, ×tamp, &event_id, data)) { ++ seq_printf(seq_file, "%llu %u %u %u %u %u %u\n", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]); ++ return 0; ++ } + -+#ifdef __cplusplus ++ return 0; +} -+#endif -+ -+#endif /* __MALI_MEMORY_DMA_BUF_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c -new file mode 100644 -index 000000000..8e13e923c ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c -@@ -0,0 +1,158 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_kernel_linux.h" -+#include "mali_scheduler.h" + -+#include "mali_memory.h" -+#include "mali_memory_os_alloc.h" -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include "mali_memory_dma_buf.h" -+#include "mali_memory_secure.h" -+#endif -+#if defined(CONFIG_MALI400_UMP) -+#include "mali_memory_ump.h" -+#endif -+#include "mali_memory_external.h" -+#include "mali_memory_manager.h" -+#include "mali_memory_virtual.h" -+#include "mali_memory_cow.h" -+#include "mali_memory_block_alloc.h" -+#include "mali_memory_swap_alloc.h" ++static int profiling_events_show_human_readable(struct seq_file *seq_file, void *v) ++{ ++#define MALI_EVENT_ID_IS_HW(event_id) (((event_id & 0x00FF0000) >= MALI_PROFILING_EVENT_CHANNEL_GP0) && ((event_id & 0x00FF0000) <= MALI_PROFILING_EVENT_CHANNEL_PP7)) + ++ static u64 start_time = 0; ++ loff_t *spos = v; ++ u32 index; ++ u64 timestamp; ++ u32 event_id; ++ u32 data[5]; + ++ index = (u32) * spos; + -+/** -+*function @_mali_free_allocation_mem - free a memory allocation -+*/ -+static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc) -+{ -+ mali_mem_backend *mem_bkend = NULL; -+ u32 free_pages_nr = 0; ++ /* Retrieve all events */ ++ if (_MALI_OSK_ERR_OK == _mali_internal_profiling_get_event(index, ×tamp, &event_id, data)) { ++ seq_printf(seq_file, "%llu %u %u %u %u %u %u # ", timestamp, event_id, data[0], data[1], data[2], data[3], data[4]); + -+ struct mali_session_data *session = mali_alloc->session; -+ MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! \n", mali_alloc->psize)); -+ if (0 == mali_alloc->psize) -+ goto out; ++ if (0 == index) { ++ start_time = timestamp; ++ } + -+ /* Get backend memory & Map on CPU */ -+ mutex_lock(&mali_idr_mutex); -+ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ MALI_DEBUG_ASSERT(NULL != mem_bkend); ++ seq_printf(seq_file, "[%06u] ", index); + -+ switch (mem_bkend->type) { -+ case MALI_MEM_OS: -+ free_pages_nr = mali_mem_os_release(mem_bkend); -+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); -+ break; -+ case MALI_MEM_UMP: -+#if defined(CONFIG_MALI400_UMP) -+ mali_mem_unbind_ump_buf(mem_bkend); -+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); -+#else -+ MALI_DEBUG_PRINT(1, ("UMP not supported\n")); -+#endif -+ break; -+ case MALI_MEM_DMA_BUF: -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ mali_mem_unbind_dma_buf(mem_bkend); -+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); -+#else -+ MALI_DEBUG_PRINT(1, ("DMA not supported\n")); -+#endif -+ break; -+ case MALI_MEM_EXTERNAL: -+ mali_mem_unbind_ext_buf(mem_bkend); -+ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); -+ break; ++ switch (event_id & 0x0F000000) { ++ case MALI_PROFILING_EVENT_TYPE_SINGLE: ++ seq_printf(seq_file, "SINGLE | "); ++ break; ++ case MALI_PROFILING_EVENT_TYPE_START: ++ seq_printf(seq_file, "START | "); ++ break; ++ case MALI_PROFILING_EVENT_TYPE_STOP: ++ seq_printf(seq_file, "STOP | "); ++ break; ++ case MALI_PROFILING_EVENT_TYPE_SUSPEND: ++ seq_printf(seq_file, "SUSPEND | "); ++ break; ++ case MALI_PROFILING_EVENT_TYPE_RESUME: ++ seq_printf(seq_file, "RESUME | "); ++ break; ++ default: ++ seq_printf(seq_file, "0x%01X | ", (event_id & 0x0F000000) >> 24); ++ break; ++ } + -+ case 
MALI_MEM_BLOCK: -+ free_pages_nr = mali_mem_block_release(mem_bkend); -+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); -+ break; ++ switch (event_id & 0x00FF0000) { ++ case MALI_PROFILING_EVENT_CHANNEL_SOFTWARE: ++ seq_printf(seq_file, "SW | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_GP0: ++ seq_printf(seq_file, "GP0 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP0: ++ seq_printf(seq_file, "PP0 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP1: ++ seq_printf(seq_file, "PP1 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP2: ++ seq_printf(seq_file, "PP2 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP3: ++ seq_printf(seq_file, "PP3 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP4: ++ seq_printf(seq_file, "PP4 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP5: ++ seq_printf(seq_file, "PP5 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP6: ++ seq_printf(seq_file, "PP6 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_PP7: ++ seq_printf(seq_file, "PP7 | "); ++ break; ++ case MALI_PROFILING_EVENT_CHANNEL_GPU: ++ seq_printf(seq_file, "GPU | "); ++ break; ++ default: ++ seq_printf(seq_file, "0x%02X | ", (event_id & 0x00FF0000) >> 16); ++ break; ++ } + -+ case MALI_MEM_COW: -+ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) { -+ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE); ++ if (MALI_EVENT_ID_IS_HW(event_id)) { ++ if (((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_START) || ((event_id & 0x0F000000) == MALI_PROFILING_EVENT_TYPE_STOP)) { ++ switch (event_id & 0x0000FFFF) { ++ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_PHYSICAL: ++ seq_printf(seq_file, "PHYSICAL | "); ++ break; ++ case MALI_PROFILING_EVENT_REASON_START_STOP_HW_VIRTUAL: ++ seq_printf(seq_file, "VIRTUAL | "); ++ break; ++ default: ++ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); ++ break; ++ } ++ } else { ++ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); ++ } + } else { -+ free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE); ++ seq_printf(seq_file, "0x%04X | ", event_id & 0x0000FFFF); + } -+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); -+ break; -+ case MALI_MEM_SWAP: -+ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE); -+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); -+ atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]); -+ break; -+ case MALI_MEM_SECURE: -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ free_pages_nr = mali_mem_secure_release(mem_bkend); -+ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); -+#else -+ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n")); -+#endif -+ break; -+ default: -+ MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type)); -+ break; -+ } + -+ /*Remove backend memory idex */ -+ mutex_lock(&mali_idr_mutex); -+ idr_remove(&mali_backend_idr, mali_alloc->backend_handle); -+ mutex_unlock(&mali_idr_mutex); -+ kfree(mem_bkend); -+out: -+ /* remove memory allocation */ -+ mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node); -+ mali_mem_allocation_struct_destory(mali_alloc); -+ return free_pages_nr; -+} ++ seq_printf(seq_file, "T0 + 0x%016llX\n", timestamp - start_time); + -+/** -+* ref_count for allocation -+*/ -+u32 mali_allocation_unref(struct mali_mem_allocation **alloc) -+{ -+ u32 free_pages_nr = 0; -+ mali_mem_allocation *mali_alloc = *alloc; -+ *alloc = NULL; -+ if (0 == 
_mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) { -+ free_pages_nr = _mali_free_allocation_mem(mali_alloc); ++ return 0; + } -+ return free_pages_nr; ++ ++ return 0; +} + -+void mali_allocation_ref(struct mali_mem_allocation *alloc) ++static const struct seq_operations profiling_events_seq_ops = { ++ .start = profiling_events_start, ++ .next = profiling_events_next, ++ .stop = profiling_events_stop, ++ .show = profiling_events_show ++}; ++ ++static int profiling_events_open(struct inode *inode, struct file *file) +{ -+ _mali_osk_atomic_inc(&alloc->mem_alloc_refcount); ++ return seq_open(file, &profiling_events_seq_ops); +} + -+void mali_free_session_allocations(struct mali_session_data *session) -+{ -+ struct mali_mem_allocation *entry, *next; ++static const struct file_operations profiling_events_fops = { ++ .owner = THIS_MODULE, ++ .open = profiling_events_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; + -+ MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n")); ++static const struct seq_operations profiling_events_human_readable_seq_ops = { ++ .start = profiling_events_start, ++ .next = profiling_events_next, ++ .stop = profiling_events_stop, ++ .show = profiling_events_show_human_readable ++}; + -+ list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) { -+ mali_allocation_unref(&entry); -+ } ++static int profiling_events_human_readable_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &profiling_events_human_readable_seq_ops); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h -new file mode 100644 -index 000000000..33ac99509 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+#ifndef __MALI_MEMORY_UTIL_H__ -+#define __MALI_MEMORY_UTIL_H__ -+ -+u32 mali_allocation_unref(struct mali_mem_allocation **alloc); -+ -+void mali_allocation_ref(struct mali_mem_allocation *alloc); -+ -+void mali_free_session_allocations(struct mali_session_data *session); -+ -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c -new file mode 100644 -index 000000000..0b31e3a23 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_osk.h" -+#include "mali_osk_mali.h" -+#include "mali_kernel_linux.h" -+#include "mali_scheduler.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_memory_manager.h" -+#include "mali_memory_virtual.h" -+ -+ -+/** -+*internal helper to link node into the rb-tree -+*/ -+static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr, -+ struct mali_vma_node *node) -+{ -+ struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node; -+ struct rb_node *parent = NULL; -+ struct mali_vma_node *iter_node; -+ -+ while (likely(*iter)) { -+ parent = *iter; -+ iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb); -+ -+ if (node->vm_node.start < iter_node->vm_node.start) -+ iter = &(*iter)->rb_left; -+ else if (node->vm_node.start > iter_node->vm_node.start) -+ iter = &(*iter)->rb_right; -+ else -+ MALI_DEBUG_ASSERT(0); -+ } -+ -+ rb_link_node(&node->vm_rb, parent, iter); -+ rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb); -+} -+ -+/** -+ * mali_vma_offset_add() - Add offset node to RB Tree -+ */ -+int mali_vma_offset_add(struct mali_allocation_manager *mgr, -+ struct mali_vma_node *node) -+{ -+ int ret = 0; -+ write_lock(&mgr->vm_lock); -+ -+ if (node->vm_node.allocated) { -+ goto out; -+ } -+ -+ _mali_vma_offset_add_rb(mgr, node); -+ /* set to allocated */ -+ node->vm_node.allocated = 1; -+ -+out: -+ write_unlock(&mgr->vm_lock); -+ return ret; -+} -+ -+/** -+ * mali_vma_offset_remove() - Remove offset node from RB tree -+ */ -+void mali_vma_offset_remove(struct mali_allocation_manager *mgr, -+ struct mali_vma_node *node) -+{ -+ write_lock(&mgr->vm_lock); -+ -+ if (node->vm_node.allocated) { -+ rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb); -+ memset(&node->vm_node, 0, sizeof(node->vm_node)); -+ } -+ write_unlock(&mgr->vm_lock); -+} -+ -+/** -+* mali_vma_offset_search - Search the node in RB tree -+*/ -+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr, -+ unsigned long start, unsigned long pages) -+{ -+ struct mali_vma_node *node, *best; -+ struct rb_node *iter; -+ unsigned long offset; -+ read_lock(&mgr->vm_lock); -+ -+ iter = mgr->allocation_mgr_rb.rb_node; -+ best = NULL; -+ -+ while (likely(iter)) { -+ node = rb_entry(iter, struct mali_vma_node, vm_rb); -+ offset = node->vm_node.start; -+ if (start >= offset) { -+ iter = iter->rb_right; -+ best = node; -+ if (start == offset) -+ break; -+ } else { -+ iter = iter->rb_left; -+ } -+ } -+ -+ if (best) { -+ offset = best->vm_node.start + best->vm_node.size; -+ if (offset <= start + pages) -+ best = NULL; -+ } -+ read_unlock(&mgr->vm_lock); -+ -+ return best; -+} -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h -new file mode 100644 -index 000000000..fd03ed9f2 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+#ifndef __MALI_GPU_VMEM_H__ -+#define __MALI_GPU_VMEM_H__ -+ -+#include "mali_osk.h" -+#include "mali_session.h" -+#include -+#include -+#include -+#include -+#include -+#include "mali_memory_types.h" -+#include "mali_memory_os_alloc.h" -+#include "mali_memory_manager.h" -+ -+ -+ -+int mali_vma_offset_add(struct mali_allocation_manager *mgr, -+ struct mali_vma_node *node); -+ -+void mali_vma_offset_remove(struct mali_allocation_manager *mgr, -+ struct mali_vma_node *node); -+ -+struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr, -+ unsigned long start, unsigned long pages); -+ -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c -new file mode 100644 -index 000000000..5bc0e52eb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+/** -+ * @file mali_osk_atomics.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++static const struct file_operations profiling_events_human_readable_fops = { ++ .owner = THIS_MODULE, ++ .open = profiling_events_human_readable_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; + -+#include "mali_osk.h" -+#include -+#include "mali_kernel_common.h" ++#endif + -+void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom) ++static int memory_debugfs_show(struct seq_file *s, void *private_data) +{ -+ atomic_dec((atomic_t *)&atom->u.val); ++#ifdef MALI_MEM_SWAP_TRACKING ++ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s %-10s \n"\ ++ "=================================================================================================================================\n", ++ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem", ++ "external_mem", "ump_mem", "dma_mem", "swap_mem"); ++#else ++ seq_printf(s, " %-25s %-10s %-10s %-15s %-15s %-10s %-10s \n"\ ++ "========================================================================================================================\n", ++ "Name (:bytes)", "pid", "mali_mem", "max_mali_mem", ++ "external_mem", "ump_mem", "dma_mem"); ++#endif ++ mali_session_memory_tracking(s); ++ return 0; +} + -+u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom) ++static int memory_debugfs_open(struct inode *inode, struct file *file) +{ -+ return atomic_dec_return((atomic_t *)&atom->u.val); ++ return single_open(file, memory_debugfs_show, inode->i_private); +} + -+void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom) -+{ -+ atomic_inc((atomic_t *)&atom->u.val); -+} ++static const struct file_operations memory_usage_fops = { ++ .owner = THIS_MODULE, ++ .open = memory_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom) ++static ssize_t utilization_gp_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ return atomic_inc_return((atomic_t *)&atom->u.val); -+} ++ char 
buf[64]; ++ size_t r; ++ u32 uval = _mali_ukk_utilization_gp_pp(); + -+void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val) -+{ -+ MALI_DEBUG_ASSERT_POINTER(atom); -+ atomic_set((atomic_t *)&atom->u.val, val); ++ r = snprintf(buf, 64, "%u\n", uval); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + -+u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom) ++static ssize_t utilization_gp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ return atomic_read((atomic_t *)&atom->u.val); ++ char buf[64]; ++ size_t r; ++ u32 uval = _mali_ukk_utilization_gp(); ++ ++ r = snprintf(buf, 64, "%u\n", uval); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + -+void _mali_osk_atomic_term(_mali_osk_atomic_t *atom) ++static ssize_t utilization_pp_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ MALI_IGNORE(atom); ++ char buf[64]; ++ size_t r; ++ u32 uval = _mali_ukk_utilization_pp(); ++ ++ r = snprintf(buf, 64, "%u\n", uval); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + -+u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val) ++ ++static const struct file_operations utilization_gp_pp_fops = { ++ .owner = THIS_MODULE, ++ .read = utilization_gp_pp_read, ++}; ++ ++static const struct file_operations utilization_gp_fops = { ++ .owner = THIS_MODULE, ++ .read = utilization_gp_read, ++}; ++ ++static const struct file_operations utilization_pp_fops = { ++ .owner = THIS_MODULE, ++ .read = utilization_pp_read, ++}; ++ ++static ssize_t user_settings_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ -+ return atomic_xchg((atomic_t *)&atom->u.val, val); -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c -new file mode 100644 -index 000000000..fb9ccd2ad ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c -@@ -0,0 +1,152 @@ -+/* -+ * Copyright (C) 2010, 2013-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+/** -+ * @file mali_osk_bitmap.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "common/mali_kernel_common.h" -+#include "mali_osk_types.h" -+#include "mali_osk.h" -+ -+u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap) -+{ -+ u32 obj; -+ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ _mali_osk_spinlock_lock(bitmap->lock); -+ -+ obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve); -+ -+ if (obj < bitmap->max) { -+ set_bit(obj, bitmap->table); -+ } else { -+ obj = -1; -+ } -+ -+ if (obj != -1) -+ --bitmap->avail; -+ _mali_osk_spinlock_unlock(bitmap->lock); -+ -+ return obj; -+} -+ -+void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ _mali_osk_bitmap_free_range(bitmap, obj, 1); -+} -+ -+u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt) -+{ -+ u32 obj; -+ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ if (0 >= cnt) { -+ return -1; -+ } -+ -+ if (1 == cnt) { -+ return _mali_osk_bitmap_alloc(bitmap); -+ } -+ -+ _mali_osk_spinlock_lock(bitmap->lock); -+ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, -+ bitmap->last, cnt, 0); -+ -+ if (obj >= bitmap->max) { -+ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, -+ bitmap->reserve, cnt, 0); -+ } -+ -+ if (obj < bitmap->max) { -+ bitmap_set(bitmap->table, obj, cnt); -+ -+ bitmap->last = (obj + cnt); -+ if (bitmap->last >= bitmap->max) { -+ bitmap->last = bitmap->reserve; -+ } -+ } else { -+ obj = -1; -+ } -+ -+ if (obj != -1) { -+ bitmap->avail -= cnt; -+ } -+ -+ _mali_osk_spinlock_unlock(bitmap->lock); -+ -+ return obj; -+} -+ -+u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ return bitmap->avail; -+} -+ -+void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ _mali_osk_spinlock_lock(bitmap->lock); -+ bitmap_clear(bitmap->table, obj, cnt); -+ bitmap->last = min(bitmap->last, obj); -+ -+ bitmap->avail += cnt; -+ _mali_osk_spinlock_unlock(bitmap->lock); -+} -+ -+int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ MALI_DEBUG_ASSERT(reserve <= num); -+ -+ bitmap->reserve = reserve; -+ bitmap->last = reserve; -+ bitmap->max = num; -+ bitmap->avail = num - reserve; -+ bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); -+ if (!bitmap->lock) { -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * -+ sizeof(long), GFP_KERNEL); -+ if (!bitmap->table) { -+ _mali_osk_spinlock_term(bitmap->lock); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap) -+{ -+ MALI_DEBUG_ASSERT_POINTER(bitmap); -+ -+ if (NULL != bitmap->lock) { -+ _mali_osk_spinlock_term(bitmap->lock); -+ } -+ -+ if (NULL != bitmap->table) { -+ kfree(bitmap->table); -+ } -+} -+ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c -new file mode 100644 -index 000000000..5c8b9ceab ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ unsigned long val; ++ int ret; ++ _mali_uk_user_setting_t setting; ++ char buf[32]; + -+/** -+ * @file mali_osk_irq.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++ cnt = min(cnt, sizeof(buf) - 1); ++ if (copy_from_user(buf, ubuf, cnt)) { ++ return -EFAULT; ++ } ++ buf[cnt] = '\0'; + -+#include /* For memory allocation */ -+#include -+#include -+#include ++ ret = kstrtoul(buf, 10, &val); ++ if (0 != ret) { ++ return ret; ++ } + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ /* Update setting */ ++ setting = (_mali_uk_user_setting_t)(filp->private_data); ++ mali_set_user_setting(setting, val); + -+typedef struct _mali_osk_irq_t_struct { -+ u32 irqnum; -+ void *data; -+ _mali_osk_irq_uhandler_t uhandler; -+} mali_osk_irq_object_t; ++ *ppos += cnt; ++ return cnt; ++} + -+typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *); -+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id); /* , struct pt_regs *regs*/ ++static ssize_t user_settings_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; ++ size_t r; ++ u32 value; ++ _mali_uk_user_setting_t setting; + -+#if defined(DEBUG) ++ setting = (_mali_uk_user_setting_t)(filp->private_data); ++ value = mali_get_user_setting(setting); + -+struct test_interrupt_data { -+ _mali_osk_irq_ack_t ack_func; -+ void *probe_data; -+ mali_bool interrupt_received; -+ wait_queue_head_t wq; ++ r = snprintf(buf, 64, "%u\n", value); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} ++ ++static const struct file_operations user_settings_fops = { ++ .owner = THIS_MODULE, ++ .open = open_copy_private_data, ++ .read = user_settings_read, ++ .write = user_settings_write, +}; + -+static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id) ++static int mali_sysfs_user_settings_register(void) +{ -+ irqreturn_t ret = IRQ_NONE; -+ struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id; ++ struct dentry *mali_user_settings_dir = debugfs_create_dir("userspace_settings", mali_debugfs_dir); + -+ if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) { -+ data->interrupt_received = MALI_TRUE; -+ wake_up(&data->wq); -+ ret = IRQ_HANDLED; ++ if (mali_user_settings_dir != NULL) { ++ long i; ++ for (i = 0; i < _MALI_UK_USER_SETTING_MAX; i++) { ++ debugfs_create_file(_mali_uk_user_setting_descriptions[i], ++ 0600, mali_user_settings_dir, (void *)i, ++ &user_settings_fops); ++ } + } + -+ return ret; ++ return 0; +} + -+static _mali_osk_errcode_t test_interrupt(u32 irqnum, -+ _mali_osk_irq_trigger_t trigger_func, -+ _mali_osk_irq_ack_t ack_func, -+ void *probe_data, -+ const char *description) ++static ssize_t pp_num_cores_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) +{ -+ unsigned long irq_flags = 0; -+ struct test_interrupt_data data = { -+ .ack_func = ack_func, -+ .probe_data = probe_data, -+ .interrupt_received = MALI_FALSE, -+ }; -+ -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ irq_flags |= IRQF_SHARED; -+#endif /* 
defined(CONFIG_MALI_SHARED_INTERRUPTS) */ ++ int ret; ++ char buffer[32]; ++ unsigned long val; + -+ if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) { -+ MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description)); -+ return _MALI_OSK_ERR_FAULT; ++ if (count >= sizeof(buffer)) { ++ return -ENOMEM; + } + -+ init_waitqueue_head(&data.wq); -+ -+ trigger_func(probe_data); -+ wait_event_timeout(data.wq, data.interrupt_received, 100); ++ if (copy_from_user(&buffer[0], buf, count)) { ++ return -EFAULT; ++ } ++ buffer[count] = '\0'; + -+ free_irq(irqnum, &data); ++ ret = kstrtoul(&buffer[0], 10, &val); ++ if (0 != ret) { ++ return -EINVAL; ++ } + -+ if (data.interrupt_received) { -+ MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description)); -+ return _MALI_OSK_ERR_OK; -+ } else { -+ MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum)); -+ return _MALI_OSK_ERR_FAULT; ++ ret = mali_executor_set_perf_level(val, MALI_TRUE); /* override even if core scaling is disabled */ ++ if (ret) { ++ return ret; + } -+} + -+#endif /* defined(DEBUG) */ ++ *offp += count; ++ return count; ++} + -+_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description) ++static ssize_t pp_num_cores_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) +{ -+ mali_osk_irq_object_t *irq_object; -+ unsigned long irq_flags = 0; -+ -+#if defined(CONFIG_MALI_SHARED_INTERRUPTS) -+ irq_flags |= IRQF_SHARED; -+#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */ ++ int r; ++ char buffer[64]; + -+ irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL); -+ if (NULL == irq_object) { -+ return NULL; -+ } ++ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_enabled()); + -+ if (-1 == irqnum) { -+ /* Probe for IRQ */ -+ if ((NULL != trigger_func) && (NULL != ack_func)) { -+ unsigned long probe_count = 3; -+ _mali_osk_errcode_t err; -+ int irq; ++ return simple_read_from_buffer(buf, count, offp, buffer, r); ++} + -+ MALI_DEBUG_PRINT(2, ("Probing for irq\n")); ++static const struct file_operations pp_num_cores_enabled_fops = { ++ .owner = THIS_MODULE, ++ .write = pp_num_cores_enabled_write, ++ .read = pp_num_cores_enabled_read, ++ .llseek = default_llseek, ++}; + -+ do { -+ unsigned long mask; ++static ssize_t pp_num_cores_total_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) ++{ ++ int r; ++ char buffer[64]; + -+ mask = probe_irq_on(); -+ trigger_func(probe_data); ++ r = snprintf(buffer, 64, "%u\n", mali_executor_get_num_cores_total()); + -+ _mali_osk_time_ubusydelay(5); ++ return simple_read_from_buffer(buf, count, offp, buffer, r); ++} + -+ irq = probe_irq_off(mask); -+ err = ack_func(probe_data); -+ } while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--); ++static const struct file_operations pp_num_cores_total_fops = { ++ .owner = THIS_MODULE, ++ .read = pp_num_cores_total_read, ++}; + -+ if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1; -+ else irqnum = irq; -+ } else irqnum = -1; /* no probe functions, fault */ ++static ssize_t pp_core_scaling_enabled_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) ++{ ++ int ret; ++ char buffer[32]; ++ unsigned long val; + -+ if (-1 != irqnum) { -+ /* found an irq */ -+ MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum)); -+ } else { -+ MALI_DEBUG_PRINT(2, 
("Probe for irq failed\n")); -+ } ++ if (count >= sizeof(buffer)) { ++ return -ENOMEM; + } + -+ irq_object->irqnum = irqnum; -+ irq_object->uhandler = uhandler; -+ irq_object->data = int_data; -+ -+ if (-1 == irqnum) { -+ MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description)); -+ kfree(irq_object); -+ return NULL; ++ if (copy_from_user(&buffer[0], buf, count)) { ++ return -EFAULT; + } ++ buffer[count] = '\0'; + -+#if defined(DEBUG) -+ /* Verify that the configured interrupt settings are working */ -+ if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) { -+ MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description)); -+ kfree(irq_object); -+ return NULL; ++ ret = kstrtoul(&buffer[0], 10, &val); ++ if (0 != ret) { ++ return -EINVAL; + } -+#endif + -+ if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) { -+ MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description)); -+ kfree(irq_object); -+ return NULL; ++ switch (val) { ++ case 1: ++ mali_executor_core_scaling_enable(); ++ break; ++ case 0: ++ mali_executor_core_scaling_disable(); ++ break; ++ default: ++ return -EINVAL; ++ break; + } + -+ return irq_object; ++ *offp += count; ++ return count; +} + -+void _mali_osk_irq_term(_mali_osk_irq_t *irq) ++static ssize_t pp_core_scaling_enabled_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) +{ -+ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq; -+ free_irq(irq_object->irqnum, irq_object); -+ kfree(irq_object); ++ return simple_read_from_buffer(buf, count, offp, mali_executor_core_scaling_is_enabled() ? "1\n" : "0\n", 2); +} ++static const struct file_operations pp_core_scaling_enabled_fops = { ++ .owner = THIS_MODULE, ++ .write = pp_core_scaling_enabled_write, ++ .read = pp_core_scaling_enabled_read, ++ .llseek = default_llseek, ++}; + -+ -+/** This function is called directly in interrupt context from the OS just after -+ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel. -+ * It is registered one of these function for each mali core. When an interrupt -+ * arrives this function will be called equal times as registered mali cores. -+ * That means that we only check one mali core in one function call, and the -+ * core we check for each turn is given by the \a dev_id variable. -+ * If we detect an pending interrupt on the given core, we mask the interrupt -+ * out by settging the core's IRQ_MASK register to zero. -+ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority -+ * work queue job. 
-+ */ -+static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id) /* , struct pt_regs *regs*/ ++static ssize_t version_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) +{ -+ irqreturn_t ret = IRQ_NONE; -+ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id; ++ int r = 0; ++ char buffer[64]; + -+ if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) { -+ ret = IRQ_HANDLED; -+ } ++ switch (mali_kernel_core_get_product_id()) { ++ case _MALI_PRODUCT_ID_MALI200: ++ r = snprintf(buffer, 64, "Mali-200\n"); ++ break; ++ case _MALI_PRODUCT_ID_MALI300: ++ r = snprintf(buffer, 64, "Mali-300\n"); ++ break; ++ case _MALI_PRODUCT_ID_MALI400: ++ r = snprintf(buffer, 64, "Mali-400 MP\n"); ++ break; ++ case _MALI_PRODUCT_ID_MALI450: ++ r = snprintf(buffer, 64, "Mali-450 MP\n"); ++ break; ++ case _MALI_PRODUCT_ID_MALI470: ++ r = snprintf(buffer, 64, "Mali-470 MP\n"); ++ break; ++ case _MALI_PRODUCT_ID_UNKNOWN: ++ return -EINVAL; ++ break; ++ }; + -+ return ret; ++ return simple_read_from_buffer(buf, count, offp, buffer, r); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c -new file mode 100644 -index 000000000..ed5f0b0da ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c -@@ -0,0 +1,287 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+/** -+ * @file mali_osk_locks.c -+ * Implemenation of the OS abstraction layer for the kernel device driver -+ */ + -+#include "mali_osk_locks.h" -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+ -+ -+#ifdef DEBUG -+#ifdef LOCK_ORDER_CHECKING -+static DEFINE_SPINLOCK(lock_tracking_lock); -+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid); -+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid); -+static const char *const lock_order_to_string(_mali_osk_lock_order_t order); -+#endif /* LOCK_ORDER_CHECKING */ ++static const struct file_operations version_fops = { ++ .owner = THIS_MODULE, ++ .read = version_read, ++}; + -+void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) ++#if defined(DEBUG) ++static int timeline_debugfs_show(struct seq_file *s, void *private_data) +{ -+ checker->orig_flags = flags; -+ checker->owner = 0; -+ -+#ifdef LOCK_ORDER_CHECKING -+ checker->order = order; -+ checker->next = NULL; -+#endif -+} ++ struct mali_session_data *session, *tmp; ++ u32 session_seq = 1; + -+void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker) -+{ -+ checker->owner = _mali_osk_get_tid(); ++ seq_printf(s, "timeline system info: \n=================\n\n"); + -+#ifdef LOCK_ORDER_CHECKING -+ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) { -+ if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) { -+ printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n", -+ _mali_osk_get_tid(), checker); -+ dump_stack(); -+ } ++ mali_session_lock(); ++ MALI_SESSION_FOREACH(session, tmp, link) { ++ seq_printf(s, "session %d <%p> start:\n", session_seq, session); ++ mali_timeline_debug_print_system(session->timeline_system, s); ++ seq_printf(s, "session %d end\n\n\n", session_seq++); + } -+#endif ++ mali_session_unlock(); ++ ++ return 0; +} + -+void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker) ++static int timeline_debugfs_open(struct inode *inode, struct file *file) +{ -+ -+#ifdef LOCK_ORDER_CHECKING -+ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) { -+ remove_lock_from_log(checker, _mali_osk_get_tid()); -+ } -+#endif -+ checker->owner = 0; ++ return single_open(file, timeline_debugfs_show, inode->i_private); +} + ++static const struct file_operations timeline_dump_fops = { ++ .owner = THIS_MODULE, ++ .open = timeline_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release ++}; ++#endif + -+#ifdef LOCK_ORDER_CHECKING -+/* Lock order checking -+ * ------------------- -+ * -+ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the -+ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s, -+ * make sure that a lock that is taken has a higher order than the current highest-order lock a -+ * thread holds. -+ * -+ * This is done in the following manner: -+ * - A linked list keeps track of locks held by a thread. -+ * - A `next' pointer is added to each lock. This is used to chain the locks together. -+ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking -+ * the given lock is legal. It will follow the linked list to find the last -+ * lock taken by this thread. 
If the last lock's order was lower than the -+ * lock that is to be taken, it appends the new lock to the list and returns -+ * true, if not, it return false. This return value is assert()'ed on in -+ * _mali_osk_lock_wait(). -+ */ ++int mali_sysfs_register(const char *mali_dev_name) ++{ ++ mali_debugfs_dir = debugfs_create_dir(mali_dev_name, NULL); ++ if (ERR_PTR(-ENODEV) == mali_debugfs_dir) { ++ /* Debugfs not supported. */ ++ mali_debugfs_dir = NULL; ++ } else { ++ if (NULL != mali_debugfs_dir) { ++ /* Debugfs directory created successfully; create files now */ ++ struct dentry *mali_power_dir; ++ struct dentry *mali_gp_dir; ++ struct dentry *mali_pp_dir; ++ struct dentry *mali_l2_dir; ++ struct dentry *mali_profiling_dir; + -+static struct _mali_osk_lock_debug_s *lock_lookup_list; ++ debugfs_create_file("version", 0400, mali_debugfs_dir, NULL, &version_fops); + -+static void dump_lock_tracking_list(void) -+{ -+ struct _mali_osk_lock_debug_s *l; -+ u32 n = 1; ++ mali_power_dir = debugfs_create_dir("power", mali_debugfs_dir); ++ if (mali_power_dir != NULL) { ++ debugfs_create_file("always_on", 0600, mali_power_dir, NULL, &power_always_on_fops); ++ debugfs_create_file("power_events", 0200, mali_power_dir, NULL, &power_power_events_fops); ++ } + -+ /* print list for debugging purposes */ -+ l = lock_lookup_list; ++ mali_gp_dir = debugfs_create_dir("gp", mali_debugfs_dir); ++ if (mali_gp_dir != NULL) { ++ u32 num_groups; ++ long i; + -+ while (NULL != l) { -+ printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order); -+ l = l->next; -+ MALI_DEBUG_ASSERT(n++ < 100); -+ } -+ printk(" NULL\n"); -+} ++ num_groups = mali_group_get_glob_num_groups(); ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); + -+static int tracking_list_length(void) -+{ -+ struct _mali_osk_lock_debug_s *l; -+ u32 n = 0; -+ l = lock_lookup_list; ++ struct mali_gp_core *gp_core = mali_group_get_gp_core(group); ++ if (NULL != gp_core) { ++ struct dentry *mali_gp_gpx_dir; ++ mali_gp_gpx_dir = debugfs_create_dir("gp0", mali_gp_dir); ++ if (NULL != mali_gp_gpx_dir) { ++ debugfs_create_file("base_addr", 0400, mali_gp_gpx_dir, &gp_core->hw_core, &hw_core_base_addr_fops); ++ debugfs_create_file("enabled", 0600, mali_gp_gpx_dir, group, &group_enabled_fops); ++ } ++ break; /* no need to look for any other GP cores */ ++ } + -+ while (NULL != l) { -+ l = l->next; -+ n++; -+ MALI_DEBUG_ASSERT(n < 100); -+ } -+ return n; -+} ++ } ++ } + -+static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid) -+{ -+ mali_bool ret = MALI_FALSE; -+ _mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST; -+ struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe; -+ struct _mali_osk_lock_debug_s *l; -+ unsigned long local_lock_flag; -+ u32 len; ++ mali_pp_dir = debugfs_create_dir("pp", mali_debugfs_dir); ++ if (mali_pp_dir != NULL) { ++ u32 num_groups; ++ long i; + -+ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag); -+ len = tracking_list_length(); ++ debugfs_create_file("num_cores_total", 0400, mali_pp_dir, NULL, &pp_num_cores_total_fops); ++ debugfs_create_file("num_cores_enabled", 0600, mali_pp_dir, NULL, &pp_num_cores_enabled_fops); ++ debugfs_create_file("core_scaling_enabled", 0600, mali_pp_dir, NULL, &pp_core_scaling_enabled_fops); + -+ l = lock_lookup_list; -+ if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */ -+ lock_lookup_list = lock; 
-+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); -+ return MALI_TRUE; -+ } else { -+ /* Traverse the locks taken and find the lock of the highest order. -+ * Since several threads may hold locks, each lock's owner must be -+ * checked so that locks not owned by this thread can be ignored. */ -+ for (;;) { -+ MALI_DEBUG_ASSERT_POINTER(l); -+ if (tid == l->owner && l->order >= highest_order_for_tid) { -+ highest_order_for_tid = l->order; -+ highest_order_lock = l; -+ } ++ num_groups = mali_group_get_glob_num_groups(); ++ for (i = 0; i < num_groups; i++) { ++ struct mali_group *group = mali_group_get_glob_group(i); + -+ if (NULL != l->next) { -+ l = l->next; -+ } else { -+ break; ++ struct mali_pp_core *pp_core = mali_group_get_pp_core(group); ++ if (NULL != pp_core) { ++ char buf[16]; ++ struct dentry *mali_pp_ppx_dir; ++ _mali_osk_snprintf(buf, sizeof(buf), "pp%u", mali_pp_core_get_id(pp_core)); ++ mali_pp_ppx_dir = debugfs_create_dir(buf, mali_pp_dir); ++ if (NULL != mali_pp_ppx_dir) { ++ debugfs_create_file("base_addr", 0400, mali_pp_ppx_dir, &pp_core->hw_core, &hw_core_base_addr_fops); ++ if (!mali_group_is_virtual(group)) { ++ debugfs_create_file("enabled", 0600, mali_pp_ppx_dir, group, &group_enabled_fops); ++ } ++ } ++ } ++ } + } -+ } + -+ l->next = lock; -+ l->next = NULL; -+ } ++ mali_l2_dir = debugfs_create_dir("l2", mali_debugfs_dir); ++ if (mali_l2_dir != NULL) { ++ struct dentry *mali_l2_all_dir; ++ u32 l2_id; ++ struct mali_l2_cache_core *l2_cache; + -+ /* We have now found the highest order lock currently held by this thread and can see if it is -+ * legal to take the requested lock. */ -+ ret = highest_order_for_tid < lock->order; ++ mali_l2_all_dir = debugfs_create_dir("all", mali_l2_dir); ++ if (mali_l2_all_dir != NULL) { ++ debugfs_create_file("counter_src0", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src0_fops); ++ debugfs_create_file("counter_src1", 0200, mali_l2_all_dir, NULL, &l2_all_counter_src1_fops); ++ } + -+ if (!ret) { -+ printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n", -+ lock->order, lock_order_to_string(lock->order), -+ highest_order_for_tid, lock_order_to_string(highest_order_for_tid)); -+ dump_lock_tracking_list(); -+ } ++ l2_id = 0; ++ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); ++ while (NULL != l2_cache) { ++ char buf[16]; ++ struct dentry *mali_l2_l2x_dir; ++ _mali_osk_snprintf(buf, sizeof(buf), "l2%u", l2_id); ++ mali_l2_l2x_dir = debugfs_create_dir(buf, mali_l2_dir); ++ if (NULL != mali_l2_l2x_dir) { ++ debugfs_create_file("counter_src0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src0_fops); ++ debugfs_create_file("counter_src1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_src1_fops); ++ debugfs_create_file("counter_val0", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val0_fops); ++ debugfs_create_file("counter_val1", 0600, mali_l2_l2x_dir, l2_cache, &l2_l2x_counter_val1_fops); ++ debugfs_create_file("base_addr", 0400, mali_l2_l2x_dir, &l2_cache->hw_core, &hw_core_base_addr_fops); ++ } + -+ if (len + 1 != tracking_list_length()) { -+ printk(KERN_ERR "************ lock: %p\n", lock); -+ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length()); -+ dump_lock_tracking_list(); -+ MALI_DEBUG_ASSERT_POINTER(NULL); -+ } ++ /* try next L2 */ ++ l2_id++; ++ l2_cache = mali_l2_cache_core_get_glob_l2_core(l2_id); ++ } ++ } + -+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); -+ return ret; -+} ++ debugfs_create_file("gpu_memory", 0444, 
mali_debugfs_dir, NULL, &memory_usage_fops); + -+static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid) -+{ -+ struct _mali_osk_lock_debug_s *curr; -+ struct _mali_osk_lock_debug_s *prev = NULL; -+ unsigned long local_lock_flag; -+ u32 len; -+ u32 n = 0; ++ debugfs_create_file("utilization_gp_pp", 0400, mali_debugfs_dir, NULL, &utilization_gp_pp_fops); ++ debugfs_create_file("utilization_gp", 0400, mali_debugfs_dir, NULL, &utilization_gp_fops); ++ debugfs_create_file("utilization_pp", 0400, mali_debugfs_dir, NULL, &utilization_pp_fops); + -+ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag); -+ len = tracking_list_length(); -+ curr = lock_lookup_list; ++ mali_profiling_dir = debugfs_create_dir("profiling", mali_debugfs_dir); ++ if (mali_profiling_dir != NULL) { ++ u32 max_sub_jobs; ++ long i; ++ struct dentry *mali_profiling_gp_dir; ++ struct dentry *mali_profiling_pp_dir; ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++ struct dentry *mali_profiling_proc_dir; ++#endif ++ /* ++ * Create directory where we can set GP HW counters. ++ */ ++ mali_profiling_gp_dir = debugfs_create_dir("gp", mali_profiling_dir); ++ if (mali_profiling_gp_dir != NULL) { ++ debugfs_create_file("counter_src0", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(0), &profiling_counter_src_fops); ++ debugfs_create_file("counter_src1", 0600, mali_profiling_gp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_GP(1), &profiling_counter_src_fops); ++ } + -+ if (NULL == curr) { -+ printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n"); -+ dump_lock_tracking_list(); -+ } ++ /* ++ * Create directory where we can set PP HW counters. ++ * Possible override with specific HW counters for a particular sub job ++ * (Disable core scaling before using the override!) 
++ */ ++ mali_profiling_pp_dir = debugfs_create_dir("pp", mali_profiling_dir); ++ if (mali_profiling_pp_dir != NULL) { ++ debugfs_create_file("counter_src0", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(0), &profiling_counter_src_fops); ++ debugfs_create_file("counter_src1", 0600, mali_profiling_pp_dir, (void *)PRIVATE_DATA_COUNTER_MAKE_PP(1), &profiling_counter_src_fops); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(curr); ++ max_sub_jobs = mali_executor_get_num_cores_total(); ++ for (i = 0; i < max_sub_jobs; i++) { ++ char buf[16]; ++ struct dentry *mali_profiling_pp_x_dir; ++ _mali_osk_snprintf(buf, sizeof(buf), "%u", i); ++ mali_profiling_pp_x_dir = debugfs_create_dir(buf, mali_profiling_pp_dir); ++ if (NULL != mali_profiling_pp_x_dir) { ++ debugfs_create_file("counter_src0", ++ 0600, mali_profiling_pp_x_dir, ++ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(0, i), ++ &profiling_counter_src_fops); ++ debugfs_create_file("counter_src1", ++ 0600, mali_profiling_pp_x_dir, ++ (void *)PRIVATE_DATA_COUNTER_MAKE_PP_SUB_JOB(1, i), ++ &profiling_counter_src_fops); ++ } ++ } + ++#if defined(CONFIG_MALI400_INTERNAL_PROFILING) ++ mali_profiling_proc_dir = debugfs_create_dir("proc", mali_profiling_dir); ++ if (mali_profiling_proc_dir != NULL) { ++ struct dentry *mali_profiling_proc_default_dir = debugfs_create_dir("default", mali_profiling_proc_dir); ++ if (mali_profiling_proc_default_dir != NULL) { ++ debugfs_create_file("enable", 0600, mali_profiling_proc_default_dir, (void *)_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, &user_settings_fops); ++ } ++ } ++ debugfs_create_file("record", 0600, mali_profiling_dir, NULL, &profiling_record_fops); ++ debugfs_create_file("events", 0400, mali_profiling_dir, NULL, &profiling_events_fops); ++ debugfs_create_file("events_human_readable", 0400, mali_profiling_dir, NULL, &profiling_events_human_readable_fops); ++#endif ++ } + -+ while (lock != curr) { -+ prev = curr; ++#if MALI_STATE_TRACKING ++ debugfs_create_file("state_dump", 0400, mali_debugfs_dir, NULL, &mali_seq_internal_state_fops); ++#endif + -+ MALI_DEBUG_ASSERT_POINTER(curr); -+ curr = curr->next; -+ MALI_DEBUG_ASSERT(n++ < 100); ++#if defined(DEBUG) ++ debugfs_create_file("timeline_dump", 0400, mali_debugfs_dir, NULL, &timeline_dump_fops); ++#endif ++ if (mali_sysfs_user_settings_register()) { ++ /* Failed to create the debugfs entries for the user settings DB. */ ++ MALI_DEBUG_PRINT(2, ("Failed to create user setting debugfs files. Ignoring...\n")); ++ } ++ } + } + -+ if (NULL == prev) { -+ lock_lookup_list = curr->next; -+ } else { -+ MALI_DEBUG_ASSERT_POINTER(curr); -+ MALI_DEBUG_ASSERT_POINTER(prev); -+ prev->next = curr->next; ++ /* Success! 
*/ ++ return 0; ++} ++ ++int mali_sysfs_unregister(void) ++{ ++ if (NULL != mali_debugfs_dir) { ++ debugfs_remove_recursive(mali_debugfs_dir); + } ++ return 0; ++} + -+ lock->next = NULL; ++#else /* MALI_LICENSE_IS_GPL */ + -+ if (len - 1 != tracking_list_length()) { -+ printk(KERN_ERR "************ lock: %p\n", lock); -+ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length()); -+ dump_lock_tracking_list(); -+ MALI_DEBUG_ASSERT_POINTER(NULL); -+ } ++/* Dummy implementations for non-GPL */ + -+ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); ++int mali_sysfs_register(struct mali_dev *device, dev_t dev, const char *mali_dev_name) ++{ ++ return 0; +} + -+static const char *const lock_order_to_string(_mali_osk_lock_order_t order) ++int mali_sysfs_unregister(void) +{ -+ switch (order) { -+ case _MALI_OSK_LOCK_ORDER_SESSIONS: -+ return "_MALI_OSK_LOCK_ORDER_SESSIONS"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_MEM_SESSION: -+ return "_MALI_OSK_LOCK_ORDER_MEM_SESSION"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_MEM_INFO: -+ return "_MALI_OSK_LOCK_ORDER_MEM_INFO"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE: -+ return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP: -+ return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_PM_EXECUTION: -+ return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_EXECUTOR: -+ return "_MALI_OSK_LOCK_ORDER_EXECUTOR"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM: -+ return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_SCHEDULER: -+ return "_MALI_OSK_LOCK_ORDER_SCHEDULER"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED: -+ return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_DMA_COMMAND: -+ return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_PROFILING: -+ return "_MALI_OSK_LOCK_ORDER_PROFILING"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_L2: -+ return "_MALI_OSK_LOCK_ORDER_L2"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_L2_COMMAND: -+ return "_MALI_OSK_LOCK_ORDER_L2_COMMAND"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_UTILIZATION: -+ return "_MALI_OSK_LOCK_ORDER_UTILIZATION"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS: -+ return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS"; -+ break; -+ case _MALI_OSK_LOCK_ORDER_PM_STATE: -+ return "_MALI_OSK_LOCK_ORDER_PM_STATE"; -+ break; -+ default: -+ return ""; -+ } ++ return 0; +} -+#endif /* LOCK_ORDER_CHECKING */ -+#endif /* DEBUG */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h ++ ++#endif /* MALI_LICENSE_IS_GPL */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h new file mode 100644 -index 000000000..6fd5af952 +index 000000000..91580a87c --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h -@@ -0,0 +1,326 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_kernel_sysfs.h +@@ -0,0 +1,29 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2013, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
@@ -318655,330 +320014,200 @@ index 000000000..6fd5af952 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_osk_locks.h -+ * Defines OS abstraction of lock and mutex -+ */ -+#ifndef _MALI_OSK_LOCKS_H -+#define _MALI_OSK_LOCKS_H -+ -+#include -+#include -+#include -+ -+#include -+ -+#include "mali_osk_types.h" ++#ifndef __MALI_KERNEL_SYSFS_H__ ++#define __MALI_KERNEL_SYSFS_H__ + -+#ifdef _cplusplus ++#ifdef __cplusplus +extern "C" { +#endif + -+ /* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */ -+#ifdef DEBUG -+ struct _mali_osk_lock_debug_s { -+ u32 owner; -+ _mali_osk_lock_flags_t orig_flags; -+ _mali_osk_lock_order_t order; -+ struct _mali_osk_lock_debug_s *next; -+ }; -+#endif -+ -+ /* Anstraction of spinlock_t */ -+ struct _mali_osk_spinlock_s { -+#ifdef DEBUG -+ struct _mali_osk_lock_debug_s checker; -+#endif -+ spinlock_t spinlock; -+ }; -+ -+ /* Abstration of spinlock_t and lock flag which is used to store register's state before locking */ -+ struct _mali_osk_spinlock_irq_s { -+#ifdef DEBUG -+ struct _mali_osk_lock_debug_s checker; -+#endif -+ -+ spinlock_t spinlock; -+ unsigned long flags; -+ }; -+ -+ /* Abstraction of rw_semaphore in OS */ -+ struct _mali_osk_mutex_rw_s { -+#ifdef DEBUG -+ struct _mali_osk_lock_debug_s checker; -+ _mali_osk_lock_mode_t mode; -+#endif -+ -+ struct rw_semaphore rw_sema; -+ }; ++#include + -+ /* Mutex and mutex_interruptible functions share the same osk mutex struct */ -+ struct _mali_osk_mutex_s { -+#ifdef DEBUG -+ struct _mali_osk_lock_debug_s checker; -+#endif -+ struct mutex mutex; -+ }; ++#define MALI_PROC_DIR "driver/mali" + -+#ifdef DEBUG -+ /** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and -+ * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we -+ * init/lock/unlock a lock/mutex, we could track lock order of a given tid. */ -+ void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order); -+ void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker); -+ void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker); ++int mali_sysfs_register(const char *mali_dev_name); ++int mali_sysfs_unregister(void); + -+ /** @brief This function can return a given lock's owner when DEBUG is enabled. 
*/ -+ static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock) -+ { -+ return lock->owner; -+ } -+#else -+#define _mali_osk_locks_debug_init(x, y, z) do {} while (0) -+#define _mali_osk_locks_debug_add(x) do {} while (0) -+#define _mali_osk_locks_debug_remove(x) do {} while (0) ++#ifdef __cplusplus ++} +#endif + -+ /** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/ -+ static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) -+ { -+ _mali_osk_spinlock_t *lock = NULL; -+ -+ lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL); -+ if (NULL == lock) { -+ return NULL; -+ } -+ spin_lock_init(&lock->spinlock); -+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); -+ return lock; -+ } -+ -+ /** @brief Lock a spinlock */ -+ static inline void _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ spin_lock(&lock->spinlock); -+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); -+ } -+ -+ /** @brief Unlock a spinlock */ -+ static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); -+ spin_unlock(&lock->spinlock); -+ } -+ -+ /** @brief Free a memory block which the argument lock pointed to and its type must be -+ * _mali_osk_spinlock_t *. */ -+ static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock) -+ { -+ /* Parameter validation */ -+ BUG_ON(NULL == lock); -+ -+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ -+ kfree(lock); -+ } -+ -+ /** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be -+ * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. */ -+ static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) -+ { -+ _mali_osk_spinlock_irq_t *lock = NULL; -+ lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL); -+ -+ if (NULL == lock) { -+ return NULL; -+ } ++#endif /* __MALI_KERNEL_LINUX_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h b/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h +new file mode 100644 +index 000000000..222260823 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_linux_trace.h +@@ -0,0 +1,161 @@ ++/* ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ lock->flags = 0; -+ spin_lock_init(&lock->spinlock); -+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); -+ return lock; -+ } ++#if !defined (MALI_LINUX_TRACE_H) || defined (TRACE_HEADER_MULTI_READ) ++#define MALI_LINUX_TRACE_H + -+ /** @brief Lock spinlock and save the register's state */ -+ static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock) -+ { -+ unsigned long tmp_flags; ++#include + -+ BUG_ON(NULL == lock); -+ spin_lock_irqsave(&lock->spinlock, tmp_flags); -+ lock->flags = tmp_flags; -+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); -+ } ++#include ++#include + -+ /** @brief Unlock spinlock with saved register's state */ -+ static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); -+ spin_unlock_irqrestore(&lock->spinlock, lock->flags); -+ } ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM mali + -+ /** @brief Destroy a given memory block which lock pointed to, and the lock type must be -+ * _mali_osk_spinlock_irq_t *. */ -+ static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock) -+ { -+ /* Parameter validation */ -+ BUG_ON(NULL == lock); ++#define TRACE_INCLUDE_PATH . ++#define TRACE_INCLUDE_FILE mali_linux_trace + -+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ -+ kfree(lock); -+ } ++/** ++ * Define the tracepoint used to communicate the status of a GPU. Called ++ * when a GPU turns on or turns off. ++ * ++ * @param event_id The type of the event. This parameter is a bitfield ++ * encoding the type of the event. ++ * ++ * @param d0 First data parameter. ++ * @param d1 Second data parameter. ++ * @param d2 Third data parameter. ++ * @param d3 Fourth data parameter. ++ * @param d4 Fifth data parameter. ++ */ ++TRACE_EVENT(mali_timeline_event, + -+ /** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call -+ * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. 
*/ -+ static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) -+ { -+ _mali_osk_mutex_rw_t *lock = NULL; ++ TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, ++ unsigned int d2, unsigned int d3, unsigned int d4), + -+ lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL); ++ TP_ARGS(event_id, d0, d1, d2, d3, d4), + -+ if (NULL == lock) { -+ return NULL; -+ } ++ TP_STRUCT__entry( ++ __field(unsigned int, event_id) ++ __field(unsigned int, d0) ++ __field(unsigned int, d1) ++ __field(unsigned int, d2) ++ __field(unsigned int, d3) ++ __field(unsigned int, d4) ++ ), + -+ init_rwsem(&lock->rw_sema); -+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); -+ return lock; -+ } ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->d0 = d0; ++ __entry->d1 = d1; ++ __entry->d2 = d2; ++ __entry->d3 = d3; ++ __entry->d4 = d4; ++ ), + -+ /** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode -+ * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */ -+ static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode) -+ { -+ BUG_ON(NULL == lock); -+ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode)); ++ TP_printk("event=%d", __entry->event_id) ++ ); + -+ if (mode == _MALI_OSK_LOCKMODE_RO) { -+ down_read(&lock->rw_sema); -+ } else { -+ down_write(&lock->rw_sema); -+ } ++/** ++ * Define a tracepoint used to regsiter the value of a hardware counter. ++ * Hardware counters belonging to the vertex or fragment processor are ++ * reported via this tracepoint each frame, whilst L2 cache hardware ++ * counters are reported continuously. ++ * ++ * @param counter_id The counter ID. ++ * @param value The value of the counter. ++ */ ++TRACE_EVENT(mali_hw_counter, + -+#ifdef DEBUG -+ if (mode == _MALI_OSK_LOCKMODE_RW) { -+ lock->mode = mode; -+ } else { /* mode == _MALI_OSK_LOCKMODE_RO */ -+ lock->mode = mode; -+ } -+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); -+#endif -+ } ++ TP_PROTO(unsigned int counter_id, unsigned int value), + -+ /** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */ -+ static inline void _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode) -+ { -+ BUG_ON(NULL == lock); -+ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode)); -+#ifdef DEBUG -+ /* make sure the thread releasing the lock actually was the owner */ -+ if (mode == _MALI_OSK_LOCKMODE_RW) { -+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); -+ /* This lock now has no owner */ -+ lock->checker.owner = 0; -+ } -+#endif ++ TP_ARGS(counter_id, value), + -+ if (mode == _MALI_OSK_LOCKMODE_RO) { -+ up_read(&lock->rw_sema); -+ } else { -+ up_write(&lock->rw_sema); -+ } -+ } ++ TP_STRUCT__entry( ++ __field(unsigned int, counter_id) ++ __field(unsigned int, value) ++ ), + -+ /** @brief Free a given memory block which lock pointed to and its type must be -+ * _mali_sok_mutex_rw_t *. 
*/ -+ static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock) -+ { -+ /* Parameter validation */ -+ BUG_ON(NULL == lock); ++ TP_fast_assign( ++ __entry->counter_id = counter_id; ++ ), + -+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ -+ kfree(lock); -+ } ++ TP_printk("event %d = %d", __entry->counter_id, __entry->value) ++ ); + -+ /** @brief Mutex & mutex_interruptible share the same init and term function, because they have the -+ * same osk mutex struct, and the difference between them is which locking function they use */ -+ static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) -+ { -+ _mali_osk_mutex_t *lock = NULL; ++/** ++ * Define a tracepoint used to send a bundle of software counters. ++ * ++ * @param counters The bundle of counters. ++ */ ++TRACE_EVENT(mali_sw_counters, + -+ lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL); ++ TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters), + -+ if (NULL == lock) { -+ return NULL; -+ } -+ mutex_init(&lock->mutex); ++ TP_ARGS(pid, tid, surface_id, counters), + -+ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); -+ return lock; -+ } ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(pid_t, tid) ++ __field(void *, surface_id) ++ __field(unsigned int *, counters) ++ ), + -+ /** @brief Lock the lock->mutex with mutex_lock_interruptible function */ -+ static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock) -+ { -+ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; ++ TP_fast_assign( ++ __entry->pid = pid; ++ __entry->tid = tid; ++ __entry->surface_id = surface_id; ++ __entry->counters = counters; ++ ), + -+ BUG_ON(NULL == lock); ++ TP_printk("counters were %s", __entry->counters == NULL ? "NULL" : "not NULL") ++ ); + -+ if (mutex_lock_interruptible(&lock->mutex)) { -+ printk(KERN_WARNING "Mali: Can not lock mutex\n"); -+ err = _MALI_OSK_ERR_RESTARTSYSCALL; -+ } ++/** ++ * Define a tracepoint used to gather core activity for systrace ++ * @param pid The process id for which the core activity originates from ++ * @param active If the core is active (1) or not (0) ++ * @param core_type The type of core active, either GP (1) or PP (0) ++ * @param core_id The core id that is active for the core_type ++ * @param frame_builder_id The frame builder id associated with this core activity ++ * @param flush_id The flush id associated with this core activity ++ */ ++TRACE_EVENT(mali_core_active, + -+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); -+ return err; -+ } ++ TP_PROTO(pid_t pid, unsigned int active, unsigned int core_type, unsigned int core_id, unsigned int frame_builder_id, unsigned int flush_id), + -+ /** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */ -+ static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); -+ mutex_unlock(&lock->mutex); -+ } ++ TP_ARGS(pid, active, core_type, core_id, frame_builder_id, flush_id), + -+ /** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. 
*/ -+ static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ mutex_lock(&lock->mutex); -+ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); -+ } ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(unsigned int, active) ++ __field(unsigned int, core_type) ++ __field(unsigned int, core_id) ++ __field(unsigned int, frame_builder_id) ++ __field(unsigned int, flush_id) ++ ), + -+ /** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */ -+ static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock) -+ { -+ BUG_ON(NULL == lock); -+ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); -+ mutex_unlock(&lock->mutex); -+ } ++ TP_fast_assign( ++ __entry->pid = pid; ++ __entry->active = active; ++ __entry->core_type = core_type; ++ __entry->core_id = core_id; ++ __entry->frame_builder_id = frame_builder_id; ++ __entry->flush_id = flush_id; ++ ), + -+ /** @brief Free a given memory block which lock point. */ -+ static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock) -+ { -+ /* Parameter validation */ -+ BUG_ON(NULL == lock); ++ TP_printk("%s|%d|%s%i:%x|%d", __entry->active ? "S" : "F", __entry->pid, __entry->core_type ? "GP" : "PP", __entry->core_id, __entry->flush_id, __entry->frame_builder_id) ++ ); + -+ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ -+ kfree(lock); -+ } ++#endif /* MALI_LINUX_TRACE_H */ + -+#ifdef _cplusplus -+} -+#endif ++/* This part must exist outside the header guard. */ ++#include + -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory.c new file mode 100644 -index 000000000..994b04dad +index 000000000..cf3851490 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c -@@ -0,0 +1,146 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory.c +@@ -0,0 +1,528 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -318987,659 +320216,532 @@ index 000000000..994b04dad + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+/** -+ * @file mali_osk_low_level_mem.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ -+ -+#include -+#include ++#include ++#include ++#include ++#include ++#include +#include ++#include ++#include ++#include + -+#include "mali_kernel_common.h" +#include "mali_osk.h" -+#include "mali_ukk.h" ++#include "mali_executor.h" + -+void _mali_osk_mem_barrier(void) -+{ -+ mb(); -+} -+ -+void _mali_osk_write_mem_barrier(void) -+{ -+ wmb(); -+} -+ -+mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description) -+{ -+ return (mali_io_address)ioremap(phys, size); -+} -+ -+void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt) -+{ -+ iounmap((void *)virt); -+} -+ -+_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description) -+{ -+#if MALI_LICENSE_IS_GPL -+ return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */ -+#else -+ return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK); ++#include "mali_memory.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_memory_block_alloc.h" ++#include "mali_memory_util.h" ++#include "mali_memory_virtual.h" ++#include "mali_memory_manager.h" ++#include "mali_memory_cow.h" ++#include "mali_memory_swap_alloc.h" ++#include "mali_memory_defer_bind.h" ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include "mali_memory_secure.h" +#endif -+} + -+void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size) -+{ -+#if !MALI_LICENSE_IS_GPL -+ release_mem_region(phys, size); -+#endif -+} ++extern unsigned int mali_dedicated_mem_size; ++extern unsigned int mali_shared_mem_size; + -+void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val) -+{ -+ __raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset); -+} ++#define MALI_VM_NUM_FAULT_PREFETCH (0x8) + -+u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset) ++static void mali_mem_vma_open(struct vm_area_struct *vma) +{ -+ return ioread32(((u8 *)addr) + offset); -+} ++ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; ++ MALI_DEBUG_PRINT(4, ("Open called on vma %p\n", vma)); + -+void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val) -+{ -+ iowrite32(val, ((u8 *)addr) + offset); ++ /* If need to share the allocation, add ref_count here */ ++ mali_allocation_ref(alloc); ++ return; +} -+ -+void _mali_osk_cache_flushall(void) ++static void mali_mem_vma_close(struct vm_area_struct *vma) +{ -+ /** @note Cached memory is not currently supported in this implementation */ -+} ++ /* If need to share the allocation, unref ref_count here */ ++ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; + -+void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size) -+{ -+ _mali_osk_write_mem_barrier(); ++ mali_allocation_unref(&alloc); ++ vma->vm_private_data = NULL; +} + -+u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size) ++static vm_fault_t mali_mem_vma_fault(struct vm_fault *vmf) +{ -+#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096 -+ u32 retval = 0; -+ void *temp_buf; -+ -+ temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL); -+ if (NULL != temp_buf) { -+ u32 bytes_left_to_copy = size; -+ u32 i; -+ for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) { -+ u32 size_to_copy; -+ u32 size_copied; -+ u32 
bytes_left; -+ -+ if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) { -+ size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE; -+ } else { -+ size_to_copy = bytes_left_to_copy; -+ } ++ struct vm_area_struct *vma = vmf->vma; ++ mali_mem_allocation *alloc = (mali_mem_allocation *)vma->vm_private_data; ++ mali_mem_backend *mem_bkend = NULL; ++ int ret; ++ int prefetch_num = MALI_VM_NUM_FAULT_PREFETCH; + -+ bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy); -+ size_copied = size_to_copy - bytes_left; ++ unsigned long address = (unsigned long)vmf->address; ++ MALI_DEBUG_ASSERT(alloc->backend_handle); ++ MALI_DEBUG_ASSERT((unsigned long)alloc->cpu_mapping.addr <= address); + -+ bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied); -+ size_copied -= bytes_left; ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) { ++ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n")); ++ mutex_unlock(&mali_idr_mutex); ++ return VM_FAULT_SIGBUS; ++ } ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(mem_bkend->type == alloc->type); + -+ bytes_left_to_copy -= size_copied; -+ retval += size_copied; ++ if ((mem_bkend->type == MALI_MEM_COW && (MALI_MEM_BACKEND_FLAG_SWAP_COWED != ++ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) && ++ (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE)) { ++ /*check if use page fault to do COW*/ ++ MALI_DEBUG_PRINT(4, ("mali_vma_fault: do cow allocate on demand!, address=0x%x\n", address)); ++ mutex_lock(&mem_bkend->mutex); ++ ret = mali_mem_cow_allocate_on_demand(mem_bkend, ++ (address - vma->vm_start) / PAGE_SIZE); ++ mutex_unlock(&mem_bkend->mutex); + -+ if (size_copied != size_to_copy) { -+ break; /* Early out, we was not able to copy this entire block */ -+ } ++ if (ret != _MALI_OSK_ERR_OK) { ++ return VM_FAULT_OOM; + } ++ prefetch_num = 1; + -+ kfree(temp_buf); -+ } -+ -+ return retval; -+} -+ -+_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args) -+{ -+ void __user *src; -+ void __user *dst; -+ struct mali_session_data *session; ++ /* handle COW modified range cpu mapping ++ we zap the mapping in cow_modify_range, it will trigger page fault ++ when CPU access it, so here we map it to CPU*/ ++ mutex_lock(&mem_bkend->mutex); ++ ret = mali_mem_cow_cpu_map_pages_locked(mem_bkend, vma, address, prefetch_num); ++ mutex_unlock(&mem_bkend->mutex); + -+ MALI_DEBUG_ASSERT_POINTER(args); ++ if (unlikely(ret != _MALI_OSK_ERR_OK)) { ++ return VM_FAULT_SIGBUS; ++ } ++ } else if ((mem_bkend->type == MALI_MEM_SWAP) || ++ (mem_bkend->type == MALI_MEM_COW && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { ++ u32 offset_in_bkend = (address - vma->vm_start) / PAGE_SIZE; ++ int ret = _MALI_OSK_ERR_OK; + -+ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ mutex_lock(&mem_bkend->mutex); ++ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE) { ++ ret = mali_mem_swap_cow_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page); ++ } else { ++ ret = mali_mem_swap_allocate_page_on_demand(mem_bkend, offset_in_bkend, &vmf->page); ++ } ++ mutex_unlock(&mem_bkend->mutex); + -+ if (NULL == session) { -+ return _MALI_OSK_ERR_INVALID_ARGS; ++ if (ret != _MALI_OSK_ERR_OK) { ++ MALI_DEBUG_PRINT(2, ("Mali swap memory page fault process failed, address=0x%x\n", address)); ++ return VM_FAULT_OOM; ++ } else { ++ return VM_FAULT_LOCKED; ++ } ++ } else { ++ MALI_PRINT_ERROR(("Mali vma fault! 
It never happen, indicating some logic errors in caller.\n")); ++ /*NOT support yet or OOM*/ ++ return VM_FAULT_OOM; + } -+ -+ src = (void __user *)(uintptr_t)args->src; -+ dst = (void __user *)(uintptr_t)args->dest; -+ -+ /* Return number of bytes actually copied */ -+ args->size = _mali_osk_mem_write_safe(dst, src, args->size); -+ return _MALI_OSK_ERR_OK; ++ return VM_FAULT_NOPAGE; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c -new file mode 100644 -index 000000000..5d28d0eb3 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c -@@ -0,0 +1,503 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+ -+/** -+ * @file mali_osk_mali.c -+ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver -+ */ -+#include "../platform/rk/custom_log.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "mali_osk_mali.h" -+#include "mali_kernel_common.h" /* MALI_xxx macros */ -+#include "mali_osk.h" /* kernel side OS functions */ -+#include "mali_kernel_linux.h" -+ -+static mali_bool mali_secure_mode_enabled = MALI_FALSE; -+static mali_bool mali_secure_mode_supported = MALI_FALSE; -+ -+/* Function that init the mali gpu secure mode */ -+void (*mali_secure_mode_deinit)(void) = NULL; -+/* Function that reset GPU and enable the mali gpu secure mode */ -+int (*mali_gpu_reset_and_secure_mode_enable)(void) = NULL; -+/* Function that reset GPU and disable the mali gpu secure mode */ -+int (*mali_gpu_reset_and_secure_mode_disable)(void) = NULL; -+ -+ -+#ifdef CONFIG_MALI_DT -+ -+#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF -+ -+/** -+ * Define the max number of resource we could have. -+ */ -+#define MALI_OSK_MAX_RESOURCE_NUMBER 27 -+ -+/** -+ * Define the max number of resource with interrupts, and they are -+ * the first 20 elements in array mali_osk_resource_bank. -+ */ -+#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20 -+ -+/** -+ * pp core start and end location in mali_osk_resource_bank array. -+ */ -+#define MALI_OSK_RESOURCE_PP_LOCATION_START 2 -+#define MALI_OSK_RESOURCE_PP_LOCATION_END 17 -+ -+/** -+ * L2 cache start and end location in mali_osk_resource_bank array. -+ */ -+#define MALI_OSK_RESOURCE_L2_LOCATION_START 20 -+#define MALI_OSK_RESOURCE_l2_LOCATION_END 22 -+ -+/** -+ * DMA unit location. -+ */ -+#define MALI_OSK_RESOURCE_DMA_LOCATION 26 -+ -+static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = { -+ /*-------------------------------------------------------*/ -+ /* rk_ext : to use dts_for_mali_ko_befor_r5p0-01rel0. 
*/ -+ /* {.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",}, */ -+ {.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "Mali_GP_IRQ",}, -+ /* {.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",}, */ -+ {.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "Mali_GP_MMU_IRQ",}, -+ /* {.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",}, */ -+ {.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "Mali_PP0_IRQ",}, -+ /* {.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",}, */ -+ {.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "Mali_PP0_MMU_IRQ",}, -+ /* {.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",}, */ -+ {.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "Mali_PP1_IRQ",}, -+ /* {.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",}, */ -+ {.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "Mali_PP1_MMU_IRQ",}, + -+ {.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "Mali_PP2_IRQ",}, -+ {.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "Mali_PP2_MMU_IRQ",}, -+ {.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "Mali_PP3_IRQ",}, -+ {.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "Mali_PP3_MMU_IRQ",}, -+ /*-------------------------------------------------------*/ -+ {.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",}, -+ {.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",}, -+ {.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",}, -+ {.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",}, -+ {.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",}, -+ {.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",}, -+ {.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",}, -+ {.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",}, -+ {.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",}, -+ {.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",}, -+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,}, -+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,}, -+ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,}, -+ {.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,}, -+ {.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,}, -+ {.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,}, -+ {.description = "Mali_DMA", .base = MALI_OFFSET_DMA,}, ++static struct vm_operations_struct mali_kernel_vm_ops = { ++ .open = mali_mem_vma_open, ++ .close = mali_mem_vma_close, ++ .fault = mali_mem_vma_fault, +}; + -+static int _mali_osk_get_compatible_name(const char **out_string) -+{ -+ struct device_node *node = mali_platform_device->dev.of_node; -+ -+ MALI_DEBUG_ASSERT(NULL != node); -+ -+ return of_property_read_string(node, "compatible", out_string); -+} + -+_mali_osk_errcode_t _mali_osk_resource_initialize(void) ++/** @ map mali allocation to CPU address ++* ++* Supported backend types: ++* --MALI_MEM_OS ++* -- need to add COW? 
++ *Not supported backend types: ++* -_MALI_MEMORY_BIND_BACKEND_UMP ++* -_MALI_MEMORY_BIND_BACKEND_DMA_BUF ++* -_MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY ++* ++*/ ++int mali_mmap(struct file *filp, struct vm_area_struct *vma) +{ -+ mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE; -+ int i, pp_core_num = 0, l2_core_num = 0; -+ struct resource *res; -+ const char *compatible_name = NULL; ++ struct mali_session_data *session; ++ mali_mem_allocation *mali_alloc = NULL; ++ u32 mali_addr = vma->vm_pgoff << PAGE_SHIFT; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_backend *mem_bkend = NULL; ++ int ret = -EFAULT; + -+ if (0 == _mali_osk_get_compatible_name(&compatible_name)) { -+ if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) { -+ mali_is_450 = MALI_TRUE; -+ MALI_DEBUG_PRINT(2, ("mali-450 device tree detected.")); -+ } else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) { -+ mali_is_470 = MALI_TRUE; -+ MALI_DEBUG_PRINT(2, ("mali-470 device tree detected.")); -+ } ++ session = (struct mali_session_data *)filp->private_data; ++ if (NULL == session) { ++ MALI_PRINT_ERROR(("mmap called without any session data available\n")); ++ return -EFAULT; + } + -+ for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) { -+ res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name); -+ if (res) { -+ mali_osk_resource_bank[i].irq = res->start; -+ } else { -+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; -+ } -+ } ++ MALI_DEBUG_PRINT(4, ("MMap() handler: start=0x%08X, phys=0x%08X, size=0x%08X vma->flags 0x%08x\n", ++ (unsigned int)vma->vm_start, (unsigned int)(vma->vm_pgoff << PAGE_SHIFT), ++ (unsigned int)(vma->vm_end - vma->vm_start), vma->vm_flags)); + -+ for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) { -+ if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) { -+ pp_core_num++; -+ } -+ } ++ /* Operations used on any memory system */ ++ /* do not need to anything in vm open/close now */ + -+ /* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). */ -+ if (0 != pp_core_num % 2) { -+ MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal.")); -+ return _MALI_OSK_ERR_FAULT; ++ /* find mali allocation structure by vaddress*/ ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ if (likely(mali_vma_node)) { ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start); ++ if (unlikely(mali_addr != mali_vma_node->vm_node.start)) { ++ /* only allow to use start address for mmap */ ++ MALI_DEBUG_PRINT(1, ("mali_addr != mali_vma_node->vm_node.start\n")); ++ return -EFAULT; ++ } ++ } else { ++ MALI_DEBUG_ASSERT(NULL == mali_vma_node); ++ return -EFAULT; + } + -+ pp_core_num /= 2; ++ mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start; + -+ /** -+ * we can caculate the number of l2 cache core according the number of pp core number -+ * and device type(mali400/mali450/mali470). 
-+ */ -+ l2_core_num = 1; -+ if (mali_is_450) { -+ if (pp_core_num > 4) { -+ l2_core_num = 3; -+ } else if (pp_core_num <= 4) { -+ l2_core_num = 2; -+ } ++ if (mali_alloc->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) { ++ MALI_DEBUG_PRINT(1, ("ERROR : trying to access varying memory by CPU!\n")); ++ return -EFAULT; + } + -+ for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) { -+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ if (!(mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle))) { ++ MALI_DEBUG_PRINT(1, ("Can't find memory backend in mmap!\n")); ++ mutex_unlock(&mali_idr_mutex); ++ return -EFAULT; + } ++ mutex_unlock(&mali_idr_mutex); + -+ /* If device is not mali-450 type, we have to remove related resource from resource bank. */ -+ if (!(mali_is_450 || mali_is_470)) { -+ for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) { -+ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; -+ } ++ if (!(MALI_MEM_SWAP == mali_alloc->type || ++ (MALI_MEM_COW == mali_alloc->type && (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) { ++ /* Set some bits which indicate that, the memory is IO memory, meaning ++ * that no paging is to be performed and the memory should not be ++ * included in crash dumps. And that the memory is reserved, meaning ++ * that it's present and can never be paged out (see also previous ++ * entry) ++ */ ++ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_PFNMAP); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) ++ vma->vm_flags |= VM_RESERVED; ++#else ++ vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND); ++#endif ++ } else if (MALI_MEM_SWAP == mali_alloc->type) { ++ vma->vm_pgoff = mem_bkend->start_idx; + } + -+ if (mali_is_470) -+ mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ vma->vm_ops = &mali_kernel_vm_ops; + -+ return _MALI_OSK_ERR_OK; -+} ++ mali_alloc->cpu_mapping.addr = (void __user *)vma->vm_start; + -+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res) -+{ -+ int i; ++ /* If it's a copy-on-write mapping, map to read only */ ++ if (!(vma->vm_flags & VM_WRITE)) { ++ MALI_DEBUG_PRINT(4, ("mmap allocation with read only !\n")); ++ /* add VM_WRITE for do_page_fault will check this when a write fault */ ++ vm_flags_set(vma, VM_WRITE | VM_READ); ++ vma->vm_page_prot = PAGE_READONLY; ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE; ++ goto out; ++ } + -+ if (NULL == mali_platform_device) { -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ if (mem_bkend->type == MALI_MEM_OS) { ++ ret = mali_mem_os_cpu_map(mem_bkend, vma); ++ } else if (mem_bkend->type == MALI_MEM_COW && ++ (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { ++ ret = mali_mem_cow_cpu_map(mem_bkend, vma); ++ } else if (mem_bkend->type == MALI_MEM_BLOCK) { ++ ret = mali_mem_block_cpu_map(mem_bkend, vma); ++ } else if ((mem_bkend->type == MALI_MEM_SWAP) || (mem_bkend->type == MALI_MEM_COW && ++ (MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)))) { ++ /*For swappable memory, CPU page table will be created by page fault handler. 
*/ ++ ret = 0; ++ } else if (mem_bkend->type == MALI_MEM_SECURE) { ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ ret = mali_mem_secure_cpu_map(mem_bkend, vma); ++#else ++ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n")); ++ return -EFAULT; ++#endif ++ } else { ++ /* Not support yet*/ ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of backend memory! \n")); ++ return -EFAULT; + } + -+ /* Traverse all of resources in resources bank to find the matching one. */ -+ for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) { -+ if (mali_osk_resource_bank[i].base == addr) { -+ if (NULL != res) { -+ res->base = addr + _mali_osk_resource_base_address(); -+ res->description = mali_osk_resource_bank[i].description; -+ res->irq = mali_osk_resource_bank[i].irq; -+ } -+ return _MALI_OSK_ERR_OK; -+ } ++ if (ret != 0) { ++ MALI_DEBUG_PRINT(1, ("ret != 0\n")); ++ return -EFAULT; + } ++out: ++ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == mali_alloc->magic); + -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ vma->vm_private_data = (void *)mali_alloc; ++ mali_alloc->cpu_mapping.vma = vma; ++ ++ mali_allocation_ref(mali_alloc); ++ ++ return 0; +} + -+uintptr_t _mali_osk_resource_base_address(void) ++_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor) +{ -+ struct resource *reg_res = NULL; -+ uintptr_t ret = 0; ++ u32 size = descriptor->psize; ++ struct mali_session_data *session = descriptor->session; + -+ reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0); ++ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic); + -+ if (NULL != reg_res) { -+ ret = reg_res->start; ++ /* Map dma-buf into this session's page tables */ ++ ++ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { ++ size += MALI_MMU_PAGE_SIZE; + } + -+ return ret; ++ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start, size); +} + -+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size) ++_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size) +{ -+ struct device_node *node = mali_platform_device->dev.of_node; -+ int length = 0, i = 0; -+ u32 u; -+ -+ MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n")); ++ u32 old_size = descriptor->psize; ++ struct mali_session_data *session = descriptor->session; + -+ MALI_DEBUG_ASSERT(NULL != node); ++ MALI_DEBUG_ASSERT(MALI_MEM_ALLOCATION_VALID_MAGIC == descriptor->magic); + -+ if (!of_get_property(node, "pmu_domain_config", &length)) { -+ return; ++ if (descriptor->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { ++ new_size += MALI_MMU_PAGE_SIZE; + } + -+ if (array_size != length / sizeof(u32)) { -+ MALI_PRINT_ERROR(("Wrong pmu domain config in device tree.")); -+ return; ++ if (new_size > old_size) { ++ MALI_DEBUG_ASSERT(new_size <= descriptor->mali_vma_node.vm_node.size); ++ return mali_mmu_pagedir_map(session->page_directory, descriptor->mali_vma_node.vm_node.start + old_size, new_size - old_size); + } ++ return _MALI_OSK_ERR_OK; ++} + -+ of_property_for_each_u32(node, "pmu_domain_config", u) { -+ domain_config_array[i] = (u16)u; -+ i++; ++void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags) ++{ ++ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { ++ size += MALI_MMU_PAGE_SIZE; + } + -+ return; ++ /* Umap and flush L2 */ ++ mali_mmu_pagedir_unmap(session->page_directory, vaddr, size); ++ mali_executor_zap_all_active(session); +} + -+u32 
_mali_osk_get_pmu_switch_delay(void)
+{
+	struct device_node *node = mali_platform_device->dev.of_node;
+	u32 switch_delay;
+
+	MALI_DEBUG_ASSERT(NULL != node);
+
+	if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) {
+		return switch_delay;
+	} else {
+		MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n"));
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MALI_DT */ /* If CONFIG_MALI_DT is not defined. */
+
+_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res)
+{
+	int i;
+	uintptr_t phys_addr;
+
+	if (NULL == mali_platform_device) {
+		/* Not connected to a device */
+		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+	}
+
+	phys_addr = addr + _mali_osk_resource_base_address();
+	for (i = 0; i < mali_platform_device->num_resources; i++) {
+		if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) &&
+		    mali_platform_device->resource[i].start == phys_addr) {
+			if (NULL != res) {
+				res->base = phys_addr;
+				res->description = mali_platform_device->resource[i].name;
+
+				/* Any (optional) IRQ resource belonging to this resource will follow */
+				if ((i + 1) < mali_platform_device->num_resources &&
+				    IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) {
+					res->irq = mali_platform_device->resource[i + 1].start;
+				} else {
+					res->irq = -1;
+				}
+			}
+			return _MALI_OSK_ERR_OK;
+		}
+	}
+
+	return _MALI_OSK_ERR_ITEM_NOT_FOUND;
+}
+
+uintptr_t _mali_osk_resource_base_address(void)
+{
+	uintptr_t lowest_addr = (uintptr_t)(0 - 1);
+	uintptr_t ret = 0;
+
+	if (NULL != mali_platform_device) {
+		int i;
+		for (i = 0; i < mali_platform_device->num_resources; i++) {
+			if (mali_platform_device->resource[i].flags & IORESOURCE_MEM &&
+			    mali_platform_device->resource[i].start < lowest_addr) {
+				lowest_addr = mali_platform_device->resource[i].start;
+				ret = lowest_addr;
+			}
+		}
+	}
+
+	return ret;
+}
+
+void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size)
+{
+	_mali_osk_device_data data = { 0, };
+
+	MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n"));
+	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
+		/* Copy the custom customer power domain config */
+		
_mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config)); ++ if (NULL == session) { ++ MALI_DEBUG_PRINT(1, ("No session data found during session end\n")); ++ return; + } ++ /* free allocation */ ++ mali_free_session_allocations(session); ++ /* do some check in unint*/ ++ mali_memory_manager_uninit(&session->allocation_mgr); + ++ /* Free the lock */ ++ _mali_osk_mutex_term(session->memory_lock); ++ _mali_osk_mutex_term(session->cow_lock); + return; +} + -+u32 _mali_osk_get_pmu_switch_delay(void) ++_mali_osk_errcode_t mali_memory_initialize(void) +{ + _mali_osk_errcode_t err; -+ _mali_osk_device_data data = { 0, }; + -+ err = _mali_osk_device_data_get(&data); ++ idr_init(&mali_backend_idr); ++ mutex_init(&mali_idr_mutex); + ++ err = mali_mem_swap_init(); ++ if (err != _MALI_OSK_ERR_OK) { ++ return err; ++ } ++ err = mali_mem_os_init(); + if (_MALI_OSK_ERR_OK == err) { -+ return data.pmu_switch_delay; ++ err = mali_mem_defer_bind_manager_init(); + } + -+ return 0; ++ return err; +} -+#endif /* CONFIG_MALI_DT */ + -+_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data) ++void mali_memory_terminate(void) +{ -+ MALI_DEBUG_ASSERT_POINTER(data); -+ -+ if (NULL != mali_platform_device) { -+ struct mali_gpu_device_data *os_data = NULL; -+ -+ os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data; -+ if (NULL != os_data) { -+ /* Copy data from OS dependant struct to Mali neutral struct (identical!) */ -+ BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data)); -+ _mali_osk_memcpy(data, os_data, sizeof(*os_data)); -+ -+ return _MALI_OSK_ERR_OK; -+ } ++ mali_mem_swap_term(); ++ mali_mem_defer_bind_manager_destory(); ++ mali_mem_os_term(); ++ if (mali_memory_have_dedicated_memory()) { ++ mali_mem_block_allocator_destroy(); + } -+ -+ return _MALI_OSK_ERR_ITEM_NOT_FOUND; +} + -+u32 _mali_osk_identify_gpu_resource(void) -+{ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL)) -+ /* Mali 450 */ -+ return 0x450; -+ -+ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL)) -+ /* Mali 470 */ -+ return 0x470; -+ -+ /* Mali 400 */ -+ return 0x400; -+} + -+mali_bool _mali_osk_shared_interrupts(void) ++struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type) +{ -+ u32 irqs[128]; -+ u32 i, j, irq, num_irqs_found = 0; -+ -+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); -+ MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources); -+ -+ for (i = 0; i < mali_platform_device->num_resources; i++) { -+ if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) { -+ irq = mali_platform_device->resource[i].start; ++ mali_page_node *page_node = NULL; + -+ for (j = 0; j < num_irqs_found; ++j) { -+ if (irq == irqs[j]) { -+ return MALI_TRUE; -+ } -+ } ++ page_node = kzalloc(sizeof(mali_page_node), GFP_KERNEL); ++ MALI_DEBUG_ASSERT(NULL != page_node); + -+ irqs[num_irqs_found++] = irq; -+ } ++ if (page_node) { ++ page_node->type = type; ++ INIT_LIST_HEAD(&page_node->list); + } + -+ return MALI_FALSE; ++ return page_node; +} + -+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void) ++void _mali_page_node_ref(struct mali_page_node *node) +{ -+ _mali_osk_device_data data = { 0, }; -+ -+ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { -+ if ((NULL != data.secure_mode_init) && (NULL != data.secure_mode_deinit) -+ && (NULL != data.gpu_reset_and_secure_mode_enable) && (NULL != data.gpu_reset_and_secure_mode_disable)) { -+ int err = data.secure_mode_init(); -+ if (err) { 
-+ MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ mali_secure_mode_deinit = data.secure_mode_deinit; -+ mali_gpu_reset_and_secure_mode_enable = data.gpu_reset_and_secure_mode_enable; -+ mali_gpu_reset_and_secure_mode_disable = data.gpu_reset_and_secure_mode_disable; -+ -+ mali_secure_mode_supported = MALI_TRUE; -+ mali_secure_mode_enabled = MALI_FALSE; -+ return _MALI_OSK_ERR_OK; -+ } ++ if (node->type == MALI_PAGE_NODE_OS) { ++ /* add ref to this page */ ++ get_page(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ mali_mem_block_add_ref(node); ++ } else if (node->type == MALI_PAGE_NODE_SWAP) { ++ atomic_inc(&node->swap_it->ref_count); ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); + } -+ MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ +} + -+_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void) ++void _mali_page_node_unref(struct mali_page_node *node) +{ -+ if (NULL != mali_secure_mode_deinit) { -+ mali_secure_mode_deinit(); -+ mali_secure_mode_enabled = MALI_FALSE; -+ mali_secure_mode_supported = MALI_FALSE; -+ return _MALI_OSK_ERR_OK; ++ if (node->type == MALI_PAGE_NODE_OS) { ++ /* unref to this page */ ++ put_page(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ mali_mem_block_dec_ref(node); ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); + } -+ MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ +} + + -+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void) ++void _mali_page_node_add_page(struct mali_page_node *node, struct page *page) +{ -+ /* the mali executor lock must be held before enter this function. */ ++ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_OS == node->type); ++ node->page = page; ++} + -+ MALI_DEBUG_ASSERT(MALI_FALSE == mali_secure_mode_enabled); + -+ if (NULL != mali_gpu_reset_and_secure_mode_enable) { -+ if (mali_gpu_reset_and_secure_mode_enable()) { -+ MALI_DEBUG_PRINT(1, ("Failed to reset GPU or enable gpu secure mode.\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ mali_secure_mode_enabled = MALI_TRUE; -+ return _MALI_OSK_ERR_OK; -+ } -+ MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; ++void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item) ++{ ++ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_SWAP == node->type); ++ node->swap_it = item; +} + -+_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void) ++void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item) +{ -+ /* the mali executor lock must be held before enter this function. 
*/ -+ -+ MALI_DEBUG_ASSERT(MALI_TRUE == mali_secure_mode_enabled); -+ -+ if (NULL != mali_gpu_reset_and_secure_mode_disable) { -+ if (mali_gpu_reset_and_secure_mode_disable()) { -+ MALI_DEBUG_PRINT(1, ("Failed to reset GPU or disable gpu secure mode.\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ mali_secure_mode_enabled = MALI_FALSE; ++ MALI_DEBUG_ASSERT(MALI_PAGE_NODE_BLOCK == node->type); ++ node->blk_it = item; ++} + -+ return _MALI_OSK_ERR_OK; + ++int _mali_page_node_get_ref_count(struct mali_page_node *node) ++{ ++ if (node->type == MALI_PAGE_NODE_OS) { ++ /* get ref count of this page */ ++ return page_count(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ return mali_mem_block_get_ref_count(node); ++ } else if (node->type == MALI_PAGE_NODE_SWAP) { ++ return atomic_read(&node->swap_it->ref_count); ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); + } -+ MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n")); -+ return _MALI_OSK_ERR_UNSUPPORTED; -+ ++ return -1; +} + -+mali_bool _mali_osk_gpu_secure_mode_is_enabled(void) ++ ++dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node) +{ -+ return mali_secure_mode_enabled; ++ if (node->type == MALI_PAGE_NODE_OS) { ++ return page_private(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ return _mali_blk_item_get_phy_addr(node->blk_it); ++ } else if (node->type == MALI_PAGE_NODE_SWAP) { ++ return node->swap_it->dma_addr; ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); ++ } ++ return 0; +} + -+mali_bool _mali_osk_gpu_secure_mode_is_supported(void) ++ ++unsigned long _mali_page_node_get_pfn(struct mali_page_node *node) +{ -+ return mali_secure_mode_supported; ++ if (node->type == MALI_PAGE_NODE_OS) { ++ return page_to_pfn(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ /* get phy addr for BLOCK page*/ ++ return _mali_blk_item_get_pfn(node->blk_it); ++ } else if (node->type == MALI_PAGE_NODE_SWAP) { ++ return page_to_pfn(node->swap_it->page); ++ } else { ++ MALI_DEBUG_PRINT_ERROR(("Invalid type of mali page node! \n")); ++ } ++ return 0; +} + + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory.h new file mode 100644 -index 000000000..0b2d00762 +index 000000000..efebbef23 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c -@@ -0,0 +1,27 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory.h +@@ -0,0 +1,143 @@ +/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -319648,98 +320750,147 @@ index 000000000..0b2d00762 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+/** -+ * @file mali_osk_math.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++#ifndef __MALI_MEMORY_H__ ++#define __MALI_MEMORY_H__ + +#include "mali_osk.h" -+#include ++#include "mali_session.h" + -+u32 _mali_osk_clz(u32 input) ++#include ++#include ++ ++#include "mali_memory_types.h" ++#include "mali_memory_os_alloc.h" ++ ++_mali_osk_errcode_t mali_memory_initialize(void); ++void mali_memory_terminate(void); ++ ++/** @brief Allocate a page table page ++ * ++ * Allocate a page for use as a page directory or page table. The page is ++ * mapped into kernel space. ++ * ++ * @return _MALI_OSK_ERR_OK on success, otherwise an error code ++ * @param table_page GPU pointer to the allocated page ++ * @param mapping CPU pointer to the mapping of the allocated page ++ */ ++MALI_STATIC_INLINE _mali_osk_errcode_t ++mali_mmu_get_table_page(mali_dma_addr *table_page, mali_io_address *mapping) +{ -+ return 32 - fls(input); ++ return mali_mem_os_get_table_page(table_page, mapping); +} + -+u32 _mali_osk_fls(u32 input) ++/** @brief Release a page table page ++ * ++ * Release a page table page allocated through \a mali_mmu_get_table_page ++ * ++ * @param pa the GPU address of the page to release ++ */ ++MALI_STATIC_INLINE void ++mali_mmu_release_table_page(mali_dma_addr phys, void *virt) +{ -+ return fls(input); ++ mali_mem_os_release_table_page(phys, virt); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c -new file mode 100644 -index 000000000..174616b56 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ ++/** @brief mmap function ++ * ++ * mmap syscalls on the Mali device node will end up here. ++ * ++ * This function allocates Mali memory and maps it on CPU and Mali. + */ ++int mali_mmap(struct file *filp, struct vm_area_struct *vma); + -+/** -+ * @file mali_osk_memory.c -+ * Implementation of the OS abstraction layer for the kernel device driver ++/** @brief Start a new memory session ++ * ++ * Called when a process opens the Mali device node. ++ * ++ * @param session Pointer to session to initialize + */ ++_mali_osk_errcode_t mali_memory_session_begin(struct mali_session_data *session); + -+#include "mali_osk.h" -+#include -+#include ++/** @brief Close a memory session ++ * ++ * Called when a process closes the Mali device node. ++ * ++ * Memory allocated by the session will be freed ++ * ++ * @param session Pointer to the session to terminate ++ */ ++void mali_memory_session_end(struct mali_session_data *session); + -+void inline *_mali_osk_calloc(u32 n, u32 size) -+{ -+ return kcalloc(n, size, GFP_KERNEL); -+} ++/** @brief Prepare Mali page tables for mapping ++ * ++ * This function will prepare the Mali page tables for mapping the memory ++ * described by \a descriptor. ++ * ++ * Page tables will be reference counted and allocated, if not yet present. 
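
The two MALI_STATIC_INLINE wrappers above are thin shims over the OS allocator's table-page pool. A minimal usage sketch, assuming the caller is page-directory code that needs one scratch table page:

	mali_dma_addr table_phys;
	mali_io_address table_virt;

	if (_MALI_OSK_ERR_OK == mali_mmu_get_table_page(&table_phys, &table_virt)) {
		/* table_virt is the kernel mapping, table_phys is what the GPU MMU is given */
		/* ... write page table entries through table_virt ... */
		mali_mmu_release_table_page(table_phys, (void *)table_virt);
	}
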
++ * ++ * @param descriptor Pointer to the memory descriptor to the mapping ++ */ ++_mali_osk_errcode_t mali_mem_mali_map_prepare(mali_mem_allocation *descriptor); + -+void inline *_mali_osk_malloc(u32 size) -+{ -+ return kmalloc(size, GFP_KERNEL); -+} ++/** @brief Resize Mali page tables for mapping ++ * ++ * This function will Resize the Mali page tables for mapping the memory ++ * described by \a descriptor. ++ * ++ * Page tables will be reference counted and allocated, if not yet present. ++ * ++ * @param descriptor Pointer to the memory descriptor to the mapping ++ * @param new_size The new size of descriptor ++ */ ++_mali_osk_errcode_t mali_mem_mali_map_resize(mali_mem_allocation *descriptor, u32 new_size); + -+void inline _mali_osk_free(void *ptr) -+{ -+ kfree(ptr); -+} ++/** @brief Free Mali page tables for mapping ++ * ++ * This function will unmap pages from Mali memory and free the page tables ++ * that are now unused. ++ * ++ * The updated pages in the Mali L2 cache will be invalidated, and the MMU TLBs will be zapped if necessary. ++ * ++ * @param descriptor Pointer to the memory descriptor to unmap ++ */ ++void mali_mem_mali_map_free(struct mali_session_data *session, u32 size, mali_address_t vaddr, u32 flags); + -+void inline *_mali_osk_valloc(u32 size) -+{ -+ return vmalloc(size); -+} ++/** @brief Parse resource and prepare the OS memory allocator ++ * ++ * @param size Maximum size to allocate for Mali GPU. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size); + -+void inline _mali_osk_vfree(void *ptr) -+{ -+ vfree(ptr); -+} ++/** @brief Parse resource and prepare the dedicated memory allocator ++ * ++ * @param start Physical start address of dedicated Mali GPU memory. ++ * @param size Size of dedicated Mali GPU memory. ++ * @return _MALI_OSK_ERR_OK on success, otherwise failure. ++ */ ++_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size); + -+void inline *_mali_osk_memcpy(void *dst, const void *src, u32 len) -+{ -+ return memcpy(dst, src, len); -+} + -+void inline *_mali_osk_memset(void *s, u32 c, u32 n) -+{ -+ return memset(s, c, n); -+} ++struct mali_page_node *_mali_page_node_allocate(mali_page_node_type type); + -+mali_bool _mali_osk_mem_check_allocated(u32 max_allocated) -+{ -+ /* No need to prevent an out-of-memory dialogue appearing on Linux, -+ * so we always return MALI_TRUE. 
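
The two mali_memory_core_resource_* entry points declared in this header are what platform setup code calls to size the OS allocator and, optionally, register a dedicated physical bank. A sketch with made-up sizes and addresses:

	_mali_osk_errcode_t err;

	/* Example budget for the OS allocator (256 MiB is a made-up value). */
	err = mali_memory_core_resource_os_memory(256 * 1024 * 1024);
	if (_MALI_OSK_ERR_OK != err)
		return err;

	/* Optionally hand a carved-out physical bank to the block allocator
	 * (start address and size are made-up example values). */
	err = mali_memory_core_resource_dedicated_memory(0x80000000, 64 * 1024 * 1024);
	if (_MALI_OSK_ERR_OK != err)
		return err;
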
-+ */ -+ return MALI_TRUE; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c ++void _mali_page_node_ref(struct mali_page_node *node); ++void _mali_page_node_unref(struct mali_page_node *node); ++void _mali_page_node_add_page(struct mali_page_node *node, struct page *page); ++ ++void _mali_page_node_add_block_item(struct mali_page_node *node, mali_block_item *item); ++ ++void _mali_page_node_add_swap_item(struct mali_page_node *node, struct mali_swap_item *item); ++ ++int _mali_page_node_get_ref_count(struct mali_page_node *node); ++dma_addr_t _mali_page_node_get_dma_addr(struct mali_page_node *node); ++unsigned long _mali_page_node_get_pfn(struct mali_page_node *node); ++ ++#endif /* __MALI_MEMORY_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c new file mode 100644 -index 000000000..9845187f8 +index 000000000..bccef3576 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c -@@ -0,0 +1,81 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.c +@@ -0,0 +1,362 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -319748,273 +320899,366 @@ index 000000000..9845187f8 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_osk_misc.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ -+#include -+#include -+#include -+#include -+#include -+#include ++#include "mali_kernel_common.h" ++#include "mali_memory.h" ++#include "mali_memory_block_alloc.h" +#include "mali_osk.h" ++#include + -+#if !defined(CONFIG_MALI_QUIET) -+void _mali_osk_dbgmsg(const char *fmt, ...) -+{ -+ va_list args; -+ va_start(args, fmt); -+ vprintk(fmt, args); -+ va_end(args); -+} -+#endif /* !defined(CONFIG_MALI_QUIET) */ -+ -+u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...) 
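
As the comments above note, mali_mem_block_add_ref()/mali_mem_block_dec_ref() are not atomic, so callers are expected to serialise them with the allocator's sp_lock. A sketch, where m_page stands for an already looked-up MALI_PAGE_NODE_BLOCK node (hypothetical here):

	mali_block_allocator *info = mali_mem_block_gobal_allocator;

	spin_lock(&info->sp_lock);
	mali_mem_block_add_ref(m_page);   /* one more user of this 4 KiB block */
	/* ... record the block in the second user's page list ... */
	mali_mem_block_dec_ref(m_page);
	spin_unlock(&info->sp_lock);
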
-+{ -+ int res; -+ va_list args; -+ va_start(args, fmt); + -+ res = vscnprintf(buf, (size_t)size, fmt, args); ++static mali_block_allocator *mali_mem_block_gobal_allocator = NULL; + -+ va_end(args); -+ return res; ++unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item) ++{ ++ return (item->phy_addr & ~(MALI_BLOCK_REF_MASK)); +} + -+void _mali_osk_abort(void) ++ ++unsigned long _mali_blk_item_get_pfn(mali_block_item *item) +{ -+ /* make a simple fault by dereferencing a NULL pointer */ -+ dump_stack(); -+ *(volatile int *)0 = 0; ++ return (item->phy_addr / MALI_BLOCK_SIZE); +} + -+void _mali_osk_break(void) ++ ++u32 mali_mem_block_get_ref_count(mali_page_node *node) +{ -+ _mali_osk_abort(); ++ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); ++ return (node->blk_it->phy_addr & MALI_BLOCK_REF_MASK); +} + -+u32 _mali_osk_get_pid(void) ++ ++/* Increase the refence count ++* It not atomic, so it need to get sp_lock before call this function ++*/ ++ ++u32 mali_mem_block_add_ref(mali_page_node *node) +{ -+ /* Thread group ID is the process ID on Linux */ -+ return (u32)current->tgid; ++ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); ++ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) < MALI_BLOCK_MAX_REF_COUNT); ++ return (node->blk_it->phy_addr++ & MALI_BLOCK_REF_MASK); +} + -+char *_mali_osk_get_comm(void) ++/* Decase the refence count ++* It not atomic, so it need to get sp_lock before call this function ++*/ ++u32 mali_mem_block_dec_ref(mali_page_node *node) +{ -+ return (char *)current->comm; ++ MALI_DEBUG_ASSERT(node->type == MALI_PAGE_NODE_BLOCK); ++ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(node) > 0); ++ return (node->blk_it->phy_addr-- & MALI_BLOCK_REF_MASK); +} + + -+u32 _mali_osk_get_tid(void) ++static mali_block_allocator *mali_mem_block_allocator_create(u32 base_address, u32 size) +{ -+ /* pid is actually identifying the thread on Linux */ -+ u32 tid = current->pid; ++ mali_block_allocator *info; ++ u32 usable_size; ++ u32 num_blocks; ++ mali_page_node *m_node; ++ mali_block_item *mali_blk_items = NULL; ++ int i = 0; + -+ /* If the pid is 0 the core was idle. Instead of returning 0 we return a special number -+ * identifying which core we are on. 
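
mali_mem_block_alloc() above only moves nodes from the allocator's free list into block_mem->pfns; mapping and eventual release are separate steps. A sketch of the expected pairing, where backend, session, vaddr and size are assumed to come from the surrounding allocation code, and MALI_MMU_FLAGS_DEFAULT is used as an example property set:

	mali_mem_block_mem *block_mem = &backend->block_mem;

	if (0 != mali_mem_block_alloc(block_mem, size))
		return _MALI_OSK_ERR_NOMEM;   /* free list could not cover the request */

	/* Map every 4 KiB block into the session's GPU address space. */
	mali_mem_block_mali_map(block_mem, session, vaddr, MALI_MMU_FLAGS_DEFAULT);

	/* Teardown later goes through mali_mem_block_release(), which unmaps the
	 * range and then returns the blocks to the allocator's free list. */
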
*/ -+ if (0 == tid) { -+ tid = -(1 + raw_smp_processor_id()); ++ usable_size = size & ~(MALI_BLOCK_SIZE - 1); ++ MALI_DEBUG_PRINT(3, ("Mali block allocator create for region starting at 0x%08X length 0x%08X\n", base_address, size)); ++ MALI_DEBUG_PRINT(4, ("%d usable bytes\n", usable_size)); ++ num_blocks = usable_size / MALI_BLOCK_SIZE; ++ MALI_DEBUG_PRINT(4, ("which becomes %d blocks\n", num_blocks)); ++ ++ if (usable_size == 0) { ++ MALI_DEBUG_PRINT(1, ("Memory block of size %d is unusable\n", size)); ++ return NULL; + } + -+ return tid; ++ info = _mali_osk_calloc(1, sizeof(mali_block_allocator)); ++ if (NULL != info) { ++ INIT_LIST_HEAD(&info->free); ++ spin_lock_init(&info->sp_lock); ++ info->total_num = num_blocks; ++ mali_blk_items = _mali_osk_calloc(1, sizeof(mali_block_item) * num_blocks); ++ ++ if (mali_blk_items) { ++ info->items = mali_blk_items; ++ /* add blocks(4k size) to free list*/ ++ for (i = 0 ; i < num_blocks ; i++) { ++ /* add block information*/ ++ mali_blk_items[i].phy_addr = base_address + (i * MALI_BLOCK_SIZE); ++ /* add to free list */ ++ m_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK); ++ if (m_node == NULL) ++ goto fail; ++ _mali_page_node_add_block_item(m_node, &(mali_blk_items[i])); ++ list_add_tail(&m_node->list, &info->free); ++ atomic_add(1, &info->free_num); ++ } ++ return info; ++ } ++ } ++fail: ++ mali_mem_block_allocator_destroy(); ++ return NULL; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c -new file mode 100644 -index 000000000..a05f8f066 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c -@@ -0,0 +1,182 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ + -+/** -+ * @file mali_osk_notification.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++void mali_mem_block_allocator_destroy(void) ++{ ++ struct mali_page_node *m_page, *m_tmp; ++ mali_block_allocator *info = mali_mem_block_gobal_allocator; ++ MALI_DEBUG_ASSERT_POINTER(info); ++ MALI_DEBUG_PRINT(4, ("Memory block destroy !\n")); + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ if (NULL == info) ++ return; + -+#include -+#include -+#include ++ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); ++ list_del(&m_page->list); ++ kfree(m_page); ++ } + -+/** -+ * Declaration of the notification queue object type -+ * Contains a linked list of notification pending delivery to user space. -+ * It also contains a wait queue of exclusive waiters blocked in the ioctl -+ * When a new notification is posted a single thread is resumed. 
-+ */ -+struct _mali_osk_notification_queue_t_struct { -+ spinlock_t mutex; /**< Mutex protecting the list */ -+ wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */ -+ struct list_head head; /**< List of notifications waiting to be picked up */ -+}; ++ _mali_osk_free(info->items); ++ _mali_osk_free(info); ++} + -+typedef struct _mali_osk_notification_wrapper_t_struct { -+ struct list_head list; /**< Internal linked list variable */ -+ _mali_osk_notification_t data; /**< Notification data */ -+} _mali_osk_notification_wrapper_t; ++u32 mali_mem_block_release(mali_mem_backend *mem_bkend) ++{ ++ mali_mem_allocation *alloc = mem_bkend->mali_allocation; ++ u32 free_pages_nr = 0; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK); + -+_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void) ++ /* Unmap the memory from the mali virtual address space. */ ++ mali_mem_block_mali_unmap(alloc); ++ mutex_lock(&mem_bkend->mutex); ++ free_pages_nr = mali_mem_block_free(&mem_bkend->block_mem); ++ mutex_unlock(&mem_bkend->mutex); ++ return free_pages_nr; ++} ++ ++ ++int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size) +{ -+ _mali_osk_notification_queue_t *result; ++ struct mali_page_node *m_page, *m_tmp; ++ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE; ++ mali_block_allocator *info = mali_mem_block_gobal_allocator; ++ MALI_DEBUG_ASSERT_POINTER(info); + -+ result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL); -+ if (NULL == result) return NULL; ++ MALI_DEBUG_PRINT(4, ("BLOCK Mem: Allocate size = 0x%x\n", size)); ++ /*do some init */ ++ INIT_LIST_HEAD(&block_mem->pfns); + -+ spin_lock_init(&result->mutex); -+ init_waitqueue_head(&result->receive_queue); -+ INIT_LIST_HEAD(&result->head); ++ spin_lock(&info->sp_lock); ++ /*check if have enough space*/ ++ if (atomic_read(&info->free_num) > page_count) { ++ list_for_each_entry_safe(m_page, m_tmp , &info->free, list) { ++ if (page_count > 0) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); ++ MALI_DEBUG_ASSERT(mali_mem_block_get_ref_count(m_page) == 0); ++ list_move(&m_page->list, &block_mem->pfns); ++ block_mem->count++; ++ atomic_dec(&info->free_num); ++ _mali_page_node_ref(m_page); ++ } else { ++ break; ++ } ++ page_count--; ++ } ++ } else { ++ /* can't allocate from BLOCK memory*/ ++ spin_unlock(&info->sp_lock); ++ return -1; ++ } + -+ return result; ++ spin_unlock(&info->sp_lock); ++ return 0; +} + -+_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size) ++u32 mali_mem_block_free(mali_mem_block_mem *block_mem) +{ -+ /* OPT Recycling of notification objects */ -+ _mali_osk_notification_wrapper_t *notification; ++ u32 free_pages_nr = 0; + -+ notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size, -+ GFP_KERNEL | __GFP_HIGH | __GFP_RETRY_MAYFAIL); -+ if (NULL == notification) { -+ MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n")); -+ return NULL; -+ } ++ free_pages_nr = mali_mem_block_free_list(&block_mem->pfns); ++ MALI_DEBUG_PRINT(4, ("BLOCK Mem free : allocated size = 0x%x, free size = 0x%x\n", block_mem->count * _MALI_OSK_MALI_PAGE_SIZE, ++ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); ++ block_mem->count = 0; ++ MALI_DEBUG_ASSERT(list_empty(&block_mem->pfns)); + -+ /* Init the list */ -+ INIT_LIST_HEAD(¬ification->list); ++ return free_pages_nr; ++} + -+ if (0 != size) { -+ notification->data.result_buffer = ((u8 
*)notification) + sizeof(_mali_osk_notification_wrapper_t); -+ } else { -+ notification->data.result_buffer = NULL; ++ ++u32 mali_mem_block_free_list(struct list_head *list) ++{ ++ struct mali_page_node *m_page, *m_tmp; ++ mali_block_allocator *info = mali_mem_block_gobal_allocator; ++ u32 free_pages_nr = 0; ++ ++ if (info) { ++ spin_lock(&info->sp_lock); ++ list_for_each_entry_safe(m_page, m_tmp , list, list) { ++ if (1 == _mali_page_node_get_ref_count(m_page)) { ++ free_pages_nr++; ++ } ++ mali_mem_block_free_node(m_page); ++ } ++ spin_unlock(&info->sp_lock); + } ++ return free_pages_nr; ++} + -+ /* set up the non-allocating fields */ -+ notification->data.notification_type = type; -+ notification->data.result_buffer_size = size; ++/* free the node,*/ ++void mali_mem_block_free_node(struct mali_page_node *node) ++{ ++ mali_block_allocator *info = mali_mem_block_gobal_allocator; + -+ /* all ok */ -+ return &(notification->data); ++ /* only handle BLOCK node */ ++ if (node->type == MALI_PAGE_NODE_BLOCK && info) { ++ /*Need to make this atomic?*/ ++ if (1 == _mali_page_node_get_ref_count(node)) { ++ /*Move to free list*/ ++ _mali_page_node_unref(node); ++ list_move_tail(&node->list, &info->free); ++ atomic_add(1, &info->free_num); ++ } else { ++ _mali_page_node_unref(node); ++ list_del(&node->list); ++ kfree(node); ++ } ++ } +} + -+void _mali_osk_notification_delete(_mali_osk_notification_t *object) ++/* unref the node, but not free it */ ++_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node) +{ -+ _mali_osk_notification_wrapper_t *notification; -+ MALI_DEBUG_ASSERT_POINTER(object); ++ mali_block_allocator *info = mali_mem_block_gobal_allocator; ++ mali_page_node *new_node; + -+ notification = container_of(object, _mali_osk_notification_wrapper_t, data); ++ /* only handle BLOCK node */ ++ if (node->type == MALI_PAGE_NODE_BLOCK && info) { ++ /*Need to make this atomic?*/ ++ if (1 == _mali_page_node_get_ref_count(node)) { ++ /* allocate a new node, Add to free list, keep the old node*/ ++ _mali_page_node_unref(node); ++ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_BLOCK); ++ if (new_node) { ++ memcpy(new_node, node, sizeof(mali_page_node)); ++ list_add(&new_node->list, &info->free); ++ atomic_add(1, &info->free_num); ++ } else ++ return _MALI_OSK_ERR_FAULT; + -+ /* Free the container */ -+ kfree(notification); ++ } else { ++ _mali_page_node_unref(node); ++ } ++ } ++ return _MALI_OSK_ERR_OK; +} + -+void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue) ++ ++int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props) +{ -+ _mali_osk_notification_t *result; -+ MALI_DEBUG_ASSERT_POINTER(queue); ++ struct mali_page_directory *pagedir = session->page_directory; ++ struct mali_page_node *m_page; ++ dma_addr_t phys; ++ u32 virt = vaddr; ++ u32 prop = props; + -+ while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) { -+ _mali_osk_notification_delete(result); ++ list_for_each_entry(m_page, &block_mem->pfns, list) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); ++ phys = _mali_page_node_get_dma_addr(m_page); ++#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) ++ /* Verify that the "physical" address is 32-bit and ++ * usable for Mali, when on a system with bus addresses ++ * wider than 32-bit. 
*/ ++ MALI_DEBUG_ASSERT(0 == (phys >> 32)); ++#endif ++ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); ++ virt += MALI_MMU_PAGE_SIZE; + } + -+ /* not much to do, just free the memory */ -+ kfree(queue); ++ return 0; +} -+void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object) ++ ++void mali_mem_block_mali_unmap(mali_mem_allocation *alloc) +{ -+#if defined(MALI_UPPER_HALF_SCHEDULING) -+ unsigned long irq_flags; -+#endif ++ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ _mali_osk_notification_wrapper_t *notification; -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ MALI_DEBUG_ASSERT_POINTER(object); ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); ++} + -+ notification = container_of(object, _mali_osk_notification_wrapper_t, data); + -+#if defined(MALI_UPPER_HALF_SCHEDULING) -+ spin_lock_irqsave(&queue->mutex, irq_flags); -+#else -+ spin_lock(&queue->mutex); -+#endif ++int mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) ++{ ++ int ret; ++ mali_mem_block_mem *block_mem = &mem_bkend->block_mem; ++ unsigned long addr = vma->vm_start; ++ struct mali_page_node *m_page; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_BLOCK); + -+ list_add_tail(¬ification->list, &queue->head); ++ list_for_each_entry(m_page, &block_mem->pfns, list) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_BLOCK); ++ ret = vmf_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page)); + -+#if defined(MALI_UPPER_HALF_SCHEDULING) -+ spin_unlock_irqrestore(&queue->mutex, irq_flags); -+#else -+ spin_unlock(&queue->mutex); -+#endif ++ if (unlikely(0 != ret)) { ++ return -EFAULT; ++ } ++ addr += _MALI_OSK_MALI_PAGE_SIZE; + -+ /* and wake up one possible exclusive waiter */ -+ wake_up(&queue->receive_queue); ++ } ++ ++ return 0; +} + -+_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result) ++ ++_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size) +{ -+#if defined(MALI_UPPER_HALF_SCHEDULING) -+ unsigned long irq_flags; -+#endif ++ mali_block_allocator *allocator; + -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND; -+ _mali_osk_notification_wrapper_t *wrapper_object; ++ /* Do the low level linux operation first */ + -+#if defined(MALI_UPPER_HALF_SCHEDULING) -+ spin_lock_irqsave(&queue->mutex, irq_flags); -+#else -+ spin_lock(&queue->mutex); -+#endif ++ /* Request ownership of the memory */ ++ if (_MALI_OSK_ERR_OK != _mali_osk_mem_reqregion(start, size, "Dedicated Mali GPU memory")) { ++ MALI_DEBUG_PRINT(1, ("Failed to request memory region for frame buffer (0x%08X - 0x%08X)\n", start, start + size - 1)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ if (!list_empty(&queue->head)) { -+ wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list); -+ *result = &(wrapper_object->data); -+ list_del_init(&wrapper_object->list); -+ ret = _MALI_OSK_ERR_OK; ++ /* Create generic block allocator object to handle it */ ++ allocator = mali_mem_block_allocator_create(start, size); ++ ++ if (NULL == allocator) { ++ MALI_DEBUG_PRINT(1, ("Memory bank registration failed\n")); ++ _mali_osk_mem_unreqregion(start, size); ++ MALI_ERROR(_MALI_OSK_ERR_FAULT); + } + -+#if 
defined(MALI_UPPER_HALF_SCHEDULING) -+ spin_unlock_irqrestore(&queue->mutex, irq_flags); -+#else -+ spin_unlock(&queue->mutex); -+#endif ++ mali_mem_block_gobal_allocator = (mali_block_allocator *)allocator; + -+ return ret; ++ return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result) ++mali_bool mali_memory_have_dedicated_memory(void) +{ -+ /* check input */ -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ MALI_DEBUG_ASSERT_POINTER(result); -+ -+ /* default result */ -+ *result = NULL; ++ return mali_mem_block_gobal_allocator ? MALI_TRUE : MALI_FALSE; ++} + -+ if (wait_event_interruptible(queue->receive_queue, -+ _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) { -+ return _MALI_OSK_ERR_RESTARTSYSCALL; -+ } ++u32 mali_mem_block_allocator_stat(void) ++{ ++ mali_block_allocator *allocator = mali_mem_block_gobal_allocator; ++ MALI_DEBUG_ASSERT_POINTER(allocator); + -+ return _MALI_OSK_ERR_OK; /* all ok */ ++ return (allocator->total_num - atomic_read(&allocator->free_num)) * _MALI_OSK_MALI_PAGE_SIZE; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h new file mode 100644 -index 000000000..e28e2eb21 +index 000000000..70fd9ec25 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c -@@ -0,0 +1,83 @@ -+/** -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_block_alloc.h +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (C) 2010, 2013, 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -320023,87 +321267,62 @@ index 000000000..e28e2eb21 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+/** -+ * @file mali_osk_pm.c -+ * Implementation of the callback functions from common power management -+ */ ++#ifndef __MALI_BLOCK_ALLOCATOR_H__ ++#define __MALI_BLOCK_ALLOCATOR_H__ + -+#include ++#include "mali_session.h" ++#include "mali_memory.h" ++#include + -+#include "mali_kernel_linux.h" -+#ifdef CONFIG_PM_RUNTIME -+#include -+#endif /* CONFIG_PM_RUNTIME */ -+#include -+#include -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++#include "mali_memory_types.h" + -+/* Can NOT run in atomic context */ -+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void) -+{ -+#ifdef CONFIG_PM_RUNTIME -+ int err; -+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); -+ err = pm_runtime_get_sync(&(mali_platform_device->dev)); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) -+ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); -+#endif -+ if (0 > err) { -+ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif -+ return _MALI_OSK_ERR_OK; -+} ++#define MALI_BLOCK_SIZE (PAGE_SIZE) /* 4 kB, manage BLOCK memory as page size */ ++#define MALI_BLOCK_REF_MASK (0xFFF) ++#define MALI_BLOCK_MAX_REF_COUNT (0xFFF) + -+/* Can run in atomic context */ -+_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void) -+{ -+#ifdef CONFIG_PM_RUNTIME -+ int err; -+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); -+ err = pm_runtime_get(&(mali_platform_device->dev)); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) -+ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); -+#endif -+ if (0 > err && -EINPROGRESS != err) { -+ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+#endif -+ return _MALI_OSK_ERR_OK; -+} + + -+/* Can run in atomic context */ -+void _mali_osk_pm_dev_ref_put(void) -+{ -+#ifdef CONFIG_PM_RUNTIME -+ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) -+ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); -+ pm_runtime_put_autosuspend(&(mali_platform_device->dev)); -+#else -+ pm_runtime_put(&(mali_platform_device->dev)); -+#endif -+#endif -+} ++typedef struct mali_block_allocator { ++ /* ++ * In free list, each node's ref_count is 0, ++ * ref_count added when allocated or referenced in COW ++ */ ++ mali_block_item *items; /* information for each block item*/ ++ struct list_head free; /*free list of mali_memory_node*/ ++ spinlock_t sp_lock; /*lock for reference count & free list opertion*/ ++ u32 total_num; /* Number of total pages*/ ++ atomic_t free_num; /*number of free pages*/ ++} mali_block_allocator; + -+void _mali_osk_pm_dev_barrier(void) -+{ -+#ifdef CONFIG_PM_RUNTIME -+ pm_runtime_barrier(&(mali_platform_device->dev)); -+#endif -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c ++unsigned long _mali_blk_item_get_phy_addr(mali_block_item *item); ++unsigned long _mali_blk_item_get_pfn(mali_block_item *item); ++u32 mali_mem_block_get_ref_count(mali_page_node *node); ++u32 mali_mem_block_add_ref(mali_page_node *node); ++u32 mali_mem_block_dec_ref(mali_page_node *node); ++u32 mali_mem_block_release(mali_mem_backend *mem_bkend); ++int mali_mem_block_alloc(mali_mem_block_mem *block_mem, u32 size); ++int mali_mem_block_mali_map(mali_mem_block_mem *block_mem, struct mali_session_data *session, u32 vaddr, u32 props); ++void mali_mem_block_mali_unmap(mali_mem_allocation *alloc); ++ ++int 
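
Because every block is MALI_BLOCK_SIZE (one page, 4 KiB) aligned, the low 12 bits of mali_block_item::phy_addr are always zero and are reused as the per-block reference count; MALI_BLOCK_REF_MASK and MALI_BLOCK_MAX_REF_COUNT encode exactly that. A small illustration with a made-up address:

	mali_block_item item;

	item.phy_addr = 0x8012C000;   /* 4 KiB aligned block address, ref count 0        */
	item.phy_addr++;              /* what mali_mem_block_add_ref() does for one user */
	item.phy_addr++;              /* ... and for a second user                       */

	/* _mali_blk_item_get_phy_addr(&item)  -> 0x8012C000 (ref bits masked off) */
	/* item.phy_addr & MALI_BLOCK_REF_MASK -> 2, the current reference count   */
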
mali_mem_block_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); ++_mali_osk_errcode_t mali_memory_core_resource_dedicated_memory(u32 start, u32 size); ++mali_bool mali_memory_have_dedicated_memory(void); ++u32 mali_mem_block_free(mali_mem_block_mem *block_mem); ++u32 mali_mem_block_free_list(struct list_head *list); ++void mali_mem_block_free_node(struct mali_page_node *node); ++void mali_mem_block_allocator_destroy(void); ++_mali_osk_errcode_t mali_mem_block_unref_node(struct mali_page_node *node); ++u32 mali_mem_block_allocator_stat(void); ++ ++#endif /* __MALI_BLOCK_ALLOCATOR_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c new file mode 100644 -index 000000000..9e977ea4d +index 000000000..2b7fdc62d --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c -@@ -0,0 +1,1282 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.c +@@ -0,0 +1,776 @@ +/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -320111,1287 +321330,1103 @@ index 000000000..9e977ea4d + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include ++#ifdef CONFIG_ARM ++#include ++#endif ++#include + -+#include ++#include "mali_memory.h" +#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_ukk.h" +#include "mali_uk_types.h" -+#include "mali_osk_profiling.h" -+#include "mali_linux_trace.h" -+#include "mali_gp.h" -+#include "mali_pp.h" -+#include "mali_l2_cache.h" -+#include "mali_user_settings_db.h" -+#include "mali_executor.h" -+#include "mali_memory_manager.h" -+ -+#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100 -+#define MALI_PROFILING_STREAM_HOLD_TIME 1000000 /*1 ms */ -+ -+#define MALI_PROFILING_STREAM_BUFFER_SIZE (1 << 12) -+#define MALI_PROFILING_STREAM_BUFFER_NUM 100 ++#include "mali_osk.h" ++#include "mali_kernel_linux.h" ++#include "mali_memory_cow.h" ++#include "mali_memory_block_alloc.h" ++#include "mali_memory_swap_alloc.h" + +/** -+ * Define the mali profiling stream struct. 
-+ */ -+typedef struct mali_profiling_stream { -+ u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE]; -+ u32 used_size; -+ struct list_head list; -+} mali_profiling_stream; -+ -+typedef struct mali_profiling_stream_list { -+ spinlock_t spin_lock; -+ struct list_head free_list; -+ struct list_head queue_list; -+} mali_profiling_stream_list; -+ -+static const char mali_name[] = "4xx"; -+static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n"; ++* allocate pages for COW backend and flush cache ++*/ ++static struct page *mali_mem_cow_alloc_page(void) + -+static u32 profiling_sample_rate = 0; -+static u32 first_sw_counter_index = 0; ++{ ++ mali_mem_os_mem os_mem; ++ struct mali_page_node *node; ++ struct page *new_page; + -+static mali_bool l2_cache_counter_if_enabled = MALI_FALSE; -+static u32 num_counters_enabled = 0; -+static u32 mem_counters_enabled = 0; ++ int ret = 0; ++ /* allocate pages from os mem */ ++ ret = mali_mem_os_alloc_pages(&os_mem, _MALI_OSK_MALI_PAGE_SIZE); + -+static _mali_osk_atomic_t stream_fd_if_used; ++ if (ret) { ++ return NULL; ++ } + -+static wait_queue_head_t stream_fd_wait_queue; -+static mali_profiling_counter *global_mali_profiling_counters = NULL; -+static u32 num_global_mali_profiling_counters = 0; ++ MALI_DEBUG_ASSERT(1 == os_mem.count); + -+static mali_profiling_stream_list *global_mali_stream_list = NULL; -+static mali_profiling_stream *mali_counter_stream = NULL; -+static mali_profiling_stream *mali_core_activity_stream = NULL; -+static u64 mali_core_activity_stream_dequeue_time = 0; -+static spinlock_t mali_activity_lock; -+static u32 mali_activity_cores_num = 0; -+static struct hrtimer profiling_sampling_timer; ++ node = _MALI_OSK_CONTAINER_OF(os_mem.pages.next, struct mali_page_node, list); ++ new_page = node->page; ++ node->page = NULL; ++ list_del(&node->list); ++ kfree(node); + -+const char *_mali_mem_counter_descriptions[] = _MALI_MEM_COUTNER_DESCRIPTIONS; -+const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS; ++ return new_page; ++} + -+static u32 current_profiling_pid = 0; + -+static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list) ++static struct list_head *_mali_memory_cow_get_node_list(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size) +{ -+ mali_profiling_stream *profiling_stream, *tmp_profiling_stream; -+ MALI_DEBUG_ASSERT_POINTER(profiling_stream_list); ++ MALI_DEBUG_ASSERT(MALI_MEM_OS == target_bk->type || MALI_MEM_COW == target_bk->type || ++ MALI_MEM_BLOCK == target_bk->type || MALI_MEM_SWAP == target_bk->type); + -+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) { -+ list_del(&profiling_stream->list); -+ kfree(profiling_stream); -+ } -+ -+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) { -+ list_del(&profiling_stream->list); -+ kfree(profiling_stream); ++ if (MALI_MEM_OS == target_bk->type) { ++ MALI_DEBUG_ASSERT(&target_bk->os_mem); ++ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->os_mem.count); ++ return &target_bk->os_mem.pages; ++ } else if (MALI_MEM_COW == target_bk->type) { ++ MALI_DEBUG_ASSERT(&target_bk->cow_mem); ++ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->cow_mem.count); ++ return &target_bk->cow_mem.pages; ++ } else if (MALI_MEM_BLOCK == target_bk->type) { ++ MALI_DEBUG_ASSERT(&target_bk->block_mem); ++ 
MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->block_mem.count); ++ return &target_bk->block_mem.pfns; ++ } else if (MALI_MEM_SWAP == target_bk->type) { ++ MALI_DEBUG_ASSERT(&target_bk->swap_mem); ++ MALI_DEBUG_ASSERT(((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE) <= target_bk->swap_mem.count); ++ return &target_bk->swap_mem.pages; + } + -+ kfree(profiling_stream_list); ++ return NULL; +} + -+static void _mali_profiling_global_stream_list_free(void) -+{ -+ mali_profiling_stream *profiling_stream, *tmp_profiling_stream; -+ unsigned long irq_flags; ++/** ++* Do COW for os memory - support do COW for memory from bank memory ++* The range_start/size can be zero, which means it will call cow_modify_range ++* latter. ++* This function allocate new pages for COW backend from os mem for a modified range ++* It will keep the page which not in the modified range and Add ref to it ++* ++* @target_bk - target allocation's backend(the allocation need to do COW) ++* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align) ++* @target_size - size of target allocation to do COW (for support memory bank) ++* @backend -COW backend ++* @range_start - offset of modified range (4K align) ++* @range_size - size of modified range ++*/ ++_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size, ++ mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size) ++{ ++ mali_mem_cow *cow = &backend->cow_mem; ++ struct mali_page_node *m_page, *m_tmp, *page_node; ++ int target_page = 0; ++ struct page *new_page; ++ struct list_head *pages = NULL; + -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); -+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); -+ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) { -+ profiling_stream->used_size = 0; -+ list_move(&profiling_stream->list, &global_mali_stream_list->free_list); ++ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size); ++ ++ if (NULL == pages) { ++ MALI_DEBUG_PRINT_ERROR(("No memory page need to cow ! 
\n")); ++ return _MALI_OSK_ERR_FAULT; + } -+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); -+} + -+static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream) -+{ -+ unsigned long irq_flags; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); -+ MALI_DEBUG_ASSERT_POINTER(stream_list); ++ MALI_DEBUG_ASSERT(0 == cow->count); + -+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); ++ INIT_LIST_HEAD(&cow->pages); ++ mutex_lock(&target_bk->mutex); ++ list_for_each_entry_safe(m_page, m_tmp, pages, list) { ++ /* add page from (target_offset,target_offset+size) to cow backend */ ++ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) && ++ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) { + -+ if (!list_empty(stream_list)) { -+ *new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list); -+ list_del_init(&(*new_mali_profiling_stream)->list); -+ } else { -+ ret = _MALI_OSK_ERR_NOMEM; -+ } ++ /* allocate a new page node, alway use OS memory for COW */ ++ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS); + -+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); ++ if (NULL == page_node) { ++ mutex_unlock(&target_bk->mutex); ++ goto error; ++ } + -+ return ret; -+} ++ INIT_LIST_HEAD(&page_node->list); + -+static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream) -+{ -+ unsigned long irq_flags; -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); -+ MALI_DEBUG_ASSERT_POINTER(stream_list); ++ /* check if in the modified range*/ ++ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && ++ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { ++ /* need to allocate a new page */ ++ /* To simplify the case, All COW memory is allocated from os memory ?*/ ++ new_page = mali_mem_cow_alloc_page(); + -+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); -+ list_add_tail(¤t_mali_profiling_stream->list, stream_list); -+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); -+} ++ if (NULL == new_page) { ++ kfree(page_node); ++ mutex_unlock(&target_bk->mutex); ++ goto error; ++ } + -+static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void) -+{ -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); -+ return list_empty(&global_mali_stream_list->queue_list); ++ _mali_page_node_add_page(page_node, new_page); ++ } else { ++ /*Add Block memory case*/ ++ if (m_page->type != MALI_PAGE_NODE_BLOCK) { ++ _mali_page_node_add_page(page_node, m_page->page); ++ } else { ++ page_node->type = MALI_PAGE_NODE_BLOCK; ++ _mali_page_node_add_block_item(page_node, m_page->blk_it); ++ } ++ ++ /* add ref to this page */ ++ _mali_page_node_ref(m_page); ++ } ++ ++ /* add it to COW backend page list */ ++ list_add_tail(&page_node->list, &cow->pages); ++ cow->count++; ++ } ++ target_page++; ++ } ++ mutex_unlock(&target_bk->mutex); ++ return _MALI_OSK_ERR_OK; ++error: ++ mali_mem_cow_release(backend, MALI_FALSE); ++ return _MALI_OSK_ERR_FAULT; +} + -+static u32 _mali_profiling_global_stream_queue_list_next_size(void) ++_mali_osk_errcode_t mali_memory_cow_swap_memory(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size, ++ mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size) +{ -+ unsigned 
long irq_flags; -+ u32 size = 0; -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ mali_mem_cow *cow = &backend->cow_mem; ++ struct mali_page_node *m_page, *m_tmp, *page_node; ++ int target_page = 0; ++ struct mali_swap_item *swap_item; ++ struct list_head *pages = NULL; + -+ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); -+ if (!list_empty(&global_mali_stream_list->queue_list)) { -+ mali_profiling_stream *next_mali_profiling_stream = -+ list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list); -+ size = next_mali_profiling_stream->used_size; ++ pages = _mali_memory_cow_get_node_list(target_bk, target_offset, target_size); ++ if (NULL == pages) { ++ MALI_DEBUG_PRINT_ERROR(("No swap memory page need to cow ! \n")); ++ return _MALI_OSK_ERR_FAULT; + } -+ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); -+ return size; -+} + -+/* The mali profiling stream file operations functions. */ -+static ssize_t _mali_profiling_stream_read( -+ struct file *filp, -+ char __user *buffer, -+ size_t size, -+ loff_t *f_pos); ++ MALI_DEBUG_ASSERT(0 == cow->count); + -+static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait); ++ INIT_LIST_HEAD(&cow->pages); ++ mutex_lock(&target_bk->mutex); + -+static int _mali_profiling_stream_release(struct inode *inode, struct file *filp); ++ backend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN; + -+/* The timeline stream file operations structure. */ -+static const struct file_operations mali_profiling_stream_fops = { -+ .release = _mali_profiling_stream_release, -+ .read = _mali_profiling_stream_read, -+ .poll = _mali_profiling_stream_poll, -+}; ++ list_for_each_entry_safe(m_page, m_tmp, pages, list) { ++ /* add page from (target_offset,target_offset+size) to cow backend */ ++ if ((target_page >= target_offset / _MALI_OSK_MALI_PAGE_SIZE) && ++ (target_page < ((target_size + target_offset) / _MALI_OSK_MALI_PAGE_SIZE))) { + -+static ssize_t _mali_profiling_stream_read( -+ struct file *filp, -+ char __user *buffer, -+ size_t size, -+ loff_t *f_pos) -+{ -+ u32 copy_len = 0; -+ mali_profiling_stream *current_mali_profiling_stream; -+ u32 used_size; -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ /* allocate a new page node, use swap memory for COW memory swap cowed flag. 
*/ ++ page_node = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP); + -+ while (!_mali_profiling_global_stream_queue_list_if_empty()) { -+ used_size = _mali_profiling_global_stream_queue_list_next_size(); -+ if (used_size <= ((u32)size - copy_len)) { -+ current_mali_profiling_stream = NULL; -+ _mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list, -+ ¤t_mali_profiling_stream); -+ MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream); -+ if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) { -+ current_mali_profiling_stream->used_size = 0; -+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream); -+ return -EFAULT; ++ if (NULL == page_node) { ++ mutex_unlock(&target_bk->mutex); ++ goto error; + } -+ copy_len += current_mali_profiling_stream->used_size; -+ current_mali_profiling_stream->used_size = 0; -+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream); -+ } else { -+ break; -+ } -+ } -+ return (ssize_t)copy_len; -+} + -+static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait) -+{ -+ poll_wait(filp, &stream_fd_wait_queue, wait); -+ if (!_mali_profiling_global_stream_queue_list_if_empty()) -+ return POLLIN; -+ return 0; -+} ++ /* check if in the modified range*/ ++ if ((cow->count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && ++ (cow->count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { ++ /* need to allocate a new page */ ++ /* To simplify the case, All COW memory is allocated from os memory ?*/ ++ swap_item = mali_mem_swap_alloc_swap_item(); + -+static int _mali_profiling_stream_release(struct inode *inode, struct file *filp) -+{ -+ _mali_osk_atomic_init(&stream_fd_if_used, 0); -+ return 0; -+} ++ if (NULL == swap_item) { ++ kfree(page_node); ++ mutex_unlock(&target_bk->mutex); ++ goto error; ++ } + -+/* The funs for control packet and stream data.*/ -+static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size) -+{ -+ u32 i; ++ swap_item->idx = mali_mem_swap_idx_alloc(); + -+ for (i = 0; i < sizeof(size); ++i) -+ buf[i] = (size >> 8 * i) & 0xFF; -+} ++ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) { ++ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW.\n")); ++ kfree(page_node); ++ kfree(swap_item); ++ mutex_unlock(&target_bk->mutex); ++ goto error; ++ } + -+static u32 _mali_profiling_get_packet_size(unsigned char *const buf) -+{ -+ u32 i; -+ u32 size = 0; -+ for (i = 0; i < sizeof(size); ++i) -+ size |= (u32)buf[i] << 8 * i; -+ return size; -+} ++ _mali_page_node_add_swap_item(page_node, swap_item); ++ } else { ++ _mali_page_node_add_swap_item(page_node, m_page->swap_it); + -+static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size) -+{ -+ u64 int_value = 0; -+ u8 shift = 0; -+ u8 byte_value = ~0; ++ /* add ref to this page */ ++ _mali_page_node_ref(m_page); ++ } + -+ while ((byte_value & 0x80) != 0) { -+ if ((*pos) >= packet_size) -+ return -1; -+ byte_value = buf[*pos]; -+ *pos += 1; -+ int_value |= (u32)(byte_value & 0x7f) << shift; -+ shift += 7; ++ list_add_tail(&page_node->list, &cow->pages); ++ cow->count++; ++ } ++ target_page++; + } ++ mutex_unlock(&target_bk->mutex); + -+ if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) { -+ int_value |= -(1 << shift); -+ } ++ return _MALI_OSK_ERR_OK; ++error: ++ mali_mem_swap_release(backend, 
MALI_FALSE); ++ return _MALI_OSK_ERR_FAULT; + -+ return int_value; +} + -+static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value) ++ ++_mali_osk_errcode_t _mali_mem_put_page_node(mali_page_node *node) +{ -+ u32 add_bytes = 0; -+ int more = 1; -+ while (more) { -+ /* low order 7 bits of val */ -+ char byte_value = value & 0x7f; -+ value >>= 7; ++ if (node->type == MALI_PAGE_NODE_OS) { ++ return mali_mem_os_put_page(node->page); ++ } else if (node->type == MALI_PAGE_NODE_BLOCK) { ++ return mali_mem_block_unref_node(node); ++ } else if (node->type == MALI_PAGE_NODE_SWAP) { ++ return _mali_mem_swap_put_page_node(node); ++ } else ++ MALI_DEBUG_ASSERT(0); ++ return _MALI_OSK_ERR_FAULT; ++} + -+ if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) { -+ more = 0; -+ } else { -+ byte_value |= 0x80; -+ } + -+ if ((pos + add_bytes) >= buf_size) -+ return 0; -+ buf[pos + add_bytes] = byte_value; -+ add_bytes++; -+ } ++/** ++* Modify a range of a exist COW backend ++* @backend -COW backend ++* @range_start - offset of modified range (4K align) ++* @range_size - size of modified range(in byte) ++*/ ++_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size) ++{ ++ mali_mem_allocation *alloc = NULL; ++ struct mali_session_data *session; ++ mali_mem_cow *cow = &backend->cow_mem; ++ struct mali_page_node *m_page, *m_tmp; ++ LIST_HEAD(pages); ++ struct page *new_page; ++ u32 count = 0; ++ s32 change_pages_nr = 0; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; + -+ return add_bytes; -+} ++ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); + -+static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val) -+{ -+ int add_bytes = 0; -+ int more = 1; -+ while (more) { -+ /* low order 7 bits of x */ -+ char byte_value = val & 0x7f; -+ val >>= 7; ++ alloc = backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+ if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) { -+ more = 0; -+ } else { -+ byte_value |= 0x80; -+ } ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size); -+ buf[pos + add_bytes] = byte_value; -+ add_bytes++; -+ } ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type); ++ MALI_DEBUG_ASSERT(((range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE) <= cow->count); + -+ return add_bytes; -+} ++ mutex_lock(&backend->mutex); + -+static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value) -+{ -+ u32 add_size = STREAM_HEADER_SIZE; -+ MALI_DEBUG_ASSERT_POINTER(profiling_stream); -+ MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE); ++ /* free pages*/ ++ list_for_each_entry_safe(m_page, m_tmp, &cow->pages, list) { + -+ profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE; ++ /* check if in the modified range*/ ++ if ((count >= range_start / _MALI_OSK_MALI_PAGE_SIZE) && ++ (count < (range_start + range_size) / _MALI_OSK_MALI_PAGE_SIZE)) { ++ if (MALI_PAGE_NODE_SWAP != m_page->type) { ++ new_page = mali_mem_cow_alloc_page(); + -+ add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, -+ profiling_stream->used_size + add_size, 
current_time); -+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, -+ profiling_stream->used_size + add_size, (s32)0); -+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, -+ profiling_stream->used_size + add_size, (s32)key); -+ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, -+ profiling_stream->used_size + add_size, (s32)counter_value); ++ if (NULL == new_page) { ++ goto error; ++ } ++ if (1 != _mali_page_node_get_ref_count(m_page)) ++ change_pages_nr++; ++ /* unref old page*/ ++ _mali_osk_mutex_wait(session->cow_lock); ++ if (_mali_mem_put_page_node(m_page)) { ++ __free_page(new_page); ++ _mali_osk_mutex_signal(session->cow_lock); ++ goto error; ++ } ++ _mali_osk_mutex_signal(session->cow_lock); ++ /* add new page*/ ++ /* always use OS for COW*/ ++ m_page->type = MALI_PAGE_NODE_OS; ++ _mali_page_node_add_page(m_page, new_page); ++ } else { ++ struct mali_swap_item *swap_item; + -+ _mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1, -+ add_size - STREAM_HEADER_SIZE); ++ swap_item = mali_mem_swap_alloc_swap_item(); + -+ profiling_stream->used_size += add_size; -+} ++ if (NULL == swap_item) { ++ goto error; ++ } + -+/* The callback function for sampling timer.*/ -+static enum hrtimer_restart _mali_profiling_sampling_counters(struct hrtimer *timer) -+{ -+ u32 counter_index; -+ s64 current_time; -+ MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters); -+ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ swap_item->idx = mali_mem_swap_idx_alloc(); + -+ MALI_DEBUG_ASSERT(NULL == mali_counter_stream); -+ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue( -+ &global_mali_stream_list->free_list, &mali_counter_stream)) { ++ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == swap_item->idx) { ++ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW modify range.\n")); ++ kfree(swap_item); ++ goto error; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(mali_counter_stream); -+ MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size); ++ if (1 != _mali_page_node_get_ref_count(m_page)) { ++ change_pages_nr++; ++ } + -+ /* Capture l2 cache counter values if enabled */ -+ if (MALI_TRUE == l2_cache_counter_if_enabled) { -+ int i, j = 0; -+ _mali_profiling_l2_counter_values l2_counters_values; -+ _mali_profiling_get_l2_counters(&l2_counters_values); ++ if (_mali_mem_put_page_node(m_page)) { ++ mali_mem_swap_free_swap_item(swap_item); ++ goto error; ++ } + -+ for (i = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) { -+ if (0 == (j % 2)) -+ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0); -+ else -+ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1); -+ j++; ++ _mali_page_node_add_swap_item(m_page, swap_item); + } + } ++ count++; ++ } ++ cow->change_pages_nr = change_pages_nr; + -+ current_time = (s64)_mali_osk_boot_time_get_ns(); ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == alloc->type); + -+ /* Add all enabled counter values into stream */ -+ for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) { -+ /* No need to sample these couners here. 
*/ -+ if (global_mali_profiling_counters[counter_index].enabled) { -+ if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER && -+ global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER) -+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY) -+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY) -+ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) { ++ /* ZAP cpu mapping(modified range), and do cpu mapping here if need */ ++ if (NULL != alloc->cpu_mapping.vma) { ++ MALI_DEBUG_ASSERT(0 != alloc->backend_handle); ++ MALI_DEBUG_ASSERT(NULL != alloc->cpu_mapping.vma); ++ MALI_DEBUG_ASSERT(alloc->cpu_mapping.vma->vm_end - alloc->cpu_mapping.vma->vm_start >= range_size); + -+ continue; -+ } ++ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { ++ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size); + -+ if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 && -+ global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) { ++ ret = mali_mem_cow_cpu_map_pages_locked(backend, alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size / _MALI_OSK_MALI_PAGE_SIZE); + -+ u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value; ++ if (unlikely(ret != _MALI_OSK_ERR_OK)) { ++ MALI_DEBUG_PRINT(2, ("mali_memory_cow_modify_range: cpu mapping failed !\n")); ++ ret = _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ /* used to trigger page fault for swappable cowed memory. */ ++ vm_flags_set(alloc->cpu_mapping.vma, VM_PFNMAP); ++ vm_flags_set(alloc->cpu_mapping.vma, VM_MIXEDMAP); + -+ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key, -+ global_mali_profiling_counters[counter_index].current_counter_value - prev_val); ++ zap_vma_ptes(alloc->cpu_mapping.vma, alloc->cpu_mapping.vma->vm_start + range_start, range_size); ++ /* delete this flag to let swappble is ummapped regard to stauct page not page frame. 
*/ ++ vm_flags_clear(alloc->cpu_mapping.vma, VM_PFNMAP); ++ vm_flags_clear(alloc->cpu_mapping.vma, VM_MIXEDMAP); ++ } ++ } + -+ prev_val = global_mali_profiling_counters[counter_index].current_counter_value; ++error: ++ mutex_unlock(&backend->mutex); ++ return ret; + -+ global_mali_profiling_counters[counter_index].prev_counter_value = prev_val; -+ } else { ++} + -+ if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) { -+ u32 total_alloc_mem = _mali_ukk_report_memory_usage(); -+ global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE; -+ } -+ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key, -+ global_mali_profiling_counters[counter_index].current_counter_value); -+ if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER) -+ global_mali_profiling_counters[counter_index].current_counter_value = 0; -+ } -+ } -+ } -+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream); -+ mali_counter_stream = NULL; -+ } else { -+ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n")); -+ } -+ -+ wake_up_interruptible(&stream_fd_wait_queue); -+ -+ /*Enable the sampling timer again*/ -+ if (0 != num_counters_enabled && 0 != profiling_sample_rate) { -+ hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate)); -+ return HRTIMER_RESTART; -+ } -+ return HRTIMER_NORESTART; -+} + -+static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid) ++/** ++* Allocate pages for COW backend ++* @alloc -allocation for COW allocation ++* @target_bk - target allocation's backend(the allocation need to do COW) ++* @target_offset - the offset in target allocation to do COW(for support COW a memory allocated from memory_bank, 4K align) ++* @target_size - size of target allocation to do COW (for support memory bank)(in byte) ++* @backend -COW backend ++* @range_start - offset of modified range (4K align) ++* @range_size - size of modified range(in byte) ++*/ ++_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size, ++ mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size) +{ -+ unsigned long irq_flags; -+ -+ spin_lock_irqsave(&mali_activity_lock, irq_flags); -+ if (activity == 0) -+ mali_activity_cores_num--; -+ else -+ mali_activity_cores_num++; -+ spin_unlock_irqrestore(&mali_activity_lock, irq_flags); -+ -+ if (NULL != global_mali_profiling_counters) { -+ int i ; -+ for (i = 0; i < num_global_mali_profiling_counters; i++) { -+ if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) { -+ u64 current_time = _mali_osk_boot_time_get_ns(); -+ u32 add_size = STREAM_HEADER_SIZE; ++ struct mali_session_data *session = backend->mali_allocation->session; + -+ if (NULL != mali_core_activity_stream) { -+ if ((mali_core_activity_stream_dequeue_time + MALI_PROFILING_STREAM_HOLD_TIME < current_time) || -+ (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE -+ - mali_core_activity_stream->used_size)) { -+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream); -+ mali_core_activity_stream = NULL; -+ wake_up_interruptible(&stream_fd_wait_queue); -+ } -+ } ++ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); + -+ if (NULL == 
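
mali_memory_cow_modify_range() is the second half of the two-step flow described earlier: a COW backend may be created with an empty modified range and narrowed down once the writer knows which pages it will actually touch. A minimal sketch, where cow_backend is assumed to come from an earlier mali_memory_do_cow() call and the 8 KiB window at offset 8 KiB is an example value:

	_mali_osk_errcode_t err;

	/* Both offset and size must be 4 KiB aligned, as checked at the top of
	 * mali_memory_cow_modify_range(). */
	err = mali_memory_cow_modify_range(cow_backend, 0x2000, 0x2000);
	if (_MALI_OSK_ERR_OK != err)
		return err;
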
mali_core_activity_stream) { -+ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue( -+ &global_mali_stream_list->free_list, &mali_core_activity_stream)) { -+ mali_core_activity_stream_dequeue_time = current_time; -+ } else { -+ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n")); -+ wake_up_interruptible(&stream_fd_wait_queue); -+ break; -+ } ++ /* size & offset must be a multiple of the system page size */ ++ if (target_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ if (range_size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ if (target_offset % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ if (range_start % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); + -+ } ++ /* check backend type */ ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == backend->type); + -+ mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY; ++ switch (target_bk->type) { ++ case MALI_MEM_OS: ++ case MALI_MEM_BLOCK: ++ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size); ++ break; ++ case MALI_MEM_COW: ++ if (backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) { ++ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size); ++ } else { ++ return mali_memory_cow_os_memory(target_bk, target_offset, target_size, backend, range_start, range_size); ++ } ++ break; ++ case MALI_MEM_SWAP: ++ return mali_memory_cow_swap_memory(target_bk, target_offset, target_size, backend, range_start, range_size); ++ break; ++ case MALI_MEM_EXTERNAL: ++ /*NOT support yet*/ ++ MALI_DEBUG_PRINT_ERROR(("External physical memory not supported ! \n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; ++ break; ++ case MALI_MEM_DMA_BUF: ++ /*NOT support yet*/ ++ MALI_DEBUG_PRINT_ERROR(("DMA buffer not supported ! \n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; ++ break; ++ case MALI_MEM_UMP: ++ /*NOT support yet*/ ++ MALI_DEBUG_PRINT_ERROR(("UMP buffer not supported ! \n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; ++ break; ++ default: ++ /*Not support yet*/ ++ MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported ! 
\n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; ++ break; ++ } ++ return _MALI_OSK_ERR_OK; ++} + -+ add_size += _mali_profiling_pack_long(mali_core_activity_stream->data, -+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time); -+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, -+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core); -+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, -+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key); -+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, -+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity); -+ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, -+ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid); + -+ _mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1, -+ add_size - STREAM_HEADER_SIZE); ++/** ++* Map COW backend memory to mali ++* Support OS/BLOCK for mali_page_node ++*/ ++int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size) ++{ ++ mali_mem_allocation *cow_alloc; ++ struct mali_page_node *m_page; ++ struct mali_session_data *session; ++ struct mali_page_directory *pagedir; ++ u32 virt, start; + -+ mali_core_activity_stream->used_size += add_size; ++ cow_alloc = mem_bkend->mali_allocation; ++ virt = cow_alloc->mali_vma_node.vm_node.start; ++ start = virt; + -+ if (0 == mali_activity_cores_num) { -+ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream); -+ mali_core_activity_stream = NULL; -+ wake_up_interruptible(&stream_fd_wait_queue); -+ } ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); ++ MALI_DEBUG_ASSERT_POINTER(cow_alloc); + -+ break; -+ } ++ session = cow_alloc->session; ++ pagedir = session->page_directory; ++ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); ++ list_for_each_entry(m_page, &mem_bkend->cow_mem.pages, list) { ++ if ((virt - start >= range_start) && (virt - start < range_start + range_size)) { ++ dma_addr_t phys = _mali_page_node_get_dma_addr(m_page); ++#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) ++ MALI_DEBUG_ASSERT(0 == (phys >> 32)); ++#endif ++ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, ++ MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); + } ++ virt += MALI_MMU_PAGE_SIZE; + } ++ return 0; +} + -+static mali_bool _mali_profiling_global_counters_init(void) ++/** ++* Map COW backend to cpu ++* support OS/BLOCK memory ++*/ ++int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) +{ -+ int core_id, counter_index, counter_number, counter_id; -+ u32 num_l2_cache_cores; -+ u32 num_pp_cores; -+ u32 num_gp_cores = 1; -+ -+ MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters); -+ num_pp_cores = mali_pp_get_glob_num_pp_cores(); -+ num_l2_cache_cores = mali_l2_cache_core_get_glob_num_l2_cores(); -+ -+ num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores -+ + MALI_PROFILING_SW_COUNTERS_NUM -+ + MALI_PROFILING_SPECIAL_COUNTERS_NUM -+ + MALI_PROFILING_MEM_COUNTERS_NUM; -+ global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter)); -+ -+ if (NULL == 
global_mali_profiling_counters) -+ return MALI_FALSE; ++ mali_mem_cow *cow = &mem_bkend->cow_mem; ++ struct mali_page_node *m_page; ++ int ret; ++ unsigned long addr = vma->vm_start; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW); + -+ counter_index = 0; -+ /*Vertex processor counters */ -+ for (core_id = 0; core_id < num_gp_cores; core_id ++) { -+ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id); ++ list_for_each_entry(m_page, &cow->pages, list) { ++ /* We should use vm_insert_page, but it does a dcache ++ * flush which makes it way slower than remap_pfn_range or vmf_insert_pfn. ++ ret = vm_insert_page(vma, addr, page); ++ */ ++ ret = vmf_insert_pfn(vma, addr, _mali_page_node_get_pfn(m_page)); + -+ for (counter_number = 0; counter_number < 2; counter_number++) { -+ counter_index++; -+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number); ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ return -EFAULT; + } ++ addr += _MALI_OSK_MALI_PAGE_SIZE; + } + -+ /* Fragment processors' counters */ -+ for (core_id = 0; core_id < num_pp_cores; core_id++) { -+ counter_index++; -+ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id); ++ return 0; ++} + -+ for (counter_number = 0; counter_number < 2; counter_number++) { -+ counter_index++; -+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number); -+ } -+ } ++/** ++* Map some pages(COW backend) to CPU vma@vaddr ++*@ mem_bkend - COW backend ++*@ vma ++*@ vaddr -start CPU vaddr mapped to ++*@ num - max number of pages to map to CPU vaddr ++*/ ++_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend, ++ struct vm_area_struct *vma, ++ unsigned long vaddr, ++ int num) ++{ ++ mali_mem_cow *cow = &mem_bkend->cow_mem; ++ struct mali_page_node *m_page; ++ int ret; ++ int offset; ++ int count ; ++ unsigned long vstart = vma->vm_start; ++ count = 0; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_COW); ++ MALI_DEBUG_ASSERT(0 == vaddr % _MALI_OSK_MALI_PAGE_SIZE); ++ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE); ++ offset = (vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE; + -+ /* L2 Cache counters */ -+ for (core_id = 0; core_id < num_l2_cache_cores; core_id++) { -+ for (counter_number = 0; counter_number < 2; counter_number++) { -+ counter_index++; -+ global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", 
mali_name, core_id, counter_number); ++ list_for_each_entry(m_page, &cow->pages, list) { ++ if ((count >= offset) && (count < offset + num)) { ++ ret = vmf_insert_pfn(vma, vaddr, _mali_page_node_get_pfn(m_page)); ++ ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ if (count == offset) { ++ return _MALI_OSK_ERR_FAULT; ++ } else { ++ /* ret is EBUSY when page isn't in modify range, but now it's OK*/ ++ return _MALI_OSK_ERR_OK; ++ } ++ } ++ vaddr += _MALI_OSK_MALI_PAGE_SIZE; + } ++ count++; + } ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Now set up the software counter entries */ -+ for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) { -+ counter_index++; -+ -+ if (0 == first_sw_counter_index) -+ first_sw_counter_index = counter_index; ++/** ++* Release COW backend memory ++* free it directly(put_page--unref page), not put into pool ++*/ ++u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped) ++{ ++ mali_mem_allocation *alloc; ++ struct mali_session_data *session; ++ u32 free_pages_nr = 0; ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); ++ alloc = mem_bkend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+ global_mali_profiling_counters[counter_index].counter_id = counter_id; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", mali_name, counter_id - FIRST_SW_COUNTER); -+ } ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ /* Now set up the special counter entries */ -+ for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) { ++ if (MALI_MEM_BACKEND_FLAG_SWAP_COWED != (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)) { ++ /* Unmap the memory from the mali virtual address space. 
*/ ++ if (MALI_TRUE == is_mali_mapped) ++ mali_mem_os_mali_unmap(alloc); ++ /* free cow backend list*/ ++ _mali_osk_mutex_wait(session->cow_lock); ++ free_pages_nr = mali_mem_os_free(&mem_bkend->cow_mem.pages, mem_bkend->cow_mem.count, MALI_TRUE); ++ _mali_osk_mutex_signal(session->cow_lock); + -+ counter_index++; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s", -+ mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]); ++ free_pages_nr += mali_mem_block_free_list(&mem_bkend->cow_mem.pages); + -+ global_mali_profiling_counters[counter_index].counter_id = counter_id; ++ MALI_DEBUG_ASSERT(list_empty(&mem_bkend->cow_mem.pages)); ++ } else { ++ free_pages_nr = mali_mem_swap_release(mem_bkend, is_mali_mapped); + } + -+ /* Now set up the mem counter entries*/ -+ for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) { -+ -+ counter_index++; -+ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, -+ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s", -+ mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]); -+ -+ global_mali_profiling_counters[counter_index].counter_id = counter_id; -+ } + -+ MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters); ++ MALI_DEBUG_PRINT(4, ("COW Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->cow_mem.count * _MALI_OSK_MALI_PAGE_SIZE, ++ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); + -+ return MALI_TRUE; ++ mem_bkend->cow_mem.count = 0; ++ return free_pages_nr; +} + -+void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable) ++ ++/* Dst node could os node or swap node. 
*/ ++void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node) +{ ++ void *dst, *src; ++ struct page *dst_page; ++ dma_addr_t dma_addr; + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT(src_node != NULL); ++ MALI_DEBUG_ASSERT(dst_node != NULL); ++ MALI_DEBUG_ASSERT(dst_node->type == MALI_PAGE_NODE_OS ++ || dst_node->type == MALI_PAGE_NODE_SWAP); + -+ if (NULL != session) { -+ _mali_osk_notification_t *notification; -+ _mali_osk_notification_queue_t *queue; ++ if (dst_node->type == MALI_PAGE_NODE_OS) { ++ dst_page = dst_node->page; ++ } else { ++ dst_page = dst_node->swap_it->page; ++ } + -+ queue = session->ioctl_queue; -+ MALI_DEBUG_ASSERT(NULL != queue); ++ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(dst_node), ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER, -+ sizeof(_mali_uk_annotate_profiling_mem_counter_s)); ++ /* map it , and copy the content*/ ++ dst = kmap_atomic(dst_page); + -+ if (NULL != notification) { -+ _mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer; -+ data->counter_id = counter_id; -+ data->key = key; -+ data->enable = enable; ++ if (src_node->type == MALI_PAGE_NODE_OS || ++ src_node->type == MALI_PAGE_NODE_SWAP) { ++ struct page *src_page; + -+ _mali_osk_notification_queue_send(queue, notification); ++ if (src_node->type == MALI_PAGE_NODE_OS) { ++ src_page = src_node->page; + } else { -+ MALI_PRINT_ERROR(("Failed to create notification object!\n")); ++ src_page = src_node->swap_it->page; + } -+ } else { -+ MALI_PRINT_ERROR(("Failed to find the right session!\n")); -+ } -+} -+ -+void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable) -+{ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (NULL != session) { -+ _mali_osk_notification_t *notification; -+ _mali_osk_notification_queue_t *queue; -+ -+ queue = session->ioctl_queue; -+ MALI_DEBUG_ASSERT(NULL != queue); + -+ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE, -+ sizeof(_mali_uk_annotate_profiling_enable_s)); ++ /* Clear and invaliate cache */ ++ /* In ARM architecture, speculative read may pull stale data into L1 cache ++ * for kernel linear mapping page table. 
DMA_BIDIRECTIONAL could ++ * invalidate the L1 cache so that following read get the latest data ++ */ ++ dma_unmap_page(&mali_platform_device->dev, _mali_page_node_get_dma_addr(src_node), ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ if (NULL != notification) { -+ _mali_uk_annotate_profiling_enable_s *data = notification->result_buffer; -+ data->sampling_rate = sampling_rate; -+ data->enable = enable; ++ src = kmap_atomic(src_page); ++ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE); ++ kunmap_atomic(src); ++ dma_addr = dma_map_page(&mali_platform_device->dev, src_page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); + -+ _mali_osk_notification_queue_send(queue, notification); -+ } else { -+ MALI_PRINT_ERROR(("Failed to create notification object!\n")); ++ if (src_node->type == MALI_PAGE_NODE_SWAP) { ++ src_node->swap_it->dma_addr = dma_addr; + } -+ } else { -+ MALI_PRINT_ERROR(("Failed to find the right session!\n")); ++ } else if (src_node->type == MALI_PAGE_NODE_BLOCK) { ++ /* ++ * use ioremap to map src for BLOCK memory ++ */ ++ src = ioremap(_mali_page_node_get_dma_addr(src_node), _MALI_OSK_MALI_PAGE_SIZE); ++ memcpy(dst, src , _MALI_OSK_MALI_PAGE_SIZE); ++ iounmap(src); ++ } ++ kunmap_atomic(dst); ++ dma_addr = dma_map_page(&mali_platform_device->dev, dst_page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ++ if (dst_node->type == MALI_PAGE_NODE_SWAP) { ++ dst_node->swap_it->dma_addr = dma_addr; + } +} + + -+_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start) ++/* ++* allocate page on demand when CPU access it, ++* THis used in page fault handler ++*/ ++_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page) +{ -+ int i; -+ mali_profiling_stream *new_mali_profiling_stream = NULL; -+ mali_profiling_stream_list *new_mali_profiling_stream_list = NULL; -+ if (MALI_TRUE == auto_start) { -+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE); -+ } ++ struct page *new_page = NULL; ++ struct mali_page_node *new_node = NULL; ++ int i = 0; ++ struct mali_page_node *m_page, *found_node = NULL; ++ struct mali_session_data *session = NULL; ++ mali_mem_cow *cow = &mem_bkend->cow_mem; ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); ++ MALI_DEBUG_ASSERT(offset_page < mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE); ++ MALI_DEBUG_PRINT(4, ("mali_mem_cow_allocate_on_demand !, offset_page =0x%x\n", offset_page)); + -+ /*Init the global_mali_stream_list*/ -+ MALI_DEBUG_ASSERT(NULL == global_mali_stream_list); -+ new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL); ++ /* allocate new page here */ ++ new_page = mali_mem_cow_alloc_page(); ++ if (!new_page) ++ return _MALI_OSK_ERR_NOMEM; + -+ if (NULL == new_mali_profiling_stream_list) { ++ new_node = _mali_page_node_allocate(MALI_PAGE_NODE_OS); ++ if (!new_node) { ++ __free_page(new_page); + return _MALI_OSK_ERR_NOMEM; + } + -+ spin_lock_init(&new_mali_profiling_stream_list->spin_lock); -+ INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list); -+ INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list); -+ -+ spin_lock_init(&mali_activity_lock); -+ mali_activity_cores_num = 0; -+ -+ for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) { -+ new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL); -+ if (NULL == new_mali_profiling_stream) { -+ _mali_profiling_stream_list_destory(new_mali_profiling_stream_list); -+ return 
_MALI_OSK_ERR_NOMEM; ++ /* find the page in backend*/ ++ list_for_each_entry(m_page, &cow->pages, list) { ++ if (i == offset_page) { ++ found_node = m_page; ++ break; + } ++ i++; ++ } ++ MALI_DEBUG_ASSERT(found_node); ++ if (NULL == found_node) { ++ __free_page(new_page); ++ kfree(new_node); ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + -+ INIT_LIST_HEAD(&new_mali_profiling_stream->list); -+ new_mali_profiling_stream->used_size = 0; -+ list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list); ++ _mali_page_node_add_page(new_node, new_page); + -+ } ++ /* Copy the src page's content to new page */ ++ _mali_mem_cow_copy_page(found_node, new_node); + -+ _mali_osk_atomic_init(&stream_fd_if_used, 0); -+ init_waitqueue_head(&stream_fd_wait_queue); ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend->mali_allocation); ++ session = mem_bkend->mali_allocation->session; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ if (1 != _mali_page_node_get_ref_count(found_node)) { ++ atomic_add(1, &session->mali_mem_allocated_pages); ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } ++ mem_bkend->cow_mem.change_pages_nr++; ++ } + -+ hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ _mali_osk_mutex_wait(session->cow_lock); ++ if (_mali_mem_put_page_node(found_node)) { ++ __free_page(new_page); ++ kfree(new_node); ++ _mali_osk_mutex_signal(session->cow_lock); ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ _mali_osk_mutex_signal(session->cow_lock); + -+ profiling_sampling_timer.function = _mali_profiling_sampling_counters; ++ list_replace(&found_node->list, &new_node->list); + -+ global_mali_stream_list = new_mali_profiling_stream_list; ++ kfree(found_node); + ++ /* map to GPU side*/ ++ _mali_osk_mutex_wait(session->memory_lock); ++ mali_mem_cow_mali_map(mem_bkend, offset_page * _MALI_OSK_MALI_PAGE_SIZE, _MALI_OSK_MALI_PAGE_SIZE); ++ _mali_osk_mutex_signal(session->memory_lock); + return _MALI_OSK_ERR_OK; +} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h +new file mode 100644 +index 000000000..ea1a0bb56 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_cow.h +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+void _mali_osk_profiling_term(void) -+{ -+ if (0 != profiling_sample_rate) { -+ hrtimer_cancel(&profiling_sampling_timer); -+ profiling_sample_rate = 0; -+ } -+ _mali_osk_atomic_term(&stream_fd_if_used); ++#ifndef __MALI_MEMORY_COW_H__ ++#define __MALI_MEMORY_COW_H__ + -+ if (NULL != global_mali_profiling_counters) { -+ _mali_osk_free(global_mali_profiling_counters); -+ global_mali_profiling_counters = NULL; -+ num_global_mali_profiling_counters = 0; -+ } ++#include "mali_osk.h" ++#include "mali_session.h" ++#include "mali_memory_types.h" + -+ if (NULL != global_mali_stream_list) { -+ _mali_profiling_stream_list_destory(global_mali_stream_list); -+ global_mali_stream_list = NULL; -+ } ++int mali_mem_cow_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); ++_mali_osk_errcode_t mali_mem_cow_cpu_map_pages_locked(mali_mem_backend *mem_bkend, ++ struct vm_area_struct *vma, ++ unsigned long vaddr, ++ int num); + -+} ++_mali_osk_errcode_t mali_memory_do_cow(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size, ++ mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size); + -+void _mali_osk_profiling_stop_sampling(u32 pid) -+{ -+ if (pid == current_profiling_pid) { ++_mali_osk_errcode_t mali_memory_cow_modify_range(mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size); + -+ int i; -+ /* Reset all counter states when closing connection.*/ -+ for (i = 0; i < num_global_mali_profiling_counters; ++i) { -+ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER); -+ global_mali_profiling_counters[i].enabled = 0; -+ global_mali_profiling_counters[i].prev_counter_value = 0; -+ global_mali_profiling_counters[i].current_counter_value = 0; -+ } -+ l2_cache_counter_if_enabled = MALI_FALSE; -+ num_counters_enabled = 0; -+ mem_counters_enabled = 0; -+ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0); -+ _mali_profiling_control(SW_COUNTER_ENABLE, 0); -+ /* Delete sampling timer when closing connection. */ -+ if (0 != profiling_sample_rate) { -+ hrtimer_cancel(&profiling_sampling_timer); -+ profiling_sample_rate = 0; -+ } -+ current_profiling_pid = 0; -+ } -+} ++_mali_osk_errcode_t mali_memory_cow_os_memory(mali_mem_backend *target_bk, ++ u32 target_offset, ++ u32 target_size, ++ mali_mem_backend *backend, ++ u32 range_start, ++ u32 range_size); + -+void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4) -+{ -+ /*Record the freq & volt to global_mali_profiling_counters here. 
*/ -+ if (0 != profiling_sample_rate) { -+ u32 channel; -+ u32 state; -+ channel = (event_id >> 16) & 0xFF; -+ state = ((event_id >> 24) & 0xF) << 24; ++void _mali_mem_cow_copy_page(mali_page_node *src_node, mali_page_node *dst_node); + -+ switch (state) { -+ case MALI_PROFILING_EVENT_TYPE_SINGLE: -+ if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) { -+ u32 reason = (event_id & 0xFFFF); -+ if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) { -+ _mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0); -+ _mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1); -+ } -+ } -+ break; -+ case MALI_PROFILING_EVENT_TYPE_START: -+ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) { -+ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1); -+ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) && -+ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) { -+ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16); -+ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1); -+ } -+ break; -+ case MALI_PROFILING_EVENT_TYPE_STOP: -+ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) { -+ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0); -+ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) && -+ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) { -+ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16); -+ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0); -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4); -+} ++int mali_mem_cow_mali_map(mali_mem_backend *mem_bkend, u32 range_start, u32 range_size); ++u32 mali_mem_cow_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped); ++_mali_osk_errcode_t mali_mem_cow_allocate_on_demand(mali_mem_backend *mem_bkend, u32 offset_page); ++#endif + -+void _mali_osk_profiling_report_sw_counters(u32 *counters) -+{ -+ trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters); -+} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c +new file mode 100644 +index 000000000..9924f58c2 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.c +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_ARM ++#include ++#endif ++#include + -+void _mali_osk_profiling_record_global_counters(int counter_id, u32 value) ++#include "mali_memory.h" ++#include "mali_kernel_common.h" ++#include "mali_uk_types.h" ++#include "mali_osk.h" ++#include "mali_kernel_linux.h" ++#include "mali_memory_defer_bind.h" ++#include "mali_executor.h" ++#include "mali_osk.h" ++#include "mali_scheduler.h" ++#include "mali_gp_job.h" ++ ++mali_defer_bind_manager *mali_dmem_man = NULL; ++ ++static u32 mali_dmem_get_gp_varying_size(struct mali_gp_job *gp_job) +{ -+ if (NULL != global_mali_profiling_counters) { -+ int i ; -+ for (i = 0; i < num_global_mali_profiling_counters; i++) { -+ if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) { -+ global_mali_profiling_counters[i].current_counter_value = value; -+ break; -+ } -+ } -+ } ++ return gp_job->required_varying_memsize / _MALI_OSK_MALI_PAGE_SIZE; +} + -+_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args) ++_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void) +{ -+ /* Always add process and thread identificator in the first two data elements for events from user space */ -+ _mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]); ++ mali_dmem_man = _mali_osk_calloc(1, sizeof(struct mali_defer_bind_manager)); ++ if (!mali_dmem_man) ++ return _MALI_OSK_ERR_NOMEM; ++ ++ atomic_set(&mali_dmem_man->num_used_pages, 0); ++ atomic_set(&mali_dmem_man->num_dmem, 0); + + return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args) -+{ -+ u32 *counters = (u32 *)(uintptr_t)args->counters; -+ -+ _mali_osk_profiling_report_sw_counters(counters); + -+ if (NULL != global_mali_profiling_counters) { -+ int i; -+ for (i = 0; i < MALI_PROFILING_SW_COUNTERS_NUM; i ++) { -+ if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) { -+ global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i); -+ } -+ } ++void mali_mem_defer_bind_manager_destory(void) ++{ ++ if (mali_dmem_man) { ++ MALI_DEBUG_ASSERT(0 == atomic_read(&mali_dmem_man->num_dmem)); ++ kfree(mali_dmem_man); + } -+ -+ return _MALI_OSK_ERR_OK; ++ mali_dmem_man = NULL; +} + -+_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args) -+{ -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ MALI_DEBUG_ASSERT_POINTER(session); + -+ if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) { ++/*allocate pages from OS memory*/ ++_mali_osk_errcode_t mali_mem_defer_alloc_mem(u32 require, struct mali_session_data *session, mali_defer_mem_block *dblock) ++{ ++ int retval = 0; ++ u32 num_pages = require; ++ mali_mem_os_mem os_mem; + -+ s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops, -+ session, -+ O_RDONLY | O_CLOEXEC); ++ retval = mali_mem_os_alloc_pages(&os_mem, num_pages * _MALI_OSK_MALI_PAGE_SIZE); + -+ args->stream_fd = fd; -+ if (0 > fd) { -+ _mali_osk_atomic_dec(&stream_fd_if_used); -+ return _MALI_OSK_ERR_FAULT; ++ /* add to free pages list */ ++ if (0 == retval) { ++ MALI_DEBUG_PRINT(4, ("mali_mem_defer_alloc_mem ,,*** pages allocate = 0x%x \n", num_pages)); ++ list_splice(&os_mem.pages, &dblock->free_pages); ++ 
atomic_add(os_mem.count, &dblock->num_free_pages); ++ atomic_add(os_mem.count, &session->mali_mem_allocated_pages); ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; + } -+ args->stream_fd = fd; -+ } else { -+ _mali_osk_atomic_dec(&stream_fd_if_used); -+ args->stream_fd = -1; -+ return _MALI_OSK_ERR_BUSY; -+ } -+ -+ return _MALI_OSK_ERR_OK; ++ return _MALI_OSK_ERR_OK; ++ } else ++ return _MALI_OSK_ERR_FAULT; +} + -+_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args) ++_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock) +{ -+ u32 control_packet_size; -+ u32 output_buffer_size; ++ u32 require_page; + -+ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) { -+ MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n")); ++ if (!next_gp_job) + return _MALI_OSK_ERR_FAULT; -+ } -+ -+ control_packet_size = args->control_packet_size; -+ output_buffer_size = args->response_packet_size; -+ -+ if (0 != control_packet_size) { -+ u8 control_type; -+ u8 *control_packet_data; -+ u8 *response_packet_data; -+ u32 version_length = sizeof(utgard_setup_version) - 1; -+ -+ control_packet_data = (u8 *)(uintptr_t)args->control_packet_data; -+ MALI_DEBUG_ASSERT_POINTER(control_packet_data); -+ response_packet_data = (u8 *)(uintptr_t)args->response_packet_data; -+ MALI_DEBUG_ASSERT_POINTER(response_packet_data); -+ -+ /*Decide if need to ignore Utgard setup version.*/ -+ if (control_packet_size >= version_length) { -+ if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) { -+ if (control_packet_size == version_length) { -+ args->response_packet_size = 0; -+ return _MALI_OSK_ERR_OK; -+ } else { -+ control_packet_data += version_length; -+ control_packet_size -= version_length; -+ } -+ } -+ } -+ -+ current_profiling_pid = _mali_osk_get_pid(); -+ -+ control_type = control_packet_data[0]; -+ switch (control_type) { -+ case PACKET_HEADER_COUNTERS_REQUEST: { -+ int i; + -+ if (PACKET_HEADER_SIZE > control_packet_size || -+ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) { -+ MALI_PRINT_ERROR(("Wrong control packet size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ /* Send supported counters */ -+ if (PACKET_HEADER_SIZE > output_buffer_size) -+ return _MALI_OSK_ERR_FAULT; -+ -+ *response_packet_data = PACKET_HEADER_COUNTERS_ACK; -+ args->response_packet_size = PACKET_HEADER_SIZE; -+ -+ for (i = 0; i < num_global_mali_profiling_counters; ++i) { -+ u32 name_size = strlen(global_mali_profiling_counters[i].counter_name); -+ -+ if ((args->response_packet_size + name_size + 1) > output_buffer_size) { -+ MALI_PRINT_ERROR(("Response packet data is too large..\n")); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ require_page = mali_dmem_get_gp_varying_size(next_gp_job); + -+ memcpy(response_packet_data + args->response_packet_size, -+ global_mali_profiling_counters[i].counter_name, name_size + 1); -+ -+ args->response_packet_size += (name_size + 1); ++ MALI_DEBUG_PRINT(4, ("mali_mem_defer_prepare_mem_work, require alloc page 0x%x\n", ++ require_page)); 
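++	/* Note (descriptive comment, added for clarity): this step only
++	 * pre-allocates OS pages into the defer-bind free-page pool; the
++	 * actual mapping into the Mali address space is deferred to
++	 * mali_mem_defer_bind(), which runs when the GP job is started. */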
++ /* allocate more pages from OS */
++ if (_MALI_OSK_ERR_OK != mali_mem_defer_alloc_mem(require_page, next_gp_job->session, dblock)) {
++ MALI_DEBUG_PRINT(1, ("ERROR##mali_mem_defer_prepare_mem_work, allocate page failed!!"));
++ return _MALI_OSK_ERR_NOMEM;
++ }
+
++ next_gp_job->bind_flag = MALI_DEFER_BIND_MEMORY_PREPARED;
+
++ return _MALI_OSK_ERR_OK;
++}
+
+
++/* do preparation for allocation before defer bind */
++_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize)
++{
++ mali_mem_backend *mem_bkend = NULL;
++ struct mali_backend_bind_list *bk_list = _mali_osk_calloc(1, sizeof(struct mali_backend_bind_list));
++ if (NULL == bk_list)
++ return _MALI_OSK_ERR_FAULT;
+
++ INIT_LIST_HEAD(&bk_list->node);
++ /* Get backend memory */
++ mutex_lock(&mali_idr_mutex);
++ if (!(mem_bkend = idr_find(&mali_backend_idr, alloc->backend_handle))) {
++ MALI_DEBUG_PRINT(1, ("Can't find memory backend in defer bind!\n"));
++ mutex_unlock(&mali_idr_mutex);
++ _mali_osk_free(bk_list);
++ return _MALI_OSK_ERR_FAULT;
++ }
++ mutex_unlock(&mali_idr_mutex);
+
++ /* If the mem backend has already been bound, no need to bind again.*/
++ if (mem_bkend->os_mem.count > 0) {
++ _mali_osk_free(bk_list);
++ return _MALI_OSK_ERR_OK;
++ }
+
++ MALI_DEBUG_PRINT(4, ("bind_allocation_prepare:: allocation =%x vaddr=0x%x!\n", alloc, 
alloc->mali_vma_node.vm_node.start)); + -+ /* Enable requested counters */ -+ while (request_pos < control_packet_size) { -+ u32 begin = request_pos; -+ u32 event; -+ u32 key; ++ INIT_LIST_HEAD(&mem_bkend->os_mem.pages); + -+ /* Check the counter name which should be ended with null */ -+ while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') { -+ ++request_pos; -+ } ++ bk_list->bkend = mem_bkend; ++ bk_list->vaddr = alloc->mali_vma_node.vm_node.start; ++ bk_list->session = alloc->session; ++ bk_list->page_num = mem_bkend->size / _MALI_OSK_MALI_PAGE_SIZE; ++ *required_varying_memsize += mem_bkend->size; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); + -+ if (request_pos >= control_packet_size) -+ return _MALI_OSK_ERR_FAULT; ++ /* add to job to do list */ ++ list_add(&bk_list->node, list); + -+ ++request_pos; -+ event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); -+ key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); ++ return _MALI_OSK_ERR_OK; ++} + -+ for (i = 0; i < num_global_mali_profiling_counters; ++i) { -+ u32 name_size = strlen((char *)(control_packet_data + begin)); + -+ if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) { -+ if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id -+ && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) { -+ sw_counter_if_enabled = MALI_TRUE; -+ _mali_profiling_control(SW_COUNTER_ENABLE, 1); -+ } + -+ if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) { -+ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1); -+ _mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff); -+ _mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff); -+ } ++/* bind phyiscal memory to allocation ++This function will be called in IRQ handler*/ ++static _mali_osk_errcode_t mali_mem_defer_bind_allocation(struct mali_backend_bind_list *bk_node, ++ struct list_head *pages) ++{ ++ struct mali_session_data *session = bk_node->session; ++ mali_mem_backend *mem_bkend = bk_node->bkend; ++ MALI_DEBUG_PRINT(4, ("mali_mem_defer_bind_allocation, bind bkend = %x page num=0x%x vaddr=%x session=%x\n", mem_bkend, bk_node->page_num, bk_node->vaddr, session)); + -+ if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER && -+ global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) { -+ _mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, -+ key, 1); -+ mem_counters_enabled++; -+ } ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); ++ list_splice(pages, &mem_bkend->os_mem.pages); ++ mem_bkend->os_mem.count = bk_node->page_num; + -+ global_mali_profiling_counters[i].counter_event = event; -+ global_mali_profiling_counters[i].key = key; -+ global_mali_profiling_counters[i].enabled = 1; ++ if (mem_bkend->type == MALI_MEM_OS) { ++ mali_mem_os_mali_map(&mem_bkend->os_mem, session, bk_node->vaddr, 0, ++ mem_bkend->os_mem.count, MALI_MMU_FLAGS_DEFAULT); ++ } ++ smp_wmb(); ++ bk_node->flag = MALI_DEFER_BIND_MEMORY_BINDED; ++ mem_bkend->flags &= ~MALI_MEM_BACKEND_FLAG_NOT_BINDED; ++ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_BINDED; ++ return _MALI_OSK_ERR_OK; ++} + -+ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, -+ global_mali_profiling_counters[i].counter_event); -+ num_counters_enabled++; -+ break; -+ } -+ } + -+ if (i == 
num_global_mali_profiling_counters) { -+ MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } ++static struct list_head *mali_mem_defer_get_free_page_list(u32 count, struct list_head *pages, mali_defer_mem_block *dblock) ++{ ++ int i = 0; ++ struct mali_page_node *m_page, *m_tmp; + -+ if (PACKET_HEADER_SIZE <= output_buffer_size) { -+ *response_packet_data = PACKET_HEADER_ACK; -+ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE); -+ args->response_packet_size = PACKET_HEADER_SIZE; ++ if (atomic_read(&dblock->num_free_pages) < count) { ++ return NULL; ++ } else { ++ list_for_each_entry_safe(m_page, m_tmp, &dblock->free_pages, list) { ++ if (i < count) { ++ list_move_tail(&m_page->list, pages); + } else { -+ return _MALI_OSK_ERR_FAULT; ++ break; + } -+ -+ break; ++ i++; + } ++ MALI_DEBUG_ASSERT(i == count); ++ atomic_sub(count, &dblock->num_free_pages); ++ return pages; ++ } ++} + -+ case PACKET_HEADER_START_CAPTURE_VALUE: { -+ u32 live_rate; -+ u32 request_pos = PACKET_HEADER_SIZE; -+ -+ if (PACKET_HEADER_SIZE > control_packet_size || -+ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) { -+ MALI_PRINT_ERROR(("Wrong control packet size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size)); -+ return _MALI_OSK_ERR_FAULT; -+ } -+ -+ /* Read samping rate in nanoseconds and live rate, start capture.*/ -+ profiling_sample_rate = _mali_profiling_read_packet_int(control_packet_data, -+ &request_pos, control_packet_size); -+ -+ live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); + -+ if (PACKET_HEADER_SIZE <= output_buffer_size) { -+ *response_packet_data = PACKET_HEADER_ACK; -+ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE); -+ args->response_packet_size = PACKET_HEADER_SIZE; -+ } else { -+ return _MALI_OSK_ERR_FAULT; -+ } ++/* called in job start IOCTL to bind physical memory for each allocations ++@ bk_list backend list to do defer bind ++@ pages page list to do this bind ++@ count number of pages ++*/ ++_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, ++ struct mali_defer_mem_block *dmem_block) ++{ ++ struct mali_defer_mem *dmem = NULL; ++ struct mali_backend_bind_list *bkn, *bkn_tmp; ++ LIST_HEAD(pages); + -+ if (0 != num_counters_enabled && 0 != profiling_sample_rate) { -+ _mali_profiling_global_stream_list_free(); -+ if (mem_counters_enabled > 0) { -+ _mali_profiling_notification_enable(session, profiling_sample_rate, 1); -+ } -+ hrtimer_start(&profiling_sampling_timer, -+ ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000), -+ HRTIMER_MODE_REL_PINNED); -+ } ++ if (gp->required_varying_memsize != (atomic_read(&dmem_block->num_free_pages) * _MALI_OSK_MALI_PAGE_SIZE)) { ++ MALI_DEBUG_PRINT_ERROR(("#BIND: The memsize of varying buffer not match to the pagesize of the dmem_block!!## \n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ break; -+ } -+ default: -+ MALI_PRINT_ERROR(("Unsupported profiling packet header type %u.\n", control_type)); -+ args->response_packet_size = 0; -+ return _MALI_OSK_ERR_FAULT; -+ } ++ MALI_DEBUG_PRINT(4, ("#BIND: GP job=%x## \n", gp)); ++ dmem = (mali_defer_mem *)_mali_osk_calloc(1, sizeof(struct mali_defer_mem)); ++ if (dmem) { ++ INIT_LIST_HEAD(&dmem->node); ++ gp->dmem = dmem; + } else { -+ _mali_osk_profiling_stop_sampling(current_profiling_pid); -+ _mali_profiling_notification_enable(session, 0, 
0); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ return _MALI_OSK_ERR_OK; -+} -+ -+/** -+ * Called by gator.ko to set HW counters -+ * -+ * @param counter_id The counter ID. -+ * @param event_id Event ID that the counter should count (HW counter value from TRM). -+ * -+ * @return 1 on success, 0 on failure. -+ */ -+int _mali_profiling_set_event(u32 counter_id, s32 event_id) -+{ -+ if (COUNTER_VP_0_C0 == counter_id) { -+ mali_gp_job_set_gp_counter_src0(event_id); -+ } else if (COUNTER_VP_0_C1 == counter_id) { -+ mali_gp_job_set_gp_counter_src1(event_id); -+ } else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) { -+ /* -+ * Two compatibility notes for this function: -+ * -+ * 1) Previously the DDK allowed per core counters. -+ * -+ * This did not make much sense on Mali-450 with the "virtual PP core" concept, -+ * so this option was removed, and only the same pair of HW counters was allowed on all cores, -+ * beginning with r3p2 release. -+ * -+ * Starting with r4p0, it is now possible to set different HW counters for the different sub jobs. -+ * This should be almost the same, since sub job 0 is designed to run on core 0, -+ * sub job 1 on core 1, and so on. -+ * -+ * The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2 -+ * sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase -+ * the predictability of the returned data (as you would be guaranteed data for all the selected HW counters). -+ * -+ * PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions). -+ * -+ * The framework/#defines with Gator still indicates that the counter is for a particular core, -+ * but this is internally used as a sub job ID instead (no translation needed). -+ * -+ * 2) Global/default vs per sub job counters -+ * -+ * Releases before r3p2 had only per PP core counters. -+ * r3p2 releases had only one set of default/global counters which applied to all PP cores -+ * Starting with r4p0, we have both a set of default/global counters, -+ * and individual counters per sub job (equal to per core). -+ * -+ * To keep compatibility with Gator/DS-5/streamline, the following scheme is used: -+ * -+ * r3p2 release; only counters set for core 0 is handled, -+ * this is applied as the default/global set of counters, and will thus affect all cores. -+ * -+ * r4p0 release; counters set for core 0 is applied as both the global/default set of counters, -+ * and counters for sub job 0. -+ * Counters set for core 1-7 is only applied for the corresponding sub job. -+ * -+ * This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the -+ * values for core 0, and thus this will be applied to all PP sub jobs/cores. -+ * Advanced mode will also be supported, where individual pairs of HW counters can be selected. -+ * -+ * The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably -+ * something we can live with! -+ * -+ * Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU -+ * automatically distributes the load between whatever number of cores is available at this particular time. -+ * A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible -+ * to use a single pair of HW counters (even if the job ran on multiple PP cores). 
-+ * In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs. -+ */ -+ u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1; -+ u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1; -+ if (0 == counter_src) { -+ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id); -+ if (0 == sub_job) { -+ mali_pp_job_set_pp_counter_global_src0(event_id); -+ } ++ atomic_add(1, &mali_dmem_man->num_dmem); ++ /* for each bk_list backend, do bind */ ++ list_for_each_entry_safe(bkn, bkn_tmp , &gp->vary_todo, node) { ++ INIT_LIST_HEAD(&pages); ++ if (likely(mali_mem_defer_get_free_page_list(bkn->page_num, &pages, dmem_block))) { ++ list_del(&bkn->node); ++ mali_mem_defer_bind_allocation(bkn, &pages); ++ _mali_osk_free(bkn); + } else { -+ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id); -+ if (0 == sub_job) { -+ mali_pp_job_set_pp_counter_global_src1(event_id); -+ } -+ } -+ } else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) { -+ u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1; -+ struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id); -+ -+ if (NULL != l2_cache_core) { -+ u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1; -+ mali_l2_cache_core_set_counter_src(l2_cache_core, -+ counter_src, event_id); -+ l2_cache_counter_if_enabled = MALI_TRUE; ++ /* not enough memory will not happen */ ++ MALI_DEBUG_PRINT_ERROR(("#BIND: NOT enough memory when binded !!## \n")); ++ _mali_osk_free(gp->dmem); ++ return _MALI_OSK_ERR_NOMEM; + } -+ } else { -+ return 0; /* Failure, unknown event */ + } + -+ return 1; /* success */ -+} -+ -+/** -+ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores. -+ * The L2 cache counters are unique in that they are polled by gator, rather than being -+ * transmitted via the tracepoint mechanism. -+ * -+ * @param values Pointer to a _mali_profiling_l2_counter_values structure where -+ * the counter sources and values will be output -+ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores -+ */ -+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values) -+{ -+ u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores(); -+ u32 i; -+ -+ MALI_DEBUG_ASSERT(l2_cores_num <= 3); -+ -+ for (i = 0; i < l2_cores_num; i++) { -+ struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i); -+ -+ if (NULL == l2_cache) { -+ continue; -+ } -+ -+ mali_l2_cache_core_get_counter_values(l2_cache, -+ &values->cores[i].source0, -+ &values->cores[i].value0, -+ &values->cores[i].source1, -+ &values->cores[i].value1); ++ if (!list_empty(&gp->vary_todo)) { ++ MALI_DEBUG_PRINT_ERROR(("#BIND: The deferbind backend list isn't empty !!## \n")); ++ _mali_osk_free(gp->dmem); ++ return _MALI_OSK_ERR_FAULT; + } + -+ return 0; -+} -+ -+/** -+ * Called by gator to control the production of profiling information at runtime. -+ */ -+void _mali_profiling_control(u32 action, u32 value) -+{ -+ switch (action) { -+ case FBDUMP_CONTROL_ENABLE: -+ mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? 
MALI_FALSE : MALI_TRUE)); -+ break; -+ case FBDUMP_CONTROL_RATE: -+ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value); -+ break; -+ case SW_COUNTER_ENABLE: -+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value); -+ break; -+ case FBDUMP_CONTROL_RESIZE_FACTOR: -+ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value); -+ break; -+ default: -+ break; /* Ignore unimplemented actions */ -+ } -+} ++ dmem->flag = MALI_DEFER_BIND_MEMORY_BINDED; + -+/** -+ * Called by gator to get mali api version. -+ */ -+u32 _mali_profiling_get_api_version(void) -+{ -+ return MALI_PROFILING_API_VERSION; ++ return _MALI_OSK_ERR_OK; +} + -+/** -+* Called by gator to get the data about Mali instance in use: -+* product id, version, number of cores -+*/ -+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values) ++void mali_mem_defer_dmem_free(struct mali_gp_job *gp) +{ -+ values->mali_product_id = (u32)mali_kernel_core_get_product_id(); -+ values->mali_version_major = mali_kernel_core_get_gpu_major_version(); -+ values->mali_version_minor = mali_kernel_core_get_gpu_minor_version(); -+ values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores(); -+ values->num_of_fp_cores = mali_executor_get_num_cores_total(); -+ values->num_of_vp_cores = 1; ++ if (gp->dmem) { ++ atomic_dec(&mali_dmem_man->num_dmem); ++ _mali_osk_free(gp->dmem); ++ } +} + -+ -+EXPORT_SYMBOL(_mali_profiling_set_event); -+EXPORT_SYMBOL(_mali_profiling_get_l2_counters); -+EXPORT_SYMBOL(_mali_profiling_control); -+EXPORT_SYMBOL(_mali_profiling_get_api_version); -+EXPORT_SYMBOL(_mali_profiling_get_mali_version); -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h b/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h new file mode 100644 -index 000000000..af51161f9 +index 000000000..75a709a71 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h -@@ -0,0 +1,74 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_defer_bind.h +@@ -0,0 +1,64 @@ +/* -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -321399,79 +322434,69 @@ index 000000000..af51161f9 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ ++#ifndef __MALI_MEMORY_DEFER_BIND_H_ ++#define __MALI_MEMORY_DEFER_BIND_H_ + -+/** -+ * @file mali_osk_specific.h -+ * Defines per-OS Kernel level specifics, such as unusual workarounds for -+ * certain OSs. 
-+ */ -+ -+#ifndef __MALI_OSK_SPECIFIC_H__ -+#define __MALI_OSK_SPECIFIC_H__ + -+#include -+#include -+#include -+#include -+#include -+#include ++#include "mali_osk.h" ++#include "mali_session.h" + ++#include ++#include ++#include ++#include ++#include + -+#include "mali_osk_types.h" -+#include "mali_kernel_linux.h" + -+#define MALI_STATIC_INLINE static inline -+#define MALI_NON_STATIC_INLINE inline ++#include "mali_memory_types.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_uk_types.h" + -+typedef struct dma_pool *mali_dma_pool; ++struct mali_gp_job; + -+typedef u32 mali_dma_addr; ++typedef struct mali_defer_mem { ++ struct list_head node; /*dlist node in bind manager */ ++ u32 flag; ++} mali_defer_mem; + -+#if MALI_ENABLE_CPU_CYCLES -+/* Reads out the clock cycle performance counter of the current cpu. -+ It is useful for cost-free (2 cycle) measuring of the time spent -+ in a code path. Sample before and after, the diff number of cycles. -+ When the CPU is idle it will not increase this clock counter. -+ It means that the counter is accurate if only spin-locks are used, -+ but mutexes may lead to too low values since the cpu might "idle" -+ waiting for the mutex to become available. -+ The clock source is configured on the CPU during mali module load, -+ but will not give useful output after a CPU has been power cycled. -+ It is therefore important to configure the system to not turn of -+ the cpu cores when using this functionallity.*/ -+static inline unsigned int mali_get_cpu_cyclecount(void) -+{ -+ unsigned int value; -+ /* Reading the CCNT Register - CPU clock counter */ -+ asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value)); -+ return value; -+} + -+void mali_init_cpu_time_counters(int reset, int enable_divide_by_64); -+#endif ++typedef struct mali_defer_mem_block { ++ struct list_head free_pages; /* page pool */ ++ atomic_t num_free_pages; ++} mali_defer_mem_block; + ++/* varying memory list need to bind */ ++typedef struct mali_backend_bind_list { ++ struct list_head node; ++ struct mali_mem_backend *bkend; ++ u32 vaddr; ++ u32 page_num; ++ struct mali_session_data *session; ++ u32 flag; ++} mali_backend_bind_lists; + -+MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n) -+{ -+ return (u32)copy_from_user(to, from, (unsigned long)n); -+} + -+MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void) -+{ -+ return in_atomic(); -+} ++typedef struct mali_defer_bind_manager { ++ atomic_t num_used_pages; ++ atomic_t num_dmem; ++} mali_defer_bind_manager; + -+#define _mali_osk_put_user(x, ptr) put_user(x, ptr) ++_mali_osk_errcode_t mali_mem_defer_bind_manager_init(void); ++void mali_mem_defer_bind_manager_destory(void); ++_mali_osk_errcode_t mali_mem_defer_bind(struct mali_gp_job *gp, struct mali_defer_mem_block *dmem_block); ++_mali_osk_errcode_t mali_mem_defer_bind_allocation_prepare(mali_mem_allocation *alloc, struct list_head *list, u32 *required_varying_memsize); ++_mali_osk_errcode_t mali_mem_prepare_mem_for_job(struct mali_gp_job *next_gp_job, mali_defer_mem_block *dblock); ++void mali_mem_defer_dmem_free(struct mali_gp_job *gp); + -+#endif /* __MALI_OSK_SPECIFIC_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c new file mode 100644 -index 000000000..d295e712a +index 000000000..1f4565127 --- /dev/null -+++ 
b/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c -@@ -0,0 +1,59 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.c +@@ -0,0 +1,369 @@ +/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -321480,475 +322505,373 @@ index 000000000..d295e712a + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_osk_time.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++#include /* file system operations */ ++#include /* user space access */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include + ++#include "mali_ukk.h" +#include "mali_osk.h" -+#include -+#include -+#include -+ -+mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb) -+{ -+ return time_after_eq(ticka, tickb) ? -+ MALI_TRUE : MALI_FALSE; -+} -+ -+unsigned long _mali_osk_time_mstoticks(u32 ms) -+{ -+ return msecs_to_jiffies(ms); -+} -+ -+u32 _mali_osk_time_tickstoms(unsigned long ticks) -+{ -+ return jiffies_to_msecs(ticks); -+} -+ -+unsigned long _mali_osk_time_tickcount(void) -+{ -+ return jiffies; -+} -+ -+void _mali_osk_time_ubusydelay(u32 usecs) -+{ -+ udelay(usecs); -+} ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_kernel_linux.h" + -+u64 _mali_osk_time_get_ns(void) -+{ -+ struct timespec64 tsval; -+ ktime_get_real_ts64(&tsval); -+ return (u64)timespec64_to_ns(&tsval); -+} ++#include "mali_memory.h" ++#include "mali_memory_dma_buf.h" ++#include "mali_memory_virtual.h" ++#include "mali_pp_job.h" + -+u64 _mali_osk_boot_time_get_ns(void) -+{ -+ struct timespec64 tsval; -+ ktime_get_boottime_ts64(&tsval); -+ return (u64)timespec64_to_ns(&tsval); -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c -new file mode 100644 -index 000000000..d01c11482 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c -@@ -0,0 +1,76 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ * Map DMA buf attachment \a mem into \a session at virtual address \a virt. 
+ */ ++static int mali_dma_buf_map(mali_mem_backend *mem_backend) ++{ ++ mali_mem_allocation *alloc; ++ struct mali_dma_buf_attachment *mem; ++ struct mali_session_data *session; ++ struct mali_page_directory *pagedir; ++ _mali_osk_errcode_t err; ++ struct scatterlist *sg; ++ u32 virt, flags; ++ int i; + -+/** -+ * @file mali_osk_timers.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); + -+#include -+#include -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ alloc = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+struct _mali_osk_timer_t_struct { -+ struct timer_list timer; -+}; ++ mem = mem_backend->dma_buf.attachment; ++ MALI_DEBUG_ASSERT_POINTER(mem); + -+typedef void (*timer_timeout_function_t)(unsigned long); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT(mem->session == session); + -+_mali_osk_timer_t *_mali_osk_timer_init(_mali_osk_timer_callback_t callback) -+{ -+ _mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL); -+ if (NULL != t) -+ timer_setup(&t->timer, -+ (void (*)(struct timer_list *))callback, 0); -+ return t; -+} ++ virt = alloc->mali_vma_node.vm_node.start; ++ flags = alloc->flags; + -+void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ tim->timer.expires = jiffies + ticks_to_expire; -+ add_timer(&(tim->timer)); -+} ++ mali_session_memory_lock(session); ++ mem->map_ref++; + -+void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ mod_timer(&(tim->timer), jiffies + ticks_to_expire); -+} ++ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: map attachment %p, new map_ref = %d\n", mem, mem->map_ref)); + -+void _mali_osk_timer_del(_mali_osk_timer_t *tim) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ del_timer_sync(&(tim->timer)); -+} ++ if (1 == mem->map_ref) { + -+void _mali_osk_timer_del_async(_mali_osk_timer_t *tim) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ del_timer(&(tim->timer)); -+} ++ /* First reference taken, so we need to map the dma buf */ ++ MALI_DEBUG_ASSERT(!mem->is_mapped); + -+mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ return 1 == timer_pending(&(tim->timer)); -+} ++ mem->sgt = dma_buf_map_attachment(mem->attachment, DMA_BIDIRECTIONAL); ++ if (IS_ERR_OR_NULL(mem->sgt)) { ++ MALI_DEBUG_PRINT_ERROR(("Failed to map dma-buf attachment\n")); ++ mem->map_ref--; ++ mali_session_memory_unlock(session); ++ return -EFAULT; ++ } + -+void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+} ++ err = mali_mem_mali_map_prepare(alloc); ++ if (_MALI_OSK_ERR_OK != err) { ++ MALI_DEBUG_PRINT(1, ("Mapping of DMA memory failed\n")); ++ mem->map_ref--; ++ mali_session_memory_unlock(session); ++ return -ENOMEM; ++ } + -+void _mali_osk_timer_term(_mali_osk_timer_t *tim) -+{ -+ MALI_DEBUG_ASSERT_POINTER(tim); -+ kfree(tim); -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c -new file mode 100644 -index 000000000..fa12abd3f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ pagedir = mali_session_get_page_directory(session); ++ MALI_DEBUG_ASSERT_POINTER(pagedir); + -+/** -+ * @file mali_osk_wait_queue.c -+ * Implemenation of the OS abstraction layer for the kernel device driver -+ */ ++ for_each_sg(mem->sgt->sgl, sg, mem->sgt->nents, i) { ++ u32 size = sg_dma_len(sg); ++ dma_addr_t phys = sg_dma_address(sg); + -+#include -+#include -+#include ++ /* sg must be page aligned. */ ++ MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE); ++ MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF)); + -+#include "mali_osk.h" -+#include "mali_kernel_common.h" ++ mali_mmu_pagedir_update(pagedir, virt, phys, size, MALI_MMU_FLAGS_DEFAULT); + -+struct _mali_osk_wait_queue_t_struct { -+ wait_queue_head_t wait_queue; -+}; ++ virt += size; ++ } + -+_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void) -+{ -+ _mali_osk_wait_queue_t *ret = NULL; ++ if (flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { ++ u32 guard_phys; ++ MALI_DEBUG_PRINT(7, ("Mapping in extra guard page\n")); + -+ ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL); ++ guard_phys = sg_dma_address(mem->sgt->sgl); ++ mali_mmu_pagedir_update(pagedir, virt, guard_phys, MALI_MMU_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); ++ } + -+ if (NULL == ret) { -+ return ret; ++ mem->is_mapped = MALI_TRUE; ++ mali_session_memory_unlock(session); ++ /* Wake up any thread waiting for buffer to become mapped */ ++ wake_up_all(&mem->wait_queue); ++ } else { ++ MALI_DEBUG_ASSERT(mem->is_mapped); ++ mali_session_memory_unlock(session); + } + -+ init_waitqueue_head(&ret->wait_queue); -+ MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue)); -+ -+ return ret; -+} -+ -+void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data) -+{ -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue)); -+ wait_event(queue->wait_queue, condition(data)); -+} -+ -+void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout) -+{ -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue)); -+ wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout)); ++ return 0; +} + -+void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue) ++static void mali_dma_buf_unmap(mali_mem_allocation *alloc, struct mali_dma_buf_attachment *mem) +{ -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ -+ /* if queue is empty, don't attempt to wake up its elements */ -+ if (!waitqueue_active(&queue->wait_queue)) return; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ MALI_DEBUG_ASSERT_POINTER(mem); ++ MALI_DEBUG_ASSERT_POINTER(mem->attachment); ++ MALI_DEBUG_ASSERT_POINTER(mem->buf); ++ MALI_DEBUG_ASSERT_POINTER(alloc->session); + -+ MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue)); ++ mali_session_memory_lock(alloc->session); ++ mem->map_ref--; + -+ wake_up_all(&queue->wait_queue); ++ MALI_DEBUG_PRINT(5, ("Mali DMA-buf: unmap attachment %p, new map_ref = %d\n", mem, mem->map_ref)); + -+ 
MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue)); ++ if (0 == mem->map_ref) { ++ dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL); ++ if (MALI_TRUE == mem->is_mapped) { ++ mali_mem_mali_map_free(alloc->session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ } ++ mem->is_mapped = MALI_FALSE; ++ } ++ mali_session_memory_unlock(alloc->session); ++ /* Wake up any thread waiting for buffer to become unmapped */ ++ wake_up_all(&mem->wait_queue); +} + -+void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue) ++#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++int mali_dma_buf_map_job(struct mali_pp_job *job) +{ -+ /* Parameter validation */ -+ MALI_DEBUG_ASSERT_POINTER(queue); -+ -+ /* Linux requires no explicit termination of wait queues */ -+ kfree(queue); -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c -new file mode 100644 -index 000000000..d5e258a83 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c -@@ -0,0 +1,240 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+/** -+ * @file mali_osk_wq.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ -+ -+#include /* For memory allocation */ -+#include -+#include -+#include -+ -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_kernel_license.h" -+#include "mali_kernel_linux.h" ++ struct mali_dma_buf_attachment *mem; ++ _mali_osk_errcode_t err; ++ int i; ++ int ret = 0; ++ u32 num_memory_cookies; ++ struct mali_session_data *session; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_allocation *mali_alloc = NULL; ++ mali_mem_backend *mem_bkend = NULL; + -+typedef struct _mali_osk_wq_work_s { -+ _mali_osk_wq_work_handler_t handler; -+ void *data; -+ mali_bool high_pri; -+ struct work_struct work_handle; -+} mali_osk_wq_work_object_t; ++ MALI_DEBUG_ASSERT_POINTER(job); + -+typedef struct _mali_osk_wq_delayed_work_s { -+ _mali_osk_wq_work_handler_t handler; -+ void *data; -+ struct delayed_work work; -+} mali_osk_wq_delayed_work_object_t; ++ num_memory_cookies = mali_pp_job_num_memory_cookies(job); + -+#if MALI_LICENSE_IS_GPL -+static struct workqueue_struct *mali_wq_normal = NULL; -+static struct workqueue_struct *mali_wq_high = NULL; -+#endif ++ session = mali_pp_job_get_session(job); + -+static void _mali_osk_wq_work_func(struct work_struct *work); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+_mali_osk_errcode_t _mali_osk_wq_init(void) -+{ -+#if MALI_LICENSE_IS_GPL -+ MALI_DEBUG_ASSERT(NULL == mali_wq_normal); -+ MALI_DEBUG_ASSERT(NULL == mali_wq_high); ++ for (i = 0; i < num_memory_cookies; i++) { ++ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ MALI_DEBUG_ASSERT(NULL != mali_vma_node); ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(NULL != mali_alloc); ++ if (MALI_MEM_DMA_BUF != 
mali_alloc->type) { ++ continue; ++ } + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) -+ mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0); -+ mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0); -+#else -+ mali_wq_normal = create_workqueue("mali"); -+ mali_wq_high = create_workqueue("mali_high_pri"); -+#endif -+ if (NULL == mali_wq_normal || NULL == mali_wq_high) { -+ MALI_PRINT_ERROR(("Unable to create Mali workqueues\n")); ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+ if (mali_wq_normal) destroy_workqueue(mali_wq_normal); -+ if (mali_wq_high) destroy_workqueue(mali_wq_high); ++ mem = mem_bkend->dma_buf.attachment; + -+ mali_wq_normal = NULL; -+ mali_wq_high = NULL; ++ MALI_DEBUG_ASSERT_POINTER(mem); ++ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job)); + -+ return _MALI_OSK_ERR_FAULT; ++ err = mali_dma_buf_map(mem_bkend); ++ if (0 != err) { ++ MALI_DEBUG_PRINT_ERROR(("Mali DMA-buf: Failed to map dma-buf for mali address %x\n", mali_addr)); ++ ret = -EFAULT; ++ continue; ++ } + } -+#endif /* MALI_LICENSE_IS_GPL */ -+ -+ return _MALI_OSK_ERR_OK; -+} -+ -+void _mali_osk_wq_flush(void) -+{ -+#if MALI_LICENSE_IS_GPL -+ flush_workqueue(mali_wq_high); -+ flush_workqueue(mali_wq_normal); -+#else -+ flush_scheduled_work(); -+#endif ++ return ret; +} + -+void _mali_osk_wq_term(void) ++void mali_dma_buf_unmap_job(struct mali_pp_job *job) +{ -+#if MALI_LICENSE_IS_GPL -+ MALI_DEBUG_ASSERT(NULL != mali_wq_normal); -+ MALI_DEBUG_ASSERT(NULL != mali_wq_high); ++ struct mali_dma_buf_attachment *mem; ++ int i; ++ u32 num_memory_cookies; ++ struct mali_session_data *session; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_allocation *mali_alloc = NULL; ++ mali_mem_backend *mem_bkend = NULL; + -+ flush_workqueue(mali_wq_normal); -+ destroy_workqueue(mali_wq_normal); ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ flush_workqueue(mali_wq_high); -+ destroy_workqueue(mali_wq_high); ++ num_memory_cookies = mali_pp_job_num_memory_cookies(job); + -+ mali_wq_normal = NULL; -+ mali_wq_high = NULL; -+#else -+ flush_scheduled_work(); -+#endif -+} ++ session = mali_pp_job_get_session(job); + -+_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data) -+{ -+ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ if (NULL == work) return NULL; ++ for (i = 0; i < num_memory_cookies; i++) { ++ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ MALI_DEBUG_ASSERT(NULL != mali_vma_node); ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(NULL != mali_alloc); ++ if (MALI_MEM_DMA_BUF != mali_alloc->type) { ++ continue; ++ } + -+ work->handler = handler; -+ work->data = data; -+ work->high_pri = MALI_FALSE; ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func); ++ mem = mem_bkend->dma_buf.attachment; + -+ return work; ++ MALI_DEBUG_ASSERT_POINTER(mem); ++ MALI_DEBUG_ASSERT(mem->session == mali_pp_job_get_session(job)); 
++ mali_dma_buf_unmap(mem_bkend->mali_allocation, mem); ++ } +} ++#endif /* !CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH */ + -+_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data) ++int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *user_arg) +{ -+ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL); -+ -+ if (NULL == work) return NULL; -+ -+ work->handler = handler; -+ work->data = data; -+ work->high_pri = MALI_TRUE; ++ _mali_uk_dma_buf_get_size_s args; ++ int fd; ++ struct dma_buf *buf; + -+ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func); ++ /* get call arguments from user space. copy_from_user returns how many bytes which where NOT copied */ ++ if (0 != copy_from_user(&args, (void __user *)user_arg, sizeof(_mali_uk_dma_buf_get_size_s))) { ++ return -EFAULT; ++ } + -+ return work; -+} ++ /* Do DMA-BUF stuff */ ++ fd = args.mem_fd; + -+void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work) -+{ -+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; -+ _mali_osk_wq_flush(); -+ kfree(work_object); -+} ++ buf = dma_buf_get(fd); ++ if (IS_ERR_OR_NULL(buf)) { ++ MALI_DEBUG_PRINT_ERROR(("Failed to get dma-buf from fd: %d\n", fd)); ++ return PTR_ERR_OR_ZERO(buf); ++ } + -+void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work) -+{ -+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; -+ kfree(work_object); -+} ++ if (0 != put_user(buf->size, &user_arg->size)) { ++ dma_buf_put(buf); ++ return -EFAULT; ++ } + -+void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work) -+{ -+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; -+#if MALI_LICENSE_IS_GPL -+ queue_work(mali_wq_normal, &work_object->work_handle); -+#else -+ schedule_work(&work_object->work_handle); -+#endif -+} ++ dma_buf_put(buf); + -+void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work) -+{ -+ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; -+#if MALI_LICENSE_IS_GPL -+ queue_work(mali_wq_high, &work_object->work_handle); -+#else -+ schedule_work(&work_object->work_handle); -+#endif ++ return 0; +} + -+static void _mali_osk_wq_work_func(struct work_struct *work) ++_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc, ++ mali_mem_backend *mem_backend, ++ int fd, u32 flags) +{ -+ mali_osk_wq_work_object_t *work_object; ++ struct dma_buf *buf; ++ struct mali_dma_buf_attachment *dma_mem; ++ struct mali_session_data *session = alloc->session; + -+ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle); ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+#if MALI_LICENSE_IS_GPL -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) -+ /* We want highest Dynamic priority of the thread so that the Jobs depending -+ ** on this thread could be scheduled in time. Without this, this thread might -+ ** sometimes need to wait for some threads in user mode to finish its round-robin -+ ** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation -+ ** of high-priority workqueue in new kernel, this only happens in older kernel. 
-+ */ -+ if (MALI_TRUE == work_object->high_pri) { -+ set_user_nice(current, -19); ++ /* get dma buffer */ ++ buf = dma_buf_get(fd); ++ if (IS_ERR_OR_NULL(buf)) { ++ return _MALI_OSK_ERR_FAULT; + } -+#endif -+#endif /* MALI_LICENSE_IS_GPL */ + -+ work_object->handler(work_object->data); -+} ++ /* Currently, mapping of the full buffer are supported. */ ++ if (alloc->psize != buf->size) { ++ goto failed_alloc_mem; ++ } + -+static void _mali_osk_wq_delayed_work_func(struct work_struct *work) -+{ -+ mali_osk_wq_delayed_work_object_t *work_object; ++ dma_mem = _mali_osk_calloc(1, sizeof(struct mali_dma_buf_attachment)); ++ if (NULL == dma_mem) { ++ goto failed_alloc_mem; ++ } + -+ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work); -+ work_object->handler(work_object->data); -+} ++ dma_mem->buf = buf; ++ dma_mem->session = session; ++ dma_mem->map_ref = 0; ++ init_waitqueue_head(&dma_mem->wait_queue); + -+mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data) -+{ -+ mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL); ++ dma_mem->attachment = dma_buf_attach(dma_mem->buf, &mali_platform_device->dev); ++ if (NULL == dma_mem->attachment) { ++ goto failed_dma_attach; ++ } + -+ if (NULL == work) return NULL; ++ mem_backend->dma_buf.attachment = dma_mem; + -+ work->handler = handler; -+ work->data = data; ++ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP; ++ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { ++ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; ++ } + -+ INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func); + -+ return work; -+} ++#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++ /* Map memory into session's Mali virtual address space. 
*/ ++ if (0 != mali_dma_buf_map(mem_backend)) { ++ goto Failed_dma_map; ++ } ++#endif + -+void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work) -+{ -+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; -+ kfree(work_object); -+} ++ return _MALI_OSK_ERR_OK; + -+void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work) -+{ -+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; -+ cancel_delayed_work(&work_object->work); ++#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++Failed_dma_map: ++ mali_dma_buf_unmap(alloc, dma_mem); ++#endif ++ /* Wait for buffer to become unmapped */ ++ wait_event(dma_mem->wait_queue, !dma_mem->is_mapped); ++ MALI_DEBUG_ASSERT(!dma_mem->is_mapped); ++ dma_buf_detach(dma_mem->buf, dma_mem->attachment); ++failed_dma_attach: ++ _mali_osk_free(dma_mem); ++failed_alloc_mem: ++ dma_buf_put(buf); ++ return _MALI_OSK_ERR_FAULT; +} + -+void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work) ++void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend) +{ -+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; -+ cancel_delayed_work_sync(&work_object->work); -+} ++ struct mali_dma_buf_attachment *mem; ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT(MALI_MEM_DMA_BUF == mem_backend->type); + -+void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay) -+{ -+ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; ++ mem = mem_backend->dma_buf.attachment; ++ MALI_DEBUG_ASSERT_POINTER(mem); ++ MALI_DEBUG_ASSERT_POINTER(mem->attachment); ++ MALI_DEBUG_ASSERT_POINTER(mem->buf); ++ MALI_DEBUG_PRINT(3, ("Mali DMA-buf: release attachment %p\n", mem)); + -+#if MALI_LICENSE_IS_GPL -+ queue_delayed_work(mali_wq_normal, &work_object->work, delay); -+#else -+ schedule_delayed_work(&work_object->work, delay); ++#if defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++ MALI_DEBUG_ASSERT_POINTER(mem_backend->mali_allocation); ++ /* We mapped implicitly on attach, so we need to unmap on release */ ++ mali_dma_buf_unmap(mem_backend->mali_allocation, mem); +#endif ++ /* Wait for buffer to become unmapped */ ++ wait_event(mem->wait_queue, !mem->is_mapped); ++ MALI_DEBUG_ASSERT(!mem->is_mapped); ++ ++ dma_buf_detach(mem->buf, mem->attachment); ++ dma_buf_put(mem->buf); + ++ _mali_osk_free(mem); +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c b/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h new file mode 100644 -index 000000000..931d7f07a +index 000000000..a9b287038 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c -@@ -0,0 +1,23 @@ -+/** -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_dma_buf.h +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
@@ -321957,27 +322880,57 @@ index 000000000..931d7f07a + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_pmu_power_up_down.c -+ */ ++#ifndef __MALI_MEMORY_DMA_BUF_H__ ++#define __MALI_MEMORY_DMA_BUF_H__ + -+#include -+#include "mali_executor.h" ++#ifdef __cplusplus ++extern "C" { ++#endif + -+int mali_perf_set_num_pp_cores(unsigned int num_cores) -+{ -+ return mali_executor_set_perf_level(num_cores, MALI_FALSE); ++#include "mali_uk_types.h" ++#include "mali_osk.h" ++#include "mali_memory.h" ++ ++struct mali_pp_job; ++ ++struct mali_dma_buf_attachment; ++struct mali_dma_buf_attachment { ++ struct dma_buf *buf; ++ struct dma_buf_attachment *attachment; ++ struct sg_table *sgt; ++ struct mali_session_data *session; ++ int map_ref; ++ struct mutex map_lock; ++ mali_bool is_mapped; ++ wait_queue_head_t wait_queue; ++}; ++ ++int mali_dma_buf_get_size(struct mali_session_data *session, _mali_uk_dma_buf_get_size_s __user *arg); ++ ++void mali_mem_unbind_dma_buf(mali_mem_backend *mem_backend); ++ ++_mali_osk_errcode_t mali_mem_bind_dma_buf(mali_mem_allocation *alloc, ++ mali_mem_backend *mem_backend, ++ int fd, u32 flags); ++ ++#if !defined(CONFIG_MALI_DMA_BUF_MAP_ON_ATTACH) ++int mali_dma_buf_map_job(struct mali_pp_job *job); ++void mali_dma_buf_unmap_job(struct mali_pp_job *job); ++#endif ++ ++#ifdef __cplusplus +} ++#endif + -+EXPORT_SYMBOL(mali_perf_set_num_pp_cores); -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h ++#endif /* __MALI_MEMORY_DMA_BUF_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c new file mode 100644 -index 000000000..4661cac42 +index 000000000..76018b7ab --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h -@@ -0,0 +1,17 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.c +@@ -0,0 +1,89 @@ +/* -+ * Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -321986,21 +322939,94 @@ index 000000000..4661cac42 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_PROFILING_EVENTS_H__ -+#define __MALI_PROFILING_EVENTS_H__ ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_ukk.h" ++#include "mali_memory.h" ++#include "mali_mem_validation.h" ++#include "mali_uk_types.h" + -+/* Simple wrapper in order to find the OS specific location of this file */ -+#include ++void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend) ++{ ++ mali_mem_allocation *alloc; ++ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ alloc = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ MALI_DEBUG_ASSERT(MALI_MEM_EXTERNAL == mem_backend->type); + -+#endif /* __MALI_PROFILING_EVENTS_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); ++} ++ ++_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc, ++ mali_mem_backend *mem_backend, ++ u32 phys_addr, ++ u32 flag) ++{ ++ struct mali_session_data *session; ++ _mali_osk_errcode_t err; ++ u32 virt, phys, size; ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ size = alloc->psize; ++ session = (struct mali_session_data *)(uintptr_t)alloc->session; ++ MALI_CHECK_NON_NULL(session, _MALI_OSK_ERR_INVALID_ARGS); ++ ++ /* check arguments */ ++ /* NULL might be a valid Mali address */ ++ if (!size) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ ++ /* size must be a multiple of the system page size */ ++ if (size % _MALI_OSK_MALI_PAGE_SIZE) MALI_ERROR(_MALI_OSK_ERR_INVALID_ARGS); ++ ++ /* Validate the mali physical range */ ++ if (_MALI_OSK_ERR_OK != mali_mem_validation_check(phys_addr, size)) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ ++ if (flag & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { ++ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; ++ } ++ ++ mali_session_memory_lock(session); ++ ++ virt = alloc->mali_vma_node.vm_node.start; ++ phys = phys_addr; ++ ++ err = mali_mem_mali_map_prepare(alloc); ++ if (_MALI_OSK_ERR_OK != err) { ++ mali_session_memory_unlock(session); ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ ++ mali_mmu_pagedir_update(session->page_directory, virt, phys, size, MALI_MMU_FLAGS_DEFAULT); ++ ++ if (alloc->flags & MALI_MEM_FLAG_MALI_GUARD_PAGE) { ++ mali_mmu_pagedir_update(session->page_directory, virt + size, phys, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); ++ } ++ MALI_DEBUG_PRINT(3, ++ ("Requested to map physical memory 0x%x-0x%x into virtual memory 0x%x\n", ++ phys_addr, (phys_addr + size - 1), ++ virt)); ++ mali_session_memory_unlock(session); ++ ++ MALI_SUCCESS; ++} ++ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h new file mode 100644 -index 000000000..6fdaa427c +index 000000000..2db178d96 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h -@@ -0,0 +1,17 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_external.h +@@ -0,0 +1,29 @@ ++ +/* -+ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -322009,21 +323035,32 @@ index 000000000..6fdaa427c + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_PROFILING_GATOR_API_H__ -+#define __MALI_PROFILING_GATOR_API_H__ ++#ifndef __MALI_MEMORY_EXTERNAL_H__ ++#define __MALI_MEMORY_EXTERNAL_H__ + -+/* Simple wrapper in order to find the OS specific location of this file */ -+#include ++#ifdef __cplusplus ++extern "C" { ++#endif + -+#endif /* __MALI_PROFILING_GATOR_API_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c ++_mali_osk_errcode_t mali_mem_bind_ext_buf(mali_mem_allocation *alloc, ++ mali_mem_backend *mem_backend, ++ u32 phys_addr, ++ u32 flag); ++void mali_mem_unbind_ext_buf(mali_mem_backend *mem_backend); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c new file mode 100644 -index 000000000..c3a526f0a +index 000000000..6c0a2e9a8 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c -@@ -0,0 +1,275 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.c +@@ -0,0 +1,992 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -322032,279 +323069,996 @@ index 000000000..c3a526f0a + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_kernel_common.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include ++#endif ++#include ++ +#include "mali_osk.h" +#include "mali_osk_mali.h" ++#include "mali_kernel_linux.h" ++#include "mali_scheduler.h" ++#include "mali_memory.h" ++#include "mali_memory_os_alloc.h" ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include "mali_memory_dma_buf.h" ++#include "mali_memory_secure.h" ++#endif ++#if defined(CONFIG_MALI400_UMP) ++#include "mali_memory_ump.h" ++#endif ++#include "mali_memory_manager.h" ++#include "mali_memory_virtual.h" ++#include "mali_memory_util.h" ++#include "mali_memory_external.h" ++#include "mali_memory_cow.h" ++#include "mali_memory_block_alloc.h" +#include "mali_ukk.h" -+#include "mali_timestamp.h" -+#include "mali_osk_profiling.h" -+#include "mali_user_settings_db.h" -+#include "mali_profiling_internal.h" ++#include "mali_memory_swap_alloc.h" + -+typedef struct mali_profiling_entry { -+ u64 timestamp; -+ u32 event_id; -+ u32 data[5]; -+} mali_profiling_entry; ++/* ++* New memory system interface ++*/ + -+typedef enum mali_profiling_state { -+ MALI_PROFILING_STATE_UNINITIALIZED, -+ MALI_PROFILING_STATE_IDLE, -+ MALI_PROFILING_STATE_RUNNING, -+ MALI_PROFILING_STATE_RETURN, -+} mali_profiling_state; ++/*inti idr for backend memory */ ++struct idr mali_backend_idr; ++struct mutex mali_idr_mutex; + -+static _mali_osk_mutex_t *lock = NULL; -+static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED; -+static mali_profiling_entry *profile_entries = NULL; -+static _mali_osk_atomic_t profile_insert_index; -+static u32 profile_mask = 0; ++/* init allocation manager */ ++int mali_memory_manager_init(struct mali_allocation_manager *mgr) ++{ ++ /* init Locks */ ++ rwlock_init(&mgr->vm_lock); ++ mutex_init(&mgr->list_mutex); + -+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4); ++ /* init link */ ++ INIT_LIST_HEAD(&mgr->head); + -+void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned -+ int d2, unsigned int d3, unsigned int d4)) ++ /* init RB tree */ ++ mgr->allocation_mgr_rb = RB_ROOT; ++ mgr->mali_allocation_num = 0; ++ return 0; ++} ++ ++/* Deinit allocation manager ++* Do some check for debug ++*/ ++void mali_memory_manager_uninit(struct mali_allocation_manager *mgr) +{ -+ add_event(event_id, d0, d1, d2, d3, d4); ++ /* check RB tree is empty */ ++ MALI_DEBUG_ASSERT(((void *)(mgr->allocation_mgr_rb.rb_node) == (void *)rb_last(&mgr->allocation_mgr_rb))); ++ /* check allocation List */ ++ MALI_DEBUG_ASSERT(list_empty(&mgr->head)); +} + -+_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start) ++/* Prepare memory descriptor */ ++static mali_mem_allocation *mali_mem_allocation_struct_create(struct mali_session_data *session) +{ -+ profile_entries = NULL; -+ profile_mask = 0; -+ _mali_osk_atomic_init(&profile_insert_index, 0); ++ mali_mem_allocation *mali_allocation; + -+ lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING); -+ if (NULL == lock) { -+ return _MALI_OSK_ERR_FAULT; ++ /* Allocate memory */ ++ mali_allocation = (mali_mem_allocation *)kzalloc(sizeof(mali_mem_allocation), GFP_KERNEL); ++ if (NULL == mali_allocation) { ++ MALI_DEBUG_PRINT(1, ("mali_mem_allocation_struct_create: descriptor was NULL\n")); ++ return NULL; + } + -+ prof_state = MALI_PROFILING_STATE_IDLE; ++ 
MALI_DEBUG_CODE(mali_allocation->magic = MALI_MEM_ALLOCATION_VALID_MAGIC); + -+ if (MALI_TRUE == auto_start) { -+ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */ ++ /* do init */ ++ mali_allocation->flags = 0; ++ mali_allocation->session = session; + -+ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE); -+ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) { -+ return _MALI_OSK_ERR_FAULT; -+ } -+ } ++ INIT_LIST_HEAD(&mali_allocation->list); ++ _mali_osk_atomic_init(&mali_allocation->mem_alloc_refcount, 1); + -+ return _MALI_OSK_ERR_OK; ++ /** ++ *add to session list ++ */ ++ mutex_lock(&session->allocation_mgr.list_mutex); ++ list_add_tail(&mali_allocation->list, &session->allocation_mgr.head); ++ session->allocation_mgr.mali_allocation_num++; ++ mutex_unlock(&session->allocation_mgr.list_mutex); ++ ++ return mali_allocation; +} + -+void _mali_internal_profiling_term(void) ++void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc) +{ -+ u32 count; -+ -+ /* Ensure profiling is stopped */ -+ _mali_internal_profiling_stop(&count); ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ MALI_DEBUG_ASSERT_POINTER(alloc->session); ++ mutex_lock(&alloc->session->allocation_mgr.list_mutex); ++ list_del(&alloc->list); ++ alloc->session->allocation_mgr.mali_allocation_num--; ++ mutex_unlock(&alloc->session->allocation_mgr.list_mutex); + -+ prof_state = MALI_PROFILING_STATE_UNINITIALIZED; ++ kfree(alloc); ++} + -+ if (NULL != profile_entries) { -+ _mali_osk_vfree(profile_entries); -+ profile_entries = NULL; -+ } ++int mali_mem_backend_struct_create(mali_mem_backend **backend, u32 psize) ++{ ++ mali_mem_backend *mem_backend = NULL; ++ s32 ret = -ENOSPC; ++ s32 index = -1; ++ *backend = (mali_mem_backend *)kzalloc(sizeof(mali_mem_backend), GFP_KERNEL); ++ if (NULL == *backend) { ++ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: backend descriptor was NULL\n")); ++ return -1; ++ } ++ mem_backend = *backend; ++ mem_backend->size = psize; ++ mutex_init(&mem_backend->mutex); ++ INIT_LIST_HEAD(&mem_backend->list); ++ mem_backend->using_count = 0; + -+ if (NULL != lock) { -+ _mali_osk_mutex_term(lock); -+ lock = NULL; ++ ++ /* link backend with id */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) ++again: ++ if (!idr_pre_get(&mali_backend_idr, GFP_KERNEL)) { ++ kfree(mem_backend); ++ return -ENOMEM; ++ } ++ mutex_lock(&mali_idr_mutex); ++ ret = idr_get_new_above(&mali_backend_idr, mem_backend, 1, &index); ++ mutex_unlock(&mali_idr_mutex); ++ ++ if (-ENOSPC == ret) { ++ kfree(mem_backend); ++ return -ENOSPC; ++ } ++ if (-EAGAIN == ret) ++ goto again; ++#else ++ mutex_lock(&mali_idr_mutex); ++ ret = idr_alloc(&mali_backend_idr, mem_backend, 1, MALI_S32_MAX, GFP_KERNEL); ++ mutex_unlock(&mali_idr_mutex); ++ index = ret; ++ if (ret < 0) { ++ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_create: Can't allocate idr for backend! 
\n")); ++ kfree(mem_backend); ++ return -ENOSPC; + } ++#endif ++ return index; +} + -+_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit) ++ ++static void mali_mem_backend_struct_destory(mali_mem_backend **backend, s32 backend_handle) +{ -+ _mali_osk_errcode_t ret; -+ mali_profiling_entry *new_profile_entries; ++ mali_mem_backend *mem_backend = *backend; + -+ _mali_osk_mutex_wait(lock); ++ mutex_lock(&mali_idr_mutex); ++ idr_remove(&mali_backend_idr, backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ kfree(mem_backend); ++ *backend = NULL; ++} + -+ if (MALI_PROFILING_STATE_RUNNING == prof_state) { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_BUSY; ++mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address) ++{ ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_backend *mem_bkend = NULL; ++ mali_mem_allocation *mali_alloc = NULL; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_address, 0); ++ if (NULL == mali_vma_node) { ++ MALI_DEBUG_PRINT(1, ("mali_mem_backend_struct_search:vma node was NULL\n")); ++ return NULL; + } ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); ++ return mem_bkend; ++} + -+ new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry)); ++static _mali_osk_errcode_t mali_mem_resize(struct mali_session_data *session, mali_mem_backend *mem_backend, u32 physical_size) ++{ ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ int retval = 0; ++ mali_mem_allocation *mali_allocation = NULL; ++ mali_mem_os_mem tmp_os_mem; ++ s32 change_page_count; + -+ if (NULL == new_profile_entries) { -+ _mali_osk_mutex_signal(lock); -+ _mali_osk_vfree(new_profile_entries); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! 
\n")); ++ MALI_DEBUG_ASSERT(0 == physical_size % MALI_MMU_PAGE_SIZE); + -+ if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) { -+ *limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; -+ } ++ mali_allocation = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(mali_allocation); + -+ profile_mask = 1; -+ while (profile_mask <= *limit) { -+ profile_mask <<= 1; -+ } -+ profile_mask >>= 1; ++ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE & mali_allocation->flags); ++ MALI_DEBUG_ASSERT(MALI_MEM_OS == mali_allocation->type); + -+ *limit = profile_mask; ++ mutex_lock(&mem_backend->mutex); + -+ profile_mask--; /* turns the power of two into a mask of one less */ ++ /* Do resize*/ ++ if (physical_size > mem_backend->size) { ++ u32 add_size = physical_size - mem_backend->size; + -+ if (MALI_PROFILING_STATE_IDLE != prof_state) { -+ _mali_osk_mutex_signal(lock); -+ _mali_osk_vfree(new_profile_entries); -+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE); ++ ++ /* Allocate new pages from os mem */ ++ retval = mali_mem_os_alloc_pages(&tmp_os_mem, add_size); ++ ++ if (retval) { ++ if (-ENOMEM == retval) { ++ ret = _MALI_OSK_ERR_NOMEM; ++ } else { ++ ret = _MALI_OSK_ERR_FAULT; ++ } ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory allocation failed !\n")); ++ goto failed_alloc_memory; ++ } ++ ++ MALI_DEBUG_ASSERT(tmp_os_mem.count == add_size / MALI_MMU_PAGE_SIZE); ++ ++ /* Resize the memory of the backend */ ++ ret = mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count); ++ ++ if (ret) { ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory resizing failed !\n")); ++ goto failed_resize_pages; ++ } ++ ++ /*Resize cpu mapping */ ++ if (NULL != mali_allocation->cpu_mapping.vma) { ++ ret = mali_mem_os_resize_cpu_map_locked(mem_backend, mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + mem_backend->size, add_size); ++ if (unlikely(ret != _MALI_OSK_ERR_OK)) { ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: cpu mapping failed !\n")); ++ goto failed_cpu_map; ++ } ++ } ++ ++ /* Resize mali mapping */ ++ _mali_osk_mutex_wait(session->memory_lock); ++ ret = mali_mem_mali_map_resize(mali_allocation, physical_size); ++ ++ if (ret) { ++ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_resize: mali map resize fail !\n")); ++ goto failed_gpu_map; ++ } ++ ++ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, mali_allocation->mali_vma_node.vm_node.start, ++ mali_allocation->psize / MALI_MMU_PAGE_SIZE, add_size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties); ++ if (ret) { ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: mali mapping failed !\n")); ++ goto failed_gpu_map; ++ } ++ ++ _mali_osk_mutex_signal(session->memory_lock); ++ } else { ++ u32 dec_size, page_count; ++ u32 vaddr = 0; ++ INIT_LIST_HEAD(&tmp_os_mem.pages); ++ tmp_os_mem.count = 0; ++ ++ dec_size = mem_backend->size - physical_size; ++ MALI_DEBUG_ASSERT(0 == dec_size % MALI_MMU_PAGE_SIZE); ++ ++ page_count = dec_size / MALI_MMU_PAGE_SIZE; ++ vaddr = mali_allocation->mali_vma_node.vm_node.start + physical_size; ++ ++ /* Resize the memory of the backend */ ++ ret = mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, physical_size / MALI_MMU_PAGE_SIZE, page_count); ++ ++ if (ret) { ++ MALI_DEBUG_PRINT(4, ("_mali_ukk_mem_resize: mali map resize failed!\n")); ++ goto failed_resize_pages; ++ } ++ ++ /* Resize mali map */ ++ _mali_osk_mutex_wait(session->memory_lock); ++ mali_mem_mali_map_free(session, 
dec_size, vaddr, mali_allocation->flags); ++ _mali_osk_mutex_signal(session->memory_lock); ++ ++ /* Zap cpu mapping */ ++ if (0 != mali_allocation->cpu_mapping.addr) { ++ MALI_DEBUG_ASSERT(NULL != mali_allocation->cpu_mapping.vma); ++ zap_vma_ptes(mali_allocation->cpu_mapping.vma, mali_allocation->cpu_mapping.vma->vm_start + physical_size, dec_size); ++ } ++ ++ /* Free those extra pages */ ++ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE); + } + -+ profile_entries = new_profile_entries; ++ /* Resize memory allocation and memory backend */ ++ change_page_count = (s32)(physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE; ++ mali_allocation->psize = physical_size; ++ mem_backend->size = physical_size; ++ mutex_unlock(&mem_backend->mutex); + -+ ret = _mali_timestamp_reset(); ++ if (change_page_count > 0) { ++ atomic_add(change_page_count, &session->mali_mem_allocated_pages); ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } + -+ if (_MALI_OSK_ERR_OK == ret) { -+ prof_state = MALI_PROFILING_STATE_RUNNING; + } else { -+ _mali_osk_vfree(profile_entries); -+ profile_entries = NULL; ++ atomic_sub((s32)(-change_page_count), &session->mali_mem_allocated_pages); + } + -+ register_trace_mali_timeline_event(probe_mali_timeline_event, NULL); ++ return _MALI_OSK_ERR_OK; + -+ _mali_osk_mutex_signal(lock); ++failed_gpu_map: ++ _mali_osk_mutex_signal(session->memory_lock); ++failed_cpu_map: ++ if (physical_size > mem_backend->size) { ++ mali_mem_os_resize_pages(&mem_backend->os_mem, &tmp_os_mem, mem_backend->size / MALI_MMU_PAGE_SIZE, ++ (physical_size - mem_backend->size) / MALI_MMU_PAGE_SIZE); ++ } else { ++ mali_mem_os_resize_pages(&tmp_os_mem, &mem_backend->os_mem, 0, tmp_os_mem.count); ++ } ++failed_resize_pages: ++ if (0 != tmp_os_mem.count) ++ mali_mem_os_free(&tmp_os_mem.pages, tmp_os_mem.count, MALI_FALSE); ++failed_alloc_memory: ++ ++ mutex_unlock(&mem_backend->mutex); + return ret; +} + -+static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4) ++ ++/* Set GPU MMU properties */ ++static void _mali_memory_gpu_map_property_set(u32 *properties, u32 flags) +{ -+ u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask; ++ if (_MALI_MEMORY_GPU_READ_ALLOCATE & flags) { ++ *properties = MALI_MMU_FLAGS_FORCE_GP_READ_ALLOCATE; ++ } else { ++ *properties = MALI_MMU_FLAGS_DEFAULT; ++ } ++} + -+ profile_entries[cur_index].timestamp = _mali_timestamp_get(); -+ profile_entries[cur_index].event_id = event_id; -+ profile_entries[cur_index].data[0] = data0; -+ profile_entries[cur_index].data[1] = data1; -+ profile_entries[cur_index].data[2] = data2; -+ profile_entries[cur_index].data[3] = data3; -+ profile_entries[cur_index].data[4] = data4; ++_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size) ++{ ++ mali_mem_backend *mem_backend = NULL; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ mali_mem_allocation *mali_allocation = NULL; ++ u32 new_physical_size; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT(0 == add_size % MALI_MMU_PAGE_SIZE); + -+ /* If event is "leave API function", add current memory usage to the event -+ * as data point 4. This is used in timeline profiling to indicate how -+ * much memory was used when leaving a function. 
*/ -+ if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) { -+ profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage(); ++ /* Get the memory backend that need to be resize. */ ++ mem_backend = mali_mem_backend_struct_search(session, mali_addr); ++ ++ if (NULL == mem_backend) { ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n")); ++ return ret; + } ++ ++ mali_allocation = mem_backend->mali_allocation; ++ ++ MALI_DEBUG_ASSERT_POINTER(mali_allocation); ++ ++ new_physical_size = add_size + mem_backend->size; ++ ++ if (new_physical_size > (mali_allocation->mali_vma_node.vm_node.size)) ++ return ret; ++ ++ MALI_DEBUG_ASSERT(new_physical_size != mem_backend->size); ++ ++ ret = mali_mem_resize(session, mem_backend, new_physical_size); ++ ++ return ret; +} + -+_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count) ++/** ++* function@_mali_ukk_mem_allocate - allocate mali memory ++*/ ++_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args) +{ -+ _mali_osk_mutex_wait(lock); ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ mali_mem_backend *mem_backend = NULL; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ int retval = 0; ++ mali_mem_allocation *mali_allocation = NULL; ++ struct mali_vma_node *mali_vma_node = NULL; + -+ if (MALI_PROFILING_STATE_RUNNING != prof_state) { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_allocate, vaddr=0x%x, size =0x%x! \n", args->gpu_vaddr, args->psize)); ++ ++ /* Check if the address is allocated ++ */ ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, args->gpu_vaddr, 0); ++ ++ if (unlikely(mali_vma_node)) { ++ MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n")); ++ return _MALI_OSK_ERR_FAULT; + } ++ /** ++ *create mali memory allocation ++ */ + -+ /* go into return state (user to retreive events), no more events will be added after this */ -+ prof_state = MALI_PROFILING_STATE_RETURN; ++ mali_allocation = mali_mem_allocation_struct_create(session); + -+ unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL); ++ if (mali_allocation == NULL) { ++ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_allocate: Failed to create allocation struct! \n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ mali_allocation->psize = args->psize; ++ mali_allocation->vsize = args->vsize; + -+ _mali_osk_mutex_signal(lock); ++ /* MALI_MEM_OS if need to support mem resize, ++ * or MALI_MEM_BLOCK if have dedicated memory, ++ * or MALI_MEM_OS, ++ * or MALI_MEM_SWAP. 
++ */ ++ if (args->flags & _MALI_MEMORY_ALLOCATE_SWAPPABLE) { ++ mali_allocation->type = MALI_MEM_SWAP; ++ } else if (args->flags & _MALI_MEMORY_ALLOCATE_RESIZEABLE) { ++ mali_allocation->type = MALI_MEM_OS; ++ mali_allocation->flags |= MALI_MEM_FLAG_CAN_RESIZE; ++ } else if (args->flags & _MALI_MEMORY_ALLOCATE_SECURE) { ++ mali_allocation->type = MALI_MEM_SECURE; ++ } else if (MALI_TRUE == mali_memory_have_dedicated_memory()) { ++ mali_allocation->type = MALI_MEM_BLOCK; ++ } else { ++ mali_allocation->type = MALI_MEM_OS; ++ } + -+ tracepoint_synchronize_unregister(); ++ /** ++ *add allocation node to RB tree for index ++ */ ++ mali_allocation->mali_vma_node.vm_node.start = args->gpu_vaddr; ++ mali_allocation->mali_vma_node.vm_node.size = args->vsize; + -+ *count = _mali_osk_atomic_read(&profile_insert_index); -+ if (*count > profile_mask) *count = profile_mask; ++ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ ++ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, args->psize); ++ if (mali_allocation->backend_handle < 0) { ++ ret = _MALI_OSK_ERR_NOMEM; ++ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n")); ++ goto failed_alloc_backend; ++ } ++ ++ ++ mem_backend->mali_allocation = mali_allocation; ++ mem_backend->type = mali_allocation->type; ++ ++ mali_allocation->mali_mapping.addr = args->gpu_vaddr; ++ ++ /* set gpu mmu propery */ ++ _mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags); ++ /* do prepare for MALI mapping */ ++ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) { ++ _mali_osk_mutex_wait(session->memory_lock); ++ ++ ret = mali_mem_mali_map_prepare(mali_allocation); ++ if (0 != ret) { ++ _mali_osk_mutex_signal(session->memory_lock); ++ goto failed_prepare_map; ++ } ++ _mali_osk_mutex_signal(session->memory_lock); ++ } ++ ++ if (mali_allocation->psize == 0) { ++ mem_backend->os_mem.count = 0; ++ INIT_LIST_HEAD(&mem_backend->os_mem.pages); ++ goto done; ++ } ++ ++ if (args->flags & _MALI_MEMORY_ALLOCATE_DEFER_BIND) { ++ mali_allocation->flags |= _MALI_MEMORY_ALLOCATE_DEFER_BIND; ++ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_NOT_BINDED; ++ /* init for defer bind backend*/ ++ mem_backend->os_mem.count = 0; ++ INIT_LIST_HEAD(&mem_backend->os_mem.pages); ++ ++ goto done; ++ } ++ ++ if (likely(mali_allocation->psize > 0)) { ++ ++ if (MALI_MEM_SECURE == mem_backend->type) { ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ ret = mali_mem_secure_attach_dma_buf(&mem_backend->secure_mem, mem_backend->size, args->secure_shared_fd); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Failed to attach dma buf for secure memory! \n")); ++ goto failed_alloc_pages; ++ } ++#else ++ ret = _MALI_OSK_ERR_UNSUPPORTED; ++ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory! 
\n")); ++ goto failed_alloc_pages; ++#endif ++ } else { ++ ++ /** ++ *allocate physical memory ++ */ ++ if (mem_backend->type == MALI_MEM_OS) { ++ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size); ++ } else if (mem_backend->type == MALI_MEM_BLOCK) { ++ /* try to allocated from BLOCK memory first, then try OS memory if failed.*/ ++ if (mali_mem_block_alloc(&mem_backend->block_mem, mem_backend->size)) { ++ retval = mali_mem_os_alloc_pages(&mem_backend->os_mem, mem_backend->size); ++ mem_backend->type = MALI_MEM_OS; ++ mali_allocation->type = MALI_MEM_OS; ++ } ++ } else if (MALI_MEM_SWAP == mem_backend->type) { ++ retval = mali_mem_swap_alloc_pages(&mem_backend->swap_mem, mali_allocation->mali_vma_node.vm_node.size, &mem_backend->start_idx); ++ } else { ++ /* ONLY support mem_os type */ ++ MALI_DEBUG_ASSERT(0); ++ } ++ ++ if (retval) { ++ ret = _MALI_OSK_ERR_NOMEM; ++ MALI_DEBUG_PRINT(1, (" can't allocate enough pages! \n")); ++ goto failed_alloc_pages; ++ } ++ } ++ } ++ ++ /** ++ *map to GPU side ++ */ ++ if (!(args->flags & _MALI_MEMORY_ALLOCATE_NO_BIND_GPU) && mali_allocation->psize > 0) { ++ _mali_osk_mutex_wait(session->memory_lock); ++ /* Map on Mali */ + ++ if (mem_backend->type == MALI_MEM_OS) { ++ ret = mali_mem_os_mali_map(&mem_backend->os_mem, session, args->gpu_vaddr, 0, ++ mem_backend->size / MALI_MMU_PAGE_SIZE, mali_allocation->mali_mapping.properties); ++ ++ } else if (mem_backend->type == MALI_MEM_BLOCK) { ++ mali_mem_block_mali_map(&mem_backend->block_mem, session, args->gpu_vaddr, ++ mali_allocation->mali_mapping.properties); ++ } else if (mem_backend->type == MALI_MEM_SWAP) { ++ ret = mali_mem_swap_mali_map(&mem_backend->swap_mem, session, args->gpu_vaddr, ++ mali_allocation->mali_mapping.properties); ++ } else if (mem_backend->type == MALI_MEM_SECURE) { ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ ret = mali_mem_secure_mali_map(&mem_backend->secure_mem, session, args->gpu_vaddr, mali_allocation->mali_mapping.properties); ++#endif ++ } else { /* unsupport type */ ++ MALI_DEBUG_ASSERT(0); ++ } ++ ++ _mali_osk_mutex_signal(session->memory_lock); ++ } ++done: ++ if (MALI_MEM_OS == mem_backend->type) { ++ atomic_add(mem_backend->os_mem.count, &session->mali_mem_allocated_pages); ++ } else if (MALI_MEM_BLOCK == mem_backend->type) { ++ atomic_add(mem_backend->block_mem.count, &session->mali_mem_allocated_pages); ++ } else if (MALI_MEM_SECURE == mem_backend->type) { ++ atomic_add(mem_backend->secure_mem.count, &session->mali_mem_allocated_pages); ++ } else { ++ MALI_DEBUG_ASSERT(MALI_MEM_SWAP == mem_backend->type); ++ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_allocated_pages); ++ atomic_add(mem_backend->swap_mem.count, &session->mali_mem_array[mem_backend->type]); ++ } ++ ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } + return _MALI_OSK_ERR_OK; ++ ++failed_alloc_pages: ++ mali_mem_mali_map_free(session, mali_allocation->psize, mali_allocation->mali_vma_node.vm_node.start, mali_allocation->flags); ++failed_prepare_map: ++ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); ++failed_alloc_backend: ++ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ mali_mem_allocation_struct_destory(mali_allocation); ++ ++ return ret; +} + -+u32 _mali_internal_profiling_get_count(void) ++ ++_mali_osk_errcode_t 
_mali_ukk_mem_free(_mali_uk_free_mem_s *args) +{ -+ u32 retval = 0; ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ u32 vaddr = args->gpu_vaddr; ++ mali_mem_allocation *mali_alloc = NULL; ++ struct mali_vma_node *mali_vma_node = NULL; + -+ _mali_osk_mutex_wait(lock); -+ if (MALI_PROFILING_STATE_RETURN == prof_state) { -+ retval = _mali_osk_atomic_read(&profile_insert_index); -+ if (retval > profile_mask) retval = profile_mask; ++ /* find mali allocation structure by vaddress*/ ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, vaddr, 0); ++ if (NULL == mali_vma_node) { ++ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_free: invalid addr: 0x%x\n", vaddr)); ++ return _MALI_OSK_ERR_INVALID_ARGS; + } -+ _mali_osk_mutex_signal(lock); ++ MALI_DEBUG_ASSERT(NULL != mali_vma_node); ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); + -+ return retval; ++ if (mali_alloc) ++ /* check ref_count */ ++ args->free_pages_nr = mali_allocation_unref(&mali_alloc); ++ ++ return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]) ++ ++/** ++* Function _mali_ukk_mem_bind -- bind a external memory to a new GPU address ++* It will allocate a new mem allocation and bind external memory to it. ++* Supported backend type are: ++* _MALI_MEMORY_BIND_BACKEND_UMP ++* _MALI_MEMORY_BIND_BACKEND_DMA_BUF ++* _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY ++* CPU access is not supported yet ++*/ ++_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args) +{ -+ u32 raw_index = _mali_osk_atomic_read(&profile_insert_index); ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ mali_mem_backend *mem_backend = NULL; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ mali_mem_allocation *mali_allocation = NULL; ++ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_bind, vaddr=0x%x, size =0x%x! \n", args->vaddr, args->size)); + -+ _mali_osk_mutex_wait(lock); ++ /** ++ * allocate mali allocation. 
++ */ ++ mali_allocation = mali_mem_allocation_struct_create(session); + -+ if (index < profile_mask) { -+ if ((raw_index & ~profile_mask) != 0) { -+ index += raw_index; -+ index &= profile_mask; ++ if (mali_allocation == NULL) { ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ mali_allocation->psize = args->size; ++ mali_allocation->vsize = args->size; ++ mali_allocation->mali_mapping.addr = args->vaddr; ++ ++ /* add allocation node to RB tree for index */ ++ mali_allocation->mali_vma_node.vm_node.start = args->vaddr; ++ mali_allocation->mali_vma_node.vm_node.size = args->size; ++ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ ++ /* allocate backend*/ ++ if (mali_allocation->psize > 0) { ++ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize); ++ if (mali_allocation->backend_handle < 0) { ++ goto Failed_alloc_backend; + } + -+ if (prof_state != MALI_PROFILING_STATE_RETURN) { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ } else { ++ goto Failed_alloc_backend; ++ } ++ ++ mem_backend->size = mali_allocation->psize; ++ mem_backend->mali_allocation = mali_allocation; ++ ++ switch (args->flags & _MALI_MEMORY_BIND_BACKEND_MASK) { ++ case _MALI_MEMORY_BIND_BACKEND_UMP: ++#if defined(CONFIG_MALI400_UMP) ++ mali_allocation->type = MALI_MEM_UMP; ++ mem_backend->type = MALI_MEM_UMP; ++ ret = mali_mem_bind_ump_buf(mali_allocation, mem_backend, ++ args->mem_union.bind_ump.secure_id, args->mem_union.bind_ump.flags); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Bind ump buf failed\n")); ++ goto Failed_bind_backend; ++ } ++#else ++ MALI_DEBUG_PRINT(1, ("UMP not supported\n")); ++ goto Failed_bind_backend; ++#endif ++ break; ++ case _MALI_MEMORY_BIND_BACKEND_DMA_BUF: ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ mali_allocation->type = MALI_MEM_DMA_BUF; ++ mem_backend->type = MALI_MEM_DMA_BUF; ++ ret = mali_mem_bind_dma_buf(mali_allocation, mem_backend, ++ args->mem_union.bind_dma_buf.mem_fd, args->mem_union.bind_dma_buf.flags); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Bind dma buf failed\n")); ++ goto Failed_bind_backend; + } ++#else ++ MALI_DEBUG_PRINT(1, ("DMA not supported\n")); ++ goto Failed_bind_backend; ++#endif ++ break; ++ case _MALI_MEMORY_BIND_BACKEND_MALI_MEMORY: ++ /* not allowed */ ++ MALI_DEBUG_PRINT_ERROR(("Mali internal memory type not supported !\n")); ++ goto Failed_bind_backend; ++ break; + -+ if (index >= raw_index) { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_FAULT; ++ case _MALI_MEMORY_BIND_BACKEND_EXTERNAL_MEMORY: ++ mali_allocation->type = MALI_MEM_EXTERNAL; ++ mem_backend->type = MALI_MEM_EXTERNAL; ++ ret = mali_mem_bind_ext_buf(mali_allocation, mem_backend, args->mem_union.bind_ext_memory.phys_addr, ++ args->mem_union.bind_ext_memory.flags); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("Bind external buf failed\n")); ++ goto Failed_bind_backend; + } ++ break; + -+ *timestamp = profile_entries[index].timestamp; -+ *event_id = profile_entries[index].event_id; -+ data[0] = profile_entries[index].data[0]; -+ data[1] = profile_entries[index].data[1]; -+ data[2] = profile_entries[index].data[2]; -+ data[3] = profile_entries[index].data[3]; -+ data[4] = profile_entries[index].data[4]; ++ case _MALI_MEMORY_BIND_BACKEND_EXT_COW: ++ /* not allowed */ ++ MALI_DEBUG_PRINT_ERROR(("External cow memory type not supported !\n")); ++ goto Failed_bind_backend; ++ break; ++ ++ default: ++ 
MALI_DEBUG_PRINT_ERROR(("Invalid memory type not supported !\n")); ++ goto Failed_bind_backend; ++ break; ++ } ++ MALI_DEBUG_ASSERT(0 == mem_backend->size % MALI_MMU_PAGE_SIZE); ++ atomic_add(mem_backend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_backend->type]); ++ return _MALI_OSK_ERR_OK; ++ ++Failed_bind_backend: ++ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); ++ ++Failed_alloc_backend: ++ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ mali_mem_allocation_struct_destory(mali_allocation); ++ ++ MALI_DEBUG_PRINT(1, (" _mali_ukk_mem_bind, return ERROR! \n")); ++ return ret; ++} ++ ++ ++/* ++* Function _mali_ukk_mem_unbind -- unbind a external memory to a new GPU address ++* This function unbind the backend memory and free the allocation ++* no ref_count for this type of memory ++*/ ++_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args) ++{ ++ /**/ ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ mali_mem_allocation *mali_allocation = NULL; ++ struct mali_vma_node *mali_vma_node = NULL; ++ u32 mali_addr = args->vaddr; ++ MALI_DEBUG_PRINT(5, (" _mali_ukk_mem_unbind, vaddr=0x%x! \n", args->vaddr)); ++ ++ /* find the allocation by vaddr */ ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ if (likely(mali_vma_node)) { ++ MALI_DEBUG_ASSERT(mali_addr == mali_vma_node->vm_node.start); ++ mali_allocation = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); + } else { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_FAULT; ++ MALI_DEBUG_ASSERT(NULL != mali_vma_node); ++ return _MALI_OSK_ERR_INVALID_ARGS; + } + -+ _mali_osk_mutex_signal(lock); ++ if (NULL != mali_allocation) ++ /* check ref_count */ ++ mali_allocation_unref(&mali_allocation); + return _MALI_OSK_ERR_OK; +} + -+_mali_osk_errcode_t _mali_internal_profiling_clear(void) ++/* ++* Function _mali_ukk_mem_cow -- COW for an allocation ++* This function allocate new pages for a range (range, range+size) of allocation ++* And Map it(keep use the not in range pages from target allocation ) to an GPU vaddr ++*/ ++_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args) +{ -+ _mali_osk_mutex_wait(lock); ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ mali_mem_backend *target_backend = NULL; ++ mali_mem_backend *mem_backend = NULL; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_allocation *mali_allocation = NULL; + -+ if (MALI_PROFILING_STATE_RETURN != prof_state) { -+ _mali_osk_mutex_signal(lock); -+ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ /* Get the target backend for cow */ ++ target_backend = mali_mem_backend_struct_search(session, args->target_handle); ++ ++ if (NULL == target_backend || 0 == target_backend->size) { ++ MALI_DEBUG_ASSERT_POINTER(target_backend); ++ MALI_DEBUG_ASSERT(0 != target_backend->size); ++ return ret; + } + -+ prof_state = MALI_PROFILING_STATE_IDLE; -+ profile_mask = 0; -+ _mali_osk_atomic_init(&profile_insert_index, 0); ++ /*Cow not support resized mem */ ++ MALI_DEBUG_ASSERT(MALI_MEM_FLAG_CAN_RESIZE != (MALI_MEM_FLAG_CAN_RESIZE & target_backend->mali_allocation->flags)); + -+ if (NULL != profile_entries) { -+ _mali_osk_vfree(profile_entries); -+ profile_entries = NULL; ++ /* Check if the new mali address is allocated */ ++ mali_vma_node = 
mali_vma_offset_search(&session->allocation_mgr, args->vaddr, 0); ++ ++ if (unlikely(mali_vma_node)) { ++ MALI_DEBUG_PRINT_ERROR(("The mali virtual address has already been used ! \n")); ++ return ret; + } + -+ _mali_osk_mutex_signal(lock); ++ /* create new alloction for COW*/ ++ mali_allocation = mali_mem_allocation_struct_create(session); ++ if (mali_allocation == NULL) { ++ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to create allocation struct!\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ mali_allocation->psize = args->target_size; ++ mali_allocation->vsize = args->target_size; ++ mali_allocation->type = MALI_MEM_COW; ++ ++ /*add allocation node to RB tree for index*/ ++ mali_allocation->mali_vma_node.vm_node.start = args->vaddr; ++ mali_allocation->mali_vma_node.vm_node.size = mali_allocation->vsize; ++ mali_vma_offset_add(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ ++ /* create new backend for COW memory */ ++ mali_allocation->backend_handle = mali_mem_backend_struct_create(&mem_backend, mali_allocation->psize); ++ if (mali_allocation->backend_handle < 0) { ++ ret = _MALI_OSK_ERR_NOMEM; ++ MALI_DEBUG_PRINT(1, ("mali_allocation->backend_handle < 0! \n")); ++ goto failed_alloc_backend; ++ } ++ mem_backend->mali_allocation = mali_allocation; ++ mem_backend->type = mali_allocation->type; ++ ++ if (target_backend->type == MALI_MEM_SWAP || ++ (MALI_MEM_COW == target_backend->type && (MALI_MEM_BACKEND_FLAG_SWAP_COWED & target_backend->flags))) { ++ mem_backend->flags |= MALI_MEM_BACKEND_FLAG_SWAP_COWED; ++ /** ++ * CoWed swap backends couldn't be mapped as non-linear vma, because if one ++ * vma is set with flag VM_NONLINEAR, the vma->vm_private_data will be used by kernel, ++ * while in mali driver, we use this variable to store the pointer of mali_allocation, so there ++ * is a conflict. ++ * To resolve this problem, we have to do some fake things, we reserved about 64MB ++ * space from index 0, there isn't really page's index will be set from 0 to (64MB>>PAGE_SHIFT_NUM), ++ * and all of CoWed swap memory backends' start_idx will be assigned with 0, and these ++ * backends will be mapped as linear and will add to priority tree of global swap file, while ++ * these vmas will never be found by using normal page->index, these pages in those vma ++ * also couldn't be swapped out. ++ */ ++ mem_backend->start_idx = 0; ++ } ++ ++ /* Add the target backend's cow count, also allocate new pages for COW backend from os mem ++ *for a modified range and keep the page which not in the modified range and Add ref to it ++ */ ++ MALI_DEBUG_PRINT(3, ("Cow mapping: target_addr: 0x%x; cow_addr: 0x%x, size: %u\n", target_backend->mali_allocation->mali_vma_node.vm_node.start, ++ mali_allocation->mali_vma_node.vm_node.start, mali_allocation->mali_vma_node.vm_node.size)); ++ ++ ret = mali_memory_do_cow(target_backend, args->target_offset, args->target_size, mem_backend, args->range_start, args->range_size); ++ if (_MALI_OSK_ERR_OK != ret) { ++ MALI_DEBUG_PRINT(1, ("_mali_ukk_mem_cow: Failed to cow!\n")); ++ goto failed_do_cow; ++ } ++ ++ /** ++ *map to GPU side ++ */ ++ mali_allocation->mali_mapping.addr = args->vaddr; ++ /* set gpu mmu propery */ ++ _mali_memory_gpu_map_property_set(&mali_allocation->mali_mapping.properties, args->flags); ++ ++ _mali_osk_mutex_wait(session->memory_lock); ++ /* Map on Mali */ ++ ret = mali_mem_mali_map_prepare(mali_allocation); ++ if (0 != ret) { ++ MALI_DEBUG_PRINT(1, (" prepare map fail! 
\n")); ++ goto failed_gpu_map; ++ } ++ ++ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { ++ mali_mem_cow_mali_map(mem_backend, 0, mem_backend->size); ++ } ++ ++ _mali_osk_mutex_signal(session->memory_lock); ++ ++ mutex_lock(&target_backend->mutex); ++ target_backend->flags |= MALI_MEM_BACKEND_FLAG_COWED; ++ mutex_unlock(&target_backend->mutex); ++ ++ atomic_add(args->range_size / MALI_MMU_PAGE_SIZE, &session->mali_mem_allocated_pages); ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } + return _MALI_OSK_ERR_OK; ++ ++failed_gpu_map: ++ _mali_osk_mutex_signal(session->memory_lock); ++ mali_mem_cow_release(mem_backend, MALI_FALSE); ++ mem_backend->cow_mem.count = 0; ++failed_do_cow: ++ mali_mem_backend_struct_destory(&mem_backend, mali_allocation->backend_handle); ++failed_alloc_backend: ++ mali_vma_offset_remove(&session->allocation_mgr, &mali_allocation->mali_vma_node); ++ mali_mem_allocation_struct_destory(mali_allocation); ++ ++ return ret; +} + -+mali_bool _mali_internal_profiling_is_recording(void) ++_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args) +{ -+ return prof_state == MALI_PROFILING_STATE_RUNNING ? MALI_TRUE : MALI_FALSE; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ mali_mem_backend *mem_backend = NULL; ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ ++ MALI_DEBUG_PRINT(4, (" _mali_ukk_mem_cow_modify_range called! \n")); ++ /* Get the backend that need to be modified. */ ++ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); ++ ++ if (NULL == mem_backend || 0 == mem_backend->size) { ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT(0 != mem_backend->size); ++ return ret; ++ } ++ ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_backend->type); ++ ++ ret = mali_memory_cow_modify_range(mem_backend, args->range_start, args->size); ++ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr; ++ if (_MALI_OSK_ERR_OK != ret) ++ return ret; ++ _mali_osk_mutex_wait(session->memory_lock); ++ if (!(mem_backend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)) { ++ mali_mem_cow_mali_map(mem_backend, args->range_start, args->size); ++ } ++ _mali_osk_mutex_signal(session->memory_lock); ++ ++ atomic_add(args->change_pages_nr, &session->mali_mem_allocated_pages); ++ if (atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > session->max_mali_mem_allocated_size) { ++ session->max_mali_mem_allocated_size = atomic_read(&session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } ++ ++ return _MALI_OSK_ERR_OK; +} + -+mali_bool _mali_internal_profiling_have_recording(void) ++ ++_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args) +{ -+ return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE; ++ mali_mem_backend *mem_backend = NULL; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_PRINT(4, (" mali_mem_resize_memory called! \n")); ++ MALI_DEBUG_ASSERT(0 == args->psize % MALI_MMU_PAGE_SIZE); ++ ++ /* Get the memory backend that need to be resize. 
*/ ++ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); ++ ++ if (NULL == mem_backend) { ++ MALI_DEBUG_PRINT(2, ("_mali_ukk_mem_resize: memory backend = NULL!\n")); ++ return ret; ++ } ++ ++ MALI_DEBUG_ASSERT(args->psize != mem_backend->size); ++ ++ ret = mali_mem_resize(session, mem_backend, args->psize); ++ ++ return ret; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h ++ ++_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args) ++{ ++ args->memory_usage = _mali_ukk_report_memory_usage(); ++ if (0 != args->vaddr) { ++ mali_mem_backend *mem_backend = NULL; ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ /* Get the backend that need to be modified. */ ++ mem_backend = mali_mem_backend_struct_search(session, args->vaddr); ++ if (NULL == mem_backend) { ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ ++ if (MALI_MEM_COW == mem_backend->type) ++ args->change_pages_nr = mem_backend->cow_mem.change_pages_nr; ++ } ++ return _MALI_OSK_ERR_OK; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h new file mode 100644 -index 000000000..f17b45833 +index 000000000..f9b541134 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h -@@ -0,0 +1,35 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_manager.h +@@ -0,0 +1,51 @@ +/* -+ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -322313,39 +324067,55 @@ index 000000000..f17b45833 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __MALI_PROFILING_INTERNAL_H__ -+#define __MALI_PROFILING_INTERNAL_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif ++#ifndef __MALI_MEMORY_MANAGER_H__ ++#define __MALI_MEMORY_MANAGER_H__ + +#include "mali_osk.h" ++#include ++#include ++#include ++#include ++#include ++#include "mali_memory_types.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_uk_types.h" + -+int _mali_internal_profiling_init(mali_bool auto_start); -+void _mali_internal_profiling_term(void); ++struct mali_allocation_manager { ++ rwlock_t vm_lock; ++ struct rb_root allocation_mgr_rb; ++ struct list_head head; ++ struct mutex list_mutex; ++ u32 mali_allocation_num; ++}; + -+mali_bool _mali_internal_profiling_is_recording(void); -+mali_bool _mali_internal_profiling_have_recording(void); -+_mali_osk_errcode_t _mali_internal_profiling_clear(void); -+_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]); -+u32 _mali_internal_profiling_get_count(void); -+int _mali_internal_profiling_stop(u32 *count); -+int _mali_internal_profiling_start(u32 *limit); ++extern struct idr mali_backend_idr; ++extern struct mutex mali_idr_mutex; ++ ++int mali_memory_manager_init(struct mali_allocation_manager *mgr); ++void mali_memory_manager_uninit(struct mali_allocation_manager *mgr); ++ ++void mali_mem_allocation_struct_destory(mali_mem_allocation *alloc); ++_mali_osk_errcode_t mali_mem_add_mem_size(struct mali_session_data *session, u32 mali_addr, u32 add_size); ++mali_mem_backend *mali_mem_backend_struct_search(struct mali_session_data *session, u32 mali_address); ++_mali_osk_errcode_t _mali_ukk_mem_allocate(_mali_uk_alloc_mem_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_free(_mali_uk_free_mem_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_bind(_mali_uk_bind_mem_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_unbind(_mali_uk_unbind_mem_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_cow(_mali_uk_cow_mem_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_cow_modify_range(_mali_uk_cow_modify_range_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_usage_get(_mali_uk_profiling_memory_usage_get_s *args); ++_mali_osk_errcode_t _mali_ukk_mem_resize(_mali_uk_mem_resize_s *args); + -+#ifdef __cplusplus -+} +#endif + -+#endif /* __MALI_PROFILING_INTERNAL_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_sync.c b/drivers/gpu/arm/mali400/mali/linux/mali_sync.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c new file mode 100644 -index 000000000..0d98b518f +index 000000000..277534fc1 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_sync.c -@@ -0,0 +1,665 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.c +@@ -0,0 +1,810 @@ +/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -322354,669 +324124,874 @@ index 000000000..0d98b518f + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#include "mali_sync.h" ++#include "../platform/rk/custom_log.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_timeline.h" -+#include "mali_executor.h" ++#include "mali_memory.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_kernel_linux.h" + -+#include -+#include -+#include ++/* Minimum size of allocator page pool */ ++#define MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_MB * 256) ++#define MALI_OS_MEMORY_POOL_TRIM_JIFFIES (10 * CONFIG_HZ) /* Default to 10s */ + -+struct mali_sync_pt { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_pt sync_pt; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) ++static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask); +#else -+ struct mali_internal_sync_point sync_pt; ++static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask); +#endif -+ struct mali_sync_flag *flag; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_timeline *sync_tl; /**< Sync timeline this pt is connected to. */ +#else -+ struct mali_internal_sync_timeline *sync_tl; /**< Sync timeline this pt is connected to. */ -+#endif -+}; -+ -+/** -+ * The sync flag is used to connect sync fences to the Mali Timeline system. Sync fences can be -+ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled. -+ */ -+struct mali_sync_flag { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_timeline *sync_tl; /**< Sync timeline this flag is connected to. */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc); +#else -+ struct mali_internal_sync_timeline *sync_tl; /**< Sync timeline this flag is connected to. */ ++static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc); ++static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc); +#endif -+ u32 point; /**< Point on timeline. */ -+ int status; /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */ -+ struct kref refcount; /**< Reference count. */ -+}; -+ -+/** -+ * Mali sync timeline is used to connect mali timeline to sync_timeline. -+ * When fence timeout can print more detailed mali timeline system info. 
-+ */ -+struct mali_sync_timeline_container { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ struct sync_timeline sync_timeline; -+#else -+ struct mali_internal_sync_timeline sync_timeline; +#endif -+ struct mali_timeline *timeline; -+}; ++static void mali_mem_os_trim_pool(struct work_struct *work); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) -+#else -+MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct mali_internal_sync_point *pt) -+#endif -+{ -+ return container_of(pt, struct mali_sync_pt, sync_pt); -+} ++struct mali_mem_os_allocator mali_mem_os_allocator = { ++ .pool_lock = __SPIN_LOCK_UNLOCKED(pool_lock), ++ .pool_pages = LIST_HEAD_INIT(mali_mem_os_allocator.pool_pages), ++ .pool_count = 0, + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl) ++ .allocated_pages = ATOMIC_INIT(0), ++ .allocation_limit = 0, ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++ .shrinker.shrink = mali_mem_os_shrink, +#else -+MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct mali_internal_sync_timeline *sync_tl) ++ .shrinker.count_objects = mali_mem_os_shrink_count, ++ .shrinker.scan_objects = mali_mem_os_shrink, +#endif -+{ -+ return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline); -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static int timeline_has_signaled(struct sync_pt *pt) ++ .shrinker.seeks = DEFAULT_SEEKS, ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) ++ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool, TIMER_DEFERRABLE), ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38) ++ .timed_shrinker = __DEFERRED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool), +#else -+static int timeline_has_signaled(struct mali_internal_sync_point *pt) ++ .timed_shrinker = __DELAYED_WORK_INITIALIZER(mali_mem_os_allocator.timed_shrinker, mali_mem_os_trim_pool), +#endif ++}; ++ ++u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag) +{ -+ struct mali_sync_pt *mpt; ++ LIST_HEAD(pages); ++ struct mali_page_node *m_page, *m_tmp; ++ u32 free_pages_nr = 0; + -+ MALI_DEBUG_ASSERT_POINTER(pt); -+ mpt = to_mali_sync_pt(pt); ++ if (MALI_TRUE == cow_flag) { ++ list_for_each_entry_safe(m_page, m_tmp, os_pages, list) { ++ /*only handle OS node here */ ++ if (m_page->type == MALI_PAGE_NODE_OS) { ++ if (1 == _mali_page_node_get_ref_count(m_page)) { ++ list_move(&m_page->list, &pages); ++ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); ++ free_pages_nr ++; ++ } else { ++ _mali_page_node_unref(m_page); ++ m_page->page = NULL; ++ list_del(&m_page->list); ++ kfree(m_page); ++ } ++ } ++ } ++ } else { ++ list_cut_position(&pages, os_pages, os_pages->prev); ++ atomic_sub(pages_count, &mali_mem_os_allocator.allocated_pages); ++ free_pages_nr = pages_count; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(mpt->flag); ++ /* Put pages on pool. 
*/ ++ spin_lock(&mali_mem_os_allocator.pool_lock); ++ list_splice(&pages, &mali_mem_os_allocator.pool_pages); ++ mali_mem_os_allocator.pool_count += free_pages_nr; ++ spin_unlock(&mali_mem_os_allocator.pool_lock); + -+ return mpt->flag->status; ++ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { ++ MALI_DEBUG_PRINT(5, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count)); ++ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES); ++ } ++ return free_pages_nr; +} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static void timeline_free_pt(struct sync_pt *pt) -+#else -+static void timeline_free_pt(struct mali_internal_sync_point *pt) -+#endif ++/** ++* put page without put it into page pool ++*/ ++_mali_osk_errcode_t mali_mem_os_put_page(struct page *page) +{ -+ struct mali_sync_pt *mpt; -+ -+ MALI_DEBUG_ASSERT_POINTER(pt); -+ mpt = to_mali_sync_pt(pt); -+ -+ mali_sync_flag_put(mpt->flag); ++ MALI_DEBUG_ASSERT_POINTER(page); ++ if (1 == page_count(page)) { ++ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); ++ dma_unmap_page(&mali_platform_device->dev, page_private(page), ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ClearPagePrivate(page); ++ } ++ put_page(page); ++ return _MALI_OSK_ERR_OK; +} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static void timeline_release(struct sync_timeline *sync_timeline) -+#else -+static void timeline_release(struct mali_internal_sync_timeline *sync_timeline) -+#endif ++_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count) +{ -+ struct mali_sync_timeline_container *mali_sync_tl = NULL; -+ struct mali_timeline *mali_tl = NULL; -+ -+ MALI_DEBUG_ASSERT_POINTER(sync_timeline); ++ struct mali_page_node *m_page, *m_tmp; ++ u32 i = 0; + -+ mali_sync_tl = to_mali_sync_tl_container(sync_timeline); -+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); ++ MALI_DEBUG_ASSERT_POINTER(mem_from); ++ MALI_DEBUG_ASSERT_POINTER(mem_to); + -+ mali_tl = mali_sync_tl->timeline; ++ if (mem_from->count < start_page + page_count) { ++ return _MALI_OSK_ERR_INVALID_ARGS; ++ } + -+ /* always signaled timeline didn't have mali container */ -+ if (mali_tl) { -+ if (NULL != mali_tl->spinlock) { -+ mali_spinlock_reentrant_term(mali_tl->spinlock); ++ list_for_each_entry_safe(m_page, m_tmp, &mem_from->pages, list) { ++ if (i >= start_page && i < start_page + page_count) { ++ list_move_tail(&m_page->list, &mem_to->pages); ++ mem_from->count--; ++ mem_to->count++; + } -+ _mali_osk_free(mali_tl); ++ i++; + } + -+ module_put(THIS_MODULE); ++ return _MALI_OSK_ERR_OK; +} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static struct sync_pt *timeline_dup(struct sync_pt *pt) ++ ++int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size) +{ -+ struct mali_sync_pt *mpt, *new_mpt; -+ struct sync_pt *new_pt; -+ MALI_DEBUG_ASSERT_POINTER(pt); ++ struct page *new_page; ++ LIST_HEAD(pages_list); ++ size_t page_count = PAGE_ALIGN(size) / _MALI_OSK_MALI_PAGE_SIZE; ++ size_t remaining = page_count; ++ struct mali_page_node *m_page, *m_tmp; ++ u32 i; + -+ mpt = to_mali_sync_pt(pt); ++ MALI_DEBUG_ASSERT_POINTER(os_mem); + -+ new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt)); -+ if (NULL == new_pt) return NULL; ++ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) { ++ MALI_DEBUG_PRINT(2, 
("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n", ++ size, ++ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE, ++ mali_mem_os_allocator.allocation_limit)); ++ return -ENOMEM; ++ } + -+ new_mpt = to_mali_sync_pt(new_pt); ++ INIT_LIST_HEAD(&os_mem->pages); ++ os_mem->count = page_count; + -+ mali_sync_flag_get(mpt->flag); -+ new_mpt->flag = mpt->flag; -+ new_mpt->sync_tl = mpt->sync_tl; ++ /* Grab pages from pool. */ ++ { ++ size_t pool_pages; ++ spin_lock(&mali_mem_os_allocator.pool_lock); ++ pool_pages = min(remaining, mali_mem_os_allocator.pool_count); ++ for (i = pool_pages; i > 0; i--) { ++ BUG_ON(list_empty(&mali_mem_os_allocator.pool_pages)); ++ list_move(mali_mem_os_allocator.pool_pages.next, &pages_list); ++ } ++ mali_mem_os_allocator.pool_count -= pool_pages; ++ remaining -= pool_pages; ++ spin_unlock(&mali_mem_os_allocator.pool_lock); ++ } + -+ return new_pt; -+} ++ /* Process pages from pool. */ ++ i = 0; ++ list_for_each_entry_safe(m_page, m_tmp, &pages_list, list) { ++ BUG_ON(NULL == m_page); + -+static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb) -+{ -+ struct mali_sync_pt *mpta; -+ struct mali_sync_pt *mptb; -+ u32 a, b; ++ list_move_tail(&m_page->list, &os_mem->pages); ++ } + -+ MALI_DEBUG_ASSERT_POINTER(pta); -+ MALI_DEBUG_ASSERT_POINTER(ptb); -+ mpta = to_mali_sync_pt(pta); -+ mptb = to_mali_sync_pt(ptb); ++ /* Allocate new pages, if needed. */ ++ for (i = 0; i < remaining; i++) { ++ dma_addr_t dma_addr; ++ gfp_t flags = __GFP_ZERO | GFP_HIGHUSER; ++ int err; + -+ MALI_DEBUG_ASSERT_POINTER(mpta->flag); -+ MALI_DEBUG_ASSERT_POINTER(mptb->flag); ++#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE) ++ flags |= GFP_HIGHUSER; ++#else ++#ifdef CONFIG_ZONE_DMA32 ++ flags |= GFP_DMA32; ++#else ++#ifdef CONFIG_ZONE_DMA ++#else ++ /* arm64 utgard only work on < 4G, but the kernel ++ * didn't provide method to allocte memory < 4G ++ */ ++ MALI_DEBUG_ASSERT(0); ++#endif ++#endif ++#endif + -+ a = mpta->flag->point; -+ b = mptb->flag->point; ++ new_page = alloc_page(flags); + -+ if (a == b) return 0; ++ if (unlikely(NULL == new_page)) { ++ E("err."); ++ /* Calculate the number of pages actually allocated, and free them. */ ++ os_mem->count = (page_count - remaining) + i; ++ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); ++ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); ++ return -ENOMEM; ++ } + -+ return ((b - a) < (a - b) ? -1 : 1); -+} -+#endif ++ /* Ensure page is flushed from CPU caches. 
*/ ++ dma_addr = dma_map_page(&mali_platform_device->dev, new_page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); ++ dma_unmap_page(&mali_platform_device->dev, dma_addr, ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); ++ dma_addr = dma_map_page(&mali_platform_device->dev, new_page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt) -+{ -+ struct mali_sync_pt *mpt; ++ err = dma_mapping_error(&mali_platform_device->dev, dma_addr); ++ if (unlikely(err)) { ++ MALI_DEBUG_PRINT_ERROR(("OS Mem: Failed to DMA map page %p: %u", ++ new_page, err)); ++ __free_page(new_page); ++ os_mem->count = (page_count - remaining) + i; ++ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); ++ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); ++ return -EFAULT; ++ } + -+ MALI_DEBUG_ASSERT_POINTER(s); -+ MALI_DEBUG_ASSERT_POINTER(sync_pt); ++ /* Store page phys addr */ ++ SetPagePrivate(new_page); ++ set_page_private(new_page, dma_addr); + -+ mpt = to_mali_sync_pt(sync_pt); ++ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_OS); ++ if (unlikely(NULL == m_page)) { ++ MALI_PRINT_ERROR(("OS Mem: Can't allocate mali_page node! \n")); ++ dma_unmap_page(&mali_platform_device->dev, page_private(new_page), ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ClearPagePrivate(new_page); ++ __free_page(new_page); ++ os_mem->count = (page_count - remaining) + i; ++ atomic_add(os_mem->count, &mali_mem_os_allocator.allocated_pages); ++ mali_mem_os_free(&os_mem->pages, os_mem->count, MALI_FALSE); ++ return -EFAULT; ++ } ++ m_page->page = new_page; + -+ /* It is possible this sync point is just under construct, -+ * make sure the flag is valid before accessing it -+ */ -+ if (mpt->flag) { -+ seq_printf(s, "%u", mpt->flag->point); -+ } else { -+ seq_printf(s, "uninitialized"); ++ list_add_tail(&m_page->list, &os_mem->pages); ++ } ++ ++ atomic_add(page_count, &mali_mem_os_allocator.allocated_pages); ++ ++ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) { ++ MALI_DEBUG_PRINT(4, ("OS Mem: Stopping pool trim timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count)); ++ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker); + } ++ ++ return 0; +} + -+static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl) ++ ++_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props) +{ -+ struct mali_sync_timeline_container *mali_sync_tl = NULL; -+ struct mali_timeline *mali_tl = NULL; ++ struct mali_page_directory *pagedir = session->page_directory; ++ struct mali_page_node *m_page; ++ u32 virt; ++ u32 prop = props; + -+ MALI_DEBUG_ASSERT_POINTER(sync_tl); ++ MALI_DEBUG_ASSERT_POINTER(session); ++ MALI_DEBUG_ASSERT_POINTER(os_mem); + -+ mali_sync_tl = to_mali_sync_tl_container(sync_tl); -+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); ++ MALI_DEBUG_ASSERT(start_page <= os_mem->count); ++ MALI_DEBUG_ASSERT((start_page + mapping_pgae_num) <= os_mem->count); + -+ mali_tl = mali_sync_tl->timeline; ++ if ((start_page + mapping_pgae_num) == os_mem->count) { + -+ if (NULL != mali_tl) { -+ seq_printf(s, "oldest (%u) ", mali_tl->point_oldest); -+ seq_printf(s, "next (%u)", mali_tl->point_next); -+ seq_printf(s, "\n"); ++ virt = vaddr + MALI_MMU_PAGE_SIZE * (start_page + mapping_pgae_num); + -+#if 
defined(MALI_TIMELINE_DEBUG_FUNCTIONS) -+ { -+ u32 tid = _mali_osk_get_tid(); -+ struct mali_timeline_system *system = mali_tl->system; ++ list_for_each_entry_reverse(m_page, &os_mem->pages, list) { + -+ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid); -+ if (!mali_tl->destroyed) { -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ mali_timeline_debug_print_timeline(mali_tl, s); -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ virt -= MALI_MMU_PAGE_SIZE; ++ if (mapping_pgae_num > 0) { ++ dma_addr_t phys = page_private(m_page->page); ++#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) ++ /* Verify that the "physical" address is 32-bit and ++ * usable for Mali, when on a system with bus addresses ++ * wider than 32-bit. */ ++ MALI_DEBUG_ASSERT(0 == (phys >> 32)); ++#endif ++ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); ++ } else { ++ break; + } -+ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid); -+ -+ /* dump job queue status and group running status */ -+ mali_executor_status_dump(); ++ mapping_pgae_num--; + } ++ ++ } else { ++ u32 i = 0; ++ virt = vaddr; ++ list_for_each_entry(m_page, &os_mem->pages, list) { ++ ++ if (i >= start_page) { ++ dma_addr_t phys = page_private(m_page->page); ++ ++#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) ++ /* Verify that the "physical" address is 32-bit and ++ * usable for Mali, when on a system with bus addresses ++ * wider than 32-bit. */ ++ MALI_DEBUG_ASSERT(0 == (phys >> 32)); +#endif ++ mali_mmu_pagedir_update(pagedir, virt, (mali_dma_addr)phys, MALI_MMU_PAGE_SIZE, prop); ++ } ++ i++; ++ virt += MALI_MMU_PAGE_SIZE; ++ } + } ++ return _MALI_OSK_ERR_OK; +} -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size) ++ ++ ++void mali_mem_os_mali_unmap(mali_mem_allocation *alloc) +{ -+ struct mali_sync_pt *mpt; ++ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ MALI_DEBUG_ASSERT_POINTER(str); -+ MALI_DEBUG_ASSERT_POINTER(pt); ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); ++} + -+ mpt = to_mali_sync_pt(pt); ++int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) ++{ ++ mali_mem_os_mem *os_mem = &mem_bkend->os_mem; ++ struct mali_page_node *m_page; ++ struct page *page; ++ int ret; ++ unsigned long addr = vma->vm_start; ++ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type); + -+ /* It is possible this sync point is just under construct, -+ * make sure the flag is valid before accessing it -+ */ -+ if (mpt->flag) { -+ _mali_osk_snprintf(str, size, "%u", mpt->flag->point); -+ } else { -+ _mali_osk_snprintf(str, size, "uninitialized"); ++ list_for_each_entry(m_page, &os_mem->pages, list) { ++ /* We should use vm_insert_page, but it does a dcache ++ * flush which makes it way slower than remap_pfn_range or vmf_insert_pfn. 
++ ret = vm_insert_page(vma, addr, page); ++ */ ++ page = m_page->page; ++ ret = vmf_insert_pfn(vma, addr, page_to_pfn(page)); ++ ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ return -EFAULT; ++ } ++ addr += _MALI_OSK_MALI_PAGE_SIZE; + } ++ ++ return 0; +} + -+static void timeline_value_str(struct sync_timeline *timeline, char *str, int size) ++_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size) +{ -+ struct mali_sync_timeline_container *mali_sync_tl = NULL; -+ struct mali_timeline *mali_tl = NULL; ++ mali_mem_os_mem *os_mem = &mem_bkend->os_mem; ++ struct mali_page_node *m_page; ++ int ret; ++ int offset; ++ int mapping_page_num; ++ int count ; + -+ MALI_DEBUG_ASSERT_POINTER(timeline); ++ unsigned long vstart = vma->vm_start; ++ count = 0; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_OS); ++ MALI_DEBUG_ASSERT(0 == start_vaddr % _MALI_OSK_MALI_PAGE_SIZE); ++ MALI_DEBUG_ASSERT(0 == vstart % _MALI_OSK_MALI_PAGE_SIZE); ++ offset = (start_vaddr - vstart) / _MALI_OSK_MALI_PAGE_SIZE; ++ MALI_DEBUG_ASSERT(offset <= os_mem->count); ++ mapping_page_num = mappig_size / _MALI_OSK_MALI_PAGE_SIZE; ++ MALI_DEBUG_ASSERT((offset + mapping_page_num) <= os_mem->count); + -+ mali_sync_tl = to_mali_sync_tl_container(timeline); -+ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); ++ if ((offset + mapping_page_num) == os_mem->count) { + -+ mali_tl = mali_sync_tl->timeline; ++ unsigned long vm_end = start_vaddr + mappig_size; + -+ if (NULL != mali_tl) { -+ _mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest); -+ _mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next); -+ _mali_osk_snprintf(str, size, "\n"); ++ list_for_each_entry_reverse(m_page, &os_mem->pages, list) { + -+#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) -+ { -+ u32 tid = _mali_osk_get_tid(); -+ struct mali_timeline_system *system = mali_tl->system; ++ vm_end -= _MALI_OSK_MALI_PAGE_SIZE; ++ if (mapping_page_num > 0) { ++ ret = vmf_insert_pfn(vma, vm_end, page_to_pfn(m_page->page)); + -+ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid); -+ if (!mali_tl->destroyed) { -+ mali_spinlock_reentrant_wait(system->spinlock, tid); -+ mali_timeline_debug_direct_print_timeline(mali_tl); -+ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ /*will return -EBUSY If the page has already been mapped into table, but it's OK*/ ++ if (-EBUSY == ret) { ++ break; ++ } else { ++ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, offset is %d,page_count is %d\n", ++ ret, offset + mapping_page_num, os_mem->count)); ++ } ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ break; + } -+ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid); ++ mapping_page_num--; + -+ /* dump job queue status and group running status */ -+ mali_executor_status_dump(); + } -+#endif ++ } else { ++ ++ list_for_each_entry(m_page, &os_mem->pages, list) { ++ if (count >= offset) { ++ ++ ret = vmf_insert_pfn(vma, vstart, page_to_pfn(m_page->page)); ++ ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ /*will return -EBUSY If the page has already been mapped into table, but it's OK*/ ++ if (-EBUSY == ret) { ++ break; ++ } else { ++ MALI_DEBUG_PRINT(1, ("OS Mem: mali_mem_os_resize_cpu_map_locked failed, ret = %d, count is %d, offset is %d,page_count is %d\n", ++ ret, count, offset, os_mem->count)); ++ } ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } ++ count++; ++ vstart += _MALI_OSK_MALI_PAGE_SIZE; ++ } + } ++ 
return _MALI_OSK_ERR_OK; +} -+#else -+static void timeline_print_sync_pt(struct mali_internal_sync_point *sync_pt) ++ ++u32 mali_mem_os_release(mali_mem_backend *mem_bkend) +{ -+ struct mali_sync_pt *mpt; + -+ MALI_DEBUG_ASSERT_POINTER(sync_pt); ++ mali_mem_allocation *alloc; ++ struct mali_session_data *session; ++ u32 free_pages_nr = 0; ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); ++ MALI_DEBUG_ASSERT(MALI_MEM_OS == mem_bkend->type); + -+ mpt = to_mali_sync_pt(sync_pt); ++ alloc = mem_bkend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+ if (mpt->flag) { -+ MALI_DEBUG_PRINT(2, ("mali_internal_sync_pt: %u\n", mpt->flag->point)); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ ++ /* Unmap the memory from the mali virtual address space. */ ++ mali_mem_os_mali_unmap(alloc); ++ mutex_lock(&mem_bkend->mutex); ++ /* Free pages */ ++ if (MALI_MEM_BACKEND_FLAG_COWED & mem_bkend->flags) { ++ /* Lock to avoid the free race condition for the cow shared memory page node. */ ++ _mali_osk_mutex_wait(session->cow_lock); ++ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_TRUE); ++ _mali_osk_mutex_signal(session->cow_lock); + } else { -+ MALI_DEBUG_PRINT(2, ("uninitialized\n", mpt->flag->point)); ++ free_pages_nr = mali_mem_os_free(&mem_bkend->os_mem.pages, mem_bkend->os_mem.count, MALI_FALSE); + } -+} -+#endif ++ mutex_unlock(&mem_bkend->mutex); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+static struct sync_timeline_ops mali_timeline_ops = { -+ .driver_name = "Mali", -+ .dup = timeline_dup, -+ .has_signaled = timeline_has_signaled, -+ .compare = timeline_compare, -+ .free_pt = timeline_free_pt, -+ .release_obj = timeline_release, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ .print_pt = timeline_print_pt, -+ .print_obj = timeline_print_obj, -+#else -+ .pt_value_str = timeline_pt_value_str, -+ .timeline_value_str = timeline_value_str, -+#endif -+}; ++ MALI_DEBUG_PRINT(4, ("OS Mem free : allocated size = 0x%x, free size = 0x%x\n", mem_bkend->os_mem.count * _MALI_OSK_MALI_PAGE_SIZE, ++ free_pages_nr * _MALI_OSK_MALI_PAGE_SIZE)); + -+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name) -+{ -+ struct sync_timeline *sync_tl; -+ struct mali_sync_timeline_container *mali_sync_tl; ++ mem_bkend->os_mem.count = 0; ++ return free_pages_nr; ++} + -+ sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name); -+ if (NULL == sync_tl) return NULL; + -+ mali_sync_tl = to_mali_sync_tl_container(sync_tl); -+ mali_sync_tl->timeline = timeline; ++#define MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE 128 ++static struct { ++ struct { ++ mali_dma_addr phys; ++ mali_io_address mapping; ++ } page[MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE]; ++ size_t count; ++ spinlock_t lock; ++} mali_mem_page_table_page_pool = { ++ .count = 0, ++ .lock = __SPIN_LOCK_UNLOCKED(pool_lock), ++}; + -+ /* Grab a reference on the module to ensure the callbacks are present -+ * as long some timeline exists. The reference is released when the -+ * timeline is freed. -+ * Since this function is called from a ioctl on an open file we know -+ * we already have a reference, so using __module_get is safe. 
*/ -+ __module_get(THIS_MODULE); ++_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping) ++{ ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_NOMEM; ++ dma_addr_t tmp_phys; + -+ return sync_tl; -+} ++ spin_lock(&mali_mem_page_table_page_pool.lock); ++ if (0 < mali_mem_page_table_page_pool.count) { ++ u32 i = --mali_mem_page_table_page_pool.count; ++ *phys = mali_mem_page_table_page_pool.page[i].phys; ++ *mapping = mali_mem_page_table_page_pool.page[i].mapping; + -+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence) -+{ -+ s32 fd = -1; ++ ret = _MALI_OSK_ERR_OK; ++ } ++ spin_unlock(&mali_mem_page_table_page_pool.lock); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) -+ fd = get_unused_fd(); ++ if (_MALI_OSK_ERR_OK != ret) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) ++ *mapping = dma_alloc_attrs(&mali_platform_device->dev, ++ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, ++ GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); +#else -+ fd = get_unused_fd_flags(0); ++ *mapping = dma_alloc_writecombine(&mali_platform_device->dev, ++ _MALI_OSK_MALI_PAGE_SIZE, &tmp_phys, GFP_KERNEL); +#endif ++ if (NULL != *mapping) { ++ ret = _MALI_OSK_ERR_OK; + -+ if (fd < 0) { -+ sync_fence_put(sync_fence); -+ return -1; ++#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) ++ /* Verify that the "physical" address is 32-bit and ++ * usable for Mali, when on a system with bus addresses ++ * wider than 32-bit. */ ++ MALI_DEBUG_ASSERT(0 == (tmp_phys >> 32)); ++#endif ++ ++ *phys = (mali_dma_addr)tmp_phys; ++ } + } -+ sync_fence_install(sync_fence, fd); + -+ return fd; ++ return ret; +} + -+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2) ++void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt) +{ -+ struct sync_fence *sync_fence; ++ spin_lock(&mali_mem_page_table_page_pool.lock); ++ if (MALI_MEM_OS_PAGE_TABLE_PAGE_POOL_SIZE > mali_mem_page_table_page_pool.count) { ++ u32 i = mali_mem_page_table_page_pool.count; ++ mali_mem_page_table_page_pool.page[i].phys = phys; ++ mali_mem_page_table_page_pool.page[i].mapping = virt; + -+ MALI_DEBUG_ASSERT_POINTER(sync_fence1); -+ MALI_DEBUG_ASSERT_POINTER(sync_fence1); ++ ++mali_mem_page_table_page_pool.count; + -+ sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2); -+ sync_fence_put(sync_fence1); -+ sync_fence_put(sync_fence2); ++ spin_unlock(&mali_mem_page_table_page_pool.lock); ++ } else { ++ spin_unlock(&mali_mem_page_table_page_pool.lock); + -+ return sync_fence; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) ++ dma_free_attrs(&mali_platform_device->dev, ++ _MALI_OSK_MALI_PAGE_SIZE, virt, phys, ++ DMA_ATTR_WRITE_COMBINE); ++#else ++ dma_free_writecombine(&mali_platform_device->dev, ++ _MALI_OSK_MALI_PAGE_SIZE, virt, phys); ++#endif ++ } +} + -+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl) ++void mali_mem_os_free_page_node(struct mali_page_node *m_page) +{ -+ struct mali_sync_flag *flag; -+ struct sync_fence *sync_fence; -+ -+ MALI_DEBUG_ASSERT_POINTER(sync_tl); -+ -+ flag = mali_sync_flag_create(sync_tl, 0); -+ if (NULL == flag) return NULL; -+ -+ sync_fence = mali_sync_flag_create_fence(flag); -+ -+ mali_sync_flag_signal(flag, 0); -+ mali_sync_flag_put(flag); ++ struct page *page = m_page->page; ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_OS); + -+ return sync_fence; ++ if (1 == page_count(page)) { ++ dma_unmap_page(&mali_platform_device->dev, page_private(page), ++ _MALI_OSK_MALI_PAGE_SIZE, 
DMA_BIDIRECTIONAL); ++ ClearPagePrivate(page); ++ } ++ __free_page(page); ++ m_page->page = NULL; ++ list_del(&m_page->list); ++ kfree(m_page); +} + -+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point) ++/* The maximum number of page table pool pages to free in one go. */ ++#define MALI_MEM_OS_CHUNK_TO_FREE 64UL ++ ++/* Free a certain number of pages from the page table page pool. ++ * The pool lock must be held when calling the function, and the lock will be ++ * released before returning. ++ */ ++static void mali_mem_os_page_table_pool_free(size_t nr_to_free) +{ -+ struct mali_sync_flag *flag; ++ mali_dma_addr phys_arr[MALI_MEM_OS_CHUNK_TO_FREE]; ++ void *virt_arr[MALI_MEM_OS_CHUNK_TO_FREE]; ++ u32 i; + -+ if (NULL == sync_tl) return NULL; ++ MALI_DEBUG_ASSERT(nr_to_free <= MALI_MEM_OS_CHUNK_TO_FREE); + -+ flag = _mali_osk_calloc(1, sizeof(*flag)); -+ if (NULL == flag) return NULL; ++ /* Remove nr_to_free pages from the pool and store them locally on stack. */ ++ for (i = 0; i < nr_to_free; i++) { ++ u32 pool_index = mali_mem_page_table_page_pool.count - i - 1; + -+ flag->sync_tl = sync_tl; -+ flag->point = point; ++ phys_arr[i] = mali_mem_page_table_page_pool.page[pool_index].phys; ++ virt_arr[i] = mali_mem_page_table_page_pool.page[pool_index].mapping; ++ } + -+ flag->status = 0; -+ kref_init(&flag->refcount); ++ mali_mem_page_table_page_pool.count -= nr_to_free; + -+ return flag; ++ spin_unlock(&mali_mem_page_table_page_pool.lock); ++ ++ /* After releasing the spinlock: free the pages we removed from the pool. */ ++ for (i = 0; i < nr_to_free; i++) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) ++ dma_free_attrs(&mali_platform_device->dev, _MALI_OSK_MALI_PAGE_SIZE, ++ virt_arr[i], (dma_addr_t)phys_arr[i], ++ DMA_ATTR_WRITE_COMBINE); ++#else ++ dma_free_writecombine(&mali_platform_device->dev, ++ _MALI_OSK_MALI_PAGE_SIZE, ++ virt_arr[i], (dma_addr_t)phys_arr[i]); ++#endif ++ } +} + -+/** -+ * Create a sync point attached to given sync flag. -+ * -+ * @note Sync points must be triggered in *exactly* the same order as they are created. -+ * -+ * @param flag Sync flag. -+ * @return New sync point if successful, NULL if not. -+ */ -+static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag) ++static void mali_mem_os_trim_page_table_page_pool(void) +{ -+ struct sync_pt *pt; -+ struct mali_sync_pt *mpt; ++ size_t nr_to_free = 0; ++ size_t nr_to_keep; + -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); ++ /* Keep 2 page table pages for each 1024 pages in the page cache. */ ++ nr_to_keep = mali_mem_os_allocator.pool_count / 512; ++ /* And a minimum of eight pages, to accomodate new sessions. */ ++ nr_to_keep += 8; + -+ pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt)); -+ if (NULL == pt) return NULL; ++ if (0 == spin_trylock(&mali_mem_page_table_page_pool.lock)) return; + -+ mali_sync_flag_get(flag); ++ if (nr_to_keep < mali_mem_page_table_page_pool.count) { ++ nr_to_free = mali_mem_page_table_page_pool.count - nr_to_keep; ++ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, nr_to_free); ++ } + -+ mpt = to_mali_sync_pt(pt); -+ mpt->flag = flag; -+ mpt->sync_tl = flag->sync_tl; ++ /* Pool lock will be released by the callee. 
*/ ++ mali_mem_os_page_table_pool_free(nr_to_free); ++} + -+ return pt; ++static unsigned long mali_mem_os_shrink_count(struct shrinker *shrinker, struct shrink_control *sc) ++{ ++ return mali_mem_os_allocator.pool_count; +} + -+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) ++static int mali_mem_os_shrink(int nr_to_scan, gfp_t gfp_mask) ++#else ++static int mali_mem_os_shrink(struct shrinker *shrinker, int nr_to_scan, gfp_t gfp_mask) ++#endif /* Linux < 2.6.35 */ ++#else ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++static int mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc) ++#else ++static unsigned long mali_mem_os_shrink(struct shrinker *shrinker, struct shrink_control *sc) ++#endif /* Linux < 3.12.0 */ ++#endif /* Linux < 3.0.0 */ +{ -+ struct sync_pt *sync_pt; -+ struct sync_fence *sync_fence; ++ struct mali_page_node *m_page, *m_tmp; ++ unsigned long flags; ++ struct list_head *le, pages; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) ++ int nr = nr_to_scan; ++#else ++ int nr = sc->nr_to_scan; ++#endif + -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); ++ if (0 == nr) { ++ return mali_mem_os_shrink_count(shrinker, sc); ++ } + -+ sync_pt = mali_sync_flag_create_pt(flag); -+ if (NULL == sync_pt) return NULL; ++ if (0 == spin_trylock_irqsave(&mali_mem_os_allocator.pool_lock, flags)) { ++ /* Not able to lock. */ ++ return -1; ++ } + -+ sync_fence = sync_fence_create("mali_flag_fence", sync_pt); -+ if (NULL == sync_fence) { -+ sync_pt_free(sync_pt); -+ return NULL; ++ if (0 == mali_mem_os_allocator.pool_count) { ++ /* No pages availble */ ++ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags); ++ return 0; + } + -+ return sync_fence; -+} ++ /* Release from general page pool */ ++ nr = min((size_t)nr, mali_mem_os_allocator.pool_count); ++ mali_mem_os_allocator.pool_count -= nr; ++ list_for_each(le, &mali_mem_os_allocator.pool_pages) { ++ --nr; ++ if (0 == nr) break; ++ } ++ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le); ++ spin_unlock_irqrestore(&mali_mem_os_allocator.pool_lock, flags); ++ ++ list_for_each_entry_safe(m_page, m_tmp, &pages, list) { ++ mali_mem_os_free_page_node(m_page); ++ } ++ ++ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES > mali_mem_os_allocator.pool_count) { ++ /* Pools are empty, stop timer */ ++ MALI_DEBUG_PRINT(5, ("Stopping timer, only %u pages on pool\n", mali_mem_os_allocator.pool_count)); ++ cancel_delayed_work(&mali_mem_os_allocator.timed_shrinker); ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++ return mali_mem_os_shrink_count(shrinker, sc); +#else -+static struct mali_internal_sync_timeline_ops mali_timeline_ops = { -+ .driver_name = "Mali", -+ .has_signaled = timeline_has_signaled, -+ .free_pt = timeline_free_pt, -+ .release_obj = timeline_release, -+ .print_sync_pt = timeline_print_sync_pt, -+}; ++ return nr; ++#endif ++} + -+struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name) ++static void mali_mem_os_trim_pool(struct work_struct *data) +{ -+ struct mali_internal_sync_timeline *sync_tl; -+ struct mali_sync_timeline_container *mali_sync_tl; ++ struct mali_page_node *m_page, *m_tmp; ++ struct list_head *le; ++ LIST_HEAD(pages); ++ size_t nr_to_free; + -+ sync_tl = mali_internal_sync_timeline_create(&mali_timeline_ops, sizeof(struct 
mali_sync_timeline_container), name); -+ if (NULL == sync_tl) return NULL; ++ MALI_IGNORE(data); + -+ mali_sync_tl = to_mali_sync_tl_container(sync_tl); -+ mali_sync_tl->timeline = timeline; ++ MALI_DEBUG_PRINT(3, ("OS Mem: Trimming pool %u\n", mali_mem_os_allocator.pool_count)); + -+ /* Grab a reference on the module to ensure the callbacks are present -+ * as long some timeline exists. The reference is released when the -+ * timeline is freed. -+ * Since this function is called from a ioctl on an open file we know -+ * we already have a reference, so using __module_get is safe. */ -+ __module_get(THIS_MODULE); ++ /* Release from general page pool */ ++ spin_lock(&mali_mem_os_allocator.pool_lock); ++ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { ++ size_t count = mali_mem_os_allocator.pool_count - MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES; ++ const size_t min_to_free = min(64, MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES); + -+ return sync_tl; -+} ++ /* Free half the pages on the pool above the static limit. Or 64 pages, 256KB. */ ++ nr_to_free = max(count / 2, min_to_free); + -+s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence) -+{ -+ s32 fd = -1; ++ mali_mem_os_allocator.pool_count -= nr_to_free; ++ list_for_each(le, &mali_mem_os_allocator.pool_pages) { ++ --nr_to_free; ++ if (0 == nr_to_free) break; ++ } ++ list_cut_position(&pages, &mali_mem_os_allocator.pool_pages, le); ++ } ++ spin_unlock(&mali_mem_os_allocator.pool_lock); + -+ fd = get_unused_fd_flags(0); ++ list_for_each_entry_safe(m_page, m_tmp, &pages, list) { ++ mali_mem_os_free_page_node(m_page); ++ } + -+ if (fd < 0) { -+ fput(sync_fence->file); -+ return -1; ++ /* Release some pages from page table page pool */ ++ mali_mem_os_trim_page_table_page_pool(); ++ ++ if (MALI_OS_MEMORY_KERNEL_BUFFER_SIZE_IN_PAGES < mali_mem_os_allocator.pool_count) { ++ MALI_DEBUG_PRINT(4, ("OS Mem: Starting pool trim timer %u\n", mali_mem_os_allocator.pool_count)); ++ queue_delayed_work(mali_mem_os_allocator.wq, &mali_mem_os_allocator.timed_shrinker, MALI_OS_MEMORY_POOL_TRIM_JIFFIES); + } -+ fd_install(fd, sync_fence->file); -+ return fd; +} + -+struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) ++_mali_osk_errcode_t mali_mem_os_init(void) +{ -+ struct mali_internal_sync_fence *sync_fence; -+ -+ MALI_DEBUG_ASSERT_POINTER(sync_fence1); -+ MALI_DEBUG_ASSERT_POINTER(sync_fence1); ++ mali_mem_os_allocator.wq = alloc_workqueue("mali-mem", WQ_UNBOUND, 1); ++ if (NULL == mali_mem_os_allocator.wq) { ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ sync_fence = mali_internal_sync_fence_merge(sync_fence1, sync_fence2); -+ fput(sync_fence1->file); -+ fput(sync_fence2->file); ++ register_shrinker(&mali_mem_os_allocator.shrinker, "mali-mem"); + -+ return sync_fence; ++ return _MALI_OSK_ERR_OK; +} + -+struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl) ++void mali_mem_os_term(void) +{ -+ struct mali_sync_flag *flag; -+ struct mali_internal_sync_fence *sync_fence; ++ struct mali_page_node *m_page, *m_tmp; ++ unregister_shrinker(&mali_mem_os_allocator.shrinker); ++ cancel_delayed_work_sync(&mali_mem_os_allocator.timed_shrinker); + -+ MALI_DEBUG_ASSERT_POINTER(sync_tl); ++ if (NULL != mali_mem_os_allocator.wq) { ++ destroy_workqueue(mali_mem_os_allocator.wq); ++ mali_mem_os_allocator.wq = NULL; ++ } + -+ flag = mali_sync_flag_create(sync_tl, 0); -+ if 
(NULL == flag) return NULL; ++ spin_lock(&mali_mem_os_allocator.pool_lock); ++ list_for_each_entry_safe(m_page, m_tmp, &mali_mem_os_allocator.pool_pages, list) { ++ mali_mem_os_free_page_node(m_page); + -+ sync_fence = mali_sync_flag_create_fence(flag); ++ --mali_mem_os_allocator.pool_count; ++ } ++ BUG_ON(mali_mem_os_allocator.pool_count); ++ spin_unlock(&mali_mem_os_allocator.pool_lock); + -+ mali_sync_flag_signal(flag, 0); -+ mali_sync_flag_put(flag); ++ /* Release from page table page pool */ ++ do { ++ u32 nr_to_free; + -+ return sync_fence; ++ spin_lock(&mali_mem_page_table_page_pool.lock); ++ ++ nr_to_free = min((size_t)MALI_MEM_OS_CHUNK_TO_FREE, mali_mem_page_table_page_pool.count); ++ ++ /* Pool lock will be released by the callee. */ ++ mali_mem_os_page_table_pool_free(nr_to_free); ++ } while (0 != mali_mem_page_table_page_pool.count); +} + -+struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, mali_timeline_point point) ++_mali_osk_errcode_t mali_memory_core_resource_os_memory(u32 size) +{ -+ struct mali_sync_flag *flag; ++ mali_mem_os_allocator.allocation_limit = size; + -+ if (NULL == sync_tl) return NULL; ++ MALI_SUCCESS; ++} + -+ flag = _mali_osk_calloc(1, sizeof(*flag)); -+ if (NULL == flag) return NULL; ++u32 mali_mem_os_stat(void) ++{ ++ return atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h +new file mode 100644 +index 000000000..8c9b35d0b +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_os_alloc.h +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ flag->sync_tl = sync_tl; -+ flag->point = point; ++#ifndef __MALI_MEMORY_OS_ALLOC_H__ ++#define __MALI_MEMORY_OS_ALLOC_H__ + -+ flag->status = 0; -+ kref_init(&flag->refcount); ++#include "mali_osk.h" ++#include "mali_memory_types.h" + -+ return flag; -+} + -+/** -+ * Create a sync point attached to given sync flag. ++/** @brief Release Mali OS memory + * -+ * @note Sync points must be triggered in *exactly* the same order as they are created. ++ * The session memory_lock must be held when calling this function. + * -+ * @param flag Sync flag. -+ * @return New sync point if successful, NULL if not. 
++ * @param mem_bkend Pointer to the mali_mem_backend to release + */ -+static struct mali_internal_sync_point *mali_sync_flag_create_pt(struct mali_sync_flag *flag) -+{ -+ struct mali_internal_sync_point *pt; -+ struct mali_sync_pt *mpt; ++u32 mali_mem_os_release(mali_mem_backend *mem_bkend); + -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); ++_mali_osk_errcode_t mali_mem_os_get_table_page(mali_dma_addr *phys, mali_io_address *mapping); + -+ pt = mali_internal_sync_point_create(flag->sync_tl, sizeof(struct mali_sync_pt)); ++void mali_mem_os_release_table_page(mali_dma_addr phys, void *virt); + -+ if (pt == NULL) { -+ MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n")); -+ return NULL; -+ } -+ mali_sync_flag_get(flag); ++_mali_osk_errcode_t mali_mem_os_init(void); + -+ mpt = to_mali_sync_pt(pt); -+ mpt->flag = flag; -+ mpt->sync_tl = flag->sync_tl; ++void mali_mem_os_term(void); + -+ return pt; -+} ++u32 mali_mem_os_stat(void); + -+struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag) -+{ -+ struct mali_internal_sync_point *sync_pt; -+ struct mali_internal_sync_fence *sync_fence; ++void mali_mem_os_free_page_node(struct mali_page_node *m_page); + -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); ++int mali_mem_os_alloc_pages(mali_mem_os_mem *os_mem, u32 size); + -+ sync_pt = mali_sync_flag_create_pt(flag); -+ if (NULL == sync_pt) { -+ MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n")); -+ return NULL; -+ } -+ sync_fence = (struct mali_internal_sync_fence *)sync_file_create(&sync_pt->base); -+ if (NULL == sync_fence) { -+ MALI_PRINT_ERROR(("Mali sync: sync_fence creation failed\n")); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -+ dma_fence_put(&sync_pt->base); -+#else -+ fence_put(&sync_pt->base); -+#endif -+ return NULL; -+ } ++u32 mali_mem_os_free(struct list_head *os_pages, u32 pages_count, mali_bool cow_flag); + -+ /* 'sync_pt' no longer needs to hold a refcount of '*sync_pt', to put it off. */ -+ dma_fence_put(&sync_pt->base); -+ sync_pt = NULL; ++_mali_osk_errcode_t mali_mem_os_put_page(struct page *page); + -+ return sync_fence; -+} -+#endif ++_mali_osk_errcode_t mali_mem_os_resize_pages(mali_mem_os_mem *mem_from, mali_mem_os_mem *mem_to, u32 start_page, u32 page_count); + -+void mali_sync_flag_get(struct mali_sync_flag *flag) -+{ -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ kref_get(&flag->refcount); -+} ++_mali_osk_errcode_t mali_mem_os_mali_map(mali_mem_os_mem *os_mem, struct mali_session_data *session, u32 vaddr, u32 start_page, u32 mapping_pgae_num, u32 props); + -+/** -+ * Free sync flag. -+ * -+ * @param ref kref object embedded in sync flag that should be freed. -+ */ -+static void mali_sync_flag_free(struct kref *ref) -+{ -+ struct mali_sync_flag *flag; -+ -+ MALI_DEBUG_ASSERT_POINTER(ref); -+ flag = container_of(ref, struct mali_sync_flag, refcount); -+ -+ _mali_osk_free(flag); -+} -+ -+void mali_sync_flag_put(struct mali_sync_flag *flag) -+{ -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ kref_put(&flag->refcount, mali_sync_flag_free); -+} -+ -+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error) -+{ -+ MALI_DEBUG_ASSERT_POINTER(flag); -+ -+ MALI_DEBUG_ASSERT(0 == flag->status); -+ flag->status = (0 > error) ? 
error : 1; ++void mali_mem_os_mali_unmap(mali_mem_allocation *alloc); + -+ _mali_osk_write_mem_barrier(); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ sync_timeline_signal(flag->sync_tl); -+#else -+ mali_internal_sync_timeline_signal(flag->sync_tl); -+#endif -+} ++int mali_mem_os_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); + ++_mali_osk_errcode_t mali_mem_os_resize_cpu_map_locked(mali_mem_backend *mem_bkend, struct vm_area_struct *vma, unsigned long start_vaddr, u32 mappig_size); + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_sync.h b/drivers/gpu/arm/mali400/mali/linux/mali_sync.h ++#endif /* __MALI_MEMORY_OS_ALLOC_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c new file mode 100644 -index 000000000..91be8b9cf +index 000000000..63506bfbe --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_sync.h -@@ -0,0 +1,169 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.c +@@ -0,0 +1,170 @@ +/* -+ * Copyright (C) 2012-2015, 2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -323025,173 +325000,174 @@ index 000000000..91be8b9cf + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_sync.h -+ * -+ * Mali interface for Linux sync objects. -+ */ ++#include "mali_kernel_common.h" ++#include "mali_memory.h" ++#include "mali_memory_secure.h" ++#include "mali_osk.h" ++#include ++#include ++#include ++#include + -+#ifndef _MALI_SYNC_H_ -+#define _MALI_SYNC_H_ ++_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd) ++{ ++ struct dma_buf *buf; ++ MALI_DEBUG_ASSERT_POINTER(secure_mem); + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ /* get dma buffer */ ++ buf = dma_buf_get(mem_fd); ++ if (IS_ERR_OR_NULL(buf)) { ++ MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf!\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+#include -+#include ++ if (size != buf->size) { ++ MALI_DEBUG_PRINT_ERROR(("The secure mem size not match to the dma buf size!\n")); ++ goto failed_alloc_mem; ++ } + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -+#include -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+#include -+#else -+#include "mali_internal_sync.h" -+#endif ++ secure_mem->buf = buf; ++ secure_mem->attachment = dma_buf_attach(secure_mem->buf, &mali_platform_device->dev); ++ if (NULL == secure_mem->attachment) { ++ MALI_DEBUG_PRINT_ERROR(("Failed to get dma buf attachment!\n")); ++ goto failed_dma_attach; ++ } + ++ secure_mem->sgt = dma_buf_map_attachment(secure_mem->attachment, DMA_BIDIRECTIONAL); ++ if (IS_ERR_OR_NULL(secure_mem->sgt)) { ++ MALI_DEBUG_PRINT_ERROR(("Failed to map dma buf attachment\n")); ++ goto failed_dma_map; ++ } + -+#include "mali_osk.h" ++ secure_mem->count = size / MALI_MMU_PAGE_SIZE; + -+struct mali_sync_flag; -+struct mali_timeline; ++ return _MALI_OSK_ERR_OK; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+/** -+ * Create a sync timeline. -+ * -+ * @param name Name of the sync timeline. -+ * @return The new sync timeline if successful, NULL if not. 
-+ */ -+struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name); ++failed_dma_map: ++ dma_buf_detach(secure_mem->buf, secure_mem->attachment); ++failed_dma_attach: ++failed_alloc_mem: ++ dma_buf_put(buf); ++ return _MALI_OSK_ERR_FAULT; ++} + -+/** -+ * Creates a file descriptor representing the sync fence. Will release sync fence if allocation of -+ * file descriptor fails. -+ * -+ * @param sync_fence Sync fence. -+ * @return File descriptor representing sync fence if successful, or -1 if not. -+ */ -+s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence); ++_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props) ++{ ++ struct mali_page_directory *pagedir; ++ struct scatterlist *sg; ++ u32 virt = vaddr; ++ u32 prop = props; ++ int i; + -+/** -+ * Merges two sync fences. Both input sync fences will be released. -+ * -+ * @param sync_fence1 First sync fence. -+ * @param sync_fence2 Second sync fence. -+ * @return New sync fence that is the result of the merger if successful, or NULL if not. -+ */ -+struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2); ++ MALI_DEBUG_ASSERT_POINTER(secure_mem); ++ MALI_DEBUG_ASSERT_POINTER(secure_mem->sgt); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+/** -+ * Create a sync fence that is already signaled. -+ * -+ * @param tl Sync timeline. -+ * @return New signaled sync fence if successful, NULL if not. -+ */ -+struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl); ++ pagedir = session->page_directory; + ++ for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) { ++ u32 size = sg_dma_len(sg); ++ dma_addr_t phys = sg_dma_address(sg); + -+/** -+ * Create a sync flag. -+ * -+ * @param sync_tl Sync timeline. -+ * @param point Point on Mali timeline. -+ * @return New sync flag if successful, NULL if not. -+ */ -+struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point); ++ /* sg must be page aligned. */ ++ MALI_DEBUG_ASSERT(0 == size % MALI_MMU_PAGE_SIZE); ++ MALI_DEBUG_ASSERT(0 == (phys & ~(uintptr_t)0xFFFFFFFF)); + -+/** -+ * Create a sync fence attached to given sync flag. -+ * -+ * @param flag Sync flag. -+ * @return New sync fence if successful, NULL if not. -+ */ -+struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag); -+#else -+/** -+ * Create a sync timeline. -+ * -+ * @param name Name of the sync timeline. -+ * @return The new sync timeline if successful, NULL if not. -+ */ -+struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name); ++ mali_mmu_pagedir_update(pagedir, virt, phys, size, prop); + -+/** -+ * Creates a file descriptor representing the sync fence. Will release sync fence if allocation of -+ * file descriptor fails. -+ * -+ * @param sync_fence Sync fence. -+ * @return File descriptor representing sync fence if successful, or -1 if not. -+ */ -+s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence); ++ MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x gpu virtual address: 0x%x! \n", phys, virt)); ++ virt += size; ++ } + -+/** -+ * Merges two sync fences. Both input sync fences will be released. -+ * -+ * @param sync_fence1 First sync fence. -+ * @param sync_fence2 Second sync fence. -+ * @return New sync fence that is the result of the merger if successful, or NULL if not. 
-+ */ -+struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2); ++ return _MALI_OSK_ERR_OK; ++} + -+/** -+ * Create a sync fence that is already signaled. -+ * -+ * @param tl Sync timeline. -+ * @return New signaled sync fence if successful, NULL if not. -+ */ -+struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl); ++void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc) ++{ ++ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); ++} + -+/** -+ * Create a sync flag. -+ * -+ * @param sync_tl Sync timeline. -+ * @param point Point on Mali timeline. -+ * @return New sync flag if successful, NULL if not. -+ */ -+struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, u32 point); + -+/** -+ * Create a sync fence attached to given sync flag. -+ * -+ * @param flag Sync flag. -+ * @return New sync fence if successful, NULL if not. -+ */ -+struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag); ++int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma) ++{ ++ ++ int ret = 0; ++ struct scatterlist *sg; ++ mali_mem_secure *secure_mem = &mem_bkend->secure_mem; ++ unsigned long addr = vma->vm_start; ++ int i; + ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE); ++ ++ for_each_sg(secure_mem->sgt->sgl, sg, secure_mem->sgt->nents, i) { ++ phys_addr_t phys; ++ dma_addr_t dev_addr; ++ u32 size, j; ++ dev_addr = sg_dma_address(sg); ++#if defined(CONFIG_ARM64) ||LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) ++ phys = dma_to_phys(&mali_platform_device->dev, dev_addr); ++#else ++ phys = page_to_phys(pfn_to_page(dma_to_pfn(&mali_platform_device->dev, dev_addr))); +#endif -+/** -+ * Grab sync flag reference. -+ * -+ * @param flag Sync flag. -+ */ -+void mali_sync_flag_get(struct mali_sync_flag *flag); ++ size = sg_dma_len(sg); ++ MALI_DEBUG_ASSERT(0 == size % _MALI_OSK_MALI_PAGE_SIZE); + -+/** -+ * Release sync flag reference. If this was the last reference, the sync flag will be freed. -+ * -+ * @param flag Sync flag. -+ */ -+void mali_sync_flag_put(struct mali_sync_flag *flag); ++ for (j = 0; j < size / _MALI_OSK_MALI_PAGE_SIZE; j++) { ++ ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys)); + -+/** -+ * Signal sync flag. All sync fences created from this flag will be signaled. -+ * -+ * @param flag Sync flag to signal. -+ * @param error Negative error code, or 0 if no error. -+ */ -+void mali_sync_flag_signal(struct mali_sync_flag *flag, int error); ++ if (unlikely(VM_FAULT_NOPAGE != ret)) { ++ return -EFAULT; ++ } ++ addr += _MALI_OSK_MALI_PAGE_SIZE; ++ phys += _MALI_OSK_MALI_PAGE_SIZE; + -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ MALI_DEBUG_PRINT(3, ("The secure mem physical address: 0x%x , cpu virtual address: 0x%x! 
\n", phys, addr)); ++ } ++ } ++ return ret; ++} + -+#endif /* _MALI_SYNC_H_ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h b/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h ++u32 mali_mem_secure_release(mali_mem_backend *mem_bkend) ++{ ++ struct mali_mem_secure *mem; ++ mali_mem_allocation *alloc = mem_bkend->mali_allocation; ++ u32 free_pages_nr = 0; ++ MALI_DEBUG_ASSERT(mem_bkend->type == MALI_MEM_SECURE); ++ ++ mem = &mem_bkend->secure_mem; ++ MALI_DEBUG_ASSERT_POINTER(mem->attachment); ++ MALI_DEBUG_ASSERT_POINTER(mem->buf); ++ MALI_DEBUG_ASSERT_POINTER(mem->sgt); ++ /* Unmap the memory from the mali virtual address space. */ ++ mali_mem_secure_mali_unmap(alloc); ++ mutex_lock(&mem_bkend->mutex); ++ dma_buf_unmap_attachment(mem->attachment, mem->sgt, DMA_BIDIRECTIONAL); ++ dma_buf_detach(mem->buf, mem->attachment); ++ dma_buf_put(mem->buf); ++ mutex_unlock(&mem_bkend->mutex); ++ ++ free_pages_nr = mem->count; ++ ++ return free_pages_nr; ++} ++ ++ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h new file mode 100644 -index 000000000..68b27b8be +index 000000000..48691d479 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h -@@ -0,0 +1,17 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_secure.h +@@ -0,0 +1,30 @@ +/* -+ * Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2013, 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -323200,21 +325176,34 @@ index 000000000..68b27b8be + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __MALI_UK_TYPES_H__ -+#define __MALI_UK_TYPES_H__ ++#ifndef __MALI_MEMORY_SECURE_H__ ++#define __MALI_MEMORY_SECURE_H__ + -+/* Simple wrapper in order to find the OS specific location of this file */ -+#include ++#include "mali_session.h" ++#include "mali_memory.h" ++#include + -+#endif /* __MALI_UK_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c ++#include "mali_memory_types.h" ++ ++_mali_osk_errcode_t mali_mem_secure_attach_dma_buf(mali_mem_secure *secure_mem, u32 size, int mem_fd); ++ ++_mali_osk_errcode_t mali_mem_secure_mali_map(mali_mem_secure *secure_mem, struct mali_session_data *session, u32 vaddr, u32 props); ++ ++void mali_mem_secure_mali_unmap(mali_mem_allocation *alloc); ++ ++int mali_mem_secure_cpu_map(mali_mem_backend *mem_bkend, struct vm_area_struct *vma); ++ ++u32 mali_mem_secure_release(mali_mem_backend *mem_bkend); ++ ++#endif /* __MALI_MEMORY_SECURE_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c new file mode 100644 -index 000000000..0bd1cddb1 +index 000000000..d682785b9 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c -@@ -0,0 +1,171 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.c +@@ -0,0 +1,943 @@ +/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -323222,1102 +325211,948 @@ index 000000000..0bd1cddb1 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+#include /* file system operations */ -+#include /* memort allocation functions */ -+#include /* user space access */ + -+#include "mali_ukk.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" ++#include "mali_osk_mali.h" ++#include "mali_memory.h" ++#include "mali_memory_manager.h" ++#include "mali_memory_virtual.h" ++#include "mali_memory_cow.h" ++#include "mali_ukk.h" ++#include "mali_kernel_utilization.h" ++#include "mali_memory_swap_alloc.h" + -+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs) -+{ -+ _mali_uk_get_api_version_s kargs; -+ _mali_osk_errcode_t err; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++static struct _mali_osk_bitmap idx_mgr; ++static struct file *global_swap_file; ++static struct address_space *global_swap_space; ++static _mali_osk_wq_work_t *mali_mem_swap_out_workq = NULL; ++static u32 mem_backend_swapped_pool_size; ++#ifdef MALI_MEM_SWAP_TRACKING ++static u32 mem_backend_swapped_unlock_size; ++#endif ++/* Lock order: mem_backend_swapped_pool_lock > each memory backend's mutex lock. ++ * This lock used to protect mem_backend_swapped_pool_size and mem_backend_swapped_pool. */ ++static struct mutex mem_backend_swapped_pool_lock; ++static struct list_head mem_backend_swapped_pool; + -+ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT; ++extern struct mali_mem_os_allocator mali_mem_os_allocator; + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_api_version(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++#define MALI_SWAP_LOW_MEM_DEFAULT_VALUE (60*1024*1024) ++#define MALI_SWAP_INVALIDATE_MALI_ADDRESS (0) /* Used to mark the given memory cookie is invalidate. */ ++#define MALI_SWAP_GLOBAL_SWAP_FILE_SIZE (0xFFFFFFFF) ++#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX \ ++ ((MALI_SWAP_GLOBAL_SWAP_FILE_SIZE) >> PAGE_SHIFT) ++#define MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE (1 << 15) /* Reserved for CoW nonlinear swap backend memory, the space size is 128MB. */ + -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; -+ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT; ++unsigned int mali_mem_swap_out_threshold_value = MALI_SWAP_LOW_MEM_DEFAULT_VALUE; + -+ return 0; -+} ++/** ++ * We have two situations to do shrinking things, one is we met low GPU utilization which shows GPU needn't touch too ++ * swappable backends in short time, and the other one is we add new swappable backends, the total pool size exceed ++ * the threshold value of the swapped pool size. 
++ */ ++typedef enum { ++ MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION = 100, ++ MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS = 257, ++} _mali_mem_swap_pool_shrink_type_t; + -+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs) -+{ -+ _mali_uk_get_api_version_v2_s kargs; -+ _mali_osk_errcode_t err; ++static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++_mali_osk_errcode_t mali_mem_swap_init(void) ++{ ++ gfp_t flags = __GFP_NORETRY | __GFP_NOWARN; + -+ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT; ++ if (_MALI_OSK_ERR_OK != _mali_osk_bitmap_init(&idx_mgr, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX, MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE)) { ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_api_version_v2(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ global_swap_file = shmem_file_setup("mali_swap", MALI_SWAP_GLOBAL_SWAP_FILE_SIZE, VM_NORESERVE); ++ if (IS_ERR(global_swap_file)) { ++ _mali_osk_bitmap_term(&idx_mgr); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; -+ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT; ++ global_swap_space = global_swap_file->f_path.dentry->d_inode->i_mapping; + -+ return 0; -+} ++ mali_mem_swap_out_workq = _mali_osk_wq_create_work(mali_mem_swap_swapped_bkend_pool_check_for_low_utilization, NULL); ++ if (NULL == mali_mem_swap_out_workq) { ++ _mali_osk_bitmap_term(&idx_mgr); ++ fput(global_swap_file); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. */ -+#if 0 -+#define mali400_in_rk30_version 0x01 -+int get_mali_version_in_rk30_wrapper(struct mali_session_data *session_data, _mali_uk_get_mali_version_in_rk30_s __user *uargs) -+{ -+ _mali_uk_get_mali_version_in_rk30_s kargs; -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ kargs.ctx = (uintptr_t)session_data; -+ kargs.version = mali400_in_rk30_version; -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; -+ return 0; -+} ++#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_LPAE) ++ flags |= GFP_HIGHUSER; +#else -+#include "../platform/rk/rk_ext.h" -+int get_rk_ko_version_wrapper(struct mali_session_data *session_data, _mali_rk_ko_version_s __user *uargs) -+{ -+ _mali_rk_ko_version_s kargs; -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ kargs.ctx = (uintptr_t)session_data; -+ kargs.version = RK_KO_VER; -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; -+ return 0; -+} ++#ifdef CONFIG_ZONE_DMA32 ++ flags |= GFP_DMA32; ++#else ++#ifdef CONFIG_ZONE_DMA ++ flags |= GFP_DMA; ++#else ++ /* arm64 utgard only work on < 4G, but the kernel ++ * didn't provide method to allocte memory < 4G ++ */ ++ MALI_DEBUG_ASSERT(0); ++#endif ++#endif +#endif + -+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs) -+{ -+ _mali_uk_wait_for_notification_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ /* When we use shmem_read_mapping_page to allocate/swap-in, it will ++ * use these flags to allocate new page if need.*/ ++ mapping_set_gfp_mask(global_swap_space, flags); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_wait_for_notification(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ mem_backend_swapped_pool_size = 0; ++#ifdef 
MALI_MEM_SWAP_TRACKING ++ mem_backend_swapped_unlock_size = 0; ++#endif ++ mutex_init(&mem_backend_swapped_pool_lock); ++ INIT_LIST_HEAD(&mem_backend_swapped_pool); + -+ if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) { -+ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ -+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT; -+ } else { -+ if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT; -+ } ++ MALI_DEBUG_PRINT(2, ("Mali SWAP: Swap out threshold vaule is %uM\n", mali_mem_swap_out_threshold_value >> 20)); + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} + -+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs) ++void mali_mem_swap_term(void) +{ -+ _mali_uk_post_notification_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ _mali_osk_bitmap_term(&idx_mgr); + -+ kargs.ctx = (uintptr_t)session_data; ++ fput(global_swap_file); + -+ if (0 != get_user(kargs.type, &uargs->type)) { -+ return -EFAULT; -+ } ++ _mali_osk_wq_delete_work(mali_mem_swap_out_workq); + -+ err = _mali_ukk_post_notification(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ MALI_DEBUG_ASSERT(list_empty(&mem_backend_swapped_pool)); ++ MALI_DEBUG_ASSERT(0 == mem_backend_swapped_pool_size); + -+ return 0; ++ return; +} + -+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs) ++struct file *mali_mem_swap_get_global_swap_file(void) +{ -+ _mali_uk_get_user_settings_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_user_settings(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } -+ -+ kargs.ctx = 0; /* prevent kernel address to be returned to user space */ -+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT; -+ -+ return 0; ++ return global_swap_file; +} + -+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs) ++/* Judge if swappable backend in swapped pool. 
*/ ++static mali_bool mali_memory_swap_backend_in_swapped_pool(mali_mem_backend *mem_bkend) +{ -+ _mali_uk_request_high_priority_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_request_high_priority(&kargs); -+ -+ kargs.ctx = 0; ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); + -+ return map_errcode(err); ++ return !list_empty(&mem_bkend->list); +} + -+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs) ++void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend) +{ -+ _mali_uk_pending_submit_s kargs; -+ _mali_osk_errcode_t err; ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ mutex_lock(&mem_backend_swapped_pool_lock); ++ mutex_lock(&mem_bkend->mutex); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_pending_submit(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ if (MALI_FALSE == mali_memory_swap_backend_in_swapped_pool(mem_bkend)) { ++ mutex_unlock(&mem_bkend->mutex); ++ mutex_unlock(&mem_backend_swapped_pool_lock); ++ return; ++ } + -+ return 0; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c -new file mode 100644 -index 000000000..68fcd9719 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include /* file system operations */ -+#include /* user space access */ ++ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list)); + -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" ++ list_del_init(&mem_bkend->list); + -+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs) -+{ -+ _mali_osk_errcode_t err; ++ mutex_unlock(&mem_bkend->mutex); + -+ /* If the job was started successfully, 0 is returned. If there was an error, but the job -+ * was started, we return -ENOENT. For anything else returned, the job was not started. 
*/ ++ mem_backend_swapped_pool_size -= mem_bkend->size; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ mutex_unlock(&mem_backend_swapped_pool_lock); ++} + -+ err = _mali_ukk_gp_start_job(session_data, uargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++static void mali_mem_swap_out_page_node(mali_page_node *page_node) ++{ ++ MALI_DEBUG_ASSERT(page_node); + -+ return 0; ++ dma_unmap_page(&mali_platform_device->dev, page_node->swap_it->dma_addr, ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); ++ set_page_dirty(page_node->swap_it->page); ++ put_page(page_node->swap_it->page); +} + -+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs) ++void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend) +{ -+ _mali_uk_get_gp_core_version_s kargs; -+ _mali_osk_errcode_t err; ++ mali_page_node *m_page; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex)); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_gp_core_version(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN)) { ++ return; ++ } + -+ /* no known transactions to roll-back */ ++ mem_bkend->flags |= MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN; + -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { ++ mali_mem_swap_out_page_node(m_page); ++ } + -+ return 0; ++ return; +} + -+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs) ++static void mali_mem_swap_unlock_partial_locked_mem_backend(mali_mem_backend *mem_bkend, mali_page_node *page_node) +{ -+ _mali_uk_gp_suspend_response_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); -+ -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT; -+ -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_gp_suspend_response(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ mali_page_node *m_page; + -+ if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT; ++ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_bkend->mutex)); + -+ /* no known transactions to roll-back */ -+ return 0; ++ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { ++ if (m_page == page_node) { ++ break; ++ } ++ mali_mem_swap_out_page_node(m_page); ++ } +} + -+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs) ++static void mali_mem_swap_swapped_bkend_pool_shrink(_mali_mem_swap_pool_shrink_type_t shrink_type) +{ -+ _mali_uk_get_gp_number_of_cores_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); -+ -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_gp_number_of_cores(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ mali_mem_backend *bkend, *tmp_bkend; ++ long system_free_size; ++ u32 last_gpu_utilization, gpu_utilization_threshold_value, temp_swap_out_threshold_value; + -+ /* no known transactions to roll-back */ ++ MALI_DEBUG_ASSERT(1 == mutex_is_locked(&mem_backend_swapped_pool_lock)); + -+ if (0 != 
put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT; ++ if (MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION == shrink_type) { ++ /** ++ * When we met that system memory is very low and Mali locked swappable memory size is less than ++ * threshold value, and at the same time, GPU load is very low and don't need high performance, ++ * at this condition, we can unlock more swap memory backend from swapped backends pool. ++ */ ++ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION; ++ temp_swap_out_threshold_value = (mali_mem_swap_out_threshold_value >> 2); ++ } else { ++ /* When we add swappable memory backends to swapped pool, we need to think that we couldn't ++ * hold too much swappable backends in Mali driver, and also we need considering performance. ++ * So there is a balance for swapping out memory backend, we should follow the following conditions: ++ * 1. Total memory size in global mem backend swapped pool is more than the defined threshold value. ++ * 2. System level free memory size is less than the defined threshold value. ++ * 3. Please note that GPU utilization problem isn't considered in this condition. ++ */ ++ gpu_utilization_threshold_value = MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS; ++ temp_swap_out_threshold_value = mali_mem_swap_out_threshold_value; ++ } + -+ return 0; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c -new file mode 100644 -index 000000000..baea4c688 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c -@@ -0,0 +1,333 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include /* file system operations */ -+#include /* user space access */ ++ /* Get system free pages number. */ ++ system_free_size = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE; ++ last_gpu_utilization = _mali_ukk_utilization_gp_pp(); + -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" ++ if ((last_gpu_utilization < gpu_utilization_threshold_value) ++ && (system_free_size < mali_mem_swap_out_threshold_value) ++ && (mem_backend_swapped_pool_size > temp_swap_out_threshold_value)) { ++ list_for_each_entry_safe(bkend, tmp_bkend, &mem_backend_swapped_pool, list) { ++ if (mem_backend_swapped_pool_size <= temp_swap_out_threshold_value) { ++ break; ++ } + -+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs) -+{ -+ _mali_uk_alloc_mem_s kargs; -+ _mali_osk_errcode_t err; ++ mutex_lock(&bkend->mutex); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ /* check if backend is in use. 
*/ ++ if (0 < bkend->using_count) { ++ mutex_unlock(&bkend->mutex); ++ continue; ++ } + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) { -+ return -EFAULT; ++ mali_mem_swap_unlock_single_mem_backend(bkend); ++ list_del_init(&bkend->list); ++ mem_backend_swapped_pool_size -= bkend->size; ++#ifdef MALI_MEM_SWAP_TRACKING ++ mem_backend_swapped_unlock_size += bkend->size; ++#endif ++ mutex_unlock(&bkend->mutex); ++ } + } -+ kargs.ctx = (uintptr_t)session_data; + -+ err = _mali_ukk_mem_allocate(&kargs); ++ return; ++} + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++static void mali_mem_swap_swapped_bkend_pool_check_for_low_utilization(void *arg) ++{ ++ MALI_IGNORE(arg); + -+ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) { -+ return -EFAULT; -+ } ++ mutex_lock(&mem_backend_swapped_pool_lock); + -+ return 0; ++ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_WITH_LOW_UTILIZATION); ++ ++ mutex_unlock(&mem_backend_swapped_pool_lock); +} + -+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs) ++/** ++ * After PP job finished, we add all of swappable memory backend used by this PP ++ * job to the tail of the global swapped pool, and if the total size of swappable memory is more than threshold ++ * value, we also need to shrink the swapped pool start from the head of the list. ++ */ ++void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend) +{ -+ _mali_uk_free_mem_s kargs; -+ _mali_osk_errcode_t err; ++ mutex_lock(&mem_backend_swapped_pool_lock); ++ mutex_lock(&mem_bkend->mutex); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ if (mali_memory_swap_backend_in_swapped_pool(mem_bkend)) { ++ MALI_DEBUG_ASSERT(!list_empty(&mem_bkend->list)); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) { -+ return -EFAULT; ++ list_del_init(&mem_bkend->list); ++ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool); ++ mutex_unlock(&mem_bkend->mutex); ++ mutex_unlock(&mem_backend_swapped_pool_lock); ++ return; + } -+ kargs.ctx = (uintptr_t)session_data; + -+ err = _mali_ukk_mem_free(&kargs); ++ list_add_tail(&mem_bkend->list, &mem_backend_swapped_pool); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ mutex_unlock(&mem_bkend->mutex); ++ mem_backend_swapped_pool_size += mem_bkend->size; + -+ if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) { -+ return -EFAULT; -+ } ++ mali_mem_swap_swapped_bkend_pool_shrink(MALI_MEM_SWAP_SHRINK_FOR_ADDING_NEW_BACKENDS); + -+ return 0; ++ mutex_unlock(&mem_backend_swapped_pool_lock); ++ return; +} + -+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs) ++ ++u32 mali_mem_swap_idx_alloc(void) +{ -+ _mali_uk_bind_mem_s kargs; -+ _mali_osk_errcode_t err; ++ return _mali_osk_bitmap_alloc(&idx_mgr); ++} + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++void mali_mem_swap_idx_free(u32 idx) ++{ ++ _mali_osk_bitmap_free(&idx_mgr, idx); ++} + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) { -+ return -EFAULT; -+ } -+ kargs.ctx = (uintptr_t)session_data; ++static u32 mali_mem_swap_idx_range_alloc(u32 count) ++{ ++ u32 index; + -+ err = _mali_ukk_mem_bind(&kargs); ++ index = _mali_osk_bitmap_alloc_range(&idx_mgr, count); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ return index; ++} + -+ return 0; ++static void 
mali_mem_swap_idx_range_free(u32 idx, int num) ++{ ++ _mali_osk_bitmap_free_range(&idx_mgr, idx, num); +} + -+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs) ++struct mali_swap_item *mali_mem_swap_alloc_swap_item(void) +{ -+ _mali_uk_unbind_mem_s kargs; -+ _mali_osk_errcode_t err; ++ mali_swap_item *swap_item; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ swap_item = kzalloc(sizeof(mali_swap_item), GFP_KERNEL); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) { -+ return -EFAULT; ++ if (NULL == swap_item) { ++ return NULL; + } -+ kargs.ctx = (uintptr_t)session_data; -+ -+ err = _mali_ukk_mem_unbind(&kargs); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ atomic_set(&swap_item->ref_count, 1); ++ swap_item->page = NULL; ++ atomic_add(1, &mali_mem_os_allocator.allocated_pages); + -+ return 0; ++ return swap_item; +} + -+ -+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs) ++void mali_mem_swap_free_swap_item(mali_swap_item *swap_item) +{ -+ _mali_uk_cow_mem_s kargs; -+ _mali_osk_errcode_t err; ++ struct inode *file_node; ++ long long start, end; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ /* If this swap item is shared, we just reduce the reference counter. */ ++ if (0 == atomic_dec_return(&swap_item->ref_count)) { ++ file_node = global_swap_file->f_path.dentry->d_inode; ++ start = swap_item->idx; ++ start = start << 12; ++ end = start + PAGE_SIZE; + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) { -+ return -EFAULT; -+ } -+ kargs.ctx = (uintptr_t)session_data; ++ shmem_truncate_range(file_node, start, (end - 1)); + -+ err = _mali_ukk_mem_cow(&kargs); ++ mali_mem_swap_idx_free(swap_item->idx); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ atomic_sub(1, &mali_mem_os_allocator.allocated_pages); + -+ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) { -+ return -EFAULT; ++ kfree(swap_item); + } -+ -+ return 0; +} + -+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs) ++/* Used to allocate new swap item for new memory allocation and cow page for write. 
*/ ++struct mali_page_node *_mali_mem_swap_page_node_allocate(void) +{ -+ _mali_uk_cow_modify_range_s kargs; -+ _mali_osk_errcode_t err; ++ struct mali_page_node *m_page; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ m_page = _mali_page_node_allocate(MALI_PAGE_NODE_SWAP); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) { -+ return -EFAULT; ++ if (NULL == m_page) { ++ return NULL; + } -+ kargs.ctx = (uintptr_t)session_data; + -+ err = _mali_ukk_mem_cow_modify_range(&kargs); ++ m_page->swap_it = mali_mem_swap_alloc_swap_item(); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); ++ if (NULL == m_page->swap_it) { ++ kfree(m_page); ++ return NULL; + } + -+ if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) { -+ return -EFAULT; -+ } -+ return 0; ++ return m_page; +} + -+ -+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs) ++_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page) +{ -+ _mali_uk_mem_resize_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); -+ -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) { -+ return -EFAULT; -+ } -+ kargs.ctx = (uintptr_t)session_data; -+ -+ err = _mali_ukk_mem_resize(&kargs); + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ mali_mem_swap_free_swap_item(m_page->swap_it); + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} + -+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs) ++void _mali_mem_swap_page_node_free(struct mali_page_node *m_page) +{ -+ _mali_uk_mem_write_safe_s kargs; -+ _mali_osk_errcode_t err; ++ _mali_mem_swap_put_page_node(m_page); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ kfree(m_page); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) { -+ return -EFAULT; -+ } ++ return; ++} + -+ kargs.ctx = (uintptr_t)session_data; ++u32 mali_mem_swap_free(mali_mem_swap *swap_mem) ++{ ++ struct mali_page_node *m_page, *m_tmp; ++ u32 free_pages_nr = 0; + -+ /* Check if we can access the buffers */ -+ if (!access_ok((const void *)(uintptr_t)kargs.dest, kargs.size) || -+ !access_ok((const void *)(uintptr_t)kargs.src, kargs.size)) { -+ return -EINVAL; -+ } ++ MALI_DEBUG_ASSERT_POINTER(swap_mem); + -+ /* Check if size wraps */ -+ if ((kargs.size + kargs.dest) <= kargs.dest -+ || (kargs.size + kargs.src) <= kargs.src) { -+ return -EINVAL; -+ } ++ list_for_each_entry_safe(m_page, m_tmp, &swap_mem->pages, list) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP); + -+ err = _mali_ukk_mem_write_safe(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ /* free the page node and release the swap item, if the ref count is 1, ++ * then need also free the swap item. 
*/ ++ list_del(&m_page->list); ++ if (1 == _mali_page_node_get_ref_count(m_page)) { ++ free_pages_nr++; ++ } + -+ if (0 != put_user(kargs.size, &uargs->size)) { -+ return -EFAULT; ++ _mali_mem_swap_page_node_free(m_page); + } + -+ return 0; ++ return free_pages_nr; +} + -+ -+ -+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs) ++static u32 mali_mem_swap_cow_free(mali_mem_cow *cow_mem) +{ -+ _mali_uk_query_mmu_page_table_dump_size_s kargs; -+ _mali_osk_errcode_t err; ++ struct mali_page_node *m_page, *m_tmp; ++ u32 free_pages_nr = 0; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ MALI_DEBUG_ASSERT_POINTER(cow_mem); + -+ kargs.ctx = (uintptr_t)session_data; ++ list_for_each_entry_safe(m_page, m_tmp, &cow_mem->pages, list) { ++ MALI_DEBUG_ASSERT(m_page->type == MALI_PAGE_NODE_SWAP); + -+ err = _mali_ukk_query_mmu_page_table_dump_size(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ /* free the page node and release the swap item, if the ref count is 1, ++ * then need also free the swap item. */ ++ list_del(&m_page->list); ++ if (1 == _mali_page_node_get_ref_count(m_page)) { ++ free_pages_nr++; ++ } + -+ if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT; ++ _mali_mem_swap_page_node_free(m_page); ++ } + -+ return 0; ++ return free_pages_nr; +} + -+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs) ++u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped) +{ -+ _mali_uk_dump_mmu_page_table_s kargs; -+ _mali_osk_errcode_t err; -+ void __user *user_buffer; -+ void *buffer = NULL; -+ int rc = -EFAULT; -+ -+ /* validate input */ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ /* the session_data pointer was validated by caller */ -+ -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s))) -+ goto err_exit; -+ -+ user_buffer = (void __user *)(uintptr_t)kargs.buffer; -+ if (!access_ok(user_buffer, kargs.size)) -+ goto err_exit; -+ -+ /* allocate temporary buffer (kernel side) to store mmu page table info */ -+ if (kargs.size <= 0) -+ return -EINVAL; -+ /* Allow at most 8MiB buffers, this is more than enough to dump a fully -+ * populated page table. 
*/ -+ if (kargs.size > SZ_8M) -+ return -EINVAL; ++ mali_mem_allocation *alloc; ++ u32 free_pages_nr = 0; + -+ buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size); -+ if (NULL == buffer) { -+ rc = -ENOMEM; -+ goto err_exit; -+ } ++ MALI_DEBUG_ASSERT_POINTER(mem_bkend); ++ alloc = mem_bkend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+ kargs.ctx = (uintptr_t)session_data; -+ kargs.buffer = (uintptr_t)buffer; -+ err = _mali_ukk_dump_mmu_page_table(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ rc = map_errcode(err); -+ goto err_exit; ++ if (is_mali_mapped) { ++ mali_mem_swap_mali_unmap(alloc); + } + -+ /* copy mmu page table info back to user space and update pointers */ -+ if (0 != copy_to_user(user_buffer, buffer, kargs.size)) -+ goto err_exit; -+ -+ kargs.register_writes = kargs.register_writes - -+ (uintptr_t)buffer + (uintptr_t)user_buffer; -+ kargs.page_table_dump = kargs.page_table_dump - -+ (uintptr_t)buffer + (uintptr_t)user_buffer; ++ mali_memory_swap_list_backend_delete(mem_bkend); + -+ if (0 != copy_to_user(uargs, &kargs, sizeof(kargs))) -+ goto err_exit; ++ mutex_lock(&mem_bkend->mutex); ++ /* To make sure the given memory backend was unlocked from Mali side, ++ * and then free this memory block. */ ++ mali_mem_swap_unlock_single_mem_backend(mem_bkend); ++ mutex_unlock(&mem_bkend->mutex); + -+ rc = 0; ++ if (MALI_MEM_SWAP == mem_bkend->type) { ++ free_pages_nr = mali_mem_swap_free(&mem_bkend->swap_mem); ++ } else { ++ free_pages_nr = mali_mem_swap_cow_free(&mem_bkend->cow_mem); ++ } + -+err_exit: -+ if (buffer) _mali_osk_vfree(buffer); -+ return rc; ++ return free_pages_nr; +} + -+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs) ++mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node) +{ -+ _mali_osk_errcode_t err; -+ _mali_uk_profiling_memory_usage_get_s kargs; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ MALI_DEBUG_ASSERT(NULL != page_node); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) { -+ return -EFAULT; -+ } ++ page_node->swap_it->page = shmem_read_mapping_page(global_swap_space, page_node->swap_it->idx); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_mem_usage_get(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); ++ if (IS_ERR(page_node->swap_it->page)) { ++ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: failed to swap in page with index: %d.\n", page_node->swap_it->idx)); ++ return MALI_FALSE; + } + -+ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ -+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) { -+ return -EFAULT; -+ } ++ /* Ensure page is flushed from CPU caches. */ ++ page_node->swap_it->dma_addr = dma_map_page(&mali_platform_device->dev, page_node->swap_it->page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); + -+ return 0; ++ return MALI_TRUE; +} + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c -new file mode 100644 -index 000000000..a9b0958c0 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include /* file system operations */ -+#include /* user space access */ -+ -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" -+ -+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs) ++int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx) +{ -+ _mali_osk_errcode_t err; -+ -+ /* If the job was started successfully, 0 is returned. If there was an error, but the job -+ * was started, we return -ENOENT. For anything else returned, the job was not started. */ ++ size_t page_count = PAGE_ALIGN(size) / PAGE_SIZE; ++ struct mali_page_node *m_page; ++ long system_free_size; ++ u32 i, index; ++ mali_bool ret; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ MALI_DEBUG_ASSERT(NULL != swap_mem); ++ MALI_DEBUG_ASSERT(NULL != bkend_idx); ++ MALI_DEBUG_ASSERT(page_count <= MALI_SWAP_GLOBAL_SWAP_FILE_INDEX_RESERVE); + -+ err = _mali_ukk_pp_start_job(session_data, uargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ if (atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE + size > mali_mem_os_allocator.allocation_limit) { ++ MALI_DEBUG_PRINT(2, ("Mali Mem: Unable to allocate %u bytes. Currently allocated: %lu, max limit %lu\n", ++ size, ++ atomic_read(&mali_mem_os_allocator.allocated_pages) * _MALI_OSK_MALI_PAGE_SIZE, ++ mali_mem_os_allocator.allocation_limit)); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ return 0; -+} ++ INIT_LIST_HEAD(&swap_mem->pages); ++ swap_mem->count = page_count; ++ index = mali_mem_swap_idx_range_alloc(page_count); + -+int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs) -+{ -+ _mali_osk_errcode_t err; ++ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == index) { ++ MALI_PRINT_ERROR(("Mali Swap: Failed to allocate continuous index for swappable Mali memory.")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ /* If the jobs were started successfully, 0 is returned. If there was an error, but the -+ * jobs were started, we return -ENOENT. For anything else returned, the jobs were not -+ * started. 
*/ ++ for (i = 0; i < page_count; i++) { ++ m_page = _mali_mem_swap_page_node_allocate(); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ if (NULL == m_page) { ++ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Failed to allocate mali page node.")); ++ swap_mem->count = i; + -+ err = _mali_ukk_pp_and_gp_start_job(session_data, uargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ mali_mem_swap_free(swap_mem); ++ mali_mem_swap_idx_range_free(index + i, page_count - i); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ return 0; -+} ++ m_page->swap_it->idx = index + i; + -+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs) -+{ -+ _mali_uk_get_pp_number_of_cores_s kargs; -+ _mali_osk_errcode_t err; ++ ret = mali_mem_swap_in_page_node(m_page); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ if (MALI_FALSE == ret) { ++ MALI_DEBUG_PRINT_ERROR(("SWAP Mem: Allocate new page from SHMEM file failed.")); ++ _mali_mem_swap_page_node_free(m_page); ++ mali_mem_swap_idx_range_free(index + i + 1, page_count - i - 1); + -+ kargs.ctx = (uintptr_t)session_data; ++ swap_mem->count = i; ++ mali_mem_swap_free(swap_mem); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ err = _mali_ukk_get_pp_number_of_cores(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); ++ list_add_tail(&m_page->list, &swap_mem->pages); + } + -+ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ -+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) { -+ return -EFAULT; ++ system_free_size = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE; ++ ++ if ((system_free_size < mali_mem_swap_out_threshold_value) ++ && (mem_backend_swapped_pool_size > (mali_mem_swap_out_threshold_value >> 2)) ++ && mali_utilization_enabled()) { ++ _mali_osk_wq_schedule_work(mali_mem_swap_out_workq); + } + ++ *bkend_idx = index; + return 0; +} + -+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs) ++void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc) +{ -+ _mali_uk_get_pp_core_version_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); -+ -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_get_pp_core_version(&kargs); -+ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ struct mali_session_data *session; + -+ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ return 0; ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); +} + -+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs) -+{ -+ _mali_uk_pp_disable_wb_s kargs; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++/* Insert these pages from shmem to mali page table*/ ++_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props) ++{ ++ struct mali_page_directory *pagedir = session->page_directory; ++ struct mali_page_node *m_page; ++ dma_addr_t phys; ++ u32 virt = vaddr; ++ u32 prop = props; + -+ if (0 != 
copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT; ++ list_for_each_entry(m_page, &swap_mem->pages, list) { ++ MALI_DEBUG_ASSERT(NULL != m_page->swap_it->page); ++ phys = m_page->swap_it->dma_addr; + -+ kargs.ctx = (uintptr_t)session_data; -+ _mali_ukk_pp_job_disable_wb(&kargs); ++ mali_mmu_pagedir_update(pagedir, virt, phys, MALI_MMU_PAGE_SIZE, prop); ++ virt += MALI_MMU_PAGE_SIZE; ++ } + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c -new file mode 100644 -index 000000000..8b49ebc50 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c -@@ -0,0 +1,183 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include /* file system operations */ -+#include /* user space access */ -+#include -+ -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" + -+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs) ++int mali_mem_swap_in_pages(struct mali_pp_job *job) +{ -+ _mali_uk_profiling_add_event_s kargs; -+ _mali_osk_errcode_t err; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ u32 num_memory_cookies; ++ struct mali_session_data *session; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_allocation *mali_alloc = NULL; ++ mali_mem_backend *mem_bkend = NULL; ++ struct mali_page_node *m_page; ++ mali_bool swap_in_success = MALI_TRUE; ++ int i; + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) { -+ return -EFAULT; -+ } ++ MALI_DEBUG_ASSERT_POINTER(job); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_profiling_add_event(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ num_memory_cookies = mali_pp_job_num_memory_cookies(job); ++ session = mali_pp_job_get_session(job); + -+ return 0; -+} ++ MALI_DEBUG_ASSERT_POINTER(session); + -+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs) -+{ -+ _mali_uk_sw_counters_report_s kargs; -+ _mali_osk_errcode_t err; -+ u32 *counter_buffer; -+ u32 __user *counters; ++ for (i = 0; i < num_memory_cookies; i++) { + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) { -+ return -EFAULT; -+ } ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ if (NULL == mali_vma_node) { ++ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS; ++ swap_in_success = MALI_FALSE; ++ MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr)); ++ continue; ++ } + -+ /* make sure that kargs.num_counters is [at least somewhat] sane */ -+ if (kargs.num_counters > 10000) { -+ MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many 
counters.\n")); -+ return -EINVAL; -+ } ++ mali_alloc = container_of(mali_vma_node, struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(NULL != mali_alloc); + -+ counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL); -+ if (NULL == counter_buffer) { -+ return -ENOMEM; -+ } ++ if (MALI_MEM_SWAP != mali_alloc->type && ++ MALI_MEM_COW != mali_alloc->type) { ++ continue; ++ } + -+ counters = (u32 *)(uintptr_t)kargs.counters; ++ /* Get backend memory & Map on GPU */ ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+ if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) { -+ kfree(counter_buffer); -+ return -EFAULT; -+ } ++ /* We neednot hold backend's lock here, race safe.*/ ++ if ((MALI_MEM_COW == mem_bkend->type) && ++ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { ++ continue; ++ } + -+ kargs.ctx = (uintptr_t)session_data; -+ kargs.counters = (uintptr_t)counter_buffer; ++ mutex_lock(&mem_bkend->mutex); + -+ err = _mali_ukk_sw_counters_report(&kargs); ++ /* When swap_in_success is MALI_FALSE, it means this job has memory backend that could not be swapped in, ++ * and it will be aborted in mali scheduler, so here, we just mark those memory cookies which ++ * should not be swapped out when delete job to invalide */ ++ if (MALI_FALSE == swap_in_success) { ++ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS; ++ mutex_unlock(&mem_bkend->mutex); ++ continue; ++ } + -+ kfree(counter_buffer); ++ /* Before swap in, checking if this memory backend has been swapped in by the latest flushed jobs. */ ++ ++mem_bkend->using_count; + -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++ if (1 < mem_bkend->using_count) { ++ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)); ++ mutex_unlock(&mem_bkend->mutex); ++ continue; ++ } + -+ return 0; -+} ++ if (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN != (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)) { ++ mutex_unlock(&mem_bkend->mutex); ++ continue; ++ } + -+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs) -+{ -+ _mali_uk_profiling_stream_fd_get_s kargs; -+ _mali_osk_errcode_t err; + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ list_for_each_entry(m_page, &mem_bkend->swap_mem.pages, list) { ++ if (MALI_FALSE == mali_mem_swap_in_page_node(m_page)) { ++ /* Don't have enough memory to swap in page, so release pages have already been swapped ++ * in and then mark this pp job to be fail. 
*/ ++ mali_mem_swap_unlock_partial_locked_mem_backend(mem_bkend, m_page); ++ swap_in_success = MALI_FALSE; ++ break; ++ } ++ } + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) { -+ return -EFAULT; -+ } ++ if (swap_in_success) { ++#ifdef MALI_MEM_SWAP_TRACKING ++ mem_backend_swapped_unlock_size -= mem_bkend->size; ++#endif ++ _mali_osk_mutex_wait(session->memory_lock); ++ mali_mem_swap_mali_map(&mem_bkend->swap_mem, session, mali_alloc->mali_mapping.addr, mali_alloc->mali_mapping.properties); ++ _mali_osk_mutex_signal(session->memory_lock); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_profiling_stream_fd_get(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); ++ /* Remove the unlock flag from mem backend flags, mark this backend has been swapped in. */ ++ mem_bkend->flags &= ~(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN); ++ mutex_unlock(&mem_bkend->mutex); ++ } else { ++ --mem_bkend->using_count; ++ /* Marking that this backend is not swapped in, need not to be processed anymore. */ ++ job->memory_cookies[i] = MALI_SWAP_INVALIDATE_MALI_ADDRESS; ++ mutex_unlock(&mem_bkend->mutex); ++ } + } + -+ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) { -+ return -EFAULT; -+ } ++ job->swap_status = swap_in_success ? MALI_SWAP_IN_SUCC : MALI_SWAP_IN_FAIL; + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} + -+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs) ++int mali_mem_swap_out_pages(struct mali_pp_job *job) +{ -+ _mali_uk_profiling_control_set_s kargs; -+ _mali_osk_errcode_t err; -+ u8 *kernel_control_data = NULL; -+ u8 *kernel_response_data = NULL; -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ -+ if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT; -+ if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT; ++ u32 num_memory_cookies; ++ struct mali_session_data *session; ++ struct mali_vma_node *mali_vma_node = NULL; ++ mali_mem_allocation *mali_alloc = NULL; ++ mali_mem_backend *mem_bkend = NULL; ++ int i; + -+ kargs.ctx = (uintptr_t)session_data; ++ MALI_DEBUG_ASSERT_POINTER(job); + ++ num_memory_cookies = mali_pp_job_num_memory_cookies(job); ++ session = mali_pp_job_get_session(job); + -+ /* Sanity check about the size */ -+ if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE) -+ return -EINVAL; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ if (0 != kargs.control_packet_size) { + -+ if (0 == kargs.response_packet_size) -+ return -EINVAL; ++ for (i = 0; i < num_memory_cookies; i++) { ++ u32 mali_addr = mali_pp_job_get_memory_cookie(job, i); + -+ kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size); -+ if (NULL == kernel_control_data) { -+ return -ENOMEM; ++ if (MALI_SWAP_INVALIDATE_MALI_ADDRESS == mali_addr) { ++ continue; + } + -+ kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size); -+ if (NULL == kernel_response_data) { -+ _mali_osk_free(kernel_control_data); -+ return -ENOMEM; ++ mali_vma_node = mali_vma_offset_search(&session->allocation_mgr, mali_addr, 0); ++ ++ if (NULL == mali_vma_node) { ++ MALI_PRINT_ERROR(("SWAP Mem: failed to find mali_vma_node through Mali address: 0x%08x.\n", mali_addr)); ++ continue; + } + -+ kargs.control_packet_data = (uintptr_t)kernel_control_data; -+ kargs.response_packet_data = (uintptr_t)kernel_response_data; ++ mali_alloc = container_of(mali_vma_node, 
struct mali_mem_allocation, mali_vma_node); ++ MALI_DEBUG_ASSERT(NULL != mali_alloc); + -+ if (0 != copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size)) { -+ _mali_osk_free(kernel_control_data); -+ _mali_osk_free(kernel_response_data); -+ return -EFAULT; ++ if (MALI_MEM_SWAP != mali_alloc->type && ++ MALI_MEM_COW != mali_alloc->type) { ++ continue; + } + -+ err = _mali_ukk_profiling_control_set(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ _mali_osk_free(kernel_control_data); -+ _mali_osk_free(kernel_response_data); -+ return map_errcode(err); -+ } ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+ if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) { -+ _mali_osk_free(kernel_control_data); -+ _mali_osk_free(kernel_response_data); -+ return -EFAULT; ++ /* We neednot hold backend's lock here, race safe.*/ ++ if ((MALI_MEM_COW == mem_bkend->type) && ++ (!(mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED))) { ++ continue; + } + -+ if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) { -+ _mali_osk_free(kernel_control_data); -+ _mali_osk_free(kernel_response_data); -+ return -EFAULT; -+ } ++ mutex_lock(&mem_bkend->mutex); + -+ _mali_osk_free(kernel_control_data); -+ _mali_osk_free(kernel_response_data); -+ } else { ++ MALI_DEBUG_ASSERT(0 < mem_bkend->using_count); + -+ err = _mali_ukk_profiling_control_set(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); ++ /* Reducing the using_count of mem backend means less pp job are using this memory backend, ++ * if this count get to zero, it means no pp job is using it now, could put it to swap out list. */ ++ --mem_bkend->using_count; ++ ++ if (0 < mem_bkend->using_count) { ++ mutex_unlock(&mem_bkend->mutex); ++ continue; + } ++ mutex_unlock(&mem_bkend->mutex); + ++ mali_memory_swap_list_backend_add(mem_bkend); + } -+ return 0; -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c -new file mode 100644 -index 000000000..1dd4a7c6f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+#include /* file system operations */ -+#include /* user space access */ -+ -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" + -+#include "mali_soft_job.h" -+#include "mali_timeline.h" ++ return _MALI_OSK_ERR_OK; ++} + -+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs) ++int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) +{ -+ _mali_uk_soft_job_start_s kargs; -+ u32 type, point; -+ u64 user_job; -+ struct mali_timeline_fence fence; -+ struct mali_soft_job *job = NULL; -+ u32 __user *job_id_ptr = NULL; -+ -+ /* If the job was started successfully, 0 is returned. If there was an error, but the job -+ * was started, we return -ENOENT. For anything else returned, the job was not started. */ -+ -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); -+ MALI_CHECK_NON_NULL(session, -EINVAL); -+ -+ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system); -+ -+ if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) { -+ return -EFAULT; -+ } -+ -+ type = kargs.type; -+ user_job = kargs.user_job; -+ job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr; ++ struct mali_page_node *m_page, *found_node = NULL; ++ struct page *found_page; ++ mali_mem_swap *swap = NULL; ++ mali_mem_cow *cow = NULL; ++ dma_addr_t dma_addr; ++ u32 i = 0; + -+ mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence); ++ if (MALI_MEM_SWAP == mem_bkend->type) { ++ swap = &mem_bkend->swap_mem; ++ list_for_each_entry(m_page, &swap->pages, list) { ++ if (i == offset) { ++ found_node = m_page; ++ break; ++ } ++ i++; ++ } ++ } else { ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); ++ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (MALI_MEM_BACKEND_FLAG_SWAP_COWED & mem_bkend->flags)); + -+ if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) { -+ MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n")); -+ return -EINVAL; ++ cow = &mem_bkend->cow_mem; ++ list_for_each_entry(m_page, &cow->pages, list) { ++ if (i == offset) { ++ found_node = m_page; ++ break; ++ } ++ i++; ++ } + } + -+ /* Create soft job. */ -+ job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job); -+ if (unlikely(NULL == job)) { -+ return map_errcode(_MALI_OSK_ERR_NOMEM); ++ if (NULL == found_node) { ++ return _MALI_OSK_ERR_FAULT; + } + -+ /* Write job id back to user space. */ -+ if (0 != put_user(job->id, job_id_ptr)) { -+ MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id")); -+ mali_soft_job_destroy(job); -+ return map_errcode(_MALI_OSK_ERR_NOMEM); -+ } ++ found_page = shmem_read_mapping_page(global_swap_space, found_node->swap_it->idx); + -+ /* Start soft job. */ -+ point = mali_soft_job_start(job, &fence); ++ if (!IS_ERR(found_page)) { ++ lock_page(found_page); ++ dma_addr = dma_map_page(&mali_platform_device->dev, found_page, ++ 0, _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); ++ dma_unmap_page(&mali_platform_device->dev, dma_addr, ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); + -+ if (0 != put_user(point, &uargs->point)) { -+ /* Let user space know that something failed after the job was started. 
*/ -+ return -ENOENT; ++ *pagep = found_page; ++ } else { ++ return _MALI_OSK_ERR_NOMEM; + } + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} + -+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs) ++int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) +{ -+ u32 job_id; -+ _mali_osk_errcode_t err; -+ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT; -+ -+ err = mali_soft_job_system_signal_job(session->soft_job_system, job_id); ++ struct mali_page_node *m_page, *found_node = NULL, *new_node = NULL; ++ mali_mem_cow *cow = NULL; ++ u32 i = 0; + -+ return map_errcode(err); -+} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c -new file mode 100644 -index 000000000..ff0c90939 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+#include /* file system operations */ -+#include /* user space access */ ++ MALI_DEBUG_ASSERT(MALI_MEM_COW == mem_bkend->type); ++ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_SWAP_COWED == (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED)); ++ MALI_DEBUG_ASSERT(MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN == (MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN & mem_bkend->flags)); ++ MALI_DEBUG_ASSERT(!mali_memory_swap_backend_in_swapped_pool(mem_bkend)); + -+#include "mali_ukk.h" -+#include "mali_osk.h" -+#include "mali_kernel_common.h" -+#include "mali_session.h" -+#include "mali_ukk_wrappers.h" ++ cow = &mem_bkend->cow_mem; ++ list_for_each_entry(m_page, &cow->pages, list) { ++ if (i == offset) { ++ found_node = m_page; ++ break; ++ } ++ i++; ++ } + -+#include "mali_timeline.h" -+#include "mali_timeline_fence_wait.h" -+#include "mali_timeline_sync_fence.h" ++ if (NULL == found_node) { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs) -+{ -+ u32 val; -+ mali_timeline_id timeline; -+ mali_timeline_point point; ++ new_node = _mali_mem_swap_page_node_allocate(); + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ if (NULL == new_node) { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ if (0 != get_user(val, &uargs->timeline)) return -EFAULT; ++ new_node->swap_it->idx = mali_mem_swap_idx_alloc(); + -+ if (MALI_UK_TIMELINE_MAX <= val) { -+ return -EINVAL; ++ if (_MALI_OSK_BITMAP_INVALIDATE_INDEX == new_node->swap_it->idx) { ++ MALI_DEBUG_PRINT(1, ("Failed to allocate swap index in swap CoW on demand.\n")); ++ kfree(new_node->swap_it); ++ kfree(new_node); ++ return _MALI_OSK_ERR_FAULT; + } + -+ timeline = (mali_timeline_id)val; -+ -+ point = mali_timeline_system_get_latest_point(session->timeline_system, timeline); ++ if (MALI_FALSE == mali_mem_swap_in_page_node(new_node)) { ++ _mali_mem_swap_page_node_free(new_node); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ if (0 != put_user(point, &uargs->point)) return -EFAULT; ++ /* swap 
in found node for copy in kernel. */ ++ if (MALI_FALSE == mali_mem_swap_in_page_node(found_node)) { ++ mali_mem_swap_out_page_node(new_node); ++ _mali_mem_swap_page_node_free(new_node); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ return 0; -+} ++ _mali_mem_cow_copy_page(found_node, new_node); + -+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs) -+{ -+ u32 timeout, status; -+ mali_bool ret; -+ _mali_uk_fence_t uk_fence; -+ struct mali_timeline_fence fence; ++ list_replace(&found_node->list, &new_node->list); + -+ MALI_DEBUG_ASSERT_POINTER(session); ++ if (1 != _mali_page_node_get_ref_count(found_node)) { ++ atomic_add(1, &mem_bkend->mali_allocation->session->mali_mem_allocated_pages); ++ if (atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE > mem_bkend->mali_allocation->session->max_mali_mem_allocated_size) { ++ mem_bkend->mali_allocation->session->max_mali_mem_allocated_size = atomic_read(&mem_bkend->mali_allocation->session->mali_mem_allocated_pages) * MALI_MMU_PAGE_SIZE; ++ } ++ mem_bkend->cow_mem.change_pages_nr++; ++ } + -+ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT; -+ if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT; ++ mali_mem_swap_out_page_node(found_node); ++ _mali_mem_swap_page_node_free(found_node); + -+ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence); ++ /* When swap in the new page node, we have called dma_map_page for this page.\n */ ++ dma_unmap_page(&mali_platform_device->dev, new_node->swap_it->dma_addr, ++ _MALI_OSK_MALI_PAGE_SIZE, DMA_TO_DEVICE); + -+ ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout); -+ status = (MALI_TRUE == ret ? 1 : 0); ++ lock_page(new_node->swap_it->page); + -+ if (0 != put_user(status, &uargs->status)) return -EFAULT; ++ *pagep = new_node->swap_it->page; + -+ return 0; ++ return _MALI_OSK_ERR_OK; +} + -+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs) ++#ifdef MALI_MEM_SWAP_TRACKING ++void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size) +{ -+ s32 sync_fd = -1; -+ _mali_uk_fence_t uk_fence; -+ struct mali_timeline_fence fence; -+ -+ MALI_DEBUG_ASSERT_POINTER(session); -+ -+ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT; -+ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence); -+ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence); -+#else -+ sync_fd = -1; -+#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ -+ -+ if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT; -+ -+ return 0; ++ *swap_pool_size = mem_backend_swapped_pool_size; ++ *unlock_size = mem_backend_swapped_unlock_size; +} -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h new file mode 100644 -index 000000000..52519d1f9 +index 000000000..32f754aea --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c -@@ -0,0 +1,39 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_swap_alloc.h +@@ -0,0 +1,121 @@ +/* -+ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -324325,132 +326160,126 @@ index 000000000..52519d1f9 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+#include /* file system operations */ -+#include /* user space access */ + -+#include "mali_ukk.h" ++#ifndef __MALI_MEMORY_SWAP_ALLOC_H__ ++#define __MALI_MEMORY_SWAP_ALLOC_H__ ++ +#include "mali_osk.h" -+#include "mali_kernel_common.h" +#include "mali_session.h" -+#include "mali_ukk_wrappers.h" + ++#include "mali_memory_types.h" ++#include "mali_pp_job.h" + -+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs) -+{ -+ _mali_uk_vsync_event_report_s kargs; -+ _mali_osk_errcode_t err; ++/** ++ * Initialize memory swapping module. ++ */ ++_mali_osk_errcode_t mali_mem_swap_init(void); + -+ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++void mali_mem_swap_term(void); + -+ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) { -+ return -EFAULT; -+ } ++/** ++ * Return global share memory file to other modules. ++ */ ++struct file *mali_mem_swap_get_global_swap_file(void); + -+ kargs.ctx = (uintptr_t)session_data; -+ err = _mali_ukk_vsync_event_report(&kargs); -+ if (_MALI_OSK_ERR_OK != err) { -+ return map_errcode(err); -+ } ++/** ++ * Unlock the given memory backend and pages in it could be swapped out by kernel. ++ */ ++void mali_mem_swap_unlock_single_mem_backend(mali_mem_backend *mem_bkend); + -+ return 0; -+} ++/** ++ * Remove the given memory backend from global swap list. ++ */ ++void mali_memory_swap_list_backend_delete(mali_mem_backend *mem_bkend); + -diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h -new file mode 100644 -index 000000000..1add628fe ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++/** ++ * Add the given memory backend to global swap list. + */ ++void mali_memory_swap_list_backend_add(mali_mem_backend *mem_bkend); + +/** -+ * @file mali_ukk_wrappers.h -+ * Defines the wrapper functions for each user-kernel function ++ * Allocate 1 index from bitmap used as page index in global swap file. + */ ++u32 mali_mem_swap_idx_alloc(void); + -+#ifndef __MALI_UKK_WRAPPERS_H__ -+#define __MALI_UKK_WRAPPERS_H__ ++void mali_mem_swap_idx_free(u32 idx); + -+#include "mali_uk_types.h" -+#include "mali_osk.h" ++/** ++ * Allocate a new swap item without page index. ++ */ ++struct mali_swap_item *mali_mem_swap_alloc_swap_item(void); + -+#ifdef __cplusplus -+extern "C" { -+#endif ++/** ++ * Free a swap item, truncate the corresponding space in page cache and free index of page. 
++ */ ++void mali_mem_swap_free_swap_item(mali_swap_item *swap_item); + -+int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs); -+int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs); -+int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs); -+int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs); -+int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs); -+int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs); -+int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs); ++/** ++ * Allocate a page node with swap item. ++ */ ++struct mali_page_node *_mali_mem_swap_page_node_allocate(void); + -+/* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. */ -+#if 0 -+int get_mali_version_in_rk30_wrapper(struct mali_session_data *session_data, _mali_uk_get_mali_version_in_rk30_s __user *uargs); -+#else -+int get_rk_ko_version_wrapper(struct mali_session_data *session_data, _mali_rk_ko_version_s __user *uargs); -+#endif ++/** ++ * Reduce the reference count of given page node and if return 0, just free this page node. ++ */ ++_mali_osk_errcode_t _mali_mem_swap_put_page_node(struct mali_page_node *m_page); + -+int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs); -+int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs); -+int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs); -+int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs); -+int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs); -+int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs); -+int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs); -+int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs); -+int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs); -+int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs); -+int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs); ++void _mali_mem_swap_page_node_free(struct mali_page_node *m_page); + -+int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs); -+int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs); -+int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs); -+int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs); -+int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs); -+int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs); -+int 
pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs); -+int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs); -+int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs); -+int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs); -+int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs); -+int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs); -+int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs); -+int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs); ++/** ++ * Free a swappable memory backend. ++ */ ++u32 mali_mem_swap_free(mali_mem_swap *swap_mem); + -+int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs); -+int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs); -+int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs); -+int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs); ++/** ++ * Ummap and free. ++ */ ++u32 mali_mem_swap_release(mali_mem_backend *mem_bkend, mali_bool is_mali_mapped); + -+int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs); ++/** ++ * Read in a page from global swap file with the pre-allcated page index. ++ */ ++mali_bool mali_mem_swap_in_page_node(struct mali_page_node *page_node); + ++int mali_mem_swap_alloc_pages(mali_mem_swap *swap_mem, u32 size, u32 *bkend_idx); + -+int map_errcode(_mali_osk_errcode_t err); ++_mali_osk_errcode_t mali_mem_swap_mali_map(mali_mem_swap *swap_mem, struct mali_session_data *session, u32 vaddr, u32 props); + -+#ifdef __cplusplus -+} ++void mali_mem_swap_mali_unmap(mali_mem_allocation *alloc); ++ ++/** ++ * When pp job created, we need swap in all of memory backend needed by this pp job. ++ */ ++int mali_mem_swap_in_pages(struct mali_pp_job *job); ++ ++/** ++ * Put all of memory backends used this pp job to the global swap list. ++ */ ++int mali_mem_swap_out_pages(struct mali_pp_job *job); ++ ++/** ++ * This will be called in page fault to process CPU read&write. ++ */ ++int mali_mem_swap_allocate_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep) ; ++ ++/** ++ * Used to process cow on demand for swappable memory backend. 
++ */ ++int mali_mem_swap_cow_page_on_demand(mali_mem_backend *mem_bkend, u32 offset, struct page **pagep); ++ ++#ifdef MALI_MEM_SWAP_TRACKING ++void mali_mem_swap_tracking(u32 *swap_pool_size, u32 *unlock_size); +#endif ++#endif /* __MALI_MEMORY_SWAP_ALLOC_H__ */ + -+#endif /* __MALI_UKK_WRAPPERS_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm.c b/drivers/gpu/arm/mali400/mali/platform/arm/arm.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h new file mode 100644 -index 000000000..fc7017bbf +index 000000000..33db40929 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm.c -@@ -0,0 +1,629 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_types.h +@@ -0,0 +1,219 @@ +/* -+ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -324459,633 +326288,582 @@ index 000000000..fc7017bbf + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file mali_platform.c -+ * Platform specific Mali driver functions for: -+ * - Realview Versatile platforms with ARM11 Mpcore and virtex 5. -+ * - Versatile Express platforms with ARM Cortex-A9 and virtex 6. -+ */ -+#include -+#include -+#include -+#include "mali_kernel_linux.h" -+#ifdef CONFIG_PM_RUNTIME -+#include -+#endif -+#include -+#include -+#include "mali_kernel_common.h" -+#include -+#include ++#ifndef __MALI_MEMORY_TYPES_H__ ++#define __MALI_MEMORY_TYPES_H__ + -+#include "arm_core_scaling.h" -+#include "mali_executor.h" ++#include + -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+#include -+#include ++#if defined(CONFIG_MALI400_UMP) ++#include "ump_kernel_interface.h" +#endif + -+static int mali_core_scaling_enable = 0; ++typedef u32 mali_address_t; + -+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data); -+static u32 mali_read_phys(u32 phys_addr); -+#if defined(CONFIG_ARCH_REALVIEW) -+static void mali_write_phys(u32 phys_addr, u32 value); -+#endif ++typedef enum mali_mem_type { ++ MALI_MEM_OS, ++ MALI_MEM_EXTERNAL, ++ MALI_MEM_SWAP, ++ MALI_MEM_DMA_BUF, ++ MALI_MEM_UMP, ++ MALI_MEM_BLOCK, ++ MALI_MEM_COW, ++ MALI_MEM_SECURE, ++ MALI_MEM_TYPE_MAX, ++} mali_mem_type; + -+#if defined(CONFIG_ARCH_VEXPRESS) && defined(CONFIG_ARM64) ++typedef struct mali_block_item { ++ /* for block type, the block_phy is alway page size align ++ * so use low 12bit used for ref_cout. ++ */ ++ unsigned long phy_addr; ++} mali_block_item; + -+#define SECURE_MODE_CONTROL_HANDLER 0x6F02006C -+void *secure_mode_mapped_addr = NULL; +/** -+ * Reset GPU and enable/disable Mali secure mode. -+ * @Return value: -+ * 0: success -+ * non-0: failure. ++ * idx is used to locate the given page in the address space of swap file. ++ * ref_count is used to mark how many memory backends are using this item. 
+ */ ++typedef struct mali_swap_item { ++ u32 idx; ++ atomic_t ref_count; ++ struct page *page; ++ dma_addr_t dma_addr; ++} mali_swap_item; + -+static int mali_gpu_reset_and_secure_mode_enable_juno(void) -+{ -+ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; -+ MALI_DEBUG_ASSERT(NULL != secure_mode_mapped_addr); -+ -+ iowrite32(1, ((u8 *)secure_mode_mapped_addr) + phys_offset); ++typedef enum mali_page_node_type { ++ MALI_PAGE_NODE_OS, ++ MALI_PAGE_NODE_BLOCK, ++ MALI_PAGE_NODE_SWAP, ++} mali_page_node_type; + -+ if (1 == (u32)ioread32(((u8 *)secure_mode_mapped_addr) + phys_offset)) { -+ MALI_DEBUG_PRINT(3, ("Mali reset GPU and enable secured mode successfully! \n")); -+ return 0; -+ } -+ -+ MALI_PRINT_ERROR(("Failed to reset GPU and enable Mali secured mode !!! \n")); -+ -+ return -1; -+ -+} -+ -+static int mali_gpu_reset_and_secure_mode_disable_juno(void) -+{ -+ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; -+ MALI_DEBUG_ASSERT(NULL != secure_mode_mapped_addr); ++typedef struct mali_page_node { ++ struct list_head list; ++ union { ++ struct page *page; ++ mali_block_item *blk_it; /*pointer to block item*/ ++ mali_swap_item *swap_it; ++ }; + -+ iowrite32(0, ((u8 *)secure_mode_mapped_addr) + phys_offset); ++ u32 type; ++} mali_page_node; + -+ if (0 == (u32)ioread32(((u8 *)secure_mode_mapped_addr) + phys_offset)) { -+ MALI_DEBUG_PRINT(3, ("Mali reset GPU and disable secured mode successfully! \n")); -+ return 0; -+ } ++typedef struct mali_mem_os_mem { ++ struct list_head pages; ++ u32 count; ++} mali_mem_os_mem; + -+ MALI_PRINT_ERROR(("Failed to reset GPU and disable mali secured mode !!! \n")); -+ return -1; -+} ++typedef struct mali_mem_dma_buf { ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ struct mali_dma_buf_attachment *attachment; ++#endif ++} mali_mem_dma_buf; + -+static int mali_secure_mode_init_juno(void) -+{ -+ u32 phys_addr_page = SECURE_MODE_CONTROL_HANDLER & 0xFFFFE000; -+ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; -+ u32 map_size = phys_offset + sizeof(u32); ++typedef struct mali_mem_external { ++ dma_addr_t phys; ++ u32 size; ++} mali_mem_external; + -+ MALI_DEBUG_ASSERT(NULL == secure_mode_mapped_addr); ++typedef struct mali_mem_ump { ++#if defined(CONFIG_MALI400_UMP) ++ ump_dd_handle handle; ++#endif ++} mali_mem_ump; + -+ secure_mode_mapped_addr = ioremap(phys_addr_page, map_size); -+ if (NULL != secure_mode_mapped_addr) { -+ return mali_gpu_reset_and_secure_mode_disable_juno(); -+ } -+ MALI_DEBUG_PRINT(2, ("Failed to ioremap for Mali secured mode! 
\n")); -+ return -1; -+} ++typedef struct block_allocator_allocation { ++ /* The list will be released in reverse order */ ++ struct block_info *last_allocated; ++ u32 mapping_length; ++ struct block_allocator *info; ++} block_allocator_allocation; + -+static void mali_secure_mode_deinit_juno(void) -+{ -+ if (NULL != secure_mode_mapped_addr) { -+ mali_gpu_reset_and_secure_mode_disable_juno(); -+ iounmap(secure_mode_mapped_addr); -+ secure_mode_mapped_addr = NULL; -+ } -+} -+#endif ++typedef struct mali_mem_block_mem { ++ struct list_head pfns; ++ u32 count; ++} mali_mem_block_mem; + -+#ifndef CONFIG_MALI_DT -+static void mali_platform_device_release(struct device *device); ++typedef struct mali_mem_virt_mali_mapping { ++ mali_address_t addr; /* Virtual Mali address */ ++ u32 properties; /* MMU Permissions + cache, must match MMU HW */ ++} mali_mem_virt_mali_mapping; + -+#if defined(CONFIG_ARCH_VEXPRESS) ++typedef struct mali_mem_virt_cpu_mapping { ++ void __user *addr; ++ struct vm_area_struct *vma; ++} mali_mem_virt_cpu_mapping; + -+#if defined(CONFIG_ARM64) -+/* Juno + Mali-450 MP6 in V7 FPGA */ -+static struct resource mali_gpu_resources_m450_mp6[] = { -+ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200) -+}; ++#define MALI_MEM_ALLOCATION_VALID_MAGIC 0xdeda110c ++#define MALI_MEM_ALLOCATION_FREED_MAGIC 0x10101010 + -+static struct resource mali_gpu_resources_m470_mp4[] = { -+ MALI_GPU_RESOURCES_MALI470_MP4_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200) -+}; ++typedef struct mali_mm_node { ++ /* MALI GPU vaddr start, use u32 for mmu only support 32bit address*/ ++ uint32_t start; /* GPU vaddr */ ++ uint32_t size; /* GPU allocation virtual size */ ++ unsigned allocated : 1; ++} mali_mm_node; + -+static struct resource mali_gpu_resources_m470_mp3[] = { -+ MALI_GPU_RESOURCES_MALI470_MP3_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200) -+}; ++typedef struct mali_vma_node { ++ struct mali_mm_node vm_node; ++ struct rb_node vm_rb; ++} mali_vma_node; + -+static struct resource mali_gpu_resources_m470_mp2[] = { -+ MALI_GPU_RESOURCES_MALI470_MP2_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200) -+}; + -+static struct resource mali_gpu_resources_m470_mp1[] = { -+ MALI_GPU_RESOURCES_MALI470_MP1_PMU(0x6F040000, 200, 200, 200, 200, 200) -+}; ++typedef struct mali_mem_allocation { ++ MALI_DEBUG_CODE(u32 magic); ++ mali_mem_type type; /**< Type of memory */ ++ u32 flags; /**< Flags for this allocation */ + -+#else -+static struct resource mali_gpu_resources_m450_mp8[] = { -+ MALI_GPU_RESOURCES_MALI450_MP8_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) -+}; ++ struct mali_session_data *session; /**< Pointer to session that owns the allocation */ + -+static struct resource mali_gpu_resources_m450_mp6[] = { -+ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) -+}; ++ mali_mem_virt_cpu_mapping cpu_mapping; /**< CPU mapping */ ++ mali_mem_virt_mali_mapping mali_mapping; /**< Mali mapping */ + -+static struct resource mali_gpu_resources_m450_mp4[] = { -+ MALI_GPU_RESOURCES_MALI450_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) -+}; ++ /* add for new memory system */ ++ struct mali_vma_node mali_vma_node; ++ u32 vsize; /* virtual size*/ ++ u32 psize; /* physical backend memory size*/ ++ struct list_head list; ++ s32 backend_handle; /* idr for mem_backend */ ++ _mali_osk_atomic_t 
mem_alloc_refcount; ++} mali_mem_allocation; + -+static struct resource mali_gpu_resources_m470_mp4[] = { -+ MALI_GPU_RESOURCES_MALI470_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) -+}; -+#endif /* CONFIG_ARM64 */ ++struct mali_mem_os_allocator { ++ spinlock_t pool_lock; ++ struct list_head pool_pages; ++ size_t pool_count; + -+#elif defined(CONFIG_ARCH_REALVIEW) ++ atomic_t allocated_pages; ++ size_t allocation_limit; + -+static struct resource mali_gpu_resources_m300[] = { -+ MALI_GPU_RESOURCES_MALI300_PMU(0xC0000000, -1, -1, -1, -1) ++ struct shrinker shrinker; ++ struct delayed_work timed_shrinker; ++ struct workqueue_struct *wq; +}; + -+static struct resource mali_gpu_resources_m400_mp1[] = { -+ MALI_GPU_RESOURCES_MALI400_MP1_PMU(0xC0000000, -1, -1, -1, -1) -+}; ++/* COW backend memory type */ ++typedef struct mali_mem_cow { ++ struct list_head pages; /**< all pages for this cow backend allocation, ++ including new allocated pages for modified range*/ ++ u32 count; /**< number of pages */ ++ s32 change_pages_nr; ++} mali_mem_cow; + -+static struct resource mali_gpu_resources_m400_mp2[] = { -+ MALI_GPU_RESOURCES_MALI400_MP2_PMU(0xC0000000, -1, -1, -1, -1, -1, -1) -+}; ++typedef struct mali_mem_swap { ++ struct list_head pages; ++ u32 count; ++} mali_mem_swap; + ++typedef struct mali_mem_secure { ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ struct dma_buf *buf; ++ struct dma_buf_attachment *attachment; ++ struct sg_table *sgt; +#endif -+#endif ++ u32 count; ++} mali_mem_secure; + -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++#define MALI_MEM_BACKEND_FLAG_COWED (0x1) /* COW has happen on this backend */ ++#define MALI_MEM_BACKEND_FLAG_COW_CPU_NO_WRITE (0x2) /* This is an COW backend, mapped as not allowed cpu to write */ ++#define MALI_MEM_BACKEND_FLAG_SWAP_COWED (0x4) /* Mark the given backend is cowed from swappable memory. */ ++/* Mark this backend is not swapped_in in MALI driver, and before using it, ++ * we should swap it in and set up corresponding page table. */ ++#define MALI_MEM_BACKEND_FLAG_UNSWAPPED_IN (0x8) ++#define MALI_MEM_BACKEND_FLAG_NOT_BINDED (0x1 << 5) /* this backend it not back with physical memory, used for defer bind */ ++#define MALI_MEM_BACKEND_FLAG_BINDED (0x1 << 6) /* this backend it back with physical memory, used for defer bind */ + -+#define FALLBACK_STATIC_TEMPERATURE 55000 ++typedef struct mali_mem_backend { ++ mali_mem_type type; /**< Type of backend memory */ ++ u32 flags; /**< Flags for this allocation */ ++ u32 size; ++ /* Union selected by type. 
*/ ++ union { ++ mali_mem_os_mem os_mem; /**< MALI_MEM_OS */ ++ mali_mem_external ext_mem; /**< MALI_MEM_EXTERNAL */ ++ mali_mem_dma_buf dma_buf; /**< MALI_MEM_DMA_BUF */ ++ mali_mem_ump ump_mem; /**< MALI_MEM_UMP */ ++ mali_mem_block_mem block_mem; /**< MALI_MEM_BLOCK */ ++ mali_mem_cow cow_mem; ++ mali_mem_swap swap_mem; ++ mali_mem_secure secure_mem; ++ }; ++ mali_mem_allocation *mali_allocation; ++ struct mutex mutex; ++ mali_mem_type cow_type; + -+static struct thermal_zone_device *gpu_tz; ++ struct list_head list; /**< Used to link swappable memory backend to the global swappable list */ ++ int using_count; /**< Mark how many PP jobs are using this memory backend */ ++ u32 start_idx; /**< If the correspondign vma of this backend is linear, this value will be used to set vma->vm_pgoff */ ++} mali_mem_backend; + -+/* Calculate gpu static power example for reference */ -+static unsigned long arm_model_static_power(struct devfreq *devfreq, -+ unsigned long voltage) -+{ -+ int temperature, temp; -+ int temp_squared, temp_cubed, temp_scaling_factor; -+ const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); -+ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; -+ unsigned long static_power; ++#define MALI_MEM_FLAG_MALI_GUARD_PAGE (_MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) ++#define MALI_MEM_FLAG_DONT_CPU_MAP (1 << 1) ++#define MALI_MEM_FLAG_CAN_RESIZE (_MALI_MEMORY_ALLOCATE_RESIZEABLE) ++#endif /* __MALI_MEMORY_TYPES__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c +new file mode 100644 +index 000000000..666d4b0fb +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.c +@@ -0,0 +1,154 @@ ++/* ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (gpu_tz) { -+ int ret; ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_kernel_linux.h" ++#include "mali_memory.h" ++#include "ump_kernel_interface.h" + -+ ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); -+ if (ret) { -+ MALI_DEBUG_PRINT(2, ("Error reading temperature for gpu thermal zone: %d\n", ret)); -+ temperature = FALLBACK_STATIC_TEMPERATURE; -+ } -+ } else { -+ temperature = FALLBACK_STATIC_TEMPERATURE; -+ } ++static int mali_mem_ump_map(mali_mem_backend *mem_backend) ++{ ++ ump_dd_handle ump_mem; ++ mali_mem_allocation *alloc; ++ struct mali_session_data *session; ++ u32 nr_blocks; ++ u32 i; ++ ump_dd_physical_block *ump_blocks; ++ struct mali_page_directory *pagedir; ++ u32 offset = 0; ++ _mali_osk_errcode_t err; + -+ /* Calculate the temperature scaling factor. To be applied to the -+ * voltage scaled power. 
-+ */ -+ temp = temperature / 1000; -+ temp_squared = temp * temp; -+ temp_cubed = temp_squared * temp; -+ temp_scaling_factor = -+ (2 * temp_cubed) -+ - (80 * temp_squared) -+ + (4700 * temp) -+ + 32000; ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); + -+ static_power = (((coefficient * voltage_cubed) >> 20) -+ * temp_scaling_factor) -+ / 1000000; ++ alloc = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); + -+ return static_power; -+} ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+/* Calculate gpu dynamic power example for reference */ -+static unsigned long arm_model_dynamic_power(struct devfreq *devfreq, -+ unsigned long freq, -+ unsigned long voltage) -+{ -+ /* The inputs: freq (f) is in Hz, and voltage (v) in mV. -+ * The coefficient (c) is in mW/(MHz mV mV). -+ * -+ * This function calculates the dynamic power after this formula: -+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) -+ */ -+ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ -+ const unsigned long f_mhz = freq / 1000000; /* MHz */ -+ const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ -+ unsigned long dynamic_power; ++ ump_mem = mem_backend->ump_mem.handle; ++ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem); + -+ dynamic_power = (coefficient * v2 * f_mhz) / 1000000; /* mW */ ++ nr_blocks = ump_dd_phys_block_count_get(ump_mem); ++ if (nr_blocks == 0) { ++ MALI_DEBUG_PRINT(1, ("No block count\n")); ++ return -EINVAL; ++ } + -+ return dynamic_power; -+} ++ ump_blocks = _mali_osk_malloc(sizeof(*ump_blocks) * nr_blocks); ++ if (NULL == ump_blocks) { ++ return -ENOMEM; ++ } + -+struct devfreq_cooling_power arm_cooling_ops = { -+ .get_static_power = arm_model_static_power, -+ .get_dynamic_power = arm_model_dynamic_power, -+}; -+#endif ++ if (UMP_DD_INVALID == ump_dd_phys_blocks_get(ump_mem, ump_blocks, nr_blocks)) { ++ _mali_osk_free(ump_blocks); ++ return -EFAULT; ++ } + -+static struct mali_gpu_device_data mali_gpu_data = { -+#ifndef CONFIG_MALI_DT -+ .pmu_switch_delay = 0xFF, /* do not have to be this high on FPGA, but it is good for testing to have a delay */ -+#if defined(CONFIG_ARCH_VEXPRESS) -+ .shared_mem_size = 256 * 1024 * 1024, /* 256MB */ -+#endif -+#endif -+ .max_job_runtime = 60000, /* 60 seconds */ ++ pagedir = session->page_directory; + -+#if defined(CONFIG_ARCH_REALVIEW) -+ .dedicated_mem_start = 0x80000000, /* Physical start address (use 0xD0000000 for old indirect setup) */ -+ .dedicated_mem_size = 0x10000000, /* 256MB */ -+#endif -+#if defined(CONFIG_ARM64) -+ /* Some framebuffer drivers get the framebuffer dynamically, such as through GEM, -+ * in which the memory resource can't be predicted in advance. 
-+ */ -+ .fb_start = 0x0, -+ .fb_size = 0xFFFFF000, -+#else -+ .fb_start = 0xe0000000, -+ .fb_size = 0x01000000, -+#endif -+ .control_interval = 1000, /* 1000ms */ -+ .utilization_callback = mali_gpu_utilization_callback, -+ .get_clock_info = NULL, -+ .get_freq = NULL, -+ .set_freq = NULL, -+#if defined(CONFIG_ARCH_VEXPRESS) && defined(CONFIG_ARM64) -+ .secure_mode_init = mali_secure_mode_init_juno, -+ .secure_mode_deinit = mali_secure_mode_deinit_juno, -+ .gpu_reset_and_secure_mode_enable = mali_gpu_reset_and_secure_mode_enable_juno, -+ .gpu_reset_and_secure_mode_disable = mali_gpu_reset_and_secure_mode_disable_juno, -+#else -+ .secure_mode_init = NULL, -+ .secure_mode_deinit = NULL, -+ .gpu_reset_and_secure_mode_enable = NULL, -+ .gpu_reset_and_secure_mode_disable = NULL, -+#endif -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+ .gpu_cooling_ops = &arm_cooling_ops, -+#endif -+}; ++ mali_session_memory_lock(session); + -+#ifndef CONFIG_MALI_DT -+static struct platform_device mali_gpu_device = { -+ .name = MALI_GPU_NAME_UTGARD, -+ .id = 0, -+ .dev.release = mali_platform_device_release, -+ .dev.dma_mask = &mali_gpu_device.dev.coherent_dma_mask, -+ .dev.coherent_dma_mask = DMA_BIT_MASK(32), ++ err = mali_mem_mali_map_prepare(alloc); ++ if (_MALI_OSK_ERR_OK != err) { ++ MALI_DEBUG_PRINT(1, ("Mapping of UMP memory failed\n")); + -+ .dev.platform_data = &mali_gpu_data, -+}; ++ _mali_osk_free(ump_blocks); ++ mali_session_memory_unlock(session); ++ return -ENOMEM; ++ } + -+int mali_platform_device_register(void) -+{ -+ int err = -1; -+ int num_pp_cores = 0; -+#if defined(CONFIG_ARCH_REALVIEW) -+ u32 m400_gp_version; -+#endif ++ for (i = 0; i < nr_blocks; ++i) { ++ u32 virt = alloc->mali_vma_node.vm_node.start + offset; + -+ MALI_DEBUG_PRINT(4, ("mali_platform_device_register() called\n")); ++ MALI_DEBUG_PRINT(7, ("Mapping in 0x%08x size %d\n", ump_blocks[i].addr , ump_blocks[i].size)); + -+ /* Detect present Mali GPU and connect the correct resources to the device */ -+#if defined(CONFIG_ARCH_VEXPRESS) ++ mali_mmu_pagedir_update(pagedir, virt, ump_blocks[i].addr, ++ ump_blocks[i].size, MALI_MMU_FLAGS_DEFAULT); + -+#if defined(CONFIG_ARM64) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) -+ mali_gpu_device.dev.archdata.dma_ops = &dummy_dma_ops; -+#else -+ mali_gpu_device.dev.archdata.dma_ops = dma_ops; -+#endif -+ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); -+ num_pp_cores = 6; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6); -+ mali_gpu_device.resource = mali_gpu_resources_m450_mp6; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); -+ num_pp_cores = 4; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4); -+ mali_gpu_device.resource = mali_gpu_resources_m470_mp4; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n")); -+ num_pp_cores = 3; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp3); -+ mali_gpu_device.resource = mali_gpu_resources_m470_mp3; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n")); -+ num_pp_cores = 2; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp2); -+ mali_gpu_device.resource = mali_gpu_resources_m470_mp2; -+ } else if 
((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n")); -+ num_pp_cores = 1; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp1); -+ mali_gpu_device.resource = mali_gpu_resources_m470_mp1; -+ } -+#else -+ if (mali_read_phys(0xFC000000) == 0x00000450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n")); -+ num_pp_cores = 8; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp8); -+ mali_gpu_device.resource = mali_gpu_resources_m450_mp8; -+ } else if (mali_read_phys(0xFC000000) == 0x40600450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); -+ num_pp_cores = 6; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6); -+ mali_gpu_device.resource = mali_gpu_resources_m450_mp6; -+ } else if (mali_read_phys(0xFC000000) == 0x40400450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n")); -+ num_pp_cores = 4; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp4); -+ mali_gpu_device.resource = mali_gpu_resources_m450_mp4; -+ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); -+ num_pp_cores = 4; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4); -+ mali_gpu_device.resource = mali_gpu_resources_m470_mp4; ++ offset += ump_blocks[i].size; + } -+#endif /* CONFIG_ARM64 */ + -+#elif defined(CONFIG_ARCH_REALVIEW) ++ if (alloc->flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { ++ u32 virt = alloc->mali_vma_node.vm_node.start + offset; + -+ m400_gp_version = mali_read_phys(0xC000006C); -+ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n")); -+ num_pp_cores = 1; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m300); -+ mali_gpu_device.resource = mali_gpu_resources_m300; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) { -+ u32 fpga_fw_version = mali_read_phys(0xC0010000); -+ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) { -+ /* Mali-400 MP1 r1p0 or r1p1 */ -+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n")); -+ num_pp_cores = 1; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp1); -+ mali_gpu_device.resource = mali_gpu_resources_m400_mp1; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } else if (fpga_fw_version == 0x130C000F) { -+ /* Mali-400 MP2 r1p1 */ -+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n")); -+ num_pp_cores = 2; -+ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp2); -+ mali_gpu_device.resource = mali_gpu_resources_m400_mp2; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } -+ } ++ /* Map in an extra virtual guard page at the end of the VMA */ ++ MALI_DEBUG_PRINT(6, ("Mapping in extra guard page\n")); + -+#endif -+ /* Register the platform device */ -+ err = platform_device_register(&mali_gpu_device); -+ if (0 == err) { -+#ifdef CONFIG_PM_RUNTIME -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) -+ pm_runtime_set_autosuspend_delay(&(mali_gpu_device.dev), 1000); -+ pm_runtime_use_autosuspend(&(mali_gpu_device.dev)); -+#endif -+ pm_runtime_enable(&(mali_gpu_device.dev)); -+#endif -+ MALI_DEBUG_ASSERT(0 < num_pp_cores); -+ mali_core_scaling_init(num_pp_cores); ++ mali_mmu_pagedir_update(pagedir, virt, 
ump_blocks[0].addr, _MALI_OSK_MALI_PAGE_SIZE, MALI_MMU_FLAGS_DEFAULT); + -+ return 0; ++ offset += _MALI_OSK_MALI_PAGE_SIZE; + } ++ mali_session_memory_unlock(session); ++ _mali_osk_free(ump_blocks); ++ return 0; ++} + -+ return err; ++static void mali_mem_ump_unmap(mali_mem_allocation *alloc) ++{ ++ struct mali_session_data *session; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ session = alloc->session; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ mali_session_memory_lock(session); ++ mali_mem_mali_map_free(session, alloc->psize, alloc->mali_vma_node.vm_node.start, ++ alloc->flags); ++ mali_session_memory_unlock(session); +} + -+void mali_platform_device_unregister(void) ++int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags) +{ -+ MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n")); ++ ump_dd_handle ump_mem; ++ int ret; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); + -+ mali_core_scaling_term(); -+#ifdef CONFIG_PM_RUNTIME -+ pm_runtime_disable(&(mali_gpu_device.dev)); -+#endif -+ platform_device_unregister(&mali_gpu_device); ++ MALI_DEBUG_PRINT(3, ++ ("Requested to map ump memory with secure id %d into virtual memory 0x%08X, size 0x%08X\n", ++ secure_id, alloc->mali_vma_node.vm_node.start, alloc->mali_vma_node.vm_node.size)); + -+ platform_device_put(&mali_gpu_device); ++ ump_mem = ump_dd_handle_create_from_secure_id(secure_id); ++ if (UMP_DD_HANDLE_INVALID == ump_mem) MALI_ERROR(_MALI_OSK_ERR_FAULT); ++ alloc->flags |= MALI_MEM_FLAG_DONT_CPU_MAP; ++ if (flags & _MALI_MAP_EXTERNAL_MAP_GUARD_PAGE) { ++ alloc->flags |= MALI_MEM_FLAG_MALI_GUARD_PAGE; ++ } + -+#if defined(CONFIG_ARCH_REALVIEW) -+ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */ -+#endif -+} ++ mem_backend->ump_mem.handle = ump_mem; + -+static void mali_platform_device_release(struct device *device) -+{ -+ MALI_DEBUG_PRINT(4, ("mali_platform_device_release() called\n")); ++ ret = mali_mem_ump_map(mem_backend); ++ if (0 != ret) { ++ ump_dd_reference_release(ump_mem); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ MALI_DEBUG_PRINT(3, ("Returning from UMP bind\n")); ++ return _MALI_OSK_ERR_OK; +} + -+#else /* CONFIG_MALI_DT */ -+int mali_platform_device_init(struct platform_device *device) ++void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend) +{ -+ int num_pp_cores = 0; -+ int err = -1; -+#if defined(CONFIG_ARCH_REALVIEW) -+ u32 m400_gp_version; -+#endif ++ ump_dd_handle ump_mem; ++ mali_mem_allocation *alloc; ++ MALI_DEBUG_ASSERT_POINTER(mem_backend); ++ MALI_DEBUG_ASSERT(MALI_MEM_UMP == mem_backend->type); ++ ump_mem = mem_backend->ump_mem.handle; ++ MALI_DEBUG_ASSERT(UMP_DD_HANDLE_INVALID != ump_mem); + -+ /* Detect present Mali GPU and connect the correct resources to the device */ -+#if defined(CONFIG_ARCH_VEXPRESS) ++ alloc = mem_backend->mali_allocation; ++ MALI_DEBUG_ASSERT_POINTER(alloc); ++ mali_mem_ump_unmap(alloc); ++ ump_dd_reference_release(ump_mem); ++} + -+#if defined(CONFIG_ARM64) -+ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); -+ num_pp_cores = 6; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); -+ num_pp_cores = 4; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n")); -+ 
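/*
 * Minimal sketch of the probe pattern used in the detection code above: read
 * a product/version register, mask out the product field, and map it to a PP
 * core count. The mask/value pairs mirror the Mali-470 MPx checks in the
 * vexpress example; the table is deliberately partial and only illustrative.
 */
#include <stdio.h>
#include <stddef.h>

struct gpu_id_entry {
        unsigned int mask;
        unsigned int value;
        int num_pp_cores;
};

static const struct gpu_id_entry gpu_id_table[] = {
        { 0x00F00430, 0x00400430, 4 }, /* Mali-470 MP4 */
        { 0x00F00430, 0x00300430, 3 }, /* Mali-470 MP3 */
        { 0x00F00430, 0x00200430, 2 }, /* Mali-470 MP2 */
        { 0x00F00430, 0x00100430, 1 }, /* Mali-470 MP1 */
};

static int pp_cores_from_id(unsigned int id_reg)
{
        size_t i;

        for (i = 0; i < sizeof(gpu_id_table) / sizeof(gpu_id_table[0]); i++)
                if ((id_reg & gpu_id_table[i].mask) == gpu_id_table[i].value)
                        return gpu_id_table[i].num_pp_cores;
        return 0; /* unknown part */
}

int main(void)
{
        printf("cores: %d\n", pp_cores_from_id(0x00300430)); /* prints 3 */
        return 0;
}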
num_pp_cores = 3; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n")); -+ num_pp_cores = 2; -+ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n")); -+ num_pp_cores = 1; -+ } -+#else -+ if (mali_read_phys(0xFC000000) == 0x00000450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n")); -+ num_pp_cores = 8; -+ } else if (mali_read_phys(0xFC000000) == 0x40400450) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n")); -+ num_pp_cores = 4; -+ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); -+ num_pp_cores = 4; -+ } -+#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h +new file mode 100644 +index 000000000..c314c8dcb +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_ump.h +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (C) 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+#elif defined(CONFIG_ARCH_REALVIEW) ++#ifndef __MALI_MEMORY_UMP_BUF_H__ ++#define __MALI_MEMORY_UMP_BUF_H__ + -+ m400_gp_version = mali_read_phys(0xC000006C); -+ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) { -+ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n")); -+ num_pp_cores = 1; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) { -+ u32 fpga_fw_version = mali_read_phys(0xC0010000); -+ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) { -+ /* Mali-400 MP1 r1p0 or r1p1 */ -+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n")); -+ num_pp_cores = 1; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } else if (fpga_fw_version == 0x130C000F) { -+ /* Mali-400 MP2 r1p1 */ -+ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n")); -+ num_pp_cores = 2; -+ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ -+ } -+ } ++#ifdef __cplusplus ++extern "C" { +#endif + -+ /* After kernel 3.15 device tree will default set dev -+ * related parameters in of_platform_device_create_pdata. -+ * But kernel changes from version to version, -+ * For example 3.10 didn't include device->dev.dma_mask parameter setting, -+ * if we didn't include here will cause dma_mapping error, -+ * but in kernel 3.15 it include device->dev.dma_mask parameter setting, -+ * so it's better to set must need paramter by DDK itself. 
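/*
 * Small sketch of the fallback described in the comment above: older kernels
 * did not populate dev.dma_mask when creating the platform device from DT, so
 * the driver points it at the coherent mask itself before any dma_map_* call.
 * The 32-bit mask matches the DMA_BIT_MASK(32) used by the static (non-DT)
 * platform device earlier in this hunk; the helper name is illustrative.
 */
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>

static int example_setup_dma_mask(struct platform_device *pdev)
{
        if (!pdev->dev.dma_mask)
                pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;

        return dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
}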
-+ */ -+ if (!device->dev.dma_mask) -+ device->dev.dma_mask = &device->dev.coherent_dma_mask; -+ device->dev.archdata.dma_ops = dma_ops; ++#include "mali_uk_types.h" ++#include "mali_osk.h" ++#include "mali_memory.h" + -+ err = platform_device_add_data(device, &mali_gpu_data, sizeof(mali_gpu_data)); ++int mali_mem_bind_ump_buf(mali_mem_allocation *alloc, mali_mem_backend *mem_backend, u32 secure_id, u32 flags); ++void mali_mem_unbind_ump_buf(mali_mem_backend *mem_backend); + -+ if (0 == err) { -+#ifdef CONFIG_PM_RUNTIME -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) -+ pm_runtime_set_autosuspend_delay(&(device->dev), 1000); -+ pm_runtime_use_autosuspend(&(device->dev)); -+#endif -+ pm_runtime_enable(&(device->dev)); ++#ifdef __cplusplus ++} +#endif -+ MALI_DEBUG_ASSERT(0 < num_pp_cores); -+ mali_core_scaling_init(num_pp_cores); -+ } + -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+ /* Some Socs didn't support the devfreq thermal for mali */ -+ if (of_machine_is_compatible("rockchip,rk3036")) -+ return 0; ++#endif /* __MALI_MEMORY_DMA_BUF_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c +new file mode 100644 +index 000000000..8e13e923c +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.c +@@ -0,0 +1,158 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ /* Get thermal zone */ -+ gpu_tz = thermal_zone_get_zone_by_name("soc_thermal"); -+ if (IS_ERR(gpu_tz)) { -+ MALI_DEBUG_PRINT(2, ("Error getting gpu thermal zone (%ld), not yet ready?\n", -+ PTR_ERR(gpu_tz))); -+ gpu_tz = NULL; ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ err = -EPROBE_DEFER; -+ } ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_kernel_linux.h" ++#include "mali_scheduler.h" ++ ++#include "mali_memory.h" ++#include "mali_memory_os_alloc.h" ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include "mali_memory_dma_buf.h" ++#include "mali_memory_secure.h" ++#endif ++#if defined(CONFIG_MALI400_UMP) ++#include "mali_memory_ump.h" +#endif ++#include "mali_memory_external.h" ++#include "mali_memory_manager.h" ++#include "mali_memory_virtual.h" ++#include "mali_memory_cow.h" ++#include "mali_memory_block_alloc.h" ++#include "mali_memory_swap_alloc.h" + -+ return err; -+} + -+int mali_platform_device_deinit(struct platform_device *device) ++ ++/** ++*function @_mali_free_allocation_mem - free a memory allocation ++*/ ++static u32 _mali_free_allocation_mem(mali_mem_allocation *mali_alloc) +{ -+ MALI_IGNORE(device); ++ mali_mem_backend *mem_bkend = NULL; ++ u32 free_pages_nr = 0; + -+ MALI_DEBUG_PRINT(4, ("mali_platform_device_deinit() called\n")); ++ struct mali_session_data *session = mali_alloc->session; ++ MALI_DEBUG_PRINT(4, (" _mali_free_allocation_mem, psize =0x%x! 
\n", mali_alloc->psize)); ++ if (0 == mali_alloc->psize) ++ goto out; + -+ mali_core_scaling_term(); -+#ifdef CONFIG_PM_RUNTIME -+ pm_runtime_disable(&(device->dev)); -+#endif ++ /* Get backend memory & Map on CPU */ ++ mutex_lock(&mali_idr_mutex); ++ mem_bkend = idr_find(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ MALI_DEBUG_ASSERT(NULL != mem_bkend); + -+#if defined(CONFIG_ARCH_REALVIEW) -+ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */ ++ switch (mem_bkend->type) { ++ case MALI_MEM_OS: ++ free_pages_nr = mali_mem_os_release(mem_bkend); ++ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); ++ break; ++ case MALI_MEM_UMP: ++#if defined(CONFIG_MALI400_UMP) ++ mali_mem_unbind_ump_buf(mem_bkend); ++ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); ++#else ++ MALI_DEBUG_PRINT(1, ("UMP not supported\n")); +#endif ++ break; ++ case MALI_MEM_DMA_BUF: ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ mali_mem_unbind_dma_buf(mem_bkend); ++ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); ++#else ++ MALI_DEBUG_PRINT(1, ("DMA not supported\n")); ++#endif ++ break; ++ case MALI_MEM_EXTERNAL: ++ mali_mem_unbind_ext_buf(mem_bkend); ++ atomic_sub(mem_bkend->size / MALI_MMU_PAGE_SIZE, &session->mali_mem_array[mem_bkend->type]); ++ break; + -+ return 0; -+} -+ -+#endif /* CONFIG_MALI_DT */ ++ case MALI_MEM_BLOCK: ++ free_pages_nr = mali_mem_block_release(mem_bkend); ++ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); ++ break; + -+static u32 mali_read_phys(u32 phys_addr) -+{ -+ u32 phys_addr_page = phys_addr & 0xFFFFE000; -+ u32 phys_offset = phys_addr & 0x00001FFF; -+ u32 map_size = phys_offset + sizeof(u32); -+ u32 ret = 0xDEADBEEF; -+ void *mem_mapped = ioremap(phys_addr_page, map_size); -+ if (NULL != mem_mapped) { -+ ret = (u32)ioread32(((u8 *)mem_mapped) + phys_offset); -+ iounmap(mem_mapped); ++ case MALI_MEM_COW: ++ if (mem_bkend->flags & MALI_MEM_BACKEND_FLAG_SWAP_COWED) { ++ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE); ++ } else { ++ free_pages_nr = mali_mem_cow_release(mem_bkend, MALI_TRUE); ++ } ++ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); ++ break; ++ case MALI_MEM_SWAP: ++ free_pages_nr = mali_mem_swap_release(mem_bkend, MALI_TRUE); ++ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); ++ atomic_sub(free_pages_nr, &session->mali_mem_array[mem_bkend->type]); ++ break; ++ case MALI_MEM_SECURE: ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ free_pages_nr = mali_mem_secure_release(mem_bkend); ++ atomic_sub(free_pages_nr, &session->mali_mem_allocated_pages); ++#else ++ MALI_DEBUG_PRINT(1, ("DMA not supported for mali secure memory\n")); ++#endif ++ break; ++ default: ++ MALI_DEBUG_PRINT(1, ("mem type %d is not in the mali_mem_type enum.\n", mem_bkend->type)); ++ break; + } + -+ return ret; ++ /*Remove backend memory idex */ ++ mutex_lock(&mali_idr_mutex); ++ idr_remove(&mali_backend_idr, mali_alloc->backend_handle); ++ mutex_unlock(&mali_idr_mutex); ++ kfree(mem_bkend); ++out: ++ /* remove memory allocation */ ++ mali_vma_offset_remove(&session->allocation_mgr, &mali_alloc->mali_vma_node); ++ mali_mem_allocation_struct_destory(mali_alloc); ++ return free_pages_nr; +} + -+#if defined(CONFIG_ARCH_REALVIEW) -+static void mali_write_phys(u32 phys_addr, u32 value) ++/** ++* ref_count for allocation ++*/ ++u32 mali_allocation_unref(struct mali_mem_allocation **alloc) +{ -+ u32 
phys_addr_page = phys_addr & 0xFFFFE000; -+ u32 phys_offset = phys_addr & 0x00001FFF; -+ u32 map_size = phys_offset + sizeof(u32); -+ void *mem_mapped = ioremap(phys_addr_page, map_size); -+ if (NULL != mem_mapped) { -+ iowrite32(value, ((u8 *)mem_mapped) + phys_offset); -+ iounmap(mem_mapped); ++ u32 free_pages_nr = 0; ++ mali_mem_allocation *mali_alloc = *alloc; ++ *alloc = NULL; ++ if (0 == _mali_osk_atomic_dec_return(&mali_alloc->mem_alloc_refcount)) { ++ free_pages_nr = _mali_free_allocation_mem(mali_alloc); + } ++ return free_pages_nr; +} -+#endif + -+static int param_set_core_scaling(const char *val, const struct kernel_param *kp) ++void mali_allocation_ref(struct mali_mem_allocation *alloc) +{ -+ int ret = param_set_int(val, kp); -+ -+ if (1 == mali_core_scaling_enable) { -+ mali_core_scaling_sync(mali_executor_get_num_cores_enabled()); -+ } -+ return ret; ++ _mali_osk_atomic_inc(&alloc->mem_alloc_refcount); +} + -+static struct kernel_param_ops param_ops_core_scaling = { -+ .set = param_set_core_scaling, -+ .get = param_get_int, -+}; ++void mali_free_session_allocations(struct mali_session_data *session) ++{ ++ struct mali_mem_allocation *entry, *next; + -+module_param_cb(mali_core_scaling_enable, ¶m_ops_core_scaling, &mali_core_scaling_enable, 0644); -+MODULE_PARM_DESC(mali_core_scaling_enable, "1 means to enable core scaling policy, 0 means to disable core scaling policy"); ++ MALI_DEBUG_PRINT(4, (" mali_free_session_allocations! \n")); + -+void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data) -+{ -+ if (1 == mali_core_scaling_enable) { -+ mali_core_scaling_update(data); ++ list_for_each_entry_safe(entry, next, &session->allocation_mgr.head, list) { ++ mali_allocation_unref(&entry); + } +} -diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h new file mode 100644 -index 000000000..7a2fc8107 +index 000000000..036248260 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c -@@ -0,0 +1,122 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_util.h +@@ -0,0 +1,20 @@ +/* -+ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -325094,126 +326872,157 @@ index 000000000..7a2fc8107 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file arm_core_scaling.c -+ * Example core scaling policy. 
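/*
 * Sketch of the one-off register access pattern used by mali_read_phys()
 * above: map only the page that contains the register, read it through the
 * ioremap cookie, and unmap again. This is fine for probe-time detection but
 * not for fast paths. The window is rounded to the CPU page size here rather
 * than the fixed 8 KiB mask the original uses, and the 0xDEADBEEF fallback
 * mirrors the original code.
 */
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/types.h>

static u32 example_read_phys(u32 phys_addr)
{
        u32 page = phys_addr & PAGE_MASK;
        u32 offset = phys_addr & ~PAGE_MASK;
        u32 val = 0xDEADBEEF;
        void __iomem *base = ioremap(page, offset + sizeof(u32));

        if (base) {
                val = ioread32((u8 __iomem *)base + offset);
                iounmap(base);
        }
        return val;
}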
-+ */ -+ -+#include "arm_core_scaling.h" ++#ifndef __MALI_MEMORY_UTIL_H__ ++#define __MALI_MEMORY_UTIL_H__ + -+#include -+#include "mali_kernel_common.h" ++u32 mali_allocation_unref(struct mali_mem_allocation **alloc); + -+#include ++void mali_allocation_ref(struct mali_mem_allocation *alloc); + -+static int num_cores_total; -+static int num_cores_enabled; ++void mali_free_session_allocations(struct mali_session_data *session); + -+static struct work_struct wq_work; ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c +new file mode 100644 +index 000000000..9ce4d6adf +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.c +@@ -0,0 +1,127 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static void set_num_cores(struct work_struct *work) -+{ -+ int err = mali_perf_set_num_pp_cores(num_cores_enabled); -+ MALI_DEBUG_ASSERT(0 == err); -+ MALI_IGNORE(err); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+static void enable_one_core(void) -+{ -+ if (num_cores_enabled < num_cores_total) { -+ ++num_cores_enabled; -+ schedule_work(&wq_work); -+ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling one more core\n")); -+ } ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_kernel_linux.h" ++#include "mali_scheduler.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_memory_manager.h" ++#include "mali_memory_virtual.h" + -+ MALI_DEBUG_ASSERT(1 <= num_cores_enabled); -+ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled); -+} + -+static void disable_one_core(void) ++/** ++*internal helper to link node into the rb-tree ++*/ ++static inline void _mali_vma_offset_add_rb(struct mali_allocation_manager *mgr, ++ struct mali_vma_node *node) +{ -+ if (1 < num_cores_enabled) { -+ --num_cores_enabled; -+ schedule_work(&wq_work); -+ MALI_DEBUG_PRINT(3, ("Core scaling: Disabling one core\n")); -+ } ++ struct rb_node **iter = &mgr->allocation_mgr_rb.rb_node; ++ struct rb_node *parent = NULL; ++ struct mali_vma_node *iter_node; + -+ MALI_DEBUG_ASSERT(1 <= num_cores_enabled); -+ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled); -+} ++ while (likely(*iter)) { ++ parent = *iter; ++ iter_node = rb_entry(*iter, struct mali_vma_node, vm_rb); + -+static void enable_max_num_cores(void) -+{ -+ if (num_cores_enabled < num_cores_total) { -+ num_cores_enabled = num_cores_total; -+ schedule_work(&wq_work); -+ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling maximum number of cores\n")); ++ if (node->vm_node.start < iter_node->vm_node.start) ++ iter = &(*iter)->rb_left; ++ else if (node->vm_node.start > iter_node->vm_node.start) ++ iter = &(*iter)->rb_right; ++ else ++ MALI_DEBUG_ASSERT(0); + } + -+ MALI_DEBUG_ASSERT(num_cores_total == num_cores_enabled); ++ rb_link_node(&node->vm_rb, parent, iter); ++ rb_insert_color(&node->vm_rb, &mgr->allocation_mgr_rb); +} + -+void mali_core_scaling_init(int num_pp_cores) ++/** ++ * mali_vma_offset_add() - Add offset node to RB Tree ++ */ ++int 
mali_vma_offset_add(struct mali_allocation_manager *mgr, ++ struct mali_vma_node *node) +{ -+ INIT_WORK(&wq_work, set_num_cores); ++ int ret = 0; ++ write_lock(&mgr->vm_lock); + -+ num_cores_total = num_pp_cores; -+ num_cores_enabled = num_pp_cores; ++ if (node->vm_node.allocated) { ++ goto out; ++ } + -+ /* NOTE: Mali is not fully initialized at this point. */ -+} ++ _mali_vma_offset_add_rb(mgr, node); ++ /* set to allocated */ ++ node->vm_node.allocated = 1; + -+void mali_core_scaling_sync(int num_cores) -+{ -+ num_cores_enabled = num_cores; ++out: ++ write_unlock(&mgr->vm_lock); ++ return ret; +} + -+void mali_core_scaling_term(void) ++/** ++ * mali_vma_offset_remove() - Remove offset node from RB tree ++ */ ++void mali_vma_offset_remove(struct mali_allocation_manager *mgr, ++ struct mali_vma_node *node) +{ -+ flush_scheduled_work(); -+} ++ write_lock(&mgr->vm_lock); + -+#define PERCENT_OF(percent, max) ((int) ((percent)*(max)/100.0 + 0.5)) ++ if (node->vm_node.allocated) { ++ rb_erase(&node->vm_rb, &mgr->allocation_mgr_rb); ++ memset(&node->vm_node, 0, sizeof(node->vm_node)); ++ } ++ write_unlock(&mgr->vm_lock); ++} + -+void mali_core_scaling_update(struct mali_gpu_utilization_data *data) ++/** ++* mali_vma_offset_search - Search the node in RB tree ++*/ ++struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr, ++ unsigned long start, unsigned long pages) +{ -+ /* -+ * This function implements a very trivial PP core scaling algorithm. -+ * -+ * It is _NOT_ of production quality. -+ * The only intention behind this algorithm is to exercise and test the -+ * core scaling functionality of the driver. -+ * It is _NOT_ tuned for neither power saving nor performance! -+ * -+ * Other metrics than PP utilization need to be considered as well -+ * in order to make a good core scaling algorithm. -+ */ -+ -+ MALI_DEBUG_PRINT(3, ("Utilization: (%3d, %3d, %3d), cores enabled: %d/%d\n", data->utilization_gpu, data->utilization_gp, data->utilization_pp, num_cores_enabled, num_cores_total)); -+ -+ /* NOTE: this function is normally called directly from the utilization callback which is in -+ * timer context. 
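/*
 * Simplified userspace model of mali_vma_offset_search() above: find the
 * allocation whose start address is the greatest value not exceeding the
 * queried address, then check that the queried range actually falls inside
 * it. The kernel code performs essentially the same walk over an rbtree
 * (with some extra bookkeeping); a sorted array stands in for the tree here.
 */
#include <stdio.h>
#include <stddef.h>

struct range { unsigned long start, size; };

static const struct range *range_lookup(const struct range *r, size_t n,
                                        unsigned long addr, unsigned long len)
{
        const struct range *best = NULL;
        size_t i;

        for (i = 0; i < n; i++)                 /* r[] is sorted by start */
                if (r[i].start <= addr)
                        best = &r[i];
                else
                        break;

        if (best && addr + len <= best->start + best->size)
                return best;                    /* range fully contained */
        return NULL;
}

int main(void)
{
        static const struct range map[] = { { 0x1000, 0x1000 }, { 0x4000, 0x2000 } };

        printf("%s\n", range_lookup(map, 2, 0x4800, 0x100) ? "hit" : "miss");
        return 0;
}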
*/ ++ struct mali_vma_node *node, *best; ++ struct rb_node *iter; ++ unsigned long offset; ++ read_lock(&mgr->vm_lock); ++ ++ iter = mgr->allocation_mgr_rb.rb_node; ++ best = NULL; ++ ++ while (likely(iter)) { ++ node = rb_entry(iter, struct mali_vma_node, vm_rb); ++ offset = node->vm_node.start; ++ if (start >= offset) { ++ iter = iter->rb_right; ++ best = node; ++ if (start == offset) ++ break; ++ } else { ++ iter = iter->rb_left; ++ } ++ } + -+ if (PERCENT_OF(90, 256) < data->utilization_pp) { -+ enable_max_num_cores(); -+ } else if (PERCENT_OF(50, 256) < data->utilization_pp) { -+ enable_one_core(); -+ } else if (PERCENT_OF(40, 256) < data->utilization_pp) { -+ /* do nothing */ -+ } else if (PERCENT_OF(0, 256) < data->utilization_pp) { -+ disable_one_core(); -+ } else { -+ /* do nothing */ ++ if (best) { ++ offset = best->vm_node.start + best->vm_node.size; ++ if (offset <= start + pages) ++ best = NULL; + } ++ read_unlock(&mgr->vm_lock); ++ ++ return best; +} -diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h ++ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h new file mode 100644 -index 000000000..8e0101830 +index 000000000..696db570a --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h -@@ -0,0 +1,44 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_memory_virtual.h +@@ -0,0 +1,35 @@ +/* -+ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -325221,49 +327030,40 @@ index 000000000..8e0101830 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ ++#ifndef __MALI_GPU_VMEM_H__ ++#define __MALI_GPU_VMEM_H__ + -+/** -+ * @file arm_core_scaling.h -+ * Example core scaling policy. -+ */ -+ -+#ifndef __ARM_CORE_SCALING_H__ -+#define __ARM_CORE_SCALING_H__ ++#include "mali_osk.h" ++#include "mali_session.h" ++#include ++#include ++#include ++#include ++#include ++#include "mali_memory_types.h" ++#include "mali_memory_os_alloc.h" ++#include "mali_memory_manager.h" + -+struct mali_gpu_utilization_data; + -+/** -+ * Initialize core scaling policy. -+ * -+ * @note The core scaling policy will assume that all PP cores are on initially. -+ * -+ * @param num_pp_cores Total number of PP cores. -+ */ -+void mali_core_scaling_init(int num_pp_cores); + -+/** -+ * Terminate core scaling policy. -+ */ -+void mali_core_scaling_term(void); ++int mali_vma_offset_add(struct mali_allocation_manager *mgr, ++ struct mali_vma_node *node); + -+/** -+ * Update core scaling policy with new utilization data. -+ * -+ * @param data Utilization data. 
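/*
 * Condensed model of the example scaling policy above: PP utilisation is
 * reported on a 0..256 scale, so PERCENT_OF(90, 256) is 230, PERCENT_OF(50,
 * 256) is 128 and PERCENT_OF(40, 256) is 102. The helper only returns how the
 * enabled core count should move; the real policy schedules the change from a
 * workqueue because the callback runs in timer context.
 */
#include <stdio.h>

#define PERCENT_OF(percent, max) ((int)((percent) * (max) / 100.0 + 0.5))

/* Return value: +N = enable N more cores, 0 = keep, -1 = drop one core. */
static int core_scaling_decision(int utilization_pp, int enabled, int total)
{
        if (utilization_pp > PERCENT_OF(90, 256))
                return total - enabled;          /* go straight to max */
        if (utilization_pp > PERCENT_OF(50, 256))
                return enabled < total ? 1 : 0;  /* one more core */
        if (utilization_pp > PERCENT_OF(40, 256))
                return 0;                        /* hysteresis band */
        return (utilization_pp > 0 && enabled > 1) ? -1 : 0;
}

int main(void)
{
        printf("%d\n", core_scaling_decision(240, 2, 4)); /* 2: jump to all 4 */
        printf("%d\n", core_scaling_decision(60, 3, 4));  /* -1: back off one */
        return 0;
}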
-+ */ -+void mali_core_scaling_update(struct mali_gpu_utilization_data *data); ++void mali_vma_offset_remove(struct mali_allocation_manager *mgr, ++ struct mali_vma_node *node); + -+void mali_core_scaling_sync(int num_cores); ++struct mali_vma_node *mali_vma_offset_search(struct mali_allocation_manager *mgr, ++ unsigned long start, unsigned long pages); + -+#endif /* __ARM_CORE_SCALING_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c b/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c new file mode 100644 -index 000000000..e4e7ab8b2 +index 000000000..5bc0e52eb --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c -@@ -0,0 +1,127 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_atomics.c +@@ -0,0 +1,59 @@ +/* -+ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -325273,1154 +327073,1203 @@ index 000000000..e4e7ab8b2 + */ + +/** -+ * @file juno_opp.c -+ * Example: Set up opp table -+ * Using ARM64 juno specific SCPI_PROTOCOL get frequence inform -+ * Customer need implement your own platform releated logic -+ */ -+#ifdef CONFIG_ARCH_VEXPRESS -+#ifdef CONFIG_MALI_DEVFREQ -+#ifdef CONFIG_ARM64 -+#ifdef CONFIG_ARM_SCPI_PROTOCOL -+#include -+#include -+#include -+#include -+#include -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -+#include -+#else /* Linux >= 3.13 */ -+/* In 3.13 the OPP include header file, types, and functions were all -+ * renamed. Use the old filename for the include, and define the new names to -+ * the old, when an old kernel is detected. ++ * @file mali_osk_atomics.c ++ * Implementation of the OS abstraction layer for the kernel device driver + */ -+#include -+#define dev_pm_opp_add opp_add -+#define dev_pm_opp_remove opp_remove -+#endif /* Linux >= 3.13 */ + ++#include "mali_osk.h" ++#include +#include "mali_kernel_common.h" + -+static int init_juno_opps_from_scpi(struct device *dev) ++void _mali_osk_atomic_dec(_mali_osk_atomic_t *atom) +{ -+ struct scpi_dvfs_info *sinfo; -+ struct scpi_ops *sops; -+ -+ int i; -+ -+ sops = get_scpi_ops(); -+ if (NULL == sops) { -+ MALI_DEBUG_PRINT(2, ("Mali didn't get any scpi ops \n")); -+ return -1; -+ } -+ -+ /* Hard coded for Juno. 
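/*
 * Sketch of the OPP registration loop used in the Juno example above:
 * wherever the frequency/voltage table comes from (SCPI domain 2 on Juno, DT
 * on other boards), each entry is handed to the OPP core with
 * dev_pm_opp_add(), with millivolts converted to microvolts. The table below
 * is made up for illustration only.
 */
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/pm_opp.h>

struct example_opp { unsigned long freq_hz; unsigned long microvolt; };

static int example_register_opps(struct device *dev)
{
        static const struct example_opp table[] = {
                { 200000000UL,  950000UL },
                { 400000000UL, 1025000UL },
                { 600000000UL, 1125000UL },
        };
        size_t i;
        int err;

        for (i = 0; i < ARRAY_SIZE(table); i++) {
                err = dev_pm_opp_add(dev, table[i].freq_hz, table[i].microvolt);
                if (err)
                        return err;
        }
        return 0;
}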
2 is GPU domain */ -+ sinfo = sops->dvfs_get_info(2); -+ if (IS_ERR_OR_NULL(sinfo)) -+ return PTR_ERR(sinfo); -+ -+ for (i = 0; i < sinfo->count; i++) { -+ struct scpi_opp *e = &sinfo->opps[i]; -+ -+ MALI_DEBUG_PRINT(2, ("Mali OPP from SCPI: %u Hz @ %u mV\n", e->freq, e->m_volt)); -+ -+ dev_pm_opp_add(dev, e->freq, e->m_volt * 1000); -+ } -+ -+ return 0; ++ atomic_dec((atomic_t *)&atom->u.val); +} + -+int setup_opps(void) ++u32 _mali_osk_atomic_dec_return(_mali_osk_atomic_t *atom) +{ -+ struct device_node *np; -+ struct platform_device *pdev; -+ int err; ++ return atomic_dec_return((atomic_t *)&atom->u.val); ++} + -+ np = of_find_node_by_name(NULL, "gpu"); -+ if (!np) { -+ pr_err("Failed to find DT entry for Mali\n"); -+ return -EFAULT; -+ } ++void _mali_osk_atomic_inc(_mali_osk_atomic_t *atom) ++{ ++ atomic_inc((atomic_t *)&atom->u.val); ++} + -+ pdev = of_find_device_by_node(np); -+ if (!pdev) { -+ pr_err("Failed to find device for Mali\n"); -+ of_node_put(np); -+ return -EFAULT; -+ } ++u32 _mali_osk_atomic_inc_return(_mali_osk_atomic_t *atom) ++{ ++ return atomic_inc_return((atomic_t *)&atom->u.val); ++} + -+ err = init_juno_opps_from_scpi(&pdev->dev); ++void _mali_osk_atomic_init(_mali_osk_atomic_t *atom, u32 val) ++{ ++ MALI_DEBUG_ASSERT_POINTER(atom); ++ atomic_set((atomic_t *)&atom->u.val, val); ++} + -+ of_node_put(np); ++u32 _mali_osk_atomic_read(_mali_osk_atomic_t *atom) ++{ ++ return atomic_read((atomic_t *)&atom->u.val); ++} + -+ return err; ++void _mali_osk_atomic_term(_mali_osk_atomic_t *atom) ++{ ++ MALI_IGNORE(atom); +} + -+int term_opps(struct device *dev) ++u32 _mali_osk_atomic_xchg(_mali_osk_atomic_t *atom, u32 val) +{ -+ struct scpi_dvfs_info *sinfo; -+ struct scpi_ops *sops; ++ return atomic_xchg((atomic_t *)&atom->u.val, val); ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c +new file mode 100644 +index 000000000..54c455a37 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_bitmap.c +@@ -0,0 +1,152 @@ ++/* ++ * Copyright (C) 2010, 2013-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ int i; ++/** ++ * @file mali_osk_bitmap.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+ sops = get_scpi_ops(); -+ if (NULL == sops) { -+ MALI_DEBUG_PRINT(2, ("Mali didn't get any scpi ops \n")); -+ return -1; -+ } ++#include ++#include ++#include ++#include ++#include ++#include "common/mali_kernel_common.h" ++#include "mali_osk_types.h" ++#include "mali_osk.h" + -+ /* Hard coded for Juno. 
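/*
 * Stripped-down version of the range allocator above: search for a run of
 * clear bits starting at the rolling `last' hint, wrap back to the reserved
 * prefix on failure, then mark the run used and advance the hint. Locking and
 * the availability counter are left out; `struct tiny_bitmap' is only a
 * stand-in for the real _mali_osk_bitmap structure.
 */
#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/types.h>

struct tiny_bitmap {
        unsigned long *table;   /* bitmap storage, max bits long */
        u32 max;                /* total number of ids */
        u32 reserve;            /* ids below this are never handed out */
        u32 last;               /* next-fit hint */
};

static int tiny_bitmap_alloc_range(struct tiny_bitmap *bm, unsigned int cnt)
{
        unsigned long obj;

        obj = bitmap_find_next_zero_area(bm->table, bm->max, bm->last, cnt, 0);
        if (obj >= bm->max)
                obj = bitmap_find_next_zero_area(bm->table, bm->max,
                                                 bm->reserve, cnt, 0);
        if (obj >= bm->max)
                return -ENOSPC;

        bitmap_set(bm->table, obj, cnt);
        bm->last = (obj + cnt >= bm->max) ? bm->reserve : obj + cnt;
        return obj;
}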
2 is GPU domain */ -+ sinfo = sops->dvfs_get_info(2); -+ if (IS_ERR_OR_NULL(sinfo)) -+ return PTR_ERR(sinfo); ++u32 _mali_osk_bitmap_alloc(struct _mali_osk_bitmap *bitmap) ++{ ++ u32 obj; + -+ for (i = 0; i < sinfo->count; i++) { -+ struct scpi_opp *e = &sinfo->opps[i]; ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+ MALI_DEBUG_PRINT(2, ("Mali Remove OPP: %u Hz \n", e->freq)); ++ _mali_osk_spinlock_lock(bitmap->lock); + -+ dev_pm_opp_remove(dev, e->freq); ++ obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->reserve); ++ ++ if (obj < bitmap->max) { ++ set_bit(obj, bitmap->table); ++ } else { ++ obj = -1; + } + -+ return 0; ++ if (obj != -1) ++ --bitmap->avail; ++ _mali_osk_spinlock_unlock(bitmap->lock); + ++ return obj; +} -+#endif -+#endif -+#endif -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h b/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h -new file mode 100644 -index 000000000..fe5e12241 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h -@@ -0,0 +1,209 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* ---------------------------------------------------------------------------- -+ * File: custom_log.h -+ * -+ * Desc: ChenZhen å好的 log 输出的定制实现. -+ * -+ * -------------------------------------------------------------------- -+ * < 习语 å’Œ 缩略语 > : -+ * -+ * -------------------------------------------------------------------- -+ * Usage: -+ * -+ * Note: -+ * -+ * Author: ChenZhen -+ * -+ * ---------------------------------------------------------------------------- -+ * Version: -+ * v1.0 -+ * ---------------------------------------------------------------------------- -+ * Log: -+ ----Fri Nov 19 15:20:28 2010 v1.0 -+ * -+ * ---------------------------------------------------------------------------- -+ */ -+ -+#ifndef __CUSTOM_LOG_H__ -+#define __CUSTOM_LOG_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------------------------- -+ * Include Files -+ * ----------------------------------------------------------------------------- -+ */ -+#include -+#include + -+/* ----------------------------------------------------------------------------- -+ * Macros Definition -+ * ----------------------------------------------------------------------------- -+ */ ++void _mali_osk_bitmap_free(struct _mali_osk_bitmap *bitmap, u32 obj) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. */ -+/* #define ENABLE_DEBUG_LOG */ ++ _mali_osk_bitmap_free_range(bitmap, obj, 1); ++} + -+/*----------------------------------------------------------------------------*/ ++u32 _mali_osk_bitmap_alloc_range(struct _mali_osk_bitmap *bitmap, int cnt) ++{ ++ u32 obj; + -+#ifdef ENABLE_VERBOSE_LOG -+/** Verbose log. */ -+#define V(fmt, args...) \ -+ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define V(...) ((void)0) -+#endif ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+#ifdef ENABLE_DEBUG_LOG -+/** Debug log. */ -+#define D(fmt, args...) \ -+ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define D(...) ((void)0) -+#endif ++ if (0 >= cnt) { ++ return -1; ++ } + -+#define I(fmt, args...) 
\ -+ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ if (1 == cnt) { ++ return _mali_osk_bitmap_alloc(bitmap); ++ } + -+#define W(fmt, args...) \ -+ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ -+ fmt "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ _mali_osk_spinlock_lock(bitmap->lock); ++ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, ++ bitmap->last, cnt, 0); + -+#define E(fmt, args...) \ -+ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ if (obj >= bitmap->max) { ++ obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, ++ bitmap->reserve, cnt, 0); ++ } + -+/*-------------------------------------------------------*/ ++ if (obj < bitmap->max) { ++ bitmap_set(bitmap->table, obj, cnt); + -+/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_DEC(var) D(#var " = %d.", var) ++ bitmap->last = (obj + cnt); ++ if (bitmap->last >= bitmap->max) { ++ bitmap->last = bitmap->reserve; ++ } ++ } else { ++ obj = -1; ++ } + -+#define E_DEC(var) E(#var " = %d.", var) ++ if (obj != -1) { ++ bitmap->avail -= cnt; ++ } + -+/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_HEX(var) D(#var " = 0x%x.", var) ++ _mali_osk_spinlock_unlock(bitmap->lock); + -+#define E_HEX(var) E(#var " = 0x%x.", var) ++ return obj; ++} + -+/** -+ * 使用 D(), 以å六进制的形å¼, -+ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. -+ */ -+#define D_PTR(ptr) D(#ptr " = %p.", ptr) ++u32 _mali_osk_bitmap_avail(struct _mali_osk_bitmap *bitmap) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+#define E_PTR(ptr) E(#ptr " = %p.", ptr) ++ return bitmap->avail; ++} + -+/** 使用 D(), æ‰“å° char 字串. */ -+#define D_STR(p_str) \ -+do { \ -+ if (!p_str) { \ -+ D(#p_str " = NULL."); \ -+ else \ -+ D(#p_str " = '%s'.", p_str); \ -+} while (0) ++void _mali_osk_bitmap_free_range(struct _mali_osk_bitmap *bitmap, u32 obj, int cnt) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+#define E_STR(p_str) \ -+do { \ -+ if (!p_str) \ -+ E(#p_str " = NULL."); \ -+ else \ -+ E(#p_str " = '%s'.", p_str); \ -+} while (0) ++ _mali_osk_spinlock_lock(bitmap->lock); ++ bitmap_clear(bitmap->table, obj, cnt); ++ bitmap->last = min(bitmap->last, obj); + -+#ifdef ENABLE_DEBUG_LOG -+/** -+ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. -+ */ -+#define D_MEM(p_start, len) \ -+do { \ -+ int i = 0; \ -+ char *p = (char *)(p_start); \ -+ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ -+ (p_start), \ -+ (len)); \ -+ pr_debug("\t\t"); \ -+ for (i = 0; i < (len); i++) \ -+ pr_debug("0x%02x, ", p[i]); \ -+ pr_debug("\n"); \ -+} while (0) -+#else -+#define D_MEM(...) ((void)0) -+#endif ++ bitmap->avail += cnt; ++ _mali_osk_spinlock_unlock(bitmap->lock); ++} + -+/*-------------------------------------------------------*/ ++int _mali_osk_bitmap_init(struct _mali_osk_bitmap *bitmap, u32 num, u32 reserve) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bitmap); ++ MALI_DEBUG_ASSERT(reserve <= num); + -+/** -+ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, -+ * å°†å˜é‡ 'ret_var' 设置 'err_code', -+ * log 输出对应的 Error Caution, -+ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. -+ * @param msg -+ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. -+ * @param ret_var -+ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, -+ * 将被设置具体的 Error Code. -+ * 通常是 'ret' or 'result'. -+ * @param err_code -+ * 表å¾ç‰¹å®š error 的常数标识, -+ * 通常是 å®çš„å½¢æ€. -+ * @param label -+ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, -+ * 通常就是 'EXIT'. 
-+ * @param args... -+ * 对应 'msg_fmt' 实å‚中, -+ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. -+ */ -+#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) \ -+do { \ -+ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ -+ (err_code), \ -+ ## args); \ -+ (ret_var) = (err_code); \ -+ goto label; \ -+} while (0) ++ bitmap->reserve = reserve; ++ bitmap->last = reserve; ++ bitmap->max = num; ++ bitmap->avail = num - reserve; ++ bitmap->lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_UNORDERED, _MALI_OSK_LOCK_ORDER_FIRST); ++ if (!bitmap->lock) { ++ return _MALI_OSK_ERR_NOMEM; ++ } ++ bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * ++ sizeof(long), GFP_KERNEL); ++ if (!bitmap->table) { ++ _mali_osk_spinlock_term(bitmap->lock); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/* ----------------------------------------------------------------------------- -+ * Types and Structures Definition -+ * ----------------------------------------------------------------------------- -+ */ ++ return _MALI_OSK_ERR_OK; ++} + -+/* ----------------------------------------------------------------------------- -+ * Global Functions' Prototype -+ * ----------------------------------------------------------------------------- -+ */ ++void _mali_osk_bitmap_term(struct _mali_osk_bitmap *bitmap) ++{ ++ MALI_DEBUG_ASSERT_POINTER(bitmap); + -+/* ----------------------------------------------------------------------------- -+ * Inline Functions Implementation -+ * ----------------------------------------------------------------------------- -+ */ ++ if (NULL != bitmap->lock) { ++ _mali_osk_spinlock_term(bitmap->lock); ++ } + -+#ifdef __cplusplus ++ if (NULL != bitmap->table) { ++ kfree(bitmap->table); ++ } +} -+#endif + -+#endif /* __CUSTOM_LOG_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/rk.c b/drivers/gpu/arm/mali400/mali/platform/rk/rk.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c new file mode 100644 -index 000000000..c937e4678 +index 000000000..5c8b9ceab --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/rk/rk.c -@@ -0,0 +1,726 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_irq.c +@@ -0,0 +1,200 @@ +/* -+ * (C) COPYRIGHT RockChip Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** -+ * @file rk.c -+ * implementation of platform_specific_code on rk platforms, such as rk3328h. -+ * -+ * mali_device_driver(MDD) includes 2 parts : -+ * .DP : platform_dependent_part : -+ * located in /mali/platform// -+ * .DP : common_part : -+ * common part implemented by ARM. 
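/*
 * English illustration of the two helpers defined in custom_log.h above,
 * whose documentation comments are in Chinese: the leveled print macros
 * prepend file/line/function to the message, and SET_ERROR_AND_JUMP() logs
 * why an error happened, records the error code in a result variable, and
 * jumps to a cleanup label. Plain printf stands in for the kernel pr_*()
 * calls, and the names here are illustrative, not the original ones.
 */
#include <stdio.h>

#define LOG_E(fmt, ...) \
        printf("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt "\n", \
               __FILE__, __LINE__, __func__, ##__VA_ARGS__)

#define SET_ERR_AND_JUMP(ret_var, err_code, label, fmt, ...) \
        do { \
                LOG_E("setting " #ret_var " to %d: " fmt, (err_code), ##__VA_ARGS__); \
                (ret_var) = (err_code); \
                goto label; \
        } while (0)

static int example_probe(int have_clock)
{
        int ret = 0;

        if (!have_clock)
                SET_ERR_AND_JUMP(ret, -1, out, "clock %s is missing", "mali");
out:
        return ret;
}

int main(void)
{
        printf("probe returned %d\n", example_probe(0));
        return 0;
}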
++ * @file mali_osk_irq.c ++ * Implementation of the OS abstraction layer for the kernel device driver + */ + -+#define ENABLE_DEBUG_LOG -+#include "custom_log.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_PM -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include /* For memory allocation */ ++#include ++#include ++#include + -+#include ++#include "mali_osk.h" +#include "mali_kernel_common.h" -+#include "../../common/mali_osk_mali.h" + -+/*---------------------------------------------------------------------------*/ -+ -+u32 mali_group_error; ++typedef struct _mali_osk_irq_t_struct { ++ u32 irqnum; ++ void *data; ++ _mali_osk_irq_uhandler_t uhandler; ++} mali_osk_irq_object_t; + -+/*---------------------------------------------------------------------------*/ ++typedef irqreturn_t (*irq_handler_func_t)(int, void *, struct pt_regs *); ++static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id); /* , struct pt_regs *regs*/ + -+#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) ++#if defined(DEBUG) + -+/* -+ * rk_platform_context_of_mali_device. -+ */ -+struct rk_context { -+ /* mali device. */ -+ struct device *dev; -+ /* is the GPU powered on? */ -+ bool is_powered; -+ /* debug only, the period in ms to count gpu_utilisation. */ -+ unsigned int utilisation_period; ++struct test_interrupt_data { ++ _mali_osk_irq_ack_t ack_func; ++ void *probe_data; ++ mali_bool interrupt_received; ++ wait_queue_head_t wq; +}; + -+struct rk_context *s_rk_context; -+ -+/*---------------------------------------------------------------------------*/ -+ -+#ifdef CONFIG_MALI_DEVFREQ -+static ssize_t utilisation_period_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++static irqreturn_t test_interrupt_upper_half(int port_name, void *dev_id) +{ -+ struct rk_context *platform = s_rk_context; -+ ssize_t ret = 0; ++ irqreturn_t ret = IRQ_NONE; ++ struct test_interrupt_data *data = (struct test_interrupt_data *)dev_id; + -+ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); ++ if (_MALI_OSK_ERR_OK == data->ack_func(data->probe_data)) { ++ data->interrupt_received = MALI_TRUE; ++ wake_up(&data->wq); ++ ret = IRQ_HANDLED; ++ } + + return ret; +} + -+static ssize_t utilisation_period_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, -+ size_t count) ++static _mali_osk_errcode_t test_interrupt(u32 irqnum, ++ _mali_osk_irq_trigger_t trigger_func, ++ _mali_osk_irq_ack_t ack_func, ++ void *probe_data, ++ const char *description) +{ -+ struct rk_context *platform = s_rk_context; -+ int ret = 0; ++ unsigned long irq_flags = 0; ++ struct test_interrupt_data data = { ++ .ack_func = ack_func, ++ .probe_data = probe_data, ++ .interrupt_received = MALI_FALSE, ++ }; + -+ ret = kstrtouint(buf, 0, &platform->utilisation_period); -+ if (ret) { -+ E("invalid input period : %s.", buf); -+ return ret; ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ irq_flags |= IRQF_SHARED; ++#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */ ++ ++ if (0 != request_irq(irqnum, test_interrupt_upper_half, irq_flags, description, &data)) { ++ MALI_DEBUG_PRINT(2, ("Unable to install test IRQ handler for core '%s'\n", description)); ++ return _MALI_OSK_ERR_FAULT; + } -+ D("set utilisation_period to '%d'.", platform->utilisation_period); + -+ return count; ++ init_waitqueue_head(&data.wq); ++ ++ trigger_func(probe_data); ++ wait_event_timeout(data.wq, data.interrupt_received, 100); ++ ++ 
free_irq(irqnum, &data); ++ ++ if (data.interrupt_received) { ++ MALI_DEBUG_PRINT(3, ("%s: Interrupt test OK\n", description)); ++ return _MALI_OSK_ERR_OK; ++ } else { ++ MALI_PRINT_ERROR(("%s: Failed interrupt test on %u\n", description, irqnum)); ++ return _MALI_OSK_ERR_FAULT; ++ } +} + -+static ssize_t utilisation_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++#endif /* defined(DEBUG) */ ++ ++_mali_osk_irq_t *_mali_osk_irq_init(u32 irqnum, _mali_osk_irq_uhandler_t uhandler, void *int_data, _mali_osk_irq_trigger_t trigger_func, _mali_osk_irq_ack_t ack_func, void *probe_data, const char *description) +{ -+ struct rk_context *platform = s_rk_context; -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ ssize_t ret = 0; -+ unsigned long period_in_us = platform->utilisation_period * 1000; -+ unsigned long total_time; -+ unsigned long busy_time; -+ unsigned long utilisation; ++ mali_osk_irq_object_t *irq_object; ++ unsigned long irq_flags = 0; + -+ mali_pm_reset_dvfs_utilisation(mdev); -+ usleep_range(period_in_us, period_in_us + 100); -+ mali_pm_get_dvfs_utilisation(mdev, &total_time, &busy_time); ++#if defined(CONFIG_MALI_SHARED_INTERRUPTS) ++ irq_flags |= IRQF_SHARED; ++#endif /* defined(CONFIG_MALI_SHARED_INTERRUPTS) */ + -+ /* 'devfreq_dev_profile' instance registered to devfreq -+ * also uses mali_pm_reset_dvfs_utilisation() -+ * and mali_pm_get_dvfs_utilisation(). -+ * So, it's better to disable GPU DVFS before reading this node. -+ */ -+ D("total_time : %lu, busy_time : %lu.", total_time, busy_time); ++ irq_object = kmalloc(sizeof(mali_osk_irq_object_t), GFP_KERNEL); ++ if (NULL == irq_object) { ++ return NULL; ++ } + -+ utilisation = busy_time / (total_time / 100); -+ ret += snprintf(buf, PAGE_SIZE, "%lu\n", utilisation); ++ if (-1 == irqnum) { ++ /* Probe for IRQ */ ++ if ((NULL != trigger_func) && (NULL != ack_func)) { ++ unsigned long probe_count = 3; ++ _mali_osk_errcode_t err; ++ int irq; + -+ return ret; -+} ++ MALI_DEBUG_PRINT(2, ("Probing for irq\n")); + -+static DEVICE_ATTR_RW(utilisation_period); -+static DEVICE_ATTR_RO(utilisation); -+#endif ++ do { ++ unsigned long mask; + -+static int rk_context_create_sysfs_files(struct device *dev) -+{ -+#ifdef CONFIG_MALI_DEVFREQ -+ int ret; ++ mask = probe_irq_on(); ++ trigger_func(probe_data); + -+ ret = device_create_file(dev, &dev_attr_utilisation_period); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation_period'."); -+ goto out; -+ } ++ _mali_osk_time_ubusydelay(5); + -+ ret = device_create_file(dev, &dev_attr_utilisation); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation'."); -+ goto remove_utilisation_period; ++ irq = probe_irq_off(mask); ++ err = ack_func(probe_data); ++ } while (irq < 0 && (err == _MALI_OSK_ERR_OK) && probe_count--); ++ ++ if (irq < 0 || (_MALI_OSK_ERR_OK != err)) irqnum = -1; ++ else irqnum = irq; ++ } else irqnum = -1; /* no probe functions, fault */ ++ ++ if (-1 != irqnum) { ++ /* found an irq */ ++ MALI_DEBUG_PRINT(2, ("Found irq %d\n", irqnum)); ++ } else { ++ MALI_DEBUG_PRINT(2, ("Probe for irq failed\n")); ++ } + } + -+ return 0; ++ irq_object->irqnum = irqnum; ++ irq_object->uhandler = uhandler; ++ irq_object->data = int_data; + -+remove_utilisation_period: -+ device_remove_file(dev, &dev_attr_utilisation_period); -+out: -+ return ret; -+#else -+ return 0; ++ if (-1 == irqnum) { ++ MALI_DEBUG_PRINT(2, ("No IRQ for core '%s' found during probe\n", description)); ++ kfree(irq_object); ++ return NULL; ++ } ++ ++#if defined(DEBUG) ++ /* Verify that the 
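/*
 * Minimal model of the IRQ self-test above: install a temporary handler,
 * poke the hardware so it raises the interrupt, then wait with a timeout for
 * the handler to confirm delivery before freeing the line again. The trigger
 * callback and the "irq-selftest" name are placeholders for whatever the
 * core-specific code provides.
 */
#include <linux/interrupt.h>
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>
#include <linux/types.h>

struct irq_probe {
        wait_queue_head_t wq;
        bool fired;
};

static irqreturn_t probe_handler(int irq, void *dev_id)
{
        struct irq_probe *p = dev_id;

        p->fired = true;
        wake_up(&p->wq);
        return IRQ_HANDLED;
}

static int example_test_irq(unsigned int irq, void (*trigger)(void *), void *hw)
{
        struct irq_probe p = { .fired = false };
        int err;

        init_waitqueue_head(&p.wq);

        err = request_irq(irq, probe_handler, 0, "irq-selftest", &p);
        if (err)
                return err;

        trigger(hw);                                /* make the core raise its IRQ */
        wait_event_timeout(p.wq, p.fired, HZ / 10); /* give it ~100 ms */
        free_irq(irq, &p);

        return p.fired ? 0 : -ENODEV;
}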
configured interrupt settings are working */ ++ if (_MALI_OSK_ERR_OK != test_interrupt(irqnum, trigger_func, ack_func, probe_data, description)) { ++ MALI_DEBUG_PRINT(2, ("Test of IRQ(%d) handler for core '%s' failed\n", irqnum, description)); ++ kfree(irq_object); ++ return NULL; ++ } +#endif ++ ++ if (0 != request_irq(irqnum, irq_handler_upper_half, irq_flags, description, irq_object)) { ++ MALI_DEBUG_PRINT(2, ("Unable to install IRQ handler for core '%s'\n", description)); ++ kfree(irq_object); ++ return NULL; ++ } ++ ++ return irq_object; +} + -+static void rk_context_remove_sysfs_files(struct device *dev) ++void _mali_osk_irq_term(_mali_osk_irq_t *irq) +{ -+#ifdef CONFIG_MALI_DEVFREQ -+ device_remove_file(dev, &dev_attr_utilisation_period); -+ device_remove_file(dev, &dev_attr_utilisation); -+#endif ++ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)irq; ++ free_irq(irq_object->irqnum, irq_object); ++ kfree(irq_object); +} + -+/*---------------------------------------------------------------------------*/ + -+/* -+ * Init rk_platform_context of mali_device. ++/** This function is called directly in interrupt context from the OS just after ++ * the CPU get the hw-irq from mali, or other devices on the same IRQ-channel. ++ * It is registered one of these function for each mali core. When an interrupt ++ * arrives this function will be called equal times as registered mali cores. ++ * That means that we only check one mali core in one function call, and the ++ * core we check for each turn is given by the \a dev_id variable. ++ * If we detect an pending interrupt on the given core, we mask the interrupt ++ * out by settging the core's IRQ_MASK register to zero. ++ * Then we schedule the mali_core_irq_handler_bottom_half to run as high priority ++ * work queue job. + */ -+static int rk_context_init(struct platform_device *pdev) ++static irqreturn_t irq_handler_upper_half(int port_name, void *dev_id) /* , struct pt_regs *regs*/ +{ -+ int ret = 0; -+ struct device *dev = &pdev->dev; -+ struct rk_context *platform; /* platform_context */ ++ irqreturn_t ret = IRQ_NONE; ++ mali_osk_irq_object_t *irq_object = (mali_osk_irq_object_t *)dev_id; + -+ platform = kzalloc(sizeof(*platform), GFP_KERNEL); -+ if (!platform) { -+ E("no mem."); -+ return _MALI_OSK_ERR_NOMEM; ++ if (_MALI_OSK_ERR_OK == irq_object->uhandler(irq_object->data)) { ++ ret = IRQ_HANDLED; + } + -+ platform->dev = dev; -+ platform->is_powered = false; -+ -+ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; ++ return ret; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c +new file mode 100644 +index 000000000..ed5f0b0da +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.c +@@ -0,0 +1,287 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
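/*
 * Shape of the upper-half handler described in the comment above: with
 * IRQF_SHARED every registered handler on the line is called in turn, so each
 * one must check whether its own device actually raised the interrupt and
 * return IRQ_NONE if not. The example_core structure and its status-register
 * offset are made up for illustration.
 */
#include <linux/interrupt.h>
#include <linux/io.h>

struct example_core {
        void __iomem *regs;
};

#define EXAMPLE_INT_STATUS 0x08   /* hypothetical interrupt status register */

static irqreturn_t example_upper_half(int irq, void *dev_id)
{
        struct example_core *core = dev_id;

        if (!ioread32((u8 __iomem *)core->regs + EXAMPLE_INT_STATUS))
                return IRQ_NONE;        /* not ours; let other handlers run */

        /* Mask the source and defer the real work to a bottom half here. */
        return IRQ_HANDLED;
}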
++ */ + -+ ret = rk_context_create_sysfs_files(dev); -+ if (ret) { -+ E("fail to create sysfs files, ret = %d", ret); -+ goto EXIT; -+ } ++/** ++ * @file mali_osk_locks.c ++ * Implemenation of the OS abstraction layer for the kernel device driver ++ */ + -+ s_rk_context = platform; ++#include "mali_osk_locks.h" ++#include "mali_kernel_common.h" ++#include "mali_osk.h" + -+ pm_runtime_set_autosuspend_delay(dev, 1000); -+ pm_runtime_use_autosuspend(dev); -+ pm_runtime_enable(dev); + -+EXIT: -+ return ret; -+} ++#ifdef DEBUG ++#ifdef LOCK_ORDER_CHECKING ++static DEFINE_SPINLOCK(lock_tracking_lock); ++static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid); ++static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid); ++static const char *const lock_order_to_string(_mali_osk_lock_order_t order); ++#endif /* LOCK_ORDER_CHECKING */ + -+static void rk_context_deinit(struct platform_device *pdev) ++void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) +{ -+ struct device *dev = &pdev->dev; -+ struct rk_context *platform = s_rk_context; -+ -+ pm_runtime_disable(dev); ++ checker->orig_flags = flags; ++ checker->owner = 0; + -+ s_rk_context = NULL; ++#ifdef LOCK_ORDER_CHECKING ++ checker->order = order; ++ checker->next = NULL; ++#endif ++} + -+ rk_context_remove_sysfs_files(dev); ++void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker) ++{ ++ checker->owner = _mali_osk_get_tid(); + -+ if (platform) { -+ platform->is_powered = false; -+ platform->dev = NULL; -+ kfree(platform); ++#ifdef LOCK_ORDER_CHECKING ++ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) { ++ if (!add_lock_to_log_and_check(checker, _mali_osk_get_tid())) { ++ printk(KERN_ERR "%d: ERROR lock %p taken while holding a lock of a higher order.\n", ++ _mali_osk_get_tid(), checker); ++ dump_stack(); ++ } + } ++#endif +} + -+/*---------------------------------------------------------------------------*/ -+/* for devfreq cooling. */ ++void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker) ++{ + -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++#ifdef LOCK_ORDER_CHECKING ++ if (!(checker->orig_flags & _MALI_OSK_LOCKFLAG_UNORDERED)) { ++ remove_lock_from_log(checker, _mali_osk_get_tid()); ++ } ++#endif ++ checker->owner = 0; ++} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) + -+#define FALLBACK_STATIC_TEMPERATURE 55000 ++#ifdef LOCK_ORDER_CHECKING ++/* Lock order checking ++ * ------------------- ++ * ++ * To assure that lock ordering scheme defined by _mali_osk_lock_order_t is strictly adhered to, the ++ * following function will, together with a linked list and some extra members in _mali_osk_lock_debug_s, ++ * make sure that a lock that is taken has a higher order than the current highest-order lock a ++ * thread holds. ++ * ++ * This is done in the following manner: ++ * - A linked list keeps track of locks held by a thread. ++ * - A `next' pointer is added to each lock. This is used to chain the locks together. ++ * - When taking a lock, the `add_lock_to_log_and_check' makes sure that taking ++ * the given lock is legal. It will follow the linked list to find the last ++ * lock taken by this thread. If the last lock's order was lower than the ++ * lock that is to be taken, it appends the new lock to the list and returns ++ * true, if not, it return false. This return value is assert()'ed on in ++ * _mali_osk_lock_wait(). 
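/*
 * Compact model of the lock-order tracking explained in the comment above:
 * each thread remembers the orders of the locks it currently holds, and a new
 * lock may only be taken if its order is strictly higher than the most
 * recently taken (and therefore highest) order held. The real code keeps a
 * linked list of lock objects per thread; a thread-local array of orders is
 * enough to show the idea.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_HELD 16

static __thread int held_orders[MAX_HELD];
static __thread int held_count;

static void ordered_lock(int order)
{
        /* An ordering violation would eventually deadlock; catch it here. */
        assert(held_count < MAX_HELD);
        assert(held_count == 0 || order > held_orders[held_count - 1]);
        held_orders[held_count++] = order;
        /* ... take the underlying mutex/spinlock here ... */
}

static void ordered_unlock(void)
{
        /* ... release the underlying lock here ... */
        held_count--;
}

int main(void)
{
        ordered_lock(1);        /* low-order lock */
        ordered_lock(5);        /* higher order: allowed */
        ordered_unlock();
        ordered_unlock();
        printf("lock order respected\n");
        return 0;
}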
++ */ + -+static u32 dynamic_coefficient; -+static u32 static_coefficient; -+static s32 ts[4]; -+static struct thermal_zone_device *gpu_tz; -+static struct ipa_power_model_data *model_data; ++static struct _mali_osk_lock_debug_s *lock_lookup_list; + -+/* Calculate gpu static power example for reference */ -+static unsigned long rk_model_static_power(struct devfreq *devfreq, -+ unsigned long voltage) ++static void dump_lock_tracking_list(void) +{ -+ int temperature, temp; -+ int temp_squared, temp_cubed, temp_scaling_factor; -+ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; -+ unsigned long static_power; ++ struct _mali_osk_lock_debug_s *l; ++ u32 n = 1; + -+ if (gpu_tz) { -+ int ret; ++ /* print list for debugging purposes */ ++ l = lock_lookup_list; + -+ ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); -+ if (ret) { -+ MALI_DEBUG_PRINT(2, ("fail to read temp: %d\n", ret)); -+ temperature = FALLBACK_STATIC_TEMPERATURE; -+ } -+ } else { -+ temperature = FALLBACK_STATIC_TEMPERATURE; ++ while (NULL != l) { ++ printk(" [lock: %p, tid_owner: %d, order: %d] ->", l, l->owner, l->order); ++ l = l->next; ++ MALI_DEBUG_ASSERT(n++ < 100); + } -+ -+ /* Calculate the temperature scaling factor. To be applied to the -+ * voltage scaled power. -+ */ -+ temp = temperature / 1000; -+ temp_squared = temp * temp; -+ temp_cubed = temp_squared * temp; -+ temp_scaling_factor = -+ (ts[3] * temp_cubed) -+ + (ts[2] * temp_squared) -+ + (ts[1] * temp) -+ + ts[0]; -+ -+ static_power = (((static_coefficient * voltage_cubed) >> 20) -+ * temp_scaling_factor) -+ / 1000000; -+ -+ return static_power; ++ printk(" NULL\n"); +} + -+/* Calculate gpu dynamic power example for reference */ -+static unsigned long rk_model_dynamic_power(struct devfreq *devfreq, -+ unsigned long freq, -+ unsigned long voltage) ++static int tracking_list_length(void) +{ -+ /* The inputs: freq (f) is in Hz, and voltage (v) in mV. -+ * The coefficient (c) is in mW/(MHz mV mV). 
-+ * -+ * This function calculates the dynamic power after this formula: -+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) -+ */ -+ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ -+ const unsigned long f_mhz = freq / 1000000; /* MHz */ -+ unsigned long dynamic_power; -+ -+ dynamic_power = (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ ++ struct _mali_osk_lock_debug_s *l; ++ u32 n = 0; ++ l = lock_lookup_list; + -+ return dynamic_power; ++ while (NULL != l) { ++ l = l->next; ++ n++; ++ MALI_DEBUG_ASSERT(n < 100); ++ } ++ return n; +} + -+static struct devfreq_cooling_power rk_cooling_ops = { -+ .get_static_power = rk_model_static_power, -+ .get_dynamic_power = rk_model_dynamic_power, -+}; -+ -+static unsigned long mali_devfreq_get_static_power(struct devfreq *devfreq, -+ unsigned long voltage) ++static mali_bool add_lock_to_log_and_check(struct _mali_osk_lock_debug_s *lock, uint32_t tid) +{ -+ return rockchip_ipa_get_static_power(model_data, voltage); -+} ++ mali_bool ret = MALI_FALSE; ++ _mali_osk_lock_order_t highest_order_for_tid = _MALI_OSK_LOCK_ORDER_FIRST; ++ struct _mali_osk_lock_debug_s *highest_order_lock = (struct _mali_osk_lock_debug_s *)0xbeefbabe; ++ struct _mali_osk_lock_debug_s *l; ++ unsigned long local_lock_flag; ++ u32 len; + -+static int power_model_simple_init(struct platform_device *pdev) -+{ -+ struct device_node *power_model_node; -+ const char *tz_name; -+ u32 static_power, dynamic_power; -+ u32 voltage, voltage_squared, voltage_cubed, frequency; ++ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag); ++ len = tracking_list_length(); + -+ if (of_find_compatible_node(pdev->dev.of_node, NULL, "simple-power-model")) { -+ of_property_read_u32(pdev->dev.of_node, -+ "dynamic-power-coefficient", -+ (u32 *)&rk_cooling_ops.dyn_power_coeff); -+ model_data = rockchip_ipa_power_model_init(&pdev->dev, -+ "gpu_leakage"); -+ if (IS_ERR_OR_NULL(model_data)) { -+ model_data = NULL; -+ dev_err(&pdev->dev, "failed to initialize power model\n"); -+ } else if (model_data->dynamic_coefficient) { -+ rk_cooling_ops.dyn_power_coeff = -+ model_data->dynamic_coefficient; -+ rk_cooling_ops.get_dynamic_power = NULL; -+ rk_cooling_ops.get_static_power = mali_devfreq_get_static_power; -+ } -+ if (!rk_cooling_ops.dyn_power_coeff) { -+ dev_err(&pdev->dev, "failed to get dynamic-coefficient\n"); -+ return -EINVAL; ++ l = lock_lookup_list; ++ if (NULL == l) { /* This is the first lock taken by this thread -- record and return true */ ++ lock_lookup_list = lock; ++ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); ++ return MALI_TRUE; ++ } else { ++ /* Traverse the locks taken and find the lock of the highest order. ++ * Since several threads may hold locks, each lock's owner must be ++ * checked so that locks not owned by this thread can be ignored. 
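For the dynamic-power model in the removed rk_model_dynamic_power() above, a worked example with made-up numbers makes the integer scaling easier to follow:

/*
 * Illustrative numbers only: voltage = 900 mV, freq = 400 MHz and a
 * dynamic_coefficient of 2500 in the driver's scaled units:
 *
 *   v2            = (900 * 900) / 1000           = 810
 *   f_mhz         = 400000000 / 1000000          = 400
 *   dynamic_power = (2500 * 810 * 400) / 1000000 = 810 mW
 */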
*/ ++ for (;;) { ++ MALI_DEBUG_ASSERT_POINTER(l); ++ if (tid == l->owner && l->order >= highest_order_for_tid) { ++ highest_order_for_tid = l->order; ++ highest_order_lock = l; ++ } ++ ++ if (NULL != l->next) { ++ l = l->next; ++ } else { ++ break; ++ } + } + -+ return 0; ++ l->next = lock; ++ l->next = NULL; + } + -+ power_model_node = of_get_child_by_name(pdev->dev.of_node, -+ "power_model"); -+ if (!power_model_node) { -+ dev_err(&pdev->dev, "could not find power_model node\n"); -+ return -ENODEV; -+ } -+ if (!of_device_is_compatible(power_model_node, -+ "arm,mali-simple-power-model")) { -+ dev_err(&pdev->dev, "power_model incompatible with simple power model\n"); -+ return -ENODEV; -+ } ++ /* We have now found the highest order lock currently held by this thread and can see if it is ++ * legal to take the requested lock. */ ++ ret = highest_order_for_tid < lock->order; + -+ if (of_property_read_string(power_model_node, "thermal-zone", -+ &tz_name)) { -+ dev_err(&pdev->dev, "ts in power_model not available\n"); -+ return -EINVAL; ++ if (!ret) { ++ printk(KERN_ERR "Took lock of order %d (%s) while holding lock of order %d (%s)\n", ++ lock->order, lock_order_to_string(lock->order), ++ highest_order_for_tid, lock_order_to_string(highest_order_for_tid)); ++ dump_lock_tracking_list(); + } + -+ gpu_tz = thermal_zone_get_zone_by_name(tz_name); -+ if (IS_ERR(gpu_tz)) { -+ pr_warn_ratelimited("Error getting gpu thermal zone '%s'(%ld), not yet ready?\n", -+ tz_name, -+ PTR_ERR(gpu_tz)); -+ gpu_tz = NULL; ++ if (len + 1 != tracking_list_length()) { ++ printk(KERN_ERR "************ lock: %p\n", lock); ++ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length()); ++ dump_lock_tracking_list(); ++ MALI_DEBUG_ASSERT_POINTER(NULL); + } + -+ if (of_property_read_u32(power_model_node, "static-power", -+ &static_power)) { -+ dev_err(&pdev->dev, "static-power in power_model not available\n"); -+ return -EINVAL; -+ } -+ if (of_property_read_u32(power_model_node, "dynamic-power", -+ &dynamic_power)) { -+ dev_err(&pdev->dev, "dynamic-power in power_model not available\n"); -+ return -EINVAL; -+ } -+ if (of_property_read_u32(power_model_node, "voltage", -+ &voltage)) { -+ dev_err(&pdev->dev, "voltage in power_model not available\n"); -+ return -EINVAL; -+ } -+ if (of_property_read_u32(power_model_node, "frequency", -+ &frequency)) { -+ dev_err(&pdev->dev, "frequency in power_model not available\n"); -+ return -EINVAL; -+ } -+ voltage_squared = (voltage * voltage) / 1000; -+ voltage_cubed = voltage * voltage * voltage; -+ static_coefficient = (static_power << 20) / (voltage_cubed >> 10); -+ dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) -+ * 1000) / frequency; ++ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); ++ return ret; ++} + -+ if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) { -+ dev_err(&pdev->dev, "ts in power_model not available\n"); -+ return -EINVAL; ++static void remove_lock_from_log(struct _mali_osk_lock_debug_s *lock, uint32_t tid) ++{ ++ struct _mali_osk_lock_debug_s *curr; ++ struct _mali_osk_lock_debug_s *prev = NULL; ++ unsigned long local_lock_flag; ++ u32 len; ++ u32 n = 0; ++ ++ spin_lock_irqsave(&lock_tracking_lock, local_lock_flag); ++ len = tracking_list_length(); ++ curr = lock_lookup_list; ++ ++ if (NULL == curr) { ++ printk(KERN_ERR "Error: Lock tracking list was empty on call to remove_lock_from_log\n"); ++ dump_lock_tracking_list(); + } + -+ return 0; -+} ++ MALI_DEBUG_ASSERT_POINTER(curr); + 
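The static-power coefficient derived in the removed power_model_simple_init() above round-trips back to the device-tree value when the runtime voltage matches the calibration voltage and the ts[] polynomial evaluates to 1000000. A worked example with made-up numbers:

/*
 * Illustrative numbers only: static-power = 300 mW at voltage = 1000 mV.
 *
 *   voltage_cubed      = 1000^3 = 1000000000
 *   static_coefficient = (300 << 20) / (1000000000 >> 10)
 *                      = 314572800 / 976562 = 322
 *
 * At runtime, with the same voltage and a temperature scaling factor of
 * 1000000, rk_model_static_power() yields
 *
 *   ((322 * 976562) >> 20) * 1000000 / 1000000 = 299 mW
 *
 * i.e. the configured 300 mW up to integer truncation.
 */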
-+#endif + -+#endif ++ while (lock != curr) { ++ prev = curr; + -+/*---------------------------------------------------------------------------*/ ++ MALI_DEBUG_ASSERT_POINTER(curr); ++ curr = curr->next; ++ MALI_DEBUG_ASSERT(n++ < 100); ++ } + -+#ifdef CONFIG_PM ++ if (NULL == prev) { ++ lock_lookup_list = curr->next; ++ } else { ++ MALI_DEBUG_ASSERT_POINTER(curr); ++ MALI_DEBUG_ASSERT_POINTER(prev); ++ prev->next = curr->next; ++ } + -+static int rk_platform_enable_clk_gpu(struct device *dev) -+{ -+ int ret = 0; -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) -+ struct mali_device *mdev = dev_get_drvdata(dev); ++ lock->next = NULL; + -+ ret = clk_bulk_enable(mdev->num_clks, mdev->clks); -+#endif -+ return ret; ++ if (len - 1 != tracking_list_length()) { ++ printk(KERN_ERR "************ lock: %p\n", lock); ++ printk(KERN_ERR "************ before: %d *** after: %d ****\n", len, tracking_list_length()); ++ dump_lock_tracking_list(); ++ MALI_DEBUG_ASSERT_POINTER(NULL); ++ } ++ ++ spin_unlock_irqrestore(&lock_tracking_lock, local_lock_flag); +} + -+static void rk_platform_disable_clk_gpu(struct device *dev) ++static const char *const lock_order_to_string(_mali_osk_lock_order_t order) +{ -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) -+ struct mali_device *mdev = dev_get_drvdata(dev); -+ -+ clk_bulk_disable(mdev->num_clks, mdev->clks); -+#endif ++ switch (order) { ++ case _MALI_OSK_LOCK_ORDER_SESSIONS: ++ return "_MALI_OSK_LOCK_ORDER_SESSIONS"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_MEM_SESSION: ++ return "_MALI_OSK_LOCK_ORDER_MEM_SESSION"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_MEM_INFO: ++ return "_MALI_OSK_LOCK_ORDER_MEM_INFO"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_MEM_PT_CACHE: ++ return "_MALI_OSK_LOCK_ORDER_MEM_PT_CACHE"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP: ++ return "_MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_PM_EXECUTION: ++ return "_MALI_OSK_LOCK_ORDER_PM_EXECUTION"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_EXECUTOR: ++ return "_MALI_OSK_LOCK_ORDER_EXECUTOR"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM: ++ return "_MALI_OSK_LOCK_ORDER_TIMELINE_SYSTEM"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_SCHEDULER: ++ return "_MALI_OSK_LOCK_ORDER_SCHEDULER"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED: ++ return "_MALI_OSK_LOCK_ORDER_SCHEDULER_DEFERRED"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_DMA_COMMAND: ++ return "_MALI_OSK_LOCK_ORDER_DMA_COMMAND"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_PROFILING: ++ return "_MALI_OSK_LOCK_ORDER_PROFILING"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_L2: ++ return "_MALI_OSK_LOCK_ORDER_L2"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_L2_COMMAND: ++ return "_MALI_OSK_LOCK_ORDER_L2_COMMAND"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_UTILIZATION: ++ return "_MALI_OSK_LOCK_ORDER_UTILIZATION"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS: ++ return "_MALI_OSK_LOCK_ORDER_SESSION_PENDING_JOBS"; ++ break; ++ case _MALI_OSK_LOCK_ORDER_PM_STATE: ++ return "_MALI_OSK_LOCK_ORDER_PM_STATE"; ++ break; ++ default: ++ return ""; ++ } +} ++#endif /* LOCK_ORDER_CHECKING */ ++#endif /* DEBUG */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h +new file mode 100644 +index 000000000..6fd5af952 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_locks.h +@@ -0,0 +1,326 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static int rk_platform_enable_gpu_regulator(struct device *dev) -+{ -+ int ret = 0; -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_REGULATOR) -+ struct mali_device *mdev = dev_get_drvdata(dev); ++/** ++ * @file mali_osk_locks.h ++ * Defines OS abstraction of lock and mutex ++ */ ++#ifndef _MALI_OSK_LOCKS_H ++#define _MALI_OSK_LOCKS_H + -+ if (mdev->regulator) -+ ret = regulator_enable(mdev->regulator); -+#endif -+ return ret; -+} ++#include ++#include ++#include + -+static void rk_platform_disable_gpu_regulator(struct device *dev) -+{ -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_REGULATOR) -+ struct mali_device *mdev = dev_get_drvdata(dev); ++#include + -+ if (mdev->regulator) -+ regulator_disable(mdev->regulator); ++#include "mali_osk_types.h" ++ ++#ifdef _cplusplus ++extern "C" { +#endif -+} + -+static int rk_platform_power_on_gpu(struct device *dev) -+{ -+ struct rk_context *platform = s_rk_context; -+ int ret = 0; ++ /* When DEBUG is enabled, this struct will be used to track owner, mode and order checking */ ++#ifdef DEBUG ++ struct _mali_osk_lock_debug_s { ++ u32 owner; ++ _mali_osk_lock_flags_t orig_flags; ++ _mali_osk_lock_order_t order; ++ struct _mali_osk_lock_debug_s *next; ++ }; ++#endif + -+ if (!(platform->is_powered)) { -+ ret = rk_platform_enable_clk_gpu(dev); -+ if (ret) { -+ E("fail to enable clk_gpu, ret : %d.", ret); -+ goto fail_to_enable_clk; -+ } ++ /* Anstraction of spinlock_t */ ++ struct _mali_osk_spinlock_s { ++#ifdef DEBUG ++ struct _mali_osk_lock_debug_s checker; ++#endif ++ spinlock_t spinlock; ++ }; + -+ ret = rk_platform_enable_gpu_regulator(dev); -+ if (ret) { -+ E("fail to enable vdd_gpu, ret : %d.", ret); -+ goto fail_to_enable_regulator; -+ } ++ /* Abstration of spinlock_t and lock flag which is used to store register's state before locking */ ++ struct _mali_osk_spinlock_irq_s { ++#ifdef DEBUG ++ struct _mali_osk_lock_debug_s checker; ++#endif + -+ if (cpu_is_rk3528()) { -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) -+ struct mali_device *mdev = dev_get_drvdata(dev); ++ spinlock_t spinlock; ++ unsigned long flags; ++ }; + -+ clk_set_rate(mdev->clock, mdev->current_freq); ++ /* Abstraction of rw_semaphore in OS */ ++ struct _mali_osk_mutex_rw_s { ++#ifdef DEBUG ++ struct _mali_osk_lock_debug_s checker; ++ _mali_osk_lock_mode_t mode; +#endif -+ } -+ platform->is_powered = true; -+ } + -+ return 0; ++ struct rw_semaphore rw_sema; ++ }; + -+fail_to_enable_regulator: -+ rk_platform_disable_clk_gpu(dev); ++ /* Mutex and mutex_interruptible functions share the same osk mutex struct */ ++ struct _mali_osk_mutex_s { ++#ifdef DEBUG ++ struct _mali_osk_lock_debug_s checker; ++#endif ++ struct mutex mutex; ++ }; + -+fail_to_enable_clk: -+ return ret; -+} ++#ifdef DEBUG ++ /** @brief _mali_osk_locks_debug_init/add/remove() functions are declared when DEBUG is enabled and ++ * defined in file mali_osk_locks.c. When LOCK_ORDER_CHECKING is enabled, calling these functions when we ++ * init/lock/unlock a lock/mutex, we could track lock order of a given tid. 
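One implicit design point in the structs above is worth spelling out: the debug helpers are handed casts such as (struct _mali_osk_lock_debug_s *)lock, which are only valid because `checker' is the first member of every lock struct. A hypothetical compile-time guard for that assumption (not part of the patch) could look like this:

#include <linux/build_bug.h>
#include <linux/stddef.h>

#ifdef DEBUG
/* Keeps the pointer-punning in the lock/unlock wrappers honest. */
static_assert(offsetof(struct _mali_osk_spinlock_s, checker) == 0,
	      "checker must stay the first member");
#endif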
*/ ++ void _mali_osk_locks_debug_init(struct _mali_osk_lock_debug_s *checker, _mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order); ++ void _mali_osk_locks_debug_add(struct _mali_osk_lock_debug_s *checker); ++ void _mali_osk_locks_debug_remove(struct _mali_osk_lock_debug_s *checker); + -+static void rk_platform_power_off_gpu(struct device *dev) -+{ -+ struct rk_context *platform = s_rk_context; ++ /** @brief This function can return a given lock's owner when DEBUG is enabled. */ ++ static inline u32 _mali_osk_lock_get_owner(struct _mali_osk_lock_debug_s *lock) ++ { ++ return lock->owner; ++ } ++#else ++#define _mali_osk_locks_debug_init(x, y, z) do {} while (0) ++#define _mali_osk_locks_debug_add(x) do {} while (0) ++#define _mali_osk_locks_debug_remove(x) do {} while (0) ++#endif + -+ if (platform->is_powered) { -+ if (cpu_is_rk3528()) { -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) -+ struct mali_device *mdev = dev_get_drvdata(dev); ++ /** @brief Before use _mali_osk_spin_lock, init function should be used to allocate memory and initial spinlock*/ ++ static inline _mali_osk_spinlock_t *_mali_osk_spinlock_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) ++ { ++ _mali_osk_spinlock_t *lock = NULL; + -+ //use normal pll 200M for gpu when suspend -+ clk_set_rate(mdev->clock, 200000000); -+#endif ++ lock = kmalloc(sizeof(_mali_osk_spinlock_t), GFP_KERNEL); ++ if (NULL == lock) { ++ return NULL; + } -+ rk_platform_disable_clk_gpu(dev); -+ rk_platform_disable_gpu_regulator(dev); ++ spin_lock_init(&lock->spinlock); ++ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); ++ return lock; ++ } + -+ platform->is_powered = false; ++ /** @brief Lock a spinlock */ ++ static inline void _mali_osk_spinlock_lock(_mali_osk_spinlock_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ spin_lock(&lock->spinlock); ++ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); + } -+} + -+int rk_platform_init_opp_table(struct mali_device *mdev) -+{ -+ return rockchip_init_opp_table(mdev->dev, &mdev->opp_info, -+ "clk_mali", "mali"); -+} ++ /** @brief Unlock a spinlock */ ++ static inline void _mali_osk_spinlock_unlock(_mali_osk_spinlock_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); ++ spin_unlock(&lock->spinlock); ++ } + -+void rk_platform_uninit_opp_table(struct mali_device *mdev) -+{ -+ rockchip_uninit_opp_table(mdev->dev, &mdev->opp_info); -+} ++ /** @brief Free a memory block which the argument lock pointed to and its type must be ++ * _mali_osk_spinlock_t *. */ ++ static inline void _mali_osk_spinlock_term(_mali_osk_spinlock_t *lock) ++ { ++ /* Parameter validation */ ++ BUG_ON(NULL == lock); + -+static int mali_runtime_suspend(struct device *device) -+{ -+ int ret = 0; ++ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ ++ kfree(lock); ++ } + -+ MALI_DEBUG_PRINT(4, ("mali_runtime_suspend() called\n")); ++ /** @brief Before _mali_osk_spinlock_irq_lock/unlock/term() is called, init function should be ++ * called to initial spinlock and flags in struct _mali_osk_spinlock_irq_t. 
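A short usage sketch for the plain spinlock wrappers above; the demo_* names are invented, the order constant is one used elsewhere in this patch, and _MALI_OSK_LOCKFLAG_ORDERED is assumed to come from mali_osk_types.h, which is not shown in this hunk:

static _mali_osk_spinlock_t *demo_state_lock;
static u32 demo_counter;

static _mali_osk_errcode_t demo_setup(void)
{
	demo_state_lock = _mali_osk_spinlock_init(_MALI_OSK_LOCKFLAG_ORDERED,
						  _MALI_OSK_LOCK_ORDER_UTILIZATION);
	return (NULL == demo_state_lock) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK;
}

static void demo_bump(void)
{
	_mali_osk_spinlock_lock(demo_state_lock);
	demo_counter++;			/* short, non-sleeping critical section */
	_mali_osk_spinlock_unlock(demo_state_lock);
}

static void demo_teardown(void)
{
	_mali_osk_spinlock_term(demo_state_lock);
}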
*/ ++ static inline _mali_osk_spinlock_irq_t *_mali_osk_spinlock_irq_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) ++ { ++ _mali_osk_spinlock_irq_t *lock = NULL; ++ lock = kmalloc(sizeof(_mali_osk_spinlock_irq_t), GFP_KERNEL); + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->runtime_suspend) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->runtime_suspend(device); -+ } ++ if (NULL == lock) { ++ return NULL; ++ } + -+ if (!ret) -+ rk_platform_power_off_gpu(device); ++ lock->flags = 0; ++ spin_lock_init(&lock->spinlock); ++ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); ++ return lock; ++ } + -+ return ret; -+} ++ /** @brief Lock spinlock and save the register's state */ ++ static inline void _mali_osk_spinlock_irq_lock(_mali_osk_spinlock_irq_t *lock) ++ { ++ unsigned long tmp_flags; + -+static int mali_runtime_resume(struct device *device) -+{ -+ int ret = 0; ++ BUG_ON(NULL == lock); ++ spin_lock_irqsave(&lock->spinlock, tmp_flags); ++ lock->flags = tmp_flags; ++ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); ++ } + -+ MALI_DEBUG_PRINT(4, ("mali_runtime_resume() called\n")); ++ /** @brief Unlock spinlock with saved register's state */ ++ static inline void _mali_osk_spinlock_irq_unlock(_mali_osk_spinlock_irq_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); ++ spin_unlock_irqrestore(&lock->spinlock, lock->flags); ++ } + -+ rk_platform_power_on_gpu(device); ++ /** @brief Destroy a given memory block which lock pointed to, and the lock type must be ++ * _mali_osk_spinlock_irq_t *. */ ++ static inline void _mali_osk_spinlock_irq_term(_mali_osk_spinlock_irq_t *lock) ++ { ++ /* Parameter validation */ ++ BUG_ON(NULL == lock); + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->runtime_resume) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->runtime_resume(device); ++ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ ++ kfree(lock); + } + -+ return ret; -+} ++ /** @brief Before _mali_osk_mutex_rw_wait/signal/term() is called, we should call ++ * _mali_osk_mutex_rw_init() to kmalloc a memory block and initial part of elements in it. 
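The _irq variant above is the one to use when the protected state is also touched from interrupt context. Note that the saved processor flags live inside the lock object itself, which works because only the current holder ever reads them back. A minimal, hypothetical caller:

static _mali_osk_spinlock_irq_t *demo_irq_lock;

static void demo_update_from_any_context(void)
{
	_mali_osk_spinlock_irq_lock(demo_irq_lock);	/* disables local IRQs, saves flags */
	/* ... touch state shared with an interrupt handler ... */
	_mali_osk_spinlock_irq_unlock(demo_irq_lock);	/* restores the saved flags */
}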
*/ ++ static inline _mali_osk_mutex_rw_t *_mali_osk_mutex_rw_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) ++ { ++ _mali_osk_mutex_rw_t *lock = NULL; + -+static int mali_runtime_idle(struct device *device) -+{ -+ int ret = 0; ++ lock = kmalloc(sizeof(_mali_osk_mutex_rw_t), GFP_KERNEL); + -+ MALI_DEBUG_PRINT(4, ("mali_runtime_idle() called\n")); ++ if (NULL == lock) { ++ return NULL; ++ } + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->runtime_idle) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->runtime_idle(device); -+ if (ret) -+ return ret; ++ init_rwsem(&lock->rw_sema); ++ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); ++ return lock; + } + -+ return 0; -+} -+#endif ++ /** @brief When call _mali_osk_mutex_rw_wait/signal() functions, the second argument mode ++ * should be assigned with value _MALI_OSK_LOCKMODE_RO or _MALI_OSK_LOCKMODE_RW */ ++ static inline void _mali_osk_mutex_rw_wait(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode) ++ { ++ BUG_ON(NULL == lock); ++ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode)); + -+static int mali_os_suspend(struct device *device) -+{ -+ int ret = 0; ++ if (mode == _MALI_OSK_LOCKMODE_RO) { ++ down_read(&lock->rw_sema); ++ } else { ++ down_write(&lock->rw_sema); ++ } + -+ MALI_DEBUG_PRINT(4, ("mali_os_suspend() called\n")); ++#ifdef DEBUG ++ if (mode == _MALI_OSK_LOCKMODE_RW) { ++ lock->mode = mode; ++ } else { /* mode == _MALI_OSK_LOCKMODE_RO */ ++ lock->mode = mode; ++ } ++ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); ++#endif ++ } + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->suspend) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->suspend(device); ++ /** @brief Up lock->rw_sema with up_read/write() accordinf argument mode's value. */ ++ static inline void _mali_osk_mutex_rw_signal(_mali_osk_mutex_rw_t *lock, _mali_osk_lock_mode_t mode) ++ { ++ BUG_ON(NULL == lock); ++ BUG_ON(!(_MALI_OSK_LOCKMODE_RO == mode || _MALI_OSK_LOCKMODE_RW == mode)); ++#ifdef DEBUG ++ /* make sure the thread releasing the lock actually was the owner */ ++ if (mode == _MALI_OSK_LOCKMODE_RW) { ++ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); ++ /* This lock now has no owner */ ++ lock->checker.owner = 0; ++ } ++#endif ++ ++ if (mode == _MALI_OSK_LOCKMODE_RO) { ++ up_read(&lock->rw_sema); ++ } else { ++ up_write(&lock->rw_sema); ++ } + } + -+ if (!ret) -+ rk_platform_power_off_gpu(device); ++ /** @brief Free a given memory block which lock pointed to and its type must be ++ * _mali_sok_mutex_rw_t *. 
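Reader and writer paths for the rw-semaphore wrapper above might look like the following sketch; the lock is assumed to have been created with _mali_osk_mutex_rw_init(), and the mode passed to signal must match the mode used for wait:

static void demo_read_side(_mali_osk_mutex_rw_t *lock)
{
	_mali_osk_mutex_rw_wait(lock, _MALI_OSK_LOCKMODE_RO);
	/* ... several readers may be here concurrently ... */
	_mali_osk_mutex_rw_signal(lock, _MALI_OSK_LOCKMODE_RO);
}

static void demo_write_side(_mali_osk_mutex_rw_t *lock)
{
	_mali_osk_mutex_rw_wait(lock, _MALI_OSK_LOCKMODE_RW);
	/* ... exclusive access ... */
	_mali_osk_mutex_rw_signal(lock, _MALI_OSK_LOCKMODE_RW);
}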
*/ ++ static inline void _mali_osk_mutex_rw_term(_mali_osk_mutex_rw_t *lock) ++ { ++ /* Parameter validation */ ++ BUG_ON(NULL == lock); + -+ return ret; -+} ++ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ ++ kfree(lock); ++ } + -+static int mali_os_resume(struct device *device) -+{ -+ int ret = 0; ++ /** @brief Mutex & mutex_interruptible share the same init and term function, because they have the ++ * same osk mutex struct, and the difference between them is which locking function they use */ ++ static inline _mali_osk_mutex_t *_mali_osk_mutex_init(_mali_osk_lock_flags_t flags, _mali_osk_lock_order_t order) ++ { ++ _mali_osk_mutex_t *lock = NULL; + -+ MALI_DEBUG_PRINT(4, ("mali_os_resume() called\n")); ++ lock = kmalloc(sizeof(_mali_osk_mutex_t), GFP_KERNEL); + -+ rk_platform_power_on_gpu(device); ++ if (NULL == lock) { ++ return NULL; ++ } ++ mutex_init(&lock->mutex); + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->resume) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->resume(device); ++ _mali_osk_locks_debug_init((struct _mali_osk_lock_debug_s *)lock, flags, order); ++ return lock; + } + -+ return ret; -+} ++ /** @brief Lock the lock->mutex with mutex_lock_interruptible function */ ++ static inline _mali_osk_errcode_t _mali_osk_mutex_wait_interruptible(_mali_osk_mutex_t *lock) ++ { ++ _mali_osk_errcode_t err = _MALI_OSK_ERR_OK; + -+static int mali_os_freeze(struct device *device) -+{ -+ int ret = 0; ++ BUG_ON(NULL == lock); + -+ MALI_DEBUG_PRINT(4, ("mali_os_freeze() called\n")); ++ if (mutex_lock_interruptible(&lock->mutex)) { ++ printk(KERN_WARNING "Mali: Can not lock mutex\n"); ++ err = _MALI_OSK_ERR_RESTARTSYSCALL; ++ } + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->freeze) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->freeze(device); ++ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); ++ return err; + } + -+ return ret; -+} ++ /** @brief Unlock the lock->mutex which is locked with mutex_lock_interruptible() function. */ ++ static inline void _mali_osk_mutex_signal_interruptible(_mali_osk_mutex_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); ++ mutex_unlock(&lock->mutex); ++ } + -+static int mali_os_thaw(struct device *device) -+{ -+ int ret = 0; ++ /** @brief Lock the lock->mutex just with mutex_lock() function which could not be interruptted. */ ++ static inline void _mali_osk_mutex_wait(_mali_osk_mutex_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ mutex_lock(&lock->mutex); ++ _mali_osk_locks_debug_add((struct _mali_osk_lock_debug_s *)lock); ++ } + -+ MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n")); ++ /** @brief Unlock the lock->mutex which is locked with mutex_lock() function. */ ++ static inline void _mali_osk_mutex_signal(_mali_osk_mutex_t *lock) ++ { ++ BUG_ON(NULL == lock); ++ _mali_osk_locks_debug_remove((struct _mali_osk_lock_debug_s *)lock); ++ mutex_unlock(&lock->mutex); ++ } + -+ if (device->driver && -+ device->driver->pm && -+ device->driver->pm->thaw) { -+ /* Need to notify Mali driver about this event */ -+ ret = device->driver->pm->thaw(device); ++ /** @brief Free a given memory block which lock point. 
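The interruptible variant above can fail when a signal arrives, so callers on a user-space ioctl path have to be ready to back out. A hedged sketch with an invented caller name:

static _mali_osk_errcode_t demo_from_ioctl(_mali_osk_mutex_t *lock)
{
	if (_MALI_OSK_ERR_OK != _mali_osk_mutex_wait_interruptible(lock)) {
		/* Interrupted while sleeping: let the syscall be restarted. */
		return _MALI_OSK_ERR_RESTARTSYSCALL;
	}

	/* ... critical section that may sleep ... */

	_mali_osk_mutex_signal_interruptible(lock);
	return _MALI_OSK_ERR_OK;
}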
*/ ++ static inline void _mali_osk_mutex_term(_mali_osk_mutex_t *lock) ++ { ++ /* Parameter validation */ ++ BUG_ON(NULL == lock); ++ ++ /* Linux requires no explicit termination of spinlocks, semaphores, or rw_semaphores */ ++ kfree(lock); + } + -+ return ret; ++#ifdef _cplusplus +} -+ -+static const struct dev_pm_ops mali_gpu_device_type_pm_ops = { -+ .suspend = mali_os_suspend, -+ .resume = mali_os_resume, -+ .freeze = mali_os_freeze, -+ .thaw = mali_os_thaw, -+#ifdef CONFIG_PM -+ .runtime_suspend = mali_runtime_suspend, -+ .runtime_resume = mali_runtime_resume, -+ .runtime_idle = mali_runtime_idle, +#endif -+}; -+ -+static const struct device_type mali_gpu_device_device_type = { -+ .pm = &mali_gpu_device_type_pm_ops, -+}; + ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c +new file mode 100644 +index 000000000..994b04dad +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_low_level_mem.c +@@ -0,0 +1,146 @@ +/* -+ * platform_specific_data of platform_device of mali_gpu. ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+static const struct mali_gpu_device_data mali_gpu_data = { -+ .shared_mem_size = 1024 * 1024 * 1024, /* 1GB */ -+ .max_job_runtime = 60000, /* 60 seconds */ -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) -+ .gpu_cooling_ops = &rk_cooling_ops, -+#endif -+#endif -+}; + -+static void mali_platform_device_add_config(struct platform_device *pdev) -+{ -+ pdev->name = MALI_GPU_NAME_UTGARD, -+ pdev->id = 0; -+ pdev->dev.type = &mali_gpu_device_device_type; -+ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask, -+ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); -+} ++/** ++ * @file mali_osk_low_level_mem.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+/*---------------------------------------------------------------------------*/ -+/* platform_device_functions called by common_part. */ ++#include ++#include ++#include + -+int mali_platform_device_init(struct platform_device *pdev) ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_ukk.h" ++ ++void _mali_osk_mem_barrier(void) +{ -+ int err = 0; ++ mb(); ++} + -+ mali_platform_device_add_config(pdev); ++void _mali_osk_write_mem_barrier(void) ++{ ++ wmb(); ++} + -+ D("to add platform_specific_data to platform_device_of_mali."); -+ err = platform_device_add_data(pdev, -+ &mali_gpu_data, -+ sizeof(mali_gpu_data)); -+ if (err) { -+ E("fail to add platform_specific_data. err : %d.", err); -+ return err; -+ } ++mali_io_address _mali_osk_mem_mapioregion(uintptr_t phys, u32 size, const char *description) ++{ ++ return (mali_io_address)ioremap(phys, size); ++} + -+ err = rk_context_init(pdev); -+ if (err) { -+ E("fail to init rk_context. 
err : %d.", err); -+ return err; -+ } ++void _mali_osk_mem_unmapioregion(uintptr_t phys, u32 size, mali_io_address virt) ++{ ++ iounmap((void *)virt); ++} + -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) -+ if (of_machine_is_compatible("rockchip,rk3036")) -+ return 0; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) -+ err = power_model_simple_init(pdev); -+ if (err) { -+ E("fail to init simple_power_model, err : %d.", err); -+ rk_context_deinit(pdev); -+ return err; -+ } ++_mali_osk_errcode_t inline _mali_osk_mem_reqregion(uintptr_t phys, u32 size, const char *description) ++{ ++#if MALI_LICENSE_IS_GPL ++ return _MALI_OSK_ERR_OK; /* GPL driver gets the mem region for the resources registered automatically */ ++#else ++ return ((NULL == request_mem_region(phys, size, description)) ? _MALI_OSK_ERR_NOMEM : _MALI_OSK_ERR_OK); +#endif ++} ++ ++void inline _mali_osk_mem_unreqregion(uintptr_t phys, u32 size) ++{ ++#if !MALI_LICENSE_IS_GPL ++ release_mem_region(phys, size); +#endif -+ return 0; +} + -+void mali_platform_device_deinit(struct platform_device *pdev) ++void inline _mali_osk_mem_iowrite32_relaxed(volatile mali_io_address addr, u32 offset, u32 val) +{ -+ MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n")); ++ __raw_writel(cpu_to_le32(val), ((u8 *)addr) + offset); ++} + -+ rk_context_deinit(pdev); ++u32 inline _mali_osk_mem_ioread32(volatile mali_io_address addr, u32 offset) ++{ ++ return ioread32(((u8 *)addr) + offset); +} -diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h b/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h -new file mode 100644 -index 000000000..bd939350c ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* ---------------------------------------------------------------------------- -+ * File: rk_ext.h -+ * -+ * Desc: rk_ext_on_mali_ko 中的 通行定义等. -+ * -+ * Usage: -+ * -+ * Note: -+ * -+ * Author: ChenZhen -+ * -+ * Log: -+ * -+ * ---------------------------------------------------------------------------- -+ */ + -+#ifndef __RK_EXT_H__ -+#define __RK_EXT_H__ ++void inline _mali_osk_mem_iowrite32(volatile mali_io_address addr, u32 offset, u32 val) ++{ ++ iowrite32(val, ((u8 *)addr) + offset); ++} + -+#ifdef __cplusplus -+extern "C" { -+#endif ++void _mali_osk_cache_flushall(void) ++{ ++ /** @note Cached memory is not currently supported in this implementation */ ++} + -+/*---------------------------------------------------------------------------*/ ++void _mali_osk_cache_ensure_uncached_range_flushed(void *uncached_mapping, u32 offset, u32 size) ++{ ++ _mali_osk_write_mem_barrier(); ++} + -+/** version of rk_ext on mali_ko, aka. rk_ko_ver. 
*/ -+#define RK_KO_VER (5) ++u32 _mali_osk_mem_write_safe(void __user *dest, const void __user *src, u32 size) ++{ ++#define MALI_MEM_SAFE_COPY_BLOCK_SIZE 4096 ++ u32 retval = 0; ++ void *temp_buf; + -+/*---------------------------------------------------------------------------*/ ++ temp_buf = kmalloc(MALI_MEM_SAFE_COPY_BLOCK_SIZE, GFP_KERNEL); ++ if (NULL != temp_buf) { ++ u32 bytes_left_to_copy = size; ++ u32 i; ++ for (i = 0; i < size; i += MALI_MEM_SAFE_COPY_BLOCK_SIZE) { ++ u32 size_to_copy; ++ u32 size_copied; ++ u32 bytes_left; + -+#ifdef __cplusplus -+} -+#endif ++ if (bytes_left_to_copy > MALI_MEM_SAFE_COPY_BLOCK_SIZE) { ++ size_to_copy = MALI_MEM_SAFE_COPY_BLOCK_SIZE; ++ } else { ++ size_to_copy = bytes_left_to_copy; ++ } + -+#endif /* __RK_EXT_H__ */ ++ bytes_left = copy_from_user(temp_buf, ((char *)src) + i, size_to_copy); ++ size_copied = size_to_copy - bytes_left; + -diff --git a/drivers/gpu/arm/mali400/mali/readme.txt b/drivers/gpu/arm/mali400/mali/readme.txt -new file mode 100755 -index 000000000..6785ac933 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/readme.txt -@@ -0,0 +1,28 @@ -+Building the Mali Device Driver for Linux -+----------------------------------------- ++ bytes_left = copy_to_user(((char *)dest) + i, temp_buf, size_copied); ++ size_copied -= bytes_left; + -+Build the Mali Device Driver for Linux by running the following make command: ++ bytes_left_to_copy -= size_copied; ++ retval += size_copied; + -+KDIR= USING_UMP= BUILD= make ++ if (size_copied != size_to_copy) { ++ break; /* Early out, we was not able to copy this entire block */ ++ } ++ } + -+where -+ kdir_path: Path to your Linux Kernel directory -+ ump_option: 1 = Enable UMP support(*) -+ 0 = disable UMP support -+ build_option: debug = debug build of driver -+ release = release build of driver ++ kfree(temp_buf); ++ } + -+(*) For newer Linux Kernels, the Module.symvers file for the UMP device driver -+ must be available. The UMP_SYMVERS_FILE variable in the Makefile should -+ point to this file. This file is generated when the UMP driver is built. ++ return retval; ++} + -+The result will be a mali.ko file, which can be loaded into the Linux kernel -+by using the insmod command. ++_mali_osk_errcode_t _mali_ukk_mem_write_safe(_mali_uk_mem_write_safe_s *args) ++{ ++ void __user *src; ++ void __user *dst; ++ struct mali_session_data *session; + -+Use of UMP is not recommended. The dma-buf API in the Linux kernel has -+replaced UMP. The Mali Device Driver will be built with dma-buf support if the -+kernel config includes enabled dma-buf. ++ MALI_DEBUG_ASSERT_POINTER(args); + -+The kernel needs to be provided with a platform_device struct for the Mali GPU -+device. See the mali_utgard.h header file for how to set up the Mali GPU -+resources. 
-diff --git a/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h b/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h ++ session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ ++ if (NULL == session) { ++ return _MALI_OSK_ERR_INVALID_ARGS; ++ } ++ ++ src = (void __user *)(uintptr_t)args->src; ++ dst = (void __user *)(uintptr_t)args->dest; ++ ++ /* Return number of bytes actually copied */ ++ args->size = _mali_osk_mem_write_safe(dst, src, args->size); ++ return _MALI_OSK_ERR_OK; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c new file mode 100644 -index 000000000..0345fb169 +index 000000000..5d28d0eb3 --- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h -@@ -0,0 +1,131 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_mali.c +@@ -0,0 +1,503 @@ +/* -+ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -326429,674 +328278,507 @@ index 000000000..0345fb169 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef _MALI200_REGS_H_ -+#define _MALI200_REGS_H_ + +/** -+ * Enum for management register addresses. ++ * @file mali_osk_mali.c ++ * Implementation of the OS abstraction layer which is specific for the Mali kernel device driver + */ -+enum mali200_mgmt_reg { -+ MALI200_REG_ADDR_MGMT_VERSION = 0x1000, -+ MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x1004, -+ MALI200_REG_ADDR_MGMT_STATUS = 0x1008, -+ MALI200_REG_ADDR_MGMT_CTRL_MGMT = 0x100c, -+ -+ MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x1020, -+ MALI200_REG_ADDR_MGMT_INT_CLEAR = 0x1024, -+ MALI200_REG_ADDR_MGMT_INT_MASK = 0x1028, -+ MALI200_REG_ADDR_MGMT_INT_STATUS = 0x102c, ++#include "../platform/rk/custom_log.h" + -+ MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x1050, ++#include ++#include ++#include ++#include ++#include ++#include + -+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x1080, -+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x1084, -+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x1088, -+ MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x108c, ++#include "mali_osk_mali.h" ++#include "mali_kernel_common.h" /* MALI_xxx macros */ ++#include "mali_osk.h" /* kernel side OS functions */ ++#include "mali_kernel_linux.h" + -+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x10a0, -+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x10a4, -+ MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x10ac, ++static mali_bool mali_secure_mode_enabled = MALI_FALSE; ++static mali_bool mali_secure_mode_supported = MALI_FALSE; + -+ MALI200_REG_ADDR_MGMT_PERFMON_CONTR = 0x10b0, -+ MALI200_REG_ADDR_MGMT_PERFMON_BASE = 0x10b4, ++/* Function that init the mali gpu secure mode */ ++void (*mali_secure_mode_deinit)(void) = NULL; ++/* Function that reset GPU and enable the mali gpu secure mode */ ++int (*mali_gpu_reset_and_secure_mode_enable)(void) = NULL; ++/* Function that reset GPU and disable the mali gpu secure mode */ ++int (*mali_gpu_reset_and_secure_mode_disable)(void) = NULL; + -+ MALI200_REG_SIZEOF_REGISTER_BANK = 0x10f0 + -+}; ++#ifdef CONFIG_MALI_DT + -+#define MALI200_REG_VAL_PERF_CNT_ENABLE 1 ++#define MALI_OSK_INVALID_RESOURCE_ADDRESS 0xFFFFFFFF + -+enum mali200_mgmt_ctrl_mgmt { -+ MALI200_REG_VAL_CTRL_MGMT_STOP_BUS 
= (1 << 0), -+ MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES = (1 << 3), -+ MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET = (1 << 5), -+ MALI200_REG_VAL_CTRL_MGMT_START_RENDERING = (1 << 6), -+ MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET = (1 << 7), /* Only valid for Mali-300 and later */ -+}; ++/** ++ * Define the max number of resource we could have. ++ */ ++#define MALI_OSK_MAX_RESOURCE_NUMBER 27 + -+enum mali200_mgmt_irq { -+ MALI200_REG_VAL_IRQ_END_OF_FRAME = (1 << 0), -+ MALI200_REG_VAL_IRQ_END_OF_TILE = (1 << 1), -+ MALI200_REG_VAL_IRQ_HANG = (1 << 2), -+ MALI200_REG_VAL_IRQ_FORCE_HANG = (1 << 3), -+ MALI200_REG_VAL_IRQ_BUS_ERROR = (1 << 4), -+ MALI200_REG_VAL_IRQ_BUS_STOP = (1 << 5), -+ MALI200_REG_VAL_IRQ_CNT_0_LIMIT = (1 << 6), -+ MALI200_REG_VAL_IRQ_CNT_1_LIMIT = (1 << 7), -+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR = (1 << 8), -+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9), -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW = (1 << 10), -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW = (1 << 11), -+ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED = (1 << 12), -+}; ++/** ++ * Define the max number of resource with interrupts, and they are ++ * the first 20 elements in array mali_osk_resource_bank. ++ */ ++#define MALI_OSK_RESOURCE_WITH_IRQ_NUMBER 20 + -+#define MALI200_REG_VAL_IRQ_MASK_ALL ((enum mali200_mgmt_irq) (\ -+ MALI200_REG_VAL_IRQ_END_OF_FRAME |\ -+ MALI200_REG_VAL_IRQ_END_OF_TILE |\ -+ MALI200_REG_VAL_IRQ_HANG |\ -+ MALI200_REG_VAL_IRQ_FORCE_HANG |\ -+ MALI200_REG_VAL_IRQ_BUS_ERROR |\ -+ MALI200_REG_VAL_IRQ_BUS_STOP |\ -+ MALI200_REG_VAL_IRQ_CNT_0_LIMIT |\ -+ MALI200_REG_VAL_IRQ_CNT_1_LIMIT |\ -+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\ -+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\ -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\ -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW |\ -+ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED)) ++/** ++ * pp core start and end location in mali_osk_resource_bank array. ++ */ ++#define MALI_OSK_RESOURCE_PP_LOCATION_START 2 ++#define MALI_OSK_RESOURCE_PP_LOCATION_END 17 + -+#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\ -+ MALI200_REG_VAL_IRQ_END_OF_FRAME |\ -+ MALI200_REG_VAL_IRQ_FORCE_HANG |\ -+ MALI200_REG_VAL_IRQ_BUS_ERROR |\ -+ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\ -+ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\ -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\ -+ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW)) ++/** ++ * L2 cache start and end location in mali_osk_resource_bank array. ++ */ ++#define MALI_OSK_RESOURCE_L2_LOCATION_START 20 ++#define MALI_OSK_RESOURCE_l2_LOCATION_END 22 + -+#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0)) ++/** ++ * DMA unit location. ++ */ ++#define MALI_OSK_RESOURCE_DMA_LOCATION 26 + -+enum mali200_mgmt_status { -+ MALI200_REG_VAL_STATUS_RENDERING_ACTIVE = (1 << 0), -+ MALI200_REG_VAL_STATUS_BUS_STOPPED = (1 << 4), -+}; ++static _mali_osk_resource_t mali_osk_resource_bank[MALI_OSK_MAX_RESOURCE_NUMBER] = { ++ /*-------------------------------------------------------*/ ++ /* rk_ext : to use dts_for_mali_ko_befor_r5p0-01rel0. 
*/ ++ /* {.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "IRQGP",}, */ ++ {.description = "Mali_GP", .base = MALI_OFFSET_GP, .irq_name = "Mali_GP_IRQ",}, ++ /* {.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "IRQGPMMU",}, */ ++ {.description = "Mali_GP_MMU", .base = MALI_OFFSET_GP_MMU, .irq_name = "Mali_GP_MMU_IRQ",}, ++ /* {.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "IRQPP0",}, */ ++ {.description = "Mali_PP0", .base = MALI_OFFSET_PP0, .irq_name = "Mali_PP0_IRQ",}, ++ /* {.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "IRQPPMMU0",}, */ ++ {.description = "Mali_PP0_MMU", .base = MALI_OFFSET_PP0_MMU, .irq_name = "Mali_PP0_MMU_IRQ",}, ++ /* {.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "IRQPP1",}, */ ++ {.description = "Mali_PP1", .base = MALI_OFFSET_PP1, .irq_name = "Mali_PP1_IRQ",}, ++ /* {.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "IRQPPMMU1",}, */ ++ {.description = "Mali_PP1_MMU", .base = MALI_OFFSET_PP1_MMU, .irq_name = "Mali_PP1_MMU_IRQ",}, + -+enum mali200_render_unit { -+ MALI200_REG_ADDR_FRAME = 0x0000, -+ MALI200_REG_ADDR_RSW = 0x0004, -+ MALI200_REG_ADDR_STACK = 0x0030, -+ MALI200_REG_ADDR_STACK_SIZE = 0x0034, -+ MALI200_REG_ADDR_ORIGIN_OFFSET_X = 0x0040 ++ {.description = "Mali_PP2", .base = MALI_OFFSET_PP2, .irq_name = "Mali_PP2_IRQ",}, ++ {.description = "Mali_PP2_MMU", .base = MALI_OFFSET_PP2_MMU, .irq_name = "Mali_PP2_MMU_IRQ",}, ++ {.description = "Mali_PP3", .base = MALI_OFFSET_PP3, .irq_name = "Mali_PP3_IRQ",}, ++ {.description = "Mali_PP3_MMU", .base = MALI_OFFSET_PP3_MMU, .irq_name = "Mali_PP3_MMU_IRQ",}, ++ /*-------------------------------------------------------*/ ++ {.description = "Mali_PP4", .base = MALI_OFFSET_PP4, .irq_name = "IRQPP4",}, ++ {.description = "Mali_PP4_MMU", .base = MALI_OFFSET_PP4_MMU, .irq_name = "IRQPPMMU4",}, ++ {.description = "Mali_PP5", .base = MALI_OFFSET_PP5, .irq_name = "IRQPP5",}, ++ {.description = "Mali_PP5_MMU", .base = MALI_OFFSET_PP5_MMU, .irq_name = "IRQPPMMU5",}, ++ {.description = "Mali_PP6", .base = MALI_OFFSET_PP6, .irq_name = "IRQPP6",}, ++ {.description = "Mali_PP6_MMU", .base = MALI_OFFSET_PP6_MMU, .irq_name = "IRQPPMMU6",}, ++ {.description = "Mali_PP7", .base = MALI_OFFSET_PP7, .irq_name = "IRQPP7",}, ++ {.description = "Mali_PP7_MMU", .base = MALI_OFFSET_PP7_MMU, .irq_name = "IRQPPMMU",}, ++ {.description = "Mali_PP_Broadcast", .base = MALI_OFFSET_PP_BCAST, .irq_name = "IRQPP",}, ++ {.description = "Mali_PMU", .base = MALI_OFFSET_PMU, .irq_name = "IRQPMU",}, ++ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE0,}, ++ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE1,}, ++ {.description = "Mali_L2", .base = MALI_OFFSET_L2_RESOURCE2,}, ++ {.description = "Mali_PP_MMU_Broadcast", .base = MALI_OFFSET_PP_BCAST_MMU,}, ++ {.description = "Mali_Broadcast", .base = MALI_OFFSET_BCAST,}, ++ {.description = "Mali_DLBU", .base = MALI_OFFSET_DLBU,}, ++ {.description = "Mali_DMA", .base = MALI_OFFSET_DMA,}, +}; + -+enum mali200_wb_unit { -+ MALI200_REG_ADDR_WB0 = 0x0100, -+ MALI200_REG_ADDR_WB1 = 0x0200, -+ MALI200_REG_ADDR_WB2 = 0x0300 -+}; ++static int _mali_osk_get_compatible_name(const char **out_string) ++{ ++ struct device_node *node = mali_platform_device->dev.of_node; + -+enum mali200_wb_unit_regs { -+ MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000, -+ MALI200_REG_ADDR_WB_SOURCE_ADDR = 0x0004, -+}; ++ MALI_DEBUG_ASSERT(NULL != node); + -+/* This should be in the top 16 bit of the 
version register of Mali PP */ -+#define MALI200_PP_PRODUCT_ID 0xC807 -+#define MALI300_PP_PRODUCT_ID 0xCE07 -+#define MALI400_PP_PRODUCT_ID 0xCD07 -+#define MALI450_PP_PRODUCT_ID 0xCF07 -+#define MALI470_PP_PRODUCT_ID 0xCF08 ++ return of_property_read_string(node, "compatible", out_string); ++} + ++_mali_osk_errcode_t _mali_osk_resource_initialize(void) ++{ ++ mali_bool mali_is_450 = MALI_FALSE, mali_is_470 = MALI_FALSE; ++ int i, pp_core_num = 0, l2_core_num = 0; ++ struct resource *res; ++ const char *compatible_name = NULL; + ++ if (0 == _mali_osk_get_compatible_name(&compatible_name)) { ++ if (0 == strncmp(compatible_name, "arm,mali-450", strlen("arm,mali-450"))) { ++ mali_is_450 = MALI_TRUE; ++ MALI_DEBUG_PRINT(2, ("mali-450 device tree detected.")); ++ } else if (0 == strncmp(compatible_name, "arm,mali-470", strlen("arm,mali-470"))) { ++ mali_is_470 = MALI_TRUE; ++ MALI_DEBUG_PRINT(2, ("mali-470 device tree detected.")); ++ } ++ } + -+#endif /* _MALI200_REGS_H_ */ -diff --git a/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h b/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h -new file mode 100644 -index 000000000..7f8b58fd6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ for (i = 0; i < MALI_OSK_RESOURCE_WITH_IRQ_NUMBER; i++) { ++ res = platform_get_resource_byname(mali_platform_device, IORESOURCE_IRQ, mali_osk_resource_bank[i].irq_name); ++ if (res) { ++ mali_osk_resource_bank[i].irq = res->start; ++ } else { ++ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; ++ } ++ } + -+#ifndef _MALIGP2_CONROL_REGS_H_ -+#define _MALIGP2_CONROL_REGS_H_ ++ for (i = MALI_OSK_RESOURCE_PP_LOCATION_START; i <= MALI_OSK_RESOURCE_PP_LOCATION_END; i++) { ++ if (MALI_OSK_INVALID_RESOURCE_ADDRESS != mali_osk_resource_bank[i].base) { ++ pp_core_num++; ++ } ++ } + -+/** -+ * These are the different geometry processor control registers. -+ * Their usage is to control and monitor the operation of the -+ * Vertex Shader and the Polygon List Builder in the geometry processor. -+ * Addresses are in 32-bit word relative sizes. -+ * @see [P0081] "Geometry Processor Data Structures" for details -+ */ ++ /* We have to divide by 2, because we caculate twice for only one pp(pp_core and pp_mmu_core). 
*/ ++ if (0 != pp_core_num % 2) { ++ MALI_DEBUG_PRINT(2, ("The value of pp core number isn't normal.")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+typedef enum { -+ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR = 0x00, -+ MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR = 0x04, -+ MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR = 0x08, -+ MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR = 0x0c, -+ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR = 0x10, -+ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR = 0x14, -+ MALIGP2_REG_ADDR_MGMT_CMD = 0x20, -+ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT = 0x24, -+ MALIGP2_REG_ADDR_MGMT_INT_CLEAR = 0x28, -+ MALIGP2_REG_ADDR_MGMT_INT_MASK = 0x2C, -+ MALIGP2_REG_ADDR_MGMT_INT_STAT = 0x30, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x3C, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x40, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x44, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x48, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x4C, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x50, -+ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x54, -+ MALIGP2_REG_ADDR_MGMT_STATUS = 0x68, -+ MALIGP2_REG_ADDR_MGMT_VERSION = 0x6C, -+ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ = 0x80, -+ MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ = 0x84, -+ MALIGP2_CONTR_AXI_BUS_ERROR_STAT = 0x94, -+ MALIGP2_REGISTER_ADDRESS_SPACE_SIZE = 0x98, -+} maligp_reg_addr_mgmt_addr; ++ pp_core_num /= 2; + -+#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1 ++ /** ++ * we can caculate the number of l2 cache core according the number of pp core number ++ * and device type(mali400/mali450/mali470). ++ */ ++ l2_core_num = 1; ++ if (mali_is_450) { ++ if (pp_core_num > 4) { ++ l2_core_num = 3; ++ } else if (pp_core_num <= 4) { ++ l2_core_num = 2; ++ } ++ } + -+/** -+ * Commands to geometry processor. -+ * @see MALIGP2_CTRL_REG_CMD -+ */ -+typedef enum { -+ MALIGP2_REG_VAL_CMD_START_VS = (1 << 0), -+ MALIGP2_REG_VAL_CMD_START_PLBU = (1 << 1), -+ MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC = (1 << 4), -+ MALIGP2_REG_VAL_CMD_RESET = (1 << 5), -+ MALIGP2_REG_VAL_CMD_FORCE_HANG = (1 << 6), -+ MALIGP2_REG_VAL_CMD_STOP_BUS = (1 << 9), -+ MALI400GP_REG_VAL_CMD_SOFT_RESET = (1 << 10), /* only valid for Mali-300 and later */ -+} mgp_contr_reg_val_cmd; ++ for (i = MALI_OSK_RESOURCE_l2_LOCATION_END; i > MALI_OSK_RESOURCE_L2_LOCATION_START + l2_core_num - 1; i--) { ++ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; ++ } + ++ /* If device is not mali-450 type, we have to remove related resource from resource bank. */ ++ if (!(mali_is_450 || mali_is_470)) { ++ for (i = MALI_OSK_RESOURCE_l2_LOCATION_END + 1; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) { ++ mali_osk_resource_bank[i].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; ++ } ++ } + -+/** @defgroup MALIGP2_IRQ -+ * Interrupt status of geometry processor. 
-+ * @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, -+ * MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT -+ * @{ -+ */ -+#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST (1 << 0) -+#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST (1 << 1) -+#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM (1 << 2) -+#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ (1 << 3) -+#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ (1 << 4) -+#define MALIGP2_REG_VAL_IRQ_HANG (1 << 5) -+#define MALIGP2_REG_VAL_IRQ_FORCE_HANG (1 << 6) -+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT (1 << 7) -+#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT (1 << 8) -+#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR (1 << 9) -+#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR (1 << 10) -+#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR (1 << 11) -+#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED (1 << 12) -+#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD (1 << 13) -+#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD (1 << 14) -+#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED (1 << 19) -+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20) -+#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW (1 << 21) -+#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS (1 << 22) ++ if (mali_is_470) ++ mali_osk_resource_bank[MALI_OSK_RESOURCE_DMA_LOCATION].base = MALI_OSK_INVALID_RESOURCE_ADDRESS; + -+/* Mask defining all IRQs in Mali GP */ -+#define MALIGP2_REG_VAL_IRQ_MASK_ALL \ -+ (\ -+ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \ -+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \ -+ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \ -+ MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ | \ -+ MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ | \ -+ MALIGP2_REG_VAL_IRQ_HANG | \ -+ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \ -+ MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT | \ -+ MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT | \ -+ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \ -+ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \ -+ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \ -+ MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED | \ -+ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \ -+ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \ -+ MALI400GP_REG_VAL_IRQ_RESET_COMPLETED | \ -+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \ -+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \ -+ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS) ++ return _MALI_OSK_ERR_OK; ++} + -+/* Mask defining the IRQs in Mali GP which we use */ -+#define MALIGP2_REG_VAL_IRQ_MASK_USED \ -+ (\ -+ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \ -+ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \ -+ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \ -+ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \ -+ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \ -+ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \ -+ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \ -+ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \ -+ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \ -+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \ -+ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \ -+ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS) ++_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res) ++{ ++ int i; + -+/* Mask defining non IRQs on MaliGP2*/ -+#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0 ++ if (NULL == mali_platform_device) { ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + -+/** }@ defgroup MALIGP2_IRQ*/ ++ /* Traverse all of resources in resources bank to find the matching one. 
*/ ++ for (i = 0; i < MALI_OSK_MAX_RESOURCE_NUMBER; i++) { ++ if (mali_osk_resource_bank[i].base == addr) { ++ if (NULL != res) { ++ res->base = addr + _mali_osk_resource_base_address(); ++ res->description = mali_osk_resource_bank[i].description; ++ res->irq = mali_osk_resource_bank[i].irq; ++ } ++ return _MALI_OSK_ERR_OK; ++ } ++ } + -+/** @defgroup MALIGP2_STATUS -+ * The different Status values to the geometry processor. -+ * @see MALIGP2_CTRL_REG_STATUS -+ * @{ -+ */ -+#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE 0x0002 -+#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED 0x0004 -+#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE 0x0008 -+#define MALIGP2_REG_VAL_STATUS_BUS_ERROR 0x0040 -+#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR 0x0100 -+/** }@ defgroup MALIGP2_STATUS*/ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++} + -+#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\ -+ MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\ -+ MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE) ++uintptr_t _mali_osk_resource_base_address(void) ++{ ++ struct resource *reg_res = NULL; ++ uintptr_t ret = 0; + ++ reg_res = platform_get_resource(mali_platform_device, IORESOURCE_MEM, 0); + -+#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\ -+ MALIGP2_REG_VAL_STATUS_BUS_ERROR |\ -+ MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR ) ++ if (NULL != reg_res) { ++ ret = reg_res->start; ++ } + -+/* This should be in the top 16 bit of the version register of gp.*/ -+#define MALI200_GP_PRODUCT_ID 0xA07 -+#define MALI300_GP_PRODUCT_ID 0xC07 -+#define MALI400_GP_PRODUCT_ID 0xB07 -+#define MALI450_GP_PRODUCT_ID 0xD07 ++ return ret; ++} + -+/** -+ * The different sources for instrumented on the geometry processor. -+ * @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC -+ */ ++void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size) ++{ ++ struct device_node *node = mali_platform_device->dev.of_node; ++ int length = 0, i = 0; ++ u32 u; + -+enum MALIGP2_cont_reg_perf_cnt_src { -+ MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a, -+}; ++ MALI_DEBUG_PRINT(2, ("Get pmu config from device tree configuration.\n")); + -+#endif -diff --git a/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c -new file mode 100644 -index 000000000..7df934c12 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c -@@ -0,0 +1,13 @@ -+/* -+ * Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ MALI_DEBUG_ASSERT(NULL != node); + -+#include "mali_timestamp.h" ++ if (!of_get_property(node, "pmu_domain_config", &length)) { ++ return; ++ } + -+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */ -diff --git a/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h -new file mode 100644 -index 000000000..f52097c19 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. 
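A sketch of how a core-probing path might consume _mali_osk_resource_find(); the demo_* name is invented, while MALI_OFFSET_GP comes from the resource bank earlier in this file:

static _mali_osk_errcode_t demo_locate_gp(void)
{
	_mali_osk_resource_t res;

	if (_MALI_OSK_ERR_OK != _mali_osk_resource_find(MALI_OFFSET_GP, &res)) {
		return _MALI_OSK_ERR_ITEM_NOT_FOUND;
	}

	/* res.base is an absolute physical address, res.irq a Linux IRQ number. */
	MALI_DEBUG_PRINT(2, ("GP registers at 0x%08lx (%s)\n",
			     (unsigned long)res.base, res.description));
	return _MALI_OSK_ERR_OK;
}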
All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ if (array_size != length / sizeof(u32)) { ++ MALI_PRINT_ERROR(("Wrong pmu domain config in device tree.")); ++ return; ++ } + -+#ifndef __MALI_TIMESTAMP_H__ -+#define __MALI_TIMESTAMP_H__ ++ of_property_for_each_u32(node, "pmu_domain_config", u) { ++ domain_config_array[i] = (u16)u; ++ i++; ++ } + -+#include "mali_osk.h" ++ return; ++} + -+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void) ++u32 _mali_osk_get_pmu_switch_delay(void) +{ -+ /* -+ * reset counters and overflow flags -+ */ ++ struct device_node *node = mali_platform_device->dev.of_node; ++ u32 switch_delay; + -+ u32 mask = (1 << 0) | /* enable all three counters */ -+ (0 << 1) | /* reset both Count Registers to 0x0 */ -+ (1 << 2) | /* reset the Cycle Counter Register to 0x0 */ -+ (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */ -+ (0 << 4) | /* Count Register 0 interrupt enable */ -+ (0 << 5) | /* Count Register 1 interrupt enable */ -+ (0 << 6) | /* Cycle Counter interrupt enable */ -+ (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */ -+ (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */ -+ (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */ ++ MALI_DEBUG_ASSERT(NULL != node); + -+ __asm__ __volatile__("MCR p15, 0, %0, c15, c12, 0" : : "r"(mask)); ++ if (0 == of_property_read_u32(node, "pmu_switch_delay", &switch_delay)) { ++ return switch_delay; ++ } else { ++ MALI_DEBUG_PRINT(2, ("Couldn't find pmu_switch_delay in device tree configuration.\n")); ++ } + -+ return _MALI_OSK_ERR_OK; ++ return 0; +} + -+MALI_STATIC_INLINE u64 _mali_timestamp_get(void) -+{ -+ u32 result; ++#else /* CONFIG_MALI_DT */ /* 若未 定义 CONFIG_MALI_DT. */ + -+ /* this is for the clock cycles */ -+ __asm__ __volatile__("MRC p15, 0, %0, c15, c12, 1" : "=r"(result)); ++_mali_osk_errcode_t _mali_osk_resource_find(u32 addr, _mali_osk_resource_t *res) ++{ ++ int i; ++ uintptr_t phys_addr; + -+ return (u64)result; -+} ++ if (NULL == mali_platform_device) { ++ /* Not connected to a device */ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ } + -+#endif /* __MALI_TIMESTAMP_H__ */ -diff --git a/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c -new file mode 100644 -index 000000000..7df934c12 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c -@@ -0,0 +1,13 @@ -+/* -+ * Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ phys_addr = addr + _mali_osk_resource_base_address(); ++ for (i = 0; i < mali_platform_device->num_resources; i++) { ++ if (IORESOURCE_MEM == resource_type(&(mali_platform_device->resource[i])) && ++ mali_platform_device->resource[i].start == phys_addr) { ++ if (NULL != res) { ++ res->base = phys_addr; ++ res->description = mali_platform_device->resource[i].name; + -+#include "mali_timestamp.h" ++ /* Any (optional) IRQ resource belonging to this resource will follow */ ++ if ((i + 1) < mali_platform_device->num_resources && ++ IORESOURCE_IRQ == resource_type(&(mali_platform_device->resource[i + 1]))) { ++ res->irq = mali_platform_device->resource[i + 1].start; ++ } else { ++ res->irq = -1; ++ } ++ } ++ return _MALI_OSK_ERR_OK; ++ } ++ } + -+/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */ -diff --git a/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h -new file mode 100644 -index 000000000..709a16a82 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++} + -+#ifndef __MALI_TIMESTAMP_H__ -+#define __MALI_TIMESTAMP_H__ ++uintptr_t _mali_osk_resource_base_address(void) ++{ ++ uintptr_t lowest_addr = (uintptr_t)(0 - 1); ++ uintptr_t ret = 0; + -+#include "mali_osk.h" ++ if (NULL != mali_platform_device) { ++ int i; ++ for (i = 0; i < mali_platform_device->num_resources; i++) { ++ if (mali_platform_device->resource[i].flags & IORESOURCE_MEM && ++ mali_platform_device->resource[i].start < lowest_addr) { ++ lowest_addr = mali_platform_device->resource[i].start; ++ ret = lowest_addr; ++ } ++ } ++ } + -+MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void) -+{ -+ return _MALI_OSK_ERR_OK; ++ return ret; +} + -+MALI_STATIC_INLINE u64 _mali_timestamp_get(void) ++void _mali_osk_device_data_pmu_config_get(u16 *domain_config_array, int array_size) +{ -+ return _mali_osk_boot_time_get_ns(); ++ _mali_osk_device_data data = { 0, }; ++ ++ MALI_DEBUG_PRINT(2, ("Get pmu config from platform device data.\n")); ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ /* Copy the custom customer power domain config */ ++ _mali_osk_memcpy(domain_config_array, data.pmu_domain_config, sizeof(data.pmu_domain_config)); ++ } ++ ++ return; +} + -+#endif /* __MALI_TIMESTAMP_H__ */ -diff --git a/drivers/gpu/arm/mali400/rk_ver_info.txt b/drivers/gpu/arm/mali400/rk_ver_info.txt -new file mode 100755 -index 000000000..2a6cbbbb5 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/rk_ver_info.txt -@@ -0,0 +1,11 @@ ++u32 _mali_osk_get_pmu_switch_delay(void) ++{ ++ _mali_osk_errcode_t err; ++ _mali_osk_device_data data = { 0, }; + -+r5p0-01rel0-1-x@0 -+ 对 arm_release_ver r5p0-01rel0 的定制集æˆ. -+ r5p0-01rel0 对 gpu çš„ dts 有大修改, 但这里出于兼容考虑, 仿—§ä½¿ç”¨ dts_for_mali_ko_befor_r5p0-01rel0. 
++ err = _mali_osk_device_data_get(&data); + -+r5p0-01rel0-2-x@0 -+ æ”¯æŒ mali_so æ¥èŽ·å– rk_ko_ver. -+ -+r5p0-01rel0-3-x@0 -+ 在 mali_control_timer_callback_chain 中使用 mod_timer, 而ä¸å†æ˜¯ add_timer. ++ if (_MALI_OSK_ERR_OK == err) { ++ return data.pmu_switch_delay; ++ } + -diff --git a/drivers/gpu/arm/mali400/ump/Kbuild b/drivers/gpu/arm/mali400/ump/Kbuild -new file mode 100755 -index 000000000..a3067ba72 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/Kbuild -@@ -0,0 +1,92 @@ -+# -+# Copyright (C) 2010-2012 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+# ++ return 0; ++} ++#endif /* CONFIG_MALI_DT */ + -+# Set default configuration to use, if Makefile didn't provide one. -+# Change this to use a different config.h -+CONFIG ?= default ++_mali_osk_errcode_t _mali_osk_device_data_get(_mali_osk_device_data *data) ++{ ++ MALI_DEBUG_ASSERT_POINTER(data); + -+# Link arch to the selected arch-config directory -+$(shell [ -L $(src)/arch ] && rm $(src)/arch) -+$(shell ln -sf arch-$(CONFIG) $(src)/arch) -+$(shell touch $(src)/arch/config.h) ++ if (NULL != mali_platform_device) { ++ struct mali_gpu_device_data *os_data = NULL; + -+UDD_FILE_PREFIX = ../mali/ ++ os_data = (struct mali_gpu_device_data *)mali_platform_device->dev.platform_data; ++ if (NULL != os_data) { ++ /* Copy data from OS dependant struct to Mali neutral struct (identical!) 
*/ ++ BUILD_BUG_ON(sizeof(*os_data) != sizeof(*data)); ++ _mali_osk_memcpy(data, os_data, sizeof(*os_data)); + -+# Get subversion revision number, fall back to 0000 if no svn info is available -+SVN_INFO = (cd $(src); svn info 2>/dev/null) ++ return _MALI_OSK_ERR_OK; ++ } ++ } + -+ifneq ($(shell $(SVN_INFO) 2>/dev/null),) -+# SVN detected -+SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null) -+DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV) -+CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-) -+CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-) -+REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-) ++ return _MALI_OSK_ERR_ITEM_NOT_FOUND; ++} + -+else # SVN -+GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null) -+ifneq ($(GIT_REV),) -+# Git detected -+DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV) -+CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci") -+CHANGED_REVISION := $(GIT_REV) -+REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null) ++u32 _mali_osk_identify_gpu_resource(void) ++{ ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_L2_RESOURCE1, NULL)) ++ /* Mali 450 */ ++ return 0x450; + -+else # Git -+# No Git or SVN detected -+DRIVER_REV := $(MALI_RELEASE_NAME) -+CHANGE_DATE := $(MALI_RELEASE_NAME) -+CHANGED_REVISION := $(MALI_RELEASE_NAME) -+endif -+endif ++ if (_MALI_OSK_ERR_OK == _mali_osk_resource_find(MALI_OFFSET_DLBU, NULL)) ++ /* Mali 470 */ ++ return 0x470; + -+ccflags-y += -DSVN_REV=$(SVN_REV) -+ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\" ++ /* Mali 400 */ ++ return 0x400; ++} + -+ccflags-y += -I$(src) -I$(src)/common -I$(src)/linux -I$(src)/../mali/common -I$(src)/../mali/linux -I$(src)/include -I$(src)/../../ump/include/ump -+ccflags-y += -DMALI_STATE_TRACKING=0 -+ccflags-y += -DMALI_ENABLE_CPU_CYCLES=0 -+ccflags-$(CONFIG_UMP_DEBUG) += -DDEBUG ++mali_bool _mali_osk_shared_interrupts(void) ++{ ++ u32 irqs[128]; ++ u32 i, j, irq, num_irqs_found = 0; + -+# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases: -+# The ARM proprietary product will only include the license/proprietary directory -+# The GPL product will only include the license/gpl directory ++ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); ++ MALI_DEBUG_ASSERT(128 >= mali_platform_device->num_resources); + -+ifeq ($(wildcard $(src)/linux/license/gpl/*),) -+ccflags-y += -I$(src)/linux/license/proprietary -I$(src)/../mali/linux/license/proprietary -+else -+ccflags-y += -I$(src)/linux/license/gpl -I$(src)/../mali/linux/license/gpl -+endif ++ for (i = 0; i < mali_platform_device->num_resources; i++) { ++ if (IORESOURCE_IRQ & mali_platform_device->resource[i].flags) { ++ irq = mali_platform_device->resource[i].start; + -+ump-y = common/ump_kernel_common.o \ -+ common/ump_kernel_descriptor_mapping.o \ -+ common/ump_kernel_api.o \ -+ common/ump_kernel_ref_drv.o \ -+ linux/ump_kernel_linux.o \ -+ linux/ump_kernel_memory_backend_os.o \ -+ linux/ump_kernel_memory_backend_dedicated.o \ -+ linux/ump_memory_backend.o \ -+ linux/ump_ukk_wrappers.o \ -+ linux/ump_ukk_ref_wrappers.o \ -+ linux/ump_osk_atomics.o \ -+ linux/ump_osk_low_level_mem.o \ -+ linux/ump_osk_misc.o \ -+ linux/ump_kernel_random_mapping.o ++ for (j = 0; j < num_irqs_found; ++j) { ++ if (irq == irqs[j]) { ++ return MALI_TRUE; ++ } ++ } + -+ifneq ($(CONFIG_MALI400),y) -+ump-y += 
$(UDD_FILE_PREFIX)linux/mali_osk_atomics.o \ -+ $(UDD_FILE_PREFIX)linux/mali_osk_locks.o \ -+ $(UDD_FILE_PREFIX)linux/mali_osk_memory.o \ -+ $(UDD_FILE_PREFIX)linux/mali_osk_math.o \ -+ $(UDD_FILE_PREFIX)linux/mali_osk_misc.o -+endif ++ irqs[num_irqs_found++] = irq; ++ } ++ } + -+obj-$(CONFIG_UMP) := ump.o ++ return MALI_FALSE; ++} + -diff --git a/drivers/gpu/arm/mali400/ump/Kconfig b/drivers/gpu/arm/mali400/ump/Kconfig -new file mode 100644 -index 000000000..ec3509057 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/Kconfig -@@ -0,0 +1,17 @@ -+# SPDX-License-Identifier: GPL-2.0 -+config UMP -+ tristate "UMP support" -+ depends on ARM -+ help -+ This enables support for the UMP memory allocation and sharing API. ++_mali_osk_errcode_t _mali_osk_gpu_secure_mode_init(void) ++{ ++ _mali_osk_device_data data = { 0, }; + -+ To compile this driver as a module, choose M here: the module will be -+ called ump. ++ if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) { ++ if ((NULL != data.secure_mode_init) && (NULL != data.secure_mode_deinit) ++ && (NULL != data.gpu_reset_and_secure_mode_enable) && (NULL != data.gpu_reset_and_secure_mode_disable)) { ++ int err = data.secure_mode_init(); ++ if (err) { ++ MALI_DEBUG_PRINT(1, ("Failed to init gpu secure mode.\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+config UMP_DEBUG -+ bool "Enable extra debug in UMP" -+ depends on UMP -+ default y -+ help -+ This enabled extra debug checks and messages in UMP. ++ mali_secure_mode_deinit = data.secure_mode_deinit; ++ mali_gpu_reset_and_secure_mode_enable = data.gpu_reset_and_secure_mode_enable; ++ mali_gpu_reset_and_secure_mode_disable = data.gpu_reset_and_secure_mode_disable; + -diff --git a/drivers/gpu/arm/mali400/ump/Makefile b/drivers/gpu/arm/mali400/ump/Makefile -new file mode 100644 -index 000000000..88b02a22f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/Makefile -@@ -0,0 +1,67 @@ -+# -+# Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+# ++ mali_secure_mode_supported = MALI_TRUE; ++ mali_secure_mode_enabled = MALI_FALSE; ++ return _MALI_OSK_ERR_OK; ++ } ++ } ++ MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; + -+# For each arch check: CROSS_COMPILE , KDIR , CFLAGS += -DARCH ++} + -+export ARCH ?= arm -+BUILD ?= debug ++_mali_osk_errcode_t _mali_osk_gpu_secure_mode_deinit(void) ++{ ++ if (NULL != mali_secure_mode_deinit) { ++ mali_secure_mode_deinit(); ++ mali_secure_mode_enabled = MALI_FALSE; ++ mali_secure_mode_supported = MALI_FALSE; ++ return _MALI_OSK_ERR_OK; ++ } ++ MALI_DEBUG_PRINT(3, ("GPU secure mode not supported.\n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; + -+check_cc2 = \ -+ $(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \ -+ then \ -+ echo "$(2)"; \ -+ else \ -+ echo "$(3)"; \ -+ fi ;) ++} + -+# Check that required parameters are supplied. 
-+ifeq ($(CONFIG),) -+CONFIG := default -+endif -+ifeq ($(CPU)$(KDIR),) -+$(error "KDIR or CPU must be specified.") -+endif + -+# Get any user defined KDIR- or maybe even a hardcoded KDIR -+-include KDIR_CONFIGURATION ++_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_enable(void) ++{ ++ /* the mali executor lock must be held before enter this function. */ + -+# Define host system directory -+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build ++ MALI_DEBUG_ASSERT(MALI_FALSE == mali_secure_mode_enabled); + -+ifeq ($(ARCH), arm) -+# when compiling for ARM we're cross compiling -+export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-) -+endif ++ if (NULL != mali_gpu_reset_and_secure_mode_enable) { ++ if (mali_gpu_reset_and_secure_mode_enable()) { ++ MALI_DEBUG_PRINT(1, ("Failed to reset GPU or enable gpu secure mode.\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ mali_secure_mode_enabled = MALI_TRUE; ++ return _MALI_OSK_ERR_OK; ++ } ++ MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; ++} + -+# look up KDIR based om CPU selection -+KDIR ?= $(KDIR-$(CPU)) ++_mali_osk_errcode_t _mali_osk_gpu_reset_and_secure_mode_disable(void) ++{ ++ /* the mali executor lock must be held before enter this function. */ + -+export CONFIG ++ MALI_DEBUG_ASSERT(MALI_TRUE == mali_secure_mode_enabled); + -+export CONFIG_UMP := m -+ifeq ($(BUILD),debug) -+export CONFIG_UMP_DEBUG := y -+else -+export CONFIG_UMP_DEBUG := n -+endif ++ if (NULL != mali_gpu_reset_and_secure_mode_disable) { ++ if (mali_gpu_reset_and_secure_mode_disable()) { ++ MALI_DEBUG_PRINT(1, ("Failed to reset GPU or disable gpu secure mode.\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ mali_secure_mode_enabled = MALI_FALSE; + -+ifeq ($(KDIR),) -+$(error No KDIR found for platform $(CPU)) -+endif ++ return _MALI_OSK_ERR_OK; + -+all: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) modules ++ } ++ MALI_DEBUG_PRINT(1, ("GPU secure mode not supported.\n")); ++ return _MALI_OSK_ERR_UNSUPPORTED; + -+kernelrelease: -+ $(MAKE) -C $(KDIR) kernelrelease ++} + -+clean: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) clean -+ $(MAKE) -C $(KDIR) M=$(CURDIR)/../mali clean -diff --git a/drivers/gpu/arm/mali400/ump/Makefile.common b/drivers/gpu/arm/mali400/ump/Makefile.common -new file mode 100755 -index 000000000..ad2c18da9 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/Makefile.common -@@ -0,0 +1,20 @@ -+# -+# Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+# ++mali_bool _mali_osk_gpu_secure_mode_is_enabled(void) ++{ ++ return mali_secure_mode_enabled; ++} + -+SRC = $(UMP_FILE_PREFIX)common/ump_kernel_common.c \ -+ $(UMP_FILE_PREFIX)common/ump_kernel_descriptor_mapping.c \ -+ $(UMP_FILE_PREFIX)common/ump_kernel_api.c \ -+ $(UMP_FILE_PREFIX)common/ump_kernel_ref_drv.c ++mali_bool _mali_osk_gpu_secure_mode_is_supported(void) ++{ ++ return mali_secure_mode_supported; ++} + -+# Get subversion revision number, fall back to 0000 if no svn info is available -+SVN_REV:=$(shell ((svnversion | grep -qv exported && echo -n 'Revision: ' && svnversion) || git svn info | sed -e 's/$$$$/M/' | grep '^Revision: ' || echo ${MALI_RELEASE_NAME}) 2>/dev/null | sed -e 's/^Revision: //') + -+EXTRA_CFLAGS += -DSVN_REV=$(SVN_REV) -+EXTRA_CFLAGS += -DSVN_REV_STRING=\"$(SVN_REV)\" -diff --git a/drivers/gpu/arm/mali400/ump/arch-default/config.h b/drivers/gpu/arm/mali400/ump/arch-default/config.h +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c new file mode 100644 -index 000000000..d4aef9dd0 +index 000000000..0b2d00762 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/arch-default/config.h -@@ -0,0 +1,24 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_math.c +@@ -0,0 +1,27 @@ +/* -+ * Copyright (C) 2010, 2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -327105,28 +328787,31 @@ index 000000000..d4aef9dd0 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __ARCH_CONFIG_H__ -+#define __ARCH_CONFIG_H__ -+ -+/* Use OS memory. */ -+#define ARCH_UMP_BACKEND_DEFAULT 1 -+ -+/* OS memory won't need a base address. */ -+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000 ++/** ++ * @file mali_osk_math.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+/* 512 MB maximum limit for UMP allocations. */ -+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 512UL * 1024UL * 1024UL ++#include "mali_osk.h" ++#include + ++u32 _mali_osk_clz(u32 input) ++{ ++ return 32 - fls(input); ++} + -+#endif /* __ARCH_CONFIG_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h b/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h ++u32 _mali_osk_fls(u32 input) ++{ ++ return fls(input); ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c new file mode 100644 -index 000000000..182e90c1d +index 000000000..174616b56 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h -@@ -0,0 +1,18 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_memory.c +@@ -0,0 +1,61 @@ +/* -+ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -327135,22 +328820,65 @@ index 000000000..182e90c1d + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __ARCH_CONFIG_H__ -+#define __ARCH_CONFIG_H__ ++/** ++ * @file mali_osk_memory.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+#define ARCH_UMP_BACKEND_DEFAULT 0 -+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0xE1000000 -+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 16UL * 1024UL * 1024UL ++#include "mali_osk.h" ++#include ++#include + -+#endif /* __ARCH_CONFIG_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/arch/config.h b/drivers/gpu/arm/mali400/ump/arch/config.h ++void inline *_mali_osk_calloc(u32 n, u32 size) ++{ ++ return kcalloc(n, size, GFP_KERNEL); ++} ++ ++void inline *_mali_osk_malloc(u32 size) ++{ ++ return kmalloc(size, GFP_KERNEL); ++} ++ ++void inline _mali_osk_free(void *ptr) ++{ ++ kfree(ptr); ++} ++ ++void inline *_mali_osk_valloc(u32 size) ++{ ++ return vmalloc(size); ++} ++ ++void inline _mali_osk_vfree(void *ptr) ++{ ++ vfree(ptr); ++} ++ ++void inline *_mali_osk_memcpy(void *dst, const void *src, u32 len) ++{ ++ return memcpy(dst, src, len); ++} ++ ++void inline *_mali_osk_memset(void *s, u32 c, u32 n) ++{ ++ return memset(s, c, n); ++} ++ ++mali_bool _mali_osk_mem_check_allocated(u32 max_allocated) ++{ ++ /* No need to prevent an out-of-memory dialogue appearing on Linux, ++ * so we always return MALI_TRUE. ++ */ ++ return MALI_TRUE; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c new file mode 100644 -index 000000000..d4aef9dd0 +index 000000000..9845187f8 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/arch/config.h -@@ -0,0 +1,24 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_misc.c +@@ -0,0 +1,81 @@ +/* -+ * Copyright (C) 2010, 2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -327159,26 +328887,83 @@ index 000000000..d4aef9dd0 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __ARCH_CONFIG_H__ -+#define __ARCH_CONFIG_H__ ++/** ++ * @file mali_osk_misc.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mali_osk.h" + -+/* Use OS memory. */ -+#define ARCH_UMP_BACKEND_DEFAULT 1 ++#if !defined(CONFIG_MALI_QUIET) ++void _mali_osk_dbgmsg(const char *fmt, ...) ++{ ++ va_list args; ++ va_start(args, fmt); ++ vprintk(fmt, args); ++ va_end(args); ++} ++#endif /* !defined(CONFIG_MALI_QUIET) */ + -+/* OS memory won't need a base address. */ -+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000 ++u32 _mali_osk_snprintf(char *buf, u32 size, const char *fmt, ...) ++{ ++ int res; ++ va_list args; ++ va_start(args, fmt); + -+/* 512 MB maximum limit for UMP allocations. 
*/ -+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 512UL * 1024UL * 1024UL ++ res = vscnprintf(buf, (size_t)size, fmt, args); + ++ va_end(args); ++ return res; ++} + -+#endif /* __ARCH_CONFIG_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c ++void _mali_osk_abort(void) ++{ ++ /* make a simple fault by dereferencing a NULL pointer */ ++ dump_stack(); ++ *(volatile int *)0 = 0; ++} ++ ++void _mali_osk_break(void) ++{ ++ _mali_osk_abort(); ++} ++ ++u32 _mali_osk_get_pid(void) ++{ ++ /* Thread group ID is the process ID on Linux */ ++ return (u32)current->tgid; ++} ++ ++char *_mali_osk_get_comm(void) ++{ ++ return (char *)current->comm; ++} ++ ++ ++u32 _mali_osk_get_tid(void) ++{ ++ /* pid is actually identifying the thread on Linux */ ++ u32 tid = current->pid; ++ ++ /* If the pid is 0 the core was idle. Instead of returning 0 we return a special number ++ * identifying which core we are on. */ ++ if (0 == tid) { ++ tid = -(1 + raw_smp_processor_id()); ++ } ++ ++ return tid; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c new file mode 100644 -index 000000000..36adb2f53 +index 000000000..a05f8f066 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c -@@ -0,0 +1,455 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_notification.c +@@ -0,0 +1,182 @@ +/* + * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * @@ -327189,1115 +328974,1643 @@ index 000000000..36adb2f53 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+#include "ump_kernel_interface.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_random_mapping.h" -+ ++/** ++ * @file mali_osk_notification.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+/* ---------------- UMP kernel space API functions follows ---------------- */ ++#include ++#include ++#include + ++/** ++ * Declaration of the notification queue object type ++ * Contains a linked list of notification pending delivery to user space. ++ * It also contains a wait queue of exclusive waiters blocked in the ioctl ++ * When a new notification is posted a single thread is resumed. ++ */ ++struct _mali_osk_notification_queue_t_struct { ++ spinlock_t mutex; /**< Mutex protecting the list */ ++ wait_queue_head_t receive_queue; /**< Threads waiting for new entries to the queue */ ++ struct list_head head; /**< List of notifications waiting to be picked up */ ++}; + ++typedef struct _mali_osk_notification_wrapper_t_struct { ++ struct list_head list; /**< Internal linked list variable */ ++ _mali_osk_notification_t data; /**< Notification data */ ++} _mali_osk_notification_wrapper_t; + -+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(ump_dd_handle memh) ++_mali_osk_notification_queue_t *_mali_osk_notification_queue_init(void) +{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; ++ _mali_osk_notification_queue_t *result; + -+ DEBUG_ASSERT_POINTER(mem); ++ result = (_mali_osk_notification_queue_t *)kmalloc(sizeof(_mali_osk_notification_queue_t), GFP_KERNEL); ++ if (NULL == result) return NULL; + -+ DBG_MSG(5, ("Returning secure ID. 
ID: %u\n", mem->secure_id)); ++ spin_lock_init(&result->mutex); ++ init_waitqueue_head(&result->receive_queue); ++ INIT_LIST_HEAD(&result->head); + -+ return mem->secure_id; ++ return result; +} + -+ -+ -+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id) ++_mali_osk_notification_t *_mali_osk_notification_create(u32 type, u32 size) +{ -+ ump_dd_mem *mem; ++ /* OPT Recycling of notification objects */ ++ _mali_osk_notification_wrapper_t *notification; + -+ DBG_MSG(5, ("Getting handle from secure ID. ID: %u\n", secure_id)); -+ mem = ump_random_mapping_get(device.secure_id_map, (int)secure_id); -+ if (NULL == mem) { -+ DBG_MSG(1, ("Secure ID not found. ID: %u\n", secure_id)); -+ return UMP_DD_HANDLE_INVALID; ++ notification = (_mali_osk_notification_wrapper_t *)kmalloc(sizeof(_mali_osk_notification_wrapper_t) + size, ++ GFP_KERNEL | __GFP_HIGH | __GFP_RETRY_MAYFAIL); ++ if (NULL == notification) { ++ MALI_DEBUG_PRINT(1, ("Failed to create a notification object\n")); ++ return NULL; + } + -+ /* Keep the reference taken in ump_random_mapping_get() */ ++ /* Init the list */ ++ INIT_LIST_HEAD(¬ification->list); + -+ return (ump_dd_handle)mem; -+} ++ if (0 != size) { ++ notification->data.result_buffer = ((u8 *)notification) + sizeof(_mali_osk_notification_wrapper_t); ++ } else { ++ notification->data.result_buffer = NULL; ++ } + ++ /* set up the non-allocating fields */ ++ notification->data.notification_type = type; ++ notification->data.result_buffer_size = size; + ++ /* all ok */ ++ return &(notification->data); ++} + -+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle memh) ++void _mali_osk_notification_delete(_mali_osk_notification_t *object) +{ -+ ump_dd_mem *mem = (ump_dd_mem *) memh; ++ _mali_osk_notification_wrapper_t *notification; ++ MALI_DEBUG_ASSERT_POINTER(object); + -+ DEBUG_ASSERT_POINTER(mem); ++ notification = container_of(object, _mali_osk_notification_wrapper_t, data); + -+ return mem->nr_blocks; ++ /* Free the container */ ++ kfree(notification); +} + -+ -+ -+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle memh, ump_dd_physical_block *blocks, unsigned long num_blocks) ++void _mali_osk_notification_queue_term(_mali_osk_notification_queue_t *queue) +{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; ++ _mali_osk_notification_t *result; ++ MALI_DEBUG_ASSERT_POINTER(queue); + -+ DEBUG_ASSERT_POINTER(mem); ++ while (_MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, &result)) { ++ _mali_osk_notification_delete(result); ++ } + -+ if (blocks == NULL) { -+ DBG_MSG(1, ("NULL parameter in ump_dd_phys_blocks_get()\n")); -+ return UMP_DD_INVALID; -+ } ++ /* not much to do, just free the memory */ ++ kfree(queue); ++} ++void _mali_osk_notification_queue_send(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t *object) ++{ ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ unsigned long irq_flags; ++#endif + -+ if (mem->nr_blocks != num_blocks) { -+ DBG_MSG(1, ("Specified number of blocks do not match actual number of blocks\n")); -+ return UMP_DD_INVALID; -+ } ++ _mali_osk_notification_wrapper_t *notification; ++ MALI_DEBUG_ASSERT_POINTER(queue); ++ MALI_DEBUG_ASSERT_POINTER(object); + -+ DBG_MSG(5, ("Returning physical block information. 
ID: %u\n", mem->secure_id)); ++ notification = container_of(object, _mali_osk_notification_wrapper_t, data); + -+ _mali_osk_memcpy(blocks, mem->block_array, sizeof(ump_dd_physical_block) * mem->nr_blocks); ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ spin_lock_irqsave(&queue->mutex, irq_flags); ++#else ++ spin_lock(&queue->mutex); ++#endif + -+ return UMP_DD_SUCCESS; -+} ++ list_add_tail(¬ification->list, &queue->head); + ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ spin_unlock_irqrestore(&queue->mutex, irq_flags); ++#else ++ spin_unlock(&queue->mutex); ++#endif + ++ /* and wake up one possible exclusive waiter */ ++ wake_up(&queue->receive_queue); ++} + -+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle memh, unsigned long index, ump_dd_physical_block *block) ++_mali_osk_errcode_t _mali_osk_notification_queue_dequeue(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result) +{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ unsigned long irq_flags; ++#endif + -+ DEBUG_ASSERT_POINTER(mem); ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_ITEM_NOT_FOUND; ++ _mali_osk_notification_wrapper_t *wrapper_object; + -+ if (block == NULL) { -+ DBG_MSG(1, ("NULL parameter in ump_dd_phys_block_get()\n")); -+ return UMP_DD_INVALID; -+ } ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ spin_lock_irqsave(&queue->mutex, irq_flags); ++#else ++ spin_lock(&queue->mutex); ++#endif + -+ if (index >= mem->nr_blocks) { -+ DBG_MSG(5, ("Invalid index specified in ump_dd_phys_block_get()\n")); -+ return UMP_DD_INVALID; ++ if (!list_empty(&queue->head)) { ++ wrapper_object = list_entry(queue->head.next, _mali_osk_notification_wrapper_t, list); ++ *result = &(wrapper_object->data); ++ list_del_init(&wrapper_object->list); ++ ret = _MALI_OSK_ERR_OK; + } + -+ DBG_MSG(5, ("Returning physical block information. ID: %u, index: %lu\n", mem->secure_id, index)); -+ -+ *block = mem->block_array[index]; ++#if defined(MALI_UPPER_HALF_SCHEDULING) ++ spin_unlock_irqrestore(&queue->mutex, irq_flags); ++#else ++ spin_unlock(&queue->mutex); ++#endif + -+ return UMP_DD_SUCCESS; ++ return ret; +} + -+ -+ -+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle memh) ++_mali_osk_errcode_t _mali_osk_notification_queue_receive(_mali_osk_notification_queue_t *queue, _mali_osk_notification_t **result) +{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; ++ /* check input */ ++ MALI_DEBUG_ASSERT_POINTER(queue); ++ MALI_DEBUG_ASSERT_POINTER(result); + -+ DEBUG_ASSERT_POINTER(mem); ++ /* default result */ ++ *result = NULL; + -+ DBG_MSG(5, ("Returning size. ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes)); ++ if (wait_event_interruptible(queue->receive_queue, ++ _MALI_OSK_ERR_OK == _mali_osk_notification_queue_dequeue(queue, result))) { ++ return _MALI_OSK_ERR_RESTARTSYSCALL; ++ } + -+ return mem->size_bytes; ++ return _MALI_OSK_ERR_OK; /* all ok */ +} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c +new file mode 100644 +index 000000000..e28e2eb21 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_pm.c +@@ -0,0 +1,83 @@ ++/** ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + ++/** ++ * @file mali_osk_pm.c ++ * Implementation of the callback functions from common power management ++ */ + ++#include + -+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle memh) -+{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; -+ int new_ref; -+ -+ DEBUG_ASSERT_POINTER(mem); -+ -+ new_ref = _ump_osk_atomic_inc_and_read(&mem->ref_count); ++#include "mali_kernel_linux.h" ++#ifdef CONFIG_PM_RUNTIME ++#include ++#endif /* CONFIG_PM_RUNTIME */ ++#include ++#include ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+ DBG_MSG(5, ("Memory reference incremented. ID: %u, new value: %d\n", mem->secure_id, new_ref)); ++/* Can NOT run in atomic context */ ++_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_sync(void) ++{ ++#ifdef CONFIG_PM_RUNTIME ++ int err; ++ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); ++ err = pm_runtime_get_sync(&(mali_platform_device->dev)); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) ++ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); ++#endif ++ if (0 > err) { ++ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get_sync() returned error code %d\n", err)); ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif ++ return _MALI_OSK_ERR_OK; +} + -+ -+ -+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle memh) ++/* Can run in atomic context */ ++_mali_osk_errcode_t _mali_osk_pm_dev_ref_get_async(void) +{ -+ ump_dd_mem *mem = (ump_dd_mem *)memh; ++#ifdef CONFIG_PM_RUNTIME ++ int err; ++ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); ++ err = pm_runtime_get(&(mali_platform_device->dev)); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) ++ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); ++#endif ++ if (0 > err && -EINPROGRESS != err) { ++ MALI_PRINT_ERROR(("Mali OSK PM: pm_runtime_get() returned error code %d\n", err)); ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif ++ return _MALI_OSK_ERR_OK; ++} + -+ DEBUG_ASSERT_POINTER(mem); + -+ ump_random_mapping_put(mem); ++/* Can run in atomic context */ ++void _mali_osk_pm_dev_ref_put(void) ++{ ++#ifdef CONFIG_PM_RUNTIME ++ MALI_DEBUG_ASSERT_POINTER(mali_platform_device); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) ++ pm_runtime_mark_last_busy(&(mali_platform_device->dev)); ++ pm_runtime_put_autosuspend(&(mali_platform_device->dev)); ++#else ++ pm_runtime_put(&(mali_platform_device->dev)); ++#endif ++#endif +} + ++void _mali_osk_pm_dev_barrier(void) ++{ ++#ifdef CONFIG_PM_RUNTIME ++ pm_runtime_barrier(&(mali_platform_device->dev)); ++#endif ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c +new file mode 100644 +index 000000000..9e977ea4d +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_profiling.c +@@ -0,0 +1,1282 @@ ++/* ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include + ++#include ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_ukk.h" ++#include "mali_uk_types.h" ++#include "mali_osk_profiling.h" ++#include "mali_linux_trace.h" ++#include "mali_gp.h" ++#include "mali_pp.h" ++#include "mali_l2_cache.h" ++#include "mali_user_settings_db.h" ++#include "mali_executor.h" ++#include "mali_memory_manager.h" + -+/* --------------- Handling of user space requests follows --------------- */ -+ ++#define MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE 100 ++#define MALI_PROFILING_STREAM_HOLD_TIME 1000000 /*1 ms */ + -+_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args) -+{ -+ ump_session_data *session_data; ++#define MALI_PROFILING_STREAM_BUFFER_SIZE (1 << 12) ++#define MALI_PROFILING_STREAM_BUFFER_NUM 100 + -+ DEBUG_ASSERT_POINTER(args); -+ DEBUG_ASSERT_POINTER(args->ctx); ++/** ++ * Define the mali profiling stream struct. ++ */ ++typedef struct mali_profiling_stream { ++ u8 data[MALI_PROFILING_STREAM_BUFFER_SIZE]; ++ u32 used_size; ++ struct list_head list; ++} mali_profiling_stream; + -+ session_data = (ump_session_data *)args->ctx; ++typedef struct mali_profiling_stream_list { ++ spinlock_t spin_lock; ++ struct list_head free_list; ++ struct list_head queue_list; ++} mali_profiling_stream_list; + -+ /* check compatability */ -+ if (args->version == UMP_IOCTL_API_VERSION) { -+ DBG_MSG(3, ("API version set to newest %d (compatible)\n", -+ GET_VERSION(args->version))); -+ args->compatible = 1; -+ session_data->api_version = args->version; -+ } else { -+ DBG_MSG(2, ("API version set to %d (incompatible with client version %d)\n", -+ GET_VERSION(UMP_IOCTL_API_VERSION), GET_VERSION(args->version))); -+ args->compatible = 0; -+ args->version = UMP_IOCTL_API_VERSION; /* report our version */ -+ } ++static const char mali_name[] = "4xx"; ++static const char utgard_setup_version[] = "ANNOTATE_SETUP 1\n"; + -+ return _MALI_OSK_ERR_OK; -+} ++static u32 profiling_sample_rate = 0; ++static u32 first_sw_counter_index = 0; + ++static mali_bool l2_cache_counter_if_enabled = MALI_FALSE; ++static u32 num_counters_enabled = 0; ++static u32 mem_counters_enabled = 0; + -+_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info) -+{ -+ ump_session_memory_list_element *session_memory_element; -+ ump_session_memory_list_element *tmp; -+ ump_session_data *session_data; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_INVALID_FUNC; -+ int secure_id; ++static _mali_osk_atomic_t stream_fd_if_used; + -+ DEBUG_ASSERT_POINTER(release_info); -+ DEBUG_ASSERT_POINTER(release_info->ctx); ++static wait_queue_head_t stream_fd_wait_queue; ++static mali_profiling_counter *global_mali_profiling_counters = NULL; ++static u32 num_global_mali_profiling_counters = 0; + -+ /* Retreive the session data */ -+ session_data = (ump_session_data *)release_info->ctx; ++static mali_profiling_stream_list *global_mali_stream_list = NULL; ++static mali_profiling_stream *mali_counter_stream = NULL; ++static mali_profiling_stream *mali_core_activity_stream = NULL; ++static u64 mali_core_activity_stream_dequeue_time = 0; ++static spinlock_t mali_activity_lock; ++static u32 mali_activity_cores_num = 0; ++static struct hrtimer profiling_sampling_timer; + -+ /* If there are many items in the memory session list we -+ * could be de-referencing this pointer a lot so keep a local copy -+ */ -+ secure_id = release_info->secure_id; ++const char *_mali_mem_counter_descriptions[] = 
_MALI_MEM_COUTNER_DESCRIPTIONS; ++const char *_mali_special_counter_descriptions[] = _MALI_SPCIAL_COUNTER_DESCRIPTIONS; + -+ DBG_MSG(4, ("Releasing memory with IOCTL, ID: %u\n", secure_id)); ++static u32 current_profiling_pid = 0; + -+ /* Iterate through the memory list looking for the requested secure ID */ -+ _mali_osk_mutex_wait(session_data->lock); -+ _MALI_OSK_LIST_FOREACHENTRY(session_memory_element, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) { -+ if (session_memory_element->mem->secure_id == secure_id) { -+ ump_dd_mem *release_mem; ++static void _mali_profiling_stream_list_destory(mali_profiling_stream_list *profiling_stream_list) ++{ ++ mali_profiling_stream *profiling_stream, *tmp_profiling_stream; ++ MALI_DEBUG_ASSERT_POINTER(profiling_stream_list); + -+ release_mem = session_memory_element->mem; -+ _mali_osk_list_del(&session_memory_element->list); -+ ump_dd_reference_release(release_mem); -+ _mali_osk_free(session_memory_element); ++ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->free_list, list) { ++ list_del(&profiling_stream->list); ++ kfree(profiling_stream); ++ } + -+ ret = _MALI_OSK_ERR_OK; -+ break; -+ } ++ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &profiling_stream_list->queue_list, list) { ++ list_del(&profiling_stream->list); ++ kfree(profiling_stream); + } + -+ _mali_osk_mutex_signal(session_data->lock); -+ DBG_MSG_IF(1, _MALI_OSK_ERR_OK != ret, ("UMP memory with ID %u does not belong to this session.\n", secure_id)); ++ kfree(profiling_stream_list); ++} + -+ DBG_MSG(4, ("_ump_ukk_release() returning 0x%x\n", ret)); -+ return ret; ++static void _mali_profiling_global_stream_list_free(void) ++{ ++ mali_profiling_stream *profiling_stream, *tmp_profiling_stream; ++ unsigned long irq_flags; ++ ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); ++ list_for_each_entry_safe(profiling_stream, tmp_profiling_stream, &global_mali_stream_list->queue_list, list) { ++ profiling_stream->used_size = 0; ++ list_move(&profiling_stream->list, &global_mali_stream_list->free_list); ++ } ++ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); +} + -+_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction) ++static _mali_osk_errcode_t _mali_profiling_global_stream_list_dequeue(struct list_head *stream_list, mali_profiling_stream **new_mali_profiling_stream) +{ -+ ump_dd_mem *mem; -+ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; ++ unsigned long irq_flags; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_OK; ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ MALI_DEBUG_ASSERT_POINTER(stream_list); + -+ DEBUG_ASSERT_POINTER(user_interaction); ++ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); + -+ /* We lock the mappings so things don't get removed while we are looking for the memory */ -+ mem = ump_random_mapping_get(device.secure_id_map, user_interaction->secure_id); -+ if (NULL != mem) { -+ user_interaction->size = mem->size_bytes; -+ DBG_MSG(4, ("Returning size. 
ID: %u, size: %lu ", -+ (ump_secure_id)user_interaction->secure_id, -+ (unsigned long)user_interaction->size)); -+ ump_random_mapping_put(mem); -+ ret = _MALI_OSK_ERR_OK; ++ if (!list_empty(stream_list)) { ++ *new_mali_profiling_stream = list_entry(stream_list->next, mali_profiling_stream, list); ++ list_del_init(&(*new_mali_profiling_stream)->list); + } else { -+ user_interaction->size = 0; -+ DBG_MSG(1, ("Failed to look up mapping in ump_ioctl_size_get(). ID: %u\n", -+ (ump_secure_id)user_interaction->secure_id)); ++ ret = _MALI_OSK_ERR_NOMEM; + } + ++ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); ++ + return ret; +} + ++static void _mali_profiling_global_stream_list_queue(struct list_head *stream_list, mali_profiling_stream *current_mali_profiling_stream) ++{ ++ unsigned long irq_flags; ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ MALI_DEBUG_ASSERT_POINTER(stream_list); ++ ++ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); ++ list_add_tail(¤t_mali_profiling_stream->list, stream_list); ++ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); ++} + ++static mali_bool _mali_profiling_global_stream_queue_list_if_empty(void) ++{ ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ return list_empty(&global_mali_stream_list->queue_list); ++} + -+void _ump_ukk_msync(_ump_uk_msync_s *args) ++static u32 _mali_profiling_global_stream_queue_list_next_size(void) +{ -+ ump_dd_mem *mem = NULL; -+ void *virtual = NULL; ++ unsigned long irq_flags; + u32 size = 0; -+ u32 offset = 0; ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); + -+ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); -+ if (NULL == mem) { -+ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_msync(). ID: %u\n", -+ (ump_secure_id)args->secure_id)); -+ return; ++ spin_lock_irqsave(&global_mali_stream_list->spin_lock, irq_flags); ++ if (!list_empty(&global_mali_stream_list->queue_list)) { ++ mali_profiling_stream *next_mali_profiling_stream = ++ list_entry(global_mali_stream_list->queue_list.next, mali_profiling_stream, list); ++ size = next_mali_profiling_stream->used_size; + } ++ spin_unlock_irqrestore(&global_mali_stream_list->spin_lock, irq_flags); ++ return size; ++} + -+ /* Returns the cache settings back to Userspace */ -+ args->is_cached = mem->is_cached; ++/* The mali profiling stream file operations functions. 
*/ ++static ssize_t _mali_profiling_stream_read( ++ struct file *filp, ++ char __user *buffer, ++ size_t size, ++ loff_t *f_pos); + -+ /* If this flag is the only one set, we should not do the actual flush, only the readout */ -+ if (_UMP_UK_MSYNC_READOUT_CACHE_ENABLED == args->op) { -+ DBG_MSG(3, ("_ump_ukk_msync READOUT ID: %u Enabled: %d\n", (ump_secure_id)args->secure_id, mem->is_cached)); -+ goto msync_release_and_return; -+ } ++static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait); + -+ /* Nothing to do if the memory is not caches */ -+ if (0 == mem->is_cached) { -+ DBG_MSG(3, ("_ump_ukk_msync IGNORING ID: %u Enabled: %d OP: %d\n", (ump_secure_id)args->secure_id, mem->is_cached, args->op)); -+ goto msync_release_and_return; -+ } -+ DBG_MSG(3, ("UMP[%02u] _ump_ukk_msync Flush OP: %d Address: 0x%08x Mapping: 0x%08x\n", -+ (ump_secure_id)args->secure_id, args->op, args->address, args->mapping)); ++static int _mali_profiling_stream_release(struct inode *inode, struct file *filp); + -+ if (args->address) { -+ virtual = (void *)((u32)args->address); -+ offset = (u32)((args->address) - (args->mapping)); -+ } else { -+ /* Flush entire mapping when no address is specified. */ -+ virtual = args->mapping; -+ } -+ if (args->size) { -+ size = args->size; -+ } else { -+ /* Flush entire mapping when no size is specified. */ -+ size = mem->size_bytes - offset; -+ } ++/* The timeline stream file operations structure. */ ++static const struct file_operations mali_profiling_stream_fops = { ++ .release = _mali_profiling_stream_release, ++ .read = _mali_profiling_stream_read, ++ .poll = _mali_profiling_stream_poll, ++}; + -+ if ((offset + size) > mem->size_bytes) { -+ DBG_MSG(1, ("Trying to flush more than the entire UMP allocation: offset: %u + size: %u > %u\n", offset, size, mem->size_bytes)); -+ goto msync_release_and_return; ++static ssize_t _mali_profiling_stream_read( ++ struct file *filp, ++ char __user *buffer, ++ size_t size, ++ loff_t *f_pos) ++{ ++ u32 copy_len = 0; ++ mali_profiling_stream *current_mali_profiling_stream; ++ u32 used_size; ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); ++ ++ while (!_mali_profiling_global_stream_queue_list_if_empty()) { ++ used_size = _mali_profiling_global_stream_queue_list_next_size(); ++ if (used_size <= ((u32)size - copy_len)) { ++ current_mali_profiling_stream = NULL; ++ _mali_profiling_global_stream_list_dequeue(&global_mali_stream_list->queue_list, ++ ¤t_mali_profiling_stream); ++ MALI_DEBUG_ASSERT_POINTER(current_mali_profiling_stream); ++ if (copy_to_user(&buffer[copy_len], current_mali_profiling_stream->data, current_mali_profiling_stream->used_size)) { ++ current_mali_profiling_stream->used_size = 0; ++ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream); ++ return -EFAULT; ++ } ++ copy_len += current_mali_profiling_stream->used_size; ++ current_mali_profiling_stream->used_size = 0; ++ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->free_list, current_mali_profiling_stream); ++ } else { ++ break; ++ } + } ++ return (ssize_t)copy_len; ++} + -+ /* The actual cache flush - Implemented for each OS*/ -+ _ump_osk_msync(mem, virtual, offset, size, args->op, NULL); ++static unsigned int _mali_profiling_stream_poll(struct file *filp, poll_table *wait) ++{ ++ poll_wait(filp, &stream_fd_wait_queue, wait); ++ if (!_mali_profiling_global_stream_queue_list_if_empty()) ++ return POLLIN; ++ return 0; ++} + -+msync_release_and_return: -+ 
ump_random_mapping_put(mem); -+ return; ++static int _mali_profiling_stream_release(struct inode *inode, struct file *filp) ++{ ++ _mali_osk_atomic_init(&stream_fd_if_used, 0); ++ return 0; +} + -+void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args) ++/* The funs for control packet and stream data.*/ ++static void _mali_profiling_set_packet_size(unsigned char *const buf, const u32 size) +{ -+ ump_session_data *session_data; -+ ump_uk_cache_op_control op; ++ u32 i; + -+ DEBUG_ASSERT_POINTER(args); -+ DEBUG_ASSERT_POINTER(args->ctx); ++ for (i = 0; i < sizeof(size); ++i) ++ buf[i] = (size >> 8 * i) & 0xFF; ++} + -+ op = args->op; -+ session_data = (ump_session_data *)args->ctx; ++static u32 _mali_profiling_get_packet_size(unsigned char *const buf) ++{ ++ u32 i; ++ u32 size = 0; ++ for (i = 0; i < sizeof(size); ++i) ++ size |= (u32)buf[i] << 8 * i; ++ return size; ++} + -+ _mali_osk_mutex_wait(session_data->lock); -+ if (op == _UMP_UK_CACHE_OP_START) { -+ session_data->cache_operations_ongoing++; -+ DBG_MSG(4, ("Cache ops start\n")); -+ if (session_data->cache_operations_ongoing != 1) { -+ DBG_MSG(2, ("UMP: Number of simultanious cache control ops: %d\n", session_data->cache_operations_ongoing)); -+ } -+ } else if (op == _UMP_UK_CACHE_OP_FINISH) { -+ DBG_MSG(4, ("Cache ops finish\n")); -+ session_data->cache_operations_ongoing--; -+#if 0 -+ if (session_data->has_pending_level1_cache_flush) { -+ /* This function will set has_pending_level1_cache_flush=0 */ -+ _ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data); -+ } -+#endif ++static u32 _mali_profiling_read_packet_int(unsigned char *const buf, u32 *const pos, u32 const packet_size) ++{ ++ u64 int_value = 0; ++ u8 shift = 0; ++ u8 byte_value = ~0; + -+ /* to be on the safe side: always flush l1 cache when cache operations are done */ -+ _ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data); -+ DBG_MSG(4, ("Cache ops finish end\n")); -+ } else { -+ DBG_MSG(1, ("Illegal call to %s at line %d\n", __FUNCTION__, __LINE__)); ++ while ((byte_value & 0x80) != 0) { ++ if ((*pos) >= packet_size) ++ return -1; ++ byte_value = buf[*pos]; ++ *pos += 1; ++ int_value |= (u32)(byte_value & 0x7f) << shift; ++ shift += 7; + } -+ _mali_osk_mutex_signal(session_data->lock); + ++ if (shift < 8 * sizeof(int_value) && (byte_value & 0x40) != 0) { ++ int_value |= -(1 << shift); ++ } ++ ++ return int_value; +} + -+void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args) ++static u32 _mali_profiling_pack_int(u8 *const buf, u32 const buf_size, u32 const pos, s32 value) +{ -+ ump_dd_mem *mem = NULL; -+ ump_uk_user old_user; -+ ump_uk_msync_op cache_op = _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE; -+ ump_session_data *session_data; -+ -+ DEBUG_ASSERT_POINTER(args); -+ DEBUG_ASSERT_POINTER(args->ctx); ++ u32 add_bytes = 0; ++ int more = 1; ++ while (more) { ++ /* low order 7 bits of val */ ++ char byte_value = value & 0x7f; ++ value >>= 7; + -+ session_data = (ump_session_data *)args->ctx; ++ if ((value == 0 && (byte_value & 0x40) == 0) || (value == -1 && (byte_value & 0x40) != 0)) { ++ more = 0; ++ } else { ++ byte_value |= 0x80; ++ } + -+ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); -+ if (NULL == mem) { -+ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_switch_hw_usage(). 
ID: %u\n", -+ (ump_secure_id)args->secure_id)); -+ return; ++ if ((pos + add_bytes) >= buf_size) ++ return 0; ++ buf[pos + add_bytes] = byte_value; ++ add_bytes++; + } + -+ old_user = mem->hw_device; -+ mem->hw_device = args->new_user; -+ -+ DBG_MSG(3, ("UMP[%02u] Switch usage Start New: %s Prev: %s.\n", -+ (ump_secure_id)args->secure_id, -+ args->new_user ? "MALI" : "CPU", -+ old_user ? "MALI" : "CPU")); ++ return add_bytes; ++} + -+ if (!mem->is_cached) { -+ DBG_MSG(3, ("UMP[%02u] Changing owner of uncached memory. Cache flushing not needed.\n", -+ (ump_secure_id)args->secure_id)); -+ goto out; -+ } ++static int _mali_profiling_pack_long(uint8_t *const buf, u32 const buf_size, u32 const pos, s64 val) ++{ ++ int add_bytes = 0; ++ int more = 1; ++ while (more) { ++ /* low order 7 bits of x */ ++ char byte_value = val & 0x7f; ++ val >>= 7; + -+ if (old_user == args->new_user) { -+ DBG_MSG(4, ("UMP[%02u] Setting the new_user equal to previous for. Cache flushing not needed.\n", -+ (ump_secure_id)args->secure_id)); -+ goto out; -+ } -+ if ( -+ /* Previous AND new is both different from CPU */ -+ (old_user != _UMP_UK_USED_BY_CPU) && (args->new_user != _UMP_UK_USED_BY_CPU) -+ ) { -+ DBG_MSG(4, ("UMP[%02u] Previous and new user is not CPU. Cache flushing not needed.\n", -+ (ump_secure_id)args->secure_id)); -+ goto out; -+ } ++ if ((val == 0 && (byte_value & 0x40) == 0) || (val == -1 && (byte_value & 0x40) != 0)) { ++ more = 0; ++ } else { ++ byte_value |= 0x80; ++ } + -+ if ((old_user != _UMP_UK_USED_BY_CPU) && (args->new_user == _UMP_UK_USED_BY_CPU)) { -+ cache_op = _UMP_UK_MSYNC_INVALIDATE; -+ DBG_MSG(4, ("UMP[%02u] Cache invalidation needed\n", (ump_secure_id)args->secure_id)); -+#ifdef UMP_SKIP_INVALIDATION -+#error -+ DBG_MSG(4, ("UMP[%02u] Performing Cache invalidation SKIPPED\n", (ump_secure_id)args->secure_id)); -+ goto out; -+#endif ++ MALI_DEBUG_ASSERT((pos + add_bytes) < buf_size); ++ buf[pos + add_bytes] = byte_value; ++ add_bytes++; + } + -+ /* Take lock to protect: session->cache_operations_ongoing and session->has_pending_level1_cache_flush */ -+ _mali_osk_mutex_wait(session_data->lock); -+ /* Actual cache flush */ -+ _ump_osk_msync(mem, NULL, 0, mem->size_bytes, cache_op, session_data); -+ _mali_osk_mutex_signal(session_data->lock); -+ -+out: -+ ump_random_mapping_put(mem); -+ DBG_MSG(4, ("UMP[%02u] Switch usage Finish\n", (ump_secure_id)args->secure_id)); -+ return; ++ return add_bytes; +} + -+void _ump_ukk_lock(_ump_uk_lock_s *args) ++static void _mali_profiling_stream_add_counter(mali_profiling_stream *profiling_stream, s64 current_time, u32 key, u32 counter_value) +{ -+ ump_dd_mem *mem = NULL; ++ u32 add_size = STREAM_HEADER_SIZE; ++ MALI_DEBUG_ASSERT_POINTER(profiling_stream); ++ MALI_DEBUG_ASSERT((profiling_stream->used_size) < MALI_PROFILING_STREAM_BUFFER_SIZE); + -+ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); -+ if (NULL == mem) { -+ DBG_MSG(1, ("UMP[%02u] Failed to look up mapping in _ump_ukk_lock(). ID: %u\n", -+ (ump_secure_id)args->secure_id)); -+ return; -+ } ++ profiling_stream->data[profiling_stream->used_size] = STREAM_HEADER_COUNTER_VALUE; + -+ DBG_MSG(1, ("UMP[%02u] Lock. New lock flag: %d. 
Old Lock flag:\n", (u32)args->secure_id, (u32)args->lock_usage, (u32) mem->lock_usage)); ++ add_size += _mali_profiling_pack_long(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, ++ profiling_stream->used_size + add_size, current_time); ++ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, ++ profiling_stream->used_size + add_size, (s32)0); ++ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, ++ profiling_stream->used_size + add_size, (s32)key); ++ add_size += _mali_profiling_pack_int(profiling_stream->data, MALI_PROFILING_STREAM_BUFFER_SIZE, ++ profiling_stream->used_size + add_size, (s32)counter_value); + -+ mem->lock_usage = (ump_lock_usage) args->lock_usage; ++ _mali_profiling_set_packet_size(profiling_stream->data + profiling_stream->used_size + 1, ++ add_size - STREAM_HEADER_SIZE); + -+ ump_random_mapping_put(mem); ++ profiling_stream->used_size += add_size; +} + -+void _ump_ukk_unlock(_ump_uk_unlock_s *args) ++/* The callback function for sampling timer.*/ ++static enum hrtimer_restart _mali_profiling_sampling_counters(struct hrtimer *timer) +{ -+ ump_dd_mem *mem = NULL; -+ -+ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); -+ if (NULL == mem) { -+ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_unlock(). ID: %u\n", -+ (ump_secure_id)args->secure_id)); -+ return; -+ } ++ u32 counter_index; ++ s64 current_time; ++ MALI_DEBUG_ASSERT_POINTER(global_mali_profiling_counters); ++ MALI_DEBUG_ASSERT_POINTER(global_mali_stream_list); + -+ DBG_MSG(1, ("UMP[%02u] Unlocking. Old Lock flag:\n", -+ (u32)args->secure_id, (u32) mem->lock_usage)); ++ MALI_DEBUG_ASSERT(NULL == mali_counter_stream); ++ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue( ++ &global_mali_stream_list->free_list, &mali_counter_stream)) { + -+ mem->lock_usage = (ump_lock_usage) UMP_NOT_LOCKED; ++ MALI_DEBUG_ASSERT_POINTER(mali_counter_stream); ++ MALI_DEBUG_ASSERT(0 == mali_counter_stream->used_size); + -+ ump_random_mapping_put(mem); -+} -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c -new file mode 100644 -index 000000000..73aa9e4c4 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c -@@ -0,0 +1,358 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++ /* Capture l2 cache counter values if enabled */ ++ if (MALI_TRUE == l2_cache_counter_if_enabled) { ++ int i, j = 0; ++ _mali_profiling_l2_counter_values l2_counters_values; ++ _mali_profiling_get_l2_counters(&l2_counters_values); + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_osk_bitops.h" -+#include "mali_osk_list.h" -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+#include "ump_ukk.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_descriptor_mapping.h" -+#include "ump_kernel_memory_backend.h" ++ for (i = COUNTER_L2_0_C0; i <= COUNTER_L2_2_C1; i++) { ++ if (0 == (j % 2)) ++ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value0); ++ else ++ _mali_osk_profiling_record_global_counters(i, l2_counters_values.cores[j / 2].value1); ++ j++; ++ } ++ } + ++ current_time = (s64)_mali_osk_boot_time_get_ns(); + ++ /* Add all enabled counter values into stream */ ++ for (counter_index = 0; counter_index < num_global_mali_profiling_counters; counter_index++) { ++ /* No need to sample these couners here. */ ++ if (global_mali_profiling_counters[counter_index].enabled) { ++ if ((global_mali_profiling_counters[counter_index].counter_id >= FIRST_MEM_COUNTER && ++ global_mali_profiling_counters[counter_index].counter_id <= LAST_MEM_COUNTER) ++ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_VP_ACTIVITY) ++ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FP_ACTIVITY) ++ || (global_mali_profiling_counters[counter_index].counter_id == COUNTER_FILMSTRIP)) { + -+/** -+ * Define the initial and maximum size of number of secure_ids on the system -+ */ -+#define UMP_SECURE_ID_TABLE_ENTRIES_INITIAL (128 ) -+#define UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM (4096 ) ++ continue; ++ } + ++ if (global_mali_profiling_counters[counter_index].counter_id >= COUNTER_L2_0_C0 && ++ global_mali_profiling_counters[counter_index].counter_id <= COUNTER_L2_2_C1) { + -+/** -+ * Define the initial and maximum size of the ump_session_data::cookies_map, -+ * which is a \ref ump_descriptor_mapping. This limits how many secure_ids -+ * may be mapped into a particular process using _ump_ukk_map_mem(). 
-+ */ ++ u32 prev_val = global_mali_profiling_counters[counter_index].prev_counter_value; + -+#define UMP_COOKIES_PER_SESSION_INITIAL (UMP_SECURE_ID_TABLE_ENTRIES_INITIAL ) -+#define UMP_COOKIES_PER_SESSION_MAXIMUM (UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM) ++ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key, ++ global_mali_profiling_counters[counter_index].current_counter_value - prev_val); + -+struct ump_dev device; ++ prev_val = global_mali_profiling_counters[counter_index].current_counter_value; + -+_mali_osk_errcode_t ump_kernel_constructor(void) -+{ -+ _mali_osk_errcode_t err; ++ global_mali_profiling_counters[counter_index].prev_counter_value = prev_val; ++ } else { + -+ /* Perform OS Specific initialization */ -+ err = _ump_osk_init(); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("Failed to initiaze the UMP Device Driver")); -+ return err; ++ if (global_mali_profiling_counters[counter_index].counter_id == COUNTER_TOTAL_ALLOC_PAGES) { ++ u32 total_alloc_mem = _mali_ukk_report_memory_usage(); ++ global_mali_profiling_counters[counter_index].current_counter_value = total_alloc_mem / _MALI_OSK_MALI_PAGE_SIZE; ++ } ++ _mali_profiling_stream_add_counter(mali_counter_stream, current_time, global_mali_profiling_counters[counter_index].key, ++ global_mali_profiling_counters[counter_index].current_counter_value); ++ if (global_mali_profiling_counters[counter_index].counter_id < FIRST_SPECIAL_COUNTER) ++ global_mali_profiling_counters[counter_index].current_counter_value = 0; ++ } ++ } ++ } ++ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_counter_stream); ++ mali_counter_stream = NULL; ++ } else { ++ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n")); + } + -+ /* Init the global device */ -+ _mali_osk_memset(&device, 0, sizeof(device)); -+ -+ /* Create the descriptor map, which will be used for mapping secure ID to ump_dd_mem structs */ -+ device.secure_id_map = ump_random_mapping_create(); -+ if (NULL == device.secure_id_map) { -+ MSG_ERR(("Failed to create secure id lookup table\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } ++ wake_up_interruptible(&stream_fd_wait_queue); + -+ /* Init memory backend */ -+ device.backend = ump_memory_backend_create(); -+ if (NULL == device.backend) { -+ MSG_ERR(("Failed to create memory backend\n")); -+ ump_random_mapping_destroy(device.secure_id_map); -+ return _MALI_OSK_ERR_NOMEM; ++ /*Enable the sampling timer again*/ ++ if (0 != num_counters_enabled && 0 != profiling_sample_rate) { ++ hrtimer_forward_now(&profiling_sampling_timer, ns_to_ktime(profiling_sample_rate)); ++ return HRTIMER_RESTART; + } -+ -+ return _MALI_OSK_ERR_OK; ++ return HRTIMER_NORESTART; +} + -+void ump_kernel_destructor(void) ++static void _mali_profiling_sampling_core_activity_switch(int counter_id, int core, u32 activity, u32 pid) +{ -+ DEBUG_ASSERT_POINTER(device.secure_id_map); ++ unsigned long irq_flags; + -+ ump_random_mapping_destroy(device.secure_id_map); -+ device.secure_id_map = NULL; ++ spin_lock_irqsave(&mali_activity_lock, irq_flags); ++ if (activity == 0) ++ mali_activity_cores_num--; ++ else ++ mali_activity_cores_num++; ++ spin_unlock_irqrestore(&mali_activity_lock, irq_flags); + -+ device.backend->shutdown(device.backend); -+ device.backend = NULL; ++ if (NULL != global_mali_profiling_counters) { ++ int i ; ++ for (i = 0; i < num_global_mali_profiling_counters; i++) { ++ if (counter_id == global_mali_profiling_counters[i].counter_id && 
global_mali_profiling_counters[i].enabled) { ++ u64 current_time = _mali_osk_boot_time_get_ns(); ++ u32 add_size = STREAM_HEADER_SIZE; + -+ ump_memory_backend_destroy(); ++ if (NULL != mali_core_activity_stream) { ++ if ((mali_core_activity_stream_dequeue_time + MALI_PROFILING_STREAM_HOLD_TIME < current_time) || ++ (MALI_PROFILING_STREAM_DATA_DEFAULT_SIZE > MALI_PROFILING_STREAM_BUFFER_SIZE ++ - mali_core_activity_stream->used_size)) { ++ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream); ++ mali_core_activity_stream = NULL; ++ wake_up_interruptible(&stream_fd_wait_queue); ++ } ++ } + -+ _ump_osk_term(); ++ if (NULL == mali_core_activity_stream) { ++ if (_MALI_OSK_ERR_OK == _mali_profiling_global_stream_list_dequeue( ++ &global_mali_stream_list->free_list, &mali_core_activity_stream)) { ++ mali_core_activity_stream_dequeue_time = current_time; ++ } else { ++ MALI_DEBUG_PRINT(1, ("Not enough mali profiling stream buffer!\n")); ++ wake_up_interruptible(&stream_fd_wait_queue); ++ break; ++ } ++ ++ } ++ ++ mali_core_activity_stream->data[mali_core_activity_stream->used_size] = STREAM_HEADER_CORE_ACTIVITY; ++ ++ add_size += _mali_profiling_pack_long(mali_core_activity_stream->data, ++ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s64)current_time); ++ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, ++ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, core); ++ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, ++ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, (s32)global_mali_profiling_counters[i].key); ++ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, ++ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, activity); ++ add_size += _mali_profiling_pack_int(mali_core_activity_stream->data, ++ MALI_PROFILING_STREAM_BUFFER_SIZE, mali_core_activity_stream->used_size + add_size, pid); ++ ++ _mali_profiling_set_packet_size(mali_core_activity_stream->data + mali_core_activity_stream->used_size + 1, ++ add_size - STREAM_HEADER_SIZE); ++ ++ mali_core_activity_stream->used_size += add_size; ++ ++ if (0 == mali_activity_cores_num) { ++ _mali_profiling_global_stream_list_queue(&global_mali_stream_list->queue_list, mali_core_activity_stream); ++ mali_core_activity_stream = NULL; ++ wake_up_interruptible(&stream_fd_wait_queue); ++ } ++ ++ break; ++ } ++ } ++ } +} + -+/** Creates a new UMP session -+ */ -+_mali_osk_errcode_t _ump_ukk_open(void **context) ++static mali_bool _mali_profiling_global_counters_init(void) +{ -+ struct ump_session_data *session_data; ++ int core_id, counter_index, counter_number, counter_id; ++ u32 num_l2_cache_cores; ++ u32 num_pp_cores; ++ u32 num_gp_cores = 1; + -+ /* allocated struct to track this session */ -+ session_data = (struct ump_session_data *)_mali_osk_malloc(sizeof(struct ump_session_data)); -+ if (NULL == session_data) { -+ MSG_ERR(("Failed to allocate ump_session_data in ump_file_open()\n")); -+ return _MALI_OSK_ERR_NOMEM; ++ MALI_DEBUG_ASSERT(NULL == global_mali_profiling_counters); ++ num_pp_cores = mali_pp_get_glob_num_pp_cores(); ++ num_l2_cache_cores = mali_l2_cache_core_get_glob_num_l2_cores(); ++ ++ num_global_mali_profiling_counters = 3 * (num_gp_cores + num_pp_cores) + 2 * num_l2_cache_cores ++ + MALI_PROFILING_SW_COUNTERS_NUM ++ + MALI_PROFILING_SPECIAL_COUNTERS_NUM ++ + 
MALI_PROFILING_MEM_COUNTERS_NUM; ++ global_mali_profiling_counters = _mali_osk_calloc(num_global_mali_profiling_counters, sizeof(mali_profiling_counter)); ++ ++ if (NULL == global_mali_profiling_counters) ++ return MALI_FALSE; ++ ++ counter_index = 0; ++ /*Vertex processor counters */ ++ for (core_id = 0; core_id < num_gp_cores; core_id ++) { ++ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_VP_0 + core_id; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_active", mali_name, core_id); ++ ++ for (counter_number = 0; counter_number < 2; counter_number++) { ++ counter_index++; ++ global_mali_profiling_counters[counter_index].counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number); ++ } + } + -+ session_data->lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0); -+ if (NULL == session_data->lock) { -+ MSG_ERR(("Failed to initialize lock for ump_session_data in ump_file_open()\n")); -+ _mali_osk_free(session_data); -+ return _MALI_OSK_ERR_NOMEM; ++ /* Fragment processors' counters */ ++ for (core_id = 0; core_id < num_pp_cores; core_id++) { ++ counter_index++; ++ global_mali_profiling_counters[counter_index].counter_id = ACTIVITY_FP_0 + core_id; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_active", mali_name, core_id); ++ ++ for (counter_number = 0; counter_number < 2; counter_number++) { ++ counter_index++; ++ global_mali_profiling_counters[counter_index].counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number); ++ } + } + -+ session_data->cookies_map = ump_descriptor_mapping_create( -+ UMP_COOKIES_PER_SESSION_INITIAL, -+ UMP_COOKIES_PER_SESSION_MAXIMUM); ++ /* L2 Cache counters */ ++ for (core_id = 0; core_id < num_l2_cache_cores; core_id++) { ++ for (counter_number = 0; counter_number < 2; counter_number++) { ++ counter_index++; ++ global_mali_profiling_counters[counter_index].counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number); ++ } ++ } + -+ if (NULL == session_data->cookies_map) { -+ MSG_ERR(("Failed to create descriptor mapping for _ump_ukk_map_mem cookies\n")); ++ /* Now set up the software counter entries */ ++ for (counter_id = FIRST_SW_COUNTER; counter_id <= LAST_SW_COUNTER; counter_id++) { ++ counter_index++; + -+ _mali_osk_mutex_term(session_data->lock); -+ _mali_osk_free(session_data); -+ return _MALI_OSK_ERR_NOMEM; ++ if (0 == first_sw_counter_index) ++ first_sw_counter_index = counter_index; ++ ++ global_mali_profiling_counters[counter_index].counter_id = counter_id; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_SW_%d", 
mali_name, counter_id - FIRST_SW_COUNTER); + } + -+ _MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_list); ++ /* Now set up the special counter entries */ ++ for (counter_id = FIRST_SPECIAL_COUNTER; counter_id <= LAST_SPECIAL_COUNTER; counter_id++) { + -+ _MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_mappings_list); ++ counter_index++; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s", ++ mali_name, _mali_special_counter_descriptions[counter_id - FIRST_SPECIAL_COUNTER]); + -+ /* Since initial version of the UMP interface did not use the API_VERSION ioctl we have to assume -+ that it is this version, and not the "latest" one: UMP_IOCTL_API_VERSION -+ Current and later API versions would do an additional call to this IOCTL and update this variable -+ to the correct one.*/ -+ session_data->api_version = MAKE_VERSION_ID(1); ++ global_mali_profiling_counters[counter_index].counter_id = counter_id; ++ } + -+ *context = (void *)session_data; ++ /* Now set up the mem counter entries*/ ++ for (counter_id = FIRST_MEM_COUNTER; counter_id <= LAST_MEM_COUNTER; counter_id++) { + -+ session_data->cache_operations_ongoing = 0 ; -+ session_data->has_pending_level1_cache_flush = 0; ++ counter_index++; ++ _mali_osk_snprintf(global_mali_profiling_counters[counter_index].counter_name, ++ sizeof(global_mali_profiling_counters[counter_index].counter_name), "ARM_Mali-%s_%s", ++ mali_name, _mali_mem_counter_descriptions[counter_id - FIRST_MEM_COUNTER]); + -+ DBG_MSG(2, ("New session opened\n")); ++ global_mali_profiling_counters[counter_index].counter_id = counter_id; ++ } + -+ return _MALI_OSK_ERR_OK; ++ MALI_DEBUG_ASSERT((counter_index + 1) == num_global_mali_profiling_counters); ++ ++ return MALI_TRUE; +} + -+_mali_osk_errcode_t _ump_ukk_close(void **context) ++void _mali_profiling_notification_mem_counter(struct mali_session_data *session, u32 counter_id, u32 key, int enable) +{ -+ struct ump_session_data *session_data; -+ ump_session_memory_list_element *item; -+ ump_session_memory_list_element *tmp; + -+ session_data = (struct ump_session_data *)*context; -+ if (NULL == session_data) { -+ MSG_ERR(("Session data is NULL in _ump_ukk_close()\n")); -+ return _MALI_OSK_ERR_INVALID_ARGS; -+ } ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ /* Unmap any descriptors mapped in. 
*/ -+ if (0 == _mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list)) { -+ ump_memory_allocation *descriptor; -+ ump_memory_allocation *temp; ++ if (NULL != session) { ++ _mali_osk_notification_t *notification; ++ _mali_osk_notification_queue_t *queue; + -+ DBG_MSG(1, ("Memory mappings found on session usage list during session termination\n")); ++ queue = session->ioctl_queue; ++ MALI_DEBUG_ASSERT(NULL != queue); + -+ /* use the 'safe' list iterator, since freeing removes the active block from the list we're iterating */ -+ _MALI_OSK_LIST_FOREACHENTRY(descriptor, temp, &session_data->list_head_session_memory_mappings_list, ump_memory_allocation, list) { -+ _ump_uk_unmap_mem_s unmap_args; -+ DBG_MSG(4, ("Freeing block with phys address 0x%x size 0x%x mapped in user space at 0x%x\n", -+ descriptor->phys_addr, descriptor->size, descriptor->mapping)); -+ unmap_args.ctx = (void *)session_data; -+ unmap_args.mapping = descriptor->mapping; -+ unmap_args.size = descriptor->size; -+ unmap_args._ukk_private = NULL; /* NOTE: unused */ -+ unmap_args.cookie = descriptor->cookie; ++ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_MEM_COUNTER, ++ sizeof(_mali_uk_annotate_profiling_mem_counter_s)); + -+ /* NOTE: This modifies the list_head_session_memory_mappings_list */ -+ _ump_ukk_unmap_mem(&unmap_args); ++ if (NULL != notification) { ++ _mali_uk_annotate_profiling_mem_counter_s *data = notification->result_buffer; ++ data->counter_id = counter_id; ++ data->key = key; ++ data->enable = enable; ++ ++ _mali_osk_notification_queue_send(queue, notification); ++ } else { ++ MALI_PRINT_ERROR(("Failed to create notification object!\n")); + } ++ } else { ++ MALI_PRINT_ERROR(("Failed to find the right session!\n")); + } ++} + -+ /* ASSERT that we really did free everything, because _ump_ukk_unmap_mem() -+ * can fail silently. 
*/ -+ DEBUG_ASSERT(_mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list)); ++void _mali_profiling_notification_enable(struct mali_session_data *session, u32 sampling_rate, int enable) ++{ ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ _MALI_OSK_LIST_FOREACHENTRY(item, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) { -+ _mali_osk_list_del(&item->list); -+ DBG_MSG(2, ("Releasing UMP memory %u as part of file close\n", item->mem->secure_id)); -+ ump_dd_reference_release(item->mem); -+ _mali_osk_free(item); -+ } ++ if (NULL != session) { ++ _mali_osk_notification_t *notification; ++ _mali_osk_notification_queue_t *queue; + -+ ump_descriptor_mapping_destroy(session_data->cookies_map); ++ queue = session->ioctl_queue; ++ MALI_DEBUG_ASSERT(NULL != queue); + -+ _mali_osk_mutex_term(session_data->lock); -+ _mali_osk_free(session_data); ++ notification = _mali_osk_notification_create(_MALI_NOTIFICATION_ANNOTATE_PROFILING_ENABLE, ++ sizeof(_mali_uk_annotate_profiling_enable_s)); + -+ DBG_MSG(2, ("Session closed\n")); ++ if (NULL != notification) { ++ _mali_uk_annotate_profiling_enable_s *data = notification->result_buffer; ++ data->sampling_rate = sampling_rate; ++ data->enable = enable; + -+ return _MALI_OSK_ERR_OK; ++ _mali_osk_notification_queue_send(queue, notification); ++ } else { ++ MALI_PRINT_ERROR(("Failed to create notification object!\n")); ++ } ++ } else { ++ MALI_PRINT_ERROR(("Failed to find the right session!\n")); ++ } +} + -+_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args) -+{ -+ struct ump_session_data *session_data; -+ ump_memory_allocation *descriptor; /* Describes current mapping of memory */ -+ _mali_osk_errcode_t err; -+ unsigned long offset = 0; -+ unsigned long left; -+ ump_dd_handle handle; /* The real UMP handle for this memory. Its real datatype is ump_dd_mem* */ -+ ump_dd_mem *mem; /* The real UMP memory. 
It is equal to the handle, but with exposed struct */ -+ u32 block; -+ int map_id; + -+ session_data = (ump_session_data *)args->ctx; -+ if (NULL == session_data) { -+ MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n")); -+ return _MALI_OSK_ERR_INVALID_ARGS; ++_mali_osk_errcode_t _mali_osk_profiling_init(mali_bool auto_start) ++{ ++ int i; ++ mali_profiling_stream *new_mali_profiling_stream = NULL; ++ mali_profiling_stream_list *new_mali_profiling_stream_list = NULL; ++ if (MALI_TRUE == auto_start) { ++ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE); + } + -+ descriptor = (ump_memory_allocation *) _mali_osk_calloc(1, sizeof(ump_memory_allocation)); -+ if (NULL == descriptor) { -+ MSG_ERR(("ump_ukk_map_mem: descriptor allocation failed\n")); ++ /*Init the global_mali_stream_list*/ ++ MALI_DEBUG_ASSERT(NULL == global_mali_stream_list); ++ new_mali_profiling_stream_list = (mali_profiling_stream_list *)kmalloc(sizeof(mali_profiling_stream_list), GFP_KERNEL); ++ ++ if (NULL == new_mali_profiling_stream_list) { + return _MALI_OSK_ERR_NOMEM; + } + -+ handle = ump_dd_handle_create_from_secure_id(args->secure_id); -+ if (UMP_DD_HANDLE_INVALID == handle) { -+ _mali_osk_free(descriptor); -+ DBG_MSG(1, ("Trying to map unknown secure ID %u\n", args->secure_id)); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ spin_lock_init(&new_mali_profiling_stream_list->spin_lock); ++ INIT_LIST_HEAD(&new_mali_profiling_stream_list->free_list); ++ INIT_LIST_HEAD(&new_mali_profiling_stream_list->queue_list); + -+ mem = (ump_dd_mem *)handle; -+ DEBUG_ASSERT(mem); -+ if (mem->size_bytes != args->size) { -+ _mali_osk_free(descriptor); -+ ump_dd_reference_release(handle); -+ DBG_MSG(1, ("Trying to map too much or little. ID: %u, virtual size=%lu, UMP size: %lu\n", args->secure_id, args->size, mem->size_bytes)); -+ return _MALI_OSK_ERR_FAULT; -+ } ++ spin_lock_init(&mali_activity_lock); ++ mali_activity_cores_num = 0; + -+ map_id = ump_descriptor_mapping_allocate_mapping(session_data->cookies_map, (void *) descriptor); ++ for (i = 0; i < MALI_PROFILING_STREAM_BUFFER_NUM; i++) { ++ new_mali_profiling_stream = (mali_profiling_stream *)kmalloc(sizeof(mali_profiling_stream), GFP_KERNEL); ++ if (NULL == new_mali_profiling_stream) { ++ _mali_profiling_stream_list_destory(new_mali_profiling_stream_list); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ if (map_id < 0) { -+ _mali_osk_free(descriptor); -+ ump_dd_reference_release(handle); -+ DBG_MSG(1, ("ump_ukk_map_mem: unable to allocate a descriptor_mapping for return cookie\n")); ++ INIT_LIST_HEAD(&new_mali_profiling_stream->list); ++ new_mali_profiling_stream->used_size = 0; ++ list_add_tail(&new_mali_profiling_stream->list, &new_mali_profiling_stream_list->free_list); + -+ return _MALI_OSK_ERR_NOMEM; + } + -+ descriptor->size = args->size; -+ descriptor->handle = handle; -+ descriptor->phys_addr = args->phys_addr; -+ descriptor->process_mapping_info = args->_ukk_private; -+ descriptor->ump_session = session_data; -+ descriptor->cookie = (u32)map_id; ++ _mali_osk_atomic_init(&stream_fd_if_used, 0); ++ init_waitqueue_head(&stream_fd_wait_queue); + -+ if (mem->is_cached) { -+ descriptor->is_cached = 1; -+ DBG_MSG(3, ("Mapping UMP secure_id: %d as cached.\n", args->secure_id)); -+ } else { -+ descriptor->is_cached = 0; -+ DBG_MSG(3, ("Mapping UMP secure_id: %d as Uncached.\n", args->secure_id)); ++ hrtimer_init(&profiling_sampling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ ++ profiling_sampling_timer.function = _mali_profiling_sampling_counters; ++ ++ 
global_mali_stream_list = new_mali_profiling_stream_list; ++ ++ return _MALI_OSK_ERR_OK; ++} ++ ++void _mali_osk_profiling_term(void) ++{ ++ if (0 != profiling_sample_rate) { ++ hrtimer_cancel(&profiling_sampling_timer); ++ profiling_sample_rate = 0; + } ++ _mali_osk_atomic_term(&stream_fd_if_used); + -+ _mali_osk_list_init(&descriptor->list); ++ if (NULL != global_mali_profiling_counters) { ++ _mali_osk_free(global_mali_profiling_counters); ++ global_mali_profiling_counters = NULL; ++ num_global_mali_profiling_counters = 0; ++ } + -+ err = _ump_osk_mem_mapregion_init(descriptor); -+ if (_MALI_OSK_ERR_OK != err) { -+ DBG_MSG(1, ("Failed to initialize memory mapping in _ump_ukk_map_mem(). ID: %u\n", args->secure_id)); -+ ump_descriptor_mapping_free(session_data->cookies_map, map_id); -+ _mali_osk_free(descriptor); -+ ump_dd_reference_release(mem); -+ return err; ++ if (NULL != global_mali_stream_list) { ++ _mali_profiling_stream_list_destory(global_mali_stream_list); ++ global_mali_stream_list = NULL; + } + -+ DBG_MSG(4, ("Mapping virtual to physical memory: ID: %u, size:%lu, first physical addr: 0x%08lx, number of regions: %lu\n", -+ mem->secure_id, -+ mem->size_bytes, -+ ((NULL != mem->block_array) ? mem->block_array->addr : 0), -+ mem->nr_blocks)); ++} + -+ left = descriptor->size; -+ /* loop over all blocks and map them in */ -+ for (block = 0; block < mem->nr_blocks; block++) { -+ unsigned long size_to_map; ++void _mali_osk_profiling_stop_sampling(u32 pid) ++{ ++ if (pid == current_profiling_pid) { + -+ if (left > mem->block_array[block].size) { -+ size_to_map = mem->block_array[block].size; -+ } else { -+ size_to_map = left; ++ int i; ++ /* Reset all counter states when closing connection.*/ ++ for (i = 0; i < num_global_mali_profiling_counters; ++i) { ++ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER); ++ global_mali_profiling_counters[i].enabled = 0; ++ global_mali_profiling_counters[i].prev_counter_value = 0; ++ global_mali_profiling_counters[i].current_counter_value = 0; ++ } ++ l2_cache_counter_if_enabled = MALI_FALSE; ++ num_counters_enabled = 0; ++ mem_counters_enabled = 0; ++ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0); ++ _mali_profiling_control(SW_COUNTER_ENABLE, 0); ++ /* Delete sampling timer when closing connection. */ ++ if (0 != profiling_sample_rate) { ++ hrtimer_cancel(&profiling_sampling_timer); ++ profiling_sample_rate = 0; + } ++ current_profiling_pid = 0; ++ } ++} + -+ if (_MALI_OSK_ERR_OK != _ump_osk_mem_mapregion_map(descriptor, offset, (u32 *) & (mem->block_array[block].addr), size_to_map)) { -+ DBG_MSG(1, ("WARNING: _ump_ukk_map_mem failed to map memory into userspace\n")); -+ ump_descriptor_mapping_free(session_data->cookies_map, map_id); -+ ump_dd_reference_release(mem); -+ _ump_osk_mem_mapregion_term(descriptor); -+ _mali_osk_free(descriptor); -+ return _MALI_OSK_ERR_FAULT; ++void _mali_osk_profiling_add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4) ++{ ++ /*Record the freq & volt to global_mali_profiling_counters here. 
*/ ++ if (0 != profiling_sample_rate) { ++ u32 channel; ++ u32 state; ++ channel = (event_id >> 16) & 0xFF; ++ state = ((event_id >> 24) & 0xF) << 24; ++ ++ switch (state) { ++ case MALI_PROFILING_EVENT_TYPE_SINGLE: ++ if ((MALI_PROFILING_EVENT_CHANNEL_GPU >> 16) == channel) { ++ u32 reason = (event_id & 0xFFFF); ++ if (MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE == reason) { ++ _mali_osk_profiling_record_global_counters(COUNTER_FREQUENCY, data0); ++ _mali_osk_profiling_record_global_counters(COUNTER_VOLTAGE, data1); ++ } ++ } ++ break; ++ case MALI_PROFILING_EVENT_TYPE_START: ++ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) { ++ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 1, data1); ++ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) && ++ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) { ++ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16); ++ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 1, data1); ++ } ++ break; ++ case MALI_PROFILING_EVENT_TYPE_STOP: ++ if ((MALI_PROFILING_EVENT_CHANNEL_GP0 >> 16) == channel) { ++ _mali_profiling_sampling_core_activity_switch(COUNTER_VP_ACTIVITY, 0, 0, 0); ++ } else if (channel >= (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16) && ++ (MALI_PROFILING_EVENT_CHANNEL_PP7 >> 16) >= channel) { ++ u32 core_id = channel - (MALI_PROFILING_EVENT_CHANNEL_PP0 >> 16); ++ _mali_profiling_sampling_core_activity_switch(COUNTER_FP_ACTIVITY, core_id, 0, 0); ++ } ++ break; ++ default: ++ break; + } -+ left -= size_to_map; -+ offset += size_to_map; + } ++ trace_mali_timeline_event(event_id, data0, data1, data2, data3, data4); ++} + -+ /* Add to the ump_memory_allocation tracking list */ -+ _mali_osk_mutex_wait(session_data->lock); -+ _mali_osk_list_add(&descriptor->list, &session_data->list_head_session_memory_mappings_list); -+ _mali_osk_mutex_signal(session_data->lock); ++void _mali_osk_profiling_report_sw_counters(u32 *counters) ++{ ++ trace_mali_sw_counters(_mali_osk_get_pid(), _mali_osk_get_tid(), NULL, counters); ++} + -+ args->mapping = descriptor->mapping; -+ args->cookie = descriptor->cookie; ++void _mali_osk_profiling_record_global_counters(int counter_id, u32 value) ++{ ++ if (NULL != global_mali_profiling_counters) { ++ int i ; ++ for (i = 0; i < num_global_mali_profiling_counters; i++) { ++ if (counter_id == global_mali_profiling_counters[i].counter_id && global_mali_profiling_counters[i].enabled) { ++ global_mali_profiling_counters[i].current_counter_value = value; ++ break; ++ } ++ } ++ } ++} ++ ++_mali_osk_errcode_t _mali_ukk_profiling_add_event(_mali_uk_profiling_add_event_s *args) ++{ ++ /* Always add process and thread identificator in the first two data elements for events from user space */ ++ _mali_osk_profiling_add_event(args->event_id, _mali_osk_get_pid(), _mali_osk_get_tid(), args->data[2], args->data[3], args->data[4]); + + return _MALI_OSK_ERR_OK; +} + -+void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args) ++_mali_osk_errcode_t _mali_ukk_sw_counters_report(_mali_uk_sw_counters_report_s *args) +{ -+ struct ump_session_data *session_data; -+ ump_memory_allocation *descriptor; -+ ump_dd_handle handle; ++ u32 *counters = (u32 *)(uintptr_t)args->counters; + -+ session_data = (ump_session_data *)args->ctx; ++ _mali_osk_profiling_report_sw_counters(counters); + -+ if (NULL == session_data) { -+ MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n")); -+ return; ++ if (NULL != global_mali_profiling_counters) { ++ int i; ++ for (i = 0; i < 
MALI_PROFILING_SW_COUNTERS_NUM; i ++) { ++ if (global_mali_profiling_counters[first_sw_counter_index + i].enabled) { ++ global_mali_profiling_counters[first_sw_counter_index + i].current_counter_value = *(counters + i); ++ } ++ } + } + -+ if (0 != ump_descriptor_mapping_get(session_data->cookies_map, (int)args->cookie, (void **)&descriptor)) { -+ MSG_ERR(("_ump_ukk_map_mem: cookie 0x%X not found for this session\n", args->cookie)); -+ return; ++ return _MALI_OSK_ERR_OK; ++} ++ ++_mali_osk_errcode_t _mali_ukk_profiling_stream_fd_get(_mali_uk_profiling_stream_fd_get_s *args) ++{ ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ ++ if (1 == _mali_osk_atomic_inc_return(&stream_fd_if_used)) { ++ ++ s32 fd = anon_inode_getfd("[mali_profiling_stream]", &mali_profiling_stream_fops, ++ session, ++ O_RDONLY | O_CLOEXEC); ++ ++ args->stream_fd = fd; ++ if (0 > fd) { ++ _mali_osk_atomic_dec(&stream_fd_if_used); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ args->stream_fd = fd; ++ } else { ++ _mali_osk_atomic_dec(&stream_fd_if_used); ++ args->stream_fd = -1; ++ return _MALI_OSK_ERR_BUSY; + } + -+ DEBUG_ASSERT_POINTER(descriptor); ++ return _MALI_OSK_ERR_OK; ++} + -+ handle = descriptor->handle; -+ if (UMP_DD_HANDLE_INVALID == handle) { -+ DBG_MSG(1, ("WARNING: Trying to unmap unknown handle: UNKNOWN\n")); -+ return; ++_mali_osk_errcode_t _mali_ukk_profiling_control_set(_mali_uk_profiling_control_set_s *args) ++{ ++ u32 control_packet_size; ++ u32 output_buffer_size; ++ ++ struct mali_session_data *session = (struct mali_session_data *)(uintptr_t)args->ctx; ++ MALI_DEBUG_ASSERT_POINTER(session); ++ ++ if (NULL == global_mali_profiling_counters && MALI_FALSE == _mali_profiling_global_counters_init()) { ++ MALI_PRINT_ERROR(("Failed to create global_mali_profiling_counters.\n")); ++ return _MALI_OSK_ERR_FAULT; + } + -+ /* Remove the ump_memory_allocation from the list of tracked mappings */ -+ _mali_osk_mutex_wait(session_data->lock); -+ _mali_osk_list_del(&descriptor->list); -+ _mali_osk_mutex_signal(session_data->lock); ++ control_packet_size = args->control_packet_size; ++ output_buffer_size = args->response_packet_size; + -+ ump_descriptor_mapping_free(session_data->cookies_map, (int)args->cookie); ++ if (0 != control_packet_size) { ++ u8 control_type; ++ u8 *control_packet_data; ++ u8 *response_packet_data; ++ u32 version_length = sizeof(utgard_setup_version) - 1; + -+ ump_dd_reference_release(handle); ++ control_packet_data = (u8 *)(uintptr_t)args->control_packet_data; ++ MALI_DEBUG_ASSERT_POINTER(control_packet_data); ++ response_packet_data = (u8 *)(uintptr_t)args->response_packet_data; ++ MALI_DEBUG_ASSERT_POINTER(response_packet_data); + -+ _ump_osk_mem_mapregion_term(descriptor); -+ _mali_osk_free(descriptor); -+} ++ /*Decide if need to ignore Utgard setup version.*/ ++ if (control_packet_size >= version_length) { ++ if (0 == memcmp(control_packet_data, utgard_setup_version, version_length)) { ++ if (control_packet_size == version_length) { ++ args->response_packet_size = 0; ++ return _MALI_OSK_ERR_OK; ++ } else { ++ control_packet_data += version_length; ++ control_packet_size -= version_length; ++ } ++ } ++ } + -+u32 _ump_ukk_report_memory_usage(void) -+{ -+ if (device.backend->stat) -+ return device.backend->stat(device.backend); -+ else -+ return 0; -+} -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h -new file mode 100644 -index 
000000000..aa65f1cb6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h -@@ -0,0 +1,125 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ current_profiling_pid = _mali_osk_get_pid(); + -+#ifndef __UMP_KERNEL_COMMON_H__ -+#define __UMP_KERNEL_COMMON_H__ ++ control_type = control_packet_data[0]; ++ switch (control_type) { ++ case PACKET_HEADER_COUNTERS_REQUEST: { ++ int i; + -+#include "ump_kernel_types.h" -+#include "ump_kernel_interface.h" -+#include "ump_kernel_descriptor_mapping.h" -+#include "ump_kernel_random_mapping.h" -+#include "ump_kernel_memory_backend.h" ++ if (PACKET_HEADER_SIZE > control_packet_size || ++ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) { ++ MALI_PRINT_ERROR(("Wrong control packet size, type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size)); ++ return _MALI_OSK_ERR_FAULT; ++ } + ++ /* Send supported counters */ ++ if (PACKET_HEADER_SIZE > output_buffer_size) ++ return _MALI_OSK_ERR_FAULT; + -+#ifdef DEBUG -+extern int ump_debug_level; -+#define UMP_DEBUG_PRINT(args) _mali_osk_dbgmsg args -+#define UMP_DEBUG_CODE(args) args -+#define DBG_MSG(level,args) do { /* args should be in brackets */ \ -+ ((level) <= ump_debug_level)?\ -+ UMP_DEBUG_PRINT(("UMP<" #level ">: ")), \ -+ UMP_DEBUG_PRINT(args):0; \ -+ } while (0) ++ *response_packet_data = PACKET_HEADER_COUNTERS_ACK; ++ args->response_packet_size = PACKET_HEADER_SIZE; + -+#define DBG_MSG_IF(level,condition,args) /* args should be in brackets */ \ -+ if((condition)&&((level) <= ump_debug_level)) {\ -+ UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \ -+ UMP_DEBUG_PRINT(args); \ -+ } ++ for (i = 0; i < num_global_mali_profiling_counters; ++i) { ++ u32 name_size = strlen(global_mali_profiling_counters[i].counter_name); + -+#define DBG_MSG_ELSE(level,args) /* args should be in brackets */ \ -+ else if((level) <= ump_debug_level) { \ -+ UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \ -+ UMP_DEBUG_PRINT(args); \ -+ } ++ if ((args->response_packet_size + name_size + 1) > output_buffer_size) { ++ MALI_PRINT_ERROR(("Response packet data is too large..\n")); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+#define DEBUG_ASSERT_POINTER(pointer) do {if( (pointer)== NULL) MSG_ERR(("NULL pointer " #pointer)); } while(0) -+#define DEBUG_ASSERT(condition) do {if(!(condition)) MSG_ERR(("ASSERT failed: " #condition)); } while(0) -+#else /* DEBUG */ -+#define UMP_DEBUG_PRINT(args) do {} while(0) -+#define UMP_DEBUG_CODE(args) -+#define DBG_MSG(level,args) do {} while(0) -+#define DBG_MSG_IF(level,condition,args) do {} while(0) -+#define DBG_MSG_ELSE(level,args) do {} while(0) -+#define DEBUG_ASSERT(condition) do {} while(0) -+#define DEBUG_ASSERT_POINTER(pointer) do {} while(0) -+#endif /* DEBUG */ ++ memcpy(response_packet_data + args->response_packet_size, ++ global_mali_profiling_counters[i].counter_name, name_size + 1); + -+#define MSG_ERR(args) do{ /* args should be in brackets */ \ -+ _mali_osk_dbgmsg("UMP: ERR: %s\n" ,__FILE__); \ -+ _mali_osk_dbgmsg( " %s()%4d\n", __FUNCTION__, __LINE__) ; \ -+ 
_mali_osk_dbgmsg args ; \ -+ _mali_osk_dbgmsg("\n"); \ -+ } while(0) ++ args->response_packet_size += (name_size + 1); + -+#define MSG(args) do{ /* args should be in brackets */ \ -+ _mali_osk_dbgmsg("UMP: "); \ -+ _mali_osk_dbgmsg args; \ -+ } while (0) ++ if (global_mali_profiling_counters[i].counter_id == COUNTER_VP_ACTIVITY) { ++ args->response_packet_size += _mali_profiling_pack_int(response_packet_data, ++ output_buffer_size, args->response_packet_size, (s32)1); ++ } else if (global_mali_profiling_counters[i].counter_id == COUNTER_FP_ACTIVITY) { ++ args->response_packet_size += _mali_profiling_pack_int(response_packet_data, ++ output_buffer_size, args->response_packet_size, (s32)mali_pp_get_glob_num_pp_cores()); ++ } else { ++ args->response_packet_size += _mali_profiling_pack_int(response_packet_data, ++ output_buffer_size, args->response_packet_size, (s32) - 1); ++ } ++ } ++ ++ _mali_profiling_set_packet_size(response_packet_data + 1, args->response_packet_size); ++ break; ++ } + ++ case PACKET_HEADER_COUNTERS_ENABLE: { ++ int i; ++ u32 request_pos = PACKET_HEADER_SIZE; ++ mali_bool sw_counter_if_enabled = MALI_FALSE; + ++ if (PACKET_HEADER_SIZE > control_packet_size || ++ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) { ++ MALI_PRINT_ERROR(("Wrong control packet size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/* -+ * This struct is used to store per session data. -+ * A session is created when someone open() the device, and -+ * closed when someone close() it or the user space application terminates. -+ */ -+typedef struct ump_session_data { -+ _mali_osk_list_t list_head_session_memory_list; /**< List of ump allocations made by the process (elements are ump_session_memory_list_element) */ -+ _mali_osk_list_t list_head_session_memory_mappings_list; /**< List of ump_memory_allocations mapped in */ -+ int api_version; -+ _mali_osk_mutex_t *lock; -+ ump_descriptor_mapping *cookies_map; /**< Secure mapping of cookies from _ump_ukk_map_mem() */ -+ int cache_operations_ongoing; -+ int has_pending_level1_cache_flush; -+} ump_session_data; ++ /* Init all counter states before enable requested counters.*/ ++ for (i = 0; i < num_global_mali_profiling_counters; ++i) { ++ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, MALI_HW_CORE_NO_COUNTER); ++ global_mali_profiling_counters[i].enabled = 0; ++ global_mali_profiling_counters[i].prev_counter_value = 0; ++ global_mali_profiling_counters[i].current_counter_value = 0; ++ ++ if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER && ++ global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) { ++ _mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, 0, 0); ++ } ++ } + ++ l2_cache_counter_if_enabled = MALI_FALSE; ++ num_counters_enabled = 0; ++ mem_counters_enabled = 0; ++ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 0); ++ _mali_profiling_control(SW_COUNTER_ENABLE, 0); ++ _mali_profiling_notification_enable(session, 0, 0); + ++ /* Enable requested counters */ ++ while (request_pos < control_packet_size) { ++ u32 begin = request_pos; ++ u32 event; ++ u32 key; + -+/* -+ * This struct is used to track the UMP memory references a session has. -+ * We need to track this in order to be able to clean up after user space processes -+ * which don't do it themself (e.g. due to a crash or premature termination). 
-+ */ -+typedef struct ump_session_memory_list_element { -+ struct ump_dd_mem *mem; -+ _mali_osk_list_t list; -+} ump_session_memory_list_element; ++ /* Check the counter name which should be ended with null */ ++ while (request_pos < control_packet_size && control_packet_data[request_pos] != '\0') { ++ ++request_pos; ++ } + ++ if (request_pos >= control_packet_size) ++ return _MALI_OSK_ERR_FAULT; + ++ ++request_pos; ++ event = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); ++ key = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); + -+/* -+ * Device specific data, created when device driver is loaded, and then kept as the global variable device. -+ */ -+typedef struct ump_dev { -+ ump_random_mapping *secure_id_map; -+ ump_memory_backend *backend; -+} ump_dev; ++ for (i = 0; i < num_global_mali_profiling_counters; ++i) { ++ u32 name_size = strlen((char *)(control_packet_data + begin)); + ++ if (strncmp(global_mali_profiling_counters[i].counter_name, (char *)(control_packet_data + begin), name_size) == 0) { ++ if (!sw_counter_if_enabled && (FIRST_SW_COUNTER <= global_mali_profiling_counters[i].counter_id ++ && global_mali_profiling_counters[i].counter_id <= LAST_SW_COUNTER)) { ++ sw_counter_if_enabled = MALI_TRUE; ++ _mali_profiling_control(SW_COUNTER_ENABLE, 1); ++ } + ++ if (COUNTER_FILMSTRIP == global_mali_profiling_counters[i].counter_id) { ++ _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1); ++ _mali_profiling_control(FBDUMP_CONTROL_RATE, event & 0xff); ++ _mali_profiling_control(FBDUMP_CONTROL_RESIZE_FACTOR, (event >> 8) & 0xff); ++ } + -+extern int ump_debug_level; -+extern struct ump_dev device; ++ if (global_mali_profiling_counters[i].counter_id >= FIRST_MEM_COUNTER && ++ global_mali_profiling_counters[i].counter_id <= LAST_MEM_COUNTER) { ++ _mali_profiling_notification_mem_counter(session, global_mali_profiling_counters[i].counter_id, ++ key, 1); ++ mem_counters_enabled++; ++ } + -+_mali_osk_errcode_t ump_kernel_constructor(void); -+void ump_kernel_destructor(void); -+int ump_map_errcode(_mali_osk_errcode_t err); ++ global_mali_profiling_counters[i].counter_event = event; ++ global_mali_profiling_counters[i].key = key; ++ global_mali_profiling_counters[i].enabled = 1; + -+/** -+ * variables from user space cannot be dereferenced from kernel space; tagging them -+ * with __user allows the GCC compiler to generate a warning. Other compilers may -+ * not support this so we define it here as an empty macro if the compiler doesn't -+ * define it. -+ */ -+#ifndef __user -+#define __user -+#endif ++ _mali_profiling_set_event(global_mali_profiling_counters[i].counter_id, ++ global_mali_profiling_counters[i].counter_event); ++ num_counters_enabled++; ++ break; ++ } ++ } + -+#endif /* __UMP_KERNEL_COMMON_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c -new file mode 100644 -index 000000000..e4642f039 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c -@@ -0,0 +1,155 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ if (i == num_global_mali_profiling_counters) { ++ MALI_PRINT_ERROR(("Counter name does not match for type %u.\n", control_type)); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } + -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "mali_osk_bitops.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_descriptor_mapping.h" ++ if (PACKET_HEADER_SIZE <= output_buffer_size) { ++ *response_packet_data = PACKET_HEADER_ACK; ++ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE); ++ args->response_packet_size = PACKET_HEADER_SIZE; ++ } else { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1)) ++ break; ++ } + -+/** -+ * Allocate a descriptor table capable of holding 'count' mappings -+ * @param count Number of mappings in the table -+ * @return Pointer to a new table, NULL on error -+ */ -+static ump_descriptor_table *descriptor_table_alloc(int count); ++ case PACKET_HEADER_START_CAPTURE_VALUE: { ++ u32 live_rate; ++ u32 request_pos = PACKET_HEADER_SIZE; + -+/** -+ * Free a descriptor table -+ * @param table The table to free -+ */ -+static void descriptor_table_free(ump_descriptor_table *table); ++ if (PACKET_HEADER_SIZE > control_packet_size || ++ control_packet_size != _mali_profiling_get_packet_size(control_packet_data + 1)) { ++ MALI_PRINT_ERROR(("Wrong control packet size , type 0x%x,size 0x%x.\n", control_packet_data[0], control_packet_size)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ump_descriptor_mapping *ump_descriptor_mapping_create(int init_entries, int max_entries) -+{ -+ ump_descriptor_mapping *map = _mali_osk_calloc(1, sizeof(ump_descriptor_mapping)); ++ /* Read samping rate in nanoseconds and live rate, start capture.*/ ++ profiling_sample_rate = _mali_profiling_read_packet_int(control_packet_data, ++ &request_pos, control_packet_size); + -+ init_entries = MALI_PAD_INT(init_entries); -+ max_entries = MALI_PAD_INT(max_entries); ++ live_rate = _mali_profiling_read_packet_int(control_packet_data, &request_pos, control_packet_size); + -+ if (NULL != map) { -+ map->table = descriptor_table_alloc(init_entries); -+ if (NULL != map->table) { -+ map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0); -+ if (NULL != map->lock) { -+ _mali_osk_set_nonatomic_bit(0, map->table->usage); /* reserve bit 0 to prevent NULL/zero logic to kick in */ -+ map->max_nr_mappings_allowed = max_entries; -+ map->current_nr_mappings = init_entries; -+ return map; ++ if (PACKET_HEADER_SIZE <= output_buffer_size) { ++ *response_packet_data = PACKET_HEADER_ACK; ++ _mali_profiling_set_packet_size(response_packet_data + 1, PACKET_HEADER_SIZE); ++ args->response_packet_size = PACKET_HEADER_SIZE; ++ } else { ++ return _MALI_OSK_ERR_FAULT; + } -+ descriptor_table_free(map->table); ++ ++ if (0 != num_counters_enabled && 0 != profiling_sample_rate) { ++ _mali_profiling_global_stream_list_free(); ++ if (mem_counters_enabled > 0) { ++ _mali_profiling_notification_enable(session, profiling_sample_rate, 1); ++ } ++ hrtimer_start(&profiling_sampling_timer, ++ ktime_set(profiling_sample_rate / 1000000000, profiling_sample_rate % 1000000000), ++ HRTIMER_MODE_REL_PINNED); ++ } ++ ++ break; + } -+ _mali_osk_free(map); ++ default: ++ MALI_PRINT_ERROR(("Unsupported profiling packet header type %u.\n", control_type)); 
++ args->response_packet_size = 0; ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } else { ++ _mali_osk_profiling_stop_sampling(current_profiling_pid); ++ _mali_profiling_notification_enable(session, 0, 0); + } -+ return NULL; ++ ++ return _MALI_OSK_ERR_OK; +} + -+void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map) ++/** ++ * Called by gator.ko to set HW counters ++ * ++ * @param counter_id The counter ID. ++ * @param event_id Event ID that the counter should count (HW counter value from TRM). ++ * ++ * @return 1 on success, 0 on failure. ++ */ ++int _mali_profiling_set_event(u32 counter_id, s32 event_id) +{ -+ descriptor_table_free(map->table); -+ _mali_osk_mutex_rw_term(map->lock); -+ _mali_osk_free(map); ++ if (COUNTER_VP_0_C0 == counter_id) { ++ mali_gp_job_set_gp_counter_src0(event_id); ++ } else if (COUNTER_VP_0_C1 == counter_id) { ++ mali_gp_job_set_gp_counter_src1(event_id); ++ } else if (COUNTER_FP_0_C0 <= counter_id && COUNTER_FP_7_C1 >= counter_id) { ++ /* ++ * Two compatibility notes for this function: ++ * ++ * 1) Previously the DDK allowed per core counters. ++ * ++ * This did not make much sense on Mali-450 with the "virtual PP core" concept, ++ * so this option was removed, and only the same pair of HW counters was allowed on all cores, ++ * beginning with r3p2 release. ++ * ++ * Starting with r4p0, it is now possible to set different HW counters for the different sub jobs. ++ * This should be almost the same, since sub job 0 is designed to run on core 0, ++ * sub job 1 on core 1, and so on. ++ * ++ * The scheduling of PP sub jobs is not predictable, and this often led to situations where core 0 ran 2 ++ * sub jobs, while for instance core 1 ran zero. Having the counters set per sub job would thus increase ++ * the predictability of the returned data (as you would be guaranteed data for all the selected HW counters). ++ * ++ * PS: Core scaling needs to be disabled in order to use this reliably (goes for both solutions). ++ * ++ * The framework/#defines with Gator still indicates that the counter is for a particular core, ++ * but this is internally used as a sub job ID instead (no translation needed). ++ * ++ * 2) Global/default vs per sub job counters ++ * ++ * Releases before r3p2 had only per PP core counters. ++ * r3p2 releases had only one set of default/global counters which applied to all PP cores ++ * Starting with r4p0, we have both a set of default/global counters, ++ * and individual counters per sub job (equal to per core). ++ * ++ * To keep compatibility with Gator/DS-5/streamline, the following scheme is used: ++ * ++ * r3p2 release; only counters set for core 0 is handled, ++ * this is applied as the default/global set of counters, and will thus affect all cores. ++ * ++ * r4p0 release; counters set for core 0 is applied as both the global/default set of counters, ++ * and counters for sub job 0. ++ * Counters set for core 1-7 is only applied for the corresponding sub job. ++ * ++ * This should allow the DS-5/Streamline GUI to have a simple mode where it only allows setting the ++ * values for core 0, and thus this will be applied to all PP sub jobs/cores. ++ * Advanced mode will also be supported, where individual pairs of HW counters can be selected. ++ * ++ * The GUI will (until it is updated) still refer to cores instead of sub jobs, but this is probably ++ * something we can live with! 
++ * ++ * Mali-450 note: Each job is not divided into a deterministic number of sub jobs, as the HW DLBU ++ * automatically distributes the load between whatever number of cores is available at this particular time. ++ * A normal PP job on Mali-450 is thus considered a single (virtual) job, and it will thus only be possible ++ * to use a single pair of HW counters (even if the job ran on multiple PP cores). ++ * In other words, only the global/default pair of PP HW counters will be used for normal Mali-450 jobs. ++ */ ++ u32 sub_job = (counter_id - COUNTER_FP_0_C0) >> 1; ++ u32 counter_src = (counter_id - COUNTER_FP_0_C0) & 1; ++ if (0 == counter_src) { ++ mali_pp_job_set_pp_counter_sub_job_src0(sub_job, event_id); ++ if (0 == sub_job) { ++ mali_pp_job_set_pp_counter_global_src0(event_id); ++ } ++ } else { ++ mali_pp_job_set_pp_counter_sub_job_src1(sub_job, event_id); ++ if (0 == sub_job) { ++ mali_pp_job_set_pp_counter_global_src1(event_id); ++ } ++ } ++ } else if (COUNTER_L2_0_C0 <= counter_id && COUNTER_L2_2_C1 >= counter_id) { ++ u32 core_id = (counter_id - COUNTER_L2_0_C0) >> 1; ++ struct mali_l2_cache_core *l2_cache_core = mali_l2_cache_core_get_glob_l2_core(core_id); ++ ++ if (NULL != l2_cache_core) { ++ u32 counter_src = (counter_id - COUNTER_L2_0_C0) & 1; ++ mali_l2_cache_core_set_counter_src(l2_cache_core, ++ counter_src, event_id); ++ l2_cache_counter_if_enabled = MALI_TRUE; ++ } ++ } else { ++ return 0; /* Failure, unknown event */ ++ } ++ ++ return 1; /* success */ +} + -+int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target) ++/** ++ * Called by gator.ko to retrieve the L2 cache counter values for all L2 cache cores. ++ * The L2 cache counters are unique in that they are polled by gator, rather than being ++ * transmitted via the tracepoint mechanism. 
++ * ++ * @param values Pointer to a _mali_profiling_l2_counter_values structure where ++ * the counter sources and values will be output ++ * @return 0 if all went well; otherwise, return the mask with the bits set for the powered off cores ++ */ ++u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values) +{ -+ int descriptor = -1;/*-EFAULT;*/ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); -+ descriptor = _mali_osk_find_first_zero_bit(map->table->usage, map->current_nr_mappings); -+ if (descriptor == map->current_nr_mappings) { -+ int nr_mappings_new; -+ /* no free descriptor, try to expand the table */ -+ ump_descriptor_table *new_table; -+ ump_descriptor_table *old_table = map->table; -+ nr_mappings_new = map->current_nr_mappings * 2; ++ u32 l2_cores_num = mali_l2_cache_core_get_glob_num_l2_cores(); ++ u32 i; + -+ if (map->current_nr_mappings >= map->max_nr_mappings_allowed) { -+ descriptor = -1; -+ goto unlock_and_exit; -+ } ++ MALI_DEBUG_ASSERT(l2_cores_num <= 3); + -+ new_table = descriptor_table_alloc(nr_mappings_new); -+ if (NULL == new_table) { -+ descriptor = -1; -+ goto unlock_and_exit; ++ for (i = 0; i < l2_cores_num; i++) { ++ struct mali_l2_cache_core *l2_cache = mali_l2_cache_core_get_glob_l2_core(i); ++ ++ if (NULL == l2_cache) { ++ continue; + } + -+ _mali_osk_memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG); -+ _mali_osk_memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void *)); -+ map->table = new_table; -+ map->current_nr_mappings = nr_mappings_new; -+ descriptor_table_free(old_table); ++ mali_l2_cache_core_get_counter_values(l2_cache, ++ &values->cores[i].source0, ++ &values->cores[i].value0, ++ &values->cores[i].source1, ++ &values->cores[i].value1); + } + -+ /* we have found a valid descriptor, set the value and usage bit */ -+ _mali_osk_set_nonatomic_bit(descriptor, map->table->usage); -+ map->table->mappings[descriptor] = target; -+ -+unlock_and_exit: -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); -+ return descriptor; ++ return 0; +} + -+int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target) ++/** ++ * Called by gator to control the production of profiling information at runtime. ++ */ ++void _mali_profiling_control(u32 action, u32 value) +{ -+ int result = -1;/*-EFAULT;*/ -+ DEBUG_ASSERT(map); -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); -+ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { -+ *target = map->table->mappings[descriptor]; -+ result = 0; -+ } else *target = NULL; -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); -+ return result; ++ switch (action) { ++ case FBDUMP_CONTROL_ENABLE: ++ mali_set_user_setting(_MALI_UK_USER_SETTING_COLORBUFFER_CAPTURE_ENABLED, (value == 0 ? MALI_FALSE : MALI_TRUE)); ++ break; ++ case FBDUMP_CONTROL_RATE: ++ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_N_FRAMES, value); ++ break; ++ case SW_COUNTER_ENABLE: ++ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_COUNTER_ENABLED, value); ++ break; ++ case FBDUMP_CONTROL_RESIZE_FACTOR: ++ mali_set_user_setting(_MALI_UK_USER_SETTING_BUFFER_CAPTURE_RESIZE_FACTOR, value); ++ break; ++ default: ++ break; /* Ignore unimplemented actions */ ++ } +} + -+int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target) ++/** ++ * Called by gator to get mali api version. 
++ */ ++u32 _mali_profiling_get_api_version(void) +{ -+ int result = -1;/*-EFAULT;*/ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); -+ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { -+ map->table->mappings[descriptor] = target; -+ result = 0; -+ } -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); -+ return result; ++ return MALI_PROFILING_API_VERSION; +} + -+void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor) ++/** ++* Called by gator to get the data about Mali instance in use: ++* product id, version, number of cores ++*/ ++void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values) +{ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); -+ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { -+ map->table->mappings[descriptor] = NULL; -+ _mali_osk_clear_nonatomic_bit(descriptor, map->table->usage); -+ } -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); ++ values->mali_product_id = (u32)mali_kernel_core_get_product_id(); ++ values->mali_version_major = mali_kernel_core_get_gpu_major_version(); ++ values->mali_version_minor = mali_kernel_core_get_gpu_minor_version(); ++ values->num_of_l2_cores = mali_l2_cache_core_get_glob_num_l2_cores(); ++ values->num_of_fp_cores = mali_executor_get_num_cores_total(); ++ values->num_of_vp_cores = 1; +} + -+static ump_descriptor_table *descriptor_table_alloc(int count) ++ ++EXPORT_SYMBOL(_mali_profiling_set_event); ++EXPORT_SYMBOL(_mali_profiling_get_l2_counters); ++EXPORT_SYMBOL(_mali_profiling_control); ++EXPORT_SYMBOL(_mali_profiling_get_api_version); ++EXPORT_SYMBOL(_mali_profiling_get_mali_version); +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h b/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h +new file mode 100644 +index 000000000..af51161f9 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_specific.h +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++/** ++ * @file mali_osk_specific.h ++ * Defines per-OS Kernel level specifics, such as unusual workarounds for ++ * certain OSs. ++ */ ++ ++#ifndef __MALI_OSK_SPECIFIC_H__ ++#define __MALI_OSK_SPECIFIC_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#include "mali_osk_types.h" ++#include "mali_kernel_linux.h" ++ ++#define MALI_STATIC_INLINE static inline ++#define MALI_NON_STATIC_INLINE inline ++ ++typedef struct dma_pool *mali_dma_pool; ++ ++typedef u32 mali_dma_addr; ++ ++#if MALI_ENABLE_CPU_CYCLES ++/* Reads out the clock cycle performance counter of the current cpu. ++ It is useful for cost-free (2 cycle) measuring of the time spent ++ in a code path. Sample before and after, the diff number of cycles. ++ When the CPU is idle it will not increase this clock counter. 
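++ A minimal sketch of the sample-before/after pattern mentioned above (the
++ "work under test" placeholder stands for the code path being measured):
++     unsigned int before = mali_get_cpu_cyclecount();
++     ... work under test ...
++     unsigned int cycles = mali_get_cpu_cyclecount() - before;
++ As noted, the counter does not advance while the CPU is idle.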
++ It means that the counter is accurate if only spin-locks are used, ++ but mutexes may lead to too low values since the cpu might "idle" ++ waiting for the mutex to become available. ++ The clock source is configured on the CPU during mali module load, ++ but will not give useful output after a CPU has been power cycled. ++ It is therefore important to configure the system to not turn of ++ the cpu cores when using this functionallity.*/ ++static inline unsigned int mali_get_cpu_cyclecount(void) +{ -+ ump_descriptor_table *table; ++ unsigned int value; ++ /* Reading the CCNT Register - CPU clock counter */ ++ asm volatile("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value)); ++ return value; ++} + -+ table = _mali_osk_calloc(1, sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG) + (sizeof(void *) * count)); ++void mali_init_cpu_time_counters(int reset, int enable_divide_by_64); ++#endif + -+ if (NULL != table) { -+ table->usage = (u32 *)((u8 *)table + sizeof(ump_descriptor_table)); -+ table->mappings = (void **)((u8 *)table + sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG)); -+ } + -+ return table; ++MALI_STATIC_INLINE u32 _mali_osk_copy_from_user(void *to, void *from, u32 n) ++{ ++ return (u32)copy_from_user(to, from, (unsigned long)n); +} + -+static void descriptor_table_free(ump_descriptor_table *table) ++MALI_STATIC_INLINE mali_bool _mali_osk_in_atomic(void) +{ -+ _mali_osk_free(table); ++ return in_atomic(); +} + -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h ++#define _mali_osk_put_user(x, ptr) put_user(x, ptr) ++ ++#endif /* __MALI_OSK_SPECIFIC_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c new file mode 100644 -index 000000000..a888ba833 +index 000000000..d295e712a --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h -@@ -0,0 +1,89 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_time.c +@@ -0,0 +1,59 @@ +/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
@@ -328307,92 +330620,62 @@ index 000000000..a888ba833 + */ + +/** -+ * @file ump_kernel_descriptor_mapping.h ++ * @file mali_osk_time.c ++ * Implementation of the OS abstraction layer for the kernel device driver + */ + -+#ifndef __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ -+#define __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ -+ +#include "mali_osk.h" ++#include ++#include ++#include + -+/** -+ * The actual descriptor mapping table, never directly accessed by clients -+ */ -+typedef struct ump_descriptor_table { -+ u32 *usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used or not */ -+ void **mappings; /**< Array of the pointers the descriptors map to */ -+} ump_descriptor_table; -+ -+/** -+ * The descriptor mapping object -+ * Provides a separate namespace where we can map an integer to a pointer -+ */ -+typedef struct ump_descriptor_mapping { -+ _mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */ -+ int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */ -+ int current_nr_mappings; /**< Current number of possible mappings */ -+ ump_descriptor_table *table; /**< Pointer to the current mapping table */ -+} ump_descriptor_mapping; -+ -+/** -+ * Create a descriptor mapping object -+ * Create a descriptor mapping capable of holding init_entries growable to max_entries -+ * @param init_entries Number of entries to preallocate memory for -+ * @param max_entries Number of entries to max support -+ * @return Pointer to a descriptor mapping object, NULL on failure -+ */ -+ump_descriptor_mapping *ump_descriptor_mapping_create(int init_entries, int max_entries); ++mali_bool _mali_osk_time_after_eq(unsigned long ticka, unsigned long tickb) ++{ ++ return time_after_eq(ticka, tickb) ? ++ MALI_TRUE : MALI_FALSE; ++} + -+/** -+ * Destroy a descriptor mapping object -+ * @param map The map to free -+ */ -+void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map); ++unsigned long _mali_osk_time_mstoticks(u32 ms) ++{ ++ return msecs_to_jiffies(ms); ++} + -+/** -+ * Allocate a new mapping entry (descriptor ID) -+ * Allocates a new entry in the map. 
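++/* A minimal usage sketch, assuming a timer "t" created elsewhere with
++ * _mali_osk_timer_init(): a relative millisecond timeout is converted to
++ * ticks before being handed to the timer layer, e.g.
++ *
++ *     _mali_osk_timer_mod(t, _mali_osk_time_mstoticks(100));
++ *
++ * which re-arms the timer roughly 100 ms from now; the wait-queue wrapper
++ * _mali_osk_wait_queue_wait_event_timeout() performs the same conversion.
++ */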
-+ * @param map The map to allocate a new entry in -+ * @param target The value to map to -+ * @return The descriptor allocated, a negative value on error -+ */ -+int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target); ++u32 _mali_osk_time_tickstoms(unsigned long ticks) ++{ ++ return jiffies_to_msecs(ticks); ++} + -+/** -+ * Get the value mapped to by a descriptor ID -+ * @param map The map to lookup the descriptor id in -+ * @param descriptor The descriptor ID to lookup -+ * @param target Pointer to a pointer which will receive the stored value -+ * @return 0 on successful lookup, negative on error -+ */ -+int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target); ++unsigned long _mali_osk_time_tickcount(void) ++{ ++ return jiffies; ++} + -+/** -+ * Set the value mapped to by a descriptor ID -+ * @param map The map to lookup the descriptor id in -+ * @param descriptor The descriptor ID to lookup -+ * @param target Pointer to replace the current value with -+ * @return 0 on successful lookup, negative on error -+ */ -+int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target); ++void _mali_osk_time_ubusydelay(u32 usecs) ++{ ++ udelay(usecs); ++} + -+/** -+ * Free the descriptor ID -+ * For the descriptor to be reused it has to be freed -+ * @param map The map to free the descriptor from -+ * @param descriptor The descriptor ID to free -+ */ -+void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor); ++u64 _mali_osk_time_get_ns(void) ++{ ++ struct timespec64 tsval; ++ ktime_get_real_ts64(&tsval); ++ return (u64)timespec64_to_ns(&tsval); ++} + -+#endif /* __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h ++u64 _mali_osk_boot_time_get_ns(void) ++{ ++ struct timespec64 tsval; ++ ktime_get_boottime_ts64(&tsval); ++ return (u64)timespec64_to_ns(&tsval); ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c new file mode 100644 -index 000000000..2b69f68e8 +index 000000000..d01c11482 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h -@@ -0,0 +1,48 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_timers.c +@@ -0,0 +1,76 @@ +/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
@@ -328402,51 +330685,79 @@ index 000000000..2b69f68e8 + */ + +/** -+ * @file ump_kernel_memory_mapping.h ++ * @file mali_osk_timers.c ++ * Implementation of the OS abstraction layer for the kernel device driver + */ + -+#ifndef __UMP_KERNEL_MEMORY_BACKEND_H__ -+#define __UMP_KERNEL_MEMORY_BACKEND_H__ ++#include ++#include ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+#include "ump_kernel_interface.h" -+#include "ump_kernel_types.h" ++struct _mali_osk_timer_t_struct { ++ struct timer_list timer; ++}; + ++typedef void (*timer_timeout_function_t)(unsigned long); + -+typedef struct ump_memory_allocation { -+ void *phys_addr; -+ void *mapping; -+ unsigned long size; -+ ump_dd_handle handle; -+ void *process_mapping_info; -+ u32 cookie; /**< necessary on some U/K interface implementations */ -+ struct ump_session_data *ump_session; /**< Session that this allocation belongs to */ -+ _mali_osk_list_t list; /**< List for linking together memory allocations into the session's memory head */ -+ u32 is_cached; -+} ump_memory_allocation; ++_mali_osk_timer_t *_mali_osk_timer_init(_mali_osk_timer_callback_t callback) ++{ ++ _mali_osk_timer_t *t = (_mali_osk_timer_t *)kmalloc(sizeof(_mali_osk_timer_t), GFP_KERNEL); ++ if (NULL != t) ++ timer_setup(&t->timer, ++ (void (*)(struct timer_list *))callback, 0); ++ return t; ++} + -+typedef struct ump_memory_backend { -+ int (*allocate)(void *ctx, ump_dd_mem *descriptor); -+ void (*release)(void *ctx, ump_dd_mem *descriptor); -+ void (*shutdown)(struct ump_memory_backend *backend); -+ u32(*stat)(struct ump_memory_backend *backend); -+ int (*pre_allocate_physical_check)(void *ctx, u32 size); -+ u32(*adjust_to_mali_phys)(void *ctx, u32 cpu_phys); -+ void *ctx; -+} ump_memory_backend; ++void _mali_osk_timer_add(_mali_osk_timer_t *tim, unsigned long ticks_to_expire) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ tim->timer.expires = jiffies + ticks_to_expire; ++ add_timer(&(tim->timer)); ++} + -+ump_memory_backend *ump_memory_backend_create(void); -+void ump_memory_backend_destroy(void); ++void _mali_osk_timer_mod(_mali_osk_timer_t *tim, unsigned long ticks_to_expire) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ mod_timer(&(tim->timer), jiffies + ticks_to_expire); ++} + -+#endif /*__UMP_KERNEL_MEMORY_BACKEND_H__ */ ++void _mali_osk_timer_del(_mali_osk_timer_t *tim) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ del_timer_sync(&(tim->timer)); ++} + -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c ++void _mali_osk_timer_del_async(_mali_osk_timer_t *tim) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ del_timer(&(tim->timer)); ++} ++ ++mali_bool _mali_osk_timer_pending(_mali_osk_timer_t *tim) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ return 1 == timer_pending(&(tim->timer)); ++} ++ ++void _mali_osk_timer_setcallback(_mali_osk_timer_t *tim, _mali_osk_timer_callback_t callback, void *data) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++} ++ ++void _mali_osk_timer_term(_mali_osk_timer_t *tim) ++{ ++ MALI_DEBUG_ASSERT_POINTER(tim); ++ kfree(tim); ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c new file mode 100644 -index 000000000..0b6434bee +index 000000000..fa12abd3f --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c -@@ -0,0 +1,181 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wait_queue.c +@@ -0,0 +1,78 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. 
All rights reserved. ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -328455,183 +330766,80 @@ index 000000000..0b6434bee + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include "mali_osk.h" -+#include "mali_osk_list.h" -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+ -+#include "ump_kernel_interface_ref_drv.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_descriptor_mapping.h" -+ -+#define UMP_MINIMUM_SIZE 4096 -+#define UMP_MINIMUM_SIZE_MASK (~(UMP_MINIMUM_SIZE-1)) -+#define UMP_SIZE_ALIGN(x) (((x)+UMP_MINIMUM_SIZE-1)&UMP_MINIMUM_SIZE_MASK) -+#define UMP_ADDR_ALIGN_OFFSET(x) ((x)&(UMP_MINIMUM_SIZE-1)) -+static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor); -+ -+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block *blocks, unsigned long num_blocks) -+{ -+ ump_dd_mem *mem; -+ unsigned long size_total = 0; -+ int ret; -+ u32 i; -+ -+ /* Go through the input blocks and verify that they are sane */ -+ for (i = 0; i < num_blocks; i++) { -+ unsigned long addr = blocks[i].addr; -+ unsigned long size = blocks[i].size; -+ -+ DBG_MSG(5, ("Adding physical memory to new handle. Address: 0x%08lx, size: %lu\n", addr, size)); -+ size_total += blocks[i].size; -+ -+ if (0 != UMP_ADDR_ALIGN_OFFSET(addr)) { -+ MSG_ERR(("Trying to create UMP memory from unaligned physical address. Address: 0x%08lx\n", addr)); -+ return UMP_DD_HANDLE_INVALID; -+ } ++/** ++ * @file mali_osk_wait_queue.c ++ * Implemenation of the OS abstraction layer for the kernel device driver ++ */ + -+ if (0 != UMP_ADDR_ALIGN_OFFSET(size)) { -+ MSG_ERR(("Trying to create UMP memory with unaligned size. 
Size: %lu\n", size)); -+ return UMP_DD_HANDLE_INVALID; -+ } -+ } ++#include ++#include ++#include + -+ /* Allocate the ump_dd_mem struct for this allocation */ -+ mem = _mali_osk_malloc(sizeof(*mem)); -+ if (NULL == mem) { -+ DBG_MSG(1, ("Could not allocate ump_dd_mem in ump_dd_handle_create_from_phys_blocks()\n")); -+ return UMP_DD_HANDLE_INVALID; -+ } ++#include "mali_osk.h" ++#include "mali_kernel_common.h" + -+ /* Now, make a copy of the block information supplied by the user */ -+ mem->block_array = _mali_osk_malloc(sizeof(ump_dd_physical_block) * num_blocks); -+ if (NULL == mem->block_array) { -+ _mali_osk_free(mem); -+ DBG_MSG(1, ("Could not allocate a mem handle for function ump_dd_handle_create_from_phys_blocks().\n")); -+ return UMP_DD_HANDLE_INVALID; -+ } ++struct _mali_osk_wait_queue_t_struct { ++ wait_queue_head_t wait_queue; ++}; + -+ _mali_osk_memcpy(mem->block_array, blocks, sizeof(ump_dd_physical_block) * num_blocks); ++_mali_osk_wait_queue_t *_mali_osk_wait_queue_init(void) ++{ ++ _mali_osk_wait_queue_t *ret = NULL; + -+ /* And setup the rest of the ump_dd_mem struct */ -+ _mali_osk_atomic_init(&mem->ref_count, 1); -+ mem->size_bytes = size_total; -+ mem->nr_blocks = num_blocks; -+ mem->backend_info = NULL; -+ mem->ctx = NULL; -+ mem->release_func = phys_blocks_release; -+ /* For now UMP handles created by ump_dd_handle_create_from_phys_blocks() is forced to be Uncached */ -+ mem->is_cached = 0; -+ mem->hw_device = _UMP_UK_USED_BY_CPU; -+ mem->lock_usage = UMP_NOT_LOCKED; ++ ret = kmalloc(sizeof(_mali_osk_wait_queue_t), GFP_KERNEL); + -+ /* Find a secure ID for this allocation */ -+ ret = ump_random_mapping_insert(device.secure_id_map, mem); -+ if (unlikely(ret)) { -+ _mali_osk_free(mem->block_array); -+ _mali_osk_free(mem); -+ DBG_MSG(1, ("Failed to allocate secure ID in ump_dd_handle_create_from_phys_blocks()\n")); -+ return UMP_DD_HANDLE_INVALID; ++ if (NULL == ret) { ++ return ret; + } + -+ DBG_MSG(3, ("UMP memory created. 
ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes)); ++ init_waitqueue_head(&ret->wait_queue); ++ MALI_DEBUG_ASSERT(!waitqueue_active(&ret->wait_queue)); + -+ return (ump_dd_handle)mem; ++ return ret; +} + -+static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor) ++void _mali_osk_wait_queue_wait_event(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data) +{ -+ _mali_osk_free(descriptor->block_array); -+ descriptor->block_array = NULL; ++ MALI_DEBUG_ASSERT_POINTER(queue); ++ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue)); ++ wait_event(queue->wait_queue, condition(data)); +} + -+_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction) ++void _mali_osk_wait_queue_wait_event_timeout(_mali_osk_wait_queue_t *queue, mali_bool(*condition)(void *), void *data, u32 timeout) +{ -+ ump_session_data *session_data = NULL; -+ ump_dd_mem *new_allocation = NULL; -+ ump_session_memory_list_element *session_memory_element = NULL; -+ int ret; -+ -+ DEBUG_ASSERT_POINTER(user_interaction); -+ DEBUG_ASSERT_POINTER(user_interaction->ctx); -+ -+ session_data = (ump_session_data *) user_interaction->ctx; -+ -+ session_memory_element = _mali_osk_calloc(1, sizeof(ump_session_memory_list_element)); -+ if (NULL == session_memory_element) { -+ DBG_MSG(1, ("Failed to allocate ump_session_memory_list_element in ump_ioctl_allocate()\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ -+ new_allocation = _mali_osk_calloc(1, sizeof(ump_dd_mem)); -+ if (NULL == new_allocation) { -+ _mali_osk_free(session_memory_element); -+ DBG_MSG(1, ("Failed to allocate ump_dd_mem in _ump_ukk_allocate()\n")); -+ return _MALI_OSK_ERR_NOMEM; -+ } -+ -+ /* Initialize the part of the new_allocation that we know so for */ -+ _mali_osk_atomic_init(&new_allocation->ref_count, 1); -+ if (0 == (UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE & user_interaction->constraints)) -+ new_allocation->is_cached = 0; -+ else new_allocation->is_cached = 1; ++ MALI_DEBUG_ASSERT_POINTER(queue); ++ MALI_DEBUG_PRINT(6, ("Adding to wait queue %p\n", queue)); ++ wait_event_timeout(queue->wait_queue, condition(data), _mali_osk_time_mstoticks(timeout)); ++} + -+ /* Special case a size of 0, we should try to emulate what malloc does -+ * in this case, which is to return a valid pointer that must be freed, -+ * but can't be dereferenced */ -+ if (0 == user_interaction->size) { -+ /* Emulate by actually allocating the minimum block size */ -+ user_interaction->size = 1; -+ } ++void _mali_osk_wait_queue_wake_up(_mali_osk_wait_queue_t *queue) ++{ ++ MALI_DEBUG_ASSERT_POINTER(queue); + -+ /* Page align the size */ -+ new_allocation->size_bytes = UMP_SIZE_ALIGN(user_interaction->size); -+ new_allocation->lock_usage = UMP_NOT_LOCKED; ++ /* if queue is empty, don't attempt to wake up its elements */ ++ if (!waitqueue_active(&queue->wait_queue)) return; + -+ /* Now, ask the active memory backend to do the actual memory allocation */ -+ if (!device.backend->allocate(device.backend->ctx, new_allocation)) { -+ DBG_MSG(3, ("OOM: No more UMP memory left. Failed to allocate memory in ump_ioctl_allocate(). 
Size: %lu, requested size: %lu\n", -+ new_allocation->size_bytes, -+ (unsigned long)user_interaction->size)); -+ _mali_osk_free(new_allocation); -+ _mali_osk_free(session_memory_element); -+ return _MALI_OSK_ERR_INVALID_FUNC; -+ } -+ new_allocation->hw_device = _UMP_UK_USED_BY_CPU; -+ new_allocation->ctx = device.backend->ctx; -+ new_allocation->release_func = device.backend->release; ++ MALI_DEBUG_PRINT(6, ("Waking up elements in wait queue %p ....\n", queue)); + -+ /* Initialize the session_memory_element, and add it to the session object */ -+ session_memory_element->mem = new_allocation; -+ _mali_osk_mutex_wait(session_data->lock); -+ _mali_osk_list_add(&(session_memory_element->list), &(session_data->list_head_session_memory_list)); -+ _mali_osk_mutex_signal(session_data->lock); ++ wake_up_all(&queue->wait_queue); + -+ /* Create a secure ID for this allocation */ -+ ret = ump_random_mapping_insert(device.secure_id_map, new_allocation); -+ if (unlikely(ret)) { -+ new_allocation->release_func(new_allocation->ctx, new_allocation); -+ _mali_osk_free(session_memory_element); -+ _mali_osk_free(new_allocation); -+ DBG_MSG(1, ("Failed to allocate secure ID in ump_ioctl_allocate()\n")); -+ return _MALI_OSK_ERR_INVALID_FUNC; -+ } ++ MALI_DEBUG_PRINT(6, ("... elements in wait queue %p woken up\n", queue)); ++} + -+ user_interaction->secure_id = new_allocation->secure_id; -+ user_interaction->size = new_allocation->size_bytes; -+ DBG_MSG(3, ("UMP memory allocated. ID: %u, size: %lu\n", -+ new_allocation->secure_id, -+ new_allocation->size_bytes)); ++void _mali_osk_wait_queue_term(_mali_osk_wait_queue_t *queue) ++{ ++ /* Parameter validation */ ++ MALI_DEBUG_ASSERT_POINTER(queue); + -+ return _MALI_OSK_ERR_OK; ++ /* Linux requires no explicit termination of wait queues */ ++ kfree(queue); +} -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c new file mode 100644 -index 000000000..32f32ccbe +index 000000000..d5e258a83 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h -@@ -0,0 +1,58 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_osk_wq.c +@@ -0,0 +1,240 @@ +/* + * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * @@ -328642,324 +330850,244 @@ index 000000000..32f32ccbe + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef __UMP_KERNEL_TYPES_H__ -+#define __UMP_KERNEL_TYPES_H__ ++/** ++ * @file mali_osk_wq.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+#include "ump_kernel_interface.h" -+#include "mali_osk.h" ++#include /* For memory allocation */ ++#include ++#include ++#include + -+#include -+#ifdef CONFIG_DMA_SHARED_BUFFER -+#include -+#endif ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_kernel_license.h" ++#include "mali_kernel_linux.h" + -+typedef enum { -+ UMP_USED_BY_CPU = 0, -+ UMP_USED_BY_MALI = 1, -+ UMP_USED_BY_UNKNOWN_DEVICE = 100, -+} ump_hw_usage; ++typedef struct _mali_osk_wq_work_s { ++ _mali_osk_wq_work_handler_t handler; ++ void *data; ++ mali_bool high_pri; ++ struct work_struct work_handle; ++} mali_osk_wq_work_object_t; + -+typedef enum { -+ UMP_NOT_LOCKED = 0, -+ UMP_READ = 1, -+ UMP_READ_WRITE = 3, -+} ump_lock_usage; ++typedef struct _mali_osk_wq_delayed_work_s { ++ _mali_osk_wq_work_handler_t handler; ++ void *data; ++ struct delayed_work work; ++} mali_osk_wq_delayed_work_object_t; + -+/* -+ * This struct is what is "behind" a ump_dd_handle -+ */ -+typedef struct ump_dd_mem { -+ struct rb_node node; -+ ump_secure_id secure_id; -+ _mali_osk_atomic_t ref_count; -+ unsigned long size_bytes; -+ unsigned long nr_blocks; -+ ump_dd_physical_block *block_array; -+ void (*release_func)(void *ctx, struct ump_dd_mem *descriptor); -+ void *ctx; -+ void *backend_info; -+ int is_cached; -+ ump_hw_usage hw_device; -+ ump_lock_usage lock_usage; -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ struct dma_buf_attachment *import_attach; -+ struct sg_table *sgt; ++#if MALI_LICENSE_IS_GPL ++static struct workqueue_struct *mali_wq_normal = NULL; ++static struct workqueue_struct *mali_wq_high = NULL; +#endif -+} ump_dd_mem; + ++static void _mali_osk_wq_work_func(struct work_struct *work); + ++_mali_osk_errcode_t _mali_osk_wq_init(void) ++{ ++#if MALI_LICENSE_IS_GPL ++ MALI_DEBUG_ASSERT(NULL == mali_wq_normal); ++ MALI_DEBUG_ASSERT(NULL == mali_wq_high); + -+#endif /* __UMP_KERNEL_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_osk.h b/drivers/gpu/arm/mali400/ump/common/ump_osk.h -new file mode 100644 -index 000000000..9adc4d3df ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_osk.h -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) ++ mali_wq_normal = alloc_workqueue("mali", WQ_UNBOUND, 0); ++ mali_wq_high = alloc_workqueue("mali_high_pri", WQ_HIGHPRI | WQ_UNBOUND, 0); ++#else ++ mali_wq_normal = create_workqueue("mali"); ++ mali_wq_high = create_workqueue("mali_high_pri"); ++#endif ++ if (NULL == mali_wq_normal || NULL == mali_wq_high) { ++ MALI_PRINT_ERROR(("Unable to create Mali workqueues\n")); + -+/** -+ * @file ump_osk.h -+ * Defines the OS abstraction layer for the UMP kernel device driver (OSK) -+ */ ++ if (mali_wq_normal) destroy_workqueue(mali_wq_normal); ++ if (mali_wq_high) destroy_workqueue(mali_wq_high); + -+#ifndef __UMP_OSK_H__ -+#define __UMP_OSK_H__ ++ mali_wq_normal = NULL; ++ mali_wq_high = NULL; + -+#include -+#include -+#include "ump_uk_types.h" -+#include "ump_kernel_common.h" ++ return _MALI_OSK_ERR_FAULT; ++ } ++#endif /* MALI_LICENSE_IS_GPL */ + -+#ifdef __cplusplus -+extern "C" { ++ return _MALI_OSK_ERR_OK; ++} ++ ++void _mali_osk_wq_flush(void) ++{ ++#if MALI_LICENSE_IS_GPL ++ flush_workqueue(mali_wq_high); ++ flush_workqueue(mali_wq_normal); ++#else ++ flush_scheduled_work(); +#endif ++} + -+_mali_osk_errcode_t _ump_osk_init(void); ++void _mali_osk_wq_term(void) ++{ ++#if MALI_LICENSE_IS_GPL ++ MALI_DEBUG_ASSERT(NULL != mali_wq_normal); ++ MALI_DEBUG_ASSERT(NULL != mali_wq_high); + -+_mali_osk_errcode_t _ump_osk_term(void); ++ flush_workqueue(mali_wq_normal); ++ destroy_workqueue(mali_wq_normal); + -+int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom); ++ flush_workqueue(mali_wq_high); ++ destroy_workqueue(mali_wq_high); + -+int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom); ++ mali_wq_normal = NULL; ++ mali_wq_high = NULL; ++#else ++ flush_scheduled_work(); ++#endif ++} + -+_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor); ++_mali_osk_wq_work_t *_mali_osk_wq_create_work(_mali_osk_wq_work_handler_t handler, void *data) ++{ ++ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL); + -+_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size); ++ if (NULL == work) return NULL; + -+void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor); ++ work->handler = handler; ++ work->data = data; ++ work->high_pri = MALI_FALSE; + -+void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data); ++ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func); + -+#ifdef __cplusplus ++ return work; +} -+#endif -+ -+#endif -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h b/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h -new file mode 100644 -index 000000000..db842cdcb ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h -@@ -0,0 +1,202 @@ -+/* -+ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ + -+/** -+ * @file ump_uk_types.h -+ * Defines the types and constants used in the user-kernel interface -+ */ ++_mali_osk_wq_work_t *_mali_osk_wq_create_work_high_pri(_mali_osk_wq_work_handler_t handler, void *data) ++{ ++ mali_osk_wq_work_object_t *work = kmalloc(sizeof(mali_osk_wq_work_object_t), GFP_KERNEL); + -+#ifndef __UMP_UK_TYPES_H__ -+#define __UMP_UK_TYPES_H__ ++ if (NULL == work) return NULL; + -+#ifdef __cplusplus -+extern "C" { -+#endif ++ work->handler = handler; ++ work->data = data; ++ work->high_pri = MALI_TRUE; + -+/* Helpers for API version handling */ -+#define MAKE_VERSION_ID(x) (((x) << 16UL) | (x)) -+#define IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF)) -+#define GET_VERSION(x) (((x) >> 16UL) & 0xFFFF) -+#define IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y)))) ++ INIT_WORK(&work->work_handle, _mali_osk_wq_work_func); + -+/** -+ * API version define. -+ * Indicates the version of the kernel API -+ * The version is a 16bit integer incremented on each API change. -+ * The 16bit integer is stored twice in a 32bit integer -+ * So for version 1 the value would be 0x00010001 -+ */ -+#define UMP_IOCTL_API_VERSION MAKE_VERSION_ID(3) ++ return work; ++} + -+typedef enum ++void _mali_osk_wq_delete_work(_mali_osk_wq_work_t *work) +{ -+ _UMP_IOC_QUERY_API_VERSION = 1, -+ _UMP_IOC_ALLOCATE, -+ _UMP_IOC_RELEASE, -+ _UMP_IOC_SIZE_GET, -+ _UMP_IOC_MAP_MEM, /* not used in Linux */ -+ _UMP_IOC_UNMAP_MEM, /* not used in Linux */ -+ _UMP_IOC_MSYNC, -+ _UMP_IOC_CACHE_OPERATIONS_CONTROL, -+ _UMP_IOC_SWITCH_HW_USAGE, -+ _UMP_IOC_LOCK, -+ _UMP_IOC_UNLOCK, -+ _UMP_IOC_DMABUF_IMPORT, -+} _ump_uk_functions; ++ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; ++ _mali_osk_wq_flush(); ++ kfree(work_object); ++} + -+typedef enum ++void _mali_osk_wq_delete_work_nonflush(_mali_osk_wq_work_t *work) +{ -+ UMP_REF_DRV_UK_CONSTRAINT_NONE = 0, -+ UMP_REF_DRV_UK_CONSTRAINT_PHYSICALLY_LINEAR = 1, -+ UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE = 4, -+} ump_uk_alloc_constraints; ++ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; ++ kfree(work_object); ++} + -+typedef enum ++void _mali_osk_wq_schedule_work(_mali_osk_wq_work_t *work) +{ -+ _UMP_UK_MSYNC_CLEAN = 0, -+ _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE = 1, -+ _UMP_UK_MSYNC_INVALIDATE = 2, -+ _UMP_UK_MSYNC_FLUSH_L1 = 3, -+ _UMP_UK_MSYNC_READOUT_CACHE_ENABLED = 128, -+} ump_uk_msync_op; ++ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; ++#if MALI_LICENSE_IS_GPL ++ queue_work(mali_wq_normal, &work_object->work_handle); ++#else ++ schedule_work(&work_object->work_handle); ++#endif ++} + -+typedef enum ++void _mali_osk_wq_schedule_work_high_pri(_mali_osk_wq_work_t *work) +{ -+ _UMP_UK_CACHE_OP_START = 0, -+ _UMP_UK_CACHE_OP_FINISH = 1, -+} ump_uk_cache_op_control; ++ mali_osk_wq_work_object_t *work_object = (mali_osk_wq_work_object_t *)work; ++#if MALI_LICENSE_IS_GPL ++ queue_work(mali_wq_high, &work_object->work_handle); ++#else ++ schedule_work(&work_object->work_handle); ++#endif ++} + -+typedef enum ++static void _mali_osk_wq_work_func(struct work_struct *work) +{ -+ _UMP_UK_READ = 1, -+ _UMP_UK_READ_WRITE = 3, -+} ump_uk_lock_usage; ++ mali_osk_wq_work_object_t *work_object; + -+typedef enum -+{ -+ _UMP_UK_USED_BY_CPU = 0, -+ _UMP_UK_USED_BY_MALI = 1, -+ _UMP_UK_USED_BY_UNKNOWN_DEVICE = 100, -+} ump_uk_user; ++ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_work_object_t, work_handle); + -+/** -+ * 
Get API version ([in,out] u32 api_version, [out] u32 compatible) -+ */ -+typedef struct _ump_uk_api_version_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 version; /**< Set to the user space version on entry, stores the device driver version on exit */ -+ u32 compatible; /**< Non-null if the device is compatible with the client */ -+} _ump_uk_api_version_s; ++#if MALI_LICENSE_IS_GPL ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ++ /* We want highest Dynamic priority of the thread so that the Jobs depending ++ ** on this thread could be scheduled in time. Without this, this thread might ++ ** sometimes need to wait for some threads in user mode to finish its round-robin ++ ** time, causing *bubble* in the Mali pipeline. Thanks to the new implementation ++ ** of high-priority workqueue in new kernel, this only happens in older kernel. ++ */ ++ if (MALI_TRUE == work_object->high_pri) { ++ set_user_nice(current, -19); ++ } ++#endif ++#endif /* MALI_LICENSE_IS_GPL */ + -+/** -+ * ALLOCATE ([out] u32 secure_id, [in,out] u32 size, [in] contraints) -+ */ -+typedef struct _ump_uk_allocate_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< Return value from DD to Userdriver */ -+ u32 size; /**< Input and output. Requested size; input. Returned size; output */ -+ ump_uk_alloc_constraints constraints; /**< Only input to Devicedriver */ -+} _ump_uk_allocate_s; ++ work_object->handler(work_object->data); ++} + -+/** -+ * SIZE_GET ([in] u32 secure_id, [out]size ) -+ */ -+typedef struct _ump_uk_size_get_s ++static void _mali_osk_wq_delayed_work_func(struct work_struct *work) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< Input to DD */ -+ u32 size; /**< Returned size; output */ -+} _ump_uk_size_get_s; ++ mali_osk_wq_delayed_work_object_t *work_object; + -+/** -+ * Release ([in] u32 secure_id) -+ */ -+typedef struct _ump_uk_release_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< Input to DD */ -+} _ump_uk_release_s; ++ work_object = _MALI_OSK_CONTAINER_OF(work, mali_osk_wq_delayed_work_object_t, work.work); ++ work_object->handler(work_object->data); ++} + -+typedef struct _ump_uk_map_mem_s ++mali_osk_wq_delayed_work_object_t *_mali_osk_wq_delayed_create_work(_mali_osk_wq_work_handler_t handler, void *data) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ void *mapping; /**< [out] Returns user-space virtual address for the mapping */ -+ void *phys_addr; /**< [in] physical address */ -+ unsigned long size; /**< [in] size */ -+ u32 secure_id; /**< [in] secure_id to assign to mapping */ -+ void *_ukk_private; /**< Only used inside linux port between kernel frontend and common part to store vma */ -+ u32 cookie; -+ u32 is_cached; /**< [in,out] caching of CPU mappings */ -+} _ump_uk_map_mem_s; ++ mali_osk_wq_delayed_work_object_t *work = kmalloc(sizeof(mali_osk_wq_delayed_work_object_t), GFP_KERNEL); + -+typedef struct _ump_uk_unmap_mem_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ void *mapping; -+ u32 size; -+ void *_ukk_private; -+ u32 cookie; -+} _ump_uk_unmap_mem_s; ++ if (NULL == work) return NULL; + -+typedef struct _ump_uk_msync_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ void *mapping; /**< [in] mapping addr */ -+ void *address; /**< [in] flush start addr */ -+ u32 size; /**< [in] size to flush */ -+ ump_uk_msync_op 
op; /**< [in] flush operation */ -+ u32 cookie; /**< [in] cookie stored with reference to the kernel mapping internals */ -+ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ -+ u32 is_cached; /**< [out] caching of CPU mappings */ -+} _ump_uk_msync_s; ++ work->handler = handler; ++ work->data = data; + -+typedef struct _ump_uk_cache_operations_control_s -+{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ ump_uk_cache_op_control op; /**< [in] cache operations start/stop */ -+} _ump_uk_cache_operations_control_s; ++ INIT_DELAYED_WORK(&work->work, _mali_osk_wq_delayed_work_func); + ++ return work; ++} + -+typedef struct _ump_uk_switch_hw_usage_s ++void _mali_osk_wq_delayed_delete_work_nonflush(_mali_osk_wq_delayed_work_t *work) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ -+ ump_uk_user new_user; /**< [in] cookie stored with reference to the kernel mapping internals */ -+ -+} _ump_uk_switch_hw_usage_s; ++ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; ++ kfree(work_object); ++} + -+typedef struct _ump_uk_lock_s ++void _mali_osk_wq_delayed_cancel_work_async(_mali_osk_wq_delayed_work_t *work) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ -+ ump_uk_lock_usage lock_usage; -+} _ump_uk_lock_s; ++ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; ++ cancel_delayed_work(&work_object->work); ++} + -+typedef struct _ump_uk_unlock_s ++void _mali_osk_wq_delayed_cancel_work_sync(_mali_osk_wq_delayed_work_t *work) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ -+} _ump_uk_unlock_s; ++ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; ++ cancel_delayed_work_sync(&work_object->work); ++} + -+typedef struct _ump_uk_dmabuf_s ++void _mali_osk_wq_delayed_schedule_work(_mali_osk_wq_delayed_work_t *work, u32 delay) +{ -+ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ -+ int fd; /**< [in] dmabuf_fd that identifies the dmabuf buffer */ -+ size_t size; /**< [in] size of the buffer */ -+ u32 secure_id; /**< [out] secure_id that identifies the ump buffer */ -+} _ump_uk_dmabuf_s; ++ mali_osk_wq_delayed_work_object_t *work_object = (mali_osk_wq_delayed_work_object_t *)work; + -+#ifdef __cplusplus -+} ++#if MALI_LICENSE_IS_GPL ++ queue_delayed_work(mali_wq_normal, &work_object->work, delay); ++#else ++ schedule_delayed_work(&work_object->work, delay); +#endif + -+#endif /* __UMP_UK_TYPES_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/common/ump_ukk.h b/drivers/gpu/arm/mali400/ump/common/ump_ukk.h ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c b/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c new file mode 100644 -index 000000000..f2906768c +index 000000000..931d7f07a --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/common/ump_ukk.h -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_pmu_power_up_down.c +@@ -0,0 +1,23 @@ ++/** ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -328969,63 +331097,26 @@ index 000000000..f2906768c + */ + +/** -+ * @file ump_ukk.h -+ * Defines the kernel-side interface of the user-kernel interface ++ * @file mali_pmu_power_up_down.c + */ + -+#ifndef __UMP_UKK_H__ -+#define __UMP_UKK_H__ -+ -+#include "mali_osk.h" -+#include "ump_uk_types.h" -+ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+ -+_mali_osk_errcode_t _ump_ukk_open(void **context); -+ -+_mali_osk_errcode_t _ump_ukk_close(void **context); -+ -+_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction); -+ -+_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info); -+ -+_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction); -+ -+_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args); -+ -+_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args); -+ -+void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args); -+ -+void _ump_ukk_msync(_ump_uk_msync_s *args); -+ -+void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args); -+ -+void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args); -+ -+void _ump_ukk_lock(_ump_uk_lock_s *args); -+ -+void _ump_ukk_unlock(_ump_uk_unlock_s *args); -+ -+u32 _ump_ukk_report_memory_usage(void); ++#include ++#include "mali_executor.h" + -+#ifdef __cplusplus ++int mali_perf_set_num_pp_cores(unsigned int num_cores) ++{ ++ return mali_executor_set_perf_level(num_cores, MALI_FALSE); +} -+#endif + -+#endif /* __UMP_UKK_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h b/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h ++EXPORT_SYMBOL(mali_perf_set_num_pp_cores); +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h new file mode 100644 -index 000000000..d0174055a +index 000000000..4661cac42 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h -@@ -0,0 +1,30 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_events.h +@@ -0,0 +1,17 @@ +/* -+ * Copyright (C) 2010, 2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -329034,34 +331125,21 @@ index 000000000..d0174055a + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file ump_kernel_license.h -+ * Defines for the macro MODULE_LICENSE. 
-+ */ -+ -+#ifndef __UMP_KERNEL_LICENSE_H__ -+#define __UMP_KERNEL_LICENSE_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define UMP_KERNEL_LINUX_LICENSE "GPL" -+#define UMP_LICENSE_IS_GPL 1 ++#ifndef __MALI_PROFILING_EVENTS_H__ ++#define __MALI_PROFILING_EVENTS_H__ + -+#ifdef __cplusplus -+} -+#endif ++/* Simple wrapper in order to find the OS specific location of this file */ ++#include + -+#endif /* __UMP_KERNEL_LICENSE_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h b/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h ++#endif /* __MALI_PROFILING_EVENTS_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h new file mode 100644 -index 000000000..bfb4e8d64 +index 000000000..6fdaa427c --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h -@@ -0,0 +1,54 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_gator_api.h +@@ -0,0 +1,17 @@ +/* -+ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -329070,58 +331148,21 @@ index 000000000..bfb4e8d64 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __UMP_IOCTL_H__ -+#define __UMP_IOCTL_H__ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+ -+#include -+ -+#ifndef __user -+#define __user -+#endif -+ -+ -+/** -+ * @file UMP_ioctl.h -+ * This file describes the interface needed to use the Linux device driver. -+ * The interface is used by the userpace UMP driver. 
-+ */ -+ -+#define UMP_IOCTL_NR 0x90 -+ -+ -+#define UMP_IOC_QUERY_API_VERSION _IOR(UMP_IOCTL_NR, _UMP_IOC_QUERY_API_VERSION, _ump_uk_api_version_s) -+#define UMP_IOC_ALLOCATE _IOWR(UMP_IOCTL_NR, _UMP_IOC_ALLOCATE, _ump_uk_allocate_s) -+#define UMP_IOC_RELEASE _IOR(UMP_IOCTL_NR, _UMP_IOC_RELEASE, _ump_uk_release_s) -+#define UMP_IOC_SIZE_GET _IOWR(UMP_IOCTL_NR, _UMP_IOC_SIZE_GET, _ump_uk_size_get_s) -+#define UMP_IOC_MSYNC _IOW(UMP_IOCTL_NR, _UMP_IOC_MSYNC, _ump_uk_msync_s) -+ -+#define UMP_IOC_CACHE_OPERATIONS_CONTROL _IOW(UMP_IOCTL_NR, _UMP_IOC_CACHE_OPERATIONS_CONTROL, _ump_uk_cache_operations_control_s) -+#define UMP_IOC_SWITCH_HW_USAGE _IOW(UMP_IOCTL_NR, _UMP_IOC_SWITCH_HW_USAGE, _ump_uk_switch_hw_usage_s) -+#define UMP_IOC_LOCK _IOW(UMP_IOCTL_NR, _UMP_IOC_LOCK, _ump_uk_lock_s) -+#define UMP_IOC_UNLOCK _IOW(UMP_IOCTL_NR, _UMP_IOC_UNLOCK, _ump_uk_unlock_s) -+ -+#define UMP_IOC_DMABUF_IMPORT _IOW(UMP_IOCTL_NR, _UMP_IOC_DMABUF_IMPORT, _ump_uk_dmabuf_s) ++#ifndef __MALI_PROFILING_GATOR_API_H__ ++#define __MALI_PROFILING_GATOR_API_H__ + -+#ifdef __cplusplus -+} -+#endif ++/* Simple wrapper in order to find the OS specific location of this file */ ++#include + -+#endif /* __UMP_IOCTL_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c ++#endif /* __MALI_PROFILING_GATOR_API_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c new file mode 100644 -index 000000000..71b30830c +index 000000000..c3a526f0a --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c -@@ -0,0 +1,449 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.c +@@ -0,0 +1,275 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -329130,453 +331171,279 @@ index 000000000..71b30830c + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include /* kernel module definitions */ -+#include /* file system operations */ -+#include /* character device definitions */ -+#include /* request_mem_region */ -+#include /* memory management functions and types */ -+#include /* user space access */ -+#include -+#include -+#include -+ -+#include "arch/config.h" /* Configuration for current platform. 
The symlinc for arch is set by Makefile */ -+#include "ump_ioctl.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_interface.h" -+#include "ump_kernel_interface_ref_drv.h" -+#include "ump_kernel_descriptor_mapping.h" -+#include "ump_kernel_memory_backend.h" -+#include "ump_kernel_memory_backend_os.h" -+#include "ump_kernel_memory_backend_dedicated.h" -+#include "ump_kernel_license.h" -+ -+#include "ump_osk.h" -+#include "ump_ukk.h" -+#include "ump_uk_types.h" -+#include "ump_ukk_wrappers.h" -+#include "ump_ukk_ref_wrappers.h" -+ -+ -+/* Module parameter to control log level */ -+int ump_debug_level = 2; -+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ -+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output"); ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_osk_mali.h" ++#include "mali_ukk.h" ++#include "mali_timestamp.h" ++#include "mali_osk_profiling.h" ++#include "mali_user_settings_db.h" ++#include "mali_profiling_internal.h" + -+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */ -+int ump_major = 0; -+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */ -+MODULE_PARM_DESC(ump_major, "Device major number"); ++typedef struct mali_profiling_entry { ++ u64 timestamp; ++ u32 event_id; ++ u32 data[5]; ++} mali_profiling_entry; + -+/* Name of the UMP device driver */ -+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */ ++typedef enum mali_profiling_state { ++ MALI_PROFILING_STATE_UNINITIALIZED, ++ MALI_PROFILING_STATE_IDLE, ++ MALI_PROFILING_STATE_RUNNING, ++ MALI_PROFILING_STATE_RETURN, ++} mali_profiling_state; + ++static _mali_osk_mutex_t *lock = NULL; ++static mali_profiling_state prof_state = MALI_PROFILING_STATE_UNINITIALIZED; ++static mali_profiling_entry *profile_entries = NULL; ++static _mali_osk_atomic_t profile_insert_index; ++static u32 profile_mask = 0; + -+#if UMP_LICENSE_IS_GPL -+static struct dentry *ump_debugfs_dir = NULL; -+#endif ++static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4); + -+/* -+ * The data which we attached to each virtual memory mapping request we get. -+ * Each memory mapping has a reference to the UMP memory it maps. -+ * We release this reference when the last memory mapping is unmapped. 
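++/* A minimal manual-start sketch, assuming profiling was initialised with
++ * auto_start == MALI_FALSE; 1024 is an arbitrary requested size, and on
++ * success _mali_internal_profiling_start() writes back the actual buffer
++ * size (rounded down to a power of two) through the pointer:
++ *
++ *     u32 limit = 1024;
++ *     _mali_osk_errcode_t err = _mali_internal_profiling_start(&limit);
++ */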
-+ */ -+typedef struct ump_vma_usage_tracker { -+ int references; -+ ump_dd_handle handle; -+} ump_vma_usage_tracker; ++void probe_mali_timeline_event(void *data, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned ++ int d2, unsigned int d3, unsigned int d4)) ++{ ++ add_event(event_id, d0, d1, d2, d3, d4); ++} + -+struct ump_device { -+ struct cdev cdev; -+#if UMP_LICENSE_IS_GPL -+ struct class *ump_class; -+#endif -+}; ++_mali_osk_errcode_t _mali_internal_profiling_init(mali_bool auto_start) ++{ ++ profile_entries = NULL; ++ profile_mask = 0; ++ _mali_osk_atomic_init(&profile_insert_index, 0); + -+/* The global variable containing the global device data */ -+static struct ump_device ump_device; -+struct device *ump_global_mdev = NULL; ++ lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_PROFILING); ++ if (NULL == lock) { ++ return _MALI_OSK_ERR_FAULT; ++ } + -+/* Forward declare static functions */ -+static int ump_file_open(struct inode *inode, struct file *filp); -+static int ump_file_release(struct inode *inode, struct file *filp); -+#ifdef HAVE_UNLOCKED_IOCTL -+static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -+#else -+static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); -+#endif -+static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma); ++ prof_state = MALI_PROFILING_STATE_IDLE; + ++ if (MALI_TRUE == auto_start) { ++ u32 limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; /* Use maximum buffer size */ + -+/* This variable defines the file operations this UMP device driver offer */ -+static struct file_operations ump_fops = { -+ .owner = THIS_MODULE, -+ .open = ump_file_open, -+ .release = ump_file_release, -+#ifdef HAVE_UNLOCKED_IOCTL -+ .unlocked_ioctl = ump_file_ioctl, -+#else -+ .ioctl = ump_file_ioctl, -+#endif -+ .mmap = ump_file_mmap -+}; ++ mali_set_user_setting(_MALI_UK_USER_SETTING_SW_EVENTS_ENABLE, MALI_TRUE); ++ if (_MALI_OSK_ERR_OK != _mali_internal_profiling_start(&limit)) { ++ return _MALI_OSK_ERR_FAULT; ++ } ++ } + ++ return _MALI_OSK_ERR_OK; ++} + -+/* This function is called by Linux to initialize this module. -+ * All we do is initialize the UMP device driver. -+ */ -+static int ump_initialize_module(void) ++void _mali_internal_profiling_term(void) +{ -+ _mali_osk_errcode_t err; ++ u32 count; + -+ DBG_MSG(2, ("Inserting UMP device driver. Compiled: %s, time: %s\n", __DATE__, __TIME__)); ++ /* Ensure profiling is stopped */ ++ _mali_internal_profiling_stop(&count); + -+ err = ump_kernel_constructor(); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("UMP device driver init failed\n")); -+ return ump_map_errcode(err); ++ prof_state = MALI_PROFILING_STATE_UNINITIALIZED; ++ ++ if (NULL != profile_entries) { ++ _mali_osk_vfree(profile_entries); ++ profile_entries = NULL; + } + -+ MSG(("UMP device driver %s loaded\n", SVN_REV_STRING)); -+ return 0; ++ if (NULL != lock) { ++ _mali_osk_mutex_term(lock); ++ lock = NULL; ++ } +} + -+ -+ -+/* -+ * This function is called by Linux to unload/terminate/exit/cleanup this module. -+ * All we do is terminate the UMP device driver. 
-+ */ -+static void ump_cleanup_module(void) ++_mali_osk_errcode_t _mali_internal_profiling_start(u32 *limit) +{ -+ DBG_MSG(2, ("Unloading UMP device driver\n")); -+ ump_kernel_destructor(); -+ DBG_MSG(2, ("Module unloaded\n")); -+} -+ ++ _mali_osk_errcode_t ret; ++ mali_profiling_entry *new_profile_entries; + ++ _mali_osk_mutex_wait(lock); + -+static ssize_t ump_memory_used_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -+{ -+ char buf[64]; -+ size_t r; -+ u32 mem = _ump_ukk_report_memory_usage(); ++ if (MALI_PROFILING_STATE_RUNNING == prof_state) { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_BUSY; ++ } + -+ r = snprintf(buf, 64, "%u\n", mem); -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -+} ++ new_profile_entries = _mali_osk_valloc(*limit * sizeof(mali_profiling_entry)); + -+static const struct file_operations ump_memory_usage_fops = { -+ .owner = THIS_MODULE, -+ .read = ump_memory_used_read, -+}; ++ if (NULL == new_profile_entries) { ++ _mali_osk_mutex_signal(lock); ++ _mali_osk_vfree(new_profile_entries); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+/* -+ * Initialize the UMP device driver. -+ */ -+int ump_kernel_device_initialize(void) -+{ -+ int err; -+ dev_t dev = 0; -+#if UMP_LICENSE_IS_GPL -+ ump_debugfs_dir = debugfs_create_dir(ump_dev_name, NULL); -+ if (ERR_PTR(-ENODEV) == ump_debugfs_dir) { -+ ump_debugfs_dir = NULL; -+ } else { -+ debugfs_create_file("memory_usage", 0400, ump_debugfs_dir, NULL, &ump_memory_usage_fops); ++ if (MALI_PROFILING_MAX_BUFFER_ENTRIES < *limit) { ++ *limit = MALI_PROFILING_MAX_BUFFER_ENTRIES; + } -+#endif + -+ if (0 == ump_major) { -+ /* auto select a major */ -+ err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name); -+ ump_major = MAJOR(dev); -+ } else { -+ /* use load time defined major number */ -+ dev = MKDEV(ump_major, 0); -+ err = register_chrdev_region(dev, 1, ump_dev_name); ++ profile_mask = 1; ++ while (profile_mask <= *limit) { ++ profile_mask <<= 1; + } ++ profile_mask >>= 1; + -+ if (0 == err) { -+ memset(&ump_device, 0, sizeof(ump_device)); ++ *limit = profile_mask; + -+ /* initialize our char dev data */ -+ cdev_init(&ump_device.cdev, &ump_fops); -+ ump_device.cdev.owner = THIS_MODULE; -+ ump_device.cdev.ops = &ump_fops; ++ profile_mask--; /* turns the power of two into a mask of one less */ + -+ /* register char dev with the kernel */ -+ err = cdev_add(&ump_device.cdev, dev, 1/*count*/); -+ if (0 == err) { ++ if (MALI_PROFILING_STATE_IDLE != prof_state) { ++ _mali_osk_mutex_signal(lock); ++ _mali_osk_vfree(new_profile_entries); ++ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ } + -+#if UMP_LICENSE_IS_GPL -+ ump_device.ump_class = class_create(THIS_MODULE, ump_dev_name); -+ if (IS_ERR(ump_device.ump_class)) { -+ err = PTR_ERR(ump_device.ump_class); -+ } else { -+ ump_global_mdev = device_create(ump_device.ump_class, NULL, dev, NULL, ump_dev_name); -+ if (!IS_ERR(ump_global_mdev)) { -+ return 0; -+ } ++ profile_entries = new_profile_entries; + -+ err = PTR_ERR(ump_global_mdev); -+ } -+ cdev_del(&ump_device.cdev); -+#else -+ return 0; -+#endif -+ } ++ ret = _mali_timestamp_reset(); + -+ unregister_chrdev_region(dev, 1); ++ if (_MALI_OSK_ERR_OK == ret) { ++ prof_state = MALI_PROFILING_STATE_RUNNING; ++ } else { ++ _mali_osk_vfree(profile_entries); ++ profile_entries = NULL; + } + -+ return err; -+} -+ ++ register_trace_mali_timeline_event(probe_mali_timeline_event, NULL); + ++ _mali_osk_mutex_signal(lock); ++ return ret; ++} + -+/* -+ * Terminate the UMP 
device driver -+ */ -+void ump_kernel_device_terminate(void) ++static inline void add_event(u32 event_id, u32 data0, u32 data1, u32 data2, u32 data3, u32 data4) +{ -+ dev_t dev = MKDEV(ump_major, 0); -+ -+#if UMP_LICENSE_IS_GPL -+ device_destroy(ump_device.ump_class, dev); -+ class_destroy(ump_device.ump_class); -+#endif -+ -+ /* unregister char device */ -+ cdev_del(&ump_device.cdev); ++ u32 cur_index = (_mali_osk_atomic_inc_return(&profile_insert_index) - 1) & profile_mask; + -+ /* free major */ -+ unregister_chrdev_region(dev, 1); ++ profile_entries[cur_index].timestamp = _mali_timestamp_get(); ++ profile_entries[cur_index].event_id = event_id; ++ profile_entries[cur_index].data[0] = data0; ++ profile_entries[cur_index].data[1] = data1; ++ profile_entries[cur_index].data[2] = data2; ++ profile_entries[cur_index].data[3] = data3; ++ profile_entries[cur_index].data[4] = data4; + -+#if UMP_LICENSE_IS_GPL -+ if (ump_debugfs_dir) -+ debugfs_remove_recursive(ump_debugfs_dir); -+#endif ++ /* If event is "leave API function", add current memory usage to the event ++ * as data point 4. This is used in timeline profiling to indicate how ++ * much memory was used when leaving a function. */ ++ if (event_id == (MALI_PROFILING_EVENT_TYPE_SINGLE | MALI_PROFILING_EVENT_CHANNEL_SOFTWARE | MALI_PROFILING_EVENT_REASON_SINGLE_SW_LEAVE_API_FUNC)) { ++ profile_entries[cur_index].data[4] = _mali_ukk_report_memory_usage(); ++ } +} + -+/* -+ * Open a new session. User space has called open() on us. -+ */ -+static int ump_file_open(struct inode *inode, struct file *filp) ++_mali_osk_errcode_t _mali_internal_profiling_stop(u32 *count) +{ -+ struct ump_session_data *session_data; -+ _mali_osk_errcode_t err; -+ -+ /* input validation */ -+ if (0 != MINOR(inode->i_rdev)) { -+ MSG_ERR(("Minor not zero in ump_file_open()\n")); -+ return -ENODEV; -+ } ++ _mali_osk_mutex_wait(lock); + -+ /* Call the OS-Independent UMP Open function */ -+ err = _ump_ukk_open((void **) &session_data); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("Ump failed to open a new session\n")); -+ return ump_map_errcode(err); ++ if (MALI_PROFILING_STATE_RUNNING != prof_state) { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ + } + -+ filp->private_data = (void *)session_data; -+ filp->f_pos = 0; -+ -+ return 0; /* success */ -+} ++ /* go into return state (user to retreive events), no more events will be added after this */ ++ prof_state = MALI_PROFILING_STATE_RETURN; + ++ unregister_trace_mali_timeline_event(probe_mali_timeline_event, NULL); + ++ _mali_osk_mutex_signal(lock); + -+/* -+ * Close a session. User space has called close() or crashed/terminated. -+ */ -+static int ump_file_release(struct inode *inode, struct file *filp) -+{ -+ _mali_osk_errcode_t err; ++ tracepoint_synchronize_unregister(); + -+ err = _ump_ukk_close((void **) &filp->private_data); -+ if (_MALI_OSK_ERR_OK != err) { -+ return ump_map_errcode(err); -+ } ++ *count = _mali_osk_atomic_read(&profile_insert_index); ++ if (*count > profile_mask) *count = profile_mask; + -+ return 0; /* success */ ++ return _MALI_OSK_ERR_OK; +} + -+ -+ -+/* -+ * Handle IOCTL requests. 
-+ */ -+#ifdef HAVE_UNLOCKED_IOCTL -+static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -+#else -+static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) -+#endif ++u32 _mali_internal_profiling_get_count(void) +{ -+ int err = -ENOTTY; -+ void __user *argument; -+ struct ump_session_data *session_data; -+ -+#ifndef HAVE_UNLOCKED_IOCTL -+ (void)inode; /* inode not used */ -+#endif ++ u32 retval = 0; + -+ session_data = (struct ump_session_data *)filp->private_data; -+ if (NULL == session_data) { -+ MSG_ERR(("No session data attached to file object\n")); -+ return -ENOTTY; ++ _mali_osk_mutex_wait(lock); ++ if (MALI_PROFILING_STATE_RETURN == prof_state) { ++ retval = _mali_osk_atomic_read(&profile_insert_index); ++ if (retval > profile_mask) retval = profile_mask; + } ++ _mali_osk_mutex_signal(lock); + -+ /* interpret the argument as a user pointer to something */ -+ argument = (void __user *)arg; -+ -+ switch (cmd) { -+ case UMP_IOC_QUERY_API_VERSION: -+ err = ump_get_api_version_wrapper((u32 __user *)argument, session_data); -+ break; -+ -+ case UMP_IOC_ALLOCATE : -+ err = ump_allocate_wrapper((u32 __user *)argument, session_data); -+ break; -+ -+ case UMP_IOC_RELEASE: -+ err = ump_release_wrapper((u32 __user *)argument, session_data); -+ break; -+ -+ case UMP_IOC_SIZE_GET: -+ err = ump_size_get_wrapper((u32 __user *)argument, session_data); -+ break; -+ -+ case UMP_IOC_MSYNC: -+ err = ump_msync_wrapper((u32 __user *)argument, session_data); -+ break; ++ return retval; ++} + -+ case UMP_IOC_CACHE_OPERATIONS_CONTROL: -+ err = ump_cache_operations_control_wrapper((u32 __user *)argument, session_data); -+ break; ++_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]) ++{ ++ u32 raw_index = _mali_osk_atomic_read(&profile_insert_index); + -+ case UMP_IOC_SWITCH_HW_USAGE: -+ err = ump_switch_hw_usage_wrapper((u32 __user *)argument, session_data); -+ break; ++ _mali_osk_mutex_wait(lock); + -+ case UMP_IOC_LOCK: -+ err = ump_lock_wrapper((u32 __user *)argument, session_data); -+ break; ++ if (index < profile_mask) { ++ if ((raw_index & ~profile_mask) != 0) { ++ index += raw_index; ++ index &= profile_mask; ++ } + -+ case UMP_IOC_UNLOCK: -+ err = ump_unlock_wrapper((u32 __user *)argument, session_data); -+ break; ++ if (prof_state != MALI_PROFILING_STATE_RETURN) { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ ++ } + -+ case UMP_IOC_DMABUF_IMPORT: -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ err = ump_dmabuf_import_wrapper((u32 __user *)argument, session_data); -+#else -+ err = -EFAULT; -+ DBG_MSG(1, ("User space use dmabuf API, but kernel don't support DMA BUF\n")); -+#endif -+ break; ++ if (index >= raw_index) { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ default: -+ DBG_MSG(1, ("No handler for IOCTL. 
cmd: 0x%08x, arg: 0x%08lx\n", cmd, arg)); -+ err = -EFAULT; -+ break; ++ *timestamp = profile_entries[index].timestamp; ++ *event_id = profile_entries[index].event_id; ++ data[0] = profile_entries[index].data[0]; ++ data[1] = profile_entries[index].data[1]; ++ data[2] = profile_entries[index].data[2]; ++ data[3] = profile_entries[index].data[3]; ++ data[4] = profile_entries[index].data[4]; ++ } else { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_FAULT; + } + -+ return err; -+} -+ -+int ump_map_errcode(_mali_osk_errcode_t err) -+{ -+ switch (err) { -+ case _MALI_OSK_ERR_OK : -+ return 0; -+ case _MALI_OSK_ERR_FAULT: -+ return -EFAULT; -+ case _MALI_OSK_ERR_INVALID_FUNC: -+ return -ENOTTY; -+ case _MALI_OSK_ERR_INVALID_ARGS: -+ return -EINVAL; -+ case _MALI_OSK_ERR_NOMEM: -+ return -ENOMEM; -+ case _MALI_OSK_ERR_TIMEOUT: -+ return -ETIMEDOUT; -+ case _MALI_OSK_ERR_RESTARTSYSCALL: -+ return -ERESTARTSYS; -+ case _MALI_OSK_ERR_ITEM_NOT_FOUND: -+ return -ENOENT; -+ default: -+ return -EFAULT; -+ } ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_OK; +} + -+/* -+ * Handle from OS to map specified virtual memory to specified UMP memory. -+ */ -+static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma) ++_mali_osk_errcode_t _mali_internal_profiling_clear(void) +{ -+ _ump_uk_map_mem_s args; -+ _mali_osk_errcode_t err; -+ struct ump_session_data *session_data; ++ _mali_osk_mutex_wait(lock); + -+ /* Validate the session data */ -+ session_data = (struct ump_session_data *)filp->private_data; -+ if (NULL == session_data) { -+ MSG_ERR(("mmap() called without any session data available\n")); -+ return -EFAULT; ++ if (MALI_PROFILING_STATE_RETURN != prof_state) { ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_INVALID_ARGS; /* invalid to call this function in this state */ + } + -+ /* Re-pack the arguments that mmap() packed for us */ -+ args.ctx = session_data; -+ args.phys_addr = 0; -+ args.size = vma->vm_end - vma->vm_start; -+ args._ukk_private = vma; -+ args.secure_id = vma->vm_pgoff; -+ -+ /* By setting this flag, during a process fork; the child process will not have the parent UMP mappings */ -+ vma->vm_flags |= VM_DONTCOPY; -+ -+ DBG_MSG(4, ("UMP vma->flags: %x\n", vma->vm_flags)); ++ prof_state = MALI_PROFILING_STATE_IDLE; ++ profile_mask = 0; ++ _mali_osk_atomic_init(&profile_insert_index, 0); + -+ /* Call the common mmap handler */ -+ err = _ump_ukk_map_mem(&args); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("_ump_ukk_map_mem() failed in function ump_file_mmap()")); -+ return ump_map_errcode(err); ++ if (NULL != profile_entries) { ++ _mali_osk_vfree(profile_entries); ++ profile_entries = NULL; + } + -+ return 0; /* success */ ++ _mali_osk_mutex_signal(lock); ++ return _MALI_OSK_ERR_OK; +} + -+/* Export UMP kernel space API functions */ -+EXPORT_SYMBOL(ump_dd_secure_id_get); -+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id); -+EXPORT_SYMBOL(ump_dd_phys_block_count_get); -+EXPORT_SYMBOL(ump_dd_phys_block_get); -+EXPORT_SYMBOL(ump_dd_phys_blocks_get); -+EXPORT_SYMBOL(ump_dd_size_get); -+EXPORT_SYMBOL(ump_dd_reference_add); -+EXPORT_SYMBOL(ump_dd_reference_release); -+ -+/* Export our own extended kernel space allocator */ -+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks); -+ -+/* Setup init and exit functions for this module */ -+module_init(ump_initialize_module); -+module_exit(ump_cleanup_module); ++mali_bool _mali_internal_profiling_is_recording(void) ++{ ++ return prof_state == MALI_PROFILING_STATE_RUNNING ? 
MALI_TRUE : MALI_FALSE; ++} + -+/* And some module informatio */ -+MODULE_LICENSE(UMP_KERNEL_LINUX_LICENSE); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_VERSION(SVN_REV_STRING); -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h ++mali_bool _mali_internal_profiling_have_recording(void) ++{ ++ return prof_state == MALI_PROFILING_STATE_RETURN ? MALI_TRUE : MALI_FALSE; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h new file mode 100644 -index 000000000..8d32ddbb5 +index 000000000..f17b45833 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h -@@ -0,0 +1,18 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_profiling_internal.h +@@ -0,0 +1,35 @@ +/* -+ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -329585,22 +331452,39 @@ index 000000000..8d32ddbb5 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __UMP_KERNEL_LINUX_H__ -+#define __UMP_KERNEL_LINUX_H__ ++#ifndef __MALI_PROFILING_INTERNAL_H__ ++#define __MALI_PROFILING_INTERNAL_H__ + -+int ump_kernel_device_initialize(void); -+void ump_kernel_device_terminate(void); ++#ifdef __cplusplus ++extern "C" { ++#endif + ++#include "mali_osk.h" + -+#endif /* __UMP_KERNEL_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c ++int _mali_internal_profiling_init(mali_bool auto_start); ++void _mali_internal_profiling_term(void); ++ ++mali_bool _mali_internal_profiling_is_recording(void); ++mali_bool _mali_internal_profiling_have_recording(void); ++_mali_osk_errcode_t _mali_internal_profiling_clear(void); ++_mali_osk_errcode_t _mali_internal_profiling_get_event(u32 index, u64 *timestamp, u32 *event_id, u32 data[5]); ++u32 _mali_internal_profiling_get_count(void); ++int _mali_internal_profiling_stop(u32 *count); ++int _mali_internal_profiling_start(u32 *limit); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* __MALI_PROFILING_INTERNAL_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_sync.c b/drivers/gpu/arm/mali400/mali/linux/mali_sync.c new file mode 100644 -index 000000000..5a1257a25 +index 000000000..0d98b518f --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c -@@ -0,0 +1,271 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_sync.c +@@ -0,0 +1,665 @@ +/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -329609,802 +331493,669 @@ index 000000000..5a1257a25 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+/* needed to detect kernel version specific code */ -+#include -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+#include -+#else /* pre 2.6.26 the file was in the arch specific location */ -+#include -+#endif ++#include "mali_sync.h" + -+#include -+#include -+#include -+#include -+#include "ump_kernel_common.h" -+#include "ump_kernel_memory_backend.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_timeline.h" ++#include "mali_executor.h" + ++#include ++#include ++#include + ++struct mali_sync_pt { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_pt sync_pt; ++#else ++ struct mali_internal_sync_point sync_pt; ++#endif ++ struct mali_sync_flag *flag; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_timeline *sync_tl; /**< Sync timeline this pt is connected to. */ ++#else ++ struct mali_internal_sync_timeline *sync_tl; /**< Sync timeline this pt is connected to. */ ++#endif ++}; + -+#define UMP_BLOCK_SIZE (256UL * 1024UL) /* 256kB, remember to keep the ()s */ ++/** ++ * The sync flag is used to connect sync fences to the Mali Timeline system. Sync fences can be ++ * created from a sync flag, and when the flag is signaled, the sync fences will also be signaled. ++ */ ++struct mali_sync_flag { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_timeline *sync_tl; /**< Sync timeline this flag is connected to. */ ++#else ++ struct mali_internal_sync_timeline *sync_tl; /**< Sync timeline this flag is connected to. */ ++#endif ++ u32 point; /**< Point on timeline. */ ++ int status; /**< 0 if unsignaled, 1 if signaled without error or negative if signaled with error. */ ++ struct kref refcount; /**< Reference count. */ ++}; + ++/** ++ * Mali sync timeline is used to connect mali timeline to sync_timeline. ++ * When fence timeout can print more detailed mali timeline system info. 
++ */ ++struct mali_sync_timeline_container { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ struct sync_timeline sync_timeline; ++#else ++ struct mali_internal_sync_timeline sync_timeline; ++#endif ++ struct mali_timeline *timeline; ++}; + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) ++#else ++MALI_STATIC_INLINE struct mali_sync_pt *to_mali_sync_pt(struct mali_internal_sync_point *pt) ++#endif ++{ ++ return container_of(pt, struct mali_sync_pt, sync_pt); ++} + -+typedef struct block_info { -+ struct block_info *next; -+} block_info; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct sync_timeline *sync_tl) ++#else ++MALI_STATIC_INLINE struct mali_sync_timeline_container *to_mali_sync_tl_container(struct mali_internal_sync_timeline *sync_tl) ++#endif ++{ ++ return container_of(sync_tl, struct mali_sync_timeline_container, sync_timeline); ++} + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static int timeline_has_signaled(struct sync_pt *pt) ++#else ++static int timeline_has_signaled(struct mali_internal_sync_point *pt) ++#endif ++{ ++ struct mali_sync_pt *mpt; + ++ MALI_DEBUG_ASSERT_POINTER(pt); ++ mpt = to_mali_sync_pt(pt); + -+typedef struct block_allocator { -+ struct semaphore mutex; -+ block_info *all_blocks; -+ block_info *first_free; -+ u32 base; -+ u32 num_blocks; -+ u32 num_free; -+} block_allocator; ++ MALI_DEBUG_ASSERT_POINTER(mpt->flag); + ++ return mpt->flag->status; ++} + -+static void block_allocator_shutdown(ump_memory_backend *backend); -+static int block_allocator_allocate(void *ctx, ump_dd_mem *mem); -+static void block_allocator_release(void *ctx, ump_dd_mem *handle); -+static inline u32 get_phys(block_allocator *allocator, block_info *block); -+static u32 block_allocator_stat(struct ump_memory_backend *backend); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static void timeline_free_pt(struct sync_pt *pt) ++#else ++static void timeline_free_pt(struct mali_internal_sync_point *pt) ++#endif ++{ ++ struct mali_sync_pt *mpt; + ++ MALI_DEBUG_ASSERT_POINTER(pt); ++ mpt = to_mali_sync_pt(pt); + ++ mali_sync_flag_put(mpt->flag); ++} + -+/* -+ * Create dedicated memory backend -+ */ -+ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static void timeline_release(struct sync_timeline *sync_timeline) ++#else ++static void timeline_release(struct mali_internal_sync_timeline *sync_timeline) ++#endif +{ -+ ump_memory_backend *backend; -+ block_allocator *allocator; -+ u32 usable_size; -+ u32 num_blocks; ++ struct mali_sync_timeline_container *mali_sync_tl = NULL; ++ struct mali_timeline *mali_tl = NULL; + -+ usable_size = (size + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1); -+ num_blocks = usable_size / UMP_BLOCK_SIZE; ++ MALI_DEBUG_ASSERT_POINTER(sync_timeline); + -+ if (0 == usable_size) { -+ DBG_MSG(1, ("Memory block of size %u is unusable\n", size)); -+ return NULL; -+ } ++ mali_sync_tl = to_mali_sync_tl_container(sync_timeline); ++ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); + -+ DBG_MSG(5, ("Creating dedicated UMP memory backend. 
Base address: 0x%08x, size: 0x%08x\n", base_address, size)); -+ DBG_MSG(6, ("%u usable bytes which becomes %u blocks\n", usable_size, num_blocks)); ++ mali_tl = mali_sync_tl->timeline; + -+ backend = kzalloc(sizeof(ump_memory_backend), GFP_KERNEL); -+ if (NULL != backend) { -+ allocator = kmalloc(sizeof(block_allocator), GFP_KERNEL); -+ if (NULL != allocator) { -+ allocator->all_blocks = kmalloc(sizeof(block_info) * num_blocks, GFP_KERNEL); -+ if (NULL != allocator->all_blocks) { -+ int i; ++ /* always signaled timeline didn't have mali container */ ++ if (mali_tl) { ++ if (NULL != mali_tl->spinlock) { ++ mali_spinlock_reentrant_term(mali_tl->spinlock); ++ } ++ _mali_osk_free(mali_tl); ++ } + -+ allocator->first_free = NULL; -+ allocator->num_blocks = num_blocks; -+ allocator->num_free = num_blocks; -+ allocator->base = base_address; -+ sema_init(&allocator->mutex, 1); ++ module_put(THIS_MODULE); ++} + -+ for (i = 0; i < num_blocks; i++) { -+ allocator->all_blocks[i].next = allocator->first_free; -+ allocator->first_free = &allocator->all_blocks[i]; -+ } ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static struct sync_pt *timeline_dup(struct sync_pt *pt) ++{ ++ struct mali_sync_pt *mpt, *new_mpt; ++ struct sync_pt *new_pt; ++ MALI_DEBUG_ASSERT_POINTER(pt); + -+ backend->ctx = allocator; -+ backend->allocate = block_allocator_allocate; -+ backend->release = block_allocator_release; -+ backend->shutdown = block_allocator_shutdown; -+ backend->stat = block_allocator_stat; -+ backend->pre_allocate_physical_check = NULL; -+ backend->adjust_to_mali_phys = NULL; ++ mpt = to_mali_sync_pt(pt); + -+ return backend; -+ } -+ kfree(allocator); -+ } -+ kfree(backend); -+ } ++ new_pt = sync_pt_create(mpt->sync_tl, sizeof(struct mali_sync_pt)); ++ if (NULL == new_pt) return NULL; + -+ return NULL; -+} ++ new_mpt = to_mali_sync_pt(new_pt); + ++ mali_sync_flag_get(mpt->flag); ++ new_mpt->flag = mpt->flag; ++ new_mpt->sync_tl = mpt->sync_tl; + ++ return new_pt; ++} + -+/* -+ * Destroy specified dedicated memory backend -+ */ -+static void block_allocator_shutdown(ump_memory_backend *backend) ++static int timeline_compare(struct sync_pt *pta, struct sync_pt *ptb) +{ -+ block_allocator *allocator; ++ struct mali_sync_pt *mpta; ++ struct mali_sync_pt *mptb; ++ u32 a, b; + -+ BUG_ON(!backend); -+ BUG_ON(!backend->ctx); ++ MALI_DEBUG_ASSERT_POINTER(pta); ++ MALI_DEBUG_ASSERT_POINTER(ptb); ++ mpta = to_mali_sync_pt(pta); ++ mptb = to_mali_sync_pt(ptb); + -+ allocator = (block_allocator *)backend->ctx; ++ MALI_DEBUG_ASSERT_POINTER(mpta->flag); ++ MALI_DEBUG_ASSERT_POINTER(mptb->flag); + -+ DBG_MSG_IF(1, allocator->num_free != allocator->num_blocks, ("%u blocks still in use during shutdown\n", allocator->num_blocks - allocator->num_free)); ++ a = mpta->flag->point; ++ b = mptb->flag->point; + -+ kfree(allocator->all_blocks); -+ kfree(allocator); -+ kfree(backend); ++ if (a == b) return 0; ++ ++ return ((b - a) < (a - b) ? 
-1 : 1); +} ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++static void timeline_print_pt(struct seq_file *s, struct sync_pt *sync_pt) ++{ ++ struct mali_sync_pt *mpt; + ++ MALI_DEBUG_ASSERT_POINTER(s); ++ MALI_DEBUG_ASSERT_POINTER(sync_pt); + ++ mpt = to_mali_sync_pt(sync_pt); + -+static int block_allocator_allocate(void *ctx, ump_dd_mem *mem) ++ /* It is possible this sync point is just under construct, ++ * make sure the flag is valid before accessing it ++ */ ++ if (mpt->flag) { ++ seq_printf(s, "%u", mpt->flag->point); ++ } else { ++ seq_printf(s, "uninitialized"); ++ } ++} ++ ++static void timeline_print_obj(struct seq_file *s, struct sync_timeline *sync_tl) +{ -+ block_allocator *allocator; -+ u32 left; -+ block_info *last_allocated = NULL; -+ int i = 0; ++ struct mali_sync_timeline_container *mali_sync_tl = NULL; ++ struct mali_timeline *mali_tl = NULL; + -+ BUG_ON(!ctx); -+ BUG_ON(!mem); ++ MALI_DEBUG_ASSERT_POINTER(sync_tl); + -+ allocator = (block_allocator *)ctx; -+ left = mem->size_bytes; ++ mali_sync_tl = to_mali_sync_tl_container(sync_tl); ++ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); + -+ BUG_ON(!left); -+ BUG_ON(!&allocator->mutex); ++ mali_tl = mali_sync_tl->timeline; + -+ mem->nr_blocks = ((left + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1)) / UMP_BLOCK_SIZE; -+ mem->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * mem->nr_blocks); -+ if (NULL == mem->block_array) { -+ MSG_ERR(("Failed to allocate block array\n")); -+ return 0; -+ } ++ if (NULL != mali_tl) { ++ seq_printf(s, "oldest (%u) ", mali_tl->point_oldest); ++ seq_printf(s, "next (%u)", mali_tl->point_next); ++ seq_printf(s, "\n"); + -+ if (down_interruptible(&allocator->mutex)) { -+ MSG_ERR(("Could not get mutex to do block_allocate\n")); -+ return 0; ++#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) ++ { ++ u32 tid = _mali_osk_get_tid(); ++ struct mali_timeline_system *system = mali_tl->system; ++ ++ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid); ++ if (!mali_tl->destroyed) { ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ mali_timeline_debug_print_timeline(mali_tl, s); ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ } ++ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid); ++ ++ /* dump job queue status and group running status */ ++ mali_executor_status_dump(); ++ } ++#endif + } ++} ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static void timeline_pt_value_str(struct sync_pt *pt, char *str, int size) ++{ ++ struct mali_sync_pt *mpt; + -+ mem->size_bytes = 0; ++ MALI_DEBUG_ASSERT_POINTER(str); ++ MALI_DEBUG_ASSERT_POINTER(pt); + -+ while ((left > 0) && (allocator->first_free)) { -+ block_info *block; ++ mpt = to_mali_sync_pt(pt); + -+ block = allocator->first_free; -+ allocator->first_free = allocator->first_free->next; -+ block->next = last_allocated; -+ last_allocated = block; -+ allocator->num_free--; ++ /* It is possible this sync point is just under construct, ++ * make sure the flag is valid before accessing it ++ */ ++ if (mpt->flag) { ++ _mali_osk_snprintf(str, size, "%u", mpt->flag->point); ++ } else { ++ _mali_osk_snprintf(str, size, "uninitialized"); ++ } ++} + -+ mem->block_array[i].addr = get_phys(allocator, block); -+ mem->block_array[i].size = UMP_BLOCK_SIZE; -+ mem->size_bytes += UMP_BLOCK_SIZE; ++static void timeline_value_str(struct sync_timeline *timeline, char *str, int size) ++{ ++ struct mali_sync_timeline_container *mali_sync_tl = NULL; ++ struct mali_timeline *mali_tl = NULL; + -+ i++; ++ 
MALI_DEBUG_ASSERT_POINTER(timeline); + -+ if (left < UMP_BLOCK_SIZE) left = 0; -+ else left -= UMP_BLOCK_SIZE; -+ } ++ mali_sync_tl = to_mali_sync_tl_container(timeline); ++ MALI_DEBUG_ASSERT_POINTER(mali_sync_tl); + -+ if (left) { -+ block_info *block; -+ /* release all memory back to the pool */ -+ while (last_allocated) { -+ block = last_allocated->next; -+ last_allocated->next = allocator->first_free; -+ allocator->first_free = last_allocated; -+ last_allocated = block; -+ allocator->num_free++; -+ } ++ mali_tl = mali_sync_tl->timeline; + -+ vfree(mem->block_array); -+ mem->backend_info = NULL; -+ mem->block_array = NULL; ++ if (NULL != mali_tl) { ++ _mali_osk_snprintf(str, size, "oldest (%u) ", mali_tl->point_oldest); ++ _mali_osk_snprintf(str, size, "next (%u)", mali_tl->point_next); ++ _mali_osk_snprintf(str, size, "\n"); + -+ DBG_MSG(4, ("Could not find a mem-block for the allocation.\n")); -+ up(&allocator->mutex); ++#if defined(MALI_TIMELINE_DEBUG_FUNCTIONS) ++ { ++ u32 tid = _mali_osk_get_tid(); ++ struct mali_timeline_system *system = mali_tl->system; + -+ return 0; ++ mali_spinlock_reentrant_wait(mali_tl->spinlock, tid); ++ if (!mali_tl->destroyed) { ++ mali_spinlock_reentrant_wait(system->spinlock, tid); ++ mali_timeline_debug_direct_print_timeline(mali_tl); ++ mali_spinlock_reentrant_signal(system->spinlock, tid); ++ } ++ mali_spinlock_reentrant_signal(mali_tl->spinlock, tid); ++ ++ /* dump job queue status and group running status */ ++ mali_executor_status_dump(); ++ } ++#endif + } ++} ++#else ++static void timeline_print_sync_pt(struct mali_internal_sync_point *sync_pt) ++{ ++ struct mali_sync_pt *mpt; + -+ mem->backend_info = last_allocated; ++ MALI_DEBUG_ASSERT_POINTER(sync_pt); + -+ up(&allocator->mutex); -+ mem->is_cached = 0; ++ mpt = to_mali_sync_pt(sync_pt); + -+ return 1; ++ if (mpt->flag) { ++ MALI_DEBUG_PRINT(2, ("mali_internal_sync_pt: %u\n", mpt->flag->point)); ++ } else { ++ MALI_DEBUG_PRINT(2, ("uninitialized\n", mpt->flag->point)); ++ } +} ++#endif + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++static struct sync_timeline_ops mali_timeline_ops = { ++ .driver_name = "Mali", ++ .dup = timeline_dup, ++ .has_signaled = timeline_has_signaled, ++ .compare = timeline_compare, ++ .free_pt = timeline_free_pt, ++ .release_obj = timeline_release, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ .print_pt = timeline_print_pt, ++ .print_obj = timeline_print_obj, ++#else ++ .pt_value_str = timeline_pt_value_str, ++ .timeline_value_str = timeline_value_str, ++#endif ++}; + -+ -+static void block_allocator_release(void *ctx, ump_dd_mem *handle) ++struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name) +{ -+ block_allocator *allocator; -+ block_info *block, * next; ++ struct sync_timeline *sync_tl; ++ struct mali_sync_timeline_container *mali_sync_tl; + -+ BUG_ON(!ctx); -+ BUG_ON(!handle); ++ sync_tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name); ++ if (NULL == sync_tl) return NULL; + -+ allocator = (block_allocator *)ctx; -+ block = (block_info *)handle->backend_info; -+ BUG_ON(!block); ++ mali_sync_tl = to_mali_sync_tl_container(sync_tl); ++ mali_sync_tl->timeline = timeline; + -+ if (down_interruptible(&allocator->mutex)) { -+ MSG_ERR(("Allocator release: Failed to get mutex - memory leak\n")); -+ return; -+ } ++ /* Grab a reference on the module to ensure the callbacks are present ++ * as long some timeline exists. 
The reference is released when the ++ * timeline is freed. ++ * Since this function is called from a ioctl on an open file we know ++ * we already have a reference, so using __module_get is safe. */ ++ __module_get(THIS_MODULE); + -+ while (block) { -+ next = block->next; ++ return sync_tl; ++} + -+ BUG_ON((block < allocator->all_blocks) || (block > (allocator->all_blocks + allocator->num_blocks))); ++s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence) ++{ ++ s32 fd = -1; + -+ block->next = allocator->first_free; -+ allocator->first_free = block; -+ allocator->num_free++; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) ++ fd = get_unused_fd(); ++#else ++ fd = get_unused_fd_flags(0); ++#endif + -+ block = next; ++ if (fd < 0) { ++ sync_fence_put(sync_fence); ++ return -1; + } -+ DBG_MSG(3, ("%d blocks free after release call\n", allocator->num_free)); -+ up(&allocator->mutex); ++ sync_fence_install(sync_fence, fd); + -+ vfree(handle->block_array); -+ handle->block_array = NULL; ++ return fd; +} + ++struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2) ++{ ++ struct sync_fence *sync_fence; + ++ MALI_DEBUG_ASSERT_POINTER(sync_fence1); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence1); + -+/* -+ * Helper function for calculating the physical base adderss of a memory block -+ */ -+static inline u32 get_phys(block_allocator *allocator, block_info *block) -+{ -+ return allocator->base + ((block - allocator->all_blocks) * UMP_BLOCK_SIZE); ++ sync_fence = sync_fence_merge("mali_merge_fence", sync_fence1, sync_fence2); ++ sync_fence_put(sync_fence1); ++ sync_fence_put(sync_fence2); ++ ++ return sync_fence; +} + -+static u32 block_allocator_stat(struct ump_memory_backend *backend) ++struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl) +{ -+ block_allocator *allocator; -+ BUG_ON(!backend); -+ allocator = (block_allocator *)backend->ctx; -+ BUG_ON(!allocator); ++ struct mali_sync_flag *flag; ++ struct sync_fence *sync_fence; + -+ return (allocator->num_blocks - allocator->num_free) * UMP_BLOCK_SIZE; -+} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h -new file mode 100644 -index 000000000..949fd245c ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+/** -+ * @file ump_kernel_memory_backend_dedicated.h -+ */ ++ MALI_DEBUG_ASSERT_POINTER(sync_tl); + -+#ifndef __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ -+#define __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ ++ flag = mali_sync_flag_create(sync_tl, 0); ++ if (NULL == flag) return NULL; + -+#include "ump_kernel_memory_backend.h" ++ sync_fence = mali_sync_flag_create_fence(flag); + -+ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size); ++ mali_sync_flag_signal(flag, 0); ++ mali_sync_flag_put(flag); + -+#endif /* __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ */ ++ return sync_fence; ++} + -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c -new file mode 100644 -index 000000000..7cd8d5d38 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c -@@ -0,0 +1,235 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, mali_timeline_point point) ++{ ++ struct mali_sync_flag *flag; + -+/* needed to detect kernel version specific code */ -+#include ++ if (NULL == sync_tl) return NULL; + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+#include -+#else /* pre 2.6.26 the file was in the arch specific location */ -+#include -+#endif ++ flag = _mali_osk_calloc(1, sizeof(*flag)); ++ if (NULL == flag) return NULL; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include "ump_kernel_common.h" -+#include "ump_kernel_memory_backend.h" ++ flag->sync_tl = sync_tl; ++ flag->point = point; + ++ flag->status = 0; ++ kref_init(&flag->refcount); + ++ return flag; ++} + -+typedef struct os_allocator { -+ struct semaphore mutex; -+ u32 num_pages_max; /**< Maximum number of pages to allocate from the OS */ -+ u32 num_pages_allocated; /**< Number of pages allocated from the OS */ -+} os_allocator; ++/** ++ * Create a sync point attached to given sync flag. ++ * ++ * @note Sync points must be triggered in *exactly* the same order as they are created. ++ * ++ * @param flag Sync flag. ++ * @return New sync point if successful, NULL if not. 
++ */ ++static struct sync_pt *mali_sync_flag_create_pt(struct mali_sync_flag *flag) ++{ ++ struct sync_pt *pt; ++ struct mali_sync_pt *mpt; + ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); + ++ pt = sync_pt_create(flag->sync_tl, sizeof(struct mali_sync_pt)); ++ if (NULL == pt) return NULL; + -+static void os_free(void *ctx, ump_dd_mem *descriptor); -+static int os_allocate(void *ctx, ump_dd_mem *descriptor); -+static void os_memory_backend_destroy(ump_memory_backend *backend); -+static u32 os_stat(struct ump_memory_backend *backend); ++ mali_sync_flag_get(flag); + ++ mpt = to_mali_sync_pt(pt); ++ mpt->flag = flag; ++ mpt->sync_tl = flag->sync_tl; + ++ return pt; ++} + -+/* -+ * Create OS memory backend -+ */ -+ump_memory_backend *ump_os_memory_backend_create(const int max_allocation) ++struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag) +{ -+ ump_memory_backend *backend; -+ os_allocator *info; -+ -+ info = kmalloc(sizeof(os_allocator), GFP_KERNEL); -+ if (NULL == info) { -+ return NULL; -+ } ++ struct sync_pt *sync_pt; ++ struct sync_fence *sync_fence; + -+ info->num_pages_max = max_allocation >> PAGE_SHIFT; -+ info->num_pages_allocated = 0; ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); + -+ sema_init(&info->mutex, 1); ++ sync_pt = mali_sync_flag_create_pt(flag); ++ if (NULL == sync_pt) return NULL; + -+ backend = kmalloc(sizeof(ump_memory_backend), GFP_KERNEL); -+ if (NULL == backend) { -+ kfree(info); ++ sync_fence = sync_fence_create("mali_flag_fence", sync_pt); ++ if (NULL == sync_fence) { ++ sync_pt_free(sync_pt); + return NULL; + } + -+ backend->ctx = info; -+ backend->allocate = os_allocate; -+ backend->release = os_free; -+ backend->shutdown = os_memory_backend_destroy; -+ backend->stat = os_stat; -+ backend->pre_allocate_physical_check = NULL; -+ backend->adjust_to_mali_phys = NULL; -+ -+ return backend; ++ return sync_fence; +} ++#else ++static struct mali_internal_sync_timeline_ops mali_timeline_ops = { ++ .driver_name = "Mali", ++ .has_signaled = timeline_has_signaled, ++ .free_pt = timeline_free_pt, ++ .release_obj = timeline_release, ++ .print_sync_pt = timeline_print_sync_pt, ++}; + -+ -+ -+/* -+ * Destroy specified OS memory backend -+ */ -+static void os_memory_backend_destroy(ump_memory_backend *backend) ++struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name) +{ -+ os_allocator *info = (os_allocator *)backend->ctx; ++ struct mali_internal_sync_timeline *sync_tl; ++ struct mali_sync_timeline_container *mali_sync_tl; + -+ DBG_MSG_IF(1, 0 != info->num_pages_allocated, ("%d pages still in use during shutdown\n", info->num_pages_allocated)); ++ sync_tl = mali_internal_sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline_container), name); ++ if (NULL == sync_tl) return NULL; + -+ kfree(info); -+ kfree(backend); -+} ++ mali_sync_tl = to_mali_sync_tl_container(sync_tl); ++ mali_sync_tl->timeline = timeline; + ++ /* Grab a reference on the module to ensure the callbacks are present ++ * as long some timeline exists. The reference is released when the ++ * timeline is freed. ++ * Since this function is called from a ioctl on an open file we know ++ * we already have a reference, so using __module_get is safe. 
*/ ++ __module_get(THIS_MODULE); + ++ return sync_tl; ++} + -+/* -+ * Allocate UMP memory -+ */ -+static int os_allocate(void *ctx, ump_dd_mem *descriptor) ++s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence) +{ -+ u32 left; -+ os_allocator *info; -+ int pages_allocated = 0; -+ int is_cached; -+ -+ BUG_ON(!descriptor); -+ BUG_ON(!ctx); -+ -+ info = (os_allocator *)ctx; -+ left = descriptor->size_bytes; -+ is_cached = descriptor->is_cached; -+ -+ if (down_interruptible(&info->mutex)) { -+ DBG_MSG(1, ("Failed to get mutex in os_free\n")); -+ return 0; /* failure */ -+ } -+ -+ descriptor->backend_info = NULL; -+ descriptor->nr_blocks = ((left + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) >> PAGE_SHIFT; -+ -+ DBG_MSG(5, ("Allocating page array. Size: %lu\n", descriptor->nr_blocks * sizeof(ump_dd_physical_block))); -+ -+ descriptor->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * descriptor->nr_blocks); -+ if (NULL == descriptor->block_array) { -+ up(&info->mutex); -+ DBG_MSG(1, ("Block array could not be allocated\n")); -+ return 0; /* failure */ -+ } -+ -+ while (left > 0 && ((info->num_pages_allocated + pages_allocated) < info->num_pages_max)) { -+ struct page *new_page; -+ -+ if (is_cached) { -+ new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN); -+ } else { -+ new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD); -+ } -+ if (NULL == new_page) { -+ break; -+ } -+ -+ /* Ensure page caches are flushed. */ -+ if (is_cached) { -+ descriptor->block_array[pages_allocated].addr = page_to_phys(new_page); -+ descriptor->block_array[pages_allocated].size = PAGE_SIZE; -+ } else { -+ descriptor->block_array[pages_allocated].addr = dma_map_page(NULL, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ descriptor->block_array[pages_allocated].size = PAGE_SIZE; -+ } -+ -+ DBG_MSG(5, ("Allocated page 0x%08lx cached: %d\n", descriptor->block_array[pages_allocated].addr, is_cached)); -+ -+ if (left < PAGE_SIZE) { -+ left = 0; -+ } else { -+ left -= PAGE_SIZE; -+ } -+ -+ pages_allocated++; -+ } -+ -+ DBG_MSG(5, ("Alloce for ID:%2d got %d pages, cached: %d\n", descriptor->secure_id, pages_allocated)); -+ -+ if (left) { -+ DBG_MSG(1, ("Failed to allocate needed pages\n")); -+ -+ while (pages_allocated) { -+ pages_allocated--; -+ if (!is_cached) { -+ dma_unmap_page(NULL, descriptor->block_array[pages_allocated].addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } -+ __free_page(pfn_to_page(descriptor->block_array[pages_allocated].addr >> PAGE_SHIFT)); -+ } ++ s32 fd = -1; + -+ up(&info->mutex); ++ fd = get_unused_fd_flags(0); + -+ return 0; /* failure */ ++ if (fd < 0) { ++ fput(sync_fence->file); ++ return -1; + } -+ -+ info->num_pages_allocated += pages_allocated; -+ -+ DBG_MSG(6, ("%d out of %d pages now allocated\n", info->num_pages_allocated, info->num_pages_max)); -+ -+ up(&info->mutex); -+ -+ return 1; /* success*/ ++ fd_install(fd, sync_fence->file); ++ return fd; +} + -+ -+/* -+ * Free specified UMP memory -+ */ -+static void os_free(void *ctx, ump_dd_mem *descriptor) ++struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2) +{ -+ os_allocator *info; -+ int i; ++ struct mali_internal_sync_fence *sync_fence; + -+ BUG_ON(!ctx); -+ BUG_ON(!descriptor); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence1); ++ MALI_DEBUG_ASSERT_POINTER(sync_fence1); + -+ info = (os_allocator *)ctx; ++ sync_fence = 
mali_internal_sync_fence_merge(sync_fence1, sync_fence2); ++ fput(sync_fence1->file); ++ fput(sync_fence2->file); + -+ BUG_ON(descriptor->nr_blocks > info->num_pages_allocated); ++ return sync_fence; ++} + -+ if (down_interruptible(&info->mutex)) { -+ DBG_MSG(1, ("Failed to get mutex in os_free\n")); -+ return; -+ } ++struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl) ++{ ++ struct mali_sync_flag *flag; ++ struct mali_internal_sync_fence *sync_fence; + -+ DBG_MSG(5, ("Releasing %lu OS pages\n", descriptor->nr_blocks)); ++ MALI_DEBUG_ASSERT_POINTER(sync_tl); + -+ info->num_pages_allocated -= descriptor->nr_blocks; ++ flag = mali_sync_flag_create(sync_tl, 0); ++ if (NULL == flag) return NULL; + -+ up(&info->mutex); ++ sync_fence = mali_sync_flag_create_fence(flag); + -+ for (i = 0; i < descriptor->nr_blocks; i++) { -+ DBG_MSG(6, ("Freeing physical page. Address: 0x%08lx\n", descriptor->block_array[i].addr)); -+ if (! descriptor->is_cached) { -+ dma_unmap_page(NULL, descriptor->block_array[i].addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } -+ __free_page(pfn_to_page(descriptor->block_array[i].addr >> PAGE_SHIFT)); -+ } ++ mali_sync_flag_signal(flag, 0); ++ mali_sync_flag_put(flag); + -+ vfree(descriptor->block_array); ++ return sync_fence; +} + -+ -+static u32 os_stat(struct ump_memory_backend *backend) ++struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, mali_timeline_point point) +{ -+ os_allocator *info; -+ info = (os_allocator *)backend->ctx; -+ return info->num_pages_allocated * _MALI_OSK_MALI_PAGE_SIZE; -+} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h -new file mode 100644 -index 000000000..d21d50351 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ ++ struct mali_sync_flag *flag; + -+/** -+ * @file ump_kernel_memory_backend_os.h -+ */ ++ if (NULL == sync_tl) return NULL; + -+#ifndef __UMP_KERNEL_MEMORY_BACKEND_OS_H__ -+#define __UMP_KERNEL_MEMORY_BACKEND_OS_H__ ++ flag = _mali_osk_calloc(1, sizeof(*flag)); ++ if (NULL == flag) return NULL; + -+#include "ump_kernel_memory_backend.h" ++ flag->sync_tl = sync_tl; ++ flag->point = point; + -+ump_memory_backend *ump_os_memory_backend_create(const int max_allocation); ++ flag->status = 0; ++ kref_init(&flag->refcount); + -+#endif /* __UMP_KERNEL_MEMORY_BACKEND_OS_H__ */ ++ return flag; ++} + -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c -new file mode 100644 -index 000000000..6be0f8644 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c -@@ -0,0 +1,222 @@ -+/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++/** ++ * Create a sync point attached to given sync flag. ++ * ++ * @note Sync points must be triggered in *exactly* the same order as they are created. ++ * ++ * @param flag Sync flag. ++ * @return New sync point if successful, NULL if not. + */ -+ -+#include "mali_kernel_common.h" -+#include "mali_osk.h" -+#include "ump_osk.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_types.h" -+#include "ump_kernel_random_mapping.h" -+ -+#include -+#include -+#include -+#include -+ -+ -+static ump_dd_mem *search(struct rb_root *root, int id) ++static struct mali_internal_sync_point *mali_sync_flag_create_pt(struct mali_sync_flag *flag) +{ -+ struct rb_node *node = root->rb_node; -+ -+ while (node) { -+ ump_dd_mem *e = container_of(node, ump_dd_mem, node); -+ -+ if (id < e->secure_id) { -+ node = node->rb_left; -+ } else if (id > e->secure_id) { -+ node = node->rb_right; -+ } else { -+ return e; -+ } -+ } -+ -+ return NULL; -+} ++ struct mali_internal_sync_point *pt; ++ struct mali_sync_pt *mpt; + -+static mali_bool insert(struct rb_root *root, int id, ump_dd_mem *mem) -+{ -+ struct rb_node **new = &(root->rb_node); -+ struct rb_node *parent = NULL; ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); + -+ while (*new) { -+ ump_dd_mem *this = container_of(*new, ump_dd_mem, node); ++ pt = mali_internal_sync_point_create(flag->sync_tl, sizeof(struct mali_sync_pt)); + -+ parent = *new; -+ if (id < this->secure_id) { -+ new = &((*new)->rb_left); -+ } else if (id > this->secure_id) { -+ new = &((*new)->rb_right); -+ } else { -+ printk(KERN_ERR "UMP: ID already used %x\n", id); -+ return MALI_FALSE; -+ } ++ if (pt == NULL) { ++ MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n")); ++ return NULL; + } ++ mali_sync_flag_get(flag); + -+ rb_link_node(&mem->node, parent, new); -+ rb_insert_color(&mem->node, root); ++ mpt = to_mali_sync_pt(pt); ++ mpt->flag = flag; ++ mpt->sync_tl = flag->sync_tl; + -+ return MALI_TRUE; ++ return pt; +} + -+ -+ump_random_mapping *ump_random_mapping_create(void) ++struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag) +{ -+ ump_random_mapping *map = _mali_osk_calloc(1, sizeof(ump_random_mapping)); ++ struct mali_internal_sync_point *sync_pt; ++ struct mali_internal_sync_fence *sync_fence; + -+ if (NULL == map) -+ return NULL; ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ MALI_DEBUG_ASSERT_POINTER(flag->sync_tl); + -+ map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_ORDERED, -+ _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP); -+ if (NULL != map->lock) { -+ map->root = RB_ROOT; -+#if UMP_RANDOM_MAP_DELAY -+ map->failed.count = 0; -+ map->failed.timestamp = jiffies; ++ sync_pt = mali_sync_flag_create_pt(flag); ++ if (NULL == sync_pt) { ++ MALI_PRINT_ERROR(("Mali sync: sync_pt creation failed\n")); ++ return NULL; ++ } ++ sync_fence = (struct mali_internal_sync_fence *)sync_file_create(&sync_pt->base); ++ if (NULL == sync_fence) { ++ MALI_PRINT_ERROR(("Mali sync: sync_fence creation failed\n")); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++ dma_fence_put(&sync_pt->base); 
++#else ++ fence_put(&sync_pt->base); +#endif -+ return map; ++ return NULL; + } -+ return NULL; -+} + -+void ump_random_mapping_destroy(ump_random_mapping *map) -+{ -+ _mali_osk_mutex_rw_term(map->lock); -+ _mali_osk_free(map); ++ /* 'sync_pt' no longer needs to hold a refcount of '*sync_pt', to put it off. */ ++ dma_fence_put(&sync_pt->base); ++ sync_pt = NULL; ++ ++ return sync_fence; +} ++#endif + -+int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem) ++void mali_sync_flag_get(struct mali_sync_flag *flag) +{ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); -+ -+ while (1) { -+ u32 id; -+ -+ get_random_bytes(&id, sizeof(id)); -+ -+ /* Try a new random number if id happened to be the invalid -+ * secure ID (-1). */ -+ if (unlikely(id == UMP_INVALID_SECURE_ID)) -+ continue; -+ -+ /* Insert into the tree. If the id was already in use, get a -+ * new random id and try again. */ -+ if (insert(&map->root, id, mem)) { -+ mem->secure_id = id; -+ break; -+ } -+ } -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); -+ -+ return 0; ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ kref_get(&flag->refcount); +} + -+ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id) ++/** ++ * Free sync flag. ++ * ++ * @param ref kref object embedded in sync flag that should be freed. ++ */ ++static void mali_sync_flag_free(struct kref *ref) +{ -+ ump_dd_mem *mem = NULL; -+#if UMP_RANDOM_MAP_DELAY -+ int do_delay = 0; -+#endif -+ -+ DEBUG_ASSERT(map); -+ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); -+ mem = search(&map->root, id); -+ -+ if (unlikely(NULL == mem)) { -+#if UMP_RANDOM_MAP_DELAY -+ map->failed.count++; -+ -+ if (time_is_before_jiffies(map->failed.timestamp + -+ UMP_FAILED_LOOKUP_DELAY * HZ)) { -+ /* If it is a long time since last failure, reset -+ * the counter and skip the delay this time. */ -+ map->failed.count = 0; -+ } else if (map->failed.count > UMP_FAILED_LOOKUPS_ALLOWED) { -+ do_delay = 1; -+ } -+ -+ map->failed.timestamp = jiffies; -+#endif /* UMP_RANDOM_MAP_DELAY */ -+ } else { -+ ump_dd_reference_add(mem); -+ } -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); ++ struct mali_sync_flag *flag; + -+#if UMP_RANDOM_MAP_DELAY -+ if (do_delay) { -+ /* Apply delay */ -+ schedule_timeout_killable(UMP_FAILED_LOOKUP_DELAY); -+ } -+#endif /* UMP_RANDOM_MAP_DELAY */ ++ MALI_DEBUG_ASSERT_POINTER(ref); ++ flag = container_of(ref, struct mali_sync_flag, refcount); + -+ return mem; ++ _mali_osk_free(flag); +} + -+static ump_dd_mem *ump_random_mapping_remove_internal(ump_random_mapping *map, int id) ++void mali_sync_flag_put(struct mali_sync_flag *flag) +{ -+ ump_dd_mem *mem = NULL; -+ -+ mem = search(&map->root, id); -+ -+ if (mem) { -+ rb_erase(&mem->node, &map->root); -+ } -+ -+ return mem; ++ MALI_DEBUG_ASSERT_POINTER(flag); ++ kref_put(&flag->refcount, mali_sync_flag_free); +} + -+void ump_random_mapping_put(ump_dd_mem *mem) ++void mali_sync_flag_signal(struct mali_sync_flag *flag, int error) +{ -+ int new_ref; -+ -+ _mali_osk_mutex_rw_wait(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW); -+ -+ new_ref = _ump_osk_atomic_dec_and_read(&mem->ref_count); -+ DBG_MSG(5, ("Memory reference decremented. ID: %u, new value: %d\n", -+ mem->secure_id, new_ref)); -+ -+ if (0 == new_ref) { -+ DBG_MSG(3, ("Final release of memory. 
ID: %u\n", mem->secure_id)); -+ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ if (mem->import_attach) { -+ struct dma_buf_attachment *attach = mem->import_attach; -+ struct dma_buf *dma_buf; -+ -+ if (mem->sgt) -+ dma_buf_unmap_attachment(attach, mem->sgt, -+ DMA_BIDIRECTIONAL); ++ MALI_DEBUG_ASSERT_POINTER(flag); + -+ dma_buf = attach->dmabuf; -+ dma_buf_detach(attach->dmabuf, attach); -+ dma_buf_put(dma_buf); ++ MALI_DEBUG_ASSERT(0 == flag->status); ++ flag->status = (0 > error) ? error : 1; + -+ } ++ _mali_osk_write_mem_barrier(); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ sync_timeline_signal(flag->sync_tl); ++#else ++ mali_internal_sync_timeline_signal(flag->sync_tl); +#endif -+ ump_random_mapping_remove_internal(device.secure_id_map, mem->secure_id); -+ -+ mem->release_func(mem->ctx, mem); -+ _mali_osk_free(mem); -+ } -+ -+ _mali_osk_mutex_rw_signal(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW); +} + -+ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int descriptor) -+{ -+ ump_dd_mem *mem; -+ -+ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); -+ mem = ump_random_mapping_remove_internal(map, descriptor); -+ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); + -+ return mem; -+} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_sync.h b/drivers/gpu/arm/mali400/mali/linux/mali_sync.h new file mode 100644 -index 000000000..2cea6cedc +index 000000000..91be8b9cf --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h -@@ -0,0 +1,84 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_sync.h +@@ -0,0 +1,169 @@ +/* -+ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012-2015, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -330414,158 +332165,172 @@ index 000000000..2cea6cedc + */ + +/** -+ * @file ump_kernel_random_mapping.h ++ * @file mali_sync.h ++ * ++ * Mali interface for Linux sync objects. 
+ */ + -+#ifndef __UMP_KERNEL_RANDOM_MAPPING_H__ -+#define __UMP_KERNEL_RANDOM_MAPPING_H__ ++#ifndef _MALI_SYNC_H_ ++#define _MALI_SYNC_H_ + -+#include "mali_osk.h" -+#include ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + -+#define UMP_RANDOM_MAP_DELAY 1 -+#define UMP_FAILED_LOOKUP_DELAY 10 /* ms */ -+#define UMP_FAILED_LOOKUPS_ALLOWED 10 /* number of allowed failed lookups */ ++#include ++#include + -+/** -+ * The random mapping object -+ * Provides a separate namespace where we can map an integer to a pointer -+ */ -+typedef struct ump_random_mapping { -+ _mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */ -+ struct rb_root root; -+#if UMP_RANDOM_MAP_DELAY -+ struct { -+ unsigned long count; -+ unsigned long timestamp; -+ } failed; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) ++#include ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++#include ++#else ++#include "mali_internal_sync.h" +#endif -+} ump_random_mapping; + ++ ++#include "mali_osk.h" ++ ++struct mali_sync_flag; ++struct mali_timeline; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +/** -+ * Create a random mapping object -+ * Create a random mapping capable of holding 2^20 entries -+ * @return Pointer to a random mapping object, NULL on failure ++ * Create a sync timeline. ++ * ++ * @param name Name of the sync timeline. ++ * @return The new sync timeline if successful, NULL if not. + */ -+ump_random_mapping *ump_random_mapping_create(void); ++struct sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name); + +/** -+ * Destroy a random mapping object -+ * @param map The map to free ++ * Creates a file descriptor representing the sync fence. Will release sync fence if allocation of ++ * file descriptor fails. ++ * ++ * @param sync_fence Sync fence. ++ * @return File descriptor representing sync fence if successful, or -1 if not. + */ -+void ump_random_mapping_destroy(ump_random_mapping *map); ++s32 mali_sync_fence_fd_alloc(struct sync_fence *sync_fence); + +/** -+ * Allocate a new mapping entry (random ID) -+ * Allocates a new entry in the map. -+ * @param map The map to allocate a new entry in -+ * @param target The value to map to -+ * @return The random allocated, a negative value on error ++ * Merges two sync fences. Both input sync fences will be released. ++ * ++ * @param sync_fence1 First sync fence. ++ * @param sync_fence2 Second sync fence. ++ * @return New sync fence that is the result of the merger if successful, or NULL if not. + */ -+int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem); ++struct sync_fence *mali_sync_fence_merge(struct sync_fence *sync_fence1, struct sync_fence *sync_fence2); + +/** -+ * Get the value mapped to by a random ID -+ * -+ * If the lookup fails, punish the calling thread by applying a delay. ++ * Create a sync fence that is already signaled. + * -+ * @param map The map to lookup the random id in -+ * @param id The ID to lookup -+ * @param target Pointer to a pointer which will receive the stored value -+ * @return ump_dd_mem pointer on successful lookup, NULL on error ++ * @param tl Sync timeline. ++ * @return New signaled sync fence if successful, NULL if not. 
+ */ -+ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id); ++struct sync_fence *mali_sync_timeline_create_signaled_fence(struct sync_timeline *sync_tl); + -+void ump_random_mapping_put(ump_dd_mem *mem); + +/** -+ * Free the random ID -+ * For the random to be reused it has to be freed -+ * @param map The map to free the random from -+ * @param id The ID to free ++ * Create a sync flag. ++ * ++ * @param sync_tl Sync timeline. ++ * @param point Point on Mali timeline. ++ * @return New sync flag if successful, NULL if not. + */ -+ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int id); ++struct mali_sync_flag *mali_sync_flag_create(struct sync_timeline *sync_tl, u32 point); + -+#endif /* __UMP_KERNEL_RANDOM_MAPPING_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c b/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c -new file mode 100644 -index 000000000..e41931e1e ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++/** ++ * Create a sync fence attached to given sync flag. ++ * ++ * @param flag Sync flag. ++ * @return New sync fence if successful, NULL if not. ++ */ ++struct sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag); ++#else ++/** ++ * Create a sync timeline. ++ * ++ * @param name Name of the sync timeline. ++ * @return The new sync timeline if successful, NULL if not. + */ ++struct mali_internal_sync_timeline *mali_sync_timeline_create(struct mali_timeline *timeline, const char *name); + -+#include /* kernel module definitions */ -+#include /* request_mem_region */ ++/** ++ * Creates a file descriptor representing the sync fence. Will release sync fence if allocation of ++ * file descriptor fails. ++ * ++ * @param sync_fence Sync fence. ++ * @return File descriptor representing sync fence if successful, or -1 if not. ++ */ ++s32 mali_sync_fence_fd_alloc(struct mali_internal_sync_fence *sync_fence); + -+#include "arch/config.h" /* Configuration for current platform. The symlink for arch is set by Makefile */ ++/** ++ * Merges two sync fences. Both input sync fences will be released. ++ * ++ * @param sync_fence1 First sync fence. ++ * @param sync_fence2 Second sync fence. ++ * @return New sync fence that is the result of the merger if successful, or NULL if not. ++ */ ++struct mali_internal_sync_fence *mali_sync_fence_merge(struct mali_internal_sync_fence *sync_fence1, struct mali_internal_sync_fence *sync_fence2); + -+#include "ump_osk.h" -+#include "ump_kernel_common.h" -+#include "ump_kernel_memory_backend_os.h" -+#include "ump_kernel_memory_backend_dedicated.h" ++/** ++ * Create a sync fence that is already signaled. ++ * ++ * @param tl Sync timeline. ++ * @return New signaled sync fence if successful, NULL if not. 
++ */ ++struct mali_internal_sync_fence *mali_sync_timeline_create_signaled_fence(struct mali_internal_sync_timeline *sync_tl); + -+/* Configure which dynamic memory allocator to use */ -+int ump_backend = ARCH_UMP_BACKEND_DEFAULT; -+module_param(ump_backend, int, S_IRUGO); /* r--r--r-- */ -+MODULE_PARM_DESC(ump_backend, "0 = dedicated memory backend (default), 1 = OS memory backend"); + -+/* The base address of the memory block for the dedicated memory backend */ -+unsigned int ump_memory_address = ARCH_UMP_MEMORY_ADDRESS_DEFAULT; -+module_param(ump_memory_address, uint, S_IRUGO); /* r--r--r-- */ -+MODULE_PARM_DESC(ump_memory_address, "The physical address to map for the dedicated memory backend"); ++/** ++ * Create a sync flag. ++ * ++ * @param sync_tl Sync timeline. ++ * @param point Point on Mali timeline. ++ * @return New sync flag if successful, NULL if not. ++ */ ++struct mali_sync_flag *mali_sync_flag_create(struct mali_internal_sync_timeline *sync_tl, u32 point); + -+/* The size of the memory block for the dedicated memory backend */ -+unsigned int ump_memory_size = ARCH_UMP_MEMORY_SIZE_DEFAULT; -+module_param(ump_memory_size, uint, S_IRUGO); /* r--r--r-- */ -+MODULE_PARM_DESC(ump_memory_size, "The size of fixed memory to map in the dedicated memory backend"); ++/** ++ * Create a sync fence attached to given sync flag. ++ * ++ * @param flag Sync flag. ++ * @return New sync fence if successful, NULL if not. ++ */ ++struct mali_internal_sync_fence *mali_sync_flag_create_fence(struct mali_sync_flag *flag); + -+ump_memory_backend *ump_memory_backend_create(void) -+{ -+ ump_memory_backend *backend = NULL; ++#endif ++/** ++ * Grab sync flag reference. ++ * ++ * @param flag Sync flag. ++ */ ++void mali_sync_flag_get(struct mali_sync_flag *flag); + -+ /* Create the dynamic memory allocator backend */ -+ if (0 == ump_backend) { -+ DBG_MSG(2, ("Using dedicated memory backend\n")); ++/** ++ * Release sync flag reference. If this was the last reference, the sync flag will be freed. ++ * ++ * @param flag Sync flag. ++ */ ++void mali_sync_flag_put(struct mali_sync_flag *flag); + -+ DBG_MSG(2, ("Requesting dedicated memory: 0x%08x, size: %u\n", ump_memory_address, ump_memory_size)); -+ /* Ask the OS if we can use the specified physical memory */ -+ if (NULL == request_mem_region(ump_memory_address, ump_memory_size, "UMP Memory")) { -+ MSG_ERR(("Failed to request memory region (0x%08X - 0x%08X). Is Mali DD already loaded?\n", ump_memory_address, ump_memory_address + ump_memory_size - 1)); -+ return NULL; -+ } -+ backend = ump_block_allocator_create(ump_memory_address, ump_memory_size); -+ } else if (1 == ump_backend) { -+ DBG_MSG(2, ("Using OS memory backend, allocation limit: %d\n", ump_memory_size)); -+ backend = ump_os_memory_backend_create(ump_memory_size); -+ } ++/** ++ * Signal sync flag. All sync fences created from this flag will be signaled. ++ * ++ * @param flag Sync flag to signal. ++ * @param error Negative error code, or 0 if no error. 
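 *
 * A minimal sketch of how the flag helpers declared in this header compose
 * (pre-4.6 struct sync_timeline / struct sync_fence variant; error handling
 * elided; sync_tl and point are assumed to come from the caller's Mali
 * timeline bookkeeping):
 *
 *   struct mali_sync_flag *flag  = mali_sync_flag_create(sync_tl, point);
 *   struct sync_fence     *fence = mali_sync_flag_create_fence(flag);
 *   s32                    fd    = mali_sync_fence_fd_alloc(fence);
 *
 *   The fd is handed back to user space. Once the point is reached on the
 *   Mali timeline, the producer signals and drops its reference:
 *
 *   mali_sync_flag_signal(flag, 0);
 *   mali_sync_flag_put(flag);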
++ */ ++void mali_sync_flag_signal(struct mali_sync_flag *flag, int error); + -+ return backend; -+} ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ + -+void ump_memory_backend_destroy(void) -+{ -+ if (0 == ump_backend) { -+ DBG_MSG(2, ("Releasing dedicated memory: 0x%08x\n", ump_memory_address)); -+ release_mem_region(ump_memory_address, ump_memory_size); -+ } -+} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c ++#endif /* _MALI_SYNC_H_ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h b/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h new file mode 100644 -index 000000000..2b634ba79 +index 000000000..68b27b8be --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c -@@ -0,0 +1,27 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_uk_types.h +@@ -0,0 +1,17 @@ +/* -+ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -330574,31 +332339,21 @@ index 000000000..2b634ba79 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+/** -+ * @file ump_osk_atomics.c -+ * Implementation of the OS abstraction layer for the UMP kernel device driver -+ */ -+ -+#include "ump_osk.h" -+#include ++#ifndef __MALI_UK_TYPES_H__ ++#define __MALI_UK_TYPES_H__ + -+int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom) -+{ -+ return atomic_dec_return((atomic_t *)&atom->u.val); -+} ++/* Simple wrapper in order to find the OS specific location of this file */ ++#include + -+int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom) -+{ -+ return atomic_inc_return((atomic_t *)&atom->u.val); -+} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c ++#endif /* __MALI_UK_TYPES_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c new file mode 100644 -index 000000000..e08bf2525 +index 000000000..0bd1cddb1 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c -@@ -0,0 +1,314 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_core.c +@@ -0,0 +1,171 @@ +/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -330606,361 +332361,273 @@ index 000000000..e08bf2525 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ ++#include /* file system operations */ ++#include /* memort allocation functions */ ++#include /* user space access */ + -+/** -+ * @file ump_osk_memory.c -+ * Implementation of the OS abstraction layer for the kernel device driver -+ */ ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+/* needed to detect kernel version specific code */ -+#include ++int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs) ++{ ++ _mali_uk_get_api_version_s kargs; ++ _mali_osk_errcode_t err; + -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+#include "ump_ukk.h" -+#include "ump_kernel_common.h" -+#include /* kernel module definitions */ -+#include -+#include -+#include ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+#include -+#include /* to verify pointers from user space */ -+#include -+#include ++ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT; + -+typedef struct ump_vma_usage_tracker { -+ atomic_t references; -+ ump_memory_allocation *descriptor; -+} ump_vma_usage_tracker; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_api_version(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+static void ump_vma_open(struct vm_area_struct *vma); -+static void ump_vma_close(struct vm_area_struct *vma); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf); -+#else -+static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address); -+#endif ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT; + -+static struct vm_operations_struct ump_vm_ops = { -+ .open = ump_vma_open, -+ .close = ump_vma_close, -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+ .fault = ump_cpu_page_fault_handler -+#else -+ .nopfn = ump_cpu_page_fault_handler -+#endif -+}; ++ return 0; ++} + -+/* -+ * Page fault for VMA region -+ * This should never happen since we always map in the entire virtual memory range. 
-+ */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf) -+#else -+static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address) -+#endif ++int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs) +{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+ void __user *address; -+ address = vmf->virtual_address; -+#endif -+ MSG_ERR(("Page-fault in UMP memory region caused by the CPU\n")); -+ MSG_ERR(("VMA: 0x%08lx, virtual address: 0x%08lx\n", (unsigned long)vma, address)); ++ _mali_uk_get_api_version_v2_s kargs; ++ _mali_osk_errcode_t err; + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) -+ return VM_FAULT_SIGBUS; -+#else -+ return NOPFN_SIGBUS; -+#endif -+} ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+static void ump_vma_open(struct vm_area_struct *vma) -+{ -+ ump_vma_usage_tracker *vma_usage_tracker; -+ int new_val; ++ if (0 != get_user(kargs.version, &uargs->version)) return -EFAULT; + -+ vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data; -+ BUG_ON(NULL == vma_usage_tracker); ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_api_version_v2(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ new_val = atomic_inc_return(&vma_usage_tracker->references); ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ if (0 != put_user(kargs.compatible, &uargs->compatible)) return -EFAULT; + -+ DBG_MSG(4, ("VMA open, VMA reference count incremented. VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val)); ++ return 0; +} + -+static void ump_vma_close(struct vm_area_struct *vma) ++/* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. */ ++#if 0 ++#define mali400_in_rk30_version 0x01 ++int get_mali_version_in_rk30_wrapper(struct mali_session_data *session_data, _mali_uk_get_mali_version_in_rk30_s __user *uargs) +{ -+ ump_vma_usage_tracker *vma_usage_tracker; -+ _ump_uk_unmap_mem_s args; -+ int new_val; -+ -+ vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data; -+ BUG_ON(NULL == vma_usage_tracker); -+ -+ new_val = atomic_dec_return(&vma_usage_tracker->references); -+ -+ DBG_MSG(4, ("VMA close, VMA reference count decremented. 
VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val)); -+ -+ if (0 == new_val) { -+ ump_memory_allocation *descriptor; -+ -+ descriptor = vma_usage_tracker->descriptor; ++ _mali_uk_get_mali_version_in_rk30_s kargs; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ kargs.ctx = (uintptr_t)session_data; ++ kargs.version = mali400_in_rk30_version; ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ return 0; ++} ++#else ++#include "../platform/rk/rk_ext.h" ++int get_rk_ko_version_wrapper(struct mali_session_data *session_data, _mali_rk_ko_version_s __user *uargs) ++{ ++ _mali_rk_ko_version_s kargs; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ kargs.ctx = (uintptr_t)session_data; ++ kargs.version = RK_KO_VER; ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; ++ return 0; ++} ++#endif + -+ args.ctx = descriptor->ump_session; -+ args.cookie = descriptor->cookie; -+ args.mapping = descriptor->mapping; -+ args.size = descriptor->size; ++int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs) ++{ ++ _mali_uk_wait_for_notification_s kargs; ++ _mali_osk_errcode_t err; + -+ args._ukk_private = NULL; /** @note unused */ ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ DBG_MSG(4, ("No more VMA references left, releasing UMP memory\n")); -+ _ump_ukk_unmap_mem(& args); ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_wait_for_notification(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ /* vma_usage_tracker is free()d by _ump_osk_mem_mapregion_term() */ ++ if (_MALI_NOTIFICATION_CORE_SHUTDOWN_IN_PROGRESS != kargs.type) { ++ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ ++ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_wait_for_notification_s))) return -EFAULT; ++ } else { ++ if (0 != put_user(kargs.type, &uargs->type)) return -EFAULT; + } ++ ++ return 0; +} + -+_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor) ++int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs) +{ -+ ump_vma_usage_tracker *vma_usage_tracker; -+ struct vm_area_struct *vma; ++ _mali_uk_post_notification_s kargs; ++ _mali_osk_errcode_t err; + -+ if (NULL == descriptor) return _MALI_OSK_ERR_FAULT; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ vma_usage_tracker = kmalloc(sizeof(ump_vma_usage_tracker), GFP_KERNEL); -+ if (NULL == vma_usage_tracker) { -+ DBG_MSG(1, ("Failed to allocate memory for ump_vma_usage_tracker in _mali_osk_mem_mapregion_init\n")); -+ return -_MALI_OSK_ERR_FAULT; -+ } ++ kargs.ctx = (uintptr_t)session_data; + -+ vma = (struct vm_area_struct *)descriptor->process_mapping_info; -+ if (NULL == vma) { -+ kfree(vma_usage_tracker); -+ return _MALI_OSK_ERR_FAULT; ++ if (0 != get_user(kargs.type, &uargs->type)) { ++ return -EFAULT; + } + -+ vma->vm_private_data = vma_usage_tracker; -+ vma->vm_flags |= VM_IO; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) -+ vma->vm_flags |= VM_RESERVED; -+#else -+ vma->vm_flags |= VM_DONTDUMP; -+ vma->vm_flags |= VM_DONTEXPAND; -+ vma->vm_flags |= VM_PFNMAP; -+#endif ++ err = _mali_ukk_post_notification(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + ++ return 0; ++} + -+ if (0 == descriptor->is_cached) { -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ } -+ DBG_MSG(3, ("Mapping with page_prot: 0x%x\n", vma->vm_page_prot)); ++int get_user_settings_wrapper(struct 
mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs) ++{ ++ _mali_uk_get_user_settings_s kargs; ++ _mali_osk_errcode_t err; + -+ /* Setup the functions which handle further VMA handling */ -+ vma->vm_ops = &ump_vm_ops; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ /* Do the va range allocation - in this case, it was done earlier, so we copy in that information */ -+ descriptor->mapping = (void __user *)vma->vm_start; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_user_settings(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + -+ atomic_set(&vma_usage_tracker->references, 1); /*this can later be increased if process is forked, see ump_vma_open() */ -+ vma_usage_tracker->descriptor = descriptor; ++ kargs.ctx = 0; /* prevent kernel address to be returned to user space */ ++ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_user_settings_s))) return -EFAULT; + -+ return _MALI_OSK_ERR_OK; ++ return 0; +} + -+void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor) ++int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs) +{ -+ struct vm_area_struct *vma; -+ ump_vma_usage_tracker *vma_usage_tracker; ++ _mali_uk_request_high_priority_s kargs; ++ _mali_osk_errcode_t err; + -+ if (NULL == descriptor) return; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ /* Linux does the right thing as part of munmap to remove the mapping -+ * All that remains is that we remove the vma_usage_tracker setup in init() */ -+ vma = (struct vm_area_struct *)descriptor->process_mapping_info; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_request_high_priority(&kargs); + -+ vma_usage_tracker = vma->vm_private_data; ++ kargs.ctx = 0; + -+ /* We only get called if mem_mapregion_init succeeded */ -+ kfree(vma_usage_tracker); -+ return; ++ return map_errcode(err); +} + -+_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size) ++int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs) +{ -+ struct vm_area_struct *vma; -+ _mali_osk_errcode_t retval; ++ _mali_uk_pending_submit_s kargs; ++ _mali_osk_errcode_t err; + -+ if (NULL == descriptor) return _MALI_OSK_ERR_FAULT; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ vma = (struct vm_area_struct *)descriptor->process_mapping_info; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_pending_submit(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ if (NULL == vma) return _MALI_OSK_ERR_FAULT; ++ return 0; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c +new file mode 100644 +index 000000000..68fcd9719 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_gp.c +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ ++#include /* file system operations */ ++#include /* user space access */ + -+ retval = remap_pfn_range(vma, ((u32)descriptor->mapping) + offset, (*phys_addr) >> PAGE_SHIFT, size, vma->vm_page_prot) ? _MALI_OSK_ERR_FAULT : _MALI_OSK_ERR_OK;; ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+ DBG_MSG(4, ("Mapping virtual to physical memory. ID: %u, vma: 0x%08lx, virtual addr:0x%08lx, physical addr: 0x%08lx, size:%lu, prot:0x%x, vm_flags:0x%x RETVAL: 0x%x\n", -+ ump_dd_secure_id_get(descriptor->handle), -+ (unsigned long)vma, -+ (unsigned long)(vma->vm_start + offset), -+ (unsigned long)*phys_addr, -+ size, -+ (unsigned int)vma->vm_page_prot, vma->vm_flags, retval)); ++int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs) ++{ ++ _mali_osk_errcode_t err; + -+ return retval; -+} ++ /* If the job was started successfully, 0 is returned. If there was an error, but the job ++ * was started, we return -ENOENT. For anything else returned, the job was not started. */ + -+static void level1_cache_flush_all(void) -+{ -+ DBG_MSG(4, ("UMP[xx] Flushing complete L1 cache\n")); -+ __cpuc_flush_kern_all(); ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); ++ ++ err = _mali_ukk_gp_start_job(session_data, uargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); ++ ++ return 0; +} + -+void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data) ++int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs) +{ -+ int i; ++ _mali_uk_get_gp_core_version_s kargs; ++ _mali_osk_errcode_t err; + -+ /* Flush L1 using virtual address, the entire range in one go. -+ * Only flush if user space process has a valid write mapping on given address. */ -+ if ((mem) && (virt != NULL) && (access_ok(virt, size))) { -+ __cpuc_flush_dcache_area(virt, size); -+ DBG_MSG(3, ("UMP[%02u] Flushing CPU L1 Cache. CPU address: %x, size: %x\n", mem->secure_id, virt, size)); -+ } else { -+ if (session_data) { -+ if (op == _UMP_UK_MSYNC_FLUSH_L1) { -+ DBG_MSG(4, ("UMP Pending L1 cache flushes: %d\n", session_data->has_pending_level1_cache_flush)); -+ session_data->has_pending_level1_cache_flush = 0; -+ level1_cache_flush_all(); -+ return; -+ } else { -+ if (session_data->cache_operations_ongoing) { -+ session_data->has_pending_level1_cache_flush++; -+ DBG_MSG(4, ("UMP[%02u] Defering the L1 flush. Nr pending:%d\n", mem->secure_id, session_data->has_pending_level1_cache_flush)); -+ } else { -+ /* Flushing the L1 cache for each switch_user() if ump_cache_operations_control(START) is not called */ -+ level1_cache_flush_all(); -+ } -+ } -+ } else { -+ DBG_MSG(4, ("Unkown state %s %d\n", __FUNCTION__, __LINE__)); -+ level1_cache_flush_all(); -+ } -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (NULL == mem) return; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_gp_core_version(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ if (mem->size_bytes == size) { -+ DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache\n", mem->secure_id)); -+ } else { -+ DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache. Blocks:%u, TotalSize:%u. 
FlushSize:%u Offset:0x%x FirstPaddr:0x%08x\n", -+ mem->secure_id, mem->nr_blocks, mem->size_bytes, size, offset, mem->block_array[0].addr)); -+ } ++ /* no known transactions to roll-back */ + ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; + -+ /* Flush L2 using physical addresses, block for block. */ -+ for (i = 0 ; i < mem->nr_blocks; i++) { -+ u32 start_p, end_p; -+ ump_dd_physical_block *block; -+ block = &mem->block_array[i]; ++ return 0; ++} + -+ if (offset >= block->size) { -+ offset -= block->size; -+ continue; -+ } ++int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs) ++{ ++ _mali_uk_gp_suspend_response_s kargs; ++ _mali_osk_errcode_t err; + -+ if (offset) { -+ start_p = (u32)block->addr + offset; -+ /* We'll zero the offset later, after using it to calculate end_p. */ -+ } else { -+ start_p = (u32)block->addr; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (size < block->size - offset) { -+ end_p = start_p + size; -+ size = 0; -+ } else { -+ if (offset) { -+ end_p = start_p + (block->size - offset); -+ size -= block->size - offset; -+ offset = 0; -+ } else { -+ end_p = start_p + block->size; -+ size -= block->size; -+ } -+ } ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_gp_suspend_response_s))) return -EFAULT; + -+ switch (op) { -+ case _UMP_UK_MSYNC_CLEAN: -+ outer_clean_range(start_p, end_p); -+ break; -+ case _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE: -+ outer_flush_range(start_p, end_p); -+ break; -+ case _UMP_UK_MSYNC_INVALIDATE: -+ outer_inv_range(start_p, end_p); -+ break; -+ default: -+ break; -+ } ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_gp_suspend_response(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ if (0 == size) { -+ /* Nothing left to flush. */ -+ break; -+ } -+ } ++ if (0 != put_user(kargs.cookie, &uargs->cookie)) return -EFAULT; + -+ return; ++ /* no known transactions to roll-back */ ++ return 0; +} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c -new file mode 100644 -index 000000000..58c9f1bf2 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+/** -+ * @file ump_osk_misc.c -+ * Implementation of the OS abstraction layer for the UMP kernel device driver -+ */ + ++int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs) ++{ ++ _mali_uk_get_gp_number_of_cores_s kargs; ++ _mali_osk_errcode_t err; + -+#include "ump_osk.h" ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+#include -+#include "ump_kernel_linux.h" ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_gp_number_of_cores(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+/* is called from ump_kernel_constructor in common code */ -+_mali_osk_errcode_t _ump_osk_init(void) -+{ -+ if (0 != ump_kernel_device_initialize()) { -+ return _MALI_OSK_ERR_FAULT; -+ } ++ /* no known transactions to roll-back */ + -+ return _MALI_OSK_ERR_OK; -+} ++ if (0 != put_user(kargs.number_of_cores, &uargs->number_of_cores)) return -EFAULT; + -+_mali_osk_errcode_t _ump_osk_term(void) -+{ -+ ump_kernel_device_terminate(); -+ return _MALI_OSK_ERR_OK; ++ return 0; +} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c new file mode 100644 -index 000000000..56a787ff6 +index 000000000..baea4c688 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c -@@ -0,0 +1,230 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_mem.c +@@ -0,0 +1,333 @@ +/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -330968,561 +332635,336 @@ index 000000000..56a787ff6 + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ ++#include /* file system operations */ ++#include /* user space access */ + -+/** -+ * @file ump_ukk_wrappers.c -+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation -+ */ -+ -+ -+#include /* user space access */ -+ -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+#include "ump_ukk.h" -+#include "ump_kernel_common.h" -+#include -+#include "ump_kernel_interface_ref_drv.h" -+#include "mali_osk_list.h" -+ -+extern struct device *ump_global_mdev; ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+/* -+ * IOCTL operation; Allocate UMP memory -+ */ -+int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs) +{ -+ _ump_uk_allocate_s user_interaction; ++ _mali_uk_alloc_mem_s kargs; + _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_allocate()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ /* Copy the user space memory to kernel space (so we safely can read it) */ -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_allocate()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_alloc_mem_s))) { + return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; + -+ user_interaction.ctx = (void *) session_data; ++ err = _mali_ukk_mem_allocate(&kargs); + -+ err = _ump_ukk_allocate(&user_interaction); + if (_MALI_OSK_ERR_OK != err) { -+ DBG_MSG(1, ("_ump_ukk_allocate() failed in ump_ioctl_allocate()\n")); -+ return ump_map_errcode(err); ++ return map_errcode(err); + } -+ user_interaction.ctx = NULL; -+ -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ /* If the copy fails then we should release the memory. 
We can use the IOCTL release to accomplish this */ -+ _ump_uk_release_s release_args; -+ -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_allocate()\n")); -+ -+ release_args.ctx = (void *) session_data; -+ release_args.secure_id = user_interaction.secure_id; -+ -+ err = _ump_ukk_release(&release_args); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("_ump_ukk_release() also failed when trying to release newly allocated memory in ump_ioctl_allocate()\n")); -+ } + ++ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) { + return -EFAULT; + } + -+ return 0; /* success */ -+} -+ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+static ump_dd_handle get_ump_handle_from_dmabuf(struct ump_session_data *session_data, -+ struct dma_buf *dmabuf) -+{ -+ ump_session_memory_list_element *session_mem, *tmp; -+ struct dma_buf_attachment *attach; -+ ump_dd_handle ump_handle; -+ -+ DEBUG_ASSERT_POINTER(session_data); -+ -+ _mali_osk_mutex_wait(session_data->lock); -+ -+ _MALI_OSK_LIST_FOREACHENTRY(session_mem, tmp, -+ &session_data->list_head_session_memory_list, -+ ump_session_memory_list_element, list) { -+ if (session_mem->mem->import_attach) { -+ attach = session_mem->mem->import_attach; -+ if (attach->dmabuf == dmabuf) { -+ _mali_osk_mutex_signal(session_data->lock); -+ ump_handle = (ump_dd_handle)session_mem->mem; -+ ump_random_mapping_get(device.secure_id_map, ump_dd_secure_id_get(ump_handle)); -+ return ump_handle; -+ } -+ } -+ } -+ -+ _mali_osk_mutex_signal(session_data->lock); -+ -+ return NULL; ++ return 0; +} + -+int ump_dmabuf_import_wrapper(u32 __user *argument, -+ struct ump_session_data *session_data) ++int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs) +{ -+ ump_session_memory_list_element *session = NULL; -+ _ump_uk_dmabuf_s ump_dmabuf; -+ ump_dd_handle ump_handle; -+ ump_dd_physical_block *blocks = NULL; -+ struct dma_buf_attachment *attach = NULL; -+ struct dma_buf *dma_buf; -+ struct sg_table *sgt = NULL; -+ struct scatterlist *sgl; -+ unsigned int i = 0; -+ int ret = 0; ++ _mali_uk_free_mem_s kargs; ++ _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (!argument || !session_data) { -+ MSG_ERR(("NULL parameter.\n")); -+ return -EINVAL; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (copy_from_user(&ump_dmabuf, argument, -+ sizeof(_ump_uk_dmabuf_s))) { -+ MSG_ERR(("copy_from_user() failed.\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_free_mem_s))) { + return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; + -+ dma_buf = dma_buf_get(ump_dmabuf.fd); -+ if (IS_ERR(dma_buf)) -+ return PTR_ERR(dma_buf); ++ err = _mali_ukk_mem_free(&kargs); + -+ /* -+ * if already imported then increase a refcount to the ump descriptor -+ * and call dma_buf_put() and then go to found to return previous -+ * ump secure id. 
-+ */ -+ ump_handle = get_ump_handle_from_dmabuf(session_data, dma_buf); -+ if (ump_handle) { -+ dma_buf_put(dma_buf); -+ goto found; ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ attach = dma_buf_attach(dma_buf, ump_global_mdev); -+ if (IS_ERR(attach)) { -+ ret = PTR_ERR(attach); -+ goto err_dma_buf_put; ++ if (0 != put_user(kargs.free_pages_nr, &uargs->free_pages_nr)) { ++ return -EFAULT; + } + -+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); -+ if (IS_ERR(sgt)) { -+ ret = PTR_ERR(sgt); -+ goto err_dma_buf_detach; -+ } ++ return 0; ++} + -+ blocks = (ump_dd_physical_block *)_mali_osk_malloc(sizeof(ump_dd_physical_block) * sgt->nents); -+ if (!blocks) { -+ DBG_MSG(1, ("Failed to allocate blocks.\n")); -+ ret = -EFAULT; -+ goto err_dma_buf_unmap; -+ } -+ for_each_sg(sgt->sgl, sgl, sgt->nents, i) { -+ blocks[i].addr = sg_phys(sgl); -+ blocks[i].size = sg_dma_len(sgl); -+ } ++int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs) ++{ ++ _mali_uk_bind_mem_s kargs; ++ _mali_osk_errcode_t err; + -+ /* -+ * Initialize the session memory list element, and add it -+ * to the session object -+ */ -+ session = _mali_osk_calloc(1, sizeof(*session)); -+ if (!session) { -+ DBG_MSG(1, ("Failed to allocate session.\n")); -+ ret = -EFAULT; -+ goto err_free_block; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ ump_handle = ump_dd_handle_create_from_phys_blocks(blocks, i); -+ if (UMP_DD_HANDLE_INVALID == ump_handle) { -+ DBG_MSG(1, ("Failed to create ump handle.\n")); -+ ret = -EFAULT; -+ goto err_free_session; ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_bind_mem_s))) { ++ return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; + -+ session->mem = (ump_dd_mem *)ump_handle; -+ session->mem->import_attach = attach; -+ session->mem->sgt = sgt; -+ -+ _mali_osk_mutex_wait(session_data->lock); -+ _mali_osk_list_add(&(session->list), -+ &(session_data->list_head_session_memory_list)); -+ _mali_osk_mutex_signal(session_data->lock); -+ -+ _mali_osk_free(blocks); -+ -+found: -+ ump_dmabuf.ctx = (void *)session_data; -+ ump_dmabuf.secure_id = ump_dd_secure_id_get(ump_handle); -+ ump_dmabuf.size = ump_dd_size_get(ump_handle); ++ err = _mali_ukk_mem_bind(&kargs); + -+ if (copy_to_user(argument, &ump_dmabuf, -+ sizeof(_ump_uk_dmabuf_s))) { -+ MSG_ERR(("copy_to_user() failed.\n")); -+ ret = -EFAULT; -+ goto err_release_ump_handle; ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ return ret; -+ -+err_release_ump_handle: -+ ump_dd_reference_release(ump_handle); -+err_free_session: -+ _mali_osk_free(session); -+err_free_block: -+ _mali_osk_free(blocks); -+err_dma_buf_unmap: -+ dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL); -+err_dma_buf_detach: -+ dma_buf_detach(dma_buf, attach); -+err_dma_buf_put: -+ dma_buf_put(dma_buf); -+ return ret; ++ return 0; +} -+#endif -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h -new file mode 100644 -index 000000000..61a7095a6 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -+ */ -+ -+/** -+ * @file ump_ukk_wrappers.h -+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation -+ */ + -+#ifndef __UMP_UKK_REF_WRAPPERS_H__ -+#define __UMP_UKK_REF_WRAPPERS_H__ ++int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs) ++{ ++ _mali_uk_unbind_mem_s kargs; ++ _mali_osk_errcode_t err; + -+#include -+#include "ump_kernel_common.h" ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+#ifdef __cplusplus -+extern "C" { -+#endif ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_unbind_mem_s))) { ++ return -EFAULT; ++ } ++ kargs.ctx = (uintptr_t)session_data; + ++ err = _mali_ukk_mem_unbind(&kargs); + -+int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+#ifdef CONFIG_DMA_SHARED_BUFFER -+int ump_dmabuf_import_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+#endif ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + -+#ifdef __cplusplus ++ return 0; +} -+#endif -+ -+#endif /* __UMP_UKK_REF_WRAPPERS_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c -new file mode 100644 -index 000000000..4d6b69608 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c -@@ -0,0 +1,280 @@ -+/* -+ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained from Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+ */ -+ -+/** -+ * @file ump_ukk_wrappers.c -+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls -+ */ -+ -+#include /* user space access */ + -+#include "ump_osk.h" -+#include "ump_uk_types.h" -+#include "ump_ukk.h" -+#include "ump_kernel_common.h" + -+/* -+ * IOCTL operation; Negotiate version of IOCTL API -+ */ -+int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs) +{ -+ _ump_uk_api_version_s version_info; ++ _mali_uk_cow_mem_s kargs; + _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_get_api_version()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ /* Copy the user space memory to kernel space (so we safely can read it) */ -+ if (0 != copy_from_user(&version_info, argument, sizeof(version_info))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_mem_s))) { + return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; ++ ++ err = _mali_ukk_mem_cow(&kargs); + -+ version_info.ctx = (void *) session_data; -+ err = _ump_uku_get_api_version(&version_info); + if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("_ump_uku_get_api_version() failed in ump_ioctl_get_api_version()\n")); -+ return ump_map_errcode(err); ++ return map_errcode(err); + } + -+ version_info.ctx = NULL; -+ -+ /* Copy ouput data back to user space */ -+ if (0 != copy_to_user(argument, &version_info, sizeof(version_info))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_get_api_version()\n")); ++ if (0 != put_user(kargs.backend_handle, &uargs->backend_handle)) { + return -EFAULT; + } + -+ return 0; /* success */ ++ return 0; +} + -+ -+/* -+ * IOCTL operation; Release reference to specified UMP memory. -+ */ -+int ump_release_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs) +{ -+ _ump_uk_release_s release_args; ++ _mali_uk_cow_modify_range_s kargs; + _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_release()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ /* Copy the user space memory to kernel space (so we safely can read it) */ -+ if (0 != copy_from_user(&release_args, argument, sizeof(release_args))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_cow_modify_range_s))) { + return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; ++ ++ err = _mali_ukk_mem_cow_modify_range(&kargs); + -+ release_args.ctx = (void *) session_data; -+ err = _ump_ukk_release(&release_args); + if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("_ump_ukk_release() failed in ump_ioctl_release()\n")); -+ return ump_map_errcode(err); ++ return map_errcode(err); + } + -+ -+ return 0; /* success */ ++ if (0 != put_user(kargs.change_pages_nr, &uargs->change_pages_nr)) { ++ return -EFAULT; ++ } ++ return 0; +} + -+/* -+ * IOCTL operation; Return size for specified UMP memory. 
-+ */ -+int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++ ++int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs) +{ -+ _ump_uk_size_get_s user_interaction; ++ _mali_uk_mem_resize_s kargs; + _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_size_get()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_resize_s))) { + return -EFAULT; + } ++ kargs.ctx = (uintptr_t)session_data; + -+ user_interaction.ctx = (void *) session_data; -+ err = _ump_ukk_size_get(&user_interaction); -+ if (_MALI_OSK_ERR_OK != err) { -+ MSG_ERR(("_ump_ukk_size_get() failed in ump_ioctl_size_get()\n")); -+ return ump_map_errcode(err); -+ } -+ -+ user_interaction.ctx = NULL; ++ err = _mali_ukk_mem_resize(&kargs); + -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_size_get()\n")); -+ return -EFAULT; ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ return 0; /* success */ ++ return 0; +} + -+/* -+ * IOCTL operation; Do cache maintenance on specified UMP memory. -+ */ -+int ump_msync_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs) +{ -+ _ump_uk_msync_s user_interaction; ++ _mali_uk_mem_write_safe_s kargs; ++ _mali_osk_errcode_t err; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_msync()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_mem_write_safe_s))) { + return -EFAULT; + } + -+ user_interaction.ctx = (void *) session_data; -+ -+ _ump_ukk_msync(&user_interaction); -+ -+ user_interaction.ctx = NULL; ++ kargs.ctx = (uintptr_t)session_data; + -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_msync()\n")); -+ return -EFAULT; ++ /* Check if we can access the buffers */ ++ if (!access_ok((const void *)(uintptr_t)kargs.dest, kargs.size) || ++ !access_ok((const void *)(uintptr_t)kargs.src, kargs.size)) { ++ return -EINVAL; + } + -+ return 0; /* success */ -+} -+int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data *session_data) -+{ -+ _ump_uk_cache_operations_control_s user_interaction; ++ /* Check if size wraps */ ++ if ((kargs.size + kargs.dest) <= kargs.dest ++ || (kargs.size + kargs.src) <= kargs.src) { ++ return -EINVAL; ++ } + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; ++ err = _mali_ukk_mem_write_safe(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ if (0 != copy_from_user(&user_interaction, argument, 
sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_cache_operations_control()\n")); ++ if (0 != put_user(kargs.size, &uargs->size)) { + return -EFAULT; + } + -+ user_interaction.ctx = (void *) session_data; -+ -+ _ump_ukk_cache_operations_control((_ump_uk_cache_operations_control_s *) &user_interaction); ++ return 0; ++} + -+ user_interaction.ctx = NULL; + -+#if 0 /* No data to copy back */ -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_cache_operations_control()\n")); -+ return -EFAULT; -+ } -+#endif -+ return 0; /* success */ -+} + -+int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs) +{ -+ _ump_uk_switch_hw_usage_s user_interaction; -+ -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; -+ } ++ _mali_uk_query_mmu_page_table_dump_size_s kargs; ++ _mali_osk_errcode_t err; + -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); -+ return -EFAULT; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ user_interaction.ctx = (void *) session_data; ++ kargs.ctx = (uintptr_t)session_data; + -+ _ump_ukk_switch_hw_usage(&user_interaction); ++ err = _mali_ukk_query_mmu_page_table_dump_size(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+ user_interaction.ctx = NULL; ++ if (0 != put_user(kargs.size, &uargs->size)) return -EFAULT; + -+#if 0 /* No data to copy back */ -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); -+ return -EFAULT; -+ } -+#endif -+ return 0; /* success */ ++ return 0; +} + -+int ump_lock_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs) +{ -+ _ump_uk_lock_s user_interaction; ++ _mali_uk_dump_mmu_page_table_s kargs; ++ _mali_osk_errcode_t err; ++ void __user *user_buffer; ++ void *buffer = NULL; ++ int rc = -EFAULT; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; ++ /* validate input */ ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ /* the session_data pointer was validated by caller */ ++ ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_dump_mmu_page_table_s))) ++ goto err_exit; ++ ++ user_buffer = (void __user *)(uintptr_t)kargs.buffer; ++ if (!access_ok(user_buffer, kargs.size)) ++ goto err_exit; ++ ++ /* allocate temporary buffer (kernel side) to store mmu page table info */ ++ if (kargs.size <= 0) ++ return -EINVAL; ++ /* Allow at most 8MiB buffers, this is more than enough to dump a fully ++ * populated page table. 
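 *
 * A minimal sketch of the intended query-then-dump pattern from user space,
 * assuming hypothetical ioctl request names (the real request macros come
 * from the installed Mali UAPI header, not from this file); only the size
 * cap and the struct fields used below are defined here:
 *
 *   _mali_uk_query_mmu_page_table_dump_size_s q = { 0 };
 *   ioctl(mali_fd, MALI_IOC_MEM_QUERY_MMU_PAGE_TABLE_DUMP_SIZE, &q);
 *
 *   _mali_uk_dump_mmu_page_table_s d = { 0 };
 *   d.size   = q.size;                     must be > 0 and at most 8 MiB
 *   d.buffer = (uintptr_t)malloc(q.size);
 *   ioctl(mali_fd, MALI_IOC_MEM_DUMP_MMU_PAGE_TABLE, &d);
 *
 *   On success d.register_writes and d.page_table_dump point into the
 *   caller's buffer, because the kernel rebases them before copying the
 *   struct back out.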
*/ ++ if (kargs.size > SZ_8M) ++ return -EINVAL; ++ ++ buffer = (void *)(uintptr_t)_mali_osk_valloc(kargs.size); ++ if (NULL == buffer) { ++ rc = -ENOMEM; ++ goto err_exit; + } + -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); -+ return -EFAULT; ++ kargs.ctx = (uintptr_t)session_data; ++ kargs.buffer = (uintptr_t)buffer; ++ err = _mali_ukk_dump_mmu_page_table(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ rc = map_errcode(err); ++ goto err_exit; + } + -+ user_interaction.ctx = (void *) session_data; ++ /* copy mmu page table info back to user space and update pointers */ ++ if (0 != copy_to_user(user_buffer, buffer, kargs.size)) ++ goto err_exit; + -+ _ump_ukk_lock(&user_interaction); ++ kargs.register_writes = kargs.register_writes - ++ (uintptr_t)buffer + (uintptr_t)user_buffer; ++ kargs.page_table_dump = kargs.page_table_dump - ++ (uintptr_t)buffer + (uintptr_t)user_buffer; + -+ user_interaction.ctx = NULL; ++ if (0 != copy_to_user(uargs, &kargs, sizeof(kargs))) ++ goto err_exit; + -+#if 0 /* No data to copy back */ -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); -+ return -EFAULT; -+ } -+#endif ++ rc = 0; + -+ return 0; /* success */ ++err_exit: ++ if (buffer) _mali_osk_vfree(buffer); ++ return rc; +} + -+int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs) +{ -+ _ump_uk_unlock_s user_interaction; ++ _mali_osk_errcode_t err; ++ _mali_uk_profiling_memory_usage_get_s kargs; + -+ /* Sanity check input parameters */ -+ if (NULL == argument || NULL == session_data) { -+ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); -+ return -ENOTTY; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { -+ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) { + return -EFAULT; + } + -+ user_interaction.ctx = (void *) session_data; -+ -+ _ump_ukk_unlock(&user_interaction); -+ -+ user_interaction.ctx = NULL; ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_mem_usage_get(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + -+#if 0 /* No data to copy back */ -+ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { -+ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); ++ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ ++ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_memory_usage_get_s))) { + return -EFAULT; + } -+#endif + -+ return 0; /* success */ ++ return 0; +} -diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h ++ +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c new file mode 100644 -index 000000000..5f8fc683c +index 000000000..a9b0958c0 --- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h -@@ -0,0 +1,46 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_pp.c +@@ -0,0 +1,105 @@ +/* + * Copyright (C) 2010, 2012-2014, 2016-2017 ARM 
Limited. All rights reserved. + * @@ -331532,160 +332974,110 @@ index 000000000..5f8fc683c + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ ++#include /* file system operations */ ++#include /* user space access */ + -+/** -+ * @file ump_ukk_wrappers.h -+ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls -+ */ ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+#ifndef __UMP_UKK_WRAPPERS_H__ -+#define __UMP_UKK_WRAPPERS_H__ ++int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs) ++{ ++ _mali_osk_errcode_t err; + -+#include -+#include "ump_kernel_common.h" ++ /* If the job was started successfully, 0 is returned. If there was an error, but the job ++ * was started, we return -ENOENT. For anything else returned, the job was not started. */ + -+#ifdef __cplusplus -+extern "C" { -+#endif ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + ++ err = _mali_ukk_pp_start_job(session_data, uargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + ++ return 0; ++} + -+int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_release_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_msync_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_lock_wrapper(u32 __user *argument, struct ump_session_data *session_data); -+int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs) ++{ ++ _mali_osk_errcode_t err; + ++ /* If the jobs were started successfully, 0 is returned. If there was an error, but the ++ * jobs were started, we return -ENOENT. For anything else returned, the jobs were not ++ * started. 
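 *
 * A minimal sketch of how user space would act on this return convention,
 * assuming a hypothetical ioctl request name (the real macro comes from the
 * Mali UAPI header, not from this file); only the 0 / -ENOENT meaning is
 * taken from the comment above:
 *
 *   if (ioctl(mali_fd, MALI_IOC_PP_AND_GP_START_JOB, &args) == 0) {
 *           jobs were submitted and started cleanly
 *   } else if (errno == ENOENT) {
 *           jobs were started, but the driver reported an error for them
 *   } else {
 *           jobs were not started at all
 *   }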
*/ + ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + ++ err = _mali_ukk_pp_and_gp_start_job(session_data, uargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+#ifdef __cplusplus ++ return 0; +} -+#endif + ++int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs) ++{ ++ _mali_uk_get_pp_number_of_cores_s kargs; ++ _mali_osk_errcode_t err; + ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+#endif /* __UMP_UKK_WRAPPERS_H__ */ -diff --git a/drivers/gpu/arm/mali400/ump/readme.txt b/drivers/gpu/arm/mali400/ump/readme.txt -new file mode 100755 -index 000000000..c238cf0f2 ---- /dev/null -+++ b/drivers/gpu/arm/mali400/ump/readme.txt -@@ -0,0 +1,28 @@ -+Building the UMP Device Driver for Linux -+---------------------------------------- ++ kargs.ctx = (uintptr_t)session_data; + -+Build the UMP Device Driver for Linux by running the following make command: ++ err = _mali_ukk_get_pp_number_of_cores(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + -+KDIR= CONFIG= BUILD= make -+ -+where -+ kdir_path: Path to your Linux Kernel directory -+ your_config: Name of the sub-folder to find the required config.h file -+ ("arch-" will be prepended) -+ build_option: debug or release. Debug is default. -+ -+The config.h contains following configuration parameters: -+ -+ARCH_UMP_BACKEND_DEFAULT -+ 0 specifies the dedicated memory allocator. -+ 1 specifies the OS memory allocator. -+ARCH_UMP_MEMORY_ADDRESS_DEFAULT -+ This is only required for the dedicated memory allocator, and specifies -+ the physical start address of the memory block reserved for UMP. -+ARCH_UMP_MEMORY_SIZE_DEFAULT -+ This specified the size of the memory block reserved for UMP, or the -+ maximum limit for allocations from the OS. -+ -+The result will be a ump.ko file, which can be loaded into the Linux kernel -+by using the insmod command. The driver can also be built as a part of the -+kernel itself. -diff --git a/drivers/gpu/arm/mali400/umplock/Makefile b/drivers/gpu/arm/mali400/umplock/Makefile -new file mode 100644 -index 000000000..e5549a33f ---- /dev/null -+++ b/drivers/gpu/arm/mali400/umplock/Makefile -@@ -0,0 +1,69 @@ -+# -+# Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the GNU General Public License version 2 -+# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained from Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-+# -+ -+# default to building for the host -+ARCH ?= $(shell uname -m) -+ -+# linux build system integration -+ -+ifneq ($(KERNELRELEASE),) -+# Inside the kernel build system -+ -+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -+ -+SRC = umplock_driver.c -+ -+MODULE:=umplock.ko -+ -+obj-m := $(MODULE:.ko=.o) -+$(MODULE:.ko=-y) := $(SRC:.c=.o) -+ -+$(MODULE:.ko=-objs) := $(SRC:.c=.o) ++ kargs.ctx = (uintptr_t)NULL; /* prevent kernel address to be returned to user space */ ++ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_get_pp_number_of_cores_s))) { ++ return -EFAULT; ++ } + -+else -+# Outside the kernel build system -+# -+# ++ return 0; ++} + -+# Get any user defined KDIR- or maybe even a hardcoded KDIR -+-include KDIR_CONFIGURATION ++int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs) ++{ ++ _mali_uk_get_pp_core_version_s kargs; ++ _mali_osk_errcode_t err; + -+# Define host system directory -+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+ifeq ($(ARCH), arm) -+ # when compiling for ARM we're cross compiling -+ export CROSS_COMPILE ?= arm-none-linux-gnueabi- -+ CONFIG ?= arm -+else -+ # Compiling for the host -+ CONFIG ?= $(shell uname -m) -+endif ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_get_pp_core_version(&kargs); ++ if (_MALI_OSK_ERR_OK != err) return map_errcode(err); + -+# default cpu to select -+CPU ?= $(shell uname -m) ++ if (0 != put_user(kargs.version, &uargs->version)) return -EFAULT; + -+# look up KDIR based om CPU selection -+KDIR ?= $(KDIR-$(CPU)) ++ return 0; ++} + -+ifeq ($(KDIR),) -+$(error No KDIR found for platform $(CPU)) -+endif ++int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs) ++{ ++ _mali_uk_pp_disable_wb_s kargs; + -+all: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session_data, -EINVAL); + -+kernelrelease: -+ $(MAKE) -C $(KDIR) kernelrelease ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_pp_disable_wb_s))) return -EFAULT; + -+clean: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean ++ kargs.ctx = (uintptr_t)session_data; ++ _mali_ukk_pp_job_disable_wb(&kargs); + -+endif -diff --git a/drivers/gpu/arm/mali400/umplock/umplock_driver.c b/drivers/gpu/arm/mali400/umplock/umplock_driver.c ++ return 0; ++} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c new file mode 100644 -index 000000000..173f4d9bb +index 000000000..8b49ebc50 --- /dev/null -+++ b/drivers/gpu/arm/mali400/umplock/umplock_driver.c -@@ -0,0 +1,618 @@ ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_profiling.c +@@ -0,0 +1,183 @@ +/* -+ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -331693,623 +333085,511 @@ index 000000000..173f4d9bb + * A copy of the licence is included with the program, and can also be obtained from Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+ -+#include -+#include -+#include ++#include /* file system operations */ ++#include /* user space access */ +#include -+#include -+#include -+#include -+#include "umplock_ioctl.h" -+#include -+ -+#define MAX_ITEMS 1024 -+#define MAX_PIDS 128 -+ -+typedef struct lock_cmd_priv { -+ uint32_t msg[128]; /*ioctl args*/ -+ u32 pid; /*process id*/ -+} _lock_cmd_priv; -+ -+typedef struct lock_ref { -+ int ref_count; -+ u32 pid; -+ u32 down_count; -+} _lock_ref; -+ -+typedef struct umplock_item { -+ u32 secure_id; -+ u32 id_ref_count; -+ u32 owner; -+ _lock_access_usage usage; -+ _lock_ref references[MAX_PIDS]; -+ struct semaphore item_lock; -+} umplock_item; -+ -+typedef struct umplock_device_private { -+ struct mutex item_list_lock; -+ atomic_t sessions; -+ umplock_item items[MAX_ITEMS]; -+ u32 pids[MAX_PIDS]; -+} umplock_device_private; + -+struct umplock_device { -+ struct cdev cdev; -+ struct class *umplock_class; -+}; ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+static struct umplock_device umplock_device; -+static umplock_device_private device; -+static dev_t umplock_dev; -+static char umplock_dev_name[] = "umplock"; ++int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs) ++{ ++ _mali_uk_profiling_add_event_s kargs; ++ _mali_osk_errcode_t err; + -+int umplock_debug_level = 0; -+module_param(umplock_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ -+MODULE_PARM_DESC(umplock_debug_level, "set umplock_debug_level to print debug messages"); ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+#define PDEBUG(level, fmt, args...) do { if ((level) <= umplock_debug_level) printk(KERN_DEBUG "umplock: " fmt, ##args); } while (0) -+#define PERROR(fmt, args...) 
do { printk(KERN_ERR "umplock: " fmt, ##args); } while (0) ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_add_event_s))) { ++ return -EFAULT; ++ } + -+int umplock_find_item(u32 secure_id) -+{ -+ int i; -+ for (i = 0; i < MAX_ITEMS; i++) { -+ if (device.items[i].secure_id == secure_id) { -+ return i; -+ } ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_profiling_add_event(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ return -1; ++ return 0; +} + -+static int umplock_find_item_by_pid(_lock_cmd_priv *lock_cmd, int *item_slot, int *ref_slot) ++int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs) +{ -+ _lock_item_s *lock_item; -+ int i, j; ++ _mali_uk_sw_counters_report_s kargs; ++ _mali_osk_errcode_t err; ++ u32 *counter_buffer; ++ u32 __user *counters; + -+ lock_item = (_lock_item_s *)&lock_cmd->msg; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ i = umplock_find_item(lock_item->secure_id); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_sw_counters_report_s))) { ++ return -EFAULT; ++ } + -+ if (i < 0) { -+ return -1; ++ /* make sure that kargs.num_counters is [at least somewhat] sane */ ++ if (kargs.num_counters > 10000) { ++ MALI_DEBUG_PRINT(1, ("User space attempted to allocate too many counters.\n")); ++ return -EINVAL; + } + -+ for (j = 0; j < MAX_PIDS; j++) { -+ if (device.items[i].references[j].pid == lock_cmd->pid) { -+ *item_slot = i; -+ *ref_slot = j; -+ return 0; -+ } ++ counter_buffer = (u32 *)kmalloc(sizeof(u32) * kargs.num_counters, GFP_KERNEL); ++ if (NULL == counter_buffer) { ++ return -ENOMEM; + } -+ return -1 ; -+} + -+static int umplock_find_client_valid(u32 pid) -+{ -+ int i; ++ counters = (u32 *)(uintptr_t)kargs.counters; + -+ if (pid == 0) { -+ return -1; ++ if (0 != copy_from_user(counter_buffer, counters, sizeof(u32) * kargs.num_counters)) { ++ kfree(counter_buffer); ++ return -EFAULT; + } + -+ for (i = 0; i < MAX_PIDS; i++) { -+ if (device.pids[i] == pid) { -+ return i; -+ } ++ kargs.ctx = (uintptr_t)session_data; ++ kargs.counters = (uintptr_t)counter_buffer; ++ ++ err = _mali_ukk_sw_counters_report(&kargs); ++ ++ kfree(counter_buffer); ++ ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + -+ return -1; ++ return 0; +} + -+static int do_umplock_create_locked(_lock_cmd_priv *lock_cmd) ++int profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs) +{ -+ int i_index, ref_index; -+ int ret; -+ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; ++ _mali_uk_profiling_stream_fd_get_s kargs; ++ _mali_osk_errcode_t err; + -+ i_index = ref_index = -1; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ ret = umplock_find_client_valid(lock_cmd->pid); -+ if (ret < 0) { -+ /*lock request from an invalid client pid, do nothing*/ -+ return -EINVAL; ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) { ++ return -EFAULT; + } + -+ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); -+ if (ret >= 0) { -+ } else if ((i_index = umplock_find_item(lock_item->secure_id)) >= 0) { -+ for (ref_index = 0; ref_index < MAX_PIDS; ref_index++) { -+ if (device.items[i_index].references[ref_index].pid == 0) { -+ break; -+ } -+ } -+ if (ref_index < MAX_PIDS) { -+ device.items[i_index].references[ref_index].pid = lock_cmd->pid; -+ device.items[i_index].references[ref_index].ref_count = 0; -+ 
device.items[i_index].references[ref_index].down_count = 0; -+ } else { -+ PERROR("whoops, item ran out of available reference slots\n"); -+ return -EINVAL; -+ -+ } -+ } else { -+ i_index = umplock_find_item(0); ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_profiling_stream_fd_get(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } + -+ if (i_index >= 0) { -+ device.items[i_index].secure_id = lock_item->secure_id; -+ device.items[i_index].id_ref_count = 0; -+ device.items[i_index].usage = lock_item->usage; -+ device.items[i_index].references[0].pid = lock_cmd->pid; -+ device.items[i_index].references[0].ref_count = 0; -+ device.items[i_index].references[0].down_count = 0; -+ sema_init(&device.items[i_index].item_lock, 1); -+ } else { -+ PERROR("whoops, ran out of available slots\n"); -+ return -EINVAL; -+ } ++ if (0 != copy_to_user(uargs, &kargs, sizeof(_mali_uk_profiling_stream_fd_get_s))) { ++ return -EFAULT; + } + + return 0; +} -+/** IOCTLs **/ + -+static int do_umplock_create(_lock_cmd_priv *lock_cmd) ++int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs) +{ -+ return 0; -+} ++ _mali_uk_profiling_control_set_s kargs; ++ _mali_osk_errcode_t err; ++ u8 *kernel_control_data = NULL; ++ u8 *kernel_response_data = NULL; + -+static int do_umplock_process(_lock_cmd_priv *lock_cmd) -+{ -+ int ret, i_index, ref_index; -+ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ mutex_lock(&device.item_list_lock); ++ if (0 != get_user(kargs.control_packet_size, &uargs->control_packet_size)) return -EFAULT; ++ if (0 != get_user(kargs.response_packet_size, &uargs->response_packet_size)) return -EFAULT; + -+ if (0 == lock_item->secure_id) { -+ PERROR("IOCTL_UMPLOCK_PROCESS called with secure_id is 0, pid: %d\n", lock_cmd->pid); -+ mutex_unlock(&device.item_list_lock); -+ return -EINVAL; -+ } ++ kargs.ctx = (uintptr_t)session_data; + -+ ret = do_umplock_create_locked(lock_cmd); -+ if (ret < 0) { -+ mutex_unlock(&device.item_list_lock); -+ return -EINVAL; -+ } + -+ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); -+ if (ret < 0) { -+ /*fail to find a item*/ -+ PERROR("IOCTL_UMPLOCK_PROCESS called with invalid parameter, pid: %d\n", lock_cmd->pid); -+ mutex_unlock(&device.item_list_lock); ++ /* Sanity check about the size */ ++ if (kargs.control_packet_size > PAGE_SIZE || kargs.response_packet_size > PAGE_SIZE) + return -EINVAL; -+ } -+ device.items[i_index].references[ref_index].ref_count++; -+ device.items[i_index].id_ref_count++; -+ PDEBUG(1, "try to lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); + -+ if (lock_cmd->pid == device.items[i_index].owner) { -+ PDEBUG(1, "already own the lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); -+ mutex_unlock(&device.item_list_lock); -+ return 0; -+ } ++ if (0 != kargs.control_packet_size) { + -+ device.items[i_index].references[ref_index].down_count++; -+ mutex_unlock(&device.item_list_lock); -+ if (down_interruptible(&device.items[i_index].item_lock)) { -+ /*wait up without hold the umplock. 
restore previous state and return*/ -+ mutex_lock(&device.item_list_lock); -+ device.items[i_index].references[ref_index].ref_count--; -+ device.items[i_index].id_ref_count--; -+ device.items[i_index].references[ref_index].down_count--; -+ if (0 == device.items[i_index].references[ref_index].ref_count) { -+ device.items[i_index].references[ref_index].pid = 0; -+ if (0 == device.items[i_index].id_ref_count) { -+ PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); -+ device.items[i_index].secure_id = 0; -+ } ++ if (0 == kargs.response_packet_size) ++ return -EINVAL; ++ ++ kernel_control_data = _mali_osk_calloc(1, kargs.control_packet_size); ++ if (NULL == kernel_control_data) { ++ return -ENOMEM; + } + -+ PERROR("failed lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); ++ kernel_response_data = _mali_osk_calloc(1, kargs.response_packet_size); ++ if (NULL == kernel_response_data) { ++ _mali_osk_free(kernel_control_data); ++ return -ENOMEM; ++ } + -+ mutex_unlock(&device.item_list_lock); -+ return -ERESTARTSYS; -+ } ++ kargs.control_packet_data = (uintptr_t)kernel_control_data; ++ kargs.response_packet_data = (uintptr_t)kernel_response_data; + -+ mutex_lock(&device.item_list_lock); -+ PDEBUG(1, "got lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); -+ device.items[i_index].owner = lock_cmd->pid; -+ mutex_unlock(&device.item_list_lock); ++ if (0 != copy_from_user((void *)(uintptr_t)kernel_control_data, (void *)(uintptr_t)uargs->control_packet_data, kargs.control_packet_size)) { ++ _mali_osk_free(kernel_control_data); ++ _mali_osk_free(kernel_response_data); ++ return -EFAULT; ++ } ++ ++ err = _mali_ukk_profiling_control_set(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ _mali_osk_free(kernel_control_data); ++ _mali_osk_free(kernel_response_data); ++ return map_errcode(err); ++ } ++ ++ if (0 != kargs.response_packet_size && 0 != copy_to_user(((void *)(uintptr_t)uargs->response_packet_data), ((void *)(uintptr_t)kargs.response_packet_data), kargs.response_packet_size)) { ++ _mali_osk_free(kernel_control_data); ++ _mali_osk_free(kernel_response_data); ++ return -EFAULT; ++ } ++ ++ if (0 != put_user(kargs.response_packet_size, &uargs->response_packet_size)) { ++ _mali_osk_free(kernel_control_data); ++ _mali_osk_free(kernel_response_data); ++ return -EFAULT; ++ } + ++ _mali_osk_free(kernel_control_data); ++ _mali_osk_free(kernel_response_data); ++ } else { ++ ++ err = _mali_ukk_profiling_control_set(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); ++ } ++ ++ } + return 0; +} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c +new file mode 100644 +index 000000000..1dd4a7c6f +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_soft_job.c +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ ++#include /* file system operations */ ++#include /* user space access */ + -+static int do_umplock_release(_lock_cmd_priv *lock_cmd) ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" ++ ++#include "mali_soft_job.h" ++#include "mali_timeline.h" ++ ++int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs) +{ -+ int ret, i_index, ref_index, call_up; -+ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; ++ _mali_uk_soft_job_start_s kargs; ++ u32 type, point; ++ u64 user_job; ++ struct mali_timeline_fence fence; ++ struct mali_soft_job *job = NULL; ++ u32 __user *job_id_ptr = NULL; + -+ mutex_lock(&device.item_list_lock); ++ /* If the job was started successfully, 0 is returned. If there was an error, but the job ++ * was started, we return -ENOENT. For anything else returned, the job was not started. */ + -+ if (0 == lock_item->secure_id) { -+ PERROR("IOCTL_UMPLOCK_RELEASE called with secure_id is 0, pid: %d\n", lock_cmd->pid); -+ mutex_unlock(&device.item_list_lock); -+ return -EINVAL; -+ } ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); ++ MALI_CHECK_NON_NULL(session, -EINVAL); + -+ ret = umplock_find_client_valid(lock_cmd->pid); -+ if (ret < 0) { -+ /*lock request from an invalid client pid, do nothing*/ -+ mutex_unlock(&device.item_list_lock); -+ return -EPERM; ++ MALI_DEBUG_ASSERT_POINTER(session->soft_job_system); ++ ++ if (0 != copy_from_user(&kargs, uargs, sizeof(kargs))) { ++ return -EFAULT; + } + -+ i_index = ref_index = -1; ++ type = kargs.type; ++ user_job = kargs.user_job; ++ job_id_ptr = (u32 __user *)(uintptr_t)kargs.job_id_ptr; + -+ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); -+ if (ret < 0) { -+ /*fail to find item*/ -+ PERROR("IOCTL_UMPLOCK_RELEASE called with invalid parameter pid: %d, secid: 0x%x\n", lock_cmd->pid, lock_item->secure_id); -+ mutex_unlock(&device.item_list_lock); ++ mali_timeline_fence_copy_uk_fence(&fence, &kargs.fence); ++ ++ if ((MALI_SOFT_JOB_TYPE_USER_SIGNALED != type) && (MALI_SOFT_JOB_TYPE_SELF_SIGNALED != type)) { ++ MALI_DEBUG_PRINT_ERROR(("Invalid soft job type specified\n")); + return -EINVAL; + } + -+ /* if the lock is not owned by this process */ -+ if (lock_cmd->pid != device.items[i_index].owner) { -+ mutex_unlock(&device.item_list_lock); -+ return -EPERM; ++ /* Create soft job. */ ++ job = mali_soft_job_create(session->soft_job_system, (enum mali_soft_job_type)type, user_job); ++ if (unlikely(NULL == job)) { ++ return map_errcode(_MALI_OSK_ERR_NOMEM); + } + -+ /* if the ref_count is 0, that means nothing to unlock, just return */ -+ if (0 == device.items[i_index].references[ref_index].ref_count) { -+ mutex_unlock(&device.item_list_lock); -+ return 0; ++ /* Write job id back to user space. */ ++ if (0 != put_user(job->id, job_id_ptr)) { ++ MALI_PRINT_ERROR(("Mali Soft Job: failed to put job id")); ++ mali_soft_job_destroy(job); ++ return map_errcode(_MALI_OSK_ERR_NOMEM); + } + -+ device.items[i_index].references[ref_index].ref_count--; -+ device.items[i_index].id_ref_count--; -+ PDEBUG(1, "unlock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); ++ /* Start soft job. 
*/ ++ point = mali_soft_job_start(job, &fence); + -+ call_up = 0; -+ if (device.items[i_index].references[ref_index].down_count > 1) { -+ call_up = 1; -+ device.items[i_index].references[ref_index].down_count--; -+ } -+ if (0 == device.items[i_index].references[ref_index].ref_count) { -+ device.items[i_index].references[ref_index].pid = 0; -+ if (0 == device.items[i_index].id_ref_count) { -+ PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); -+ device.items[i_index].secure_id = 0; -+ } -+ device.items[i_index].owner = 0; -+ call_up = 1; -+ } -+ if (call_up) { -+ PDEBUG(1, "call up, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); -+ up(&device.items[i_index].item_lock); ++ if (0 != put_user(point, &uargs->point)) { ++ /* Let user space know that something failed after the job was started. */ ++ return -ENOENT; + } -+ mutex_unlock(&device.item_list_lock); + + return 0; +} + -+static int do_umplock_zap(void) ++int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs) +{ -+ int i; -+ -+ PDEBUG(1, "ZAP ALL ENTRIES!\n"); ++ u32 job_id; ++ _mali_osk_errcode_t err; + -+ mutex_lock(&device.item_list_lock); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ for (i = 0; i < MAX_ITEMS; i++) { -+ device.items[i].secure_id = 0; -+ memset(&device.items[i].references, 0, sizeof(_lock_ref) * MAX_PIDS); -+ sema_init(&device.items[i].item_lock, 1); -+ } ++ if (0 != get_user(job_id, &uargs->job_id)) return -EFAULT; + -+ for (i = 0; i < MAX_PIDS; i++) { -+ device.pids[i] = 0; -+ } -+ mutex_unlock(&device.item_list_lock); ++ err = mali_soft_job_system_signal_job(session->soft_job_system, job_id); + -+ return 0; ++ return map_errcode(err); +} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c +new file mode 100644 +index 000000000..ff0c90939 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_timeline.c +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ ++#include /* file system operations */ ++#include /* user space access */ + -+static int do_umplock_dump(void) -+{ -+ int i, j; -+ -+ mutex_lock(&device.item_list_lock); -+ PERROR("dump all the items begin\n"); -+ for (i = 0; i < MAX_ITEMS; i++) { -+ for (j = 0; j < MAX_PIDS; j++) { -+ if (device.items[i].secure_id != 0 && device.items[i].references[j].pid != 0) { -+ PERROR("item[%d]->secure_id=0x%x, owner=%d\t reference[%d].ref_count=%d.pid=%d\n", -+ i, -+ device.items[i].secure_id, -+ device.items[i].owner, -+ j, -+ device.items[i].references[j].ref_count, -+ device.items[i].references[j].pid); -+ } -+ } -+ } -+ PERROR("dump all the items end\n"); -+ mutex_unlock(&device.item_list_lock); -+ -+ return 0; -+} ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" + -+int do_umplock_client_add(_lock_cmd_priv *lock_cmd) -+{ -+ int i; -+ mutex_lock(&device.item_list_lock); -+ for (i = 0; i < MAX_PIDS; i++) { -+ if (device.pids[i] == lock_cmd->pid) { -+ mutex_unlock(&device.item_list_lock); -+ return 0; -+ } -+ } -+ for (i = 0; i < MAX_PIDS; i++) { -+ if (device.pids[i] == 0) { -+ device.pids[i] = lock_cmd->pid; -+ break; -+ } -+ } -+ mutex_unlock(&device.item_list_lock); -+ if (i == MAX_PIDS) { -+ PERROR("Oops, Run out of client slots\n "); -+ return -EINVAL; -+ } -+ return 0; -+} ++#include "mali_timeline.h" ++#include "mali_timeline_fence_wait.h" ++#include "mali_timeline_sync_fence.h" + -+int do_umplock_client_delete(_lock_cmd_priv *lock_cmd) ++int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs) +{ -+ int p_index = -1, i_index = -1, ref_index = -1; -+ int ret; -+ _lock_item_s *lock_item; -+ lock_item = (_lock_item_s *)&lock_cmd->msg; -+ -+ mutex_lock(&device.item_list_lock); -+ p_index = umplock_find_client_valid(lock_cmd->pid); -+ /*lock item pid is not valid.*/ -+ if (p_index < 0) { -+ mutex_unlock(&device.item_list_lock); -+ return 0; -+ } ++ u32 val; ++ mali_timeline_id timeline; ++ mali_timeline_point point; + -+ /*walk through umplock item list and release reference attached to this client*/ -+ for (i_index = 0; i_index < MAX_ITEMS; i_index++) { -+ lock_item->secure_id = device.items[i_index].secure_id; ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ /*find the item index and reference slot for the lock_item*/ -+ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); ++ if (0 != get_user(val, &uargs->timeline)) return -EFAULT; + -+ if (ret < 0) { -+ /*client has no reference on this umplock item, skip*/ -+ continue; -+ } -+ while (device.items[i_index].references[ref_index].ref_count) { -+ /*release references on this client*/ ++ if (MALI_UK_TIMELINE_MAX <= val) { ++ return -EINVAL; ++ } + -+ PDEBUG(1, "delete client, pid: %d, ref_count: %d\n", lock_cmd->pid, device.items[i_index].references[ref_index].ref_count); ++ timeline = (mali_timeline_id)val; + -+ mutex_unlock(&device.item_list_lock); -+ do_umplock_release(lock_cmd); -+ mutex_lock(&device.item_list_lock); -+ } -+ } ++ point = mali_timeline_system_get_latest_point(session->timeline_system, timeline); + -+ /*remove the pid from umplock valid pid list*/ -+ device.pids[p_index] = 0; -+ mutex_unlock(&device.item_list_lock); ++ if (0 != put_user(point, &uargs->point)) return -EFAULT; + + return 0; +} + -+static long umplock_driver_ioctl(struct file *f, unsigned int cmd, unsigned long arg) ++int timeline_wait_wrapper(struct mali_session_data *session, 
_mali_uk_timeline_wait_s __user *uargs) +{ -+ int ret; -+ uint32_t size = _IOC_SIZE(cmd); -+ _lock_cmd_priv lock_cmd ; -+ -+ if (_IOC_TYPE(cmd) != LOCK_IOCTL_GROUP) { -+ return -ENOTTY; -+ } -+ -+ if (_IOC_NR(cmd) >= LOCK_IOCTL_MAX_CMDS) { -+ return -ENOTTY; -+ } -+ -+ switch (cmd) { -+ case LOCK_IOCTL_CREATE: -+ if (size != sizeof(_lock_item_s)) { -+ return -ENOTTY; -+ } -+ -+ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { -+ return -EFAULT; -+ } -+ lock_cmd.pid = (u32)current->tgid; -+ ret = do_umplock_create(&lock_cmd); -+ if (ret) { -+ return ret; -+ } -+ return 0; -+ -+ case LOCK_IOCTL_PROCESS: -+ if (size != sizeof(_lock_item_s)) { -+ return -ENOTTY; -+ } ++ u32 timeout, status; ++ mali_bool ret; ++ _mali_uk_fence_t uk_fence; ++ struct mali_timeline_fence fence; + -+ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { -+ return -EFAULT; -+ } -+ lock_cmd.pid = (u32)current->tgid; -+ return do_umplock_process(&lock_cmd); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ case LOCK_IOCTL_RELEASE: -+ if (size != sizeof(_lock_item_s)) { -+ return -ENOTTY; -+ } ++ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT; ++ if (0 != get_user(timeout, &uargs->timeout)) return -EFAULT; + -+ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { -+ return -EFAULT; -+ } -+ lock_cmd.pid = (u32)current->tgid; -+ ret = do_umplock_release(&lock_cmd); -+ if (ret) { -+ return ret; -+ } -+ return 0; ++ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence); + -+ case LOCK_IOCTL_ZAP: -+ do_umplock_zap(); -+ return 0; ++ ret = mali_timeline_fence_wait(session->timeline_system, &fence, timeout); ++ status = (MALI_TRUE == ret ? 1 : 0); + -+ case LOCK_IOCTL_DUMP: -+ do_umplock_dump(); -+ return 0; -+ } ++ if (0 != put_user(status, &uargs->status)) return -EFAULT; + -+ return -ENOIOCTLCMD; ++ return 0; +} + -+static int umplock_driver_open(struct inode *inode, struct file *filp) ++int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs) +{ -+ _lock_cmd_priv lock_cmd; ++ s32 sync_fd = -1; ++ _mali_uk_fence_t uk_fence; ++ struct mali_timeline_fence fence; + -+ atomic_inc(&device.sessions); -+ PDEBUG(1, "OPEN SESSION (%i references)\n", atomic_read(&device.sessions)); ++ MALI_DEBUG_ASSERT_POINTER(session); + -+ lock_cmd.pid = (u32)current->tgid; -+ do_umplock_client_add(&lock_cmd); ++ if (0 != copy_from_user(&uk_fence, &uargs->fence, sizeof(_mali_uk_fence_t))) return -EFAULT; ++ mali_timeline_fence_copy_uk_fence(&fence, &uk_fence); ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ sync_fd = mali_timeline_sync_fence_create(session->timeline_system, &fence); ++#else ++ sync_fd = -1; ++#endif /* defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) */ ++ ++ if (0 != put_user(sync_fd, &uargs->sync_fd)) return -EFAULT; + + return 0; +} +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c +new file mode 100644 +index 000000000..52519d1f9 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_vsync.c +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (C) 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++#include /* file system operations */ ++#include /* user space access */ + -+static int umplock_driver_release(struct inode *inode, struct file *filp) ++#include "mali_ukk.h" ++#include "mali_osk.h" ++#include "mali_kernel_common.h" ++#include "mali_session.h" ++#include "mali_ukk_wrappers.h" ++ ++ ++int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs) +{ -+ int sessions = 0; -+ _lock_cmd_priv lock_cmd; ++ _mali_uk_vsync_event_report_s kargs; ++ _mali_osk_errcode_t err; + -+ lock_cmd.pid = (u32)current->tgid; -+ do_umplock_client_delete(&lock_cmd); ++ MALI_CHECK_NON_NULL(uargs, -EINVAL); + -+ mutex_lock(&device.item_list_lock); -+ atomic_dec(&device.sessions); -+ sessions = atomic_read(&device.sessions); -+ PDEBUG(1, "CLOSE SESSION (%i references)\n", sessions); -+ mutex_unlock(&device.item_list_lock); -+ if (sessions == 0) { -+ do_umplock_zap(); ++ if (0 != copy_from_user(&kargs, uargs, sizeof(_mali_uk_vsync_event_report_s))) { ++ return -EFAULT; ++ } ++ ++ kargs.ctx = (uintptr_t)session_data; ++ err = _mali_ukk_vsync_event_report(&kargs); ++ if (_MALI_OSK_ERR_OK != err) { ++ return map_errcode(err); + } + + return 0; +} + -+static struct file_operations umplock_fops = { -+ .owner = THIS_MODULE, -+ .open = umplock_driver_open, -+ .release = umplock_driver_release, -+ .unlocked_ioctl = umplock_driver_ioctl, -+}; -+ -+int umplock_device_initialize(void) -+{ -+ int err; -+ -+ err = alloc_chrdev_region(&umplock_dev, 0, 1, umplock_dev_name); -+ -+ if (0 == err) { -+ memset(&umplock_device, 0, sizeof(umplock_device)); -+ cdev_init(&umplock_device.cdev, &umplock_fops); -+ umplock_device.cdev.owner = THIS_MODULE; -+ umplock_device.cdev.ops = &umplock_fops; +diff --git a/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h +new file mode 100644 +index 000000000..1add628fe +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/linux/mali_ukk_wrappers.h +@@ -0,0 +1,82 @@ ++/* ++ * Copyright (C) 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ err = cdev_add(&umplock_device.cdev, umplock_dev, 1); -+ if (0 == err) { -+ umplock_device.umplock_class = class_create(THIS_MODULE, umplock_dev_name); -+ if (IS_ERR(umplock_device.umplock_class)) { -+ err = PTR_ERR(umplock_device.umplock_class); -+ } else { -+ struct device *mdev; -+ mdev = device_create(umplock_device.umplock_class, NULL, umplock_dev, NULL, umplock_dev_name); -+ if (!IS_ERR(mdev)) { -+ return 0; /* all ok */ -+ } ++/** ++ * @file mali_ukk_wrappers.h ++ * Defines the wrapper functions for each user-kernel function ++ */ + -+ err = PTR_ERR(mdev); -+ class_destroy(umplock_device.umplock_class); -+ } -+ cdev_del(&umplock_device.cdev); -+ } ++#ifndef __MALI_UKK_WRAPPERS_H__ ++#define __MALI_UKK_WRAPPERS_H__ + -+ unregister_chrdev_region(umplock_dev, 1); -+ } else { -+ PERROR("alloc chardev region failed\n"); -+ } ++#include "mali_uk_types.h" ++#include "mali_osk.h" + -+ return err; -+} ++#ifdef __cplusplus ++extern "C" { ++#endif + -+void umplock_device_terminate(void) -+{ -+ device_destroy(umplock_device.umplock_class, umplock_dev); -+ class_destroy(umplock_device.umplock_class); ++int wait_for_notification_wrapper(struct mali_session_data *session_data, _mali_uk_wait_for_notification_s __user *uargs); ++int get_api_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_s __user *uargs); ++int get_api_version_v2_wrapper(struct mali_session_data *session_data, _mali_uk_get_api_version_v2_s __user *uargs); ++int get_user_settings_wrapper(struct mali_session_data *session_data, _mali_uk_get_user_settings_s __user *uargs); ++int post_notification_wrapper(struct mali_session_data *session_data, _mali_uk_post_notification_s __user *uargs); ++int request_high_priority_wrapper(struct mali_session_data *session_data, _mali_uk_request_high_priority_s __user *uargs); ++int pending_submit_wrapper(struct mali_session_data *session_data, _mali_uk_pending_submit_s __user *uargs); + -+ cdev_del(&umplock_device.cdev); -+ unregister_chrdev_region(umplock_dev, 1); -+} ++/* rk_ext : 从对 r5p0-01rel0 集æˆå¼€å§‹, ä¸å†ä½¿ç”¨. */ ++#if 0 ++int get_mali_version_in_rk30_wrapper(struct mali_session_data *session_data, _mali_uk_get_mali_version_in_rk30_s __user *uargs); ++#else ++int get_rk_ko_version_wrapper(struct mali_session_data *session_data, _mali_rk_ko_version_s __user *uargs); ++#endif + -+static int __init umplock_initialize_module(void) -+{ -+ PDEBUG(1, "Inserting UMP lock device driver. 
Compiled: %s, time: %s\n", __DATE__, __TIME__); ++int mem_alloc_wrapper(struct mali_session_data *session_data, _mali_uk_alloc_mem_s __user *uargs); ++int mem_free_wrapper(struct mali_session_data *session_data, _mali_uk_free_mem_s __user *uargs); ++int mem_bind_wrapper(struct mali_session_data *session_data, _mali_uk_bind_mem_s __user *uargs); ++int mem_unbind_wrapper(struct mali_session_data *session_data, _mali_uk_unbind_mem_s __user *uargs); ++int mem_cow_wrapper(struct mali_session_data *session_data, _mali_uk_cow_mem_s __user *uargs); ++int mem_cow_modify_range_wrapper(struct mali_session_data *session_data, _mali_uk_cow_modify_range_s __user *uargs); ++int mem_resize_mem_wrapper(struct mali_session_data *session_data, _mali_uk_mem_resize_s __user *uargs); ++int mem_write_safe_wrapper(struct mali_session_data *session_data, _mali_uk_mem_write_safe_s __user *uargs); ++int mem_query_mmu_page_table_dump_size_wrapper(struct mali_session_data *session_data, _mali_uk_query_mmu_page_table_dump_size_s __user *uargs); ++int mem_dump_mmu_page_table_wrapper(struct mali_session_data *session_data, _mali_uk_dump_mmu_page_table_s __user *uargs); ++int mem_usage_get_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_memory_usage_get_s __user *uargs); + -+ mutex_init(&device.item_list_lock); -+ if (umplock_device_initialize() != 0) { -+ PERROR("UMP lock device driver init failed\n"); -+ return -ENOTTY; -+ } -+ memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS); -+ memset(&device.pids, 0, sizeof(u32) * MAX_PIDS); -+ atomic_set(&device.sessions, 0); ++int timeline_get_latest_point_wrapper(struct mali_session_data *session, _mali_uk_timeline_get_latest_point_s __user *uargs); ++int timeline_wait_wrapper(struct mali_session_data *session, _mali_uk_timeline_wait_s __user *uargs); ++int timeline_create_sync_fence_wrapper(struct mali_session_data *session, _mali_uk_timeline_create_sync_fence_s __user *uargs); ++int soft_job_start_wrapper(struct mali_session_data *session, _mali_uk_soft_job_start_s __user *uargs); ++int soft_job_signal_wrapper(struct mali_session_data *session, _mali_uk_soft_job_signal_s __user *uargs); ++int pp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_start_job_s __user *uargs); ++int pp_and_gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_pp_and_gp_start_job_s __user *uargs); ++int pp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_number_of_cores_s __user *uargs); ++int pp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_pp_core_version_s __user *uargs); ++int pp_disable_wb_wrapper(struct mali_session_data *session_data, _mali_uk_pp_disable_wb_s __user *uargs); ++int gp_start_job_wrapper(struct mali_session_data *session_data, _mali_uk_gp_start_job_s __user *uargs); ++int gp_get_number_of_cores_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_number_of_cores_s __user *uargs); ++int gp_get_core_version_wrapper(struct mali_session_data *session_data, _mali_uk_get_gp_core_version_s __user *uargs); ++int gp_suspend_response_wrapper(struct mali_session_data *session_data, _mali_uk_gp_suspend_response_s __user *uargs); + -+ PDEBUG(1, "UMP lock device driver loaded\n"); ++int profiling_add_event_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_add_event_s __user *uargs); ++int profiling_report_sw_counters_wrapper(struct mali_session_data *session_data, _mali_uk_sw_counters_report_s __user *uargs); ++int 
profiling_get_stream_fd_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_stream_fd_get_s __user *uargs); ++int profiling_control_set_wrapper(struct mali_session_data *session_data, _mali_uk_profiling_control_set_s __user *uargs); + -+ return 0; -+} ++int vsync_event_report_wrapper(struct mali_session_data *session_data, _mali_uk_vsync_event_report_s __user *uargs); + -+static void __exit umplock_cleanup_module(void) -+{ -+ PDEBUG(1, "unloading UMP lock module\n"); + -+ memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS); -+ memset(&device.pids, 0, sizeof(u32) * MAX_PIDS); -+ umplock_device_terminate(); -+ mutex_destroy(&device.item_list_lock); ++int map_errcode(_mali_osk_errcode_t err); + -+ PDEBUG(1, "UMP lock module unloaded\n"); ++#ifdef __cplusplus +} ++#endif + -+module_init(umplock_initialize_module); -+module_exit(umplock_cleanup_module); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_DESCRIPTION("ARM UMP locker"); -diff --git a/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h b/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h ++#endif /* __MALI_UKK_WRAPPERS_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm.c b/drivers/gpu/arm/mali400/mali/platform/arm/arm.c new file mode 100644 -index 000000000..8afdaad70 +index 000000000..fc7017bbf --- /dev/null -+++ b/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h -@@ -0,0 +1,66 @@ ++++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm.c +@@ -0,0 +1,629 @@ +/* -+ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved. ++ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. @@ -332318,9273 +333598,8822 @@ index 000000000..8afdaad70 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef __UMPLOCK_IOCTL_H__ -+#define __UMPLOCK_IOCTL_H__ ++/** ++ * @file mali_platform.c ++ * Platform specific Mali driver functions for: ++ * - Realview Versatile platforms with ARM11 Mpcore and virtex 5. ++ * - Versatile Express platforms with ARM Cortex-A9 and virtex 6. ++ */ ++#include ++#include ++#include ++#include "mali_kernel_linux.h" ++#ifdef CONFIG_PM_RUNTIME ++#include ++#endif ++#include ++#include ++#include "mali_kernel_common.h" ++#include ++#include + -+#ifdef __cplusplus -+extern "C" { ++#include "arm_core_scaling.h" ++#include "mali_executor.h" ++ ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++#include ++#include +#endif + -+#include -+#include ++static int mali_core_scaling_enable = 0; + -+#ifndef __user -+#define __user ++void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data); ++static u32 mali_read_phys(u32 phys_addr); ++#if defined(CONFIG_ARCH_REALVIEW) ++static void mali_write_phys(u32 phys_addr, u32 value); +#endif + ++#if defined(CONFIG_ARCH_VEXPRESS) && defined(CONFIG_ARM64) + ++#define SECURE_MODE_CONTROL_HANDLER 0x6F02006C ++void *secure_mode_mapped_addr = NULL; +/** -+ * @file umplock_ioctl.h -+ * This file describes the interface needed to use the Linux device driver. -+ * The interface is used by the userpace Mali DDK. ++ * Reset GPU and enable/disable Mali secure mode. ++ * @Return value: ++ * 0: success ++ * non-0: failure. 
+ */ + -+typedef enum { -+ _LOCK_ACCESS_RENDERABLE = 1, -+ _LOCK_ACCESS_TEXTURE, -+ _LOCK_ACCESS_CPU_WRITE, -+ _LOCK_ACCESS_CPU_READ, -+} _lock_access_usage; ++static int mali_gpu_reset_and_secure_mode_enable_juno(void) ++{ ++ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; ++ MALI_DEBUG_ASSERT(NULL != secure_mode_mapped_addr); + -+typedef struct _lock_item_s { -+ unsigned int secure_id; -+ _lock_access_usage usage; -+} _lock_item_s; ++ iowrite32(1, ((u8 *)secure_mode_mapped_addr) + phys_offset); + ++ if (1 == (u32)ioread32(((u8 *)secure_mode_mapped_addr) + phys_offset)) { ++ MALI_DEBUG_PRINT(3, ("Mali reset GPU and enable secured mode successfully! \n")); ++ return 0; ++ } + -+#define LOCK_IOCTL_GROUP 0x91 ++ MALI_PRINT_ERROR(("Failed to reset GPU and enable Mali secured mode !!! \n")); + -+#define _LOCK_IOCTL_CREATE_CMD 0 /* create kernel lock item */ -+#define _LOCK_IOCTL_PROCESS_CMD 1 /* process kernel lock item */ -+#define _LOCK_IOCTL_RELEASE_CMD 2 /* release kernel lock item */ -+#define _LOCK_IOCTL_ZAP_CMD 3 /* clean up all kernel lock items */ -+#define _LOCK_IOCTL_DUMP_CMD 4 /* dump all the items */ ++ return -1; + -+#define LOCK_IOCTL_MAX_CMDS 5 ++} + -+#define LOCK_IOCTL_CREATE _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_CREATE_CMD, _lock_item_s ) -+#define LOCK_IOCTL_PROCESS _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_PROCESS_CMD, _lock_item_s ) -+#define LOCK_IOCTL_RELEASE _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_RELEASE_CMD, _lock_item_s ) -+#define LOCK_IOCTL_ZAP _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_ZAP_CMD ) -+#define LOCK_IOCTL_DUMP _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_DUMP_CMD ) ++static int mali_gpu_reset_and_secure_mode_disable_juno(void) ++{ ++ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; ++ MALI_DEBUG_ASSERT(NULL != secure_mode_mapped_addr); + -+#ifdef __cplusplus ++ iowrite32(0, ((u8 *)secure_mode_mapped_addr) + phys_offset); ++ ++ if (0 == (u32)ioread32(((u8 *)secure_mode_mapped_addr) + phys_offset)) { ++ MALI_DEBUG_PRINT(3, ("Mali reset GPU and disable secured mode successfully! \n")); ++ return 0; ++ } ++ ++ MALI_PRINT_ERROR(("Failed to reset GPU and disable mali secured mode !!! \n")); ++ return -1; +} -+#endif + -+#endif /* __UMPLOCK_IOCTL_H__ */ ++static int mali_secure_mode_init_juno(void) ++{ ++ u32 phys_addr_page = SECURE_MODE_CONTROL_HANDLER & 0xFFFFE000; ++ u32 phys_offset = SECURE_MODE_CONTROL_HANDLER & 0x00001FFF; ++ u32 map_size = phys_offset + sizeof(u32); + -diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild -new file mode 100755 -index 000000000..b2c2bbcda ---- /dev/null -+++ b/drivers/gpu/arm/midgard/Kbuild -@@ -0,0 +1,221 @@ -+# -+# (C) COPYRIGHT 2012-2016, 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ MALI_DEBUG_ASSERT(NULL == secure_mode_mapped_addr); + -+KBUILD_CFLAGS += -include rename.h ++ secure_mode_mapped_addr = ioremap(phys_addr_page, map_size); ++ if (NULL != secure_mode_mapped_addr) { ++ return mali_gpu_reset_and_secure_mode_disable_juno(); ++ } ++ MALI_DEBUG_PRINT(2, ("Failed to ioremap for Mali secured mode! 
\n")); ++ return -1; ++} + -+# Driver version string which is returned to userspace via an ioctl -+MALI_RELEASE_NAME ?= "r18p0-01rel0" ++static void mali_secure_mode_deinit_juno(void) ++{ ++ if (NULL != secure_mode_mapped_addr) { ++ mali_gpu_reset_and_secure_mode_disable_juno(); ++ iounmap(secure_mode_mapped_addr); ++ secure_mode_mapped_addr = NULL; ++ } ++} ++#endif + -+# Paths required for build ++#ifndef CONFIG_MALI_DT ++static void mali_platform_device_release(struct device *device); + -+# make $(src) as absolute path if it isn't already, by prefixing $(srctree) -+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) -+KBASE_PATH = $(src) -+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy -+UMP_PATH = $(src)/../../../base ++#if defined(CONFIG_ARCH_VEXPRESS) + -+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y) -+MALI_ERROR_INJECT_ON = 1 -+endif ++#if defined(CONFIG_ARM64) ++/* Juno + Mali-450 MP6 in V7 FPGA */ ++static struct resource mali_gpu_resources_m450_mp6[] = { ++ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200) ++}; + -+# Set up defaults if not defined by build system -+MALI_CUSTOMER_RELEASE ?= 1 -+MALI_UNIT_TEST ?= 0 -+MALI_KERNEL_TEST_API ?= 0 -+MALI_ERROR_INJECT_ON ?= 0 -+MALI_MOCK_TEST ?= 0 -+MALI_COVERAGE ?= 0 -+MALI_INSTRUMENTATION_LEVEL ?= 0 -+# This workaround is for what seems to be a compiler bug we observed in -+# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling -+# the "_Pragma" syntax, where an error message is returned: -+# -+# "internal compiler error: unspellable token PRAGMA" -+# -+# This regression has thus far only been seen on the GCC 4.7 compiler bundled -+# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds -+# which are not known to be used with AOSP, is hardcoded to disable the -+# workaround, i.e. set the define to 0. 
-+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0 ++static struct resource mali_gpu_resources_m470_mp4[] = { ++ MALI_GPU_RESOURCES_MALI470_MP4_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200) ++}; + -+# Set up our defines, which will be passed to gcc -+DEFINES = \ -+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ -+ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -+ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ -+ -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ -+ -DMALI_COVERAGE=$(MALI_COVERAGE) \ -+ -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \ -+ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ -+ -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598) ++static struct resource mali_gpu_resources_m470_mp3[] = { ++ MALI_GPU_RESOURCES_MALI470_MP3_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200, 200, 200) ++}; + -+ifeq ($(KBUILD_EXTMOD),) -+# in-tree -+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) -+else -+# out-of-tree -+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) -+endif ++static struct resource mali_gpu_resources_m470_mp2[] = { ++ MALI_GPU_RESOURCES_MALI470_MP2_PMU(0x6F040000, 200, 200, 200, 200, 200, 200, 200) ++}; + -+DEFINES += -I$(srctree)/drivers/staging/android ++static struct resource mali_gpu_resources_m470_mp1[] = { ++ MALI_GPU_RESOURCES_MALI470_MP1_PMU(0x6F040000, 200, 200, 200, 200, 200) ++}; + -+# Use our defines when compiling -+ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux -+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux ++#else ++static struct resource mali_gpu_resources_m450_mp8[] = { ++ MALI_GPU_RESOURCES_MALI450_MP8_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) ++}; + -+SRC := \ -+ mali_kbase_device.c \ -+ mali_kbase_cache_policy.c \ -+ mali_kbase_mem.c \ -+ mali_kbase_mmu.c \ -+ mali_kbase_ctx_sched.c \ -+ mali_kbase_jd.c \ -+ mali_kbase_jd_debugfs.c \ -+ mali_kbase_jm.c \ -+ mali_kbase_gpuprops.c \ -+ mali_kbase_js.c \ -+ mali_kbase_js_ctx_attr.c \ -+ mali_kbase_event.c \ -+ mali_kbase_context.c \ -+ mali_kbase_pm.c \ -+ mali_kbase_config.c \ -+ mali_kbase_vinstr.c \ -+ mali_kbase_softjobs.c \ -+ mali_kbase_10969_workaround.c \ -+ mali_kbase_hw.c \ -+ mali_kbase_utility.c \ -+ mali_kbase_debug.c \ -+ mali_kbase_trace_timeline.c \ -+ mali_kbase_gpu_memory_debugfs.c \ -+ mali_kbase_mem_linux.c \ -+ mali_kbase_core_linux.c \ -+ mali_kbase_replay.c \ -+ mali_kbase_mem_profile_debugfs.c \ -+ mali_kbase_mmu_mode_lpae.c \ -+ mali_kbase_mmu_mode_aarch64.c \ -+ mali_kbase_disjoint_events.c \ -+ mali_kbase_gator_api.c \ -+ mali_kbase_debug_mem_view.c \ -+ mali_kbase_debug_job_fault.c \ -+ mali_kbase_smc.c \ -+ mali_kbase_mem_pool.c \ -+ mali_kbase_mem_pool_debugfs.c \ -+ mali_kbase_tlstream.c \ -+ mali_kbase_strings.c \ -+ mali_kbase_as_fault_debugfs.c \ -+ mali_kbase_regs_history_debugfs.c ++static struct resource mali_gpu_resources_m450_mp6[] = { ++ MALI_GPU_RESOURCES_MALI450_MP6_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) ++}; + ++static struct resource mali_gpu_resources_m450_mp4[] = { ++ MALI_GPU_RESOURCES_MALI450_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) ++}; + ++static struct resource mali_gpu_resources_m470_mp4[] = { ++ 
MALI_GPU_RESOURCES_MALI470_MP4_PMU(0xFC040000, -1, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68) ++}; ++#endif /* CONFIG_ARM64 */ + ++#elif defined(CONFIG_ARCH_REALVIEW) + -+ifeq ($(MALI_UNIT_TEST),1) -+ SRC += mali_kbase_tlstream_test.c -+endif ++static struct resource mali_gpu_resources_m300[] = { ++ MALI_GPU_RESOURCES_MALI300_PMU(0xC0000000, -1, -1, -1, -1) ++}; + -+ifeq ($(MALI_CUSTOMER_RELEASE),0) -+ SRC += mali_kbase_regs_dump_debugfs.c -+endif ++static struct resource mali_gpu_resources_m400_mp1[] = { ++ MALI_GPU_RESOURCES_MALI400_MP1_PMU(0xC0000000, -1, -1, -1, -1) ++}; + ++static struct resource mali_gpu_resources_m400_mp2[] = { ++ MALI_GPU_RESOURCES_MALI400_MP2_PMU(0xC0000000, -1, -1, -1, -1, -1, -1) ++}; + -+ccflags-y += -I$(KBASE_PATH) ++#endif ++#endif + -+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y) -+ SRC += mali_kbase_platform_fake.c ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + -+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y) -+ SRC += platform/vexpress/mali_kbase_config_vexpress.c \ -+ platform/vexpress/mali_kbase_cpu_vexpress.c -+ ccflags-y += -I$(src)/platform/vexpress -+ endif ++#define FALLBACK_STATIC_TEMPERATURE 55000 + -+ ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y) -+ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c -+ ccflags-y += -I$(src)/platform/rtsm_ve -+ endif ++static struct thermal_zone_device *gpu_tz; + -+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y) -+ SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c -+ ccflags-y += -I$(src)/platform/vexpress_1xv7_a57 -+ endif ++/* Calculate gpu static power example for reference */ ++static unsigned long arm_model_static_power(struct devfreq *devfreq, ++ unsigned long voltage) ++{ ++ int temperature, temp; ++ int temp_squared, temp_cubed, temp_scaling_factor; ++ const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); ++ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; ++ unsigned long static_power; + -+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y) -+ SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \ -+ platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c -+ ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz -+ endif -+endif # CONFIG_MALI_PLATFORM_FAKE=y ++ if (gpu_tz) { ++ int ret; + -+# Tell the Linux build system from which .o file to create the kernel module -+obj-$(CONFIG_MALI_MIDGARD) += midgard_kbase.o ++ ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); ++ if (ret) { ++ MALI_DEBUG_PRINT(2, ("Error reading temperature for gpu thermal zone: %d\n", ret)); ++ temperature = FALLBACK_STATIC_TEMPERATURE; ++ } ++ } else { ++ temperature = FALLBACK_STATIC_TEMPERATURE; ++ } + -+# Tell the Linux build system to enable building of our .c files -+midgard_kbase-y := $(SRC:.c=.o) ++ /* Calculate the temperature scaling factor. To be applied to the ++ * voltage scaled power. ++ */ ++ temp = temperature / 1000; ++ temp_squared = temp * temp; ++ temp_cubed = temp_squared * temp; ++ temp_scaling_factor = ++ (2 * temp_cubed) ++ - (80 * temp_squared) ++ + (4700 * temp) ++ + 32000; + -+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) -+ # Kconfig passes in the name with quotes for in-tree builds - remove them. 
-+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) -+ MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) -+ ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR) -+ include $(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)/Kbuild -+endif ++ static_power = (((coefficient * voltage_cubed) >> 20) ++ * temp_scaling_factor) ++ / 1000000; + -+ifeq ($(CONFIG_MALI_DEVFREQ),y) -+ ifeq ($(CONFIG_DEVFREQ_THERMAL),y) -+ include $(src)/ipa/Kbuild -+ endif -+endif ++ return static_power; ++} + -+midgard_kbase-$(CONFIG_MALI_DMA_FENCE) += \ -+ mali_kbase_dma_fence.o \ -+ mali_kbase_fence.o -+midgard_kbase-$(CONFIG_SYNC) += \ -+ mali_kbase_sync_android.o \ -+ mali_kbase_sync_common.o -+midgard_kbase-$(CONFIG_SYNC_FILE) += \ -+ mali_kbase_sync_file.o \ -+ mali_kbase_sync_common.o \ -+ mali_kbase_fence.o ++/* Calculate gpu dynamic power example for reference */ ++static unsigned long arm_model_dynamic_power(struct devfreq *devfreq, ++ unsigned long freq, ++ unsigned long voltage) ++{ ++ /* The inputs: freq (f) is in Hz, and voltage (v) in mV. ++ * The coefficient (c) is in mW/(MHz mV mV). ++ * ++ * This function calculates the dynamic power after this formula: ++ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) ++ */ ++ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ ++ const unsigned long f_mhz = freq / 1000000; /* MHz */ ++ const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ ++ unsigned long dynamic_power; + -+MALI_BACKEND_PATH ?= backend -+CONFIG_MALI_BACKEND ?= gpu -+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND) ++ dynamic_power = (coefficient * v2 * f_mhz) / 1000000; /* mW */ + -+ifeq ($(MALI_MOCK_TEST),1) -+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu) -+# Test functionality -+midgard_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o -+endif -+endif ++ return dynamic_power; ++} + -+include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild -+midgard_kbase-y += $(BACKEND:.c=.o) ++struct devfreq_cooling_power arm_cooling_ops = { ++ .get_static_power = arm_model_static_power, ++ .get_dynamic_power = arm_model_dynamic_power, ++}; ++#endif + ++static struct mali_gpu_device_data mali_gpu_data = { ++#ifndef CONFIG_MALI_DT ++ .pmu_switch_delay = 0xFF, /* do not have to be this high on FPGA, but it is good for testing to have a delay */ ++#if defined(CONFIG_ARCH_VEXPRESS) ++ .shared_mem_size = 256 * 1024 * 1024, /* 256MB */ ++#endif ++#endif ++ .max_job_runtime = 60000, /* 60 seconds */ + -+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) -+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) ++#if defined(CONFIG_ARCH_REALVIEW) ++ .dedicated_mem_start = 0x80000000, /* Physical start address (use 0xD0000000 for old indirect setup) */ ++ .dedicated_mem_size = 0x10000000, /* 256MB */ ++#endif ++#if defined(CONFIG_ARM64) ++ /* Some framebuffer drivers get the framebuffer dynamically, such as through GEM, ++ * in which the memory resource can't be predicted in advance. 
++ */ ++ .fb_start = 0x0, ++ .fb_size = 0xFFFFF000, ++#else ++ .fb_start = 0xe0000000, ++ .fb_size = 0x01000000, ++#endif ++ .control_interval = 1000, /* 1000ms */ ++ .utilization_callback = mali_gpu_utilization_callback, ++ .get_clock_info = NULL, ++ .get_freq = NULL, ++ .set_freq = NULL, ++#if defined(CONFIG_ARCH_VEXPRESS) && defined(CONFIG_ARM64) ++ .secure_mode_init = mali_secure_mode_init_juno, ++ .secure_mode_deinit = mali_secure_mode_deinit_juno, ++ .gpu_reset_and_secure_mode_enable = mali_gpu_reset_and_secure_mode_enable_juno, ++ .gpu_reset_and_secure_mode_disable = mali_gpu_reset_and_secure_mode_disable_juno, ++#else ++ .secure_mode_init = NULL, ++ .secure_mode_deinit = NULL, ++ .gpu_reset_and_secure_mode_enable = NULL, ++ .gpu_reset_and_secure_mode_disable = NULL, ++#endif ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++ .gpu_cooling_ops = &arm_cooling_ops, ++#endif ++}; + -+# Default to devicetree platform if neither a fake platform or a thirdparty -+# platform is configured. -+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),) -+CONFIG_MALI_PLATFORM_DEVICETREE := y -+endif ++#ifndef CONFIG_MALI_DT ++static struct platform_device mali_gpu_device = { ++ .name = MALI_GPU_NAME_UTGARD, ++ .id = 0, ++ .dev.release = mali_platform_device_release, ++ .dev.dma_mask = &mali_gpu_device.dev.coherent_dma_mask, ++ .dev.coherent_dma_mask = DMA_BIT_MASK(32), + -+midgard_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \ -+ platform/devicetree/mali_kbase_runtime_pm.o \ -+ platform/devicetree/mali_kbase_config_devicetree.o -+ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree ++ .dev.platform_data = &mali_gpu_data, ++}; + -+# For kutf and mali_kutf_irq_latency_test -+obj-$(CONFIG_MALI_KUTF) += tests/ -diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig -new file mode 100644 -index 000000000..043bfc002 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/Kconfig -@@ -0,0 +1,249 @@ -+# -+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++int mali_platform_device_register(void) ++{ ++ int err = -1; ++ int num_pp_cores = 0; ++#if defined(CONFIG_ARCH_REALVIEW) ++ u32 m400_gp_version; ++#endif + ++ MALI_DEBUG_PRINT(4, ("mali_platform_device_register() called\n")); + ++ /* Detect present Mali GPU and connect the correct resources to the device */ ++#if defined(CONFIG_ARCH_VEXPRESS) + -+menuconfig MALI_MIDGARD -+ tristate "Mali Midgard series support" -+ select GPU_TRACEPOINTS if ANDROID -+ default n -+ help -+ Enable this option to build support for a ARM Mali Midgard GPU. 
++#if defined(CONFIG_ARM64) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) ++ mali_gpu_device.dev.archdata.dma_ops = &dummy_dma_ops; ++#else ++ mali_gpu_device.dev.archdata.dma_ops = dma_ops; ++#endif ++ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); ++ num_pp_cores = 6; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6); ++ mali_gpu_device.resource = mali_gpu_resources_m450_mp6; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); ++ num_pp_cores = 4; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4); ++ mali_gpu_device.resource = mali_gpu_resources_m470_mp4; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n")); ++ num_pp_cores = 3; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp3); ++ mali_gpu_device.resource = mali_gpu_resources_m470_mp3; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n")); ++ num_pp_cores = 2; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp2); ++ mali_gpu_device.resource = mali_gpu_resources_m470_mp2; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n")); ++ num_pp_cores = 1; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp1); ++ mali_gpu_device.resource = mali_gpu_resources_m470_mp1; ++ } ++#else ++ if (mali_read_phys(0xFC000000) == 0x00000450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n")); ++ num_pp_cores = 8; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp8); ++ mali_gpu_device.resource = mali_gpu_resources_m450_mp8; ++ } else if (mali_read_phys(0xFC000000) == 0x40600450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); ++ num_pp_cores = 6; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp6); ++ mali_gpu_device.resource = mali_gpu_resources_m450_mp6; ++ } else if (mali_read_phys(0xFC000000) == 0x40400450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n")); ++ num_pp_cores = 4; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m450_mp4); ++ mali_gpu_device.resource = mali_gpu_resources_m450_mp4; ++ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); ++ num_pp_cores = 4; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m470_mp4); ++ mali_gpu_device.resource = mali_gpu_resources_m470_mp4; ++ } ++#endif /* CONFIG_ARM64 */ + -+ To compile this driver as a module, choose M here: -+ this will generate a single module, called mali_kbase. ++#elif defined(CONFIG_ARCH_REALVIEW) + -+config MALI_GATOR_SUPPORT -+ bool "Streamline support via Gator" -+ depends on MALI_MIDGARD -+ default n -+ help -+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer. -+ You will need the Gator device driver already loaded before loading this driver when enabling -+ Streamline debug support. -+ This is a legacy interface required by older versions of Streamline. 
++ m400_gp_version = mali_read_phys(0xC000006C); ++ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n")); ++ num_pp_cores = 1; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m300); ++ mali_gpu_device.resource = mali_gpu_resources_m300; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) { ++ u32 fpga_fw_version = mali_read_phys(0xC0010000); ++ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) { ++ /* Mali-400 MP1 r1p0 or r1p1 */ ++ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n")); ++ num_pp_cores = 1; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp1); ++ mali_gpu_device.resource = mali_gpu_resources_m400_mp1; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } else if (fpga_fw_version == 0x130C000F) { ++ /* Mali-400 MP2 r1p1 */ ++ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n")); ++ num_pp_cores = 2; ++ mali_gpu_device.num_resources = ARRAY_SIZE(mali_gpu_resources_m400_mp2); ++ mali_gpu_device.resource = mali_gpu_resources_m400_mp2; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } ++ } + -+config MALI_MIDGARD_DVFS -+ bool "Enable legacy DVFS" -+ depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE -+ default n -+ help -+ Choose this option to enable legacy DVFS in the Mali Midgard DDK. ++#endif ++ /* Register the platform device */ ++ err = platform_device_register(&mali_gpu_device); ++ if (0 == err) { ++#ifdef CONFIG_PM_RUNTIME ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) ++ pm_runtime_set_autosuspend_delay(&(mali_gpu_device.dev), 1000); ++ pm_runtime_use_autosuspend(&(mali_gpu_device.dev)); ++#endif ++ pm_runtime_enable(&(mali_gpu_device.dev)); ++#endif ++ MALI_DEBUG_ASSERT(0 < num_pp_cores); ++ mali_core_scaling_init(num_pp_cores); + -+config MALI_MIDGARD_ENABLE_TRACE -+ bool "Enable kbase tracing" -+ depends on MALI_MIDGARD -+ default n -+ help -+ Enables tracing in kbase. Trace log available through -+ the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled ++ return 0; ++ } + -+config MALI_DEVFREQ -+ bool "devfreq support for Mali" -+ depends on MALI_MIDGARD && PM_DEVFREQ -+ select DEVFREQ_GOV_SIMPLE_ONDEMAND -+ help -+ Support devfreq for Mali. ++ return err; ++} + -+ Using the devfreq framework and, by default, the simpleondemand -+ governor, the frequency of Mali will be dynamically selected from the -+ available OPPs. ++void mali_platform_device_unregister(void) ++{ ++ MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n")); + -+config MALI_DMA_FENCE -+ bool "DMA_BUF fence support for Mali" -+ depends on MALI_MIDGARD && !KDS -+ default n -+ help -+ Support DMA_BUF fences for Mali. ++ mali_core_scaling_term(); ++#ifdef CONFIG_PM_RUNTIME ++ pm_runtime_disable(&(mali_gpu_device.dev)); ++#endif ++ platform_device_unregister(&mali_gpu_device); + -+ This option should only be enabled if KDS is not present and -+ the Linux Kernel has built in support for DMA_BUF fences. 
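For reference, the probe logic above amounts to "read one version register, mask it, and map the result to a PP core count". A minimal table-driven sketch of the same idea follows; it is illustrative only (not part of this patch), reuses the mask/value pairs from the ARM64 vexpress checks above, and assumes only the usual kernel definitions of u32 and ARRAY_SIZE.

#include <linux/types.h>
#include <linux/kernel.h>

struct mali_probe_entry {
	u32 mask;	/* bits of the version register to compare */
	u32 value;	/* expected value after masking */
	int pp_cores;	/* PP core count reported for this match */
};

/* Same mask/value pairs as the ARM64 vexpress branch above. */
static const struct mali_probe_entry mali_probe_table[] = {
	{ 0x00600450, 0x00600450, 6 },	/* Mali-450 MP6 */
	{ 0x00F00430, 0x00400430, 4 },	/* Mali-470 MP4 */
	{ 0x00F00430, 0x00300430, 3 },	/* Mali-470 MP3 */
	{ 0x00F00430, 0x00200430, 2 },	/* Mali-470 MP2 */
	{ 0x00F00430, 0x00100430, 1 },	/* Mali-470 MP1 */
};

/* Return the PP core count for a raw version-register value, or 0 if unknown. */
static int mali_probe_pp_cores(u32 version_reg)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mali_probe_table); i++) {
		if ((version_reg & mali_probe_table[i].mask) == mali_probe_table[i].value)
			return mali_probe_table[i].pp_cores;
	}

	return 0;
}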
++ platform_device_put(&mali_gpu_device); + -+# MALI_EXPERT configuration options ++#if defined(CONFIG_ARCH_REALVIEW) ++ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */ ++#endif ++} + -+menuconfig MALI_EXPERT -+ depends on MALI_MIDGARD -+ bool "Enable Expert Settings" -+ default n -+ help -+ Enabling this option and modifying the default settings may produce a driver with performance or -+ other limitations. ++static void mali_platform_device_release(struct device *device) ++{ ++ MALI_DEBUG_PRINT(4, ("mali_platform_device_release() called\n")); ++} + -+config MALI_CORESTACK -+ bool "Support controlling power to the GPU core stack" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Enabling this feature on supported GPUs will let the driver powering -+ on/off the GPU core stack independently without involving the Power -+ Domain Controller. This should only be enabled on platforms which -+ integration of the PDC to the Mali GPU is known to be problematic. -+ This feature is currently only supported on t-Six and t-HEx GPUs. ++#else /* CONFIG_MALI_DT */ ++int mali_platform_device_init(struct platform_device *device) ++{ ++ int num_pp_cores = 0; ++ int err = -1; ++#if defined(CONFIG_ARCH_REALVIEW) ++ u32 m400_gp_version; ++#endif + -+ If unsure, say N. ++ /* Detect present Mali GPU and connect the correct resources to the device */ ++#if defined(CONFIG_ARCH_VEXPRESS) + -+config MALI_PRFCNT_SET_SECONDARY -+ bool "Use secondary set of performance counters" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Select this option to use secondary set of performance counters. Kernel -+ features that depend on an access to the primary set of counters may -+ become unavailable. Enabling this option will prevent power management -+ from working optimally and may cause instrumentation tools to return -+ bogus results. ++#if defined(CONFIG_ARM64) ++ if ((mali_read_phys(0x6F000000) & 0x00600450) == 0x00600450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP6 device\n")); ++ num_pp_cores = 6; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00400430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); ++ num_pp_cores = 4; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00300430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP3 device\n")); ++ num_pp_cores = 3; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00200430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP2 device\n")); ++ num_pp_cores = 2; ++ } else if ((mali_read_phys(0x6F000000) & 0x00F00430) == 0x00100430) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP1 device\n")); ++ num_pp_cores = 1; ++ } ++#else ++ if (mali_read_phys(0xFC000000) == 0x00000450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP8 device\n")); ++ num_pp_cores = 8; ++ } else if (mali_read_phys(0xFC000000) == 0x40400450) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-450 MP4 device\n")); ++ num_pp_cores = 4; ++ } else if (mali_read_phys(0xFC000000) == 0xFFFFFFFF) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-470 MP4 device\n")); ++ num_pp_cores = 4; ++ } ++#endif + -+ If unsure, say N. ++#elif defined(CONFIG_ARCH_REALVIEW) + -+config MALI_PLATFORM_FAKE -+ bool "Enable fake platform device support" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ When you start to work with the Mali Midgard series device driver the platform-specific code of -+ the Linux kernel for your platform may not be complete. 
In this situation the kernel device driver -+ supports creating the platform device outside of the Linux platform-specific code. -+ Enable this option if would like to use a platform device configuration from within the device driver. ++ m400_gp_version = mali_read_phys(0xC000006C); ++ if ((m400_gp_version & 0xFFFF0000) == 0x0C070000) { ++ MALI_DEBUG_PRINT(4, ("Registering Mali-300 device\n")); ++ num_pp_cores = 1; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } else if ((m400_gp_version & 0xFFFF0000) == 0x0B070000) { ++ u32 fpga_fw_version = mali_read_phys(0xC0010000); ++ if (fpga_fw_version == 0x130C008F || fpga_fw_version == 0x110C008F) { ++ /* Mali-400 MP1 r1p0 or r1p1 */ ++ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP1 device\n")); ++ num_pp_cores = 1; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } else if (fpga_fw_version == 0x130C000F) { ++ /* Mali-400 MP2 r1p1 */ ++ MALI_DEBUG_PRINT(4, ("Registering Mali-400 MP2 device\n")); ++ num_pp_cores = 2; ++ mali_write_phys(0xC0010020, 0xA); /* Enable direct memory mapping for FPGA */ ++ } ++ } ++#endif + -+choice -+ prompt "Platform configuration" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default MALI_PLATFORM_DEVICETREE -+ help -+ Select the SOC platform that contains a Mali Midgard GPU ++ /* After kernel 3.15 device tree will default set dev ++ * related parameters in of_platform_device_create_pdata. ++ * But kernel changes from version to version, ++ * For example 3.10 didn't include device->dev.dma_mask parameter setting, ++ * if we didn't include here will cause dma_mapping error, ++ * but in kernel 3.15 it include device->dev.dma_mask parameter setting, ++ * so it's better to set must need paramter by DDK itself. ++ */ ++ if (!device->dev.dma_mask) ++ device->dev.dma_mask = &device->dev.coherent_dma_mask; ++ device->dev.archdata.dma_ops = dma_ops; + -+config MALI_PLATFORM_DEVICETREE -+ bool "Device Tree platform" -+ depends on OF -+ help -+ Select this option to use Device Tree with the Mali driver. ++ err = platform_device_add_data(device, &mali_gpu_data, sizeof(mali_gpu_data)); + -+ When using this option the Mali driver will get the details of the -+ GPU hardware from the Device Tree. This means that the same driver -+ binary can run on multiple platforms as long as all the GPU hardware -+ details are described in the device tree. ++ if (0 == err) { ++#ifdef CONFIG_PM_RUNTIME ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)) ++ pm_runtime_set_autosuspend_delay(&(device->dev), 1000); ++ pm_runtime_use_autosuspend(&(device->dev)); ++#endif ++ pm_runtime_enable(&(device->dev)); ++#endif ++ MALI_DEBUG_ASSERT(0 < num_pp_cores); ++ mali_core_scaling_init(num_pp_cores); ++ } + -+ Device Tree is the recommended method for the Mali driver platform -+ integration. 
++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++ /* Some Socs didn't support the devfreq thermal for mali */ ++ if (of_machine_is_compatible("rockchip,rk3036")) ++ return 0; + -+config MALI_PLATFORM_VEXPRESS -+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) -+ bool "Versatile Express" -+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ -+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) -+ bool "Versatile Express w/Virtex7 @ 40Mhz" -+config MALI_PLATFORM_GOLDFISH -+ depends on ARCH_GOLDFISH -+ bool "Android Goldfish virtual CPU" -+config MALI_PLATFORM_PBX -+ depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX -+ bool "Realview PBX-A9" -+config MALI_PLATFORM_THIRDPARTY -+ bool "Third Party Platform" -+endchoice ++ /* Get thermal zone */ ++ gpu_tz = thermal_zone_get_zone_by_name("soc_thermal"); ++ if (IS_ERR(gpu_tz)) { ++ MALI_DEBUG_PRINT(2, ("Error getting gpu thermal zone (%ld), not yet ready?\n", ++ PTR_ERR(gpu_tz))); ++ gpu_tz = NULL; + -+config MALI_PLATFORM_THIRDPARTY_NAME -+ depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT -+ string "Third party platform name" -+ help -+ Enter the name of a third party platform that is supported. The third part configuration -+ file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name -+ specified here. ++ err = -EPROBE_DEFER; ++ } ++#endif + -+config MALI_DEBUG -+ bool "Debug build" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Select this option for increased checking and reporting of errors. ++ return err; ++} + -+config MALI_FENCE_DEBUG -+ bool "Debug sync fence usage" -+ depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) -+ default y if MALI_DEBUG -+ help -+ Select this option to enable additional checking and reporting on the -+ use of sync fences in the Mali driver. ++int mali_platform_device_deinit(struct platform_device *device) ++{ ++ MALI_IGNORE(device); + -+ This will add a 3s timeout to all sync fence waits in the Mali -+ driver, so that when work for Mali has been waiting on a sync fence -+ for a long time a debug message will be printed, detailing what fence -+ is causing the block, and which dependent Mali atoms are blocked as a -+ result of this. ++ MALI_DEBUG_PRINT(4, ("mali_platform_device_deinit() called\n")); + -+ The timeout can be changed at runtime through the js_soft_timeout -+ device attribute, where the timeout is specified in milliseconds. ++ mali_core_scaling_term(); ++#ifdef CONFIG_PM_RUNTIME ++ pm_runtime_disable(&(device->dev)); ++#endif + -+config MALI_NO_MALI -+ bool "No Mali" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ This can be used to test the driver in a simulated environment -+ whereby the hardware is not physically present. If the hardware is physically -+ present it will not be used. This can be used to test the majority of the -+ driver without needing actual hardware or for software benchmarking. -+ All calls to the simulated hardware will complete immediately as if the hardware -+ completed the task. ++#if defined(CONFIG_ARCH_REALVIEW) ++ mali_write_phys(0xC0010020, 0x9); /* Restore default (legacy) memory mapping */ ++#endif + -+config MALI_ERROR_INJECT -+ bool "Error injection" -+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI -+ default n -+ help -+ Enables insertion of errors to test module failure and recovery mechanisms. 
++ return 0; ++} + -+config MALI_TRACE_TIMELINE -+ bool "Timeline tracing" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Enables timeline tracing through the kernel tracepoint system. ++#endif /* CONFIG_MALI_DT */ + -+config MALI_SYSTEM_TRACE -+ bool "Enable system event tracing support" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Choose this option to enable system trace events for each -+ kbase event. This is typically used for debugging but has -+ minimal overhead when not in use. Enable only if you know what -+ you are doing. ++static u32 mali_read_phys(u32 phys_addr) ++{ ++ u32 phys_addr_page = phys_addr & 0xFFFFE000; ++ u32 phys_offset = phys_addr & 0x00001FFF; ++ u32 map_size = phys_offset + sizeof(u32); ++ u32 ret = 0xDEADBEEF; ++ void *mem_mapped = ioremap(phys_addr_page, map_size); ++ if (NULL != mem_mapped) { ++ ret = (u32)ioread32(((u8 *)mem_mapped) + phys_offset); ++ iounmap(mem_mapped); ++ } + -+config MALI_GPU_MMU_AARCH64 -+ bool "Use AArch64 page tables" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ Use AArch64 format page tables for the GPU instead of LPAE-style. -+ The two formats have the same functionality and performance but a -+ future GPU may deprecate or remove the legacy LPAE-style format. ++ return ret; ++} + -+ The LPAE-style format is supported on all Midgard and current Bifrost -+ GPUs. Enabling AArch64 format restricts the driver to only supporting -+ Bifrost GPUs. ++#if defined(CONFIG_ARCH_REALVIEW) ++static void mali_write_phys(u32 phys_addr, u32 value) ++{ ++ u32 phys_addr_page = phys_addr & 0xFFFFE000; ++ u32 phys_offset = phys_addr & 0x00001FFF; ++ u32 map_size = phys_offset + sizeof(u32); ++ void *mem_mapped = ioremap(phys_addr_page, map_size); ++ if (NULL != mem_mapped) { ++ iowrite32(value, ((u8 *)mem_mapped) + phys_offset); ++ iounmap(mem_mapped); ++ } ++} ++#endif + -+ If in doubt, say N. ++static int param_set_core_scaling(const char *val, const struct kernel_param *kp) ++{ ++ int ret = param_set_int(val, kp); + -+config MALI_PWRSOFT_765 -+ bool "PWRSOFT-765 ticket" -+ depends on MALI_MIDGARD && MALI_EXPERT -+ default n -+ help -+ PWRSOFT-765 fixes devfreq cooling devices issues. However, they are -+ not merged in mainline kernel yet. So this define helps to guard those -+ parts of the code. ++ if (1 == mali_core_scaling_enable) { ++ mali_core_scaling_sync(mali_executor_get_num_cores_enabled()); ++ } ++ return ret; ++} + -+source "drivers/gpu/arm/midgard/platform/Kconfig" -+source "drivers/gpu/arm/midgard/tests/Kconfig" -diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile -new file mode 100644 -index 000000000..9aa242c4f ---- /dev/null -+++ b/drivers/gpu/arm/midgard/Makefile -@@ -0,0 +1,42 @@ -+# -+# (C) COPYRIGHT 2010-2016, 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. 
-+# -+# ++static struct kernel_param_ops param_ops_core_scaling = { ++ .set = param_set_core_scaling, ++ .get = param_get_int, ++}; + ++module_param_cb(mali_core_scaling_enable, ¶m_ops_core_scaling, &mali_core_scaling_enable, 0644); ++MODULE_PARM_DESC(mali_core_scaling_enable, "1 means to enable core scaling policy, 0 means to disable core scaling policy"); + ++void mali_gpu_utilization_callback(struct mali_gpu_utilization_data *data) ++{ ++ if (1 == mali_core_scaling_enable) { ++ mali_core_scaling_update(data); ++ } ++} +diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c +new file mode 100644 +index 000000000..7a2fc8107 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.c +@@ -0,0 +1,122 @@ ++/* ++ * Copyright (C) 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+KDIR ?= /lib/modules/$(shell uname -r)/build ++/** ++ * @file arm_core_scaling.c ++ * Example core scaling policy. ++ */ + -+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. -+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump -+KBASE_PATH_RELATIVE = $(CURDIR) -+KDS_PATH_RELATIVE = $(CURDIR)/../../../.. -+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers ++#include "arm_core_scaling.h" + -+ifeq ($(MALI_UNIT_TEST), 1) -+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers -+endif ++#include ++#include "mali_kernel_common.h" + -+ifeq ($(MALI_BUS_LOG), 1) -+#Add bus logger symbols -+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers -+endif ++#include + -+# GPL driver supports KDS -+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers ++static int num_cores_total; ++static int num_cores_enabled; + -+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions -+all: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++static struct work_struct wq_work; + -+clean: -+ $(MAKE) -C $(KDIR) M=$(CURDIR) clean -diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase -new file mode 100755 -index 000000000..2bef9c25e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/Makefile.kbase -@@ -0,0 +1,17 @@ -+# -+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. 
-+# -+# ++static void set_num_cores(struct work_struct *work) ++{ ++ int err = mali_perf_set_num_pp_cores(num_cores_enabled); ++ MALI_DEBUG_ASSERT(0 == err); ++ MALI_IGNORE(err); ++} + ++static void enable_one_core(void) ++{ ++ if (num_cores_enabled < num_cores_total) { ++ ++num_cores_enabled; ++ schedule_work(&wq_work); ++ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling one more core\n")); ++ } + -+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) ++ MALI_DEBUG_ASSERT(1 <= num_cores_enabled); ++ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled); ++} + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild -new file mode 100755 -index 000000000..5f700e9b6 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild -@@ -0,0 +1,60 @@ -+# -+# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++static void disable_one_core(void) ++{ ++ if (1 < num_cores_enabled) { ++ --num_cores_enabled; ++ schedule_work(&wq_work); ++ MALI_DEBUG_PRINT(3, ("Core scaling: Disabling one core\n")); ++ } + ++ MALI_DEBUG_ASSERT(1 <= num_cores_enabled); ++ MALI_DEBUG_ASSERT(num_cores_total >= num_cores_enabled); ++} + -+BACKEND += \ -+ backend/gpu/mali_kbase_cache_policy_backend.c \ -+ backend/gpu/mali_kbase_device_hw.c \ -+ backend/gpu/mali_kbase_gpu.c \ -+ backend/gpu/mali_kbase_gpuprops_backend.c \ -+ backend/gpu/mali_kbase_debug_job_fault_backend.c \ -+ backend/gpu/mali_kbase_irq_linux.c \ -+ backend/gpu/mali_kbase_instr_backend.c \ -+ backend/gpu/mali_kbase_jm_as.c \ -+ backend/gpu/mali_kbase_jm_hw.c \ -+ backend/gpu/mali_kbase_jm_rb.c \ -+ backend/gpu/mali_kbase_js_affinity.c \ -+ backend/gpu/mali_kbase_js_backend.c \ -+ backend/gpu/mali_kbase_mmu_hw_direct.c \ -+ backend/gpu/mali_kbase_pm_backend.c \ -+ backend/gpu/mali_kbase_pm_driver.c \ -+ backend/gpu/mali_kbase_pm_metrics.c \ -+ backend/gpu/mali_kbase_pm_ca.c \ -+ backend/gpu/mali_kbase_pm_ca_fixed.c \ -+ backend/gpu/mali_kbase_pm_always_on.c \ -+ backend/gpu/mali_kbase_pm_coarse_demand.c \ -+ backend/gpu/mali_kbase_pm_demand.c \ -+ backend/gpu/mali_kbase_pm_policy.c \ -+ backend/gpu/mali_kbase_time.c ++static void enable_max_num_cores(void) ++{ ++ if (num_cores_enabled < num_cores_total) { ++ num_cores_enabled = num_cores_total; ++ schedule_work(&wq_work); ++ MALI_DEBUG_PRINT(3, ("Core scaling: Enabling maximum number of cores\n")); ++ } + -+ifeq ($(MALI_CUSTOMER_RELEASE),0) -+BACKEND += \ -+ backend/gpu/mali_kbase_pm_ca_random.c \ -+ backend/gpu/mali_kbase_pm_demand_always_powered.c \ -+ backend/gpu/mali_kbase_pm_fast_start.c -+endif ++ MALI_DEBUG_ASSERT(num_cores_total == num_cores_enabled); ++} + -+ifeq ($(CONFIG_MALI_DEVFREQ),y) -+BACKEND += \ -+ backend/gpu/mali_kbase_devfreq.c \ -+ backend/gpu/mali_kbase_pm_ca_devfreq.c -+endif ++void mali_core_scaling_init(int num_pp_cores) ++{ ++ INIT_WORK(&wq_work, set_num_cores); + -+ifeq ($(CONFIG_MALI_NO_MALI),y) -+ # Dummy model -+ BACKEND += backend/gpu/mali_kbase_model_dummy.c -+ BACKEND += backend/gpu/mali_kbase_model_linux.c -+ # HW 
error simulation -+ BACKEND += backend/gpu/mali_kbase_model_error_generator.c -+endif -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h ++ num_cores_total = num_pp_cores; ++ num_cores_enabled = num_pp_cores; ++ ++ /* NOTE: Mali is not fully initialized at this point. */ ++} ++ ++void mali_core_scaling_sync(int num_cores) ++{ ++ num_cores_enabled = num_cores; ++} ++ ++void mali_core_scaling_term(void) ++{ ++ flush_scheduled_work(); ++} ++ ++#define PERCENT_OF(percent, max) ((int) ((percent)*(max)/100.0 + 0.5)) ++ ++void mali_core_scaling_update(struct mali_gpu_utilization_data *data) ++{ ++ /* ++ * This function implements a very trivial PP core scaling algorithm. ++ * ++ * It is _NOT_ of production quality. ++ * The only intention behind this algorithm is to exercise and test the ++ * core scaling functionality of the driver. ++ * It is _NOT_ tuned for neither power saving nor performance! ++ * ++ * Other metrics than PP utilization need to be considered as well ++ * in order to make a good core scaling algorithm. ++ */ ++ ++ MALI_DEBUG_PRINT(3, ("Utilization: (%3d, %3d, %3d), cores enabled: %d/%d\n", data->utilization_gpu, data->utilization_gp, data->utilization_pp, num_cores_enabled, num_cores_total)); ++ ++ /* NOTE: this function is normally called directly from the utilization callback which is in ++ * timer context. */ ++ ++ if (PERCENT_OF(90, 256) < data->utilization_pp) { ++ enable_max_num_cores(); ++ } else if (PERCENT_OF(50, 256) < data->utilization_pp) { ++ enable_one_core(); ++ } else if (PERCENT_OF(40, 256) < data->utilization_pp) { ++ /* do nothing */ ++ } else if (PERCENT_OF(0, 256) < data->utilization_pp) { ++ disable_one_core(); ++ } else { ++ /* do nothing */ ++ } ++} +diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h new file mode 100644 -index 000000000..c8ae87eb8 +index 000000000..8e0101830 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h -@@ -0,0 +1,29 @@ ++++ b/drivers/gpu/arm/mali400/mali/platform/arm/arm_core_scaling.h +@@ -0,0 +1,44 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++/** ++ * @file arm_core_scaling.h ++ * Example core scaling policy. + */ + ++#ifndef __ARM_CORE_SCALING_H__ ++#define __ARM_CORE_SCALING_H__ + ++struct mali_gpu_utilization_data; + -+/* -+ * Backend specific configuration ++/** ++ * Initialize core scaling policy. 
++ * ++ * @note The core scaling policy will assume that all PP cores are on initially. ++ * ++ * @param num_pp_cores Total number of PP cores. + */ ++void mali_core_scaling_init(int num_pp_cores); + -+#ifndef _KBASE_BACKEND_CONFIG_H_ -+#define _KBASE_BACKEND_CONFIG_H_ ++/** ++ * Terminate core scaling policy. ++ */ ++void mali_core_scaling_term(void); + -+/* Enable GPU reset API */ -+#define KBASE_GPU_RESET_EN 1 ++/** ++ * Update core scaling policy with new utilization data. ++ * ++ * @param data Utilization data. ++ */ ++void mali_core_scaling_update(struct mali_gpu_utilization_data *data); + -+#endif /* _KBASE_BACKEND_CONFIG_H_ */ ++void mali_core_scaling_sync(int num_cores); + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c ++#endif /* __ARM_CORE_SCALING_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c b/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c new file mode 100644 -index 000000000..fef9a2cb7 +index 000000000..e4e7ab8b2 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c -@@ -0,0 +1,29 @@ ++++ b/drivers/gpu/arm/mali400/mali/platform/arm/juno_opp.c +@@ -0,0 +1,127 @@ +/* -+ * -+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ ++ ++/** ++ * @file juno_opp.c ++ * Example: Set up opp table ++ * Using ARM64 juno specific SCPI_PROTOCOL get frequence inform ++ * Customer need implement your own platform releated logic ++ */ ++#ifdef CONFIG_ARCH_VEXPRESS ++#ifdef CONFIG_MALI_DEVFREQ ++#ifdef CONFIG_ARM64 ++#ifdef CONFIG_ARM_SCPI_PROTOCOL ++#include ++#include ++#include ++#include ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) ++#include ++#else /* Linux >= 3.13 */ ++/* In 3.13 the OPP include header file, types, and functions were all ++ * renamed. Use the old filename for the include, and define the new names to ++ * the old, when an old kernel is detected. 
+ */ ++#include ++#define dev_pm_opp_add opp_add ++#define dev_pm_opp_remove opp_remove ++#endif /* Linux >= 3.13 */ + ++#include "mali_kernel_common.h" + ++static int init_juno_opps_from_scpi(struct device *dev) ++{ ++ struct scpi_dvfs_info *sinfo; ++ struct scpi_ops *sops; + -+#include "backend/gpu/mali_kbase_cache_policy_backend.h" -+#include ++ int i; + -+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, -+ u32 mode) ++ sops = get_scpi_ops(); ++ if (NULL == sops) { ++ MALI_DEBUG_PRINT(2, ("Mali didn't get any scpi ops \n")); ++ return -1; ++ } ++ ++ /* Hard coded for Juno. 2 is GPU domain */ ++ sinfo = sops->dvfs_get_info(2); ++ if (IS_ERR_OR_NULL(sinfo)) ++ return PTR_ERR(sinfo); ++ ++ for (i = 0; i < sinfo->count; i++) { ++ struct scpi_opp *e = &sinfo->opps[i]; ++ ++ MALI_DEBUG_PRINT(2, ("Mali OPP from SCPI: %u Hz @ %u mV\n", e->freq, e->m_volt)); ++ ++ dev_pm_opp_add(dev, e->freq, e->m_volt * 1000); ++ } ++ ++ return 0; ++} ++ ++int setup_opps(void) +{ -+ kbdev->current_gpu_coherency_mode = mode; ++ struct device_node *np; ++ struct platform_device *pdev; ++ int err; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) -+ kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); ++ np = of_find_node_by_name(NULL, "gpu"); ++ if (!np) { ++ pr_err("Failed to find DT entry for Mali\n"); ++ return -EFAULT; ++ } ++ ++ pdev = of_find_device_by_node(np); ++ if (!pdev) { ++ pr_err("Failed to find device for Mali\n"); ++ of_node_put(np); ++ return -EFAULT; ++ } ++ ++ err = init_juno_opps_from_scpi(&pdev->dev); ++ ++ of_node_put(np); ++ ++ return err; +} + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h -new file mode 100644 -index 000000000..fe9869109 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h -@@ -0,0 +1,34 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++int term_opps(struct device *dev) ++{ ++ struct scpi_dvfs_info *sinfo; ++ struct scpi_ops *sops; + ++ int i; + ++ sops = get_scpi_ops(); ++ if (NULL == sops) { ++ MALI_DEBUG_PRINT(2, ("Mali didn't get any scpi ops \n")); ++ return -1; ++ } + ++ /* Hard coded for Juno. 2 is GPU domain */ ++ sinfo = sops->dvfs_get_info(2); ++ if (IS_ERR_OR_NULL(sinfo)) ++ return PTR_ERR(sinfo); + -+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ -+#define _KBASE_CACHE_POLICY_BACKEND_H_ ++ for (i = 0; i < sinfo->count; i++) { ++ struct scpi_opp *e = &sinfo->opps[i]; + -+#include "mali_kbase.h" -+#include "mali_base_kernel.h" ++ MALI_DEBUG_PRINT(2, ("Mali Remove OPP: %u Hz \n", e->freq)); + -+/** -+ * kbase_cache_set_coherency_mode() - Sets the system coherency mode -+ * in the GPU. -+ * @kbdev: Device pointer -+ * @mode: Coherency mode. 
COHERENCY_ACE/ACE_LITE -+ */ -+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, -+ u32 mode); ++ dev_pm_opp_remove(dev, e->freq); ++ } + -+#endif /* _KBASE_CACHE_POLICY_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c ++ return 0; ++ ++} ++#endif ++#endif ++#endif ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h b/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h new file mode 100644 -index 000000000..7851ea646 +index 000000000..fe5e12241 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c -@@ -0,0 +1,157 @@ -+/* ++++ b/drivers/gpu/arm/mali400/mali/platform/rk/custom_log.h +@@ -0,0 +1,209 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ---------------------------------------------------------------------------- ++ * File: custom_log.h + * -+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++ * Desc: ChenZhen å好的 log 输出的定制实现. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * -------------------------------------------------------------------- ++ * < 习语 å’Œ 缩略语 > : + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * -------------------------------------------------------------------- ++ * Usage: ++ * ++ * Note: ++ * ++ * Author: ChenZhen ++ * ++ * ---------------------------------------------------------------------------- ++ * Version: ++ * v1.0 ++ * ---------------------------------------------------------------------------- ++ * Log: ++ ----Fri Nov 19 15:20:28 2010 v1.0 + * ++ * ---------------------------------------------------------------------------- + */ + ++#ifndef __CUSTOM_LOG_H__ ++#define __CUSTOM_LOG_H__ + ++#ifdef __cplusplus ++extern "C" { ++#endif + -+#include -+#include -+#include "mali_kbase_debug_job_fault.h" ++/* ----------------------------------------------------------------------------- ++ * Include Files ++ * ----------------------------------------------------------------------------- ++ */ ++#include ++#include + -+#ifdef CONFIG_DEBUG_FS ++/* ----------------------------------------------------------------------------- ++ * Macros Definition ++ * ----------------------------------------------------------------------------- ++ */ + -+/*GPU_CONTROL_REG(r)*/ -+static int gpu_control_reg_snapshot[] = { -+ GPU_ID, -+ SHADER_READY_LO, -+ SHADER_READY_HI, -+ TILER_READY_LO, -+ TILER_READY_HI, -+ L2_READY_LO, -+ L2_READY_HI -+}; ++/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. */ ++/* #define ENABLE_DEBUG_LOG */ + -+/* JOB_CONTROL_REG(r) */ -+static int job_control_reg_snapshot[] = { -+ JOB_IRQ_MASK, -+ JOB_IRQ_STATUS -+}; ++/*----------------------------------------------------------------------------*/ + -+/* JOB_SLOT_REG(n,r) */ -+static int job_slot_reg_snapshot[] = { -+ JS_HEAD_LO, -+ JS_HEAD_HI, -+ JS_TAIL_LO, -+ JS_TAIL_HI, -+ JS_AFFINITY_LO, -+ JS_AFFINITY_HI, -+ JS_CONFIG, -+ JS_STATUS, -+ JS_HEAD_NEXT_LO, -+ JS_HEAD_NEXT_HI, -+ JS_AFFINITY_NEXT_LO, -+ JS_AFFINITY_NEXT_HI, -+ JS_CONFIG_NEXT -+}; ++#ifdef ENABLE_VERBOSE_LOG ++/** Verbose log. */ ++#define V(fmt, args...) 
\ ++ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define V(...) ((void)0) ++#endif + -+/*MMU_REG(r)*/ -+static int mmu_reg_snapshot[] = { -+ MMU_IRQ_MASK, -+ MMU_IRQ_STATUS -+}; ++#ifdef ENABLE_DEBUG_LOG ++/** Debug log. */ ++#define D(fmt, args...) \ ++ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define D(...) ((void)0) ++#endif + -+/* MMU_AS_REG(n,r) */ -+static int as_reg_snapshot[] = { -+ AS_TRANSTAB_LO, -+ AS_TRANSTAB_HI, -+ AS_MEMATTR_LO, -+ AS_MEMATTR_HI, -+ AS_FAULTSTATUS, -+ AS_FAULTADDRESS_LO, -+ AS_FAULTADDRESS_HI, -+ AS_STATUS -+}; ++#define I(fmt, args...) \ ++ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, -+ int reg_range) -+{ -+ int i, j; -+ int offset = 0; -+ int slot_number; -+ int as_number; ++#define W(fmt, args...) \ ++ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ ++ fmt "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+ if (kctx->reg_dump == NULL) -+ return false; ++#define E(fmt, args...) \ ++ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+ slot_number = kctx->kbdev->gpu_props.num_job_slots; -+ as_number = kctx->kbdev->gpu_props.num_address_spaces; ++/*-------------------------------------------------------*/ + -+ /* get the GPU control registers*/ -+ for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); -+ offset += 2; -+ } ++/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ ++#define D_DEC(var) D(#var " = %d.", var) + -+ /* get the Job control registers*/ -+ for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ JOB_CONTROL_REG(job_control_reg_snapshot[i]); -+ offset += 2; -+ } ++#define E_DEC(var) E(#var " = %d.", var) + -+ /* get the Job Slot registers*/ -+ for (j = 0; j < slot_number; j++) { -+ for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); -+ offset += 2; -+ } -+ } ++/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ ++#define D_HEX(var) D(#var " = 0x%x.", var) + -+ /* get the MMU registers*/ -+ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); -+ offset += 2; -+ } ++#define E_HEX(var) E(#var " = 0x%x.", var) + -+ /* get the Address space registers*/ -+ for (j = 0; j < as_number; j++) { -+ for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { -+ kctx->reg_dump[offset] = -+ MMU_AS_REG(j, as_reg_snapshot[i]); -+ offset += 2; -+ } -+ } ++/** ++ * 使用 D(), 以å六进制的形å¼, ++ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. ++ */ ++#define D_PTR(ptr) D(#ptr " = %p.", ptr) + -+ WARN_ON(offset >= (reg_range*2/4)); ++#define E_PTR(ptr) E(#ptr " = %p.", ptr) + -+ /* set the termination flag*/ -+ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; -+ kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; ++/** 使用 D(), æ‰“å° char 字串. 
*/ ++#define D_STR(p_str) \ ++do { \ ++ if (!p_str) { \ ++ D(#p_str " = NULL."); \ ++ else \ ++ D(#p_str " = '%s'.", p_str); \ ++} while (0) + -+ dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", -+ offset); ++#define E_STR(p_str) \ ++do { \ ++ if (!p_str) \ ++ E(#p_str " = NULL."); \ ++ else \ ++ E(#p_str " = '%s'.", p_str); \ ++} while (0) + -+ return true; -+} ++#ifdef ENABLE_DEBUG_LOG ++/** ++ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. ++ */ ++#define D_MEM(p_start, len) \ ++do { \ ++ int i = 0; \ ++ char *p = (char *)(p_start); \ ++ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ ++ (p_start), \ ++ (len)); \ ++ pr_debug("\t\t"); \ ++ for (i = 0; i < (len); i++) \ ++ pr_debug("0x%02x, ", p[i]); \ ++ pr_debug("\n"); \ ++} while (0) ++#else ++#define D_MEM(...) ((void)0) ++#endif + -+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) -+{ -+ int offset = 0; ++/*-------------------------------------------------------*/ + -+ if (kctx->reg_dump == NULL) -+ return false; ++/** ++ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, ++ * å°†å˜é‡ 'ret_var' 设置 'err_code', ++ * log 输出对应的 Error Caution, ++ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. ++ * @param msg ++ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. ++ * @param ret_var ++ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, ++ * 将被设置具体的 Error Code. ++ * 通常是 'ret' or 'result'. ++ * @param err_code ++ * 表å¾ç‰¹å®š error 的常数标识, ++ * 通常是 å®çš„å½¢æ€. ++ * @param label ++ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, ++ * 通常就是 'EXIT'. ++ * @param args... ++ * 对应 'msg_fmt' 实å‚中, ++ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. ++ */ ++#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) \ ++do { \ ++ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ ++ (err_code), \ ++ ## args); \ ++ (ret_var) = (err_code); \ ++ goto label; \ ++} while (0) + -+ while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { -+ kctx->reg_dump[offset+1] = -+ kbase_reg_read(kctx->kbdev, -+ kctx->reg_dump[offset], NULL); -+ offset += 2; -+ } -+ return true; -+} ++/* ----------------------------------------------------------------------------- ++ * Types and Structures Definition ++ * ----------------------------------------------------------------------------- ++ */ ++ ++/* ----------------------------------------------------------------------------- ++ * Global Functions' Prototype ++ * ----------------------------------------------------------------------------- ++ */ + ++/* ----------------------------------------------------------------------------- ++ * Inline Functions Implementation ++ * ----------------------------------------------------------------------------- ++ */ + ++#ifdef __cplusplus ++} +#endif -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c ++ ++#endif /* __CUSTOM_LOG_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/rk.c b/drivers/gpu/arm/mali400/mali/platform/rk/rk.c new file mode 100644 -index 000000000..4e8e56e9b +index 000000000..c937e4678 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c -@@ -0,0 +1,474 @@ ++++ b/drivers/gpu/arm/mali400/mali/platform/rk/rk.c +@@ -0,0 +1,726 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT RockChip Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * + */ + ++/** ++ * @file rk.c ++ * implementation of platform_specific_code on rk platforms, such as rk3328h. ++ * ++ * mali_device_driver(MDD) includes 2 parts : ++ * .DP : platform_dependent_part : ++ * located in /mali/platform// ++ * .DP : common_part : ++ * common part implemented by ARM. ++ */ + +#define ENABLE_DEBUG_LOG -+#include "../../platform/rk/custom_log.h" -+ -+ -+#include -+#include -+#include -+#include ++#include "custom_log.h" + ++#include ++#include ++#include +#include +#include -+#include -+#ifdef CONFIG_DEVFREQ_THERMAL -+#include ++#include ++#ifdef CONFIG_PM ++#include +#endif -+ -+#include -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -+#include -+#else /* Linux >= 3.13 */ -+/* In 3.13 the OPP include header file, types, and functions were all -+ * renamed. Use the old filename for the include, and define the new names to -+ * the old, when an old kernel is detected. -+ */ -+#include -+#define dev_pm_opp opp -+#define dev_pm_opp_get_voltage opp_get_voltage -+#define dev_pm_opp_get_opp_count opp_get_opp_count -+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil -+#define dev_pm_opp_find_freq_floor opp_find_freq_floor -+#endif /* Linux >= 3.13 */ ++#include ++#include ++#include ++#include ++#include ++#include +#include -+#include + -+static struct devfreq_simple_ondemand_data ondemand_data; ++#include ++#include "mali_kernel_common.h" ++#include "../../common/mali_osk_mali.h" + -+static struct monitor_dev_profile mali_mdevp = { -+ .type = MONITOR_TYPE_DEV, -+ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, -+ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, -+}; ++/*---------------------------------------------------------------------------*/ + -+/** -+ * opp_translate - Translate nominal OPP frequency from devicetree into real -+ * frequency and core mask -+ * @kbdev: Device pointer -+ * @freq: Nominal frequency -+ * @core_mask: Pointer to u64 to store core mask to -+ * -+ * Return: Real target frequency -+ * -+ * This function will only perform translation if an operating-points-v2-mali -+ * table is present in devicetree. If one is not present then it will return an -+ * untranslated frequency and all cores enabled. -+ */ -+static unsigned long opp_translate(struct kbase_device *kbdev, -+ unsigned long freq, u64 *core_mask) -+{ -+ int i; ++u32 mali_group_error; + -+ for (i = 0; i < kbdev->num_opps; i++) { -+ if (kbdev->opp_table[i].opp_freq == freq) { -+ *core_mask = kbdev->opp_table[i].core_mask; -+ return kbdev->opp_table[i].real_freq; -+ } -+ } ++/*---------------------------------------------------------------------------*/ + -+ /* Failed to find OPP - return all cores enabled & nominal frequency */ -+ *core_mask = kbdev->gpu_props.props.raw_props.shader_present; ++#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) + -+ return freq; -+} ++/* ++ * rk_platform_context_of_mali_device. ++ */ ++struct rk_context { ++ /* mali device. */ ++ struct device *dev; ++ /* is the GPU powered on? */ ++ bool is_powered; ++ /* debug only, the period in ms to count gpu_utilisation. 
*/ ++ unsigned int utilisation_period; ++}; + -+static int -+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct dev_pm_opp *opp; -+ unsigned long nominal_freq; -+ unsigned long freq = 0; -+ unsigned long old_freq = kbdev->current_freq; -+ unsigned long voltage; -+ int err; -+ u64 core_mask; ++struct rk_context *s_rk_context; + -+ freq = *target_freq; ++/*---------------------------------------------------------------------------*/ + -+ opp = devfreq_recommended_opp(dev, &freq, flags); -+ if (IS_ERR(opp)) { -+ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); -+ return PTR_ERR(opp); -+ } -+ voltage = dev_pm_opp_get_voltage(opp); ++#ifdef CONFIG_MALI_DEVFREQ ++static ssize_t utilisation_period_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct rk_context *platform = s_rk_context; ++ ssize_t ret = 0; + -+ nominal_freq = freq; ++ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); + -+ /* -+ * Only update if there is a change of frequency -+ */ -+ if (kbdev->current_nominal_freq == nominal_freq) { -+ *target_freq = nominal_freq; -+#ifdef CONFIG_REGULATOR -+ if (kbdev->current_voltage == voltage) -+ return 0; -+ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); -+ if (err) { -+ dev_err(dev, "Failed to set voltage (%d)\n", err); -+ return err; -+ } -+ kbdev->current_voltage = voltage; -+#endif -+ return 0; -+ } ++ return ret; ++} + -+ freq = opp_translate(kbdev, nominal_freq, &core_mask); -+#ifdef CONFIG_REGULATOR -+ if (kbdev->regulator && kbdev->current_voltage != voltage && -+ old_freq < freq) { -+ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); -+ if (err) { -+ dev_err(dev, "Failed to increase voltage (%d)\n", err); -+ return err; -+ } -+ } -+#endif ++static ssize_t utilisation_period_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct rk_context *platform = s_rk_context; ++ int ret = 0; + -+ err = clk_set_rate(kbdev->clock, freq); -+ if (err) { -+ dev_err(dev, "Failed to set clock %lu (target %lu)\n", -+ freq, *target_freq); -+ return err; -+ } -+ *target_freq = freq; -+ kbdev->current_freq = freq; -+ if (kbdev->devfreq) -+ kbdev->devfreq->last_status.current_frequency = freq; -+#ifdef CONFIG_REGULATOR -+ if (kbdev->regulator && kbdev->current_voltage != voltage && -+ old_freq > freq) { -+ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); -+ if (err) { -+ dev_err(dev, "Failed to decrease voltage (%d)\n", err); -+ return err; -+ } ++ ret = kstrtouint(buf, 0, &platform->utilisation_period); ++ if (ret) { ++ E("invalid input period : %s.", buf); ++ return ret; + } -+#endif -+ -+ if (kbdev->pm.backend.ca_current_policy->id == -+ KBASE_PM_CA_POLICY_ID_DEVFREQ) -+ kbase_devfreq_set_core_mask(kbdev, core_mask); -+ -+ *target_freq = nominal_freq; -+ kbdev->current_voltage = voltage; -+ kbdev->current_nominal_freq = nominal_freq; -+ kbdev->current_freq = freq; -+ kbdev->current_core_mask = core_mask; -+ -+ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq); -+ -+ kbase_pm_reset_dvfs_utilisation(kbdev); ++ D("set utilisation_period to '%d'.", platform->utilisation_period); + -+ return err; ++ return count; +} + -+static int -+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) ++static ssize_t utilisation_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ struct kbase_device *kbdev = 
dev_get_drvdata(dev); ++ struct rk_context *platform = s_rk_context; ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ ssize_t ret = 0; ++ unsigned long period_in_us = platform->utilisation_period * 1000; ++ unsigned long total_time; ++ unsigned long busy_time; ++ unsigned long utilisation; + -+ *freq = kbdev->current_nominal_freq; ++ mali_pm_reset_dvfs_utilisation(mdev); ++ usleep_range(period_in_us, period_in_us + 100); ++ mali_pm_get_dvfs_utilisation(mdev, &total_time, &busy_time); + -+ return 0; ++ /* 'devfreq_dev_profile' instance registered to devfreq ++ * also uses mali_pm_reset_dvfs_utilisation() ++ * and mali_pm_get_dvfs_utilisation(). ++ * So, it's better to disable GPU DVFS before reading this node. ++ */ ++ D("total_time : %lu, busy_time : %lu.", total_time, busy_time); ++ ++ utilisation = busy_time / (total_time / 100); ++ ret += snprintf(buf, PAGE_SIZE, "%lu\n", utilisation); ++ ++ return ret; +} + -+static int -+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) ++static DEVICE_ATTR_RW(utilisation_period); ++static DEVICE_ATTR_RO(utilisation); ++#endif ++ ++static int rk_context_create_sysfs_files(struct device *dev) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); ++#ifdef CONFIG_MALI_DEVFREQ ++ int ret; + -+ stat->current_frequency = kbdev->current_nominal_freq; ++ ret = device_create_file(dev, &dev_attr_utilisation_period); ++ if (ret) { ++ E("fail to create sysfs file 'utilisation_period'."); ++ goto out; ++ } + -+ kbase_pm_get_dvfs_utilisation(kbdev, -+ &stat->total_time, &stat->busy_time); ++ ret = device_create_file(dev, &dev_attr_utilisation); ++ if (ret) { ++ E("fail to create sysfs file 'utilisation'."); ++ goto remove_utilisation_period; ++ } + -+ stat->private_data = NULL; ++ return 0; + ++remove_utilisation_period: ++ device_remove_file(dev, &dev_attr_utilisation_period); ++out: ++ return ret; ++#else + return 0; ++#endif +} + -+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, -+ struct devfreq_dev_profile *dp) ++static void rk_context_remove_sysfs_files(struct device *dev) +{ -+ int count; -+ int i = 0; -+ unsigned long freq; -+ struct dev_pm_opp *opp; ++#ifdef CONFIG_MALI_DEVFREQ ++ device_remove_file(dev, &dev_attr_utilisation_period); ++ device_remove_file(dev, &dev_attr_utilisation); ++#endif ++} + -+ count = dev_pm_opp_get_opp_count(kbdev->dev); -+ if (count < 0) { -+ return count; ++/*---------------------------------------------------------------------------*/ ++ ++/* ++ * Init rk_platform_context of mali_device. 
++ */ ++static int rk_context_init(struct platform_device *pdev) ++{ ++ int ret = 0; ++ struct device *dev = &pdev->dev; ++ struct rk_context *platform; /* platform_context */ ++ ++ platform = kzalloc(sizeof(*platform), GFP_KERNEL); ++ if (!platform) { ++ E("no mem."); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), -+ GFP_KERNEL); -+ if (!dp->freq_table) -+ return -ENOMEM; ++ platform->dev = dev; ++ platform->is_powered = false; + -+ for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { -+ opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); -+ if (IS_ERR(opp)) -+ break; -+ dev_pm_opp_put(opp); ++ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; + -+ dp->freq_table[i] = freq; ++ ret = rk_context_create_sysfs_files(dev); ++ if (ret) { ++ E("fail to create sysfs files, ret = %d", ret); ++ goto EXIT; + } + -+ if (count != i) -+ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", -+ count, i); ++ s_rk_context = platform; + -+ dp->max_state = i; ++ pm_runtime_set_autosuspend_delay(dev, 1000); ++ pm_runtime_use_autosuspend(dev); ++ pm_runtime_enable(dev); + -+ return 0; ++EXIT: ++ return ret; +} + -+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) ++static void rk_context_deinit(struct platform_device *pdev) +{ -+ struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; ++ struct device *dev = &pdev->dev; ++ struct rk_context *platform = s_rk_context; + -+ kfree(dp->freq_table); -+ dp->freq_table = NULL; -+} ++ pm_runtime_disable(dev); + -+static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) -+{ -+ kfree(kbdev->opp_table); -+ kbdev->opp_table = NULL; -+} ++ s_rk_context = NULL; + -+static void kbase_devfreq_exit(struct device *dev) -+{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ rk_context_remove_sysfs_files(dev); + -+ if (kbdev) -+ kbase_devfreq_term_freq_table(kbdev); ++ if (platform) { ++ platform->is_powered = false; ++ platform->dev = NULL; ++ kfree(platform); ++ } +} + -+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) -+{ -+ struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, -+ "operating-points-v2", 0); -+ struct device_node *node; -+ int i = 0; -+ int count; -+ -+ if (!opp_node) -+ return 0; -+ if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) -+ return 0; -+ -+ count = dev_pm_opp_get_opp_count(kbdev->dev); -+ kbdev->opp_table = kmalloc_array(count, -+ sizeof(struct kbase_devfreq_opp), GFP_KERNEL); -+ if (!kbdev->opp_table) -+ return -ENOMEM; ++/*---------------------------------------------------------------------------*/ ++/* for devfreq cooling. 
*/ + -+ for_each_available_child_of_node(opp_node, node) { -+ u64 core_mask; -+ u64 opp_freq, real_freq; -+ const void *core_count_p; ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + -+ if (of_property_read_u64(node, "opp-hz", &opp_freq)) { -+ dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n"); -+ continue; -+ } -+ if (of_property_read_u64(node, "opp-hz-real", &real_freq)) -+ real_freq = opp_freq; -+ if (of_property_read_u64(node, "opp-core-mask", &core_mask)) -+ core_mask = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ core_count_p = of_get_property(node, "opp-core-count", NULL); -+ if (core_count_p) { -+ u64 remaining_core_mask = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ int core_count = be32_to_cpup(core_count_p); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) + -+ core_mask = 0; ++#define FALLBACK_STATIC_TEMPERATURE 55000 + -+ for (; core_count > 0; core_count--) { -+ int core = ffs(remaining_core_mask); ++static u32 dynamic_coefficient; ++static u32 static_coefficient; ++static s32 ts[4]; ++static struct thermal_zone_device *gpu_tz; ++static struct ipa_power_model_data *model_data; + -+ if (!core) { -+ dev_err(kbdev->dev, "OPP has more cores than GPU\n"); -+ return -ENODEV; -+ } ++/* Calculate gpu static power example for reference */ ++static unsigned long rk_model_static_power(struct devfreq *devfreq, ++ unsigned long voltage) ++{ ++ int temperature, temp; ++ int temp_squared, temp_cubed, temp_scaling_factor; ++ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; ++ unsigned long static_power; + -+ core_mask |= (1ull << (core-1)); -+ remaining_core_mask &= ~(1ull << (core-1)); -+ } -+ } ++ if (gpu_tz) { ++ int ret; + -+ if (!core_mask) { -+ dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); -+ return -ENODEV; ++ ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); ++ if (ret) { ++ MALI_DEBUG_PRINT(2, ("fail to read temp: %d\n", ret)); ++ temperature = FALLBACK_STATIC_TEMPERATURE; + } -+ -+ kbdev->opp_table[i].opp_freq = opp_freq; -+ kbdev->opp_table[i].real_freq = real_freq; -+ kbdev->opp_table[i].core_mask = core_mask; -+ -+ dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n", -+ i, opp_freq, real_freq, core_mask); -+ -+ i++; ++ } else { ++ temperature = FALLBACK_STATIC_TEMPERATURE; + } + -+ kbdev->num_opps = i; ++ /* Calculate the temperature scaling factor. To be applied to the ++ * voltage scaled power. ++ */ ++ temp = temperature / 1000; ++ temp_squared = temp * temp; ++ temp_cubed = temp_squared * temp; ++ temp_scaling_factor = ++ (ts[3] * temp_cubed) ++ + (ts[2] * temp_squared) ++ + (ts[1] * temp) ++ + ts[0]; + -+ return 0; ++ static_power = (((static_coefficient * voltage_cubed) >> 20) ++ * temp_scaling_factor) ++ / 1000000; ++ ++ return static_power; +} + -+int kbase_devfreq_init(struct kbase_device *kbdev) ++/* Calculate gpu dynamic power example for reference */ ++static unsigned long rk_model_dynamic_power(struct devfreq *devfreq, ++ unsigned long freq, ++ unsigned long voltage) +{ -+ struct device_node *np = kbdev->dev->of_node; -+ struct devfreq_dev_profile *dp; -+ struct dev_pm_opp *opp; -+ unsigned long opp_rate; -+ int err; -+ -+ if (!kbdev->clock) { -+ dev_err(kbdev->dev, "Clock not available for devfreq\n"); -+ return -ENODEV; -+ } ++ /* The inputs: freq (f) is in Hz, and voltage (v) in mV. ++ * The coefficient (c) is in mW/(MHz mV mV). 
++ * ++ * This function calculates the dynamic power after this formula: ++ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) ++ */ ++ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ ++ const unsigned long f_mhz = freq / 1000000; /* MHz */ ++ unsigned long dynamic_power; + -+ kbdev->current_freq = clk_get_rate(kbdev->clock); -+ kbdev->current_nominal_freq = kbdev->current_freq; ++ dynamic_power = (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ + -+ dp = &kbdev->devfreq_profile; ++ return dynamic_power; ++} + -+ dp->initial_freq = kbdev->current_freq; -+ /* .KP : set devfreq_dvfs_interval_in_ms */ -+ dp->polling_ms = 20; -+ dp->target = kbase_devfreq_target; -+ dp->get_dev_status = kbase_devfreq_status; -+ dp->get_cur_freq = kbase_devfreq_cur_freq; -+ dp->exit = kbase_devfreq_exit; ++static struct devfreq_cooling_power rk_cooling_ops = { ++ .get_static_power = rk_model_static_power, ++ .get_dynamic_power = rk_model_dynamic_power, ++}; + -+ if (kbase_devfreq_init_freq_table(kbdev, dp)) -+ return -EFAULT; ++static unsigned long mali_devfreq_get_static_power(struct devfreq *devfreq, ++ unsigned long voltage) ++{ ++ return rockchip_ipa_get_static_power(model_data, voltage); ++} + -+ err = kbase_devfreq_init_core_mask_table(kbdev); -+ if (err) -+ goto init_core_mask_table_failed; ++static int power_model_simple_init(struct platform_device *pdev) ++{ ++ struct device_node *power_model_node; ++ const char *tz_name; ++ u32 static_power, dynamic_power; ++ u32 voltage, voltage_squared, voltage_cubed, frequency; + -+ of_property_read_u32(np, "upthreshold", -+ &ondemand_data.upthreshold); -+ of_property_read_u32(np, "downdifferential", -+ &ondemand_data.downdifferential); ++ if (of_find_compatible_node(pdev->dev.of_node, NULL, "simple-power-model")) { ++ of_property_read_u32(pdev->dev.of_node, ++ "dynamic-power-coefficient", ++ (u32 *)&rk_cooling_ops.dyn_power_coeff); ++ model_data = rockchip_ipa_power_model_init(&pdev->dev, ++ "gpu_leakage"); ++ if (IS_ERR_OR_NULL(model_data)) { ++ model_data = NULL; ++ dev_err(&pdev->dev, "failed to initialize power model\n"); ++ } else if (model_data->dynamic_coefficient) { ++ rk_cooling_ops.dyn_power_coeff = ++ model_data->dynamic_coefficient; ++ rk_cooling_ops.get_dynamic_power = NULL; ++ rk_cooling_ops.get_static_power = mali_devfreq_get_static_power; ++ } ++ if (!rk_cooling_ops.dyn_power_coeff) { ++ dev_err(&pdev->dev, "failed to get dynamic-coefficient\n"); ++ return -EINVAL; ++ } + -+ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, -+ "simple_ondemand", &ondemand_data); -+ if (IS_ERR(kbdev->devfreq)) { -+ err = PTR_ERR(kbdev->devfreq); -+ kbdev->devfreq = NULL; -+ dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); -+ goto devfreq_add_dev_failed; ++ return 0; + } + -+ /* devfreq_add_device only copies a few of kbdev->dev's fields, so -+ * set drvdata explicitly so IPA models can access kbdev. 
*/ -+ dev_set_drvdata(&kbdev->devfreq->dev, kbdev); ++ power_model_node = of_get_child_by_name(pdev->dev.of_node, ++ "power_model"); ++ if (!power_model_node) { ++ dev_err(&pdev->dev, "could not find power_model node\n"); ++ return -ENODEV; ++ } ++ if (!of_device_is_compatible(power_model_node, ++ "arm,mali-simple-power-model")) { ++ dev_err(&pdev->dev, "power_model incompatible with simple power model\n"); ++ return -ENODEV; ++ } + -+ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); -+ if (err) { -+ dev_err(kbdev->dev, -+ "Failed to register OPP notifier (%d)\n", err); -+ goto opp_notifier_failed; ++ if (of_property_read_string(power_model_node, "thermal-zone", ++ &tz_name)) { ++ dev_err(&pdev->dev, "ts in power_model not available\n"); ++ return -EINVAL; + } + -+ opp_rate = kbdev->current_freq; -+ opp = devfreq_recommended_opp(kbdev->dev, &opp_rate, 0); -+ if (!IS_ERR(opp)) -+ dev_pm_opp_put(opp); -+ kbdev->devfreq->last_status.current_frequency = opp_rate; ++ gpu_tz = thermal_zone_get_zone_by_name(tz_name); ++ if (IS_ERR(gpu_tz)) { ++ pr_warn_ratelimited("Error getting gpu thermal zone '%s'(%ld), not yet ready?\n", ++ tz_name, ++ PTR_ERR(gpu_tz)); ++ gpu_tz = NULL; ++ } + -+ mali_mdevp.data = kbdev->devfreq; -+ kbdev->mdev_info = rockchip_system_monitor_register(kbdev->dev, -+ &mali_mdevp); -+ if (IS_ERR(kbdev->mdev_info)) { -+ dev_dbg(kbdev->dev, "without system monitor\n"); -+ kbdev->mdev_info = NULL; ++ if (of_property_read_u32(power_model_node, "static-power", ++ &static_power)) { ++ dev_err(&pdev->dev, "static-power in power_model not available\n"); ++ return -EINVAL; + } -+#ifdef CONFIG_DEVFREQ_THERMAL -+ err = kbase_ipa_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "IPA initialization failed\n"); -+ goto cooling_failed; ++ if (of_property_read_u32(power_model_node, "dynamic-power", ++ &dynamic_power)) { ++ dev_err(&pdev->dev, "dynamic-power in power_model not available\n"); ++ return -EINVAL; ++ } ++ if (of_property_read_u32(power_model_node, "voltage", ++ &voltage)) { ++ dev_err(&pdev->dev, "voltage in power_model not available\n"); ++ return -EINVAL; + } ++ if (of_property_read_u32(power_model_node, "frequency", ++ &frequency)) { ++ dev_err(&pdev->dev, "frequency in power_model not available\n"); ++ return -EINVAL; ++ } ++ voltage_squared = (voltage * voltage) / 1000; ++ voltage_cubed = voltage * voltage * voltage; ++ static_coefficient = (static_power << 20) / (voltage_cubed >> 10); ++ dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) ++ * 1000) / frequency; + -+ kbdev->devfreq_cooling = of_devfreq_cooling_register_power( -+ kbdev->dev->of_node, -+ kbdev->devfreq, -+ &kbase_ipa_power_model_ops); -+ if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { -+ err = PTR_ERR(kbdev->devfreq_cooling); -+ dev_err(kbdev->dev, -+ "Failed to register cooling device (%d)\n", -+ err); -+ goto cooling_failed; ++ if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) { ++ dev_err(&pdev->dev, "ts in power_model not available\n"); ++ return -EINVAL; + } -+ I("success initing power_model_simple."); -+#endif + + return 0; ++} + -+#ifdef CONFIG_DEVFREQ_THERMAL -+cooling_failed: -+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+opp_notifier_failed: -+ if (devfreq_remove_device(kbdev->devfreq)) -+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); -+ else -+ kbdev->devfreq = NULL; ++#endif + -+devfreq_add_dev_failed: -+ kbase_devfreq_term_core_mask_table(kbdev); ++#endif + 
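The fixed-point scaling used by power_model_simple_init() and rk_model_dynamic_power() above is compact but easy to misread, so the following stand-alone sketch mirrors the same integer arithmetic outside the kernel. It is illustrative only: the 450 mW / 1100 mV / 500 MHz calibration point is a made-up example rather than a value from any Rockchip device tree, and it assumes the DT "frequency" property is expressed in MHz, which is what keeps the coefficient derivation consistent with the Hz-based devfreq callback shown above.

/*
 * Illustrative userspace sketch of the simple power model arithmetic.
 * The calibration figures below are hypothetical example numbers.
 */
#include <stdio.h>

int main(void)
{
	/* Hypothetical calibration point from a "power_model" DT node. */
	unsigned long dynamic_power = 450;   /* mW measured at the point below */
	unsigned long voltage = 1100;        /* mV at the measurement point */
	unsigned long frequency_mhz = 500;   /* MHz at the measurement point (assumed unit) */

	/* Same scaling as power_model_simple_init():
	 * coefficient c in mW/(MHz * mV * mV), kept as an integer by scaling up by 1e6. */
	unsigned long voltage_squared = (voltage * voltage) / 1000;
	unsigned long dynamic_coefficient =
		(((dynamic_power * 1000) / voltage_squared) * 1000) / frequency_mhz;

	/* Same formula as rk_model_dynamic_power(): Pdyn = c * v^2 * f,
	 * with the frequency handed over in Hz at runtime. */
	unsigned long freq_hz = 500000000UL;
	unsigned long v2 = (voltage * voltage) / 1000;
	unsigned long f_mhz = freq_hz / 1000000;
	unsigned long pdyn_mw = (dynamic_coefficient * v2 * f_mhz) / 1000000;

	printf("coefficient=%lu, Pdyn=%lu mW\n", dynamic_coefficient, pdyn_mw);
	return 0;
}

Round-tripping the calibration point through both formulas recovers roughly the measured figure (448 mW rather than 450 mW here); the small loss comes from the successive integer divisions, which is acceptable for the devfreq cooling estimates this model feeds.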
-+init_core_mask_table_failed: -+ kbase_devfreq_term_freq_table(kbdev); ++/*---------------------------------------------------------------------------*/ + -+ return err; -+} ++#ifdef CONFIG_PM + -+void kbase_devfreq_term(struct kbase_device *kbdev) ++static int rk_platform_enable_clk_gpu(struct device *dev) +{ -+ int err; ++ int ret = 0; ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ dev_dbg(kbdev->dev, "Term Mali devfreq\n"); ++ ret = clk_bulk_enable(mdev->num_clks, mdev->clks); ++#endif ++ return ret; ++} + -+ rockchip_system_monitor_unregister(kbdev->mdev_info); -+#ifdef CONFIG_DEVFREQ_THERMAL -+ if (kbdev->devfreq_cooling) -+ devfreq_cooling_unregister(kbdev->devfreq_cooling); ++static void rk_platform_disable_clk_gpu(struct device *dev) ++{ ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ kbase_ipa_term(kbdev); ++ clk_bulk_disable(mdev->num_clks, mdev->clks); +#endif ++} + -+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++static int rk_platform_enable_gpu_regulator(struct device *dev) ++{ ++ int ret = 0; ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_REGULATOR) ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+ err = devfreq_remove_device(kbdev->devfreq); -+ if (err) -+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); -+ else -+ kbdev->devfreq = NULL; ++ if (mdev->regulator) ++ ret = regulator_enable(mdev->regulator); ++#endif ++ return ret; ++} + -+ kbase_devfreq_term_core_mask_table(kbdev); ++static void rk_platform_disable_gpu_regulator(struct device *dev) ++{ ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_REGULATOR) ++ struct mali_device *mdev = dev_get_drvdata(dev); ++ ++ if (mdev->regulator) ++ regulator_disable(mdev->regulator); ++#endif +} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h -new file mode 100644 -index 000000000..c0bf8b15b ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h -@@ -0,0 +1,24 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ + ++static int rk_platform_power_on_gpu(struct device *dev) ++{ ++ struct rk_context *platform = s_rk_context; ++ int ret = 0; + ++ if (!(platform->is_powered)) { ++ ret = rk_platform_enable_clk_gpu(dev); ++ if (ret) { ++ E("fail to enable clk_gpu, ret : %d.", ret); ++ goto fail_to_enable_clk; ++ } + -+#ifndef _BASE_DEVFREQ_H_ -+#define _BASE_DEVFREQ_H_ ++ ret = rk_platform_enable_gpu_regulator(dev); ++ if (ret) { ++ E("fail to enable vdd_gpu, ret : %d.", ret); ++ goto fail_to_enable_regulator; ++ } + -+int kbase_devfreq_init(struct kbase_device *kbdev); -+void kbase_devfreq_term(struct kbase_device *kbdev); ++ if (cpu_is_rk3528()) { ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+#endif /* _BASE_DEVFREQ_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c -new file mode 100644 -index 000000000..dcdf15cdc ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c -@@ -0,0 +1,255 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ clk_set_rate(mdev->clock, mdev->current_freq); ++#endif ++ } ++ platform->is_powered = true; ++ } + ++ return 0; + ++fail_to_enable_regulator: ++ rk_platform_disable_clk_gpu(dev); + ++fail_to_enable_clk: ++ return ret; ++} + -+/* -+ * -+ */ -+#include -+#include -+#include ++static void rk_platform_power_off_gpu(struct device *dev) ++{ ++ struct rk_context *platform = s_rk_context; + -+#include ++ if (platform->is_powered) { ++ if (cpu_is_rk3528()) { ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_HAVE_CLK) ++ struct mali_device *mdev = dev_get_drvdata(dev); + -+#if !defined(CONFIG_MALI_NO_MALI) ++ //use normal pll 200M for gpu when suspend ++ clk_set_rate(mdev->clock, 200000000); ++#endif ++ } ++ rk_platform_disable_clk_gpu(dev); ++ rk_platform_disable_gpu_regulator(dev); + ++ platform->is_powered = false; ++ } ++} + -+#ifdef CONFIG_DEBUG_FS ++int rk_platform_init_opp_table(struct mali_device *mdev) ++{ ++ return rockchip_init_opp_table(mdev->dev, &mdev->opp_info, ++ "clk_mali", "mali"); ++} + ++void rk_platform_uninit_opp_table(struct mali_device *mdev) ++{ ++ rockchip_uninit_opp_table(mdev->dev, &mdev->opp_info); ++} + -+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) ++static int mali_runtime_suspend(struct device *device) +{ -+ struct kbase_io_access *old_buf; -+ struct kbase_io_access *new_buf; -+ unsigned long flags; ++ int ret = 0; + -+ if (!new_size) -+ goto out_err; /* The new size must not be 0 */ ++ MALI_DEBUG_PRINT(4, ("mali_runtime_suspend() called\n")); + -+ new_buf = vmalloc(new_size * sizeof(*h->buf)); -+ if (!new_buf) -+ goto out_err; ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->runtime_suspend) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->runtime_suspend(device); ++ } + -+ spin_lock_irqsave(&h->lock, flags); ++ if (!ret) ++ rk_platform_power_off_gpu(device); + -+ old_buf = 
h->buf; ++ return ret; ++} + -+ /* Note: we won't bother with copying the old data over. The dumping -+ * logic wouldn't work properly as it relies on 'count' both as a -+ * counter and as an index to the buffer which would have changed with -+ * the new array. This is a corner case that we don't need to support. -+ */ -+ h->count = 0; -+ h->size = new_size; -+ h->buf = new_buf; ++static int mali_runtime_resume(struct device *device) ++{ ++ int ret = 0; + -+ spin_unlock_irqrestore(&h->lock, flags); ++ MALI_DEBUG_PRINT(4, ("mali_runtime_resume() called\n")); + -+ vfree(old_buf); ++ rk_platform_power_on_gpu(device); + -+ return 0; ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->runtime_resume) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->runtime_resume(device); ++ } + -+out_err: -+ return -1; ++ return ret; +} + -+ -+int kbase_io_history_init(struct kbase_io_history *h, u16 n) ++static int mali_runtime_idle(struct device *device) +{ -+ h->enabled = false; -+ spin_lock_init(&h->lock); -+ h->count = 0; -+ h->size = 0; -+ h->buf = NULL; -+ if (kbase_io_history_resize(h, n)) -+ return -1; ++ int ret = 0; + -+ return 0; -+} ++ MALI_DEBUG_PRINT(4, ("mali_runtime_idle() called\n")); + ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->runtime_idle) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->runtime_idle(device); ++ if (ret) ++ return ret; ++ } + -+void kbase_io_history_term(struct kbase_io_history *h) -+{ -+ vfree(h->buf); -+ h->buf = NULL; ++ return 0; +} ++#endif + -+ -+/* kbase_io_history_add - add new entry to the register access history -+ * -+ * @h: Pointer to the history data structure -+ * @addr: Register address -+ * @value: The value that is either read from or written to the register -+ * @write: 1 if it's a register write, 0 if it's a read -+ */ -+static void kbase_io_history_add(struct kbase_io_history *h, -+ void __iomem const *addr, u32 value, u8 write) ++static int mali_os_suspend(struct device *device) +{ -+ struct kbase_io_access *io; -+ unsigned long flags; ++ int ret = 0; + -+ spin_lock_irqsave(&h->lock, flags); ++ MALI_DEBUG_PRINT(4, ("mali_os_suspend() called\n")); + -+ io = &h->buf[h->count % h->size]; -+ io->addr = (uintptr_t)addr | write; -+ io->value = value; -+ ++h->count; -+ /* If count overflows, move the index by the buffer size so the entire -+ * buffer will still be dumped later */ -+ if (unlikely(!h->count)) -+ h->count = h->size; ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->suspend) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->suspend(device); ++ } + -+ spin_unlock_irqrestore(&h->lock, flags); -+} ++ if (!ret) ++ rk_platform_power_off_gpu(device); + ++ return ret; ++} + -+void kbase_io_history_dump(struct kbase_device *kbdev) ++static int mali_os_resume(struct device *device) +{ -+ struct kbase_io_history *const h = &kbdev->io_history; -+ u16 i; -+ size_t iters; -+ unsigned long flags; -+ -+ if (!unlikely(h->enabled)) -+ return; ++ int ret = 0; + -+ spin_lock_irqsave(&h->lock, flags); ++ MALI_DEBUG_PRINT(4, ("mali_os_resume() called\n")); + -+ dev_err(kbdev->dev, "Register IO History:"); -+ iters = (h->size > h->count) ? 
h->count : h->size; -+ dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, -+ h->count); -+ for (i = 0; i < iters; ++i) { -+ struct kbase_io_access *io = -+ &h->buf[(h->count - iters + i) % h->size]; -+ char const access = (io->addr & 1) ? 'w' : 'r'; ++ rk_platform_power_on_gpu(device); + -+ dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, -+ (void *)(io->addr & ~0x1), io->value); ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->resume) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->resume(device); + } + -+ spin_unlock_irqrestore(&h->lock, flags); ++ return ret; +} + -+ -+#endif /* CONFIG_DEBUG_FS */ -+ -+ -+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, -+ struct kbase_context *kctx) ++static int mali_os_freeze(struct device *device) +{ -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); -+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ int ret = 0; + -+ writel(value, kbdev->reg + offset); ++ MALI_DEBUG_PRINT(4, ("mali_os_freeze() called\n")); + -+#ifdef CONFIG_DEBUG_FS -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ value, 1); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->freeze) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->freeze(device); ++ } + -+ if (kctx && kctx->jctx.tb) -+ kbase_device_trace_register_access(kctx, REG_WRITE, offset, -+ value); ++ return ret; +} + -+KBASE_EXPORT_TEST_API(kbase_reg_write); -+ -+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, -+ struct kbase_context *kctx) ++static int mali_os_thaw(struct device *device) +{ -+ u32 val; -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); -+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ int ret = 0; + -+ val = readl(kbdev->reg + offset); ++ MALI_DEBUG_PRINT(4, ("mali_os_thaw() called\n")); + -+#ifdef CONFIG_DEBUG_FS -+ if (unlikely(kbdev->io_history.enabled)) -+ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, -+ val, 0); -+#endif /* CONFIG_DEBUG_FS */ -+ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); ++ if (device->driver && ++ device->driver->pm && ++ device->driver->pm->thaw) { ++ /* Need to notify Mali driver about this event */ ++ ret = device->driver->pm->thaw(device); ++ } + -+ if (kctx && kctx->jctx.tb) -+ kbase_device_trace_register_access(kctx, REG_READ, offset, val); -+ return val; ++ return ret; +} + -+KBASE_EXPORT_TEST_API(kbase_reg_read); -+#endif /* !defined(CONFIG_MALI_NO_MALI) */ ++static const struct dev_pm_ops mali_gpu_device_type_pm_ops = { ++ .suspend = mali_os_suspend, ++ .resume = mali_os_resume, ++ .freeze = mali_os_freeze, ++ .thaw = mali_os_thaw, ++#ifdef CONFIG_PM ++ .runtime_suspend = mali_runtime_suspend, ++ .runtime_resume = mali_runtime_resume, ++ .runtime_idle = mali_runtime_idle, ++#endif ++}; + -+/** -+ * kbase_report_gpu_fault - Report a GPU fault. -+ * @kbdev: Kbase device pointer -+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS -+ * was also set -+ * -+ * This function is called from the interrupt handler when a GPU fault occurs. -+ * It reports the details of the fault using dev_warn(). 
-+ */ -+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) -+{ -+ u32 status; -+ u64 address; ++static const struct device_type mali_gpu_device_device_type = { ++ .pm = &mali_gpu_device_type_pm_ops, ++}; + -+ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); -+ address = (u64) kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; -+ address |= kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); ++/* ++ * platform_specific_data of platform_device of mali_gpu. ++ */ ++static const struct mali_gpu_device_data mali_gpu_data = { ++ .shared_mem_size = 1024 * 1024 * 1024, /* 1GB */ ++ .max_job_runtime = 60000, /* 60 seconds */ ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ .gpu_cooling_ops = &rk_cooling_ops, ++#endif ++#endif ++}; + -+ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", -+ status & 0xFF, -+ kbase_exception_name(kbdev, status), -+ address); -+ if (multiple) -+ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); ++static void mali_platform_device_add_config(struct platform_device *pdev) ++{ ++ pdev->name = MALI_GPU_NAME_UTGARD, ++ pdev->id = 0; ++ pdev->dev.type = &mali_gpu_device_device_type; ++ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask, ++ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); +} + -+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) ++/*---------------------------------------------------------------------------*/ ++/* platform_device_functions called by common_part. */ ++ ++int mali_platform_device_init(struct platform_device *pdev) +{ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); -+ if (val & GPU_FAULT) -+ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); ++ int err = 0; + -+ if (val & RESET_COMPLETED) -+ kbase_pm_reset_done(kbdev); ++ mali_platform_device_add_config(pdev); + -+ if (val & PRFCNT_SAMPLE_COMPLETED) -+ kbase_instr_hwcnt_sample_done(kbdev); ++ D("to add platform_specific_data to platform_device_of_mali."); ++ err = platform_device_add_data(pdev, ++ &mali_gpu_data, ++ sizeof(mali_gpu_data)); ++ if (err) { ++ E("fail to add platform_specific_data. err : %d.", err); ++ return err; ++ } + -+ if (val & CLEAN_CACHES_COMPLETED) -+ kbase_clean_caches_done(kbdev); ++ err = rk_context_init(pdev); ++ if (err) { ++ E("fail to init rk_context. err : %d.", err); ++ return err; ++ } + -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++ if (of_machine_is_compatible("rockchip,rk3036")) ++ return 0; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ err = power_model_simple_init(pdev); ++ if (err) { ++ E("fail to init simple_power_model, err : %d.", err); ++ rk_context_deinit(pdev); ++ return err; ++ } ++#endif ++#endif ++ return 0; ++} + -+ /* kbase_pm_check_transitions must be called after the IRQ has been -+ * cleared. This is because it might trigger further power transitions -+ * and we don't want to miss the interrupt raised to notify us that -+ * these further transitions have finished. 
-+ */ -+ if (val & POWER_CHANGED_ALL) -+ kbase_pm_power_changed(kbdev); ++void mali_platform_device_deinit(struct platform_device *pdev) ++{ ++ MALI_DEBUG_PRINT(4, ("mali_platform_device_unregister() called\n")); + -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); ++ rk_context_deinit(pdev); +} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h +diff --git a/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h b/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h new file mode 100644 -index 000000000..5b2044593 +index 000000000..bd939350c --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h -@@ -0,0 +1,67 @@ -+/* ++++ b/drivers/gpu/arm/mali400/mali/platform/rk/rk_ext.h +@@ -0,0 +1,37 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ---------------------------------------------------------------------------- ++ * File: rk_ext.h + * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. ++ * Desc: rk_ext_on_mali_ko 中的 通行定义等. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Usage: + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Note: ++ * ++ * Author: ChenZhen ++ * ++ * Log: + * ++ * ---------------------------------------------------------------------------- + */ + ++#ifndef __RK_EXT_H__ ++#define __RK_EXT_H__ + ++#ifdef __cplusplus ++extern "C" { ++#endif + ++/*---------------------------------------------------------------------------*/ + ++/** version of rk_ext on mali_ko, aka. rk_ko_ver. */ ++#define RK_KO_VER (5) + -+/* -+ * Backend-specific HW access device APIs -+ */ ++/*---------------------------------------------------------------------------*/ + -+#ifndef _KBASE_DEVICE_INTERNAL_H_ -+#define _KBASE_DEVICE_INTERNAL_H_ ++#ifdef __cplusplus ++} ++#endif + -+/** -+ * kbase_reg_write - write to GPU register -+ * @kbdev: Kbase device pointer -+ * @offset: Offset of register -+ * @value: Value to write -+ * @kctx: Kbase context pointer. May be NULL -+ * -+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If -+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr -+ * != KBASEP_AS_NR_INVALID). -+ */ -+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, -+ struct kbase_context *kctx); ++#endif /* __RK_EXT_H__ */ + -+/** -+ * kbase_reg_read - read from GPU register -+ * @kbdev: Kbase device pointer -+ * @offset: Offset of register -+ * @kctx: Kbase context pointer. May be NULL -+ * -+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If -+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr -+ * != KBASEP_AS_NR_INVALID). 
-+ * -+ * Return: Value in desired register -+ */ -+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, -+ struct kbase_context *kctx); +diff --git a/drivers/gpu/arm/mali400/mali/readme.txt b/drivers/gpu/arm/mali400/mali/readme.txt +new file mode 100755 +index 000000000..6785ac933 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/readme.txt +@@ -0,0 +1,28 @@ ++Building the Mali Device Driver for Linux ++----------------------------------------- + ++Build the Mali Device Driver for Linux by running the following make command: + -+/** -+ * kbase_gpu_interrupt - GPU interrupt handler -+ * @kbdev: Kbase device pointer -+ * @val: The value of the GPU IRQ status register which triggered the call -+ * -+ * This function is called from the interrupt handler when a GPU irq is to be -+ * handled. -+ */ -+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); ++KDIR= USING_UMP= BUILD= make + -+#endif /* _KBASE_DEVICE_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c -new file mode 100644 -index 000000000..d578fd78e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c -@@ -0,0 +1,123 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++where ++ kdir_path: Path to your Linux Kernel directory ++ ump_option: 1 = Enable UMP support(*) ++ 0 = disable UMP support ++ build_option: debug = debug build of driver ++ release = release build of driver + ++(*) For newer Linux Kernels, the Module.symvers file for the UMP device driver ++ must be available. The UMP_SYMVERS_FILE variable in the Makefile should ++ point to this file. This file is generated when the UMP driver is built. + ++The result will be a mali.ko file, which can be loaded into the Linux kernel ++by using the insmod command. + ++Use of UMP is not recommended. The dma-buf API in the Linux kernel has ++replaced UMP. The Mali Device Driver will be built with dma-buf support if the ++kernel config includes enabled dma-buf. + ++The kernel needs to be provided with a platform_device struct for the Mali GPU ++device. See the mali_utgard.h header file for how to set up the Mali GPU ++resources. +diff --git a/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h b/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h +new file mode 100644 +index 000000000..0345fb169 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/regs/mali_200_regs.h +@@ -0,0 +1,131 @@ +/* -+ * Register-based HW access backend APIs ++ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int kbase_backend_early_init(struct kbase_device *kbdev) -+{ -+ int err; -+ -+ err = kbasep_platform_device_init(kbdev); -+ if (err) -+ return err; + -+ /* Ensure we can access the GPU registers */ -+ kbase_pm_register_access_enable(kbdev); ++#ifndef _MALI200_REGS_H_ ++#define _MALI200_REGS_H_ + -+ /* Find out GPU properties based on the GPU feature registers */ -+ kbase_gpuprops_set(kbdev); ++/** ++ * Enum for management register addresses. ++ */ ++enum mali200_mgmt_reg { ++ MALI200_REG_ADDR_MGMT_VERSION = 0x1000, ++ MALI200_REG_ADDR_MGMT_CURRENT_REND_LIST_ADDR = 0x1004, ++ MALI200_REG_ADDR_MGMT_STATUS = 0x1008, ++ MALI200_REG_ADDR_MGMT_CTRL_MGMT = 0x100c, + -+ /* We're done accessing the GPU registers for now. */ -+ kbase_pm_register_access_disable(kbdev); ++ MALI200_REG_ADDR_MGMT_INT_RAWSTAT = 0x1020, ++ MALI200_REG_ADDR_MGMT_INT_CLEAR = 0x1024, ++ MALI200_REG_ADDR_MGMT_INT_MASK = 0x1028, ++ MALI200_REG_ADDR_MGMT_INT_STATUS = 0x102c, + -+ err = kbase_hwaccess_pm_init(kbdev); -+ if (err) -+ goto fail_pm; ++ MALI200_REG_ADDR_MGMT_BUS_ERROR_STATUS = 0x1050, + -+ err = kbase_install_interrupts(kbdev); -+ if (err) -+ goto fail_interrupts; ++ MALI200_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x1080, ++ MALI200_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x1084, ++ MALI200_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x1088, ++ MALI200_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x108c, + -+ return 0; ++ MALI200_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x10a0, ++ MALI200_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x10a4, ++ MALI200_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x10ac, + -+fail_interrupts: -+ kbase_hwaccess_pm_term(kbdev); -+fail_pm: -+ kbasep_platform_device_term(kbdev); ++ MALI200_REG_ADDR_MGMT_PERFMON_CONTR = 0x10b0, ++ MALI200_REG_ADDR_MGMT_PERFMON_BASE = 0x10b4, + -+ return err; -+} ++ MALI200_REG_SIZEOF_REGISTER_BANK = 0x10f0 + -+void kbase_backend_early_term(struct kbase_device *kbdev) -+{ -+ kbase_release_interrupts(kbdev); -+ kbase_hwaccess_pm_term(kbdev); -+ kbasep_platform_device_term(kbdev); -+} ++}; + -+int kbase_backend_late_init(struct kbase_device *kbdev) -+{ -+ int err; ++#define MALI200_REG_VAL_PERF_CNT_ENABLE 1 + -+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); -+ if (err) -+ return err; ++enum mali200_mgmt_ctrl_mgmt { ++ MALI200_REG_VAL_CTRL_MGMT_STOP_BUS = (1 << 0), ++ MALI200_REG_VAL_CTRL_MGMT_FLUSH_CACHES = (1 << 3), ++ MALI200_REG_VAL_CTRL_MGMT_FORCE_RESET = (1 << 5), ++ MALI200_REG_VAL_CTRL_MGMT_START_RENDERING = (1 << 6), ++ MALI400PP_REG_VAL_CTRL_MGMT_SOFT_RESET = (1 << 7), /* Only valid for Mali-300 and later */ ++}; + -+ err = kbase_backend_timer_init(kbdev); -+ if (err) -+ goto fail_timer; ++enum mali200_mgmt_irq { ++ MALI200_REG_VAL_IRQ_END_OF_FRAME = (1 << 0), ++ MALI200_REG_VAL_IRQ_END_OF_TILE = (1 << 1), ++ MALI200_REG_VAL_IRQ_HANG = (1 << 2), ++ MALI200_REG_VAL_IRQ_FORCE_HANG = (1 << 3), ++ MALI200_REG_VAL_IRQ_BUS_ERROR = (1 << 4), ++ MALI200_REG_VAL_IRQ_BUS_STOP = (1 << 5), ++ MALI200_REG_VAL_IRQ_CNT_0_LIMIT = (1 << 6), ++ MALI200_REG_VAL_IRQ_CNT_1_LIMIT = (1 << 7), ++ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR = (1 << 8), ++ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND = (1 << 9), ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW = (1 << 10), ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW = (1 << 11), ++ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED = (1 << 12), ++}; + -+#ifdef CONFIG_MALI_DEBUG -+#ifndef CONFIG_MALI_NO_MALI -+ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { -+ dev_err(kbdev->dev, "Interrupt assigment check 
failed.\n"); -+ err = -EINVAL; -+ goto fail_interrupt_test; -+ } -+#endif /* !CONFIG_MALI_NO_MALI */ -+#endif /* CONFIG_MALI_DEBUG */ ++#define MALI200_REG_VAL_IRQ_MASK_ALL ((enum mali200_mgmt_irq) (\ ++ MALI200_REG_VAL_IRQ_END_OF_FRAME |\ ++ MALI200_REG_VAL_IRQ_END_OF_TILE |\ ++ MALI200_REG_VAL_IRQ_HANG |\ ++ MALI200_REG_VAL_IRQ_FORCE_HANG |\ ++ MALI200_REG_VAL_IRQ_BUS_ERROR |\ ++ MALI200_REG_VAL_IRQ_BUS_STOP |\ ++ MALI200_REG_VAL_IRQ_CNT_0_LIMIT |\ ++ MALI200_REG_VAL_IRQ_CNT_1_LIMIT |\ ++ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\ ++ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\ ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\ ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW |\ ++ MALI400PP_REG_VAL_IRQ_RESET_COMPLETED)) + -+ err = kbase_job_slot_init(kbdev); -+ if (err) -+ goto fail_job_slot; ++#define MALI200_REG_VAL_IRQ_MASK_USED ((enum mali200_mgmt_irq) (\ ++ MALI200_REG_VAL_IRQ_END_OF_FRAME |\ ++ MALI200_REG_VAL_IRQ_FORCE_HANG |\ ++ MALI200_REG_VAL_IRQ_BUS_ERROR |\ ++ MALI200_REG_VAL_IRQ_WRITE_BOUNDARY_ERROR |\ ++ MALI400PP_REG_VAL_IRQ_INVALID_PLIST_COMMAND |\ ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_UNDERFLOW |\ ++ MALI400PP_REG_VAL_IRQ_CALL_STACK_OVERFLOW)) + -+ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++#define MALI200_REG_VAL_IRQ_MASK_NONE ((enum mali200_mgmt_irq)(0)) + -+ return 0; ++enum mali200_mgmt_status { ++ MALI200_REG_VAL_STATUS_RENDERING_ACTIVE = (1 << 0), ++ MALI200_REG_VAL_STATUS_BUS_STOPPED = (1 << 4), ++}; + -+fail_job_slot: ++enum mali200_render_unit { ++ MALI200_REG_ADDR_FRAME = 0x0000, ++ MALI200_REG_ADDR_RSW = 0x0004, ++ MALI200_REG_ADDR_STACK = 0x0030, ++ MALI200_REG_ADDR_STACK_SIZE = 0x0034, ++ MALI200_REG_ADDR_ORIGIN_OFFSET_X = 0x0040 ++}; + -+#ifdef CONFIG_MALI_DEBUG -+#ifndef CONFIG_MALI_NO_MALI -+fail_interrupt_test: -+#endif /* !CONFIG_MALI_NO_MALI */ -+#endif /* CONFIG_MALI_DEBUG */ ++enum mali200_wb_unit { ++ MALI200_REG_ADDR_WB0 = 0x0100, ++ MALI200_REG_ADDR_WB1 = 0x0200, ++ MALI200_REG_ADDR_WB2 = 0x0300 ++}; + -+ kbase_backend_timer_term(kbdev); -+fail_timer: -+ kbase_hwaccess_pm_halt(kbdev); ++enum mali200_wb_unit_regs { ++ MALI200_REG_ADDR_WB_SOURCE_SELECT = 0x0000, ++ MALI200_REG_ADDR_WB_SOURCE_ADDR = 0x0004, ++}; + -+ return err; -+} ++/* This should be in the top 16 bit of the version register of Mali PP */ ++#define MALI200_PP_PRODUCT_ID 0xC807 ++#define MALI300_PP_PRODUCT_ID 0xCE07 ++#define MALI400_PP_PRODUCT_ID 0xCD07 ++#define MALI450_PP_PRODUCT_ID 0xCF07 ++#define MALI470_PP_PRODUCT_ID 0xCF08 + -+void kbase_backend_late_term(struct kbase_device *kbdev) -+{ -+ kbase_job_slot_halt(kbdev); -+ kbase_job_slot_term(kbdev); -+ kbase_backend_timer_term(kbdev); -+ kbase_hwaccess_pm_halt(kbdev); -+} + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c ++ ++#endif /* _MALI200_REGS_H_ */ +diff --git a/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h b/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h new file mode 100644 -index 000000000..b395325b5 +index 000000000..7f8b58fd6 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c -@@ -0,0 +1,110 @@ ++++ b/drivers/gpu/arm/mali400/mali/regs/mali_gp_regs.h +@@ -0,0 +1,172 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010, 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef _MALIGP2_CONROL_REGS_H_ ++#define _MALIGP2_CONROL_REGS_H_ + ++/** ++ * These are the different geometry processor control registers. ++ * Their usage is to control and monitor the operation of the ++ * Vertex Shader and the Polygon List Builder in the geometry processor. ++ * Addresses are in 32-bit word relative sizes. ++ * @see [P0081] "Geometry Processor Data Structures" for details ++ */ + ++typedef enum { ++ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR = 0x00, ++ MALIGP2_REG_ADDR_MGMT_VSCL_END_ADDR = 0x04, ++ MALIGP2_REG_ADDR_MGMT_PLBUCL_START_ADDR = 0x08, ++ MALIGP2_REG_ADDR_MGMT_PLBUCL_END_ADDR = 0x0c, ++ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_START_ADDR = 0x10, ++ MALIGP2_REG_ADDR_MGMT_PLBU_ALLOC_END_ADDR = 0x14, ++ MALIGP2_REG_ADDR_MGMT_CMD = 0x20, ++ MALIGP2_REG_ADDR_MGMT_INT_RAWSTAT = 0x24, ++ MALIGP2_REG_ADDR_MGMT_INT_CLEAR = 0x28, ++ MALIGP2_REG_ADDR_MGMT_INT_MASK = 0x2C, ++ MALIGP2_REG_ADDR_MGMT_INT_STAT = 0x30, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_ENABLE = 0x3C, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_ENABLE = 0x40, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC = 0x44, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_SRC = 0x48, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_VALUE = 0x4C, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_1_VALUE = 0x50, ++ MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_LIMIT = 0x54, ++ MALIGP2_REG_ADDR_MGMT_STATUS = 0x68, ++ MALIGP2_REG_ADDR_MGMT_VERSION = 0x6C, ++ MALIGP2_REG_ADDR_MGMT_VSCL_START_ADDR_READ = 0x80, ++ MALIGP2_REG_ADDR_MGMT_PLBCL_START_ADDR_READ = 0x84, ++ MALIGP2_CONTR_AXI_BUS_ERROR_STAT = 0x94, ++ MALIGP2_REGISTER_ADDRESS_SPACE_SIZE = 0x98, ++} maligp_reg_addr_mgmt_addr; + ++#define MALIGP2_REG_VAL_PERF_CNT_ENABLE 1 + -+/* -+ * Base kernel property query backend APIs ++/** ++ * Commands to geometry processor. ++ * @see MALIGP2_CTRL_REG_CMD + */ ++typedef enum { ++ MALIGP2_REG_VAL_CMD_START_VS = (1 << 0), ++ MALIGP2_REG_VAL_CMD_START_PLBU = (1 << 1), ++ MALIGP2_REG_VAL_CMD_UPDATE_PLBU_ALLOC = (1 << 4), ++ MALIGP2_REG_VAL_CMD_RESET = (1 << 5), ++ MALIGP2_REG_VAL_CMD_FORCE_HANG = (1 << 6), ++ MALIGP2_REG_VAL_CMD_STOP_BUS = (1 << 9), ++ MALI400GP_REG_VAL_CMD_SOFT_RESET = (1 << 10), /* only valid for Mali-300 and later */ ++} mgp_contr_reg_val_cmd; + -+#include -+#include -+#include -+#include -+ -+void kbase_backend_gpuprops_get(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump) -+{ -+ int i; + -+ /* Fill regdump with the content of the relevant registers */ -+ regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); ++/** @defgroup MALIGP2_IRQ ++ * Interrupt status of geometry processor. 
++ * @see MALIGP2_CTRL_REG_INT_RAWSTAT, MALIGP2_REG_ADDR_MGMT_INT_CLEAR, ++ * MALIGP2_REG_ADDR_MGMT_INT_MASK, MALIGP2_REG_ADDR_MGMT_INT_STAT ++ * @{ ++ */ ++#define MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST (1 << 0) ++#define MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST (1 << 1) ++#define MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM (1 << 2) ++#define MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ (1 << 3) ++#define MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ (1 << 4) ++#define MALIGP2_REG_VAL_IRQ_HANG (1 << 5) ++#define MALIGP2_REG_VAL_IRQ_FORCE_HANG (1 << 6) ++#define MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT (1 << 7) ++#define MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT (1 << 8) ++#define MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR (1 << 9) ++#define MALIGP2_REG_VAL_IRQ_SYNC_ERROR (1 << 10) ++#define MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR (1 << 11) ++#define MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED (1 << 12) ++#define MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD (1 << 13) ++#define MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD (1 << 14) ++#define MALI400GP_REG_VAL_IRQ_RESET_COMPLETED (1 << 19) ++#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW (1 << 20) ++#define MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW (1 << 21) ++#define MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS (1 << 22) + -+ regdump->l2_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_FEATURES), NULL); -+ regdump->suspend_size = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SUSPEND_SIZE), NULL); -+ regdump->tiler_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_FEATURES), NULL); -+ regdump->mem_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(MEM_FEATURES), NULL); -+ regdump->mmu_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(MMU_FEATURES), NULL); -+ regdump->as_present = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(AS_PRESENT), NULL); -+ regdump->js_present = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(JS_PRESENT), NULL); ++/* Mask defining all IRQs in Mali GP */ ++#define MALIGP2_REG_VAL_IRQ_MASK_ALL \ ++ (\ ++ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \ ++ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \ ++ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \ ++ MALIGP2_REG_VAL_IRQ_VS_SEM_IRQ | \ ++ MALIGP2_REG_VAL_IRQ_PLBU_SEM_IRQ | \ ++ MALIGP2_REG_VAL_IRQ_HANG | \ ++ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \ ++ MALIGP2_REG_VAL_IRQ_PERF_CNT_0_LIMIT | \ ++ MALIGP2_REG_VAL_IRQ_PERF_CNT_1_LIMIT | \ ++ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \ ++ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \ ++ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \ ++ MALI400GP_REG_VAL_IRQ_AXI_BUS_STOPPED | \ ++ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \ ++ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \ ++ MALI400GP_REG_VAL_IRQ_RESET_COMPLETED | \ ++ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \ ++ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \ ++ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS) + -+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) -+ regdump->js_features[i] = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); ++/* Mask defining the IRQs in Mali GP which we use */ ++#define MALIGP2_REG_VAL_IRQ_MASK_USED \ ++ (\ ++ MALIGP2_REG_VAL_IRQ_VS_END_CMD_LST | \ ++ MALIGP2_REG_VAL_IRQ_PLBU_END_CMD_LST | \ ++ MALIGP2_REG_VAL_IRQ_PLBU_OUT_OF_MEM | \ ++ MALIGP2_REG_VAL_IRQ_FORCE_HANG | \ ++ MALIGP2_REG_VAL_IRQ_WRITE_BOUND_ERR | \ ++ MALIGP2_REG_VAL_IRQ_SYNC_ERROR | \ ++ MALIGP2_REG_VAL_IRQ_AXI_BUS_ERROR | \ ++ MALI400GP_REG_VAL_IRQ_VS_INVALID_CMD | \ ++ MALI400GP_REG_VAL_IRQ_PLB_INVALID_CMD | \ ++ MALI400GP_REG_VAL_IRQ_SEMAPHORE_UNDERFLOW | \ ++ MALI400GP_REG_VAL_IRQ_SEMAPHORE_OVERFLOW | \ ++ MALI400GP_REG_VAL_IRQ_PTR_ARRAY_OUT_OF_BOUNDS) + -+ for (i = 0; i < 
BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ regdump->texture_features[i] = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); ++/* Mask defining non IRQs on MaliGP2*/ ++#define MALIGP2_REG_VAL_IRQ_MASK_NONE 0 + -+ regdump->thread_max_threads = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); -+ regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), -+ NULL); -+ regdump->thread_max_barrier_size = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); -+ regdump->thread_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(THREAD_FEATURES), NULL); ++/** }@ defgroup MALIGP2_IRQ*/ + -+ regdump->shader_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); -+ regdump->shader_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); ++/** @defgroup MALIGP2_STATUS ++ * The different Status values to the geometry processor. ++ * @see MALIGP2_CTRL_REG_STATUS ++ * @{ ++ */ ++#define MALIGP2_REG_VAL_STATUS_VS_ACTIVE 0x0002 ++#define MALIGP2_REG_VAL_STATUS_BUS_STOPPED 0x0004 ++#define MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE 0x0008 ++#define MALIGP2_REG_VAL_STATUS_BUS_ERROR 0x0040 ++#define MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR 0x0100 ++/** }@ defgroup MALIGP2_STATUS*/ + -+ regdump->tiler_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); -+ regdump->tiler_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); ++#define MALIGP2_REG_VAL_STATUS_MASK_ACTIVE (\ ++ MALIGP2_REG_VAL_STATUS_VS_ACTIVE|\ ++ MALIGP2_REG_VAL_STATUS_PLBU_ACTIVE) + -+ regdump->l2_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_LO), NULL); -+ regdump->l2_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_PRESENT_HI), NULL); + -+ regdump->stack_present_lo = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(STACK_PRESENT_LO), NULL); -+ regdump->stack_present_hi = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(STACK_PRESENT_HI), NULL); -+} ++#define MALIGP2_REG_VAL_STATUS_MASK_ERROR (\ ++ MALIGP2_REG_VAL_STATUS_BUS_ERROR |\ ++ MALIGP2_REG_VAL_STATUS_WRITE_BOUND_ERR ) + -+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump) -+{ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { -+ /* Ensure we can access the GPU registers */ -+ kbase_pm_register_access_enable(kbdev); ++/* This should be in the top 16 bit of the version register of gp.*/ ++#define MALI200_GP_PRODUCT_ID 0xA07 ++#define MALI300_GP_PRODUCT_ID 0xC07 ++#define MALI400_GP_PRODUCT_ID 0xB07 ++#define MALI450_GP_PRODUCT_ID 0xD07 + -+ regdump->coherency_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); ++/** ++ * The different sources for instrumented on the geometry processor. ++ * @see MALIGP2_REG_ADDR_MGMT_PERF_CNT_0_SRC ++ */ + -+ /* We're done accessing the GPU registers for now. 
*/ -+ kbase_pm_register_access_disable(kbdev); -+ } else { -+ /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ -+ regdump->coherency_features = -+ COHERENCY_FEATURE_BIT(COHERENCY_NONE) | -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); -+ } -+} ++enum MALIGP2_cont_reg_perf_cnt_src { ++ MALIGP2_REG_VAL_PERF_CNT1_SRC_NUMBER_OF_VERTICES_PROCESSED = 0x0a, ++}; + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c ++#endif +diff --git a/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c new file mode 100644 -index 000000000..7ad309e8d +index 000000000..7df934c12 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c -@@ -0,0 +1,492 @@ ++++ b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.c +@@ -0,0 +1,13 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#include "mali_timestamp.h" + -+ -+ -+ ++/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */ +diff --git a/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h +new file mode 100644 +index 000000000..f52097c19 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/timestamp-arm11-cc/mali_timestamp.h +@@ -0,0 +1,48 @@ +/* -+ * GPU backend instrumentation APIs. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/** -+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to -+ * hardware -+ * -+ * @kbdev: Kbase device ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ -+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ unsigned long pm_flags; -+ u32 irq_mask; -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_REQUEST_CLEAN); -+ -+ /* Enable interrupt */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask | CLEAN_CACHES_COMPLETED, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + -+ /* clean&invalidate the caches so we're sure the mmu tables for the dump -+ * buffer is valid */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAN_INV_CACHES, NULL); -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; ++#ifndef __MALI_TIMESTAMP_H__ ++#define __MALI_TIMESTAMP_H__ + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+} ++#include "mali_osk.h" + -+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_uk_hwcnt_setup *setup) ++MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void) +{ -+ unsigned long flags, pm_flags; -+ int err = -EINVAL; -+ u32 irq_mask; -+ int ret; -+ u64 shader_cores_needed; -+ u32 prfcnt_config; -+ -+ shader_cores_needed = kbase_pm_get_present_cores(kbdev, -+ KBASE_PM_CORE_SHADER); -+ -+ /* alignment failure */ -+ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) -+ goto out_err; -+ -+ /* Override core availability policy to ensure all cores are available -+ */ -+ kbase_pm_ca_instr_enable(kbdev); -+ -+ /* Request the cores early on synchronously - we'll release them on any -+ * errors (e.g. 
instrumentation already active) */ -+ kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { -+ /* Instrumentation is already enabled */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ goto out_unrequest_cores; -+ } -+ -+ /* Enable interrupt */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | -+ PRFCNT_SAMPLE_COMPLETED, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); -+ -+ /* In use, this context is the owner */ -+ kbdev->hwcnt.kctx = kctx; -+ /* Remember the dump address so we can reprogram it later */ -+ kbdev->hwcnt.addr = setup->dump_buffer; -+ -+ /* Request the clean */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; -+ kbdev->hwcnt.backend.triggered = 0; -+ /* Clean&invalidate the caches so we're sure the mmu tables for the dump -+ * buffer is valid */ -+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, -+ &kbdev->hwcnt.backend.cache_clean_work); -+ KBASE_DEBUG_ASSERT(ret); -+ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ -+ /* Wait for cacheclean to complete */ -+ wait_event(kbdev->hwcnt.backend.wait, -+ kbdev->hwcnt.backend.triggered != 0); -+ -+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_IDLE); -+ -+ kbase_pm_request_l2_caches(kbdev); -+ -+ /* Configure */ -+ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY -+ { -+ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) -+ >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); -+ -+ if (arch_v6) -+ prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; -+ } -+#endif -+ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), -+ prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); -+ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), -+ setup->dump_buffer & 0xFFFFFFFF, kctx); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), -+ setup->dump_buffer >> 32, kctx); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), -+ setup->jm_bm, kctx); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), -+ setup->shader_bm, kctx); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), -+ setup->mmu_l2_bm, kctx); -+ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the -+ * HW counter dump. 
*/ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, -+ kctx); -+ else -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), -+ setup->tiler_bm, kctx); -+ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), -+ prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); -+ -+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump ++ /* ++ * reset counters and overflow flags + */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), -+ setup->tiler_bm, kctx); -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ u32 mask = (1 << 0) | /* enable all three counters */ ++ (0 << 1) | /* reset both Count Registers to 0x0 */ ++ (1 << 2) | /* reset the Cycle Counter Register to 0x0 */ ++ (0 << 3) | /* 1 = Cycle Counter Register counts every 64th processor clock cycle */ ++ (0 << 4) | /* Count Register 0 interrupt enable */ ++ (0 << 5) | /* Count Register 1 interrupt enable */ ++ (0 << 6) | /* Cycle Counter interrupt enable */ ++ (0 << 8) | /* Count Register 0 overflow flag (clear or write, flag on read) */ ++ (0 << 9) | /* Count Register 1 overflow flag (clear or write, flag on read) */ ++ (1 << 10); /* Cycle Counter Register overflow flag (clear or write, flag on read) */ + -+ err = 0; ++ __asm__ __volatile__("MCR p15, 0, %0, c15, c12, 0" : : "r"(mask)); + -+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); -+ return err; -+ out_unrequest_cores: -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ out_err: -+ return err; ++ return _MALI_OSK_ERR_OK; +} + -+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) ++MALI_STATIC_INLINE u64 _mali_timestamp_get(void) +{ -+ unsigned long flags, pm_flags; -+ int err = -EINVAL; -+ u32 irq_mask; -+ struct kbase_device *kbdev = kctx->kbdev; -+ -+ while (1) { -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { -+ /* Instrumentation is not enabled */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ goto out; -+ } -+ -+ if (kbdev->hwcnt.kctx != kctx) { -+ /* Instrumentation has been setup for another context */ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ goto out; -+ } -+ -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) -+ break; -+ -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ -+ /* Ongoing dump/setup - wait for its completion */ -+ wait_event(kbdev->hwcnt.backend.wait, -+ kbdev->hwcnt.backend.triggered != 0); -+ } -+ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; -+ kbdev->hwcnt.backend.triggered = 0; -+ -+ /* Disable interrupt */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); -+ -+ /* Disable the counters */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); ++ u32 result; + -+ kbdev->hwcnt.kctx = NULL; -+ kbdev->hwcnt.addr = 0ULL; ++ /* this is for the clock cycles */ ++ __asm__ __volatile__("MRC p15, 0, %0, c15, c12, 1" : "=r"(result)); + -+ 
kbase_pm_ca_instr_disable(kbdev); ++ return (u64)result; ++} + -+ kbase_pm_unrequest_cores(kbdev, true, -+ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); ++#endif /* __MALI_TIMESTAMP_H__ */ +diff --git a/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c +new file mode 100644 +index 000000000..7df934c12 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.c +@@ -0,0 +1,13 @@ ++/* ++ * Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ kbase_pm_release_l2_caches(kbdev); ++#include "mali_timestamp.h" + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++/* This file is intentionally left empty, as all functions are inlined in mali_profiling_sampler.h */ +diff --git a/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h +new file mode 100644 +index 000000000..709a16a82 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/mali/timestamp-default/mali_timestamp.h +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
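The arm11-cc timestamp backend shown above takes its timestamps straight from the ARM11 cycle counter: the MCR into CP15 c15, c12, 0 programs the performance monitor control register with the bit mask documented field by field in the hunk, and the MRC from c15, c12, 1 then reads the free-running cycle count. As a quick cross-check of that bit layout, here is a standalone illustration (an editorial sketch, not part of the patch) that recomputes the mask: only bits 0, 2 and 10 are set, so the control word written by the "reset counters and overflow flags" path is 0x405.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Same layout as the driver's comment block: bit 0 enables the
         * three counters, bit 2 resets the cycle counter, bit 10 clears
         * its overflow flag; every interrupt-enable bit stays 0. */
        uint32_t mask = (1u << 0) | (1u << 2) | (1u << 10);

        printf("CP15 c15, c12, 0 control word: 0x%03x\n", mask);
        assert(mask == 0x405u);
        return 0;
    }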
++ */ + -+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", -+ kctx); ++#ifndef __MALI_TIMESTAMP_H__ ++#define __MALI_TIMESTAMP_H__ + -+ err = 0; ++#include "mali_osk.h" + -+ out: -+ return err; ++MALI_STATIC_INLINE _mali_osk_errcode_t _mali_timestamp_reset(void) ++{ ++ return _MALI_OSK_ERR_OK; +} + -+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) ++MALI_STATIC_INLINE u64 _mali_timestamp_get(void) +{ -+ unsigned long flags; -+ int err = -EINVAL; -+ struct kbase_device *kbdev = kctx->kbdev; -+ -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ -+ if (kbdev->hwcnt.kctx != kctx) { -+ /* The instrumentation has been setup for another context */ -+ goto unlock; -+ } -+ -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { -+ /* HW counters are disabled or another dump is ongoing, or we're -+ * resetting */ -+ goto unlock; -+ } -+ -+ kbdev->hwcnt.backend.triggered = 0; -+ -+ /* Mark that we're dumping - the PF handler can signal that we faulted -+ */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; -+ -+ /* Reconfigure the dump address */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), -+ kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), -+ kbdev->hwcnt.addr >> 32, NULL); -+ -+ /* Start dumping */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, -+ kbdev->hwcnt.addr, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_PRFCNT_SAMPLE, kctx); -+ -+ dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); -+ -+ err = 0; -+ -+ unlock: -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ return err; ++ return _mali_osk_boot_time_get_ns(); +} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + -+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, -+ bool * const success) -+{ -+ unsigned long flags; -+ bool complete = false; -+ struct kbase_device *kbdev = kctx->kbdev; ++#endif /* __MALI_TIMESTAMP_H__ */ +diff --git a/drivers/gpu/arm/mali400/rk_ver_info.txt b/drivers/gpu/arm/mali400/rk_ver_info.txt +new file mode 100755 +index 000000000..2a6cbbbb5 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/rk_ver_info.txt +@@ -0,0 +1,11 @@ + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++r5p0-01rel0-1-x@0 ++ 对 arm_release_ver r5p0-01rel0 的定制集æˆ. ++ r5p0-01rel0 对 gpu çš„ dts 有大修改, 但这里出于兼容考虑, 仿—§ä½¿ç”¨ dts_for_mali_ko_befor_r5p0-01rel0. + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { -+ *success = true; -+ complete = true; -+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ *success = false; -+ complete = true; -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ } ++r5p0-01rel0-2-x@0 ++ æ”¯æŒ mali_so æ¥èŽ·å– rk_ko_ver. ++ ++r5p0-01rel0-3-x@0 ++ 在 mali_control_timer_callback_chain 中使用 mod_timer, 而ä¸å†æ˜¯ add_timer. + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +diff --git a/drivers/gpu/arm/mali400/ump/Kbuild b/drivers/gpu/arm/mali400/ump/Kbuild +new file mode 100755 +index 000000000..a3067ba72 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/Kbuild +@@ -0,0 +1,92 @@ ++# ++# Copyright (C) 2010-2012 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
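The timestamp-default variant, by contrast, touches no counters at all: its _mali_timestamp_reset() is a no-op and _mali_timestamp_get() simply returns _mali_osk_boot_time_get_ns(), presumably nanoseconds since boot as the name suggests. A userspace analogue of that choice (an illustrative sketch only; the OSK helper itself is assumed to wrap the equivalent in-kernel clock) looks like this:

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t boot_time_ns(void)
    {
        struct timespec ts;

        /* CLOCK_BOOTTIME keeps counting across suspend, which makes it a
         * reasonable stand-in for a "nanoseconds since boot" timestamp. */
        clock_gettime(CLOCK_BOOTTIME, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    int main(void)
    {
        printf("ns since boot: %llu\n", (unsigned long long)boot_time_ns());
        return 0;
    }

On a separate note, the rk_ver_info.txt entries added above are Chinese release notes that arrive here partly mis-encoded; they read, roughly: r5p0-01rel0-1-x@0 is the Rockchip integration of the arm_release_ver r5p0-01rel0 drop, which keeps using dts_for_mali_ko_befor_r5p0-01rel0 for compatibility even though r5p0-01rel0 reworked the GPU dts; r5p0-01rel0-2-x@0 lets mali_so query rk_ko_ver; and r5p0-01rel0-3-x@0 switches mali_control_timer_callback_chain to mod_timer instead of add_timer.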
++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+ return complete; -+} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); ++# Set default configuration to use, if Makefile didn't provide one. ++# Change this to use a different config.h ++CONFIG ?= default + -+void kbasep_cache_clean_worker(struct work_struct *data) -+{ -+ struct kbase_device *kbdev; -+ unsigned long flags; ++# Link arch to the selected arch-config directory ++$(shell [ -L $(src)/arch ] && rm $(src)/arch) ++$(shell ln -sf arch-$(CONFIG) $(src)/arch) ++$(shell touch $(src)/arch/config.h) + -+ kbdev = container_of(data, struct kbase_device, -+ hwcnt.backend.cache_clean_work); ++UDD_FILE_PREFIX = ../mali/ + -+ mutex_lock(&kbdev->cacheclean_lock); -+ kbasep_instr_hwcnt_cacheclean(kbdev); ++# Get subversion revision number, fall back to 0000 if no svn info is available ++SVN_INFO = (cd $(src); svn info 2>/dev/null) + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ /* Wait for our condition, and any reset to complete */ -+ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ wait_event(kbdev->hwcnt.backend.cache_clean_wait, -+ kbdev->hwcnt.backend.state != -+ KBASE_INSTR_STATE_CLEANING); -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ } -+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_CLEANED); ++ifneq ($(shell $(SVN_INFO) 2>/dev/null),) ++# SVN detected ++SVN_REV := $(shell $(SVN_INFO) | grep '^Revision: '| sed -e 's/^Revision: //' 2>/dev/null) ++DRIVER_REV := $(MALI_RELEASE_NAME)-r$(SVN_REV) ++CHANGE_DATE := $(shell $(SVN_INFO) | grep '^Last Changed Date: ' | cut -d: -f2- | cut -b2-) ++CHANGED_REVISION := $(shell $(SVN_INFO) | grep '^Last Changed Rev: ' | cut -d: -f2- | cut -b2-) ++REPO_URL := $(shell $(SVN_INFO) | grep '^URL: ' | cut -d: -f2- | cut -b2-) + -+ /* All finished and idle */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); ++else # SVN ++GIT_REV := $(shell cd $(src); git describe --always 2>/dev/null) ++ifneq ($(GIT_REV),) ++# Git detected ++DRIVER_REV := $(MALI_RELEASE_NAME)-$(GIT_REV) ++CHANGE_DATE := $(shell cd $(src); git log -1 --format="%ci") ++CHANGED_REVISION := $(GIT_REV) ++REPO_URL := $(shell cd $(src); git describe --all --always 2>/dev/null) + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ mutex_unlock(&kbdev->cacheclean_lock); -+} ++else # Git ++# No Git or SVN detected ++DRIVER_REV := $(MALI_RELEASE_NAME) ++CHANGE_DATE := $(MALI_RELEASE_NAME) ++CHANGED_REVISION := $(MALI_RELEASE_NAME) ++endif ++endif + -+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ccflags-y += -DSVN_REV=$(SVN_REV) ++ccflags-y += -DSVN_REV_STRING=\"$(DRIVER_REV)\" + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ccflags-y += -I$(src) -I$(src)/common -I$(src)/linux -I$(src)/../mali/common -I$(src)/../mali/linux -I$(src)/include -I$(src)/../../ump/include/ump ++ccflags-y += -DMALI_STATE_TRACKING=0 ++ccflags-y += -DMALI_ENABLE_CPU_CYCLES=0 ++ccflags-$(CONFIG_UMP_DEBUG) += -DDEBUG + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ kbdev->hwcnt.backend.triggered = 1; -+ wake_up(&kbdev->hwcnt.backend.wait); -+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { -+ int ret; -+ /* Always clean and invalidate the 
cache after a successful dump -+ */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; -+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, -+ &kbdev->hwcnt.backend.cache_clean_work); -+ KBASE_DEBUG_ASSERT(ret); -+ } ++# For customer releases the Linux Device Drivers will be provided as ARM proprietary and GPL releases: ++# The ARM proprietary product will only include the license/proprietary directory ++# The GPL product will only include the license/gpl directory + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+} ++ifeq ($(wildcard $(src)/linux/license/gpl/*),) ++ccflags-y += -I$(src)/linux/license/proprietary -I$(src)/../mali/linux/license/proprietary ++else ++ccflags-y += -I$(src)/linux/license/gpl -I$(src)/../mali/linux/license/gpl ++endif + -+void kbase_clean_caches_done(struct kbase_device *kbdev) -+{ -+ u32 irq_mask; ++ump-y = common/ump_kernel_common.o \ ++ common/ump_kernel_descriptor_mapping.o \ ++ common/ump_kernel_api.o \ ++ common/ump_kernel_ref_drv.o \ ++ linux/ump_kernel_linux.o \ ++ linux/ump_kernel_memory_backend_os.o \ ++ linux/ump_kernel_memory_backend_dedicated.o \ ++ linux/ump_memory_backend.o \ ++ linux/ump_ukk_wrappers.o \ ++ linux/ump_ukk_ref_wrappers.o \ ++ linux/ump_osk_atomics.o \ ++ linux/ump_osk_low_level_mem.o \ ++ linux/ump_osk_misc.o \ ++ linux/ump_kernel_random_mapping.o + -+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { -+ unsigned long flags; -+ unsigned long pm_flags; ++ifneq ($(CONFIG_MALI400),y) ++ump-y += $(UDD_FILE_PREFIX)linux/mali_osk_atomics.o \ ++ $(UDD_FILE_PREFIX)linux/mali_osk_locks.o \ ++ $(UDD_FILE_PREFIX)linux/mali_osk_memory.o \ ++ $(UDD_FILE_PREFIX)linux/mali_osk_math.o \ ++ $(UDD_FILE_PREFIX)linux/mali_osk_misc.o ++endif + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ /* Disable interrupt */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); -+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), -+ irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++obj-$(CONFIG_UMP) := ump.o + -+ /* Wakeup... */ -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { -+ /* Only wake if we weren't resetting */ -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; -+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait); -+ } +diff --git a/drivers/gpu/arm/mali400/ump/Kconfig b/drivers/gpu/arm/mali400/ump/Kconfig +new file mode 100644 +index 000000000..ec3509057 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/Kconfig +@@ -0,0 +1,17 @@ ++# SPDX-License-Identifier: GPL-2.0 ++config UMP ++ tristate "UMP support" ++ depends on ARM ++ help ++ This enables support for the UMP memory allocation and sharing API. + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ } -+} ++ To compile this driver as a module, choose M here: the module will be ++ called ump. + -+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned long flags; -+ int err; ++config UMP_DEBUG ++ bool "Enable extra debug in UMP" ++ depends on UMP ++ default y ++ help ++ This enabled extra debug checks and messages in UMP. 
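The Kconfig entry above defaults UMP_DEBUG to y, and the Kbuild turns that into -DDEBUG via ccflags-$(CONFIG_UMP_DEBUG). The UMP sources further down then gate their chatter through DBG_MSG(level, (...)) calls; the real macro lives in a header that is not part of this hunk, so the sketch below is only an assumed shape of it (hypothetical names), but it shows why the call sites use the double-parenthesis form: the inner parentheses carry a whole printf-style argument list through a non-variadic macro, and the body compiles away entirely when DEBUG is not defined.

    #include <stdio.h>

    static int example_debug_level = 3;   /* hypothetical runtime level */

    #ifdef DEBUG
    #define EXAMPLE_DBG_MSG(level, args) \
        do { if ((level) <= example_debug_level) printf args; } while (0)
    #else
    #define EXAMPLE_DBG_MSG(level, args) do { } while (0)
    #endif

    int main(void)
    {
        printf("debug level: %d\n", example_debug_level);
        EXAMPLE_DBG_MSG(2, ("visible only in a -DDEBUG build (level %d)\n", 2));
        return 0;
    }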
+ -+ /* Wait for dump & cacheclean to complete */ -+ wait_event(kbdev->hwcnt.backend.wait, -+ kbdev->hwcnt.backend.triggered != 0); +diff --git a/drivers/gpu/arm/mali400/ump/Makefile b/drivers/gpu/arm/mali400/ump/Makefile +new file mode 100644 +index 000000000..88b02a22f +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/Makefile +@@ -0,0 +1,67 @@ ++# ++# Copyright (C) 2010-2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++# For each arch check: CROSS_COMPILE , KDIR , CFLAGS += -DARCH + -+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { -+ err = -EINVAL; -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; -+ } else { -+ /* Dump done */ -+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_IDLE); -+ err = 0; -+ } ++export ARCH ?= arm ++BUILD ?= debug + -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++check_cc2 = \ ++ $(shell if $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; \ ++ then \ ++ echo "$(2)"; \ ++ else \ ++ echo "$(3)"; \ ++ fi ;) + -+ return err; -+} ++# Check that required parameters are supplied. ++ifeq ($(CONFIG),) ++CONFIG := default ++endif ++ifeq ($(CPU)$(KDIR),) ++$(error "KDIR or CPU must be specified.") ++endif + -+int kbase_instr_hwcnt_clear(struct kbase_context *kctx) -+{ -+ unsigned long flags; -+ int err = -EINVAL; -+ struct kbase_device *kbdev = kctx->kbdev; ++# Get any user defined KDIR- or maybe even a hardcoded KDIR ++-include KDIR_CONFIGURATION + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++# Define host system directory ++KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build + -+ /* Check it's the context previously set up and we're not already -+ * dumping */ -+ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != -+ KBASE_INSTR_STATE_IDLE) -+ goto out; ++ifeq ($(ARCH), arm) ++# when compiling for ARM we're cross compiling ++export CROSS_COMPILE ?= $(call check_cc2, arm-linux-gnueabi-gcc, arm-linux-gnueabi-, arm-none-linux-gnueabi-) ++endif + -+ /* Clear the counters */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_PRFCNT_CLEAR, kctx); ++# look up KDIR based om CPU selection ++KDIR ?= $(KDIR-$(CPU)) + -+ err = 0; ++export CONFIG + -+out: -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -+ return err; -+} -+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); ++export CONFIG_UMP := m ++ifeq ($(BUILD),debug) ++export CONFIG_UMP_DEBUG := y ++else ++export CONFIG_UMP_DEBUG := n ++endif + -+int kbase_instr_backend_init(struct kbase_device *kbdev) -+{ -+ int ret = 0; ++ifeq ($(KDIR),) ++$(error No KDIR found for platform $(CPU)) ++endif + -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; ++all: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) modules + -+ init_waitqueue_head(&kbdev->hwcnt.backend.wait); -+ init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); -+ INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, -+ kbasep_cache_clean_worker); -+ kbdev->hwcnt.backend.triggered = 0; ++kernelrelease: ++ $(MAKE) -C 
$(KDIR) kernelrelease + -+ kbdev->hwcnt.backend.cache_clean_wq = -+ alloc_workqueue("Mali cache cleaning workqueue", 0, 1); -+ if (NULL == kbdev->hwcnt.backend.cache_clean_wq) -+ ret = -EINVAL; ++clean: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) clean ++ $(MAKE) -C $(KDIR) M=$(CURDIR)/../mali clean +diff --git a/drivers/gpu/arm/mali400/ump/Makefile.common b/drivers/gpu/arm/mali400/ump/Makefile.common +new file mode 100755 +index 000000000..ad2c18da9 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/Makefile.common +@@ -0,0 +1,20 @@ ++# ++# Copyright (C) 2010-2011, 2013, 2016-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+ return ret; -+} ++SRC = $(UMP_FILE_PREFIX)common/ump_kernel_common.c \ ++ $(UMP_FILE_PREFIX)common/ump_kernel_descriptor_mapping.c \ ++ $(UMP_FILE_PREFIX)common/ump_kernel_api.c \ ++ $(UMP_FILE_PREFIX)common/ump_kernel_ref_drv.c + -+void kbase_instr_backend_term(struct kbase_device *kbdev) -+{ -+ destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); -+} ++# Get subversion revision number, fall back to 0000 if no svn info is available ++SVN_REV:=$(shell ((svnversion | grep -qv exported && echo -n 'Revision: ' && svnversion) || git svn info | sed -e 's/$$$$/M/' | grep '^Revision: ' || echo ${MALI_RELEASE_NAME}) 2>/dev/null | sed -e 's/^Revision: //') + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h ++EXTRA_CFLAGS += -DSVN_REV=$(SVN_REV) ++EXTRA_CFLAGS += -DSVN_REV_STRING=\"$(SVN_REV)\" +diff --git a/drivers/gpu/arm/mali400/ump/arch-default/config.h b/drivers/gpu/arm/mali400/ump/arch-default/config.h new file mode 100644 -index 000000000..4794672da +index 000000000..d4aef9dd0 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h -@@ -0,0 +1,58 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ -+ -+ -+ ++++ b/drivers/gpu/arm/mali400/ump/arch-default/config.h +@@ -0,0 +1,24 @@ +/* -+ * Backend-specific instrumentation definitions ++ * Copyright (C) 2010, 2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + -+#ifndef _KBASE_INSTR_DEFS_H_ -+#define _KBASE_INSTR_DEFS_H_ ++#ifndef __ARCH_CONFIG_H__ ++#define __ARCH_CONFIG_H__ + -+/* -+ * Instrumentation State Machine States -+ */ -+enum kbase_instr_state { -+ /* State where instrumentation is not active */ -+ KBASE_INSTR_STATE_DISABLED = 0, -+ /* State machine is active and ready for a command. */ -+ KBASE_INSTR_STATE_IDLE, -+ /* Hardware is currently dumping a frame. */ -+ KBASE_INSTR_STATE_DUMPING, -+ /* We've requested a clean to occur on a workqueue */ -+ KBASE_INSTR_STATE_REQUEST_CLEAN, -+ /* Hardware is currently cleaning and invalidating caches. */ -+ KBASE_INSTR_STATE_CLEANING, -+ /* Cache clean completed, and either a) a dump is complete, or -+ * b) instrumentation can now be setup. */ -+ KBASE_INSTR_STATE_CLEANED, -+ /* An error has occured during DUMPING (page fault). */ -+ KBASE_INSTR_STATE_FAULT -+}; ++/* Use OS memory. */ ++#define ARCH_UMP_BACKEND_DEFAULT 1 + -+/* Structure used for instrumentation and HW counters dumping */ -+struct kbase_instr_backend { -+ wait_queue_head_t wait; -+ int triggered; ++/* OS memory won't need a base address. */ ++#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000 + -+ enum kbase_instr_state state; -+ wait_queue_head_t cache_clean_wait; -+ struct workqueue_struct *cache_clean_wq; -+ struct work_struct cache_clean_work; -+}; ++/* 512 MB maximum limit for UMP allocations. */ ++#define ARCH_UMP_MEMORY_SIZE_DEFAULT 512UL * 1024UL * 1024UL + -+#endif /* _KBASE_INSTR_DEFS_H_ */ + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h ++#endif /* __ARCH_CONFIG_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h b/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h new file mode 100644 -index 000000000..e96aeae78 +index 000000000..182e90c1d --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h -@@ -0,0 +1,45 @@ ++++ b/drivers/gpu/arm/mali400/ump/arch-pb-virtex5/config.h +@@ -0,0 +1,18 @@ +/* -+ * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + ++#ifndef __ARCH_CONFIG_H__ ++#define __ARCH_CONFIG_H__ + ++#define ARCH_UMP_BACKEND_DEFAULT 0 ++#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0xE1000000 ++#define ARCH_UMP_MEMORY_SIZE_DEFAULT 16UL * 1024UL * 1024UL + -+ -+ ++#endif /* __ARCH_CONFIG_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/arch/config.h b/drivers/gpu/arm/mali400/ump/arch/config.h +new file mode 100644 +index 000000000..d4aef9dd0 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/arch/config.h +@@ -0,0 +1,24 @@ +/* -+ * Backend-specific HW access instrumentation APIs ++ * Copyright (C) 2010, 2012, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#ifndef _KBASE_INSTR_INTERNAL_H_ -+#define _KBASE_INSTR_INTERNAL_H_ ++#ifndef __ARCH_CONFIG_H__ ++#define __ARCH_CONFIG_H__ + -+/** -+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning -+ * @data: a &struct work_struct -+ */ -+void kbasep_cache_clean_worker(struct work_struct *data); ++/* Use OS memory. */ ++#define ARCH_UMP_BACKEND_DEFAULT 1 + -+/** -+ * kbase_clean_caches_done() - Cache clean interrupt received -+ * @kbdev: Kbase device -+ */ -+void kbase_clean_caches_done(struct kbase_device *kbdev); ++/* OS memory won't need a base address. */ ++#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000 + -+/** -+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received -+ * @kbdev: Kbase device -+ */ -+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); ++/* 512 MB maximum limit for UMP allocations. */ ++#define ARCH_UMP_MEMORY_SIZE_DEFAULT 512UL * 1024UL * 1024UL + -+#endif /* _KBASE_INSTR_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h ++ ++#endif /* __ARCH_CONFIG_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c new file mode 100644 -index 000000000..8781561e7 +index 000000000..36adb2f53 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h -@@ -0,0 +1,39 @@ ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_api.c +@@ -0,0 +1,455 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
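The three config.h variants above select the UMP allocation backend at build time: ARCH_UMP_BACKEND_DEFAULT set to 1 (arch-default/config.h and the identical arch/config.h) serves allocations from OS memory with no fixed base address and a 512 MB cap, while the pb-virtex5 config sets it to 0 and instead describes a dedicated physical block of 16 MB at 0xE1000000. One caution if these headers are reused elsewhere: the size macros are defined without surrounding parentheses, which is harmless when the value is only assigned to a config field (as appears to be the intent here) but silustrates silently wrong arithmetic inside a larger expression. A small standalone illustration, using hypothetical macro names with the same value as ARCH_UMP_MEMORY_SIZE_DEFAULT:

    #include <stdio.h>

    /* The header's value, with and without the parentheses it omits. */
    #define UMP_SIZE_UNPAREN  512UL * 1024UL * 1024UL
    #define UMP_SIZE_PAREN   (512UL * 1024UL * 1024UL)

    int main(void)
    {
        unsigned long long total = 4ULL * 1024 * 1024 * 1024;   /* 4 GiB */

        /* Expands to total / 512UL * 1024UL * 1024UL,
         * i.e. (total / 512) * 1 MiB -- not a division by 512 MiB. */
        printf("unparenthesised: %llu\n", total / UMP_SIZE_UNPAREN);
        /* The intended result: 4 GiB / 512 MiB = 8. */
        printf("parenthesised:   %llu\n", total / UMP_SIZE_PAREN);
        return 0;
    }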
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "ump_osk.h" ++#include "ump_uk_types.h" ++#include "ump_kernel_interface.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_random_mapping.h" + + -+/* -+ * Backend specific IRQ APIs -+ */ + -+#ifndef _KBASE_IRQ_INTERNAL_H_ -+#define _KBASE_IRQ_INTERNAL_H_ ++/* ---------------- UMP kernel space API functions follows ---------------- */ + -+int kbase_install_interrupts(struct kbase_device *kbdev); + -+void kbase_release_interrupts(struct kbase_device *kbdev); + -+/** -+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed -+ * execution -+ * @kbdev: The kbase device -+ */ -+void kbase_synchronize_irqs(struct kbase_device *kbdev); ++UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(ump_dd_handle memh) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; + -+int kbasep_common_test_interrupt_handlers( -+ struct kbase_device * const kbdev); ++ DEBUG_ASSERT_POINTER(mem); + -+#endif /* _KBASE_IRQ_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c -new file mode 100644 -index 000000000..8416b80e8 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c -@@ -0,0 +1,469 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ DBG_MSG(5, ("Returning secure ID. ID: %u\n", mem->secure_id)); + ++ return mem->secure_id; ++} + + -+#include -+#include -+#include + -+#include ++UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id) ++{ ++ ump_dd_mem *mem; + -+#if !defined(CONFIG_MALI_NO_MALI) ++ DBG_MSG(5, ("Getting handle from secure ID. ID: %u\n", secure_id)); ++ mem = ump_random_mapping_get(device.secure_id_map, (int)secure_id); ++ if (NULL == mem) { ++ DBG_MSG(1, ("Secure ID not found. 
ID: %u\n", secure_id)); ++ return UMP_DD_HANDLE_INVALID; ++ } + -+/* GPU IRQ Tags */ -+#define JOB_IRQ_TAG 0 -+#define MMU_IRQ_TAG 1 -+#define GPU_IRQ_TAG 2 ++ /* Keep the reference taken in ump_random_mapping_get() */ + -+static void *kbase_tag(void *ptr, u32 tag) -+{ -+ return (void *)(((uintptr_t) ptr) | tag); ++ return (ump_dd_handle)mem; +} + -+static void *kbase_untag(void *ptr) -+{ -+ return (void *)(((uintptr_t) ptr) & ~3); -+} + -+static irqreturn_t kbase_job_irq_handler(int irq, void *data) ++ ++UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle memh) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ ump_dd_mem *mem = (ump_dd_mem *) memh; + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); ++ DEBUG_ASSERT_POINTER(mem); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return IRQ_NONE; -+ } ++ return mem->nr_blocks; ++} + -+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + -+#ifdef CONFIG_MALI_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_DEBUG */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (!val) -+ return IRQ_NONE; ++UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle memh, ump_dd_physical_block *blocks, unsigned long num_blocks) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ DEBUG_ASSERT_POINTER(mem); + -+ kbase_job_done(kbdev, val); ++ if (blocks == NULL) { ++ DBG_MSG(1, ("NULL parameter in ump_dd_phys_blocks_get()\n")); ++ return UMP_DD_INVALID; ++ } + -+ return IRQ_HANDLED; -+} ++ if (mem->nr_blocks != num_blocks) { ++ DBG_MSG(1, ("Specified number of blocks do not match actual number of blocks\n")); ++ return UMP_DD_INVALID; ++ } + -+KBASE_EXPORT_TEST_API(kbase_job_irq_handler); ++ DBG_MSG(5, ("Returning physical block information. 
ID: %u\n", mem->secure_id)); + -+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) -+{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ _mali_osk_memcpy(blocks, mem->block_array, sizeof(ump_dd_physical_block) * mem->nr_blocks); + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); ++ return UMP_DD_SUCCESS; ++} + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return IRQ_NONE; -+ } + -+ atomic_inc(&kbdev->faults_pending); + -+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); ++UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle memh, unsigned long index, ump_dd_physical_block *block) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; + -+#ifdef CONFIG_MALI_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_DEBUG */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); ++ DEBUG_ASSERT_POINTER(mem); + -+ if (!val) { -+ atomic_dec(&kbdev->faults_pending); -+ return IRQ_NONE; ++ if (block == NULL) { ++ DBG_MSG(1, ("NULL parameter in ump_dd_phys_block_get()\n")); ++ return UMP_DD_INVALID; + } + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ if (index >= mem->nr_blocks) { ++ DBG_MSG(5, ("Invalid index specified in ump_dd_phys_block_get()\n")); ++ return UMP_DD_INVALID; ++ } + -+ kbase_mmu_interrupt(kbdev, val); ++ DBG_MSG(5, ("Returning physical block information. ID: %u, index: %lu\n", mem->secure_id, index)); + -+ atomic_dec(&kbdev->faults_pending); ++ *block = mem->block_array[index]; + -+ return IRQ_HANDLED; ++ return UMP_DD_SUCCESS; +} + -+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) -+{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; -+ -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return IRQ_NONE; -+ } + -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); ++UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle memh) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; + -+#ifdef CONFIG_MALI_DEBUG -+ if (!kbdev->pm.backend.driver_ready_for_irqs) -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", -+ __func__, irq, val); -+#endif /* CONFIG_MALI_DEBUG */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); ++ DEBUG_ASSERT_POINTER(mem); + -+ if (!val) -+ return IRQ_NONE; ++ DBG_MSG(5, ("Returning size. 
ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes)); + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ return mem->size_bytes; ++} + -+ kbase_gpu_interrupt(kbdev, val); + -+ return IRQ_HANDLED; -+} + -+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); ++UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle memh) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; ++ int new_ref; + -+static irq_handler_t kbase_handler_table[] = { -+ [JOB_IRQ_TAG] = kbase_job_irq_handler, -+ [MMU_IRQ_TAG] = kbase_mmu_irq_handler, -+ [GPU_IRQ_TAG] = kbase_gpu_irq_handler, -+}; ++ DEBUG_ASSERT_POINTER(mem); + -+#ifdef CONFIG_MALI_DEBUG -+#define JOB_IRQ_HANDLER JOB_IRQ_TAG -+#define MMU_IRQ_HANDLER MMU_IRQ_TAG -+#define GPU_IRQ_HANDLER GPU_IRQ_TAG ++ new_ref = _ump_osk_atomic_inc_and_read(&mem->ref_count); + -+/** -+ * kbase_set_custom_irq_handler - Set a custom IRQ handler -+ * @kbdev: Device for which the handler is to be registered -+ * @custom_handler: Handler to be registered -+ * @irq_type: Interrupt type -+ * -+ * Registers given interrupt handler for requested interrupt type -+ * In the case where irq handler is not specified, the default handler shall be -+ * registered -+ * -+ * Return: 0 case success, error code otherwise -+ */ -+int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, -+ int irq_type) -+{ -+ int result = 0; -+ irq_handler_t requested_irq_handler = NULL; ++ DBG_MSG(5, ("Memory reference incremented. ID: %u, new value: %d\n", mem->secure_id, new_ref)); ++} + -+ KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && -+ (GPU_IRQ_HANDLER >= irq_type)); + -+ /* Release previous handler */ -+ if (kbdev->irqs[irq_type].irq) -+ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + -+ requested_irq_handler = (NULL != custom_handler) ? 
custom_handler : -+ kbase_handler_table[irq_type]; ++UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle memh) ++{ ++ ump_dd_mem *mem = (ump_dd_mem *)memh; + -+ if (0 != request_irq(kbdev->irqs[irq_type].irq, -+ requested_irq_handler, -+ kbdev->irqs[irq_type].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { -+ result = -EINVAL; -+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", -+ kbdev->irqs[irq_type].irq, irq_type); -+#ifdef CONFIG_SPARSE_IRQ -+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -+#endif /* CONFIG_SPARSE_IRQ */ -+ } ++ DEBUG_ASSERT_POINTER(mem); + -+ return result; ++ ump_random_mapping_put(mem); +} + -+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + -+/* test correct interrupt assigment and reception by cpu */ -+struct kbasep_irq_test { -+ struct hrtimer timer; -+ wait_queue_head_t wait; -+ int triggered; -+ u32 timeout; -+}; + -+static struct kbasep_irq_test kbasep_irq_test_data; ++/* --------------- Handling of user space requests follows --------------- */ + -+#define IRQ_TEST_TIMEOUT 500 + -+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) ++_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ ump_session_data *session_data; + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); ++ DEBUG_ASSERT_POINTER(args); ++ DEBUG_ASSERT_POINTER(args->ctx); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return IRQ_NONE; ++ session_data = (ump_session_data *)args->ctx; ++ ++ /* check compatability */ ++ if (args->version == UMP_IOCTL_API_VERSION) { ++ DBG_MSG(3, ("API version set to newest %d (compatible)\n", ++ GET_VERSION(args->version))); ++ args->compatible = 1; ++ session_data->api_version = args->version; ++ } else { ++ DBG_MSG(2, ("API version set to %d (incompatible with client version %d)\n", ++ GET_VERSION(UMP_IOCTL_API_VERSION), GET_VERSION(args->version))); ++ args->compatible = 0; ++ args->version = UMP_IOCTL_API_VERSION; /* report our version */ + } + -+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); ++ return _MALI_OSK_ERR_OK; ++} + -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (!val) -+ return IRQ_NONE; ++_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info) ++{ ++ ump_session_memory_list_element *session_memory_element; ++ ump_session_memory_list_element *tmp; ++ ump_session_data *session_data; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_INVALID_FUNC; ++ int secure_id; + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++ DEBUG_ASSERT_POINTER(release_info); ++ DEBUG_ASSERT_POINTER(release_info->ctx); + -+ kbasep_irq_test_data.triggered = 1; -+ wake_up(&kbasep_irq_test_data.wait); ++ /* Retreive the session data */ ++ session_data = (ump_session_data *)release_info->ctx; + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); ++ /* If there are many items in the memory session list we ++ * could be de-referencing this pointer a lot so keep a local copy ++ */ ++ secure_id = release_info->secure_id; + -+ return IRQ_HANDLED; -+} ++ DBG_MSG(4, ("Releasing memory with IOCTL, ID: %u\n", secure_id)); + -+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void 
*data) -+{ -+ unsigned long flags; -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ /* Iterate through the memory list looking for the requested secure ID */ ++ _mali_osk_mutex_wait(session_data->lock); ++ _MALI_OSK_LIST_FOREACHENTRY(session_memory_element, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) { ++ if (session_memory_element->mem->secure_id == secure_id) { ++ ump_dd_mem *release_mem; + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); ++ release_mem = session_memory_element->mem; ++ _mali_osk_list_del(&session_memory_element->list); ++ ump_dd_reference_release(release_mem); ++ _mali_osk_free(session_memory_element); + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* GPU is turned off - IRQ is not for us */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return IRQ_NONE; ++ ret = _MALI_OSK_ERR_OK; ++ break; ++ } + } + -+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); -+ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); ++ _mali_osk_mutex_signal(session_data->lock); ++ DBG_MSG_IF(1, _MALI_OSK_ERR_OK != ret, ("UMP memory with ID %u does not belong to this session.\n", secure_id)); + -+ if (!val) -+ return IRQ_NONE; ++ DBG_MSG(4, ("_ump_ukk_release() returning 0x%x\n", ret)); ++ return ret; ++} + -+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); ++_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction) ++{ ++ ump_dd_mem *mem; ++ _mali_osk_errcode_t ret = _MALI_OSK_ERR_FAULT; + -+ kbasep_irq_test_data.triggered = 1; -+ wake_up(&kbasep_irq_test_data.wait); ++ DEBUG_ASSERT_POINTER(user_interaction); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); ++ /* We lock the mappings so things don't get removed while we are looking for the memory */ ++ mem = ump_random_mapping_get(device.secure_id_map, user_interaction->secure_id); ++ if (NULL != mem) { ++ user_interaction->size = mem->size_bytes; ++ DBG_MSG(4, ("Returning size. ID: %u, size: %lu ", ++ (ump_secure_id)user_interaction->secure_id, ++ (unsigned long)user_interaction->size)); ++ ump_random_mapping_put(mem); ++ ret = _MALI_OSK_ERR_OK; ++ } else { ++ user_interaction->size = 0; ++ DBG_MSG(1, ("Failed to look up mapping in ump_ioctl_size_get(). ID: %u\n", ++ (ump_secure_id)user_interaction->secure_id)); ++ } + -+ return IRQ_HANDLED; ++ return ret; +} + -+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) -+{ -+ struct kbasep_irq_test *test_data = container_of(timer, -+ struct kbasep_irq_test, timer); + -+ test_data->timeout = 1; -+ test_data->triggered = 1; -+ wake_up(&test_data->wait); -+ return HRTIMER_NORESTART; -+} + -+static int kbasep_common_test_interrupt( -+ struct kbase_device * const kbdev, u32 tag) ++void _ump_ukk_msync(_ump_uk_msync_s *args) +{ -+ int err = 0; -+ irq_handler_t test_handler; ++ ump_dd_mem *mem = NULL; ++ void *virtual = NULL; ++ u32 size = 0; ++ u32 offset = 0; + -+ u32 old_mask_val; -+ u16 mask_offset; -+ u16 rawstat_offset; ++ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); ++ if (NULL == mem) { ++ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_msync(). 
ID: %u\n", ++ (ump_secure_id)args->secure_id)); ++ return; ++ } + -+ switch (tag) { -+ case JOB_IRQ_TAG: -+ test_handler = kbase_job_irq_test_handler; -+ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); -+ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); -+ break; -+ case MMU_IRQ_TAG: -+ test_handler = kbase_mmu_irq_test_handler; -+ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); -+ mask_offset = MMU_REG(MMU_IRQ_MASK); -+ break; -+ case GPU_IRQ_TAG: -+ /* already tested by pm_driver - bail out */ -+ default: -+ return 0; ++ /* Returns the cache settings back to Userspace */ ++ args->is_cached = mem->is_cached; ++ ++ /* If this flag is the only one set, we should not do the actual flush, only the readout */ ++ if (_UMP_UK_MSYNC_READOUT_CACHE_ENABLED == args->op) { ++ DBG_MSG(3, ("_ump_ukk_msync READOUT ID: %u Enabled: %d\n", (ump_secure_id)args->secure_id, mem->is_cached)); ++ goto msync_release_and_return; + } + -+ /* store old mask */ -+ old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL); -+ /* mask interrupts */ -+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL); ++ /* Nothing to do if the memory is not caches */ ++ if (0 == mem->is_cached) { ++ DBG_MSG(3, ("_ump_ukk_msync IGNORING ID: %u Enabled: %d OP: %d\n", (ump_secure_id)args->secure_id, mem->is_cached, args->op)); ++ goto msync_release_and_return; ++ } ++ DBG_MSG(3, ("UMP[%02u] _ump_ukk_msync Flush OP: %d Address: 0x%08x Mapping: 0x%08x\n", ++ (ump_secure_id)args->secure_id, args->op, args->address, args->mapping)); + -+ if (kbdev->irqs[tag].irq) { -+ /* release original handler and install test handler */ -+ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { -+ err = -EINVAL; -+ } else { -+ kbasep_irq_test_data.timeout = 0; -+ hrtimer_init(&kbasep_irq_test_data.timer, -+ CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ kbasep_irq_test_data.timer.function = -+ kbasep_test_interrupt_timeout; ++ if (args->address) { ++ virtual = (void *)((u32)args->address); ++ offset = (u32)((args->address) - (args->mapping)); ++ } else { ++ /* Flush entire mapping when no address is specified. */ ++ virtual = args->mapping; ++ } ++ if (args->size) { ++ size = args->size; ++ } else { ++ /* Flush entire mapping when no size is specified. 
*/ ++ size = mem->size_bytes - offset; ++ } + -+ /* trigger interrupt */ -+ kbase_reg_write(kbdev, mask_offset, 0x1, NULL); -+ kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); ++ if ((offset + size) > mem->size_bytes) { ++ DBG_MSG(1, ("Trying to flush more than the entire UMP allocation: offset: %u + size: %u > %u\n", offset, size, mem->size_bytes)); ++ goto msync_release_and_return; ++ } + -+ hrtimer_start(&kbasep_irq_test_data.timer, -+ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), -+ HRTIMER_MODE_REL); ++ /* The actual cache flush - Implemented for each OS*/ ++ _ump_osk_msync(mem, virtual, offset, size, args->op, NULL); + -+ wait_event(kbasep_irq_test_data.wait, -+ kbasep_irq_test_data.triggered != 0); ++msync_release_and_return: ++ ump_random_mapping_put(mem); ++ return; ++} + -+ if (kbasep_irq_test_data.timeout != 0) { -+ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", -+ kbdev->irqs[tag].irq, tag); -+ err = -EINVAL; -+ } else { -+ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", -+ kbdev->irqs[tag].irq, tag); -+ } ++void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args) ++{ ++ ump_session_data *session_data; ++ ump_uk_cache_op_control op; + -+ hrtimer_cancel(&kbasep_irq_test_data.timer); -+ kbasep_irq_test_data.triggered = 0; ++ DEBUG_ASSERT_POINTER(args); ++ DEBUG_ASSERT_POINTER(args->ctx); + -+ /* mask interrupts */ -+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL); ++ op = args->op; ++ session_data = (ump_session_data *)args->ctx; + -+ /* release test handler */ -+ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); ++ _mali_osk_mutex_wait(session_data->lock); ++ if (op == _UMP_UK_CACHE_OP_START) { ++ session_data->cache_operations_ongoing++; ++ DBG_MSG(4, ("Cache ops start\n")); ++ if (session_data->cache_operations_ongoing != 1) { ++ DBG_MSG(2, ("UMP: Number of simultanious cache control ops: %d\n", session_data->cache_operations_ongoing)); + } -+ -+ /* restore original interrupt */ -+ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], -+ kbdev->irqs[tag].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { -+ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", -+ kbdev->irqs[tag].irq, tag); -+ err = -EINVAL; ++ } else if (op == _UMP_UK_CACHE_OP_FINISH) { ++ DBG_MSG(4, ("Cache ops finish\n")); ++ session_data->cache_operations_ongoing--; ++#if 0 ++ if (session_data->has_pending_level1_cache_flush) { ++ /* This function will set has_pending_level1_cache_flush=0 */ ++ _ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data); + } ++#endif ++ ++ /* to be on the safe side: always flush l1 cache when cache operations are done */ ++ _ump_osk_msync(NULL, NULL, 0, 0, _UMP_UK_MSYNC_FLUSH_L1, session_data); ++ DBG_MSG(4, ("Cache ops finish end\n")); ++ } else { ++ DBG_MSG(1, ("Illegal call to %s at line %d\n", __FUNCTION__, __LINE__)); + } -+ /* restore old mask */ -+ kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); ++ _mali_osk_mutex_signal(session_data->lock); + -+ return err; +} + -+int kbasep_common_test_interrupt_handlers( -+ struct kbase_device * const kbdev) ++void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args) +{ -+ int err; ++ ump_dd_mem *mem = NULL; ++ ump_uk_user old_user; ++ ump_uk_msync_op cache_op = _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE; ++ ump_session_data *session_data; + -+ init_waitqueue_head(&kbasep_irq_test_data.wait); -+ kbasep_irq_test_data.triggered = 0; ++ DEBUG_ASSERT_POINTER(args); ++ DEBUG_ASSERT_POINTER(args->ctx); + -+ /* A 
suspend won't happen during startup/insmod */ -+ kbase_pm_context_active(kbdev); ++ session_data = (ump_session_data *)args->ctx; + -+ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); -+ if (err) { -+ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); ++ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); ++ if (NULL == mem) { ++ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_switch_hw_usage(). ID: %u\n", ++ (ump_secure_id)args->secure_id)); ++ return; ++ } ++ ++ old_user = mem->hw_device; ++ mem->hw_device = args->new_user; ++ ++ DBG_MSG(3, ("UMP[%02u] Switch usage Start New: %s Prev: %s.\n", ++ (ump_secure_id)args->secure_id, ++ args->new_user ? "MALI" : "CPU", ++ old_user ? "MALI" : "CPU")); ++ ++ if (!mem->is_cached) { ++ DBG_MSG(3, ("UMP[%02u] Changing owner of uncached memory. Cache flushing not needed.\n", ++ (ump_secure_id)args->secure_id)); + goto out; + } + -+ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); -+ if (err) { -+ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); ++ if (old_user == args->new_user) { ++ DBG_MSG(4, ("UMP[%02u] Setting the new_user equal to previous for. Cache flushing not needed.\n", ++ (ump_secure_id)args->secure_id)); ++ goto out; ++ } ++ if ( ++ /* Previous AND new is both different from CPU */ ++ (old_user != _UMP_UK_USED_BY_CPU) && (args->new_user != _UMP_UK_USED_BY_CPU) ++ ) { ++ DBG_MSG(4, ("UMP[%02u] Previous and new user is not CPU. Cache flushing not needed.\n", ++ (ump_secure_id)args->secure_id)); + goto out; + } + -+ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); ++ if ((old_user != _UMP_UK_USED_BY_CPU) && (args->new_user == _UMP_UK_USED_BY_CPU)) { ++ cache_op = _UMP_UK_MSYNC_INVALIDATE; ++ DBG_MSG(4, ("UMP[%02u] Cache invalidation needed\n", (ump_secure_id)args->secure_id)); ++#ifdef UMP_SKIP_INVALIDATION ++#error ++ DBG_MSG(4, ("UMP[%02u] Performing Cache invalidation SKIPPED\n", (ump_secure_id)args->secure_id)); ++ goto out; ++#endif ++ } + -+ out: -+ kbase_pm_context_idle(kbdev); ++ /* Take lock to protect: session->cache_operations_ongoing and session->has_pending_level1_cache_flush */ ++ _mali_osk_mutex_wait(session_data->lock); ++ /* Actual cache flush */ ++ _ump_osk_msync(mem, NULL, 0, mem->size_bytes, cache_op, session_data); ++ _mali_osk_mutex_signal(session_data->lock); + -+ return err; ++out: ++ ump_random_mapping_put(mem); ++ DBG_MSG(4, ("UMP[%02u] Switch usage Finish\n", (ump_secure_id)args->secure_id)); ++ return; +} -+#endif /* CONFIG_MALI_DEBUG */ + -+int kbase_install_interrupts(struct kbase_device *kbdev) ++void _ump_ukk_lock(_ump_uk_lock_s *args) +{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ int err; -+ u32 i; ++ ump_dd_mem *mem = NULL; + -+ for (i = 0; i < nr; i++) { -+ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], -+ kbdev->irqs[i].flags | IRQF_SHARED, -+ dev_name(kbdev->dev), -+ kbase_tag(kbdev, i)); -+ if (err) { -+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", -+ kbdev->irqs[i].irq, i); -+#ifdef CONFIG_SPARSE_IRQ -+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -+#endif /* CONFIG_SPARSE_IRQ */ -+ goto release; -+ } ++ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); ++ if (NULL == mem) { ++ DBG_MSG(1, ("UMP[%02u] Failed to look up mapping in _ump_ukk_lock(). 
ID: %u\n", ++ (ump_secure_id)args->secure_id)); ++ return; + } + -+ return 0; ++ DBG_MSG(1, ("UMP[%02u] Lock. New lock flag: %d. Old Lock flag:\n", (u32)args->secure_id, (u32)args->lock_usage, (u32) mem->lock_usage)); + -+ release: -+ while (i-- > 0) -+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); ++ mem->lock_usage = (ump_lock_usage) args->lock_usage; + -+ return err; ++ ump_random_mapping_put(mem); +} + -+void kbase_release_interrupts(struct kbase_device *kbdev) ++void _ump_ukk_unlock(_ump_uk_unlock_s *args) +{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ u32 i; ++ ump_dd_mem *mem = NULL; + -+ for (i = 0; i < nr; i++) { -+ if (kbdev->irqs[i].irq) -+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); ++ mem = ump_random_mapping_get(device.secure_id_map, (int)args->secure_id); ++ if (NULL == mem) { ++ DBG_MSG(1, ("Failed to look up mapping in _ump_ukk_unlock(). ID: %u\n", ++ (ump_secure_id)args->secure_id)); ++ return; + } -+} + -+void kbase_synchronize_irqs(struct kbase_device *kbdev) -+{ -+ u32 nr = ARRAY_SIZE(kbase_handler_table); -+ u32 i; ++ DBG_MSG(1, ("UMP[%02u] Unlocking. Old Lock flag:\n", ++ (u32)args->secure_id, (u32) mem->lock_usage)); + -+ for (i = 0; i < nr; i++) { -+ if (kbdev->irqs[i].irq) -+ synchronize_irq(kbdev->irqs[i].irq); -+ } -+} ++ mem->lock_usage = (ump_lock_usage) UMP_NOT_LOCKED; + -+#endif /* !defined(CONFIG_MALI_NO_MALI) */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c ++ ump_random_mapping_put(mem); ++} +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c new file mode 100644 -index 000000000..92358f2bf +index 000000000..73aa9e4c4 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c -@@ -0,0 +1,237 @@ ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.c +@@ -0,0 +1,358 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
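The ump_dd_* functions above are the in-kernel side of the UMP interface: another driver that receives a secure ID from userspace can turn it back into a handle, query the size and physical blocks, and must drop the reference the lookup took. A minimal sketch of that consumer pattern, assuming it is built in-tree with the UMP headers available (the function name and error handling here are illustrative, not from the sources):

    #include "ump_kernel_interface.h"

    /* Hypothetical consumer: import a buffer by secure ID, inspect it,
     * then release the reference taken by the lookup. */
    static int example_import_ump_buffer(ump_secure_id id)
    {
        ump_dd_handle handle;
        unsigned long size, nr_blocks;

        handle = ump_dd_handle_create_from_secure_id(id);
        if (UMP_DD_HANDLE_INVALID == handle)
            return -1;                    /* unknown or stale secure ID */

        size = ump_dd_size_get(handle);
        nr_blocks = ump_dd_phys_block_count_get(handle);

        /* ... hand the physical blocks to a device here, via
         * ump_dd_phys_block_get() or ump_dd_phys_blocks_get() ... */

        /* Balances the reference held since the create call above. */
        ump_dd_reference_release(handle);
        return (size != 0 && nr_blocks != 0) ? 0 : -1;
    }

Note that ump_dd_handle_create_from_secure_id() deliberately keeps the reference taken during the lookup (see the comment in its body), which is why the release above is unconditional once the handle is valid.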
+ */ + ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_osk_bitops.h" ++#include "mali_osk_list.h" ++#include "ump_osk.h" ++#include "ump_uk_types.h" ++#include "ump_ukk.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_descriptor_mapping.h" ++#include "ump_kernel_memory_backend.h" + + + -+/* -+ * Register backend context / address space management ++/** ++ * Define the initial and maximum size of number of secure_ids on the system + */ ++#define UMP_SECURE_ID_TABLE_ENTRIES_INITIAL (128 ) ++#define UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM (4096 ) + -+#include -+#include -+#include + +/** -+ * assign_and_activate_kctx_addr_space - Assign an AS to a context -+ * @kbdev: Kbase device -+ * @kctx: Kbase context -+ * @current_as: Address Space to assign -+ * -+ * Assign an Address Space (AS) to a context, and add the context to the Policy. -+ * -+ * This includes -+ * setting up the global runpool_irq structure and the context on the AS, -+ * Activating the MMU on the AS, -+ * Allowing jobs to be submitted on the AS. -+ * -+ * Context: -+ * kbasep_js_kctx_info.jsctx_mutex held, -+ * kbasep_js_device_data.runpool_mutex held, -+ * AS transaction mutex held, -+ * Runpool IRQ lock held ++ * Define the initial and maximum size of the ump_session_data::cookies_map, ++ * which is a \ref ump_descriptor_mapping. This limits how many secure_ids ++ * may be mapped into a particular process using _ump_ukk_map_mem(). + */ -+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_as *current_as) -+{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#define UMP_COOKIES_PER_SESSION_INITIAL (UMP_SECURE_ID_TABLE_ENTRIES_INITIAL ) ++#define UMP_COOKIES_PER_SESSION_MAXIMUM (UMP_SECURE_ID_TABLE_ENTRIES_MAXIMUM) + -+ /* Attribute handling */ -+ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); ++struct ump_dev device; + -+ /* Allow it to run jobs */ -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++_mali_osk_errcode_t ump_kernel_constructor(void) ++{ ++ _mali_osk_errcode_t err; + -+ kbase_js_runpool_inc_context_count(kbdev, kctx); -+} ++ /* Perform OS Specific initialization */ ++ err = _ump_osk_init(); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("Failed to initiaze the UMP Device Driver")); ++ return err; ++ } + -+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ int i; ++ /* Init the global device */ ++ _mali_osk_memset(&device, 0, sizeof(device)); + -+ if (kbdev->hwaccess.active_kctx == kctx) { -+ /* Context is already active */ -+ return true; ++ /* Create the descriptor map, which will be used for mapping secure ID to ump_dd_mem structs */ ++ device.secure_id_map = ump_random_mapping_create(); ++ if (NULL == device.secure_id_map) { ++ MSG_ERR(("Failed to create secure id lookup table\n")); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ if (kbdev->as_to_kctx[i] == kctx) { -+ /* Context already has ASID - mark as active */ -+ return true; -+ } ++ /* Init memory backend */ ++ device.backend = ump_memory_backend_create(); ++ if (NULL == device.backend) { ++ MSG_ERR(("Failed to create memory backend\n")); ++ ump_random_mapping_destroy(device.secure_id_map); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ /* Context does not have address space assigned 
*/ -+ return false; ++ return _MALI_OSK_ERR_OK; +} + -+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++void ump_kernel_destructor(void) +{ -+ int as_nr = kctx->as_nr; -+ -+ if (as_nr == KBASEP_AS_NR_INVALID) { -+ WARN(1, "Attempting to release context without ASID\n"); -+ return; -+ } ++ DEBUG_ASSERT_POINTER(device.secure_id_map); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ump_random_mapping_destroy(device.secure_id_map); ++ device.secure_id_map = NULL; + -+ if (atomic_read(&kctx->refcount) != 1) { -+ WARN(1, "Attempting to release active ASID\n"); -+ return; -+ } ++ device.backend->shutdown(device.backend); ++ device.backend = NULL; + -+ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); ++ ump_memory_backend_destroy(); + -+ kbase_ctx_sched_release_ctx(kctx); -+ kbase_js_runpool_dec_context_count(kbdev, kctx); ++ _ump_osk_term(); +} + -+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++/** Creates a new UMP session ++ */ ++_mali_osk_errcode_t _ump_ukk_open(void **context) +{ -+} ++ struct ump_session_data *session_data; + -+int kbase_backend_find_and_release_free_address_space( -+ struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long flags; -+ int i; ++ /* allocated struct to track this session */ ++ session_data = (struct ump_session_data *)_mali_osk_malloc(sizeof(struct ump_session_data)); ++ if (NULL == session_data) { ++ MSG_ERR(("Failed to allocate ump_session_data in ump_file_open()\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ session_data->lock = _mali_osk_mutex_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0); ++ if (NULL == session_data->lock) { ++ MSG_ERR(("Failed to initialize lock for ump_session_data in ump_file_open()\n")); ++ _mali_osk_free(session_data); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++ session_data->cookies_map = ump_descriptor_mapping_create( ++ UMP_COOKIES_PER_SESSION_INITIAL, ++ UMP_COOKIES_PER_SESSION_MAXIMUM); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (NULL == session_data->cookies_map) { ++ MSG_ERR(("Failed to create descriptor mapping for _ump_ukk_map_mem cookies\n")); + -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ struct kbasep_js_kctx_info *as_js_kctx_info; -+ struct kbase_context *as_kctx; ++ _mali_osk_mutex_term(session_data->lock); ++ _mali_osk_free(session_data); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+ as_kctx = kbdev->as_to_kctx[i]; -+ as_js_kctx_info = &as_kctx->jctx.sched_info; ++ _MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_list); + -+ /* Don't release privileged or active contexts, or contexts with -+ * jobs running. -+ * Note that a context will have at least 1 reference (which -+ * was previously taken by kbasep_js_schedule_ctx()) until -+ * descheduled. 
-+ */ -+ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && -+ atomic_read(&as_kctx->refcount) == 1) { -+ if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, -+ as_kctx)) { -+ WARN(1, "Failed to retain active context\n"); ++ _MALI_OSK_INIT_LIST_HEAD(&session_data->list_head_session_memory_mappings_list); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ /* Since initial version of the UMP interface did not use the API_VERSION ioctl we have to assume ++ that it is this version, and not the "latest" one: UMP_IOCTL_API_VERSION ++ Current and later API versions would do an additional call to this IOCTL and update this variable ++ to the correct one.*/ ++ session_data->api_version = MAKE_VERSION_ID(1); + -+ return KBASEP_AS_NR_INVALID; -+ } ++ *context = (void *)session_data; + -+ kbasep_js_clear_submit_allowed(js_devdata, as_kctx); ++ session_data->cache_operations_ongoing = 0 ; ++ session_data->has_pending_level1_cache_flush = 0; + -+ /* Drop and retake locks to take the jsctx_mutex on the -+ * context we're about to release without violating lock -+ * ordering -+ */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ DBG_MSG(2, ("New session opened\n")); + ++ return _MALI_OSK_ERR_OK; ++} + -+ /* Release context from address space */ -+ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++_mali_osk_errcode_t _ump_ukk_close(void **context) ++{ ++ struct ump_session_data *session_data; ++ ump_session_memory_list_element *item; ++ ump_session_memory_list_element *tmp; + -+ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); ++ session_data = (struct ump_session_data *)*context; ++ if (NULL == session_data) { ++ MSG_ERR(("Session data is NULL in _ump_ukk_close()\n")); ++ return _MALI_OSK_ERR_INVALID_ARGS; ++ } + -+ if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, -+ as_kctx, -+ true); ++ /* Unmap any descriptors mapped in. 
*/ ++ if (0 == _mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list)) { ++ ump_memory_allocation *descriptor; ++ ump_memory_allocation *temp; + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ DBG_MSG(1, ("Memory mappings found on session usage list during session termination\n")); + -+ return i; -+ } ++ /* use the 'safe' list iterator, since freeing removes the active block from the list we're iterating */ ++ _MALI_OSK_LIST_FOREACHENTRY(descriptor, temp, &session_data->list_head_session_memory_mappings_list, ump_memory_allocation, list) { ++ _ump_uk_unmap_mem_s unmap_args; ++ DBG_MSG(4, ("Freeing block with phys address 0x%x size 0x%x mapped in user space at 0x%x\n", ++ descriptor->phys_addr, descriptor->size, descriptor->mapping)); ++ unmap_args.ctx = (void *)session_data; ++ unmap_args.mapping = descriptor->mapping; ++ unmap_args.size = descriptor->size; ++ unmap_args._ukk_private = NULL; /* NOTE: unused */ ++ unmap_args.cookie = descriptor->cookie; + -+ /* Context was retained while locks were dropped, -+ * continue looking for free AS */ ++ /* NOTE: This modifies the list_head_session_memory_mappings_list */ ++ _ump_ukk_unmap_mem(&unmap_args); ++ } ++ } + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); ++ /* ASSERT that we really did free everything, because _ump_ukk_unmap_mem() ++ * can fail silently. */ ++ DEBUG_ASSERT(_mali_osk_list_empty(&session_data->list_head_session_memory_mappings_list)); + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ } ++ _MALI_OSK_LIST_FOREACHENTRY(item, tmp, &session_data->list_head_session_memory_list, ump_session_memory_list_element, list) { ++ _mali_osk_list_del(&item->list); ++ DBG_MSG(2, ("Releasing UMP memory %u as part of file close\n", item->mem->secure_id)); ++ ump_dd_reference_release(item->mem); ++ _mali_osk_free(item); + } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ump_descriptor_mapping_destroy(session_data->cookies_map); + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ _mali_osk_mutex_term(session_data->lock); ++ _mali_osk_free(session_data); + -+ return KBASEP_AS_NR_INVALID; ++ DBG_MSG(2, ("Session closed\n")); ++ ++ return _MALI_OSK_ERR_OK; +} + -+bool kbase_backend_use_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int as_nr) ++_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_as *new_address_space = NULL; ++ struct ump_session_data *session_data; ++ ump_memory_allocation *descriptor; /* Describes current mapping of memory */ ++ _mali_osk_errcode_t err; ++ unsigned long offset = 0; ++ unsigned long left; ++ ump_dd_handle handle; /* The real UMP handle for this memory. Its real datatype is ump_dd_mem* */ ++ ump_dd_mem *mem; /* The real UMP memory. 
It is equal to the handle, but with exposed struct */ ++ u32 block; ++ int map_id; + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ session_data = (ump_session_data *)args->ctx; ++ if (NULL == session_data) { ++ MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n")); ++ return _MALI_OSK_ERR_INVALID_ARGS; ++ } + -+ if (kbdev->hwaccess.active_kctx == kctx) { -+ WARN(1, "Context is already scheduled in\n"); -+ return false; ++ descriptor = (ump_memory_allocation *) _mali_osk_calloc(1, sizeof(ump_memory_allocation)); ++ if (NULL == descriptor) { ++ MSG_ERR(("ump_ukk_map_mem: descriptor allocation failed\n")); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ new_address_space = &kbdev->as[as_nr]; ++ handle = ump_dd_handle_create_from_secure_id(args->secure_id); ++ if (UMP_DD_HANDLE_INVALID == handle) { ++ _mali_osk_free(descriptor); ++ DBG_MSG(1, ("Trying to map unknown secure ID %u\n", args->secure_id)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ mem = (ump_dd_mem *)handle; ++ DEBUG_ASSERT(mem); ++ if (mem->size_bytes != args->size) { ++ _mali_osk_free(descriptor); ++ ump_dd_reference_release(handle); ++ DBG_MSG(1, ("Trying to map too much or little. ID: %u, virtual size=%lu, UMP size: %lu\n", args->secure_id, args->size, mem->size_bytes)); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); ++ map_id = ump_descriptor_mapping_allocate_mapping(session_data->cookies_map, (void *) descriptor); + -+ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { -+ /* We need to retain it to keep the corresponding address space -+ */ -+ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); -+ } ++ if (map_id < 0) { ++ _mali_osk_free(descriptor); ++ ump_dd_reference_release(handle); ++ DBG_MSG(1, ("ump_ukk_map_mem: unable to allocate a descriptor_mapping for return cookie\n")); + -+ return true; -+} ++ return _MALI_OSK_ERR_NOMEM; ++ } + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h -new file mode 100644 -index 000000000..08a7400e6 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h -@@ -0,0 +1,123 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ descriptor->size = args->size; ++ descriptor->handle = handle; ++ descriptor->phys_addr = args->phys_addr; ++ descriptor->process_mapping_info = args->_ukk_private; ++ descriptor->ump_session = session_data; ++ descriptor->cookie = (u32)map_id; + ++ if (mem->is_cached) { ++ descriptor->is_cached = 1; ++ DBG_MSG(3, ("Mapping UMP secure_id: %d as cached.\n", args->secure_id)); ++ } else { ++ descriptor->is_cached = 0; ++ DBG_MSG(3, ("Mapping UMP secure_id: %d as Uncached.\n", args->secure_id)); ++ } + ++ _mali_osk_list_init(&descriptor->list); + ++ err = _ump_osk_mem_mapregion_init(descriptor); ++ if (_MALI_OSK_ERR_OK != err) { ++ DBG_MSG(1, ("Failed to initialize memory mapping in _ump_ukk_map_mem(). ID: %u\n", args->secure_id)); ++ ump_descriptor_mapping_free(session_data->cookies_map, map_id); ++ _mali_osk_free(descriptor); ++ ump_dd_reference_release(mem); ++ return err; ++ } + -+/* -+ * Register-based HW access backend specific definitions -+ */ ++ DBG_MSG(4, ("Mapping virtual to physical memory: ID: %u, size:%lu, first physical addr: 0x%08lx, number of regions: %lu\n", ++ mem->secure_id, ++ mem->size_bytes, ++ ((NULL != mem->block_array) ? mem->block_array->addr : 0), ++ mem->nr_blocks)); + -+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ -+#define _KBASE_HWACCESS_GPU_DEFS_H_ ++ left = descriptor->size; ++ /* loop over all blocks and map them in */ ++ for (block = 0; block < mem->nr_blocks; block++) { ++ unsigned long size_to_map; + -+/* SLOT_RB_SIZE must be < 256 */ -+#define SLOT_RB_SIZE 2 -+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) ++ if (left > mem->block_array[block].size) { ++ size_to_map = mem->block_array[block].size; ++ } else { ++ size_to_map = left; ++ } + -+/** -+ * struct rb_entry - Ringbuffer entry -+ * @katom: Atom associated with this entry -+ */ -+struct rb_entry { -+ struct kbase_jd_atom *katom; -+}; ++ if (_MALI_OSK_ERR_OK != _ump_osk_mem_mapregion_map(descriptor, offset, (u32 *) & (mem->block_array[block].addr), size_to_map)) { ++ DBG_MSG(1, ("WARNING: _ump_ukk_map_mem failed to map memory into userspace\n")); ++ ump_descriptor_mapping_free(session_data->cookies_map, map_id); ++ ump_dd_reference_release(mem); ++ _ump_osk_mem_mapregion_term(descriptor); ++ _mali_osk_free(descriptor); ++ return _MALI_OSK_ERR_FAULT; ++ } ++ left -= size_to_map; ++ offset += size_to_map; ++ } + -+/** -+ * struct slot_rb - Slot ringbuffer -+ * @entries: Ringbuffer entries -+ * @last_context: The last context to submit a job on this slot -+ * @read_idx: Current read index of buffer -+ * @write_idx: Current write index of buffer -+ * @job_chain_flag: Flag used to implement jobchain disambiguation -+ */ -+struct slot_rb { -+ struct rb_entry entries[SLOT_RB_SIZE]; ++ /* Add to the ump_memory_allocation tracking list */ ++ _mali_osk_mutex_wait(session_data->lock); ++ _mali_osk_list_add(&descriptor->list, &session_data->list_head_session_memory_mappings_list); ++ _mali_osk_mutex_signal(session_data->lock); + -+ struct kbase_context *last_context; ++ args->mapping = descriptor->mapping; ++ args->cookie = descriptor->cookie; + -+ u8 read_idx; -+ u8 write_idx; ++ return _MALI_OSK_ERR_OK; ++} + -+ u8 job_chain_flag; -+}; ++void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args) ++{ ++ struct ump_session_data *session_data; ++ ump_memory_allocation *descriptor; ++ ump_dd_handle handle; + -+/** -+ * struct kbase_backend_data - GPU backend specific data for HW access layer -+ * @slot_rb: Slot ringbuffers -+ * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines -+ * whether slots 0/1 or 
slot 2 are currently being -+ * pulled from -+ * @scheduling_timer: The timer tick used for rescheduling jobs -+ * @timer_running: Is the timer running? The runpool_mutex must be -+ * held whilst modifying this. -+ * @suspend_timer: Is the timer suspended? Set when a suspend -+ * occurs and cleared on resume. The runpool_mutex -+ * must be held whilst modifying this. -+ * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) -+ * @reset_workq: Work queue for performing the reset -+ * @reset_work: Work item for performing the reset -+ * @reset_wait: Wait event signalled when the reset is complete -+ * @reset_timer: Timeout for soft-stops before the reset -+ * @timeouts_updated: Have timeout values just been updated? -+ * -+ * The hwaccess_lock (a spinlock) must be held when accessing this structure -+ */ -+struct kbase_backend_data { -+ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; ++ session_data = (ump_session_data *)args->ctx; + -+ bool rmu_workaround_flag; ++ if (NULL == session_data) { ++ MSG_ERR(("Session data is NULL in _ump_ukk_map_mem()\n")); ++ return; ++ } + -+ struct hrtimer scheduling_timer; ++ if (0 != ump_descriptor_mapping_get(session_data->cookies_map, (int)args->cookie, (void **)&descriptor)) { ++ MSG_ERR(("_ump_ukk_map_mem: cookie 0x%X not found for this session\n", args->cookie)); ++ return; ++ } + -+ bool timer_running; -+ bool suspend_timer; ++ DEBUG_ASSERT_POINTER(descriptor); + -+ atomic_t reset_gpu; ++ handle = descriptor->handle; ++ if (UMP_DD_HANDLE_INVALID == handle) { ++ DBG_MSG(1, ("WARNING: Trying to unmap unknown handle: UNKNOWN\n")); ++ return; ++ } + -+/* The GPU reset isn't pending */ -+#define KBASE_RESET_GPU_NOT_PENDING 0 -+/* kbase_prepare_to_reset_gpu has been called */ -+#define KBASE_RESET_GPU_PREPARED 1 -+/* kbase_reset_gpu has been called - the reset will now definitely happen -+ * within the timeout period */ -+#define KBASE_RESET_GPU_COMMITTED 2 -+/* The GPU reset process is currently occuring (timeout has expired or -+ * kbasep_try_reset_gpu_early was called) */ -+#define KBASE_RESET_GPU_HAPPENING 3 -+/* Reset the GPU silently, used when resetting the GPU as part of normal -+ * behavior (e.g. when exiting protected mode). 
*/ -+#define KBASE_RESET_GPU_SILENT 4 -+ struct workqueue_struct *reset_workq; -+ struct work_struct reset_work; -+ wait_queue_head_t reset_wait; -+ struct hrtimer reset_timer; ++ /* Remove the ump_memory_allocation from the list of tracked mappings */ ++ _mali_osk_mutex_wait(session_data->lock); ++ _mali_osk_list_del(&descriptor->list); ++ _mali_osk_mutex_signal(session_data->lock); + -+ bool timeouts_updated; -+}; ++ ump_descriptor_mapping_free(session_data->cookies_map, (int)args->cookie); + -+/** -+ * struct kbase_jd_atom_backend - GPU backend specific katom data -+ */ -+struct kbase_jd_atom_backend { -+}; ++ ump_dd_reference_release(handle); + -+/** -+ * struct kbase_context_backend - GPU backend specific context data -+ */ -+struct kbase_context_backend { -+}; ++ _ump_osk_mem_mapregion_term(descriptor); ++ _mali_osk_free(descriptor); ++} + -+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c ++u32 _ump_ukk_report_memory_usage(void) ++{ ++ if (device.backend->stat) ++ return device.backend->stat(device.backend); ++ else ++ return 0; ++} +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h new file mode 100644 -index 000000000..a6fb097b9 +index 000000000..aa65f1cb6 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c -@@ -0,0 +1,1518 @@ ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_common.h +@@ -0,0 +1,125 @@ +/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef __UMP_KERNEL_COMMON_H__ ++#define __UMP_KERNEL_COMMON_H__ + ++#include "ump_kernel_types.h" ++#include "ump_kernel_interface.h" ++#include "ump_kernel_descriptor_mapping.h" ++#include "ump_kernel_random_mapping.h" ++#include "ump_kernel_memory_backend.h" + -+/* -+ * Base kernel job manager APIs -+ */ -+ -+#include -+#include -+#include -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define beenthere(kctx, f, a...) 
\ -+ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + -+#if KBASE_GPU_RESET_EN -+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); -+static void kbasep_reset_timeout_worker(struct work_struct *data); -+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); -+#endif /* KBASE_GPU_RESET_EN */ ++#ifdef DEBUG ++extern int ump_debug_level; ++#define UMP_DEBUG_PRINT(args) _mali_osk_dbgmsg args ++#define UMP_DEBUG_CODE(args) args ++#define DBG_MSG(level,args) do { /* args should be in brackets */ \ ++ ((level) <= ump_debug_level)?\ ++ UMP_DEBUG_PRINT(("UMP<" #level ">: ")), \ ++ UMP_DEBUG_PRINT(args):0; \ ++ } while (0) + -+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, -+ struct kbase_context *kctx) -+{ -+ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); -+} ++#define DBG_MSG_IF(level,condition,args) /* args should be in brackets */ \ ++ if((condition)&&((level) <= ump_debug_level)) {\ ++ UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \ ++ UMP_DEBUG_PRINT(args); \ ++ } + -+void kbase_job_hw_submit(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ int js) -+{ -+ struct kbase_context *kctx; -+ u32 cfg; -+ u64 jc_head = katom->jc; ++#define DBG_MSG_ELSE(level,args) /* args should be in brackets */ \ ++ else if((level) <= ump_debug_level) { \ ++ UMP_DEBUG_PRINT(("UMP<" #level ">: ")); \ ++ UMP_DEBUG_PRINT(args); \ ++ } + -+ KBASE_DEBUG_ASSERT(kbdev); -+ KBASE_DEBUG_ASSERT(katom); ++#define DEBUG_ASSERT_POINTER(pointer) do {if( (pointer)== NULL) MSG_ERR(("NULL pointer " #pointer)); } while(0) ++#define DEBUG_ASSERT(condition) do {if(!(condition)) MSG_ERR(("ASSERT failed: " #condition)); } while(0) ++#else /* DEBUG */ ++#define UMP_DEBUG_PRINT(args) do {} while(0) ++#define UMP_DEBUG_CODE(args) ++#define DBG_MSG(level,args) do {} while(0) ++#define DBG_MSG_IF(level,condition,args) do {} while(0) ++#define DBG_MSG_ELSE(level,args) do {} while(0) ++#define DEBUG_ASSERT(condition) do {} while(0) ++#define DEBUG_ASSERT_POINTER(pointer) do {} while(0) ++#endif /* DEBUG */ + -+ kctx = katom->kctx; ++#define MSG_ERR(args) do{ /* args should be in brackets */ \ ++ _mali_osk_dbgmsg("UMP: ERR: %s\n" ,__FILE__); \ ++ _mali_osk_dbgmsg( " %s()%4d\n", __FUNCTION__, __LINE__) ; \ ++ _mali_osk_dbgmsg args ; \ ++ _mali_osk_dbgmsg("\n"); \ ++ } while(0) + -+ /* Command register must be available */ -+ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); -+ /* Affinity is not violating */ -+ kbase_js_debug_log_current_affinities(kbdev); -+ KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, -+ katom->affinity)); ++#define MSG(args) do{ /* args should be in brackets */ \ ++ _mali_osk_dbgmsg("UMP: "); \ ++ _mali_osk_dbgmsg args; \ ++ } while (0) + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), -+ jc_head & 0xFFFFFFFF, kctx); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), -+ jc_head >> 32, kctx); + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), -+ katom->affinity & 0xFFFFFFFF, kctx); -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), -+ katom->affinity >> 32, kctx); + -+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on -+ * start */ -+ cfg = kctx->as_nr; ++/* ++ * This struct is used to store per session data. ++ * A session is created when someone open() the device, and ++ * closed when someone close() it or the user space application terminates. 
++ */ ++typedef struct ump_session_data { ++ _mali_osk_list_t list_head_session_memory_list; /**< List of ump allocations made by the process (elements are ump_session_memory_list_element) */ ++ _mali_osk_list_t list_head_session_memory_mappings_list; /**< List of ump_memory_allocations mapped in */ ++ int api_version; ++ _mali_osk_mutex_t *lock; ++ ump_descriptor_mapping *cookies_map; /**< Secure mapping of cookies from _ump_ukk_map_mem() */ ++ int cache_operations_ongoing; ++ int has_pending_level1_cache_flush; ++} ump_session_data; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) -+ cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + -+#ifndef CONFIG_MALI_COH_GPU -+ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) -+ cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; -+ else -+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + -+ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END)) -+ cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; -+ else -+ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; -+#endif /* CONFIG_MALI_COH_GPU */ ++/* ++ * This struct is used to track the UMP memory references a session has. ++ * We need to track this in order to be able to clean up after user space processes ++ * which don't do it themself (e.g. due to a crash or premature termination). ++ */ ++typedef struct ump_session_memory_list_element { ++ struct ump_dd_mem *mem; ++ _mali_osk_list_t list; ++} ump_session_memory_list_element; + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649)) -+ cfg |= JS_CONFIG_START_MMU; + -+ cfg |= JS_CONFIG_THREAD_PRI(8); + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && -+ (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) -+ cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; ++/* ++ * Device specific data, created when device driver is loaded, and then kept as the global variable device. ++ */ ++typedef struct ump_dev { ++ ump_random_mapping *secure_id_map; ++ ump_memory_backend *backend; ++} ump_dev; + -+ if (kbase_hw_has_feature(kbdev, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { -+ if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { -+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG; -+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; -+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = -+ true; -+ } else { -+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; -+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = -+ false; -+ } -+ } + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), -+ katom->flush_id, kctx); ++extern int ump_debug_level; ++extern struct ump_dev device; + -+ /* Write an approximate start timestamp. -+ * It's approximate because there might be a job in the HEAD register. -+ */ -+ katom->start_timestamp = ktime_get(); ++_mali_osk_errcode_t ump_kernel_constructor(void); ++void ump_kernel_destructor(void); ++int ump_map_errcode(_mali_osk_errcode_t err); + -+ /* GO ! */ -+ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", -+ katom, kctx, js, jc_head, katom->affinity); ++/** ++ * variables from user space cannot be dereferenced from kernel space; tagging them ++ * with __user allows the GCC compiler to generate a warning. Other compilers may ++ * not support this so we define it here as an empty macro if the compiler doesn't ++ * define it. 
++ */ ++#ifndef __user ++#define __user ++#endif + -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, -+ (u32) katom->affinity); ++#endif /* __UMP_KERNEL_COMMON_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c +new file mode 100644 +index 000000000..e4642f039 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.c +@@ -0,0 +1,155 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_job_slots_event( -+ GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), -+ kctx, kbase_jd_atom_id(kctx, katom)); -+#endif -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, -+ katom->affinity, cfg); -+ KBASE_TLSTREAM_TL_RET_CTX_LPU( -+ kctx, -+ &kbdev->gpu_props.props.raw_props.js_features[ -+ katom->slot_nr]); -+ KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); -+ KBASE_TLSTREAM_TL_RET_ATOM_LPU( -+ katom, -+ &kbdev->gpu_props.props.raw_props.js_features[js], -+ "ctx_nr,atom_nr"); -+#ifdef CONFIG_GPU_TRACEPOINTS -+ if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { -+ /* If this is the only job on the slot, trace it as starting */ -+ char js_string[16]; ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "mali_osk_bitops.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_descriptor_mapping.h" + -+ trace_gpu_sched_switch( -+ kbasep_make_job_slot_string(js, js_string, -+ sizeof(js_string)), -+ ktime_to_ns(katom->start_timestamp), -+ (u32)katom->kctx->id, 0, katom->work_id); -+ kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; -+ } -+#endif -+ kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); ++#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1)) + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), -+ JS_COMMAND_START, katom->kctx); -+} ++/** ++ * Allocate a descriptor table capable of holding 'count' mappings ++ * @param count Number of mappings in the table ++ * @return Pointer to a new table, NULL on error ++ */ ++static ump_descriptor_table *descriptor_table_alloc(int count); + +/** -+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp -+ * @kbdev: kbase device -+ * @js: job slot -+ * @end_timestamp: timestamp -+ * -+ * Update the start_timestamp of the job currently in the HEAD, based on the -+ * fact that we got an IRQ for the previous set of completed jobs. 
-+ * -+ * The estimate also takes into account the time the job was submitted, to -+ * work out the best estimate (which might still result in an over-estimate to -+ * the calculated time spent) ++ * Free a descriptor table ++ * @param table The table to free + */ -+static void kbasep_job_slot_update_head_start_timestamp( -+ struct kbase_device *kbdev, -+ int js, -+ ktime_t end_timestamp) ++static void descriptor_table_free(ump_descriptor_table *table); ++ ++ump_descriptor_mapping *ump_descriptor_mapping_create(int init_entries, int max_entries) +{ -+ if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { -+ struct kbase_jd_atom *katom; -+ ktime_t timestamp_diff; -+ /* The atom in the HEAD */ -+ katom = kbase_gpu_inspect(kbdev, js, 0); ++ ump_descriptor_mapping *map = _mali_osk_calloc(1, sizeof(ump_descriptor_mapping)); + -+ KBASE_DEBUG_ASSERT(katom != NULL); ++ init_entries = MALI_PAD_INT(init_entries); ++ max_entries = MALI_PAD_INT(max_entries); + -+ timestamp_diff = ktime_sub(end_timestamp, -+ katom->start_timestamp); -+ if (ktime_to_ns(timestamp_diff) >= 0) { -+ /* Only update the timestamp if it's a better estimate -+ * than what's currently stored. This is because our -+ * estimate that accounts for the throttle time may be -+ * too much of an overestimate */ -+ katom->start_timestamp = end_timestamp; ++ if (NULL != map) { ++ map->table = descriptor_table_alloc(init_entries); ++ if (NULL != map->table) { ++ map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_UNORDERED, 0); ++ if (NULL != map->lock) { ++ _mali_osk_set_nonatomic_bit(0, map->table->usage); /* reserve bit 0 to prevent NULL/zero logic to kick in */ ++ map->max_nr_mappings_allowed = max_entries; ++ map->current_nr_mappings = init_entries; ++ return map; ++ } ++ descriptor_table_free(map->table); + } ++ _mali_osk_free(map); + } ++ return NULL; +} + -+/** -+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline -+ * tracepoint -+ * @kbdev: kbase device -+ * @js: job slot -+ * -+ * Make a tracepoint call to the instrumentation module informing that -+ * softstop happened on given lpu (job slot). 
-+ */ -+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, -+ int js) ++void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map) +{ -+ KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( -+ &kbdev->gpu_props.props.raw_props.js_features[js]); ++ descriptor_table_free(map->table); ++ _mali_osk_mutex_rw_term(map->lock); ++ _mali_osk_free(map); +} + -+void kbase_job_done(struct kbase_device *kbdev, u32 done) ++int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target) +{ -+ unsigned long flags; -+ int i; -+ u32 count = 0; -+ ktime_t end_timestamp = ktime_get(); -+ struct kbasep_js_device_data *js_devdata; ++ int descriptor = -1;/*-EFAULT;*/ ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); ++ descriptor = _mali_osk_find_first_zero_bit(map->table->usage, map->current_nr_mappings); ++ if (descriptor == map->current_nr_mappings) { ++ int nr_mappings_new; ++ /* no free descriptor, try to expand the table */ ++ ump_descriptor_table *new_table; ++ ump_descriptor_table *old_table = map->table; ++ nr_mappings_new = map->current_nr_mappings * 2; + -+ KBASE_DEBUG_ASSERT(kbdev); -+ js_devdata = &kbdev->js_data; ++ if (map->current_nr_mappings >= map->max_nr_mappings_allowed) { ++ descriptor = -1; ++ goto unlock_and_exit; ++ } + -+ KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); ++ new_table = descriptor_table_alloc(nr_mappings_new); ++ if (NULL == new_table) { ++ descriptor = -1; ++ goto unlock_and_exit; ++ } + -+ memset(&kbdev->slot_submit_count_irq[0], 0, -+ sizeof(kbdev->slot_submit_count_irq)); ++ _mali_osk_memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG); ++ _mali_osk_memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void *)); ++ map->table = new_table; ++ map->current_nr_mappings = nr_mappings_new; ++ descriptor_table_free(old_table); ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* we have found a valid descriptor, set the value and usage bit */ ++ _mali_osk_set_nonatomic_bit(descriptor, map->table->usage); ++ map->table->mappings[descriptor] = target; + -+ while (done) { -+ u32 failed = done >> 16; ++unlock_and_exit: ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); ++ return descriptor; ++} + -+ /* treat failed slots as finished slots */ -+ u32 finished = (done & 0xFFFF) | failed; ++int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target) ++{ ++ int result = -1;/*-EFAULT;*/ ++ DEBUG_ASSERT(map); ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); ++ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { ++ *target = map->table->mappings[descriptor]; ++ result = 0; ++ } else *target = NULL; ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); ++ return result; ++} + -+ /* Note: This is inherently unfair, as we always check -+ * for lower numbered interrupts before the higher -+ * numbered ones.*/ -+ i = ffs(finished) - 1; -+ KBASE_DEBUG_ASSERT(i >= 0); ++int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target) ++{ ++ int result = -1;/*-EFAULT;*/ ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); ++ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { ++ map->table->mappings[descriptor] = target; ++ result = 0; ++ } ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); ++ return 
result; ++} + -+ do { -+ int nr_done; -+ u32 active; -+ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ -+ u64 job_tail = 0; ++void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor) ++{ ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); ++ if ((descriptor > 0) && (descriptor < map->current_nr_mappings) && _mali_osk_test_bit(descriptor, map->table->usage)) { ++ map->table->mappings[descriptor] = NULL; ++ _mali_osk_clear_nonatomic_bit(descriptor, map->table->usage); ++ } ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); ++} + -+ if (failed & (1u << i)) { -+ /* read out the job slot status code if the job -+ * slot reported failure */ -+ completion_code = kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_STATUS), NULL); ++static ump_descriptor_table *descriptor_table_alloc(int count) ++{ ++ ump_descriptor_table *table; + -+ switch (completion_code) { -+ case BASE_JD_EVENT_STOPPED: -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_job_slots_event( -+ GATOR_MAKE_EVENT( -+ GATOR_JOB_SLOT_SOFT_STOPPED, i), -+ NULL, 0); -+#endif ++ table = _mali_osk_calloc(1, sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG) + (sizeof(void *) * count)); + -+ kbasep_trace_tl_event_lpu_softstop( -+ kbdev, i); ++ if (NULL != table) { ++ table->usage = (u32 *)((u8 *)table + sizeof(ump_descriptor_table)); ++ table->mappings = (void **)((u8 *)table + sizeof(ump_descriptor_table) + ((sizeof(unsigned long) * count) / BITS_PER_LONG)); ++ } + -+ /* Soft-stopped job - read the value of -+ * JS_TAIL so that the job chain can -+ * be resumed */ -+ job_tail = (u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_TAIL_LO), -+ NULL) | -+ ((u64)kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, JS_TAIL_HI), -+ NULL) << 32); -+ break; -+ case BASE_JD_EVENT_NOT_STARTED: -+ /* PRLAM-10673 can cause a TERMINATED -+ * job to come back as NOT_STARTED, but -+ * the error interrupt helps us detect -+ * it */ -+ completion_code = -+ BASE_JD_EVENT_TERMINATED; -+ /* fall through */ -+ default: -+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", -+ i, completion_code, -+ kbase_exception_name -+ (kbdev, -+ completion_code)); -+ } ++ return table; ++} + -+ kbase_gpu_irq_evict(kbdev, i); -+ } ++static void descriptor_table_free(ump_descriptor_table *table) ++{ ++ _mali_osk_free(table); ++} + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), -+ done & ((1 << i) | (1 << (i + 16))), -+ NULL); -+ active = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_JS_STATE), -+ NULL); +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h +new file mode 100644 +index 000000000..a888ba833 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_descriptor_mapping.h +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ if (((active >> i) & 1) == 0 && -+ (((done >> (i + 16)) & 1) == 0)) { -+ /* There is a potential race we must work -+ * around: -+ * -+ * 1. A job slot has a job in both current and -+ * next registers -+ * 2. The job in current completes -+ * successfully, the IRQ handler reads -+ * RAWSTAT and calls this function with the -+ * relevant bit set in "done" -+ * 3. The job in the next registers becomes the -+ * current job on the GPU -+ * 4. Sometime before the JOB_IRQ_CLEAR line -+ * above the job on the GPU _fails_ -+ * 5. The IRQ_CLEAR clears the done bit but not -+ * the failed bit. This atomically sets -+ * JOB_IRQ_JS_STATE. However since both jobs -+ * have now completed the relevant bits for -+ * the slot are set to 0. -+ * -+ * If we now did nothing then we'd incorrectly -+ * assume that _both_ jobs had completed -+ * successfully (since we haven't yet observed -+ * the fail bit being set in RAWSTAT). -+ * -+ * So at this point if there are no active jobs -+ * left we check to see if RAWSTAT has a failure -+ * bit set for the job slot. If it does we know -+ * that there has been a new failure that we -+ * didn't previously know about, so we make sure -+ * that we record this in active (but we wait -+ * for the next loop to deal with it). -+ * -+ * If we were handling a job failure (i.e. done -+ * has the relevant high bit set) then we know -+ * that the value read back from -+ * JOB_IRQ_JS_STATE is the correct number of -+ * remaining jobs because the failed job will -+ * have prevented any futher jobs from starting -+ * execution. -+ */ -+ u32 rawstat = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); ++/** ++ * @file ump_kernel_descriptor_mapping.h ++ */ + -+ if ((rawstat >> (i + 16)) & 1) { -+ /* There is a failed job that we've -+ * missed - add it back to active */ -+ active |= (1u << i); -+ } -+ } ++#ifndef __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ ++#define __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ + -+ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", -+ completion_code); ++#include "mali_osk.h" + -+ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); -+ nr_done -= (active >> i) & 1; -+ nr_done -= (active >> (i + 16)) & 1; ++/** ++ * The actual descriptor mapping table, never directly accessed by clients ++ */ ++typedef struct ump_descriptor_table { ++ u32 *usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used or not */ ++ void **mappings; /**< Array of the pointers the descriptors map to */ ++} ump_descriptor_table; + -+ if (nr_done <= 0) { -+ dev_warn(kbdev->dev, "Spurious interrupt on slot %d", -+ i); ++/** ++ * The descriptor mapping object ++ * Provides a separate namespace where we can map an integer to a pointer ++ */ ++typedef struct ump_descriptor_mapping { ++ _mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */ ++ int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */ ++ int current_nr_mappings; /**< Current number of possible mappings */ ++ ump_descriptor_table *table; /**< Pointer to the current mapping table */ ++} ump_descriptor_mapping; + -+ goto spurious; -+ } ++/** ++ * Create a descriptor mapping object ++ * Create a descriptor mapping capable of holding init_entries growable to max_entries ++ * @param init_entries Number of entries to preallocate memory for ++ * @param max_entries Number of entries to max support ++ * @return Pointer to a descriptor mapping object, NULL on failure ++ */ ++ump_descriptor_mapping *ump_descriptor_mapping_create(int 
init_entries, int max_entries); + -+ count += nr_done; ++/** ++ * Destroy a descriptor mapping object ++ * @param map The map to free ++ */ ++void ump_descriptor_mapping_destroy(ump_descriptor_mapping *map); + -+ while (nr_done) { -+ if (nr_done == 1) { -+ kbase_gpu_complete_hw(kbdev, i, -+ completion_code, -+ job_tail, -+ &end_timestamp); -+ kbase_jm_try_kick_all(kbdev); -+ } else { -+ /* More than one job has completed. -+ * Since this is not the last job being -+ * reported this time it must have -+ * passed. This is because the hardware -+ * will not allow further jobs in a job -+ * slot to complete until the failed job -+ * is cleared from the IRQ status. -+ */ -+ kbase_gpu_complete_hw(kbdev, i, -+ BASE_JD_EVENT_DONE, -+ 0, -+ &end_timestamp); -+ } -+ nr_done--; -+ } -+ spurious: -+ done = kbase_reg_read(kbdev, -+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); ++/** ++ * Allocate a new mapping entry (descriptor ID) ++ * Allocates a new entry in the map. ++ * @param map The map to allocate a new entry in ++ * @param target The value to map to ++ * @return The descriptor allocated, a negative value on error ++ */ ++int ump_descriptor_mapping_allocate_mapping(ump_descriptor_mapping *map, void *target); + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { -+ /* Workaround for missing interrupt caused by -+ * PRLAM-10883 */ -+ if (((active >> i) & 1) && (0 == -+ kbase_reg_read(kbdev, -+ JOB_SLOT_REG(i, -+ JS_STATUS), NULL))) { -+ /* Force job slot to be processed again -+ */ -+ done |= (1u << i); -+ } -+ } ++/** ++ * Get the value mapped to by a descriptor ID ++ * @param map The map to lookup the descriptor id in ++ * @param descriptor The descriptor ID to lookup ++ * @param target Pointer to a pointer which will receive the stored value ++ * @return 0 on successful lookup, negative on error ++ */ ++int ump_descriptor_mapping_get(ump_descriptor_mapping *map, int descriptor, void **target); + -+ failed = done >> 16; -+ finished = (done & 0xFFFF) | failed; -+ if (done) -+ end_timestamp = ktime_get(); -+ } while (finished & (1 << i)); ++/** ++ * Set the value mapped to by a descriptor ID ++ * @param map The map to lookup the descriptor id in ++ * @param descriptor The descriptor ID to lookup ++ * @param target Pointer to replace the current value with ++ * @return 0 on successful lookup, negative on error ++ */ ++int ump_descriptor_mapping_set(ump_descriptor_mapping *map, int descriptor, void *target); + -+ kbasep_job_slot_update_head_start_timestamp(kbdev, i, -+ end_timestamp); -+ } ++/** ++ * Free the descriptor ID ++ * For the descriptor to be reused it has to be freed ++ * @param map The map to free the descriptor from ++ * @param descriptor The descriptor ID to free ++ */ ++void ump_descriptor_mapping_free(ump_descriptor_mapping *map, int descriptor); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#if KBASE_GPU_RESET_EN -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_COMMITTED) { -+ /* If we're trying to reset the GPU then we might be able to do -+ * it early (without waiting for a timeout) because some jobs -+ * have completed -+ */ -+ kbasep_try_reset_gpu_early(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ -+ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); -+} -+KBASE_EXPORT_TEST_API(kbase_job_done); ++#endif /* __UMP_KERNEL_DESCRIPTOR_MAPPING_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h +new file mode 100644 +index 
000000000..2b69f68e8 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_memory_backend.h +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ bool soft_stops_allowed = true; ++/** ++ * @file ump_kernel_memory_mapping.h ++ */ + -+ if (kbase_jd_katom_is_protected(katom)) { -+ soft_stops_allowed = false; -+ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { -+ if ((katom->core_req & BASE_JD_REQ_T) != 0) -+ soft_stops_allowed = false; -+ } -+ return soft_stops_allowed; -+} ++#ifndef __UMP_KERNEL_MEMORY_BACKEND_H__ ++#define __UMP_KERNEL_MEMORY_BACKEND_H__ + -+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, -+ base_jd_core_req core_reqs) -+{ -+ bool hard_stops_allowed = true; ++#include "ump_kernel_interface.h" ++#include "ump_kernel_types.h" + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { -+ if ((core_reqs & BASE_JD_REQ_T) != 0) -+ hard_stops_allowed = false; -+ } -+ return hard_stops_allowed; -+} + -+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, -+ int js, -+ u32 action, -+ base_jd_core_req core_reqs, -+ struct kbase_jd_atom *target_katom) -+{ -+ struct kbase_context *kctx = target_katom->kctx; -+#if KBASE_TRACE_ENABLE -+ u32 status_reg_before; -+ u64 job_in_head_before; -+ u32 status_reg_after; ++typedef struct ump_memory_allocation { ++ void *phys_addr; ++ void *mapping; ++ unsigned long size; ++ ump_dd_handle handle; ++ void *process_mapping_info; ++ u32 cookie; /**< necessary on some U/K interface implementations */ ++ struct ump_session_data *ump_session; /**< Session that this allocation belongs to */ ++ _mali_osk_list_t list; /**< List for linking together memory allocations into the session's memory head */ ++ u32 is_cached; ++} ump_memory_allocation; + -+ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); ++typedef struct ump_memory_backend { ++ int (*allocate)(void *ctx, ump_dd_mem *descriptor); ++ void (*release)(void *ctx, ump_dd_mem *descriptor); ++ void (*shutdown)(struct ump_memory_backend *backend); ++ u32(*stat)(struct ump_memory_backend *backend); ++ int (*pre_allocate_physical_check)(void *ctx, u32 size); ++ u32(*adjust_to_mali_phys)(void *ctx, u32 cpu_phys); ++ void *ctx; ++} ump_memory_backend; + -+ /* Check the head pointer */ -+ job_in_head_before = ((u64) kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) -+ | (((u64) kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) -+ << 32); -+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), -+ NULL); -+#endif ++ump_memory_backend *ump_memory_backend_create(void); ++void ump_memory_backend_destroy(void); + -+ if (action == JS_COMMAND_SOFT_STOP) { -+ bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, -+ target_katom); ++#endif /*__UMP_KERNEL_MEMORY_BACKEND_H__ */ + -+ if (!soft_stop_allowed) { -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(kbdev->dev, -+ "Attempt made to soft-stop a job that cannot be 
soft-stopped. core_reqs = 0x%X", -+ (unsigned int)core_reqs); -+#endif /* CONFIG_MALI_DEBUG */ -+ return; -+ } +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c b/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c +new file mode 100644 +index 000000000..0b6434bee +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_ref_drv.c +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ /* We are about to issue a soft stop, so mark the atom as having -+ * been soft stopped */ -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; ++#include "mali_osk.h" ++#include "mali_osk_list.h" ++#include "ump_osk.h" ++#include "ump_uk_types.h" + -+ /* Mark the point where we issue the soft-stop command */ -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); ++#include "ump_kernel_interface_ref_drv.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_descriptor_mapping.h" + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { -+ int i; ++#define UMP_MINIMUM_SIZE 4096 ++#define UMP_MINIMUM_SIZE_MASK (~(UMP_MINIMUM_SIZE-1)) ++#define UMP_SIZE_ALIGN(x) (((x)+UMP_MINIMUM_SIZE-1)&UMP_MINIMUM_SIZE_MASK) ++#define UMP_ADDR_ALIGN_OFFSET(x) ((x)&(UMP_MINIMUM_SIZE-1)) ++static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor); + -+ for (i = 0; -+ i < kbase_backend_nr_atoms_submitted(kbdev, js); -+ i++) { -+ struct kbase_jd_atom *katom; ++UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block *blocks, unsigned long num_blocks) ++{ ++ ump_dd_mem *mem; ++ unsigned long size_total = 0; ++ int ret; ++ u32 i; + -+ katom = kbase_gpu_inspect(kbdev, js, i); ++ /* Go through the input blocks and verify that they are sane */ ++ for (i = 0; i < num_blocks; i++) { ++ unsigned long addr = blocks[i].addr; ++ unsigned long size = blocks[i].size; + -+ KBASE_DEBUG_ASSERT(katom); ++ DBG_MSG(5, ("Adding physical memory to new handle. Address: 0x%08lx, size: %lu\n", addr, size)); ++ size_total += blocks[i].size; + -+ /* For HW_ISSUE_8316, only 'bad' jobs attacking -+ * the system can cause this issue: normally, -+ * all memory should be allocated in multiples -+ * of 4 pages, and growable memory should be -+ * changed size in multiples of 4 pages. -+ * -+ * Whilst such 'bad' jobs can be cleared by a -+ * GPU reset, the locking up of a uTLB entry -+ * caused by the bad job could also stall other -+ * ASs, meaning that other ASs' jobs don't -+ * complete in the 'grace' period before the -+ * reset. We don't want to lose other ASs' jobs -+ * when they would normally complete fine, so we -+ * must 'poke' the MMU regularly to help other -+ * ASs complete */ -+ kbase_as_poking_timer_retain_atom( -+ kbdev, katom->kctx, katom); -+ } ++ if (0 != UMP_ADDR_ALIGN_OFFSET(addr)) { ++ MSG_ERR(("Trying to create UMP memory from unaligned physical address. 
Address: 0x%08lx\n", addr)); ++ return UMP_DD_HANDLE_INVALID; + } + -+ if (kbase_hw_has_feature( -+ kbdev, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { -+ action = (target_katom->atom_flags & -+ KBASE_KATOM_FLAGS_JOBCHAIN) ? -+ JS_COMMAND_SOFT_STOP_1 : -+ JS_COMMAND_SOFT_STOP_0; ++ if (0 != UMP_ADDR_ALIGN_OFFSET(size)) { ++ MSG_ERR(("Trying to create UMP memory with unaligned size. Size: %lu\n", size)); ++ return UMP_DD_HANDLE_INVALID; + } -+ } else if (action == JS_COMMAND_HARD_STOP) { -+ bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, -+ core_reqs); ++ } + -+ if (!hard_stop_allowed) { -+ /* Jobs can be hard-stopped for the following reasons: -+ * * CFS decides the job has been running too long (and -+ * soft-stop has not occurred). In this case the GPU -+ * will be reset by CFS if the job remains on the -+ * GPU. -+ * -+ * * The context is destroyed, kbase_jd_zap_context -+ * will attempt to hard-stop the job. However it also -+ * has a watchdog which will cause the GPU to be -+ * reset if the job remains on the GPU. -+ * -+ * * An (unhandled) MMU fault occurred. As long as -+ * BASE_HW_ISSUE_8245 is defined then the GPU will be -+ * reset. -+ * -+ * All three cases result in the GPU being reset if the -+ * hard-stop fails, so it is safe to just return and -+ * ignore the hard-stop request. -+ */ -+ dev_warn(kbdev->dev, -+ "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", -+ (unsigned int)core_reqs); -+ return; -+ } -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; ++ /* Allocate the ump_dd_mem struct for this allocation */ ++ mem = _mali_osk_malloc(sizeof(*mem)); ++ if (NULL == mem) { ++ DBG_MSG(1, ("Could not allocate ump_dd_mem in ump_dd_handle_create_from_phys_blocks()\n")); ++ return UMP_DD_HANDLE_INVALID; ++ } + -+ if (kbase_hw_has_feature( -+ kbdev, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { -+ action = (target_katom->atom_flags & -+ KBASE_KATOM_FLAGS_JOBCHAIN) ? 
-+ JS_COMMAND_HARD_STOP_1 : -+ JS_COMMAND_HARD_STOP_0; -+ } ++ /* Now, make a copy of the block information supplied by the user */ ++ mem->block_array = _mali_osk_malloc(sizeof(ump_dd_physical_block) * num_blocks); ++ if (NULL == mem->block_array) { ++ _mali_osk_free(mem); ++ DBG_MSG(1, ("Could not allocate a mem handle for function ump_dd_handle_create_from_phys_blocks().\n")); ++ return UMP_DD_HANDLE_INVALID; + } + -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); ++ _mali_osk_memcpy(mem->block_array, blocks, sizeof(ump_dd_physical_block) * num_blocks); + -+#if KBASE_TRACE_ENABLE -+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), -+ NULL); -+ if (status_reg_after == BASE_JD_EVENT_ACTIVE) { -+ struct kbase_jd_atom *head; -+ struct kbase_context *head_kctx; ++ /* And setup the rest of the ump_dd_mem struct */ ++ _mali_osk_atomic_init(&mem->ref_count, 1); ++ mem->size_bytes = size_total; ++ mem->nr_blocks = num_blocks; ++ mem->backend_info = NULL; ++ mem->ctx = NULL; ++ mem->release_func = phys_blocks_release; ++ /* For now UMP handles created by ump_dd_handle_create_from_phys_blocks() is forced to be Uncached */ ++ mem->is_cached = 0; ++ mem->hw_device = _UMP_UK_USED_BY_CPU; ++ mem->lock_usage = UMP_NOT_LOCKED; + -+ head = kbase_gpu_inspect(kbdev, js, 0); -+ head_kctx = head->kctx; ++ /* Find a secure ID for this allocation */ ++ ret = ump_random_mapping_insert(device.secure_id_map, mem); ++ if (unlikely(ret)) { ++ _mali_osk_free(mem->block_array); ++ _mali_osk_free(mem); ++ DBG_MSG(1, ("Failed to allocate secure ID in ump_dd_handle_create_from_phys_blocks()\n")); ++ return UMP_DD_HANDLE_INVALID; ++ } + -+ if (status_reg_before == BASE_JD_EVENT_ACTIVE) -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, -+ head, job_in_head_before, js); -+ else -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, -+ 0, js); ++ DBG_MSG(3, ("UMP memory created. 
ID: %u, size: %lu\n", mem->secure_id, mem->size_bytes)); + -+ switch (action) { -+ case JS_COMMAND_SOFT_STOP: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, -+ head, head->jc, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_0: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, -+ head, head->jc, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_1: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, -+ head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, -+ head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP_0: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, -+ head, head->jc, js); -+ break; -+ case JS_COMMAND_HARD_STOP_1: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, -+ head, head->jc, js); -+ break; -+ default: -+ BUG(); -+ break; -+ } -+ } else { -+ if (status_reg_before == BASE_JD_EVENT_ACTIVE) -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, -+ job_in_head_before, js); -+ else -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, -+ 0, js); ++ return (ump_dd_handle)mem; ++} + -+ switch (action) { -+ case JS_COMMAND_SOFT_STOP: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, -+ js); -+ break; -+ case JS_COMMAND_SOFT_STOP_0: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, -+ 0, js); -+ break; -+ case JS_COMMAND_SOFT_STOP_1: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, -+ 0, js); -+ break; -+ case JS_COMMAND_HARD_STOP: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, -+ js); -+ break; -+ case JS_COMMAND_HARD_STOP_0: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, -+ 0, js); -+ break; -+ case JS_COMMAND_HARD_STOP_1: -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, -+ 0, js); -+ break; -+ default: -+ BUG(); -+ break; -+ } -+ } -+#endif ++static void phys_blocks_release(void *ctx, struct ump_dd_mem *descriptor) ++{ ++ _mali_osk_free(descriptor->block_array); ++ descriptor->block_array = NULL; +} + -+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) ++_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ int i; ++ ump_session_data *session_data = NULL; ++ ump_dd_mem *new_allocation = NULL; ++ ump_session_memory_list_element *session_memory_element = NULL; ++ int ret; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ js_devdata = &kbdev->js_data; ++ DEBUG_ASSERT_POINTER(user_interaction); ++ DEBUG_ASSERT_POINTER(user_interaction->ctx); + -+ /* Cancel any remaining running jobs for this kctx */ -+ mutex_lock(&kctx->jctx.lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ session_data = (ump_session_data *) user_interaction->ctx; + -+ /* Invalidate all jobs in context, to prevent re-submitting */ -+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { -+ if (!work_pending(&kctx->jctx.atoms[i].work)) -+ kctx->jctx.atoms[i].event_code = -+ BASE_JD_EVENT_JOB_CANCELLED; ++ session_memory_element = _mali_osk_calloc(1, sizeof(ump_session_memory_list_element)); ++ if (NULL == session_memory_element) { ++ DBG_MSG(1, ("Failed to allocate ump_session_memory_list_element in ump_ioctl_allocate()\n")); ++ return _MALI_OSK_ERR_NOMEM; + } + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ kbase_job_slot_hardstop(kctx, i, NULL); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kctx->jctx.lock); -+} ++ 
new_allocation = _mali_osk_calloc(1, sizeof(ump_dd_mem)); ++ if (NULL == new_allocation) { ++ _mali_osk_free(session_memory_element); ++ DBG_MSG(1, ("Failed to allocate ump_dd_mem in _ump_ukk_allocate()\n")); ++ return _MALI_OSK_ERR_NOMEM; ++ } + -+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, -+ struct kbase_jd_atom *target_katom) -+{ -+ struct kbase_device *kbdev; -+ int js = target_katom->slot_nr; -+ int priority = target_katom->sched_priority; -+ int i; -+ bool stop_sent = false; ++ /* Initialize the part of the new_allocation that we know so for */ ++ _mali_osk_atomic_init(&new_allocation->ref_count, 1); ++ if (0 == (UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE & user_interaction->constraints)) ++ new_allocation->is_cached = 0; ++ else new_allocation->is_cached = 1; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ /* Special case a size of 0, we should try to emulate what malloc does ++ * in this case, which is to return a valid pointer that must be freed, ++ * but can't be dereferenced */ ++ if (0 == user_interaction->size) { ++ /* Emulate by actually allocating the minimum block size */ ++ user_interaction->size = 1; ++ } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Page align the size */ ++ new_allocation->size_bytes = UMP_SIZE_ALIGN(user_interaction->size); ++ new_allocation->lock_usage = UMP_NOT_LOCKED; + -+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { -+ struct kbase_jd_atom *katom; ++ /* Now, ask the active memory backend to do the actual memory allocation */ ++ if (!device.backend->allocate(device.backend->ctx, new_allocation)) { ++ DBG_MSG(3, ("OOM: No more UMP memory left. Failed to allocate memory in ump_ioctl_allocate(). Size: %lu, requested size: %lu\n", ++ new_allocation->size_bytes, ++ (unsigned long)user_interaction->size)); ++ _mali_osk_free(new_allocation); ++ _mali_osk_free(session_memory_element); ++ return _MALI_OSK_ERR_INVALID_FUNC; ++ } ++ new_allocation->hw_device = _UMP_UK_USED_BY_CPU; ++ new_allocation->ctx = device.backend->ctx; ++ new_allocation->release_func = device.backend->release; + -+ katom = kbase_gpu_inspect(kbdev, js, i); -+ if (!katom) -+ continue; ++ /* Initialize the session_memory_element, and add it to the session object */ ++ session_memory_element->mem = new_allocation; ++ _mali_osk_mutex_wait(session_data->lock); ++ _mali_osk_list_add(&(session_memory_element->list), &(session_data->list_head_session_memory_list)); ++ _mali_osk_mutex_signal(session_data->lock); + -+ if (katom->kctx != kctx) -+ continue; ++ /* Create a secure ID for this allocation */ ++ ret = ump_random_mapping_insert(device.secure_id_map, new_allocation); ++ if (unlikely(ret)) { ++ new_allocation->release_func(new_allocation->ctx, new_allocation); ++ _mali_osk_free(session_memory_element); ++ _mali_osk_free(new_allocation); ++ DBG_MSG(1, ("Failed to allocate secure ID in ump_ioctl_allocate()\n")); ++ return _MALI_OSK_ERR_INVALID_FUNC; ++ } + -+ if (katom->sched_priority > priority) { -+ if (!stop_sent) -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE( -+ target_katom); ++ user_interaction->secure_id = new_allocation->secure_id; ++ user_interaction->size = new_allocation->size_bytes; ++ DBG_MSG(3, ("UMP memory allocated. 
ID: %u, size: %lu\n", ++ new_allocation->secure_id, ++ new_allocation->size_bytes)); + -+ kbase_job_slot_softstop(kbdev, js, katom); -+ stop_sent = true; -+ } -+ } ++ return _MALI_OSK_ERR_OK; +} +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h b/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h +new file mode 100644 +index 000000000..32f32ccbe +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_kernel_types.h +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+struct zap_reset_data { -+ /* The stages are: -+ * 1. The timer has never been called -+ * 2. The zap has timed out, all slots are soft-stopped - the GPU reset -+ * will happen. The GPU has been reset when -+ * kbdev->hwaccess.backend.reset_waitq is signalled -+ * -+ * (-1 - The timer has been cancelled) -+ */ -+ int stage; -+ struct kbase_device *kbdev; -+ struct hrtimer timer; -+ spinlock_t lock; /* protects updates to stage member */ -+}; -+ -+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer) -+{ -+ struct zap_reset_data *reset_data = container_of(timer, -+ struct zap_reset_data, timer); -+ struct kbase_device *kbdev = reset_data->kbdev; -+ unsigned long flags; ++#ifndef __UMP_KERNEL_TYPES_H__ ++#define __UMP_KERNEL_TYPES_H__ + -+ spin_lock_irqsave(&reset_data->lock, flags); ++#include "ump_kernel_interface.h" ++#include "mali_osk.h" + -+ if (reset_data->stage == -1) -+ goto out; ++#include ++#ifdef CONFIG_DMA_SHARED_BUFFER ++#include ++#endif + -+#if KBASE_GPU_RESET_EN -+ if (kbase_prepare_to_reset_gpu(kbdev)) { -+ dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", -+ ZAP_TIMEOUT); -+ kbase_reset_gpu(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ -+ reset_data->stage = 2; ++typedef enum { ++ UMP_USED_BY_CPU = 0, ++ UMP_USED_BY_MALI = 1, ++ UMP_USED_BY_UNKNOWN_DEVICE = 100, ++} ump_hw_usage; + -+ out: -+ spin_unlock_irqrestore(&reset_data->lock, flags); ++typedef enum { ++ UMP_NOT_LOCKED = 0, ++ UMP_READ = 1, ++ UMP_READ_WRITE = 3, ++} ump_lock_usage; + -+ return HRTIMER_NORESTART; -+} ++/* ++ * This struct is what is "behind" a ump_dd_handle ++ */ ++typedef struct ump_dd_mem { ++ struct rb_node node; ++ ump_secure_id secure_id; ++ _mali_osk_atomic_t ref_count; ++ unsigned long size_bytes; ++ unsigned long nr_blocks; ++ ump_dd_physical_block *block_array; ++ void (*release_func)(void *ctx, struct ump_dd_mem *descriptor); ++ void *ctx; ++ void *backend_info; ++ int is_cached; ++ ump_hw_usage hw_device; ++ ump_lock_usage lock_usage; ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ struct dma_buf_attachment *import_attach; ++ struct sg_table *sgt; ++#endif ++} ump_dd_mem; + -+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct zap_reset_data reset_data; -+ unsigned long flags; + -+ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ reset_data.timer.function = zap_timeout_callback; + -+ spin_lock_init(&reset_data.lock); ++#endif /* __UMP_KERNEL_TYPES_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_osk.h b/drivers/gpu/arm/mali400/ump/common/ump_osk.h +new file mode 100644 +index 000000000..9adc4d3df +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_osk.h +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ reset_data.kbdev = kbdev; -+ reset_data.stage = 1; ++/** ++ * @file ump_osk.h ++ * Defines the OS abstraction layer for the UMP kernel device driver (OSK) ++ */ + -+ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT), -+ HRTIMER_MODE_REL); ++#ifndef __UMP_OSK_H__ ++#define __UMP_OSK_H__ + -+ /* Wait for all jobs to finish, and for the context to be not-scheduled -+ * (due to kbase_job_zap_context(), we also guarentee it's not in the JS -+ * policy queue either */ -+ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); -+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, -+ !kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++#include ++#include ++#include "ump_uk_types.h" ++#include "ump_kernel_common.h" + -+ spin_lock_irqsave(&reset_data.lock, flags); -+ if (reset_data.stage == 1) { -+ /* The timer hasn't run yet - so cancel it */ -+ reset_data.stage = -1; -+ } -+ spin_unlock_irqrestore(&reset_data.lock, flags); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ hrtimer_cancel(&reset_data.timer); ++_mali_osk_errcode_t _ump_osk_init(void); + -+ if (reset_data.stage == 2) { -+ /* The reset has already started. 
-+ * Wait for the reset to complete -+ */ -+ wait_event(kbdev->hwaccess.backend.reset_wait, -+ atomic_read(&kbdev->hwaccess.backend.reset_gpu) -+ == KBASE_RESET_GPU_NOT_PENDING); -+ } -+ destroy_hrtimer_on_stack(&reset_data.timer); ++_mali_osk_errcode_t _ump_osk_term(void); + -+ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); ++int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom); + -+ /* Ensure that the signallers of the waitqs have finished */ -+ mutex_lock(&kctx->jctx.lock); -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kctx->jctx.lock); -+} ++int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom); + -+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) -+{ -+ u32 flush_id = 0; ++_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor); + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { -+ mutex_lock(&kbdev->pm.lock); -+ if (kbdev->pm.backend.gpu_powered) -+ flush_id = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(LATEST_FLUSH), NULL); -+ mutex_unlock(&kbdev->pm.lock); -+ } ++_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size); + -+ return flush_id; -+} ++void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor); + -+int kbase_job_slot_init(struct kbase_device *kbdev) -+{ -+#if KBASE_GPU_RESET_EN -+ kbdev->hwaccess.backend.reset_workq = alloc_workqueue( -+ "Mali reset workqueue", 0, 1); -+ if (NULL == kbdev->hwaccess.backend.reset_workq) -+ return -EINVAL; ++void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data); + -+ KBASE_DEBUG_ASSERT(0 == -+ object_is_on_stack(&kbdev->hwaccess.backend.reset_work)); -+ INIT_WORK(&kbdev->hwaccess.backend.reset_work, -+ kbasep_reset_timeout_worker); ++#ifdef __cplusplus ++} ++#endif + -+ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ kbdev->hwaccess.backend.reset_timer.function = -+ kbasep_reset_timer_callback; +#endif +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h b/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h +new file mode 100644 +index 000000000..db842cdcb +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_uk_types.h +@@ -0,0 +1,202 @@ ++/* ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ return 0; -+} -+KBASE_EXPORT_TEST_API(kbase_job_slot_init); ++/** ++ * @file ump_uk_types.h ++ * Defines the types and constants used in the user-kernel interface ++ */ + -+void kbase_job_slot_halt(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++#ifndef __UMP_UK_TYPES_H__ ++#define __UMP_UK_TYPES_H__ + -+void kbase_job_slot_term(struct kbase_device *kbdev) -+{ -+#if KBASE_GPU_RESET_EN -+ destroy_workqueue(kbdev->hwaccess.backend.reset_workq); ++#ifdef __cplusplus ++extern "C" { +#endif -+} -+KBASE_EXPORT_TEST_API(kbase_job_slot_term); + -+#if KBASE_GPU_RESET_EN ++/* Helpers for API version handling */ ++#define MAKE_VERSION_ID(x) (((x) << 16UL) | (x)) ++#define IS_VERSION_ID(x) (((x) & 0xFFFF) == (((x) >> 16UL) & 0xFFFF)) ++#define GET_VERSION(x) (((x) >> 16UL) & 0xFFFF) ++#define IS_API_MATCH(x, y) (IS_VERSION_ID((x)) && IS_VERSION_ID((y)) && (GET_VERSION((x)) == GET_VERSION((y)))) ++ +/** -+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot -+ * @kbdev: kbase device pointer -+ * @kctx: context to check against -+ * @js: slot to check -+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on -+ * slot @js should be checked -+ * -+ * This checks are based upon parameters that would normally be passed to -+ * kbase_job_slot_hardstop(). -+ * -+ * In the event of @target_katom being NULL, this will check the last jobs that -+ * are likely to be running on the slot to see if a) they belong to kctx, and -+ * so would be stopped, and b) whether they have AFBC -+ * -+ * In that case, It's guaranteed that a job currently executing on the HW with -+ * AFBC will be detected. However, this is a conservative check because it also -+ * detects jobs that have just completed too. -+ * -+ * Return: true when hard-stop _might_ stop an afbc atom, else false. ++ * API version define. ++ * Indicates the version of the kernel API ++ * The version is a 16bit integer incremented on each API change. ++ * The 16bit integer is stored twice in a 32bit integer ++ * So for version 1 the value would be 0x00010001 + */ -+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, -+ struct kbase_context *kctx, int js, -+ struct kbase_jd_atom *target_katom) -+{ -+ bool ret = false; -+ int i; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#define UMP_IOCTL_API_VERSION MAKE_VERSION_ID(3) + -+ /* When we have an atom the decision can be made straight away. */ -+ if (target_katom) -+ return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC); ++typedef enum ++{ ++ _UMP_IOC_QUERY_API_VERSION = 1, ++ _UMP_IOC_ALLOCATE, ++ _UMP_IOC_RELEASE, ++ _UMP_IOC_SIZE_GET, ++ _UMP_IOC_MAP_MEM, /* not used in Linux */ ++ _UMP_IOC_UNMAP_MEM, /* not used in Linux */ ++ _UMP_IOC_MSYNC, ++ _UMP_IOC_CACHE_OPERATIONS_CONTROL, ++ _UMP_IOC_SWITCH_HW_USAGE, ++ _UMP_IOC_LOCK, ++ _UMP_IOC_UNLOCK, ++ _UMP_IOC_DMABUF_IMPORT, ++} _ump_uk_functions; + -+ /* Otherwise, we must chweck the hardware to see if it has atoms from -+ * this context with AFBC. 
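/*
 * Illustrative sketch only, not part of the patch: a compile-time check of how
 * the version helpers above encode the API version. MAKE_VERSION_ID() stores
 * the 16-bit version twice in one 32-bit value (version 1 -> 0x00010001, so
 * UMP_IOCTL_API_VERSION == MAKE_VERSION_ID(3) == 0x00030003), IS_VERSION_ID()
 * checks that the two halves match, GET_VERSION() extracts the version, and
 * IS_API_MATCH() accepts two well-formed IDs carrying the same version.
 */
#include <linux/build_bug.h>

static inline void ump_api_version_example(void)
{
	BUILD_BUG_ON(MAKE_VERSION_ID(3) != 0x00030003);
	BUILD_BUG_ON(!IS_VERSION_ID(UMP_IOCTL_API_VERSION));
	BUILD_BUG_ON(GET_VERSION(UMP_IOCTL_API_VERSION) != 3);
	BUILD_BUG_ON(!IS_API_MATCH(UMP_IOCTL_API_VERSION, MAKE_VERSION_ID(3)));
	BUILD_BUG_ON(IS_VERSION_ID(0x00010002)); /* halves differ, so not a valid ID */
}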
*/ -+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { -+ struct kbase_jd_atom *katom; ++typedef enum ++{ ++ UMP_REF_DRV_UK_CONSTRAINT_NONE = 0, ++ UMP_REF_DRV_UK_CONSTRAINT_PHYSICALLY_LINEAR = 1, ++ UMP_REF_DRV_UK_CONSTRAINT_USE_CACHE = 4, ++} ump_uk_alloc_constraints; + -+ katom = kbase_gpu_inspect(kbdev, js, i); -+ if (!katom) -+ continue; ++typedef enum ++{ ++ _UMP_UK_MSYNC_CLEAN = 0, ++ _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE = 1, ++ _UMP_UK_MSYNC_INVALIDATE = 2, ++ _UMP_UK_MSYNC_FLUSH_L1 = 3, ++ _UMP_UK_MSYNC_READOUT_CACHE_ENABLED = 128, ++} ump_uk_msync_op; + -+ /* Ignore atoms from other contexts, they won't be stopped when -+ * we use this for checking if we should hard-stop them */ -+ if (katom->kctx != kctx) -+ continue; ++typedef enum ++{ ++ _UMP_UK_CACHE_OP_START = 0, ++ _UMP_UK_CACHE_OP_FINISH = 1, ++} ump_uk_cache_op_control; + -+ /* An atom on this slot and this context: check for AFBC */ -+ if (katom->core_req & BASE_JD_REQ_FS_AFBC) { -+ ret = true; -+ break; -+ } -+ } ++typedef enum ++{ ++ _UMP_UK_READ = 1, ++ _UMP_UK_READ_WRITE = 3, ++} ump_uk_lock_usage; + -+ return ret; -+} -+#endif /* KBASE_GPU_RESET_EN */ ++typedef enum ++{ ++ _UMP_UK_USED_BY_CPU = 0, ++ _UMP_UK_USED_BY_MALI = 1, ++ _UMP_UK_USED_BY_UNKNOWN_DEVICE = 100, ++} ump_uk_user; + +/** -+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags -+ * @kbdev: The kbase device -+ * @js: The job slot to soft-stop -+ * @target_katom: The job that should be soft-stopped (or NULL for any job) -+ * @sw_flags: Flags to pass in about the soft-stop -+ * -+ * Context: -+ * The job slot lock must be held when calling this function. -+ * The job slot must not already be in the process of being soft-stopped. -+ * -+ * Soft-stop the specified job slot, with extra information about the stop -+ * -+ * Where possible any job in the next register is evicted before the soft-stop. ++ * Get API version ([in,out] u32 api_version, [out] u32 compatible) + */ -+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom, u32 sw_flags) ++typedef struct _ump_uk_api_version_s +{ -+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); -+ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, -+ JS_COMMAND_SOFT_STOP | sw_flags); -+} ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 version; /**< Set to the user space version on entry, stores the device driver version on exit */ ++ u32 compatible; /**< Non-null if the device is compatible with the client */ ++} _ump_uk_api_version_s; + +/** -+ * kbase_job_slot_softstop - Soft-stop the specified job slot -+ * @kbdev: The kbase device -+ * @js: The job slot to soft-stop -+ * @target_katom: The job that should be soft-stopped (or NULL for any job) -+ * Context: -+ * The job slot lock must be held when calling this function. -+ * The job slot must not already be in the process of being soft-stopped. -+ * -+ * Where possible any job in the next register is evicted before the soft-stop. ++ * ALLOCATE ([out] u32 secure_id, [in,out] u32 size, [in] contraints) + */ -+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom) ++typedef struct _ump_uk_allocate_s +{ -+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); -+} ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< Return value from DD to Userdriver */ ++ u32 size; /**< Input and output. Requested size; input. 
Returned size; output */ ++ ump_uk_alloc_constraints constraints; /**< Only input to Devicedriver */ ++} _ump_uk_allocate_s; + +/** -+ * kbase_job_slot_hardstop - Hard-stop the specified job slot -+ * @kctx: The kbase context that contains the job(s) that should -+ * be hard-stopped -+ * @js: The job slot to hard-stop -+ * @target_katom: The job that should be hard-stopped (or NULL for all -+ * jobs from the context) -+ * Context: -+ * The job slot lock must be held when calling this function. ++ * SIZE_GET ([in] u32 secure_id, [out]size ) + */ -+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, -+ struct kbase_jd_atom *target_katom) ++typedef struct _ump_uk_size_get_s +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool stopped; -+#if KBASE_GPU_RESET_EN -+ /* We make the check for AFBC before evicting/stopping atoms. Note -+ * that no other thread can modify the slots whilst we have the -+ * hwaccess_lock. */ -+ int needs_workaround_for_afbc = -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) -+ && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, -+ target_katom); -+#endif -+ -+ stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, -+ target_katom, -+ JS_COMMAND_HARD_STOP); -+#if KBASE_GPU_RESET_EN -+ if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || -+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || -+ needs_workaround_for_afbc)) { -+ /* MIDBASE-2916 if a fragment job with AFBC encoding is -+ * hardstopped, ensure to do a soft reset also in order to -+ * clear the GPU status. -+ * Workaround for HW issue 8401 has an issue,so after -+ * hard-stopping just reset the GPU. This will ensure that the -+ * jobs leave the GPU.*/ -+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) { -+ dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue"); -+ kbase_reset_gpu_locked(kbdev); -+ } -+ } -+#endif -+} ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< Input to DD */ ++ u32 size; /**< Returned size; output */ ++} _ump_uk_size_get_s; + +/** -+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode -+ * @kbdev: kbase device -+ * @action: the event which has occurred -+ * @core_reqs: core requirements of the atom -+ * @target_katom: the atom which is being affected -+ * -+ * For a certain soft/hard-stop action, work out whether to enter disjoint -+ * state. 
-+ * -+ * This does not register multiple disjoint events if the atom has already -+ * started a disjoint period -+ * -+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware -+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted -+ * flow, perhaps on another context) -+ * -+ * kbase_job_check_leave_disjoint() should be used to end the disjoint -+ * state when the soft/hard-stop action is complete ++ * Release ([in] u32 secure_id) + */ -+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, -+ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) ++typedef struct _ump_uk_release_s +{ -+ u32 hw_action = action & JS_COMMAND_MASK; -+ -+ /* For hard-stop, don't enter if hard-stop not allowed */ -+ if (hw_action == JS_COMMAND_HARD_STOP && -+ !kbasep_hard_stop_allowed(kbdev, core_reqs)) -+ return; ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< Input to DD */ ++} _ump_uk_release_s; + -+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't -+ * causing disjoint */ -+ if (hw_action == JS_COMMAND_SOFT_STOP && -+ !(kbasep_soft_stop_allowed(kbdev, target_katom) && -+ (action & JS_COMMAND_SW_CAUSES_DISJOINT))) -+ return; ++typedef struct _ump_uk_map_mem_s ++{ ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ void *mapping; /**< [out] Returns user-space virtual address for the mapping */ ++ void *phys_addr; /**< [in] physical address */ ++ unsigned long size; /**< [in] size */ ++ u32 secure_id; /**< [in] secure_id to assign to mapping */ ++ void *_ukk_private; /**< Only used inside linux port between kernel frontend and common part to store vma */ ++ u32 cookie; ++ u32 is_cached; /**< [in,out] caching of CPU mappings */ ++} _ump_uk_map_mem_s; + -+ /* Nothing to do if already logged disjoint state on this atom */ -+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) -+ return; ++typedef struct _ump_uk_unmap_mem_s ++{ ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ void *mapping; ++ u32 size; ++ void *_ukk_private; ++ u32 cookie; ++} _ump_uk_unmap_mem_s; + -+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; -+ kbase_disjoint_state_up(kbdev); -+} ++typedef struct _ump_uk_msync_s ++{ ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ void *mapping; /**< [in] mapping addr */ ++ void *address; /**< [in] flush start addr */ ++ u32 size; /**< [in] size to flush */ ++ ump_uk_msync_op op; /**< [in] flush operation */ ++ u32 cookie; /**< [in] cookie stored with reference to the kernel mapping internals */ ++ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ ++ u32 is_cached; /**< [out] caching of CPU mappings */ ++} _ump_uk_msync_s; + -+/** -+ * kbase_job_check_enter_disjoint - potentially leave disjoint state -+ * @kbdev: kbase device -+ * @target_katom: atom which is finishing -+ * -+ * Work out whether to leave disjoint state when finishing an atom that was -+ * originated by kbase_job_check_enter_disjoint(). 
-+ */ -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom) ++typedef struct _ump_uk_cache_operations_control_s +{ -+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { -+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; -+ kbase_disjoint_state_down(kbdev); -+ } -+} ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ ump_uk_cache_op_control op; /**< [in] cache operations start/stop */ ++} _ump_uk_cache_operations_control_s; + + -+#if KBASE_GPU_RESET_EN -+static void kbase_debug_dump_registers(struct kbase_device *kbdev) ++typedef struct _ump_uk_switch_hw_usage_s +{ -+ int i; -+ -+ kbase_io_history_dump(kbdev); ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ ++ ump_uk_user new_user; /**< [in] cookie stored with reference to the kernel mapping internals */ + -+ dev_err(kbdev->dev, "Register state:"); -+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); -+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL)); -+ for (i = 0; i < 3; i++) { -+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", -+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), -+ NULL), -+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO), -+ NULL)); -+ } -+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", -+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); -+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), -+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), -+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); -+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); -+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); -+ dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL)); -+} ++} _ump_uk_switch_hw_usage_s; + -+static void kbasep_reset_timeout_worker(struct work_struct *data) ++typedef struct _ump_uk_lock_s +{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ ktime_t end_timestamp = ktime_get(); -+ struct kbasep_js_device_data *js_devdata; -+ bool try_schedule = false; -+ bool silent = false; -+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ ++ ump_uk_lock_usage lock_usage; ++} _ump_uk_lock_s; + -+ KBASE_DEBUG_ASSERT(data); ++typedef struct _ump_uk_unlock_s ++{ ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ u32 secure_id; /**< [in] secure_id that identifies the ump buffer */ ++} _ump_uk_unlock_s; + -+ kbdev = container_of(data, struct kbase_device, 
-+ hwaccess.backend.reset_work); ++typedef struct _ump_uk_dmabuf_s ++{ ++ void *ctx; /**< [in,out] user-kernel context (trashed on output) */ ++ int fd; /**< [in] dmabuf_fd that identifies the dmabuf buffer */ ++ size_t size; /**< [in] size of the buffer */ ++ u32 secure_id; /**< [out] secure_id that identifies the ump buffer */ ++} _ump_uk_dmabuf_s; + -+ KBASE_DEBUG_ASSERT(kbdev); -+ js_devdata = &kbdev->js_data; ++#ifdef __cplusplus ++} ++#endif + -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_SILENT) -+ silent = true; ++#endif /* __UMP_UK_TYPES_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/common/ump_ukk.h b/drivers/gpu/arm/mali400/ump/common/ump_ukk.h +new file mode 100644 +index 000000000..f2906768c +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/common/ump_ukk.h +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); ++/** ++ * @file ump_ukk.h ++ * Defines the kernel-side interface of the user-kernel interface ++ */ + -+ /* Suspend vinstr. -+ * This call will block until vinstr is suspended. */ -+ kbase_vinstr_suspend(kbdev->vinstr_ctx); ++#ifndef __UMP_UKK_H__ ++#define __UMP_UKK_H__ + -+ /* Make sure the timer has completed - this cannot be done from -+ * interrupt context, so this cannot be done within -+ * kbasep_try_reset_gpu_early. */ -+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); ++#include "mali_osk.h" ++#include "ump_uk_types.h" + -+ if (kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ /* This would re-activate the GPU. 
Since it's already idle, -+ * there's no need to reset it */ -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING); -+ kbase_disjoint_state_down(kbdev); -+ wake_up(&kbdev->hwaccess.backend.reset_wait); -+ kbase_vinstr_resume(kbdev->vinstr_ctx); -+ return; -+ } + -+ KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); -+ spin_lock(&kbdev->hwaccess_lock); -+ spin_lock(&kbdev->mmu_mask_change); -+ /* We're about to flush out the IRQs and their bottom half's */ -+ kbdev->irq_reset_flush = true; + -+ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the -+ * spinlock; this also clears any outstanding interrupts */ -+ kbase_pm_disable_interrupts_nolock(kbdev); ++_mali_osk_errcode_t _ump_ukk_open(void **context); + -+ spin_unlock(&kbdev->mmu_mask_change); -+ spin_unlock(&kbdev->hwaccess_lock); -+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++_mali_osk_errcode_t _ump_ukk_close(void **context); + -+ /* Ensure that any IRQ handlers have finished -+ * Must be done without any locks IRQ handlers will take */ -+ kbase_synchronize_irqs(kbdev); ++_mali_osk_errcode_t _ump_ukk_allocate(_ump_uk_allocate_s *user_interaction); + -+ /* Flush out any in-flight work items */ -+ kbase_flush_mmu_wqs(kbdev); ++_mali_osk_errcode_t _ump_ukk_release(_ump_uk_release_s *release_info); + -+ /* The flush has completed so reset the active indicator */ -+ kbdev->irq_reset_flush = false; ++_mali_osk_errcode_t _ump_ukk_size_get(_ump_uk_size_get_s *user_interaction); + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { -+ /* Ensure that L2 is not transitioning when we send the reset -+ * command */ -+ while (--max_loops && kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_L2)) -+ ; ++_mali_osk_errcode_t _ump_ukk_map_mem(_ump_uk_map_mem_s *args); + -+ WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); -+ } ++_mali_osk_errcode_t _ump_uku_get_api_version(_ump_uk_api_version_s *args); + -+ mutex_lock(&kbdev->pm.lock); -+ /* We hold the pm lock, so there ought to be a current policy */ -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); ++void _ump_ukk_unmap_mem(_ump_uk_unmap_mem_s *args); + -+ /* All slot have been soft-stopped and we've waited -+ * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we -+ * assume that anything that is still left on the GPU is stuck there and -+ * we'll kill it when we reset the GPU */ ++void _ump_ukk_msync(_ump_uk_msync_s *args); + -+ if (!silent) -+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", -+ RESET_TIMEOUT); ++void _ump_ukk_cache_operations_control(_ump_uk_cache_operations_control_s *args); + -+ /* Output the state of some interesting registers to help in the -+ * debugging of GPU resets */ -+ if (!silent) -+ kbase_debug_dump_registers(kbdev); ++void _ump_ukk_switch_hw_usage(_ump_uk_switch_hw_usage_s *args); + -+ /* Complete any jobs that were still on the GPU */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->protected_mode = false; -+ kbase_backend_reset(kbdev, &end_timestamp); -+ kbase_pm_metrics_update(kbdev, NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++void _ump_ukk_lock(_ump_uk_lock_s *args); + -+ /* Reset the GPU */ -+ kbase_pm_init_hw(kbdev, 0); ++void _ump_ukk_unlock(_ump_uk_unlock_s *args); + -+ mutex_unlock(&kbdev->pm.lock); ++u32 _ump_ukk_report_memory_usage(void); + -+ mutex_lock(&js_devdata->runpool_mutex); ++#ifdef __cplusplus ++} 
++#endif + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_restore_all_as(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++#endif /* __UMP_UKK_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h b/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h +new file mode 100644 +index 000000000..d0174055a +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/license/gpl/ump_kernel_license.h +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2010, 2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ kbase_pm_enable_interrupts(kbdev); ++/** ++ * @file ump_kernel_license.h ++ * Defines for the macro MODULE_LICENSE. ++ */ + -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING); ++#ifndef __UMP_KERNEL_LICENSE_H__ ++#define __UMP_KERNEL_LICENSE_H__ + -+ kbase_disjoint_state_down(kbdev); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ wake_up(&kbdev->hwaccess.backend.reset_wait); -+ if (!silent) -+ dev_err(kbdev->dev, "Reset complete"); ++#define UMP_KERNEL_LINUX_LICENSE "GPL" ++#define UMP_LICENSE_IS_GPL 1 + -+ if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) -+ try_schedule = true; ++#ifdef __cplusplus ++} ++#endif + -+ mutex_unlock(&js_devdata->runpool_mutex); ++#endif /* __UMP_KERNEL_LICENSE_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h b/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h +new file mode 100644 +index 000000000..bfb4e8d64 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_ioctl.h +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ mutex_lock(&kbdev->pm.lock); ++#ifndef __UMP_IOCTL_H__ ++#define __UMP_IOCTL_H__ + -+ /* Find out what cores are required now */ -+ kbase_pm_update_cores_state(kbdev); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ /* Synchronously request and wait for those cores, because if -+ * instrumentation is enabled it would need them immediately. 
*/ -+ kbase_pm_check_transitions_sync(kbdev); ++#include ++#include + -+ mutex_unlock(&kbdev->pm.lock); ++#include + -+ /* Try submitting some jobs to restart processing */ -+ if (try_schedule) { -+ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, -+ 0); -+ kbase_js_sched_all(kbdev); -+ } ++#ifndef __user ++#define __user ++#endif + -+ /* Process any pending slot updates */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_backend_slot_update(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ kbase_pm_context_idle(kbdev); ++/** ++ * @file UMP_ioctl.h ++ * This file describes the interface needed to use the Linux device driver. ++ * The interface is used by the userpace UMP driver. ++ */ + -+ /* Release vinstr */ -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++#define UMP_IOCTL_NR 0x90 + -+ KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); -+} + -+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) -+{ -+ struct kbase_device *kbdev = container_of(timer, struct kbase_device, -+ hwaccess.backend.reset_timer); ++#define UMP_IOC_QUERY_API_VERSION _IOR(UMP_IOCTL_NR, _UMP_IOC_QUERY_API_VERSION, _ump_uk_api_version_s) ++#define UMP_IOC_ALLOCATE _IOWR(UMP_IOCTL_NR, _UMP_IOC_ALLOCATE, _ump_uk_allocate_s) ++#define UMP_IOC_RELEASE _IOR(UMP_IOCTL_NR, _UMP_IOC_RELEASE, _ump_uk_release_s) ++#define UMP_IOC_SIZE_GET _IOWR(UMP_IOCTL_NR, _UMP_IOC_SIZE_GET, _ump_uk_size_get_s) ++#define UMP_IOC_MSYNC _IOW(UMP_IOCTL_NR, _UMP_IOC_MSYNC, _ump_uk_msync_s) + -+ KBASE_DEBUG_ASSERT(kbdev); ++#define UMP_IOC_CACHE_OPERATIONS_CONTROL _IOW(UMP_IOCTL_NR, _UMP_IOC_CACHE_OPERATIONS_CONTROL, _ump_uk_cache_operations_control_s) ++#define UMP_IOC_SWITCH_HW_USAGE _IOW(UMP_IOCTL_NR, _UMP_IOC_SWITCH_HW_USAGE, _ump_uk_switch_hw_usage_s) ++#define UMP_IOC_LOCK _IOW(UMP_IOCTL_NR, _UMP_IOC_LOCK, _ump_uk_lock_s) ++#define UMP_IOC_UNLOCK _IOW(UMP_IOCTL_NR, _UMP_IOC_UNLOCK, _ump_uk_unlock_s) + -+ /* Reset still pending? */ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == -+ KBASE_RESET_GPU_COMMITTED) -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); ++#define UMP_IOC_DMABUF_IMPORT _IOW(UMP_IOCTL_NR, _UMP_IOC_DMABUF_IMPORT, _ump_uk_dmabuf_s) + -+ return HRTIMER_NORESTART; ++#ifdef __cplusplus +} ++#endif + ++#endif /* __UMP_IOCTL_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c +new file mode 100644 +index 000000000..71b30830c +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.c +@@ -0,0 +1,449 @@ +/* -+ * If all jobs are evicted from the GPU then we can reset the GPU -+ * immediately instead of waiting for the timeout to elapse ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
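/*
 * Illustrative user-space sketch only, not part of the patch: one way a client
 * could exercise the ioctl interface declared in ump_ioctl.h above. It assumes
 * the character device registered by this driver appears as /dev/ump and that
 * ump_ioctl.h, ump_uk_types.h and a matching u32 typedef are visible to the
 * caller. The ctx field is documented as "trashed on output" and is managed on
 * the kernel side, so it is simply left NULL here.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "ump_ioctl.h"    /* UMP_IOC_ALLOCATE, UMP_IOC_RELEASE */
#include "ump_uk_types.h" /* _ump_uk_allocate_s, _ump_uk_release_s */

static int ump_alloc_example(void)
{
	_ump_uk_allocate_s alloc = { 0 };
	_ump_uk_release_s release = { 0 };
	int fd = open("/dev/ump", O_RDWR);

	if (fd < 0)
		return -1;

	alloc.size = 4096;                                  /* rounded up to whole pages by _ump_ukk_allocate() */
	alloc.constraints = UMP_REF_DRV_UK_CONSTRAINT_NONE; /* or ..._USE_CACHE for a cacheable buffer */

	if (ioctl(fd, UMP_IOC_ALLOCATE, &alloc) == 0) {
		printf("UMP buffer: secure id %u, size %u\n",
		       (unsigned int)alloc.secure_id, (unsigned int)alloc.size);

		release.secure_id = alloc.secure_id;
		ioctl(fd, UMP_IOC_RELEASE, &release);
	}

	close(fd);
	return 0;
}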
+ */ + -+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) -+{ -+ int i; -+ int pending_jobs = 0; ++#include /* kernel module definitions */ ++#include /* file system operations */ ++#include /* character device definitions */ ++#include /* request_mem_region */ ++#include /* memory management functions and types */ ++#include /* user space access */ ++#include ++#include ++#include + -+ KBASE_DEBUG_ASSERT(kbdev); ++#include "arch/config.h" /* Configuration for current platform. The symlinc for arch is set by Makefile */ ++#include "ump_ioctl.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_interface.h" ++#include "ump_kernel_interface_ref_drv.h" ++#include "ump_kernel_descriptor_mapping.h" ++#include "ump_kernel_memory_backend.h" ++#include "ump_kernel_memory_backend_os.h" ++#include "ump_kernel_memory_backend_dedicated.h" ++#include "ump_kernel_license.h" + -+ /* Count the number of jobs */ -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); ++#include "ump_osk.h" ++#include "ump_ukk.h" ++#include "ump_uk_types.h" ++#include "ump_ukk_wrappers.h" ++#include "ump_ukk_ref_wrappers.h" + -+ if (pending_jobs > 0) { -+ /* There are still jobs on the GPU - wait */ -+ return; -+ } + -+ /* To prevent getting incorrect registers when dumping failed job, -+ * skip early reset. -+ */ -+ if (kbdev->job_fault_debug != false) -+ return; ++/* Module parameter to control log level */ ++int ump_debug_level = 2; ++module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ ++MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output"); + -+ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has -+ * been called), and that no other thread beat this thread to starting -+ * the reset */ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != -+ KBASE_RESET_GPU_COMMITTED) { -+ /* Reset has already occurred */ -+ return; -+ } ++/* By default the module uses any available major, but it's possible to set it at load time to a specific number */ ++int ump_major = 0; ++module_param(ump_major, int, S_IRUGO); /* r--r--r-- */ ++MODULE_PARM_DESC(ump_major, "Device major number"); + -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); -+} ++/* Name of the UMP device driver */ ++static char ump_dev_name[] = "ump"; /* should be const, but the functions we call requires non-cost */ + -+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) -+{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; + -+ js_devdata = &kbdev->js_data; -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbasep_try_reset_gpu_early_locked(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++#if UMP_LICENSE_IS_GPL ++static struct dentry *ump_debugfs_dir = NULL; ++#endif + -+/** -+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU -+ * @kbdev: kbase device -+ * -+ * This function just soft-stops all the slots to ensure that as many jobs as -+ * possible are saved. -+ * -+ * Return: -+ * The function returns a boolean which should be interpreted as follows: -+ * true - Prepared for reset, kbase_reset_gpu_locked should be called. -+ * false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. ++/* ++ * The data which we attached to each virtual memory mapping request we get. 
++ * Each memory mapping has a reference to the UMP memory it maps. ++ * We release this reference when the last memory mapping is unmapped. + */ -+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) ++typedef struct ump_vma_usage_tracker { ++ int references; ++ ump_dd_handle handle; ++} ump_vma_usage_tracker; ++ ++struct ump_device { ++ struct cdev cdev; ++#if UMP_LICENSE_IS_GPL ++ struct class *ump_class; ++#endif ++}; ++ ++/* The global variable containing the global device data */ ++static struct ump_device ump_device; ++struct device *ump_global_mdev = NULL; ++ ++/* Forward declare static functions */ ++static int ump_file_open(struct inode *inode, struct file *filp); ++static int ump_file_release(struct inode *inode, struct file *filp); ++#ifdef HAVE_UNLOCKED_IOCTL ++static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); ++#else ++static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); ++#endif ++static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma); ++ ++ ++/* This variable defines the file operations this UMP device driver offer */ ++static struct file_operations ump_fops = { ++ .owner = THIS_MODULE, ++ .open = ump_file_open, ++ .release = ump_file_release, ++#ifdef HAVE_UNLOCKED_IOCTL ++ .unlocked_ioctl = ump_file_ioctl, ++#else ++ .ioctl = ump_file_ioctl, ++#endif ++ .mmap = ump_file_mmap ++}; ++ ++ ++/* This function is called by Linux to initialize this module. ++ * All we do is initialize the UMP device driver. ++ */ ++static int ump_initialize_module(void) +{ -+ int i; ++ _mali_osk_errcode_t err; + -+ KBASE_DEBUG_ASSERT(kbdev); ++ DBG_MSG(2, ("Inserting UMP device driver. Compiled: %s, time: %s\n", __DATE__, __TIME__)); + -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING, -+ KBASE_RESET_GPU_PREPARED) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* Some other thread is already resetting the GPU */ -+ return false; ++ err = ump_kernel_constructor(); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("UMP device driver init failed\n")); ++ return ump_map_errcode(err); + } + -+ kbase_disjoint_state_up(kbdev); ++ MSG(("UMP device driver %s loaded\n", SVN_REV_STRING)); ++ return 0; ++} + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) -+ kbase_job_slot_softstop(kbdev, i, NULL); + -+ return true; -+} + -+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) ++/* ++ * This function is called by Linux to unload/terminate/exit/cleanup this module. ++ * All we do is terminate the UMP device driver. 
++ */ ++static void ump_cleanup_module(void) +{ -+ unsigned long flags; -+ bool ret; -+ struct kbasep_js_device_data *js_devdata; ++ DBG_MSG(2, ("Unloading UMP device driver\n")); ++ ump_kernel_destructor(); ++ DBG_MSG(2, ("Module unloaded\n")); ++} + -+ js_devdata = &kbdev->js_data; -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = kbase_prepare_to_reset_gpu_locked(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return ret; ++ ++static ssize_t ump_memory_used_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; ++ size_t r; ++ u32 mem = _ump_ukk_report_memory_usage(); ++ ++ r = snprintf(buf, 64, "%u\n", mem); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} -+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); ++ ++static const struct file_operations ump_memory_usage_fops = { ++ .owner = THIS_MODULE, ++ .read = ump_memory_used_read, ++}; + +/* -+ * This function should be called after kbase_prepare_to_reset_gpu if it -+ * returns true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu. -+ * -+ * After this function is called (or not called if kbase_prepare_to_reset_gpu -+ * returned false), the caller should wait for -+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset -+ * has completed. ++ * Initialize the UMP device driver. + */ -+void kbase_reset_gpu(struct kbase_device *kbdev) ++int ump_kernel_device_initialize(void) +{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ int err; ++ dev_t dev = 0; ++#if UMP_LICENSE_IS_GPL ++ ump_debugfs_dir = debugfs_create_dir(ump_dev_name, NULL); ++ if (ERR_PTR(-ENODEV) == ump_debugfs_dir) { ++ ump_debugfs_dir = NULL; ++ } else { ++ debugfs_create_file("memory_usage", 0400, ump_debugfs_dir, NULL, &ump_memory_usage_fops); ++ } ++#endif + -+ /* Note this is an assert/atomic_set because it is a software issue for -+ * a race to be occuring here */ -+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_PREPARED); -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED); ++ if (0 == ump_major) { ++ /* auto select a major */ ++ err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name); ++ ump_major = MAJOR(dev); ++ } else { ++ /* use load time defined major number */ ++ dev = MKDEV(ump_major, 0); ++ err = register_chrdev_region(dev, 1, ump_dev_name); ++ } + -+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", -+ kbdev->reset_timeout_ms); ++ if (0 == err) { ++ memset(&ump_device, 0, sizeof(ump_device)); + -+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, -+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), -+ HRTIMER_MODE_REL); ++ /* initialize our char dev data */ ++ cdev_init(&ump_device.cdev, &ump_fops); ++ ump_device.cdev.owner = THIS_MODULE; ++ ump_device.cdev.ops = &ump_fops; + -+ /* Try resetting early */ -+ kbasep_try_reset_gpu_early(kbdev); ++ /* register char dev with the kernel */ ++ err = cdev_add(&ump_device.cdev, dev, 1/*count*/); ++ if (0 == err) { ++ ++#if UMP_LICENSE_IS_GPL ++ ump_device.ump_class = class_create(THIS_MODULE, ump_dev_name); ++ if (IS_ERR(ump_device.ump_class)) { ++ err = PTR_ERR(ump_device.ump_class); ++ } else { ++ ump_global_mdev = device_create(ump_device.ump_class, NULL, dev, NULL, ump_dev_name); ++ if (!IS_ERR(ump_global_mdev)) { ++ return 0; ++ } ++ ++ err = PTR_ERR(ump_global_mdev); ++ } ++ cdev_del(&ump_device.cdev); ++#else ++ return 0; ++#endif ++ } ++ ++ 
unregister_chrdev_region(dev, 1); ++ } ++ ++ return err; +} -+KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+void kbase_reset_gpu_locked(struct kbase_device *kbdev) ++ ++ ++/* ++ * Terminate the UMP device driver ++ */ ++void ump_kernel_device_terminate(void) +{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ dev_t dev = MKDEV(ump_major, 0); + -+ /* Note this is an assert/atomic_set because it is a software issue for -+ * a race to be occuring here */ -+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_PREPARED); -+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_COMMITTED); ++#if UMP_LICENSE_IS_GPL ++ device_destroy(ump_device.ump_class, dev); ++ class_destroy(ump_device.ump_class); ++#endif + -+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", -+ kbdev->reset_timeout_ms); -+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, -+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), -+ HRTIMER_MODE_REL); ++ /* unregister char device */ ++ cdev_del(&ump_device.cdev); + -+ /* Try resetting early */ -+ kbasep_try_reset_gpu_early_locked(kbdev); ++ /* free major */ ++ unregister_chrdev_region(dev, 1); ++ ++#if UMP_LICENSE_IS_GPL ++ if (ump_debugfs_dir) ++ debugfs_remove_recursive(ump_debugfs_dir); ++#endif +} + -+void kbase_reset_gpu_silent(struct kbase_device *kbdev) ++/* ++ * Open a new session. User space has called open() on us. ++ */ ++static int ump_file_open(struct inode *inode, struct file *filp) +{ -+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, -+ KBASE_RESET_GPU_NOT_PENDING, -+ KBASE_RESET_GPU_SILENT) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* Some other thread is already resetting the GPU */ -+ return; ++ struct ump_session_data *session_data; ++ _mali_osk_errcode_t err; ++ ++ /* input validation */ ++ if (0 != MINOR(inode->i_rdev)) { ++ MSG_ERR(("Minor not zero in ump_file_open()\n")); ++ return -ENODEV; + } + -+ kbase_disjoint_state_up(kbdev); ++ /* Call the OS-Independent UMP Open function */ ++ err = _ump_ukk_open((void **) &session_data); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("Ump failed to open a new session\n")); ++ return ump_map_errcode(err); ++ } + -+ queue_work(kbdev->hwaccess.backend.reset_workq, -+ &kbdev->hwaccess.backend.reset_work); ++ filp->private_data = (void *)session_data; ++ filp->f_pos = 0; ++ ++ return 0; /* success */ +} + -+bool kbase_reset_gpu_active(struct kbase_device *kbdev) -+{ -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == -+ KBASE_RESET_GPU_NOT_PENDING) -+ return false; + -+ return true; -+} -+#endif /* KBASE_GPU_RESET_EN */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h -new file mode 100644 -index 000000000..1f382b3c1 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h -@@ -0,0 +1,164 @@ ++ +/* -+ * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Close a session. User space has called close() or crashed/terminated. 
+ */ ++static int ump_file_release(struct inode *inode, struct file *filp) ++{ ++ _mali_osk_errcode_t err; + ++ err = _ump_ukk_close((void **) &filp->private_data); ++ if (_MALI_OSK_ERR_OK != err) { ++ return ump_map_errcode(err); ++ } + ++ return 0; /* success */ ++} + + + +/* -+ * Job Manager backend-specific low-level APIs. ++ * Handle IOCTL requests. + */ ++#ifdef HAVE_UNLOCKED_IOCTL ++static long ump_file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ++#else ++static int ump_file_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) ++#endif ++{ ++ int err = -ENOTTY; ++ void __user *argument; ++ struct ump_session_data *session_data; + -+#ifndef _KBASE_JM_HWACCESS_H_ -+#define _KBASE_JM_HWACCESS_H_ ++#ifndef HAVE_UNLOCKED_IOCTL ++ (void)inode; /* inode not used */ ++#endif + -+#include -+#include -+#include ++ session_data = (struct ump_session_data *)filp->private_data; ++ if (NULL == session_data) { ++ MSG_ERR(("No session data attached to file object\n")); ++ return -ENOTTY; ++ } + -+#include ++ /* interpret the argument as a user pointer to something */ ++ argument = (void __user *)arg; + -+/** -+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot -+ * @kbdev: Device pointer -+ * @katom: Atom to submit -+ * @js: Job slot to submit on -+ * -+ * The caller must check kbasep_jm_is_submit_slots_free() != false before -+ * calling this. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold the hwaccess_lock -+ */ -+void kbase_job_submit_nolock(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, int js); ++ switch (cmd) { ++ case UMP_IOC_QUERY_API_VERSION: ++ err = ump_get_api_version_wrapper((u32 __user *)argument, session_data); ++ break; + -+/** -+ * kbase_job_done_slot() - Complete the head job on a particular job-slot -+ * @kbdev: Device pointer -+ * @s: Job slot -+ * @completion_code: Completion code of job reported by GPU -+ * @job_tail: Job tail address reported by GPU -+ * @end_timestamp: Timestamp of job completion -+ */ -+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, -+ u64 job_tail, ktime_t *end_timestamp); ++ case UMP_IOC_ALLOCATE : ++ err = ump_allocate_wrapper((u32 __user *)argument, session_data); ++ break; + -+#ifdef CONFIG_GPU_TRACEPOINTS -+static inline char *kbasep_make_job_slot_string(int js, char *js_string, -+ size_t js_size) ++ case UMP_IOC_RELEASE: ++ err = ump_release_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_SIZE_GET: ++ err = ump_size_get_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_MSYNC: ++ err = ump_msync_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_CACHE_OPERATIONS_CONTROL: ++ err = ump_cache_operations_control_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_SWITCH_HW_USAGE: ++ err = ump_switch_hw_usage_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_LOCK: ++ err = ump_lock_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_UNLOCK: ++ err = ump_unlock_wrapper((u32 __user *)argument, session_data); ++ break; ++ ++ case UMP_IOC_DMABUF_IMPORT: ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ err = ump_dmabuf_import_wrapper((u32 __user *)argument, session_data); ++#else ++ err = -EFAULT; ++ DBG_MSG(1, ("User space use dmabuf API, but kernel don't support DMA BUF\n")); ++#endif ++ break; ++ ++ default: ++ DBG_MSG(1, ("No handler for IOCTL. 
cmd: 0x%08x, arg: 0x%08lx\n", cmd, arg)); ++ err = -EFAULT; ++ break; ++ } ++ ++ return err; ++} ++ ++int ump_map_errcode(_mali_osk_errcode_t err) +{ -+ snprintf(js_string, js_size, "job_slot_%i", js); -+ return js_string; ++ switch (err) { ++ case _MALI_OSK_ERR_OK : ++ return 0; ++ case _MALI_OSK_ERR_FAULT: ++ return -EFAULT; ++ case _MALI_OSK_ERR_INVALID_FUNC: ++ return -ENOTTY; ++ case _MALI_OSK_ERR_INVALID_ARGS: ++ return -EINVAL; ++ case _MALI_OSK_ERR_NOMEM: ++ return -ENOMEM; ++ case _MALI_OSK_ERR_TIMEOUT: ++ return -ETIMEDOUT; ++ case _MALI_OSK_ERR_RESTARTSYSCALL: ++ return -ERESTARTSYS; ++ case _MALI_OSK_ERR_ITEM_NOT_FOUND: ++ return -ENOENT; ++ default: ++ return -EFAULT; ++ } +} -+#endif + -+/** -+ * kbase_job_hw_submit() - Submit a job to the GPU -+ * @kbdev: Device pointer -+ * @katom: Atom to submit -+ * @js: Job slot to submit on -+ * -+ * The caller must check kbasep_jm_is_submit_slots_free() != false before -+ * calling this. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold the hwaccess_lock ++/* ++ * Handle from OS to map specified virtual memory to specified UMP memory. + */ -+void kbase_job_hw_submit(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ int js); ++static int ump_file_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ _ump_uk_map_mem_s args; ++ _mali_osk_errcode_t err; ++ struct ump_session_data *session_data; + -+/** -+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop -+ * on the specified atom -+ * @kbdev: Device pointer -+ * @js: Job slot to stop on -+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or -+ * JSn_COMMAND_SOFT_STOP -+ * @core_reqs: Core requirements of atom to stop -+ * @target_katom: Atom to stop -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold the hwaccess_lock -+ */ -+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, -+ int js, -+ u32 action, -+ base_jd_core_req core_reqs, -+ struct kbase_jd_atom *target_katom); ++ /* Validate the session data */ ++ session_data = (struct ump_session_data *)filp->private_data; ++ if (NULL == session_data) { ++ MSG_ERR(("mmap() called without any session data available\n")); ++ return -EFAULT; ++ } + -+/** -+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job -+ * slot belonging to a given context. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer. May be NULL -+ * @katom: Specific atom to stop. May be NULL -+ * @js: Job slot to hard stop -+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or -+ * JSn_COMMAND_SOFT_STOP -+ * -+ * If no context is provided then all jobs on the slot will be soft or hard -+ * stopped. -+ * -+ * If a katom is provided then only that specific atom will be stopped. In this -+ * case the kctx parameter is ignored. -+ * -+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and -+ * returned to the job scheduler. 
-+ * -+ * Return: true if an atom was stopped, false otherwise -+ */ -+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js, -+ struct kbase_jd_atom *katom, -+ u32 action); ++ /* Re-pack the arguments that mmap() packed for us */ ++ args.ctx = session_data; ++ args.phys_addr = 0; ++ args.size = vma->vm_end - vma->vm_start; ++ args._ukk_private = vma; ++ args.secure_id = vma->vm_pgoff; + -+/** -+ * kbase_job_slot_init - Initialise job slot framework -+ * @kbdev: Device pointer -+ * -+ * Called on driver initialisation -+ * -+ * Return: 0 on success -+ */ -+int kbase_job_slot_init(struct kbase_device *kbdev); ++ /* By setting this flag, during a process fork; the child process will not have the parent UMP mappings */ ++ vma->vm_flags |= VM_DONTCOPY; + -+/** -+ * kbase_job_slot_halt - Halt the job slot framework -+ * @kbdev: Device pointer -+ * -+ * Should prevent any further job slot processing -+ */ -+void kbase_job_slot_halt(struct kbase_device *kbdev); ++ DBG_MSG(4, ("UMP vma->flags: %x\n", vma->vm_flags)); + -+/** -+ * kbase_job_slot_term - Terminate job slot framework -+ * @kbdev: Device pointer -+ * -+ * Called on driver termination -+ */ -+void kbase_job_slot_term(struct kbase_device *kbdev); ++ /* Call the common mmap handler */ ++ err = _ump_ukk_map_mem(&args); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("_ump_ukk_map_mem() failed in function ump_file_mmap()")); ++ return ump_map_errcode(err); ++ } + -+/** -+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush -+ * @kbdev: Device pointer -+ * -+ * Caller must not be in IRQ context -+ */ -+void kbase_gpu_cacheclean(struct kbase_device *kbdev); ++ return 0; /* success */ ++} + -+#endif /* _KBASE_JM_HWACCESS_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c ++/* Export UMP kernel space API functions */ ++EXPORT_SYMBOL(ump_dd_secure_id_get); ++EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id); ++EXPORT_SYMBOL(ump_dd_phys_block_count_get); ++EXPORT_SYMBOL(ump_dd_phys_block_get); ++EXPORT_SYMBOL(ump_dd_phys_blocks_get); ++EXPORT_SYMBOL(ump_dd_size_get); ++EXPORT_SYMBOL(ump_dd_reference_add); ++EXPORT_SYMBOL(ump_dd_reference_release); ++ ++/* Export our own extended kernel space allocator */ ++EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks); ++ ++/* Setup init and exit functions for this module */ ++module_init(ump_initialize_module); ++module_exit(ump_cleanup_module); ++ ++/* And some module informatio */ ++MODULE_LICENSE(UMP_KERNEL_LINUX_LICENSE); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION(SVN_REV_STRING); +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h new file mode 100644 -index 000000000..4b4541660 +index 000000000..8d32ddbb5 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c -@@ -0,0 +1,1952 @@ ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_linux.h +@@ -0,0 +1,18 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010-2013, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef __UMP_KERNEL_LINUX_H__ ++#define __UMP_KERNEL_LINUX_H__ + ++int ump_kernel_device_initialize(void); ++void ump_kernel_device_terminate(void); + + ++#endif /* __UMP_KERNEL_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c +new file mode 100644 +index 000000000..5a1257a25 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.c +@@ -0,0 +1,271 @@ +/* -+ * Register-based HW access backend specific APIs ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++/* needed to detect kernel version specific code */ ++#include + -+/* Return whether the specified ringbuffer is empty. HW access lock must be -+ * held */ -+#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) -+/* Return number of atoms currently in the specified ringbuffer. 
HW access lock -+ * must be held */ -+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++#include ++#else /* pre 2.6.26 the file was in the arch specific location */ ++#include ++#endif + -+static void kbase_gpu_release_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp); ++#include ++#include ++#include ++#include ++#include "ump_kernel_common.h" ++#include "ump_kernel_memory_backend.h" + -+/** -+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer -+ * @kbdev: Device pointer -+ * @katom: Atom to enqueue -+ * -+ * Context: Caller must hold the HW access lock -+ */ -+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + -+ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#define UMP_BLOCK_SIZE (256UL * 1024UL) /* 256kB, remember to keep the ()s */ + -+ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; -+ rb->write_idx++; + -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; -+} + -+/** -+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once -+ * it has been completed -+ * @kbdev: Device pointer -+ * @js: Job slot to remove atom from -+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in -+ * which case current time will be used. -+ * -+ * Context: Caller must hold the HW access lock -+ * -+ * Return: Atom removed from ringbuffer ++typedef struct block_info { ++ struct block_info *next; ++} block_info; ++ ++ ++ ++typedef struct block_allocator { ++ struct semaphore mutex; ++ block_info *all_blocks; ++ block_info *first_free; ++ u32 base; ++ u32 num_blocks; ++ u32 num_free; ++} block_allocator; ++ ++ ++static void block_allocator_shutdown(ump_memory_backend *backend); ++static int block_allocator_allocate(void *ctx, ump_dd_mem *mem); ++static void block_allocator_release(void *ctx, ump_dd_mem *handle); ++static inline u32 get_phys(block_allocator *allocator, block_info *block); ++static u32 block_allocator_stat(struct ump_memory_backend *backend); ++ ++ ++ ++/* ++ * Create dedicated memory backend + */ -+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, -+ int js, -+ ktime_t *end_timestamp) ++ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; -+ struct kbase_jd_atom *katom; ++ ump_memory_backend *backend; ++ block_allocator *allocator; ++ u32 usable_size; ++ u32 num_blocks; + -+ if (SLOT_RB_EMPTY(rb)) { -+ WARN(1, "GPU ringbuffer unexpectedly empty\n"); ++ usable_size = (size + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1); ++ num_blocks = usable_size / UMP_BLOCK_SIZE; ++ ++ if (0 == usable_size) { ++ DBG_MSG(1, ("Memory block of size %u is unusable\n", size)); + return NULL; + } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ DBG_MSG(5, ("Creating dedicated UMP memory backend. 
Base address: 0x%08x, size: 0x%08x\n", base_address, size)); ++ DBG_MSG(6, ("%u usable bytes which becomes %u blocks\n", usable_size, num_blocks)); + -+ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; ++ backend = kzalloc(sizeof(ump_memory_backend), GFP_KERNEL); ++ if (NULL != backend) { ++ allocator = kmalloc(sizeof(block_allocator), GFP_KERNEL); ++ if (NULL != allocator) { ++ allocator->all_blocks = kmalloc(sizeof(block_info) * num_blocks, GFP_KERNEL); ++ if (NULL != allocator->all_blocks) { ++ int i; + -+ kbase_gpu_release_atom(kbdev, katom, end_timestamp); ++ allocator->first_free = NULL; ++ allocator->num_blocks = num_blocks; ++ allocator->num_free = num_blocks; ++ allocator->base = base_address; ++ sema_init(&allocator->mutex, 1); + -+ rb->read_idx++; ++ for (i = 0; i < num_blocks; i++) { ++ allocator->all_blocks[i].next = allocator->first_free; ++ allocator->first_free = &allocator->all_blocks[i]; ++ } + -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; ++ backend->ctx = allocator; ++ backend->allocate = block_allocator_allocate; ++ backend->release = block_allocator_release; ++ backend->shutdown = block_allocator_shutdown; ++ backend->stat = block_allocator_stat; ++ backend->pre_allocate_physical_check = NULL; ++ backend->adjust_to_mali_phys = NULL; + -+ kbase_js_debug_log_current_affinities(kbdev); ++ return backend; ++ } ++ kfree(allocator); ++ } ++ kfree(backend); ++ } + -+ return katom; ++ return NULL; +} + -+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, -+ int idx) ++ ++ ++/* ++ * Destroy specified dedicated memory backend ++ */ ++static void block_allocator_shutdown(ump_memory_backend *backend) +{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ block_allocator *allocator; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ BUG_ON(!backend); ++ BUG_ON(!backend->ctx); + -+ if ((SLOT_RB_ENTRIES(rb) - 1) < idx) -+ return NULL; /* idx out of range */ ++ allocator = (block_allocator *)backend->ctx; + -+ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; -+} ++ DBG_MSG_IF(1, allocator->num_free != allocator->num_blocks, ("%u blocks still in use during shutdown\n", allocator->num_blocks - allocator->num_free)); + -+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, -+ int js) -+{ -+ return kbase_gpu_inspect(kbdev, js, 0); ++ kfree(allocator->all_blocks); ++ kfree(allocator); ++ kfree(backend); +} + -+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, -+ int js) -+{ -+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + -+ if (SLOT_RB_EMPTY(rb)) -+ return NULL; -+ -+ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; -+} + -+/** -+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently -+ * on the GPU -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return: true if there are atoms on the GPU for slot js, -+ * false otherwise -+ */ -+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) ++static int block_allocator_allocate(void *ctx, ump_dd_mem *mem) +{ -+ int i; ++ block_allocator *allocator; ++ u32 left; ++ block_info *last_allocated = NULL; ++ int i = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ BUG_ON(!ctx); ++ BUG_ON(!mem); + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ allocator = (block_allocator *)ctx; ++ left = mem->size_bytes; + -+ if (!katom) -+ return false; -+ if (katom->gpu_rb_state == 
KBASE_ATOM_GPU_RB_SUBMITTED || -+ katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) -+ return true; ++ BUG_ON(!left); ++ BUG_ON(!&allocator->mutex); ++ ++ mem->nr_blocks = ((left + UMP_BLOCK_SIZE - 1) & ~(UMP_BLOCK_SIZE - 1)) / UMP_BLOCK_SIZE; ++ mem->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * mem->nr_blocks); ++ if (NULL == mem->block_array) { ++ MSG_ERR(("Failed to allocate block array\n")); ++ return 0; + } + -+ return false; -+} ++ if (down_interruptible(&allocator->mutex)) { ++ MSG_ERR(("Could not get mutex to do block_allocate\n")); ++ return 0; ++ } + -+/** -+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms -+ * currently on the GPU -+ * @kbdev: Device pointer -+ * -+ * Return: true if there are any atoms on the GPU, false otherwise -+ */ -+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) -+{ -+ int js; -+ int i; ++ mem->size_bytes = 0; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ while ((left > 0) && (allocator->first_free)) { ++ block_info *block; + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ block = allocator->first_free; ++ allocator->first_free = allocator->first_free->next; ++ block->next = last_allocated; ++ last_allocated = block; ++ allocator->num_free--; + -+ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) -+ return true; -+ } ++ mem->block_array[i].addr = get_phys(allocator, block); ++ mem->block_array[i].size = UMP_BLOCK_SIZE; ++ mem->size_bytes += UMP_BLOCK_SIZE; ++ ++ i++; ++ ++ if (left < UMP_BLOCK_SIZE) left = 0; ++ else left -= UMP_BLOCK_SIZE; + } -+ return false; -+} + -+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) -+{ -+ int nr = 0; -+ int i; ++ if (left) { ++ block_info *block; ++ /* release all memory back to the pool */ ++ while (last_allocated) { ++ block = last_allocated->next; ++ last_allocated->next = allocator->first_free; ++ allocator->first_free = last_allocated; ++ last_allocated = block; ++ allocator->num_free++; ++ } + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ vfree(mem->block_array); ++ mem->backend_info = NULL; ++ mem->block_array = NULL; + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ DBG_MSG(4, ("Could not find a mem-block for the allocation.\n")); ++ up(&allocator->mutex); + -+ if (katom && (katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED)) -+ nr++; ++ return 0; + } + -+ return nr; ++ mem->backend_info = last_allocated; ++ ++ up(&allocator->mutex); ++ mem->is_cached = 0; ++ ++ return 1; +} + -+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) ++ ++ ++static void block_allocator_release(void *ctx, ump_dd_mem *handle) +{ -+ int nr = 0; -+ int i; ++ block_allocator *allocator; ++ block_info *block, * next; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ BUG_ON(!ctx); ++ BUG_ON(!handle); + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ if (kbase_gpu_inspect(kbdev, js, i)) -+ nr++; -+ } ++ allocator = (block_allocator *)ctx; ++ block = (block_info *)handle->backend_info; ++ BUG_ON(!block); + -+ return nr; -+} ++ if (down_interruptible(&allocator->mutex)) { ++ MSG_ERR(("Allocator release: Failed to get mutex - memory leak\n")); ++ return; ++ } + -+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, -+ enum kbase_atom_gpu_rb_state min_rb_state) -+{ -+ int nr = 0; -+ int i; ++ 
while (block) { ++ next = block->next; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ BUG_ON((block < allocator->all_blocks) || (block > (allocator->all_blocks + allocator->num_blocks))); + -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); ++ block->next = allocator->first_free; ++ allocator->first_free = block; ++ allocator->num_free++; + -+ if (katom && (katom->gpu_rb_state >= min_rb_state)) -+ nr++; ++ block = next; + } ++ DBG_MSG(3, ("%d blocks free after release call\n", allocator->num_free)); ++ up(&allocator->mutex); + -+ return nr; ++ vfree(handle->block_array); ++ handle->block_array = NULL; +} + -+/** -+ * check_secure_atom - Check if the given atom is in the given secure state and -+ * has a ringbuffer state of at least -+ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION -+ * @katom: Atom pointer -+ * @secure: Desired secure state -+ * -+ * Return: true if atom is in the given state, false otherwise -+ */ -+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) -+{ -+ if (katom->gpu_rb_state >= -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && -+ ((kbase_jd_katom_is_protected(katom) && secure) || -+ (!kbase_jd_katom_is_protected(katom) && !secure))) -+ return true; + -+ return false; -+} + -+/** -+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given -+ * secure state in the ringbuffers of at least -+ * state -+ * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE -+ * @kbdev: Device pointer -+ * @secure: Desired secure state -+ * -+ * Return: true if any atoms are in the given state, false otherwise ++/* ++ * Helper function for calculating the physical base adderss of a memory block + */ -+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, -+ bool secure) ++static inline u32 get_phys(block_allocator *allocator, block_info *block) +{ -+ int js, i; -+ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ for (i = 0; i < SLOT_RB_SIZE; i++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, i); -+ -+ if (katom) { -+ if (check_secure_atom(katom, secure)) -+ return true; -+ } -+ } -+ } -+ -+ return false; ++ return allocator->base + ((block - allocator->all_blocks) * UMP_BLOCK_SIZE); +} + -+int kbase_backend_slot_free(struct kbase_device *kbdev, int js) ++static u32 block_allocator_stat(struct ump_memory_backend *backend) +{ -+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != -+ KBASE_RESET_GPU_NOT_PENDING) { -+ /* The GPU is being reset - so prevent submission */ -+ return 0; -+ } ++ block_allocator *allocator; ++ BUG_ON(!backend); ++ allocator = (block_allocator *)backend->ctx; ++ BUG_ON(!allocator); + -+ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); ++ return (allocator->num_blocks - allocator->num_free) * UMP_BLOCK_SIZE; +} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h +new file mode 100644 +index 000000000..949fd245c +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_dedicated.h +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + ++/** ++ * @file ump_kernel_memory_backend_dedicated.h ++ */ + -+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); -+ -+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, -+ int js, -+ struct kbase_jd_atom *katom) -+{ -+ /* The most recently checked affinity. Having this at this scope allows -+ * us to guarantee that we've checked the affinity in this function -+ * call. -+ */ -+ u64 recently_chosen_affinity = 0; -+ bool chosen_affinity = false; -+ bool retry; ++#ifndef __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ ++#define __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ + -+ do { -+ retry = false; ++#include "ump_kernel_memory_backend.h" + -+ /* NOTE: The following uses a number of FALLTHROUGHs to optimize -+ * the calls to this function. Ending of the function is -+ * indicated by BREAK OUT */ -+ switch (katom->coreref_state) { -+ /* State when job is first attempted to be run */ -+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: -+ KBASE_DEBUG_ASSERT(katom->affinity == 0); ++ump_memory_backend *ump_block_allocator_create(u32 base_address, u32 size); + -+ /* Compute affinity */ -+ if (false == kbase_js_choose_affinity( -+ &recently_chosen_affinity, kbdev, katom, -+ js)) { -+ /* No cores are currently available */ -+ /* *** BREAK OUT: No state transition *** */ -+ break; -+ } ++#endif /* __UMP_KERNEL_MEMORY_BACKEND_DEDICATED_H__ */ + -+ chosen_affinity = true; +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c +new file mode 100644 +index 000000000..7cd8d5d38 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.c +@@ -0,0 +1,235 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ /* Request the cores */ -+ kbase_pm_request_cores(kbdev, -+ katom->core_req & BASE_JD_REQ_T, -+ recently_chosen_affinity); ++/* needed to detect kernel version specific code */ ++#include + -+ katom->affinity = recently_chosen_affinity; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++#include ++#else /* pre 2.6.26 the file was in the arch specific location */ ++#include ++#endif + -+ /* Proceed to next state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; ++#include ++#include ++#include ++#include ++#include ++#include ++#include "ump_kernel_common.h" ++#include "ump_kernel_memory_backend.h" + -+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: -+ { -+ enum kbase_pm_cores_ready cores_ready; + -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); + -+ cores_ready = kbase_pm_register_inuse_cores( -+ kbdev, -+ katom->core_req & BASE_JD_REQ_T, -+ katom->affinity); -+ if (cores_ready == KBASE_NEW_AFFINITY) { -+ /* Affinity no longer valid - return to -+ * previous state */ -+ kbasep_js_job_check_deref_cores(kbdev, -+ katom); -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_REGISTER_INUSE_FAILED, -+ katom->kctx, katom, -+ katom->jc, js, -+ (u32) katom->affinity); -+ /* *** BREAK OUT: Return to previous -+ * state, retry *** */ -+ retry = true; -+ break; -+ } -+ if (cores_ready == KBASE_CORES_NOT_READY) { -+ /* Stay in this state and return, to -+ * retry at this state later */ -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_REGISTER_INUSE_FAILED, -+ katom->kctx, katom, -+ katom->jc, js, -+ (u32) katom->affinity); -+ /* *** BREAK OUT: No state transition -+ * *** */ -+ break; -+ } -+ /* Proceed to next state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; -+ } ++typedef struct os_allocator { ++ struct semaphore mutex; ++ u32 num_pages_max; /**< Maximum number of pages to allocate from the OS */ ++ u32 num_pages_allocated; /**< Number of pages allocated from the OS */ ++} os_allocator; + -+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); + -+ /* Optimize out choosing the affinity twice in the same -+ * function call */ -+ if (chosen_affinity == false) { -+ /* See if the affinity changed since a previous -+ * call. 
*/ -+ if (false == kbase_js_choose_affinity( -+ &recently_chosen_affinity, -+ kbdev, katom, js)) { -+ /* No cores are currently available */ -+ kbasep_js_job_check_deref_cores(kbdev, -+ katom); -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, -+ katom->kctx, katom, -+ katom->jc, js, -+ (u32) recently_chosen_affinity); -+ /* *** BREAK OUT: Transition to lower -+ * state *** */ -+ break; -+ } -+ chosen_affinity = true; -+ } + -+ /* Now see if this requires a different set of cores */ -+ if (recently_chosen_affinity != katom->affinity) { -+ enum kbase_pm_cores_ready cores_ready; ++static void os_free(void *ctx, ump_dd_mem *descriptor); ++static int os_allocate(void *ctx, ump_dd_mem *descriptor); ++static void os_memory_backend_destroy(ump_memory_backend *backend); ++static u32 os_stat(struct ump_memory_backend *backend); + -+ kbase_pm_request_cores(kbdev, -+ katom->core_req & BASE_JD_REQ_T, -+ recently_chosen_affinity); + -+ /* Register new cores whilst we still hold the -+ * old ones, to minimize power transitions */ -+ cores_ready = -+ kbase_pm_register_inuse_cores(kbdev, -+ katom->core_req & BASE_JD_REQ_T, -+ recently_chosen_affinity); -+ kbasep_js_job_check_deref_cores(kbdev, katom); + -+ /* Fixup the state that was reduced by -+ * deref_cores: */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; -+ katom->affinity = recently_chosen_affinity; -+ if (cores_ready == KBASE_NEW_AFFINITY) { -+ /* Affinity no longer valid - return to -+ * previous state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; ++/* ++ * Create OS memory backend ++ */ ++ump_memory_backend *ump_os_memory_backend_create(const int max_allocation) ++{ ++ ump_memory_backend *backend; ++ os_allocator *info; + -+ kbasep_js_job_check_deref_cores(kbdev, -+ katom); ++ info = kmalloc(sizeof(os_allocator), GFP_KERNEL); ++ if (NULL == info) { ++ return NULL; ++ } + -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_REGISTER_INUSE_FAILED, -+ katom->kctx, katom, -+ katom->jc, js, -+ (u32) katom->affinity); -+ /* *** BREAK OUT: Return to previous -+ * state, retry *** */ -+ retry = true; -+ break; -+ } -+ /* Now might be waiting for powerup again, with -+ * a new affinity */ -+ if (cores_ready == KBASE_CORES_NOT_READY) { -+ /* Return to previous state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, -+ katom->kctx, katom, -+ katom->jc, js, -+ (u32) katom->affinity); -+ /* *** BREAK OUT: Transition to lower -+ * state *** */ -+ break; -+ } -+ } -+ /* Proceed to next state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; ++ info->num_pages_max = max_allocation >> PAGE_SHIFT; ++ info->num_pages_allocated = 0; + -+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); -+ KBASE_DEBUG_ASSERT(katom->affinity == -+ recently_chosen_affinity); ++ sema_init(&info->mutex, 1); + -+ /* Note: this is where the caller must've taken the -+ * hwaccess_lock */ ++ backend = kmalloc(sizeof(ump_memory_backend), GFP_KERNEL); ++ if (NULL == backend) { ++ kfree(info); ++ return NULL; ++ } + -+ /* Check for affinity violations - if there are any, -+ * then we just ask the caller to requeue and try again -+ * later */ -+ if (kbase_js_affinity_would_violate(kbdev, js, 
-+ katom->affinity) != false) { -+ /* Return to previous state */ -+ katom->coreref_state = -+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; -+ /* *** BREAK OUT: Transition to lower state *** -+ */ -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, -+ JS_CORE_REF_AFFINITY_WOULD_VIOLATE, -+ katom->kctx, katom, katom->jc, js, -+ (u32) katom->affinity); -+ break; -+ } ++ backend->ctx = info; ++ backend->allocate = os_allocate; ++ backend->release = os_free; ++ backend->shutdown = os_memory_backend_destroy; ++ backend->stat = os_stat; ++ backend->pre_allocate_physical_check = NULL; ++ backend->adjust_to_mali_phys = NULL; + -+ /* No affinity violations would result, so the cores are -+ * ready */ -+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; -+ /* *** BREAK OUT: Cores Ready *** */ -+ break; ++ return backend; ++} + -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, -+ "Unhandled kbase_atom_coreref_state %d", -+ katom->coreref_state); -+ break; -+ } -+ } while (retry != false); + -+ return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); -+} + -+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++/* ++ * Destroy specified OS memory backend ++ */ ++static void os_memory_backend_destroy(ump_memory_backend *backend) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); ++ os_allocator *info = (os_allocator *)backend->ctx; + -+ switch (katom->coreref_state) { -+ case KBASE_ATOM_COREREF_STATE_READY: -+ /* State where atom was submitted to the HW - just proceed to -+ * power-down */ -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); ++ DBG_MSG_IF(1, 0 != info->num_pages_allocated, ("%d pages still in use during shutdown\n", info->num_pages_allocated)); + -+ /* fallthrough */ ++ kfree(info); ++ kfree(backend); ++} + -+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: -+ /* State where cores were registered */ -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); -+ kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, -+ katom->affinity); + -+ break; + -+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: -+ /* State where cores were requested, but not registered */ -+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || -+ (katom->core_req & BASE_JD_REQ_T)); -+ kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, -+ katom->affinity); -+ break; ++/* ++ * Allocate UMP memory ++ */ ++static int os_allocate(void *ctx, ump_dd_mem *descriptor) ++{ ++ u32 left; ++ os_allocator *info; ++ int pages_allocated = 0; ++ int is_cached; + -+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: -+ /* Initial state - nothing required */ -+ KBASE_DEBUG_ASSERT(katom->affinity == 0); -+ break; ++ BUG_ON(!descriptor); ++ BUG_ON(!ctx); + -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, -+ "Unhandled coreref_state: %d", -+ katom->coreref_state); -+ break; ++ info = (os_allocator *)ctx; ++ left = descriptor->size_bytes; ++ is_cached = descriptor->is_cached; ++ ++ if (down_interruptible(&info->mutex)) { ++ DBG_MSG(1, ("Failed to get mutex in os_free\n")); ++ return 0; /* failure */ + } + -+ katom->affinity = 0; -+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -+} ++ descriptor->backend_info = NULL; ++ descriptor->nr_blocks = ((left + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) >> PAGE_SHIFT; + -+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, -+ base_jd_core_req core_req, u64 affinity, -+ enum kbase_atom_coreref_state coreref_state) 
-+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ DBG_MSG(5, ("Allocating page array. Size: %lu\n", descriptor->nr_blocks * sizeof(ump_dd_physical_block))); + -+ switch (coreref_state) { -+ case KBASE_ATOM_COREREF_STATE_READY: -+ /* State where atom was submitted to the HW - just proceed to -+ * power-down */ -+ KBASE_DEBUG_ASSERT(affinity != 0 || -+ (core_req & BASE_JD_REQ_T)); ++ descriptor->block_array = (ump_dd_physical_block *)vmalloc(sizeof(ump_dd_physical_block) * descriptor->nr_blocks); ++ if (NULL == descriptor->block_array) { ++ up(&info->mutex); ++ DBG_MSG(1, ("Block array could not be allocated\n")); ++ return 0; /* failure */ ++ } + -+ /* fallthrough */ ++ while (left > 0 && ((info->num_pages_allocated + pages_allocated) < info->num_pages_max)) { ++ struct page *new_page; + -+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: -+ /* State where cores were registered */ -+ KBASE_DEBUG_ASSERT(affinity != 0 || -+ (core_req & BASE_JD_REQ_T)); -+ kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, -+ affinity); ++ if (is_cached) { ++ new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN); ++ } else { ++ new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN | __GFP_COLD); ++ } ++ if (NULL == new_page) { ++ break; ++ } + -+ break; ++ /* Ensure page caches are flushed. */ ++ if (is_cached) { ++ descriptor->block_array[pages_allocated].addr = page_to_phys(new_page); ++ descriptor->block_array[pages_allocated].size = PAGE_SIZE; ++ } else { ++ descriptor->block_array[pages_allocated].addr = dma_map_page(NULL, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ descriptor->block_array[pages_allocated].size = PAGE_SIZE; ++ } + -+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: -+ /* State where cores were requested, but not registered */ -+ KBASE_DEBUG_ASSERT(affinity != 0 || -+ (core_req & BASE_JD_REQ_T)); -+ kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, -+ affinity); -+ break; ++ DBG_MSG(5, ("Allocated page 0x%08lx cached: %d\n", descriptor->block_array[pages_allocated].addr, is_cached)); + -+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: -+ /* Initial state - nothing required */ -+ KBASE_DEBUG_ASSERT(affinity == 0); -+ break; ++ if (left < PAGE_SIZE) { ++ left = 0; ++ } else { ++ left -= PAGE_SIZE; ++ } + -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, -+ "Unhandled coreref_state: %d", -+ coreref_state); -+ break; ++ pages_allocated++; + } -+} -+ -+static void kbase_gpu_release_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ -+ switch (katom->gpu_rb_state) { -+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: -+ /* Should be impossible */ -+ WARN(1, "Attempting to release atom not in ringbuffer\n"); -+ break; + -+ case KBASE_ATOM_GPU_RB_SUBMITTED: -+ /* Inform power management at start/finish of atom so it can -+ * update its GPU utilisation metrics. Mark atom as not -+ * submitted beforehand. 
*/ -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; -+ kbase_pm_metrics_update(kbdev, end_timestamp); ++ DBG_MSG(5, ("Alloce for ID:%2d got %d pages, cached: %d\n", descriptor->secure_id, pages_allocated)); + -+ if (katom->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ if (left) { ++ DBG_MSG(1, ("Failed to allocate needed pages\n")); + -+ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); -+ KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); -+ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_READY: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: -+ kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, -+ katom->affinity); -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: -+ break; ++ while (pages_allocated) { ++ pages_allocated--; ++ if (!is_cached) { ++ dma_unmap_page(NULL, descriptor->block_array[pages_allocated].addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } ++ __free_page(pfn_to_page(descriptor->block_array[pages_allocated].addr >> PAGE_SHIFT)); ++ } + -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: -+ if (katom->protected_state.enter != -+ KBASE_ATOM_ENTER_PROTECTED_CHECK || -+ katom->protected_state.exit != -+ KBASE_ATOM_EXIT_PROTECTED_CHECK) -+ kbdev->protected_mode_transition = false; ++ up(&info->mutex); + -+ if (kbase_jd_katom_is_protected(katom) && -+ (katom->protected_state.enter == -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) { -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ return 0; /* failure */ ++ } + -+ /* Go back to configured model for IPA */ -+ kbase_ipa_model_use_configured_locked(kbdev); -+ } ++ info->num_pages_allocated += pages_allocated; + ++ DBG_MSG(6, ("%d out of %d pages now allocated\n", info->num_pages_allocated, info->num_pages_max)); + -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: -+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: -+ break; -+ } ++ up(&info->mutex); + -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ return 1; /* success*/ +} + -+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ kbase_gpu_release_atom(kbdev, katom, NULL); -+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; -+} + -+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) ++/* ++ * Free specified UMP memory ++ */ ++static void os_free(void *ctx, ump_dd_mem *descriptor) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ bool slot_busy[3]; -+ -+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) -+ return true; -+ slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, -+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); -+ slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, -+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); -+ slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, -+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); ++ 
os_allocator *info; ++ int i; + -+ if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || -+ (js != 2 && !slot_busy[2])) -+ return true; ++ BUG_ON(!ctx); ++ BUG_ON(!descriptor); + -+ /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ -+ if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) || -+ kbase_gpu_atoms_submitted(kbdev, 1) || -+ backend->rmu_workaround_flag)) -+ return false; ++ info = (os_allocator *)ctx; + -+ /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ -+ if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || -+ !backend->rmu_workaround_flag)) -+ return false; ++ BUG_ON(descriptor->nr_blocks > info->num_pages_allocated); + -+ backend->rmu_workaround_flag = !backend->rmu_workaround_flag; ++ if (down_interruptible(&info->mutex)) { ++ DBG_MSG(1, ("Failed to get mutex in os_free\n")); ++ return; ++ } + -+ return true; -+} ++ DBG_MSG(5, ("Releasing %lu OS pages\n", descriptor->nr_blocks)); + -+/** -+ * other_slots_busy - Determine if any job slots other than @js are currently -+ * running atoms -+ * @kbdev: Device pointer -+ * @js: Job slot -+ * -+ * Return: true if any slots other than @js are busy, false otherwise -+ */ -+static inline bool other_slots_busy(struct kbase_device *kbdev, int js) -+{ -+ int slot; ++ info->num_pages_allocated -= descriptor->nr_blocks; + -+ for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { -+ if (slot == js) -+ continue; ++ up(&info->mutex); + -+ if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, -+ KBASE_ATOM_GPU_RB_SUBMITTED)) -+ return true; ++ for (i = 0; i < descriptor->nr_blocks; i++) { ++ DBG_MSG(6, ("Freeing physical page. Address: 0x%08lx\n", descriptor->block_array[i].addr)); ++ if (! descriptor->is_cached) { ++ dma_unmap_page(NULL, descriptor->block_array[i].addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } ++ __free_page(pfn_to_page(descriptor->block_array[i].addr >> PAGE_SHIFT)); + } + -+ return false; ++ vfree(descriptor->block_array); +} + -+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) -+{ -+ return kbdev->protected_mode; -+} + -+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) ++static u32 os_stat(struct ump_memory_backend *backend) +{ -+ int err = -EINVAL; ++ os_allocator *info; ++ info = (os_allocator *)backend->ctx; ++ return info->num_pages_allocated * _MALI_OSK_MALI_PAGE_SIZE; ++} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h +new file mode 100644 +index 000000000..d21d50351 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_memory_backend_os.h +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/** ++ * @file ump_kernel_memory_backend_os.h ++ */ + -+ WARN_ONCE(!kbdev->protected_ops, -+ "Cannot enter protected mode: protected callbacks not specified.\n"); ++#ifndef __UMP_KERNEL_MEMORY_BACKEND_OS_H__ ++#define __UMP_KERNEL_MEMORY_BACKEND_OS_H__ + -+ /* -+ * When entering into protected mode, we must ensure that the -+ * GPU is not operating in coherent mode as well. This is to -+ * ensure that no protected memory can be leaked. -+ */ -+ if (kbdev->system_coherency == COHERENCY_ACE) -+ kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); ++#include "ump_kernel_memory_backend.h" + -+ if (kbdev->protected_ops) { -+ /* Switch GPU to protected mode */ -+ err = kbdev->protected_ops->protected_mode_enable( -+ kbdev->protected_dev); ++ump_memory_backend *ump_os_memory_backend_create(const int max_allocation); + -+ if (err) -+ dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", -+ err); -+ else -+ kbdev->protected_mode = true; -+ } ++#endif /* __UMP_KERNEL_MEMORY_BACKEND_OS_H__ */ + -+ return err; -+} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c +new file mode 100644 +index 000000000..6be0f8644 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.c +@@ -0,0 +1,222 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#include "mali_kernel_common.h" ++#include "mali_osk.h" ++#include "ump_osk.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_types.h" ++#include "ump_kernel_random_mapping.h" + -+ WARN_ONCE(!kbdev->protected_ops, -+ "Cannot exit protected mode: protected callbacks not specified.\n"); ++#include ++#include ++#include ++#include + -+ if (!kbdev->protected_ops) -+ return -EINVAL; + -+ /* The protected mode disable callback will be called as part of reset -+ */ -+ kbase_reset_gpu_silent(kbdev); ++static ump_dd_mem *search(struct rb_root *root, int id) ++{ ++ struct rb_node *node = root->rb_node; + -+ return 0; ++ while (node) { ++ ump_dd_mem *e = container_of(node, ump_dd_mem, node); ++ ++ if (id < e->secure_id) { ++ node = node->rb_left; ++ } else if (id > e->secure_id) { ++ node = node->rb_right; ++ } else { ++ return e; ++ } ++ } ++ ++ return NULL; +} + -+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, -+ struct kbase_jd_atom **katom, int idx, int js) ++static mali_bool insert(struct rb_root *root, int id, ump_dd_mem *mem) +{ -+ int err = 0; -+ -+ switch (katom[idx]->protected_state.enter) { -+ case KBASE_ATOM_ENTER_PROTECTED_CHECK: -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); -+ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV -+ * should ensure that we are not already transitiong, and that -+ * there are no atoms currently on the GPU. 
*/ -+ WARN_ON(kbdev->protected_mode_transition); -+ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); ++ struct rb_node **new = &(root->rb_node); ++ struct rb_node *parent = NULL; + -+ kbdev->protected_mode_transition = true; -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_VINSTR; ++ while (*new) { ++ ump_dd_mem *this = container_of(*new, ump_dd_mem, node); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_ENTER_PROTECTED_VINSTR: -+ if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { -+ /* -+ * We can't switch now because -+ * the vinstr core state switch -+ * is not done yet. -+ */ -+ return -EAGAIN; ++ parent = *new; ++ if (id < this->secure_id) { ++ new = &((*new)->rb_left); ++ } else if (id > this->secure_id) { ++ new = &((*new)->rb_right); ++ } else { ++ printk(KERN_ERR "UMP: ID already used %x\n", id); ++ return MALI_FALSE; + } ++ } + -+ /* Use generic model for IPA in protected mode */ -+ kbase_ipa_model_use_fallback_locked(kbdev); ++ rb_link_node(&mem->node, parent, new); ++ rb_insert_color(&mem->node, root); + -+ /* Once reaching this point GPU must be -+ * switched to protected mode or vinstr -+ * re-enabled. */ ++ return MALI_TRUE; ++} + -+ /* -+ * Not in correct mode, begin protected mode switch. -+ * Entering protected mode requires us to power down the L2, -+ * and drop out of fully coherent mode. -+ */ -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ump_random_mapping *ump_random_mapping_create(void) ++{ ++ ump_random_mapping *map = _mali_osk_calloc(1, sizeof(ump_random_mapping)); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: -+ /* Avoid unnecessary waiting on non-ACE platforms. */ -+ if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { -+ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || -+ kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { -+ /* -+ * The L2 is still powered, wait for all the users to -+ * finish with it before doing the actual reset. -+ */ -+ return -EAGAIN; -+ } -+ } ++ if (NULL == map) ++ return NULL; + -+ katom[idx]->protected_state.enter = -+ KBASE_ATOM_ENTER_PROTECTED_FINISHED; ++ map->lock = _mali_osk_mutex_rw_init(_MALI_OSK_LOCKFLAG_ORDERED, ++ _MALI_OSK_LOCK_ORDER_DESCRIPTOR_MAP); ++ if (NULL != map->lock) { ++ map->root = RB_ROOT; ++#if UMP_RANDOM_MAP_DELAY ++ map->failed.count = 0; ++ map->failed.timestamp = jiffies; ++#endif ++ return map; ++ } ++ return NULL; ++} + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_ENTER_PROTECTED_FINISHED: ++void ump_random_mapping_destroy(ump_random_mapping *map) ++{ ++ _mali_osk_mutex_rw_term(map->lock); ++ _mali_osk_free(map); ++} + -+ /* No jobs running, so we can switch GPU mode right now. */ -+ err = kbase_gpu_protected_mode_enter(kbdev); ++int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem) ++{ ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); + -+ /* -+ * Regardless of result, we are no longer transitioning -+ * the GPU. -+ */ -+ kbdev->protected_mode_transition = false; -+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); -+ if (err) { -+ /* -+ * Failed to switch into protected mode, resume -+ * vinstr core and fail atom. 
-+ */ -+ kbase_vinstr_resume(kbdev->vinstr_ctx); -+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); -+ /* Only return if head atom or previous atom -+ * already removed - as atoms must be returned -+ * in order. */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); -+ } ++ while (1) { ++ u32 id; + -+ /* Go back to configured model for IPA */ -+ kbase_ipa_model_use_configured_locked(kbdev); ++ get_random_bytes(&id, sizeof(id)); + -+ return -EINVAL; -+ } ++ /* Try a new random number if id happened to be the invalid ++ * secure ID (-1). */ ++ if (unlikely(id == UMP_INVALID_SECURE_ID)) ++ continue; + -+ /* Protected mode sanity checks. */ -+ KBASE_DEBUG_ASSERT_MSG( -+ kbase_jd_katom_is_protected(katom[idx]) == -+ kbase_gpu_in_protected_mode(kbdev), -+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", -+ kbase_jd_katom_is_protected(katom[idx]), -+ kbase_gpu_in_protected_mode(kbdev)); -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_READY; ++ /* Insert into the tree. If the id was already in use, get a ++ * new random id and try again. */ ++ if (insert(&map->root, id, mem)) { ++ mem->secure_id = id; ++ break; ++ } + } ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); + + return 0; +} + -+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, -+ struct kbase_jd_atom **katom, int idx, int js) ++ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id) +{ -+ int err = 0; -+ ++ ump_dd_mem *mem = NULL; ++#if UMP_RANDOM_MAP_DELAY ++ int do_delay = 0; ++#endif + -+ switch (katom[idx]->protected_state.exit) { -+ case KBASE_ATOM_EXIT_PROTECTED_CHECK: -+ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev); -+ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV -+ * should ensure that we are not already transitiong, and that -+ * there are no atoms currently on the GPU. */ -+ WARN_ON(kbdev->protected_mode_transition); -+ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); ++ DEBUG_ASSERT(map); + -+ /* -+ * Exiting protected mode requires a reset, but first the L2 -+ * needs to be powered down to ensure it's not active when the -+ * reset is issued. -+ */ -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RO); ++ mem = search(&map->root, id); + -+ kbdev->protected_mode_transition = true; -+ kbase_pm_update_cores_state_nolock(kbdev); ++ if (unlikely(NULL == mem)) { ++#if UMP_RANDOM_MAP_DELAY ++ map->failed.count++; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: -+ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || -+ kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { -+ /* -+ * The L2 is still powered, wait for all the users to -+ * finish with it before doing the actual reset. -+ */ -+ return -EAGAIN; ++ if (time_is_before_jiffies(map->failed.timestamp + ++ UMP_FAILED_LOOKUP_DELAY * HZ)) { ++ /* If it is a long time since last failure, reset ++ * the counter and skip the delay this time. 
*/ ++ map->failed.count = 0; ++ } else if (map->failed.count > UMP_FAILED_LOOKUPS_ALLOWED) { ++ do_delay = 1; + } -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_RESET; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_EXIT_PROTECTED_RESET: -+ /* Issue the reset to the GPU */ -+ err = kbase_gpu_protected_mode_reset(kbdev); -+ -+ if (err) { -+ kbdev->protected_mode_transition = false; -+ -+ /* Failed to exit protected mode, fail atom */ -+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); -+ /* Only return if head atom or previous atom -+ * already removed - as atoms must be returned -+ * in order */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); -+ } ++ map->failed.timestamp = jiffies; ++#endif /* UMP_RANDOM_MAP_DELAY */ ++ } else { ++ ump_dd_reference_add(mem); ++ } ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RO); + -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++#if UMP_RANDOM_MAP_DELAY ++ if (do_delay) { ++ /* Apply delay */ ++ schedule_timeout_killable(UMP_FAILED_LOOKUP_DELAY); ++ } ++#endif /* UMP_RANDOM_MAP_DELAY */ + -+ /* Use generic model for IPA in protected mode */ -+ kbase_ipa_model_use_fallback_locked(kbdev); ++ return mem; ++} + -+ return -EINVAL; -+ } ++static ump_dd_mem *ump_random_mapping_remove_internal(ump_random_mapping *map, int id) ++{ ++ ump_dd_mem *mem = NULL; + -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; ++ mem = search(&map->root, id); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: -+ /* A GPU reset is issued when exiting protected mode. Once the -+ * reset is done all atoms' state will also be reset. For this -+ * reason, if the atom is still in this state we can safely -+ * say that the reset has not completed i.e., we have not -+ * finished exiting protected mode yet. -+ */ -+ return -EAGAIN; ++ if (mem) { ++ rb_erase(&mem->node, &map->root); + } + -+ return 0; ++ return mem; +} + -+void kbase_backend_slot_update(struct kbase_device *kbdev) ++void ump_random_mapping_put(ump_dd_mem *mem) +{ -+ int js; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ int new_ref; + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ struct kbase_jd_atom *katom[2]; -+ int idx; ++ _mali_osk_mutex_rw_wait(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW); + -+ katom[0] = kbase_gpu_inspect(kbdev, js, 0); -+ katom[1] = kbase_gpu_inspect(kbdev, js, 1); -+ WARN_ON(katom[1] && !katom[0]); ++ new_ref = _ump_osk_atomic_dec_and_read(&mem->ref_count); ++ DBG_MSG(5, ("Memory reference decremented. ID: %u, new value: %d\n", ++ mem->secure_id, new_ref)); + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ bool cores_ready; -+ int ret; ++ if (0 == new_ref) { ++ DBG_MSG(3, ("Final release of memory. 
ID: %u\n", mem->secure_id)); + -+ if (!katom[idx]) -+ continue; ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ if (mem->import_attach) { ++ struct dma_buf_attachment *attach = mem->import_attach; ++ struct dma_buf *dma_buf; + -+ switch (katom[idx]->gpu_rb_state) { -+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: -+ /* Should be impossible */ -+ WARN(1, "Attempting to update atom not in ringbuffer\n"); -+ break; ++ if (mem->sgt) ++ dma_buf_unmap_attachment(attach, mem->sgt, ++ DMA_BIDIRECTIONAL); + -+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: -+ if (katom[idx]->atom_flags & -+ KBASE_KATOM_FLAG_X_DEP_BLOCKED) -+ break; ++ dma_buf = attach->dmabuf; ++ dma_buf_detach(attach->dmabuf, attach); ++ dma_buf_put(dma_buf); + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; ++ } ++#endif ++ ump_random_mapping_remove_internal(device.secure_id_map, mem->secure_id); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: -+ if (kbase_gpu_check_secure_atoms(kbdev, -+ !kbase_jd_katom_is_protected( -+ katom[idx]))) -+ break; ++ mem->release_func(mem->ctx, mem); ++ _mali_osk_free(mem); ++ } + -+ if ((idx == 1) && (kbase_jd_katom_is_protected( -+ katom[0]) != -+ kbase_jd_katom_is_protected( -+ katom[1]))) -+ break; ++ _mali_osk_mutex_rw_signal(device.secure_id_map->lock, _MALI_OSK_LOCKMODE_RW); ++} + -+ if (kbdev->protected_mode_transition) -+ break; ++ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int descriptor) ++{ ++ ump_dd_mem *mem; + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; ++ _mali_osk_mutex_rw_wait(map->lock, _MALI_OSK_LOCKMODE_RW); ++ mem = ump_random_mapping_remove_internal(map, descriptor); ++ _mali_osk_mutex_rw_signal(map->lock, _MALI_OSK_LOCKMODE_RW); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: ++ return mem; ++} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h +new file mode 100644 +index 000000000..2cea6cedc +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_kernel_random_mapping.h +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (C) 2010-2011, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ /* -+ * Exiting protected mode must be done before -+ * the references on the cores are taken as -+ * a power down the L2 is required which -+ * can't happen after the references for this -+ * atom are taken. -+ */ ++/** ++ * @file ump_kernel_random_mapping.h ++ */ + -+ if (!kbase_gpu_in_protected_mode(kbdev) && -+ kbase_jd_katom_is_protected(katom[idx])) { -+ /* Atom needs to transition into protected mode. */ -+ ret = kbase_jm_enter_protected_mode(kbdev, -+ katom, idx, js); -+ if (ret) -+ break; -+ } else if (kbase_gpu_in_protected_mode(kbdev) && -+ !kbase_jd_katom_is_protected(katom[idx])) { -+ /* Atom needs to transition out of protected mode. 
*/ -+ ret = kbase_jm_exit_protected_mode(kbdev, -+ katom, idx, js); -+ if (ret) -+ break; -+ } -+ katom[idx]->protected_state.exit = -+ KBASE_ATOM_EXIT_PROTECTED_CHECK; ++#ifndef __UMP_KERNEL_RANDOM_MAPPING_H__ ++#define __UMP_KERNEL_RANDOM_MAPPING_H__ + -+ /* Atom needs no protected mode transition. */ ++#include "mali_osk.h" ++#include + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; ++#define UMP_RANDOM_MAP_DELAY 1 ++#define UMP_FAILED_LOOKUP_DELAY 10 /* ms */ ++#define UMP_FAILED_LOOKUPS_ALLOWED 10 /* number of allowed failed lookups */ + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: -+ if (katom[idx]->will_fail_event_code) { -+ kbase_gpu_mark_atom_for_return(kbdev, -+ katom[idx]); -+ /* Set EVENT_DONE so this atom will be -+ completed, not unpulled. */ -+ katom[idx]->event_code = -+ BASE_JD_EVENT_DONE; -+ /* Only return if head atom or previous -+ * atom already removed - as atoms must -+ * be returned in order. */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, katom[idx]); -+ } -+ break; -+ } ++/** ++ * The random mapping object ++ * Provides a separate namespace where we can map an integer to a pointer ++ */ ++typedef struct ump_random_mapping { ++ _mali_osk_mutex_rw_t *lock; /**< Lock protecting access to the mapping object */ ++ struct rb_root root; ++#if UMP_RANDOM_MAP_DELAY ++ struct { ++ unsigned long count; ++ unsigned long timestamp; ++ } failed; ++#endif ++} ump_random_mapping; + -+ cores_ready = -+ kbasep_js_job_check_ref_cores(kbdev, js, -+ katom[idx]); ++/** ++ * Create a random mapping object ++ * Create a random mapping capable of holding 2^20 entries ++ * @return Pointer to a random mapping object, NULL on failure ++ */ ++ump_random_mapping *ump_random_mapping_create(void); + -+ if (katom[idx]->event_code == -+ BASE_JD_EVENT_PM_EVENT) { -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_RETURN_TO_JS; -+ break; -+ } ++/** ++ * Destroy a random mapping object ++ * @param map The map to free ++ */ ++void ump_random_mapping_destroy(ump_random_mapping *map); + -+ if (!cores_ready) -+ break; ++/** ++ * Allocate a new mapping entry (random ID) ++ * Allocates a new entry in the map. ++ * @param map The map to allocate a new entry in ++ * @param target The value to map to ++ * @return The random allocated, a negative value on error ++ */ ++int ump_random_mapping_insert(ump_random_mapping *map, ump_dd_mem *mem); + -+ kbase_js_affinity_retain_slot_cores(kbdev, js, -+ katom[idx]->affinity); -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY; ++/** ++ * Get the value mapped to by a random ID ++ * ++ * If the lookup fails, punish the calling thread by applying a delay. 
++ * ++ * @param map The map to lookup the random id in ++ * @param id The ID to lookup ++ * @param target Pointer to a pointer which will receive the stored value ++ * @return ump_dd_mem pointer on successful lookup, NULL on error ++ */ ++ump_dd_mem *ump_random_mapping_get(ump_random_mapping *map, int id); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: -+ if (!kbase_gpu_rmu_workaround(kbdev, js)) -+ break; ++void ump_random_mapping_put(ump_dd_mem *mem); + -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_READY; ++/** ++ * Free the random ID ++ * For the random to be reused it has to be freed ++ * @param map The map to free the random from ++ * @param id The ID to free ++ */ ++ump_dd_mem *ump_random_mapping_remove(ump_random_mapping *map, int id); + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_READY: ++#endif /* __UMP_KERNEL_RANDOM_MAPPING_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c b/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c +new file mode 100644 +index 000000000..e41931e1e +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_memory_backend.c +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ if (idx == 1) { -+ /* Only submit if head atom or previous -+ * atom already submitted */ -+ if ((katom[0]->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED && -+ katom[0]->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) -+ break; ++#include /* kernel module definitions */ ++#include /* request_mem_region */ + -+ /* If intra-slot serialization in use -+ * then don't submit atom to NEXT slot -+ */ -+ if (kbdev->serialize_jobs & -+ KBASE_SERIALIZE_INTRA_SLOT) -+ break; -+ } ++#include "arch/config.h" /* Configuration for current platform. 
The symlink for arch is set by Makefile */ + -+ /* If inter-slot serialization in use then don't -+ * submit atom if any other slots are in use */ -+ if ((kbdev->serialize_jobs & -+ KBASE_SERIALIZE_INTER_SLOT) && -+ other_slots_busy(kbdev, js)) -+ break; ++#include "ump_osk.h" ++#include "ump_kernel_common.h" ++#include "ump_kernel_memory_backend_os.h" ++#include "ump_kernel_memory_backend_dedicated.h" + -+ if ((kbdev->serialize_jobs & -+ KBASE_SERIALIZE_RESET) && -+ kbase_reset_gpu_active(kbdev)) -+ break; ++/* Configure which dynamic memory allocator to use */ ++int ump_backend = ARCH_UMP_BACKEND_DEFAULT; ++module_param(ump_backend, int, S_IRUGO); /* r--r--r-- */ ++MODULE_PARM_DESC(ump_backend, "0 = dedicated memory backend (default), 1 = OS memory backend"); + -+ /* Check if this job needs the cycle counter -+ * enabled before submission */ -+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) -+ kbase_pm_request_gpu_cycle_counter_l2_is_on( -+ kbdev); ++/* The base address of the memory block for the dedicated memory backend */ ++unsigned int ump_memory_address = ARCH_UMP_MEMORY_ADDRESS_DEFAULT; ++module_param(ump_memory_address, uint, S_IRUGO); /* r--r--r-- */ ++MODULE_PARM_DESC(ump_memory_address, "The physical address to map for the dedicated memory backend"); + -+ kbase_job_hw_submit(kbdev, katom[idx], js); -+ katom[idx]->gpu_rb_state = -+ KBASE_ATOM_GPU_RB_SUBMITTED; ++/* The size of the memory block for the dedicated memory backend */ ++unsigned int ump_memory_size = ARCH_UMP_MEMORY_SIZE_DEFAULT; ++module_param(ump_memory_size, uint, S_IRUGO); /* r--r--r-- */ ++MODULE_PARM_DESC(ump_memory_size, "The size of fixed memory to map in the dedicated memory backend"); + -+ /* Inform power management at start/finish of -+ * atom so it can update its GPU utilisation -+ * metrics. */ -+ kbase_pm_metrics_update(kbdev, -+ &katom[idx]->start_timestamp); ++ump_memory_backend *ump_memory_backend_create(void) ++{ ++ ump_memory_backend *backend = NULL; + -+ /* ***TRANSITION TO HIGHER STATE*** */ -+ /* fallthrough */ -+ case KBASE_ATOM_GPU_RB_SUBMITTED: -+ /* Atom submitted to HW, nothing else to do */ -+ break; ++ /* Create the dynamic memory allocator backend */ ++ if (0 == ump_backend) { ++ DBG_MSG(2, ("Using dedicated memory backend\n")); + -+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: -+ /* Only return if head atom or previous atom -+ * already removed - as atoms must be returned -+ * in order */ -+ if (idx == 0 || katom[0]->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ kbase_jm_return_atom_to_js(kbdev, -+ katom[idx]); -+ } -+ break; -+ } ++ DBG_MSG(2, ("Requesting dedicated memory: 0x%08x, size: %u\n", ump_memory_address, ump_memory_size)); ++ /* Ask the OS if we can use the specified physical memory */ ++ if (NULL == request_mem_region(ump_memory_address, ump_memory_size, "UMP Memory")) { ++ MSG_ERR(("Failed to request memory region (0x%08X - 0x%08X). 
Is Mali DD already loaded?\n", ump_memory_address, ump_memory_address + ump_memory_size - 1)); ++ return NULL; + } ++ backend = ump_block_allocator_create(ump_memory_address, ump_memory_size); ++ } else if (1 == ump_backend) { ++ DBG_MSG(2, ("Using OS memory backend, allocation limit: %d\n", ump_memory_size)); ++ backend = ump_os_memory_backend_create(ump_memory_size); + } + -+ /* Warn if PRLAM-8987 affinity restrictions are violated */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) -+ WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || -+ kbase_gpu_atoms_submitted(kbdev, 1)) && -+ kbase_gpu_atoms_submitted(kbdev, 2)); ++ return backend; +} + -+ -+void kbase_backend_run_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++void ump_memory_backend_destroy(void) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ kbase_gpu_enqueue_atom(kbdev, katom); -+ kbase_backend_slot_update(kbdev); ++ if (0 == ump_backend) { ++ DBG_MSG(2, ("Releasing dedicated memory: 0x%08x\n", ump_memory_address)); ++ release_mem_region(ump_memory_address, ump_memory_size); ++ } +} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c +new file mode 100644 +index 000000000..2b634ba79 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_atomics.c +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2010, 2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ -+ (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) ++/** ++ * @file ump_osk_atomics.c ++ * Implementation of the OS abstraction layer for the UMP kernel device driver ++ */ + -+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) ++#include "ump_osk.h" ++#include ++ ++int _ump_osk_atomic_dec_and_read(_mali_osk_atomic_t *atom) +{ -+ struct kbase_jd_atom *katom; -+ struct kbase_jd_atom *next_katom; ++ return atomic_dec_return((atomic_t *)&atom->u.val); ++} + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++int _ump_osk_atomic_inc_and_read(_mali_osk_atomic_t *atom) ++{ ++ return atomic_inc_return((atomic_t *)&atom->u.val); ++} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c +new file mode 100644 +index 000000000..e08bf2525 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_low_level_mem.c +@@ -0,0 +1,314 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++ */ + -+ katom = kbase_gpu_inspect(kbdev, js, 0); -+ next_katom = kbase_gpu_inspect(kbdev, js, 1); ++/** ++ * @file ump_osk_memory.c ++ * Implementation of the OS abstraction layer for the kernel device driver ++ */ + -+ if (next_katom && katom->kctx == next_katom->kctx && -+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && -+ HAS_DEP(next_katom) && -+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) -+ != 0 || -+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) -+ != 0)) { -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), -+ JS_COMMAND_NOP, NULL); -+ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; ++/* needed to detect kernel version specific code */ ++#include + -+ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); -+ KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as -+ [katom->kctx->as_nr]); -+ KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, -+ &kbdev->gpu_props.props.raw_props.js_features -+ [katom->slot_nr]); ++#include "ump_osk.h" ++#include "ump_uk_types.h" ++#include "ump_ukk.h" ++#include "ump_kernel_common.h" ++#include /* kernel module definitions */ ++#include ++#include ++#include + -+ return true; -+ } ++#include ++#include /* to verify pointers from user space */ ++#include ++#include + -+ return false; ++typedef struct ump_vma_usage_tracker { ++ atomic_t references; ++ ump_memory_allocation *descriptor; ++} ump_vma_usage_tracker; ++ ++static void ump_vma_open(struct vm_area_struct *vma); ++static void ump_vma_close(struct vm_area_struct *vma); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf); ++#else ++static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address); ++#endif ++ ++static struct vm_operations_struct ump_vm_ops = { ++ .open = ump_vma_open, ++ .close = ump_vma_close, ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++ .fault = ump_cpu_page_fault_handler ++#else ++ .nopfn = ump_cpu_page_fault_handler ++#endif ++}; ++ ++/* ++ * Page fault for VMA region ++ * This should never happen since we always map in the entire virtual memory range. 
++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++static int ump_cpu_page_fault_handler(struct vm_area_struct *vma, struct vm_fault *vmf) ++#else ++static unsigned long ump_cpu_page_fault_handler(struct vm_area_struct *vma, unsigned long address) ++#endif ++{ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++ void __user *address; ++ address = vmf->virtual_address; ++#endif ++ MSG_ERR(("Page-fault in UMP memory region caused by the CPU\n")); ++ MSG_ERR(("VMA: 0x%08lx, virtual address: 0x%08lx\n", (unsigned long)vma, address)); ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) ++ return VM_FAULT_SIGBUS; ++#else ++ return NOPFN_SIGBUS; ++#endif +} + -+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, -+ u32 completion_code, -+ u64 job_tail, -+ ktime_t *end_timestamp) ++static void ump_vma_open(struct vm_area_struct *vma) +{ -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); -+ struct kbase_context *kctx = katom->kctx; ++ ump_vma_usage_tracker *vma_usage_tracker; ++ int new_val; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data; ++ BUG_ON(NULL == vma_usage_tracker); + -+ /* -+ * When a hard-stop is followed close after a soft-stop, the completion -+ * code may be set to STOPPED, even though the job is terminated -+ */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { -+ if (completion_code == BASE_JD_EVENT_STOPPED && -+ (katom->atom_flags & -+ KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { -+ completion_code = BASE_JD_EVENT_TERMINATED; -+ } -+ } ++ new_val = atomic_inc_return(&vma_usage_tracker->references); + -+ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req & -+ BASE_JD_REQ_SKIP_CACHE_END)) && -+ completion_code != BASE_JD_EVENT_DONE && -+ !(completion_code & BASE_JD_SW_EVENT)) { -+ /* When a job chain fails, on a T60x or when -+ * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not -+ * flushed. To prevent future evictions causing possible memory -+ * corruption we need to flush the cache manually before any -+ * affected memory gets reused. */ -+ katom->need_cache_flush_cores_retained = katom->affinity; -+ kbase_pm_request_cores(kbdev, false, katom->affinity); -+ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { -+ if (kbdev->gpu_props.num_core_groups > 1 && -+ !(katom->affinity & -+ kbdev->gpu_props.props.coherency_info.group[0].core_mask -+ ) && -+ (katom->affinity & -+ kbdev->gpu_props.props.coherency_info.group[1].core_mask -+ )) { -+ dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); -+ katom->need_cache_flush_cores_retained = -+ katom->affinity; -+ kbase_pm_request_cores(kbdev, false, -+ katom->affinity); -+ } -+ } ++ DBG_MSG(4, ("VMA open, VMA reference count incremented. VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val)); ++} + -+ katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); -+ kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); ++static void ump_vma_close(struct vm_area_struct *vma) ++{ ++ ump_vma_usage_tracker *vma_usage_tracker; ++ _ump_uk_unmap_mem_s args; ++ int new_val; + -+ if (completion_code == BASE_JD_EVENT_STOPPED) { -+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, -+ 0); ++ vma_usage_tracker = (ump_vma_usage_tracker *)vma->vm_private_data; ++ BUG_ON(NULL == vma_usage_tracker); + -+ /* -+ * Dequeue next atom from ringbuffers on same slot if required. 
-+ * This atom will already have been removed from the NEXT -+ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that -+ * the atoms on this slot are returned in the correct order. -+ */ -+ if (next_katom && katom->kctx == next_katom->kctx && -+ next_katom->sched_priority == -+ katom->sched_priority) { -+ kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); -+ kbase_jm_return_atom_to_js(kbdev, next_katom); -+ } -+ } else if (completion_code != BASE_JD_EVENT_DONE) { -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ int i; ++ new_val = atomic_dec_return(&vma_usage_tracker->references); + -+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 -+ KBASE_TRACE_DUMP(kbdev); -+#endif -+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); ++ DBG_MSG(4, ("VMA close, VMA reference count decremented. VMA: 0x%08lx, reference count: %d\n", (unsigned long)vma, new_val)); + -+ /* -+ * Remove all atoms on the same context from ringbuffers. This -+ * will not remove atoms that are already on the GPU, as these -+ * are guaranteed not to have fail dependencies on the failed -+ * atom. -+ */ -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { -+ struct kbase_jd_atom *katom_idx0 = -+ kbase_gpu_inspect(kbdev, i, 0); -+ struct kbase_jd_atom *katom_idx1 = -+ kbase_gpu_inspect(kbdev, i, 1); ++ if (0 == new_val) { ++ ump_memory_allocation *descriptor; + -+ if (katom_idx0 && katom_idx0->kctx == katom->kctx && -+ HAS_DEP(katom_idx0) && -+ katom_idx0->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Dequeue katom_idx0 from ringbuffer */ -+ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); ++ descriptor = vma_usage_tracker->descriptor; + -+ if (katom_idx1 && -+ katom_idx1->kctx == katom->kctx -+ && HAS_DEP(katom_idx1) && -+ katom_idx0->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Dequeue katom_idx1 from ringbuffer */ -+ kbase_gpu_dequeue_atom(kbdev, i, -+ end_timestamp); ++ args.ctx = descriptor->ump_session; ++ args.cookie = descriptor->cookie; ++ args.mapping = descriptor->mapping; ++ args.size = descriptor->size; + -+ katom_idx1->event_code = -+ BASE_JD_EVENT_STOPPED; -+ kbase_jm_return_atom_to_js(kbdev, -+ katom_idx1); -+ } -+ katom_idx0->event_code = BASE_JD_EVENT_STOPPED; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx0); ++ args._ukk_private = NULL; /** @note unused */ + -+ } else if (katom_idx1 && -+ katom_idx1->kctx == katom->kctx && -+ HAS_DEP(katom_idx1) && -+ katom_idx1->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Can not dequeue this atom yet - will be -+ * dequeued when atom at idx0 completes */ -+ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; -+ kbase_gpu_mark_atom_for_return(kbdev, -+ katom_idx1); -+ } -+ } ++ DBG_MSG(4, ("No more VMA references left, releasing UMP memory\n")); ++ _ump_ukk_unmap_mem(& args); ++ ++ /* vma_usage_tracker is free()d by _ump_osk_mem_mapregion_term() */ + } ++} + -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, -+ js, completion_code); ++_mali_osk_errcode_t _ump_osk_mem_mapregion_init(ump_memory_allocation *descriptor) ++{ ++ ump_vma_usage_tracker *vma_usage_tracker; ++ struct vm_area_struct *vma; + -+ if (job_tail != 0 && job_tail != katom->jc) { -+ bool was_updated = (job_tail != katom->jc); ++ if (NULL == descriptor) return _MALI_OSK_ERR_FAULT; + -+ /* Some of the job has been executed, so we update the job chain -+ * address to where we should resume from */ -+ katom->jc = job_tail; -+ if (was_updated) -+ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, -+ katom, job_tail, js); ++ 
vma_usage_tracker = kmalloc(sizeof(ump_vma_usage_tracker), GFP_KERNEL); ++ if (NULL == vma_usage_tracker) { ++ DBG_MSG(1, ("Failed to allocate memory for ump_vma_usage_tracker in _mali_osk_mem_mapregion_init\n")); ++ return -_MALI_OSK_ERR_FAULT; + } + -+ /* Only update the event code for jobs that weren't cancelled */ -+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) -+ katom->event_code = (base_jd_event_code)completion_code; ++ vma = (struct vm_area_struct *)descriptor->process_mapping_info; ++ if (NULL == vma) { ++ kfree(vma_usage_tracker); ++ return _MALI_OSK_ERR_FAULT; ++ } + -+ kbase_device_trace_register_access(kctx, REG_WRITE, -+ JOB_CONTROL_REG(JOB_IRQ_CLEAR), -+ 1 << js); ++ vma->vm_private_data = vma_usage_tracker; ++ vma->vm_flags |= VM_IO; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ++ vma->vm_flags |= VM_RESERVED; ++#else ++ vma->vm_flags |= VM_DONTDUMP; ++ vma->vm_flags |= VM_DONTEXPAND; ++ vma->vm_flags |= VM_PFNMAP; ++#endif + -+ /* Complete the job, and start new ones -+ * -+ * Also defer remaining work onto the workqueue: -+ * - Re-queue Soft-stopped jobs -+ * - For any other jobs, queue the job back into the dependency system -+ * - Schedule out the parent context if necessary, and schedule a new -+ * one in. -+ */ -+#ifdef CONFIG_GPU_TRACEPOINTS -+ { -+ /* The atom in the HEAD */ -+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, -+ 0); + -+ if (next_katom && next_katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ char js_string[16]; ++ if (0 == descriptor->is_cached) { ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ++ } ++ DBG_MSG(3, ("Mapping with page_prot: 0x%x\n", vma->vm_page_prot)); + -+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, -+ js_string, -+ sizeof(js_string)), -+ ktime_to_ns(*end_timestamp), -+ (u32)next_katom->kctx->id, 0, -+ next_katom->work_id); -+ kbdev->hwaccess.backend.slot_rb[js].last_context = -+ next_katom->kctx; -+ } else { -+ char js_string[16]; ++ /* Setup the functions which handle further VMA handling */ ++ vma->vm_ops = &ump_vm_ops; + -+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, -+ js_string, -+ sizeof(js_string)), -+ ktime_to_ns(ktime_get()), 0, 0, -+ 0); -+ kbdev->hwaccess.backend.slot_rb[js].last_context = 0; -+ } -+ } -+#endif ++ /* Do the va range allocation - in this case, it was done earlier, so we copy in that information */ ++ descriptor->mapping = (void __user *)vma->vm_start; + -+ if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) -+ kbase_reset_gpu_silent(kbdev); ++ atomic_set(&vma_usage_tracker->references, 1); /*this can later be increased if process is forked, see ump_vma_open() */ ++ vma_usage_tracker->descriptor = descriptor; + -+ if (completion_code == BASE_JD_EVENT_STOPPED) -+ katom = kbase_jm_return_atom_to_js(kbdev, katom); -+ else -+ katom = kbase_jm_complete(kbdev, katom, end_timestamp); ++ return _MALI_OSK_ERR_OK; ++} + -+ if (katom) { -+ /* Cross-slot dependency has now become runnable. Try to submit -+ * it. 
*/ ++void _ump_osk_mem_mapregion_term(ump_memory_allocation *descriptor) ++{ ++ struct vm_area_struct *vma; ++ ump_vma_usage_tracker *vma_usage_tracker; + -+ /* Check if there are lower priority jobs to soft stop */ -+ kbase_job_slot_ctx_priority_check_locked(kctx, katom); ++ if (NULL == descriptor) return; + -+ kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); -+ } ++ /* Linux does the right thing as part of munmap to remove the mapping ++ * All that remains is that we remove the vma_usage_tracker setup in init() */ ++ vma = (struct vm_area_struct *)descriptor->process_mapping_info; + -+ /* Job completion may have unblocked other atoms. Try to update all job -+ * slots */ -+ kbase_backend_slot_update(kbdev); ++ vma_usage_tracker = vma->vm_private_data; ++ ++ /* We only get called if mem_mapregion_init succeeded */ ++ kfree(vma_usage_tracker); ++ return; +} + -+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) ++_mali_osk_errcode_t _ump_osk_mem_mapregion_map(ump_memory_allocation *descriptor, u32 offset, u32 *phys_addr, unsigned long size) +{ -+ int js; ++ struct vm_area_struct *vma; ++ _mali_osk_errcode_t retval; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (NULL == descriptor) return _MALI_OSK_ERR_FAULT; + -+ /* Reset should always take the GPU out of protected mode */ -+ WARN_ON(kbase_gpu_in_protected_mode(kbdev)); ++ vma = (struct vm_area_struct *)descriptor->process_mapping_info; + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int atom_idx = 0; -+ int idx; ++ if (NULL == vma) return _MALI_OSK_ERR_FAULT; + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, atom_idx); -+ bool keep_in_jm_rb = false; ++ retval = remap_pfn_range(vma, ((u32)descriptor->mapping) + offset, (*phys_addr) >> PAGE_SHIFT, size, vma->vm_page_prot) ? _MALI_OSK_ERR_FAULT : _MALI_OSK_ERR_OK;; + -+ if (!katom) -+ break; -+ if (katom->protected_state.exit == -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) -+ { -+ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); ++ DBG_MSG(4, ("Mapping virtual to physical memory. ID: %u, vma: 0x%08lx, virtual addr:0x%08lx, physical addr: 0x%08lx, size:%lu, prot:0x%x, vm_flags:0x%x RETVAL: 0x%x\n", ++ ump_dd_secure_id_get(descriptor->handle), ++ (unsigned long)vma, ++ (unsigned long)(vma->vm_start + offset), ++ (unsigned long)*phys_addr, ++ size, ++ (unsigned int)vma->vm_page_prot, vma->vm_flags, retval)); + -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ return retval; ++} + -+ /* protected mode sanity checks */ -+ KBASE_DEBUG_ASSERT_MSG( -+ kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), -+ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", -+ kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); -+ KBASE_DEBUG_ASSERT_MSG( -+ (kbase_jd_katom_is_protected(katom) && js == 0) || -+ !kbase_jd_katom_is_protected(katom), -+ "Protected atom on JS%d not supported", js); ++static void level1_cache_flush_all(void) ++{ ++ DBG_MSG(4, ("UMP[xx] Flushing complete L1 cache\n")); ++ __cpuc_flush_kern_all(); ++} ++ ++void _ump_osk_msync(ump_dd_mem *mem, void *virt, u32 offset, u32 size, ump_uk_msync_op op, ump_session_data *session_data) ++{ ++ int i; ++ ++ /* Flush L1 using virtual address, the entire range in one go. ++ * Only flush if user space process has a valid write mapping on given address. 
*/ ++ if ((mem) && (virt != NULL) && (access_ok(virt, size))) { ++ __cpuc_flush_dcache_area(virt, size); ++ DBG_MSG(3, ("UMP[%02u] Flushing CPU L1 Cache. CPU address: %x, size: %x\n", mem->secure_id, virt, size)); ++ } else { ++ if (session_data) { ++ if (op == _UMP_UK_MSYNC_FLUSH_L1) { ++ DBG_MSG(4, ("UMP Pending L1 cache flushes: %d\n", session_data->has_pending_level1_cache_flush)); ++ session_data->has_pending_level1_cache_flush = 0; ++ level1_cache_flush_all(); ++ return; ++ } else { ++ if (session_data->cache_operations_ongoing) { ++ session_data->has_pending_level1_cache_flush++; ++ DBG_MSG(4, ("UMP[%02u] Defering the L1 flush. Nr pending:%d\n", mem->secure_id, session_data->has_pending_level1_cache_flush)); ++ } else { ++ /* Flushing the L1 cache for each switch_user() if ump_cache_operations_control(START) is not called */ ++ level1_cache_flush_all(); ++ } + } -+ if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) -+ keep_in_jm_rb = true; ++ } else { ++ DBG_MSG(4, ("Unkown state %s %d\n", __FUNCTION__, __LINE__)); ++ level1_cache_flush_all(); ++ } ++ } + -+ kbase_gpu_release_atom(kbdev, katom, NULL); ++ if (NULL == mem) return; + -+ /* -+ * If the atom wasn't on HW when the reset was issued -+ * then leave it in the RB and next time we're kicked -+ * it will be processed again from the starting state. -+ */ -+ if (keep_in_jm_rb) { -+ kbasep_js_job_check_deref_cores(kbdev, katom); -+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -+ katom->affinity = 0; -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; -+ /* As the atom was not removed, increment the -+ * index so that we read the correct atom in the -+ * next iteration. */ -+ atom_idx++; -+ continue; ++ if (mem->size_bytes == size) { ++ DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache\n", mem->secure_id)); ++ } else { ++ DBG_MSG(3, ("UMP[%02u] Flushing CPU L2 Cache. Blocks:%u, TotalSize:%u. FlushSize:%u Offset:0x%x FirstPaddr:0x%08x\n", ++ mem->secure_id, mem->nr_blocks, mem->size_bytes, size, offset, mem->block_array[0].addr)); ++ } ++ ++ ++ /* Flush L2 using physical addresses, block for block. */ ++ for (i = 0 ; i < mem->nr_blocks; i++) { ++ u32 start_p, end_p; ++ ump_dd_physical_block *block; ++ block = &mem->block_array[i]; ++ ++ if (offset >= block->size) { ++ offset -= block->size; ++ continue; ++ } ++ ++ if (offset) { ++ start_p = (u32)block->addr + offset; ++ /* We'll zero the offset later, after using it to calculate end_p. */ ++ } else { ++ start_p = (u32)block->addr; ++ } ++ ++ if (size < block->size - offset) { ++ end_p = start_p + size; ++ size = 0; ++ } else { ++ if (offset) { ++ end_p = start_p + (block->size - offset); ++ size -= block->size - offset; ++ offset = 0; ++ } else { ++ end_p = start_p + block->size; ++ size -= block->size; + } ++ } + -+ /* -+ * The atom was on the HW when the reset was issued -+ * all we can do is fail the atom. -+ */ -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ kbase_jm_complete(kbdev, katom, end_timestamp); ++ switch (op) { ++ case _UMP_UK_MSYNC_CLEAN: ++ outer_clean_range(start_p, end_p); ++ break; ++ case _UMP_UK_MSYNC_CLEAN_AND_INVALIDATE: ++ outer_flush_range(start_p, end_p); ++ break; ++ case _UMP_UK_MSYNC_INVALIDATE: ++ outer_inv_range(start_p, end_p); ++ break; ++ default: ++ break; ++ } ++ ++ if (0 == size) { ++ /* Nothing left to flush. 
*/ ++ break; + } + } + -+ kbdev->protected_mode_transition = false; ++ return; +} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c b/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c +new file mode 100644 +index 000000000..58c9f1bf2 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_osk_misc.c +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, -+ int js, -+ struct kbase_jd_atom *katom, -+ u32 action) -+{ -+ u32 hw_action = action & JS_COMMAND_MASK; ++/** ++ * @file ump_osk_misc.c ++ * Implementation of the OS abstraction layer for the UMP kernel device driver ++ */ + -+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); -+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, -+ katom->core_req, katom); -+ katom->kctx->blocked_js[js][katom->sched_priority] = true; -+} + -+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ u32 action, -+ bool disjoint) -+{ -+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_gpu_mark_atom_for_return(kbdev, katom); -+ katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; ++#include "ump_osk.h" + -+ if (disjoint) -+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, -+ katom); -+} ++#include ++#include "ump_kernel_linux.h" + -+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) ++/* is called from ump_kernel_constructor in common code */ ++_mali_osk_errcode_t _ump_osk_init(void) +{ -+ if (katom->x_post_dep) { -+ struct kbase_jd_atom *dep_atom = katom->x_post_dep; -+ -+ if (dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && -+ dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_RETURN_TO_JS) -+ return dep_atom->slot_nr; ++ if (0 != ump_kernel_device_initialize()) { ++ return _MALI_OSK_ERR_FAULT; + } -+ return -1; -+} + -+static void kbase_job_evicted(struct kbase_jd_atom *katom) -+{ -+ kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, -+ katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); ++ return _MALI_OSK_ERR_OK; +} + -+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js, -+ struct kbase_jd_atom *katom, -+ u32 action) ++_mali_osk_errcode_t _ump_osk_term(void) +{ -+ struct kbase_jd_atom *katom_idx0; -+ struct kbase_jd_atom *katom_idx1; ++ ump_kernel_device_terminate(); ++ return _MALI_OSK_ERR_OK; ++} +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c +new file mode 100644 +index 000000000..56a787ff6 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.c +@@ -0,0 +1,230 @@ ++/* ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ bool katom_idx0_valid, katom_idx1_valid; ++/** ++ * @file ump_ukk_wrappers.c ++ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation ++ */ + -+ bool ret = false; + -+ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; -+ int prio_idx0 = 0, prio_idx1 = 0; ++#include /* user space access */ + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++#include "ump_osk.h" ++#include "ump_uk_types.h" ++#include "ump_ukk.h" ++#include "ump_kernel_common.h" ++#include ++#include "ump_kernel_interface_ref_drv.h" ++#include "mali_osk_list.h" + -+ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); -+ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); ++extern struct device *ump_global_mdev; + -+ if (katom_idx0) -+ prio_idx0 = katom_idx0->sched_priority; -+ if (katom_idx1) -+ prio_idx1 = katom_idx1->sched_priority; ++/* ++ * IOCTL operation; Allocate UMP memory ++ */ ++int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++{ ++ _ump_uk_allocate_s user_interaction; ++ _mali_osk_errcode_t err; + -+ if (katom) { -+ katom_idx0_valid = (katom_idx0 == katom); -+ /* If idx0 is to be removed and idx1 is on the same context, -+ * then idx1 must also be removed otherwise the atoms might be -+ * returned out of order */ -+ if (katom_idx1) -+ katom_idx1_valid = (katom_idx1 == katom) || -+ (katom_idx0_valid && -+ (katom_idx0->kctx == -+ katom_idx1->kctx)); -+ else -+ katom_idx1_valid = false; -+ } else { -+ katom_idx0_valid = (katom_idx0 && -+ (!kctx || katom_idx0->kctx == kctx)); -+ katom_idx1_valid = (katom_idx1 && -+ (!kctx || katom_idx1->kctx == kctx) && -+ prio_idx0 == prio_idx1); ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_allocate()\n")); ++ return -ENOTTY; + } + -+ if (katom_idx0_valid) -+ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); -+ if (katom_idx1_valid) -+ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); ++ /* Copy the user space memory to kernel space (so we safely can read it) */ ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_allocate()\n")); ++ return -EFAULT; ++ } + -+ if (katom_idx0_valid) { -+ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Simple case - just dequeue and return */ -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ if (katom_idx1_valid) { -+ kbase_gpu_dequeue_atom(kbdev, js, NULL); -+ katom_idx1->event_code = -+ BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx1); -+ katom_idx1->kctx->blocked_js[js][prio_idx1] = -+ true; -+ } ++ user_interaction.ctx = (void *) session_data; + -+ katom_idx0->event_code = -+ BASE_JD_EVENT_REMOVED_FROM_NEXT; -+ kbase_jm_return_atom_to_js(kbdev, katom_idx0); -+ katom_idx0->kctx->blocked_js[js][prio_idx0] = true; -+ } else { -+ /* katom_idx0 is on GPU */ -+ if (katom_idx1 && katom_idx1->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* katom_idx0 and katom_idx1 are on GPU */ ++ err = 
_ump_ukk_allocate(&user_interaction); ++ if (_MALI_OSK_ERR_OK != err) { ++ DBG_MSG(1, ("_ump_ukk_allocate() failed in ump_ioctl_allocate()\n")); ++ return ump_map_errcode(err); ++ } ++ user_interaction.ctx = NULL; + -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), NULL) == 0) { -+ /* idx0 has already completed - stop -+ * idx1 if needed*/ -+ if (katom_idx1_valid) { -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } else { -+ /* idx1 is in NEXT registers - attempt -+ * to remove */ -+ kbase_reg_write(kbdev, -+ JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), -+ JS_COMMAND_NOP, NULL); ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ /* If the copy fails then we should release the memory. We can use the IOCTL release to accomplish this */ ++ _ump_uk_release_s release_args; + -+ if (kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_LO), NULL) -+ != 0 || -+ kbase_reg_read(kbdev, -+ JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_HI), NULL) -+ != 0) { -+ /* idx1 removed successfully, -+ * will be handled in IRQ */ -+ kbase_job_evicted(katom_idx1); -+ kbase_gpu_remove_atom(kbdev, -+ katom_idx1, -+ action, true); -+ stop_x_dep_idx1 = -+ should_stop_x_dep_slot(katom_idx1); ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_allocate()\n")); + -+ /* stop idx0 if still on GPU */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx0, -+ action); -+ ret = true; -+ } else if (katom_idx1_valid) { -+ /* idx0 has already completed, -+ * stop idx1 if needed */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } -+ } else if (katom_idx1_valid) { -+ /* idx1 not on GPU but must be dequeued*/ ++ release_args.ctx = (void *) session_data; ++ release_args.secure_id = user_interaction.secure_id; + -+ /* idx1 will be handled in IRQ */ -+ kbase_gpu_remove_atom(kbdev, katom_idx1, action, -+ false); -+ /* stop idx0 */ -+ /* This will be repeated for anything removed -+ * from the next registers, since their normal -+ * flow was also interrupted, and this function -+ * might not enter disjoint state e.g. 
if we -+ * don't actually do a hard stop on the head -+ * atom */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx0, -+ action); -+ ret = true; -+ } else { -+ /* no atom in idx1 */ -+ /* just stop idx0 */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx0, -+ action); -+ ret = true; -+ } ++ err = _ump_ukk_release(&release_args); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("_ump_ukk_release() also failed when trying to release newly allocated memory in ump_ioctl_allocate()\n")); + } -+ } else if (katom_idx1_valid) { -+ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { -+ /* Mark for return */ -+ /* idx1 will be returned once idx0 completes */ -+ kbase_gpu_remove_atom(kbdev, katom_idx1, action, -+ false); -+ } else { -+ /* idx1 is on GPU */ -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), NULL) == 0) { -+ /* idx0 has already completed - stop idx1 */ -+ kbase_gpu_stop_atom(kbdev, js, katom_idx1, -+ action); -+ ret = true; -+ } else { -+ /* idx1 is in NEXT registers - attempt to -+ * remove */ -+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, -+ JS_COMMAND_NEXT), -+ JS_COMMAND_NOP, NULL); + -+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_LO), NULL) != 0 || -+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, -+ JS_HEAD_NEXT_HI), NULL) != 0) { -+ /* idx1 removed successfully, will be -+ * handled in IRQ once idx0 completes */ -+ kbase_job_evicted(katom_idx1); -+ kbase_gpu_remove_atom(kbdev, katom_idx1, -+ action, -+ false); -+ } else { -+ /* idx0 has already completed - stop -+ * idx1 */ -+ kbase_gpu_stop_atom(kbdev, js, -+ katom_idx1, -+ action); -+ ret = true; -+ } -+ } -+ } ++ return -EFAULT; + } + -+ -+ if (stop_x_dep_idx0 != -1) -+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, -+ NULL, action); -+ -+ if (stop_x_dep_idx1 != -1) -+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, -+ NULL, action); -+ -+ return ret; ++ return 0; /* success */ +} + -+void kbase_gpu_cacheclean(struct kbase_device *kbdev) ++#ifdef CONFIG_DMA_SHARED_BUFFER ++static ump_dd_handle get_ump_handle_from_dmabuf(struct ump_session_data *session_data, ++ struct dma_buf *dmabuf) +{ -+ /* Limit the number of loops to avoid a hang if the interrupt is missed -+ */ -+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; ++ ump_session_memory_list_element *session_mem, *tmp; ++ struct dma_buf_attachment *attach; ++ ump_dd_handle ump_handle; + -+ mutex_lock(&kbdev->cacheclean_lock); ++ DEBUG_ASSERT_POINTER(session_data); + -+ /* use GPU_COMMAND completion solution */ -+ /* clean & invalidate the caches */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAN_INV_CACHES, NULL); ++ _mali_osk_mutex_wait(session_data->lock); + -+ /* wait for cache flush to complete before continuing */ -+ while (--max_loops && -+ (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & -+ CLEAN_CACHES_COMPLETED) == 0) -+ ; ++ _MALI_OSK_LIST_FOREACHENTRY(session_mem, tmp, ++ &session_data->list_head_session_memory_list, ++ ump_session_memory_list_element, list) { ++ if (session_mem->mem->import_attach) { ++ attach = session_mem->mem->import_attach; ++ if (attach->dmabuf == dmabuf) { ++ _mali_osk_mutex_signal(session_data->lock); ++ ump_handle = (ump_dd_handle)session_mem->mem; ++ ump_random_mapping_get(device.secure_id_map, ump_dd_secure_id_get(ump_handle)); ++ return ump_handle; ++ } ++ } ++ } + -+ /* clear the CLEAN_CACHES_COMPLETED irq */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, -+ 
CLEAN_CACHES_COMPLETED); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), -+ CLEAN_CACHES_COMPLETED, NULL); -+ KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state != -+ KBASE_INSTR_STATE_CLEANING, -+ "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); ++ _mali_osk_mutex_signal(session_data->lock); + -+ mutex_unlock(&kbdev->cacheclean_lock); ++ return NULL; +} + -+void kbase_backend_cacheclean(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++int ump_dmabuf_import_wrapper(u32 __user *argument, ++ struct ump_session_data *session_data) +{ -+ if (katom->need_cache_flush_cores_retained) { -+ unsigned long flags; ++ ump_session_memory_list_element *session = NULL; ++ _ump_uk_dmabuf_s ump_dmabuf; ++ ump_dd_handle ump_handle; ++ ump_dd_physical_block *blocks = NULL; ++ struct dma_buf_attachment *attach = NULL; ++ struct dma_buf *dma_buf; ++ struct sg_table *sgt = NULL; ++ struct scatterlist *sgl; ++ unsigned int i = 0; ++ int ret = 0; + -+ kbase_gpu_cacheclean(kbdev); ++ /* Sanity check input parameters */ ++ if (!argument || !session_data) { ++ MSG_ERR(("NULL parameter.\n")); ++ return -EINVAL; ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_unrequest_cores(kbdev, false, -+ katom->need_cache_flush_cores_retained); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ katom->need_cache_flush_cores_retained = 0; ++ if (copy_from_user(&ump_dmabuf, argument, ++ sizeof(_ump_uk_dmabuf_s))) { ++ MSG_ERR(("copy_from_user() failed.\n")); ++ return -EFAULT; + } -+} + -+void kbase_backend_complete_wq(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ ++ dma_buf = dma_buf_get(ump_dmabuf.fd); ++ if (IS_ERR(dma_buf)) ++ return PTR_ERR(dma_buf); ++ + /* -+ * If cache flush required due to HW workaround then perform the flush -+ * now ++ * if already imported then increase a refcount to the ump descriptor ++ * and call dma_buf_put() and then go to found to return previous ++ * ump secure id. + */ -+ kbase_backend_cacheclean(kbdev, katom); -+ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && -+ (katom->core_req & BASE_JD_REQ_FS) && -+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT && -+ (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) && -+ !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) { -+ dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n"); -+ if (kbasep_10969_workaround_clamp_coordinates(katom)) { -+ /* The job had a TILE_RANGE_FAULT after was soft-stopped -+ * Due to an HW issue we try to execute the job again. -+ */ -+ dev_dbg(kbdev->dev, -+ "Clamping has been executed, try to rerun the job\n" -+ ); -+ katom->event_code = BASE_JD_EVENT_STOPPED; -+ katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; -+ } ++ ump_handle = get_ump_handle_from_dmabuf(session_data, dma_buf); ++ if (ump_handle) { ++ dma_buf_put(dma_buf); ++ goto found; + } + -+ /* Clear the coreref_state now - while check_deref_cores() may not have -+ * been called yet, the caller will have taken a copy of this field. If -+ * this is not done, then if the atom is re-scheduled (following a soft -+ * stop) then the core reference would not be retaken. 
*/ -+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -+ katom->affinity = 0; -+} -+ -+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, -+ base_jd_core_req core_req, u64 affinity, -+ enum kbase_atom_coreref_state coreref_state) -+{ -+ unsigned long flags; ++ attach = dma_buf_attach(dma_buf, ump_global_mdev); ++ if (IS_ERR(attach)) { ++ ret = PTR_ERR(attach); ++ goto err_dma_buf_put; ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, -+ coreref_state); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); ++ if (IS_ERR(sgt)) { ++ ret = PTR_ERR(sgt); ++ goto err_dma_buf_detach; ++ } + -+ if (!kbdev->pm.active_count) { -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); -+ kbase_pm_update_active(kbdev); -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&kbdev->js_data.runpool_mutex); ++ blocks = (ump_dd_physical_block *)_mali_osk_malloc(sizeof(ump_dd_physical_block) * sgt->nents); ++ if (!blocks) { ++ DBG_MSG(1, ("Failed to allocate blocks.\n")); ++ ret = -EFAULT; ++ goto err_dma_buf_unmap; ++ } ++ for_each_sg(sgt->sgl, sgl, sgt->nents, i) { ++ blocks[i].addr = sg_phys(sgl); ++ blocks[i].size = sg_dma_len(sgl); + } -+} + -+void kbase_gpu_dump_slots(struct kbase_device *kbdev) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ unsigned long flags; -+ int js; ++ /* ++ * Initialize the session memory list element, and add it ++ * to the session object ++ */ ++ session = _mali_osk_calloc(1, sizeof(*session)); ++ if (!session) { ++ DBG_MSG(1, ("Failed to allocate session.\n")); ++ ret = -EFAULT; ++ goto err_free_block; ++ } + -+ js_devdata = &kbdev->js_data; ++ ump_handle = ump_dd_handle_create_from_phys_blocks(blocks, i); ++ if (UMP_DD_HANDLE_INVALID == ump_handle) { ++ DBG_MSG(1, ("Failed to create ump handle.\n")); ++ ret = -EFAULT; ++ goto err_free_session; ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ session->mem = (ump_dd_mem *)ump_handle; ++ session->mem->import_attach = attach; ++ session->mem->sgt = sgt; + -+ dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); ++ _mali_osk_mutex_wait(session_data->lock); ++ _mali_osk_list_add(&(session->list), ++ &(session_data->list_head_session_memory_list)); ++ _mali_osk_mutex_signal(session_data->lock); + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ int idx; ++ _mali_osk_free(blocks); + -+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, -+ js, -+ idx); ++found: ++ ump_dmabuf.ctx = (void *)session_data; ++ ump_dmabuf.secure_id = ump_dd_secure_id_get(ump_handle); ++ ump_dmabuf.size = ump_dd_size_get(ump_handle); + -+ if (katom) -+ dev_info(kbdev->dev, -+ " js%d idx%d : katom=%p gpu_rb_state=%d\n", -+ js, idx, katom, katom->gpu_rb_state); -+ else -+ dev_info(kbdev->dev, " js%d idx%d : empty\n", -+ js, idx); -+ } ++ if (copy_to_user(argument, &ump_dmabuf, ++ sizeof(_ump_uk_dmabuf_s))) { ++ MSG_ERR(("copy_to_user() failed.\n")); ++ ret = -EFAULT; ++ goto err_release_ump_handle; + } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} -+ -+ ++ return ret; + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h ++err_release_ump_handle: ++ ump_dd_reference_release(ump_handle); ++err_free_session: ++ _mali_osk_free(session); ++err_free_block: ++ _mali_osk_free(blocks); ++err_dma_buf_unmap: 
++ dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL); ++err_dma_buf_detach: ++ dma_buf_detach(dma_buf, attach); ++err_dma_buf_put: ++ dma_buf_put(dma_buf); ++ return ret; ++} ++#endif +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h new file mode 100644 -index 000000000..1e0e05ad3 +index 000000000..61a7095a6 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h -@@ -0,0 +1,76 @@ ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_ref_wrappers.h +@@ -0,0 +1,36 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010, 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++/** ++ * @file ump_ukk_wrappers.h ++ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls for the reference implementation ++ */ + ++#ifndef __UMP_UKK_REF_WRAPPERS_H__ ++#define __UMP_UKK_REF_WRAPPERS_H__ + ++#include ++#include "ump_kernel_common.h" + -+/* -+ * Register-based HW access backend specific APIs -+ */ ++#ifdef __cplusplus ++extern "C" { ++#endif + -+#ifndef _KBASE_HWACCESS_GPU_H_ -+#define _KBASE_HWACCESS_GPU_H_ + -+#include ++int ump_allocate_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++#ifdef CONFIG_DMA_SHARED_BUFFER ++int ump_dmabuf_import_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++#endif + -+/** -+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot -+ * -+ * @kbdev: Device pointer -+ * @js: Job slot to evict from -+ * -+ * Evict the atom in the NEXT slot for the specified job slot. This function is -+ * called from the job complete IRQ handler when the previous job has failed. -+ * -+ * Return: true if job evicted from NEXT registers, false otherwise -+ */ -+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); ++#ifdef __cplusplus ++} ++#endif + -+/** -+ * kbase_gpu_complete_hw - Complete an atom on job slot js -+ * -+ * @kbdev: Device pointer -+ * @js: Job slot that has completed -+ * @completion_code: Event code from job that has completed -+ * @job_tail: The tail address from the hardware if the job has partially -+ * completed -+ * @end_timestamp: Time of completion ++#endif /* __UMP_UKK_REF_WRAPPERS_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c +new file mode 100644 +index 000000000..4d6b69608 +--- /dev/null ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.c +@@ -0,0 +1,280 @@ ++/* ++ * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ -+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, -+ u32 completion_code, -+ u64 job_tail, -+ ktime_t *end_timestamp); + +/** -+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer -+ * -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * @idx: Index into ringbuffer. 0 is the job currently running on -+ * the slot, 1 is the job waiting, all other values are invalid. -+ * Return: The atom at that position in the ringbuffer -+ * or NULL if no atom present ++ * @file ump_ukk_wrappers.c ++ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls + */ -+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, -+ int idx); + -+/** -+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers -+ * -+ * @kbdev: Device pointer -+ */ -+void kbase_gpu_dump_slots(struct kbase_device *kbdev); ++#include /* user space access */ ++ ++#include "ump_osk.h" ++#include "ump_uk_types.h" ++#include "ump_ukk.h" ++#include "ump_kernel_common.h" + -+#endif /* _KBASE_HWACCESS_GPU_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c -new file mode 100644 -index 000000000..54d8ddd80 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c -@@ -0,0 +1,303 @@ +/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * ++ * IOCTL operation; Negotiate version of IOCTL API + */ ++int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++{ ++ _ump_uk_api_version_s version_info; ++ _mali_osk_errcode_t err; + ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_get_api_version()\n")); ++ return -ENOTTY; ++ } + ++ /* Copy the user space memory to kernel space (so we safely can read it) */ ++ if (0 != copy_from_user(&version_info, argument, sizeof(version_info))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n")); ++ return -EFAULT; ++ } + ++ version_info.ctx = (void *) session_data; ++ err = _ump_uku_get_api_version(&version_info); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("_ump_uku_get_api_version() failed in ump_ioctl_get_api_version()\n")); ++ return ump_map_errcode(err); ++ } + ++ version_info.ctx = NULL; + -+/* -+ * Base kernel affinity manager APIs -+ */ -+ -+#include -+#include "mali_kbase_js_affinity.h" -+#include "mali_kbase_hw.h" ++ /* Copy ouput data back to user space */ ++ if (0 != copy_to_user(argument, &version_info, sizeof(version_info))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_get_api_version()\n")); ++ return -EFAULT; ++ } + -+#include ++ return 0; /* success */ ++} + + -+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, -+ int js) ++/* ++ * IOCTL operation; Release reference to specified UMP memory. ++ */ ++int ump_release_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ /* -+ * Here are the reasons for using job slot 2: -+ * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose) -+ * - In absence of the above, then: -+ * - Atoms with BASE_JD_REQ_COHERENT_GROUP -+ * - But, only when there aren't contexts with -+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on -+ * all cores on slot 1 could be blocked by those using a coherent group -+ * on slot 2 -+ * - And, only when you actually have 2 or more coregroups - if you -+ * only have 1 coregroup, then having jobs for slot 2 implies they'd -+ * also be for slot 1, meaning you'll get interference from them. 
Jobs -+ * able to run on slot 2 could also block jobs that can only run on -+ * slot 1 (tiler jobs) -+ */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) -+ return true; ++ _ump_uk_release_s release_args; ++ _mali_osk_errcode_t err; + -+ if (js != 2) -+ return true; ++ /* Sanity check input parameters */ ++ if (NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_release()\n")); ++ return -ENOTTY; ++ } + -+ /* Only deal with js==2 now: */ -+ if (kbdev->gpu_props.num_core_groups > 1) { -+ /* Only use slot 2 in the 2+ coregroup case */ -+ if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, -+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == -+ false) { -+ /* ...But only when we *don't* have atoms that run on -+ * all cores */ ++ /* Copy the user space memory to kernel space (so we safely can read it) */ ++ if (0 != copy_from_user(&release_args, argument, sizeof(release_args))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_get_api_version()\n")); ++ return -EFAULT; ++ } + -+ /* No specific check for BASE_JD_REQ_COHERENT_GROUP -+ * atoms - the policy will sort that out */ -+ return true; -+ } ++ release_args.ctx = (void *) session_data; ++ err = _ump_ukk_release(&release_args); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("_ump_ukk_release() failed in ump_ioctl_release()\n")); ++ return ump_map_errcode(err); + } + -+ /* Above checks failed mean we shouldn't use slot 2 */ -+ return false; ++ ++ return 0; /* success */ +} + +/* -+ * As long as it has been decided to have a deeper modification of -+ * what job scheduler, power manager and affinity manager will -+ * implement, this function is just an intermediate step that -+ * assumes: -+ * - all working cores will be powered on when this is called. -+ * - largest current configuration is 2 core groups. -+ * - It has been decided not to have hardcoded values so the low -+ * and high cores in a core split will be evently distributed. -+ * - Odd combinations of core requirements have been filtered out -+ * and do not get to this function (e.g. CS+T+NSS is not -+ * supported here). -+ * - This function is frequently called and can be optimized, -+ * (see notes in loops), but as the functionallity will likely -+ * be modified, optimization has not been addressed. -+*/ -+bool kbase_js_choose_affinity(u64 * const affinity, -+ struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, int js) ++ * IOCTL operation; Return size for specified UMP memory. ++ */ ++int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ base_jd_core_req core_req = katom->core_req; -+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; -+ u64 core_availability_mask; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ _ump_uk_size_get_s user_interaction; ++ _mali_osk_errcode_t err; + -+ core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; ++ } + -+ /* -+ * If no cores are currently available (core availability policy is -+ * transitioning) then fail. 
-+ */ -+ if (0 == core_availability_mask) { -+ *affinity = 0; -+ return false; ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_size_get()\n")); ++ return -EFAULT; + } + -+ KBASE_DEBUG_ASSERT(js >= 0); ++ user_interaction.ctx = (void *) session_data; ++ err = _ump_ukk_size_get(&user_interaction); ++ if (_MALI_OSK_ERR_OK != err) { ++ MSG_ERR(("_ump_ukk_size_get() failed in ump_ioctl_size_get()\n")); ++ return ump_map_errcode(err); ++ } + -+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == -+ BASE_JD_REQ_T) { -+ /* If the hardware supports XAFFINITY then we'll only enable -+ * the tiler (which is the default so this is a no-op), -+ * otherwise enable shader core 0. */ -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) -+ *affinity = 1; -+ else -+ *affinity = 0; ++ user_interaction.ctx = NULL; + -+ return true; ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_size_get()\n")); ++ return -EFAULT; + } + -+ if (1 == kbdev->gpu_props.num_cores) { -+ /* trivial case only one core, nothing to do */ -+ *affinity = core_availability_mask & -+ kbdev->pm.debug_core_mask[js]; -+ } else { -+ if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | -+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { -+ if (js == 0 || num_core_groups == 1) { -+ /* js[0] and single-core-group systems just get -+ * the first core group */ -+ *affinity = -+ kbdev->gpu_props.props.coherency_info.group[0].core_mask -+ & core_availability_mask & -+ kbdev->pm.debug_core_mask[js]; -+ } else { -+ /* js[1], js[2] use core groups 0, 1 for -+ * dual-core-group systems */ -+ u32 core_group_idx = ((u32) js) - 1; ++ return 0; /* success */ ++} + -+ KBASE_DEBUG_ASSERT(core_group_idx < -+ num_core_groups); -+ *affinity = -+ kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask -+ & core_availability_mask & -+ kbdev->pm.debug_core_mask[js]; ++/* ++ * IOCTL operation; Do cache maintenance on specified UMP memory. ++ */ ++int ump_msync_wrapper(u32 __user *argument, struct ump_session_data *session_data) ++{ ++ _ump_uk_msync_s user_interaction; + -+ /* If the job is specifically targeting core -+ * group 1 and the core availability policy is -+ * keeping that core group off, then fail */ -+ if (*affinity == 0 && core_group_idx == 1 && -+ kbdev->pm.backend.cg1_disabled -+ == true) -+ katom->event_code = -+ BASE_JD_EVENT_PM_EVENT; -+ } -+ } else { -+ /* All cores are available when no core split is -+ * required */ -+ *affinity = core_availability_mask & -+ kbdev->pm.debug_core_mask[js]; -+ } ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; + } + -+ /* -+ * If no cores are currently available in the desired core group(s) -+ * (core availability policy is transitioning) then fail. 
-+ */ -+ if (*affinity == 0) -+ return false; ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_msync()\n")); ++ return -EFAULT; ++ } + -+ /* Enable core 0 if tiler required for hardware without XAFFINITY -+ * support (notes above) */ -+ if (core_req & BASE_JD_REQ_T) { -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) -+ *affinity = *affinity | 1; ++ user_interaction.ctx = (void *) session_data; ++ ++ _ump_ukk_msync(&user_interaction); ++ ++ user_interaction.ctx = NULL; ++ ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_msync()\n")); ++ return -EFAULT; + } + -+ return true; ++ return 0; /* success */ +} -+ -+static inline bool kbase_js_affinity_is_violating( -+ struct kbase_device *kbdev, -+ u64 *affinities) ++int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ /* This implementation checks whether the two slots involved in Generic -+ * thread creation have intersecting affinity. This is due to micro- -+ * architectural issues where a job in slot A targetting cores used by -+ * slot B could prevent the job in slot B from making progress until the -+ * job in slot A has completed. -+ */ -+ u64 affinity_set_left; -+ u64 affinity_set_right; -+ u64 intersection; ++ _ump_uk_cache_operations_control_s user_interaction; + -+ KBASE_DEBUG_ASSERT(affinities != NULL); ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; ++ } + -+ affinity_set_left = affinities[1]; ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_cache_operations_control()\n")); ++ return -EFAULT; ++ } + -+ affinity_set_right = affinities[2]; ++ user_interaction.ctx = (void *) session_data; + -+ /* A violation occurs when any bit in the left_set is also in the -+ * right_set */ -+ intersection = affinity_set_left & affinity_set_right; ++ _ump_ukk_cache_operations_control((_ump_uk_cache_operations_control_s *) &user_interaction); + -+ return (bool) (intersection != (u64) 0u); ++ user_interaction.ctx = NULL; ++ ++#if 0 /* No data to copy back */ ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_cache_operations_control()\n")); ++ return -EFAULT; ++ } ++#endif ++ return 0; /* success */ +} + -+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, -+ u64 affinity) ++int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ struct kbasep_js_device_data *js_devdata; -+ u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; ++ _ump_uk_switch_hw_usage_s user_interaction; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); -+ js_devdata = &kbdev->js_data; ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; ++ } + -+ memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, -+ sizeof(js_devdata->runpool_irq.slot_affinities)); ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; ++ } + -+ new_affinities[js] |= affinity; ++ 
user_interaction.ctx = (void *) session_data; + -+ return kbase_js_affinity_is_violating(kbdev, new_affinities); ++ _ump_ukk_switch_hw_usage(&user_interaction); ++ ++ user_interaction.ctx = NULL; ++ ++#if 0 /* No data to copy back */ ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; ++ } ++#endif ++ return 0; /* success */ +} + -+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, -+ u64 affinity) ++int ump_lock_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ struct kbasep_js_device_data *js_devdata; -+ u64 cores; ++ _ump_uk_lock_s user_interaction; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); -+ js_devdata = &kbdev->js_data; ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; ++ } + -+ KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) -+ == false); ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; ++ } + -+ cores = affinity; -+ while (cores) { -+ int bitnum = fls64(cores) - 1; -+ u64 bit = 1ULL << bitnum; -+ s8 cnt; ++ user_interaction.ctx = (void *) session_data; + -+ cnt = -+ ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); ++ _ump_ukk_lock(&user_interaction); + -+ if (cnt == 1) -+ js_devdata->runpool_irq.slot_affinities[js] |= bit; ++ user_interaction.ctx = NULL; + -+ cores &= ~bit; ++#if 0 /* No data to copy back */ ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; + } ++#endif ++ ++ return 0; /* success */ +} + -+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, -+ u64 affinity) ++int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data *session_data) +{ -+ struct kbasep_js_device_data *js_devdata; -+ u64 cores; ++ _ump_uk_unlock_s user_interaction; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); -+ js_devdata = &kbdev->js_data; ++ /* Sanity check input parameters */ ++ if (NULL == argument || NULL == session_data) { ++ MSG_ERR(("NULL parameter in ump_ioctl_size_get()\n")); ++ return -ENOTTY; ++ } + -+ cores = affinity; -+ while (cores) { -+ int bitnum = fls64(cores) - 1; -+ u64 bit = 1ULL << bitnum; -+ s8 cnt; ++ if (0 != copy_from_user(&user_interaction, argument, sizeof(user_interaction))) { ++ MSG_ERR(("copy_from_user() in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; ++ } + -+ KBASE_DEBUG_ASSERT( -+ js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); ++ user_interaction.ctx = (void *) session_data; + -+ cnt = -+ --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); ++ _ump_ukk_unlock(&user_interaction); + -+ if (0 == cnt) -+ js_devdata->runpool_irq.slot_affinities[js] &= ~bit; ++ user_interaction.ctx = NULL; + -+ cores &= ~bit; ++#if 0 /* No data to copy back */ ++ if (0 != copy_to_user(argument, &user_interaction, sizeof(user_interaction))) { ++ MSG_ERR(("copy_to_user() failed in ump_ioctl_switch_hw_usage()\n")); ++ return -EFAULT; + } -+} -+ -+#if KBASE_TRACE_ENABLE -+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) -+{ -+ struct 
kbasep_js_device_data *js_devdata; -+ int slot_nr; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ js_devdata = &kbdev->js_data; ++#endif + -+ for (slot_nr = 0; slot_nr < 3; ++slot_nr) -+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, -+ NULL, 0u, slot_nr, -+ (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); ++ return 0; /* success */ +} -+#endif /* KBASE_TRACE_ENABLE */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +diff --git a/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h new file mode 100644 -index 000000000..35d9781ae +index 000000000..5f8fc683c --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h -@@ -0,0 +1,129 @@ ++++ b/drivers/gpu/arm/mali400/ump/linux/ump_ukk_wrappers.h +@@ -0,0 +1,46 @@ +/* -+ * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2010, 2012-2014, 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++/** ++ * @file ump_ukk_wrappers.h ++ * Defines the wrapper functions which turn Linux IOCTL calls into _ukk_ calls ++ */ + ++#ifndef __UMP_UKK_WRAPPERS_H__ ++#define __UMP_UKK_WRAPPERS_H__ + ++#include ++#include "ump_kernel_common.h" + ++#ifdef __cplusplus ++extern "C" { ++#endif + -+/* -+ * Affinity Manager internal APIs. -+ */ -+ -+#ifndef _KBASE_JS_AFFINITY_H_ -+#define _KBASE_JS_AFFINITY_H_ + -+/** -+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to -+ * submit a job to a particular job slot in the current status -+ * -+ * @kbdev: The kbase device structure of the device -+ * @js: Job slot number to check for allowance -+ * -+ * Will check if submitting to the given job slot is allowed in the current -+ * status. For example using job slot 2 while in soft-stoppable state and only -+ * having 1 coregroup is not allowed by the policy. This function should be -+ * called prior to submitting a job to a slot to make sure policy rules are not -+ * violated. -+ * -+ * The following locking conditions are made on the caller -+ * - it must hold hwaccess_lock -+ */ -+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); + -+/** -+ * kbase_js_choose_affinity - Compute affinity for a given job. -+ * -+ * @affinity: Affinity bitmap computed -+ * @kbdev: The kbase device structure of the device -+ * @katom: Job chain of which affinity is going to be found -+ * @js: Slot the job chain is being submitted -+ * -+ * Currently assumes an all-on/all-off power management policy. 
-+ * Also assumes there is at least one core with tiler available. -+ * -+ * Returns true if a valid affinity was chosen, false if -+ * no cores were available. -+ */ -+bool kbase_js_choose_affinity(u64 * const affinity, -+ struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, -+ int js); ++int ump_get_api_version_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_release_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_size_get_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_msync_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_cache_operations_control_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_switch_hw_usage_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_lock_wrapper(u32 __user *argument, struct ump_session_data *session_data); ++int ump_unlock_wrapper(u32 __user *argument, struct ump_session_data *session_data); + -+/** -+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on -+ * job slot @js would cause a violation of affinity restrictions. -+ * -+ * @kbdev: Kbase device structure -+ * @js: The job slot to test -+ * @affinity: The affinity mask to test -+ * -+ * The following locks must be held by the caller -+ * - hwaccess_lock -+ * -+ * Return: true if the affinity would violate the restrictions -+ */ -+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, -+ u64 affinity); + -+/** -+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by -+ * a slot -+ * -+ * @kbdev: Kbase device structure -+ * @js: The job slot retaining the cores -+ * @affinity: The cores to retain -+ * -+ * The following locks must be held by the caller -+ * - hwaccess_lock -+ */ -+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, -+ u64 affinity); + -+/** -+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used -+ * by a slot -+ * -+ * @kbdev: Kbase device structure -+ * @js: Job slot -+ * @affinity: Bit mask of core to be released -+ * -+ * Cores must be released as soon as a job is dequeued from a slot's 'submit -+ * slots', and before another job is submitted to those slots. Otherwise, the -+ * refcount could exceed the maximum number submittable to a slot, -+ * %BASE_JM_SUBMIT_SLOTS. 
-+ * -+ * The following locks must be held by the caller -+ * - hwaccess_lock -+ */ -+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, -+ u64 affinity); + -+/** -+ * kbase_js_debug_log_current_affinities - log the current affinities -+ * -+ * @kbdev: Kbase device structure -+ * -+ * Output to the Trace log the current tracked affinities on all slots -+ */ -+#if KBASE_TRACE_ENABLE -+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev); -+#else /* KBASE_TRACE_ENABLE */ -+static inline void -+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) -+{ ++#ifdef __cplusplus +} -+#endif /* KBASE_TRACE_ENABLE */ ++#endif + -+#endif /* _KBASE_JS_AFFINITY_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c -new file mode 100644 -index 000000000..a8c1af23a ++ ++ ++#endif /* __UMP_UKK_WRAPPERS_H__ */ +diff --git a/drivers/gpu/arm/mali400/ump/readme.txt b/drivers/gpu/arm/mali400/ump/readme.txt +new file mode 100755 +index 000000000..c238cf0f2 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c -@@ -0,0 +1,356 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++++ b/drivers/gpu/arm/mali400/ump/readme.txt +@@ -0,0 +1,28 @@ ++Building the UMP Device Driver for Linux ++---------------------------------------- + ++Build the UMP Device Driver for Linux by running the following make command: + ++KDIR= CONFIG= BUILD= make + ++where ++ kdir_path: Path to your Linux Kernel directory ++ your_config: Name of the sub-folder to find the required config.h file ++ ("arch-" will be prepended) ++ build_option: debug or release. Debug is default. + -+/* -+ * Register-based HW access backend specific job scheduler APIs -+ */ ++The config.h contains following configuration parameters: + -+#include -+#include -+#include -+#include ++ARCH_UMP_BACKEND_DEFAULT ++ 0 specifies the dedicated memory allocator. ++ 1 specifies the OS memory allocator. ++ARCH_UMP_MEMORY_ADDRESS_DEFAULT ++ This is only required for the dedicated memory allocator, and specifies ++ the physical start address of the memory block reserved for UMP. ++ARCH_UMP_MEMORY_SIZE_DEFAULT ++ This specified the size of the memory block reserved for UMP, or the ++ maximum limit for allocations from the OS. + -+/* -+ * Define for when dumping is enabled. -+ * This should not be based on the instrumentation level as whether dumping is -+ * enabled for a particular level is down to the integrator. However this is -+ * being used for now as otherwise the cinstr headers would be needed. -+ */ -+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL) ++The result will be a ump.ko file, which can be loaded into the Linux kernel ++by using the insmod command. The driver can also be built as a part of the ++kernel itself. 
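For illustration only, an out-of-tree build against the running host kernel could be invoked roughly as shown below; the KDIR path, CONFIG value and BUILD value here are example choices drawn from the defaults visible in the accompanying Makefiles (KDIR defaults to /lib/modules/$(uname -r)/build on the host, CONFIG defaults to "arm" when cross compiling, and BUILD defaults to debug), not values fixed by this patch:

  KDIR=/lib/modules/$(uname -r)/build CONFIG=arm BUILD=release make
  insmod ump.ko

The resulting ump.ko can later be removed again with rmmod, as with any other loadable kernel module.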
+diff --git a/drivers/gpu/arm/mali400/umplock/Makefile b/drivers/gpu/arm/mali400/umplock/Makefile +new file mode 100644 +index 000000000..e5549a33f +--- /dev/null ++++ b/drivers/gpu/arm/mali400/umplock/Makefile +@@ -0,0 +1,69 @@ ++# ++# Copyright (C) 2012, 2016-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++# as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained from Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++# + -+/* -+ * Hold the runpool_mutex for this -+ */ -+static inline bool timer_callback_should_run(struct kbase_device *kbdev) -+{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ s8 nr_running_ctxs; ++# default to building for the host ++ARCH ?= $(shell uname -m) + -+ lockdep_assert_held(&kbdev->js_data.runpool_mutex); ++# linux build system integration + -+ /* Timer must stop if we are suspending */ -+ if (backend->suspend_timer) -+ return false; ++ifneq ($(KERNELRELEASE),) ++# Inside the kernel build system + -+ /* nr_contexts_pullable is updated with the runpool_mutex. However, the -+ * locking in the caller gives us a barrier that ensures -+ * nr_contexts_pullable is up-to-date for reading */ -+ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); ++EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) + -+#ifdef CONFIG_MALI_DEBUG -+ if (kbdev->js_data.softstop_always) { -+ /* Debug support for allowing soft-stop on a single context */ -+ return true; -+ } -+#endif /* CONFIG_MALI_DEBUG */ ++SRC = umplock_driver.c + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { -+ /* Timeouts would have to be 4x longer (due to micro- -+ * architectural design) to support OpenCL conformance tests, so -+ * only run the timer when there's: -+ * - 2 or more CL contexts -+ * - 1 or more GLES contexts -+ * -+ * NOTE: We will treat a context that has both Compute and Non- -+ * Compute jobs will be treated as an OpenCL context (hence, we -+ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
-+ */ -+ { -+ s8 nr_compute_ctxs = -+ kbasep_js_ctx_attr_count_on_runpool(kbdev, -+ KBASEP_JS_CTX_ATTR_COMPUTE); -+ s8 nr_noncompute_ctxs = nr_running_ctxs - -+ nr_compute_ctxs; ++MODULE:=umplock.ko + -+ return (bool) (nr_compute_ctxs >= 2 || -+ nr_noncompute_ctxs > 0); -+ } -+ } else { -+ /* Run the timer callback whenever you have at least 1 context -+ */ -+ return (bool) (nr_running_ctxs > 0); -+ } -+} ++obj-m := $(MODULE:.ko=.o) ++$(MODULE:.ko=-y) := $(SRC:.c=.o) + -+static enum hrtimer_restart timer_callback(struct hrtimer *timer) -+{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_backend_data *backend; -+ int s; -+ bool reset_needed = false; ++$(MODULE:.ko=-objs) := $(SRC:.c=.o) + -+ KBASE_DEBUG_ASSERT(timer != NULL); ++else ++# Outside the kernel build system ++# ++# + -+ backend = container_of(timer, struct kbase_backend_data, -+ scheduling_timer); -+ kbdev = container_of(backend, struct kbase_device, hwaccess.backend); -+ js_devdata = &kbdev->js_data; ++# Get any user defined KDIR- or maybe even a hardcoded KDIR ++-include KDIR_CONFIGURATION + -+ /* Loop through the slots */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { -+ struct kbase_jd_atom *atom = NULL; ++# Define host system directory ++KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build + -+ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { -+ atom = kbase_gpu_inspect(kbdev, s, 0); -+ KBASE_DEBUG_ASSERT(atom != NULL); -+ } ++ifeq ($(ARCH), arm) ++ # when compiling for ARM we're cross compiling ++ export CROSS_COMPILE ?= arm-none-linux-gnueabi- ++ CONFIG ?= arm ++else ++ # Compiling for the host ++ CONFIG ?= $(shell uname -m) ++endif + -+ if (atom != NULL) { -+ /* The current version of the model doesn't support -+ * Soft-Stop */ -+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { -+ u32 ticks = atom->ticks++; ++# default cpu to select ++CPU ?= $(shell uname -m) + -+#if !CINSTR_DUMPING_ENABLED -+ u32 soft_stop_ticks, hard_stop_ticks, -+ gpu_reset_ticks; -+ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ soft_stop_ticks = -+ js_devdata->soft_stop_ticks_cl; -+ hard_stop_ticks = -+ js_devdata->hard_stop_ticks_cl; -+ gpu_reset_ticks = -+ js_devdata->gpu_reset_ticks_cl; -+ } else { -+ soft_stop_ticks = -+ js_devdata->soft_stop_ticks; -+ hard_stop_ticks = -+ js_devdata->hard_stop_ticks_ss; -+ gpu_reset_ticks = -+ js_devdata->gpu_reset_ticks_ss; -+ } ++# look up KDIR based om CPU selection ++KDIR ?= $(KDIR-$(CPU)) + -+ /* If timeouts have been changed then ensure -+ * that atom tick count is not greater than the -+ * new soft_stop timeout. This ensures that -+ * atoms do not miss any of the timeouts due to -+ * races between this worker and the thread -+ * changing the timeouts. */ -+ if (backend->timeouts_updated && -+ ticks > soft_stop_ticks) -+ ticks = atom->ticks = soft_stop_ticks; ++ifeq ($(KDIR),) ++$(error No KDIR found for platform $(CPU)) ++endif + -+ /* Job is Soft-Stoppable */ -+ if (ticks == soft_stop_ticks) { -+ int disjoint_threshold = -+ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; -+ u32 softstop_flags = 0u; -+ /* Job has been scheduled for at least -+ * js_devdata->soft_stop_ticks ticks. -+ * Soft stop the slot so we can run -+ * other jobs. -+ */ -+ dev_dbg(kbdev->dev, "Soft-stop"); -+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS -+ /* nr_user_contexts_running is updated -+ * with the runpool_mutex, but we can't -+ * take that here. 
-+ * -+ * However, if it's about to be -+ * increased then the new context can't -+ * run any jobs until they take the -+ * hwaccess_lock, so it's OK to observe -+ * the older value. -+ * -+ * Similarly, if it's about to be -+ * decreased, the last job from another -+ * context has already finished, so it's -+ * not too bad that we observe the older -+ * value and register a disjoint event -+ * when we try soft-stopping */ -+ if (js_devdata->nr_user_contexts_running -+ >= disjoint_threshold) -+ softstop_flags |= -+ JS_COMMAND_SW_CAUSES_DISJOINT; ++all: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) + -+ kbase_job_slot_softstop_swflags(kbdev, -+ s, atom, softstop_flags); -+#endif -+ } else if (ticks == hard_stop_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->hard_stop_ticks_ss ticks. -+ * It should have been soft-stopped by -+ * now. Hard stop the slot. -+ */ -+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ int ms = -+ js_devdata->scheduling_period_ns -+ / 1000000u; -+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", -+ (unsigned long)ticks, -+ (unsigned long)ms); -+ kbase_job_slot_hardstop(atom->kctx, s, -+ atom); -+#endif -+ } else if (ticks == gpu_reset_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->gpu_reset_ticks_ss ticks. -+ * It should have left the GPU by now. -+ * Signal that the GPU needs to be -+ * reset. -+ */ -+ reset_needed = true; -+ } -+#else /* !CINSTR_DUMPING_ENABLED */ -+ /* NOTE: During CINSTR_DUMPING_ENABLED, we use -+ * the alternate timeouts, which makes the hard- -+ * stop and GPU reset timeout much longer. We -+ * also ensure that we don't soft-stop at all. -+ */ -+ if (ticks == js_devdata->soft_stop_ticks) { -+ /* Job has been scheduled for at least -+ * js_devdata->soft_stop_ticks. We do -+ * not soft-stop during -+ * CINSTR_DUMPING_ENABLED, however. -+ */ -+ dev_dbg(kbdev->dev, "Soft-stop"); -+ } else if (ticks == -+ js_devdata->hard_stop_ticks_dumping) { -+ /* Job has been scheduled for at least -+ * js_devdata->hard_stop_ticks_dumping -+ * ticks. Hard stop the slot. -+ */ -+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ int ms = -+ js_devdata->scheduling_period_ns -+ / 1000000u; -+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", -+ (unsigned long)ticks, -+ (unsigned long)ms); -+ kbase_job_slot_hardstop(atom->kctx, s, -+ atom); -+#endif -+ } else if (ticks == -+ js_devdata->gpu_reset_ticks_dumping) { -+ /* Job has been scheduled for at least -+ * js_devdata->gpu_reset_ticks_dumping -+ * ticks. It should have left the GPU by -+ * now. Signal that the GPU needs to be -+ * reset. -+ */ -+ reset_needed = true; -+ } -+#endif /* !CINSTR_DUMPING_ENABLED */ -+ } -+ } -+ } -+#if KBASE_GPU_RESET_EN -+ if (reset_needed) { -+ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issueing GPU soft-reset to resolve."); ++kernelrelease: ++ $(MAKE) -C $(KDIR) kernelrelease + -+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) -+ kbase_reset_gpu_locked(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ -+ /* the timer is re-issued if there is contexts in the run-pool */ ++clean: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + -+ if (backend->timer_running) -+ hrtimer_start(&backend->scheduling_timer, -+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), -+ HRTIMER_MODE_REL); ++endif +diff --git a/drivers/gpu/arm/mali400/umplock/umplock_driver.c b/drivers/gpu/arm/mali400/umplock/umplock_driver.c +new file mode 100644 +index 000000000..173f4d9bb +--- /dev/null ++++ b/drivers/gpu/arm/mali400/umplock/umplock_driver.c +@@ -0,0 +1,618 @@ ++/* ++ * Copyright (C) 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++ */ + -+ backend->timeouts_updated = false; ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "umplock_ioctl.h" ++#include + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#define MAX_ITEMS 1024 ++#define MAX_PIDS 128 + -+ return HRTIMER_NORESTART; -+} ++typedef struct lock_cmd_priv { ++ uint32_t msg[128]; /*ioctl args*/ ++ u32 pid; /*process id*/ ++} _lock_cmd_priv; + -+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) -+{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ unsigned long flags; ++typedef struct lock_ref { ++ int ref_count; ++ u32 pid; ++ u32 down_count; ++} _lock_ref; + -+ lockdep_assert_held(&js_devdata->runpool_mutex); ++typedef struct umplock_item { ++ u32 secure_id; ++ u32 id_ref_count; ++ u32 owner; ++ _lock_access_usage usage; ++ _lock_ref references[MAX_PIDS]; ++ struct semaphore item_lock; ++} umplock_item; + -+ if (!timer_callback_should_run(kbdev)) { -+ /* Take spinlock to force synchronisation with timer */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->timer_running = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ /* From now on, return value of timer_callback_should_run() will -+ * also cause the timer to not requeue itself. 
Its return value -+ * cannot change, because it depends on variables updated with -+ * the runpool_mutex held, which the caller of this must also -+ * hold */ -+ hrtimer_cancel(&backend->scheduling_timer); -+ } ++typedef struct umplock_device_private { ++ struct mutex item_list_lock; ++ atomic_t sessions; ++ umplock_item items[MAX_ITEMS]; ++ u32 pids[MAX_PIDS]; ++} umplock_device_private; + -+ if (timer_callback_should_run(kbdev) && !backend->timer_running) { -+ /* Take spinlock to force synchronisation with timer */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->timer_running = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ hrtimer_start(&backend->scheduling_timer, -+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), -+ HRTIMER_MODE_REL); ++struct umplock_device { ++ struct cdev cdev; ++ struct class *umplock_class; ++}; + -+ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, -+ 0u); ++static struct umplock_device umplock_device; ++static umplock_device_private device; ++static dev_t umplock_dev; ++static char umplock_dev_name[] = "umplock"; ++ ++int umplock_debug_level = 0; ++module_param(umplock_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ ++MODULE_PARM_DESC(umplock_debug_level, "set umplock_debug_level to print debug messages"); ++ ++#define PDEBUG(level, fmt, args...) do { if ((level) <= umplock_debug_level) printk(KERN_DEBUG "umplock: " fmt, ##args); } while (0) ++#define PERROR(fmt, args...) do { printk(KERN_ERR "umplock: " fmt, ##args); } while (0) ++ ++int umplock_find_item(u32 secure_id) ++{ ++ int i; ++ for (i = 0; i < MAX_ITEMS; i++) { ++ if (device.items[i].secure_id == secure_id) { ++ return i; ++ } + } ++ ++ return -1; +} + -+int kbase_backend_timer_init(struct kbase_device *kbdev) ++static int umplock_find_item_by_pid(_lock_cmd_priv *lock_cmd, int *item_slot, int *ref_slot) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ _lock_item_s *lock_item; ++ int i, j; + -+ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ backend->scheduling_timer.function = timer_callback; ++ lock_item = (_lock_item_s *)&lock_cmd->msg; + -+ backend->timer_running = false; ++ i = umplock_find_item(lock_item->secure_id); + -+ return 0; ++ if (i < 0) { ++ return -1; ++ } ++ ++ for (j = 0; j < MAX_PIDS; j++) { ++ if (device.items[i].references[j].pid == lock_cmd->pid) { ++ *item_slot = i; ++ *ref_slot = j; ++ return 0; ++ } ++ } ++ return -1 ; +} + -+void kbase_backend_timer_term(struct kbase_device *kbdev) ++static int umplock_find_client_valid(u32 pid) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ int i; + -+ hrtimer_cancel(&backend->scheduling_timer); ++ if (pid == 0) { ++ return -1; ++ } ++ ++ for (i = 0; i < MAX_PIDS; i++) { ++ if (device.pids[i] == pid) { ++ return i; ++ } ++ } ++ ++ return -1; +} + -+void kbase_backend_timer_suspend(struct kbase_device *kbdev) ++static int do_umplock_create_locked(_lock_cmd_priv *lock_cmd) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ int i_index, ref_index; ++ int ret; ++ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; + -+ backend->suspend_timer = true; ++ i_index = ref_index = -1; + -+ kbase_backend_ctx_count_changed(kbdev); -+} ++ ret = umplock_find_client_valid(lock_cmd->pid); ++ if (ret < 0) { ++ /*lock request from an invalid client pid, do nothing*/ ++ return -EINVAL; ++ } + -+void kbase_backend_timer_resume(struct kbase_device *kbdev) -+{ -+ struct 
kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); ++ if (ret >= 0) { ++ } else if ((i_index = umplock_find_item(lock_item->secure_id)) >= 0) { ++ for (ref_index = 0; ref_index < MAX_PIDS; ref_index++) { ++ if (device.items[i_index].references[ref_index].pid == 0) { ++ break; ++ } ++ } ++ if (ref_index < MAX_PIDS) { ++ device.items[i_index].references[ref_index].pid = lock_cmd->pid; ++ device.items[i_index].references[ref_index].ref_count = 0; ++ device.items[i_index].references[ref_index].down_count = 0; ++ } else { ++ PERROR("whoops, item ran out of available reference slots\n"); ++ return -EINVAL; + -+ backend->suspend_timer = false; ++ } ++ } else { ++ i_index = umplock_find_item(0); + -+ kbase_backend_ctx_count_changed(kbdev); ++ if (i_index >= 0) { ++ device.items[i_index].secure_id = lock_item->secure_id; ++ device.items[i_index].id_ref_count = 0; ++ device.items[i_index].usage = lock_item->usage; ++ device.items[i_index].references[0].pid = lock_cmd->pid; ++ device.items[i_index].references[0].ref_count = 0; ++ device.items[i_index].references[0].down_count = 0; ++ sema_init(&device.items[i_index].item_lock, 1); ++ } else { ++ PERROR("whoops, ran out of available slots\n"); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; +} ++/** IOCTLs **/ + -+void kbase_backend_timeouts_changed(struct kbase_device *kbdev) ++static int do_umplock_create(_lock_cmd_priv *lock_cmd) +{ -+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; -+ -+ backend->timeouts_updated = true; ++ return 0; +} + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h -new file mode 100644 -index 000000000..3f53779c6 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h -@@ -0,0 +1,69 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++static int do_umplock_process(_lock_cmd_priv *lock_cmd) ++{ ++ int ret, i_index, ref_index; ++ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; ++ ++ mutex_lock(&device.item_list_lock); + ++ if (0 == lock_item->secure_id) { ++ PERROR("IOCTL_UMPLOCK_PROCESS called with secure_id is 0, pid: %d\n", lock_cmd->pid); ++ mutex_unlock(&device.item_list_lock); ++ return -EINVAL; ++ } + ++ ret = do_umplock_create_locked(lock_cmd); ++ if (ret < 0) { ++ mutex_unlock(&device.item_list_lock); ++ return -EINVAL; ++ } + ++ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); ++ if (ret < 0) { ++ /*fail to find a item*/ ++ PERROR("IOCTL_UMPLOCK_PROCESS called with invalid parameter, pid: %d\n", lock_cmd->pid); ++ mutex_unlock(&device.item_list_lock); ++ return -EINVAL; ++ } ++ device.items[i_index].references[ref_index].ref_count++; ++ device.items[i_index].id_ref_count++; ++ PDEBUG(1, "try to lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); + -+/* -+ * Register-based HW access backend specific job scheduler APIs -+ */ ++ if (lock_cmd->pid == device.items[i_index].owner) { ++ PDEBUG(1, "already own the lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); ++ mutex_unlock(&device.item_list_lock); ++ return 0; ++ } + -+#ifndef _KBASE_JS_BACKEND_H_ -+#define _KBASE_JS_BACKEND_H_ ++ device.items[i_index].references[ref_index].down_count++; ++ mutex_unlock(&device.item_list_lock); ++ if (down_interruptible(&device.items[i_index].item_lock)) { ++ /*wait up without hold the umplock. restore previous state and return*/ ++ mutex_lock(&device.item_list_lock); ++ device.items[i_index].references[ref_index].ref_count--; ++ device.items[i_index].id_ref_count--; ++ device.items[i_index].references[ref_index].down_count--; ++ if (0 == device.items[i_index].references[ref_index].ref_count) { ++ device.items[i_index].references[ref_index].pid = 0; ++ if (0 == device.items[i_index].id_ref_count) { ++ PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); ++ device.items[i_index].secure_id = 0; ++ } ++ } + -+/** -+ * kbase_backend_timer_init() - Initialise the JS scheduling timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called at driver initialisation -+ * -+ * Return: 0 on success -+ */ -+int kbase_backend_timer_init(struct kbase_device *kbdev); ++ PERROR("failed lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); + -+/** -+ * kbase_backend_timer_term() - Terminate the JS scheduling timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called at driver termination -+ */ -+void kbase_backend_timer_term(struct kbase_device *kbdev); ++ mutex_unlock(&device.item_list_lock); ++ return -ERESTARTSYS; ++ } + -+/** -+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling -+ * timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called on suspend, after the active count has reached -+ * zero. This is required as the timer may have been started on job submission -+ * to the job scheduler, but before jobs are submitted to the GPU. -+ * -+ * Caller must hold runpool_mutex. 
-+ */ -+void kbase_backend_timer_suspend(struct kbase_device *kbdev); ++ mutex_lock(&device.item_list_lock); ++ PDEBUG(1, "got lock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); ++ device.items[i_index].owner = lock_cmd->pid; ++ mutex_unlock(&device.item_list_lock); + -+/** -+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS -+ * scheduling timer -+ * @kbdev: Device pointer -+ * -+ * This function should be called on resume. Note that is is not guaranteed to -+ * re-start the timer, only evalute whether it should be re-started. -+ * -+ * Caller must hold runpool_mutex. -+ */ -+void kbase_backend_timer_resume(struct kbase_device *kbdev); ++ return 0; ++} + -+#endif /* _KBASE_JS_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c -new file mode 100644 -index 000000000..ba826184d ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c -@@ -0,0 +1,407 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++static int do_umplock_release(_lock_cmd_priv *lock_cmd) ++{ ++ int ret, i_index, ref_index, call_up; ++ _lock_item_s *lock_item = (_lock_item_s *)&lock_cmd->msg; + ++ mutex_lock(&device.item_list_lock); + -+/* #define ENABLE_DEBUG_LOG */ -+#include "../../platform/rk/custom_log.h" ++ if (0 == lock_item->secure_id) { ++ PERROR("IOCTL_UMPLOCK_RELEASE called with secure_id is 0, pid: %d\n", lock_cmd->pid); ++ mutex_unlock(&device.item_list_lock); ++ return -EINVAL; ++ } + -+#include ++ ret = umplock_find_client_valid(lock_cmd->pid); ++ if (ret < 0) { ++ /*lock request from an invalid client pid, do nothing*/ ++ mutex_unlock(&device.item_list_lock); ++ return -EPERM; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include ++ i_index = ref_index = -1; + -+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, -+ u32 num_pages) -+{ -+ u64 region; ++ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); ++ if (ret < 0) { ++ /*fail to find item*/ ++ PERROR("IOCTL_UMPLOCK_RELEASE called with invalid parameter pid: %d, secid: 0x%x\n", lock_cmd->pid, lock_item->secure_id); ++ mutex_unlock(&device.item_list_lock); ++ return -EINVAL; ++ } + -+ /* can't lock a zero sized range */ -+ KBASE_DEBUG_ASSERT(num_pages); ++ /* if the lock is not owned by this process */ ++ if (lock_cmd->pid != device.items[i_index].owner) { ++ mutex_unlock(&device.item_list_lock); ++ return -EPERM; ++ } + -+ region = pfn << PAGE_SHIFT; -+ /* -+ * fls returns (given the ASSERT above): -+ * 1 .. 32 -+ * -+ * 10 + fls(num_pages) -+ * results in the range (11 .. 
42) -+ */ ++ /* if the ref_count is 0, that means nothing to unlock, just return */ ++ if (0 == device.items[i_index].references[ref_index].ref_count) { ++ mutex_unlock(&device.item_list_lock); ++ return 0; ++ } + -+ /* gracefully handle num_pages being zero */ -+ if (0 == num_pages) { -+ region |= 11; -+ } else { -+ u8 region_width; ++ device.items[i_index].references[ref_index].ref_count--; ++ device.items[i_index].id_ref_count--; ++ PDEBUG(1, "unlock, pid: %d, secure_id: 0x%x, ref_count: %d\n", lock_cmd->pid, lock_item->secure_id, device.items[i_index].references[ref_index].ref_count); + -+ region_width = 10 + fls(num_pages); -+ if (num_pages != (1ul << (region_width - 11))) { -+ /* not pow2, so must go up to the next pow2 */ -+ region_width += 1; ++ call_up = 0; ++ if (device.items[i_index].references[ref_index].down_count > 1) { ++ call_up = 1; ++ device.items[i_index].references[ref_index].down_count--; ++ } ++ if (0 == device.items[i_index].references[ref_index].ref_count) { ++ device.items[i_index].references[ref_index].pid = 0; ++ if (0 == device.items[i_index].id_ref_count) { ++ PDEBUG(1, "release item, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); ++ device.items[i_index].secure_id = 0; + } -+ KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); -+ KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE); -+ region |= region_width; ++ device.items[i_index].owner = 0; ++ call_up = 1; ++ } ++ if (call_up) { ++ PDEBUG(1, "call up, pid: %d, secure_id: 0x%x\n", lock_cmd->pid, lock_item->secure_id); ++ up(&device.items[i_index].item_lock); + } ++ mutex_unlock(&device.item_list_lock); + -+ return region; ++ return 0; +} + -+static int wait_ready(struct kbase_device *kbdev, -+ unsigned int as_nr, struct kbase_context *kctx) ++static int do_umplock_zap(void) +{ -+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; -+ u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); ++ int i; + -+ /* Wait for the MMU status to indicate there is no active command, in -+ * case one is pending. Do not log remaining register accesses. */ -+ while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) -+ val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL); ++ PDEBUG(1, "ZAP ALL ENTRIES!\n"); + -+ if (max_loops == 0) { -+ dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); -+ return -1; ++ mutex_lock(&device.item_list_lock); ++ ++ for (i = 0; i < MAX_ITEMS; i++) { ++ device.items[i].secure_id = 0; ++ memset(&device.items[i].references, 0, sizeof(_lock_ref) * MAX_PIDS); ++ sema_init(&device.items[i].item_lock, 1); + } + -+ /* If waiting in loop was performed, log last read value. 
*/ -+ if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) -+ kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); ++ for (i = 0; i < MAX_PIDS; i++) { ++ device.pids[i] = 0; ++ } ++ mutex_unlock(&device.item_list_lock); + + return 0; +} + -+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, -+ struct kbase_context *kctx) ++static int do_umplock_dump(void) +{ -+ int status; ++ int i, j; + -+ /* write AS_COMMAND when MMU is ready to accept another command */ -+ status = wait_ready(kbdev, as_nr, kctx); -+ if (status == 0) -+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, -+ kctx); ++ mutex_lock(&device.item_list_lock); ++ PERROR("dump all the items begin\n"); ++ for (i = 0; i < MAX_ITEMS; i++) { ++ for (j = 0; j < MAX_PIDS; j++) { ++ if (device.items[i].secure_id != 0 && device.items[i].references[j].pid != 0) { ++ PERROR("item[%d]->secure_id=0x%x, owner=%d\t reference[%d].ref_count=%d.pid=%d\n", ++ i, ++ device.items[i].secure_id, ++ device.items[i].owner, ++ j, ++ device.items[i].references[j].ref_count, ++ device.items[i].references[j].pid); ++ } ++ } ++ } ++ PERROR("dump all the items end\n"); ++ mutex_unlock(&device.item_list_lock); + -+ return status; ++ return 0; +} + -+static void validate_protected_page_fault(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++int do_umplock_client_add(_lock_cmd_priv *lock_cmd) +{ -+ /* GPUs which support (native) protected mode shall not report page -+ * fault addresses unless it has protected debug mode and protected -+ * debug mode is turned on */ -+ u32 protected_debug_mode = 0; -+ -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) -+ return; -+ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { -+ protected_debug_mode = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_STATUS), -+ kctx) & GPU_DBGEN; ++ int i; ++ mutex_lock(&device.item_list_lock); ++ for (i = 0; i < MAX_PIDS; i++) { ++ if (device.pids[i] == lock_cmd->pid) { ++ mutex_unlock(&device.item_list_lock); ++ return 0; ++ } + } -+ -+ if (!protected_debug_mode) { -+ /* fault_addr should never be reported in protected mode. 
-+ * However, we just continue by printing an error message */ -+ dev_err(kbdev->dev, "Fault address reported in protected mode\n"); ++ for (i = 0; i < MAX_PIDS; i++) { ++ if (device.pids[i] == 0) { ++ device.pids[i] = lock_cmd->pid; ++ break; ++ } ++ } ++ mutex_unlock(&device.item_list_lock); ++ if (i == MAX_PIDS) { ++ PERROR("Oops, Run out of client slots\n "); ++ return -EINVAL; + } ++ return 0; +} + -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++int do_umplock_client_delete(_lock_cmd_priv *lock_cmd) +{ -+ const int num_as = 16; -+ const int busfault_shift = MMU_PAGE_FAULT_FLAGS; -+ const int pf_shift = 0; -+ const unsigned long as_bit_mask = (1UL << num_as) - 1; -+ unsigned long flags; -+ u32 new_mask; -+ u32 tmp; ++ int p_index = -1, i_index = -1, ref_index = -1; ++ int ret; ++ _lock_item_s *lock_item; ++ lock_item = (_lock_item_s *)&lock_cmd->msg; + -+ /* bus faults */ -+ u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; -+ /* page faults (note: Ignore ASes with both pf and bf) */ -+ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; ++ mutex_lock(&device.item_list_lock); ++ p_index = umplock_find_client_valid(lock_cmd->pid); ++ /*lock item pid is not valid.*/ ++ if (p_index < 0) { ++ mutex_unlock(&device.item_list_lock); ++ return 0; ++ } + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ /*walk through umplock item list and release reference attached to this client*/ ++ for (i_index = 0; i_index < MAX_ITEMS; i_index++) { ++ lock_item->secure_id = device.items[i_index].secure_id; + -+ /* remember current mask */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); -+ /* mask interrupts for now */ -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ /*find the item index and reference slot for the lock_item*/ ++ ret = umplock_find_item_by_pid(lock_cmd, &i_index, &ref_index); + -+ while (bf_bits | pf_bits) { -+ struct kbase_as *as; -+ int as_no; -+ struct kbase_context *kctx; ++ if (ret < 0) { ++ /*client has no reference on this umplock item, skip*/ ++ continue; ++ } ++ while (device.items[i_index].references[ref_index].ref_count) { ++ /*release references on this client*/ + -+ /* -+ * the while logic ensures we have a bit set, no need to check -+ * for not-found here -+ */ -+ as_no = ffs(bf_bits | pf_bits) - 1; -+ as = &kbdev->as[as_no]; ++ PDEBUG(1, "delete client, pid: %d, ref_count: %d\n", lock_cmd->pid, device.items[i_index].references[ref_index].ref_count); + -+ /* -+ * Refcount the kctx ASAP - it shouldn't disappear anyway, since -+ * Bus/Page faults _should_ only occur whilst jobs are running, -+ * and a job causing the Bus/Page fault shouldn't complete until -+ * the MMU is updated -+ */ -+ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); -+ if (!kctx) { -+ E("fail to lookup ctx, to break out."); -+ break; ++ mutex_unlock(&device.item_list_lock); ++ do_umplock_release(lock_cmd); ++ mutex_lock(&device.item_list_lock); + } ++ } + ++ /*remove the pid from umplock valid pid list*/ ++ device.pids[p_index] = 0; ++ mutex_unlock(&device.item_list_lock); + -+ /* find faulting address */ -+ as->fault_addr = kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_HI), -+ kctx); -+ as->fault_addr <<= 32; -+ as->fault_addr |= kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, -+ AS_FAULTADDRESS_LO), -+ kctx); ++ return 0; ++} + -+ /* Mark the fault protected or not */ -+ as->protected_mode = kbdev->protected_mode; 
++static long umplock_driver_ioctl(struct file *f, unsigned int cmd, unsigned long arg) ++{ ++ int ret; ++ uint32_t size = _IOC_SIZE(cmd); ++ _lock_cmd_priv lock_cmd ; + -+ if (kbdev->protected_mode && as->fault_addr) -+ { -+ /* check if address reporting is allowed */ -+ validate_protected_page_fault(kbdev, kctx); -+ } ++ if (_IOC_TYPE(cmd) != LOCK_IOCTL_GROUP) { ++ return -ENOTTY; ++ } + -+ /* report the fault to debugfs */ -+ kbase_as_fault_debugfs_new(kbdev, as_no); ++ if (_IOC_NR(cmd) >= LOCK_IOCTL_MAX_CMDS) { ++ return -ENOTTY; ++ } + -+ /* record the fault status */ -+ as->fault_status = kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, -+ AS_FAULTSTATUS), -+ kctx); ++ switch (cmd) { ++ case LOCK_IOCTL_CREATE: ++ if (size != sizeof(_lock_item_s)) { ++ return -ENOTTY; ++ } + -+ /* find the fault type */ -+ as->fault_type = (bf_bits & (1 << as_no)) ? -+ KBASE_MMU_FAULT_TYPE_BUS : -+ KBASE_MMU_FAULT_TYPE_PAGE; ++ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { ++ return -EFAULT; ++ } ++ lock_cmd.pid = (u32)current->tgid; ++ ret = do_umplock_create(&lock_cmd); ++ if (ret) { ++ return ret; ++ } ++ return 0; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { -+ as->fault_extra_addr = kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), -+ kctx); -+ as->fault_extra_addr <<= 32; -+ as->fault_extra_addr |= kbase_reg_read(kbdev, -+ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), -+ kctx); ++ case LOCK_IOCTL_PROCESS: ++ if (size != sizeof(_lock_item_s)) { ++ return -ENOTTY; + } + -+ if (kbase_as_has_bus_fault(as)) { -+ /* Mark bus fault as handled. -+ * Note that a bus fault is processed first in case -+ * where both a bus fault and page fault occur. -+ */ -+ bf_bits &= ~(1UL << as_no); ++ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { ++ return -EFAULT; ++ } ++ lock_cmd.pid = (u32)current->tgid; ++ return do_umplock_process(&lock_cmd); + -+ /* remove the queued BF (and PF) from the mask */ -+ new_mask &= ~(MMU_BUS_ERROR(as_no) | -+ MMU_PAGE_FAULT(as_no)); -+ } else { -+ /* Mark page fault as handled */ -+ pf_bits &= ~(1UL << as_no); ++ case LOCK_IOCTL_RELEASE: ++ if (size != sizeof(_lock_item_s)) { ++ return -ENOTTY; ++ } + -+ /* remove the queued PF from the mask */ -+ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ if (copy_from_user(&lock_cmd.msg, (void __user *)arg, size)) { ++ return -EFAULT; + } ++ lock_cmd.pid = (u32)current->tgid; ++ ret = do_umplock_release(&lock_cmd); ++ if (ret) { ++ return ret; ++ } ++ return 0; + -+ /* Process the interrupt for this address space */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_interrupt_process(kbdev, kctx, as); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ case LOCK_IOCTL_ZAP: ++ do_umplock_zap(); ++ return 0; ++ ++ case LOCK_IOCTL_DUMP: ++ do_umplock_dump(); ++ return 0; + } + -+ /* reenable interrupts */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); -+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); -+ new_mask |= tmp; -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ return -ENOIOCTLCMD; +} + -+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx) ++static int umplock_driver_open(struct inode *inode, struct file *filp) +{ -+ struct kbase_mmu_setup *current_setup = &as->current_setup; -+ u32 transcfg = 0; -+ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { -+ transcfg = current_setup->transcfg & 0xFFFFFFFFUL; ++ 
_lock_cmd_priv lock_cmd; + -+ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ -+ /* Clear PTW_MEMATTR bits */ -+ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; -+ /* Enable correct PTW_MEMATTR bits */ -+ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; ++ atomic_inc(&device.sessions); ++ PDEBUG(1, "OPEN SESSION (%i references)\n", atomic_read(&device.sessions)); + -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ -+ /* Clear PTW_SH bits */ -+ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); -+ /* Enable correct PTW_SH bits */ -+ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); -+ } ++ lock_cmd.pid = (u32)current->tgid; ++ do_umplock_client_add(&lock_cmd); + -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), -+ transcfg, kctx); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), -+ (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, -+ kctx); -+ } else { -+ if (kbdev->system_coherency == COHERENCY_ACE) -+ current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; -+ } ++ return 0; ++} + -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), -+ current_setup->transtab & 0xFFFFFFFFUL, kctx); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), -+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); ++static int umplock_driver_release(struct inode *inode, struct file *filp) ++{ ++ int sessions = 0; ++ _lock_cmd_priv lock_cmd; + -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), -+ current_setup->memattr & 0xFFFFFFFFUL, kctx); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), -+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); ++ lock_cmd.pid = (u32)current->tgid; ++ do_umplock_client_delete(&lock_cmd); + -+ KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, -+ current_setup->transtab, -+ current_setup->memattr, -+ transcfg); ++ mutex_lock(&device.item_list_lock); ++ atomic_dec(&device.sessions); ++ sessions = atomic_read(&device.sessions); ++ PDEBUG(1, "CLOSE SESSION (%i references)\n", sessions); ++ mutex_unlock(&device.item_list_lock); ++ if (sessions == 0) { ++ do_umplock_zap(); ++ } + -+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); ++ return 0; +} + -+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, -+ unsigned int handling_irq) -+{ -+ int ret; -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++static struct file_operations umplock_fops = { ++ .owner = THIS_MODULE, ++ .open = umplock_driver_open, ++ .release = umplock_driver_release, ++ .unlocked_ioctl = umplock_driver_ioctl, ++}; + -+ if (op == AS_COMMAND_UNLOCK) { -+ /* Unlock doesn't require a lock first */ -+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); -+ } else { -+ u64 lock_addr = lock_region(kbdev, vpfn, nr); ++int umplock_device_initialize(void) ++{ ++ int err; + -+ /* Lock the region that needs to be updated */ -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), -+ lock_addr & 0xFFFFFFFFUL, kctx); -+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), -+ (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); -+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); ++ err = alloc_chrdev_region(&umplock_dev, 0, 1, umplock_dev_name); + -+ /* Run the MMU operation */ -+ write_cmd(kbdev, as->number, op, kctx); ++ if (0 == err) { ++ memset(&umplock_device, 0, sizeof(umplock_device)); ++ cdev_init(&umplock_device.cdev, &umplock_fops); ++ umplock_device.cdev.owner = THIS_MODULE; ++ 
umplock_device.cdev.ops = &umplock_fops; + -+ /* Wait for the flush to complete */ -+ ret = wait_ready(kbdev, as->number, kctx); ++ err = cdev_add(&umplock_device.cdev, umplock_dev, 1); ++ if (0 == err) { ++ umplock_device.umplock_class = class_create(THIS_MODULE, umplock_dev_name); ++ if (IS_ERR(umplock_device.umplock_class)) { ++ err = PTR_ERR(umplock_device.umplock_class); ++ } else { ++ struct device *mdev; ++ mdev = device_create(umplock_device.umplock_class, NULL, umplock_dev, NULL, umplock_dev_name); ++ if (!IS_ERR(mdev)) { ++ return 0; /* all ok */ ++ } + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { -+ /* Issue an UNLOCK command to ensure that valid page -+ tables are re-read by the GPU after an update. -+ Note that, the FLUSH command should perform all the -+ actions necessary, however the bus logs show that if -+ multiple page faults occur within an 8 page region -+ the MMU does not always re-read the updated page -+ table entries for later faults or is only partially -+ read, it subsequently raises the page fault IRQ for -+ the same addresses, the unlock ensures that the MMU -+ cache is flushed, so updates can be re-read. As the -+ region is now unlocked we need to issue 2 UNLOCK -+ commands in order to flush the MMU/uTLB, -+ see PRLAM-8812. -+ */ -+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); -+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); ++ err = PTR_ERR(mdev); ++ class_destroy(umplock_device.umplock_class); ++ } ++ cdev_del(&umplock_device.cdev); + } ++ ++ unregister_chrdev_region(umplock_dev, 1); ++ } else { ++ PERROR("alloc chardev region failed\n"); + } + -+ return ret; ++ return err; +} + -+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, enum kbase_mmu_fault_type type) ++void umplock_device_terminate(void) +{ -+ unsigned long flags; -+ u32 pf_bf_mask; ++ device_destroy(umplock_device.umplock_class, umplock_dev); ++ class_destroy(umplock_device.umplock_class); + -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ cdev_del(&umplock_device.cdev); ++ unregister_chrdev_region(umplock_dev, 1); ++} + -+ /* -+ * A reset is in-flight and we're flushing the IRQ + bottom half -+ * so don't update anything as it could race with the reset code. -+ */ -+ if (kbdev->irq_reset_flush) -+ goto unlock; ++static int __init umplock_initialize_module(void) ++{ ++ PDEBUG(1, "Inserting UMP lock device driver. 
Compiled: %s, time: %s\n", __DATE__, __TIME__); + -+ /* Clear the page (and bus fault IRQ as well in case one occurred) */ -+ pf_bf_mask = MMU_PAGE_FAULT(as->number); -+ if (type == KBASE_MMU_FAULT_TYPE_BUS || -+ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) -+ pf_bf_mask |= MMU_BUS_ERROR(as->number); ++ mutex_init(&device.item_list_lock); ++ if (umplock_device_initialize() != 0) { ++ PERROR("UMP lock device driver init failed\n"); ++ return -ENOTTY; ++ } ++ memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS); ++ memset(&device.pids, 0, sizeof(u32) * MAX_PIDS); ++ atomic_set(&device.sessions, 0); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); ++ PDEBUG(1, "UMP lock device driver loaded\n"); + -+unlock: -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ return 0; +} + -+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, enum kbase_mmu_fault_type type) ++static void __exit umplock_cleanup_module(void) +{ -+ unsigned long flags; -+ u32 irq_mask; -+ -+ /* Enable the page fault IRQ (and bus fault IRQ as well in case one -+ * occurred) */ -+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ PDEBUG(1, "unloading UMP lock module\n"); + -+ /* -+ * A reset is in-flight and we're flushing the IRQ + bottom half -+ * so don't update anything as it could race with the reset code. -+ */ -+ if (kbdev->irq_reset_flush) -+ goto unlock; ++ memset(&device.items, 0, sizeof(umplock_item) * MAX_ITEMS); ++ memset(&device.pids, 0, sizeof(u32) * MAX_PIDS); ++ umplock_device_terminate(); ++ mutex_destroy(&device.item_list_lock); + -+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | -+ MMU_PAGE_FAULT(as->number); ++ PDEBUG(1, "UMP lock module unloaded\n"); ++} + -+ if (type == KBASE_MMU_FAULT_TYPE_BUS || -+ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) -+ irq_mask |= MMU_BUS_ERROR(as->number); ++module_init(umplock_initialize_module); ++module_exit(umplock_cleanup_module); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + -+unlock: -+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -+} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_DESCRIPTION("ARM UMP locker"); +diff --git a/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h b/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h new file mode 100644 -index 000000000..c02253c6a +index 000000000..8afdaad70 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h -@@ -0,0 +1,42 @@ ++++ b/drivers/gpu/arm/mali400/umplock/umplock_ioctl.h +@@ -0,0 +1,66 @@ +/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Copyright (C) 2012-2013, 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained from Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + ++#ifndef __UMPLOCK_IOCTL_H__ ++#define __UMPLOCK_IOCTL_H__ + ++#ifdef __cplusplus ++extern "C" { ++#endif + -+/* -+ * Interface file for the direct implementation for MMU hardware access -+ * -+ * Direct MMU hardware interface -+ * -+ * This module provides the interface(s) that are required by the direct -+ * register access implementation of the MMU hardware interface -+ */ ++#include ++#include + -+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ -+#define _MALI_KBASE_MMU_HW_DIRECT_H_ ++#ifndef __user ++#define __user ++#endif + -+#include + +/** -+ * kbase_mmu_interrupt - Process an MMU interrupt. -+ * -+ * Process the MMU interrupt that was reported by the &kbase_device. -+ * -+ * @kbdev: kbase context to clear the fault from. -+ * @irq_stat: Value of the MMU_IRQ_STATUS register ++ * @file umplock_ioctl.h ++ * This file describes the interface needed to use the Linux device driver. ++ * The interface is used by the userpace Mali DDK. + */ -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + -+#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c -new file mode 100644 -index 000000000..0614348e9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c -@@ -0,0 +1,63 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++typedef enum { ++ _LOCK_ACCESS_RENDERABLE = 1, ++ _LOCK_ACCESS_TEXTURE, ++ _LOCK_ACCESS_CPU_WRITE, ++ _LOCK_ACCESS_CPU_READ, ++} _lock_access_usage; + ++typedef struct _lock_item_s { ++ unsigned int secure_id; ++ _lock_access_usage usage; ++} _lock_item_s; + + ++#define LOCK_IOCTL_GROUP 0x91 + ++#define _LOCK_IOCTL_CREATE_CMD 0 /* create kernel lock item */ ++#define _LOCK_IOCTL_PROCESS_CMD 1 /* process kernel lock item */ ++#define _LOCK_IOCTL_RELEASE_CMD 2 /* release kernel lock item */ ++#define _LOCK_IOCTL_ZAP_CMD 3 /* clean up all kernel lock items */ ++#define _LOCK_IOCTL_DUMP_CMD 4 /* dump all the items */ + -+/* -+ * "Always on" power management policy -+ */ ++#define LOCK_IOCTL_MAX_CMDS 5 + -+#include -+#include ++#define LOCK_IOCTL_CREATE _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_CREATE_CMD, _lock_item_s ) ++#define LOCK_IOCTL_PROCESS _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_PROCESS_CMD, _lock_item_s ) ++#define LOCK_IOCTL_RELEASE _IOW( LOCK_IOCTL_GROUP, _LOCK_IOCTL_RELEASE_CMD, _lock_item_s ) ++#define LOCK_IOCTL_ZAP _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_ZAP_CMD ) ++#define LOCK_IOCTL_DUMP _IO ( LOCK_IOCTL_GROUP, _LOCK_IOCTL_DUMP_CMD ) + -+static u64 always_on_get_core_mask(struct kbase_device *kbdev) -+{ -+ return kbdev->gpu_props.props.raw_props.shader_present; ++#ifdef __cplusplus +} ++#endif + -+static bool always_on_get_core_active(struct kbase_device *kbdev) -+{ -+ return true; -+} ++#endif /* __UMPLOCK_IOCTL_H__ */ + -+static void always_on_init(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} +diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild +new file mode 100755 +index 000000000..b2c2bbcda +--- /dev/null ++++ b/drivers/gpu/arm/midgard/Kbuild +@@ -0,0 +1,221 @@ ++# ++# (C) COPYRIGHT 2012-2016, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+static void always_on_term(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++KBUILD_CFLAGS += -include rename.h + -+/* -+ * The struct kbase_pm_policy structure for the demand power policy. -+ * -+ * This is the static structure that defines the demand power policy's callback -+ * and name. -+ */ -+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { -+ "always_on", /* name */ -+ always_on_init, /* init */ -+ always_on_term, /* term */ -+ always_on_get_core_mask, /* get_core_mask */ -+ always_on_get_core_active, /* get_core_active */ -+ 0u, /* flags */ -+ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ -+}; ++# Driver version string which is returned to userspace via an ioctl ++MALI_RELEASE_NAME ?= "r18p0-01rel0" + -+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h -new file mode 100644 -index 000000000..f9d244b01 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h -@@ -0,0 +1,77 @@ ++# Paths required for build + -+/* -+ * -+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++# make $(src) as absolute path if it isn't already, by prefixing $(srctree) ++src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) ++KBASE_PATH = $(src) ++KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy ++UMP_PATH = $(src)/../../../base + ++ifeq ($(CONFIG_MALI_ERROR_INJECTION),y) ++MALI_ERROR_INJECT_ON = 1 ++endif + ++# Set up defaults if not defined by build system ++MALI_CUSTOMER_RELEASE ?= 1 ++MALI_UNIT_TEST ?= 0 ++MALI_KERNEL_TEST_API ?= 0 ++MALI_ERROR_INJECT_ON ?= 0 ++MALI_MOCK_TEST ?= 0 ++MALI_COVERAGE ?= 0 ++MALI_INSTRUMENTATION_LEVEL ?= 0 ++# This workaround is for what seems to be a compiler bug we observed in ++# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling ++# the "_Pragma" syntax, where an error message is returned: ++# ++# "internal compiler error: unspellable token PRAGMA" ++# ++# This regression has thus far only been seen on the GCC 4.7 compiler bundled ++# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds ++# which are not known to be used with AOSP, is hardcoded to disable the ++# workaround, i.e. set the define to 0. ++MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0 + ++# Set up our defines, which will be passed to gcc ++DEFINES = \ ++ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ ++ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ ++ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ ++ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ ++ -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ ++ -DMALI_COVERAGE=$(MALI_COVERAGE) \ ++ -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \ ++ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ ++ -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598) + ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) ++else ++# out-of-tree ++DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) ++endif + -+/* -+ * "Always on" power management policy -+ */ ++DEFINES += -I$(srctree)/drivers/staging/android + -+#ifndef MALI_KBASE_PM_ALWAYS_ON_H -+#define MALI_KBASE_PM_ALWAYS_ON_H ++# Use our defines when compiling ++ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux ++subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux + -+/** -+ * DOC: -+ * The "Always on" power management policy has the following -+ * characteristics: -+ * -+ * - When KBase indicates that the GPU will be powered up, but we don't yet -+ * know which Job Chains are to be run: -+ * All Shader Cores are powered up, regardless of whether or not they will -+ * be needed later. 
-+ * -+ * - When KBase indicates that a set of Shader Cores are needed to submit the -+ * currently queued Job Chains: -+ * All Shader Cores are kept powered, regardless of whether or not they will -+ * be needed -+ * -+ * - When KBase indicates that the GPU need not be powered: -+ * The Shader Cores are kept powered, regardless of whether or not they will -+ * be needed. The GPU itself is also kept powered, even though it is not -+ * needed. -+ * -+ * This policy is automatically overridden during system suspend: the desired -+ * core state is ignored, and the cores are forced off regardless of what the -+ * policy requests. After resuming from suspend, new changes to the desired -+ * core state made by the policy are honored. -+ * -+ * Note: -+ * -+ * - KBase indicates the GPU will be powered up when it has a User Process that -+ * has just started to submit Job Chains. -+ * -+ * - KBase indicates the GPU need not be powered when all the Job Chains from -+ * User Processes have finished, and it is waiting for a User Process to -+ * submit some more Job Chains. -+ */ ++SRC := \ ++ mali_kbase_device.c \ ++ mali_kbase_cache_policy.c \ ++ mali_kbase_mem.c \ ++ mali_kbase_mmu.c \ ++ mali_kbase_ctx_sched.c \ ++ mali_kbase_jd.c \ ++ mali_kbase_jd_debugfs.c \ ++ mali_kbase_jm.c \ ++ mali_kbase_gpuprops.c \ ++ mali_kbase_js.c \ ++ mali_kbase_js_ctx_attr.c \ ++ mali_kbase_event.c \ ++ mali_kbase_context.c \ ++ mali_kbase_pm.c \ ++ mali_kbase_config.c \ ++ mali_kbase_vinstr.c \ ++ mali_kbase_softjobs.c \ ++ mali_kbase_10969_workaround.c \ ++ mali_kbase_hw.c \ ++ mali_kbase_utility.c \ ++ mali_kbase_debug.c \ ++ mali_kbase_trace_timeline.c \ ++ mali_kbase_gpu_memory_debugfs.c \ ++ mali_kbase_mem_linux.c \ ++ mali_kbase_core_linux.c \ ++ mali_kbase_replay.c \ ++ mali_kbase_mem_profile_debugfs.c \ ++ mali_kbase_mmu_mode_lpae.c \ ++ mali_kbase_mmu_mode_aarch64.c \ ++ mali_kbase_disjoint_events.c \ ++ mali_kbase_gator_api.c \ ++ mali_kbase_debug_mem_view.c \ ++ mali_kbase_debug_job_fault.c \ ++ mali_kbase_smc.c \ ++ mali_kbase_mem_pool.c \ ++ mali_kbase_mem_pool_debugfs.c \ ++ mali_kbase_tlstream.c \ ++ mali_kbase_strings.c \ ++ mali_kbase_as_fault_debugfs.c \ ++ mali_kbase_regs_history_debugfs.c + -+/** -+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data -+ * @dummy: unused dummy variable -+ * -+ * This contains data that is private to the particular power policy that is -+ * active. -+ */ -+struct kbasep_pm_policy_always_on { -+ int dummy; -+}; + -+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + -+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c -new file mode 100644 -index 000000000..146fd48ba ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c -@@ -0,0 +1,482 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ifeq ($(MALI_UNIT_TEST),1) ++ SRC += mali_kbase_tlstream_test.c ++endif + ++ifeq ($(MALI_CUSTOMER_RELEASE),0) ++ SRC += mali_kbase_regs_dump_debugfs.c ++endif + + ++ccflags-y += -I$(KBASE_PATH) + -+/* -+ * GPU backend implementation of base kernel power management APIs -+ */ ++ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y) ++ SRC += mali_kbase_platform_fake.c + -+#include -+#include -+#include -+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE -+#include -+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ ++ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y) ++ SRC += platform/vexpress/mali_kbase_config_vexpress.c \ ++ platform/vexpress/mali_kbase_cpu_vexpress.c ++ ccflags-y += -I$(src)/platform/vexpress ++ endif + -+#include -+#include -+#include -+#include -+#include -+#include ++ ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y) ++ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c ++ ccflags-y += -I$(src)/platform/rtsm_ve ++ endif + -+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); ++ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y) ++ SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c ++ ccflags-y += -I$(src)/platform/vexpress_1xv7_a57 ++ endif + -+void kbase_pm_register_access_enable(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_callback_conf *callbacks; ++ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y) ++ SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \ ++ platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c ++ ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz ++ endif ++endif # CONFIG_MALI_PLATFORM_FAKE=y + -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++# Tell the Linux build system from which .o file to create the kernel module ++obj-$(CONFIG_MALI_MIDGARD) += midgard_kbase.o + -+ if (callbacks) -+ callbacks->power_on_callback(kbdev); ++# Tell the Linux build system to enable building of our .c files ++midgard_kbase-y := $(SRC:.c=.o) + -+ kbdev->pm.backend.gpu_powered = true; -+} ++ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) ++ # Kconfig passes in the name with quotes for in-tree builds - remove them. 
++ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) ++ MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) ++ ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR) ++ include $(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)/Kbuild ++endif + -+void kbase_pm_register_access_disable(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_callback_conf *callbacks; ++ifeq ($(CONFIG_MALI_DEVFREQ),y) ++ ifeq ($(CONFIG_DEVFREQ_THERMAL),y) ++ include $(src)/ipa/Kbuild ++ endif ++endif + -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++midgard_kbase-$(CONFIG_MALI_DMA_FENCE) += \ ++ mali_kbase_dma_fence.o \ ++ mali_kbase_fence.o ++midgard_kbase-$(CONFIG_SYNC) += \ ++ mali_kbase_sync_android.o \ ++ mali_kbase_sync_common.o ++midgard_kbase-$(CONFIG_SYNC_FILE) += \ ++ mali_kbase_sync_file.o \ ++ mali_kbase_sync_common.o \ ++ mali_kbase_fence.o + -+ if (callbacks) -+ callbacks->power_off_callback(kbdev); ++MALI_BACKEND_PATH ?= backend ++CONFIG_MALI_BACKEND ?= gpu ++CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND) + -+ kbdev->pm.backend.gpu_powered = false; -+} ++ifeq ($(MALI_MOCK_TEST),1) ++ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu) ++# Test functionality ++midgard_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o ++endif ++endif + -+int kbase_hwaccess_pm_init(struct kbase_device *kbdev) -+{ -+ int ret = 0; -+ struct kbase_pm_callback_conf *callbacks; ++include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild ++midgard_kbase-y += $(BACKEND:.c=.o) + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ mutex_init(&kbdev->pm.lock); ++ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) ++subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) + -+ kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", -+ WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!kbdev->pm.backend.gpu_poweroff_wait_wq) -+ return -ENOMEM; ++# Default to devicetree platform if neither a fake platform or a thirdparty ++# platform is configured. ++ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),) ++CONFIG_MALI_PLATFORM_DEVICETREE := y ++endif + -+ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, -+ kbase_pm_gpu_poweroff_wait_wq); ++midgard_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \ ++ platform/devicetree/mali_kbase_runtime_pm.o \ ++ platform/devicetree/mali_kbase_config_devicetree.o ++ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree + -+ kbdev->pm.backend.gpu_powered = false; -+ kbdev->pm.suspending = false; -+#ifdef CONFIG_MALI_DEBUG -+ kbdev->pm.backend.driver_ready_for_irqs = false; -+#endif /* CONFIG_MALI_DEBUG */ -+ kbdev->pm.backend.gpu_in_desired_state = true; -+ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); ++# For kutf and mali_kutf_irq_latency_test ++obj-$(CONFIG_MALI_KUTF) += tests/ +diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig +new file mode 100644 +index 000000000..043bfc002 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/Kconfig +@@ -0,0 +1,249 @@ ++# ++# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. 
++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; -+ if (callbacks) { -+ kbdev->pm.backend.callback_power_on = -+ callbacks->power_on_callback; -+ kbdev->pm.backend.callback_power_off = -+ callbacks->power_off_callback; -+ kbdev->pm.backend.callback_power_suspend = -+ callbacks->power_suspend_callback; -+ kbdev->pm.backend.callback_power_resume = -+ callbacks->power_resume_callback; -+ kbdev->pm.callback_power_runtime_init = -+ callbacks->power_runtime_init_callback; -+ kbdev->pm.callback_power_runtime_term = -+ callbacks->power_runtime_term_callback; -+ kbdev->pm.backend.callback_power_runtime_on = -+ callbacks->power_runtime_on_callback; -+ kbdev->pm.backend.callback_power_runtime_off = -+ callbacks->power_runtime_off_callback; -+ kbdev->pm.backend.callback_power_runtime_idle = -+ callbacks->power_runtime_idle_callback; -+ } else { -+ kbdev->pm.backend.callback_power_on = NULL; -+ kbdev->pm.backend.callback_power_off = NULL; -+ kbdev->pm.backend.callback_power_suspend = NULL; -+ kbdev->pm.backend.callback_power_resume = NULL; -+ kbdev->pm.callback_power_runtime_init = NULL; -+ kbdev->pm.callback_power_runtime_term = NULL; -+ kbdev->pm.backend.callback_power_runtime_on = NULL; -+ kbdev->pm.backend.callback_power_runtime_off = NULL; -+ kbdev->pm.backend.callback_power_runtime_idle = NULL; -+ } + -+ /* Initialise the metrics subsystem */ -+ ret = kbasep_pm_metrics_init(kbdev); -+ if (ret) -+ return ret; + -+ init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); -+ kbdev->pm.backend.l2_powered = 0; ++menuconfig MALI_MIDGARD ++ tristate "Mali Midgard series support" ++ select GPU_TRACEPOINTS if ANDROID ++ default n ++ help ++ Enable this option to build support for a ARM Mali Midgard GPU. + -+ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); -+ kbdev->pm.backend.reset_done = false; ++ To compile this driver as a module, choose M here: ++ this will generate a single module, called mali_kbase. + -+ init_waitqueue_head(&kbdev->pm.zero_active_count_wait); -+ kbdev->pm.active_count = 0; ++config MALI_GATOR_SUPPORT ++ bool "Streamline support via Gator" ++ depends on MALI_MIDGARD ++ default n ++ help ++ Adds diagnostic support for use with the ARM Streamline Performance Analyzer. ++ You will need the Gator device driver already loaded before loading this driver when enabling ++ Streamline debug support. ++ This is a legacy interface required by older versions of Streamline. + -+ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); -+ spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); ++config MALI_MIDGARD_DVFS ++ bool "Enable legacy DVFS" ++ depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE ++ default n ++ help ++ Choose this option to enable legacy DVFS in the Mali Midgard DDK. + -+ init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); ++config MALI_MIDGARD_ENABLE_TRACE ++ bool "Enable kbase tracing" ++ depends on MALI_MIDGARD ++ default n ++ help ++ Enables tracing in kbase. Trace log available through ++ the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + -+ if (kbase_pm_ca_init(kbdev) != 0) -+ goto workq_fail; ++config MALI_DEVFREQ ++ bool "devfreq support for Mali" ++ depends on MALI_MIDGARD && PM_DEVFREQ ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ help ++ Support devfreq for Mali. 
+ -+ if (kbase_pm_policy_init(kbdev) != 0) -+ goto pm_policy_fail; ++ Using the devfreq framework and, by default, the simpleondemand ++ governor, the frequency of Mali will be dynamically selected from the ++ available OPPs. + -+ return 0; ++config MALI_DMA_FENCE ++ bool "DMA_BUF fence support for Mali" ++ depends on MALI_MIDGARD && !KDS ++ default n ++ help ++ Support DMA_BUF fences for Mali. + -+pm_policy_fail: -+ kbase_pm_ca_term(kbdev); -+workq_fail: -+ kbasep_pm_metrics_term(kbdev); -+ return -EINVAL; -+} ++ This option should only be enabled if KDS is not present and ++ the Linux Kernel has built in support for DMA_BUF fences. + -+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) -+{ -+ lockdep_assert_held(&kbdev->pm.lock); ++# MALI_EXPERT configuration options + -+ /* Turn clocks and interrupts on - no-op if we haven't done a previous -+ * kbase_pm_clock_off() */ -+ kbase_pm_clock_on(kbdev, is_resume); ++menuconfig MALI_EXPERT ++ depends on MALI_MIDGARD ++ bool "Enable Expert Settings" ++ default n ++ help ++ Enabling this option and modifying the default settings may produce a driver with performance or ++ other limitations. + -+ /* Update core status as required by the policy */ -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START); -+ kbase_pm_update_cores_state(kbdev); -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END); ++config MALI_CORESTACK ++ bool "Support controlling power to the GPU core stack" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Enabling this feature on supported GPUs will let the driver powering ++ on/off the GPU core stack independently without involving the Power ++ Domain Controller. This should only be enabled on platforms which ++ integration of the PDC to the Mali GPU is known to be problematic. ++ This feature is currently only supported on t-Six and t-HEx GPUs. + -+ /* NOTE: We don't wait to reach the desired state, since running atoms -+ * will wait for that state to be reached anyway */ -+} ++ If unsure, say N. + -+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) -+{ -+ struct kbase_device *kbdev = container_of(data, struct kbase_device, -+ pm.backend.gpu_poweroff_wait_work); -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ unsigned long flags; ++config MALI_PRFCNT_SET_SECONDARY ++ bool "Use secondary set of performance counters" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Select this option to use secondary set of performance counters. Kernel ++ features that depend on an access to the primary set of counters may ++ become unavailable. Enabling this option will prevent power management ++ from working optimally and may cause instrumentation tools to return ++ bogus results. + -+#if !PLATFORM_POWER_DOWN_ONLY -+ /* Wait for power transitions to complete. We do this with no locks held -+ * so that we don't deadlock with any pending workqueues */ -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); -+ kbase_pm_check_transitions_sync(kbdev); -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); -+#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++ If unsure, say N. 
+ -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++config MALI_PLATFORM_FAKE ++ bool "Enable fake platform device support" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ When you start to work with the Mali Midgard series device driver the platform-specific code of ++ the Linux kernel for your platform may not be complete. In this situation the kernel device driver ++ supports creating the platform device outside of the Linux platform-specific code. ++ Enable this option if would like to use a platform device configuration from within the device driver. + -+#if PLATFORM_POWER_DOWN_ONLY -+ if (kbdev->pm.backend.gpu_powered) { -+ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) { -+ /* If L2 cache is powered then we must flush it before -+ * we power off the GPU. Normally this would have been -+ * handled when the L2 was powered off. */ -+ kbase_gpu_cacheclean(kbdev); -+ } -+ } -+#endif /* PLATFORM_POWER_DOWN_ONLY */ ++choice ++ prompt "Platform configuration" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default MALI_PLATFORM_DEVICETREE ++ help ++ Select the SOC platform that contains a Mali Midgard GPU + -+ if (!backend->poweron_required) { -+#if !PLATFORM_POWER_DOWN_ONLY -+ unsigned long flags; ++config MALI_PLATFORM_DEVICETREE ++ bool "Device Tree platform" ++ depends on OF ++ help ++ Select this option to use Device Tree with the Mali driver. + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ WARN_ON(kbdev->l2_available_bitmap || -+ kbdev->shader_available_bitmap || -+ kbdev->tiler_available_bitmap); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++ When using this option the Mali driver will get the details of the ++ GPU hardware from the Device Tree. This means that the same driver ++ binary can run on multiple platforms as long as all the GPU hardware ++ details are described in the device tree. + -+ /* Consume any change-state events */ -+ kbase_timeline_pm_check_handle_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ Device Tree is the recommended method for the Mali driver platform ++ integration. + -+ /* Disable interrupts and turn the clock off */ -+ if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { -+ /* -+ * Page/bus faults are pending, must drop locks to -+ * process. Interrupts are disabled so no more faults -+ * should be generated at this point. -+ */ -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ kbase_flush_mmu_wqs(kbdev); -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++config MALI_PLATFORM_VEXPRESS ++ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) ++ bool "Versatile Express" ++config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ ++ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) ++ bool "Versatile Express w/Virtex7 @ 40Mhz" ++config MALI_PLATFORM_GOLDFISH ++ depends on ARCH_GOLDFISH ++ bool "Android Goldfish virtual CPU" ++config MALI_PLATFORM_PBX ++ depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX ++ bool "Realview PBX-A9" ++config MALI_PLATFORM_THIRDPARTY ++ bool "Third Party Platform" ++endchoice + -+ /* Turn off clock now that fault have been handled. 
We -+ * dropped locks so poweron_required may have changed - -+ * power back on if this is the case.*/ -+ if (backend->poweron_required) -+ kbase_pm_clock_on(kbdev, false); -+ else -+ WARN_ON(!kbase_pm_clock_off(kbdev, -+ backend->poweroff_is_suspend)); -+ } -+ } ++config MALI_PLATFORM_THIRDPARTY_NAME ++ depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT ++ string "Third party platform name" ++ help ++ Enter the name of a third party platform that is supported. The third part configuration ++ file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name ++ specified here. + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ backend->poweroff_wait_in_progress = false; -+ if (backend->poweron_required) { -+ backend->poweron_required = false; -+ kbase_pm_update_cores_state_nolock(kbdev); -+ kbase_backend_slot_update(kbdev); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++config MALI_DEBUG ++ bool "Debug build" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Select this option for increased checking and reporting of errors. + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++config MALI_FENCE_DEBUG ++ bool "Debug sync fence usage" ++ depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) ++ default y if MALI_DEBUG ++ help ++ Select this option to enable additional checking and reporting on the ++ use of sync fences in the Mali driver. + -+ wake_up(&kbdev->pm.backend.poweroff_wait); -+} ++ This will add a 3s timeout to all sync fence waits in the Mali ++ driver, so that when work for Mali has been waiting on a sync fence ++ for a long time a debug message will be printed, detailing what fence ++ is causing the block, and which dependent Mali atoms are blocked as a ++ result of this. + -+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) -+{ -+ unsigned long flags; ++ The timeout can be changed at runtime through the js_soft_timeout ++ device attribute, where the timeout is specified in milliseconds. + -+ lockdep_assert_held(&kbdev->pm.lock); ++config MALI_NO_MALI ++ bool "No Mali" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ This can be used to test the driver in a simulated environment ++ whereby the hardware is not physically present. If the hardware is physically ++ present it will not be used. This can be used to test the majority of the ++ driver without needing actual hardware or for software benchmarking. ++ All calls to the simulated hardware will complete immediately as if the hardware ++ completed the task. + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (!kbdev->pm.backend.poweroff_wait_in_progress) { -+ /* Force all cores off */ -+ kbdev->pm.backend.desired_shader_state = 0; -+ kbdev->pm.backend.desired_tiler_state = 0; ++config MALI_ERROR_INJECT ++ bool "Error injection" ++ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI ++ default n ++ help ++ Enables insertion of errors to test module failure and recovery mechanisms. 
+ -+ /* Force all cores to be unavailable, in the situation where -+ * transitions are in progress for some cores but not others, -+ * and kbase_pm_check_transitions_nolock can not immediately -+ * power off the cores */ -+ kbdev->shader_available_bitmap = 0; -+ kbdev->tiler_available_bitmap = 0; -+ kbdev->l2_available_bitmap = 0; ++config MALI_TRACE_TIMELINE ++ bool "Timeline tracing" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Enables timeline tracing through the kernel tracepoint system. + -+ kbdev->pm.backend.poweroff_wait_in_progress = true; -+ kbdev->pm.backend.poweroff_is_suspend = is_suspend; ++config MALI_SYSTEM_TRACE ++ bool "Enable system event tracing support" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Choose this option to enable system trace events for each ++ kbase event. This is typically used for debugging but has ++ minimal overhead when not in use. Enable only if you know what ++ you are doing. + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ /*Kick off wq here. Callers will have to wait*/ -+ queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, -+ &kbdev->pm.backend.gpu_poweroff_wait_work); -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } -+} ++config MALI_GPU_MMU_AARCH64 ++ bool "Use AArch64 page tables" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Use AArch64 format page tables for the GPU instead of LPAE-style. ++ The two formats have the same functionality and performance but a ++ future GPU may deprecate or remove the legacy LPAE-style format. + -+static bool is_poweroff_in_progress(struct kbase_device *kbdev) -+{ -+ bool ret; -+ unsigned long flags; ++ The LPAE-style format is supported on all Midgard and current Bifrost ++ GPUs. Enabling AArch64 format restricts the driver to only supporting ++ Bifrost GPUs. + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ If in doubt, say N. + -+ return ret; -+} ++config MALI_PWRSOFT_765 ++ bool "PWRSOFT-765 ticket" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ PWRSOFT-765 fixes devfreq cooling devices issues. However, they are ++ not merged in mainline kernel yet. So this define helps to guard those ++ parts of the code. + -+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) -+{ -+ wait_event_killable(kbdev->pm.backend.poweroff_wait, -+ is_poweroff_in_progress(kbdev)); -+} ++source "drivers/gpu/arm/midgard/platform/Kconfig" ++source "drivers/gpu/arm/midgard/tests/Kconfig" +diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile +new file mode 100644 +index 000000000..9aa242c4f +--- /dev/null ++++ b/drivers/gpu/arm/midgard/Makefile +@@ -0,0 +1,42 @@ ++# ++# (C) COPYRIGHT 2010-2016, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
++# ++# + -+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, -+ unsigned int flags) -+{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ unsigned long irq_flags; -+ int ret; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++KDIR ?= /lib/modules/$(shell uname -r)/build + -+ /* A suspend won't happen during startup/insmod */ -+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); ++BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. ++UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump ++KBASE_PATH_RELATIVE = $(CURDIR) ++KDS_PATH_RELATIVE = $(CURDIR)/../../../.. ++EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers + -+ /* Power up the GPU, don't enable IRQs as we are not ready to receive -+ * them. */ -+ ret = kbase_pm_init_hw(kbdev, flags); -+ if (ret) { -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ return ret; -+ } ++ifeq ($(MALI_UNIT_TEST), 1) ++ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers ++endif + -+ kbasep_pm_init_core_use_bitmaps(kbdev); ++ifeq ($(MALI_BUS_LOG), 1) ++#Add bus logger symbols ++EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers ++endif + -+ kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = -+ kbdev->pm.debug_core_mask[1] = -+ kbdev->pm.debug_core_mask[2] = -+ kbdev->gpu_props.props.raw_props.shader_present; ++# GPL driver supports KDS ++EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers + -+ /* Pretend the GPU is active to prevent a power policy turning the GPU -+ * cores off */ -+ kbdev->pm.active_count = 1; ++# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions ++all: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); -+ /* Ensure cycle counter is off */ -+ kbdev->pm.backend.gpu_cycle_counter_requests = 0; -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); ++clean: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) clean +diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase +new file mode 100755 +index 000000000..2bef9c25e +--- /dev/null ++++ b/drivers/gpu/arm/midgard/Makefile.kbase +@@ -0,0 +1,17 @@ ++# ++# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ /* We are ready to receive IRQ's now as power policy is set up, so -+ * enable them now. 
*/ -+#ifdef CONFIG_MALI_DEBUG -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags); -+ kbdev->pm.backend.driver_ready_for_irqs = true; -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags); -+#endif -+ kbase_pm_enable_interrupts(kbdev); + -+ /* Turn on the GPU and any cores needed by the policy */ -+ kbase_pm_do_poweron(kbdev, false); -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) + -+ /* Idle the GPU and/or cores, if the policy wants it to */ -+ kbase_pm_context_idle(kbdev); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild +new file mode 100755 +index 000000000..5f700e9b6 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild +@@ -0,0 +1,60 @@ ++# ++# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ return 0; -+} + -+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++BACKEND += \ ++ backend/gpu/mali_kbase_cache_policy_backend.c \ ++ backend/gpu/mali_kbase_device_hw.c \ ++ backend/gpu/mali_kbase_gpu.c \ ++ backend/gpu/mali_kbase_gpuprops_backend.c \ ++ backend/gpu/mali_kbase_debug_job_fault_backend.c \ ++ backend/gpu/mali_kbase_irq_linux.c \ ++ backend/gpu/mali_kbase_instr_backend.c \ ++ backend/gpu/mali_kbase_jm_as.c \ ++ backend/gpu/mali_kbase_jm_hw.c \ ++ backend/gpu/mali_kbase_jm_rb.c \ ++ backend/gpu/mali_kbase_js_affinity.c \ ++ backend/gpu/mali_kbase_js_backend.c \ ++ backend/gpu/mali_kbase_mmu_hw_direct.c \ ++ backend/gpu/mali_kbase_pm_backend.c \ ++ backend/gpu/mali_kbase_pm_driver.c \ ++ backend/gpu/mali_kbase_pm_metrics.c \ ++ backend/gpu/mali_kbase_pm_ca.c \ ++ backend/gpu/mali_kbase_pm_ca_fixed.c \ ++ backend/gpu/mali_kbase_pm_always_on.c \ ++ backend/gpu/mali_kbase_pm_coarse_demand.c \ ++ backend/gpu/mali_kbase_pm_demand.c \ ++ backend/gpu/mali_kbase_pm_policy.c \ ++ backend/gpu/mali_kbase_time.c + -+ mutex_lock(&kbdev->pm.lock); -+ kbase_pm_cancel_deferred_poweroff(kbdev); -+ kbase_pm_do_poweroff(kbdev, false); -+ mutex_unlock(&kbdev->pm.lock); -+} ++ifeq ($(MALI_CUSTOMER_RELEASE),0) ++BACKEND += \ ++ backend/gpu/mali_kbase_pm_ca_random.c \ ++ backend/gpu/mali_kbase_pm_demand_always_powered.c \ ++ backend/gpu/mali_kbase_pm_fast_start.c ++endif + -+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); ++ifeq ($(CONFIG_MALI_DEVFREQ),y) ++BACKEND += \ ++ backend/gpu/mali_kbase_devfreq.c \ ++ backend/gpu/mali_kbase_pm_ca_devfreq.c ++endif + -+void kbase_hwaccess_pm_term(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); ++ifeq ($(CONFIG_MALI_NO_MALI),y) ++ # Dummy model ++ BACKEND += backend/gpu/mali_kbase_model_dummy.c ++ BACKEND += backend/gpu/mali_kbase_model_linux.c ++ # HW error simulation ++ BACKEND += backend/gpu/mali_kbase_model_error_generator.c ++endif +diff --git 
a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +new file mode 100644 +index 000000000..c8ae87eb8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +@@ -0,0 +1,29 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Free any resources the policy allocated */ -+ kbase_pm_policy_term(kbdev); -+ kbase_pm_ca_term(kbdev); + -+ /* Shut down the metrics subsystem */ -+ kbasep_pm_metrics_term(kbdev); + -+ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); -+} ++/* ++ * Backend specific configuration ++ */ + -+void kbase_pm_power_changed(struct kbase_device *kbdev) -+{ -+ bool cores_are_available; -+ unsigned long flags; ++#ifndef _KBASE_BACKEND_CONFIG_H_ ++#define _KBASE_BACKEND_CONFIG_H_ + -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); ++/* Enable GPU reset API */ ++#define KBASE_GPU_RESET_EN 1 + -+ if (cores_are_available) { -+ /* Log timelining information that a change in state has -+ * completed */ -+ kbase_timeline_pm_handle_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++#endif /* _KBASE_BACKEND_CONFIG_H_ */ + -+ kbase_backend_slot_update(kbdev); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +new file mode 100644 +index 000000000..fef9a2cb7 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +@@ -0,0 +1,29 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, -+ u64 new_core_mask_js0, u64 new_core_mask_js1, -+ u64 new_core_mask_js2) -+{ -+ kbdev->pm.debug_core_mask[0] = new_core_mask_js0; -+ kbdev->pm.debug_core_mask[1] = new_core_mask_js1; -+ kbdev->pm.debug_core_mask[2] = new_core_mask_js2; -+ kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | -+ new_core_mask_js2; + -+ kbase_pm_update_cores_state_nolock(kbdev); -+} + -+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) -+{ -+ kbase_pm_update_active(kbdev); -+} ++#include "backend/gpu/mali_kbase_cache_policy_backend.h" ++#include + -+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) ++void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, ++ u32 mode) +{ -+ kbase_pm_update_active(kbdev); ++ kbdev->current_gpu_coherency_mode = mode; ++ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) ++ kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); +} + -+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +new file mode 100644 +index 000000000..fe9869109 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +@@ -0,0 +1,34 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++ ++ ++ ++#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ ++#define _KBASE_CACHE_POLICY_BACKEND_H_ ++ ++#include "mali_kbase.h" ++#include "mali_base_kernel.h" ++ ++/** ++ * kbase_cache_set_coherency_mode() - Sets the system coherency mode ++ * in the GPU. ++ * @kbdev: Device pointer ++ * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE ++ */ ++void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, ++ u32 mode); ++ ++#endif /* _KBASE_CACHE_POLICY_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +new file mode 100644 +index 000000000..7851ea646 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +@@ -0,0 +1,157 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ ++ ++ ++ ++#include ++#include ++#include "mali_kbase_debug_job_fault.h" ++ ++#ifdef CONFIG_DEBUG_FS ++ ++/*GPU_CONTROL_REG(r)*/ ++static int gpu_control_reg_snapshot[] = { ++ GPU_ID, ++ SHADER_READY_LO, ++ SHADER_READY_HI, ++ TILER_READY_LO, ++ TILER_READY_HI, ++ L2_READY_LO, ++ L2_READY_HI ++}; ++ ++/* JOB_CONTROL_REG(r) */ ++static int job_control_reg_snapshot[] = { ++ JOB_IRQ_MASK, ++ JOB_IRQ_STATUS ++}; ++ ++/* JOB_SLOT_REG(n,r) */ ++static int job_slot_reg_snapshot[] = { ++ JS_HEAD_LO, ++ JS_HEAD_HI, ++ JS_TAIL_LO, ++ JS_TAIL_HI, ++ JS_AFFINITY_LO, ++ JS_AFFINITY_HI, ++ JS_CONFIG, ++ JS_STATUS, ++ JS_HEAD_NEXT_LO, ++ JS_HEAD_NEXT_HI, ++ JS_AFFINITY_NEXT_LO, ++ JS_AFFINITY_NEXT_HI, ++ JS_CONFIG_NEXT ++}; ++ ++/*MMU_REG(r)*/ ++static int mmu_reg_snapshot[] = { ++ MMU_IRQ_MASK, ++ MMU_IRQ_STATUS ++}; ++ ++/* MMU_AS_REG(n,r) */ ++static int as_reg_snapshot[] = { ++ AS_TRANSTAB_LO, ++ AS_TRANSTAB_HI, ++ AS_MEMATTR_LO, ++ AS_MEMATTR_HI, ++ AS_FAULTSTATUS, ++ AS_FAULTADDRESS_LO, ++ AS_FAULTADDRESS_HI, ++ AS_STATUS ++}; ++ ++bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, ++ int reg_range) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ int i, j; ++ int offset = 0; ++ int slot_number; ++ int as_number; + -+ /* Force power off the GPU and all cores (regardless of policy), only -+ * after the PM active count reaches zero (otherwise, we risk turning it -+ * off prematurely) */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++ if (kctx->reg_dump == NULL) ++ return false; + -+ kbase_pm_cancel_deferred_poweroff(kbdev); -+ kbase_pm_do_poweroff(kbdev, true); ++ slot_number = kctx->kbdev->gpu_props.num_job_slots; ++ as_number = kctx->kbdev->gpu_props.num_address_spaces; + -+ kbase_backend_timer_suspend(kbdev); ++ /* get the GPU control registers*/ ++ for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); ++ offset += 2; ++ } + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ /* get the Job control registers*/ ++ for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ JOB_CONTROL_REG(job_control_reg_snapshot[i]); ++ offset += 2; ++ } + -+ kbase_pm_wait_for_poweroff_complete(kbdev); ++ /* get the Job Slot registers*/ ++ for (j = 0; j < slot_number; j++) { ++ for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); ++ offset += 2; ++ } ++ } ++ ++ /* get the MMU registers*/ ++ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); ++ offset += 2; ++ } ++ ++ /* get the Address space registers*/ ++ for (j = 0; j < as_number; j++) { ++ for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { ++ kctx->reg_dump[offset] = ++ MMU_AS_REG(j, as_reg_snapshot[i]); ++ offset += 2; ++ } ++ } ++ ++ WARN_ON(offset >= (reg_range*2/4)); ++ ++ /* set the termination flag*/ ++ kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; ++ kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; ++ ++ dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", ++ offset); ++ ++ return true; +} + -+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) ++bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ int offset = 0; + -+ mutex_lock(&js_devdata->runpool_mutex); -+ 
mutex_lock(&kbdev->pm.lock); ++ if (kctx->reg_dump == NULL) ++ return false; + -+ kbdev->pm.suspending = false; -+ kbase_pm_do_poweron(kbdev, true); ++ while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { ++ kctx->reg_dump[offset+1] = ++ kbase_reg_read(kctx->kbdev, ++ kctx->reg_dump[offset], NULL); ++ offset += 2; ++ } ++ return true; ++} + -+ kbase_backend_timer_resume(kbdev); + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); -+} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c ++#endif +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c new file mode 100644 -index 000000000..85890f1e8 +index 000000000..4e8e56e9b --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c -@@ -0,0 +1,182 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +@@ -0,0 +1,474 @@ +/* + * -+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -341598,181 +342427,473 @@ index 000000000..85890f1e8 + */ + + ++#define ENABLE_DEBUG_LOG ++#include "../../platform/rk/custom_log.h" + -+/* -+ * Base kernel core availability APIs -+ */ + +#include -+#include ++#include ++#include +#include + -+static const struct kbase_pm_ca_policy *const policy_list[] = { -+ &kbase_pm_ca_fixed_policy_ops, -+#ifdef CONFIG_MALI_DEVFREQ -+ &kbase_pm_ca_devfreq_policy_ops, -+#endif -+#if !MALI_CUSTOMER_RELEASE -+ &kbase_pm_ca_random_policy_ops ++#include ++#include ++#include ++#ifdef CONFIG_DEVFREQ_THERMAL ++#include +#endif ++ ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) ++#include ++#else /* Linux >= 3.13 */ ++/* In 3.13 the OPP include header file, types, and functions were all ++ * renamed. Use the old filename for the include, and define the new names to ++ * the old, when an old kernel is detected. ++ */ ++#include ++#define dev_pm_opp opp ++#define dev_pm_opp_get_voltage opp_get_voltage ++#define dev_pm_opp_get_opp_count opp_get_opp_count ++#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil ++#define dev_pm_opp_find_freq_floor opp_find_freq_floor ++#endif /* Linux >= 3.13 */ ++#include ++#include ++ ++static struct devfreq_simple_ondemand_data ondemand_data; ++ ++static struct monitor_dev_profile mali_mdevp = { ++ .type = MONITOR_TYPE_DEV, ++ .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, ++ .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, +}; + +/** -+ * POLICY_COUNT - The number of policies available in the system. ++ * opp_translate - Translate nominal OPP frequency from devicetree into real ++ * frequency and core mask ++ * @kbdev: Device pointer ++ * @freq: Nominal frequency ++ * @core_mask: Pointer to u64 to store core mask to + * -+ * This is derived from the number of functions listed in policy_list. ++ * Return: Real target frequency ++ * ++ * This function will only perform translation if an operating-points-v2-mali ++ * table is present in devicetree. If one is not present then it will return an ++ * untranslated frequency and all cores enabled. 
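++ *
++ * For example (values are purely illustrative, not taken from any real
++ * platform): an operating-points-v2-mali entry whose opp-hz is 200000000,
++ * opp-hz-real is 198000000 and opp-core-mask is 0x3 would cause a nominal
++ * 200 MHz devfreq request to be translated into a real clock target of
++ * 198 MHz with shader cores 0 and 1 enabled. The lookup table consulted
++ * here is built from those devicetree properties by
++ * kbase_devfreq_init_core_mask_table().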
+ */ -+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) ++static unsigned long opp_translate(struct kbase_device *kbdev, ++ unsigned long freq, u64 *core_mask) ++{ ++ int i; + -+int kbase_pm_ca_init(struct kbase_device *kbdev) ++ for (i = 0; i < kbdev->num_opps; i++) { ++ if (kbdev->opp_table[i].opp_freq == freq) { ++ *core_mask = kbdev->opp_table[i].core_mask; ++ return kbdev->opp_table[i].real_freq; ++ } ++ } ++ ++ /* Failed to find OPP - return all cores enabled & nominal frequency */ ++ *core_mask = kbdev->gpu_props.props.raw_props.shader_present; ++ ++ return freq; ++} ++ ++static int ++kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct dev_pm_opp *opp; ++ unsigned long nominal_freq; ++ unsigned long freq = 0; ++ unsigned long old_freq = kbdev->current_freq; ++ unsigned long voltage; ++ int err; ++ u64 core_mask; + -+ kbdev->pm.backend.ca_current_policy = policy_list[0]; ++ freq = *target_freq; + -+ kbdev->pm.backend.ca_current_policy->init(kbdev); ++ opp = devfreq_recommended_opp(dev, &freq, flags); ++ if (IS_ERR(opp)) { ++ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); ++ return PTR_ERR(opp); ++ } ++ voltage = dev_pm_opp_get_voltage(opp); + -+ return 0; ++ nominal_freq = freq; ++ ++ /* ++ * Only update if there is a change of frequency ++ */ ++ if (kbdev->current_nominal_freq == nominal_freq) { ++ *target_freq = nominal_freq; ++#ifdef CONFIG_REGULATOR ++ if (kbdev->current_voltage == voltage) ++ return 0; ++ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); ++ if (err) { ++ dev_err(dev, "Failed to set voltage (%d)\n", err); ++ return err; ++ } ++ kbdev->current_voltage = voltage; ++#endif ++ return 0; ++ } ++ ++ freq = opp_translate(kbdev, nominal_freq, &core_mask); ++#ifdef CONFIG_REGULATOR ++ if (kbdev->regulator && kbdev->current_voltage != voltage && ++ old_freq < freq) { ++ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); ++ if (err) { ++ dev_err(dev, "Failed to increase voltage (%d)\n", err); ++ return err; ++ } ++ } ++#endif ++ ++ err = clk_set_rate(kbdev->clock, freq); ++ if (err) { ++ dev_err(dev, "Failed to set clock %lu (target %lu)\n", ++ freq, *target_freq); ++ return err; ++ } ++ *target_freq = freq; ++ kbdev->current_freq = freq; ++ if (kbdev->devfreq) ++ kbdev->devfreq->last_status.current_frequency = freq; ++#ifdef CONFIG_REGULATOR ++ if (kbdev->regulator && kbdev->current_voltage != voltage && ++ old_freq > freq) { ++ err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX); ++ if (err) { ++ dev_err(dev, "Failed to decrease voltage (%d)\n", err); ++ return err; ++ } ++ } ++#endif ++ ++ if (kbdev->pm.backend.ca_current_policy->id == ++ KBASE_PM_CA_POLICY_ID_DEVFREQ) ++ kbase_devfreq_set_core_mask(kbdev, core_mask); ++ ++ *target_freq = nominal_freq; ++ kbdev->current_voltage = voltage; ++ kbdev->current_nominal_freq = nominal_freq; ++ kbdev->current_freq = freq; ++ kbdev->current_core_mask = core_mask; ++ ++ KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq); ++ ++ kbase_pm_reset_dvfs_utilisation(kbdev); ++ ++ return err; +} + -+void kbase_pm_ca_term(struct kbase_device *kbdev) ++static int ++kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +{ -+ kbdev->pm.backend.ca_current_policy->term(kbdev); ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ ++ *freq = kbdev->current_nominal_freq; ++ ++ return 0; +} + -+int kbase_pm_ca_list_policies(const struct 
kbase_pm_ca_policy * const **list) ++static int ++kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +{ -+ if (!list) -+ return POLICY_COUNT; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+ *list = policy_list; ++ stat->current_frequency = kbdev->current_nominal_freq; + -+ return POLICY_COUNT; ++ kbase_pm_get_dvfs_utilisation(kbdev, ++ &stat->total_time, &stat->busy_time); ++ ++ stat->private_data = NULL; ++ ++ return 0; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies); ++static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, ++ struct devfreq_dev_profile *dp) ++{ ++ int count; ++ int i = 0; ++ unsigned long freq; ++ struct dev_pm_opp *opp; + -+const struct kbase_pm_ca_policy -+*kbase_pm_ca_get_policy(struct kbase_device *kbdev) ++ count = dev_pm_opp_get_opp_count(kbdev->dev); ++ if (count < 0) { ++ return count; ++ } ++ ++ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), ++ GFP_KERNEL); ++ if (!dp->freq_table) ++ return -ENOMEM; ++ ++ for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { ++ opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); ++ if (IS_ERR(opp)) ++ break; ++ dev_pm_opp_put(opp); ++ ++ dp->freq_table[i] = freq; ++ } ++ ++ if (count != i) ++ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", ++ count, i); ++ ++ dp->max_state = i; ++ ++ return 0; ++} ++ ++static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; + -+ return kbdev->pm.backend.ca_current_policy; ++ kfree(dp->freq_table); ++ dp->freq_table = NULL; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy); ++static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) ++{ ++ kfree(kbdev->opp_table); ++ kbdev->opp_table = NULL; ++} + -+void kbase_pm_ca_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_ca_policy *new_policy) ++static void kbase_devfreq_exit(struct device *dev) +{ -+ const struct kbase_pm_ca_policy *old_policy; -+ unsigned long flags; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(new_policy != NULL); ++ if (kbdev) ++ kbase_devfreq_term_freq_table(kbdev); ++} + -+ KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, -+ new_policy->id); ++static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) ++{ ++ struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, ++ "operating-points-v2", 0); ++ struct device_node *node; ++ int i = 0; ++ int count; + -+ /* During a policy change we pretend the GPU is active */ -+ /* A suspend won't happen here, because we're in a syscall from a -+ * userspace thread */ -+ kbase_pm_context_active(kbdev); ++ if (!opp_node) ++ return 0; ++ if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) ++ return 0; + -+ mutex_lock(&kbdev->pm.lock); ++ count = dev_pm_opp_get_opp_count(kbdev->dev); ++ kbdev->opp_table = kmalloc_array(count, ++ sizeof(struct kbase_devfreq_opp), GFP_KERNEL); ++ if (!kbdev->opp_table) ++ return -ENOMEM; + -+ /* Remove the policy to prevent IRQ handlers from working on it */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ old_policy = kbdev->pm.backend.ca_current_policy; -+ kbdev->pm.backend.ca_current_policy = NULL; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ for_each_available_child_of_node(opp_node, node) { ++ u64 core_mask; ++ u64 opp_freq, real_freq; ++ const void *core_count_p; + -+ if 
(old_policy->term) -+ old_policy->term(kbdev); ++ if (of_property_read_u64(node, "opp-hz", &opp_freq)) { ++ dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n"); ++ continue; ++ } ++ if (of_property_read_u64(node, "opp-hz-real", &real_freq)) ++ real_freq = opp_freq; ++ if (of_property_read_u64(node, "opp-core-mask", &core_mask)) ++ core_mask = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ core_count_p = of_get_property(node, "opp-core-count", NULL); ++ if (core_count_p) { ++ u64 remaining_core_mask = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ int core_count = be32_to_cpup(core_count_p); + -+ if (new_policy->init) -+ new_policy->init(kbdev); ++ core_mask = 0; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.ca_current_policy = new_policy; ++ for (; core_count > 0; core_count--) { ++ int core = ffs(remaining_core_mask); + -+ /* If any core power state changes were previously attempted, but -+ * couldn't be made because the policy was changing (current_policy was -+ * NULL), then re-try them here. */ -+ kbase_pm_update_cores_state_nolock(kbdev); ++ if (!core) { ++ dev_err(kbdev->dev, "OPP has more cores than GPU\n"); ++ return -ENODEV; ++ } + -+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, -+ kbdev->shader_ready_bitmap, -+ kbdev->shader_transitioning_bitmap); ++ core_mask |= (1ull << (core-1)); ++ remaining_core_mask &= ~(1ull << (core-1)); ++ } ++ } + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (!core_mask) { ++ dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); ++ return -ENODEV; ++ } + -+ mutex_unlock(&kbdev->pm.lock); ++ kbdev->opp_table[i].opp_freq = opp_freq; ++ kbdev->opp_table[i].real_freq = real_freq; ++ kbdev->opp_table[i].core_mask = core_mask; + -+ /* Now the policy change is finished, we release our fake context active -+ * reference */ -+ kbase_pm_context_idle(kbdev); -+} ++ dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n", ++ i, opp_freq, real_freq, core_mask); + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); ++ i++; ++ } + -+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) ++ kbdev->num_opps = i; ++ ++ return 0; ++} ++ ++int kbase_devfreq_init(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct device_node *np = kbdev->dev->of_node; ++ struct devfreq_dev_profile *dp; ++ struct dev_pm_opp *opp; ++ unsigned long opp_rate; ++ int err; + -+ /* All cores must be enabled when instrumentation is in use */ -+ if (kbdev->pm.backend.instr_enabled) -+ return kbdev->gpu_props.props.raw_props.shader_present & -+ kbdev->pm.debug_core_mask_all; ++ if (!kbdev->clock) { ++ dev_err(kbdev->dev, "Clock not available for devfreq\n"); ++ return -ENODEV; ++ } + -+ if (kbdev->pm.backend.ca_current_policy == NULL) -+ return kbdev->gpu_props.props.raw_props.shader_present & -+ kbdev->pm.debug_core_mask_all; ++ kbdev->current_freq = clk_get_rate(kbdev->clock); ++ kbdev->current_nominal_freq = kbdev->current_freq; + -+ return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & -+ kbdev->pm.debug_core_mask_all; -+} ++ dp = &kbdev->devfreq_profile; + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); ++ dp->initial_freq = kbdev->current_freq; ++ /* .KP : set devfreq_dvfs_interval_in_ms */ ++ dp->polling_ms = 20; ++ dp->target = kbase_devfreq_target; ++ dp->get_dev_status = kbase_devfreq_status; ++ dp->get_cur_freq = kbase_devfreq_cur_freq; ++ dp->exit = kbase_devfreq_exit; + -+void kbase_pm_ca_update_core_status(struct 
kbase_device *kbdev, u64 cores_ready, -+ u64 cores_transitioning) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (kbase_devfreq_init_freq_table(kbdev, dp)) ++ return -EFAULT; + -+ if (kbdev->pm.backend.ca_current_policy != NULL) -+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, -+ cores_ready, -+ cores_transitioning); -+} ++ err = kbase_devfreq_init_core_mask_table(kbdev); ++ if (err) ++ goto init_core_mask_table_failed; + -+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ of_property_read_u32(np, "upthreshold", ++ &ondemand_data.upthreshold); ++ of_property_read_u32(np, "downdifferential", ++ &ondemand_data.downdifferential); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.instr_enabled = true; ++ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, ++ "simple_ondemand", &ondemand_data); ++ if (IS_ERR(kbdev->devfreq)) { ++ err = PTR_ERR(kbdev->devfreq); ++ kbdev->devfreq = NULL; ++ dev_err(kbdev->dev, "Fail to add devfreq device(%d)", err); ++ goto devfreq_add_dev_failed; ++ } + -+ kbase_pm_update_cores_state_nolock(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* devfreq_add_device only copies a few of kbdev->dev's fields, so ++ * set drvdata explicitly so IPA models can access kbdev. */ ++ dev_set_drvdata(&kbdev->devfreq->dev, kbdev); ++ ++ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to register OPP notifier (%d)\n", err); ++ goto opp_notifier_failed; ++ } ++ ++ opp_rate = kbdev->current_freq; ++ opp = devfreq_recommended_opp(kbdev->dev, &opp_rate, 0); ++ if (!IS_ERR(opp)) ++ dev_pm_opp_put(opp); ++ kbdev->devfreq->last_status.current_frequency = opp_rate; ++ ++ mali_mdevp.data = kbdev->devfreq; ++ kbdev->mdev_info = rockchip_system_monitor_register(kbdev->dev, ++ &mali_mdevp); ++ if (IS_ERR(kbdev->mdev_info)) { ++ dev_dbg(kbdev->dev, "without system monitor\n"); ++ kbdev->mdev_info = NULL; ++ } ++#ifdef CONFIG_DEVFREQ_THERMAL ++ err = kbase_ipa_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "IPA initialization failed\n"); ++ goto cooling_failed; ++ } ++ ++ kbdev->devfreq_cooling = of_devfreq_cooling_register_power( ++ kbdev->dev->of_node, ++ kbdev->devfreq, ++ &kbase_ipa_power_model_ops); ++ if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { ++ err = PTR_ERR(kbdev->devfreq_cooling); ++ dev_err(kbdev->dev, ++ "Failed to register cooling device (%d)\n", ++ err); ++ goto cooling_failed; ++ } ++ I("success initing power_model_simple."); ++#endif ++ ++ return 0; ++ ++#ifdef CONFIG_DEVFREQ_THERMAL ++cooling_failed: ++ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++opp_notifier_failed: ++ if (devfreq_remove_device(kbdev->devfreq)) ++ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); ++ else ++ kbdev->devfreq = NULL; ++ ++devfreq_add_dev_failed: ++ kbase_devfreq_term_core_mask_table(kbdev); ++ ++init_core_mask_table_failed: ++ kbase_devfreq_term_freq_table(kbdev); ++ ++ return err; +} + -+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) ++void kbase_devfreq_term(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ kbdev->pm.backend.instr_enabled = false; ++ int err; + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ dev_dbg(kbdev->dev, "Term Mali devfreq\n"); ++ ++ rockchip_system_monitor_unregister(kbdev->mdev_info); ++#ifdef CONFIG_DEVFREQ_THERMAL ++ if (kbdev->devfreq_cooling) ++ 
devfreq_cooling_unregister(kbdev->devfreq_cooling); ++ ++ kbase_ipa_term(kbdev); ++#endif ++ ++ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); ++ ++ err = devfreq_remove_device(kbdev->devfreq); ++ if (err) ++ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); ++ else ++ kbdev->devfreq = NULL; ++ ++ kbase_devfreq_term_core_mask_table(kbdev); +} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h new file mode 100644 -index 000000000..ee9e751f2 +index 000000000..c0bf8b15b --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h -@@ -0,0 +1,92 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h +@@ -0,0 +1,24 @@ +/* + * -+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -341787,90 +342908,22 @@ index 000000000..ee9e751f2 + + + -+/* -+ * Base kernel core availability APIs -+ */ -+ -+#ifndef _KBASE_PM_CA_H_ -+#define _KBASE_PM_CA_H_ -+ -+/** -+ * kbase_pm_ca_init - Initialize core availability framework -+ * -+ * Must be called before calling any other core availability function -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Return: 0 if the core availability framework was successfully initialized, -+ * -errno otherwise -+ */ -+int kbase_pm_ca_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_ca_term - Terminate core availability framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_ca_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Returns a mask of the currently available shader cores. 
-+ * Calls into the core availability policy -+ * -+ * Return: The bit mask of available cores -+ */ -+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_ca_update_core_status - Update core status -+ * -+ * @kbdev: The kbase device structure for the device (must be -+ * a valid pointer) -+ * @cores_ready: The bit mask of cores ready for job submission -+ * @cores_transitioning: The bit mask of cores that are transitioning power -+ * state -+ * -+ * Update core availability policy with current core power status -+ * -+ * Calls into the core availability policy -+ */ -+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, -+ u64 cores_transitioning); -+ -+/** -+ * kbase_pm_ca_instr_enable - Enable override for instrumentation -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This overrides the output of the core availability policy, ensuring that all -+ * cores are available -+ */ -+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); ++#ifndef _BASE_DEVFREQ_H_ ++#define _BASE_DEVFREQ_H_ + -+/** -+ * kbase_pm_ca_instr_disable - Disable override for instrumentation -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This disables any previously enabled override, and resumes normal policy -+ * functionality -+ */ -+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); ++int kbase_devfreq_init(struct kbase_device *kbdev); ++void kbase_devfreq_term(struct kbase_device *kbdev); + -+#endif /* _KBASE_PM_CA_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c ++#endif /* _BASE_DEVFREQ_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c new file mode 100644 -index 000000000..66bf660cf +index 000000000..dcdf15cdc --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c -@@ -0,0 +1,129 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +@@ -0,0 +1,255 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -341885,127 +342938,253 @@ index 000000000..66bf660cf + + + ++ +/* -+ * A core availability policy implementing core mask selection from devfreq OPPs + * + */ -+ +#include -+#include ++#include +#include -+#include + -+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) ++#include ++ ++#if !defined(CONFIG_MALI_NO_MALI) ++ ++ ++#ifdef CONFIG_DEBUG_FS ++ ++ ++int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ -+ struct kbasep_pm_ca_policy_devfreq *data = -+ &kbdev->pm.backend.ca_policy_data.devfreq; ++ struct kbase_io_access *old_buf; ++ struct kbase_io_access *new_buf; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!new_size) ++ goto out_err; /* The new size must not be 0 */ + -+ data->cores_desired = core_mask; ++ new_buf = vmalloc(new_size * sizeof(*h->buf)); ++ if (!new_buf) ++ goto out_err; + -+ /* Disable any cores that are now unwanted */ -+ data->cores_enabled &= data->cores_desired; ++ spin_lock_irqsave(&h->lock, flags); + -+ kbdev->pm.backend.ca_in_transition = true; ++ old_buf = h->buf; + -+ /* If there are no cores to be powered off then power on desired cores ++ /* Note: we won't bother with copying the old data over. The dumping ++ * logic wouldn't work properly as it relies on 'count' both as a ++ * counter and as an index to the buffer which would have changed with ++ * the new array. This is a corner case that we don't need to support. + */ -+ if (!(data->cores_used & ~data->cores_desired)) { -+ data->cores_enabled = data->cores_desired; -+ kbdev->pm.backend.ca_in_transition = false; -+ } ++ h->count = 0; ++ h->size = new_size; ++ h->buf = new_buf; + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ spin_unlock_irqrestore(&h->lock, flags); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ vfree(old_buf); + -+ dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n", -+ data->cores_desired, data->cores_enabled); ++ return 0; ++ ++out_err: ++ return -1; +} + -+static void devfreq_init(struct kbase_device *kbdev) ++ ++int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ -+ struct kbasep_pm_ca_policy_devfreq *data = -+ &kbdev->pm.backend.ca_policy_data.devfreq; ++ h->enabled = false; ++ spin_lock_init(&h->lock); ++ h->count = 0; ++ h->size = 0; ++ h->buf = NULL; ++ if (kbase_io_history_resize(h, n)) ++ return -1; + -+ if (kbdev->current_core_mask) { -+ data->cores_enabled = kbdev->current_core_mask; -+ data->cores_desired = kbdev->current_core_mask; -+ } else { -+ data->cores_enabled = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ data->cores_desired = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ } -+ data->cores_used = 0; -+ kbdev->pm.backend.ca_in_transition = false; ++ return 0; +} + -+static void devfreq_term(struct kbase_device *kbdev) ++ ++void kbase_io_history_term(struct kbase_io_history *h) +{ ++ vfree(h->buf); ++ h->buf = NULL; +} + -+static u64 devfreq_get_core_mask(struct kbase_device *kbdev) ++ ++/* kbase_io_history_add - add new entry to the register access history ++ * ++ * @h: Pointer to the history data structure ++ * @addr: Register address ++ * @value: The value that is either read from or written to the register ++ * @write: 1 if it's a register write, 0 if it's a read ++ */ ++static void kbase_io_history_add(struct kbase_io_history *h, ++ void __iomem const *addr, u32 
value, u8 write) +{ -+ return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled; ++ struct kbase_io_access *io; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&h->lock, flags); ++ ++ io = &h->buf[h->count % h->size]; ++ io->addr = (uintptr_t)addr | write; ++ io->value = value; ++ ++h->count; ++ /* If count overflows, move the index by the buffer size so the entire ++ * buffer will still be dumped later */ ++ if (unlikely(!h->count)) ++ h->count = h->size; ++ ++ spin_unlock_irqrestore(&h->lock, flags); +} + -+static void devfreq_update_core_status(struct kbase_device *kbdev, -+ u64 cores_ready, -+ u64 cores_transitioning) ++ ++void kbase_io_history_dump(struct kbase_device *kbdev) +{ -+ struct kbasep_pm_ca_policy_devfreq *data = -+ &kbdev->pm.backend.ca_policy_data.devfreq; ++ struct kbase_io_history *const h = &kbdev->io_history; ++ u16 i; ++ size_t iters; ++ unsigned long flags; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (!unlikely(h->enabled)) ++ return; + -+ data->cores_used = cores_ready | cores_transitioning; ++ spin_lock_irqsave(&h->lock, flags); + -+ /* If in desired state then clear transition flag */ -+ if (data->cores_enabled == data->cores_desired) -+ kbdev->pm.backend.ca_in_transition = false; ++ dev_err(kbdev->dev, "Register IO History:"); ++ iters = (h->size > h->count) ? h->count : h->size; ++ dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, ++ h->count); ++ for (i = 0; i < iters; ++i) { ++ struct kbase_io_access *io = ++ &h->buf[(h->count - iters + i) % h->size]; ++ char const access = (io->addr & 1) ? 'w' : 'r'; + -+ /* If all undesired cores are now off then power on desired cores. -+ * The direct comparison against cores_enabled limits potential -+ * recursion to one level */ -+ if (!(data->cores_used & ~data->cores_desired) && -+ data->cores_enabled != data->cores_desired) { -+ data->cores_enabled = data->cores_desired; ++ dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, ++ (void *)(io->addr & ~0x1), io->value); ++ } + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ spin_unlock_irqrestore(&h->lock, flags); ++} + -+ kbdev->pm.backend.ca_in_transition = false; -+ } ++ ++#endif /* CONFIG_DEBUG_FS */ ++ ++ ++void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, ++ struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ ++ writel(value, kbdev->reg + offset); ++ ++#ifdef CONFIG_DEBUG_FS ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ value, 1); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); ++ ++ if (kctx && kctx->jctx.tb) ++ kbase_device_trace_register_access(kctx, REG_WRITE, offset, ++ value); +} + -+/* -+ * The struct kbase_pm_ca_policy structure for the devfreq core availability -+ * policy. 
++KBASE_EXPORT_TEST_API(kbase_reg_write); ++ ++u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, ++ struct kbase_context *kctx) ++{ ++ u32 val; ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ ++ val = readl(kbdev->reg + offset); ++ ++#ifdef CONFIG_DEBUG_FS ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ val, 0); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); ++ ++ if (kctx && kctx->jctx.tb) ++ kbase_device_trace_register_access(kctx, REG_READ, offset, val); ++ return val; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_reg_read); ++#endif /* !defined(CONFIG_MALI_NO_MALI) */ ++ ++/** ++ * kbase_report_gpu_fault - Report a GPU fault. ++ * @kbdev: Kbase device pointer ++ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS ++ * was also set + * -+ * This is the static structure that defines the devfreq core availability power -+ * policy's callback and name. ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ * It reports the details of the fault using dev_warn(). + */ -+const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = { -+ "devfreq", /* name */ -+ devfreq_init, /* init */ -+ devfreq_term, /* term */ -+ devfreq_get_core_mask, /* get_core_mask */ -+ devfreq_update_core_status, /* update_core_status */ -+ 0u, /* flags */ -+ KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */ -+}; ++static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) ++{ ++ u32 status; ++ u64 address; + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h ++ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); ++ address = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; ++ address |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); ++ ++ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", ++ status & 0xFF, ++ kbase_exception_name(kbdev, status), ++ address); ++ if (multiple) ++ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); ++} ++ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) ++{ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); ++ if (val & GPU_FAULT) ++ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); ++ ++ if (val & RESET_COMPLETED) ++ kbase_pm_reset_done(kbdev); ++ ++ if (val & PRFCNT_SAMPLE_COMPLETED) ++ kbase_instr_hwcnt_sample_done(kbdev); ++ ++ if (val & CLEAN_CACHES_COMPLETED) ++ kbase_clean_caches_done(kbdev); ++ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); ++ ++ /* kbase_pm_check_transitions must be called after the IRQ has been ++ * cleared. This is because it might trigger further power transitions ++ * and we don't want to miss the interrupt raised to notify us that ++ * these further transitions have finished. 
++ */ ++ if (val & POWER_CHANGED_ALL) ++ kbase_pm_power_changed(kbdev); ++ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); ++} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h new file mode 100644 -index 000000000..7ab3cd4d8 +index 000000000..5b2044593 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h -@@ -0,0 +1,55 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h +@@ -0,0 +1,67 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -342020,53 +343199,65 @@ index 000000000..7ab3cd4d8 + + + ++ ++ +/* -+ * A core availability policy for use with devfreq, where core masks are -+ * associated with OPPs. ++ * Backend-specific HW access device APIs + */ + -+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H -+#define MALI_KBASE_PM_CA_DEVFREQ_H ++#ifndef _KBASE_DEVICE_INTERNAL_H_ ++#define _KBASE_DEVICE_INTERNAL_H_ + +/** -+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy ++ * kbase_reg_write - write to GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * @value: Value to write ++ * @kctx: Kbase context pointer. May be NULL + * -+ * This contains data that is private to the devfreq core availability -+ * policy. ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If ++ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr ++ * != KBASEP_AS_NR_INVALID). ++ */ ++void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, ++ struct kbase_context *kctx); ++ ++/** ++ * kbase_reg_read - read from GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * @kctx: Kbase context pointer. May be NULL + * -+ * @cores_desired: Cores that the policy wants to be available -+ * @cores_enabled: Cores that the policy is currently returning as available -+ * @cores_used: Cores currently powered or transitioning ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If ++ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr ++ * != KBASEP_AS_NR_INVALID). ++ * ++ * Return: Value in desired register + */ -+struct kbasep_pm_ca_policy_devfreq { -+ u64 cores_desired; -+ u64 cores_enabled; -+ u64 cores_used; -+}; ++u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, ++ struct kbase_context *kctx); + -+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** -+ * kbase_devfreq_set_core_mask - Set core mask for policy to use -+ * @kbdev: Device pointer -+ * @core_mask: New core mask ++ * kbase_gpu_interrupt - GPU interrupt handler ++ * @kbdev: Kbase device pointer ++ * @val: The value of the GPU IRQ status register which triggered the call + * -+ * The new core mask will have immediate effect if the GPU is powered, or will -+ * take effect when it is next powered on. ++ * This function is called from the interrupt handler when a GPU irq is to be ++ * handled. 
+ */ -+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); -+ -+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c ++#endif /* _KBASE_DEVICE_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c new file mode 100644 -index 000000000..864612d31 +index 000000000..d578fd78e --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c -@@ -0,0 +1,65 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c +@@ -0,0 +1,123 @@ +/* + * -+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -342081,109 +343272,121 @@ index 000000000..864612d31 + + + ++ +/* -+ * A power policy implementing fixed core availability ++ * Register-based HW access backend APIs + */ -+ +#include -+#include ++#include ++#include ++#include ++#include ++#include + -+static void fixed_init(struct kbase_device *kbdev) ++int kbase_backend_early_init(struct kbase_device *kbdev) +{ -+ kbdev->pm.backend.ca_in_transition = false; -+} ++ int err; + -+static void fixed_term(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); ++ err = kbasep_platform_device_init(kbdev); ++ if (err) ++ return err; ++ ++ /* Ensure we can access the GPU registers */ ++ kbase_pm_register_access_enable(kbdev); ++ ++ /* Find out GPU properties based on the GPU feature registers */ ++ kbase_gpuprops_set(kbdev); ++ ++ /* We're done accessing the GPU registers for now. */ ++ kbase_pm_register_access_disable(kbdev); ++ ++ err = kbase_hwaccess_pm_init(kbdev); ++ if (err) ++ goto fail_pm; ++ ++ err = kbase_install_interrupts(kbdev); ++ if (err) ++ goto fail_interrupts; ++ ++ return 0; ++ ++fail_interrupts: ++ kbase_hwaccess_pm_term(kbdev); ++fail_pm: ++ kbasep_platform_device_term(kbdev); ++ ++ return err; +} + -+static u64 fixed_get_core_mask(struct kbase_device *kbdev) ++void kbase_backend_early_term(struct kbase_device *kbdev) +{ -+ return kbdev->gpu_props.props.raw_props.shader_present; ++ kbase_release_interrupts(kbdev); ++ kbase_hwaccess_pm_term(kbdev); ++ kbasep_platform_device_term(kbdev); +} + -+static void fixed_update_core_status(struct kbase_device *kbdev, -+ u64 cores_ready, -+ u64 cores_transitioning) ++int kbase_backend_late_init(struct kbase_device *kbdev) +{ -+ CSTD_UNUSED(kbdev); -+ CSTD_UNUSED(cores_ready); -+ CSTD_UNUSED(cores_transitioning); -+} ++ int err; + -+/* -+ * The struct kbase_pm_policy structure for the fixed power policy. -+ * -+ * This is the static structure that defines the fixed power policy's callback -+ * and name. 
-+ */ -+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = { -+ "fixed", /* name */ -+ fixed_init, /* init */ -+ fixed_term, /* term */ -+ fixed_get_core_mask, /* get_core_mask */ -+ fixed_update_core_status, /* update_core_status */ -+ 0u, /* flags */ -+ KBASE_PM_CA_POLICY_ID_FIXED, /* id */ -+}; ++ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); ++ if (err) ++ return err; + -+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h -new file mode 100644 -index 000000000..a763155cb ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h -@@ -0,0 +1,40 @@ -+/* -+ * -+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ err = kbase_backend_timer_init(kbdev); ++ if (err) ++ goto fail_timer; + ++#ifdef CONFIG_MALI_DEBUG ++#ifndef CONFIG_MALI_NO_MALI ++ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { ++ dev_err(kbdev->dev, "Interrupt assigment check failed.\n"); ++ err = -EINVAL; ++ goto fail_interrupt_test; ++ } ++#endif /* !CONFIG_MALI_NO_MALI */ ++#endif /* CONFIG_MALI_DEBUG */ + ++ err = kbase_job_slot_init(kbdev); ++ if (err) ++ goto fail_job_slot; + -+/* -+ * A power policy implementing fixed core availability -+ */ ++ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + -+#ifndef MALI_KBASE_PM_CA_FIXED_H -+#define MALI_KBASE_PM_CA_FIXED_H ++ return 0; + -+/** -+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data -+ * -+ * @dummy: Dummy member - no state is needed -+ * -+ * This contains data that is private to the particular power policy that is -+ * active. -+ */ -+struct kbasep_pm_ca_policy_fixed { -+ int dummy; -+}; ++fail_job_slot: + -+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops; ++#ifdef CONFIG_MALI_DEBUG ++#ifndef CONFIG_MALI_NO_MALI ++fail_interrupt_test: ++#endif /* !CONFIG_MALI_NO_MALI */ ++#endif /* CONFIG_MALI_DEBUG */ + -+#endif /* MALI_KBASE_PM_CA_FIXED_H */ ++ kbase_backend_timer_term(kbdev); ++fail_timer: ++ kbase_hwaccess_pm_halt(kbdev); + -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c ++ return err; ++} ++ ++void kbase_backend_late_term(struct kbase_device *kbdev) ++{ ++ kbase_job_slot_halt(kbdev); ++ kbase_job_slot_term(kbdev); ++ kbase_backend_timer_term(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++} ++ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c new file mode 100644 -index 000000000..f891fa225 +index 000000000..b395325b5 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c -@@ -0,0 +1,70 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +@@ -0,0 +1,110 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -342201,65 +343404,105 @@ index 000000000..f891fa225 + + +/* -+ * "Coarse Demand" power management policy ++ * Base kernel property query backend APIs + */ + +#include -+#include ++#include ++#include ++#include + -+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) ++void kbase_backend_gpuprops_get(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump) +{ -+ if (kbdev->pm.active_count == 0) -+ return 0; ++ int i; + -+ return kbdev->gpu_props.props.raw_props.shader_present; -+} ++ /* Fill regdump with the content of the relevant registers */ ++ regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); + -+static bool coarse_demand_get_core_active(struct kbase_device *kbdev) -+{ -+ if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | -+ kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt -+ && !kbdev->tiler_inuse_cnt) -+ return false; ++ regdump->l2_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_FEATURES), NULL); ++ regdump->suspend_size = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SUSPEND_SIZE), NULL); ++ regdump->tiler_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_FEATURES), NULL); ++ regdump->mem_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MEM_FEATURES), NULL); ++ regdump->mmu_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MMU_FEATURES), NULL); ++ regdump->as_present = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(AS_PRESENT), NULL); ++ regdump->js_present = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(JS_PRESENT), NULL); + -+ return true; -+} ++ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) ++ regdump->js_features[i] = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); + -+static void coarse_demand_init(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ regdump->texture_features[i] = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); ++ ++ regdump->thread_max_threads = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); ++ regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), ++ NULL); ++ regdump->thread_max_barrier_size = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); ++ regdump->thread_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(THREAD_FEATURES), NULL); ++ ++ regdump->shader_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); ++ regdump->shader_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); ++ ++ regdump->tiler_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); ++ regdump->tiler_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); ++ ++ regdump->l2_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_LO), NULL); ++ regdump->l2_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_HI), NULL); ++ ++ regdump->stack_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(STACK_PRESENT_LO), NULL); ++ regdump->stack_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(STACK_PRESENT_HI), NULL); +} + -+static void coarse_demand_term(struct kbase_device *kbdev) ++void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump) +{ -+ CSTD_UNUSED(kbdev); -+} ++ 
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { ++ /* Ensure we can access the GPU registers */ ++ kbase_pm_register_access_enable(kbdev); + -+/* The struct kbase_pm_policy structure for the demand power policy. -+ * -+ * This is the static structure that defines the demand power policy's callback -+ * and name. -+ */ -+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { -+ "coarse_demand", /* name */ -+ coarse_demand_init, /* init */ -+ coarse_demand_term, /* term */ -+ coarse_demand_get_core_mask, /* get_core_mask */ -+ coarse_demand_get_core_active, /* get_core_active */ -+ 0u, /* flags */ -+ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ -+}; ++ regdump->coherency_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + -+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h ++ /* We're done accessing the GPU registers for now. */ ++ kbase_pm_register_access_disable(kbdev); ++ } else { ++ /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ ++ regdump->coherency_features = ++ COHERENCY_FEATURE_BIT(COHERENCY_NONE) | ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); ++ } ++} ++ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 -index 000000000..749d305ee +index 000000000..7ad309e8d --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h -@@ -0,0 +1,64 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +@@ -0,0 +1,492 @@ +/* + * -+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -342277,663 +343520,551 @@ index 000000000..749d305ee + + +/* -+ * "Coarse Demand" power management policy ++ * GPU backend instrumentation APIs. + */ + -+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H -+#define MALI_KBASE_PM_COARSE_DEMAND_H ++#include ++#include ++#include ++#include ++#include ++#include + +/** -+ * DOC: -+ * The "Coarse" demand power management policy has the following -+ * characteristics: -+ * - When KBase indicates that the GPU will be powered up, but we don't yet -+ * know which Job Chains are to be run: -+ * - All Shader Cores are powered up, regardless of whether or not they will -+ * be needed later. -+ * - When KBase indicates that a set of Shader Cores are needed to submit the -+ * currently queued Job Chains: -+ * - All Shader Cores are kept powered, regardless of whether or not they will -+ * be needed -+ * - When KBase indicates that the GPU need not be powered: -+ * - The Shader Cores are powered off, and the GPU itself is powered off too. ++ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to ++ * hardware + * -+ * @note: -+ * - KBase indicates the GPU will be powered up when it has a User Process that -+ * has just started to submit Job Chains. -+ * - KBase indicates the GPU need not be powered when all the Job Chains from -+ * User Processes have finished, and it is waiting for a User Process to -+ * submit some more Job Chains. 
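++ *
++ * As well as issuing GPU_COMMAND_CLEAN_INV_CACHES, this helper unmasks the
++ * CLEAN_CACHES_COMPLETED interrupt and moves the instrumentation state to
++ * KBASE_INSTR_STATE_CLEANING; kbase_clean_caches_done() is then invoked from
++ * the GPU IRQ handler once the clean and invalidate has completed.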
++ * @kbdev: Kbase device + */ ++static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ unsigned long pm_flags; ++ u32 irq_mask; + -+/** -+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand -+ * policy -+ * -+ * This contains data that is private to the coarse demand power policy. -+ * -+ * @dummy: Dummy member - no state needed -+ */ -+struct kbasep_pm_policy_coarse_demand { -+ int dummy; -+}; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_REQUEST_CLEAN); + -+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; ++ /* Enable interrupt */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask | CLEAN_CACHES_COMPLETED, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + -+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h -new file mode 100644 -index 000000000..352744ee6 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h -@@ -0,0 +1,519 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ /* clean&invalidate the caches so we're sure the mmu tables for the dump ++ * buffer is valid */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAN_INV_CACHES, NULL); ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; + ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++} + ++int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_uk_hwcnt_setup *setup) ++{ ++ unsigned long flags, pm_flags; ++ int err = -EINVAL; ++ u32 irq_mask; ++ int ret; ++ u64 shader_cores_needed; ++ u32 prfcnt_config; + -+/* -+ * Backend-specific Power Manager definitions -+ */ ++ shader_cores_needed = kbase_pm_get_present_cores(kbdev, ++ KBASE_PM_CORE_SHADER); + -+#ifndef _KBASE_PM_HWACCESS_DEFS_H_ -+#define _KBASE_PM_HWACCESS_DEFS_H_ ++ /* alignment failure */ ++ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) ++ goto out_err; + -+#include "mali_kbase_pm_ca_fixed.h" -+#include "mali_kbase_pm_ca_devfreq.h" -+#if !MALI_CUSTOMER_RELEASE -+#include "mali_kbase_pm_ca_random.h" -+#endif ++ /* Override core availability policy to ensure all cores are available ++ */ ++ kbase_pm_ca_instr_enable(kbdev); + -+#include "mali_kbase_pm_always_on.h" -+#include "mali_kbase_pm_coarse_demand.h" -+#include "mali_kbase_pm_demand.h" -+#if !MALI_CUSTOMER_RELEASE -+#include "mali_kbase_pm_demand_always_powered.h" -+#include "mali_kbase_pm_fast_start.h" -+#endif ++ /* Request the cores early on synchronously - we'll release them on any ++ * errors (e.g. 
instrumentation already active) */ ++ kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); + -+/* Forward definition - see mali_kbase.h */ -+struct kbase_device; -+struct kbase_jd_atom; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+/** -+ * enum kbase_pm_core_type - The types of core in a GPU. -+ * -+ * These enumerated values are used in calls to -+ * - kbase_pm_get_present_cores() -+ * - kbase_pm_get_active_cores() -+ * - kbase_pm_get_trans_cores() -+ * - kbase_pm_get_ready_cores(). -+ * -+ * They specify which type of core should be acted on. These values are set in -+ * a manner that allows core_type_to_reg() function to be simpler and more -+ * efficient. -+ * -+ * @KBASE_PM_CORE_L2: The L2 cache -+ * @KBASE_PM_CORE_SHADER: Shader cores -+ * @KBASE_PM_CORE_TILER: Tiler cores -+ * @KBASE_PM_CORE_STACK: Core stacks -+ */ -+enum kbase_pm_core_type { -+ KBASE_PM_CORE_L2 = L2_PRESENT_LO, -+ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, -+ KBASE_PM_CORE_TILER = TILER_PRESENT_LO, -+ KBASE_PM_CORE_STACK = STACK_PRESENT_LO -+}; ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { ++ /* Instrumentation is already enabled */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ goto out_unrequest_cores; ++ } + -+/** -+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power -+ * management framework. -+ * -+ * @time_period_start: time at which busy/idle measurements started -+ * @time_busy: number of ns the GPU was busy executing jobs since the -+ * @time_period_start timestamp. -+ * @time_idle: number of ns since time_period_start the GPU was not executing -+ * jobs since the @time_period_start timestamp. -+ * @prev_busy: busy time in ns of previous time period. -+ * Updated when metrics are reset. -+ * @prev_idle: idle time in ns of previous time period -+ * Updated when metrics are reset. -+ * @gpu_active: true when the GPU is executing jobs. false when -+ * not. Updated when the job scheduler informs us a job in submitted -+ * or removed from a GPU slot. -+ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that -+ * if two CL jobs were active for 400ns, this value would be updated -+ * with 800. -+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that -+ * if two GL jobs were active for 400ns, this value would be updated -+ * with 800. -+ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. -+ * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As -+ * GL jobs never run on slot 2 this slot is not recorded. -+ * @lock: spinlock protecting the kbasep_pm_metrics_data structure -+ * @timer: timer to regularly make DVFS decisions based on the power -+ * management metrics. 
-+ * @timer_active: boolean indicating @timer is running -+ * @platform_data: pointer to data controlled by platform specific code -+ * @kbdev: pointer to kbase device for which metrics are collected -+ * -+ */ -+struct kbasep_pm_metrics_data { -+ ktime_t time_period_start; -+ u32 time_busy; -+ u32 time_idle; -+ u32 prev_busy; -+ u32 prev_idle; -+ bool gpu_active; -+ u32 busy_cl[2]; -+ u32 busy_gl; -+ u32 active_cl_ctx[2]; -+ u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ -+ spinlock_t lock; ++ /* Enable interrupt */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | ++ PRFCNT_SAMPLE_COMPLETED, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + -+#ifdef CONFIG_MALI_MIDGARD_DVFS -+ struct hrtimer timer; -+ bool timer_active; -+#endif ++ /* In use, this context is the owner */ ++ kbdev->hwcnt.kctx = kctx; ++ /* Remember the dump address so we can reprogram it later */ ++ kbdev->hwcnt.addr = setup->dump_buffer; + -+ void *platform_data; -+ struct kbase_device *kbdev; -+}; ++ /* Request the clean */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; ++ kbdev->hwcnt.backend.triggered = 0; ++ /* Clean&invalidate the caches so we're sure the mmu tables for the dump ++ * buffer is valid */ ++ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, ++ &kbdev->hwcnt.backend.cache_clean_work); ++ KBASE_DEBUG_ASSERT(ret); + -+union kbase_pm_policy_data { -+ struct kbasep_pm_policy_always_on always_on; -+ struct kbasep_pm_policy_coarse_demand coarse_demand; -+ struct kbasep_pm_policy_demand demand; -+#if !MALI_CUSTOMER_RELEASE -+ struct kbasep_pm_policy_demand_always_powered demand_always_powered; -+ struct kbasep_pm_policy_fast_start fast_start; -+#endif -+}; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+union kbase_pm_ca_policy_data { -+ struct kbasep_pm_ca_policy_fixed fixed; -+ struct kbasep_pm_ca_policy_devfreq devfreq; -+#if !MALI_CUSTOMER_RELEASE -+ struct kbasep_pm_ca_policy_random random; ++ /* Wait for cacheclean to complete */ ++ wait_event(kbdev->hwcnt.backend.wait, ++ kbdev->hwcnt.backend.triggered != 0); ++ ++ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_IDLE); ++ ++ kbase_pm_request_l2_caches(kbdev); ++ ++ /* Configure */ ++ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; ++#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY ++ { ++ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) ++ >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); ++ ++ if (arch_v6) ++ prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; ++ } +#endif -+}; + -+/** -+ * struct kbase_pm_backend_data - Data stored per device for power management. -+ * -+ * This structure contains data for the power management framework. There is one -+ * instance of this structure per device in the system. -+ * -+ * @ca_current_policy: The policy that is currently actively controlling core -+ * availability. -+ * @pm_current_policy: The policy that is currently actively controlling the -+ * power state. -+ * @ca_policy_data: Private data for current CA policy -+ * @pm_policy_data: Private data for current PM policy -+ * @ca_in_transition: Flag indicating when core availability policy is -+ * transitioning cores. The core availability policy must -+ * set this when a change in core availability is occurring. 
-+ * power_change_lock must be held when accessing this. -+ * @reset_done: Flag when a reset is complete -+ * @reset_done_wait: Wait queue to wait for changes to @reset_done -+ * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as -+ * requested -+ * @l2_powered: State indicating whether all the l2 caches are powered. -+ * Non-zero indicates they're *all* powered -+ * Zero indicates that some (or all) are not powered -+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter -+ * users -+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests -+ * @desired_shader_state: A bit mask identifying the shader cores that the -+ * power policy would like to be on. The current state -+ * of the cores may be different, but there should be -+ * transitions in progress that will eventually achieve -+ * this state (assuming that the policy doesn't change -+ * its mind in the mean time). -+ * @powering_on_shader_state: A bit mask indicating which shader cores are -+ * currently in a power-on transition -+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power -+ * policy would like to be on. See @desired_shader_state -+ * @powering_on_tiler_state: A bit mask indicating which tiler core are -+ * currently in a power-on transition -+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently -+ * in a power-on transition -+ * @powering_on_stack_state: A bit mask indicating which core stacks are -+ * currently in a power-on transition -+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested -+ * by the desired_xxx_state variables -+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 -+ * @gpu_powered: Set to true when the GPU is powered and register -+ * accesses are possible, false otherwise -+ * @instr_enabled: Set to true when instrumentation is enabled, -+ * false otherwise -+ * @cg1_disabled: Set if the policy wants to keep the second core group -+ * powered off -+ * @driver_ready_for_irqs: Debug state indicating whether sufficient -+ * initialization of the driver has occurred to handle -+ * IRQs -+ * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or -+ * accessing @driver_ready_for_irqs -+ * @metrics: Structure to hold metrics for the GPU -+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is -+ * powered off -+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders -+ * and/or timers are powered off -+ * @gpu_poweroff_timer: Timer for powering off GPU -+ * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires -+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq -+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next -+ * timer callback -+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer -+ * callback -+ * @poweroff_timer_needed: true if the poweroff timer is currently required, -+ * false otherwise -+ * @poweroff_timer_running: true if the poweroff timer is currently running, -+ * false otherwise -+ * power_change_lock should be held when accessing, -+ * unless there is no way the timer can be running (eg -+ * hrtimer_cancel() was called immediately before) -+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. -+ * hwaccess_lock must be held when accessing -+ * @poweron_required: true if a GPU power on is required. 
Should only be set -+ * when poweroff_wait_in_progress is true, and therefore the -+ * GPU can not immediately be powered on. pm.lock must be -+ * held when accessing -+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend -+ * request. pm.lock must be held when accessing -+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off -+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq -+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete -+ * @callback_power_on: Callback when the GPU needs to be turned on. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_off: Callback when the GPU may be turned off. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to -+ * be turned off. See &struct kbase_pm_callback_conf -+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to -+ * be turned on. See &struct kbase_pm_callback_conf -+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See -+ * &struct kbase_pm_callback_conf -+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See -+ * &struct kbase_pm_callback_conf -+ * -+ * Note: -+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the -+ * policy is being changed with kbase_pm_ca_set_policy() or -+ * kbase_pm_set_policy(). The change is protected under -+ * kbase_device.pm.power_change_lock. Direct access to this -+ * from IRQ context must therefore check for NULL. If NULL, then -+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy -+ * functions that would have been done under IRQ. -+ */ -+struct kbase_pm_backend_data { -+ const struct kbase_pm_ca_policy *ca_current_policy; -+ const struct kbase_pm_policy *pm_current_policy; -+ union kbase_pm_ca_policy_data ca_policy_data; -+ union kbase_pm_policy_data pm_policy_data; -+ bool ca_in_transition; -+ bool reset_done; -+ wait_queue_head_t reset_done_wait; -+ wait_queue_head_t l2_powered_wait; -+ int l2_powered; -+ int gpu_cycle_counter_requests; -+ spinlock_t gpu_cycle_counter_requests_lock; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), ++ prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + -+ u64 desired_shader_state; -+ u64 powering_on_shader_state; -+ u64 desired_tiler_state; -+ u64 powering_on_tiler_state; -+ u64 powering_on_l2_state; -+#ifdef CONFIG_MALI_CORESTACK -+ u64 powering_on_stack_state; -+#endif /* CONFIG_MALI_CORESTACK */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), ++ setup->dump_buffer & 0xFFFFFFFF, kctx); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), ++ setup->dump_buffer >> 32, kctx); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), ++ setup->jm_bm, kctx); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), ++ setup->shader_bm, kctx); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), ++ setup->mmu_l2_bm, kctx); ++ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the ++ * HW counter dump. 
*/ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, ++ kctx); ++ else ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), ++ setup->tiler_bm, kctx); + -+ bool gpu_in_desired_state; -+ wait_queue_head_t gpu_in_desired_state_wait; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), ++ prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); + -+ bool gpu_powered; ++ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump ++ */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), ++ setup->tiler_bm, kctx); + -+ bool instr_enabled; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ bool cg1_disabled; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); + -+#ifdef CONFIG_MALI_DEBUG -+ bool driver_ready_for_irqs; -+#endif /* CONFIG_MALI_DEBUG */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ spinlock_t gpu_powered_lock; ++ err = 0; + ++ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); ++ return err; ++ out_unrequest_cores: ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ out_err: ++ return err; ++} + -+ struct kbasep_pm_metrics_data metrics; ++int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) ++{ ++ unsigned long flags, pm_flags; ++ int err = -EINVAL; ++ u32 irq_mask; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ int gpu_poweroff_pending; -+ int shader_poweroff_pending_time; ++ while (1) { ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ struct hrtimer gpu_poweroff_timer; -+ struct workqueue_struct *gpu_poweroff_wq; -+ struct work_struct gpu_poweroff_work; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { ++ /* Instrumentation is not enabled */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ goto out; ++ } + -+ u64 shader_poweroff_pending; -+ u64 tiler_poweroff_pending; ++ if (kbdev->hwcnt.kctx != kctx) { ++ /* Instrumentation has been setup for another context */ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ goto out; ++ } + -+ bool poweroff_timer_needed; -+ bool poweroff_timer_running; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) ++ break; + -+ bool poweroff_wait_in_progress; -+ bool poweron_required; -+ bool poweroff_is_suspend; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ struct workqueue_struct *gpu_poweroff_wait_wq; -+ struct work_struct gpu_poweroff_wait_work; ++ /* Ongoing dump/setup - wait for its completion */ ++ wait_event(kbdev->hwcnt.backend.wait, ++ kbdev->hwcnt.backend.triggered != 0); ++ } + -+ wait_queue_head_t poweroff_wait; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; ++ kbdev->hwcnt.backend.triggered = 0; + -+ int (*callback_power_on)(struct kbase_device *kbdev); -+ void (*callback_power_off)(struct kbase_device *kbdev); -+ void (*callback_power_suspend)(struct kbase_device *kbdev); -+ void (*callback_power_resume)(struct kbase_device *kbdev); -+ int (*callback_power_runtime_on)(struct kbase_device *kbdev); -+ void (*callback_power_runtime_off)(struct kbase_device *kbdev); -+ int (*callback_power_runtime_idle)(struct kbase_device *kbdev); -+}; ++ /* Disable interrupt */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); ++ irq_mask = kbase_reg_read(kbdev, 
GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + ++ /* Disable the counters */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + -+/* List of policy IDs */ -+enum kbase_pm_policy_id { -+ KBASE_PM_POLICY_ID_DEMAND = 1, -+ KBASE_PM_POLICY_ID_ALWAYS_ON, -+ KBASE_PM_POLICY_ID_COARSE_DEMAND, -+#if !MALI_CUSTOMER_RELEASE -+ KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, -+ KBASE_PM_POLICY_ID_FAST_START -+#endif -+}; ++ kbdev->hwcnt.kctx = NULL; ++ kbdev->hwcnt.addr = 0ULL; + -+typedef u32 kbase_pm_policy_flags; ++ kbase_pm_ca_instr_disable(kbdev); + -+/** -+ * struct kbase_pm_policy - Power policy structure. -+ * -+ * Each power policy exposes a (static) instance of this structure which -+ * contains function pointers to the policy's methods. -+ * -+ * @name: The name of this policy -+ * @init: Function called when the policy is selected -+ * @term: Function called when the policy is unselected -+ * @get_core_mask: Function called to get the current shader core mask -+ * @get_core_active: Function called to get the current overall GPU power -+ * state -+ * @flags: Field indicating flags for this policy -+ * @id: Field indicating an ID for this policy. This is not -+ * necessarily the same as its index in the list returned -+ * by kbase_pm_list_policies(). -+ * It is used purely for debugging. -+ */ -+struct kbase_pm_policy { -+ char *name; ++ kbase_pm_unrequest_cores(kbdev, true, ++ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); + -+ /** -+ * Function called when the policy is selected -+ * -+ * This should initialize the kbdev->pm.pm_policy_data structure. It -+ * should not attempt to make any changes to hardware state. -+ * -+ * It is undefined what state the cores are in when the function is -+ * called. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ */ -+ void (*init)(struct kbase_device *kbdev); ++ kbase_pm_release_l2_caches(kbdev); + -+ /** -+ * Function called when the policy is unselected. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ */ -+ void (*term)(struct kbase_device *kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+ /** -+ * Function called to get the current shader core mask -+ * -+ * The returned mask should meet or exceed (kbdev->shader_needed_bitmap -+ * | kbdev->shader_inuse_bitmap). -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * -+ * Return: The mask of shader cores to be powered -+ */ -+ u64 (*get_core_mask)(struct kbase_device *kbdev); ++ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", ++ kctx); + -+ /** -+ * Function called to get the current overall GPU power state -+ * -+ * This function should consider the state of kbdev->pm.active_count. If -+ * this count is greater than 0 then there is at least one active -+ * context on the device and the GPU should be powered. If it is equal -+ * to 0 then there are no active contexts and the GPU could be powered -+ * off if desired. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * -+ * Return: true if the GPU should be powered, false otherwise -+ */ -+ bool (*get_core_active)(struct kbase_device *kbdev); ++ err = 0; + -+ kbase_pm_policy_flags flags; -+ enum kbase_pm_policy_id id; -+}; ++ out: ++ return err; ++} + ++int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) ++{ ++ unsigned long flags; ++ int err = -EINVAL; ++ struct kbase_device *kbdev = kctx->kbdev; + -+enum kbase_pm_ca_policy_id { -+ KBASE_PM_CA_POLICY_ID_FIXED = 1, -+ KBASE_PM_CA_POLICY_ID_DEVFREQ, -+ KBASE_PM_CA_POLICY_ID_RANDOM -+}; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+typedef u32 kbase_pm_ca_policy_flags; ++ if (kbdev->hwcnt.kctx != kctx) { ++ /* The instrumentation has been setup for another context */ ++ goto unlock; ++ } + -+/** -+ * Maximum length of a CA policy names -+ */ -+#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15 ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { ++ /* HW counters are disabled or another dump is ongoing, or we're ++ * resetting */ ++ goto unlock; ++ } + -+/** -+ * struct kbase_pm_ca_policy - Core availability policy structure. -+ * -+ * Each core availability policy exposes a (static) instance of this structure -+ * which contains function pointers to the policy's methods. -+ * -+ * @name: The name of this policy -+ * @init: Function called when the policy is selected -+ * @term: Function called when the policy is unselected -+ * @get_core_mask: Function called to get the current shader core -+ * availability mask -+ * @update_core_status: Function called to update the current core status -+ * @flags: Field indicating flags for this policy -+ * @id: Field indicating an ID for this policy. This is not -+ * necessarily the same as its index in the list returned -+ * by kbase_pm_list_policies(). -+ * It is used purely for debugging. -+ */ -+struct kbase_pm_ca_policy { -+ char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1]; ++ kbdev->hwcnt.backend.triggered = 0; + -+ /** -+ * Function called when the policy is selected -+ * -+ * This should initialize the kbdev->pm.ca_policy_data structure. It -+ * should not attempt to make any changes to hardware state. -+ * -+ * It is undefined what state the cores are in when the function is -+ * called. -+ * -+ * @kbdev The kbase device structure for the device (must be a -+ * valid pointer) ++ /* Mark that we're dumping - the PF handler can signal that we faulted + */ -+ void (*init)(struct kbase_device *kbdev); ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + -+ /** -+ * Function called when the policy is unselected. -+ * -+ * @kbdev The kbase device structure for the device (must be a -+ * valid pointer) -+ */ -+ void (*term)(struct kbase_device *kbdev); ++ /* Reconfigure the dump address */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), ++ kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), ++ kbdev->hwcnt.addr >> 32, NULL); + -+ /** -+ * Function called to get the current shader core availability mask -+ * -+ * When a change in core availability is occurring, the policy must set -+ * kbdev->pm.ca_in_transition to true. This is to indicate that -+ * reporting changes in power state cannot be optimized out, even if -+ * kbdev->pm.desired_shader_state remains unchanged. This must be done -+ * by any functions internal to the Core Availability Policy that change -+ * the return value of kbase_pm_ca_policy::get_core_mask. 
-+ * -+ * @kbdev The kbase device structure for the device (must be a -+ * valid pointer) -+ * -+ * Return: The current core availability mask -+ */ -+ u64 (*get_core_mask)(struct kbase_device *kbdev); ++ /* Start dumping */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, ++ kbdev->hwcnt.addr, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_PRFCNT_SAMPLE, kctx); + -+ /** -+ * Function called to update the current core status -+ * -+ * If none of the cores in core group 0 are ready or transitioning, then -+ * the policy must ensure that the next call to get_core_mask does not -+ * return 0 for all cores in core group 0. It is an error to disable -+ * core group 0 through the core availability policy. -+ * -+ * When a change in core availability has finished, the policy must set -+ * kbdev->pm.ca_in_transition to false. This is to indicate that -+ * changes in power state can once again be optimized out when -+ * kbdev->pm.desired_shader_state is unchanged. -+ * -+ * @kbdev: The kbase device structure for the device -+ * (must be a valid pointer) -+ * @cores_ready: The mask of cores currently powered and -+ * ready to run jobs -+ * @cores_transitioning: The mask of cores currently transitioning -+ * power state -+ */ -+ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, -+ u64 cores_transitioning); ++ dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + -+ kbase_pm_ca_policy_flags flags; ++ err = 0; + -+ /** -+ * Field indicating an ID for this policy. This is not necessarily the -+ * same as its index in the list returned by kbase_pm_list_policies(). -+ * It is used purely for debugging. -+ */ -+ enum kbase_pm_ca_policy_id id; -+}; ++ unlock: ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; ++} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + -+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c -new file mode 100644 -index 000000000..81322fd0d ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c -@@ -0,0 +1,73 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, ++ bool * const success) ++{ ++ unsigned long flags; ++ bool complete = false; ++ struct kbase_device *kbdev = kctx->kbdev; ++ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { ++ *success = true; ++ complete = true; ++ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ *success = false; ++ complete = true; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ } + ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + ++ return complete; ++} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + ++void kbasep_cache_clean_worker(struct work_struct *data) ++{ ++ struct kbase_device *kbdev; ++ unsigned long flags; + -+/* -+ * A simple demand based power management policy -+ */ ++ kbdev = container_of(data, struct kbase_device, ++ hwcnt.backend.cache_clean_work); + -+#include -+#include ++ mutex_lock(&kbdev->cacheclean_lock); ++ kbasep_instr_hwcnt_cacheclean(kbdev); + -+static u64 demand_get_core_mask(struct kbase_device *kbdev) ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ /* Wait for our condition, and any reset to complete */ ++ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ wait_event(kbdev->hwcnt.backend.cache_clean_wait, ++ kbdev->hwcnt.backend.state != ++ KBASE_INSTR_STATE_CLEANING); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ } ++ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_CLEANED); ++ ++ /* All finished and idle */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ mutex_unlock(&kbdev->cacheclean_lock); ++} ++ ++void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ -+ u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; ++ unsigned long flags; + -+ if (0 == kbdev->pm.active_count) -+ return 0; ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + -+ return desired; ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { ++ int ret; ++ /* Always clean and invalidate the cache after a successful dump ++ */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; ++ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, ++ &kbdev->hwcnt.backend.cache_clean_work); ++ KBASE_DEBUG_ASSERT(ret); ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + -+static bool demand_get_core_active(struct kbase_device *kbdev) ++void kbase_clean_caches_done(struct kbase_device *kbdev) +{ -+ if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | -+ kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt -+ && !kbdev->tiler_inuse_cnt) -+ return false; ++ u32 irq_mask; + -+ return true; ++ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { ++ unsigned long flags; ++ unsigned long pm_flags; ++ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ /* Disable interrupt */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); 
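++		/* Descriptive note (added): with CLEAN_CACHES_COMPLETED masked
++		 * again, the state change below hands control back to
++		 * kbasep_cache_clean_worker, which is blocked on cache_clean_wait
++		 * until the state leaves KBASE_INSTR_STATE_CLEANING. */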
++ ++ /* Wakeup... */ ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { ++ /* Only wake if we weren't resetting */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; ++ wake_up(&kbdev->hwcnt.backend.cache_clean_wait); ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ } +} + -+static void demand_init(struct kbase_device *kbdev) ++int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ -+ CSTD_UNUSED(kbdev); ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned long flags; ++ int err; ++ ++ /* Wait for dump & cacheclean to complete */ ++ wait_event(kbdev->hwcnt.backend.wait, ++ kbdev->hwcnt.backend.triggered != 0); ++ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ ++ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { ++ err = -EINVAL; ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ } else { ++ /* Dump done */ ++ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_IDLE); ++ err = 0; ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ ++ return err; +} + -+static void demand_term(struct kbase_device *kbdev) ++int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ -+ CSTD_UNUSED(kbdev); ++ unsigned long flags; ++ int err = -EINVAL; ++ struct kbase_device *kbdev = kctx->kbdev; ++ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ ++ /* Check it's the context previously set up and we're not already ++ * dumping */ ++ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != ++ KBASE_INSTR_STATE_IDLE) ++ goto out; ++ ++ /* Clear the counters */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_PRFCNT_CLEAR, kctx); ++ ++ err = 0; ++ ++out: ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ return err; ++} ++KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); ++ ++int kbase_instr_backend_init(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; ++ ++ init_waitqueue_head(&kbdev->hwcnt.backend.wait); ++ init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); ++ INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, ++ kbasep_cache_clean_worker); ++ kbdev->hwcnt.backend.triggered = 0; ++ ++ kbdev->hwcnt.backend.cache_clean_wq = ++ alloc_workqueue("Mali cache cleaning workqueue", 0, 1); ++ if (NULL == kbdev->hwcnt.backend.cache_clean_wq) ++ ret = -EINVAL; ++ ++ return ret; ++} ++ ++void kbase_instr_backend_term(struct kbase_device *kbdev) ++{ ++ destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +new file mode 100644 +index 000000000..4794672da +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +@@ -0,0 +1,58 @@ +/* -+ * The struct kbase_pm_policy structure for the demand power policy. + * -+ * This is the static structure that defines the demand power policy's callback -+ * and name. ++ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+const struct kbase_pm_policy kbase_pm_demand_policy_ops = { -+ "demand", /* name */ -+ demand_init, /* init */ -+ demand_term, /* term */ -+ demand_get_core_mask, /* get_core_mask */ -+ demand_get_core_active, /* get_core_active */ -+ 0u, /* flags */ -+ KBASE_PM_POLICY_ID_DEMAND, /* id */ ++ ++ ++ ++/* ++ * Backend-specific instrumentation definitions ++ */ ++ ++#ifndef _KBASE_INSTR_DEFS_H_ ++#define _KBASE_INSTR_DEFS_H_ ++ ++/* ++ * Instrumentation State Machine States ++ */ ++enum kbase_instr_state { ++ /* State where instrumentation is not active */ ++ KBASE_INSTR_STATE_DISABLED = 0, ++ /* State machine is active and ready for a command. */ ++ KBASE_INSTR_STATE_IDLE, ++ /* Hardware is currently dumping a frame. */ ++ KBASE_INSTR_STATE_DUMPING, ++ /* We've requested a clean to occur on a workqueue */ ++ KBASE_INSTR_STATE_REQUEST_CLEAN, ++ /* Hardware is currently cleaning and invalidating caches. */ ++ KBASE_INSTR_STATE_CLEANING, ++ /* Cache clean completed, and either a) a dump is complete, or ++ * b) instrumentation can now be setup. */ ++ KBASE_INSTR_STATE_CLEANED, ++ /* An error has occured during DUMPING (page fault). */ ++ KBASE_INSTR_STATE_FAULT +}; + -+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h ++/* Structure used for instrumentation and HW counters dumping */ ++struct kbase_instr_backend { ++ wait_queue_head_t wait; ++ int triggered; ++ ++ enum kbase_instr_state state; ++ wait_queue_head_t cache_clean_wait; ++ struct workqueue_struct *cache_clean_wq; ++ struct work_struct cache_clean_work; ++}; ++ ++#endif /* _KBASE_INSTR_DEFS_H_ */ ++ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h new file mode 100644 -index 000000000..c0c84b6e9 +index 000000000..e96aeae78 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h -@@ -0,0 +1,64 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h +@@ -0,0 +1,45 @@ +/* + * -+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -342951,59 +344082,40 @@ index 000000000..c0c84b6e9 + + +/* -+ * A simple demand based power management policy ++ * Backend-specific HW access instrumentation APIs + */ + -+#ifndef MALI_KBASE_PM_DEMAND_H -+#define MALI_KBASE_PM_DEMAND_H ++#ifndef _KBASE_INSTR_INTERNAL_H_ ++#define _KBASE_INSTR_INTERNAL_H_ + +/** -+ * DOC: Demand power management policy -+ * -+ * The demand power management policy has the following characteristics: -+ * - When KBase indicates that the GPU will be powered up, but we don't yet -+ * know which Job Chains are to be run: -+ * - The Shader Cores are not powered up -+ * -+ * - When KBase indicates that a set of Shader Cores are needed to submit the -+ * currently queued Job Chains: -+ * - Only those Shader Cores are powered up -+ * -+ * - When KBase indicates that the GPU need not be powered: -+ * - The Shader Cores are powered off, and the GPU itself is powered off too. 
-+ * -+ * Note: -+ * - KBase indicates the GPU will be powered up when it has a User Process that -+ * has just started to submit Job Chains. -+ * -+ * - KBase indicates the GPU need not be powered when all the Job Chains from -+ * User Processes have finished, and it is waiting for a User Process to -+ * submit some more Job Chains. ++ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning ++ * @data: a &struct work_struct + */ ++void kbasep_cache_clean_worker(struct work_struct *data); + +/** -+ * struct kbasep_pm_policy_demand - Private structure for policy instance data -+ * -+ * @dummy: No state is needed, a dummy variable -+ * -+ * This contains data that is private to the demand power policy. ++ * kbase_clean_caches_done() - Cache clean interrupt received ++ * @kbdev: Kbase device + */ -+struct kbasep_pm_policy_demand { -+ int dummy; -+}; ++void kbase_clean_caches_done(struct kbase_device *kbdev); + -+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; ++/** ++ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received ++ * @kbdev: Kbase device ++ */ ++void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); + -+#endif /* MALI_KBASE_PM_DEMAND_H */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c ++#endif /* _KBASE_INSTR_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h new file mode 100644 -index 000000000..82727937c +index 000000000..8781561e7 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c -@@ -0,0 +1,1713 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h +@@ -0,0 +1,39 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -343018,2672 +344130,2578 @@ index 000000000..82727937c + + + ++/* ++ * Backend specific IRQ APIs ++ */ ++ ++#ifndef _KBASE_IRQ_INTERNAL_H_ ++#define _KBASE_IRQ_INTERNAL_H_ ++ ++int kbase_install_interrupts(struct kbase_device *kbdev); ++ ++void kbase_release_interrupts(struct kbase_device *kbdev); ++ ++/** ++ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed ++ * execution ++ * @kbdev: The kbase device ++ */ ++void kbase_synchronize_irqs(struct kbase_device *kbdev); + ++int kbasep_common_test_interrupt_handlers( ++ struct kbase_device * const kbdev); + ++#endif /* _KBASE_IRQ_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +new file mode 100644 +index 000000000..8416b80e8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +@@ -0,0 +1,469 @@ +/* -+ * Base kernel Power Management hardware control ++ * ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ + -+// #define ENABLE_DEBUG_LOG -+#include "../../platform/rk/custom_log.h" ++ + +#include -+#include -+#include -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include +#include +#include -+#include -+ -+#include + -+#if MALI_MOCK_TEST -+#define MOCKABLE(function) function##_original -+#else -+#define MOCKABLE(function) function -+#endif /* MALI_MOCK_TEST */ ++#include + -+/** -+ * enum kbasep_pm_action - Actions that can be performed on a core. -+ * -+ * This enumeration is private to the file. Its values are set to allow -+ * core_type_to_reg() function, which decodes this enumeration, to be simpler -+ * and more efficient. -+ * -+ * @ACTION_PRESENT: The cores that are present -+ * @ACTION_READY: The cores that are ready -+ * @ACTION_PWRON: Power on the cores specified -+ * @ACTION_PWROFF: Power off the cores specified -+ * @ACTION_PWRTRANS: The cores that are transitioning -+ * @ACTION_PWRACTIVE: The cores that are active -+ */ -+enum kbasep_pm_action { -+ ACTION_PRESENT = 0, -+ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), -+ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), -+ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), -+ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), -+ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) -+}; ++#if !defined(CONFIG_MALI_NO_MALI) + -+/*---------------------------------------------------------------------------*/ ++/* GPU IRQ Tags */ ++#define JOB_IRQ_TAG 0 ++#define MMU_IRQ_TAG 1 ++#define GPU_IRQ_TAG 2 + -+static bool is_action_of_powering_off_l2(enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action active) ++static void *kbase_tag(void *ptr, u32 tag) +{ -+ return (KBASE_PM_CORE_L2 == core_type) && (ACTION_PWROFF == active); ++ return (void *)(((uintptr_t) ptr) | tag); +} + -+static bool is_action_of_powering_off_shader(enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action active) ++static void *kbase_untag(void *ptr) +{ -+ return (KBASE_PM_CORE_SHADER == core_type) && (ACTION_PWROFF == active); ++ return (void *)(((uintptr_t) ptr) & ~3); +} + -+static bool is_action_of_powering_off_tiler(enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action active) ++static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ -+ return (KBASE_PM_CORE_TILER == core_type) && (ACTION_PWROFF == active); -+} ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+static u64 kbase_pm_get_state( -+ struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action); ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+/** -+ * core_type_to_reg - Decode a core type and action to a register. -+ * -+ * Given a core type (defined by kbase_pm_core_type) and an action (defined -+ * by kbasep_pm_action) this function will return the register offset that -+ * will perform the action on the core type. The register returned is the _LO -+ * register and an offset must be applied to use the _HI register. -+ * -+ * @core_type: The type of core -+ * @action: The type of action -+ * -+ * Return: The register offset of the _LO register that performs an action of -+ * type @action on a core of type @core_type. 
-+ */ -+static u32 core_type_to_reg(enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action) -+{ -+#ifdef CONFIG_MALI_CORESTACK -+ if (core_type == KBASE_PM_CORE_STACK) { -+ switch (action) { -+ case ACTION_PRESENT: -+ return STACK_PRESENT_LO; -+ case ACTION_READY: -+ return STACK_READY_LO; -+ case ACTION_PWRON: -+ return STACK_PWRON_LO; -+ case ACTION_PWROFF: -+ return STACK_PWROFF_LO; -+ case ACTION_PWRTRANS: -+ return STACK_PWRTRANS_LO; -+ default: -+ BUG(); -+ } ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return IRQ_NONE; + } -+#endif /* CONFIG_MALI_CORESTACK */ + -+ return (u32)core_type + (u32)action; -+} ++ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + -+#ifdef CONFIG_ARM64 -+static void mali_cci_flush_l2(struct kbase_device *kbdev) -+{ -+ const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; -+ u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; -+ u32 raw; ++#ifdef CONFIG_MALI_DEBUG ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_DEBUG */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ /* -+ * Note that we don't take the cache flush mutex here since -+ * we expect to be the last user of the L2, all other L2 users -+ * would have dropped their references, to initiate L2 power -+ * down, L2 power down being the only valid place for this -+ * to be called from. -+ */ ++ if (!val) ++ return IRQ_NONE; + -+ kbase_reg_write(kbdev, -+ GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CLEAN_INV_CACHES, -+ NULL); ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ raw = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), -+ NULL); ++ kbase_job_done(kbdev, val); + -+ /* Wait for cache flush to complete before continuing, exit on -+ * gpu resets or loop expiry. */ -+ while (((raw & mask) == 0) && --loops) { -+ raw = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), -+ NULL); -+ } ++ return IRQ_HANDLED; +} -+#endif + -+/** -+ * kbase_pm_invoke - Invokes an action on a core set -+ * -+ * This function performs the action given by @action on a set of cores of a -+ * type given by @core_type. It is a static function used by -+ * kbase_pm_transition_core_type() -+ * -+ * @kbdev: The kbase device structure of the device -+ * @core_type: The type of core that the action should be performed on -+ * @cores: A bit mask of cores to perform the action on (low 32 bits) -+ * @action: The action to perform on the cores -+ */ -+static void kbase_pm_invoke(struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ u64 cores, -+ enum kbasep_pm_action action) -+{ -+ u32 reg; -+ u32 lo = cores & 0xFFFFFFFF; -+ u32 hi = (cores >> 32) & 0xFFFFFFFF; ++KBASE_EXPORT_TEST_API(kbase_job_irq_handler); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ /*-------------------------------------------------------*/ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if ( is_action_of_powering_off_l2(core_type, action) ) { -+ D("not to power off l2 actually."); -+ return; -+ } -+ if ( is_action_of_powering_off_shader(core_type, action) ) { -+ D("not to power off shader actually. 
cores_lo : 0x%x, hi : 0x%x.", -+ lo, -+ hi); -+ return; -+ } -+ if ( is_action_of_powering_off_tiler(core_type, action) ) { -+ D("not to power off tiler actually."); -+ return; ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return IRQ_NONE; + } + -+ /*-------------------------------------------------------*/ -+ -+ reg = core_type_to_reg(core_type, action); ++ atomic_inc(&kbdev->faults_pending); + -+ KBASE_DEBUG_ASSERT(reg); -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ if (cores) { -+ if (action == ACTION_PWRON) -+ kbase_trace_mali_pm_power_on(core_type, cores); -+ else if (action == ACTION_PWROFF) -+ kbase_trace_mali_pm_power_off(core_type, cores); -+ } -+#endif ++ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + -+ if (cores) { -+ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); ++#ifdef CONFIG_MALI_DEBUG ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_DEBUG */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (action == ACTION_PWRON) -+ state |= cores; -+ else if (action == ACTION_PWROFF) -+ state &= ~cores; -+ KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); ++ if (!val) { ++ atomic_dec(&kbdev->faults_pending); ++ return IRQ_NONE; + } + -+ /* Tracing */ -+ if (cores) { -+ if (action == ACTION_PWRON) -+ switch (core_type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, -+ lo); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, -+ NULL, 0u, lo); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, -+ 0u, lo); -+ break; -+ default: -+ break; -+ } -+ else if (action == ACTION_PWROFF) -+ switch (core_type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, -+ 0u, lo); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, -+ NULL, 0u, lo); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, -+ 0u, lo); -+ /* disable snoops before L2 is turned off */ -+ kbase_pm_cache_snoop_disable(kbdev); -+ break; -+ default: -+ break; -+ } -+ } ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ if (lo != 0) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL); ++ kbase_mmu_interrupt(kbdev, val); + -+ if (hi != 0) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL); ++ atomic_dec(&kbdev->faults_pending); ++ ++ return IRQ_HANDLED; +} + -+/** -+ * kbase_pm_get_state - Get information about a core set -+ * -+ * This function gets information (chosen by @action) about a set of cores of -+ * a type given by @core_type. It is a static function used by -+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and -+ * kbase_pm_get_ready_cores(). 
-+ * -+ * @kbdev: The kbase device structure of the device -+ * @core_type: The type of core that the should be queried -+ * @action: The property of the cores to query -+ * -+ * Return: A bit mask specifying the state of the cores -+ */ -+static u64 kbase_pm_get_state(struct kbase_device *kbdev, -+ enum kbase_pm_core_type core_type, -+ enum kbasep_pm_action action) ++static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ -+ u32 reg; -+ u32 lo, hi; -+ -+ reg = core_type_to_reg(core_type, action); ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ KBASE_DEBUG_ASSERT(reg); ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL); -+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL); ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return IRQ_NONE; ++ } + -+ return (((u64) hi) << 32) | ((u64) lo); -+} ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + -+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev) -+{ -+ kbdev->shader_inuse_bitmap = 0; -+ kbdev->shader_needed_bitmap = 0; -+ kbdev->shader_available_bitmap = 0; -+ kbdev->tiler_available_bitmap = 0; -+ kbdev->l2_users_count = 0; -+ kbdev->l2_available_bitmap = 0; -+ kbdev->tiler_needed_cnt = 0; -+ kbdev->tiler_inuse_cnt = 0; ++#ifdef CONFIG_MALI_DEBUG ++ if (!kbdev->pm.backend.driver_ready_for_irqs) ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", ++ __func__, irq, val); ++#endif /* CONFIG_MALI_DEBUG */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt)); -+} ++ if (!val) ++ return IRQ_NONE; + -+/** -+ * kbase_pm_get_present_cores - Get the cores that are present -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of the cores that are present -+ */ -+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ switch (type) { -+ case KBASE_PM_CORE_L2: -+ return kbdev->gpu_props.props.raw_props.l2_present; -+ case KBASE_PM_CORE_SHADER: -+ return kbdev->gpu_props.props.raw_props.shader_present; -+ case KBASE_PM_CORE_TILER: -+ return kbdev->gpu_props.props.raw_props.tiler_present; -+#ifdef CONFIG_MALI_CORESTACK -+ case KBASE_PM_CORE_STACK: -+ return kbdev->gpu_props.props.raw_props.stack_present; -+#endif /* CONFIG_MALI_CORESTACK */ -+ default: -+ break; -+ } -+ KBASE_DEBUG_ASSERT(0); ++ kbase_gpu_interrupt(kbdev, val); + -+ return 0; ++ return IRQ_HANDLED; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); ++KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); + -+/** -+ * kbase_pm_get_active_cores - Get the cores that are "active" -+ * (busy processing work) -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of cores that are active -+ */ -+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) -+{ -+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); -+} ++static irq_handler_t kbase_handler_table[] = { ++ [JOB_IRQ_TAG] = kbase_job_irq_handler, ++ [MMU_IRQ_TAG] = kbase_mmu_irq_handler, ++ [GPU_IRQ_TAG] = kbase_gpu_irq_handler, ++}; + 
-+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); ++#ifdef CONFIG_MALI_DEBUG ++#define JOB_IRQ_HANDLER JOB_IRQ_TAG ++#define MMU_IRQ_HANDLER MMU_IRQ_TAG ++#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** -+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between -+ * power states ++ * kbase_set_custom_irq_handler - Set a custom IRQ handler ++ * @kbdev: Device for which the handler is to be registered ++ * @custom_handler: Handler to be registered ++ * @irq_type: Interrupt type + * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query ++ * Registers given interrupt handler for requested interrupt type ++ * In the case where irq handler is not specified, the default handler shall be ++ * registered + * -+ * Return: Bitmask of cores that are transitioning ++ * Return: 0 case success, error code otherwise + */ -+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) ++int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, ++ int irq_type) +{ -+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); -+} ++ int result = 0; ++ irq_handler_t requested_irq_handler = NULL; + -+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); ++ KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && ++ (GPU_IRQ_HANDLER >= irq_type)); + -+/** -+ * kbase_pm_get_ready_cores - Get the cores that are powered on -+ * -+ * @kbdev: Kbase device -+ * @type: The type of cores to query -+ * -+ * Return: Bitmask of cores that are ready (powered on) -+ */ -+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type) -+{ -+ u64 result; ++ /* Release previous handler */ ++ if (kbdev->irqs[irq_type].irq) ++ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + -+ result = kbase_pm_get_state(kbdev, type, ACTION_READY); ++ requested_irq_handler = (NULL != custom_handler) ? custom_handler : ++ kbase_handler_table[irq_type]; + -+ switch (type) { -+ case KBASE_PM_CORE_SHADER: -+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, -+ (u32) result); -+ break; -+ case KBASE_PM_CORE_TILER: -+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, -+ (u32) result); -+ break; -+ case KBASE_PM_CORE_L2: -+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, -+ (u32) result); -+ break; -+ default: -+ break; ++ if (0 != request_irq(kbdev->irqs[irq_type].irq, ++ requested_irq_handler, ++ kbdev->irqs[irq_type].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { ++ result = -EINVAL; ++ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", ++ kbdev->irqs[irq_type].irq, irq_type); ++#ifdef CONFIG_SPARSE_IRQ ++ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); ++#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); -+ -+/** -+ * kbase_pm_transition_core_type - Perform power transitions for a particular -+ * core type. -+ * -+ * This function will perform any available power transitions to make the actual -+ * hardware state closer to the desired state. If a core is currently -+ * transitioning then changes to the power state of that call cannot be made -+ * until the transition has finished. Cores which are not present in the -+ * hardware are ignored if they are specified in the desired_state bitmask, -+ * however the return value will always be 0 in this case. 
-+ * -+ * @kbdev: The kbase device -+ * @type: The core type to perform transitions for -+ * @desired_state: A bit mask of the desired state of the cores -+ * @in_use: A bit mask of the cores that are currently running -+ * jobs. These cores have to be kept powered up because -+ * there are jobs running (or about to run) on them. -+ * @available: Receives a bit mask of the cores that the job -+ * scheduler can use to submit jobs to. May be NULL if -+ * this is not needed. -+ * @powering_on: Bit mask to update with cores that are -+ * transitioning to a power-on state. -+ * -+ * Return: true if the desired state has been reached, false otherwise -+ */ -+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type, -+ u64 desired_state, -+ u64 in_use, -+ u64 * const available, -+ u64 *powering_on) -+{ -+ u64 present; -+ u64 ready; -+ u64 trans; -+ u64 powerup; -+ u64 powerdown; -+ u64 powering_on_trans; -+ u64 desired_state_in_use; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + -+ /* Get current state */ -+ present = kbase_pm_get_present_cores(kbdev, type); -+ trans = kbase_pm_get_trans_cores(kbdev, type); -+ ready = kbase_pm_get_ready_cores(kbdev, type); -+ /* mask off ready from trans in case transitions finished between the -+ * register reads */ -+ trans &= ~ready; ++/* test correct interrupt assigment and reception by cpu */ ++struct kbasep_irq_test { ++ struct hrtimer timer; ++ wait_queue_head_t wait; ++ int triggered; ++ u32 timeout; ++}; + -+ if (trans) /* Do not progress if any cores are transitioning */ -+ return false; ++static struct kbasep_irq_test kbasep_irq_test_data; + -+ powering_on_trans = trans & *powering_on; -+ *powering_on = powering_on_trans; ++#define IRQ_TEST_TIMEOUT 500 + -+ if (available != NULL) -+ *available = (ready | powering_on_trans) & desired_state; ++static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ /* Update desired state to include the in-use cores. These have to be -+ * kept powered up because there are jobs running or about to run on -+ * these cores -+ */ -+ desired_state_in_use = desired_state | in_use; ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ /* Update state of whether l2 caches are powered */ -+ if (type == KBASE_PM_CORE_L2) { -+ if ((ready == present) && (desired_state_in_use == ready) && -+ (trans == 0)) { -+ /* All are ready, none will be turned off, and none are -+ * transitioning */ -+ kbdev->pm.backend.l2_powered = 1; -+ /* -+ * Ensure snoops are enabled after L2 is powered up, -+ * note that kbase keeps track of the snoop state, so -+ * safe to repeatedly call. 
-+ */ -+ kbase_pm_cache_snoop_enable(kbdev); -+ if (kbdev->l2_users_count > 0) { -+ /* Notify any registered l2 cache users -+ * (optimized out when no users waiting) */ -+ wake_up(&kbdev->pm.backend.l2_powered_wait); -+ } -+ } else -+ kbdev->pm.backend.l2_powered = 0; ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return IRQ_NONE; + } + -+ if (desired_state == ready && (trans == 0)) -+ return true; -+ -+ /* Restrict the cores to those that are actually present */ -+ powerup = desired_state_in_use & present; -+ powerdown = (~desired_state_in_use) & present; -+ -+ /* Restrict to cores that are not already in the desired state */ -+ powerup &= ~ready; -+ powerdown &= ready; ++ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + -+ /* Don't transition any cores that are already transitioning, except for -+ * Mali cores that support the following case: -+ * -+ * If the SHADER_PWRON or TILER_PWRON registers are written to turn on -+ * a core that is currently transitioning to power off, then this is -+ * remembered and the shader core is automatically powered up again once -+ * the original transition completes. Once the automatic power on is -+ * complete any job scheduled on the shader core should start. -+ */ -+ powerdown &= ~trans; ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (kbase_hw_has_feature(kbdev, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) -+ if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) -+ trans = powering_on_trans; /* for exception cases, only -+ * mask off cores in power on -+ * transitions */ ++ if (!val) ++ return IRQ_NONE; + -+ powerup &= ~trans; ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ /* Perform transitions if any */ -+ kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); -+#if !PLATFORM_POWER_DOWN_ONLY -+ kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); -+#endif ++ kbasep_irq_test_data.triggered = 1; ++ wake_up(&kbasep_irq_test_data.wait); + -+ /* Recalculate cores transitioning on, and re-evaluate our state */ -+ powering_on_trans |= powerup; -+ *powering_on = powering_on_trans; -+ if (available != NULL) -+ *available = (ready | powering_on_trans) & desired_state; ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + -+ return false; ++ return IRQ_HANDLED; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); -+ -+/** -+ * get_desired_cache_status - Determine which caches should be on for a -+ * particular core state -+ * -+ * This function takes a bit mask of the present caches and the cores (or -+ * caches) that are attached to the caches that will be powered. It then -+ * computes which caches should be turned on to allow the cores requested to be -+ * powered up. 
-+ * -+ * @present: The bit mask of present caches -+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to -+ * be powered -+ * @tilers_powered: The bit mask of tilers that are desired to be powered -+ * -+ * Return: A bit mask of the caches that should be turned on -+ */ -+static u64 get_desired_cache_status(u64 present, u64 cores_powered, -+ u64 tilers_powered) ++static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ -+ u64 desired = 0; -+ -+ while (present) { -+ /* Find out which is the highest set bit */ -+ u64 bit = fls64(present) - 1; -+ u64 bit_mask = 1ull << bit; -+ /* Create a mask which has all bits from 'bit' upwards set */ -+ -+ u64 mask = ~(bit_mask - 1); ++ unsigned long flags; ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ /* If there are any cores powered at this bit or above (that -+ * haven't previously been processed) then we need this core on -+ */ -+ if (cores_powered & mask) -+ desired |= bit_mask; ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ /* Remove bits from cores_powered and present */ -+ cores_powered &= ~mask; -+ present &= ~bit_mask; ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* GPU is turned off - IRQ is not for us */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return IRQ_NONE; + } + -+ /* Power up the required L2(s) for the tiler */ -+ if (tilers_powered) -+ desired |= 1; ++ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + -+ return desired; -+} ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+KBASE_EXPORT_TEST_API(get_desired_cache_status); ++ if (!val) ++ return IRQ_NONE; + -+#ifdef CONFIG_MALI_CORESTACK -+u64 kbase_pm_core_stack_mask(u64 cores) -+{ -+ u64 stack_mask = 0; -+ size_t const MAX_CORE_ID = 31; -+ size_t const NUM_CORES_PER_STACK = 4; -+ size_t i; ++ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + -+ for (i = 0; i <= MAX_CORE_ID; ++i) { -+ if (test_bit(i, (unsigned long *)&cores)) { -+ /* Every core which ID >= 16 is filled to stacks 4-7 -+ * instead of 0-3 */ -+ size_t const stack_num = (i > 16) ? 
-+ (i % NUM_CORES_PER_STACK) + 4 : -+ (i % NUM_CORES_PER_STACK); -+ set_bit(stack_num, (unsigned long *)&stack_mask); -+ } -+ } ++ kbasep_irq_test_data.triggered = 1; ++ wake_up(&kbasep_irq_test_data.wait); + -+ return stack_mask; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); ++ ++ return IRQ_HANDLED; +} -+#endif /* CONFIG_MALI_CORESTACK */ + -+bool -+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) ++static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ -+ bool cores_are_available = false; -+ bool in_desired_state = true; -+ u64 desired_l2_state; -+#ifdef CONFIG_MALI_CORESTACK -+ u64 desired_stack_state; -+ u64 stacks_powered; -+#endif /* CONFIG_MALI_CORESTACK */ -+ u64 cores_powered; -+ u64 tilers_powered; -+ u64 tiler_available_bitmap; -+ u64 tiler_transitioning_bitmap; -+ u64 shader_available_bitmap; -+ u64 shader_ready_bitmap; -+ u64 shader_transitioning_bitmap; -+ u64 l2_available_bitmap; -+ u64 prev_l2_available_bitmap; -+ u64 l2_inuse_bitmap; ++ struct kbasep_irq_test *test_data = container_of(timer, ++ struct kbasep_irq_test, timer); + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ test_data->timeout = 1; ++ test_data->triggered = 1; ++ wake_up(&test_data->wait); ++ return HRTIMER_NORESTART; ++} + -+ spin_lock(&kbdev->pm.backend.gpu_powered_lock); -+ if (kbdev->pm.backend.gpu_powered == false) { -+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock); -+ if (kbdev->pm.backend.desired_shader_state == 0 && -+ kbdev->pm.backend.desired_tiler_state == 0) -+ return true; -+ return false; ++static int kbasep_common_test_interrupt( ++ struct kbase_device * const kbdev, u32 tag) ++{ ++ int err = 0; ++ irq_handler_t test_handler; ++ ++ u32 old_mask_val; ++ u16 mask_offset; ++ u16 rawstat_offset; ++ ++ switch (tag) { ++ case JOB_IRQ_TAG: ++ test_handler = kbase_job_irq_test_handler; ++ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); ++ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); ++ break; ++ case MMU_IRQ_TAG: ++ test_handler = kbase_mmu_irq_test_handler; ++ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); ++ mask_offset = MMU_REG(MMU_IRQ_MASK); ++ break; ++ case GPU_IRQ_TAG: ++ /* already tested by pm_driver - bail out */ ++ default: ++ return 0; + } + -+ /* Trace that a change-state is being requested, and that it took -+ * (effectively) no time to start it. 
This is useful for counting how -+ * many state changes occurred, in a way that's backwards-compatible -+ * with processing the trace data */ -+ kbase_timeline_pm_send_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); -+ kbase_timeline_pm_handle_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); ++ /* store old mask */ ++ old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL); ++ /* mask interrupts */ ++ kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + -+ /* If any cores are already powered then, we must keep the caches on */ -+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_SHADER); -+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); -+ cores_powered |= kbdev->pm.backend.desired_shader_state; ++ if (kbdev->irqs[tag].irq) { ++ /* release original handler and install test handler */ ++ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { ++ err = -EINVAL; ++ } else { ++ kbasep_irq_test_data.timeout = 0; ++ hrtimer_init(&kbasep_irq_test_data.timer, ++ CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ kbasep_irq_test_data.timer.function = ++ kbasep_test_interrupt_timeout; + -+#ifdef CONFIG_MALI_CORESTACK -+ /* Work out which core stacks want to be powered */ -+ desired_stack_state = kbase_pm_core_stack_mask(cores_powered); -+ stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) | -+ desired_stack_state; -+#endif /* CONFIG_MALI_CORESTACK */ ++ /* trigger interrupt */ ++ kbase_reg_write(kbdev, mask_offset, 0x1, NULL); ++ kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + -+ /* Work out which tilers want to be powered */ -+ tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_TILER); -+ tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); -+ tilers_powered |= kbdev->pm.backend.desired_tiler_state; ++ hrtimer_start(&kbasep_irq_test_data.timer, ++ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), ++ HRTIMER_MODE_REL); + -+ /* If there are l2 cache users registered, keep all l2s powered even if -+ * all other cores are off. 
*/ -+ if (kbdev->l2_users_count > 0) -+ cores_powered |= kbdev->gpu_props.props.raw_props.l2_present; ++ wait_event(kbasep_irq_test_data.wait, ++ kbasep_irq_test_data.triggered != 0); + -+ desired_l2_state = get_desired_cache_status( -+ kbdev->gpu_props.props.raw_props.l2_present, -+ cores_powered, tilers_powered); ++ if (kbasep_irq_test_data.timeout != 0) { ++ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", ++ kbdev->irqs[tag].irq, tag); ++ err = -EINVAL; ++ } else { ++ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", ++ kbdev->irqs[tag].irq, tag); ++ } + -+ l2_inuse_bitmap = get_desired_cache_status( -+ kbdev->gpu_props.props.raw_props.l2_present, -+ cores_powered | shader_transitioning_bitmap, -+ tilers_powered | tiler_transitioning_bitmap); ++ hrtimer_cancel(&kbasep_irq_test_data.timer); ++ kbasep_irq_test_data.triggered = 0; + -+#ifdef CONFIG_MALI_CORESTACK -+ if (stacks_powered) -+ desired_l2_state |= 1; -+#endif /* CONFIG_MALI_CORESTACK */ ++ /* mask interrupts */ ++ kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + -+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */ -+ if (0 != desired_l2_state) -+ desired_l2_state |= 1; ++ /* release test handler */ ++ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); ++ } + -+ prev_l2_available_bitmap = kbdev->l2_available_bitmap; -+ in_desired_state &= kbase_pm_transition_core_type(kbdev, -+ KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap, -+ &l2_available_bitmap, -+ &kbdev->pm.backend.powering_on_l2_state); ++ /* restore original interrupt */ ++ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], ++ kbdev->irqs[tag].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { ++ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", ++ kbdev->irqs[tag].irq, tag); ++ err = -EINVAL; ++ } ++ } ++ /* restore old mask */ ++ kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + -+ if (kbdev->l2_available_bitmap != l2_available_bitmap) -+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap); ++ return err; ++} + -+ kbdev->l2_available_bitmap = l2_available_bitmap; ++int kbasep_common_test_interrupt_handlers( ++ struct kbase_device * const kbdev) ++{ ++ int err; ++ ++ init_waitqueue_head(&kbasep_irq_test_data.wait); ++ kbasep_irq_test_data.triggered = 0; + ++ /* A suspend won't happen during startup/insmod */ ++ kbase_pm_context_active(kbdev); + -+#ifdef CONFIG_MALI_CORESTACK -+ if (in_desired_state) { -+ in_desired_state &= kbase_pm_transition_core_type(kbdev, -+ KBASE_PM_CORE_STACK, desired_stack_state, 0, -+ &kbdev->stack_available_bitmap, -+ &kbdev->pm.backend.powering_on_stack_state); ++ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); ++ if (err) { ++ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); ++ goto out; + } -+#endif /* CONFIG_MALI_CORESTACK */ + -+ if (in_desired_state) { -+ in_desired_state &= kbase_pm_transition_core_type(kbdev, -+ KBASE_PM_CORE_TILER, -+ kbdev->pm.backend.desired_tiler_state, -+ 0, &tiler_available_bitmap, -+ &kbdev->pm.backend.powering_on_tiler_state); -+ in_desired_state &= kbase_pm_transition_core_type(kbdev, -+ KBASE_PM_CORE_SHADER, -+ kbdev->pm.backend.desired_shader_state, -+ kbdev->shader_inuse_bitmap, -+ &shader_available_bitmap, -+ &kbdev->pm.backend.powering_on_shader_state); ++ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); ++ if (err) { ++ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); ++ goto out; ++ } + -+ if (kbdev->shader_available_bitmap != shader_available_bitmap) { -+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, -+ NULL, 0u, -+ (u32) shader_available_bitmap); -+ KBASE_TIMELINE_POWER_SHADER(kbdev, -+ shader_available_bitmap); -+ } ++ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + -+ kbdev->shader_available_bitmap = shader_available_bitmap; ++ out: ++ kbase_pm_context_idle(kbdev); + -+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { -+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, -+ NULL, NULL, 0u, -+ (u32) tiler_available_bitmap); -+ KBASE_TIMELINE_POWER_TILER(kbdev, -+ tiler_available_bitmap); ++ return err; ++} ++#endif /* CONFIG_MALI_DEBUG */ ++ ++int kbase_install_interrupts(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ int err; ++ u32 i; ++ ++ for (i = 0; i < nr; i++) { ++ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], ++ kbdev->irqs[i].flags | IRQF_SHARED, ++ dev_name(kbdev->dev), ++ kbase_tag(kbdev, i)); ++ if (err) { ++ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", ++ kbdev->irqs[i].irq, i); ++#ifdef CONFIG_SPARSE_IRQ ++ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); ++#endif /* CONFIG_SPARSE_IRQ */ ++ goto release; + } ++ } + -+ kbdev->tiler_available_bitmap = tiler_available_bitmap; ++ return 0; + -+ } else if ((l2_available_bitmap & -+ kbdev->gpu_props.props.raw_props.tiler_present) != -+ kbdev->gpu_props.props.raw_props.tiler_present) { -+ tiler_available_bitmap = 0; ++ release: ++ while (i-- > 0) ++ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + -+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) -+ KBASE_TIMELINE_POWER_TILER(kbdev, -+ tiler_available_bitmap); ++ return err; ++} + -+ kbdev->tiler_available_bitmap = tiler_available_bitmap; ++void kbase_release_interrupts(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ u32 i; ++ ++ for (i = 0; i < nr; i++) { ++ if (kbdev->irqs[i].irq) ++ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } ++} + -+ /* State updated for slow-path waiters */ -+ kbdev->pm.backend.gpu_in_desired_state = in_desired_state; ++void kbase_synchronize_irqs(struct kbase_device *kbdev) ++{ ++ u32 nr = ARRAY_SIZE(kbase_handler_table); ++ u32 i; + -+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_SHADER); -+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, -+ KBASE_PM_CORE_SHADER); ++ for (i = 0; i < nr; i++) { ++ if (kbdev->irqs[i].irq) ++ synchronize_irq(kbdev->irqs[i].irq); ++ } ++} + -+ /* Determine whether the cores are now available (even if the set of -+ * available cores is empty). 
Note that they can be available even if -+ * we've not finished transitioning to the desired state */ -+ if ((kbdev->shader_available_bitmap & -+ kbdev->pm.backend.desired_shader_state) -+ == kbdev->pm.backend.desired_shader_state && -+ (kbdev->tiler_available_bitmap & -+ kbdev->pm.backend.desired_tiler_state) -+ == kbdev->pm.backend.desired_tiler_state) { -+ cores_are_available = true; ++#endif /* !defined(CONFIG_MALI_NO_MALI) */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +new file mode 100644 +index 000000000..92358f2bf +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +@@ -0,0 +1,237 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, -+ (u32)(kbdev->shader_available_bitmap & -+ kbdev->pm.backend.desired_shader_state)); -+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, -+ (u32)(kbdev->tiler_available_bitmap & -+ kbdev->pm.backend.desired_tiler_state)); + -+ /* Log timelining information about handling events that power -+ * up cores, to match up either with immediate submission either -+ * because cores already available, or from PM IRQ */ -+ if (!in_desired_state) -+ kbase_timeline_pm_send_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); -+ } + -+ if (in_desired_state) { -+ KBASE_DEBUG_ASSERT(cores_are_available); + -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, -+ kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_L2)); -+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, -+ kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_SHADER)); -+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, -+ kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_TILER)); -+#ifdef CONFIG_MALI_CORESTACK -+ kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, -+ kbase_pm_get_ready_cores(kbdev, -+ KBASE_PM_CORE_STACK)); -+#endif /* CONFIG_MALI_CORESTACK */ -+#endif ++/* ++ * Register backend context / address space management ++ */ + -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ KBASE_PM_CORE_L2, -+ kbase_pm_get_ready_cores( -+ kbdev, KBASE_PM_CORE_L2)); -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ KBASE_PM_CORE_SHADER, -+ kbase_pm_get_ready_cores( -+ kbdev, KBASE_PM_CORE_SHADER)); -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ KBASE_PM_CORE_TILER, -+ kbase_pm_get_ready_cores( -+ kbdev, -+ KBASE_PM_CORE_TILER)); -+#ifdef CONFIG_MALI_CORESTACK -+ KBASE_TLSTREAM_AUX_PM_STATE( -+ KBASE_PM_CORE_STACK, -+ kbase_pm_get_ready_cores( -+ kbdev, -+ KBASE_PM_CORE_STACK)); -+#endif /* CONFIG_MALI_CORESTACK */ ++#include ++#include ++#include + -+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, -+ kbdev->pm.backend.gpu_in_desired_state, -+ (u32)kbdev->pm.backend.desired_shader_state); -+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, -+ (u32)kbdev->pm.backend.desired_tiler_state); ++/** ++ * assign_and_activate_kctx_addr_space - Assign an AS to a context ++ * @kbdev: Kbase device ++ * @kctx: Kbase context ++ * 
@current_as: Address Space to assign ++ * ++ * Assign an Address Space (AS) to a context, and add the context to the Policy. ++ * ++ * This includes ++ * setting up the global runpool_irq structure and the context on the AS, ++ * Activating the MMU on the AS, ++ * Allowing jobs to be submitted on the AS. ++ * ++ * Context: ++ * kbasep_js_kctx_info.jsctx_mutex held, ++ * kbasep_js_device_data.runpool_mutex held, ++ * AS transaction mutex held, ++ * Runpool IRQ lock held ++ */ ++static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_as *current_as) ++{ ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ /* Log timelining information for synchronous waiters */ -+ kbase_timeline_pm_send_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); -+ /* Wake slow-path waiters. Job scheduler does not use this. */ -+ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); ++ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); -+ } ++ /* Attribute handling */ ++ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + -+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock); ++ /* Allow it to run jobs */ ++ kbasep_js_set_submit_allowed(js_devdata, kctx); + -+ /* kbase_pm_ca_update_core_status can cause one-level recursion into -+ * this function, so it must only be called once all changes to kbdev -+ * have been committed, and after the gpu_powered_lock has been -+ * dropped. */ -+ if (kbdev->shader_ready_bitmap != shader_ready_bitmap || -+ kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { -+ kbdev->shader_ready_bitmap = shader_ready_bitmap; -+ kbdev->shader_transitioning_bitmap = -+ shader_transitioning_bitmap; ++ kbase_js_runpool_inc_context_count(kbdev, kctx); ++} + -+ kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, -+ shader_transitioning_bitmap); -+ } ++bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ int i; + -+ /* The core availability policy is not allowed to keep core group 0 -+ * turned off (unless it was changing the l2 power state) */ -+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & -+ kbdev->gpu_props.props.coherency_info.group[0].core_mask) && -+ (prev_l2_available_bitmap == desired_l2_state) && -+ !(kbase_pm_ca_get_core_mask(kbdev) & -+ kbdev->gpu_props.props.coherency_info.group[0].core_mask)) -+ BUG(); ++ if (kbdev->hwaccess.active_kctx == kctx) { ++ /* Context is already active */ ++ return true; ++ } + -+ /* The core availability policy is allowed to keep core group 1 off, -+ * but all jobs specifically targeting CG1 must fail */ -+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & -+ kbdev->gpu_props.props.coherency_info.group[1].core_mask) && -+ !(kbase_pm_ca_get_core_mask(kbdev) & -+ kbdev->gpu_props.props.coherency_info.group[1].core_mask)) -+ kbdev->pm.backend.cg1_disabled = true; -+ else -+ kbdev->pm.backend.cg1_disabled = false; ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ if (kbdev->as_to_kctx[i] == kctx) { ++ /* Context already has ASID - mark as active */ ++ return true; ++ } ++ } + -+ return cores_are_available; ++ /* Context does not have address space assigned */ ++ return false; +} -+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); -+ -+/* Timeout for kbase_pm_check_transitions_sync when 
wait_event_killable has -+ * aborted due to a fatal signal. If the time spent waiting has exceeded this -+ * threshold then there is most likely a hardware issue. */ -+#define PM_TIMEOUT (5*HZ) /* 5s */ + -+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) ++void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ unsigned long flags; -+ unsigned long timeout; -+ bool cores_are_available; -+ int ret; ++ int as_nr = kctx->as_nr; + -+ /* Force the transition to be checked and reported - the cores may be -+ * 'available' (for job submission) but not fully powered up. */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (as_nr == KBASEP_AS_NR_INVALID) { ++ WARN(1, "Attempting to release context without ASID\n"); ++ return; ++ } + -+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Don't need 'cores_are_available', because we don't return anything */ -+ CSTD_UNUSED(cores_are_available); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (atomic_read(&kctx->refcount) != 1) { ++ WARN(1, "Attempting to release active ASID\n"); ++ return; ++ } + -+ timeout = jiffies + PM_TIMEOUT; ++ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + -+ /* Wait for cores */ -+ ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, -+ kbdev->pm.backend.gpu_in_desired_state); ++ kbase_ctx_sched_release_ctx(kctx); ++ kbase_js_runpool_dec_context_count(kbdev, kctx); ++} + -+ if (ret < 0 && time_after(jiffies, timeout)) { -+ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); -+ dev_err(kbdev->dev, "Desired state :\n"); -+ dev_err(kbdev->dev, "\tShader=%016llx\n", -+ kbdev->pm.backend.desired_shader_state); -+ dev_err(kbdev->dev, "\tTiler =%016llx\n", -+ kbdev->pm.backend.desired_tiler_state); -+ dev_err(kbdev->dev, "Current state :\n"); -+ dev_err(kbdev->dev, "\tShader=%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_READY_HI), NULL), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_READY_LO), -+ NULL)); -+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_READY_HI), NULL), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_READY_LO), NULL)); -+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_READY_HI), NULL), -+ kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_READY_LO), NULL)); -+ dev_err(kbdev->dev, "Cores transitioning :\n"); -+ dev_err(kbdev->dev, "\tShader=%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ SHADER_PWRTRANS_HI), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ SHADER_PWRTRANS_LO), NULL)); -+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ TILER_PWRTRANS_HI), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ TILER_PWRTRANS_LO), NULL)); -+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ L2_PWRTRANS_HI), NULL), -+ kbase_reg_read(kbdev, GPU_CONTROL_REG( -+ L2_PWRTRANS_LO), NULL)); -+#if KBASE_GPU_RESET_EN -+ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); -+ if (kbase_prepare_to_reset_gpu(kbdev)) -+ kbase_reset_gpu(kbdev); -+#endif /* KBASE_GPU_RESET_EN */ -+ } else { -+ /* Log timelining information that a change in state has -+ * completed */ -+ kbase_timeline_pm_handle_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); -+ } ++void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, ++ struct 
kbase_context *kctx) ++{ +} -+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); + -+void kbase_pm_enable_interrupts(struct kbase_device *kbdev) ++int kbase_backend_find_and_release_free_address_space( ++ struct kbase_device *kbdev, struct kbase_context *kctx) +{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; ++ int i; + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ /* -+ * Clear all interrupts, -+ * and unmask them all. -+ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, -+ NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, -+ NULL); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, -+ NULL); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); -+} ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ struct kbasep_js_kctx_info *as_js_kctx_info; ++ struct kbase_context *as_kctx; + -+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ /* -+ * Mask all interrupts, -+ * and clear them all. -+ */ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ as_kctx = kbdev->as_to_kctx[i]; ++ as_js_kctx_info = &as_kctx->jctx.sched_info; + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, -+ NULL); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); -+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, -+ NULL); ++ /* Don't release privileged or active contexts, or contexts with ++ * jobs running. ++ * Note that a context will have at least 1 reference (which ++ * was previously taken by kbasep_js_schedule_ctx()) until ++ * descheduled. 
++ */ ++ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && ++ atomic_read(&as_kctx->refcount) == 1) { ++ if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, ++ as_kctx)) { ++ WARN(1, "Failed to retain active context\n"); + -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); -+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); -+} ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+void kbase_pm_disable_interrupts(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ return KBASEP_AS_NR_INVALID; ++ } + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_disable_interrupts_nolock(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + -+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); ++ /* Drop and retake locks to take the jsctx_mutex on the ++ * context we're about to release without violating lock ++ * ordering ++ */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + -+/* -+ * pmu layout: -+ * 0x0000: PMU TAG (RO) (0xCAFECAFE) -+ * 0x0004: PMU VERSION ID (RO) (0x00000000) -+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) -+ */ -+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) -+{ -+ bool reset_required = is_resume; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ unsigned long flags; ++ /* Release context from address space */ ++ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->pm.lock); ++ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + -+ if (kbdev->pm.backend.gpu_powered) { -+ /* Already turned on */ -+ if (kbdev->poweroff_pending) -+ kbase_pm_enable_interrupts(kbdev); -+ kbdev->poweroff_pending = false; -+ KBASE_DEBUG_ASSERT(!is_resume); -+ return; -+ } ++ if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, ++ as_kctx, ++ true); + -+ kbdev->poweroff_pending = false; ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + -+ KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); ++ return i; ++ } + -+ if (is_resume && kbdev->pm.backend.callback_power_resume) { -+ kbdev->pm.backend.callback_power_resume(kbdev); -+ return; -+ } else if (kbdev->pm.backend.callback_power_on) { -+ kbdev->pm.backend.callback_power_on(kbdev); -+ /* If your platform properly keeps the GPU state you may use the -+ * return value of the callback_power_on function to -+ * conditionally reset the GPU on power up. Currently we are -+ * conservative and always reset the GPU. 
*/ -+ reset_required = true; -+ } ++ /* Context was retained while locks were dropped, ++ * continue looking for free AS */ + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); -+ kbdev->pm.backend.gpu_powered = true; -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + -+ if (reset_required) { -+ /* GPU state was lost, reset GPU to ensure it is in a -+ * consistent state */ -+ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ } + } + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ /* Lastly, enable the interrupts */ -+ kbase_pm_enable_interrupts(kbdev); -+} ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+KBASE_EXPORT_TEST_API(kbase_pm_clock_on); ++ return KBASEP_AS_NR_INVALID; ++} + -+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) ++bool kbase_backend_use_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int as_nr) +{ -+ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbase_as *new_address_space = NULL; + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ lockdep_assert_held(&kbdev->pm.lock); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* ASSERT that the cores should now be unavailable. No lock needed. */ -+ KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); ++ if (kbdev->hwaccess.active_kctx == kctx) { ++ WARN(1, "Context is already scheduled in\n"); ++ return false; ++ } + -+ kbdev->poweroff_pending = true; ++ new_address_space = &kbdev->as[as_nr]; + -+ if (!kbdev->pm.backend.gpu_powered) { -+ /* Already turned off */ -+ if (is_suspend && kbdev->pm.backend.callback_power_suspend) -+ kbdev->pm.backend.callback_power_suspend(kbdev); -+ return true; ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); ++ ++ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { ++ /* We need to retain it to keep the corresponding address space ++ */ ++ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + } + -+ KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); ++ return true; ++} + -+ /* Disable interrupts. This also clears any outstanding interrupts */ -+ kbase_pm_disable_interrupts(kbdev); -+ /* Ensure that any IRQ handlers have finished */ -+ kbase_synchronize_irqs(kbdev); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +new file mode 100644 +index 000000000..08a7400e6 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +@@ -0,0 +1,123 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ if (atomic_read(&kbdev->faults_pending)) { -+ /* Page/bus faults are still being processed. The GPU can not -+ * be powered off until they have completed */ -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ flags); -+ return false; -+ } + -+ kbase_pm_cache_snoop_disable(kbdev); + -+ /* The GPU power may be turned off from this point */ -+ kbdev->pm.backend.gpu_powered = false; -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); ++/* ++ * Register-based HW access backend specific definitions ++ */ + -+ if (is_suspend && kbdev->pm.backend.callback_power_suspend) -+ kbdev->pm.backend.callback_power_suspend(kbdev); -+ else if (kbdev->pm.backend.callback_power_off) -+ kbdev->pm.backend.callback_power_off(kbdev); -+ return true; -+} ++#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ ++#define _KBASE_HWACCESS_GPU_DEFS_H_ + -+KBASE_EXPORT_TEST_API(kbase_pm_clock_off); ++/* SLOT_RB_SIZE must be < 256 */ ++#define SLOT_RB_SIZE 2 ++#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + -+struct kbasep_reset_timeout_data { -+ struct hrtimer timer; -+ bool timed_out; -+ struct kbase_device *kbdev; ++/** ++ * struct rb_entry - Ringbuffer entry ++ * @katom: Atom associated with this entry ++ */ ++struct rb_entry { ++ struct kbase_jd_atom *katom; +}; + -+void kbase_pm_reset_done(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ kbdev->pm.backend.reset_done = true; -+ wake_up(&kbdev->pm.backend.reset_done_wait); -+} ++/** ++ * struct slot_rb - Slot ringbuffer ++ * @entries: Ringbuffer entries ++ * @last_context: The last context to submit a job on this slot ++ * @read_idx: Current read index of buffer ++ * @write_idx: Current write index of buffer ++ * @job_chain_flag: Flag used to implement jobchain disambiguation ++ */ ++struct slot_rb { ++ struct rb_entry entries[SLOT_RB_SIZE]; ++ ++ struct kbase_context *last_context; ++ ++ u8 read_idx; ++ u8 write_idx; ++ ++ u8 job_chain_flag; ++}; + +/** -+ * kbase_pm_wait_for_reset - Wait for a reset to happen -+ * -+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. ++ * struct kbase_backend_data - GPU backend specific data for HW access layer ++ * @slot_rb: Slot ringbuffers ++ * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines ++ * whether slots 0/1 or slot 2 are currently being ++ * pulled from ++ * @scheduling_timer: The timer tick used for rescheduling jobs ++ * @timer_running: Is the timer running? The runpool_mutex must be ++ * held whilst modifying this. ++ * @suspend_timer: Is the timer suspended? Set when a suspend ++ * occurs and cleared on resume. The runpool_mutex ++ * must be held whilst modifying this. ++ * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) ++ * @reset_workq: Work queue for performing the reset ++ * @reset_work: Work item for performing the reset ++ * @reset_wait: Wait event signalled when the reset is complete ++ * @reset_timer: Timeout for soft-stops before the reset ++ * @timeouts_updated: Have timeout values just been updated? 
+ * -+ * @kbdev: Kbase device ++ * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ -+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->pm.lock); ++struct kbase_backend_data { ++ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + -+ wait_event(kbdev->pm.backend.reset_done_wait, -+ (kbdev->pm.backend.reset_done)); -+ kbdev->pm.backend.reset_done = false; -+} ++ bool rmu_workaround_flag; + -+KBASE_EXPORT_TEST_API(kbase_pm_reset_done); ++ struct hrtimer scheduling_timer; + -+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) -+{ -+ struct kbasep_reset_timeout_data *rtdata = -+ container_of(timer, struct kbasep_reset_timeout_data, timer); ++ bool timer_running; ++ bool suspend_timer; + -+ rtdata->timed_out = 1; ++ atomic_t reset_gpu; + -+ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset -+ * hasn't completed */ -+ kbase_pm_reset_done(rtdata->kbdev); ++/* The GPU reset isn't pending */ ++#define KBASE_RESET_GPU_NOT_PENDING 0 ++/* kbase_prepare_to_reset_gpu has been called */ ++#define KBASE_RESET_GPU_PREPARED 1 ++/* kbase_reset_gpu has been called - the reset will now definitely happen ++ * within the timeout period */ ++#define KBASE_RESET_GPU_COMMITTED 2 ++/* The GPU reset process is currently occuring (timeout has expired or ++ * kbasep_try_reset_gpu_early was called) */ ++#define KBASE_RESET_GPU_HAPPENING 3 ++/* Reset the GPU silently, used when resetting the GPU as part of normal ++ * behavior (e.g. when exiting protected mode). */ ++#define KBASE_RESET_GPU_SILENT 4 ++ struct workqueue_struct *reset_workq; ++ struct work_struct reset_work; ++ wait_queue_head_t reset_wait; ++ struct hrtimer reset_timer; + -+ return HRTIMER_NORESTART; -+} ++ bool timeouts_updated; ++}; + -+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) -+{ -+ struct device_node *np = kbdev->dev->of_node; -+ u32 jm_values[4]; -+ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> -+ GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> -+ GPU_ID_VERSION_MAJOR_SHIFT; ++/** ++ * struct kbase_jd_atom_backend - GPU backend specific katom data ++ */ ++struct kbase_jd_atom_backend { ++}; + -+ kbdev->hw_quirks_sc = 0; ++/** ++ * struct kbase_context_backend - GPU backend specific context data ++ */ ++struct kbase_context_backend { ++}; + -+ /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. -+ * and needed due to MIDGLES-3539. See PRLAM-11035 */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) || -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035)) -+ kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE; ++#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +new file mode 100644 +index 000000000..a6fb097b9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +@@ -0,0 +1,1518 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. -+ */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) -+ kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; + -+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY -+ /* Enable alternative hardware counter selection if configured. */ -+ if (!GPU_ID_IS_NEW_FORMAT(prod_id)) -+ kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; ++ ++/* ++ * Base kernel job manager APIs ++ */ ++ ++#include ++#include ++#include ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++#include +#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) -+ kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; ++#define beenthere(kctx, f, a...) \ ++ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + -+ if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { -+ if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ -+ kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; -+ else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ -+ kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; -+ } ++#if KBASE_GPU_RESET_EN ++static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); ++static void kbasep_reset_timeout_worker(struct work_struct *data); ++static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); ++#endif /* KBASE_GPU_RESET_EN */ + -+ if (!kbdev->hw_quirks_sc) -+ kbdev->hw_quirks_sc = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(SHADER_CONFIG), NULL); ++static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, ++ struct kbase_context *kctx) ++{ ++ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); ++} + -+ kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TILER_CONFIG), NULL); ++void kbase_job_hw_submit(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ int js) ++{ ++ struct kbase_context *kctx; ++ u32 cfg; ++ u64 jc_head = katom->jc; + -+ /* Set tiler clock gate override if required */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) -+ kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; ++ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(katom); + -+ /* Limit the GPU bus bandwidth if the platform needs this. 
*/ -+ kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); ++ kctx = katom->kctx; + -+ /* Limit read ID width for AXI */ -+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); -+ kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << -+ L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; ++ /* Command register must be available */ ++ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); ++ /* Affinity is not violating */ ++ kbase_js_debug_log_current_affinities(kbdev); ++ KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, ++ katom->affinity)); + -+ /* Limit write ID width for AXI */ -+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); -+ kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << -+ L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), ++ jc_head & 0xFFFFFFFF, kctx); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), ++ jc_head >> 32, kctx); + -+ if (kbdev->system_coherency == COHERENCY_ACE) { -+ /* Allow memory configuration disparity to be ignored, we -+ * optimize the use of shared memory and thus we expect -+ * some disparity in the memory configuration */ -+ kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; -+ } ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), ++ katom->affinity & 0xFFFFFFFF, kctx); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), ++ katom->affinity >> 32, kctx); + -+ kbdev->hw_quirks_jm = 0; -+ /* Only for T86x/T88x-based products after r2p0 */ -+ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { ++ /* start MMU, medium priority, cache clean/flush on end, clean/flush on ++ * start */ ++ cfg = kctx->as_nr; + -+ if (of_property_read_u32_array(np, -+ "jm_config", -+ &jm_values[0], -+ ARRAY_SIZE(jm_values))) { -+ /* Entry not in device tree, use defaults */ -+ jm_values[0] = 0; -+ jm_values[1] = 0; -+ jm_values[2] = 0; -+ jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; -+ } ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) ++ cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + -+ /* Limit throttle limit to 6 bits*/ -+ if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) { -+ dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); -+ jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; -+ } ++#ifndef CONFIG_MALI_COH_GPU ++ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) ++ cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; ++ else ++ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + -+ /* Aggregate to one integer. */ -+ kbdev->hw_quirks_jm |= (jm_values[0] ? -+ JM_TIMESTAMP_OVERRIDE : 0); -+ kbdev->hw_quirks_jm |= (jm_values[1] ? -+ JM_CLOCK_GATE_OVERRIDE : 0); -+ kbdev->hw_quirks_jm |= (jm_values[2] ? 
-+ JM_JOB_THROTTLE_ENABLE : 0); -+ kbdev->hw_quirks_jm |= (jm_values[3] << -+ JM_JOB_THROTTLE_LIMIT_SHIFT); ++ if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END)) ++ cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; ++ else ++ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; ++#endif /* CONFIG_MALI_COH_GPU */ + -+ } else if (GPU_ID_IS_NEW_FORMAT(prod_id) && -+ (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == -+ GPU_ID2_PRODUCT_TMIX)) { -+ /* Only for tMIx */ -+ u32 coherency_features; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649)) ++ cfg |= JS_CONFIG_START_MMU; + -+ coherency_features = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); ++ cfg |= JS_CONFIG_THREAD_PRI(8); + -+ /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly -+ * documented for tMIx so force correct value here. -+ */ -+ if (coherency_features == -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { -+ kbdev->hw_quirks_jm |= -+ (COHERENCY_ACE_LITE | COHERENCY_ACE) << -+ JM_FORCE_COHERENCY_FEATURES_SHIFT; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && ++ (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) ++ cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; ++ ++ if (kbase_hw_has_feature(kbdev, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { ++ if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { ++ cfg |= JS_CONFIG_JOB_CHAIN_FLAG; ++ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; ++ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = ++ true; ++ } else { ++ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; ++ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = ++ false; + } + } + -+ if (!kbdev->hw_quirks_jm) -+ kbdev->hw_quirks_jm = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(JM_CONFIG), NULL); ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + -+#ifdef CONFIG_MALI_CORESTACK -+#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) -+ kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; -+#endif /* CONFIG_MALI_CORESTACK */ -+} ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), ++ katom->flush_id, kctx); + -+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) -+{ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), -+ kbdev->hw_quirks_sc, NULL); ++ /* Write an approximate start timestamp. ++ * It's approximate because there might be a job in the HEAD register. ++ */ ++ katom->start_timestamp = ktime_get(); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), -+ kbdev->hw_quirks_tiler, NULL); ++ /* GO ! 
*/ ++ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", ++ katom, kctx, js, jc_head, katom->affinity); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), -+ kbdev->hw_quirks_mmu, NULL); ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, ++ (u32) katom->affinity); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), -+ kbdev->hw_quirks_jm, NULL); ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_job_slots_event( ++ GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), ++ kctx, kbase_jd_atom_id(kctx, katom)); ++#endif ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, ++ katom->affinity, cfg); ++ KBASE_TLSTREAM_TL_RET_CTX_LPU( ++ kctx, ++ &kbdev->gpu_props.props.raw_props.js_features[ ++ katom->slot_nr]); ++ KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); ++ KBASE_TLSTREAM_TL_RET_ATOM_LPU( ++ katom, ++ &kbdev->gpu_props.props.raw_props.js_features[js], ++ "ctx_nr,atom_nr"); ++#ifdef CONFIG_GPU_TRACEPOINTS ++ if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { ++ /* If this is the only job on the slot, trace it as starting */ ++ char js_string[16]; ++ ++ trace_gpu_sched_switch( ++ kbasep_make_job_slot_string(js, js_string, ++ sizeof(js_string)), ++ ktime_to_ns(katom->start_timestamp), ++ (u32)katom->kctx->id, 0, katom->work_id); ++ kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; ++ } ++#endif ++ kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); + ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), ++ JS_COMMAND_START, katom->kctx); +} + -+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) ++/** ++ * kbasep_job_slot_update_head_start_timestamp - Update timestamp ++ * @kbdev: kbase device ++ * @js: job slot ++ * @end_timestamp: timestamp ++ * ++ * Update the start_timestamp of the job currently in the HEAD, based on the ++ * fact that we got an IRQ for the previous set of completed jobs. ++ * ++ * The estimate also takes into account the time the job was submitted, to ++ * work out the best estimate (which might still result in an over-estimate to ++ * the calculated time spent) ++ */ ++static void kbasep_job_slot_update_head_start_timestamp( ++ struct kbase_device *kbdev, ++ int js, ++ ktime_t end_timestamp) +{ -+ if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && -+ !kbdev->cci_snoop_enabled) { -+#ifdef CONFIG_ARM64 -+ if (kbdev->snoop_enable_smc != 0) -+ kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); -+#endif /* CONFIG_ARM64 */ -+ dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); -+ kbdev->cci_snoop_enabled = true; ++ if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { ++ struct kbase_jd_atom *katom; ++ ktime_t timestamp_diff; ++ /* The atom in the HEAD */ ++ katom = kbase_gpu_inspect(kbdev, js, 0); ++ ++ KBASE_DEBUG_ASSERT(katom != NULL); ++ ++ timestamp_diff = ktime_sub(end_timestamp, ++ katom->start_timestamp); ++ if (ktime_to_ns(timestamp_diff) >= 0) { ++ /* Only update the timestamp if it's a better estimate ++ * than what's currently stored. 
This is because our ++ * estimate that accounts for the throttle time may be ++ * too much of an overestimate */ ++ katom->start_timestamp = end_timestamp; ++ } + } +} + -+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) ++/** ++ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline ++ * tracepoint ++ * @kbdev: kbase device ++ * @js: job slot ++ * ++ * Make a tracepoint call to the instrumentation module informing that ++ * softstop happened on given lpu (job slot). ++ */ ++static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, ++ int js) +{ -+ if (kbdev->cci_snoop_enabled) { -+#ifdef CONFIG_ARM64 -+ if (kbdev->snoop_disable_smc != 0) { -+ mali_cci_flush_l2(kbdev); -+ kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); -+ } -+#endif /* CONFIG_ARM64 */ -+ dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); -+ kbdev->cci_snoop_enabled = false; -+ } ++ KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( ++ &kbdev->gpu_props.props.raw_props.js_features[js]); +} + -+static int kbase_pm_do_reset(struct kbase_device *kbdev) ++void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ -+ struct kbasep_reset_timeout_data rtdata; ++ unsigned long flags; ++ int i; ++ u32 count = 0; ++ ktime_t end_timestamp = ktime_get(); ++ struct kbasep_js_device_data *js_devdata; + -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); ++ KBASE_DEBUG_ASSERT(kbdev); ++ js_devdata = &kbdev->js_data; + -+ KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); ++ KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_SOFT_RESET, NULL); ++ memset(&kbdev->slot_submit_count_irq[0], 0, ++ sizeof(kbdev->slot_submit_count_irq)); + -+ /* Unmask the reset complete interrupt only */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, -+ NULL); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Initialize a structure for tracking the status of the reset */ -+ rtdata.kbdev = kbdev; -+ rtdata.timed_out = 0; ++ while (done) { ++ u32 failed = done >> 16; + -+ /* Create a timer to use as a timeout on the reset */ -+ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rtdata.timer.function = kbasep_reset_timeout; ++ /* treat failed slots as finished slots */ ++ u32 finished = (done & 0xFFFF) | failed; + -+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), -+ HRTIMER_MODE_REL); ++ /* Note: This is inherently unfair, as we always check ++ * for lower numbered interrupts before the higher ++ * numbered ones.*/ ++ i = ffs(finished) - 1; ++ KBASE_DEBUG_ASSERT(i >= 0); + -+ /* Wait for the RESET_COMPLETED interrupt to be raised */ -+ kbase_pm_wait_for_reset(kbdev); ++ do { ++ int nr_done; ++ u32 active; ++ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ ++ u64 job_tail = 0; + -+ if (rtdata.timed_out == 0) { -+ /* GPU has been reset */ -+ hrtimer_cancel(&rtdata.timer); -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return 0; -+ } ++ if (failed & (1u << i)) { ++ /* read out the job slot status code if the job ++ * slot reported failure */ ++ completion_code = kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_STATUS), NULL); + -+ /* No interrupt has been received - check if the RAWSTAT register says -+ * the reset has completed */ -+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & -+ RESET_COMPLETED) { -+ /* The interrupt is set in the RAWSTAT; this suggests that the -+ * interrupts are not getting to the CPU */ -+ dev_err(kbdev->dev, 
"Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); -+ /* If interrupts aren't working we can't continue. */ -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return -EINVAL; -+ } ++ switch (completion_code) { ++ case BASE_JD_EVENT_STOPPED: ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_job_slots_event( ++ GATOR_MAKE_EVENT( ++ GATOR_JOB_SLOT_SOFT_STOPPED, i), ++ NULL, 0); ++#endif + -+ /* The GPU doesn't seem to be responding to the reset so try a hard -+ * reset */ -+ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", -+ RESET_TIMEOUT); -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_HARD_RESET, NULL); ++ kbasep_trace_tl_event_lpu_softstop( ++ kbdev, i); + -+ /* Restart the timer to wait for the hard reset to complete */ -+ rtdata.timed_out = 0; ++ /* Soft-stopped job - read the value of ++ * JS_TAIL so that the job chain can ++ * be resumed */ ++ job_tail = (u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_TAIL_LO), ++ NULL) | ++ ((u64)kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, JS_TAIL_HI), ++ NULL) << 32); ++ break; ++ case BASE_JD_EVENT_NOT_STARTED: ++ /* PRLAM-10673 can cause a TERMINATED ++ * job to come back as NOT_STARTED, but ++ * the error interrupt helps us detect ++ * it */ ++ completion_code = ++ BASE_JD_EVENT_TERMINATED; ++ /* fall through */ ++ default: ++ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", ++ i, completion_code, ++ kbase_exception_name ++ (kbdev, ++ completion_code)); ++ } + -+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), -+ HRTIMER_MODE_REL); ++ kbase_gpu_irq_evict(kbdev, i); ++ } + -+ /* Wait for the RESET_COMPLETED interrupt to be raised */ -+ kbase_pm_wait_for_reset(kbdev); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), ++ done & ((1 << i) | (1 << (i + 16))), ++ NULL); ++ active = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_JS_STATE), ++ NULL); + -+ if (rtdata.timed_out == 0) { -+ /* GPU has been reset */ -+ hrtimer_cancel(&rtdata.timer); -+ destroy_hrtimer_on_stack(&rtdata.timer); -+ return 0; -+ } ++ if (((active >> i) & 1) == 0 && ++ (((done >> (i + 16)) & 1) == 0)) { ++ /* There is a potential race we must work ++ * around: ++ * ++ * 1. A job slot has a job in both current and ++ * next registers ++ * 2. The job in current completes ++ * successfully, the IRQ handler reads ++ * RAWSTAT and calls this function with the ++ * relevant bit set in "done" ++ * 3. The job in the next registers becomes the ++ * current job on the GPU ++ * 4. Sometime before the JOB_IRQ_CLEAR line ++ * above the job on the GPU _fails_ ++ * 5. The IRQ_CLEAR clears the done bit but not ++ * the failed bit. This atomically sets ++ * JOB_IRQ_JS_STATE. However since both jobs ++ * have now completed the relevant bits for ++ * the slot are set to 0. ++ * ++ * If we now did nothing then we'd incorrectly ++ * assume that _both_ jobs had completed ++ * successfully (since we haven't yet observed ++ * the fail bit being set in RAWSTAT). ++ * ++ * So at this point if there are no active jobs ++ * left we check to see if RAWSTAT has a failure ++ * bit set for the job slot. If it does we know ++ * that there has been a new failure that we ++ * didn't previously know about, so we make sure ++ * that we record this in active (but we wait ++ * for the next loop to deal with it). ++ * ++ * If we were handling a job failure (i.e. 
done ++ * has the relevant high bit set) then we know ++ * that the value read back from ++ * JOB_IRQ_JS_STATE is the correct number of ++ * remaining jobs because the failed job will ++ * have prevented any futher jobs from starting ++ * execution. ++ */ ++ u32 rawstat = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + -+ destroy_hrtimer_on_stack(&rtdata.timer); ++ if ((rawstat >> (i + 16)) & 1) { ++ /* There is a failed job that we've ++ * missed - add it back to active */ ++ active |= (1u << i); ++ } ++ } + -+ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", -+ RESET_TIMEOUT); ++ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", ++ completion_code); + -+ return -EINVAL; -+} ++ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); ++ nr_done -= (active >> i) & 1; ++ nr_done -= (active >> (i + 16)) & 1; + -+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) -+{ -+ struct kbase_device *kbdev = pdev->data; ++ if (nr_done <= 0) { ++ dev_warn(kbdev->dev, "Spurious interrupt on slot %d", ++ i); + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_SET_PROTECTED_MODE, NULL); -+ return 0; -+} ++ goto spurious; ++ } + -+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) -+{ -+ struct kbase_device *kbdev = pdev->data; ++ count += nr_done; + -+ lockdep_assert_held(&kbdev->pm.lock); ++ while (nr_done) { ++ if (nr_done == 1) { ++ kbase_gpu_complete_hw(kbdev, i, ++ completion_code, ++ job_tail, ++ &end_timestamp); ++ kbase_jm_try_kick_all(kbdev); ++ } else { ++ /* More than one job has completed. ++ * Since this is not the last job being ++ * reported this time it must have ++ * passed. This is because the hardware ++ * will not allow further jobs in a job ++ * slot to complete until the failed job ++ * is cleared from the IRQ status. 
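/*
 * Editor's note (not part of the patch): the race and completion logic
 * described above hinge on how the JOB_IRQ_* words are packed -- "done"
 * bits live in [15:0] and "failed" bits in [31:16] for the same slot.
 * Two hypothetical helpers make the shifts used throughout
 * kbase_job_done() explicit:
 */
static inline bool example_slot_done(u32 irq_word, int js)
{
        return (irq_word >> js) & 1u;
}

static inline bool example_slot_failed(u32 irq_word, int js)
{
        /* The failure bit for slot js sits 16 bits above its done bit. */
        return (irq_word >> (js + 16)) & 1u;
}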
++ */ ++ kbase_gpu_complete_hw(kbdev, i, ++ BASE_JD_EVENT_DONE, ++ 0, ++ &end_timestamp); ++ } ++ nr_done--; ++ } ++ spurious: ++ done = kbase_reg_read(kbdev, ++ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + -+ return kbase_pm_do_reset(kbdev); -+} ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { ++ /* Workaround for missing interrupt caused by ++ * PRLAM-10883 */ ++ if (((active >> i) & 1) && (0 == ++ kbase_reg_read(kbdev, ++ JOB_SLOT_REG(i, ++ JS_STATUS), NULL))) { ++ /* Force job slot to be processed again ++ */ ++ done |= (1u << i); ++ } ++ } + -+struct protected_mode_ops kbase_native_protected_ops = { -+ .protected_mode_enable = kbasep_protected_mode_enable, -+ .protected_mode_disable = kbasep_protected_mode_disable -+}; ++ failed = done >> 16; ++ finished = (done & 0xFFFF) | failed; ++ if (done) ++ end_timestamp = ktime_get(); ++ } while (finished & (1 << i)); + -+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) -+{ -+ unsigned long irq_flags; -+ int err; -+ bool resume_vinstr = false; ++ kbasep_job_slot_update_head_start_timestamp(kbdev, i, ++ end_timestamp); ++ } + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ lockdep_assert_held(&kbdev->pm.lock); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#if KBASE_GPU_RESET_EN ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_COMMITTED) { ++ /* If we're trying to reset the GPU then we might be able to do ++ * it early (without waiting for a timeout) because some jobs ++ * have completed ++ */ ++ kbasep_try_reset_gpu_early(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); ++} ++KBASE_EXPORT_TEST_API(kbase_job_done); + -+ /* Ensure the clock is on before attempting to access the hardware */ -+ if (!kbdev->pm.backend.gpu_powered) { -+ if (kbdev->pm.backend.callback_power_on) -+ kbdev->pm.backend.callback_power_on(kbdev); ++static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ bool soft_stops_allowed = true; + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, -+ irq_flags); -+ kbdev->pm.backend.gpu_powered = true; -+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, -+ irq_flags); ++ if (kbase_jd_katom_is_protected(katom)) { ++ soft_stops_allowed = false; ++ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { ++ if ((katom->core_req & BASE_JD_REQ_T) != 0) ++ soft_stops_allowed = false; + } ++ return soft_stops_allowed; ++} + -+ /* Ensure interrupts are off to begin with, this also clears any -+ * outstanding interrupts */ -+ kbase_pm_disable_interrupts(kbdev); -+ /* Ensure cache snoops are disabled before reset. 
*/ -+ kbase_pm_cache_snoop_disable(kbdev); -+ /* Prepare for the soft-reset */ -+ kbdev->pm.backend.reset_done = false; ++static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, ++ base_jd_core_req core_reqs) ++{ ++ bool hard_stops_allowed = true; + -+ /* The cores should be made unavailable due to the reset */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (kbdev->shader_available_bitmap != 0u) -+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, -+ NULL, 0u, (u32)0u); -+ if (kbdev->tiler_available_bitmap != 0u) -+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, -+ NULL, NULL, 0u, (u32)0u); -+ kbdev->shader_available_bitmap = 0u; -+ kbdev->tiler_available_bitmap = 0u; -+ kbdev->l2_available_bitmap = 0u; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { ++ if ((core_reqs & BASE_JD_REQ_T) != 0) ++ hard_stops_allowed = false; ++ } ++ return hard_stops_allowed; ++} + -+ /* Soft reset the GPU */ -+ if (kbdev->protected_mode_support) -+ err = kbdev->protected_ops->protected_mode_disable( -+ kbdev->protected_dev); -+ else -+ err = kbase_pm_do_reset(kbdev); ++void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, ++ int js, ++ u32 action, ++ base_jd_core_req core_reqs, ++ struct kbase_jd_atom *target_katom) ++{ ++ struct kbase_context *kctx = target_katom->kctx; ++#if KBASE_TRACE_ENABLE ++ u32 status_reg_before; ++ u64 job_in_head_before; ++ u32 status_reg_after; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ if (kbdev->protected_mode) -+ resume_vinstr = true; -+ kbdev->protected_mode = false; -+ kbase_ipa_model_use_configured_locked(kbdev); ++ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ /* Check the head pointer */ ++ job_in_head_before = ((u64) kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) ++ | (((u64) kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) ++ << 32); ++ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), ++ NULL); ++#endif + -+ if (err) -+ goto exit; ++ if (action == JS_COMMAND_SOFT_STOP) { ++ bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, ++ target_katom); + -+ if (flags & PM_HW_ISSUES_DETECT) -+ kbase_pm_hw_issues_detect(kbdev); ++ if (!soft_stop_allowed) { ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kbdev->dev, ++ "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%X", ++ (unsigned int)core_reqs); ++#endif /* CONFIG_MALI_DEBUG */ ++ return; ++ } + -+ kbase_pm_hw_issues_apply(kbdev); -+ kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); ++ /* We are about to issue a soft stop, so mark the atom as having ++ * been soft stopped */ ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + -+ /* Sanity check protected mode was left after reset */ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { -+ u32 gpu_status = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(GPU_STATUS), NULL); ++ /* Mark the point where we issue the soft-stop command */ ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); + -+ WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); -+ } ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { ++ int i; + -+ /* If cycle counter was in use re-enable it, enable_irqs will only be -+ * false when called from kbase_pm_powerup */ -+ if (kbdev->pm.backend.gpu_cycle_counter_requests && -+ (flags & PM_ENABLE_IRQS)) { -+ /* enable interrupts as the L2 may have to be powered on */ -+ kbase_pm_enable_interrupts(kbdev); -+ kbase_pm_request_l2_caches(kbdev); ++ for (i = 0; ++ i < kbase_backend_nr_atoms_submitted(kbdev, js); ++ i++) { ++ struct kbase_jd_atom *katom; + -+ /* Re-enable the counters if we need to */ -+ spin_lock_irqsave( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); -+ if (kbdev->pm.backend.gpu_cycle_counter_requests) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_START, NULL); -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ irq_flags); ++ katom = kbase_gpu_inspect(kbdev, js, i); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); -+ kbase_pm_release_l2_caches(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ KBASE_DEBUG_ASSERT(katom); + -+ kbase_pm_disable_interrupts(kbdev); -+ } ++ /* For HW_ISSUE_8316, only 'bad' jobs attacking ++ * the system can cause this issue: normally, ++ * all memory should be allocated in multiples ++ * of 4 pages, and growable memory should be ++ * changed size in multiples of 4 pages. ++ * ++ * Whilst such 'bad' jobs can be cleared by a ++ * GPU reset, the locking up of a uTLB entry ++ * caused by the bad job could also stall other ++ * ASs, meaning that other ASs' jobs don't ++ * complete in the 'grace' period before the ++ * reset. We don't want to lose other ASs' jobs ++ * when they would normally complete fine, so we ++ * must 'poke' the MMU regularly to help other ++ * ASs complete */ ++ kbase_as_poking_timer_retain_atom( ++ kbdev, katom->kctx, katom); ++ } ++ } + -+ if (flags & PM_ENABLE_IRQS) -+ kbase_pm_enable_interrupts(kbdev); ++ if (kbase_hw_has_feature( ++ kbdev, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { ++ action = (target_katom->atom_flags & ++ KBASE_KATOM_FLAGS_JOBCHAIN) ? ++ JS_COMMAND_SOFT_STOP_1 : ++ JS_COMMAND_SOFT_STOP_0; ++ } ++ } else if (action == JS_COMMAND_HARD_STOP) { ++ bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, ++ core_reqs); + -+exit: -+ /* If GPU is leaving protected mode resume vinstr operation. */ -+ if (kbdev->vinstr_ctx && resume_vinstr) -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ if (!hard_stop_allowed) { ++ /* Jobs can be hard-stopped for the following reasons: ++ * * CFS decides the job has been running too long (and ++ * soft-stop has not occurred). In this case the GPU ++ * will be reset by CFS if the job remains on the ++ * GPU. 
++ * ++ * * The context is destroyed, kbase_jd_zap_context ++ * will attempt to hard-stop the job. However it also ++ * has a watchdog which will cause the GPU to be ++ * reset if the job remains on the GPU. ++ * ++ * * An (unhandled) MMU fault occurred. As long as ++ * BASE_HW_ISSUE_8245 is defined then the GPU will be ++ * reset. ++ * ++ * All three cases result in the GPU being reset if the ++ * hard-stop fails, so it is safe to just return and ++ * ignore the hard-stop request. ++ */ ++ dev_warn(kbdev->dev, ++ "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", ++ (unsigned int)core_reqs); ++ return; ++ } ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + -+ return err; -+} ++ if (kbase_hw_has_feature( ++ kbdev, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { ++ action = (target_katom->atom_flags & ++ KBASE_KATOM_FLAGS_JOBCHAIN) ? ++ JS_COMMAND_HARD_STOP_1 : ++ JS_COMMAND_HARD_STOP_0; ++ } ++ } + -+/** -+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters -+ * -+ * Increase the count of cycle counter users and turn the cycle counters on if -+ * they were previously off -+ * -+ * This function is designed to be called by -+ * kbase_pm_request_gpu_cycle_counter() or -+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only -+ * -+ * When this function is called the l2 cache must be on and the l2 cache users -+ * count must have been incremented by a call to ( -+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() ) -+ * -+ * @kbdev: The kbase device structure of the device -+ */ -+static void -+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) -+{ -+ unsigned long flags; ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); + -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); ++#if KBASE_TRACE_ENABLE ++ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), ++ NULL); ++ if (status_reg_after == BASE_JD_EVENT_ACTIVE) { ++ struct kbase_jd_atom *head; ++ struct kbase_context *head_kctx; + -+ ++kbdev->pm.backend.gpu_cycle_counter_requests; ++ head = kbase_gpu_inspect(kbdev, js, 0); ++ head_kctx = head->kctx; + -+ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_START, NULL); ++ if (status_reg_before == BASE_JD_EVENT_ACTIVE) ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, ++ head, job_in_head_before, js); ++ else ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, ++ 0, js); + -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); ++ switch (action) { ++ case JS_COMMAND_SOFT_STOP: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, ++ head, head->jc, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_0: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, ++ head, head->jc, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_1: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, ++ head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, ++ head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP_0: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, ++ head, head->jc, js); ++ break; ++ case JS_COMMAND_HARD_STOP_1: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, ++ head, head->jc, js); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ } else { ++ if (status_reg_before == BASE_JD_EVENT_ACTIVE) ++ 
KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, ++ job_in_head_before, js); ++ else ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, ++ 0, js); ++ ++ switch (action) { ++ case JS_COMMAND_SOFT_STOP: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, ++ js); ++ break; ++ case JS_COMMAND_SOFT_STOP_0: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, ++ 0, js); ++ break; ++ case JS_COMMAND_SOFT_STOP_1: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, ++ 0, js); ++ break; ++ case JS_COMMAND_HARD_STOP: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, ++ js); ++ break; ++ case JS_COMMAND_HARD_STOP_0: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, ++ 0, js); ++ break; ++ case JS_COMMAND_HARD_STOP_1: ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, ++ 0, js); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ } ++#endif +} + -+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) ++void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) +{ ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; ++ int i; ++ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); ++ js_devdata = &kbdev->js_data; + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ /* Cancel any remaining running jobs for this kctx */ ++ mutex_lock(&kctx->jctx.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < -+ INT_MAX); ++ /* Invalidate all jobs in context, to prevent re-submitting */ ++ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { ++ if (!work_pending(&kctx->jctx.atoms[i].work)) ++ kctx->jctx.atoms[i].event_code = ++ BASE_JD_EVENT_JOB_CANCELLED; ++ } + -+ kbase_pm_request_l2_caches(kbdev); ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ kbase_job_slot_hardstop(kctx, i, NULL); + -+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->jctx.lock); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); -+ -+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) ++void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, ++ struct kbase_jd_atom *target_katom) +{ ++ struct kbase_device *kbdev; ++ int js = target_katom->slot_nr; ++ int priority = target_katom->sched_priority; ++ int i; ++ bool stop_sent = false; ++ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < -+ INT_MAX); ++ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { ++ struct kbase_jd_atom *katom; + -+ kbase_pm_request_l2_caches_l2_is_on(kbdev); ++ katom = kbase_gpu_inspect(kbdev, js, i); ++ if (!katom) ++ continue; + -+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++ if (katom->kctx != kctx) ++ continue; ++ ++ if (katom->sched_priority > priority) { ++ if (!stop_sent) ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE( ++ target_katom); ++ ++ kbase_job_slot_softstop(kbdev, js, katom); ++ stop_sent = true; ++ } ++ } +} + -+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); ++struct zap_reset_data { ++ /* The stages are: ++ * 1. The timer has never been called ++ * 2. 
The zap has timed out, all slots are soft-stopped - the GPU reset ++ * will happen. The GPU has been reset when ++ * kbdev->hwaccess.backend.reset_waitq is signalled ++ * ++ * (-1 - The timer has been cancelled) ++ */ ++ int stage; ++ struct kbase_device *kbdev; ++ struct hrtimer timer; ++ spinlock_t lock; /* protects updates to stage member */ ++}; + -+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) ++static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer) +{ ++ struct zap_reset_data *reset_data = container_of(timer, ++ struct zap_reset_data, timer); ++ struct kbase_device *kbdev = reset_data->kbdev; + unsigned long flags; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); -+ -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); ++ spin_lock_irqsave(&reset_data->lock, flags); + -+ --kbdev->pm.backend.gpu_cycle_counter_requests; ++ if (reset_data->stage == -1) ++ goto out; + -+ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), -+ GPU_COMMAND_CYCLE_COUNT_STOP, NULL); ++#if KBASE_GPU_RESET_EN ++ if (kbase_prepare_to_reset_gpu(kbdev)) { ++ dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", ++ ZAP_TIMEOUT); ++ kbase_reset_gpu(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ reset_data->stage = 2; + -+ spin_unlock_irqrestore( -+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, -+ flags); ++ out: ++ spin_unlock_irqrestore(&reset_data->lock, flags); + -+ kbase_pm_release_l2_caches(kbdev); ++ return HRTIMER_NORESTART; +} + -+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) ++void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) +{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct zap_reset_data reset_data; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ reset_data.timer.function = zap_timeout_callback; + -+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ spin_lock_init(&reset_data.lock); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ reset_data.kbdev = kbdev; ++ reset_data.stage = 1; + -+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h -new file mode 100644 -index 000000000..6804f45ac ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h -@@ -0,0 +1,548 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT), ++ HRTIMER_MODE_REL); + ++ /* Wait for all jobs to finish, and for the context to be not-scheduled ++ * (due to kbase_job_zap_context(), we also guarentee it's not in the JS ++ * policy queue either */ ++ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); ++ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, ++ !kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + ++ spin_lock_irqsave(&reset_data.lock, flags); ++ if (reset_data.stage == 1) { ++ /* The timer hasn't run yet - so cancel it */ ++ reset_data.stage = -1; ++ } ++ spin_unlock_irqrestore(&reset_data.lock, flags); + ++ hrtimer_cancel(&reset_data.timer); + ++ if (reset_data.stage == 2) { ++ /* The reset has already started. ++ * Wait for the reset to complete ++ */ ++ wait_event(kbdev->hwaccess.backend.reset_wait, ++ atomic_read(&kbdev->hwaccess.backend.reset_gpu) ++ == KBASE_RESET_GPU_NOT_PENDING); ++ } ++ destroy_hrtimer_on_stack(&reset_data.timer); + -+/* -+ * Power management API definitions used internally by GPU backend -+ */ ++ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + -+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ -+#define _KBASE_BACKEND_PM_INTERNAL_H_ ++ /* Ensure that the signallers of the waitqs have finished */ ++ mutex_lock(&kctx->jctx.lock); ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kctx->jctx.lock); ++} + -+#include ++u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) ++{ ++ u32 flush_id = 0; + -+#include "mali_kbase_pm_ca.h" -+#include "mali_kbase_pm_policy.h" ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { ++ mutex_lock(&kbdev->pm.lock); ++ if (kbdev->pm.backend.gpu_powered) ++ flush_id = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(LATEST_FLUSH), NULL); ++ mutex_unlock(&kbdev->pm.lock); ++ } + ++ return flush_id; ++} + -+/** -+ * kbase_pm_dev_idle - The GPU is idle. -+ * -+ * The OS may choose to turn off idle devices -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_dev_idle(struct kbase_device *kbdev); ++int kbase_job_slot_init(struct kbase_device *kbdev) ++{ ++#if KBASE_GPU_RESET_EN ++ kbdev->hwaccess.backend.reset_workq = alloc_workqueue( ++ "Mali reset workqueue", 0, 1); ++ if (NULL == kbdev->hwaccess.backend.reset_workq) ++ return -EINVAL; + -+/** -+ * kbase_pm_dev_activate - The GPU is active. -+ * -+ * The OS should avoid opportunistically turning off the GPU while it is active -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_dev_activate(struct kbase_device *kbdev); ++ KBASE_DEBUG_ASSERT(0 == ++ object_is_on_stack(&kbdev->hwaccess.backend.reset_work)); ++ INIT_WORK(&kbdev->hwaccess.backend.reset_work, ++ kbasep_reset_timeout_worker); + -+/** -+ * kbase_pm_get_present_cores - Get details of the cores that are present in -+ * the device. -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) present in the GPU device and also a count of -+ * the number of cores. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * Return: The bit mask of cores present -+ */ -+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ kbdev->hwaccess.backend.reset_timer.function = ++ kbasep_reset_timer_callback; ++#endif + -+/** -+ * kbase_pm_get_active_cores - Get details of the cores that are currently -+ * active in the device. -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are actively processing work (i.e. -+ * turned on *and* busy). -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * Return: The bit mask of active cores -+ */ -+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++ return 0; ++} ++KBASE_EXPORT_TEST_API(kbase_job_slot_init); + -+/** -+ * kbase_pm_get_trans_cores - Get details of the cores that are currently -+ * transitioning between power states. -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are currently transitioning between -+ * power states. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * Return: The bit mask of transitioning cores -+ */ -+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++void kbase_job_slot_halt(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+/** -+ * kbase_pm_get_ready_cores - Get details of the cores that are currently -+ * powered and ready for jobs. -+ * -+ * This function can be called by the active power policy to return a bitmask of -+ * the cores (of a specified type) that are powered and ready for jobs (they may -+ * or may not be currently executing jobs). -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @type: The type of core (see the enum kbase_pm_core_type enumeration) -+ * -+ * Return: The bit mask of ready cores -+ */ -+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, -+ enum kbase_pm_core_type type); ++void kbase_job_slot_term(struct kbase_device *kbdev) ++{ ++#if KBASE_GPU_RESET_EN ++ destroy_workqueue(kbdev->hwaccess.backend.reset_workq); ++#endif ++} ++KBASE_EXPORT_TEST_API(kbase_job_slot_term); + ++#if KBASE_GPU_RESET_EN +/** -+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device -+ * interrupts. -+ * -+ * This function can be used by a power policy to turn the clock for the GPU on. -+ * It should be modified during integration to perform the necessary actions to -+ * ensure that the GPU is fully powered and clocked. 
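/*
 * Editor's sketch (not part of the patch): one way a power policy could
 * combine the core-bitmask queries documented in the hunk above, e.g. to
 * count how many present shader cores are currently ready. The function
 * name is hypothetical, and KBASE_PM_CORE_SHADER is assumed to be the
 * shader entry of enum kbase_pm_core_type (only KBASE_PM_CORE_L2 appears
 * explicitly in this hunk).
 */
static unsigned int example_count_ready_shaders(struct kbase_device *kbdev)
{
        u64 present = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
        u64 ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);

        /* Count only cores that are both present and powered/ready. */
        return hweight64(present & ready);
}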
++ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot ++ * @kbdev: kbase device pointer ++ * @kctx: context to check against ++ * @js: slot to check ++ * @target_katom: An atom to check, or NULL if all atoms from @kctx on ++ * slot @js should be checked + * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_resume: true if clock on due to resume after suspend, false otherwise -+ */ -+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); -+ -+/** -+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the -+ * device off. ++ * This checks are based upon parameters that would normally be passed to ++ * kbase_job_slot_hardstop(). + * -+ * This function can be used by a power policy to turn the clock for the GPU -+ * off. It should be modified during integration to perform the necessary -+ * actions to turn the clock off (if this is possible in the integration). ++ * In the event of @target_katom being NULL, this will check the last jobs that ++ * are likely to be running on the slot to see if a) they belong to kctx, and ++ * so would be stopped, and b) whether they have AFBC + * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_suspend: true if clock off due to suspend, false otherwise ++ * In that case, It's guaranteed that a job currently executing on the HW with ++ * AFBC will be detected. However, this is a conservative check because it also ++ * detects jobs that have just completed too. + * -+ * Return: true if clock was turned off, or -+ * false if clock can not be turned off due to pending page/bus fault -+ * workers. Caller must flush MMU workqueues and retry ++ * Return: true when hard-stop _might_ stop an afbc atom, else false. + */ -+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); ++static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, ++ struct kbase_context *kctx, int js, ++ struct kbase_jd_atom *target_katom) ++{ ++ bool ret = false; ++ int i; + -+/** -+ * kbase_pm_enable_interrupts - Enable interrupts on the device. -+ * -+ * Interrupts are also enabled after a call to kbase_pm_clock_on(). -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_enable_interrupts(struct kbase_device *kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/** -+ * kbase_pm_disable_interrupts - Disable interrupts on the device. -+ * -+ * This prevents delivery of Power Management interrupts to the CPU so that -+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler -+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. -+ * -+ * Interrupts are also disabled after a call to kbase_pm_clock_off(). -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_disable_interrupts(struct kbase_device *kbdev); ++ /* When we have an atom the decision can be made straight away. */ ++ if (target_katom) ++ return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC); + -+/** -+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() -+ * that does not take the hwaccess_lock -+ * -+ * Caller must hold the hwaccess_lock. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); ++ /* Otherwise, we must chweck the hardware to see if it has atoms from ++ * this context with AFBC. */ ++ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { ++ struct kbase_jd_atom *katom; + -+/** -+ * kbase_pm_init_hw - Initialize the hardware. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @flags: Flags specifying the type of PM init -+ * -+ * This function checks the GPU ID register to ensure that the GPU is supported -+ * by the driver and performs a reset on the device so that it is in a known -+ * state before the device is used. -+ * -+ * Return: 0 if the device is supported and successfully reset. -+ */ -+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); ++ katom = kbase_gpu_inspect(kbdev, js, i); ++ if (!katom) ++ continue; + -+/** -+ * kbase_pm_reset_done - The GPU has been reset successfully. -+ * -+ * This function must be called by the GPU interrupt handler when the -+ * RESET_COMPLETED bit is set. It signals to the power management initialization -+ * code that the GPU has been successfully reset. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_reset_done(struct kbase_device *kbdev); ++ /* Ignore atoms from other contexts, they won't be stopped when ++ * we use this for checking if we should hard-stop them */ ++ if (katom->kctx != kctx) ++ continue; + ++ /* An atom on this slot and this context: check for AFBC */ ++ if (katom->core_req & BASE_JD_REQ_FS_AFBC) { ++ ret = true; ++ break; ++ } ++ } + -+/** -+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions -+ * to make, and if so start them. -+ * -+ * This function will check the desired_xx_state members of -+ * struct kbase_pm_device_data and the actual status of the hardware to see if -+ * any power transitions can be made at this time to make the hardware state -+ * closer to the state desired by the power policy. -+ * -+ * The return value can be used to check whether all the desired cores are -+ * available, and so whether it's worth submitting a job (e.g. from a Power -+ * Management IRQ). -+ * -+ * Note that this still returns true when desired_xx_state has no -+ * cores. That is: of the no cores desired, none were *un*available. In -+ * this case, the caller may still need to try submitting jobs. This is because -+ * the Core Availability Policy might have taken us to an intermediate state -+ * where no cores are powered, before powering on more cores (e.g. for core -+ * rotation) -+ * -+ * The caller must hold kbase_device.pm.power_change_lock -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Return: non-zero when all desired cores are available. That is, -+ * it's worthwhile for the caller to submit a job. -+ * false otherwise -+ */ -+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); ++ return ret; ++} ++#endif /* KBASE_GPU_RESET_EN */ + +/** -+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of -+ * kbase_pm_check_transitions_nolock() -+ * -+ * On returning, the desired state at the time of the call will have been met. -+ * -+ * There is nothing to stop the core being switched off by calls to -+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). 
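/*
 * Editor's sketch (not part of the patch): the "_nolock" suffix used above
 * follows the usual locked-wrapper convention, mirrored elsewhere in this
 * hunk by kbase_pm_release_gpu_cycle_counter(). A hypothetical wrapper
 * around the nolock variant would simply be:
 */
static void example_disable_interrupts_locked(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        /* We now satisfy the "caller must hold the hwaccess_lock" rule. */
        kbase_pm_disable_interrupts_nolock(kbdev);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}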
Therefore, the -+ * caller must have already made a call to -+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. ++ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags ++ * @kbdev: The kbase device ++ * @js: The job slot to soft-stop ++ * @target_katom: The job that should be soft-stopped (or NULL for any job) ++ * @sw_flags: Flags to pass in about the soft-stop + * -+ * The usual use-case for this is to ensure cores are 'READY' after performing -+ * a GPU Reset. ++ * Context: ++ * The job slot lock must be held when calling this function. ++ * The job slot must not already be in the process of being soft-stopped. + * -+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold -+ * kbase_device.pm.power_change_lock, because this function will take that -+ * lock itself. ++ * Soft-stop the specified job slot, with extra information about the stop + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * Where possible any job in the next register is evicted before the soft-stop. + */ -+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); ++void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom, u32 sw_flags) ++{ ++ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); ++ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, ++ JS_COMMAND_SOFT_STOP | sw_flags); ++} + +/** -+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() -+ * where the caller must hold -+ * kbase_device.pm.power_change_lock ++ * kbase_job_slot_softstop - Soft-stop the specified job slot ++ * @kbdev: The kbase device ++ * @js: The job slot to soft-stop ++ * @target_katom: The job that should be soft-stopped (or NULL for any job) ++ * Context: ++ * The job slot lock must be held when calling this function. ++ * The job slot must not already be in the process of being soft-stopped. + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * Where possible any job in the next register is evicted before the soft-stop. + */ -+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); ++void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom) ++{ ++ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); ++} + +/** -+ * kbase_pm_update_cores_state - Update the desired state of shader cores from -+ * the Power Policy, and begin any power -+ * transitions. -+ * -+ * This function will update the desired_xx_state members of -+ * struct kbase_pm_device_data by calling into the current Power Policy. It will -+ * then begin power transitions to make the hardware acheive the desired shader -+ * core state. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * kbase_job_slot_hardstop - Hard-stop the specified job slot ++ * @kctx: The kbase context that contains the job(s) that should ++ * be hard-stopped ++ * @js: The job slot to hard-stop ++ * @target_katom: The job that should be hard-stopped (or NULL for all ++ * jobs from the context) ++ * Context: ++ * The job slot lock must be held when calling this function. 
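/*
 * Editor's sketch (not part of the patch): a hypothetical caller of the
 * swflags variant defined above, requesting a soft-stop that is also
 * recorded as a disjoint event. JS_COMMAND_SW_CAUSES_DISJOINT is the
 * software flag tested later in kbase_job_check_enter_disjoint(); it lies
 * outside JS_COMMAND_MASK, as the KBASE_DEBUG_ASSERT above requires.
 */
static void example_softstop_with_disjoint(struct kbase_device *kbdev, int js,
                                           struct kbase_jd_atom *katom)
{
        /* Same locking rule as kbase_job_slot_softstop(). */
        lockdep_assert_held(&kbdev->hwaccess_lock);

        kbase_job_slot_softstop_swflags(kbdev, js, katom,
                                        JS_COMMAND_SW_CAUSES_DISJOINT);
}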
+ */ -+void kbase_pm_update_cores_state(struct kbase_device *kbdev); ++void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, ++ struct kbase_jd_atom *target_katom) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool stopped; ++#if KBASE_GPU_RESET_EN ++ /* We make the check for AFBC before evicting/stopping atoms. Note ++ * that no other thread can modify the slots whilst we have the ++ * hwaccess_lock. */ ++ int needs_workaround_for_afbc = ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) ++ && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, ++ target_katom); ++#endif + -+/** -+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off -+ * the GPU and/or shader cores. -+ * -+ * This should be called by any functions which directly power off the GPU. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); ++ stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, ++ target_katom, ++ JS_COMMAND_HARD_STOP); ++#if KBASE_GPU_RESET_EN ++ if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || ++ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || ++ needs_workaround_for_afbc)) { ++ /* MIDBASE-2916 if a fragment job with AFBC encoding is ++ * hardstopped, ensure to do a soft reset also in order to ++ * clear the GPU status. ++ * Workaround for HW issue 8401 has an issue,so after ++ * hard-stopping just reset the GPU. This will ensure that the ++ * jobs leave the GPU.*/ ++ if (kbase_prepare_to_reset_gpu_locked(kbdev)) { ++ dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue"); ++ kbase_reset_gpu_locked(kbdev); ++ } ++ } ++#endif ++} + +/** -+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required -+ * and used cores. ++ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode ++ * @kbdev: kbase device ++ * @action: the event which has occurred ++ * @core_reqs: core requirements of the atom ++ * @target_katom: the atom which is being affected + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); -+ -+/** -+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework. ++ * For a certain soft/hard-stop action, work out whether to enter disjoint ++ * state. + * -+ * This must be called before other metric gathering APIs are called. ++ * This does not register multiple disjoint events if the atom has already ++ * started a disjoint period + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @core_reqs can be supplied as 0 if the atom had not started on the hardware ++ * (and so a 'real' soft/hard-stop was not required, but it still interrupted ++ * flow, perhaps on another context) + * -+ * Return: 0 on success, error code on error ++ * kbase_job_check_leave_disjoint() should be used to end the disjoint ++ * state when the soft/hard-stop action is complete + */ -+int kbasep_pm_metrics_init(struct kbase_device *kbdev); ++void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, ++ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) ++{ ++ u32 hw_action = action & JS_COMMAND_MASK; + -+/** -+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework. -+ * -+ * This must be called when metric gathering is no longer required. 
It is an -+ * error to call any metrics gathering function (other than -+ * kbasep_pm_metrics_init()) after calling this function. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbasep_pm_metrics_term(struct kbase_device *kbdev); ++ /* For hard-stop, don't enter if hard-stop not allowed */ ++ if (hw_action == JS_COMMAND_HARD_STOP && ++ !kbasep_hard_stop_allowed(kbdev, core_reqs)) ++ return; + -+/** -+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to -+ * update the vsync metric. -+ * -+ * This function should be called by the frame buffer driver to update whether -+ * the system is hitting the vsync target or not. buffer_updated should be true -+ * if the vsync corresponded with a new frame being displayed, otherwise it -+ * should be false. This function does not need to be called every vsync, but -+ * only when the value of @buffer_updated differs from a previous call. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @buffer_updated: True if the buffer has been updated on this VSync, -+ * false otherwise -+ */ -+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); ++ /* For soft-stop, don't enter if soft-stop not allowed, or isn't ++ * causing disjoint */ ++ if (hw_action == JS_COMMAND_SOFT_STOP && ++ !(kbasep_soft_stop_allowed(kbdev, target_katom) && ++ (action & JS_COMMAND_SW_CAUSES_DISJOINT))) ++ return; + -+/** -+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change -+ * the clock speed of the GPU. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * This function should be called regularly by the DVFS system to check whether -+ * the clock speed of the GPU needs updating. -+ */ -+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); ++ /* Nothing to do if already logged disjoint state on this atom */ ++ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) ++ return; + -+/** -+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is -+ * needed -+ * -+ * If the caller is the first caller then the GPU cycle counters will be enabled -+ * along with the l2 cache -+ * -+ * The GPU must be powered when calling this function (i.e. -+ * kbase_pm_context_active() must have been called). -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); ++ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; ++ kbase_disjoint_state_up(kbdev); ++} + +/** -+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is -+ * needed (l2 cache already on) -+ * -+ * This is a version of the above function -+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the -+ * l2 cache is known to be on and assured to be on until the subsequent call of -+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does -+ * not sleep and can be called from atomic functions. -+ * -+ * The GPU must be powered when calling this function (i.e. -+ * kbase_pm_context_active() must have been called) and the l2 cache must be -+ * powered on. 
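/*
 * Editor's sketch (not part of the patch): kbase_job_check_enter_disjoint()
 * above and its "leave" counterpart, kbase_job_check_leave_disjoint(), are
 * meant to bracket a stop request. A hypothetical caller, passing the
 * stopped atom's core_req and including JS_COMMAND_SW_CAUSES_DISJOINT so
 * that a soft-stop actually counts as disjoint:
 */
static void example_stop_with_disjoint(struct kbase_device *kbdev,
                                       struct kbase_jd_atom *katom)
{
        kbase_job_check_enter_disjoint(kbdev,
                        JS_COMMAND_SOFT_STOP | JS_COMMAND_SW_CAUSES_DISJOINT,
                        katom->core_req, katom);

        /* ... issue the stop and wait for the atom to come back ... */

        kbase_job_check_leave_disjoint(kbdev, katom);
}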
++ * kbase_job_check_enter_disjoint - potentially leave disjoint state ++ * @kbdev: kbase device ++ * @target_katom: atom which is finishing + * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * Work out whether to leave disjoint state when finishing an atom that was ++ * originated by kbase_job_check_enter_disjoint(). + */ -+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom) ++{ ++ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { ++ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; ++ kbase_disjoint_state_down(kbdev); ++ } ++} + -+/** -+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no -+ * longer in use -+ * -+ * If the caller is the last caller then the GPU cycle counters will be -+ * disabled. A request must have been made before a call to this. -+ * -+ * Caller must not hold the hwaccess_lock, as it will be taken in this function. -+ * If the caller is already holding this lock then -+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); + -+/** -+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() -+ * that does not take hwaccess_lock -+ * -+ * Caller must hold the hwaccess_lock. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); ++#if KBASE_GPU_RESET_EN ++static void kbase_debug_dump_registers(struct kbase_device *kbdev) ++{ ++ int i; + -+/** -+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to -+ * complete -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); ++ kbase_io_history_dump(kbdev); + -+/** -+ * kbase_pm_register_access_enable - Enable access to GPU registers -+ * -+ * Enables access to the GPU registers before power management has powered up -+ * the GPU with kbase_pm_powerup(). -+ * -+ * Access to registers should be done using kbase_os_reg_read()/write() at this -+ * stage, not kbase_reg_read()/write(). -+ * -+ * This results in the power management callbacks provided in the driver -+ * configuration to get called to turn on power and/or clocks to the GPU. See -+ * kbase_pm_callback_conf. 
-+ * -+ * This should only be used before power management is powered up with -+ * kbase_pm_powerup() -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_register_access_enable(struct kbase_device *kbdev); ++ dev_err(kbdev->dev, "Register state:"); ++ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); ++ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL)); ++ for (i = 0; i < 3; i++) { ++ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", ++ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), ++ NULL), ++ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO), ++ NULL)); ++ } ++ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); ++ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); ++ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); ++ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); ++ dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL)); ++} + -+/** -+ * kbase_pm_register_access_disable - Disable early register access -+ * -+ * Disables access to the GPU registers enabled earlier by a call to -+ * kbase_pm_register_access_enable(). -+ * -+ * This results in the power management callbacks provided in the driver -+ * configuration to get called to turn off power and/or clocks to the GPU. See -+ * kbase_pm_callback_conf -+ * -+ * This should only be used before power management is powered up with -+ * kbase_pm_powerup() -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_register_access_disable(struct kbase_device *kbdev); ++static void kbasep_reset_timeout_worker(struct work_struct *data) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ ktime_t end_timestamp = ktime_get(); ++ struct kbasep_js_device_data *js_devdata; ++ bool try_schedule = false; ++ bool silent = false; ++ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + -+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline -+ * function */ ++ KBASE_DEBUG_ASSERT(data); + -+/** -+ * kbase_pm_metrics_is_active - Check if the power management metrics -+ * collection is active. -+ * -+ * Note that this returns if the power management metrics collection was -+ * active at the time of calling, it is possible that after the call the metrics -+ * collection enable may have changed state. -+ * -+ * The caller must handle the consequence that the state may have changed. 
-+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * Return: true if metrics collection was active else false. -+ */ -+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); ++ kbdev = container_of(data, struct kbase_device, ++ hwaccess.backend.reset_work); + -+/** -+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_resume: true if power on due to resume after suspend, -+ * false otherwise -+ */ -+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); ++ KBASE_DEBUG_ASSERT(kbdev); ++ js_devdata = &kbdev->js_data; + -+/** -+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been -+ * requested. -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid -+ * pointer) -+ * @is_suspend: true if power off due to suspend, -+ * false otherwise -+ */ -+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_SILENT) ++ silent = true; + -+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, -+ unsigned long *total, unsigned long *busy); -+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev); -+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ ++ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + -+#ifdef CONFIG_MALI_MIDGARD_DVFS ++ /* Suspend vinstr. ++ * This call will block until vinstr is suspended. */ ++ kbase_vinstr_suspend(kbdev->vinstr_ctx); + -+/** -+ * kbase_platform_dvfs_event - Report utilisation to DVFS code -+ * -+ * Function provided by platform specific code when DVFS is enabled to allow -+ * the power management metrics system to report utilisation. -+ * -+ * @kbdev: The kbase device structure for the device (must be a -+ * valid pointer) -+ * @utilisation: The current calculated utilisation by the metrics system. -+ * @util_gl_share: The current calculated gl share of utilisation. -+ * @util_cl_share: The current calculated cl share of utilisation per core -+ * group. -+ * Return: Returns 0 on failure and non zero on success. -+ */ ++ /* Make sure the timer has completed - this cannot be done from ++ * interrupt context, so this cannot be done within ++ * kbasep_try_reset_gpu_early. */ ++ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + -+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, -+ u32 util_gl_share, u32 util_cl_share[2]); -+#endif ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ /* This would re-activate the GPU. Since it's already idle, ++ * there's no need to reset it */ ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); ++ kbase_disjoint_state_down(kbdev); ++ wake_up(&kbdev->hwaccess.backend.reset_wait); ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ return; ++ } + -+void kbase_pm_power_changed(struct kbase_device *kbdev); ++ KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + -+/** -+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either -+ * about to be run or has just completed. 
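/*
 * Editor's sketch (not part of the patch): a minimal platform hook that
 * satisfies the kbase_platform_dvfs_event() contract documented above
 * (non-zero return on success). A real integration would feed the
 * utilisation figures into its clock/regulator management rather than
 * ignoring them.
 */
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
                              u32 util_gl_share, u32 util_cl_share[2])
{
        /* Accept every report; nothing to adjust in this stub. */
        return 1;
}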
-+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @now: Pointer to the timestamp of the change, or NULL to use current time -+ * -+ * Caller must hold hwaccess_lock -+ */ -+void kbase_pm_metrics_update(struct kbase_device *kbdev, -+ ktime_t *now); ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ spin_lock(&kbdev->hwaccess_lock); ++ spin_lock(&kbdev->mmu_mask_change); ++ /* We're about to flush out the IRQs and their bottom half's */ ++ kbdev->irq_reset_flush = true; + -+/** -+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU -+ * If the GPU does not have coherency this is a no-op -+ * @kbdev: Device pointer -+ * -+ * This function should be called after L2 power up. -+ */ ++ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the ++ * spinlock; this also clears any outstanding interrupts */ ++ kbase_pm_disable_interrupts_nolock(kbdev); + -+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); ++ spin_unlock(&kbdev->mmu_mask_change); ++ spin_unlock(&kbdev->hwaccess_lock); ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + -+/** -+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU -+ * If the GPU does not have coherency this is a no-op -+ * @kbdev: Device pointer -+ * -+ * This function should be called before L2 power off. -+ */ -+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); ++ /* Ensure that any IRQ handlers have finished ++ * Must be done without any locks IRQ handlers will take */ ++ kbase_synchronize_irqs(kbdev); + -+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c -new file mode 100644 -index 000000000..024248ca7 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c -@@ -0,0 +1,401 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ /* Flush out any in-flight work items */ ++ kbase_flush_mmu_wqs(kbdev); + ++ /* The flush has completed so reset the active indicator */ ++ kbdev->irq_reset_flush = false; + ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { ++ /* Ensure that L2 is not transitioning when we send the reset ++ * command */ ++ while (--max_loops && kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_L2)) ++ ; + ++ WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); ++ } + ++ mutex_lock(&kbdev->pm.lock); ++ /* We hold the pm lock, so there ought to be a current policy */ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + -+/* -+ * Metrics for power management -+ */ ++ /* All slot have been soft-stopped and we've waited ++ * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we ++ * assume that anything that is still left on the GPU is stuck there and ++ * we'll kill it when we reset the GPU */ + -+#include -+#include -+#include -+#include ++ if (!silent) ++ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", ++ RESET_TIMEOUT); + -+/* When VSync is being hit aim for utilisation between 70-90% */ -+#define KBASE_PM_VSYNC_MIN_UTILISATION 70 -+#define KBASE_PM_VSYNC_MAX_UTILISATION 90 -+/* Otherwise aim for 10-40% */ -+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 -+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 ++ /* Output the state of some interesting registers to help in the ++ * debugging of GPU resets */ ++ if (!silent) ++ kbase_debug_dump_registers(kbdev); + -+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns -+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly -+ * under 11s. Exceeding this will cause overflow */ -+#define KBASE_PM_TIME_SHIFT 8 ++ /* Complete any jobs that were still on the GPU */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->protected_mode = false; ++ kbase_backend_reset(kbdev, &end_timestamp); ++ kbase_pm_metrics_update(kbdev, NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/* Maximum time between sampling of utilization data, without resetting the -+ * counters. 
*/ -+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */ ++ /* Reset the GPU */ ++ kbase_pm_init_hw(kbdev, 0); + -+#ifdef CONFIG_MALI_MIDGARD_DVFS -+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) -+{ -+ unsigned long flags; -+ struct kbasep_pm_metrics_data *metrics; ++ mutex_unlock(&kbdev->pm.lock); + -+ KBASE_DEBUG_ASSERT(timer != NULL); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); -+ kbase_pm_get_dvfs_action(metrics->kbdev); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ spin_lock_irqsave(&metrics->lock, flags); ++ kbase_pm_enable_interrupts(kbdev); + -+ if (metrics->timer_active) -+ hrtimer_start(timer, -+ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), -+ HRTIMER_MODE_REL); ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); + -+ spin_unlock_irqrestore(&metrics->lock, flags); ++ kbase_disjoint_state_down(kbdev); + -+ return HRTIMER_NORESTART; -+} -+#endif /* CONFIG_MALI_MIDGARD_DVFS */ ++ wake_up(&kbdev->hwaccess.backend.reset_wait); ++ if (!silent) ++ dev_err(kbdev->dev, "Reset complete"); + -+int kbasep_pm_metrics_init(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) ++ try_schedule = true; + -+ kbdev->pm.backend.metrics.kbdev = kbdev; ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ kbdev->pm.backend.metrics.time_period_start = ktime_get(); -+ kbdev->pm.backend.metrics.time_busy = 0; -+ kbdev->pm.backend.metrics.time_idle = 0; -+ kbdev->pm.backend.metrics.prev_busy = 0; -+ kbdev->pm.backend.metrics.prev_idle = 0; -+ kbdev->pm.backend.metrics.gpu_active = false; -+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.busy_cl[0] = 0; -+ kbdev->pm.backend.metrics.busy_cl[1] = 0; -+ kbdev->pm.backend.metrics.busy_gl = 0; ++ mutex_lock(&kbdev->pm.lock); + -+ spin_lock_init(&kbdev->pm.backend.metrics.lock); ++ /* Find out what cores are required now */ ++ kbase_pm_update_cores_state(kbdev); + -+#ifdef CONFIG_MALI_MIDGARD_DVFS -+ kbdev->pm.backend.metrics.timer_active = true; -+ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ kbdev->pm.backend.metrics.timer.function = dvfs_callback; ++ /* Synchronously request and wait for those cores, because if ++ * instrumentation is enabled it would need them immediately. 
*/ ++ kbase_pm_check_transitions_sync(kbdev); + -+ hrtimer_start(&kbdev->pm.backend.metrics.timer, -+ HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), -+ HRTIMER_MODE_REL); -+#endif /* CONFIG_MALI_MIDGARD_DVFS */ ++ mutex_unlock(&kbdev->pm.lock); + -+ return 0; -+} ++ /* Try submitting some jobs to restart processing */ ++ if (try_schedule) { ++ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, ++ 0); ++ kbase_js_sched_all(kbdev); ++ } + -+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); ++ /* Process any pending slot updates */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_backend_slot_update(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+void kbasep_pm_metrics_term(struct kbase_device *kbdev) ++ kbase_pm_context_idle(kbdev); ++ ++ /* Release vinstr */ ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ ++ KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); ++} ++ ++static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) +{ -+#ifdef CONFIG_MALI_MIDGARD_DVFS -+ unsigned long flags; ++ struct kbase_device *kbdev = container_of(timer, struct kbase_device, ++ hwaccess.backend.reset_timer); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kbdev); + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); -+ kbdev->pm.backend.metrics.timer_active = false; -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++ /* Reset still pending? */ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == ++ KBASE_RESET_GPU_COMMITTED) ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); + -+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer); -+#endif /* CONFIG_MALI_MIDGARD_DVFS */ ++ return HRTIMER_NORESTART; +} + -+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); -+ -+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this -+ * function ++/* ++ * If all jobs are evicted from the GPU then we can reset the GPU ++ * immediately instead of waiting for the timeout to elapse + */ -+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, -+ ktime_t now) -+{ -+ ktime_t diff; + -+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) ++{ ++ int i; ++ int pending_jobs = 0; + -+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); -+ if (ktime_to_ns(diff) < 0) -+ return; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ if (kbdev->pm.backend.metrics.gpu_active) { -+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); ++ /* Count the number of jobs */ ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); + -+ kbdev->pm.backend.metrics.time_busy += ns_time; -+ if (kbdev->pm.backend.metrics.active_cl_ctx[0]) -+ kbdev->pm.backend.metrics.busy_cl[0] += ns_time; -+ if (kbdev->pm.backend.metrics.active_cl_ctx[1]) -+ kbdev->pm.backend.metrics.busy_cl[1] += ns_time; -+ if (kbdev->pm.backend.metrics.active_gl_ctx[0]) -+ kbdev->pm.backend.metrics.busy_gl += ns_time; -+ if (kbdev->pm.backend.metrics.active_gl_ctx[1]) -+ kbdev->pm.backend.metrics.busy_gl += ns_time; -+ } else { -+ kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) -+ >> KBASE_PM_TIME_SHIFT); ++ if (pending_jobs > 0) { ++ /* There are still jobs on the GPU - wait */ ++ return; + } + -+ kbdev->pm.backend.metrics.time_period_start = now; -+} ++ /* To prevent getting 
incorrect registers when dumping failed job, ++ * skip early reset. ++ */ ++ if (kbdev->job_fault_debug != false) ++ return; + -+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this -+ * function. -+ */ -+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, -+ ktime_t now) -+{ -+ /* Store previous value */ -+ kbdev->pm.backend.metrics.prev_idle = -+ kbdev->pm.backend.metrics.time_idle; -+ kbdev->pm.backend.metrics.prev_busy = -+ kbdev->pm.backend.metrics.time_busy; ++ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has ++ * been called), and that no other thread beat this thread to starting ++ * the reset */ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != ++ KBASE_RESET_GPU_COMMITTED) { ++ /* Reset has already occurred */ ++ return; ++ } + -+ /* Reset current values */ -+ kbdev->pm.backend.metrics.time_period_start = now; -+ kbdev->pm.backend.metrics.time_idle = 0; -+ kbdev->pm.backend.metrics.time_busy = 0; -+ kbdev->pm.backend.metrics.busy_cl[0] = 0; -+ kbdev->pm.backend.metrics.busy_cl[1] = 0; -+ kbdev->pm.backend.metrics.busy_gl = 0; ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); +} + -+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev) ++static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) +{ + unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); -+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get()); -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++ js_devdata = &kbdev->js_data; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbasep_try_reset_gpu_early_locked(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, -+ unsigned long *total_out, unsigned long *busy_out) ++/** ++ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU ++ * @kbdev: kbase device ++ * ++ * This function just soft-stops all the slots to ensure that as many jobs as ++ * possible are saved. ++ * ++ * Return: ++ * The function returns a boolean which should be interpreted as follows: ++ * true - Prepared for reset, kbase_reset_gpu_locked should be called. ++ * false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. 
++ */ ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +{ -+ ktime_t now = ktime_get(); -+ unsigned long flags, busy, total; -+ -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); -+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now); ++ int i; + -+ busy = kbdev->pm.backend.metrics.time_busy; -+ total = busy + kbdev->pm.backend.metrics.time_idle; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default -+ * 100ms) */ -+ if (total >= MALI_UTILIZATION_MAX_PERIOD) { -+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); -+ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { -+ total += kbdev->pm.backend.metrics.prev_idle + -+ kbdev->pm.backend.metrics.prev_busy; -+ busy += kbdev->pm.backend.metrics.prev_busy; ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING, ++ KBASE_RESET_GPU_PREPARED) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return false; + } + -+ *total_out = total; -+ *busy_out = busy; -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -+} -+#endif ++ kbase_disjoint_state_up(kbdev); + -+#ifdef CONFIG_MALI_MIDGARD_DVFS ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) ++ kbase_job_slot_softstop(kbdev, i, NULL); + -+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this -+ * function -+ */ -+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, -+ int *util_gl_share, -+ int util_cl_share[2], -+ ktime_t now) ++ return true; ++} ++ ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +{ -+ int utilisation; -+ int busy; ++ unsigned long flags; ++ bool ret; ++ struct kbasep_js_device_data *js_devdata; + -+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now); ++ js_devdata = &kbdev->js_data; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = kbase_prepare_to_reset_gpu_locked(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ if (kbdev->pm.backend.metrics.time_idle + -+ kbdev->pm.backend.metrics.time_busy == 0) { -+ /* No data - so we return NOP */ -+ utilisation = -1; -+ if (util_gl_share) -+ *util_gl_share = -1; -+ if (util_cl_share) { -+ util_cl_share[0] = -1; -+ util_cl_share[1] = -1; -+ } -+ goto out; -+ } ++ return ret; ++} ++KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + -+ utilisation = (100 * kbdev->pm.backend.metrics.time_busy) / -+ (kbdev->pm.backend.metrics.time_idle + -+ kbdev->pm.backend.metrics.time_busy); ++/* ++ * This function should be called after kbase_prepare_to_reset_gpu if it ++ * returns true. It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu. ++ * ++ * After this function is called (or not called if kbase_prepare_to_reset_gpu ++ * returned false), the caller should wait for ++ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset ++ * has completed. 
++ */ ++void kbase_reset_gpu(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev); + -+ busy = kbdev->pm.backend.metrics.busy_gl + -+ kbdev->pm.backend.metrics.busy_cl[0] + -+ kbdev->pm.backend.metrics.busy_cl[1]; ++ /* Note this is an assert/atomic_set because it is a software issue for ++ * a race to be occuring here */ ++ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_PREPARED); ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED); + -+ if (busy != 0) { -+ if (util_gl_share) -+ *util_gl_share = -+ (100 * kbdev->pm.backend.metrics.busy_gl) / -+ busy; -+ if (util_cl_share) { -+ util_cl_share[0] = -+ (100 * kbdev->pm.backend.metrics.busy_cl[0]) / -+ busy; -+ util_cl_share[1] = -+ (100 * kbdev->pm.backend.metrics.busy_cl[1]) / -+ busy; -+ } -+ } else { -+ if (util_gl_share) -+ *util_gl_share = -1; -+ if (util_cl_share) { -+ util_cl_share[0] = -1; -+ util_cl_share[1] = -1; -+ } -+ } ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", ++ kbdev->reset_timeout_ms); + -+out: -+ return utilisation; ++ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, ++ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), ++ HRTIMER_MODE_REL); ++ ++ /* Try resetting early */ ++ kbasep_try_reset_gpu_early(kbdev); +} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu); + -+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) ++void kbase_reset_gpu_locked(struct kbase_device *kbdev) +{ -+ unsigned long flags; -+ int utilisation, util_gl_share; -+ int util_cl_share[2]; -+ ktime_t now; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kbdev); + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ /* Note this is an assert/atomic_set because it is a software issue for ++ * a race to be occuring here */ ++ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_PREPARED); ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_COMMITTED); + -+ now = ktime_get(); ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", ++ kbdev->reset_timeout_ms); ++ hrtimer_start(&kbdev->hwaccess.backend.reset_timer, ++ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), ++ HRTIMER_MODE_REL); + -+ utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, -+ util_cl_share, now); ++ /* Try resetting early */ ++ kbasep_try_reset_gpu_early_locked(kbdev); ++} + -+ if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || -+ util_cl_share[1] < 0) { -+ utilisation = 0; -+ util_gl_share = 0; -+ util_cl_share[0] = 0; -+ util_cl_share[1] = 0; -+ goto out; ++void kbase_reset_gpu_silent(struct kbase_device *kbdev) ++{ ++ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING, ++ KBASE_RESET_GPU_SILENT) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return; + } + -+out: -+#ifdef CONFIG_MALI_MIDGARD_DVFS -+ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, -+ util_cl_share); -+#endif /*CONFIG_MALI_MIDGARD_DVFS */ -+ -+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); ++ kbase_disjoint_state_up(kbdev); + -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++ queue_work(kbdev->hwaccess.backend.reset_workq, ++ &kbdev->hwaccess.backend.reset_work); +} + -+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) ++bool kbase_reset_gpu_active(struct kbase_device *kbdev) +{ -+ 
bool isactive; -+ unsigned long flags; ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == ++ KBASE_RESET_GPU_NOT_PENDING) ++ return false; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ return true; ++} ++#endif /* KBASE_GPU_RESET_EN */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +new file mode 100644 +index 000000000..1f382b3c1 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +@@ -0,0 +1,164 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); -+ isactive = kbdev->pm.backend.metrics.timer_active; -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + -+ return isactive; -+} -+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + -+#endif /* CONFIG_MALI_MIDGARD_DVFS */ + -+/** -+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently -+ * running atoms -+ * @kbdev: Device pointer -+ * -+ * The caller must hold kbdev->pm.backend.metrics.lock ++ ++/* ++ * Job Manager backend-specific low-level APIs. + */ -+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) -+{ -+ int js; + -+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++#ifndef _KBASE_JM_HWACCESS_H_ ++#define _KBASE_JM_HWACCESS_H_ + -+ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; -+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; -+ kbdev->pm.backend.metrics.gpu_active = false; ++#include ++#include ++#include + -+ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { -+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); ++#include + -+ /* Head atom may have just completed, so if it isn't running -+ * then try the next atom */ -+ if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) -+ katom = kbase_gpu_inspect(kbdev, js, 1); ++/** ++ * kbase_job_submit_nolock() - Submit a job to a certain job-slot ++ * @kbdev: Device pointer ++ * @katom: Atom to submit ++ * @js: Job slot to submit on ++ * ++ * The caller must check kbasep_jm_is_submit_slots_free() != false before ++ * calling this. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold the hwaccess_lock ++ */ ++void kbase_job_submit_nolock(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, int js); + -+ if (katom && katom->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_SUBMITTED) { -+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ int device_nr = (katom->core_req & -+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) -+ ? katom->device_nr : 0; -+ if (!WARN_ON(device_nr >= 2)) -+ kbdev->pm.backend.metrics. -+ active_cl_ctx[device_nr] = 1; -+ } else { -+ /* Slot 2 should not be running non-compute -+ * atoms */ -+ if (!WARN_ON(js >= 2)) -+ kbdev->pm.backend.metrics. 
-+ active_gl_ctx[js] = 1; -+ } -+ kbdev->pm.backend.metrics.gpu_active = true; -+ } -+ } -+} ++/** ++ * kbase_job_done_slot() - Complete the head job on a particular job-slot ++ * @kbdev: Device pointer ++ * @s: Job slot ++ * @completion_code: Completion code of job reported by GPU ++ * @job_tail: Job tail address reported by GPU ++ * @end_timestamp: Timestamp of job completion ++ */ ++void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, ++ u64 job_tail, ktime_t *end_timestamp); + -+/* called when job is submitted to or removed from a GPU slot */ -+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) ++#ifdef CONFIG_GPU_TRACEPOINTS ++static inline char *kbasep_make_job_slot_string(int js, char *js_string, ++ size_t js_size) +{ -+ unsigned long flags; -+ ktime_t now; ++ snprintf(js_string, js_size, "job_slot_%i", js); ++ return js_string; ++} ++#endif + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/** ++ * kbase_job_hw_submit() - Submit a job to the GPU ++ * @kbdev: Device pointer ++ * @katom: Atom to submit ++ * @js: Job slot to submit on ++ * ++ * The caller must check kbasep_jm_is_submit_slots_free() != false before ++ * calling this. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold the hwaccess_lock ++ */ ++void kbase_job_hw_submit(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ int js); + -+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++/** ++ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop ++ * on the specified atom ++ * @kbdev: Device pointer ++ * @js: Job slot to stop on ++ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or ++ * JSn_COMMAND_SOFT_STOP ++ * @core_reqs: Core requirements of atom to stop ++ * @target_katom: Atom to stop ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold the hwaccess_lock ++ */ ++void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, ++ int js, ++ u32 action, ++ base_jd_core_req core_reqs, ++ struct kbase_jd_atom *target_katom); + -+ if (!timestamp) { -+ now = ktime_get(); -+ timestamp = &now; -+ } ++/** ++ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job ++ * slot belonging to a given context. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer. May be NULL ++ * @katom: Specific atom to stop. May be NULL ++ * @js: Job slot to hard stop ++ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or ++ * JSn_COMMAND_SOFT_STOP ++ * ++ * If no context is provided then all jobs on the slot will be soft or hard ++ * stopped. ++ * ++ * If a katom is provided then only that specific atom will be stopped. In this ++ * case the kctx parameter is ignored. ++ * ++ * Jobs that are on the slot but are not yet on the GPU will be unpulled and ++ * returned to the job scheduler. 
++ * ++ * Return: true if an atom was stopped, false otherwise ++ */ ++bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js, ++ struct kbase_jd_atom *katom, ++ u32 action); + -+ /* Track how long CL and/or GL jobs have been busy for */ -+ kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); ++/** ++ * kbase_job_slot_init - Initialise job slot framework ++ * @kbdev: Device pointer ++ * ++ * Called on driver initialisation ++ * ++ * Return: 0 on success ++ */ ++int kbase_job_slot_init(struct kbase_device *kbdev); + -+ kbase_pm_metrics_active_calc(kbdev); ++/** ++ * kbase_job_slot_halt - Halt the job slot framework ++ * @kbdev: Device pointer ++ * ++ * Should prevent any further job slot processing ++ */ ++void kbase_job_slot_halt(struct kbase_device *kbdev); + -+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -+} -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c ++/** ++ * kbase_job_slot_term - Terminate job slot framework ++ * @kbdev: Device pointer ++ * ++ * Called on driver termination ++ */ ++void kbase_job_slot_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush ++ * @kbdev: Device pointer ++ * ++ * Caller must not be in IRQ context ++ */ ++void kbase_gpu_cacheclean(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_JM_HWACCESS_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c new file mode 100644 -index 000000000..075f020c6 +index 000000000..4b4541660 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c -@@ -0,0 +1,973 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +@@ -0,0 +1,1952 @@ +/* + * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -345698,971 +346716,1950 @@ index 000000000..075f020c6 + + + ++ +/* -+ * Power policy API implementations ++ * Register-based HW access backend specific APIs + */ + +#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include + -+static const struct kbase_pm_policy *const policy_list[] = { -+#ifdef CONFIG_MALI_NO_MALI -+ &kbase_pm_always_on_policy_ops, -+ &kbase_pm_demand_policy_ops, -+ &kbase_pm_coarse_demand_policy_ops, -+#if !MALI_CUSTOMER_RELEASE -+ &kbase_pm_demand_always_powered_policy_ops, -+ &kbase_pm_fast_start_policy_ops, -+#endif -+#else /* CONFIG_MALI_NO_MALI */ -+#if !PLATFORM_POWER_DOWN_ONLY -+ &kbase_pm_demand_policy_ops, -+#endif /* !PLATFORM_POWER_DOWN_ONLY */ -+ &kbase_pm_coarse_demand_policy_ops, -+ &kbase_pm_always_on_policy_ops, -+#if !MALI_CUSTOMER_RELEASE -+#if !PLATFORM_POWER_DOWN_ONLY -+ &kbase_pm_demand_always_powered_policy_ops, -+ &kbase_pm_fast_start_policy_ops, -+#endif /* !PLATFORM_POWER_DOWN_ONLY */ -+#endif -+#endif /* CONFIG_MALI_NO_MALI */ -+}; ++/* Return whether the specified ringbuffer is empty. HW access lock must be ++ * held */ ++#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) ++/* Return number of atoms currently in the specified ringbuffer. 
HW access lock ++ * must be held */ ++#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + -+/* The number of policies available in the system. -+ * This is derived from the number of functions listed in policy_get_functions. ++static void kbase_gpu_release_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp); ++ ++/** ++ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer ++ * @kbdev: Device pointer ++ * @katom: Atom to enqueue ++ * ++ * Context: Caller must hold the HW access lock + */ -+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) ++static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + ++ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + -+/* Function IDs for looking up Timeline Trace codes in -+ * kbase_pm_change_state_trace_code */ -+enum kbase_pm_func_id { -+ KBASE_PM_FUNC_ID_REQUEST_CORES_START, -+ KBASE_PM_FUNC_ID_REQUEST_CORES_END, -+ KBASE_PM_FUNC_ID_RELEASE_CORES_START, -+ KBASE_PM_FUNC_ID_RELEASE_CORES_END, -+ /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither -+ * expect to hit it nor tend to hit it very much anyway. We can detect -+ * whether we need more instrumentation by a difference between -+ * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Must be the last */ -+ KBASE_PM_FUNC_ID_COUNT -+}; ++ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; ++ rb->write_idx++; + ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; ++} + -+/* State changes during request/unrequest/release-ing cores */ -+enum { -+ KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), -+ KBASE_PM_CHANGE_STATE_TILER = (1u << 1), ++/** ++ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once ++ * it has been completed ++ * @kbdev: Device pointer ++ * @js: Job slot to remove atom from ++ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in ++ * which case current time will be used. 
++ * ++ * Context: Caller must hold the HW access lock ++ * ++ * Return: Atom removed from ringbuffer ++ */ ++static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, ++ int js, ++ ktime_t *end_timestamp) ++{ ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ struct kbase_jd_atom *katom; + -+ /* These two must be last */ -+ KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | -+ KBASE_PM_CHANGE_STATE_SHADER), -+ KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 -+}; -+typedef u32 kbase_pm_change_state; ++ if (SLOT_RB_EMPTY(rb)) { ++ WARN(1, "GPU ringbuffer unexpectedly empty\n"); ++ return NULL; ++ } + ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+/* Timeline Trace code lookups for each function */ -+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] -+ [KBASE_PM_CHANGE_STATE_COUNT] = { -+ /* kbase_pm_request_cores */ -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | -+ KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, ++ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, -+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | -+ KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, ++ kbase_gpu_release_atom(kbdev, katom, end_timestamp); + -+ /* kbase_pm_release_cores */ -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | -+ KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, ++ rb->read_idx++; + -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, -+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | -+ KBASE_PM_CHANGE_STATE_TILER] = -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END -+}; ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + -+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, -+ enum kbase_pm_func_id func_id, -+ kbase_pm_change_state state) ++ kbase_js_debug_log_current_affinities(kbdev); ++ ++ return katom; ++} ++ ++struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, ++ int idx) +{ -+ int trace_code; ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + -+ KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); -+ 
KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == -+ state); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ trace_code = kbase_pm_change_state_trace_code[func_id][state]; -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); ++ if ((SLOT_RB_ENTRIES(rb) - 1) < idx) ++ return NULL; /* idx out of range */ ++ ++ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + -+#else /* CONFIG_MALI_TRACE_TIMELINE */ -+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, -+ enum kbase_pm_func_id func_id, kbase_pm_change_state state) ++struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, ++ int js) +{ ++ return kbase_gpu_inspect(kbdev, js, 0); +} + -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ ++struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, ++ int js) ++{ ++ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; ++ ++ if (SLOT_RB_EMPTY(rb)) ++ return NULL; ++ ++ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; ++} + +/** -+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any -+ * requested shader cores -+ * @kbdev: Device pointer ++ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently ++ * on the GPU ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return: true if there are atoms on the GPU for slot js, ++ * false otherwise + */ -+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) ++static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) +{ -+ u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; -+ u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; ++ int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbdev->pm.backend.desired_shader_state &= -+ ~kbdev->pm.backend.shader_poweroff_pending; -+ kbdev->pm.backend.desired_tiler_state &= -+ ~kbdev->pm.backend.tiler_poweroff_pending; -+ -+ kbdev->pm.backend.shader_poweroff_pending = 0; -+ kbdev->pm.backend.tiler_poweroff_pending = 0; -+ -+ if (prev_shader_state != kbdev->pm.backend.desired_shader_state || -+ prev_tiler_state != -+ kbdev->pm.backend.desired_tiler_state || -+ kbdev->pm.backend.ca_in_transition) { -+ bool cores_are_available; -+ -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); -+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); -+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, -+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ /* Don't need 'cores_are_available', -+ * because we don't return anything */ -+ CSTD_UNUSED(cores_are_available); ++ if (!katom) ++ return false; ++ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED || ++ katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) ++ return true; + } ++ ++ return false; +} + -+static enum hrtimer_restart -+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) ++/** ++ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms ++ * currently on the GPU ++ * @kbdev: Device pointer ++ * ++ * Return: true if there are any atoms on the GPU, false otherwise ++ */ ++static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ -+ kbdev = container_of(timer, struct kbase_device, -+ pm.backend.gpu_poweroff_timer); -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ /* It is safe for this 
call to do nothing if the work item is already -+ * queued. The worker function will read the must up-to-date state of -+ * kbdev->pm.backend.gpu_poweroff_pending under lock. -+ * -+ * If a state change occurs while the worker function is processing, -+ * this call will succeed as a work item can be requeued once it has -+ * started processing. -+ */ -+ if (kbdev->pm.backend.gpu_poweroff_pending) -+ queue_work(kbdev->pm.backend.gpu_poweroff_wq, -+ &kbdev->pm.backend.gpu_poweroff_work); ++ int js; ++ int i; + -+ if (kbdev->pm.backend.shader_poweroff_pending || -+ kbdev->pm.backend.tiler_poweroff_pending) { -+ kbdev->pm.backend.shader_poweroff_pending_time--; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ KBASE_DEBUG_ASSERT( -+ kbdev->pm.backend.shader_poweroff_pending_time -+ >= 0); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ if (!kbdev->pm.backend.shader_poweroff_pending_time) -+ kbasep_pm_do_poweroff_cores(kbdev); ++ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ return true; ++ } + } ++ return false; ++} + -+ if (kbdev->pm.backend.poweroff_timer_needed) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) ++{ ++ int nr = 0; ++ int i; + -+ hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return HRTIMER_RESTART; -+ } ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ kbdev->pm.backend.poweroff_timer_running = false; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (katom && (katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED)) ++ nr++; ++ } + -+ return HRTIMER_NORESTART; ++ return nr; +} + -+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) ++int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ -+ unsigned long flags; -+ struct kbase_device *kbdev; -+ bool do_poweroff = false; -+ -+ kbdev = container_of(data, struct kbase_device, -+ pm.backend.gpu_poweroff_work); ++ int nr = 0; ++ int i; + -+ mutex_lock(&kbdev->pm.lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (kbdev->pm.backend.gpu_poweroff_pending == 0) { -+ mutex_unlock(&kbdev->pm.lock); -+ return; ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ if (kbase_gpu_inspect(kbdev, js, i)) ++ nr++; + } + -+ kbdev->pm.backend.gpu_poweroff_pending--; -+ -+ if (kbdev->pm.backend.gpu_poweroff_pending > 0) { -+ mutex_unlock(&kbdev->pm.lock); -+ return; -+ } ++ return nr; ++} + -+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); ++static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, ++ enum kbase_atom_gpu_rb_state min_rb_state) ++{ ++ int nr = 0; ++ int i; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Only power off the GPU if a request is still pending */ -+ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) -+ do_poweroff = true; ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (katom && (katom->gpu_rb_state >= min_rb_state)) ++ nr++; ++ } + -+ if (do_poweroff) { -+ kbdev->pm.backend.poweroff_timer_needed = false; -+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); -+ 
kbdev->pm.backend.poweroff_timer_running = false; ++ return nr; ++} + -+ /* Power off the GPU */ -+ kbase_pm_do_poweroff(kbdev, false); -+ } ++/** ++ * check_secure_atom - Check if the given atom is in the given secure state and ++ * has a ringbuffer state of at least ++ * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION ++ * @katom: Atom pointer ++ * @secure: Desired secure state ++ * ++ * Return: true if atom is in the given state, false otherwise ++ */ ++static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) ++{ ++ if (katom->gpu_rb_state >= ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && ++ ((kbase_jd_katom_is_protected(katom) && secure) || ++ (!kbase_jd_katom_is_protected(katom) && !secure))) ++ return true; + -+ mutex_unlock(&kbdev->pm.lock); ++ return false; +} + -+int kbase_pm_policy_init(struct kbase_device *kbdev) ++/** ++ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given ++ * secure state in the ringbuffers of at least ++ * state ++ * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE ++ * @kbdev: Device pointer ++ * @secure: Desired secure state ++ * ++ * Return: true if any atoms are in the given state, false otherwise ++ */ ++static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, ++ bool secure) +{ -+ struct workqueue_struct *wq; ++ int js, i; + -+ wq = alloc_workqueue("kbase_pm_do_poweroff", -+ WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (!wq) -+ return -ENOMEM; ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ for (i = 0; i < SLOT_RB_SIZE; i++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, i); + -+ kbdev->pm.backend.gpu_poweroff_wq = wq; -+ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, -+ kbasep_pm_do_gpu_poweroff_wq); -+ hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, -+ CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ kbdev->pm.backend.gpu_poweroff_timer.function = -+ kbasep_pm_do_gpu_poweroff_callback; -+ kbdev->pm.backend.pm_current_policy = policy_list[0]; -+ kbdev->pm.backend.pm_current_policy->init(kbdev); -+ kbdev->pm.gpu_poweroff_time = -+ HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); -+ kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; -+ kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; ++ if (katom) { ++ if (check_secure_atom(katom, secure)) ++ return true; ++ } ++ } ++ } + -+ return 0; ++ return false; +} + -+void kbase_pm_policy_term(struct kbase_device *kbdev) ++int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ -+ kbdev->pm.backend.pm_current_policy->term(kbdev); -+ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); ++ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != ++ KBASE_RESET_GPU_NOT_PENDING) { ++ /* The GPU is being reset - so prevent submission */ ++ return 0; ++ } ++ ++ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + -+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) ++ ++static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); ++ ++static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, ++ int js, ++ struct kbase_jd_atom *katom) +{ -+ unsigned long flags; ++ /* The most recently checked affinity. Having this at this scope allows ++ * us to guarantee that we've checked the affinity in this function ++ * call. 
++ */ ++ u64 recently_chosen_affinity = 0; ++ bool chosen_affinity = false; ++ bool retry; + -+ lockdep_assert_held(&kbdev->pm.lock); ++ do { ++ retry = false; + -+ kbdev->pm.backend.poweroff_timer_needed = false; -+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.poweroff_timer_running = false; ++ /* NOTE: The following uses a number of FALLTHROUGHs to optimize ++ * the calls to this function. Ending of the function is ++ * indicated by BREAK OUT */ ++ switch (katom->coreref_state) { ++ /* State when job is first attempted to be run */ ++ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: ++ KBASE_DEBUG_ASSERT(katom->affinity == 0); + -+ /* If wq is already running but is held off by pm.lock, make sure it has -+ * no effect */ -+ kbdev->pm.backend.gpu_poweroff_pending = 0; ++ /* Compute affinity */ ++ if (false == kbase_js_choose_affinity( ++ &recently_chosen_affinity, kbdev, katom, ++ js)) { ++ /* No cores are currently available */ ++ /* *** BREAK OUT: No state transition *** */ ++ break; ++ } + -+ kbdev->pm.backend.shader_poweroff_pending = 0; -+ kbdev->pm.backend.tiler_poweroff_pending = 0; -+ kbdev->pm.backend.shader_poweroff_pending_time = 0; ++ chosen_affinity = true; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} ++ /* Request the cores */ ++ kbase_pm_request_cores(kbdev, ++ katom->core_req & BASE_JD_REQ_T, ++ recently_chosen_affinity); + -+void kbase_pm_update_active(struct kbase_device *kbdev) -+{ -+ struct kbase_pm_device_data *pm = &kbdev->pm; -+ struct kbase_pm_backend_data *backend = &pm->backend; -+ unsigned long flags; -+ bool active; ++ katom->affinity = recently_chosen_affinity; + -+ lockdep_assert_held(&pm->lock); ++ /* Proceed to next state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + -+ /* pm_current_policy will never be NULL while pm.lock is held */ -+ KBASE_DEBUG_ASSERT(backend->pm_current_policy); ++ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: ++ { ++ enum kbase_pm_cores_ready cores_ready; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); + -+ active = backend->pm_current_policy->get_core_active(kbdev); ++ cores_ready = kbase_pm_register_inuse_cores( ++ kbdev, ++ katom->core_req & BASE_JD_REQ_T, ++ katom->affinity); ++ if (cores_ready == KBASE_NEW_AFFINITY) { ++ /* Affinity no longer valid - return to ++ * previous state */ ++ kbasep_js_job_check_deref_cores(kbdev, ++ katom); ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_REGISTER_INUSE_FAILED, ++ katom->kctx, katom, ++ katom->jc, js, ++ (u32) katom->affinity); ++ /* *** BREAK OUT: Return to previous ++ * state, retry *** */ ++ retry = true; ++ break; ++ } ++ if (cores_ready == KBASE_CORES_NOT_READY) { ++ /* Stay in this state and return, to ++ * retry at this state later */ ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_REGISTER_INUSE_FAILED, ++ katom->kctx, katom, ++ katom->jc, js, ++ (u32) katom->affinity); ++ /* *** BREAK OUT: No state transition ++ * *** */ ++ break; ++ } ++ /* Proceed to next state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; ++ } + -+ if (active) { -+ if (backend->gpu_poweroff_pending) { -+ /* Cancel any pending power off request */ -+ backend->gpu_poweroff_pending = 0; ++ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough 
*/ ++ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); + -+ /* If a request was pending then the GPU was still -+ * powered, so no need to continue */ -+ if (!kbdev->poweroff_pending) { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ return; ++ /* Optimize out choosing the affinity twice in the same ++ * function call */ ++ if (chosen_affinity == false) { ++ /* See if the affinity changed since a previous ++ * call. */ ++ if (false == kbase_js_choose_affinity( ++ &recently_chosen_affinity, ++ kbdev, katom, js)) { ++ /* No cores are currently available */ ++ kbasep_js_job_check_deref_cores(kbdev, ++ katom); ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, ++ katom->kctx, katom, ++ katom->jc, js, ++ (u32) recently_chosen_affinity); ++ /* *** BREAK OUT: Transition to lower ++ * state *** */ ++ break; ++ } ++ chosen_affinity = true; + } -+ } + -+ if (!backend->poweroff_timer_running && !backend->gpu_powered && -+ (pm->poweroff_gpu_ticks || -+ pm->poweroff_shader_ticks)) { -+ backend->poweroff_timer_needed = true; -+ backend->poweroff_timer_running = true; -+ hrtimer_start(&backend->gpu_poweroff_timer, -+ pm->gpu_poweroff_time, -+ HRTIMER_MODE_REL); -+ } ++ /* Now see if this requires a different set of cores */ ++ if (recently_chosen_affinity != katom->affinity) { ++ enum kbase_pm_cores_ready cores_ready; + -+ /* Power on the GPU and any cores requested by the policy */ -+ if (pm->backend.poweroff_wait_in_progress) { -+ pm->backend.poweron_required = true; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ kbase_pm_do_poweron(kbdev, false); -+ } -+ } else { -+ /* It is an error for the power policy to power off the GPU -+ * when there are contexts active */ -+ KBASE_DEBUG_ASSERT(pm->active_count == 0); ++ kbase_pm_request_cores(kbdev, ++ katom->core_req & BASE_JD_REQ_T, ++ recently_chosen_affinity); + -+ if (backend->shader_poweroff_pending || -+ backend->tiler_poweroff_pending) { -+ backend->shader_poweroff_pending = 0; -+ backend->tiler_poweroff_pending = 0; -+ backend->shader_poweroff_pending_time = 0; -+ } ++ /* Register new cores whilst we still hold the ++ * old ones, to minimize power transitions */ ++ cores_ready = ++ kbase_pm_register_inuse_cores(kbdev, ++ katom->core_req & BASE_JD_REQ_T, ++ recently_chosen_affinity); ++ kbasep_js_job_check_deref_cores(kbdev, katom); + -+ /* Request power off */ -+ if (pm->backend.gpu_powered) { -+ if (pm->poweroff_gpu_ticks) { -+ backend->gpu_poweroff_pending = -+ pm->poweroff_gpu_ticks; -+ backend->poweroff_timer_needed = true; -+ if (!backend->poweroff_timer_running) { -+ /* Start timer if not running (eg if -+ * power policy has been changed from -+ * always_on to something else). 
This -+ * will ensure the GPU is actually -+ * powered off */ -+ backend->poweroff_timer_running -+ = true; -+ hrtimer_start( -+ &backend->gpu_poweroff_timer, -+ pm->gpu_poweroff_time, -+ HRTIMER_MODE_REL); ++ /* Fixup the state that was reduced by ++ * deref_cores: */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; ++ katom->affinity = recently_chosen_affinity; ++ if (cores_ready == KBASE_NEW_AFFINITY) { ++ /* Affinity no longer valid - return to ++ * previous state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; ++ ++ kbasep_js_job_check_deref_cores(kbdev, ++ katom); ++ ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_REGISTER_INUSE_FAILED, ++ katom->kctx, katom, ++ katom->jc, js, ++ (u32) katom->affinity); ++ /* *** BREAK OUT: Return to previous ++ * state, retry *** */ ++ retry = true; ++ break; + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); ++ /* Now might be waiting for powerup again, with ++ * a new affinity */ ++ if (cores_ready == KBASE_CORES_NOT_READY) { ++ /* Return to previous state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, ++ katom->kctx, katom, ++ katom->jc, js, ++ (u32) katom->affinity); ++ /* *** BREAK OUT: Transition to lower ++ * state *** */ ++ break; ++ } ++ } ++ /* Proceed to next state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; + -+ /* Power off the GPU immediately */ -+ kbase_pm_do_poweroff(kbdev, false); ++ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); ++ KBASE_DEBUG_ASSERT(katom->affinity == ++ recently_chosen_affinity); ++ ++ /* Note: this is where the caller must've taken the ++ * hwaccess_lock */ ++ ++ /* Check for affinity violations - if there are any, ++ * then we just ask the caller to requeue and try again ++ * later */ ++ if (kbase_js_affinity_would_violate(kbdev, js, ++ katom->affinity) != false) { ++ /* Return to previous state */ ++ katom->coreref_state = ++ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; ++ /* *** BREAK OUT: Transition to lower state *** ++ */ ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, ++ JS_CORE_REF_AFFINITY_WOULD_VIOLATE, ++ katom->kctx, katom, katom->jc, js, ++ (u32) katom->affinity); ++ break; + } -+ } else { -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* No affinity violations would result, so the cores are ++ * ready */ ++ katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; ++ /* *** BREAK OUT: Cores Ready *** */ ++ break; ++ ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, ++ "Unhandled kbase_atom_coreref_state %d", ++ katom->coreref_state); ++ break; + } -+ } ++ } while (retry != false); ++ ++ return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); +} + -+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) ++static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ u64 desired_bitmap; -+ u64 desired_tiler_bitmap; -+ bool cores_are_available; -+ bool do_poweroff = false; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ switch (katom->coreref_state) { ++ case KBASE_ATOM_COREREF_STATE_READY: 
++ /* State where atom was submitted to the HW - just proceed to ++ * power-down */ ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); + -+ if (kbdev->pm.backend.pm_current_policy == NULL) -+ return; -+ if (kbdev->pm.backend.poweroff_wait_in_progress) -+ return; ++ /* fallthrough */ + -+ if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && -+ !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt -+ && !kbdev->tiler_inuse_cnt) { -+ /* We are trying to change in/out of protected mode - force all -+ * cores off so that the L2 powers down */ -+ desired_bitmap = 0; -+ desired_tiler_bitmap = 0; -+ } else { -+ desired_bitmap = -+ kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); -+ desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); ++ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: ++ /* State where cores were registered */ ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); ++ kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, ++ katom->affinity); + -+ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) -+ desired_tiler_bitmap = 1; -+ else -+ desired_tiler_bitmap = 0; ++ break; + -+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { -+ /* Unless XAFFINITY is supported, enable core 0 if tiler -+ * required, regardless of core availability */ -+ if (kbdev->tiler_needed_cnt > 0 || -+ kbdev->tiler_inuse_cnt > 0) -+ desired_bitmap |= 1; -+ } ++ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: ++ /* State where cores were requested, but not registered */ ++ KBASE_DEBUG_ASSERT(katom->affinity != 0 || ++ (katom->core_req & BASE_JD_REQ_T)); ++ kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, ++ katom->affinity); ++ break; ++ ++ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: ++ /* Initial state - nothing required */ ++ KBASE_DEBUG_ASSERT(katom->affinity == 0); ++ break; ++ ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, ++ "Unhandled coreref_state: %d", ++ katom->coreref_state); ++ break; + } + -+ if (kbdev->pm.backend.desired_shader_state != desired_bitmap) -+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, -+ (u32)desired_bitmap); -+ /* Are any cores being powered on? 
*/ -+ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || -+ ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || -+ kbdev->pm.backend.ca_in_transition) { -+ /* Check if we are powering off any cores before updating shader -+ * state */ -+ if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || -+ kbdev->pm.backend.desired_tiler_state & -+ ~desired_tiler_bitmap) { -+ /* Start timer to power off cores */ -+ kbdev->pm.backend.shader_poweroff_pending |= -+ (kbdev->pm.backend.desired_shader_state & -+ ~desired_bitmap); -+ kbdev->pm.backend.tiler_poweroff_pending |= -+ (kbdev->pm.backend.desired_tiler_state & -+ ~desired_tiler_bitmap); ++ katom->affinity = 0; ++ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; ++} + -+ if (kbdev->pm.poweroff_shader_ticks && -+ !kbdev->protected_mode_transition) -+ kbdev->pm.backend.shader_poweroff_pending_time = -+ kbdev->pm.poweroff_shader_ticks; -+ else -+ do_poweroff = true; -+ } ++static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, ++ base_jd_core_req core_req, u64 affinity, ++ enum kbase_atom_coreref_state coreref_state) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ kbdev->pm.backend.desired_shader_state = desired_bitmap; -+ kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; ++ switch (coreref_state) { ++ case KBASE_ATOM_COREREF_STATE_READY: ++ /* State where atom was submitted to the HW - just proceed to ++ * power-down */ ++ KBASE_DEBUG_ASSERT(affinity != 0 || ++ (core_req & BASE_JD_REQ_T)); + -+ /* If any cores are being powered on, transition immediately */ -+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); -+ } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || -+ kbdev->pm.backend.desired_tiler_state & -+ ~desired_tiler_bitmap) { -+ /* Start timer to power off cores */ -+ kbdev->pm.backend.shader_poweroff_pending |= -+ (kbdev->pm.backend.desired_shader_state & -+ ~desired_bitmap); -+ kbdev->pm.backend.tiler_poweroff_pending |= -+ (kbdev->pm.backend.desired_tiler_state & -+ ~desired_tiler_bitmap); -+ if (kbdev->pm.poweroff_shader_ticks && -+ !kbdev->protected_mode_transition) -+ kbdev->pm.backend.shader_poweroff_pending_time = -+ kbdev->pm.poweroff_shader_ticks; -+ else -+ kbasep_pm_do_poweroff_cores(kbdev); -+ } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && -+ desired_tiler_bitmap != 0 && -+ kbdev->pm.backend.poweroff_timer_needed) { -+ /* If power policy is keeping cores on despite there being no -+ * active contexts then disable poweroff timer as it isn't -+ * required. 
-+ * Only reset poweroff_timer_needed if we're not in the middle -+ * of the power off callback */ -+ kbdev->pm.backend.poweroff_timer_needed = false; ++ /* fallthrough */ ++ ++ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: ++ /* State where cores were registered */ ++ KBASE_DEBUG_ASSERT(affinity != 0 || ++ (core_req & BASE_JD_REQ_T)); ++ kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, ++ affinity); ++ ++ break; ++ ++ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: ++ /* State where cores were requested, but not registered */ ++ KBASE_DEBUG_ASSERT(affinity != 0 || ++ (core_req & BASE_JD_REQ_T)); ++ kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, ++ affinity); ++ break; ++ ++ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: ++ /* Initial state - nothing required */ ++ KBASE_DEBUG_ASSERT(affinity == 0); ++ break; ++ ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, ++ "Unhandled coreref_state: %d", ++ coreref_state); ++ break; + } ++} + -+ /* Ensure timer does not power off wanted cores and make sure to power -+ * off unwanted cores */ -+ if (kbdev->pm.backend.shader_poweroff_pending || -+ kbdev->pm.backend.tiler_poweroff_pending) { -+ kbdev->pm.backend.shader_poweroff_pending &= -+ ~(kbdev->pm.backend.desired_shader_state & -+ desired_bitmap); -+ kbdev->pm.backend.tiler_poweroff_pending &= -+ ~(kbdev->pm.backend.desired_tiler_state & -+ desired_tiler_bitmap); ++static void kbase_gpu_release_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ if (!kbdev->pm.backend.shader_poweroff_pending && -+ !kbdev->pm.backend.tiler_poweroff_pending) -+ kbdev->pm.backend.shader_poweroff_pending_time = 0; ++ switch (katom->gpu_rb_state) { ++ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: ++ /* Should be impossible */ ++ WARN(1, "Attempting to release atom not in ringbuffer\n"); ++ break; ++ ++ case KBASE_ATOM_GPU_RB_SUBMITTED: ++ /* Inform power management at start/finish of atom so it can ++ * update its GPU utilisation metrics. Mark atom as not ++ * submitted beforehand. 
*/ ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; ++ kbase_pm_metrics_update(kbdev, end_timestamp); ++ ++ if (katom->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ ++ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); ++ KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); ++ KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_READY: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: ++ kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, ++ katom->affinity); ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: ++ break; ++ ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: ++ if (katom->protected_state.enter != ++ KBASE_ATOM_ENTER_PROTECTED_CHECK || ++ katom->protected_state.exit != ++ KBASE_ATOM_EXIT_PROTECTED_CHECK) ++ kbdev->protected_mode_transition = false; ++ ++ if (kbase_jd_katom_is_protected(katom) && ++ (katom->protected_state.enter == ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) { ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ ++ /* Go back to configured model for IPA */ ++ kbase_ipa_model_use_configured_locked(kbdev); ++ } ++ ++ ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: ++ break; + } + -+ /* Shader poweroff is deferred to the end of the function, to eliminate -+ * issues caused by the core availability policy recursing into this -+ * function */ -+ if (do_poweroff) -+ kbasep_pm_do_poweroff_cores(kbdev); ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++} + -+ /* Don't need 'cores_are_available', because we don't return anything */ -+ CSTD_UNUSED(cores_are_available); ++static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ kbase_gpu_release_atom(kbdev, katom, NULL); ++ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + -+void kbase_pm_update_cores_state(struct kbase_device *kbdev) ++static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) +{ -+ unsigned long flags; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ bool slot_busy[3]; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) ++ return true; ++ slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, ++ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); ++ slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, ++ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); ++ slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, ++ KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || ++ (js != 2 && !slot_busy[2])) ++ return true; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ ++ if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 
0) || ++ kbase_gpu_atoms_submitted(kbdev, 1) || ++ backend->rmu_workaround_flag)) ++ return false; ++ ++ /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ ++ if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || ++ !backend->rmu_workaround_flag)) ++ return false; ++ ++ backend->rmu_workaround_flag = !backend->rmu_workaround_flag; ++ ++ return true; +} + -+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) ++/** ++ * other_slots_busy - Determine if any job slots other than @js are currently ++ * running atoms ++ * @kbdev: Device pointer ++ * @js: Job slot ++ * ++ * Return: true if any slots other than @js are busy, false otherwise ++ */ ++static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +{ -+ if (!list) -+ return POLICY_COUNT; ++ int slot; + -+ *list = policy_list; ++ for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { ++ if (slot == js) ++ continue; + -+ return POLICY_COUNT; ++ if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, ++ KBASE_ATOM_GPU_RB_SUBMITTED)) ++ return true; ++ } ++ ++ return false; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_list_policies); ++static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) ++{ ++ return kbdev->protected_mode; ++} + -+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) ++static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ int err = -EINVAL; + -+ return kbdev->pm.backend.pm_current_policy; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ WARN_ONCE(!kbdev->protected_ops, ++ "Cannot enter protected mode: protected callbacks not specified.\n"); ++ ++ /* ++ * When entering into protected mode, we must ensure that the ++ * GPU is not operating in coherent mode as well. This is to ++ * ensure that no protected memory can be leaked. ++ */ ++ if (kbdev->system_coherency == COHERENCY_ACE) ++ kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); ++ ++ if (kbdev->protected_ops) { ++ /* Switch GPU to protected mode */ ++ err = kbdev->protected_ops->protected_mode_enable( ++ kbdev->protected_dev); ++ ++ if (err) ++ dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", ++ err); ++ else ++ kbdev->protected_mode = true; ++ } ++ ++ return err; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_get_policy); ++static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+void kbase_pm_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_policy *new_policy) ++ WARN_ONCE(!kbdev->protected_ops, ++ "Cannot exit protected mode: protected callbacks not specified.\n"); ++ ++ if (!kbdev->protected_ops) ++ return -EINVAL; ++ ++ /* The protected mode disable callback will be called as part of reset ++ */ ++ kbase_reset_gpu_silent(kbdev); ++ ++ return 0; ++} ++ ++static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, ++ struct kbase_jd_atom **katom, int idx, int js) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ const struct kbase_pm_policy *old_policy; -+ unsigned long flags; ++ int err = 0; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(new_policy != NULL); ++ switch (katom[idx]->protected_state.enter) { ++ case KBASE_ATOM_ENTER_PROTECTED_CHECK: ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); ++ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV ++ * should ensure that we are not already transitiong, and that ++ * there are no atoms currently on the GPU. 
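kbase_gpu_rmu_workaround() above keeps job slot 2 from running concurrently with slots 0 and 1 on GPUs affected by PRLAM-8987, and flips rmu_workaround_flag on each successful submission so neither side can starve the other. The sketch below models just that admission rule with plain booleans; the struct and field names are illustrative stand-ins, not the driver's.

/* Illustrative sketch: slot-2 vs slots-0/1 admission with a fairness
 * toggle, loosely modelled on the PRLAM-8987 workaround above. */
#include <stdbool.h>
#include <stdio.h>

struct slot_model {
	bool pending[3];   /* work queued on the slot (any pre-submit stage) */
	bool running[3];   /* atom actually submitted to the hardware        */
	bool slot01_turn;  /* fairness toggle (rmu_workaround_flag)          */
};

static bool may_submit(struct slot_model *m, int js)
{
	bool s01_pending = m->pending[0] || m->pending[1];

	/* Nothing queued on the "other side": always allowed. */
	if ((js == 2 && !s01_pending) || (js != 2 && !m->pending[2]))
		return true;

	/* Slot 2 waits while 0/1 run, or while it is 0/1's turn. */
	if (js == 2 && (m->running[0] || m->running[1] || m->slot01_turn))
		return false;

	/* Slots 0/1 wait while 2 runs, or while it is slot 2's turn. */
	if (js != 2 && (m->running[2] || !m->slot01_turn))
		return false;

	/* Alternate sides so neither can starve the other. */
	m->slot01_turn = !m->slot01_turn;
	return true;
}

int main(void)
{
	struct slot_model m = {
		{ false, false, false }, { false, false, false }, false
	};

	m.pending[0] = true;
	m.pending[2] = true;
	printf("slot 2 may submit: %d\n", may_submit(&m, 2)); /* 1, turn flips */
	printf("slot 0 may submit: %d\n", may_submit(&m, 0)); /* 1, turn flips back */
	return 0;
}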
*/ ++ WARN_ON(kbdev->protected_mode_transition); ++ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + -+ KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); ++ kbdev->protected_mode_transition = true; ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_VINSTR; + -+ /* During a policy change we pretend the GPU is active */ -+ /* A suspend won't happen here, because we're in a syscall from a -+ * userspace thread */ -+ kbase_pm_context_active(kbdev); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_ENTER_PROTECTED_VINSTR: ++ if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { ++ /* ++ * We can't switch now because ++ * the vinstr core state switch ++ * is not done yet. ++ */ ++ return -EAGAIN; ++ } + -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++ /* Use generic model for IPA in protected mode */ ++ kbase_ipa_model_use_fallback_locked(kbdev); + -+ /* Remove the policy to prevent IRQ handlers from working on it */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ old_policy = kbdev->pm.backend.pm_current_policy; -+ kbdev->pm.backend.pm_current_policy = NULL; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Once reaching this point GPU must be ++ * switched to protected mode or vinstr ++ * re-enabled. */ + -+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, -+ old_policy->id); -+ if (old_policy->term) -+ old_policy->term(kbdev); ++ /* ++ * Not in correct mode, begin protected mode switch. ++ * Entering protected mode requires us to power down the L2, ++ * and drop out of fully coherent mode. ++ */ ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + -+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, -+ new_policy->id); -+ if (new_policy->init) -+ new_policy->init(kbdev); ++ kbase_pm_update_cores_state_nolock(kbdev); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbdev->pm.backend.pm_current_policy = new_policy; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: ++ /* Avoid unnecessary waiting on non-ACE platforms. */ ++ if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { ++ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || ++ kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { ++ /* ++ * The L2 is still powered, wait for all the users to ++ * finish with it before doing the actual reset. ++ */ ++ return -EAGAIN; ++ } ++ } + -+ /* If any core power state changes were previously attempted, but -+ * couldn't be made because the policy was changing (current_policy was -+ * NULL), then re-try them here. */ -+ kbase_pm_update_active(kbdev); -+ kbase_pm_update_cores_state(kbdev); ++ katom[idx]->protected_state.enter = ++ KBASE_ATOM_ENTER_PROTECTED_FINISHED; + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + -+ /* Now the policy change is finished, we release our fake context active -+ * reference */ -+ kbase_pm_context_idle(kbdev); -+} ++ /* No jobs running, so we can switch GPU mode right now. */ ++ err = kbase_gpu_protected_mode_enter(kbdev); + -+KBASE_EXPORT_TEST_API(kbase_pm_set_policy); ++ /* ++ * Regardless of result, we are no longer transitioning ++ * the GPU. 
++ */ ++ kbdev->protected_mode_transition = false; ++ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); ++ if (err) { ++ /* ++ * Failed to switch into protected mode, resume ++ * vinstr core and fail atom. ++ */ ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); ++ /* Only return if head atom or previous atom ++ * already removed - as atoms must be returned ++ * in order. */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } + -+/* Check whether a state change has finished, and trace it as completed */ -+static void -+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) -+{ -+ if ((kbdev->shader_available_bitmap & -+ kbdev->pm.backend.desired_shader_state) -+ == kbdev->pm.backend.desired_shader_state && -+ (kbdev->tiler_available_bitmap & -+ kbdev->pm.backend.desired_tiler_state) -+ == kbdev->pm.backend.desired_tiler_state) -+ kbase_timeline_pm_check_handle_event(kbdev, -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ /* Go back to configured model for IPA */ ++ kbase_ipa_model_use_configured_locked(kbdev); ++ ++ return -EINVAL; ++ } ++ ++ /* Protected mode sanity checks. */ ++ KBASE_DEBUG_ASSERT_MSG( ++ kbase_jd_katom_is_protected(katom[idx]) == ++ kbase_gpu_in_protected_mode(kbdev), ++ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", ++ kbase_jd_katom_is_protected(katom[idx]), ++ kbase_gpu_in_protected_mode(kbdev)); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_READY; ++ } ++ ++ return 0; +} + -+void kbase_pm_request_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores) ++static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, ++ struct kbase_jd_atom **katom, int idx, int js) +{ -+ u64 cores; ++ int err = 0; + -+ kbase_pm_change_state change_gpu_state = 0u; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ switch (katom[idx]->protected_state.exit) { ++ case KBASE_ATOM_EXIT_PROTECTED_CHECK: ++ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev); ++ /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV ++ * should ensure that we are not already transitiong, and that ++ * there are no atoms currently on the GPU. */ ++ WARN_ON(kbdev->protected_mode_transition); ++ WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* ++ * Exiting protected mode requires a reset, but first the L2 ++ * needs to be powered down to ensure it's not active when the ++ * reset is issued. ++ */ ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + -+ cores = shader_cores; -+ while (cores) { -+ int bitnum = fls64(cores) - 1; -+ u64 bit = 1ULL << bitnum; ++ kbdev->protected_mode_transition = true; ++ kbase_pm_update_cores_state_nolock(kbdev); + -+ /* It should be almost impossible for this to overflow. It would -+ * require 2^32 atoms to request a particular core, which would -+ * require 2^24 contexts to submit. This would require an amount -+ * of memory that is impossible on a 32-bit system and extremely -+ * unlikely on a 64-bit system. 
*/ -+ int cnt = ++kbdev->shader_needed_cnt[bitnum]; ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: ++ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || ++ kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { ++ /* ++ * The L2 is still powered, wait for all the users to ++ * finish with it before doing the actual reset. ++ */ ++ return -EAGAIN; ++ } ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_RESET; + -+ if (1 == cnt) { -+ kbdev->shader_needed_bitmap |= bit; -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_EXIT_PROTECTED_RESET: ++ /* Issue the reset to the GPU */ ++ err = kbase_gpu_protected_mode_reset(kbdev); ++ ++ if (err) { ++ kbdev->protected_mode_transition = false; ++ ++ /* Failed to exit protected mode, fail atom */ ++ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); ++ /* Only return if head atom or previous atom ++ * already removed - as atoms must be returned ++ * in order */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } ++ ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ ++ /* Use generic model for IPA in protected mode */ ++ kbase_ipa_model_use_fallback_locked(kbdev); ++ ++ return -EINVAL; + } + -+ cores &= ~bit; ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: ++ /* A GPU reset is issued when exiting protected mode. Once the ++ * reset is done all atoms' state will also be reset. For this ++ * reason, if the atom is still in this state we can safely ++ * say that the reset has not completed i.e., we have not ++ * finished exiting protected mode yet. 
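Both the protected-mode enter and exit paths above are written as resumable step functions: each call advances protected_state as far as it can and returns 0 when done, -EAGAIN when it must be re-polled after some hardware event (L2 powered down, vinstr suspended, reset finished), or -EINVAL on a hard failure that fails the atom. A minimal model of that calling convention is sketched below; the state names, struct fields and the "hardware conditions" are made up for illustration.

/* Illustrative sketch: a resumable step function using the same
 * 0 / -EAGAIN / -EINVAL convention as the protected-mode paths. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum pmode_step { PMODE_CHECK, PMODE_IDLE_L2, PMODE_FINISHED };

struct pmode_ctx {
	enum pmode_step step;
	bool l2_powered;       /* pretend hardware condition */
	bool enable_will_fail; /* pretend failure injection  */
};

static int pmode_enter_step(struct pmode_ctx *c)
{
	switch (c->step) {
	case PMODE_CHECK:
		c->step = PMODE_IDLE_L2;
		/* fall through */
	case PMODE_IDLE_L2:
		if (c->l2_powered)
			return -EAGAIN;	/* come back once the L2 is off */
		c->step = PMODE_FINISHED;
		/* fall through */
	case PMODE_FINISHED:
		if (c->enable_will_fail)
			return -EINVAL;	/* hard failure: caller fails the atom */
		return 0;
	}
	return -EINVAL;
}

int main(void)
{
	struct pmode_ctx c = { PMODE_CHECK, true, false };
	int ret;

	/* The caller simply re-polls on -EAGAIN, e.g. from slot update. */
	while ((ret = pmode_enter_step(&c)) == -EAGAIN) {
		printf("waiting: L2 still powered\n");
		c.l2_powered = false;	/* pretend the power-off completed */
	}
	printf("enter finished with %d\n", ret);
	return 0;
}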
++ */ ++ return -EAGAIN; + } + -+ if (tiler_required) { -+ int cnt = ++kbdev->tiler_needed_cnt; ++ return 0; ++} + -+ if (1 == cnt) -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; ++void kbase_backend_slot_update(struct kbase_device *kbdev) ++{ ++ int js; + -+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (change_gpu_state) { -+ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, -+ NULL, 0u, (u32) kbdev->shader_needed_bitmap); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ struct kbase_jd_atom *katom[2]; ++ int idx; + -+ kbase_timeline_pm_cores_func(kbdev, -+ KBASE_PM_FUNC_ID_REQUEST_CORES_START, -+ change_gpu_state); -+ kbase_pm_update_cores_state_nolock(kbdev); -+ kbase_timeline_pm_cores_func(kbdev, -+ KBASE_PM_FUNC_ID_REQUEST_CORES_END, -+ change_gpu_state); ++ katom[0] = kbase_gpu_inspect(kbdev, js, 0); ++ katom[1] = kbase_gpu_inspect(kbdev, js, 1); ++ WARN_ON(katom[1] && !katom[0]); ++ ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ bool cores_ready; ++ int ret; ++ ++ if (!katom[idx]) ++ continue; ++ ++ switch (katom[idx]->gpu_rb_state) { ++ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: ++ /* Should be impossible */ ++ WARN(1, "Attempting to update atom not in ringbuffer\n"); ++ break; ++ ++ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: ++ if (katom[idx]->atom_flags & ++ KBASE_KATOM_FLAG_X_DEP_BLOCKED) ++ break; ++ ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: ++ if (kbase_gpu_check_secure_atoms(kbdev, ++ !kbase_jd_katom_is_protected( ++ katom[idx]))) ++ break; ++ ++ if ((idx == 1) && (kbase_jd_katom_is_protected( ++ katom[0]) != ++ kbase_jd_katom_is_protected( ++ katom[1]))) ++ break; ++ ++ if (kbdev->protected_mode_transition) ++ break; ++ ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: ++ ++ /* ++ * Exiting protected mode must be done before ++ * the references on the cores are taken as ++ * a power down the L2 is required which ++ * can't happen after the references for this ++ * atom are taken. ++ */ ++ ++ if (!kbase_gpu_in_protected_mode(kbdev) && ++ kbase_jd_katom_is_protected(katom[idx])) { ++ /* Atom needs to transition into protected mode. */ ++ ret = kbase_jm_enter_protected_mode(kbdev, ++ katom, idx, js); ++ if (ret) ++ break; ++ } else if (kbase_gpu_in_protected_mode(kbdev) && ++ !kbase_jd_katom_is_protected(katom[idx])) { ++ /* Atom needs to transition out of protected mode. */ ++ ret = kbase_jm_exit_protected_mode(kbdev, ++ katom, idx, js); ++ if (ret) ++ break; ++ } ++ katom[idx]->protected_state.exit = ++ KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ ++ /* Atom needs no protected mode transition. */ ++ ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: ++ if (katom[idx]->will_fail_event_code) { ++ kbase_gpu_mark_atom_for_return(kbdev, ++ katom[idx]); ++ /* Set EVENT_DONE so this atom will be ++ completed, not unpulled. */ ++ katom[idx]->event_code = ++ BASE_JD_EVENT_DONE; ++ /* Only return if head atom or previous ++ * atom already removed - as atoms must ++ * be returned in order. 
*/ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, katom[idx]); ++ } ++ break; ++ } ++ ++ cores_ready = ++ kbasep_js_job_check_ref_cores(kbdev, js, ++ katom[idx]); ++ ++ if (katom[idx]->event_code == ++ BASE_JD_EVENT_PM_EVENT) { ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_RETURN_TO_JS; ++ break; ++ } ++ ++ if (!cores_ready) ++ break; ++ ++ kbase_js_affinity_retain_slot_cores(kbdev, js, ++ katom[idx]->affinity); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_WAITING_AFFINITY; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: ++ if (!kbase_gpu_rmu_workaround(kbdev, js)) ++ break; ++ ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_READY; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_READY: ++ ++ if (idx == 1) { ++ /* Only submit if head atom or previous ++ * atom already submitted */ ++ if ((katom[0]->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED && ++ katom[0]->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) ++ break; ++ ++ /* If intra-slot serialization in use ++ * then don't submit atom to NEXT slot ++ */ ++ if (kbdev->serialize_jobs & ++ KBASE_SERIALIZE_INTRA_SLOT) ++ break; ++ } ++ ++ /* If inter-slot serialization in use then don't ++ * submit atom if any other slots are in use */ ++ if ((kbdev->serialize_jobs & ++ KBASE_SERIALIZE_INTER_SLOT) && ++ other_slots_busy(kbdev, js)) ++ break; ++ ++ if ((kbdev->serialize_jobs & ++ KBASE_SERIALIZE_RESET) && ++ kbase_reset_gpu_active(kbdev)) ++ break; ++ ++ /* Check if this job needs the cycle counter ++ * enabled before submission */ ++ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) ++ kbase_pm_request_gpu_cycle_counter_l2_is_on( ++ kbdev); ++ ++ kbase_job_hw_submit(kbdev, katom[idx], js); ++ katom[idx]->gpu_rb_state = ++ KBASE_ATOM_GPU_RB_SUBMITTED; ++ ++ /* Inform power management at start/finish of ++ * atom so it can update its GPU utilisation ++ * metrics. 
*/ ++ kbase_pm_metrics_update(kbdev, ++ &katom[idx]->start_timestamp); ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_SUBMITTED: ++ /* Atom submitted to HW, nothing else to do */ ++ break; ++ ++ case KBASE_ATOM_GPU_RB_RETURN_TO_JS: ++ /* Only return if head atom or previous atom ++ * already removed - as atoms must be returned ++ * in order */ ++ if (idx == 0 || katom[0]->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ kbase_jm_return_atom_to_js(kbdev, ++ katom[idx]); ++ } ++ break; ++ } ++ } + } ++ ++ /* Warn if PRLAM-8987 affinity restrictions are violated */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) ++ WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || ++ kbase_gpu_atoms_submitted(kbdev, 1)) && ++ kbase_gpu_atoms_submitted(kbdev, 2)); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_request_cores); + -+void kbase_pm_unrequest_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores) ++void kbase_backend_run_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ kbase_pm_change_state change_gpu_state = 0u; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_gpu_enqueue_atom(kbdev, katom); ++ kbase_backend_slot_update(kbdev); ++} + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ ++ (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) ++ ++bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) ++{ ++ struct kbase_jd_atom *katom; ++ struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->hwaccess_lock); + -+ while (shader_cores) { -+ int bitnum = fls64(shader_cores) - 1; -+ u64 bit = 1ULL << bitnum; -+ int cnt; ++ katom = kbase_gpu_inspect(kbdev, js, 0); ++ next_katom = kbase_gpu_inspect(kbdev, js, 1); + -+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); ++ if (next_katom && katom->kctx == next_katom->kctx && ++ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && ++ HAS_DEP(next_katom) && ++ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) ++ != 0 || ++ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) ++ != 0)) { ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), ++ JS_COMMAND_NOP, NULL); ++ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + -+ cnt = --kbdev->shader_needed_cnt[bitnum]; ++ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); ++ KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as ++ [katom->kctx->as_nr]); ++ KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, ++ &kbdev->gpu_props.props.raw_props.js_features ++ [katom->slot_nr]); + -+ if (0 == cnt) { -+ kbdev->shader_needed_bitmap &= ~bit; ++ return true; ++ } + -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; ++ return false; ++} ++ ++void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ++ u32 completion_code, ++ u64 job_tail, ++ ktime_t *end_timestamp) ++{ ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); ++ struct kbase_context *kctx = katom->kctx; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* ++ * When a hard-stop is followed close after a soft-stop, the completion ++ * code may be set to STOPPED, even though the job is terminated ++ */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { ++ if (completion_code == BASE_JD_EVENT_STOPPED && ++ (katom->atom_flags & ++ KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { ++ completion_code = BASE_JD_EVENT_TERMINATED; + } 
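kbase_backend_slot_update(), kbase_gpu_irq_evict() and kbase_gpu_complete_hw() all index a per-slot ringbuffer through kbase_gpu_inspect(kbdev, js, 0/1): index 0 is the atom currently at the head of the slot and index 1 the one queued behind it, with SLOT_RB_SIZE entries in total. The ringbuffer implementation itself is not shown in this hunk, so the toy version below is only an assumed model of that two-entry inspect/enqueue/dequeue access pattern; all names are hypothetical.

/* Illustrative sketch: a two-entry per-slot ringbuffer with the same
 * inspect(slot, 0/1) / enqueue / dequeue access pattern used above. */
#include <stddef.h>
#include <stdio.h>

#define SLOT_RB_SIZE 2

struct toy_atom { int id; };

struct slot_rb {
	struct toy_atom *entries[SLOT_RB_SIZE];
	unsigned int read_idx;   /* next atom to complete (index 0) */
	unsigned int write_idx;  /* where the next atom is enqueued */
};

static struct toy_atom *rb_inspect(struct slot_rb *rb, int idx)
{
	if ((int)(rb->write_idx - rb->read_idx) <= idx)
		return NULL;	/* fewer than idx+1 atoms queued */
	return rb->entries[(rb->read_idx + idx) % SLOT_RB_SIZE];
}

static int rb_enqueue(struct slot_rb *rb, struct toy_atom *atom)
{
	if (rb->write_idx - rb->read_idx >= SLOT_RB_SIZE)
		return -1;	/* slot already holds two atoms */
	rb->entries[rb->write_idx % SLOT_RB_SIZE] = atom;
	rb->write_idx++;
	return 0;
}

static struct toy_atom *rb_dequeue(struct slot_rb *rb)
{
	struct toy_atom *atom = rb_inspect(rb, 0);

	if (atom)
		rb->read_idx++;
	return atom;
}

int main(void)
{
	struct slot_rb rb = { { NULL, NULL }, 0, 0 };
	struct toy_atom a = { 1 }, b = { 2 };

	rb_enqueue(&rb, &a);
	rb_enqueue(&rb, &b);
	printf("head=%d next=%d\n", rb_inspect(&rb, 0)->id, rb_inspect(&rb, 1)->id);
	rb_dequeue(&rb);
	printf("new head=%d\n", rb_inspect(&rb, 0)->id);
	return 0;
}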
++ } + -+ shader_cores &= ~bit; ++ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req & ++ BASE_JD_REQ_SKIP_CACHE_END)) && ++ completion_code != BASE_JD_EVENT_DONE && ++ !(completion_code & BASE_JD_SW_EVENT)) { ++ /* When a job chain fails, on a T60x or when ++ * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not ++ * flushed. To prevent future evictions causing possible memory ++ * corruption we need to flush the cache manually before any ++ * affected memory gets reused. */ ++ katom->need_cache_flush_cores_retained = katom->affinity; ++ kbase_pm_request_cores(kbdev, false, katom->affinity); ++ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { ++ if (kbdev->gpu_props.num_core_groups > 1 && ++ !(katom->affinity & ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask ++ ) && ++ (katom->affinity & ++ kbdev->gpu_props.props.coherency_info.group[1].core_mask ++ )) { ++ dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); ++ katom->need_cache_flush_cores_retained = ++ katom->affinity; ++ kbase_pm_request_cores(kbdev, false, ++ katom->affinity); ++ } + } + -+ if (tiler_required) { -+ int cnt; ++ katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); ++ kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + -+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); ++ if (completion_code == BASE_JD_EVENT_STOPPED) { ++ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, ++ 0); + -+ cnt = --kbdev->tiler_needed_cnt; ++ /* ++ * Dequeue next atom from ringbuffers on same slot if required. ++ * This atom will already have been removed from the NEXT ++ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that ++ * the atoms on this slot are returned in the correct order. ++ */ ++ if (next_katom && katom->kctx == next_katom->kctx && ++ next_katom->sched_priority == ++ katom->sched_priority) { ++ kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); ++ kbase_jm_return_atom_to_js(kbdev, next_katom); ++ } ++ } else if (completion_code != BASE_JD_EVENT_DONE) { ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ int i; + -+ if (0 == cnt) -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; ++#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 ++ KBASE_TRACE_DUMP(kbdev); ++#endif ++ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); ++ ++ /* ++ * Remove all atoms on the same context from ringbuffers. This ++ * will not remove atoms that are already on the GPU, as these ++ * are guaranteed not to have fail dependencies on the failed ++ * atom. 
++ */ ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { ++ struct kbase_jd_atom *katom_idx0 = ++ kbase_gpu_inspect(kbdev, i, 0); ++ struct kbase_jd_atom *katom_idx1 = ++ kbase_gpu_inspect(kbdev, i, 1); ++ ++ if (katom_idx0 && katom_idx0->kctx == katom->kctx && ++ HAS_DEP(katom_idx0) && ++ katom_idx0->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Dequeue katom_idx0 from ringbuffer */ ++ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); ++ ++ if (katom_idx1 && ++ katom_idx1->kctx == katom->kctx ++ && HAS_DEP(katom_idx1) && ++ katom_idx0->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Dequeue katom_idx1 from ringbuffer */ ++ kbase_gpu_dequeue_atom(kbdev, i, ++ end_timestamp); ++ ++ katom_idx1->event_code = ++ BASE_JD_EVENT_STOPPED; ++ kbase_jm_return_atom_to_js(kbdev, ++ katom_idx1); ++ } ++ katom_idx0->event_code = BASE_JD_EVENT_STOPPED; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx0); ++ ++ } else if (katom_idx1 && ++ katom_idx1->kctx == katom->kctx && ++ HAS_DEP(katom_idx1) && ++ katom_idx1->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Can not dequeue this atom yet - will be ++ * dequeued when atom at idx0 completes */ ++ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; ++ kbase_gpu_mark_atom_for_return(kbdev, ++ katom_idx1); ++ } ++ } + } + -+ if (change_gpu_state) { -+ KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, -+ NULL, 0u, (u32) kbdev->shader_needed_bitmap); ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, ++ js, completion_code); + -+ kbase_pm_update_cores_state_nolock(kbdev); ++ if (job_tail != 0 && job_tail != katom->jc) { ++ bool was_updated = (job_tail != katom->jc); + -+ /* Trace that any state change effectively completes immediately -+ * - no-one will wait on the state change */ -+ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ /* Some of the job has been executed, so we update the job chain ++ * address to where we should resume from */ ++ katom->jc = job_tail; ++ if (was_updated) ++ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, ++ katom, job_tail, js); + } -+} + -+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); ++ /* Only update the event code for jobs that weren't cancelled */ ++ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) ++ katom->event_code = (base_jd_event_code)completion_code; + -+enum kbase_pm_cores_ready -+kbase_pm_register_inuse_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores) -+{ -+ u64 prev_shader_needed; /* Just for tracing */ -+ u64 prev_shader_inuse; /* Just for tracing */ ++ kbase_device_trace_register_access(kctx, REG_WRITE, ++ JOB_CONTROL_REG(JOB_IRQ_CLEAR), ++ 1 << js); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Complete the job, and start new ones ++ * ++ * Also defer remaining work onto the workqueue: ++ * - Re-queue Soft-stopped jobs ++ * - For any other jobs, queue the job back into the dependency system ++ * - Schedule out the parent context if necessary, and schedule a new ++ * one in. 
++ */ ++#ifdef CONFIG_GPU_TRACEPOINTS ++ { ++ /* The atom in the HEAD */ ++ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, ++ 0); + -+ prev_shader_needed = kbdev->shader_needed_bitmap; -+ prev_shader_inuse = kbdev->shader_inuse_bitmap; ++ if (next_katom && next_katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ char js_string[16]; + -+ /* If desired_shader_state does not contain the requested cores, then -+ * power management is not attempting to powering those cores (most -+ * likely due to core availability policy) and a new job affinity must -+ * be chosen */ -+ if ((kbdev->pm.backend.desired_shader_state & shader_cores) != -+ shader_cores) { -+ return (kbdev->pm.backend.poweroff_wait_in_progress || -+ kbdev->pm.backend.pm_current_policy == NULL) ? -+ KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY; ++ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, ++ js_string, ++ sizeof(js_string)), ++ ktime_to_ns(*end_timestamp), ++ (u32)next_katom->kctx->id, 0, ++ next_katom->work_id); ++ kbdev->hwaccess.backend.slot_rb[js].last_context = ++ next_katom->kctx; ++ } else { ++ char js_string[16]; ++ ++ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, ++ js_string, ++ sizeof(js_string)), ++ ktime_to_ns(ktime_get()), 0, 0, ++ 0); ++ kbdev->hwaccess.backend.slot_rb[js].last_context = 0; ++ } + } ++#endif + -+ if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || -+ (tiler_required && !kbdev->tiler_available_bitmap)) { -+ /* Trace ongoing core transition */ -+ kbase_timeline_pm_l2_transition_start(kbdev); -+ return KBASE_CORES_NOT_READY; ++ if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) ++ kbase_reset_gpu_silent(kbdev); ++ ++ if (completion_code == BASE_JD_EVENT_STOPPED) ++ katom = kbase_jm_return_atom_to_js(kbdev, katom); ++ else ++ katom = kbase_jm_complete(kbdev, katom, end_timestamp); ++ ++ if (katom) { ++ /* Cross-slot dependency has now become runnable. Try to submit ++ * it. */ ++ ++ /* Check if there are lower priority jobs to soft stop */ ++ kbase_job_slot_ctx_priority_check_locked(kctx, katom); ++ ++ kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } + -+ /* If we started to trace a state change, then trace it has being -+ * finished by now, at the very latest */ -+ kbase_pm_trace_check_and_finish_state_change(kbdev); -+ /* Trace core transition done */ -+ kbase_timeline_pm_l2_transition_done(kbdev); ++ /* Job completion may have unblocked other atoms. 
Try to update all job ++ * slots */ ++ kbase_backend_slot_update(kbdev); ++} + -+ while (shader_cores) { -+ int bitnum = fls64(shader_cores) - 1; -+ u64 bit = 1ULL << bitnum; -+ int cnt; ++void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) ++{ ++ int js; + -+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ cnt = --kbdev->shader_needed_cnt[bitnum]; ++ /* Reset should always take the GPU out of protected mode */ ++ WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + -+ if (0 == cnt) -+ kbdev->shader_needed_bitmap &= ~bit; ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int atom_idx = 0; ++ int idx; + -+ /* shader_inuse_cnt should not overflow because there can only -+ * be a very limited number of jobs on the h/w at one time */ ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, atom_idx); ++ bool keep_in_jm_rb = false; + -+ kbdev->shader_inuse_cnt[bitnum]++; -+ kbdev->shader_inuse_bitmap |= bit; ++ if (!katom) ++ break; ++ if (katom->protected_state.exit == ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) ++ { ++ KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + -+ shader_cores &= ~bit; -+ } ++ kbase_vinstr_resume(kbdev->vinstr_ctx); + -+ if (tiler_required) { -+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); ++ /* protected mode sanity checks */ ++ KBASE_DEBUG_ASSERT_MSG( ++ kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), ++ "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", ++ kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); ++ KBASE_DEBUG_ASSERT_MSG( ++ (kbase_jd_katom_is_protected(katom) && js == 0) || ++ !kbase_jd_katom_is_protected(katom), ++ "Protected atom on JS%d not supported", js); ++ } ++ if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) ++ keep_in_jm_rb = true; + -+ --kbdev->tiler_needed_cnt; ++ kbase_gpu_release_atom(kbdev, katom, NULL); + -+ kbdev->tiler_inuse_cnt++; ++ /* ++ * If the atom wasn't on HW when the reset was issued ++ * then leave it in the RB and next time we're kicked ++ * it will be processed again from the starting state. ++ */ ++ if (keep_in_jm_rb) { ++ kbasep_js_job_check_deref_cores(kbdev, katom); ++ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; ++ katom->affinity = 0; ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ /* As the atom was not removed, increment the ++ * index so that we read the correct atom in the ++ * next iteration. */ ++ atom_idx++; ++ continue; ++ } + -+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0); ++ /* ++ * The atom was on the HW when the reset was issued ++ * all we can do is fail the atom. 
++ */ ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ kbase_jm_complete(kbdev, katom, end_timestamp); ++ } + } + -+ if (prev_shader_needed != kbdev->shader_needed_bitmap) -+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, -+ NULL, 0u, (u32) kbdev->shader_needed_bitmap); ++ kbdev->protected_mode_transition = false; ++} + -+ if (prev_shader_inuse != kbdev->shader_inuse_bitmap) -+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, -+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap); ++static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, ++ int js, ++ struct kbase_jd_atom *katom, ++ u32 action) ++{ ++ u32 hw_action = action & JS_COMMAND_MASK; + -+ return KBASE_CORES_READY; ++ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); ++ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, ++ katom->core_req, katom); ++ katom->kctx->blocked_js[js][katom->sched_priority] = true; +} + -+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores); ++static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ u32 action, ++ bool disjoint) ++{ ++ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_gpu_mark_atom_for_return(kbdev, katom); ++ katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + -+void kbase_pm_release_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores) ++ if (disjoint) ++ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, ++ katom); ++} ++ ++static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ -+ kbase_pm_change_state change_gpu_state = 0u; ++ if (katom->x_post_dep) { ++ struct kbase_jd_atom *dep_atom = katom->x_post_dep; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ if (dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && ++ dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_RETURN_TO_JS) ++ return dep_atom->slot_nr; ++ } ++ return -1; ++} + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++static void kbase_job_evicted(struct kbase_jd_atom *katom) ++{ ++ kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, ++ katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); ++} + -+ while (shader_cores) { -+ int bitnum = fls64(shader_cores) - 1; -+ u64 bit = 1ULL << bitnum; -+ int cnt; ++bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js, ++ struct kbase_jd_atom *katom, ++ u32 action) ++{ ++ struct kbase_jd_atom *katom_idx0; ++ struct kbase_jd_atom *katom_idx1; + -+ KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0); ++ bool katom_idx0_valid, katom_idx1_valid; + -+ cnt = --kbdev->shader_inuse_cnt[bitnum]; ++ bool ret = false; + -+ if (0 == cnt) { -+ kbdev->shader_inuse_bitmap &= ~bit; -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; -+ } ++ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; ++ int prio_idx0 = 0, prio_idx1 = 0; + -+ shader_cores &= ~bit; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); ++ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); ++ ++ if (katom_idx0) ++ prio_idx0 = katom_idx0->sched_priority; ++ if (katom_idx1) ++ prio_idx1 = katom_idx1->sched_priority; ++ ++ if (katom) { ++ katom_idx0_valid = (katom_idx0 == katom); ++ /* If idx0 is to be removed and idx1 is on the same context, ++ * then idx1 must also be removed otherwise the atoms might be ++ * returned out of order */ ++ if (katom_idx1) ++ katom_idx1_valid = 
(katom_idx1 == katom) || ++ (katom_idx0_valid && ++ (katom_idx0->kctx == ++ katom_idx1->kctx)); ++ else ++ katom_idx1_valid = false; ++ } else { ++ katom_idx0_valid = (katom_idx0 && ++ (!kctx || katom_idx0->kctx == kctx)); ++ katom_idx1_valid = (katom_idx1 && ++ (!kctx || katom_idx1->kctx == kctx) && ++ prio_idx0 == prio_idx1); + } + -+ if (tiler_required) { -+ int cnt; ++ if (katom_idx0_valid) ++ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); ++ if (katom_idx1_valid) ++ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + -+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0); ++ if (katom_idx0_valid) { ++ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Simple case - just dequeue and return */ ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ if (katom_idx1_valid) { ++ kbase_gpu_dequeue_atom(kbdev, js, NULL); ++ katom_idx1->event_code = ++ BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx1); ++ katom_idx1->kctx->blocked_js[js][prio_idx1] = ++ true; ++ } + -+ cnt = --kbdev->tiler_inuse_cnt; ++ katom_idx0->event_code = ++ BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ kbase_jm_return_atom_to_js(kbdev, katom_idx0); ++ katom_idx0->kctx->blocked_js[js][prio_idx0] = true; ++ } else { ++ /* katom_idx0 is on GPU */ ++ if (katom_idx1 && katom_idx1->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* katom_idx0 and katom_idx1 are on GPU */ + -+ if (0 == cnt) -+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; -+ } ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), NULL) == 0) { ++ /* idx0 has already completed - stop ++ * idx1 if needed*/ ++ if (katom_idx1_valid) { ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } else { ++ /* idx1 is in NEXT registers - attempt ++ * to remove */ ++ kbase_reg_write(kbdev, ++ JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), ++ JS_COMMAND_NOP, NULL); + -+ if (change_gpu_state) { -+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, -+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap); ++ if (kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_LO), NULL) ++ != 0 || ++ kbase_reg_read(kbdev, ++ JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_HI), NULL) ++ != 0) { ++ /* idx1 removed successfully, ++ * will be handled in IRQ */ ++ kbase_job_evicted(katom_idx1); ++ kbase_gpu_remove_atom(kbdev, ++ katom_idx1, ++ action, true); ++ stop_x_dep_idx1 = ++ should_stop_x_dep_slot(katom_idx1); + -+ kbase_timeline_pm_cores_func(kbdev, -+ KBASE_PM_FUNC_ID_RELEASE_CORES_START, -+ change_gpu_state); -+ kbase_pm_update_cores_state_nolock(kbdev); -+ kbase_timeline_pm_cores_func(kbdev, -+ KBASE_PM_FUNC_ID_RELEASE_CORES_END, -+ change_gpu_state); ++ /* stop idx0 if still on GPU */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx0, ++ action); ++ ret = true; ++ } else if (katom_idx1_valid) { ++ /* idx0 has already completed, ++ * stop idx1 if needed */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } ++ } else if (katom_idx1_valid) { ++ /* idx1 not on GPU but must be dequeued*/ + -+ /* Trace that any state change completed immediately */ -+ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ /* idx1 will be handled in IRQ */ ++ kbase_gpu_remove_atom(kbdev, katom_idx1, action, ++ false); ++ /* stop idx0 */ ++ /* This will be repeated for anything removed ++ * from the next registers, since their normal ++ * flow was also interrupted, and this function ++ * might not enter disjoint state e.g. 
if we ++ * don't actually do a hard stop on the head ++ * atom */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx0, ++ action); ++ ret = true; ++ } else { ++ /* no atom in idx1 */ ++ /* just stop idx0 */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx0, ++ action); ++ ret = true; ++ } ++ } ++ } else if (katom_idx1_valid) { ++ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { ++ /* Mark for return */ ++ /* idx1 will be returned once idx0 completes */ ++ kbase_gpu_remove_atom(kbdev, katom_idx1, action, ++ false); ++ } else { ++ /* idx1 is on GPU */ ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), NULL) == 0) { ++ /* idx0 has already completed - stop idx1 */ ++ kbase_gpu_stop_atom(kbdev, js, katom_idx1, ++ action); ++ ret = true; ++ } else { ++ /* idx1 is in NEXT registers - attempt to ++ * remove */ ++ kbase_reg_write(kbdev, JOB_SLOT_REG(js, ++ JS_COMMAND_NEXT), ++ JS_COMMAND_NOP, NULL); ++ ++ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_LO), NULL) != 0 || ++ kbase_reg_read(kbdev, JOB_SLOT_REG(js, ++ JS_HEAD_NEXT_HI), NULL) != 0) { ++ /* idx1 removed successfully, will be ++ * handled in IRQ once idx0 completes */ ++ kbase_job_evicted(katom_idx1); ++ kbase_gpu_remove_atom(kbdev, katom_idx1, ++ action, ++ false); ++ } else { ++ /* idx0 has already completed - stop ++ * idx1 */ ++ kbase_gpu_stop_atom(kbdev, js, ++ katom_idx1, ++ action); ++ ret = true; ++ } ++ } ++ } + } -+} + -+KBASE_EXPORT_TEST_API(kbase_pm_release_cores); + -+void kbase_pm_request_cores_sync(struct kbase_device *kbdev, -+ bool tiler_required, -+ u64 shader_cores) ++ if (stop_x_dep_idx0 != -1) ++ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, ++ NULL, action); ++ ++ if (stop_x_dep_idx1 != -1) ++ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, ++ NULL, action); ++ ++ return ret; ++} ++ ++void kbase_gpu_cacheclean(struct kbase_device *kbdev) +{ -+ unsigned long flags; ++ /* Limit the number of loops to avoid a hang if the interrupt is missed ++ */ ++ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + -+ kbase_pm_wait_for_poweroff_complete(kbdev); ++ mutex_lock(&kbdev->cacheclean_lock); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_pm_request_cores(kbdev, tiler_required, shader_cores); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* use GPU_COMMAND completion solution */ ++ /* clean & invalidate the caches */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAN_INV_CACHES, NULL); + -+ kbase_pm_check_transitions_sync(kbdev); -+} ++ /* wait for cache flush to complete before continuing */ ++ while (--max_loops && ++ (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & ++ CLEAN_CACHES_COMPLETED) == 0) ++ ; + -+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); ++ /* clear the CLEAN_CACHES_COMPLETED irq */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, ++ CLEAN_CACHES_COMPLETED); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), ++ CLEAN_CACHES_COMPLETED, NULL); ++ KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state != ++ KBASE_INSTR_STATE_CLEANING, ++ "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); + -+void kbase_pm_request_l2_caches(struct kbase_device *kbdev) ++ mutex_unlock(&kbdev->cacheclean_lock); ++} ++ ++void kbase_backend_cacheclean(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ unsigned long flags; -+ 
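kbase_gpu_cacheclean() above follows a fixed pattern for synchronous GPU commands: issue GPU_COMMAND_CLEAN_INV_CACHES, spin on GPU_IRQ_RAWSTAT with a loop bound (KBASE_CLEAN_CACHE_MAX_LOOPS) so a missed interrupt cannot hang the CPU, then acknowledge CLEAN_CACHES_COMPLETED through GPU_IRQ_CLEAR. The sketch below reproduces that issue/poll-with-bound/acknowledge shape against a fake register file; the register names, constants and helpers are stand-ins, not the real register map.

/* Illustrative sketch: issue a command, poll a status bit with a loop
 * bound, then acknowledge it - the shape used by the cache clean above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CMD_CLEAN_INV_CACHES   0x08u
#define IRQ_CLEAN_DONE         (1u << 17)
#define MAX_POLL_LOOPS         100000u

/* Fake register file standing in for the GPU control registers. */
static struct {
	uint32_t command;
	uint32_t irq_rawstat;
} fake_regs;

static void reg_write(uint32_t *reg, uint32_t val)
{
	*reg = val;
	/* Pretend the hardware completes the clean immediately. */
	if (reg == &fake_regs.command && val == CMD_CLEAN_INV_CACHES)
		fake_regs.irq_rawstat |= IRQ_CLEAN_DONE;
}

static uint32_t reg_read(const uint32_t *reg)
{
	return *reg;
}

static bool cache_clean_sync(void)
{
	uint32_t loops = MAX_POLL_LOOPS;

	/* 1. Issue the command. */
	reg_write(&fake_regs.command, CMD_CLEAN_INV_CACHES);

	/* 2. Poll for completion, but never spin forever. */
	while (--loops &&
	       !(reg_read(&fake_regs.irq_rawstat) & IRQ_CLEAN_DONE))
		;

	if (!loops)
		return false;	/* gave up: treat as a missed interrupt */

	/* 3. Acknowledge the status bit so the next command starts clean. */
	fake_regs.irq_rawstat &= ~IRQ_CLEAN_DONE;
	return true;
}

int main(void)
{
	printf("cache clean %s\n", cache_clean_sync() ? "completed" : "timed out");
	return 0;
}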
u32 prior_l2_users_count; ++ if (katom->need_cache_flush_cores_retained) { ++ unsigned long flags; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_gpu_cacheclean(kbdev); + -+ prior_l2_users_count = kbdev->l2_users_count++; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_unrequest_cores(kbdev, false, ++ katom->need_cache_flush_cores_retained); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ katom->need_cache_flush_cores_retained = 0; ++ } ++} + -+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); ++void kbase_backend_complete_wq(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) ++{ ++ /* ++ * If cache flush required due to HW workaround then perform the flush ++ * now ++ */ ++ kbase_backend_cacheclean(kbdev, katom); + -+ /* if the GPU is reset while the l2 is on, l2 will be off but -+ * prior_l2_users_count will be > 0. l2_available_bitmap will have been -+ * set to 0 though by kbase_pm_init_hw */ -+ if (!prior_l2_users_count || !kbdev->l2_available_bitmap) -+ kbase_pm_check_transitions_nolock(kbdev); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && ++ (katom->core_req & BASE_JD_REQ_FS) && ++ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT && ++ (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) && ++ !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) { ++ dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n"); ++ if (kbasep_10969_workaround_clamp_coordinates(katom)) { ++ /* The job had a TILE_RANGE_FAULT after was soft-stopped ++ * Due to an HW issue we try to execute the job again. ++ */ ++ dev_dbg(kbdev->dev, ++ "Clamping has been executed, try to rerun the job\n" ++ ); ++ katom->event_code = BASE_JD_EVENT_STOPPED; ++ katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; ++ } ++ } ++ ++ /* Clear the coreref_state now - while check_deref_cores() may not have ++ * been called yet, the caller will have taken a copy of this field. If ++ * this is not done, then if the atom is re-scheduled (following a soft ++ * stop) then the core reference would not be retaken. 
*/ ++ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; ++ katom->affinity = 0; ++} ++ ++void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, ++ base_jd_core_req core_req, u64 affinity, ++ enum kbase_atom_coreref_state coreref_state) ++{ ++ unsigned long flags; + ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, ++ coreref_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ wait_event(kbdev->pm.backend.l2_powered_wait, -+ kbdev->pm.backend.l2_powered == 1); + -+ /* Trace that any state change completed immediately */ -+ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ if (!kbdev->pm.active_count) { ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ kbase_pm_update_active(kbdev); ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&kbdev->js_data.runpool_mutex); ++ } +} + -+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); -+ -+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) ++void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct kbasep_js_device_data *js_devdata; ++ unsigned long flags; ++ int js; + -+ kbdev->l2_users_count++; -+} ++ js_devdata = &kbdev->js_data; + -+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+void kbase_pm_release_l2_caches(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + -+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ int idx; + -+ --kbdev->l2_users_count; ++ for (idx = 0; idx < SLOT_RB_SIZE; idx++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, ++ js, ++ idx); + -+ if (!kbdev->l2_users_count) { -+ kbase_pm_check_transitions_nolock(kbdev); -+ /* Trace that any state change completed immediately */ -+ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ if (katom) ++ dev_info(kbdev->dev, ++ " js%d idx%d : katom=%p gpu_rb_state=%d\n", ++ js, idx, katom, katom->gpu_rb_state); ++ else ++ dev_info(kbdev->dev, " js%d idx%d : empty\n", ++ js, idx); ++ } + } ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h ++ ++ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 -index 000000000..611a90e66 +index 000000000..1e0e05ad3 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h -@@ -0,0 +1,227 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +@@ -0,0 +1,76 @@ +/* + * -+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -346677,225 +348674,74 @@ index 000000000..611a90e66 + + + -+/* -+ * Power policy API definitions -+ */ -+ -+#ifndef _KBASE_PM_POLICY_H_ -+#define _KBASE_PM_POLICY_H_ -+ -+/** -+ * kbase_pm_policy_init - Initialize power policy framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Must be called before calling any other policy function -+ * -+ * Return: 0 if the power policy framework was successfully -+ * initialized, -errno otherwise. -+ */ -+int kbase_pm_policy_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_policy_term - Terminate power policy framework -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_policy_term(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_update_active - Update the active power state of the GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Calls into the current power policy -+ */ -+void kbase_pm_update_active(struct kbase_device *kbdev); -+ -+/** -+ * kbase_pm_update_cores - Update the desired core state of the GPU -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Calls into the current power policy -+ */ -+void kbase_pm_update_cores(struct kbase_device *kbdev); -+ -+ -+enum kbase_pm_cores_ready { -+ KBASE_CORES_NOT_READY = 0, -+ KBASE_NEW_AFFINITY = 1, -+ KBASE_CORES_READY = 2 -+}; -+ + -+/** -+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores() -+ * -+ * @kbdev: The kbase device structure for the device -+ * @tiler_required: true if the tiler is required, false otherwise -+ * @shader_cores: A bitmask of shader cores which are necessary for the job -+ * -+ * When this function returns, the @shader_cores will be in the READY state. -+ * -+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the -+ * work of ensuring the requested cores will remain powered until a matching -+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate) -+ * is made. ++/* ++ * Register-based HW access backend specific APIs + */ -+void kbase_pm_request_cores_sync(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores); + -+/** -+ * kbase_pm_request_cores - Mark one or more cores as being required -+ * for jobs to be submitted -+ * -+ * @kbdev: The kbase device structure for the device -+ * @tiler_required: true if the tiler is required, false otherwise -+ * @shader_cores: A bitmask of shader cores which are necessary for the job -+ * -+ * This function is called by the job scheduler to mark one or more cores as -+ * being required to submit jobs that are ready to run. -+ * -+ * The cores requested are reference counted and a subsequent call to -+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be -+ * made to dereference the cores as being 'needed'. -+ * -+ * The active power policy will meet or exceed the requirements of the -+ * requested cores in the system. Any core transitions needed will be begun -+ * immediately, but they might not complete/the cores might not be available -+ * until a Power Management IRQ. -+ * -+ * Return: 0 if the cores were successfully requested, or -errno otherwise. 
-+ */ -+void kbase_pm_request_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores); ++#ifndef _KBASE_HWACCESS_GPU_H_ ++#define _KBASE_HWACCESS_GPU_H_ + -+/** -+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for -+ * jobs to be submitted. -+ * -+ * @kbdev: The kbase device structure for the device -+ * @tiler_required: true if the tiler is required, false otherwise -+ * @shader_cores: A bitmask of shader cores (as given to -+ * kbase_pm_request_cores() ) -+ * -+ * This function undoes the effect of kbase_pm_request_cores(). It should be -+ * used when a job is not going to be submitted to the hardware (e.g. the job is -+ * cancelled before it is enqueued). -+ * -+ * The active power policy will meet or exceed the requirements of the -+ * requested cores in the system. Any core transitions needed will be begun -+ * immediately, but they might not complete until a Power Management IRQ. -+ * -+ * The policy may use this as an indication that it can power down cores. -+ */ -+void kbase_pm_unrequest_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores); ++#include + +/** -+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job -+ * -+ * @kbdev: The kbase device structure for the device -+ * @tiler_required: true if the tiler is required, false otherwise -+ * @shader_cores: A bitmask of shader cores (as given to -+ * kbase_pm_request_cores() ) -+ * -+ * This function should be called after kbase_pm_request_cores() when the job -+ * is about to be submitted to the hardware. It will check that the necessary -+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to -+ * reflect that the job is now committed to being run. -+ * -+ * If the necessary cores are not currently available then the function will -+ * return %KBASE_CORES_NOT_READY and have no effect. -+ * -+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready, -+ * -+ * %KBASE_NEW_AFFINITY if the affinity requested is not allowed, ++ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * -+ * %KBASE_CORES_READY if the cores requested are already available -+ */ -+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores( -+ struct kbase_device *kbdev, -+ bool tiler_required, -+ u64 shader_cores); -+ -+/** -+ * kbase_pm_release_cores - Release cores after a job has run ++ * @kbdev: Device pointer ++ * @js: Job slot to evict from + * -+ * @kbdev: The kbase device structure for the device -+ * @tiler_required: true if the tiler is required, false otherwise -+ * @shader_cores: A bitmask of shader cores (as given to -+ * kbase_pm_register_inuse_cores() ) ++ * Evict the atom in the NEXT slot for the specified job slot. This function is ++ * called from the job complete IRQ handler when the previous job has failed. + * -+ * This function should be called when a job has finished running on the -+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously -+ * occurred. The reference counts of the specified cores will be decremented -+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy -+ * may then turn off any cores which are no longer 'inuse'. 
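// Editor's note: an illustrative sketch, not part of the patch hunks above or
// below. The mali_kbase_pm_policy.h text quoted on the removed side of this
// hunk describes a reference-counting protocol for shader cores: request them,
// register them as 'inuse' just before submission, and release (or unrequest)
// them afterwards. The call order, using the signatures quoted in the hunk,
// would look roughly like this; locking and the real submission path are
// omitted, and the recovery on failure is only one possible choice.
static void example_core_request_flow(struct kbase_device *kbdev,
				      bool tiler_required, u64 shader_cores)
{
	enum kbase_pm_cores_ready ready;

	/* 1. Mark the cores as needed; power transitions may finish later. */
	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);

	/* 2. Immediately before submitting the job, try to move the cores
	 * from 'needed' to 'inuse'. */
	ready = kbase_pm_register_inuse_cores(kbdev, tiler_required,
					      shader_cores);
	if (ready != KBASE_CORES_READY) {
		/* Cores not powered yet, or the affinity was rejected: drop
		 * the 'needed' reference and try again later. */
		kbase_pm_unrequest_cores(kbdev, tiler_required, shader_cores);
		return;
	}

	/* ... the job runs on the GPU ... */

	/* 3. Once the job has finished, drop the 'inuse' reference so the
	 * power policy may turn the cores off again. */
	kbase_pm_release_cores(kbdev, tiler_required, shader_cores);
}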
++ * Return: true if job evicted from NEXT registers, false otherwise + */ -+void kbase_pm_release_cores(struct kbase_device *kbdev, -+ bool tiler_required, u64 shader_cores); ++bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); + +/** -+ * kbase_pm_request_l2_caches - Request l2 caches -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Request the use of l2 caches for all core groups, power up, wait and prevent -+ * the power manager from powering down the l2 caches. -+ * -+ * This tells the power management that the caches should be powered up, and -+ * they should remain powered, irrespective of the usage of shader cores. This -+ * does not return until the l2 caches are powered up. -+ * -+ * The caller must call kbase_pm_release_l2_caches() when they are finished -+ * to allow normal power management of the l2 caches to resume. ++ * kbase_gpu_complete_hw - Complete an atom on job slot js + * -+ * This should only be used when power management is active. ++ * @kbdev: Device pointer ++ * @js: Job slot that has completed ++ * @completion_code: Event code from job that has completed ++ * @job_tail: The tail address from the hardware if the job has partially ++ * completed ++ * @end_timestamp: Time of completion + */ -+void kbase_pm_request_l2_caches(struct kbase_device *kbdev); ++void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ++ u32 completion_code, ++ u64 job_tail, ++ ktime_t *end_timestamp); + +/** -+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Increment the count of l2 users but do not attempt to power on the l2 ++ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * -+ * It is the callers responsibility to ensure that the l2 is already powered up -+ * and to eventually call kbase_pm_release_l2_caches() ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * @idx: Index into ringbuffer. 0 is the job currently running on ++ * the slot, 1 is the job waiting, all other values are invalid. ++ * Return: The atom at that position in the ringbuffer ++ * or NULL if no atom present + */ -+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev); ++struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, ++ int idx); + +/** -+ * kbase_pm_request_l2_caches - Release l2 caches -+ * -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * -+ * Release the use of l2 caches for all core groups and allow the power manager -+ * to power them down when necessary. -+ * -+ * This tells the power management that the caches can be powered down if -+ * necessary, with respect to the usage of shader cores. -+ * -+ * The caller must have called kbase_pm_request_l2_caches() prior to a call -+ * to this. ++ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers + * -+ * This should only be used when power management is active. 
++ * @kbdev: Device pointer + */ -+void kbase_pm_release_l2_caches(struct kbase_device *kbdev); ++void kbase_gpu_dump_slots(struct kbase_device *kbdev); + -+#endif /* _KBASE_PM_POLICY_H_ */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c ++#endif /* _KBASE_HWACCESS_GPU_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c new file mode 100644 -index 000000000..d08c628dd +index 000000000..54d8ddd80 --- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c -@@ -0,0 +1,103 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +@@ -0,0 +1,303 @@ +/* + * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -346910,291 +348756,301 @@ index 000000000..d08c628dd + + + -+#include -+#include -+#include -+#include -+ -+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, -+ u64 *system_time, struct timespec64 *ts) -+{ -+ u32 hi1, hi2; + -+ kbase_pm_request_gpu_cycle_counter(kbdev); + -+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled -+ * correctly */ -+ do { -+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), -+ NULL); -+ *cycle_counter = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); -+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), -+ NULL); -+ *cycle_counter |= (((u64) hi1) << 32); -+ } while (hi1 != hi2); ++/* ++ * Base kernel affinity manager APIs ++ */ + -+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled -+ * correctly */ -+ do { -+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), -+ NULL); -+ *system_time = kbase_reg_read(kbdev, -+ GPU_CONTROL_REG(TIMESTAMP_LO), NULL); -+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), -+ NULL); -+ *system_time |= (((u64) hi1) << 32); -+ } while (hi1 != hi2); ++#include ++#include "mali_kbase_js_affinity.h" ++#include "mali_kbase_hw.h" + -+ /* Record the CPU's idea of current time */ -+ ktime_get_raw_ts64(ts); ++#include + -+ kbase_pm_release_gpu_cycle_counter(kbdev); -+} + -+/** -+ * kbase_wait_write_flush - Wait for GPU write flush -+ * @kctx: Context pointer -+ * -+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush -+ * its write buffer. -+ * -+ * Only in use for BASE_HW_ISSUE_6367 -+ * -+ * Note : If GPU resets occur then the counters are reset to zero, the delay may -+ * not be as expected. -+ */ -+#ifndef CONFIG_MALI_NO_MALI -+void kbase_wait_write_flush(struct kbase_context *kctx) ++bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, ++ int js) +{ -+ u32 base_count = 0; -+ + /* -+ * The caller must be holding onto the kctx or the call is from -+ * userspace. 
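// Editor's note: an illustrative sketch, not part of the patch hunks. The
// mali_kbase_time.c context nearby reads CYCLE_COUNT_HI, then CYCLE_COUNT_LO,
// then CYCLE_COUNT_HI again and retries until the two high-word reads agree,
// so a low-word wrap between the reads can never produce a torn 64-bit value.
// The same pattern in isolation (read_hi()/read_lo() are hypothetical
// stand-ins for the real register accessors):
static u64 read_split_counter_tear_free(u32 (*read_hi)(void),
					u32 (*read_lo)(void))
{
	u32 hi1, hi2, lo;

	do {
		hi1 = read_hi();
		lo  = read_lo();
		hi2 = read_hi();
		/* If the low word overflowed into the high word between the
		 * two high-word reads, hi1 != hi2 and we simply retry. */
	} while (hi1 != hi2);

	return ((u64)hi1 << 32) | lo;
}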
++ * Here are the reasons for using job slot 2: ++ * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose) ++ * - In absence of the above, then: ++ * - Atoms with BASE_JD_REQ_COHERENT_GROUP ++ * - But, only when there aren't contexts with ++ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on ++ * all cores on slot 1 could be blocked by those using a coherent group ++ * on slot 2 ++ * - And, only when you actually have 2 or more coregroups - if you ++ * only have 1 coregroup, then having jobs for slot 2 implies they'd ++ * also be for slot 1, meaning you'll get interference from them. Jobs ++ * able to run on slot 2 could also block jobs that can only run on ++ * slot 1 (tiler jobs) + */ -+ kbase_pm_context_active(kctx->kbdev); -+ kbase_pm_request_gpu_cycle_counter(kctx->kbdev); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) ++ return true; + -+ while (true) { -+ u32 new_count; ++ if (js != 2) ++ return true; + -+ new_count = kbase_reg_read(kctx->kbdev, -+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); -+ /* First time around, just store the count. */ -+ if (base_count == 0) { -+ base_count = new_count; -+ continue; -+ } ++ /* Only deal with js==2 now: */ ++ if (kbdev->gpu_props.num_core_groups > 1) { ++ /* Only use slot 2 in the 2+ coregroup case */ ++ if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, ++ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == ++ false) { ++ /* ...But only when we *don't* have atoms that run on ++ * all cores */ + -+ /* No need to handle wrapping, unsigned maths works for this. */ -+ if ((new_count - base_count) > 1000) -+ break; ++ /* No specific check for BASE_JD_REQ_COHERENT_GROUP ++ * atoms - the policy will sort that out */ ++ return true; ++ } + } + -+ kbase_pm_release_gpu_cycle_counter(kctx->kbdev); -+ kbase_pm_context_idle(kctx->kbdev); ++ /* Above checks failed mean we shouldn't use slot 2 */ ++ return false; +} -+#endif /* CONFIG_MALI_NO_MALI */ -diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h -new file mode 100644 -index 000000000..433aa4b9c ---- /dev/null -+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h -@@ -0,0 +1,52 @@ ++ +/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ * As long as it has been decided to have a deeper modification of ++ * what job scheduler, power manager and affinity manager will ++ * implement, this function is just an intermediate step that ++ * assumes: ++ * - all working cores will be powered on when this is called. ++ * - largest current configuration is 2 core groups. ++ * - It has been decided not to have hardcoded values so the low ++ * and high cores in a core split will be evently distributed. ++ * - Odd combinations of core requirements have been filtered out ++ * and do not get to this function (e.g. CS+T+NSS is not ++ * supported here). ++ * - This function is frequently called and can be optimized, ++ * (see notes in loops), but as the functionallity will likely ++ * be modified, optimization has not been addressed. 
++*/ ++bool kbase_js_choose_affinity(u64 * const affinity, ++ struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, int js) ++{ ++ base_jd_core_req core_req = katom->core_req; ++ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; ++ u64 core_availability_mask; + ++ lockdep_assert_held(&kbdev->hwaccess_lock); + ++ core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); + -+#ifndef _KBASE_BACKEND_TIME_H_ -+#define _KBASE_BACKEND_TIME_H_ ++ /* ++ * If no cores are currently available (core availability policy is ++ * transitioning) then fail. ++ */ ++ if (0 == core_availability_mask) { ++ *affinity = 0; ++ return false; ++ } + -+/** -+ * kbase_backend_get_gpu_time() - Get current GPU time -+ * @kbdev: Device pointer -+ * @cycle_counter: Pointer to u64 to store cycle counter in -+ * @system_time: Pointer to u64 to store system time in -+ * @ts: Pointer to struct timespec64 to store current monotonic -+ * time in -+ */ -+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, -+ u64 *system_time, struct timespec64 *ts); ++ KBASE_DEBUG_ASSERT(js >= 0); + -+/** -+ * kbase_wait_write_flush() - Wait for GPU write flush -+ * @kctx: Context pointer -+ * -+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush -+ * its write buffer. -+ * -+ * If GPU resets occur then the counters are reset to zero, the delay may not be -+ * as expected. -+ * -+ * This function is only in use for BASE_HW_ISSUE_6367 -+ */ -+#ifdef CONFIG_MALI_NO_MALI -+static inline void kbase_wait_write_flush(struct kbase_context *kctx) -+{ -+} -+#else -+void kbase_wait_write_flush(struct kbase_context *kctx); -+#endif ++ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == ++ BASE_JD_REQ_T) { ++ /* If the hardware supports XAFFINITY then we'll only enable ++ * the tiler (which is the default so this is a no-op), ++ * otherwise enable shader core 0. */ ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) ++ *affinity = 1; ++ else ++ *affinity = 0; + -+#endif /* _KBASE_BACKEND_TIME_H_ */ -diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile -new file mode 100755 -index 000000000..35ff2f1ce ---- /dev/null -+++ b/drivers/gpu/arm/midgard/docs/Doxyfile -@@ -0,0 +1,126 @@ -+# -+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. 
-+# -+# ++ return true; ++ } + ++ if (1 == kbdev->gpu_props.num_cores) { ++ /* trivial case only one core, nothing to do */ ++ *affinity = core_availability_mask & ++ kbdev->pm.debug_core_mask[js]; ++ } else { ++ if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | ++ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { ++ if (js == 0 || num_core_groups == 1) { ++ /* js[0] and single-core-group systems just get ++ * the first core group */ ++ *affinity = ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask ++ & core_availability_mask & ++ kbdev->pm.debug_core_mask[js]; ++ } else { ++ /* js[1], js[2] use core groups 0, 1 for ++ * dual-core-group systems */ ++ u32 core_group_idx = ((u32) js) - 1; + ++ KBASE_DEBUG_ASSERT(core_group_idx < ++ num_core_groups); ++ *affinity = ++ kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask ++ & core_availability_mask & ++ kbdev->pm.debug_core_mask[js]; + -+############################################################################## ++ /* If the job is specifically targeting core ++ * group 1 and the core availability policy is ++ * keeping that core group off, then fail */ ++ if (*affinity == 0 && core_group_idx == 1 && ++ kbdev->pm.backend.cg1_disabled ++ == true) ++ katom->event_code = ++ BASE_JD_EVENT_PM_EVENT; ++ } ++ } else { ++ /* All cores are available when no core split is ++ * required */ ++ *affinity = core_availability_mask & ++ kbdev->pm.debug_core_mask[js]; ++ } ++ } + -+# This file contains per-module Doxygen configuration. Please do not add -+# extra settings to this file without consulting all stakeholders, as they -+# may cause override project-wide settings. -+# -+# Additionally, when defining aliases, macros, sections etc, use the module -+# name as a prefix e.g. gles_my_alias. ++ /* ++ * If no cores are currently available in the desired core group(s) ++ * (core availability policy is transitioning) then fail. ++ */ ++ if (*affinity == 0) ++ return false; + -+############################################################################## ++ /* Enable core 0 if tiler required for hardware without XAFFINITY ++ * support (notes above) */ ++ if (core_req & BASE_JD_REQ_T) { ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) ++ *affinity = *affinity | 1; ++ } + -+@INCLUDE = ../../bldsys/Doxyfile_common ++ return true; ++} + -+# The INPUT tag can be used to specify the files and/or directories that contain -+# documented source files. You may enter file names like "myfile.cpp" or -+# directories like "/usr/src/myproject". Separate the files or directories -+# with spaces. ++static inline bool kbase_js_affinity_is_violating( ++ struct kbase_device *kbdev, ++ u64 *affinities) ++{ ++ /* This implementation checks whether the two slots involved in Generic ++ * thread creation have intersecting affinity. This is due to micro- ++ * architectural issues where a job in slot A targetting cores used by ++ * slot B could prevent the job in slot B from making progress until the ++ * job in slot A has completed. 
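// Editor's note: a worked example, not part of the patch hunks. As the
// comment above explains, two job slots must not target overlapping shader
// cores; the check in kbase_js_affinity_is_violating() below reduces to a
// bitwise AND of the per-slot affinity masks:
//
//   tracked slot 1 affinity   = 0x0f   (cores 0-3)
//   proposed slot 2 affinity  = 0x3c   (cores 2-5)
//   intersection              = 0x0c   -> non-zero, submission would violate
//
//   proposed slot 2 affinity  = 0x30   (cores 4-5)
//   intersection              = 0x00   -> no overlap, submission is allowed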
++ */ ++ u64 affinity_set_left; ++ u64 affinity_set_right; ++ u64 intersection; + -+INPUT += ../../kernel/drivers/gpu/arm/midgard/ ++ KBASE_DEBUG_ASSERT(affinities != NULL); + -+############################################################################## -+# Everything below here is optional, and in most cases not required -+############################################################################## ++ affinity_set_left = affinities[1]; + -+# This tag can be used to specify a number of aliases that acts -+# as commands in the documentation. An alias has the form "name=value". -+# For example adding "sideeffect=\par Side Effects:\n" will allow you to -+# put the command \sideeffect (or @sideeffect) in the documentation, which -+# will result in a user-defined paragraph with heading "Side Effects:". -+# You can put \n's in the value part of an alias to insert newlines. ++ affinity_set_right = affinities[2]; + -+ALIASES += ++ /* A violation occurs when any bit in the left_set is also in the ++ * right_set */ ++ intersection = affinity_set_left & affinity_set_right; + -+# The ENABLED_SECTIONS tag can be used to enable conditional -+# documentation sections, marked by \if sectionname ... \endif. ++ return (bool) (intersection != (u64) 0u); ++} + -+ENABLED_SECTIONS += ++bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, ++ u64 affinity) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; + -+# If the value of the INPUT tag contains directories, you can use the -+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -+# and *.h) to filter out the source-files in the directories. If left -+# blank the following patterns are tested: -+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); ++ js_devdata = &kbdev->js_data; + -+FILE_PATTERNS += ++ memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, ++ sizeof(js_devdata->runpool_irq.slot_affinities)); + -+# The EXCLUDE tag can be used to specify files and/or directories that should -+# excluded from the INPUT source files. This way you can easily exclude a -+# subdirectory from a directory tree whose root is specified with the INPUT tag. -+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile ++ new_affinities[js] |= affinity; + ++ return kbase_js_affinity_is_violating(kbdev, new_affinities); ++} + -+# If the value of the INPUT tag contains directories, you can use the -+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -+# certain files from those directories. 
Note that the wildcards are matched -+# against the file with absolute path, so to exclude all test directories -+# for example use the pattern */test/* ++void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, ++ u64 affinity) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ u64 cores; + -+EXCLUDE_PATTERNS += ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); ++ js_devdata = &kbdev->js_data; + -+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -+# (namespaces, classes, functions, etc.) that should be excluded from the -+# output. The symbol name can be a fully qualified name, a word, or if the -+# wildcard * is used, a substring. Examples: ANamespace, AClass, -+# AClass::ANamespace, ANamespace::*Test ++ KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) ++ == false); + -+EXCLUDE_SYMBOLS += ++ cores = affinity; ++ while (cores) { ++ int bitnum = fls64(cores) - 1; ++ u64 bit = 1ULL << bitnum; ++ s8 cnt; + -+# The EXAMPLE_PATH tag can be used to specify one or more files or -+# directories that contain example code fragments that are included (see -+# the \include command). ++ cnt = ++ ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + -+EXAMPLE_PATH += ++ if (cnt == 1) ++ js_devdata->runpool_irq.slot_affinities[js] |= bit; + -+# The IMAGE_PATH tag can be used to specify one or more files or -+# directories that contain image that are included in the documentation (see -+# the \image command). ++ cores &= ~bit; ++ } ++} + -+IMAGE_PATH += ++void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, ++ u64 affinity) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ u64 cores; + -+# The INCLUDE_PATH tag can be used to specify one or more directories that -+# contain include files that are not input files but should be processed by -+# the preprocessor. ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); ++ js_devdata = &kbdev->js_data; + -+INCLUDE_PATH += ++ cores = affinity; ++ while (cores) { ++ int bitnum = fls64(cores) - 1; ++ u64 bit = 1ULL << bitnum; ++ s8 cnt; + -+# The PREDEFINED tag can be used to specify one or more macro names that -+# are defined before the preprocessor is started (similar to the -D option of -+# gcc). The argument of the tag is a list of macros of the form: name -+# or name=definition (no spaces). If the definition and the = are -+# omitted =1 is assumed. To prevent a macro definition from being -+# undefined via #undef or recursively expanded use the := operator -+# instead of the = operator. ++ KBASE_DEBUG_ASSERT( ++ js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); + -+PREDEFINED += ++ cnt = ++ --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + -+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -+# this tag can be used to specify a list of macro names that should be expanded. -+# The macro definition that is found in the sources will be used. -+# Use the PREDEFINED tag if you want to use a different macro definition. ++ if (0 == cnt) ++ js_devdata->runpool_irq.slot_affinities[js] &= ~bit; + -+EXPAND_AS_DEFINED += ++ cores &= ~bit; ++ } ++} + -+# The DOTFILE_DIRS tag can be used to specify one or more directories that -+# contain dot files that are included in the documentation (see the -+# \dotfile command). 
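// Editor's note: an illustrative sketch, not part of the patch hunks. The
// retain/release helpers above walk the affinity mask one set bit at a time:
// fls64() (linux/bitops.h) returns the 1-based index of the most significant
// set bit, that core's refcount is adjusted, the bit is cleared, and the loop
// repeats until the mask is empty. The loop shape in isolation, with a
// hypothetical visit_core() standing in for the refcount update:
static void for_each_set_core(u64 affinity, void (*visit_core)(int corenum))
{
	while (affinity) {
		int bitnum = fls64(affinity) - 1;	/* highest remaining core */
		u64 bit = 1ULL << bitnum;

		visit_core(bitnum);

		affinity &= ~bit;			/* this core is done */
	}
}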
++#if KBASE_TRACE_ENABLE ++void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ int slot_nr; + -+DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ js_devdata = &kbdev->js_data; + -diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot -new file mode 100755 -index 000000000..7ae05c2f8 ++ for (slot_nr = 0; slot_nr < 3; ++slot_nr) ++ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, ++ NULL, 0u, slot_nr, ++ (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); ++} ++#endif /* KBASE_TRACE_ENABLE */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +new file mode 100644 +index 000000000..35d9781ae --- /dev/null -+++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot -@@ -0,0 +1,112 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +@@ -0,0 +1,129 @@ +/* + * -+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -347211,108 +349067,125 @@ index 000000000..7ae05c2f8 + + + -+digraph policy_objects_diagram { -+ rankdir=LR; -+ size="12,8"; -+ compound=true; -+ -+ node [ shape = box ]; -+ -+ subgraph cluster_policy_queues { -+ low_queue [ shape=record label = "LowP | {ctx_lo | ... | ctx_i | ... | ctx_hi}" ]; -+ queues_middle_sep [ label="" shape=plaintext width=0 height=0 ]; -+ -+ rt_queue [ shape=record label = "RT | {ctx_lo | ... | ctx_j | ... 
| ctx_hi}" ]; -+ -+ label = "Policy's Queue(s)"; -+ } -+ -+ call_enqueue [ shape=plaintext label="enqueue_ctx()" ]; -+ -+ { -+ rank=same; -+ ordering=out; -+ call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ]; -+ call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ]; -+ -+ call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ]; -+ } -+ -+ subgraph cluster_runpool { -+ -+ as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ]; -+ as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ]; -+ as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ]; -+ as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ]; -+ -+ label = "Policy's Run Pool"; -+ } -+ -+ { -+ rank=same; -+ call_jdequeue [ shape=plaintext label="dequeue_job()" ]; -+ sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ]; -+ } -+ -+ { -+ rank=same; -+ ordering=out; -+ sstop [ shape=ellipse label="SS-Timer expires" ] -+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; -+ -+ irq [ label="IRQ" shape=ellipse ]; -+ -+ job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ]; -+ } -+ -+ hstop [ shape=ellipse label="HS-Timer expires" ] -+ -+ /* -+ * Edges -+ */ -+ -+ call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ]; -+ -+ low_queue:qr -> call_dequeue:w; -+ rt_queue:qr -> call_dequeue:w; -+ -+ call_dequeue -> as1 [lhead=cluster_runpool]; -+ -+ as1->call_jdequeue [ltail=cluster_runpool]; -+ call_jdequeue->jobslots:0; -+ call_jdequeue->sstop_dotfixup [ arrowhead=none]; -+ sstop_dotfixup->sstop [label="Spawn SS-Timer"]; -+ sstop->jobslots [label="SoftStop"]; -+ sstop->hstop [label="Spawn HS-Timer"]; -+ hstop->jobslots:ne [label="HardStop"]; -+ -+ -+ as3->call_ctxfinish:ne [ ltail=cluster_runpool ]; -+ call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ]; ++/* ++ * Affinity Manager internal APIs. ++ */ + -+ call_ctxfinish->call_ctxdone [constraint=false]; ++#ifndef _KBASE_JS_AFFINITY_H_ ++#define _KBASE_JS_AFFINITY_H_ + -+ call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false]; ++/** ++ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to ++ * submit a job to a particular job slot in the current status ++ * ++ * @kbdev: The kbase device structure of the device ++ * @js: Job slot number to check for allowance ++ * ++ * Will check if submitting to the given job slot is allowed in the current ++ * status. For example using job slot 2 while in soft-stoppable state and only ++ * having 1 coregroup is not allowed by the policy. This function should be ++ * called prior to submitting a job to a slot to make sure policy rules are not ++ * violated. ++ * ++ * The following locking conditions are made on the caller ++ * - it must hold hwaccess_lock ++ */ ++bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); + ++/** ++ * kbase_js_choose_affinity - Compute affinity for a given job. ++ * ++ * @affinity: Affinity bitmap computed ++ * @kbdev: The kbase device structure of the device ++ * @katom: Job chain of which affinity is going to be found ++ * @js: Slot the job chain is being submitted ++ * ++ * Currently assumes an all-on/all-off power management policy. ++ * Also assumes there is at least one core with tiler available. ++ * ++ * Returns true if a valid affinity was chosen, false if ++ * no cores were available. 
++ */ ++bool kbase_js_choose_affinity(u64 * const affinity, ++ struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ++ int js); + -+ { -+ jobslots->irq [constraint=false]; ++/** ++ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on ++ * job slot @js would cause a violation of affinity restrictions. ++ * ++ * @kbdev: Kbase device structure ++ * @js: The job slot to test ++ * @affinity: The affinity mask to test ++ * ++ * The following locks must be held by the caller ++ * - hwaccess_lock ++ * ++ * Return: true if the affinity would violate the restrictions ++ */ ++bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, ++ u64 affinity); + -+ irq->job_finish [constraint=false]; -+ } ++/** ++ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by ++ * a slot ++ * ++ * @kbdev: Kbase device structure ++ * @js: The job slot retaining the cores ++ * @affinity: The cores to retain ++ * ++ * The following locks must be held by the caller ++ * - hwaccess_lock ++ */ ++void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, ++ u64 affinity); + -+ irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ]; ++/** ++ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used ++ * by a slot ++ * ++ * @kbdev: Kbase device structure ++ * @js: Job slot ++ * @affinity: Bit mask of core to be released ++ * ++ * Cores must be released as soon as a job is dequeued from a slot's 'submit ++ * slots', and before another job is submitted to those slots. Otherwise, the ++ * refcount could exceed the maximum number submittable to a slot, ++ * %BASE_JM_SUBMIT_SLOTS. ++ * ++ * The following locks must be held by the caller ++ * - hwaccess_lock ++ */ ++void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, ++ u64 affinity); + ++/** ++ * kbase_js_debug_log_current_affinities - log the current affinities ++ * ++ * @kbdev: Kbase device structure ++ * ++ * Output to the Trace log the current tracked affinities on all slots ++ */ ++#if KBASE_TRACE_ENABLE ++void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev); ++#else /* KBASE_TRACE_ENABLE */ ++static inline void ++kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) ++{ +} -diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot -new file mode 100755 -index 000000000..159b993b7 ++#endif /* KBASE_TRACE_ENABLE */ ++ ++#endif /* _KBASE_JS_AFFINITY_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +new file mode 100644 +index 000000000..a8c1af23a --- /dev/null -+++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot -@@ -0,0 +1,63 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +@@ -0,0 +1,356 @@ +/* + * -+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -347328,685 +349201,841 @@ index 000000000..159b993b7 + + + ++/* ++ * Register-based HW access backend specific job scheduler APIs ++ */ + -+digraph policy_objects_diagram { -+ rankdir=LR -+ size="6,6" -+ compound=true; ++#include ++#include ++#include ++#include + -+ node [ shape = box ]; ++/* ++ * Define for when dumping is enabled. 
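// Editor's note: an illustrative sketch, not part of the patch hunks. It ties
// together the affinity-tracking calls declared above in the order their
// kernel-doc describes: check for a violation before submitting, retain the
// cores while the job occupies the slot, and release them as soon as the job
// is dequeued (and before the next one is submitted). Locking (hwaccess_lock)
// and the real ringbuffer handling are omitted.
static bool example_submit_with_affinity(struct kbase_device *kbdev,
					 int js, u64 affinity)
{
	if (kbase_js_affinity_would_violate(kbdev, js, affinity))
		return false;	/* would overlap another slot's cores */

	kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);

	/* ... submit the atom to slot 'js' ...
	 *
	 * Later, as soon as the atom is dequeued from the slot:
	 *	kbase_js_affinity_release_slot_cores(kbdev, js, affinity);
	 */
	return true;
}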
++ * This should not be based on the instrumentation level as whether dumping is ++ * enabled for a particular level is down to the integrator. However this is ++ * being used for now as otherwise the cinstr headers would be needed. ++ */ ++#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL) + -+ call_enqueue [ shape=plaintext label="enqueue ctx" ]; ++/* ++ * Hold the runpool_mutex for this ++ */ ++static inline bool timer_callback_should_run(struct kbase_device *kbdev) ++{ ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ s8 nr_running_ctxs; + ++ lockdep_assert_held(&kbdev->js_data.runpool_mutex); + -+ policy_queue [ label="Policy's Queue" ]; ++ /* Timer must stop if we are suspending */ ++ if (backend->suspend_timer) ++ return false; + -+ { -+ rank=same; -+ runpool [ label="Policy's Run Pool" ]; ++ /* nr_contexts_pullable is updated with the runpool_mutex. However, the ++ * locking in the caller gives us a barrier that ensures ++ * nr_contexts_pullable is up-to-date for reading */ ++ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + -+ ctx_finish [ label="ctx finished" ]; ++#ifdef CONFIG_MALI_DEBUG ++ if (kbdev->js_data.softstop_always) { ++ /* Debug support for allowing soft-stop on a single context */ ++ return true; + } ++#endif /* CONFIG_MALI_DEBUG */ + -+ { -+ rank=same; -+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { ++ /* Timeouts would have to be 4x longer (due to micro- ++ * architectural design) to support OpenCL conformance tests, so ++ * only run the timer when there's: ++ * - 2 or more CL contexts ++ * - 1 or more GLES contexts ++ * ++ * NOTE: We will treat a context that has both Compute and Non- ++ * Compute jobs will be treated as an OpenCL context (hence, we ++ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
++ */ ++ { ++ s8 nr_compute_ctxs = ++ kbasep_js_ctx_attr_count_on_runpool(kbdev, ++ KBASEP_JS_CTX_ATTR_COMPUTE); ++ s8 nr_noncompute_ctxs = nr_running_ctxs - ++ nr_compute_ctxs; + -+ job_finish [ label="Job finished" ]; ++ return (bool) (nr_compute_ctxs >= 2 || ++ nr_noncompute_ctxs > 0); ++ } ++ } else { ++ /* Run the timer callback whenever you have at least 1 context ++ */ ++ return (bool) (nr_running_ctxs > 0); + } ++} + ++static enum hrtimer_restart timer_callback(struct hrtimer *timer) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_backend_data *backend; ++ int s; ++ bool reset_needed = false; + ++ KBASE_DEBUG_ASSERT(timer != NULL); + -+ /* -+ * Edges -+ */ -+ -+ call_enqueue -> policy_queue; -+ -+ policy_queue->runpool [label="dequeue ctx" weight=0.1]; -+ runpool->policy_queue [label="requeue ctx" weight=0.1]; ++ backend = container_of(timer, struct kbase_backend_data, ++ scheduling_timer); ++ kbdev = container_of(backend, struct kbase_device, hwaccess.backend); ++ js_devdata = &kbdev->js_data; + -+ runpool->ctx_finish [ style=dotted ]; ++ /* Loop through the slots */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { ++ struct kbase_jd_atom *atom = NULL; + -+ runpool->jobslots [label="dequeue job" weight=0.1]; -+ jobslots->runpool [label="requeue job" weight=0.1]; ++ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { ++ atom = kbase_gpu_inspect(kbdev, s, 0); ++ KBASE_DEBUG_ASSERT(atom != NULL); ++ } + -+ jobslots->job_finish [ style=dotted ]; -+} -diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild -new file mode 100755 -index 000000000..602b15f52 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/Kbuild -@@ -0,0 +1,24 @@ -+# -+# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ if (atom != NULL) { ++ /* The current version of the model doesn't support ++ * Soft-Stop */ ++ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { ++ u32 ticks = atom->ticks++; + ++#if !CINSTR_DUMPING_ENABLED ++ u32 soft_stop_ticks, hard_stop_ticks, ++ gpu_reset_ticks; ++ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ soft_stop_ticks = ++ js_devdata->soft_stop_ticks_cl; ++ hard_stop_ticks = ++ js_devdata->hard_stop_ticks_cl; ++ gpu_reset_ticks = ++ js_devdata->gpu_reset_ticks_cl; ++ } else { ++ soft_stop_ticks = ++ js_devdata->soft_stop_ticks; ++ hard_stop_ticks = ++ js_devdata->hard_stop_ticks_ss; ++ gpu_reset_ticks = ++ js_devdata->gpu_reset_ticks_ss; ++ } + -+midgard_kbase-y += \ -+ ipa/mali_kbase_ipa_simple.o \ -+ ipa/mali_kbase_ipa.o ++ /* If timeouts have been changed then ensure ++ * that atom tick count is not greater than the ++ * new soft_stop timeout. This ensures that ++ * atoms do not miss any of the timeouts due to ++ * races between this worker and the thread ++ * changing the timeouts. 
*/ ++ if (backend->timeouts_updated && ++ ticks > soft_stop_ticks) ++ ticks = atom->ticks = soft_stop_ticks; + -+midgard_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o ++ /* Job is Soft-Stoppable */ ++ if (ticks == soft_stop_ticks) { ++ int disjoint_threshold = ++ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; ++ u32 softstop_flags = 0u; ++ /* Job has been scheduled for at least ++ * js_devdata->soft_stop_ticks ticks. ++ * Soft stop the slot so we can run ++ * other jobs. ++ */ ++ dev_dbg(kbdev->dev, "Soft-stop"); ++#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS ++ /* nr_user_contexts_running is updated ++ * with the runpool_mutex, but we can't ++ * take that here. ++ * ++ * However, if it's about to be ++ * increased then the new context can't ++ * run any jobs until they take the ++ * hwaccess_lock, so it's OK to observe ++ * the older value. ++ * ++ * Similarly, if it's about to be ++ * decreased, the last job from another ++ * context has already finished, so it's ++ * not too bad that we observe the older ++ * value and register a disjoint event ++ * when we try soft-stopping */ ++ if (js_devdata->nr_user_contexts_running ++ >= disjoint_threshold) ++ softstop_flags |= ++ JS_COMMAND_SW_CAUSES_DISJOINT; + -+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_tmix.c),) -+ midgard_kbase-y += ipa/mali_kbase_ipa_tmix.o -+endif -diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c -new file mode 100644 -index 000000000..0c9111700 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c -@@ -0,0 +1,589 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ kbase_job_slot_softstop_swflags(kbdev, ++ s, atom, softstop_flags); ++#endif ++ } else if (ticks == hard_stop_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->hard_stop_ticks_ss ticks. ++ * It should have been soft-stopped by ++ * now. Hard stop the slot. ++ */ ++#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ int ms = ++ js_devdata->scheduling_period_ns ++ / 1000000u; ++ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", ++ (unsigned long)ticks, ++ (unsigned long)ms); ++ kbase_job_slot_hardstop(atom->kctx, s, ++ atom); ++#endif ++ } else if (ticks == gpu_reset_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->gpu_reset_ticks_ss ticks. ++ * It should have left the GPU by now. ++ * Signal that the GPU needs to be ++ * reset. ++ */ ++ reset_needed = true; ++ } ++#else /* !CINSTR_DUMPING_ENABLED */ ++ /* NOTE: During CINSTR_DUMPING_ENABLED, we use ++ * the alternate timeouts, which makes the hard- ++ * stop and GPU reset timeout much longer. We ++ * also ensure that we don't soft-stop at all. ++ */ ++ if (ticks == js_devdata->soft_stop_ticks) { ++ /* Job has been scheduled for at least ++ * js_devdata->soft_stop_ticks. We do ++ * not soft-stop during ++ * CINSTR_DUMPING_ENABLED, however. 
++ */ ++ dev_dbg(kbdev->dev, "Soft-stop"); ++ } else if (ticks == ++ js_devdata->hard_stop_ticks_dumping) { ++ /* Job has been scheduled for at least ++ * js_devdata->hard_stop_ticks_dumping ++ * ticks. Hard stop the slot. ++ */ ++#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ int ms = ++ js_devdata->scheduling_period_ns ++ / 1000000u; ++ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", ++ (unsigned long)ticks, ++ (unsigned long)ms); ++ kbase_job_slot_hardstop(atom->kctx, s, ++ atom); ++#endif ++ } else if (ticks == ++ js_devdata->gpu_reset_ticks_dumping) { ++ /* Job has been scheduled for at least ++ * js_devdata->gpu_reset_ticks_dumping ++ * ticks. It should have left the GPU by ++ * now. Signal that the GPU needs to be ++ * reset. ++ */ ++ reset_needed = true; ++ } ++#endif /* !CINSTR_DUMPING_ENABLED */ ++ } ++ } ++ } ++#if KBASE_GPU_RESET_EN ++ if (reset_needed) { ++ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve."); + ++ if (kbase_prepare_to_reset_gpu_locked(kbdev)) ++ kbase_reset_gpu_locked(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ /* the timer is re-issued if there is contexts in the run-pool */ + -+#include -+#include -+#include -+#include "mali_kbase.h" -+#include "mali_kbase_ipa.h" -+#include "mali_kbase_ipa_debugfs.h" ++ if (backend->timer_running) ++ hrtimer_start(&backend->scheduling_timer, ++ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), ++ HRTIMER_MODE_REL); + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -+#include -+#else -+#include -+#define dev_pm_opp_find_freq_exact opp_find_freq_exact -+#define dev_pm_opp_get_voltage opp_get_voltage -+#define dev_pm_opp opp -+#endif -+#include ++ backend->timeouts_updated = false; + -+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { -+ &kbase_simple_ipa_model_ops, -+}; ++ return HRTIMER_NORESTART; ++} + -+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) ++void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) +{ -+ int err = 0; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ unsigned long flags; + -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+ if (model->ops->recalculate) { -+ err = model->ops->recalculate(model); -+ if (err) { -+ dev_err(model->kbdev->dev, -+ "recalculation of power model %s returned error %d\n", -+ model->ops->name, err); -+ } ++ if (!timer_callback_should_run(kbdev)) { ++ /* Take spinlock to force synchronisation with timer */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->timer_running = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* From now on, return value of timer_callback_should_run() will ++ * also cause the timer to not requeue itself. 
Its return value ++ * cannot change, because it depends on variables updated with ++ * the runpool_mutex held, which the caller of this must also ++ * hold */ ++ hrtimer_cancel(&backend->scheduling_timer); + } + -+ return err; ++ if (timer_callback_should_run(kbdev) && !backend->timer_running) { ++ /* Take spinlock to force synchronisation with timer */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->timer_running = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ hrtimer_start(&backend->scheduling_timer, ++ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), ++ HRTIMER_MODE_REL); ++ ++ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, ++ 0u); ++ } +} + -+static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, -+ const char *name) ++int kbase_backend_timer_init(struct kbase_device *kbdev) +{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { -+ struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+ if (!strcmp(ops->name, name)) -+ return ops; -+ } ++ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ backend->scheduling_timer.function = timer_callback; + -+ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ backend->timer_running = false; + -+ return NULL; ++ return 0; +} + -+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) ++void kbase_backend_timer_term(struct kbase_device *kbdev) +{ -+ atomic_set(&kbdev->ipa_use_configured_model, false); ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ ++ hrtimer_cancel(&backend->scheduling_timer); +} + -+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) ++void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ -+ atomic_set(&kbdev->ipa_use_configured_model, true); ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; ++ ++ backend->suspend_timer = true; ++ ++ kbase_backend_ctx_count_changed(kbdev); +} + -+const char *kbase_ipa_model_name_from_id(u32 gpu_id) ++void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ -+ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> -+ GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+ if (GPU_ID_IS_NEW_FORMAT(prod_id)) { -+ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { -+ case GPU_ID2_PRODUCT_TMIX: -+ return KBASE_IPA_FALLBACK_MODEL_NAME; -+ default: -+ return KBASE_IPA_FALLBACK_MODEL_NAME; -+ } -+ } ++ backend->suspend_timer = false; + -+ return KBASE_IPA_FALLBACK_MODEL_NAME; ++ kbase_backend_ctx_count_changed(kbdev); +} + -+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model) ++void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ -+ struct device_node *model_dt_node; -+ char compat_string[64]; ++ struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + -+ snprintf(compat_string, sizeof(compat_string), "arm,%s", -+ model->ops->name); ++ backend->timeouts_updated = true; ++} + -+ model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, -+ NULL, compat_string); -+ if (!model_dt_node && !model->missing_dt_node_warning) { -+ dev_warn(model->kbdev->dev, -+ "Couldn't find power_model DT node matching \'%s\'\n", -+ compat_string); -+ model->missing_dt_node_warning = true; -+ } +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h 
b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +new file mode 100644 +index 000000000..3f53779c6 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +@@ -0,0 +1,69 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ return model_dt_node; -+} + -+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, -+ const char *name, s32 *addr, -+ size_t num_elems, bool dt_required) -+{ -+ int err, i; -+ struct device_node *model_dt_node = get_model_dt_node(model); -+ char *origin; + -+ err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + -+ if (err && dt_required) { -+ memset(addr, 0, sizeof(s32) * num_elems); -+ dev_warn(model->kbdev->dev, -+ "Error %d, no DT entry: %s.%s = %zu*[0]\n", -+ err, model->ops->name, name, num_elems); -+ origin = "zero"; -+ } else if (err && !dt_required) { -+ origin = "default"; -+ } else /* !err */ { -+ origin = "DT"; -+ } ++/* ++ * Register-based HW access backend specific job scheduler APIs ++ */ + -+ /* Create a unique debugfs entry for each element */ -+ for (i = 0; i < num_elems; ++i) { -+ char elem_name[32]; ++#ifndef _KBASE_JS_BACKEND_H_ ++#define _KBASE_JS_BACKEND_H_ + -+ if (num_elems == 1) -+ snprintf(elem_name, sizeof(elem_name), "%s", name); -+ else -+ snprintf(elem_name, sizeof(elem_name), "%s.%d", -+ name, i); ++/** ++ * kbase_backend_timer_init() - Initialise the JS scheduling timer ++ * @kbdev: Device pointer ++ * ++ * This function should be called at driver initialisation ++ * ++ * Return: 0 on success ++ */ ++int kbase_backend_timer_init(struct kbase_device *kbdev); + -+ dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", -+ model->ops->name, elem_name, addr[i], origin); ++/** ++ * kbase_backend_timer_term() - Terminate the JS scheduling timer ++ * @kbdev: Device pointer ++ * ++ * This function should be called at driver termination ++ */ ++void kbase_backend_timer_term(struct kbase_device *kbdev); + -+ err = kbase_ipa_model_param_add(model, elem_name, -+ &addr[i], sizeof(s32), -+ PARAM_TYPE_S32); -+ if (err) -+ goto exit; -+ } -+exit: -+ return err; -+} ++/** ++ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling ++ * timer ++ * @kbdev: Device pointer ++ * ++ * This function should be called on suspend, after the active count has reached ++ * zero. This is required as the timer may have been started on job submission ++ * to the job scheduler, but before jobs are submitted to the GPU. ++ * ++ * Caller must hold runpool_mutex. ++ */ ++void kbase_backend_timer_suspend(struct kbase_device *kbdev); + -+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, -+ const char *name, char *addr, -+ size_t size, bool dt_required) -+{ -+ int err; -+ struct device_node *model_dt_node = get_model_dt_node(model); -+ const char *string_prop_value; -+ char *origin; ++/** ++ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS ++ * scheduling timer ++ * @kbdev: Device pointer ++ * ++ * This function should be called on resume. 
Note that is is not guaranteed to ++ * re-start the timer, only evalute whether it should be re-started. ++ * ++ * Caller must hold runpool_mutex. ++ */ ++void kbase_backend_timer_resume(struct kbase_device *kbdev); + -+ err = of_property_read_string(model_dt_node, name, -+ &string_prop_value); -+ if (err && dt_required) { -+ strncpy(addr, "", size - 1); -+ dev_warn(model->kbdev->dev, -+ "Error %d, no DT entry: %s.%s = \'%s\'\n", -+ err, model->ops->name, name, addr); -+ err = 0; -+ origin = "zero"; -+ } else if (err && !dt_required) { -+ origin = "default"; -+ } else /* !err */ { -+ strncpy(addr, string_prop_value, size - 1); -+ origin = "DT"; -+ } ++#endif /* _KBASE_JS_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +new file mode 100644 +index 000000000..ba826184d +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +@@ -0,0 +1,407 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ addr[size - 1] = '\0'; + -+ dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", -+ model->ops->name, name, string_prop_value, origin); ++/* #define ENABLE_DEBUG_LOG */ ++#include "../../platform/rk/custom_log.h" + -+ err = kbase_ipa_model_param_add(model, name, addr, size, -+ PARAM_TYPE_STRING); ++#include + -+ return err; -+} ++#include ++#include ++#include ++#include ++#include ++#include + -+void kbase_ipa_term_model(struct kbase_ipa_model *model) ++static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, ++ u32 num_pages) +{ -+ if (!model) -+ return; ++ u64 region; + -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++ /* can't lock a zero sized range */ ++ KBASE_DEBUG_ASSERT(num_pages); + -+ if (model->ops->term) -+ model->ops->term(model); ++ region = pfn << PAGE_SHIFT; ++ /* ++ * fls returns (given the ASSERT above): ++ * 1 .. 32 ++ * ++ * 10 + fls(num_pages) ++ * results in the range (11 .. 
42) ++ */ + -+ kbase_ipa_model_param_free_all(model); ++ /* gracefully handle num_pages being zero */ ++ if (0 == num_pages) { ++ region |= 11; ++ } else { ++ u8 region_width; + -+ kfree(model); ++ region_width = 10 + fls(num_pages); ++ if (num_pages != (1ul << (region_width - 11))) { ++ /* not pow2, so must go up to the next pow2 */ ++ region_width += 1; ++ } ++ KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); ++ KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE); ++ region |= region_width; ++ } ++ ++ return region; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_term_model); + -+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, -+ struct kbase_ipa_model_ops *ops) ++static int wait_ready(struct kbase_device *kbdev, ++ unsigned int as_nr, struct kbase_context *kctx) +{ -+ struct kbase_ipa_model *model; -+ int err; -+ -+ lockdep_assert_held(&kbdev->ipa.lock); -+ -+ if (!ops || !ops->name) -+ return NULL; -+ -+ model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); -+ if (!model) -+ return NULL; ++ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; ++ u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + -+ model->kbdev = kbdev; -+ model->ops = ops; -+ INIT_LIST_HEAD(&model->params); ++ /* Wait for the MMU status to indicate there is no active command, in ++ * case one is pending. Do not log remaining register accesses. */ ++ while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) ++ val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL); + -+ err = model->ops->init(model); -+ if (err) { -+ dev_err(kbdev->dev, -+ "init of power model \'%s\' returned error %d\n", -+ ops->name, err); -+ goto term_model; ++ if (max_loops == 0) { ++ dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); ++ return -1; + } + -+ err = kbase_ipa_model_recalculate(model); -+ if (err) -+ goto term_model; -+ -+ return model; ++ /* If waiting in loop was performed, log last read value. 
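// Editor's note: worked arithmetic, not part of the patch hunks. In
// lock_region() above the low bits of the lock address encode the region
// width as a power of two, rounded up when the page count is not one:
//
//   num_pages = 64:  fls(64) = 7, so region_width = 10 + 7 = 17;
//                    1 << (17 - 11) = 64 == num_pages, so 17 is kept.
//   num_pages = 96:  fls(96) = 7, so region_width = 10 + 7 = 17;
//                    1 << (17 - 11) = 64 != 96, so the width is bumped to 18
//                    (the next power of two that covers 96 pages).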
*/ ++ if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) ++ kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + -+term_model: -+ kbase_ipa_term_model(model); -+ return NULL; ++ return 0; +} -+KBASE_EXPORT_TEST_API(kbase_ipa_init_model); + -+static void kbase_ipa_term_locked(struct kbase_device *kbdev) ++static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, ++ struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kbdev->ipa.lock); ++ int status; + -+ /* Clean up the models */ -+ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) -+ kbase_ipa_term_model(kbdev->ipa.configured_model); -+ kbase_ipa_term_model(kbdev->ipa.fallback_model); ++ /* write AS_COMMAND when MMU is ready to accept another command */ ++ status = wait_ready(kbdev, as_nr, kctx); ++ if (status == 0) ++ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, ++ kctx); + -+ kbdev->ipa.configured_model = NULL; -+ kbdev->ipa.fallback_model = NULL; ++ return status; +} + -+int kbase_ipa_init(struct kbase_device *kbdev) ++static void validate_protected_page_fault(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ ++ /* GPUs which support (native) protected mode shall not report page ++ * fault addresses unless it has protected debug mode and protected ++ * debug mode is turned on */ ++ u32 protected_debug_mode = 0; + -+ const char *model_name; -+ struct kbase_ipa_model_ops *ops; -+ struct kbase_ipa_model *default_model = NULL; -+ int err; -+ -+ mutex_init(&kbdev->ipa.lock); -+ /* -+ * Lock during init to avoid warnings from lockdep_assert_held (there -+ * shouldn't be any concurrent access yet). -+ */ -+ mutex_lock(&kbdev->ipa.lock); -+ -+ /* The simple IPA model must *always* be present.*/ -+ ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) ++ return; + -+ if (!ops->do_utilization_scaling_in_framework) { -+ dev_err(kbdev->dev, -+ "Fallback IPA model %s should not account for utilization\n", -+ ops->name); -+ err = -EINVAL; -+ goto end; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { ++ protected_debug_mode = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_STATUS), ++ kctx) & GPU_DBGEN; + } + -+ default_model = kbase_ipa_init_model(kbdev, ops); -+ if (!default_model) { -+ err = -EINVAL; -+ goto end; ++ if (!protected_debug_mode) { ++ /* fault_addr should never be reported in protected mode. 
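++		 * Reaching this branch means the GPU exposed a fault address
++		 * while in protected mode without protected debug mode
++		 * (GPU_DBGEN) being enabled.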
++ * However, we just continue by printing an error message */ ++ dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } ++} + -+ kbdev->ipa.fallback_model = default_model; -+ err = of_property_read_string(kbdev->dev->of_node, -+ "ipa-model", -+ &model_name); -+ if (err) { -+ /* Attempt to load a match from GPU-ID */ -+ u32 gpu_id; ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++{ ++ const int num_as = 16; ++ const int busfault_shift = MMU_PAGE_FAULT_FLAGS; ++ const int pf_shift = 0; ++ const unsigned long as_bit_mask = (1UL << num_as) - 1; ++ unsigned long flags; ++ u32 new_mask; ++ u32 tmp; + -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ model_name = kbase_ipa_model_name_from_id(gpu_id); -+ dev_dbg(kbdev->dev, -+ "Inferring model from GPU ID 0x%x: \'%s\'\n", -+ gpu_id, model_name); -+ } else { -+ dev_dbg(kbdev->dev, -+ "Using ipa-model parameter from DT: \'%s\'\n", -+ model_name); -+ } ++ /* bus faults */ ++ u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; ++ /* page faults (note: Ignore ASes with both pf and bf) */ ++ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + -+ if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { -+ ops = kbase_ipa_model_ops_find(kbdev, model_name); -+ kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); -+ if (!kbdev->ipa.configured_model) { -+ err = -EINVAL; -+ goto end; -+ } -+ } else { -+ kbdev->ipa.configured_model = default_model; -+ err = 0; -+ } ++ KBASE_DEBUG_ASSERT(NULL != kbdev); + -+ kbase_ipa_model_use_configured_locked(kbdev); ++ /* remember current mask */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); ++ /* mask interrupts for now */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + -+end: -+ if (err) -+ kbase_ipa_term_locked(kbdev); -+ else -+ dev_info(kbdev->dev, -+ "Using configured power model %s, and fallback %s\n", -+ kbdev->ipa.configured_model->ops->name, -+ kbdev->ipa.fallback_model->ops->name); ++ while (bf_bits | pf_bits) { ++ struct kbase_as *as; ++ int as_no; ++ struct kbase_context *kctx; + -+ mutex_unlock(&kbdev->ipa.lock); -+ return err; -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_init); ++ /* ++ * the while logic ensures we have a bit set, no need to check ++ * for not-found here ++ */ ++ as_no = ffs(bf_bits | pf_bits) - 1; ++ as = &kbdev->as[as_no]; + -+void kbase_ipa_term(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->ipa.lock); -+ kbase_ipa_term_locked(kbdev); -+ mutex_unlock(&kbdev->ipa.lock); -+} -+KBASE_EXPORT_TEST_API(kbase_ipa_term); ++ /* ++ * Refcount the kctx ASAP - it shouldn't disappear anyway, since ++ * Bus/Page faults _should_ only occur whilst jobs are running, ++ * and a job causing the Bus/Page fault shouldn't complete until ++ * the MMU is updated ++ */ ++ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); ++ if (!kctx) { ++ E("fail to lookup ctx, to break out."); ++ break; ++ } + -+/** -+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP -+ * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range -+ * 0 < c < 2^26 to prevent overflow. -+ * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) -+ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) -+ * -+ * Keep a record of the approximate range of each value at every stage of the -+ * calculation, to ensure we don't overflow. 
This makes heavy use of the -+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual -+ * calculations in decimal for increased accuracy. -+ * -+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) -+ */ -+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, -+ const u32 voltage) -+{ -+ /* Range: 2^8 < v2 < 2^16 m(V^2) */ -+ const u32 v2 = (voltage * voltage) / 1000; + -+ /* Range: 2^3 < f_MHz < 2^10 MHz */ -+ const u32 f_MHz = freq / 1000000; ++ /* find faulting address */ ++ as->fault_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_HI), ++ kctx); ++ as->fault_addr <<= 32; ++ as->fault_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_LO), ++ kctx); + -+ /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ -+ const u32 v2f_big = v2 * f_MHz; ++ /* Mark the fault protected or not */ ++ as->protected_mode = kbdev->protected_mode; + -+ /* Range: 2^1 < v2f < 2^16 MHz V^2 */ -+ const u32 v2f = v2f_big / 1000; ++ if (kbdev->protected_mode && as->fault_addr) ++ { ++ /* check if address reporting is allowed */ ++ validate_protected_page_fault(kbdev, kctx); ++ } + -+ /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. -+ * Must be < 2^42 to avoid overflowing the return value. */ -+ const u64 v2fc = (u64) c * (u64) v2f; -+ u32 remainder; ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_no); + -+ /* Range: 0 < v2fc / 1000 < 2^13 mW */ -+ // static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) -+ return div_u64_rem(v2fc, 1000, &remainder); -+} ++ /* record the fault status */ ++ as->fault_status = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, ++ AS_FAULTSTATUS), ++ kctx); + -+/** -+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP -+ * @c: Static model coefficient, in uW/V^3. Should be in range -+ * 0 < c < 2^32 to prevent overflow. -+ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) -+ * -+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) -+ */ -+u32 kbase_scale_static_power(const u32 c, const u32 voltage) -+{ -+ /* Range: 2^8 < v2 < 2^16 m(V^2) */ -+ const u32 v2 = (voltage * voltage) / 1000; ++ /* find the fault type */ ++ as->fault_type = (bf_bits & (1 << as_no)) ? ++ KBASE_MMU_FAULT_TYPE_BUS : ++ KBASE_MMU_FAULT_TYPE_PAGE; + -+ /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ -+ const u32 v3_big = v2 * voltage; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { ++ as->fault_extra_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), ++ kctx); ++ as->fault_extra_addr <<= 32; ++ as->fault_extra_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), ++ kctx); ++ } + -+ /* Range: 2^7 < v3 < 2^19 m(V^3) */ -+ const u32 v3 = v3_big / 1000; ++ if (kbase_as_has_bus_fault(as)) { ++ /* Mark bus fault as handled. ++ * Note that a bus fault is processed first in case ++ * where both a bus fault and page fault occur. ++ */ ++ bf_bits &= ~(1UL << as_no); + -+ /* -+ * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. -+ * The result should be < 2^52 to avoid overflowing the return value. 
-+ */ -+ const u64 v3c_big = (u64) c * (u64) v3; -+ u32 remainder; ++ /* remove the queued BF (and PF) from the mask */ ++ new_mask &= ~(MMU_BUS_ERROR(as_no) | ++ MMU_PAGE_FAULT(as_no)); ++ } else { ++ /* Mark page fault as handled */ ++ pf_bits &= ~(1UL << as_no); + -+ /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ -+ // return v3c_big / 1000000; -+ return div_u64_rem(v3c_big, 1000000, &remainder); -+} ++ /* remove the queued PF from the mask */ ++ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ } + -+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->ipa.lock); ++ /* Process the interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } + -+ if (atomic_read(&kbdev->ipa_use_configured_model)) -+ return kbdev->ipa.configured_model; -+ else -+ return kbdev->ipa.fallback_model; ++ /* reenable interrupts */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); ++ new_mask |= tmp; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + -+static u32 get_static_power_locked(struct kbase_device *kbdev, -+ struct kbase_ipa_model *model, -+ unsigned long voltage) ++void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx) +{ -+ u32 power = 0; -+ int err; -+ u32 power_coeff; -+ -+ lockdep_assert_held(&model->kbdev->ipa.lock); -+ -+ if (!model->ops->get_static_coeff) -+ model = kbdev->ipa.fallback_model; ++ struct kbase_mmu_setup *current_setup = &as->current_setup; ++ u32 transcfg = 0; + -+ if (model->ops->get_static_coeff) { -+ err = model->ops->get_static_coeff(model, &power_coeff); -+ if (!err) -+ power = kbase_scale_static_power(power_coeff, -+ (u32) voltage); -+ } ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { ++ transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + -+ return power; -+} ++ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ ++ /* Clear PTW_MEMATTR bits */ ++ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; ++ /* Enable correct PTW_MEMATTR bits */ ++ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + -+__maybe_unused -+#ifdef CONFIG_MALI_PWRSOFT_765 -+static unsigned long kbase_get_static_power(struct devfreq *df, -+ unsigned long voltage) -+#else -+static unsigned long kbase_get_static_power(unsigned long voltage) -+#endif -+{ -+ struct kbase_ipa_model *model; -+ u32 power = 0; -+#ifdef CONFIG_MALI_PWRSOFT_765 -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -+#else -+ struct kbase_device *kbdev = kbase_find_device(-1); -+#endif ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ ++ /* Clear PTW_SH bits */ ++ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); ++ /* Enable correct PTW_SH bits */ ++ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); ++ } + -+ mutex_lock(&kbdev->ipa.lock); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), ++ transcfg, kctx); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), ++ (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, ++ kctx); ++ } else { ++ if (kbdev->system_coherency == COHERENCY_ACE) ++ current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; ++ } + -+ model = get_current_model(kbdev); -+ power = get_static_power_locked(kbdev, model, voltage); ++ 
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), ++ current_setup->transtab & 0xFFFFFFFFUL, kctx); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), ++ (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); + -+ mutex_unlock(&kbdev->ipa.lock); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), ++ current_setup->memattr & 0xFFFFFFFFUL, kctx); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), ++ (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); + -+#ifndef CONFIG_MALI_PWRSOFT_765 -+ kbase_release_device(kbdev); -+#endif ++ KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, ++ current_setup->transtab, ++ current_setup->memattr, ++ transcfg); + -+ return power; ++ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); +} + -+__maybe_unused -+#ifdef CONFIG_MALI_PWRSOFT_765 -+static unsigned long kbase_get_dynamic_power(struct devfreq *df, -+ unsigned long freq, -+ unsigned long voltage) -+#else -+static unsigned long kbase_get_dynamic_power(unsigned long freq, -+ unsigned long voltage) -+#endif ++int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, ++ unsigned int handling_irq) +{ -+ struct kbase_ipa_model *model; -+ u32 power_coeff = 0, power = 0; -+ int err = 0; -+#ifdef CONFIG_MALI_PWRSOFT_765 -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -+#else -+ struct kbase_device *kbdev = kbase_find_device(-1); -+#endif ++ int ret; + -+ mutex_lock(&kbdev->ipa.lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+ model = kbdev->ipa.fallback_model; ++ if (op == AS_COMMAND_UNLOCK) { ++ /* Unlock doesn't require a lock first */ ++ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); ++ } else { ++ u64 lock_addr = lock_region(kbdev, vpfn, nr); + -+ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); ++ /* Lock the region that needs to be updated */ ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), ++ lock_addr & 0xFFFFFFFFUL, kctx); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), ++ (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); ++ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); + -+ if (!err) -+ power = kbase_scale_dynamic_power(power_coeff, freq, voltage); -+ else -+ dev_err_ratelimited(kbdev->dev, -+ "Model %s returned error code %d\n", -+ model->ops->name, err); ++ /* Run the MMU operation */ ++ write_cmd(kbdev, as->number, op, kctx); + -+ mutex_unlock(&kbdev->ipa.lock); ++ /* Wait for the flush to complete */ ++ ret = wait_ready(kbdev, as->number, kctx); + -+#ifndef CONFIG_MALI_PWRSOFT_765 -+ kbase_release_device(kbdev); -+#endif ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { ++ /* Issue an UNLOCK command to ensure that valid page ++ tables are re-read by the GPU after an update. ++ Note that, the FLUSH command should perform all the ++ actions necessary, however the bus logs show that if ++ multiple page faults occur within an 8 page region ++ the MMU does not always re-read the updated page ++ table entries for later faults or is only partially ++ read, it subsequently raises the page fault IRQ for ++ the same addresses, the unlock ensures that the MMU ++ cache is flushed, so updates can be re-read. As the ++ region is now unlocked we need to issue 2 UNLOCK ++ commands in order to flush the MMU/uTLB, ++ see PRLAM-8812. 
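++			   Both UNLOCK commands below go through write_cmd(),
++			   which waits for AS_ACTIVE to clear before issuing
++			   each command.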
++ */ ++ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); ++ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); ++ } ++ } + -+ return power; ++ return ret; +} + -+int kbase_get_real_power(struct devfreq *df, u32 *power, -+ unsigned long freq, -+ unsigned long voltage) ++void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ -+ struct kbase_ipa_model *model; -+ u32 power_coeff = 0; -+ int err = 0; -+ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); ++ unsigned long flags; ++ u32 pf_bf_mask; + -+ mutex_lock(&kbdev->ipa.lock); ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + -+ model = get_current_model(kbdev); ++ /* ++ * A reset is in-flight and we're flushing the IRQ + bottom half ++ * so don't update anything as it could race with the reset code. ++ */ ++ if (kbdev->irq_reset_flush) ++ goto unlock; + -+ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); ++ /* Clear the page (and bus fault IRQ as well in case one occurred) */ ++ pf_bf_mask = MMU_PAGE_FAULT(as->number); ++ if (type == KBASE_MMU_FAULT_TYPE_BUS || ++ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) ++ pf_bf_mask |= MMU_BUS_ERROR(as->number); + -+ /* If we switch to protected model between get_current_model() and -+ * get_dynamic_coeff(), counter reading could fail. If that happens -+ * (unlikely, but possible), revert to the fallback model. */ -+ if (err && model != kbdev->ipa.fallback_model) { -+ model = kbdev->ipa.fallback_model; -+ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); -+ } ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + -+ if (err) -+ goto exit_unlock; ++unlock: ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++} + -+ *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); ++void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, enum kbase_mmu_fault_type type) ++{ ++ unsigned long flags; ++ u32 irq_mask; + -+ if (model->ops->do_utilization_scaling_in_framework) { -+ struct devfreq_dev_status *status = &df->last_status; -+ unsigned long total_time = max(status->total_time, 1ul); -+ u64 busy_time = min(status->busy_time, total_time); -+ u32 remainder; ++ /* Enable the page fault IRQ (and bus fault IRQ as well in case one ++ * occurred) */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + -+ // *power = ((u64) *power * (u64) busy_time) / total_time; -+ *power = div_u64_rem(((u64) *power * (u64) busy_time), total_time, &remainder); -+ } ++ /* ++ * A reset is in-flight and we're flushing the IRQ + bottom half ++ * so don't update anything as it could race with the reset code. 
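++	 * The same irq_reset_flush check is made in kbase_mmu_hw_clear_fault()
++	 * above, for the same reason.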
++ */ ++ if (kbdev->irq_reset_flush) ++ goto unlock; + -+ *power += get_static_power_locked(kbdev, model, voltage); ++ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | ++ MMU_PAGE_FAULT(as->number); + -+exit_unlock: -+ mutex_unlock(&kbdev->ipa.lock); ++ if (type == KBASE_MMU_FAULT_TYPE_BUS || ++ type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) ++ irq_mask |= MMU_BUS_ERROR(as->number); + -+ return err; -+} -+KBASE_EXPORT_TEST_API(kbase_get_real_power); ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -+struct devfreq_cooling_ops kbase_ipa_power_model_ops = { -+#else -+struct devfreq_cooling_power kbase_ipa_power_model_ops = { -+#endif -+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) -+ .get_static_power = &kbase_get_static_power, -+ .get_dynamic_power = &kbase_get_dynamic_power, -+#endif -+}; -+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); -diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h ++unlock: ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h new file mode 100644 -index 000000000..b2d3db149 +index 000000000..c02253c6a --- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h -@@ -0,0 +1,148 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h +@@ -0,0 +1,42 @@ +/* + * -+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -348021,146 +350050,110 @@ index 000000000..b2d3db149 + + + -+#ifndef _KBASE_IPA_H_ -+#define _KBASE_IPA_H_ -+ -+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) ++/* ++ * Interface file for the direct implementation for MMU hardware access ++ * ++ * Direct MMU hardware interface ++ * ++ * This module provides the interface(s) that are required by the direct ++ * register access implementation of the MMU hardware interface ++ */ + -+struct devfreq; ++#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ ++#define _MALI_KBASE_MMU_HW_DIRECT_H_ + -+struct kbase_ipa_model { -+ struct list_head link; -+ struct kbase_device *kbdev; -+ void *model_data; -+ struct kbase_ipa_model_ops *ops; -+ struct list_head params; -+ bool missing_dt_node_warning; -+}; ++#include + +/** -+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter -+ * @model: pointer to IPA model -+ * @name: name of corresponding debugfs entry -+ * @addr: address where the value is stored -+ * @num_elems: number of elements (1 if not an array) -+ * @dt_required: if false, a corresponding devicetree entry is not required, -+ * and the current value will be used. If true, a warning is -+ * output and the data is zeroed ++ * kbase_mmu_interrupt - Process an MMU interrupt. + * -+ * Return: 0 on success, or an error code ++ * Process the MMU interrupt that was reported by the &kbase_device. ++ * ++ * @kbdev: kbase context to clear the fault from. 
++ * @irq_stat: Value of the MMU_IRQ_STATUS register + */ -+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, -+ const char *name, s32 *addr, -+ size_t num_elems, bool dt_required); ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + -+/** -+ * kbase_ipa_model_add_param_string - Add a string model parameter -+ * @model: pointer to IPA model -+ * @name: name of corresponding debugfs entry -+ * @addr: address where the value is stored -+ * @size: size, in bytes, of the value storage (so the maximum string -+ * length is size - 1) -+ * @dt_required: if false, a corresponding devicetree entry is not required, -+ * and the current value will be used. If true, a warning is -+ * output and the data is zeroed ++#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +new file mode 100644 +index 000000000..0614348e9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +@@ -0,0 +1,63 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Return: 0 on success, or an error code + */ -+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, -+ const char *name, char *addr, -+ size_t size, bool dt_required); + -+struct kbase_ipa_model_ops { -+ char *name; -+ /* The init, recalculate and term ops on the default model are always -+ * called. However, all the other models are only invoked if the model -+ * is selected in the device tree. Otherwise they are never -+ * initialized. Additional resources can be acquired by models in -+ * init(), however they must be terminated in the term(). -+ */ -+ int (*init)(struct kbase_ipa_model *model); -+ /* Called immediately after init(), or when a parameter is changed, so -+ * that any coefficients derived from model parameters can be -+ * recalculated. */ -+ int (*recalculate)(struct kbase_ipa_model *model); -+ void (*term)(struct kbase_ipa_model *model); -+ /* -+ * get_dynamic_coeff() - calculate dynamic power coefficient -+ * @model: pointer to model -+ * @coeffp: pointer to return value location -+ * @current_freq: frequency the GPU has been running at for the -+ * previous sampling period. -+ * -+ * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which -+ * is then scaled by the IPA framework according to the current OPP's -+ * frequency and voltage. -+ * -+ * Return: 0 on success, or an error code. -+ */ -+ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp, -+ u32 current_freq); -+ /* -+ * get_static_coeff() - calculate static power coefficient -+ * @model: pointer to model -+ * @coeffp: pointer to return value location -+ * -+ * Calculate a static power coefficient, with units uW/(V^3), which is -+ * scaled by the IPA framework according to the current OPP's voltage. -+ * -+ * Return: 0 on success, or an error code. 
-+ */ -+ int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); -+ /* If false, the model's get_dynamic_coeff() method accounts for how -+ * long the GPU was active over the sample period. If true, the -+ * framework will scale the calculated power according to the -+ * utilization stats recorded by devfreq in get_real_power(). */ -+ bool do_utilization_scaling_in_framework; -+}; + -+/* Models can be registered only in the platform's platform_init_func call */ -+int kbase_ipa_model_ops_register(struct kbase_device *kbdev, -+ struct kbase_ipa_model_ops *new_model_ops); -+struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev, -+ const char *name); + -+int kbase_ipa_init(struct kbase_device *kbdev); -+void kbase_ipa_term(struct kbase_device *kbdev); -+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev); -+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev); -+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); -+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, -+ struct kbase_ipa_model_ops *ops); -+void kbase_ipa_term_model(struct kbase_ipa_model *model); + -+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; + ++/* ++ * "Always on" power management policy ++ */ + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; -+#else -+extern struct devfreq_cooling_power kbase_ipa_power_model_ops; -+#endif ++#include ++#include + -+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++static u64 always_on_get_core_mask(struct kbase_device *kbdev) ++{ ++ return kbdev->gpu_props.props.raw_props.shader_present; ++} + -+static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) -+{ } ++static bool always_on_get_core_active(struct kbase_device *kbdev) ++{ ++ return true; ++} + -+static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) -+{ } ++static void always_on_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ ++static void always_on_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+#endif -diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c ++/* ++ * The struct kbase_pm_policy structure for the demand power policy. ++ * ++ * This is the static structure that defines the demand power policy's callback ++ * and name. ++ */ ++const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { ++ "always_on", /* name */ ++ always_on_init, /* init */ ++ always_on_term, /* term */ ++ always_on_get_core_mask, /* get_core_mask */ ++ always_on_get_core_active, /* get_core_active */ ++ 0u, /* flags */ ++ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ ++}; ++ ++KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 -index 000000000..eafc14009 +index 000000000..f9d244b01 --- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c -@@ -0,0 +1,219 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h +@@ -0,0 +1,77 @@ ++ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -348175,217 +350168,562 @@ index 000000000..eafc14009 + + + -+#include -+#include -+#include -+ -+#include "mali_kbase.h" -+#include "mali_kbase_ipa.h" -+#include "mali_kbase_ipa_debugfs.h" -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) -+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE -+#endif -+ -+struct kbase_ipa_model_param { -+ char *name; -+ union { -+ void *voidp; -+ s32 *s32p; -+ char *str; -+ } addr; -+ size_t size; -+ enum kbase_ipa_model_param_type type; -+ struct kbase_ipa_model *model; -+ struct list_head link; -+}; -+ -+static int param_int_get(void *data, u64 *val) -+{ -+ struct kbase_ipa_model_param *param = data; -+ -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ *(s64 *) val = *param->addr.s32p; -+ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ return 0; -+} + -+static int param_int_set(void *data, u64 val) -+{ -+ struct kbase_ipa_model_param *param = data; -+ struct kbase_ipa_model *model = param->model; -+ s64 sval = (s64) val; -+ int err = 0; ++/* ++ * "Always on" power management policy ++ */ + -+ if (sval < S32_MIN || sval > S32_MAX) -+ return -ERANGE; ++#ifndef MALI_KBASE_PM_ALWAYS_ON_H ++#define MALI_KBASE_PM_ALWAYS_ON_H + -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ *param->addr.s32p = val; -+ err = kbase_ipa_model_recalculate(model); -+ mutex_unlock(¶m->model->kbdev->ipa.lock); ++/** ++ * DOC: ++ * The "Always on" power management policy has the following ++ * characteristics: ++ * ++ * - When KBase indicates that the GPU will be powered up, but we don't yet ++ * know which Job Chains are to be run: ++ * All Shader Cores are powered up, regardless of whether or not they will ++ * be needed later. ++ * ++ * - When KBase indicates that a set of Shader Cores are needed to submit the ++ * currently queued Job Chains: ++ * All Shader Cores are kept powered, regardless of whether or not they will ++ * be needed ++ * ++ * - When KBase indicates that the GPU need not be powered: ++ * The Shader Cores are kept powered, regardless of whether or not they will ++ * be needed. The GPU itself is also kept powered, even though it is not ++ * needed. ++ * ++ * This policy is automatically overridden during system suspend: the desired ++ * core state is ignored, and the cores are forced off regardless of what the ++ * policy requests. After resuming from suspend, new changes to the desired ++ * core state made by the policy are honored. ++ * ++ * Note: ++ * ++ * - KBase indicates the GPU will be powered up when it has a User Process that ++ * has just started to submit Job Chains. ++ * ++ * - KBase indicates the GPU need not be powered when all the Job Chains from ++ * User Processes have finished, and it is waiting for a User Process to ++ * submit some more Job Chains. ++ */ + -+ return err; ++/** ++ * struct kbasep_pm_policy_always_on - Private struct for policy instance data ++ * @dummy: unused dummy variable ++ * ++ * This contains data that is private to the particular power policy that is ++ * active. 
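++ * The always_on policy keeps no per-instance state, so this struct only
++ * carries an unused dummy member.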
++ */ ++struct kbasep_pm_policy_always_on { ++ int dummy; ++}; ++ ++extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; ++ ++#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ ++ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +new file mode 100644 +index 000000000..146fd48ba +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +@@ -0,0 +1,482 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++ ++ ++ ++/* ++ * GPU backend implementation of base kernel power management APIs ++ */ ++ ++#include ++#include ++#include ++#ifdef CONFIG_MALI_PLATFORM_DEVICETREE ++#include ++#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); ++ ++void kbase_pm_register_access_enable(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_callback_conf *callbacks; ++ ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ ++ if (callbacks) ++ callbacks->power_on_callback(kbdev); ++ ++ kbdev->pm.backend.gpu_powered = true; +} + -+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); ++void kbase_pm_register_access_disable(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_callback_conf *callbacks; + -+static ssize_t param_string_get(struct file *file, char __user *user_buf, -+ size_t count, loff_t *ppos) ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ ++ if (callbacks) ++ callbacks->power_off_callback(kbdev); ++ ++ kbdev->pm.backend.gpu_powered = false; ++} ++ ++int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_model_param *param = file->private_data; -+ ssize_t ret; -+ size_t len; ++ int ret = 0; ++ struct kbase_pm_callback_conf *callbacks; + -+ mutex_lock(¶m->model->kbdev->ipa.lock); -+ len = strnlen(param->addr.str, param->size - 1) + 1; -+ ret = simple_read_from_buffer(user_buf, count, ppos, -+ param->addr.str, len); -+ mutex_unlock(¶m->model->kbdev->ipa.lock); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ return ret; ++ mutex_init(&kbdev->pm.lock); ++ ++ kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!kbdev->pm.backend.gpu_poweroff_wait_wq) ++ return -ENOMEM; ++ ++ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, ++ kbase_pm_gpu_poweroff_wait_wq); ++ ++ kbdev->pm.backend.gpu_powered = false; ++ kbdev->pm.suspending = false; ++#ifdef CONFIG_MALI_DEBUG ++ kbdev->pm.backend.driver_ready_for_irqs = false; ++#endif /* CONFIG_MALI_DEBUG */ ++ kbdev->pm.backend.gpu_in_desired_state = true; ++ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); ++ ++ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; ++ if (callbacks) { ++ kbdev->pm.backend.callback_power_on = ++ callbacks->power_on_callback; ++ kbdev->pm.backend.callback_power_off = ++ callbacks->power_off_callback; 
++ kbdev->pm.backend.callback_power_suspend = ++ callbacks->power_suspend_callback; ++ kbdev->pm.backend.callback_power_resume = ++ callbacks->power_resume_callback; ++ kbdev->pm.callback_power_runtime_init = ++ callbacks->power_runtime_init_callback; ++ kbdev->pm.callback_power_runtime_term = ++ callbacks->power_runtime_term_callback; ++ kbdev->pm.backend.callback_power_runtime_on = ++ callbacks->power_runtime_on_callback; ++ kbdev->pm.backend.callback_power_runtime_off = ++ callbacks->power_runtime_off_callback; ++ kbdev->pm.backend.callback_power_runtime_idle = ++ callbacks->power_runtime_idle_callback; ++ } else { ++ kbdev->pm.backend.callback_power_on = NULL; ++ kbdev->pm.backend.callback_power_off = NULL; ++ kbdev->pm.backend.callback_power_suspend = NULL; ++ kbdev->pm.backend.callback_power_resume = NULL; ++ kbdev->pm.callback_power_runtime_init = NULL; ++ kbdev->pm.callback_power_runtime_term = NULL; ++ kbdev->pm.backend.callback_power_runtime_on = NULL; ++ kbdev->pm.backend.callback_power_runtime_off = NULL; ++ kbdev->pm.backend.callback_power_runtime_idle = NULL; ++ } ++ ++ /* Initialise the metrics subsystem */ ++ ret = kbasep_pm_metrics_init(kbdev); ++ if (ret) ++ return ret; ++ ++ init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); ++ kbdev->pm.backend.l2_powered = 0; ++ ++ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); ++ kbdev->pm.backend.reset_done = false; ++ ++ init_waitqueue_head(&kbdev->pm.zero_active_count_wait); ++ kbdev->pm.active_count = 0; ++ ++ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); ++ spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); ++ ++ init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); ++ ++ if (kbase_pm_ca_init(kbdev) != 0) ++ goto workq_fail; ++ ++ if (kbase_pm_policy_init(kbdev) != 0) ++ goto pm_policy_fail; ++ ++ return 0; ++ ++pm_policy_fail: ++ kbase_pm_ca_term(kbdev); ++workq_fail: ++ kbasep_pm_metrics_term(kbdev); ++ return -EINVAL; +} + -+static ssize_t param_string_set(struct file *file, const char __user *user_buf, -+ size_t count, loff_t *ppos) ++void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) +{ -+ struct kbase_ipa_model_param *param = file->private_data; -+ struct kbase_ipa_model *model = param->model; -+ ssize_t ret = count; -+ size_t buf_size; -+ int err; ++ lockdep_assert_held(&kbdev->pm.lock); + -+ mutex_lock(&model->kbdev->ipa.lock); ++ /* Turn clocks and interrupts on - no-op if we haven't done a previous ++ * kbase_pm_clock_off() */ ++ kbase_pm_clock_on(kbdev, is_resume); + -+ if (count > param->size) { -+ ret = -EINVAL; -+ goto end; ++ /* Update core status as required by the policy */ ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START); ++ kbase_pm_update_cores_state(kbdev); ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END); ++ ++ /* NOTE: We don't wait to reach the desired state, since running atoms ++ * will wait for that state to be reached anyway */ ++} ++ ++static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ pm.backend.gpu_poweroff_wait_work); ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ unsigned long flags; ++ ++#if !PLATFORM_POWER_DOWN_ONLY ++ /* Wait for power transitions to complete. 
We do this with no locks held ++ * so that we don't deadlock with any pending workqueues */ ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); ++ kbase_pm_check_transitions_sync(kbdev); ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); ++#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++ ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ ++#if PLATFORM_POWER_DOWN_ONLY ++ if (kbdev->pm.backend.gpu_powered) { ++ if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) { ++ /* If L2 cache is powered then we must flush it before ++ * we power off the GPU. Normally this would have been ++ * handled when the L2 was powered off. */ ++ kbase_gpu_cacheclean(kbdev); ++ } + } ++#endif /* PLATFORM_POWER_DOWN_ONLY */ + -+ buf_size = min(param->size - 1, count); -+ if (copy_from_user(param->addr.str, user_buf, buf_size)) { -+ ret = -EFAULT; -+ goto end; ++ if (!backend->poweron_required) { ++#if !PLATFORM_POWER_DOWN_ONLY ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ WARN_ON(kbdev->l2_available_bitmap || ++ kbdev->shader_available_bitmap || ++ kbdev->tiler_available_bitmap); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++ ++ /* Consume any change-state events */ ++ kbase_timeline_pm_check_handle_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ ++ /* Disable interrupts and turn the clock off */ ++ if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { ++ /* ++ * Page/bus faults are pending, must drop locks to ++ * process. Interrupts are disabled so no more faults ++ * should be generated at this point. ++ */ ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ kbase_flush_mmu_wqs(kbdev); ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ ++ /* Turn off clock now that fault have been handled. 
We ++ * dropped locks so poweron_required may have changed - ++ * power back on if this is the case.*/ ++ if (backend->poweron_required) ++ kbase_pm_clock_on(kbdev, false); ++ else ++ WARN_ON(!kbase_pm_clock_off(kbdev, ++ backend->poweroff_is_suspend)); ++ } + } + -+ param->addr.str[buf_size] = '\0'; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ backend->poweroff_wait_in_progress = false; ++ if (backend->poweron_required) { ++ backend->poweron_required = false; ++ kbase_pm_update_cores_state_nolock(kbdev); ++ kbase_backend_slot_update(kbdev); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ err = kbase_ipa_model_recalculate(model); -+ if (err < 0) -+ ret = err; ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); + -+end: -+ mutex_unlock(&model->kbdev->ipa.lock); ++ wake_up(&kbdev->pm.backend.poweroff_wait); ++} ++ ++void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) ++{ ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (!kbdev->pm.backend.poweroff_wait_in_progress) { ++ /* Force all cores off */ ++ kbdev->pm.backend.desired_shader_state = 0; ++ kbdev->pm.backend.desired_tiler_state = 0; ++ ++ /* Force all cores to be unavailable, in the situation where ++ * transitions are in progress for some cores but not others, ++ * and kbase_pm_check_transitions_nolock can not immediately ++ * power off the cores */ ++ kbdev->shader_available_bitmap = 0; ++ kbdev->tiler_available_bitmap = 0; ++ kbdev->l2_available_bitmap = 0; ++ ++ kbdev->pm.backend.poweroff_wait_in_progress = true; ++ kbdev->pm.backend.poweroff_is_suspend = is_suspend; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /*Kick off wq here. 
Callers will have to wait*/ ++ queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, ++ &kbdev->pm.backend.gpu_poweroff_wait_work); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++} ++ ++static bool is_poweroff_in_progress(struct kbase_device *kbdev) ++{ ++ bool ret; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + -+static const struct file_operations fops_string = { -+ .read = param_string_get, -+ .write = param_string_set, -+ .open = simple_open, -+ .llseek = default_llseek, -+}; ++void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) ++{ ++ wait_event_killable(kbdev->pm.backend.poweroff_wait, ++ is_poweroff_in_progress(kbdev)); ++} + -+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, -+ void *addr, size_t size, -+ enum kbase_ipa_model_param_type type) ++int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, ++ unsigned int flags) +{ -+ struct kbase_ipa_model_param *param; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ unsigned long irq_flags; ++ int ret; + -+ param = kzalloc(sizeof(*param), GFP_KERNEL); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (!param) -+ return -ENOMEM; ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); + -+ /* 'name' is stack-allocated for array elements, so copy it into -+ * heap-allocated storage */ -+ param->name = kstrdup(name, GFP_KERNEL); -+ param->addr.voidp = addr; -+ param->size = size; -+ param->type = type; -+ param->model = model; ++ /* A suspend won't happen during startup/insmod */ ++ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + -+ list_add(¶m->link, &model->params); ++ /* Power up the GPU, don't enable IRQs as we are not ready to receive ++ * them. */ ++ ret = kbase_pm_init_hw(kbdev, flags); ++ if (ret) { ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ return ret; ++ } ++ ++ kbasep_pm_init_core_use_bitmaps(kbdev); ++ ++ kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = ++ kbdev->pm.debug_core_mask[1] = ++ kbdev->pm.debug_core_mask[2] = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ ++ /* Pretend the GPU is active to prevent a power policy turning the GPU ++ * cores off */ ++ kbdev->pm.active_count = 1; ++ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ /* Ensure cycle counter is off */ ++ kbdev->pm.backend.gpu_cycle_counter_requests = 0; ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ ++ /* We are ready to receive IRQ's now as power policy is set up, so ++ * enable them now. 
*/ ++#ifdef CONFIG_MALI_DEBUG ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags); ++ kbdev->pm.backend.driver_ready_for_irqs = true; ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags); ++#endif ++ kbase_pm_enable_interrupts(kbdev); ++ ++ /* Turn on the GPU and any cores needed by the policy */ ++ kbase_pm_do_poweron(kbdev, false); ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ /* Idle the GPU and/or cores, if the policy wants it to */ ++ kbase_pm_context_idle(kbdev); + + return 0; +} + -+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) ++void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_model_param *param_p, *param_n; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ list_for_each_entry_safe(param_p, param_n, &model->params, link) { -+ list_del(¶m_p->link); -+ kfree(param_p->name); -+ kfree(param_p); -+ } ++ mutex_lock(&kbdev->pm.lock); ++ kbase_pm_cancel_deferred_poweroff(kbdev); ++ kbase_pm_do_poweroff(kbdev, false); ++ mutex_unlock(&kbdev->pm.lock); +} + -+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) ++KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); ++ ++void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ -+ struct list_head *it; -+ struct dentry *dir; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + -+ lockdep_assert_held(&model->kbdev->ipa.lock); ++ /* Free any resources the policy allocated */ ++ kbase_pm_policy_term(kbdev); ++ kbase_pm_ca_term(kbdev); + -+ dir = debugfs_create_dir(model->ops->name, -+ model->kbdev->mali_debugfs_directory); ++ /* Shut down the metrics subsystem */ ++ kbasep_pm_metrics_term(kbdev); + -+ if (!dir) { -+ dev_err(model->kbdev->dev, -+ "Couldn't create mali debugfs %s directory", -+ model->ops->name); -+ return; -+ } ++ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); ++} + -+ list_for_each(it, &model->params) { -+ struct kbase_ipa_model_param *param = -+ list_entry(it, -+ struct kbase_ipa_model_param, -+ link); -+ const struct file_operations *fops = NULL; ++void kbase_pm_power_changed(struct kbase_device *kbdev) ++{ ++ bool cores_are_available; ++ unsigned long flags; + -+ switch (param->type) { -+ case PARAM_TYPE_S32: -+ fops = &fops_s32; -+ break; -+ case PARAM_TYPE_STRING: -+ fops = &fops_string; -+ break; -+ } ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + -+ if (unlikely(!fops)) { -+ dev_err(model->kbdev->dev, -+ "Type not set for %s parameter %s\n", -+ model->ops->name, param->name); -+ } else { -+ debugfs_create_file(param->name, S_IRUGO | S_IWUSR, -+ dir, param, fops); -+ } ++ if (cores_are_available) { ++ /* Log timelining information that a change in state has ++ * completed */ ++ kbase_timeline_pm_handle_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ ++ kbase_backend_slot_update(kbdev); + } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+void kbase_ipa_debugfs_init(struct kbase_device *kbdev) ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask_js0, u64 new_core_mask_js1, ++ u64 new_core_mask_js2) +{ -+ mutex_lock(&kbdev->ipa.lock); ++ kbdev->pm.debug_core_mask[0] = 
new_core_mask_js0; ++ kbdev->pm.debug_core_mask[1] = new_core_mask_js1; ++ kbdev->pm.debug_core_mask[2] = new_core_mask_js2; ++ kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | ++ new_core_mask_js2; + -+ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) -+ kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); -+ kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); ++ kbase_pm_update_cores_state_nolock(kbdev); ++} + -+ mutex_unlock(&kbdev->ipa.lock); ++void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) ++{ ++ kbase_pm_update_active(kbdev); +} -diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h ++ ++void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) ++{ ++ kbase_pm_update_active(kbdev); ++} ++ ++void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ ++ /* Force power off the GPU and all cores (regardless of policy), only ++ * after the PM active count reaches zero (otherwise, we risk turning it ++ * off prematurely) */ ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ ++ kbase_pm_cancel_deferred_poweroff(kbdev); ++ kbase_pm_do_poweroff(kbdev, true); ++ ++ kbase_backend_timer_suspend(kbdev); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ kbase_pm_wait_for_poweroff_complete(kbdev); ++} ++ ++void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ ++ kbdev->pm.suspending = false; ++ kbase_pm_do_poweron(kbdev, true); ++ ++ kbase_backend_timer_resume(kbdev); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c new file mode 100644 -index 000000000..ec06e2096 +index 000000000..85890f1e8 --- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h -@@ -0,0 +1,49 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +@@ -0,0 +1,182 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -348400,47 +350738,180 @@ index 000000000..ec06e2096 + + + -+#ifndef _KBASE_IPA_DEBUGFS_H_ -+#define _KBASE_IPA_DEBUGFS_H_ ++/* ++ * Base kernel core availability APIs ++ */ + -+enum kbase_ipa_model_param_type { -+ PARAM_TYPE_S32 = 1, -+ PARAM_TYPE_STRING, ++#include ++#include ++#include ++ ++static const struct kbase_pm_ca_policy *const policy_list[] = { ++ &kbase_pm_ca_fixed_policy_ops, ++#ifdef CONFIG_MALI_DEVFREQ ++ &kbase_pm_ca_devfreq_policy_ops, ++#endif ++#if !MALI_CUSTOMER_RELEASE ++ &kbase_pm_ca_random_policy_ops ++#endif +}; + -+#ifdef CONFIG_DEBUG_FS ++/** ++ * POLICY_COUNT - The number of policies available in the system. ++ * ++ * This is derived from the number of functions listed in policy_list. 
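++ * For example, on a customer release build with CONFIG_MALI_DEVFREQ enabled,
++ * policy_list holds kbase_pm_ca_fixed_policy_ops and
++ * kbase_pm_ca_devfreq_policy_ops, so POLICY_COUNT evaluates to 2.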
++ */ ++#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + -+void kbase_ipa_debugfs_init(struct kbase_device *kbdev); -+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, -+ void *addr, size_t size, -+ enum kbase_ipa_model_param_type type); -+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); ++int kbase_pm_ca_init(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+#else /* CONFIG_DEBUG_FS */ ++ kbdev->pm.backend.ca_current_policy = policy_list[0]; ++ ++ kbdev->pm.backend.ca_current_policy->init(kbdev); + -+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, -+ const char *name, void *addr, -+ size_t size, -+ enum kbase_ipa_model_param_type type) -+{ + return 0; +} + -+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) -+{ } ++void kbase_pm_ca_term(struct kbase_device *kbdev) ++{ ++ kbdev->pm.backend.ca_current_policy->term(kbdev); ++} + -+#endif /* CONFIG_DEBUG_FS */ ++int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list) ++{ ++ if (!list) ++ return POLICY_COUNT; + -+#endif /* _KBASE_IPA_DEBUGFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c ++ *list = policy_list; ++ ++ return POLICY_COUNT; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies); ++ ++const struct kbase_pm_ca_policy ++*kbase_pm_ca_get_policy(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ return kbdev->pm.backend.ca_current_policy; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy); ++ ++void kbase_pm_ca_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_ca_policy *new_policy) ++{ ++ const struct kbase_pm_ca_policy *old_policy; ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(new_policy != NULL); ++ ++ KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, ++ new_policy->id); ++ ++ /* During a policy change we pretend the GPU is active */ ++ /* A suspend won't happen here, because we're in a syscall from a ++ * userspace thread */ ++ kbase_pm_context_active(kbdev); ++ ++ mutex_lock(&kbdev->pm.lock); ++ ++ /* Remove the policy to prevent IRQ handlers from working on it */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ old_policy = kbdev->pm.backend.ca_current_policy; ++ kbdev->pm.backend.ca_current_policy = NULL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (old_policy->term) ++ old_policy->term(kbdev); ++ ++ if (new_policy->init) ++ new_policy->init(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.ca_current_policy = new_policy; ++ ++ /* If any core power state changes were previously attempted, but ++ * couldn't be made because the policy was changing (current_policy was ++ * NULL), then re-try them here. 
*/ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, ++ kbdev->shader_ready_bitmap, ++ kbdev->shader_transitioning_bitmap); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ ++ /* Now the policy change is finished, we release our fake context active ++ * reference */ ++ kbase_pm_context_idle(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); ++ ++u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* All cores must be enabled when instrumentation is in use */ ++ if (kbdev->pm.backend.instr_enabled) ++ return kbdev->gpu_props.props.raw_props.shader_present & ++ kbdev->pm.debug_core_mask_all; ++ ++ if (kbdev->pm.backend.ca_current_policy == NULL) ++ return kbdev->gpu_props.props.raw_props.shader_present & ++ kbdev->pm.debug_core_mask_all; ++ ++ return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & ++ kbdev->pm.debug_core_mask_all; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); ++ ++void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, ++ u64 cores_transitioning) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (kbdev->pm.backend.ca_current_policy != NULL) ++ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, ++ cores_ready, ++ cores_transitioning); ++} ++ ++void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.instr_enabled = true; ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbdev->pm.backend.instr_enabled = false; ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 -index 000000000..da0a4d4a0 +index 000000000..ee9e751f2 --- /dev/null -+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c -@@ -0,0 +1,222 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +@@ -0,0 +1,92 @@ +/* + * -+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -348455,220 +350926,357 @@ index 000000000..da0a4d4a0 + + + -+#include -+#ifdef CONFIG_DEVFREQ_THERMAL -+#include -+#endif -+#include -+#include ++/* ++ * Base kernel core availability APIs ++ */ + -+#include "mali_kbase.h" -+#include "mali_kbase_defs.h" ++#ifndef _KBASE_PM_CA_H_ ++#define _KBASE_PM_CA_H_ + -+/* -+ * This model is primarily designed for the Juno platform. It may not be -+ * suitable for other platforms. The additional resources in this model -+ * should preferably be minimal, as this model is rarely used when a dynamic -+ * model is available. 
++/** ++ * kbase_pm_ca_init - Initialize core availability framework ++ * ++ * Must be called before calling any other core availability function ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 if the core availability framework was successfully initialized, ++ * -errno otherwise + */ ++int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** -+ * struct kbase_ipa_model_simple_data - IPA context per device -+ * @dynamic_coefficient: dynamic coefficient of the model -+ * @static_coefficient: static coefficient of the model -+ * @ts: Thermal scaling coefficients of the model -+ * @tz_name: Thermal zone name -+ * @gpu_tz: thermal zone device ++ * kbase_pm_ca_term - Terminate core availability framework ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ ++void kbase_pm_ca_term(struct kbase_device *kbdev); + -+struct kbase_ipa_model_simple_data { -+ u32 dynamic_coefficient; -+ u32 static_coefficient; -+ s32 ts[4]; -+ char tz_name[16]; -+ struct thermal_zone_device *gpu_tz; -+}; -+#define FALLBACK_STATIC_TEMPERATURE 55000 ++/** ++ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Returns a mask of the currently available shader cores. ++ * Calls into the core availability policy ++ * ++ * Return: The bit mask of available cores ++ */ ++u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** -+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient -+ * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N -+ * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 ++ * kbase_pm_ca_update_core_status - Update core status + * -+ * Scale the temperature according to a cubic polynomial whose coefficients are -+ * provided in the device tree. The result is used to scale the static power -+ * coefficient, where 1000000 means no change. ++ * @kbdev: The kbase device structure for the device (must be ++ * a valid pointer) ++ * @cores_ready: The bit mask of cores ready for job submission ++ * @cores_transitioning: The bit mask of cores that are transitioning power ++ * state + * -+ * Return: Temperature scaling factor. Approx range 0 < ret < 10,000,000. 
++ * Update core availability policy with current core power status ++ * ++ * Calls into the core availability policy + */ -+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) ++void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, ++ u64 cores_transitioning); ++ ++/** ++ * kbase_pm_ca_instr_enable - Enable override for instrumentation ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This overrides the output of the core availability policy, ensuring that all ++ * cores are available ++ */ ++void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_ca_instr_disable - Disable override for instrumentation ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This disables any previously enabled override, and resumes normal policy ++ * functionality ++ */ ++void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_PM_CA_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c +new file mode 100644 +index 000000000..66bf660cf +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c +@@ -0,0 +1,129 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++ ++ ++/* ++ * A core availability policy implementing core mask selection from devfreq OPPs ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) +{ -+ /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ -+ u32 remainder; -+ // static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) -+ const s64 t2 = div_s64_rem((t * t), 1000, &remainder); ++ struct kbasep_pm_ca_policy_devfreq *data = ++ &kbdev->pm.backend.ca_policy_data.devfreq; ++ unsigned long flags; + -+ /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ -+ const s64 t3 = div_s64_rem((t * t2), 1000, &remainder); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* -+ * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in -+ * Deg^-N, so we need to multiply the last coefficient by 1000. 
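The instrumentation override documented above and the NULL-policy window that exists during a policy switch give kbase_pm_ca_get_core_mask() a fixed precedence: instrumentation or a missing policy both fall back to the full set of present shader cores, and the user-supplied debug core mask is applied last in every case. A small standalone sketch of that precedence (the struct, its fields and the half_mask() policy are invented for illustration):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical stand-ins; the names below are illustrative, not the driver's. */
struct gpu {
	uint64_t shader_present;       /* cores that physically exist             */
	uint64_t debug_core_mask;      /* user-imposed limit on usable cores      */
	bool instr_enabled;            /* instrumentation override active         */
	uint64_t (*policy_mask)(void); /* NULL while a policy switch is in flight */
};

static uint64_t available_core_mask(const struct gpu *g)
{
	/* Instrumentation needs every core, and so does the window where no
	 * policy is installed; both fall back to the full present mask. The
	 * debug mask is applied last in every case. */
	if (g->instr_enabled || !g->policy_mask)
		return g->shader_present & g->debug_core_mask;

	return g->policy_mask() & g->debug_core_mask;
}

static uint64_t half_mask(void) { return 0x3; }

int main(void)
{
	struct gpu g = { 0xF, 0xF, false, half_mask };

	printf("policy-selected: %#llx\n",
	       (unsigned long long)available_core_mask(&g)); /* 0x3 */
	g.instr_enabled = true;
	printf("instrumentation: %#llx\n",
	       (unsigned long long)available_core_mask(&g)); /* 0xf */
	return 0;
}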
-+ * Range: -2^63 < res_big < 2^63 ++ data->cores_desired = core_mask; ++ ++ /* Disable any cores that are now unwanted */ ++ data->cores_enabled &= data->cores_desired; ++ ++ kbdev->pm.backend.ca_in_transition = true; ++ ++ /* If there are no cores to be powered off then power on desired cores + */ -+ const s64 res_big = ts[3] * t3 /* +/- 2^62 */ -+ + ts[2] * t2 /* +/- 2^55 */ -+ + ts[1] * t /* +/- 2^48 */ -+ + ts[0] * 1000; /* +/- 2^41 */ ++ if (!(data->cores_used & ~data->cores_desired)) { ++ data->cores_enabled = data->cores_desired; ++ kbdev->pm.backend.ca_in_transition = false; ++ } + -+ /* Range: -2^60 < res_unclamped < 2^60 */ -+ s64 res_unclamped = div_s64_rem(res_big, 1000, &remainder); ++ kbase_pm_update_cores_state_nolock(kbdev); + -+ /* Clamp to range of 0x to 10x the static power */ -+ return clamp(res_unclamped, (s64) 0, (s64) 10000000); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n", ++ data->cores_desired, data->cores_enabled); +} + -+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++static void devfreq_init(struct kbase_device *kbdev) +{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -+ unsigned long temp; -+#else -+ int temp; -+#endif -+ u32 temp_scaling_factor; -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *) model->model_data; -+ struct thermal_zone_device *gpu_tz = model_data->gpu_tz; -+ u64 coeffp_big; -+ -+ if (gpu_tz) { -+ int ret; ++ struct kbasep_pm_ca_policy_devfreq *data = ++ &kbdev->pm.backend.ca_policy_data.devfreq; + -+ ret = gpu_tz->ops->get_temp(gpu_tz, &temp); -+ if (ret) { -+ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", -+ ret); -+ temp = FALLBACK_STATIC_TEMPERATURE; -+ } ++ if (kbdev->current_core_mask) { ++ data->cores_enabled = kbdev->current_core_mask; ++ data->cores_desired = kbdev->current_core_mask; + } else { -+ temp = FALLBACK_STATIC_TEMPERATURE; ++ data->cores_enabled = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ data->cores_desired = ++ kbdev->gpu_props.props.raw_props.shader_present; + } ++ data->cores_used = 0; ++ kbdev->pm.backend.ca_in_transition = false; ++} + -+ temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, -+ temp); -+ coeffp_big = (u64)model_data->static_coefficient * temp_scaling_factor; -+ *coeffp = div_u64(coeffp_big, 1000000); ++static void devfreq_term(struct kbase_device *kbdev) ++{ ++} + -+ return 0; ++static u64 devfreq_get_core_mask(struct kbase_device *kbdev) ++{ ++ return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled; +} + -+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, -+ u32 current_freq) ++static void devfreq_update_core_status(struct kbase_device *kbdev, ++ u64 cores_ready, ++ u64 cores_transitioning) +{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *) model->model_data; ++ struct kbasep_pm_ca_policy_devfreq *data = ++ &kbdev->pm.backend.ca_policy_data.devfreq; + -+ *coeffp = model_data->dynamic_coefficient; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return 0; ++ data->cores_used = cores_ready | cores_transitioning; ++ ++ /* If in desired state then clear transition flag */ ++ if (data->cores_enabled == data->cores_desired) ++ kbdev->pm.backend.ca_in_transition = false; ++ ++ /* If all undesired cores are now off then power on desired cores. 
++ * The direct comparison against cores_enabled limits potential ++ * recursion to one level */ ++ if (!(data->cores_used & ~data->cores_desired) && ++ data->cores_enabled != data->cores_desired) { ++ data->cores_enabled = data->cores_desired; ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ kbdev->pm.backend.ca_in_transition = false; ++ } +} + -+static int add_params(struct kbase_ipa_model *model) -+{ -+ int err = 0; -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; ++/* ++ * The struct kbase_pm_ca_policy structure for the devfreq core availability ++ * policy. ++ * ++ * This is the static structure that defines the devfreq core availability power ++ * policy's callback and name. ++ */ ++const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = { ++ "devfreq", /* name */ ++ devfreq_init, /* init */ ++ devfreq_term, /* term */ ++ devfreq_get_core_mask, /* get_core_mask */ ++ devfreq_update_core_status, /* update_core_status */ ++ 0u, /* flags */ ++ KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */ ++}; + -+ err = kbase_ipa_model_add_param_s32(model, "static-coefficient", -+ &model_data->static_coefficient, -+ 1, true); -+ if (err) -+ goto end; +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +new file mode 100644 +index 000000000..7ab3cd4d8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +@@ -0,0 +1,55 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", -+ &model_data->dynamic_coefficient, -+ 1, true); -+ if (err) -+ goto end; + -+ err = kbase_ipa_model_add_param_s32(model, "ts", -+ model_data->ts, 4, true); -+ if (err) -+ goto end; + -+ err = kbase_ipa_model_add_param_string(model, "thermal-zone", -+ model_data->tz_name, -+ sizeof(model_data->tz_name), true); ++/* ++ * A core availability policy for use with devfreq, where core masks are ++ * associated with OPPs. ++ */ + -+end: -+ return err; -+} ++#ifndef MALI_KBASE_PM_CA_DEVFREQ_H ++#define MALI_KBASE_PM_CA_DEVFREQ_H + -+static int kbase_simple_power_model_init(struct kbase_ipa_model *model) -+{ -+ int err; -+ struct kbase_ipa_model_simple_data *model_data; ++/** ++ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy ++ * ++ * This contains data that is private to the devfreq core availability ++ * policy. 
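To make the devfreq bookkeeping above concrete: cores are dropped from the reported mask as soon as an OPP stops wanting them, but newly wanted cores are only reported once nothing outside the desired set is still powered or transitioning. A self-contained sketch of the three masks (documented in the field list that follows) and the two update paths; the struct and function names below mirror the driver's but are illustrative, not the driver's own:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical stand-in for the three masks tracked by the devfreq CA policy. */
struct devfreq_ca {
	uint64_t desired;  /* what the OPP wants available      */
	uint64_t enabled;  /* what the policy currently reports */
	uint64_t used;     /* cores powered or transitioning    */
	bool in_transition;
};

/* New OPP selects a core mask: drop cores immediately, grow the mask only
 * once nothing outside the desired set is still powered. */
static void set_core_mask(struct devfreq_ca *ca, uint64_t mask)
{
	ca->desired = mask;
	ca->enabled &= ca->desired;        /* never report unwanted cores */
	ca->in_transition = true;

	if (!(ca->used & ~ca->desired)) {  /* nothing left to power down  */
		ca->enabled = ca->desired;
		ca->in_transition = false;
	}
}

/* Power-state feedback from the hardware. */
static void update_core_status(struct devfreq_ca *ca,
			       uint64_t ready, uint64_t transitioning)
{
	ca->used = ready | transitioning;

	if (ca->enabled == ca->desired)
		ca->in_transition = false;

	if (!(ca->used & ~ca->desired) && ca->enabled != ca->desired) {
		ca->enabled = ca->desired; /* undesired cores are off: grow */
		ca->in_transition = false;
	}
}

int main(void)
{
	struct devfreq_ca ca = { 0xF, 0xF, 0xF, false };

	set_core_mask(&ca, 0x3);           /* shrink from four cores to two */
	printf("enabled=%#llx transition=%d\n",
	       (unsigned long long)ca.enabled, ca.in_transition); /* 0x3, 1 */

	update_core_status(&ca, 0x3, 0x0); /* the unwanted cores are now off */
	printf("enabled=%#llx transition=%d\n",
	       (unsigned long long)ca.enabled, ca.in_transition); /* 0x3, 0 */
	return 0;
}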
++ * ++ * @cores_desired: Cores that the policy wants to be available ++ * @cores_enabled: Cores that the policy is currently returning as available ++ * @cores_used: Cores currently powered or transitioning ++ */ ++struct kbasep_pm_ca_policy_devfreq { ++ u64 cores_desired; ++ u64 cores_enabled; ++ u64 cores_used; ++}; + -+ model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), -+ GFP_KERNEL); -+ if (!model_data) -+ return -ENOMEM; ++extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + -+ model->model_data = (void *) model_data; ++/** ++ * kbase_devfreq_set_core_mask - Set core mask for policy to use ++ * @kbdev: Device pointer ++ * @core_mask: New core mask ++ * ++ * The new core mask will have immediate effect if the GPU is powered, or will ++ * take effect when it is next powered on. ++ */ ++void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + -+ err = add_params(model); ++#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ + -+ return err; -+} +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c +new file mode 100644 +index 000000000..864612d31 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c +@@ -0,0 +1,65 @@ ++/* ++ * ++ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) -+{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; + -+ if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { -+ model_data->gpu_tz = NULL; -+ } else { -+ model_data->gpu_tz = thermal_zone_get_zone_by_name(model_data->tz_name); + -+ if (IS_ERR(model_data->gpu_tz)) { -+ pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", -+ PTR_ERR(model_data->gpu_tz), -+ model_data->tz_name); -+ model_data->gpu_tz = NULL; -+ return -EPROBE_DEFER; -+ } -+ } ++/* ++ * A power policy implementing fixed core availability ++ */ + -+ return 0; ++#include ++#include ++ ++static void fixed_init(struct kbase_device *kbdev) ++{ ++ kbdev->pm.backend.ca_in_transition = false; +} + -+static void kbase_simple_power_model_term(struct kbase_ipa_model *model) ++static void fixed_term(struct kbase_device *kbdev) +{ -+ struct kbase_ipa_model_simple_data *model_data = -+ (struct kbase_ipa_model_simple_data *)model->model_data; ++ CSTD_UNUSED(kbdev); ++} + -+ kfree(model_data); ++static u64 fixed_get_core_mask(struct kbase_device *kbdev) ++{ ++ return kbdev->gpu_props.props.raw_props.shader_present; +} + -+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { -+ .name = "mali-simple-power-model", -+ .init = &kbase_simple_power_model_init, -+ .recalculate = &kbase_simple_power_model_recalculate, -+ .term = &kbase_simple_power_model_term, -+ .get_dynamic_coeff = &model_dynamic_coeff, -+ .get_static_coeff = &model_static_coeff, -+ .do_utilization_scaling_in_framework = true, ++static void fixed_update_core_status(struct kbase_device *kbdev, ++ u64 cores_ready, ++ u64 cores_transitioning) ++{ ++ CSTD_UNUSED(kbdev); ++ CSTD_UNUSED(cores_ready); ++ CSTD_UNUSED(cores_transitioning); ++} ++ ++/* ++ * The struct kbase_pm_policy structure for the fixed power policy. ++ * ++ * This is the static structure that defines the fixed power policy's callback ++ * and name. ++ */ ++const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = { ++ "fixed", /* name */ ++ fixed_init, /* init */ ++ fixed_term, /* term */ ++ fixed_get_core_mask, /* get_core_mask */ ++ fixed_update_core_status, /* update_core_status */ ++ 0u, /* flags */ ++ KBASE_PM_CA_POLICY_ID_FIXED, /* id */ +}; -diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h ++ ++KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h new file mode 100644 -index 000000000..6be0a334f +index 000000000..a763155cb --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h -@@ -0,0 +1,311 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h +@@ -0,0 +1,40 @@ +/* + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -348683,309 +351291,184 @@ index 000000000..6be0a334f + + + -+/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, -+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py -+ * For more information see base/tools/hwconfig_generator/README ++/* ++ * A power policy implementing fixed core availability + */ + -+#ifndef _BASE_HWCONFIG_FEATURES_H_ -+#define _BASE_HWCONFIG_FEATURES_H_ ++#ifndef MALI_KBASE_PM_CA_FIXED_H ++#define MALI_KBASE_PM_CA_FIXED_H + -+enum base_hw_feature { -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, -+ BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_V4, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_AARCH64_MMU, -+ BASE_HW_FEATURE_END ++/** ++ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data ++ * ++ * @dummy: Dummy member - no state is needed ++ * ++ * This contains data that is private to the particular power policy that is ++ * active. ++ */ ++struct kbasep_pm_ca_policy_fixed { ++ int dummy; +}; + -+static const enum base_hw_feature base_hw_features_generic[] = { -+ BASE_HW_FEATURE_END -+}; ++extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops; + -+static const enum base_hw_feature base_hw_features_t60x[] = { -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_V4, -+ BASE_HW_FEATURE_END -+}; ++#endif /* MALI_KBASE_PM_CA_FIXED_H */ + -+static const enum base_hw_feature base_hw_features_t62x[] = { -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_V4, -+ BASE_HW_FEATURE_END -+}; +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +new file mode 100644 +index 000000000..f891fa225 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +@@ -0,0 +1,70 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+static const enum base_hw_feature base_hw_features_t72x[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_V4, -+ BASE_HW_FEATURE_END -+}; + -+static const enum base_hw_feature base_hw_features_t76x[] = { -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_END -+}; + -+static const enum base_hw_feature base_hw_features_tFxx[] = { -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_END -+}; + -+static const enum base_hw_feature base_hw_features_t83x[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_END -+}; + -+static const enum base_hw_feature base_hw_features_t82x[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_END -+}; ++/* ++ * "Coarse Demand" power management policy ++ */ + -+static const enum base_hw_feature base_hw_features_tMIx[] = { -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ 
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_END -+}; ++#include ++#include + -+static const enum base_hw_feature base_hw_features_tHEx[] = { -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_END -+}; ++static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) ++{ ++ if (kbdev->pm.active_count == 0) ++ return 0; + -+static const enum base_hw_feature base_hw_features_tSIx[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_END -+}; ++ return kbdev->gpu_props.props.raw_props.shader_present; ++} ++ ++static bool coarse_demand_get_core_active(struct kbase_device *kbdev) ++{ ++ if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | ++ kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt ++ && !kbdev->tiler_inuse_cnt) ++ return false; + ++ return true; ++} + -+#ifdef MALI_INCLUDE_TKAX -+static const enum base_hw_feature base_hw_features_tKAx[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ 
BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_END ++static void coarse_demand_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++ ++static void coarse_demand_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++ ++/* The struct kbase_pm_policy structure for the demand power policy. ++ * ++ * This is the static structure that defines the demand power policy's callback ++ * and name. ++ */ ++const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { ++ "coarse_demand", /* name */ ++ coarse_demand_init, /* init */ ++ coarse_demand_term, /* term */ ++ coarse_demand_get_core_mask, /* get_core_mask */ ++ coarse_demand_get_core_active, /* get_core_active */ ++ 0u, /* flags */ ++ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ +}; + -+#endif /* MALI_INCLUDE_TKAX */ ++KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +new file mode 100644 +index 000000000..749d305ee +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +@@ -0,0 +1,64 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+#ifdef MALI_INCLUDE_TTRX -+static const enum base_hw_feature base_hw_features_tTRx[] = { -+ BASE_HW_FEATURE_33BIT_VA, -+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, -+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, -+ BASE_HW_FEATURE_XAFFINITY, -+ BASE_HW_FEATURE_WARPING, -+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, -+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, -+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, -+ BASE_HW_FEATURE_BRNDOUT_CC, -+ BASE_HW_FEATURE_BRNDOUT_KILL, -+ BASE_HW_FEATURE_LD_ST_LEA_TEX, -+ BASE_HW_FEATURE_LD_ST_TILEBUFFER, -+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, -+ BASE_HW_FEATURE_MRT, -+ BASE_HW_FEATURE_MSAA_16X, -+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, -+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, -+ BASE_HW_FEATURE_T7XX_PAIRING_RULES, -+ BASE_HW_FEATURE_TEST4_DATUM_MODE, -+ BASE_HW_FEATURE_FLUSH_REDUCTION, -+ BASE_HW_FEATURE_PROTECTED_MODE, -+ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, -+ BASE_HW_FEATURE_COHERENCY_REG, -+ BASE_HW_FEATURE_END ++ ++ ++ ++ ++/* ++ * "Coarse Demand" power management policy ++ */ ++ ++#ifndef MALI_KBASE_PM_COARSE_DEMAND_H ++#define MALI_KBASE_PM_COARSE_DEMAND_H ++ ++/** ++ * DOC: ++ * The "Coarse" demand power management policy has the following ++ * characteristics: ++ * - When KBase indicates that the GPU will be powered up, but we don't yet ++ * know which Job Chains are to be run: ++ * - All Shader Cores are powered up, regardless of whether or not they will ++ * be needed later. 
++ * - When KBase indicates that a set of Shader Cores are needed to submit the ++ * currently queued Job Chains: ++ * - All Shader Cores are kept powered, regardless of whether or not they will ++ * be needed ++ * - When KBase indicates that the GPU need not be powered: ++ * - The Shader Cores are powered off, and the GPU itself is powered off too. ++ * ++ * @note: ++ * - KBase indicates the GPU will be powered up when it has a User Process that ++ * has just started to submit Job Chains. ++ * - KBase indicates the GPU need not be powered when all the Job Chains from ++ * User Processes have finished, and it is waiting for a User Process to ++ * submit some more Job Chains. ++ */ ++ ++/** ++ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand ++ * policy ++ * ++ * This contains data that is private to the coarse demand power policy. ++ * ++ * @dummy: Dummy member - no state needed ++ */ ++struct kbasep_pm_policy_coarse_demand { ++ int dummy; +}; + -+#endif /* MALI_INCLUDE_TTRX */ ++extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; + -+#endif /* _BASE_HWCONFIG_FEATURES_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h ++#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h new file mode 100644 -index 000000000..6d7e5c57e +index 000000000..352744ee6 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h -@@ -0,0 +1,1098 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +@@ -0,0 +1,519 @@ +/* + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -349000,1096 +351483,517 @@ index 000000000..6d7e5c57e + + + -+/* AUTOMATICALLY GENERATED FILE. 
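The coarse_demand policy documented above reduces to two very small decisions: report every present shader core whenever the GPU holds an active reference, and allow the GPU to power off only when nothing is queued, running or holding it active. A standalone sketch of those two callbacks (the struct below is an invented stand-in for the driver state they read):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Illustrative stand-in for the state the coarse_demand callbacks consult;
 * the field names follow the driver but this struct is not the driver's. */
struct pm_state {
	int active_count;          /* contexts holding the GPU active      */
	uint64_t shader_needed;    /* cores requested by queued job chains */
	uint64_t shader_inuse;     /* cores running job chains             */
	int tiler_needed;
	int tiler_inuse;
	uint64_t shader_present;   /* all physical shader cores            */
};

/* All-or-nothing shader mask: every core while anything is active. */
static uint64_t coarse_core_mask(const struct pm_state *s)
{
	return s->active_count ? s->shader_present : 0;
}

/* The GPU may power off only when nothing at all is queued, running or
 * holding an active reference. */
static bool coarse_core_active(const struct pm_state *s)
{
	return s->active_count ||
	       (s->shader_needed | s->shader_inuse) ||
	       s->tiler_needed || s->tiler_inuse;
}

int main(void)
{
	struct pm_state idle = { 0, 0, 0, 0, 0, 0xF };
	struct pm_state busy = { 1, 0x1, 0, 0, 0, 0xF };

	printf("idle: mask=%#llx active=%d\n",
	       (unsigned long long)coarse_core_mask(&idle),
	       coarse_core_active(&idle));  /* mask=0x0 active=0 */
	printf("busy: mask=%#llx active=%d\n",
	       (unsigned long long)coarse_core_mask(&busy),
	       coarse_core_active(&busy));  /* mask=0xf active=1 */
	return 0;
}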
If you want to amend the issues/features, -+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py -+ * For more information see base/tools/hwconfig_generator/README ++/* ++ * Backend-specific Power Manager definitions + */ + -+#ifndef _BASE_HWCONFIG_ISSUES_H_ -+#define _BASE_HWCONFIG_ISSUES_H_ ++#ifndef _KBASE_PM_HWACCESS_DEFS_H_ ++#define _KBASE_PM_HWACCESS_DEFS_H_ + -+enum base_hw_issue { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_6367, -+ BASE_HW_ISSUE_6398, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_6787, -+ BASE_HW_ISSUE_7027, -+ BASE_HW_ISSUE_7144, -+ BASE_HW_ISSUE_7304, -+ BASE_HW_ISSUE_8073, -+ BASE_HW_ISSUE_8186, -+ BASE_HW_ISSUE_8215, -+ BASE_HW_ISSUE_8245, -+ BASE_HW_ISSUE_8250, -+ BASE_HW_ISSUE_8260, -+ BASE_HW_ISSUE_8280, -+ BASE_HW_ISSUE_8316, -+ BASE_HW_ISSUE_8381, -+ BASE_HW_ISSUE_8394, -+ BASE_HW_ISSUE_8401, -+ BASE_HW_ISSUE_8408, -+ BASE_HW_ISSUE_8443, -+ BASE_HW_ISSUE_8456, -+ BASE_HW_ISSUE_8564, -+ BASE_HW_ISSUE_8634, -+ BASE_HW_ISSUE_8778, -+ BASE_HW_ISSUE_8791, -+ BASE_HW_ISSUE_8833, -+ BASE_HW_ISSUE_8879, -+ BASE_HW_ISSUE_8896, -+ BASE_HW_ISSUE_8975, -+ BASE_HW_ISSUE_8986, -+ BASE_HW_ISSUE_8987, -+ BASE_HW_ISSUE_9010, -+ BASE_HW_ISSUE_9418, -+ BASE_HW_ISSUE_9423, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_9510, -+ BASE_HW_ISSUE_9566, -+ BASE_HW_ISSUE_9630, -+ BASE_HW_ISSUE_10127, -+ BASE_HW_ISSUE_10327, -+ BASE_HW_ISSUE_10410, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10487, -+ BASE_HW_ISSUE_10607, -+ BASE_HW_ISSUE_10632, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10676, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10797, -+ BASE_HW_ISSUE_10817, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10959, -+ BASE_HW_ISSUE_10969, -+ BASE_HW_ISSUE_10984, -+ BASE_HW_ISSUE_10995, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11035, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++#include "mali_kbase_pm_ca_fixed.h" ++#include "mali_kbase_pm_ca_devfreq.h" ++#if !MALI_CUSTOMER_RELEASE ++#include "mali_kbase_pm_ca_random.h" ++#endif + -+static const enum base_hw_issue base_hw_issues_generic[] = { -+ BASE_HW_ISSUE_END -+}; ++#include "mali_kbase_pm_always_on.h" ++#include "mali_kbase_pm_coarse_demand.h" ++#include "mali_kbase_pm_demand.h" ++#if !MALI_CUSTOMER_RELEASE ++#include "mali_kbase_pm_demand_always_powered.h" ++#include "mali_kbase_pm_fast_start.h" ++#endif + -+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { -+ BASE_HW_ISSUE_6367, -+ BASE_HW_ISSUE_6398, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_6787, -+ BASE_HW_ISSUE_7027, -+ BASE_HW_ISSUE_7144, -+ BASE_HW_ISSUE_7304, -+ BASE_HW_ISSUE_8073, -+ BASE_HW_ISSUE_8186, -+ BASE_HW_ISSUE_8215, -+ 
BASE_HW_ISSUE_8245, -+ BASE_HW_ISSUE_8250, -+ BASE_HW_ISSUE_8260, -+ BASE_HW_ISSUE_8280, -+ BASE_HW_ISSUE_8316, -+ BASE_HW_ISSUE_8381, -+ BASE_HW_ISSUE_8394, -+ BASE_HW_ISSUE_8401, -+ BASE_HW_ISSUE_8408, -+ BASE_HW_ISSUE_8443, -+ BASE_HW_ISSUE_8456, -+ BASE_HW_ISSUE_8564, -+ BASE_HW_ISSUE_8634, -+ BASE_HW_ISSUE_8778, -+ BASE_HW_ISSUE_8791, -+ BASE_HW_ISSUE_8833, -+ BASE_HW_ISSUE_8896, -+ BASE_HW_ISSUE_8975, -+ BASE_HW_ISSUE_8986, -+ BASE_HW_ISSUE_8987, -+ BASE_HW_ISSUE_9010, -+ BASE_HW_ISSUE_9418, -+ BASE_HW_ISSUE_9423, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_9510, -+ BASE_HW_ISSUE_9566, -+ BASE_HW_ISSUE_9630, -+ BASE_HW_ISSUE_10410, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10487, -+ BASE_HW_ISSUE_10607, -+ BASE_HW_ISSUE_10632, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10676, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10969, -+ BASE_HW_ISSUE_10984, -+ BASE_HW_ISSUE_10995, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11035, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_3964, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; ++struct kbase_jd_atom; + -+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { -+ BASE_HW_ISSUE_6367, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_6787, -+ BASE_HW_ISSUE_7027, -+ BASE_HW_ISSUE_7304, -+ BASE_HW_ISSUE_8408, -+ BASE_HW_ISSUE_8564, -+ BASE_HW_ISSUE_8778, -+ BASE_HW_ISSUE_8975, -+ BASE_HW_ISSUE_9010, -+ BASE_HW_ISSUE_9418, -+ BASE_HW_ISSUE_9423, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_9510, -+ BASE_HW_ISSUE_10410, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10487, -+ BASE_HW_ISSUE_10607, -+ BASE_HW_ISSUE_10632, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10676, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10969, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11035, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END ++/** ++ * enum kbase_pm_core_type - The types of core in a GPU. ++ * ++ * These enumerated values are used in calls to ++ * - kbase_pm_get_present_cores() ++ * - kbase_pm_get_active_cores() ++ * - kbase_pm_get_trans_cores() ++ * - kbase_pm_get_ready_cores(). ++ * ++ * They specify which type of core should be acted on. These values are set in ++ * a manner that allows core_type_to_reg() function to be simpler and more ++ * efficient. 
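As an illustration of the point just made about core_type_to_reg(): because each enumerator's value is the offset of that core type's *_PRESENT_LO register, selecting a register can reduce to adding a per-action offset to the enum value. The register offsets in the sketch below are invented; only the pattern is taken from the comment above:

#include <stdio.h>
#include <stdint.h>

/* Invented register map for the sketch: each core type's registers form a
 * fixed-stride block starting at its *_PRESENT_LO offset, so the enum value
 * doubles as the base register address. */
#define SHADER_PRESENT_LO 0x100
#define TILER_PRESENT_LO  0x110
#define L2_PRESENT_LO     0x120

enum core_type {            /* value == base register offset */
	CORE_SHADER = SHADER_PRESENT_LO,
	CORE_TILER  = TILER_PRESENT_LO,
	CORE_L2     = L2_PRESENT_LO,
};

enum core_action {          /* value == offset from the PRESENT register */
	ACTION_PRESENT = 0x0,
	ACTION_READY   = 0x4,
	ACTION_PWRON   = 0x8,
};

/* With the encoding above, core_type_to_reg() collapses to an addition. */
static uint32_t core_type_to_reg(enum core_type type, enum core_action action)
{
	return (uint32_t)type + (uint32_t)action;
}

int main(void)
{
	printf("TILER READY register: %#x\n",
	       (unsigned int)core_type_to_reg(CORE_TILER, ACTION_READY)); /* 0x114 */
	return 0;
}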
++ * ++ * @KBASE_PM_CORE_L2: The L2 cache ++ * @KBASE_PM_CORE_SHADER: Shader cores ++ * @KBASE_PM_CORE_TILER: Tiler cores ++ * @KBASE_PM_CORE_STACK: Core stacks ++ */ ++enum kbase_pm_core_type { ++ KBASE_PM_CORE_L2 = L2_PRESENT_LO, ++ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, ++ KBASE_PM_CORE_TILER = TILER_PRESENT_LO, ++ KBASE_PM_CORE_STACK = STACK_PRESENT_LO +}; + -+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { -+ BASE_HW_ISSUE_6367, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_6787, -+ BASE_HW_ISSUE_7027, -+ BASE_HW_ISSUE_7304, -+ BASE_HW_ISSUE_8408, -+ BASE_HW_ISSUE_8564, -+ BASE_HW_ISSUE_8778, -+ BASE_HW_ISSUE_8975, -+ BASE_HW_ISSUE_9010, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_9510, -+ BASE_HW_ISSUE_10410, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10487, -+ BASE_HW_ISSUE_10607, -+ BASE_HW_ISSUE_10632, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10676, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11035, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END ++/** ++ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power ++ * management framework. ++ * ++ * @time_period_start: time at which busy/idle measurements started ++ * @time_busy: number of ns the GPU was busy executing jobs since the ++ * @time_period_start timestamp. ++ * @time_idle: number of ns since time_period_start the GPU was not executing ++ * jobs since the @time_period_start timestamp. ++ * @prev_busy: busy time in ns of previous time period. ++ * Updated when metrics are reset. ++ * @prev_idle: idle time in ns of previous time period ++ * Updated when metrics are reset. ++ * @gpu_active: true when the GPU is executing jobs. false when ++ * not. Updated when the job scheduler informs us a job in submitted ++ * or removed from a GPU slot. ++ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that ++ * if two CL jobs were active for 400ns, this value would be updated ++ * with 800. ++ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that ++ * if two GL jobs were active for 400ns, this value would be updated ++ * with 800. ++ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. ++ * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As ++ * GL jobs never run on slot 2 this slot is not recorded. ++ * @lock: spinlock protecting the kbasep_pm_metrics_data structure ++ * @timer: timer to regularly make DVFS decisions based on the power ++ * management metrics. 
++ * @timer_active: boolean indicating @timer is running ++ * @platform_data: pointer to data controlled by platform specific code ++ * @kbdev: pointer to kbase device for which metrics are collected ++ * ++ */ ++struct kbasep_pm_metrics_data { ++ ktime_t time_period_start; ++ u32 time_busy; ++ u32 time_idle; ++ u32 prev_busy; ++ u32 prev_idle; ++ bool gpu_active; ++ u32 busy_cl[2]; ++ u32 busy_gl; ++ u32 active_cl_ctx[2]; ++ u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ ++ spinlock_t lock; ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ struct hrtimer timer; ++ bool timer_active; ++#endif ++ ++ void *platform_data; ++ struct kbase_device *kbdev; +}; + -+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10127, -+ BASE_HW_ISSUE_10327, -+ BASE_HW_ISSUE_10410, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10487, -+ BASE_HW_ISSUE_10607, -+ BASE_HW_ISSUE_10632, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10676, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10817, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10959, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11035, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END ++union kbase_pm_policy_data { ++ struct kbasep_pm_policy_always_on always_on; ++ struct kbasep_pm_policy_coarse_demand coarse_demand; ++ struct kbasep_pm_policy_demand demand; ++#if !MALI_CUSTOMER_RELEASE ++ struct kbasep_pm_policy_demand_always_powered demand_always_powered; ++ struct kbasep_pm_policy_fast_start fast_start; ++#endif +}; + -+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10959, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_10959, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ 
BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_26, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3542, -+ BASE_HW_ISSUE_T76X_3556, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, 
-+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10797, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10797, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10684, -+ BASE_HW_ISSUE_10797, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; -+ -+static const enum base_hw_issue base_hw_issues_model_t72x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10471, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10797, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END ++union kbase_pm_ca_policy_data { ++ struct kbasep_pm_ca_policy_fixed fixed; ++ struct kbasep_pm_ca_policy_devfreq devfreq; ++#if !MALI_CUSTOMER_RELEASE ++ struct kbasep_pm_ca_policy_random random; ++#endif +}; + -+static const enum base_hw_issue base_hw_issues_model_t76x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * struct kbase_pm_backend_data - Data stored per device for power management. ++ * ++ * This structure contains data for the power management framework. There is one ++ * instance of this structure per device in the system. ++ * ++ * @ca_current_policy: The policy that is currently actively controlling core ++ * availability. ++ * @pm_current_policy: The policy that is currently actively controlling the ++ * power state. ++ * @ca_policy_data: Private data for current CA policy ++ * @pm_policy_data: Private data for current PM policy ++ * @ca_in_transition: Flag indicating when core availability policy is ++ * transitioning cores. The core availability policy must ++ * set this when a change in core availability is occurring. ++ * power_change_lock must be held when accessing this. 
++ * @reset_done: Flag when a reset is complete ++ * @reset_done_wait: Wait queue to wait for changes to @reset_done ++ * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as ++ * requested ++ * @l2_powered: State indicating whether all the l2 caches are powered. ++ * Non-zero indicates they're *all* powered ++ * Zero indicates that some (or all) are not powered ++ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter ++ * users ++ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests ++ * @desired_shader_state: A bit mask identifying the shader cores that the ++ * power policy would like to be on. The current state ++ * of the cores may be different, but there should be ++ * transitions in progress that will eventually achieve ++ * this state (assuming that the policy doesn't change ++ * its mind in the mean time). ++ * @powering_on_shader_state: A bit mask indicating which shader cores are ++ * currently in a power-on transition ++ * @desired_tiler_state: A bit mask identifying the tiler cores that the power ++ * policy would like to be on. See @desired_shader_state ++ * @powering_on_tiler_state: A bit mask indicating which tiler core are ++ * currently in a power-on transition ++ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently ++ * in a power-on transition ++ * @powering_on_stack_state: A bit mask indicating which core stacks are ++ * currently in a power-on transition ++ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested ++ * by the desired_xxx_state variables ++ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 ++ * @gpu_powered: Set to true when the GPU is powered and register ++ * accesses are possible, false otherwise ++ * @instr_enabled: Set to true when instrumentation is enabled, ++ * false otherwise ++ * @cg1_disabled: Set if the policy wants to keep the second core group ++ * powered off ++ * @driver_ready_for_irqs: Debug state indicating whether sufficient ++ * initialization of the driver has occurred to handle ++ * IRQs ++ * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or ++ * accessing @driver_ready_for_irqs ++ * @metrics: Structure to hold metrics for the GPU ++ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is ++ * powered off ++ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders ++ * and/or timers are powered off ++ * @gpu_poweroff_timer: Timer for powering off GPU ++ * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires ++ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq ++ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next ++ * timer callback ++ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer ++ * callback ++ * @poweroff_timer_needed: true if the poweroff timer is currently required, ++ * false otherwise ++ * @poweroff_timer_running: true if the poweroff timer is currently running, ++ * false otherwise ++ * power_change_lock should be held when accessing, ++ * unless there is no way the timer can be running (eg ++ * hrtimer_cancel() was called immediately before) ++ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. ++ * hwaccess_lock must be held when accessing ++ * @poweron_required: true if a GPU power on is required. 
Should only be set ++ * when poweroff_wait_in_progress is true, and therefore the ++ * GPU can not immediately be powered on. pm.lock must be ++ * held when accessing ++ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend ++ * request. pm.lock must be held when accessing ++ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off ++ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq ++ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete ++ * @callback_power_on: Callback when the GPU needs to be turned on. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_off: Callback when the GPU may be turned off. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to ++ * be turned off. See &struct kbase_pm_callback_conf ++ * @callback_power_resume: Callback when a resume occurs and the GPU needs to ++ * be turned on. See &struct kbase_pm_callback_conf ++ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_runtime_off: Callback when the GPU may be turned off. See ++ * &struct kbase_pm_callback_conf ++ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See ++ * &struct kbase_pm_callback_conf ++ * ++ * Note: ++ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the ++ * policy is being changed with kbase_pm_ca_set_policy() or ++ * kbase_pm_set_policy(). The change is protected under ++ * kbase_device.pm.power_change_lock. Direct access to this ++ * from IRQ context must therefore check for NULL. If NULL, then ++ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy ++ * functions that would have been done under IRQ. 
++ */ ++struct kbase_pm_backend_data { ++ const struct kbase_pm_ca_policy *ca_current_policy; ++ const struct kbase_pm_policy *pm_current_policy; ++ union kbase_pm_ca_policy_data ca_policy_data; ++ union kbase_pm_policy_data pm_policy_data; ++ bool ca_in_transition; ++ bool reset_done; ++ wait_queue_head_t reset_done_wait; ++ wait_queue_head_t l2_powered_wait; ++ int l2_powered; ++ int gpu_cycle_counter_requests; ++ spinlock_t gpu_cycle_counter_requests_lock; + -+static const enum base_hw_issue base_hw_issues_model_t60x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_8778, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; ++ u64 desired_shader_state; ++ u64 powering_on_shader_state; ++ u64 desired_tiler_state; ++ u64 powering_on_tiler_state; ++ u64 powering_on_l2_state; ++#ifdef CONFIG_MALI_CORESTACK ++ u64 powering_on_stack_state; ++#endif /* CONFIG_MALI_CORESTACK */ + -+static const enum base_hw_issue base_hw_issues_model_t62x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_6402, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10472, -+ BASE_HW_ISSUE_10649, -+ BASE_HW_ISSUE_10931, -+ BASE_HW_ISSUE_11012, -+ BASE_HW_ISSUE_11020, -+ BASE_HW_ISSUE_11024, -+ BASE_HW_ISSUE_11042, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3964, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; ++ bool gpu_in_desired_state; ++ wait_queue_head_t gpu_in_desired_state_wait; + -+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ bool gpu_powered; + -+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ bool instr_enabled; + -+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ bool cg1_disabled; + -+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ 
BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++#ifdef CONFIG_MALI_DEBUG ++ bool driver_ready_for_irqs; ++#endif /* CONFIG_MALI_DEBUG */ + -+static const enum base_hw_issue base_hw_issues_model_tFRx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; ++ spinlock_t gpu_powered_lock; + -+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; + -+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ struct kbasep_pm_metrics_data metrics; + -+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3966, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ int gpu_poweroff_pending; ++ int shader_poweroff_pending_time; + -+static const enum base_hw_issue base_hw_issues_model_t86x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; ++ struct hrtimer gpu_poweroff_timer; ++ struct workqueue_struct *gpu_poweroff_wq; ++ struct work_struct gpu_poweroff_work; + -+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ u64 shader_poweroff_pending; ++ u64 tiler_poweroff_pending; + -+static const enum 
base_hw_issue base_hw_issues_t83x_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ bool poweroff_timer_needed; ++ bool poweroff_timer_running; + -+static const enum base_hw_issue base_hw_issues_model_t83x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ bool poweroff_wait_in_progress; ++ bool poweron_required; ++ bool poweroff_is_suspend; + -+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3964, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ struct workqueue_struct *gpu_poweroff_wait_wq; ++ struct work_struct gpu_poweroff_wait_work; + -+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1909, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++ wait_queue_head_t poweroff_wait; + -+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10821, -+ BASE_HW_ISSUE_10883, -+ BASE_HW_ISSUE_10946, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T720_1386, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_T76X_3960, -+ BASE_HW_ISSUE_T76X_3979, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END ++ int (*callback_power_on)(struct kbase_device *kbdev); ++ void (*callback_power_off)(struct kbase_device *kbdev); ++ void (*callback_power_suspend)(struct kbase_device *kbdev); ++ void (*callback_power_resume)(struct kbase_device *kbdev); ++ int (*callback_power_runtime_on)(struct kbase_device *kbdev); ++ void (*callback_power_runtime_off)(struct kbase_device *kbdev); ++ int (*callback_power_runtime_idle)(struct kbase_device *kbdev); +}; + -+static const enum base_hw_issue base_hw_issues_model_t82x[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_11051, -+ BASE_HW_ISSUE_T76X_1963, -+ BASE_HW_ISSUE_T76X_3086, -+ BASE_HW_ISSUE_T76X_3700, -+ BASE_HW_ISSUE_T76X_3793, -+ BASE_HW_ISSUE_T76X_3979, -+ 
BASE_HW_ISSUE_TMIX_7891, -+ GPUCORE_1619, -+ BASE_HW_ISSUE_END -+}; + -+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_T76X_3953, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END ++/* List of policy IDs */ ++enum kbase_pm_policy_id { ++ KBASE_PM_POLICY_ID_DEMAND = 1, ++ KBASE_PM_POLICY_ID_ALWAYS_ON, ++ KBASE_PM_POLICY_ID_COARSE_DEMAND, ++#if !MALI_CUSTOMER_RELEASE ++ KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, ++ KBASE_PM_POLICY_ID_FAST_START ++#endif +}; + -+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_11054, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8463, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_TMIX_8438, -+ BASE_HW_ISSUE_END -+}; ++typedef u32 kbase_pm_policy_flags; + -+static const enum base_hw_issue base_hw_issues_model_tMIx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_7940, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TMIX_8138, -+ BASE_HW_ISSUE_TMIX_8206, -+ BASE_HW_ISSUE_TMIX_8343, -+ BASE_HW_ISSUE_TMIX_8456, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * struct kbase_pm_policy - Power policy structure. ++ * ++ * Each power policy exposes a (static) instance of this structure which ++ * contains function pointers to the policy's methods. ++ * ++ * @name: The name of this policy ++ * @init: Function called when the policy is selected ++ * @term: Function called when the policy is unselected ++ * @get_core_mask: Function called to get the current shader core mask ++ * @get_core_active: Function called to get the current overall GPU power ++ * state ++ * @flags: Field indicating flags for this policy ++ * @id: Field indicating an ID for this policy. This is not ++ * necessarily the same as its index in the list returned ++ * by kbase_pm_list_policies(). ++ * It is used purely for debugging. ++ */ ++struct kbase_pm_policy { ++ char *name; + -+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called when the policy is selected ++ * ++ * This should initialize the kbdev->pm.pm_policy_data structure. It ++ * should not attempt to make any changes to hardware state. ++ * ++ * It is undefined what state the cores are in when the function is ++ * called. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ */ ++ void (*init)(struct kbase_device *kbdev); + -+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_10682, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called when the policy is unselected. 
++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ */ ++ void (*term)(struct kbase_device *kbdev); + -+static const enum base_hw_issue base_hw_issues_model_tHEx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_7891, -+ BASE_HW_ISSUE_TMIX_8042, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called to get the current shader core mask ++ * ++ * The returned mask should meet or exceed (kbdev->shader_needed_bitmap ++ * | kbdev->shader_inuse_bitmap). ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * ++ * Return: The mask of shader cores to be powered ++ */ ++ u64 (*get_core_mask)(struct kbase_device *kbdev); + -+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called to get the current overall GPU power state ++ * ++ * This function should consider the state of kbdev->pm.active_count. If ++ * this count is greater than 0 then there is at least one active ++ * context on the device and the GPU should be powered. If it is equal ++ * to 0 then there are no active contexts and the GPU could be powered ++ * off if desired. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * ++ * Return: true if the GPU should be powered, false otherwise ++ */ ++ bool (*get_core_active)(struct kbase_device *kbdev); + -+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END ++ kbase_pm_policy_flags flags; ++ enum kbase_pm_policy_id id; +}; + -+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END -+}; + -+static const enum base_hw_issue base_hw_issues_model_tSIx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END ++enum kbase_pm_ca_policy_id { ++ KBASE_PM_CA_POLICY_ID_FIXED = 1, ++ KBASE_PM_CA_POLICY_ID_DEVFREQ, ++ KBASE_PM_CA_POLICY_ID_RANDOM +}; + ++typedef u32 kbase_pm_ca_policy_flags; + ++/** ++ * Maximum length of a CA policy names ++ */ ++#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15 + -+#ifdef MALI_INCLUDE_TKAX -+static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END -+}; ++/** ++ * struct kbase_pm_ca_policy - Core availability policy structure. ++ * ++ * Each core availability policy exposes a (static) instance of this structure ++ * which contains function pointers to the policy's methods. ++ * ++ * @name: The name of this policy ++ * @init: Function called when the policy is selected ++ * @term: Function called when the policy is unselected ++ * @get_core_mask: Function called to get the current shader core ++ * availability mask ++ * @update_core_status: Function called to update the current core status ++ * @flags: Field indicating flags for this policy ++ * @id: Field indicating an ID for this policy. This is not ++ * necessarily the same as its index in the list returned ++ * by kbase_pm_list_policies(). ++ * It is used purely for debugging. 
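++ *
++ * A minimal sketch of a @get_core_mask implementation (illustrative only,
++ * assuming a trivial policy that reports every present shader core as
++ * available; this is not the actual "fixed" policy):
++ *
++ *   static u64 example_ca_get_core_mask(struct kbase_device *kbdev)
++ *   {
++ *           return kbdev->gpu_props.props.raw_props.shader_present;
++ *   }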
++ */ ++struct kbase_pm_ca_policy { ++ char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1]; + -+#endif /* MALI_INCLUDE_TKAX */ ++ /** ++ * Function called when the policy is selected ++ * ++ * This should initialize the kbdev->pm.ca_policy_data structure. It ++ * should not attempt to make any changes to hardware state. ++ * ++ * It is undefined what state the cores are in when the function is ++ * called. ++ * ++ * @kbdev The kbase device structure for the device (must be a ++ * valid pointer) ++ */ ++ void (*init)(struct kbase_device *kbdev); + -+#ifdef MALI_INCLUDE_TKAX -+static const enum base_hw_issue base_hw_issues_model_tKAx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called when the policy is unselected. ++ * ++ * @kbdev The kbase device structure for the device (must be a ++ * valid pointer) ++ */ ++ void (*term)(struct kbase_device *kbdev); + -+#endif /* MALI_INCLUDE_TKAX */ ++ /** ++ * Function called to get the current shader core availability mask ++ * ++ * When a change in core availability is occurring, the policy must set ++ * kbdev->pm.ca_in_transition to true. This is to indicate that ++ * reporting changes in power state cannot be optimized out, even if ++ * kbdev->pm.desired_shader_state remains unchanged. This must be done ++ * by any functions internal to the Core Availability Policy that change ++ * the return value of kbase_pm_ca_policy::get_core_mask. ++ * ++ * @kbdev The kbase device structure for the device (must be a ++ * valid pointer) ++ * ++ * Return: The current core availability mask ++ */ ++ u64 (*get_core_mask)(struct kbase_device *kbdev); + -+#ifdef MALI_INCLUDE_TTRX -+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END -+}; ++ /** ++ * Function called to update the current core status ++ * ++ * If none of the cores in core group 0 are ready or transitioning, then ++ * the policy must ensure that the next call to get_core_mask does not ++ * return 0 for all cores in core group 0. It is an error to disable ++ * core group 0 through the core availability policy. ++ * ++ * When a change in core availability has finished, the policy must set ++ * kbdev->pm.ca_in_transition to false. This is to indicate that ++ * changes in power state can once again be optimized out when ++ * kbdev->pm.desired_shader_state is unchanged. ++ * ++ * @kbdev: The kbase device structure for the device ++ * (must be a valid pointer) ++ * @cores_ready: The mask of cores currently powered and ++ * ready to run jobs ++ * @cores_transitioning: The mask of cores currently transitioning ++ * power state ++ */ ++ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, ++ u64 cores_transitioning); + -+#endif /* MALI_INCLUDE_TTRX */ ++ kbase_pm_ca_policy_flags flags; + -+#ifdef MALI_INCLUDE_TTRX -+static const enum base_hw_issue base_hw_issues_model_tTRx[] = { -+ BASE_HW_ISSUE_5736, -+ BASE_HW_ISSUE_9435, -+ BASE_HW_ISSUE_TMIX_8133, -+ BASE_HW_ISSUE_TSIX_1116, -+ BASE_HW_ISSUE_END ++ /** ++ * Field indicating an ID for this policy. This is not necessarily the ++ * same as its index in the list returned by kbase_pm_list_policies(). ++ * It is used purely for debugging. 
++ */ ++ enum kbase_pm_ca_policy_id id; +}; + -+#endif /* MALI_INCLUDE_TTRX */ -+ -+#endif /* _BASE_HWCONFIG_ISSUES_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h ++#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c new file mode 100644 -index 000000000..ea5e473ca +index 000000000..81322fd0d --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h -@@ -0,0 +1,1858 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c +@@ -0,0 +1,73 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -350106,2560 +352010,2412 @@ index 000000000..ea5e473ca + + + -+/** -+ * @file -+ * Base structures shared with the kernel. ++/* ++ * A simple demand based power management policy + */ + -+#ifndef _BASE_KERNEL_H_ -+#define _BASE_KERNEL_H_ -+ -+#ifndef __user -+#define __user -+#endif -+ -+/* Support UK6 IOCTLS */ -+#define BASE_LEGACY_UK6_SUPPORT 1 ++#include ++#include + -+/* Support UK7 IOCTLS */ -+/* NB: To support UK6 we also need to support UK7 */ -+#define BASE_LEGACY_UK7_SUPPORT 1 ++static u64 demand_get_core_mask(struct kbase_device *kbdev) ++{ ++ u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; + -+/* Support UK8 IOCTLS */ -+#define BASE_LEGACY_UK8_SUPPORT 1 ++ if (0 == kbdev->pm.active_count) ++ return 0; + -+/* Support UK9 IOCTLS */ -+#define BASE_LEGACY_UK9_SUPPORT 1 ++ return desired; ++} + -+/* Support UK10_2 IOCTLS */ -+#define BASE_LEGACY_UK10_2_SUPPORT 1 ++static bool demand_get_core_active(struct kbase_device *kbdev) ++{ ++ if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | ++ kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt ++ && !kbdev->tiler_inuse_cnt) ++ return false; + -+/* Support UK10_4 IOCTLS */ -+#define BASE_LEGACY_UK10_4_SUPPORT 1 ++ return true; ++} + -+typedef struct base_mem_handle { -+ struct { -+ u64 handle; -+ } basep; -+} base_mem_handle; ++static void demand_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+#include "mali_base_mem_priv.h" -+#include "mali_kbase_profiling_gator_api.h" -+#include "mali_midg_coherency.h" -+#include "mali_kbase_gpu_id.h" ++static void demand_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + +/* -+ * Dependency stuff, keep it private for now. May want to expose it if -+ * we decide to make the number of semaphores a configurable -+ * option. ++ * The struct kbase_pm_policy structure for the demand power policy. ++ * ++ * This is the static structure that defines the demand power policy's callback ++ * and name. 
+ */ -+#define BASE_JD_ATOM_COUNT 512 -+ -+#define BASEP_JD_SEM_PER_WORD_LOG2 5 -+#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2) -+#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2) -+#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) -+#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) -+ -+/* Set/reset values for a software event */ -+#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -+#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) -+ -+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 ++const struct kbase_pm_policy kbase_pm_demand_policy_ops = { ++ "demand", /* name */ ++ demand_init, /* init */ ++ demand_term, /* term */ ++ demand_get_core_mask, /* get_core_mask */ ++ demand_get_core_active, /* get_core_active */ ++ 0u, /* flags */ ++ KBASE_PM_POLICY_ID_DEMAND, /* id */ ++}; + -+#define BASE_MAX_COHERENT_GROUPS 16 ++KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h +new file mode 100644 +index 000000000..c0c84b6e9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h +@@ -0,0 +1,64 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+#if defined CDBG_ASSERT -+#define LOCAL_ASSERT CDBG_ASSERT -+#elif defined KBASE_DEBUG_ASSERT -+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT -+#else -+#error assert macro not defined! -+#endif + -+#if defined PAGE_MASK -+#define LOCAL_PAGE_LSB ~PAGE_MASK -+#else -+#include + -+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 -+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) -+#else -+#error Failed to find page size -+#endif -+#endif + -+/** 32/64-bit neutral way to represent pointers */ -+typedef union kbase_pointer { -+ void __user *value; /**< client should store their pointers here */ -+ u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */ -+ u64 sizer; /**< Force 64-bit storage for all clients regardless */ -+} kbase_pointer; + -+/** -+ * @addtogroup base_user_api User-side Base APIs -+ * @{ ++/* ++ * A simple demand based power management policy + */ + -+/** -+ * @addtogroup base_user_api_memory User-side Base Memory APIs -+ * @{ -+ */ ++#ifndef MALI_KBASE_PM_DEMAND_H ++#define MALI_KBASE_PM_DEMAND_H + +/** -+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. ++ * DOC: Demand power management policy + * -+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator -+ * in order to determine the best cache policy. Some combinations are -+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), -+ * which defines a write-only region on the CPU side, which is -+ * heavily read by the CPU... -+ * Other flags are only meaningful to a particular allocator. -+ * More flags can be added to this list, as long as they don't clash -+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). 
-+ */ -+typedef u32 base_mem_alloc_flags; -+ -+/* Memory allocation, access/hint flags. ++ * The demand power management policy has the following characteristics: ++ * - When KBase indicates that the GPU will be powered up, but we don't yet ++ * know which Job Chains are to be run: ++ * - The Shader Cores are not powered up + * -+ * See base_mem_alloc_flags. -+ */ -+ -+/* IN */ -+/* Read access CPU side -+ */ -+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) -+ -+/* Write access CPU side ++ * - When KBase indicates that a set of Shader Cores are needed to submit the ++ * currently queued Job Chains: ++ * - Only those Shader Cores are powered up ++ * ++ * - When KBase indicates that the GPU need not be powered: ++ * - The Shader Cores are powered off, and the GPU itself is powered off too. ++ * ++ * Note: ++ * - KBase indicates the GPU will be powered up when it has a User Process that ++ * has just started to submit Job Chains. ++ * ++ * - KBase indicates the GPU need not be powered when all the Job Chains from ++ * User Processes have finished, and it is waiting for a User Process to ++ * submit some more Job Chains. + */ -+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + -+/* Read access GPU side ++/** ++ * struct kbasep_pm_policy_demand - Private structure for policy instance data ++ * ++ * @dummy: No state is needed, a dummy variable ++ * ++ * This contains data that is private to the demand power policy. + */ -+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) ++struct kbasep_pm_policy_demand { ++ int dummy; ++}; + -+/* Write access GPU side -+ */ -+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) ++extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; + -+/* Execute allowed on the GPU side ++#endif /* MALI_KBASE_PM_DEMAND_H */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +new file mode 100644 +index 000000000..82727937c +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +@@ -0,0 +1,1713 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + -+ /* BASE_MEM_HINT flags have been removed, but their values are reserved -+ * for backwards compatibility with older user-space drivers. The values -+ * can be re-used once support for r5p0 user-space drivers is removed, -+ * presumably in r7p0. 
-+ * -+ * RESERVED: (1U << 5) -+ * RESERVED: (1U << 6) -+ * RESERVED: (1U << 7) -+ * RESERVED: (1U << 8) -+ */ + -+/* Grow backing store on GPU Page Fault -+ */ -+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + -+/* Page coherence Outer shareable, if available -+ */ -+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + -+/* Page coherence Inner shareable -+ */ -+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + -+/* Should be cached on the CPU ++/* ++ * Base kernel Power Management hardware control + */ -+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + -+/* IN/OUT */ -+/* Must have same VA on both the GPU and the CPU -+ */ -+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) ++// #define ENABLE_DEBUG_LOG ++#include "../../platform/rk/custom_log.h" + -+/* OUT */ -+/* Must call mmap to acquire a GPU address for the alloc -+ */ -+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) ++#include ++#include ++#include ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* IN */ -+/* Page coherence Outer shareable, required. -+ */ -+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) ++#include + -+/* Secure memory -+ */ -+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16) ++#if MALI_MOCK_TEST ++#define MOCKABLE(function) function##_original ++#else ++#define MOCKABLE(function) function ++#endif /* MALI_MOCK_TEST */ + -+/* Not needed physical memory ++/** ++ * enum kbasep_pm_action - Actions that can be performed on a core. ++ * ++ * This enumeration is private to the file. Its values are set to allow ++ * core_type_to_reg() function, which decodes this enumeration, to be simpler ++ * and more efficient. ++ * ++ * @ACTION_PRESENT: The cores that are present ++ * @ACTION_READY: The cores that are ready ++ * @ACTION_PWRON: Power on the cores specified ++ * @ACTION_PWROFF: Power off the cores specified ++ * @ACTION_PWRTRANS: The cores that are transitioning ++ * @ACTION_PWRACTIVE: The cores that are active + */ -+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) ++enum kbasep_pm_action { ++ ACTION_PRESENT = 0, ++ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), ++ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), ++ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), ++ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), ++ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) ++}; + -+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the -+ * addresses to be the same -+ */ -+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) ++/*---------------------------------------------------------------------------*/ + -+/* Number of bits used as flags for base memory management -+ * -+ * Must be kept in sync with the base_mem_alloc_flags flags -+ */ -+#define BASE_MEM_FLAGS_NR_BITS 19 ++static bool is_action_of_powering_off_l2(enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action active) ++{ ++ return (KBASE_PM_CORE_L2 == core_type) && (ACTION_PWROFF == active); ++} + -+/* A mask for all output bits, excluding IN/OUT bits. 
-+ */ -+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP ++static bool is_action_of_powering_off_shader(enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action active) ++{ ++ return (KBASE_PM_CORE_SHADER == core_type) && (ACTION_PWROFF == active); ++} + -+/* A mask for all input bits, including IN/OUT bits. -+ */ -+#define BASE_MEM_FLAGS_INPUT_MASK \ -+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) ++static bool is_action_of_powering_off_tiler(enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action active) ++{ ++ return (KBASE_PM_CORE_TILER == core_type) && (ACTION_PWROFF == active); ++} + -+/* A mask for all the flags which are modifiable via the base_mem_set_flags -+ * interface. -+ */ -+#define BASE_MEM_FLAGS_MODIFIABLE \ -+ (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ -+ BASE_MEM_COHERENT_LOCAL) ++static u64 kbase_pm_get_state( ++ struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action); + +/** -+ * enum base_mem_import_type - Memory types supported by @a base_mem_import ++ * core_type_to_reg - Decode a core type and action to a register. + * -+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type -+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id. -+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) -+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a -+ * base_mem_import_user_buffer ++ * Given a core type (defined by kbase_pm_core_type) and an action (defined ++ * by kbasep_pm_action) this function will return the register offset that ++ * will perform the action on the core type. The register returned is the _LO ++ * register and an offset must be applied to use the _HI register. + * -+ * Each type defines what the supported handle type is. ++ * @core_type: The type of core ++ * @action: The type of action + * -+ * If any new type is added here ARM must be contacted -+ * to allocate a numeric value for it. -+ * Do not just add a new type without synchronizing with ARM -+ * as future releases from ARM might include other new types -+ * which could clash with your custom types. ++ * Return: The register offset of the _LO register that performs an action of ++ * type @action on a core of type @core_type. + */ -+typedef enum base_mem_import_type { -+ BASE_MEM_IMPORT_TYPE_INVALID = 0, -+ BASE_MEM_IMPORT_TYPE_UMP = 1, -+ BASE_MEM_IMPORT_TYPE_UMM = 2, -+ BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 -+} base_mem_import_type; ++static u32 core_type_to_reg(enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action) ++{ ++#ifdef CONFIG_MALI_CORESTACK ++ if (core_type == KBASE_PM_CORE_STACK) { ++ switch (action) { ++ case ACTION_PRESENT: ++ return STACK_PRESENT_LO; ++ case ACTION_READY: ++ return STACK_READY_LO; ++ case ACTION_PWRON: ++ return STACK_PWRON_LO; ++ case ACTION_PWROFF: ++ return STACK_PWROFF_LO; ++ case ACTION_PWRTRANS: ++ return STACK_PWRTRANS_LO; ++ default: ++ BUG(); ++ } ++ } ++#endif /* CONFIG_MALI_CORESTACK */ + -+/** -+ * struct base_mem_import_user_buffer - Handle of an imported user buffer -+ * -+ * @ptr: kbase_pointer to imported user buffer -+ * @length: length of imported user buffer in bytes -+ * -+ * This structure is used to represent a handle of an imported user buffer. 
-+ */ ++ return (u32)core_type + (u32)action; ++} + -+struct base_mem_import_user_buffer { -+ kbase_pointer ptr; -+ u64 length; -+}; ++#ifdef CONFIG_ARM64 ++static void mali_cci_flush_l2(struct kbase_device *kbdev) ++{ ++ const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; ++ u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; ++ u32 raw; + -+/** -+ * @brief Invalid memory handle. -+ * -+ * Return value from functions returning @ref base_mem_handle on error. -+ * -+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro -+ * in C++ code or other situations where compound literals cannot be used. -+ */ -+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) ++ /* ++ * Note that we don't take the cache flush mutex here since ++ * we expect to be the last user of the L2, all other L2 users ++ * would have dropped their references, to initiate L2 power ++ * down, L2 power down being the only valid place for this ++ * to be called from. ++ */ ++ ++ kbase_reg_write(kbdev, ++ GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAN_INV_CACHES, ++ NULL); ++ ++ raw = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), ++ NULL); ++ ++ /* Wait for cache flush to complete before continuing, exit on ++ * gpu resets or loop expiry. */ ++ while (((raw & mask) == 0) && --loops) { ++ raw = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), ++ NULL); ++ } ++} ++#endif + +/** -+ * @brief Special write-alloc memory handle. ++ * kbase_pm_invoke - Invokes an action on a core set + * -+ * A special handle is used to represent a region where a special page is mapped -+ * with a write-alloc cache setup, typically used when the write result of the -+ * GPU isn't needed, but the GPU must write anyway. ++ * This function performs the action given by @action on a set of cores of a ++ * type given by @core_type. It is a static function used by ++ * kbase_pm_transition_core_type() + * -+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro -+ * in C++ code or other situations where compound literals cannot be used. 
++ * @kbdev: The kbase device structure of the device ++ * @core_type: The type of core that the action should be performed on ++ * @cores: A bit mask of cores to perform the action on (low 32 bits) ++ * @action: The action to perform on the cores + */ -+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) ++static void kbase_pm_invoke(struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ u64 cores, ++ enum kbasep_pm_action action) ++{ ++ u32 reg; ++ u32 lo = cores & 0xFFFFFFFF; ++ u32 hi = (cores >> 32) & 0xFFFFFFFF; + -+#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -+/* reserved handles ..-64< for future special handles */ -+#define BASE_MEM_COOKIE_BASE (64ul << 12) -+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ -+ BASE_MEM_COOKIE_BASE) ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/* Mask to detect 4GB boundary alignment */ -+#define BASE_MEM_MASK_4GB 0xfffff000UL ++ /*-------------------------------------------------------*/ + ++ if ( is_action_of_powering_off_l2(core_type, action) ) { ++ D("not to power off l2 actually."); ++ return; ++ } ++ if ( is_action_of_powering_off_shader(core_type, action) ) { ++ D("not to power off shader actually. cores_lo : 0x%x, hi : 0x%x.", ++ lo, ++ hi); ++ return; ++ } ++ if ( is_action_of_powering_off_tiler(core_type, action) ) { ++ D("not to power off tiler actually."); ++ return; ++ } + -+/* Bit mask of cookies used for for memory allocation setup */ -+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ ++ /*-------------------------------------------------------*/ + ++ reg = core_type_to_reg(core_type, action); + -+/** -+ * @brief Result codes of changing the size of the backing store allocated to a tmem region -+ */ -+typedef enum base_backing_threshold_status { -+ BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ -+ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ -+ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) 
*/ -+} base_backing_threshold_status; ++ KBASE_DEBUG_ASSERT(reg); ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ if (cores) { ++ if (action == ACTION_PWRON) ++ kbase_trace_mali_pm_power_on(core_type, cores); ++ else if (action == ACTION_PWROFF) ++ kbase_trace_mali_pm_power_off(core_type, cores); ++ } ++#endif + -+/** -+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs -+ * @{ -+ */ ++ if (cores) { ++ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); ++ ++ if (action == ACTION_PWRON) ++ state |= cores; ++ else if (action == ACTION_PWROFF) ++ state &= ~cores; ++ KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); ++ } ++ ++ /* Tracing */ ++ if (cores) { ++ if (action == ACTION_PWRON) ++ switch (core_type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, ++ lo); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, ++ NULL, 0u, lo); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, ++ 0u, lo); ++ break; ++ default: ++ break; ++ } ++ else if (action == ACTION_PWROFF) ++ switch (core_type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, ++ 0u, lo); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, ++ NULL, 0u, lo); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, ++ 0u, lo); ++ /* disable snoops before L2 is turned off */ ++ kbase_pm_cache_snoop_disable(kbdev); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (lo != 0) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL); ++ ++ if (hi != 0) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL); ++} + +/** -+ * @brief a basic memory operation (sync-set). ++ * kbase_pm_get_state - Get information about a core set + * -+ * The content of this structure is private, and should only be used -+ * by the accessors. ++ * This function gets information (chosen by @action) about a set of cores of ++ * a type given by @core_type. It is a static function used by ++ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and ++ * kbase_pm_get_ready_cores(). ++ * ++ * @kbdev: The kbase device structure of the device ++ * @core_type: The type of core that the should be queried ++ * @action: The property of the cores to query ++ * ++ * Return: A bit mask specifying the state of the cores + */ -+typedef struct base_syncset { -+ struct basep_syncset basep_sset; -+} base_syncset; ++static u64 kbase_pm_get_state(struct kbase_device *kbdev, ++ enum kbase_pm_core_type core_type, ++ enum kbasep_pm_action action) ++{ ++ u32 reg; ++ u32 lo, hi; + -+/** @} end group base_user_api_memory_defered */ ++ reg = core_type_to_reg(core_type, action); + -+/** -+ * Handle to represent imported memory object. -+ * Simple opague handle to imported memory, can't be used -+ * with anything but base_external_resource_init to bind to an atom. 
-+ */ -+typedef struct base_import_handle { -+ struct { -+ u64 handle; -+ } basep; -+} base_import_handle; ++ KBASE_DEBUG_ASSERT(reg); + -+/** @} end group base_user_api_memory */ ++ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL); ++ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL); + -+/** -+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs -+ * @{ -+ */ ++ return (((u64) hi) << 32) | ((u64) lo); ++} + -+typedef int platform_fence_type; -+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) ++void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev) ++{ ++ kbdev->shader_inuse_bitmap = 0; ++ kbdev->shader_needed_bitmap = 0; ++ kbdev->shader_available_bitmap = 0; ++ kbdev->tiler_available_bitmap = 0; ++ kbdev->l2_users_count = 0; ++ kbdev->l2_available_bitmap = 0; ++ kbdev->tiler_needed_cnt = 0; ++ kbdev->tiler_inuse_cnt = 0; + -+/** -+ * Base stream handle. -+ * -+ * References an underlying base stream object. -+ */ -+typedef struct base_stream { -+ struct { -+ int fd; -+ } basep; -+} base_stream; ++ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt)); ++} + +/** -+ * Base fence handle. ++ * kbase_pm_get_present_cores - Get the cores that are present + * -+ * References an underlying base fence object. -+ */ -+typedef struct base_fence { -+ struct { -+ int fd; -+ int stream_fd; -+ } basep; -+} base_fence; -+ -+/** -+ * @brief Per-job data ++ * @kbdev: Kbase device ++ * @type: The type of cores to query + * -+ * This structure is used to store per-job data, and is completely unused -+ * by the Base driver. It can be used to store things such as callback -+ * function pointer, data to handle job completion. It is guaranteed to be -+ * untouched by the Base driver. ++ * Return: Bitmask of the cores that are present + */ -+typedef struct base_jd_udata { -+ u64 blob[2]; /**< per-job data array */ -+} base_jd_udata; ++u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ switch (type) { ++ case KBASE_PM_CORE_L2: ++ return kbdev->gpu_props.props.raw_props.l2_present; ++ case KBASE_PM_CORE_SHADER: ++ return kbdev->gpu_props.props.raw_props.shader_present; ++ case KBASE_PM_CORE_TILER: ++ return kbdev->gpu_props.props.raw_props.tiler_present; ++#ifdef CONFIG_MALI_CORESTACK ++ case KBASE_PM_CORE_STACK: ++ return kbdev->gpu_props.props.raw_props.stack_present; ++#endif /* CONFIG_MALI_CORESTACK */ ++ default: ++ break; ++ } ++ KBASE_DEBUG_ASSERT(0); ++ ++ return 0; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); + +/** -+ * @brief Memory aliasing info -+ * -+ * Describes a memory handle to be aliased. -+ * A subset of the handle can be chosen for aliasing, given an offset and a -+ * length. -+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a -+ * region where a special page is mapped with a write-alloc cache setup, -+ * typically used when the write result of the GPU isn't needed, but the GPU -+ * must write anyway. ++ * kbase_pm_get_active_cores - Get the cores that are "active" ++ * (busy processing work) + * -+ * Offset and length are specified in pages. -+ * Offset must be within the size of the handle. -+ * Offset+length must not overrun the size of the handle. ++ * @kbdev: Kbase device ++ * @type: The type of cores to query + * -+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE -+ * @offset Offset within the handle to start aliasing from, in pages. 
-+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. -+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE -+ * specifies the number of times the special page is needed. ++ * Return: Bitmask of cores that are active + */ -+struct base_mem_aliasing_info { -+ base_mem_handle handle; -+ u64 offset; -+ u64 length; -+}; ++u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) ++{ ++ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); ++} + -+/** -+ * struct base_jit_alloc_info - Structure which describes a JIT allocation -+ * request. -+ * @gpu_alloc_addr: The GPU virtual address to write the JIT -+ * allocated GPU virtual address to. -+ * @va_pages: The minimum number of virtual pages required. -+ * @commit_pages: The minimum number of physical pages which -+ * should back the allocation. -+ * @extent: Granularity of physical pages to grow the -+ * allocation by during a fault. -+ * @id: Unique ID provided by the caller, this is used -+ * to pair allocation and free requests. -+ * Zero is not a valid value. -+ */ -+struct base_jit_alloc_info { -+ u64 gpu_alloc_addr; -+ u64 va_pages; -+ u64 commit_pages; -+ u64 extent; -+ u8 id; -+}; ++KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); + +/** -+ * @brief Job dependency type. ++ * kbase_pm_get_trans_cores - Get the cores that are transitioning between ++ * power states + * -+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or -+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without -+ * changing the structure size). -+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then -+ * errors will not be propagated. ++ * @kbdev: Kbase device ++ * @type: The type of cores to query ++ * ++ * Return: Bitmask of cores that are transitioning + */ -+typedef u8 base_jd_dep_type; -+ ++u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) ++{ ++ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); ++} + -+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ ++KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); + +/** -+ * @brief Job chain hardware requirements. ++ * kbase_pm_get_ready_cores - Get the cores that are powered on + * -+ * A job chain must specify what GPU features it needs to allow the -+ * driver to schedule the job correctly. By not specifying the -+ * correct settings can/will cause an early job termination. Multiple -+ * values can be ORed together to specify multiple requirements. -+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex -+ * dependencies, and that doesn't execute anything on the hardware. 
++ * @kbdev: Kbase device ++ * @type: The type of cores to query ++ * ++ * Return: Bitmask of cores that are ready (powered on) + */ -+typedef u32 base_jd_core_req; ++u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type) ++{ ++ u64 result; + -+/* Requirements that come from the HW */ ++ result = kbase_pm_get_state(kbdev, type, ACTION_READY); + -+/** -+ * No requirement, dependency only -+ */ -+#define BASE_JD_REQ_DEP ((base_jd_core_req)0) ++ switch (type) { ++ case KBASE_PM_CORE_SHADER: ++ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, ++ (u32) result); ++ break; ++ case KBASE_PM_CORE_TILER: ++ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, ++ (u32) result); ++ break; ++ case KBASE_PM_CORE_L2: ++ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, ++ (u32) result); ++ break; ++ default: ++ break; ++ } + -+/** -+ * Requires fragment shaders -+ */ -+#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) ++ return result; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); + +/** -+ * Requires compute shaders -+ * This covers any of the following Midgard Job types: -+ * - Vertex Shader Job -+ * - Geometry Shader Job -+ * - An actual Compute Shader Job ++ * kbase_pm_transition_core_type - Perform power transitions for a particular ++ * core type. + * -+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the -+ * job is specifically just the "Compute Shader" job type, and not the "Vertex -+ * Shader" nor the "Geometry Shader" job type. ++ * This function will perform any available power transitions to make the actual ++ * hardware state closer to the desired state. If a core is currently ++ * transitioning then changes to the power state of that call cannot be made ++ * until the transition has finished. Cores which are not present in the ++ * hardware are ignored if they are specified in the desired_state bitmask, ++ * however the return value will always be 0 in this case. ++ * ++ * @kbdev: The kbase device ++ * @type: The core type to perform transitions for ++ * @desired_state: A bit mask of the desired state of the cores ++ * @in_use: A bit mask of the cores that are currently running ++ * jobs. These cores have to be kept powered up because ++ * there are jobs running (or about to run) on them. ++ * @available: Receives a bit mask of the cores that the job ++ * scheduler can use to submit jobs to. May be NULL if ++ * this is not needed. ++ * @powering_on: Bit mask to update with cores that are ++ * transitioning to a power-on state. 
++ * ++ * Return: true if the desired state has been reached, false otherwise + */ -+#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) -+#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ -+#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ -+#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ ++static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type, ++ u64 desired_state, ++ u64 in_use, ++ u64 * const available, ++ u64 *powering_on) ++{ ++ u64 present; ++ u64 ready; ++ u64 trans; ++ u64 powerup; ++ u64 powerdown; ++ u64 powering_on_trans; ++ u64 desired_state_in_use; + -+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/* Requires fragment job with AFBC encoding */ -+#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) ++ /* Get current state */ ++ present = kbase_pm_get_present_cores(kbdev, type); ++ trans = kbase_pm_get_trans_cores(kbdev, type); ++ ready = kbase_pm_get_ready_cores(kbdev, type); ++ /* mask off ready from trans in case transitions finished between the ++ * register reads */ ++ trans &= ~ready; + -+/** -+ * SW-only requirement: coalesce completion events. -+ * If this bit is set then completion of this atom will not cause an event to -+ * be sent to userspace, whether successful or not; completion events will be -+ * deferred until an atom completes which does not have this bit set. -+ * -+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. -+ */ -+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) ++ if (trans) /* Do not progress if any cores are transitioning */ ++ return false; + -+/** -+ * SW Only requirement: the job chain requires a coherent core group. We don't -+ * mind which coherent core group is used. -+ */ -+#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) ++ powering_on_trans = trans & *powering_on; ++ *powering_on = powering_on_trans; + -+/** -+ * SW Only requirement: The performance counters should be enabled only when -+ * they are needed, to reduce power consumption. -+ */ ++ if (available != NULL) ++ *available = (ready | powering_on_trans) & desired_state; + -+#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) ++ /* Update desired state to include the in-use cores. These have to be ++ * kept powered up because there are jobs running or about to run on ++ * these cores ++ */ ++ desired_state_in_use = desired_state | in_use; + -+/** -+ * SW Only requirement: External resources are referenced by this atom. -+ * When external resources are referenced no syncsets can be bundled with the atom -+ * but should instead be part of a NULL jobs inserted into the dependency tree. -+ * The first pre_dep object must be configured for the external resouces to use, -+ * the second pre_dep object can be used to create other dependencies. -+ * -+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. 
-+ */ -+#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) ++ /* Update state of whether l2 caches are powered */ ++ if (type == KBASE_PM_CORE_L2) { ++ if ((ready == present) && (desired_state_in_use == ready) && ++ (trans == 0)) { ++ /* All are ready, none will be turned off, and none are ++ * transitioning */ ++ kbdev->pm.backend.l2_powered = 1; ++ /* ++ * Ensure snoops are enabled after L2 is powered up, ++ * note that kbase keeps track of the snoop state, so ++ * safe to repeatedly call. ++ */ ++ kbase_pm_cache_snoop_enable(kbdev); ++ if (kbdev->l2_users_count > 0) { ++ /* Notify any registered l2 cache users ++ * (optimized out when no users waiting) */ ++ wake_up(&kbdev->pm.backend.l2_powered_wait); ++ } ++ } else ++ kbdev->pm.backend.l2_powered = 0; ++ } + -+/** -+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted -+ * to the hardware but will cause some action to happen within the driver -+ */ -+#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) ++ if (desired_state == ready && (trans == 0)) ++ return true; + -+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) -+ -+/** -+ * SW Only requirement : Replay job. -+ * -+ * If the preceding job fails, the replay job will cause the jobs specified in -+ * the list of base_jd_replay_payload pointed to by the jc pointer to be -+ * replayed. -+ * -+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT -+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay -+ * job is failed, as well as any following dependencies. -+ * -+ * The replayed jobs will require a number of atom IDs. If there are not enough -+ * free atom IDs then the replay job will fail. -+ * -+ * If the preceding job does not fail, then the replay job is returned as -+ * completed. -+ * -+ * The replayed jobs will never be returned to userspace. The preceding failed -+ * job will be returned to userspace as failed; the status of this job should -+ * be ignored. Completion should be determined by the status of the replay soft -+ * job. -+ * -+ * In order for the jobs to be replayed, the job headers will have to be -+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type -+ * field indicates a Vertex Shader Job then it will be changed to Null Job. -+ * -+ * The replayed jobs have the following assumptions : -+ * -+ * - No external resources. Any required external resources will be held by the -+ * replay atom. -+ * - Pre-dependencies are created based on job order. -+ * - Atom numbers are automatically assigned. -+ * - device_nr is set to 0. This is not relevant as -+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. -+ * - Priority is inherited from the replay job. -+ */ -+#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) -+/** -+ * SW only requirement: event wait/trigger job. -+ * -+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. -+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the -+ * other waiting jobs. It completes immediately. -+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it -+ * possible for other jobs to wait upon. It completes immediately. 
-+ */ -+#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -+#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -+#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) -+ -+#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) -+ -+/** -+ * SW only requirement: Just In Time allocation -+ * -+ * This job requests a JIT allocation based on the request in the -+ * @base_jit_alloc_info structure which is passed via the jc element of -+ * the atom. -+ * -+ * It should be noted that the id entry in @base_jit_alloc_info must not -+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. -+ * -+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE -+ * soft job to free the JIT allocation is still made. -+ * -+ * The job will complete immediately. -+ */ -+#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) -+/** -+ * SW only requirement: Just In Time free -+ * -+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC -+ * to be freed. The ID of the JIT allocation is passed via the jc element of -+ * the atom. -+ * -+ * The job will complete immediately. -+ */ -+#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) -+ -+/** -+ * SW only requirement: Map external resource -+ * -+ * This job requests external resource(s) are mapped once the dependencies -+ * of the job have been satisfied. The list of external resources are -+ * passed via the jc element of the atom which is a pointer to a -+ * @base_external_resource_list. -+ */ -+#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) -+/** -+ * SW only requirement: Unmap external resource -+ * -+ * This job requests external resource(s) are unmapped once the dependencies -+ * of the job has been satisfied. The list of external resources are -+ * passed via the jc element of the atom which is a pointer to a -+ * @base_external_resource_list. -+ */ -+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) -+ -+/** -+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) -+ * -+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. -+ * -+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job -+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. -+ */ -+#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) -+ -+/** -+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a -+ * particular core group -+ * -+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority -+ * -+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. -+ * -+ * If the core availability policy is keeping the required core group turned off, then -+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. 
-+ */ -+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) ++ /* Restrict the cores to those that are actually present */ ++ powerup = desired_state_in_use & present; ++ powerdown = (~desired_state_in_use) & present; + -+/** -+ * SW Flag: If this bit is set then the successful completion of this atom -+ * will not cause an event to be sent to userspace -+ */ -+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) ++ /* Restrict to cores that are not already in the desired state */ ++ powerup &= ~ready; ++ powerdown &= ready; + -+/** -+ * SW Flag: If this bit is set then completion of this atom will not cause an -+ * event to be sent to userspace, whether successful or not. -+ */ -+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) ++ /* Don't transition any cores that are already transitioning, except for ++ * Mali cores that support the following case: ++ * ++ * If the SHADER_PWRON or TILER_PWRON registers are written to turn on ++ * a core that is currently transitioning to power off, then this is ++ * remembered and the shader core is automatically powered up again once ++ * the original transition completes. Once the automatic power on is ++ * complete any job scheduled on the shader core should start. ++ */ ++ powerdown &= ~trans; + -+/** -+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. -+ * -+ * If this bit is set then the GPU's cache will not be cleaned and invalidated -+ * until a GPU job starts which does not have this bit set or a job completes -+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if -+ * the CPU may have written to memory addressed by the job since the last job -+ * without this bit set was submitted. -+ */ -+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) ++ if (kbase_hw_has_feature(kbdev, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) ++ if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) ++ trans = powering_on_trans; /* for exception cases, only ++ * mask off cores in power on ++ * transitions */ + -+/** -+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. -+ * -+ * If this bit is set then the GPU's cache will not be cleaned and invalidated -+ * until a GPU job completes which does not have this bit set or a job starts -+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if -+ * the CPU may read from or partially overwrite memory addressed by the job -+ * before the next job without this bit set completes. -+ */ -+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) ++ powerup &= ~trans; + -+/** -+ * These requirement bits are currently unused in base_jd_core_req -+ */ -+#define BASEP_JD_REQ_RESERVED \ -+ (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ -+ BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ -+ BASE_JD_REQ_EVENT_COALESCE | \ -+ BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ -+ BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ -+ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END)) ++ /* Perform transitions if any */ ++ kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); ++#if !PLATFORM_POWER_DOWN_ONLY ++ kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); ++#endif + -+/** -+ * Mask of all bits in base_jd_core_req that control the type of the atom. 
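The loop in get_desired_cache_status() above walks the L2-present bitmap from the highest slice downwards and turns a slice on if any core at or above its bit position is powered. The standalone restatement below uses a portable helper instead of the kernel's fls64(); the slice layout and core mask in main() are invented. It prints 0x10 for a hypothetical two-slice GPU where only shader core 5 is powered and the tiler is off:

        #include <stdint.h>
        #include <stdio.h>

        /* Mirror of the fls64() - 1 step above (only called with non-zero input) */
        static unsigned int highest_bit(uint64_t x)
        {
                unsigned int bit = 0;

                while (x >>= 1)
                        bit++;
                return bit;
        }

        static uint64_t desired_cache_status(uint64_t present, uint64_t cores_powered,
                                             uint64_t tilers_powered)
        {
                uint64_t desired = 0;

                while (present) {
                        uint64_t bit_mask = 1ull << highest_bit(present);
                        uint64_t mask = ~(bit_mask - 1); /* this slice and everything above */

                        if (cores_powered & mask)
                                desired |= bit_mask;

                        cores_powered &= ~mask;
                        present &= ~bit_mask;
                }

                if (tilers_powered) /* tiler is served by L2 slice 0 */
                        desired |= 1;

                return desired;
        }

        int main(void)
        {
                /* Hypothetical layout: L2 slices at bits 0 and 4, shader core 5 powered */
                printf("%#llx\n",
                       (unsigned long long)desired_cache_status(0x11, 0x20, 0));
                return 0;
        }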
-+ * -+ * This allows dependency only atoms to have flags set -+ */ -+#define BASE_JD_REQ_ATOM_TYPE \ -+ (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ -+ BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) ++ /* Recalculate cores transitioning on, and re-evaluate our state */ ++ powering_on_trans |= powerup; ++ *powering_on = powering_on_trans; ++ if (available != NULL) ++ *available = (ready | powering_on_trans) & desired_state; + -+/** -+ * Mask of all bits in base_jd_core_req that control the type of a soft job. -+ */ -+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) ++ return false; ++} + -+/* -+ * Returns non-zero value if core requirements passed define a soft job or -+ * a dependency only job. -+ */ -+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ -+ ((core_req & BASE_JD_REQ_SOFT_JOB) || \ -+ (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) ++KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); + +/** -+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which -+ * handles retaining cores for power management and affinity management. -+ * -+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack -+ * where lots of atoms could be submitted before powerup, and each has an -+ * affinity chosen that causes other atoms to have an affinity -+ * violation. Whilst the affinity was not causing violations at the time it -+ * was chosen, it could cause violations thereafter. For example, 1000 jobs -+ * could have had their affinity chosen during the powerup time, so any of -+ * those 1000 jobs could cause an affinity violation later on. -+ * -+ * The attack would otherwise occur because other atoms/contexts have to wait for: -+ * -# the currently running atoms (which are causing the violation) to -+ * finish -+ * -# and, the atoms that had their affinity chosen during powerup to -+ * finish. These are run preferentially because they don't cause a -+ * violation, but instead continue to cause the violation in others. -+ * -# or, the attacker is scheduled out (which might not happen for just 2 -+ * contexts) -+ * -+ * By re-choosing the affinity (which is designed to avoid violations at the -+ * time it's chosen), we break condition (2) of the wait, which minimizes the -+ * problem to just waiting for current jobs to finish (which can be bounded if -+ * the Job Scheduling Policy has a timer). -+ */ -+enum kbase_atom_coreref_state { -+ /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */ -+ KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED, -+ /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */ -+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES, -+ /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */ -+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY, -+ /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */ -+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS, -+ /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */ -+ KBASE_ATOM_COREREF_STATE_READY -+}; -+ -+/* -+ * Base Atom priority -+ * -+ * Only certain priority levels are actually implemented, as specified by the -+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority -+ * level that is not one of those defined below. 
++ * get_desired_cache_status - Determine which caches should be on for a ++ * particular core state + * -+ * Priority levels only affect scheduling between atoms of the same type within -+ * a base context, and only after the atoms have had dependencies resolved. -+ * Fragment atoms does not affect non-frament atoms with lower priorities, and -+ * the other way around. For example, a low priority atom that has had its -+ * dependencies resolved might run before a higher priority atom that has not -+ * had its dependencies resolved. ++ * This function takes a bit mask of the present caches and the cores (or ++ * caches) that are attached to the caches that will be powered. It then ++ * computes which caches should be turned on to allow the cores requested to be ++ * powered up. + * -+ * The scheduling between base contexts/processes and between atoms from -+ * different base contexts/processes is unaffected by atom priority. ++ * @present: The bit mask of present caches ++ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to ++ * be powered ++ * @tilers_powered: The bit mask of tilers that are desired to be powered + * -+ * The atoms are scheduled as follows with respect to their priorities: -+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies -+ * resolved, and atom 'X' has a higher priority than atom 'Y' -+ * - If atom 'Y' is currently running on the HW, then it is interrupted to -+ * allow atom 'X' to run soon after -+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing -+ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' -+ * - Any two atoms that have the same priority could run in any order with -+ * respect to each other. That is, there is no ordering constraint between -+ * atoms of the same priority. ++ * Return: A bit mask of the caches that should be turned on + */ -+typedef u8 base_jd_prio; -+ -+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) -+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and -+ * BASE_JD_PRIO_LOW */ -+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) -+/* Low atom priority. */ -+#define BASE_JD_PRIO_LOW ((base_jd_prio)2) ++static u64 get_desired_cache_status(u64 present, u64 cores_powered, ++ u64 tilers_powered) ++{ ++ u64 desired = 0; + -+/* Count of the number of priority levels. 
This itself is not a valid -+ * base_jd_prio setting */ -+#define BASE_JD_NR_PRIO_LEVELS 3 ++ while (present) { ++ /* Find out which is the highest set bit */ ++ u64 bit = fls64(present) - 1; ++ u64 bit_mask = 1ull << bit; ++ /* Create a mask which has all bits from 'bit' upwards set */ + -+enum kbase_jd_atom_state { -+ /** Atom is not used */ -+ KBASE_JD_ATOM_STATE_UNUSED, -+ /** Atom is queued in JD */ -+ KBASE_JD_ATOM_STATE_QUEUED, -+ /** Atom has been given to JS (is runnable/running) */ -+ KBASE_JD_ATOM_STATE_IN_JS, -+ /** Atom has been completed, but not yet handed back to job dispatcher -+ * for dependency resolution */ -+ KBASE_JD_ATOM_STATE_HW_COMPLETED, -+ /** Atom has been completed, but not yet handed back to userspace */ -+ KBASE_JD_ATOM_STATE_COMPLETED -+}; ++ u64 mask = ~(bit_mask - 1); + -+typedef u16 base_atom_id; /**< Type big enough to store an atom number in */ ++ /* If there are any cores powered at this bit or above (that ++ * haven't previously been processed) then we need this core on ++ */ ++ if (cores_powered & mask) ++ desired |= bit_mask; + -+struct base_dependency { -+ base_atom_id atom_id; /**< An atom number */ -+ base_jd_dep_type dependency_type; /**< Dependency type */ -+}; ++ /* Remove bits from cores_powered and present */ ++ cores_powered &= ~mask; ++ present &= ~bit_mask; ++ } + -+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. -+ * In order to keep the size of the structure same, padding field has been adjusted -+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) -+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req -+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission -+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left -+ * for possible future use. */ -+typedef struct base_jd_atom_v2 { -+ u64 jc; /**< job-chain GPU address */ -+ struct base_jd_udata udata; /**< user data */ -+ kbase_pointer extres_list; /**< list of external resources */ -+ u16 nr_extres; /**< nr of external resources */ -+ u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ -+ struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, -+ this is done in order to reduce possibility of improper assigment of a dependency field */ -+ base_atom_id atom_number; /**< unique number to identify the atom */ -+ base_jd_prio prio; /**< Atom priority. 
Refer to @ref base_jd_prio for more details */ -+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ -+ u8 padding[1]; -+ base_jd_core_req core_req; /**< core requirements */ -+} base_jd_atom_v2; ++ /* Power up the required L2(s) for the tiler */ ++ if (tilers_powered) ++ desired |= 1; + -+#ifdef BASE_LEGACY_UK6_SUPPORT -+struct base_jd_atom_v2_uk6 { -+ u64 jc; /**< job-chain GPU address */ -+ struct base_jd_udata udata; /**< user data */ -+ kbase_pointer extres_list; /**< list of external resources */ -+ u16 nr_extres; /**< nr of external resources */ -+ u16 core_req; /**< core requirements */ -+ base_atom_id pre_dep[2]; /**< pre-dependencies */ -+ base_atom_id atom_number; /**< unique number to identify the atom */ -+ base_jd_prio prio; /**< priority - smaller is higher priority */ -+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ -+ u8 padding[7]; -+}; -+#endif /* BASE_LEGACY_UK6_SUPPORT */ ++ return desired; ++} + -+typedef enum base_external_resource_access { -+ BASE_EXT_RES_ACCESS_SHARED, -+ BASE_EXT_RES_ACCESS_EXCLUSIVE -+} base_external_resource_access; ++KBASE_EXPORT_TEST_API(get_desired_cache_status); + -+typedef struct base_external_resource { -+ u64 ext_resource; -+} base_external_resource; ++#ifdef CONFIG_MALI_CORESTACK ++u64 kbase_pm_core_stack_mask(u64 cores) ++{ ++ u64 stack_mask = 0; ++ size_t const MAX_CORE_ID = 31; ++ size_t const NUM_CORES_PER_STACK = 4; ++ size_t i; + ++ for (i = 0; i <= MAX_CORE_ID; ++i) { ++ if (test_bit(i, (unsigned long *)&cores)) { ++ /* Every core which ID >= 16 is filled to stacks 4-7 ++ * instead of 0-3 */ ++ size_t const stack_num = (i > 16) ? ++ (i % NUM_CORES_PER_STACK) + 4 : ++ (i % NUM_CORES_PER_STACK); ++ set_bit(stack_num, (unsigned long *)&stack_mask); ++ } ++ } + -+/** -+ * The maximum number of external resources which can be mapped/unmapped -+ * in a single request. -+ */ -+#define BASE_EXT_RES_COUNT_MAX 10 ++ return stack_mask; ++} ++#endif /* CONFIG_MALI_CORESTACK */ + -+/** -+ * struct base_external_resource_list - Structure which describes a list of -+ * external resources. -+ * @count: The number of resources. -+ * @ext_res: Array of external resources which is -+ * sized at allocation time. -+ */ -+struct base_external_resource_list { -+ u64 count; -+ struct base_external_resource ext_res[1]; -+}; ++bool ++MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) ++{ ++ bool cores_are_available = false; ++ bool in_desired_state = true; ++ u64 desired_l2_state; ++#ifdef CONFIG_MALI_CORESTACK ++ u64 desired_stack_state; ++ u64 stacks_powered; ++#endif /* CONFIG_MALI_CORESTACK */ ++ u64 cores_powered; ++ u64 tilers_powered; ++ u64 tiler_available_bitmap; ++ u64 tiler_transitioning_bitmap; ++ u64 shader_available_bitmap; ++ u64 shader_ready_bitmap; ++ u64 shader_transitioning_bitmap; ++ u64 l2_available_bitmap; ++ u64 prev_l2_available_bitmap; ++ u64 l2_inuse_bitmap; + -+struct base_jd_debug_copy_buffer { -+ u64 address; -+ u64 size; -+ struct base_external_resource extres; -+}; ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+/** -+ * @brief Setter for a dependency structure -+ * -+ * @param[in] dep The kbase jd atom dependency to be initialized. -+ * @param id The atom_id to be assigned. -+ * @param dep_type The dep_type to be assigned. 
-+ * -+ */ -+static inline void base_jd_atom_dep_set(struct base_dependency *dep, -+ base_atom_id id, base_jd_dep_type dep_type) -+{ -+ LOCAL_ASSERT(dep != NULL); ++ spin_lock(&kbdev->pm.backend.gpu_powered_lock); ++ if (kbdev->pm.backend.gpu_powered == false) { ++ spin_unlock(&kbdev->pm.backend.gpu_powered_lock); ++ if (kbdev->pm.backend.desired_shader_state == 0 && ++ kbdev->pm.backend.desired_tiler_state == 0) ++ return true; ++ return false; ++ } + -+ /* -+ * make sure we don't set not allowed combinations -+ * of atom_id/dependency_type. -+ */ -+ LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || -+ (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); ++ /* Trace that a change-state is being requested, and that it took ++ * (effectively) no time to start it. This is useful for counting how ++ * many state changes occurred, in a way that's backwards-compatible ++ * with processing the trace data */ ++ kbase_timeline_pm_send_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); ++ kbase_timeline_pm_handle_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); + -+ dep->atom_id = id; -+ dep->dependency_type = dep_type; -+} ++ /* If any cores are already powered then, we must keep the caches on */ ++ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_SHADER); ++ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++ cores_powered |= kbdev->pm.backend.desired_shader_state; + -+/** -+ * @brief Make a copy of a dependency structure -+ * -+ * @param[in,out] dep The kbase jd atom dependency to be written. -+ * @param[in] from The dependency to make a copy from. -+ * -+ */ -+static inline void base_jd_atom_dep_copy(struct base_dependency *dep, -+ const struct base_dependency *from) -+{ -+ LOCAL_ASSERT(dep != NULL); ++#ifdef CONFIG_MALI_CORESTACK ++ /* Work out which core stacks want to be powered */ ++ desired_stack_state = kbase_pm_core_stack_mask(cores_powered); ++ stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) | ++ desired_stack_state; ++#endif /* CONFIG_MALI_CORESTACK */ + -+ base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); -+} ++ /* Work out which tilers want to be powered */ ++ tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_TILER); ++ tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); ++ tilers_powered |= kbdev->pm.backend.desired_tiler_state; + -+/** -+ * @brief Soft-atom fence trigger setup. -+ * -+ * Sets up an atom to be a SW-only atom signaling a fence -+ * when it reaches the run state. -+ * -+ * Using the existing base dependency system the fence can -+ * be set to trigger when a GPU job has finished. -+ * -+ * The base fence object must not be terminated until the atom -+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned. -+ * -+ * @a fence must be a valid fence set up with @a base_fence_init. -+ * Calling this function with a uninitialized fence results in undefined behavior. -+ * -+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom -+ * @param[in] fence The base fence object to trigger. 
-+ */ -+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -+{ -+ LOCAL_ASSERT(atom); -+ LOCAL_ASSERT(fence); -+ LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); -+ LOCAL_ASSERT(fence->basep.stream_fd >= 0); -+ atom->jc = (uintptr_t) fence; -+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; -+} ++ /* If there are l2 cache users registered, keep all l2s powered even if ++ * all other cores are off. */ ++ if (kbdev->l2_users_count > 0) ++ cores_powered |= kbdev->gpu_props.props.raw_props.l2_present; + -+/** -+ * @brief Soft-atom fence wait setup. -+ * -+ * Sets up an atom to be a SW-only atom waiting on a fence. -+ * When the fence becomes triggered the atom becomes runnable -+ * and completes immediately. -+ * -+ * Using the existing base dependency system the fence can -+ * be set to block a GPU job until it has been triggered. -+ * -+ * The base fence object must not be terminated until the atom -+ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned. -+ * -+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import. -+ * Calling this function with a uninitialized fence results in undefined behavior. -+ * -+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom -+ * @param[in] fence The base fence object to wait on -+ */ -+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -+{ -+ LOCAL_ASSERT(atom); -+ LOCAL_ASSERT(fence); -+ LOCAL_ASSERT(fence->basep.fd >= 0); -+ atom->jc = (uintptr_t) fence; -+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; -+} ++ desired_l2_state = get_desired_cache_status( ++ kbdev->gpu_props.props.raw_props.l2_present, ++ cores_powered, tilers_powered); + -+/** -+ * @brief External resource info initialization. -+ * -+ * Sets up an external resource object to reference -+ * a memory allocation and the type of access requested. -+ * -+ * @param[in] res The resource object to initialize -+ * @param handle The handle to the imported memory object, must be -+ * obtained by calling @ref base_mem_as_import_handle(). 
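As a usage illustration of the two fence helpers above, the hypothetical user-side fragment below queues a fence-wait soft atom and makes a GPU atom depend on it. It assumes mali_base_kernel.h is the header this hunk patches, that BASE_JD_DEP_TYPE_DATA is the ordinary data dependency type from the same header, and that the fence was already set up with base_fence_init() or base_fence_import(); none of this is part of the patch itself.

        #include "mali_base_kernel.h"   /* assumed header name for this hunk */
        #include <string.h>

        static void queue_after_fence(struct base_jd_atom_v2 *wait_atom,
                                      struct base_jd_atom_v2 *gpu_atom,
                                      struct base_fence *fence, u64 jc)
        {
                memset(wait_atom, 0, sizeof(*wait_atom));
                wait_atom->atom_number = 1;
                /* Sets jc and core_req to BASE_JD_REQ_SOFT_FENCE_WAIT */
                base_jd_fence_wait_setup_v2(wait_atom, fence);

                memset(gpu_atom, 0, sizeof(*gpu_atom));
                gpu_atom->atom_number = 2;
                gpu_atom->jc = jc;                   /* job-chain GPU address */
                gpu_atom->core_req = BASE_JD_REQ_FS; /* e.g. a fragment job chain */
                /* Block the GPU atom until the fence-wait soft atom completes */
                base_jd_atom_dep_set(&gpu_atom->pre_dep[0], wait_atom->atom_number,
                                     BASE_JD_DEP_TYPE_DATA);
        }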
-+ * @param access The type of access requested -+ */ -+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) -+{ -+ u64 address; ++ l2_inuse_bitmap = get_desired_cache_status( ++ kbdev->gpu_props.props.raw_props.l2_present, ++ cores_powered | shader_transitioning_bitmap, ++ tilers_powered | tiler_transitioning_bitmap); + -+ address = handle.basep.handle; ++#ifdef CONFIG_MALI_CORESTACK ++ if (stacks_powered) ++ desired_l2_state |= 1; ++#endif /* CONFIG_MALI_CORESTACK */ + -+ LOCAL_ASSERT(res != NULL); -+ LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); -+ LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ /* If any l2 cache is on, then enable l2 #0, for use by job manager */ ++ if (0 != desired_l2_state) ++ desired_l2_state |= 1; + -+ res->ext_resource = address | (access & LOCAL_PAGE_LSB); -+} ++ prev_l2_available_bitmap = kbdev->l2_available_bitmap; ++ in_desired_state &= kbase_pm_transition_core_type(kbdev, ++ KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap, ++ &l2_available_bitmap, ++ &kbdev->pm.backend.powering_on_l2_state); + -+/** -+ * @brief Job chain event code bits -+ * Defines the bits used to create ::base_jd_event_code -+ */ -+enum { -+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ -+ BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ -+ BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ -+ BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ -+ BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ -+ BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ -+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ -+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ -+}; ++ if (kbdev->l2_available_bitmap != l2_available_bitmap) ++ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap); + -+/** -+ * @brief Job chain event codes -+ * -+ * HW and low-level SW events are represented by event codes. -+ * The status of jobs which succeeded are also represented by -+ * an event code (see ::BASE_JD_EVENT_DONE). -+ * Events are usually reported as part of a ::base_jd_event. -+ * -+ * The event codes are encoded in the following way: -+ * @li 10:0 - subtype -+ * @li 12:11 - type -+ * @li 13 - SW success (only valid if the SW bit is set) -+ * @li 14 - SW event (HW event if not set) -+ * @li 15 - Kernel event (should never be seen in userspace) -+ * -+ * Events are split up into ranges as follows: -+ * - BASE_JD_EVENT_RANGE_\_START -+ * - BASE_JD_EVENT_RANGE_\_END -+ * -+ * \a code is in \'s range when: -+ * - BASE_JD_EVENT_RANGE_\_START <= code < BASE_JD_EVENT_RANGE_\_END -+ * -+ * Ranges can be asserted for adjacency by testing that the END of the previous -+ * is equal to the START of the next. This is useful for optimizing some tests -+ * for range. -+ * -+ * A limitation is that the last member of this enum must explicitly be handled -+ * (with an assert-unreachable statement) in switch statements that use -+ * variables of this type. Otherwise, the compiler warns that we have not -+ * handled that enum value. 
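base_external_resource_init() above relies on the import handle being page aligned, so the access mode fits in the otherwise-zero low bits of ext_resource. A standalone sketch of the packing and unpacking, assuming 4 KiB pages (so LOCAL_PAGE_LSB would be 0xFFF) and a made-up handle value:

        #include <stdint.h>
        #include <stdio.h>

        #define PAGE_LSB 0xFFFull   /* assumed 4 KiB page mask */

        int main(void)
        {
                uint64_t handle = 0x7f1234560000ull; /* page-aligned import handle */
                uint64_t access = 1;                 /* BASE_EXT_RES_ACCESS_EXCLUSIVE */
                uint64_t packed = handle | (access & PAGE_LSB);

                printf("handle=%#llx access=%llu\n",
                       (unsigned long long)(packed & ~PAGE_LSB),
                       (unsigned long long)(packed & PAGE_LSB));
                return 0;
        }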
-+ */ -+typedef enum base_jd_event_code { -+ /* HW defined exceptions */ ++ kbdev->l2_available_bitmap = l2_available_bitmap; + -+ /** Start of HW Non-fault status codes -+ * -+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, -+ * because the job was hard-stopped -+ */ -+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + -+ /* non-fatal exceptions */ -+ BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ -+ BASE_JD_EVENT_DONE = 0x01, -+ BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ -+ BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ -+ BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ ++#ifdef CONFIG_MALI_CORESTACK ++ if (in_desired_state) { ++ in_desired_state &= kbase_pm_transition_core_type(kbdev, ++ KBASE_PM_CORE_STACK, desired_stack_state, 0, ++ &kbdev->stack_available_bitmap, ++ &kbdev->pm.backend.powering_on_stack_state); ++ } ++#endif /* CONFIG_MALI_CORESTACK */ + -+ /** End of HW Non-fault status codes -+ * -+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, -+ * because the job was hard-stopped -+ */ -+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, ++ if (in_desired_state) { ++ in_desired_state &= kbase_pm_transition_core_type(kbdev, ++ KBASE_PM_CORE_TILER, ++ kbdev->pm.backend.desired_tiler_state, ++ 0, &tiler_available_bitmap, ++ &kbdev->pm.backend.powering_on_tiler_state); ++ in_desired_state &= kbase_pm_transition_core_type(kbdev, ++ KBASE_PM_CORE_SHADER, ++ kbdev->pm.backend.desired_shader_state, ++ kbdev->shader_inuse_bitmap, ++ &shader_available_bitmap, ++ &kbdev->pm.backend.powering_on_shader_state); + -+ /** Start of HW fault and SW Error status codes */ -+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, ++ if (kbdev->shader_available_bitmap != shader_available_bitmap) { ++ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, ++ NULL, 0u, ++ (u32) shader_available_bitmap); ++ KBASE_TIMELINE_POWER_SHADER(kbdev, ++ shader_available_bitmap); ++ } + -+ /* job exceptions */ -+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, -+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, -+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42, -+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, -+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, -+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, -+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, -+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, -+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, -+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, -+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, -+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, -+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, -+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, -+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, -+ BASE_JD_EVENT_STATE_FAULT = 0x5A, -+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, -+ BASE_JD_EVENT_UNKNOWN = 0x7F, ++ kbdev->shader_available_bitmap = shader_available_bitmap; + -+ /* GPU exceptions */ -+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, -+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, ++ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { ++ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, ++ NULL, NULL, 0u, ++ (u32) tiler_available_bitmap); ++ KBASE_TIMELINE_POWER_TILER(kbdev, ++ tiler_available_bitmap); ++ } + -+ /* MMU exceptions */ -+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, -+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, -+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, -+ 
BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, -+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, -+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, -+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, -+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, -+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, -+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8, ++ kbdev->tiler_available_bitmap = tiler_available_bitmap; + -+ /* SW defined exceptions */ -+ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, -+ BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, -+ BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, -+ BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, -+ BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, -+ BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005, ++ } else if ((l2_available_bitmap & ++ kbdev->gpu_props.props.raw_props.tiler_present) != ++ kbdev->gpu_props.props.raw_props.tiler_present) { ++ tiler_available_bitmap = 0; + -+ BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, ++ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) ++ KBASE_TIMELINE_POWER_TILER(kbdev, ++ tiler_available_bitmap); + -+ /** End of HW fault and SW Error status codes */ -+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, ++ kbdev->tiler_available_bitmap = tiler_available_bitmap; ++ } + -+ /** Start of SW Success status codes */ -+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, ++ /* State updated for slow-path waiters */ ++ kbdev->pm.backend.gpu_in_desired_state = in_desired_state; + -+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, -+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, -+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, ++ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_SHADER); ++ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, ++ KBASE_PM_CORE_SHADER); + -+ /** End of SW Success status codes */ -+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, ++ /* Determine whether the cores are now available (even if the set of ++ * available cores is empty). Note that they can be available even if ++ * we've not finished transitioning to the desired state */ ++ if ((kbdev->shader_available_bitmap & ++ kbdev->pm.backend.desired_shader_state) ++ == kbdev->pm.backend.desired_shader_state && ++ (kbdev->tiler_available_bitmap & ++ kbdev->pm.backend.desired_tiler_state) ++ == kbdev->pm.backend.desired_tiler_state) { ++ cores_are_available = true; + -+ /** Start of Kernel-only status codes. 
Such codes are never returned to user-space */ -+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, -+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, ++ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, ++ (u32)(kbdev->shader_available_bitmap & ++ kbdev->pm.backend.desired_shader_state)); ++ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, ++ (u32)(kbdev->tiler_available_bitmap & ++ kbdev->pm.backend.desired_tiler_state)); + -+ /** End of Kernel-only status codes. */ -+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF -+} base_jd_event_code; ++ /* Log timelining information about handling events that power ++ * up cores, to match up either with immediate submission either ++ * because cores already available, or from PM IRQ */ ++ if (!in_desired_state) ++ kbase_timeline_pm_send_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ } + -+/** -+ * @brief Event reporting structure -+ * -+ * This structure is used by the kernel driver to report information -+ * about GPU events. The can either be HW-specific events or low-level -+ * SW events, such as job-chain completion. -+ * -+ * The event code contains an event type field which can be extracted -+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. -+ * -+ * Based on the event type base_jd_event::data holds: -+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed -+ * job-chain -+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has -+ * been completed (ie all contained job-chains have been completed). -+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used -+ */ -+typedef struct base_jd_event_v2 { -+ base_jd_event_code event_code; /**< event code */ -+ base_atom_id atom_number; /**< the atom number that has completed */ -+ struct base_jd_udata udata; /**< user data */ -+} base_jd_event_v2; ++ if (in_desired_state) { ++ KBASE_DEBUG_ASSERT(cores_are_available); + -+/** -+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills -+ * a full cache line. 
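The event-code layout documented above (bits 10:0 subtype, 12:11 type, bit 13 SW success, bit 14 SW event, bit 15 kernel-only) can be decoded with a few masks. The standalone helper below is a simplified illustration rather than driver code: it treats any userspace-visible HW code other than BASE_JD_EVENT_DONE as a failure and otherwise relies on the SW success bit.

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        #define SW_EVENT           (1u << 14)
        #define SW_EVENT_SUCCESS   (1u << 13)
        #define SW_EVENT_TYPE_MASK (3u << 11)

        static bool event_is_failure(uint16_t code)
        {
                if (!(code & SW_EVENT))            /* HW event: simplified check */
                        return code != 0x01;       /* 0x01 == BASE_JD_EVENT_DONE */
                return !(code & SW_EVENT_SUCCESS); /* SW event: success bit decides */
        }

        int main(void)
        {
                /* 0x4003 == BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003 (JOB_INVALID) */
                printf("type=%u failure=%d\n",
                       (0x4003u & SW_EVENT_TYPE_MASK) >> 11,
                       event_is_failure(0x4003u));
                return 0;
        }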
-+ */ ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, ++ kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_L2)); ++ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, ++ kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_SHADER)); ++ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, ++ kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_TILER)); ++#ifdef CONFIG_MALI_CORESTACK ++ kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, ++ kbase_pm_get_ready_cores(kbdev, ++ KBASE_PM_CORE_STACK)); ++#endif /* CONFIG_MALI_CORESTACK */ ++#endif + -+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36) ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ KBASE_PM_CORE_L2, ++ kbase_pm_get_ready_cores( ++ kbdev, KBASE_PM_CORE_L2)); ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ KBASE_PM_CORE_SHADER, ++ kbase_pm_get_ready_cores( ++ kbdev, KBASE_PM_CORE_SHADER)); ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ KBASE_PM_CORE_TILER, ++ kbase_pm_get_ready_cores( ++ kbdev, ++ KBASE_PM_CORE_TILER)); ++#ifdef CONFIG_MALI_CORESTACK ++ KBASE_TLSTREAM_AUX_PM_STATE( ++ KBASE_PM_CORE_STACK, ++ kbase_pm_get_ready_cores( ++ kbdev, ++ KBASE_PM_CORE_STACK)); ++#endif /* CONFIG_MALI_CORESTACK */ + ++ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, ++ kbdev->pm.backend.gpu_in_desired_state, ++ (u32)kbdev->pm.backend.desired_shader_state); ++ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, ++ (u32)kbdev->pm.backend.desired_tiler_state); + -+/** -+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. -+ * -+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom. -+ * -+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid -+ * cases where access to pages containing the structure is shared between cached and un-cached -+ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes -+ * which is the cache line for ARM A15 processors. -+ */ ++ /* Log timelining information for synchronous waiters */ ++ kbase_timeline_pm_send_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ /* Wake slow-path waiters. Job scheduler does not use this. */ ++ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + -+typedef struct base_dump_cpu_gpu_counters { -+ u64 system_time; -+ u64 cycle_counter; -+ u64 sec; -+ u32 usec; -+ u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING]; -+} base_dump_cpu_gpu_counters; ++ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); ++ } + ++ spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + ++ /* kbase_pm_ca_update_core_status can cause one-level recursion into ++ * this function, so it must only be called once all changes to kbdev ++ * have been committed, and after the gpu_powered_lock has been ++ * dropped. 
*/ ++ if (kbdev->shader_ready_bitmap != shader_ready_bitmap || ++ kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { ++ kbdev->shader_ready_bitmap = shader_ready_bitmap; ++ kbdev->shader_transitioning_bitmap = ++ shader_transitioning_bitmap; + -+/** @} end group base_user_api_job_dispatch */ ++ kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, ++ shader_transitioning_bitmap); ++ } + -+#define GPU_MAX_JOB_SLOTS 16 ++ /* The core availability policy is not allowed to keep core group 0 ++ * turned off (unless it was changing the l2 power state) */ ++ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask) && ++ (prev_l2_available_bitmap == desired_l2_state) && ++ !(kbase_pm_ca_get_core_mask(kbdev) & ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask)) ++ BUG(); + -+/** -+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API -+ * -+ * The User-side Base GPU Property Query API encapsulates two -+ * sub-modules: -+ * -+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" -+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" -+ * -+ * There is a related third module outside of Base, which is owned by the MIDG -+ * module: -+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties" -+ * -+ * Base only deals with properties that vary between different Midgard -+ * implementations - the Dynamic GPU properties and the Platform Config -+ * properties. -+ * -+ * For properties that are constant for the Midgard Architecture, refer to the -+ * MIDG module. However, we will discuss their relevance here just to -+ * provide background information. -+ * -+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules -+ * -+ * The compile-time properties (Platform Config, Midgard Compile-time -+ * properties) are exposed as pre-processor macros. -+ * -+ * Complementing the compile-time properties are the Dynamic GPU -+ * Properties, which act as a conduit for the Midgard Configuration -+ * Discovery. -+ * -+ * In general, the dynamic properties are present to verify that the platform -+ * has been configured correctly with the right set of Platform Config -+ * Compile-time Properties. -+ * -+ * As a consistent guide across the entire DDK, the choice for dynamic or -+ * compile-time should consider the following, in order: -+ * -# Can the code be written so that it doesn't need to know the -+ * implementation limits at all? -+ * -# If you need the limits, get the information from the Dynamic Property -+ * lookup. This should be done once as you fetch the context, and then cached -+ * as part of the context data structure, so it's cheap to access. -+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties, -+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time -+ * property). Examples of where this might be sensible follow: -+ * - Part of a critical inner-loop -+ * - Frequent re-use throughout the driver, causing significant extra load -+ * instructions or control flow that would be worthwhile optimizing out. -+ * -+ * We cannot provide an exhaustive set of examples, neither can we provide a -+ * rule for every possible situation. 
Use common sense, and think about: what -+ * the rest of the driver will be doing; how the compiler might represent the -+ * value if it is a compile-time constant; whether an OEM shipping multiple -+ * devices would benefit much more from a single DDK binary, instead of -+ * insignificant micro-optimizations. -+ * -+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties -+ * -+ * Dynamic GPU properties are presented in two sets: -+ * -# the commonly used properties in @ref base_gpu_props, which have been -+ * unpacked from GPU register bitfields. -+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props -+ * (also a member of @ref base_gpu_props). All of these are presented in -+ * the packed form, as presented by the GPU registers themselves. -+ * -+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to -+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device -+ * behaving differently?". In this case, all information about the -+ * configuration is potentially useful, but it does not need to be processed -+ * by the driver. Instead, the raw registers can be processed by the Mali -+ * Tools software on the host PC. -+ * -+ * The properties returned extend the Midgard Configuration Discovery -+ * registers. For example, GPU clock speed is not specified in the Midgard -+ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. -+ * -+ * The GPU properties are obtained by a call to -+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const -+ * base_gpu_props structure. It is constant for the life of a base -+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context -+ * return the same pointer to a constant structure. This avoids cache pollution -+ * of the common data. -+ * -+ * This pointer must not be freed, because it does not point to the start of a -+ * region allocated by the memory allocator; instead, just close the @ref -+ * base_context. -+ * -+ * -+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties -+ * -+ * The Platform Config File sets up gpu properties that are specific to a -+ * certain platform. Properties that are 'Implementation Defined' in the -+ * Midgard Architecture spec are placed here. -+ * -+ * @note Reference configurations are provided for Midgard Implementations, such as -+ * the Mali-T600 family. The customer need not repeat this information, and can select one of -+ * these reference configurations. For example, VA_BITS, PA_BITS and the -+ * maximum number of samples per pixel might vary between Midgard Implementations, but -+ * \b not for platforms using the Mali-T604. This information is placed in -+ * the reference configuration files. -+ * -+ * The System Integrator creates the following structure: -+ * - platform_XYZ -+ * - platform_XYZ/plat -+ * - platform_XYZ/plat/plat_config.h -+ * -+ * They then edit plat_config.h, using the example plat_config.h files as a -+ * guide. -+ * -+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will -+ * receive a helpful \#error message if they do not do this correctly. This -+ * selects the Reference Configuration for the Midgard Implementation. 
The rationale -+ * behind this decision (against asking the customer to write \#include -+ * in their plat_config.h) is as follows: -+ * - This mechanism 'looks' like a regular config file (such as Linux's -+ * .config) -+ * - It is difficult to get wrong in a way that will produce strange build -+ * errors: -+ * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and -+ * so they won't accidentally pick another file with 'mali_t600' in its name -+ * - When the build doesn't work, the System Integrator may think the DDK is -+ * doesn't work, and attempt to fix it themselves: -+ * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the -+ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells -+ * you. -+ * - For a \#include mechanism, checks must still be made elsewhere, which the -+ * System Integrator may try working around by setting \#defines (such as -+ * VA_BITS) themselves in their plat_config.h. In the worst case, they may -+ * set the prevention-mechanism \#define of -+ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN". -+ * - In this case, they would believe they are on the right track, because -+ * the build progresses with their fix, but with errors elsewhere. -+ * -+ * However, there is nothing to prevent the customer using \#include to organize -+ * their own configurations files hierarchically. -+ * -+ * The mechanism for the header file processing is as follows: -+ * -+ * @dot -+ digraph plat_config_mechanism { -+ rankdir=BT -+ size="6,6" ++ /* The core availability policy is allowed to keep core group 1 off, ++ * but all jobs specifically targeting CG1 must fail */ ++ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & ++ kbdev->gpu_props.props.coherency_info.group[1].core_mask) && ++ !(kbase_pm_ca_get_core_mask(kbdev) & ++ kbdev->gpu_props.props.coherency_info.group[1].core_mask)) ++ kbdev->pm.backend.cg1_disabled = true; ++ else ++ kbdev->pm.backend.cg1_disabled = false; + -+ "mali_base.h"; -+ "gpu/mali_gpu.h"; ++ return cores_are_available; ++} ++KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); + -+ node [ shape=box ]; -+ { -+ rank = same; ordering = out; ++/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has ++ * aborted due to a fatal signal. If the time spent waiting has exceeded this ++ * threshold then there is most likely a hardware issue. */ ++#define PM_TIMEOUT (5*HZ) /* 5s */ + -+ "gpu/mali_gpu_props.h"; -+ "base/midg_gpus/mali_t600.h"; -+ "base/midg_gpus/other_midg_gpu.h"; -+ } -+ { rank = same; "plat/plat_config.h"; } -+ { -+ rank = same; -+ "gpu/mali_gpu.h" [ shape=box ]; -+ gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ]; -+ select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ; -+ } -+ node [ shape=box ]; -+ { rank = same; "plat/plat_config.h"; } -+ { rank = same; "mali_base.h"; } ++void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ unsigned long timeout; ++ bool cores_are_available; ++ int ret; + -+ "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h"; -+ "mali_base.h" -> "plat/plat_config.h" ; -+ "mali_base.h" -> select_gpu ; ++ /* Force the transition to be checked and reported - the cores may be ++ * 'available' (for job submission) but not fully powered up. 
*/ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ; -+ gpu_chooser -> select_gpu [style="dotted,bold"] ; ++ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + -+ select_gpu -> "base/midg_gpus/mali_t600.h" ; -+ select_gpu -> "base/midg_gpus/other_midg_gpu.h" ; -+ } -+ @enddot -+ * -+ * -+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation -+ * -+ * During Base Context Create time, user-side makes a single kernel call: -+ * - A call to fill user memory with GPU information structures -+ * -+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props -+ * structure, because this information is required in both -+ * user and kernel side; it does not make sense to decode it twice. -+ * -+ * Coherency groups must be derived from the bitmasks, but this can be done -+ * kernel side, and just once at kernel startup: Coherency groups must already -+ * be known kernel-side, to support chains that specify a 'Only Coherent Group' -+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. -+ * -+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation -+ * Creation of the coherent group data is done at device-driver startup, and so -+ * is one-time. This will most likely involve a loop with CLZ, shifting, and -+ * bit clearing on the L2_PRESENT mask, depending on whether the -+ * system is L2 Coherent. The number of shader cores is done by a -+ * population count, since faulty cores may be disabled during production, -+ * producing a non-contiguous mask. -+ * -+ * The memory requirements for this algorithm can be determined either by a u64 -+ * population count on the L2_PRESENT mask (a LUT helper already is -+ * required for the above), or simple assumption that there can be no more than -+ * 16 coherent groups, since core groups are typically 4 cores. 
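The synchronous wait in kbase_pm_check_transitions_sync() above only escalates to a register dump and GPU reset when two things coincide: wait_event_killable() returned early because of a fatal signal, and the PM_TIMEOUT deadline has already passed. A condensed sketch of that decision, reusing names from the code above (kernel context assumed, not compilable on its own, and not part of the driver):

        static void wait_for_desired_state_sketch(struct kbase_device *kbdev)
        {
                unsigned long timeout = jiffies + PM_TIMEOUT;
                int ret = wait_event_killable(
                                kbdev->pm.backend.gpu_in_desired_state_wait,
                                kbdev->pm.backend.gpu_in_desired_state);

                if (ret < 0 && time_after(jiffies, timeout)) {
                        /* Most likely a hardware issue: the real function dumps the
                         * READY/PWRTRANS registers here and resets the GPU. */
                } else {
                        /* Either the desired state was reached, or the wait was
                         * interrupted before the deadline; no reset in either case. */
                }
        }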
-+ */ ++ /* Don't need 'cores_are_available', because we don't return anything */ ++ CSTD_UNUSED(cores_are_available); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+/** -+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs -+ * @{ -+ */ ++ timeout = jiffies + PM_TIMEOUT; + -+/** -+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties -+ * @{ -+ */ ++ /* Wait for cores */ ++ ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbdev->pm.backend.gpu_in_desired_state); + -+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 ++ if (ret < 0 && time_after(jiffies, timeout)) { ++ dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); ++ dev_err(kbdev->dev, "Desired state :\n"); ++ dev_err(kbdev->dev, "\tShader=%016llx\n", ++ kbdev->pm.backend.desired_shader_state); ++ dev_err(kbdev->dev, "\tTiler =%016llx\n", ++ kbdev->pm.backend.desired_tiler_state); ++ dev_err(kbdev->dev, "Current state :\n"); ++ dev_err(kbdev->dev, "\tShader=%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_READY_HI), NULL), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_READY_LO), ++ NULL)); ++ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_READY_HI), NULL), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_READY_LO), NULL)); ++ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_READY_HI), NULL), ++ kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_READY_LO), NULL)); ++ dev_err(kbdev->dev, "Cores transitioning :\n"); ++ dev_err(kbdev->dev, "\tShader=%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ SHADER_PWRTRANS_HI), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ SHADER_PWRTRANS_LO), NULL)); ++ dev_err(kbdev->dev, "\tTiler =%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ TILER_PWRTRANS_HI), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ TILER_PWRTRANS_LO), NULL)); ++ dev_err(kbdev->dev, "\tL2 =%08x%08x\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ L2_PWRTRANS_HI), NULL), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG( ++ L2_PWRTRANS_LO), NULL)); ++#if KBASE_GPU_RESET_EN ++ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); ++ if (kbase_prepare_to_reset_gpu(kbdev)) ++ kbase_reset_gpu(kbdev); ++#endif /* KBASE_GPU_RESET_EN */ ++ } else { ++ /* Log timelining information that a change in state has ++ * completed */ ++ kbase_timeline_pm_handle_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++ } ++} ++KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); + -+#define BASE_MAX_COHERENT_GROUPS 16 ++void kbase_pm_enable_interrupts(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+struct mali_base_gpu_core_props { -+ /** -+ * Product specific value. ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ /* ++ * Clear all interrupts, ++ * and unmask them all. + */ -+ u32 product_id; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, ++ NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, ++ NULL); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /** -+ * Status of the GPU release. -+ * No defined values, but starts at 0 and increases by one for each -+ * release status (alpha, beta, EAC, etc.). -+ * 4 bit values (0-15). 
-+ */ -+ u16 version_status; ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, ++ NULL); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + -+ /** -+ * Minor release number of the GPU. "P" part of an "RnPn" release number. -+ * 8 bit values (0-255). -+ */ -+ u16 minor_revision; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); ++} + -+ /** -+ * Major release number of the GPU. "R" part of an "RnPn" release number. -+ * 4 bit values (0-15). ++KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); ++ ++void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ /* ++ * Mask all interrupts, ++ * and clear them all. + */ -+ u16 major_revision; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ u16 padding; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, ++ NULL); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, ++ NULL); + -+ /** -+ * This property is deprecated since it has not contained the real current -+ * value of GPU clock speed. It is kept here only for backwards compatibility. -+ * For the new ioctl interface, it is ignored and is treated as a padding -+ * to keep the structure of the same size and retain the placement of its -+ * members. -+ */ -+ u32 gpu_speed_mhz; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); ++} + -+ /** -+ * @usecase GPU clock max/min speed is required for computing best/worst case -+ * in tasks as job scheduling ant irq_throttling. (It is not specified in the -+ * Midgard Architecture). -+ * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function. -+ */ -+ u32 gpu_freq_khz_max; -+ u32 gpu_freq_khz_min; ++void kbase_pm_disable_interrupts(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ /** -+ * Size of the shader program counter, in bits. -+ */ -+ u32 log2_program_counter_size; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_disable_interrupts_nolock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} + -+ /** -+ * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a -+ * bitpattern where a set bit indicates that the format is supported. -+ * -+ * Before using a texture format, it is recommended that the corresponding -+ * bit be checked. -+ */ -+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; ++KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + -+ /** -+ * Theoretical maximum memory available to the GPU. It is unlikely that a -+ * client will be able to allocate all of this memory for their own -+ * purposes, but this at least provides an upper bound on the memory -+ * available to the GPU. -+ * -+ * This is required for OpenCL's clGetDeviceInfo() call when -+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The -+ * client will not be expecting to allocate anywhere near this value. -+ */ -+ u64 gpu_available_memory_size; -+}; + -+/** -+ * -+ * More information is possible - but associativity and bus width are not -+ * required by upper-level apis. 
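The texture_features[] words above are meant to be probed before a texture format is used. A hypothetical standalone check, assuming the usual mapping of format index N to bit N % 32 of word N / 32 (the register values and the format index below are made up):

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        static bool texture_format_supported(const uint32_t features[3], unsigned int fmt)
        {
                if (fmt >= 3 * 32)
                        return false;
                return (features[fmt / 32] >> (fmt % 32)) & 1u;
        }

        int main(void)
        {
                const uint32_t features[3] = { 0x00000004, 0, 0 }; /* only format 2 set */

                printf("format 2 supported: %d\n",
                       texture_format_supported(features, 2));
                return 0;
        }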
++/* ++ * pmu layout: ++ * 0x0000: PMU TAG (RO) (0xCAFECAFE) ++ * 0x0004: PMU VERSION ID (RO) (0x00000000) ++ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ -+struct mali_base_gpu_l2_cache_props { -+ u8 log2_line_size; -+ u8 log2_cache_size; -+ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ -+ u8 padding[5]; -+}; ++void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) ++{ ++ bool reset_required = is_resume; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ unsigned long flags; + -+struct mali_base_gpu_tiler_props { -+ u32 bin_size_bytes; /* Max is 4*2^15 */ -+ u32 max_active_levels; /* Max is 2^15 */ -+}; ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->pm.lock); + -+/** -+ * GPU threading system details. -+ */ -+struct mali_base_gpu_thread_props { -+ u32 max_threads; /* Max. number of threads per core */ -+ u32 max_workgroup_size; /* Max. number of threads per workgroup */ -+ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ -+ u16 max_registers; /* Total size [1..65535] of the register file available per core. */ -+ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ -+ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */ -+ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ -+ u8 padding[7]; -+}; ++ if (kbdev->pm.backend.gpu_powered) { ++ /* Already turned on */ ++ if (kbdev->poweroff_pending) ++ kbase_pm_enable_interrupts(kbdev); ++ kbdev->poweroff_pending = false; ++ KBASE_DEBUG_ASSERT(!is_resume); ++ return; ++ } + -+/** -+ * @brief descriptor for a coherent group -+ * -+ * \c core_mask exposes all cores in that coherent group, and \c num_cores -+ * provides a cached population-count for that mask. -+ * -+ * @note Whilst all cores are exposed in the mask, not all may be available to -+ * the application, depending on the Kernel Power policy. -+ * -+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. -+ */ -+struct mali_base_gpu_coherent_group { -+ u64 core_mask; /**< Core restriction mask required for the group */ -+ u16 num_cores; /**< Number of cores in the group */ -+ u16 padding[3]; -+}; ++ kbdev->poweroff_pending = false; + -+/** -+ * @brief Coherency group information -+ * -+ * Note that the sizes of the members could be reduced. However, the \c group -+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte -+ * aligned, thus leading to wastage if the other members sizes were reduced. -+ * -+ * The groups are sorted by core mask. The core masks are non-repeating and do -+ * not intersect. -+ */ -+struct mali_base_gpu_coherent_group_info { -+ u32 num_groups; ++ KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + -+ /** -+ * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. -+ * -+ * The GPU Counter dumping writes 2048 bytes per core group, regardless of -+ * whether the core groups are coherent or not. Hence this member is needed -+ * to calculate how much memory is required for dumping. -+ * -+ * @note Do not use it to work out how many valid elements are in the -+ * group[] member. Use num_groups instead. 
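The pmu layout comment above fixes three 32-bit words: a read-only tag of 0xCAFECAFE, a version ID, and a clock-enable word whose bit 0 is the clock state with bits 31:1 "should be zero". A standalone sketch of how an integration might sanity-check the tag and toggle the clock bit; the struct and helper names are illustrative, nothing here is a driver API:

        #include <stdbool.h>
        #include <stdint.h>

        /* Mirror of the documented PMU layout (offsets 0x0, 0x4, 0x8). */
        struct pmu_regs {
                volatile uint32_t tag;          /* RO, expected 0xCAFECAFE           */
                volatile uint32_t version_id;   /* RO                                */
                volatile uint32_t clock_enable; /* RW, bit 0 = CLOCK STATE, 31:1 SBZ */
        };

        #define PMU_TAG_VALUE   0xCAFECAFEu
        #define PMU_CLOCK_STATE (1u << 0)

        static bool pmu_present(const struct pmu_regs *pmu)
        {
                return pmu->tag == PMU_TAG_VALUE;
        }

        static void pmu_set_clock(struct pmu_regs *pmu, bool on)
        {
                /* Bits 31:1 are "should be zero", so only bit 0 is written. */
                pmu->clock_enable = on ? PMU_CLOCK_STATE : 0u;
        }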
-+ */ -+ u32 num_core_groups; ++ if (is_resume && kbdev->pm.backend.callback_power_resume) { ++ kbdev->pm.backend.callback_power_resume(kbdev); ++ return; ++ } else if (kbdev->pm.backend.callback_power_on) { ++ kbdev->pm.backend.callback_power_on(kbdev); ++ /* If your platform properly keeps the GPU state you may use the ++ * return value of the callback_power_on function to ++ * conditionally reset the GPU on power up. Currently we are ++ * conservative and always reset the GPU. */ ++ reset_required = true; ++ } + -+ /** -+ * Coherency features of the memory, accessed by @ref gpu_mem_features -+ * methods -+ */ -+ u32 coherency; ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); ++ kbdev->pm.backend.gpu_powered = true; ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+ u32 padding; ++ if (reset_required) { ++ /* GPU state was lost, reset GPU to ensure it is in a ++ * consistent state */ ++ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); ++ } + -+ /** -+ * Descriptors of coherent groups -+ */ -+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; -+}; ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+/** -+ * A complete description of the GPU's Hardware Configuration Discovery -+ * registers. -+ * -+ * The information is presented inefficiently for access. For frequent access, -+ * the values should be better expressed in an unpacked form in the -+ * base_gpu_props structure. -+ * -+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to -+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device -+ * behaving differently?". In this case, all information about the -+ * configuration is potentially useful, but it does not need to be processed -+ * by the driver. Instead, the raw registers can be processed by the Mali -+ * Tools software on the host PC. -+ * -+ */ -+struct gpu_raw_gpu_props { -+ u64 shader_present; -+ u64 tiler_present; -+ u64 l2_present; -+ u64 stack_present; ++ /* Lastly, enable the interrupts */ ++ kbase_pm_enable_interrupts(kbdev); ++} + -+ u32 l2_features; -+ u32 suspend_size; /* API 8.2+ */ -+ u32 mem_features; -+ u32 mmu_features; ++KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + -+ u32 as_present; ++bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) ++{ ++ unsigned long flags; + -+ u32 js_present; -+ u32 js_features[GPU_MAX_JOB_SLOTS]; -+ u32 tiler_features; -+ u32 texture_features[3]; ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ lockdep_assert_held(&kbdev->pm.lock); + -+ u32 gpu_id; ++ /* ASSERT that the cores should now be unavailable. No lock needed. */ ++ KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + -+ u32 thread_max_threads; -+ u32 thread_max_workgroup_size; -+ u32 thread_max_barrier_size; -+ u32 thread_features; ++ kbdev->poweroff_pending = true; + -+ /* -+ * Note: This is the _selected_ coherency mode rather than the -+ * available modes as exposed in the coherency_features register. -+ */ -+ u32 coherency_mode; -+}; ++ if (!kbdev->pm.backend.gpu_powered) { ++ /* Already turned off */ ++ if (is_suspend && kbdev->pm.backend.callback_power_suspend) ++ kbdev->pm.backend.callback_power_suspend(kbdev); ++ return true; ++ } + -+/** -+ * Return structure for _mali_base_get_gpu_props(). 
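kbase_pm_clock_on() and kbase_pm_clock_off() above delegate the actual rail and clock handling to platform callbacks (callback_power_on/off/suspend/resume). A hedged sketch of what such an integration pair might look like: the rail/clock helpers are placeholders, and the idea that the power-on hook reports whether GPU state was lost follows the comment inside kbase_pm_clock_on(), not a signature taken from this patch:

        #include <stdbool.h>

        /* Placeholder platform hooks; a real board would drive the GPU
         * regulator and clocks here. */
        static bool gpu_rail_enable(void)  { return true;  }
        static void gpu_rail_disable(void) { }
        static void gpu_clocks_on(void)    { }
        static void gpu_clocks_off(void)   { }

        /* Returns true when GPU state was lost and a reset is needed,
         * mirroring the convention hinted at in kbase_pm_clock_on(). */
        static bool platform_gpu_power_on(void)
        {
                bool state_lost = gpu_rail_enable(); /* rail was off -> state lost */

                gpu_clocks_on();
                return state_lost;
        }

        static void platform_gpu_power_off(void)
        {
                gpu_clocks_off();
                gpu_rail_disable();
        }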
-+ * -+ * NOTE: the raw_props member in this data structure contains the register -+ * values from which the value of the other members are derived. The derived -+ * members exist to allow for efficient access and/or shielding the details -+ * of the layout of the registers. -+ * -+ */ -+typedef struct mali_base_gpu_props { -+ struct mali_base_gpu_core_props core_props; -+ struct mali_base_gpu_l2_cache_props l2_props; -+ u64 unused_1; /* keep for backwards compatibility */ -+ struct mali_base_gpu_tiler_props tiler_props; -+ struct mali_base_gpu_thread_props thread_props; ++ KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + -+ /** This member is large, likely to be 128 bytes */ -+ struct gpu_raw_gpu_props raw_props; ++ /* Disable interrupts. This also clears any outstanding interrupts */ ++ kbase_pm_disable_interrupts(kbdev); ++ /* Ensure that any IRQ handlers have finished */ ++ kbase_synchronize_irqs(kbdev); + -+ /** This must be last member of the structure */ -+ struct mali_base_gpu_coherent_group_info coherency_info; -+} base_gpu_props; ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + -+/** @} end group base_user_api_gpuprops_dyn */ ++ if (atomic_read(&kbdev->faults_pending)) { ++ /* Page/bus faults are still being processed. The GPU can not ++ * be powered off until they have completed */ ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ flags); ++ return false; ++ } + -+/** @} end group base_user_api_gpuprops */ ++ kbase_pm_cache_snoop_disable(kbdev); + -+/** -+ * @addtogroup base_user_api_core User-side Base core APIs -+ * @{ -+ */ ++ /* The GPU power may be turned off from this point */ ++ kbdev->pm.backend.gpu_powered = false; ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + -+/** -+ * \enum base_context_create_flags -+ * -+ * Flags to pass to ::base_context_init. -+ * Flags can be ORed together to enable multiple things. -+ * -+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must -+ * not collide with them. -+ */ -+enum base_context_create_flags { -+ /** No flags set */ -+ BASE_CONTEXT_CREATE_FLAG_NONE = 0, ++ if (is_suspend && kbdev->pm.backend.callback_power_suspend) ++ kbdev->pm.backend.callback_power_suspend(kbdev); ++ else if (kbdev->pm.backend.callback_power_off) ++ kbdev->pm.backend.callback_power_off(kbdev); ++ return true; ++} + -+ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */ -+ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0), ++KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + -+ /** Base context is a 'System Monitor' context for Hardware counters. -+ * -+ * One important side effect of this is that job submission is disabled. 
*/ -+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1) ++struct kbasep_reset_timeout_data { ++ struct hrtimer timer; ++ bool timed_out; ++ struct kbase_device *kbdev; +}; + -+/** -+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init() -+ */ -+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ -+ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ -+ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) ++void kbase_pm_reset_done(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbdev->pm.backend.reset_done = true; ++ wake_up(&kbdev->pm.backend.reset_done_wait); ++} + +/** -+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel -+ */ -+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ -+ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) -+ -+/* -+ * Private flags used on the base context ++ * kbase_pm_wait_for_reset - Wait for a reset to happen + * -+ * These start at bit 31, and run down to zero. ++ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. + * -+ * They share the same space as @ref base_context_create_flags, and so must -+ * not collide with them. ++ * @kbdev: Kbase device + */ -+/** Private flag tracking whether job descriptor dumping is disabled */ -+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) ++static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->pm.lock); + -+/** @} end group base_user_api_core */ ++ wait_event(kbdev->pm.backend.reset_done_wait, ++ (kbdev->pm.backend.reset_done)); ++ kbdev->pm.backend.reset_done = false; ++} + -+/** @} end group base_user_api */ ++KBASE_EXPORT_TEST_API(kbase_pm_reset_done); + -+/** -+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties -+ * @{ -+ * -+ * C Pre-processor macros are exposed here to do with Platform -+ * Config. -+ * -+ * These include: -+ * - GPU Properties that are constant on a particular Midgard Family -+ * Implementation e.g. Maximum samples per pixel on Mali-T600. -+ * - General platform config for the GPU, such as the GPU major and minor -+ * revison. -+ */ ++static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) ++{ ++ struct kbasep_reset_timeout_data *rtdata = ++ container_of(timer, struct kbasep_reset_timeout_data, timer); + -+/** @} end group base_plat_config_gpuprops */ ++ rtdata->timed_out = 1; + -+/** -+ * @addtogroup base_api Base APIs -+ * @{ -+ */ ++ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset ++ * hasn't completed */ ++ kbase_pm_reset_done(rtdata->kbdev); + -+/** -+ * @brief The payload for a replay job. This must be in GPU memory. -+ */ -+typedef struct base_jd_replay_payload { -+ /** -+ * Pointer to the first entry in the base_jd_replay_jc list. These -+ * will be replayed in @b reverse order (so that extra ones can be added -+ * to the head in future soft jobs without affecting this soft job) -+ */ -+ u64 tiler_jc_list; ++ return HRTIMER_NORESTART; ++} + -+ /** -+ * Pointer to the fragment job chain. 
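The product-ID ranges used in the SC_LS_* selection above map directly to GPU families. A standalone restatement of just that classification, handy for checking a given ID by hand (the enum is illustrative; the numeric ranges are the ones in kbase_pm_hw_issues_detect() above):

        #include <stdint.h>
        #include <stdio.h>

        enum sc_ls_quirk {
                SC_LS_NONE,
                SC_LS_ATTR_CHECK_DISABLE_Q,  /* T60x, T62x, T72x              */
                SC_LS_ALLOW_ATTR_TYPES_Q,    /* T76x and T8xx family products */
        };

        static enum sc_ls_quirk classify_ls_quirk(uint32_t prod_id)
        {
                if (prod_id < 0x750 || prod_id == 0x6956)
                        return SC_LS_ATTR_CHECK_DISABLE_Q;
                if (prod_id >= 0x750 && prod_id <= 0x880)
                        return SC_LS_ALLOW_ATTR_TYPES_Q;
                return SC_LS_NONE;
        }

        int main(void)
        {
                /* Example: 0x0750 falls in the T76x/T8xx range. */
                printf("%d\n", classify_ls_quirk(0x750));
                return 0;
        }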
-+ */ -+ u64 fragment_jc; ++static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) ++{ ++ struct device_node *np = kbdev->dev->of_node; ++ u32 jm_values[4]; ++ const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> ++ GPU_ID_VERSION_MAJOR_SHIFT; + -+ /** -+ * Pointer to the tiler heap free FBD field to be modified. -+ */ -+ u64 tiler_heap_free; ++ kbdev->hw_quirks_sc = 0; + -+ /** -+ * Hierarchy mask for the replayed fragment jobs. May be zero. -+ */ -+ u16 fragment_hierarchy_mask; ++ /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. ++ * and needed due to MIDGLES-3539. See PRLAM-11035 */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) || ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035)) ++ kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE; + -+ /** -+ * Hierarchy mask for the replayed tiler jobs. May be zero. ++ /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. + */ -+ u16 tiler_hierarchy_mask; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) ++ kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; + -+ /** -+ * Default weight to be used for hierarchy levels not in the original -+ * mask. -+ */ -+ u32 hierarchy_default_weight; ++#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY ++ /* Enable alternative hardware counter selection if configured. */ ++ if (!GPU_ID_IS_NEW_FORMAT(prod_id)) ++ kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; ++#endif + -+ /** -+ * Core requirements for the tiler job chain -+ */ -+ base_jd_core_req tiler_core_req; ++ /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) ++ kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; + -+ /** -+ * Core requirements for the fragment job chain -+ */ -+ base_jd_core_req fragment_core_req; -+} base_jd_replay_payload; ++ if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { ++ if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ ++ kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; ++ else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ ++ kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; ++ } + -+#ifdef BASE_LEGACY_UK10_2_SUPPORT -+typedef struct base_jd_replay_payload_uk10_2 { -+ u64 tiler_jc_list; -+ u64 fragment_jc; -+ u64 tiler_heap_free; -+ u16 fragment_hierarchy_mask; -+ u16 tiler_hierarchy_mask; -+ u32 hierarchy_default_weight; -+ u16 tiler_core_req; -+ u16 fragment_core_req; -+ u8 padding[4]; -+} base_jd_replay_payload_uk10_2; -+#endif /* BASE_LEGACY_UK10_2_SUPPORT */ ++ if (!kbdev->hw_quirks_sc) ++ kbdev->hw_quirks_sc = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_CONFIG), NULL); + -+/** -+ * @brief An entry in the linked list of job chains to be replayed. This must -+ * be in GPU memory. -+ */ -+typedef struct base_jd_replay_jc { -+ /** -+ * Pointer to next entry in the list. A setting of NULL indicates the -+ * end of the list. -+ */ -+ u64 next; ++ kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TILER_CONFIG), NULL); + -+ /** -+ * Pointer to the job chain. -+ */ -+ u64 jc; ++ /* Set tiler clock gate override if required */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) ++ kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + -+} base_jd_replay_jc; ++ /* Limit the GPU bus bandwidth if the platform needs this. 
*/ ++ kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + -+/* Maximum number of jobs allowed in a fragment chain in the payload of a -+ * replay job */ -+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256 ++ /* Limit read ID width for AXI */ ++ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); ++ kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << ++ L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; + -+/** @} end group base_api */ ++ /* Limit write ID width for AXI */ ++ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); ++ kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << ++ L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + -+typedef struct base_profiling_controls { -+ u32 profiling_controls[FBDUMP_CONTROL_MAX]; -+} base_profiling_controls; ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ /* Allow memory configuration disparity to be ignored, we ++ * optimize the use of shared memory and thus we expect ++ * some disparity in the memory configuration */ ++ kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; ++ } + -+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, -+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ -+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) ++ kbdev->hw_quirks_jm = 0; ++ /* Only for T86x/T88x-based products after r2p0 */ ++ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { + -+/* Indicate that job dumping is enabled. This could affect certain timers -+ * to account for the performance impact. */ -+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) ++ if (of_property_read_u32_array(np, ++ "jm_config", ++ &jm_values[0], ++ ARRAY_SIZE(jm_values))) { ++ /* Entry not in device tree, use defaults */ ++ jm_values[0] = 0; ++ jm_values[1] = 0; ++ jm_values[2] = 0; ++ jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; ++ } + -+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ -+ BASE_TLSTREAM_JOB_DUMPING_ENABLED) ++ /* Limit throttle limit to 6 bits*/ ++ if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) { ++ dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); ++ jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; ++ } + -+#endif /* _BASE_KERNEL_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h -new file mode 100644 -index 000000000..4a98a72cc ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h -@@ -0,0 +1,52 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ /* Aggregate to one integer. */ ++ kbdev->hw_quirks_jm |= (jm_values[0] ? ++ JM_TIMESTAMP_OVERRIDE : 0); ++ kbdev->hw_quirks_jm |= (jm_values[1] ? ++ JM_CLOCK_GATE_OVERRIDE : 0); ++ kbdev->hw_quirks_jm |= (jm_values[2] ? 
++ JM_JOB_THROTTLE_ENABLE : 0); ++ kbdev->hw_quirks_jm |= (jm_values[3] << ++ JM_JOB_THROTTLE_LIMIT_SHIFT); + ++ } else if (GPU_ID_IS_NEW_FORMAT(prod_id) && ++ (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == ++ GPU_ID2_PRODUCT_TMIX)) { ++ /* Only for tMIx */ ++ u32 coherency_features; + ++ coherency_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + ++ /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly ++ * documented for tMIx so force correct value here. ++ */ ++ if (coherency_features == ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { ++ kbdev->hw_quirks_jm |= ++ (COHERENCY_ACE_LITE | COHERENCY_ACE) << ++ JM_FORCE_COHERENCY_FEATURES_SHIFT; ++ } ++ } + ++ if (!kbdev->hw_quirks_jm) ++ kbdev->hw_quirks_jm = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(JM_CONFIG), NULL); + -+#ifndef _BASE_MEM_PRIV_H_ -+#define _BASE_MEM_PRIV_H_ ++#ifdef CONFIG_MALI_CORESTACK ++#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) ++ kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; ++#endif /* CONFIG_MALI_CORESTACK */ ++} + -+#define BASE_SYNCSET_OP_MSYNC (1U << 0) -+#define BASE_SYNCSET_OP_CSYNC (1U << 1) ++static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) ++{ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), ++ kbdev->hw_quirks_sc, NULL); + -+/* -+ * This structure describe a basic memory coherency operation. -+ * It can either be: -+ * @li a sync from CPU to Memory: -+ * - type = ::BASE_SYNCSET_OP_MSYNC -+ * - mem_handle = a handle to the memory object on which the operation -+ * is taking place -+ * - user_addr = the address of the range to be synced -+ * - size = the amount of data to be synced, in bytes -+ * - offset is ignored. -+ * @li a sync from Memory to CPU: -+ * - type = ::BASE_SYNCSET_OP_CSYNC -+ * - mem_handle = a handle to the memory object on which the operation -+ * is taking place -+ * - user_addr = the address of the range to be synced -+ * - size = the amount of data to be synced, in bytes. -+ * - offset is ignored. -+ */ -+struct basep_syncset { -+ base_mem_handle mem_handle; -+ u64 user_addr; -+ u64 size; -+ u8 type; -+ u8 padding[7]; -+}; ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), ++ kbdev->hw_quirks_tiler, NULL); + -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h -new file mode 100644 -index 000000000..be454a216 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h -@@ -0,0 +1,24 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
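The jm_config handling above reads an optional four-entry device-tree array (defaulting to {0, 0, 0, JM_MAX_JOB_THROTTLE_LIMIT}), clamps the throttle limit to its 6-bit field, and folds everything into one JM_CONFIG quirks word. A standalone sketch of that aggregation; the bit positions below are placeholders, since the real JM_* definitions live in register headers outside this hunk:

        #include <stdint.h>

        /* Placeholder bit positions; only the aggregation logic mirrors
         * kbase_pm_hw_issues_detect() above. */
        #define JM_TIMESTAMP_OVERRIDE_Q        (1u << 0)
        #define JM_CLOCK_GATE_OVERRIDE_Q       (1u << 1)
        #define JM_JOB_THROTTLE_ENABLE_Q       (1u << 2)
        #define JM_JOB_THROTTLE_LIMIT_SHIFT_Q  3
        #define JM_MAX_JOB_THROTTLE_LIMIT_Q    63u  /* 6-bit field */

        static uint32_t build_jm_quirks(const uint32_t jm_values[4])
        {
                uint32_t limit = jm_values[3];
                uint32_t quirks = 0;

                if (limit > JM_MAX_JOB_THROTTLE_LIMIT_Q)
                        limit = JM_MAX_JOB_THROTTLE_LIMIT_Q;

                quirks |= jm_values[0] ? JM_TIMESTAMP_OVERRIDE_Q : 0;
                quirks |= jm_values[1] ? JM_CLOCK_GATE_OVERRIDE_Q : 0;
                quirks |= jm_values[2] ? JM_JOB_THROTTLE_ENABLE_Q : 0;
                quirks |= limit << JM_JOB_THROTTLE_LIMIT_SHIFT_Q;

                return quirks;
        }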
-+ * -+ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), ++ kbdev->hw_quirks_mmu, NULL); + ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), ++ kbdev->hw_quirks_jm, NULL); + ++} + ++void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) ++{ ++ if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && ++ !kbdev->cci_snoop_enabled) { ++#ifdef CONFIG_ARM64 ++ if (kbdev->snoop_enable_smc != 0) ++ kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); ++#endif /* CONFIG_ARM64 */ ++ dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); ++ kbdev->cci_snoop_enabled = true; ++ } ++} + -+#ifndef _BASE_VENDOR_SPEC_FUNC_H_ -+#define _BASE_VENDOR_SPEC_FUNC_H_ ++void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) ++{ ++ if (kbdev->cci_snoop_enabled) { ++#ifdef CONFIG_ARM64 ++ if (kbdev->snoop_disable_smc != 0) { ++ mali_cci_flush_l2(kbdev); ++ kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); ++ } ++#endif /* CONFIG_ARM64 */ ++ dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); ++ kbdev->cci_snoop_enabled = false; ++ } ++} + -+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const); ++static int kbase_pm_do_reset(struct kbase_device *kbdev) ++{ ++ struct kbasep_reset_timeout_data rtdata; + -+#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h -new file mode 100644 -index 000000000..0d9bf23dc ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase.h -@@ -0,0 +1,612 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + ++ KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); + ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_SOFT_RESET, NULL); + ++ /* Unmask the reset complete interrupt only */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, ++ NULL); + ++ /* Initialize a structure for tracking the status of the reset */ ++ rtdata.kbdev = kbdev; ++ rtdata.timed_out = 0; + -+#ifndef _KBASE_H_ -+#define _KBASE_H_ ++ /* Create a timer to use as a timeout on the reset */ ++ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rtdata.timer.function = kbasep_reset_timeout; + -+#include ++ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), ++ HRTIMER_MODE_REL); + -+#include ++ /* Wait for the RESET_COMPLETED interrupt to be raised */ ++ kbase_pm_wait_for_reset(kbdev); + -+#include ++ if (rtdata.timed_out == 0) { ++ /* GPU has been reset */ ++ hrtimer_cancel(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return 0; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ /* No interrupt has been received - check if the RAWSTAT register says ++ * the reset has completed */ ++ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & ++ RESET_COMPLETED) { ++ /* The interrupt is set in the RAWSTAT; this suggests that the ++ * interrupts are not getting to the CPU */ ++ dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); ++ /* If interrupts aren't working we can't continue. */ ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return -EINVAL; ++ } + -+#include "mali_base_kernel.h" -+#include -+#include ++ /* The GPU doesn't seem to be responding to the reset so try a hard ++ * reset */ ++ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", ++ RESET_TIMEOUT); ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_HARD_RESET, NULL); + -+/* -+ * Include mali_kbase_defs.h first as this provides types needed by other local -+ * header files. 
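kbase_pm_do_reset() above follows a fixed escalation: issue a soft reset and wait with a timeout, check RAWSTAT to tell a lost interrupt apart from a hung GPU, and only then fall back to a hard reset with the same timeout. A compressed, hardware-free restatement of that decision flow (the helper names are placeholders; only the control flow mirrors the function above):

        #include <stdbool.h>
        #include <stdio.h>

        /* Placeholder hooks standing in for register writes and the timed wait. */
        static void issue_soft_reset(void)              { }
        static void issue_hard_reset(void)              { }
        static bool wait_reset_irq(unsigned timeout_ms) { (void)timeout_ms; return false; }
        static bool rawstat_shows_reset_done(void)      { return false; }

        #define RESET_TIMEOUT_MS 500 /* placeholder; the driver uses RESET_TIMEOUT */

        static int do_reset(void)
        {
                issue_soft_reset();
                if (wait_reset_irq(RESET_TIMEOUT_MS))
                        return 0;                 /* soft reset completed */

                if (rawstat_shows_reset_done()) {
                        /* Reset finished but the IRQ never arrived: interrupt
                         * routing is broken, so escalating will not help. */
                        fprintf(stderr, "reset IRQ did not reach the CPU\n");
                        return -1;
                }

                issue_hard_reset();               /* GPU is not responding */
                if (wait_reset_irq(RESET_TIMEOUT_MS))
                        return 0;

                return -1;                        /* hard reset timed out too */
        }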
-+ */ -+#include "mali_kbase_defs.h" ++ /* Restart the timer to wait for the hard reset to complete */ ++ rtdata.timed_out = 0; + -+#include "mali_kbase_context.h" -+#include "mali_kbase_strings.h" -+#include "mali_kbase_mem_lowlevel.h" -+#include "mali_kbase_trace_timeline.h" -+#include "mali_kbase_js.h" -+#include "mali_kbase_mem.h" -+#include "mali_kbase_utility.h" -+#include "mali_kbase_gpu_memory_debugfs.h" -+#include "mali_kbase_mem_profile_debugfs.h" -+#include "mali_kbase_debug_job_fault.h" -+#include "mali_kbase_jd_debugfs.h" -+#include "mali_kbase_gpuprops.h" -+#include "mali_kbase_jm.h" -+#include "mali_kbase_vinstr.h" ++ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), ++ HRTIMER_MODE_REL); + -+#include "ipa/mali_kbase_ipa.h" ++ /* Wait for the RESET_COMPLETED interrupt to be raised */ ++ kbase_pm_wait_for_reset(kbdev); + -+#ifdef CONFIG_GPU_TRACEPOINTS -+#include -+#endif -+/** -+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs -+ */ ++ if (rtdata.timed_out == 0) { ++ /* GPU has been reset */ ++ hrtimer_cancel(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return 0; ++ } + -+/** -+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs -+ */ ++ destroy_hrtimer_on_stack(&rtdata.timer); + -+struct kbase_device *kbase_device_alloc(void); -+/* -+* note: configuration attributes member of kbdev needs to have -+* been setup before calling kbase_device_init -+*/ ++ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", ++ RESET_TIMEOUT); + -+/* -+* API to acquire device list semaphore and return pointer -+* to the device list head -+*/ -+const struct list_head *kbase_dev_list_get(void); -+/* API to release the device list semaphore */ -+void kbase_dev_list_put(const struct list_head *dev_list); ++ return -EINVAL; ++} + -+int kbase_device_init(struct kbase_device * const kbdev); -+void kbase_device_term(struct kbase_device *kbdev); -+void kbase_device_free(struct kbase_device *kbdev); -+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); ++static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) ++{ ++ struct kbase_device *kbdev = pdev->data; + -+/* Needed for gator integration and for reporting vsync information */ -+struct kbase_device *kbase_find_device(int minor); -+void kbase_release_device(struct kbase_device *kbdev); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_SET_PROTECTED_MODE, NULL); ++ return 0; ++} + -+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value); ++static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) ++{ ++ struct kbase_device *kbdev = pdev->data; + -+struct kbase_context * -+kbase_create_context(struct kbase_device *kbdev, bool is_compat); -+void kbase_destroy_context(struct kbase_context *kctx); ++ lockdep_assert_held(&kbdev->pm.lock); + -+int kbase_jd_init(struct kbase_context *kctx); -+void kbase_jd_exit(struct kbase_context *kctx); ++ return kbase_pm_do_reset(kbdev); ++} + -+/** -+ * kbase_jd_submit - Submit atoms to the job dispatcher -+ * -+ * @kctx: The kbase context to submit to -+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array -+ * @nr_atoms: The number of atoms in the array -+ * @stride: sizeof(struct base_jd_atom_v2) -+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) -+ * -+ * Return: 0 on success or error code -+ */ -+int kbase_jd_submit(struct kbase_context *kctx, -+ void __user *user_addr, u32 nr_atoms, 
u32 stride, -+ bool uk6_atom); ++struct protected_mode_ops kbase_native_protected_ops = { ++ .protected_mode_enable = kbasep_protected_mode_enable, ++ .protected_mode_disable = kbasep_protected_mode_disable ++}; ++ ++int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) ++{ ++ unsigned long irq_flags; ++ int err; ++ bool resume_vinstr = false; ++ ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ /* Ensure the clock is on before attempting to access the hardware */ ++ if (!kbdev->pm.backend.gpu_powered) { ++ if (kbdev->pm.backend.callback_power_on) ++ kbdev->pm.backend.callback_power_on(kbdev); ++ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, ++ irq_flags); ++ kbdev->pm.backend.gpu_powered = true; ++ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, ++ irq_flags); ++ } ++ ++ /* Ensure interrupts are off to begin with, this also clears any ++ * outstanding interrupts */ ++ kbase_pm_disable_interrupts(kbdev); ++ /* Ensure cache snoops are disabled before reset. */ ++ kbase_pm_cache_snoop_disable(kbdev); ++ /* Prepare for the soft-reset */ ++ kbdev->pm.backend.reset_done = false; ++ ++ /* The cores should be made unavailable due to the reset */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->shader_available_bitmap != 0u) ++ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, ++ NULL, 0u, (u32)0u); ++ if (kbdev->tiler_available_bitmap != 0u) ++ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, ++ NULL, NULL, 0u, (u32)0u); ++ kbdev->shader_available_bitmap = 0u; ++ kbdev->tiler_available_bitmap = 0u; ++ kbdev->l2_available_bitmap = 0u; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ ++ /* Soft reset the GPU */ ++ if (kbdev->protected_mode_support) ++ err = kbdev->protected_ops->protected_mode_disable( ++ kbdev->protected_dev); ++ else ++ err = kbase_pm_do_reset(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->protected_mode) ++ resume_vinstr = true; ++ kbdev->protected_mode = false; ++ kbase_ipa_model_use_configured_locked(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ ++ if (err) ++ goto exit; ++ ++ if (flags & PM_HW_ISSUES_DETECT) ++ kbase_pm_hw_issues_detect(kbdev); ++ ++ kbase_pm_hw_issues_apply(kbdev); ++ kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); ++ ++ /* Sanity check protected mode was left after reset */ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { ++ u32 gpu_status = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_STATUS), NULL); ++ ++ WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); ++ } ++ ++ /* If cycle counter was in use re-enable it, enable_irqs will only be ++ * false when called from kbase_pm_powerup */ ++ if (kbdev->pm.backend.gpu_cycle_counter_requests && ++ (flags & PM_ENABLE_IRQS)) { ++ /* enable interrupts as the L2 may have to be powered on */ ++ kbase_pm_enable_interrupts(kbdev); ++ kbase_pm_request_l2_caches(kbdev); ++ ++ /* Re-enable the counters if we need to */ ++ spin_lock_irqsave( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ if (kbdev->pm.backend.gpu_cycle_counter_requests) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START, NULL); ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ irq_flags); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbase_pm_release_l2_caches(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, 
irq_flags); ++ ++ kbase_pm_disable_interrupts(kbdev); ++ } ++ ++ if (flags & PM_ENABLE_IRQS) ++ kbase_pm_enable_interrupts(kbdev); ++ ++exit: ++ /* If GPU is leaving protected mode resume vinstr operation. */ ++ if (kbdev->vinstr_ctx && resume_vinstr) ++ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ ++ return err; ++} + +/** -+ * kbase_jd_done_worker - Handle a job completion -+ * @data: a &struct work_struct -+ * -+ * This function requeues the job from the runpool (if it was soft-stopped or -+ * removed from NEXT registers). ++ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters + * -+ * Removes it from the system if it finished/failed/was cancelled. ++ * Increase the count of cycle counter users and turn the cycle counters on if ++ * they were previously off + * -+ * Resolves dependencies to add dependent jobs to the context, potentially -+ * starting them if necessary (which may add more references to the context) ++ * This function is designed to be called by ++ * kbase_pm_request_gpu_cycle_counter() or ++ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only + * -+ * Releases the reference to the context from the no-longer-running job. ++ * When this function is called the l2 cache must be on and the l2 cache users ++ * count must have been incremented by a call to ( ++ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() ) + * -+ * Handles retrying submission outside of IRQ context if it failed from within -+ * IRQ context. ++ * @kbdev: The kbase device structure of the device + */ -+void kbase_jd_done_worker(struct work_struct *data); ++static void ++kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, -+ kbasep_js_atom_done_code done_code); -+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -+void kbase_jd_zap_context(struct kbase_context *kctx); -+bool jd_done_nolock(struct kbase_jd_atom *katom, -+ struct list_head *completed_jobs_ctx); -+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); -+bool jd_submit_atom(struct kbase_context *kctx, -+ const struct base_jd_atom_v2 *user_atom, -+ struct kbase_jd_atom *katom); -+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); + -+void kbase_job_done(struct kbase_device *kbdev, u32 done); ++ ++kbdev->pm.backend.gpu_cycle_counter_requests; + -+/** -+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms -+ * and soft stop them -+ * @kctx: Pointer to context to check. -+ * @katom: Pointer to priority atom. 
++ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START, NULL); ++ ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++} ++ ++void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < ++ INT_MAX); ++ ++ kbase_pm_request_l2_caches(kbdev); ++ ++ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); ++ ++void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < ++ INT_MAX); ++ ++ kbase_pm_request_l2_caches_l2_is_on(kbdev); ++ ++ kbase_pm_request_gpu_cycle_counter_do_request(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); ++ ++void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); ++ ++ --kbdev->pm.backend.gpu_cycle_counter_requests; ++ ++ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_STOP, NULL); ++ ++ spin_unlock_irqrestore( ++ &kbdev->pm.backend.gpu_cycle_counter_requests_lock, ++ flags); ++ ++ kbase_pm_release_l2_caches(kbdev); ++} ++ ++void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ kbase_pm_release_gpu_cycle_counter_nolock(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +new file mode 100644 +index 000000000..6804f45ac +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +@@ -0,0 +1,548 @@ ++/* + * -+ * Atoms from @kctx on the same job slot as @katom, which have lower priority -+ * than @katom will be soft stopped and put back in the queue, so that atoms -+ * with higher priority can run. ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * The hwaccess_lock must be held when calling this function. 
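The cycle-counter helpers above are a refcount-gated resource: the START command is issued only on the 0-to-1 transition and STOP only on 1-to-0. A standalone sketch of just that invariant (locking and the paired L2 cache request are omitted; the names are illustrative):

        #include <assert.h>
        #include <stdio.h>

        static int cycle_counter_requests;

        static void cycle_counters_start(void) { puts("GPU_COMMAND_CYCLE_COUNT_START"); }
        static void cycle_counters_stop(void)  { puts("GPU_COMMAND_CYCLE_COUNT_STOP");  }

        static void request_cycle_counters(void)
        {
                if (++cycle_counter_requests == 1)
                        cycle_counters_start();   /* first user turns them on */
        }

        static void release_cycle_counters(void)
        {
                assert(cycle_counter_requests > 0);
                if (--cycle_counter_requests == 0)
                        cycle_counters_stop();    /* last user turns them off */
        }

        int main(void)
        {
                request_cycle_counters();   /* START */
                request_cycle_counters();   /* no-op */
                release_cycle_counters();   /* no-op */
                release_cycle_counters();   /* STOP  */
                return 0;
        }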
+ */ -+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); + -+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom); -+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, -+ struct kbase_jd_atom *target_katom, u32 sw_flags); -+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, -+ struct kbase_jd_atom *target_katom); -+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, -+ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom); + -+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); -+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); -+int kbase_event_pending(struct kbase_context *ctx); -+int kbase_event_init(struct kbase_context *kctx); -+void kbase_event_close(struct kbase_context *kctx); -+void kbase_event_cleanup(struct kbase_context *kctx); -+void kbase_event_wakeup(struct kbase_context *kctx); + -+int kbase_process_soft_job(struct kbase_jd_atom *katom); -+int kbase_prepare_soft_job(struct kbase_jd_atom *katom); -+void kbase_finish_soft_job(struct kbase_jd_atom *katom); -+void kbase_cancel_soft_job(struct kbase_jd_atom *katom); -+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); -+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); -+#endif -+int kbase_soft_event_update(struct kbase_context *kctx, -+ u64 event, -+ unsigned char new_status); + -+bool kbase_replay_process(struct kbase_jd_atom *katom); + -+void kbasep_soft_job_timeout_worker(struct timer_list *t); -+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); ++/* ++ * Power management API definitions used internally by GPU backend ++ */ + -+/* api used internally for register access. Contains validation and tracing */ -+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); -+int kbase_device_trace_buffer_install( -+ struct kbase_context *kctx, u32 *tb, size_t size); -+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); ++#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ ++#define _KBASE_BACKEND_PM_INTERNAL_H_ + -+/* api to be ported per OS, only need to do the raw register access */ -+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); -+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); ++#include + -+void kbasep_as_do_poke(struct work_struct *work); ++#include "mali_kbase_pm_ca.h" ++#include "mali_kbase_pm_policy.h" + -+/** Returns the name associated with a Mali exception code ++ ++/** ++ * kbase_pm_dev_idle - The GPU is idle. + * -+ * This function is called from the interrupt handler when a GPU fault occurs. -+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN. ++ * The OS may choose to turn off idle devices + * -+ * @param[in] kbdev The kbase device that the GPU fault occurred from. 
-+ * @param[in] exception_code exception code -+ * @return name associated with the exception code ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+const char *kbase_exception_name(struct kbase_device *kbdev, -+ u32 exception_code); ++void kbase_pm_dev_idle(struct kbase_device *kbdev); + +/** -+ * Check whether a system suspend is in progress, or has already been suspended ++ * kbase_pm_dev_activate - The GPU is active. + * -+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or -+ * a dmb was executed recently (to ensure the value is most -+ * up-to-date). However, without a lock the value could change afterwards. ++ * The OS should avoid opportunistically turning off the GPU while it is active + * -+ * @return false if a suspend is not in progress -+ * @return !=false otherwise ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) -+{ -+ return kbdev->pm.suspending; -+} ++void kbase_pm_dev_activate(struct kbase_device *kbdev); + +/** -+ * Return the atom's ID, as was originally supplied by userspace in -+ * base_jd_atom_v2::atom_number ++ * kbase_pm_get_present_cores - Get details of the cores that are present in ++ * the device. ++ * ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) present in the GPU device and also a count of ++ * the number of cores. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) ++ * ++ * Return: The bit mask of cores present + */ -+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ int result; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->kctx == kctx); -+ -+ result = katom - &kctx->jctx.atoms[0]; -+ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); -+ return result; -+} ++u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID -+ * @kctx: Context pointer -+ * @id: ID of atom to retrieve ++ * kbase_pm_get_active_cores - Get details of the cores that are currently ++ * active in the device. + * -+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are actively processing work (i.e. ++ * turned on *and* busy). ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) ++ * ++ * Return: The bit mask of active cores + */ -+static inline struct kbase_jd_atom *kbase_jd_atom_from_id( -+ struct kbase_context *kctx, int id) -+{ -+ return &kctx->jctx.atoms[id]; -+} ++u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); + +/** -+ * Initialize the disjoint state ++ * kbase_pm_get_trans_cores - Get details of the cores that are currently ++ * transitioning between power states. + * -+ * The disjoint event count and state are both set to zero. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are currently transitioning between ++ * power states. 
+ * -+ * Disjoint functions usage: ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * The disjoint event count should be incremented whenever a disjoint event occurs. ++ * Return: The bit mask of transitioning cores ++ */ ++u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); ++ ++/** ++ * kbase_pm_get_ready_cores - Get details of the cores that are currently ++ * powered and ready for jobs. + * -+ * There are several cases which are regarded as disjoint behavior. Rather than just increment -+ * the counter during disjoint events we also increment the counter when jobs may be affected -+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. ++ * This function can be called by the active power policy to return a bitmask of ++ * the cores (of a specified type) that are powered and ready for jobs (they may ++ * or may not be currently executing jobs). + * -+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying -+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of -+ * disjoint events. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * -+ * The disjoint state is then used to increase the count of disjoint events during job submission -+ * and job completion. Any atom submitted or completed while the disjoint state is greater than -+ * zero is regarded as a disjoint event. ++ * Return: The bit mask of ready cores ++ */ ++u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, ++ enum kbase_pm_core_type type); ++ ++/** ++ * kbase_pm_clock_on - Turn the clock for the device on, and enable device ++ * interrupts. + * -+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped -+ * and during context creation. ++ * This function can be used by a power policy to turn the clock for the GPU on. ++ * It should be modified during integration to perform the necessary actions to ++ * ensure that the GPU is fully powered and clocked. + * -+ * @param kbdev The kbase device ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_resume: true if clock on due to resume after suspend, false otherwise + */ -+void kbase_disjoint_init(struct kbase_device *kbdev); ++void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** -+ * Increase the count of disjoint events -+ * called when a disjoint event has happened ++ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the ++ * device off. + * -+ * @param kbdev The kbase device ++ * This function can be used by a power policy to turn the clock for the GPU ++ * off. It should be modified during integration to perform the necessary ++ * actions to turn the clock off (if this is possible in the integration). ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_suspend: true if clock off due to suspend, false otherwise ++ * ++ * Return: true if clock was turned off, or ++ * false if clock can not be turned off due to pending page/bus fault ++ * workers. 
Caller must flush MMU workqueues and retry + */ -+void kbase_disjoint_event(struct kbase_device *kbdev); ++bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); + +/** -+ * Increase the count of disjoint events only if the GPU is in a disjoint state ++ * kbase_pm_enable_interrupts - Enable interrupts on the device. + * -+ * This should be called when something happens which could be disjoint if the GPU -+ * is in a disjoint state. The state refcount keeps track of this. ++ * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * -+ * @param kbdev The kbase device ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_disjoint_event_potential(struct kbase_device *kbdev); ++void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** -+ * Returns the count of disjoint events ++ * kbase_pm_disable_interrupts - Disable interrupts on the device. + * -+ * @param kbdev The kbase device -+ * @return the count of disjoint events ++ * This prevents delivery of Power Management interrupts to the CPU so that ++ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler ++ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. ++ * ++ * Interrupts are also disabled after a call to kbase_pm_clock_off(). ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+u32 kbase_disjoint_event_get(struct kbase_device *kbdev); ++void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** -+ * Increment the refcount state indicating that the GPU is in a disjoint state. ++ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() ++ * that does not take the hwaccess_lock + * -+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) -+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down -+ * should be called ++ * Caller must hold the hwaccess_lock. + * -+ * @param kbdev The kbase device ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_disjoint_state_up(struct kbase_device *kbdev); ++void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** -+ * Decrement the refcount state -+ * -+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) ++ * kbase_pm_init_hw - Initialize the hardware. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @flags: Flags specifying the type of PM init + * -+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over ++ * This function checks the GPU ID register to ensure that the GPU is supported ++ * by the driver and performs a reset on the device so that it is in a known ++ * state before the device is used. + * -+ * @param kbdev The kbase device ++ * Return: 0 if the device is supported and successfully reset. + */ -+void kbase_disjoint_state_down(struct kbase_device *kbdev); ++int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** -+ * If a job is soft stopped and the number of contexts is >= this value -+ * it is reported as a disjoint event ++ * kbase_pm_reset_done - The GPU has been reset successfully. ++ * ++ * This function must be called by the GPU interrupt handler when the ++ * RESET_COMPLETED bit is set. It signals to the power management initialization ++ * code that the GPU has been successfully reset. 
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 -+ -+#if !defined(UINT64_MAX) -+ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) -+#endif ++void kbase_pm_reset_done(struct kbase_device *kbdev); + -+#if KBASE_TRACE_ENABLE -+void kbasep_trace_debugfs_init(struct kbase_device *kbdev); + -+#ifndef CONFIG_MALI_SYSTEM_TRACE -+/** Add trace values about a job-slot ++/** ++ * kbase_pm_check_transitions_nolock - Check if there are any power transitions ++ * to make, and if so start them. + * -+ * @note Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any -+ * functions called to get the parameters supplied to this macro must: -+ * - be static or static inline -+ * - must just return 0 and have no other statements present in the body. ++ * This function will check the desired_xx_state members of ++ * struct kbase_pm_device_data and the actual status of the hardware to see if ++ * any power transitions can be made at this time to make the hardware state ++ * closer to the state desired by the power policy. ++ * ++ * The return value can be used to check whether all the desired cores are ++ * available, and so whether it's worth submitting a job (e.g. from a Power ++ * Management IRQ). ++ * ++ * Note that this still returns true when desired_xx_state has no ++ * cores. That is: of the no cores desired, none were *un*available. In ++ * this case, the caller may still need to try submitting jobs. This is because ++ * the Core Availability Policy might have taken us to an intermediate state ++ * where no cores are powered, before powering on more cores (e.g. for core ++ * rotation) ++ * ++ * The caller must hold kbase_device.pm.power_change_lock ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: non-zero when all desired cores are available. That is, ++ * it's worthwhile for the caller to submit a job. ++ * false otherwise + */ -+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \ -+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ -+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0) ++bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); + -+/** Add trace values about a job-slot, with info ++/** ++ * kbase_pm_check_transitions_sync - Synchronous and locking variant of ++ * kbase_pm_check_transitions_nolock() + * -+ * @note Any functions called through this macro will still be evaluated in -+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any -+ * functions called to get the parameters supplied to this macro must: -+ * - be static or static inline -+ * - must just return 0 and have no other statements present in the body. ++ * On returning, the desired state at the time of the call will have been met. ++ * ++ * There is nothing to stop the core being switched off by calls to ++ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the ++ * caller must have already made a call to ++ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. ++ * ++ * The usual use-case for this is to ensure cores are 'READY' after performing ++ * a GPU Reset. ++ * ++ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold ++ * kbase_device.pm.power_change_lock, because this function will take that ++ * lock itself. 
++ *
++ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
-+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
-+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
-+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
++void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
-+/** Add trace values about a ctx refcount
++/**
++ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
++ * where the caller must hold
++ * kbase_device.pm.power_change_lock
+ *
-+ * @note Any functions called through this macro will still be evaluated in
-+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
-+ * functions called to get the parameters supplied to this macro must:
-+ * - be static or static inline
-+ * - must just return 0 and have no other statements present in the body.
++ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
-+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
-+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
-+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
-+/** Add trace values about a ctx refcount, and info
++void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
++
++/**
++ * kbase_pm_update_cores_state - Update the desired state of shader cores from
++ * the Power Policy, and begin any power
++ * transitions.
+ *
-+ * @note Any functions called through this macro will still be evaluated in
-+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
-+ * functions called to get the parameters supplied to this macro must:
-+ * - be static or static inline
-+ * - must just return 0 and have no other statements present in the body.
++ * This function will update the desired_xx_state members of
++ * struct kbase_pm_device_data by calling into the current Power Policy. It will
++ * then begin power transitions to make the hardware achieve the desired shader
++ * core state.
++ *
++ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
-+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
-+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
-+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
++void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
-+/** Add trace values (no slot or refcount)
++/**
++ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
++ * the GPU and/or shader cores.
+ *
-+ * @note Any functions called through this macro will still be evaluated in
-+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
-+ * functions called to get the parameters supplied to this macro must:
-+ * - be static or static inline
-+ * - must just return 0 and have no other statements present in the body.
++ * This should be called by any functions which directly power off the GPU.
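++ *
++ * As a hedged illustration, a path that powers the GPU off directly would
++ * typically pair the two calls like this (ordering assumed from this header,
++ * not mandated by it):
++ *
++ *   kbase_pm_cancel_deferred_poweroff(kbdev);
++ *   kbase_pm_do_poweroff(kbdev, false);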
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ -+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ -+ 0, 0, 0, info_val) -+ -+/** Clear the trace */ -+#define KBASE_TRACE_CLEAR(kbdev) \ -+ kbasep_trace_clear(kbdev) -+ -+/** Dump the slot trace */ -+#define KBASE_TRACE_DUMP(kbdev) \ -+ kbasep_trace_dump(kbdev) -+ -+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ -+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); -+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */ -+void kbasep_trace_clear(struct kbase_device *kbdev); -+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ -+/* Dispatch kbase trace events as system trace events */ -+#include -+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ -+ trace_mali_##code(jobslot, 0) -+ -+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ -+ trace_mali_##code(jobslot, info_val) -+ -+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ -+ trace_mali_##code(refcount, 0) -+ -+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ -+ trace_mali_##code(refcount, info_val) -+ -+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\ -+ trace_mali_##code(gpu_addr, info_val) -+ -+#define KBASE_TRACE_CLEAR(kbdev)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(0);\ -+ } while (0) -+#define KBASE_TRACE_DUMP(kbdev)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(0);\ -+ } while (0) -+ -+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ -+#else -+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(ctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(jobslot);\ -+ } while (0) ++void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); + -+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(ctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(jobslot);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++/** ++ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required ++ * and used cores. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); + -+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(ctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(refcount);\ -+ CSTD_NOP(0);\ -+ } while (0) ++/** ++ * kbasep_pm_metrics_init - Initialize the metrics gathering framework. ++ * ++ * This must be called before other metric gathering APIs are called. 
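++ *
++ * A sketch of the expected ordering during device init and teardown (error
++ * handling trimmed; the surrounding probe/remove code is assumed):
++ *
++ *   err = kbasep_pm_metrics_init(kbdev);
++ *   if (err)
++ *           return err;
++ *   (kbase_pm_metrics_update(), kbase_pm_get_dvfs_action(), ... may now run)
++ *   kbasep_pm_metrics_term(kbdev);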
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 on success, error code on error ++ */ ++int kbasep_pm_metrics_init(struct kbase_device *kbdev); + -+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(ctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(gpu_addr);\ -+ CSTD_UNUSED(info_val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++/** ++ * kbasep_pm_metrics_term - Terminate the metrics gathering framework. ++ * ++ * This must be called when metric gathering is no longer required. It is an ++ * error to call any metrics gathering function (other than ++ * kbasep_pm_metrics_init()) after calling this function. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbasep_pm_metrics_term(struct kbase_device *kbdev); + -+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(code);\ -+ CSTD_UNUSED(subcode);\ -+ CSTD_UNUSED(ctx);\ -+ CSTD_UNUSED(katom);\ -+ CSTD_UNUSED(val);\ -+ CSTD_NOP(0);\ -+ } while (0) ++/** ++ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to ++ * update the vsync metric. ++ * ++ * This function should be called by the frame buffer driver to update whether ++ * the system is hitting the vsync target or not. buffer_updated should be true ++ * if the vsync corresponded with a new frame being displayed, otherwise it ++ * should be false. This function does not need to be called every vsync, but ++ * only when the value of @buffer_updated differs from a previous call. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @buffer_updated: True if the buffer has been updated on this VSync, ++ * false otherwise ++ */ ++void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + -+#define KBASE_TRACE_CLEAR(kbdev)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(0);\ -+ } while (0) -+#define KBASE_TRACE_DUMP(kbdev)\ -+ do {\ -+ CSTD_UNUSED(kbdev);\ -+ CSTD_NOP(0);\ -+ } while (0) -+#endif /* KBASE_TRACE_ENABLE */ -+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */ -+void kbasep_trace_dump(struct kbase_device *kbdev); ++/** ++ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change ++ * the clock speed of the GPU. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * This function should be called regularly by the DVFS system to check whether ++ * the clock speed of the GPU needs updating. ++ */ ++void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + -+#ifdef CONFIG_MALI_DEBUG +/** -+ * kbase_set_driver_inactive - Force driver to go inactive -+ * @kbdev: Device pointer -+ * @inactive: true if driver should go inactive, false otherwise ++ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is ++ * needed + * -+ * Forcing the driver inactive will cause all future IOCTLs to wait until the -+ * driver is made active again. This is intended solely for the use of tests -+ * which require that no jobs are running while the test executes. ++ * If the caller is the first caller then the GPU cycle counters will be enabled ++ * along with the l2 cache ++ * ++ * The GPU must be powered when calling this function (i.e. ++ * kbase_pm_context_active() must have been called). 
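++ *
++ * A minimal usage sketch (illustrative; taking the power reference through
++ * kbase_pm_context_active()/kbase_pm_context_idle() is an assumption about
++ * the caller, not a new requirement):
++ *
++ *   kbase_pm_context_active(kbdev);
++ *   kbase_pm_request_gpu_cycle_counter(kbdev);
++ *   (read the cycle count and timestamp registers here)
++ *   kbase_pm_release_gpu_cycle_counter(kbdev);
++ *   kbase_pm_context_idle(kbdev);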
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive); -+#endif /* CONFIG_MALI_DEBUG */ ++void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + ++/** ++ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is ++ * needed (l2 cache already on) ++ * ++ * This is a version of the above function ++ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the ++ * l2 cache is known to be on and assured to be on until the subsequent call of ++ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does ++ * not sleep and can be called from atomic functions. ++ * ++ * The GPU must be powered when calling this function (i.e. ++ * kbase_pm_context_active() must have been called) and the l2 cache must be ++ * powered on. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); + -+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++/** ++ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no ++ * longer in use ++ * ++ * If the caller is the last caller then the GPU cycle counters will be ++ * disabled. A request must have been made before a call to this. ++ * ++ * Caller must not hold the hwaccess_lock, as it will be taken in this function. ++ * If the caller is already holding this lock then ++ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); + -+/* kbase_io_history_init - initialize data struct for register access history ++/** ++ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() ++ * that does not take hwaccess_lock + * -+ * @kbdev The register history to initialize -+ * @n The number of register accesses that the buffer could hold ++ * Caller must hold the hwaccess_lock. + * -+ * @return 0 if successfully initialized, failure otherwise ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+int kbase_io_history_init(struct kbase_io_history *h, u16 n); ++void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); + -+/* kbase_io_history_term - uninit all resources for the register access history ++/** ++ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to ++ * complete + * -+ * @h The register history to terminate ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_io_history_term(struct kbase_io_history *h); ++void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); + -+/* kbase_io_history_dump - print the register history to the kernel ring buffer ++/** ++ * kbase_pm_register_access_enable - Enable access to GPU registers + * -+ * @kbdev Pointer to kbase_device containing the register history to dump ++ * Enables access to the GPU registers before power management has powered up ++ * the GPU with kbase_pm_powerup(). ++ * ++ * Access to registers should be done using kbase_os_reg_read()/write() at this ++ * stage, not kbase_reg_read()/write(). ++ * ++ * This results in the power management callbacks provided in the driver ++ * configuration to get called to turn on power and/or clocks to the GPU. 
See ++ * kbase_pm_callback_conf. ++ * ++ * This should only be used before power management is powered up with ++ * kbase_pm_powerup() ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_io_history_dump(struct kbase_device *kbdev); ++void kbase_pm_register_access_enable(struct kbase_device *kbdev); + +/** -+ * kbase_io_history_resize - resize the register access history buffer. ++ * kbase_pm_register_access_disable - Disable early register access + * -+ * @h: Pointer to a valid register history to resize -+ * @new_size: Number of accesses the buffer could hold ++ * Disables access to the GPU registers enabled earlier by a call to ++ * kbase_pm_register_access_enable(). + * -+ * A successful resize will clear all recent register accesses. -+ * If resizing fails for any reason (e.g., could not allocate memory, invalid -+ * buffer size) then the original buffer will be kept intact. ++ * This results in the power management callbacks provided in the driver ++ * configuration to get called to turn off power and/or clocks to the GPU. See ++ * kbase_pm_callback_conf + * -+ * @return 0 if the buffer was resized, failure otherwise ++ * This should only be used before power management is powered up with ++ * kbase_pm_powerup() ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ -+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); ++void kbase_pm_register_access_disable(struct kbase_device *kbdev); + -+#else /* CONFIG_DEBUG_FS */ ++/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline ++ * function */ + -+#define kbase_io_history_init(...) ((int)0) ++/** ++ * kbase_pm_metrics_is_active - Check if the power management metrics ++ * collection is active. ++ * ++ * Note that this returns if the power management metrics collection was ++ * active at the time of calling, it is possible that after the call the metrics ++ * collection enable may have changed state. ++ * ++ * The caller must handle the consequence that the state may have changed. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * Return: true if metrics collection was active else false. ++ */ ++bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + -+#define kbase_io_history_term CSTD_NOP ++/** ++ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_resume: true if power on due to resume after suspend, ++ * false otherwise ++ */ ++void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); + -+#define kbase_io_history_dump CSTD_NOP ++/** ++ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been ++ * requested. 
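++ *
++ * As a hedged illustration, system suspend/resume would typically pair this
++ * with kbase_pm_do_poweron() (the surrounding suspend hooks are assumptions):
++ *
++ *   kbase_pm_do_poweroff(kbdev, true);    on suspend
++ *   kbase_pm_do_poweron(kbdev, true);     on resume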
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid ++ * pointer) ++ * @is_suspend: true if power off due to suspend, ++ * false otherwise ++ */ ++void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); + -+#define kbase_io_history_resize CSTD_NOP ++#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) ++void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, ++ unsigned long *total, unsigned long *busy); ++void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev); ++#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ + -+#endif /* CONFIG_DEBUG_FS */ ++#ifdef CONFIG_MALI_MIDGARD_DVFS + ++/** ++ * kbase_platform_dvfs_event - Report utilisation to DVFS code ++ * ++ * Function provided by platform specific code when DVFS is enabled to allow ++ * the power management metrics system to report utilisation. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @utilisation: The current calculated utilisation by the metrics system. ++ * @util_gl_share: The current calculated gl share of utilisation. ++ * @util_cl_share: The current calculated cl share of utilisation per core ++ * group. ++ * Return: Returns 0 on failure and non zero on success. ++ */ + ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, ++ u32 util_gl_share, u32 util_cl_share[2]); +#endif + ++void kbase_pm_power_changed(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_metrics_update - Inform the metrics system that an atom is either ++ * about to be run or has just completed. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @now: Pointer to the timestamp of the change, or NULL to use current time ++ * ++ * Caller must hold hwaccess_lock ++ */ ++void kbase_pm_metrics_update(struct kbase_device *kbdev, ++ ktime_t *now); ++ ++/** ++ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU ++ * If the GPU does not have coherency this is a no-op ++ * @kbdev: Device pointer ++ * ++ * This function should be called after L2 power up. ++ */ + ++void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c ++/** ++ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU ++ * If the GPU does not have coherency this is a no-op ++ * @kbdev: Device pointer ++ * ++ * This function should be called before L2 power off. ++ */ ++void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 -index 000000000..fde0f8ff8 +index 000000000..024248ca7 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c -@@ -0,0 +1,209 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +@@ -0,0 +1,401 @@ +/* + * -+ * (C) COPYRIGHT 2013-2015, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -352673,466 +354429,400 @@ index 000000000..fde0f8ff8 + */ + + -+#include -+#include -+#include -+ -+/* This function is used to solve an HW issue with single iterator GPUs. 
-+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the -+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens -+ * we try to clamp the restart index to a correct value and rerun the job. -+ */ -+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ -+#define X_COORDINATE_MASK 0x00000FFF -+#define Y_COORDINATE_MASK 0x0FFF0000 -+/* Max number of words needed from the fragment shader job descriptor */ -+#define JOB_HEADER_SIZE_IN_WORDS 10 -+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) + -+/* Word 0: Status Word */ -+#define JOB_DESC_STATUS_WORD 0 -+/* Word 1: Restart Index */ -+#define JOB_DESC_RESTART_INDEX_WORD 1 -+/* Word 2: Fault address low word */ -+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 -+/* Word 8: Minimum Tile Coordinates */ -+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 -+/* Word 9: Maximum Tile Coordinates */ -+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 + -+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) -+{ -+ struct device *dev = katom->kctx->kbdev->dev; -+ u32 clamped = 0; -+ struct kbase_va_region *region; -+ phys_addr_t *page_array; -+ u64 page_index; -+ u32 offset = katom->jc & (~PAGE_MASK); -+ u32 *page_1 = NULL; -+ u32 *page_2 = NULL; -+ u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; -+ void *dst = job_header; -+ u32 minX, minY, maxX, maxY; -+ u32 restartX, restartY; -+ struct page *p; -+ u32 copy_size; + -+ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n"); -+ if (!(katom->core_req & BASE_JD_REQ_FS)) -+ return 0; ++/* ++ * Metrics for power management ++ */ + -+ kbase_gpu_vm_lock(katom->kctx); -+ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, -+ katom->jc); -+ if (!region || (region->flags & KBASE_REG_FREE)) -+ goto out_unlock; ++#include ++#include ++#include ++#include + -+ page_array = kbase_get_cpu_phy_pages(region); -+ if (!page_array) -+ goto out_unlock; ++/* When VSync is being hit aim for utilisation between 70-90% */ ++#define KBASE_PM_VSYNC_MIN_UTILISATION 70 ++#define KBASE_PM_VSYNC_MAX_UTILISATION 90 ++/* Otherwise aim for 10-40% */ ++#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 ++#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + -+ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; ++/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns ++ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly ++ * under 11s. Exceeding this will cause overflow */ ++#define KBASE_PM_TIME_SHIFT 8 + -+ p = pfn_to_page(PFN_DOWN(page_array[page_index])); ++/* Maximum time between sampling of utilization data, without resetting the ++ * counters. */ ++#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */ + -+ /* we need the first 10 words of the fragment shader job descriptor. -+ * We need to check that the offset + 10 words is less that the page -+ * size otherwise we need to load the next page. -+ * page_size_overflow will be equal to 0 in case the whole descriptor -+ * is within the page > 0 otherwise. 
-+ */ -+ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) ++{ ++ unsigned long flags; ++ struct kbasep_pm_metrics_data *metrics; + -+ page_1 = kmap_atomic(p); ++ KBASE_DEBUG_ASSERT(timer != NULL); + -+ /* page_1 is a u32 pointer, offset is expressed in bytes */ -+ page_1 += offset>>2; ++ metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); ++ kbase_pm_get_dvfs_action(metrics->kbdev); + -+ kbase_sync_single_for_cpu(katom->kctx->kbdev, -+ kbase_dma_addr(p) + offset, -+ copy_size, DMA_BIDIRECTIONAL); ++ spin_lock_irqsave(&metrics->lock, flags); + -+ memcpy(dst, page_1, copy_size); ++ if (metrics->timer_active) ++ hrtimer_start(timer, ++ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), ++ HRTIMER_MODE_REL); + -+ /* The data needed overflows page the dimension, -+ * need to map the subsequent page */ -+ if (copy_size < JOB_HEADER_SIZE) { -+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1])); -+ page_2 = kmap_atomic(p); ++ spin_unlock_irqrestore(&metrics->lock, flags); + -+ kbase_sync_single_for_cpu(katom->kctx->kbdev, -+ kbase_dma_addr(p), -+ JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL); ++ return HRTIMER_NORESTART; ++} ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ + -+ memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); -+ } ++int kbasep_pm_metrics_init(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ /* We managed to correctly map one or two pages (in case of overflow) */ -+ /* Get Bounding Box data and restart index from fault address low word */ -+ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; -+ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; -+ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; -+ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; -+ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; -+ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; ++ kbdev->pm.backend.metrics.kbdev = kbdev; + -+ dev_warn(dev, "Before Clamping:\n" -+ "Jobstatus: %08x\n" -+ "restartIdx: %08x\n" -+ "Fault_addr_low: %08x\n" -+ "minCoordsX: %08x minCoordsY: %08x\n" -+ "maxCoordsX: %08x maxCoordsY: %08x\n", -+ job_header[JOB_DESC_STATUS_WORD], -+ job_header[JOB_DESC_RESTART_INDEX_WORD], -+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], -+ minX, minY, -+ maxX, maxY); ++ kbdev->pm.backend.metrics.time_period_start = ktime_get(); ++ kbdev->pm.backend.metrics.time_busy = 0; ++ kbdev->pm.backend.metrics.time_idle = 0; ++ kbdev->pm.backend.metrics.prev_busy = 0; ++ kbdev->pm.backend.metrics.prev_idle = 0; ++ kbdev->pm.backend.metrics.gpu_active = false; ++ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.busy_cl[0] = 0; ++ kbdev->pm.backend.metrics.busy_cl[1] = 0; ++ kbdev->pm.backend.metrics.busy_gl = 0; + -+ /* Set the restart index to the one which generated the fault*/ -+ job_header[JOB_DESC_RESTART_INDEX_WORD] = -+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; ++ spin_lock_init(&kbdev->pm.backend.metrics.lock); + -+ if (restartX < minX) { -+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; -+ dev_warn(dev, -+ "Clamping restart X index to minimum. 
%08x clamped to %08x\n", -+ restartX, minX); -+ clamped = 1; -+ } -+ if (restartY < minY) { -+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; -+ dev_warn(dev, -+ "Clamping restart Y index to minimum. %08x clamped to %08x\n", -+ restartY, minY); -+ clamped = 1; -+ } -+ if (restartX > maxX) { -+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; -+ dev_warn(dev, -+ "Clamping restart X index to maximum. %08x clamped to %08x\n", -+ restartX, maxX); -+ clamped = 1; -+ } -+ if (restartY > maxY) { -+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; -+ dev_warn(dev, -+ "Clamping restart Y index to maximum. %08x clamped to %08x\n", -+ restartY, maxY); -+ clamped = 1; -+ } ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbdev->pm.backend.metrics.timer_active = true; ++ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ kbdev->pm.backend.metrics.timer.function = dvfs_callback; + -+ if (clamped) { -+ /* Reset the fault address low word -+ * and set the job status to STOPPED */ -+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; -+ job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; -+ dev_warn(dev, "After Clamping:\n" -+ "Jobstatus: %08x\n" -+ "restartIdx: %08x\n" -+ "Fault_addr_low: %08x\n" -+ "minCoordsX: %08x minCoordsY: %08x\n" -+ "maxCoordsX: %08x maxCoordsY: %08x\n", -+ job_header[JOB_DESC_STATUS_WORD], -+ job_header[JOB_DESC_RESTART_INDEX_WORD], -+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], -+ minX, minY, -+ maxX, maxY); ++ hrtimer_start(&kbdev->pm.backend.metrics.timer, ++ HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), ++ HRTIMER_MODE_REL); ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ + -+ /* Flush CPU cache to update memory for future GPU reads*/ -+ memcpy(page_1, dst, copy_size); -+ p = pfn_to_page(PFN_DOWN(page_array[page_index])); ++ return 0; ++} + -+ kbase_sync_single_for_device(katom->kctx->kbdev, -+ kbase_dma_addr(p) + offset, -+ copy_size, DMA_TO_DEVICE); ++KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + -+ if (copy_size < JOB_HEADER_SIZE) { -+ memcpy(page_2, dst + copy_size, -+ JOB_HEADER_SIZE - copy_size); -+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1])); ++void kbasep_pm_metrics_term(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ unsigned long flags; + -+ kbase_sync_single_for_device(katom->kctx->kbdev, -+ kbase_dma_addr(p), -+ JOB_HEADER_SIZE - copy_size, -+ DMA_TO_DEVICE); -+ } -+ } -+ if (copy_size < JOB_HEADER_SIZE) -+ kunmap_atomic(page_2); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ kunmap_atomic(page_1); ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ kbdev->pm.backend.metrics.timer_active = false; ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + -+out_unlock: -+ kbase_gpu_vm_unlock(katom->kctx); -+ return clamped; ++ hrtimer_cancel(&kbdev->pm.backend.metrics.timer); ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h -new file mode 100644 -index 000000000..099a29861 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h -@@ -0,0 +1,23 @@ -+/* -+ * -+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ ++KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); ++ ++/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this ++ * function + */ ++static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ++ ktime_t now) ++{ ++ ktime_t diff; + ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + ++ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); ++ if (ktime_to_ns(diff) < 0) ++ return; + -+#ifndef _KBASE_10969_WORKAROUND_ -+#define _KBASE_10969_WORKAROUND_ ++ if (kbdev->pm.backend.metrics.gpu_active) { ++ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + -+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); ++ kbdev->pm.backend.metrics.time_busy += ns_time; ++ if (kbdev->pm.backend.metrics.active_cl_ctx[0]) ++ kbdev->pm.backend.metrics.busy_cl[0] += ns_time; ++ if (kbdev->pm.backend.metrics.active_cl_ctx[1]) ++ kbdev->pm.backend.metrics.busy_cl[1] += ns_time; ++ if (kbdev->pm.backend.metrics.active_gl_ctx[0]) ++ kbdev->pm.backend.metrics.busy_gl += ns_time; ++ if (kbdev->pm.backend.metrics.active_gl_ctx[1]) ++ kbdev->pm.backend.metrics.busy_gl += ns_time; ++ } else { ++ kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) ++ >> KBASE_PM_TIME_SHIFT); ++ } + -+#endif /* _KBASE_10969_WORKAROUND_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c -new file mode 100644 -index 000000000..f910fe970 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c -@@ -0,0 +1,102 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ kbdev->pm.backend.metrics.time_period_start = now; ++} ++ ++#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) ++/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this ++ * function. 
+ */ ++static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, ++ ktime_t now) ++{ ++ /* Store previous value */ ++ kbdev->pm.backend.metrics.prev_idle = ++ kbdev->pm.backend.metrics.time_idle; ++ kbdev->pm.backend.metrics.prev_busy = ++ kbdev->pm.backend.metrics.time_busy; + ++ /* Reset current values */ ++ kbdev->pm.backend.metrics.time_period_start = now; ++ kbdev->pm.backend.metrics.time_idle = 0; ++ kbdev->pm.backend.metrics.time_busy = 0; ++ kbdev->pm.backend.metrics.busy_cl[0] = 0; ++ kbdev->pm.backend.metrics.busy_cl[1] = 0; ++ kbdev->pm.backend.metrics.busy_gl = 0; ++} + ++void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+#include ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get()); ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++} + -+#include -+#include ++void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, ++ unsigned long *total_out, unsigned long *busy_out) ++{ ++ ktime_t now = ktime_get(); ++ unsigned long flags, busy, total; + -+#ifdef CONFIG_DEBUG_FS -+#ifdef CONFIG_MALI_DEBUG ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + -+static int kbase_as_fault_read(struct seq_file *sfile, void *data) -+{ -+ uintptr_t as_no = (uintptr_t) sfile->private; ++ busy = kbdev->pm.backend.metrics.time_busy; ++ total = busy + kbdev->pm.backend.metrics.time_idle; + -+ struct list_head *entry; -+ const struct list_head *kbdev_list; -+ struct kbase_device *kbdev = NULL; ++ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default ++ * 100ms) */ ++ if (total >= MALI_UTILIZATION_MAX_PERIOD) { ++ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); ++ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { ++ total += kbdev->pm.backend.metrics.prev_idle + ++ kbdev->pm.backend.metrics.prev_busy; ++ busy += kbdev->pm.backend.metrics.prev_busy; ++ } + -+ kbdev_list = kbase_dev_list_get(); ++ *total_out = total; ++ *busy_out = busy; ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); ++} ++#endif + -+ list_for_each(entry, kbdev_list) { -+ kbdev = list_entry(entry, struct kbase_device, entry); ++#ifdef CONFIG_MALI_MIDGARD_DVFS + -+ if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { ++/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this ++ * function ++ */ ++int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, ++ int *util_gl_share, ++ int util_cl_share[2], ++ ktime_t now) ++{ ++ int utilisation; ++ int busy; + -+ /* don't show this one again until another fault occors */ -+ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); ++ kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + -+ /* output the last page fault addr */ -+ seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr); ++ if (kbdev->pm.backend.metrics.time_idle + ++ kbdev->pm.backend.metrics.time_busy == 0) { ++ /* No data - so we return NOP */ ++ utilisation = -1; ++ if (util_gl_share) ++ *util_gl_share = -1; ++ if (util_cl_share) { ++ util_cl_share[0] = -1; ++ util_cl_share[1] = -1; + } -+ ++ goto out; + } + -+ kbase_dev_list_put(kbdev_list); ++ utilisation = (100 * kbdev->pm.backend.metrics.time_busy) / ++ (kbdev->pm.backend.metrics.time_idle + ++ kbdev->pm.backend.metrics.time_busy); + -+ return 0; ++ busy = kbdev->pm.backend.metrics.busy_gl + ++ kbdev->pm.backend.metrics.busy_cl[0] + ++ kbdev->pm.backend.metrics.busy_cl[1]; ++ ++ if 
(busy != 0) { ++ if (util_gl_share) ++ *util_gl_share = ++ (100 * kbdev->pm.backend.metrics.busy_gl) / ++ busy; ++ if (util_cl_share) { ++ util_cl_share[0] = ++ (100 * kbdev->pm.backend.metrics.busy_cl[0]) / ++ busy; ++ util_cl_share[1] = ++ (100 * kbdev->pm.backend.metrics.busy_cl[1]) / ++ busy; ++ } ++ } else { ++ if (util_gl_share) ++ *util_gl_share = -1; ++ if (util_cl_share) { ++ util_cl_share[0] = -1; ++ util_cl_share[1] = -1; ++ } ++ } ++ ++out: ++ return utilisation; +} + -+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) ++void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ -+ return single_open(file, kbase_as_fault_read , in->i_private); -+} ++ unsigned long flags; ++ int utilisation, util_gl_share; ++ int util_cl_share[2]; ++ ktime_t now; + -+static const struct file_operations as_fault_fops = { -+ .open = kbase_as_fault_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+#endif /* CONFIG_MALI_DEBUG */ -+#endif /* CONFIG_DEBUG_FS */ ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + -+/* -+ * Initialize debugfs entry for each address space -+ */ -+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_DEBUG_FS -+#ifdef CONFIG_MALI_DEBUG -+ uint i; -+ char as_name[64]; -+ struct dentry *debugfs_directory; ++ now = ktime_get(); + -+ kbdev->debugfs_as_read_bitmap = 0ULL; ++ utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, ++ util_cl_share, now); + -+ KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); -+ KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64)); ++ if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || ++ util_cl_share[1] < 0) { ++ utilisation = 0; ++ util_gl_share = 0; ++ util_cl_share[0] = 0; ++ util_cl_share[1] = 0; ++ goto out; ++ } + -+ debugfs_directory = debugfs_create_dir("address_spaces", -+ kbdev->mali_debugfs_directory); ++out: ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, ++ util_cl_share); ++#endif /*CONFIG_MALI_MIDGARD_DVFS */ + -+ if(debugfs_directory) { -+ for(i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); -+ debugfs_create_file(as_name, S_IRUGO, -+ debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops); -+ } -+ } -+ else -+ dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory"); ++ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + -+#endif /* CONFIG_MALI_DEBUG */ -+#endif /* CONFIG_DEBUG_FS */ -+ return; ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h -new file mode 100644 -index 000000000..3ed224889 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h -@@ -0,0 +1,45 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ + ++bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) ++{ ++ bool isactive; ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++ isactive = kbdev->pm.backend.metrics.timer_active; ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + -+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H -+#define _KBASE_AS_FAULT_DEBUG_FS_H ++ return isactive; ++} ++KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + -+/** -+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults -+ * -+ * @kbdev: Pointer to kbase_device -+ */ -+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/** -+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs ++ * kbase_pm_metrics_active_calc - Update PM active counts based on currently ++ * running atoms ++ * @kbdev: Device pointer + * -+ * @kbdev: Pointer to kbase_device -+ * @as_no: The address space the fault occurred on ++ * The caller must hold kbdev->pm.backend.metrics.lock + */ -+static inline void -+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) ++static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ -+#ifdef CONFIG_DEBUG_FS -+#ifdef CONFIG_MALI_DEBUG -+ kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); -+#endif /* CONFIG_DEBUG_FS */ -+#endif /* CONFIG_MALI_DEBUG */ -+ return; -+} -+ -+#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c -new file mode 100644 -index 000000000..c67b3e97f ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c -@@ -0,0 +1,64 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ -+ ++ int js; + ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + ++ kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; ++ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; ++ kbdev->pm.backend.metrics.gpu_active = false; + ++ for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { ++ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + -+/* -+ * Cache Policy API. -+ */ ++ /* Head atom may have just completed, so if it isn't running ++ * then try the next atom */ ++ if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) ++ katom = kbase_gpu_inspect(kbdev, js, 1); + -+#include "mali_kbase_cache_policy.h" ++ if (katom && katom->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_SUBMITTED) { ++ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ int device_nr = (katom->core_req & ++ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ++ ? katom->device_nr : 0; ++ if (!WARN_ON(device_nr >= 2)) ++ kbdev->pm.backend.metrics. ++ active_cl_ctx[device_nr] = 1; ++ } else { ++ /* Slot 2 should not be running non-compute ++ * atoms */ ++ if (!WARN_ON(js >= 2)) ++ kbdev->pm.backend.metrics. 
++ active_gl_ctx[js] = 1; ++ } ++ kbdev->pm.backend.metrics.gpu_active = true; ++ } ++ } ++} + -+/* -+ * The output flags should be a combination of the following values: -+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled. -+ */ -+u32 kbase_cache_enabled(u32 flags, u32 nr_pages) ++/* called when job is submitted to or removed from a GPU slot */ ++void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) +{ -+ u32 cache_flags = 0; -+ -+ CSTD_UNUSED(nr_pages); ++ unsigned long flags; ++ ktime_t now; + -+ if (flags & BASE_MEM_CACHED_CPU) -+ cache_flags |= KBASE_REG_CPU_CACHED; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return cache_flags; -+} ++ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + ++ if (!timestamp) { ++ now = ktime_get(); ++ timestamp = &now; ++ } + -+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir) -+{ -+/* Check if kernel is using coherency with GPU */ -+#ifdef CONFIG_MALI_COH_KERN -+ if (kbdev->system_coherency == COHERENCY_ACE) -+ return; -+#endif /* CONFIG_MALI_COH_KERN */ -+ dma_sync_single_for_device(kbdev->dev, handle, size, dir); -+} ++ /* Track how long CL and/or GL jobs have been busy for */ ++ kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + ++ kbase_pm_metrics_active_calc(kbdev); + -+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir) -+{ -+/* Check if kernel is using coherency with GPU */ -+#ifdef CONFIG_MALI_COH_KERN -+ if (kbdev->system_coherency == COHERENCY_ACE) -+ return; -+#endif /* CONFIG_MALI_COH_KERN */ -+ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); ++ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c new file mode 100644 -index 000000000..0c18bdb35 +index 000000000..075f020c6 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h -@@ -0,0 +1,45 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +@@ -0,0 +1,973 @@ +/* + * -+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -353147,451 +354837,971 @@ index 000000000..0c18bdb35 + + + -+ -+ +/* -+ * Cache Policy API. 
++ * Power policy API implementations + */ + -+#ifndef _KBASE_CACHE_POLICY_H_ -+#define _KBASE_CACHE_POLICY_H_ ++#include ++#include ++#include ++#include ++#include + -+#include "mali_kbase.h" -+#include "mali_base_kernel.h" ++static const struct kbase_pm_policy *const policy_list[] = { ++#ifdef CONFIG_MALI_NO_MALI ++ &kbase_pm_always_on_policy_ops, ++ &kbase_pm_demand_policy_ops, ++ &kbase_pm_coarse_demand_policy_ops, ++#if !MALI_CUSTOMER_RELEASE ++ &kbase_pm_demand_always_powered_policy_ops, ++ &kbase_pm_fast_start_policy_ops, ++#endif ++#else /* CONFIG_MALI_NO_MALI */ ++#if !PLATFORM_POWER_DOWN_ONLY ++ &kbase_pm_demand_policy_ops, ++#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++ &kbase_pm_coarse_demand_policy_ops, ++ &kbase_pm_always_on_policy_ops, ++#if !MALI_CUSTOMER_RELEASE ++#if !PLATFORM_POWER_DOWN_ONLY ++ &kbase_pm_demand_always_powered_policy_ops, ++ &kbase_pm_fast_start_policy_ops, ++#endif /* !PLATFORM_POWER_DOWN_ONLY */ ++#endif ++#endif /* CONFIG_MALI_NO_MALI */ ++}; + -+/** -+ * kbase_cache_enabled - Choose the cache policy for a specific region -+ * @flags: flags describing attributes of the region -+ * @nr_pages: total number of pages (backed or not) for the region -+ * -+ * Tells whether the CPU and GPU caches should be enabled or not for a specific -+ * region. -+ * This function can be modified to customize the cache policy depending on the -+ * flags and size of the region. -+ * -+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED -+ * depending on the cache policy ++/* The number of policies available in the system. ++ * This is derived from the number of functions listed in policy_get_functions. + */ -+u32 kbase_cache_enabled(u32 flags, u32 nr_pages); ++#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + -+#endif /* _KBASE_CACHE_POLICY_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c -new file mode 100644 -index 000000000..fb615ae02 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_config.c -@@ -0,0 +1,51 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ + ++/* Function IDs for looking up Timeline Trace codes in ++ * kbase_pm_change_state_trace_code */ ++enum kbase_pm_func_id { ++ KBASE_PM_FUNC_ID_REQUEST_CORES_START, ++ KBASE_PM_FUNC_ID_REQUEST_CORES_END, ++ KBASE_PM_FUNC_ID_RELEASE_CORES_START, ++ KBASE_PM_FUNC_ID_RELEASE_CORES_END, ++ /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither ++ * expect to hit it nor tend to hit it very much anyway. We can detect ++ * whether we need more instrumentation by a difference between ++ * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. 
*/ ++ ++ /* Must be the last */ ++ KBASE_PM_FUNC_ID_COUNT ++}; + + ++/* State changes during request/unrequest/release-ing cores */ ++enum { ++ KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), ++ KBASE_PM_CHANGE_STATE_TILER = (1u << 1), + ++ /* These two must be last */ ++ KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | ++ KBASE_PM_CHANGE_STATE_SHADER), ++ KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 ++}; ++typedef u32 kbase_pm_change_state; + -+#include -+#include -+#include + -+int kbasep_platform_device_init(struct kbase_device *kbdev) -+{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++/* Timeline Trace code lookups for each function */ ++static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] ++ [KBASE_PM_CHANGE_STATE_COUNT] = { ++ /* kbase_pm_request_cores */ ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | ++ KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_init_func) -+ return platform_funcs_p->platform_init_func(kbdev); ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, ++ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | ++ KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, + -+ return 0; -+} ++ /* kbase_pm_release_cores */ ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | ++ KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, + -+void kbasep_platform_device_term(struct kbase_device *kbdev) ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, ++ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | ++ KBASE_PM_CHANGE_STATE_TILER] = ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END ++}; ++ ++static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, ++ enum kbase_pm_func_id func_id, ++ kbase_pm_change_state state) +{ -+ struct kbase_platform_funcs_conf *platform_funcs_p; ++ int trace_code; + -+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; -+ if (platform_funcs_p && platform_funcs_p->platform_term_func) -+ platform_funcs_p->platform_term_func(kbdev); ++ KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); ++ 
KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == ++ state); ++ ++ trace_code = kbase_pm_change_state_trace_code[func_id][state]; ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); +} + -+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed) ++#else /* CONFIG_MALI_TRACE_TIMELINE */ ++static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, ++ enum kbase_pm_func_id func_id, kbase_pm_change_state state) +{ -+ KBASE_DEBUG_ASSERT(NULL != clock_speed); -+ -+ *clock_speed = 100; -+ return 0; +} + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h -new file mode 100644 -index 000000000..356d52bcd ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_config.h -@@ -0,0 +1,345 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ + ++/** ++ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any ++ * requested shader cores ++ * @kbdev: Device pointer ++ */ ++static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) ++{ ++ u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; ++ u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; + ++ lockdep_assert_held(&kbdev->hwaccess_lock); + ++ kbdev->pm.backend.desired_shader_state &= ++ ~kbdev->pm.backend.shader_poweroff_pending; ++ kbdev->pm.backend.desired_tiler_state &= ++ ~kbdev->pm.backend.tiler_poweroff_pending; + ++ kbdev->pm.backend.shader_poweroff_pending = 0; ++ kbdev->pm.backend.tiler_poweroff_pending = 0; + -+/** -+ * @file mali_kbase_config.h -+ * Configuration API and Attributes for KBase -+ */ ++ if (prev_shader_state != kbdev->pm.backend.desired_shader_state || ++ prev_tiler_state != ++ kbdev->pm.backend.desired_tiler_state || ++ kbdev->pm.backend.ca_in_transition) { ++ bool cores_are_available; + -+#ifndef _KBASE_CONFIG_H_ -+#define _KBASE_CONFIG_H_ ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); ++ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); ++ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, ++ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); + -+#include ++ /* Don't need 'cores_are_available', ++ * because we don't return anything */ ++ CSTD_UNUSED(cores_are_available); ++ } ++} + -+#include -+#include ++static enum hrtimer_restart ++kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) ++{ ++ struct kbase_device *kbdev; ++ unsigned long flags; + -+/** -+ * @addtogroup base_api -+ * @{ -+ */ ++ kbdev = container_of(timer, struct kbase_device, ++ pm.backend.gpu_poweroff_timer); + -+/** -+ * @addtogroup base_kbase_api -+ * @{ -+ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/** -+ * @addtogroup kbase_config Configuration API and Attributes -+ * @{ -+ */ ++ /* It is safe for this call to do nothing if the work item is already ++ * queued. The worker function will read the must up-to-date state of ++ * kbdev->pm.backend.gpu_poweroff_pending under lock. 
++ * ++ * If a state change occurs while the worker function is processing, ++ * this call will succeed as a work item can be requeued once it has ++ * started processing. ++ */ ++ if (kbdev->pm.backend.gpu_poweroff_pending) ++ queue_work(kbdev->pm.backend.gpu_poweroff_wq, ++ &kbdev->pm.backend.gpu_poweroff_work); + -+#include ++ if (kbdev->pm.backend.shader_poweroff_pending || ++ kbdev->pm.backend.tiler_poweroff_pending) { ++ kbdev->pm.backend.shader_poweroff_pending_time--; + -+/* Forward declaration of struct kbase_device */ -+struct kbase_device; ++ KBASE_DEBUG_ASSERT( ++ kbdev->pm.backend.shader_poweroff_pending_time ++ >= 0); + -+/** -+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers -+ * -+ * Specifies the functions pointers for platform specific initialization and -+ * termination. By default no functions are required. No additional platform -+ * specific control is necessary. -+ */ -+struct kbase_platform_funcs_conf { -+ /** -+ * platform_init_func - platform specific init function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Returns 0 on success, negative error code otherwise. -+ * -+ * Function pointer for platform specific initialization or NULL if no -+ * initialization function is required. At the point this the GPU is -+ * not active and its power and clocks are in unknown (platform specific -+ * state) as kbase doesn't yet have control of power and clocks. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly initialized) in here. -+ */ -+ int (*platform_init_func)(struct kbase_device *kbdev); -+ /** -+ * platform_term_func - platform specific termination function pointer -+ * @kbdev - kbase_device pointer -+ * -+ * Function pointer for platform specific termination or NULL if no -+ * termination function is required. At the point this the GPU will be -+ * idle but still powered and clocked. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed (and possibly terminated) in here. -+ */ -+ void (*platform_term_func)(struct kbase_device *kbdev); -+}; ++ if (!kbdev->pm.backend.shader_poweroff_pending_time) ++ kbasep_pm_do_poweroff_cores(kbdev); ++ } + -+/* -+ * @brief Specifies the callbacks for power management -+ * -+ * By default no callbacks will be made and the GPU must not be powered off. -+ */ -+struct kbase_pm_callback_conf { -+ /** Callback for when the GPU is idle and the power to it can be switched off. -+ * -+ * The system integrator can decide whether to either do nothing, just switch off -+ * the clocks to the GPU, or to completely power down the GPU. -+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the -+ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). -+ */ -+ void (*power_off_callback)(struct kbase_device *kbdev); ++ if (kbdev->pm.backend.poweroff_timer_needed) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /** Callback for when the GPU is about to become active and power must be supplied. -+ * -+ * This function must not return until the GPU is powered and clocked sufficiently for register access to -+ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. -+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0. 
-+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the -+ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). -+ * -+ * The return value of the first call to this function is ignored. -+ * -+ * @return 1 if the GPU state may have been lost, 0 otherwise. -+ */ -+ int (*power_on_callback)(struct kbase_device *kbdev); ++ hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); + -+ /** Callback for when the system is requesting a suspend and GPU power -+ * must be switched off. -+ * -+ * Note that if this callback is present, then this may be called -+ * without a preceding call to power_off_callback. Therefore this -+ * callback must be able to take any action that might otherwise happen -+ * in power_off_callback. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed and modified in here. It is the platform \em -+ * callbacks responsibility to initialize and terminate this pointer if -+ * used (see @ref kbase_platform_funcs_conf). -+ */ -+ void (*power_suspend_callback)(struct kbase_device *kbdev); ++ return HRTIMER_RESTART; ++ } + -+ /** Callback for when the system is resuming from a suspend and GPU -+ * power must be switched on. -+ * -+ * Note that if this callback is present, then this may be called -+ * without a following call to power_on_callback. Therefore this -+ * callback must be able to take any action that might otherwise happen -+ * in power_on_callback. -+ * -+ * The platform specific private pointer kbase_device::platform_context -+ * can be accessed and modified in here. It is the platform \em -+ * callbacks responsibility to initialize and terminate this pointer if -+ * used (see @ref kbase_platform_funcs_conf). -+ */ -+ void (*power_resume_callback)(struct kbase_device *kbdev); ++ kbdev->pm.backend.poweroff_timer_running = false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /** Callback for handling runtime power management initialization. -+ * -+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback -+ * will become active from calls made to the OS from within this function. -+ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ * -+ * @return 0 on success, else int error code. -+ */ -+ int (*power_runtime_init_callback)(struct kbase_device *kbdev); ++ return HRTIMER_NORESTART; ++} + -+ /** Callback for handling runtime power management termination. -+ * -+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback -+ * should no longer be called by the OS on completion of this function. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ */ -+ void (*power_runtime_term_callback)(struct kbase_device *kbdev); ++static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) ++{ ++ unsigned long flags; ++ struct kbase_device *kbdev; ++ bool do_poweroff = false; + -+ /** Callback for runtime power-off power management callback -+ * -+ * For linux this callback will be called by the kernel runtime_suspend callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ * -+ * @return 0 on success, else OS error code. 
-+ */ -+ void (*power_runtime_off_callback)(struct kbase_device *kbdev); ++ kbdev = container_of(data, struct kbase_device, ++ pm.backend.gpu_poweroff_work); + -+ /** Callback for runtime power-on power management callback -+ * -+ * For linux this callback will be called by the kernel runtime_resume callback. -+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. -+ */ -+ int (*power_runtime_on_callback)(struct kbase_device *kbdev); ++ mutex_lock(&kbdev->pm.lock); + -+ /* -+ * Optional callback for checking if GPU can be suspended when idle -+ * -+ * This callback will be called by the runtime power management core -+ * when the reference count goes to 0 to provide notification that the -+ * GPU now seems idle. -+ * -+ * If this callback finds that the GPU can't be powered off, or handles -+ * suspend by powering off directly or queueing up a power off, a -+ * non-zero value must be returned to prevent the runtime PM core from -+ * also triggering a suspend. -+ * -+ * Returning 0 will cause the runtime PM core to conduct a regular -+ * autosuspend. -+ * -+ * This callback is optional and if not provided regular autosuspend -+ * will be triggered. -+ * -+ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use -+ * this feature. -+ * -+ * Return 0 if GPU can be suspended, positive value if it can not be -+ * suspeneded by runtime PM, else OS error code -+ */ -+ int (*power_runtime_idle_callback)(struct kbase_device *kbdev); -+}; ++ if (kbdev->pm.backend.gpu_poweroff_pending == 0) { ++ mutex_unlock(&kbdev->pm.lock); ++ return; ++ } + -+/** -+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC -+ * @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters -+ * -+ * Returns 0 on success, negative error code otherwise. -+ * -+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed -+ * to 100, so will be an underestimate for any real system. -+ */ -+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed); ++ kbdev->pm.backend.gpu_poweroff_pending--; + -+/** -+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC -+ * @param clock_speed - pointer to store the current CPU clock speed in MHz -+ * -+ * Returns 0 on success, otherwise negative error code. -+ * -+ * This is mainly used to implement OpenCL's clGetDeviceInfo(). -+ */ -+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed); ++ if (kbdev->pm.backend.gpu_poweroff_pending > 0) { ++ mutex_unlock(&kbdev->pm.lock); ++ return; ++ } + -+/** -+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC -+ * @param clock_speed - pointer to store the current GPU clock speed in MHz -+ * -+ * Returns 0 on success, otherwise negative error code. -+ * When an error is returned the caller assumes maximum GPU speed stored in -+ * gpu_freq_khz_max. -+ * -+ * If the system timer is not available then this function is required -+ * for the OpenCL queue profiling to return correct timing information. -+ * -+ */ -+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed); ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); + -+#ifdef CONFIG_OF -+struct kbase_platform_config { -+}; -+#else ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+/* -+ * @brief Specifies start and end of I/O memory region. 
-+ */ -+struct kbase_io_memory_region { -+ u64 start; -+ u64 end; -+}; ++ /* Only power off the GPU if a request is still pending */ ++ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) ++ do_poweroff = true; + -+/* -+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. -+ */ -+struct kbase_io_resources { -+ u32 job_irq_number; -+ u32 mmu_irq_number; -+ u32 gpu_irq_number; -+ struct kbase_io_memory_region io_memory_region; -+}; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+struct kbase_platform_config { -+ const struct kbase_io_resources *io_resources; -+}; ++ if (do_poweroff) { ++ kbdev->pm.backend.poweroff_timer_needed = false; ++ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); ++ kbdev->pm.backend.poweroff_timer_running = false; + -+#endif /* CONFIG_OF */ ++ /* Power off the GPU */ ++ kbase_pm_do_poweroff(kbdev, false); ++ } + -+/** -+ * @brief Gets the pointer to platform config. -+ * -+ * @return Pointer to the platform config -+ */ -+struct kbase_platform_config *kbase_get_platform_config(void); ++ mutex_unlock(&kbdev->pm.lock); ++} + -+/** -+ * kbasep_platform_device_init: - Platform specific call to initialize hardware -+ * @kbdev: kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can initialize any hardware and context state that -+ * is required for the GPU block to function. -+ * -+ * Return: 0 if no errors have been found in the config. -+ * Negative error code otherwise. -+ */ -+int kbasep_platform_device_init(struct kbase_device *kbdev); ++int kbase_pm_policy_init(struct kbase_device *kbdev) ++{ ++ struct workqueue_struct *wq; + -+/** -+ * kbasep_platform_device_term - Platform specific call to terminate hardware -+ * @kbdev: Kbase device pointer -+ * -+ * Function calls a platform defined routine if specified in the configuration -+ * attributes. The routine can destroy any platform specific context state and -+ * shut down any hardware functionality that are outside of the Power Management -+ * callbacks. -+ * -+ */ -+void kbasep_platform_device_term(struct kbase_device *kbdev); ++ wq = alloc_workqueue("kbase_pm_do_poweroff", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (!wq) ++ return -ENOMEM; + ++ kbdev->pm.backend.gpu_poweroff_wq = wq; ++ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, ++ kbasep_pm_do_gpu_poweroff_wq); ++ hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, ++ CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ kbdev->pm.backend.gpu_poweroff_timer.function = ++ kbasep_pm_do_gpu_poweroff_callback; ++ kbdev->pm.backend.pm_current_policy = policy_list[0]; ++ kbdev->pm.backend.pm_current_policy->init(kbdev); ++ kbdev->pm.gpu_poweroff_time = ++ HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); ++ kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; ++ kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; + -+/** -+ * kbase_platform_early_init - Early initialisation of the platform code -+ * -+ * This function will be called when the module is loaded to perform any -+ * early initialisation required by the platform code. Such as reading -+ * platform specific device tree entries for the GPU. 
-+ * -+ * Return: 0 for success, any other fail causes module initialisation to fail -+ */ -+int kbase_platform_early_init(void); ++ return 0; ++} + -+#ifndef CONFIG_OF -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+/** -+ * kbase_platform_fake_register - Register a platform device for the GPU -+ * -+ * This can be used to register a platform device on systems where device tree -+ * is not enabled and the platform initialisation code in the kernel doesn't -+ * create the GPU device. Where possible device tree should be used instead. -+ * -+ * Return: 0 for success, any other fail causes module initialisation to fail -+ */ -+int kbase_platform_fake_register(void); ++void kbase_pm_policy_term(struct kbase_device *kbdev) ++{ ++ kbdev->pm.backend.pm_current_policy->term(kbdev); ++ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); ++} + -+/** -+ * kbase_platform_fake_unregister - Unregister a fake platform device -+ * -+ * Unregister the platform device created with kbase_platform_fake_register() -+ */ -+void kbase_platform_fake_unregister(void); -+#endif -+#endif ++void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ /** @} *//* end group kbase_config */ -+ /** @} *//* end group base_kbase_api */ -+ /** @} *//* end group base_api */ ++ lockdep_assert_held(&kbdev->pm.lock); + -+#endif /* _KBASE_CONFIG_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h ++ kbdev->pm.backend.poweroff_timer_needed = false; ++ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.poweroff_timer_running = false; ++ ++ /* If wq is already running but is held off by pm.lock, make sure it has ++ * no effect */ ++ kbdev->pm.backend.gpu_poweroff_pending = 0; ++ ++ kbdev->pm.backend.shader_poweroff_pending = 0; ++ kbdev->pm.backend.tiler_poweroff_pending = 0; ++ kbdev->pm.backend.shader_poweroff_pending_time = 0; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_pm_update_active(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_device_data *pm = &kbdev->pm; ++ struct kbase_pm_backend_data *backend = &pm->backend; ++ unsigned long flags; ++ bool active; ++ ++ lockdep_assert_held(&pm->lock); ++ ++ /* pm_current_policy will never be NULL while pm.lock is held */ ++ KBASE_DEBUG_ASSERT(backend->pm_current_policy); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ active = backend->pm_current_policy->get_core_active(kbdev); ++ ++ if (active) { ++ if (backend->gpu_poweroff_pending) { ++ /* Cancel any pending power off request */ ++ backend->gpu_poweroff_pending = 0; ++ ++ /* If a request was pending then the GPU was still ++ * powered, so no need to continue */ ++ if (!kbdev->poweroff_pending) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ return; ++ } ++ } ++ ++ if (!backend->poweroff_timer_running && !backend->gpu_powered && ++ (pm->poweroff_gpu_ticks || ++ pm->poweroff_shader_ticks)) { ++ backend->poweroff_timer_needed = true; ++ backend->poweroff_timer_running = true; ++ hrtimer_start(&backend->gpu_poweroff_timer, ++ pm->gpu_poweroff_time, ++ HRTIMER_MODE_REL); ++ } ++ ++ /* Power on the GPU and any cores requested by the policy */ ++ if (pm->backend.poweroff_wait_in_progress) { ++ pm->backend.poweron_required = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ kbase_pm_do_poweron(kbdev, false); 
++ } ++ } else { ++ /* It is an error for the power policy to power off the GPU ++ * when there are contexts active */ ++ KBASE_DEBUG_ASSERT(pm->active_count == 0); ++ ++ if (backend->shader_poweroff_pending || ++ backend->tiler_poweroff_pending) { ++ backend->shader_poweroff_pending = 0; ++ backend->tiler_poweroff_pending = 0; ++ backend->shader_poweroff_pending_time = 0; ++ } ++ ++ /* Request power off */ ++ if (pm->backend.gpu_powered) { ++ if (pm->poweroff_gpu_ticks) { ++ backend->gpu_poweroff_pending = ++ pm->poweroff_gpu_ticks; ++ backend->poweroff_timer_needed = true; ++ if (!backend->poweroff_timer_running) { ++ /* Start timer if not running (eg if ++ * power policy has been changed from ++ * always_on to something else). This ++ * will ensure the GPU is actually ++ * powered off */ ++ backend->poweroff_timer_running ++ = true; ++ hrtimer_start( ++ &backend->gpu_poweroff_timer, ++ pm->gpu_poweroff_time, ++ HRTIMER_MODE_REL); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ ++ /* Power off the GPU immediately */ ++ kbase_pm_do_poweroff(kbdev, false); ++ } ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++ } ++} ++ ++void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) ++{ ++ u64 desired_bitmap; ++ u64 desired_tiler_bitmap; ++ bool cores_are_available; ++ bool do_poweroff = false; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (kbdev->pm.backend.pm_current_policy == NULL) ++ return; ++ if (kbdev->pm.backend.poweroff_wait_in_progress) ++ return; ++ ++ if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && ++ !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt ++ && !kbdev->tiler_inuse_cnt) { ++ /* We are trying to change in/out of protected mode - force all ++ * cores off so that the L2 powers down */ ++ desired_bitmap = 0; ++ desired_tiler_bitmap = 0; ++ } else { ++ desired_bitmap = ++ kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); ++ desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); ++ ++ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) ++ desired_tiler_bitmap = 1; ++ else ++ desired_tiler_bitmap = 0; ++ ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { ++ /* Unless XAFFINITY is supported, enable core 0 if tiler ++ * required, regardless of core availability */ ++ if (kbdev->tiler_needed_cnt > 0 || ++ kbdev->tiler_inuse_cnt > 0) ++ desired_bitmap |= 1; ++ } ++ } ++ ++ if (kbdev->pm.backend.desired_shader_state != desired_bitmap) ++ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, ++ (u32)desired_bitmap); ++ /* Are any cores being powered on? 
*/ ++ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || ++ ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || ++ kbdev->pm.backend.ca_in_transition) { ++ /* Check if we are powering off any cores before updating shader ++ * state */ ++ if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || ++ kbdev->pm.backend.desired_tiler_state & ++ ~desired_tiler_bitmap) { ++ /* Start timer to power off cores */ ++ kbdev->pm.backend.shader_poweroff_pending |= ++ (kbdev->pm.backend.desired_shader_state & ++ ~desired_bitmap); ++ kbdev->pm.backend.tiler_poweroff_pending |= ++ (kbdev->pm.backend.desired_tiler_state & ++ ~desired_tiler_bitmap); ++ ++ if (kbdev->pm.poweroff_shader_ticks && ++ !kbdev->protected_mode_transition) ++ kbdev->pm.backend.shader_poweroff_pending_time = ++ kbdev->pm.poweroff_shader_ticks; ++ else ++ do_poweroff = true; ++ } ++ ++ kbdev->pm.backend.desired_shader_state = desired_bitmap; ++ kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; ++ ++ /* If any cores are being powered on, transition immediately */ ++ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); ++ } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || ++ kbdev->pm.backend.desired_tiler_state & ++ ~desired_tiler_bitmap) { ++ /* Start timer to power off cores */ ++ kbdev->pm.backend.shader_poweroff_pending |= ++ (kbdev->pm.backend.desired_shader_state & ++ ~desired_bitmap); ++ kbdev->pm.backend.tiler_poweroff_pending |= ++ (kbdev->pm.backend.desired_tiler_state & ++ ~desired_tiler_bitmap); ++ if (kbdev->pm.poweroff_shader_ticks && ++ !kbdev->protected_mode_transition) ++ kbdev->pm.backend.shader_poweroff_pending_time = ++ kbdev->pm.poweroff_shader_ticks; ++ else ++ kbasep_pm_do_poweroff_cores(kbdev); ++ } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && ++ desired_tiler_bitmap != 0 && ++ kbdev->pm.backend.poweroff_timer_needed) { ++ /* If power policy is keeping cores on despite there being no ++ * active contexts then disable poweroff timer as it isn't ++ * required. 
++ * Only reset poweroff_timer_needed if we're not in the middle ++ * of the power off callback */ ++ kbdev->pm.backend.poweroff_timer_needed = false; ++ } ++ ++ /* Ensure timer does not power off wanted cores and make sure to power ++ * off unwanted cores */ ++ if (kbdev->pm.backend.shader_poweroff_pending || ++ kbdev->pm.backend.tiler_poweroff_pending) { ++ kbdev->pm.backend.shader_poweroff_pending &= ++ ~(kbdev->pm.backend.desired_shader_state & ++ desired_bitmap); ++ kbdev->pm.backend.tiler_poweroff_pending &= ++ ~(kbdev->pm.backend.desired_tiler_state & ++ desired_tiler_bitmap); ++ ++ if (!kbdev->pm.backend.shader_poweroff_pending && ++ !kbdev->pm.backend.tiler_poweroff_pending) ++ kbdev->pm.backend.shader_poweroff_pending_time = 0; ++ } ++ ++ /* Shader poweroff is deferred to the end of the function, to eliminate ++ * issues caused by the core availability policy recursing into this ++ * function */ ++ if (do_poweroff) ++ kbasep_pm_do_poweroff_cores(kbdev); ++ ++ /* Don't need 'cores_are_available', because we don't return anything */ ++ CSTD_UNUSED(cores_are_available); ++} ++ ++void kbase_pm_update_cores_state(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) ++{ ++ if (!list) ++ return POLICY_COUNT; ++ ++ *list = policy_list; ++ ++ return POLICY_COUNT; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_list_policies); ++ ++const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ return kbdev->pm.backend.pm_current_policy; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_get_policy); ++ ++void kbase_pm_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_policy *new_policy) ++{ ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ const struct kbase_pm_policy *old_policy; ++ unsigned long flags; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(new_policy != NULL); ++ ++ KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); ++ ++ /* During a policy change we pretend the GPU is active */ ++ /* A suspend won't happen here, because we're in a syscall from a ++ * userspace thread */ ++ kbase_pm_context_active(kbdev); ++ ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ ++ /* Remove the policy to prevent IRQ handlers from working on it */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ old_policy = kbdev->pm.backend.pm_current_policy; ++ kbdev->pm.backend.pm_current_policy = NULL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, ++ old_policy->id); ++ if (old_policy->term) ++ old_policy->term(kbdev); ++ ++ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, ++ new_policy->id); ++ if (new_policy->init) ++ new_policy->init(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.pm_current_policy = new_policy; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* If any core power state changes were previously attempted, but ++ * couldn't be made because the policy was changing (current_policy was ++ * NULL), then re-try them here. 
*/ ++ kbase_pm_update_active(kbdev); ++ kbase_pm_update_cores_state(kbdev); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ /* Now the policy change is finished, we release our fake context active ++ * reference */ ++ kbase_pm_context_idle(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_set_policy); ++ ++/* Check whether a state change has finished, and trace it as completed */ ++static void ++kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) ++{ ++ if ((kbdev->shader_available_bitmap & ++ kbdev->pm.backend.desired_shader_state) ++ == kbdev->pm.backend.desired_shader_state && ++ (kbdev->tiler_available_bitmap & ++ kbdev->pm.backend.desired_tiler_state) ++ == kbdev->pm.backend.desired_tiler_state) ++ kbase_timeline_pm_check_handle_event(kbdev, ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); ++} ++ ++void kbase_pm_request_cores(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores) ++{ ++ u64 cores; ++ ++ kbase_pm_change_state change_gpu_state = 0u; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ cores = shader_cores; ++ while (cores) { ++ int bitnum = fls64(cores) - 1; ++ u64 bit = 1ULL << bitnum; ++ ++ /* It should be almost impossible for this to overflow. It would ++ * require 2^32 atoms to request a particular core, which would ++ * require 2^24 contexts to submit. This would require an amount ++ * of memory that is impossible on a 32-bit system and extremely ++ * unlikely on a 64-bit system. */ ++ int cnt = ++kbdev->shader_needed_cnt[bitnum]; ++ ++ if (1 == cnt) { ++ kbdev->shader_needed_bitmap |= bit; ++ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; ++ } ++ ++ cores &= ~bit; ++ } ++ ++ if (tiler_required) { ++ int cnt = ++kbdev->tiler_needed_cnt; ++ ++ if (1 == cnt) ++ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; ++ ++ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); ++ } ++ ++ if (change_gpu_state) { ++ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, ++ NULL, 0u, (u32) kbdev->shader_needed_bitmap); ++ ++ kbase_timeline_pm_cores_func(kbdev, ++ KBASE_PM_FUNC_ID_REQUEST_CORES_START, ++ change_gpu_state); ++ kbase_pm_update_cores_state_nolock(kbdev); ++ kbase_timeline_pm_cores_func(kbdev, ++ KBASE_PM_FUNC_ID_REQUEST_CORES_END, ++ change_gpu_state); ++ } ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_cores); ++ ++void kbase_pm_unrequest_cores(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores) ++{ ++ kbase_pm_change_state change_gpu_state = 0u; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ while (shader_cores) { ++ int bitnum = fls64(shader_cores) - 1; ++ u64 bit = 1ULL << bitnum; ++ int cnt; ++ ++ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); ++ ++ cnt = --kbdev->shader_needed_cnt[bitnum]; ++ ++ if (0 == cnt) { ++ kbdev->shader_needed_bitmap &= ~bit; ++ ++ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; ++ } ++ ++ shader_cores &= ~bit; ++ } ++ ++ if (tiler_required) { ++ int cnt; ++ ++ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); ++ ++ cnt = --kbdev->tiler_needed_cnt; ++ ++ if (0 == cnt) ++ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; ++ } ++ ++ if (change_gpu_state) { ++ KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, ++ NULL, 0u, (u32) kbdev->shader_needed_bitmap); ++ ++ kbase_pm_update_cores_state_nolock(kbdev); ++ ++ /* Trace that any state change effectively completes immediately ++ * - no-one will wait on the state change */ ++ 
kbase_pm_trace_check_and_finish_state_change(kbdev);
++	}
++}
++
++KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
++
++enum kbase_pm_cores_ready
++kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
++				bool tiler_required, u64 shader_cores)
++{
++	u64 prev_shader_needed;	/* Just for tracing */
++	u64 prev_shader_inuse;	/* Just for tracing */
++
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++
++	prev_shader_needed = kbdev->shader_needed_bitmap;
++	prev_shader_inuse = kbdev->shader_inuse_bitmap;
++
++	/* If desired_shader_state does not contain the requested cores, then
++	 * power management is not attempting to power those cores (most
++	 * likely due to core availability policy) and a new job affinity must
++	 * be chosen */
++	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
++							shader_cores) {
++		return (kbdev->pm.backend.poweroff_wait_in_progress ||
++				kbdev->pm.backend.pm_current_policy == NULL) ?
++				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
++	}
++
++	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
++		(tiler_required && !kbdev->tiler_available_bitmap)) {
++		/* Trace ongoing core transition */
++		kbase_timeline_pm_l2_transition_start(kbdev);
++		return KBASE_CORES_NOT_READY;
++	}
++
++	/* If we started to trace a state change, then trace it as being
++	 * finished by now, at the very latest */
++	kbase_pm_trace_check_and_finish_state_change(kbdev);
++	/* Trace core transition done */
++	kbase_timeline_pm_l2_transition_done(kbdev);
++
++	while (shader_cores) {
++		int bitnum = fls64(shader_cores) - 1;
++		u64 bit = 1ULL << bitnum;
++		int cnt;
++
++		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
++
++		cnt = --kbdev->shader_needed_cnt[bitnum];
++
++		if (0 == cnt)
++			kbdev->shader_needed_bitmap &= ~bit;
++
++		/* shader_inuse_cnt should not overflow because there can only
++		 * be a very limited number of jobs on the h/w at one time */
++
++		kbdev->shader_inuse_cnt[bitnum]++;
++		kbdev->shader_inuse_bitmap |= bit;
++
++		shader_cores &= ~bit;
++	}
++
++	if (tiler_required) {
++		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
++
++		--kbdev->tiler_needed_cnt;
++
++		kbdev->tiler_inuse_cnt++;
++
++		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
++	}
++
++	if (prev_shader_needed != kbdev->shader_needed_bitmap)
++		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
++				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
++
++	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
++		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
++				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
++
++	return KBASE_CORES_READY;
++}
++
++KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
++
++void kbase_pm_release_cores(struct kbase_device *kbdev,
++				bool tiler_required, u64 shader_cores)
++{
++	kbase_pm_change_state change_gpu_state = 0u;
++
++	KBASE_DEBUG_ASSERT(kbdev != NULL);
++
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++
++	while (shader_cores) {
++		int bitnum = fls64(shader_cores) - 1;
++		u64 bit = 1ULL << bitnum;
++		int cnt;
++
++		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
++
++		cnt = --kbdev->shader_inuse_cnt[bitnum];
++
++		if (0 == cnt) {
++			kbdev->shader_inuse_bitmap &= ~bit;
++			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
++		}
++
++		shader_cores &= ~bit;
++	}
++
++	if (tiler_required) {
++		int cnt;
++
++		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
++
++		cnt = --kbdev->tiler_inuse_cnt;
++
++		if (0 == cnt)
++			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
++	}
++
++	if (change_gpu_state) {
++
KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, ++ NULL, 0u, (u32) kbdev->shader_inuse_bitmap); ++ ++ kbase_timeline_pm_cores_func(kbdev, ++ KBASE_PM_FUNC_ID_RELEASE_CORES_START, ++ change_gpu_state); ++ kbase_pm_update_cores_state_nolock(kbdev); ++ kbase_timeline_pm_cores_func(kbdev, ++ KBASE_PM_FUNC_ID_RELEASE_CORES_END, ++ change_gpu_state); ++ ++ /* Trace that any state change completed immediately */ ++ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ } ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_release_cores); ++ ++void kbase_pm_request_cores_sync(struct kbase_device *kbdev, ++ bool tiler_required, ++ u64 shader_cores) ++{ ++ unsigned long flags; ++ ++ kbase_pm_wait_for_poweroff_complete(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_pm_request_cores(kbdev, tiler_required, shader_cores); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ kbase_pm_check_transitions_sync(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); ++ ++void kbase_pm_request_l2_caches(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ u32 prior_l2_users_count; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ prior_l2_users_count = kbdev->l2_users_count++; ++ ++ KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); ++ ++ /* if the GPU is reset while the l2 is on, l2 will be off but ++ * prior_l2_users_count will be > 0. l2_available_bitmap will have been ++ * set to 0 though by kbase_pm_init_hw */ ++ if (!prior_l2_users_count || !kbdev->l2_available_bitmap) ++ kbase_pm_check_transitions_nolock(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ wait_event(kbdev->pm.backend.l2_powered_wait, ++ kbdev->pm.backend.l2_powered == 1); ++ ++ /* Trace that any state change completed immediately */ ++ kbase_pm_trace_check_and_finish_state_change(kbdev); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); ++ ++void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbdev->l2_users_count++; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); ++ ++void kbase_pm_release_l2_caches(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); ++ ++ --kbdev->l2_users_count; ++ ++ if (!kbdev->l2_users_count) { ++ kbase_pm_check_transitions_nolock(kbdev); ++ /* Trace that any state change completed immediately */ ++ kbase_pm_trace_check_and_finish_state_change(kbdev); ++ } ++} ++ ++KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 -index 000000000..1cf44b350 +index 000000000..611a90e66 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,227 @@ +/* + * -+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -353606,225 +355816,334 @@ index 000000000..1cf44b350 + + + ++/* ++ * Power policy API definitions ++ */ ++ ++#ifndef _KBASE_PM_POLICY_H_ ++#define _KBASE_PM_POLICY_H_ ++ +/** -+ * @file mali_kbase_config_defaults.h ++ * kbase_pm_policy_init - Initialize power policy framework + * -+ * Default values for configuration settings ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Must be called before calling any other policy function + * ++ * Return: 0 if the power policy framework was successfully ++ * initialized, -errno otherwise. + */ -+ -+#ifndef _KBASE_CONFIG_DEFAULTS_H_ -+#define _KBASE_CONFIG_DEFAULTS_H_ -+ -+/* Include mandatory definitions per platform */ -+#include ++int kbase_pm_policy_init(struct kbase_device *kbdev); + +/** -+* Boolean indicating whether the driver is configured to be secure at -+* a potential loss of performance. -+* -+* This currently affects only r0p0-15dev0 HW and earlier. -+* -+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and -+* performance: -+* -+* - When this is set to true, the driver remains fully secure, -+* but potentially loses performance compared with setting this to -+* false. -+* - When set to false, the driver is open to certain security -+* attacks. -+* -+* From r0p0-00rel0 and onwards, there is no security loss by setting -+* this to false, and no performance loss by setting it to -+* true. -+*/ -+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false ++ * kbase_pm_policy_term - Terminate power policy framework ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_policy_term(struct kbase_device *kbdev); + -+enum { -+ /** -+ * Use unrestricted Address ID width on the AXI bus. -+ */ -+ KBASE_AID_32 = 0x0, ++/** ++ * kbase_pm_update_active - Update the active power state of the GPU ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Calls into the current power policy ++ */ ++void kbase_pm_update_active(struct kbase_device *kbdev); + -+ /** -+ * Restrict GPU to a half of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_16 = 0x3, ++/** ++ * kbase_pm_update_cores - Update the desired core state of the GPU ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Calls into the current power policy ++ */ ++void kbase_pm_update_cores(struct kbase_device *kbdev); + -+ /** -+ * Restrict GPU to a quarter of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_8 = 0x2, + -+ /** -+ * Restrict GPU to an eighth of maximum Address ID count. -+ * This will reduce performance, but reduce bus load due to GPU. -+ */ -+ KBASE_AID_4 = 0x1 ++enum kbase_pm_cores_ready { ++ KBASE_CORES_NOT_READY = 0, ++ KBASE_NEW_AFFINITY = 1, ++ KBASE_CORES_READY = 2 +}; + ++ +/** -+ * Default setting for read Address ID limiting on AXI bus. ++ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores() + * -+ * Attached value: u32 register value -+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits) -+ * KBASE_AID_16 - use 16 IDs (4 ID bits) -+ * KBASE_AID_8 - use 8 IDs (3 ID bits) -+ * KBASE_AID_4 - use 4 IDs (2 ID bits) -+ * Default value: KBASE_AID_32 (no limit). 
Note hardware implementation -+ * may limit to a lower value. ++ * @kbdev: The kbase device structure for the device ++ * @tiler_required: true if the tiler is required, false otherwise ++ * @shader_cores: A bitmask of shader cores which are necessary for the job ++ * ++ * When this function returns, the @shader_cores will be in the READY state. ++ * ++ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the ++ * work of ensuring the requested cores will remain powered until a matching ++ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate) ++ * is made. + */ -+#define DEFAULT_ARID_LIMIT KBASE_AID_32 ++void kbase_pm_request_cores_sync(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores); + +/** -+ * Default setting for write Address ID limiting on AXI. ++ * kbase_pm_request_cores - Mark one or more cores as being required ++ * for jobs to be submitted + * -+ * Attached value: u32 register value -+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits) -+ * KBASE_AID_16 - use 16 IDs (4 ID bits) -+ * KBASE_AID_8 - use 8 IDs (3 ID bits) -+ * KBASE_AID_4 - use 4 IDs (2 ID bits) -+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation -+ * may limit to a lower value. ++ * @kbdev: The kbase device structure for the device ++ * @tiler_required: true if the tiler is required, false otherwise ++ * @shader_cores: A bitmask of shader cores which are necessary for the job ++ * ++ * This function is called by the job scheduler to mark one or more cores as ++ * being required to submit jobs that are ready to run. ++ * ++ * The cores requested are reference counted and a subsequent call to ++ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be ++ * made to dereference the cores as being 'needed'. ++ * ++ * The active power policy will meet or exceed the requirements of the ++ * requested cores in the system. Any core transitions needed will be begun ++ * immediately, but they might not complete/the cores might not be available ++ * until a Power Management IRQ. ++ * ++ * Return: 0 if the cores were successfully requested, or -errno otherwise. + */ -+#define DEFAULT_AWID_LIMIT KBASE_AID_32 ++void kbase_pm_request_cores(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores); + +/** -+ * Default UMP device mapping. A UMP_DEVICE__SHIFT value which -+ * defines which UMP device this GPU should be mapped to. ++ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for ++ * jobs to be submitted. ++ * ++ * @kbdev: The kbase device structure for the device ++ * @tiler_required: true if the tiler is required, false otherwise ++ * @shader_cores: A bitmask of shader cores (as given to ++ * kbase_pm_request_cores() ) ++ * ++ * This function undoes the effect of kbase_pm_request_cores(). It should be ++ * used when a job is not going to be submitted to the hardware (e.g. the job is ++ * cancelled before it is enqueued). ++ * ++ * The active power policy will meet or exceed the requirements of the ++ * requested cores in the system. Any core transitions needed will be begun ++ * immediately, but they might not complete until a Power Management IRQ. ++ * ++ * The policy may use this as an indication that it can power down cores. 
+ */ -+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT ++void kbase_pm_unrequest_cores(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores); + -+/* -+ * Default period for DVFS sampling ++/** ++ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job ++ * ++ * @kbdev: The kbase device structure for the device ++ * @tiler_required: true if the tiler is required, false otherwise ++ * @shader_cores: A bitmask of shader cores (as given to ++ * kbase_pm_request_cores() ) ++ * ++ * This function should be called after kbase_pm_request_cores() when the job ++ * is about to be submitted to the hardware. It will check that the necessary ++ * cores are available and if so update the 'needed' and 'inuse' bitmasks to ++ * reflect that the job is now committed to being run. ++ * ++ * If the necessary cores are not currently available then the function will ++ * return %KBASE_CORES_NOT_READY and have no effect. ++ * ++ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready, ++ * ++ * %KBASE_NEW_AFFINITY if the affinity requested is not allowed, ++ * ++ * %KBASE_CORES_READY if the cores requested are already available + */ -+// #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ -+#define DEFAULT_PM_DVFS_PERIOD 20 /* 20 ms */ ++enum kbase_pm_cores_ready kbase_pm_register_inuse_cores( ++ struct kbase_device *kbdev, ++ bool tiler_required, ++ u64 shader_cores); + -+/* -+ * Power Management poweroff tick granuality. This is in nanoseconds to -+ * allow HR timer support. ++/** ++ * kbase_pm_release_cores - Release cores after a job has run + * -+ * On each scheduling tick, the power manager core may decide to: -+ * -# Power off one or more shader cores -+ * -# Power off the entire GPU ++ * @kbdev: The kbase device structure for the device ++ * @tiler_required: true if the tiler is required, false otherwise ++ * @shader_cores: A bitmask of shader cores (as given to ++ * kbase_pm_register_inuse_cores() ) ++ * ++ * This function should be called when a job has finished running on the ++ * hardware. A call to kbase_pm_register_inuse_cores() must have previously ++ * occurred. The reference counts of the specified cores will be decremented ++ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy ++ * may then turn off any cores which are no longer 'inuse'. + */ -+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ ++void kbase_pm_release_cores(struct kbase_device *kbdev, ++ bool tiler_required, u64 shader_cores); + -+/* -+ * Power Manager number of ticks before shader cores are powered off ++/** ++ * kbase_pm_request_l2_caches - Request l2 caches ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Request the use of l2 caches for all core groups, power up, wait and prevent ++ * the power manager from powering down the l2 caches. ++ * ++ * This tells the power management that the caches should be powered up, and ++ * they should remain powered, irrespective of the usage of shader cores. This ++ * does not return until the l2 caches are powered up. ++ * ++ * The caller must call kbase_pm_release_l2_caches() when they are finished ++ * to allow normal power management of the l2 caches to resume. ++ * ++ * This should only be used when power management is active. 
+ */ -+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ ++void kbase_pm_request_l2_caches(struct kbase_device *kbdev); + -+/* -+ * Power Manager number of ticks before GPU is powered off ++/** ++ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Increment the count of l2 users but do not attempt to power on the l2 ++ * ++ * It is the callers responsibility to ensure that the l2 is already powered up ++ * and to eventually call kbase_pm_release_l2_caches() + */ -+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */ ++void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev); + -+/* -+ * Default scheduling tick granuality ++/** ++ * kbase_pm_request_l2_caches - Release l2 caches ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Release the use of l2 caches for all core groups and allow the power manager ++ * to power them down when necessary. ++ * ++ * This tells the power management that the caches can be powered down if ++ * necessary, with respect to the usage of shader cores. ++ * ++ * The caller must have called kbase_pm_request_l2_caches() prior to a call ++ * to this. ++ * ++ * This should only be used when power management is active. + */ -+#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ ++void kbase_pm_release_l2_caches(struct kbase_device *kbdev); + ++#endif /* _KBASE_PM_POLICY_H_ */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +new file mode 100644 +index 000000000..d08c628dd +--- /dev/null ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +@@ -0,0 +1,103 @@ +/* -+ * Default minimum number of scheduling ticks before jobs are soft-stopped. + * -+ * This defines the time-slice for a job (which may be different from that of a -+ * context) ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ + -+/* -+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped. -+ */ -+#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ + -+/* -+ * Default minimum number of scheduling ticks before jobs are hard-stopped -+ */ -+#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ -+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ + -+/* -+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped. 
-+ */ -+#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ ++#include ++#include ++#include ++#include + -+/* -+ * Default minimum number of scheduling ticks before jobs are hard-stopped -+ * during dumping -+ */ -+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts) ++{ ++ u32 hi1, hi2; + -+/* -+ * Default timeout for some software jobs, after which the software event wait -+ * jobs will be cancelled. -+ */ -+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ ++ kbase_pm_request_gpu_cycle_counter(kbdev); + -+/* -+ * Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" job -+ */ -+#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ -+#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ ++ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled ++ * correctly */ ++ do { ++ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), ++ NULL); ++ *cycle_counter = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); ++ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), ++ NULL); ++ *cycle_counter |= (((u64) hi1) << 32); ++ } while (hi1 != hi2); + -+/* -+ * Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" CL job. -+ */ -+#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ ++ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled ++ * correctly */ ++ do { ++ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), ++ NULL); ++ *system_time = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(TIMESTAMP_LO), NULL); ++ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), ++ NULL); ++ *system_time |= (((u64) hi1) << 32); ++ } while (hi1 != hi2); + -+/* -+ * Default minimum number of scheduling ticks before the GPU is reset to clear a -+ * "stuck" job during dumping. -+ */ -+#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ ++ /* Record the CPU's idea of current time */ ++ ktime_get_raw_ts64(ts); + -+/* -+ * Default number of milliseconds given for other jobs on the GPU to be -+ * soft-stopped when the GPU needs to be reset. -+ */ -+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ ++ kbase_pm_release_gpu_cycle_counter(kbdev); ++} + -+/* -+ * Default timeslice that a context is scheduled in for, in nanoseconds. -+ * -+ * When a context has used up this amount of time across its jobs, it is -+ * scheduled out to let another run. ++/** ++ * kbase_wait_write_flush - Wait for GPU write flush ++ * @kctx: Context pointer + * -+ * @note the resolution is nanoseconds (ns) here, because that's the format -+ * often used by the OS. -+ */ -+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ -+ -+/* -+ * Perform GPU power down using only platform specific code, skipping DDK power -+ * management. ++ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush ++ * its write buffer. + * -+ * If this is non-zero then kbase will avoid powering down shader cores, the -+ * tiler, and the L2 cache, instead just powering down the entire GPU through -+ * platform specific code. This may be required for certain platform -+ * integrations. ++ * Only in use for BASE_HW_ISSUE_6367 + * -+ * Note that as this prevents kbase from powering down shader cores, this limits -+ * the available power policies to coarse_demand and always_on. 
++ * Note : If GPU resets occur then the counters are reset to zero, the delay may ++ * not be as expected. + */ -+#define PLATFORM_POWER_DOWN_ONLY (1) ++#ifndef CONFIG_MALI_NO_MALI ++void kbase_wait_write_flush(struct kbase_context *kctx) ++{ ++ u32 base_count = 0; + -+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ ++ /* ++ * The caller must be holding onto the kctx or the call is from ++ * userspace. ++ */ ++ kbase_pm_context_active(kctx->kbdev); ++ kbase_pm_request_gpu_cycle_counter(kctx->kbdev); + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c ++ while (true) { ++ u32 new_count; ++ ++ new_count = kbase_reg_read(kctx->kbdev, ++ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); ++ /* First time around, just store the count. */ ++ if (base_count == 0) { ++ base_count = new_count; ++ continue; ++ } ++ ++ /* No need to handle wrapping, unsigned maths works for this. */ ++ if ((new_count - base_count) > 1000) ++ break; ++ } ++ ++ kbase_pm_release_gpu_cycle_counter(kctx->kbdev); ++ kbase_pm_context_idle(kctx->kbdev); ++} ++#endif /* CONFIG_MALI_NO_MALI */ +diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h new file mode 100644 -index 000000000..6338a7e22 +index 000000000..433aa4b9c --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c -@@ -0,0 +1,342 @@ ++++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h +@@ -0,0 +1,52 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -353839,340 +356158,300 @@ index 000000000..6338a7e22 + + + ++#ifndef _KBASE_BACKEND_TIME_H_ ++#define _KBASE_BACKEND_TIME_H_ + -+ -+/* -+ * Base kernel context APIs ++/** ++ * kbase_backend_get_gpu_time() - Get current GPU time ++ * @kbdev: Device pointer ++ * @cycle_counter: Pointer to u64 to store cycle counter in ++ * @system_time: Pointer to u64 to store system time in ++ * @ts: Pointer to struct timespec64 to store current monotonic ++ * time in + */ -+ -+#include -+#include -+#include -+#include -+#include ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts); + +/** -+ * kbase_create_context() - Create a kernel base context. -+ * @kbdev: Kbase device -+ * @is_compat: Force creation of a 32-bit context ++ * kbase_wait_write_flush() - Wait for GPU write flush ++ * @kctx: Context pointer + * -+ * Allocate and init a kernel base context. ++ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush ++ * its write buffer. + * -+ * Return: new kbase context ++ * If GPU resets occur then the counters are reset to zero, the delay may not be ++ * as expected. 
++ * ++ * This function is only in use for BASE_HW_ISSUE_6367 + */ -+struct kbase_context * -+kbase_create_context(struct kbase_device *kbdev, bool is_compat) ++#ifdef CONFIG_MALI_NO_MALI ++static inline void kbase_wait_write_flush(struct kbase_context *kctx) +{ -+ struct kbase_context *kctx; -+ int err; ++} ++#else ++void kbase_wait_write_flush(struct kbase_context *kctx); ++#endif + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#endif /* _KBASE_BACKEND_TIME_H_ */ +diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile +new file mode 100755 +index 000000000..35ff2f1ce +--- /dev/null ++++ b/drivers/gpu/arm/midgard/docs/Doxyfile +@@ -0,0 +1,126 @@ ++# ++# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ /* zero-inited as lot of code assume it's zero'ed out on create */ -+ kctx = vzalloc(sizeof(*kctx)); + -+ if (!kctx) -+ goto out; + -+ /* creating a context is considered a disjoint event */ -+ kbase_disjoint_event(kbdev); ++############################################################################## + -+ kctx->kbdev = kbdev; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ atomic_set(&kctx->refcount, 0); -+ if (is_compat) -+ kbase_ctx_flag_set(kctx, KCTX_COMPAT); -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ kctx->timeline.owner_tgid = task_tgid_nr(current); -+#endif -+ atomic_set(&kctx->setup_complete, 0); -+ atomic_set(&kctx->setup_in_progress, 0); -+ spin_lock_init(&kctx->mm_update_lock); -+ kctx->process_mm = NULL; -+ atomic_set(&kctx->nonmapped_pages, 0); -+ kctx->slots_pullable = 0; -+ kctx->tgid = current->tgid; -+ kctx->pid = current->pid; ++# This file contains per-module Doxygen configuration. Please do not add ++# extra settings to this file without consulting all stakeholders, as they ++# may cause override project-wide settings. ++# ++# Additionally, when defining aliases, macros, sections etc, use the module ++# name as a prefix e.g. gles_my_alias. + -+ err = kbase_mem_pool_init(&kctx->mem_pool, -+ kbdev->mem_pool_max_size_default, -+ kctx->kbdev, &kbdev->mem_pool); -+ if (err) -+ goto free_kctx; ++############################################################################## + -+ err = kbase_mem_evictable_init(kctx); -+ if (err) -+ goto free_pool; ++@INCLUDE = ../../bldsys/Doxyfile_common + -+ atomic_set(&kctx->used_pages, 0); ++# The INPUT tag can be used to specify the files and/or directories that contain ++# documented source files. You may enter file names like "myfile.cpp" or ++# directories like "/usr/src/myproject". Separate the files or directories ++# with spaces. 
+ -+ err = kbase_jd_init(kctx); -+ if (err) -+ goto deinit_evictable; ++INPUT += ../../kernel/drivers/gpu/arm/midgard/ + -+ err = kbasep_js_kctx_init(kctx); -+ if (err) -+ goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ ++############################################################################## ++# Everything below here is optional, and in most cases not required ++############################################################################## + -+ err = kbase_event_init(kctx); -+ if (err) -+ goto free_jd; ++# This tag can be used to specify a number of aliases that acts ++# as commands in the documentation. An alias has the form "name=value". ++# For example adding "sideeffect=\par Side Effects:\n" will allow you to ++# put the command \sideeffect (or @sideeffect) in the documentation, which ++# will result in a user-defined paragraph with heading "Side Effects:". ++# You can put \n's in the value part of an alias to insert newlines. + -+ atomic_set(&kctx->drain_pending, 0); ++ALIASES += + -+ mutex_init(&kctx->reg_lock); ++# The ENABLED_SECTIONS tag can be used to enable conditional ++# documentation sections, marked by \if sectionname ... \endif. + -+ INIT_LIST_HEAD(&kctx->waiting_soft_jobs); -+ spin_lock_init(&kctx->waiting_soft_jobs_lock); -+#ifdef CONFIG_KDS -+ INIT_LIST_HEAD(&kctx->waiting_kds_resource); -+#endif -+ err = kbase_dma_fence_init(kctx); -+ if (err) -+ goto free_event; ++ENABLED_SECTIONS += + -+ err = kbase_mmu_init(kctx); -+ if (err) -+ goto term_dma_fence; ++# If the value of the INPUT tag contains directories, you can use the ++# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp ++# and *.h) to filter out the source-files in the directories. If left ++# blank the following patterns are tested: ++# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx ++# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + -+ do { -+ err = kbase_mem_pool_grow(&kctx->mem_pool, -+ MIDGARD_MMU_BOTTOMLEVEL); -+ if (err) -+ goto pgd_no_mem; ++FILE_PATTERNS += + -+ mutex_lock(&kctx->mmu_lock); -+ kctx->pgd = kbase_mmu_alloc_pgd(kctx); -+ mutex_unlock(&kctx->mmu_lock); -+ } while (!kctx->pgd); ++# The EXCLUDE tag can be used to specify files and/or directories that should ++# excluded from the INPUT source files. This way you can easily exclude a ++# subdirectory from a directory tree whose root is specified with the INPUT tag. ++EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile + -+ kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev); -+ if (!kctx->aliasing_sink_page) -+ goto no_sink_page; + -+ init_waitqueue_head(&kctx->event_queue); ++# If the value of the INPUT tag contains directories, you can use the ++# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude ++# certain files from those directories. 
Note that the wildcards are matched ++# against the file with absolute path, so to exclude all test directories ++# for example use the pattern */test/* + -+ kctx->cookies = KBASE_COOKIE_MASK; ++EXCLUDE_PATTERNS += + -+ /* Make sure page 0 is not used... */ -+ err = kbase_region_tracker_init(kctx); -+ if (err) -+ goto no_region_tracker; ++# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names ++# (namespaces, classes, functions, etc.) that should be excluded from the ++# output. The symbol name can be a fully qualified name, a word, or if the ++# wildcard * is used, a substring. Examples: ANamespace, AClass, ++# AClass::ANamespace, ANamespace::*Test + -+ err = kbase_sticky_resource_init(kctx); -+ if (err) -+ goto no_sticky; ++EXCLUDE_SYMBOLS += + -+ err = kbase_jit_init(kctx); -+ if (err) -+ goto no_jit; -+#ifdef CONFIG_GPU_TRACEPOINTS -+ atomic_set(&kctx->jctx.work_id, 0); -+#endif -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ atomic_set(&kctx->timeline.jd_atoms_in_flight, 0); -+#endif ++# The EXAMPLE_PATH tag can be used to specify one or more files or ++# directories that contain example code fragments that are included (see ++# the \include command). + -+ kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; ++EXAMPLE_PATH += + -+ mutex_init(&kctx->vinstr_cli_lock); ++# The IMAGE_PATH tag can be used to specify one or more files or ++# directories that contain image that are included in the documentation (see ++# the \image command). + -+ timer_setup(&kctx->soft_job_timeout, -+ kbasep_soft_job_timeout_worker, -+ 0); ++IMAGE_PATH += + -+ return kctx; ++# The INCLUDE_PATH tag can be used to specify one or more directories that ++# contain include files that are not input files but should be processed by ++# the preprocessor. + -+no_jit: -+ kbase_gpu_vm_lock(kctx); -+ kbase_sticky_resource_term(kctx); -+ kbase_gpu_vm_unlock(kctx); -+no_sticky: -+ kbase_region_tracker_term(kctx); -+no_region_tracker: -+ kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); -+no_sink_page: -+ /* VM lock needed for the call to kbase_mmu_free_pgd */ -+ kbase_gpu_vm_lock(kctx); -+ kbase_mmu_free_pgd(kctx); -+ kbase_gpu_vm_unlock(kctx); -+pgd_no_mem: -+ kbase_mmu_term(kctx); -+term_dma_fence: -+ kbase_dma_fence_term(kctx); -+free_event: -+ kbase_event_cleanup(kctx); -+free_jd: -+ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ -+ kbasep_js_kctx_term(kctx); -+ kbase_jd_exit(kctx); -+deinit_evictable: -+ kbase_mem_evictable_deinit(kctx); -+free_pool: -+ kbase_mem_pool_term(&kctx->mem_pool); -+free_kctx: -+ vfree(kctx); -+out: -+ return NULL; -+} -+KBASE_EXPORT_SYMBOL(kbase_create_context); ++INCLUDE_PATH += + -+static void kbase_reg_pending_dtor(struct kbase_va_region *reg) -+{ -+ dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n"); -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ kfree(reg); -+} ++# The PREDEFINED tag can be used to specify one or more macro names that ++# are defined before the preprocessor is started (similar to the -D option of ++# gcc). The argument of the tag is a list of macros of the form: name ++# or name=definition (no spaces). If the definition and the = are ++# omitted =1 is assumed. To prevent a macro definition from being ++# undefined via #undef or recursively expanded use the := operator ++# instead of the = operator. + -+/** -+ * kbase_destroy_context - Destroy a kernel base context. 
-+ * @kctx: Context to destroy ++PREDEFINED += ++ ++# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then ++# this tag can be used to specify a list of macro names that should be expanded. ++# The macro definition that is found in the sources will be used. ++# Use the PREDEFINED tag if you want to use a different macro definition. ++ ++EXPAND_AS_DEFINED += ++ ++# The DOTFILE_DIRS tag can be used to specify one or more directories that ++# contain dot files that are included in the documentation (see the ++# \dotfile command). ++ ++DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs ++ +diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot +new file mode 100755 +index 000000000..7ae05c2f8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot +@@ -0,0 +1,112 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Calls kbase_destroy_os_context() to free OS specific structures. -+ * Will release all outstanding regions. + */ -+void kbase_destroy_context(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev; -+ int pages; -+ unsigned long pending_regions_to_clean; -+ unsigned long flags; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); + -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(NULL != kbdev); + -+ KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); + -+ /* Ensure the core is powered up for the destroy process */ -+ /* A suspend won't happen here, because we're in a syscall from a userspace -+ * thread. */ -+ kbase_pm_context_active(kbdev); + -+ kbase_jd_zap_context(kctx); ++digraph policy_objects_diagram { ++ rankdir=LR; ++ size="12,8"; ++ compound=true; + -+#ifdef CONFIG_DEBUG_FS -+ /* Removing the rest of the debugfs entries here as we want to keep the -+ * atom debugfs interface alive until all atoms have completed. This -+ * is useful for debugging hung contexts. */ -+ debugfs_remove_recursive(kctx->kctx_dentry); -+#endif ++ node [ shape = box ]; + -+ kbase_event_cleanup(kctx); ++ subgraph cluster_policy_queues { ++ low_queue [ shape=record label = "LowP | {ctx_lo | ... | ctx_i | ... | ctx_hi}" ]; ++ queues_middle_sep [ label="" shape=plaintext width=0 height=0 ]; + -+ /* -+ * JIT must be terminated before the code below as it must be called -+ * without the region lock being held. -+ * The code above ensures no new JIT allocations can be made by -+ * by the time we get to this point of context tear down. -+ */ -+ kbase_jit_term(kctx); ++ rt_queue [ shape=record label = "RT | {ctx_lo | ... | ctx_j | ... 
| ctx_hi}" ]; + -+ kbase_gpu_vm_lock(kctx); ++ label = "Policy's Queue(s)"; ++ } + -+ kbase_sticky_resource_term(kctx); ++ call_enqueue [ shape=plaintext label="enqueue_ctx()" ]; + -+ /* MMU is disabled as part of scheduling out the context */ -+ kbase_mmu_free_pgd(kctx); ++ { ++ rank=same; ++ ordering=out; ++ call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ]; ++ call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ]; + -+ /* drop the aliasing sink page now that it can't be mapped anymore */ -+ kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); ++ call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ]; ++ } + -+ /* free pending region setups */ -+ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; -+ while (pending_regions_to_clean) { -+ unsigned int cookie = __ffs(pending_regions_to_clean); ++ subgraph cluster_runpool { + -+ BUG_ON(!kctx->pending_regions[cookie]); ++ as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ]; ++ as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ]; ++ as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ]; ++ as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ]; + -+ kbase_reg_pending_dtor(kctx->pending_regions[cookie]); ++ label = "Policy's Run Pool"; ++ } + -+ kctx->pending_regions[cookie] = NULL; -+ pending_regions_to_clean &= ~(1UL << cookie); ++ { ++ rank=same; ++ call_jdequeue [ shape=plaintext label="dequeue_job()" ]; ++ sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ]; + } + -+ kbase_region_tracker_term(kctx); -+ kbase_gpu_vm_unlock(kctx); ++ { ++ rank=same; ++ ordering=out; ++ sstop [ shape=ellipse label="SS-Timer expires" ] ++ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; + -+ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ -+ kbasep_js_kctx_term(kctx); ++ irq [ label="IRQ" shape=ellipse ]; + -+ kbase_jd_exit(kctx); ++ job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ]; ++ } + -+ kbase_pm_context_idle(kbdev); ++ hstop [ shape=ellipse label="HS-Timer expires" ] + -+ kbase_dma_fence_term(kctx); ++ /* ++ * Edges ++ */ + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); -+ kbase_ctx_sched_remove_ctx(kctx); -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ]; + -+ kbase_mmu_term(kctx); ++ low_queue:qr -> call_dequeue:w; ++ rt_queue:qr -> call_dequeue:w; + -+ pages = atomic_read(&kctx->used_pages); -+ if (pages != 0) -+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); ++ call_dequeue -> as1 [lhead=cluster_runpool]; + -+ kbase_mem_evictable_deinit(kctx); -+ kbase_mem_pool_term(&kctx->mem_pool); -+ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); ++ as1->call_jdequeue [ltail=cluster_runpool]; ++ call_jdequeue->jobslots:0; ++ call_jdequeue->sstop_dotfixup [ arrowhead=none]; ++ sstop_dotfixup->sstop [label="Spawn SS-Timer"]; ++ sstop->jobslots [label="SoftStop"]; ++ sstop->hstop [label="Spawn HS-Timer"]; ++ hstop->jobslots:ne [label="HardStop"]; + -+ vfree(kctx); -+} -+KBASE_EXPORT_SYMBOL(kbase_destroy_context); + -+/** -+ * kbase_context_set_create_flags - Set creation flags on a context -+ * @kctx: Kbase context -+ * @flags: Flags to set -+ * -+ * Return: 0 on success -+ */ -+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 
flags) -+{ -+ int err = 0; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long irq_flags; ++ as3->call_ctxfinish:ne [ ltail=cluster_runpool ]; ++ call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ]; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); ++ call_ctxfinish->call_ctxdone [constraint=false]; + -+ js_kctx_info = &kctx->jctx.sched_info; ++ call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false]; + -+ /* Validate flags */ -+ if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) { -+ err = -EINVAL; -+ goto out; -+ } + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++ { ++ jobslots->irq [constraint=false]; + -+ /* Translate the flags */ -+ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) -+ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); ++ irq->job_finish [constraint=false]; ++ } + -+ /* Latch the initial attributes into the Job Scheduler */ -+ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); ++ irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ]; + -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ out: -+ return err; +} -+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags); -diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h -new file mode 100644 -index 000000000..a3f5bb0ce +diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot +new file mode 100755 +index 000000000..159b993b7 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_context.h -@@ -0,0 +1,90 @@ ++++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot +@@ -0,0 +1,63 @@ +/* + * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -354187,88 +356466,91 @@ index 000000000..a3f5bb0ce + + + -+#ifndef _KBASE_CONTEXT_H_ -+#define _KBASE_CONTEXT_H_ + -+#include + ++digraph policy_objects_diagram { ++ rankdir=LR ++ size="6,6" ++ compound=true; ++ ++ node [ shape = box ]; ++ ++ call_enqueue [ shape=plaintext label="enqueue ctx" ]; ++ ++ ++ policy_queue [ label="Policy's Queue" ]; ++ ++ { ++ rank=same; ++ runpool [ label="Policy's Run Pool" ]; ++ ++ ctx_finish [ label="ctx finished" ]; ++ } ++ ++ { ++ rank=same; ++ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; ++ ++ job_finish [ label="Job finished" ]; ++ } + -+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); + -+/** -+ * kbase_ctx_flag - Check if @flag is set on @kctx -+ * @kctx: Pointer to kbase context to check -+ * @flag: Flag to check -+ * -+ * Return: true if @flag is set on @kctx, false if not. -+ */ -+static inline bool kbase_ctx_flag(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+ return atomic_read(&kctx->flags) & flag; -+} + -+/** -+ * kbase_ctx_flag_clear - Clear @flag on @kctx -+ * @kctx: Pointer to kbase context -+ * @flag: Flag to clear -+ * -+ * Clear the @flag on @kctx. This is done atomically, so other flags being -+ * cleared or set at the same time will be safe. 
-+ * -+ * Some flags have locking requirements, check the documentation for the -+ * respective flags. -+ */ -+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + /* -+ * Earlier kernel versions doesn't have atomic_andnot() or -+ * atomic_and(). atomic_clear_mask() was only available on some -+ * architectures and removed on arm in v3.13 on arm and arm64. -+ * -+ * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, -+ * when atomic_andnot() becomes available. ++ * Edges + */ -+ int old, new; + -+ do { -+ old = atomic_read(&kctx->flags); -+ new = old & ~flag; ++ call_enqueue -> policy_queue; + -+ } while (atomic_cmpxchg(&kctx->flags, old, new) != old); -+#else -+ atomic_andnot(flag, &kctx->flags); -+#endif -+} ++ policy_queue->runpool [label="dequeue ctx" weight=0.1]; ++ runpool->policy_queue [label="requeue ctx" weight=0.1]; + -+/** -+ * kbase_ctx_flag_set - Set @flag on @kctx -+ * @kctx: Pointer to kbase context -+ * @flag: Flag to clear -+ * -+ * Set the @flag on @kctx. This is done atomically, so other flags being -+ * cleared or set at the same time will be safe. -+ * -+ * Some flags have locking requirements, check the documentation for the -+ * respective flags. -+ */ -+static inline void kbase_ctx_flag_set(struct kbase_context *kctx, -+ enum kbase_context_flags flag) -+{ -+ atomic_or(flag, &kctx->flags); ++ runpool->ctx_finish [ style=dotted ]; ++ ++ runpool->jobslots [label="dequeue job" weight=0.1]; ++ jobslots->runpool [label="requeue job" weight=0.1]; ++ ++ jobslots->job_finish [ style=dotted ]; +} -+#endif /* _KBASE_CONTEXT_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild +new file mode 100755 +index 000000000..602b15f52 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/ipa/Kbuild +@@ -0,0 +1,24 @@ ++# ++# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# ++ ++ ++midgard_kbase-y += \ ++ ipa/mali_kbase_ipa_simple.o \ ++ ipa/mali_kbase_ipa.o ++ ++midgard_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o ++ ++ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_tmix.c),) ++ midgard_kbase-y += ipa/mali_kbase_ipa_tmix.o ++endif +diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c new file mode 100644 -index 000000000..738766f88 +index 000000000..0c9111700 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c -@@ -0,0 +1,5023 @@ ++++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c +@@ -0,0 +1,589 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -354281,5023 +356563,5645 @@ index 000000000..738766f88 + * + */ + -+#define ENABLE_DEBUG_LOG -+#include "platform/rk/custom_log.h" + -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_MALI_DEVFREQ -+#include -+#include -+#ifdef CONFIG_DEVFREQ_THERMAL -+#include -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_DEVFREQ */ -+#ifdef CONFIG_MALI_NO_MALI -+#include "mali_kbase_model_linux.h" -+#endif /* CONFIG_MALI_NO_MALI */ -+#include "mali_kbase_mem_profile_debugfs_buf_size.h" -+#include "mali_kbase_debug_mem_view.h" -+#include "mali_kbase_mem.h" -+#include "mali_kbase_mem_pool_debugfs.h" -+#if !MALI_CUSTOMER_RELEASE -+#include "mali_kbase_regs_dump_debugfs.h" -+#endif /* !MALI_CUSTOMER_RELEASE */ -+#include "mali_kbase_regs_history_debugfs.h" -+#include -+#include -+#include -+#include -+#include "mali_kbase_ioctl.h" -+ -+#ifdef CONFIG_KDS -+#include -+#include -+#include -+#endif /* CONFIG_KDS */ -+ -+#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include /* is_compat_task */ -+#include -+#include -+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE -+#include -+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ -+#include -+#include -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+#include -+#endif /*CONFIG_MALI_PLATFORM_FAKE */ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#include -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+#include -+#include -+ -+#include -+ ++#include "mali_kbase.h" ++#include "mali_kbase_ipa.h" ++#include "mali_kbase_ipa_debugfs.h" + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#include +#else +#include ++#define dev_pm_opp_find_freq_exact opp_find_freq_exact ++#define dev_pm_opp_get_voltage opp_get_voltage ++#define dev_pm_opp opp +#endif ++#include + -+#include -+ -+#include -+ -+/* GPU IRQ Tags */ -+#define JOB_IRQ_TAG 0 -+#define MMU_IRQ_TAG 1 -+#define GPU_IRQ_TAG 2 ++#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" + -+#if MALI_UNIT_TEST -+static struct kbase_exported_test_data shared_kernel_test_data; -+EXPORT_SYMBOL(shared_kernel_test_data); -+#endif /* MALI_UNIT_TEST */ ++static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { ++ &kbase_simple_ipa_model_ops, ++}; + -+/** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. 
*/ -+#define ROCKCHIP_VERSION (13) ++int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) ++{ ++ int err = 0; + -+static int kbase_dev_nr; ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+static DEFINE_MUTEX(kbase_dev_list_lock); -+static LIST_HEAD(kbase_dev_list); ++ if (model->ops->recalculate) { ++ err = model->ops->recalculate(model); ++ if (err) { ++ dev_err(model->kbdev->dev, ++ "recalculation of power model %s returned error %d\n", ++ model->ops->name, err); ++ } ++ } + -+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" -+static inline void __compile_time_asserts(void) -+{ -+ CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); ++ return err; +} + -+static int kbase_api_handshake(struct kbase_context *kctx, -+ struct kbase_ioctl_version_check *version) ++static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, ++ const char *name) +{ -+ switch (version->major) { -+#ifdef BASE_LEGACY_UK6_SUPPORT -+ case 6: -+ /* We are backwards compatible with version 6, -+ * so pretend to be the old version */ -+ version->major = 6; -+ version->minor = 1; -+ break; -+#endif /* BASE_LEGACY_UK6_SUPPORT */ -+#ifdef BASE_LEGACY_UK7_SUPPORT -+ case 7: -+ /* We are backwards compatible with version 7, -+ * so pretend to be the old version */ -+ version->major = 7; -+ version->minor = 1; -+ break; -+#endif /* BASE_LEGACY_UK7_SUPPORT */ -+#ifdef BASE_LEGACY_UK8_SUPPORT -+ case 8: -+ /* We are backwards compatible with version 8, -+ * so pretend to be the old version */ -+ version->major = 8; -+ version->minor = 4; -+ break; -+#endif /* BASE_LEGACY_UK8_SUPPORT */ -+#ifdef BASE_LEGACY_UK9_SUPPORT -+ case 9: -+ /* We are backwards compatible with version 9, -+ * so pretend to be the old version */ -+ version->major = 9; -+ version->minor = 0; -+ break; -+#endif /* BASE_LEGACY_UK8_SUPPORT */ -+ case BASE_UK_VERSION_MAJOR: -+ /* set minor to be the lowest common */ -+ version->minor = min_t(int, BASE_UK_VERSION_MINOR, -+ (int)version->minor); -+ break; -+ default: -+ /* We return our actual version regardless if it -+ * matches the version returned by userspace - -+ * userspace can bail if it can't handle this -+ * version */ -+ version->major = BASE_UK_VERSION_MAJOR; -+ version->minor = BASE_UK_VERSION_MINOR; -+ break; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { ++ struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; ++ ++ if (!strcmp(ops->name, name)) ++ return ops; + } + -+ /* save the proposed version number for later use */ -+ kctx->api_version = KBASE_API_VERSION(version->major, version->minor); ++ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); + -+ return 0; ++ return NULL; +} + -+/** -+ * enum mali_error - Mali error codes shared with userspace -+ * -+ * This is subset of those common Mali errors that can be returned to userspace. -+ * Values of matching user and kernel space enumerators MUST be the same. -+ * MALI_ERROR_NONE is guaranteed to be 0. 
-+ * -+ * @MALI_ERROR_NONE: Success -+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver -+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure -+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code -+ */ -+enum mali_error { -+ MALI_ERROR_NONE = 0, -+ MALI_ERROR_OUT_OF_GPU_MEMORY, -+ MALI_ERROR_OUT_OF_MEMORY, -+ MALI_ERROR_FUNCTION_FAILED, -+}; ++void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) ++{ ++ atomic_set(&kbdev->ipa_use_configured_model, false); ++} + -+enum { -+ inited_mem = (1u << 0), -+ inited_js = (1u << 1), -+ inited_pm_runtime_init = (1u << 2), -+#ifdef CONFIG_MALI_DEVFREQ -+ inited_devfreq = (1u << 3), -+#endif /* CONFIG_MALI_DEVFREQ */ -+ inited_tlstream = (1u << 4), -+ inited_backend_early = (1u << 5), -+ inited_backend_late = (1u << 6), -+ inited_device = (1u << 7), -+ inited_vinstr = (1u << 8), ++void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) ++{ ++ atomic_set(&kbdev->ipa_use_configured_model, true); ++} + -+ inited_job_fault = (1u << 10), -+ inited_sysfs_group = (1u << 11), -+ inited_misc_register = (1u << 12), -+ inited_get_device = (1u << 13), -+ inited_dev_list = (1u << 14), -+ inited_debugfs = (1u << 15), -+ inited_gpu_device = (1u << 16), -+ inited_registers_map = (1u << 17), -+ inited_io_history = (1u << 18), -+ inited_power_control = (1u << 19), -+ inited_buslogger = (1u << 20), -+ inited_protected = (1u << 21), -+ inited_ctx_sched = (1u << 22) -+}; ++const char *kbase_ipa_model_name_from_id(u32 gpu_id) ++{ ++ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT; + ++ if (GPU_ID_IS_NEW_FORMAT(prod_id)) { ++ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { ++ case GPU_ID2_PRODUCT_TMIX: ++ return KBASE_IPA_FALLBACK_MODEL_NAME; ++ default: ++ return KBASE_IPA_FALLBACK_MODEL_NAME; ++ } ++ } + -+#ifdef CONFIG_MALI_DEBUG -+#define INACTIVE_WAIT_MS (5000) ++ return KBASE_IPA_FALLBACK_MODEL_NAME; ++} + -+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive) ++static struct device_node *get_model_dt_node(struct kbase_ipa_model *model) +{ -+ kbdev->driver_inactive = inactive; -+ wake_up(&kbdev->driver_inactive_wait); ++ struct device_node *model_dt_node; ++ char compat_string[64]; + -+ /* Wait for any running IOCTLs to complete */ -+ if (inactive) -+ msleep(INACTIVE_WAIT_MS); ++ snprintf(compat_string, sizeof(compat_string), "arm,%s", ++ model->ops->name); ++ ++ model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, ++ NULL, compat_string); ++ if (!model_dt_node && !model->missing_dt_node_warning) { ++ dev_warn(model->kbdev->dev, ++ "Couldn't find power_model DT node matching \'%s\'\n", ++ compat_string); ++ model->missing_dt_node_warning = true; ++ } ++ ++ return model_dt_node; +} -+KBASE_EXPORT_TEST_API(kbase_set_driver_inactive); -+#endif /* CONFIG_MALI_DEBUG */ + -+/** -+ * kbase_legacy_dispatch - UKK dispatch function -+ * -+ * This is the dispatch function for the legacy UKK ioctl interface. No new -+ * ioctls should be added to this function, see kbase_ioctl instead. 
-+ * -+ * @kctx: The kernel context structure -+ * @args: Pointer to the data structure passed from/to user space -+ * @args_size: Size of the data structure -+ */ -+static int kbase_legacy_dispatch(struct kbase_context *kctx, -+ void * const args, u32 args_size) ++int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, ++ const char *name, s32 *addr, ++ size_t num_elems, bool dt_required) +{ -+ struct kbase_device *kbdev; -+ union uk_header *ukh = args; -+ u32 id; -+ int ret = 0; -+ -+ KBASE_DEBUG_ASSERT(ukh != NULL); ++ int err, i; ++ struct device_node *model_dt_node = get_model_dt_node(model); ++ char *origin; + -+ kbdev = kctx->kbdev; -+ id = ukh->id; -+ ukh->ret = MALI_ERROR_NONE; /* Be optimistic */ ++ err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + -+#ifdef CONFIG_MALI_DEBUG -+ wait_event(kbdev->driver_inactive_wait, -+ kbdev->driver_inactive == false); -+#endif /* CONFIG_MALI_DEBUG */ ++ if (err && dt_required) { ++ memset(addr, 0, sizeof(s32) * num_elems); ++ dev_warn(model->kbdev->dev, ++ "Error %d, no DT entry: %s.%s = %zu*[0]\n", ++ err, model->ops->name, name, num_elems); ++ origin = "zero"; ++ } else if (err && !dt_required) { ++ origin = "default"; ++ } else /* !err */ { ++ origin = "DT"; ++ } + -+ if (UKP_FUNC_ID_CHECK_VERSION == id) { -+ struct uku_version_check_args *version_check; -+ struct kbase_ioctl_version_check version; ++ /* Create a unique debugfs entry for each element */ ++ for (i = 0; i < num_elems; ++i) { ++ char elem_name[32]; + -+ if (args_size != sizeof(struct uku_version_check_args)) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ return 0; -+ } -+ version_check = (struct uku_version_check_args *)args; -+ version.minor = version_check->minor; -+ version.major = version_check->major; ++ if (num_elems == 1) ++ snprintf(elem_name, sizeof(elem_name), "%s", name); ++ else ++ snprintf(elem_name, sizeof(elem_name), "%s.%d", ++ name, i); + -+ kbase_api_handshake(kctx, &version); ++ dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", ++ model->ops->name, elem_name, addr[i], origin); + -+ version_check->minor = version.minor; -+ version_check->major = version.major; -+ ukh->ret = MALI_ERROR_NONE; -+ return 0; ++ err = kbase_ipa_model_param_add(model, elem_name, ++ &addr[i], sizeof(s32), ++ PARAM_TYPE_S32); ++ if (err) ++ goto exit; + } ++exit: ++ return err; ++} + -+ /* block calls until version handshake */ -+ if (kctx->api_version == 0) -+ return -EINVAL; -+ -+ if (!atomic_read(&kctx->setup_complete)) { -+ struct kbase_uk_set_flags *kbase_set_flags; ++int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, ++ const char *name, char *addr, ++ size_t size, bool dt_required) ++{ ++ int err; ++ struct device_node *model_dt_node = get_model_dt_node(model); ++ const char *string_prop_value; ++ char *origin; + -+ /* setup pending, try to signal that we'll do the setup, -+ * if setup was already in progress, err this call -+ */ -+ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) -+ return -EINVAL; ++ err = of_property_read_string(model_dt_node, name, ++ &string_prop_value); ++ if (err && dt_required) { ++ strncpy(addr, "", size - 1); ++ dev_warn(model->kbdev->dev, ++ "Error %d, no DT entry: %s.%s = \'%s\'\n", ++ err, model->ops->name, name, addr); ++ err = 0; ++ origin = "zero"; ++ } else if (err && !dt_required) { ++ origin = "default"; ++ } else /* !err */ { ++ strncpy(addr, string_prop_value, size - 1); ++ origin = "DT"; ++ } + -+ /* if unexpected call, will stay stuck in setup mode -+ * (is it the only call we 
accept?) -+ */ -+ if (id != KBASE_FUNC_SET_FLAGS) -+ return -EINVAL; ++ addr[size - 1] = '\0'; + -+ kbase_set_flags = (struct kbase_uk_set_flags *)args; ++ dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", ++ model->ops->name, name, string_prop_value, origin); + -+ /* if not matching the expected call, stay in setup mode */ -+ if (sizeof(*kbase_set_flags) != args_size) -+ goto bad_size; ++ err = kbase_ipa_model_param_add(model, name, addr, size, ++ PARAM_TYPE_STRING); + -+ /* if bad flags, will stay stuck in setup mode */ -+ if (kbase_context_set_create_flags(kctx, -+ kbase_set_flags->create_flags) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ return err; ++} + -+ atomic_set(&kctx->setup_complete, 1); -+ return 0; -+ } ++void kbase_ipa_term_model(struct kbase_ipa_model *model) ++{ ++ if (!model) ++ return; + -+ /* setup complete, perform normal operation */ -+ switch (id) { -+ case KBASE_FUNC_MEM_JIT_INIT: -+ { -+ struct kbase_uk_mem_jit_init *jit_init = args; ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+ if (sizeof(*jit_init) != args_size) -+ goto bad_size; ++ if (model->ops->term) ++ model->ops->term(model); + -+ if (kbase_region_tracker_init_jit(kctx, -+ jit_init->va_pages)) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+ case KBASE_FUNC_MEM_ALLOC: -+ { -+ struct kbase_uk_mem_alloc *mem = args; -+ struct kbase_va_region *reg; ++ kbase_ipa_model_param_free_all(model); + -+ if (sizeof(*mem) != args_size) -+ goto bad_size; ++ kfree(model); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_term_model); + -+#if defined(CONFIG_64BIT) -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ /* force SAME_VA if a 64-bit client */ -+ mem->flags |= BASE_MEM_SAME_VA; -+ } -+#endif ++struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, ++ struct kbase_ipa_model_ops *ops) ++{ ++ struct kbase_ipa_model *model; ++ int err; + -+ reg = kbase_mem_alloc(kctx, mem->va_pages, -+ mem->commit_pages, mem->extent, -+ &mem->flags, &mem->gpu_va); -+ mem->va_alignment = 0; ++ lockdep_assert_held(&kbdev->ipa.lock); + -+ if (!reg) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+ case KBASE_FUNC_MEM_IMPORT: { -+ struct kbase_uk_mem_import *mem_import = args; -+ void __user *phandle; ++ if (!ops || !ops->name) ++ return NULL; + -+ if (sizeof(*mem_import) != args_size) -+ goto bad_size; -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ phandle = compat_ptr(mem_import->phandle.compat_value); -+ else -+#endif -+ phandle = mem_import->phandle.value; ++ model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); ++ if (!model) ++ return NULL; + -+ if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } ++ model->kbdev = kbdev; ++ model->ops = ops; ++ INIT_LIST_HEAD(&model->params); + -+ if (kbase_mem_import(kctx, -+ (enum base_mem_import_type) -+ mem_import->type, -+ phandle, -+ 0, -+ &mem_import->gpu_va, -+ &mem_import->va_pages, -+ &mem_import->flags)) { -+ mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ } -+ break; ++ err = model->ops->init(model); ++ if (err) { ++ dev_err(kbdev->dev, ++ "init of power model \'%s\' returned error %d\n", ++ ops->name, err); ++ goto term_model; + } -+ case KBASE_FUNC_MEM_ALIAS: { -+ struct kbase_uk_mem_alias *alias = args; -+ struct base_mem_aliasing_info __user *user_ai; -+ struct base_mem_aliasing_info *ai; + -+ if (sizeof(*alias) != args_size) -+ goto bad_size; ++ err = kbase_ipa_model_recalculate(model); ++ if (err) ++ goto 
term_model; + -+ if (alias->nents > 2048) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+ if (!alias->nents) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } ++ return model; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_ai = compat_ptr(alias->ai.compat_value); -+ else -+#endif -+ user_ai = alias->ai.value; ++term_model: ++ kbase_ipa_term_model(model); ++ return NULL; ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_init_model); + -+ ai = vmalloc(sizeof(*ai) * alias->nents); ++static void kbase_ipa_term_locked(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->ipa.lock); + -+ if (!ai) { -+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY; -+ break; -+ } ++ /* Clean up the models */ ++ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) ++ kbase_ipa_term_model(kbdev->ipa.configured_model); ++ kbase_ipa_term_model(kbdev->ipa.fallback_model); + -+ if (copy_from_user(ai, user_ai, -+ sizeof(*ai) * alias->nents)) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ goto copy_failed; -+ } ++ kbdev->ipa.configured_model = NULL; ++ kbdev->ipa.fallback_model = NULL; ++} + -+ alias->gpu_va = kbase_mem_alias(kctx, &alias->flags, -+ alias->stride, -+ alias->nents, ai, -+ &alias->va_pages); -+ if (!alias->gpu_va) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ goto no_alias; -+ } -+no_alias: -+copy_failed: -+ vfree(ai); -+ break; -+ } -+ case KBASE_FUNC_MEM_COMMIT: -+ { -+ struct kbase_uk_mem_commit *commit = args; -+ int ret; ++int kbase_ipa_init(struct kbase_device *kbdev) ++{ + -+ if (sizeof(*commit) != args_size) -+ goto bad_size; ++ const char *model_name; ++ struct kbase_ipa_model_ops *ops; ++ struct kbase_ipa_model *default_model = NULL; ++ int err; + -+ ret = kbase_mem_commit(kctx, commit->gpu_addr, -+ commit->pages); ++ mutex_init(&kbdev->ipa.lock); ++ /* ++ * Lock during init to avoid warnings from lockdep_assert_held (there ++ * shouldn't be any concurrent access yet). 
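++ * The same kbdev->ipa.lock is also taken by the devfreq power callbacks
++ * (kbase_get_static_power/kbase_get_dynamic_power), by kbase_get_real_power()
++ * and by the debugfs parameter writers, so model init/term and parameter
++ * updates all serialise on it.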
++ */ ++ mutex_lock(&kbdev->ipa.lock); + -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ commit->result_subcode = -+ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS; ++ /* The simple IPA model must *always* be present.*/ ++ ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); + -+ if (ret == 0) { -+ ukh->ret = MALI_ERROR_NONE; -+ commit->result_subcode = -+ BASE_BACKING_THRESHOLD_OK; -+ } else if (ret == -ENOMEM) { -+ commit->result_subcode = -+ BASE_BACKING_THRESHOLD_ERROR_OOM; -+ } ++ if (!ops->do_utilization_scaling_in_framework) { ++ dev_err(kbdev->dev, ++ "Fallback IPA model %s should not account for utilization\n", ++ ops->name); ++ err = -EINVAL; ++ goto end; ++ } + -+ break; -+ } ++ default_model = kbase_ipa_init_model(kbdev, ops); ++ if (!default_model) { ++ err = -EINVAL; ++ goto end; ++ } + -+ case KBASE_FUNC_MEM_QUERY: -+ { -+ struct kbase_uk_mem_query *query = args; ++ kbdev->ipa.fallback_model = default_model; ++ err = of_property_read_string(kbdev->dev->of_node, ++ "ipa-model", ++ &model_name); ++ if (err) { ++ /* Attempt to load a match from GPU-ID */ ++ u32 gpu_id; + -+ if (sizeof(*query) != args_size) -+ goto bad_size; ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ model_name = kbase_ipa_model_name_from_id(gpu_id); ++ dev_dbg(kbdev->dev, ++ "Inferring model from GPU ID 0x%x: \'%s\'\n", ++ gpu_id, model_name); ++ } else { ++ dev_dbg(kbdev->dev, ++ "Using ipa-model parameter from DT: \'%s\'\n", ++ model_name); ++ } + -+ if (kbase_mem_query(kctx, query->gpu_addr, -+ query->query, &query->value) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ ukh->ret = MALI_ERROR_NONE; -+ break; ++ if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { ++ ops = kbase_ipa_model_ops_find(kbdev, model_name); ++ kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); ++ if (!kbdev->ipa.configured_model) { ++ err = -EINVAL; ++ goto end; + } -+ break; -+ -+ case KBASE_FUNC_MEM_FLAGS_CHANGE: -+ { -+ struct kbase_uk_mem_flags_change *fc = args; ++ } else { ++ kbdev->ipa.configured_model = default_model; ++ err = 0; ++ } + -+ if (sizeof(*fc) != args_size) -+ goto bad_size; ++ kbase_ipa_model_use_configured_locked(kbdev); + -+ if (kbase_mem_flags_change(kctx, fc->gpu_va, -+ fc->flags, fc->mask) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++end: ++ if (err) ++ kbase_ipa_term_locked(kbdev); ++ else ++ dev_info(kbdev->dev, ++ "Using configured power model %s, and fallback %s\n", ++ kbdev->ipa.configured_model->ops->name, ++ kbdev->ipa.fallback_model->ops->name); + -+ break; -+ } -+ case KBASE_FUNC_MEM_FREE: -+ { -+ struct kbase_uk_mem_free *mem = args; ++ mutex_unlock(&kbdev->ipa.lock); ++ return err; ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_init); + -+ if (sizeof(*mem) != args_size) -+ goto bad_size; ++void kbase_ipa_term(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->ipa.lock); ++ kbase_ipa_term_locked(kbdev); ++ mutex_unlock(&kbdev->ipa.lock); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_term); + -+ if (kbase_mem_free(kctx, mem->gpu_addr) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } ++/** ++ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP ++ * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range ++ * 0 < c < 2^26 to prevent overflow. ++ * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) ++ * @voltage: Voltage, in mV. 
Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) ++ * ++ * Keep a record of the approximate range of each value at every stage of the ++ * calculation, to ensure we don't overflow. This makes heavy use of the ++ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual ++ * calculations in decimal for increased accuracy. ++ * ++ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) ++ */ ++static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, ++ const u32 voltage) ++{ ++ /* Range: 2^8 < v2 < 2^16 m(V^2) */ ++ const u32 v2 = (voltage * voltage) / 1000; + -+ case KBASE_FUNC_JOB_SUBMIT: -+ { -+ struct kbase_uk_job_submit *job = args; -+ void __user *user_addr = NULL; ++ /* Range: 2^3 < f_MHz < 2^10 MHz */ ++ const u32 f_MHz = freq / 1000000; + -+ if (sizeof(*job) != args_size) -+ goto bad_size; ++ /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ ++ const u32 v2f_big = v2 * f_MHz; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_addr = compat_ptr(job->addr.compat_value); -+ else -+#endif -+ user_addr = job->addr.value; ++ /* Range: 2^1 < v2f < 2^16 MHz V^2 */ ++ const u32 v2f = v2f_big / 1000; + -+ if (kbase_jd_submit(kctx, user_addr, job->nr_atoms, -+ job->stride, false) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } ++ /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. ++ * Must be < 2^42 to avoid overflowing the return value. */ ++ const u64 v2fc = (u64) c * (u64) v2f; ++ u32 remainder; + -+#ifdef BASE_LEGACY_UK6_SUPPORT -+ case KBASE_FUNC_JOB_SUBMIT_UK6: -+ { -+ struct kbase_uk_job_submit *job = args; -+ void __user *user_addr = NULL; ++ /* Range: 0 < v2fc / 1000 < 2^13 mW */ ++ // static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) ++ return div_u64_rem(v2fc, 1000, &remainder); ++} + -+ if (sizeof(*job) != args_size) -+ goto bad_size; ++/** ++ * kbase_scale_static_power() - Scale a static power coefficient to an OPP ++ * @c: Static model coefficient, in uW/V^3. Should be in range ++ * 0 < c < 2^32 to prevent overflow. ++ * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) ++ * ++ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) ++ */ ++u32 kbase_scale_static_power(const u32 c, const u32 voltage) ++{ ++ /* Range: 2^8 < v2 < 2^16 m(V^2) */ ++ const u32 v2 = (voltage * voltage) / 1000; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_addr = compat_ptr(job->addr.compat_value); -+ else -+#endif -+ user_addr = job->addr.value; ++ /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ ++ const u32 v3_big = v2 * voltage; + -+ if (kbase_jd_submit(kctx, user_addr, job->nr_atoms, -+ job->stride, true) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+#endif ++ /* Range: 2^7 < v3 < 2^19 m(V^3) */ ++ const u32 v3 = v3_big / 1000; + -+ case KBASE_FUNC_SYNC: -+ { -+ struct kbase_uk_sync_now *sn = args; ++ /* ++ * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. ++ * The result should be < 2^52 to avoid overflowing the return value. 
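++ * Illustrative example (values chosen only to show the scaling, not taken
++ * from any real platform): c = 2,000,000 uW/V^3 and voltage = 900 mV give
++ * v2 = 810, v3 = 729 and v3c_big = 1,458,000,000 nW, i.e. 1458 mW after
++ * the division by 1000000 below, matching c * (0.9 V)^3 evaluated directly.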
++ */ ++ const u64 v3c_big = (u64) c * (u64) v3; ++ u32 remainder; + -+ if (sizeof(*sn) != args_size) -+ goto bad_size; ++ /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ ++ // return v3c_big / 1000000; ++ return div_u64_rem(v3c_big, 1000000, &remainder); ++} + -+#ifndef CONFIG_MALI_COH_USER -+ if (kbase_sync_now(kctx, &sn->sset.basep_sset) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+#endif -+ break; -+ } ++static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->ipa.lock); + -+ case KBASE_FUNC_DISJOINT_QUERY: -+ { -+ struct kbase_uk_disjoint_query *dquery = args; ++ if (atomic_read(&kbdev->ipa_use_configured_model)) ++ return kbdev->ipa.configured_model; ++ else ++ return kbdev->ipa.fallback_model; ++} + -+ if (sizeof(*dquery) != args_size) -+ goto bad_size; ++static u32 get_static_power_locked(struct kbase_device *kbdev, ++ struct kbase_ipa_model *model, ++ unsigned long voltage) ++{ ++ u32 power = 0; ++ int err; ++ u32 power_coeff; + -+ /* Get the disjointness counter value. */ -+ dquery->counter = kbase_disjoint_event_get(kctx->kbdev); -+ break; -+ } ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+ case KBASE_FUNC_POST_TERM: -+ { -+ kbase_event_close(kctx); -+ break; -+ } ++ if (!model->ops->get_static_coeff) ++ model = kbdev->ipa.fallback_model; + -+ case KBASE_FUNC_HWCNT_SETUP: -+ { -+ struct kbase_uk_hwcnt_setup *setup = args; ++ if (model->ops->get_static_coeff) { ++ err = model->ops->get_static_coeff(model, &power_coeff); ++ if (!err) ++ power = kbase_scale_static_power(power_coeff, ++ (u32) voltage); ++ } + -+ if (sizeof(*setup) != args_size) -+ goto bad_size; ++ return power; ++} + -+ mutex_lock(&kctx->vinstr_cli_lock); -+ if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx, -+ &kctx->vinstr_cli, setup) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ mutex_unlock(&kctx->vinstr_cli_lock); -+ break; -+ } ++__maybe_unused ++#ifdef CONFIG_MALI_PWRSOFT_765 ++static unsigned long kbase_get_static_power(struct devfreq *df, ++ unsigned long voltage) ++#else ++static unsigned long kbase_get_static_power(unsigned long voltage) ++#endif ++{ ++ struct kbase_ipa_model *model; ++ u32 power = 0; ++#ifdef CONFIG_MALI_PWRSOFT_765 ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); ++#else ++ struct kbase_device *kbdev = kbase_find_device(-1); ++#endif + -+ case KBASE_FUNC_HWCNT_DUMP: -+ { -+ /* args ignored */ -+ mutex_lock(&kctx->vinstr_cli_lock); -+ if (kbase_vinstr_hwc_dump(kctx->vinstr_cli, -+ BASE_HWCNT_READER_EVENT_MANUAL) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ mutex_unlock(&kctx->vinstr_cli_lock); -+ break; -+ } ++ mutex_lock(&kbdev->ipa.lock); + -+ case KBASE_FUNC_HWCNT_CLEAR: -+ { -+ /* args ignored */ -+ mutex_lock(&kctx->vinstr_cli_lock); -+ if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ mutex_unlock(&kctx->vinstr_cli_lock); -+ break; -+ } ++ model = get_current_model(kbdev); ++ power = get_static_power_locked(kbdev, model, voltage); + -+ case KBASE_FUNC_HWCNT_READER_SETUP: -+ { -+ struct kbase_uk_hwcnt_reader_setup *setup = args; ++ mutex_unlock(&kbdev->ipa.lock); + -+ if (sizeof(*setup) != args_size) -+ goto bad_size; ++#ifndef CONFIG_MALI_PWRSOFT_765 ++ kbase_release_device(kbdev); ++#endif + -+ mutex_lock(&kctx->vinstr_cli_lock); -+ if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, -+ setup) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ mutex_unlock(&kctx->vinstr_cli_lock); -+ break; -+ } ++ return power; ++} + -+ case 
KBASE_FUNC_GPU_PROPS_REG_DUMP: -+ { -+ struct kbase_uk_gpuprops *setup = args; ++__maybe_unused ++#ifdef CONFIG_MALI_PWRSOFT_765 ++static unsigned long kbase_get_dynamic_power(struct devfreq *df, ++ unsigned long freq, ++ unsigned long voltage) ++#else ++static unsigned long kbase_get_dynamic_power(unsigned long freq, ++ unsigned long voltage) ++#endif ++{ ++ struct kbase_ipa_model *model; ++ u32 power_coeff = 0, power = 0; ++ int err = 0; ++#ifdef CONFIG_MALI_PWRSOFT_765 ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); ++#else ++ struct kbase_device *kbdev = kbase_find_device(-1); ++#endif + -+ if (sizeof(*setup) != args_size) -+ goto bad_size; ++ mutex_lock(&kbdev->ipa.lock); + -+ if (kbase_gpuprops_uk_get_props(kctx, setup) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+ case KBASE_FUNC_FIND_CPU_OFFSET: -+ { -+ struct kbase_uk_find_cpu_offset *find = args; ++ model = kbdev->ipa.fallback_model; + -+ if (sizeof(*find) != args_size) -+ goto bad_size; ++ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); + -+ if (find->gpu_addr & ~PAGE_MASK) { -+ dev_warn(kbdev->dev, -+ "kbase_legacy_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid"); -+ goto out_bad; -+ } ++ if (!err) ++ power = kbase_scale_dynamic_power(power_coeff, freq, voltage); ++ else ++ dev_err_ratelimited(kbdev->dev, ++ "Model %s returned error code %d\n", ++ model->ops->name, err); + -+ if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ } else { -+ int err; ++ mutex_unlock(&kbdev->ipa.lock); + -+ err = kbasep_find_enclosing_cpu_mapping_offset( -+ kctx, -+ find->cpu_addr, -+ find->size, -+ &find->offset); ++#ifndef CONFIG_MALI_PWRSOFT_765 ++ kbase_release_device(kbdev); ++#endif + -+ if (err) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ } -+ break; -+ } -+ case KBASE_FUNC_GET_VERSION: -+ { -+ struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args; ++ return power; ++} + -+ if (sizeof(*get_version) != args_size) -+ goto bad_size; ++int kbase_get_real_power(struct devfreq *df, u32 *power, ++ unsigned long freq, ++ unsigned long voltage) ++{ ++ struct kbase_ipa_model *model; ++ u32 power_coeff = 0; ++ int err = 0; ++ struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + -+ /* version buffer size check is made in compile time assert */ -+ memcpy(get_version->version_buffer, -+ KERNEL_SIDE_DDK_VERSION_STRING, -+ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); -+ get_version->version_string_size = -+ sizeof(KERNEL_SIDE_DDK_VERSION_STRING); -+ get_version->rk_version = ROCKCHIP_VERSION; -+ break; -+ } ++ mutex_lock(&kbdev->ipa.lock); + -+ case KBASE_FUNC_STREAM_CREATE: -+ { -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args; ++ model = get_current_model(kbdev); + -+ if (sizeof(*screate) != args_size) -+ goto bad_size; ++ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); + -+ if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) { -+ /* not NULL terminated */ -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } ++ /* If we switch to protected model between get_current_model() and ++ * get_dynamic_coeff(), counter reading could fail. If that happens ++ * (unlikely, but possible), revert to the fallback model. 
*/ ++ if (err && model != kbdev->ipa.fallback_model) { ++ model = kbdev->ipa.fallback_model; ++ err = model->ops->get_dynamic_coeff(model, &power_coeff, freq); ++ } + -+ if (kbase_sync_fence_stream_create(screate->name, -+ &screate->fd) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ ukh->ret = MALI_ERROR_NONE; -+#else /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+ break; -+ } -+ case KBASE_FUNC_FENCE_VALIDATE: -+ { -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args; ++ if (err) ++ goto exit_unlock; + -+ if (sizeof(*fence_validate) != args_size) -+ goto bad_size; ++ *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + -+ if (kbase_sync_fence_validate(fence_validate->fd) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ ukh->ret = MALI_ERROR_NONE; -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+ break; -+ } ++ if (model->ops->do_utilization_scaling_in_framework) { ++ struct devfreq_dev_status *status = &df->last_status; ++ unsigned long total_time = max(status->total_time, 1ul); ++ u64 busy_time = min(status->busy_time, total_time); ++ u32 remainder; + -+ case KBASE_FUNC_SET_TEST_DATA: -+ { -+#if MALI_UNIT_TEST -+ struct kbase_uk_set_test_data *set_data = args; ++ // *power = ((u64) *power * (u64) busy_time) / total_time; ++ *power = div_u64_rem(((u64) *power * (u64) busy_time), total_time, &remainder); ++ } + -+ shared_kernel_test_data = set_data->test_data; -+ shared_kernel_test_data.kctx.value = (void __user *)kctx; -+ shared_kernel_test_data.mm.value = (void __user *)current->mm; -+ ukh->ret = MALI_ERROR_NONE; -+#endif /* MALI_UNIT_TEST */ -+ break; -+ } ++ *power += get_static_power_locked(kbdev, model, voltage); + -+ case KBASE_FUNC_INJECT_ERROR: -+ { -+#ifdef CONFIG_MALI_ERROR_INJECT -+ unsigned long flags; -+ struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params; ++exit_unlock: ++ mutex_unlock(&kbdev->ipa.lock); + -+ /*mutex lock */ -+ spin_lock_irqsave(&kbdev->reg_op_lock, flags); -+ if (job_atom_inject_error(¶ms) != 0) -+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY; -+ else -+ ukh->ret = MALI_ERROR_NONE; -+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); -+ /*mutex unlock */ -+#endif /* CONFIG_MALI_ERROR_INJECT */ -+ break; -+ } ++ return err; ++} ++KBASE_EXPORT_TEST_API(kbase_get_real_power); + -+ case KBASE_FUNC_MODEL_CONTROL: -+ { -+#ifdef CONFIG_MALI_NO_MALI -+ unsigned long flags; -+ struct kbase_model_control_params params = -+ ((struct kbase_uk_model_control_params *)args)->params; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) ++struct devfreq_cooling_ops kbase_ipa_power_model_ops = { ++#else ++struct devfreq_cooling_power kbase_ipa_power_model_ops = { ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ .get_static_power = &kbase_get_static_power, ++ .get_dynamic_power = &kbase_get_dynamic_power, ++#endif ++}; ++KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); +diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +new file mode 100644 +index 000000000..b2d3db149 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +@@ -0,0 +1,148 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /*mutex lock */ -+ spin_lock_irqsave(&kbdev->reg_op_lock, flags); -+ if (gpu_model_control(kbdev->model, ¶ms) != 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ ukh->ret = MALI_ERROR_NONE; -+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); -+ /*mutex unlock */ -+#endif /* CONFIG_MALI_NO_MALI */ -+ break; -+ } + -+#ifdef BASE_LEGACY_UK8_SUPPORT -+ case KBASE_FUNC_KEEP_GPU_POWERED: -+ { -+ dev_warn(kbdev->dev, "kbase_legacy_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n"); -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ break; -+ } -+#endif /* BASE_LEGACY_UK8_SUPPORT */ + -+ case KBASE_FUNC_GET_PROFILING_CONTROLS: -+ { -+ struct kbase_uk_profiling_controls *controls = -+ (struct kbase_uk_profiling_controls *)args; -+ u32 i; ++#ifndef _KBASE_IPA_H_ ++#define _KBASE_IPA_H_ + -+ if (sizeof(*controls) != args_size) -+ goto bad_size; ++#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + -+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) -+ controls->profiling_controls[i] = -+ kbdev->kbase_profiling_controls[i]; ++struct devfreq; + -+ break; -+ } ++struct kbase_ipa_model { ++ struct list_head link; ++ struct kbase_device *kbdev; ++ void *model_data; ++ struct kbase_ipa_model_ops *ops; ++ struct list_head params; ++ bool missing_dt_node_warning; ++}; + -+ /* used only for testing purposes; these controls are to be set by gator through gator API */ -+ case KBASE_FUNC_SET_PROFILING_CONTROLS: -+ { -+ struct kbase_uk_profiling_controls *controls = -+ (struct kbase_uk_profiling_controls *)args; -+ u32 i; ++/** ++ * kbase_ipa_model_add_param_s32 - Add an integer model parameter ++ * @model: pointer to IPA model ++ * @name: name of corresponding debugfs entry ++ * @addr: address where the value is stored ++ * @num_elems: number of elements (1 if not an array) ++ * @dt_required: if false, a corresponding devicetree entry is not required, ++ * and the current value will be used. If true, a warning is ++ * output and the data is zeroed ++ * ++ * Return: 0 on success, or an error code ++ */ ++int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, ++ const char *name, s32 *addr, ++ size_t num_elems, bool dt_required); + -+ if (sizeof(*controls) != args_size) -+ goto bad_size; ++/** ++ * kbase_ipa_model_add_param_string - Add a string model parameter ++ * @model: pointer to IPA model ++ * @name: name of corresponding debugfs entry ++ * @addr: address where the value is stored ++ * @size: size, in bytes, of the value storage (so the maximum string ++ * length is size - 1) ++ * @dt_required: if false, a corresponding devicetree entry is not required, ++ * and the current value will be used. 
If true, a warning is ++ * output and the data is zeroed ++ * ++ * Return: 0 on success, or an error code ++ */ ++int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, ++ const char *name, char *addr, ++ size_t size, bool dt_required); + -+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) -+ _mali_profiling_control(i, controls->profiling_controls[i]); ++struct kbase_ipa_model_ops { ++ char *name; ++ /* The init, recalculate and term ops on the default model are always ++ * called. However, all the other models are only invoked if the model ++ * is selected in the device tree. Otherwise they are never ++ * initialized. Additional resources can be acquired by models in ++ * init(), however they must be terminated in the term(). ++ */ ++ int (*init)(struct kbase_ipa_model *model); ++ /* Called immediately after init(), or when a parameter is changed, so ++ * that any coefficients derived from model parameters can be ++ * recalculated. */ ++ int (*recalculate)(struct kbase_ipa_model *model); ++ void (*term)(struct kbase_ipa_model *model); ++ /* ++ * get_dynamic_coeff() - calculate dynamic power coefficient ++ * @model: pointer to model ++ * @coeffp: pointer to return value location ++ * @current_freq: frequency the GPU has been running at for the ++ * previous sampling period. ++ * ++ * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which ++ * is then scaled by the IPA framework according to the current OPP's ++ * frequency and voltage. ++ * ++ * Return: 0 on success, or an error code. ++ */ ++ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp, ++ u32 current_freq); ++ /* ++ * get_static_coeff() - calculate static power coefficient ++ * @model: pointer to model ++ * @coeffp: pointer to return value location ++ * ++ * Calculate a static power coefficient, with units uW/(V^3), which is ++ * scaled by the IPA framework according to the current OPP's voltage. ++ * ++ * Return: 0 on success, or an error code. ++ */ ++ int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); ++ /* If false, the model's get_dynamic_coeff() method accounts for how ++ * long the GPU was active over the sample period. If true, the ++ * framework will scale the calculated power according to the ++ * utilization stats recorded by devfreq in get_real_power(). 
*/ ++ bool do_utilization_scaling_in_framework; ++}; + -+ break; -+ } ++/* Models can be registered only in the platform's platform_init_func call */ ++int kbase_ipa_model_ops_register(struct kbase_device *kbdev, ++ struct kbase_ipa_model_ops *new_model_ops); ++struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev, ++ const char *name); + -+ case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD: -+ { -+ struct kbase_uk_debugfs_mem_profile_add *add_data = -+ (struct kbase_uk_debugfs_mem_profile_add *)args; -+ char *buf; -+ char __user *user_buf; ++int kbase_ipa_init(struct kbase_device *kbdev); ++void kbase_ipa_term(struct kbase_device *kbdev); ++void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev); ++void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev); ++int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); ++struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, ++ struct kbase_ipa_model_ops *ops); ++void kbase_ipa_term_model(struct kbase_ipa_model *model); + -+ if (sizeof(*add_data) != args_size) -+ goto bad_size; ++extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; + -+ if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { -+ dev_err(kbdev->dev, "buffer too big\n"); -+ goto out_bad; -+ } + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_buf = -+ compat_ptr(add_data->buf.compat_value); -+ else ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) ++extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; ++#else ++extern struct devfreq_cooling_power kbase_ipa_power_model_ops; +#endif -+ user_buf = add_data->buf.value; -+ -+ buf = kmalloc(add_data->len, GFP_KERNEL); -+ if (ZERO_OR_NULL_PTR(buf)) -+ goto out_bad; + -+ if (0 != copy_from_user(buf, user_buf, add_data->len)) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ kfree(buf); -+ goto out_bad; -+ } ++#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + -+ if (kbasep_mem_profile_debugfs_insert(kctx, buf, -+ add_data->len)) { -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ goto out_bad; -+ } ++static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) ++{ } + -+ break; -+ } ++static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) ++{ } + -+#ifdef CONFIG_MALI_NO_MALI -+ case KBASE_FUNC_SET_PRFCNT_VALUES: -+ { ++#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + -+ struct kbase_uk_prfcnt_values *params = -+ ((struct kbase_uk_prfcnt_values *)args); -+ gpu_model_set_dummy_prfcnt_sample(params->data, -+ params->size); ++#endif +diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +new file mode 100644 +index 000000000..eafc14009 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +@@ -0,0 +1,219 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ break; -+ } -+#endif /* CONFIG_MALI_NO_MALI */ -+#ifdef BASE_LEGACY_UK10_4_SUPPORT -+ case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4: -+ { -+ struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire -+ = args; -+ int ret; + -+ if (sizeof(*tlstream_acquire) != args_size) -+ goto bad_size; + -+ ret = kbase_tlstream_acquire( -+ kctx, 0); -+ if (ret < 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ tlstream_acquire->fd = ret; -+ break; -+ } -+#endif /* BASE_LEGACY_UK10_4_SUPPORT */ -+ case KBASE_FUNC_TLSTREAM_ACQUIRE: -+ { -+ struct kbase_uk_tlstream_acquire *tlstream_acquire = -+ args; -+ int ret; ++#include ++#include ++#include + -+ if (sizeof(*tlstream_acquire) != args_size) -+ goto bad_size; ++#include "mali_kbase.h" ++#include "mali_kbase_ipa.h" ++#include "mali_kbase_ipa_debugfs.h" + -+ if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK) -+ goto out_bad; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) ++#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE ++#endif + -+ ret = kbase_tlstream_acquire( -+ kctx, tlstream_acquire->flags); -+ if (ret < 0) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; -+ else -+ tlstream_acquire->fd = ret; -+ break; -+ } -+ case KBASE_FUNC_TLSTREAM_FLUSH: -+ { -+ struct kbase_uk_tlstream_flush *tlstream_flush = -+ args; ++struct kbase_ipa_model_param { ++ char *name; ++ union { ++ void *voidp; ++ s32 *s32p; ++ char *str; ++ } addr; ++ size_t size; ++ enum kbase_ipa_model_param_type type; ++ struct kbase_ipa_model *model; ++ struct list_head link; ++}; + -+ if (sizeof(*tlstream_flush) != args_size) -+ goto bad_size; ++static int param_int_get(void *data, u64 *val) ++{ ++ struct kbase_ipa_model_param *param = data; + -+ kbase_tlstream_flush_streams(); -+ break; -+ } -+#if MALI_UNIT_TEST -+ case KBASE_FUNC_TLSTREAM_TEST: -+ { -+ struct kbase_uk_tlstream_test *tlstream_test = args; ++ mutex_lock(¶m->model->kbdev->ipa.lock); ++ *(s64 *) val = *param->addr.s32p; ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ if (sizeof(*tlstream_test) != args_size) -+ goto bad_size; ++ return 0; ++} + -+ kbase_tlstream_test( -+ tlstream_test->tpw_count, -+ tlstream_test->msg_delay, -+ tlstream_test->msg_count, -+ tlstream_test->aux_msg); -+ break; -+ } -+ case KBASE_FUNC_TLSTREAM_STATS: -+ { -+ struct kbase_uk_tlstream_stats *tlstream_stats = args; ++static int param_int_set(void *data, u64 val) ++{ ++ struct kbase_ipa_model_param *param = data; ++ struct kbase_ipa_model *model = param->model; ++ s64 sval = (s64) val; ++ int err = 0; + -+ if (sizeof(*tlstream_stats) != args_size) -+ goto bad_size; ++ if (sval < S32_MIN || sval > S32_MAX) ++ return -ERANGE; + -+ kbase_tlstream_stats( -+ &tlstream_stats->bytes_collected, -+ &tlstream_stats->bytes_generated); -+ break; -+ } -+#endif /* MALI_UNIT_TEST */ ++ mutex_lock(¶m->model->kbdev->ipa.lock); ++ *param->addr.s32p = val; ++ err = kbase_ipa_model_recalculate(model); ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ case KBASE_FUNC_GET_CONTEXT_ID: -+ { -+ struct kbase_uk_context_id *info = args; ++ return err; ++} + -+ info->id = kctx->id; -+ break; -+ } ++DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); + -+ case KBASE_FUNC_SOFT_EVENT_UPDATE: -+ { -+ struct kbase_uk_soft_event_update *update = args; ++static ssize_t param_string_get(struct file *file, char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ struct kbase_ipa_model_param *param = file->private_data; ++ ssize_t ret; ++ size_t len; + -+ if (sizeof(*update) != args_size) -+ goto bad_size; ++ 
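++	/* ipa.lock serialises this read against param_string_set() and against
++	 * model recalculation, so the parameter string cannot change while it
++	 * is copied out to userspace. */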
mutex_lock(¶m->model->kbdev->ipa.lock); ++ len = strnlen(param->addr.str, param->size - 1) + 1; ++ ret = simple_read_from_buffer(user_buf, count, ppos, ++ param->addr.str, len); ++ mutex_unlock(¶m->model->kbdev->ipa.lock); + -+ if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && -+ (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || -+ (update->flags != 0)) -+ goto out_bad; ++ return ret; ++} + -+ if (kbase_soft_event_update(kctx, update->evt, -+ update->new_status)) -+ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++static ssize_t param_string_set(struct file *file, const char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ struct kbase_ipa_model_param *param = file->private_data; ++ struct kbase_ipa_model *model = param->model; ++ ssize_t ret = count; ++ size_t buf_size; ++ int err; + -+ break; -+ } ++ mutex_lock(&model->kbdev->ipa.lock); + -+ default: -+ dev_err(kbdev->dev, "unknown ioctl %u\n", id); -+ goto out_bad; ++ if (count > param->size) { ++ ret = -EINVAL; ++ goto end; + } + -+ return ret; ++ buf_size = min(param->size - 1, count); ++ if (copy_from_user(param->addr.str, user_buf, buf_size)) { ++ ret = -EFAULT; ++ goto end; ++ } + -+bad_size: -+ dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); -+out_bad: -+ return -EINVAL; -+} ++ param->addr.str[buf_size] = '\0'; + -+static struct kbase_device *to_kbase_device(struct device *dev) -+{ -+ return dev_get_drvdata(dev); -+} ++ err = kbase_ipa_model_recalculate(model); ++ if (err < 0) ++ ret = err; + -+static int assign_irqs(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); ++end: ++ mutex_unlock(&model->kbdev->ipa.lock); + -+ static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" }; ++ return ret; ++} + -+#if IS_ENABLED(CONFIG_OF) -+ static const char *const irq_names[] = { "job", "mmu", "gpu" }; -+#endif -+ int i; ++static const struct file_operations fops_string = { ++ .read = param_string_get, ++ .write = param_string_set, ++ .open = simple_open, ++ .llseek = default_llseek, ++}; + -+ if (!kbdev) -+ return -ENODEV; ++int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, ++ void *addr, size_t size, ++ enum kbase_ipa_model_param_type type) ++{ ++ struct kbase_ipa_model_param *param; + -+ for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) { -+ int irq; ++ param = kzalloc(sizeof(*param), GFP_KERNEL); + -+#if IS_ENABLED(CONFIG_OF) -+ /* We recommend using Upper case for the irq names in dts, but if -+ * there are devices in the world using Lower case then we should -+ * avoid breaking support for them. So try using names in Upper case -+ * first then try using Lower case names. If both attempts fail then -+ * we assume there is no IRQ resource specified for the GPU. 
-+ */ -+ irq = platform_get_irq_byname(pdev, irq_names_caps[i]); -+ if (irq < 0) -+ irq = platform_get_irq_byname(pdev, irq_names[i]); -+#else -+ irq = platform_get_irq(pdev, i); -+#endif /* CONFIG_OF */ ++ if (!param) ++ return -ENOMEM; + -+ if (irq < 0) { -+ dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]); -+ return irq; -+ } ++ /* 'name' is stack-allocated for array elements, so copy it into ++ * heap-allocated storage */ ++ param->name = kstrdup(name, GFP_KERNEL); ++ param->addr.voidp = addr; ++ param->size = size; ++ param->type = type; ++ param->model = model; + -+ kbdev->irqs[i].irq = irq; -+ kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq)); -+ } ++ list_add(¶m->link, &model->params); + + return 0; +} + -+/* -+ * API to acquire device list mutex and -+ * return pointer to the device list head -+ */ -+const struct list_head *kbase_dev_list_get(void) ++void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) +{ -+ mutex_lock(&kbase_dev_list_lock); -+ return &kbase_dev_list; -+} -+KBASE_EXPORT_TEST_API(kbase_dev_list_get); ++ struct kbase_ipa_model_param *param_p, *param_n; + -+/* API to release the device list mutex */ -+void kbase_dev_list_put(const struct list_head *dev_list) -+{ -+ mutex_unlock(&kbase_dev_list_lock); ++ list_for_each_entry_safe(param_p, param_n, &model->params, link) { ++ list_del(¶m_p->link); ++ kfree(param_p->name); ++ kfree(param_p); ++ } +} -+KBASE_EXPORT_TEST_API(kbase_dev_list_put); + -+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ -+struct kbase_device *kbase_find_device(int minor) ++static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) +{ -+ struct kbase_device *kbdev = NULL; -+ struct list_head *entry; -+ const struct list_head *dev_list = kbase_dev_list_get(); -+ -+ list_for_each(entry, dev_list) { -+ struct kbase_device *tmp; -+ -+ tmp = list_entry(entry, struct kbase_device, entry); -+ if (tmp->mdev.minor == minor || minor == -1) { -+ kbdev = tmp; -+ get_device(kbdev->dev); -+ break; -+ } -+ } -+ kbase_dev_list_put(dev_list); ++ struct list_head *it; ++ struct dentry *dir; + -+ return kbdev; -+} -+EXPORT_SYMBOL(kbase_find_device); ++ lockdep_assert_held(&model->kbdev->ipa.lock); + -+void kbase_release_device(struct kbase_device *kbdev) -+{ -+ put_device(kbdev->dev); -+} -+EXPORT_SYMBOL(kbase_release_device); ++ dir = debugfs_create_dir(model->ops->name, ++ model->kbdev->mali_debugfs_directory); + -+#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE -+/* -+ * Older versions, before v4.6, of the kernel doesn't have -+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 -+ */ -+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) -+{ -+ char buf[32]; ++ if (!dir) { ++ dev_err(model->kbdev->dev, ++ "Couldn't create mali debugfs %s directory", ++ model->ops->name); ++ return; ++ } + -+ count = min(sizeof(buf), count); ++ list_for_each(it, &model->params) { ++ struct kbase_ipa_model_param *param = ++ list_entry(it, ++ struct kbase_ipa_model_param, ++ link); ++ const struct file_operations *fops = NULL; + -+ if (copy_from_user(buf, s, count)) -+ return -EFAULT; -+ buf[count] = '\0'; ++ switch (param->type) { ++ case PARAM_TYPE_S32: ++ fops = &fops_s32; ++ break; ++ case PARAM_TYPE_STRING: ++ fops = &fops_string; ++ break; ++ } + -+ return strtobool(buf, res); ++ if (unlikely(!fops)) { ++ dev_err(model->kbdev->dev, ++ "Type not set for %s parameter %s\n", ++ model->ops->name, 
param->name); ++ } else { ++ debugfs_create_file(param->name, S_IRUGO | S_IWUSR, ++ dir, param, fops); ++ } ++ } +} -+#endif + -+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) ++void kbase_ipa_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = f->private_data; -+ int err; -+ bool value; -+ -+ err = kstrtobool_from_user(ubuf, size, &value); -+ if (err) -+ return err; ++ mutex_lock(&kbdev->ipa.lock); + -+ if (value) -+ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); -+ else -+ kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); ++ if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) ++ kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); ++ kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); + -+ return size; ++ mutex_unlock(&kbdev->ipa.lock); +} +diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h +new file mode 100644 +index 000000000..ec06e2096 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h +@@ -0,0 +1,49 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) -+{ -+ struct kbase_context *kctx = f->private_data; -+ char buf[32]; -+ int count; -+ bool value; -+ -+ value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + -+ count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); + -+ return simple_read_from_buffer(ubuf, size, off, buf, count); -+} ++#ifndef _KBASE_IPA_DEBUGFS_H_ ++#define _KBASE_IPA_DEBUGFS_H_ + -+static const struct file_operations kbase_infinite_cache_fops = { -+ .open = simple_open, -+ .write = write_ctx_infinite_cache, -+ .read = read_ctx_infinite_cache, ++enum kbase_ipa_model_param_type { ++ PARAM_TYPE_S32 = 1, ++ PARAM_TYPE_STRING, +}; + -+static int kbase_open(struct inode *inode, struct file *filp) -+{ -+ struct kbase_device *kbdev = NULL; -+ struct kbase_context *kctx; -+ int ret = 0; +#ifdef CONFIG_DEBUG_FS -+ char kctx_name[64]; -+#endif + -+ kbdev = kbase_find_device(iminor(inode)); ++void kbase_ipa_debugfs_init(struct kbase_device *kbdev); ++int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, ++ void *addr, size_t size, ++ enum kbase_ipa_model_param_type type); ++void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); + -+ if (!kbdev) -+ return -ENODEV; ++#else /* CONFIG_DEBUG_FS */ + -+ kctx = kbase_create_context(kbdev, is_compat_task()); -+ if (!kctx) { -+ ret = -ENOMEM; -+ goto out; -+ } ++static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, ++ const char *name, void *addr, ++ size_t size, ++ enum kbase_ipa_model_param_type type) ++{ ++ return 0; ++} + -+ init_waitqueue_head(&kctx->event_queue); -+ filp->f_mode |= FMODE_UNSIGNED_OFFSET; -+ filp->private_data = kctx; -+ kctx->filp = filp; ++static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) ++{ } + -+ if (kbdev->infinite_cache_active_default) -+ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); ++#endif /* CONFIG_DEBUG_FS */ + -+#ifdef CONFIG_DEBUG_FS -+ snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); ++#endif /* _KBASE_IPA_DEBUGFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +new file mode 100644 +index 000000000..da0a4d4a0 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +@@ -0,0 +1,222 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ kctx->kctx_dentry = debugfs_create_dir(kctx_name, -+ kbdev->debugfs_ctx_directory); + -+ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { -+ ret = -ENOMEM; -+ goto out; -+ } + -+#ifdef CONFIG_MALI_COH_USER -+ /* if cache is completely coherent at hardware level, then remove the -+ * infinite cache control support from debugfs. -+ */ -+#else -+ debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, -+ kctx, &kbase_infinite_cache_fops); -+#endif /* CONFIG_MALI_COH_USER */ ++#include ++#ifdef CONFIG_DEVFREQ_THERMAL ++#include ++#endif ++#include ++#include + -+ mutex_init(&kctx->mem_profile_lock); ++#include "mali_kbase.h" ++#include "mali_kbase_defs.h" + -+ kbasep_jd_debugfs_ctx_init(kctx); -+ kbase_debug_mem_view_init(filp); ++/* ++ * This model is primarily designed for the Juno platform. It may not be ++ * suitable for other platforms. 
The additional resources in this model ++ * should preferably be minimal, as this model is rarely used when a dynamic ++ * model is available. ++ */ + -+ kbase_debug_job_fault_context_init(kctx); ++/** ++ * struct kbase_ipa_model_simple_data - IPA context per device ++ * @dynamic_coefficient: dynamic coefficient of the model ++ * @static_coefficient: static coefficient of the model ++ * @ts: Thermal scaling coefficients of the model ++ * @tz_name: Thermal zone name ++ * @gpu_tz: thermal zone device ++ */ + -+ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool); ++struct kbase_ipa_model_simple_data { ++ u32 dynamic_coefficient; ++ u32 static_coefficient; ++ s32 ts[4]; ++ char tz_name[16]; ++ struct thermal_zone_device *gpu_tz; ++}; ++#define FALLBACK_STATIC_TEMPERATURE 55000 + -+ kbase_jit_debugfs_init(kctx); -+#endif /* CONFIG_DEBUG_FS */ ++/** ++ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient ++ * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N ++ * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 ++ * ++ * Scale the temperature according to a cubic polynomial whose coefficients are ++ * provided in the device tree. The result is used to scale the static power ++ * coefficient, where 1000000 means no change. ++ * ++ * Return: Temperature scaling factor. Approx range 0 < ret < 10,000,000. ++ */ ++static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) ++{ ++ /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ ++ u32 remainder; ++ // static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) ++ const s64 t2 = div_s64_rem((t * t), 1000, &remainder); + -+ dev_dbg(kbdev->dev, "created base context\n"); ++ /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ ++ const s64 t3 = div_s64_rem((t * t2), 1000, &remainder); + -+ { -+ struct kbasep_kctx_list_element *element; ++ /* ++ * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in ++ * Deg^-N, so we need to multiply the last coefficient by 1000. 
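++	 * As a worked example with purely hypothetical coefficients
++	 * ts = {20000, 2000, -20, 2} and t = 40000 mDeg C (40 Deg C):
++	 * t2 = 1600000 and t3 = 64000000, so
++	 * res_big = 2*64000000 - 20*1600000 + 2000*40000 + 20000*1000
++	 *         = 196000000, giving a scaling factor of 196000 after the
++	 * division by 1000, i.e. roughly 0.2x the static power.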
++ * Range: -2^63 < res_big < 2^63 ++ */ ++ const s64 res_big = ts[3] * t3 /* +/- 2^62 */ ++ + ts[2] * t2 /* +/- 2^55 */ ++ + ts[1] * t /* +/- 2^48 */ ++ + ts[0] * 1000; /* +/- 2^41 */ + -+ element = kzalloc(sizeof(*element), GFP_KERNEL); -+ if (element) { -+ mutex_lock(&kbdev->kctx_list_lock); -+ element->kctx = kctx; -+ list_add(&element->link, &kbdev->kctx_list); -+ KBASE_TLSTREAM_TL_NEW_CTX( -+ element->kctx, -+ (u32)(element->kctx->id), -+ (u32)(element->kctx->tgid)); -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } else { -+ /* we don't treat this as a fail - just warn about it */ -+ dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n"); -+ } -+ } -+ return 0; ++ /* Range: -2^60 < res_unclamped < 2^60 */ ++ s64 res_unclamped = div_s64_rem(res_big, 1000, &remainder); + -+ out: -+ kbase_release_device(kbdev); -+ return ret; ++ /* Clamp to range of 0x to 10x the static power */ ++ return clamp(res_unclamped, (s64) 0, (s64) 10000000); +} + -+static int kbase_release(struct inode *inode, struct file *filp) ++static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ -+ struct kbase_context *kctx = filp->private_data; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbasep_kctx_list_element *element, *tmp; -+ bool found_element = false; -+ -+ KBASE_TLSTREAM_TL_DEL_CTX(kctx); -+ -+#ifdef CONFIG_DEBUG_FS -+ kbasep_mem_profile_debugfs_remove(kctx); -+ kbase_debug_job_fault_context_term(kctx); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) ++ unsigned long temp; ++#else ++ int temp; +#endif ++ u32 temp_scaling_factor; ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *) model->model_data; ++ struct thermal_zone_device *gpu_tz = model_data->gpu_tz; ++ u64 coeffp_big; + -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { -+ if (element->kctx == kctx) { -+ list_del(&element->link); -+ kfree(element); -+ found_element = true; -+ } -+ } -+ mutex_unlock(&kbdev->kctx_list_lock); -+ if (!found_element) -+ dev_warn(kbdev->dev, "kctx not in kctx_list\n"); -+ -+ filp->private_data = NULL; -+ -+ mutex_lock(&kctx->vinstr_cli_lock); -+ /* If this client was performing hwcnt dumping and did not explicitly -+ * detach itself, remove it from the vinstr core now */ -+ if (kctx->vinstr_cli) { -+ struct kbase_uk_hwcnt_setup setup; ++ if (gpu_tz) { ++ int ret; + -+ setup.dump_buffer = 0llu; -+ kbase_vinstr_legacy_hwc_setup( -+ kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup); ++ ret = gpu_tz->ops->get_temp(gpu_tz, &temp); ++ if (ret) { ++ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", ++ ret); ++ temp = FALLBACK_STATIC_TEMPERATURE; ++ } ++ } else { ++ temp = FALLBACK_STATIC_TEMPERATURE; + } -+ mutex_unlock(&kctx->vinstr_cli_lock); + -+ kbase_destroy_context(kctx); ++ temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, ++ temp); ++ coeffp_big = (u64)model_data->static_coefficient * temp_scaling_factor; ++ *coeffp = div_u64(coeffp_big, 1000000); + -+ dev_dbg(kbdev->dev, "deleted base context\n"); -+ kbase_release_device(kbdev); + return 0; +} + -+#define CALL_MAX_SIZE 536 -+ -+static long kbase_legacy_ioctl(struct file *filp, unsigned int cmd, -+ unsigned long arg) ++static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, ++ u32 current_freq) +{ -+ u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */ -+ u32 size = _IOC_SIZE(cmd); -+ struct kbase_context *kctx = filp->private_data; -+ -+ if (size > 
CALL_MAX_SIZE) -+ return -ENOTTY; -+ -+ if (0 != copy_from_user(&msg, (void __user *)arg, size)) { -+ dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n"); -+ return -EFAULT; -+ } ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *) model->model_data; + -+ if (kbase_legacy_dispatch(kctx, &msg, size) != 0) -+ return -EFAULT; ++ *coeffp = model_data->dynamic_coefficient; + -+ if (0 != copy_to_user((void __user *)arg, &msg, size)) { -+ dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n"); -+ return -EFAULT; -+ } + return 0; +} + -+static int kbase_api_set_flags(struct kbase_context *kctx, -+ struct kbase_ioctl_set_flags *flags) ++static int add_params(struct kbase_ipa_model *model) +{ -+ int err; -+ -+ /* setup pending, try to signal that we'll do the setup, -+ * if setup was already in progress, err this call -+ */ -+ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) -+ return -EINVAL; ++ int err = 0; ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; + -+ err = kbase_context_set_create_flags(kctx, flags->create_flags); -+ /* if bad flags, will stay stuck in setup mode */ ++ err = kbase_ipa_model_add_param_s32(model, "static-coefficient", ++ &model_data->static_coefficient, ++ 1, true); + if (err) -+ return err; ++ goto end; + -+ atomic_set(&kctx->setup_complete, 1); -+ return 0; -+} ++ err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", ++ &model_data->dynamic_coefficient, ++ 1, true); ++ if (err) ++ goto end; + -+static int kbase_api_job_submit(struct kbase_context *kctx, -+ struct kbase_ioctl_job_submit *submit) -+{ -+ void __user *user_addr = NULL; ++ err = kbase_ipa_model_add_param_s32(model, "ts", ++ model_data->ts, 4, true); ++ if (err) ++ goto end; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_addr = compat_ptr(submit->addr.compat_value); -+ else -+#endif -+ user_addr = submit->addr.value; ++ err = kbase_ipa_model_add_param_string(model, "thermal-zone", ++ model_data->tz_name, ++ sizeof(model_data->tz_name), true); + -+ return kbase_jd_submit(kctx, user_addr, submit->nr_atoms, -+ submit->stride, false); ++end: ++ return err; +} + -+static int kbase_api_get_gpuprops(struct kbase_context *kctx, -+ struct kbase_ioctl_get_gpuprops *get_props) ++static int kbase_simple_power_model_init(struct kbase_ipa_model *model) +{ -+ struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props; + int err; ++ struct kbase_ipa_model_simple_data *model_data; + -+ if (get_props->flags != 0) { -+ dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops"); -+ return -EINVAL; -+ } ++ model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), ++ GFP_KERNEL); ++ if (!model_data) ++ return -ENOMEM; + -+ if (get_props->size == 0) -+ return kprops->prop_buffer_size; -+ if (get_props->size < kprops->prop_buffer_size) -+ return -EINVAL; ++ model->model_data = (void *) model_data; + -+ err = copy_to_user(get_props->buffer.value, kprops->prop_buffer, -+ kprops->prop_buffer_size); -+ if (err) -+ return err; -+ return kprops->prop_buffer_size; ++ err = add_params(model); ++ ++ return err; +} + -+static int kbase_api_post_term(struct kbase_context *kctx) ++static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) +{ -+ kbase_event_close(kctx); -+ return 0; -+} ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; + -+static int 
kbase_api_mem_alloc(struct kbase_context *kctx, -+ union kbase_ioctl_mem_alloc *alloc) -+{ -+ struct kbase_va_region *reg; -+ u64 flags = alloc->in.flags; -+ u64 gpu_va; ++ if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { ++ model_data->gpu_tz = NULL; ++ } else { ++ model_data->gpu_tz = thermal_zone_get_zone_by_name(model_data->tz_name); + -+#if defined(CONFIG_64BIT) -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ /* force SAME_VA if a 64-bit client */ -+ flags |= BASE_MEM_SAME_VA; ++ if (IS_ERR(model_data->gpu_tz)) { ++ pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", ++ PTR_ERR(model_data->gpu_tz), ++ model_data->tz_name); ++ model_data->gpu_tz = NULL; ++ return -EPROBE_DEFER; ++ } + } -+#endif -+ -+ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, -+ alloc->in.commit_pages, -+ alloc->in.extent, -+ &flags, &gpu_va); -+ -+ if (!reg) -+ return -ENOMEM; -+ -+ alloc->out.flags = flags; -+ alloc->out.gpu_va = gpu_va; + + return 0; +} + -+static int kbase_api_mem_query(struct kbase_context *kctx, -+ union kbase_ioctl_mem_query *query) -+{ -+ return kbase_mem_query(kctx, query->in.gpu_addr, -+ query->in.query, &query->out.value); -+} -+ -+static int kbase_api_mem_free(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_free *free) -+{ -+ return kbase_mem_free(kctx, free->gpu_addr); -+} -+ -+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, -+ struct kbase_ioctl_hwcnt_reader_setup *setup) ++static void kbase_simple_power_model_term(struct kbase_ipa_model *model) +{ -+ int ret; -+ struct kbase_uk_hwcnt_reader_setup args = { -+ .buffer_count = setup->buffer_count, -+ .jm_bm = setup->jm_bm, -+ .shader_bm = setup->shader_bm, -+ .tiler_bm = setup->tiler_bm, -+ .mmu_l2_bm = setup->mmu_l2_bm -+ }; -+ -+ mutex_lock(&kctx->vinstr_cli_lock); -+ ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args); -+ mutex_unlock(&kctx->vinstr_cli_lock); ++ struct kbase_ipa_model_simple_data *model_data = ++ (struct kbase_ipa_model_simple_data *)model->model_data; + -+ if (ret) -+ return ret; -+ return args.fd; ++ kfree(model_data); +} + -+static int kbase_api_hwcnt_enable(struct kbase_context *kctx, -+ struct kbase_ioctl_hwcnt_enable *enable) -+{ -+ int ret; -+ struct kbase_uk_hwcnt_setup args = { -+ .dump_buffer = enable->dump_buffer, -+ .jm_bm = enable->jm_bm, -+ .shader_bm = enable->shader_bm, -+ .tiler_bm = enable->tiler_bm, -+ .mmu_l2_bm = enable->mmu_l2_bm -+ }; ++struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { ++ .name = "mali-simple-power-model", ++ .init = &kbase_simple_power_model_init, ++ .recalculate = &kbase_simple_power_model_recalculate, ++ .term = &kbase_simple_power_model_term, ++ .get_dynamic_coeff = &model_dynamic_coeff, ++ .get_static_coeff = &model_static_coeff, ++ .do_utilization_scaling_in_framework = true, ++}; +diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +new file mode 100644 +index 000000000..6be0a334f +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +@@ -0,0 +1,311 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ mutex_lock(&kctx->vinstr_cli_lock); -+ ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx, -+ &kctx->vinstr_cli, &args); -+ mutex_unlock(&kctx->vinstr_cli_lock); + -+ return ret; -+} + -+static int kbase_api_hwcnt_dump(struct kbase_context *kctx) -+{ -+ int ret; ++/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, ++ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py ++ * For more information see base/tools/hwconfig_generator/README ++ */ + -+ mutex_lock(&kctx->vinstr_cli_lock); -+ ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli, -+ BASE_HWCNT_READER_EVENT_MANUAL); -+ mutex_unlock(&kctx->vinstr_cli_lock); ++#ifndef _BASE_HWCONFIG_FEATURES_H_ ++#define _BASE_HWCONFIG_FEATURES_H_ + -+ return ret; -+} ++enum base_hw_feature { ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, ++ BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_V4, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_AARCH64_MMU, ++ BASE_HW_FEATURE_END ++}; + -+static int kbase_api_hwcnt_clear(struct kbase_context *kctx) -+{ -+ int ret; ++static const enum base_hw_feature base_hw_features_generic[] = { ++ BASE_HW_FEATURE_END ++}; + -+ mutex_lock(&kctx->vinstr_cli_lock); -+ ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli); -+ mutex_unlock(&kctx->vinstr_cli_lock); ++static const enum base_hw_feature base_hw_features_t60x[] = { ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_V4, ++ BASE_HW_FEATURE_END ++}; + -+ return ret; -+} ++static const enum base_hw_feature base_hw_features_t62x[] = { ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_V4, ++ BASE_HW_FEATURE_END ++}; + -+static int kbase_api_disjoint_query(struct kbase_context *kctx, -+ struct kbase_ioctl_disjoint_query *query) -+{ -+ query->counter = kbase_disjoint_event_get(kctx->kbdev); ++static const enum base_hw_feature base_hw_features_t72x[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_V4, ++ BASE_HW_FEATURE_END ++}; + -+ return 0; -+} ++static const enum base_hw_feature base_hw_features_t76x[] = { ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ 
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_END ++}; + -+static int kbase_api_get_ddk_version(struct kbase_context *kctx, -+ struct kbase_ioctl_get_ddk_version *version) -+{ -+ int ret; -+ int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); ++static const enum base_hw_feature base_hw_features_tFxx[] = { ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ if (version->version_buffer.value == NULL) -+ return len; ++static const enum base_hw_feature base_hw_features_t83x[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ if (version->size < len) -+ return -EOVERFLOW; ++static const enum base_hw_feature base_hw_features_t82x[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_END ++}; + -+ ret = copy_to_user(version->version_buffer.value, -+ KERNEL_SIDE_DDK_VERSION_STRING, -+ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); ++static const enum base_hw_feature base_hw_features_tMIx[] = { ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ 
BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_END ++}; + -+ if (ret) -+ return ret; ++static const enum base_hw_feature base_hw_features_tHEx[] = { ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_END ++}; + -+ return len; -+} ++static const enum base_hw_feature base_hw_features_tSIx[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_END ++}; + -+static int kbase_api_mem_jit_init(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_jit_init *jit_init) -+{ -+ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages); -+} + -+static int kbase_api_mem_sync(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_sync *sync) -+{ -+#ifdef CONFIG_MALI_COH_USER -+ return 0; -+#endif -+ struct basep_syncset sset = { -+ .mem_handle.basep.handle = sync->handle, -+ .user_addr = sync->user_addr, -+ .size = sync->size, -+ .type = sync->type -+ }; ++#ifdef MALI_INCLUDE_TKAX ++static const enum base_hw_feature base_hw_features_tKAx[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ 
BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_END ++}; + -+ return kbase_sync_now(kctx, &sset); -+} ++#endif /* MALI_INCLUDE_TKAX */ + -+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, -+ union kbase_ioctl_mem_find_cpu_offset *find) -+{ -+ return kbasep_find_enclosing_cpu_mapping_offset( -+ kctx, -+ find->in.cpu_addr, -+ find->in.size, -+ &find->out.offset); -+} ++#ifdef MALI_INCLUDE_TTRX ++static const enum base_hw_feature base_hw_features_tTRx[] = { ++ BASE_HW_FEATURE_33BIT_VA, ++ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, ++ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, ++ BASE_HW_FEATURE_XAFFINITY, ++ BASE_HW_FEATURE_WARPING, ++ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, ++ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, ++ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, ++ BASE_HW_FEATURE_BRNDOUT_CC, ++ BASE_HW_FEATURE_BRNDOUT_KILL, ++ BASE_HW_FEATURE_LD_ST_LEA_TEX, ++ BASE_HW_FEATURE_LD_ST_TILEBUFFER, ++ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, ++ BASE_HW_FEATURE_MRT, ++ BASE_HW_FEATURE_MSAA_16X, ++ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, ++ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, ++ BASE_HW_FEATURE_T7XX_PAIRING_RULES, ++ BASE_HW_FEATURE_TEST4_DATUM_MODE, ++ BASE_HW_FEATURE_FLUSH_REDUCTION, ++ BASE_HW_FEATURE_PROTECTED_MODE, ++ BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, ++ BASE_HW_FEATURE_COHERENCY_REG, ++ BASE_HW_FEATURE_END ++}; + -+static int kbase_api_get_context_id(struct kbase_context *kctx, -+ struct kbase_ioctl_get_context_id *info) -+{ -+ info->id = kctx->id; ++#endif /* MALI_INCLUDE_TTRX */ + -+ return 0; -+} ++#endif /* _BASE_HWCONFIG_FEATURES_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +new file mode 100644 +index 000000000..6d7e5c57e +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +@@ -0,0 +1,1098 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static int kbase_api_tlstream_acquire(struct kbase_context *kctx, -+ struct kbase_ioctl_tlstream_acquire *acquire) -+{ -+ return kbase_tlstream_acquire(kctx, acquire->flags); -+} + -+static int kbase_api_tlstream_flush(struct kbase_context *kctx) -+{ -+ kbase_tlstream_flush_streams(); + -+ return 0; -+} ++/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, ++ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py ++ * For more information see base/tools/hwconfig_generator/README ++ */ + -+static int kbase_api_mem_commit(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_commit *commit) -+{ -+ return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); -+} ++#ifndef _BASE_HWCONFIG_ISSUES_H_ ++#define _BASE_HWCONFIG_ISSUES_H_ + -+static int kbase_api_mem_alias(struct kbase_context *kctx, -+ union kbase_ioctl_mem_alias *alias) -+{ -+ struct base_mem_aliasing_info *ai; -+ void __user *user_addr = NULL; -+ u64 flags; -+ int err; ++enum base_hw_issue { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_6367, ++ BASE_HW_ISSUE_6398, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_6787, ++ BASE_HW_ISSUE_7027, ++ BASE_HW_ISSUE_7144, ++ BASE_HW_ISSUE_7304, ++ BASE_HW_ISSUE_8073, ++ BASE_HW_ISSUE_8186, ++ BASE_HW_ISSUE_8215, ++ BASE_HW_ISSUE_8245, ++ BASE_HW_ISSUE_8250, ++ BASE_HW_ISSUE_8260, ++ BASE_HW_ISSUE_8280, ++ BASE_HW_ISSUE_8316, ++ BASE_HW_ISSUE_8381, ++ BASE_HW_ISSUE_8394, ++ BASE_HW_ISSUE_8401, ++ BASE_HW_ISSUE_8408, ++ BASE_HW_ISSUE_8443, ++ BASE_HW_ISSUE_8456, ++ BASE_HW_ISSUE_8564, ++ BASE_HW_ISSUE_8634, ++ BASE_HW_ISSUE_8778, ++ BASE_HW_ISSUE_8791, ++ BASE_HW_ISSUE_8833, ++ BASE_HW_ISSUE_8879, ++ BASE_HW_ISSUE_8896, ++ BASE_HW_ISSUE_8975, ++ BASE_HW_ISSUE_8986, ++ BASE_HW_ISSUE_8987, ++ BASE_HW_ISSUE_9010, ++ BASE_HW_ISSUE_9418, ++ BASE_HW_ISSUE_9423, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_9510, ++ BASE_HW_ISSUE_9566, ++ BASE_HW_ISSUE_9630, ++ BASE_HW_ISSUE_10127, ++ BASE_HW_ISSUE_10327, ++ BASE_HW_ISSUE_10410, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10487, ++ BASE_HW_ISSUE_10607, ++ BASE_HW_ISSUE_10632, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10676, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10797, ++ BASE_HW_ISSUE_10817, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10959, ++ BASE_HW_ISSUE_10969, ++ BASE_HW_ISSUE_10984, ++ BASE_HW_ISSUE_10995, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11035, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ if (alias->in.nents == 0 || alias->in.nents > 2048) -+ return -EINVAL; ++static const enum base_hw_issue base_hw_issues_generic[] = { ++ BASE_HW_ISSUE_END ++}; + -+ ai = vmalloc(sizeof(*ai) * alias->in.nents); -+ if (!ai) -+ return -ENOMEM; ++static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { ++ BASE_HW_ISSUE_6367, ++ BASE_HW_ISSUE_6398, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_6787, ++ BASE_HW_ISSUE_7027, ++ BASE_HW_ISSUE_7144, ++ BASE_HW_ISSUE_7304, ++ BASE_HW_ISSUE_8073, ++ BASE_HW_ISSUE_8186, ++ BASE_HW_ISSUE_8215, ++ 
BASE_HW_ISSUE_8245, ++ BASE_HW_ISSUE_8250, ++ BASE_HW_ISSUE_8260, ++ BASE_HW_ISSUE_8280, ++ BASE_HW_ISSUE_8316, ++ BASE_HW_ISSUE_8381, ++ BASE_HW_ISSUE_8394, ++ BASE_HW_ISSUE_8401, ++ BASE_HW_ISSUE_8408, ++ BASE_HW_ISSUE_8443, ++ BASE_HW_ISSUE_8456, ++ BASE_HW_ISSUE_8564, ++ BASE_HW_ISSUE_8634, ++ BASE_HW_ISSUE_8778, ++ BASE_HW_ISSUE_8791, ++ BASE_HW_ISSUE_8833, ++ BASE_HW_ISSUE_8896, ++ BASE_HW_ISSUE_8975, ++ BASE_HW_ISSUE_8986, ++ BASE_HW_ISSUE_8987, ++ BASE_HW_ISSUE_9010, ++ BASE_HW_ISSUE_9418, ++ BASE_HW_ISSUE_9423, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_9510, ++ BASE_HW_ISSUE_9566, ++ BASE_HW_ISSUE_9630, ++ BASE_HW_ISSUE_10410, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10487, ++ BASE_HW_ISSUE_10607, ++ BASE_HW_ISSUE_10632, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10676, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10969, ++ BASE_HW_ISSUE_10984, ++ BASE_HW_ISSUE_10995, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11035, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_3964, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_addr = -+ compat_ptr(alias->in.aliasing_info.compat_value); -+ else -+#endif -+ user_addr = alias->in.aliasing_info.value; ++static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { ++ BASE_HW_ISSUE_6367, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_6787, ++ BASE_HW_ISSUE_7027, ++ BASE_HW_ISSUE_7304, ++ BASE_HW_ISSUE_8408, ++ BASE_HW_ISSUE_8564, ++ BASE_HW_ISSUE_8778, ++ BASE_HW_ISSUE_8975, ++ BASE_HW_ISSUE_9010, ++ BASE_HW_ISSUE_9418, ++ BASE_HW_ISSUE_9423, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_9510, ++ BASE_HW_ISSUE_10410, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10487, ++ BASE_HW_ISSUE_10607, ++ BASE_HW_ISSUE_10632, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10676, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10969, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11035, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ err = copy_from_user(ai, user_addr, sizeof(*ai) * alias->in.nents); -+ if (err) { -+ vfree(ai); -+ return err; -+ } ++static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { ++ BASE_HW_ISSUE_6367, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_6787, ++ BASE_HW_ISSUE_7027, ++ BASE_HW_ISSUE_7304, ++ BASE_HW_ISSUE_8408, ++ BASE_HW_ISSUE_8564, ++ BASE_HW_ISSUE_8778, ++ BASE_HW_ISSUE_8975, ++ BASE_HW_ISSUE_9010, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_9510, ++ BASE_HW_ISSUE_10410, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10487, ++ BASE_HW_ISSUE_10607, ++ BASE_HW_ISSUE_10632, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10676, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11035, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ flags = alias->in.flags; ++static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { ++ BASE_HW_ISSUE_6402, 
++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10127, ++ BASE_HW_ISSUE_10327, ++ BASE_HW_ISSUE_10410, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10487, ++ BASE_HW_ISSUE_10607, ++ BASE_HW_ISSUE_10632, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10676, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10817, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10959, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11035, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ alias->out.gpu_va = kbase_mem_alias(kctx, &flags, -+ alias->in.stride, alias->in.nents, -+ ai, &alias->out.va_pages); ++static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10959, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ alias->out.flags = flags; ++static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_10959, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ vfree(ai); ++static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ if (alias->out.gpu_va == 0) -+ return -ENOMEM; ++static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ 
return 0; -+} ++static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_mem_import(struct kbase_context *kctx, -+ union kbase_ioctl_mem_import *import) -+{ -+ int ret; -+ u64 flags = import->in.flags; -+ void __user *phandle; ++static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ phandle = compat_ptr(import->in.phandle.compat_value); -+ else -+#endif -+ phandle = import->in.phandle.value; ++static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_26, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3542, ++ BASE_HW_ISSUE_T76X_3556, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ ret = kbase_mem_import(kctx, -+ import->in.type, -+ phandle, -+ import->in.padding, -+ &import->out.gpu_va, -+ &import->out.va_pages, -+ &flags); ++static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ import->out.flags = flags; ++static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10797, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ 
BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return ret; -+} ++static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10797, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_mem_flags_change(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_flags_change *change) -+{ -+ return kbase_mem_flags_change(kctx, change->gpu_va, -+ change->flags, change->mask); -+} ++static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10684, ++ BASE_HW_ISSUE_10797, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_stream_create(struct kbase_context *kctx, -+ struct kbase_ioctl_stream_create *stream) -+{ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ int fd, ret; ++static const enum base_hw_issue base_hw_issues_model_t72x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10471, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10797, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+ /* Name must be NULL-terminated and padded with NULLs, so check last -+ * character is NULL -+ */ -+ if (stream->name[sizeof(stream->name)-1] != 0) -+ return -EINVAL; ++static const enum base_hw_issue base_hw_issues_model_t76x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+ ret = kbase_sync_fence_stream_create(stream->name, &fd); ++static const enum base_hw_issue base_hw_issues_model_t60x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_8778, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+ if (ret) -+ return ret; -+ return fd; -+#else -+ return -ENOENT; -+#endif -+} ++static const enum base_hw_issue base_hw_issues_model_t62x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_6402, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10472, ++ BASE_HW_ISSUE_10649, ++ BASE_HW_ISSUE_10931, ++ BASE_HW_ISSUE_11012, ++ BASE_HW_ISSUE_11020, ++ BASE_HW_ISSUE_11024, ++ 
BASE_HW_ISSUE_11042, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3964, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_fence_validate(struct kbase_context *kctx, -+ struct kbase_ioctl_fence_validate *validate) -+{ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ return kbase_sync_fence_validate(validate->fd); -+#else -+ return -ENOENT; -+#endif -+} ++static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_get_profiling_controls(struct kbase_context *kctx, -+ struct kbase_ioctl_get_profiling_controls *controls) -+{ -+ if (controls->count > FBDUMP_CONTROL_MAX) -+ return -EINVAL; ++static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return copy_to_user(controls->buffer.value, -+ &kctx->kbdev->kbase_profiling_controls[ -+ FBDUMP_CONTROL_MIN], -+ controls->count * sizeof(u32)); -+} ++static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_mem_profile_add(struct kbase_context *kctx, -+ struct kbase_ioctl_mem_profile_add *data) -+{ -+ char __user *user_buf; -+ char *buf; -+ int err; ++static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { -+ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n"); -+ return -EINVAL; -+ } ++static const enum base_hw_issue base_hw_issues_model_tFRx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ GPUCORE_1619, ++ 
BASE_HW_ISSUE_END ++}; + -+ buf = kmalloc(data->len, GFP_KERNEL); -+ if (ZERO_OR_NULL_PTR(buf)) -+ return -ENOMEM; ++static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ user_buf = compat_ptr(data->buffer.compat_value); -+ else -+#endif -+ user_buf = data->buffer.value; ++static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ err = copy_from_user(buf, user_buf, data->len); -+ if (err) { -+ kfree(buf); -+ return err; -+ } ++static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3966, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); -+} ++static const enum base_hw_issue base_hw_issues_model_t86x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_soft_event_update(struct kbase_context *kctx, -+ struct kbase_ioctl_soft_event_update *update) -+{ -+ if (update->flags != 0) -+ return -EINVAL; ++static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return kbase_soft_event_update(kctx, update->event, update->new_status); -+} ++static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3979, ++ 
BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#if MALI_UNIT_TEST -+static int kbase_api_tlstream_test(struct kbase_context *kctx, -+ struct kbase_ioctl_tlstream_test *test) -+{ -+ kbase_tlstream_test( -+ test->tpw_count, -+ test->msg_delay, -+ test->msg_count, -+ test->aux_msg); ++static const enum base_hw_issue base_hw_issues_model_t83x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return 0; -+} ++static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3964, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+static int kbase_api_tlstream_stats(struct kbase_context *kctx, -+ struct kbase_ioctl_tlstream_stats *stats) -+{ -+ kbase_tlstream_stats( -+ &stats->bytes_collected, -+ &stats->bytes_generated); ++static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1909, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+ return 0; -+} -+#endif /* MALI_UNIT_TEST */ ++static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10821, ++ BASE_HW_ISSUE_10883, ++ BASE_HW_ISSUE_10946, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T720_1386, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_T76X_3960, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#define KBASE_HANDLE_IOCTL(cmd, function) \ -+ case cmd: \ -+ do { \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ -+ return function(kctx); \ -+ } while (0) ++static const enum base_hw_issue base_hw_issues_model_t82x[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_11051, ++ BASE_HW_ISSUE_T76X_1963, ++ BASE_HW_ISSUE_T76X_3086, ++ BASE_HW_ISSUE_T76X_3700, ++ BASE_HW_ISSUE_T76X_3793, ++ BASE_HW_ISSUE_T76X_3979, ++ BASE_HW_ISSUE_TMIX_7891, ++ GPUCORE_1619, ++ BASE_HW_ISSUE_END ++}; + -+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type) \ -+ case cmd: \ -+ do { \ -+ type param; \ -+ int err; \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ err = copy_from_user(¶m, uarg, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ return function(kctx, ¶m); \ -+ } while (0) ++static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { ++ 
BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_T76X_3953, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type) \ -+ case cmd: \ -+ do { \ -+ type param; \ -+ int ret, err; \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ ret = function(kctx, ¶m); \ -+ err = copy_to_user(uarg, ¶m, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ return ret; \ -+ } while (0) ++static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_11054, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8463, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_TMIX_8438, ++ BASE_HW_ISSUE_END ++}; + -+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type) \ -+ case cmd: \ -+ do { \ -+ type param; \ -+ int ret, err; \ -+ BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \ -+ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ -+ err = copy_from_user(¶m, uarg, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ ret = function(kctx, ¶m); \ -+ err = copy_to_user(uarg, ¶m, sizeof(param)); \ -+ if (err) \ -+ return -EFAULT; \ -+ return ret; \ -+ } while (0) ++static const enum base_hw_issue base_hw_issues_model_tMIx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_7940, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TMIX_8138, ++ BASE_HW_ISSUE_TMIX_8206, ++ BASE_HW_ISSUE_TMIX_8343, ++ BASE_HW_ISSUE_TMIX_8456, ++ BASE_HW_ISSUE_END ++}; + -+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -+{ -+ struct kbase_context *kctx = filp->private_data; -+ struct kbase_device *kbdev = kctx->kbdev; -+ void __user *uarg = (void __user *)arg; ++static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_END ++}; + -+ /* The UK ioctl values overflow the cmd field causing the type to be -+ * incremented -+ */ -+ if (_IOC_TYPE(cmd) == LINUX_UK_BASE_MAGIC+2) -+ return kbase_legacy_ioctl(filp, cmd, arg); ++static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_10682, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_END ++}; + -+ /* The UK version check IOCTL doesn't overflow the cmd field, so is -+ * handled separately here -+ */ -+ if (cmd == _IOC(_IOC_READ|_IOC_WRITE, LINUX_UK_BASE_MAGIC, -+ UKP_FUNC_ID_CHECK_VERSION, -+ sizeof(struct uku_version_check_args))) -+ return kbase_legacy_ioctl(filp, cmd, arg); ++static const enum base_hw_issue base_hw_issues_model_tHEx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_7891, ++ BASE_HW_ISSUE_TMIX_8042, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_END ++}; + -+ /* Only these ioctls are available until setup is complete */ -+ switch (cmd) { -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, -+ kbase_api_handshake, -+ 
struct kbase_ioctl_version_check); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, -+ kbase_api_set_flags, -+ struct kbase_ioctl_set_flags); -+ } ++static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ /* Block call until version handshake and setup is complete */ -+ if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete)) -+ return -EINVAL; ++static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ /* Normal ioctls */ -+ switch (cmd) { -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, -+ kbase_api_job_submit, -+ struct kbase_ioctl_job_submit); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, -+ kbase_api_get_gpuprops, -+ struct kbase_ioctl_get_gpuprops); -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, -+ kbase_api_post_term); -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, -+ kbase_api_mem_alloc, -+ union kbase_ioctl_mem_alloc); -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, -+ kbase_api_mem_query, -+ union kbase_ioctl_mem_query); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, -+ kbase_api_mem_free, -+ struct kbase_ioctl_mem_free); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, -+ kbase_api_hwcnt_reader_setup, -+ struct kbase_ioctl_hwcnt_reader_setup); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, -+ kbase_api_hwcnt_enable, -+ struct kbase_ioctl_hwcnt_enable); -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, -+ kbase_api_hwcnt_dump); -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, -+ kbase_api_hwcnt_clear); -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, -+ kbase_api_disjoint_query, -+ struct kbase_ioctl_disjoint_query); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, -+ kbase_api_get_ddk_version, -+ struct kbase_ioctl_get_ddk_version); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, -+ kbase_api_mem_jit_init, -+ struct kbase_ioctl_mem_jit_init); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, -+ kbase_api_mem_sync, -+ struct kbase_ioctl_mem_sync); -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, -+ kbase_api_mem_find_cpu_offset, -+ union kbase_ioctl_mem_find_cpu_offset); -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, -+ kbase_api_get_context_id, -+ struct kbase_ioctl_get_context_id); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, -+ kbase_api_tlstream_acquire, -+ struct kbase_ioctl_tlstream_acquire); -+ KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, -+ kbase_api_tlstream_flush); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, -+ kbase_api_mem_commit, -+ struct kbase_ioctl_mem_commit); -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, -+ kbase_api_mem_alias, -+ union kbase_ioctl_mem_alias); -+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, -+ kbase_api_mem_import, -+ union kbase_ioctl_mem_import); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, -+ kbase_api_mem_flags_change, -+ struct kbase_ioctl_mem_flags_change); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, -+ kbase_api_stream_create, -+ struct kbase_ioctl_stream_create); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, -+ kbase_api_fence_validate, -+ struct kbase_ioctl_fence_validate); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS, -+ kbase_api_get_profiling_controls, -+ struct kbase_ioctl_get_profiling_controls); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, -+ kbase_api_mem_profile_add, -+ 
struct kbase_ioctl_mem_profile_add); -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, -+ kbase_api_soft_event_update, -+ struct kbase_ioctl_soft_event_update); ++static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+#if MALI_UNIT_TEST -+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, -+ kbase_api_tlstream_test, -+ struct kbase_ioctl_tlstream_test); -+ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, -+ kbase_api_tlstream_stats, -+ struct kbase_ioctl_tlstream_stats); -+#endif -+ } ++static const enum base_hw_issue base_hw_issues_model_tSIx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); + -+ return -ENOIOCTLCMD; -+} + -+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) -+{ -+ struct kbase_context *kctx = filp->private_data; -+ struct base_jd_event_v2 uevent; -+ int out_count = 0; ++#ifdef MALI_INCLUDE_TKAX ++static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ if (count < sizeof(uevent)) -+ return -ENOBUFS; ++#endif /* MALI_INCLUDE_TKAX */ + -+ do { -+ while (kbase_event_dequeue(kctx, &uevent)) { -+ if (out_count > 0) -+ goto out; ++#ifdef MALI_INCLUDE_TKAX ++static const enum base_hw_issue base_hw_issues_model_tKAx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ if (filp->f_flags & O_NONBLOCK) -+ return -EAGAIN; ++#endif /* MALI_INCLUDE_TKAX */ + -+ if (wait_event_interruptible(kctx->event_queue, -+ kbase_event_pending(kctx)) != 0) -+ return -ERESTARTSYS; -+ } -+ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { -+ if (out_count == 0) -+ return -EPIPE; -+ goto out; -+ } ++#ifdef MALI_INCLUDE_TTRX ++static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) -+ return -EFAULT; ++#endif /* MALI_INCLUDE_TTRX */ + -+ buf += sizeof(uevent); -+ out_count++; -+ count -= sizeof(uevent); -+ } while (count >= sizeof(uevent)); ++#ifdef MALI_INCLUDE_TTRX ++static const enum base_hw_issue base_hw_issues_model_tTRx[] = { ++ BASE_HW_ISSUE_5736, ++ BASE_HW_ISSUE_9435, ++ BASE_HW_ISSUE_TMIX_8133, ++ BASE_HW_ISSUE_TSIX_1116, ++ BASE_HW_ISSUE_END ++}; + -+ out: -+ return out_count * sizeof(uevent); -+} ++#endif /* MALI_INCLUDE_TTRX */ + -+static unsigned int kbase_poll(struct file *filp, poll_table *wait) -+{ -+ struct kbase_context *kctx = filp->private_data; ++#endif /* _BASE_HWCONFIG_ISSUES_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h +new file mode 100644 +index 000000000..ea5e473ca +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_base_kernel.h +@@ -0,0 +1,1858 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ poll_wait(filp, &kctx->event_queue, wait); -+ if (kbase_event_pending(kctx)) -+ return POLLIN | POLLRDNORM; + -+ return 0; -+} + -+void kbase_event_wakeup(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx); + -+ wake_up_interruptible(&kctx->event_queue); -+} + -+KBASE_EXPORT_TEST_API(kbase_event_wakeup); ++/** ++ * @file ++ * Base structures shared with the kernel. ++ */ + -+static int kbase_check_flags(int flags) -+{ -+ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always -+ * closes the file descriptor in a child process. -+ */ -+ if (0 == (flags & O_CLOEXEC)) -+ return -EINVAL; ++#ifndef _BASE_KERNEL_H_ ++#define _BASE_KERNEL_H_ + -+ return 0; -+} ++#ifndef __user ++#define __user ++#endif + ++/* Support UK6 IOCTLS */ ++#define BASE_LEGACY_UK6_SUPPORT 1 + -+/** -+ * align_and_check - Align the specified pointer to the provided alignment and -+ * check that it is still in range. -+ * @gap_end: Highest possible start address for allocation (end of gap in -+ * address space) -+ * @gap_start: Start address of current memory area / gap in address space -+ * @info: vm_unmapped_area_info structure passed to caller, containing -+ * alignment, length and limits for the allocation -+ * @is_shader_code: True if the allocation is for shader code (which has -+ * additional alignment requirements) -+ * -+ * Return: true if gap_end is now aligned correctly and is still in range, -+ * false otherwise -+ */ -+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, -+ struct vm_unmapped_area_info *info, bool is_shader_code) -+{ -+ /* Compute highest gap address at the desired alignment */ -+ (*gap_end) -= info->length; -+ (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; ++/* Support UK7 IOCTLS */ ++/* NB: To support UK6 we also need to support UK7 */ ++#define BASE_LEGACY_UK7_SUPPORT 1 + -+ if (is_shader_code) { -+ /* Check for 4GB boundary */ -+ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) -+ (*gap_end) -= (info->align_offset ? info->align_offset : -+ info->length); -+ if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) -+ (*gap_end) -= (info->align_offset ? info->align_offset : -+ info->length); ++/* Support UK8 IOCTLS */ ++#define BASE_LEGACY_UK8_SUPPORT 1 + -+ if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + -+ info->length) & BASE_MEM_MASK_4GB)) -+ return false; -+ } ++/* Support UK9 IOCTLS */ ++#define BASE_LEGACY_UK9_SUPPORT 1 + ++/* Support UK10_2 IOCTLS */ ++#define BASE_LEGACY_UK10_2_SUPPORT 1 + -+ if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) -+ return false; ++/* Support UK10_4 IOCTLS */ ++#define BASE_LEGACY_UK10_4_SUPPORT 1 + ++typedef struct base_mem_handle { ++ struct { ++ u64 handle; ++ } basep; ++} base_mem_handle; + -+ return true; -+} ++#include "mali_base_mem_priv.h" ++#include "mali_kbase_profiling_gator_api.h" ++#include "mali_midg_coherency.h" ++#include "mali_kbase_gpu_id.h" + -+/* The following function is taken from the kernel and just -+ * renamed. As it's not exported to modules we must copy-paste it here. ++/* ++ * Dependency stuff, keep it private for now. May want to expose it if ++ * we decide to make the number of semaphores a configurable ++ * option. 
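++ *
++ * A minimal usage sketch (illustrative only; `n` is a hypothetical atom
++ * number, not a name from this header): the macros below treat the
++ * dependency semaphores as a bitmap of BASE_JD_ATOM_COUNT bits, 32 bits
++ * per word, so marking atom n as pending could look like:
++ *
++ *   u32 sem[BASEP_JD_SEM_ARRAY_SIZE] = { 0 };
++ *   sem[BASEP_JD_SEM_WORD_NR(n)] |= BASEP_JD_SEM_MASK_IN_WORD(n);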
+ */ ++#define BASE_JD_ATOM_COUNT 512 + -+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info -+ *info, bool is_shader_code) -+{ -+#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ unsigned long length, low_limit, high_limit, gap_start, gap_end; ++#define BASEP_JD_SEM_PER_WORD_LOG2 5 ++#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2) ++#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2) ++#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) ++#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) + -+ /* Adjust search length to account for worst case alignment overhead */ -+ length = info->length + info->align_mask; -+ if (length < info->length) -+ return -ENOMEM; ++/* Set/reset values for a software event */ ++#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) ++#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + -+ /* -+ * Adjust search limits by the desired length. -+ * See implementation comment at top of unmapped_area(). -+ */ -+ gap_end = info->high_limit; -+ if (gap_end < length) -+ return -ENOMEM; -+ high_limit = gap_end - length; ++#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 + -+ if (info->low_limit > high_limit) -+ return -ENOMEM; -+ low_limit = info->low_limit + length; ++#define BASE_MAX_COHERENT_GROUPS 16 + -+ /* Check highest gap, which does not precede any rbtree node */ -+ gap_start = mm->highest_vm_end; -+ if (gap_start <= high_limit) { -+ if (align_and_check(&gap_end, gap_start, info, is_shader_code)) -+ return gap_end; -+ } ++#if defined CDBG_ASSERT ++#define LOCAL_ASSERT CDBG_ASSERT ++#elif defined KBASE_DEBUG_ASSERT ++#define LOCAL_ASSERT KBASE_DEBUG_ASSERT ++#else ++#error assert macro not defined! ++#endif + -+ /* Check if rbtree root looks promising */ -+ if (RB_EMPTY_ROOT(&mm->mm_rb)) -+ return -ENOMEM; -+ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); -+ if (vma->rb_subtree_gap < length) -+ return -ENOMEM; ++#if defined PAGE_MASK ++#define LOCAL_PAGE_LSB ~PAGE_MASK ++#else ++#include + -+ while (true) { -+ /* Visit right subtree if it looks promising */ -+ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; -+ if (gap_start <= high_limit && vma->vm_rb.rb_right) { -+ struct vm_area_struct *right = -+ rb_entry(vma->vm_rb.rb_right, -+ struct vm_area_struct, vm_rb); -+ if (right->rb_subtree_gap >= length) { -+ vma = right; -+ continue; -+ } -+ } ++#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 ++#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) ++#else ++#error Failed to find page size ++#endif ++#endif + -+check_current: -+ /* Check if current node has a suitable gap */ -+ gap_end = vma->vm_start; -+ if (gap_end < low_limit) -+ return -ENOMEM; -+ if (gap_start <= high_limit && gap_end - gap_start >= length) { -+ /* We found a suitable gap. Clip it with the original -+ * high_limit. 
*/ -+ if (gap_end > info->high_limit) -+ gap_end = info->high_limit; ++/** 32/64-bit neutral way to represent pointers */ ++typedef union kbase_pointer { ++ void __user *value; /**< client should store their pointers here */ ++ u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */ ++ u64 sizer; /**< Force 64-bit storage for all clients regardless */ ++} kbase_pointer; + -+ if (align_and_check(&gap_end, gap_start, info, -+ is_shader_code)) -+ return gap_end; -+ } ++/** ++ * @addtogroup base_user_api User-side Base APIs ++ * @{ ++ */ + -+ /* Visit left subtree if it looks promising */ -+ if (vma->vm_rb.rb_left) { -+ struct vm_area_struct *left = -+ rb_entry(vma->vm_rb.rb_left, -+ struct vm_area_struct, vm_rb); -+ if (left->rb_subtree_gap >= length) { -+ vma = left; -+ continue; -+ } -+ } ++/** ++ * @addtogroup base_user_api_memory User-side Base Memory APIs ++ * @{ ++ */ + -+ /* Go back up the rbtree to find next candidate node */ -+ while (true) { -+ struct rb_node *prev = &vma->vm_rb; -+ if (!rb_parent(prev)) -+ return -ENOMEM; -+ vma = rb_entry(rb_parent(prev), -+ struct vm_area_struct, vm_rb); -+ if (prev == vma->vm_rb.rb_right) { -+ gap_start = vma->vm_prev ? -+ vma->vm_prev->vm_end : 0; -+ goto check_current; -+ } -+ } -+ } -+#else -+ unsigned long length, high_limit, gap_start, gap_end; ++/** ++ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. ++ * ++ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator ++ * in order to determine the best cache policy. Some combinations are ++ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), ++ * which defines a write-only region on the CPU side, which is ++ * heavily read by the CPU... ++ * Other flags are only meaningful to a particular allocator. ++ * More flags can be added to this list, as long as they don't clash ++ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). ++ */ ++typedef u32 base_mem_alloc_flags; + -+ MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); -+ /* Adjust search length to account for worst case alignment overhead */ -+ length = info->length + info->align_mask; -+ if (length < info->length) -+ return -ENOMEM; ++/* Memory allocation, access/hint flags. ++ * ++ * See base_mem_alloc_flags. ++ */ + -+ /* -+ * Adjust search limits by the desired length. -+ * See implementation comment at top of unmapped_area(). 
-+ */ -+ gap_end = info->high_limit; -+ if (gap_end < length) -+ return -ENOMEM; -+ high_limit = gap_end - length; ++/* IN */ ++/* Read access CPU side ++ */ ++#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + -+ if (info->low_limit > high_limit) -+ return -ENOMEM; ++/* Write access CPU side ++ */ ++#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + -+ while (true) { -+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) -+ return -ENOMEM; -+ gap_end = mas.last + 1; -+ gap_start = mas.min; ++/* Read access GPU side ++ */ ++#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + -+ if (align_and_check(&gap_end, gap_start, info, is_shader_code)) -+ return gap_end; -+ } -+#endif -+ return -ENOMEM; -+} ++/* Write access GPU side ++ */ ++#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + -+static unsigned long kbase_get_unmapped_area(struct file *filp, -+ const unsigned long addr, const unsigned long len, -+ const unsigned long pgoff, const unsigned long flags) -+{ -+ /* based on get_unmapped_area, but simplified slightly due to that some -+ * values are known in advance */ -+ struct kbase_context *kctx = filp->private_data; -+ struct mm_struct *mm = current->mm; -+ struct vm_unmapped_area_info info; -+ unsigned long align_offset = 0; -+ unsigned long align_mask = 0; -+ unsigned long high_limit = mm->mmap_base; -+ unsigned long low_limit = PAGE_SIZE; -+ int cpu_va_bits = BITS_PER_LONG; -+ int gpu_pc_bits = -+ kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; -+ bool is_shader_code = false; -+ unsigned long ret; ++/* Execute allowed on the GPU side ++ */ ++#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + -+ /* err on fixed address */ -+ if ((flags & MAP_FIXED) || addr) -+ return -EINVAL; ++ /* BASE_MEM_HINT flags have been removed, but their values are reserved ++ * for backwards compatibility with older user-space drivers. The values ++ * can be re-used once support for r5p0 user-space drivers is removed, ++ * presumably in r7p0. ++ * ++ * RESERVED: (1U << 5) ++ * RESERVED: (1U << 6) ++ * RESERVED: (1U << 7) ++ * RESERVED: (1U << 8) ++ */ + -+#ifdef CONFIG_64BIT -+ /* too big? 
*/ -+ if (len > TASK_SIZE - SZ_2M) -+ return -ENOMEM; ++/* Grow backing store on GPU Page Fault ++ */ ++#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++/* Page coherence Outer shareable, if available ++ */ ++#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + -+ if (kbase_hw_has_feature(kctx->kbdev, -+ BASE_HW_FEATURE_33BIT_VA)) { -+ high_limit = kctx->same_va_end << PAGE_SHIFT; -+ } else { -+ high_limit = min_t(unsigned long, mm->mmap_base, -+ (kctx->same_va_end << PAGE_SHIFT)); -+ if (len >= SZ_2M) { -+ align_offset = SZ_2M; -+ align_mask = SZ_2M - 1; -+ } -+ } ++/* Page coherence Inner shareable ++ */ ++#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + -+ low_limit = SZ_2M; -+ } else { -+ cpu_va_bits = 32; -+ } -+#endif /* CONFIG_64BIT */ -+ if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && -+ (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { -+ int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); ++/* Should be cached on the CPU ++ */ ++#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + -+ if (!kctx->pending_regions[cookie]) -+ return -EINVAL; ++/* IN/OUT */ ++/* Must have same VA on both the GPU and the CPU ++ */ ++#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + -+ if (!(kctx->pending_regions[cookie]->flags & -+ KBASE_REG_GPU_NX)) { -+ if (cpu_va_bits > gpu_pc_bits) { -+ align_offset = 1ULL << gpu_pc_bits; -+ align_mask = align_offset - 1; -+ is_shader_code = true; -+ } -+ } -+#ifndef CONFIG_64BIT -+ } else { -+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, -+ flags); -+#endif -+ } ++/* OUT */ ++/* Must call mmap to acquire a GPU address for the alloc ++ */ ++#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + -+ info.flags = 0; -+ info.length = len; -+ info.low_limit = low_limit; -+ info.high_limit = high_limit; -+ info.align_offset = align_offset; -+ info.align_mask = align_mask; ++/* IN */ ++/* Page coherence Outer shareable, required. ++ */ ++#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + -+ ret = kbase_unmapped_area_topdown(&info, is_shader_code); ++/* Secure memory ++ */ ++#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16) + -+ if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && -+ high_limit < (kctx->same_va_end << PAGE_SHIFT)) { -+ /* Retry above mmap_base */ -+ info.low_limit = mm->mmap_base; -+ info.high_limit = min_t(u64, TASK_SIZE, -+ (kctx->same_va_end << PAGE_SHIFT)); ++/* Not needed physical memory ++ */ ++#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + -+ ret = kbase_unmapped_area_topdown(&info, is_shader_code); -+ } ++/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the ++ * addresses to be the same ++ */ ++#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + -+ return ret; -+} ++/* Number of bits used as flags for base memory management ++ * ++ * Must be kept in sync with the base_mem_alloc_flags flags ++ */ ++#define BASE_MEM_FLAGS_NR_BITS 19 + -+static const struct file_operations kbase_fops = { -+ .owner = THIS_MODULE, -+ .open = kbase_open, -+ .release = kbase_release, -+ .read = kbase_read, -+ .poll = kbase_poll, -+ .unlocked_ioctl = kbase_ioctl, -+ .compat_ioctl = kbase_ioctl, -+ .mmap = kbase_mmap, -+ .check_flags = kbase_check_flags, -+ .get_unmapped_area = kbase_get_unmapped_area, -+}; ++/* A mask for all output bits, excluding IN/OUT bits. 
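++ *
++ * A sketch of the intended split (an assumption based on the IN/OUT
++ * annotations above, not stated explicitly in this header): callers mask
++ * the flags they request with BASE_MEM_FLAGS_INPUT_MASK, while output
++ * bits such as BASE_MEM_NEED_MMAP are only ever set by the driver on
++ * return, e.g.:
++ *
++ *   base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
++ *   flags &= BASE_MEM_FLAGS_INPUT_MASK;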
++ */ ++#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + -+#ifndef CONFIG_MALI_NO_MALI -+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value) -+{ -+ writel(value, kbdev->reg + offset); -+} ++/* A mask for all input bits, including IN/OUT bits. ++ */ ++#define BASE_MEM_FLAGS_INPUT_MASK \ ++ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + -+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) -+{ -+ return readl(kbdev->reg + offset); -+} -+#endif /* !CONFIG_MALI_NO_MALI */ ++/* A mask for all the flags which are modifiable via the base_mem_set_flags ++ * interface. ++ */ ++#define BASE_MEM_FLAGS_MODIFIABLE \ ++ (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ ++ BASE_MEM_COHERENT_LOCAL) + +/** -+ * show_policy - Show callback for the power_policy sysfs file. ++ * enum base_mem_import_type - Memory types supported by @a base_mem_import + * -+ * This function is called to get the contents of the power_policy sysfs -+ * file. This is a list of the available policies with the currently active one -+ * surrounded by square brackets. ++ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type ++ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id. ++ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) ++ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a ++ * base_mem_import_user_buffer + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * Each type defines what the supported handle type is. + * -+ * Return: The number of bytes output to @buf. ++ * If any new type is added here ARM must be contacted ++ * to allocate a numeric value for it. ++ * Do not just add a new type without synchronizing with ARM ++ * as future releases from ARM might include other new types ++ * which could clash with your custom types. + */ -+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) -+{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_policy *current_policy; -+ const struct kbase_pm_policy *const *policy_list; -+ int policy_count; -+ int i; -+ ssize_t ret = 0; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ current_policy = kbase_pm_get_policy(kbdev); -+ -+ policy_count = kbase_pm_list_policies(&policy_list); -+ -+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { -+ if (policy_list[i] == current_policy) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); -+ else -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); -+ } -+ -+ if (ret < PAGE_SIZE - 1) { -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); -+ } else { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; -+} ++typedef enum base_mem_import_type { ++ BASE_MEM_IMPORT_TYPE_INVALID = 0, ++ BASE_MEM_IMPORT_TYPE_UMP = 1, ++ BASE_MEM_IMPORT_TYPE_UMM = 2, ++ BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 ++} base_mem_import_type; + +/** -+ * set_policy - Store callback for the power_policy sysfs file. -+ * -+ * This function is called when the power_policy sysfs file is written to. -+ * It matches the requested policy against the available policies and if a -+ * matching policy is found calls kbase_pm_set_policy() to change the -+ * policy. 
++ * struct base_mem_import_user_buffer - Handle of an imported user buffer + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @ptr: kbase_pointer to imported user buffer ++ * @length: length of imported user buffer in bytes + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * This structure is used to represent a handle of an imported user buffer. + */ -+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_policy *new_policy = NULL; -+ const struct kbase_pm_policy *const *policy_list; -+ int policy_count; -+ int i; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ policy_count = kbase_pm_list_policies(&policy_list); -+ -+ for (i = 0; i < policy_count; i++) { -+ if (sysfs_streq(policy_list[i]->name, buf)) { -+ new_policy = policy_list[i]; -+ break; -+ } -+ } -+ -+ if (!new_policy) { -+ dev_err(dev, "power_policy: policy not found\n"); -+ return -EINVAL; -+ } + -+ kbase_pm_set_policy(kbdev, new_policy); -+ -+ return count; -+} ++struct base_mem_import_user_buffer { ++ kbase_pointer ptr; ++ u64 length; ++}; + -+/* -+ * The sysfs file power_policy. ++/** ++ * @brief Invalid memory handle. + * -+ * This is used for obtaining information about the available policies, -+ * determining which policy is currently active, and changing the active -+ * policy. ++ * Return value from functions returning @ref base_mem_handle on error. ++ * ++ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro ++ * in C++ code or other situations where compound literals cannot be used. + */ -+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); ++#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) + +/** -+ * show_ca_policy - Show callback for the core_availability_policy sysfs file. -+ * -+ * This function is called to get the contents of the core_availability_policy -+ * sysfs file. This is a list of the available policies with the currently -+ * active one surrounded by square brackets. ++ * @brief Special write-alloc memory handle. + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * A special handle is used to represent a region where a special page is mapped ++ * with a write-alloc cache setup, typically used when the write result of the ++ * GPU isn't needed, but the GPU must write anyway. + * -+ * Return: The number of bytes output to @buf. ++ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro ++ * in C++ code or other situations where compound literals cannot be used. 
+ */ -+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_ca_policy *current_policy; -+ const struct kbase_pm_ca_policy *const *policy_list; -+ int policy_count; -+ int i; -+ ssize_t ret = 0; ++#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) + -+ kbdev = to_kbase_device(dev); ++#define BASEP_MEM_INVALID_HANDLE (0ull << 12) ++#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) ++#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) ++#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) ++#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) ++/* reserved handles ..-64< for future special handles */ ++#define BASE_MEM_COOKIE_BASE (64ul << 12) ++#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ ++ BASE_MEM_COOKIE_BASE) + -+ if (!kbdev) -+ return -ENODEV; ++/* Mask to detect 4GB boundary alignment */ ++#define BASE_MEM_MASK_4GB 0xfffff000UL + -+ current_policy = kbase_pm_ca_get_policy(kbdev); + -+ policy_count = kbase_pm_ca_list_policies(&policy_list); ++/* Bit mask of cookies used for for memory allocation setup */ ++#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ + -+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { -+ if (policy_list[i] == current_policy) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); -+ else -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); -+ } + -+ if (ret < PAGE_SIZE - 1) { -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); -+ } else { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } ++/** ++ * @brief Result codes of changing the size of the backing store allocated to a tmem region ++ */ ++typedef enum base_backing_threshold_status { ++ BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ ++ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ ++ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ ++} base_backing_threshold_status; + -+ return ret; -+} ++/** ++ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs ++ * @{ ++ */ + +/** -+ * set_ca_policy - Store callback for the core_availability_policy sysfs file. -+ * -+ * This function is called when the core_availability_policy sysfs file is -+ * written to. It matches the requested policy against the available policies -+ * and if a matching policy is found calls kbase_pm_set_policy() to change -+ * the policy. -+ * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @brief a basic memory operation (sync-set). + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * The content of this structure is private, and should only be used ++ * by the accessors. 
+ */ -+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ const struct kbase_pm_ca_policy *new_policy = NULL; -+ const struct kbase_pm_ca_policy *const *policy_list; -+ int policy_count; -+ int i; ++typedef struct base_syncset { ++ struct basep_syncset basep_sset; ++} base_syncset; + -+ kbdev = to_kbase_device(dev); ++/** @} end group base_user_api_memory_defered */ + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * Handle to represent imported memory object. ++ * Simple opague handle to imported memory, can't be used ++ * with anything but base_external_resource_init to bind to an atom. ++ */ ++typedef struct base_import_handle { ++ struct { ++ u64 handle; ++ } basep; ++} base_import_handle; + -+ policy_count = kbase_pm_ca_list_policies(&policy_list); ++/** @} end group base_user_api_memory */ + -+ for (i = 0; i < policy_count; i++) { -+ if (sysfs_streq(policy_list[i]->name, buf)) { -+ new_policy = policy_list[i]; -+ break; -+ } -+ } ++/** ++ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs ++ * @{ ++ */ + -+ if (!new_policy) { -+ dev_err(dev, "core_availability_policy: policy not found\n"); -+ return -EINVAL; -+ } ++typedef int platform_fence_type; ++#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) + -+ kbase_pm_ca_set_policy(kbdev, new_policy); ++/** ++ * Base stream handle. ++ * ++ * References an underlying base stream object. ++ */ ++typedef struct base_stream { ++ struct { ++ int fd; ++ } basep; ++} base_stream; + -+ return count; -+} ++/** ++ * Base fence handle. ++ * ++ * References an underlying base fence object. ++ */ ++typedef struct base_fence { ++ struct { ++ int fd; ++ int stream_fd; ++ } basep; ++} base_fence; + -+/* -+ * The sysfs file core_availability_policy ++/** ++ * @brief Per-job data + * -+ * This is used for obtaining information about the available policies, -+ * determining which policy is currently active, and changing the active -+ * policy. ++ * This structure is used to store per-job data, and is completely unused ++ * by the Base driver. It can be used to store things such as callback ++ * function pointer, data to handle job completion. It is guaranteed to be ++ * untouched by the Base driver. + */ -+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy); ++typedef struct base_jd_udata { ++ u64 blob[2]; /**< per-job data array */ ++} base_jd_udata; + -+/* -+ * show_core_mask - Show callback for the core_mask sysfs file. ++/** ++ * @brief Memory aliasing info + * -+ * This function is called to get the contents of the core_mask sysfs file. ++ * Describes a memory handle to be aliased. ++ * A subset of the handle can be chosen for aliasing, given an offset and a ++ * length. ++ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a ++ * region where a special page is mapped with a write-alloc cache setup, ++ * typically used when the write result of the GPU isn't needed, but the GPU ++ * must write anyway. + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * Offset and length are specified in pages. ++ * Offset must be within the size of the handle. ++ * Offset+length must not overrun the size of the handle. + * -+ * Return: The number of bytes output to @buf. 
++ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ++ * @offset Offset within the handle to start aliasing from, in pages. ++ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. ++ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ++ * specifies the number of times the special page is needed. + */ -+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret = 0; ++struct base_mem_aliasing_info { ++ base_mem_handle handle; ++ u64 offset; ++ u64 length; ++}; + -+ kbdev = to_kbase_device(dev); ++/** ++ * struct base_jit_alloc_info - Structure which describes a JIT allocation ++ * request. ++ * @gpu_alloc_addr: The GPU virtual address to write the JIT ++ * allocated GPU virtual address to. ++ * @va_pages: The minimum number of virtual pages required. ++ * @commit_pages: The minimum number of physical pages which ++ * should back the allocation. ++ * @extent: Granularity of physical pages to grow the ++ * allocation by during a fault. ++ * @id: Unique ID provided by the caller, this is used ++ * to pair allocation and free requests. ++ * Zero is not a valid value. ++ */ ++struct base_jit_alloc_info { ++ u64 gpu_alloc_addr; ++ u64 va_pages; ++ u64 commit_pages; ++ u64 extent; ++ u8 id; ++}; + -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * @brief Job dependency type. ++ * ++ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or ++ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without ++ * changing the structure size). ++ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then ++ * errors will not be propagated. ++ */ ++typedef u8 base_jd_dep_type; + -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS0) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[0]); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS1) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[1]); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Current core mask (JS2) : 0x%llX\n", -+ kbdev->pm.debug_core_mask[2]); -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, -+ "Available core mask : 0x%llX\n", -+ kbdev->gpu_props.props.raw_props.shader_present); + -+ return ret; -+} ++#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ ++#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ ++#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** -+ * set_core_mask - Store callback for the core_mask sysfs file. -+ * -+ * This function is called when the core_mask sysfs file is written to. -+ * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @brief Job chain hardware requirements. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * A job chain must specify what GPU features it needs to allow the ++ * driver to schedule the job correctly. By not specifying the ++ * correct settings can/will cause an early job termination. Multiple ++ * values can be ORed together to specify multiple requirements. ++ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex ++ * dependencies, and that doesn't execute anything on the hardware. 
+ */ -+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ u64 new_core_mask[3]; -+ int items; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; ++typedef u32 base_jd_core_req; + -+ items = sscanf(buf, "%llx %llx %llx", -+ &new_core_mask[0], &new_core_mask[1], -+ &new_core_mask[2]); ++/* Requirements that come from the HW */ + -+ if (items == 1) -+ new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; ++/** ++ * No requirement, dependency only ++ */ ++#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + -+ if (items == 1 || items == 3) { -+ u64 shader_present = -+ kbdev->gpu_props.props.raw_props.shader_present; -+ u64 group0_core_mask = -+ kbdev->gpu_props.props.coherency_info.group[0]. -+ core_mask; ++/** ++ * Requires fragment shaders ++ */ ++#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + -+ if ((new_core_mask[0] & shader_present) != new_core_mask[0] || -+ !(new_core_mask[0] & group0_core_mask) || -+ (new_core_mask[1] & shader_present) != -+ new_core_mask[1] || -+ !(new_core_mask[1] & group0_core_mask) || -+ (new_core_mask[2] & shader_present) != -+ new_core_mask[2] || -+ !(new_core_mask[2] & group0_core_mask)) { -+ dev_err(dev, "power_policy: invalid core specification\n"); -+ return -EINVAL; -+ } ++/** ++ * Requires compute shaders ++ * This covers any of the following Midgard Job types: ++ * - Vertex Shader Job ++ * - Geometry Shader Job ++ * - An actual Compute Shader Job ++ * ++ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the ++ * job is specifically just the "Compute Shader" job type, and not the "Vertex ++ * Shader" nor the "Geometry Shader" job type. ++ */ ++#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) ++#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ ++#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ ++#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ + -+ if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || -+ kbdev->pm.debug_core_mask[1] != -+ new_core_mask[1] || -+ kbdev->pm.debug_core_mask[2] != -+ new_core_mask[2]) { -+ unsigned long flags; ++/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++/* Requires fragment job with AFBC encoding */ ++#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + -+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], -+ new_core_mask[1], new_core_mask[2]); ++/** ++ * SW-only requirement: coalesce completion events. ++ * If this bit is set then completion of this atom will not cause an event to ++ * be sent to userspace, whether successful or not; completion events will be ++ * deferred until an atom completes which does not have this bit set. ++ * ++ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. ++ */ ++#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ } ++/** ++ * SW Only requirement: the job chain requires a coherent core group. We don't ++ * mind which coherent core group is used. ++ */ ++#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + -+ return count; -+ } ++/** ++ * SW Only requirement: The performance counters should be enabled only when ++ * they are needed, to reduce power consumption. 
++ */ + -+ dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" -+ "Use format \n" -+ "or \n"); -+ return -EINVAL; -+} ++#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + -+/* -+ * The sysfs file core_mask. ++/** ++ * SW Only requirement: External resources are referenced by this atom. ++ * When external resources are referenced no syncsets can be bundled with the atom ++ * but should instead be part of a NULL jobs inserted into the dependency tree. ++ * The first pre_dep object must be configured for the external resouces to use, ++ * the second pre_dep object can be used to create other dependencies. + * -+ * This is used to restrict shader core availability for debugging purposes. -+ * Reading it will show the current core mask and the mask of cores available. -+ * Writing to it will set the current core mask. ++ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. + */ -+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); ++#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/** -+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs -+ * file. ++ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted ++ * to the hardware but will cause some action to happen within the driver ++ */ ++#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) ++ ++#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) ++#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) ++#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) ++ ++/** ++ * SW Only requirement : Replay job. + * -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The value written to the sysfs file. -+ * @count: The number of bytes written to the sysfs file. ++ * If the preceding job fails, the replay job will cause the jobs specified in ++ * the list of base_jd_replay_payload pointed to by the jc pointer to be ++ * replayed. + * -+ * This allows setting the timeout for software jobs. Waiting soft event wait -+ * jobs will be cancelled after this period expires, while soft fence wait jobs -+ * will print debug information if the fence debug feature is enabled. ++ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT ++ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay ++ * job is failed, as well as any following dependencies. + * -+ * This is expressed in milliseconds. ++ * The replayed jobs will require a number of atom IDs. If there are not enough ++ * free atom IDs then the replay job will fail. + * -+ * Return: count if the function succeeded. An error code on failure. ++ * If the preceding job does not fail, then the replay job is returned as ++ * completed. ++ * ++ * The replayed jobs will never be returned to userspace. The preceding failed ++ * job will be returned to userspace as failed; the status of this job should ++ * be ignored. Completion should be determined by the status of the replay soft ++ * job. ++ * ++ * In order for the jobs to be replayed, the job headers will have to be ++ * modified. The Status field will be reset to NOT_STARTED. If the Job Type ++ * field indicates a Vertex Shader Job then it will be changed to Null Job. ++ * ++ * The replayed jobs have the following assumptions : ++ * ++ * - No external resources. Any required external resources will be held by the ++ * replay atom. 
++ * - Pre-dependencies are created based on job order. ++ * - Atom numbers are automatically assigned. ++ * - device_nr is set to 0. This is not relevant as ++ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. ++ * - Priority is inherited from the replay job. + */ -+static ssize_t set_soft_job_timeout(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int soft_job_timeout_ms; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || -+ (soft_job_timeout_ms <= 0)) -+ return -EINVAL; -+ -+ atomic_set(&kbdev->js_data.soft_job_timeout_ms, -+ soft_job_timeout_ms); ++#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) ++/** ++ * SW only requirement: event wait/trigger job. ++ * ++ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. ++ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the ++ * other waiting jobs. It completes immediately. ++ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it ++ * possible for other jobs to wait upon. It completes immediately. ++ */ ++#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) ++#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) ++#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + -+ return count; -+} ++#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/** -+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs -+ * file. ++ * SW only requirement: Just In Time allocation + * -+ * This will return the timeout for the software jobs. ++ * This job requests a JIT allocation based on the request in the ++ * @base_jit_alloc_info structure which is passed via the jc element of ++ * the atom. + * -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer for the sysfs file contents. ++ * It should be noted that the id entry in @base_jit_alloc_info must not ++ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. + * -+ * Return: The number of bytes output to buf. ++ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE ++ * soft job to free the JIT allocation is still made. ++ * ++ * The job will complete immediately. + */ -+static ssize_t show_soft_job_timeout(struct device *dev, -+ struct device_attribute *attr, -+ char * const buf) -+{ -+ struct kbase_device *kbdev; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ return scnprintf(buf, PAGE_SIZE, "%i\n", -+ atomic_read(&kbdev->js_data.soft_job_timeout_ms)); -+} -+ -+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, -+ show_soft_job_timeout, set_soft_job_timeout); -+ -+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, -+ int default_ticks, u32 old_ticks) -+{ -+ if (timeout_ms > 0) { -+ u64 ticks = timeout_ms * 1000000ULL; -+ do_div(ticks, kbdev->js_data.scheduling_period_ns); -+ if (!ticks) -+ return 1; -+ return ticks; -+ } else if (timeout_ms < 0) { -+ return default_ticks; -+ } else { -+ return old_ticks; -+ } -+} -+ ++#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) +/** -+ * set_js_timeouts - Store callback for the js_timeouts sysfs file. ++ * SW only requirement: Just In Time free + * -+ * This function is called to get the contents of the js_timeouts sysfs -+ * file. This file contains five values separated by whitespace. 
The values -+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, -+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING -+ * configuration values (in that order), with the difference that the js_timeout -+ * values are expressed in MILLISECONDS. ++ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC ++ * to be freed. The ID of the JIT allocation is passed via the jc element of ++ * the atom. + * -+ * The js_timeouts sysfile file allows the current values in -+ * use by the job scheduler to get override. Note that a value needs to -+ * be other than 0 for it to override the current job scheduler value. ++ * The job will complete immediately. ++ */ ++#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) ++ ++/** ++ * SW only requirement: Map external resource + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * This job requests external resource(s) are mapped once the dependencies ++ * of the job have been satisfied. The list of external resources are ++ * passed via the jc element of the atom which is a pointer to a ++ * @base_external_resource_list. ++ */ ++#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) ++/** ++ * SW only requirement: Unmap external resource + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * This job requests external resource(s) are unmapped once the dependencies ++ * of the job has been satisfied. The list of external resources are ++ * passed via the jc element of the atom which is a pointer to a ++ * @base_external_resource_list. + */ -+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int items; -+ long js_soft_stop_ms; -+ long js_soft_stop_ms_cl; -+ long js_hard_stop_ms_ss; -+ long js_hard_stop_ms_cl; -+ long js_hard_stop_ms_dumping; -+ long js_reset_ms_ss; -+ long js_reset_ms_cl; -+ long js_reset_ms_dumping; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", -+ &js_soft_stop_ms, &js_soft_stop_ms_cl, -+ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, -+ &js_hard_stop_ms_dumping, &js_reset_ms_ss, -+ &js_reset_ms_cl, &js_reset_ms_dumping); -+ -+ if (items == 8) { -+ struct kbasep_js_device_data *js_data = &kbdev->js_data; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ -+ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ -+ default, js_data->ticks_name); \ -+ dev_dbg(kbdev->dev, "Overriding " #ticks_name \ -+ " with %lu ticks (%lu ms)\n", \ -+ (unsigned long)js_data->ticks_name, \ -+ ms_name); \ -+ } while (0) -+ -+ UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, -+ DEFAULT_JS_SOFT_STOP_TICKS); -+ UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, -+ DEFAULT_JS_SOFT_STOP_TICKS_CL); -+ UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? 
-+ DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : -+ DEFAULT_JS_HARD_STOP_TICKS_SS); -+ UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, -+ DEFAULT_JS_HARD_STOP_TICKS_CL); -+ UPDATE_TIMEOUT(hard_stop_ticks_dumping, -+ js_hard_stop_ms_dumping, -+ DEFAULT_JS_HARD_STOP_TICKS_DUMPING); -+ UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? -+ DEFAULT_JS_RESET_TICKS_SS_8408 : -+ DEFAULT_JS_RESET_TICKS_SS); -+ UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, -+ DEFAULT_JS_RESET_TICKS_CL); -+ UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, -+ DEFAULT_JS_RESET_TICKS_DUMPING); -+ -+ kbase_js_set_timeouts(kbdev); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return count; -+ } -+ -+ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" -+ "Use format \n" -+ "Write 0 for no change, -1 to restore default timeout\n"); -+ return -EINVAL; -+} -+ -+static unsigned long get_js_timeout_in_ms( -+ u32 scheduling_period_ns, -+ u32 ticks) -+{ -+ u64 ms = (u64)ticks * scheduling_period_ns; -+ -+ do_div(ms, 1000000UL); -+ return ms; -+} ++#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/** -+ * show_js_timeouts - Show callback for the js_timeouts sysfs file. ++ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * -+ * This function is called to get the contents of the js_timeouts sysfs -+ * file. It returns the last set values written to the js_timeouts sysfs file. -+ * If the file didn't get written yet, the values will be current setting in -+ * use. -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. + * -+ * Return: The number of bytes output to @buf. ++ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job ++ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. + */ -+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ unsigned long js_soft_stop_ms; -+ unsigned long js_soft_stop_ms_cl; -+ unsigned long js_hard_stop_ms_ss; -+ unsigned long js_hard_stop_ms_cl; -+ unsigned long js_hard_stop_ms_dumping; -+ unsigned long js_reset_ms_ss; -+ unsigned long js_reset_ms_cl; -+ unsigned long js_reset_ms_dumping; -+ u32 scheduling_period_ns; ++#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * HW Requirement: Use the base_jd_atom::device_nr field to specify a ++ * particular core group ++ * ++ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority ++ * ++ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. ++ * ++ * If the core availability policy is keeping the required core group turned off, then ++ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. 
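++ *
++ * As a sketch (assuming an atom of type base_jd_atom_v2), a compute-only atom
++ * pinned to core group 1 could be described as:
++ *
++ * @code
++ * atom.core_req = BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP;
++ * atom.device_nr = 1;
++ * @endcode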
++ */
++#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
-+ scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
++/**
++ * SW Flag: If this bit is set then the successful completion of this atom
++ * will not cause an event to be sent to userspace
++ */
++#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
+
-+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
-+ scheduling_period_ns, \
-+ kbdev->js_data.name)
++/**
++ * SW Flag: If this bit is set then completion of this atom will not cause an
++ * event to be sent to userspace, whether successful or not.
++ */
++#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
-+ js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
-+ js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
-+ js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
-+ js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
-+ js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
-+ js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
-+ js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
-+ js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
++/**
++ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
++ *
++ * If this bit is set then the GPU's cache will not be cleaned and invalidated
++ * until a GPU job starts which does not have this bit set or a job completes
++ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
++ * the CPU may have written to memory addressed by the job since the last job
++ * without this bit set was submitted.
++ */
++#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
-+#undef GET_TIMEOUT
+
++/**
++ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
++ *
++ * If this bit is set then the GPU's cache will not be cleaned and invalidated
++ * until a GPU job completes which does not have this bit set or a job starts
++ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use if
++ * the CPU may read from or partially overwrite memory addressed by the job
++ * before the next job without this bit set completes.
++ */
++#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
-+ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
-+ js_soft_stop_ms, js_soft_stop_ms_cl,
-+ js_hard_stop_ms_ss, js_hard_stop_ms_cl,
-+ js_hard_stop_ms_dumping, js_reset_ms_ss,
-+ js_reset_ms_cl, js_reset_ms_dumping);
++/**
++ * These requirement bits are currently unused in base_jd_core_req
++ */
++#define BASEP_JD_REQ_RESERVED \
++ (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
++ BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
++ BASE_JD_REQ_EVENT_COALESCE | \
++ BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
++ BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
++ BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
-+ if (ret >= PAGE_SIZE) {
-+ buf[PAGE_SIZE - 2] = '\n';
-+ buf[PAGE_SIZE - 1] = '\0';
-+ ret = PAGE_SIZE - 1;
-+ }
++/**
++ * Mask of all bits in base_jd_core_req that control the type of the atom.
++ *
++ * This allows dependency only atoms to have flags set
++ */
++#define BASE_JD_REQ_ATOM_TYPE \
++ (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
++ BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
-+ return ret;
-+}
++/**
++ * Mask of all bits in base_jd_core_req that control the type of a soft job.
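++ *
++ * For example, code that needs to recognise a fence wait soft job can test:
++ *
++ * @code
++ * if ((core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT)
++ *         ; /* treat the atom as a fence wait soft job */
++ * @endcode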
++ */ ++#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* -+ * The sysfs file js_timeouts. -+ * -+ * This is used to override the current job scheduler values for -+ * JS_STOP_STOP_TICKS_SS -+ * JS_STOP_STOP_TICKS_CL -+ * JS_HARD_STOP_TICKS_SS -+ * JS_HARD_STOP_TICKS_CL -+ * JS_HARD_STOP_TICKS_DUMPING -+ * JS_RESET_TICKS_SS -+ * JS_RESET_TICKS_CL -+ * JS_RESET_TICKS_DUMPING. ++ * Returns non-zero value if core requirements passed define a soft job or ++ * a dependency only job. + */ -+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); -+ -+static u32 get_new_js_timeout( -+ u32 old_period, -+ u32 old_ticks, -+ u32 new_scheduling_period_ns) -+{ -+ u64 ticks = (u64)old_period * (u64)old_ticks; -+ do_div(ticks, new_scheduling_period_ns); -+ return ticks?ticks:1; -+} ++#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ ++ ((core_req & BASE_JD_REQ_SOFT_JOB) || \ ++ (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** -+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs -+ * file -+ * @dev: The device the sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which ++ * handles retaining cores for power management and affinity management. + * -+ * This function is called when the js_scheduling_period sysfs file is written -+ * to. It checks the data written, and if valid updates the js_scheduling_period -+ * value ++ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack ++ * where lots of atoms could be submitted before powerup, and each has an ++ * affinity chosen that causes other atoms to have an affinity ++ * violation. Whilst the affinity was not causing violations at the time it ++ * was chosen, it could cause violations thereafter. For example, 1000 jobs ++ * could have had their affinity chosen during the powerup time, so any of ++ * those 1000 jobs could cause an affinity violation later on. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * The attack would otherwise occur because other atoms/contexts have to wait for: ++ * -# the currently running atoms (which are causing the violation) to ++ * finish ++ * -# and, the atoms that had their affinity chosen during powerup to ++ * finish. These are run preferentially because they don't cause a ++ * violation, but instead continue to cause the violation in others. ++ * -# or, the attacker is scheduled out (which might not happen for just 2 ++ * contexts) ++ * ++ * By re-choosing the affinity (which is designed to avoid violations at the ++ * time it's chosen), we break condition (2) of the wait, which minimizes the ++ * problem to just waiting for current jobs to finish (which can be bounded if ++ * the Job Scheduling Policy has a timer). + */ -+static ssize_t set_js_scheduling_period(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ unsigned int js_scheduling_period; -+ u32 new_scheduling_period_ns; -+ u32 old_period; -+ struct kbasep_js_device_data *js_data; -+ unsigned long flags; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++enum kbase_atom_coreref_state { ++ /** Starting state: No affinity chosen, and cores must be requested. 
kbase_jd_atom::affinity==0 */
++ KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
++ /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
++ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
++ /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
++ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
++ /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
++ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
++ /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
++ KBASE_ATOM_COREREF_STATE_READY
++};
+
-+ js_data = &kbdev->js_data;
++/*
++ * Base Atom priority
++ *
++ * Only certain priority levels are actually implemented, as specified by the
++ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
++ * level that is not one of those defined below.
++ *
++ * Priority levels only affect scheduling between atoms of the same type within
++ * a base context, and only after the atoms have had dependencies resolved.
++ * Fragment atoms do not affect non-fragment atoms with lower priorities, and
++ * vice versa. For example, a low priority atom that has had its
++ * dependencies resolved might run before a higher priority atom that has not
++ * had its dependencies resolved.
++ *
++ * The scheduling between base contexts/processes and between atoms from
++ * different base contexts/processes is unaffected by atom priority.
++ *
++ * The atoms are scheduled as follows with respect to their priorities:
++ * - Let atoms 'X' and 'Y' be for the same job slot that have dependencies
++ * resolved, and atom 'X' has a higher priority than atom 'Y'
++ * - If atom 'Y' is currently running on the HW, then it is interrupted to
++ * allow atom 'X' to run soon after
++ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
++ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
++ * - Any two atoms that have the same priority could run in any order with
++ * respect to each other. That is, there is no ordering constraint between
++ * atoms of the same priority.
++ */
++typedef u8 base_jd_prio;
+
-+ ret = kstrtouint(buf, 0, &js_scheduling_period);
-+ if (ret || !js_scheduling_period) {
-+ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
-+ "Use format \n");
-+ return -EINVAL;
-+ }
++/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
++#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
++/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
++ * BASE_JD_PRIO_LOW */
++#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
++/* Low atom priority. */
++#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+
-+ new_scheduling_period_ns = js_scheduling_period * 1000000;
++/* Count of the number of priority levels.
This itself is not a valid
++ * base_jd_prio setting */
++#define BASE_JD_NR_PRIO_LEVELS 3
+
-+ /* Update scheduling timeouts */
-+ mutex_lock(&js_data->runpool_mutex);
-+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++enum kbase_jd_atom_state {
++ /** Atom is not used */
++ KBASE_JD_ATOM_STATE_UNUSED,
++ /** Atom is queued in JD */
++ KBASE_JD_ATOM_STATE_QUEUED,
++ /** Atom has been given to JS (is runnable/running) */
++ KBASE_JD_ATOM_STATE_IN_JS,
++ /** Atom has been completed, but not yet handed back to job dispatcher
++ * for dependency resolution */
++ KBASE_JD_ATOM_STATE_HW_COMPLETED,
++ /** Atom has been completed, but not yet handed back to userspace */
++ KBASE_JD_ATOM_STATE_COMPLETED
++};
+
-+ /* If no contexts have been scheduled since js_timeouts was last written
-+ * to, the new timeouts might not have been latched yet. So check if an
-+ * update is pending and use the new values if necessary. */
++typedef u16 base_atom_id; /**< Type big enough to store an atom number in */
+
-+ /* Use previous 'new' scheduling period as a base if present. */
-+ old_period = js_data->scheduling_period_ns;
++struct base_dependency {
++ base_atom_id atom_id; /**< An atom number */
++ base_jd_dep_type dependency_type; /**< Dependency type */
++};
+
-+#define SET_TIMEOUT(name) \
-+ (js_data->name = get_new_js_timeout(\
-+ old_period, \
-+ kbdev->js_data.name, \
-+ new_scheduling_period_ns))
++/* This structure has changed since UK 10.2, for which base_jd_core_req was a u16 value.
++ * In order to keep the size of the structure the same, the padding field has been adjusted
++ * accordingly and a core_req field of u32 type (matching the UK 10.3 base_jd_core_req defines)
++ * is added at the end of the structure. The place in the structure previously occupied by the u16
++ * core_req is kept but renamed to compat_core_req, and as such it can be used in the ioctl call for
++ * job submission as long as UK 10.2 legacy is supported. Once this support ends, this field can be
++ * left for possible future use. */
++typedef struct base_jd_atom_v2 {
++ u64 jc; /**< job-chain GPU address */
++ struct base_jd_udata udata; /**< user data */
++ kbase_pointer extres_list; /**< list of external resources */
++ u16 nr_extres; /**< nr of external resources */
++ u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */
++ struct base_dependency pre_dep[2]; /**< pre-dependencies; use the base_jd_atom_dep_set() setter function to assign
++ this field, in order to reduce the possibility of improper assignment of a dependency field */
++ base_atom_id atom_number; /**< unique number to identify the atom */
++ base_jd_prio prio; /**< Atom priority.
Refer to @ref base_jd_prio for more details */ ++ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ ++ u8 padding[1]; ++ base_jd_core_req core_req; /**< core requirements */ ++} base_jd_atom_v2; + -+ SET_TIMEOUT(soft_stop_ticks); -+ SET_TIMEOUT(soft_stop_ticks_cl); -+ SET_TIMEOUT(hard_stop_ticks_ss); -+ SET_TIMEOUT(hard_stop_ticks_cl); -+ SET_TIMEOUT(hard_stop_ticks_dumping); -+ SET_TIMEOUT(gpu_reset_ticks_ss); -+ SET_TIMEOUT(gpu_reset_ticks_cl); -+ SET_TIMEOUT(gpu_reset_ticks_dumping); ++#ifdef BASE_LEGACY_UK6_SUPPORT ++struct base_jd_atom_v2_uk6 { ++ u64 jc; /**< job-chain GPU address */ ++ struct base_jd_udata udata; /**< user data */ ++ kbase_pointer extres_list; /**< list of external resources */ ++ u16 nr_extres; /**< nr of external resources */ ++ u16 core_req; /**< core requirements */ ++ base_atom_id pre_dep[2]; /**< pre-dependencies */ ++ base_atom_id atom_number; /**< unique number to identify the atom */ ++ base_jd_prio prio; /**< priority - smaller is higher priority */ ++ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ ++ u8 padding[7]; ++}; ++#endif /* BASE_LEGACY_UK6_SUPPORT */ + -+#undef SET_TIMEOUT ++typedef enum base_external_resource_access { ++ BASE_EXT_RES_ACCESS_SHARED, ++ BASE_EXT_RES_ACCESS_EXCLUSIVE ++} base_external_resource_access; + -+ js_data->scheduling_period_ns = new_scheduling_period_ns; ++typedef struct base_external_resource { ++ u64 ext_resource; ++} base_external_resource; + -+ kbase_js_set_timeouts(kbdev); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_data->runpool_mutex); ++/** ++ * The maximum number of external resources which can be mapped/unmapped ++ * in a single request. ++ */ ++#define BASE_EXT_RES_COUNT_MAX 10 + -+ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", -+ js_scheduling_period); ++/** ++ * struct base_external_resource_list - Structure which describes a list of ++ * external resources. ++ * @count: The number of resources. ++ * @ext_res: Array of external resources which is ++ * sized at allocation time. ++ */ ++struct base_external_resource_list { ++ u64 count; ++ struct base_external_resource ext_res[1]; ++}; + -+ return count; -+} ++struct base_jd_debug_copy_buffer { ++ u64 address; ++ u64 size; ++ struct base_external_resource extres; ++}; + +/** -+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs -+ * entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ * @brief Setter for a dependency structure + * -+ * This function is called to get the current period used for the JS scheduling -+ * period. ++ * @param[in] dep The kbase jd atom dependency to be initialized. ++ * @param id The atom_id to be assigned. ++ * @param dep_type The dep_type to be assigned. + * -+ * Return: The number of bytes output to @buf. 
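++ * A typical use (illustrative only; 'atom' and 'producer_id' are assumed to
++ * exist in the caller) is to fill in an atom's first pre-dependency:
++ *
++ * @code
++ * base_jd_atom_dep_set(&atom.pre_dep[0], producer_id, BASE_JD_DEP_TYPE_DATA);
++ * @endcode
++ *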
+ */ -+static ssize_t show_js_scheduling_period(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++static inline void base_jd_atom_dep_set(struct base_dependency *dep, ++ base_atom_id id, base_jd_dep_type dep_type) +{ -+ struct kbase_device *kbdev; -+ u32 period; -+ ssize_t ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ period = kbdev->js_data.scheduling_period_ns; ++ LOCAL_ASSERT(dep != NULL); + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", -+ period / 1000000); ++ /* ++ * make sure we don't set not allowed combinations ++ * of atom_id/dependency_type. ++ */ ++ LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || ++ (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); + -+ return ret; ++ dep->atom_id = id; ++ dep->dependency_type = dep_type; +} + -+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, -+ show_js_scheduling_period, set_js_scheduling_period); -+ -+#if !MALI_CUSTOMER_RELEASE +/** -+ * set_force_replay - Store callback for the force_replay sysfs file. ++ * @brief Make a copy of a dependency structure + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @param[in,out] dep The kbase jd atom dependency to be written. ++ * @param[in] from The dependency to make a copy from. + * -+ * Return: @count if the function succeeded. An error code on failure. + */ -+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++static inline void base_jd_atom_dep_copy(struct base_dependency *dep, ++ const struct base_dependency *from) +{ -+ struct kbase_device *kbdev; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ if (!strncmp("limit=", buf, MIN(6, count))) { -+ int force_replay_limit; -+ int items = sscanf(buf, "limit=%u", &force_replay_limit); -+ -+ if (items == 1) { -+ kbdev->force_replay_random = false; -+ kbdev->force_replay_limit = force_replay_limit; -+ kbdev->force_replay_count = 0; -+ -+ return count; -+ } -+ } else if (!strncmp("random_limit", buf, MIN(12, count))) { -+ kbdev->force_replay_random = true; -+ kbdev->force_replay_count = 0; -+ -+ return count; -+ } else if (!strncmp("norandom_limit", buf, MIN(14, count))) { -+ kbdev->force_replay_random = false; -+ kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED; -+ kbdev->force_replay_count = 0; -+ -+ return count; -+ } else if (!strncmp("core_req=", buf, MIN(9, count))) { -+ unsigned int core_req; -+ int items = sscanf(buf, "core_req=%x", &core_req); -+ -+ if (items == 1) { -+ kbdev->force_replay_core_req = (base_jd_core_req)core_req; ++ LOCAL_ASSERT(dep != NULL); + -+ return count; -+ } -+ } -+ dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=, random_limit, norandom_limit, core_req=\n"); -+ return -EINVAL; ++ base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); +} + +/** -+ * show_force_replay - Show callback for the force_replay sysfs file. ++ * @brief Soft-atom fence trigger setup. + * -+ * This function is called to get the contents of the force_replay sysfs -+ * file. It returns the last set value written to the force_replay sysfs file. -+ * If the file didn't get written yet, the values will be 0. ++ * Sets up an atom to be a SW-only atom signaling a fence ++ * when it reaches the run state. 
+ * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * Using the existing base dependency system the fence can ++ * be set to trigger when a GPU job has finished. + * -+ * Return: The number of bytes output to @buf. -+ */ -+static ssize_t show_force_replay(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ if (kbdev->force_replay_random) -+ ret = scnprintf(buf, PAGE_SIZE, -+ "limit=0\nrandom_limit\ncore_req=%x\n", -+ kbdev->force_replay_core_req); -+ else -+ ret = scnprintf(buf, PAGE_SIZE, -+ "limit=%u\nnorandom_limit\ncore_req=%x\n", -+ kbdev->force_replay_limit, -+ kbdev->force_replay_core_req); -+ -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; -+} -+ -+/* -+ * The sysfs file force_replay. ++ * The base fence object must not be terminated until the atom ++ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned. ++ * ++ * @a fence must be a valid fence set up with @a base_fence_init. ++ * Calling this function with a uninitialized fence results in undefined behavior. ++ * ++ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom ++ * @param[in] fence The base fence object to trigger. + */ -+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, -+ set_force_replay); -+#endif /* !MALI_CUSTOMER_RELEASE */ -+ -+#ifdef CONFIG_MALI_DEBUG -+static ssize_t set_js_softstop_always(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int softstop_always; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = kstrtoint(buf, 0, &softstop_always); -+ if (ret || ((softstop_always != 0) && (softstop_always != 1))) { -+ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } -+ -+ kbdev->js_data.softstop_always = (bool) softstop_always; -+ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", -+ (kbdev->js_data.softstop_always) ? -+ "Enabled" : "Disabled"); -+ return count; -+} -+ -+static ssize_t show_js_softstop_always(struct device *dev, -+ struct device_attribute *attr, char * const buf) ++static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) +{ -+ struct kbase_device *kbdev; -+ ssize_t ret; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; -+ -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); -+ -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; ++ LOCAL_ASSERT(atom); ++ LOCAL_ASSERT(fence); ++ LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); ++ LOCAL_ASSERT(fence->basep.stream_fd >= 0); ++ atom->jc = (uintptr_t) fence; ++ atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; +} + -+/* -+ * By default, soft-stops are disabled when only a single context is present. -+ * The ability to enable soft-stop when only a single context is present can be -+ * used for debug and unit-testing purposes. -+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) 
-+ */ -+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); -+#endif /* CONFIG_MALI_DEBUG */ -+ -+#ifdef CONFIG_MALI_DEBUG -+typedef void (kbasep_debug_command_func) (struct kbase_device *); -+ -+enum kbasep_debug_command_code { -+ KBASEP_DEBUG_COMMAND_DUMPTRACE, -+ -+ /* This must be the last enum */ -+ KBASEP_DEBUG_COMMAND_COUNT -+}; -+ -+struct kbasep_debug_command { -+ char *str; -+ kbasep_debug_command_func *func; -+}; -+ -+/* Debug commands supported by the driver */ -+static const struct kbasep_debug_command debug_commands[] = { -+ { -+ .str = "dumptrace", -+ .func = &kbasep_trace_dump, -+ } -+}; -+ +/** -+ * show_debug - Show callback for the debug_command sysfs file. ++ * @brief Soft-atom fence wait setup. + * -+ * This function is called to get the contents of the debug_command sysfs -+ * file. This is a list of the available debug commands, separated by newlines. ++ * Sets up an atom to be a SW-only atom waiting on a fence. ++ * When the fence becomes triggered the atom becomes runnable ++ * and completes immediately. + * -+ * @dev: The device this sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The output buffer for the sysfs file contents ++ * Using the existing base dependency system the fence can ++ * be set to block a GPU job until it has been triggered. + * -+ * Return: The number of bytes output to @buf. ++ * The base fence object must not be terminated until the atom ++ * has been submitted to @a base_jd_submit and @a base_jd_submit has returned. ++ * ++ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import. ++ * Calling this function with a uninitialized fence results in undefined behavior. ++ * ++ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom ++ * @param[in] fence The base fence object to wait on + */ -+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) ++static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) +{ -+ struct kbase_device *kbdev; -+ int i; -+ ssize_t ret = 0; -+ -+ kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) -+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); -+ -+ if (ret >= PAGE_SIZE) { -+ buf[PAGE_SIZE - 2] = '\n'; -+ buf[PAGE_SIZE - 1] = '\0'; -+ ret = PAGE_SIZE - 1; -+ } -+ -+ return ret; ++ LOCAL_ASSERT(atom); ++ LOCAL_ASSERT(fence); ++ LOCAL_ASSERT(fence->basep.fd >= 0); ++ atom->jc = (uintptr_t) fence; ++ atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; +} + +/** -+ * issue_debug - Store callback for the debug_command sysfs file. -+ * -+ * This function is called when the debug_command sysfs file is written to. -+ * It matches the requested command against the available commands, and if -+ * a matching command is found calls the associated function from -+ * @debug_commands to issue the command. ++ * @brief External resource info initialization. + * -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * Sets up an external resource object to reference ++ * a memory allocation and the type of access requested. + * -+ * Return: @count if the function succeeded. An error code on failure. 
++ * @param[in] res The resource object to initialize ++ * @param handle The handle to the imported memory object, must be ++ * obtained by calling @ref base_mem_as_import_handle(). ++ * @param access The type of access requested + */ -+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) +{ -+ struct kbase_device *kbdev; -+ int i; -+ -+ kbdev = to_kbase_device(dev); ++ u64 address; + -+ if (!kbdev) -+ return -ENODEV; ++ address = handle.basep.handle; + -+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { -+ if (sysfs_streq(debug_commands[i].str, buf)) { -+ debug_commands[i].func(kbdev); -+ return count; -+ } -+ } ++ LOCAL_ASSERT(res != NULL); ++ LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); ++ LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); + -+ /* Debug Command not found */ -+ dev_err(dev, "debug_command: command not known\n"); -+ return -EINVAL; ++ res->ext_resource = address | (access & LOCAL_PAGE_LSB); +} + -+/* The sysfs file debug_command. -+ * -+ * This is used to issue general debug commands to the device driver. -+ * Reading it will produce a list of debug commands, separated by newlines. -+ * Writing to it with one of those commands will issue said command. ++/** ++ * @brief Job chain event code bits ++ * Defines the bits used to create ::base_jd_event_code + */ -+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); -+#endif /* CONFIG_MALI_DEBUG */ ++enum { ++ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ ++ BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ ++ BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ ++ BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ ++ BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ ++ BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ ++ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ ++ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ ++}; + +/** -+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ * @brief Job chain event codes + * -+ * This function is called to get a description of the present Mali -+ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the -+ * number of cores, the hardware version and the raw product id. For -+ * example ++ * HW and low-level SW events are represented by event codes. ++ * The status of jobs which succeeded are also represented by ++ * an event code (see ::BASE_JD_EVENT_DONE). ++ * Events are usually reported as part of a ::base_jd_event. + * -+ * Mali-T60x MP4 r0p0 0x6956 ++ * The event codes are encoded in the following way: ++ * @li 10:0 - subtype ++ * @li 12:11 - type ++ * @li 13 - SW success (only valid if the SW bit is set) ++ * @li 14 - SW event (HW event if not set) ++ * @li 15 - Kernel event (should never be seen in userspace) + * -+ * Return: The number of bytes output to @buf. 
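++ *
++ * For example (a sketch only; 'code' and 'type' are caller-side variables),
++ * userspace can recognise a successful software event and extract its type:
++ *
++ * @code
++ * if ((code & BASE_JD_SW_EVENT) && (code & BASE_JD_SW_EVENT_SUCCESS))
++ *         type = code & BASE_JD_SW_EVENT_TYPE_MASK;
++ * @endcode
++ *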
++ * Events are split up into ranges as follows: ++ * - BASE_JD_EVENT_RANGE_\_START ++ * - BASE_JD_EVENT_RANGE_\_END ++ * ++ * \a code is in \'s range when: ++ * - BASE_JD_EVENT_RANGE_\_START <= code < BASE_JD_EVENT_RANGE_\_END ++ * ++ * Ranges can be asserted for adjacency by testing that the END of the previous ++ * is equal to the START of the next. This is useful for optimizing some tests ++ * for range. ++ * ++ * A limitation is that the last member of this enum must explicitly be handled ++ * (with an assert-unreachable statement) in switch statements that use ++ * variables of this type. Otherwise, the compiler warns that we have not ++ * handled that enum value. + */ -+static ssize_t kbase_show_gpuinfo(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ static const struct gpu_product_id_name { -+ unsigned id; -+ char *name; -+ } gpu_product_id_names[] = { -+ { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" }, -+ { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" }, -+ { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" }, -+ { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" }, -+ { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" }, -+ { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, -+ { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, -+ { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, -+ { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G71" }, -+ { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-THEx" }, -+ { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ .name = "Mali-G51" }, -+ }; -+ const char *product_name = "(Unknown Mali GPU)"; -+ struct kbase_device *kbdev; -+ u32 gpu_id; -+ unsigned product_id, product_id_mask; -+ unsigned i; -+ bool is_new_format; ++typedef enum base_jd_event_code { ++ /* HW defined exceptions */ + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ /** Start of HW Non-fault status codes ++ * ++ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, ++ * because the job was hard-stopped ++ */ ++ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ is_new_format = GPU_ID_IS_NEW_FORMAT(product_id); -+ product_id_mask = -+ (is_new_format ? 
-+ GPU_ID2_PRODUCT_MODEL : -+ GPU_ID_VERSION_PRODUCT_ID) >> -+ GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ /* non-fatal exceptions */ ++ BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ ++ BASE_JD_EVENT_DONE = 0x01, ++ BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ ++ BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ ++ BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ + -+ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { -+ const struct gpu_product_id_name *p = &gpu_product_id_names[i]; ++ /** End of HW Non-fault status codes ++ * ++ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, ++ * because the job was hard-stopped ++ */ ++ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + -+ if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) && -+ (p->id & product_id_mask) == -+ (product_id & product_id_mask)) { -+ product_name = p->name; -+ break; -+ } -+ } ++ /** Start of HW fault and SW Error status codes */ ++ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + -+ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", -+ product_name, kbdev->gpu_props.num_cores, -+ (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, -+ product_id); -+} -+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); ++ /* job exceptions */ ++ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, ++ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, ++ BASE_JD_EVENT_JOB_READ_FAULT = 0x42, ++ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, ++ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, ++ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, ++ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, ++ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, ++ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, ++ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, ++ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, ++ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, ++ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, ++ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, ++ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, ++ BASE_JD_EVENT_STATE_FAULT = 0x5A, ++ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, ++ BASE_JD_EVENT_UNKNOWN = 0x7F, + -+/** -+ * set_dvfs_period - Store callback for the dvfs_period sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file -+ * -+ * This function is called when the dvfs_period sysfs file is written to. It -+ * checks the data written, and if valid updates the DVFS period variable, -+ * -+ * Return: @count if the function succeeded. An error code on failure. 
-+ */ -+static ssize_t set_dvfs_period(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int dvfs_period; ++ /* GPU exceptions */ ++ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, ++ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ /* MMU exceptions */ ++ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, ++ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, ++ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, ++ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, ++ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, ++ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, ++ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, ++ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, ++ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, ++ BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + -+ ret = kstrtoint(buf, 0, &dvfs_period); -+ if (ret || dvfs_period <= 0) { -+ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++ /* SW defined exceptions */ ++ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, ++ BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, ++ BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, ++ BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, ++ BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, ++ BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005, + -+ kbdev->pm.dvfs_period = dvfs_period; -+ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); ++ BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + -+ return count; -+} ++ /** End of HW fault and SW Error status codes */ ++ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + -+/** -+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. -+ * -+ * This function is called to get the current period used for the DVFS sample -+ * timer. -+ * -+ * Return: The number of bytes output to @buf. -+ */ -+static ssize_t show_dvfs_period(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++ /** Start of SW Success status codes */ ++ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, ++ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, ++ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); ++ /** End of SW Success status codes */ ++ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + -+ return ret; -+} ++ /** Start of Kernel-only status codes. 
Such codes are never returned to user-space */ ++ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, ++ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + -+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period, -+ set_dvfs_period); ++ /** End of Kernel-only status codes. */ ++ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF ++} base_jd_event_code; + +/** -+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @brief Event reporting structure + * -+ * This function is called when the pm_poweroff sysfs file is written to. ++ * This structure is used by the kernel driver to report information ++ * about GPU events. The can either be HW-specific events or low-level ++ * SW events, such as job-chain completion. + * -+ * This file contains three values separated by whitespace. The values -+ * are gpu_poweroff_time (the period of the poweroff timer, in ns), -+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle -+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer -+ * ticks before the GPU is powered off), in that order. ++ * The event code contains an event type field which can be extracted ++ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * Based on the event type base_jd_event::data holds: ++ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed ++ * job-chain ++ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has ++ * been completed (ie all contained job-chains have been completed). ++ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used + */ -+static ssize_t set_pm_poweroff(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int items; -+ s64 gpu_poweroff_time; -+ int poweroff_shader_ticks, poweroff_gpu_ticks; -+ -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++typedef struct base_jd_event_v2 { ++ base_jd_event_code event_code; /**< event code */ ++ base_atom_id atom_number; /**< the atom number that has completed */ ++ struct base_jd_udata udata; /**< user data */ ++} base_jd_event_v2; + -+ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, -+ &poweroff_shader_ticks, -+ &poweroff_gpu_ticks); -+ if (items != 3) { -+ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++/** ++ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills ++ * a full cache line. ++ */ + -+ kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); -+ kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks; -+ kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks; ++#define BASE_CPU_GPU_CACHE_LINE_PADDING (36) + -+ return count; -+} + +/** -+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry. -+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. 
+ * -+ * This function is called to get the current period used for the DVFS sample -+ * timer. ++ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom. + * -+ * Return: The number of bytes output to @buf. ++ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid ++ * cases where access to pages containing the structure is shared between cached and un-cached ++ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes ++ * which is the cache line for ARM A15 processors. + */ -+static ssize_t show_pm_poweroff(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++typedef struct base_dump_cpu_gpu_counters { ++ u64 system_time; ++ u64 cycle_counter; ++ u64 sec; ++ u32 usec; ++ u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING]; ++} base_dump_cpu_gpu_counters; + -+ ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n", -+ ktime_to_ns(kbdev->pm.gpu_poweroff_time), -+ kbdev->pm.poweroff_shader_ticks, -+ kbdev->pm.poweroff_gpu_ticks); + -+ return ret; -+} + -+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, -+ set_pm_poweroff); ++/** @} end group base_user_api_job_dispatch */ ++ ++#define GPU_MAX_JOB_SLOTS 16 + +/** -+ * set_reset_timeout - Store callback for the reset_timeout sysfs file. -+ * @dev: The device with sysfs file is for -+ * @attr: The attributes of the sysfs file -+ * @buf: The value written to the sysfs file -+ * @count: The number of bytes written to the sysfs file ++ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API + * -+ * This function is called when the reset_timeout sysfs file is written to. It -+ * checks the data written, and if valid updates the reset timeout. ++ * The User-side Base GPU Property Query API encapsulates two ++ * sub-modules: + * -+ * Return: @count if the function succeeded. An error code on failure. -+ */ -+static ssize_t set_reset_timeout(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ int ret; -+ int reset_timeout; ++ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" ++ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" ++ * ++ * There is a related third module outside of Base, which is owned by the MIDG ++ * module: ++ * - @ref gpu_props_static "Midgard Compile-time GPU Properties" ++ * ++ * Base only deals with properties that vary between different Midgard ++ * implementations - the Dynamic GPU properties and the Platform Config ++ * properties. ++ * ++ * For properties that are constant for the Midgard Architecture, refer to the ++ * MIDG module. However, we will discuss their relevance here just to ++ * provide background information. ++ * ++ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules ++ * ++ * The compile-time properties (Platform Config, Midgard Compile-time ++ * properties) are exposed as pre-processor macros. ++ * ++ * Complementing the compile-time properties are the Dynamic GPU ++ * Properties, which act as a conduit for the Midgard Configuration ++ * Discovery. ++ * ++ * In general, the dynamic properties are present to verify that the platform ++ * has been configured correctly with the right set of Platform Config ++ * Compile-time Properties. 
++ * ++ * As a consistent guide across the entire DDK, the choice for dynamic or ++ * compile-time should consider the following, in order: ++ * -# Can the code be written so that it doesn't need to know the ++ * implementation limits at all? ++ * -# If you need the limits, get the information from the Dynamic Property ++ * lookup. This should be done once as you fetch the context, and then cached ++ * as part of the context data structure, so it's cheap to access. ++ * -# If there's a clear and arguable inefficiency in using Dynamic Properties, ++ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time ++ * property). Examples of where this might be sensible follow: ++ * - Part of a critical inner-loop ++ * - Frequent re-use throughout the driver, causing significant extra load ++ * instructions or control flow that would be worthwhile optimizing out. ++ * ++ * We cannot provide an exhaustive set of examples, neither can we provide a ++ * rule for every possible situation. Use common sense, and think about: what ++ * the rest of the driver will be doing; how the compiler might represent the ++ * value if it is a compile-time constant; whether an OEM shipping multiple ++ * devices would benefit much more from a single DDK binary, instead of ++ * insignificant micro-optimizations. ++ * ++ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties ++ * ++ * Dynamic GPU properties are presented in two sets: ++ * -# the commonly used properties in @ref base_gpu_props, which have been ++ * unpacked from GPU register bitfields. ++ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props ++ * (also a member of @ref base_gpu_props). All of these are presented in ++ * the packed form, as presented by the GPU registers themselves. ++ * ++ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to ++ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device ++ * behaving differently?". In this case, all information about the ++ * configuration is potentially useful, but it does not need to be processed ++ * by the driver. Instead, the raw registers can be processed by the Mali ++ * Tools software on the host PC. ++ * ++ * The properties returned extend the Midgard Configuration Discovery ++ * registers. For example, GPU clock speed is not specified in the Midgard ++ * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. ++ * ++ * The GPU properties are obtained by a call to ++ * _mali_base_get_gpu_props(). This simply returns a pointer to a const ++ * base_gpu_props structure. It is constant for the life of a base ++ * context. Multiple calls to _mali_base_get_gpu_props() to a base context ++ * return the same pointer to a constant structure. This avoids cache pollution ++ * of the common data. ++ * ++ * This pointer must not be freed, because it does not point to the start of a ++ * region allocated by the memory allocator; instead, just close the @ref ++ * base_context. ++ * ++ * ++ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties ++ * ++ * The Platform Config File sets up gpu properties that are specific to a ++ * certain platform. Properties that are 'Implementation Defined' in the ++ * Midgard Architecture spec are placed here. ++ * ++ * @note Reference configurations are provided for Midgard Implementations, such as ++ * the Mali-T600 family. The customer need not repeat this information, and can select one of ++ * these reference configurations. 
For example, VA_BITS, PA_BITS and the ++ * maximum number of samples per pixel might vary between Midgard Implementations, but ++ * \b not for platforms using the Mali-T604. This information is placed in ++ * the reference configuration files. ++ * ++ * The System Integrator creates the following structure: ++ * - platform_XYZ ++ * - platform_XYZ/plat ++ * - platform_XYZ/plat/plat_config.h ++ * ++ * They then edit plat_config.h, using the example plat_config.h files as a ++ * guide. ++ * ++ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will ++ * receive a helpful \#error message if they do not do this correctly. This ++ * selects the Reference Configuration for the Midgard Implementation. The rationale ++ * behind this decision (against asking the customer to write \#include ++ * in their plat_config.h) is as follows: ++ * - This mechanism 'looks' like a regular config file (such as Linux's ++ * .config) ++ * - It is difficult to get wrong in a way that will produce strange build ++ * errors: ++ * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and ++ * so they won't accidentally pick another file with 'mali_t600' in its name ++ * - When the build doesn't work, the System Integrator may think the DDK is ++ * doesn't work, and attempt to fix it themselves: ++ * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the ++ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells ++ * you. ++ * - For a \#include mechanism, checks must still be made elsewhere, which the ++ * System Integrator may try working around by setting \#defines (such as ++ * VA_BITS) themselves in their plat_config.h. In the worst case, they may ++ * set the prevention-mechanism \#define of ++ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN". ++ * - In this case, they would believe they are on the right track, because ++ * the build progresses with their fix, but with errors elsewhere. ++ * ++ * However, there is nothing to prevent the customer using \#include to organize ++ * their own configurations files hierarchically. ++ * ++ * The mechanism for the header file processing is as follows: ++ * ++ * @dot ++ digraph plat_config_mechanism { ++ rankdir=BT ++ size="6,6" + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ "mali_base.h"; ++ "gpu/mali_gpu.h"; + -+ ret = kstrtoint(buf, 0, &reset_timeout); -+ if (ret || reset_timeout <= 0) { -+ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" -+ "Use format \n"); -+ return -EINVAL; -+ } ++ node [ shape=box ]; ++ { ++ rank = same; ordering = out; + -+ kbdev->reset_timeout_ms = reset_timeout; -+ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); ++ "gpu/mali_gpu_props.h"; ++ "base/midg_gpus/mali_t600.h"; ++ "base/midg_gpus/other_midg_gpu.h"; ++ } ++ { rank = same; "plat/plat_config.h"; } ++ { ++ rank = same; ++ "gpu/mali_gpu.h" [ shape=box ]; ++ gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ]; ++ select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ; ++ } ++ node [ shape=box ]; ++ { rank = same; "plat/plat_config.h"; } ++ { rank = same; "mali_base.h"; } + -+ return count; -+} ++ "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h"; ++ "mali_base.h" -> "plat/plat_config.h" ; ++ "mali_base.h" -> select_gpu ; + -+/** -+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry. 
-+ * @dev: The device this sysfs file is for. -+ * @attr: The attributes of the sysfs file. -+ * @buf: The output buffer to receive the GPU information. ++ "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ; ++ gpu_chooser -> select_gpu [style="dotted,bold"] ; ++ ++ select_gpu -> "base/midg_gpus/mali_t600.h" ; ++ select_gpu -> "base/midg_gpus/other_midg_gpu.h" ; ++ } ++ @enddot + * -+ * This function is called to get the current reset timeout. + * -+ * Return: The number of bytes output to @buf. ++ * @section sec_base_user_api_gpuprops_kernel Kernel Operation ++ * ++ * During Base Context Create time, user-side makes a single kernel call: ++ * - A call to fill user memory with GPU information structures ++ * ++ * The kernel-side will fill the provided the entire processed @ref base_gpu_props ++ * structure, because this information is required in both ++ * user and kernel side; it does not make sense to decode it twice. ++ * ++ * Coherency groups must be derived from the bitmasks, but this can be done ++ * kernel side, and just once at kernel startup: Coherency groups must already ++ * be known kernel-side, to support chains that specify a 'Only Coherent Group' ++ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. ++ * ++ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation ++ * Creation of the coherent group data is done at device-driver startup, and so ++ * is one-time. This will most likely involve a loop with CLZ, shifting, and ++ * bit clearing on the L2_PRESENT mask, depending on whether the ++ * system is L2 Coherent. The number of shader cores is done by a ++ * population count, since faulty cores may be disabled during production, ++ * producing a non-contiguous mask. ++ * ++ * The memory requirements for this algorithm can be determined either by a u64 ++ * population count on the L2_PRESENT mask (a LUT helper already is ++ * required for the above), or simple assumption that there can be no more than ++ * 16 coherent groups, since core groups are typically 4 cores. + */ -+static ssize_t show_reset_timeout(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs ++ * @{ ++ */ + -+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); ++/** ++ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties ++ * @{ ++ */ + -+ return ret; -+} ++#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 + -+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, -+ set_reset_timeout); ++#define BASE_MAX_COHERENT_GROUPS 16 + ++struct mali_base_gpu_core_props { ++ /** ++ * Product specific value. ++ */ ++ u32 product_id; + ++ /** ++ * Status of the GPU release. ++ * No defined values, but starts at 0 and increases by one for each ++ * release status (alpha, beta, EAC, etc.). ++ * 4 bit values (0-15). ++ */ ++ u16 version_status; + -+static ssize_t show_mem_pool_size(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++ /** ++ * Minor release number of the GPU. "P" part of an "RnPn" release number. ++ * 8 bit values (0-255). ++ */ ++ u16 minor_revision; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ /** ++ * Major release number of the GPU. "R" part of an "RnPn" release number. 
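The start-up derivation outlined in the "Coherency Group calculation" paragraph above might look roughly like the sketch below. It is an illustration only: the kernel driver would use its own bit helpers (ffs/hweight64 rather than the GCC builtins used here to keep the sketch standalone), and the mapping of one contiguous 4-core group per L2 slice simply reflects the "typically 4 cores" figure quoted above, not a guaranteed layout.

#include <stdint.h>

/* Illustrative only: derive per-L2 core-group masks from the raw
 * L2_PRESENT and SHADER_PRESENT bitmasks via a CLZ/popcount-style loop. */
static unsigned int derive_core_groups(uint64_t l2_present,
				       uint64_t shader_present,
				       uint64_t group_mask[16])
{
	unsigned int ngroups = 0;

	while (l2_present && ngroups < 16) {
		int slice = __builtin_ctzll(l2_present); /* lowest L2 slice */

		if (slice >= 16)
			break;	/* keep the illustrative shift in range */

		/* Assumption for illustration: 4 consecutive cores per slice. */
		group_mask[ngroups++] =
			((uint64_t)0xf << (slice * 4)) & shader_present;

		l2_present &= l2_present - 1;	/* clear the lowest set bit */
	}
	return ngroups;
}

/* Core count is a population count, since the mask may be sparse when
 * faulty cores have been disabled in production. */
static unsigned int count_cores(uint64_t shader_present)
{
	return (unsigned int)__builtin_popcountll(shader_present);
}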
++ * 4 bit values (0-15). ++ */ ++ u16 major_revision; + -+ ret = scnprintf(buf, PAGE_SIZE, "%zu\n", -+ kbase_mem_pool_size(&kbdev->mem_pool)); ++ u16 padding; + -+ return ret; -+} ++ /** ++ * This property is deprecated since it has not contained the real current ++ * value of GPU clock speed. It is kept here only for backwards compatibility. ++ * For the new ioctl interface, it is ignored and is treated as a padding ++ * to keep the structure of the same size and retain the placement of its ++ * members. ++ */ ++ u32 gpu_speed_mhz; + -+static ssize_t set_mem_pool_size(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ size_t new_size; -+ int err; ++ /** ++ * @usecase GPU clock max/min speed is required for computing best/worst case ++ * in tasks as job scheduling ant irq_throttling. (It is not specified in the ++ * Midgard Architecture). ++ * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function. ++ */ ++ u32 gpu_freq_khz_max; ++ u32 gpu_freq_khz_min; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ /** ++ * Size of the shader program counter, in bits. ++ */ ++ u32 log2_program_counter_size; + -+ err = kstrtoul(buf, 0, (unsigned long *)&new_size); -+ if (err) -+ return err; ++ /** ++ * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a ++ * bitpattern where a set bit indicates that the format is supported. ++ * ++ * Before using a texture format, it is recommended that the corresponding ++ * bit be checked. ++ */ ++ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + -+ kbase_mem_pool_trim(&kbdev->mem_pool, new_size); ++ /** ++ * Theoretical maximum memory available to the GPU. It is unlikely that a ++ * client will be able to allocate all of this memory for their own ++ * purposes, but this at least provides an upper bound on the memory ++ * available to the GPU. ++ * ++ * This is required for OpenCL's clGetDeviceInfo() call when ++ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The ++ * client will not be expecting to allocate anywhere near this value. ++ */ ++ u64 gpu_available_memory_size; ++}; + -+ return count; -+} ++/** ++ * ++ * More information is possible - but associativity and bus width are not ++ * required by upper-level apis. ++ */ ++struct mali_base_gpu_l2_cache_props { ++ u8 log2_line_size; ++ u8 log2_cache_size; ++ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ ++ u8 padding[5]; ++}; + -+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, -+ set_mem_pool_size); ++struct mali_base_gpu_tiler_props { ++ u32 bin_size_bytes; /* Max is 4*2^15 */ ++ u32 max_active_levels; /* Max is 2^15 */ ++}; + -+static ssize_t show_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, char * const buf) -+{ -+ struct kbase_device *kbdev; -+ ssize_t ret; ++/** ++ * GPU threading system details. ++ */ ++struct mali_base_gpu_thread_props { ++ u32 max_threads; /* Max. number of threads per core */ ++ u32 max_workgroup_size; /* Max. number of threads per workgroup */ ++ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ ++ u16 max_registers; /* Total size [1..65535] of the register file available per core. */ ++ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ ++ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. 
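The recommendation above (check the corresponding bit before using a texture format) amounts to a simple bit test. A sketch, assuming formats are numbered consecutively across the three TEXTURE_FEATURES_x words at 32 formats per word; the helper name is illustrative.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: test one format bit across the three
 * TEXTURE_FEATURES_x words (register index = format_index / 32). */
static bool texture_format_supported(const uint32_t texture_features[3],
				     unsigned int format_index)
{
	unsigned int reg = format_index / 32;
	unsigned int bit = format_index % 32;

	if (reg >= 3)
		return false;
	return (texture_features[reg] >> bit) & 1u;
}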
*/ ++ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ ++ u8 padding[7]; ++}; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++/** ++ * @brief descriptor for a coherent group ++ * ++ * \c core_mask exposes all cores in that coherent group, and \c num_cores ++ * provides a cached population-count for that mask. ++ * ++ * @note Whilst all cores are exposed in the mask, not all may be available to ++ * the application, depending on the Kernel Power policy. ++ * ++ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. ++ */ ++struct mali_base_gpu_coherent_group { ++ u64 core_mask; /**< Core restriction mask required for the group */ ++ u16 num_cores; /**< Number of cores in the group */ ++ u16 padding[3]; ++}; + -+ ret = scnprintf(buf, PAGE_SIZE, "%zu\n", -+ kbase_mem_pool_max_size(&kbdev->mem_pool)); ++/** ++ * @brief Coherency group information ++ * ++ * Note that the sizes of the members could be reduced. However, the \c group ++ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte ++ * aligned, thus leading to wastage if the other members sizes were reduced. ++ * ++ * The groups are sorted by core mask. The core masks are non-repeating and do ++ * not intersect. ++ */ ++struct mali_base_gpu_coherent_group_info { ++ u32 num_groups; + -+ return ret; -+} ++ /** ++ * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. ++ * ++ * The GPU Counter dumping writes 2048 bytes per core group, regardless of ++ * whether the core groups are coherent or not. Hence this member is needed ++ * to calculate how much memory is required for dumping. ++ * ++ * @note Do not use it to work out how many valid elements are in the ++ * group[] member. Use num_groups instead. ++ */ ++ u32 num_core_groups; + -+static ssize_t set_mem_pool_max_size(struct device *dev, -+ struct device_attribute *attr, const char *buf, size_t count) -+{ -+ struct kbase_device *kbdev; -+ size_t new_max_size; -+ int err; ++ /** ++ * Coherency features of the memory, accessed by @ref gpu_mem_features ++ * methods ++ */ ++ u32 coherency; + -+ kbdev = to_kbase_device(dev); -+ if (!kbdev) -+ return -ENODEV; ++ u32 padding; + -+ err = kstrtoul(buf, 0, (unsigned long *)&new_max_size); -+ if (err) -+ return -EINVAL; ++ /** ++ * Descriptors of coherent groups ++ */ ++ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; ++}; + -+ kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size); ++/** ++ * A complete description of the GPU's Hardware Configuration Discovery ++ * registers. ++ * ++ * The information is presented inefficiently for access. For frequent access, ++ * the values should be better expressed in an unpacked form in the ++ * base_gpu_props structure. ++ * ++ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to ++ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device ++ * behaving differently?". In this case, all information about the ++ * configuration is potentially useful, but it does not need to be processed ++ * by the driver. Instead, the raw registers can be processed by the Mali ++ * Tools software on the host PC. 
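The sizing rule quoted above (the counter dump writes 2048 bytes per core group, coherent or not) translates into a one-line helper. This sketch covers only the per-core-group contribution described here; the constant name is illustrative.

#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_DUMP_BYTES_PER_CORE_GROUP 2048u

/* Illustrative only: memory needed for the per-core-group part of a dump. */
static size_t counter_dump_size(uint32_t num_core_groups)
{
	return (size_t)num_core_groups * EXAMPLE_DUMP_BYTES_PER_CORE_GROUP;
}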
++ * ++ */ ++struct gpu_raw_gpu_props { ++ u64 shader_present; ++ u64 tiler_present; ++ u64 l2_present; ++ u64 stack_present; + -+ return count; -+} ++ u32 l2_features; ++ u32 suspend_size; /* API 8.2+ */ ++ u32 mem_features; ++ u32 mmu_features; + -+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, -+ set_mem_pool_max_size); ++ u32 as_present; + -+#ifdef CONFIG_DEBUG_FS ++ u32 js_present; ++ u32 js_features[GPU_MAX_JOB_SLOTS]; ++ u32 tiler_features; ++ u32 texture_features[3]; + -+/* Number of entries in serialize_jobs_settings[] */ -+#define NR_SERIALIZE_JOBS_SETTINGS 5 -+/* Maximum string length in serialize_jobs_settings[].name */ -+#define MAX_SERIALIZE_JOBS_NAME_LEN 16 ++ u32 gpu_id; + -+static struct -+{ -+ char *name; -+ u8 setting; -+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { -+ {"none", 0}, -+ {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, -+ {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, -+ {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, -+ {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | -+ KBASE_SERIALIZE_RESET} ++ u32 thread_max_threads; ++ u32 thread_max_workgroup_size; ++ u32 thread_max_barrier_size; ++ u32 thread_features; ++ ++ /* ++ * Note: This is the _selected_ coherency mode rather than the ++ * available modes as exposed in the coherency_features register. ++ */ ++ u32 coherency_mode; +}; + +/** -+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs -+ * file -+ * @sfile: seq_file pointer -+ * @data: Private callback data ++ * Return structure for _mali_base_get_gpu_props(). + * -+ * This function is called to get the contents of the serialize_jobs debugfs -+ * file. This is a list of the available settings with the currently active one -+ * surrounded by square brackets. ++ * NOTE: the raw_props member in this data structure contains the register ++ * values from which the value of the other members are derived. The derived ++ * members exist to allow for efficient access and/or shielding the details ++ * of the layout of the registers. + * -+ * Return: 0 on success, or an error code on error + */ -+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_device *kbdev = sfile->private; -+ int i; ++typedef struct mali_base_gpu_props { ++ struct mali_base_gpu_core_props core_props; ++ struct mali_base_gpu_l2_cache_props l2_props; ++ u64 unused_1; /* keep for backwards compatibility */ ++ struct mali_base_gpu_tiler_props tiler_props; ++ struct mali_base_gpu_thread_props thread_props; + -+ CSTD_UNUSED(data); ++ /** This member is large, likely to be 128 bytes */ ++ struct gpu_raw_gpu_props raw_props; + -+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { -+ if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) -+ seq_printf(sfile, "[%s] ", -+ serialize_jobs_settings[i].name); -+ else -+ seq_printf(sfile, "%s ", -+ serialize_jobs_settings[i].name); -+ } ++ /** This must be last member of the structure */ ++ struct mali_base_gpu_coherent_group_info coherency_info; ++} base_gpu_props; + -+ seq_puts(sfile, "\n"); ++/** @} end group base_user_api_gpuprops_dyn */ + -+ return 0; -+} ++/** @} end group base_user_api_gpuprops */ + +/** -+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs -+ * debugfs file. 
-+ * @file: File pointer -+ * @ubuf: User buffer containing data to store -+ * @count: Number of bytes in user buffer -+ * @ppos: File position ++ * @addtogroup base_user_api_core User-side Base core APIs ++ * @{ ++ */ ++ ++/** ++ * \enum base_context_create_flags + * -+ * This function is called when the serialize_jobs debugfs file is written to. -+ * It matches the requested setting against the available settings and if a -+ * matching setting is found updates kbdev->serialize_jobs. ++ * Flags to pass to ::base_context_init. ++ * Flags can be ORed together to enable multiple things. + * -+ * Return: @count if the function succeeded. An error code on failure. ++ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must ++ * not collide with them. + */ -+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, -+ const char __user *ubuf, size_t count, loff_t *ppos) -+{ -+ struct seq_file *s = file->private_data; -+ struct kbase_device *kbdev = s->private; -+ char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; -+ int i; -+ bool valid = false; ++enum base_context_create_flags { ++ /** No flags set */ ++ BASE_CONTEXT_CREATE_FLAG_NONE = 0, + -+ CSTD_UNUSED(ppos); ++ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */ ++ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0), + -+ count = min_t(size_t, sizeof(buf) - 1, count); -+ if (copy_from_user(buf, ubuf, count)) -+ return -EFAULT; ++ /** Base context is a 'System Monitor' context for Hardware counters. ++ * ++ * One important side effect of this is that job submission is disabled. */ ++ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1) ++}; + -+ buf[count] = 0; ++/** ++ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init() ++ */ ++#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ ++ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ ++ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) + -+ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { -+ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { -+ kbdev->serialize_jobs = -+ serialize_jobs_settings[i].setting; -+ valid = true; -+ break; -+ } -+ } ++/** ++ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel ++ */ ++#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ ++ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) + -+ if (!valid) { -+ dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); -+ return -EINVAL; -+ } ++/* ++ * Private flags used on the base context ++ * ++ * These start at bit 31, and run down to zero. ++ * ++ * They share the same space as @ref base_context_create_flags, and so must ++ * not collide with them. ++ */ ++/** Private flag tracking whether job descriptor dumping is disabled */ ++#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) + -+ return count; -+} ++/** @} end group base_user_api_core */ ++ ++/** @} end group base_user_api */ + +/** -+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs -+ * debugfs file -+ * @in: inode pointer -+ * @file: file pointer ++ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties ++ * @{ + * -+ * Return: Zero on success, error code on failure ++ * C Pre-processor macros are exposed here to do with Platform ++ * Config. ++ * ++ * These include: ++ * - GPU Properties that are constant on a particular Midgard Family ++ * Implementation e.g. Maximum samples per pixel on Mali-T600. ++ * - General platform config for the GPU, such as the GPU major and minor ++ * revison. 
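User-supplied creation flags are expected to stay within BASE_CONTEXT_CREATE_ALLOWED_FLAGS, and only the BASE_CONTEXT_CREATE_KERNEL_FLAGS subset is passed on to the kernel. A minimal sketch of that filtering; the EXAMPLE_* macros are local copies that mirror the definitions above so the fragment stands alone.

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_CCTX_EMBEDDED			(1u << 0)
#define EXAMPLE_SYSTEM_MONITOR_SUBMIT_DISABLED	(1u << 1)
#define EXAMPLE_ALLOWED_FLAGS \
	(EXAMPLE_CCTX_EMBEDDED | EXAMPLE_SYSTEM_MONITOR_SUBMIT_DISABLED)
#define EXAMPLE_KERNEL_FLAGS	EXAMPLE_SYSTEM_MONITOR_SUBMIT_DISABLED

/* Illustrative only: reject anything outside the allowed mask, e.g. the
 * private bit 31 used for BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED. */
static bool create_flags_valid(uint32_t flags)
{
	return (flags & ~EXAMPLE_ALLOWED_FLAGS) == 0;
}

/* Illustrative only: the subset that would be forwarded to the kernel. */
static uint32_t create_flags_for_kernel(uint32_t flags)
{
	return flags & EXAMPLE_KERNEL_FLAGS;
}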
+ */ -+static int kbasep_serialize_jobs_debugfs_open(struct inode *in, -+ struct file *file) -+{ -+ return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); -+} + -+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { -+ .open = kbasep_serialize_jobs_debugfs_open, -+ .read = seq_read, -+ .write = kbasep_serialize_jobs_debugfs_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++/** @} end group base_plat_config_gpuprops */ + -+#endif /* CONFIG_DEBUG_FS */ ++/** ++ * @addtogroup base_api Base APIs ++ * @{ ++ */ + -+static int kbasep_protected_mode_init(struct kbase_device *kbdev) -+{ -+#ifdef CONFIG_OF -+ struct device_node *protected_node; -+ struct platform_device *pdev; -+ struct protected_mode_device *protected_dev; -+#endif ++/** ++ * @brief The payload for a replay job. This must be in GPU memory. ++ */ ++typedef struct base_jd_replay_payload { ++ /** ++ * Pointer to the first entry in the base_jd_replay_jc list. These ++ * will be replayed in @b reverse order (so that extra ones can be added ++ * to the head in future soft jobs without affecting this soft job) ++ */ ++ u64 tiler_jc_list; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { -+ /* Use native protected ops */ -+ kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), -+ GFP_KERNEL); -+ if (!kbdev->protected_dev) -+ return -ENOMEM; -+ kbdev->protected_dev->data = kbdev; -+ kbdev->protected_ops = &kbase_native_protected_ops; -+ kbdev->protected_mode_support = true; -+ return 0; -+ } ++ /** ++ * Pointer to the fragment job chain. ++ */ ++ u64 fragment_jc; + -+ kbdev->protected_mode_support = false; ++ /** ++ * Pointer to the tiler heap free FBD field to be modified. ++ */ ++ u64 tiler_heap_free; + -+#ifdef CONFIG_OF -+ protected_node = of_parse_phandle(kbdev->dev->of_node, -+ "protected-mode-switcher", 0); ++ /** ++ * Hierarchy mask for the replayed fragment jobs. May be zero. ++ */ ++ u16 fragment_hierarchy_mask; + -+ if (!protected_node) -+ protected_node = of_parse_phandle(kbdev->dev->of_node, -+ "secure-mode-switcher", 0); ++ /** ++ * Hierarchy mask for the replayed tiler jobs. May be zero. ++ */ ++ u16 tiler_hierarchy_mask; + -+ if (!protected_node) { -+ /* If protected_node cannot be looked up then we assume -+ * protected mode is not supported on this platform. */ -+ dev_info(kbdev->dev, "Protected mode not available\n"); -+ return 0; -+ } ++ /** ++ * Default weight to be used for hierarchy levels not in the original ++ * mask. 
++ */ ++ u32 hierarchy_default_weight; + -+ pdev = of_find_device_by_node(protected_node); -+ if (!pdev) -+ return -EINVAL; ++ /** ++ * Core requirements for the tiler job chain ++ */ ++ base_jd_core_req tiler_core_req; + -+ protected_dev = platform_get_drvdata(pdev); -+ if (!protected_dev) -+ return -EPROBE_DEFER; ++ /** ++ * Core requirements for the fragment job chain ++ */ ++ base_jd_core_req fragment_core_req; ++} base_jd_replay_payload; + -+ kbdev->protected_ops = &protected_dev->ops; -+ kbdev->protected_dev = protected_dev; ++#ifdef BASE_LEGACY_UK10_2_SUPPORT ++typedef struct base_jd_replay_payload_uk10_2 { ++ u64 tiler_jc_list; ++ u64 fragment_jc; ++ u64 tiler_heap_free; ++ u16 fragment_hierarchy_mask; ++ u16 tiler_hierarchy_mask; ++ u32 hierarchy_default_weight; ++ u16 tiler_core_req; ++ u16 fragment_core_req; ++ u8 padding[4]; ++} base_jd_replay_payload_uk10_2; ++#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + -+ if (kbdev->protected_ops) { -+ int err; ++/** ++ * @brief An entry in the linked list of job chains to be replayed. This must ++ * be in GPU memory. ++ */ ++typedef struct base_jd_replay_jc { ++ /** ++ * Pointer to next entry in the list. A setting of NULL indicates the ++ * end of the list. ++ */ ++ u64 next; + -+ /* Make sure protected mode is disabled on startup */ -+ mutex_lock(&kbdev->pm.lock); -+ err = kbdev->protected_ops->protected_mode_disable( -+ kbdev->protected_dev); -+ mutex_unlock(&kbdev->pm.lock); ++ /** ++ * Pointer to the job chain. ++ */ ++ u64 jc; + -+ /* protected_mode_disable() returns -EINVAL if not supported */ -+ kbdev->protected_mode_support = (err != -EINVAL); -+ } -+#endif -+ return 0; -+} ++} base_jd_replay_jc; + -+static void kbasep_protected_mode_term(struct kbase_device *kbdev) -+{ -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) -+ kfree(kbdev->protected_dev); -+} ++/* Maximum number of jobs allowed in a fragment chain in the payload of a ++ * replay job */ ++#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256 + -+#ifdef CONFIG_MALI_NO_MALI -+static int kbase_common_reg_map(struct kbase_device *kbdev) -+{ -+ return 0; -+} -+static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -+{ -+} -+#else /* CONFIG_MALI_NO_MALI */ -+static int kbase_common_reg_map(struct kbase_device *kbdev) -+{ -+ int err = -ENOMEM; ++/** @} end group base_api */ + -+ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { -+ dev_err(kbdev->dev, "Register window unavailable\n"); -+ err = -EIO; -+ goto out_region; -+ } ++typedef struct base_profiling_controls { ++ u32 profiling_controls[FBDUMP_CONTROL_MAX]; ++} base_profiling_controls; + -+ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); -+ if (!kbdev->reg) { -+ dev_err(kbdev->dev, "Can't remap register window\n"); -+ err = -EINVAL; -+ goto out_ioremap; -+ } ++/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, ++ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ ++#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + -+ return 0; ++/* Indicate that job dumping is enabled. This could affect certain timers ++ * to account for the performance impact. 
*/ ++#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + -+ out_ioremap: -+ release_mem_region(kbdev->reg_start, kbdev->reg_size); -+ out_region: -+ return err; -+} ++#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ ++ BASE_TLSTREAM_JOB_DUMPING_ENABLED) + -+static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -+{ -+ if (kbdev->reg) { -+ iounmap(kbdev->reg); -+ release_mem_region(kbdev->reg_start, kbdev->reg_size); -+ kbdev->reg = NULL; -+ kbdev->reg_start = 0; -+ kbdev->reg_size = 0; -+ } -+} -+#endif /* CONFIG_MALI_NO_MALI */ ++#endif /* _BASE_KERNEL_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h +new file mode 100644 +index 000000000..4a98a72cc +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h +@@ -0,0 +1,52 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static int registers_map(struct kbase_device * const kbdev) -+{ + -+ /* the first memory resource is the physical address of the GPU -+ * registers */ -+ struct platform_device *pdev = to_platform_device(kbdev->dev); -+ struct resource *reg_res; -+ int err; + -+ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ if (!reg_res) { -+ dev_err(kbdev->dev, "Invalid register resource\n"); -+ return -ENOENT; -+ } + -+ kbdev->reg_start = reg_res->start; -+ kbdev->reg_size = resource_size(reg_res); + -+ err = kbase_common_reg_map(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Failed to map registers\n"); -+ return err; -+ } ++#ifndef _BASE_MEM_PRIV_H_ ++#define _BASE_MEM_PRIV_H_ + -+ return 0; -+} ++#define BASE_SYNCSET_OP_MSYNC (1U << 0) ++#define BASE_SYNCSET_OP_CSYNC (1U << 1) + -+static void registers_unmap(struct kbase_device *kbdev) -+{ -+ kbase_common_reg_unmap(kbdev); -+} ++/* ++ * This structure describe a basic memory coherency operation. ++ * It can either be: ++ * @li a sync from CPU to Memory: ++ * - type = ::BASE_SYNCSET_OP_MSYNC ++ * - mem_handle = a handle to the memory object on which the operation ++ * is taking place ++ * - user_addr = the address of the range to be synced ++ * - size = the amount of data to be synced, in bytes ++ * - offset is ignored. ++ * @li a sync from Memory to CPU: ++ * - type = ::BASE_SYNCSET_OP_CSYNC ++ * - mem_handle = a handle to the memory object on which the operation ++ * is taking place ++ * - user_addr = the address of the range to be synced ++ * - size = the amount of data to be synced, in bytes. ++ * - offset is ignored. 
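A CPU-to-memory sync as described above would be populated along the following lines, using the basep_syncset layout defined immediately below. The sketch assumes this header and the kernel's string helpers are in scope; fill_msync_op() and its parameter names are illustrative.

/* Illustrative only: fill a BASE_SYNCSET_OP_MSYNC operation. Assumes
 * mali_base_mem_priv.h and <linux/string.h> are included. */
static void fill_msync_op(struct basep_syncset *sset,
			  base_mem_handle handle, u64 addr, u64 len)
{
	memset(sset, 0, sizeof(*sset));	/* also clears the padding bytes */
	sset->mem_handle = handle;
	sset->user_addr  = addr;	/* start of the range to be synced */
	sset->size       = len;		/* amount of data to sync, in bytes */
	sset->type       = BASE_SYNCSET_OP_MSYNC;
}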
++ */ ++struct basep_syncset { ++ base_mem_handle mem_handle; ++ u64 user_addr; ++ u64 size; ++ u8 type; ++ u8 padding[7]; ++}; + -+static int power_control_init(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); -+ int err = 0; ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h +new file mode 100644 +index 000000000..be454a216 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h +@@ -0,0 +1,24 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ if (!kbdev) -+ return -ENODEV; + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_REGULATOR) -+ kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); -+ if (IS_ERR_OR_NULL(kbdev->regulator)) { -+ err = PTR_ERR(kbdev->regulator); -+ kbdev->regulator = NULL; -+ if (err == -EPROBE_DEFER) { -+ dev_err(&pdev->dev, "Failed to get regulator\n"); -+ return err; -+ } -+ dev_info(kbdev->dev, -+ "Continuing without Mali regulator control\n"); -+ /* Allow probe to continue without regulator */ -+ } -+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + -+ kbdev->clock = clk_get(kbdev->dev, "clk_mali"); -+ if (IS_ERR_OR_NULL(kbdev->clock)) { -+ err = PTR_ERR(kbdev->clock); -+ kbdev->clock = NULL; -+ if (err == -EPROBE_DEFER) { -+ dev_err(&pdev->dev, "Failed to get clock\n"); -+ goto fail; -+ } -+ dev_info(kbdev->dev, "Continuing without Mali clock control\n"); -+ /* Allow probe to continue without clock. */ -+ } else { -+ err = clk_prepare(kbdev->clock); -+ if (err) { -+ dev_err(kbdev->dev, -+ "Failed to prepare and enable clock (%d)\n", -+ err); -+ goto fail; -+ } -+ } + -+ err = kbase_platform_rk_init_opp_table(kbdev); -+ if (err) -+ dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err); ++#ifndef _BASE_VENDOR_SPEC_FUNC_H_ ++#define _BASE_VENDOR_SPEC_FUNC_H_ + -+ return 0; ++int kbase_get_vendor_specific_cpu_clock_speed(u32 * const); + -+fail: ++#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h +new file mode 100644 +index 000000000..0d9bf23dc +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase.h +@@ -0,0 +1,612 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+if (kbdev->clock != NULL) { -+ clk_put(kbdev->clock); -+ kbdev->clock = NULL; -+} + -+#ifdef CONFIG_REGULATOR -+ if (NULL != kbdev->regulator) { -+ regulator_put(kbdev->regulator); -+ kbdev->regulator = NULL; -+ } -+#endif + -+ return err; -+} + -+static void power_control_term(struct kbase_device *kbdev) -+{ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \ -+ defined(LSK_OPPV2_BACKPORT) -+ dev_pm_opp_of_remove_table(kbdev->dev); -+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) -+ of_free_opp_table(kbdev->dev); -+#endif + -+ if (kbdev->clock) { -+ clk_unprepare(kbdev->clock); -+ clk_put(kbdev->clock); -+ kbdev->clock = NULL; -+ } ++#ifndef _KBASE_H_ ++#define _KBASE_H_ + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ -+ && defined(CONFIG_REGULATOR) -+ if (kbdev->regulator) { -+ regulator_put(kbdev->regulator); -+ kbdev->regulator = NULL; -+ } -+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -+} ++#include + -+#ifdef CONFIG_DEBUG_FS ++#include + -+#if KBASE_GPU_RESET_EN -+#include ++#include + -+static void trigger_quirks_reload(struct kbase_device *kbdev) -+{ -+ kbase_pm_context_active(kbdev); -+ if (kbase_prepare_to_reset_gpu(kbdev)) -+ kbase_reset_gpu(kbdev); -+ kbase_pm_context_idle(kbdev); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#define MAKE_QUIRK_ACCESSORS(type) \ -+static int type##_quirks_set(void *data, u64 val) \ -+{ \ -+ struct kbase_device *kbdev; \ -+ kbdev = (struct kbase_device *)data; \ -+ kbdev->hw_quirks_##type = (u32)val; \ -+ trigger_quirks_reload(kbdev); \ -+ return 0;\ -+} \ -+\ -+static int type##_quirks_get(void *data, u64 *val) \ -+{ \ -+ struct kbase_device *kbdev;\ -+ kbdev = (struct kbase_device *)data;\ -+ *val = kbdev->hw_quirks_##type;\ -+ return 0;\ -+} \ -+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ -+ type##_quirks_set, "%llu\n") ++#include "mali_base_kernel.h" ++#include ++#include + -+MAKE_QUIRK_ACCESSORS(sc); -+MAKE_QUIRK_ACCESSORS(tiler); -+MAKE_QUIRK_ACCESSORS(mmu); -+MAKE_QUIRK_ACCESSORS(jm); ++/* ++ * Include mali_kbase_defs.h first as this provides types needed by other local ++ * header files. 
++ */ ++#include "mali_kbase_defs.h" + -+#endif /* KBASE_GPU_RESET_EN */ ++#include "mali_kbase_context.h" ++#include "mali_kbase_strings.h" ++#include "mali_kbase_mem_lowlevel.h" ++#include "mali_kbase_trace_timeline.h" ++#include "mali_kbase_js.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_utility.h" ++#include "mali_kbase_gpu_memory_debugfs.h" ++#include "mali_kbase_mem_profile_debugfs.h" ++#include "mali_kbase_debug_job_fault.h" ++#include "mali_kbase_jd_debugfs.h" ++#include "mali_kbase_gpuprops.h" ++#include "mali_kbase_jm.h" ++#include "mali_kbase_vinstr.h" ++ ++#include "ipa/mali_kbase_ipa.h" + ++#ifdef CONFIG_GPU_TRACEPOINTS ++#include ++#endif +/** -+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read -+ * @file: File object to read is for -+ * @buf: User buffer to populate with data -+ * @len: Length of user buffer -+ * @ppos: Offset within file object -+ * -+ * Retrieves the current status of protected debug mode -+ * (0 = disabled, 1 = enabled) -+ * -+ * Return: Number of bytes added to user buffer ++ * @page page_base_kernel_main Kernel-side Base (KBase) APIs + */ -+static ssize_t debugfs_protected_debug_mode_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)file->private_data; -+ u32 gpu_status; -+ ssize_t ret_val; + -+ kbase_pm_context_active(kbdev); -+ gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); -+ kbase_pm_context_idle(kbdev); -+ -+ if (gpu_status & GPU_DBGEN) -+ ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); -+ else -+ ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); ++/** ++ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs ++ */ + -+ return ret_val; -+} ++struct kbase_device *kbase_device_alloc(void); ++/* ++* note: configuration attributes member of kbdev needs to have ++* been setup before calling kbase_device_init ++*/ + +/* -+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops -+ * -+ * Contains the file operations for the "protected_debug_mode" debugfs file -+ */ -+static const struct file_operations fops_protected_debug_mode = { -+ .open = simple_open, -+ .read = debugfs_protected_debug_mode_read, -+ .llseek = default_llseek, -+}; ++* API to acquire device list semaphore and return pointer ++* to the device list head ++*/ ++const struct list_head *kbase_dev_list_get(void); ++/* API to release the device list semaphore */ ++void kbase_dev_list_put(const struct list_head *dev_list); + -+static int kbase_device_debugfs_init(struct kbase_device *kbdev) -+{ -+ struct dentry *debugfs_ctx_defaults_directory; -+ int err; ++int kbase_device_init(struct kbase_device * const kbdev); ++void kbase_device_term(struct kbase_device *kbdev); ++void kbase_device_free(struct kbase_device *kbdev); ++int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); + -+ kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, -+ NULL); -+ if (!kbdev->mali_debugfs_directory) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); -+ err = -ENOMEM; -+ goto out; -+ } ++/* Needed for gator integration and for reporting vsync information */ ++struct kbase_device *kbase_find_device(int minor); ++void kbase_release_device(struct kbase_device *kbdev); + -+ kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", -+ kbdev->mali_debugfs_directory); -+ if (!kbdev->debugfs_ctx_directory) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); -+ err = 
-ENOMEM; -+ goto out; -+ } ++void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value); + -+ debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", -+ kbdev->debugfs_ctx_directory); -+ if (!debugfs_ctx_defaults_directory) { -+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); -+ err = -ENOMEM; -+ goto out; -+ } ++struct kbase_context * ++kbase_create_context(struct kbase_device *kbdev, bool is_compat); ++void kbase_destroy_context(struct kbase_context *kctx); + -+#if !MALI_CUSTOMER_RELEASE -+ kbasep_regs_dump_debugfs_init(kbdev); -+#endif /* !MALI_CUSTOMER_RELEASE */ -+ kbasep_regs_history_debugfs_init(kbdev); ++int kbase_jd_init(struct kbase_context *kctx); ++void kbase_jd_exit(struct kbase_context *kctx); + -+ kbase_debug_job_fault_debugfs_init(kbdev); -+ kbasep_gpu_memory_debugfs_init(kbdev); -+ kbase_as_fault_debugfs_init(kbdev); -+#if KBASE_GPU_RESET_EN -+ /* fops_* variables created by invocations of macro -+ * MAKE_QUIRK_ACCESSORS() above. */ -+ debugfs_create_file("quirks_sc", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_sc_quirks); -+ debugfs_create_file("quirks_tiler", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_tiler_quirks); -+ debugfs_create_file("quirks_mmu", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_mmu_quirks); -+ debugfs_create_file("quirks_jm", 0644, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_jm_quirks); -+#endif /* KBASE_GPU_RESET_EN */ ++/** ++ * kbase_jd_submit - Submit atoms to the job dispatcher ++ * ++ * @kctx: The kbase context to submit to ++ * @user_addr: The address in user space of the struct base_jd_atom_v2 array ++ * @nr_atoms: The number of atoms in the array ++ * @stride: sizeof(struct base_jd_atom_v2) ++ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) ++ * ++ * Return: 0 on success or error code ++ */ ++int kbase_jd_submit(struct kbase_context *kctx, ++ void __user *user_addr, u32 nr_atoms, u32 stride, ++ bool uk6_atom); + -+#ifndef CONFIG_MALI_COH_USER -+ debugfs_create_bool("infinite_cache", 0644, -+ debugfs_ctx_defaults_directory, -+ (bool*)&(kbdev->infinite_cache_active_default)); -+#endif /* CONFIG_MALI_COH_USER */ ++/** ++ * kbase_jd_done_worker - Handle a job completion ++ * @data: a &struct work_struct ++ * ++ * This function requeues the job from the runpool (if it was soft-stopped or ++ * removed from NEXT registers). ++ * ++ * Removes it from the system if it finished/failed/was cancelled. ++ * ++ * Resolves dependencies to add dependent jobs to the context, potentially ++ * starting them if necessary (which may add more references to the context) ++ * ++ * Releases the reference to the context from the no-longer-running job. ++ * ++ * Handles retrying submission outside of IRQ context if it failed from within ++ * IRQ context. 
++ */ ++void kbase_jd_done_worker(struct work_struct *data); + -+ debugfs_create_size_t("mem_pool_max_size", 0644, -+ debugfs_ctx_defaults_directory, -+ &kbdev->mem_pool_max_size_default); ++void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, ++ kbasep_js_atom_done_code done_code); ++void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); ++void kbase_jd_zap_context(struct kbase_context *kctx); ++bool jd_done_nolock(struct kbase_jd_atom *katom, ++ struct list_head *completed_jobs_ctx); ++void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); ++bool jd_submit_atom(struct kbase_context *kctx, ++ const struct base_jd_atom_v2 *user_atom, ++ struct kbase_jd_atom *katom); ++void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { -+ debugfs_create_file("protected_debug_mode", S_IRUGO, -+ kbdev->mali_debugfs_directory, kbdev, -+ &fops_protected_debug_mode); -+ } ++void kbase_job_done(struct kbase_device *kbdev, u32 done); + -+#if KBASE_TRACE_ENABLE -+ kbasep_trace_debugfs_init(kbdev); -+#endif /* KBASE_TRACE_ENABLE */ ++/** ++ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms ++ * and soft stop them ++ * @kctx: Pointer to context to check. ++ * @katom: Pointer to priority atom. ++ * ++ * Atoms from @kctx on the same job slot as @katom, which have lower priority ++ * than @katom will be soft stopped and put back in the queue, so that atoms ++ * with higher priority can run. ++ * ++ * The hwaccess_lock must be held when calling this function. ++ */ ++void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ kbasep_trace_timeline_debugfs_init(kbdev); -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ ++void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom); ++void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, ++ struct kbase_jd_atom *target_katom, u32 sw_flags); ++void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, ++ struct kbase_jd_atom *target_katom); ++void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, ++ base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom); + -+#ifdef CONFIG_MALI_DEVFREQ -+#ifdef CONFIG_DEVFREQ_THERMAL -+ if ((kbdev->inited_subsys & inited_devfreq) && kbdev->devfreq_cooling) -+ kbase_ipa_debugfs_init(kbdev); -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_DEVFREQ */ ++void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); ++int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); ++int kbase_event_pending(struct kbase_context *ctx); ++int kbase_event_init(struct kbase_context *kctx); ++void kbase_event_close(struct kbase_context *kctx); ++void kbase_event_cleanup(struct kbase_context *kctx); ++void kbase_event_wakeup(struct kbase_context *kctx); + -+#ifdef CONFIG_DEBUG_FS -+ debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_serialize_jobs_debugfs_fops); -+#endif /* CONFIG_DEBUG_FS */ ++int kbase_process_soft_job(struct kbase_jd_atom *katom); ++int kbase_prepare_soft_job(struct kbase_jd_atom *katom); ++void kbase_finish_soft_job(struct kbase_jd_atom *katom); ++void 
kbase_cancel_soft_job(struct kbase_jd_atom *katom); ++void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); ++void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); ++#endif ++int kbase_soft_event_update(struct kbase_context *kctx, ++ u64 event, ++ unsigned char new_status); + -+ return 0; ++bool kbase_replay_process(struct kbase_jd_atom *katom); + -+out: -+ debugfs_remove_recursive(kbdev->mali_debugfs_directory); -+ return err; -+} ++void kbasep_soft_job_timeout_worker(struct timer_list *t); ++void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); + -+static void kbase_device_debugfs_term(struct kbase_device *kbdev) ++/* api used internally for register access. Contains validation and tracing */ ++void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); ++int kbase_device_trace_buffer_install( ++ struct kbase_context *kctx, u32 *tb, size_t size); ++void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); ++ ++/* api to be ported per OS, only need to do the raw register access */ ++void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); ++u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); ++ ++void kbasep_as_do_poke(struct work_struct *work); ++ ++/** Returns the name associated with a Mali exception code ++ * ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN. ++ * ++ * @param[in] kbdev The kbase device that the GPU fault occurred from. ++ * @param[in] exception_code exception code ++ * @return name associated with the exception code ++ */ ++const char *kbase_exception_name(struct kbase_device *kbdev, ++ u32 exception_code); ++ ++/** ++ * Check whether a system suspend is in progress, or has already been suspended ++ * ++ * The caller should ensure that either kbdev->pm.active_count_lock is held, or ++ * a dmb was executed recently (to ensure the value is most ++ * up-to-date). However, without a lock the value could change afterwards. 
++ * ++ * @return false if a suspend is not in progress ++ * @return !=false otherwise ++ */ ++static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) +{ -+ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++ return kbdev->pm.suspending; +} + -+#else /* CONFIG_DEBUG_FS */ -+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) ++/** ++ * Return the atom's ID, as was originally supplied by userspace in ++ * base_jd_atom_v2::atom_number ++ */ ++static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ return 0; -+} ++ int result; + -+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } -+#endif /* CONFIG_DEBUG_FS */ ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->kctx == kctx); + -+static void kbase_device_coherency_init(struct kbase_device *kbdev, -+ unsigned prod_id) ++ result = katom - &kctx->jctx.atoms[0]; ++ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); ++ return result; ++} ++ ++/** ++ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID ++ * @kctx: Context pointer ++ * @id: ID of atom to retrieve ++ * ++ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID ++ */ ++static inline struct kbase_jd_atom *kbase_jd_atom_from_id( ++ struct kbase_context *kctx, int id) +{ -+#ifdef CONFIG_OF -+ u32 supported_coherency_bitmap = -+ kbdev->gpu_props.props.raw_props.coherency_mode; -+ const void *coherency_override_dts; -+ u32 override_coherency; ++ return &kctx->jctx.atoms[id]; ++} + -+ /* Only for tMIx : -+ * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly -+ * documented for tMIx so force correct value here. -+ */ -+ if (GPU_ID_IS_NEW_FORMAT(prod_id) && -+ (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == -+ GPU_ID2_PRODUCT_TMIX)) -+ if (supported_coherency_bitmap == -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) -+ supported_coherency_bitmap |= -+ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); ++/** ++ * Initialize the disjoint state ++ * ++ * The disjoint event count and state are both set to zero. ++ * ++ * Disjoint functions usage: ++ * ++ * The disjoint event count should be incremented whenever a disjoint event occurs. ++ * ++ * There are several cases which are regarded as disjoint behavior. Rather than just increment ++ * the counter during disjoint events we also increment the counter when jobs may be affected ++ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. ++ * ++ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying ++ * (as part of the replay workaround). Increasing the disjoint state also increases the count of ++ * disjoint events. ++ * ++ * The disjoint state is then used to increase the count of disjoint events during job submission ++ * and job completion. Any atom submitted or completed while the disjoint state is greater than ++ * zero is regarded as a disjoint event. ++ * ++ * The disjoint event counter is also incremented immediately whenever a job is soft stopped ++ * and during context creation. 
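The calling pattern implied by the explanation above is roughly the following; the kbase_disjoint_* helpers themselves are declared just below. The wrapper names are invented for illustration only.

/* Illustrative only: hold the disjoint state across a reset window.
 * kbase_disjoint_state_up() also counts one disjoint event. */
static void example_gpu_reset(struct kbase_device *kbdev)
{
	kbase_disjoint_state_up(kbdev);
	/* ... perform the reset ... */
	kbase_disjoint_state_down(kbdev);
}

/* Illustrative only: at submission/completion, count an event only while
 * the disjoint state refcount is greater than zero. */
static void example_on_atom_submitted(struct kbase_device *kbdev)
{
	kbase_disjoint_event_potential(kbdev);
}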
++ * ++ * @param kbdev The kbase device ++ */ ++void kbase_disjoint_init(struct kbase_device *kbdev); + -+#endif /* CONFIG_OF */ ++/** ++ * Increase the count of disjoint events ++ * called when a disjoint event has happened ++ * ++ * @param kbdev The kbase device ++ */ ++void kbase_disjoint_event(struct kbase_device *kbdev); + -+ kbdev->system_coherency = COHERENCY_NONE; ++/** ++ * Increase the count of disjoint events only if the GPU is in a disjoint state ++ * ++ * This should be called when something happens which could be disjoint if the GPU ++ * is in a disjoint state. The state refcount keeps track of this. ++ * ++ * @param kbdev The kbase device ++ */ ++void kbase_disjoint_event_potential(struct kbase_device *kbdev); + -+ /* device tree may override the coherency */ -+#ifdef CONFIG_OF -+ coherency_override_dts = of_get_property(kbdev->dev->of_node, -+ "system-coherency", -+ NULL); -+ if (coherency_override_dts) { ++/** ++ * Returns the count of disjoint events ++ * ++ * @param kbdev The kbase device ++ * @return the count of disjoint events ++ */ ++u32 kbase_disjoint_event_get(struct kbase_device *kbdev); + -+ override_coherency = be32_to_cpup(coherency_override_dts); ++/** ++ * Increment the refcount state indicating that the GPU is in a disjoint state. ++ * ++ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) ++ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down ++ * should be called ++ * ++ * @param kbdev The kbase device ++ */ ++void kbase_disjoint_state_up(struct kbase_device *kbdev); + -+ if ((override_coherency <= COHERENCY_NONE) && -+ (supported_coherency_bitmap & -+ COHERENCY_FEATURE_BIT(override_coherency))) { ++/** ++ * Decrement the refcount state ++ * ++ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) ++ * ++ * Called after @ref kbase_disjoint_state_up once the disjoint state is over ++ * ++ * @param kbdev The kbase device ++ */ ++void kbase_disjoint_state_down(struct kbase_device *kbdev); + -+ kbdev->system_coherency = override_coherency; ++/** ++ * If a job is soft stopped and the number of contexts is >= this value ++ * it is reported as a disjoint event ++ */ ++#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 + -+ dev_info(kbdev->dev, -+ "Using coherency mode %u set from dtb", -+ override_coherency); -+ } else -+ dev_warn(kbdev->dev, -+ "Ignoring unsupported coherency mode %u set from dtb", -+ override_coherency); -+ } ++#if !defined(UINT64_MAX) ++ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) ++#endif + -+#endif /* CONFIG_OF */ ++#if KBASE_TRACE_ENABLE ++void kbasep_trace_debugfs_init(struct kbase_device *kbdev); + -+ kbdev->gpu_props.props.raw_props.coherency_mode = -+ kbdev->system_coherency; -+} ++#ifndef CONFIG_MALI_SYSTEM_TRACE ++/** Add trace values about a job-slot ++ * ++ * @note Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any ++ * functions called to get the parameters supplied to this macro must: ++ * - be static or static inline ++ * - must just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \ ++ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ ++ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0) + -+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++/** Add trace values about a job-slot, with info ++ * ++ * @note Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any ++ * functions called to get the parameters supplied to this macro must: ++ * - be static or static inline ++ * - must just return 0 and have no other statements present in the body. ++ */ ++#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \ ++ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ ++ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val) + -+/* Callback used by the kbase bus logger client, to initiate a GPU reset -+ * when the bus log is restarted. GPU reset is used as reference point -+ * in HW bus log analyses. ++/** Add trace values about a ctx refcount ++ * ++ * @note Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any ++ * functions called to get the parameters supplied to this macro must: ++ * - be static or static inline ++ * - must just return 0 and have no other statements present in the body. + */ -+static void kbase_logging_started_cb(void *data) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)data; ++#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \ ++ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ ++ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0) ++/** Add trace values about a ctx refcount, and info ++ * ++ * @note Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any ++ * functions called to get the parameters supplied to this macro must: ++ * - be static or static inline ++ * - must just return 0 and have no other statements present in the body. ++ */ ++#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \ ++ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ ++ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val) + -+ if (kbase_prepare_to_reset_gpu(kbdev)) -+ kbase_reset_gpu(kbdev); -+ dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); -+} -+#endif ++/** Add trace values (no slot or refcount) ++ * ++ * @note Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any ++ * functions called to get the parameters supplied to this macro must: ++ * - be static or static inline ++ * - must just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ ++ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ ++ 0, 0, 0, info_val) + -+static struct attribute *kbase_attrs[] = { -+#ifdef CONFIG_MALI_DEBUG -+ &dev_attr_debug_command.attr, -+ &dev_attr_js_softstop_always.attr, -+#endif -+#if !MALI_CUSTOMER_RELEASE -+ &dev_attr_force_replay.attr, -+#endif -+ &dev_attr_js_timeouts.attr, -+ &dev_attr_soft_job_timeout.attr, -+ &dev_attr_gpuinfo.attr, -+ &dev_attr_dvfs_period.attr, -+ &dev_attr_pm_poweroff.attr, -+ &dev_attr_reset_timeout.attr, -+ &dev_attr_js_scheduling_period.attr, -+ &dev_attr_power_policy.attr, -+ &dev_attr_core_availability_policy.attr, -+ &dev_attr_core_mask.attr, -+ &dev_attr_mem_pool_size.attr, -+ &dev_attr_mem_pool_max_size.attr, -+ NULL -+}; ++/** Clear the trace */ ++#define KBASE_TRACE_CLEAR(kbdev) \ ++ kbasep_trace_clear(kbdev) + -+static const struct attribute_group kbase_attr_group = { -+ .attrs = kbase_attrs, -+}; ++/** Dump the slot trace */ ++#define KBASE_TRACE_DUMP(kbdev) \ ++ kbasep_trace_dump(kbdev) + -+static int kbase_platform_device_remove(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); -+ const struct list_head *dev_list; ++/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ ++void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); ++/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */ ++void kbasep_trace_clear(struct kbase_device *kbdev); ++#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ ++/* Dispatch kbase trace events as system trace events */ ++#include ++#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ ++ trace_mali_##code(jobslot, 0) + -+ if (!kbdev) -+ return -ENODEV; ++#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ ++ trace_mali_##code(jobslot, info_val) + -+ kfree(kbdev->gpu_props.prop_buffer); ++#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ ++ trace_mali_##code(refcount, 0) + -+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -+ if (kbdev->inited_subsys & inited_buslogger) { -+ bl_core_client_unregister(kbdev->buslogger); -+ kbdev->inited_subsys &= ~inited_buslogger; -+ } -+#endif ++#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ ++ trace_mali_##code(refcount, info_val) + ++#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\ ++ trace_mali_##code(gpu_addr, info_val) + -+ if (kbdev->inited_subsys & inited_dev_list) { -+ dev_list = kbase_dev_list_get(); -+ list_del(&kbdev->entry); -+ kbase_dev_list_put(dev_list); -+ kbdev->inited_subsys &= ~inited_dev_list; -+ } ++#define KBASE_TRACE_CLEAR(kbdev)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(0);\ ++ } while (0) ++#define KBASE_TRACE_DUMP(kbdev)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ if (kbdev->inited_subsys & inited_misc_register) { -+ misc_deregister(&kbdev->mdev); -+ kbdev->inited_subsys &= ~inited_misc_register; -+ } ++#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ ++#else ++#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(ctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(jobslot);\ ++ } while (0) + -+ if (kbdev->inited_subsys & inited_sysfs_group) 
{ -+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); -+ kbdev->inited_subsys &= ~inited_sysfs_group; -+ } ++#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(ctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(jobslot);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ if (kbdev->inited_subsys & inited_get_device) { -+ put_device(kbdev->dev); -+ kbdev->inited_subsys &= ~inited_get_device; -+ } ++#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(ctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(refcount);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ if (kbdev->inited_subsys & inited_debugfs) { -+ kbase_device_debugfs_term(kbdev); -+ kbdev->inited_subsys &= ~inited_debugfs; -+ } ++#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(ctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+ if (kbdev->inited_subsys & inited_job_fault) { -+ kbase_debug_job_fault_dev_term(kbdev); -+ kbdev->inited_subsys &= ~inited_job_fault; -+ } -+ if (kbdev->inited_subsys & inited_vinstr) { -+ kbase_vinstr_term(kbdev->vinstr_ctx); -+ kbdev->inited_subsys &= ~inited_vinstr; -+ } ++#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(subcode);\ ++ CSTD_UNUSED(ctx);\ ++ CSTD_UNUSED(katom);\ ++ CSTD_UNUSED(val);\ ++ CSTD_NOP(0);\ ++ } while (0) + -+#ifdef CONFIG_MALI_DEVFREQ -+ if (kbdev->inited_subsys & inited_devfreq) { -+ kbase_devfreq_term(kbdev); -+ kbdev->inited_subsys &= ~inited_devfreq; -+ } -+#endif ++#define KBASE_TRACE_CLEAR(kbdev)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(0);\ ++ } while (0) ++#define KBASE_TRACE_DUMP(kbdev)\ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(0);\ ++ } while (0) ++#endif /* KBASE_TRACE_ENABLE */ ++/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */ ++void kbasep_trace_dump(struct kbase_device *kbdev); + -+ if (kbdev->inited_subsys & inited_backend_late) { -+ kbase_backend_late_term(kbdev); -+ kbdev->inited_subsys &= ~inited_backend_late; -+ } ++#ifdef CONFIG_MALI_DEBUG ++/** ++ * kbase_set_driver_inactive - Force driver to go inactive ++ * @kbdev: Device pointer ++ * @inactive: true if driver should go inactive, false otherwise ++ * ++ * Forcing the driver inactive will cause all future IOCTLs to wait until the ++ * driver is made active again. This is intended solely for the use of tests ++ * which require that no jobs are running while the test executes. 
++ */ ++void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive); ++#endif /* CONFIG_MALI_DEBUG */ + -+ if (kbdev->inited_subsys & inited_tlstream) { -+ kbase_tlstream_term(); -+ kbdev->inited_subsys &= ~inited_tlstream; -+ } + -+ /* Bring job and mem sys to a halt before we continue termination */ ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + -+ if (kbdev->inited_subsys & inited_js) -+ kbasep_js_devdata_halt(kbdev); ++/* kbase_io_history_init - initialize data struct for register access history ++ * ++ * @kbdev The register history to initialize ++ * @n The number of register accesses that the buffer could hold ++ * ++ * @return 0 if successfully initialized, failure otherwise ++ */ ++int kbase_io_history_init(struct kbase_io_history *h, u16 n); + -+ if (kbdev->inited_subsys & inited_mem) -+ kbase_mem_halt(kbdev); ++/* kbase_io_history_term - uninit all resources for the register access history ++ * ++ * @h The register history to terminate ++ */ ++void kbase_io_history_term(struct kbase_io_history *h); + -+ if (kbdev->inited_subsys & inited_protected) { -+ kbasep_protected_mode_term(kbdev); -+ kbdev->inited_subsys &= ~inited_protected; -+ } ++/* kbase_io_history_dump - print the register history to the kernel ring buffer ++ * ++ * @kbdev Pointer to kbase_device containing the register history to dump ++ */ ++void kbase_io_history_dump(struct kbase_device *kbdev); + -+ if (kbdev->inited_subsys & inited_js) { -+ kbasep_js_devdata_term(kbdev); -+ kbdev->inited_subsys &= ~inited_js; -+ } ++/** ++ * kbase_io_history_resize - resize the register access history buffer. ++ * ++ * @h: Pointer to a valid register history to resize ++ * @new_size: Number of accesses the buffer could hold ++ * ++ * A successful resize will clear all recent register accesses. ++ * If resizing fails for any reason (e.g., could not allocate memory, invalid ++ * buffer size) then the original buffer will be kept intact. ++ * ++ * @return 0 if the buffer was resized, failure otherwise ++ */ ++int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); + -+ if (kbdev->inited_subsys & inited_mem) { -+ kbase_mem_term(kbdev); -+ kbdev->inited_subsys &= ~inited_mem; -+ } ++#else /* CONFIG_DEBUG_FS */ + -+ if (kbdev->inited_subsys & inited_pm_runtime_init) { -+ kbdev->pm.callback_power_runtime_term(kbdev); -+ kbdev->inited_subsys &= ~inited_pm_runtime_init; -+ } ++#define kbase_io_history_init(...) 
((int)0) + -+ if (kbdev->inited_subsys & inited_ctx_sched) { -+ kbase_ctx_sched_term(kbdev); -+ kbdev->inited_subsys &= ~inited_ctx_sched; -+ } ++#define kbase_io_history_term CSTD_NOP + -+ if (kbdev->inited_subsys & inited_device) { -+ kbase_device_term(kbdev); -+ kbdev->inited_subsys &= ~inited_device; -+ } ++#define kbase_io_history_dump CSTD_NOP + -+ if (kbdev->inited_subsys & inited_backend_early) { -+ kbase_backend_early_term(kbdev); -+ kbdev->inited_subsys &= ~inited_backend_early; -+ } ++#define kbase_io_history_resize CSTD_NOP + -+ if (kbdev->inited_subsys & inited_io_history) { -+ kbase_io_history_term(&kbdev->io_history); -+ kbdev->inited_subsys &= ~inited_io_history; -+ } ++#endif /* CONFIG_DEBUG_FS */ + -+ if (kbdev->inited_subsys & inited_power_control) { -+ power_control_term(kbdev); -+ kbdev->inited_subsys &= ~inited_power_control; -+ } + -+ if (kbdev->inited_subsys & inited_registers_map) { -+ registers_unmap(kbdev); -+ kbdev->inited_subsys &= ~inited_registers_map; -+ } ++#endif + -+#ifdef CONFIG_MALI_NO_MALI -+ if (kbdev->inited_subsys & inited_gpu_device) { -+ gpu_device_destroy(kbdev); -+ kbdev->inited_subsys &= ~inited_gpu_device; -+ } -+#endif /* CONFIG_MALI_NO_MALI */ + -+ if (kbdev->inited_subsys != 0) -+ dev_err(kbdev->dev, "Missing sub system termination\n"); + -+ kbase_device_free(kbdev); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c +new file mode 100644 +index 000000000..fde0f8ff8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c +@@ -0,0 +1,209 @@ ++/* ++ * ++ * (C) COPYRIGHT 2013-2015, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ return 0; -+} + -+extern void kbase_platform_rk_shutdown(struct kbase_device *kbdev); -+static void kbase_platform_device_shutdown(struct platform_device *pdev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); ++#include ++#include ++#include + -+ kbase_platform_rk_shutdown(kbdev); -+} ++/* This function is used to solve an HW issue with single iterator GPUs. ++ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the ++ * restart index is out of bounds and the rerun causes a tile range fault. If this happens ++ * we try to clamp the restart index to a correct value and rerun the job. ++ */ ++/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ ++#define X_COORDINATE_MASK 0x00000FFF ++#define Y_COORDINATE_MASK 0x0FFF0000 ++/* Max number of words needed from the fragment shader job descriptor */ ++#define JOB_HEADER_SIZE_IN_WORDS 10 ++#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) + -+/* Number of register accesses for the buffer that we allocate during -+ * initialization time. The buffer size can be changed later via debugfs. 
*/ -+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) ++/* Word 0: Status Word */ ++#define JOB_DESC_STATUS_WORD 0 ++/* Word 1: Restart Index */ ++#define JOB_DESC_RESTART_INDEX_WORD 1 ++/* Word 2: Fault address low word */ ++#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 ++/* Word 8: Minimum Tile Coordinates */ ++#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 ++/* Word 9: Maximum Tile Coordinates */ ++#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 + -+static int kbase_platform_device_probe(struct platform_device *pdev) ++int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) +{ -+ struct kbase_device *kbdev; -+ struct mali_base_gpu_core_props *core_props; -+ u32 gpu_id; -+ unsigned prod_id; -+ const struct list_head *dev_list; -+ int err = 0; ++ struct device *dev = katom->kctx->kbdev->dev; ++ u32 clamped = 0; ++ struct kbase_va_region *region; ++ phys_addr_t *page_array; ++ u64 page_index; ++ u32 offset = katom->jc & (~PAGE_MASK); ++ u32 *page_1 = NULL; ++ u32 *page_2 = NULL; ++ u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; ++ void *dst = job_header; ++ u32 minX, minY, maxX, maxY; ++ u32 restartX, restartY; ++ struct page *p; ++ u32 copy_size; + -+#ifdef CONFIG_OF -+ err = kbase_platform_early_init(); -+ if (err) { -+ dev_err(&pdev->dev, "Early platform initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+#endif -+ kbdev = kbase_device_alloc(); -+ if (!kbdev) { -+ dev_err(&pdev->dev, "Allocate device failed\n"); -+ kbase_platform_device_remove(pdev); -+ return -ENOMEM; -+ } ++ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n"); ++ if (!(katom->core_req & BASE_JD_REQ_FS)) ++ return 0; + -+ kbdev->dev = &pdev->dev; -+ dev_set_drvdata(kbdev->dev, kbdev); ++ kbase_gpu_vm_lock(katom->kctx); ++ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, ++ katom->jc); ++ if (!region || (region->flags & KBASE_REG_FREE)) ++ goto out_unlock; + -+#ifdef CONFIG_MALI_NO_MALI -+ err = gpu_device_create(kbdev); -+ if (err) { -+ dev_err(&pdev->dev, "Dummy model initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_gpu_device; -+#endif /* CONFIG_MALI_NO_MALI */ ++ page_array = kbase_get_cpu_phy_pages(region); ++ if (!page_array) ++ goto out_unlock; + -+ err = assign_irqs(pdev); -+ if (err) { -+ dev_err(&pdev->dev, "IRQ search failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } ++ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; + -+ err = registers_map(kbdev); -+ if (err) { -+ dev_err(&pdev->dev, "Register map failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_registers_map; -+ -+ err = power_control_init(pdev); -+ if (err) { -+ dev_err(&pdev->dev, "Power control initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_power_control; -+ -+ err = kbase_io_history_init(&kbdev->io_history, -+ KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); -+ if (err) { -+ dev_err(&pdev->dev, "Register access history initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return -ENOMEM; -+ } -+ kbdev->inited_subsys |= inited_io_history; -+ -+ err = kbase_backend_early_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Early backend initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_backend_early; -+ -+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", 
kbase_drv_name, -+ kbase_dev_nr); -+ -+ kbase_disjoint_init(kbdev); ++ p = pfn_to_page(PFN_DOWN(page_array[page_index])); + -+ /* obtain min/max configured gpu frequencies */ -+ core_props = &(kbdev->gpu_props.props.core_props); -+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; -+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; ++ /* we need the first 10 words of the fragment shader job descriptor. ++ * We need to check that the offset + 10 words is less that the page ++ * size otherwise we need to load the next page. ++ * page_size_overflow will be equal to 0 in case the whole descriptor ++ * is within the page > 0 otherwise. ++ */ ++ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); + -+ err = kbase_device_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_device; ++ page_1 = kmap_atomic(p); + -+ err = kbase_ctx_sched_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n", -+ err); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_ctx_sched; ++ /* page_1 is a u32 pointer, offset is expressed in bytes */ ++ page_1 += offset>>2; + -+ if (kbdev->pm.callback_power_runtime_init) { -+ err = kbdev->pm.callback_power_runtime_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, -+ "Runtime PM initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_pm_runtime_init; -+ } ++ kbase_sync_single_for_cpu(katom->kctx->kbdev, ++ kbase_dma_addr(p) + offset, ++ copy_size, DMA_BIDIRECTIONAL); + -+ err = kbase_mem_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_mem; ++ memcpy(dst, page_1, copy_size); + -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ gpu_id &= GPU_ID_VERSION_PRODUCT_ID; -+ prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ /* The data needed overflows page the dimension, ++ * need to map the subsequent page */ ++ if (copy_size < JOB_HEADER_SIZE) { ++ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1])); ++ page_2 = kmap_atomic(p); + -+ kbase_device_coherency_init(kbdev, prod_id); ++ kbase_sync_single_for_cpu(katom->kctx->kbdev, ++ kbase_dma_addr(p), ++ JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL); + -+ err = kbasep_protected_mode_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; ++ memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); + } -+ kbdev->inited_subsys |= inited_protected; + -+ dev_list = kbase_dev_list_get(); -+ list_add(&kbdev->entry, &kbase_dev_list); -+ kbase_dev_list_put(dev_list); -+ kbdev->inited_subsys |= inited_dev_list; ++ /* We managed to correctly map one or two pages (in case of overflow) */ ++ /* Get Bounding Box data and restart index from fault address low word */ ++ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; ++ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; ++ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; ++ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; ++ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; ++ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; 
+ -+ err = kbasep_js_devdata_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_js; ++ dev_warn(dev, "Before Clamping:\n" ++ "Jobstatus: %08x\n" ++ "restartIdx: %08x\n" ++ "Fault_addr_low: %08x\n" ++ "minCoordsX: %08x minCoordsY: %08x\n" ++ "maxCoordsX: %08x maxCoordsY: %08x\n", ++ job_header[JOB_DESC_STATUS_WORD], ++ job_header[JOB_DESC_RESTART_INDEX_WORD], ++ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], ++ minX, minY, ++ maxX, maxY); + -+ err = kbase_tlstream_init(); -+ if (err) { -+ dev_err(kbdev->dev, "Timeline stream initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_tlstream; ++ /* Set the restart index to the one which generated the fault*/ ++ job_header[JOB_DESC_RESTART_INDEX_WORD] = ++ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; + -+ err = kbase_backend_late_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Late backend initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; ++ if (restartX < minX) { ++ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; ++ dev_warn(dev, ++ "Clamping restart X index to minimum. %08x clamped to %08x\n", ++ restartX, minX); ++ clamped = 1; + } -+ kbdev->inited_subsys |= inited_backend_late; -+ -+#ifdef CONFIG_MALI_DEVFREQ -+ err = kbase_devfreq_init(kbdev); -+ if (!err) -+ kbdev->inited_subsys |= inited_devfreq; -+ else -+ dev_err(kbdev->dev, "Continuing without devfreq\n"); -+#endif /* CONFIG_MALI_DEVFREQ */ -+ -+ kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); -+ if (!kbdev->vinstr_ctx) { -+ dev_err(kbdev->dev, -+ "Virtual instrumentation initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return -EINVAL; ++ if (restartY < minY) { ++ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; ++ dev_warn(dev, ++ "Clamping restart Y index to minimum. %08x clamped to %08x\n", ++ restartY, minY); ++ clamped = 1; + } -+ kbdev->inited_subsys |= inited_vinstr; -+ -+ err = kbase_debug_job_fault_dev_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "Job fault debug initialization failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; ++ if (restartX > maxX) { ++ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; ++ dev_warn(dev, ++ "Clamping restart X index to maximum. %08x clamped to %08x\n", ++ restartX, maxX); ++ clamped = 1; + } -+ kbdev->inited_subsys |= inited_job_fault; -+ -+ err = kbase_device_debugfs_init(kbdev); -+ if (err) { -+ dev_err(kbdev->dev, "DebugFS initialization failed"); -+ kbase_platform_device_remove(pdev); -+ return err; ++ if (restartY > maxY) { ++ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; ++ dev_warn(dev, ++ "Clamping restart Y index to maximum. 
%08x clamped to %08x\n", ++ restartY, maxY); ++ clamped = 1; + } -+ kbdev->inited_subsys |= inited_debugfs; -+ -+ /* initialize the kctx list */ -+ mutex_init(&kbdev->kctx_list_lock); -+ INIT_LIST_HEAD(&kbdev->kctx_list); -+ -+ kbdev->mdev.minor = MISC_DYNAMIC_MINOR; -+ kbdev->mdev.name = kbdev->devname; -+ kbdev->mdev.fops = &kbase_fops; -+ kbdev->mdev.parent = get_device(kbdev->dev); -+ kbdev->inited_subsys |= inited_get_device; -+ -+ /* This needs to happen before registering the device with misc_register(), -+ * otherwise it causes a race condition between registering the device and a -+ * uevent event being generated for userspace, causing udev rules to run -+ * which might expect certain sysfs attributes present. As a result of the -+ * race condition we avoid, some Mali sysfs entries may have appeared to -+ * udev to not exist. + -+ * For more information, see -+ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the -+ * paragraph that starts with "Word of warning", currently the second-last -+ * paragraph. -+ */ -+ err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); -+ if (err) { -+ dev_err(&pdev->dev, "SysFS group creation failed\n"); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_sysfs_group; ++ if (clamped) { ++ /* Reset the fault address low word ++ * and set the job status to STOPPED */ ++ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; ++ job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; ++ dev_warn(dev, "After Clamping:\n" ++ "Jobstatus: %08x\n" ++ "restartIdx: %08x\n" ++ "Fault_addr_low: %08x\n" ++ "minCoordsX: %08x minCoordsY: %08x\n" ++ "maxCoordsX: %08x maxCoordsY: %08x\n", ++ job_header[JOB_DESC_STATUS_WORD], ++ job_header[JOB_DESC_RESTART_INDEX_WORD], ++ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], ++ minX, minY, ++ maxX, maxY); + -+ err = misc_register(&kbdev->mdev); -+ if (err) { -+ dev_err(kbdev->dev, "Misc device registration failed for %s\n", -+ kbdev->devname); -+ kbase_platform_device_remove(pdev); -+ return err; -+ } -+ kbdev->inited_subsys |= inited_misc_register; ++ /* Flush CPU cache to update memory for future GPU reads*/ ++ memcpy(page_1, dst, copy_size); ++ p = pfn_to_page(PFN_DOWN(page_array[page_index])); + ++ kbase_sync_single_for_device(katom->kctx->kbdev, ++ kbase_dma_addr(p) + offset, ++ copy_size, DMA_TO_DEVICE); + -+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -+ err = bl_core_client_register(kbdev->devname, -+ kbase_logging_started_cb, -+ kbdev, &kbdev->buslogger, -+ THIS_MODULE, NULL); -+ if (err == 0) { -+ kbdev->inited_subsys |= inited_buslogger; -+ bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); -+ } else { -+ dev_warn(kbdev->dev, "Bus log client registration failed\n"); -+ err = 0; -+ } -+#endif ++ if (copy_size < JOB_HEADER_SIZE) { ++ memcpy(page_2, dst + copy_size, ++ JOB_HEADER_SIZE - copy_size); ++ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1])); + -+ err = kbase_gpuprops_populate_user_buffer(kbdev); -+ if (err) { -+ dev_err(&pdev->dev, "GPU property population failed"); -+ kbase_platform_device_remove(pdev); -+ return err; ++ kbase_sync_single_for_device(katom->kctx->kbdev, ++ kbase_dma_addr(p), ++ JOB_HEADER_SIZE - copy_size, ++ DMA_TO_DEVICE); ++ } + } ++ if (copy_size < JOB_HEADER_SIZE) ++ kunmap_atomic(page_2); + -+ dev_info(kbdev->dev, -+ "Probed as %s\n", dev_name(kbdev->mdev.this_device)); -+ -+ kbase_dev_nr++; ++ kunmap_atomic(page_1); + -+ return err; ++out_unlock: ++ kbase_gpu_vm_unlock(katom->kctx); ++ return clamped; +} -+ -+#undef 
KBASEP_DEFAULT_REGISTER_HISTORY_SIZE -+ -+/** -+ * kbase_device_suspend - Suspend callback from the OS. -+ * -+ * This is called by Linux when the device should suspend. -+ * -+ * @dev: The device to suspend +diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h +new file mode 100644 +index 000000000..099a29861 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h +@@ -0,0 +1,23 @@ ++/* + * -+ * Return: A standard Linux error code -+ */ -+static int kbase_device_suspend(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); -+ -+ if (!kbdev) -+ return -ENODEV; -+ -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ if (kbdev->inited_subsys & inited_devfreq) -+ devfreq_suspend_device(kbdev->devfreq); -+#endif -+ -+ kbase_pm_suspend(kbdev); -+ return 0; -+} -+ -+/** -+ * kbase_device_resume - Resume callback from the OS. ++ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. + * -+ * This is called by Linux when the device should resume from suspension. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * @dev: The device to resume ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Return: A standard Linux error code + */ -+static int kbase_device_resume(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); + -+ if (!kbdev) -+ return -ENODEV; + -+ kbase_pm_resume(kbdev); + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ if (kbdev->inited_subsys & inited_devfreq) -+ devfreq_resume_device(kbdev->devfreq); -+#endif -+ return 0; -+} ++#ifndef _KBASE_10969_WORKAROUND_ ++#define _KBASE_10969_WORKAROUND_ + -+/** -+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS. ++int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); ++ ++#endif /* _KBASE_10969_WORKAROUND_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +new file mode 100644 +index 000000000..f910fe970 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +@@ -0,0 +1,102 @@ ++/* + * -+ * This is called by Linux when the device should prepare for a condition in -+ * which it will not be able to communicate with the CPU(s) and RAM due to -+ * power management. ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * -+ * @dev: The device to suspend ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
+ * -+ * Return: A standard Linux error code + */ -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_suspend(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); + -+ if (!kbdev) -+ return -ENODEV; + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ if (kbdev->inited_subsys & inited_devfreq) -+ devfreq_suspend_device(kbdev->devfreq); -+#endif + -+ if (kbdev->pm.backend.callback_power_runtime_off) { -+ kbdev->pm.backend.callback_power_runtime_off(kbdev); -+ dev_dbg(dev, "runtime suspend\n"); -+ } -+ return 0; -+} -+#endif /* KBASE_PM_RUNTIME */ ++#include + -+/** -+ * kbase_device_runtime_resume - Runtime resume callback from the OS. -+ * -+ * This is called by Linux when the device should go into a fully active state. -+ * -+ * @dev: The device to suspend -+ * -+ * Return: A standard Linux error code -+ */ ++#include ++#include + -+#ifdef KBASE_PM_RUNTIME -+static int kbase_device_runtime_resume(struct device *dev) ++#ifdef CONFIG_DEBUG_FS ++#ifdef CONFIG_MALI_DEBUG ++ ++static int kbase_as_fault_read(struct seq_file *sfile, void *data) +{ -+ int ret = 0; -+ struct kbase_device *kbdev = to_kbase_device(dev); ++ uintptr_t as_no = (uintptr_t) sfile->private; + -+ if (!kbdev) -+ return -ENODEV; ++ struct list_head *entry; ++ const struct list_head *kbdev_list; ++ struct kbase_device *kbdev = NULL; + -+ if (kbdev->pm.backend.callback_power_runtime_on) { -+ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); -+ dev_dbg(dev, "runtime resume\n"); -+ } ++ kbdev_list = kbase_dev_list_get(); + -+#if defined(CONFIG_MALI_DEVFREQ) && \ -+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) -+ if (kbdev->inited_subsys & inited_devfreq) -+ devfreq_resume_device(kbdev->devfreq); -+#endif ++ list_for_each(entry, kbdev_list) { ++ kbdev = list_entry(entry, struct kbase_device, entry); + -+ return ret; -+} -+#endif /* KBASE_PM_RUNTIME */ ++ if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + ++ /* don't show this one again until another fault occors */ ++ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); + -+#ifdef KBASE_PM_RUNTIME -+/** -+ * kbase_device_runtime_idle - Runtime idle callback from the OS. -+ * @dev: The device to suspend -+ * -+ * This is called by Linux when the device appears to be inactive and it might -+ * be placed into a low power state. -+ * -+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, -+ * otherwise a standard Linux error code -+ */ -+static int kbase_device_runtime_idle(struct device *dev) -+{ -+ struct kbase_device *kbdev = to_kbase_device(dev); ++ /* output the last page fault addr */ ++ seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr); ++ } + -+ if (!kbdev) -+ return -ENODEV; ++ } + -+ /* Use platform specific implementation if it exists. */ -+ if (kbdev->pm.backend.callback_power_runtime_idle) -+ return kbdev->pm.backend.callback_power_runtime_idle(kbdev); ++ kbase_dev_list_put(kbdev_list); + + return 0; +} -+#endif /* KBASE_PM_RUNTIME */ + -+/* The power management operations for the platform driver. 
-+ */ -+static const struct dev_pm_ops kbase_pm_ops = { -+ .suspend = kbase_device_suspend, -+ .resume = kbase_device_resume, -+#ifdef KBASE_PM_RUNTIME -+ .runtime_suspend = kbase_device_runtime_suspend, -+ .runtime_resume = kbase_device_runtime_resume, -+ .runtime_idle = kbase_device_runtime_idle, -+#endif /* KBASE_PM_RUNTIME */ -+}; ++static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbase_as_fault_read , in->i_private); ++} + -+#ifdef CONFIG_OF -+static const struct of_device_id kbase_dt_ids[] = { -+ { .compatible = "arm,malit7xx" }, -+ { .compatible = "arm,mali-midgard" }, -+ { /* sentinel */ } ++static const struct file_operations as_fault_fops = { ++ .open = kbase_as_fault_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, +}; -+MODULE_DEVICE_TABLE(of, kbase_dt_ids); -+#endif + -+static struct platform_driver kbase_platform_driver = { -+ .probe = kbase_platform_device_probe, -+ .remove = kbase_platform_device_remove, -+ .shutdown = kbase_platform_device_shutdown, -+ .driver = { -+ .name = "midgard", -+ .owner = THIS_MODULE, -+ .pm = &kbase_pm_ops, -+ .of_match_table = of_match_ptr(kbase_dt_ids), -+ }, -+}; ++#endif /* CONFIG_MALI_DEBUG */ ++#endif /* CONFIG_DEBUG_FS */ + +/* -+ * The driver will not provide a shortcut to create the Mali platform device -+ * anymore when using Device Tree. ++ * Initialize debugfs entry for each address space + */ -+#ifdef CONFIG_OF -+module_platform_driver(kbase_platform_driver); -+#else -+ -+static int __init rockchip_gpu_init_driver(void) -+{ -+ return platform_driver_register(&kbase_platform_driver); -+} -+late_initcall(rockchip_gpu_init_driver); -+ -+static int __init kbase_driver_init(void) -+{ -+ int ret; -+ -+ ret = kbase_platform_early_init(); -+ if (ret) -+ return ret; -+ -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+ ret = kbase_platform_fake_register(); -+ if (ret) -+ return ret; -+#endif -+ ret = platform_driver_register(&kbase_platform_driver); -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+ if (ret) -+ kbase_platform_fake_unregister(); -+#endif -+ return ret; -+} -+ -+static void __exit kbase_driver_exit(void) ++void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) +{ -+ platform_driver_unregister(&kbase_platform_driver); -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+ kbase_platform_fake_unregister(); -+#endif -+} -+ -+module_init(kbase_driver_init); -+module_exit(kbase_driver_exit); -+ -+#endif /* CONFIG_OF */ -+ -+MODULE_LICENSE("GPL"); -+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ -+ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ -+ __stringify(BASE_UK_VERSION_MINOR) ")"); ++#ifdef CONFIG_DEBUG_FS ++#ifdef CONFIG_MALI_DEBUG ++ uint i; ++ char as_name[64]; ++ struct dentry *debugfs_directory; + -+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE) -+#define CREATE_TRACE_POINTS -+#endif ++ kbdev->debugfs_as_read_bitmap = 0ULL; + -+#ifdef CONFIG_MALI_GATOR_SUPPORT -+/* Create the trace points (otherwise we just get code to call a tracepoint) */ -+#include "mali_linux_trace.h" ++ KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); ++ KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64)); + -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); ++ debugfs_directory = debugfs_create_dir("address_spaces", ++ kbdev->mali_debugfs_directory); + -+void kbase_trace_mali_pm_status(u32 event, u64 value) -+{ -+ trace_mali_pm_status(event, value); -+} ++ if(debugfs_directory) { ++ for(i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); ++ debugfs_create_file(as_name, S_IRUGO, ++ debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops); ++ } ++ } ++ else ++ dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory"); + -+void kbase_trace_mali_pm_power_off(u32 event, u64 value) -+{ -+ trace_mali_pm_power_off(event, value); ++#endif /* CONFIG_MALI_DEBUG */ ++#endif /* CONFIG_DEBUG_FS */ ++ return; +} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h +new file mode 100644 +index 000000000..3ed224889 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h +@@ -0,0 +1,45 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+void kbase_trace_mali_pm_power_on(u32 event, u64 value) -+{ -+ trace_mali_pm_power_on(event, value); -+} + -+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id) -+{ -+ trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? 
kctx->pid : 0), atom_id); -+} + -+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) -+{ -+ trace_mali_page_fault_insert_pages(event, value); -+} ++#ifndef _KBASE_AS_FAULT_DEBUG_FS_H ++#define _KBASE_AS_FAULT_DEBUG_FS_H + -+void kbase_trace_mali_mmu_as_in_use(int event) -+{ -+ trace_mali_mmu_as_in_use(event); -+} ++/** ++ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults ++ * ++ * @kbdev: Pointer to kbase_device ++ */ ++void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + -+void kbase_trace_mali_mmu_as_released(int event) ++/** ++ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs ++ * ++ * @kbdev: Pointer to kbase_device ++ * @as_no: The address space the fault occurred on ++ */ ++static inline void ++kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +{ -+ trace_mali_mmu_as_released(event); ++#ifdef CONFIG_DEBUG_FS ++#ifdef CONFIG_MALI_DEBUG ++ kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); ++#endif /* CONFIG_DEBUG_FS */ ++#endif /* CONFIG_MALI_DEBUG */ ++ return; +} + -+void kbase_trace_mali_total_alloc_pages_change(long long int event) -+{ -+ trace_mali_total_alloc_pages_change(event); -+} -+#endif /* CONFIG_MALI_GATOR_SUPPORT */ -+#ifdef CONFIG_MALI_SYSTEM_TRACE -+#include "mali_linux_kbase_trace.h" -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c ++#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c new file mode 100644 -index 000000000..ce0048414 +index 000000000..c67b3e97f --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c -@@ -0,0 +1,208 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +@@ -0,0 +1,64 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -359312,206 +362216,170 @@ index 000000000..ce0048414 + + + -+#include -+#include + -+#include "mali_kbase_ctx_sched.h" + -+int kbase_ctx_sched_init(struct kbase_device *kbdev) -+{ -+ int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; ++/* ++ * Cache Policy API. ++ */ + -+ /* These two must be recalculated if nr_hw_address_spaces changes -+ * (e.g. for HW workarounds) */ -+ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { -+ bool use_workaround; ++#include "mali_kbase_cache_policy.h" + -+ use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE; -+ if (use_workaround) { -+ dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only"); -+ kbdev->nr_user_address_spaces = 1; -+ } -+ } ++/* ++ * The output flags should be a combination of the following values: ++ * KBASE_REG_CPU_CACHED: CPU cache should be enabled. 
++ */ ++u32 kbase_cache_enabled(u32 flags, u32 nr_pages) ++{ ++ u32 cache_flags = 0; + -+ kbdev->as_free = as_present; /* All ASs initially free */ ++ CSTD_UNUSED(nr_pages); + -+ memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); ++ if (flags & BASE_MEM_CACHED_CPU) ++ cache_flags |= KBASE_REG_CPU_CACHED; + -+ return 0; ++ return cache_flags; +} + -+void kbase_ctx_sched_term(struct kbase_device *kbdev) -+{ -+ s8 i; + -+ /* Sanity checks */ -+ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { -+ WARN_ON(kbdev->as_to_kctx[i] != NULL); -+ WARN_ON(!(kbdev->as_free & (1u << i))); -+ } ++void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir) ++{ ++/* Check if kernel is using coherency with GPU */ ++#ifdef CONFIG_MALI_COH_KERN ++ if (kbdev->system_coherency == COHERENCY_ACE) ++ return; ++#endif /* CONFIG_MALI_COH_KERN */ ++ dma_sync_single_for_device(kbdev->dev, handle, size, dir); +} + -+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space ++ ++void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir) ++{ ++/* Check if kernel is using coherency with GPU */ ++#ifdef CONFIG_MALI_COH_KERN ++ if (kbdev->system_coherency == COHERENCY_ACE) ++ return; ++#endif /* CONFIG_MALI_COH_KERN */ ++ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +new file mode 100644 +index 000000000..0c18bdb35 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +@@ -0,0 +1,45 @@ ++/* + * -+ * @kbdev: The context for which to find a free address space ++ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * -+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * This function returns an address space available for use. It would prefer -+ * returning an AS that has been previously assigned to the context to -+ * avoid having to reprogram the MMU. + */ -+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ int free_as; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* First check if the previously assigned AS is available */ -+ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && -+ (kbdev->as_free & (1u << kctx->as_nr))) -+ return kctx->as_nr; + -+ /* The previously assigned AS was taken, we'll be returning any free -+ * AS at this point. -+ */ -+ free_as = ffs(kbdev->as_free) - 1; -+ if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) -+ return free_as; + -+ return KBASEP_AS_NR_INVALID; -+} + -+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; + -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/* ++ * Cache Policy API. 
++ */ + -+ WARN_ON(!kbdev->pm.backend.gpu_powered); ++#ifndef _KBASE_CACHE_POLICY_H_ ++#define _KBASE_CACHE_POLICY_H_ + -+ if (atomic_inc_return(&kctx->refcount) == 1) { -+ int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); ++#include "mali_kbase.h" ++#include "mali_base_kernel.h" + -+ if (free_as != KBASEP_AS_NR_INVALID) { -+ kbdev->as_free &= ~(1u << free_as); -+ /* Only program the MMU if the context has not been -+ * assigned the same address space before. -+ */ -+ if (free_as != kctx->as_nr) { -+ struct kbase_context *const prev_kctx = -+ kbdev->as_to_kctx[free_as]; ++/** ++ * kbase_cache_enabled - Choose the cache policy for a specific region ++ * @flags: flags describing attributes of the region ++ * @nr_pages: total number of pages (backed or not) for the region ++ * ++ * Tells whether the CPU and GPU caches should be enabled or not for a specific ++ * region. ++ * This function can be modified to customize the cache policy depending on the ++ * flags and size of the region. ++ * ++ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED ++ * depending on the cache policy ++ */ ++u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + -+ if (prev_kctx) { -+ WARN_ON(atomic_read(&prev_kctx->refcount) != 0); -+ kbase_mmu_disable(prev_kctx); -+ prev_kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } ++#endif /* _KBASE_CACHE_POLICY_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c +new file mode 100644 +index 000000000..fb615ae02 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_config.c +@@ -0,0 +1,51 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ kctx->as_nr = free_as; -+ kbdev->as_to_kctx[free_as] = kctx; -+ kbase_mmu_update(kctx); -+ } -+ } else { -+ atomic_dec(&kctx->refcount); + -+ /* Failed to find an available address space, we must -+ * be returning an error at this point. 
-+ */ -+ WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ } -+ } + -+ return kctx->as_nr; -+} + -+int kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) -+{ -+ struct kbase_device *const kbdev = kctx->kbdev; + -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ WARN_ON(atomic_read(&kctx->refcount) == 0); -+ if (atomic_read(&kctx->refcount) == 0) -+ return -1; ++#include ++#include ++#include + -+ WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); -+ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); ++int kbasep_platform_device_init(struct kbase_device *kbdev) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ atomic_inc(&kctx->refcount); ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_init_func) ++ return platform_funcs_p->platform_init_func(kbdev); + + return 0; +} + -+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) ++void kbasep_platform_device_term(struct kbase_device *kbdev) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct kbase_platform_funcs_conf *platform_funcs_p; + -+ if (atomic_dec_return(&kctx->refcount) == 0) -+ kbdev->as_free |= (1u << kctx->as_nr); ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_term_func) ++ platform_funcs_p->platform_term_func(kbdev); +} + -+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) ++int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed) +{ -+ struct kbase_device *const kbdev = kctx->kbdev; -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ WARN_ON(atomic_read(&kctx->refcount) != 0); -+ -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID) { -+ if (kbdev->pm.backend.gpu_powered) -+ kbase_mmu_disable(kctx); ++ KBASE_DEBUG_ASSERT(NULL != clock_speed); + -+ kbdev->as_to_kctx[kctx->as_nr] = NULL; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } ++ *clock_speed = 100; ++ return 0; +} + -+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) -+{ -+ s8 i; -+ -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ WARN_ON(!kbdev->pm.backend.gpu_powered); -+ -+ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { -+ struct kbase_context *kctx; -+ -+ kctx = kbdev->as_to_kctx[i]; -+ if (kctx) { -+ if (atomic_read(&kctx->refcount)) { -+ WARN_ON(kctx->as_nr != i); -+ -+ kbase_mmu_update(kctx); -+ } else { -+ /* This context might have been assigned an -+ * AS before, clear it. -+ */ -+ kbdev->as_to_kctx[kctx->as_nr] = NULL; -+ kctx->as_nr = KBASEP_AS_NR_INVALID; -+ } -+ } else { -+ kbase_mmu_disable_as(kbdev, i); -+ } -+ } -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h +diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h new file mode 100644 -index 000000000..47474fecc +index 000000000..356d52bcd --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h -@@ -0,0 +1,134 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_config.h +@@ -0,0 +1,345 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -359526,132 +362394,343 @@ index 000000000..47474fecc + + + -+#ifndef _KBASE_CTX_SCHED_H_ -+#define _KBASE_CTX_SCHED_H_ + -+#include + -+/* The Context Scheduler manages address space assignment and reference -+ * counting to kbase_context. The interface has been designed to minimise -+ * interactions between the Job Scheduler and Power Management/MMU to support -+ * both the existing Job Scheduler and Command Stream Frontend interface. -+ * -+ * The initial implementation of the Context Scheduler does not schedule -+ * contexts. Instead it relies on the Job Scheduler/CSF to make decisions of -+ * when to schedule/evict contexts if address spaces are starved. In the -+ * future, once an interface between the CS and JS/CSF have been devised to -+ * provide enough information about how each context is consuming GPU resources, -+ * those decisions can be made in the CS itself, thereby reducing duplicated -+ * code. ++/** ++ * @file mali_kbase_config.h ++ * Configuration API and Attributes for KBase + */ + -+/* base_ctx_sched_init - Initialise the context scheduler -+ * -+ * @kbdev: The device for which the context scheduler needs to be -+ * initialised ++#ifndef _KBASE_CONFIG_H_ ++#define _KBASE_CONFIG_H_ ++ ++#include ++ ++#include ++#include ++ ++/** ++ * @addtogroup base_api ++ * @{ ++ */ ++ ++/** ++ * @addtogroup base_kbase_api ++ * @{ ++ */ ++ ++/** ++ * @addtogroup kbase_config Configuration API and Attributes ++ * @{ ++ */ ++ ++#include ++ ++/* Forward declaration of struct kbase_device */ ++struct kbase_device; ++ ++/** ++ * kbase_platform_funcs_conf - Specifies platform init/term function pointers + * -+ * Return: 0 for success, otherwise failure ++ * Specifies the functions pointers for platform specific initialization and ++ * termination. By default no functions are required. No additional platform ++ * specific control is necessary. ++ */ ++struct kbase_platform_funcs_conf { ++ /** ++ * platform_init_func - platform specific init function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Returns 0 on success, negative error code otherwise. ++ * ++ * Function pointer for platform specific initialization or NULL if no ++ * initialization function is required. At the point this the GPU is ++ * not active and its power and clocks are in unknown (platform specific ++ * state) as kbase doesn't yet have control of power and clocks. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly initialized) in here. ++ */ ++ int (*platform_init_func)(struct kbase_device *kbdev); ++ /** ++ * platform_term_func - platform specific termination function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer for platform specific termination or NULL if no ++ * termination function is required. At the point this the GPU will be ++ * idle but still powered and clocked. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ void (*platform_term_func)(struct kbase_device *kbdev); ++}; ++ ++/* ++ * @brief Specifies the callbacks for power management + * -+ * This must be called during device initilisation. The number of hardware -+ * address spaces must already be established before calling this function. ++ * By default no callbacks will be made and the GPU must not be powered off. 
+ */ -+int kbase_ctx_sched_init(struct kbase_device *kbdev); ++struct kbase_pm_callback_conf { ++ /** Callback for when the GPU is idle and the power to it can be switched off. ++ * ++ * The system integrator can decide whether to either do nothing, just switch off ++ * the clocks to the GPU, or to completely power down the GPU. ++ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the ++ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). ++ */ ++ void (*power_off_callback)(struct kbase_device *kbdev); + -+/* base_ctx_sched_term - Terminate the context scheduler ++ /** Callback for when the GPU is about to become active and power must be supplied. ++ * ++ * This function must not return until the GPU is powered and clocked sufficiently for register access to ++ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. ++ * If the GPU state has been lost then this function must return 1, otherwise it should return 0. ++ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the ++ * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). ++ * ++ * The return value of the first call to this function is ignored. ++ * ++ * @return 1 if the GPU state may have been lost, 0 otherwise. ++ */ ++ int (*power_on_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for when the system is requesting a suspend and GPU power ++ * must be switched off. ++ * ++ * Note that if this callback is present, then this may be called ++ * without a preceding call to power_off_callback. Therefore this ++ * callback must be able to take any action that might otherwise happen ++ * in power_off_callback. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed and modified in here. It is the platform \em ++ * callbacks responsibility to initialize and terminate this pointer if ++ * used (see @ref kbase_platform_funcs_conf). ++ */ ++ void (*power_suspend_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for when the system is resuming from a suspend and GPU ++ * power must be switched on. ++ * ++ * Note that if this callback is present, then this may be called ++ * without a following call to power_on_callback. Therefore this ++ * callback must be able to take any action that might otherwise happen ++ * in power_on_callback. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed and modified in here. It is the platform \em ++ * callbacks responsibility to initialize and terminate this pointer if ++ * used (see @ref kbase_platform_funcs_conf). ++ */ ++ void (*power_resume_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for handling runtime power management initialization. ++ * ++ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback ++ * will become active from calls made to the OS from within this function. ++ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ * ++ * @return 0 on success, else int error code. 
++ */ ++ int (*power_runtime_init_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for handling runtime power management termination. ++ * ++ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback ++ * should no longer be called by the OS on completion of this function. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ */ ++ void (*power_runtime_term_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for runtime power-off power management callback ++ * ++ * For linux this callback will be called by the kernel runtime_suspend callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ * ++ * @return 0 on success, else OS error code. ++ */ ++ void (*power_runtime_off_callback)(struct kbase_device *kbdev); ++ ++ /** Callback for runtime power-on power management callback ++ * ++ * For linux this callback will be called by the kernel runtime_resume callback. ++ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. ++ */ ++ int (*power_runtime_on_callback)(struct kbase_device *kbdev); ++ ++ /* ++ * Optional callback for checking if GPU can be suspended when idle ++ * ++ * This callback will be called by the runtime power management core ++ * when the reference count goes to 0 to provide notification that the ++ * GPU now seems idle. ++ * ++ * If this callback finds that the GPU can't be powered off, or handles ++ * suspend by powering off directly or queueing up a power off, a ++ * non-zero value must be returned to prevent the runtime PM core from ++ * also triggering a suspend. ++ * ++ * Returning 0 will cause the runtime PM core to conduct a regular ++ * autosuspend. ++ * ++ * This callback is optional and if not provided regular autosuspend ++ * will be triggered. ++ * ++ * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use ++ * this feature. ++ * ++ * Return 0 if GPU can be suspended, positive value if it can not be ++ * suspeneded by runtime PM, else OS error code ++ */ ++ int (*power_runtime_idle_callback)(struct kbase_device *kbdev); ++}; ++ ++/** ++ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC ++ * @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters + * -+ * @kbdev: The device for which the context scheduler needs to be -+ * terminated ++ * Returns 0 on success, negative error code otherwise. + * -+ * This must be called during device termination after all contexts have been -+ * destroyed. ++ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed ++ * to 100, so will be an underestimate for any real system. + */ -+void kbase_ctx_sched_term(struct kbase_device *kbdev); ++int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed); + -+/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context ++/** ++ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC ++ * @param clock_speed - pointer to store the current CPU clock speed in MHz + * -+ * @kctx: The context to which to retain a reference ++ * Returns 0 on success, otherwise negative error code. + * -+ * Return: The address space that the context has been assigned to or -+ * KBASEP_AS_NR_INVALID if no address space was available. ++ * This is mainly used to implement OpenCL's clGetDeviceInfo(). 
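/*
 * Illustrative sketch (not part of the driver sources above): how a platform
 * integration might fill in kbase_platform_funcs_conf and
 * kbase_pm_callback_conf. The example_* names are hypothetical; a real
 * integration would toggle its own clocks/regulators in these hooks.
 */
static int example_platform_init(struct kbase_device *kbdev)
{
	/* Claim platform resources; state may be stashed in
	 * kbdev->platform_context. */
	return 0;
}

static void example_platform_term(struct kbase_device *kbdev)
{
	/* Release whatever example_platform_init() set up. */
}

static int example_power_on(struct kbase_device *kbdev)
{
	/* Ungate power/clocks. Return 1 if GPU state may have been lost
	 * while powered down, 0 if it was retained. */
	return 1;
}

static void example_power_off(struct kbase_device *kbdev)
{
	/* Gate clocks or power the GPU down entirely. */
}

struct kbase_platform_funcs_conf example_platform_funcs = {
	.platform_init_func = example_platform_init,
	.platform_term_func = example_platform_term,
};

struct kbase_pm_callback_conf example_pm_callbacks = {
	.power_on_callback = example_power_on,
	.power_off_callback = example_power_off,
	/* The remaining hooks are optional and may stay NULL. */
};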
++ */ ++typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed); ++ ++/** ++ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC ++ * @param clock_speed - pointer to store the current GPU clock speed in MHz + * -+ * This function should be called whenever an address space should be assigned -+ * to a context and programmed onto the MMU. It should typically be called -+ * when jobs are ready to be submitted to the GPU. ++ * Returns 0 on success, otherwise negative error code. ++ * When an error is returned the caller assumes maximum GPU speed stored in ++ * gpu_freq_khz_max. + * -+ * It can be called as many times as necessary. The address space will be -+ * assigned to the context for as long as there is a reference to said context. ++ * If the system timer is not available then this function is required ++ * for the OpenCL queue profiling to return correct timing information. + * -+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be -+ * held whilst calling this function. + */ -+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); ++typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed); + -+/* kbase_ctx_sched_retain_ctx_refcount -+ * -+ * @kctx: The context to which to retain a reference ++#ifdef CONFIG_OF ++struct kbase_platform_config { ++}; ++#else ++ ++/* ++ * @brief Specifies start and end of I/O memory region. ++ */ ++struct kbase_io_memory_region { ++ u64 start; ++ u64 end; ++}; ++ ++/* ++ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. ++ */ ++struct kbase_io_resources { ++ u32 job_irq_number; ++ u32 mmu_irq_number; ++ u32 gpu_irq_number; ++ struct kbase_io_memory_region io_memory_region; ++}; ++ ++struct kbase_platform_config { ++ const struct kbase_io_resources *io_resources; ++}; ++ ++#endif /* CONFIG_OF */ ++ ++/** ++ * @brief Gets the pointer to platform config. + * -+ * This function only retains a reference to the context. It must be called -+ * only when the context already has a reference. ++ * @return Pointer to the platform config ++ */ ++struct kbase_platform_config *kbase_get_platform_config(void); ++ ++/** ++ * kbasep_platform_device_init: - Platform specific call to initialize hardware ++ * @kbdev: kbase device pointer + * -+ * This is typically called inside an atomic session where we know the context -+ * is already scheduled in but want to take an extra reference to ensure that -+ * it doesn't get descheduled. ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any hardware and context state that ++ * is required for the GPU block to function. + * -+ * The kbase_device::hwaccess_lock must be held whilst calling this function -+ * @return -+ * è‹¥æˆåŠŸ, 返回 0; -+ * è‹¥ *kctx 状æ€å¼‚常, 返回 -1. ++ * Return: 0 if no errors have been found in the config. ++ * Negative error code otherwise. + */ -+int kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); ++int kbasep_platform_device_init(struct kbase_device *kbdev); + -+/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context -+ * -+ * @kctx: The context from which to release a reference ++/** ++ * kbasep_platform_device_term - Platform specific call to terminate hardware ++ * @kbdev: Kbase device pointer + * -+ * This function should be called whenever an address space could be unassigned -+ * from a context. 
When there are no more references to said context, the -+ * address space previously assigned to this context shall be reassigned to -+ * other contexts as needed. ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can destroy any platform specific context state and ++ * shut down any hardware functionality that are outside of the Power Management ++ * callbacks. + * -+ * The kbase_device::hwaccess_lock must be held whilst calling this function + */ -+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); ++void kbasep_platform_device_term(struct kbase_device *kbdev); + -+/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space -+ * -+ * @kctx: The context to be removed ++ ++/** ++ * kbase_platform_early_init - Early initialisation of the platform code + * -+ * This function should be called when a context is being destroyed. The -+ * context must no longer have any reference. If it has been assigned an -+ * address space before then the AS will be unprogrammed. ++ * This function will be called when the module is loaded to perform any ++ * early initialisation required by the platform code. Such as reading ++ * platform specific device tree entries for the GPU. + * -+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be -+ * held whilst calling this function. ++ * Return: 0 for success, any other fail causes module initialisation to fail + */ -+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); ++int kbase_platform_early_init(void); + -+/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces ++#ifndef CONFIG_OF ++#ifdef CONFIG_MALI_PLATFORM_FAKE ++/** ++ * kbase_platform_fake_register - Register a platform device for the GPU + * -+ * @kbdev: The device for which address spaces to be reprogrammed ++ * This can be used to register a platform device on systems where device tree ++ * is not enabled and the platform initialisation code in the kernel doesn't ++ * create the GPU device. Where possible device tree should be used instead. + * -+ * This function shall reprogram all address spaces previously assigned to -+ * contexts. It can be used after the GPU is reset. ++ * Return: 0 for success, any other fail causes module initialisation to fail ++ */ ++int kbase_platform_fake_register(void); ++ ++/** ++ * kbase_platform_fake_unregister - Unregister a fake platform device + * -+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be -+ * held whilst calling this function. ++ * Unregister the platform device created with kbase_platform_fake_register() + */ -+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); ++void kbase_platform_fake_unregister(void); ++#endif ++#endif + -+#endif /* _KBASE_CTX_SCHED_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c ++ /** @} *//* end group kbase_config */ ++ /** @} *//* end group base_kbase_api */ ++ /** @} *//* end group base_api */ ++ ++#endif /* _KBASE_CONFIG_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h new file mode 100644 -index 000000000..fb57ac2e3 +index 000000000..1cf44b350 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c -@@ -0,0 +1,39 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +@@ -0,0 +1,227 @@ +/* + * -+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2017 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -359666,207 +362745,225 @@ index 000000000..fb57ac2e3 + + + ++/** ++ * @file mali_kbase_config_defaults.h ++ * ++ * Default values for configuration settings ++ * ++ */ + ++#ifndef _KBASE_CONFIG_DEFAULTS_H_ ++#define _KBASE_CONFIG_DEFAULTS_H_ + -+#include ++/* Include mandatory definitions per platform */ ++#include + -+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { -+ NULL, -+ NULL -+}; ++/** ++* Boolean indicating whether the driver is configured to be secure at ++* a potential loss of performance. ++* ++* This currently affects only r0p0-15dev0 HW and earlier. ++* ++* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and ++* performance: ++* ++* - When this is set to true, the driver remains fully secure, ++* but potentially loses performance compared with setting this to ++* false. ++* - When set to false, the driver is open to certain security ++* attacks. ++* ++* From r0p0-00rel0 and onwards, there is no security loss by setting ++* this to false, and no performance loss by setting it to ++* true. ++*/ ++#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false + -+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) -+{ -+ kbasep_debug_assert_registered_cb.func = func; -+ kbasep_debug_assert_registered_cb.param = param; -+} ++enum { ++ /** ++ * Use unrestricted Address ID width on the AXI bus. ++ */ ++ KBASE_AID_32 = 0x0, + -+void kbasep_debug_assert_call_hook(void) -+{ -+ if (kbasep_debug_assert_registered_cb.func != NULL) -+ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); -+} -+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); ++ /** ++ * Restrict GPU to a half of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_16 = 0x3, + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h -new file mode 100644 -index 000000000..5fff2892b ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h -@@ -0,0 +1,164 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ /** ++ * Restrict GPU to a quarter of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_8 = 0x2, ++ ++ /** ++ * Restrict GPU to an eighth of maximum Address ID count. ++ * This will reduce performance, but reduce bus load due to GPU. ++ */ ++ KBASE_AID_4 = 0x1 ++}; ++ ++/** ++ * Default setting for read Address ID limiting on AXI bus. + * ++ * Attached value: u32 register value ++ * KBASE_AID_32 - use the full 32 IDs (5 ID bits) ++ * KBASE_AID_16 - use 16 IDs (4 ID bits) ++ * KBASE_AID_8 - use 8 IDs (3 ID bits) ++ * KBASE_AID_4 - use 4 IDs (2 ID bits) ++ * Default value: KBASE_AID_32 (no limit). Note hardware implementation ++ * may limit to a lower value. 
+ */ ++#define DEFAULT_ARID_LIMIT KBASE_AID_32 + ++/** ++ * Default setting for write Address ID limiting on AXI. ++ * ++ * Attached value: u32 register value ++ * KBASE_AID_32 - use the full 32 IDs (5 ID bits) ++ * KBASE_AID_16 - use 16 IDs (4 ID bits) ++ * KBASE_AID_8 - use 8 IDs (3 ID bits) ++ * KBASE_AID_4 - use 4 IDs (2 ID bits) ++ * Default value: KBASE_AID_32 (no limit). Note hardware implementation ++ * may limit to a lower value. ++ */ ++#define DEFAULT_AWID_LIMIT KBASE_AID_32 + ++/** ++ * Default UMP device mapping. A UMP_DEVICE__SHIFT value which ++ * defines which UMP device this GPU should be mapped to. ++ */ ++#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT + ++/* ++ * Default period for DVFS sampling ++ */ ++// #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ ++#define DEFAULT_PM_DVFS_PERIOD 20 /* 20 ms */ + ++/* ++ * Power Management poweroff tick granuality. This is in nanoseconds to ++ * allow HR timer support. ++ * ++ * On each scheduling tick, the power manager core may decide to: ++ * -# Power off one or more shader cores ++ * -# Power off the entire GPU ++ */ ++#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ + -+#ifndef _KBASE_DEBUG_H -+#define _KBASE_DEBUG_H -+ -+#include ++/* ++ * Power Manager number of ticks before shader cores are powered off ++ */ ++#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ + -+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ -+#define KBASE_DEBUG_SKIP_TRACE 0 ++/* ++ * Power Manager number of ticks before GPU is powered off ++ */ ++#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */ + -+/** @brief If different from 0, the trace will only contain the file and line. */ -+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 ++/* ++ * Default scheduling tick granuality ++ */ ++#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ + -+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ -+#ifndef KBASE_DEBUG_DISABLE_ASSERTS -+#ifdef CONFIG_MALI_DEBUG -+#define KBASE_DEBUG_DISABLE_ASSERTS 0 -+#else -+#define KBASE_DEBUG_DISABLE_ASSERTS 1 -+#endif -+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++/* ++ * Default minimum number of scheduling ticks before jobs are soft-stopped. ++ * ++ * This defines the time-slice for a job (which may be different from that of a ++ * context) ++ */ ++#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ + -+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ -+typedef void (kbase_debug_assert_hook) (void *); ++/* ++ * Default minimum number of scheduling ticks before CL jobs are soft-stopped. ++ */ ++#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ + -+struct kbasep_debug_assert_cb { -+ kbase_debug_assert_hook *func; -+ void *param; -+}; ++/* ++ * Default minimum number of scheduling ticks before jobs are hard-stopped ++ */ ++#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ ++#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ + -+/** -+ * @def KBASEP_DEBUG_PRINT_TRACE -+ * @brief Private macro containing the format of the trace to display before every message -+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME ++/* ++ * Default minimum number of scheduling ticks before CL jobs are hard-stopped. 
+ */ -+#if !KBASE_DEBUG_SKIP_TRACE -+#define KBASEP_DEBUG_PRINT_TRACE \ -+ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) -+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME -+#define KBASEP_DEBUG_PRINT_FUNCTION __func__ -+#else -+#define KBASEP_DEBUG_PRINT_FUNCTION "" -+#endif -+#else -+#define KBASEP_DEBUG_PRINT_TRACE "" -+#endif ++#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ + -+/** -+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) -+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event. -+ * @param trace location in the code from where the message is printed -+ * @param function function from where the message is printed -+ * @param ... Format string followed by format arguments. -+ * @note function parameter cannot be concatenated with other strings ++/* ++ * Default minimum number of scheduling ticks before jobs are hard-stopped ++ * during dumping + */ -+/* Select the correct system output function*/ -+#ifdef CONFIG_MALI_DEBUG -+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ -+ do { \ -+ pr_err("Mali: %s function:%s ", trace, function);\ -+ pr_err(__VA_ARGS__);\ -+ pr_err("\n");\ -+ } while (false) -+#else -+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() -+#endif ++#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ + -+#ifdef CONFIG_MALI_DEBUG -+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() -+#else -+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() -+#endif ++/* ++ * Default timeout for some software jobs, after which the software event wait ++ * jobs will be cancelled. ++ */ ++#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ + -+/** -+ * @def KBASE_DEBUG_ASSERT(expr) -+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false -+ * -+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 -+ * -+ * @param expr Boolean expression ++/* ++ * Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" job + */ -+#define KBASE_DEBUG_ASSERT(expr) \ -+ KBASE_DEBUG_ASSERT_MSG(expr, #expr) ++#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ ++#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ + -+#if KBASE_DEBUG_DISABLE_ASSERTS -+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() -+#else -+ /** -+ * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) -+ * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false -+ * -+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 -+ * -+ * @param expr Boolean expression -+ * @param ... Message to display when @a expr is false, as a format string followed by format arguments. -+ */ -+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ -+ do { \ -+ if (!(expr)) { \ -+ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ -+ KBASE_CALL_ASSERT_HOOK();\ -+ BUG();\ -+ } \ -+ } while (false) -+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++/* ++ * Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" CL job. ++ */ ++#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ + -+/** -+ * @def KBASE_DEBUG_CODE( X ) -+ * @brief Executes the code inside the macro only in debug mode -+ * -+ * @param X Code to compile only in debug mode. ++/* ++ * Default minimum number of scheduling ticks before the GPU is reset to clear a ++ * "stuck" job during dumping. 
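/*
 * Illustrative sketch (not from the driver sources): the *_TICKS values above
 * are counts of scheduler ticks, so the wall-clock limit is the tick count
 * multiplied by DEFAULT_JS_SCHEDULING_PERIOD_NS (100 ms). That is how the
 * annotations line up: DEFAULT_JS_HARD_STOP_TICKS_SS (50) -> 5 s and
 * DEFAULT_JS_RESET_TICKS_SS (55) -> 5.5 s, while the 100ms-200ms annotation
 * for a single soft-stop tick reflects that a job may start part-way through
 * the current tick. example_js_ticks_to_ns() is a hypothetical helper.
 */
static inline u64 example_js_ticks_to_ns(u32 ticks)
{
	return (u64)ticks * DEFAULT_JS_SCHEDULING_PERIOD_NS;
}
/* example_js_ticks_to_ns(DEFAULT_JS_HARD_STOP_TICKS_SS) == 5000000000 ns */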
+ */ -+#ifdef CONFIG_MALI_DEBUG -+#define KBASE_DEBUG_CODE(X) X -+#else -+#define KBASE_DEBUG_CODE(X) CSTD_NOP() -+#endif /* CONFIG_MALI_DEBUG */ ++#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ + -+/** @} */ ++/* ++ * Default number of milliseconds given for other jobs on the GPU to be ++ * soft-stopped when the GPU needs to be reset. ++ */ ++#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ + -+/** -+ * @brief Register a function to call on ASSERT -+ * -+ * Such functions will \b only be called during Debug mode, and for debugging -+ * features \b only. Do not rely on them to be called in general use. -+ * -+ * To disable the hook, supply NULL to \a func. ++/* ++ * Default timeslice that a context is scheduled in for, in nanoseconds. + * -+ * @note This function is not thread-safe, and should only be used to -+ * register/deregister once in the module's lifetime. ++ * When a context has used up this amount of time across its jobs, it is ++ * scheduled out to let another run. + * -+ * @param[in] func the function to call when an assert is triggered. -+ * @param[in] param the parameter to pass to \a func when calling it ++ * @note the resolution is nanoseconds (ns) here, because that's the format ++ * often used by the OS. + */ -+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); ++#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ + -+/** -+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() ++/* ++ * Perform GPU power down using only platform specific code, skipping DDK power ++ * management. + * -+ * @note This function is not thread-safe with respect to multiple threads -+ * registering functions and parameters with -+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the -+ * responsibility of the registered hook. ++ * If this is non-zero then kbase will avoid powering down shader cores, the ++ * tiler, and the L2 cache, instead just powering down the entire GPU through ++ * platform specific code. This may be required for certain platform ++ * integrations. ++ * ++ * Note that as this prevents kbase from powering down shader cores, this limits ++ * the available power policies to coarse_demand and always_on. + */ -+void kbasep_debug_assert_call_hook(void); ++#define PLATFORM_POWER_DOWN_ONLY (1) + -+#endif /* _KBASE_DEBUG_H */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c ++#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c new file mode 100644 -index 000000000..f29430ddf +index 000000000..6338a7e22 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c -@@ -0,0 +1,499 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_context.c +@@ -0,0 +1,342 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -359881,497 +362978,340 @@ index 000000000..f29430ddf + + + -+#include -+#include -+#include -+ -+#ifdef CONFIG_DEBUG_FS + -+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) -+{ -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ unsigned long flags; -+ bool ret; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ ret = !list_empty(event_list); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++/* ++ * Base kernel context APIs ++ */ + -+ return ret; -+} ++#include ++#include ++#include ++#include ++#include + -+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) ++/** ++ * kbase_create_context() - Create a kernel base context. ++ * @kbdev: Kbase device ++ * @is_compat: Force creation of a 32-bit context ++ * ++ * Allocate and init a kernel base context. ++ * ++ * Return: new kbase context ++ */ ++struct kbase_context * ++kbase_create_context(struct kbase_device *kbdev, bool is_compat) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; -+ struct base_job_fault_event *event; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ if (list_empty(event_list)) { -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ return true; -+ } -+ list_for_each_entry(event, event_list, head) { -+ if (event->katom->kctx == kctx) { -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, -+ flags); -+ return false; -+ } -+ } -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ return true; -+} ++ struct kbase_context *kctx; ++ int err; + -+/* wait until the fault happen and copy the event */ -+static int kbase_job_fault_event_wait(struct kbase_device *kbdev, -+ struct base_job_fault_event *event) -+{ -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ struct base_job_fault_event *event_in; -+ unsigned long flags; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ if (list_empty(event_list)) { -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ if (wait_event_interruptible(kbdev->job_fault_wq, -+ kbase_is_job_fault_event_pending(kbdev))) -+ return -ERESTARTSYS; -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ } ++ /* zero-inited as lot of code assume it's zero'ed out on create */ ++ kctx = vzalloc(sizeof(*kctx)); + -+ event_in = list_entry(event_list->next, -+ struct base_job_fault_event, head); -+ event->event_code = event_in->event_code; -+ event->katom = event_in->katom; ++ if (!kctx) ++ goto out; + -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ /* creating a context is considered a disjoint event */ ++ kbase_disjoint_event(kbdev); + -+ return 0; ++ kctx->kbdev = kbdev; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ atomic_set(&kctx->refcount, 0); ++ if (is_compat) ++ kbase_ctx_flag_set(kctx, KCTX_COMPAT); ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ kctx->timeline.owner_tgid = task_tgid_nr(current); ++#endif ++ atomic_set(&kctx->setup_complete, 0); ++ atomic_set(&kctx->setup_in_progress, 0); ++ spin_lock_init(&kctx->mm_update_lock); ++ kctx->process_mm = NULL; ++ atomic_set(&kctx->nonmapped_pages, 0); ++ kctx->slots_pullable = 0; ++ kctx->tgid = current->tgid; ++ kctx->pid = current->pid; + -+} ++ err = 
kbase_mem_pool_init(&kctx->mem_pool, ++ kbdev->mem_pool_max_size_default, ++ kctx->kbdev, &kbdev->mem_pool); ++ if (err) ++ goto free_kctx; + -+/* remove the event from the queue */ -+static struct base_job_fault_event *kbase_job_fault_event_dequeue( -+ struct kbase_device *kbdev, struct list_head *event_list) -+{ -+ struct base_job_fault_event *event; ++ err = kbase_mem_evictable_init(kctx); ++ if (err) ++ goto free_pool; + -+ event = list_entry(event_list->next, -+ struct base_job_fault_event, head); -+ list_del(event_list->next); ++ atomic_set(&kctx->used_pages, 0); + -+ return event; ++ err = kbase_jd_init(kctx); ++ if (err) ++ goto deinit_evictable; + -+} ++ err = kbasep_js_kctx_init(kctx); ++ if (err) ++ goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ + -+/* Remove all the following atoms after the failed atom in the same context -+ * Call the postponed bottom half of job done. -+ * Then, this context could be rescheduled. -+ */ -+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) -+{ -+ struct list_head *event_list = &kctx->job_fault_resume_event_list; ++ err = kbase_event_init(kctx); ++ if (err) ++ goto free_jd; + -+ while (!list_empty(event_list)) { -+ struct base_job_fault_event *event; ++ atomic_set(&kctx->drain_pending, 0); + -+ event = kbase_job_fault_event_dequeue(kctx->kbdev, -+ &kctx->job_fault_resume_event_list); -+ kbase_jd_done_worker(&event->katom->work); -+ } ++ mutex_init(&kctx->reg_lock); + -+} ++ INIT_LIST_HEAD(&kctx->waiting_soft_jobs); ++ spin_lock_init(&kctx->waiting_soft_jobs_lock); ++#ifdef CONFIG_KDS ++ INIT_LIST_HEAD(&kctx->waiting_kds_resource); ++#endif ++ err = kbase_dma_fence_init(kctx); ++ if (err) ++ goto free_event; + -+/* Remove all the failed atoms that belong to different contexts -+ * Resume all the contexts that were suspend due to failed job -+ */ -+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) -+{ -+ struct list_head *event_list = &kbdev->job_fault_event_list; -+ unsigned long flags; ++ err = kbase_mmu_init(kctx); ++ if (err) ++ goto term_dma_fence; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ while (!list_empty(event_list)) { -+ kbase_job_fault_event_dequeue(kbdev, event_list); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ wake_up(&kbdev->job_fault_resume_wq); -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ } -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+} ++ do { ++ err = kbase_mem_pool_grow(&kctx->mem_pool, ++ MIDGARD_MMU_BOTTOMLEVEL); ++ if (err) ++ goto pgd_no_mem; + -+static void kbase_job_fault_resume_worker(struct work_struct *data) -+{ -+ struct base_job_fault_event *event = container_of(data, -+ struct base_job_fault_event, job_fault_work); -+ struct kbase_context *kctx; -+ struct kbase_jd_atom *katom; ++ mutex_lock(&kctx->mmu_lock); ++ kctx->pgd = kbase_mmu_alloc_pgd(kctx); ++ mutex_unlock(&kctx->mmu_lock); ++ } while (!kctx->pgd); + -+ katom = event->katom; -+ kctx = katom->kctx; ++ kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev); ++ if (!kctx->aliasing_sink_page) ++ goto no_sink_page; + -+ dev_info(kctx->kbdev->dev, "Job dumping wait\n"); ++ init_waitqueue_head(&kctx->event_queue); + -+ /* When it was waked up, it need to check if queue is empty or the -+ * failed atom belongs to different context. If yes, wake up. Both -+ * of them mean the failed job has been dumped. 
Please note, it -+ * should never happen that the job_fault_event_list has the two -+ * atoms belong to the same context. -+ */ -+ wait_event(kctx->kbdev->job_fault_resume_wq, -+ kbase_ctx_has_no_event_pending(kctx)); ++ kctx->cookies = KBASE_COOKIE_MASK; + -+ atomic_set(&kctx->job_fault_count, 0); -+ kbase_jd_done_worker(&katom->work); ++ /* Make sure page 0 is not used... */ ++ err = kbase_region_tracker_init(kctx); ++ if (err) ++ goto no_region_tracker; + -+ /* In case the following atoms were scheduled during failed job dump -+ * the job_done_worker was held. We need to rerun it after the dump -+ * was finished -+ */ -+ kbase_job_fault_resume_event_cleanup(kctx); ++ err = kbase_sticky_resource_init(kctx); ++ if (err) ++ goto no_sticky; + -+ dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); -+} ++ err = kbase_jit_init(kctx); ++ if (err) ++ goto no_jit; ++#ifdef CONFIG_GPU_TRACEPOINTS ++ atomic_set(&kctx->jctx.work_id, 0); ++#endif ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ atomic_set(&kctx->timeline.jd_atoms_in_flight, 0); ++#endif + -+static struct base_job_fault_event *kbase_job_fault_event_queue( -+ struct list_head *event_list, -+ struct kbase_jd_atom *atom, -+ u32 completion_code) -+{ -+ struct base_job_fault_event *event; ++ kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; + -+ event = &atom->fault_event; ++ mutex_init(&kctx->vinstr_cli_lock); + -+ event->katom = atom; -+ event->event_code = completion_code; ++ timer_setup(&kctx->soft_job_timeout, ++ kbasep_soft_job_timeout_worker, ++ 0); + -+ list_add_tail(&event->head, event_list); ++ return kctx; + -+ return event; ++no_jit: ++ kbase_gpu_vm_lock(kctx); ++ kbase_sticky_resource_term(kctx); ++ kbase_gpu_vm_unlock(kctx); ++no_sticky: ++ kbase_region_tracker_term(kctx); ++no_region_tracker: ++ kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); ++no_sink_page: ++ /* VM lock needed for the call to kbase_mmu_free_pgd */ ++ kbase_gpu_vm_lock(kctx); ++ kbase_mmu_free_pgd(kctx); ++ kbase_gpu_vm_unlock(kctx); ++pgd_no_mem: ++ kbase_mmu_term(kctx); ++term_dma_fence: ++ kbase_dma_fence_term(kctx); ++free_event: ++ kbase_event_cleanup(kctx); ++free_jd: ++ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ ++ kbasep_js_kctx_term(kctx); ++ kbase_jd_exit(kctx); ++deinit_evictable: ++ kbase_mem_evictable_deinit(kctx); ++free_pool: ++ kbase_mem_pool_term(&kctx->mem_pool); ++free_kctx: ++ vfree(kctx); ++out: ++ return NULL; ++} ++KBASE_EXPORT_SYMBOL(kbase_create_context); + ++static void kbase_reg_pending_dtor(struct kbase_va_region *reg) ++{ ++ dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n"); ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ kfree(reg); +} + -+static void kbase_job_fault_event_post(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, u32 completion_code) ++/** ++ * kbase_destroy_context - Destroy a kernel base context. ++ * @kctx: Context to destroy ++ * ++ * Calls kbase_destroy_os_context() to free OS specific structures. ++ * Will release all outstanding regions. 
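/*
 * Illustrative sketch (not from the driver sources): the expected pairing of
 * the two context entry points, roughly as the driver's open/release paths
 * use them. example_open()/example_release() are hypothetical wrappers and
 * the -ENOMEM mapping is the caller's choice; kbase_create_context() itself
 * reports failure only by returning NULL and unwinds its own partial setup.
 */
static int example_open(struct kbase_device *kbdev, bool compat,
			struct kbase_context **out)
{
	struct kbase_context *kctx = kbase_create_context(kbdev, compat);

	if (!kctx)
		return -ENOMEM;
	*out = kctx;
	return 0;
}

static void example_release(struct kbase_context *kctx)
{
	/* Tears down the JIT, regions, MMU tables and memory pool that
	 * kbase_create_context() built up. */
	kbase_destroy_context(kctx);
}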
++ */ ++void kbase_destroy_context(struct kbase_context *kctx) +{ -+ struct base_job_fault_event *event; ++ struct kbase_device *kbdev; ++ int pages; ++ unsigned long pending_regions_to_clean; + unsigned long flags; + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, -+ katom, completion_code); -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ KBASE_DEBUG_ASSERT(NULL != kctx); + -+ wake_up_interruptible(&kbdev->job_fault_wq); ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(NULL != kbdev); + -+ INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); -+ queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); ++ KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); + -+ dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", -+ katom->kctx->tgid, katom->kctx->id); ++ /* Ensure the core is powered up for the destroy process */ ++ /* A suspend won't happen here, because we're in a syscall from a userspace ++ * thread. */ ++ kbase_pm_context_active(kbdev); + -+} ++ kbase_jd_zap_context(kctx); + -+/* -+ * This function will process the job fault -+ * Get the register copy -+ * Send the failed job dump event -+ * Create a Wait queue to wait until the job dump finish -+ */ ++#ifdef CONFIG_DEBUG_FS ++ /* Removing the rest of the debugfs entries here as we want to keep the ++ * atom debugfs interface alive until all atoms have completed. This ++ * is useful for debugging hung contexts. */ ++ debugfs_remove_recursive(kctx->kctx_dentry); ++#endif + -+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, -+ u32 completion_code) -+{ -+ struct kbase_context *kctx = katom->kctx; ++ kbase_event_cleanup(kctx); + -+ /* Check if dumping is in the process -+ * only one atom of each context can be dumped at the same time -+ * If the atom belongs to different context, it can be dumped ++ /* ++ * JIT must be terminated before the code below as it must be called ++ * without the region lock being held. ++ * The code above ensures no new JIT allocations can be made by ++ * by the time we get to this point of context tear down. 
+ */ -+ if (atomic_read(&kctx->job_fault_count) > 0) { -+ kbase_job_fault_event_queue( -+ &kctx->job_fault_resume_event_list, -+ katom, completion_code); -+ dev_info(kctx->kbdev->dev, "queue:%d\n", -+ kbase_jd_atom_id(kctx, katom)); -+ return true; -+ } ++ kbase_jit_term(kctx); + -+ if (kctx->kbdev->job_fault_debug == true) { ++ kbase_gpu_vm_lock(kctx); + -+ if (completion_code != BASE_JD_EVENT_DONE) { ++ kbase_sticky_resource_term(kctx); + -+ if (kbase_job_fault_get_reg_snapshot(kctx) == false) { -+ dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); -+ return false; -+ } ++ /* MMU is disabled as part of scheduling out the context */ ++ kbase_mmu_free_pgd(kctx); + -+ kbase_job_fault_event_post(kctx->kbdev, katom, -+ completion_code); -+ atomic_inc(&kctx->job_fault_count); -+ dev_info(kctx->kbdev->dev, "post:%d\n", -+ kbase_jd_atom_id(kctx, katom)); -+ return true; ++ /* drop the aliasing sink page now that it can't be mapped anymore */ ++ kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); + -+ } -+ } -+ return false; -+ -+} -+ -+static int debug_job_fault_show(struct seq_file *m, void *v) -+{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event = (struct base_job_fault_event *)v; -+ struct kbase_context *kctx = event->katom->kctx; -+ int i; -+ -+ dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", -+ kctx->tgid, kctx->id, event->reg_offset); -+ -+ if (kctx->reg_dump == NULL) { -+ dev_warn(kbdev->dev, "reg dump is NULL"); -+ return -1; -+ } -+ -+ if (kctx->reg_dump[event->reg_offset] == -+ REGISTER_DUMP_TERMINATION_FLAG) { -+ /* Return the error here to stop the read. And the -+ * following next() will not be called. The stop can -+ * get the real event resource and release it -+ */ -+ return -1; -+ } -+ -+ if (event->reg_offset == 0) -+ seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); -+ -+ for (i = 0; i < 50; i++) { -+ if (kctx->reg_dump[event->reg_offset] == -+ REGISTER_DUMP_TERMINATION_FLAG) { -+ break; -+ } -+ seq_printf(m, "%08x: %08x\n", -+ kctx->reg_dump[event->reg_offset], -+ kctx->reg_dump[1+event->reg_offset]); -+ event->reg_offset += 2; -+ -+ } -+ -+ -+ return 0; -+} -+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event = (struct base_job_fault_event *)v; -+ -+ dev_info(kbdev->dev, "debug job fault seq next:%d, %d", -+ event->reg_offset, (int)*pos); -+ -+ return event; -+} -+ -+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) -+{ -+ struct kbase_device *kbdev = m->private; -+ struct base_job_fault_event *event; -+ -+ dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); -+ -+ /* The condition is trick here. It needs make sure the -+ * fault hasn't happened and the dumping hasn't been started, -+ * or the dumping has finished -+ */ -+ if (*pos == 0) { -+ event = kmalloc(sizeof(*event), GFP_KERNEL); -+ if (!event) -+ return NULL; -+ event->reg_offset = 0; -+ if (kbase_job_fault_event_wait(kbdev, event)) { -+ kfree(event); -+ return NULL; -+ } -+ -+ /* The cache flush workaround is called in bottom half of -+ * job done but we delayed it. Now we should clean cache -+ * earlier. Then the GPU memory dump should be correct. 
-+ */ -+ kbase_backend_cacheclean(kbdev, event->katom); -+ } else -+ return NULL; -+ -+ return event; -+} -+ -+static void debug_job_fault_stop(struct seq_file *m, void *v) -+{ -+ struct kbase_device *kbdev = m->private; -+ -+ /* here we wake up the kbase_jd_done_worker after stop, it needs -+ * get the memory dump before the register dump in debug daemon, -+ * otherwise, the memory dump may be incorrect. -+ */ ++ /* free pending region setups */ ++ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; ++ while (pending_regions_to_clean) { ++ unsigned int cookie = __ffs(pending_regions_to_clean); + -+ if (v != NULL) { -+ kfree(v); -+ dev_info(kbdev->dev, "debug job fault seq stop stage 1"); ++ BUG_ON(!kctx->pending_regions[cookie]); + -+ } else { -+ unsigned long flags; ++ kbase_reg_pending_dtor(kctx->pending_regions[cookie]); + -+ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); -+ if (!list_empty(&kbdev->job_fault_event_list)) { -+ kbase_job_fault_event_dequeue(kbdev, -+ &kbdev->job_fault_event_list); -+ wake_up(&kbdev->job_fault_resume_wq); -+ } -+ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -+ dev_info(kbdev->dev, "debug job fault seq stop stage 2"); ++ kctx->pending_regions[cookie] = NULL; ++ pending_regions_to_clean &= ~(1UL << cookie); + } + -+} -+ -+static const struct seq_operations ops = { -+ .start = debug_job_fault_start, -+ .next = debug_job_fault_next, -+ .stop = debug_job_fault_stop, -+ .show = debug_job_fault_show, -+}; -+ -+static int debug_job_fault_open(struct inode *in, struct file *file) -+{ -+ struct kbase_device *kbdev = in->i_private; -+ -+ seq_open(file, &ops); -+ -+ ((struct seq_file *)file->private_data)->private = kbdev; -+ dev_info(kbdev->dev, "debug job fault seq open"); -+ -+ kbdev->job_fault_debug = true; -+ -+ return 0; -+ -+} -+ -+static int debug_job_fault_release(struct inode *in, struct file *file) -+{ -+ struct kbase_device *kbdev = in->i_private; -+ -+ seq_release(in, file); -+ -+ kbdev->job_fault_debug = false; -+ -+ /* Clean the unprocessed job fault. After that, all the suspended -+ * contexts could be rescheduled. 
-+ */ -+ kbase_job_fault_event_cleanup(kbdev); -+ -+ dev_info(kbdev->dev, "debug job fault seq close"); -+ -+ return 0; -+} ++ kbase_region_tracker_term(kctx); ++ kbase_gpu_vm_unlock(kctx); + -+static const struct file_operations kbasep_debug_job_fault_fops = { -+ .open = debug_job_fault_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = debug_job_fault_release, -+}; ++ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ ++ kbasep_js_kctx_term(kctx); + -+/* -+ * Initialize debugfs entry for job fault dump -+ */ -+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) -+{ -+ debugfs_create_file("job_fault", S_IRUGO, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_debug_job_fault_fops); -+} ++ kbase_jd_exit(kctx); + ++ kbase_pm_context_idle(kbdev); + -+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) -+{ ++ kbase_dma_fence_term(kctx); + -+ INIT_LIST_HEAD(&kbdev->job_fault_event_list); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_remove_ctx(kctx); ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ init_waitqueue_head(&(kbdev->job_fault_wq)); -+ init_waitqueue_head(&(kbdev->job_fault_resume_wq)); -+ spin_lock_init(&kbdev->job_fault_event_lock); ++ kbase_mmu_term(kctx); + -+ kbdev->job_fault_resume_workq = alloc_workqueue( -+ "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); -+ if (!kbdev->job_fault_resume_workq) -+ return -ENOMEM; ++ pages = atomic_read(&kctx->used_pages); ++ if (pages != 0) ++ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + -+ kbdev->job_fault_debug = false; ++ kbase_mem_evictable_deinit(kctx); ++ kbase_mem_pool_term(&kctx->mem_pool); ++ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + -+ return 0; ++ vfree(kctx); +} ++KBASE_EXPORT_SYMBOL(kbase_destroy_context); + -+/* -+ * Release the relevant resource per device ++/** ++ * kbase_context_set_create_flags - Set creation flags on a context ++ * @kctx: Kbase context ++ * @flags: Flags to set ++ * ++ * Return: 0 on success + */ -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) +{ -+ destroy_workqueue(kbdev->job_fault_resume_workq); -+} -+ ++ int err = 0; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ unsigned long irq_flags; + -+/* -+ * Initialize the relevant data structure per context -+ */ -+void kbase_debug_job_fault_context_init(struct kbase_context *kctx) -+{ ++ KBASE_DEBUG_ASSERT(NULL != kctx); + -+ /* We need allocate double size register range -+ * Because this memory will keep the register address and value -+ */ -+ kctx->reg_dump = vmalloc(0x4000 * 2); -+ if (kctx->reg_dump == NULL) -+ return; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { -+ vfree(kctx->reg_dump); -+ kctx->reg_dump = NULL; ++ /* Validate flags */ ++ if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) { ++ err = -EINVAL; ++ goto out; + } -+ INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); -+ atomic_set(&kctx->job_fault_count, 0); -+ -+} -+ -+/* -+ * release the relevant resource per context -+ */ -+void kbase_debug_job_fault_context_term(struct kbase_context *kctx) -+{ -+ vfree(kctx->reg_dump); -+} + -+#else /* CONFIG_DEBUG_FS */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + -+int 
kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) -+{ -+ kbdev->job_fault_debug = false; ++ /* Translate the flags */ ++ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) ++ kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + -+ return 0; -+} ++ /* Latch the initial attributes into the Job Scheduler */ ++ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); + -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) -+{ ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ out: ++ return err; +} -+ -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h ++KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h new file mode 100644 -index 000000000..a2bf8983c +index 000000000..a3f5bb0ce --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h -@@ -0,0 +1,96 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_context.h +@@ -0,0 +1,90 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -360386,94 +363326,88 @@ index 000000000..a2bf8983c + + + -+#ifndef _KBASE_DEBUG_JOB_FAULT_H -+#define _KBASE_DEBUG_JOB_FAULT_H -+ -+#include -+#include -+ -+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF -+ -+/** -+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue -+ * per device and initialize the required lists. -+ * @kbdev: Device pointer -+ * -+ * Return: Zero on success or a negative error code. -+ */ -+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); ++#ifndef _KBASE_CONTEXT_H_ ++#define _KBASE_CONTEXT_H_ + -+/** -+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); ++#include + -+/** -+ * kbase_debug_job_fault_dev_term - Clean up resources created in -+ * kbase_debug_job_fault_dev_init. -+ * @kbdev: Device pointer -+ */ -+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + -+/** -+ * kbase_debug_job_fault_context_init - Initialize the relevant -+ * data structure per context -+ * @kctx: KBase context pointer -+ */ -+void kbase_debug_job_fault_context_init(struct kbase_context *kctx); ++int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); + +/** -+ * kbase_debug_job_fault_context_term - Release the relevant -+ * resource per context -+ * @kctx: KBase context pointer ++ * kbase_ctx_flag - Check if @flag is set on @kctx ++ * @kctx: Pointer to kbase context to check ++ * @flag: Flag to check ++ * ++ * Return: true if @flag is set on @kctx, false if not. + */ -+void kbase_debug_job_fault_context_term(struct kbase_context *kctx); ++static inline bool kbase_ctx_flag(struct kbase_context *kctx, ++ enum kbase_context_flags flag) ++{ ++ return atomic_read(&kctx->flags) & flag; ++} + +/** -+ * kbase_debug_job_fault_process - Process the failed job. 
-+ * It will send a event and wake up the job fault waiting queue -+ * Then create a work queue to wait for job dump finish -+ * This function should be called in the interrupt handler and before -+ * jd_done that make sure the jd_done_worker will be delayed until the -+ * job dump finish -+ * @katom: The failed atom pointer -+ * @completion_code: the job status -+ * @return true if dump is going on ++ * kbase_ctx_flag_clear - Clear @flag on @kctx ++ * @kctx: Pointer to kbase context ++ * @flag: Flag to clear ++ * ++ * Clear the @flag on @kctx. This is done atomically, so other flags being ++ * cleared or set at the same time will be safe. ++ * ++ * Some flags have locking requirements, check the documentation for the ++ * respective flags. + */ -+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, -+ u32 completion_code); ++static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, ++ enum kbase_context_flags flag) ++{ ++#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE ++ /* ++ * Earlier kernel versions doesn't have atomic_andnot() or ++ * atomic_and(). atomic_clear_mask() was only available on some ++ * architectures and removed on arm in v3.13 on arm and arm64. ++ * ++ * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, ++ * when atomic_andnot() becomes available. ++ */ ++ int old, new; + ++ do { ++ old = atomic_read(&kctx->flags); ++ new = old & ~flag; + -+/** -+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers -+ * address during the job fault process, the relevant registers will -+ * be saved when a job fault happen -+ * @kctx: KBase context pointer -+ * @reg_range: Maximum register address space -+ * @return true if initializing successfully -+ */ -+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, -+ int reg_range); ++ } while (atomic_cmpxchg(&kctx->flags, old, new) != old); ++#else ++ atomic_andnot(flag, &kctx->flags); ++#endif ++} + +/** -+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for -+ * failed job dump -+ * @kctx: KBase context pointer -+ * @return true if getting registers successfully ++ * kbase_ctx_flag_set - Set @flag on @kctx ++ * @kctx: Pointer to kbase context ++ * @flag: Flag to clear ++ * ++ * Set the @flag on @kctx. This is done atomically, so other flags being ++ * cleared or set at the same time will be safe. ++ * ++ * Some flags have locking requirements, check the documentation for the ++ * respective flags. + */ -+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); -+ -+#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c ++static inline void kbase_ctx_flag_set(struct kbase_context *kctx, ++ enum kbase_context_flags flag) ++{ ++ atomic_or(flag, &kctx->flags); ++} ++#endif /* _KBASE_CONTEXT_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c new file mode 100644 -index 000000000..6f2cbdf57 +index 000000000..738766f88 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c -@@ -0,0 +1,306 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +@@ -0,0 +1,5023 @@ +/* + * -+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
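/*
 * Illustrative sketch (not from the driver sources): typical use of the
 * atomic flag helpers defined above, with the KCTX_SUBMIT_DISABLED flag that
 * kbase_context_set_create_flags() manipulates. The example_* wrappers are
 * hypothetical.
 */
static void example_toggle_submit(struct kbase_context *kctx, bool allow)
{
	if (allow)
		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
	else
		kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
}

/* Readers can then test the bit without taking any lock: */
static bool example_submit_allowed(struct kbase_context *kctx)
{
	return !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED);
}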
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -360486,7791 +363420,5237 @@ index 000000000..6f2cbdf57 + * + */ + ++#define ENABLE_DEBUG_LOG ++#include "platform/rk/custom_log.h" + -+ -+/* -+ * Debugfs interface to dump the memory visible to the GPU -+ */ -+ ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MALI_DEVFREQ ++#include ++#include ++#ifdef CONFIG_DEVFREQ_THERMAL ++#include ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_DEVFREQ */ ++#ifdef CONFIG_MALI_NO_MALI ++#include "mali_kbase_model_linux.h" ++#endif /* CONFIG_MALI_NO_MALI */ ++#include "mali_kbase_mem_profile_debugfs_buf_size.h" +#include "mali_kbase_debug_mem_view.h" -+#include "mali_kbase.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_mem_pool_debugfs.h" ++#if !MALI_CUSTOMER_RELEASE ++#include "mali_kbase_regs_dump_debugfs.h" ++#endif /* !MALI_CUSTOMER_RELEASE */ ++#include "mali_kbase_regs_history_debugfs.h" ++#include ++#include ++#include ++#include ++#include "mali_kbase_ioctl.h" + ++#ifdef CONFIG_KDS ++#include ++#include ++#include ++#endif /* CONFIG_KDS */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include /* is_compat_task */ ++#include ++#include ++#ifdef CONFIG_MALI_PLATFORM_DEVICETREE ++#include ++#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ ++#include ++#include ++#ifdef CONFIG_MALI_PLATFORM_FAKE ++#include ++#endif /*CONFIG_MALI_PLATFORM_FAKE */ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#include ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++#include ++#include + -+#ifdef CONFIG_DEBUG_FS ++#include + -+struct debug_mem_mapping { -+ struct list_head node; + -+ struct kbase_mem_phy_alloc *alloc; -+ unsigned long flags; ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) ++#include ++#else ++#include ++#endif + -+ u64 start_pfn; -+ size_t nr_pages; -+}; ++#include + -+struct debug_mem_data { -+ struct list_head mapping_list; -+ struct kbase_context *kctx; -+}; ++#include + -+struct debug_mem_seq_off { -+ struct list_head *lh; -+ size_t offset; -+}; ++/* GPU IRQ Tags */ ++#define JOB_IRQ_TAG 0 ++#define MMU_IRQ_TAG 1 ++#define GPU_IRQ_TAG 2 + -+static void *debug_mem_start(struct seq_file *m, loff_t *_pos) -+{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data; -+ struct debug_mem_mapping *map; -+ loff_t pos = *_pos; ++#if MALI_UNIT_TEST ++static struct kbase_exported_test_data shared_kernel_test_data; ++EXPORT_SYMBOL(shared_kernel_test_data); ++#endif /* MALI_UNIT_TEST */ + -+ list_for_each_entry(map, &mem_data->mapping_list, node) { -+ if (pos >= map->nr_pages) { -+ pos -= map->nr_pages; -+ } else { -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return NULL; -+ data->lh = &map->node; -+ data->offset = pos; -+ return data; -+ } -+ } ++/** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. 
*/ ++#define ROCKCHIP_VERSION (13) + -+ /* Beyond the end */ -+ return NULL; -+} ++static int kbase_dev_nr; + -+static void debug_mem_stop(struct seq_file *m, void *v) ++static DEFINE_MUTEX(kbase_dev_list_lock); ++static LIST_HEAD(kbase_dev_list); ++ ++#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" ++static inline void __compile_time_asserts(void) +{ -+ kfree(v); ++ CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); +} + -+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) ++static int kbase_api_handshake(struct kbase_context *kctx, ++ struct kbase_ioctl_version_check *version) +{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data = v; -+ struct debug_mem_mapping *map; -+ -+ map = list_entry(data->lh, struct debug_mem_mapping, node); -+ -+ if (data->offset < map->nr_pages - 1) { -+ data->offset++; -+ ++*pos; -+ return data; -+ } -+ -+ if (list_is_last(data->lh, &mem_data->mapping_list)) { -+ kfree(data); -+ return NULL; ++ switch (version->major) { ++#ifdef BASE_LEGACY_UK6_SUPPORT ++ case 6: ++ /* We are backwards compatible with version 6, ++ * so pretend to be the old version */ ++ version->major = 6; ++ version->minor = 1; ++ break; ++#endif /* BASE_LEGACY_UK6_SUPPORT */ ++#ifdef BASE_LEGACY_UK7_SUPPORT ++ case 7: ++ /* We are backwards compatible with version 7, ++ * so pretend to be the old version */ ++ version->major = 7; ++ version->minor = 1; ++ break; ++#endif /* BASE_LEGACY_UK7_SUPPORT */ ++#ifdef BASE_LEGACY_UK8_SUPPORT ++ case 8: ++ /* We are backwards compatible with version 8, ++ * so pretend to be the old version */ ++ version->major = 8; ++ version->minor = 4; ++ break; ++#endif /* BASE_LEGACY_UK8_SUPPORT */ ++#ifdef BASE_LEGACY_UK9_SUPPORT ++ case 9: ++ /* We are backwards compatible with version 9, ++ * so pretend to be the old version */ ++ version->major = 9; ++ version->minor = 0; ++ break; ++#endif /* BASE_LEGACY_UK8_SUPPORT */ ++ case BASE_UK_VERSION_MAJOR: ++ /* set minor to be the lowest common */ ++ version->minor = min_t(int, BASE_UK_VERSION_MINOR, ++ (int)version->minor); ++ break; ++ default: ++ /* We return our actual version regardless if it ++ * matches the version returned by userspace - ++ * userspace can bail if it can't handle this ++ * version */ ++ version->major = BASE_UK_VERSION_MAJOR; ++ version->minor = BASE_UK_VERSION_MINOR; ++ break; + } + -+ data->lh = data->lh->next; -+ data->offset = 0; -+ ++*pos; ++ /* save the proposed version number for later use */ ++ kctx->api_version = KBASE_API_VERSION(version->major, version->minor); + -+ return data; ++ return 0; +} + -+static int debug_mem_show(struct seq_file *m, void *v) -+{ -+ struct debug_mem_data *mem_data = m->private; -+ struct debug_mem_seq_off *data = v; -+ struct debug_mem_mapping *map; -+ int i, j; -+ struct page *page; -+ uint32_t *mapping; -+ pgprot_t prot = PAGE_KERNEL; -+ -+ map = list_entry(data->lh, struct debug_mem_mapping, node); -+ -+ kbase_gpu_vm_lock(mem_data->kctx); -+ -+ if (data->offset >= map->alloc->nents) { -+ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + -+ data->offset) << PAGE_SHIFT); -+ goto out; -+ } -+ -+ if (!(map->flags & KBASE_REG_CPU_CACHED)) -+ prot = pgprot_writecombine(prot); ++/** ++ * enum mali_error - Mali error codes shared with userspace ++ * ++ * This is subset of those common Mali errors that can be returned to userspace. ++ * Values of matching user and kernel space enumerators MUST be the same. 
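/*
 * Illustrative sketch (not from the driver sources): what the version
 * handshake above does to a request for the current major version. The field
 * values are made up; only the clamping of .minor to BASE_UK_VERSION_MINOR
 * and the recording of kctx->api_version are taken from
 * kbase_api_handshake().
 */
static void example_handshake(struct kbase_context *kctx)
{
	struct kbase_ioctl_version_check version = {
		.major = BASE_UK_VERSION_MAJOR,
		.minor = 999,	/* newer than the driver supports */
	};

	kbase_api_handshake(kctx, &version);
	/* version.major is unchanged, version.minor is now
	 * min(BASE_UK_VERSION_MINOR, 999), and kctx->api_version records
	 * the agreed pair. */
}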
++ * MALI_ERROR_NONE is guaranteed to be 0. ++ * ++ * @MALI_ERROR_NONE: Success ++ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver ++ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure ++ * @MALI_ERROR_FUNCTION_FAILED: Generic error code ++ */ ++enum mali_error { ++ MALI_ERROR_NONE = 0, ++ MALI_ERROR_OUT_OF_GPU_MEMORY, ++ MALI_ERROR_OUT_OF_MEMORY, ++ MALI_ERROR_FUNCTION_FAILED, ++}; + -+ page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset])); -+ mapping = vmap(&page, 1, VM_MAP, prot); -+ if (!mapping) -+ goto out; ++enum { ++ inited_mem = (1u << 0), ++ inited_js = (1u << 1), ++ inited_pm_runtime_init = (1u << 2), ++#ifdef CONFIG_MALI_DEVFREQ ++ inited_devfreq = (1u << 3), ++#endif /* CONFIG_MALI_DEVFREQ */ ++ inited_tlstream = (1u << 4), ++ inited_backend_early = (1u << 5), ++ inited_backend_late = (1u << 6), ++ inited_device = (1u << 7), ++ inited_vinstr = (1u << 8), + -+ for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { -+ seq_printf(m, "%016llx:", i + ((map->start_pfn + -+ data->offset) << PAGE_SHIFT)); ++ inited_job_fault = (1u << 10), ++ inited_sysfs_group = (1u << 11), ++ inited_misc_register = (1u << 12), ++ inited_get_device = (1u << 13), ++ inited_dev_list = (1u << 14), ++ inited_debugfs = (1u << 15), ++ inited_gpu_device = (1u << 16), ++ inited_registers_map = (1u << 17), ++ inited_io_history = (1u << 18), ++ inited_power_control = (1u << 19), ++ inited_buslogger = (1u << 20), ++ inited_protected = (1u << 21), ++ inited_ctx_sched = (1u << 22) ++}; + -+ for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping)) -+ seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]); -+ seq_putc(m, '\n'); -+ } + -+ vunmap(mapping); ++#ifdef CONFIG_MALI_DEBUG ++#define INACTIVE_WAIT_MS (5000) + -+ seq_putc(m, '\n'); ++void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive) ++{ ++ kbdev->driver_inactive = inactive; ++ wake_up(&kbdev->driver_inactive_wait); + -+out: -+ kbase_gpu_vm_unlock(mem_data->kctx); -+ return 0; ++ /* Wait for any running IOCTLs to complete */ ++ if (inactive) ++ msleep(INACTIVE_WAIT_MS); +} ++KBASE_EXPORT_TEST_API(kbase_set_driver_inactive); ++#endif /* CONFIG_MALI_DEBUG */ + -+static const struct seq_operations ops = { -+ .start = debug_mem_start, -+ .next = debug_mem_next, -+ .stop = debug_mem_stop, -+ .show = debug_mem_show, -+}; -+ -+static int debug_mem_zone_open(struct rb_root *rbtree, -+ struct debug_mem_data *mem_data) ++/** ++ * kbase_legacy_dispatch - UKK dispatch function ++ * ++ * This is the dispatch function for the legacy UKK ioctl interface. No new ++ * ioctls should be added to this function, see kbase_ioctl instead. 
++ * ++ * @kctx: The kernel context structure ++ * @args: Pointer to the data structure passed from/to user space ++ * @args_size: Size of the data structure ++ */ ++static int kbase_legacy_dispatch(struct kbase_context *kctx, ++ void * const args, u32 args_size) +{ ++ struct kbase_device *kbdev; ++ union uk_header *ukh = args; ++ u32 id; + int ret = 0; -+ struct rb_node *p; -+ struct kbase_va_region *reg; -+ struct debug_mem_mapping *mapping; + -+ for (p = rb_first(rbtree); p; p = rb_next(p)) { -+ reg = rb_entry(p, struct kbase_va_region, rblink); ++ KBASE_DEBUG_ASSERT(ukh != NULL); + -+ if (reg->gpu_alloc == NULL) -+ /* Empty region - ignore */ -+ continue; ++ kbdev = kctx->kbdev; ++ id = ukh->id; ++ ukh->ret = MALI_ERROR_NONE; /* Be optimistic */ + -+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); -+ if (!mapping) { -+ ret = -ENOMEM; -+ goto out; ++#ifdef CONFIG_MALI_DEBUG ++ wait_event(kbdev->driver_inactive_wait, ++ kbdev->driver_inactive == false); ++#endif /* CONFIG_MALI_DEBUG */ ++ ++ if (UKP_FUNC_ID_CHECK_VERSION == id) { ++ struct uku_version_check_args *version_check; ++ struct kbase_ioctl_version_check version; ++ ++ if (args_size != sizeof(struct uku_version_check_args)) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ return 0; + } ++ version_check = (struct uku_version_check_args *)args; ++ version.minor = version_check->minor; ++ version.major = version_check->major; + -+ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); -+ mapping->start_pfn = reg->start_pfn; -+ mapping->nr_pages = reg->nr_pages; -+ mapping->flags = reg->flags; -+ list_add_tail(&mapping->node, &mem_data->mapping_list); -+ } ++ kbase_api_handshake(kctx, &version); + -+out: -+ return ret; -+} ++ version_check->minor = version.minor; ++ version_check->major = version.major; ++ ukh->ret = MALI_ERROR_NONE; ++ return 0; ++ } + -+static int debug_mem_open(struct inode *i, struct file *file) -+{ -+ struct file *kctx_file = i->i_private; -+ struct kbase_context *kctx = kctx_file->private_data; -+ struct debug_mem_data *mem_data; -+ int ret; ++ /* block calls until version handshake */ ++ if (kctx->api_version == 0) ++ return -EINVAL; + -+ ret = seq_open(file, &ops); -+ if (ret) -+ return ret; ++ if (!atomic_read(&kctx->setup_complete)) { ++ struct kbase_uk_set_flags *kbase_set_flags; + -+ mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); -+ if (!mem_data) { -+ ret = -ENOMEM; -+ goto out; -+ } ++ /* setup pending, try to signal that we'll do the setup, ++ * if setup was already in progress, err this call ++ */ ++ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) ++ return -EINVAL; + -+ mem_data->kctx = kctx; ++ /* if unexpected call, will stay stuck in setup mode ++ * (is it the only call we accept?) 
++ */ ++ if (id != KBASE_FUNC_SET_FLAGS) ++ return -EINVAL; + -+ INIT_LIST_HEAD(&mem_data->mapping_list); ++ kbase_set_flags = (struct kbase_uk_set_flags *)args; + -+ get_file(kctx_file); ++ /* if not matching the expected call, stay in setup mode */ ++ if (sizeof(*kbase_set_flags) != args_size) ++ goto bad_size; + -+ kbase_gpu_vm_lock(kctx); ++ /* if bad flags, will stay stuck in setup mode */ ++ if (kbase_context_set_create_flags(kctx, ++ kbase_set_flags->create_flags) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); -+ if (0 != ret) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; ++ atomic_set(&kctx->setup_complete, 1); ++ return 0; + } + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); -+ if (0 != ret) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++ /* setup complete, perform normal operation */ ++ switch (id) { ++ case KBASE_FUNC_MEM_JIT_INIT: ++ { ++ struct kbase_uk_mem_jit_init *jit_init = args; + -+ ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); -+ if (0 != ret) { -+ kbase_gpu_vm_unlock(kctx); -+ goto out; -+ } ++ if (sizeof(*jit_init) != args_size) ++ goto bad_size; + -+ kbase_gpu_vm_unlock(kctx); ++ if (kbase_region_tracker_init_jit(kctx, ++ jit_init->va_pages)) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } ++ case KBASE_FUNC_MEM_ALLOC: ++ { ++ struct kbase_uk_mem_alloc *mem = args; ++ struct kbase_va_region *reg; + -+ ((struct seq_file *)file->private_data)->private = mem_data; ++ if (sizeof(*mem) != args_size) ++ goto bad_size; + -+ return 0; ++#if defined(CONFIG_64BIT) ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* force SAME_VA if a 64-bit client */ ++ mem->flags |= BASE_MEM_SAME_VA; ++ } ++#endif + -+out: -+ if (mem_data) { -+ while (!list_empty(&mem_data->mapping_list)) { -+ struct debug_mem_mapping *mapping; ++ reg = kbase_mem_alloc(kctx, mem->va_pages, ++ mem->commit_pages, mem->extent, ++ &mem->flags, &mem->gpu_va); ++ mem->va_alignment = 0; + -+ mapping = list_first_entry(&mem_data->mapping_list, -+ struct debug_mem_mapping, node); -+ kbase_mem_phy_alloc_put(mapping->alloc); -+ list_del(&mapping->node); -+ kfree(mapping); ++ if (!reg) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; + } -+ fput(kctx_file); -+ kfree(mem_data); -+ } -+ seq_release(i, file); -+ return ret; -+} ++ case KBASE_FUNC_MEM_IMPORT: { ++ struct kbase_uk_mem_import *mem_import = args; ++ void __user *phandle; + -+static int debug_mem_release(struct inode *inode, struct file *file) -+{ -+ struct file *kctx_file = inode->i_private; -+ struct seq_file *sfile = file->private_data; -+ struct debug_mem_data *mem_data = sfile->private; -+ struct debug_mem_mapping *mapping; ++ if (sizeof(*mem_import) != args_size) ++ goto bad_size; ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ phandle = compat_ptr(mem_import->phandle.compat_value); ++ else ++#endif ++ phandle = mem_import->phandle.value; + -+ seq_release(inode, file); ++ if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } + -+ while (!list_empty(&mem_data->mapping_list)) { -+ mapping = list_first_entry(&mem_data->mapping_list, -+ struct debug_mem_mapping, node); -+ kbase_mem_phy_alloc_put(mapping->alloc); -+ list_del(&mapping->node); -+ kfree(mapping); ++ if (kbase_mem_import(kctx, ++ (enum base_mem_import_type) ++ mem_import->type, ++ phandle, ++ 0, ++ &mem_import->gpu_va, ++ &mem_import->va_pages, ++ &mem_import->flags)) { ++ mem_import->type = 
BASE_MEM_IMPORT_TYPE_INVALID; ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ } ++ break; + } ++ case KBASE_FUNC_MEM_ALIAS: { ++ struct kbase_uk_mem_alias *alias = args; ++ struct base_mem_aliasing_info __user *user_ai; ++ struct base_mem_aliasing_info *ai; + -+ kfree(mem_data); -+ -+ fput(kctx_file); ++ if (sizeof(*alias) != args_size) ++ goto bad_size; + -+ return 0; -+} ++ if (alias->nents > 2048) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } ++ if (!alias->nents) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } + -+static const struct file_operations kbase_debug_mem_view_fops = { -+ .open = debug_mem_open, -+ .release = debug_mem_release, -+ .read = seq_read, -+ .llseek = seq_lseek -+}; ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_ai = compat_ptr(alias->ai.compat_value); ++ else ++#endif ++ user_ai = alias->ai.value; + -+/** -+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file -+ * @kctx_file: The /dev/mali0 file instance for the context -+ * -+ * This function creates a "mem_view" file which can be used to get a view of -+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables). -+ * -+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the -+ * parent directory. -+ */ -+void kbase_debug_mem_view_init(struct file *kctx_file) -+{ -+ struct kbase_context *kctx = kctx_file->private_data; ++ ai = vmalloc(sizeof(*ai) * alias->nents); + -+ debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file, -+ &kbase_debug_mem_view_fops); -+} ++ if (!ai) { ++ ukh->ret = MALI_ERROR_OUT_OF_MEMORY; ++ break; ++ } + -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h -new file mode 100644 -index 000000000..20ab51a77 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h -@@ -0,0 +1,25 @@ -+/* -+ * -+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ if (copy_from_user(ai, user_ai, ++ sizeof(*ai) * alias->nents)) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ goto copy_failed; ++ } + ++ alias->gpu_va = kbase_mem_alias(kctx, &alias->flags, ++ alias->stride, ++ alias->nents, ai, ++ &alias->va_pages); ++ if (!alias->gpu_va) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ goto no_alias; ++ } ++no_alias: ++copy_failed: ++ vfree(ai); ++ break; ++ } ++ case KBASE_FUNC_MEM_COMMIT: ++ { ++ struct kbase_uk_mem_commit *commit = args; ++ int ret; + ++ if (sizeof(*commit) != args_size) ++ goto bad_size; + -+#ifndef _KBASE_DEBUG_MEM_VIEW_H -+#define _KBASE_DEBUG_MEM_VIEW_H ++ ret = kbase_mem_commit(kctx, commit->gpu_addr, ++ commit->pages); + -+#include ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ commit->result_subcode = ++ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS; + -+void kbase_debug_mem_view_init(struct file *kctx_file); ++ if (ret == 0) { ++ ukh->ret = MALI_ERROR_NONE; ++ commit->result_subcode = ++ BASE_BACKING_THRESHOLD_OK; ++ } else if (ret == -ENOMEM) { ++ commit->result_subcode = ++ BASE_BACKING_THRESHOLD_ERROR_OOM; ++ } + -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h -new file mode 100644 -index 000000000..f8a6f33df ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h -@@ -0,0 +1,1602 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ break; ++ } + ++ case KBASE_FUNC_MEM_QUERY: ++ { ++ struct kbase_uk_mem_query *query = args; + ++ if (sizeof(*query) != args_size) ++ goto bad_size; + ++ if (kbase_mem_query(kctx, query->gpu_addr, ++ query->query, &query->value) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ ukh->ret = MALI_ERROR_NONE; ++ break; ++ } ++ break; + ++ case KBASE_FUNC_MEM_FLAGS_CHANGE: ++ { ++ struct kbase_uk_mem_flags_change *fc = args; + -+/** -+ * @file mali_kbase_defs.h -+ * -+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to -+ * allow the hierarchy of header files to work. 
-+ */ ++ if (sizeof(*fc) != args_size) ++ goto bad_size; + -+#ifndef _KBASE_DEFS_H_ -+#define _KBASE_DEFS_H_ ++ if (kbase_mem_flags_change(kctx, fc->gpu_va, ++ fc->flags, fc->mask) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ break; ++ } ++ case KBASE_FUNC_MEM_FREE: ++ { ++ struct kbase_uk_mem_free *mem = args; + -+#include -+#include -+#include -+#include ++ if (sizeof(*mem) != args_size) ++ goto bad_size; + -+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -+#include -+#endif ++ if (kbase_mem_free(kctx, mem->gpu_addr) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } + ++ case KBASE_FUNC_JOB_SUBMIT: ++ { ++ struct kbase_uk_job_submit *job = args; ++ void __user *user_addr = NULL; + -+#ifdef CONFIG_KDS -+#include -+#endif /* CONFIG_KDS */ ++ if (sizeof(*job) != args_size) ++ goto bad_size; + -+#if defined(CONFIG_SYNC) -+#include -+#else -+#include "mali_kbase_fence_defs.h" ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_addr = compat_ptr(job->addr.compat_value); ++ else +#endif ++ user_addr = job->addr.value; + -+#ifdef CONFIG_DEBUG_FS -+#include -+#endif /* CONFIG_DEBUG_FS */ ++ if (kbase_jd_submit(kctx, user_addr, job->nr_atoms, ++ job->stride, false) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } + -+#ifdef CONFIG_MALI_DEVFREQ -+#include -+#endif /* CONFIG_MALI_DEVFREQ */ ++#ifdef BASE_LEGACY_UK6_SUPPORT ++ case KBASE_FUNC_JOB_SUBMIT_UK6: ++ { ++ struct kbase_uk_job_submit *job = args; ++ void __user *user_addr = NULL; + -+#include -+#include -+#include ++ if (sizeof(*job) != args_size) ++ goto bad_size; + -+#if defined(CONFIG_PM) -+#define KBASE_PM_RUNTIME 1 ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_addr = compat_ptr(job->addr.compat_value); ++ else +#endif ++ user_addr = job->addr.value; + -+/** Enable SW tracing when set */ -+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE -+#define KBASE_TRACE_ENABLE 1 ++ if (kbase_jd_submit(kctx, user_addr, job->nr_atoms, ++ job->stride, true) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } +#endif + -+#ifndef KBASE_TRACE_ENABLE -+#ifdef CONFIG_MALI_DEBUG -+#define KBASE_TRACE_ENABLE 1 -+#else -+#define KBASE_TRACE_ENABLE 0 -+#endif /* CONFIG_MALI_DEBUG */ -+#endif /* KBASE_TRACE_ENABLE */ ++ case KBASE_FUNC_SYNC: ++ { ++ struct kbase_uk_sync_now *sn = args; + -+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ -+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 ++ if (sizeof(*sn) != args_size) ++ goto bad_size; + -+/** -+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. -+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU -+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware -+ * before resetting. -+ */ -+#define ZAP_TIMEOUT 1000 ++#ifndef CONFIG_MALI_COH_USER ++ if (kbase_sync_now(kctx, &sn->sset.basep_sset) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++#endif ++ break; ++ } + -+/** Number of milliseconds before we time out on a GPU soft/hard reset */ -+#define RESET_TIMEOUT 500 ++ case KBASE_FUNC_DISJOINT_QUERY: ++ { ++ struct kbase_uk_disjoint_query *dquery = args; + -+/** -+ * Prevent soft-stops from occuring in scheduling situations -+ * -+ * This is not due to HW issues, but when scheduling is desired to be more predictable. 
-+ * -+ * Therefore, soft stop may still be disabled due to HW issues. -+ * -+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. -+ * -+ * @note if not in use, define this value to 0 instead of \#undef'ing it -+ */ -+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 ++ if (sizeof(*dquery) != args_size) ++ goto bad_size; + -+/** -+ * Prevent hard-stops from occuring in scheduling situations -+ * -+ * This is not due to HW issues, but when scheduling is desired to be more predictable. -+ * -+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. -+ * -+ * @note if not in use, define this value to 0 instead of \#undef'ing it -+ */ -+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 ++ /* Get the disjointness counter value. */ ++ dquery->counter = kbase_disjoint_event_get(kctx->kbdev); ++ break; ++ } + -+/** -+ * The maximum number of Job Slots to support in the Hardware. -+ * -+ * You can optimize this down if your target devices will only ever support a -+ * small number of job slots. -+ */ -+#define BASE_JM_MAX_NR_SLOTS 3 ++ case KBASE_FUNC_POST_TERM: ++ { ++ kbase_event_close(kctx); ++ break; ++ } + -+/** -+ * The maximum number of Address Spaces to support in the Hardware. -+ * -+ * You can optimize this down if your target devices will only ever support a -+ * small number of Address Spaces -+ */ -+#define BASE_MAX_NR_AS 16 ++ case KBASE_FUNC_HWCNT_SETUP: ++ { ++ struct kbase_uk_hwcnt_setup *setup = args; + -+/* mmu */ -+#define MIDGARD_MMU_VA_BITS 48 ++ if (sizeof(*setup) != args_size) ++ goto bad_size; + -+#if MIDGARD_MMU_VA_BITS > 39 -+#define MIDGARD_MMU_TOPLEVEL 0 -+#else -+#define MIDGARD_MMU_TOPLEVEL 1 -+#endif ++ mutex_lock(&kctx->vinstr_cli_lock); ++ if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx, ++ &kctx->vinstr_cli, setup) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ mutex_unlock(&kctx->vinstr_cli_lock); ++ break; ++ } + -+#define MIDGARD_MMU_BOTTOMLEVEL 3 ++ case KBASE_FUNC_HWCNT_DUMP: ++ { ++ /* args ignored */ ++ mutex_lock(&kctx->vinstr_cli_lock); ++ if (kbase_vinstr_hwc_dump(kctx->vinstr_cli, ++ BASE_HWCNT_READER_EVENT_MANUAL) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ mutex_unlock(&kctx->vinstr_cli_lock); ++ break; ++ } + -+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) ++ case KBASE_FUNC_HWCNT_CLEAR: ++ { ++ /* args ignored */ ++ mutex_lock(&kctx->vinstr_cli_lock); ++ if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ mutex_unlock(&kctx->vinstr_cli_lock); ++ break; ++ } + -+/** setting in kbase_context::as_nr that indicates it's invalid */ -+#define KBASEP_AS_NR_INVALID (-1) ++ case KBASE_FUNC_HWCNT_READER_SETUP: ++ { ++ struct kbase_uk_hwcnt_reader_setup *setup = args; + -+#define KBASE_LOCK_REGION_MAX_SIZE (63) -+#define KBASE_LOCK_REGION_MIN_SIZE (11) ++ if (sizeof(*setup) != args_size) ++ goto bad_size; + -+#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */ -+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2) -+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1) ++ mutex_lock(&kctx->vinstr_cli_lock); ++ if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, ++ setup) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ mutex_unlock(&kctx->vinstr_cli_lock); ++ break; ++ } + -+#include "mali_kbase_js_defs.h" -+#include "mali_kbase_hwaccess_defs.h" ++ case KBASE_FUNC_GPU_PROPS_REG_DUMP: ++ { ++ struct kbase_uk_gpuprops *setup = args; + -+#define KBASEP_FORCE_REPLAY_DISABLED 0 ++ if 
(sizeof(*setup) != args_size) ++ goto bad_size; + -+/* Maximum force replay limit when randomization is enabled */ -+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16 ++ if (kbase_gpuprops_uk_get_props(kctx, setup) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } ++ case KBASE_FUNC_FIND_CPU_OFFSET: ++ { ++ struct kbase_uk_find_cpu_offset *find = args; + -+/** Atom has been previously soft-stoppped */ -+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) -+/** Atom has been previously retried to execute */ -+#define KBASE_KATOM_FLAGS_RERUN (1<<2) -+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) -+/** Atom has been previously hard-stopped. */ -+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) -+/** Atom has caused us to enter disjoint state */ -+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -+/* Atom blocked on cross-slot dependency */ -+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) -+/* Atom has fail dependency on cross-slot dependency */ -+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) -+/* Atom is currently holding a context reference */ -+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -+/* Atom requires GPU to be in protected mode */ -+#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -+/* Atom has been stored in runnable_tree */ -+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) ++ if (sizeof(*find) != args_size) ++ goto bad_size; + -+/* SW related flags about types of JS_COMMAND action -+ * NOTE: These must be masked off by JS_COMMAND_MASK */ ++ if (find->gpu_addr & ~PAGE_MASK) { ++ dev_warn(kbdev->dev, ++ "kbase_legacy_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid"); ++ goto out_bad; ++ } + -+/** This command causes a disjoint event */ -+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 ++ if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ } else { ++ int err; + -+/** Bitmask of all SW related flags */ -+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) ++ err = kbasep_find_enclosing_cpu_mapping_offset( ++ kctx, ++ find->cpu_addr, ++ find->size, ++ &find->offset); + -+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) -+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks -+#endif ++ if (err) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ } ++ break; ++ } ++ case KBASE_FUNC_GET_VERSION: ++ { ++ struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args; + -+/** Soft-stop command that causes a Disjoint event. 
This of course isn't -+ * entirely masked off by JS_COMMAND_MASK */ -+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ -+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) ++ if (sizeof(*get_version) != args_size) ++ goto bad_size; + -+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT ++ /* version buffer size check is made in compile time assert */ ++ memcpy(get_version->version_buffer, ++ KERNEL_SIDE_DDK_VERSION_STRING, ++ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); ++ get_version->version_string_size = ++ sizeof(KERNEL_SIDE_DDK_VERSION_STRING); ++ get_version->rk_version = ROCKCHIP_VERSION; ++ break; ++ } + -+/* Serialize atoms within a slot (ie only one atom per job slot) */ -+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) -+/* Serialize atoms between slots (ie only one job slot running at any time) */ -+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) -+/* Reset the GPU after each atom completion */ -+#define KBASE_SERIALIZE_RESET (1 << 2) ++ case KBASE_FUNC_STREAM_CREATE: ++ { ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args; + -+#ifdef CONFIG_DEBUG_FS -+struct base_job_fault_event { ++ if (sizeof(*screate) != args_size) ++ goto bad_size; + -+ u32 event_code; -+ struct kbase_jd_atom *katom; -+ struct work_struct job_fault_work; -+ struct list_head head; -+ int reg_offset; -+}; ++ if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) { ++ /* not NULL terminated */ ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } + -+#endif ++ if (kbase_sync_fence_stream_create(screate->name, ++ &screate->fd) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ ukh->ret = MALI_ERROR_NONE; ++#else /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ break; ++ } ++ case KBASE_FUNC_FENCE_VALIDATE: ++ { ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args; + -+struct kbase_jd_atom_dependency { -+ struct kbase_jd_atom *atom; -+ u8 dep_type; -+}; ++ if (sizeof(*fence_validate) != args_size) ++ goto bad_size; + -+/** -+ * struct kbase_io_access - holds information about 1 register access -+ * -+ * @addr: first bit indicates r/w (r=0, w=1) -+ * @value: value written or read -+ */ -+struct kbase_io_access { -+ uintptr_t addr; -+ u32 value; -+}; ++ if (kbase_sync_fence_validate(fence_validate->fd) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ ukh->ret = MALI_ERROR_NONE; ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ break; ++ } + -+/** -+ * struct kbase_io_history - keeps track of all recent register accesses -+ * -+ * @enabled: true if register accesses are recorded, false otherwise -+ * @lock: spinlock protecting kbase_io_access array -+ * @count: number of registers read/written -+ * @size: number of elements in kbase_io_access array -+ * @buf: array of kbase_io_access -+ */ -+struct kbase_io_history { -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -+ bool enabled; -+#else -+ u32 enabled; -+#endif ++ case KBASE_FUNC_SET_TEST_DATA: ++ { ++#if MALI_UNIT_TEST ++ struct kbase_uk_set_test_data *set_data = args; + -+ spinlock_t lock; -+ size_t count; -+ u16 size; -+ struct kbase_io_access *buf; -+}; ++ shared_kernel_test_data = set_data->test_data; ++ shared_kernel_test_data.kctx.value = (void __user *)kctx; ++ shared_kernel_test_data.mm.value = (void __user *)current->mm; ++ ukh->ret = MALI_ERROR_NONE; 
++#endif /* MALI_UNIT_TEST */ ++ break; ++ } + -+/** -+ * @brief The function retrieves a read-only reference to the atom field from -+ * the kbase_jd_atom_dependency structure -+ * -+ * @param[in] dep kbase jd atom dependency. -+ * -+ * @return readonly reference to dependent ATOM. -+ */ -+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) -+{ -+ LOCAL_ASSERT(dep != NULL); ++ case KBASE_FUNC_INJECT_ERROR: ++ { ++#ifdef CONFIG_MALI_ERROR_INJECT ++ unsigned long flags; ++ struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params; + -+ return (const struct kbase_jd_atom *)(dep->atom); -+} ++ /*mutex lock */ ++ spin_lock_irqsave(&kbdev->reg_op_lock, flags); ++ if (job_atom_inject_error(¶ms) != 0) ++ ukh->ret = MALI_ERROR_OUT_OF_MEMORY; ++ else ++ ukh->ret = MALI_ERROR_NONE; ++ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); ++ /*mutex unlock */ ++#endif /* CONFIG_MALI_ERROR_INJECT */ ++ break; ++ } + -+/** -+ * @brief The function retrieves a read-only reference to the dependency type field from -+ * the kbase_jd_atom_dependency structure -+ * -+ * @param[in] dep kbase jd atom dependency. -+ * -+ * @return A dependency type value. -+ */ -+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) -+{ -+ LOCAL_ASSERT(dep != NULL); ++ case KBASE_FUNC_MODEL_CONTROL: ++ { ++#ifdef CONFIG_MALI_NO_MALI ++ unsigned long flags; ++ struct kbase_model_control_params params = ++ ((struct kbase_uk_model_control_params *)args)->params; + -+ return dep->dep_type; -+} ++ /*mutex lock */ ++ spin_lock_irqsave(&kbdev->reg_op_lock, flags); ++ if (gpu_model_control(kbdev->model, ¶ms) != 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ ukh->ret = MALI_ERROR_NONE; ++ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); ++ /*mutex unlock */ ++#endif /* CONFIG_MALI_NO_MALI */ ++ break; ++ } + -+/** -+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom -+ * -+ * @param[in] dep The kbase jd atom dependency. -+ * @param[in] a The ATOM to be set as a dependency. -+ * @param type The ATOM dependency type to be set. -+ * -+ */ -+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, -+ struct kbase_jd_atom *a, u8 type) -+{ -+ struct kbase_jd_atom_dependency *dep; ++#ifdef BASE_LEGACY_UK8_SUPPORT ++ case KBASE_FUNC_KEEP_GPU_POWERED: ++ { ++ dev_warn(kbdev->dev, "kbase_legacy_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n"); ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ break; ++ } ++#endif /* BASE_LEGACY_UK8_SUPPORT */ + -+ LOCAL_ASSERT(const_dep != NULL); ++ case KBASE_FUNC_GET_PROFILING_CONTROLS: ++ { ++ struct kbase_uk_profiling_controls *controls = ++ (struct kbase_uk_profiling_controls *)args; ++ u32 i; + -+ dep = (struct kbase_jd_atom_dependency *)const_dep; ++ if (sizeof(*controls) != args_size) ++ goto bad_size; + -+ dep->atom = a; -+ dep->dep_type = type; -+} ++ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) ++ controls->profiling_controls[i] = ++ kbdev->kbase_profiling_controls[i]; + -+/** -+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom -+ * -+ * @param[in] dep The kbase jd atom dependency to be cleared. 
-+ * -+ */ -+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) -+{ -+ struct kbase_jd_atom_dependency *dep; ++ break; ++ } + -+ LOCAL_ASSERT(const_dep != NULL); ++ /* used only for testing purposes; these controls are to be set by gator through gator API */ ++ case KBASE_FUNC_SET_PROFILING_CONTROLS: ++ { ++ struct kbase_uk_profiling_controls *controls = ++ (struct kbase_uk_profiling_controls *)args; ++ u32 i; + -+ dep = (struct kbase_jd_atom_dependency *)const_dep; ++ if (sizeof(*controls) != args_size) ++ goto bad_size; + -+ dep->atom = NULL; -+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID; -+} ++ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) ++ _mali_profiling_control(i, controls->profiling_controls[i]); + -+enum kbase_atom_gpu_rb_state { -+ /* Atom is not currently present in slot ringbuffer */ -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, -+ /* Atom is in slot ringbuffer but is blocked on a previous atom */ -+ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, -+ /* Atom is in slot ringbuffer but is waiting for a previous protected -+ * mode transition to complete */ -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, -+ /* Atom is in slot ringbuffer but is waiting for proected mode -+ * transition */ -+ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, -+ /* Atom is in slot ringbuffer but is waiting for cores to become -+ * available */ -+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, -+ /* Atom is in slot ringbuffer but is blocked on affinity */ -+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, -+ /* Atom is in slot ringbuffer and ready to run */ -+ KBASE_ATOM_GPU_RB_READY, -+ /* Atom is in slot ringbuffer and has been submitted to the GPU */ -+ KBASE_ATOM_GPU_RB_SUBMITTED, -+ /* Atom must be returned to JS as soon as it reaches the head of the -+ * ringbuffer due to a previous failure */ -+ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 -+}; ++ break; ++ } + -+enum kbase_atom_enter_protected_state { -+ /* -+ * Starting state: -+ * Check if a transition into protected mode is required. -+ * -+ * NOTE: The integer value of this must -+ * match KBASE_ATOM_EXIT_PROTECTED_CHECK. -+ */ -+ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, -+ /* Wait for vinstr to suspend. */ -+ KBASE_ATOM_ENTER_PROTECTED_VINSTR, -+ /* Wait for the L2 to become idle in preparation for -+ * the coherency change. */ -+ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, -+ /* End state; -+ * Prepare coherency change. */ -+ KBASE_ATOM_ENTER_PROTECTED_FINISHED, -+}; ++ case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD: ++ { ++ struct kbase_uk_debugfs_mem_profile_add *add_data = ++ (struct kbase_uk_debugfs_mem_profile_add *)args; ++ char *buf; ++ char __user *user_buf; + -+enum kbase_atom_exit_protected_state { -+ /* -+ * Starting state: -+ * Check if a transition out of protected mode is required. -+ * -+ * NOTE: The integer value of this must -+ * match KBASE_ATOM_ENTER_PROTECTED_CHECK. -+ */ -+ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, -+ /* Wait for the L2 to become idle in preparation -+ * for the reset. */ -+ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, -+ /* Issue the protected reset. */ -+ KBASE_ATOM_EXIT_PROTECTED_RESET, -+ /* End state; -+ * Wait for the reset to complete. 
*/ -+ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, -+}; ++ if (sizeof(*add_data) != args_size) ++ goto bad_size; + -+struct kbase_ext_res { -+ u64 gpu_address; -+ struct kbase_mem_phy_alloc *alloc; -+}; ++ if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { ++ dev_err(kbdev->dev, "buffer too big\n"); ++ goto out_bad; ++ } + -+struct kbase_jd_atom { -+ struct work_struct work; -+ ktime_t start_timestamp; ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_buf = ++ compat_ptr(add_data->buf.compat_value); ++ else ++#endif ++ user_buf = add_data->buf.value; + -+ struct base_jd_udata udata; -+ struct kbase_context *kctx; ++ buf = kmalloc(add_data->len, GFP_KERNEL); ++ if (ZERO_OR_NULL_PTR(buf)) ++ goto out_bad; + -+ struct list_head dep_head[2]; -+ struct list_head dep_item[2]; -+ const struct kbase_jd_atom_dependency dep[2]; -+ /* List head used during job dispatch job_done processing - as -+ * dependencies may not be entirely resolved at this point, we need to -+ * use a separate list head. */ -+ struct list_head jd_item; -+ /* true if atom's jd_item is currently on a list. Prevents atom being -+ * processed twice. */ -+ bool in_jd_list; ++ if (0 != copy_from_user(buf, user_buf, add_data->len)) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ kfree(buf); ++ goto out_bad; ++ } + -+ u16 nr_extres; -+ struct kbase_ext_res *extres; ++ if (kbasep_mem_profile_debugfs_insert(kctx, buf, ++ add_data->len)) { ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ goto out_bad; ++ } + -+ u32 device_nr; -+ u64 affinity; -+ u64 jc; -+ enum kbase_atom_coreref_state coreref_state; -+#ifdef CONFIG_KDS -+ struct list_head node; -+ struct kds_resource_set *kds_rset; -+ bool kds_dep_satisfied; -+#endif /* CONFIG_KDS */ -+#if defined(CONFIG_SYNC) -+ /* Stores either an input or output fence, depending on soft-job type */ -+ struct sync_fence *fence; -+ struct sync_fence_waiter sync_waiter; -+#endif /* CONFIG_SYNC */ -+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) -+ struct { -+ /* Use the functions/API defined in mali_kbase_fence.h to -+ * when working with this sub struct */ -+#if defined(CONFIG_SYNC_FILE) -+ /* Input fence */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence_in; -+#else -+ struct dma_fence *fence_in; -+#endif -+#endif -+ /* This points to the dma-buf output fence for this atom. If -+ * this is NULL then there is no fence for this atom and the -+ * following fields related to dma_fence may have invalid data. -+ * -+ * The context and seqno fields contain the details for this -+ * fence. -+ * -+ * This fence is signaled when the katom is completed, -+ * regardless of the event_code of the katom (signal also on -+ * failure). -+ */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ /* The dma-buf fence context number for this atom. A unique -+ * context number is allocated to each katom in the context on -+ * context creation. -+ */ -+ unsigned int context; -+ /* The dma-buf fence sequence number for this atom. This is -+ * increased every time this katom uses dma-buf fence. -+ */ -+ atomic_t seqno; -+ /* This contains a list of all callbacks set up to wait on -+ * other fences. This atom must be held back from JS until all -+ * these callbacks have been called and dep_count have reached -+ * 0. The initial value of dep_count must be equal to the -+ * number of callbacks on this list. -+ * -+ * This list is protected by jctx.lock. 
Callbacks are added to -+ * this list when the atom is built and the wait are set up. -+ * All the callbacks then stay on the list until all callbacks -+ * have been called and the atom is queued, or cancelled, and -+ * then all callbacks are taken off the list and freed. -+ */ -+ struct list_head callbacks; -+ /* Atomic counter of number of outstandind dma-buf fence -+ * dependencies for this atom. When dep_count reaches 0 the -+ * atom may be queued. -+ * -+ * The special value "-1" may only be set after the count -+ * reaches 0, while holding jctx.lock. This indicates that the -+ * atom has been handled, either queued in JS or cancelled. -+ * -+ * If anyone but the dma-fence worker sets this to -1 they must -+ * ensure that any potentially queued worker must have -+ * completed before allowing the atom to be marked as unused. -+ * This can be done by flushing the fence work queue: -+ * kctx->dma_fence.wq. -+ */ -+ atomic_t dep_count; -+ } dma_fence; -+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/ ++ break; ++ } + -+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ -+ enum base_jd_event_code event_code; -+ base_jd_core_req core_req; /**< core requirements */ -+ /** Job Slot to retry submitting to if submission from IRQ handler failed -+ * -+ * NOTE: see if this can be unified into the another member e.g. the event */ -+ int retry_submit_on_slot; ++#ifdef CONFIG_MALI_NO_MALI ++ case KBASE_FUNC_SET_PRFCNT_VALUES: ++ { + -+ u32 ticks; -+ /* JS atom priority with respect to other atoms on its kctx. */ -+ int sched_priority; ++ struct kbase_uk_prfcnt_values *params = ++ ((struct kbase_uk_prfcnt_values *)args); ++ gpu_model_set_dummy_prfcnt_sample(params->data, ++ params->size); + -+ int poking; /* BASE_HW_ISSUE_8316 */ ++ break; ++ } ++#endif /* CONFIG_MALI_NO_MALI */ ++#ifdef BASE_LEGACY_UK10_4_SUPPORT ++ case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4: ++ { ++ struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire ++ = args; ++ int ret; + -+ wait_queue_head_t completed; -+ enum kbase_jd_atom_state status; -+#ifdef CONFIG_GPU_TRACEPOINTS -+ int work_id; -+#endif -+ /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */ -+ int slot_nr; ++ if (sizeof(*tlstream_acquire) != args_size) ++ goto bad_size; + -+ u32 atom_flags; ++ ret = kbase_tlstream_acquire( ++ kctx, 0); ++ if (ret < 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ tlstream_acquire->fd = ret; ++ break; ++ } ++#endif /* BASE_LEGACY_UK10_4_SUPPORT */ ++ case KBASE_FUNC_TLSTREAM_ACQUIRE: ++ { ++ struct kbase_uk_tlstream_acquire *tlstream_acquire = ++ args; ++ int ret; + -+ /* Number of times this atom has been retried. Used by replay soft job. 
-+ */ -+ int retry_count; ++ if (sizeof(*tlstream_acquire) != args_size) ++ goto bad_size; + -+ enum kbase_atom_gpu_rb_state gpu_rb_state; ++ if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK) ++ goto out_bad; + -+ u64 need_cache_flush_cores_retained; ++ ret = kbase_tlstream_acquire( ++ kctx, tlstream_acquire->flags); ++ if (ret < 0) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; ++ else ++ tlstream_acquire->fd = ret; ++ break; ++ } ++ case KBASE_FUNC_TLSTREAM_FLUSH: ++ { ++ struct kbase_uk_tlstream_flush *tlstream_flush = ++ args; + -+ atomic_t blocked; ++ if (sizeof(*tlstream_flush) != args_size) ++ goto bad_size; + -+ /* Pointer to atom that this atom has same-slot dependency on */ -+ struct kbase_jd_atom *pre_dep; -+ /* Pointer to atom that has same-slot dependency on this atom */ -+ struct kbase_jd_atom *post_dep; ++ kbase_tlstream_flush_streams(); ++ break; ++ } ++#if MALI_UNIT_TEST ++ case KBASE_FUNC_TLSTREAM_TEST: ++ { ++ struct kbase_uk_tlstream_test *tlstream_test = args; + -+ /* Pointer to atom that this atom has cross-slot dependency on */ -+ struct kbase_jd_atom *x_pre_dep; -+ /* Pointer to atom that has cross-slot dependency on this atom */ -+ struct kbase_jd_atom *x_post_dep; ++ if (sizeof(*tlstream_test) != args_size) ++ goto bad_size; + -+ /* The GPU's flush count recorded at the time of submission, used for -+ * the cache flush optimisation */ -+ u32 flush_id; ++ kbase_tlstream_test( ++ tlstream_test->tpw_count, ++ tlstream_test->msg_delay, ++ tlstream_test->msg_count, ++ tlstream_test->aux_msg); ++ break; ++ } ++ case KBASE_FUNC_TLSTREAM_STATS: ++ { ++ struct kbase_uk_tlstream_stats *tlstream_stats = args; + -+ struct kbase_jd_atom_backend backend; -+#ifdef CONFIG_DEBUG_FS -+ struct base_job_fault_event fault_event; -+#endif ++ if (sizeof(*tlstream_stats) != args_size) ++ goto bad_size; + -+ /* List head used for three different purposes: -+ * 1. Overflow list for JS ring buffers. If an atom is ready to run, -+ * but there is no room in the JS ring buffer, then the atom is put -+ * on the ring buffer's overflow list using this list node. -+ * 2. List of waiting soft jobs. -+ */ -+ struct list_head queue; ++ kbase_tlstream_stats( ++ &tlstream_stats->bytes_collected, ++ &tlstream_stats->bytes_generated); ++ break; ++ } ++#endif /* MALI_UNIT_TEST */ + -+ /* Used to keep track of all JIT free/alloc jobs in submission order -+ */ -+ struct list_head jit_node; -+ bool jit_blocked; ++ case KBASE_FUNC_GET_CONTEXT_ID: ++ { ++ struct kbase_uk_context_id *info = args; + -+ /* If non-zero, this indicates that the atom will fail with the set -+ * event_code when the atom is processed. */ -+ enum base_jd_event_code will_fail_event_code; ++ info->id = kctx->id; ++ break; ++ } + -+ /* Atoms will only ever be transitioning into, or out of -+ * protected mode so we do not need two separate fields. -+ */ -+ union { -+ enum kbase_atom_enter_protected_state enter; -+ enum kbase_atom_exit_protected_state exit; -+ } protected_state; ++ case KBASE_FUNC_SOFT_EVENT_UPDATE: ++ { ++ struct kbase_uk_soft_event_update *update = args; + -+ struct rb_node runnable_tree_node; ++ if (sizeof(*update) != args_size) ++ goto bad_size; + -+ /* 'Age' of atom relative to other atoms in the context. 
*/ -+ u32 age; -+}; ++ if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && ++ (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || ++ (update->flags != 0)) ++ goto out_bad; + -+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) -+{ -+ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); -+} ++ if (kbase_soft_event_update(kctx, update->evt, ++ update->new_status)) ++ ukh->ret = MALI_ERROR_FUNCTION_FAILED; + -+/* -+ * Theory of operations: -+ * -+ * Atom objects are statically allocated within the context structure. -+ * -+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. -+ */ ++ break; ++ } + -+#define KBASE_JD_DEP_QUEUE_SIZE 256 ++ default: ++ dev_err(kbdev->dev, "unknown ioctl %u\n", id); ++ goto out_bad; ++ } + -+struct kbase_jd_context { -+ struct mutex lock; -+ struct kbasep_js_kctx_info sched_info; -+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; ++ return ret; + -+ /** Tracks all job-dispatch jobs. This includes those not tracked by -+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ -+ u32 job_nr; ++bad_size: ++ dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); ++out_bad: ++ return -EINVAL; ++} + -+ /** Waitq that reflects whether there are no jobs (including SW-only -+ * dependency jobs). This is set when no jobs are present on the ctx, -+ * and clear when there are jobs. -+ * -+ * @note: Job Dispatcher knows about more jobs than the Job Scheduler: -+ * the Job Scheduler is unaware of jobs that are blocked on dependencies, -+ * and SW-only dependency jobs. -+ * -+ * This waitq can be waited upon to find out when the context jobs are all -+ * done/cancelled (including those that might've been blocked on -+ * dependencies) - and so, whether it can be terminated. However, it should -+ * only be terminated once it is not present in the run-pool (see -+ * kbasep_js_kctx_info::ctx::is_scheduled). -+ * -+ * Since the waitq is only set under kbase_jd_context::lock, -+ * the waiter should also briefly obtain and drop kbase_jd_context::lock to -+ * guarentee that the setter has completed its work on the kbase_context -+ * -+ * This must be updated atomically with: -+ * - kbase_jd_context::job_nr */ -+ wait_queue_head_t zero_jobs_wait; ++static struct kbase_device *to_kbase_device(struct device *dev) ++{ ++ return dev_get_drvdata(dev); ++} + -+ /** Job Done workqueue. 
*/ -+ struct workqueue_struct *job_done_wq; ++static int assign_irqs(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + -+ spinlock_t tb_lock; -+ u32 *tb; -+ size_t tb_wrap_offset; ++ static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" }; + -+#ifdef CONFIG_KDS -+ struct kds_callback kds_cb; -+#endif /* CONFIG_KDS */ -+#ifdef CONFIG_GPU_TRACEPOINTS -+ atomic_t work_id; ++#if IS_ENABLED(CONFIG_OF) ++ static const char *const irq_names[] = { "job", "mmu", "gpu" }; +#endif -+}; -+ -+struct kbase_device_info { -+ u32 features; -+}; -+ -+/** Poking state for BASE_HW_ISSUE_8316 */ -+enum { -+ KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0, -+ KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1 -+}; ++ int i; + -+/** Poking state for BASE_HW_ISSUE_8316 */ -+typedef u32 kbase_as_poke_state; ++ if (!kbdev) ++ return -ENODEV; + -+struct kbase_mmu_setup { -+ u64 transtab; -+ u64 memattr; -+ u64 transcfg; -+}; ++ for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) { ++ int irq; + -+/** -+ * Important: Our code makes assumptions that a struct kbase_as structure is always at -+ * kbase_device->as[number]. This is used to recover the containing -+ * struct kbase_device from a struct kbase_as structure. -+ * -+ * Therefore, struct kbase_as structures must not be allocated anywhere else. -+ */ -+struct kbase_as { -+ int number; ++#if IS_ENABLED(CONFIG_OF) ++ /* We recommend using Upper case for the irq names in dts, but if ++ * there are devices in the world using Lower case then we should ++ * avoid breaking support for them. So try using names in Upper case ++ * first then try using Lower case names. If both attempts fail then ++ * we assume there is no IRQ resource specified for the GPU. ++ */ ++ irq = platform_get_irq_byname(pdev, irq_names_caps[i]); ++ if (irq < 0) ++ irq = platform_get_irq_byname(pdev, irq_names[i]); ++#else ++ irq = platform_get_irq(pdev, i); ++#endif /* CONFIG_OF */ + -+ struct workqueue_struct *pf_wq; -+ struct work_struct work_pagefault; -+ struct work_struct work_busfault; -+ enum kbase_mmu_fault_type fault_type; -+ bool protected_mode; -+ u32 fault_status; -+ u64 fault_addr; -+ u64 fault_extra_addr; ++ if (irq < 0) { ++ dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]); ++ return irq; ++ } + -+ struct kbase_mmu_setup current_setup; ++ kbdev->irqs[i].irq = irq; ++ kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq)); ++ } + -+ /* BASE_HW_ISSUE_8316 */ -+ struct workqueue_struct *poke_wq; -+ struct work_struct poke_work; -+ /** Protected by hwaccess_lock */ -+ int poke_refcount; -+ /** Protected by hwaccess_lock */ -+ kbase_as_poke_state poke_state; -+ struct hrtimer poke_timer; -+}; ++ return 0; ++} + -+static inline int kbase_as_has_bus_fault(struct kbase_as *as) ++/* ++ * API to acquire device list mutex and ++ * return pointer to the device list head ++ */ ++const struct list_head *kbase_dev_list_get(void) +{ -+ return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS; ++ mutex_lock(&kbase_dev_list_lock); ++ return &kbase_dev_list; +} ++KBASE_EXPORT_TEST_API(kbase_dev_list_get); + -+static inline int kbase_as_has_page_fault(struct kbase_as *as) ++/* API to release the device list mutex */ ++void kbase_dev_list_put(const struct list_head *dev_list) +{ -+ return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE; ++ mutex_unlock(&kbase_dev_list_lock); +} ++KBASE_EXPORT_TEST_API(kbase_dev_list_put); + -+struct kbasep_mem_device { -+ atomic_t used_pages; /* Tracks usage of OS shared memory. 
Updated -+ when OS memory is allocated/freed. */ -+ -+}; -+ -+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X -+ -+enum kbase_trace_code { -+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE -+ * THIS MUST BE USED AT THE START OF THE ENUM */ -+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X) -+#include "mali_kbase_trace_defs.h" -+#undef KBASE_TRACE_CODE_MAKE_CODE -+ /* Comma on its own, to extend the list */ -+ , -+ /* Must be the last in the enum */ -+ KBASE_TRACE_CODE_COUNT -+}; -+ -+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) -+#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) -+ -+struct kbase_trace { -+ struct timespec64 timestamp; -+ u32 thread_id; -+ u32 cpu; -+ void *ctx; -+ bool katom; -+ int atom_number; -+ u64 atom_udata[2]; -+ u64 gpu_addr; -+ unsigned long info_val; -+ u8 code; -+ u8 jobslot; -+ u8 refcount; -+ u8 flags; -+}; -+ -+/** Event IDs for the power management framework. -+ * -+ * Any of these events might be missed, so they should not be relied upon to -+ * find the precise state of the GPU at a particular time in the -+ * trace. Overall, we should get a high percentage of these events for -+ * statisical purposes, and so a few missing should not be a problem */ -+enum kbase_timeline_pm_event { -+ /* helper for tests */ -+ KBASEP_TIMELINE_PM_EVENT_FIRST, -+ -+ /** Event reserved for backwards compatibility with 'init' events */ -+ KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST, -+ -+ /** The power state of the device has changed. -+ * -+ * Specifically, the device has reached a desired or available state. -+ */ -+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED, -+ -+ /** The GPU is becoming active. -+ * -+ * This event is sent when the first context is about to use the GPU. -+ */ -+ KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE, -+ -+ /** The GPU is becoming idle. -+ * -+ * This event is sent when the last context has finished using the GPU. -+ */ -+ KBASE_TIMELINE_PM_EVENT_GPU_IDLE, -+ -+ /** Event reserved for backwards compatibility with 'policy_change' -+ * events */ -+ KBASE_TIMELINE_PM_EVENT_RESERVED_4, -+ -+ /** Event reserved for backwards compatibility with 'system_suspend' -+ * events */ -+ KBASE_TIMELINE_PM_EVENT_RESERVED_5, -+ -+ /** Event reserved for backwards compatibility with 'system_resume' -+ * events */ -+ KBASE_TIMELINE_PM_EVENT_RESERVED_6, -+ -+ /** The job scheduler is requesting to power up/down cores. 
-+ * -+ * This event is sent when: -+ * - powered down cores are needed to complete a job -+ * - powered up cores are not needed anymore -+ */ -+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, -+ -+ KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, -+}; -+ -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+struct kbase_trace_kctx_timeline { -+ atomic_t jd_atoms_in_flight; -+ u32 owner_tgid; -+}; ++/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ ++struct kbase_device *kbase_find_device(int minor) ++{ ++ struct kbase_device *kbdev = NULL; ++ struct list_head *entry; ++ const struct list_head *dev_list = kbase_dev_list_get(); + -+struct kbase_trace_kbdev_timeline { -+ /* Note: strictly speaking, not needed, because it's in sync with -+ * kbase_device::jm_slots[]::submitted_nr -+ * -+ * But it's kept as an example of how to add global timeline tracking -+ * information -+ * -+ * The caller must hold hwaccess_lock when accessing this */ -+ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; ++ list_for_each(entry, dev_list) { ++ struct kbase_device *tmp; + -+ /* Last UID for each PM event */ -+ atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1]; -+ /* Counter for generating PM event UIDs */ -+ atomic_t pm_event_uid_counter; -+ /* -+ * L2 transition state - true indicates that the transition is ongoing -+ * Expected to be protected by hwaccess_lock */ -+ bool l2_transitioning; -+}; -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ ++ tmp = list_entry(entry, struct kbase_device, entry); ++ if (tmp->mdev.minor == minor || minor == -1) { ++ kbdev = tmp; ++ get_device(kbdev->dev); ++ break; ++ } ++ } ++ kbase_dev_list_put(dev_list); + ++ return kbdev; ++} ++EXPORT_SYMBOL(kbase_find_device); + -+struct kbasep_kctx_list_element { -+ struct list_head link; -+ struct kbase_context *kctx; -+}; ++void kbase_release_device(struct kbase_device *kbdev) ++{ ++ put_device(kbdev->dev); ++} ++EXPORT_SYMBOL(kbase_release_device); + -+/** -+ * Data stored per device for power management. -+ * -+ * This structure contains data for the power management framework. There is one -+ * instance of this structure per device in the system. ++#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE ++/* ++ * Older versions, before v4.6, of the kernel doesn't have ++ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 + */ -+struct kbase_pm_device_data { -+ /** -+ * The lock protecting Power Management structures accessed outside of -+ * IRQ. -+ * -+ * This lock must also be held whenever the GPU is being powered on or -+ * off. -+ */ -+ struct mutex lock; ++static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) ++{ ++ char buf[32]; + -+ /** The reference count of active contexts on this device. */ -+ int active_count; -+ /** Flag indicating suspending/suspended */ -+ bool suspending; -+ /* Wait queue set when active_count == 0 */ -+ wait_queue_head_t zero_active_count_wait; ++ count = min(sizeof(buf), count); + -+ /** -+ * Bit masks identifying the available shader cores that are specified -+ * via sysfs. One mask per job slot. -+ */ -+ u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; -+ u64 debug_core_mask_all; ++ if (copy_from_user(buf, s, count)) ++ return -EFAULT; ++ buf[count] = '\0'; + -+ /** -+ * Callback for initializing the runtime power management. 
-+ * -+ * @param kbdev The kbase device -+ * -+ * @return 0 on success, else error code -+ */ -+ int (*callback_power_runtime_init)(struct kbase_device *kbdev); ++ return strtobool(buf, res); ++} ++#endif + -+ /** -+ * Callback for terminating the runtime power management. -+ * -+ * @param kbdev The kbase device -+ */ -+ void (*callback_power_runtime_term)(struct kbase_device *kbdev); ++static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) ++{ ++ struct kbase_context *kctx = f->private_data; ++ int err; ++ bool value; + -+ /* Time in milliseconds between each dvfs sample */ -+ u32 dvfs_period; ++ err = kstrtobool_from_user(ubuf, size, &value); ++ if (err) ++ return err; + -+ /* Period of GPU poweroff timer */ -+ ktime_t gpu_poweroff_time; ++ if (value) ++ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); ++ else ++ kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); + -+ /* Number of ticks of GPU poweroff timer before shader is powered off */ -+ int poweroff_shader_ticks; ++ return size; ++} + -+ /* Number of ticks of GPU poweroff timer before GPU is powered off */ -+ int poweroff_gpu_ticks; ++static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) ++{ ++ struct kbase_context *kctx = f->private_data; ++ char buf[32]; ++ int count; ++ bool value; + -+ struct kbase_pm_backend_data backend; -+}; ++ value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + -+/** -+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev -+ * @kbdev: Kbase device where memory is used -+ * @cur_size: Number of free pages currently in the pool (may exceed @max_size -+ * in some corner cases) -+ * @max_size: Maximum number of free pages in the pool -+ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size -+ * and @page_list -+ * @page_list: List of free pages in the pool -+ * @reclaim: Shrinker for kernel reclaim of free pages -+ * @next_pool: Pointer to next pool where pages can be allocated when this pool -+ * is empty. Pages will spill over to the next pool when this pool -+ * is full. Can be NULL if there is no next pool. -+ */ -+struct kbase_mem_pool { -+ struct kbase_device *kbdev; -+ size_t cur_size; -+ size_t max_size; -+ spinlock_t pool_lock; -+ struct list_head page_list; -+ struct shrinker reclaim; ++ count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); + -+ struct kbase_mem_pool *next_pool; -+}; ++ return simple_read_from_buffer(ubuf, size, off, buf, count); ++} + -+/** -+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP -+ * frequency, and real frequency and core mask -+ * @opp_freq: Nominal OPP frequency -+ * @real_freq: Real GPU frequency -+ * @core_mask: Shader core mask -+ */ -+struct kbase_devfreq_opp { -+ u64 opp_freq; -+ u64 real_freq; -+ u64 core_mask; ++static const struct file_operations kbase_infinite_cache_fops = { ++ .open = simple_open, ++ .write = write_ctx_infinite_cache, ++ .read = read_ctx_infinite_cache, +}; + -+#define DEVNAME_SIZE 16 -+ -+struct kbase_device { -+ s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS]; -+ -+ u32 hw_quirks_sc; -+ u32 hw_quirks_tiler; -+ u32 hw_quirks_mmu; -+ u32 hw_quirks_jm; -+ -+ struct list_head entry; -+ struct device *dev; -+ unsigned int kbase_group_error; -+ struct miscdevice mdev; -+ u64 reg_start; -+ size_t reg_size; -+ void __iomem *reg; -+ -+ struct { -+ int irq; -+ int flags; -+ } irqs[3]; -+ -+ struct clk *clock; -+#ifdef CONFIG_REGULATOR -+ struct regulator *regulator; ++static int kbase_open(struct inode *inode, struct file *filp) ++{ ++ struct kbase_device *kbdev = NULL; ++ struct kbase_context *kctx; ++ int ret = 0; ++#ifdef CONFIG_DEBUG_FS ++ char kctx_name[64]; +#endif -+ char devname[DEVNAME_SIZE]; -+ -+#ifdef CONFIG_MALI_NO_MALI -+ void *model; -+ struct kmem_cache *irq_slab; -+ struct workqueue_struct *irq_workq; -+ atomic_t serving_job_irq; -+ atomic_t serving_gpu_irq; -+ atomic_t serving_mmu_irq; -+ spinlock_t reg_op_lock; -+#endif /* CONFIG_MALI_NO_MALI */ -+ -+ struct kbase_pm_device_data pm; -+ struct kbasep_js_device_data js_data; -+ struct kbase_mem_pool mem_pool; -+ struct kbasep_mem_device memdev; -+ struct kbase_mmu_mode const *mmu_mode; -+ -+ struct kbase_as as[BASE_MAX_NR_AS]; -+ /* The below variables (as_free and as_to_kctx) are managed by the -+ * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must -+ * be held whilst accessing these. -+ */ -+ u16 as_free; /* Bitpattern of free Address Spaces */ -+ /* Mapping from active Address Spaces to kbase_context */ -+ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; -+ -+ -+ spinlock_t mmu_mask_change; -+ -+ struct kbase_gpu_props gpu_props; -+ -+ /** List of SW workarounds for HW issues */ -+ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; -+ /** List of features available */ -+ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; -+ -+ /* Bitmaps of cores that are currently in use (running jobs). -+ * These should be kept up to date by the job scheduler. -+ * -+ * pm.power_change_lock should be held when accessing these members. -+ * -+ * kbase_pm_check_transitions_nolock() should be called when bits are -+ * cleared to update the power management system and allow transitions to -+ * occur. */ -+ u64 shader_inuse_bitmap; -+ -+ /* Refcount for cores in use */ -+ u32 shader_inuse_cnt[64]; -+ -+ /* Bitmaps of cores the JS needs for jobs ready to run */ -+ u64 shader_needed_bitmap; -+ -+ /* Refcount for cores needed */ -+ u32 shader_needed_cnt[64]; -+ -+ u32 tiler_inuse_cnt; -+ -+ u32 tiler_needed_cnt; -+ -+ /* struct for keeping track of the disjoint information -+ * -+ * The state is > 0 if the GPU is in a disjoint state. 
Otherwise 0 -+ * The count is the number of disjoint events that have occurred on the GPU -+ */ -+ struct { -+ atomic_t count; -+ atomic_t state; -+ } disjoint_event; -+ -+ /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */ -+ u32 l2_users_count; -+ -+ /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be -+ * submitted to these cores. These are updated by the power management code. The job scheduler should avoid -+ * submitting new jobs to any cores that are not marked as available. -+ * -+ * pm.power_change_lock should be held when accessing these members. -+ */ -+ u64 shader_available_bitmap; -+ u64 tiler_available_bitmap; -+ u64 l2_available_bitmap; -+ u64 stack_available_bitmap; -+ -+ u64 shader_ready_bitmap; -+ u64 shader_transitioning_bitmap; -+ -+ s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */ -+ s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ -+ -+ /* Structure used for instrumentation and HW counters dumping */ -+ struct kbase_hwcnt { -+ /* The lock should be used when accessing any of the following members */ -+ spinlock_t lock; + -+ struct kbase_context *kctx; -+ u64 addr; ++ kbdev = kbase_find_device(iminor(inode)); + -+ struct kbase_instr_backend backend; -+ } hwcnt; ++ if (!kbdev) ++ return -ENODEV; + -+ struct kbase_vinstr_context *vinstr_ctx; ++ kctx = kbase_create_context(kbdev, is_compat_task()); ++ if (!kctx) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+#if KBASE_TRACE_ENABLE -+ spinlock_t trace_lock; -+ u16 trace_first_out; -+ u16 trace_next_in; -+ struct kbase_trace *trace_rbuf; -+#endif ++ init_waitqueue_head(&kctx->event_queue); ++ filp->f_mode |= FMODE_UNSIGNED_OFFSET; ++ filp->private_data = kctx; ++ kctx->filp = filp; + -+ u32 reset_timeout_ms; ++ if (kbdev->infinite_cache_active_default) ++ kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + -+ struct mutex cacheclean_lock; ++#ifdef CONFIG_DEBUG_FS ++ snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + -+ /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */ -+ void *platform_context; ++ kctx->kctx_dentry = debugfs_create_dir(kctx_name, ++ kbdev->debugfs_ctx_directory); + -+ /* List of kbase_contexts created */ -+ struct list_head kctx_list; -+ struct mutex kctx_list_lock; ++ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { ++ ret = -ENOMEM; ++ goto out; ++ } + -+ struct rockchip_opp_info opp_info; -+#ifdef CONFIG_MALI_DEVFREQ -+ struct devfreq_dev_profile devfreq_profile; -+ struct devfreq *devfreq; -+ unsigned long current_freq; -+ unsigned long current_nominal_freq; -+ unsigned long current_voltage; -+ u64 current_core_mask; -+ struct kbase_devfreq_opp *opp_table; -+ int num_opps; -+ struct monitor_dev_info *mdev_info; -+#ifdef CONFIG_DEVFREQ_THERMAL -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -+ struct devfreq_cooling_device *devfreq_cooling; ++#ifdef CONFIG_MALI_COH_USER ++ /* if cache is completely coherent at hardware level, then remove the ++ * infinite cache control support from debugfs. 
++ */ +#else -+ struct thermal_cooling_device *devfreq_cooling; -+#endif -+ /* Current IPA model - true for configured model, false for fallback */ -+ atomic_t ipa_use_configured_model; -+ struct { -+ /* Access to this struct must be with ipa.lock held */ -+ struct mutex lock; -+ struct kbase_ipa_model *configured_model; -+ struct kbase_ipa_model *fallback_model; -+ } ipa; -+#endif /* CONFIG_DEVFREQ_THERMAL */ -+#endif /* CONFIG_MALI_DEVFREQ */ -+ -+ -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ struct kbase_trace_kbdev_timeline timeline; -+#endif ++ debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, ++ kctx, &kbase_infinite_cache_fops); ++#endif /* CONFIG_MALI_COH_USER */ + -+ /* -+ * Control for enabling job dump on failure, set when control debugfs -+ * is opened. -+ */ -+ bool job_fault_debug; ++ mutex_init(&kctx->mem_profile_lock); + -+#ifdef CONFIG_DEBUG_FS -+ /* directory for debugfs entries */ -+ struct dentry *mali_debugfs_directory; -+ /* Root directory for per context entry */ -+ struct dentry *debugfs_ctx_directory; ++ kbasep_jd_debugfs_ctx_init(kctx); ++ kbase_debug_mem_view_init(filp); + -+#ifdef CONFIG_MALI_DEBUG -+ /* bit for each as, set if there is new data to report */ -+ u64 debugfs_as_read_bitmap; -+#endif /* CONFIG_MALI_DEBUG */ ++ kbase_debug_job_fault_context_init(kctx); + -+ /* failed job dump, used for separate debug process */ -+ wait_queue_head_t job_fault_wq; -+ wait_queue_head_t job_fault_resume_wq; -+ struct workqueue_struct *job_fault_resume_workq; -+ struct list_head job_fault_event_list; -+ spinlock_t job_fault_event_lock; -+ struct kbase_context *kctx_fault; ++ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool); + -+#if !MALI_CUSTOMER_RELEASE -+ /* Per-device data for register dumping interface */ -+ struct { -+ u16 reg_offset; /* Offset of a GPU_CONTROL register to be -+ dumped upon request */ -+ } regs_dump_debugfs_data; -+#endif /* !MALI_CUSTOMER_RELEASE */ ++ kbase_jit_debugfs_init(kctx); +#endif /* CONFIG_DEBUG_FS */ + -+ /* fbdump profiling controls set by gator */ -+ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; -+ -+ -+#if MALI_CUSTOMER_RELEASE == 0 -+ /* Number of jobs that are run before a job is forced to fail and -+ * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced -+ * failures. */ -+ int force_replay_limit; -+ /* Count of jobs between forced failures. Incremented on each job. A -+ * job is forced to fail once this is greater than or equal to -+ * force_replay_limit. */ -+ int force_replay_count; -+ /* Core requirement for jobs to be failed and replayed. May be zero. */ -+ base_jd_core_req force_replay_core_req; -+ /* true if force_replay_limit should be randomized. The random -+ * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. 
-+ */ -+ bool force_replay_random; -+#endif -+ -+ /* Total number of created contexts */ -+ atomic_t ctx_num; ++ dev_dbg(kbdev->dev, "created base context\n"); + -+#ifdef CONFIG_DEBUG_FS -+ /* Holds the most recent register accesses */ -+ struct kbase_io_history io_history; -+#endif /* CONFIG_DEBUG_FS */ ++ { ++ struct kbasep_kctx_list_element *element; + -+ struct kbase_hwaccess_data hwaccess; ++ element = kzalloc(sizeof(*element), GFP_KERNEL); ++ if (element) { ++ mutex_lock(&kbdev->kctx_list_lock); ++ element->kctx = kctx; ++ list_add(&element->link, &kbdev->kctx_list); ++ KBASE_TLSTREAM_TL_NEW_CTX( ++ element->kctx, ++ (u32)(element->kctx->id), ++ (u32)(element->kctx->tgid)); ++ mutex_unlock(&kbdev->kctx_list_lock); ++ } else { ++ /* we don't treat this as a fail - just warn about it */ ++ dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n"); ++ } ++ } ++ return 0; + -+ /* Count of page/bus faults waiting for workqueues to process */ -+ atomic_t faults_pending; ++ out: ++ kbase_release_device(kbdev); ++ return ret; ++} + -+ /* true if GPU is powered off or power off operation is in progress */ -+ bool poweroff_pending; ++static int kbase_release(struct inode *inode, struct file *filp) ++{ ++ struct kbase_context *kctx = filp->private_data; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbasep_kctx_list_element *element, *tmp; ++ bool found_element = false; + ++ KBASE_TLSTREAM_TL_DEL_CTX(kctx); + -+ /* defaults for new context created for this device */ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) -+ bool infinite_cache_active_default; -+#else -+ u32 infinite_cache_active_default; ++#ifdef CONFIG_DEBUG_FS ++ kbasep_mem_profile_debugfs_remove(kctx); ++ kbase_debug_job_fault_context_term(kctx); +#endif -+ size_t mem_pool_max_size_default; -+ -+ /* current gpu coherency mode */ -+ u32 current_gpu_coherency_mode; -+ /* system coherency mode */ -+ u32 system_coherency; -+ /* Flag to track when cci snoops have been enabled on the interface */ -+ bool cci_snoop_enabled; -+ -+ /* SMC function IDs to call into Trusted firmware to enable/disable -+ * cache snooping. Value of 0 indicates that they are not used -+ */ -+ u32 snoop_enable_smc; -+ u32 snoop_disable_smc; -+ -+ /* Protected mode operations */ -+ struct protected_mode_ops *protected_ops; -+ -+ /* Protected device attached to this kbase device */ -+ struct protected_mode_device *protected_dev; -+ -+ /* -+ * true when GPU is put into protected mode -+ */ -+ bool protected_mode; -+ -+ /* -+ * true when GPU is transitioning into or out of protected mode -+ */ -+ bool protected_mode_transition; -+ -+ /* -+ * true if protected mode is supported -+ */ -+ bool protected_mode_support; + ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { ++ if (element->kctx == kctx) { ++ list_del(&element->link); ++ kfree(element); ++ found_element = true; ++ } ++ } ++ mutex_unlock(&kbdev->kctx_list_lock); ++ if (!found_element) ++ dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + -+#ifdef CONFIG_MALI_DEBUG -+ wait_queue_head_t driver_inactive_wait; -+ bool driver_inactive; -+#endif /* CONFIG_MALI_DEBUG */ ++ filp->private_data = NULL; + -+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -+ /* -+ * Bus logger integration. -+ */ -+ struct bus_logger_client *buslogger; -+#endif -+ /* Boolean indicating if an IRQ flush during reset is in progress. 
*/ -+ bool irq_reset_flush; ++ mutex_lock(&kctx->vinstr_cli_lock); ++ /* If this client was performing hwcnt dumping and did not explicitly ++ * detach itself, remove it from the vinstr core now */ ++ if (kctx->vinstr_cli) { ++ struct kbase_uk_hwcnt_setup setup; + -+ /* list of inited sub systems. Used during terminate/error recovery */ -+ u32 inited_subsys; ++ setup.dump_buffer = 0llu; ++ kbase_vinstr_legacy_hwc_setup( ++ kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup); ++ } ++ mutex_unlock(&kctx->vinstr_cli_lock); + -+ spinlock_t hwaccess_lock; ++ kbase_destroy_context(kctx); + -+ /* Protects access to MMU operations */ -+ struct mutex mmu_hw_mutex; ++ dev_dbg(kbdev->dev, "deleted base context\n"); ++ kbase_release_device(kbdev); ++ return 0; ++} + -+ /* Current serialization mode. See KBASE_SERIALIZE_* for details */ -+ u8 serialize_jobs; -+}; ++#define CALL_MAX_SIZE 536 + -+/** -+ * struct jsctx_queue - JS context atom queue -+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this -+ * job slot. -+ * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot -+ * dependencies. Atoms on this list will be moved to the -+ * runnable_tree when the blocking atom completes. -+ * -+ * hwaccess_lock must be held when accessing this structure. -+ */ -+struct jsctx_queue { -+ struct rb_root runnable_tree; -+ struct list_head x_dep_head; -+}; ++static long kbase_legacy_ioctl(struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */ ++ u32 size = _IOC_SIZE(cmd); ++ struct kbase_context *kctx = filp->private_data; + ++ if (size > CALL_MAX_SIZE) ++ return -ENOTTY; + -+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ -+ (((minor) & 0xFFF) << 8) | \ -+ ((0 & 0xFF) << 0)) ++ if (0 != copy_from_user(&msg, (void __user *)arg, size)) { ++ dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n"); ++ return -EFAULT; ++ } + -+/** -+ * enum kbase_context_flags - Flags for kbase contexts -+ * -+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit -+ * process on a 64-bit kernel. -+ * -+ * @KCTX_RUNNABLE_REF: Set when context is counted in -+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. -+ * -+ * @KCTX_ACTIVE: Set when the context is active. -+ * -+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this -+ * context. -+ * -+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been -+ * initialized. -+ * -+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new -+ * allocations. Existing allocations will not change. -+ * -+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. -+ * -+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept -+ * scheduled in. -+ * -+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. -+ * This is only ever updated whilst the jsctx_mutex is held. -+ * -+ * @KCTX_DYING: Set when the context process is in the process of being evicted. -+ * -+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this -+ * context, to disable use of implicit dma-buf fences. This is used to avoid -+ * potential synchronization deadlocks. -+ * -+ * All members need to be separate bits. This enum is intended for use in a -+ * bitmask where multiple values get OR-ed together. 
-+ */ -+enum kbase_context_flags { -+ KCTX_COMPAT = 1U << 0, -+ KCTX_RUNNABLE_REF = 1U << 1, -+ KCTX_ACTIVE = 1U << 2, -+ KCTX_PULLED = 1U << 3, -+ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, -+ KCTX_INFINITE_CACHE = 1U << 5, -+ KCTX_SUBMIT_DISABLED = 1U << 6, -+ KCTX_PRIVILEGED = 1U << 7, -+ KCTX_SCHEDULED = 1U << 8, -+ KCTX_DYING = 1U << 9, -+ KCTX_NO_IMPLICIT_SYNC = 1U << 10, -+}; ++ if (kbase_legacy_dispatch(kctx, &msg, size) != 0) ++ return -EFAULT; + -+struct kbase_context { -+ struct file *filp; -+ struct kbase_device *kbdev; -+ int id; /* System wide unique id */ -+ unsigned long api_version; -+ phys_addr_t pgd; -+ struct list_head event_list; -+ struct list_head event_coalesce_list; -+ struct mutex event_mutex; -+ atomic_t event_closed; -+ struct workqueue_struct *event_workq; -+ atomic_t event_count; -+ int event_coalesce_count; ++ if (0 != copy_to_user((void __user *)arg, &msg, size)) { ++ dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n"); ++ return -EFAULT; ++ } ++ return 0; ++} + -+ atomic_t flags; ++static int kbase_api_set_flags(struct kbase_context *kctx, ++ struct kbase_ioctl_set_flags *flags) ++{ ++ int err; + -+ atomic_t setup_complete; -+ atomic_t setup_in_progress; ++ /* setup pending, try to signal that we'll do the setup, ++ * if setup was already in progress, err this call ++ */ ++ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) ++ return -EINVAL; + -+ u64 *mmu_teardown_pages; ++ err = kbase_context_set_create_flags(kctx, flags->create_flags); ++ /* if bad flags, will stay stuck in setup mode */ ++ if (err) ++ return err; + -+ struct page *aliasing_sink_page; ++ atomic_set(&kctx->setup_complete, 1); ++ return 0; ++} + -+ struct mutex mmu_lock; -+ struct mutex reg_lock; /* To be converted to a rwlock? 
*/ -+ struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions, -+ * SAME_VA zone */ -+ struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions, -+ * EXEC zone */ -+ struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions, -+ * CUSTOM_VA zone */ ++static int kbase_api_job_submit(struct kbase_context *kctx, ++ struct kbase_ioctl_job_submit *submit) ++{ ++ void __user *user_addr = NULL; + -+ unsigned long cookies; -+ struct kbase_va_region *pending_regions[BITS_PER_LONG]; ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_addr = compat_ptr(submit->addr.compat_value); ++ else ++#endif ++ user_addr = submit->addr.value; + -+ wait_queue_head_t event_queue; -+ pid_t tgid; -+ pid_t pid; ++ return kbase_jd_submit(kctx, user_addr, submit->nr_atoms, ++ submit->stride, false); ++} + -+ struct kbase_jd_context jctx; -+ atomic_t used_pages; -+ atomic_t nonmapped_pages; ++static int kbase_api_get_gpuprops(struct kbase_context *kctx, ++ struct kbase_ioctl_get_gpuprops *get_props) ++{ ++ struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props; ++ int err; + -+ struct kbase_mem_pool mem_pool; ++ if (get_props->flags != 0) { ++ dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops"); ++ return -EINVAL; ++ } + -+ struct shrinker reclaim; -+ struct list_head evict_list; ++ if (get_props->size == 0) ++ return kprops->prop_buffer_size; ++ if (get_props->size < kprops->prop_buffer_size) ++ return -EINVAL; + -+ struct list_head waiting_soft_jobs; -+ spinlock_t waiting_soft_jobs_lock; -+#ifdef CONFIG_KDS -+ struct list_head waiting_kds_resource; -+#endif -+#ifdef CONFIG_MALI_DMA_FENCE -+ struct { -+ struct list_head waiting_resource; -+ struct workqueue_struct *wq; -+ } dma_fence; -+#endif /* CONFIG_MALI_DMA_FENCE */ -+ /** This is effectively part of the Run Pool, because it only has a valid -+ * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in -+ * -+ * The hwaccess_lock must be held whilst accessing this. -+ * -+ * If the context relating to this as_nr is required, you must use -+ * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear -+ * whilst you're using it. Alternatively, just hold the hwaccess_lock -+ * to ensure the context doesn't disappear (but this has restrictions on what other locks -+ * you can take whilst doing this) */ -+ int as_nr; ++ err = copy_to_user(get_props->buffer.value, kprops->prop_buffer, ++ kprops->prop_buffer_size); ++ if (err) ++ return err; ++ return kprops->prop_buffer_size; ++} + -+ /* Keeps track of the number of users of this context. A user can be a -+ * job that is available for execution, instrumentation needing to 'pin' -+ * a context for counter collection, etc. If the refcount reaches 0 then -+ * this context is considered inactive and the previously programmed -+ * AS might be cleared at any point. 
-+ */ -+ atomic_t refcount; ++static int kbase_api_post_term(struct kbase_context *kctx) ++{ ++ kbase_event_close(kctx); ++ return 0; ++} + -+ /* NOTE: -+ * -+ * Flags are in jctx.sched_info.ctx.flags -+ * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex -+ * -+ * All other flags must be added there */ -+ spinlock_t mm_update_lock; -+ struct mm_struct *process_mm; -+ /* End of the SAME_VA zone */ -+ u64 same_va_end; ++static int kbase_api_mem_alloc(struct kbase_context *kctx, ++ union kbase_ioctl_mem_alloc *alloc) ++{ ++ struct kbase_va_region *reg; ++ u64 flags = alloc->in.flags; ++ u64 gpu_va; + -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ struct kbase_trace_kctx_timeline timeline; ++#if defined(CONFIG_64BIT) ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* force SAME_VA if a 64-bit client */ ++ flags |= BASE_MEM_SAME_VA; ++ } +#endif -+#ifdef CONFIG_DEBUG_FS -+ /* Content of mem_profile file */ -+ char *mem_profile_data; -+ /* Size of @c mem_profile_data */ -+ size_t mem_profile_size; -+ /* Mutex guarding memory profile state */ -+ struct mutex mem_profile_lock; -+ /* Memory profile directory under debugfs */ -+ struct dentry *kctx_dentry; -+ -+ /* for job fault debug */ -+ unsigned int *reg_dump; -+ atomic_t job_fault_count; -+ /* This list will keep the following atoms during the dump -+ * in the same context -+ */ -+ struct list_head job_fault_resume_event_list; + -+#endif /* CONFIG_DEBUG_FS */ ++ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, ++ alloc->in.commit_pages, ++ alloc->in.extent, ++ &flags, &gpu_va); + -+ struct jsctx_queue jsctx_queue -+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; ++ if (!reg) ++ return -ENOMEM; + -+ /* Number of atoms currently pulled from this context */ -+ atomic_t atoms_pulled; -+ /* Number of atoms currently pulled from this context, per slot */ -+ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; -+ /* Number of atoms currently pulled from this context, per slot and -+ * priority. Hold hwaccess_lock when accessing */ -+ int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ -+ KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ alloc->out.flags = flags; ++ alloc->out.gpu_va = gpu_va; + -+ /* true if slot is blocked on the given priority. 
This will be set on a -+ * soft-stop */ -+ bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ return 0; ++} + -+ /* Bitmask of slots that can be pulled from */ -+ u32 slots_pullable; ++static int kbase_api_mem_query(struct kbase_context *kctx, ++ union kbase_ioctl_mem_query *query) ++{ ++ return kbase_mem_query(kctx, query->in.gpu_addr, ++ query->in.query, &query->out.value); ++} + -+ /* Backend specific data */ -+ struct kbase_context_backend backend; ++static int kbase_api_mem_free(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_free *free) ++{ ++ return kbase_mem_free(kctx, free->gpu_addr); ++} + -+ /* Work structure used for deferred ASID assignment */ -+ struct work_struct work; ++static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, ++ struct kbase_ioctl_hwcnt_reader_setup *setup) ++{ ++ int ret; ++ struct kbase_uk_hwcnt_reader_setup args = { ++ .buffer_count = setup->buffer_count, ++ .jm_bm = setup->jm_bm, ++ .shader_bm = setup->shader_bm, ++ .tiler_bm = setup->tiler_bm, ++ .mmu_l2_bm = setup->mmu_l2_bm ++ }; + -+ /* Only one userspace vinstr client per kbase context */ -+ struct kbase_vinstr_client *vinstr_cli; -+ struct mutex vinstr_cli_lock; ++ mutex_lock(&kctx->vinstr_cli_lock); ++ ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args); ++ mutex_unlock(&kctx->vinstr_cli_lock); + -+ /* List of completed jobs waiting for events to be posted */ -+ struct list_head completed_jobs; -+ /* Number of work items currently pending on job_done_wq */ -+ atomic_t work_count; ++ if (ret) ++ return ret; ++ return args.fd; ++} + -+ /* Waiting soft-jobs will fail when this timer expires */ -+ struct timer_list soft_job_timeout; ++static int kbase_api_hwcnt_enable(struct kbase_context *kctx, ++ struct kbase_ioctl_hwcnt_enable *enable) ++{ ++ int ret; ++ struct kbase_uk_hwcnt_setup args = { ++ .dump_buffer = enable->dump_buffer, ++ .jm_bm = enable->jm_bm, ++ .shader_bm = enable->shader_bm, ++ .tiler_bm = enable->tiler_bm, ++ .mmu_l2_bm = enable->mmu_l2_bm ++ }; + -+ /* JIT allocation management */ -+ struct kbase_va_region *jit_alloc[256]; -+ struct list_head jit_active_head; -+ struct list_head jit_pool_head; -+ struct list_head jit_destroy_head; -+ struct mutex jit_evict_lock; -+ struct work_struct jit_work; ++ mutex_lock(&kctx->vinstr_cli_lock); ++ ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx, ++ &kctx->vinstr_cli, &args); ++ mutex_unlock(&kctx->vinstr_cli_lock); + -+ /* A list of the JIT soft-jobs in submission order -+ * (protected by kbase_jd_context.lock) -+ */ -+ struct list_head jit_atoms_head; -+ /* A list of pending JIT alloc soft-jobs (using the 'queue' list_head) -+ * (protected by kbase_jd_context.lock) -+ */ -+ struct list_head jit_pending_alloc; ++ return ret; ++} + -+ /* External sticky resource management */ -+ struct list_head ext_res_meta_head; ++static int kbase_api_hwcnt_dump(struct kbase_context *kctx) ++{ ++ int ret; + -+ /* Used to record that a drain was requested from atomic context */ -+ atomic_t drain_pending; ++ mutex_lock(&kctx->vinstr_cli_lock); ++ ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli, ++ BASE_HWCNT_READER_EVENT_MANUAL); ++ mutex_unlock(&kctx->vinstr_cli_lock); + -+ /* Current age count, used to determine age for newly submitted atoms */ -+ u32 age_count; -+}; ++ return ret; ++} + -+/** -+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource -+ * to a @kbase_context. -+ * @ext_res_node: List head for adding the metadata to a -+ * @kbase_context. 
-+ * @alloc: The physical memory allocation structure -+ * which is mapped. -+ * @gpu_addr: The GPU virtual address the resource is -+ * mapped to. -+ * -+ * External resources can be mapped into multiple contexts as well as the same -+ * context multiple times. -+ * As kbase_va_region itself isn't refcounted we can't attach our extra -+ * information to it as it could be removed under our feet leaving external -+ * resources pinned. -+ * This metadata structure binds a single external resource to a single -+ * context, ensuring that per context mapping is tracked separately so it can -+ * be overridden when needed and abuses by the application (freeing the resource -+ * multiple times) don't effect the refcount of the physical allocation. -+ */ -+struct kbase_ctx_ext_res_meta { -+ struct list_head ext_res_node; -+ struct kbase_mem_phy_alloc *alloc; -+ u64 gpu_addr; -+}; ++static int kbase_api_hwcnt_clear(struct kbase_context *kctx) ++{ ++ int ret; + -+enum kbase_reg_access_type { -+ REG_READ, -+ REG_WRITE -+}; ++ mutex_lock(&kctx->vinstr_cli_lock); ++ ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli); ++ mutex_unlock(&kctx->vinstr_cli_lock); + -+enum kbase_share_attr_bits { -+ /* (1ULL << 8) bit is reserved */ -+ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ -+ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ -+}; ++ return ret; ++} + -+/** -+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. -+ * @kbdev: kbase device -+ * -+ * Return: true if the device access are coherent, false if not. -+ */ -+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) ++static int kbase_api_disjoint_query(struct kbase_context *kctx, ++ struct kbase_ioctl_disjoint_query *query) +{ -+ if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || -+ (kbdev->system_coherency == COHERENCY_ACE)) -+ return true; ++ query->counter = kbase_disjoint_event_get(kctx->kbdev); + -+ return false; ++ return 0; +} + -+/* Conversion helpers for setting up high resolution timers */ -+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) -+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) -+ -+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ -+#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 -+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ -+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000 -+ -+/* Maximum number of times a job can be replayed */ -+#define BASEP_JD_REPLAY_LIMIT 15 -+ -+/* JobDescriptorHeader - taken from the architecture specifications, the layout -+ * is currently identical for all GPU archs. */ -+struct job_descriptor_header { -+ u32 exception_status; -+ u32 first_incomplete_task; -+ u64 fault_pointer; -+ u8 job_descriptor_size : 1; -+ u8 job_type : 7; -+ u8 job_barrier : 1; -+ u8 _reserved_01 : 1; -+ u8 _reserved_1 : 1; -+ u8 _reserved_02 : 1; -+ u8 _reserved_03 : 1; -+ u8 _reserved_2 : 1; -+ u8 _reserved_04 : 1; -+ u8 _reserved_05 : 1; -+ u16 job_index; -+ u16 job_dependency_index_1; -+ u16 job_dependency_index_2; -+ union { -+ u64 _64; -+ u32 _32; -+ } next_job; -+}; -+ -+#endif /* _KBASE_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c -new file mode 100644 -index 000000000..b0eb67da8 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c -@@ -0,0 +1,674 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ -+ -+ -+ ++static int kbase_api_get_ddk_version(struct kbase_context *kctx, ++ struct kbase_ioctl_get_ddk_version *version) ++{ ++ int ret; ++ int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + ++ if (version->version_buffer.value == NULL) ++ return len; + -+/* -+ * Base kernel device APIs -+ */ ++ if (version->size < len) ++ return -EOVERFLOW; + -+#include -+#include -+#include -+#include -+#include -+#include ++ ret = copy_to_user(version->version_buffer.value, ++ KERNEL_SIDE_DDK_VERSION_STRING, ++ sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + -+#include -+#include -+#include -+#include -+#include ++ if (ret) ++ return ret; + -+#include ++ return len; ++} + -+/* NOTE: Magic - 0x45435254 (TRCE in ASCII). -+ * Supports tracing feature provided in the base module. -+ * Please keep it in sync with the value of base module. -+ */ -+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 ++static int kbase_api_mem_jit_init(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_jit_init *jit_init) ++{ ++ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages); ++} + -+#if KBASE_TRACE_ENABLE -+static const char *kbasep_trace_code_string[] = { -+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE -+ * THIS MUST BE USED AT THE START OF THE ARRAY */ -+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X -+#include "mali_kbase_trace_defs.h" -+#undef KBASE_TRACE_CODE_MAKE_CODE -+}; ++static int kbase_api_mem_sync(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_sync *sync) ++{ ++#ifdef CONFIG_MALI_COH_USER ++ return 0; +#endif ++ struct basep_syncset sset = { ++ .mem_handle.basep.handle = sync->handle, ++ .user_addr = sync->user_addr, ++ .size = sync->size, ++ .type = sync->type ++ }; + -+#define DEBUG_MESSAGE_SIZE 256 -+ -+static int kbasep_trace_init(struct kbase_device *kbdev); -+static void kbasep_trace_term(struct kbase_device *kbdev); -+static void kbasep_trace_hook_wrapper(void *param); ++ return kbase_sync_now(kctx, &sset); ++} + -+struct kbase_device *kbase_device_alloc(void) ++static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, ++ union kbase_ioctl_mem_find_cpu_offset *find) +{ -+ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); ++ return kbasep_find_enclosing_cpu_mapping_offset( ++ kctx, ++ find->in.cpu_addr, ++ find->in.size, ++ &find->out.offset); +} + -+static int kbase_device_as_init(struct kbase_device *kbdev, int i) ++static int kbase_api_get_context_id(struct kbase_context *kctx, ++ struct kbase_ioctl_get_context_id *info) +{ -+ const char format[] = "mali_mmu%d"; -+ char name[sizeof(format)]; -+ const char poke_format[] = "mali_mmu%d_poker"; -+ char poke_name[sizeof(poke_format)]; -+ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) -+ snprintf(poke_name, sizeof(poke_name), poke_format, i); -+ -+ snprintf(name, sizeof(name), format, i); -+ -+ kbdev->as[i].number = i; -+ kbdev->as[i].fault_addr = 0ULL; -+ -+ kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); -+ if (!kbdev->as[i].pf_wq) -+ return -EINVAL; -+ -+ INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); -+ 
INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); -+ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { -+ struct hrtimer *poke_timer = &kbdev->as[i].poke_timer; -+ struct work_struct *poke_work = &kbdev->as[i].poke_work; -+ -+ kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1); -+ if (!kbdev->as[i].poke_wq) { -+ destroy_workqueue(kbdev->as[i].pf_wq); -+ return -EINVAL; -+ } -+ KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work)); -+ INIT_WORK(poke_work, kbasep_as_do_poke); -+ -+ hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ -+ poke_timer->function = kbasep_as_poke_timer_callback; -+ -+ kbdev->as[i].poke_refcount = 0; -+ kbdev->as[i].poke_state = 0u; -+ } ++ info->id = kctx->id; + + return 0; +} + -+static void kbase_device_as_term(struct kbase_device *kbdev, int i) ++static int kbase_api_tlstream_acquire(struct kbase_context *kctx, ++ struct kbase_ioctl_tlstream_acquire *acquire) +{ -+ destroy_workqueue(kbdev->as[i].pf_wq); -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) -+ destroy_workqueue(kbdev->as[i].poke_wq); ++ return kbase_tlstream_acquire(kctx, acquire->flags); +} + -+static int kbase_device_all_as_init(struct kbase_device *kbdev) ++static int kbase_api_tlstream_flush(struct kbase_context *kctx) +{ -+ int i, err; -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ err = kbase_device_as_init(kbdev, i); -+ if (err) -+ goto free_workqs; -+ } ++ kbase_tlstream_flush_streams(); + + return 0; -+ -+free_workqs: -+ for (; i > 0; i--) -+ kbase_device_as_term(kbdev, i); -+ -+ return err; +} + -+static void kbase_device_all_as_term(struct kbase_device *kbdev) ++static int kbase_api_mem_commit(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_commit *commit) +{ -+ int i; -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) -+ kbase_device_as_term(kbdev, i); ++ return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); +} + -+int kbase_device_init(struct kbase_device * const kbdev) ++static int kbase_api_mem_alias(struct kbase_context *kctx, ++ union kbase_ioctl_mem_alias *alias) +{ -+ int i, err; -+#ifdef CONFIG_ARM64 -+ struct device_node *np = NULL; -+#endif /* CONFIG_ARM64 */ -+ -+ spin_lock_init(&kbdev->mmu_mask_change); -+ mutex_init(&kbdev->mmu_hw_mutex); -+#ifdef CONFIG_ARM64 -+ kbdev->cci_snoop_enabled = false; -+ np = kbdev->dev->of_node; -+ if (np != NULL) { -+ if (of_property_read_u32(np, "snoop_enable_smc", -+ &kbdev->snoop_enable_smc)) -+ kbdev->snoop_enable_smc = 0; -+ if (of_property_read_u32(np, "snoop_disable_smc", -+ &kbdev->snoop_disable_smc)) -+ kbdev->snoop_disable_smc = 0; -+ /* Either both or none of the calls should be provided. 
*/ -+ if (!((kbdev->snoop_disable_smc == 0 -+ && kbdev->snoop_enable_smc == 0) -+ || (kbdev->snoop_disable_smc != 0 -+ && kbdev->snoop_enable_smc != 0))) { -+ WARN_ON(1); -+ err = -EINVAL; -+ goto fail; -+ } -+ } -+#endif /* CONFIG_ARM64 */ -+ /* Get the list of workarounds for issues on the current HW -+ * (identified by the GPU_ID register) -+ */ -+ err = kbase_hw_set_issues_mask(kbdev); -+ if (err) -+ goto fail; ++ struct base_mem_aliasing_info *ai; ++ void __user *user_addr = NULL; ++ u64 flags; ++ int err; + -+ /* Set the list of features available on the current HW -+ * (identified by the GPU_ID register) -+ */ -+ kbase_hw_set_features_mask(kbdev); ++ if (alias->in.nents == 0 || alias->in.nents > 2048) ++ return -EINVAL; + -+ kbase_gpuprops_set_features(kbdev); ++ ai = vmalloc(sizeof(*ai) * alias->in.nents); ++ if (!ai) ++ return -ENOMEM; + -+ /* On Linux 4.0+, dma coherency is determined from device tree */ -+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) -+ set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_addr = ++ compat_ptr(alias->in.aliasing_info.compat_value); ++ else +#endif ++ user_addr = alias->in.aliasing_info.value; + -+ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our -+ * device structure was created by device-tree -+ */ -+ if (!kbdev->dev->dma_mask) -+ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; -+ -+ err = dma_set_mask(kbdev->dev, -+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); -+ if (err) -+ goto dma_set_mask_failed; -+ -+ err = dma_set_coherent_mask(kbdev->dev, -+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); -+ if (err) -+ goto dma_set_mask_failed; -+ -+ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; -+ -+ err = kbase_device_all_as_init(kbdev); -+ if (err) -+ goto as_init_failed; -+ -+ spin_lock_init(&kbdev->hwcnt.lock); -+ -+ err = kbasep_trace_init(kbdev); -+ if (err) -+ goto term_as; -+ -+ mutex_init(&kbdev->cacheclean_lock); -+ -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) -+ kbdev->timeline.slot_atoms_submitted[i] = 0; -+ -+ for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i) -+ atomic_set(&kbdev->timeline.pm_event_uid[i], 0); -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ -+ -+ /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */ -+ for (i = 0; i < FBDUMP_CONTROL_MAX; i++) -+ kbdev->kbase_profiling_controls[i] = 0; -+ -+ kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); -+ -+ atomic_set(&kbdev->ctx_num, 0); ++ err = copy_from_user(ai, user_addr, sizeof(*ai) * alias->in.nents); ++ if (err) { ++ vfree(ai); ++ return err; ++ } + -+ err = kbase_instr_backend_init(kbdev); -+ if (err) -+ goto term_trace; ++ flags = alias->in.flags; + -+ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; ++ alias->out.gpu_va = kbase_mem_alias(kctx, &flags, ++ alias->in.stride, alias->in.nents, ++ ai, &alias->out.va_pages); + -+ kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; ++ alias->out.flags = flags; + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); -+ else -+ kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); ++ vfree(ai); + -+#ifdef CONFIG_MALI_DEBUG -+ init_waitqueue_head(&kbdev->driver_inactive_wait); -+#endif /* CONFIG_MALI_DEBUG */ ++ if (alias->out.gpu_va == 0) ++ return -ENOMEM; + + return 0; -+term_trace: -+ kbasep_trace_term(kbdev); -+term_as: -+ 
kbase_device_all_as_term(kbdev); -+as_init_failed: -+dma_set_mask_failed: -+fail: -+ return err; +} + -+void kbase_device_term(struct kbase_device *kbdev) ++static int kbase_api_mem_import(struct kbase_context *kctx, ++ union kbase_ioctl_mem_import *import) +{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ int ret; ++ u64 flags = import->in.flags; ++ void __user *phandle; + -+#if KBASE_TRACE_ENABLE -+ kbase_debug_assert_register_hook(NULL, NULL); ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ phandle = compat_ptr(import->in.phandle.compat_value); ++ else +#endif ++ phandle = import->in.phandle.value; + -+ kbase_instr_backend_term(kbdev); ++ ret = kbase_mem_import(kctx, ++ import->in.type, ++ phandle, ++ import->in.padding, ++ &import->out.gpu_va, ++ &import->out.va_pages, ++ &flags); + -+ kbasep_trace_term(kbdev); ++ import->out.flags = flags; + -+ kbase_device_all_as_term(kbdev); ++ return ret; +} + -+void kbase_device_free(struct kbase_device *kbdev) ++static int kbase_api_mem_flags_change(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_flags_change *change) +{ -+ kfree(kbdev); ++ return kbase_mem_flags_change(kctx, change->gpu_va, ++ change->flags, change->mask); +} + -+int kbase_device_trace_buffer_install( -+ struct kbase_context *kctx, u32 *tb, size_t size) ++static int kbase_api_stream_create(struct kbase_context *kctx, ++ struct kbase_ioctl_stream_create *stream) +{ -+ unsigned long flags; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(tb); -+ -+ /* Interface uses 16-bit value to track last accessed entry. Each entry -+ * is composed of two 32-bit words. -+ * This limits the size that can be handled without an overflow. */ -+ if (0xFFFF * (2 * sizeof(u32)) < size) -+ return -EINVAL; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ int fd, ret; + -+ /* set up the header */ -+ /* magic number in the first 4 bytes */ -+ tb[0] = TRACE_BUFFER_HEADER_SPECIAL; -+ /* Store (write offset = 0, wrap counter = 0, transaction active = no) -+ * write offset 0 means never written. 
-+ * Offsets 1 to (wrap_offset - 1) used to store values when trace started ++ /* Name must be NULL-terminated and padded with NULLs, so check last ++ * character is NULL + */ -+ tb[1] = 0; ++ if (stream->name[sizeof(stream->name)-1] != 0) ++ return -EINVAL; + -+ /* install trace buffer */ -+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); -+ kctx->jctx.tb_wrap_offset = size / 8; -+ kctx->jctx.tb = tb; -+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); ++ ret = kbase_sync_fence_stream_create(stream->name, &fd); + -+ return 0; ++ if (ret) ++ return ret; ++ return fd; ++#else ++ return -ENOENT; ++#endif +} + -+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx) ++static int kbase_api_fence_validate(struct kbase_context *kctx, ++ struct kbase_ioctl_fence_validate *validate) +{ -+ unsigned long flags; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); -+ kctx->jctx.tb = NULL; -+ kctx->jctx.tb_wrap_offset = 0; -+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ return kbase_sync_fence_validate(validate->fd); ++#else ++ return -ENOENT; ++#endif +} + -+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value) ++static int kbase_api_get_profiling_controls(struct kbase_context *kctx, ++ struct kbase_ioctl_get_profiling_controls *controls) +{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); -+ if (kctx->jctx.tb) { -+ u16 wrap_count; -+ u16 write_offset; -+ u32 *tb = kctx->jctx.tb; -+ u32 header_word; ++ if (controls->count > FBDUMP_CONTROL_MAX) ++ return -EINVAL; + -+ header_word = tb[1]; -+ KBASE_DEBUG_ASSERT(0 == (header_word & 0x1)); ++ return copy_to_user(controls->buffer.value, ++ &kctx->kbdev->kbase_profiling_controls[ ++ FBDUMP_CONTROL_MIN], ++ controls->count * sizeof(u32)); ++} + -+ wrap_count = (header_word >> 1) & 0x7FFF; -+ write_offset = (header_word >> 16) & 0xFFFF; ++static int kbase_api_mem_profile_add(struct kbase_context *kctx, ++ struct kbase_ioctl_mem_profile_add *data) ++{ ++ char __user *user_buf; ++ char *buf; ++ int err; + -+ /* mark as transaction in progress */ -+ tb[1] |= 0x1; -+ mb(); ++ if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { ++ dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n"); ++ return -EINVAL; ++ } + -+ /* calculate new offset */ -+ write_offset++; -+ if (write_offset == kctx->jctx.tb_wrap_offset) { -+ /* wrap */ -+ write_offset = 1; -+ wrap_count++; -+ wrap_count &= 0x7FFF; /* 15bit wrap counter */ -+ } ++ buf = kmalloc(data->len, GFP_KERNEL); ++ if (ZERO_OR_NULL_PTR(buf)) ++ return -ENOMEM; + -+ /* store the trace entry at the selected offset */ -+ tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 
0x1 : 0x0); -+ tb[write_offset * 2 + 1] = reg_value; -+ mb(); ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ user_buf = compat_ptr(data->buffer.compat_value); ++ else ++#endif ++ user_buf = data->buffer.value; + -+ /* new header word */ -+ header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */ -+ tb[1] = header_word; ++ err = copy_from_user(buf, user_buf, data->len); ++ if (err) { ++ kfree(buf); ++ return err; + } -+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); -+} + -+/* -+ * Device trace functions -+ */ -+#if KBASE_TRACE_ENABLE ++ return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); ++} + -+static int kbasep_trace_init(struct kbase_device *kbdev) ++static int kbase_api_soft_event_update(struct kbase_context *kctx, ++ struct kbase_ioctl_soft_event_update *update) +{ -+ struct kbase_trace *rbuf; ++ if (update->flags != 0) ++ return -EINVAL; + -+ rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); ++ return kbase_soft_event_update(kctx, update->event, update->new_status); ++} + -+ if (!rbuf) -+ return -EINVAL; ++#if MALI_UNIT_TEST ++static int kbase_api_tlstream_test(struct kbase_context *kctx, ++ struct kbase_ioctl_tlstream_test *test) ++{ ++ kbase_tlstream_test( ++ test->tpw_count, ++ test->msg_delay, ++ test->msg_count, ++ test->aux_msg); + -+ kbdev->trace_rbuf = rbuf; -+ spin_lock_init(&kbdev->trace_lock); + return 0; +} + -+static void kbasep_trace_term(struct kbase_device *kbdev) ++static int kbase_api_tlstream_stats(struct kbase_context *kctx, ++ struct kbase_ioctl_tlstream_stats *stats) +{ -+ kfree(kbdev->trace_rbuf); ++ kbase_tlstream_stats( ++ &stats->bytes_collected, ++ &stats->bytes_generated); ++ ++ return 0; +} ++#endif /* MALI_UNIT_TEST */ + -+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len) -+{ -+ s32 written = 0; ++#define KBASE_HANDLE_IOCTL(cmd, function) \ ++ case cmd: \ ++ do { \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ ++ return function(kctx); \ ++ } while (0) + -+ /* Initial part of message */ -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); ++#define KBASE_HANDLE_IOCTL_IN(cmd, function, type) \ ++ case cmd: \ ++ do { \ ++ type param; \ ++ int err; \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(¶m, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ return function(kctx, ¶m); \ ++ } while (0) + -+ if (trace_msg->katom) -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); ++#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type) \ ++ case cmd: \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ ret = function(kctx, ¶m); \ ++ err = copy_to_user(uarg, ¶m, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ return ret; \ ++ } while (0) + -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); ++#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type) \ ++ case cmd: \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != 
(_IOC_WRITE|_IOC_READ)); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(¶m, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ ret = function(kctx, ¶m); \ ++ err = copy_to_user(uarg, ¶m, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ return ret; \ ++ } while (0) + -+ /* NOTE: Could add function callbacks to handle different message types */ -+ /* Jobslot present */ -+ if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); ++static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ++{ ++ struct kbase_context *kctx = filp->private_data; ++ struct kbase_device *kbdev = kctx->kbdev; ++ void __user *uarg = (void __user *)arg; + -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); ++ /* The UK ioctl values overflow the cmd field causing the type to be ++ * incremented ++ */ ++ if (_IOC_TYPE(cmd) == LINUX_UK_BASE_MAGIC+2) ++ return kbase_legacy_ioctl(filp, cmd, arg); + -+ /* Refcount present */ -+ if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); ++ /* The UK version check IOCTL doesn't overflow the cmd field, so is ++ * handled separately here ++ */ ++ if (cmd == _IOC(_IOC_READ|_IOC_WRITE, LINUX_UK_BASE_MAGIC, ++ UKP_FUNC_ID_CHECK_VERSION, ++ sizeof(struct uku_version_check_args))) ++ return kbase_legacy_ioctl(filp, cmd, arg); + -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); ++ /* Only these ioctls are available until setup is complete */ ++ switch (cmd) { ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, ++ kbase_api_handshake, ++ struct kbase_ioctl_version_check); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, ++ kbase_api_set_flags, ++ struct kbase_ioctl_set_flags); ++ } + -+ /* Rest of message */ -+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); -+} ++ /* Block call until version handshake and setup is complete */ ++ if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete)) ++ return -EINVAL; + -+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg) -+{ -+ char buffer[DEBUG_MESSAGE_SIZE]; ++ /* Normal ioctls */ ++ switch (cmd) { ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, ++ kbase_api_job_submit, ++ struct kbase_ioctl_job_submit); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, ++ kbase_api_get_gpuprops, ++ struct kbase_ioctl_get_gpuprops); ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, ++ kbase_api_post_term); ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, ++ kbase_api_mem_alloc, ++ union kbase_ioctl_mem_alloc); ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, ++ kbase_api_mem_query, ++ union kbase_ioctl_mem_query); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, ++ kbase_api_mem_free, ++ struct kbase_ioctl_mem_free); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, ++ kbase_api_hwcnt_reader_setup, ++ struct kbase_ioctl_hwcnt_reader_setup); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, ++ kbase_api_hwcnt_enable, ++ struct kbase_ioctl_hwcnt_enable); ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, ++ kbase_api_hwcnt_dump); ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, ++ kbase_api_hwcnt_clear); ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, ++ kbase_api_disjoint_query, ++ struct kbase_ioctl_disjoint_query); ++ 
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, ++ kbase_api_get_ddk_version, ++ struct kbase_ioctl_get_ddk_version); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, ++ kbase_api_mem_jit_init, ++ struct kbase_ioctl_mem_jit_init); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, ++ kbase_api_mem_sync, ++ struct kbase_ioctl_mem_sync); ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, ++ kbase_api_mem_find_cpu_offset, ++ union kbase_ioctl_mem_find_cpu_offset); ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, ++ kbase_api_get_context_id, ++ struct kbase_ioctl_get_context_id); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, ++ kbase_api_tlstream_acquire, ++ struct kbase_ioctl_tlstream_acquire); ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, ++ kbase_api_tlstream_flush); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, ++ kbase_api_mem_commit, ++ struct kbase_ioctl_mem_commit); ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, ++ kbase_api_mem_alias, ++ union kbase_ioctl_mem_alias); ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, ++ kbase_api_mem_import, ++ union kbase_ioctl_mem_import); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, ++ kbase_api_mem_flags_change, ++ struct kbase_ioctl_mem_flags_change); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, ++ kbase_api_stream_create, ++ struct kbase_ioctl_stream_create); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, ++ kbase_api_fence_validate, ++ struct kbase_ioctl_fence_validate); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS, ++ kbase_api_get_profiling_controls, ++ struct kbase_ioctl_get_profiling_controls); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, ++ kbase_api_mem_profile_add, ++ struct kbase_ioctl_mem_profile_add); ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, ++ kbase_api_soft_event_update, ++ struct kbase_ioctl_soft_event_update); + -+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); -+ dev_dbg(kbdev->dev, "%s", buffer); ++#if MALI_UNIT_TEST ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, ++ kbase_api_tlstream_test, ++ struct kbase_ioctl_tlstream_test); ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, ++ kbase_api_tlstream_stats, ++ struct kbase_ioctl_tlstream_stats); ++#endif ++ } ++ ++ dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); ++ ++ return -ENOIOCTLCMD; +} + -+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) ++static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ -+ unsigned long irqflags; -+ struct kbase_trace *trace_msg; ++ struct kbase_context *kctx = filp->private_data; ++ struct base_jd_event_v2 uevent; ++ int out_count = 0; + -+ spin_lock_irqsave(&kbdev->trace_lock, irqflags); ++ if (count < sizeof(uevent)) ++ return -ENOBUFS; + -+ trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; ++ do { ++ while (kbase_event_dequeue(kctx, &uevent)) { ++ if (out_count > 0) ++ goto out; + -+ /* Fill the message */ -+ trace_msg->thread_id = task_pid_nr(current); -+ trace_msg->cpu = task_cpu(current); ++ if (filp->f_flags & O_NONBLOCK) ++ return -EAGAIN; + -+ ktime_get_real_ts64(&trace_msg->timestamp); ++ if (wait_event_interruptible(kctx->event_queue, ++ kbase_event_pending(kctx)) != 0) ++ return -ERESTARTSYS; ++ } ++ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { ++ if (out_count == 0) ++ return -EPIPE; ++ goto out; 
++ } + -+ trace_msg->code = code; -+ trace_msg->ctx = ctx; ++ if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) ++ return -EFAULT; + -+ if (NULL == katom) { -+ trace_msg->katom = false; -+ } else { -+ trace_msg->katom = true; -+ trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); -+ trace_msg->atom_udata[0] = katom->udata.blob[0]; -+ trace_msg->atom_udata[1] = katom->udata.blob[1]; -+ } ++ buf += sizeof(uevent); ++ out_count++; ++ count -= sizeof(uevent); ++ } while (count >= sizeof(uevent)); + -+ trace_msg->gpu_addr = gpu_addr; -+ trace_msg->jobslot = jobslot; -+ trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); -+ trace_msg->info_val = info_val; -+ trace_msg->flags = flags; ++ out: ++ return out_count * sizeof(uevent); ++} + -+ /* Update the ringbuffer indices */ -+ kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; -+ if (kbdev->trace_next_in == kbdev->trace_first_out) -+ kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; ++static unsigned int kbase_poll(struct file *filp, poll_table *wait) ++{ ++ struct kbase_context *kctx = filp->private_data; + -+ /* Done */ ++ poll_wait(filp, &kctx->event_queue, wait); ++ if (kbase_event_pending(kctx)) ++ return POLLIN | POLLRDNORM; + -+ spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); ++ return 0; +} + -+void kbasep_trace_clear(struct kbase_device *kbdev) ++void kbase_event_wakeup(struct kbase_context *kctx) +{ -+ unsigned long flags; ++ KBASE_DEBUG_ASSERT(kctx); + -+ spin_lock_irqsave(&kbdev->trace_lock, flags); -+ kbdev->trace_first_out = kbdev->trace_next_in; -+ spin_unlock_irqrestore(&kbdev->trace_lock, flags); ++ wake_up_interruptible(&kctx->event_queue); +} + -+void kbasep_trace_dump(struct kbase_device *kbdev) ++KBASE_EXPORT_TEST_API(kbase_event_wakeup); ++ ++static int kbase_check_flags(int flags) +{ -+ unsigned long flags; -+ u32 start; -+ u32 end; ++ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always ++ * closes the file descriptor in a child process. ++ */ ++ if (0 == (flags & O_CLOEXEC)) ++ return -EINVAL; + -+ dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); -+ spin_lock_irqsave(&kbdev->trace_lock, flags); -+ start = kbdev->trace_first_out; -+ end = kbdev->trace_next_in; ++ return 0; ++} + -+ while (start != end) { -+ struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; + -+ kbasep_trace_dump_msg(kbdev, trace_msg); ++/** ++ * align_and_check - Align the specified pointer to the provided alignment and ++ * check that it is still in range. 
++ * @gap_end: Highest possible start address for allocation (end of gap in ++ * address space) ++ * @gap_start: Start address of current memory area / gap in address space ++ * @info: vm_unmapped_area_info structure passed to caller, containing ++ * alignment, length and limits for the allocation ++ * @is_shader_code: True if the allocation is for shader code (which has ++ * additional alignment requirements) ++ * ++ * Return: true if gap_end is now aligned correctly and is still in range, ++ * false otherwise ++ */ ++static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, ++ struct vm_unmapped_area_info *info, bool is_shader_code) ++{ ++ /* Compute highest gap address at the desired alignment */ ++ (*gap_end) -= info->length; ++ (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + -+ start = (start + 1) & KBASE_TRACE_MASK; ++ if (is_shader_code) { ++ /* Check for 4GB boundary */ ++ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) ++ (*gap_end) -= (info->align_offset ? info->align_offset : ++ info->length); ++ if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) ++ (*gap_end) -= (info->align_offset ? info->align_offset : ++ info->length); ++ ++ if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + ++ info->length) & BASE_MEM_MASK_4GB)) ++ return false; + } -+ dev_dbg(kbdev->dev, "TRACE_END"); + -+ spin_unlock_irqrestore(&kbdev->trace_lock, flags); + -+ KBASE_TRACE_CLEAR(kbdev); -+} ++ if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) ++ return false; + -+static void kbasep_trace_hook_wrapper(void *param) -+{ -+ struct kbase_device *kbdev = (struct kbase_device *)param; + -+ kbasep_trace_dump(kbdev); ++ return true; +} + -+#ifdef CONFIG_DEBUG_FS -+struct trace_seq_state { -+ struct kbase_trace trace_buf[KBASE_TRACE_SIZE]; -+ u32 start; -+ u32 end; -+}; ++/* The following function is taken from the kernel and just ++ * renamed. As it's not exported to modules we must copy-paste it here. ++ */ + -+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) ++static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info ++ *info, bool is_shader_code) +{ -+ struct trace_seq_state *state = s->private; -+ int i; ++#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ unsigned long length, low_limit, high_limit, gap_start, gap_end; + -+ if (*pos > KBASE_TRACE_SIZE) -+ return NULL; -+ i = state->start + *pos; -+ if ((state->end >= state->start && i >= state->end) || -+ i >= state->end + KBASE_TRACE_SIZE) -+ return NULL; ++ /* Adjust search length to account for worst case alignment overhead */ ++ length = info->length + info->align_mask; ++ if (length < info->length) ++ return -ENOMEM; + -+ i &= KBASE_TRACE_MASK; ++ /* ++ * Adjust search limits by the desired length. ++ * See implementation comment at top of unmapped_area(). 
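/*
 * Stand-alone sketch of the alignment step performed by align_and_check()
 * above: the candidate end of the gap is first moved down by the allocation
 * length and then rounded down so that (gap_end - align_offset) is a multiple
 * of (align_mask + 1).  The addresses in main() are invented for illustration;
 * the 4GB shader-code checks are omitted.
 */
#include <stdio.h>

static unsigned long align_gap_end(unsigned long gap_end, unsigned long length,
                                   unsigned long align_offset,
                                   unsigned long align_mask)
{
        gap_end -= length;                                /* start of the mapping */
        gap_end -= (gap_end - align_offset) & align_mask; /* round down to alignment */
        return gap_end;
}

int main(void)
{
        /* 2 MiB alignment, as used for large SAME_VA allocations above. */
        unsigned long mask = 0x200000UL - 1;

        /* Prints 0x7f1234000000. */
        printf("0x%lx\n",
               align_gap_end(0x7f1234567000UL, 0x400000UL, 0x200000UL, mask));
        return 0;
}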
++ */ ++ gap_end = info->high_limit; ++ if (gap_end < length) ++ return -ENOMEM; ++ high_limit = gap_end - length; + -+ return &state->trace_buf[i]; -+} ++ if (info->low_limit > high_limit) ++ return -ENOMEM; ++ low_limit = info->low_limit + length; + -+static void kbasep_trace_seq_stop(struct seq_file *s, void *data) -+{ -+} ++ /* Check highest gap, which does not precede any rbtree node */ ++ gap_start = mm->highest_vm_end; ++ if (gap_start <= high_limit) { ++ if (align_and_check(&gap_end, gap_start, info, is_shader_code)) ++ return gap_end; ++ } + -+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) -+{ -+ struct trace_seq_state *state = s->private; -+ int i; ++ /* Check if rbtree root looks promising */ ++ if (RB_EMPTY_ROOT(&mm->mm_rb)) ++ return -ENOMEM; ++ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); ++ if (vma->rb_subtree_gap < length) ++ return -ENOMEM; + -+ (*pos)++; ++ while (true) { ++ /* Visit right subtree if it looks promising */ ++ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; ++ if (gap_start <= high_limit && vma->vm_rb.rb_right) { ++ struct vm_area_struct *right = ++ rb_entry(vma->vm_rb.rb_right, ++ struct vm_area_struct, vm_rb); ++ if (right->rb_subtree_gap >= length) { ++ vma = right; ++ continue; ++ } ++ } + -+ i = (state->start + *pos) & KBASE_TRACE_MASK; -+ if (i == state->end) -+ return NULL; ++check_current: ++ /* Check if current node has a suitable gap */ ++ gap_end = vma->vm_start; ++ if (gap_end < low_limit) ++ return -ENOMEM; ++ if (gap_start <= high_limit && gap_end - gap_start >= length) { ++ /* We found a suitable gap. Clip it with the original ++ * high_limit. */ ++ if (gap_end > info->high_limit) ++ gap_end = info->high_limit; + -+ return &state->trace_buf[i]; -+} ++ if (align_and_check(&gap_end, gap_start, info, ++ is_shader_code)) ++ return gap_end; ++ } + -+static int kbasep_trace_seq_show(struct seq_file *s, void *data) -+{ -+ struct kbase_trace *trace_msg = data; -+ char buffer[DEBUG_MESSAGE_SIZE]; ++ /* Visit left subtree if it looks promising */ ++ if (vma->vm_rb.rb_left) { ++ struct vm_area_struct *left = ++ rb_entry(vma->vm_rb.rb_left, ++ struct vm_area_struct, vm_rb); ++ if (left->rb_subtree_gap >= length) { ++ vma = left; ++ continue; ++ } ++ } + -+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); -+ seq_printf(s, "%s\n", buffer); -+ return 0; -+} ++ /* Go back up the rbtree to find next candidate node */ ++ while (true) { ++ struct rb_node *prev = &vma->vm_rb; ++ if (!rb_parent(prev)) ++ return -ENOMEM; ++ vma = rb_entry(rb_parent(prev), ++ struct vm_area_struct, vm_rb); ++ if (prev == vma->vm_rb.rb_right) { ++ gap_start = vma->vm_prev ? ++ vma->vm_prev->vm_end : 0; ++ goto check_current; ++ } ++ } ++ } ++#else ++ unsigned long length, high_limit, gap_start, gap_end; + -+static const struct seq_operations kbasep_trace_seq_ops = { -+ .start = kbasep_trace_seq_start, -+ .next = kbasep_trace_seq_next, -+ .stop = kbasep_trace_seq_stop, -+ .show = kbasep_trace_seq_show, -+}; ++ MA_STATE(mas, ¤t->mm->mm_mt, 0, 0); ++ /* Adjust search length to account for worst case alignment overhead */ ++ length = info->length + info->align_mask; ++ if (length < info->length) ++ return -ENOMEM; + -+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) ++ /* ++ * Adjust search limits by the desired length. ++ * See implementation comment at top of unmapped_area(). 
++ */ ++ gap_end = info->high_limit; ++ if (gap_end < length) ++ return -ENOMEM; ++ high_limit = gap_end - length; ++ ++ if (info->low_limit > high_limit) ++ return -ENOMEM; ++ ++ while (true) { ++ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length)) ++ return -ENOMEM; ++ gap_end = mas.last + 1; ++ gap_start = mas.min; ++ ++ if (align_and_check(&gap_end, gap_start, info, is_shader_code)) ++ return gap_end; ++ } ++#endif ++ return -ENOMEM; ++} ++ ++static unsigned long kbase_get_unmapped_area(struct file *filp, ++ const unsigned long addr, const unsigned long len, ++ const unsigned long pgoff, const unsigned long flags) +{ -+ struct kbase_device *kbdev = inode->i_private; -+ unsigned long flags; ++ /* based on get_unmapped_area, but simplified slightly due to that some ++ * values are known in advance */ ++ struct kbase_context *kctx = filp->private_data; ++ struct mm_struct *mm = current->mm; ++ struct vm_unmapped_area_info info; ++ unsigned long align_offset = 0; ++ unsigned long align_mask = 0; ++ unsigned long high_limit = mm->mmap_base; ++ unsigned long low_limit = PAGE_SIZE; ++ int cpu_va_bits = BITS_PER_LONG; ++ int gpu_pc_bits = ++ kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; ++ bool is_shader_code = false; ++ unsigned long ret; + -+ struct trace_seq_state *state; ++ /* err on fixed address */ ++ if ((flags & MAP_FIXED) || addr) ++ return -EINVAL; + -+ state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); -+ if (!state) ++#ifdef CONFIG_64BIT ++ /* too big? */ ++ if (len > TASK_SIZE - SZ_2M) + return -ENOMEM; + -+ spin_lock_irqsave(&kbdev->trace_lock, flags); -+ state->start = kbdev->trace_first_out; -+ state->end = kbdev->trace_next_in; -+ memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); -+ spin_unlock_irqrestore(&kbdev->trace_lock, flags); ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + -+ return 0; ++ if (kbase_hw_has_feature(kctx->kbdev, ++ BASE_HW_FEATURE_33BIT_VA)) { ++ high_limit = kctx->same_va_end << PAGE_SHIFT; ++ } else { ++ high_limit = min_t(unsigned long, mm->mmap_base, ++ (kctx->same_va_end << PAGE_SHIFT)); ++ if (len >= SZ_2M) { ++ align_offset = SZ_2M; ++ align_mask = SZ_2M - 1; ++ } ++ } ++ ++ low_limit = SZ_2M; ++ } else { ++ cpu_va_bits = 32; ++ } ++#endif /* CONFIG_64BIT */ ++ if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && ++ (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { ++ int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ ++ if (!kctx->pending_regions[cookie]) ++ return -EINVAL; ++ ++ if (!(kctx->pending_regions[cookie]->flags & ++ KBASE_REG_GPU_NX)) { ++ if (cpu_va_bits > gpu_pc_bits) { ++ align_offset = 1ULL << gpu_pc_bits; ++ align_mask = align_offset - 1; ++ is_shader_code = true; ++ } ++ } ++#ifndef CONFIG_64BIT ++ } else { ++ return current->mm->get_unmapped_area(filp, addr, len, pgoff, ++ flags); ++#endif ++ } ++ ++ info.flags = 0; ++ info.length = len; ++ info.low_limit = low_limit; ++ info.high_limit = high_limit; ++ info.align_offset = align_offset; ++ info.align_mask = align_mask; ++ ++ ret = kbase_unmapped_area_topdown(&info, is_shader_code); ++ ++ if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && ++ high_limit < (kctx->same_va_end << PAGE_SHIFT)) { ++ /* Retry above mmap_base */ ++ info.low_limit = mm->mmap_base; ++ info.high_limit = min_t(u64, TASK_SIZE, ++ (kctx->same_va_end << PAGE_SHIFT)); ++ ++ ret = kbase_unmapped_area_topdown(&info, is_shader_code); ++ } ++ ++ return ret; +} + -+static const struct file_operations kbasep_trace_debugfs_fops 
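/*
 * Simplified model (illustrative sketch, not driver code) of what both
 * variants of kbase_unmapped_area_topdown() above do: walk the existing
 * mappings from the top of the allowed range downwards and return the highest
 * gap start that can hold `length` bytes.  Alignment and the shader-code
 * boundary checks are omitted; the mappings in main() are invented.
 */
#include <stddef.h>
#include <stdio.h>

struct mapping { unsigned long start, end; }; /* sorted by start, non-overlapping */

static unsigned long find_gap_topdown(const struct mapping *m, size_t n,
                                      unsigned long low_limit,
                                      unsigned long high_limit,
                                      unsigned long length)
{
        unsigned long gap_end = high_limit;
        size_t i = n;

        while (1) {
                unsigned long gap_start = i ? m[i - 1].end : low_limit;

                if (gap_end >= length && gap_start <= gap_end - length &&
                    gap_end - length >= low_limit)
                        return gap_end - length;  /* highest fitting start */
                if (i == 0)
                        return 0;                 /* no suitable gap */
                gap_end = m[--i].start;           /* drop below this mapping */
                if (gap_end > high_limit)
                        gap_end = high_limit;
        }
}

int main(void)
{
        const struct mapping maps[] = {
                { 0x10000000UL, 0x20000000UL },
                { 0x30000000UL, 0x70000000UL },
        };

        /* Prints 0x70000000: the gap just below high_limit is used first. */
        printf("0x%lx\n",
               find_gap_topdown(maps, 2, 0x1000UL, 0x80000000UL, 0x10000000UL));
        return 0;
}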
= { -+ .open = kbasep_trace_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release_private, ++static const struct file_operations kbase_fops = { ++ .owner = THIS_MODULE, ++ .open = kbase_open, ++ .release = kbase_release, ++ .read = kbase_read, ++ .poll = kbase_poll, ++ .unlocked_ioctl = kbase_ioctl, ++ .compat_ioctl = kbase_ioctl, ++ .mmap = kbase_mmap, ++ .check_flags = kbase_check_flags, ++ .get_unmapped_area = kbase_get_unmapped_area, +}; + -+void kbasep_trace_debugfs_init(struct kbase_device *kbdev) ++#ifndef CONFIG_MALI_NO_MALI ++void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value) +{ -+ debugfs_create_file("mali_trace", S_IRUGO, -+ kbdev->mali_debugfs_directory, kbdev, -+ &kbasep_trace_debugfs_fops); ++ writel(value, kbdev->reg + offset); +} + -+#else -+void kbasep_trace_debugfs_init(struct kbase_device *kbdev) ++u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) +{ ++ return readl(kbdev->reg + offset); +} -+#endif /* CONFIG_DEBUG_FS */ ++#endif /* !CONFIG_MALI_NO_MALI */ + -+#else /* KBASE_TRACE_ENABLE */ -+static int kbasep_trace_init(struct kbase_device *kbdev) ++/** ++ * show_policy - Show callback for the power_policy sysfs file. ++ * ++ * This function is called to get the contents of the power_policy sysfs ++ * file. This is a list of the available policies with the currently active one ++ * surrounded by square brackets. ++ * ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ -+ CSTD_UNUSED(kbdev); -+ return 0; -+} ++ struct kbase_device *kbdev; ++ const struct kbase_pm_policy *current_policy; ++ const struct kbase_pm_policy *const *policy_list; ++ int policy_count; ++ int i; ++ ssize_t ret = 0; + -+static void kbasep_trace_term(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} ++ kbdev = to_kbase_device(dev); + -+static void kbasep_trace_hook_wrapper(void *param) -+{ -+ CSTD_UNUSED(param); -+} ++ if (!kbdev) ++ return -ENODEV; + -+void kbasep_trace_dump(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); -+} -+#endif /* KBASE_TRACE_ENABLE */ ++ current_policy = kbase_pm_get_policy(kbdev); + -+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value) -+{ -+ switch (control) { -+ case FBDUMP_CONTROL_ENABLE: -+ /* fall through */ -+ case FBDUMP_CONTROL_RATE: -+ /* fall through */ -+ case SW_COUNTER_ENABLE: -+ /* fall through */ -+ case FBDUMP_CONTROL_RESIZE_FACTOR: -+ kbdev->kbase_profiling_controls[control] = value; -+ break; -+ default: -+ dev_err(kbdev->dev, "Profiling control %d not found\n", control); -+ break; ++ policy_count = kbase_pm_list_policies(&policy_list); ++ ++ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { ++ if (policy_list[i] == current_policy) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } -+} + -+/* -+ * Called by gator to control the production of -+ * profiling information at runtime -+ * */ ++ if (ret < PAGE_SIZE - 1) { ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); ++ } else { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+void _mali_profiling_control(u32 action, u32 value) ++ return ret; ++} ++ ++/** ++ * set_policy - 
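/*
 * Illustrative sketch, not part of the patch: show_policy() above emits the
 * available power policies on one line with the active one wrapped in square
 * brackets, e.g. "demand [coarse_demand] always_on".  The sysfs path used
 * here is an assumption and depends on how the device is registered.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        char *lb, *rb;
        FILE *f = fopen("/sys/class/misc/mali0/device/power_policy", "r");

        if (!f) {
                perror("power_policy");
                return 1;
        }
        if (!fgets(line, sizeof(line), f)) {
                fclose(f);
                return 1;
        }
        fclose(f);

        lb = strchr(line, '[');
        rb = lb ? strchr(lb, ']') : NULL;
        if (lb && rb) {
                *rb = '\0';
                printf("active policy: %s\n", lb + 1);
        }
        return 0;
}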
Store callback for the power_policy sysfs file. ++ * ++ * This function is called when the power_policy sysfs file is written to. ++ * It matches the requested policy against the available policies and if a ++ * matching policy is found calls kbase_pm_set_policy() to change the ++ * policy. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ struct kbase_device *kbdev = NULL; ++ struct kbase_device *kbdev; ++ const struct kbase_pm_policy *new_policy = NULL; ++ const struct kbase_pm_policy *const *policy_list; ++ int policy_count; ++ int i; + -+ /* find the first i.e. call with -1 */ -+ kbdev = kbase_find_device(-1); ++ kbdev = to_kbase_device(dev); + -+ if (NULL != kbdev) -+ kbase_set_profiling_control(kbdev, action, value); ++ if (!kbdev) ++ return -ENODEV; ++ ++ policy_count = kbase_pm_list_policies(&policy_list); ++ ++ for (i = 0; i < policy_count; i++) { ++ if (sysfs_streq(policy_list[i]->name, buf)) { ++ new_policy = policy_list[i]; ++ break; ++ } ++ } ++ ++ if (!new_policy) { ++ dev_err(dev, "power_policy: policy not found\n"); ++ return -EINVAL; ++ } ++ ++ kbase_pm_set_policy(kbdev, new_policy); ++ ++ return count; +} -+KBASE_EXPORT_SYMBOL(_mali_profiling_control); + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c -new file mode 100644 -index 000000000..f70bcccf4 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c -@@ -0,0 +1,76 @@ +/* ++ * The sysfs file power_policy. + * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. ++ * This is used for obtaining information about the available policies, ++ * determining which policy is currently active, and changing the active ++ * policy. ++ */ ++static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); ++ ++/** ++ * show_ca_policy - Show callback for the core_availability_policy sysfs file. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * This function is called to get the contents of the core_availability_policy ++ * sysfs file. This is a list of the available policies with the currently ++ * active one surrounded by square brackets. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * ++ * Return: The number of bytes output to @buf. 
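/*
 * Companion sketch to set_policy() above (illustrative only, path assumed):
 * the store handler compares the written string against the registered policy
 * names with sysfs_streq(), which tolerates a trailing newline, so writing the
 * bare policy name is enough.
 */
#include <stdio.h>

int set_power_policy(const char *name)
{
        FILE *f = fopen("/sys/class/misc/mali0/device/power_policy", "w");

        if (!f)
                return -1;
        fprintf(f, "%s\n", name);  /* e.g. "always_on" or "coarse_demand" */
        return fclose(f) == 0 ? 0 : -1;
}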
+ */ ++static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ const struct kbase_pm_ca_policy *current_policy; ++ const struct kbase_pm_ca_policy *const *policy_list; ++ int policy_count; ++ int i; ++ ssize_t ret = 0; + ++ kbdev = to_kbase_device(dev); + ++ if (!kbdev) ++ return -ENODEV; + -+/* -+ * Base kernel disjoint events helper functions -+ */ -+ -+#include ++ current_policy = kbase_pm_ca_get_policy(kbdev); + -+void kbase_disjoint_init(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ policy_count = kbase_pm_ca_list_policies(&policy_list); + -+ atomic_set(&kbdev->disjoint_event.count, 0); -+ atomic_set(&kbdev->disjoint_event.state, 0); -+} ++ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { ++ if (policy_list[i] == current_policy) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); ++ } + -+/* increment the disjoint event count */ -+void kbase_disjoint_event(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ if (ret < PAGE_SIZE - 1) { ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); ++ } else { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+ atomic_inc(&kbdev->disjoint_event.count); ++ return ret; +} + -+/* increment the state and the event counter */ -+void kbase_disjoint_state_up(struct kbase_device *kbdev) ++/** ++ * set_ca_policy - Store callback for the core_availability_policy sysfs file. ++ * ++ * This function is called when the core_availability_policy sysfs file is ++ * written to. It matches the requested policy against the available policies ++ * and if a matching policy is found calls kbase_pm_set_policy() to change ++ * the policy. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ struct kbase_device *kbdev; ++ const struct kbase_pm_ca_policy *new_policy = NULL; ++ const struct kbase_pm_ca_policy *const *policy_list; ++ int policy_count; ++ int i; + -+ atomic_inc(&kbdev->disjoint_event.state); ++ kbdev = to_kbase_device(dev); + -+ kbase_disjoint_event(kbdev); -+} ++ if (!kbdev) ++ return -ENODEV; + -+/* decrement the state */ -+void kbase_disjoint_state_down(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); ++ policy_count = kbase_pm_ca_list_policies(&policy_list); + -+ kbase_disjoint_event(kbdev); ++ for (i = 0; i < policy_count; i++) { ++ if (sysfs_streq(policy_list[i]->name, buf)) { ++ new_policy = policy_list[i]; ++ break; ++ } ++ } + -+ atomic_dec(&kbdev->disjoint_event.state); -+} ++ if (!new_policy) { ++ dev_err(dev, "core_availability_policy: policy not found\n"); ++ return -EINVAL; ++ } + -+/* increments the count only if the state is > 0 */ -+void kbase_disjoint_event_potential(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbase_pm_ca_set_policy(kbdev, new_policy); + -+ if (atomic_read(&kbdev->disjoint_event.state)) -+ kbase_disjoint_event(kbdev); ++ return count; +} + -+u32 kbase_disjoint_event_get(struct kbase_device *kbdev) -+{ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ return atomic_read(&kbdev->disjoint_event.count); -+} -+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); -diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c -new file mode 100644 -index 000000000..9197743c8 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c -@@ -0,0 +1,449 @@ +/* ++ * The sysfs file core_availability_policy + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * This is used for obtaining information about the available policies, ++ * determining which policy is currently active, and changing the active ++ * policy. ++ */ ++static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy); ++ ++/* ++ * show_core_mask - Show callback for the core_mask sysfs file. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * This function is called to get the contents of the core_mask sysfs file. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * ++ * Return: The number of bytes output to @buf. 
+ */ ++static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret = 0; + ++ kbdev = to_kbase_device(dev); + ++ if (!kbdev) ++ return -ENODEV; + ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS0) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[0]); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS1) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[1]); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current core mask (JS2) : 0x%llX\n", ++ kbdev->pm.debug_core_mask[2]); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Available core mask : 0x%llX\n", ++ kbdev->gpu_props.props.raw_props.shader_present); + -+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as -+ * it will be set there. -+ */ -+#include "mali_kbase_dma_fence.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+static void -+kbase_dma_fence_work(struct work_struct *pwork); ++ return ret; ++} + -+static void -+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) ++/** ++ * set_core_mask - Store callback for the core_mask sysfs file. ++ * ++ * This function is called when the core_mask sysfs file is written to. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev; ++ u64 new_core_mask[3]; ++ int items; + -+ list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); -+} ++ kbdev = to_kbase_device(dev); + -+static void -+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) -+{ -+ list_del(&katom->queue); -+} ++ if (!kbdev) ++ return -ENODEV; + -+static int -+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, -+ struct ww_acquire_ctx *ctx) -+{ -+ struct reservation_object *content_res = NULL; -+ unsigned int content_res_idx = 0; -+ unsigned int r; -+ int err = 0; ++ items = sscanf(buf, "%llx %llx %llx", ++ &new_core_mask[0], &new_core_mask[1], ++ &new_core_mask[2]); + -+ ww_acquire_init(ctx, &reservation_ww_class); ++ if (items == 1) ++ new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; + -+retry: -+ for (r = 0; r < info->dma_fence_resv_count; r++) { -+ if (info->resv_objs[r] == content_res) { -+ content_res = NULL; -+ continue; -+ } ++ if (items == 1 || items == 3) { ++ u64 shader_present = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ u64 group0_core_mask = ++ kbdev->gpu_props.props.coherency_info.group[0]. 
++ core_mask; + -+ err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); -+ if (err) -+ goto error; -+ } ++ if ((new_core_mask[0] & shader_present) != new_core_mask[0] || ++ !(new_core_mask[0] & group0_core_mask) || ++ (new_core_mask[1] & shader_present) != ++ new_core_mask[1] || ++ !(new_core_mask[1] & group0_core_mask) || ++ (new_core_mask[2] & shader_present) != ++ new_core_mask[2] || ++ !(new_core_mask[2] & group0_core_mask)) { ++ dev_err(dev, "power_policy: invalid core specification\n"); ++ return -EINVAL; ++ } + -+ ww_acquire_done(ctx); -+ return err; ++ if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || ++ kbdev->pm.debug_core_mask[1] != ++ new_core_mask[1] || ++ kbdev->pm.debug_core_mask[2] != ++ new_core_mask[2]) { ++ unsigned long flags; + -+error: -+ content_res_idx = r; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Unlock the locked one ones */ -+ while (r--) -+ ww_mutex_unlock(&info->resv_objs[r]->lock); ++ kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], ++ new_core_mask[1], new_core_mask[2]); + -+ if (content_res) -+ ww_mutex_unlock(&content_res->lock); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } + -+ /* If we deadlock try with lock_slow and retry */ -+ if (err == -EDEADLK) { -+ content_res = info->resv_objs[content_res_idx]; -+ ww_mutex_lock_slow(&content_res->lock, ctx); -+ goto retry; ++ return count; + } + -+ /* If we are here the function failed */ -+ ww_acquire_fini(ctx); -+ return err; ++ dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" ++ "Use format \n" ++ "or \n"); ++ return -EINVAL; +} + -+static void -+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, -+ struct ww_acquire_ctx *ctx) -+{ -+ unsigned int r; -+ -+ for (r = 0; r < info->dma_fence_resv_count; r++) -+ ww_mutex_unlock(&info->resv_objs[r]->lock); -+ ww_acquire_fini(ctx); -+} ++/* ++ * The sysfs file core_mask. ++ * ++ * This is used to restrict shader core availability for debugging purposes. ++ * Reading it will show the current core mask and the mask of cores available. ++ * Writing to it will set the current core mask. ++ */ ++static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + +/** -+ * kbase_dma_fence_queue_work() - Queue work to handle @katom -+ * @katom: Pointer to atom for which to queue work ++ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs ++ * file. + * -+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and -+ * submit the atom. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This allows setting the timeout for software jobs. Waiting soft event wait ++ * jobs will be cancelled after this period expires, while soft fence wait jobs ++ * will print debug information if the fence debug feature is enabled. ++ * ++ * This is expressed in milliseconds. ++ * ++ * Return: count if the function succeeded. An error code on failure. 
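/*
 * Stand-alone restatement (sketch, not driver code) of the checks that
 * set_core_mask() applies above: every requested mask must be a subset of the
 * cores that are physically present and must keep at least one core of
 * coherency group 0 enabled.  The masks in main() are invented.
 */
#include <stdbool.h>
#include <stdio.h>

static bool core_mask_valid(unsigned long long mask,
                            unsigned long long shader_present,
                            unsigned long long group0_core_mask)
{
        /* No bits outside the physically present cores... */
        if ((mask & shader_present) != mask)
                return false;
        /* ...and at least one group-0 core left enabled. */
        return (mask & group0_core_mask) != 0;
}

int main(void)
{
        unsigned long long shader_present = 0xf, group0 = 0x3;

        /* Prints "1 0": 0x3 is acceptable, 0x10 names a core that is absent. */
        printf("%d %d\n",
               core_mask_valid(0x3, shader_present, group0),
               core_mask_valid(0x10, shader_present, group0));
        return 0;
}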
+ */ -+static void -+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) ++static ssize_t set_soft_job_timeout(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) +{ -+ struct kbase_context *kctx = katom->kctx; -+ bool ret; ++ struct kbase_device *kbdev; ++ int soft_job_timeout_ms; + -+ INIT_WORK(&katom->work, kbase_dma_fence_work); -+ ret = queue_work(kctx->dma_fence.wq, &katom->work); -+ /* Warn if work was already queued, that should not happen. */ -+ WARN_ON(!ret); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || ++ (soft_job_timeout_ms <= 0)) ++ return -EINVAL; ++ ++ atomic_set(&kbdev->js_data.soft_job_timeout_ms, ++ soft_job_timeout_ms); ++ ++ return count; +} + +/** -+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom -+ * @katom: Katom to cancel ++ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs ++ * file. + * -+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. ++ * This will return the timeout for the software jobs. ++ * ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer for the sysfs file contents. ++ * ++ * Return: The number of bytes output to buf. + */ -+static void -+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) ++static ssize_t show_soft_job_timeout(struct device *dev, ++ struct device_attribute *attr, ++ char * const buf) +{ -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ struct kbase_device *kbdev; + -+ /* Cancel callbacks and clean up. */ -+ kbase_fence_free_callbacks(katom); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ /* Mark the atom as handled in case all fences signaled just before -+ * canceling the callbacks and the worker was queued. -+ */ -+ kbase_fence_dep_count_set(katom, -1); ++ return scnprintf(buf, PAGE_SIZE, "%i\n", ++ atomic_read(&kbdev->js_data.soft_job_timeout_ms)); ++} + -+ /* Prevent job_done_nolock from being called twice on an atom when -+ * there is a race between job completion and cancellation. -+ */ ++static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, ++ show_soft_job_timeout, set_soft_job_timeout); + -+ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { -+ /* Wait was cancelled - zap the atom */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(katom->kctx->kbdev); ++static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, ++ int default_ticks, u32 old_ticks) ++{ ++ if (timeout_ms > 0) { ++ u64 ticks = timeout_ms * 1000000ULL; ++ do_div(ticks, kbdev->js_data.scheduling_period_ns); ++ if (!ticks) ++ return 1; ++ return ticks; ++ } else if (timeout_ms < 0) { ++ return default_ticks; ++ } else { ++ return old_ticks; + } +} + +/** -+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled -+ * @pwork: work_struct containing a pointer to a katom ++ * set_js_timeouts - Store callback for the js_timeouts sysfs file. + * -+ * This function will clean and mark all dependencies as satisfied ++ * This function is called to get the contents of the js_timeouts sysfs ++ * file. This file contains five values separated by whitespace. 
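/*
 * User-space restatement (sketch) of timeout_ms_to_ticks() above: a timeout in
 * milliseconds is converted to scheduler ticks by dividing by the scheduling
 * period and clamping to at least one tick; 0 keeps the old value and a
 * negative value restores the default.  The 100 ms period in main() is an
 * assumed example value.
 */
#include <stdio.h>

static unsigned int ms_to_ticks(long timeout_ms, unsigned int period_ns,
                                unsigned int default_ticks, unsigned int old_ticks)
{
        if (timeout_ms > 0) {
                unsigned long long ticks =
                        (unsigned long long)timeout_ms * 1000000ULL / period_ns;
                return ticks ? (unsigned int)ticks : 1;
        }
        return timeout_ms < 0 ? default_ticks : old_ticks;
}

int main(void)
{
        /* 3000 ms at a 100 ms scheduling period -> prints 30. */
        printf("%u\n", ms_to_ticks(3000, 100 * 1000 * 1000, 5, 7));
        return 0;
}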
The values ++ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, ++ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING ++ * configuration values (in that order), with the difference that the js_timeout ++ * values are expressed in MILLISECONDS. ++ * ++ * The js_timeouts sysfile file allows the current values in ++ * use by the job scheduler to get override. Note that a value needs to ++ * be other than 0 for it to override the current job scheduler value. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. + */ -+static void -+kbase_dma_fence_work(struct work_struct *pwork) ++static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ -+ struct kbase_jd_atom *katom; -+ struct kbase_jd_context *ctx; ++ struct kbase_device *kbdev; ++ int items; ++ long js_soft_stop_ms; ++ long js_soft_stop_ms_cl; ++ long js_hard_stop_ms_ss; ++ long js_hard_stop_ms_cl; ++ long js_hard_stop_ms_dumping; ++ long js_reset_ms_ss; ++ long js_reset_ms_cl; ++ long js_reset_ms_dumping; + -+ katom = container_of(pwork, struct kbase_jd_atom, work); -+ ctx = &katom->kctx->jctx; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ mutex_lock(&ctx->lock); -+ if (kbase_fence_dep_count_read(katom) != 0) -+ goto out; ++ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", ++ &js_soft_stop_ms, &js_soft_stop_ms_cl, ++ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, ++ &js_hard_stop_ms_dumping, &js_reset_ms_ss, ++ &js_reset_ms_cl, &js_reset_ms_dumping); + -+ kbase_fence_dep_count_set(katom, -1); ++ if (items == 8) { ++ struct kbasep_js_device_data *js_data = &kbdev->js_data; ++ unsigned long flags; + -+ /* Remove atom from list of dma-fence waiting atoms. */ -+ kbase_dma_fence_waiters_remove(katom); -+ /* Cleanup callbacks. */ -+ kbase_fence_free_callbacks(katom); -+ /* -+ * Queue atom on GPU, unless it has already completed due to a failing -+ * dependency. Run jd_done_nolock() on the katom if it is completed. -+ */ -+ if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) -+ jd_done_nolock(katom, NULL); -+ else -+ kbase_jd_dep_clear_locked(katom); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+out: -+ mutex_unlock(&ctx->lock); ++#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ ++ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ ++ default, js_data->ticks_name); \ ++ dev_dbg(kbdev->dev, "Overriding " #ticks_name \ ++ " with %lu ticks (%lu ms)\n", \ ++ (unsigned long)js_data->ticks_name, \ ++ ms_name); \ ++ } while (0) ++ ++ UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, ++ DEFAULT_JS_SOFT_STOP_TICKS); ++ UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, ++ DEFAULT_JS_SOFT_STOP_TICKS_CL); ++ UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? ++ DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : ++ DEFAULT_JS_HARD_STOP_TICKS_SS); ++ UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, ++ DEFAULT_JS_HARD_STOP_TICKS_CL); ++ UPDATE_TIMEOUT(hard_stop_ticks_dumping, ++ js_hard_stop_ms_dumping, ++ DEFAULT_JS_HARD_STOP_TICKS_DUMPING); ++ UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? 
++ DEFAULT_JS_RESET_TICKS_SS_8408 : ++ DEFAULT_JS_RESET_TICKS_SS); ++ UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, ++ DEFAULT_JS_RESET_TICKS_CL); ++ UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, ++ DEFAULT_JS_RESET_TICKS_DUMPING); ++ ++ kbase_js_set_timeouts(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return count; ++ } ++ ++ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" ++ "Use format \n" ++ "Write 0 for no change, -1 to restore default timeout\n"); ++ return -EINVAL; +} + -+static void -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) -+#else -+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -+#endif ++static unsigned long get_js_timeout_in_ms( ++ u32 scheduling_period_ns, ++ u32 ticks) +{ -+ struct kbase_fence_cb *kcb = container_of(cb, -+ struct kbase_fence_cb, -+ fence_cb); -+ struct kbase_jd_atom *katom = kcb->katom; -+ -+ /* If the atom is zapped dep_count will be forced to a negative number -+ * preventing this callback from ever scheduling work. Which in turn -+ * would reschedule the atom. -+ */ ++ u64 ms = (u64)ticks * scheduling_period_ns; + -+ if (kbase_fence_dep_count_dec_and_test(katom)) -+ kbase_dma_fence_queue_work(katom); ++ do_div(ms, 1000000UL); ++ return ms; +} + -+static int -+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, -+ struct reservation_object *resv, -+ bool exclusive) ++/** ++ * show_js_timeouts - Show callback for the js_timeouts sysfs file. ++ * ++ * This function is called to get the contents of the js_timeouts sysfs ++ * file. It returns the last set values written to the js_timeouts sysfs file. ++ * If the file didn't get written yet, the values will be current setting in ++ * use. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) +{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *excl_fence = NULL; -+ struct fence **shared_fences = NULL; -+#else -+ struct dma_fence *excl_fence = NULL; -+ struct dma_fence **shared_fences = NULL; -+#endif -+ unsigned int shared_count = 0; -+ int err, i; ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ unsigned long js_soft_stop_ms; ++ unsigned long js_soft_stop_ms_cl; ++ unsigned long js_hard_stop_ms_ss; ++ unsigned long js_hard_stop_ms_cl; ++ unsigned long js_hard_stop_ms_dumping; ++ unsigned long js_reset_ms_ss; ++ unsigned long js_reset_ms_cl; ++ unsigned long js_reset_ms_dumping; ++ u32 scheduling_period_ns; + -+ err = reservation_object_get_fences_rcu(resv, -+ &excl_fence, -+ &shared_count, -+ &shared_fences); -+ if (err) -+ return err; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ if (excl_fence) { -+ err = kbase_fence_add_callback(katom, -+ excl_fence, -+ kbase_dma_fence_cb); ++ scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + -+ /* Release our reference, taken by reservation_object_get_fences_rcu(), -+ * to the fence. We have set up our callback (if that was possible), -+ * and it's the fence's owner is responsible for singling the fence -+ * before allowing it to disappear. 
-+ */ -+ dma_fence_put(excl_fence); ++#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ ++ scheduling_period_ns, \ ++ kbdev->js_data.name) + -+ if (err) -+ goto out; -+ } ++ js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); ++ js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); ++ js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); ++ js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); ++ js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); ++ js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); ++ js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); ++ js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); + -+ if (exclusive) { -+ for (i = 0; i < shared_count; i++) { -+ err = kbase_fence_add_callback(katom, -+ shared_fences[i], -+ kbase_dma_fence_cb); -+ if (err) -+ goto out; -+ } -+ } ++#undef GET_TIMEOUT + -+ /* Release all our references to the shared fences, taken by -+ * reservation_object_get_fences_rcu(). We have set up our callback (if -+ * that was possible), and it's the fence's owner is responsible for -+ * signaling the fence before allowing it to disappear. -+ */ -+out: -+ for (i = 0; i < shared_count; i++) -+ dma_fence_put(shared_fences[i]); -+ kfree(shared_fences); ++ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", ++ js_soft_stop_ms, js_soft_stop_ms_cl, ++ js_hard_stop_ms_ss, js_hard_stop_ms_cl, ++ js_hard_stop_ms_dumping, js_reset_ms_ss, ++ js_reset_ms_cl, js_reset_ms_dumping); + -+ if (err) { -+ /* -+ * On error, cancel and clean up all callbacks that was set up -+ * before the error. -+ */ -+ kbase_fence_free_callbacks(katom); ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; + } + -+ return err; ++ return ret; +} + -+void kbase_dma_fence_add_reservation(struct reservation_object *resv, -+ struct kbase_dma_fence_resv_info *info, -+ bool exclusive) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < info->dma_fence_resv_count; i++) { -+ /* Duplicate resource, ignore */ -+ if (info->resv_objs[i] == resv) -+ return; -+ } ++/* ++ * The sysfs file js_timeouts. ++ * ++ * This is used to override the current job scheduler values for ++ * JS_STOP_STOP_TICKS_SS ++ * JS_STOP_STOP_TICKS_CL ++ * JS_HARD_STOP_TICKS_SS ++ * JS_HARD_STOP_TICKS_CL ++ * JS_HARD_STOP_TICKS_DUMPING ++ * JS_RESET_TICKS_SS ++ * JS_RESET_TICKS_CL ++ * JS_RESET_TICKS_DUMPING. ++ */ ++static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); + -+ info->resv_objs[info->dma_fence_resv_count] = resv; -+ if (exclusive) -+ set_bit(info->dma_fence_resv_count, -+ info->dma_fence_excl_bitmap); -+ (info->dma_fence_resv_count)++; ++static u32 get_new_js_timeout( ++ u32 old_period, ++ u32 old_ticks, ++ u32 new_scheduling_period_ns) ++{ ++ u64 ticks = (u64)old_period * (u64)old_ticks; ++ do_div(ticks, new_scheduling_period_ns); ++ return ticks?ticks:1; +} + -+int kbase_dma_fence_wait(struct kbase_jd_atom *katom, -+ struct kbase_dma_fence_resv_info *info) ++/** ++ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs ++ * file ++ * @dev: The device the sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the js_scheduling_period sysfs file is written ++ * to. It checks the data written, and if valid updates the js_scheduling_period ++ * value ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t set_js_scheduling_period(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) +{ -+ int err, i; -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ struct ww_acquire_ctx ww_ctx; -+ -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int js_scheduling_period; ++ u32 new_scheduling_period_ns; ++ u32 old_period; ++ struct kbasep_js_device_data *js_data; ++ unsigned long flags; + -+ fence = kbase_fence_out_new(katom); -+ if (!fence) { -+ err = -ENOMEM; -+ dev_err(katom->kctx->kbdev->dev, -+ "Error %d creating fence.\n", err); -+ return err; -+ } ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ kbase_fence_dep_count_set(katom, 1); ++ js_data = &kbdev->js_data; + -+ err = kbase_dma_fence_lock_reservations(info, &ww_ctx); -+ if (err) { -+ dev_err(katom->kctx->kbdev->dev, -+ "Error %d locking reservations.\n", err); -+ kbase_fence_dep_count_set(katom, -1); -+ kbase_fence_out_remove(katom); -+ return err; ++ ret = kstrtouint(buf, 0, &js_scheduling_period); ++ if (ret || !js_scheduling_period) { ++ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" ++ "Use format \n"); ++ return -EINVAL; + } + -+ for (i = 0; i < info->dma_fence_resv_count; i++) { -+ struct reservation_object *obj = info->resv_objs[i]; ++ new_scheduling_period_ns = js_scheduling_period * 1000000; + -+ if (!test_bit(i, info->dma_fence_excl_bitmap)) { -+ err = reservation_object_reserve_shared(obj); -+ if (err) { -+ dev_err(katom->kctx->kbdev->dev, -+ "Error %d reserving space for shared fence.\n", err); -+ goto end; -+ } ++ /* Update scheduling timeouts */ ++ mutex_lock(&js_data->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ err = kbase_dma_fence_add_reservation_callback(katom, obj, false); -+ if (err) { -+ dev_err(katom->kctx->kbdev->dev, -+ "Error %d adding reservation to callback.\n", err); -+ goto end; -+ } ++ /* If no contexts have been scheduled since js_timeouts was last written ++ * to, the new timeouts might not have been latched yet. So check if an ++ * update is pending and use the new values if necessary. */ + -+ reservation_object_add_shared_fence(obj, fence); -+ } else { -+ err = kbase_dma_fence_add_reservation_callback(katom, obj, true); -+ if (err) { -+ dev_err(katom->kctx->kbdev->dev, -+ "Error %d adding reservation to callback.\n", err); -+ goto end; -+ } ++ /* Use previous 'new' scheduling period as a base if present. */ ++ old_period = js_data->scheduling_period_ns; + -+ reservation_object_add_excl_fence(obj, fence); -+ } -+ } ++#define SET_TIMEOUT(name) \ ++ (js_data->name = get_new_js_timeout(\ ++ old_period, \ ++ kbdev->js_data.name, \ ++ new_scheduling_period_ns)) + -+end: -+ kbase_dma_fence_unlock_reservations(info, &ww_ctx); ++ SET_TIMEOUT(soft_stop_ticks); ++ SET_TIMEOUT(soft_stop_ticks_cl); ++ SET_TIMEOUT(hard_stop_ticks_ss); ++ SET_TIMEOUT(hard_stop_ticks_cl); ++ SET_TIMEOUT(hard_stop_ticks_dumping); ++ SET_TIMEOUT(gpu_reset_ticks_ss); ++ SET_TIMEOUT(gpu_reset_ticks_cl); ++ SET_TIMEOUT(gpu_reset_ticks_dumping); + -+ if (likely(!err)) { -+ /* Test if the callbacks are already triggered */ -+ if (kbase_fence_dep_count_dec_and_test(katom)) { -+ kbase_fence_dep_count_set(katom, -1); -+ kbase_fence_free_callbacks(katom); -+ } else { -+ /* Add katom to the list of dma-buf fence waiting atoms -+ * only if it is still waiting. 
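/*
 * Sketch of the rescaling done by get_new_js_timeout()/SET_TIMEOUT above when
 * the scheduling period changes: the absolute timeout (period * ticks) is kept
 * constant and re-expressed in ticks of the new period, never dropping below
 * one tick.  The numbers in main() are illustrative.
 */
#include <stdio.h>

static unsigned int rescale_ticks(unsigned int old_period_ns, unsigned int old_ticks,
                                  unsigned int new_period_ns)
{
        unsigned long long ticks =
                (unsigned long long)old_period_ns * old_ticks / new_period_ns;

        return ticks ? (unsigned int)ticks : 1;
}

int main(void)
{
        /* 50 ticks of a 100 ms period become 250 ticks of a 20 ms period. */
        printf("%u\n", rescale_ticks(100000000U, 50, 20000000U));
        return 0;
}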
-+ */ -+ kbase_dma_fence_waiters_add(katom); -+ } -+ } else { -+ /* There was an error, cancel callbacks, set dep_count to -1 to -+ * indicate that the atom has been handled (the caller will -+ * kill it for us), signal the fence, free callbacks and the -+ * fence. -+ */ -+ kbase_fence_free_callbacks(katom); -+ kbase_fence_dep_count_set(katom, -1); -+ kbase_dma_fence_signal(katom); -+ } ++#undef SET_TIMEOUT + -+ return err; -+} ++ js_data->scheduling_period_ns = new_scheduling_period_ns; + -+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) -+{ -+ struct list_head *list = &kctx->dma_fence.waiting_resource; ++ kbase_js_set_timeouts(kbdev); + -+ while (!list_empty(list)) { -+ struct kbase_jd_atom *katom; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_data->runpool_mutex); + -+ katom = list_first_entry(list, struct kbase_jd_atom, queue); -+ kbase_dma_fence_waiters_remove(katom); -+ kbase_dma_fence_cancel_atom(katom); -+ } -+} ++ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", ++ js_scheduling_period); + -+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) -+{ -+ /* Cancel callbacks and clean up. */ -+ if (kbase_fence_free_callbacks(katom)) -+ kbase_dma_fence_queue_work(katom); ++ return count; +} + -+void kbase_dma_fence_signal(struct kbase_jd_atom *katom) ++/** ++ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current period used for the JS scheduling ++ * period. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_js_scheduling_period(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ if (!katom->dma_fence.fence) -+ return; ++ struct kbase_device *kbdev; ++ u32 period; ++ ssize_t ret; + -+ /* Signal the atom's fence. */ -+ dma_fence_signal(katom->dma_fence.fence); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ kbase_fence_out_remove(katom); ++ period = kbdev->js_data.scheduling_period_ns; + -+ kbase_fence_free_callbacks(katom); -+} ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ++ period / 1000000); + -+void kbase_dma_fence_term(struct kbase_context *kctx) -+{ -+ destroy_workqueue(kctx->dma_fence.wq); -+ kctx->dma_fence.wq = NULL; ++ return ret; +} + -+int kbase_dma_fence_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); -+ -+ kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", -+ WQ_UNBOUND, 1, kctx->pid); -+ if (!kctx->dma_fence.wq) -+ return -ENOMEM; ++static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, ++ show_js_scheduling_period, set_js_scheduling_period); + -+ return 0; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h -new file mode 100644 -index 000000000..c9ab40350 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h -@@ -0,0 +1,131 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++#if !MALI_CUSTOMER_RELEASE ++/** ++ * set_force_replay - Store callback for the force_replay sysfs file. 
+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file + * ++ * Return: @count if the function succeeded. An error code on failure. + */ ++static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; + ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + ++ if (!strncmp("limit=", buf, MIN(6, count))) { ++ int force_replay_limit; ++ int items = sscanf(buf, "limit=%u", &force_replay_limit); + -+#ifndef _KBASE_DMA_FENCE_H_ -+#define _KBASE_DMA_FENCE_H_ -+ -+#ifdef CONFIG_MALI_DMA_FENCE ++ if (items == 1) { ++ kbdev->force_replay_random = false; ++ kbdev->force_replay_limit = force_replay_limit; ++ kbdev->force_replay_count = 0; + -+#include -+#include -+#include ++ return count; ++ } ++ } else if (!strncmp("random_limit", buf, MIN(12, count))) { ++ kbdev->force_replay_random = true; ++ kbdev->force_replay_count = 0; + ++ return count; ++ } else if (!strncmp("norandom_limit", buf, MIN(14, count))) { ++ kbdev->force_replay_random = false; ++ kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED; ++ kbdev->force_replay_count = 0; + -+/* Forward declaration from mali_kbase_defs.h */ -+struct kbase_jd_atom; -+struct kbase_context; ++ return count; ++ } else if (!strncmp("core_req=", buf, MIN(9, count))) { ++ unsigned int core_req; ++ int items = sscanf(buf, "core_req=%x", &core_req); + -+/** -+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects -+ * @resv_objs: Array of reservation objects to attach the -+ * new fence to. -+ * @dma_fence_resv_count: Number of reservation objects in the array. -+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive. -+ * -+ * This is used by some functions to pass around a collection of data about -+ * reservation objects. -+ */ -+struct kbase_dma_fence_resv_info { -+ struct reservation_object **resv_objs; -+ unsigned int dma_fence_resv_count; -+ unsigned long *dma_fence_excl_bitmap; -+}; ++ if (items == 1) { ++ kbdev->force_replay_core_req = (base_jd_core_req)core_req; + -+/** -+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs -+ * @resv: Reservation object to add to the array. -+ * @info: Pointer to struct with current reservation info -+ * @exclusive: Boolean indicating if exclusive access is needed -+ * -+ * The function adds a new reservation_object to an existing array of -+ * reservation_objects. At the same time keeps track of which objects require -+ * exclusive access in dma_fence_excl_bitmap. -+ */ -+void kbase_dma_fence_add_reservation(struct reservation_object *resv, -+ struct kbase_dma_fence_resv_info *info, -+ bool exclusive); ++ return count; ++ } ++ } ++ dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=, random_limit, norandom_limit, core_req=\n"); ++ return -EINVAL; ++} + +/** -+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs -+ * @katom: Katom with the external dependency. -+ * @info: Pointer to struct with current reservation info ++ * show_force_replay - Show callback for the force_replay sysfs file. 
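/*
 * Minimal sketch (not driver code) of the keyword parsing used by
 * set_force_replay() above: each write is matched against a small set of
 * "key" or "key=value" forms and anything unrecognised is rejected.
 */
#include <stdio.h>
#include <string.h>

static int parse_force_replay(const char *buf)
{
        unsigned int limit, core_req;

        if (sscanf(buf, "limit=%u", &limit) == 1)
                printf("fixed replay limit %u\n", limit);
        else if (strncmp(buf, "random_limit", 12) == 0)
                printf("random replay limit\n");
        else if (strncmp(buf, "norandom_limit", 14) == 0)
                printf("replay forcing disabled\n");
        else if (sscanf(buf, "core_req=%x", &core_req) == 1)
                printf("replay core requirements 0x%x\n", core_req);
        else
                return -1;
        return 0;
}

int main(void)
{
        return parse_force_replay("limit=3") | parse_force_replay("core_req=1f");
}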
+ * -+ * Return: An error code or 0 if succeeds -+ */ -+int kbase_dma_fence_wait(struct kbase_jd_atom *katom, -+ struct kbase_dma_fence_resv_info *info); -+ -+/** -+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx -+ * @kctx: Pointer to kbase context ++ * This function is called to get the contents of the force_replay sysfs ++ * file. It returns the last set value written to the force_replay sysfs file. ++ * If the file didn't get written yet, the values will be 0. + * -+ * This function will cancel and clean up all katoms on @kctx that is waiting -+ * on dma-buf fences. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents + * -+ * Locking: jctx.lock needs to be held when calling this function. ++ * Return: The number of bytes output to @buf. + */ -+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); ++static ssize_t show_force_replay(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+/** -+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom -+ * @katom: Pointer to katom whose callbacks are to be canceled -+ * -+ * This function cancels all dma-buf fence callbacks on @katom, but does not -+ * cancel the katom itself. -+ * -+ * The caller is responsible for ensuring that jd_done_nolock is called on -+ * @katom. -+ * -+ * Locking: jctx.lock must be held when calling this function. -+ */ -+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+/** -+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait -+ * @katom: Pointer to katom to signal and clean up -+ * -+ * This function will signal the @katom's fence, if it has one, and clean up -+ * the callback data from the katom's wait on earlier fences. -+ * -+ * Locking: jctx.lock must be held while calling this function. -+ */ -+void kbase_dma_fence_signal(struct kbase_jd_atom *katom); ++ if (kbdev->force_replay_random) ++ ret = scnprintf(buf, PAGE_SIZE, ++ "limit=0\nrandom_limit\ncore_req=%x\n", ++ kbdev->force_replay_core_req); ++ else ++ ret = scnprintf(buf, PAGE_SIZE, ++ "limit=%u\nnorandom_limit\ncore_req=%x\n", ++ kbdev->force_replay_limit, ++ kbdev->force_replay_core_req); + -+/** -+ * kbase_dma_fence_term() - Terminate Mali dma-fence context -+ * @kctx: kbase context to terminate -+ */ -+void kbase_dma_fence_term(struct kbase_context *kctx); ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+/** -+ * kbase_dma_fence_init() - Initialize Mali dma-fence context -+ * @kctx: kbase context to initialize ++ return ret; ++} ++ ++/* ++ * The sysfs file force_replay. + */ -+int kbase_dma_fence_init(struct kbase_context *kctx); ++static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, ++ set_force_replay); ++#endif /* !MALI_CUSTOMER_RELEASE */ ++ ++#ifdef CONFIG_MALI_DEBUG ++static ssize_t set_js_softstop_always(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ int softstop_always; + ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#else /* CONFIG_MALI_DMA_FENCE */ -+/* Dummy functions for when dma-buf fence isn't enabled. 
*/ ++ ret = kstrtoint(buf, 0, &softstop_always); ++ if (ret || ((softstop_always != 0) && (softstop_always != 1))) { ++ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+static inline int kbase_dma_fence_init(struct kbase_context *kctx) ++ kbdev->js_data.softstop_always = (bool) softstop_always; ++ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", ++ (kbdev->js_data.softstop_always) ? ++ "Enabled" : "Disabled"); ++ return count; ++} ++ ++static ssize_t show_js_softstop_always(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ return 0; ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); ++ ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } ++ ++ return ret; +} + -+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} -+#endif /* CONFIG_MALI_DMA_FENCE */ -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c -new file mode 100644 -index 000000000..188148645 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c -@@ -0,0 +1,259 @@ +/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * By default, soft-stops are disabled when only a single context is present. ++ * The ability to enable soft-stop when only a single context is present can be ++ * used for debug and unit-testing purposes. ++ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) + */ ++static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); ++#endif /* CONFIG_MALI_DEBUG */ + ++#ifdef CONFIG_MALI_DEBUG ++typedef void (kbasep_debug_command_func) (struct kbase_device *); + ++enum kbasep_debug_command_code { ++ KBASEP_DEBUG_COMMAND_DUMPTRACE, + ++ /* This must be the last enum */ ++ KBASEP_DEBUG_COMMAND_COUNT ++}; + ++struct kbasep_debug_command { ++ char *str; ++ kbasep_debug_command_func *func; ++}; + -+#include -+#include -+#include ++/* Debug commands supported by the driver */ ++static const struct kbasep_debug_command debug_commands[] = { ++ { ++ .str = "dumptrace", ++ .func = &kbasep_trace_dump, ++ } ++}; + -+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++/** ++ * show_debug - Show callback for the debug_command sysfs file. ++ * ++ * This function is called to get the contents of the debug_command sysfs ++ * file. This is a list of the available debug commands, separated by newlines. ++ * ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Return: The number of bytes output to @buf. 
++ */ ++static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) +{ -+ struct base_jd_udata data; ++ struct kbase_device *kbdev; ++ int i; ++ ssize_t ret = 0; + -+ lockdep_assert_held(&kctx->jctx.lock); ++ kbdev = to_kbase_device(dev); + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); -+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); ++ if (!kbdev) ++ return -ENODEV; + -+ data = katom->udata; ++ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); + -+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); ++ if (ret >= PAGE_SIZE) { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } + -+ KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx); -+ KBASE_TLSTREAM_TL_DEL_ATOM(katom); ++ return ret; ++} + -+ katom->status = KBASE_JD_ATOM_STATE_UNUSED; ++/** ++ * issue_debug - Store callback for the debug_command sysfs file. ++ * ++ * This function is called when the debug_command sysfs file is written to. ++ * It matches the requested command against the available commands, and if ++ * a matching command is found calls the associated function from ++ * @debug_commands to issue the command. ++ * ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int i; + -+ wake_up(&katom->completed); ++ kbdev = to_kbase_device(dev); + -+ return data; -+} ++ if (!kbdev) ++ return -ENODEV; + -+int kbase_event_pending(struct kbase_context *ctx) -+{ -+ KBASE_DEBUG_ASSERT(ctx); ++ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { ++ if (sysfs_streq(debug_commands[i].str, buf)) { ++ debug_commands[i].func(kbdev); ++ return count; ++ } ++ } + -+ return (atomic_read(&ctx->event_count) != 0) || -+ (atomic_read(&ctx->event_closed) != 0); ++ /* Debug Command not found */ ++ dev_err(dev, "debug_command: command not known\n"); ++ return -EINVAL; +} + -+KBASE_EXPORT_TEST_API(kbase_event_pending); ++/* The sysfs file debug_command. ++ * ++ * This is used to issue general debug commands to the device driver. ++ * Reading it will produce a list of debug commands, separated by newlines. ++ * Writing to it with one of those commands will issue said command. ++ */ ++static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); ++#endif /* CONFIG_MALI_DEBUG */ + -+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) ++/** ++ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get a description of the present Mali ++ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the ++ * number of cores, the hardware version and the raw product id. For ++ * example ++ * ++ * Mali-T60x MP4 r0p0 0x6956 ++ * ++ * Return: The number of bytes output to @buf. 
++ */ ++static ssize_t kbase_show_gpuinfo(struct device *dev, ++ struct device_attribute *attr, char *buf) +{ -+ struct kbase_jd_atom *atom; ++ static const struct gpu_product_id_name { ++ unsigned id; ++ char *name; ++ } gpu_product_id_names[] = { ++ { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" }, ++ { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" }, ++ { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" }, ++ { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" }, ++ { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" }, ++ { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, ++ { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, ++ { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, ++ { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G71" }, ++ { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-THEx" }, ++ { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G51" }, ++ }; ++ const char *product_name = "(Unknown Mali GPU)"; ++ struct kbase_device *kbdev; ++ u32 gpu_id; ++ unsigned product_id, product_id_mask; ++ unsigned i; ++ bool is_new_format; + -+ KBASE_DEBUG_ASSERT(ctx); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ mutex_lock(&ctx->event_mutex); ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ is_new_format = GPU_ID_IS_NEW_FORMAT(product_id); ++ product_id_mask = ++ (is_new_format ? ++ GPU_ID2_PRODUCT_MODEL : ++ GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT; + -+ if (list_empty(&ctx->event_list)) { -+ if (!atomic_read(&ctx->event_closed)) { -+ mutex_unlock(&ctx->event_mutex); -+ return -1; -+ } ++ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { ++ const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + -+ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ -+ mutex_unlock(&ctx->event_mutex); -+ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; -+ memset(&uevent->udata, 0, sizeof(uevent->udata)); -+ dev_dbg(ctx->kbdev->dev, -+ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", -+ BASE_JD_EVENT_DRV_TERMINATED); -+ return 0; ++ if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) && ++ (p->id & product_id_mask) == ++ (product_id & product_id_mask)) { ++ product_name = p->name; ++ break; ++ } + } + -+ /* normal event processing */ -+ atomic_dec(&ctx->event_count); -+ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); -+ list_del(ctx->event_list.next); ++ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", ++ product_name, kbdev->gpu_props.num_cores, ++ (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, ++ product_id); ++} ++static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); + -+ mutex_unlock(&ctx->event_mutex); ++/** ++ * set_dvfs_period - Store callback for the dvfs_period sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the dvfs_period sysfs file is written to. It ++ * checks the data written, and if valid updates the DVFS period variable, ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t set_dvfs_period(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ int dvfs_period; + -+ dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); -+ uevent->event_code = atom->event_code; -+ uevent->atom_number = (atom - ctx->jctx.atoms); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_free_external_resources(atom); ++ ret = kstrtoint(buf, 0, &dvfs_period); ++ if (ret || dvfs_period <= 0) { ++ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } + -+ mutex_lock(&ctx->jctx.lock); -+ uevent->udata = kbase_event_process(ctx, atom); -+ mutex_unlock(&ctx->jctx.lock); ++ kbdev->pm.dvfs_period = dvfs_period; ++ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); + -+ return 0; ++ return count; +} + -+KBASE_EXPORT_TEST_API(kbase_event_dequeue); -+ +/** -+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not -+ * return an event but do have external -+ * resources -+ * @data: Work structure ++ * show_dvfs_period - Show callback for the dvfs_period sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current period used for the DVFS sample ++ * timer. ++ * ++ * Return: The number of bytes output to @buf. + */ -+static void kbase_event_process_noreport_worker(struct work_struct *data) ++static ssize_t show_dvfs_period(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_free_external_resources(katom); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ mutex_lock(&kctx->jctx.lock); -+ kbase_event_process(kctx, katom); -+ mutex_unlock(&kctx->jctx.lock); ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); ++ ++ return ret; +} + ++static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period, ++ set_dvfs_period); ++ +/** -+ * kbase_event_process_noreport - Process atoms that do not return an event -+ * @kctx: Context pointer -+ * @katom: Atom to be processed ++ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file + * -+ * Atoms that do not have external resources will be processed immediately. -+ * Atoms that do have external resources will be processed on a workqueue, in -+ * order to avoid locking issues. ++ * This function is called when the pm_poweroff sysfs file is written to. ++ * ++ * This file contains three values separated by whitespace. The values ++ * are gpu_poweroff_time (the period of the poweroff timer, in ns), ++ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle ++ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer ++ * ticks before the GPU is powered off), in that order. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
+ */ -+static void kbase_event_process_noreport(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) ++static ssize_t set_pm_poweroff(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) +{ -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { -+ INIT_WORK(&katom->work, kbase_event_process_noreport_worker); -+ queue_work(kctx->event_workq, &katom->work); -+ } else { -+ kbase_event_process(kctx, katom); ++ struct kbase_device *kbdev; ++ int items; ++ s64 gpu_poweroff_time; ++ int poweroff_shader_ticks, poweroff_gpu_ticks; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, ++ &poweroff_shader_ticks, ++ &poweroff_gpu_ticks); ++ if (items != 3) { ++ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" ++ "Use format \n"); ++ return -EINVAL; + } ++ ++ kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); ++ kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks; ++ kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks; ++ ++ return count; +} + +/** -+ * kbase_event_coalesce - Move pending events to the main event list -+ * @kctx: Context pointer ++ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. + * -+ * kctx->event_list and kctx->event_coalesce_count must be protected -+ * by a lock unless this is the last thread using them -+ * (and we're about to terminate the lock). ++ * This function is called to get the current period used for the DVFS sample ++ * timer. + * -+ * Return: The number of pending events moved to the main event list ++ * Return: The number of bytes output to @buf. 
+ */ -+static int kbase_event_coalesce(struct kbase_context *kctx) ++static ssize_t show_pm_poweroff(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ const int event_count = kctx->event_coalesce_count; ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+ /* Join the list of pending events onto the tail of the main list -+ and reset it */ -+ list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); -+ kctx->event_coalesce_count = 0; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ /* Return the number of events moved */ -+ return event_count; ++ ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n", ++ ktime_to_ns(kbdev->pm.gpu_poweroff_time), ++ kbdev->pm.poweroff_shader_ticks, ++ kbdev->pm.poweroff_gpu_ticks); ++ ++ return ret; +} + -+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) -+{ -+ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { -+ if (atom->event_code == BASE_JD_EVENT_DONE) { -+ /* Don't report the event */ -+ kbase_event_process_noreport(ctx, atom); -+ return; -+ } -+ } ++static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, ++ set_pm_poweroff); + -+ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { -+ /* Don't report the event */ -+ kbase_event_process_noreport(ctx, atom); -+ return; -+ } -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED); -+ if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { -+ /* Don't report the event until other event(s) have completed */ -+ mutex_lock(&ctx->event_mutex); -+ list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); -+ ++ctx->event_coalesce_count; -+ mutex_unlock(&ctx->event_mutex); -+ } else { -+ /* Report the event and any pending events now */ -+ int event_count = 1; ++/** ++ * set_reset_timeout - Store callback for the reset_timeout sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the reset_timeout sysfs file is written to. It ++ * checks the data written, and if valid updates the reset timeout. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t set_reset_timeout(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ int ret; ++ int reset_timeout; + -+ mutex_lock(&ctx->event_mutex); -+ event_count += kbase_event_coalesce(ctx); -+ list_add_tail(&atom->dep_item[0], &ctx->event_list); -+ atomic_add(event_count, &ctx->event_count); -+ mutex_unlock(&ctx->event_mutex); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ kbase_event_wakeup(ctx); ++ ret = kstrtoint(buf, 0, &reset_timeout); ++ if (ret || reset_timeout <= 0) { ++ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" ++ "Use format \n"); ++ return -EINVAL; + } -+} -+KBASE_EXPORT_TEST_API(kbase_event_post); + -+void kbase_event_close(struct kbase_context *kctx) -+{ -+ mutex_lock(&kctx->event_mutex); -+ atomic_set(&kctx->event_closed, true); -+ mutex_unlock(&kctx->event_mutex); -+ kbase_event_wakeup(kctx); ++ kbdev->reset_timeout_ms = reset_timeout; ++ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); ++ ++ return count; +} + -+int kbase_event_init(struct kbase_context *kctx) ++/** ++ * show_reset_timeout - Show callback for the reset_timeout sysfs entry. ++ * @dev: The device this sysfs file is for. 
++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current reset timeout. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_reset_timeout(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ KBASE_DEBUG_ASSERT(kctx); ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+ INIT_LIST_HEAD(&kctx->event_list); -+ INIT_LIST_HEAD(&kctx->event_coalesce_list); -+ mutex_init(&kctx->event_mutex); -+ atomic_set(&kctx->event_count, 0); -+ kctx->event_coalesce_count = 0; -+ atomic_set(&kctx->event_closed, false); -+ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ if (NULL == kctx->event_workq) -+ return -EINVAL; ++ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); + -+ return 0; ++ return ret; +} + -+KBASE_EXPORT_TEST_API(kbase_event_init); -+ -+void kbase_event_cleanup(struct kbase_context *kctx) -+{ -+ int event_count; ++static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, ++ set_reset_timeout); + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(kctx->event_workq); + -+ flush_workqueue(kctx->event_workq); -+ destroy_workqueue(kctx->event_workq); + -+ /* We use kbase_event_dequeue to remove the remaining events as that -+ * deals with all the cleanup needed for the atoms. -+ * -+ * Note: use of kctx->event_list without a lock is safe because this must be the last -+ * thread using it (because we're about to terminate the lock) -+ */ -+ event_count = kbase_event_coalesce(kctx); -+ atomic_add(event_count, &kctx->event_count); ++static ssize_t show_mem_pool_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; + -+ while (!list_empty(&kctx->event_list)) { -+ struct base_jd_event_v2 event; ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+ kbase_event_dequeue(kctx, &event); -+ } -+} ++ ret = scnprintf(buf, PAGE_SIZE, "%zu\n", ++ kbase_mem_pool_size(&kbdev->mem_pool)); + -+KBASE_EXPORT_TEST_API(kbase_event_cleanup); -diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.c b/drivers/gpu/arm/midgard/mali_kbase_fence.c -new file mode 100644 -index 000000000..3bcfb38c3 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.c -@@ -0,0 +1,200 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ return ret; ++} + ++static ssize_t set_mem_pool_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ size_t new_size; ++ int err; + ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + -+#include -+#include -+#include -+#include -+#include -+#include ++ err = kstrtoul(buf, 0, (unsigned long *)&new_size); ++ if (err) ++ return err; + -+/* Spin lock protecting all Mali fences as fence->lock. 
*/ -+static DEFINE_SPINLOCK(kbase_fence_lock); ++ kbase_mem_pool_trim(&kbdev->mem_pool, new_size); + -+static const char * -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+kbase_fence_get_driver_name(struct fence *fence) -+#else -+kbase_fence_get_driver_name(struct dma_fence *fence) -+#endif -+{ -+ return kbase_drv_name; ++ return count; +} + -+static const char * -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+kbase_fence_get_timeline_name(struct fence *fence) -+#else -+kbase_fence_get_timeline_name(struct dma_fence *fence) -+#endif -+{ -+ return kbase_timeline_name; -+} ++static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, ++ set_mem_pool_size); + -+static bool -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+kbase_fence_enable_signaling(struct fence *fence) -+#else -+kbase_fence_enable_signaling(struct dma_fence *fence) -+#endif ++static ssize_t show_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) +{ -+ return true; ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = scnprintf(buf, PAGE_SIZE, "%zu\n", ++ kbase_mem_pool_max_size(&kbdev->mem_pool)); ++ ++ return ret; +} + -+static void -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+kbase_fence_fence_value_str(struct fence *fence, char *str, int size) -+#else -+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) -+#endif ++static ssize_t set_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) +{ -+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) -+ snprintf(str, size, "%u", fence->seqno); -+#else -+ snprintf(str, size, "%llu", fence->seqno); -+#endif ++ struct kbase_device *kbdev; ++ size_t new_max_size; ++ int err; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ err = kstrtoul(buf, 0, (unsigned long *)&new_max_size); ++ if (err) ++ return -EINVAL; ++ ++ kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size); ++ ++ return count; +} + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+const struct fence_ops kbase_fence_ops = { -+ .wait = fence_default_wait, -+#else -+const struct dma_fence_ops kbase_fence_ops = { -+ .wait = dma_fence_default_wait, -+#endif -+ .get_driver_name = kbase_fence_get_driver_name, -+ .get_timeline_name = kbase_fence_get_timeline_name, -+ .enable_signaling = kbase_fence_enable_signaling, -+ .fence_value_str = kbase_fence_fence_value_str -+}; ++static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, ++ set_mem_pool_max_size); + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+struct fence * -+kbase_fence_out_new(struct kbase_jd_atom *katom) -+#else -+struct dma_fence * -+kbase_fence_out_new(struct kbase_jd_atom *katom) -+#endif ++#ifdef CONFIG_DEBUG_FS ++ ++/* Number of entries in serialize_jobs_settings[] */ ++#define NR_SERIALIZE_JOBS_SETTINGS 5 ++/* Maximum string length in serialize_jobs_settings[].name */ ++#define MAX_SERIALIZE_JOBS_NAME_LEN 16 ++ ++static struct +{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif ++ char *name; ++ u8 setting; ++} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { ++ {"none", 0}, ++ {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, ++ {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, ++ {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, ++ {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | 
KBASE_SERIALIZE_INTER_SLOT | ++ KBASE_SERIALIZE_RESET} ++}; + -+ WARN_ON(katom->dma_fence.fence); ++/** ++ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs ++ * file ++ * @sfile: seq_file pointer ++ * @data: Private callback data ++ * ++ * This function is called to get the contents of the serialize_jobs debugfs ++ * file. This is a list of the available settings with the currently active one ++ * surrounded by square brackets. ++ * ++ * Return: 0 on success, or an error code on error ++ */ ++static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) ++{ ++ struct kbase_device *kbdev = sfile->private; ++ int i; + -+ fence = kzalloc(sizeof(*fence), GFP_KERNEL); -+ if (!fence) -+ return NULL; ++ CSTD_UNUSED(data); + -+ dma_fence_init(fence, -+ &kbase_fence_ops, -+ &kbase_fence_lock, -+ katom->dma_fence.context, -+ atomic_inc_return(&katom->dma_fence.seqno)); ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) ++ seq_printf(sfile, "[%s] ", ++ serialize_jobs_settings[i].name); ++ else ++ seq_printf(sfile, "%s ", ++ serialize_jobs_settings[i].name); ++ } + -+ katom->dma_fence.fence = fence; ++ seq_puts(sfile, "\n"); + -+ return fence; ++ return 0; +} + -+bool -+kbase_fence_free_callbacks(struct kbase_jd_atom *katom) ++/** ++ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs ++ * debugfs file. ++ * @file: File pointer ++ * @ubuf: User buffer containing data to store ++ * @count: Number of bytes in user buffer ++ * @ppos: File position ++ * ++ * This function is called when the serialize_jobs debugfs file is written to. ++ * It matches the requested setting against the available settings and if a ++ * matching setting is found updates kbdev->serialize_jobs. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, ++ const char __user *ubuf, size_t count, loff_t *ppos) +{ -+ struct kbase_fence_cb *cb, *tmp; -+ bool res = false; -+ -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ struct seq_file *s = file->private_data; ++ struct kbase_device *kbdev = s->private; ++ char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; ++ int i; ++ bool valid = false; + -+ /* Clean up and free callbacks. */ -+ list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { -+ bool ret; ++ CSTD_UNUSED(ppos); + -+ /* Cancel callbacks that hasn't been called yet. */ -+ ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); -+ if (ret) { -+ int ret; ++ count = min_t(size_t, sizeof(buf) - 1, count); ++ if (copy_from_user(buf, ubuf, count)) ++ return -EFAULT; + -+ /* Fence had not signaled, clean up after -+ * canceling. -+ */ -+ ret = atomic_dec_return(&katom->dma_fence.dep_count); ++ buf[count] = 0; + -+ if (unlikely(ret == 0)) -+ res = true; ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { ++ kbdev->serialize_jobs = ++ serialize_jobs_settings[i].setting; ++ valid = true; ++ break; + } ++ } + -+ /* -+ * Release the reference taken in -+ * kbase_fence_add_callback(). 
-+ */ -+ dma_fence_put(cb->fence); -+ list_del(&cb->node); -+ kfree(cb); ++ if (!valid) { ++ dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); ++ return -EINVAL; + } + -+ return res; ++ return count; +} + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+int -+kbase_fence_add_callback(struct kbase_jd_atom *katom, -+ struct fence *fence, -+ fence_func_t callback) -+#else -+int -+kbase_fence_add_callback(struct kbase_jd_atom *katom, -+ struct dma_fence *fence, -+ dma_fence_func_t callback) -+#endif ++/** ++ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs ++ * debugfs file ++ * @in: inode pointer ++ * @file: file pointer ++ * ++ * Return: Zero on success, error code on failure ++ */ ++static int kbasep_serialize_jobs_debugfs_open(struct inode *in, ++ struct file *file) +{ -+ int err = 0; -+ struct kbase_fence_cb *kbase_fence_cb; ++ return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); ++} + -+ if (!fence) -+ return -EINVAL; ++static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { ++ .open = kbasep_serialize_jobs_debugfs_open, ++ .read = seq_read, ++ .write = kbasep_serialize_jobs_debugfs_write, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); -+ if (!kbase_fence_cb) -+ return -ENOMEM; ++#endif /* CONFIG_DEBUG_FS */ + -+ kbase_fence_cb->fence = fence; -+ kbase_fence_cb->katom = katom; -+ INIT_LIST_HEAD(&kbase_fence_cb->node); ++static int kbasep_protected_mode_init(struct kbase_device *kbdev) ++{ ++#ifdef CONFIG_OF ++ struct device_node *protected_node; ++ struct platform_device *pdev; ++ struct protected_mode_device *protected_dev; ++#endif + -+ err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, -+ callback); -+ if (err == -ENOENT) { -+ /* Fence signaled, clear the error and return */ -+ err = 0; -+ kfree(kbase_fence_cb); -+ } else if (err) { -+ kfree(kbase_fence_cb); -+ } else { -+ /* -+ * Get reference to fence that will be kept until callback gets -+ * cleaned up in kbase_fence_free_callbacks(). -+ */ -+ dma_fence_get(fence); -+ atomic_inc(&katom->dma_fence.dep_count); -+ /* Add callback to katom's list of callbacks */ -+ list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { ++ /* Use native protected ops */ ++ kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), ++ GFP_KERNEL); ++ if (!kbdev->protected_dev) ++ return -ENOMEM; ++ kbdev->protected_dev->data = kbdev; ++ kbdev->protected_ops = &kbase_native_protected_ops; ++ kbdev->protected_mode_support = true; ++ return 0; + } + -+ return err; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.h b/drivers/gpu/arm/midgard/mali_kbase_fence.h -new file mode 100644 -index 000000000..639cc2ef4 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.h -@@ -0,0 +1,275 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ kbdev->protected_mode_support = false; + ++#ifdef CONFIG_OF ++ protected_node = of_parse_phandle(kbdev->dev->of_node, ++ "protected-mode-switcher", 0); + ++ if (!protected_node) ++ protected_node = of_parse_phandle(kbdev->dev->of_node, ++ "secure-mode-switcher", 0); + -+#ifndef _KBASE_FENCE_H_ -+#define _KBASE_FENCE_H_ ++ if (!protected_node) { ++ /* If protected_node cannot be looked up then we assume ++ * protected mode is not supported on this platform. */ ++ dev_info(kbdev->dev, "Protected mode not available\n"); ++ return 0; ++ } + -+/* -+ * mali_kbase_fence.[hc] has common fence code used by both -+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences -+ * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel -+ */ ++ pdev = of_find_device_by_node(protected_node); ++ if (!pdev) ++ return -EINVAL; + -+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) ++ protected_dev = platform_get_drvdata(pdev); ++ if (!protected_dev) ++ return -EPROBE_DEFER; + -+#include -+#include "mali_kbase_fence_defs.h" -+#include "mali_kbase.h" ++ kbdev->protected_ops = &protected_dev->ops; ++ kbdev->protected_dev = protected_dev; + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+extern const struct fence_ops kbase_fence_ops; -+#else -+extern const struct dma_fence_ops kbase_fence_ops; -+#endif ++ if (kbdev->protected_ops) { ++ int err; + -+/** -+* struct kbase_fence_cb - Mali dma-fence callback data struct -+* @fence_cb: Callback function -+* @katom: Pointer to katom that is waiting on this callback -+* @fence: Pointer to the fence object on which this callback is waiting -+* @node: List head for linking this callback to the katom -+*/ -+struct kbase_fence_cb { -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence_cb fence_cb; -+ struct fence *fence; -+#else -+ struct dma_fence_cb fence_cb; -+ struct dma_fence *fence; -+#endif -+ struct kbase_jd_atom *katom; -+ struct list_head node; -+}; ++ /* Make sure protected mode is disabled on startup */ ++ mutex_lock(&kbdev->pm.lock); ++ err = kbdev->protected_ops->protected_mode_disable( ++ kbdev->protected_dev); ++ mutex_unlock(&kbdev->pm.lock); + -+/** -+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom -+ * @katom: Atom to create an output fence for -+ * -+ * return: A new fence object on success, NULL on failure. -+ */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); -+#else -+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); ++ /* protected_mode_disable() returns -EINVAL if not supported */ ++ kbdev->protected_mode_support = (err != -EINVAL); ++ } +#endif ++ return 0; ++} + -+#if defined(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_fence_in_set() - Assign input fence to atom -+ * @katom: Atom to assign input fence to -+ * @fence: Input fence to assign to atom -+ * -+ * This function will take ownership of one fence reference! 
-+ */ -+#define kbase_fence_fence_in_set(katom, fence) \ -+ do { \ -+ WARN_ON((katom)->dma_fence.fence_in); \ -+ (katom)->dma_fence.fence_in = fence; \ -+ } while (0) -+#endif ++static void kbasep_protected_mode_term(struct kbase_device *kbdev) ++{ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) ++ kfree(kbdev->protected_dev); ++} + -+/** -+ * kbase_fence_out_remove() - Removes the output fence from atom -+ * @katom: Atom to remove output fence for -+ * -+ * This will also release the reference to this fence which the atom keeps -+ */ -+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) ++#ifdef CONFIG_MALI_NO_MALI ++static int kbase_common_reg_map(struct kbase_device *kbdev) +{ -+ if (katom->dma_fence.fence) { -+ dma_fence_put(katom->dma_fence.fence); -+ katom->dma_fence.fence = NULL; ++ return 0; ++} ++static void kbase_common_reg_unmap(struct kbase_device * const kbdev) ++{ ++} ++#else /* CONFIG_MALI_NO_MALI */ ++static int kbase_common_reg_map(struct kbase_device *kbdev) ++{ ++ int err = -ENOMEM; ++ ++ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { ++ dev_err(kbdev->dev, "Register window unavailable\n"); ++ err = -EIO; ++ goto out_region; ++ } ++ ++ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); ++ if (!kbdev->reg) { ++ dev_err(kbdev->dev, "Can't remap register window\n"); ++ err = -EINVAL; ++ goto out_ioremap; + } ++ ++ return 0; ++ ++ out_ioremap: ++ release_mem_region(kbdev->reg_start, kbdev->reg_size); ++ out_region: ++ return err; +} + -+#if defined(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_out_remove() - Removes the input fence from atom -+ * @katom: Atom to remove input fence for -+ * -+ * This will also release the reference to this fence which the atom keeps -+ */ -+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) ++static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ -+ if (katom->dma_fence.fence_in) { -+ dma_fence_put(katom->dma_fence.fence_in); -+ katom->dma_fence.fence_in = NULL; ++ if (kbdev->reg) { ++ iounmap(kbdev->reg); ++ release_mem_region(kbdev->reg_start, kbdev->reg_size); ++ kbdev->reg = NULL; ++ kbdev->reg_start = 0; ++ kbdev->reg_size = 0; + } +} -+#endif ++#endif /* CONFIG_MALI_NO_MALI */ + -+/** -+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us -+ * @katom: Atom to check output fence for -+ * -+ * Return: true if fence exists and is valid, otherwise false -+ */ -+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) ++static int registers_map(struct kbase_device * const kbdev) +{ -+ return katom->dma_fence.fence && -+ katom->dma_fence.fence->ops == &kbase_fence_ops; ++ ++ /* the first memory resource is the physical address of the GPU ++ * registers */ ++ struct platform_device *pdev = to_platform_device(kbdev->dev); ++ struct resource *reg_res; ++ int err; ++ ++ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!reg_res) { ++ dev_err(kbdev->dev, "Invalid register resource\n"); ++ return -ENOENT; ++ } ++ ++ kbdev->reg_start = reg_res->start; ++ kbdev->reg_size = resource_size(reg_res); ++ ++ err = kbase_common_reg_map(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Failed to map registers\n"); ++ return err; ++ } ++ ++ return 0; +} + -+/** -+ * kbase_fence_out_signal() - Signal output fence of atom -+ * @katom: Atom to signal output fence for -+ * @status: Status to signal with (0 for success, < 0 for error) -+ * -+ * Return: 0 on success, < 0 on error -+ */ -+static inline 
int kbase_fence_out_signal(struct kbase_jd_atom *katom, -+ int status) ++static void registers_unmap(struct kbase_device *kbdev) +{ -+ if (status) { -+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) -+ fence_set_error(katom->dma_fence.fence, status); -+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) -+ dma_fence_set_error(katom->dma_fence.fence, status); -+#else -+ katom->dma_fence.fence->status = status; -+#endif ++ kbase_common_reg_unmap(kbdev); ++} ++ ++static int power_control_init(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); ++ int err = 0; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_REGULATOR) ++ kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); ++ if (IS_ERR_OR_NULL(kbdev->regulator)) { ++ err = PTR_ERR(kbdev->regulator); ++ kbdev->regulator = NULL; ++ if (err == -EPROBE_DEFER) { ++ dev_err(&pdev->dev, "Failed to get regulator\n"); ++ return err; ++ } ++ dev_info(kbdev->dev, ++ "Continuing without Mali regulator control\n"); ++ /* Allow probe to continue without regulator */ + } -+ return dma_fence_signal(katom->dma_fence.fence); ++#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++ ++ kbdev->clock = clk_get(kbdev->dev, "clk_mali"); ++ if (IS_ERR_OR_NULL(kbdev->clock)) { ++ err = PTR_ERR(kbdev->clock); ++ kbdev->clock = NULL; ++ if (err == -EPROBE_DEFER) { ++ dev_err(&pdev->dev, "Failed to get clock\n"); ++ goto fail; ++ } ++ dev_info(kbdev->dev, "Continuing without Mali clock control\n"); ++ /* Allow probe to continue without clock. */ ++ } else { ++ err = clk_prepare(kbdev->clock); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to prepare and enable clock (%d)\n", ++ err); ++ goto fail; ++ } ++ } ++ ++ err = kbase_platform_rk_init_opp_table(kbdev); ++ if (err) ++ dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err); ++ ++ return 0; ++ ++fail: ++ ++if (kbdev->clock != NULL) { ++ clk_put(kbdev->clock); ++ kbdev->clock = NULL; +} + -+/** -+ * kbase_fence_add_callback() - Add callback on @fence to block @katom -+ * @katom: Pointer to katom that will be blocked by @fence -+ * @fence: Pointer to fence on which to set up the callback -+ * @callback: Pointer to function to be called when fence is signaled -+ * -+ * Caller needs to hold a reference to @fence when calling this function, and -+ * the caller is responsible for releasing that reference. An additional -+ * reference to @fence will be taken when the callback was successfully set up -+ * and @fence needs to be kept valid until the callback has been called and -+ * cleanup have been done. -+ * -+ * Return: 0 on success: fence was either already signaled, or callback was -+ * set up. Negative error code is returned on error. 
-+ */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+int kbase_fence_add_callback(struct kbase_jd_atom *katom, -+ struct fence *fence, -+ fence_func_t callback); -+#else -+int kbase_fence_add_callback(struct kbase_jd_atom *katom, -+ struct dma_fence *fence, -+ dma_fence_func_t callback); ++#ifdef CONFIG_REGULATOR ++ if (NULL != kbdev->regulator) { ++ regulator_put(kbdev->regulator); ++ kbdev->regulator = NULL; ++ } +#endif + -+/** -+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value -+ * @katom: Atom to set dep_count for -+ * @val: value to set dep_count to -+ * -+ * The dep_count is available to the users of this module so that they can -+ * synchronize completion of the wait with cancellation and adding of more -+ * callbacks. For instance, a user could do the following: -+ * -+ * dep_count set to 1 -+ * callback #1 added, dep_count is increased to 2 -+ * callback #1 happens, dep_count decremented to 1 -+ * since dep_count > 0, no completion is done -+ * callback #2 is added, dep_count is increased to 2 -+ * dep_count decremented to 1 -+ * callback #2 happens, dep_count decremented to 0 -+ * since dep_count now is zero, completion executes -+ * -+ * The dep_count can also be used to make sure that the completion only -+ * executes once. This is typically done by setting dep_count to -1 for the -+ * thread that takes on this responsibility. -+ */ -+static inline void -+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) -+{ -+ atomic_set(&katom->dma_fence.dep_count, val); ++ return err; +} + -+/** -+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count -+ * @katom: Atom to decrement dep_count for -+ * -+ * See @kbase_fence_dep_count_set for general description about dep_count -+ * -+ * Return: true if value was decremented to zero, otherwise false -+ */ -+static inline bool -+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) ++static void power_control_term(struct kbase_device *kbdev) +{ -+ return atomic_dec_and_test(&katom->dma_fence.dep_count); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \ ++ defined(LSK_OPPV2_BACKPORT) ++ dev_pm_opp_of_remove_table(kbdev->dev); ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) ++ of_free_opp_table(kbdev->dev); ++#endif ++ ++ if (kbdev->clock) { ++ clk_unprepare(kbdev->clock); ++ clk_put(kbdev->clock); ++ kbdev->clock = NULL; ++ } ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ ++ && defined(CONFIG_REGULATOR) ++ if (kbdev->regulator) { ++ regulator_put(kbdev->regulator); ++ kbdev->regulator = NULL; ++ } ++#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} + -+/** -+ * kbase_fence_dep_count_read() - Returns the current dep_count value -+ * @katom: Pointer to katom -+ * -+ * See @kbase_fence_dep_count_set for general description about dep_count -+ * -+ * Return: The current dep_count value -+ */ -+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) ++#ifdef CONFIG_DEBUG_FS ++ ++#if KBASE_GPU_RESET_EN ++#include ++ ++static void trigger_quirks_reload(struct kbase_device *kbdev) +{ -+ return atomic_read(&katom->dma_fence.dep_count); ++ kbase_pm_context_active(kbdev); ++ if (kbase_prepare_to_reset_gpu(kbdev)) ++ kbase_reset_gpu(kbdev); ++ kbase_pm_context_idle(kbdev); +} + -+/** -+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom -+ * @katom: Pointer to katom -+ * -+ * This function will free all fence callbacks on the katom's list of -+ * callbacks. 
Callbacks that have not yet been called, because their fence -+ * hasn't yet signaled, will first be removed from the fence. -+ * -+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. -+ * -+ * Return: true if dep_count reached 0, otherwise false. -+ */ -+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); ++#define MAKE_QUIRK_ACCESSORS(type) \ ++static int type##_quirks_set(void *data, u64 val) \ ++{ \ ++ struct kbase_device *kbdev; \ ++ kbdev = (struct kbase_device *)data; \ ++ kbdev->hw_quirks_##type = (u32)val; \ ++ trigger_quirks_reload(kbdev); \ ++ return 0;\ ++} \ ++\ ++static int type##_quirks_get(void *data, u64 *val) \ ++{ \ ++ struct kbase_device *kbdev;\ ++ kbdev = (struct kbase_device *)data;\ ++ *val = kbdev->hw_quirks_##type;\ ++ return 0;\ ++} \ ++DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ ++ type##_quirks_set, "%llu\n") + -+#if defined(CONFIG_SYNC_FILE) -+/** -+ * kbase_fence_in_get() - Retrieve input fence for atom. -+ * @katom: Atom to get input fence from -+ * -+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it -+ * -+ * Return: The fence, or NULL if there is no input fence for atom -+ */ -+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) -+#endif ++MAKE_QUIRK_ACCESSORS(sc); ++MAKE_QUIRK_ACCESSORS(tiler); ++MAKE_QUIRK_ACCESSORS(mmu); ++MAKE_QUIRK_ACCESSORS(jm); ++ ++#endif /* KBASE_GPU_RESET_EN */ + +/** -+ * kbase_fence_out_get() - Retrieve output fence for atom. -+ * @katom: Atom to get output fence from ++ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read ++ * @file: File object to read is for ++ * @buf: User buffer to populate with data ++ * @len: Length of user buffer ++ * @ppos: Offset within file object + * -+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it ++ * Retrieves the current status of protected debug mode ++ * (0 = disabled, 1 = enabled) + * -+ * Return: The fence, or NULL if there is no output fence for atom ++ * Return: Number of bytes added to user buffer + */ -+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) ++static ssize_t debugfs_protected_debug_mode_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)file->private_data; ++ u32 gpu_status; ++ ssize_t ret_val; + -+/** -+ * kbase_fence_put() - Releases a reference to a fence -+ * @fence: Fence to release reference for. -+ */ -+#define kbase_fence_put(fence) dma_fence_put(fence) ++ kbase_pm_context_active(kbdev); ++ gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); ++ kbase_pm_context_idle(kbdev); + ++ if (gpu_status & GPU_DBGEN) ++ ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); ++ else ++ ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); + -+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */ ++ return ret_val; ++} + -+#endif /* _KBASE_FENCE_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h -new file mode 100644 -index 000000000..fa2c6dfe9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h -@@ -0,0 +1,51 @@ +/* ++ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Contains the file operations for the "protected_debug_mode" debugfs file + */ ++static const struct file_operations fops_protected_debug_mode = { ++ .open = simple_open, ++ .read = debugfs_protected_debug_mode_read, ++ .llseek = default_llseek, ++}; + ++static int kbase_device_debugfs_init(struct kbase_device *kbdev) ++{ ++ struct dentry *debugfs_ctx_defaults_directory; ++ int err; + ++ kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, ++ NULL); ++ if (!kbdev->mali_debugfs_directory) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); ++ err = -ENOMEM; ++ goto out; ++ } + -+#ifndef _KBASE_FENCE_DEFS_H_ -+#define _KBASE_FENCE_DEFS_H_ ++ kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", ++ kbdev->mali_debugfs_directory); ++ if (!kbdev->debugfs_ctx_directory) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); ++ err = -ENOMEM; ++ goto out; ++ } + -+/* -+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) -+ * This file hides the compatibility issues with this for the rest the driver -+ */ ++ debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", ++ kbdev->debugfs_ctx_directory); ++ if (!debugfs_ctx_defaults_directory) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); ++ err = -ENOMEM; ++ goto out; ++ } + -+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) ++#if !MALI_CUSTOMER_RELEASE ++ kbasep_regs_dump_debugfs_init(kbdev); ++#endif /* !MALI_CUSTOMER_RELEASE */ ++ kbasep_regs_history_debugfs_init(kbdev); + -+#include ++ kbase_debug_job_fault_debugfs_init(kbdev); ++ kbasep_gpu_memory_debugfs_init(kbdev); ++ kbase_as_fault_debugfs_init(kbdev); ++#if KBASE_GPU_RESET_EN ++ /* fops_* variables created by invocations of macro ++ * MAKE_QUIRK_ACCESSORS() above. 
*/ ++ debugfs_create_file("quirks_sc", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_sc_quirks); ++ debugfs_create_file("quirks_tiler", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_tiler_quirks); ++ debugfs_create_file("quirks_mmu", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_mmu_quirks); ++ debugfs_create_file("quirks_jm", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_jm_quirks); ++#endif /* KBASE_GPU_RESET_EN */ + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#ifndef CONFIG_MALI_COH_USER ++ debugfs_create_bool("infinite_cache", 0644, ++ debugfs_ctx_defaults_directory, ++ (bool*)&(kbdev->infinite_cache_active_default)); ++#endif /* CONFIG_MALI_COH_USER */ + -+#include ++ debugfs_create_size_t("mem_pool_max_size", 0644, ++ debugfs_ctx_defaults_directory, ++ &kbdev->mem_pool_max_size_default); + -+#define dma_fence_context_alloc(a) fence_context_alloc(a) -+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) -+#define dma_fence_get(a) fence_get(a) -+#define dma_fence_put(a) fence_put(a) -+#define dma_fence_signal(a) fence_signal(a) -+#define dma_fence_is_signaled(a) fence_is_signaled(a) -+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) -+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { ++ debugfs_create_file("protected_debug_mode", S_IRUGO, ++ kbdev->mali_debugfs_directory, kbdev, ++ &fops_protected_debug_mode); ++ } + -+#else ++#if KBASE_TRACE_ENABLE ++ kbasep_trace_debugfs_init(kbdev); ++#endif /* KBASE_TRACE_ENABLE */ + -+#include ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ kbasep_trace_timeline_debugfs_init(kbdev); ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ + -+#endif /* < 4.10.0 */ ++#ifdef CONFIG_MALI_DEVFREQ ++#ifdef CONFIG_DEVFREQ_THERMAL ++ if ((kbdev->inited_subsys & inited_devfreq) && kbdev->devfreq_cooling) ++ kbase_ipa_debugfs_init(kbdev); ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_DEVFREQ */ + -+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ ++#ifdef CONFIG_DEBUG_FS ++ debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_serialize_jobs_debugfs_fops); ++#endif /* CONFIG_DEBUG_FS */ + -+#endif /* _KBASE_FENCE_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h -new file mode 100644 -index 000000000..ce65b5562 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h -@@ -0,0 +1,45 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ return 0; + ++out: ++ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++ return err; ++} + ++static void kbase_device_debugfs_term(struct kbase_device *kbdev) ++{ ++ debugfs_remove_recursive(kbdev->mali_debugfs_directory); ++} + -+/* NB taken from gator */ -+/* -+ * List of possible actions to be controlled by DS-5 Streamline. -+ * The following numbers are used by gator to control the frame buffer dumping -+ * and s/w counter reporting. 
We cannot use the enums in mali_uk_types.h because -+ * they are unknown inside gator. -+ */ -+#ifndef _KBASE_GATOR_H_ -+#define _KBASE_GATOR_H_ ++#else /* CONFIG_DEBUG_FS */ ++static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) ++{ ++ return 0; ++} + -+#ifdef CONFIG_MALI_GATOR_SUPPORT -+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) -+#define GATOR_JOB_SLOT_START 1 -+#define GATOR_JOB_SLOT_STOP 2 -+#define GATOR_JOB_SLOT_SOFT_STOPPED 3 ++static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } ++#endif /* CONFIG_DEBUG_FS */ + -+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id); -+void kbase_trace_mali_pm_status(u32 event, u64 value); -+void kbase_trace_mali_pm_power_off(u32 event, u64 value); -+void kbase_trace_mali_pm_power_on(u32 event, u64 value); -+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); -+void kbase_trace_mali_mmu_as_in_use(int event); -+void kbase_trace_mali_mmu_as_released(int event); -+void kbase_trace_mali_total_alloc_pages_change(long long int event); ++static void kbase_device_coherency_init(struct kbase_device *kbdev, ++ unsigned prod_id) ++{ ++#ifdef CONFIG_OF ++ u32 supported_coherency_bitmap = ++ kbdev->gpu_props.props.raw_props.coherency_mode; ++ const void *coherency_override_dts; ++ u32 override_coherency; + -+#endif /* CONFIG_MALI_GATOR_SUPPORT */ ++ /* Only for tMIx : ++ * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly ++ * documented for tMIx so force correct value here. ++ */ ++ if (GPU_ID_IS_NEW_FORMAT(prod_id) && ++ (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == ++ GPU_ID2_PRODUCT_TMIX)) ++ if (supported_coherency_bitmap == ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE)) ++ supported_coherency_bitmap |= ++ COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + -+#endif /* _KBASE_GATOR_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c -new file mode 100644 -index 000000000..860e10159 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c -@@ -0,0 +1,334 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++#endif /* CONFIG_OF */ + ++ kbdev->system_coherency = COHERENCY_NONE; + ++ /* device tree may override the coherency */ ++#ifdef CONFIG_OF ++ coherency_override_dts = of_get_property(kbdev->dev->of_node, ++ "system-coherency", ++ NULL); ++ if (coherency_override_dts) { + -+#include "mali_kbase.h" -+#include "mali_kbase_hw.h" -+#include "mali_kbase_mem_linux.h" -+#include "mali_kbase_gator_api.h" -+#include "mali_kbase_gator_hwcnt_names.h" ++ override_coherency = be32_to_cpup(coherency_override_dts); + -+#define MALI_MAX_CORES_PER_GROUP 4 -+#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 -+#define MALI_COUNTERS_PER_BLOCK 64 -+#define MALI_BYTES_PER_COUNTER 4 ++ if ((override_coherency <= COHERENCY_NONE) && ++ (supported_coherency_bitmap & ++ COHERENCY_FEATURE_BIT(override_coherency))) { + -+struct kbase_gator_hwcnt_handles { -+ struct kbase_device *kbdev; -+ struct kbase_vinstr_client *vinstr_cli; -+ void *vinstr_buffer; -+ struct work_struct dump_work; -+ int dump_complete; -+ spinlock_t dump_lock; -+}; ++ kbdev->system_coherency = override_coherency; + -+static void dump_worker(struct work_struct *work); ++ dev_info(kbdev->dev, ++ "Using coherency mode %u set from dtb", ++ override_coherency); ++ } else ++ dev_warn(kbdev->dev, ++ "Ignoring unsupported coherency mode %u set from dtb", ++ override_coherency); ++ } + -+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) ++#endif /* CONFIG_OF */ ++ ++ kbdev->gpu_props.props.raw_props.coherency_mode = ++ kbdev->system_coherency; ++} ++ ++#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++ ++/* Callback used by the kbase bus logger client, to initiate a GPU reset ++ * when the bus log is restarted. GPU reset is used as reference point ++ * in HW bus log analyses. ++ */ ++static void kbase_logging_started_cb(void *data) +{ -+ const char * const *hardware_counters; -+ struct kbase_device *kbdev; -+ uint32_t product_id; -+ uint32_t count; ++ struct kbase_device *kbdev = (struct kbase_device *)data; + -+ if (!total_counters) -+ return NULL; ++ if (kbase_prepare_to_reset_gpu(kbdev)) ++ kbase_reset_gpu(kbdev); ++ dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); ++} ++#endif ++ ++static struct attribute *kbase_attrs[] = { ++#ifdef CONFIG_MALI_DEBUG ++ &dev_attr_debug_command.attr, ++ &dev_attr_js_softstop_always.attr, ++#endif ++#if !MALI_CUSTOMER_RELEASE ++ &dev_attr_force_replay.attr, ++#endif ++ &dev_attr_js_timeouts.attr, ++ &dev_attr_soft_job_timeout.attr, ++ &dev_attr_gpuinfo.attr, ++ &dev_attr_dvfs_period.attr, ++ &dev_attr_pm_poweroff.attr, ++ &dev_attr_reset_timeout.attr, ++ &dev_attr_js_scheduling_period.attr, ++ &dev_attr_power_policy.attr, ++ &dev_attr_core_availability_policy.attr, ++ &dev_attr_core_mask.attr, ++ &dev_attr_mem_pool_size.attr, ++ &dev_attr_mem_pool_max_size.attr, ++ NULL ++}; ++ ++static const struct attribute_group kbase_attr_group = { ++ .attrs = kbase_attrs, ++}; ++ ++static int kbase_platform_device_remove(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); ++ const struct list_head *dev_list; + -+ /* Get the first device - it doesn't matter in this case */ -+ kbdev = kbase_find_device(-1); + if (!kbdev) -+ return NULL; ++ return -ENODEV; + -+ product_id = kbdev->gpu_props.props.core_props.product_id; ++ kfree(kbdev->gpu_props.prop_buffer); + -+ if (GPU_ID_IS_NEW_FORMAT(product_id)) { -+ switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) { -+ case GPU_ID2_PRODUCT_TMIX: -+ hardware_counters = hardware_counters_mali_tMIx; -+ count = 
ARRAY_SIZE(hardware_counters_mali_tMIx); -+ break; -+ case GPU_ID2_PRODUCT_THEX: -+ hardware_counters = hardware_counters_mali_tHEx; -+ count = ARRAY_SIZE(hardware_counters_mali_tHEx); -+ break; -+ case GPU_ID2_PRODUCT_TSIX: -+ hardware_counters = hardware_counters_mali_tSIx; -+ count = ARRAY_SIZE(hardware_counters_mali_tSIx); -+ break; -+ default: -+ hardware_counters = NULL; -+ count = 0; -+ dev_err(kbdev->dev, "Unrecognized product ID: %u\n", -+ product_id); -+ break; -+ } -+ } else { -+ switch (product_id) { -+ /* If we are using a Mali-T60x device */ -+ case GPU_ID_PI_T60X: -+ hardware_counters = hardware_counters_mali_t60x; -+ count = ARRAY_SIZE(hardware_counters_mali_t60x); -+ break; -+ /* If we are using a Mali-T62x device */ -+ case GPU_ID_PI_T62X: -+ hardware_counters = hardware_counters_mali_t62x; -+ count = ARRAY_SIZE(hardware_counters_mali_t62x); -+ break; -+ /* If we are using a Mali-T72x device */ -+ case GPU_ID_PI_T72X: -+ hardware_counters = hardware_counters_mali_t72x; -+ count = ARRAY_SIZE(hardware_counters_mali_t72x); -+ break; -+ /* If we are using a Mali-T76x device */ -+ case GPU_ID_PI_T76X: -+ hardware_counters = hardware_counters_mali_t76x; -+ count = ARRAY_SIZE(hardware_counters_mali_t76x); -+ break; -+ /* If we are using a Mali-T82x device */ -+ case GPU_ID_PI_T82X: -+ hardware_counters = hardware_counters_mali_t82x; -+ count = ARRAY_SIZE(hardware_counters_mali_t82x); -+ break; -+ /* If we are using a Mali-T83x device */ -+ case GPU_ID_PI_T83X: -+ hardware_counters = hardware_counters_mali_t83x; -+ count = ARRAY_SIZE(hardware_counters_mali_t83x); -+ break; -+ /* If we are using a Mali-T86x device */ -+ case GPU_ID_PI_T86X: -+ hardware_counters = hardware_counters_mali_t86x; -+ count = ARRAY_SIZE(hardware_counters_mali_t86x); -+ break; -+ /* If we are using a Mali-T88x device */ -+ case GPU_ID_PI_TFRX: -+ hardware_counters = hardware_counters_mali_t88x; -+ count = ARRAY_SIZE(hardware_counters_mali_t88x); -+ break; -+ default: -+ hardware_counters = NULL; -+ count = 0; -+ dev_err(kbdev->dev, "Unrecognized product ID: %u\n", -+ product_id); -+ break; -+ } ++#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++ if (kbdev->inited_subsys & inited_buslogger) { ++ bl_core_client_unregister(kbdev->buslogger); ++ kbdev->inited_subsys &= ~inited_buslogger; + } ++#endif + -+ /* Release the kbdev reference. */ -+ kbase_release_device(kbdev); + -+ *total_counters = count; ++ if (kbdev->inited_subsys & inited_dev_list) { ++ dev_list = kbase_dev_list_get(); ++ list_del(&kbdev->entry); ++ kbase_dev_list_put(dev_list); ++ kbdev->inited_subsys &= ~inited_dev_list; ++ } + -+ /* If we return a string array take a reference on the module (or fail). */ -+ if (hardware_counters && !try_module_get(THIS_MODULE)) -+ return NULL; ++ if (kbdev->inited_subsys & inited_misc_register) { ++ misc_deregister(&kbdev->mdev); ++ kbdev->inited_subsys &= ~inited_misc_register; ++ } + -+ return hardware_counters; -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names); ++ if (kbdev->inited_subsys & inited_sysfs_group) { ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); ++ kbdev->inited_subsys &= ~inited_sysfs_group; ++ } + -+void kbase_gator_hwcnt_term_names(void) -+{ -+ /* Release the module reference. 
*/ -+ module_put(THIS_MODULE); -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); ++ if (kbdev->inited_subsys & inited_get_device) { ++ put_device(kbdev->dev); ++ kbdev->inited_subsys &= ~inited_get_device; ++ } + -+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) -+{ -+ struct kbase_gator_hwcnt_handles *hand; -+ struct kbase_uk_hwcnt_reader_setup setup; -+ uint32_t dump_size = 0, i = 0; ++ if (kbdev->inited_subsys & inited_debugfs) { ++ kbase_device_debugfs_term(kbdev); ++ kbdev->inited_subsys &= ~inited_debugfs; ++ } + -+ if (!in_out_info) -+ return NULL; ++ if (kbdev->inited_subsys & inited_job_fault) { ++ kbase_debug_job_fault_dev_term(kbdev); ++ kbdev->inited_subsys &= ~inited_job_fault; ++ } ++ if (kbdev->inited_subsys & inited_vinstr) { ++ kbase_vinstr_term(kbdev->vinstr_ctx); ++ kbdev->inited_subsys &= ~inited_vinstr; ++ } + -+ hand = kzalloc(sizeof(*hand), GFP_KERNEL); -+ if (!hand) -+ return NULL; ++#ifdef CONFIG_MALI_DEVFREQ ++ if (kbdev->inited_subsys & inited_devfreq) { ++ kbase_devfreq_term(kbdev); ++ kbdev->inited_subsys &= ~inited_devfreq; ++ } ++#endif + -+ INIT_WORK(&hand->dump_work, dump_worker); -+ spin_lock_init(&hand->dump_lock); ++ if (kbdev->inited_subsys & inited_backend_late) { ++ kbase_backend_late_term(kbdev); ++ kbdev->inited_subsys &= ~inited_backend_late; ++ } + -+ /* Get the first device */ -+ hand->kbdev = kbase_find_device(-1); -+ if (!hand->kbdev) -+ goto free_hand; ++ if (kbdev->inited_subsys & inited_tlstream) { ++ kbase_tlstream_term(); ++ kbdev->inited_subsys &= ~inited_tlstream; ++ } + -+ dump_size = kbase_vinstr_dump_size(hand->kbdev); -+ hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); -+ if (!hand->vinstr_buffer) -+ goto release_device; -+ in_out_info->kernel_dump_buffer = hand->vinstr_buffer; ++ /* Bring job and mem sys to a halt before we continue termination */ + -+ in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; -+ in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; -+ in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id; ++ if (kbdev->inited_subsys & inited_js) ++ kbasep_js_devdata_halt(kbdev); + -+ /* If we are using a v4 device (Mali-T6xx or Mali-T72x) */ -+ if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) { -+ uint32_t cg, j; -+ uint64_t core_mask; ++ if (kbdev->inited_subsys & inited_mem) ++ kbase_mem_halt(kbdev); + -+ /* There are 8 hardware counters blocks per core group */ -+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * -+ MALI_MAX_NUM_BLOCKS_PER_GROUP * -+ in_out_info->nr_core_groups, GFP_KERNEL); ++ if (kbdev->inited_subsys & inited_protected) { ++ kbasep_protected_mode_term(kbdev); ++ kbdev->inited_subsys &= ~inited_protected; ++ } + -+ if (!in_out_info->hwc_layout) -+ goto free_vinstr_buffer; ++ if (kbdev->inited_subsys & inited_js) { ++ kbasep_js_devdata_term(kbdev); ++ kbdev->inited_subsys &= ~inited_js; ++ } + -+ dump_size = in_out_info->nr_core_groups * -+ MALI_MAX_NUM_BLOCKS_PER_GROUP * -+ MALI_COUNTERS_PER_BLOCK * -+ MALI_BYTES_PER_COUNTER; ++ if (kbdev->inited_subsys & inited_mem) { ++ kbase_mem_term(kbdev); ++ kbdev->inited_subsys &= ~inited_mem; ++ } + -+ for (cg = 0; cg < in_out_info->nr_core_groups; cg++) { -+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask; ++ if (kbdev->inited_subsys & inited_pm_runtime_init) { ++ kbdev->pm.callback_power_runtime_term(kbdev); ++ kbdev->inited_subsys &= ~inited_pm_runtime_init; ++ } + -+ for (j = 0; j < 
MALI_MAX_CORES_PER_GROUP; j++) { -+ if (core_mask & (1u << j)) -+ in_out_info->hwc_layout[i++] = SHADER_BLOCK; -+ else -+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; -+ } ++ if (kbdev->inited_subsys & inited_ctx_sched) { ++ kbase_ctx_sched_term(kbdev); ++ kbdev->inited_subsys &= ~inited_ctx_sched; ++ } + -+ in_out_info->hwc_layout[i++] = TILER_BLOCK; -+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; ++ if (kbdev->inited_subsys & inited_device) { ++ kbase_device_term(kbdev); ++ kbdev->inited_subsys &= ~inited_device; ++ } + -+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; ++ if (kbdev->inited_subsys & inited_backend_early) { ++ kbase_backend_early_term(kbdev); ++ kbdev->inited_subsys &= ~inited_backend_early; ++ } + -+ if (0 == cg) -+ in_out_info->hwc_layout[i++] = JM_BLOCK; -+ else -+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; -+ } -+ /* If we are using any other device */ -+ } else { -+ uint32_t nr_l2, nr_sc_bits, j; -+ uint64_t core_mask; ++ if (kbdev->inited_subsys & inited_io_history) { ++ kbase_io_history_term(&kbdev->io_history); ++ kbdev->inited_subsys &= ~inited_io_history; ++ } + -+ nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; ++ if (kbdev->inited_subsys & inited_power_control) { ++ power_control_term(kbdev); ++ kbdev->inited_subsys &= ~inited_power_control; ++ } + -+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; ++ if (kbdev->inited_subsys & inited_registers_map) { ++ registers_unmap(kbdev); ++ kbdev->inited_subsys &= ~inited_registers_map; ++ } + -+ nr_sc_bits = fls64(core_mask); ++#ifdef CONFIG_MALI_NO_MALI ++ if (kbdev->inited_subsys & inited_gpu_device) { ++ gpu_device_destroy(kbdev); ++ kbdev->inited_subsys &= ~inited_gpu_device; ++ } ++#endif /* CONFIG_MALI_NO_MALI */ + -+ /* The job manager and tiler sets of counters -+ * are always present */ -+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); ++ if (kbdev->inited_subsys != 0) ++ dev_err(kbdev->dev, "Missing sub system termination\n"); + -+ if (!in_out_info->hwc_layout) -+ goto free_vinstr_buffer; ++ kbase_device_free(kbdev); + -+ dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; ++ return 0; ++} + -+ in_out_info->hwc_layout[i++] = JM_BLOCK; -+ in_out_info->hwc_layout[i++] = TILER_BLOCK; ++extern void kbase_platform_rk_shutdown(struct kbase_device *kbdev); ++static void kbase_platform_device_shutdown(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + -+ for (j = 0; j < nr_l2; j++) -+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; ++ kbase_platform_rk_shutdown(kbdev); ++} + -+ while (core_mask != 0ull) { -+ if ((core_mask & 1ull) != 0ull) -+ in_out_info->hwc_layout[i++] = SHADER_BLOCK; -+ else -+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; -+ core_mask >>= 1; -+ } ++/* Number of register accesses for the buffer that we allocate during ++ * initialization time. The buffer size can be changed later via debugfs. 
*/ ++#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) ++ ++static int kbase_platform_device_probe(struct platform_device *pdev) ++{ ++ struct kbase_device *kbdev; ++ struct mali_base_gpu_core_props *core_props; ++ u32 gpu_id; ++ unsigned prod_id; ++ const struct list_head *dev_list; ++ int err = 0; ++ ++#ifdef CONFIG_OF ++ err = kbase_platform_early_init(); ++ if (err) { ++ dev_err(&pdev->dev, "Early platform initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++#endif ++ kbdev = kbase_device_alloc(); ++ if (!kbdev) { ++ dev_err(&pdev->dev, "Allocate device failed\n"); ++ kbase_platform_device_remove(pdev); ++ return -ENOMEM; + } + -+ in_out_info->nr_hwc_blocks = i; -+ in_out_info->size = dump_size; ++ kbdev->dev = &pdev->dev; ++ dev_set_drvdata(kbdev->dev, kbdev); + -+ setup.jm_bm = in_out_info->bitmask[0]; -+ setup.tiler_bm = in_out_info->bitmask[1]; -+ setup.shader_bm = in_out_info->bitmask[2]; -+ setup.mmu_l2_bm = in_out_info->bitmask[3]; -+ hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, -+ &setup, hand->vinstr_buffer); -+ if (!hand->vinstr_cli) { -+ dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); -+ goto free_layout; ++#ifdef CONFIG_MALI_NO_MALI ++ err = gpu_device_create(kbdev); ++ if (err) { ++ dev_err(&pdev->dev, "Dummy model initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; + } ++ kbdev->inited_subsys |= inited_gpu_device; ++#endif /* CONFIG_MALI_NO_MALI */ + -+ return hand; ++ err = assign_irqs(pdev); ++ if (err) { ++ dev_err(&pdev->dev, "IRQ search failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } + -+free_layout: -+ kfree(in_out_info->hwc_layout); ++ err = registers_map(kbdev); ++ if (err) { ++ dev_err(&pdev->dev, "Register map failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_registers_map; + -+free_vinstr_buffer: -+ kfree(hand->vinstr_buffer); ++ err = power_control_init(pdev); ++ if (err) { ++ dev_err(&pdev->dev, "Power control initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_power_control; + -+release_device: -+ kbase_release_device(hand->kbdev); ++ err = kbase_io_history_init(&kbdev->io_history, ++ KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); ++ if (err) { ++ dev_err(&pdev->dev, "Register access history initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return -ENOMEM; ++ } ++ kbdev->inited_subsys |= inited_io_history; + -+free_hand: -+ kfree(hand); -+ return NULL; -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); ++ err = kbase_backend_early_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Early backend initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_backend_early; + -+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles) -+{ -+ if (in_out_info) -+ kfree(in_out_info->hwc_layout); ++ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, ++ kbase_dev_nr); + -+ if (opaque_handles) { -+ cancel_work_sync(&opaque_handles->dump_work); -+ kbase_vinstr_detach_client(opaque_handles->vinstr_cli); -+ kfree(opaque_handles->vinstr_buffer); -+ kbase_release_device(opaque_handles->kbdev); -+ kfree(opaque_handles); ++ kbase_disjoint_init(kbdev); ++ ++ /* obtain min/max configured gpu frequencies */ ++ core_props = &(kbdev->gpu_props.props.core_props); ++ 
core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; ++ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; ++ ++ err = kbase_device_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); ++ kbase_platform_device_remove(pdev); ++ return err; + } -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); ++ kbdev->inited_subsys |= inited_device; + -+static void dump_worker(struct work_struct *work) -+{ -+ struct kbase_gator_hwcnt_handles *hand; ++ err = kbase_ctx_sched_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n", ++ err); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_ctx_sched; + -+ hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); -+ if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, -+ BASE_HWCNT_READER_EVENT_MANUAL)) { -+ spin_lock_bh(&hand->dump_lock); -+ hand->dump_complete = 1; -+ spin_unlock_bh(&hand->dump_lock); -+ } else { -+ schedule_work(&hand->dump_work); ++ if (kbdev->pm.callback_power_runtime_init) { ++ err = kbdev->pm.callback_power_runtime_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Runtime PM initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_pm_runtime_init; + } -+} + -+uint32_t kbase_gator_instr_hwcnt_dump_complete( -+ struct kbase_gator_hwcnt_handles *opaque_handles, -+ uint32_t * const success) -+{ ++ err = kbase_mem_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_mem; + -+ if (opaque_handles && success) { -+ *success = opaque_handles->dump_complete; -+ opaque_handles->dump_complete = 0; -+ return *success; ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ gpu_id &= GPU_ID_VERSION_PRODUCT_ID; ++ prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ kbase_device_coherency_init(kbdev, prod_id); ++ ++ err = kbasep_protected_mode_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; + } -+ return 0; -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); ++ kbdev->inited_subsys |= inited_protected; + -+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) -+{ -+ if (opaque_handles) -+ schedule_work(&opaque_handles->dump_work); -+ return 0; -+} -+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h -new file mode 100644 -index 000000000..ef9ac0f7b ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h -@@ -0,0 +1,219 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ dev_list = kbase_dev_list_get(); ++ list_add(&kbdev->entry, &kbase_dev_list); ++ kbase_dev_list_put(dev_list); ++ kbdev->inited_subsys |= inited_dev_list; + ++ err = kbasep_js_devdata_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_js; + ++ err = kbase_tlstream_init(); ++ if (err) { ++ dev_err(kbdev->dev, "Timeline stream initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_tlstream; + -+#ifndef _KBASE_GATOR_API_H_ -+#define _KBASE_GATOR_API_H_ ++ err = kbase_backend_late_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Late backend initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_backend_late; + -+/** -+ * @brief This file describes the API used by Gator to fetch hardware counters. -+ */ ++#ifdef CONFIG_MALI_DEVFREQ ++ err = kbase_devfreq_init(kbdev); ++ if (!err) ++ kbdev->inited_subsys |= inited_devfreq; ++ else ++ dev_err(kbdev->dev, "Continuing without devfreq\n"); ++#endif /* CONFIG_MALI_DEVFREQ */ + -+/* This define is used by the gator kernel module compile to select which DDK -+ * API calling convention to use. If not defined (legacy DDK) gator assumes -+ * version 1. The version to DDK release mapping is: -+ * Version 1 API: DDK versions r1px, r2px -+ * Version 2 API: DDK versions r3px, r4px -+ * Version 3 API: DDK version r5p0 and newer -+ * -+ * API Usage -+ * ========= -+ * -+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter -+ * names for the GPU present in this device. -+ * -+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for -+ * the counters you want enabled. The enables can all be set for simplicity in -+ * most use cases, but disabling some will let you minimize bandwidth impact. -+ * -+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a -+ * counter context. On successful return the DDK will have populated the -+ * structure with a variety of useful information. -+ * -+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a -+ * counter dump. If this returns a non-zero value the request has been queued, -+ * otherwise the driver has been unable to do so (typically because of another -+ * user of the instrumentation exists concurrently). -+ * -+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously -+ * requested dump has been succesful. If this returns non-zero the counter dump -+ * has resolved, but the value of *success must also be tested as the dump -+ * may have not been successful. If it returns zero the counter dump was -+ * abandoned due to the device being busy (typically because of another -+ * user of the instrumentation exists concurrently). -+ * -+ * 6] Process the counters stored in the buffer pointed to by ... 
-+ * -+ * kbase_gator_hwcnt_info->kernel_dump_buffer -+ * -+ * In pseudo code you can find all of the counters via this approach: -+ * -+ * -+ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure -+ * hwcnt_name # pointer to name list -+ * -+ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer -+ * -+ * # Iterate over each 64-counter block in this GPU configuration -+ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) { -+ * hwc_type type = hwcnt_info->hwc_layout[i]; -+ * -+ * # Skip reserved type blocks - they contain no counters at all -+ * if( type == RESERVED_BLOCK ) { -+ * continue; -+ * } -+ * -+ * size_t name_offset = type * 64; -+ * size_t data_offset = i * 64; -+ * -+ * # Iterate over the names of the counters in this block type -+ * for( j = 0; j < 64; j++) { -+ * const char * name = hwcnt_name[name_offset+j]; -+ * -+ * # Skip empty name strings - there is no counter here -+ * if( name[0] == '\0' ) { -+ * continue; -+ * } -+ * -+ * u32 data = hwcnt_data[data_offset+j]; -+ * -+ * printk( "COUNTER: %s DATA: %u\n", name, data ); -+ * } -+ * } -+ * -+ * -+ * Note that in most implementations you typically want to either SUM or -+ * AVERAGE multiple instances of the same counter if, for example, you have -+ * multiple shader cores or multiple L2 caches. The most sensible view for -+ * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU -+ * counters. -+ * -+ * 7] Goto 4, repeating until you want to stop collecting counters. -+ * -+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term(). -+ * -+ * 9] Release the name table resources by calling -+ * kbase_gator_hwcnt_term_names(). This function must only be called if -+ * init_names() returned a non-NULL value. -+ **/ ++ kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); ++ if (!kbdev->vinstr_ctx) { ++ dev_err(kbdev->dev, ++ "Virtual instrumentation initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return -EINVAL; ++ } ++ kbdev->inited_subsys |= inited_vinstr; + -+#define MALI_DDK_GATOR_API_VERSION 3 ++ err = kbase_debug_job_fault_dev_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "Job fault debug initialization failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_job_fault; + -+enum hwc_type { -+ JM_BLOCK = 0, -+ TILER_BLOCK, -+ SHADER_BLOCK, -+ MMU_L2_BLOCK, -+ RESERVED_BLOCK -+}; ++ err = kbase_device_debugfs_init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "DebugFS initialization failed"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_debugfs; + -+struct kbase_gator_hwcnt_info { -+ /* Passed from Gator to kbase */ ++ /* initialize the kctx list */ ++ mutex_init(&kbdev->kctx_list_lock); ++ INIT_LIST_HEAD(&kbdev->kctx_list); + -+ /* the bitmask of enabled hardware counters for each counter block */ -+ uint16_t bitmask[4]; ++ kbdev->mdev.minor = MISC_DYNAMIC_MINOR; ++ kbdev->mdev.name = kbdev->devname; ++ kbdev->mdev.fops = &kbase_fops; ++ kbdev->mdev.parent = get_device(kbdev->dev); ++ kbdev->inited_subsys |= inited_get_device; + -+ /* Passed from kbase to Gator */ ++ /* This needs to happen before registering the device with misc_register(), ++ * otherwise it causes a race condition between registering the device and a ++ * uevent event being generated for userspace, causing udev rules to run ++ * which might expect certain sysfs attributes present. As a result of the ++ * race condition we avoid, some Mali sysfs entries may have appeared to ++ * udev to not exist. 
+ -+ /* ptr to counter dump memory */ -+ void *kernel_dump_buffer; ++ * For more information, see ++ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the ++ * paragraph that starts with "Word of warning", currently the second-last ++ * paragraph. ++ */ ++ err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); ++ if (err) { ++ dev_err(&pdev->dev, "SysFS group creation failed\n"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_sysfs_group; + -+ /* size of counter dump memory */ -+ uint32_t size; ++ err = misc_register(&kbdev->mdev); ++ if (err) { ++ dev_err(kbdev->dev, "Misc device registration failed for %s\n", ++ kbdev->devname); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } ++ kbdev->inited_subsys |= inited_misc_register; + -+ /* the ID of the Mali device */ -+ uint32_t gpu_id; + -+ /* the number of shader cores in the GPU */ -+ uint32_t nr_cores; ++#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++ err = bl_core_client_register(kbdev->devname, ++ kbase_logging_started_cb, ++ kbdev, &kbdev->buslogger, ++ THIS_MODULE, NULL); ++ if (err == 0) { ++ kbdev->inited_subsys |= inited_buslogger; ++ bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); ++ } else { ++ dev_warn(kbdev->dev, "Bus log client registration failed\n"); ++ err = 0; ++ } ++#endif + -+ /* the number of core groups */ -+ uint32_t nr_core_groups; ++ err = kbase_gpuprops_populate_user_buffer(kbdev); ++ if (err) { ++ dev_err(&pdev->dev, "GPU property population failed"); ++ kbase_platform_device_remove(pdev); ++ return err; ++ } + -+ /* the memory layout of the performance counters */ -+ enum hwc_type *hwc_layout; ++ dev_info(kbdev->dev, ++ "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + -+ /* the total number of hardware couter blocks */ -+ uint32_t nr_hwc_blocks; -+}; ++ kbase_dev_nr++; + -+/** -+ * @brief Opaque block of Mali data which Gator needs to return to the API later. -+ */ -+struct kbase_gator_hwcnt_handles; ++ return err; ++} ++ ++#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE + +/** -+ * @brief Initialize the resources Gator needs for performance profiling. ++ * kbase_device_suspend - Suspend callback from the OS. + * -+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali -+ * specific information that will be returned to Gator. On entry Gator must have populated the -+ * 'bitmask' field with the counters it wishes to enable for each class of counter block. -+ * Each entry in the array corresponds to a single counter class based on the "hwc_type" -+ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables -+ * the first 4 counters in the block, and so on). See the GPU counter array as returned by -+ * kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU. ++ * This is called by Linux when the device should suspend. + * -+ * @return Pointer to an opaque handle block on success, NULL on error. -+ */ -+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info); -+ -+/** -+ * @brief Free all resources once Gator has finished using performance counters. ++ * @dev: The device to suspend + * -+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the -+ * Mali specific information that will be returned to Gator. -+ * @param opaque_handles A wrapper structure for kbase structures. 
++ * Return: A standard Linux error code + */ -+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles); ++static int kbase_device_suspend(struct device *dev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ if (kbdev->inited_subsys & inited_devfreq) ++ devfreq_suspend_device(kbdev->devfreq); ++#endif ++ ++ kbase_pm_suspend(kbdev); ++ return 0; ++} + +/** -+ * @brief Poll whether a counter dump is successful. ++ * kbase_device_resume - Resume callback from the OS. + * -+ * @param opaque_handles A wrapper structure for kbase structures. -+ * @param[out] success Non-zero on success, zero on failure. ++ * This is called by Linux when the device should resume from suspension. + * -+ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a -+ * completed dump may not have dumped succesfully, so the caller must test for both -+ * a completed and successful dump before processing counters. ++ * @dev: The device to resume ++ * ++ * Return: A standard Linux error code + */ -+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success); ++static int kbase_device_resume(struct device *dev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ kbase_pm_resume(kbdev); ++ ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ if (kbdev->inited_subsys & inited_devfreq) ++ devfreq_resume_device(kbdev->devfreq); ++#endif ++ return 0; ++} + +/** -+ * @brief Request the generation of a new counter dump. ++ * kbase_device_runtime_suspend - Runtime suspend callback from the OS. + * -+ * @param opaque_handles A wrapper structure for kbase structures. ++ * This is called by Linux when the device should prepare for a condition in ++ * which it will not be able to communicate with the CPU(s) and RAM due to ++ * power management. + * -+ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise. ++ * @dev: The device to suspend ++ * ++ * Return: A standard Linux error code + */ -+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles); ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_suspend(struct device *dev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ if (kbdev->inited_subsys & inited_devfreq) ++ devfreq_suspend_device(kbdev->devfreq); ++#endif ++ ++ if (kbdev->pm.backend.callback_power_runtime_off) { ++ kbdev->pm.backend.callback_power_runtime_off(kbdev); ++ dev_dbg(dev, "runtime suspend\n"); ++ } ++ return 0; ++} ++#endif /* KBASE_PM_RUNTIME */ + +/** -+ * @brief This function is used to fetch the names table based on the Mali device in use. ++ * kbase_device_runtime_resume - Runtime resume callback from the OS. + * -+ * @param[out] total_counters The total number of counters short names in the Mali devices' list. ++ * This is called by Linux when the device should go into a fully active state. + * -+ * @return Pointer to an array of strings of length *total_counters. 
-+ */ -+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters); -+ -+/** -+ * @brief This function is used to terminate the use of the names table. ++ * @dev: The device to suspend + * -+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value. ++ * Return: A standard Linux error code + */ -+extern void kbase_gator_hwcnt_term_names(void); + ++#ifdef KBASE_PM_RUNTIME ++static int kbase_device_runtime_resume(struct device *dev) ++{ ++ int ret = 0; ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ if (kbdev->pm.backend.callback_power_runtime_on) { ++ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); ++ dev_dbg(dev, "runtime resume\n"); ++ } ++ ++#if defined(CONFIG_MALI_DEVFREQ) && \ ++ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ if (kbdev->inited_subsys & inited_devfreq) ++ devfreq_resume_device(kbdev->devfreq); +#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h -new file mode 100644 -index 000000000..cad19b662 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h -@@ -0,0 +1,2170 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ ++ return ret; ++} ++#endif /* KBASE_PM_RUNTIME */ ++ ++ ++#ifdef KBASE_PM_RUNTIME ++/** ++ * kbase_device_runtime_idle - Runtime idle callback from the OS. ++ * @dev: The device to suspend + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * This is called by Linux when the device appears to be inactive and it might ++ * be placed into a low power state. + * ++ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, ++ * otherwise a standard Linux error code + */ ++static int kbase_device_runtime_idle(struct device *dev) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); + ++ if (!kbdev) ++ return -ENODEV; + ++ /* Use platform specific implementation if it exists. */ ++ if (kbdev->pm.backend.callback_power_runtime_idle) ++ return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + -+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_ -+#define _KBASE_GATOR_HWCNT_NAMES_H_ ++ return 0; ++} ++#endif /* KBASE_PM_RUNTIME */ ++ ++/* The power management operations for the platform driver. 
++ */ ++static const struct dev_pm_ops kbase_pm_ops = { ++ .suspend = kbase_device_suspend, ++ .resume = kbase_device_resume, ++#ifdef KBASE_PM_RUNTIME ++ .runtime_suspend = kbase_device_runtime_suspend, ++ .runtime_resume = kbase_device_runtime_resume, ++ .runtime_idle = kbase_device_runtime_idle, ++#endif /* KBASE_PM_RUNTIME */ ++}; ++ ++#ifdef CONFIG_OF ++static const struct of_device_id kbase_dt_ids[] = { ++ { .compatible = "arm,malit7xx" }, ++ { .compatible = "arm,mali-midgard" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, kbase_dt_ids); ++#endif ++ ++static struct platform_driver kbase_platform_driver = { ++ .probe = kbase_platform_device_probe, ++ .remove = kbase_platform_device_remove, ++ .shutdown = kbase_platform_device_shutdown, ++ .driver = { ++ .name = "midgard", ++ .owner = THIS_MODULE, ++ .pm = &kbase_pm_ops, ++ .of_match_table = of_match_ptr(kbase_dt_ids), ++ }, ++}; + +/* -+ * "Short names" for hardware counters used by Streamline. Counters names are -+ * stored in accordance with their memory layout in the binary counter block -+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block -+ * of 64 counters, and each GPU implements the same set of "masters" although -+ * the counters each master exposes within its block of 64 may vary. -+ * -+ * Counters which are an empty string are simply "holes" in the counter memory -+ * where no counter exists. ++ * The driver will not provide a shortcut to create the Mali platform device ++ * anymore when using Device Tree. + */ ++#ifdef CONFIG_OF ++module_platform_driver(kbase_platform_driver); ++#else + -+static const char * const hardware_counters_mali_t60x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T60x_MESSAGES_SENT", -+ "T60x_MESSAGES_RECEIVED", -+ "T60x_GPU_ACTIVE", -+ "T60x_IRQ_ACTIVE", -+ "T60x_JS0_JOBS", -+ "T60x_JS0_TASKS", -+ "T60x_JS0_ACTIVE", -+ "", -+ "T60x_JS0_WAIT_READ", -+ "T60x_JS0_WAIT_ISSUE", -+ "T60x_JS0_WAIT_DEPEND", -+ "T60x_JS0_WAIT_FINISH", -+ "T60x_JS1_JOBS", -+ "T60x_JS1_TASKS", -+ "T60x_JS1_ACTIVE", -+ "", -+ "T60x_JS1_WAIT_READ", -+ "T60x_JS1_WAIT_ISSUE", -+ "T60x_JS1_WAIT_DEPEND", -+ "T60x_JS1_WAIT_FINISH", -+ "T60x_JS2_JOBS", -+ "T60x_JS2_TASKS", -+ "T60x_JS2_ACTIVE", -+ "", -+ "T60x_JS2_WAIT_READ", -+ "T60x_JS2_WAIT_ISSUE", -+ "T60x_JS2_WAIT_DEPEND", -+ "T60x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++static int __init rockchip_gpu_init_driver(void) ++{ ++ return platform_driver_register(&kbase_platform_driver); ++} ++late_initcall(rockchip_gpu_init_driver); + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T60x_TI_JOBS_PROCESSED", -+ "T60x_TI_TRIANGLES", -+ "T60x_TI_QUADS", -+ "T60x_TI_POLYGONS", -+ "T60x_TI_POINTS", -+ "T60x_TI_LINES", -+ "T60x_TI_VCACHE_HIT", -+ "T60x_TI_VCACHE_MISS", -+ "T60x_TI_FRONT_FACING", -+ "T60x_TI_BACK_FACING", -+ "T60x_TI_PRIM_VISIBLE", -+ "T60x_TI_PRIM_CULLED", -+ "T60x_TI_PRIM_CLIPPED", -+ "T60x_TI_LEVEL0", -+ "T60x_TI_LEVEL1", -+ "T60x_TI_LEVEL2", -+ "T60x_TI_LEVEL3", -+ "T60x_TI_LEVEL4", -+ "T60x_TI_LEVEL5", -+ "T60x_TI_LEVEL6", -+ "T60x_TI_LEVEL7", -+ "T60x_TI_COMMAND_1", -+ "T60x_TI_COMMAND_2", -+ "T60x_TI_COMMAND_3", -+ "T60x_TI_COMMAND_4", -+ "T60x_TI_COMMAND_4_7", -+ "T60x_TI_COMMAND_8_15", -+ "T60x_TI_COMMAND_16_63", -+ "T60x_TI_COMMAND_64", -+ "T60x_TI_COMPRESS_IN", -+ "T60x_TI_COMPRESS_OUT", -+ "T60x_TI_COMPRESS_FLUSH", -+ 
"T60x_TI_TIMESTAMPS", -+ "T60x_TI_PCACHE_HIT", -+ "T60x_TI_PCACHE_MISS", -+ "T60x_TI_PCACHE_LINE", -+ "T60x_TI_PCACHE_STALL", -+ "T60x_TI_WRBUF_HIT", -+ "T60x_TI_WRBUF_MISS", -+ "T60x_TI_WRBUF_LINE", -+ "T60x_TI_WRBUF_PARTIAL", -+ "T60x_TI_WRBUF_STALL", -+ "T60x_TI_ACTIVE", -+ "T60x_TI_LOADING_DESC", -+ "T60x_TI_INDEX_WAIT", -+ "T60x_TI_INDEX_RANGE_WAIT", -+ "T60x_TI_VERTEX_WAIT", -+ "T60x_TI_PCACHE_WAIT", -+ "T60x_TI_WRBUF_WAIT", -+ "T60x_TI_BUS_READ", -+ "T60x_TI_BUS_WRITE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T60x_TI_UTLB_STALL", -+ "T60x_TI_UTLB_REPLAY_MISS", -+ "T60x_TI_UTLB_REPLAY_FULL", -+ "T60x_TI_UTLB_NEW_MISS", -+ "T60x_TI_UTLB_HIT", ++static int __init kbase_driver_init(void) ++{ ++ int ret; + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T60x_FRAG_ACTIVE", -+ "T60x_FRAG_PRIMITIVES", -+ "T60x_FRAG_PRIMITIVES_DROPPED", -+ "T60x_FRAG_CYCLES_DESC", -+ "T60x_FRAG_CYCLES_PLR", -+ "T60x_FRAG_CYCLES_VERT", -+ "T60x_FRAG_CYCLES_TRISETUP", -+ "T60x_FRAG_CYCLES_RAST", -+ "T60x_FRAG_THREADS", -+ "T60x_FRAG_DUMMY_THREADS", -+ "T60x_FRAG_QUADS_RAST", -+ "T60x_FRAG_QUADS_EZS_TEST", -+ "T60x_FRAG_QUADS_EZS_KILLED", -+ "T60x_FRAG_THREADS_LZS_TEST", -+ "T60x_FRAG_THREADS_LZS_KILLED", -+ "T60x_FRAG_CYCLES_NO_TILE", -+ "T60x_FRAG_NUM_TILES", -+ "T60x_FRAG_TRANS_ELIM", -+ "T60x_COMPUTE_ACTIVE", -+ "T60x_COMPUTE_TASKS", -+ "T60x_COMPUTE_THREADS", -+ "T60x_COMPUTE_CYCLES_DESC", -+ "T60x_TRIPIPE_ACTIVE", -+ "T60x_ARITH_WORDS", -+ "T60x_ARITH_CYCLES_REG", -+ "T60x_ARITH_CYCLES_L0", -+ "T60x_ARITH_FRAG_DEPEND", -+ "T60x_LS_WORDS", -+ "T60x_LS_ISSUES", -+ "T60x_LS_RESTARTS", -+ "T60x_LS_REISSUES_MISS", -+ "T60x_LS_REISSUES_VD", -+ "T60x_LS_REISSUE_ATTRIB_MISS", -+ "T60x_LS_NO_WB", -+ "T60x_TEX_WORDS", -+ "T60x_TEX_BUBBLES", -+ "T60x_TEX_WORDS_L0", -+ "T60x_TEX_WORDS_DESC", -+ "T60x_TEX_ISSUES", -+ "T60x_TEX_RECIRC_FMISS", -+ "T60x_TEX_RECIRC_DESC", -+ "T60x_TEX_RECIRC_MULTI", -+ "T60x_TEX_RECIRC_PMISS", -+ "T60x_TEX_RECIRC_CONF", -+ "T60x_LSC_READ_HITS", -+ "T60x_LSC_READ_MISSES", -+ "T60x_LSC_WRITE_HITS", -+ "T60x_LSC_WRITE_MISSES", -+ "T60x_LSC_ATOMIC_HITS", -+ "T60x_LSC_ATOMIC_MISSES", -+ "T60x_LSC_LINE_FETCHES", -+ "T60x_LSC_DIRTY_LINE", -+ "T60x_LSC_SNOOPS", -+ "T60x_AXI_TLB_STALL", -+ "T60x_AXI_TLB_MISS", -+ "T60x_AXI_TLB_TRANSACTION", -+ "T60x_LS_TLB_MISS", -+ "T60x_LS_TLB_HIT", -+ "T60x_AXI_BEATS_READ", -+ "T60x_AXI_BEATS_WRITTEN", ++ ret = kbase_platform_early_init(); ++ if (ret) ++ return ret; + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T60x_MMU_HIT", -+ "T60x_MMU_NEW_MISS", -+ "T60x_MMU_REPLAY_FULL", -+ "T60x_MMU_REPLAY_MISS", -+ "T60x_MMU_TABLE_WALK", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T60x_UTLB_HIT", -+ "T60x_UTLB_NEW_MISS", -+ "T60x_UTLB_REPLAY_FULL", -+ "T60x_UTLB_REPLAY_MISS", -+ "T60x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T60x_L2_EXT_WRITE_BEATS", -+ "T60x_L2_EXT_READ_BEATS", -+ "T60x_L2_ANY_LOOKUP", -+ "T60x_L2_READ_LOOKUP", -+ "T60x_L2_SREAD_LOOKUP", -+ "T60x_L2_READ_REPLAY", -+ "T60x_L2_READ_SNOOP", -+ "T60x_L2_READ_HIT", -+ "T60x_L2_CLEAN_MISS", -+ "T60x_L2_WRITE_LOOKUP", -+ "T60x_L2_SWRITE_LOOKUP", -+ "T60x_L2_WRITE_REPLAY", -+ "T60x_L2_WRITE_SNOOP", -+ "T60x_L2_WRITE_HIT", -+ "T60x_L2_EXT_READ_FULL", -+ "T60x_L2_EXT_READ_HALF", -+ "T60x_L2_EXT_WRITE_FULL", -+ "T60x_L2_EXT_WRITE_HALF", -+ "T60x_L2_EXT_READ", -+ "T60x_L2_EXT_READ_LINE", -+ "T60x_L2_EXT_WRITE", -+ "T60x_L2_EXT_WRITE_LINE", -+ "T60x_L2_EXT_WRITE_SMALL", -+ "T60x_L2_EXT_BARRIER", -+ "T60x_L2_EXT_AR_STALL", -+ "T60x_L2_EXT_R_BUF_FULL", -+ 
"T60x_L2_EXT_RD_BUF_FULL", -+ "T60x_L2_EXT_R_RAW", -+ "T60x_L2_EXT_W_STALL", -+ "T60x_L2_EXT_W_BUF_FULL", -+ "T60x_L2_EXT_R_W_HAZARD", -+ "T60x_L2_TAG_HAZARD", -+ "T60x_L2_SNOOP_FULL", -+ "T60x_L2_REPLAY_FULL" -+}; -+static const char * const hardware_counters_mali_t62x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T62x_MESSAGES_SENT", -+ "T62x_MESSAGES_RECEIVED", -+ "T62x_GPU_ACTIVE", -+ "T62x_IRQ_ACTIVE", -+ "T62x_JS0_JOBS", -+ "T62x_JS0_TASKS", -+ "T62x_JS0_ACTIVE", -+ "", -+ "T62x_JS0_WAIT_READ", -+ "T62x_JS0_WAIT_ISSUE", -+ "T62x_JS0_WAIT_DEPEND", -+ "T62x_JS0_WAIT_FINISH", -+ "T62x_JS1_JOBS", -+ "T62x_JS1_TASKS", -+ "T62x_JS1_ACTIVE", -+ "", -+ "T62x_JS1_WAIT_READ", -+ "T62x_JS1_WAIT_ISSUE", -+ "T62x_JS1_WAIT_DEPEND", -+ "T62x_JS1_WAIT_FINISH", -+ "T62x_JS2_JOBS", -+ "T62x_JS2_TASKS", -+ "T62x_JS2_ACTIVE", -+ "", -+ "T62x_JS2_WAIT_READ", -+ "T62x_JS2_WAIT_ISSUE", -+ "T62x_JS2_WAIT_DEPEND", -+ "T62x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++#ifdef CONFIG_MALI_PLATFORM_FAKE ++ ret = kbase_platform_fake_register(); ++ if (ret) ++ return ret; ++#endif ++ ret = platform_driver_register(&kbase_platform_driver); ++#ifdef CONFIG_MALI_PLATFORM_FAKE ++ if (ret) ++ kbase_platform_fake_unregister(); ++#endif ++ return ret; ++} + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T62x_TI_JOBS_PROCESSED", -+ "T62x_TI_TRIANGLES", -+ "T62x_TI_QUADS", -+ "T62x_TI_POLYGONS", -+ "T62x_TI_POINTS", -+ "T62x_TI_LINES", -+ "T62x_TI_VCACHE_HIT", -+ "T62x_TI_VCACHE_MISS", -+ "T62x_TI_FRONT_FACING", -+ "T62x_TI_BACK_FACING", -+ "T62x_TI_PRIM_VISIBLE", -+ "T62x_TI_PRIM_CULLED", -+ "T62x_TI_PRIM_CLIPPED", -+ "T62x_TI_LEVEL0", -+ "T62x_TI_LEVEL1", -+ "T62x_TI_LEVEL2", -+ "T62x_TI_LEVEL3", -+ "T62x_TI_LEVEL4", -+ "T62x_TI_LEVEL5", -+ "T62x_TI_LEVEL6", -+ "T62x_TI_LEVEL7", -+ "T62x_TI_COMMAND_1", -+ "T62x_TI_COMMAND_2", -+ "T62x_TI_COMMAND_3", -+ "T62x_TI_COMMAND_4", -+ "T62x_TI_COMMAND_5_7", -+ "T62x_TI_COMMAND_8_15", -+ "T62x_TI_COMMAND_16_63", -+ "T62x_TI_COMMAND_64", -+ "T62x_TI_COMPRESS_IN", -+ "T62x_TI_COMPRESS_OUT", -+ "T62x_TI_COMPRESS_FLUSH", -+ "T62x_TI_TIMESTAMPS", -+ "T62x_TI_PCACHE_HIT", -+ "T62x_TI_PCACHE_MISS", -+ "T62x_TI_PCACHE_LINE", -+ "T62x_TI_PCACHE_STALL", -+ "T62x_TI_WRBUF_HIT", -+ "T62x_TI_WRBUF_MISS", -+ "T62x_TI_WRBUF_LINE", -+ "T62x_TI_WRBUF_PARTIAL", -+ "T62x_TI_WRBUF_STALL", -+ "T62x_TI_ACTIVE", -+ "T62x_TI_LOADING_DESC", -+ "T62x_TI_INDEX_WAIT", -+ "T62x_TI_INDEX_RANGE_WAIT", -+ "T62x_TI_VERTEX_WAIT", -+ "T62x_TI_PCACHE_WAIT", -+ "T62x_TI_WRBUF_WAIT", -+ "T62x_TI_BUS_READ", -+ "T62x_TI_BUS_WRITE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T62x_TI_UTLB_STALL", -+ "T62x_TI_UTLB_REPLAY_MISS", -+ "T62x_TI_UTLB_REPLAY_FULL", -+ "T62x_TI_UTLB_NEW_MISS", -+ "T62x_TI_UTLB_HIT", ++static void __exit kbase_driver_exit(void) ++{ ++ platform_driver_unregister(&kbase_platform_driver); ++#ifdef CONFIG_MALI_PLATFORM_FAKE ++ kbase_platform_fake_unregister(); ++#endif ++} + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "T62x_SHADER_CORE_ACTIVE", -+ "T62x_FRAG_ACTIVE", -+ "T62x_FRAG_PRIMITIVES", -+ "T62x_FRAG_PRIMITIVES_DROPPED", -+ "T62x_FRAG_CYCLES_DESC", -+ "T62x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T62x_FRAG_CYCLES_VERT", -+ "T62x_FRAG_CYCLES_TRISETUP", -+ "T62x_FRAG_CYCLES_EZS_ACTIVE", -+ "T62x_FRAG_THREADS", -+ "T62x_FRAG_DUMMY_THREADS", -+ "T62x_FRAG_QUADS_RAST", -+ "T62x_FRAG_QUADS_EZS_TEST", -+ 
"T62x_FRAG_QUADS_EZS_KILLED", -+ "T62x_FRAG_THREADS_LZS_TEST", -+ "T62x_FRAG_THREADS_LZS_KILLED", -+ "T62x_FRAG_CYCLES_NO_TILE", -+ "T62x_FRAG_NUM_TILES", -+ "T62x_FRAG_TRANS_ELIM", -+ "T62x_COMPUTE_ACTIVE", -+ "T62x_COMPUTE_TASKS", -+ "T62x_COMPUTE_THREADS", -+ "T62x_COMPUTE_CYCLES_DESC", -+ "T62x_TRIPIPE_ACTIVE", -+ "T62x_ARITH_WORDS", -+ "T62x_ARITH_CYCLES_REG", -+ "T62x_ARITH_CYCLES_L0", -+ "T62x_ARITH_FRAG_DEPEND", -+ "T62x_LS_WORDS", -+ "T62x_LS_ISSUES", -+ "T62x_LS_RESTARTS", -+ "T62x_LS_REISSUES_MISS", -+ "T62x_LS_REISSUES_VD", -+ "T62x_LS_REISSUE_ATTRIB_MISS", -+ "T62x_LS_NO_WB", -+ "T62x_TEX_WORDS", -+ "T62x_TEX_BUBBLES", -+ "T62x_TEX_WORDS_L0", -+ "T62x_TEX_WORDS_DESC", -+ "T62x_TEX_ISSUES", -+ "T62x_TEX_RECIRC_FMISS", -+ "T62x_TEX_RECIRC_DESC", -+ "T62x_TEX_RECIRC_MULTI", -+ "T62x_TEX_RECIRC_PMISS", -+ "T62x_TEX_RECIRC_CONF", -+ "T62x_LSC_READ_HITS", -+ "T62x_LSC_READ_MISSES", -+ "T62x_LSC_WRITE_HITS", -+ "T62x_LSC_WRITE_MISSES", -+ "T62x_LSC_ATOMIC_HITS", -+ "T62x_LSC_ATOMIC_MISSES", -+ "T62x_LSC_LINE_FETCHES", -+ "T62x_LSC_DIRTY_LINE", -+ "T62x_LSC_SNOOPS", -+ "T62x_AXI_TLB_STALL", -+ "T62x_AXI_TLB_MISS", -+ "T62x_AXI_TLB_TRANSACTION", -+ "T62x_LS_TLB_MISS", -+ "T62x_LS_TLB_HIT", -+ "T62x_AXI_BEATS_READ", -+ "T62x_AXI_BEATS_WRITTEN", ++module_init(kbase_driver_init); ++module_exit(kbase_driver_exit); + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T62x_MMU_HIT", -+ "T62x_MMU_NEW_MISS", -+ "T62x_MMU_REPLAY_FULL", -+ "T62x_MMU_REPLAY_MISS", -+ "T62x_MMU_TABLE_WALK", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T62x_UTLB_HIT", -+ "T62x_UTLB_NEW_MISS", -+ "T62x_UTLB_REPLAY_FULL", -+ "T62x_UTLB_REPLAY_MISS", -+ "T62x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T62x_L2_EXT_WRITE_BEATS", -+ "T62x_L2_EXT_READ_BEATS", -+ "T62x_L2_ANY_LOOKUP", -+ "T62x_L2_READ_LOOKUP", -+ "T62x_L2_SREAD_LOOKUP", -+ "T62x_L2_READ_REPLAY", -+ "T62x_L2_READ_SNOOP", -+ "T62x_L2_READ_HIT", -+ "T62x_L2_CLEAN_MISS", -+ "T62x_L2_WRITE_LOOKUP", -+ "T62x_L2_SWRITE_LOOKUP", -+ "T62x_L2_WRITE_REPLAY", -+ "T62x_L2_WRITE_SNOOP", -+ "T62x_L2_WRITE_HIT", -+ "T62x_L2_EXT_READ_FULL", -+ "T62x_L2_EXT_READ_HALF", -+ "T62x_L2_EXT_WRITE_FULL", -+ "T62x_L2_EXT_WRITE_HALF", -+ "T62x_L2_EXT_READ", -+ "T62x_L2_EXT_READ_LINE", -+ "T62x_L2_EXT_WRITE", -+ "T62x_L2_EXT_WRITE_LINE", -+ "T62x_L2_EXT_WRITE_SMALL", -+ "T62x_L2_EXT_BARRIER", -+ "T62x_L2_EXT_AR_STALL", -+ "T62x_L2_EXT_R_BUF_FULL", -+ "T62x_L2_EXT_RD_BUF_FULL", -+ "T62x_L2_EXT_R_RAW", -+ "T62x_L2_EXT_W_STALL", -+ "T62x_L2_EXT_W_BUF_FULL", -+ "T62x_L2_EXT_R_W_HAZARD", -+ "T62x_L2_TAG_HAZARD", -+ "T62x_L2_SNOOP_FULL", -+ "T62x_L2_REPLAY_FULL" -+}; ++#endif /* CONFIG_OF */ + -+static const char * const hardware_counters_mali_t72x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T72x_GPU_ACTIVE", -+ "T72x_IRQ_ACTIVE", -+ "T72x_JS0_JOBS", -+ "T72x_JS0_TASKS", -+ "T72x_JS0_ACTIVE", -+ "T72x_JS1_JOBS", -+ "T72x_JS1_TASKS", -+ "T72x_JS1_ACTIVE", -+ "T72x_JS2_JOBS", -+ "T72x_JS2_TASKS", -+ "T72x_JS2_ACTIVE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ ++ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ ++ __stringify(BASE_UK_VERSION_MINOR) ")"); + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T72x_TI_JOBS_PROCESSED", -+ "T72x_TI_TRIANGLES", -+ "T72x_TI_QUADS", -+ "T72x_TI_POLYGONS", -+ "T72x_TI_POINTS", -+ "T72x_TI_LINES", -+ "T72x_TI_FRONT_FACING", -+ "T72x_TI_BACK_FACING", -+ "T72x_TI_PRIM_VISIBLE", -+ "T72x_TI_PRIM_CULLED", -+ "T72x_TI_PRIM_CLIPPED", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T72x_TI_ACTIVE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE) ++#define CREATE_TRACE_POINTS ++#endif + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T72x_FRAG_ACTIVE", -+ "T72x_FRAG_PRIMITIVES", -+ "T72x_FRAG_PRIMITIVES_DROPPED", -+ "T72x_FRAG_THREADS", -+ "T72x_FRAG_DUMMY_THREADS", -+ "T72x_FRAG_QUADS_RAST", -+ "T72x_FRAG_QUADS_EZS_TEST", -+ "T72x_FRAG_QUADS_EZS_KILLED", -+ "T72x_FRAG_THREADS_LZS_TEST", -+ "T72x_FRAG_THREADS_LZS_KILLED", -+ "T72x_FRAG_CYCLES_NO_TILE", -+ "T72x_FRAG_NUM_TILES", -+ "T72x_FRAG_TRANS_ELIM", -+ "T72x_COMPUTE_ACTIVE", -+ "T72x_COMPUTE_TASKS", -+ "T72x_COMPUTE_THREADS", -+ "T72x_TRIPIPE_ACTIVE", -+ "T72x_ARITH_WORDS", -+ "T72x_ARITH_CYCLES_REG", -+ "T72x_LS_WORDS", -+ "T72x_LS_ISSUES", -+ "T72x_LS_RESTARTS", -+ "T72x_LS_REISSUES_MISS", -+ "T72x_TEX_WORDS", -+ "T72x_TEX_BUBBLES", -+ "T72x_TEX_ISSUES", -+ "T72x_LSC_READ_HITS", -+ "T72x_LSC_READ_MISSES", -+ "T72x_LSC_WRITE_HITS", -+ "T72x_LSC_WRITE_MISSES", -+ "T72x_LSC_ATOMIC_HITS", -+ "T72x_LSC_ATOMIC_MISSES", -+ "T72x_LSC_LINE_FETCHES", -+ "T72x_LSC_DIRTY_LINE", -+ "T72x_LSC_SNOOPS", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++#ifdef CONFIG_MALI_GATOR_SUPPORT ++/* Create the trace points (otherwise we just get code to call a tracepoint) */ ++#include "mali_linux_trace.h" + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T72x_L2_EXT_WRITE_BEAT", -+ "T72x_L2_EXT_READ_BEAT", -+ "T72x_L2_READ_SNOOP", -+ "T72x_L2_READ_HIT", -+ "T72x_L2_WRITE_SNOOP", -+ "T72x_L2_WRITE_HIT", -+ "T72x_L2_EXT_WRITE_SMALL", -+ "T72x_L2_EXT_BARRIER", -+ "T72x_L2_EXT_AR_STALL", -+ "T72x_L2_EXT_W_STALL", -+ "T72x_L2_SNOOP_FULL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "" -+}; ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); + -+static const char * const hardware_counters_mali_t76x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T76x_MESSAGES_SENT", -+ "T76x_MESSAGES_RECEIVED", -+ "T76x_GPU_ACTIVE", -+ "T76x_IRQ_ACTIVE", -+ "T76x_JS0_JOBS", -+ "T76x_JS0_TASKS", -+ "T76x_JS0_ACTIVE", -+ "", -+ 
"T76x_JS0_WAIT_READ", -+ "T76x_JS0_WAIT_ISSUE", -+ "T76x_JS0_WAIT_DEPEND", -+ "T76x_JS0_WAIT_FINISH", -+ "T76x_JS1_JOBS", -+ "T76x_JS1_TASKS", -+ "T76x_JS1_ACTIVE", -+ "", -+ "T76x_JS1_WAIT_READ", -+ "T76x_JS1_WAIT_ISSUE", -+ "T76x_JS1_WAIT_DEPEND", -+ "T76x_JS1_WAIT_FINISH", -+ "T76x_JS2_JOBS", -+ "T76x_JS2_TASKS", -+ "T76x_JS2_ACTIVE", -+ "", -+ "T76x_JS2_WAIT_READ", -+ "T76x_JS2_WAIT_ISSUE", -+ "T76x_JS2_WAIT_DEPEND", -+ "T76x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++void kbase_trace_mali_pm_status(u32 event, u64 value) ++{ ++ trace_mali_pm_status(event, value); ++} + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T76x_TI_JOBS_PROCESSED", -+ "T76x_TI_TRIANGLES", -+ "T76x_TI_QUADS", -+ "T76x_TI_POLYGONS", -+ "T76x_TI_POINTS", -+ "T76x_TI_LINES", -+ "T76x_TI_VCACHE_HIT", -+ "T76x_TI_VCACHE_MISS", -+ "T76x_TI_FRONT_FACING", -+ "T76x_TI_BACK_FACING", -+ "T76x_TI_PRIM_VISIBLE", -+ "T76x_TI_PRIM_CULLED", -+ "T76x_TI_PRIM_CLIPPED", -+ "T76x_TI_LEVEL0", -+ "T76x_TI_LEVEL1", -+ "T76x_TI_LEVEL2", -+ "T76x_TI_LEVEL3", -+ "T76x_TI_LEVEL4", -+ "T76x_TI_LEVEL5", -+ "T76x_TI_LEVEL6", -+ "T76x_TI_LEVEL7", -+ "T76x_TI_COMMAND_1", -+ "T76x_TI_COMMAND_2", -+ "T76x_TI_COMMAND_3", -+ "T76x_TI_COMMAND_4", -+ "T76x_TI_COMMAND_5_7", -+ "T76x_TI_COMMAND_8_15", -+ "T76x_TI_COMMAND_16_63", -+ "T76x_TI_COMMAND_64", -+ "T76x_TI_COMPRESS_IN", -+ "T76x_TI_COMPRESS_OUT", -+ "T76x_TI_COMPRESS_FLUSH", -+ "T76x_TI_TIMESTAMPS", -+ "T76x_TI_PCACHE_HIT", -+ "T76x_TI_PCACHE_MISS", -+ "T76x_TI_PCACHE_LINE", -+ "T76x_TI_PCACHE_STALL", -+ "T76x_TI_WRBUF_HIT", -+ "T76x_TI_WRBUF_MISS", -+ "T76x_TI_WRBUF_LINE", -+ "T76x_TI_WRBUF_PARTIAL", -+ "T76x_TI_WRBUF_STALL", -+ "T76x_TI_ACTIVE", -+ "T76x_TI_LOADING_DESC", -+ "T76x_TI_INDEX_WAIT", -+ "T76x_TI_INDEX_RANGE_WAIT", -+ "T76x_TI_VERTEX_WAIT", -+ "T76x_TI_PCACHE_WAIT", -+ "T76x_TI_WRBUF_WAIT", -+ "T76x_TI_BUS_READ", -+ "T76x_TI_BUS_WRITE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T76x_TI_UTLB_HIT", -+ "T76x_TI_UTLB_NEW_MISS", -+ "T76x_TI_UTLB_REPLAY_FULL", -+ "T76x_TI_UTLB_REPLAY_MISS", -+ "T76x_TI_UTLB_STALL", ++void kbase_trace_mali_pm_power_off(u32 event, u64 value) ++{ ++ trace_mali_pm_power_off(event, value); ++} + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T76x_FRAG_ACTIVE", -+ "T76x_FRAG_PRIMITIVES", -+ "T76x_FRAG_PRIMITIVES_DROPPED", -+ "T76x_FRAG_CYCLES_DESC", -+ "T76x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T76x_FRAG_CYCLES_VERT", -+ "T76x_FRAG_CYCLES_TRISETUP", -+ "T76x_FRAG_CYCLES_EZS_ACTIVE", -+ "T76x_FRAG_THREADS", -+ "T76x_FRAG_DUMMY_THREADS", -+ "T76x_FRAG_QUADS_RAST", -+ "T76x_FRAG_QUADS_EZS_TEST", -+ "T76x_FRAG_QUADS_EZS_KILLED", -+ "T76x_FRAG_THREADS_LZS_TEST", -+ "T76x_FRAG_THREADS_LZS_KILLED", -+ "T76x_FRAG_CYCLES_NO_TILE", -+ "T76x_FRAG_NUM_TILES", -+ "T76x_FRAG_TRANS_ELIM", -+ "T76x_COMPUTE_ACTIVE", -+ "T76x_COMPUTE_TASKS", -+ "T76x_COMPUTE_THREADS", -+ "T76x_COMPUTE_CYCLES_DESC", -+ "T76x_TRIPIPE_ACTIVE", -+ "T76x_ARITH_WORDS", -+ "T76x_ARITH_CYCLES_REG", -+ "T76x_ARITH_CYCLES_L0", -+ "T76x_ARITH_FRAG_DEPEND", -+ "T76x_LS_WORDS", -+ "T76x_LS_ISSUES", -+ "T76x_LS_REISSUE_ATTR", -+ "T76x_LS_REISSUES_VARY", -+ "T76x_LS_VARY_RV_MISS", -+ "T76x_LS_VARY_RV_HIT", -+ "T76x_LS_NO_UNPARK", -+ "T76x_TEX_WORDS", -+ "T76x_TEX_BUBBLES", -+ "T76x_TEX_WORDS_L0", -+ "T76x_TEX_WORDS_DESC", -+ "T76x_TEX_ISSUES", -+ "T76x_TEX_RECIRC_FMISS", -+ "T76x_TEX_RECIRC_DESC", -+ 
"T76x_TEX_RECIRC_MULTI", -+ "T76x_TEX_RECIRC_PMISS", -+ "T76x_TEX_RECIRC_CONF", -+ "T76x_LSC_READ_HITS", -+ "T76x_LSC_READ_OP", -+ "T76x_LSC_WRITE_HITS", -+ "T76x_LSC_WRITE_OP", -+ "T76x_LSC_ATOMIC_HITS", -+ "T76x_LSC_ATOMIC_OP", -+ "T76x_LSC_LINE_FETCHES", -+ "T76x_LSC_DIRTY_LINE", -+ "T76x_LSC_SNOOPS", -+ "T76x_AXI_TLB_STALL", -+ "T76x_AXI_TLB_MISS", -+ "T76x_AXI_TLB_TRANSACTION", -+ "T76x_LS_TLB_MISS", -+ "T76x_LS_TLB_HIT", -+ "T76x_AXI_BEATS_READ", -+ "T76x_AXI_BEATS_WRITTEN", ++void kbase_trace_mali_pm_power_on(u32 event, u64 value) ++{ ++ trace_mali_pm_power_on(event, value); ++} + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T76x_MMU_HIT", -+ "T76x_MMU_NEW_MISS", -+ "T76x_MMU_REPLAY_FULL", -+ "T76x_MMU_REPLAY_MISS", -+ "T76x_MMU_TABLE_WALK", -+ "T76x_MMU_REQUESTS", -+ "", -+ "", -+ "T76x_UTLB_HIT", -+ "T76x_UTLB_NEW_MISS", -+ "T76x_UTLB_REPLAY_FULL", -+ "T76x_UTLB_REPLAY_MISS", -+ "T76x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T76x_L2_EXT_WRITE_BEATS", -+ "T76x_L2_EXT_READ_BEATS", -+ "T76x_L2_ANY_LOOKUP", -+ "T76x_L2_READ_LOOKUP", -+ "T76x_L2_SREAD_LOOKUP", -+ "T76x_L2_READ_REPLAY", -+ "T76x_L2_READ_SNOOP", -+ "T76x_L2_READ_HIT", -+ "T76x_L2_CLEAN_MISS", -+ "T76x_L2_WRITE_LOOKUP", -+ "T76x_L2_SWRITE_LOOKUP", -+ "T76x_L2_WRITE_REPLAY", -+ "T76x_L2_WRITE_SNOOP", -+ "T76x_L2_WRITE_HIT", -+ "T76x_L2_EXT_READ_FULL", -+ "", -+ "T76x_L2_EXT_WRITE_FULL", -+ "T76x_L2_EXT_R_W_HAZARD", -+ "T76x_L2_EXT_READ", -+ "T76x_L2_EXT_READ_LINE", -+ "T76x_L2_EXT_WRITE", -+ "T76x_L2_EXT_WRITE_LINE", -+ "T76x_L2_EXT_WRITE_SMALL", -+ "T76x_L2_EXT_BARRIER", -+ "T76x_L2_EXT_AR_STALL", -+ "T76x_L2_EXT_R_BUF_FULL", -+ "T76x_L2_EXT_RD_BUF_FULL", -+ "T76x_L2_EXT_R_RAW", -+ "T76x_L2_EXT_W_STALL", -+ "T76x_L2_EXT_W_BUF_FULL", -+ "T76x_L2_EXT_R_BUF_FULL", -+ "T76x_L2_TAG_HAZARD", -+ "T76x_L2_SNOOP_FULL", -+ "T76x_L2_REPLAY_FULL" -+}; ++void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id) ++{ ++ trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? 
kctx->pid : 0), atom_id); ++} + -+static const char * const hardware_counters_mali_t82x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T82x_MESSAGES_SENT", -+ "T82x_MESSAGES_RECEIVED", -+ "T82x_GPU_ACTIVE", -+ "T82x_IRQ_ACTIVE", -+ "T82x_JS0_JOBS", -+ "T82x_JS0_TASKS", -+ "T82x_JS0_ACTIVE", -+ "", -+ "T82x_JS0_WAIT_READ", -+ "T82x_JS0_WAIT_ISSUE", -+ "T82x_JS0_WAIT_DEPEND", -+ "T82x_JS0_WAIT_FINISH", -+ "T82x_JS1_JOBS", -+ "T82x_JS1_TASKS", -+ "T82x_JS1_ACTIVE", -+ "", -+ "T82x_JS1_WAIT_READ", -+ "T82x_JS1_WAIT_ISSUE", -+ "T82x_JS1_WAIT_DEPEND", -+ "T82x_JS1_WAIT_FINISH", -+ "T82x_JS2_JOBS", -+ "T82x_JS2_TASKS", -+ "T82x_JS2_ACTIVE", -+ "", -+ "T82x_JS2_WAIT_READ", -+ "T82x_JS2_WAIT_ISSUE", -+ "T82x_JS2_WAIT_DEPEND", -+ "T82x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) ++{ ++ trace_mali_page_fault_insert_pages(event, value); ++} + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T82x_TI_JOBS_PROCESSED", -+ "T82x_TI_TRIANGLES", -+ "T82x_TI_QUADS", -+ "T82x_TI_POLYGONS", -+ "T82x_TI_POINTS", -+ "T82x_TI_LINES", -+ "T82x_TI_FRONT_FACING", -+ "T82x_TI_BACK_FACING", -+ "T82x_TI_PRIM_VISIBLE", -+ "T82x_TI_PRIM_CULLED", -+ "T82x_TI_PRIM_CLIPPED", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T82x_TI_ACTIVE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++void kbase_trace_mali_mmu_as_in_use(int event) ++{ ++ trace_mali_mmu_as_in_use(event); ++} + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T82x_FRAG_ACTIVE", -+ "T82x_FRAG_PRIMITIVES", -+ "T82x_FRAG_PRIMITIVES_DROPPED", -+ "T82x_FRAG_CYCLES_DESC", -+ "T82x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T82x_FRAG_CYCLES_VERT", -+ "T82x_FRAG_CYCLES_TRISETUP", -+ "T82x_FRAG_CYCLES_EZS_ACTIVE", -+ "T82x_FRAG_THREADS", -+ "T82x_FRAG_DUMMY_THREADS", -+ "T82x_FRAG_QUADS_RAST", -+ "T82x_FRAG_QUADS_EZS_TEST", -+ "T82x_FRAG_QUADS_EZS_KILLED", -+ "T82x_FRAG_THREADS_LZS_TEST", -+ "T82x_FRAG_THREADS_LZS_KILLED", -+ "T82x_FRAG_CYCLES_NO_TILE", -+ "T82x_FRAG_NUM_TILES", -+ "T82x_FRAG_TRANS_ELIM", -+ "T82x_COMPUTE_ACTIVE", -+ "T82x_COMPUTE_TASKS", -+ "T82x_COMPUTE_THREADS", -+ "T82x_COMPUTE_CYCLES_DESC", -+ "T82x_TRIPIPE_ACTIVE", -+ "T82x_ARITH_WORDS", -+ "T82x_ARITH_CYCLES_REG", -+ "T82x_ARITH_CYCLES_L0", -+ "T82x_ARITH_FRAG_DEPEND", -+ "T82x_LS_WORDS", -+ "T82x_LS_ISSUES", -+ "T82x_LS_REISSUE_ATTR", -+ "T82x_LS_REISSUES_VARY", -+ "T82x_LS_VARY_RV_MISS", -+ "T82x_LS_VARY_RV_HIT", -+ "T82x_LS_NO_UNPARK", -+ "T82x_TEX_WORDS", -+ "T82x_TEX_BUBBLES", -+ "T82x_TEX_WORDS_L0", -+ "T82x_TEX_WORDS_DESC", -+ "T82x_TEX_ISSUES", -+ "T82x_TEX_RECIRC_FMISS", -+ "T82x_TEX_RECIRC_DESC", -+ "T82x_TEX_RECIRC_MULTI", -+ "T82x_TEX_RECIRC_PMISS", -+ "T82x_TEX_RECIRC_CONF", -+ "T82x_LSC_READ_HITS", -+ "T82x_LSC_READ_OP", -+ "T82x_LSC_WRITE_HITS", -+ "T82x_LSC_WRITE_OP", -+ "T82x_LSC_ATOMIC_HITS", -+ "T82x_LSC_ATOMIC_OP", -+ "T82x_LSC_LINE_FETCHES", -+ "T82x_LSC_DIRTY_LINE", -+ "T82x_LSC_SNOOPS", -+ "T82x_AXI_TLB_STALL", -+ "T82x_AXI_TLB_MISS", -+ "T82x_AXI_TLB_TRANSACTION", -+ "T82x_LS_TLB_MISS", -+ "T82x_LS_TLB_HIT", -+ "T82x_AXI_BEATS_READ", -+ "T82x_AXI_BEATS_WRITTEN", 
++void kbase_trace_mali_mmu_as_released(int event) ++{ ++ trace_mali_mmu_as_released(event); ++} + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T82x_MMU_HIT", -+ "T82x_MMU_NEW_MISS", -+ "T82x_MMU_REPLAY_FULL", -+ "T82x_MMU_REPLAY_MISS", -+ "T82x_MMU_TABLE_WALK", -+ "T82x_MMU_REQUESTS", -+ "", -+ "", -+ "T82x_UTLB_HIT", -+ "T82x_UTLB_NEW_MISS", -+ "T82x_UTLB_REPLAY_FULL", -+ "T82x_UTLB_REPLAY_MISS", -+ "T82x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T82x_L2_EXT_WRITE_BEATS", -+ "T82x_L2_EXT_READ_BEATS", -+ "T82x_L2_ANY_LOOKUP", -+ "T82x_L2_READ_LOOKUP", -+ "T82x_L2_SREAD_LOOKUP", -+ "T82x_L2_READ_REPLAY", -+ "T82x_L2_READ_SNOOP", -+ "T82x_L2_READ_HIT", -+ "T82x_L2_CLEAN_MISS", -+ "T82x_L2_WRITE_LOOKUP", -+ "T82x_L2_SWRITE_LOOKUP", -+ "T82x_L2_WRITE_REPLAY", -+ "T82x_L2_WRITE_SNOOP", -+ "T82x_L2_WRITE_HIT", -+ "T82x_L2_EXT_READ_FULL", -+ "", -+ "T82x_L2_EXT_WRITE_FULL", -+ "T82x_L2_EXT_R_W_HAZARD", -+ "T82x_L2_EXT_READ", -+ "T82x_L2_EXT_READ_LINE", -+ "T82x_L2_EXT_WRITE", -+ "T82x_L2_EXT_WRITE_LINE", -+ "T82x_L2_EXT_WRITE_SMALL", -+ "T82x_L2_EXT_BARRIER", -+ "T82x_L2_EXT_AR_STALL", -+ "T82x_L2_EXT_R_BUF_FULL", -+ "T82x_L2_EXT_RD_BUF_FULL", -+ "T82x_L2_EXT_R_RAW", -+ "T82x_L2_EXT_W_STALL", -+ "T82x_L2_EXT_W_BUF_FULL", -+ "T82x_L2_EXT_R_BUF_FULL", -+ "T82x_L2_TAG_HAZARD", -+ "T82x_L2_SNOOP_FULL", -+ "T82x_L2_REPLAY_FULL" -+}; ++void kbase_trace_mali_total_alloc_pages_change(long long int event) ++{ ++ trace_mali_total_alloc_pages_change(event); ++} ++#endif /* CONFIG_MALI_GATOR_SUPPORT */ ++#ifdef CONFIG_MALI_SYSTEM_TRACE ++#include "mali_linux_kbase_trace.h" ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +new file mode 100644 +index 000000000..ce0048414 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +@@ -0,0 +1,208 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+static const char * const hardware_counters_mali_t83x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T83x_MESSAGES_SENT", -+ "T83x_MESSAGES_RECEIVED", -+ "T83x_GPU_ACTIVE", -+ "T83x_IRQ_ACTIVE", -+ "T83x_JS0_JOBS", -+ "T83x_JS0_TASKS", -+ "T83x_JS0_ACTIVE", -+ "", -+ "T83x_JS0_WAIT_READ", -+ "T83x_JS0_WAIT_ISSUE", -+ "T83x_JS0_WAIT_DEPEND", -+ "T83x_JS0_WAIT_FINISH", -+ "T83x_JS1_JOBS", -+ "T83x_JS1_TASKS", -+ "T83x_JS1_ACTIVE", -+ "", -+ "T83x_JS1_WAIT_READ", -+ "T83x_JS1_WAIT_ISSUE", -+ "T83x_JS1_WAIT_DEPEND", -+ "T83x_JS1_WAIT_FINISH", -+ "T83x_JS2_JOBS", -+ "T83x_JS2_TASKS", -+ "T83x_JS2_ACTIVE", -+ "", -+ "T83x_JS2_WAIT_READ", -+ "T83x_JS2_WAIT_ISSUE", -+ "T83x_JS2_WAIT_DEPEND", -+ "T83x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T83x_TI_JOBS_PROCESSED", -+ "T83x_TI_TRIANGLES", -+ "T83x_TI_QUADS", -+ "T83x_TI_POLYGONS", -+ "T83x_TI_POINTS", -+ "T83x_TI_LINES", -+ "T83x_TI_FRONT_FACING", -+ "T83x_TI_BACK_FACING", -+ "T83x_TI_PRIM_VISIBLE", -+ "T83x_TI_PRIM_CULLED", -+ "T83x_TI_PRIM_CLIPPED", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T83x_TI_ACTIVE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T83x_FRAG_ACTIVE", -+ "T83x_FRAG_PRIMITIVES", -+ "T83x_FRAG_PRIMITIVES_DROPPED", -+ "T83x_FRAG_CYCLES_DESC", -+ "T83x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T83x_FRAG_CYCLES_VERT", -+ "T83x_FRAG_CYCLES_TRISETUP", -+ "T83x_FRAG_CYCLES_EZS_ACTIVE", -+ "T83x_FRAG_THREADS", -+ "T83x_FRAG_DUMMY_THREADS", -+ "T83x_FRAG_QUADS_RAST", -+ "T83x_FRAG_QUADS_EZS_TEST", -+ "T83x_FRAG_QUADS_EZS_KILLED", -+ "T83x_FRAG_THREADS_LZS_TEST", -+ "T83x_FRAG_THREADS_LZS_KILLED", -+ "T83x_FRAG_CYCLES_NO_TILE", -+ "T83x_FRAG_NUM_TILES", -+ "T83x_FRAG_TRANS_ELIM", -+ "T83x_COMPUTE_ACTIVE", -+ "T83x_COMPUTE_TASKS", -+ "T83x_COMPUTE_THREADS", -+ "T83x_COMPUTE_CYCLES_DESC", -+ "T83x_TRIPIPE_ACTIVE", -+ "T83x_ARITH_WORDS", -+ "T83x_ARITH_CYCLES_REG", -+ "T83x_ARITH_CYCLES_L0", -+ "T83x_ARITH_FRAG_DEPEND", -+ "T83x_LS_WORDS", -+ "T83x_LS_ISSUES", -+ "T83x_LS_REISSUE_ATTR", -+ "T83x_LS_REISSUES_VARY", -+ "T83x_LS_VARY_RV_MISS", -+ "T83x_LS_VARY_RV_HIT", -+ "T83x_LS_NO_UNPARK", -+ "T83x_TEX_WORDS", -+ "T83x_TEX_BUBBLES", -+ "T83x_TEX_WORDS_L0", -+ "T83x_TEX_WORDS_DESC", -+ "T83x_TEX_ISSUES", -+ "T83x_TEX_RECIRC_FMISS", -+ "T83x_TEX_RECIRC_DESC", -+ "T83x_TEX_RECIRC_MULTI", -+ "T83x_TEX_RECIRC_PMISS", -+ "T83x_TEX_RECIRC_CONF", -+ "T83x_LSC_READ_HITS", -+ "T83x_LSC_READ_OP", -+ "T83x_LSC_WRITE_HITS", -+ "T83x_LSC_WRITE_OP", -+ "T83x_LSC_ATOMIC_HITS", -+ "T83x_LSC_ATOMIC_OP", -+ "T83x_LSC_LINE_FETCHES", -+ "T83x_LSC_DIRTY_LINE", -+ "T83x_LSC_SNOOPS", -+ "T83x_AXI_TLB_STALL", -+ "T83x_AXI_TLB_MISS", -+ "T83x_AXI_TLB_TRANSACTION", -+ "T83x_LS_TLB_MISS", -+ "T83x_LS_TLB_HIT", -+ "T83x_AXI_BEATS_READ", -+ "T83x_AXI_BEATS_WRITTEN", ++#include ++#include + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T83x_MMU_HIT", -+ "T83x_MMU_NEW_MISS", -+ "T83x_MMU_REPLAY_FULL", -+ "T83x_MMU_REPLAY_MISS", -+ "T83x_MMU_TABLE_WALK", -+ "T83x_MMU_REQUESTS", -+ "", -+ "", -+ 
"T83x_UTLB_HIT", -+ "T83x_UTLB_NEW_MISS", -+ "T83x_UTLB_REPLAY_FULL", -+ "T83x_UTLB_REPLAY_MISS", -+ "T83x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T83x_L2_EXT_WRITE_BEATS", -+ "T83x_L2_EXT_READ_BEATS", -+ "T83x_L2_ANY_LOOKUP", -+ "T83x_L2_READ_LOOKUP", -+ "T83x_L2_SREAD_LOOKUP", -+ "T83x_L2_READ_REPLAY", -+ "T83x_L2_READ_SNOOP", -+ "T83x_L2_READ_HIT", -+ "T83x_L2_CLEAN_MISS", -+ "T83x_L2_WRITE_LOOKUP", -+ "T83x_L2_SWRITE_LOOKUP", -+ "T83x_L2_WRITE_REPLAY", -+ "T83x_L2_WRITE_SNOOP", -+ "T83x_L2_WRITE_HIT", -+ "T83x_L2_EXT_READ_FULL", -+ "", -+ "T83x_L2_EXT_WRITE_FULL", -+ "T83x_L2_EXT_R_W_HAZARD", -+ "T83x_L2_EXT_READ", -+ "T83x_L2_EXT_READ_LINE", -+ "T83x_L2_EXT_WRITE", -+ "T83x_L2_EXT_WRITE_LINE", -+ "T83x_L2_EXT_WRITE_SMALL", -+ "T83x_L2_EXT_BARRIER", -+ "T83x_L2_EXT_AR_STALL", -+ "T83x_L2_EXT_R_BUF_FULL", -+ "T83x_L2_EXT_RD_BUF_FULL", -+ "T83x_L2_EXT_R_RAW", -+ "T83x_L2_EXT_W_STALL", -+ "T83x_L2_EXT_W_BUF_FULL", -+ "T83x_L2_EXT_R_BUF_FULL", -+ "T83x_L2_TAG_HAZARD", -+ "T83x_L2_SNOOP_FULL", -+ "T83x_L2_REPLAY_FULL" -+}; ++#include "mali_kbase_ctx_sched.h" + -+static const char * const hardware_counters_mali_t86x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T86x_MESSAGES_SENT", -+ "T86x_MESSAGES_RECEIVED", -+ "T86x_GPU_ACTIVE", -+ "T86x_IRQ_ACTIVE", -+ "T86x_JS0_JOBS", -+ "T86x_JS0_TASKS", -+ "T86x_JS0_ACTIVE", -+ "", -+ "T86x_JS0_WAIT_READ", -+ "T86x_JS0_WAIT_ISSUE", -+ "T86x_JS0_WAIT_DEPEND", -+ "T86x_JS0_WAIT_FINISH", -+ "T86x_JS1_JOBS", -+ "T86x_JS1_TASKS", -+ "T86x_JS1_ACTIVE", -+ "", -+ "T86x_JS1_WAIT_READ", -+ "T86x_JS1_WAIT_ISSUE", -+ "T86x_JS1_WAIT_DEPEND", -+ "T86x_JS1_WAIT_FINISH", -+ "T86x_JS2_JOBS", -+ "T86x_JS2_TASKS", -+ "T86x_JS2_ACTIVE", -+ "", -+ "T86x_JS2_WAIT_READ", -+ "T86x_JS2_WAIT_ISSUE", -+ "T86x_JS2_WAIT_DEPEND", -+ "T86x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++int kbase_ctx_sched_init(struct kbase_device *kbdev) ++{ ++ int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T86x_TI_JOBS_PROCESSED", -+ "T86x_TI_TRIANGLES", -+ "T86x_TI_QUADS", -+ "T86x_TI_POLYGONS", -+ "T86x_TI_POINTS", -+ "T86x_TI_LINES", -+ "T86x_TI_VCACHE_HIT", -+ "T86x_TI_VCACHE_MISS", -+ "T86x_TI_FRONT_FACING", -+ "T86x_TI_BACK_FACING", -+ "T86x_TI_PRIM_VISIBLE", -+ "T86x_TI_PRIM_CULLED", -+ "T86x_TI_PRIM_CLIPPED", -+ "T86x_TI_LEVEL0", -+ "T86x_TI_LEVEL1", -+ "T86x_TI_LEVEL2", -+ "T86x_TI_LEVEL3", -+ "T86x_TI_LEVEL4", -+ "T86x_TI_LEVEL5", -+ "T86x_TI_LEVEL6", -+ "T86x_TI_LEVEL7", -+ "T86x_TI_COMMAND_1", -+ "T86x_TI_COMMAND_2", -+ "T86x_TI_COMMAND_3", -+ "T86x_TI_COMMAND_4", -+ "T86x_TI_COMMAND_5_7", -+ "T86x_TI_COMMAND_8_15", -+ "T86x_TI_COMMAND_16_63", -+ "T86x_TI_COMMAND_64", -+ "T86x_TI_COMPRESS_IN", -+ "T86x_TI_COMPRESS_OUT", -+ "T86x_TI_COMPRESS_FLUSH", -+ "T86x_TI_TIMESTAMPS", -+ "T86x_TI_PCACHE_HIT", -+ "T86x_TI_PCACHE_MISS", -+ "T86x_TI_PCACHE_LINE", -+ "T86x_TI_PCACHE_STALL", -+ "T86x_TI_WRBUF_HIT", -+ "T86x_TI_WRBUF_MISS", -+ "T86x_TI_WRBUF_LINE", -+ "T86x_TI_WRBUF_PARTIAL", -+ "T86x_TI_WRBUF_STALL", -+ "T86x_TI_ACTIVE", -+ "T86x_TI_LOADING_DESC", -+ "T86x_TI_INDEX_WAIT", -+ "T86x_TI_INDEX_RANGE_WAIT", -+ "T86x_TI_VERTEX_WAIT", -+ "T86x_TI_PCACHE_WAIT", -+ "T86x_TI_WRBUF_WAIT", -+ "T86x_TI_BUS_READ", -+ "T86x_TI_BUS_WRITE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ 
"T86x_TI_UTLB_HIT", -+ "T86x_TI_UTLB_NEW_MISS", -+ "T86x_TI_UTLB_REPLAY_FULL", -+ "T86x_TI_UTLB_REPLAY_MISS", -+ "T86x_TI_UTLB_STALL", ++ /* These two must be recalculated if nr_hw_address_spaces changes ++ * (e.g. for HW workarounds) */ ++ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { ++ bool use_workaround; + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T86x_FRAG_ACTIVE", -+ "T86x_FRAG_PRIMITIVES", -+ "T86x_FRAG_PRIMITIVES_DROPPED", -+ "T86x_FRAG_CYCLES_DESC", -+ "T86x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T86x_FRAG_CYCLES_VERT", -+ "T86x_FRAG_CYCLES_TRISETUP", -+ "T86x_FRAG_CYCLES_EZS_ACTIVE", -+ "T86x_FRAG_THREADS", -+ "T86x_FRAG_DUMMY_THREADS", -+ "T86x_FRAG_QUADS_RAST", -+ "T86x_FRAG_QUADS_EZS_TEST", -+ "T86x_FRAG_QUADS_EZS_KILLED", -+ "T86x_FRAG_THREADS_LZS_TEST", -+ "T86x_FRAG_THREADS_LZS_KILLED", -+ "T86x_FRAG_CYCLES_NO_TILE", -+ "T86x_FRAG_NUM_TILES", -+ "T86x_FRAG_TRANS_ELIM", -+ "T86x_COMPUTE_ACTIVE", -+ "T86x_COMPUTE_TASKS", -+ "T86x_COMPUTE_THREADS", -+ "T86x_COMPUTE_CYCLES_DESC", -+ "T86x_TRIPIPE_ACTIVE", -+ "T86x_ARITH_WORDS", -+ "T86x_ARITH_CYCLES_REG", -+ "T86x_ARITH_CYCLES_L0", -+ "T86x_ARITH_FRAG_DEPEND", -+ "T86x_LS_WORDS", -+ "T86x_LS_ISSUES", -+ "T86x_LS_REISSUE_ATTR", -+ "T86x_LS_REISSUES_VARY", -+ "T86x_LS_VARY_RV_MISS", -+ "T86x_LS_VARY_RV_HIT", -+ "T86x_LS_NO_UNPARK", -+ "T86x_TEX_WORDS", -+ "T86x_TEX_BUBBLES", -+ "T86x_TEX_WORDS_L0", -+ "T86x_TEX_WORDS_DESC", -+ "T86x_TEX_ISSUES", -+ "T86x_TEX_RECIRC_FMISS", -+ "T86x_TEX_RECIRC_DESC", -+ "T86x_TEX_RECIRC_MULTI", -+ "T86x_TEX_RECIRC_PMISS", -+ "T86x_TEX_RECIRC_CONF", -+ "T86x_LSC_READ_HITS", -+ "T86x_LSC_READ_OP", -+ "T86x_LSC_WRITE_HITS", -+ "T86x_LSC_WRITE_OP", -+ "T86x_LSC_ATOMIC_HITS", -+ "T86x_LSC_ATOMIC_OP", -+ "T86x_LSC_LINE_FETCHES", -+ "T86x_LSC_DIRTY_LINE", -+ "T86x_LSC_SNOOPS", -+ "T86x_AXI_TLB_STALL", -+ "T86x_AXI_TLB_MISS", -+ "T86x_AXI_TLB_TRANSACTION", -+ "T86x_LS_TLB_MISS", -+ "T86x_LS_TLB_HIT", -+ "T86x_AXI_BEATS_READ", -+ "T86x_AXI_BEATS_WRITTEN", ++ use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE; ++ if (use_workaround) { ++ dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only"); ++ kbdev->nr_user_address_spaces = 1; ++ } ++ } + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T86x_MMU_HIT", -+ "T86x_MMU_NEW_MISS", -+ "T86x_MMU_REPLAY_FULL", -+ "T86x_MMU_REPLAY_MISS", -+ "T86x_MMU_TABLE_WALK", -+ "T86x_MMU_REQUESTS", -+ "", -+ "", -+ "T86x_UTLB_HIT", -+ "T86x_UTLB_NEW_MISS", -+ "T86x_UTLB_REPLAY_FULL", -+ "T86x_UTLB_REPLAY_MISS", -+ "T86x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T86x_L2_EXT_WRITE_BEATS", -+ "T86x_L2_EXT_READ_BEATS", -+ "T86x_L2_ANY_LOOKUP", -+ "T86x_L2_READ_LOOKUP", -+ "T86x_L2_SREAD_LOOKUP", -+ "T86x_L2_READ_REPLAY", -+ "T86x_L2_READ_SNOOP", -+ "T86x_L2_READ_HIT", -+ "T86x_L2_CLEAN_MISS", -+ "T86x_L2_WRITE_LOOKUP", -+ "T86x_L2_SWRITE_LOOKUP", -+ "T86x_L2_WRITE_REPLAY", -+ "T86x_L2_WRITE_SNOOP", -+ "T86x_L2_WRITE_HIT", -+ "T86x_L2_EXT_READ_FULL", -+ "", -+ "T86x_L2_EXT_WRITE_FULL", -+ "T86x_L2_EXT_R_W_HAZARD", -+ "T86x_L2_EXT_READ", -+ "T86x_L2_EXT_READ_LINE", -+ "T86x_L2_EXT_WRITE", -+ "T86x_L2_EXT_WRITE_LINE", -+ "T86x_L2_EXT_WRITE_SMALL", -+ "T86x_L2_EXT_BARRIER", -+ "T86x_L2_EXT_AR_STALL", -+ "T86x_L2_EXT_R_BUF_FULL", -+ "T86x_L2_EXT_RD_BUF_FULL", -+ "T86x_L2_EXT_R_RAW", -+ "T86x_L2_EXT_W_STALL", -+ "T86x_L2_EXT_W_BUF_FULL", -+ "T86x_L2_EXT_R_BUF_FULL", -+ 
"T86x_L2_TAG_HAZARD", -+ "T86x_L2_SNOOP_FULL", -+ "T86x_L2_REPLAY_FULL" -+}; ++ kbdev->as_free = as_present; /* All ASs initially free */ + -+static const char * const hardware_counters_mali_t88x[] = { -+ /* Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "T88x_MESSAGES_SENT", -+ "T88x_MESSAGES_RECEIVED", -+ "T88x_GPU_ACTIVE", -+ "T88x_IRQ_ACTIVE", -+ "T88x_JS0_JOBS", -+ "T88x_JS0_TASKS", -+ "T88x_JS0_ACTIVE", -+ "", -+ "T88x_JS0_WAIT_READ", -+ "T88x_JS0_WAIT_ISSUE", -+ "T88x_JS0_WAIT_DEPEND", -+ "T88x_JS0_WAIT_FINISH", -+ "T88x_JS1_JOBS", -+ "T88x_JS1_TASKS", -+ "T88x_JS1_ACTIVE", -+ "", -+ "T88x_JS1_WAIT_READ", -+ "T88x_JS1_WAIT_ISSUE", -+ "T88x_JS1_WAIT_DEPEND", -+ "T88x_JS1_WAIT_FINISH", -+ "T88x_JS2_JOBS", -+ "T88x_JS2_TASKS", -+ "T88x_JS2_ACTIVE", -+ "", -+ "T88x_JS2_WAIT_READ", -+ "T88x_JS2_WAIT_ISSUE", -+ "T88x_JS2_WAIT_DEPEND", -+ "T88x_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++ memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); + -+ /*Tiler */ -+ "", -+ "", -+ "", -+ "T88x_TI_JOBS_PROCESSED", -+ "T88x_TI_TRIANGLES", -+ "T88x_TI_QUADS", -+ "T88x_TI_POLYGONS", -+ "T88x_TI_POINTS", -+ "T88x_TI_LINES", -+ "T88x_TI_VCACHE_HIT", -+ "T88x_TI_VCACHE_MISS", -+ "T88x_TI_FRONT_FACING", -+ "T88x_TI_BACK_FACING", -+ "T88x_TI_PRIM_VISIBLE", -+ "T88x_TI_PRIM_CULLED", -+ "T88x_TI_PRIM_CLIPPED", -+ "T88x_TI_LEVEL0", -+ "T88x_TI_LEVEL1", -+ "T88x_TI_LEVEL2", -+ "T88x_TI_LEVEL3", -+ "T88x_TI_LEVEL4", -+ "T88x_TI_LEVEL5", -+ "T88x_TI_LEVEL6", -+ "T88x_TI_LEVEL7", -+ "T88x_TI_COMMAND_1", -+ "T88x_TI_COMMAND_2", -+ "T88x_TI_COMMAND_3", -+ "T88x_TI_COMMAND_4", -+ "T88x_TI_COMMAND_5_7", -+ "T88x_TI_COMMAND_8_15", -+ "T88x_TI_COMMAND_16_63", -+ "T88x_TI_COMMAND_64", -+ "T88x_TI_COMPRESS_IN", -+ "T88x_TI_COMPRESS_OUT", -+ "T88x_TI_COMPRESS_FLUSH", -+ "T88x_TI_TIMESTAMPS", -+ "T88x_TI_PCACHE_HIT", -+ "T88x_TI_PCACHE_MISS", -+ "T88x_TI_PCACHE_LINE", -+ "T88x_TI_PCACHE_STALL", -+ "T88x_TI_WRBUF_HIT", -+ "T88x_TI_WRBUF_MISS", -+ "T88x_TI_WRBUF_LINE", -+ "T88x_TI_WRBUF_PARTIAL", -+ "T88x_TI_WRBUF_STALL", -+ "T88x_TI_ACTIVE", -+ "T88x_TI_LOADING_DESC", -+ "T88x_TI_INDEX_WAIT", -+ "T88x_TI_INDEX_RANGE_WAIT", -+ "T88x_TI_VERTEX_WAIT", -+ "T88x_TI_PCACHE_WAIT", -+ "T88x_TI_WRBUF_WAIT", -+ "T88x_TI_BUS_READ", -+ "T88x_TI_BUS_WRITE", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T88x_TI_UTLB_HIT", -+ "T88x_TI_UTLB_NEW_MISS", -+ "T88x_TI_UTLB_REPLAY_FULL", -+ "T88x_TI_UTLB_REPLAY_MISS", -+ "T88x_TI_UTLB_STALL", ++ return 0; ++} + -+ /* Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "T88x_FRAG_ACTIVE", -+ "T88x_FRAG_PRIMITIVES", -+ "T88x_FRAG_PRIMITIVES_DROPPED", -+ "T88x_FRAG_CYCLES_DESC", -+ "T88x_FRAG_CYCLES_FPKQ_ACTIVE", -+ "T88x_FRAG_CYCLES_VERT", -+ "T88x_FRAG_CYCLES_TRISETUP", -+ "T88x_FRAG_CYCLES_EZS_ACTIVE", -+ "T88x_FRAG_THREADS", -+ "T88x_FRAG_DUMMY_THREADS", -+ "T88x_FRAG_QUADS_RAST", -+ "T88x_FRAG_QUADS_EZS_TEST", -+ "T88x_FRAG_QUADS_EZS_KILLED", -+ "T88x_FRAG_THREADS_LZS_TEST", -+ "T88x_FRAG_THREADS_LZS_KILLED", -+ "T88x_FRAG_CYCLES_NO_TILE", -+ "T88x_FRAG_NUM_TILES", -+ "T88x_FRAG_TRANS_ELIM", -+ "T88x_COMPUTE_ACTIVE", -+ "T88x_COMPUTE_TASKS", -+ "T88x_COMPUTE_THREADS", -+ "T88x_COMPUTE_CYCLES_DESC", -+ "T88x_TRIPIPE_ACTIVE", -+ "T88x_ARITH_WORDS", -+ "T88x_ARITH_CYCLES_REG", -+ "T88x_ARITH_CYCLES_L0", -+ "T88x_ARITH_FRAG_DEPEND", -+ "T88x_LS_WORDS", -+ "T88x_LS_ISSUES", -+ "T88x_LS_REISSUE_ATTR", -+ 
"T88x_LS_REISSUES_VARY", -+ "T88x_LS_VARY_RV_MISS", -+ "T88x_LS_VARY_RV_HIT", -+ "T88x_LS_NO_UNPARK", -+ "T88x_TEX_WORDS", -+ "T88x_TEX_BUBBLES", -+ "T88x_TEX_WORDS_L0", -+ "T88x_TEX_WORDS_DESC", -+ "T88x_TEX_ISSUES", -+ "T88x_TEX_RECIRC_FMISS", -+ "T88x_TEX_RECIRC_DESC", -+ "T88x_TEX_RECIRC_MULTI", -+ "T88x_TEX_RECIRC_PMISS", -+ "T88x_TEX_RECIRC_CONF", -+ "T88x_LSC_READ_HITS", -+ "T88x_LSC_READ_OP", -+ "T88x_LSC_WRITE_HITS", -+ "T88x_LSC_WRITE_OP", -+ "T88x_LSC_ATOMIC_HITS", -+ "T88x_LSC_ATOMIC_OP", -+ "T88x_LSC_LINE_FETCHES", -+ "T88x_LSC_DIRTY_LINE", -+ "T88x_LSC_SNOOPS", -+ "T88x_AXI_TLB_STALL", -+ "T88x_AXI_TLB_MISS", -+ "T88x_AXI_TLB_TRANSACTION", -+ "T88x_LS_TLB_MISS", -+ "T88x_LS_TLB_HIT", -+ "T88x_AXI_BEATS_READ", -+ "T88x_AXI_BEATS_WRITTEN", ++void kbase_ctx_sched_term(struct kbase_device *kbdev) ++{ ++ s8 i; + -+ /*L2 and MMU */ -+ "", -+ "", -+ "", -+ "", -+ "T88x_MMU_HIT", -+ "T88x_MMU_NEW_MISS", -+ "T88x_MMU_REPLAY_FULL", -+ "T88x_MMU_REPLAY_MISS", -+ "T88x_MMU_TABLE_WALK", -+ "T88x_MMU_REQUESTS", -+ "", -+ "", -+ "T88x_UTLB_HIT", -+ "T88x_UTLB_NEW_MISS", -+ "T88x_UTLB_REPLAY_FULL", -+ "T88x_UTLB_REPLAY_MISS", -+ "T88x_UTLB_STALL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "T88x_L2_EXT_WRITE_BEATS", -+ "T88x_L2_EXT_READ_BEATS", -+ "T88x_L2_ANY_LOOKUP", -+ "T88x_L2_READ_LOOKUP", -+ "T88x_L2_SREAD_LOOKUP", -+ "T88x_L2_READ_REPLAY", -+ "T88x_L2_READ_SNOOP", -+ "T88x_L2_READ_HIT", -+ "T88x_L2_CLEAN_MISS", -+ "T88x_L2_WRITE_LOOKUP", -+ "T88x_L2_SWRITE_LOOKUP", -+ "T88x_L2_WRITE_REPLAY", -+ "T88x_L2_WRITE_SNOOP", -+ "T88x_L2_WRITE_HIT", -+ "T88x_L2_EXT_READ_FULL", -+ "", -+ "T88x_L2_EXT_WRITE_FULL", -+ "T88x_L2_EXT_R_W_HAZARD", -+ "T88x_L2_EXT_READ", -+ "T88x_L2_EXT_READ_LINE", -+ "T88x_L2_EXT_WRITE", -+ "T88x_L2_EXT_WRITE_LINE", -+ "T88x_L2_EXT_WRITE_SMALL", -+ "T88x_L2_EXT_BARRIER", -+ "T88x_L2_EXT_AR_STALL", -+ "T88x_L2_EXT_R_BUF_FULL", -+ "T88x_L2_EXT_RD_BUF_FULL", -+ "T88x_L2_EXT_R_RAW", -+ "T88x_L2_EXT_W_STALL", -+ "T88x_L2_EXT_W_BUF_FULL", -+ "T88x_L2_EXT_R_BUF_FULL", -+ "T88x_L2_TAG_HAZARD", -+ "T88x_L2_SNOOP_FULL", -+ "T88x_L2_REPLAY_FULL" -+}; ++ /* Sanity checks */ ++ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { ++ WARN_ON(kbdev->as_to_kctx[i] != NULL); ++ WARN_ON(!(kbdev->as_free & (1u << i))); ++ } ++} + -+#include "mali_kbase_gator_hwcnt_names_tmix.h" ++/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space ++ * ++ * @kbdev: The context for which to find a free address space ++ * ++ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID ++ * ++ * This function returns an address space available for use. It would prefer ++ * returning an AS that has been previously assigned to the context to ++ * avoid having to reprogram the MMU. ++ */ ++static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int free_as; + -+#include "mali_kbase_gator_hwcnt_names_thex.h" ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#include "mali_kbase_gator_hwcnt_names_tsix.h" ++ /* First check if the previously assigned AS is available */ ++ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && ++ (kbdev->as_free & (1u << kctx->as_nr))) ++ return kctx->as_nr; + ++ /* The previously assigned AS was taken, we'll be returning any free ++ * AS at this point. 
++ */ ++ free_as = ffs(kbdev->as_free) - 1; ++ if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) ++ return free_as; + -+#ifdef MALI_INCLUDE_TKAX -+#include "mali_kbase_gator_hwcnt_names_tkax.h" -+#endif /* MALI_INCLUDE_TKAX */ ++ return KBASEP_AS_NR_INVALID; ++} + -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h -new file mode 100644 -index 000000000..bcceef4fc ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h -@@ -0,0 +1,291 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; + ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+/* -+ * This header was autogenerated, it should not be edited. -+ */ ++ if (atomic_inc_return(&kctx->refcount) == 1) { ++ int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); + -+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ -+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ ++ if (free_as != KBASEP_AS_NR_INVALID) { ++ kbdev->as_free &= ~(1u << free_as); ++ /* Only program the MMU if the context has not been ++ * assigned the same address space before. 
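++ * When a different AS is chosen, any previous holder of that AS must
++ * already have dropped all of its references (warned on below) and is
++ * disabled before this context is programmed onto the hardware.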
++ */ ++ if (free_as != kctx->as_nr) { ++ struct kbase_context *const prev_kctx = ++ kbdev->as_to_kctx[free_as]; + -+static const char * const hardware_counters_mali_tHEx[] = { -+ /* Performance counters for the Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "THEx_MESSAGES_SENT", -+ "THEx_MESSAGES_RECEIVED", -+ "THEx_GPU_ACTIVE", -+ "THEx_IRQ_ACTIVE", -+ "THEx_JS0_JOBS", -+ "THEx_JS0_TASKS", -+ "THEx_JS0_ACTIVE", -+ "", -+ "THEx_JS0_WAIT_READ", -+ "THEx_JS0_WAIT_ISSUE", -+ "THEx_JS0_WAIT_DEPEND", -+ "THEx_JS0_WAIT_FINISH", -+ "THEx_JS1_JOBS", -+ "THEx_JS1_TASKS", -+ "THEx_JS1_ACTIVE", -+ "", -+ "THEx_JS1_WAIT_READ", -+ "THEx_JS1_WAIT_ISSUE", -+ "THEx_JS1_WAIT_DEPEND", -+ "THEx_JS1_WAIT_FINISH", -+ "THEx_JS2_JOBS", -+ "THEx_JS2_TASKS", -+ "THEx_JS2_ACTIVE", -+ "", -+ "THEx_JS2_WAIT_READ", -+ "THEx_JS2_WAIT_ISSUE", -+ "THEx_JS2_WAIT_DEPEND", -+ "THEx_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++ if (prev_kctx) { ++ WARN_ON(atomic_read(&prev_kctx->refcount) != 0); ++ kbase_mmu_disable(prev_kctx); ++ prev_kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } + -+ /* Performance counters for the Tiler */ -+ "", -+ "", -+ "", -+ "", -+ "THEx_TILER_ACTIVE", -+ "THEx_JOBS_PROCESSED", -+ "THEx_TRIANGLES", -+ "THEx_LINES", -+ "THEx_POINTS", -+ "THEx_FRONT_FACING", -+ "THEx_BACK_FACING", -+ "THEx_PRIM_VISIBLE", -+ "THEx_PRIM_CULLED", -+ "THEx_PRIM_CLIPPED", -+ "THEx_PRIM_SAT_CULLED", -+ "", -+ "", -+ "THEx_BUS_READ", -+ "", -+ "THEx_BUS_WRITE", -+ "THEx_LOADING_DESC", -+ "THEx_IDVS_POS_SHAD_REQ", -+ "THEx_IDVS_POS_SHAD_WAIT", -+ "THEx_IDVS_POS_SHAD_STALL", -+ "THEx_IDVS_POS_FIFO_FULL", -+ "THEx_PREFETCH_STALL", -+ "THEx_VCACHE_HIT", -+ "THEx_VCACHE_MISS", -+ "THEx_VCACHE_LINE_WAIT", -+ "THEx_VFETCH_POS_READ_WAIT", -+ "THEx_VFETCH_VERTEX_WAIT", -+ "THEx_VFETCH_STALL", -+ "THEx_PRIMASSY_STALL", -+ "THEx_BBOX_GEN_STALL", -+ "THEx_IDVS_VBU_HIT", -+ "THEx_IDVS_VBU_MISS", -+ "THEx_IDVS_VBU_LINE_DEALLOCATE", -+ "THEx_IDVS_VAR_SHAD_REQ", -+ "THEx_IDVS_VAR_SHAD_STALL", -+ "THEx_BINNER_STALL", -+ "THEx_ITER_STALL", -+ "THEx_COMPRESS_MISS", -+ "THEx_COMPRESS_STALL", -+ "THEx_PCACHE_HIT", -+ "THEx_PCACHE_MISS", -+ "THEx_PCACHE_MISS_STALL", -+ "THEx_PCACHE_EVICT_STALL", -+ "THEx_PMGR_PTR_WR_STALL", -+ "THEx_PMGR_PTR_RD_STALL", -+ "THEx_PMGR_CMD_WR_STALL", -+ "THEx_WRBUF_ACTIVE", -+ "THEx_WRBUF_HIT", -+ "THEx_WRBUF_MISS", -+ "THEx_WRBUF_NO_FREE_LINE_STALL", -+ "THEx_WRBUF_NO_AXI_ID_STALL", -+ "THEx_WRBUF_AXI_STALL", -+ "", -+ "", -+ "", -+ "THEx_UTLB_TRANS", -+ "THEx_UTLB_TRANS_HIT", -+ "THEx_UTLB_TRANS_STALL", -+ "THEx_UTLB_TRANS_MISS_DELAY", -+ "THEx_UTLB_MMU_REQ", ++ kctx->as_nr = free_as; ++ kbdev->as_to_kctx[free_as] = kctx; ++ kbase_mmu_update(kctx); ++ } ++ } else { ++ atomic_dec(&kctx->refcount); + -+ /* Performance counters for the Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "THEx_FRAG_ACTIVE", -+ "THEx_FRAG_PRIMITIVES", -+ "THEx_FRAG_PRIM_RAST", -+ "THEx_FRAG_FPK_ACTIVE", -+ "THEx_FRAG_STARVING", -+ "THEx_FRAG_WARPS", -+ "THEx_FRAG_PARTIAL_WARPS", -+ "THEx_FRAG_QUADS_RAST", -+ "THEx_FRAG_QUADS_EZS_TEST", -+ "THEx_FRAG_QUADS_EZS_UPDATE", -+ "THEx_FRAG_QUADS_EZS_KILL", -+ "THEx_FRAG_LZS_TEST", -+ "THEx_FRAG_LZS_KILL", -+ "", -+ "THEx_FRAG_PTILES", -+ "THEx_FRAG_TRANS_ELIM", -+ "THEx_QUAD_FPK_KILLER", -+ "", -+ "THEx_COMPUTE_ACTIVE", -+ "THEx_COMPUTE_TASKS", -+ "THEx_COMPUTE_WARPS", -+ "THEx_COMPUTE_STARVING", -+ 
"THEx_EXEC_CORE_ACTIVE", -+ "THEx_EXEC_ACTIVE", -+ "THEx_EXEC_INSTR_COUNT", -+ "THEx_EXEC_INSTR_DIVERGED", -+ "THEx_EXEC_INSTR_STARVING", -+ "THEx_ARITH_INSTR_SINGLE_FMA", -+ "THEx_ARITH_INSTR_DOUBLE", -+ "THEx_ARITH_INSTR_MSG", -+ "THEx_ARITH_INSTR_MSG_ONLY", -+ "THEx_TEX_INSTR", -+ "THEx_TEX_INSTR_MIPMAP", -+ "THEx_TEX_INSTR_COMPRESSED", -+ "THEx_TEX_INSTR_3D", -+ "THEx_TEX_INSTR_TRILINEAR", -+ "THEx_TEX_COORD_ISSUE", -+ "THEx_TEX_COORD_STALL", -+ "THEx_TEX_STARVE_CACHE", -+ "THEx_TEX_STARVE_FILTER", -+ "THEx_LS_MEM_READ_FULL", -+ "THEx_LS_MEM_READ_SHORT", -+ "THEx_LS_MEM_WRITE_FULL", -+ "THEx_LS_MEM_WRITE_SHORT", -+ "THEx_LS_MEM_ATOMIC", -+ "THEx_VARY_INSTR", -+ "THEx_VARY_SLOT_32", -+ "THEx_VARY_SLOT_16", -+ "THEx_ATTR_INSTR", -+ "THEx_ARITH_INSTR_FP_MUL", -+ "THEx_BEATS_RD_FTC", -+ "THEx_BEATS_RD_FTC_EXT", -+ "THEx_BEATS_RD_LSC", -+ "THEx_BEATS_RD_LSC_EXT", -+ "THEx_BEATS_RD_TEX", -+ "THEx_BEATS_RD_TEX_EXT", -+ "THEx_BEATS_RD_OTHER", -+ "THEx_BEATS_WR_LSC", -+ "THEx_BEATS_WR_TIB", -+ "", ++ /* Failed to find an available address space, we must ++ * be returning an error at this point. ++ */ ++ WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ } ++ } + -+ /* Performance counters for the Memory System */ -+ "", -+ "", -+ "", -+ "", -+ "THEx_MMU_REQUESTS", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "THEx_L2_RD_MSG_IN", -+ "THEx_L2_RD_MSG_IN_STALL", -+ "THEx_L2_WR_MSG_IN", -+ "THEx_L2_WR_MSG_IN_STALL", -+ "THEx_L2_SNP_MSG_IN", -+ "THEx_L2_SNP_MSG_IN_STALL", -+ "THEx_L2_RD_MSG_OUT", -+ "THEx_L2_RD_MSG_OUT_STALL", -+ "THEx_L2_WR_MSG_OUT", -+ "THEx_L2_ANY_LOOKUP", -+ "THEx_L2_READ_LOOKUP", -+ "THEx_L2_WRITE_LOOKUP", -+ "THEx_L2_EXT_SNOOP_LOOKUP", -+ "THEx_L2_EXT_READ", -+ "THEx_L2_EXT_READ_NOSNP", -+ "THEx_L2_EXT_READ_UNIQUE", -+ "THEx_L2_EXT_READ_BEATS", -+ "THEx_L2_EXT_AR_STALL", -+ "THEx_L2_EXT_AR_CNT_Q1", -+ "THEx_L2_EXT_AR_CNT_Q2", -+ "THEx_L2_EXT_AR_CNT_Q3", -+ "THEx_L2_EXT_RRESP_0_127", -+ "THEx_L2_EXT_RRESP_128_191", -+ "THEx_L2_EXT_RRESP_192_255", -+ "THEx_L2_EXT_RRESP_256_319", -+ "THEx_L2_EXT_RRESP_320_383", -+ "THEx_L2_EXT_WRITE", -+ "THEx_L2_EXT_WRITE_NOSNP_FULL", -+ "THEx_L2_EXT_WRITE_NOSNP_PTL", -+ "THEx_L2_EXT_WRITE_SNP_FULL", -+ "THEx_L2_EXT_WRITE_SNP_PTL", -+ "THEx_L2_EXT_WRITE_BEATS", -+ "THEx_L2_EXT_W_STALL", -+ "THEx_L2_EXT_AW_CNT_Q1", -+ "THEx_L2_EXT_AW_CNT_Q2", -+ "THEx_L2_EXT_AW_CNT_Q3", -+ "THEx_L2_EXT_SNOOP", -+ "THEx_L2_EXT_SNOOP_STALL", -+ "THEx_L2_EXT_SNOOP_RESP_CLEAN", -+ "THEx_L2_EXT_SNOOP_RESP_DATA", -+ "THEx_L2_EXT_SNOOP_INTERNAL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+}; ++ return kctx->as_nr; ++} + -+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h -new file mode 100644 -index 000000000..5ea06770f ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h -@@ -0,0 +1,291 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++int kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; + ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON(atomic_read(&kctx->refcount) == 0); ++ if (atomic_read(&kctx->refcount) == 0) ++ return -1; + ++ WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); ++ WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + -+/* -+ * This header was autogenerated, it should not be edited. -+ */ ++ atomic_inc(&kctx->refcount); + -+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ -+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ ++ return 0; ++} + -+static const char * const hardware_counters_mali_tMIx[] = { -+ /* Performance counters for the Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "TMIx_MESSAGES_SENT", -+ "TMIx_MESSAGES_RECEIVED", -+ "TMIx_GPU_ACTIVE", -+ "TMIx_IRQ_ACTIVE", -+ "TMIx_JS0_JOBS", -+ "TMIx_JS0_TASKS", -+ "TMIx_JS0_ACTIVE", -+ "", -+ "TMIx_JS0_WAIT_READ", -+ "TMIx_JS0_WAIT_ISSUE", -+ "TMIx_JS0_WAIT_DEPEND", -+ "TMIx_JS0_WAIT_FINISH", -+ "TMIx_JS1_JOBS", -+ "TMIx_JS1_TASKS", -+ "TMIx_JS1_ACTIVE", -+ "", -+ "TMIx_JS1_WAIT_READ", -+ "TMIx_JS1_WAIT_ISSUE", -+ "TMIx_JS1_WAIT_DEPEND", -+ "TMIx_JS1_WAIT_FINISH", -+ "TMIx_JS2_JOBS", -+ "TMIx_JS2_TASKS", -+ "TMIx_JS2_ACTIVE", -+ "", -+ "TMIx_JS2_WAIT_READ", -+ "TMIx_JS2_WAIT_ISSUE", -+ "TMIx_JS2_WAIT_DEPEND", -+ "TMIx_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; + -+ /* Performance counters for the Tiler */ -+ "", -+ "", -+ "", -+ "", -+ "TMIx_TILER_ACTIVE", -+ "TMIx_JOBS_PROCESSED", -+ "TMIx_TRIANGLES", -+ "TMIx_LINES", -+ "TMIx_POINTS", -+ "TMIx_FRONT_FACING", -+ "TMIx_BACK_FACING", -+ "TMIx_PRIM_VISIBLE", -+ "TMIx_PRIM_CULLED", -+ "TMIx_PRIM_CLIPPED", -+ "TMIx_PRIM_SAT_CULLED", -+ "", -+ "", -+ "TMIx_BUS_READ", -+ "", -+ "TMIx_BUS_WRITE", -+ "TMIx_LOADING_DESC", -+ "TMIx_IDVS_POS_SHAD_REQ", -+ "TMIx_IDVS_POS_SHAD_WAIT", -+ "TMIx_IDVS_POS_SHAD_STALL", -+ "TMIx_IDVS_POS_FIFO_FULL", -+ "TMIx_PREFETCH_STALL", -+ "TMIx_VCACHE_HIT", -+ "TMIx_VCACHE_MISS", -+ "TMIx_VCACHE_LINE_WAIT", -+ "TMIx_VFETCH_POS_READ_WAIT", -+ "TMIx_VFETCH_VERTEX_WAIT", -+ "TMIx_VFETCH_STALL", -+ "TMIx_PRIMASSY_STALL", -+ "TMIx_BBOX_GEN_STALL", -+ "TMIx_IDVS_VBU_HIT", -+ "TMIx_IDVS_VBU_MISS", -+ "TMIx_IDVS_VBU_LINE_DEALLOCATE", -+ "TMIx_IDVS_VAR_SHAD_REQ", -+ "TMIx_IDVS_VAR_SHAD_STALL", -+ "TMIx_BINNER_STALL", -+ "TMIx_ITER_STALL", -+ "TMIx_COMPRESS_MISS", -+ "TMIx_COMPRESS_STALL", -+ "TMIx_PCACHE_HIT", -+ "TMIx_PCACHE_MISS", -+ "TMIx_PCACHE_MISS_STALL", -+ "TMIx_PCACHE_EVICT_STALL", -+ "TMIx_PMGR_PTR_WR_STALL", -+ "TMIx_PMGR_PTR_RD_STALL", -+ "TMIx_PMGR_CMD_WR_STALL", -+ "TMIx_WRBUF_ACTIVE", -+ "TMIx_WRBUF_HIT", -+ "TMIx_WRBUF_MISS", -+ "TMIx_WRBUF_NO_FREE_LINE_STALL", -+ "TMIx_WRBUF_NO_AXI_ID_STALL", -+ "TMIx_WRBUF_AXI_STALL", -+ "", -+ "", -+ "", -+ "TMIx_UTLB_TRANS", -+ "TMIx_UTLB_TRANS_HIT", -+ "TMIx_UTLB_TRANS_STALL", -+ "TMIx_UTLB_TRANS_MISS_DELAY", -+ "TMIx_UTLB_MMU_REQ", ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Performance counters for the Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "TMIx_FRAG_ACTIVE", -+ "TMIx_FRAG_PRIMITIVES", -+ "TMIx_FRAG_PRIM_RAST", -+ "TMIx_FRAG_FPK_ACTIVE", -+ "TMIx_FRAG_STARVING", -+ "TMIx_FRAG_WARPS", -+ "TMIx_FRAG_PARTIAL_WARPS", -+ 
"TMIx_FRAG_QUADS_RAST", -+ "TMIx_FRAG_QUADS_EZS_TEST", -+ "TMIx_FRAG_QUADS_EZS_UPDATE", -+ "TMIx_FRAG_QUADS_EZS_KILL", -+ "TMIx_FRAG_LZS_TEST", -+ "TMIx_FRAG_LZS_KILL", -+ "", -+ "TMIx_FRAG_PTILES", -+ "TMIx_FRAG_TRANS_ELIM", -+ "TMIx_QUAD_FPK_KILLER", -+ "", -+ "TMIx_COMPUTE_ACTIVE", -+ "TMIx_COMPUTE_TASKS", -+ "TMIx_COMPUTE_WARPS", -+ "TMIx_COMPUTE_STARVING", -+ "TMIx_EXEC_CORE_ACTIVE", -+ "TMIx_EXEC_ACTIVE", -+ "TMIx_EXEC_INSTR_COUNT", -+ "TMIx_EXEC_INSTR_DIVERGED", -+ "TMIx_EXEC_INSTR_STARVING", -+ "TMIx_ARITH_INSTR_SINGLE_FMA", -+ "TMIx_ARITH_INSTR_DOUBLE", -+ "TMIx_ARITH_INSTR_MSG", -+ "TMIx_ARITH_INSTR_MSG_ONLY", -+ "TMIx_TEX_INSTR", -+ "TMIx_TEX_INSTR_MIPMAP", -+ "TMIx_TEX_INSTR_COMPRESSED", -+ "TMIx_TEX_INSTR_3D", -+ "TMIx_TEX_INSTR_TRILINEAR", -+ "TMIx_TEX_COORD_ISSUE", -+ "TMIx_TEX_COORD_STALL", -+ "TMIx_TEX_STARVE_CACHE", -+ "TMIx_TEX_STARVE_FILTER", -+ "TMIx_LS_MEM_READ_FULL", -+ "TMIx_LS_MEM_READ_SHORT", -+ "TMIx_LS_MEM_WRITE_FULL", -+ "TMIx_LS_MEM_WRITE_SHORT", -+ "TMIx_LS_MEM_ATOMIC", -+ "TMIx_VARY_INSTR", -+ "TMIx_VARY_SLOT_32", -+ "TMIx_VARY_SLOT_16", -+ "TMIx_ATTR_INSTR", -+ "TMIx_ARITH_INSTR_FP_MUL", -+ "TMIx_BEATS_RD_FTC", -+ "TMIx_BEATS_RD_FTC_EXT", -+ "TMIx_BEATS_RD_LSC", -+ "TMIx_BEATS_RD_LSC_EXT", -+ "TMIx_BEATS_RD_TEX", -+ "TMIx_BEATS_RD_TEX_EXT", -+ "TMIx_BEATS_RD_OTHER", -+ "TMIx_BEATS_WR_LSC", -+ "TMIx_BEATS_WR_TIB", -+ "", ++ if (atomic_dec_return(&kctx->refcount) == 0) ++ kbdev->as_free |= (1u << kctx->as_nr); ++} + -+ /* Performance counters for the Memory System */ -+ "", -+ "", -+ "", -+ "", -+ "TMIx_MMU_REQUESTS", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "TMIx_L2_RD_MSG_IN", -+ "TMIx_L2_RD_MSG_IN_STALL", -+ "TMIx_L2_WR_MSG_IN", -+ "TMIx_L2_WR_MSG_IN_STALL", -+ "TMIx_L2_SNP_MSG_IN", -+ "TMIx_L2_SNP_MSG_IN_STALL", -+ "TMIx_L2_RD_MSG_OUT", -+ "TMIx_L2_RD_MSG_OUT_STALL", -+ "TMIx_L2_WR_MSG_OUT", -+ "TMIx_L2_ANY_LOOKUP", -+ "TMIx_L2_READ_LOOKUP", -+ "TMIx_L2_WRITE_LOOKUP", -+ "TMIx_L2_EXT_SNOOP_LOOKUP", -+ "TMIx_L2_EXT_READ", -+ "TMIx_L2_EXT_READ_NOSNP", -+ "TMIx_L2_EXT_READ_UNIQUE", -+ "TMIx_L2_EXT_READ_BEATS", -+ "TMIx_L2_EXT_AR_STALL", -+ "TMIx_L2_EXT_AR_CNT_Q1", -+ "TMIx_L2_EXT_AR_CNT_Q2", -+ "TMIx_L2_EXT_AR_CNT_Q3", -+ "TMIx_L2_EXT_RRESP_0_127", -+ "TMIx_L2_EXT_RRESP_128_191", -+ "TMIx_L2_EXT_RRESP_192_255", -+ "TMIx_L2_EXT_RRESP_256_319", -+ "TMIx_L2_EXT_RRESP_320_383", -+ "TMIx_L2_EXT_WRITE", -+ "TMIx_L2_EXT_WRITE_NOSNP_FULL", -+ "TMIx_L2_EXT_WRITE_NOSNP_PTL", -+ "TMIx_L2_EXT_WRITE_SNP_FULL", -+ "TMIx_L2_EXT_WRITE_SNP_PTL", -+ "TMIx_L2_EXT_WRITE_BEATS", -+ "TMIx_L2_EXT_W_STALL", -+ "TMIx_L2_EXT_AW_CNT_Q1", -+ "TMIx_L2_EXT_AW_CNT_Q2", -+ "TMIx_L2_EXT_AW_CNT_Q3", -+ "TMIx_L2_EXT_SNOOP", -+ "TMIx_L2_EXT_SNOOP_STALL", -+ "TMIx_L2_EXT_SNOOP_RESP_CLEAN", -+ "TMIx_L2_EXT_SNOOP_RESP_DATA", -+ "TMIx_L2_EXT_SNOOP_INTERNAL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+}; ++void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; + -+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h -new file mode 100644 -index 000000000..be09c4556 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h -@@ -0,0 +1,291 @@ -+/* -+ * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + ++ WARN_ON(atomic_read(&kctx->refcount) != 0); + ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) { ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_mmu_disable(kctx); + -+/* -+ * This header was autogenerated, it should not be edited. -+ */ ++ kbdev->as_to_kctx[kctx->as_nr] = NULL; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } ++} + -+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ -+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ ++void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) ++{ ++ s8 i; + -+static const char * const hardware_counters_mali_tSIx[] = { -+ /* Performance counters for the Job Manager */ -+ "", -+ "", -+ "", -+ "", -+ "TSIx_MESSAGES_SENT", -+ "TSIx_MESSAGES_RECEIVED", -+ "TSIx_GPU_ACTIVE", -+ "TSIx_IRQ_ACTIVE", -+ "TSIx_JS0_JOBS", -+ "TSIx_JS0_TASKS", -+ "TSIx_JS0_ACTIVE", -+ "", -+ "TSIx_JS0_WAIT_READ", -+ "TSIx_JS0_WAIT_ISSUE", -+ "TSIx_JS0_WAIT_DEPEND", -+ "TSIx_JS0_WAIT_FINISH", -+ "TSIx_JS1_JOBS", -+ "TSIx_JS1_TASKS", -+ "TSIx_JS1_ACTIVE", -+ "", -+ "TSIx_JS1_WAIT_READ", -+ "TSIx_JS1_WAIT_ISSUE", -+ "TSIx_JS1_WAIT_DEPEND", -+ "TSIx_JS1_WAIT_FINISH", -+ "TSIx_JS2_JOBS", -+ "TSIx_JS2_TASKS", -+ "TSIx_JS2_ACTIVE", -+ "", -+ "TSIx_JS2_WAIT_READ", -+ "TSIx_JS2_WAIT_ISSUE", -+ "TSIx_JS2_WAIT_DEPEND", -+ "TSIx_JS2_WAIT_FINISH", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Performance counters for the Tiler */ -+ "", -+ "", -+ "", -+ "", -+ "TSIx_TILER_ACTIVE", -+ "TSIx_JOBS_PROCESSED", -+ "TSIx_TRIANGLES", -+ "TSIx_LINES", -+ "TSIx_POINTS", -+ "TSIx_FRONT_FACING", -+ "TSIx_BACK_FACING", -+ "TSIx_PRIM_VISIBLE", -+ "TSIx_PRIM_CULLED", -+ "TSIx_PRIM_CLIPPED", -+ "TSIx_PRIM_SAT_CULLED", -+ "", -+ "", -+ "TSIx_BUS_READ", -+ "", -+ "TSIx_BUS_WRITE", -+ "TSIx_LOADING_DESC", -+ "TSIx_IDVS_POS_SHAD_REQ", -+ "TSIx_IDVS_POS_SHAD_WAIT", -+ "TSIx_IDVS_POS_SHAD_STALL", -+ "TSIx_IDVS_POS_FIFO_FULL", -+ "TSIx_PREFETCH_STALL", -+ "TSIx_VCACHE_HIT", -+ "TSIx_VCACHE_MISS", -+ "TSIx_VCACHE_LINE_WAIT", -+ "TSIx_VFETCH_POS_READ_WAIT", -+ "TSIx_VFETCH_VERTEX_WAIT", -+ "TSIx_VFETCH_STALL", -+ "TSIx_PRIMASSY_STALL", -+ "TSIx_BBOX_GEN_STALL", -+ "TSIx_IDVS_VBU_HIT", -+ "TSIx_IDVS_VBU_MISS", -+ "TSIx_IDVS_VBU_LINE_DEALLOCATE", -+ "TSIx_IDVS_VAR_SHAD_REQ", -+ "TSIx_IDVS_VAR_SHAD_STALL", -+ "TSIx_BINNER_STALL", -+ "TSIx_ITER_STALL", -+ "TSIx_COMPRESS_MISS", -+ "TSIx_COMPRESS_STALL", -+ "TSIx_PCACHE_HIT", -+ "TSIx_PCACHE_MISS", -+ "TSIx_PCACHE_MISS_STALL", -+ "TSIx_PCACHE_EVICT_STALL", -+ "TSIx_PMGR_PTR_WR_STALL", -+ "TSIx_PMGR_PTR_RD_STALL", -+ "TSIx_PMGR_CMD_WR_STALL", -+ "TSIx_WRBUF_ACTIVE", -+ "TSIx_WRBUF_HIT", -+ "TSIx_WRBUF_MISS", -+ "TSIx_WRBUF_NO_FREE_LINE_STALL", -+ "TSIx_WRBUF_NO_AXI_ID_STALL", -+ "TSIx_WRBUF_AXI_STALL", -+ "", -+ "", -+ "", -+ "TSIx_UTLB_TRANS", -+ 
"TSIx_UTLB_TRANS_HIT", -+ "TSIx_UTLB_TRANS_STALL", -+ "TSIx_UTLB_TRANS_MISS_DELAY", -+ "TSIx_UTLB_MMU_REQ", ++ WARN_ON(!kbdev->pm.backend.gpu_powered); + -+ /* Performance counters for the Shader Core */ -+ "", -+ "", -+ "", -+ "", -+ "TSIx_FRAG_ACTIVE", -+ "TSIx_FRAG_PRIMITIVES", -+ "TSIx_FRAG_PRIM_RAST", -+ "TSIx_FRAG_FPK_ACTIVE", -+ "TSIx_FRAG_STARVING", -+ "TSIx_FRAG_WARPS", -+ "TSIx_FRAG_PARTIAL_WARPS", -+ "TSIx_FRAG_QUADS_RAST", -+ "TSIx_FRAG_QUADS_EZS_TEST", -+ "TSIx_FRAG_QUADS_EZS_UPDATE", -+ "TSIx_FRAG_QUADS_EZS_KILL", -+ "TSIx_FRAG_LZS_TEST", -+ "TSIx_FRAG_LZS_KILL", -+ "", -+ "TSIx_FRAG_PTILES", -+ "TSIx_FRAG_TRANS_ELIM", -+ "TSIx_QUAD_FPK_KILLER", -+ "", -+ "TSIx_COMPUTE_ACTIVE", -+ "TSIx_COMPUTE_TASKS", -+ "TSIx_COMPUTE_WARPS", -+ "TSIx_COMPUTE_STARVING", -+ "TSIx_EXEC_CORE_ACTIVE", -+ "TSIx_EXEC_ACTIVE", -+ "TSIx_EXEC_INSTR_COUNT", -+ "TSIx_EXEC_INSTR_DIVERGED", -+ "TSIx_EXEC_INSTR_STARVING", -+ "TSIx_ARITH_INSTR_SINGLE_FMA", -+ "TSIx_ARITH_INSTR_DOUBLE", -+ "TSIx_ARITH_INSTR_MSG", -+ "TSIx_ARITH_INSTR_MSG_ONLY", -+ "TSIx_TEX_MSGI_NUM_QUADS", -+ "TSIx_TEX_DFCH_NUM_PASSES", -+ "TSIx_TEX_DFCH_NUM_PASSES_MISS", -+ "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP", -+ "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP", -+ "TSIx_TEX_TFCH_NUM_LINES_FETCHED", -+ "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", -+ "TSIx_TEX_TFCH_NUM_OPERATIONS", -+ "TSIx_TEX_FILT_NUM_OPERATIONS", -+ "TSIx_LS_MEM_READ_FULL", -+ "TSIx_LS_MEM_READ_SHORT", -+ "TSIx_LS_MEM_WRITE_FULL", -+ "TSIx_LS_MEM_WRITE_SHORT", -+ "TSIx_LS_MEM_ATOMIC", -+ "TSIx_VARY_INSTR", -+ "TSIx_VARY_SLOT_32", -+ "TSIx_VARY_SLOT_16", -+ "TSIx_ATTR_INSTR", -+ "TSIx_ARITH_INSTR_FP_MUL", -+ "TSIx_BEATS_RD_FTC", -+ "TSIx_BEATS_RD_FTC_EXT", -+ "TSIx_BEATS_RD_LSC", -+ "TSIx_BEATS_RD_LSC_EXT", -+ "TSIx_BEATS_RD_TEX", -+ "TSIx_BEATS_RD_TEX_EXT", -+ "TSIx_BEATS_RD_OTHER", -+ "TSIx_BEATS_WR_LSC", -+ "TSIx_BEATS_WR_TIB", -+ "", ++ for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { ++ struct kbase_context *kctx; + -+ /* Performance counters for the Memory System */ -+ "", -+ "", -+ "", -+ "", -+ "TSIx_MMU_REQUESTS", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "TSIx_L2_RD_MSG_IN", -+ "TSIx_L2_RD_MSG_IN_STALL", -+ "TSIx_L2_WR_MSG_IN", -+ "TSIx_L2_WR_MSG_IN_STALL", -+ "TSIx_L2_SNP_MSG_IN", -+ "TSIx_L2_SNP_MSG_IN_STALL", -+ "TSIx_L2_RD_MSG_OUT", -+ "TSIx_L2_RD_MSG_OUT_STALL", -+ "TSIx_L2_WR_MSG_OUT", -+ "TSIx_L2_ANY_LOOKUP", -+ "TSIx_L2_READ_LOOKUP", -+ "TSIx_L2_WRITE_LOOKUP", -+ "TSIx_L2_EXT_SNOOP_LOOKUP", -+ "TSIx_L2_EXT_READ", -+ "TSIx_L2_EXT_READ_NOSNP", -+ "TSIx_L2_EXT_READ_UNIQUE", -+ "TSIx_L2_EXT_READ_BEATS", -+ "TSIx_L2_EXT_AR_STALL", -+ "TSIx_L2_EXT_AR_CNT_Q1", -+ "TSIx_L2_EXT_AR_CNT_Q2", -+ "TSIx_L2_EXT_AR_CNT_Q3", -+ "TSIx_L2_EXT_RRESP_0_127", -+ "TSIx_L2_EXT_RRESP_128_191", -+ "TSIx_L2_EXT_RRESP_192_255", -+ "TSIx_L2_EXT_RRESP_256_319", -+ "TSIx_L2_EXT_RRESP_320_383", -+ "TSIx_L2_EXT_WRITE", -+ "TSIx_L2_EXT_WRITE_NOSNP_FULL", -+ "TSIx_L2_EXT_WRITE_NOSNP_PTL", -+ "TSIx_L2_EXT_WRITE_SNP_FULL", -+ "TSIx_L2_EXT_WRITE_SNP_PTL", -+ "TSIx_L2_EXT_WRITE_BEATS", -+ "TSIx_L2_EXT_W_STALL", -+ "TSIx_L2_EXT_AW_CNT_Q1", -+ "TSIx_L2_EXT_AW_CNT_Q2", -+ "TSIx_L2_EXT_AW_CNT_Q3", -+ "TSIx_L2_EXT_SNOOP", -+ "TSIx_L2_EXT_SNOOP_STALL", -+ "TSIx_L2_EXT_SNOOP_RESP_CLEAN", -+ "TSIx_L2_EXT_SNOOP_RESP_DATA", -+ "TSIx_L2_EXT_SNOOP_INTERNAL", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+ "", -+}; ++ kctx = kbdev->as_to_kctx[i]; ++ if (kctx) { ++ if (atomic_read(&kctx->refcount)) { ++ WARN_ON(kctx->as_nr != i); + -+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ 
*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h ++ kbase_mmu_update(kctx); ++ } else { ++ /* This context might have been assigned an ++ * AS before, clear it. ++ */ ++ kbdev->as_to_kctx[kctx->as_nr] = NULL; ++ kctx->as_nr = KBASEP_AS_NR_INVALID; ++ } ++ } else { ++ kbase_mmu_disable_as(kbdev, i); ++ } ++ } ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h new file mode 100644 -index 000000000..42f0111c4 +index 000000000..47474fecc --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h -@@ -0,0 +1,123 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h +@@ -0,0 +1,134 @@ +/* + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -368284,122 +368664,133 @@ index 000000000..42f0111c4 + */ + + -+#ifndef _KBASE_GPU_ID_H_ -+#define _KBASE_GPU_ID_H_ -+ -+/* GPU_ID register */ -+#define GPU_ID_VERSION_STATUS_SHIFT 0 -+#define GPU_ID_VERSION_MINOR_SHIFT 4 -+#define GPU_ID_VERSION_MAJOR_SHIFT 12 -+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -+#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) -+#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) -+#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) -+#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) -+ -+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ -+#define GPU_ID_PI_T60X 0x6956 -+#define GPU_ID_PI_T62X 0x0620 -+#define GPU_ID_PI_T76X 0x0750 -+#define GPU_ID_PI_T72X 0x0720 -+#define GPU_ID_PI_TFRX 0x0880 -+#define GPU_ID_PI_T86X 0x0860 -+#define GPU_ID_PI_T82X 0x0820 -+#define GPU_ID_PI_T83X 0x0830 + -+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ -+#define GPU_ID_PI_NEW_FORMAT_START 0x1000 -+#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \ -+ (product_id) >= \ -+ GPU_ID_PI_NEW_FORMAT_START) ++#ifndef _KBASE_CTX_SCHED_H_ ++#define _KBASE_CTX_SCHED_H_ + -+#define GPU_ID2_VERSION_STATUS_SHIFT 0 -+#define GPU_ID2_VERSION_MINOR_SHIFT 4 -+#define GPU_ID2_VERSION_MAJOR_SHIFT 12 -+#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 -+#define GPU_ID2_ARCH_REV_SHIFT 20 -+#define GPU_ID2_ARCH_MINOR_SHIFT 24 -+#define GPU_ID2_ARCH_MAJOR_SHIFT 28 -+#define GPU_ID2_VERSION_STATUS (0xF << GPU_ID2_VERSION_STATUS_SHIFT) -+#define GPU_ID2_VERSION_MINOR (0xFF << GPU_ID2_VERSION_MINOR_SHIFT) -+#define GPU_ID2_VERSION_MAJOR (0xF << GPU_ID2_VERSION_MAJOR_SHIFT) -+#define GPU_ID2_PRODUCT_MAJOR (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT) -+#define GPU_ID2_ARCH_REV (0xF << GPU_ID2_ARCH_REV_SHIFT) -+#define GPU_ID2_ARCH_MINOR (0xF << GPU_ID2_ARCH_MINOR_SHIFT) -+#define GPU_ID2_ARCH_MAJOR (0xF << GPU_ID2_ARCH_MAJOR_SHIFT) -+#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) -+#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ -+ GPU_ID2_VERSION_MINOR | \ -+ GPU_ID2_VERSION_STATUS) ++#include + -+/* Helper macro to create a partial GPU_ID (new format) that defines -+ a product ignoring its version. 
*/ -+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ -+ (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ -+ ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ -+ ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ -+ ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) ++/* The Context Scheduler manages address space assignment and reference ++ * counting to kbase_context. The interface has been designed to minimise ++ * interactions between the Job Scheduler and Power Management/MMU to support ++ * both the existing Job Scheduler and Command Stream Frontend interface. ++ * ++ * The initial implementation of the Context Scheduler does not schedule ++ * contexts. Instead it relies on the Job Scheduler/CSF to make decisions of ++ * when to schedule/evict contexts if address spaces are starved. In the ++ * future, once an interface between the CS and JS/CSF have been devised to ++ * provide enough information about how each context is consuming GPU resources, ++ * those decisions can be made in the CS itself, thereby reducing duplicated ++ * code. ++ */ + -+/* Helper macro to create a partial GPU_ID (new format) that specifies the -+ revision (major, minor, status) of a product */ -+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ -+ (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ -+ ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ -+ ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) ++/* base_ctx_sched_init - Initialise the context scheduler ++ * ++ * @kbdev: The device for which the context scheduler needs to be ++ * initialised ++ * ++ * Return: 0 for success, otherwise failure ++ * ++ * This must be called during device initilisation. The number of hardware ++ * address spaces must already be established before calling this function. ++ */ ++int kbase_ctx_sched_init(struct kbase_device *kbdev); + -+/* Helper macro to create a complete GPU_ID (new format) */ -+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ -+ version_major, version_minor, version_status) \ -+ (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ -+ product_major) | \ -+ GPU_ID2_VERSION_MAKE(version_major, version_minor, \ -+ version_status)) ++/* base_ctx_sched_term - Terminate the context scheduler ++ * ++ * @kbdev: The device for which the context scheduler needs to be ++ * terminated ++ * ++ * This must be called during device termination after all contexts have been ++ * destroyed. ++ */ ++void kbase_ctx_sched_term(struct kbase_device *kbdev); + -+/* Helper macro to create a partial GPU_ID (new format) that identifies -+ a particular GPU model by its arch_major and product_major. */ -+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ -+ (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ -+ ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) ++/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context ++ * ++ * @kctx: The context to which to retain a reference ++ * ++ * Return: The address space that the context has been assigned to or ++ * KBASEP_AS_NR_INVALID if no address space was available. ++ * ++ * This function should be called whenever an address space should be assigned ++ * to a context and programmed onto the MMU. It should typically be called ++ * when jobs are ready to be submitted to the GPU. ++ * ++ * It can be called as many times as necessary. The address space will be ++ * assigned to the context for as long as there is a reference to said context. 
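++ *
++ * A minimal, illustrative usage sketch (not taken from the driver; the
++ * locks described below are assumed to be already held by the caller):
++ *
++ *   as_nr = kbase_ctx_sched_retain_ctx(kctx);
++ *   if (as_nr != KBASEP_AS_NR_INVALID) {
++ *           ... submit work for kctx on address space as_nr ...
++ *           kbase_ctx_sched_release_ctx(kctx);
++ *   }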
++ *
++ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
++ * held whilst calling this function.
++ */
++int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
++/* kbase_ctx_sched_retain_ctx_refcount - Retain an extra reference to a context
++ *
++ * @kctx: The context to which to retain a reference
++ *
++ * This function only retains a reference to the context. It must be called
++ * only when the context already has a reference.
++ *
++ * This is typically called inside an atomic session where we know the context
++ * is already scheduled in but want to take an extra reference to ensure that
++ * it doesn't get descheduled.
++ *
++ * The kbase_device::hwaccess_lock must be held whilst calling this function.
++ *
++ * Return: 0 on success; -1 if *kctx is in an invalid state (it currently
++ * holds no reference).
++ */
++int kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
++/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
++ *
++ * @kctx: The context from which to release a reference
++ *
++ * This function should be called whenever an address space could be unassigned
++ * from a context. When there are no more references to said context, the
++ * address space previously assigned to this context shall be reassigned to
++ * other contexts as needed.
++ *
++ * The kbase_device::hwaccess_lock must be held whilst calling this function.
++ */
++void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
++/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
++ *
++ * @kctx: The context to be removed
++ *
++ * This function should be called when a context is being destroyed. The
++ * context must no longer have any reference. If it has been assigned an
++ * address space before then the AS will be unprogrammed.
++ *
++ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
++ * held whilst calling this function.
++ */
++void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
++/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
++ *
++ * @kbdev: The device for which address spaces are to be reprogrammed
++ *
++ * This function shall reprogram all address spaces previously assigned to
++ * contexts. It can be used after the GPU is reset.
++ *
++ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
++ * held whilst calling this function.
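++ *
++ * An illustrative call sequence after a GPU reset (a sketch only, not the
++ * exact driver code; the lock types and the local "flags" variable are
++ * assumptions):
++ *
++ *   mutex_lock(&kbdev->mmu_hw_mutex);
++ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++ *   kbase_ctx_sched_restore_all_as(kbdev);
++ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++ *   mutex_unlock(&kbdev->mmu_hw_mutex);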
++ */ ++void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); + -+#endif /* _KBASE_GPU_ID_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c ++#endif /* _KBASE_CTX_SCHED_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c new file mode 100644 -index 000000000..6df0a1cb1 +index 000000000..fb57ac2e3 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c -@@ -0,0 +1,97 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c +@@ -0,0 +1,39 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -368414,95 +368805,37 @@ index 000000000..6df0a1cb1 + + + -+#include -+ -+#ifdef CONFIG_DEBUG_FS -+/** Show callback for the @c gpu_memory debugfs file. -+ * -+ * This function is called to get the contents of the @c gpu_memory debugfs -+ * file. This is a report of current gpu memory usage. -+ * -+ * @param sfile The debugfs entry -+ * @param data Data associated with the entry -+ * -+ * @return 0 if successfully prints data in debugfs entry file -+ * -1 if it encountered an error -+ */ -+ -+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) -+{ -+ struct list_head *entry; -+ const struct list_head *kbdev_list; -+ -+ kbdev_list = kbase_dev_list_get(); -+ list_for_each(entry, kbdev_list) { -+ struct kbase_device *kbdev = NULL; -+ struct kbasep_kctx_list_element *element; + -+ kbdev = list_entry(entry, struct kbase_device, entry); -+ /* output the total memory usage and cap for this device */ -+ seq_printf(sfile, "%-16s %10u\n", -+ kbdev->devname, -+ atomic_read(&(kbdev->memdev.used_pages))); -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_for_each_entry(element, &kbdev->kctx_list, link) { -+ /* output the memory usage and cap for each kctx -+ * opened on this device */ -+ seq_printf(sfile, " %s-0x%p %10u\n", -+ "kctx", -+ element->kctx, -+ atomic_read(&(element->kctx->used_pages))); -+ } -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } -+ kbase_dev_list_put(kbdev_list); -+ return 0; -+} + -+/* -+ * File operations related to debugfs entry for gpu_memory -+ */ -+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_gpu_memory_seq_show , NULL); -+} ++#include + -+static const struct file_operations kbasep_gpu_memory_debugfs_fops = { -+ .open = kbasep_gpu_memory_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, ++static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { ++ NULL, ++ NULL +}; + -+/* -+ * Initialize debugfs entry for gpu_memory -+ */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) ++void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ -+ debugfs_create_file("gpu_memory", S_IRUGO, -+ kbdev->mali_debugfs_directory, NULL, -+ &kbasep_gpu_memory_debugfs_fops); -+ return; ++ kbasep_debug_assert_registered_cb.func = func; ++ kbasep_debug_assert_registered_cb.param = param; +} + -+#else -+/* -+ * Stub functions for when debugfs is disabled -+ */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) ++void kbasep_debug_assert_call_hook(void) +{ -+ return; ++ if (kbasep_debug_assert_registered_cb.func 
!= NULL) ++ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h ++KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h new file mode 100644 -index 000000000..7045693eb +index 000000000..5fff2892b --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h -@@ -0,0 +1,37 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h +@@ -0,0 +1,164 @@ +/* + * -+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -368519,33 +368852,160 @@ index 000000000..7045693eb + + + ++#ifndef _KBASE_DEBUG_H ++#define _KBASE_DEBUG_H ++ ++#include ++ ++/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ ++#define KBASE_DEBUG_SKIP_TRACE 0 ++ ++/** @brief If different from 0, the trace will only contain the file and line. */ ++#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 ++ ++/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ ++#ifndef KBASE_DEBUG_DISABLE_ASSERTS ++#ifdef CONFIG_MALI_DEBUG ++#define KBASE_DEBUG_DISABLE_ASSERTS 0 ++#else ++#define KBASE_DEBUG_DISABLE_ASSERTS 1 ++#endif ++#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ ++ ++/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ ++typedef void (kbase_debug_assert_hook) (void *); ++ ++struct kbasep_debug_assert_cb { ++ kbase_debug_assert_hook *func; ++ void *param; ++}; ++ ++/** ++ * @def KBASEP_DEBUG_PRINT_TRACE ++ * @brief Private macro containing the format of the trace to display before every message ++ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME ++ */ ++#if !KBASE_DEBUG_SKIP_TRACE ++#define KBASEP_DEBUG_PRINT_TRACE \ ++ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) ++#if !KBASE_DEBUG_SKIP_FUNCTION_NAME ++#define KBASEP_DEBUG_PRINT_FUNCTION __func__ ++#else ++#define KBASEP_DEBUG_PRINT_FUNCTION "" ++#endif ++#else ++#define KBASEP_DEBUG_PRINT_TRACE "" ++#endif ++ +/** -+ * @file mali_kbase_gpu_memory_debugfs.h -+ * Header file for gpu_memory entry in debugfs ++ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) ++ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event. ++ * @param trace location in the code from where the message is printed ++ * @param function function from where the message is printed ++ * @param ... Format string followed by format arguments. ++ * @note function parameter cannot be concatenated with other strings ++ */ ++/* Select the correct system output function*/ ++#ifdef CONFIG_MALI_DEBUG ++#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ ++ do { \ ++ pr_err("Mali: %s function:%s ", trace, function);\ ++ pr_err(__VA_ARGS__);\ ++ pr_err("\n");\ ++ } while (false) ++#else ++#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) 
CSTD_NOP() ++#endif ++ ++#ifdef CONFIG_MALI_DEBUG ++#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() ++#else ++#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() ++#endif ++ ++/** ++ * @def KBASE_DEBUG_ASSERT(expr) ++ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * ++ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 ++ * ++ * @param expr Boolean expression + */ ++#define KBASE_DEBUG_ASSERT(expr) \ ++ KBASE_DEBUG_ASSERT_MSG(expr, #expr) + -+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H -+#define _KBASE_GPU_MEMORY_DEBUGFS_H ++#if KBASE_DEBUG_DISABLE_ASSERTS ++#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() ++#else ++ /** ++ * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) ++ * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false ++ * ++ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 ++ * ++ * @param expr Boolean expression ++ * @param ... Message to display when @a expr is false, as a format string followed by format arguments. ++ */ ++#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ ++ do { \ ++ if (!(expr)) { \ ++ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ ++ KBASE_CALL_ASSERT_HOOK();\ ++ BUG();\ ++ } \ ++ } while (false) ++#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + -+#include -+#include ++/** ++ * @def KBASE_DEBUG_CODE( X ) ++ * @brief Executes the code inside the macro only in debug mode ++ * ++ * @param X Code to compile only in debug mode. ++ */ ++#ifdef CONFIG_MALI_DEBUG ++#define KBASE_DEBUG_CODE(X) X ++#else ++#define KBASE_DEBUG_CODE(X) CSTD_NOP() ++#endif /* CONFIG_MALI_DEBUG */ ++ ++/** @} */ + +/** -+ * @brief Initialize gpu_memory debugfs entry ++ * @brief Register a function to call on ASSERT ++ * ++ * Such functions will \b only be called during Debug mode, and for debugging ++ * features \b only. Do not rely on them to be called in general use. ++ * ++ * To disable the hook, supply NULL to \a func. ++ * ++ * @note This function is not thread-safe, and should only be used to ++ * register/deregister once in the module's lifetime. ++ * ++ * @param[in] func the function to call when an assert is triggered. ++ * @param[in] param the parameter to pass to \a func when calling it + */ -+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); ++void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + -+#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c ++/** ++ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() ++ * ++ * @note This function is not thread-safe with respect to multiple threads ++ * registering functions and parameters with ++ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the ++ * responsibility of the registered hook. ++ */ ++void kbasep_debug_assert_call_hook(void); ++ ++#endif /* _KBASE_DEBUG_H */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c new file mode 100644 -index 000000000..a947a2e03 +index 000000000..f29430ddf --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c -@@ -0,0 +1,510 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +@@ -0,0 +1,499 @@ +/* + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -368560,508 +369020,497 @@ index 000000000..a947a2e03 + + + -+ -+ -+/* -+ * Base kernel property query APIs -+ */ -+ +#include -+#include -+#include -+#include -+#include -+#include "mali_kbase_ioctl.h" -+#include ++#include ++#include + -+/** -+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. -+ * @value: The value from which to extract bits. -+ * @offset: The first bit to extract (0 being the LSB). -+ * @size: The number of bits to extract. -+ * -+ * Context: @offset + @size <= 32. -+ * -+ * Return: Bits [@offset, @offset + @size) from @value. -+ */ -+/* from mali_cdsb.h */ -+#define KBASE_UBFX32(value, offset, size) \ -+ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) ++#ifdef CONFIG_DEBUG_FS + -+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props) ++static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) +{ -+ kbase_gpu_clk_speed_func get_gpu_speed_mhz; -+ u32 gpu_speed_mhz; -+ int rc = 1; ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ unsigned long flags; ++ bool ret; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != kbase_props); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ ret = !list_empty(event_list); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function. -+ * If that function fails, or the function is not provided by the system integrator, we report the maximum -+ * GPU speed as specified by GPU_FREQ_KHZ_MAX. 
-+ */ -+ get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC; -+ if (get_gpu_speed_mhz != NULL) { -+ rc = get_gpu_speed_mhz(&gpu_speed_mhz); -+#ifdef CONFIG_MALI_DEBUG -+ /* Issue a warning message when the reported GPU speed falls outside the min/max range */ -+ if (rc == 0) { -+ u32 gpu_speed_khz = gpu_speed_mhz * 1000; ++ return ret; ++} + -+ if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || -+ gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max) -+ dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n", -+ (unsigned long)gpu_speed_khz, -+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, -+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max); -+ } -+#endif /* CONFIG_MALI_DEBUG */ ++static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct list_head *event_list = &kctx->kbdev->job_fault_event_list; ++ struct base_job_fault_event *event; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ if (list_empty(event_list)) { ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ return true; + } -+ if (kctx->kbdev->clock) { -+ gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000; -+ rc = 0; ++ list_for_each_entry(event, event_list, head) { ++ if (event->katom->kctx == kctx) { ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, ++ flags); ++ return false; ++ } + } -+ if (rc != 0) -+ gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000; ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ return true; ++} + -+ kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz; ++/* wait until the fault happen and copy the event */ ++static int kbase_job_fault_event_wait(struct kbase_device *kbdev, ++ struct base_job_fault_event *event) ++{ ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ struct base_job_fault_event *event_in; ++ unsigned long flags; + -+ memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props)); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ if (list_empty(event_list)) { ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ if (wait_event_interruptible(kbdev->job_fault_wq, ++ kbase_is_job_fault_event_pending(kbdev))) ++ return -ERESTARTSYS; ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ } + -+ /* Before API 8.2 they expect L3 cache info here, which was always 0 */ -+ if (kctx->api_version < KBASE_API_VERSION(8, 2)) -+ kbase_props->props.raw_props.suspend_size = 0; ++ event_in = list_entry(event_list->next, ++ struct base_job_fault_event, head); ++ event->event_code = event_in->event_code; ++ event->katom = event_in->katom; ++ ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return 0; ++ +} + -+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) ++/* remove the event from the queue */ ++static struct base_job_fault_event *kbase_job_fault_event_dequeue( ++ struct kbase_device *kbdev, struct list_head *event_list) +{ -+ struct mali_base_gpu_coherent_group *current_group; -+ u64 group_present; -+ u64 group_mask; -+ u64 first_set, first_set_prev; -+ u32 num_groups = 0; -+ -+ KBASE_DEBUG_ASSERT(NULL != props); -+ -+ props->coherency_info.coherency = props->raw_props.mem_features; -+ props->coherency_info.num_core_groups 
= hweight64(props->raw_props.l2_present); -+ -+ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { -+ /* Group is l2 coherent */ -+ group_present = props->raw_props.l2_present; -+ } else { -+ /* Group is l1 coherent */ -+ group_present = props->raw_props.shader_present; -+ } ++ struct base_job_fault_event *event; + -+ /* -+ * The coherent group mask can be computed from the l2 present -+ * register. -+ * -+ * For the coherent group n: -+ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) -+ * where first_set is group_present with only its nth set-bit kept -+ * (i.e. the position from where a new group starts). -+ * -+ * For instance if the groups are l2 coherent and l2_present=0x0..01111: -+ * The first mask is: -+ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) -+ * = (0x0..010 - 1) & ~(0x0..01 - 1) -+ * = 0x0..00f -+ * The second mask is: -+ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) -+ * = (0x0..100 - 1) & ~(0x0..010 - 1) -+ * = 0x0..0f0 -+ * And so on until all the bits from group_present have been cleared -+ * (i.e. there is no group left). -+ */ ++ event = list_entry(event_list->next, ++ struct base_job_fault_event, head); ++ list_del(event_list->next); + -+ current_group = props->coherency_info.group; -+ first_set = group_present & ~(group_present - 1); ++ return event; + -+ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { -+ group_present -= first_set; /* Clear the current group bit */ -+ first_set_prev = first_set; ++} + -+ first_set = group_present & ~(group_present - 1); -+ group_mask = (first_set - 1) & ~(first_set_prev - 1); ++/* Remove all the following atoms after the failed atom in the same context ++ * Call the postponed bottom half of job done. ++ * Then, this context could be rescheduled. ++ */ ++static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) ++{ ++ struct list_head *event_list = &kctx->job_fault_resume_event_list; + -+ /* Populate the coherent_group structure for each group */ -+ current_group->core_mask = group_mask & props->raw_props.shader_present; -+ current_group->num_cores = hweight64(current_group->core_mask); ++ while (!list_empty(event_list)) { ++ struct base_job_fault_event *event; + -+ num_groups++; -+ current_group++; ++ event = kbase_job_fault_event_dequeue(kctx->kbdev, ++ &kctx->job_fault_resume_event_list); ++ kbase_jd_done_worker(&event->katom->work); + } + -+ if (group_present != 0) -+ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); -+ -+ props->coherency_info.num_groups = num_groups; +} + -+/** -+ * kbase_gpuprops_get_props - Get the GPU configuration -+ * @gpu_props: The &base_gpu_props structure -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Fill the &base_gpu_props structure with values from the GPU configuration -+ * registers. 
Only the raw properties are filled in this function ++/* Remove all the failed atoms that belong to different contexts ++ * Resume all the contexts that were suspend due to failed job + */ -+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) ++static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) +{ -+ struct kbase_gpuprops_regdump regdump; -+ int i; ++ struct list_head *event_list = &kbdev->job_fault_event_list; ++ unsigned long flags; + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ KBASE_DEBUG_ASSERT(NULL != gpu_props); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ while (!list_empty(event_list)) { ++ kbase_job_fault_event_dequeue(kbdev, event_list); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ wake_up(&kbdev->job_fault_resume_wq); ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ } ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++} + -+ /* Dump relevant registers */ -+ kbase_backend_gpuprops_get(kbdev, ®dump); ++static void kbase_job_fault_resume_worker(struct work_struct *data) ++{ ++ struct base_job_fault_event *event = container_of(data, ++ struct base_job_fault_event, job_fault_work); ++ struct kbase_context *kctx; ++ struct kbase_jd_atom *katom; + -+ gpu_props->raw_props.gpu_id = regdump.gpu_id; -+ gpu_props->raw_props.tiler_features = regdump.tiler_features; -+ gpu_props->raw_props.mem_features = regdump.mem_features; -+ gpu_props->raw_props.mmu_features = regdump.mmu_features; -+ gpu_props->raw_props.l2_features = regdump.l2_features; -+ gpu_props->raw_props.suspend_size = regdump.suspend_size; ++ katom = event->katom; ++ kctx = katom->kctx; + -+ gpu_props->raw_props.as_present = regdump.as_present; -+ gpu_props->raw_props.js_present = regdump.js_present; -+ gpu_props->raw_props.shader_present = -+ ((u64) regdump.shader_present_hi << 32) + -+ regdump.shader_present_lo; -+ gpu_props->raw_props.tiler_present = -+ ((u64) regdump.tiler_present_hi << 32) + -+ regdump.tiler_present_lo; -+ gpu_props->raw_props.l2_present = -+ ((u64) regdump.l2_present_hi << 32) + -+ regdump.l2_present_lo; -+#ifdef CONFIG_MALI_CORESTACK -+ gpu_props->raw_props.stack_present = -+ ((u64) regdump.stack_present_hi << 32) + -+ regdump.stack_present_lo; -+#else /* CONFIG_MALI_CORESTACK */ -+ gpu_props->raw_props.stack_present = 0; -+#endif /* CONFIG_MALI_CORESTACK */ ++ dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + -+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) -+ gpu_props->raw_props.js_features[i] = regdump.js_features[i]; ++ /* When it was waked up, it need to check if queue is empty or the ++ * failed atom belongs to different context. If yes, wake up. Both ++ * of them mean the failed job has been dumped. Please note, it ++ * should never happen that the job_fault_event_list has the two ++ * atoms belong to the same context. 
++ */ ++ wait_event(kctx->kbdev->job_fault_resume_wq, ++ kbase_ctx_has_no_event_pending(kctx)); + -+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; ++ atomic_set(&kctx->job_fault_count, 0); ++ kbase_jd_done_worker(&katom->work); + -+ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; -+ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; -+ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; -+ gpu_props->raw_props.thread_features = regdump.thread_features; -+} ++ /* In case the following atoms were scheduled during failed job dump ++ * the job_done_worker was held. We need to rerun it after the dump ++ * was finished ++ */ ++ kbase_job_fault_resume_event_cleanup(kctx); + -+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) -+{ -+ gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); -+ gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); -+ gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); -+ gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); ++ dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); +} + -+/** -+ * kbase_gpuprops_calculate_props - Calculate the derived properties -+ * @gpu_props: The &base_gpu_props structure -+ * @kbdev: The &struct kbase_device structure for the device -+ * -+ * Fill the &base_gpu_props structure with values derived from the GPU -+ * configuration registers -+ */ -+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) ++static struct base_job_fault_event *kbase_job_fault_event_queue( ++ struct list_head *event_list, ++ struct kbase_jd_atom *atom, ++ u32 completion_code) +{ -+ int i; ++ struct base_job_fault_event *event; + -+ /* Populate the base_gpu_props structure */ -+ kbase_gpuprops_update_core_props_gpu_id(gpu_props); -+ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; -+ gpu_props->core_props.gpu_available_memory_size = totalram_pages() << PAGE_SHIFT; ++ event = &atom->fault_event; + -+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) -+ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; ++ event->katom = atom; ++ event->event_code = completion_code; + -+ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); -+ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); ++ list_add_tail(&event->head, event_list); + -+ /* Field with number of l2 slices is added to MEM_FEATURES register -+ * since t76x. Below code assumes that for older GPU reserved bits will -+ * be read as zero. 
*/ -+ gpu_props->l2_props.num_l2_slices = -+ KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; ++ return event; + -+ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); -+ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); ++} + -+ if (gpu_props->raw_props.thread_max_threads == 0) -+ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; -+ else -+ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; ++static void kbase_job_fault_event_post(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, u32 completion_code) ++{ ++ struct base_job_fault_event *event; ++ unsigned long flags; + -+ if (gpu_props->raw_props.thread_max_workgroup_size == 0) -+ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; -+ else -+ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, ++ katom, completion_code); ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + -+ if (gpu_props->raw_props.thread_max_barrier_size == 0) -+ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; -+ else -+ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; ++ wake_up_interruptible(&kbdev->job_fault_wq); + -+ gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); -+ gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); -+ gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); -+ gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); ++ INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); ++ queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); + -+ /* If values are not specified, then use defaults */ -+ if (gpu_props->thread_props.max_registers == 0) { -+ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; -+ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; -+ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; ++ dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", ++ katom->kctx->tgid, katom->kctx->id); ++ ++} ++ ++/* ++ * This function will process the job fault ++ * Get the register copy ++ * Send the failed job dump event ++ * Create a Wait queue to wait until the job dump finish ++ */ ++ ++bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, ++ u32 completion_code) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ ++ /* Check if dumping is in the process ++ * only one atom of each context can be dumped at the same time ++ * If the atom belongs to different context, it can be dumped ++ */ ++ if (atomic_read(&kctx->job_fault_count) > 0) { ++ kbase_job_fault_event_queue( ++ &kctx->job_fault_resume_event_list, ++ katom, completion_code); ++ dev_info(kctx->kbdev->dev, "queue:%d\n", ++ kbase_jd_atom_id(kctx, katom)); ++ return true; + } -+ /* Initialize the coherent_group structure for each group */ -+ kbase_gpuprops_construct_coherent_groups(gpu_props); ++ ++ if (kctx->kbdev->job_fault_debug == true) { ++ ++ if (completion_code != BASE_JD_EVENT_DONE) { ++ ++ if (kbase_job_fault_get_reg_snapshot(kctx) == false) { ++ dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); ++ return 
false; ++ } ++ ++ kbase_job_fault_event_post(kctx->kbdev, katom, ++ completion_code); ++ atomic_inc(&kctx->job_fault_count); ++ dev_info(kctx->kbdev->dev, "post:%d\n", ++ kbase_jd_atom_id(kctx, katom)); ++ return true; ++ ++ } ++ } ++ return false; ++ +} + -+void kbase_gpuprops_set(struct kbase_device *kbdev) ++static int debug_job_fault_show(struct seq_file *m, void *v) +{ -+ struct kbase_gpu_props *gpu_props; -+ struct gpu_raw_gpu_props *raw; ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event = (struct base_job_fault_event *)v; ++ struct kbase_context *kctx = event->katom->kctx; ++ int i; + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ gpu_props = &kbdev->gpu_props; -+ raw = &gpu_props->props.raw_props; ++ dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", ++ kctx->tgid, kctx->id, event->reg_offset); + -+ /* Initialize the base_gpu_props structure from the hardware */ -+ kbase_gpuprops_get_props(&gpu_props->props, kbdev); ++ if (kctx->reg_dump == NULL) { ++ dev_warn(kbdev->dev, "reg dump is NULL"); ++ return -1; ++ } + -+ /* Populate the derived properties */ -+ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); ++ if (kctx->reg_dump[event->reg_offset] == ++ REGISTER_DUMP_TERMINATION_FLAG) { ++ /* Return the error here to stop the read. And the ++ * following next() will not be called. The stop can ++ * get the real event resource and release it ++ */ ++ return -1; ++ } + -+ /* Populate kbase-only fields */ -+ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); -+ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); ++ if (event->reg_offset == 0) ++ seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); + -+ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); ++ for (i = 0; i < 50; i++) { ++ if (kctx->reg_dump[event->reg_offset] == ++ REGISTER_DUMP_TERMINATION_FLAG) { ++ break; ++ } ++ seq_printf(m, "%08x: %08x\n", ++ kctx->reg_dump[event->reg_offset], ++ kctx->reg_dump[1+event->reg_offset]); ++ event->reg_offset += 2; + -+ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); -+ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); ++ } + -+ gpu_props->num_cores = hweight64(raw->shader_present); -+ gpu_props->num_core_groups = hweight64(raw->l2_present); -+ gpu_props->num_address_spaces = hweight32(raw->as_present); -+ gpu_props->num_job_slots = hweight32(raw->js_present); ++ ++ return 0; +} ++static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event = (struct base_job_fault_event *)v; + -+void kbase_gpuprops_set_features(struct kbase_device *kbdev) ++ dev_info(kbdev->dev, "debug job fault seq next:%d, %d", ++ event->reg_offset, (int)*pos); ++ ++ return event; ++} ++ ++static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) +{ -+ base_gpu_props *gpu_props; -+ struct kbase_gpuprops_regdump regdump; ++ struct kbase_device *kbdev = m->private; ++ struct base_job_fault_event *event; + -+ gpu_props = &kbdev->gpu_props.props; ++ dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); + -+ /* Dump relevant registers */ -+ kbase_backend_gpuprops_get_features(kbdev, ®dump); ++ /* The condition is trick here. 
It needs make sure the ++ * fault hasn't happened and the dumping hasn't been started, ++ * or the dumping has finished ++ */ ++ if (*pos == 0) { ++ event = kmalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return NULL; ++ event->reg_offset = 0; ++ if (kbase_job_fault_event_wait(kbdev, event)) { ++ kfree(event); ++ return NULL; ++ } + -+ /* -+ * Copy the raw value from the register, later this will get turned -+ * into the selected coherency mode. -+ * Additionally, add non-coherent mode, as this is always supported. ++ /* The cache flush workaround is called in bottom half of ++ * job done but we delayed it. Now we should clean cache ++ * earlier. Then the GPU memory dump should be correct. ++ */ ++ kbase_backend_cacheclean(kbdev, event->katom); ++ } else ++ return NULL; ++ ++ return event; ++} ++ ++static void debug_job_fault_stop(struct seq_file *m, void *v) ++{ ++ struct kbase_device *kbdev = m->private; ++ ++ /* here we wake up the kbase_jd_done_worker after stop, it needs ++ * get the memory dump before the register dump in debug daemon, ++ * otherwise, the memory dump may be incorrect. + */ -+ gpu_props->raw_props.coherency_mode = regdump.coherency_features | -+ COHERENCY_FEATURE_BIT(COHERENCY_NONE); ++ ++ if (v != NULL) { ++ kfree(v); ++ dev_info(kbdev->dev, "debug job fault seq stop stage 1"); ++ ++ } else { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); ++ if (!list_empty(&kbdev->job_fault_event_list)) { ++ kbase_job_fault_event_dequeue(kbdev, ++ &kbdev->job_fault_event_list); ++ wake_up(&kbdev->job_fault_resume_wq); ++ } ++ spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); ++ dev_info(kbdev->dev, "debug job fault seq stop stage 2"); ++ } ++ +} + -+static struct { -+ u32 type; -+ size_t offset; -+ int size; -+} gpu_property_mapping[] = { -+#define PROP(name, member) \ -+ {KBASE_GPUPROP_ ## name, offsetof(struct mali_base_gpu_props, member), \ -+ sizeof(((struct mali_base_gpu_props *)0)->member)} -+ PROP(PRODUCT_ID, core_props.product_id), -+ PROP(VERSION_STATUS, core_props.version_status), -+ PROP(MINOR_REVISION, core_props.minor_revision), -+ PROP(MAJOR_REVISION, core_props.major_revision), -+ PROP(GPU_SPEED_MHZ, core_props.gpu_speed_mhz), -+ PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), -+ PROP(GPU_FREQ_KHZ_MIN, core_props.gpu_freq_khz_min), -+ PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), -+ PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), -+ PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), -+ PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), -+ PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), ++static const struct seq_operations ops = { ++ .start = debug_job_fault_start, ++ .next = debug_job_fault_next, ++ .stop = debug_job_fault_stop, ++ .show = debug_job_fault_show, ++}; + -+ PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), -+ PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), -+ PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), ++static int debug_job_fault_open(struct inode *in, struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; + -+ PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), -+ PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), ++ seq_open(file, &ops); + -+ PROP(MAX_THREADS, thread_props.max_threads), -+ PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), -+ PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), -+ PROP(MAX_REGISTERS, thread_props.max_registers), -+ PROP(MAX_TASK_QUEUE, 
thread_props.max_task_queue), -+ PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), -+ PROP(IMPL_TECH, thread_props.impl_tech), ++ ((struct seq_file *)file->private_data)->private = kbdev; ++ dev_info(kbdev->dev, "debug job fault seq open"); + -+ PROP(RAW_SHADER_PRESENT, raw_props.shader_present), -+ PROP(RAW_TILER_PRESENT, raw_props.tiler_present), -+ PROP(RAW_L2_PRESENT, raw_props.l2_present), -+ PROP(RAW_STACK_PRESENT, raw_props.stack_present), -+ PROP(RAW_L2_FEATURES, raw_props.l2_features), -+ PROP(RAW_SUSPEND_SIZE, raw_props.suspend_size), -+ PROP(RAW_MEM_FEATURES, raw_props.mem_features), -+ PROP(RAW_MMU_FEATURES, raw_props.mmu_features), -+ PROP(RAW_AS_PRESENT, raw_props.as_present), -+ PROP(RAW_JS_PRESENT, raw_props.js_present), -+ PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), -+ PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), -+ PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), -+ PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), -+ PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), -+ PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), -+ PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), -+ PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), -+ PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), -+ PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), -+ PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), -+ PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), -+ PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), -+ PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), -+ PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), -+ PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), -+ PROP(RAW_TILER_FEATURES, raw_props.tiler_features), -+ PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), -+ PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), -+ PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), -+ PROP(RAW_GPU_ID, raw_props.gpu_id), -+ PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), -+ PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, -+ raw_props.thread_max_workgroup_size), -+ PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), -+ PROP(RAW_THREAD_FEATURES, raw_props.thread_features), -+ PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), ++ kbdev->job_fault_debug = true; + -+ PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), -+ PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), -+ PROP(COHERENCY_COHERENCY, coherency_info.coherency), -+ PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), -+ PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), -+ PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), -+ PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), -+ PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), -+ PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), -+ PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), -+ PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), -+ PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), -+ PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), -+ PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), -+ PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), -+ PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), -+ PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), -+ PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), -+ PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), ++ return 0; + -+#undef PROP ++} ++ ++static int 
debug_job_fault_release(struct inode *in, struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; ++ ++ seq_release(in, file); ++ ++ kbdev->job_fault_debug = false; ++ ++ /* Clean the unprocessed job fault. After that, all the suspended ++ * contexts could be rescheduled. ++ */ ++ kbase_job_fault_event_cleanup(kbdev); ++ ++ dev_info(kbdev->dev, "debug job fault seq close"); ++ ++ return 0; ++} ++ ++static const struct file_operations kbasep_debug_job_fault_fops = { ++ .open = debug_job_fault_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = debug_job_fault_release, +}; + -+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) ++/* ++ * Initialize debugfs entry for job fault dump ++ */ ++void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbase_gpu_props *kprops = &kbdev->gpu_props; -+ struct mali_base_gpu_props *props = &kprops->props; -+ u32 count = ARRAY_SIZE(gpu_property_mapping); -+ u32 i; -+ u32 size = 0; -+ u8 *p; ++ debugfs_create_file("job_fault", S_IRUGO, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_debug_job_fault_fops); ++} + -+ for (i = 0; i < count; i++) { -+ /* 4 bytes for the ID, and the size of the property */ -+ size += 4 + gpu_property_mapping[i].size; -+ } + -+ kprops->prop_buffer_size = size; -+ kprops->prop_buffer = kmalloc(size, GFP_KERNEL); ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++{ + -+ if (!kprops->prop_buffer) { -+ kprops->prop_buffer_size = 0; ++ INIT_LIST_HEAD(&kbdev->job_fault_event_list); ++ ++ init_waitqueue_head(&(kbdev->job_fault_wq)); ++ init_waitqueue_head(&(kbdev->job_fault_resume_wq)); ++ spin_lock_init(&kbdev->job_fault_event_lock); ++ ++ kbdev->job_fault_resume_workq = alloc_workqueue( ++ "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); ++ if (!kbdev->job_fault_resume_workq) + return -ENOMEM; -+ } + -+ p = kprops->prop_buffer; ++ kbdev->job_fault_debug = false; + -+#define WRITE_U8(v) (*p++ = (v) & 0xFF) -+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) -+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) -+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) ++ return 0; ++} + -+ for (i = 0; i < count; i++) { -+ u32 type = gpu_property_mapping[i].type; -+ u8 type_size; -+ void *field = ((u8 *)props) + gpu_property_mapping[i].offset; ++/* ++ * Release the relevant resource per device ++ */ ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++{ ++ destroy_workqueue(kbdev->job_fault_resume_workq); ++} + -+ switch (gpu_property_mapping[i].size) { -+ case 1: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U8; -+ break; -+ case 2: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U16; -+ break; -+ case 4: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U32; -+ break; -+ case 8: -+ type_size = KBASE_GPUPROP_VALUE_SIZE_U64; -+ break; -+ default: -+ dev_err(kbdev->dev, -+ "Invalid gpu_property_mapping type=%d size=%d", -+ type, gpu_property_mapping[i].size); -+ return -EINVAL; -+ } + -+ WRITE_U32((type<<2) | type_size); ++/* ++ * Initialize the relevant data structure per context ++ */ ++void kbase_debug_job_fault_context_init(struct kbase_context *kctx) ++{ + -+ switch (type_size) { -+ case KBASE_GPUPROP_VALUE_SIZE_U8: -+ WRITE_U8(*((u8 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U16: -+ WRITE_U16(*((u16 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U32: -+ WRITE_U32(*((u32 *)field)); -+ break; -+ case KBASE_GPUPROP_VALUE_SIZE_U64: -+ WRITE_U64(*((u64 *)field)); -+ break; -+ 
default: /* Cannot be reached */ -+ WARN_ON(1); -+ return -EINVAL; -+ } ++ /* We need allocate double size register range ++ * Because this memory will keep the register address and value ++ */ ++ kctx->reg_dump = vmalloc(0x4000 * 2); ++ if (kctx->reg_dump == NULL) ++ return; ++ ++ if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { ++ vfree(kctx->reg_dump); ++ kctx->reg_dump = NULL; + } ++ INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); ++ atomic_set(&kctx->job_fault_count, 0); ++ ++} ++ ++/* ++ * release the relevant resource per context ++ */ ++void kbase_debug_job_fault_context_term(struct kbase_context *kctx) ++{ ++ vfree(kctx->reg_dump); ++} ++ ++#else /* CONFIG_DEBUG_FS */ ++ ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) ++{ ++ kbdev->job_fault_debug = false; + + return 0; +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h ++ ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h new file mode 100644 -index 000000000..57b3eaf9c +index 000000000..a2bf8983c --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h -@@ -0,0 +1,84 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +@@ -0,0 +1,96 @@ +/* + * -+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -369076,82 +369525,94 @@ index 000000000..57b3eaf9c + + + ++#ifndef _KBASE_DEBUG_JOB_FAULT_H ++#define _KBASE_DEBUG_JOB_FAULT_H ++ ++#include ++#include + ++#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF + +/** -+ * @file mali_kbase_gpuprops.h -+ * Base kernel property query APIs ++ * kbase_debug_job_fault_dev_init - Create the fault event wait queue ++ * per device and initialize the required lists. ++ * @kbdev: Device pointer ++ * ++ * Return: Zero on success or a negative error code. + */ ++int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + -+#ifndef _KBASE_GPUPROPS_H_ -+#define _KBASE_GPUPROPS_H_ -+ -+#include "mali_kbase_gpuprops_types.h" -+ -+/* Forward definition - see mali_kbase.h */ -+struct kbase_device; ++/** ++ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs ++ * @kbdev: Device pointer ++ */ ++void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** -+ * @brief Set up Kbase GPU properties. -+ * -+ * Set up Kbase GPU properties with information from the GPU registers -+ * -+ * @param kbdev The struct kbase_device structure for the device ++ * kbase_debug_job_fault_dev_term - Clean up resources created in ++ * kbase_debug_job_fault_dev_init. ++ * @kbdev: Device pointer + */ -+void kbase_gpuprops_set(struct kbase_device *kbdev); ++void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + +/** -+ * kbase_gpuprops_set_features - Set up Kbase GPU properties -+ * @kbdev: Device pointer -+ * -+ * This function sets up GPU properties that are dependent on the hardware -+ * features bitmask. This function must be preceeded by a call to -+ * kbase_hw_set_features_mask(). 
++ * kbase_debug_job_fault_context_init - Initialize the relevant ++ * data structure per context ++ * @kctx: KBase context pointer + */ -+void kbase_gpuprops_set_features(struct kbase_device *kbdev); ++void kbase_debug_job_fault_context_init(struct kbase_context *kctx); + +/** -+ * @brief Provide GPU properties to userside through UKU call. -+ * -+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers. -+ * -+ * @param kctx The struct kbase_context structure -+ * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace -+ * -+ * @return 0 on success. Any other value indicates failure. ++ * kbase_debug_job_fault_context_term - Release the relevant ++ * resource per context ++ * @kctx: KBase context pointer + */ -+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props); ++void kbase_debug_job_fault_context_term(struct kbase_context *kctx); + +/** -+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer -+ * @kbdev: The kbase device -+ * -+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user -+ * space to read. ++ * kbase_debug_job_fault_process - Process the failed job. ++ * It will send a event and wake up the job fault waiting queue ++ * Then create a work queue to wait for job dump finish ++ * This function should be called in the interrupt handler and before ++ * jd_done that make sure the jd_done_worker will be delayed until the ++ * job dump finish ++ * @katom: The failed atom pointer ++ * @completion_code: the job status ++ * @return true if dump is going on + */ -+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); ++bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, ++ u32 completion_code); ++ + +/** -+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value -+ * @gpu_props: the &base_gpu_props structure -+ * -+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into -+ * separate fields (version_status, minor_revision, major_revision, product_id) -+ * stored in base_gpu_props::core_props. ++ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers ++ * address during the job fault process, the relevant registers will ++ * be saved when a job fault happen ++ * @kctx: KBase context pointer ++ * @reg_range: Maximum register address space ++ * @return true if initializing successfully + */ -+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props); ++bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, ++ int reg_range); + ++/** ++ * kbase_job_fault_get_reg_snapshot - Read the interested registers for ++ * failed job dump ++ * @kctx: KBase context pointer ++ * @return true if getting registers successfully ++ */ ++bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); + -+#endif /* _KBASE_GPUPROPS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h ++#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c new file mode 100644 -index 000000000..10794fc27 +index 000000000..6f2cbdf57 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h -@@ -0,0 +1,92 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +@@ -0,0 +1,306 @@ +/* + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -369166,680 +369627,304 @@ index 000000000..10794fc27 + + + -+ -+ -+/** -+ * @file mali_kbase_gpuprops_types.h -+ * Base kernel property query APIs ++/* ++ * Debugfs interface to dump the memory visible to the GPU + */ + -+#ifndef _KBASE_GPUPROPS_TYPES_H_ -+#define _KBASE_GPUPROPS_TYPES_H_ ++#include "mali_kbase_debug_mem_view.h" ++#include "mali_kbase.h" + -+#include "mali_base_kernel.h" ++#include ++#include + -+#define KBASE_GPU_SPEED_MHZ 123 -+#define KBASE_GPU_PC_SIZE_LOG2 24U ++#ifdef CONFIG_DEBUG_FS + -+struct kbase_gpuprops_regdump { -+ u32 gpu_id; -+ u32 l2_features; -+ u32 suspend_size; /* API 8.2+ */ -+ u32 tiler_features; -+ u32 mem_features; -+ u32 mmu_features; -+ u32 as_present; -+ u32 js_present; -+ u32 thread_max_threads; -+ u32 thread_max_workgroup_size; -+ u32 thread_max_barrier_size; -+ u32 thread_features; -+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; -+ u32 js_features[GPU_MAX_JOB_SLOTS]; -+ u32 shader_present_lo; -+ u32 shader_present_hi; -+ u32 tiler_present_lo; -+ u32 tiler_present_hi; -+ u32 l2_present_lo; -+ u32 l2_present_hi; -+ u32 stack_present_lo; -+ u32 stack_present_hi; -+ u32 coherency_features; -+}; ++struct debug_mem_mapping { ++ struct list_head node; + -+struct kbase_gpu_cache_props { -+ u8 associativity; -+ u8 external_bus_width; -+}; ++ struct kbase_mem_phy_alloc *alloc; ++ unsigned long flags; + -+struct kbase_gpu_mem_props { -+ u8 core_group; ++ u64 start_pfn; ++ size_t nr_pages; +}; + -+struct kbase_gpu_mmu_props { -+ u8 va_bits; -+ u8 pa_bits; ++struct debug_mem_data { ++ struct list_head mapping_list; ++ struct kbase_context *kctx; +}; + -+struct kbase_gpu_props { -+ /* kernel-only properties */ -+ u8 num_cores; -+ u8 num_core_groups; -+ u8 num_address_spaces; -+ u8 num_job_slots; -+ -+ struct kbase_gpu_cache_props l2_props; -+ -+ struct kbase_gpu_mem_props mem; -+ struct kbase_gpu_mmu_props mmu; -+ -+ /* Properties shared with userspace */ -+ base_gpu_props props; -+ -+ u32 prop_buffer_size; -+ void *prop_buffer; ++struct debug_mem_seq_off { ++ struct list_head *lh; ++ size_t offset; +}; + -+#endif /* _KBASE_GPUPROPS_TYPES_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c -new file mode 100644 -index 000000000..9a390d233 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c -@@ -0,0 +1,453 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ -+ -+ -+ ++static void *debug_mem_start(struct seq_file *m, loff_t *_pos) ++{ ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data; ++ struct debug_mem_mapping *map; ++ loff_t pos = *_pos; + ++ list_for_each_entry(map, &mem_data->mapping_list, node) { ++ if (pos >= map->nr_pages) { ++ pos -= map->nr_pages; ++ } else { ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return NULL; ++ data->lh = &map->node; ++ data->offset = pos; ++ return data; ++ } ++ } + -+/* -+ * Run-time work-arounds helpers -+ */ ++ /* Beyond the end */ ++ return NULL; ++} + -+#include -+#include -+#include -+#include "mali_kbase.h" -+#include "mali_kbase_hw.h" ++static void debug_mem_stop(struct seq_file *m, void *v) ++{ ++ kfree(v); ++} + -+void kbase_hw_set_features_mask(struct kbase_device *kbdev) ++static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) +{ -+ const enum base_hw_feature *features; -+ u32 gpu_id; -+ u32 product_id; ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data = v; ++ struct debug_mem_mapping *map; + -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; -+ product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ map = list_entry(data->lh, struct debug_mem_mapping, node); + -+ if (GPU_ID_IS_NEW_FORMAT(product_id)) { -+ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { -+ case GPU_ID2_PRODUCT_TMIX: -+ features = base_hw_features_tMIx; -+ break; -+ case GPU_ID2_PRODUCT_THEX: -+ features = base_hw_features_tHEx; -+ break; -+ case GPU_ID2_PRODUCT_TSIX: -+ features = base_hw_features_tSIx; -+ break; -+#ifdef MALI_INCLUDE_TKAX -+ case GPU_ID2_PRODUCT_TKAX: -+ features = base_hw_features_tKAx; -+ break; -+#endif /* MALI_INCLUDE_TKAX */ -+#ifdef MALI_INCLUDE_TTRX -+ case GPU_ID2_PRODUCT_TTRX: -+ features = base_hw_features_tTRx; -+ break; -+#endif /* MALI_INCLUDE_TTRX */ -+ default: -+ features = base_hw_features_generic; -+ break; -+ } -+ } else { -+ switch (product_id) { -+ case GPU_ID_PI_TFRX: -+ /* FALLTHROUGH */ -+ case GPU_ID_PI_T86X: -+ features = base_hw_features_tFxx; -+ break; -+ case GPU_ID_PI_T83X: -+ features = base_hw_features_t83x; -+ break; -+ case GPU_ID_PI_T82X: -+ features = base_hw_features_t82x; -+ break; -+ case GPU_ID_PI_T76X: -+ features = base_hw_features_t76x; -+ break; -+ case GPU_ID_PI_T72X: -+ features = base_hw_features_t72x; -+ break; -+ case GPU_ID_PI_T62X: -+ features = base_hw_features_t62x; -+ break; -+ case GPU_ID_PI_T60X: -+ features = base_hw_features_t60x; -+ break; -+ default: -+ features = base_hw_features_generic; -+ break; -+ } ++ if (data->offset < map->nr_pages - 1) { ++ data->offset++; ++ ++*pos; ++ return data; + } + -+ for (; *features != BASE_HW_FEATURE_END; features++) -+ set_bit(*features, &kbdev->hw_features_mask[0]); -+} ++ if (list_is_last(data->lh, &mem_data->mapping_list)) { ++ kfree(data); ++ return NULL; ++ } + -+/** -+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID -+ * @kbdev: Device pointer -+ * -+ * Return: pointer to an array of hardware issues, terminated by -+ * BASE_HW_ISSUE_END. -+ * -+ * This function can only be used on new-format GPU IDs, i.e. those for which -+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev. -+ * -+ * In debugging versions of the driver, unknown versions of a known GPU will -+ * be treated as the most recent known version not later than the actual -+ * version. 
In such circumstances, the GPU ID in @kbdev will also be replaced -+ * with the most recent known version. -+ * -+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() -+ * before calling this function. -+ */ -+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( -+ struct kbase_device *kbdev) -+{ -+ const enum base_hw_issue *issues = NULL; ++ data->lh = data->lh->next; ++ data->offset = 0; ++ ++*pos; + -+ struct base_hw_product { -+ u32 product_model; -+ struct { -+ u32 version; -+ const enum base_hw_issue *issues; -+ } map[7]; -+ }; ++ return data; ++} + -+ static const struct base_hw_product base_hw_products[] = { -+ {GPU_ID2_PRODUCT_TMIX, -+ {{GPU_ID2_VERSION_MAKE(0, 0, 1), -+ base_hw_issues_tMIx_r0p0_05dev0}, -+ {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, -+ {U32_MAX /* sentinel value */, NULL} } }, ++static int debug_mem_show(struct seq_file *m, void *v) ++{ ++ struct debug_mem_data *mem_data = m->private; ++ struct debug_mem_seq_off *data = v; ++ struct debug_mem_mapping *map; ++ int i, j; ++ struct page *page; ++ uint32_t *mapping; ++ pgprot_t prot = PAGE_KERNEL; + -+ {GPU_ID2_PRODUCT_THEX, -+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, -+ {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, -+ {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, -+ {U32_MAX, NULL} } }, ++ map = list_entry(data->lh, struct debug_mem_mapping, node); + -+ {GPU_ID2_PRODUCT_TSIX, -+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, -+ {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, -+ {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, -+ {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tSIx_r0p1}, -+ {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, -+ {GPU_ID2_VERSION_MAKE(1, 0, 1), base_hw_issues_tSIx_r1p0}, -+ {U32_MAX, NULL} } }, ++ kbase_gpu_vm_lock(mem_data->kctx); + ++ if (data->offset >= map->alloc->nents) { ++ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + ++ data->offset) << PAGE_SHIFT); ++ goto out; ++ } + -+#ifdef MALI_INCLUDE_TKAX -+ {GPU_ID2_PRODUCT_TKAX, -+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0}, -+ {U32_MAX, NULL} } }, -+#endif /* MALI_INCLUDE_TKAX */ ++ if (!(map->flags & KBASE_REG_CPU_CACHED)) ++ prot = pgprot_writecombine(prot); + -+#ifdef MALI_INCLUDE_TTRX -+ {GPU_ID2_PRODUCT_TTRX, -+ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, -+ {U32_MAX, NULL} } }, -+#endif /* MALI_INCLUDE_TTRX */ -+ }; ++ page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset])); ++ mapping = vmap(&page, 1, VM_MAP, prot); ++ if (!mapping) ++ goto out; + -+ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; -+ const struct base_hw_product *product = NULL; -+ size_t p; ++ for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { ++ seq_printf(m, "%016llx:", i + ((map->start_pfn + ++ data->offset) << PAGE_SHIFT)); + -+ /* Stop when we reach the end of the products array. */ -+ for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { -+ if (product_model == base_hw_products[p].product_model) { -+ product = &base_hw_products[p]; -+ break; -+ } ++ for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping)) ++ seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]); ++ seq_putc(m, '\n'); + } + -+ if (product != NULL) { -+ /* Found a matching product. 
*/ -+ const u32 version = gpu_id & GPU_ID2_VERSION; -+ u32 fallback_version = 0; -+ const enum base_hw_issue *fallback_issues = NULL; -+ size_t v; ++ vunmap(mapping); + -+ /* Stop when we reach the end of the map. */ -+ for (v = 0; product->map[v].version != U32_MAX; ++v) { ++ seq_putc(m, '\n'); + -+ if (version == product->map[v].version) { -+ /* Exact match so stop. */ -+ issues = product->map[v].issues; -+ break; -+ } ++out: ++ kbase_gpu_vm_unlock(mem_data->kctx); ++ return 0; ++} + -+ /* Check whether this is a candidate for most recent -+ known version not later than the actual -+ version. */ -+ if ((version > product->map[v].version) && -+ (product->map[v].version >= fallback_version)) { -+ fallback_version = product->map[v].version; -+ fallback_issues = product->map[v].issues; -+ } -+ } ++static const struct seq_operations ops = { ++ .start = debug_mem_start, ++ .next = debug_mem_next, ++ .stop = debug_mem_stop, ++ .show = debug_mem_show, ++}; + -+ if ((issues == NULL) && (fallback_issues != NULL)) { -+ /* Fall back to the issue set of the most recent known -+ version not later than the actual version. */ -+ issues = fallback_issues; ++static int debug_mem_zone_open(struct rb_root *rbtree, ++ struct debug_mem_data *mem_data) ++{ ++ int ret = 0; ++ struct rb_node *p; ++ struct kbase_va_region *reg; ++ struct debug_mem_mapping *mapping; + -+ dev_info(kbdev->dev, -+ "r%dp%d status %d is unknown; treating as r%dp%d status %d", -+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> -+ GPU_ID2_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MINOR) >> -+ GPU_ID2_VERSION_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT, -+ (fallback_version & GPU_ID2_VERSION_MAJOR) >> -+ GPU_ID2_VERSION_MAJOR_SHIFT, -+ (fallback_version & GPU_ID2_VERSION_MINOR) >> -+ GPU_ID2_VERSION_MINOR_SHIFT, -+ (fallback_version & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT); ++ for (p = rb_first(rbtree); p; p = rb_next(p)) { ++ reg = rb_entry(p, struct kbase_va_region, rblink); + -+ gpu_id &= ~GPU_ID2_VERSION; -+ gpu_id |= fallback_version; -+ kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; ++ if (reg->gpu_alloc == NULL) ++ /* Empty region - ignore */ ++ continue; + -+ kbase_gpuprops_update_core_props_gpu_id(&kbdev->gpu_props.props); ++ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); ++ if (!mapping) { ++ ret = -ENOMEM; ++ goto out; + } ++ ++ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ mapping->start_pfn = reg->start_pfn; ++ mapping->nr_pages = reg->nr_pages; ++ mapping->flags = reg->flags; ++ list_add_tail(&mapping->node, &mem_data->mapping_list); + } -+ return issues; ++ ++out: ++ return ret; +} + -+int kbase_hw_set_issues_mask(struct kbase_device *kbdev) ++static int debug_mem_open(struct inode *i, struct file *file) +{ -+ const enum base_hw_issue *issues; -+ u32 gpu_id; -+ u32 product_id; -+ u32 impl_tech; -+ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -+ product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; -+ product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; -+ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; -+ -+ if (impl_tech != IMPLEMENTATION_MODEL) { -+ if (GPU_ID_IS_NEW_FORMAT(product_id)) { -+ issues = kbase_hw_get_issues_for_new_id(kbdev); -+ if (issues == NULL) { -+ dev_err(kbdev->dev, -+ "Unknown GPU ID %x", gpu_id); -+ return -EINVAL; -+ } -+ -+ /* The GPU ID might have been replaced with the last -+ known version of the same GPU. 
*/ -+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ struct file *kctx_file = i->i_private; ++ struct kbase_context *kctx = kctx_file->private_data; ++ struct debug_mem_data *mem_data; ++ int ret; + -+ } else { -+ switch (gpu_id) { -+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): -+ issues = base_hw_issues_t60x_r0p0_15dev0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): -+ issues = base_hw_issues_t60x_r0p0_eac; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): -+ issues = base_hw_issues_t60x_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): -+ issues = base_hw_issues_t62x_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): -+ issues = base_hw_issues_t62x_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): -+ issues = base_hw_issues_t62x_r1p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): -+ issues = base_hw_issues_t76x_r0p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): -+ issues = base_hw_issues_t76x_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): -+ issues = base_hw_issues_t76x_r0p1_50rel0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): -+ issues = base_hw_issues_t76x_r0p2; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): -+ issues = base_hw_issues_t76x_r0p3; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): -+ issues = base_hw_issues_t76x_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): -+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): -+ issues = base_hw_issues_t72x_r0p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): -+ issues = base_hw_issues_t72x_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): -+ issues = base_hw_issues_t72x_r1p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): -+ issues = base_hw_issues_tFRx_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): -+ issues = base_hw_issues_tFRx_r0p2; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): -+ issues = base_hw_issues_tFRx_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): -+ issues = base_hw_issues_tFRx_r2p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): -+ issues = base_hw_issues_t86x_r0p2; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): -+ issues = base_hw_issues_t86x_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): -+ issues = base_hw_issues_t86x_r2p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): -+ issues = base_hw_issues_t83x_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): -+ issues = base_hw_issues_t83x_r1p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): -+ issues = base_hw_issues_t82x_r0p0; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): -+ issues = base_hw_issues_t82x_r0p1; -+ break; -+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): -+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): -+ issues = base_hw_issues_t82x_r1p0; -+ break; -+ default: -+ dev_err(kbdev->dev, -+ "Unknown GPU ID %x", gpu_id); -+ return -EINVAL; -+ } -+ } -+ } else { -+ /* Software model */ -+ if (GPU_ID_IS_NEW_FORMAT(product_id)) { -+ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { -+ case GPU_ID2_PRODUCT_TMIX: -+ issues = base_hw_issues_model_tMIx; -+ break; -+ case GPU_ID2_PRODUCT_THEX: -+ issues = base_hw_issues_model_tHEx; -+ break; -+ case GPU_ID2_PRODUCT_TSIX: -+ 
issues = base_hw_issues_model_tSIx; -+ break; -+#ifdef MALI_INCLUDE_TKAX -+ case GPU_ID2_PRODUCT_TKAX: -+ issues = base_hw_issues_model_tKAx; -+ break; -+#endif /* MALI_INCLUDE_TKAX */ -+#ifdef MALI_INCLUDE_TTRX -+ case GPU_ID2_PRODUCT_TTRX: -+ issues = base_hw_issues_model_tTRx; -+ break; -+#endif /* MALI_INCLUDE_TTRX */ -+ default: -+ dev_err(kbdev->dev, -+ "Unknown GPU ID %x", gpu_id); -+ return -EINVAL; -+ } -+ } else { -+ switch (product_id) { -+ case GPU_ID_PI_T60X: -+ issues = base_hw_issues_model_t60x; -+ break; -+ case GPU_ID_PI_T62X: -+ issues = base_hw_issues_model_t62x; -+ break; -+ case GPU_ID_PI_T72X: -+ issues = base_hw_issues_model_t72x; -+ break; -+ case GPU_ID_PI_T76X: -+ issues = base_hw_issues_model_t76x; -+ break; -+ case GPU_ID_PI_TFRX: -+ issues = base_hw_issues_model_tFRx; -+ break; -+ case GPU_ID_PI_T86X: -+ issues = base_hw_issues_model_t86x; -+ break; -+ case GPU_ID_PI_T83X: -+ issues = base_hw_issues_model_t83x; -+ break; -+ case GPU_ID_PI_T82X: -+ issues = base_hw_issues_model_t82x; -+ break; -+ default: -+ dev_err(kbdev->dev, "Unknown GPU ID %x", -+ gpu_id); -+ return -EINVAL; -+ } -+ } -+ } ++ ret = seq_open(file, &ops); ++ if (ret) ++ return ret; + -+ if (GPU_ID_IS_NEW_FORMAT(product_id)) { -+ dev_info(kbdev->dev, -+ "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", -+ (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> -+ GPU_ID2_PRODUCT_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_MAJOR) >> -+ GPU_ID2_ARCH_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_MINOR) >> -+ GPU_ID2_ARCH_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_ARCH_REV) >> -+ GPU_ID2_ARCH_REV_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> -+ GPU_ID2_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_MINOR) >> -+ GPU_ID2_VERSION_MINOR_SHIFT, -+ (gpu_id & GPU_ID2_VERSION_STATUS) >> -+ GPU_ID2_VERSION_STATUS_SHIFT); -+ } else { -+ dev_info(kbdev->dev, -+ "GPU identified as 0x%04x r%dp%d status %d", -+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> -+ GPU_ID_VERSION_PRODUCT_ID_SHIFT, -+ (gpu_id & GPU_ID_VERSION_MAJOR) >> -+ GPU_ID_VERSION_MAJOR_SHIFT, -+ (gpu_id & GPU_ID_VERSION_MINOR) >> -+ GPU_ID_VERSION_MINOR_SHIFT, -+ (gpu_id & GPU_ID_VERSION_STATUS) >> -+ GPU_ID_VERSION_STATUS_SHIFT); ++ mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); ++ if (!mem_data) { ++ ret = -ENOMEM; ++ goto out; + } + -+ for (; *issues != BASE_HW_ISSUE_END; issues++) -+ set_bit(*issues, &kbdev->hw_issues_mask[0]); ++ mem_data->kctx = kctx; + -+ return 0; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h -new file mode 100644 -index 000000000..754250ce9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h -@@ -0,0 +1,65 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ INIT_LIST_HEAD(&mem_data->mapping_list); + ++ get_file(kctx_file); + ++ kbase_gpu_vm_lock(kctx); + ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); ++ if (0 != ret) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; ++ } + ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); ++ if (0 != ret) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; ++ } + -+/** -+ * @file -+ * Run-time work-arounds helpers -+ */ ++ ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); ++ if (0 != ret) { ++ kbase_gpu_vm_unlock(kctx); ++ goto out; ++ } + -+#ifndef _KBASE_HW_H_ -+#define _KBASE_HW_H_ ++ kbase_gpu_vm_unlock(kctx); + -+#include "mali_kbase_defs.h" ++ ((struct seq_file *)file->private_data)->private = mem_data; + -+/** -+ * @brief Tell whether a work-around should be enabled -+ */ -+#define kbase_hw_has_issue(kbdev, issue)\ -+ test_bit(issue, &(kbdev)->hw_issues_mask[0]) ++ return 0; + -+/** -+ * @brief Tell whether a feature is supported -+ */ -+#define kbase_hw_has_feature(kbdev, feature)\ -+ test_bit(feature, &(kbdev)->hw_features_mask[0]) ++out: ++ if (mem_data) { ++ while (!list_empty(&mem_data->mapping_list)) { ++ struct debug_mem_mapping *mapping; + -+/** -+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID -+ * @kbdev: Device pointer -+ * -+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. -+ * -+ * The GPU ID is read from the @kbdev. -+ * -+ * In debugging versions of the driver, unknown versions of a known GPU with a -+ * new-format ID will be treated as the most recent known version not later -+ * than the actual version. In such circumstances, the GPU ID in @kbdev will -+ * also be replaced with the most recent known version. -+ * -+ * Note: The GPU configuration must have been read by -+ * kbase_gpuprops_get_props() before calling this function. -+ */ -+int kbase_hw_set_issues_mask(struct kbase_device *kbdev); ++ mapping = list_first_entry(&mem_data->mapping_list, ++ struct debug_mem_mapping, node); ++ kbase_mem_phy_alloc_put(mapping->alloc); ++ list_del(&mapping->node); ++ kfree(mapping); ++ } ++ fput(kctx_file); ++ kfree(mem_data); ++ } ++ seq_release(i, file); ++ return ret; ++} + -+/** -+ * @brief Set the features mask depending on the GPU ID -+ */ -+void kbase_hw_set_features_mask(struct kbase_device *kbdev); ++static int debug_mem_release(struct inode *inode, struct file *file) ++{ ++ struct file *kctx_file = inode->i_private; ++ struct seq_file *sfile = file->private_data; ++ struct debug_mem_data *mem_data = sfile->private; ++ struct debug_mem_mapping *mapping; + -+#endif /* _KBASE_HW_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h -new file mode 100644 -index 000000000..b09be99e6 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h -@@ -0,0 +1,54 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ seq_release(inode, file); + ++ while (!list_empty(&mem_data->mapping_list)) { ++ mapping = list_first_entry(&mem_data->mapping_list, ++ struct debug_mem_mapping, node); ++ kbase_mem_phy_alloc_put(mapping->alloc); ++ list_del(&mapping->node); ++ kfree(mapping); ++ } + ++ kfree(mem_data); + ++ fput(kctx_file); + -+/* -+ * HW access backend common APIs -+ */ ++ return 0; ++} + -+#ifndef _KBASE_HWACCESS_BACKEND_H_ -+#define _KBASE_HWACCESS_BACKEND_H_ ++static const struct file_operations kbase_debug_mem_view_fops = { ++ .open = debug_mem_open, ++ .release = debug_mem_release, ++ .read = seq_read, ++ .llseek = seq_lseek ++}; + +/** -+ * kbase_backend_early_init - Perform any backend-specific initialization. -+ * @kbdev: Device pointer ++ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file ++ * @kctx_file: The /dev/mali0 file instance for the context + * -+ * Return: 0 on success, or an error code on failure. -+ */ -+int kbase_backend_early_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_backend_late_init - Perform any backend-specific initialization. -+ * @kbdev: Device pointer ++ * This function creates a "mem_view" file which can be used to get a view of ++ * the context's memory as the GPU sees it (i.e. using the GPU's page tables). + * -+ * Return: 0 on success, or an error code on failure. -+ */ -+int kbase_backend_late_init(struct kbase_device *kbdev); -+ -+/** -+ * kbase_backend_early_term - Perform any backend-specific termination. -+ * @kbdev: Device pointer ++ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the ++ * parent directory. + */ -+void kbase_backend_early_term(struct kbase_device *kbdev); ++void kbase_debug_mem_view_init(struct file *kctx_file) ++{ ++ struct kbase_context *kctx = kctx_file->private_data; + -+/** -+ * kbase_backend_late_term - Perform any backend-specific termination. -+ * @kbdev: Device pointer -+ */ -+void kbase_backend_late_term(struct kbase_device *kbdev); ++ debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file, ++ &kbase_debug_mem_view_fops); ++} + -+#endif /* _KBASE_HWACCESS_BACKEND_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h new file mode 100644 -index 000000000..0acf29719 +index 000000000..20ab51a77 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h -@@ -0,0 +1,36 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h +@@ -0,0 +1,25 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -369854,34 +369939,23 @@ index 000000000..0acf29719 + + + ++#ifndef _KBASE_DEBUG_MEM_VIEW_H ++#define _KBASE_DEBUG_MEM_VIEW_H + -+/** -+ * @file mali_kbase_hwaccess_gpu_defs.h -+ * HW access common definitions -+ */ -+ -+#ifndef _KBASE_HWACCESS_DEFS_H_ -+#define _KBASE_HWACCESS_DEFS_H_ -+ -+#include -+ -+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */ -+struct kbase_hwaccess_data { -+ struct kbase_context *active_kctx; ++#include + -+ struct kbase_backend_data backend; -+}; ++void kbase_debug_mem_view_init(struct file *kctx_file); + -+#endif /* _KBASE_HWACCESS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h new file mode 100644 -index 000000000..cf8a8131c +index 000000000..f8a6f33df --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h -@@ -0,0 +1,47 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h +@@ -0,0 +1,1602 @@ +/* + * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -369897,3891 +369971,2816 @@ index 000000000..cf8a8131c + + + ++ +/** -+ * Base kernel property query backend APIs ++ * @file mali_kbase_defs.h ++ * ++ * Defintions (types, defines, etcs) common to Kbase. They are placed here to ++ * allow the hierarchy of header files to work. + */ + -+#ifndef _KBASE_HWACCESS_GPUPROPS_H_ -+#define _KBASE_HWACCESS_GPUPROPS_H_ ++#ifndef _KBASE_DEFS_H_ ++#define _KBASE_DEFS_H_ + -+/** -+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from -+ * GPU -+ * @kbdev: Device pointer -+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure -+ */ -+void kbase_backend_gpuprops_get(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU -+ * @kbdev: Device pointer -+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure -+ * -+ * This function reads GPU properties that are dependent on the hardware -+ * features bitmask -+ */ -+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, -+ struct kbase_gpuprops_regdump *regdump); ++#include ++#include ++#include ++#include + ++#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++#include ++#endif + -+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h -new file mode 100644 -index 000000000..5de2b7535 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h -@@ -0,0 +1,116 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ + ++#ifdef CONFIG_KDS ++#include ++#endif /* CONFIG_KDS */ + ++#if defined(CONFIG_SYNC) ++#include ++#else ++#include "mali_kbase_fence_defs.h" ++#endif + ++#ifdef CONFIG_DEBUG_FS ++#include ++#endif /* CONFIG_DEBUG_FS */ + ++#ifdef CONFIG_MALI_DEVFREQ ++#include ++#endif /* CONFIG_MALI_DEVFREQ */ + -+/* -+ * HW Access instrumentation common APIs -+ */ ++#include ++#include ++#include + -+#ifndef _KBASE_HWACCESS_INSTR_H_ -+#define _KBASE_HWACCESS_INSTR_H_ ++#if defined(CONFIG_PM) ++#define KBASE_PM_RUNTIME 1 ++#endif + -+#include ++/** Enable SW tracing when set */ ++#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE ++#define KBASE_TRACE_ENABLE 1 ++#endif + -+/** -+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection -+ * @kbdev: Kbase device -+ * @kctx: Kbase context -+ * @setup: HW counter setup parameters -+ * -+ * Context: might sleep, waiting for reset to complete -+ * -+ * Return: 0 on success -+ */ -+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_uk_hwcnt_setup *setup); ++#ifndef KBASE_TRACE_ENABLE ++#ifdef CONFIG_MALI_DEBUG ++#define KBASE_TRACE_ENABLE 1 ++#else ++#define KBASE_TRACE_ENABLE 0 ++#endif /* CONFIG_MALI_DEBUG */ ++#endif /* KBASE_TRACE_ENABLE */ + -+/** -+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection -+ * @kctx: Kbase context -+ * -+ * Context: might sleep, waiting for an ongoing dump to complete -+ * -+ * Return: 0 on success -+ */ -+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); ++/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ ++#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/** -+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU -+ * @kctx: Kbase context -+ * -+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, -+ * of call kbase_instr_hwcnt_wait_for_dump(). -+ * -+ * Return: 0 on success ++ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. ++ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU ++ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware ++ * before resetting. + */ -+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); ++#define ZAP_TIMEOUT 1000 ++ ++/** Number of milliseconds before we time out on a GPU soft/hard reset */ ++#define RESET_TIMEOUT 500 + +/** -+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has -+ * completed. -+ * @kctx: Kbase context ++ * Prevent soft-stops from occuring in scheduling situations + * -+ * Context: will sleep, waiting for dump to complete ++ * This is not due to HW issues, but when scheduling is desired to be more predictable. + * -+ * Return: 0 on success -+ */ -+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); -+ -+/** -+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has -+ * completed -+ * @kctx: Kbase context -+ * @success: Set to true if successful ++ * Therefore, soft stop may still be disabled due to HW issues. + * -+ * Context: does not sleep. ++ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. 
+ * -+ * Return: true if the dump is complete ++ * @note if not in use, define this value to 0 instead of \#undef'ing it + */ -+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, -+ bool * const success); ++#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/** -+ * kbase_instr_hwcnt_clear() - Clear HW counters -+ * @kctx: Kbase context ++ * Prevent hard-stops from occuring in scheduling situations + * -+ * Context: might sleep, waiting for reset to complete ++ * This is not due to HW issues, but when scheduling is desired to be more predictable. + * -+ * Return: 0 on success ++ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. ++ * ++ * @note if not in use, define this value to 0 instead of \#undef'ing it + */ -+int kbase_instr_hwcnt_clear(struct kbase_context *kctx); ++#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/** -+ * kbase_instr_backend_init() - Initialise the instrumentation backend -+ * @kbdev: Kbase device -+ * -+ * This function should be called during driver initialization. ++ * The maximum number of Job Slots to support in the Hardware. + * -+ * Return: 0 on success ++ * You can optimize this down if your target devices will only ever support a ++ * small number of job slots. + */ -+int kbase_instr_backend_init(struct kbase_device *kbdev); ++#define BASE_JM_MAX_NR_SLOTS 3 + +/** -+ * kbase_instr_backend_init() - Terminate the instrumentation backend -+ * @kbdev: Kbase device ++ * The maximum number of Address Spaces to support in the Hardware. + * -+ * This function should be called during driver termination. ++ * You can optimize this down if your target devices will only ever support a ++ * small number of Address Spaces + */ -+void kbase_instr_backend_term(struct kbase_device *kbdev); ++#define BASE_MAX_NR_AS 16 + -+#endif /* _KBASE_HWACCESS_INSTR_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h -new file mode 100644 -index 000000000..750fda2cd ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h -@@ -0,0 +1,381 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++/* mmu */ ++#define MIDGARD_MMU_VA_BITS 48 + ++#if MIDGARD_MMU_VA_BITS > 39 ++#define MIDGARD_MMU_TOPLEVEL 0 ++#else ++#define MIDGARD_MMU_TOPLEVEL 1 ++#endif + ++#define MIDGARD_MMU_BOTTOMLEVEL 3 + ++#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) + -+/* -+ * HW access job manager common APIs -+ */ ++/** setting in kbase_context::as_nr that indicates it's invalid */ ++#define KBASEP_AS_NR_INVALID (-1) + -+#ifndef _KBASE_HWACCESS_JM_H_ -+#define _KBASE_HWACCESS_JM_H_ ++#define KBASE_LOCK_REGION_MAX_SIZE (63) ++#define KBASE_LOCK_REGION_MIN_SIZE (11) + -+/** -+ * kbase_backend_run_atom() - Run an atom on the GPU -+ * @kbdev: Device pointer -+ * @atom: Atom to run -+ * -+ * Caller must hold the HW access lock -+ */ -+void kbase_backend_run_atom(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */ ++#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2) ++#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1) + -+/** -+ * kbase_backend_slot_update - Update state based on slot ringbuffers -+ * -+ * @kbdev: Device pointer -+ * -+ * Inspect the jobs in the slot ringbuffers and update state. -+ * -+ * This will cause jobs to be submitted to hardware if they are unblocked -+ */ -+void kbase_backend_slot_update(struct kbase_device *kbdev); ++#include "mali_kbase_js_defs.h" ++#include "mali_kbase_hwaccess_defs.h" + -+/** -+ * kbase_backend_find_and_release_free_address_space() - Release a free AS -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * This function can evict an idle context from the runpool, freeing up the -+ * address space it was using. -+ * -+ * The address space is marked as in use. The caller must either assign a -+ * context using kbase_gpu_use_ctx(), or release it using -+ * kbase_ctx_sched_release() -+ * -+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none -+ * available -+ */ -+int kbase_backend_find_and_release_free_address_space( -+ struct kbase_device *kbdev, struct kbase_context *kctx); ++#define KBASEP_FORCE_REPLAY_DISABLED 0 + -+/** -+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the -+ * provided address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer. May be NULL -+ * @as_nr: Free address space to use -+ * -+ * kbase_gpu_next_job() will pull atoms from the active context. -+ * -+ * Return: true if successful, false if ASID not assigned. -+ */ -+bool kbase_backend_use_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int as_nr); ++/* Maximum force replay limit when randomization is enabled */ ++#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16 + -+/** -+ * kbase_backend_use_ctx_sched() - Activate a context. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * kbase_gpu_next_job() will pull atoms from the active context. -+ * -+ * The context must already be scheduled and assigned to an address space. If -+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used -+ * instead. 
-+ * -+ * Caller must hold hwaccess_lock -+ * -+ * Return: true if context is now active, false otherwise (ie if context does -+ * not have an address space assigned) -+ */ -+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++/** Atom has been previously soft-stoppped */ ++#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) ++/** Atom has been previously retried to execute */ ++#define KBASE_KATOM_FLAGS_RERUN (1<<2) ++#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) ++/** Atom has been previously hard-stopped. */ ++#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) ++/** Atom has caused us to enter disjoint state */ ++#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) ++/* Atom blocked on cross-slot dependency */ ++#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) ++/* Atom has fail dependency on cross-slot dependency */ ++#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) ++/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ ++#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) ++/* Atom is currently holding a context reference */ ++#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) ++/* Atom requires GPU to be in protected mode */ ++#define KBASE_KATOM_FLAG_PROTECTED (1<<11) ++/* Atom has been stored in runnable_tree */ ++#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) + -+/** -+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will -+ * de-assign the assigned address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock -+ */ -+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++/* SW related flags about types of JS_COMMAND action ++ * NOTE: These must be masked off by JS_COMMAND_MASK */ + -+/** -+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will -+ * de-assign the assigned address space. -+ * @kbdev: Device pointer -+ * @kctx: Context pointer -+ * -+ * Caller must hold kbase_device->mmu_hw_mutex -+ * -+ * This function must perform any operations that could not be performed in IRQ -+ * context by kbase_backend_release_ctx_irq(). -+ */ -+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++/** This command causes a disjoint event */ ++#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + -+/** -+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires -+ * one -+ * @kbdev: Device pointer -+ * @katom: Pointer to the failed atom -+ * -+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This -+ * function performs a clean if it is required by @katom. -+ */ -+void kbase_backend_cacheclean(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++/** Bitmask of all SW related flags */ ++#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + ++#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) ++#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks ++#endif + -+/** -+ * kbase_backend_complete_wq() - Perform backend-specific actions required on -+ * completing an atom. -+ * @kbdev: Device pointer -+ * @katom: Pointer to the atom to complete -+ * -+ * This function should only be called from kbase_jd_done_worker() or -+ * js_return_worker(). 
-+ * -+ * Return: true if atom has completed, false if atom should be re-submitted -+ */ -+void kbase_backend_complete_wq(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++/** Soft-stop command that causes a Disjoint event. This of course isn't ++ * entirely masked off by JS_COMMAND_MASK */ ++#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ ++ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + -+/** -+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions -+ * required on completing an atom, after -+ * any scheduling has taken place. -+ * @kbdev: Device pointer -+ * @core_req: Core requirements of atom -+ * @affinity: Affinity of atom -+ * @coreref_state: Coreref state of atom -+ * -+ * This function should only be called from kbase_jd_done_worker() or -+ * js_return_worker(). -+ */ -+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, -+ base_jd_core_req core_req, u64 affinity, -+ enum kbase_atom_coreref_state coreref_state); ++#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + -+/** -+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU -+ * and remove any others from the ringbuffers. -+ * @kbdev: Device pointer -+ * @end_timestamp: Timestamp of reset -+ */ -+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); ++/* Serialize atoms within a slot (ie only one atom per job slot) */ ++#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) ++/* Serialize atoms between slots (ie only one job slot running at any time) */ ++#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) ++/* Reset the GPU after each atom completion */ ++#define KBASE_SERIALIZE_RESET (1 << 2) + -+/** -+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot -+ * @js -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return : Atom currently at the head of slot @js, or NULL -+ */ -+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, -+ int js); ++#ifdef CONFIG_DEBUG_FS ++struct base_job_fault_event { + -+/** -+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot -+ * @js -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return : Atom currently at the head of slot @js, or NULL -+ */ -+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, -+ int js); ++ u32 event_code; ++ struct kbase_jd_atom *katom; ++ struct work_struct job_fault_work; ++ struct list_head head; ++ int reg_offset; ++}; + -+/** -+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a -+ * slot. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return : Number of atoms currently on slot -+ */ -+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); ++#endif + -+/** -+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot -+ * that are currently on the GPU. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return : Number of atoms currently on slot @js that are currently on the GPU. -+ */ -+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); ++struct kbase_jd_atom_dependency { ++ struct kbase_jd_atom *atom; ++ u8 dep_type; ++}; + +/** -+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs -+ * has changed. 
-+ * @kbdev: Device pointer ++ * struct kbase_io_access - holds information about 1 register access + * -+ * Perform any required backend-specific actions (eg starting/stopping -+ * scheduling timers). ++ * @addr: first bit indicates r/w (r=0, w=1) ++ * @value: value written or read + */ -+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); ++struct kbase_io_access { ++ uintptr_t addr; ++ u32 value; ++}; + +/** -+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. -+ * @kbdev: Device pointer ++ * struct kbase_io_history - keeps track of all recent register accesses + * -+ * Perform any required backend-specific actions (eg updating timeouts of -+ * currently running atoms). ++ * @enabled: true if register accesses are recorded, false otherwise ++ * @lock: spinlock protecting kbase_io_access array ++ * @count: number of registers read/written ++ * @size: number of elements in kbase_io_access array ++ * @buf: array of kbase_io_access + */ -+void kbase_backend_timeouts_changed(struct kbase_device *kbdev); ++struct kbase_io_history { ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) ++ bool enabled; ++#else ++ u32 enabled; ++#endif + -+/** -+ * kbase_backend_slot_free() - Return the number of jobs that can be currently -+ * submitted to slot @js. -+ * @kbdev: Device pointer -+ * @js: Job slot to inspect -+ * -+ * Return : Number of jobs that can be submitted. -+ */ -+int kbase_backend_slot_free(struct kbase_device *kbdev, int js); ++ spinlock_t lock; ++ size_t count; ++ u16 size; ++ struct kbase_io_access *buf; ++}; + +/** -+ * kbase_job_check_enter_disjoint - potentially leave disjoint state -+ * @kbdev: kbase device -+ * @target_katom: atom which is finishing ++ * @brief The function retrieves a read-only reference to the atom field from ++ * the kbase_jd_atom_dependency structure + * -+ * Work out whether to leave disjoint state when finishing an atom that was -+ * originated by kbase_job_check_enter_disjoint(). -+ */ -+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, -+ struct kbase_jd_atom *target_katom); -+ -+/** -+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently -+ * running from a context -+ * @kctx: Context pointer ++ * @param[in] dep kbase jd atom dependency. + * -+ * This is used in response to a page fault to remove all jobs from the faulting -+ * context from the hardware. ++ * @return readonly reference to dependent ATOM. + */ -+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx); ++static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) ++{ ++ LOCAL_ASSERT(dep != NULL); + -+/** -+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and -+ * to be descheduled. -+ * @kctx: Context pointer -+ * -+ * This should be called following kbase_js_zap_context(), to ensure the context -+ * can be safely destroyed. -+ */ -+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); ++ return (const struct kbase_jd_atom *)(dep->atom); ++} + +/** -+ * kbase_backend_get_current_flush_id - Return the current flush ID ++ * @brief The function retrieves a read-only reference to the dependency type field from ++ * the kbase_jd_atom_dependency structure + * -+ * @kbdev: Device pointer ++ * @param[in] dep kbase jd atom dependency. + * -+ * Return: the current flush ID to be recorded for each job chain ++ * @return A dependency type value. 
+ */ -+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); ++static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) ++{ ++ LOCAL_ASSERT(dep != NULL); + -+#if KBASE_GPU_RESET_EN -+/** -+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. -+ * @kbdev: Device pointer -+ * -+ * This function just soft-stops all the slots to ensure that as many jobs as -+ * possible are saved. -+ * -+ * Return: a boolean which should be interpreted as follows: -+ * - true - Prepared for reset, kbase_reset_gpu should be called. -+ * - false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. -+ */ -+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); ++ return dep->dep_type; ++} + +/** -+ * kbase_reset_gpu - Reset the GPU -+ * @kbdev: Device pointer ++ * @brief Setter macro for dep_atom array entry in kbase_jd_atom + * -+ * This function should be called after kbase_prepare_to_reset_gpu if it returns -+ * true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu. ++ * @param[in] dep The kbase jd atom dependency. ++ * @param[in] a The ATOM to be set as a dependency. ++ * @param type The ATOM dependency type to be set. + * -+ * After this function is called (or not called if kbase_prepare_to_reset_gpu -+ * returned false), the caller should wait for kbdev->reset_waitq to be -+ * signalled to know when the reset has completed. + */ -+void kbase_reset_gpu(struct kbase_device *kbdev); ++static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, ++ struct kbase_jd_atom *a, u8 type) ++{ ++ struct kbase_jd_atom_dependency *dep; + -+/** -+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. -+ * @kbdev: Device pointer -+ * -+ * This function just soft-stops all the slots to ensure that as many jobs as -+ * possible are saved. -+ * -+ * Return: a boolean which should be interpreted as follows: -+ * - true - Prepared for reset, kbase_reset_gpu should be called. -+ * - false - Another thread is performing a reset, kbase_reset_gpu should -+ * not be called. -+ */ -+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); ++ LOCAL_ASSERT(const_dep != NULL); + -+/** -+ * kbase_reset_gpu_locked - Reset the GPU -+ * @kbdev: Device pointer -+ * -+ * This function should be called after kbase_prepare_to_reset_gpu if it -+ * returns true. It should never be called without a corresponding call to -+ * kbase_prepare_to_reset_gpu. -+ * -+ * After this function is called (or not called if kbase_prepare_to_reset_gpu -+ * returned false), the caller should wait for kbdev->reset_waitq to be -+ * signalled to know when the reset has completed. -+ */ -+void kbase_reset_gpu_locked(struct kbase_device *kbdev); ++ dep = (struct kbase_jd_atom_dependency *)const_dep; ++ ++ dep->atom = a; ++ dep->dep_type = type; ++} + +/** -+ * kbase_reset_gpu_silent - Reset the GPU silently -+ * @kbdev: Device pointer ++ * @brief Setter macro for dep_atom array entry in kbase_jd_atom + * -+ * Reset the GPU without trying to cancel jobs and don't emit messages into -+ * the kernel log while doing the reset. ++ * @param[in] dep The kbase jd atom dependency to be cleared. + * -+ * This function should be used in cases where we are doing a controlled reset -+ * of the GPU as part of normal processing (e.g. exiting protected mode) where -+ * the driver will have ensured the scheduler has been idled and all other -+ * users of the GPU (e.g. 
instrumentation) have been suspended. + */ -+void kbase_reset_gpu_silent(struct kbase_device *kbdev); ++static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) ++{ ++ struct kbase_jd_atom_dependency *dep; + -+/** -+ * kbase_reset_gpu_active - Reports if the GPU is being reset -+ * @kbdev: Device pointer -+ * -+ * Return: True if the GPU is in the process of being reset. -+ */ -+bool kbase_reset_gpu_active(struct kbase_device *kbdev); -+#endif ++ LOCAL_ASSERT(const_dep != NULL); + -+/** -+ * kbase_job_slot_hardstop - Hard-stop the specified job slot -+ * @kctx: The kbase context that contains the job(s) that should -+ * be hard-stopped -+ * @js: The job slot to hard-stop -+ * @target_katom: The job that should be hard-stopped (or NULL for all -+ * jobs from the context) -+ * Context: -+ * The job slot lock must be held when calling this function. -+ */ -+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, -+ struct kbase_jd_atom *target_katom); ++ dep = (struct kbase_jd_atom_dependency *)const_dep; + -+extern struct protected_mode_ops kbase_native_protected_ops; ++ dep->atom = NULL; ++ dep->dep_type = BASE_JD_DEP_TYPE_INVALID; ++} + -+#endif /* _KBASE_HWACCESS_JM_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h -new file mode 100644 -index 000000000..71c7d495c ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h -@@ -0,0 +1,209 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++enum kbase_atom_gpu_rb_state { ++ /* Atom is not currently present in slot ringbuffer */ ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, ++ /* Atom is in slot ringbuffer but is blocked on a previous atom */ ++ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, ++ /* Atom is in slot ringbuffer but is waiting for a previous protected ++ * mode transition to complete */ ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, ++ /* Atom is in slot ringbuffer but is waiting for proected mode ++ * transition */ ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, ++ /* Atom is in slot ringbuffer but is waiting for cores to become ++ * available */ ++ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, ++ /* Atom is in slot ringbuffer but is blocked on affinity */ ++ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, ++ /* Atom is in slot ringbuffer and ready to run */ ++ KBASE_ATOM_GPU_RB_READY, ++ /* Atom is in slot ringbuffer and has been submitted to the GPU */ ++ KBASE_ATOM_GPU_RB_SUBMITTED, ++ /* Atom must be returned to JS as soon as it reaches the head of the ++ * ringbuffer due to a previous failure */ ++ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 ++}; + ++enum kbase_atom_enter_protected_state { ++ /* ++ * Starting state: ++ * Check if a transition into protected mode is required. ++ * ++ * NOTE: The integer value of this must ++ * match KBASE_ATOM_EXIT_PROTECTED_CHECK. ++ */ ++ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, ++ /* Wait for vinstr to suspend. 
*/ ++ KBASE_ATOM_ENTER_PROTECTED_VINSTR, ++ /* Wait for the L2 to become idle in preparation for ++ * the coherency change. */ ++ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, ++ /* End state; ++ * Prepare coherency change. */ ++ KBASE_ATOM_ENTER_PROTECTED_FINISHED, ++}; + ++enum kbase_atom_exit_protected_state { ++ /* ++ * Starting state: ++ * Check if a transition out of protected mode is required. ++ * ++ * NOTE: The integer value of this must ++ * match KBASE_ATOM_ENTER_PROTECTED_CHECK. ++ */ ++ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, ++ /* Wait for the L2 to become idle in preparation ++ * for the reset. */ ++ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, ++ /* Issue the protected reset. */ ++ KBASE_ATOM_EXIT_PROTECTED_RESET, ++ /* End state; ++ * Wait for the reset to complete. */ ++ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, ++}; + ++struct kbase_ext_res { ++ u64 gpu_address; ++ struct kbase_mem_phy_alloc *alloc; ++}; + -+/** -+ * @file mali_kbase_hwaccess_pm.h -+ * HW access power manager common APIs -+ */ ++struct kbase_jd_atom { ++ struct work_struct work; ++ ktime_t start_timestamp; + -+#ifndef _KBASE_HWACCESS_PM_H_ -+#define _KBASE_HWACCESS_PM_H_ ++ struct base_jd_udata udata; ++ struct kbase_context *kctx; + -+#include -+#include ++ struct list_head dep_head[2]; ++ struct list_head dep_item[2]; ++ const struct kbase_jd_atom_dependency dep[2]; ++ /* List head used during job dispatch job_done processing - as ++ * dependencies may not be entirely resolved at this point, we need to ++ * use a separate list head. */ ++ struct list_head jd_item; ++ /* true if atom's jd_item is currently on a list. Prevents atom being ++ * processed twice. */ ++ bool in_jd_list; + -+#include ++ u16 nr_extres; ++ struct kbase_ext_res *extres; + -+/* Forward definition - see mali_kbase.h */ -+struct kbase_device; ++ u32 device_nr; ++ u64 affinity; ++ u64 jc; ++ enum kbase_atom_coreref_state coreref_state; ++#ifdef CONFIG_KDS ++ struct list_head node; ++ struct kds_resource_set *kds_rset; ++ bool kds_dep_satisfied; ++#endif /* CONFIG_KDS */ ++#if defined(CONFIG_SYNC) ++ /* Stores either an input or output fence, depending on soft-job type */ ++ struct sync_fence *fence; ++ struct sync_fence_waiter sync_waiter; ++#endif /* CONFIG_SYNC */ ++#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) ++ struct { ++ /* Use the functions/API defined in mali_kbase_fence.h to ++ * when working with this sub struct */ ++#if defined(CONFIG_SYNC_FILE) ++ /* Input fence */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence_in; ++#else ++ struct dma_fence *fence_in; ++#endif ++#endif ++ /* This points to the dma-buf output fence for this atom. If ++ * this is NULL then there is no fence for this atom and the ++ * following fields related to dma_fence may have invalid data. ++ * ++ * The context and seqno fields contain the details for this ++ * fence. ++ * ++ * This fence is signaled when the katom is completed, ++ * regardless of the event_code of the katom (signal also on ++ * failure). ++ */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ /* The dma-buf fence context number for this atom. A unique ++ * context number is allocated to each katom in the context on ++ * context creation. ++ */ ++ unsigned int context; ++ /* The dma-buf fence sequence number for this atom. This is ++ * increased every time this katom uses dma-buf fence. 
++ */ ++ atomic_t seqno; ++ /* This contains a list of all callbacks set up to wait on ++ * other fences. This atom must be held back from JS until all ++ * these callbacks have been called and dep_count have reached ++ * 0. The initial value of dep_count must be equal to the ++ * number of callbacks on this list. ++ * ++ * This list is protected by jctx.lock. Callbacks are added to ++ * this list when the atom is built and the wait are set up. ++ * All the callbacks then stay on the list until all callbacks ++ * have been called and the atom is queued, or cancelled, and ++ * then all callbacks are taken off the list and freed. ++ */ ++ struct list_head callbacks; ++ /* Atomic counter of number of outstandind dma-buf fence ++ * dependencies for this atom. When dep_count reaches 0 the ++ * atom may be queued. ++ * ++ * The special value "-1" may only be set after the count ++ * reaches 0, while holding jctx.lock. This indicates that the ++ * atom has been handled, either queued in JS or cancelled. ++ * ++ * If anyone but the dma-fence worker sets this to -1 they must ++ * ensure that any potentially queued worker must have ++ * completed before allowing the atom to be marked as unused. ++ * This can be done by flushing the fence work queue: ++ * kctx->dma_fence.wq. ++ */ ++ atomic_t dep_count; ++ } dma_fence; ++#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/ + -+/* Functions common to all HW access backends */ ++ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ ++ enum base_jd_event_code event_code; ++ base_jd_core_req core_req; /**< core requirements */ ++ /** Job Slot to retry submitting to if submission from IRQ handler failed ++ * ++ * NOTE: see if this can be unified into the another member e.g. the event */ ++ int retry_submit_on_slot; + -+/** -+ * Initialize the power management framework. -+ * -+ * Must be called before any other power management function -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ * -+ * @return 0 if the power management framework was successfully -+ * initialized. -+ */ -+int kbase_hwaccess_pm_init(struct kbase_device *kbdev); ++ u32 ticks; ++ /* JS atom priority with respect to other atoms on its kctx. */ ++ int sched_priority; + -+/** -+ * Terminate the power management framework. -+ * -+ * No power management functions may be called after this (except -+ * @ref kbase_pm_init) -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_term(struct kbase_device *kbdev); ++ int poking; /* BASE_HW_ISSUE_8316 */ + -+/** -+ * kbase_hwaccess_pm_powerup - Power up the GPU. -+ * @kbdev: The kbase device structure for the device (must be a valid pointer) -+ * @flags: Flags to pass on to kbase_pm_init_hw -+ * -+ * Power up GPU after all modules have been initialized and interrupt handlers -+ * installed. -+ * -+ * Return: 0 if powerup was successful. -+ */ -+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, -+ unsigned int flags); ++ wait_queue_head_t completed; ++ enum kbase_jd_atom_state status; ++#ifdef CONFIG_GPU_TRACEPOINTS ++ int work_id; ++#endif ++ /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */ ++ int slot_nr; + -+/** -+ * Halt the power management framework. -+ * -+ * Should ensure that no new interrupts are generated, but allow any currently -+ * running interrupt handlers to complete successfully. 
The GPU is forced off by -+ * the time this function returns, regardless of whether or not the active power -+ * policy asks for the GPU to be powered off. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); ++ u32 atom_flags; + -+/** -+ * Perform any backend-specific actions to suspend the GPU -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); ++ /* Number of times this atom has been retried. Used by replay soft job. ++ */ ++ int retry_count; + -+/** -+ * Perform any backend-specific actions to resume the GPU from a suspend -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); ++ enum kbase_atom_gpu_rb_state gpu_rb_state; + -+/** -+ * Perform any required actions for activating the GPU. Called when the first -+ * context goes active. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); ++ u64 need_cache_flush_cores_retained; + -+/** -+ * Perform any required actions for idling the GPU. Called when the last -+ * context goes idle. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ */ -+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); ++ atomic_t blocked; + ++ /* Pointer to atom that this atom has same-slot dependency on */ ++ struct kbase_jd_atom *pre_dep; ++ /* Pointer to atom that has same-slot dependency on this atom */ ++ struct kbase_jd_atom *post_dep; + -+/** -+ * Set the debug core mask. -+ * -+ * This determines which cores the power manager is allowed to use. -+ * -+ * @param kbdev The kbase device structure for the device (must be a -+ * valid pointer) -+ * @param new_core_mask_js0 The core mask to use for job slot 0 -+ * @param new_core_mask_js0 The core mask to use for job slot 1 -+ * @param new_core_mask_js0 The core mask to use for job slot 2 -+ */ -+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, -+ u64 new_core_mask_js0, u64 new_core_mask_js1, -+ u64 new_core_mask_js2); ++ /* Pointer to atom that this atom has cross-slot dependency on */ ++ struct kbase_jd_atom *x_pre_dep; ++ /* Pointer to atom that has cross-slot dependency on this atom */ ++ struct kbase_jd_atom *x_post_dep; + ++ /* The GPU's flush count recorded at the time of submission, used for ++ * the cache flush optimisation */ ++ u32 flush_id; + -+/** -+ * Get the current policy. -+ * -+ * Returns the policy that is currently active. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ * -+ * @return The current policy -+ */ -+const struct kbase_pm_ca_policy -+*kbase_pm_ca_get_policy(struct kbase_device *kbdev); ++ struct kbase_jd_atom_backend backend; ++#ifdef CONFIG_DEBUG_FS ++ struct base_job_fault_event fault_event; ++#endif + -+/** -+ * Change the policy to the one specified. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ * @param policy The policy to change to (valid pointer returned from -+ * @ref kbase_pm_ca_list_policies) -+ */ -+void kbase_pm_ca_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_ca_policy *policy); ++ /* List head used for three different purposes: ++ * 1. 
Overflow list for JS ring buffers. If an atom is ready to run, ++ * but there is no room in the JS ring buffer, then the atom is put ++ * on the ring buffer's overflow list using this list node. ++ * 2. List of waiting soft jobs. ++ */ ++ struct list_head queue; + -+/** -+ * Retrieve a static list of the available policies. -+ * -+ * @param[out] policies An array pointer to take the list of policies. This may -+ * be NULL. The contents of this array must not be -+ * modified. -+ * -+ * @return The number of policies -+ */ -+int -+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); ++ /* Used to keep track of all JIT free/alloc jobs in submission order ++ */ ++ struct list_head jit_node; ++ bool jit_blocked; + ++ /* If non-zero, this indicates that the atom will fail with the set ++ * event_code when the atom is processed. */ ++ enum base_jd_event_code will_fail_event_code; + -+/** -+ * Get the current policy. -+ * -+ * Returns the policy that is currently active. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ * -+ * @return The current policy -+ */ -+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); ++ /* Atoms will only ever be transitioning into, or out of ++ * protected mode so we do not need two separate fields. ++ */ ++ union { ++ enum kbase_atom_enter_protected_state enter; ++ enum kbase_atom_exit_protected_state exit; ++ } protected_state; + -+/** -+ * Change the policy to the one specified. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid -+ * pointer) -+ * @param policy The policy to change to (valid pointer returned from -+ * @ref kbase_pm_list_policies) -+ */ -+void kbase_pm_set_policy(struct kbase_device *kbdev, -+ const struct kbase_pm_policy *policy); ++ struct rb_node runnable_tree_node; + -+/** -+ * Retrieve a static list of the available policies. -+ * -+ * @param[out] policies An array pointer to take the list of policies. This may -+ * be NULL. The contents of this array must not be -+ * modified. -+ * -+ * @return The number of policies -+ */ -+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies); ++ /* 'Age' of atom relative to other atoms in the context. */ ++ u32 age; ++}; ++ ++static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) ++{ ++ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); ++} + -+#endif /* _KBASE_HWACCESS_PM_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h -new file mode 100644 -index 000000000..10b65798e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h -@@ -0,0 +1,53 @@ +/* ++ * Theory of operations: + * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Atom objects are statically allocated within the context structure. + * ++ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. 
+ */ + ++#define KBASE_JD_DEP_QUEUE_SIZE 256 + ++struct kbase_jd_context { ++ struct mutex lock; ++ struct kbasep_js_kctx_info sched_info; ++ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + ++ /** Tracks all job-dispatch jobs. This includes those not tracked by ++ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ ++ u32 job_nr; + -+/** -+ * -+ */ ++ /** Waitq that reflects whether there are no jobs (including SW-only ++ * dependency jobs). This is set when no jobs are present on the ctx, ++ * and clear when there are jobs. ++ * ++ * @note: Job Dispatcher knows about more jobs than the Job Scheduler: ++ * the Job Scheduler is unaware of jobs that are blocked on dependencies, ++ * and SW-only dependency jobs. ++ * ++ * This waitq can be waited upon to find out when the context jobs are all ++ * done/cancelled (including those that might've been blocked on ++ * dependencies) - and so, whether it can be terminated. However, it should ++ * only be terminated once it is not present in the run-pool (see ++ * kbasep_js_kctx_info::ctx::is_scheduled). ++ * ++ * Since the waitq is only set under kbase_jd_context::lock, ++ * the waiter should also briefly obtain and drop kbase_jd_context::lock to ++ * guarentee that the setter has completed its work on the kbase_context ++ * ++ * This must be updated atomically with: ++ * - kbase_jd_context::job_nr */ ++ wait_queue_head_t zero_jobs_wait; + -+#ifndef _KBASE_BACKEND_TIME_H_ -+#define _KBASE_BACKEND_TIME_H_ ++ /** Job Done workqueue. */ ++ struct workqueue_struct *job_done_wq; + -+/** -+ * kbase_backend_get_gpu_time() - Get current GPU time -+ * @kbdev: Device pointer -+ * @cycle_counter: Pointer to u64 to store cycle counter in -+ * @system_time: Pointer to u64 to store system time in -+ * @ts: Pointer to struct timespec64 to store current monotonic -+ * time in -+ */ -+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, -+ u64 *system_time, struct timespec64 *ts); ++ spinlock_t tb_lock; ++ u32 *tb; ++ size_t tb_wrap_offset; + -+/** -+ * kbase_wait_write_flush() - Wait for GPU write flush -+ * @kctx: Context pointer -+ * -+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush -+ * its write buffer. -+ * -+ * If GPU resets occur then the counters are reset to zero, the delay may not be -+ * as expected. -+ * -+ * This function is only in use for BASE_HW_ISSUE_6367 -+ */ -+#ifndef CONFIG_MALI_NO_MALI -+void kbase_wait_write_flush(struct kbase_context *kctx); ++#ifdef CONFIG_KDS ++ struct kds_callback kds_cb; ++#endif /* CONFIG_KDS */ ++#ifdef CONFIG_GPU_TRACEPOINTS ++ atomic_t work_id; +#endif ++}; + -+#endif /* _KBASE_BACKEND_TIME_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h -new file mode 100644 -index 000000000..cf7bf1b35 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h -@@ -0,0 +1,66 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ -+ -+ -+ -+#ifndef _KBASE_HWCNT_READER_H_ -+#define _KBASE_HWCNT_READER_H_ -+ -+/* The ids of ioctl commands. */ -+#define KBASE_HWCNT_READER 0xBE -+#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) -+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) -+#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) -+#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) -+#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ -+ struct kbase_hwcnt_reader_metadata) -+#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ -+ struct kbase_hwcnt_reader_metadata) -+#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) -+#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) -+#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) -+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) ++struct kbase_device_info { ++ u32 features; ++}; + -+/** -+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata -+ * @timestamp: time when sample was collected -+ * @event_id: id of an event that triggered sample collection -+ * @buffer_idx: position in sampling area where sample buffer was stored -+ */ -+struct kbase_hwcnt_reader_metadata { -+ u64 timestamp; -+ u32 event_id; -+ u32 buffer_idx; ++/** Poking state for BASE_HW_ISSUE_8316 */ ++enum { ++ KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0, ++ KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1 +}; + -+/** -+ * enum base_hwcnt_reader_event - hwcnt dumping events -+ * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump -+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump -+ * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request -+ * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request -+ * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events -+ */ -+enum base_hwcnt_reader_event { -+ BASE_HWCNT_READER_EVENT_MANUAL, -+ BASE_HWCNT_READER_EVENT_PERIODIC, -+ BASE_HWCNT_READER_EVENT_PREJOB, -+ BASE_HWCNT_READER_EVENT_POSTJOB, ++/** Poking state for BASE_HW_ISSUE_8316 */ ++typedef u32 kbase_as_poke_state; + -+ BASE_HWCNT_READER_EVENT_COUNT ++struct kbase_mmu_setup { ++ u64 transtab; ++ u64 memattr; ++ u64 transcfg; +}; + -+#endif /* _KBASE_HWCNT_READER_H_ */ -+ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h -new file mode 100644 -index 000000000..dcbed9c77 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h -@@ -0,0 +1,656 @@ -+/* -+ * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++/** ++ * Important: Our code makes assumptions that a struct kbase_as structure is always at ++ * kbase_device->as[number]. This is used to recover the containing ++ * struct kbase_device from a struct kbase_as structure. + * ++ * Therefore, struct kbase_as structures must not be allocated anywhere else. 
+ */ ++struct kbase_as { ++ int number; + ++ struct workqueue_struct *pf_wq; ++ struct work_struct work_pagefault; ++ struct work_struct work_busfault; ++ enum kbase_mmu_fault_type fault_type; ++ bool protected_mode; ++ u32 fault_status; ++ u64 fault_addr; ++ u64 fault_extra_addr; + ++ struct kbase_mmu_setup current_setup; + -+#ifndef _KBASE_IOCTL_H_ -+#define _KBASE_IOCTL_H_ -+ -+#ifdef __cpluscplus -+extern "C" { -+#endif ++ /* BASE_HW_ISSUE_8316 */ ++ struct workqueue_struct *poke_wq; ++ struct work_struct poke_work; ++ /** Protected by hwaccess_lock */ ++ int poke_refcount; ++ /** Protected by hwaccess_lock */ ++ kbase_as_poke_state poke_state; ++ struct hrtimer poke_timer; ++}; + -+#include ++static inline int kbase_as_has_bus_fault(struct kbase_as *as) ++{ ++ return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS; ++} + -+#define KBASE_IOCTL_TYPE 0x80 ++static inline int kbase_as_has_page_fault(struct kbase_as *as) ++{ ++ return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE; ++} + -+#ifdef ANDROID -+/* Android's definition of ioctl is incorrect, specifying the type argument as -+ * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work -+ * round this by redefining _IOC to include a case to 'int'. -+ */ -+#undef _IOC -+#define _IOC(dir, type, nr, size) \ -+ ((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \ -+ ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))) -+#endif ++struct kbasep_mem_device { ++ atomic_t used_pages; /* Tracks usage of OS shared memory. Updated ++ when OS memory is allocated/freed. */ + -+/** -+ * struct kbase_ioctl_version_check - Check version compatibility with kernel -+ * -+ * @major: Major version number -+ * @minor: Minor version number -+ */ -+struct kbase_ioctl_version_check { -+ __u16 major; -+ __u16 minor; +}; + -+#define KBASE_IOCTL_VERSION_CHECK \ -+ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) ++#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X + -+/** -+ * struct kbase_ioctl_set_flags - Set kernel context creation flags -+ * -+ * @create_flags: Flags - see base_context_create_flags -+ */ -+struct kbase_ioctl_set_flags { -+ __u32 create_flags; ++enum kbase_trace_code { ++ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE ++ * THIS MUST BE USED AT THE START OF THE ENUM */ ++#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X) ++#include "mali_kbase_trace_defs.h" ++#undef KBASE_TRACE_CODE_MAKE_CODE ++ /* Comma on its own, to extend the list */ ++ , ++ /* Must be the last in the enum */ ++ KBASE_TRACE_CODE_COUNT +}; + -+#define KBASE_IOCTL_SET_FLAGS \ -+ _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) ++#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) ++#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) + -+/** -+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel -+ * -+ * @addr: Memory address of an array of struct base_jd_atom_v2 -+ * @nr_atoms: Number of entries in the array -+ * @stride: sizeof(struct base_jd_atom_v2) -+ */ -+struct kbase_ioctl_job_submit { -+ union kbase_pointer addr; -+ __u32 nr_atoms; -+ __u32 stride; ++struct kbase_trace { ++ struct timespec64 timestamp; ++ u32 thread_id; ++ u32 cpu; ++ void *ctx; ++ bool katom; ++ int atom_number; ++ u64 atom_udata[2]; ++ u64 gpu_addr; ++ unsigned long info_val; ++ u8 code; ++ u8 jobslot; ++ u8 refcount; ++ u8 flags; +}; + -+#define KBASE_IOCTL_JOB_SUBMIT \ -+ _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) -+ -+/** -+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel -+ * 
-+ * @buffer: Pointer to the buffer to store properties into -+ * @size: Size of the buffer -+ * @flags: Flags - must be zero for now -+ * -+ * The ioctl will return the number of bytes stored into @buffer or an error -+ * on failure (e.g. @size is too small). If @size is specified as 0 then no -+ * data will be written but the return value will be the number of bytes needed -+ * for all the properties. -+ * -+ * @flags may be used in the future to request a different format for the -+ * buffer. With @flags == 0 the following format is used. -+ * -+ * The buffer will be filled with pairs of values, a u32 key identifying the -+ * property followed by the value. The size of the value is identified using -+ * the bottom bits of the key. The value then immediately followed the key and -+ * is tightly packed (there is no padding). All keys and values are -+ * little-endian. ++/** Event IDs for the power management framework. + * -+ * 00 = u8 -+ * 01 = u16 -+ * 10 = u32 -+ * 11 = u64 -+ */ -+struct kbase_ioctl_get_gpuprops { -+ union kbase_pointer buffer; -+ __u32 size; -+ __u32 flags; -+}; -+ -+#define KBASE_IOCTL_GET_GPUPROPS \ -+ _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) ++ * Any of these events might be missed, so they should not be relied upon to ++ * find the precise state of the GPU at a particular time in the ++ * trace. Overall, we should get a high percentage of these events for ++ * statisical purposes, and so a few missing should not be a problem */ ++enum kbase_timeline_pm_event { ++ /* helper for tests */ ++ KBASEP_TIMELINE_PM_EVENT_FIRST, + -+#define KBASE_IOCTL_POST_TERM \ -+ _IO(KBASE_IOCTL_TYPE, 4) ++ /** Event reserved for backwards compatibility with 'init' events */ ++ KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST, + -+/** -+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU -+ * -+ * @va_pages: The number of pages of virtual address space to reserve -+ * @commit_pages: The number of physical pages to allocate -+ * @extent: The number of extra pages to allocate on each GPU fault which grows -+ * the region -+ * @flags: Flags -+ * @gpu_va: The GPU virtual address which is allocated -+ * -+ * @in: Input parameters -+ * @out: Output parameters -+ */ -+union kbase_ioctl_mem_alloc { -+ struct { -+ __u64 va_pages; -+ __u64 commit_pages; -+ __u64 extent; -+ __u64 flags; -+ } in; -+ struct { -+ __u64 flags; -+ __u64 gpu_va; -+ } out; -+}; ++ /** The power state of the device has changed. ++ * ++ * Specifically, the device has reached a desired or available state. ++ */ ++ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED, + -+#define KBASE_IOCTL_MEM_ALLOC \ -+ _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) ++ /** The GPU is becoming active. ++ * ++ * This event is sent when the first context is about to use the GPU. ++ */ ++ KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE, + -+/** -+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region -+ * @gpu_addr: A GPU address contained within the region -+ * @query: The type of query -+ * @value: The result of the query -+ * -+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. -+ * -+ * @in: Input parameters -+ * @out: Output parameters -+ */ -+union kbase_ioctl_mem_query { -+ struct { -+ __u64 gpu_addr; -+ __u64 query; -+ } in; -+ struct { -+ __u64 value; -+ } out; -+}; ++ /** The GPU is becoming idle. ++ * ++ * This event is sent when the last context has finished using the GPU. 
++ */ ++ KBASE_TIMELINE_PM_EVENT_GPU_IDLE, + -+#define KBASE_IOCTL_MEM_QUERY \ -+ _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) ++ /** Event reserved for backwards compatibility with 'policy_change' ++ * events */ ++ KBASE_TIMELINE_PM_EVENT_RESERVED_4, + -+#define KBASE_MEM_QUERY_COMMIT_SIZE 1 -+#define KBASE_MEM_QUERY_VA_SIZE 2 -+#define KBASE_MEM_QUERY_FLAGS 3 ++ /** Event reserved for backwards compatibility with 'system_suspend' ++ * events */ ++ KBASE_TIMELINE_PM_EVENT_RESERVED_5, + -+/** -+ * struct kbase_ioctl_mem_free - Free a memory region -+ * @gpu_addr: Handle to the region to free -+ */ -+struct kbase_ioctl_mem_free { -+ __u64 gpu_addr; -+}; ++ /** Event reserved for backwards compatibility with 'system_resume' ++ * events */ ++ KBASE_TIMELINE_PM_EVENT_RESERVED_6, + -+#define KBASE_IOCTL_MEM_FREE \ -+ _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) ++ /** The job scheduler is requesting to power up/down cores. ++ * ++ * This event is sent when: ++ * - powered down cores are needed to complete a job ++ * - powered up cores are not needed anymore ++ */ ++ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, + -+/** -+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader -+ * @buffer_count: requested number of dumping buffers -+ * @jm_bm: counters selection bitmask (JM) -+ * @shader_bm: counters selection bitmask (Shader) -+ * @tiler_bm: counters selection bitmask (Tiler) -+ * @mmu_l2_bm: counters selection bitmask (MMU_L2) -+ * -+ * A fd is returned from the ioctl if successful, or a negative value on error -+ */ -+struct kbase_ioctl_hwcnt_reader_setup { -+ __u32 buffer_count; -+ __u32 jm_bm; -+ __u32 shader_bm; -+ __u32 tiler_bm; -+ __u32 mmu_l2_bm; ++ KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, +}; + -+#define KBASE_IOCTL_HWCNT_READER_SETUP \ -+ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) -+ -+/** -+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection -+ * @dump_buffer: GPU address to write counters to -+ * @jm_bm: counters selection bitmask (JM) -+ * @shader_bm: counters selection bitmask (Shader) -+ * @tiler_bm: counters selection bitmask (Tiler) -+ * @mmu_l2_bm: counters selection bitmask (MMU_L2) -+ */ -+struct kbase_ioctl_hwcnt_enable { -+ __u64 dump_buffer; -+ __u32 jm_bm; -+ __u32 shader_bm; -+ __u32 tiler_bm; -+ __u32 mmu_l2_bm; ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++struct kbase_trace_kctx_timeline { ++ atomic_t jd_atoms_in_flight; ++ u32 owner_tgid; +}; + -+#define KBASE_IOCTL_HWCNT_ENABLE \ -+ _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) ++struct kbase_trace_kbdev_timeline { ++ /* Note: strictly speaking, not needed, because it's in sync with ++ * kbase_device::jm_slots[]::submitted_nr ++ * ++ * But it's kept as an example of how to add global timeline tracking ++ * information ++ * ++ * The caller must hold hwaccess_lock when accessing this */ ++ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; + -+#define KBASE_IOCTL_HWCNT_DUMP \ -+ _IO(KBASE_IOCTL_TYPE, 10) ++ /* Last UID for each PM event */ ++ atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1]; ++ /* Counter for generating PM event UIDs */ ++ atomic_t pm_event_uid_counter; ++ /* ++ * L2 transition state - true indicates that the transition is ongoing ++ * Expected to be protected by hwaccess_lock */ ++ bool l2_transitioning; ++}; ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ + -+#define KBASE_IOCTL_HWCNT_CLEAR \ -+ _IO(KBASE_IOCTL_TYPE, 11) + -+/** -+ * struct kbase_ioctl_disjoint_query - Query the disjoint 
counter -+ * @counter: A counter of disjoint events in the kernel -+ */ -+struct kbase_ioctl_disjoint_query { -+ __u32 counter; ++struct kbasep_kctx_list_element { ++ struct list_head link; ++ struct kbase_context *kctx; +}; + -+#define KBASE_IOCTL_DISJOINT_QUERY \ -+ _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) -+ +/** -+ * struct kbase_ioctl_get_ddk_version - Query the kernel version -+ * @version_buffer: Buffer to receive the kernel version string -+ * @size: Size of the buffer ++ * Data stored per device for power management. + * -+ * The ioctl will return the number of bytes written into version_buffer -+ * (which includes a NULL byte) or a negative error code ++ * This structure contains data for the power management framework. There is one ++ * instance of this structure per device in the system. + */ -+struct kbase_ioctl_get_ddk_version { -+ union kbase_pointer version_buffer; -+ __u32 size; -+}; ++struct kbase_pm_device_data { ++ /** ++ * The lock protecting Power Management structures accessed outside of ++ * IRQ. ++ * ++ * This lock must also be held whenever the GPU is being powered on or ++ * off. ++ */ ++ struct mutex lock; + -+#define KBASE_IOCTL_GET_DDK_VERSION \ -+ _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) ++ /** The reference count of active contexts on this device. */ ++ int active_count; ++ /** Flag indicating suspending/suspended */ ++ bool suspending; ++ /* Wait queue set when active_count == 0 */ ++ wait_queue_head_t zero_active_count_wait; + -+/** -+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator -+ * -+ * @va_pages: Number of VA pages to reserve for JIT -+ * -+ * Note that depending on the VA size of the application and GPU, the value -+ * specified in @va_pages may be ignored. -+ */ -+struct kbase_ioctl_mem_jit_init { -+ __u64 va_pages; -+}; ++ /** ++ * Bit masks identifying the available shader cores that are specified ++ * via sysfs. One mask per job slot. ++ */ ++ u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; ++ u64 debug_core_mask_all; + -+#define KBASE_IOCTL_MEM_JIT_INIT \ -+ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) ++ /** ++ * Callback for initializing the runtime power management. ++ * ++ * @param kbdev The kbase device ++ * ++ * @return 0 on success, else error code ++ */ ++ int (*callback_power_runtime_init)(struct kbase_device *kbdev); + -+/** -+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory -+ * -+ * @handle: GPU memory handle (GPU VA) -+ * @user_addr: The address where it is mapped in user space -+ * @size: The number of bytes to synchronise -+ * @type: The direction to synchronise: 0 is sync to memory (clean), -+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. -+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero -+ */ -+struct kbase_ioctl_mem_sync { -+ __u64 handle; -+ __u64 user_addr; -+ __u64 size; -+ __u8 type; -+ __u8 padding[7]; -+}; ++ /** ++ * Callback for terminating the runtime power management. 
++ * ++ * @param kbdev The kbase device ++ */ ++ void (*callback_power_runtime_term)(struct kbase_device *kbdev); + -+#define KBASE_IOCTL_MEM_SYNC \ -+ _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) ++ /* Time in milliseconds between each dvfs sample */ ++ u32 dvfs_period; + -+/** -+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer -+ * -+ * @gpu_addr: The GPU address of the memory region -+ * @cpu_addr: The CPU address to locate -+ * @size: A size in bytes to validate is contained within the region -+ * @offset: The offset from the start of the memory region to @cpu_addr -+ * -+ * @in: Input parameters -+ * @out: Output parameters -+ */ -+union kbase_ioctl_mem_find_cpu_offset { -+ struct { -+ __u64 gpu_addr; -+ __u64 cpu_addr; -+ __u64 size; -+ } in; -+ struct { -+ __u64 offset; -+ } out; -+}; ++ /* Period of GPU poweroff timer */ ++ ktime_t gpu_poweroff_time; + -+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ -+ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) ++ /* Number of ticks of GPU poweroff timer before shader is powered off */ ++ int poweroff_shader_ticks; + -+/** -+ * struct kbase_ioctl_get_context_id - Get the kernel context ID -+ * -+ * @id: The kernel context ID -+ */ -+struct kbase_ioctl_get_context_id { -+ int id; /* This should really be __u32, but see GPUCORE-10048 */ -+}; ++ /* Number of ticks of GPU poweroff timer before GPU is powered off */ ++ int poweroff_gpu_ticks; + -+#define KBASE_IOCTL_GET_CONTEXT_ID \ -+ _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) ++ struct kbase_pm_backend_data backend; ++}; + +/** -+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd -+ * -+ * @flags: Flags -+ * -+ * The ioctl returns a file descriptor when successful ++ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev ++ * @kbdev: Kbase device where memory is used ++ * @cur_size: Number of free pages currently in the pool (may exceed @max_size ++ * in some corner cases) ++ * @max_size: Maximum number of free pages in the pool ++ * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size ++ * and @page_list ++ * @page_list: List of free pages in the pool ++ * @reclaim: Shrinker for kernel reclaim of free pages ++ * @next_pool: Pointer to next pool where pages can be allocated when this pool ++ * is empty. Pages will spill over to the next pool when this pool ++ * is full. Can be NULL if there is no next pool. 
+ */ -+struct kbase_ioctl_tlstream_acquire { -+ __u32 flags; -+}; -+ -+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ -+ _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) ++struct kbase_mem_pool { ++ struct kbase_device *kbdev; ++ size_t cur_size; ++ size_t max_size; ++ spinlock_t pool_lock; ++ struct list_head page_list; ++ struct shrinker reclaim; + -+#define KBASE_IOCTL_TLSTREAM_FLUSH \ -+ _IO(KBASE_IOCTL_TYPE, 19) ++ struct kbase_mem_pool *next_pool; ++}; + +/** -+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region -+ * -+ * @gpu_addr: The memory region to modify -+ * @pages: The number of physical pages that should be present -+ * -+ * The ioctl may return on the following error codes or 0 for success: -+ * -ENOMEM: Out of memory -+ * -EINVAL: Invalid arguments ++ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP ++ * frequency, and real frequency and core mask ++ * @opp_freq: Nominal OPP frequency ++ * @real_freq: Real GPU frequency ++ * @core_mask: Shader core mask + */ -+struct kbase_ioctl_mem_commit { -+ __u64 gpu_addr; -+ __u64 pages; ++struct kbase_devfreq_opp { ++ u64 opp_freq; ++ u64 real_freq; ++ u64 core_mask; +}; + -+#define KBASE_IOCTL_MEM_COMMIT \ -+ _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) ++#define DEVNAME_SIZE 16 + -+/** -+ * union kbase_ioctl_mem_alias - Create an alias of memory regions -+ * @flags: Flags, see BASE_MEM_xxx -+ * @stride: Bytes between start of each memory region -+ * @nents: The number of regions to pack together into the alias -+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info -+ * @gpu_va: Address of the new alias -+ * @va_pages: Size of the new alias -+ * -+ * @in: Input parameters -+ * @out: Output parameters -+ */ -+union kbase_ioctl_mem_alias { -+ struct { -+ __u64 flags; -+ __u64 stride; -+ __u64 nents; -+ union kbase_pointer aliasing_info; -+ } in; -+ struct { -+ __u64 flags; -+ __u64 gpu_va; -+ __u64 va_pages; -+ } out; -+}; ++struct kbase_device { ++ s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS]; + -+#define KBASE_IOCTL_MEM_ALIAS \ -+ _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) ++ u32 hw_quirks_sc; ++ u32 hw_quirks_tiler; ++ u32 hw_quirks_mmu; ++ u32 hw_quirks_jm; ++ ++ struct list_head entry; ++ struct device *dev; ++ unsigned int kbase_group_error; ++ struct miscdevice mdev; ++ u64 reg_start; ++ size_t reg_size; ++ void __iomem *reg; + -+/** -+ * union kbase_ioctl_mem_import - Import memory for use by the GPU -+ * @flags: Flags, see BASE_MEM_xxx -+ * @phandle: Handle to the external memory -+ * @type: Type of external memory, see base_mem_import_type -+ * @padding: Amount of extra VA pages to append to the imported buffer -+ * @gpu_va: Address of the new alias -+ * @va_pages: Size of the new alias -+ * -+ * @in: Input parameters -+ * @out: Output parameters -+ */ -+union kbase_ioctl_mem_import { -+ struct { -+ __u64 flags; -+ union kbase_pointer phandle; -+ __u32 type; -+ __u32 padding; -+ } in; + struct { -+ __u64 flags; -+ __u64 gpu_va; -+ __u64 va_pages; -+ } out; -+}; ++ int irq; ++ int flags; ++ } irqs[3]; + -+#define KBASE_IOCTL_MEM_IMPORT \ -+ _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) ++ struct clk *clock; ++#ifdef CONFIG_REGULATOR ++ struct regulator *regulator; ++#endif ++ char devname[DEVNAME_SIZE]; + -+/** -+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region -+ * @gpu_va: The GPU region to modify -+ * @flags: The new flags to set -+ * @mask: Mask of the flags to modify -+ 
*/ -+struct kbase_ioctl_mem_flags_change { -+ __u64 gpu_va; -+ __u64 flags; -+ __u64 mask; -+}; ++#ifdef CONFIG_MALI_NO_MALI ++ void *model; ++ struct kmem_cache *irq_slab; ++ struct workqueue_struct *irq_workq; ++ atomic_t serving_job_irq; ++ atomic_t serving_gpu_irq; ++ atomic_t serving_mmu_irq; ++ spinlock_t reg_op_lock; ++#endif /* CONFIG_MALI_NO_MALI */ + -+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ -+ _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) ++ struct kbase_pm_device_data pm; ++ struct kbasep_js_device_data js_data; ++ struct kbase_mem_pool mem_pool; ++ struct kbasep_mem_device memdev; ++ struct kbase_mmu_mode const *mmu_mode; + -+/** -+ * struct kbase_ioctl_stream_create - Create a synchronisation stream -+ * @name: A name to identify this stream. Must be NULL-terminated. -+ * -+ * Note that this is also called a "timeline", but is named stream to avoid -+ * confusion with other uses of the word. -+ * -+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. -+ * -+ * The ioctl returns a file descriptor. -+ */ -+struct kbase_ioctl_stream_create { -+ char name[32]; -+}; ++ struct kbase_as as[BASE_MAX_NR_AS]; ++ /* The below variables (as_free and as_to_kctx) are managed by the ++ * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must ++ * be held whilst accessing these. ++ */ ++ u16 as_free; /* Bitpattern of free Address Spaces */ ++ /* Mapping from active Address Spaces to kbase_context */ ++ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + -+#define KBASE_IOCTL_STREAM_CREATE \ -+ _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) + -+/** -+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence -+ * @fd: The file descriptor to validate -+ */ -+struct kbase_ioctl_fence_validate { -+ int fd; -+}; ++ spinlock_t mmu_mask_change; + -+#define KBASE_IOCTL_FENCE_VALIDATE \ -+ _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) ++ struct kbase_gpu_props gpu_props; + -+/** -+ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls -+ * @count: The size of @buffer in u32 words -+ * @buffer: The buffer to receive the profiling controls -+ */ -+struct kbase_ioctl_get_profiling_controls { -+ union kbase_pointer buffer; -+ __u32 count; -+}; ++ /** List of SW workarounds for HW issues */ ++ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; ++ /** List of features available */ ++ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + -+#define KBASE_IOCTL_GET_PROFILING_CONTROLS \ -+ _IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls) ++ /* Bitmaps of cores that are currently in use (running jobs). ++ * These should be kept up to date by the job scheduler. ++ * ++ * pm.power_change_lock should be held when accessing these members. ++ * ++ * kbase_pm_check_transitions_nolock() should be called when bits are ++ * cleared to update the power management system and allow transitions to ++ * occur. 
*/ ++ u64 shader_inuse_bitmap; + -+/** -+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel -+ * @buffer: Pointer to the information -+ * @len: Length -+ * @padding: Padding -+ * -+ * The data provided is accessible through a debugfs file -+ */ -+struct kbase_ioctl_mem_profile_add { -+ union kbase_pointer buffer; -+ __u32 len; -+ __u32 padding; -+}; ++ /* Refcount for cores in use */ ++ u32 shader_inuse_cnt[64]; + -+#define KBASE_IOCTL_MEM_PROFILE_ADD \ -+ _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) ++ /* Bitmaps of cores the JS needs for jobs ready to run */ ++ u64 shader_needed_bitmap; + -+/** -+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event -+ * @event: GPU address of the event which has been updated -+ * @new_status: The new status to set -+ * @flags: Flags for future expansion -+ */ -+struct kbase_ioctl_soft_event_update { -+ __u64 event; -+ __u32 new_status; -+ __u32 flags; -+}; ++ /* Refcount for cores needed */ ++ u32 shader_needed_cnt[64]; + -+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ -+ _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) ++ u32 tiler_inuse_cnt; + -+/*************** -+ * test ioctls * -+ ***************/ -+#if MALI_UNIT_TEST -+/* These ioctls are purely for test purposes and are not used in the production -+ * driver, they therefore may change without notice -+ */ ++ u32 tiler_needed_cnt; + -+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) ++ /* struct for keeping track of the disjoint information ++ * ++ * The state is > 0 if the GPU is in a disjoint state. Otherwise 0 ++ * The count is the number of disjoint events that have occurred on the GPU ++ */ ++ struct { ++ atomic_t count; ++ atomic_t state; ++ } disjoint_event; + -+/** -+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test -+ * -+ * @tpw_count: number of trace point writers in each context -+ * @msg_delay: time delay between tracepoints from one writer in milliseconds -+ * @msg_count: number of trace points written by one writer -+ * @aux_msg: if non-zero aux messages will be included -+ */ -+struct kbase_ioctl_tlstream_test { -+ __u32 tpw_count; -+ __u32 msg_delay; -+ __u32 msg_count; -+ __u32 aux_msg; -+}; ++ /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */ ++ u32 l2_users_count; + -+#define KBASE_IOCTL_TLSTREAM_TEST \ -+ _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) ++ /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be ++ * submitted to these cores. These are updated by the power management code. The job scheduler should avoid ++ * submitting new jobs to any cores that are not marked as available. ++ * ++ * pm.power_change_lock should be held when accessing these members. 
++ */ ++ u64 shader_available_bitmap; ++ u64 tiler_available_bitmap; ++ u64 l2_available_bitmap; ++ u64 stack_available_bitmap; + -+/** -+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes -+ * @bytes_collected: number of bytes read by user -+ * @bytes_generated: number of bytes generated by tracepoints -+ */ -+struct kbase_ioctl_tlstream_stats { -+ __u32 bytes_collected; -+ __u32 bytes_generated; -+}; ++ u64 shader_ready_bitmap; ++ u64 shader_transitioning_bitmap; + -+#define KBASE_IOCTL_TLSTREAM_STATS \ -+ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) ++ s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */ ++ s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ + -+#endif ++ /* Structure used for instrumentation and HW counters dumping */ ++ struct kbase_hwcnt { ++ /* The lock should be used when accessing any of the following members */ ++ spinlock_t lock; + -+/********************************** -+ * Definitions for GPU properties * -+ **********************************/ -+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) -+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) -+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) -+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) ++ struct kbase_context *kctx; ++ u64 addr; + -+#define KBASE_GPUPROP_PRODUCT_ID 1 -+#define KBASE_GPUPROP_VERSION_STATUS 2 -+#define KBASE_GPUPROP_MINOR_REVISION 3 -+#define KBASE_GPUPROP_MAJOR_REVISION 4 -+#define KBASE_GPUPROP_GPU_SPEED_MHZ 5 -+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 -+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MIN 7 -+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 -+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 -+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 -+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 -+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 ++ struct kbase_instr_backend backend; ++ } hwcnt; + -+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 -+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 -+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 ++ struct kbase_vinstr_context *vinstr_ctx; + -+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 -+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 ++#if KBASE_TRACE_ENABLE ++ spinlock_t trace_lock; ++ u16 trace_first_out; ++ u16 trace_next_in; ++ struct kbase_trace *trace_rbuf; ++#endif + -+#define KBASE_GPUPROP_MAX_THREADS 18 -+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 -+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 -+#define KBASE_GPUPROP_MAX_REGISTERS 21 -+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 -+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 -+#define KBASE_GPUPROP_IMPL_TECH 24 ++ u32 reset_timeout_ms; + -+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 -+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 -+#define KBASE_GPUPROP_RAW_L2_PRESENT 27 -+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 -+#define KBASE_GPUPROP_RAW_L2_FEATURES 29 -+#define KBASE_GPUPROP_RAW_SUSPEND_SIZE 30 -+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 -+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 -+#define KBASE_GPUPROP_RAW_AS_PRESENT 33 -+#define KBASE_GPUPROP_RAW_JS_PRESENT 34 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 -+#define 
KBASE_GPUPROP_RAW_JS_FEATURES_9 44 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 -+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 -+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 -+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 -+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 -+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 -+#define KBASE_GPUPROP_RAW_GPU_ID 55 -+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 -+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 -+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 -+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 -+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 ++ struct mutex cacheclean_lock; + -+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 -+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 -+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 -+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 -+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 -+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 -+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 -+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 -+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 -+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 -+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 -+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 -+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 -+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 -+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 -+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 -+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 -+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 -+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 ++ /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */ ++ void *platform_context; + -+#ifdef __cpluscplus -+} ++ /* List of kbase_contexts created */ ++ struct list_head kctx_list; ++ struct mutex kctx_list_lock; ++ ++ struct rockchip_opp_info opp_info; ++#ifdef CONFIG_MALI_DEVFREQ ++ struct devfreq_dev_profile devfreq_profile; ++ struct devfreq *devfreq; ++ unsigned long current_freq; ++ unsigned long current_nominal_freq; ++ unsigned long current_voltage; ++ u64 current_core_mask; ++ struct kbase_devfreq_opp *opp_table; ++ int num_opps; ++ struct monitor_dev_info *mdev_info; ++#ifdef CONFIG_DEVFREQ_THERMAL ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) ++ struct devfreq_cooling_device *devfreq_cooling; ++#else ++ struct thermal_cooling_device *devfreq_cooling; +#endif ++ /* Current IPA model - true for configured model, false for fallback */ ++ atomic_t ipa_use_configured_model; ++ struct { ++ /* Access to this struct must be with ipa.lock held */ ++ struct mutex lock; ++ struct kbase_ipa_model *configured_model; ++ struct kbase_ipa_model *fallback_model; ++ } ipa; ++#endif /* CONFIG_DEVFREQ_THERMAL */ ++#endif /* CONFIG_MALI_DEVFREQ */ ++ + ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ struct kbase_trace_kbdev_timeline timeline; +#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c -new file mode 100644 -index 000000000..d9d8658d3 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c -@@ -0,0 +1,1903 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ + ++ /* ++ * Control for enabling job dump on failure, set when control debugfs ++ * is opened. ++ */ ++ bool job_fault_debug; ++ ++#ifdef CONFIG_DEBUG_FS ++ /* directory for debugfs entries */ ++ struct dentry *mali_debugfs_directory; ++ /* Root directory for per context entry */ ++ struct dentry *debugfs_ctx_directory; ++ ++#ifdef CONFIG_MALI_DEBUG ++ /* bit for each as, set if there is new data to report */ ++ u64 debugfs_as_read_bitmap; ++#endif /* CONFIG_MALI_DEBUG */ ++ ++ /* failed job dump, used for separate debug process */ ++ wait_queue_head_t job_fault_wq; ++ wait_queue_head_t job_fault_resume_wq; ++ struct workqueue_struct *job_fault_resume_workq; ++ struct list_head job_fault_event_list; ++ spinlock_t job_fault_event_lock; ++ struct kbase_context *kctx_fault; + ++#if !MALI_CUSTOMER_RELEASE ++ /* Per-device data for register dumping interface */ ++ struct { ++ u16 reg_offset; /* Offset of a GPU_CONTROL register to be ++ dumped upon request */ ++ } regs_dump_debugfs_data; ++#endif /* !MALI_CUSTOMER_RELEASE */ ++#endif /* CONFIG_DEBUG_FS */ + ++ /* fbdump profiling controls set by gator */ ++ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; + + -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include -+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -+#ifdef CONFIG_COMPAT -+#include ++#if MALI_CUSTOMER_RELEASE == 0 ++ /* Number of jobs that are run before a job is forced to fail and ++ * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced ++ * failures. */ ++ int force_replay_limit; ++ /* Count of jobs between forced failures. Incremented on each job. A ++ * job is forced to fail once this is greater than or equal to ++ * force_replay_limit. */ ++ int force_replay_count; ++ /* Core requirement for jobs to be failed and replayed. May be zero. */ ++ base_jd_core_req force_replay_core_req; ++ /* true if force_replay_limit should be randomized. The random ++ * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. ++ */ ++ bool force_replay_random; +#endif -+#include -+#include -+#include -+#include -+#include -+#include + -+#include -+#include -+#include ++ /* Total number of created contexts */ ++ atomic_t ctx_num; + -+#include "mali_kbase_dma_fence.h" ++#ifdef CONFIG_DEBUG_FS ++ /* Holds the most recent register accesses */ ++ struct kbase_io_history io_history; ++#endif /* CONFIG_DEBUG_FS */ + -+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) ++ struct kbase_hwaccess_data hwaccess; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -+/* random32 was renamed to prandom_u32 in 3.8 */ -+#define prandom_u32 random32 -+#endif ++ /* Count of page/bus faults waiting for workqueues to process */ ++ atomic_t faults_pending; + -+/* Return whether katom will run on the GPU or not. Currently only soft jobs and -+ * dependency-only atoms do not run on the GPU */ -+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ -+ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ -+ BASE_JD_REQ_DEP))) -+/* -+ * This is the kernel side of the API. 
Only entry points are: -+ * - kbase_jd_submit(): Called from userspace to submit a single bag -+ * - kbase_jd_done(): Called from interrupt context to track the -+ * completion of a job. -+ * Callouts: -+ * - to the job manager (enqueue a job) -+ * - to the event subsystem (signals the completion/failure of bag/job-chains). -+ */ ++ /* true if GPU is powered off or power off operation is in progress */ ++ bool poweroff_pending; + -+static void __user * -+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p) -+{ -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ return compat_ptr(p->compat_value); ++ ++ /* defaults for new context created for this device */ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) ++ bool infinite_cache_active_default; ++#else ++ u32 infinite_cache_active_default; +#endif -+ return p->value; -+} ++ size_t mem_pool_max_size_default; + -+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs -+ * -+ * Returns whether the JS needs a reschedule. -+ * -+ * Note that the caller must also check the atom status and -+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock -+ */ -+static int jd_run_atom(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; ++ /* current gpu coherency mode */ ++ u32 current_gpu_coherency_mode; ++ /* system coherency mode */ ++ u32 system_coherency; ++ /* Flag to track when cci snoops have been enabled on the interface */ ++ bool cci_snoop_enabled; + -+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); ++ /* SMC function IDs to call into Trusted firmware to enable/disable ++ * cache snooping. Value of 0 indicates that they are not used ++ */ ++ u32 snoop_enable_smc; ++ u32 snoop_disable_smc; + -+ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { -+ /* Dependency only atom */ -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ return 0; -+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { -+ /* Soft-job */ -+ if (katom->will_fail_event_code) { -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ return 0; -+ } -+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) -+ == BASE_JD_REQ_SOFT_REPLAY) { -+ if (!kbase_replay_process(katom)) -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ } else if (kbase_process_soft_job(katom) == 0) { -+ kbase_finish_soft_job(katom); -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ } -+ return 0; -+ } ++ /* Protected mode operations */ ++ struct protected_mode_ops *protected_ops; + -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ /* Queue an action about whether we should try scheduling a context */ -+ return kbasep_js_add_job(kctx, katom); -+} ++ /* Protected device attached to this kbase device */ ++ struct protected_mode_device *protected_dev; + -+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) -+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) -+{ -+ struct kbase_device *kbdev; ++ /* ++ * true when GPU is put into protected mode ++ */ ++ bool protected_mode; + -+ KBASE_DEBUG_ASSERT(katom); -+ kbdev = katom->kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev); ++ /* ++ * true when GPU is transitioning into or out of protected mode ++ */ ++ bool protected_mode_transition; + -+ /* Check whether the atom's other dependencies were already met. If -+ * katom is a GPU atom then the job scheduler may be able to represent -+ * the dependencies, hence we may attempt to submit it before they are -+ * met. 
Other atoms must have had both dependencies resolved. ++ /* ++ * true if protected mode is supported + */ -+ if (IS_GPU_ATOM(katom) || -+ (!kbase_jd_katom_dep_atom(&katom->dep[0]) && -+ !kbase_jd_katom_dep_atom(&katom->dep[1]))) { -+ /* katom dep complete, attempt to run it */ -+ bool resched = false; ++ bool protected_mode_support; + -+ resched = jd_run_atom(katom); + -+ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { -+ /* The atom has already finished */ -+ resched |= jd_done_nolock(katom, NULL); -+ } ++#ifdef CONFIG_MALI_DEBUG ++ wait_queue_head_t driver_inactive_wait; ++ bool driver_inactive; ++#endif /* CONFIG_MALI_DEBUG */ + -+ if (resched) -+ kbase_js_sched_all(kbdev); -+ } -+} ++#ifdef CONFIG_MALI_FPGA_BUS_LOGGER ++ /* ++ * Bus logger integration. ++ */ ++ struct bus_logger_client *buslogger; +#endif ++ /* Boolean indicating if an IRQ flush during reset is in progress. */ ++ bool irq_reset_flush; + -+#ifdef CONFIG_KDS ++ /* list of inited sub systems. Used during terminate/error recovery */ ++ u32 inited_subsys; + -+/* Add the katom to the kds waiting list. -+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall. -+ * The caller must hold the kbase_jd_context.lock */ ++ spinlock_t hwaccess_lock; + -+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx; ++ /* Protects access to MMU operations */ ++ struct mutex mmu_hw_mutex; + -+ KBASE_DEBUG_ASSERT(katom); ++ /* Current serialization mode. See KBASE_SERIALIZE_* for details */ ++ u8 serialize_jobs; ++}; + -+ kctx = katom->kctx; ++/** ++ * struct jsctx_queue - JS context atom queue ++ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this ++ * job slot. ++ * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot ++ * dependencies. Atoms on this list will be moved to the ++ * runnable_tree when the blocking atom completes. ++ * ++ * hwaccess_lock must be held when accessing this structure. ++ */ ++struct jsctx_queue { ++ struct rb_root runnable_tree; ++ struct list_head x_dep_head; ++}; + -+ list_add_tail(&katom->node, &kctx->waiting_kds_resource); -+} + -+/* Remove the katom from the kds waiting list. -+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync. -+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add. -+ * The caller must hold the kbase_jd_context.lock */ ++#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ ++ (((minor) & 0xFFF) << 8) | \ ++ ((0 & 0xFF) << 0)) + -+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom) -+{ -+ KBASE_DEBUG_ASSERT(katom); -+ list_del(&katom->node); -+} ++/** ++ * enum kbase_context_flags - Flags for kbase contexts ++ * ++ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit ++ * process on a 64-bit kernel. ++ * ++ * @KCTX_RUNNABLE_REF: Set when context is counted in ++ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. ++ * ++ * @KCTX_ACTIVE: Set when the context is active. ++ * ++ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this ++ * context. ++ * ++ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been ++ * initialized. ++ * ++ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new ++ * allocations. Existing allocations will not change. ++ * ++ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. 
++ * ++ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept ++ * scheduled in. ++ * ++ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. ++ * This is only ever updated whilst the jsctx_mutex is held. ++ * ++ * @KCTX_DYING: Set when the context process is in the process of being evicted. ++ * ++ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this ++ * context, to disable use of implicit dma-buf fences. This is used to avoid ++ * potential synchronization deadlocks. ++ * ++ * All members need to be separate bits. This enum is intended for use in a ++ * bitmask where multiple values get OR-ed together. ++ */ ++enum kbase_context_flags { ++ KCTX_COMPAT = 1U << 0, ++ KCTX_RUNNABLE_REF = 1U << 1, ++ KCTX_ACTIVE = 1U << 2, ++ KCTX_PULLED = 1U << 3, ++ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, ++ KCTX_INFINITE_CACHE = 1U << 5, ++ KCTX_SUBMIT_DISABLED = 1U << 6, ++ KCTX_PRIVILEGED = 1U << 7, ++ KCTX_SCHEDULED = 1U << 8, ++ KCTX_DYING = 1U << 9, ++ KCTX_NO_IMPLICIT_SYNC = 1U << 10, ++}; + -+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter) -+{ -+ struct kbase_jd_atom *katom; -+ struct kbase_jd_context *ctx; ++struct kbase_context { ++ struct file *filp; ++ struct kbase_device *kbdev; ++ int id; /* System wide unique id */ ++ unsigned long api_version; ++ phys_addr_t pgd; ++ struct list_head event_list; ++ struct list_head event_coalesce_list; ++ struct mutex event_mutex; ++ atomic_t event_closed; ++ struct workqueue_struct *event_workq; ++ atomic_t event_count; ++ int event_coalesce_count; + -+ katom = (struct kbase_jd_atom *)callback_parameter; -+ KBASE_DEBUG_ASSERT(katom); ++ atomic_t flags; + -+ ctx = &katom->kctx->jctx; ++ atomic_t setup_complete; ++ atomic_t setup_in_progress; + -+ /* If KDS resource has already been satisfied (e.g. due to zapping) -+ * do nothing. -+ */ -+ mutex_lock(&ctx->lock); -+ if (!katom->kds_dep_satisfied) { -+ katom->kds_dep_satisfied = true; -+ kbase_jd_dep_clear_locked(katom); -+ } -+ mutex_unlock(&ctx->lock); -+} ++ u64 *mmu_teardown_pages; + -+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) -+{ -+ KBASE_DEBUG_ASSERT(katom); ++ struct page *aliasing_sink_page; + -+ /* Prevent job_done_nolock from being called twice on an atom when -+ * there is a race between job completion and cancellation */ ++ struct mutex mmu_lock; ++ struct mutex reg_lock; /* To be converted to a rwlock? */ ++ struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions, ++ * SAME_VA zone */ ++ struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions, ++ * EXEC zone */ ++ struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions, ++ * CUSTOM_VA zone */ + -+ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { -+ /* Wait was cancelled - zap the atom */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(katom->kctx->kbdev); -+ } -+} -+#endif /* CONFIG_KDS */ ++ unsigned long cookies; ++ struct kbase_va_region *pending_regions[BITS_PER_LONG]; + -+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) -+{ -+#ifdef CONFIG_KDS -+ if (katom->kds_rset) { -+ struct kbase_jd_context *jctx = &katom->kctx->jctx; ++ wait_queue_head_t event_queue; ++ pid_t tgid; ++ pid_t pid; + -+ /* -+ * As the atom is no longer waiting, remove it from -+ * the waiting list. 
-+ */ ++ struct kbase_jd_context jctx; ++ atomic_t used_pages; ++ atomic_t nonmapped_pages; + -+ mutex_lock(&jctx->lock); -+ kbase_jd_kds_waiters_remove(katom); -+ mutex_unlock(&jctx->lock); ++ struct kbase_mem_pool mem_pool; + -+ /* Release the kds resource or cancel if zapping */ -+ kds_resource_set_release_sync(&katom->kds_rset); -+ } -+#endif /* CONFIG_KDS */ ++ struct shrinker reclaim; ++ struct list_head evict_list; + ++ struct list_head waiting_soft_jobs; ++ spinlock_t waiting_soft_jobs_lock; ++#ifdef CONFIG_KDS ++ struct list_head waiting_kds_resource; ++#endif +#ifdef CONFIG_MALI_DMA_FENCE -+ /* Flush dma-fence workqueue to ensure that any callbacks that may have -+ * been queued are done before continuing. -+ * Any successfully completed atom would have had all it's callbacks -+ * completed before the atom was run, so only flush for failed atoms. -+ */ -+ if (katom->event_code != BASE_JD_EVENT_DONE) -+ flush_workqueue(katom->kctx->dma_fence.wq); ++ struct { ++ struct list_head waiting_resource; ++ struct workqueue_struct *wq; ++ } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ -+} ++ /** This is effectively part of the Run Pool, because it only has a valid ++ * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in ++ * ++ * The hwaccess_lock must be held whilst accessing this. ++ * ++ * If the context relating to this as_nr is required, you must use ++ * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear ++ * whilst you're using it. Alternatively, just hold the hwaccess_lock ++ * to ensure the context doesn't disappear (but this has restrictions on what other locks ++ * you can take whilst doing this) */ ++ int as_nr; + -+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) -+{ -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); ++ /* Keeps track of the number of users of this context. A user can be a ++ * job that is available for execution, instrumentation needing to 'pin' ++ * a context for counter collection, etc. If the refcount reaches 0 then ++ * this context is considered inactive and the previously programmed ++ * AS might be cleared at any point. 
++ */ ++ atomic_t refcount; + -+#ifdef CONFIG_KDS -+ /* Prevent the KDS resource from triggering the atom in case of zapping */ -+ if (katom->kds_rset) -+ katom->kds_dep_satisfied = true; -+#endif /* CONFIG_KDS */ ++ /* NOTE: ++ * ++ * Flags are in jctx.sched_info.ctx.flags ++ * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex ++ * ++ * All other flags must be added there */ ++ spinlock_t mm_update_lock; ++ struct mm_struct *process_mm; ++ /* End of the SAME_VA zone */ ++ u64 same_va_end; + -+#ifdef CONFIG_MALI_DMA_FENCE -+ kbase_dma_fence_signal(katom); -+#endif /* CONFIG_MALI_DMA_FENCE */ ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ struct kbase_trace_kctx_timeline timeline; ++#endif ++#ifdef CONFIG_DEBUG_FS ++ /* Content of mem_profile file */ ++ char *mem_profile_data; ++ /* Size of @c mem_profile_data */ ++ size_t mem_profile_size; ++ /* Mutex guarding memory profile state */ ++ struct mutex mem_profile_lock; ++ /* Memory profile directory under debugfs */ ++ struct dentry *kctx_dentry; + -+ kbase_gpu_vm_lock(katom->kctx); -+ /* only roll back if extres is non-NULL */ -+ if (katom->extres) { -+ u32 res_no; ++ /* for job fault debug */ ++ unsigned int *reg_dump; ++ atomic_t job_fault_count; ++ /* This list will keep the following atoms during the dump ++ * in the same context ++ */ ++ struct list_head job_fault_resume_event_list; + -+ res_no = katom->nr_extres; -+ while (res_no-- > 0) { -+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; -+ struct kbase_va_region *reg; ++#endif /* CONFIG_DEBUG_FS */ + -+ reg = kbase_region_tracker_find_region_base_address( -+ katom->kctx, -+ katom->extres[res_no].gpu_address); -+ kbase_unmap_external_resource(katom->kctx, reg, alloc); -+ } -+ kfree(katom->extres); -+ katom->extres = NULL; -+ } -+ kbase_gpu_vm_unlock(katom->kctx); -+} ++ struct jsctx_queue jsctx_queue ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + -+/* -+ * Set up external resources needed by this job. -+ * -+ * jctx.lock must be held when this is called. -+ */ ++ /* Number of atoms currently pulled from this context */ ++ atomic_t atoms_pulled; ++ /* Number of atoms currently pulled from this context, per slot */ ++ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; ++ /* Number of atoms currently pulled from this context, per slot and ++ * priority. Hold hwaccess_lock when accessing */ ++ int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ ++ KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + -+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) -+{ -+ int err_ret_val = -EINVAL; -+ u32 res_no; -+#ifdef CONFIG_KDS -+ u32 kds_res_count = 0; -+ struct kds_resource **kds_resources = NULL; -+ unsigned long *kds_access_bitmap = NULL; -+#endif /* CONFIG_KDS */ -+#ifdef CONFIG_MALI_DMA_FENCE -+ struct kbase_dma_fence_resv_info info = { -+ .dma_fence_resv_count = 0, -+ }; -+#ifdef CONFIG_SYNC -+ /* -+ * When both dma-buf fence and Android native sync is enabled, we -+ * disable dma-buf fence for contexts that are using Android native -+ * fences. -+ */ -+ const bool implicit_sync = !kbase_ctx_flag(katom->kctx, -+ KCTX_NO_IMPLICIT_SYNC); -+#else /* CONFIG_SYNC */ -+ const bool implicit_sync = true; -+#endif /* CONFIG_SYNC */ -+#endif /* CONFIG_MALI_DMA_FENCE */ -+ struct base_external_resource *input_extres; ++ /* true if slot is blocked on the given priority. 
This will be set on a ++ * soft-stop */ ++ bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); ++ /* Bitmask of slots that can be pulled from */ ++ u32 slots_pullable; + -+ /* no resources encoded, early out */ -+ if (!katom->nr_extres) -+ return -EINVAL; ++ /* Backend specific data */ ++ struct kbase_context_backend backend; + -+ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); -+ if (NULL == katom->extres) { -+ err_ret_val = -ENOMEM; -+ goto early_err_out; -+ } ++ /* Work structure used for deferred ASID assignment */ ++ struct work_struct work; + -+ /* copy user buffer to the end of our real buffer. -+ * Make sure the struct sizes haven't changed in a way -+ * we don't support */ -+ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); -+ input_extres = (struct base_external_resource *) -+ (((unsigned char *)katom->extres) + -+ (sizeof(*katom->extres) - sizeof(*input_extres)) * -+ katom->nr_extres); ++ /* Only one userspace vinstr client per kbase context */ ++ struct kbase_vinstr_client *vinstr_cli; ++ struct mutex vinstr_cli_lock; + -+ if (copy_from_user(input_extres, -+ get_compat_pointer(katom->kctx, &user_atom->extres_list), -+ sizeof(*input_extres) * katom->nr_extres) != 0) { -+ err_ret_val = -EINVAL; -+ goto early_err_out; -+ } -+#ifdef CONFIG_KDS -+ /* assume we have to wait for all */ -+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres); -+ kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL); ++ /* List of completed jobs waiting for events to be posted */ ++ struct list_head completed_jobs; ++ /* Number of work items currently pending on job_done_wq */ ++ atomic_t work_count; + -+ if (!kds_resources) { -+ err_ret_val = -ENOMEM; -+ goto early_err_out; -+ } ++ /* Waiting soft-jobs will fail when this timer expires */ ++ struct timer_list soft_job_timeout; + -+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres); -+ kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), -+ sizeof(unsigned long), -+ GFP_KERNEL); -+ if (!kds_access_bitmap) { -+ err_ret_val = -ENOMEM; -+ goto early_err_out; -+ } -+#endif /* CONFIG_KDS */ ++ /* JIT allocation management */ ++ struct kbase_va_region *jit_alloc[256]; ++ struct list_head jit_active_head; ++ struct list_head jit_pool_head; ++ struct list_head jit_destroy_head; ++ struct mutex jit_evict_lock; ++ struct work_struct jit_work; + -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (implicit_sync) { -+ info.resv_objs = kmalloc_array(katom->nr_extres, -+ sizeof(struct reservation_object *), -+ GFP_KERNEL); -+ if (!info.resv_objs) { -+ err_ret_val = -ENOMEM; -+ goto early_err_out; -+ } ++ /* A list of the JIT soft-jobs in submission order ++ * (protected by kbase_jd_context.lock) ++ */ ++ struct list_head jit_atoms_head; ++ /* A list of pending JIT alloc soft-jobs (using the 'queue' list_head) ++ * (protected by kbase_jd_context.lock) ++ */ ++ struct list_head jit_pending_alloc; + -+ info.dma_fence_excl_bitmap = -+ kcalloc(BITS_TO_LONGS(katom->nr_extres), -+ sizeof(unsigned long), GFP_KERNEL); -+ if (!info.dma_fence_excl_bitmap) { -+ err_ret_val = -ENOMEM; -+ goto early_err_out; -+ } -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ ++ /* External sticky resource management */ ++ struct list_head ext_res_meta_head; + -+ /* Take the processes mmap lock */ -+ down_read(¤t->mm->mmap_lock); ++ /* Used to record that a drain was requested from atomic context */ ++ atomic_t 
drain_pending; + -+ /* need to keep the GPU VM locked while we set up UMM buffers */ -+ kbase_gpu_vm_lock(katom->kctx); -+ for (res_no = 0; res_no < katom->nr_extres; res_no++) { -+ struct base_external_resource *res; -+ struct kbase_va_region *reg; -+ struct kbase_mem_phy_alloc *alloc; -+ bool exclusive; ++ /* Current age count, used to determine age for newly submitted atoms */ ++ u32 age_count; ++}; + -+ res = &input_extres[res_no]; -+ exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) -+ ? true : false; -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ katom->kctx, -+ res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); -+ /* did we find a matching region object? */ -+ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { -+ /* roll back */ -+ goto failed_loop; -+ } ++/** ++ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource ++ * to a @kbase_context. ++ * @ext_res_node: List head for adding the metadata to a ++ * @kbase_context. ++ * @alloc: The physical memory allocation structure ++ * which is mapped. ++ * @gpu_addr: The GPU virtual address the resource is ++ * mapped to. ++ * ++ * External resources can be mapped into multiple contexts as well as the same ++ * context multiple times. ++ * As kbase_va_region itself isn't refcounted we can't attach our extra ++ * information to it as it could be removed under our feet leaving external ++ * resources pinned. ++ * This metadata structure binds a single external resource to a single ++ * context, ensuring that per context mapping is tracked separately so it can ++ * be overridden when needed and abuses by the application (freeing the resource ++ * multiple times) don't effect the refcount of the physical allocation. ++ */ ++struct kbase_ctx_ext_res_meta { ++ struct list_head ext_res_node; ++ struct kbase_mem_phy_alloc *alloc; ++ u64 gpu_addr; ++}; + -+ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && -+ (reg->flags & KBASE_REG_SECURE)) { -+ katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; -+ } ++enum kbase_reg_access_type { ++ REG_READ, ++ REG_WRITE ++}; + -+ alloc = kbase_map_external_resource(katom->kctx, reg, -+ current->mm -+#ifdef CONFIG_KDS -+ , &kds_res_count, kds_resources, -+ kds_access_bitmap, exclusive -+#endif -+ ); -+ if (!alloc) { -+ err_ret_val = -EINVAL; -+ goto failed_loop; -+ } ++enum kbase_share_attr_bits { ++ /* (1ULL << 8) bit is reserved */ ++ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ ++ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ ++}; + -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (implicit_sync && -+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { -+ struct reservation_object *resv; ++/** ++ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. ++ * @kbdev: kbase device ++ * ++ * Return: true if the device access are coherent, false if not. 
++ */ ++static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) ++{ ++ if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || ++ (kbdev->system_coherency == COHERENCY_ACE)) ++ return true; + -+ resv = reg->gpu_alloc->imported.umm.dma_buf->resv; -+ if (resv) -+ kbase_dma_fence_add_reservation(resv, &info, -+ exclusive); -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ ++ return false; ++} + -+ /* finish with updating out array with the data we found */ -+ /* NOTE: It is important that this is the last thing we do (or -+ * at least not before the first write) as we overwrite elements -+ * as we loop and could be overwriting ourself, so no writes -+ * until the last read for an element. -+ * */ -+ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ -+ katom->extres[res_no].alloc = alloc; -+ } -+ /* successfully parsed the extres array */ -+ /* drop the vm lock before we call into kds */ -+ kbase_gpu_vm_unlock(katom->kctx); ++/* Conversion helpers for setting up high resolution timers */ ++#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) ++#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + -+ /* Release the processes mmap lock */ -+ up_read(¤t->mm->mmap_lock); ++/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ ++#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 ++/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ ++#define KBASE_AS_INACTIVE_MAX_LOOPS 100000 + -+#ifdef CONFIG_KDS -+ if (kds_res_count) { -+ int wait_failed; ++/* Maximum number of times a job can be replayed */ ++#define BASEP_JD_REPLAY_LIMIT 15 + -+ /* We have resources to wait for with kds */ -+ katom->kds_dep_satisfied = false; ++/* JobDescriptorHeader - taken from the architecture specifications, the layout ++ * is currently identical for all GPU archs. */ ++struct job_descriptor_header { ++ u32 exception_status; ++ u32 first_incomplete_task; ++ u64 fault_pointer; ++ u8 job_descriptor_size : 1; ++ u8 job_type : 7; ++ u8 job_barrier : 1; ++ u8 _reserved_01 : 1; ++ u8 _reserved_1 : 1; ++ u8 _reserved_02 : 1; ++ u8 _reserved_03 : 1; ++ u8 _reserved_2 : 1; ++ u8 _reserved_04 : 1; ++ u8 _reserved_05 : 1; ++ u16 job_index; ++ u16 job_dependency_index_1; ++ u16 job_dependency_index_2; ++ union { ++ u64 _64; ++ u32 _32; ++ } next_job; ++}; + -+ wait_failed = kds_async_waitall(&katom->kds_rset, -+ &katom->kctx->jctx.kds_cb, katom, NULL, -+ kds_res_count, kds_access_bitmap, -+ kds_resources); ++#endif /* _KBASE_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c +new file mode 100644 +index 000000000..b0eb67da8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_device.c +@@ -0,0 +1,674 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ if (wait_failed) -+ goto failed_kds_setup; -+ else -+ kbase_jd_kds_waiters_add(katom); -+ } else { -+ /* Nothing to wait for, so kds dep met */ -+ katom->kds_dep_satisfied = true; -+ } -+ kfree(kds_resources); -+ kfree(kds_access_bitmap); -+#endif /* CONFIG_KDS */ + -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (implicit_sync) { -+ if (info.dma_fence_resv_count) { -+ int ret; + -+ ret = kbase_dma_fence_wait(katom, &info); -+ if (ret < 0) -+ goto failed_dma_fence_setup; -+ } + -+ kfree(info.resv_objs); -+ kfree(info.dma_fence_excl_bitmap); -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ + -+ /* all done OK */ -+ return 0; ++/* ++ * Base kernel device APIs ++ */ + -+/* error handling section */ ++#include ++#include ++#include ++#include ++#include ++#include + -+#ifdef CONFIG_MALI_DMA_FENCE -+failed_dma_fence_setup: -+#ifdef CONFIG_KDS -+ /* If we are here, dma_fence setup failed but KDS didn't. -+ * Revert KDS setup if any. -+ */ -+ if (kds_res_count) { -+ mutex_unlock(&katom->kctx->jctx.lock); -+ kds_resource_set_release_sync(&katom->kds_rset); -+ mutex_lock(&katom->kctx->jctx.lock); ++#include ++#include ++#include ++#include ++#include + -+ kbase_jd_kds_waiters_remove(katom); -+ katom->kds_dep_satisfied = true; -+ } -+#endif /* CONFIG_KDS */ -+#endif /* CONFIG_MALI_DMA_FENCE */ -+#ifdef CONFIG_KDS -+failed_kds_setup: -+#endif -+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) -+ /* Lock the processes mmap lock */ -+ down_read(¤t->mm->mmap_lock); ++#include + -+ /* lock before we unmap */ -+ kbase_gpu_vm_lock(katom->kctx); -+#endif ++/* NOTE: Magic - 0x45435254 (TRCE in ASCII). ++ * Supports tracing feature provided in the base module. ++ * Please keep it in sync with the value of base module. ++ */ ++#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + -+ failed_loop: -+ /* undo the loop work */ -+ while (res_no-- > 0) { -+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; ++#if KBASE_TRACE_ENABLE ++static const char *kbasep_trace_code_string[] = { ++ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE ++ * THIS MUST BE USED AT THE START OF THE ARRAY */ ++#define KBASE_TRACE_CODE_MAKE_CODE(X) # X ++#include "mali_kbase_trace_defs.h" ++#undef KBASE_TRACE_CODE_MAKE_CODE ++}; ++#endif + -+ kbase_unmap_external_resource(katom->kctx, NULL, alloc); -+ } -+ kbase_gpu_vm_unlock(katom->kctx); ++#define DEBUG_MESSAGE_SIZE 256 + -+ /* Release the processes mmap lock */ -+ up_read(¤t->mm->mmap_lock); ++static int kbasep_trace_init(struct kbase_device *kbdev); ++static void kbasep_trace_term(struct kbase_device *kbdev); ++static void kbasep_trace_hook_wrapper(void *param); + -+ early_err_out: -+ kfree(katom->extres); -+ katom->extres = NULL; -+#ifdef CONFIG_KDS -+ kfree(kds_resources); -+ kfree(kds_access_bitmap); -+#endif /* CONFIG_KDS */ -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (implicit_sync) { -+ kfree(info.resv_objs); -+ kfree(info.dma_fence_excl_bitmap); -+ } -+#endif -+ return err_ret_val; ++struct kbase_device *kbase_device_alloc(void) ++{ ++ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); +} + -+static inline void jd_resolve_dep(struct list_head *out_list, -+ struct kbase_jd_atom *katom, -+ u8 d, bool ctx_is_dying) ++static int kbase_device_as_init(struct kbase_device *kbdev, int i) +{ -+ u8 other_d = !d; ++ const char format[] = "mali_mmu%d"; ++ char name[sizeof(format)]; ++ const char poke_format[] = "mali_mmu%d_poker"; ++ char poke_name[sizeof(poke_format)]; + -+ while (!list_empty(&katom->dep_head[d])) { -+ struct kbase_jd_atom *dep_atom; -+ 
struct kbase_jd_atom *other_dep_atom; -+ u8 dep_type; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) ++ snprintf(poke_name, sizeof(poke_name), poke_format, i); + -+ dep_atom = list_entry(katom->dep_head[d].next, -+ struct kbase_jd_atom, dep_item[d]); -+ list_del(katom->dep_head[d].next); ++ snprintf(name, sizeof(name), format, i); + -+ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); -+ kbase_jd_katom_dep_clear(&dep_atom->dep[d]); ++ kbdev->as[i].number = i; ++ kbdev->as[i].fault_addr = 0ULL; + -+ if (katom->event_code != BASE_JD_EVENT_DONE && -+ (dep_type != BASE_JD_DEP_TYPE_ORDER)) { -+#ifdef CONFIG_KDS -+ if (!dep_atom->kds_dep_satisfied) { -+ /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and -+ * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up -+ */ -+ dep_atom->kds_dep_satisfied = true; -+ } -+#endif ++ kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); ++ if (!kbdev->as[i].pf_wq) ++ return -EINVAL; + -+#ifdef CONFIG_MALI_DMA_FENCE -+ kbase_dma_fence_cancel_callbacks(dep_atom); -+#endif ++ INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); + -+ dep_atom->event_code = katom->event_code; -+ KBASE_DEBUG_ASSERT(dep_atom->status != -+ KBASE_JD_ATOM_STATE_UNUSED); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { ++ struct hrtimer *poke_timer = &kbdev->as[i].poke_timer; ++ struct work_struct *poke_work = &kbdev->as[i].poke_work; + -+ if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) -+ != BASE_JD_REQ_SOFT_REPLAY) { -+ dep_atom->will_fail_event_code = -+ dep_atom->event_code; -+ } else { -+ dep_atom->status = -+ KBASE_JD_ATOM_STATE_COMPLETED; -+ } ++ kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1); ++ if (!kbdev->as[i].poke_wq) { ++ destroy_workqueue(kbdev->as[i].pf_wq); ++ return -EINVAL; + } -+ other_dep_atom = (struct kbase_jd_atom *) -+ kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); -+ -+ if (!dep_atom->in_jd_list && (!other_dep_atom || -+ (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && -+ !dep_atom->will_fail_event_code && -+ !other_dep_atom->will_fail_event_code))) { -+ bool dep_satisfied = true; -+#ifdef CONFIG_MALI_DMA_FENCE -+ int dep_count; ++ KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work)); ++ INIT_WORK(poke_work, kbasep_as_do_poke); + -+ dep_count = kbase_fence_dep_count_read(dep_atom); -+ if (likely(dep_count == -1)) { -+ dep_satisfied = true; -+ } else { -+ /* -+ * There are either still active callbacks, or -+ * all fences for this @dep_atom has signaled, -+ * but the worker that will queue the atom has -+ * not yet run. -+ * -+ * Wait for the fences to signal and the fence -+ * worker to run and handle @dep_atom. If -+ * @dep_atom was completed due to error on -+ * @katom, then the fence worker will pick up -+ * the complete status and error code set on -+ * @dep_atom above. 
-+ */ -+ dep_satisfied = false; -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ ++ hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + -+#ifdef CONFIG_KDS -+ dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; -+#endif ++ poke_timer->function = kbasep_as_poke_timer_callback; + -+ if (dep_satisfied) { -+ dep_atom->in_jd_list = true; -+ list_add_tail(&dep_atom->jd_item, out_list); -+ } -+ } ++ kbdev->as[i].poke_refcount = 0; ++ kbdev->as[i].poke_state = 0u; + } -+} + -+KBASE_EXPORT_TEST_API(jd_resolve_dep); ++ return 0; ++} + -+#if MALI_CUSTOMER_RELEASE == 0 -+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom) ++static void kbase_device_as_term(struct kbase_device *kbdev, int i) +{ -+ kbdev->force_replay_count++; -+ -+ if (kbdev->force_replay_count >= kbdev->force_replay_limit) { -+ kbdev->force_replay_count = 0; -+ katom->event_code = BASE_JD_EVENT_FORCE_REPLAY; -+ -+ if (kbdev->force_replay_random) -+ kbdev->force_replay_limit = -+ (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1; -+ -+ dev_info(kbdev->dev, "force_replay : promoting to error\n"); -+ } ++ destroy_workqueue(kbdev->as[i].pf_wq); ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) ++ destroy_workqueue(kbdev->as[i].poke_wq); +} + -+/** Test to see if atom should be forced to fail. -+ * -+ * This function will check if an atom has a replay job as a dependent. If so -+ * then it will be considered for forced failure. */ -+static void jd_check_force_failure(struct kbase_jd_atom *katom) ++static int kbase_device_all_as_init(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ int i; -+ -+ if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) || -+ (katom->core_req & BASEP_JD_REQ_EVENT_NEVER)) -+ return; -+ -+ for (i = 1; i < BASE_JD_ATOM_COUNT; i++) { -+ if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom || -+ kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { -+ struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; ++ int i, err; + -+ if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == -+ BASE_JD_REQ_SOFT_REPLAY && -+ (dep_atom->core_req & kbdev->force_replay_core_req) -+ == kbdev->force_replay_core_req) { -+ jd_force_failure(kbdev, katom); -+ return; -+ } -+ } ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ err = kbase_device_as_init(kbdev, i); ++ if (err) ++ goto free_workqs; + } -+} -+#endif -+ -+/** -+ * is_dep_valid - Validate that a dependency is valid for early dependency -+ * submission -+ * @katom: Dependency atom to validate -+ * -+ * A dependency is valid if any of the following are true : -+ * - It does not exist (a non-existent dependency does not block submission) -+ * - It is in the job scheduler -+ * - It has completed, does not have a failure event code, and has not been -+ * marked to fail in the future -+ * -+ * Return: true if valid, false otherwise -+ */ -+static bool is_dep_valid(struct kbase_jd_atom *katom) -+{ -+ /* If there's no dependency then this is 'valid' from the perspective of -+ * early dependency submission */ -+ if (!katom) -+ return true; + -+ /* Dependency must have reached the job scheduler */ -+ if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) -+ return false; ++ return 0; + -+ /* If dependency has completed and has failed or will fail then it is -+ * not valid */ -+ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && -+ (katom->event_code != BASE_JD_EVENT_DONE || -+ katom->will_fail_event_code)) -+ return 
false; ++free_workqs: ++ for (; i > 0; i--) ++ kbase_device_as_term(kbdev, i); + -+ return true; ++ return err; +} + -+static void jd_try_submitting_deps(struct list_head *out_list, -+ struct kbase_jd_atom *node) ++static void kbase_device_all_as_term(struct kbase_device *kbdev) +{ + int i; + -+ for (i = 0; i < 2; i++) { -+ struct list_head *pos; -+ -+ list_for_each(pos, &node->dep_head[i]) { -+ struct kbase_jd_atom *dep_atom = list_entry(pos, -+ struct kbase_jd_atom, dep_item[i]); -+ -+ if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { -+ /*Check if atom deps look sane*/ -+ bool dep0_valid = is_dep_valid( -+ dep_atom->dep[0].atom); -+ bool dep1_valid = is_dep_valid( -+ dep_atom->dep[1].atom); -+ bool dep_satisfied = true; -+#ifdef CONFIG_MALI_DMA_FENCE -+ int dep_count; -+ -+ dep_count = kbase_fence_dep_count_read( -+ dep_atom); -+ if (likely(dep_count == -1)) { -+ dep_satisfied = true; -+ } else { -+ /* -+ * There are either still active callbacks, or -+ * all fences for this @dep_atom has signaled, -+ * but the worker that will queue the atom has -+ * not yet run. -+ * -+ * Wait for the fences to signal and the fence -+ * worker to run and handle @dep_atom. If -+ * @dep_atom was completed due to error on -+ * @katom, then the fence worker will pick up -+ * the complete status and error code set on -+ * @dep_atom above. -+ */ -+ dep_satisfied = false; -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ -+#ifdef CONFIG_KDS -+ dep_satisfied = dep_satisfied && -+ dep_atom->kds_dep_satisfied; -+#endif -+ -+ if (dep0_valid && dep1_valid && dep_satisfied) { -+ dep_atom->in_jd_list = true; -+ list_add(&dep_atom->jd_item, out_list); -+ } -+ } -+ } -+ } ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) ++ kbase_device_as_term(kbdev, i); +} + -+/* -+ * Perform the necessary handling of an atom that has finished running -+ * on the GPU. -+ * -+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller -+ * is responsible for calling kbase_finish_soft_job *before* calling this function. -+ * -+ * The caller must hold the kbase_jd_context.lock. 
-+ */ -+bool jd_done_nolock(struct kbase_jd_atom *katom, -+ struct list_head *completed_jobs_ctx) ++int kbase_device_init(struct kbase_device * const kbdev) +{ -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct list_head completed_jobs; -+ struct list_head runnable_jobs; -+ bool need_to_try_schedule_context = false; -+ int i; -+ -+ INIT_LIST_HEAD(&completed_jobs); -+ INIT_LIST_HEAD(&runnable_jobs); -+ -+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); -+ -+#if MALI_CUSTOMER_RELEASE == 0 -+ jd_check_force_failure(katom); -+#endif ++ int i, err; ++#ifdef CONFIG_ARM64 ++ struct device_node *np = NULL; ++#endif /* CONFIG_ARM64 */ + -+ /* This is needed in case an atom is failed due to being invalid, this -+ * can happen *before* the jobs that the atom depends on have completed */ -+ for (i = 0; i < 2; i++) { -+ if (kbase_jd_katom_dep_atom(&katom->dep[i])) { -+ list_del(&katom->dep_item[i]); -+ kbase_jd_katom_dep_clear(&katom->dep[i]); ++ spin_lock_init(&kbdev->mmu_mask_change); ++ mutex_init(&kbdev->mmu_hw_mutex); ++#ifdef CONFIG_ARM64 ++ kbdev->cci_snoop_enabled = false; ++ np = kbdev->dev->of_node; ++ if (np != NULL) { ++ if (of_property_read_u32(np, "snoop_enable_smc", ++ &kbdev->snoop_enable_smc)) ++ kbdev->snoop_enable_smc = 0; ++ if (of_property_read_u32(np, "snoop_disable_smc", ++ &kbdev->snoop_disable_smc)) ++ kbdev->snoop_disable_smc = 0; ++ /* Either both or none of the calls should be provided. */ ++ if (!((kbdev->snoop_disable_smc == 0 ++ && kbdev->snoop_enable_smc == 0) ++ || (kbdev->snoop_disable_smc != 0 ++ && kbdev->snoop_enable_smc != 0))) { ++ WARN_ON(1); ++ err = -EINVAL; ++ goto fail; + } + } ++#endif /* CONFIG_ARM64 */ ++ /* Get the list of workarounds for issues on the current HW ++ * (identified by the GPU_ID register) ++ */ ++ err = kbase_hw_set_issues_mask(kbdev); ++ if (err) ++ goto fail; + -+ /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with -+ * BASE_JD_EVENT_TILE_RANGE_FAULT. 
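/*
 * Illustrative user-space sketch (not part of the patch): the register-trace
 * buffer layout handled by kbase_device_trace_buffer_install() and
 * kbase_device_trace_register_access() in this patch.  tb[0] holds the magic,
 * tb[1] packs (write_offset << 16) | (wrap_count << 1) | transaction_bit,
 * offset 0 means "never written", and each entry is two 32-bit words.
 * Locking and the memory barriers used by the driver are omitted here.
 */
#include <stdint.h>
#include <stddef.h>

#define MODEL_TB_MAGIC 0x45435254u            /* "TRCE", as in TRACE_BUFFER_HEADER_SPECIAL */

struct model_tb {
	uint32_t *tb;
	uint32_t wrap_offset;                 /* number of 8-byte entries */
};

int model_tb_install(struct model_tb *t, uint32_t *buf, size_t size)
{
	/* 16-bit write offset must be able to address every 8-byte entry */
	if (0xFFFF * (2 * sizeof(uint32_t)) < size)
		return -1;

	buf[0] = MODEL_TB_MAGIC;
	buf[1] = 0;                           /* offset 0 == never written */
	t->tb = buf;
	t->wrap_offset = (uint32_t)(size / 8);
	return 0;
}

void model_tb_log(struct model_tb *t, uint16_t reg_offset,
		  uint32_t value, int is_write)
{
	uint32_t header = t->tb[1];
	uint32_t wrap = (header >> 1) & 0x7FFF;
	uint32_t off = (header >> 16) & 0xFFFF;

	off++;
	if (off == t->wrap_offset) {          /* wrap; offset 0 stays reserved */
		off = 1;
		wrap = (wrap + 1) & 0x7FFF;   /* 15-bit wrap counter */
	}

	t->tb[off * 2 + 0] = (reg_offset & ~0x3u) | (is_write ? 0x1 : 0x0);
	t->tb[off * 2 + 1] = value;
	t->tb[1] = (off << 16) | (wrap << 1); /* transaction bit left clear */
}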
-+ * -+ * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the -+ * error code to BASE_JD_EVENT_DONE ++ /* Set the list of features available on the current HW ++ * (identified by the GPU_ID register) + */ ++ kbase_hw_set_features_mask(kbdev); + -+ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) && -+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) { -+ if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) { -+ /* Promote the failure to job done */ -+ katom->event_code = BASE_JD_EVENT_DONE; -+ katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED); -+ } -+ } ++ kbase_gpuprops_set_features(kbdev); + -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ list_add_tail(&katom->jd_item, &completed_jobs); ++ /* On Linux 4.0+, dma coherency is determined from device tree */ ++#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) ++ set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); ++#endif + -+ while (!list_empty(&completed_jobs)) { -+ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); -+ list_del(completed_jobs.prev); -+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); ++ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our ++ * device structure was created by device-tree ++ */ ++ if (!kbdev->dev->dma_mask) ++ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; + -+ for (i = 0; i < 2; i++) -+ jd_resolve_dep(&runnable_jobs, katom, i, -+ kbase_ctx_flag(kctx, KCTX_DYING)); ++ err = dma_set_mask(kbdev->dev, ++ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); ++ if (err) ++ goto dma_set_mask_failed; + -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) -+ kbase_jd_post_external_resources(katom); ++ err = dma_set_coherent_mask(kbdev->dev, ++ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); ++ if (err) ++ goto dma_set_mask_failed; + -+ while (!list_empty(&runnable_jobs)) { -+ struct kbase_jd_atom *node; ++ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + -+ node = list_entry(runnable_jobs.next, -+ struct kbase_jd_atom, jd_item); -+ list_del(runnable_jobs.next); -+ node->in_jd_list = false; ++ err = kbase_device_all_as_init(kbdev); ++ if (err) ++ goto as_init_failed; + -+ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); ++ spin_lock_init(&kbdev->hwcnt.lock); + -+ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ need_to_try_schedule_context |= jd_run_atom(node); -+ } else { -+ node->event_code = katom->event_code; ++ err = kbasep_trace_init(kbdev); ++ if (err) ++ goto term_as; + -+ if ((node->core_req & -+ BASE_JD_REQ_SOFT_JOB_TYPE) == -+ BASE_JD_REQ_SOFT_REPLAY) { -+ if (kbase_replay_process(node)) -+ /* Don't complete this atom */ -+ continue; -+ } else if (node->core_req & -+ BASE_JD_REQ_SOFT_JOB) { -+ /* If this is a fence wait soft job -+ * then remove it from the list of sync -+ * waiters. 
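/*
 * Illustrative user-space sketch (not part of the patch): the bounded,
 * append-style snprintf pattern used by kbasep_trace_format_msg() above.
 * Each append clamps both the remaining space and the snprintf return value,
 * so an oversized message is truncated instead of writing past the buffer.
 * MODEL_MAX() is a local helper standing in for the kernel's MAX() macro;
 * the field names below are made up for the demo.
 */
#include <stdio.h>

#define MODEL_MAX(a, b) ((a) > (b) ? (a) : (b))

int model_append(char *buf, int len, int written, const char *tag, int v)
{
	written += MODEL_MAX(snprintf(buf + written,
				      MODEL_MAX(len - written, 0),
				      "%s=%d,", tag, v), 0);
	return written;
}

int main(void)
{
	char line[24];
	int written = 0;

	written = model_append(line, sizeof(line), written, "slot", 1);
	written = model_append(line, sizeof(line), written, "refcount", 3);
	written = model_append(line, sizeof(line), written, "info_val", 12345);
	/* The buffer holds a truncated message; "written" still reports how
	 * long the full message would have been. */
	printf("\"%s\" (needed %d bytes)\n", line, written + 1);
	return 0;
}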
-+ */ -+ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) -+ kbasep_remove_waiting_soft_job(node); ++ mutex_init(&kbdev->cacheclean_lock); + -+ kbase_finish_soft_job(node); -+ } -+ node->status = KBASE_JD_ATOM_STATE_COMPLETED; -+ } ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) ++ kbdev->timeline.slot_atoms_submitted[i] = 0; + -+ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { -+ list_add_tail(&node->jd_item, &completed_jobs); -+ } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && -+ !node->will_fail_event_code) { -+ /* Node successfully submitted, try submitting -+ * dependencies as they may now be representable -+ * in JS */ -+ jd_try_submitting_deps(&runnable_jobs, node); -+ } -+ } ++ for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i) ++ atomic_set(&kbdev->timeline.pm_event_uid[i], 0); ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ + -+ /* Register a completed job as a disjoint event when the GPU -+ * is in a disjoint state (ie. being reset or replaying jobs). -+ */ -+ kbase_disjoint_event_potential(kctx->kbdev); -+ if (completed_jobs_ctx) -+ list_add_tail(&katom->jd_item, completed_jobs_ctx); -+ else -+ kbase_event_post(kctx, katom); ++ /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */ ++ for (i = 0; i < FBDUMP_CONTROL_MAX; i++) ++ kbdev->kbase_profiling_controls[i] = 0; + -+ /* Decrement and check the TOTAL number of jobs. This includes -+ * those not tracked by the scheduler: 'not ready to run' and -+ * 'dependency-only' jobs. */ -+ if (--kctx->jctx.job_nr == 0) -+ wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter -+ * that we've got no more jobs (so we can be safely terminated) */ -+ } ++ kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); + -+ return need_to_try_schedule_context; -+} ++ atomic_set(&kbdev->ctx_num, 0); + -+KBASE_EXPORT_TEST_API(jd_done_nolock); ++ err = kbase_instr_backend_init(kbdev); ++ if (err) ++ goto term_trace; + -+#ifdef CONFIG_GPU_TRACEPOINTS -+enum { -+ CORE_REQ_DEP_ONLY, -+ CORE_REQ_SOFT, -+ CORE_REQ_COMPUTE, -+ CORE_REQ_FRAGMENT, -+ CORE_REQ_VERTEX, -+ CORE_REQ_TILER, -+ CORE_REQ_FRAGMENT_VERTEX, -+ CORE_REQ_FRAGMENT_VERTEX_TILER, -+ CORE_REQ_FRAGMENT_TILER, -+ CORE_REQ_VERTEX_TILER, -+ CORE_REQ_UNKNOWN -+}; -+static const char * const core_req_strings[] = { -+ "Dependency Only Job", -+ "Soft Job", -+ "Compute Shader Job", -+ "Fragment Shader Job", -+ "Vertex/Geometry Shader Job", -+ "Tiler Job", -+ "Fragment Shader + Vertex/Geometry Shader Job", -+ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", -+ "Fragment Shader + Tiler Job", -+ "Vertex/Geometry Shader Job + Tiler Job", -+ "Unknown Job" -+}; -+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) -+{ -+ if (core_req & BASE_JD_REQ_SOFT_JOB) -+ return core_req_strings[CORE_REQ_SOFT]; -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ return core_req_strings[CORE_REQ_COMPUTE]; -+ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { -+ case BASE_JD_REQ_DEP: -+ return core_req_strings[CORE_REQ_DEP_ONLY]; -+ case BASE_JD_REQ_FS: -+ return core_req_strings[CORE_REQ_FRAGMENT]; -+ case BASE_JD_REQ_CS: -+ return core_req_strings[CORE_REQ_VERTEX]; -+ case BASE_JD_REQ_T: -+ return core_req_strings[CORE_REQ_TILER]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): -+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_FRAGMENT_TILER]; -+ case 
(BASE_JD_REQ_CS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_VERTEX_TILER]; -+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): -+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; -+ } -+ return core_req_strings[CORE_REQ_UNKNOWN]; -+} -+#endif ++ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + -+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) -+{ -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ int queued = 0; -+ int i; -+ int sched_prio; -+ bool ret; -+ bool will_fail = false; ++ kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; + -+ /* Update the TOTAL number of jobs. This includes those not tracked by -+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ -+ jctx->job_nr++; ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); ++ else ++ kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ katom->start_timestamp.tv64 = 0; -+#else -+ katom->start_timestamp = 0; -+#endif -+ katom->udata = user_atom->udata; -+ katom->kctx = kctx; -+ katom->nr_extres = user_atom->nr_extres; -+ katom->extres = NULL; -+ katom->device_nr = user_atom->device_nr; -+ katom->affinity = 0; -+ katom->jc = user_atom->jc; -+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -+ katom->core_req = user_atom->core_req; -+ katom->atom_flags = 0; -+ katom->retry_count = 0; -+ katom->need_cache_flush_cores_retained = 0; -+ katom->pre_dep = NULL; -+ katom->post_dep = NULL; -+ katom->x_pre_dep = NULL; -+ katom->x_post_dep = NULL; -+ katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; ++#ifdef CONFIG_MALI_DEBUG ++ init_waitqueue_head(&kbdev->driver_inactive_wait); ++#endif /* CONFIG_MALI_DEBUG */ + -+ /* Implicitly sets katom->protected_state.enter as well. */ -+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; ++ return 0; ++term_trace: ++ kbasep_trace_term(kbdev); ++term_as: ++ kbase_device_all_as_term(kbdev); ++as_init_failed: ++dma_set_mask_failed: ++fail: ++ return err; ++} + -+ katom->age = kctx->age_count++; ++void kbase_device_term(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev); + -+ INIT_LIST_HEAD(&katom->jd_item); -+#ifdef CONFIG_KDS -+ /* Start by assuming that the KDS dependencies are satisfied, -+ * kbase_jd_pre_external_resources will correct this if there are dependencies */ -+ katom->kds_dep_satisfied = true; -+ katom->kds_rset = NULL; -+#endif /* CONFIG_KDS */ -+#ifdef CONFIG_MALI_DMA_FENCE -+ kbase_fence_dep_count_set(katom, -1); ++#if KBASE_TRACE_ENABLE ++ kbase_debug_assert_register_hook(NULL, NULL); +#endif + -+ /* Don't do anything if there is a mess up with dependencies. -+ This is done in a separate cycle to check both the dependencies at ones, otherwise -+ it will be extra complexity to deal with 1st dependency ( just added to the list ) -+ if only the 2nd one has invalid config. -+ */ -+ for (i = 0; i < 2; i++) { -+ int dep_atom_number = user_atom->pre_dep[i].atom_id; -+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; -+ -+ if (dep_atom_number) { -+ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && -+ dep_atom_type != BASE_JD_DEP_TYPE_DATA) { -+ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; -+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ kbase_instr_backend_term(kbdev); + -+ /* Wrong dependency setup. Atom will be sent -+ * back to user space. Do not record any -+ * dependencies. 
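/*
 * Illustrative user-space sketch (not part of the patch): the overwrite-oldest
 * ring buffer indexing used by kbasep_trace_add(), kbasep_trace_dump() and
 * kbasep_trace_clear() in this patch.  The real KBASE_TRACE_SIZE is defined
 * elsewhere in the driver; a power-of-two size of 16 is assumed here so that
 * (index + 1) & MASK wraps correctly.  Locking is omitted.
 */
#include <stdio.h>

#define MODEL_TRACE_SIZE 16u                  /* assumed; must be a power of two */
#define MODEL_TRACE_MASK (MODEL_TRACE_SIZE - 1)

struct model_trace {
	unsigned int buf[MODEL_TRACE_SIZE];
	unsigned int next_in;                 /* next slot to write */
	unsigned int first_out;               /* oldest valid entry */
};

void model_trace_add(struct model_trace *t, unsigned int val)
{
	t->buf[t->next_in] = val;
	t->next_in = (t->next_in + 1) & MODEL_TRACE_MASK;
	if (t->next_in == t->first_out)       /* full: drop the oldest entry */
		t->first_out = (t->first_out + 1) & MODEL_TRACE_MASK;
}

void model_trace_dump(const struct model_trace *t)
{
	unsigned int i = t->first_out;

	while (i != t->next_in) {
		printf("%u\n", t->buf[i]);
		i = (i + 1) & MODEL_TRACE_MASK;
	}
}

int main(void)
{
	struct model_trace t = { .next_in = 0, .first_out = 0 };
	unsigned int i;

	for (i = 0; i < 20; i++)              /* more adds than slots: oldest are lost */
		model_trace_add(&t, i);
	model_trace_dump(&t);                 /* prints 5..19 (the 15 newest entries) */
	return 0;
}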
*/ -+ KBASE_TLSTREAM_TL_NEW_ATOM( -+ katom, -+ kbase_jd_atom_id(kctx, katom)); -+ KBASE_TLSTREAM_TL_RET_ATOM_CTX( -+ katom, kctx); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, -+ TL_ATOM_STATE_IDLE); ++ kbasep_trace_term(kbdev); + -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } -+ } -+ } ++ kbase_device_all_as_term(kbdev); ++} + -+ /* Add dependencies */ -+ for (i = 0; i < 2; i++) { -+ int dep_atom_number = user_atom->pre_dep[i].atom_id; -+ base_jd_dep_type dep_atom_type; -+ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; ++void kbase_device_free(struct kbase_device *kbdev) ++{ ++ kfree(kbdev); ++} + -+ dep_atom_type = user_atom->pre_dep[i].dependency_type; -+ kbase_jd_katom_dep_clear(&katom->dep[i]); ++int kbase_device_trace_buffer_install( ++ struct kbase_context *kctx, u32 *tb, size_t size) ++{ ++ unsigned long flags; + -+ if (!dep_atom_number) -+ continue; ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(tb); + -+ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || -+ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ /* Interface uses 16-bit value to track last accessed entry. Each entry ++ * is composed of two 32-bit words. ++ * This limits the size that can be handled without an overflow. */ ++ if (0xFFFF * (2 * sizeof(u32)) < size) ++ return -EINVAL; + -+ if (dep_atom->event_code == BASE_JD_EVENT_DONE) -+ continue; -+ /* don't stop this atom if it has an order dependency -+ * only to the failed one, try to submit it through -+ * the normal path -+ */ -+ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && -+ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { -+ continue; -+ } ++ /* set up the header */ ++ /* magic number in the first 4 bytes */ ++ tb[0] = TRACE_BUFFER_HEADER_SPECIAL; ++ /* Store (write offset = 0, wrap counter = 0, transaction active = no) ++ * write offset 0 means never written. ++ * Offsets 1 to (wrap_offset - 1) used to store values when trace started ++ */ ++ tb[1] = 0; + -+ /* Atom has completed, propagate the error code if any */ -+ katom->event_code = dep_atom->event_code; -+ katom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ /* install trace buffer */ ++ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); ++ kctx->jctx.tb_wrap_offset = size / 8; ++ kctx->jctx.tb = tb; ++ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); + -+ /* This atom is going through soft replay or -+ * will be sent back to user space. Do not record any -+ * dependencies. 
*/ -+ KBASE_TLSTREAM_TL_NEW_ATOM( -+ katom, -+ kbase_jd_atom_id(kctx, katom)); -+ KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, -+ TL_ATOM_STATE_IDLE); ++ return 0; ++} + -+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) -+ == BASE_JD_REQ_SOFT_REPLAY) { -+ if (kbase_replay_process(katom)) { -+ ret = false; -+ goto out; -+ } -+ } -+ will_fail = true; ++void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx) ++{ ++ unsigned long flags; + -+ } else { -+ /* Atom is in progress, add this atom to the list */ -+ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); -+ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); -+ queued = 1; -+ } -+ } ++ KBASE_DEBUG_ASSERT(kctx); ++ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); ++ kctx->jctx.tb = NULL; ++ kctx->jctx.tb_wrap_offset = 0; ++ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); ++} + -+ if (will_fail) { -+ if (!queued) { -+ ret = jd_done_nolock(katom, NULL); ++void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value) ++{ ++ unsigned long flags; + -+ goto out; -+ } else { -+ katom->will_fail_event_code = katom->event_code; -+ ret = false; ++ spin_lock_irqsave(&kctx->jctx.tb_lock, flags); ++ if (kctx->jctx.tb) { ++ u16 wrap_count; ++ u16 write_offset; ++ u32 *tb = kctx->jctx.tb; ++ u32 header_word; + -+ goto out; -+ } -+ } else { -+ /* These must occur after the above loop to ensure that an atom -+ * that depends on a previous atom with the same number behaves -+ * as expected */ -+ katom->event_code = BASE_JD_EVENT_DONE; -+ katom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ } ++ header_word = tb[1]; ++ KBASE_DEBUG_ASSERT(0 == (header_word & 0x1)); + -+ /* For invalid priority, be most lenient and choose the default */ -+ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); -+ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) -+ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; -+ katom->sched_priority = sched_prio; ++ wrap_count = (header_word >> 1) & 0x7FFF; ++ write_offset = (header_word >> 16) & 0xFFFF; + -+ /* Create a new atom recording all dependencies it was set up with. */ -+ KBASE_TLSTREAM_TL_NEW_ATOM( -+ katom, -+ kbase_jd_atom_id(kctx, katom)); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority); -+ KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); -+ for (i = 0; i < 2; i++) -+ if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( -+ &katom->dep[i])) { -+ KBASE_TLSTREAM_TL_DEP_ATOM_ATOM( -+ (void *)kbase_jd_katom_dep_atom( -+ &katom->dep[i]), -+ (void *)katom); -+ } else if (BASE_JD_DEP_TYPE_INVALID != -+ user_atom->pre_dep[i].dependency_type) { -+ /* Resolved dependency. 
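/*
 * Illustrative user-space sketch (not part of the patch): the position-to-index
 * check done by kbasep_trace_seq_start() in this patch when the debugfs reader
 * walks a snapshot of the ring buffer (kbasep_trace_seq_next() then advances
 * the same snapshot one masked step at a time, stopping at "end").  "start" is
 * the oldest entry, "end" is one past the newest, and the buffer may have
 * wrapped, so start+pos is valid while it is below end (unwrapped case) or
 * below end+SIZE (wrapped case).  SIZE is an assumed power of two.
 */
#include <stdio.h>

#define MODEL_TRACE_SIZE 16u
#define MODEL_TRACE_MASK (MODEL_TRACE_SIZE - 1)

/* Return the buffer index for snapshot position pos, or -1 past the end. */
int model_seq_index(unsigned int start, unsigned int end, unsigned int pos)
{
	unsigned int i = start + pos;

	if ((end >= start && i >= end) || i >= end + MODEL_TRACE_SIZE)
		return -1;

	return (int)(i & MODEL_TRACE_MASK);
}

int main(void)
{
	/* Unwrapped snapshot: entries live at indices 2,3,4 */
	printf("%d %d %d %d\n",
	       model_seq_index(2, 5, 0), model_seq_index(2, 5, 1),
	       model_seq_index(2, 5, 2), model_seq_index(2, 5, 3));
	/* Wrapped snapshot: start=14, end=1 -> indices 14,15,0 */
	printf("%d %d %d %d\n",
	       model_seq_index(14, 1, 0), model_seq_index(14, 1, 1),
	       model_seq_index(14, 1, 2), model_seq_index(14, 1, 3));
	return 0;
}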
*/ -+ int dep_atom_number = -+ user_atom->pre_dep[i].atom_id; -+ struct kbase_jd_atom *dep_atom = -+ &jctx->atoms[dep_atom_number]; ++ /* mark as transaction in progress */ ++ tb[1] |= 0x1; ++ mb(); + -+ KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM( -+ (void *)dep_atom, -+ (void *)katom); ++ /* calculate new offset */ ++ write_offset++; ++ if (write_offset == kctx->jctx.tb_wrap_offset) { ++ /* wrap */ ++ write_offset = 1; ++ wrap_count++; ++ wrap_count &= 0x7FFF; /* 15bit wrap counter */ + } + -+ /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ -+ if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { -+ dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } -+ -+ /* Reject atoms with an invalid device_nr */ -+ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && -+ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { -+ dev_warn(kctx->kbdev->dev, -+ "Rejecting atom with invalid device_nr %d", -+ katom->device_nr); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } -+ -+ /* Reject atoms with invalid core requirements */ -+ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && -+ (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { -+ dev_warn(kctx->kbdev->dev, -+ "Rejecting atom with invalid core requirements"); -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } ++ /* store the trace entry at the selected offset */ ++ tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0); ++ tb[write_offset * 2 + 1] = reg_value; ++ mb(); + -+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { -+ /* handle what we need to do to access the external resources */ -+ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { -+ /* setup failed (no access, bad resource, unknown resource types, etc.) */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } ++ /* new header word */ ++ header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */ ++ tb[1] = header_word; + } ++ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); ++} + -+ /* Validate the atom. Function will return error if the atom is -+ * malformed. -+ * -+ * Soft-jobs never enter the job scheduler but have their own initialize method. -+ * -+ * If either fail then we immediately complete the atom with an error. 
-+ */ -+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { -+ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } -+ } else { -+ /* Soft-job */ -+ if (kbase_prepare_soft_job(katom) != 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } -+ } ++/* ++ * Device trace functions ++ */ ++#if KBASE_TRACE_ENABLE + -+#ifdef CONFIG_GPU_TRACEPOINTS -+ katom->work_id = atomic_inc_return(&jctx->work_id); -+ trace_gpu_job_enqueue((u32)kctx->id, katom->work_id, -+ kbasep_map_core_reqs_to_string(katom->core_req)); -+#endif ++static int kbasep_trace_init(struct kbase_device *kbdev) ++{ ++ struct kbase_trace *rbuf; + -+ if (queued && !IS_GPU_ATOM(katom)) { -+ ret = false; -+ goto out; -+ } -+#ifdef CONFIG_KDS -+ if (!katom->kds_dep_satisfied) { -+ /* Queue atom due to KDS dependency */ -+ ret = false; -+ goto out; -+ } -+#endif /* CONFIG_KDS */ ++ rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + ++ if (!rbuf) ++ return -EINVAL; + -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (kbase_fence_dep_count_read(katom) != -1) { -+ ret = false; -+ goto out; -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ ++ kbdev->trace_rbuf = rbuf; ++ spin_lock_init(&kbdev->trace_lock); ++ return 0; ++} + -+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) -+ == BASE_JD_REQ_SOFT_REPLAY) { -+ if (kbase_replay_process(katom)) -+ ret = false; -+ else -+ ret = jd_done_nolock(katom, NULL); ++static void kbasep_trace_term(struct kbase_device *kbdev) ++{ ++ kfree(kbdev->trace_rbuf); ++} + -+ goto out; -+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { -+ if (kbase_process_soft_job(katom) == 0) { -+ kbase_finish_soft_job(katom); -+ ret = jd_done_nolock(katom, NULL); -+ goto out; -+ } ++static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len) ++{ ++ s32 written = 0; + -+ ret = false; -+ } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ ret = kbasep_js_add_job(kctx, katom); -+ /* If job was cancelled then resolve immediately */ -+ if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) -+ ret = jd_done_nolock(katom, NULL); -+ } else { -+ /* This is a pure dependency. 
Resolve it immediately */ -+ ret = jd_done_nolock(katom, NULL); -+ } ++ /* Initial part of message */ ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); + -+ out: -+ return ret; -+} ++ if (trace_msg->katom) ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); + -+int kbase_jd_submit(struct kbase_context *kctx, -+ void __user *user_addr, u32 nr_atoms, u32 stride, -+ bool uk6_atom) -+{ -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ int err = 0; -+ int i; -+ bool need_to_try_schedule_context = false; -+ struct kbase_device *kbdev; -+ u32 latest_flush; ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); + -+ /* -+ * kbase_jd_submit isn't expected to fail and so all errors with the -+ * jobs are reported by immediately failing them (through event system) -+ */ -+ kbdev = kctx->kbdev; ++ /* NOTE: Could add function callbacks to handle different message types */ ++ /* Jobslot present */ ++ if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); + -+ beenthere(kctx, "%s", "Enter"); ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + -+ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); -+ return -EINVAL; -+ } ++ /* Refcount present */ ++ if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); + -+ if (stride != sizeof(base_jd_atom_v2)) { -+ dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); -+ return -EINVAL; -+ } ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + -+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms, -+ &kctx->timeline.jd_atoms_in_flight)); ++ /* Rest of message */ ++ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); ++} + -+ /* All atoms submitted in this call have the same flush ID */ -+ latest_flush = kbase_backend_get_current_flush_id(kbdev); ++static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg) ++{ ++ char buffer[DEBUG_MESSAGE_SIZE]; + -+ for (i = 0; i < nr_atoms; i++) { -+ struct base_jd_atom_v2 user_atom; -+ struct kbase_jd_atom *katom; ++ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); ++ dev_dbg(kbdev->dev, "%s", buffer); ++} + -+#ifdef BASE_LEGACY_UK6_SUPPORT -+ BUILD_BUG_ON(sizeof(struct base_jd_atom_v2_uk6) != -+ sizeof(base_jd_atom_v2)); ++void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) ++{ ++ unsigned long irqflags; ++ struct kbase_trace *trace_msg; + -+ if (uk6_atom) { -+ struct base_jd_atom_v2_uk6 user_atom_v6; -+ base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA}; ++ spin_lock_irqsave(&kbdev->trace_lock, irqflags); + -+ if (copy_from_user(&user_atom_v6, user_addr, -+ sizeof(user_atom_v6))) { -+ err = -EINVAL; -+ 
KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, -+ atomic_sub_return( -+ nr_atoms - i, -+ &kctx->timeline.jd_atoms_in_flight)); -+ break; -+ } -+ /* Convert from UK6 atom format to UK7 format */ -+ user_atom.jc = user_atom_v6.jc; -+ user_atom.udata = user_atom_v6.udata; -+ user_atom.extres_list = user_atom_v6.extres_list; -+ user_atom.nr_extres = user_atom_v6.nr_extres; -+ user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff); ++ trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; + -+ /* atom number 0 is used for no dependency atoms */ -+ if (!user_atom_v6.pre_dep[0]) -+ dep_types[0] = BASE_JD_DEP_TYPE_INVALID; ++ /* Fill the message */ ++ trace_msg->thread_id = task_pid_nr(current); ++ trace_msg->cpu = task_cpu(current); + -+ base_jd_atom_dep_set(&user_atom.pre_dep[0], -+ user_atom_v6.pre_dep[0], -+ dep_types[0]); ++ ktime_get_real_ts64(&trace_msg->timestamp); + -+ /* atom number 0 is used for no dependency atoms */ -+ if (!user_atom_v6.pre_dep[1]) -+ dep_types[1] = BASE_JD_DEP_TYPE_INVALID; ++ trace_msg->code = code; ++ trace_msg->ctx = ctx; + -+ base_jd_atom_dep_set(&user_atom.pre_dep[1], -+ user_atom_v6.pre_dep[1], -+ dep_types[1]); ++ if (NULL == katom) { ++ trace_msg->katom = false; ++ } else { ++ trace_msg->katom = true; ++ trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); ++ trace_msg->atom_udata[0] = katom->udata.blob[0]; ++ trace_msg->atom_udata[1] = katom->udata.blob[1]; ++ } + -+ user_atom.atom_number = user_atom_v6.atom_number; -+ user_atom.prio = user_atom_v6.prio; -+ user_atom.device_nr = user_atom_v6.device_nr; -+ } else { -+#endif /* BASE_LEGACY_UK6_SUPPORT */ -+ if (copy_from_user(&user_atom, user_addr, -+ sizeof(user_atom)) != 0) { -+ err = -EINVAL; -+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, -+ atomic_sub_return(nr_atoms - i, -+ &kctx->timeline.jd_atoms_in_flight)); -+ break; -+ } -+#ifdef BASE_LEGACY_UK6_SUPPORT -+ } -+#endif ++ trace_msg->gpu_addr = gpu_addr; ++ trace_msg->jobslot = jobslot; ++ trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); ++ trace_msg->info_val = info_val; ++ trace_msg->flags = flags; + -+#ifdef BASE_LEGACY_UK10_2_SUPPORT -+ if (KBASE_API_VERSION(10, 3) > kctx->api_version) -+ user_atom.core_req = (u32)(user_atom.compat_core_req -+ & 0x7fff); -+#endif /* BASE_LEGACY_UK10_2_SUPPORT */ ++ /* Update the ringbuffer indices */ ++ kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; ++ if (kbdev->trace_next_in == kbdev->trace_first_out) ++ kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; + -+ user_addr = (void __user *)((uintptr_t) user_addr + stride); ++ /* Done */ + -+ mutex_lock(&jctx->lock); -+#ifndef compiletime_assert -+#define compiletime_assert_defined -+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ -+while (false) -+#endif -+ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) >= -+ BASE_JD_ATOM_COUNT, -+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); -+ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == -+ sizeof(user_atom.atom_number), -+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); -+#ifdef compiletime_assert_defined -+#undef compiletime_assert -+#undef compiletime_assert_defined -+#endif -+ if (user_atom.atom_number >= BASE_JD_ATOM_COUNT) { -+ err = -EINVAL; -+ break; -+ } -+ user_atom.atom_number = -+ array_index_nospec(user_atom.atom_number, -+ BASE_JD_ATOM_COUNT); -+ katom = &jctx->atoms[user_atom.atom_number]; ++ spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); ++} + -+ /* Record the flush ID for the cache 
flush optimisation */ -+ katom->flush_id = latest_flush; ++void kbasep_trace_clear(struct kbase_device *kbdev) ++{ ++ unsigned long flags; + -+ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { -+ /* Atom number is already in use, wait for the atom to -+ * complete -+ */ -+ mutex_unlock(&jctx->lock); ++ spin_lock_irqsave(&kbdev->trace_lock, flags); ++ kbdev->trace_first_out = kbdev->trace_next_in; ++ spin_unlock_irqrestore(&kbdev->trace_lock, flags); ++} + -+ /* This thread will wait for the atom to complete. Due -+ * to thread scheduling we are not sure that the other -+ * thread that owns the atom will also schedule the -+ * context, so we force the scheduler to be active and -+ * hence eventually schedule this context at some point -+ * later. -+ */ -+ kbase_js_sched_all(kbdev); ++void kbasep_trace_dump(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ u32 start; ++ u32 end; + -+ if (wait_event_killable(katom->completed, -+ katom->status == -+ KBASE_JD_ATOM_STATE_UNUSED) != 0) { -+ /* We're being killed so the result code -+ * doesn't really matter -+ */ -+ return 0; -+ } -+ mutex_lock(&jctx->lock); -+ } ++ dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); ++ spin_lock_irqsave(&kbdev->trace_lock, flags); ++ start = kbdev->trace_first_out; ++ end = kbdev->trace_next_in; + -+ need_to_try_schedule_context |= -+ jd_submit_atom(kctx, &user_atom, katom); ++ while (start != end) { ++ struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; + -+ /* Register a completed job as a disjoint event when the GPU is in a disjoint state -+ * (ie. being reset or replaying jobs). -+ */ -+ kbase_disjoint_event_potential(kbdev); ++ kbasep_trace_dump_msg(kbdev, trace_msg); + -+ mutex_unlock(&jctx->lock); ++ start = (start + 1) & KBASE_TRACE_MASK; + } ++ dev_dbg(kbdev->dev, "TRACE_END"); + -+ if (need_to_try_schedule_context) -+ kbase_js_sched_all(kbdev); ++ spin_unlock_irqrestore(&kbdev->trace_lock, flags); + -+ return err; ++ KBASE_TRACE_CLEAR(kbdev); +} + -+KBASE_EXPORT_TEST_API(kbase_jd_submit); -+ -+void kbase_jd_done_worker(struct work_struct *data) ++static void kbasep_trace_hook_wrapper(void *param) +{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); -+ struct kbase_jd_context *jctx; -+ struct kbase_context *kctx; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ u64 cache_jc = katom->jc; -+ struct kbasep_js_atom_retained_state katom_retained_state; -+ bool context_idle; -+ base_jd_core_req core_req = katom->core_req; -+ u64 affinity = katom->affinity; -+ enum kbase_atom_coreref_state coreref_state = katom->coreref_state; -+ -+ /* Soft jobs should never reach this function */ -+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); ++ struct kbase_device *kbdev = (struct kbase_device *)param; + -+ kctx = katom->kctx; -+ jctx = &kctx->jctx; -+ kbdev = kctx->kbdev; -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++ kbasep_trace_dump(kbdev); ++} + -+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); ++#ifdef CONFIG_DEBUG_FS ++struct trace_seq_state { ++ struct kbase_trace trace_buf[KBASE_TRACE_SIZE]; ++ u32 start; ++ u32 end; ++}; + -+ kbase_backend_complete_wq(kbdev, katom); ++static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) ++{ ++ struct trace_seq_state *state = s->private; ++ int i; + -+ /* -+ * Begin transaction on JD context and JS context -+ */ -+ 
mutex_lock(&jctx->lock); -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE); -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ if (*pos > KBASE_TRACE_SIZE) ++ return NULL; ++ i = state->start + *pos; ++ if ((state->end >= state->start && i >= state->end) || ++ i >= state->end + KBASE_TRACE_SIZE) ++ return NULL; + -+ /* This worker only gets called on contexts that are scheduled *in*. This is -+ * because it only happens in response to an IRQ from a job that was -+ * running. -+ */ -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ i &= KBASE_TRACE_MASK; + -+ if (katom->event_code == BASE_JD_EVENT_STOPPED) { -+ /* Atom has been promoted to stopped */ -+ unsigned long flags; ++ return &state->trace_buf[i]; ++} + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++static void kbasep_trace_seq_stop(struct seq_file *s, void *data) ++{ ++} + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) ++{ ++ struct trace_seq_state *state = s->private; ++ int i; + -+ katom->status = KBASE_JD_ATOM_STATE_IN_JS; -+ kbase_js_unpull(kctx, katom); ++ (*pos)++; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&jctx->lock); ++ i = (state->start + *pos) & KBASE_TRACE_MASK; ++ if (i == state->end) ++ return NULL; + -+ return; -+ } ++ return &state->trace_buf[i]; ++} + -+ if (katom->event_code != BASE_JD_EVENT_DONE) -+ dev_err(kbdev->dev, -+ "t6xx: GPU fault 0x%02lx from job slot %d\n", -+ (unsigned long)katom->event_code, -+ katom->slot_nr); ++static int kbasep_trace_seq_show(struct seq_file *s, void *data) ++{ ++ struct kbase_trace *trace_msg = data; ++ char buffer[DEBUG_MESSAGE_SIZE]; + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) -+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom); ++ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); ++ seq_printf(s, "%s\n", buffer); ++ return 0; ++} + -+ /* Retain state before the katom disappears */ -+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); ++static const struct seq_operations kbasep_trace_seq_ops = { ++ .start = kbasep_trace_seq_start, ++ .next = kbasep_trace_seq_next, ++ .stop = kbasep_trace_seq_stop, ++ .show = kbasep_trace_seq_show, ++}; + -+ context_idle = kbase_js_complete_atom_wq(kctx, katom); ++static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) ++{ ++ struct kbase_device *kbdev = inode->i_private; ++ unsigned long flags; + -+ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); ++ struct trace_seq_state *state; + -+ kbasep_js_remove_job(kbdev, kctx, katom); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; -+ /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ -+ jd_done_nolock(katom, &kctx->completed_jobs); ++ state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); ++ if (!state) ++ return -ENOMEM; + -+ /* katom may have been freed now, do not use! 
*/ ++ spin_lock_irqsave(&kbdev->trace_lock, flags); ++ state->start = kbdev->trace_first_out; ++ state->end = kbdev->trace_next_in; ++ memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); ++ spin_unlock_irqrestore(&kbdev->trace_lock, flags); + -+ if (context_idle) { -+ unsigned long flags; ++ return 0; ++} + -+ context_idle = false; -+ mutex_lock(&js_devdata->queue_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++static const struct file_operations kbasep_trace_debugfs_fops = { ++ .open = kbasep_trace_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; + -+ /* If kbase_sched() has scheduled this context back in then -+ * KCTX_ACTIVE will have been set after we marked it as -+ * inactive, and another pm reference will have been taken, so -+ * drop our reference. But do not call kbase_jm_idle_ctx(), as -+ * the context is active and fast-starting is allowed. -+ * -+ * If an atom has been fast-started then kctx->atoms_pulled will -+ * be non-zero but KCTX_ACTIVE will still be false (as the -+ * previous pm reference has been inherited). Do NOT drop our -+ * reference, as it has been re-used, and leave the context as -+ * active. -+ * -+ * If no new atoms have been started then KCTX_ACTIVE will still -+ * be false and atoms_pulled will be zero, so drop the reference -+ * and call kbase_jm_idle_ctx(). -+ * -+ * As the checks are done under both the queue_mutex and -+ * hwaccess_lock is should be impossible for this to race -+ * with the scheduler code. -+ */ -+ if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || -+ !atomic_read(&kctx->atoms_pulled)) { -+ /* Calling kbase_jm_idle_ctx() here will ensure that -+ * atoms are not fast-started when we drop the -+ * hwaccess_lock. This is not performed if -+ * KCTX_ACTIVE is set as in that case another pm -+ * reference has been taken and a fast-start would be -+ * valid. 
-+ */ -+ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) -+ kbase_jm_idle_ctx(kbdev, kctx); -+ context_idle = true; -+ } else { -+ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); -+ } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->queue_mutex); -+ } ++void kbasep_trace_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("mali_trace", S_IRUGO, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_trace_debugfs_fops); ++} + -+ /* -+ * Transaction complete -+ */ -+ mutex_unlock(&jctx->lock); ++#else ++void kbasep_trace_debugfs_init(struct kbase_device *kbdev) ++{ ++} ++#endif /* CONFIG_DEBUG_FS */ + -+ /* Job is now no longer running, so can now safely release the context -+ * reference, and handle any actions that were logged against the atom's retained state */ ++#else /* KBASE_TRACE_ENABLE */ ++static int kbasep_trace_init(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++ return 0; ++} + -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); ++static void kbasep_trace_term(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+ kbase_js_sched_all(kbdev); ++static void kbasep_trace_hook_wrapper(void *param) ++{ ++ CSTD_UNUSED(param); ++} + -+ if (!atomic_dec_return(&kctx->work_count)) { -+ /* If worker now idle then post all events that jd_done_nolock() -+ * has queued */ -+ mutex_lock(&jctx->lock); -+ while (!list_empty(&kctx->completed_jobs)) { -+ struct kbase_jd_atom *atom = list_entry( -+ kctx->completed_jobs.next, -+ struct kbase_jd_atom, jd_item); -+ list_del(kctx->completed_jobs.next); ++void kbasep_trace_dump(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++#endif /* KBASE_TRACE_ENABLE */ + -+ kbase_event_post(kctx, atom); -+ } -+ mutex_unlock(&jctx->lock); ++void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value) ++{ ++ switch (control) { ++ case FBDUMP_CONTROL_ENABLE: ++ /* fall through */ ++ case FBDUMP_CONTROL_RATE: ++ /* fall through */ ++ case SW_COUNTER_ENABLE: ++ /* fall through */ ++ case FBDUMP_CONTROL_RESIZE_FACTOR: ++ kbdev->kbase_profiling_controls[control] = value; ++ break; ++ default: ++ dev_err(kbdev->dev, "Profiling control %d not found\n", control); ++ break; + } ++} + -+ kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, -+ coreref_state); ++/* ++ * Called by gator to control the production of ++ * profiling information at runtime ++ * */ + -+ if (context_idle) -+ kbase_pm_context_idle(kbdev); ++void _mali_profiling_control(u32 action, u32 value) ++{ ++ struct kbase_device *kbdev = NULL; + -+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); ++ /* find the first i.e. call with -1 */ ++ kbdev = kbase_find_device(-1); ++ ++ if (NULL != kbdev) ++ kbase_set_profiling_control(kbdev, action, value); +} ++KBASE_EXPORT_SYMBOL(_mali_profiling_control); + -+/** -+ * jd_cancel_worker - Work queue job cancel function. -+ * @data: a &struct work_struct +diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c +new file mode 100644 +index 000000000..f70bcccf4 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c +@@ -0,0 +1,76 @@ ++/* + * -+ * Only called as part of 'Zapping' a context (which occurs on termination). -+ * Operates serially with the kbase_jd_done_worker() on the work queue. ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * -+ * This can only be called on contexts that aren't scheduled. 
++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * We don't need to release most of the resources that would occur on -+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be -+ * running (by virtue of only being called on contexts that aren't -+ * scheduled). + */ -+static void jd_cancel_worker(struct work_struct *data) -+{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); -+ struct kbase_jd_context *jctx; -+ struct kbase_context *kctx; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool need_to_try_schedule_context; -+ bool attr_state_changed; -+ struct kbase_device *kbdev; -+ -+ /* Soft jobs should never reach this function */ -+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + -+ kctx = katom->kctx; -+ kbdev = kctx->kbdev; -+ jctx = &kctx->jctx; -+ js_kctx_info = &kctx->jctx.sched_info; + -+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + -+ /* This only gets called on contexts that are scheduled out. Hence, we must -+ * make sure we don't de-ref the number of running jobs (there aren't -+ * any), nor must we try to schedule out the context (it's already -+ * scheduled out). -+ */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++/* ++ * Base kernel disjoint events helper functions ++ */ + -+ /* Scheduler: Remove the job from the system */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++#include + -+ mutex_lock(&jctx->lock); ++void kbase_disjoint_init(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ need_to_try_schedule_context = jd_done_nolock(katom, NULL); -+ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to -+ * schedule the context. There's also no need for the jsctx_mutex to have been taken -+ * around this too. */ -+ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); ++ atomic_set(&kbdev->disjoint_event.count, 0); ++ atomic_set(&kbdev->disjoint_event.state, 0); ++} + -+ /* katom may have been freed now, do not use! */ -+ mutex_unlock(&jctx->lock); ++/* increment the disjoint event count */ ++void kbase_disjoint_event(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (attr_state_changed) -+ kbase_js_sched_all(kbdev); ++ atomic_inc(&kbdev->disjoint_event.count); +} + -+/** -+ * kbase_jd_done - Complete a job that has been removed from the Hardware -+ * @katom: atom which has been completed -+ * @slot_nr: slot the atom was on -+ * @end_timestamp: completion time -+ * @done_code: completion code -+ * -+ * This must be used whenever a job has been removed from the Hardware, e.g.: -+ * An IRQ indicates that the job finished (for both error and 'done' codes), or -+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. -+ * -+ * Some work is carried out immediately, and the rest is deferred onto a -+ * workqueue -+ * -+ * Context: -+ * This can be called safely from atomic context. 
-+ * The caller must hold kbdev->hwaccess_lock -+ */ -+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, -+ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) ++/* increment the state and the event counter */ ++void kbase_disjoint_state_up(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ -+ KBASE_DEBUG_ASSERT(katom); -+ kctx = katom->kctx; -+ KBASE_DEBUG_ASSERT(kctx); -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) -+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; ++ atomic_inc(&kbdev->disjoint_event.state); + -+ KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); ++ kbase_disjoint_event(kbdev); ++} + -+ kbase_job_check_leave_disjoint(kbdev, katom); ++/* decrement the state */ ++void kbase_disjoint_state_down(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); + -+ katom->slot_nr = slot_nr; ++ kbase_disjoint_event(kbdev); + -+ atomic_inc(&kctx->work_count); ++ atomic_dec(&kbdev->disjoint_event.state); ++} + -+#ifdef CONFIG_DEBUG_FS -+ /* a failed job happened and is waiting for dumping*/ -+ if (!katom->will_fail_event_code && -+ kbase_debug_job_fault_process(katom, katom->event_code)) -+ return; -+#endif ++/* increments the count only if the state is > 0 */ ++void kbase_disjoint_event_potential(struct kbase_device *kbdev) ++{ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ WARN_ON(work_pending(&katom->work)); -+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); -+ INIT_WORK(&katom->work, kbase_jd_done_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ if (atomic_read(&kbdev->disjoint_event.state)) ++ kbase_disjoint_event(kbdev); +} + -+KBASE_EXPORT_TEST_API(kbase_jd_done); -+ -+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) ++u32 kbase_disjoint_event_get(struct kbase_device *kbdev) +{ -+ struct kbase_context *kctx; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ KBASE_DEBUG_ASSERT(NULL != kbdev); -+ KBASE_DEBUG_ASSERT(NULL != katom); -+ kctx = katom->kctx; -+ KBASE_DEBUG_ASSERT(NULL != kctx); ++ return atomic_read(&kbdev->disjoint_event.count); ++} ++KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +new file mode 100644 +index 000000000..9197743c8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +@@ -0,0 +1,449 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + -+ /* This should only be done from a context that is not scheduled */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ WARN_ON(work_pending(&katom->work)); + -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as ++ * it will be set there. 
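The kbase_disjoint_* helpers added in this hunk keep two atomic counters: disjoint_event.state counts how many disjoint periods are currently open, and disjoint_event.count counts the events that have been reported; kbase_disjoint_event_potential() only bumps the count while state is non-zero. A minimal userspace model of that counting rule, using C11 atomics in place of the kernel's atomic_t (the names and the main() driver are illustrative, not part of the driver):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int disjoint_state;   /* open disjoint periods */
static atomic_int disjoint_count;   /* reported disjoint events */

static void disjoint_event(void)
{
        atomic_fetch_add(&disjoint_count, 1);
}

static void disjoint_state_up(void)
{
        atomic_fetch_add(&disjoint_state, 1);
        disjoint_event();
}

static void disjoint_state_down(void)
{
        disjoint_event();
        atomic_fetch_sub(&disjoint_state, 1);
}

/* Mirrors kbase_disjoint_event_potential(): counted only while a period is open. */
static void disjoint_event_potential(void)
{
        if (atomic_load(&disjoint_state))
                disjoint_event();
}

int main(void)
{
        disjoint_event_potential();          /* no period open: not counted */
        disjoint_state_up();                 /* count = 1 */
        disjoint_event_potential();          /* count = 2 */
        disjoint_state_down();               /* count = 3, period closed */
        disjoint_event_potential();          /* not counted */
        printf("disjoint events: %d\n", atomic_load(&disjoint_count));
        return 0;
}

Running it prints "disjoint events: 3"; the two calls made outside an open period are never counted, which is the property the callers of these helpers rely on.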
++ */ ++#include "mali_kbase_dma_fence.h" + -+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); -+ INIT_WORK(&katom->work, jd_cancel_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + ++#include + -+void kbase_jd_zap_context(struct kbase_context *kctx) -+{ -+ struct kbase_jd_atom *katom; -+ struct list_head *entry, *tmp; -+ struct kbase_device *kbdev; ++static void ++kbase_dma_fence_work(struct work_struct *pwork); + -+ KBASE_DEBUG_ASSERT(kctx); ++static void ++kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ kbdev = kctx->kbdev; ++ list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); ++} + -+ KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); ++static void ++kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) ++{ ++ list_del(&katom->queue); ++} + -+ kbase_js_zap_context(kctx); ++static int ++kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, ++ struct ww_acquire_ctx *ctx) ++{ ++ struct reservation_object *content_res = NULL; ++ unsigned int content_res_idx = 0; ++ unsigned int r; ++ int err = 0; + -+ mutex_lock(&kctx->jctx.lock); ++ ww_acquire_init(ctx, &reservation_ww_class); + -+ /* -+ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are -+ * queued outside the job scheduler. -+ */ ++retry: ++ for (r = 0; r < info->dma_fence_resv_count; r++) { ++ if (info->resv_objs[r] == content_res) { ++ content_res = NULL; ++ continue; ++ } + -+ del_timer_sync(&kctx->soft_job_timeout); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ katom = list_entry(entry, struct kbase_jd_atom, queue); -+ kbase_cancel_soft_job(katom); ++ err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); ++ if (err) ++ goto error; + } + ++ ww_acquire_done(ctx); ++ return err; + -+#ifdef CONFIG_KDS ++error: ++ content_res_idx = r; + -+ /* For each job waiting on a kds resource, cancel the wait and force the job to -+ * complete early, this is done so that we don't leave jobs outstanding waiting -+ * on kds resources which may never be released when contexts are zapped, resulting -+ * in a hang. -+ * -+ * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held, -+ * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job. -+ */ ++ /* Unlock the locked one ones */ ++ while (r--) ++ ww_mutex_unlock(&info->resv_objs[r]->lock); + -+ list_for_each(entry, &kctx->waiting_kds_resource) { -+ katom = list_entry(entry, struct kbase_jd_atom, node); ++ if (content_res) ++ ww_mutex_unlock(&content_res->lock); + -+ kbase_cancel_kds_wait_job(katom); ++ /* If we deadlock try with lock_slow and retry */ ++ if (err == -EDEADLK) { ++ content_res = info->resv_objs[content_res_idx]; ++ ww_mutex_lock_slow(&content_res->lock, ctx); ++ goto retry; + } -+#endif -+ -+#ifdef CONFIG_MALI_DMA_FENCE -+ kbase_dma_fence_cancel_all_atoms(kctx); -+#endif -+ -+ mutex_unlock(&kctx->jctx.lock); -+ -+#ifdef CONFIG_MALI_DMA_FENCE -+ /* Flush dma-fence workqueue to ensure that any callbacks that may have -+ * been queued are done before continuing. 
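kbase_dma_fence_lock_reservations() above takes an arbitrary set of reservation locks under one ww_acquire context: on -EDEADLK it releases everything it holds, sleeps on the contended lock with ww_mutex_lock_slow(), keeps that lock, and retries the pass while skipping it. A userspace sketch of the same back-off-and-retry shape, with POSIX try-locks standing in for the ww_mutex API (illustrative only: a failed trylock is not deadlock detection, and there is no wound/wait ordering here to guarantee forward progress):

#include <pthread.h>
#include <stddef.h>

static void lock_all(pthread_mutex_t **locks, size_t n)
{
        pthread_mutex_t *prelocked = NULL;
        size_t i;

retry:
        for (i = 0; i < n; i++) {
                if (locks[i] == prelocked)
                        continue;               /* taken during the last back-off */
                if (pthread_mutex_trylock(locks[i]) == 0)
                        continue;

                /* Contended: drop everything acquired in this pass. */
                for (size_t j = 0; j < i; j++)
                        if (locks[j] != prelocked)
                                pthread_mutex_unlock(locks[j]);
                if (prelocked)
                        pthread_mutex_unlock(prelocked);

                /* Block on the contended lock, keep it, start over. */
                pthread_mutex_lock(locks[i]);
                prelocked = locks[i];
                goto retry;
        }
}

static void unlock_all(pthread_mutex_t **locks, size_t n)
{
        for (size_t i = 0; i < n; i++)
                pthread_mutex_unlock(locks[i]);
}

int main(void)
{
        pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t *set[] = { &a, &b };

        lock_all(set, 2);
        unlock_all(set, 2);
        return 0;
}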
-+ */ -+ flush_workqueue(kctx->dma_fence.wq); -+#endif + -+ kbase_jm_wait_for_zero_jobs(kctx); ++ /* If we are here the function failed */ ++ ww_acquire_fini(ctx); ++ return err; +} + -+KBASE_EXPORT_TEST_API(kbase_jd_zap_context); -+ -+int kbase_jd_init(struct kbase_context *kctx) ++static void ++kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, ++ struct ww_acquire_ctx *ctx) +{ -+ int i; -+ int mali_err = 0; -+#ifdef CONFIG_KDS -+ int err; -+#endif /* CONFIG_KDS */ -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ -+ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", -+ WQ_HIGHPRI | WQ_UNBOUND, 1); -+ if (NULL == kctx->jctx.job_done_wq) { -+ mali_err = -ENOMEM; -+ goto out1; -+ } ++ unsigned int r; + -+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { -+ init_waitqueue_head(&kctx->jctx.atoms[i].completed); ++ for (r = 0; r < info->dma_fence_resv_count; r++) ++ ww_mutex_unlock(&info->resv_objs[r]->lock); ++ ww_acquire_fini(ctx); ++} + -+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); -+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); ++/** ++ * kbase_dma_fence_queue_work() - Queue work to handle @katom ++ * @katom: Pointer to atom for which to queue work ++ * ++ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and ++ * submit the atom. ++ */ ++static void ++kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ bool ret; + -+ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ -+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; -+ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; ++ INIT_WORK(&katom->work, kbase_dma_fence_work); ++ ret = queue_work(kctx->dma_fence.wq, &katom->work); ++ /* Warn if work was already queued, that should not happen. */ ++ WARN_ON(!ret); ++} + -+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) -+ kctx->jctx.atoms[i].dma_fence.context = -+ dma_fence_context_alloc(1); -+ atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); -+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); -+#endif -+ } ++/** ++ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom ++ * @katom: Katom to cancel ++ * ++ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. ++ */ ++static void ++kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) ++{ ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ mutex_init(&kctx->jctx.lock); ++ /* Cancel callbacks and clean up. */ ++ kbase_fence_free_callbacks(katom); + -+ init_waitqueue_head(&kctx->jctx.zero_jobs_wait); ++ /* Mark the atom as handled in case all fences signaled just before ++ * canceling the callbacks and the worker was queued. ++ */ ++ kbase_fence_dep_count_set(katom, -1); + -+ spin_lock_init(&kctx->jctx.tb_lock); ++ /* Prevent job_done_nolock from being called twice on an atom when ++ * there is a race between job completion and cancellation. 
++ */ + -+#ifdef CONFIG_KDS -+ err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear); -+ if (0 != err) { -+ mali_err = -EINVAL; -+ goto out2; ++ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { ++ /* Wait was cancelled - zap the atom */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(katom->kctx->kbdev); + } -+#endif /* CONFIG_KDS */ -+ -+ kctx->jctx.job_nr = 0; -+ INIT_LIST_HEAD(&kctx->completed_jobs); -+ atomic_set(&kctx->work_count, 0); -+ -+ return 0; -+ -+#ifdef CONFIG_KDS -+ out2: -+ destroy_workqueue(kctx->jctx.job_done_wq); -+#endif /* CONFIG_KDS */ -+ out1: -+ return mali_err; -+} -+ -+KBASE_EXPORT_TEST_API(kbase_jd_init); -+ -+void kbase_jd_exit(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx); -+ -+#ifdef CONFIG_KDS -+ kds_callback_term(&kctx->jctx.kds_cb); -+#endif /* CONFIG_KDS */ -+ /* Work queue is emptied by this */ -+ destroy_workqueue(kctx->jctx.job_done_wq); +} + -+KBASE_EXPORT_TEST_API(kbase_jd_exit); -diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c -new file mode 100644 -index 000000000..44643abf8 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c -@@ -0,0 +1,233 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++/** ++ * kbase_dma_fence_work() - Worker thread called when a fence is signaled ++ * @pwork: work_struct containing a pointer to a katom + * ++ * This function will clean and mark all dependencies as satisfied + */ ++static void ++kbase_dma_fence_work(struct work_struct *pwork) ++{ ++ struct kbase_jd_atom *katom; ++ struct kbase_jd_context *ctx; + ++ katom = container_of(pwork, struct kbase_jd_atom, work); ++ ctx = &katom->kctx->jctx; + ++ mutex_lock(&ctx->lock); ++ if (kbase_fence_dep_count_read(katom) != 0) ++ goto out; + -+#ifdef CONFIG_DEBUG_FS -+ -+#include -+#include -+#include -+#include -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#include -+#endif -+ -+struct kbase_jd_debugfs_depinfo { -+ u8 id; -+ char type; -+}; -+ -+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, -+ struct seq_file *sfile) -+{ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ struct kbase_sync_fence_info info; -+ int res; ++ kbase_fence_dep_count_set(katom, -1); + -+ switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ res = kbase_sync_fence_out_info_get(atom, &info); -+ if (res == 0) -+ seq_printf(sfile, "Sa([%p]%d) ", -+ info.fence, info.status); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ res = kbase_sync_fence_in_info_get(atom, &info); -+ if (res == 0) -+ seq_printf(sfile, "Wa([%p]%d) ", -+ info.fence, info.status); -+ break; -+ default: -+ break; -+ } -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ /* Remove atom from list of dma-fence waiting atoms. */ ++ kbase_dma_fence_waiters_remove(katom); ++ /* Cleanup callbacks. 
*/ ++ kbase_fence_free_callbacks(katom); ++ /* ++ * Queue atom on GPU, unless it has already completed due to a failing ++ * dependency. Run jd_done_nolock() on the katom if it is completed. ++ */ ++ if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) ++ jd_done_nolock(katom, NULL); ++ else ++ kbase_jd_dep_clear_locked(katom); + -+#ifdef CONFIG_MALI_DMA_FENCE -+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { -+ struct kbase_fence_cb *cb; ++out: ++ mutex_unlock(&ctx->lock); ++} + -+ if (atom->dma_fence.fence) { ++static void +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence = atom->dma_fence.fence; ++kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) +#else -+ struct dma_fence *fence = atom->dma_fence.fence; ++kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +#endif ++{ ++ struct kbase_fence_cb *kcb = container_of(cb, ++ struct kbase_fence_cb, ++ fence_cb); ++ struct kbase_jd_atom *katom = kcb->katom; + -+ seq_printf(sfile, -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+ "Sd(%u#%u: %s) ", -+#else -+ "Sd(%llu#%u: %s) ", -+#endif -+ fence->context, -+ fence->seqno, -+ dma_fence_is_signaled(fence) ? -+ "signaled" : "active"); -+ } ++ /* If the atom is zapped dep_count will be forced to a negative number ++ * preventing this callback from ever scheduling work. Which in turn ++ * would reschedule the atom. ++ */ + -+ list_for_each_entry(cb, &atom->dma_fence.callbacks, -+ node) { ++ if (kbase_fence_dep_count_dec_and_test(katom)) ++ kbase_dma_fence_queue_work(katom); ++} ++ ++static int ++kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, ++ struct reservation_object *resv, ++ bool exclusive) ++{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence = cb->fence; ++ struct fence *excl_fence = NULL; ++ struct fence **shared_fences = NULL; +#else -+ struct dma_fence *fence = cb->fence; ++ struct dma_fence *excl_fence = NULL; ++ struct dma_fence **shared_fences = NULL; +#endif ++ unsigned int shared_count = 0; ++ int err, i; + -+ seq_printf(sfile, -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+ "Wd(%u#%u: %s) ", -+#else -+ "Wd(%llu#%u: %s) ", -+#endif -+ fence->context, -+ fence->seqno, -+ dma_fence_is_signaled(fence) ? -+ "signaled" : "active"); -+ } -+ } -+#endif /* CONFIG_MALI_DMA_FENCE */ ++ err = reservation_object_get_fences_rcu(resv, ++ &excl_fence, ++ &shared_count, ++ &shared_fences); ++ if (err) ++ return err; + -+} ++ if (excl_fence) { ++ err = kbase_fence_add_callback(katom, ++ excl_fence, ++ kbase_dma_fence_cb); + -+static void kbasep_jd_debugfs_atom_deps( -+ struct kbase_jd_debugfs_depinfo *deps, -+ struct kbase_jd_atom *atom) -+{ -+ struct kbase_context *kctx = atom->kctx; -+ int i; ++ /* Release our reference, taken by reservation_object_get_fences_rcu(), ++ * to the fence. We have set up our callback (if that was possible), ++ * and it's the fence's owner is responsible for singling the fence ++ * before allowing it to disappear. ++ */ ++ dma_fence_put(excl_fence); + -+ for (i = 0; i < 2; i++) { -+ deps[i].id = (unsigned)(atom->dep[i].atom ? 
-+ kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); ++ if (err) ++ goto out; ++ } + -+ switch (atom->dep[i].dep_type) { -+ case BASE_JD_DEP_TYPE_INVALID: -+ deps[i].type = ' '; -+ break; -+ case BASE_JD_DEP_TYPE_DATA: -+ deps[i].type = 'D'; -+ break; -+ case BASE_JD_DEP_TYPE_ORDER: -+ deps[i].type = '>'; -+ break; -+ default: -+ deps[i].type = '?'; -+ break; ++ if (exclusive) { ++ for (i = 0; i < shared_count; i++) { ++ err = kbase_fence_add_callback(katom, ++ shared_fences[i], ++ kbase_dma_fence_cb); ++ if (err) ++ goto out; + } + } -+} -+/** -+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry -+ * -+ * This function is called to get the contents of the JD atoms debugfs file. -+ * This is a report of all atoms managed by kbase_jd_context.atoms -+ * -+ * Return: 0 if successfully prints data in debugfs entry file, failure -+ * otherwise -+ */ -+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_context *kctx = sfile->private; -+ struct kbase_jd_atom *atoms; -+ unsigned long irq_flags; -+ int i; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ /* Print version */ -+ seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); -+ -+ /* Print U/K API version */ -+ seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, -+ BASE_UK_VERSION_MINOR); -+ -+ /* Print table heading */ -+ seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); -+ -+ atoms = kctx->jctx.atoms; -+ /* General atom states */ -+ mutex_lock(&kctx->jctx.lock); -+ /* JS-related states */ -+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); -+ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { -+ struct kbase_jd_atom *atom = &atoms[i]; -+ s64 start_timestamp = 0; -+ struct kbase_jd_debugfs_depinfo deps[2]; -+ -+ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) -+ continue; -+ -+ /* start_timestamp is cleared as soon as the atom leaves UNUSED state -+ * and set before a job is submitted to the h/w, a non-zero value means -+ * it is valid */ -+ if (ktime_to_ns(atom->start_timestamp)) -+ start_timestamp = ktime_to_ns( -+ ktime_sub(ktime_get(), atom->start_timestamp)); -+ -+ kbasep_jd_debugfs_atom_deps(deps, atom); -+ -+ seq_printf(sfile, -+ "%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ", -+ i, atom->core_req, atom->status, -+ atom->coreref_state, -+ deps[0].type, deps[0].id, -+ deps[1].type, deps[1].id, -+ start_timestamp); + ++ /* Release all our references to the shared fences, taken by ++ * reservation_object_get_fences_rcu(). We have set up our callback (if ++ * that was possible), and it's the fence's owner is responsible for ++ * signaling the fence before allowing it to disappear. ++ */ ++out: ++ for (i = 0; i < shared_count; i++) ++ dma_fence_put(shared_fences[i]); ++ kfree(shared_fences); + -+ kbase_jd_debugfs_fence_info(atom, sfile); -+ -+ seq_puts(sfile, "\n"); ++ if (err) { ++ /* ++ * On error, cancel and clean up all callbacks that was set up ++ * before the error. 
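kbase_dma_fence_cb() earlier in this hunk resolves the race between fence signalling and setup with a single atomic dependency count: every signalled fence decrements it, and only the caller that brings it to zero queues the completion work. kbase_dma_fence_wait() further down seeds the count with a bias of one and drops that bias with a final dec_and_test once all callbacks are registered, so a fence that signals mid-setup cannot complete the atom early. A standalone model of that rule with C11 atomics (how kbase_fence_add_callback() accounts for its own reference is not shown in this hunk, so the per-callback increment below is an assumption):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int dep_count;

static void queue_completion_work(void)
{
        puts("all dependencies satisfied, queue the atom");
}

/* Returns non-zero only for the caller that drops the count to zero. */
static int dep_count_dec_and_test(void)
{
        return atomic_fetch_sub(&dep_count, 1) == 1;
}

/* Models kbase_dma_fence_cb(): runs once per signalled fence. */
static void on_dependency_signalled(void)
{
        if (dep_count_dec_and_test())
                queue_completion_work();
}

int main(void)
{
        int ndeps = 3;

        atomic_store(&dep_count, 1);            /* setup bias */
        atomic_fetch_add(&dep_count, ndeps);    /* one per registered callback (assumed) */

        for (int i = 0; i < ndeps; i++)
                on_dependency_signalled();      /* fences fire, possibly concurrently */

        if (dep_count_dec_and_test())           /* drop the setup bias */
                queue_completion_work();        /* runs only if everything already fired */
        return 0;
}

Whichever decrement reaches zero, and only that one, queues the work, which is why the callback path and the setup path can race freely.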
++ */ ++ kbase_fence_free_callbacks(katom); + } -+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); -+ mutex_unlock(&kctx->jctx.lock); -+ -+ return 0; -+} -+ + -+/** -+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file -+ * @in: &struct inode pointer -+ * @file: &struct file pointer -+ * -+ * Return: file descriptor -+ */ -+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); ++ return err; +} + -+static const struct file_operations kbasep_jd_debugfs_atoms_fops = { -+ .open = kbasep_jd_debugfs_atoms_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) ++void kbase_dma_fence_add_reservation(struct reservation_object *resv, ++ struct kbase_dma_fence_resv_info *info, ++ bool exclusive) +{ -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ unsigned int i; + -+ /* Expose all atoms */ -+ debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, -+ &kbasep_jd_debugfs_atoms_fops); ++ for (i = 0; i < info->dma_fence_resv_count; i++) { ++ /* Duplicate resource, ignore */ ++ if (info->resv_objs[i] == resv) ++ return; ++ } + ++ info->resv_objs[info->dma_fence_resv_count] = resv; ++ if (exclusive) ++ set_bit(info->dma_fence_resv_count, ++ info->dma_fence_excl_bitmap); ++ (info->dma_fence_resv_count)++; +} + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h -new file mode 100644 -index 000000000..0935f1db7 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h -@@ -0,0 +1,39 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ -+ -+ -+ -+/** -+ * @file mali_kbase_jd_debugfs.h -+ * Header file for job dispatcher-related entries in debugfs -+ */ -+ -+#ifndef _KBASE_JD_DEBUGFS_H -+#define _KBASE_JD_DEBUGFS_H -+ -+#include -+ -+#include -+ -+#define MALI_JD_DEBUGFS_VERSION 2 -+ -+/** -+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system -+ * -+ * @kctx Pointer to kbase_context -+ */ -+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); -+ -+#endif /*_KBASE_JD_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c -new file mode 100644 -index 000000000..0c5c6a6f7 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c -@@ -0,0 +1,131 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
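kbase_dma_fence_add_reservation() above appends a reservation object to resv_objs[], skips duplicates, and records requests for exclusive access in dma_fence_excl_bitmap. The same bookkeeping in a self-contained userspace form, with a fixed-size array and a plain unsigned long bitmap (names are illustrative, not driver API):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_RESOURCES 32

struct resv_info {
        const void   *objs[MAX_RESOURCES];
        size_t        count;
        unsigned long excl_bitmap;      /* bit i set: objs[i] needs exclusive access */
};

static void add_resource(struct resv_info *info, const void *res, bool exclusive)
{
        if (info->count >= MAX_RESOURCES)
                return;

        for (size_t i = 0; i < info->count; i++)
                if (info->objs[i] == res)
                        return;         /* duplicate: already tracked */

        info->objs[info->count] = res;
        if (exclusive)
                info->excl_bitmap |= 1UL << info->count;
        info->count++;
}

int main(void)
{
        struct resv_info info = { .count = 0 };
        int buf_a, buf_b;

        add_resource(&info, &buf_a, false);
        add_resource(&info, &buf_b, true);
        add_resource(&info, &buf_a, true);      /* duplicate, ignored */

        printf("%zu resources, excl bitmap 0x%lx\n", info.count, info.excl_bitmap);
        return 0;                               /* prints: 2 resources, excl bitmap 0x2 */
}

As in the driver function, a duplicate is ignored outright, so the first registration of a resource decides whether its slot is marked exclusive.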
-+ * -+ */ ++int kbase_dma_fence_wait(struct kbase_jd_atom *katom, ++ struct kbase_dma_fence_resv_info *info) ++{ ++ int err, i; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ struct ww_acquire_ctx ww_ctx; + ++ lockdep_assert_held(&katom->kctx->jctx.lock); + ++ fence = kbase_fence_out_new(katom); ++ if (!fence) { ++ err = -ENOMEM; ++ dev_err(katom->kctx->kbdev->dev, ++ "Error %d creating fence.\n", err); ++ return err; ++ } + ++ kbase_fence_dep_count_set(katom, 1); + -+/* -+ * HW access job manager common APIs -+ */ ++ err = kbase_dma_fence_lock_reservations(info, &ww_ctx); ++ if (err) { ++ dev_err(katom->kctx->kbdev->dev, ++ "Error %d locking reservations.\n", err); ++ kbase_fence_dep_count_set(katom, -1); ++ kbase_fence_out_remove(katom); ++ return err; ++ } + -+#include -+#include "mali_kbase_hwaccess_jm.h" -+#include "mali_kbase_jm.h" ++ for (i = 0; i < info->dma_fence_resv_count; i++) { ++ struct reservation_object *obj = info->resv_objs[i]; + -+/** -+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot -+ * @js on the active context. -+ * @kbdev: Device pointer -+ * @js: Job slot to run on -+ * @nr_jobs_to_submit: Number of jobs to attempt to submit -+ * -+ * Return: true if slot can still be submitted on, false if slot is now full. -+ */ -+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, -+ int nr_jobs_to_submit) -+{ -+ struct kbase_context *kctx; -+ int i; ++ if (!test_bit(i, info->dma_fence_excl_bitmap)) { ++ err = reservation_object_reserve_shared(obj); ++ if (err) { ++ dev_err(katom->kctx->kbdev->dev, ++ "Error %d reserving space for shared fence.\n", err); ++ goto end; ++ } + -+ kctx = kbdev->hwaccess.active_kctx; ++ err = kbase_dma_fence_add_reservation_callback(katom, obj, false); ++ if (err) { ++ dev_err(katom->kctx->kbdev->dev, ++ "Error %d adding reservation to callback.\n", err); ++ goto end; ++ } + -+ if (!kctx) -+ return true; ++ reservation_object_add_shared_fence(obj, fence); ++ } else { ++ err = kbase_dma_fence_add_reservation_callback(katom, obj, true); ++ if (err) { ++ dev_err(katom->kctx->kbdev->dev, ++ "Error %d adding reservation to callback.\n", err); ++ goto end; ++ } + -+ for (i = 0; i < nr_jobs_to_submit; i++) { -+ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); ++ reservation_object_add_excl_fence(obj, fence); ++ } ++ } + -+ if (!katom) -+ return true; /* Context has no jobs on this slot */ ++end: ++ kbase_dma_fence_unlock_reservations(info, &ww_ctx); + -+ kbase_backend_run_atom(kbdev, katom); ++ if (likely(!err)) { ++ /* Test if the callbacks are already triggered */ ++ if (kbase_fence_dep_count_dec_and_test(katom)) { ++ kbase_fence_dep_count_set(katom, -1); ++ kbase_fence_free_callbacks(katom); ++ } else { ++ /* Add katom to the list of dma-buf fence waiting atoms ++ * only if it is still waiting. ++ */ ++ kbase_dma_fence_waiters_add(katom); ++ } ++ } else { ++ /* There was an error, cancel callbacks, set dep_count to -1 to ++ * indicate that the atom has been handled (the caller will ++ * kill it for us), signal the fence, free callbacks and the ++ * fence. 
++ */ ++ kbase_fence_free_callbacks(katom); ++ kbase_fence_dep_count_set(katom, -1); ++ kbase_dma_fence_signal(katom); + } + -+ return false; /* Slot ringbuffer should now be full */ ++ return err; +} + -+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) ++void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) +{ -+ u32 ret_mask = 0; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ while (js_mask) { -+ int js = ffs(js_mask) - 1; -+ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); ++ struct list_head *list = &kctx->dma_fence.waiting_resource; + -+ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) -+ ret_mask |= (1 << js); ++ while (!list_empty(list)) { ++ struct kbase_jd_atom *katom; + -+ js_mask &= ~(1 << js); ++ katom = list_first_entry(list, struct kbase_jd_atom, queue); ++ kbase_dma_fence_waiters_remove(katom); ++ kbase_dma_fence_cancel_atom(katom); + } -+ -+ return ret_mask; +} + -+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) ++void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (!down_trylock(&js_devdata->schedule_sem)) { -+ kbase_jm_kick(kbdev, js_mask); -+ up(&js_devdata->schedule_sem); -+ } ++ /* Cancel callbacks and clean up. */ ++ if (kbase_fence_free_callbacks(katom)) ++ kbase_dma_fence_queue_work(katom); +} + -+void kbase_jm_try_kick_all(struct kbase_device *kbdev) ++void kbase_dma_fence_signal(struct kbase_jd_atom *katom) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ if (!katom->dma_fence.fence) ++ return; + -+ if (!down_trylock(&js_devdata->schedule_sem)) { -+ kbase_jm_kick_all(kbdev); -+ up(&js_devdata->schedule_sem); -+ } -+} ++ /* Signal the atom's fence. 
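kbase_dma_fence_cancel_all_atoms() above drains the waiting_resource list by repeatedly detaching the head entry before cancelling it, which keeps the walk valid even if the cancel path frees the entry or changes the list. The same tear-down shape on a plain singly-linked list (illustrative; the kernel list_head machinery and the jctx lock are not modelled here):

#include <stdio.h>
#include <stdlib.h>

struct waiter {
        struct waiter *next;
        int id;
};

static void cancel_waiter(struct waiter *w)
{
        printf("cancelling waiter %d\n", w->id);
        free(w);                        /* the handler may dispose of the node... */
}

static void cancel_all(struct waiter **head)
{
        while (*head) {
                struct waiter *w = *head;

                *head = w->next;        /* ...so unlink it before calling it */
                cancel_waiter(w);
        }
}

int main(void)
{
        struct waiter *head = NULL;

        for (int i = 3; i >= 1; i--) {
                struct waiter *w = malloc(sizeof(*w));

                if (!w)
                        return 1;
                w->next = head;
                w->id = i;
                head = w;
        }
        cancel_all(&head);
        return 0;
}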
*/ ++ dma_fence_signal(katom->dma_fence.fence); + -+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ kbase_fence_out_remove(katom); + -+ if (kbdev->hwaccess.active_kctx == kctx) -+ kbdev->hwaccess.active_kctx = NULL; ++ kbase_fence_free_callbacks(katom); +} + -+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) ++void kbase_dma_fence_term(struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (katom->event_code != BASE_JD_EVENT_STOPPED && -+ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { -+ return kbase_js_complete_atom(katom, NULL); -+ } else { -+ kbase_js_unpull(katom->kctx, katom); -+ return NULL; -+ } ++ destroy_workqueue(kctx->dma_fence.wq); ++ kctx->dma_fence.wq = NULL; +} + -+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, ktime_t *end_timestamp) ++int kbase_dma_fence_init(struct kbase_context *kctx) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); + -+ return kbase_js_complete_atom(katom, end_timestamp); -+} ++ kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", ++ WQ_UNBOUND, 1, kctx->pid); ++ if (!kctx->dma_fence.wq) ++ return -ENOMEM; + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h ++ return 0; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h new file mode 100644 -index 000000000..a74ee24c8 +index 000000000..c9ab40350 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h -@@ -0,0 +1,110 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h +@@ -0,0 +1,131 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -373796,108 +372795,129 @@ index 000000000..a74ee24c8 + + + ++#ifndef _KBASE_DMA_FENCE_H_ ++#define _KBASE_DMA_FENCE_H_ + -+/* -+ * Job manager common APIs -+ */ ++#ifdef CONFIG_MALI_DMA_FENCE + -+#ifndef _KBASE_JM_H_ -+#define _KBASE_JM_H_ ++#include ++#include ++#include ++ ++ ++/* Forward declaration from mali_kbase_defs.h */ ++struct kbase_jd_atom; ++struct kbase_context; + +/** -+ * kbase_jm_kick() - Indicate that there are jobs ready to run. -+ * @kbdev: Device pointer -+ * @js_mask: Mask of the job slots that can be pulled from. -+ * -+ * Caller must hold the hwaccess_lock and schedule_sem semaphore ++ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects ++ * @resv_objs: Array of reservation objects to attach the ++ * new fence to. ++ * @dma_fence_resv_count: Number of reservation objects in the array. ++ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive. + * -+ * Return: Mask of the job slots that can still be submitted to. ++ * This is used by some functions to pass around a collection of data about ++ * reservation objects. + */ -+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); ++struct kbase_dma_fence_resv_info { ++ struct reservation_object **resv_objs; ++ unsigned int dma_fence_resv_count; ++ unsigned long *dma_fence_excl_bitmap; ++}; + +/** -+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job -+ * slots. 
-+ * @kbdev: Device pointer -+ * -+ * Caller must hold the hwaccess_lock and schedule_sem semaphore ++ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs ++ * @resv: Reservation object to add to the array. ++ * @info: Pointer to struct with current reservation info ++ * @exclusive: Boolean indicating if exclusive access is needed + * -+ * Return: Mask of the job slots that can still be submitted to. ++ * The function adds a new reservation_object to an existing array of ++ * reservation_objects. At the same time keeps track of which objects require ++ * exclusive access in dma_fence_excl_bitmap. + */ -+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) -+{ -+ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -+} ++void kbase_dma_fence_add_reservation(struct reservation_object *resv, ++ struct kbase_dma_fence_resv_info *info, ++ bool exclusive); + +/** -+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick -+ * @kbdev: Device pointer -+ * @js_mask: Mask of the job slots that can be pulled from -+ * Context: Caller must hold hwaccess_lock ++ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs ++ * @katom: Katom with the external dependency. ++ * @info: Pointer to struct with current reservation info + * -+ * If schedule_sem can be immediately obtained then this function will call -+ * kbase_jm_kick() otherwise it will do nothing. ++ * Return: An error code or 0 if succeeds + */ -+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); ++int kbase_dma_fence_wait(struct kbase_jd_atom *katom, ++ struct kbase_dma_fence_resv_info *info); + +/** -+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all -+ * @kbdev: Device pointer -+ * Context: Caller must hold hwaccess_lock ++ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx ++ * @kctx: Pointer to kbase context + * -+ * If schedule_sem can be immediately obtained then this function will call -+ * kbase_jm_kick_all() otherwise it will do nothing. ++ * This function will cancel and clean up all katoms on @kctx that is waiting ++ * on dma-buf fences. ++ * ++ * Locking: jctx.lock needs to be held when calling this function. + */ -+void kbase_jm_try_kick_all(struct kbase_device *kbdev); ++void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); + +/** -+ * kbase_jm_idle_ctx() - Mark a context as idle. -+ * @kbdev: Device pointer -+ * @kctx: Context to mark as idle ++ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom ++ * @katom: Pointer to katom whose callbacks are to be canceled + * -+ * No more atoms will be pulled from this context until it is marked as active -+ * by kbase_js_use_ctx(). ++ * This function cancels all dma-buf fence callbacks on @katom, but does not ++ * cancel the katom itself. + * -+ * The context should have no atoms currently pulled from it -+ * (kctx->atoms_pulled == 0). ++ * The caller is responsible for ensuring that jd_done_nolock is called on ++ * @katom. + * -+ * Caller must hold the hwaccess_lock ++ * Locking: jctx.lock must be held when calling this function. 
+ */ -+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom); + +/** -+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has -+ * been soft-stopped or will fail due to a -+ * dependency -+ * @kbdev: Device pointer -+ * @katom: Atom that has been stopped or will be failed ++ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait ++ * @katom: Pointer to katom to signal and clean up + * -+ * Return: Atom that has now been unblocked and can now be run, or NULL if none ++ * This function will signal the @katom's fence, if it has one, and clean up ++ * the callback data from the katom's wait on earlier fences. ++ * ++ * Locking: jctx.lock must be held while calling this function. + */ -+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++void kbase_dma_fence_signal(struct kbase_jd_atom *katom); + +/** -+ * kbase_jm_complete() - Complete an atom -+ * @kbdev: Device pointer -+ * @katom: Atom that has completed -+ * @end_timestamp: Timestamp of atom completion -+ * -+ * Return: Atom that has now been unblocked and can now be run, or NULL if none ++ * kbase_dma_fence_term() - Terminate Mali dma-fence context ++ * @kctx: kbase context to terminate + */ -+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom, ktime_t *end_timestamp); ++void kbase_dma_fence_term(struct kbase_context *kctx); + -+#endif /* _KBASE_JM_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c ++/** ++ * kbase_dma_fence_init() - Initialize Mali dma-fence context ++ * @kctx: kbase context to initialize ++ */ ++int kbase_dma_fence_init(struct kbase_context *kctx); ++ ++ ++#else /* CONFIG_MALI_DMA_FENCE */ ++/* Dummy functions for when dma-buf fence isn't enabled. */ ++ ++static inline int kbase_dma_fence_init(struct kbase_context *kctx) ++{ ++ return 0; ++} ++ ++static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} ++#endif /* CONFIG_MALI_DMA_FENCE */ ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c new file mode 100644 -index 000000000..10a1d5909 +index 000000000..188148645 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c -@@ -0,0 +1,2834 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_event.c +@@ -0,0 +1,259 @@ +/* + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -373910,2834 +372930,5277 @@ index 000000000..10a1d5909 + * + */ + -+/* #define ENABLE_DEBUG_LOG */ -+#include "./platform/rk/custom_log.h" + -+/* -+ * Job Scheduler Implementation -+ */ -+#include -+#include -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+#include -+#endif -+#include -+#include -+#include + -+#include -+#include + -+#include "mali_kbase_jm.h" -+#include "mali_kbase_hwaccess_jm.h" + -+/* -+ * Private types -+ */ ++#include ++#include ++#include + -+/* Bitpattern indicating the result of releasing a context */ -+enum { -+ /* The context was descheduled - caller should try scheduling in a new -+ * one to keep the runpool full */ -+ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), -+ /* Ctx attributes were changed - caller should try scheduling all -+ * contexts */ -+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) -+}; ++static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++{ ++ struct base_jd_udata data; + -+typedef u32 kbasep_js_release_result; ++ lockdep_assert_held(&kctx->jctx.lock); + -+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { -+ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ -+ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ -+ KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ -+}; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); ++ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + -+const base_jd_prio -+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { -+ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ -+ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ -+ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ -+}; ++ data = katom->udata; + ++ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); + -+/* -+ * Private function prototypes -+ */ -+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( -+ struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state); ++ KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx); ++ KBASE_TLSTREAM_TL_DEL_ATOM(katom); + -+static int kbase_js_get_slot(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++ katom->status = KBASE_JD_ATOM_STATE_UNUSED; + -+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, -+ kbasep_js_ctx_job_cb callback); ++ wake_up(&katom->completed); + -+/* Helper for trace subcodes */ -+#if KBASE_TRACE_ENABLE -+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ return atomic_read(&kctx->refcount); -+} -+#else /* KBASE_TRACE_ENABLE */ -+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ CSTD_UNUSED(kbdev); -+ CSTD_UNUSED(kctx); -+ return 0; ++ return data; +} -+#endif /* KBASE_TRACE_ENABLE */ -+ -+/* -+ * Private functions -+ */ + -+/** -+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements -+ * @features: JSn_FEATURE register value -+ * -+ * Given a JSn_FEATURE register value returns the core requirements that match -+ * -+ * Return: Core requirement bit mask -+ */ -+static base_jd_core_req core_reqs_from_jsn_features(u16 features) ++int kbase_event_pending(struct kbase_context *ctx) +{ -+ base_jd_core_req core_req = 0u; 
++ KBASE_DEBUG_ASSERT(ctx); + -+ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) -+ core_req |= BASE_JD_REQ_V; ++ return (atomic_read(&ctx->event_count) != 0) || ++ (atomic_read(&ctx->event_closed) != 0); ++} + -+ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) -+ core_req |= BASE_JD_REQ_CF; ++KBASE_EXPORT_TEST_API(kbase_event_pending); + -+ if ((features & JS_FEATURE_COMPUTE_JOB) != 0) -+ core_req |= BASE_JD_REQ_CS; ++int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) ++{ ++ struct kbase_jd_atom *atom; + -+ if ((features & JS_FEATURE_TILER_JOB) != 0) -+ core_req |= BASE_JD_REQ_T; ++ KBASE_DEBUG_ASSERT(ctx); + -+ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) -+ core_req |= BASE_JD_REQ_FS; ++ mutex_lock(&ctx->event_mutex); + -+ return core_req; -+} ++ if (list_empty(&ctx->event_list)) { ++ if (!atomic_read(&ctx->event_closed)) { ++ mutex_unlock(&ctx->event_mutex); ++ return -1; ++ } + -+static void kbase_js_sync_timers(struct kbase_device *kbdev) -+{ -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&kbdev->js_data.runpool_mutex); -+} ++ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ ++ mutex_unlock(&ctx->event_mutex); ++ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; ++ memset(&uevent->udata, 0, sizeof(uevent->udata)); ++ dev_dbg(ctx->kbdev->dev, ++ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", ++ BASE_JD_EVENT_DRV_TERMINATED); ++ return 0; ++ } + -+/* Hold the mmu_hw_mutex and hwaccess_lock for this */ -+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ bool result = false; -+ int as_nr; ++ /* normal event processing */ ++ atomic_dec(&ctx->event_count); ++ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); ++ list_del(ctx->event_list.next); + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_devdata = &kbdev->js_data; ++ mutex_unlock(&ctx->event_mutex); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); ++ uevent->event_code = atom->event_code; ++ uevent->atom_number = (atom - ctx->jctx.atoms); + -+ as_nr = kctx->as_nr; -+ if (atomic_read(&kctx->refcount) > 0) { -+ KBASE_DEBUG_ASSERT(as_nr >= 0); ++ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_free_external_resources(atom); + -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, -+ NULL, 0u, atomic_read(&kctx->refcount)); -+ result = true; -+ } ++ mutex_lock(&ctx->jctx.lock); ++ uevent->udata = kbase_event_process(ctx, atom); ++ mutex_unlock(&ctx->jctx.lock); + -+ return result; ++ return 0; +} + ++KBASE_EXPORT_TEST_API(kbase_event_dequeue); ++ +/** -+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * @prio: Priority to check. -+ * -+ * Return true if there are no atoms to pull. There may be running atoms in the -+ * ring buffer even if there are no atoms to pull. It is also possible for the -+ * ring buffer to be full (with running atoms) when this functions returns -+ * true. -+ * -+ * Return: true if there are no atoms to pull, false otherwise. 
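kbase_event_post() and kbase_event_coalesce() earlier in this hunk park BASE_JD_REQ_EVENT_COALESCE completions on a side list; they only become visible to userspace when a reportable event arrives, at which point the parked events are spliced onto the main list and the event count is raised by the whole batch. A counter-level model of that rule (the locking and the actual list splice are omitted, and the names are illustrative):

#include <stdio.h>

static int event_count;         /* what the dequeue path sees */
static int coalesce_count;      /* parked completions, not yet visible */

static void post_coalesced_event(void)
{
        coalesce_count++;
}

static void post_event(void)
{
        int batch = 1 + coalesce_count; /* this event plus everything parked */

        coalesce_count = 0;
        event_count += batch;
}

int main(void)
{
        post_coalesced_event();
        post_coalesced_event();
        printf("visible events: %d\n", event_count);    /* 0: still parked */
        post_event();
        printf("visible events: %d\n", event_count);    /* 3: batch flushed */
        return 0;
}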
++ * kbase_event_process_noreport_worker - Worker for processing atoms that do not ++ * return an event but do have external ++ * resources ++ * @data: Work structure + */ -+static inline bool -+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) ++static void kbase_event_process_noreport_worker(struct work_struct *data) +{ -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_free_external_resources(katom); + -+ return RB_EMPTY_ROOT(&rb->runnable_tree); ++ mutex_lock(&kctx->jctx.lock); ++ kbase_event_process(kctx, katom); ++ mutex_unlock(&kctx->jctx.lock); +} + +/** -+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no -+ * pullable atoms -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * -+ * Caller must hold hwaccess_lock ++ * kbase_event_process_noreport - Process atoms that do not return an event ++ * @kctx: Context pointer ++ * @katom: Atom to be processed + * -+ * Return: true if the ring buffers for all priorities have no pullable atoms, -+ * false otherwise. ++ * Atoms that do not have external resources will be processed immediately. ++ * Atoms that do have external resources will be processed on a workqueue, in ++ * order to avoid locking issues. + */ -+static inline bool -+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) ++static void kbase_event_process_noreport(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+ int prio; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) -+ return false; ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++ INIT_WORK(&katom->work, kbase_event_process_noreport_worker); ++ queue_work(kctx->event_workq, &katom->work); ++ } else { ++ kbase_event_process(kctx, katom); + } -+ -+ return true; +} + +/** -+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. -+ * @kctx: Pointer to kbase context with the queue. -+ * @js: Job slot id to iterate. -+ * @prio: Priority id to iterate. -+ * @callback: Function pointer to callback. -+ * -+ * Iterate over a queue and invoke @callback for each entry in the queue, and -+ * remove the entry from the queue. ++ * kbase_event_coalesce - Move pending events to the main event list ++ * @kctx: Context pointer + * -+ * If entries are added to the queue while this is running those entries may, or -+ * may not be covered. To ensure that all entries in the buffer have been -+ * enumerated when this function returns jsctx->lock must be held when calling -+ * this function. ++ * kctx->event_list and kctx->event_coalesce_count must be protected ++ * by a lock unless this is the last thread using them ++ * (and we're about to terminate the lock). + * -+ * The HW access lock must always be held when calling this function. 
++ * Return: The number of pending events moved to the main event list + */ -+static void -+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, -+ kbasep_js_ctx_job_cb callback) ++static int kbase_event_coalesce(struct kbase_context *kctx) +{ -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; ++ const int event_count = kctx->event_coalesce_count; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ /* Join the list of pending events onto the tail of the main list ++ and reset it */ ++ list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); ++ kctx->event_coalesce_count = 0; + -+ while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { -+ struct rb_node *node = rb_first(&queue->runnable_tree); -+ struct kbase_jd_atom *entry = rb_entry(node, -+ struct kbase_jd_atom, runnable_tree_node); ++ /* Return the number of events moved */ ++ return event_count; ++} + -+ rb_erase(node, &queue->runnable_tree); -+ callback(kctx->kbdev, entry); ++void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) ++{ ++ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { ++ if (atom->event_code == BASE_JD_EVENT_DONE) { ++ /* Don't report the event */ ++ kbase_event_process_noreport(ctx, atom); ++ return; ++ } + } + -+ while (!list_empty(&queue->x_dep_head)) { -+ struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, -+ struct kbase_jd_atom, queue); ++ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { ++ /* Don't report the event */ ++ kbase_event_process_noreport(ctx, atom); ++ return; ++ } ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED); ++ if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { ++ /* Don't report the event until other event(s) have completed */ ++ mutex_lock(&ctx->event_mutex); ++ list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); ++ ++ctx->event_coalesce_count; ++ mutex_unlock(&ctx->event_mutex); ++ } else { ++ /* Report the event and any pending events now */ ++ int event_count = 1; + -+ list_del(queue->x_dep_head.next); ++ mutex_lock(&ctx->event_mutex); ++ event_count += kbase_event_coalesce(ctx); ++ list_add_tail(&atom->dep_item[0], &ctx->event_list); ++ atomic_add(event_count, &ctx->event_count); ++ mutex_unlock(&ctx->event_mutex); + -+ callback(kctx->kbdev, entry); ++ kbase_event_wakeup(ctx); + } +} ++KBASE_EXPORT_TEST_API(kbase_event_post); + -+/** -+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue -+ * @kctx: Pointer to kbase context with queue. -+ * @js: Job slot id to iterate. -+ * @callback: Function pointer to callback. -+ * -+ * Iterate over all the different priorities, and for each call -+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback -+ * for each entry, and remove the entry from the queue. -+ */ -+static inline void -+jsctx_queue_foreach(struct kbase_context *kctx, int js, -+ kbasep_js_ctx_job_cb callback) ++void kbase_event_close(struct kbase_context *kctx) +{ -+ int prio; -+ -+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) -+ jsctx_queue_foreach_prio(kctx, js, prio, callback); ++ mutex_lock(&kctx->event_mutex); ++ atomic_set(&kctx->event_closed, true); ++ mutex_unlock(&kctx->event_mutex); ++ kbase_event_wakeup(kctx); +} + -+/** -+ * jsctx_rb_peek_prio(): - Check buffer and get next atom -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * @prio: Priority id to check. 
-+ * -+ * Check the ring buffer for the specified @js and @prio and return a pointer to -+ * the next atom, unless the ring buffer is empty. -+ * -+ * Return: Pointer to next atom in buffer, or NULL if there is no atom. -+ */ -+static inline struct kbase_jd_atom * -+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) ++int kbase_event_init(struct kbase_context *kctx) +{ -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; -+ struct rb_node *node; ++ KBASE_DEBUG_ASSERT(kctx); + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ INIT_LIST_HEAD(&kctx->event_list); ++ INIT_LIST_HEAD(&kctx->event_coalesce_list); ++ mutex_init(&kctx->event_mutex); ++ atomic_set(&kctx->event_count, 0); ++ kctx->event_coalesce_count = 0; ++ atomic_set(&kctx->event_closed, false); ++ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); + -+ node = rb_first(&rb->runnable_tree); -+ if (!node) -+ return NULL; ++ if (NULL == kctx->event_workq) ++ return -EINVAL; + -+ return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); ++ return 0; +} + -+/** -+ * jsctx_rb_peek(): - Check all priority buffers and get next atom -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @js: Job slot id to check. -+ * -+ * Check the ring buffers for all priorities, starting from -+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a -+ * pointer to the next atom, unless all the priority's ring buffers are empty. -+ * -+ * Caller must hold the hwaccess_lock. -+ * -+ * Return: Pointer to next atom in buffer, or NULL if there is no atom. -+ */ -+static inline struct kbase_jd_atom * -+jsctx_rb_peek(struct kbase_context *kctx, int js) ++KBASE_EXPORT_TEST_API(kbase_event_init); ++ ++void kbase_event_cleanup(struct kbase_context *kctx) +{ -+ int prio; ++ int event_count; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(kctx->event_workq); + -+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { -+ struct kbase_jd_atom *katom; ++ flush_workqueue(kctx->event_workq); ++ destroy_workqueue(kctx->event_workq); + -+ katom = jsctx_rb_peek_prio(kctx, js, prio); -+ if (katom) -+ return katom; -+ } ++ /* We use kbase_event_dequeue to remove the remaining events as that ++ * deals with all the cleanup needed for the atoms. ++ * ++ * Note: use of kctx->event_list without a lock is safe because this must be the last ++ * thread using it (because we're about to terminate the lock) ++ */ ++ event_count = kbase_event_coalesce(kctx); ++ atomic_add(event_count, &kctx->event_count); + -+ return NULL; ++ while (!list_empty(&kctx->event_list)) { ++ struct base_jd_event_v2 event; ++ ++ kbase_event_dequeue(kctx, &event); ++ } +} + -+/** -+ * jsctx_rb_pull(): - Mark atom in list as running -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @katom: Pointer to katom to pull. ++KBASE_EXPORT_TEST_API(kbase_event_cleanup); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.c b/drivers/gpu/arm/midgard/mali_kbase_fence.c +new file mode 100644 +index 000000000..3bcfb38c3 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_fence.c +@@ -0,0 +1,200 @@ ++/* + * -+ * Mark an atom previously obtained from jsctx_rb_peek() as running. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * @katom must currently be at the head of the ring buffer. + */ -+static inline void -+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ int prio = katom->sched_priority; -+ int js = katom->slot_nr; -+ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* Atoms must be pulled in the correct order. */ -+ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); + -+ rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); -+} ++#include ++#include ++#include ++#include ++#include ++#include + -+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) ++/* Spin lock protecting all Mali fences as fence->lock. */ ++static DEFINE_SPINLOCK(kbase_fence_lock); + -+static void -+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++static const char * ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++kbase_fence_get_driver_name(struct fence *fence) ++#else ++kbase_fence_get_driver_name(struct dma_fence *fence) ++#endif +{ -+ int prio = katom->sched_priority; -+ int js = katom->slot_nr; -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; -+ struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ while (*new) { -+ struct kbase_jd_atom *entry = container_of(*new, -+ struct kbase_jd_atom, runnable_tree_node); -+ -+ parent = *new; -+ if (LESS_THAN_WRAP(katom->age, entry->age)) -+ new = &((*new)->rb_left); -+ else -+ new = &((*new)->rb_right); -+ } -+ -+ /* Add new node and rebalance tree. */ -+ rb_link_node(&katom->runnable_tree_node, parent, new); -+ rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); ++ return kbase_drv_name; +} + -+/** -+ * jsctx_rb_unpull(): - Undo marking of atom in list as running -+ * @kctx: Pointer to kbase context with ring buffer. -+ * @katom: Pointer to katom to unpull. -+ * -+ * Undo jsctx_rb_pull() and put @katom back in the queue. -+ * -+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were -+ * pulled. 
-+ */ -+static inline void -+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++static const char * ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++kbase_fence_get_timeline_name(struct fence *fence) ++#else ++kbase_fence_get_timeline_name(struct dma_fence *fence) ++#endif +{ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ jsctx_tree_add(kctx, katom); ++ return kbase_timeline_name; +} + -+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, -+ int js, -+ bool is_scheduled); -+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js); -+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js); -+ -+/* -+ * Functions private to KBase ('Protected' functions) -+ */ -+int kbasep_js_devdata_init(struct kbase_device * const kbdev) ++static bool ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++kbase_fence_enable_signaling(struct fence *fence) ++#else ++kbase_fence_enable_signaling(struct dma_fence *fence) ++#endif +{ -+ struct kbasep_js_device_data *jsdd; -+ int i; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ jsdd = &kbdev->js_data; -+ -+#ifdef CONFIG_MALI_DEBUG -+ /* Soft-stop will be disabled on a single context by default unless -+ * softstop_always is set */ -+ jsdd->softstop_always = false; -+#endif /* CONFIG_MALI_DEBUG */ -+ jsdd->nr_all_contexts_running = 0; -+ jsdd->nr_user_contexts_running = 0; -+ jsdd->nr_contexts_pullable = 0; -+ atomic_set(&jsdd->nr_contexts_runnable, 0); -+ /* No ctx allowed to submit */ -+ jsdd->runpool_irq.submit_allowed = 0u; -+ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, -+ sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); -+ memset(jsdd->runpool_irq.slot_affinities, 0, -+ sizeof(jsdd->runpool_irq.slot_affinities)); -+ memset(jsdd->runpool_irq.slot_affinity_refcount, 0, -+ sizeof(jsdd->runpool_irq.slot_affinity_refcount)); -+ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); -+ -+ /* Config attributes */ -+ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; -+ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; -+ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) -+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408; -+ else -+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; -+ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; -+ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) -+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408; -+ else -+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; -+ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; -+ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; -+ jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; -+ atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); -+ -+ dev_dbg(kbdev->dev, "JS Config Attribs: "); -+ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", -+ jsdd->scheduling_period_ns); -+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", -+ jsdd->soft_stop_ticks); -+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", -+ jsdd->soft_stop_ticks_cl); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", -+ jsdd->hard_stop_ticks_ss); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", -+ jsdd->hard_stop_ticks_cl); -+ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", -+ jsdd->hard_stop_ticks_dumping); -+ dev_dbg(kbdev->dev, 
"\tgpu_reset_ticks_ss:%u", -+ jsdd->gpu_reset_ticks_ss); -+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", -+ jsdd->gpu_reset_ticks_cl); -+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", -+ jsdd->gpu_reset_ticks_dumping); -+ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", -+ jsdd->ctx_timeslice_ns); -+ dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", -+ atomic_read(&jsdd->soft_job_timeout_ms)); -+ -+ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && -+ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && -+ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && -+ jsdd->hard_stop_ticks_dumping < -+ jsdd->gpu_reset_ticks_dumping)) { -+ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); -+ return -EINVAL; -+ } ++ return true; ++} + -+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS -+ dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", -+ jsdd->soft_stop_ticks, -+ jsdd->scheduling_period_ns); ++static void ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++kbase_fence_fence_value_str(struct fence *fence, char *str, int size) ++#else ++kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) +#endif -+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", -+ jsdd->hard_stop_ticks_ss, -+ jsdd->hard_stop_ticks_dumping, -+ jsdd->scheduling_period_ns); ++{ ++#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) ++ snprintf(str, size, "%u", fence->seqno); ++#else ++ snprintf(str, size, "%llu", fence->seqno); +#endif -+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS -+ dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); ++} ++ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++const struct fence_ops kbase_fence_ops = { ++ .wait = fence_default_wait, ++#else ++const struct dma_fence_ops kbase_fence_ops = { ++ .wait = dma_fence_default_wait, +#endif ++ .get_driver_name = kbase_fence_get_driver_name, ++ .get_timeline_name = kbase_fence_get_timeline_name, ++ .enable_signaling = kbase_fence_enable_signaling, ++ .fence_value_str = kbase_fence_fence_value_str ++}; + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) -+ jsdd->js_reqs[i] = core_reqs_from_jsn_features( -+ kbdev->gpu_props.props.raw_props.js_features[i]); ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++struct fence * ++kbase_fence_out_new(struct kbase_jd_atom *katom) ++#else ++struct dma_fence * ++kbase_fence_out_new(struct kbase_jd_atom *katom) ++#endif ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ /* On error, we could continue on: providing none of the below resources -+ * rely on the ones above */ ++ WARN_ON(katom->dma_fence.fence); + -+ mutex_init(&jsdd->runpool_mutex); -+ mutex_init(&jsdd->queue_mutex); -+ spin_lock_init(&kbdev->hwaccess_lock); -+ sema_init(&jsdd->schedule_sem, 1); ++ fence = kzalloc(sizeof(*fence), GFP_KERNEL); ++ if (!fence) ++ return NULL; + -+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { -+ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]); -+ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]); -+ } ++ dma_fence_init(fence, ++ &kbase_fence_ops, ++ &kbase_fence_lock, ++ 
katom->dma_fence.context, ++ atomic_inc_return(&katom->dma_fence.seqno)); + -+ return 0; -+} ++ katom->dma_fence.fence = fence; + -+void kbasep_js_devdata_halt(struct kbase_device *kbdev) -+{ -+ CSTD_UNUSED(kbdev); ++ return fence; +} + -+void kbasep_js_devdata_term(struct kbase_device *kbdev) ++bool ++kbase_fence_free_callbacks(struct kbase_jd_atom *katom) +{ -+ struct kbasep_js_device_data *js_devdata; -+ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ struct kbase_fence_cb *cb, *tmp; ++ bool res = false; + -+ js_devdata = &kbdev->js_data; ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+ /* The caller must de-register all contexts before calling this -+ */ -+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); -+ KBASE_DEBUG_ASSERT(memcmp( -+ js_devdata->runpool_irq.ctx_attr_ref_count, -+ zero_ctx_attr_ref_count, -+ sizeof(zero_ctx_attr_ref_count)) == 0); -+ CSTD_UNUSED(zero_ctx_attr_ref_count); -+} ++ /* Clean up and free callbacks. */ ++ list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { ++ bool ret; + -+int kbasep_js_kctx_init(struct kbase_context * const kctx) -+{ -+ struct kbase_device *kbdev; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ int i, j; ++ /* Cancel callbacks that hasn't been called yet. */ ++ ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); ++ if (ret) { ++ int ret; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ /* Fence had not signaled, clean up after ++ * canceling. ++ */ ++ ret = atomic_dec_return(&katom->dma_fence.dep_count); + -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ if (unlikely(ret == 0)) ++ res = true; ++ } + -+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) -+ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); ++ /* ++ * Release the reference taken in ++ * kbase_fence_add_callback(). 
++ */ ++ dma_fence_put(cb->fence); ++ list_del(&cb->node); ++ kfree(cb); ++ } + -+ js_kctx_info = &kctx->jctx.sched_info; ++ return res; ++} + -+ js_kctx_info->ctx.nr_jobs = 0; -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); -+ kbase_ctx_flag_clear(kctx, KCTX_DYING); -+ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, -+ sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++int ++kbase_fence_add_callback(struct kbase_jd_atom *katom, ++ struct fence *fence, ++ fence_func_t callback) ++#else ++int ++kbase_fence_add_callback(struct kbase_jd_atom *katom, ++ struct dma_fence *fence, ++ dma_fence_func_t callback) ++#endif ++{ ++ int err = 0; ++ struct kbase_fence_cb *kbase_fence_cb; + -+ /* Initially, the context is disabled from submission until the create -+ * flags are set */ -+ kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); ++ if (!fence) ++ return -EINVAL; + -+ /* On error, we could continue on: providing none of the below resources -+ * rely on the ones above */ -+ mutex_init(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); ++ if (!kbase_fence_cb) ++ return -ENOMEM; + -+ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); ++ kbase_fence_cb->fence = fence; ++ kbase_fence_cb->katom = katom; ++ INIT_LIST_HEAD(&kbase_fence_cb->node); + -+ for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { -+ for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { -+ INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); -+ kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; -+ } ++ err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, ++ callback); ++ if (err == -ENOENT) { ++ /* Fence signaled, clear the error and return */ ++ err = 0; ++ kfree(kbase_fence_cb); ++ } else if (err) { ++ kfree(kbase_fence_cb); ++ } else { ++ /* ++ * Get reference to fence that will be kept until callback gets ++ * cleaned up in kbase_fence_free_callbacks(). ++ */ ++ dma_fence_get(fence); ++ atomic_inc(&katom->dma_fence.dep_count); ++ /* Add callback to katom's list of callbacks */ ++ list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); + } + -+ return 0; ++ return err; +} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.h b/drivers/gpu/arm/midgard/mali_kbase_fence.h +new file mode 100644 +index 000000000..639cc2ef4 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_fence.h +@@ -0,0 +1,275 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+void kbasep_js_kctx_term(struct kbase_context *kctx) -+{ -+ struct kbase_device *kbdev; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ int js; -+ bool update_ctx_count = false; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ kbdev = kctx->kbdev; -+ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ js_kctx_info = &kctx->jctx.sched_info; ++#ifndef _KBASE_FENCE_H_ ++#define _KBASE_FENCE_H_ + -+ /* The caller must de-register all jobs before calling this */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); ++/* ++ * mali_kbase_fence.[hc] has common fence code used by both ++ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences ++ * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel ++ */ + -+ mutex_lock(&kbdev->js_data.queue_mutex); -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++#include ++#include "mali_kbase_fence_defs.h" ++#include "mali_kbase.h" + -+ if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { -+ WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ update_ctx_count = true; -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ } ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++extern const struct fence_ops kbase_fence_ops; ++#else ++extern const struct dma_fence_ops kbase_fence_ops; ++#endif + -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&kbdev->js_data.queue_mutex); ++/** ++* struct kbase_fence_cb - Mali dma-fence callback data struct ++* @fence_cb: Callback function ++* @katom: Pointer to katom that is waiting on this callback ++* @fence: Pointer to the fence object on which this callback is waiting ++* @node: List head for linking this callback to the katom ++*/ ++struct kbase_fence_cb { ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence_cb fence_cb; ++ struct fence *fence; ++#else ++ struct dma_fence_cb fence_cb; ++ struct dma_fence *fence; ++#endif ++ struct kbase_jd_atom *katom; ++ struct list_head node; ++}; + -+ if (update_ctx_count) { -+ mutex_lock(&kbdev->js_data.runpool_mutex); -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&kbdev->js_data.runpool_mutex); -+ } -+} ++/** ++ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom ++ * @katom: Atom to create an output fence for ++ * ++ * return: A new fence object on success, NULL on failure. ++ */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); ++#else ++struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); ++#endif + ++#if defined(CONFIG_SYNC_FILE) +/** -+ * kbase_js_ctx_list_add_pullable_nolock - Variant of -+ * kbase_jd_ctx_list_add_pullable() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use ++ * kbase_fence_fence_in_set() - Assign input fence to atom ++ * @katom: Atom to assign input fence to ++ * @fence: Input fence to assign to atom + * -+ * Caller must hold hwaccess_lock ++ * This function will take ownership of one fence reference! 
++ */ ++#define kbase_fence_fence_in_set(katom, fence) \ ++ do { \ ++ WARN_ON((katom)->dma_fence.fence_in); \ ++ (katom)->dma_fence.fence_in = fence; \ ++ } while (0) ++#endif ++ ++/** ++ * kbase_fence_out_remove() - Removes the output fence from atom ++ * @katom: Atom to remove output fence for + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * This will also release the reference to this fence which the atom keeps + */ -+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js) ++static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ -+ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_pullable[js]); -+ -+ if (!kctx->slots_pullable) { -+ kbdev->js_data.nr_contexts_pullable++; -+ ret = true; -+ if (!atomic_read(&kctx->atoms_pulled)) { -+ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); -+ atomic_inc(&kbdev->js_data.nr_contexts_runnable); -+ } ++ if (katom->dma_fence.fence) { ++ dma_fence_put(katom->dma_fence.fence); ++ katom->dma_fence.fence = NULL; + } -+ kctx->slots_pullable |= (1 << js); -+ -+ return ret; +} + ++#if defined(CONFIG_SYNC_FILE) +/** -+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of -+ * kbase_js_ctx_list_add_pullable_head() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * Caller must hold hwaccess_lock ++ * kbase_fence_out_remove() - Removes the input fence from atom ++ * @katom: Atom to remove input fence for + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * This will also release the reference to this fence which the atom keeps + */ -+static bool kbase_js_ctx_list_add_pullable_head_nolock( -+ struct kbase_device *kbdev, struct kbase_context *kctx, int js) ++static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ -+ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_pullable[js]); -+ -+ if (!kctx->slots_pullable) { -+ kbdev->js_data.nr_contexts_pullable++; -+ ret = true; -+ if (!atomic_read(&kctx->atoms_pulled)) { -+ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); -+ atomic_inc(&kbdev->js_data.nr_contexts_runnable); -+ } ++ if (katom->dma_fence.fence_in) { ++ dma_fence_put(katom->dma_fence.fence_in); ++ katom->dma_fence.fence_in = NULL; + } -+ kctx->slots_pullable |= (1 << js); -+ -+ return ret; +} ++#endif + +/** -+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the -+ * per-slot pullable context queue -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * If the context is on either the pullable or unpullable queues, then it is -+ * removed before being added to the head. -+ * -+ * This function should be used when a context has been scheduled, but no jobs -+ * can currently be pulled from it. 
++ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us ++ * @katom: Atom to check output fence for + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Return: true if fence exists and is valid, otherwise false + */ -+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js) ++static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) +{ -+ bool ret; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return ret; ++ return katom->dma_fence.fence && ++ katom->dma_fence.fence->ops == &kbase_fence_ops; +} + +/** -+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the -+ * per-slot unpullable context queue -+ * @kbdev: Device pointer -+ * @kctx: Context to add to queue -+ * @js: Job slot to use -+ * -+ * The context must already be on the per-slot pullable queue. It will be -+ * removed from the pullable queue before being added to the unpullable queue. -+ * -+ * This function should be used when a context has been pulled from, and there -+ * are no jobs remaining on the specified slot. -+ * -+ * Caller must hold hwaccess_lock ++ * kbase_fence_out_signal() - Signal output fence of atom ++ * @katom: Atom to signal output fence for ++ * @status: Status to signal with (0 for success, < 0 for error) + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * Return: 0 on success, < 0 on error + */ -+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js) ++static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, ++ int status) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], -+ &kbdev->js_data.ctx_list_unpullable[js]); -+ -+ if (kctx->slots_pullable == (1 << js)) { -+ kbdev->js_data.nr_contexts_pullable--; -+ ret = true; -+ if (!atomic_read(&kctx->atoms_pulled)) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ } ++ if (status) { ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) ++ fence_set_error(katom->dma_fence.fence, status); ++#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) ++ dma_fence_set_error(katom->dma_fence.fence, status); ++#else ++ katom->dma_fence.fence->status = status; ++#endif + } -+ kctx->slots_pullable &= ~(1 << js); -+ -+ return ret; ++ return dma_fence_signal(katom->dma_fence.fence); +} + +/** -+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable -+ * or unpullable context queues -+ * @kbdev: Device pointer -+ * @kctx: Context to remove from queue -+ * @js: Job slot to use ++ * kbase_fence_add_callback() - Add callback on @fence to block @katom ++ * @katom: Pointer to katom that will be blocked by @fence ++ * @fence: Pointer to fence on which to set up the callback ++ * @callback: Pointer to function to be called when fence is signaled + * -+ * The context must already be on one of the queues. ++ * Caller needs to hold a reference to @fence when calling this function, and ++ * the caller is responsible for releasing that reference. 
An additional ++ * reference to @fence will be taken when the callback was successfully set up ++ * and @fence needs to be kept valid until the callback has been called and ++ * cleanup have been done. + * -+ * This function should be used when a context has no jobs on the GPU, and no -+ * jobs remaining for the specified slot. ++ * Return: 0 on success: fence was either already signaled, or callback was ++ * set up. Negative error code is returned on error. ++ */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++int kbase_fence_add_callback(struct kbase_jd_atom *katom, ++ struct fence *fence, ++ fence_func_t callback); ++#else ++int kbase_fence_add_callback(struct kbase_jd_atom *katom, ++ struct dma_fence *fence, ++ dma_fence_func_t callback); ++#endif ++ ++/** ++ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value ++ * @katom: Atom to set dep_count for ++ * @val: value to set dep_count to + * -+ * Caller must hold hwaccess_lock ++ * The dep_count is available to the users of this module so that they can ++ * synchronize completion of the wait with cancellation and adding of more ++ * callbacks. For instance, a user could do the following: + * -+ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ * dep_count set to 1 ++ * callback #1 added, dep_count is increased to 2 ++ * callback #1 happens, dep_count decremented to 1 ++ * since dep_count > 0, no completion is done ++ * callback #2 is added, dep_count is increased to 2 ++ * dep_count decremented to 1 ++ * callback #2 happens, dep_count decremented to 0 ++ * since dep_count now is zero, completion executes ++ * ++ * The dep_count can also be used to make sure that the completion only ++ * executes once. This is typically done by setting dep_count to -1 for the ++ * thread that takes on this responsibility. + */ -+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ int js) ++static inline void ++kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) +{ -+ bool ret = false; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); -+ -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ -+ if (kctx->slots_pullable == (1 << js)) { -+ kbdev->js_data.nr_contexts_pullable--; -+ ret = true; -+ if (!atomic_read(&kctx->atoms_pulled)) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ } -+ } -+ kctx->slots_pullable &= ~(1 << js); -+ -+ return ret; ++ atomic_set(&katom->dma_fence.dep_count, val); +} + +/** -+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() -+ * where the caller must hold -+ * hwaccess_lock -+ * @kbdev: Device pointer -+ * @js: Job slot to use ++ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count ++ * @katom: Atom to decrement dep_count for + * -+ * Caller must hold hwaccess_lock ++ * See @kbase_fence_dep_count_set for general description about dep_count + * -+ * Return: Context to use for specified slot. 
-+ * NULL if no contexts present for specified slot ++ * Return: true if value was decremented to zero, otherwise false + */ -+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( -+ struct kbase_device *kbdev, -+ int js) ++static inline bool ++kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) +{ -+ struct kbase_context *kctx; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) -+ return NULL; -+ -+ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next, -+ struct kbase_context, -+ jctx.sched_info.ctx.ctx_list_entry[js]); -+ -+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ -+ return kctx; ++ return atomic_dec_and_test(&katom->dma_fence.dep_count); +} + +/** -+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable -+ * queue. -+ * @kbdev: Device pointer -+ * @js: Job slot to use ++ * kbase_fence_dep_count_read() - Returns the current dep_count value ++ * @katom: Pointer to katom + * -+ * Return: Context to use for specified slot. -+ * NULL if no contexts present for specified slot ++ * See @kbase_fence_dep_count_set for general description about dep_count ++ * ++ * Return: The current dep_count value + */ -+static struct kbase_context *kbase_js_ctx_list_pop_head( -+ struct kbase_device *kbdev, int js) ++static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) +{ -+ struct kbase_context *kctx; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return kctx; ++ return atomic_read(&katom->dma_fence.dep_count); +} + +/** -+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the -+ * specified slot -+ * @kctx: Context pointer -+ * @js: Job slot to use -+ * @is_scheduled: true if the context is currently scheduled ++ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom ++ * @katom: Pointer to katom + * -+ * Caller must hold hwaccess_lock ++ * This function will free all fence callbacks on the katom's list of ++ * callbacks. Callbacks that have not yet been called, because their fence ++ * hasn't yet signaled, will first be removed from the fence. + * -+ * Return: true if context can be pulled from on specified slot -+ * false otherwise ++ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. ++ * ++ * Return: true if dep_count reached 0, otherwise false. 
+ */ -+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, -+ bool is_scheduled) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_jd_atom *katom; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ js_devdata = &kctx->kbdev->js_data; -+ -+ if (is_scheduled) { -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) -+ return false; -+ } -+ katom = jsctx_rb_peek(kctx, js); -+ if (!katom) -+ return false; /* No pullable atoms */ -+ if (kctx->blocked_js[js][katom->sched_priority]) -+ return false; -+ if (atomic_read(&katom->blocked)) -+ return false; /* next atom blocked */ -+ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { -+ if (katom->x_pre_dep->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || -+ katom->x_pre_dep->will_fail_event_code) -+ return false; -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && -+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool kbase_js_dep_validate(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) -+{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ bool ret = true; -+ bool has_dep = false, has_x_dep = false; -+ int js = kbase_js_get_slot(kbdev, katom); -+ int prio = katom->sched_priority; -+ int i; -+ -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; -+ -+ if (dep_atom) { -+ int dep_js = kbase_js_get_slot(kbdev, dep_atom); -+ int dep_prio = dep_atom->sched_priority; -+ -+ /* Dependent atom must already have been submitted */ -+ if (!(dep_atom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { -+ ret = false; -+ break; -+ } -+ -+ /* Dependencies with different priorities can't -+ be represented in the ringbuffer */ -+ if (prio != dep_prio) { -+ ret = false; -+ break; -+ } -+ -+ if (js == dep_js) { -+ /* Only one same-slot dependency can be -+ * represented in the ringbuffer */ -+ if (has_dep) { -+ ret = false; -+ break; -+ } -+ /* Each dependee atom can only have one -+ * same-slot dependency */ -+ if (dep_atom->post_dep) { -+ ret = false; -+ break; -+ } -+ has_dep = true; -+ } else { -+ /* Only one cross-slot dependency can be -+ * represented in the ringbuffer */ -+ if (has_x_dep) { -+ ret = false; -+ break; -+ } -+ /* Each dependee atom can only have one -+ * cross-slot dependency */ -+ if (dep_atom->x_post_dep) { -+ ret = false; -+ break; -+ } -+ /* The dependee atom can not already be in the -+ * HW access ringbuffer */ -+ if (dep_atom->gpu_rb_state != -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { -+ ret = false; -+ break; -+ } -+ /* The dependee atom can not already have -+ * completed */ -+ if (dep_atom->status != -+ KBASE_JD_ATOM_STATE_IN_JS) { -+ ret = false; -+ break; -+ } -+ /* Cross-slot dependencies must not violate -+ * PRLAM-8987 affinity restrictions */ -+ if (kbase_hw_has_issue(kbdev, -+ BASE_HW_ISSUE_8987) && -+ (js == 2 || dep_js == 2)) { -+ ret = false; -+ break; -+ } -+ has_x_dep = true; -+ } -+ -+ /* Dependency can be represented in ringbuffers */ -+ } -+ } -+ -+ /* If dependencies can be represented by ringbuffer then clear them from -+ * atom structure */ -+ if (ret) { -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; -+ -+ if (dep_atom) { -+ int dep_js = kbase_js_get_slot(kbdev, dep_atom); -+ -+ if ((js != dep_js) && -+ (dep_atom->status != -+ KBASE_JD_ATOM_STATE_COMPLETED) -+ && (dep_atom->status != -+ KBASE_JD_ATOM_STATE_HW_COMPLETED) -+ && (dep_atom->status != -+ KBASE_JD_ATOM_STATE_UNUSED)) { -+ -+ katom->atom_flags |= -+ 
KBASE_KATOM_FLAG_X_DEP_BLOCKED; -+ katom->x_pre_dep = dep_atom; -+ dep_atom->x_post_dep = katom; -+ if (kbase_jd_katom_dep_type( -+ &katom->dep[i]) == -+ BASE_JD_DEP_TYPE_DATA) -+ katom->atom_flags |= -+ KBASE_KATOM_FLAG_FAIL_BLOCKER; -+ } -+ if ((kbase_jd_katom_dep_type(&katom->dep[i]) -+ == BASE_JD_DEP_TYPE_DATA) && -+ (js == dep_js)) { -+ katom->pre_dep = dep_atom; -+ dep_atom->post_dep = katom; -+ } -+ -+ list_del(&katom->dep_item[i]); -+ kbase_jd_katom_dep_clear(&katom->dep[i]); -+ } -+ } -+ } -+ -+ return ret; -+} -+ -+bool kbasep_js_add_job(struct kbase_context *kctx, -+ struct kbase_jd_atom *atom) -+{ -+ unsigned long flags; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ -+ bool enqueue_required = false; -+ bool timer_sync = false; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(atom != NULL); -+ lockdep_assert_held(&kctx->jctx.lock); -+ -+ kbdev = kctx->kbdev; -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ /* -+ * Begin Runpool transaction -+ */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ -+ /* Refcount ctx.nr_jobs */ -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); -+ ++(js_kctx_info->ctx.nr_jobs); -+ -+ /* Setup any scheduling information */ -+ kbasep_js_clear_job_retry_submit(atom); -+ -+ /* Lock for state available during IRQ */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ if (!kbase_js_dep_validate(kctx, atom)) { -+ /* Dependencies could not be represented */ -+ --(js_kctx_info->ctx.nr_jobs); -+ -+ /* Setting atom status back to queued as it still has unresolved -+ * dependencies */ -+ atom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ -+ goto out_unlock; -+ } -+ -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY); -+ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); -+ -+ enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); -+ -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, -+ kbasep_js_trace_get_refcnt(kbdev, kctx)); -+ -+ /* Context Attribute Refcounting */ -+ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); -+ -+ if (enqueue_required) { -+ if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) -+ timer_sync = kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, atom->slot_nr); -+ else -+ timer_sync = kbase_js_ctx_list_add_unpullable_nolock( -+ kbdev, kctx, atom->slot_nr); -+ } -+ /* If this context is active and the atom is the first on its slot, -+ * kick the job manager to attempt to fast-start the atom */ -+ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx) -+ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ /* End runpool transaction */ -+ -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* A job got added while/after kbase_job_zap_context() -+ * was called on a non-scheduled context (e.g. KDS -+ * dependency resolved). Kill that job by killing the -+ * context. 
*/ -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, -+ false); -+ } else if (js_kctx_info->ctx.nr_jobs == 1) { -+ /* Handle Refcount going from 0 to 1: schedule the -+ * context on the Queue */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); -+ -+ /* Queue was updated - caller must try to -+ * schedule the head context */ -+ WARN_ON(!enqueue_required); -+ } -+ } -+out_unlock: -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ mutex_unlock(&js_devdata->queue_mutex); -+ -+ return enqueue_required; -+} -+ -+void kbasep_js_remove_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_jd_atom *atom) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbasep_js_device_data *js_devdata; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(atom != NULL); -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, -+ kbasep_js_trace_get_refcnt(kbdev, kctx)); ++bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); + -+ /* De-refcount ctx.nr_jobs */ -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); -+ --(js_kctx_info->ctx.nr_jobs); -+} ++#if defined(CONFIG_SYNC_FILE) ++/** ++ * kbase_fence_in_get() - Retrieve input fence for atom. ++ * @katom: Atom to get input fence from ++ * ++ * A ref will be taken for the fence, so use @kbase_fence_put() to release it ++ * ++ * Return: The fence, or NULL if there is no input fence for atom ++ */ ++#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) ++#endif + -+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ unsigned long flags; -+ struct kbasep_js_atom_retained_state katom_retained_state; -+ struct kbasep_js_device_data *js_devdata; -+ bool attr_state_changed; ++/** ++ * kbase_fence_out_get() - Retrieve output fence for atom. ++ * @katom: Atom to get output fence from ++ * ++ * A ref will be taken for the fence, so use @kbase_fence_put() to release it ++ * ++ * Return: The fence, or NULL if there is no output fence for atom ++ */ ++#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom != NULL); ++/** ++ * kbase_fence_put() - Releases a reference to a fence ++ * @fence: Fence to release reference for. ++ */ ++#define kbase_fence_put(fence) dma_fence_put(fence) + -+ js_devdata = &kbdev->js_data; + -+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); -+ kbasep_js_remove_job(kbdev, kctx, katom); ++#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#endif /* _KBASE_FENCE_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h +new file mode 100644 +index 000000000..fa2c6dfe9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h +@@ -0,0 +1,51 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* The atom has 'finished' (will not be re-run), so no need to call -+ * kbasep_js_has_atom_finished(). -+ * -+ * This is because it returns false for soft-stopped atoms, but we -+ * want to override that, because we're cancelling an atom regardless of -+ * whether it was soft-stopped or not */ -+ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, -+ &katom_retained_state); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ return attr_state_changed; -+} + -+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ bool result; ++#ifndef _KBASE_FENCE_DEFS_H_ ++#define _KBASE_FENCE_DEFS_H_ + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ js_devdata = &kbdev->js_data; ++/* ++ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) ++ * This file hides the compatibility issues with this for the rest the driver ++ */ + -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + -+ return result; -+} ++#include + -+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, -+ int as_nr) -+{ -+ int ret = 0; -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_context *found_kctx = NULL; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); -+ js_devdata = &kbdev->js_data; ++#include + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#define dma_fence_context_alloc(a) fence_context_alloc(a) ++#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) ++#define dma_fence_get(a) fence_get(a) ++#define dma_fence_put(a) fence_put(a) ++#define dma_fence_signal(a) fence_signal(a) ++#define dma_fence_is_signaled(a) fence_is_signaled(a) ++#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) ++#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + -+ found_kctx = kbdev->as_to_kctx[as_nr]; ++#else + -+ if (found_kctx != NULL) { -+ ret = kbase_ctx_sched_retain_ctx_refcount(found_kctx); -+ if (ret != 0) { -+ E("fail to retain ctx_refcount, ret : %d.", ret); -+ found_kctx = NULL; -+ } -+ } ++#include + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif /* < 4.10.0 */ + -+ return found_kctx; -+} ++#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + -+/** -+ * kbasep_js_release_result - Try running more jobs after releasing a context -+ * and/or atom ++#endif /* _KBASE_FENCE_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h +new file mode 100644 +index 000000000..ce65b5562 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h +@@ -0,0 +1,45 @@ ++/* + * -+ * @kbdev: The kbase_device to operate on -+ * @kctx: The kbase_context to operate on -+ * @katom_retained_state: Retained state from the atom -+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed ++ * (C) COPYRIGHT 2011-2015 ARM Limited. 
All rights reserved. + * -+ * This collates a set of actions that must happen whilst hwaccess_lock is held. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * This includes running more jobs when: -+ * - The previously released kctx caused a ctx attribute change, -+ * - The released atom caused a ctx attribute change, -+ * - Slots were previously blocked due to affinity restrictions, -+ * - Submission during IRQ handling failed. ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were -+ * changed. The caller should try scheduling all contexts + */ -+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state, -+ bool runpool_ctx_attr_change) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ kbasep_js_release_result result = 0; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(katom_retained_state != NULL); -+ js_devdata = &kbdev->js_data; + -+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (js_devdata->nr_user_contexts_running != 0) { -+ bool retry_submit = false; -+ int retry_jobslot = 0; ++/* NB taken from gator */ ++/* ++ * List of possible actions to be controlled by DS-5 Streamline. ++ * The following numbers are used by gator to control the frame buffer dumping ++ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because ++ * they are unknown inside gator. 
++ */ ++#ifndef _KBASE_GATOR_H_ ++#define _KBASE_GATOR_H_ + -+ if (katom_retained_state) -+ retry_submit = kbasep_js_get_atom_retry_submit_slot( -+ katom_retained_state, &retry_jobslot); ++#ifdef CONFIG_MALI_GATOR_SUPPORT ++#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) ++#define GATOR_JOB_SLOT_START 1 ++#define GATOR_JOB_SLOT_STOP 2 ++#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + -+ if (runpool_ctx_attr_change || retry_submit) { -+ /* A change in runpool ctx attributes might mean we can -+ * run more jobs than before */ -+ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; ++void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id); ++void kbase_trace_mali_pm_status(u32 event, u64 value); ++void kbase_trace_mali_pm_power_off(u32 event, u64 value); ++void kbase_trace_mali_pm_power_on(u32 event, u64 value); ++void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); ++void kbase_trace_mali_mmu_as_in_use(int event); ++void kbase_trace_mali_mmu_as_released(int event); ++void kbase_trace_mali_total_alloc_pages_change(long long int event); + -+ KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, -+ kctx, NULL, 0u, retry_jobslot); -+ } -+ } -+ return result; -+} ++#endif /* CONFIG_MALI_GATOR_SUPPORT */ + ++#endif /* _KBASE_GATOR_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +new file mode 100644 +index 000000000..860e10159 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +@@ -0,0 +1,334 @@ +/* -+ * Internal function to release the reference on a ctx and an atom's "retained -+ * state", only taking the runpool and as transaction mutexes + * -+ * This also starts more jobs running in the case of an ctx-attribute state -+ * change ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * -+ * This does none of the followup actions for scheduling: -+ * - It does not schedule in a new context -+ * - It does not requeue or handle dying contexts ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
+ * -+ * Requires: -+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr -+ * - Context has a non-zero refcount -+ * - Caller holds js_kctx_info->ctx.jsctx_mutex -+ * - Caller holds js_devdata->runpool_mutex + */ -+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state) -+{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ kbasep_js_release_result release_result = 0u; -+ bool runpool_ctx_attr_change = false; -+ int kctx_as_nr; -+ struct kbase_as *current_as; -+ int new_ref_count; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; -+ -+ /* Ensure context really is scheduled in */ -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ -+ kctx_as_nr = kctx->as_nr; -+ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); -+ -+ /* -+ * Transaction begins on AS and runpool_irq -+ * -+ * Assert about out calling contract -+ */ -+ current_as = &kbdev->as[kctx_as_nr]; -+ mutex_lock(&kbdev->pm.lock); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); -+ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); -+ -+ /* Update refcount */ -+ kbase_ctx_sched_release_ctx(kctx); -+ new_ref_count = atomic_read(&kctx->refcount); -+ -+ /* Release the atom if it finished (i.e. wasn't soft-stopped) */ -+ if (kbasep_js_has_atom_finished(katom_retained_state)) -+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( -+ kbdev, kctx, katom_retained_state); + -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, -+ new_ref_count); + -+ if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && -+ !kbase_pm_is_suspending(kbdev)) { -+ /* Context is kept scheduled into an address space even when -+ * there are no jobs, in this case we have to handle the -+ * situation where all jobs have been evicted from the GPU and -+ * submission is disabled. -+ * -+ * At this point we re-enable submission to allow further jobs -+ * to be executed -+ */ -+ kbasep_js_set_submit_allowed(js_devdata, kctx); -+ } + -+ /* Make a set of checks to see if the context should be scheduled out. -+ * Note that there'll always be at least 1 reference to the context -+ * which was previously acquired by kbasep_js_schedule_ctx(). 
*/ -+ if (new_ref_count == 1 && -+ (!kbasep_js_is_submit_allowed(js_devdata, kctx) || -+ kbdev->pm.suspending)) { -+ int num_slots = kbdev->gpu_props.num_job_slots; -+ int slot; ++#include "mali_kbase.h" ++#include "mali_kbase_hw.h" ++#include "mali_kbase_mem_linux.h" ++#include "mali_kbase_gator_api.h" ++#include "mali_kbase_gator_hwcnt_names.h" + -+ /* Last reference, and we've been told to remove this context -+ * from the Run Pool */ -+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", -+ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, -+ kbasep_js_is_submit_allowed(js_devdata, kctx)); ++#define MALI_MAX_CORES_PER_GROUP 4 ++#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 ++#define MALI_COUNTERS_PER_BLOCK 64 ++#define MALI_BYTES_PER_COUNTER 4 + -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_mmu_as_released(kctx->as_nr); -+#endif -+ KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); ++struct kbase_gator_hwcnt_handles { ++ struct kbase_device *kbdev; ++ struct kbase_vinstr_client *vinstr_cli; ++ void *vinstr_buffer; ++ struct work_struct dump_work; ++ int dump_complete; ++ spinlock_t dump_lock; ++}; + -+ kbase_backend_release_ctx_irq(kbdev, kctx); ++static void dump_worker(struct work_struct *work); + -+ if (kbdev->hwaccess.active_kctx == kctx) -+ kbdev->hwaccess.active_kctx = NULL; ++const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) ++{ ++ const char * const *hardware_counters; ++ struct kbase_device *kbdev; ++ uint32_t product_id; ++ uint32_t count; + -+ /* Ctx Attribute handling -+ * -+ * Releasing atoms attributes must either happen before this, or -+ * after the KCTX_SHEDULED flag is changed, otherwise we -+ * double-decount the attributes -+ */ -+ runpool_ctx_attr_change |= -+ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); ++ if (!total_counters) ++ return NULL; + -+ /* Releasing the context and katom retained state can allow -+ * more jobs to run */ -+ release_result |= -+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, -+ kctx, katom_retained_state, -+ runpool_ctx_attr_change); ++ /* Get the first device - it doesn't matter in this case */ ++ kbdev = kbase_find_device(-1); ++ if (!kbdev) ++ return NULL; + -+ /* -+ * Transaction ends on AS and runpool_irq: -+ * -+ * By this point, the AS-related data is now clear and ready -+ * for re-use. 
-+ * -+ * Since releases only occur once for each previous successful -+ * retain, and no more retains are allowed on this context, no -+ * other thread will be operating in this -+ * code whilst we are -+ */ ++ product_id = kbdev->gpu_props.props.core_props.product_id; + -+ /* Recalculate pullable status for all slots */ -+ for (slot = 0; slot < num_slots; slot++) { -+ if (kbase_js_ctx_pullable(kctx, slot, false)) -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, -+ kctx, slot); ++ if (GPU_ID_IS_NEW_FORMAT(product_id)) { ++ switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) { ++ case GPU_ID2_PRODUCT_TMIX: ++ hardware_counters = hardware_counters_mali_tMIx; ++ count = ARRAY_SIZE(hardware_counters_mali_tMIx); ++ break; ++ case GPU_ID2_PRODUCT_THEX: ++ hardware_counters = hardware_counters_mali_tHEx; ++ count = ARRAY_SIZE(hardware_counters_mali_tHEx); ++ break; ++ case GPU_ID2_PRODUCT_TSIX: ++ hardware_counters = hardware_counters_mali_tSIx; ++ count = ARRAY_SIZE(hardware_counters_mali_tSIx); ++ break; ++ default: ++ hardware_counters = NULL; ++ count = 0; ++ dev_err(kbdev->dev, "Unrecognized product ID: %u\n", ++ product_id); ++ break; + } -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ kbase_backend_release_ctx_noirq(kbdev, kctx); -+ -+ mutex_unlock(&kbdev->pm.lock); -+ -+ /* Note: Don't reuse kctx_as_nr now */ -+ -+ /* Synchronize with any timers */ -+ kbase_backend_ctx_count_changed(kbdev); -+ -+ /* update book-keeping info */ -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); -+ /* Signal any waiter that the context is not scheduled, so is -+ * safe for termination - once the jsctx_mutex is also dropped, -+ * and jobs have finished. */ -+ wake_up(&js_kctx_info->ctx.is_scheduled_wait); -+ -+ /* Queue an action to occur after we've dropped the lock */ -+ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | -+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + } else { -+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, -+ katom_retained_state, runpool_ctx_attr_change); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->pm.lock); ++ switch (product_id) { ++ /* If we are using a Mali-T60x device */ ++ case GPU_ID_PI_T60X: ++ hardware_counters = hardware_counters_mali_t60x; ++ count = ARRAY_SIZE(hardware_counters_mali_t60x); ++ break; ++ /* If we are using a Mali-T62x device */ ++ case GPU_ID_PI_T62X: ++ hardware_counters = hardware_counters_mali_t62x; ++ count = ARRAY_SIZE(hardware_counters_mali_t62x); ++ break; ++ /* If we are using a Mali-T72x device */ ++ case GPU_ID_PI_T72X: ++ hardware_counters = hardware_counters_mali_t72x; ++ count = ARRAY_SIZE(hardware_counters_mali_t72x); ++ break; ++ /* If we are using a Mali-T76x device */ ++ case GPU_ID_PI_T76X: ++ hardware_counters = hardware_counters_mali_t76x; ++ count = ARRAY_SIZE(hardware_counters_mali_t76x); ++ break; ++ /* If we are using a Mali-T82x device */ ++ case GPU_ID_PI_T82X: ++ hardware_counters = hardware_counters_mali_t82x; ++ count = ARRAY_SIZE(hardware_counters_mali_t82x); ++ break; ++ /* If we are using a Mali-T83x device */ ++ case GPU_ID_PI_T83X: ++ hardware_counters = hardware_counters_mali_t83x; ++ count = ARRAY_SIZE(hardware_counters_mali_t83x); ++ break; ++ /* If we are using a Mali-T86x device */ ++ case GPU_ID_PI_T86X: ++ hardware_counters = hardware_counters_mali_t86x; ++ count = ARRAY_SIZE(hardware_counters_mali_t86x); ++ break; ++ /* If we are using a Mali-T88x device */ ++ case GPU_ID_PI_TFRX: ++ hardware_counters = hardware_counters_mali_t88x; ++ count = 
ARRAY_SIZE(hardware_counters_mali_t88x); ++ break; ++ default: ++ hardware_counters = NULL; ++ count = 0; ++ dev_err(kbdev->dev, "Unrecognized product ID: %u\n", ++ product_id); ++ break; ++ } + } + -+ return release_result; -+} ++ /* Release the kbdev reference. */ ++ kbase_release_device(kbdev); + -+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_atom_retained_state katom_retained_state; ++ *total_counters = count; + -+ /* Setup a dummy katom_retained_state */ -+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); ++ /* If we return a string array take a reference on the module (or fail). */ ++ if (hardware_counters && !try_module_get(THIS_MODULE)) ++ return NULL; + -+ kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ &katom_retained_state); ++ return hardware_counters; +} ++KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names); + -+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx, bool has_pm_ref) ++void kbase_gator_hwcnt_term_names(void) +{ -+ struct kbasep_js_device_data *js_devdata; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_devdata = &kbdev->js_data; -+ -+ /* This is called if and only if you've you've detached the context from -+ * the Runpool Queue, and not added it back to the Runpool -+ */ -+ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* Dying: don't requeue, but kill all jobs on the context. This -+ * happens asynchronously */ -+ dev_dbg(kbdev->dev, -+ "JS: ** Killing Context %p on RunPool Remove **", kctx); -+ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); -+ } ++ /* Release the module reference. 
*/ ++ module_put(THIS_MODULE); +} ++KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); + -+void kbasep_js_runpool_release_ctx_and_katom_retained_state( -+ struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbasep_js_atom_retained_state *katom_retained_state) ++struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ kbasep_js_release_result release_result; ++ struct kbase_gator_hwcnt_handles *hand; ++ struct kbase_uk_hwcnt_reader_setup setup; ++ uint32_t dump_size = 0, i = 0; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++ if (!in_out_info) ++ return NULL; + -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++ hand = kzalloc(sizeof(*hand), GFP_KERNEL); ++ if (!hand) ++ return NULL; + -+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ katom_retained_state); ++ INIT_WORK(&hand->dump_work, dump_worker); ++ spin_lock_init(&hand->dump_lock); + -+ /* Drop the runpool mutex to allow requeing kctx */ -+ mutex_unlock(&js_devdata->runpool_mutex); ++ /* Get the first device */ ++ hand->kbdev = kbase_find_device(-1); ++ if (!hand->kbdev) ++ goto free_hand; + -+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); ++ dump_size = kbase_vinstr_dump_size(hand->kbdev); ++ hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); ++ if (!hand->vinstr_buffer) ++ goto release_device; ++ in_out_info->kernel_dump_buffer = hand->vinstr_buffer; + -+ /* Drop the jsctx_mutex to allow scheduling in a new context */ ++ in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; ++ in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; ++ in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id; + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++ /* If we are using a v4 device (Mali-T6xx or Mali-T72x) */ ++ if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) { ++ uint32_t cg, j; ++ uint64_t core_mask; + -+ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) -+ kbase_js_sched_all(kbdev); -+} ++ /* There are 8 hardware counters blocks per core group */ ++ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * ++ MALI_MAX_NUM_BLOCKS_PER_GROUP * ++ in_out_info->nr_core_groups, GFP_KERNEL); + -+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_atom_retained_state katom_retained_state; ++ if (!in_out_info->hwc_layout) ++ goto free_vinstr_buffer; + -+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); ++ dump_size = in_out_info->nr_core_groups * ++ MALI_MAX_NUM_BLOCKS_PER_GROUP * ++ MALI_COUNTERS_PER_BLOCK * ++ MALI_BYTES_PER_COUNTER; + -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, -+ &katom_retained_state); -+} ++ for (cg = 0; cg < in_out_info->nr_core_groups; cg++) { ++ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask; + -+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into -+ * kbase_js_sched_all() */ -+static void kbasep_js_runpool_release_ctx_no_schedule( -+ struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data 
*js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ kbasep_js_release_result release_result; -+ struct kbasep_js_atom_retained_state katom_retained_state_struct; -+ struct kbasep_js_atom_retained_state *katom_retained_state = -+ &katom_retained_state_struct; ++ for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) { ++ if (core_mask & (1u << j)) ++ in_out_info->hwc_layout[i++] = SHADER_BLOCK; ++ else ++ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; ++ } + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; -+ kbasep_js_atom_retained_state_init_invalid(katom_retained_state); ++ in_out_info->hwc_layout[i++] = TILER_BLOCK; ++ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); ++ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; + -+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, -+ katom_retained_state); ++ if (0 == cg) ++ in_out_info->hwc_layout[i++] = JM_BLOCK; ++ else ++ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; ++ } ++ /* If we are using any other device */ ++ } else { ++ uint32_t nr_l2, nr_sc_bits, j; ++ uint64_t core_mask; + -+ /* Drop the runpool mutex to allow requeing kctx */ -+ mutex_unlock(&js_devdata->runpool_mutex); -+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); ++ nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; + -+ /* Drop the jsctx_mutex to allow scheduling in a new context */ -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; + -+ /* NOTE: could return release_result if the caller would like to know -+ * whether it should schedule a new context, but currently no callers do -+ */ -+} ++ nr_sc_bits = fls64(core_mask); + -+void kbase_js_set_timeouts(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* The job manager and tiler sets of counters ++ * are always present */ ++ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); + -+ kbase_backend_timeouts_changed(kbdev); -+} ++ if (!in_out_info->hwc_layout) ++ goto free_vinstr_buffer; + -+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbase_as *new_address_space = NULL; -+ unsigned long flags; -+ bool kctx_suspended = false; -+ int as_nr; ++ dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; ++ in_out_info->hwc_layout[i++] = JM_BLOCK; ++ in_out_info->hwc_layout[i++] = TILER_BLOCK; + -+ /* Pick available address space for this context */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ as_nr = kbase_ctx_sched_retain_ctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ if (as_nr == KBASEP_AS_NR_INVALID) { -+ as_nr = kbase_backend_find_and_release_free_address_space( -+ kbdev, kctx); -+ if (as_nr != KBASEP_AS_NR_INVALID) { -+ /* Attempt to retain the context again, this should -+ * succeed */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ as_nr = 
kbase_ctx_sched_retain_ctx(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++ for (j = 0; j < nr_l2; j++) ++ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; + -+ WARN_ON(as_nr == KBASEP_AS_NR_INVALID); ++ while (core_mask != 0ull) { ++ if ((core_mask & 1ull) != 0ull) ++ in_out_info->hwc_layout[i++] = SHADER_BLOCK; ++ else ++ in_out_info->hwc_layout[i++] = RESERVED_BLOCK; ++ core_mask >>= 1; + } + } -+ if (as_nr == KBASEP_AS_NR_INVALID) -+ return false; /* No address spaces currently available */ -+ -+ new_address_space = &kbdev->as[as_nr]; -+ -+ /* -+ * Atomic transaction on the Context and Run Pool begins -+ */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ /* Check to see if context is dying due to kbase_job_zap_context() */ -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ /* Roll back the transaction so far and return */ -+ kbase_ctx_sched_release_ctx(kctx); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ return false; -+ } -+ -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, -+ 0u, -+ kbasep_js_trace_get_refcnt(kbdev, kctx)); -+ -+ kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); -+ -+ /* Assign context to previously chosen address space */ -+ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { -+ /* Roll back the transaction so far and return */ -+ kbase_ctx_sched_release_ctx(kctx); -+ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ in_out_info->nr_hwc_blocks = i; ++ in_out_info->size = dump_size; + -+ return false; ++ setup.jm_bm = in_out_info->bitmask[0]; ++ setup.tiler_bm = in_out_info->bitmask[1]; ++ setup.shader_bm = in_out_info->bitmask[2]; ++ setup.mmu_l2_bm = in_out_info->bitmask[3]; ++ hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, ++ &setup, hand->vinstr_buffer); ++ if (!hand->vinstr_cli) { ++ dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); ++ goto free_layout; + } + -+ kbdev->hwaccess.active_kctx = kctx; -+ -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_mmu_as_in_use(kctx->as_nr); -+#endif -+ KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); -+ -+ /* Cause any future waiter-on-termination to wait until the context is -+ * descheduled */ -+ wake_up(&js_kctx_info->ctx.is_scheduled_wait); -+ -+ /* Re-check for suspending: a suspend could've occurred, and all the -+ * contexts could've been removed from the runpool before we took this -+ * lock. In this case, we don't want to allow this context to run jobs, -+ * we just want it out immediately. -+ * -+ * The DMB required to read the suspend flag was issued recently as part -+ * of the hwaccess_lock locking. If a suspend occurs *after* that lock -+ * was taken (i.e. 
this condition doesn't execute), then the -+ * kbasep_js_suspend() code will cleanup this context instead (by virtue -+ * of it being called strictly after the suspend flag is set, and will -+ * wait for this lock to drop) */ -+ if (kbase_pm_is_suspending(kbdev)) { -+ /* Cause it to leave at some later point */ -+ bool retained; -+ -+ retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); -+ KBASE_DEBUG_ASSERT(retained); -+ -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); -+ kctx_suspended = true; -+ } ++ return hand; + -+ /* Transaction complete */ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++free_layout: ++ kfree(in_out_info->hwc_layout); + -+ /* Synchronize with any timers */ -+ kbase_backend_ctx_count_changed(kbdev); ++free_vinstr_buffer: ++ kfree(hand->vinstr_buffer); + -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ /* Note: after this point, the context could potentially get scheduled -+ * out immediately */ ++release_device: ++ kbase_release_device(hand->kbdev); + -+ if (kctx_suspended) { -+ /* Finishing forcing out the context due to a suspend. Use a -+ * variant of kbasep_js_runpool_release_ctx() that doesn't -+ * schedule a new context, to prevent a risk of recursion back -+ * into this function */ -+ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); -+ return false; -+ } -+ return true; ++free_hand: ++ kfree(hand); ++ return NULL; +} ++KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); + -+static bool kbase_js_use_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles) +{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (in_out_info) ++ kfree(in_out_info->hwc_layout); + -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && -+ kbase_backend_use_ctx_sched(kbdev, kctx)) { -+ /* Context already has ASID - mark as active */ -+ kbdev->hwaccess.active_kctx = kctx; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ return true; /* Context already scheduled */ ++ if (opaque_handles) { ++ cancel_work_sync(&opaque_handles->dump_work); ++ kbase_vinstr_detach_client(opaque_handles->vinstr_cli); ++ kfree(opaque_handles->vinstr_buffer); ++ kbase_release_device(opaque_handles->kbdev); ++ kfree(opaque_handles); + } -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ return kbasep_js_schedule_ctx(kbdev, kctx); +} ++KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); + -+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++static void dump_worker(struct work_struct *work) +{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbasep_js_device_data *js_devdata; -+ bool is_scheduled; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ /* This must never be attempted whilst suspending - i.e. 
it should only -+ * happen in response to a syscall from a user-space thread */ -+ BUG_ON(kbase_pm_is_suspending(kbdev)); -+ -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ /* Mark the context as privileged */ -+ kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); -+ -+ is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); -+ if (!is_scheduled) { -+ /* Add the context to the pullable list */ -+ if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) -+ kbase_js_sync_timers(kbdev); -+ -+ /* Fast-starting requires the jsctx_mutex to be dropped, -+ * because it works on multiple ctxs */ -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ -+ /* Try to schedule the context in */ -+ kbase_js_sched_all(kbdev); ++ struct kbase_gator_hwcnt_handles *hand; + -+ /* Wait for the context to be scheduled in */ -+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, -+ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); ++ if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, ++ BASE_HWCNT_READER_EVENT_MANUAL)) { ++ spin_lock_bh(&hand->dump_lock); ++ hand->dump_complete = 1; ++ spin_unlock_bh(&hand->dump_lock); + } else { -+ /* Already scheduled in - We need to retain it to keep the -+ * corresponding address space */ -+ kbasep_js_runpool_retain_ctx(kbdev, kctx); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++ schedule_work(&hand->dump_work); + } +} -+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); -+ -+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ /* We don't need to use the address space anymore */ -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ -+ /* Release the context - it will be scheduled out */ -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+ -+ kbase_js_sched_all(kbdev); -+} -+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + -+void kbasep_js_suspend(struct kbase_device *kbdev) ++uint32_t kbase_gator_instr_hwcnt_dump_complete( ++ struct kbase_gator_hwcnt_handles *opaque_handles, ++ uint32_t * const success) +{ -+ unsigned long flags; -+ struct kbasep_js_device_data *js_devdata; -+ int i; -+ u16 retained = 0u; -+ int nr_privileged_ctx = 0; -+ -+ KBASE_DEBUG_ASSERT(kbdev); -+ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); -+ js_devdata = &kbdev->js_data; -+ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ -+ /* Prevent all contexts from submitting */ -+ js_devdata->runpool_irq.submit_allowed = 0; -+ -+ /* Retain each of the contexts, so we can cause it to leave even if it -+ * had no refcount to begin with */ -+ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { -+ struct kbase_context *kctx = kbdev->as_to_kctx[i]; -+ -+ retained = retained << 1; -+ -+ if (kctx) { -+ kbase_ctx_sched_retain_ctx_refcount(kctx); -+ retained |= 1u; -+ /* We can only cope with up to 1 privileged context - -+ * the instrumented context. 
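For reference, the non-V4 branch of kbase_gator_hwcnt_init() above lays out one 64-counter block for the job manager, one for the tiler, one per L2 slice, and one per bit position up to the highest set bit of the shader core mask. The following standalone sketch recomputes that sizing for a hypothetical configuration (one L2 slice, core_mask 0xf); fls64_sketch() merely stands in for the kernel's fls64(), and none of this code is part of the patch itself.

/* Hypothetical, userspace-only illustration of the non-V4 dump sizing. */
#include <stdint.h>
#include <stdio.h>

static unsigned int fls64_sketch(uint64_t v)
{
	unsigned int n = 0;

	while (v) {		/* 1-based position of the highest set bit */
		n++;
		v >>= 1;
	}
	return n;
}

int main(void)
{
	unsigned int nr_l2 = 1;					/* assumed L2 slices */
	unsigned int nr_sc_bits = fls64_sketch(0xf);		/* assumed core_mask 0xf -> 4 */
	unsigned int nr_blocks = 2 + nr_sc_bits + nr_l2;	/* JM + tiler + shaders + L2 = 7 */
	unsigned int dump_size = nr_blocks * 64 /* counters per block */ * 4 /* bytes per counter */;

	printf("%u blocks, %u bytes\n", nr_blocks, dump_size);	/* 7 blocks, 1792 bytes */
	return 0;
}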
It'll be suspended by -+ * disabling instrumentation */ -+ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { -+ ++nr_privileged_ctx; -+ WARN_ON(nr_privileged_ctx != 1); -+ } -+ } -+ } -+ CSTD_UNUSED(nr_privileged_ctx); -+ -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ -+ /* De-ref the previous retain to ensure each context gets pulled out -+ * sometime later. */ -+ for (i = 0; -+ i < BASE_MAX_NR_AS; -+ ++i, retained = retained >> 1) { -+ struct kbase_context *kctx = kbdev->as_to_kctx[i]; + -+ if (retained & 1u) -+ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ if (opaque_handles && success) { ++ *success = opaque_handles->dump_complete; ++ opaque_handles->dump_complete = 0; ++ return *success; + } -+ -+ /* Caller must wait for all Power Manager active references to be -+ * dropped */ ++ return 0; +} ++KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); + -+void kbasep_js_resume(struct kbase_device *kbdev) ++uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) +{ -+ struct kbasep_js_device_data *js_devdata; -+ int js; -+ -+ KBASE_DEBUG_ASSERT(kbdev); -+ js_devdata = &kbdev->js_data; -+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); -+ -+ mutex_lock(&js_devdata->queue_mutex); -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ struct kbase_context *kctx, *n; -+ -+ list_for_each_entry_safe(kctx, n, -+ &kbdev->js_data.ctx_list_unpullable[js], -+ jctx.sched_info.ctx.ctx_list_entry[js]) { -+ struct kbasep_js_kctx_info *js_kctx_info; -+ unsigned long flags; -+ bool timer_sync = false; ++ if (opaque_handles) ++ schedule_work(&opaque_handles->dump_work); ++ return 0; ++} ++KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h +new file mode 100644 +index 000000000..ef9ac0f7b +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h +@@ -0,0 +1,219 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ js_kctx_info = &kctx->jctx.sched_info; + -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && -+ kbase_js_ctx_pullable(kctx, js, false)) -+ timer_sync = -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ } -+ } -+ mutex_unlock(&js_devdata->queue_mutex); ++#ifndef _KBASE_GATOR_API_H_ ++#define _KBASE_GATOR_API_H_ + -+ /* Restart atom processing */ -+ kbase_js_sched_all(kbdev); ++/** ++ * @brief This file describes the API used by Gator to fetch hardware counters. ++ */ + -+ /* JS Resume complete */ -+} ++/* This define is used by the gator kernel module compile to select which DDK ++ * API calling convention to use. 
If not defined (legacy DDK) gator assumes ++ * version 1. The version to DDK release mapping is: ++ * Version 1 API: DDK versions r1px, r2px ++ * Version 2 API: DDK versions r3px, r4px ++ * Version 3 API: DDK version r5p0 and newer ++ * ++ * API Usage ++ * ========= ++ * ++ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter ++ * names for the GPU present in this device. ++ * ++ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for ++ * the counters you want enabled. The enables can all be set for simplicity in ++ * most use cases, but disabling some will let you minimize bandwidth impact. ++ * ++ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a ++ * counter context. On successful return the DDK will have populated the ++ * structure with a variety of useful information. ++ * ++ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a ++ * counter dump. If this returns a non-zero value the request has been queued, ++ * otherwise the driver has been unable to do so (typically because of another ++ * user of the instrumentation exists concurrently). ++ * ++ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously ++ * requested dump has been succesful. If this returns non-zero the counter dump ++ * has resolved, but the value of *success must also be tested as the dump ++ * may have not been successful. If it returns zero the counter dump was ++ * abandoned due to the device being busy (typically because of another ++ * user of the instrumentation exists concurrently). ++ * ++ * 6] Process the counters stored in the buffer pointed to by ... ++ * ++ * kbase_gator_hwcnt_info->kernel_dump_buffer ++ * ++ * In pseudo code you can find all of the counters via this approach: ++ * ++ * ++ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure ++ * hwcnt_name # pointer to name list ++ * ++ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer ++ * ++ * # Iterate over each 64-counter block in this GPU configuration ++ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) { ++ * hwc_type type = hwcnt_info->hwc_layout[i]; ++ * ++ * # Skip reserved type blocks - they contain no counters at all ++ * if( type == RESERVED_BLOCK ) { ++ * continue; ++ * } ++ * ++ * size_t name_offset = type * 64; ++ * size_t data_offset = i * 64; ++ * ++ * # Iterate over the names of the counters in this block type ++ * for( j = 0; j < 64; j++) { ++ * const char * name = hwcnt_name[name_offset+j]; ++ * ++ * # Skip empty name strings - there is no counter here ++ * if( name[0] == '\0' ) { ++ * continue; ++ * } ++ * ++ * u32 data = hwcnt_data[data_offset+j]; ++ * ++ * printk( "COUNTER: %s DATA: %u\n", name, data ); ++ * } ++ * } ++ * ++ * ++ * Note that in most implementations you typically want to either SUM or ++ * AVERAGE multiple instances of the same counter if, for example, you have ++ * multiple shader cores or multiple L2 caches. The most sensible view for ++ * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU ++ * counters. ++ * ++ * 7] Goto 4, repeating until you want to stop collecting counters. ++ * ++ * 8] Release the dump resources by calling kbase_gator_hwcnt_term(). ++ * ++ * 9] Release the name table resources by calling ++ * kbase_gator_hwcnt_term_names(). This function must only be called if ++ * init_names() returned a non-NULL value. 
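The pseudo-code in step 6 above maps almost one-to-one onto C. Below is a minimal sketch, assuming a populated struct kbase_gator_hwcnt_info (declared further down in this header) and a name table obtained from kbase_gator_hwcnt_init_names(); print_counters() is a hypothetical helper, not part of the driver.

static void print_counters(const struct kbase_gator_hwcnt_info *hwcnt_info,
			   const char * const *hwcnt_name)
{
	const uint32_t *hwcnt_data = (const uint32_t *)hwcnt_info->kernel_dump_buffer;
	uint32_t i, j;

	/* Walk each 64-counter block reported for this GPU configuration. */
	for (i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
		enum hwc_type type = hwcnt_info->hwc_layout[i];
		size_t name_offset;
		size_t data_offset;

		/* Reserved blocks contain no counters at all. */
		if (type == RESERVED_BLOCK)
			continue;

		/* Names are indexed per block *type*, data per block *instance*. */
		name_offset = (size_t)type * 64;
		data_offset = (size_t)i * 64;

		for (j = 0; j < 64; j++) {
			const char *name = hwcnt_name[name_offset + j];

			/* Empty names are holes where no counter exists. */
			if (name[0] == '\0')
				continue;

			printk("COUNTER: %s DATA: %u\n", name, hwcnt_data[data_offset + j]);
		}
	}
}

As the documentation notes, a real consumer would then SUM or AVERAGE the per-instance values of the same counter across shader cores or L2 slices rather than printing them.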
++ **/ + -+bool kbase_js_is_atom_valid(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ if ((katom->core_req & BASE_JD_REQ_FS) && -+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | -+ BASE_JD_REQ_T))) -+ return false; ++#define MALI_DDK_GATOR_API_VERSION 3 + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) && -+ (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) && -+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T))) -+ return false; ++enum hwc_type { ++ JM_BLOCK = 0, ++ TILER_BLOCK, ++ SHADER_BLOCK, ++ MMU_L2_BLOCK, ++ RESERVED_BLOCK ++}; + -+ return true; -+} ++struct kbase_gator_hwcnt_info { ++ /* Passed from Gator to kbase */ + -+static int kbase_js_get_slot(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom) -+{ -+ if (katom->core_req & BASE_JD_REQ_FS) -+ return 0; ++ /* the bitmask of enabled hardware counters for each counter block */ ++ uint16_t bitmask[4]; + -+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { -+ if (katom->device_nr == 1 && -+ kbdev->gpu_props.num_core_groups == 2) -+ return 2; -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) -+ return 2; -+ } ++ /* Passed from kbase to Gator */ + -+ return 1; -+} ++ /* ptr to counter dump memory */ ++ void *kernel_dump_buffer; + -+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) -+{ -+ bool enqueue_required; ++ /* size of counter dump memory */ ++ uint32_t size; + -+ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); ++ /* the ID of the Mali device */ ++ uint32_t gpu_id; + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ lockdep_assert_held(&kctx->jctx.lock); ++ /* the number of shader cores in the GPU */ ++ uint32_t nr_cores; + -+ /* If slot will transition from unpullable to pullable then add to -+ * pullable list */ -+ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { -+ enqueue_required = true; -+ } else { -+ enqueue_required = false; -+ } -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || -+ (katom->pre_dep && (katom->pre_dep->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { -+ int prio = katom->sched_priority; -+ int js = katom->slot_nr; -+ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; ++ /* the number of core groups */ ++ uint32_t nr_core_groups; + -+ list_add_tail(&katom->queue, &queue->x_dep_head); -+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ enqueue_required = false; -+ } else { -+ /* Check if there are lower priority jobs to soft stop */ -+ kbase_job_slot_ctx_priority_check_locked(kctx, katom); ++ /* the memory layout of the performance counters */ ++ enum hwc_type *hwc_layout; + -+ /* Add atom to ring buffer. */ -+ jsctx_tree_add(kctx, katom); -+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; -+ } ++ /* the total number of hardware couter blocks */ ++ uint32_t nr_hwc_blocks; ++}; + -+ return enqueue_required; -+} ++/** ++ * @brief Opaque block of Mali data which Gator needs to return to the API later. ++ */ ++struct kbase_gator_hwcnt_handles; + +/** -+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the -+ * runnable_tree, ready for execution -+ * @katom: Atom to submit ++ * @brief Initialize the resources Gator needs for performance profiling. + * -+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, -+ * but is still present in the x_dep list. If @katom has a same-slot dependent -+ * atom then that atom (and any dependents) will also be moved. 
++ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali ++ * specific information that will be returned to Gator. On entry Gator must have populated the ++ * 'bitmask' field with the counters it wishes to enable for each class of counter block. ++ * Each entry in the array corresponds to a single counter class based on the "hwc_type" ++ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables ++ * the first 4 counters in the block, and so on). See the GPU counter array as returned by ++ * kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU. ++ * ++ * @return Pointer to an opaque handle block on success, NULL on error. + */ -+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) -+{ -+ lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); -+ -+ while (katom) { -+ WARN_ON(!(katom->atom_flags & -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); -+ -+ if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { -+ list_del(&katom->queue); -+ katom->atom_flags &= -+ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; -+ jsctx_tree_add(katom->kctx, katom); -+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; -+ } else { -+ break; -+ } -+ -+ katom = katom->post_dep; -+ } -+} -+ ++extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info); + +/** -+ * kbase_js_evict_deps - Evict dependencies of a failed atom. -+ * @kctx: Context pointer -+ * @katom: Pointer to the atom that has failed. -+ * @js: The job slot the katom was run on. -+ * @prio: Priority of the katom. ++ * @brief Free all resources once Gator has finished using performance counters. + * -+ * Remove all post dependencies of an atom from the context ringbuffers. ++ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the ++ * Mali specific information that will be returned to Gator. ++ * @param opaque_handles A wrapper structure for kbase structures. ++ */ ++extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles); ++ ++/** ++ * @brief Poll whether a counter dump is successful. + * -+ * The original atom's event_code will be propogated to all dependent atoms. ++ * @param opaque_handles A wrapper structure for kbase structures. ++ * @param[out] success Non-zero on success, zero on failure. + * -+ * Context: Caller must hold the HW access lock ++ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a ++ * completed dump may not have dumped succesfully, so the caller must test for both ++ * a completed and successful dump before processing counters. + */ -+static void kbase_js_evict_deps(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js, int prio) -+{ -+ struct kbase_jd_atom *x_dep = katom->x_post_dep; -+ struct kbase_jd_atom *next_katom = katom->post_dep; -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success); + -+ if (next_katom) { -+ KBASE_DEBUG_ASSERT(next_katom->status != -+ KBASE_JD_ATOM_STATE_HW_COMPLETED); -+ next_katom->will_fail_event_code = katom->event_code; ++/** ++ * @brief Request the generation of a new counter dump. ++ * ++ * @param opaque_handles A wrapper structure for kbase structures. 
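Taken together with kbase_gator_instr_hwcnt_dump_irq() declared just below, the calls documented above suggest the following capture cycle. This is a hedged sketch of a hypothetical caller (capture_one_dump() is not part of the driver): the all-ones bitmasks simply enable every counter, since each bit covers four sequential counters of a 64-counter block, and a real client would sleep or back off between polls instead of spinning.

static void capture_one_dump(void)
{
	struct kbase_gator_hwcnt_info info = { 0 };
	struct kbase_gator_hwcnt_handles *handles;
	uint32_t success = 0;

	/* One enable mask per hwc_type class; 0xffff enables all 64 counters. */
	info.bitmask[0] = 0xffff;	/* job manager */
	info.bitmask[1] = 0xffff;	/* tiler */
	info.bitmask[2] = 0xffff;	/* shader cores */
	info.bitmask[3] = 0xffff;	/* MMU / L2 */

	handles = kbase_gator_hwcnt_init(&info);
	if (!handles)
		return;

	/* Queue a dump request, then poll until it resolves. */
	kbase_gator_instr_hwcnt_dump_irq(handles);
	while (!kbase_gator_instr_hwcnt_dump_complete(handles, &success))
		;	/* a real client would sleep here and bound the retries */

	if (success) {
		/* Counter values are now valid in info.kernel_dump_buffer. */
	}

	kbase_gator_hwcnt_term(&info, handles);
}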
++ * ++ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise. ++ */ ++extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles); + -+ } ++/** ++ * @brief This function is used to fetch the names table based on the Mali device in use. ++ * ++ * @param[out] total_counters The total number of counters short names in the Mali devices' list. ++ * ++ * @return Pointer to an array of strings of length *total_counters. ++ */ ++extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters); + -+ /* Has cross slot depenency. */ -+ if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | -+ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { -+ /* Remove dependency.*/ -+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++/** ++ * @brief This function is used to terminate the use of the names table. ++ * ++ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value. ++ */ ++extern void kbase_gator_hwcnt_term_names(void); + -+ /* Fail if it had a data dependency. */ -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { -+ x_dep->will_fail_event_code = katom->event_code; -+ } -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) -+ kbase_js_move_to_tree(x_dep); -+ } -+} ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +new file mode 100644 +index 000000000..cad19b662 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +@@ -0,0 +1,2170 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) -+{ -+ struct kbase_jd_atom *katom; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_device *kbdev; -+ int pulled; + -+ KBASE_DEBUG_ASSERT(kctx); + -+ kbdev = kctx->kbdev; ++#ifndef _KBASE_GATOR_HWCNT_NAMES_H_ ++#define _KBASE_GATOR_HWCNT_NAMES_H_ + -+ js_devdata = &kbdev->js_data; -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/* ++ * "Short names" for hardware counters used by Streamline. Counters names are ++ * stored in accordance with their memory layout in the binary counter block ++ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block ++ * of 64 counters, and each GPU implements the same set of "masters" although ++ * the counters each master exposes within its block of 64 may vary. ++ * ++ * Counters which are an empty string are simply "holes" in the counter memory ++ * where no counter exists. 
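Because each block type owns a fixed 64-entry slice of this table, a caller can enumerate exactly which counters a given GPU exposes by walking the array returned by kbase_gator_hwcnt_init_names() and skipping the holes. A small hypothetical sketch (list_counter_names() is illustrative only):

static void list_counter_names(void)
{
	const char * const *names;
	uint32_t total = 0;
	uint32_t i;

	names = kbase_gator_hwcnt_init_names(&total);
	if (!names)
		return;

	/* Each block type contributes a fixed 64-entry slice; empty strings
	 * are holes where no counter exists. */
	for (i = 0; i < total; i++) {
		if (names[i][0] == '\0')
			continue;
		printk("block type %u, offset %u: %s\n", i / 64, i % 64, names[i]);
	}

	/* Only legal because init_names() returned a non-NULL table. */
	kbase_gator_hwcnt_term_names();
}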
++ */ + -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) -+ return NULL; -+ if (kbase_pm_is_suspending(kbdev)) -+ return NULL; ++static const char * const hardware_counters_mali_t60x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T60x_MESSAGES_SENT", ++ "T60x_MESSAGES_RECEIVED", ++ "T60x_GPU_ACTIVE", ++ "T60x_IRQ_ACTIVE", ++ "T60x_JS0_JOBS", ++ "T60x_JS0_TASKS", ++ "T60x_JS0_ACTIVE", ++ "", ++ "T60x_JS0_WAIT_READ", ++ "T60x_JS0_WAIT_ISSUE", ++ "T60x_JS0_WAIT_DEPEND", ++ "T60x_JS0_WAIT_FINISH", ++ "T60x_JS1_JOBS", ++ "T60x_JS1_TASKS", ++ "T60x_JS1_ACTIVE", ++ "", ++ "T60x_JS1_WAIT_READ", ++ "T60x_JS1_WAIT_ISSUE", ++ "T60x_JS1_WAIT_DEPEND", ++ "T60x_JS1_WAIT_FINISH", ++ "T60x_JS2_JOBS", ++ "T60x_JS2_TASKS", ++ "T60x_JS2_ACTIVE", ++ "", ++ "T60x_JS2_WAIT_READ", ++ "T60x_JS2_WAIT_ISSUE", ++ "T60x_JS2_WAIT_DEPEND", ++ "T60x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ katom = jsctx_rb_peek(kctx, js); -+ if (!katom) -+ return NULL; -+ if (kctx->blocked_js[js][katom->sched_priority]) -+ return NULL; -+ if (atomic_read(&katom->blocked)) -+ return NULL; ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T60x_TI_JOBS_PROCESSED", ++ "T60x_TI_TRIANGLES", ++ "T60x_TI_QUADS", ++ "T60x_TI_POLYGONS", ++ "T60x_TI_POINTS", ++ "T60x_TI_LINES", ++ "T60x_TI_VCACHE_HIT", ++ "T60x_TI_VCACHE_MISS", ++ "T60x_TI_FRONT_FACING", ++ "T60x_TI_BACK_FACING", ++ "T60x_TI_PRIM_VISIBLE", ++ "T60x_TI_PRIM_CULLED", ++ "T60x_TI_PRIM_CLIPPED", ++ "T60x_TI_LEVEL0", ++ "T60x_TI_LEVEL1", ++ "T60x_TI_LEVEL2", ++ "T60x_TI_LEVEL3", ++ "T60x_TI_LEVEL4", ++ "T60x_TI_LEVEL5", ++ "T60x_TI_LEVEL6", ++ "T60x_TI_LEVEL7", ++ "T60x_TI_COMMAND_1", ++ "T60x_TI_COMMAND_2", ++ "T60x_TI_COMMAND_3", ++ "T60x_TI_COMMAND_4", ++ "T60x_TI_COMMAND_4_7", ++ "T60x_TI_COMMAND_8_15", ++ "T60x_TI_COMMAND_16_63", ++ "T60x_TI_COMMAND_64", ++ "T60x_TI_COMPRESS_IN", ++ "T60x_TI_COMPRESS_OUT", ++ "T60x_TI_COMPRESS_FLUSH", ++ "T60x_TI_TIMESTAMPS", ++ "T60x_TI_PCACHE_HIT", ++ "T60x_TI_PCACHE_MISS", ++ "T60x_TI_PCACHE_LINE", ++ "T60x_TI_PCACHE_STALL", ++ "T60x_TI_WRBUF_HIT", ++ "T60x_TI_WRBUF_MISS", ++ "T60x_TI_WRBUF_LINE", ++ "T60x_TI_WRBUF_PARTIAL", ++ "T60x_TI_WRBUF_STALL", ++ "T60x_TI_ACTIVE", ++ "T60x_TI_LOADING_DESC", ++ "T60x_TI_INDEX_WAIT", ++ "T60x_TI_INDEX_RANGE_WAIT", ++ "T60x_TI_VERTEX_WAIT", ++ "T60x_TI_PCACHE_WAIT", ++ "T60x_TI_WRBUF_WAIT", ++ "T60x_TI_BUS_READ", ++ "T60x_TI_BUS_WRITE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T60x_TI_UTLB_STALL", ++ "T60x_TI_UTLB_REPLAY_MISS", ++ "T60x_TI_UTLB_REPLAY_FULL", ++ "T60x_TI_UTLB_NEW_MISS", ++ "T60x_TI_UTLB_HIT", + -+ /* Due to ordering restrictions when unpulling atoms on failure, we do -+ * not allow multiple runs of fail-dep atoms from the same context to be -+ * present on the same slot */ -+ if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { -+ struct kbase_jd_atom *prev_atom = -+ kbase_backend_inspect_tail(kbdev, js); ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T60x_FRAG_ACTIVE", ++ "T60x_FRAG_PRIMITIVES", ++ "T60x_FRAG_PRIMITIVES_DROPPED", ++ "T60x_FRAG_CYCLES_DESC", ++ "T60x_FRAG_CYCLES_PLR", ++ "T60x_FRAG_CYCLES_VERT", ++ "T60x_FRAG_CYCLES_TRISETUP", ++ "T60x_FRAG_CYCLES_RAST", ++ "T60x_FRAG_THREADS", ++ "T60x_FRAG_DUMMY_THREADS", ++ "T60x_FRAG_QUADS_RAST", ++ "T60x_FRAG_QUADS_EZS_TEST", ++ "T60x_FRAG_QUADS_EZS_KILLED", ++ "T60x_FRAG_THREADS_LZS_TEST", ++ 
"T60x_FRAG_THREADS_LZS_KILLED", ++ "T60x_FRAG_CYCLES_NO_TILE", ++ "T60x_FRAG_NUM_TILES", ++ "T60x_FRAG_TRANS_ELIM", ++ "T60x_COMPUTE_ACTIVE", ++ "T60x_COMPUTE_TASKS", ++ "T60x_COMPUTE_THREADS", ++ "T60x_COMPUTE_CYCLES_DESC", ++ "T60x_TRIPIPE_ACTIVE", ++ "T60x_ARITH_WORDS", ++ "T60x_ARITH_CYCLES_REG", ++ "T60x_ARITH_CYCLES_L0", ++ "T60x_ARITH_FRAG_DEPEND", ++ "T60x_LS_WORDS", ++ "T60x_LS_ISSUES", ++ "T60x_LS_RESTARTS", ++ "T60x_LS_REISSUES_MISS", ++ "T60x_LS_REISSUES_VD", ++ "T60x_LS_REISSUE_ATTRIB_MISS", ++ "T60x_LS_NO_WB", ++ "T60x_TEX_WORDS", ++ "T60x_TEX_BUBBLES", ++ "T60x_TEX_WORDS_L0", ++ "T60x_TEX_WORDS_DESC", ++ "T60x_TEX_ISSUES", ++ "T60x_TEX_RECIRC_FMISS", ++ "T60x_TEX_RECIRC_DESC", ++ "T60x_TEX_RECIRC_MULTI", ++ "T60x_TEX_RECIRC_PMISS", ++ "T60x_TEX_RECIRC_CONF", ++ "T60x_LSC_READ_HITS", ++ "T60x_LSC_READ_MISSES", ++ "T60x_LSC_WRITE_HITS", ++ "T60x_LSC_WRITE_MISSES", ++ "T60x_LSC_ATOMIC_HITS", ++ "T60x_LSC_ATOMIC_MISSES", ++ "T60x_LSC_LINE_FETCHES", ++ "T60x_LSC_DIRTY_LINE", ++ "T60x_LSC_SNOOPS", ++ "T60x_AXI_TLB_STALL", ++ "T60x_AXI_TLB_MISS", ++ "T60x_AXI_TLB_TRANSACTION", ++ "T60x_LS_TLB_MISS", ++ "T60x_LS_TLB_HIT", ++ "T60x_AXI_BEATS_READ", ++ "T60x_AXI_BEATS_WRITTEN", + -+ if (prev_atom && prev_atom->kctx != kctx) -+ return NULL; -+ } ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T60x_MMU_HIT", ++ "T60x_MMU_NEW_MISS", ++ "T60x_MMU_REPLAY_FULL", ++ "T60x_MMU_REPLAY_MISS", ++ "T60x_MMU_TABLE_WALK", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T60x_UTLB_HIT", ++ "T60x_UTLB_NEW_MISS", ++ "T60x_UTLB_REPLAY_FULL", ++ "T60x_UTLB_REPLAY_MISS", ++ "T60x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T60x_L2_EXT_WRITE_BEATS", ++ "T60x_L2_EXT_READ_BEATS", ++ "T60x_L2_ANY_LOOKUP", ++ "T60x_L2_READ_LOOKUP", ++ "T60x_L2_SREAD_LOOKUP", ++ "T60x_L2_READ_REPLAY", ++ "T60x_L2_READ_SNOOP", ++ "T60x_L2_READ_HIT", ++ "T60x_L2_CLEAN_MISS", ++ "T60x_L2_WRITE_LOOKUP", ++ "T60x_L2_SWRITE_LOOKUP", ++ "T60x_L2_WRITE_REPLAY", ++ "T60x_L2_WRITE_SNOOP", ++ "T60x_L2_WRITE_HIT", ++ "T60x_L2_EXT_READ_FULL", ++ "T60x_L2_EXT_READ_HALF", ++ "T60x_L2_EXT_WRITE_FULL", ++ "T60x_L2_EXT_WRITE_HALF", ++ "T60x_L2_EXT_READ", ++ "T60x_L2_EXT_READ_LINE", ++ "T60x_L2_EXT_WRITE", ++ "T60x_L2_EXT_WRITE_LINE", ++ "T60x_L2_EXT_WRITE_SMALL", ++ "T60x_L2_EXT_BARRIER", ++ "T60x_L2_EXT_AR_STALL", ++ "T60x_L2_EXT_R_BUF_FULL", ++ "T60x_L2_EXT_RD_BUF_FULL", ++ "T60x_L2_EXT_R_RAW", ++ "T60x_L2_EXT_W_STALL", ++ "T60x_L2_EXT_W_BUF_FULL", ++ "T60x_L2_EXT_R_W_HAZARD", ++ "T60x_L2_TAG_HAZARD", ++ "T60x_L2_SNOOP_FULL", ++ "T60x_L2_REPLAY_FULL" ++}; ++static const char * const hardware_counters_mali_t62x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T62x_MESSAGES_SENT", ++ "T62x_MESSAGES_RECEIVED", ++ "T62x_GPU_ACTIVE", ++ "T62x_IRQ_ACTIVE", ++ "T62x_JS0_JOBS", ++ "T62x_JS0_TASKS", ++ "T62x_JS0_ACTIVE", ++ "", ++ "T62x_JS0_WAIT_READ", ++ "T62x_JS0_WAIT_ISSUE", ++ "T62x_JS0_WAIT_DEPEND", ++ "T62x_JS0_WAIT_FINISH", ++ "T62x_JS1_JOBS", ++ "T62x_JS1_TASKS", ++ "T62x_JS1_ACTIVE", ++ "", ++ "T62x_JS1_WAIT_READ", ++ "T62x_JS1_WAIT_ISSUE", ++ "T62x_JS1_WAIT_DEPEND", ++ "T62x_JS1_WAIT_FINISH", ++ "T62x_JS2_JOBS", ++ "T62x_JS2_TASKS", ++ "T62x_JS2_ACTIVE", ++ "", ++ "T62x_JS2_WAIT_READ", ++ "T62x_JS2_WAIT_ISSUE", ++ "T62x_JS2_WAIT_DEPEND", ++ "T62x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ 
if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { -+ if (katom->x_pre_dep->gpu_rb_state == -+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || -+ katom->x_pre_dep->will_fail_event_code) -+ return NULL; -+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && -+ kbase_backend_nr_atoms_on_slot(kbdev, js)) -+ return NULL; -+ } -+ -+ kbase_ctx_flag_set(kctx, KCTX_PULLED); ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T62x_TI_JOBS_PROCESSED", ++ "T62x_TI_TRIANGLES", ++ "T62x_TI_QUADS", ++ "T62x_TI_POLYGONS", ++ "T62x_TI_POINTS", ++ "T62x_TI_LINES", ++ "T62x_TI_VCACHE_HIT", ++ "T62x_TI_VCACHE_MISS", ++ "T62x_TI_FRONT_FACING", ++ "T62x_TI_BACK_FACING", ++ "T62x_TI_PRIM_VISIBLE", ++ "T62x_TI_PRIM_CULLED", ++ "T62x_TI_PRIM_CLIPPED", ++ "T62x_TI_LEVEL0", ++ "T62x_TI_LEVEL1", ++ "T62x_TI_LEVEL2", ++ "T62x_TI_LEVEL3", ++ "T62x_TI_LEVEL4", ++ "T62x_TI_LEVEL5", ++ "T62x_TI_LEVEL6", ++ "T62x_TI_LEVEL7", ++ "T62x_TI_COMMAND_1", ++ "T62x_TI_COMMAND_2", ++ "T62x_TI_COMMAND_3", ++ "T62x_TI_COMMAND_4", ++ "T62x_TI_COMMAND_5_7", ++ "T62x_TI_COMMAND_8_15", ++ "T62x_TI_COMMAND_16_63", ++ "T62x_TI_COMMAND_64", ++ "T62x_TI_COMPRESS_IN", ++ "T62x_TI_COMPRESS_OUT", ++ "T62x_TI_COMPRESS_FLUSH", ++ "T62x_TI_TIMESTAMPS", ++ "T62x_TI_PCACHE_HIT", ++ "T62x_TI_PCACHE_MISS", ++ "T62x_TI_PCACHE_LINE", ++ "T62x_TI_PCACHE_STALL", ++ "T62x_TI_WRBUF_HIT", ++ "T62x_TI_WRBUF_MISS", ++ "T62x_TI_WRBUF_LINE", ++ "T62x_TI_WRBUF_PARTIAL", ++ "T62x_TI_WRBUF_STALL", ++ "T62x_TI_ACTIVE", ++ "T62x_TI_LOADING_DESC", ++ "T62x_TI_INDEX_WAIT", ++ "T62x_TI_INDEX_RANGE_WAIT", ++ "T62x_TI_VERTEX_WAIT", ++ "T62x_TI_PCACHE_WAIT", ++ "T62x_TI_WRBUF_WAIT", ++ "T62x_TI_BUS_READ", ++ "T62x_TI_BUS_WRITE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T62x_TI_UTLB_STALL", ++ "T62x_TI_UTLB_REPLAY_MISS", ++ "T62x_TI_UTLB_REPLAY_FULL", ++ "T62x_TI_UTLB_NEW_MISS", ++ "T62x_TI_UTLB_HIT", + -+ pulled = atomic_inc_return(&kctx->atoms_pulled); -+ if (pulled == 1 && !kctx->slots_pullable) { -+ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); -+ atomic_inc(&kbdev->js_data.nr_contexts_runnable); -+ } -+ atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); -+ kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; -+ jsctx_rb_pull(kctx, katom); ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "T62x_SHADER_CORE_ACTIVE", ++ "T62x_FRAG_ACTIVE", ++ "T62x_FRAG_PRIMITIVES", ++ "T62x_FRAG_PRIMITIVES_DROPPED", ++ "T62x_FRAG_CYCLES_DESC", ++ "T62x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T62x_FRAG_CYCLES_VERT", ++ "T62x_FRAG_CYCLES_TRISETUP", ++ "T62x_FRAG_CYCLES_EZS_ACTIVE", ++ "T62x_FRAG_THREADS", ++ "T62x_FRAG_DUMMY_THREADS", ++ "T62x_FRAG_QUADS_RAST", ++ "T62x_FRAG_QUADS_EZS_TEST", ++ "T62x_FRAG_QUADS_EZS_KILLED", ++ "T62x_FRAG_THREADS_LZS_TEST", ++ "T62x_FRAG_THREADS_LZS_KILLED", ++ "T62x_FRAG_CYCLES_NO_TILE", ++ "T62x_FRAG_NUM_TILES", ++ "T62x_FRAG_TRANS_ELIM", ++ "T62x_COMPUTE_ACTIVE", ++ "T62x_COMPUTE_TASKS", ++ "T62x_COMPUTE_THREADS", ++ "T62x_COMPUTE_CYCLES_DESC", ++ "T62x_TRIPIPE_ACTIVE", ++ "T62x_ARITH_WORDS", ++ "T62x_ARITH_CYCLES_REG", ++ "T62x_ARITH_CYCLES_L0", ++ "T62x_ARITH_FRAG_DEPEND", ++ "T62x_LS_WORDS", ++ "T62x_LS_ISSUES", ++ "T62x_LS_RESTARTS", ++ "T62x_LS_REISSUES_MISS", ++ "T62x_LS_REISSUES_VD", ++ "T62x_LS_REISSUE_ATTRIB_MISS", ++ "T62x_LS_NO_WB", ++ "T62x_TEX_WORDS", ++ "T62x_TEX_BUBBLES", ++ "T62x_TEX_WORDS_L0", ++ "T62x_TEX_WORDS_DESC", ++ "T62x_TEX_ISSUES", ++ "T62x_TEX_RECIRC_FMISS", ++ "T62x_TEX_RECIRC_DESC", ++ "T62x_TEX_RECIRC_MULTI", ++ "T62x_TEX_RECIRC_PMISS", ++ "T62x_TEX_RECIRC_CONF", ++ 
"T62x_LSC_READ_HITS", ++ "T62x_LSC_READ_MISSES", ++ "T62x_LSC_WRITE_HITS", ++ "T62x_LSC_WRITE_MISSES", ++ "T62x_LSC_ATOMIC_HITS", ++ "T62x_LSC_ATOMIC_MISSES", ++ "T62x_LSC_LINE_FETCHES", ++ "T62x_LSC_DIRTY_LINE", ++ "T62x_LSC_SNOOPS", ++ "T62x_AXI_TLB_STALL", ++ "T62x_AXI_TLB_MISS", ++ "T62x_AXI_TLB_TRANSACTION", ++ "T62x_LS_TLB_MISS", ++ "T62x_LS_TLB_HIT", ++ "T62x_AXI_BEATS_READ", ++ "T62x_AXI_BEATS_WRITTEN", + -+ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T62x_MMU_HIT", ++ "T62x_MMU_NEW_MISS", ++ "T62x_MMU_REPLAY_FULL", ++ "T62x_MMU_REPLAY_MISS", ++ "T62x_MMU_TABLE_WALK", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T62x_UTLB_HIT", ++ "T62x_UTLB_NEW_MISS", ++ "T62x_UTLB_REPLAY_FULL", ++ "T62x_UTLB_REPLAY_MISS", ++ "T62x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T62x_L2_EXT_WRITE_BEATS", ++ "T62x_L2_EXT_READ_BEATS", ++ "T62x_L2_ANY_LOOKUP", ++ "T62x_L2_READ_LOOKUP", ++ "T62x_L2_SREAD_LOOKUP", ++ "T62x_L2_READ_REPLAY", ++ "T62x_L2_READ_SNOOP", ++ "T62x_L2_READ_HIT", ++ "T62x_L2_CLEAN_MISS", ++ "T62x_L2_WRITE_LOOKUP", ++ "T62x_L2_SWRITE_LOOKUP", ++ "T62x_L2_WRITE_REPLAY", ++ "T62x_L2_WRITE_SNOOP", ++ "T62x_L2_WRITE_HIT", ++ "T62x_L2_EXT_READ_FULL", ++ "T62x_L2_EXT_READ_HALF", ++ "T62x_L2_EXT_WRITE_FULL", ++ "T62x_L2_EXT_WRITE_HALF", ++ "T62x_L2_EXT_READ", ++ "T62x_L2_EXT_READ_LINE", ++ "T62x_L2_EXT_WRITE", ++ "T62x_L2_EXT_WRITE_LINE", ++ "T62x_L2_EXT_WRITE_SMALL", ++ "T62x_L2_EXT_BARRIER", ++ "T62x_L2_EXT_AR_STALL", ++ "T62x_L2_EXT_R_BUF_FULL", ++ "T62x_L2_EXT_RD_BUF_FULL", ++ "T62x_L2_EXT_R_RAW", ++ "T62x_L2_EXT_W_STALL", ++ "T62x_L2_EXT_W_BUF_FULL", ++ "T62x_L2_EXT_R_W_HAZARD", ++ "T62x_L2_TAG_HAZARD", ++ "T62x_L2_SNOOP_FULL", ++ "T62x_L2_REPLAY_FULL" ++}; + -+ katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; ++static const char * const hardware_counters_mali_t72x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T72x_GPU_ACTIVE", ++ "T72x_IRQ_ACTIVE", ++ "T72x_JS0_JOBS", ++ "T72x_JS0_TASKS", ++ "T72x_JS0_ACTIVE", ++ "T72x_JS1_JOBS", ++ "T72x_JS1_TASKS", ++ "T72x_JS1_ACTIVE", ++ "T72x_JS2_JOBS", ++ "T72x_JS2_TASKS", ++ "T72x_JS2_ACTIVE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ katom->ticks = 0; ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T72x_TI_JOBS_PROCESSED", ++ "T72x_TI_TRIANGLES", ++ "T72x_TI_QUADS", ++ "T72x_TI_POLYGONS", ++ "T72x_TI_POINTS", ++ "T72x_TI_LINES", ++ "T72x_TI_FRONT_FACING", ++ "T72x_TI_BACK_FACING", ++ "T72x_TI_PRIM_VISIBLE", ++ "T72x_TI_PRIM_CULLED", ++ "T72x_TI_PRIM_CLIPPED", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T72x_TI_ACTIVE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ return katom; -+} ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T72x_FRAG_ACTIVE", ++ "T72x_FRAG_PRIMITIVES", ++ "T72x_FRAG_PRIMITIVES_DROPPED", ++ "T72x_FRAG_THREADS", ++ "T72x_FRAG_DUMMY_THREADS", ++ "T72x_FRAG_QUADS_RAST", ++ "T72x_FRAG_QUADS_EZS_TEST", ++ "T72x_FRAG_QUADS_EZS_KILLED", ++ 
"T72x_FRAG_THREADS_LZS_TEST", ++ "T72x_FRAG_THREADS_LZS_KILLED", ++ "T72x_FRAG_CYCLES_NO_TILE", ++ "T72x_FRAG_NUM_TILES", ++ "T72x_FRAG_TRANS_ELIM", ++ "T72x_COMPUTE_ACTIVE", ++ "T72x_COMPUTE_TASKS", ++ "T72x_COMPUTE_THREADS", ++ "T72x_TRIPIPE_ACTIVE", ++ "T72x_ARITH_WORDS", ++ "T72x_ARITH_CYCLES_REG", ++ "T72x_LS_WORDS", ++ "T72x_LS_ISSUES", ++ "T72x_LS_RESTARTS", ++ "T72x_LS_REISSUES_MISS", ++ "T72x_TEX_WORDS", ++ "T72x_TEX_BUBBLES", ++ "T72x_TEX_ISSUES", ++ "T72x_LSC_READ_HITS", ++ "T72x_LSC_READ_MISSES", ++ "T72x_LSC_WRITE_HITS", ++ "T72x_LSC_WRITE_MISSES", ++ "T72x_LSC_ATOMIC_HITS", ++ "T72x_LSC_ATOMIC_MISSES", ++ "T72x_LSC_LINE_FETCHES", ++ "T72x_LSC_DIRTY_LINE", ++ "T72x_LSC_SNOOPS", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T72x_L2_EXT_WRITE_BEAT", ++ "T72x_L2_EXT_READ_BEAT", ++ "T72x_L2_READ_SNOOP", ++ "T72x_L2_READ_HIT", ++ "T72x_L2_WRITE_SNOOP", ++ "T72x_L2_WRITE_HIT", ++ "T72x_L2_EXT_WRITE_SMALL", ++ "T72x_L2_EXT_BARRIER", ++ "T72x_L2_EXT_AR_STALL", ++ "T72x_L2_EXT_W_STALL", ++ "T72x_L2_SNOOP_FULL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "" ++}; + -+static void js_return_worker(struct work_struct *data) -+{ -+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; -+ struct kbasep_js_atom_retained_state retained_state; -+ int js = katom->slot_nr; -+ int prio = katom->sched_priority; -+ bool timer_sync = false; -+ bool context_idle = false; -+ unsigned long flags; -+ base_jd_core_req core_req = katom->core_req; -+ u64 affinity = katom->affinity; -+ enum kbase_atom_coreref_state coreref_state = katom->coreref_state; ++static const char * const hardware_counters_mali_t76x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T76x_MESSAGES_SENT", ++ "T76x_MESSAGES_RECEIVED", ++ "T76x_GPU_ACTIVE", ++ "T76x_IRQ_ACTIVE", ++ "T76x_JS0_JOBS", ++ "T76x_JS0_TASKS", ++ "T76x_JS0_ACTIVE", ++ "", ++ "T76x_JS0_WAIT_READ", ++ "T76x_JS0_WAIT_ISSUE", ++ "T76x_JS0_WAIT_DEPEND", ++ "T76x_JS0_WAIT_FINISH", ++ "T76x_JS1_JOBS", ++ "T76x_JS1_TASKS", ++ "T76x_JS1_ACTIVE", ++ "", ++ "T76x_JS1_WAIT_READ", ++ "T76x_JS1_WAIT_ISSUE", ++ "T76x_JS1_WAIT_DEPEND", ++ "T76x_JS1_WAIT_FINISH", ++ "T76x_JS2_JOBS", ++ "T76x_JS2_TASKS", ++ "T76x_JS2_ACTIVE", ++ "", ++ "T76x_JS2_WAIT_READ", ++ "T76x_JS2_WAIT_ISSUE", ++ "T76x_JS2_WAIT_DEPEND", ++ "T76x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T76x_TI_JOBS_PROCESSED", ++ "T76x_TI_TRIANGLES", ++ "T76x_TI_QUADS", ++ "T76x_TI_POLYGONS", ++ "T76x_TI_POINTS", ++ "T76x_TI_LINES", ++ "T76x_TI_VCACHE_HIT", ++ "T76x_TI_VCACHE_MISS", ++ "T76x_TI_FRONT_FACING", ++ 
"T76x_TI_BACK_FACING", ++ "T76x_TI_PRIM_VISIBLE", ++ "T76x_TI_PRIM_CULLED", ++ "T76x_TI_PRIM_CLIPPED", ++ "T76x_TI_LEVEL0", ++ "T76x_TI_LEVEL1", ++ "T76x_TI_LEVEL2", ++ "T76x_TI_LEVEL3", ++ "T76x_TI_LEVEL4", ++ "T76x_TI_LEVEL5", ++ "T76x_TI_LEVEL6", ++ "T76x_TI_LEVEL7", ++ "T76x_TI_COMMAND_1", ++ "T76x_TI_COMMAND_2", ++ "T76x_TI_COMMAND_3", ++ "T76x_TI_COMMAND_4", ++ "T76x_TI_COMMAND_5_7", ++ "T76x_TI_COMMAND_8_15", ++ "T76x_TI_COMMAND_16_63", ++ "T76x_TI_COMMAND_64", ++ "T76x_TI_COMPRESS_IN", ++ "T76x_TI_COMPRESS_OUT", ++ "T76x_TI_COMPRESS_FLUSH", ++ "T76x_TI_TIMESTAMPS", ++ "T76x_TI_PCACHE_HIT", ++ "T76x_TI_PCACHE_MISS", ++ "T76x_TI_PCACHE_LINE", ++ "T76x_TI_PCACHE_STALL", ++ "T76x_TI_WRBUF_HIT", ++ "T76x_TI_WRBUF_MISS", ++ "T76x_TI_WRBUF_LINE", ++ "T76x_TI_WRBUF_PARTIAL", ++ "T76x_TI_WRBUF_STALL", ++ "T76x_TI_ACTIVE", ++ "T76x_TI_LOADING_DESC", ++ "T76x_TI_INDEX_WAIT", ++ "T76x_TI_INDEX_RANGE_WAIT", ++ "T76x_TI_VERTEX_WAIT", ++ "T76x_TI_PCACHE_WAIT", ++ "T76x_TI_WRBUF_WAIT", ++ "T76x_TI_BUS_READ", ++ "T76x_TI_BUS_WRITE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T76x_TI_UTLB_HIT", ++ "T76x_TI_UTLB_NEW_MISS", ++ "T76x_TI_UTLB_REPLAY_FULL", ++ "T76x_TI_UTLB_REPLAY_MISS", ++ "T76x_TI_UTLB_STALL", + -+ kbase_backend_complete_wq(kbdev, katom); ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T76x_FRAG_ACTIVE", ++ "T76x_FRAG_PRIMITIVES", ++ "T76x_FRAG_PRIMITIVES_DROPPED", ++ "T76x_FRAG_CYCLES_DESC", ++ "T76x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T76x_FRAG_CYCLES_VERT", ++ "T76x_FRAG_CYCLES_TRISETUP", ++ "T76x_FRAG_CYCLES_EZS_ACTIVE", ++ "T76x_FRAG_THREADS", ++ "T76x_FRAG_DUMMY_THREADS", ++ "T76x_FRAG_QUADS_RAST", ++ "T76x_FRAG_QUADS_EZS_TEST", ++ "T76x_FRAG_QUADS_EZS_KILLED", ++ "T76x_FRAG_THREADS_LZS_TEST", ++ "T76x_FRAG_THREADS_LZS_KILLED", ++ "T76x_FRAG_CYCLES_NO_TILE", ++ "T76x_FRAG_NUM_TILES", ++ "T76x_FRAG_TRANS_ELIM", ++ "T76x_COMPUTE_ACTIVE", ++ "T76x_COMPUTE_TASKS", ++ "T76x_COMPUTE_THREADS", ++ "T76x_COMPUTE_CYCLES_DESC", ++ "T76x_TRIPIPE_ACTIVE", ++ "T76x_ARITH_WORDS", ++ "T76x_ARITH_CYCLES_REG", ++ "T76x_ARITH_CYCLES_L0", ++ "T76x_ARITH_FRAG_DEPEND", ++ "T76x_LS_WORDS", ++ "T76x_LS_ISSUES", ++ "T76x_LS_REISSUE_ATTR", ++ "T76x_LS_REISSUES_VARY", ++ "T76x_LS_VARY_RV_MISS", ++ "T76x_LS_VARY_RV_HIT", ++ "T76x_LS_NO_UNPARK", ++ "T76x_TEX_WORDS", ++ "T76x_TEX_BUBBLES", ++ "T76x_TEX_WORDS_L0", ++ "T76x_TEX_WORDS_DESC", ++ "T76x_TEX_ISSUES", ++ "T76x_TEX_RECIRC_FMISS", ++ "T76x_TEX_RECIRC_DESC", ++ "T76x_TEX_RECIRC_MULTI", ++ "T76x_TEX_RECIRC_PMISS", ++ "T76x_TEX_RECIRC_CONF", ++ "T76x_LSC_READ_HITS", ++ "T76x_LSC_READ_OP", ++ "T76x_LSC_WRITE_HITS", ++ "T76x_LSC_WRITE_OP", ++ "T76x_LSC_ATOMIC_HITS", ++ "T76x_LSC_ATOMIC_OP", ++ "T76x_LSC_LINE_FETCHES", ++ "T76x_LSC_DIRTY_LINE", ++ "T76x_LSC_SNOOPS", ++ "T76x_AXI_TLB_STALL", ++ "T76x_AXI_TLB_MISS", ++ "T76x_AXI_TLB_TRANSACTION", ++ "T76x_LS_TLB_MISS", ++ "T76x_LS_TLB_HIT", ++ "T76x_AXI_BEATS_READ", ++ "T76x_AXI_BEATS_WRITTEN", + -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) -+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom); ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T76x_MMU_HIT", ++ "T76x_MMU_NEW_MISS", ++ "T76x_MMU_REPLAY_FULL", ++ "T76x_MMU_REPLAY_MISS", ++ "T76x_MMU_TABLE_WALK", ++ "T76x_MMU_REQUESTS", ++ "", ++ "", ++ "T76x_UTLB_HIT", ++ "T76x_UTLB_NEW_MISS", ++ "T76x_UTLB_REPLAY_FULL", ++ "T76x_UTLB_REPLAY_MISS", ++ "T76x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T76x_L2_EXT_WRITE_BEATS", ++ "T76x_L2_EXT_READ_BEATS", ++ "T76x_L2_ANY_LOOKUP", ++ 
"T76x_L2_READ_LOOKUP", ++ "T76x_L2_SREAD_LOOKUP", ++ "T76x_L2_READ_REPLAY", ++ "T76x_L2_READ_SNOOP", ++ "T76x_L2_READ_HIT", ++ "T76x_L2_CLEAN_MISS", ++ "T76x_L2_WRITE_LOOKUP", ++ "T76x_L2_SWRITE_LOOKUP", ++ "T76x_L2_WRITE_REPLAY", ++ "T76x_L2_WRITE_SNOOP", ++ "T76x_L2_WRITE_HIT", ++ "T76x_L2_EXT_READ_FULL", ++ "", ++ "T76x_L2_EXT_WRITE_FULL", ++ "T76x_L2_EXT_R_W_HAZARD", ++ "T76x_L2_EXT_READ", ++ "T76x_L2_EXT_READ_LINE", ++ "T76x_L2_EXT_WRITE", ++ "T76x_L2_EXT_WRITE_LINE", ++ "T76x_L2_EXT_WRITE_SMALL", ++ "T76x_L2_EXT_BARRIER", ++ "T76x_L2_EXT_AR_STALL", ++ "T76x_L2_EXT_R_BUF_FULL", ++ "T76x_L2_EXT_RD_BUF_FULL", ++ "T76x_L2_EXT_R_RAW", ++ "T76x_L2_EXT_W_STALL", ++ "T76x_L2_EXT_W_BUF_FULL", ++ "T76x_L2_EXT_R_BUF_FULL", ++ "T76x_L2_TAG_HAZARD", ++ "T76x_L2_SNOOP_FULL", ++ "T76x_L2_REPLAY_FULL" ++}; + -+ kbasep_js_atom_retained_state_copy(&retained_state, katom); ++static const char * const hardware_counters_mali_t82x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T82x_MESSAGES_SENT", ++ "T82x_MESSAGES_RECEIVED", ++ "T82x_GPU_ACTIVE", ++ "T82x_IRQ_ACTIVE", ++ "T82x_JS0_JOBS", ++ "T82x_JS0_TASKS", ++ "T82x_JS0_ACTIVE", ++ "", ++ "T82x_JS0_WAIT_READ", ++ "T82x_JS0_WAIT_ISSUE", ++ "T82x_JS0_WAIT_DEPEND", ++ "T82x_JS0_WAIT_FINISH", ++ "T82x_JS1_JOBS", ++ "T82x_JS1_TASKS", ++ "T82x_JS1_ACTIVE", ++ "", ++ "T82x_JS1_WAIT_READ", ++ "T82x_JS1_WAIT_ISSUE", ++ "T82x_JS1_WAIT_DEPEND", ++ "T82x_JS1_WAIT_FINISH", ++ "T82x_JS2_JOBS", ++ "T82x_JS2_TASKS", ++ "T82x_JS2_ACTIVE", ++ "", ++ "T82x_JS2_WAIT_READ", ++ "T82x_JS2_WAIT_ISSUE", ++ "T82x_JS2_WAIT_DEPEND", ++ "T82x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T82x_TI_JOBS_PROCESSED", ++ "T82x_TI_TRIANGLES", ++ "T82x_TI_QUADS", ++ "T82x_TI_POLYGONS", ++ "T82x_TI_POINTS", ++ "T82x_TI_LINES", ++ "T82x_TI_FRONT_FACING", ++ "T82x_TI_BACK_FACING", ++ "T82x_TI_PRIM_VISIBLE", ++ "T82x_TI_PRIM_CULLED", ++ "T82x_TI_PRIM_CLIPPED", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T82x_TI_ACTIVE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ atomic_dec(&kctx->atoms_pulled); -+ atomic_dec(&kctx->atoms_pulled_slot[js]); ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T82x_FRAG_ACTIVE", ++ "T82x_FRAG_PRIMITIVES", ++ "T82x_FRAG_PRIMITIVES_DROPPED", ++ "T82x_FRAG_CYCLES_DESC", ++ "T82x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T82x_FRAG_CYCLES_VERT", ++ "T82x_FRAG_CYCLES_TRISETUP", ++ "T82x_FRAG_CYCLES_EZS_ACTIVE", ++ "T82x_FRAG_THREADS", ++ "T82x_FRAG_DUMMY_THREADS", ++ "T82x_FRAG_QUADS_RAST", ++ "T82x_FRAG_QUADS_EZS_TEST", ++ "T82x_FRAG_QUADS_EZS_KILLED", ++ "T82x_FRAG_THREADS_LZS_TEST", ++ "T82x_FRAG_THREADS_LZS_KILLED", ++ "T82x_FRAG_CYCLES_NO_TILE", ++ "T82x_FRAG_NUM_TILES", ++ "T82x_FRAG_TRANS_ELIM", ++ "T82x_COMPUTE_ACTIVE", ++ "T82x_COMPUTE_TASKS", ++ "T82x_COMPUTE_THREADS", ++ "T82x_COMPUTE_CYCLES_DESC", ++ "T82x_TRIPIPE_ACTIVE", ++ "T82x_ARITH_WORDS", ++ "T82x_ARITH_CYCLES_REG", ++ "T82x_ARITH_CYCLES_L0", ++ "T82x_ARITH_FRAG_DEPEND", ++ "T82x_LS_WORDS", ++ "T82x_LS_ISSUES", ++ 
"T82x_LS_REISSUE_ATTR", ++ "T82x_LS_REISSUES_VARY", ++ "T82x_LS_VARY_RV_MISS", ++ "T82x_LS_VARY_RV_HIT", ++ "T82x_LS_NO_UNPARK", ++ "T82x_TEX_WORDS", ++ "T82x_TEX_BUBBLES", ++ "T82x_TEX_WORDS_L0", ++ "T82x_TEX_WORDS_DESC", ++ "T82x_TEX_ISSUES", ++ "T82x_TEX_RECIRC_FMISS", ++ "T82x_TEX_RECIRC_DESC", ++ "T82x_TEX_RECIRC_MULTI", ++ "T82x_TEX_RECIRC_PMISS", ++ "T82x_TEX_RECIRC_CONF", ++ "T82x_LSC_READ_HITS", ++ "T82x_LSC_READ_OP", ++ "T82x_LSC_WRITE_HITS", ++ "T82x_LSC_WRITE_OP", ++ "T82x_LSC_ATOMIC_HITS", ++ "T82x_LSC_ATOMIC_OP", ++ "T82x_LSC_LINE_FETCHES", ++ "T82x_LSC_DIRTY_LINE", ++ "T82x_LSC_SNOOPS", ++ "T82x_AXI_TLB_STALL", ++ "T82x_AXI_TLB_MISS", ++ "T82x_AXI_TLB_TRANSACTION", ++ "T82x_LS_TLB_MISS", ++ "T82x_LS_TLB_HIT", ++ "T82x_AXI_BEATS_READ", ++ "T82x_AXI_BEATS_WRITTEN", + -+ atomic_dec(&katom->blocked); ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T82x_MMU_HIT", ++ "T82x_MMU_NEW_MISS", ++ "T82x_MMU_REPLAY_FULL", ++ "T82x_MMU_REPLAY_MISS", ++ "T82x_MMU_TABLE_WALK", ++ "T82x_MMU_REQUESTS", ++ "", ++ "", ++ "T82x_UTLB_HIT", ++ "T82x_UTLB_NEW_MISS", ++ "T82x_UTLB_REPLAY_FULL", ++ "T82x_UTLB_REPLAY_MISS", ++ "T82x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T82x_L2_EXT_WRITE_BEATS", ++ "T82x_L2_EXT_READ_BEATS", ++ "T82x_L2_ANY_LOOKUP", ++ "T82x_L2_READ_LOOKUP", ++ "T82x_L2_SREAD_LOOKUP", ++ "T82x_L2_READ_REPLAY", ++ "T82x_L2_READ_SNOOP", ++ "T82x_L2_READ_HIT", ++ "T82x_L2_CLEAN_MISS", ++ "T82x_L2_WRITE_LOOKUP", ++ "T82x_L2_SWRITE_LOOKUP", ++ "T82x_L2_WRITE_REPLAY", ++ "T82x_L2_WRITE_SNOOP", ++ "T82x_L2_WRITE_HIT", ++ "T82x_L2_EXT_READ_FULL", ++ "", ++ "T82x_L2_EXT_WRITE_FULL", ++ "T82x_L2_EXT_R_W_HAZARD", ++ "T82x_L2_EXT_READ", ++ "T82x_L2_EXT_READ_LINE", ++ "T82x_L2_EXT_WRITE", ++ "T82x_L2_EXT_WRITE_LINE", ++ "T82x_L2_EXT_WRITE_SMALL", ++ "T82x_L2_EXT_BARRIER", ++ "T82x_L2_EXT_AR_STALL", ++ "T82x_L2_EXT_R_BUF_FULL", ++ "T82x_L2_EXT_RD_BUF_FULL", ++ "T82x_L2_EXT_R_RAW", ++ "T82x_L2_EXT_W_STALL", ++ "T82x_L2_EXT_W_BUF_FULL", ++ "T82x_L2_EXT_R_BUF_FULL", ++ "T82x_L2_TAG_HAZARD", ++ "T82x_L2_SNOOP_FULL", ++ "T82x_L2_REPLAY_FULL" ++}; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++static const char * const hardware_counters_mali_t83x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T83x_MESSAGES_SENT", ++ "T83x_MESSAGES_RECEIVED", ++ "T83x_GPU_ACTIVE", ++ "T83x_IRQ_ACTIVE", ++ "T83x_JS0_JOBS", ++ "T83x_JS0_TASKS", ++ "T83x_JS0_ACTIVE", ++ "", ++ "T83x_JS0_WAIT_READ", ++ "T83x_JS0_WAIT_ISSUE", ++ "T83x_JS0_WAIT_DEPEND", ++ "T83x_JS0_WAIT_FINISH", ++ "T83x_JS1_JOBS", ++ "T83x_JS1_TASKS", ++ "T83x_JS1_ACTIVE", ++ "", ++ "T83x_JS1_WAIT_READ", ++ "T83x_JS1_WAIT_ISSUE", ++ "T83x_JS1_WAIT_DEPEND", ++ "T83x_JS1_WAIT_FINISH", ++ "T83x_JS2_JOBS", ++ "T83x_JS2_TASKS", ++ "T83x_JS2_ACTIVE", ++ "", ++ "T83x_JS2_WAIT_READ", ++ "T83x_JS2_WAIT_ISSUE", ++ "T83x_JS2_WAIT_DEPEND", ++ "T83x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--; ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T83x_TI_JOBS_PROCESSED", ++ "T83x_TI_TRIANGLES", ++ "T83x_TI_QUADS", ++ "T83x_TI_POLYGONS", ++ "T83x_TI_POINTS", ++ "T83x_TI_LINES", ++ "T83x_TI_FRONT_FACING", ++ "T83x_TI_BACK_FACING", ++ "T83x_TI_PRIM_VISIBLE", ++ "T83x_TI_PRIM_CULLED", ++ "T83x_TI_PRIM_CLIPPED", ++ "", ++ "", ++ "", ++ "", ++ "", 
++ "", ++ "", ++ "", ++ "T83x_TI_ACTIVE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ if (!atomic_read(&kctx->atoms_pulled_slot[js]) && -+ jsctx_rb_none_to_pull(kctx, js)) -+ timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T83x_FRAG_ACTIVE", ++ "T83x_FRAG_PRIMITIVES", ++ "T83x_FRAG_PRIMITIVES_DROPPED", ++ "T83x_FRAG_CYCLES_DESC", ++ "T83x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T83x_FRAG_CYCLES_VERT", ++ "T83x_FRAG_CYCLES_TRISETUP", ++ "T83x_FRAG_CYCLES_EZS_ACTIVE", ++ "T83x_FRAG_THREADS", ++ "T83x_FRAG_DUMMY_THREADS", ++ "T83x_FRAG_QUADS_RAST", ++ "T83x_FRAG_QUADS_EZS_TEST", ++ "T83x_FRAG_QUADS_EZS_KILLED", ++ "T83x_FRAG_THREADS_LZS_TEST", ++ "T83x_FRAG_THREADS_LZS_KILLED", ++ "T83x_FRAG_CYCLES_NO_TILE", ++ "T83x_FRAG_NUM_TILES", ++ "T83x_FRAG_TRANS_ELIM", ++ "T83x_COMPUTE_ACTIVE", ++ "T83x_COMPUTE_TASKS", ++ "T83x_COMPUTE_THREADS", ++ "T83x_COMPUTE_CYCLES_DESC", ++ "T83x_TRIPIPE_ACTIVE", ++ "T83x_ARITH_WORDS", ++ "T83x_ARITH_CYCLES_REG", ++ "T83x_ARITH_CYCLES_L0", ++ "T83x_ARITH_FRAG_DEPEND", ++ "T83x_LS_WORDS", ++ "T83x_LS_ISSUES", ++ "T83x_LS_REISSUE_ATTR", ++ "T83x_LS_REISSUES_VARY", ++ "T83x_LS_VARY_RV_MISS", ++ "T83x_LS_VARY_RV_HIT", ++ "T83x_LS_NO_UNPARK", ++ "T83x_TEX_WORDS", ++ "T83x_TEX_BUBBLES", ++ "T83x_TEX_WORDS_L0", ++ "T83x_TEX_WORDS_DESC", ++ "T83x_TEX_ISSUES", ++ "T83x_TEX_RECIRC_FMISS", ++ "T83x_TEX_RECIRC_DESC", ++ "T83x_TEX_RECIRC_MULTI", ++ "T83x_TEX_RECIRC_PMISS", ++ "T83x_TEX_RECIRC_CONF", ++ "T83x_LSC_READ_HITS", ++ "T83x_LSC_READ_OP", ++ "T83x_LSC_WRITE_HITS", ++ "T83x_LSC_WRITE_OP", ++ "T83x_LSC_ATOMIC_HITS", ++ "T83x_LSC_ATOMIC_OP", ++ "T83x_LSC_LINE_FETCHES", ++ "T83x_LSC_DIRTY_LINE", ++ "T83x_LSC_SNOOPS", ++ "T83x_AXI_TLB_STALL", ++ "T83x_AXI_TLB_MISS", ++ "T83x_AXI_TLB_TRANSACTION", ++ "T83x_LS_TLB_MISS", ++ "T83x_LS_TLB_HIT", ++ "T83x_AXI_BEATS_READ", ++ "T83x_AXI_BEATS_WRITTEN", + -+ /* If this slot has been blocked due to soft-stopped atoms, and all -+ * atoms have now been processed, then unblock the slot */ -+ if (!kctx->atoms_pulled_slot_pri[js][prio] && -+ kctx->blocked_js[js][prio]) { -+ kctx->blocked_js[js][prio] = false; ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T83x_MMU_HIT", ++ "T83x_MMU_NEW_MISS", ++ "T83x_MMU_REPLAY_FULL", ++ "T83x_MMU_REPLAY_MISS", ++ "T83x_MMU_TABLE_WALK", ++ "T83x_MMU_REQUESTS", ++ "", ++ "", ++ "T83x_UTLB_HIT", ++ "T83x_UTLB_NEW_MISS", ++ "T83x_UTLB_REPLAY_FULL", ++ "T83x_UTLB_REPLAY_MISS", ++ "T83x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T83x_L2_EXT_WRITE_BEATS", ++ "T83x_L2_EXT_READ_BEATS", ++ "T83x_L2_ANY_LOOKUP", ++ "T83x_L2_READ_LOOKUP", ++ "T83x_L2_SREAD_LOOKUP", ++ "T83x_L2_READ_REPLAY", ++ "T83x_L2_READ_SNOOP", ++ "T83x_L2_READ_HIT", ++ "T83x_L2_CLEAN_MISS", ++ "T83x_L2_WRITE_LOOKUP", ++ "T83x_L2_SWRITE_LOOKUP", ++ "T83x_L2_WRITE_REPLAY", ++ "T83x_L2_WRITE_SNOOP", ++ "T83x_L2_WRITE_HIT", ++ "T83x_L2_EXT_READ_FULL", ++ "", ++ "T83x_L2_EXT_WRITE_FULL", ++ "T83x_L2_EXT_R_W_HAZARD", ++ "T83x_L2_EXT_READ", ++ "T83x_L2_EXT_READ_LINE", ++ "T83x_L2_EXT_WRITE", ++ "T83x_L2_EXT_WRITE_LINE", ++ "T83x_L2_EXT_WRITE_SMALL", ++ "T83x_L2_EXT_BARRIER", ++ "T83x_L2_EXT_AR_STALL", ++ "T83x_L2_EXT_R_BUF_FULL", ++ "T83x_L2_EXT_RD_BUF_FULL", ++ 
"T83x_L2_EXT_R_RAW", ++ "T83x_L2_EXT_W_STALL", ++ "T83x_L2_EXT_W_BUF_FULL", ++ "T83x_L2_EXT_R_BUF_FULL", ++ "T83x_L2_TAG_HAZARD", ++ "T83x_L2_SNOOP_FULL", ++ "T83x_L2_REPLAY_FULL" ++}; + -+ /* Only mark the slot as pullable if the context is not idle - -+ * that case is handled below */ -+ if (atomic_read(&kctx->atoms_pulled) && -+ kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); -+ } ++static const char * const hardware_counters_mali_t86x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T86x_MESSAGES_SENT", ++ "T86x_MESSAGES_RECEIVED", ++ "T86x_GPU_ACTIVE", ++ "T86x_IRQ_ACTIVE", ++ "T86x_JS0_JOBS", ++ "T86x_JS0_TASKS", ++ "T86x_JS0_ACTIVE", ++ "", ++ "T86x_JS0_WAIT_READ", ++ "T86x_JS0_WAIT_ISSUE", ++ "T86x_JS0_WAIT_DEPEND", ++ "T86x_JS0_WAIT_FINISH", ++ "T86x_JS1_JOBS", ++ "T86x_JS1_TASKS", ++ "T86x_JS1_ACTIVE", ++ "", ++ "T86x_JS1_WAIT_READ", ++ "T86x_JS1_WAIT_ISSUE", ++ "T86x_JS1_WAIT_DEPEND", ++ "T86x_JS1_WAIT_FINISH", ++ "T86x_JS2_JOBS", ++ "T86x_JS2_TASKS", ++ "T86x_JS2_ACTIVE", ++ "", ++ "T86x_JS2_WAIT_READ", ++ "T86x_JS2_WAIT_ISSUE", ++ "T86x_JS2_WAIT_DEPEND", ++ "T86x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ if (!atomic_read(&kctx->atoms_pulled)) { -+ if (!kctx->slots_pullable) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ timer_sync = true; -+ } ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T86x_TI_JOBS_PROCESSED", ++ "T86x_TI_TRIANGLES", ++ "T86x_TI_QUADS", ++ "T86x_TI_POLYGONS", ++ "T86x_TI_POINTS", ++ "T86x_TI_LINES", ++ "T86x_TI_VCACHE_HIT", ++ "T86x_TI_VCACHE_MISS", ++ "T86x_TI_FRONT_FACING", ++ "T86x_TI_BACK_FACING", ++ "T86x_TI_PRIM_VISIBLE", ++ "T86x_TI_PRIM_CULLED", ++ "T86x_TI_PRIM_CLIPPED", ++ "T86x_TI_LEVEL0", ++ "T86x_TI_LEVEL1", ++ "T86x_TI_LEVEL2", ++ "T86x_TI_LEVEL3", ++ "T86x_TI_LEVEL4", ++ "T86x_TI_LEVEL5", ++ "T86x_TI_LEVEL6", ++ "T86x_TI_LEVEL7", ++ "T86x_TI_COMMAND_1", ++ "T86x_TI_COMMAND_2", ++ "T86x_TI_COMMAND_3", ++ "T86x_TI_COMMAND_4", ++ "T86x_TI_COMMAND_5_7", ++ "T86x_TI_COMMAND_8_15", ++ "T86x_TI_COMMAND_16_63", ++ "T86x_TI_COMMAND_64", ++ "T86x_TI_COMPRESS_IN", ++ "T86x_TI_COMPRESS_OUT", ++ "T86x_TI_COMPRESS_FLUSH", ++ "T86x_TI_TIMESTAMPS", ++ "T86x_TI_PCACHE_HIT", ++ "T86x_TI_PCACHE_MISS", ++ "T86x_TI_PCACHE_LINE", ++ "T86x_TI_PCACHE_STALL", ++ "T86x_TI_WRBUF_HIT", ++ "T86x_TI_WRBUF_MISS", ++ "T86x_TI_WRBUF_LINE", ++ "T86x_TI_WRBUF_PARTIAL", ++ "T86x_TI_WRBUF_STALL", ++ "T86x_TI_ACTIVE", ++ "T86x_TI_LOADING_DESC", ++ "T86x_TI_INDEX_WAIT", ++ "T86x_TI_INDEX_RANGE_WAIT", ++ "T86x_TI_VERTEX_WAIT", ++ "T86x_TI_PCACHE_WAIT", ++ "T86x_TI_WRBUF_WAIT", ++ "T86x_TI_BUS_READ", ++ "T86x_TI_BUS_WRITE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T86x_TI_UTLB_HIT", ++ "T86x_TI_UTLB_NEW_MISS", ++ "T86x_TI_UTLB_REPLAY_FULL", ++ "T86x_TI_UTLB_REPLAY_MISS", ++ "T86x_TI_UTLB_STALL", + -+ if (kctx->as_nr != KBASEP_AS_NR_INVALID && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ int num_slots = kbdev->gpu_props.num_job_slots; -+ int slot; ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T86x_FRAG_ACTIVE", ++ "T86x_FRAG_PRIMITIVES", ++ "T86x_FRAG_PRIMITIVES_DROPPED", ++ "T86x_FRAG_CYCLES_DESC", ++ "T86x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T86x_FRAG_CYCLES_VERT", ++ "T86x_FRAG_CYCLES_TRISETUP", ++ 
"T86x_FRAG_CYCLES_EZS_ACTIVE", ++ "T86x_FRAG_THREADS", ++ "T86x_FRAG_DUMMY_THREADS", ++ "T86x_FRAG_QUADS_RAST", ++ "T86x_FRAG_QUADS_EZS_TEST", ++ "T86x_FRAG_QUADS_EZS_KILLED", ++ "T86x_FRAG_THREADS_LZS_TEST", ++ "T86x_FRAG_THREADS_LZS_KILLED", ++ "T86x_FRAG_CYCLES_NO_TILE", ++ "T86x_FRAG_NUM_TILES", ++ "T86x_FRAG_TRANS_ELIM", ++ "T86x_COMPUTE_ACTIVE", ++ "T86x_COMPUTE_TASKS", ++ "T86x_COMPUTE_THREADS", ++ "T86x_COMPUTE_CYCLES_DESC", ++ "T86x_TRIPIPE_ACTIVE", ++ "T86x_ARITH_WORDS", ++ "T86x_ARITH_CYCLES_REG", ++ "T86x_ARITH_CYCLES_L0", ++ "T86x_ARITH_FRAG_DEPEND", ++ "T86x_LS_WORDS", ++ "T86x_LS_ISSUES", ++ "T86x_LS_REISSUE_ATTR", ++ "T86x_LS_REISSUES_VARY", ++ "T86x_LS_VARY_RV_MISS", ++ "T86x_LS_VARY_RV_HIT", ++ "T86x_LS_NO_UNPARK", ++ "T86x_TEX_WORDS", ++ "T86x_TEX_BUBBLES", ++ "T86x_TEX_WORDS_L0", ++ "T86x_TEX_WORDS_DESC", ++ "T86x_TEX_ISSUES", ++ "T86x_TEX_RECIRC_FMISS", ++ "T86x_TEX_RECIRC_DESC", ++ "T86x_TEX_RECIRC_MULTI", ++ "T86x_TEX_RECIRC_PMISS", ++ "T86x_TEX_RECIRC_CONF", ++ "T86x_LSC_READ_HITS", ++ "T86x_LSC_READ_OP", ++ "T86x_LSC_WRITE_HITS", ++ "T86x_LSC_WRITE_OP", ++ "T86x_LSC_ATOMIC_HITS", ++ "T86x_LSC_ATOMIC_OP", ++ "T86x_LSC_LINE_FETCHES", ++ "T86x_LSC_DIRTY_LINE", ++ "T86x_LSC_SNOOPS", ++ "T86x_AXI_TLB_STALL", ++ "T86x_AXI_TLB_MISS", ++ "T86x_AXI_TLB_TRANSACTION", ++ "T86x_LS_TLB_MISS", ++ "T86x_LS_TLB_HIT", ++ "T86x_AXI_BEATS_READ", ++ "T86x_AXI_BEATS_WRITTEN", + -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T86x_MMU_HIT", ++ "T86x_MMU_NEW_MISS", ++ "T86x_MMU_REPLAY_FULL", ++ "T86x_MMU_REPLAY_MISS", ++ "T86x_MMU_TABLE_WALK", ++ "T86x_MMU_REQUESTS", ++ "", ++ "", ++ "T86x_UTLB_HIT", ++ "T86x_UTLB_NEW_MISS", ++ "T86x_UTLB_REPLAY_FULL", ++ "T86x_UTLB_REPLAY_MISS", ++ "T86x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T86x_L2_EXT_WRITE_BEATS", ++ "T86x_L2_EXT_READ_BEATS", ++ "T86x_L2_ANY_LOOKUP", ++ "T86x_L2_READ_LOOKUP", ++ "T86x_L2_SREAD_LOOKUP", ++ "T86x_L2_READ_REPLAY", ++ "T86x_L2_READ_SNOOP", ++ "T86x_L2_READ_HIT", ++ "T86x_L2_CLEAN_MISS", ++ "T86x_L2_WRITE_LOOKUP", ++ "T86x_L2_SWRITE_LOOKUP", ++ "T86x_L2_WRITE_REPLAY", ++ "T86x_L2_WRITE_SNOOP", ++ "T86x_L2_WRITE_HIT", ++ "T86x_L2_EXT_READ_FULL", ++ "", ++ "T86x_L2_EXT_WRITE_FULL", ++ "T86x_L2_EXT_R_W_HAZARD", ++ "T86x_L2_EXT_READ", ++ "T86x_L2_EXT_READ_LINE", ++ "T86x_L2_EXT_WRITE", ++ "T86x_L2_EXT_WRITE_LINE", ++ "T86x_L2_EXT_WRITE_SMALL", ++ "T86x_L2_EXT_BARRIER", ++ "T86x_L2_EXT_AR_STALL", ++ "T86x_L2_EXT_R_BUF_FULL", ++ "T86x_L2_EXT_RD_BUF_FULL", ++ "T86x_L2_EXT_R_RAW", ++ "T86x_L2_EXT_W_STALL", ++ "T86x_L2_EXT_W_BUF_FULL", ++ "T86x_L2_EXT_R_BUF_FULL", ++ "T86x_L2_TAG_HAZARD", ++ "T86x_L2_SNOOP_FULL", ++ "T86x_L2_REPLAY_FULL" ++}; + -+ for (slot = 0; slot < num_slots; slot++) { -+ if (kbase_js_ctx_pullable(kctx, slot, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, slot); -+ } -+ } ++static const char * const hardware_counters_mali_t88x[] = { ++ /* Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "T88x_MESSAGES_SENT", ++ "T88x_MESSAGES_RECEIVED", ++ "T88x_GPU_ACTIVE", ++ "T88x_IRQ_ACTIVE", ++ "T88x_JS0_JOBS", ++ "T88x_JS0_TASKS", ++ "T88x_JS0_ACTIVE", ++ "", ++ "T88x_JS0_WAIT_READ", ++ "T88x_JS0_WAIT_ISSUE", ++ "T88x_JS0_WAIT_DEPEND", ++ "T88x_JS0_WAIT_FINISH", ++ "T88x_JS1_JOBS", ++ "T88x_JS1_TASKS", ++ "T88x_JS1_ACTIVE", ++ "", ++ "T88x_JS1_WAIT_READ", ++ "T88x_JS1_WAIT_ISSUE", ++ "T88x_JS1_WAIT_DEPEND", ++ 
"T88x_JS1_WAIT_FINISH", ++ "T88x_JS2_JOBS", ++ "T88x_JS2_TASKS", ++ "T88x_JS2_ACTIVE", ++ "", ++ "T88x_JS2_WAIT_READ", ++ "T88x_JS2_WAIT_ISSUE", ++ "T88x_JS2_WAIT_DEPEND", ++ "T88x_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ kbase_jm_idle_ctx(kbdev, kctx); ++ /*Tiler */ ++ "", ++ "", ++ "", ++ "T88x_TI_JOBS_PROCESSED", ++ "T88x_TI_TRIANGLES", ++ "T88x_TI_QUADS", ++ "T88x_TI_POLYGONS", ++ "T88x_TI_POINTS", ++ "T88x_TI_LINES", ++ "T88x_TI_VCACHE_HIT", ++ "T88x_TI_VCACHE_MISS", ++ "T88x_TI_FRONT_FACING", ++ "T88x_TI_BACK_FACING", ++ "T88x_TI_PRIM_VISIBLE", ++ "T88x_TI_PRIM_CULLED", ++ "T88x_TI_PRIM_CLIPPED", ++ "T88x_TI_LEVEL0", ++ "T88x_TI_LEVEL1", ++ "T88x_TI_LEVEL2", ++ "T88x_TI_LEVEL3", ++ "T88x_TI_LEVEL4", ++ "T88x_TI_LEVEL5", ++ "T88x_TI_LEVEL6", ++ "T88x_TI_LEVEL7", ++ "T88x_TI_COMMAND_1", ++ "T88x_TI_COMMAND_2", ++ "T88x_TI_COMMAND_3", ++ "T88x_TI_COMMAND_4", ++ "T88x_TI_COMMAND_5_7", ++ "T88x_TI_COMMAND_8_15", ++ "T88x_TI_COMMAND_16_63", ++ "T88x_TI_COMMAND_64", ++ "T88x_TI_COMPRESS_IN", ++ "T88x_TI_COMPRESS_OUT", ++ "T88x_TI_COMPRESS_FLUSH", ++ "T88x_TI_TIMESTAMPS", ++ "T88x_TI_PCACHE_HIT", ++ "T88x_TI_PCACHE_MISS", ++ "T88x_TI_PCACHE_LINE", ++ "T88x_TI_PCACHE_STALL", ++ "T88x_TI_WRBUF_HIT", ++ "T88x_TI_WRBUF_MISS", ++ "T88x_TI_WRBUF_LINE", ++ "T88x_TI_WRBUF_PARTIAL", ++ "T88x_TI_WRBUF_STALL", ++ "T88x_TI_ACTIVE", ++ "T88x_TI_LOADING_DESC", ++ "T88x_TI_INDEX_WAIT", ++ "T88x_TI_INDEX_RANGE_WAIT", ++ "T88x_TI_VERTEX_WAIT", ++ "T88x_TI_PCACHE_WAIT", ++ "T88x_TI_WRBUF_WAIT", ++ "T88x_TI_BUS_READ", ++ "T88x_TI_BUS_WRITE", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T88x_TI_UTLB_HIT", ++ "T88x_TI_UTLB_NEW_MISS", ++ "T88x_TI_UTLB_REPLAY_FULL", ++ "T88x_TI_UTLB_REPLAY_MISS", ++ "T88x_TI_UTLB_STALL", + -+ context_idle = true; -+ } ++ /* Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "T88x_FRAG_ACTIVE", ++ "T88x_FRAG_PRIMITIVES", ++ "T88x_FRAG_PRIMITIVES_DROPPED", ++ "T88x_FRAG_CYCLES_DESC", ++ "T88x_FRAG_CYCLES_FPKQ_ACTIVE", ++ "T88x_FRAG_CYCLES_VERT", ++ "T88x_FRAG_CYCLES_TRISETUP", ++ "T88x_FRAG_CYCLES_EZS_ACTIVE", ++ "T88x_FRAG_THREADS", ++ "T88x_FRAG_DUMMY_THREADS", ++ "T88x_FRAG_QUADS_RAST", ++ "T88x_FRAG_QUADS_EZS_TEST", ++ "T88x_FRAG_QUADS_EZS_KILLED", ++ "T88x_FRAG_THREADS_LZS_TEST", ++ "T88x_FRAG_THREADS_LZS_KILLED", ++ "T88x_FRAG_CYCLES_NO_TILE", ++ "T88x_FRAG_NUM_TILES", ++ "T88x_FRAG_TRANS_ELIM", ++ "T88x_COMPUTE_ACTIVE", ++ "T88x_COMPUTE_TASKS", ++ "T88x_COMPUTE_THREADS", ++ "T88x_COMPUTE_CYCLES_DESC", ++ "T88x_TRIPIPE_ACTIVE", ++ "T88x_ARITH_WORDS", ++ "T88x_ARITH_CYCLES_REG", ++ "T88x_ARITH_CYCLES_L0", ++ "T88x_ARITH_FRAG_DEPEND", ++ "T88x_LS_WORDS", ++ "T88x_LS_ISSUES", ++ "T88x_LS_REISSUE_ATTR", ++ "T88x_LS_REISSUES_VARY", ++ "T88x_LS_VARY_RV_MISS", ++ "T88x_LS_VARY_RV_HIT", ++ "T88x_LS_NO_UNPARK", ++ "T88x_TEX_WORDS", ++ "T88x_TEX_BUBBLES", ++ "T88x_TEX_WORDS_L0", ++ "T88x_TEX_WORDS_DESC", ++ "T88x_TEX_ISSUES", ++ "T88x_TEX_RECIRC_FMISS", ++ "T88x_TEX_RECIRC_DESC", ++ "T88x_TEX_RECIRC_MULTI", ++ "T88x_TEX_RECIRC_PMISS", ++ "T88x_TEX_RECIRC_CONF", ++ "T88x_LSC_READ_HITS", ++ "T88x_LSC_READ_OP", ++ "T88x_LSC_WRITE_HITS", ++ "T88x_LSC_WRITE_OP", ++ "T88x_LSC_ATOMIC_HITS", ++ "T88x_LSC_ATOMIC_OP", ++ "T88x_LSC_LINE_FETCHES", ++ "T88x_LSC_DIRTY_LINE", ++ "T88x_LSC_SNOOPS", ++ "T88x_AXI_TLB_STALL", ++ "T88x_AXI_TLB_MISS", ++ "T88x_AXI_TLB_TRANSACTION", ++ "T88x_LS_TLB_MISS", ++ 
"T88x_LS_TLB_HIT", ++ "T88x_AXI_BEATS_READ", ++ "T88x_AXI_BEATS_WRITTEN", + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /*L2 and MMU */ ++ "", ++ "", ++ "", ++ "", ++ "T88x_MMU_HIT", ++ "T88x_MMU_NEW_MISS", ++ "T88x_MMU_REPLAY_FULL", ++ "T88x_MMU_REPLAY_MISS", ++ "T88x_MMU_TABLE_WALK", ++ "T88x_MMU_REQUESTS", ++ "", ++ "", ++ "T88x_UTLB_HIT", ++ "T88x_UTLB_NEW_MISS", ++ "T88x_UTLB_REPLAY_FULL", ++ "T88x_UTLB_REPLAY_MISS", ++ "T88x_UTLB_STALL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "T88x_L2_EXT_WRITE_BEATS", ++ "T88x_L2_EXT_READ_BEATS", ++ "T88x_L2_ANY_LOOKUP", ++ "T88x_L2_READ_LOOKUP", ++ "T88x_L2_SREAD_LOOKUP", ++ "T88x_L2_READ_REPLAY", ++ "T88x_L2_READ_SNOOP", ++ "T88x_L2_READ_HIT", ++ "T88x_L2_CLEAN_MISS", ++ "T88x_L2_WRITE_LOOKUP", ++ "T88x_L2_SWRITE_LOOKUP", ++ "T88x_L2_WRITE_REPLAY", ++ "T88x_L2_WRITE_SNOOP", ++ "T88x_L2_WRITE_HIT", ++ "T88x_L2_EXT_READ_FULL", ++ "", ++ "T88x_L2_EXT_WRITE_FULL", ++ "T88x_L2_EXT_R_W_HAZARD", ++ "T88x_L2_EXT_READ", ++ "T88x_L2_EXT_READ_LINE", ++ "T88x_L2_EXT_WRITE", ++ "T88x_L2_EXT_WRITE_LINE", ++ "T88x_L2_EXT_WRITE_SMALL", ++ "T88x_L2_EXT_BARRIER", ++ "T88x_L2_EXT_AR_STALL", ++ "T88x_L2_EXT_R_BUF_FULL", ++ "T88x_L2_EXT_RD_BUF_FULL", ++ "T88x_L2_EXT_R_RAW", ++ "T88x_L2_EXT_W_STALL", ++ "T88x_L2_EXT_W_BUF_FULL", ++ "T88x_L2_EXT_R_BUF_FULL", ++ "T88x_L2_TAG_HAZARD", ++ "T88x_L2_SNOOP_FULL", ++ "T88x_L2_REPLAY_FULL" ++}; + -+ if (context_idle) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } ++#include "mali_kbase_gator_hwcnt_names_tmix.h" + -+ if (timer_sync) -+ kbase_js_sync_timers(kbdev); ++#include "mali_kbase_gator_hwcnt_names_thex.h" + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); ++#include "mali_kbase_gator_hwcnt_names_tsix.h" + -+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; -+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, -+ &retained_state); + -+ kbase_js_sched_all(kbdev); ++#ifdef MALI_INCLUDE_TKAX ++#include "mali_kbase_gator_hwcnt_names_tkax.h" ++#endif /* MALI_INCLUDE_TKAX */ + -+ kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, -+ coreref_state); -+} ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h +new file mode 100644 +index 000000000..bcceef4fc +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h +@@ -0,0 +1,291 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ jsctx_rb_unpull(kctx, katom); + -+ WARN_ON(work_pending(&katom->work)); ++/* ++ * This header was autogenerated, it should not be edited. 
++ */ + -+ /* Block re-submission until workqueue has run */ -+ atomic_inc(&katom->blocked); ++#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ ++#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ + -+ kbase_job_check_leave_disjoint(kctx->kbdev, katom); ++static const char * const hardware_counters_mali_tHEx[] = { ++ /* Performance counters for the Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "THEx_MESSAGES_SENT", ++ "THEx_MESSAGES_RECEIVED", ++ "THEx_GPU_ACTIVE", ++ "THEx_IRQ_ACTIVE", ++ "THEx_JS0_JOBS", ++ "THEx_JS0_TASKS", ++ "THEx_JS0_ACTIVE", ++ "", ++ "THEx_JS0_WAIT_READ", ++ "THEx_JS0_WAIT_ISSUE", ++ "THEx_JS0_WAIT_DEPEND", ++ "THEx_JS0_WAIT_FINISH", ++ "THEx_JS1_JOBS", ++ "THEx_JS1_TASKS", ++ "THEx_JS1_ACTIVE", ++ "", ++ "THEx_JS1_WAIT_READ", ++ "THEx_JS1_WAIT_ISSUE", ++ "THEx_JS1_WAIT_DEPEND", ++ "THEx_JS1_WAIT_FINISH", ++ "THEx_JS2_JOBS", ++ "THEx_JS2_TASKS", ++ "THEx_JS2_ACTIVE", ++ "", ++ "THEx_JS2_WAIT_READ", ++ "THEx_JS2_WAIT_ISSUE", ++ "THEx_JS2_WAIT_DEPEND", ++ "THEx_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); -+ INIT_WORK(&katom->work, js_return_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+} ++ /* Performance counters for the Tiler */ ++ "", ++ "", ++ "", ++ "", ++ "THEx_TILER_ACTIVE", ++ "THEx_JOBS_PROCESSED", ++ "THEx_TRIANGLES", ++ "THEx_LINES", ++ "THEx_POINTS", ++ "THEx_FRONT_FACING", ++ "THEx_BACK_FACING", ++ "THEx_PRIM_VISIBLE", ++ "THEx_PRIM_CULLED", ++ "THEx_PRIM_CLIPPED", ++ "THEx_PRIM_SAT_CULLED", ++ "", ++ "", ++ "THEx_BUS_READ", ++ "", ++ "THEx_BUS_WRITE", ++ "THEx_LOADING_DESC", ++ "THEx_IDVS_POS_SHAD_REQ", ++ "THEx_IDVS_POS_SHAD_WAIT", ++ "THEx_IDVS_POS_SHAD_STALL", ++ "THEx_IDVS_POS_FIFO_FULL", ++ "THEx_PREFETCH_STALL", ++ "THEx_VCACHE_HIT", ++ "THEx_VCACHE_MISS", ++ "THEx_VCACHE_LINE_WAIT", ++ "THEx_VFETCH_POS_READ_WAIT", ++ "THEx_VFETCH_VERTEX_WAIT", ++ "THEx_VFETCH_STALL", ++ "THEx_PRIMASSY_STALL", ++ "THEx_BBOX_GEN_STALL", ++ "THEx_IDVS_VBU_HIT", ++ "THEx_IDVS_VBU_MISS", ++ "THEx_IDVS_VBU_LINE_DEALLOCATE", ++ "THEx_IDVS_VAR_SHAD_REQ", ++ "THEx_IDVS_VAR_SHAD_STALL", ++ "THEx_BINNER_STALL", ++ "THEx_ITER_STALL", ++ "THEx_COMPRESS_MISS", ++ "THEx_COMPRESS_STALL", ++ "THEx_PCACHE_HIT", ++ "THEx_PCACHE_MISS", ++ "THEx_PCACHE_MISS_STALL", ++ "THEx_PCACHE_EVICT_STALL", ++ "THEx_PMGR_PTR_WR_STALL", ++ "THEx_PMGR_PTR_RD_STALL", ++ "THEx_PMGR_CMD_WR_STALL", ++ "THEx_WRBUF_ACTIVE", ++ "THEx_WRBUF_HIT", ++ "THEx_WRBUF_MISS", ++ "THEx_WRBUF_NO_FREE_LINE_STALL", ++ "THEx_WRBUF_NO_AXI_ID_STALL", ++ "THEx_WRBUF_AXI_STALL", ++ "", ++ "", ++ "", ++ "THEx_UTLB_TRANS", ++ "THEx_UTLB_TRANS_HIT", ++ "THEx_UTLB_TRANS_STALL", ++ "THEx_UTLB_TRANS_MISS_DELAY", ++ "THEx_UTLB_MMU_REQ", + -+bool kbase_js_complete_atom_wq(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_device *kbdev; -+ unsigned long flags; -+ bool timer_sync = false; -+ int atom_slot; -+ bool context_idle = false; -+ int prio = katom->sched_priority; ++ /* Performance counters for the Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "THEx_FRAG_ACTIVE", ++ "THEx_FRAG_PRIMITIVES", ++ "THEx_FRAG_PRIM_RAST", ++ "THEx_FRAG_FPK_ACTIVE", ++ "THEx_FRAG_STARVING", ++ "THEx_FRAG_WARPS", ++ "THEx_FRAG_PARTIAL_WARPS", ++ "THEx_FRAG_QUADS_RAST", ++ 
"THEx_FRAG_QUADS_EZS_TEST", ++ "THEx_FRAG_QUADS_EZS_UPDATE", ++ "THEx_FRAG_QUADS_EZS_KILL", ++ "THEx_FRAG_LZS_TEST", ++ "THEx_FRAG_LZS_KILL", ++ "", ++ "THEx_FRAG_PTILES", ++ "THEx_FRAG_TRANS_ELIM", ++ "THEx_QUAD_FPK_KILLER", ++ "", ++ "THEx_COMPUTE_ACTIVE", ++ "THEx_COMPUTE_TASKS", ++ "THEx_COMPUTE_WARPS", ++ "THEx_COMPUTE_STARVING", ++ "THEx_EXEC_CORE_ACTIVE", ++ "THEx_EXEC_ACTIVE", ++ "THEx_EXEC_INSTR_COUNT", ++ "THEx_EXEC_INSTR_DIVERGED", ++ "THEx_EXEC_INSTR_STARVING", ++ "THEx_ARITH_INSTR_SINGLE_FMA", ++ "THEx_ARITH_INSTR_DOUBLE", ++ "THEx_ARITH_INSTR_MSG", ++ "THEx_ARITH_INSTR_MSG_ONLY", ++ "THEx_TEX_INSTR", ++ "THEx_TEX_INSTR_MIPMAP", ++ "THEx_TEX_INSTR_COMPRESSED", ++ "THEx_TEX_INSTR_3D", ++ "THEx_TEX_INSTR_TRILINEAR", ++ "THEx_TEX_COORD_ISSUE", ++ "THEx_TEX_COORD_STALL", ++ "THEx_TEX_STARVE_CACHE", ++ "THEx_TEX_STARVE_FILTER", ++ "THEx_LS_MEM_READ_FULL", ++ "THEx_LS_MEM_READ_SHORT", ++ "THEx_LS_MEM_WRITE_FULL", ++ "THEx_LS_MEM_WRITE_SHORT", ++ "THEx_LS_MEM_ATOMIC", ++ "THEx_VARY_INSTR", ++ "THEx_VARY_SLOT_32", ++ "THEx_VARY_SLOT_16", ++ "THEx_ATTR_INSTR", ++ "THEx_ARITH_INSTR_FP_MUL", ++ "THEx_BEATS_RD_FTC", ++ "THEx_BEATS_RD_FTC_EXT", ++ "THEx_BEATS_RD_LSC", ++ "THEx_BEATS_RD_LSC_EXT", ++ "THEx_BEATS_RD_TEX", ++ "THEx_BEATS_RD_TEX_EXT", ++ "THEx_BEATS_RD_OTHER", ++ "THEx_BEATS_WR_LSC", ++ "THEx_BEATS_WR_TIB", ++ "", + -+ kbdev = kctx->kbdev; -+ atom_slot = katom->slot_nr; ++ /* Performance counters for the Memory System */ ++ "", ++ "", ++ "", ++ "", ++ "THEx_MMU_REQUESTS", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "THEx_L2_RD_MSG_IN", ++ "THEx_L2_RD_MSG_IN_STALL", ++ "THEx_L2_WR_MSG_IN", ++ "THEx_L2_WR_MSG_IN_STALL", ++ "THEx_L2_SNP_MSG_IN", ++ "THEx_L2_SNP_MSG_IN_STALL", ++ "THEx_L2_RD_MSG_OUT", ++ "THEx_L2_RD_MSG_OUT_STALL", ++ "THEx_L2_WR_MSG_OUT", ++ "THEx_L2_ANY_LOOKUP", ++ "THEx_L2_READ_LOOKUP", ++ "THEx_L2_WRITE_LOOKUP", ++ "THEx_L2_EXT_SNOOP_LOOKUP", ++ "THEx_L2_EXT_READ", ++ "THEx_L2_EXT_READ_NOSNP", ++ "THEx_L2_EXT_READ_UNIQUE", ++ "THEx_L2_EXT_READ_BEATS", ++ "THEx_L2_EXT_AR_STALL", ++ "THEx_L2_EXT_AR_CNT_Q1", ++ "THEx_L2_EXT_AR_CNT_Q2", ++ "THEx_L2_EXT_AR_CNT_Q3", ++ "THEx_L2_EXT_RRESP_0_127", ++ "THEx_L2_EXT_RRESP_128_191", ++ "THEx_L2_EXT_RRESP_192_255", ++ "THEx_L2_EXT_RRESP_256_319", ++ "THEx_L2_EXT_RRESP_320_383", ++ "THEx_L2_EXT_WRITE", ++ "THEx_L2_EXT_WRITE_NOSNP_FULL", ++ "THEx_L2_EXT_WRITE_NOSNP_PTL", ++ "THEx_L2_EXT_WRITE_SNP_FULL", ++ "THEx_L2_EXT_WRITE_SNP_PTL", ++ "THEx_L2_EXT_WRITE_BEATS", ++ "THEx_L2_EXT_W_STALL", ++ "THEx_L2_EXT_AW_CNT_Q1", ++ "THEx_L2_EXT_AW_CNT_Q2", ++ "THEx_L2_EXT_AW_CNT_Q3", ++ "THEx_L2_EXT_SNOOP", ++ "THEx_L2_EXT_SNOOP_STALL", ++ "THEx_L2_EXT_SNOOP_RESP_CLEAN", ++ "THEx_L2_EXT_SNOOP_RESP_DATA", ++ "THEx_L2_EXT_SNOOP_INTERNAL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++}; + -+ js_kctx_info = &kctx->jctx.sched_info; -+ js_devdata = &kbdev->js_data; ++#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h +new file mode 100644 +index 000000000..5ea06770f +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h +@@ -0,0 +1,291 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + -+ mutex_lock(&js_devdata->runpool_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { -+ context_idle = !atomic_dec_return(&kctx->atoms_pulled); -+ atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); -+ kctx->atoms_pulled_slot_pri[atom_slot][prio]--; ++/* ++ * This header was autogenerated, it should not be edited. ++ */ + -+ if (!atomic_read(&kctx->atoms_pulled) && -+ !kctx->slots_pullable) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); -+ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); -+ atomic_dec(&kbdev->js_data.nr_contexts_runnable); -+ timer_sync = true; -+ } ++#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ ++#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ + -+ /* If this slot has been blocked due to soft-stopped atoms, and -+ * all atoms have now been processed, then unblock the slot */ -+ if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] -+ && kctx->blocked_js[atom_slot][prio]) { -+ kctx->blocked_js[atom_slot][prio] = false; -+ if (kbase_js_ctx_pullable(kctx, atom_slot, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, atom_slot); -+ } -+ } -+ WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); ++static const char * const hardware_counters_mali_tMIx[] = { ++ /* Performance counters for the Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "TMIx_MESSAGES_SENT", ++ "TMIx_MESSAGES_RECEIVED", ++ "TMIx_GPU_ACTIVE", ++ "TMIx_IRQ_ACTIVE", ++ "TMIx_JS0_JOBS", ++ "TMIx_JS0_TASKS", ++ "TMIx_JS0_ACTIVE", ++ "", ++ "TMIx_JS0_WAIT_READ", ++ "TMIx_JS0_WAIT_ISSUE", ++ "TMIx_JS0_WAIT_DEPEND", ++ "TMIx_JS0_WAIT_FINISH", ++ "TMIx_JS1_JOBS", ++ "TMIx_JS1_TASKS", ++ "TMIx_JS1_ACTIVE", ++ "", ++ "TMIx_JS1_WAIT_READ", ++ "TMIx_JS1_WAIT_ISSUE", ++ "TMIx_JS1_WAIT_DEPEND", ++ "TMIx_JS1_WAIT_FINISH", ++ "TMIx_JS2_JOBS", ++ "TMIx_JS2_TASKS", ++ "TMIx_JS2_ACTIVE", ++ "", ++ "TMIx_JS2_WAIT_READ", ++ "TMIx_JS2_WAIT_ISSUE", ++ "TMIx_JS2_WAIT_DEPEND", ++ "TMIx_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && -+ jsctx_rb_none_to_pull(kctx, atom_slot)) { -+ if (!list_empty( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) -+ timer_sync |= kbase_js_ctx_list_remove_nolock( -+ kctx->kbdev, kctx, atom_slot); -+ } ++ /* Performance counters for the Tiler */ ++ "", ++ "", ++ "", ++ "", ++ "TMIx_TILER_ACTIVE", ++ "TMIx_JOBS_PROCESSED", ++ "TMIx_TRIANGLES", ++ "TMIx_LINES", ++ "TMIx_POINTS", ++ "TMIx_FRONT_FACING", ++ "TMIx_BACK_FACING", ++ "TMIx_PRIM_VISIBLE", ++ "TMIx_PRIM_CULLED", ++ "TMIx_PRIM_CLIPPED", ++ "TMIx_PRIM_SAT_CULLED", ++ "", ++ "", ++ "TMIx_BUS_READ", ++ "", ++ "TMIx_BUS_WRITE", ++ "TMIx_LOADING_DESC", ++ "TMIx_IDVS_POS_SHAD_REQ", ++ "TMIx_IDVS_POS_SHAD_WAIT", ++ "TMIx_IDVS_POS_SHAD_STALL", ++ "TMIx_IDVS_POS_FIFO_FULL", ++ 
"TMIx_PREFETCH_STALL", ++ "TMIx_VCACHE_HIT", ++ "TMIx_VCACHE_MISS", ++ "TMIx_VCACHE_LINE_WAIT", ++ "TMIx_VFETCH_POS_READ_WAIT", ++ "TMIx_VFETCH_VERTEX_WAIT", ++ "TMIx_VFETCH_STALL", ++ "TMIx_PRIMASSY_STALL", ++ "TMIx_BBOX_GEN_STALL", ++ "TMIx_IDVS_VBU_HIT", ++ "TMIx_IDVS_VBU_MISS", ++ "TMIx_IDVS_VBU_LINE_DEALLOCATE", ++ "TMIx_IDVS_VAR_SHAD_REQ", ++ "TMIx_IDVS_VAR_SHAD_STALL", ++ "TMIx_BINNER_STALL", ++ "TMIx_ITER_STALL", ++ "TMIx_COMPRESS_MISS", ++ "TMIx_COMPRESS_STALL", ++ "TMIx_PCACHE_HIT", ++ "TMIx_PCACHE_MISS", ++ "TMIx_PCACHE_MISS_STALL", ++ "TMIx_PCACHE_EVICT_STALL", ++ "TMIx_PMGR_PTR_WR_STALL", ++ "TMIx_PMGR_PTR_RD_STALL", ++ "TMIx_PMGR_CMD_WR_STALL", ++ "TMIx_WRBUF_ACTIVE", ++ "TMIx_WRBUF_HIT", ++ "TMIx_WRBUF_MISS", ++ "TMIx_WRBUF_NO_FREE_LINE_STALL", ++ "TMIx_WRBUF_NO_AXI_ID_STALL", ++ "TMIx_WRBUF_AXI_STALL", ++ "", ++ "", ++ "", ++ "TMIx_UTLB_TRANS", ++ "TMIx_UTLB_TRANS_HIT", ++ "TMIx_UTLB_TRANS_STALL", ++ "TMIx_UTLB_TRANS_MISS_DELAY", ++ "TMIx_UTLB_MMU_REQ", + -+ /* -+ * If submission is disabled on this context (most likely due to an -+ * atom failure) and there are now no atoms left in the system then -+ * re-enable submission so that context can be scheduled again. -+ */ -+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && -+ !atomic_read(&kctx->atoms_pulled) && -+ !kbase_ctx_flag(kctx, KCTX_DYING)) { -+ int js; ++ /* Performance counters for the Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "TMIx_FRAG_ACTIVE", ++ "TMIx_FRAG_PRIMITIVES", ++ "TMIx_FRAG_PRIM_RAST", ++ "TMIx_FRAG_FPK_ACTIVE", ++ "TMIx_FRAG_STARVING", ++ "TMIx_FRAG_WARPS", ++ "TMIx_FRAG_PARTIAL_WARPS", ++ "TMIx_FRAG_QUADS_RAST", ++ "TMIx_FRAG_QUADS_EZS_TEST", ++ "TMIx_FRAG_QUADS_EZS_UPDATE", ++ "TMIx_FRAG_QUADS_EZS_KILL", ++ "TMIx_FRAG_LZS_TEST", ++ "TMIx_FRAG_LZS_KILL", ++ "", ++ "TMIx_FRAG_PTILES", ++ "TMIx_FRAG_TRANS_ELIM", ++ "TMIx_QUAD_FPK_KILLER", ++ "", ++ "TMIx_COMPUTE_ACTIVE", ++ "TMIx_COMPUTE_TASKS", ++ "TMIx_COMPUTE_WARPS", ++ "TMIx_COMPUTE_STARVING", ++ "TMIx_EXEC_CORE_ACTIVE", ++ "TMIx_EXEC_ACTIVE", ++ "TMIx_EXEC_INSTR_COUNT", ++ "TMIx_EXEC_INSTR_DIVERGED", ++ "TMIx_EXEC_INSTR_STARVING", ++ "TMIx_ARITH_INSTR_SINGLE_FMA", ++ "TMIx_ARITH_INSTR_DOUBLE", ++ "TMIx_ARITH_INSTR_MSG", ++ "TMIx_ARITH_INSTR_MSG_ONLY", ++ "TMIx_TEX_INSTR", ++ "TMIx_TEX_INSTR_MIPMAP", ++ "TMIx_TEX_INSTR_COMPRESSED", ++ "TMIx_TEX_INSTR_3D", ++ "TMIx_TEX_INSTR_TRILINEAR", ++ "TMIx_TEX_COORD_ISSUE", ++ "TMIx_TEX_COORD_STALL", ++ "TMIx_TEX_STARVE_CACHE", ++ "TMIx_TEX_STARVE_FILTER", ++ "TMIx_LS_MEM_READ_FULL", ++ "TMIx_LS_MEM_READ_SHORT", ++ "TMIx_LS_MEM_WRITE_FULL", ++ "TMIx_LS_MEM_WRITE_SHORT", ++ "TMIx_LS_MEM_ATOMIC", ++ "TMIx_VARY_INSTR", ++ "TMIx_VARY_SLOT_32", ++ "TMIx_VARY_SLOT_16", ++ "TMIx_ATTR_INSTR", ++ "TMIx_ARITH_INSTR_FP_MUL", ++ "TMIx_BEATS_RD_FTC", ++ "TMIx_BEATS_RD_FTC_EXT", ++ "TMIx_BEATS_RD_LSC", ++ "TMIx_BEATS_RD_LSC_EXT", ++ "TMIx_BEATS_RD_TEX", ++ "TMIx_BEATS_RD_TEX_EXT", ++ "TMIx_BEATS_RD_OTHER", ++ "TMIx_BEATS_WR_LSC", ++ "TMIx_BEATS_WR_TIB", ++ "", + -+ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ /* Performance counters for the Memory System */ ++ "", ++ "", ++ "", ++ "", ++ "TMIx_MMU_REQUESTS", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "TMIx_L2_RD_MSG_IN", ++ "TMIx_L2_RD_MSG_IN_STALL", ++ "TMIx_L2_WR_MSG_IN", ++ "TMIx_L2_WR_MSG_IN_STALL", ++ "TMIx_L2_SNP_MSG_IN", ++ "TMIx_L2_SNP_MSG_IN_STALL", ++ "TMIx_L2_RD_MSG_OUT", ++ "TMIx_L2_RD_MSG_OUT_STALL", ++ "TMIx_L2_WR_MSG_OUT", ++ "TMIx_L2_ANY_LOOKUP", ++ "TMIx_L2_READ_LOOKUP", ++ "TMIx_L2_WRITE_LOOKUP", ++ 
"TMIx_L2_EXT_SNOOP_LOOKUP", ++ "TMIx_L2_EXT_READ", ++ "TMIx_L2_EXT_READ_NOSNP", ++ "TMIx_L2_EXT_READ_UNIQUE", ++ "TMIx_L2_EXT_READ_BEATS", ++ "TMIx_L2_EXT_AR_STALL", ++ "TMIx_L2_EXT_AR_CNT_Q1", ++ "TMIx_L2_EXT_AR_CNT_Q2", ++ "TMIx_L2_EXT_AR_CNT_Q3", ++ "TMIx_L2_EXT_RRESP_0_127", ++ "TMIx_L2_EXT_RRESP_128_191", ++ "TMIx_L2_EXT_RRESP_192_255", ++ "TMIx_L2_EXT_RRESP_256_319", ++ "TMIx_L2_EXT_RRESP_320_383", ++ "TMIx_L2_EXT_WRITE", ++ "TMIx_L2_EXT_WRITE_NOSNP_FULL", ++ "TMIx_L2_EXT_WRITE_NOSNP_PTL", ++ "TMIx_L2_EXT_WRITE_SNP_FULL", ++ "TMIx_L2_EXT_WRITE_SNP_PTL", ++ "TMIx_L2_EXT_WRITE_BEATS", ++ "TMIx_L2_EXT_W_STALL", ++ "TMIx_L2_EXT_AW_CNT_Q1", ++ "TMIx_L2_EXT_AW_CNT_Q2", ++ "TMIx_L2_EXT_AW_CNT_Q3", ++ "TMIx_L2_EXT_SNOOP", ++ "TMIx_L2_EXT_SNOOP_STALL", ++ "TMIx_L2_EXT_SNOOP_RESP_CLEAN", ++ "TMIx_L2_EXT_SNOOP_RESP_DATA", ++ "TMIx_L2_EXT_SNOOP_INTERNAL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++}; + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); -+ } -+ } else if (katom->x_post_dep && -+ kbasep_js_is_submit_allowed(js_devdata, kctx)) { -+ int js; ++#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h +new file mode 100644 +index 000000000..be09c4556 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h +@@ -0,0 +1,291 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kbdev, kctx, js); -+ } -+ } + -+ /* Mark context as inactive. The pm reference will be dropped later in -+ * jd_done_worker(). -+ */ -+ if (context_idle) -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ if (timer_sync) -+ kbase_backend_ctx_count_changed(kbdev); -+ mutex_unlock(&js_devdata->runpool_mutex); ++/* ++ * This header was autogenerated, it should not be edited. 
++ */ + -+ return context_idle; -+} ++#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ ++#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ + -+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp) -+{ -+ u64 microseconds_spent = 0; -+ struct kbase_device *kbdev; -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_jd_atom *x_dep = katom->x_post_dep; ++static const char * const hardware_counters_mali_tSIx[] = { ++ /* Performance counters for the Job Manager */ ++ "", ++ "", ++ "", ++ "", ++ "TSIx_MESSAGES_SENT", ++ "TSIx_MESSAGES_RECEIVED", ++ "TSIx_GPU_ACTIVE", ++ "TSIx_IRQ_ACTIVE", ++ "TSIx_JS0_JOBS", ++ "TSIx_JS0_TASKS", ++ "TSIx_JS0_ACTIVE", ++ "", ++ "TSIx_JS0_WAIT_READ", ++ "TSIx_JS0_WAIT_ISSUE", ++ "TSIx_JS0_WAIT_DEPEND", ++ "TSIx_JS0_WAIT_FINISH", ++ "TSIx_JS1_JOBS", ++ "TSIx_JS1_TASKS", ++ "TSIx_JS1_ACTIVE", ++ "", ++ "TSIx_JS1_WAIT_READ", ++ "TSIx_JS1_WAIT_ISSUE", ++ "TSIx_JS1_WAIT_DEPEND", ++ "TSIx_JS1_WAIT_FINISH", ++ "TSIx_JS2_JOBS", ++ "TSIx_JS2_TASKS", ++ "TSIx_JS2_ACTIVE", ++ "", ++ "TSIx_JS2_WAIT_READ", ++ "TSIx_JS2_WAIT_ISSUE", ++ "TSIx_JS2_WAIT_DEPEND", ++ "TSIx_JS2_WAIT_FINISH", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", + -+ kbdev = kctx->kbdev; ++ /* Performance counters for the Tiler */ ++ "", ++ "", ++ "", ++ "", ++ "TSIx_TILER_ACTIVE", ++ "TSIx_JOBS_PROCESSED", ++ "TSIx_TRIANGLES", ++ "TSIx_LINES", ++ "TSIx_POINTS", ++ "TSIx_FRONT_FACING", ++ "TSIx_BACK_FACING", ++ "TSIx_PRIM_VISIBLE", ++ "TSIx_PRIM_CULLED", ++ "TSIx_PRIM_CLIPPED", ++ "TSIx_PRIM_SAT_CULLED", ++ "", ++ "", ++ "TSIx_BUS_READ", ++ "", ++ "TSIx_BUS_WRITE", ++ "TSIx_LOADING_DESC", ++ "TSIx_IDVS_POS_SHAD_REQ", ++ "TSIx_IDVS_POS_SHAD_WAIT", ++ "TSIx_IDVS_POS_SHAD_STALL", ++ "TSIx_IDVS_POS_FIFO_FULL", ++ "TSIx_PREFETCH_STALL", ++ "TSIx_VCACHE_HIT", ++ "TSIx_VCACHE_MISS", ++ "TSIx_VCACHE_LINE_WAIT", ++ "TSIx_VFETCH_POS_READ_WAIT", ++ "TSIx_VFETCH_VERTEX_WAIT", ++ "TSIx_VFETCH_STALL", ++ "TSIx_PRIMASSY_STALL", ++ "TSIx_BBOX_GEN_STALL", ++ "TSIx_IDVS_VBU_HIT", ++ "TSIx_IDVS_VBU_MISS", ++ "TSIx_IDVS_VBU_LINE_DEALLOCATE", ++ "TSIx_IDVS_VAR_SHAD_REQ", ++ "TSIx_IDVS_VAR_SHAD_STALL", ++ "TSIx_BINNER_STALL", ++ "TSIx_ITER_STALL", ++ "TSIx_COMPRESS_MISS", ++ "TSIx_COMPRESS_STALL", ++ "TSIx_PCACHE_HIT", ++ "TSIx_PCACHE_MISS", ++ "TSIx_PCACHE_MISS_STALL", ++ "TSIx_PCACHE_EVICT_STALL", ++ "TSIx_PMGR_PTR_WR_STALL", ++ "TSIx_PMGR_PTR_RD_STALL", ++ "TSIx_PMGR_CMD_WR_STALL", ++ "TSIx_WRBUF_ACTIVE", ++ "TSIx_WRBUF_HIT", ++ "TSIx_WRBUF_MISS", ++ "TSIx_WRBUF_NO_FREE_LINE_STALL", ++ "TSIx_WRBUF_NO_AXI_ID_STALL", ++ "TSIx_WRBUF_AXI_STALL", ++ "", ++ "", ++ "", ++ "TSIx_UTLB_TRANS", ++ "TSIx_UTLB_TRANS_HIT", ++ "TSIx_UTLB_TRANS_STALL", ++ "TSIx_UTLB_TRANS_MISS_DELAY", ++ "TSIx_UTLB_MMU_REQ", + ++ /* Performance counters for the Shader Core */ ++ "", ++ "", ++ "", ++ "", ++ "TSIx_FRAG_ACTIVE", ++ "TSIx_FRAG_PRIMITIVES", ++ "TSIx_FRAG_PRIM_RAST", ++ "TSIx_FRAG_FPK_ACTIVE", ++ "TSIx_FRAG_STARVING", ++ "TSIx_FRAG_WARPS", ++ "TSIx_FRAG_PARTIAL_WARPS", ++ "TSIx_FRAG_QUADS_RAST", ++ "TSIx_FRAG_QUADS_EZS_TEST", ++ "TSIx_FRAG_QUADS_EZS_UPDATE", ++ "TSIx_FRAG_QUADS_EZS_KILL", ++ "TSIx_FRAG_LZS_TEST", ++ "TSIx_FRAG_LZS_KILL", ++ "", ++ "TSIx_FRAG_PTILES", ++ "TSIx_FRAG_TRANS_ELIM", ++ "TSIx_QUAD_FPK_KILLER", ++ "", ++ "TSIx_COMPUTE_ACTIVE", ++ "TSIx_COMPUTE_TASKS", ++ "TSIx_COMPUTE_WARPS", ++ "TSIx_COMPUTE_STARVING", ++ 
"TSIx_EXEC_CORE_ACTIVE", ++ "TSIx_EXEC_ACTIVE", ++ "TSIx_EXEC_INSTR_COUNT", ++ "TSIx_EXEC_INSTR_DIVERGED", ++ "TSIx_EXEC_INSTR_STARVING", ++ "TSIx_ARITH_INSTR_SINGLE_FMA", ++ "TSIx_ARITH_INSTR_DOUBLE", ++ "TSIx_ARITH_INSTR_MSG", ++ "TSIx_ARITH_INSTR_MSG_ONLY", ++ "TSIx_TEX_MSGI_NUM_QUADS", ++ "TSIx_TEX_DFCH_NUM_PASSES", ++ "TSIx_TEX_DFCH_NUM_PASSES_MISS", ++ "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP", ++ "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP", ++ "TSIx_TEX_TFCH_NUM_LINES_FETCHED", ++ "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED", ++ "TSIx_TEX_TFCH_NUM_OPERATIONS", ++ "TSIx_TEX_FILT_NUM_OPERATIONS", ++ "TSIx_LS_MEM_READ_FULL", ++ "TSIx_LS_MEM_READ_SHORT", ++ "TSIx_LS_MEM_WRITE_FULL", ++ "TSIx_LS_MEM_WRITE_SHORT", ++ "TSIx_LS_MEM_ATOMIC", ++ "TSIx_VARY_INSTR", ++ "TSIx_VARY_SLOT_32", ++ "TSIx_VARY_SLOT_16", ++ "TSIx_ATTR_INSTR", ++ "TSIx_ARITH_INSTR_FP_MUL", ++ "TSIx_BEATS_RD_FTC", ++ "TSIx_BEATS_RD_FTC_EXT", ++ "TSIx_BEATS_RD_LSC", ++ "TSIx_BEATS_RD_LSC_EXT", ++ "TSIx_BEATS_RD_TEX", ++ "TSIx_BEATS_RD_TEX_EXT", ++ "TSIx_BEATS_RD_OTHER", ++ "TSIx_BEATS_WR_LSC", ++ "TSIx_BEATS_WR_TIB", ++ "", + -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ /* Performance counters for the Memory System */ ++ "", ++ "", ++ "", ++ "", ++ "TSIx_MMU_REQUESTS", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "TSIx_L2_RD_MSG_IN", ++ "TSIx_L2_RD_MSG_IN_STALL", ++ "TSIx_L2_WR_MSG_IN", ++ "TSIx_L2_WR_MSG_IN_STALL", ++ "TSIx_L2_SNP_MSG_IN", ++ "TSIx_L2_SNP_MSG_IN_STALL", ++ "TSIx_L2_RD_MSG_OUT", ++ "TSIx_L2_RD_MSG_OUT_STALL", ++ "TSIx_L2_WR_MSG_OUT", ++ "TSIx_L2_ANY_LOOKUP", ++ "TSIx_L2_READ_LOOKUP", ++ "TSIx_L2_WRITE_LOOKUP", ++ "TSIx_L2_EXT_SNOOP_LOOKUP", ++ "TSIx_L2_EXT_READ", ++ "TSIx_L2_EXT_READ_NOSNP", ++ "TSIx_L2_EXT_READ_UNIQUE", ++ "TSIx_L2_EXT_READ_BEATS", ++ "TSIx_L2_EXT_AR_STALL", ++ "TSIx_L2_EXT_AR_CNT_Q1", ++ "TSIx_L2_EXT_AR_CNT_Q2", ++ "TSIx_L2_EXT_AR_CNT_Q3", ++ "TSIx_L2_EXT_RRESP_0_127", ++ "TSIx_L2_EXT_RRESP_128_191", ++ "TSIx_L2_EXT_RRESP_192_255", ++ "TSIx_L2_EXT_RRESP_256_319", ++ "TSIx_L2_EXT_RRESP_320_383", ++ "TSIx_L2_EXT_WRITE", ++ "TSIx_L2_EXT_WRITE_NOSNP_FULL", ++ "TSIx_L2_EXT_WRITE_NOSNP_PTL", ++ "TSIx_L2_EXT_WRITE_SNP_FULL", ++ "TSIx_L2_EXT_WRITE_SNP_PTL", ++ "TSIx_L2_EXT_WRITE_BEATS", ++ "TSIx_L2_EXT_W_STALL", ++ "TSIx_L2_EXT_AW_CNT_Q1", ++ "TSIx_L2_EXT_AW_CNT_Q2", ++ "TSIx_L2_EXT_AW_CNT_Q3", ++ "TSIx_L2_EXT_SNOOP", ++ "TSIx_L2_EXT_SNOOP_STALL", ++ "TSIx_L2_EXT_SNOOP_RESP_CLEAN", ++ "TSIx_L2_EXT_SNOOP_RESP_DATA", ++ "TSIx_L2_EXT_SNOOP_INTERNAL", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++ "", ++}; + -+ if (katom->will_fail_event_code) -+ katom->event_code = katom->will_fail_event_code; ++#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +new file mode 100644 +index 000000000..42f0111c4 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +@@ -0,0 +1,123 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + -+ if (katom->event_code != BASE_JD_EVENT_DONE) { -+ kbase_js_evict_deps(kctx, katom, katom->slot_nr, -+ katom->sched_priority); -+ } ++#ifndef _KBASE_GPU_ID_H_ ++#define _KBASE_GPU_ID_H_ + -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, -+ katom->slot_nr), NULL, 0); -+#endif ++/* GPU_ID register */ ++#define GPU_ID_VERSION_STATUS_SHIFT 0 ++#define GPU_ID_VERSION_MINOR_SHIFT 4 ++#define GPU_ID_VERSION_MAJOR_SHIFT 12 ++#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 ++#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) ++#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) ++#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) ++#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + -+ /* Calculate the job's time used */ -+ if (end_timestamp != NULL) { -+ /* Only calculating it for jobs that really run on the HW (e.g. -+ * removed from next jobs never actually ran, so really did take -+ * zero time) */ -+ ktime_t tick_diff = ktime_sub(*end_timestamp, -+ katom->start_timestamp); ++/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ ++#define GPU_ID_PI_T60X 0x6956 ++#define GPU_ID_PI_T62X 0x0620 ++#define GPU_ID_PI_T76X 0x0750 ++#define GPU_ID_PI_T72X 0x0720 ++#define GPU_ID_PI_TFRX 0x0880 ++#define GPU_ID_PI_T86X 0x0860 ++#define GPU_ID_PI_T82X 0x0820 ++#define GPU_ID_PI_T83X 0x0830 + -+ microseconds_spent = ktime_to_ns(tick_diff); ++/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ ++#define GPU_ID_PI_NEW_FORMAT_START 0x1000 ++#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \ ++ (product_id) >= \ ++ GPU_ID_PI_NEW_FORMAT_START) + -+ do_div(microseconds_spent, 1000); ++#define GPU_ID2_VERSION_STATUS_SHIFT 0 ++#define GPU_ID2_VERSION_MINOR_SHIFT 4 ++#define GPU_ID2_VERSION_MAJOR_SHIFT 12 ++#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 ++#define GPU_ID2_ARCH_REV_SHIFT 20 ++#define GPU_ID2_ARCH_MINOR_SHIFT 24 ++#define GPU_ID2_ARCH_MAJOR_SHIFT 28 ++#define GPU_ID2_VERSION_STATUS (0xF << GPU_ID2_VERSION_STATUS_SHIFT) ++#define GPU_ID2_VERSION_MINOR (0xFF << GPU_ID2_VERSION_MINOR_SHIFT) ++#define GPU_ID2_VERSION_MAJOR (0xF << GPU_ID2_VERSION_MAJOR_SHIFT) ++#define GPU_ID2_PRODUCT_MAJOR (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT) ++#define GPU_ID2_ARCH_REV (0xF << GPU_ID2_ARCH_REV_SHIFT) ++#define GPU_ID2_ARCH_MINOR (0xF << GPU_ID2_ARCH_MINOR_SHIFT) ++#define GPU_ID2_ARCH_MAJOR (0xF << GPU_ID2_ARCH_MAJOR_SHIFT) ++#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) ++#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ ++ GPU_ID2_VERSION_MINOR | \ ++ GPU_ID2_VERSION_STATUS) + -+ /* Round up time spent to the minimum timer resolution */ -+ if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US) -+ microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US; -+ } ++/* Helper macro to create a partial GPU_ID (new format) that defines ++ a product ignoring its version. 
*/ ++#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ ++ (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ ++ ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ ++ ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ ++ ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) ++ ++/* Helper macro to create a partial GPU_ID (new format) that specifies the ++ revision (major, minor, status) of a product */ ++#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ ++ (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ ++ ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ ++ ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + ++/* Helper macro to create a complete GPU_ID (new format) */ ++#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ ++ version_major, version_minor, version_status) \ ++ (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ ++ product_major) | \ ++ GPU_ID2_VERSION_MAKE(version_major, version_minor, \ ++ version_status)) + -+ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); ++/* Helper macro to create a partial GPU_ID (new format) that identifies ++ a particular GPU model by its arch_major and product_major. */ ++#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ ++ (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ ++ ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + -+ /* Unblock cross dependency if present */ -+ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || -+ !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && -+ (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { -+ bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, -+ false); -+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; -+ kbase_js_move_to_tree(x_dep); -+ if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, -+ false)) -+ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, -+ x_dep->slot_nr); ++/* Strip off the non-relevant bits from a product_id value and make it suitable ++ for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU ++ model. */ ++#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ ++ (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ ++ GPU_ID2_PRODUCT_MODEL) + -+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) -+ return x_dep; ++#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6u, 0) ++#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6u, 1) ++#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7u, 0) ++#ifdef MALI_INCLUDE_TKAX ++#define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(9u, 0) ++#endif /* MALI_INCLUDE_TKAX */ ++#ifdef MALI_INCLUDE_TTRX ++#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(10u, 0) ++#endif /* MALI_INCLUDE_TTRX */ ++ ++/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ ++#define GPU_ID_S_15DEV0 0x1 ++#define GPU_ID_S_EAC 0x2 ++ ++/* Helper macro to create a GPU_ID assuming valid values for id, major, ++ minor, status */ ++#define GPU_ID_MAKE(id, major, minor, status) \ ++ (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ ++ ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ ++ ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ ++ ((status) << GPU_ID_VERSION_STATUS_SHIFT)) ++ ++#endif /* _KBASE_GPU_ID_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +new file mode 100644 +index 000000000..6df0a1cb1 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +@@ -0,0 +1,97 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016 ARM Limited. 
All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++ ++ ++#include ++ ++#ifdef CONFIG_DEBUG_FS ++/** Show callback for the @c gpu_memory debugfs file. ++ * ++ * This function is called to get the contents of the @c gpu_memory debugfs ++ * file. This is a report of current gpu memory usage. ++ * ++ * @param sfile The debugfs entry ++ * @param data Data associated with the entry ++ * ++ * @return 0 if successfully prints data in debugfs entry file ++ * -1 if it encountered an error ++ */ ++ ++static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) ++{ ++ struct list_head *entry; ++ const struct list_head *kbdev_list; ++ ++ kbdev_list = kbase_dev_list_get(); ++ list_for_each(entry, kbdev_list) { ++ struct kbase_device *kbdev = NULL; ++ struct kbasep_kctx_list_element *element; ++ ++ kbdev = list_entry(entry, struct kbase_device, entry); ++ /* output the total memory usage and cap for this device */ ++ seq_printf(sfile, "%-16s %10u\n", ++ kbdev->devname, ++ atomic_read(&(kbdev->memdev.used_pages))); ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_for_each_entry(element, &kbdev->kctx_list, link) { ++ /* output the memory usage and cap for each kctx ++ * opened on this device */ ++ seq_printf(sfile, " %s-0x%p %10u\n", ++ "kctx", ++ element->kctx, ++ atomic_read(&(element->kctx->used_pages))); ++ } ++ mutex_unlock(&kbdev->kctx_list_lock); + } ++ kbase_dev_list_put(kbdev_list); ++ return 0; ++} + -+ return NULL; ++/* ++ * File operations related to debugfs entry for gpu_memory ++ */ ++static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_gpu_memory_seq_show , NULL); +} + -+void kbase_js_sched(struct kbase_device *kbdev, int js_mask) ++static const struct file_operations kbasep_gpu_memory_debugfs_fops = { ++ .open = kbasep_gpu_memory_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++/* ++ * Initialize debugfs entry for gpu_memory ++ */ ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbase_context *last_active; -+ bool timer_sync = false; -+ bool ctx_waiting = false; ++ debugfs_create_file("gpu_memory", S_IRUGO, ++ kbdev->mali_debugfs_directory, NULL, ++ &kbasep_gpu_memory_debugfs_fops); ++ return; ++} + -+ js_devdata = &kbdev->js_data; ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) ++{ ++ return; ++} ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +new file mode 100644 +index 000000000..7045693eb +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +@@ -0,0 +1,37 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ down(&js_devdata->schedule_sem); -+ mutex_lock(&js_devdata->queue_mutex); + -+ last_active = kbdev->hwaccess.active_kctx; + -+ while (js_mask) { -+ int js; + -+ js = ffs(js_mask) - 1; + -+ while (1) { -+ struct kbase_context *kctx; -+ unsigned long flags; -+ bool context_idle = false; ++/** ++ * @file mali_kbase_gpu_memory_debugfs.h ++ * Header file for gpu_memory entry in debugfs ++ * ++ */ + -+ kctx = kbase_js_ctx_list_pop_head(kbdev, js); ++#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H ++#define _KBASE_GPU_MEMORY_DEBUGFS_H + -+ if (!kctx) { -+ js_mask &= ~(1 << js); -+ break; /* No contexts on pullable list */ -+ } ++#include ++#include + -+ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { -+ context_idle = true; ++/** ++ * @brief Initialize gpu_memory debugfs entry ++ */ ++void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); + -+ if (kbase_pm_context_active_handle_suspend( -+ kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { -+ /* Suspend pending - return context to -+ * queue and stop scheduling */ -+ mutex_lock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ if (kbase_js_ctx_list_add_pullable_head( -+ kctx->kbdev, kctx, js)) -+ kbase_js_sync_timers(kbdev); -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ up(&js_devdata->schedule_sem); -+ return; -+ } -+ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); -+ } ++#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +new file mode 100644 +index 000000000..a947a2e03 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +@@ -0,0 +1,510 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ if (!kbase_js_use_ctx(kbdev, kctx)) { -+ mutex_lock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ /* Context can not be used at this time */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (kbase_js_ctx_pullable(kctx, js, false) -+ || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_head_nolock( -+ kctx->kbdev, kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, kctx, js); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, -+ flags); -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); -+ if (context_idle) { -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } + -+ /* No more jobs can be submitted on this slot */ -+ js_mask &= ~(1 << js); -+ break; -+ } -+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ kbase_ctx_flag_clear(kctx, KCTX_PULLED); + -+ if (!kbase_jm_kick(kbdev, 1 << js)) -+ /* No more jobs can be submitted on this slot */ -+ js_mask &= ~(1 << js); + -+ if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { -+ bool pullable = kbase_js_ctx_pullable(kctx, js, -+ true); ++/* ++ * Base kernel property query APIs ++ */ + -+ /* Failed to pull jobs - push to head of list. -+ * Unless this context is already 'active', in -+ * which case it's effectively already scheduled -+ * so push it to the back of the list. */ -+ if (pullable && kctx == last_active) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kctx->kbdev, -+ kctx, js); -+ else if (pullable) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_head_nolock( -+ kctx->kbdev, -+ kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, -+ kctx, js); ++#include ++#include ++#include ++#include ++#include ++#include "mali_kbase_ioctl.h" ++#include + -+ /* If this context is not the active context, -+ * but the active context is pullable on this -+ * slot, then we need to remove the active -+ * marker to prevent it from submitting atoms in -+ * the IRQ handler, which would prevent this -+ * context from making progress. */ -+ if (last_active && kctx != last_active && -+ kbase_js_ctx_pullable( -+ last_active, js, true)) -+ ctx_waiting = true; ++/** ++ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. ++ * @value: The value from which to extract bits. ++ * @offset: The first bit to extract (0 being the LSB). ++ * @size: The number of bits to extract. ++ * ++ * Context: @offset + @size <= 32. ++ * ++ * Return: Bits [@offset, @offset + @size) from @value. 
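A quick way to see what this bitfield helper produces is to run the same shift-and-mask expression in user space. The following is an illustrative sketch only: the sample gpu_id value is invented, and the field offsets mirror the kbase_gpuprops_update_core_props_gpu_id() helper defined later in this file.

#include <stdio.h>
#include <stdint.h>

/* Same shift-and-mask pattern as the in-kernel KBASE_UBFX32() macro. */
#define KBASE_UBFX32(value, offset, size) \
	(((uint32_t)(value) >> (uint32_t)(offset)) & (uint32_t)((1ULL << (uint32_t)(size)) - 1))

int main(void)
{
	uint32_t gpu_id = 0x08201000;	/* invented raw GPU_ID register value */

	printf("version_status = %u\n", (unsigned)KBASE_UBFX32(gpu_id, 0, 4));
	printf("minor_revision = %u\n", (unsigned)KBASE_UBFX32(gpu_id, 4, 8));
	printf("major_revision = %u\n", (unsigned)KBASE_UBFX32(gpu_id, 12, 4));
	printf("product_id     = 0x%04x\n", (unsigned)KBASE_UBFX32(gpu_id, 16, 16));
	return 0;
}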
++ */ ++/* from mali_cdsb.h */ ++#define KBASE_UBFX32(value, offset, size) \ ++ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + -+ if (context_idle) { -+ kbase_jm_idle_ctx(kbdev, kctx); -+ spin_unlock_irqrestore( -+ &kbdev->hwaccess_lock, -+ flags); -+ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); -+ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); -+ kbase_pm_context_idle(kbdev); -+ } else { -+ spin_unlock_irqrestore( -+ &kbdev->hwaccess_lock, -+ flags); -+ } -+ mutex_unlock( -+ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props) ++{ ++ kbase_gpu_clk_speed_func get_gpu_speed_mhz; ++ u32 gpu_speed_mhz; ++ int rc = 1; + -+ js_mask &= ~(1 << js); -+ break; /* Could not run atoms on this slot */ -+ } ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != kbase_props); + -+ /* Push to back of list */ -+ if (kbase_js_ctx_pullable(kctx, js, true)) -+ timer_sync |= -+ kbase_js_ctx_list_add_pullable_nolock( -+ kctx->kbdev, kctx, js); -+ else -+ timer_sync |= -+ kbase_js_ctx_list_add_unpullable_nolock( -+ kctx->kbdev, kctx, js); ++ /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function. ++ * If that function fails, or the function is not provided by the system integrator, we report the maximum ++ * GPU speed as specified by GPU_FREQ_KHZ_MAX. ++ */ ++ get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC; ++ if (get_gpu_speed_mhz != NULL) { ++ rc = get_gpu_speed_mhz(&gpu_speed_mhz); ++#ifdef CONFIG_MALI_DEBUG ++ /* Issue a warning message when the reported GPU speed falls outside the min/max range */ ++ if (rc == 0) { ++ u32 gpu_speed_khz = gpu_speed_mhz * 1000; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || ++ gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max) ++ dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n", ++ (unsigned long)gpu_speed_khz, ++ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, ++ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max); + } ++#endif /* CONFIG_MALI_DEBUG */ ++ } ++ if (kctx->kbdev->clock) { ++ gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000; ++ rc = 0; + } ++ if (rc != 0) ++ gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000; + -+ if (timer_sync) -+ kbase_js_sync_timers(kbdev); ++ kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz; + -+ if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting) -+ kbdev->hwaccess.active_kctx = NULL; ++ memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props)); + -+ mutex_unlock(&js_devdata->queue_mutex); -+ up(&js_devdata->schedule_sem); ++ /* Before API 8.2 they expect L3 cache info here, which was always 0 */ ++ if (kctx->api_version < KBASE_API_VERSION(8, 2)) ++ kbase_props->props.raw_props.suspend_size = 0; ++ ++ return 0; +} + -+void kbase_js_zap_context(struct kbase_context *kctx) ++static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; -+ int js; ++ struct mali_base_gpu_coherent_group 
*current_group; ++ u64 group_present; ++ u64 group_mask; ++ u64 first_set, first_set_prev; ++ u32 num_groups = 0; + -+ /* -+ * Critical assumption: No more submission is possible outside of the -+ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) -+ * whilst the struct kbase_context is terminating. -+ */ ++ KBASE_DEBUG_ASSERT(NULL != props); + -+ /* First, atomically do the following: -+ * - mark the context as dying -+ * - try to evict it from the queue */ -+ mutex_lock(&kctx->jctx.lock); -+ mutex_lock(&js_devdata->queue_mutex); -+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); -+ kbase_ctx_flag_set(kctx, KCTX_DYING); ++ props->coherency_info.coherency = props->raw_props.mem_features; ++ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + -+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); ++ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { ++ /* Group is l2 coherent */ ++ group_present = props->raw_props.l2_present; ++ } else { ++ /* Group is l1 coherent */ ++ group_present = props->raw_props.shader_present; ++ } + + /* -+ * At this point we know: -+ * - If eviction succeeded, it was in the queue, but now no -+ * longer is -+ * - We must cancel the jobs here. No Power Manager active reference to -+ * release. -+ * - This happens asynchronously - kbase_jd_zap_context() will wait for -+ * those jobs to be killed. -+ * - If eviction failed, then it wasn't in the queue. It is one -+ * of the following: -+ * - a. it didn't have any jobs, and so is not in the Queue or -+ * the Run Pool (not scheduled) -+ * - Hence, no more work required to cancel jobs. No Power Manager -+ * active reference to release. -+ * - b. it was in the middle of a scheduling transaction (and thus must -+ * have at least 1 job). This can happen from a syscall or a -+ * kernel thread. We still hold the jsctx_mutex, and so the thread -+ * must be waiting inside kbasep_js_try_schedule_head_ctx(), -+ * before checking whether the runpool is full. That thread will -+ * continue after we drop the mutex, and will notice the context -+ * is dying. It will rollback the transaction, killing all jobs at -+ * the same time. kbase_jd_zap_context() will wait for those jobs -+ * to be killed. -+ * - Hence, no more work required to cancel jobs, or to release the -+ * Power Manager active reference. -+ * - c. it is scheduled, and may or may not be running jobs -+ * - We must cause it to leave the runpool by stopping it from -+ * submitting any more jobs. When it finally does leave, -+ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs -+ * (because it is dying), release the Power Manager active reference, -+ * and will not requeue the context in the queue. -+ * kbase_jd_zap_context() will wait for those jobs to be killed. -+ * - Hence, work required just to make it leave the runpool. Cancelling -+ * jobs and releasing the Power manager active reference will be -+ * handled when it leaves the runpool. ++ * The coherent group mask can be computed from the l2 present ++ * register. ++ * ++ * For the coherent group n: ++ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) ++ * where first_set is group_present with only its nth set-bit kept ++ * (i.e. the position from where a new group starts). 
++ * ++ * For instance if the groups are l2 coherent and l2_present=0x0..01111: ++ * The first mask is: ++ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) ++ * = (0x0..010 - 1) & ~(0x0..01 - 1) ++ * = 0x0..00f ++ * The second mask is: ++ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) ++ * = (0x0..100 - 1) & ~(0x0..010 - 1) ++ * = 0x0..0f0 ++ * And so on until all the bits from group_present have been cleared ++ * (i.e. there is no group left). + */ -+ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { -+ if (!list_empty( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) -+ list_del_init( -+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); -+ } -+ -+ /* The following events require us to kill off remaining jobs -+ * and update PM book-keeping: -+ * - we evicted it correctly (it must have jobs to be in the -+ * Queue) -+ * -+ * These events need no action, but take this path anyway: -+ * - Case a: it didn't have any jobs, and was never in the Queue -+ * - Case b: scheduling transaction will be partially rolled- -+ * back (this already cancels the jobs) -+ */ -+ -+ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, -+ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); ++ current_group = props->coherency_info.group; ++ first_set = group_present & ~(group_present - 1); + -+ /* Only cancel jobs when we evicted from the -+ * queue. No Power Manager active reference was held. -+ * -+ * Having is_dying set ensures that this kills, and -+ * doesn't requeue */ -+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); ++ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { ++ group_present -= first_set; /* Clear the current group bit */ ++ first_set_prev = first_set; + -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ mutex_unlock(&kctx->jctx.lock); -+ } else { -+ unsigned long flags; -+ bool was_retained; ++ first_set = group_present & ~(group_present - 1); ++ group_mask = (first_set - 1) & ~(first_set_prev - 1); + -+ /* Case c: didn't evict, but it is scheduled - it's in the Run -+ * Pool */ -+ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, -+ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); ++ /* Populate the coherent_group structure for each group */ ++ current_group->core_mask = group_mask & props->raw_props.shader_present; ++ current_group->num_cores = hweight64(current_group->core_mask); + -+ /* Disable the ctx from submitting any more jobs */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ num_groups++; ++ current_group++; ++ } + -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ if (group_present != 0) ++ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); + -+ /* Retain and (later) release the context whilst it is is now -+ * disallowed from submitting jobs - ensures that someone -+ * somewhere will be removing the context later on */ -+ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); ++ props->coherency_info.num_groups = num_groups; ++} + -+ /* Since it's scheduled and we have the jsctx_mutex, it must be -+ * retained successfully */ -+ KBASE_DEBUG_ASSERT(was_retained); ++/** ++ * kbase_gpuprops_get_props - Get the GPU configuration ++ * @gpu_props: The &base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Fill the 
&base_gpu_props structure with values from the GPU configuration ++ * registers. Only the raw properties are filled in this function ++ */ ++static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) ++{ ++ struct kbase_gpuprops_regdump regdump; ++ int i; + -+ dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(NULL != gpu_props); + -+ /* Cancel any remaining running jobs for this kctx - if any. -+ * Submit is disallowed which takes effect immediately, so no -+ * more new jobs will appear after we do this. */ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ kbase_job_slot_hardstop(kctx, js, NULL); ++ /* Dump relevant registers */ ++ kbase_backend_gpuprops_get(kbdev, ®dump); + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); -+ mutex_unlock(&js_devdata->queue_mutex); -+ mutex_unlock(&kctx->jctx.lock); ++ gpu_props->raw_props.gpu_id = regdump.gpu_id; ++ gpu_props->raw_props.tiler_features = regdump.tiler_features; ++ gpu_props->raw_props.mem_features = regdump.mem_features; ++ gpu_props->raw_props.mmu_features = regdump.mmu_features; ++ gpu_props->raw_props.l2_features = regdump.l2_features; ++ gpu_props->raw_props.suspend_size = regdump.suspend_size; + -+ dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", -+ kctx); ++ gpu_props->raw_props.as_present = regdump.as_present; ++ gpu_props->raw_props.js_present = regdump.js_present; ++ gpu_props->raw_props.shader_present = ++ ((u64) regdump.shader_present_hi << 32) + ++ regdump.shader_present_lo; ++ gpu_props->raw_props.tiler_present = ++ ((u64) regdump.tiler_present_hi << 32) + ++ regdump.tiler_present_lo; ++ gpu_props->raw_props.l2_present = ++ ((u64) regdump.l2_present_hi << 32) + ++ regdump.l2_present_lo; ++#ifdef CONFIG_MALI_CORESTACK ++ gpu_props->raw_props.stack_present = ++ ((u64) regdump.stack_present_hi << 32) + ++ regdump.stack_present_lo; ++#else /* CONFIG_MALI_CORESTACK */ ++ gpu_props->raw_props.stack_present = 0; ++#endif /* CONFIG_MALI_CORESTACK */ + -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+ } ++ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) ++ gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + -+ KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + -+ /* After this, you must wait on both the -+ * kbase_jd_context::zero_jobs_wait and the -+ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs -+ * to be destroyed, and the context to be de-scheduled (if it was on the -+ * runpool). -+ * -+ * kbase_jd_zap_context() will do this. 
*/ ++ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; ++ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; ++ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; ++ gpu_props->raw_props.thread_features = regdump.thread_features; +} + -+static inline int trace_get_refcnt(struct kbase_device *kbdev, -+ struct kbase_context *kctx) ++void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) +{ -+ return atomic_read(&kctx->refcount); ++ gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); ++ gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); ++ gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); ++ gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); +} + +/** -+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context -+ * @kctx: Pointer to context. -+ * @callback: Pointer to function to call for each job. -+ * -+ * Call a function on all jobs belonging to a non-queued, non-running -+ * context, and detach the jobs from the context as it goes. -+ * -+ * Due to the locks that might be held at the time of the call, the callback -+ * may need to defer work on a workqueue to complete its actions (e.g. when -+ * cancelling jobs) -+ * -+ * Atoms will be removed from the queue, so this must only be called when -+ * cancelling jobs (which occurs as part of context destruction). ++ * kbase_gpuprops_calculate_props - Calculate the derived properties ++ * @gpu_props: The &base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device + * -+ * The locking conditions on the caller are as follows: -+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * Fill the &base_gpu_props structure with values derived from the GPU ++ * configuration registers + */ -+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, -+ kbasep_js_ctx_job_cb callback) ++static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; -+ unsigned long flags; -+ u32 js; ++ int i; + -+ kbdev = kctx->kbdev; -+ js_devdata = &kbdev->js_data; ++ /* Populate the base_gpu_props structure */ ++ kbase_gpuprops_update_core_props_gpu_id(gpu_props); ++ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; ++ gpu_props->core_props.gpu_available_memory_size = totalram_pages() << PAGE_SHIFT; + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) ++ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, -+ 0u, trace_get_refcnt(kbdev, kctx)); ++ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); ++ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + -+ /* Invoke callback on jobs on each slot in turn */ -+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) -+ jsctx_queue_foreach(kctx, js, callback); ++ /* Field with number of l2 slices is added to MEM_FEATURES register ++ * since t76x. Below code assumes that for older GPU reserved bits will ++ * be read as zero. 
*/ ++ gpu_props->l2_props.num_l2_slices = ++ KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); ++ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); ++ ++ if (gpu_props->raw_props.thread_max_threads == 0) ++ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; ++ else ++ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; ++ ++ if (gpu_props->raw_props.thread_max_workgroup_size == 0) ++ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; ++ else ++ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; ++ ++ if (gpu_props->raw_props.thread_max_barrier_size == 0) ++ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; ++ else ++ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; ++ ++ gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); ++ gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); ++ gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); ++ gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); ++ ++ /* If values are not specified, then use defaults */ ++ if (gpu_props->thread_props.max_registers == 0) { ++ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; ++ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; ++ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; ++ } ++ /* Initialize the coherent_group structure for each group */ ++ kbase_gpuprops_construct_coherent_groups(gpu_props); +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h ++ ++void kbase_gpuprops_set(struct kbase_device *kbdev) ++{ ++ struct kbase_gpu_props *gpu_props; ++ struct gpu_raw_gpu_props *raw; ++ ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ gpu_props = &kbdev->gpu_props; ++ raw = &gpu_props->props.raw_props; ++ ++ /* Initialize the base_gpu_props structure from the hardware */ ++ kbase_gpuprops_get_props(&gpu_props->props, kbdev); ++ ++ /* Populate the derived properties */ ++ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); ++ ++ /* Populate kbase-only fields */ ++ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); ++ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); ++ ++ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); ++ ++ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); ++ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); ++ ++ gpu_props->num_cores = hweight64(raw->shader_present); ++ gpu_props->num_core_groups = hweight64(raw->l2_present); ++ gpu_props->num_address_spaces = hweight32(raw->as_present); ++ gpu_props->num_job_slots = hweight32(raw->js_present); ++} ++ ++void kbase_gpuprops_set_features(struct kbase_device *kbdev) ++{ ++ base_gpu_props *gpu_props; ++ struct kbase_gpuprops_regdump regdump; ++ ++ gpu_props = &kbdev->gpu_props.props; ++ ++ /* Dump relevant registers */ ++ kbase_backend_gpuprops_get_features(kbdev, ®dump); ++ ++ /* ++ * Copy the raw value from the register, later this will get turned ++ * into the selected coherency mode. 
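The num_cores and num_core_groups values that kbase_gpuprops_set() derives are plain population counts over the present masks read back from the hardware. Below is a minimal user-space sketch of that calculation, where popcount64() stands in for the kernel's hweight64() and the mask values are invented.

#include <stdio.h>
#include <stdint.h>

/* Stand-in for hweight64(): count the set bits in a 64-bit core mask. */
static unsigned int popcount64(uint64_t x)
{
	unsigned int n = 0;

	while (x) {
		x &= x - 1;	/* clear the lowest set bit */
		n++;
	}
	return n;
}

int main(void)
{
	/* Invented present masks: four shader cores behind a single L2 slice. */
	uint64_t shader_present = 0x0f;
	uint64_t l2_present = 0x01;

	printf("num_cores       = %u\n", popcount64(shader_present));
	printf("num_core_groups = %u\n", popcount64(l2_present));
	return 0;
}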
++ * Additionally, add non-coherent mode, as this is always supported. ++ */ ++ gpu_props->raw_props.coherency_mode = regdump.coherency_features | ++ COHERENCY_FEATURE_BIT(COHERENCY_NONE); ++} ++ ++static struct { ++ u32 type; ++ size_t offset; ++ int size; ++} gpu_property_mapping[] = { ++#define PROP(name, member) \ ++ {KBASE_GPUPROP_ ## name, offsetof(struct mali_base_gpu_props, member), \ ++ sizeof(((struct mali_base_gpu_props *)0)->member)} ++ PROP(PRODUCT_ID, core_props.product_id), ++ PROP(VERSION_STATUS, core_props.version_status), ++ PROP(MINOR_REVISION, core_props.minor_revision), ++ PROP(MAJOR_REVISION, core_props.major_revision), ++ PROP(GPU_SPEED_MHZ, core_props.gpu_speed_mhz), ++ PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), ++ PROP(GPU_FREQ_KHZ_MIN, core_props.gpu_freq_khz_min), ++ PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), ++ PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), ++ PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), ++ PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), ++ PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), ++ ++ PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), ++ PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), ++ PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), ++ ++ PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), ++ PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), ++ ++ PROP(MAX_THREADS, thread_props.max_threads), ++ PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), ++ PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), ++ PROP(MAX_REGISTERS, thread_props.max_registers), ++ PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), ++ PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), ++ PROP(IMPL_TECH, thread_props.impl_tech), ++ ++ PROP(RAW_SHADER_PRESENT, raw_props.shader_present), ++ PROP(RAW_TILER_PRESENT, raw_props.tiler_present), ++ PROP(RAW_L2_PRESENT, raw_props.l2_present), ++ PROP(RAW_STACK_PRESENT, raw_props.stack_present), ++ PROP(RAW_L2_FEATURES, raw_props.l2_features), ++ PROP(RAW_SUSPEND_SIZE, raw_props.suspend_size), ++ PROP(RAW_MEM_FEATURES, raw_props.mem_features), ++ PROP(RAW_MMU_FEATURES, raw_props.mmu_features), ++ PROP(RAW_AS_PRESENT, raw_props.as_present), ++ PROP(RAW_JS_PRESENT, raw_props.js_present), ++ PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), ++ PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), ++ PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), ++ PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), ++ PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), ++ PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), ++ PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), ++ PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), ++ PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), ++ PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), ++ PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), ++ PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), ++ PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), ++ PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), ++ PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), ++ PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), ++ PROP(RAW_TILER_FEATURES, raw_props.tiler_features), ++ PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), ++ PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), ++ PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), ++ PROP(RAW_GPU_ID, raw_props.gpu_id), ++ PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), 
++ PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, ++ raw_props.thread_max_workgroup_size), ++ PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), ++ PROP(RAW_THREAD_FEATURES, raw_props.thread_features), ++ PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), ++ ++ PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), ++ PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), ++ PROP(COHERENCY_COHERENCY, coherency_info.coherency), ++ PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), ++ PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), ++ PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), ++ PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), ++ PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), ++ PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), ++ PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), ++ PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), ++ PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), ++ PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), ++ PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), ++ PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), ++ PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), ++ PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), ++ PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), ++ PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), ++ ++#undef PROP ++}; ++ ++int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) ++{ ++ struct kbase_gpu_props *kprops = &kbdev->gpu_props; ++ struct mali_base_gpu_props *props = &kprops->props; ++ u32 count = ARRAY_SIZE(gpu_property_mapping); ++ u32 i; ++ u32 size = 0; ++ u8 *p; ++ ++ for (i = 0; i < count; i++) { ++ /* 4 bytes for the ID, and the size of the property */ ++ size += 4 + gpu_property_mapping[i].size; ++ } ++ ++ kprops->prop_buffer_size = size; ++ kprops->prop_buffer = kmalloc(size, GFP_KERNEL); ++ ++ if (!kprops->prop_buffer) { ++ kprops->prop_buffer_size = 0; ++ return -ENOMEM; ++ } ++ ++ p = kprops->prop_buffer; ++ ++#define WRITE_U8(v) (*p++ = (v) & 0xFF) ++#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) ++#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) ++#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) ++ ++ for (i = 0; i < count; i++) { ++ u32 type = gpu_property_mapping[i].type; ++ u8 type_size; ++ void *field = ((u8 *)props) + gpu_property_mapping[i].offset; ++ ++ switch (gpu_property_mapping[i].size) { ++ case 1: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U8; ++ break; ++ case 2: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U16; ++ break; ++ case 4: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U32; ++ break; ++ case 8: ++ type_size = KBASE_GPUPROP_VALUE_SIZE_U64; ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "Invalid gpu_property_mapping type=%d size=%d", ++ type, gpu_property_mapping[i].size); ++ return -EINVAL; ++ } ++ ++ WRITE_U32((type<<2) | type_size); ++ ++ switch (type_size) { ++ case KBASE_GPUPROP_VALUE_SIZE_U8: ++ WRITE_U8(*((u8 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U16: ++ WRITE_U16(*((u16 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U32: ++ WRITE_U32(*((u32 *)field)); ++ break; ++ case KBASE_GPUPROP_VALUE_SIZE_U64: ++ WRITE_U64(*((u64 *)field)); ++ break; ++ default: /* Cannot be reached */ ++ WARN_ON(1); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h 
b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h new file mode 100644 -index 000000000..ddada8e46 +index 000000000..57b3eaf9c --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h -@@ -0,0 +1,925 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +@@ -0,0 +1,84 @@ +/* + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -376755,920 +378218,984 @@ index 000000000..ddada8e46 + + +/** -+ * @file mali_kbase_js.h -+ * Job Scheduler APIs. ++ * @file mali_kbase_gpuprops.h ++ * Base kernel property query APIs + */ + -+#ifndef _KBASE_JS_H_ -+#define _KBASE_JS_H_ -+ -+#include "mali_kbase_js_defs.h" -+#include "mali_kbase_context.h" -+#include "mali_kbase_defs.h" -+#include "mali_kbase_debug.h" -+ -+#include "mali_kbase_js_ctx_attr.h" ++#ifndef _KBASE_GPUPROPS_H_ ++#define _KBASE_GPUPROPS_H_ + -+/** -+ * @addtogroup base_api -+ * @{ -+ */ ++#include "mali_kbase_gpuprops_types.h" + -+/** -+ * @addtogroup base_kbase_api -+ * @{ -+ */ ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; + +/** -+ * @addtogroup kbase_js Job Scheduler Internal APIs -+ * @{ ++ * @brief Set up Kbase GPU properties. + * -+ * These APIs are Internal to KBase. -+ */ -+ -+/** -+ * @brief Initialize the Job Scheduler ++ * Set up Kbase GPU properties with information from the GPU registers + * -+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero -+ * initialized before passing to the kbasep_js_devdata_init() function. This is -+ * to give efficient error path code. ++ * @param kbdev The struct kbase_device structure for the device + */ -+int kbasep_js_devdata_init(struct kbase_device * const kbdev); ++void kbase_gpuprops_set(struct kbase_device *kbdev); + +/** -+ * @brief Halt the Job Scheduler. -+ * -+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data -+ * sub-structure was never initialized/failed initialization, to give efficient -+ * error-path code. -+ * -+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must -+ * be zero initialized before passing to the kbasep_js_devdata_init() -+ * function. This is to give efficient error path code. -+ * -+ * It is a Programming Error to call this whilst there are still kbase_context -+ * structures registered with this scheduler. ++ * kbase_gpuprops_set_features - Set up Kbase GPU properties ++ * @kbdev: Device pointer + * ++ * This function sets up GPU properties that are dependent on the hardware ++ * features bitmask. This function must be preceeded by a call to ++ * kbase_hw_set_features_mask(). + */ -+void kbasep_js_devdata_halt(struct kbase_device *kbdev); ++void kbase_gpuprops_set_features(struct kbase_device *kbdev); + +/** -+ * @brief Terminate the Job Scheduler ++ * @brief Provide GPU properties to userside through UKU call. + * -+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data -+ * sub-structure was never initialized/failed initialization, to give efficient -+ * error-path code. ++ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers. + * -+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must -+ * be zero initialized before passing to the kbasep_js_devdata_init() -+ * function. This is to give efficient error path code. 
++ * @param kctx The struct kbase_context structure ++ * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace + * -+ * It is a Programming Error to call this whilst there are still kbase_context -+ * structures registered with this scheduler. ++ * @return 0 on success. Any other value indicates failure. + */ -+void kbasep_js_devdata_term(struct kbase_device *kbdev); ++int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props); + +/** -+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. -+ * -+ * This effectively registers a struct kbase_context with a Job Scheduler. -+ * -+ * It does not register any jobs owned by the struct kbase_context with the scheduler. -+ * Those must be separately registered by kbasep_js_add_job(). ++ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer ++ * @kbdev: The kbase device + * -+ * The struct kbase_context must be zero intitialized before passing to the -+ * kbase_js_init() function. This is to give efficient error path code. ++ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user ++ * space to read. + */ -+int kbasep_js_kctx_init(struct kbase_context * const kctx); ++int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); + +/** -+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler -+ * -+ * This effectively de-registers a struct kbase_context from its Job Scheduler -+ * -+ * It is safe to call this on a struct kbase_context that has never had or failed -+ * initialization of its jctx.sched_info member, to give efficient error-path -+ * code. -+ * -+ * For this to work, the struct kbase_context must be zero intitialized before passing -+ * to the kbase_js_init() function. ++ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value ++ * @gpu_props: the &base_gpu_props structure + * -+ * It is a Programming Error to call this whilst there are still jobs -+ * registered with this context. ++ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into ++ * separate fields (version_status, minor_revision, major_revision, product_id) ++ * stored in base_gpu_props::core_props. + */ -+void kbasep_js_kctx_term(struct kbase_context *kctx); ++void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props); + -+/** -+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to -+ * schedule the context/run the job. -+ * -+ * This atomically does the following: -+ * - Update the numbers of jobs information -+ * - Add the job to the run pool if necessary (part of init_job) -+ * -+ * Once this is done, then an appropriate action is taken: -+ * - If the ctx is scheduled, it attempts to start the next job (which might be -+ * this added job) -+ * - Otherwise, and if this is the first job on the context, it enqueues it on -+ * the Policy Queue -+ * -+ * The Policy's Queue can be updated by this in the following ways: -+ * - In the above case that this is the first job on the context -+ * - If the context is high priority and the context is not scheduled, then it -+ * could cause the Policy to schedule out a low-priority context, allowing -+ * this context to be scheduled in. 
++ ++#endif /* _KBASE_GPUPROPS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +new file mode 100644 +index 000000000..10794fc27 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +@@ -0,0 +1,92 @@ ++/* + * -+ * If the context is already scheduled on the RunPool, then adding a job to it -+ * is guarenteed not to update the Policy Queue. And so, the caller is -+ * guarenteed to not need to try scheduling a context from the Run Pool - it -+ * can safely assert that the result is false. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * -+ * It is a programming error to have more than U32_MAX jobs in flight at a time. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - it must \em not hold hwaccess_lock (as this will be obtained internally) -+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * @return true indicates that the Policy Queue was updated, and so the -+ * caller will need to try scheduling a context onto the Run Pool. -+ * @return false indicates that no updates were made to the Policy Queue, -+ * so no further action is required from the caller. This is \b always returned -+ * when the context is currently scheduled. + */ -+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); ++ ++ ++ ++ + +/** -+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. -+ * -+ * Completely removing a job requires several calls: -+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of -+ * the atom -+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler -+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the -+ * remaining state held as part of the job having been run. -+ * -+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. -+ * -+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. -+ * -+ * It is a programming error to call this when: -+ * - \a atom is not a job belonging to kctx. -+ * - \a atom has already been removed from the Job Scheduler. -+ * - \a atom is still in the runpool -+ * -+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use -+ * kbasep_js_remove_cancelled_job() instead. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
-+ * ++ * @file mali_kbase_gpuprops_types.h ++ * Base kernel property query APIs + */ -+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); + -+/** -+ * @brief Completely remove a job chain from the Job Scheduler, in the case -+ * where the job chain was cancelled. ++#ifndef _KBASE_GPUPROPS_TYPES_H_ ++#define _KBASE_GPUPROPS_TYPES_H_ ++ ++#include "mali_base_kernel.h" ++ ++#define KBASE_GPU_SPEED_MHZ 123 ++#define KBASE_GPU_PC_SIZE_LOG2 24U ++ ++struct kbase_gpuprops_regdump { ++ u32 gpu_id; ++ u32 l2_features; ++ u32 suspend_size; /* API 8.2+ */ ++ u32 tiler_features; ++ u32 mem_features; ++ u32 mmu_features; ++ u32 as_present; ++ u32 js_present; ++ u32 thread_max_threads; ++ u32 thread_max_workgroup_size; ++ u32 thread_max_barrier_size; ++ u32 thread_features; ++ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; ++ u32 js_features[GPU_MAX_JOB_SLOTS]; ++ u32 shader_present_lo; ++ u32 shader_present_hi; ++ u32 tiler_present_lo; ++ u32 tiler_present_hi; ++ u32 l2_present_lo; ++ u32 l2_present_hi; ++ u32 stack_present_lo; ++ u32 stack_present_hi; ++ u32 coherency_features; ++}; ++ ++struct kbase_gpu_cache_props { ++ u8 associativity; ++ u8 external_bus_width; ++}; ++ ++struct kbase_gpu_mem_props { ++ u8 core_group; ++}; ++ ++struct kbase_gpu_mmu_props { ++ u8 va_bits; ++ u8 pa_bits; ++}; ++ ++struct kbase_gpu_props { ++ /* kernel-only properties */ ++ u8 num_cores; ++ u8 num_core_groups; ++ u8 num_address_spaces; ++ u8 num_job_slots; ++ ++ struct kbase_gpu_cache_props l2_props; ++ ++ struct kbase_gpu_mem_props mem; ++ struct kbase_gpu_mmu_props mmu; ++ ++ /* Properties shared with userspace */ ++ base_gpu_props props; ++ ++ u32 prop_buffer_size; ++ void *prop_buffer; ++}; ++ ++#endif /* _KBASE_GPUPROPS_TYPES_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c +new file mode 100644 +index 000000000..9a390d233 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c +@@ -0,0 +1,453 @@ ++/* + * -+ * This is a variant of kbasep_js_remove_job() that takes care of removing all -+ * of the retained state too. This is generally useful for cancelled atoms, -+ * which need not be handled in an optimal way. ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * -+ * It is a programming error to call this when: -+ * - \a atom is not a job belonging to kctx. -+ * - \a atom has already been removed from the Job Scheduler. -+ * - \a atom is still in the runpool: -+ * - it is not being killed with kbasep_jd_cancel() ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * The following locking conditions are made on the caller: -+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - it must \em not hold the hwaccess_lock, (as this will be obtained -+ * internally) -+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be -+ * obtained internally) ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
+ * -+ * @return true indicates that ctx attributes have changed and the caller -+ * should call kbase_js_sched_all() to try to run more jobs -+ * @return false otherwise + */ -+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, -+ struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); + -+/** -+ * @brief Refcount a context as being busy, preventing it from being scheduled -+ * out. -+ * -+ * @note This function can safely be called from IRQ context. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be -+ * used internally. -+ * -+ * @return value != false if the retain succeeded, and the context will not be scheduled out. -+ * @return false if the retain failed (because the context is being/has been scheduled out). -+ */ -+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+/** -+ * @brief Refcount a context as being busy, preventing it from being scheduled -+ * out. -+ * -+ * @note This function can safely be called from IRQ context. -+ * -+ * The following locks must be held by the caller: -+ * - mmu_hw_mutex, hwaccess_lock -+ * -+ * @return value != false if the retain succeeded, and the context will not be scheduled out. -+ * @return false if the retain failed (because the context is being/has been scheduled out). -+ */ -+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); + -+/** -+ * @brief Lookup a context in the Run Pool based upon its current address space -+ * and ensure that is stays scheduled in. -+ * -+ * The context is refcounted as being busy to prevent it from scheduling -+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no -+ * longer required to stay scheduled in. -+ * -+ * @note This function can safely be called from IRQ context. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold the hwaccess_lock, because it will be used internally. -+ * If the hwaccess_lock is already held, then the caller should use -+ * kbasep_js_runpool_lookup_ctx_nolock() instead. -+ * -+ * @return a valid struct kbase_context on success, which has been refcounted as being busy. 
-+ * @return NULL on failure, indicating that no context was found in \a as_nr ++ ++ ++/* ++ * Run-time work-arounds helpers + */ -+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); ++ ++#include ++#include ++#include ++#include "mali_kbase.h" ++#include "mali_kbase_hw.h" ++ ++void kbase_hw_set_features_mask(struct kbase_device *kbdev) ++{ ++ const enum base_hw_feature *features; ++ u32 gpu_id; ++ u32 product_id; ++ ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; ++ product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ if (GPU_ID_IS_NEW_FORMAT(product_id)) { ++ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { ++ case GPU_ID2_PRODUCT_TMIX: ++ features = base_hw_features_tMIx; ++ break; ++ case GPU_ID2_PRODUCT_THEX: ++ features = base_hw_features_tHEx; ++ break; ++ case GPU_ID2_PRODUCT_TSIX: ++ features = base_hw_features_tSIx; ++ break; ++#ifdef MALI_INCLUDE_TKAX ++ case GPU_ID2_PRODUCT_TKAX: ++ features = base_hw_features_tKAx; ++ break; ++#endif /* MALI_INCLUDE_TKAX */ ++#ifdef MALI_INCLUDE_TTRX ++ case GPU_ID2_PRODUCT_TTRX: ++ features = base_hw_features_tTRx; ++ break; ++#endif /* MALI_INCLUDE_TTRX */ ++ default: ++ features = base_hw_features_generic; ++ break; ++ } ++ } else { ++ switch (product_id) { ++ case GPU_ID_PI_TFRX: ++ /* FALLTHROUGH */ ++ case GPU_ID_PI_T86X: ++ features = base_hw_features_tFxx; ++ break; ++ case GPU_ID_PI_T83X: ++ features = base_hw_features_t83x; ++ break; ++ case GPU_ID_PI_T82X: ++ features = base_hw_features_t82x; ++ break; ++ case GPU_ID_PI_T76X: ++ features = base_hw_features_t76x; ++ break; ++ case GPU_ID_PI_T72X: ++ features = base_hw_features_t72x; ++ break; ++ case GPU_ID_PI_T62X: ++ features = base_hw_features_t62x; ++ break; ++ case GPU_ID_PI_T60X: ++ features = base_hw_features_t60x; ++ break; ++ default: ++ features = base_hw_features_generic; ++ break; ++ } ++ } ++ ++ for (; *features != BASE_HW_FEATURE_END; features++) ++ set_bit(*features, &kbdev->hw_features_mask[0]); ++} + +/** -+ * @brief Handling the requeuing/killing of a context that was evicted from the -+ * policy queue or runpool. ++ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID ++ * @kbdev: Device pointer + * -+ * This should be used whenever handing off a context that has been evicted -+ * from the policy queue or the runpool: -+ * - If the context is not dying and has jobs, it gets re-added to the policy -+ * queue -+ * - Otherwise, it is not added ++ * Return: pointer to an array of hardware issues, terminated by ++ * BASE_HW_ISSUE_END. + * -+ * In addition, if the context is dying the jobs are killed asynchronously. ++ * This function can only be used on new-format GPU IDs, i.e. those for which ++ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev. + * -+ * In all cases, the Power Manager active reference is released -+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a -+ * has_pm_ref must be set to false whenever the context was not previously in -+ * the runpool and does not hold a Power Manager active refcount. Note that -+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an -+ * active refcount even though they weren't in the runpool. ++ * In debugging versions of the driver, unknown versions of a known GPU will ++ * be treated as the most recent known version not later than the actual ++ * version. 
In such circumstances, the GPU ID in @kbdev will also be replaced ++ * with the most recent known version. + * -+ * The following locking conditions are made on the caller: -+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be -+ * obtained internally) ++ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() ++ * before calling this function. + */ -+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); ++static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( ++ struct kbase_device *kbdev) ++{ ++ const enum base_hw_issue *issues = NULL; + -+/** -+ * @brief Release a refcount of a context being busy, allowing it to be -+ * scheduled out. -+ * -+ * When the refcount reaches zero and the context \em might be scheduled out -+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run -+ * out of jobs). -+ * -+ * If the context does get scheduled out, then The following actions will be -+ * taken as part of deschduling a context: -+ * - For the context being descheduled: -+ * - If the context is in the processing of dying (all the jobs are being -+ * removed from it), then descheduling also kills off any jobs remaining in the -+ * context. -+ * - If the context is not dying, and any jobs remain after descheduling the -+ * context then it is re-enqueued to the Policy's Queue. -+ * - Otherwise, the context is still known to the scheduler, but remains absent -+ * from the Policy Queue until a job is next added to it. -+ * - In all descheduling cases, the Power Manager active reference (obtained -+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()). -+ * -+ * Whilst the context is being descheduled, this also handles actions that -+ * cause more atoms to be run: -+ * - Attempt submitting atoms when the Context Attributes on the Runpool have -+ * changed. This is because the context being scheduled out could mean that -+ * there are more opportunities to run atoms. -+ * - Attempt submitting to a slot that was previously blocked due to affinity -+ * restrictions. This is usually only necessary when releasing a context -+ * happens as part of completing a previous job, but is harmless nonetheless. -+ * - Attempt scheduling in a new context (if one is available), and if necessary, -+ * running a job from that new context. 
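The fallback rule here is: use an exact match from the per-product version map if one exists, otherwise the most recent known version that is not later than the actual one. A compact user-space sketch of that selection follows; the VERSION_MAKE packing and the table contents are invented stand-ins for GPU_ID2_VERSION_MAKE and the base_hw_issues_* arrays.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Invented field packing, standing in for GPU_ID2_VERSION_MAKE(). */
#define VERSION_MAKE(major, minor, status) \
	(((uint32_t)(major) << 16) | ((uint32_t)(minor) << 4) | (uint32_t)(status))

struct version_map {
	uint32_t version;	/* UINT32_MAX terminates the table */
	const char *issues;	/* stands in for an enum base_hw_issue array */
};

static const char *select_issues(const struct version_map *map, uint32_t version)
{
	const char *fallback_issues = NULL;
	uint32_t fallback_version = 0;
	size_t v;

	for (v = 0; map[v].version != UINT32_MAX; v++) {
		if (version == map[v].version)
			return map[v].issues;	/* exact match */
		if (version > map[v].version &&
		    map[v].version >= fallback_version) {
			fallback_version = map[v].version;
			fallback_issues = map[v].issues;
		}
	}
	return fallback_issues;	/* NULL if the version is older than every entry */
}

int main(void)
{
	static const struct version_map map[] = {
		{ VERSION_MAKE(0, 0, 0), "r0p0 issue set" },
		{ VERSION_MAKE(0, 1, 0), "r0p1 issue set" },
		{ VERSION_MAKE(1, 0, 0), "r1p0 issue set" },
		{ UINT32_MAX, NULL },
	};

	/* r0p2 is not in the table, so it resolves to the r0p1 issue set. */
	printf("%s\n", select_issues(map, VERSION_MAKE(0, 2, 0)));
	return 0;
}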
++ struct base_hw_product { ++ u32 product_model; ++ struct { ++ u32 version; ++ const enum base_hw_issue *issues; ++ } map[7]; ++ }; ++ ++ static const struct base_hw_product base_hw_products[] = { ++ {GPU_ID2_PRODUCT_TMIX, ++ {{GPU_ID2_VERSION_MAKE(0, 0, 1), ++ base_hw_issues_tMIx_r0p0_05dev0}, ++ {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, ++ {U32_MAX /* sentinel value */, NULL} } }, ++ ++ {GPU_ID2_PRODUCT_THEX, ++ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, ++ {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, ++ {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, ++ {U32_MAX, NULL} } }, ++ ++ {GPU_ID2_PRODUCT_TSIX, ++ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, ++ {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, ++ {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, ++ {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tSIx_r0p1}, ++ {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, ++ {GPU_ID2_VERSION_MAKE(1, 0, 1), base_hw_issues_tSIx_r1p0}, ++ {U32_MAX, NULL} } }, ++ ++ ++#ifdef MALI_INCLUDE_TKAX ++ {GPU_ID2_PRODUCT_TKAX, ++ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0}, ++ {U32_MAX, NULL} } }, ++#endif /* MALI_INCLUDE_TKAX */ ++ ++#ifdef MALI_INCLUDE_TTRX ++ {GPU_ID2_PRODUCT_TTRX, ++ {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, ++ {U32_MAX, NULL} } }, ++#endif /* MALI_INCLUDE_TTRX */ ++ }; ++ ++ u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; ++ const struct base_hw_product *product = NULL; ++ size_t p; ++ ++ /* Stop when we reach the end of the products array. */ ++ for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { ++ if (product_model == base_hw_products[p].product_model) { ++ product = &base_hw_products[p]; ++ break; ++ } ++ } ++ ++ if (product != NULL) { ++ /* Found a matching product. */ ++ const u32 version = gpu_id & GPU_ID2_VERSION; ++ u32 fallback_version = 0; ++ const enum base_hw_issue *fallback_issues = NULL; ++ size_t v; ++ ++ /* Stop when we reach the end of the map. */ ++ for (v = 0; product->map[v].version != U32_MAX; ++v) { ++ ++ if (version == product->map[v].version) { ++ /* Exact match so stop. */ ++ issues = product->map[v].issues; ++ break; ++ } ++ ++ /* Check whether this is a candidate for most recent ++ known version not later than the actual ++ version. */ ++ if ((version > product->map[v].version) && ++ (product->map[v].version >= fallback_version)) { ++ fallback_version = product->map[v].version; ++ fallback_issues = product->map[v].issues; ++ } ++ } ++ ++ if ((issues == NULL) && (fallback_issues != NULL)) { ++ /* Fall back to the issue set of the most recent known ++ version not later than the actual version. 
*/ ++ issues = fallback_issues; ++ ++ dev_info(kbdev->dev, ++ "r%dp%d status %d is unknown; treating as r%dp%d status %d", ++ (gpu_id & GPU_ID2_VERSION_MAJOR) >> ++ GPU_ID2_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MINOR) >> ++ GPU_ID2_VERSION_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT, ++ (fallback_version & GPU_ID2_VERSION_MAJOR) >> ++ GPU_ID2_VERSION_MAJOR_SHIFT, ++ (fallback_version & GPU_ID2_VERSION_MINOR) >> ++ GPU_ID2_VERSION_MINOR_SHIFT, ++ (fallback_version & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT); ++ ++ gpu_id &= ~GPU_ID2_VERSION; ++ gpu_id |= fallback_version; ++ kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; ++ ++ kbase_gpuprops_update_core_props_gpu_id(&kbdev->gpu_props.props); ++ } ++ } ++ return issues; ++} ++ ++int kbase_hw_set_issues_mask(struct kbase_device *kbdev) ++{ ++ const enum base_hw_issue *issues; ++ u32 gpu_id; ++ u32 product_id; ++ u32 impl_tech; ++ ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; ++ product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; ++ ++ if (impl_tech != IMPLEMENTATION_MODEL) { ++ if (GPU_ID_IS_NEW_FORMAT(product_id)) { ++ issues = kbase_hw_get_issues_for_new_id(kbdev); ++ if (issues == NULL) { ++ dev_err(kbdev->dev, ++ "Unknown GPU ID %x", gpu_id); ++ return -EINVAL; ++ } ++ ++ /* The GPU ID might have been replaced with the last ++ known version of the same GPU. */ ++ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; ++ ++ } else { ++ switch (gpu_id) { ++ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): ++ issues = base_hw_issues_t60x_r0p0_15dev0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): ++ issues = base_hw_issues_t60x_r0p0_eac; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): ++ issues = base_hw_issues_t60x_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): ++ issues = base_hw_issues_t62x_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): ++ issues = base_hw_issues_t62x_r1p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): ++ issues = base_hw_issues_t62x_r1p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): ++ issues = base_hw_issues_t76x_r0p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): ++ issues = base_hw_issues_t76x_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): ++ issues = base_hw_issues_t76x_r0p1_50rel0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): ++ issues = base_hw_issues_t76x_r0p2; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): ++ issues = base_hw_issues_t76x_r0p3; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): ++ issues = base_hw_issues_t76x_r1p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): ++ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): ++ issues = base_hw_issues_t72x_r0p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): ++ issues = base_hw_issues_t72x_r1p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): ++ issues = base_hw_issues_t72x_r1p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): ++ issues = base_hw_issues_tFRx_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): ++ issues = base_hw_issues_tFRx_r0p2; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): ++ issues = base_hw_issues_tFRx_r1p0; ++ break; ++ case 
GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): ++ issues = base_hw_issues_tFRx_r2p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): ++ issues = base_hw_issues_t86x_r0p2; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): ++ issues = base_hw_issues_t86x_r1p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): ++ issues = base_hw_issues_t86x_r2p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): ++ issues = base_hw_issues_t83x_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): ++ issues = base_hw_issues_t83x_r1p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): ++ issues = base_hw_issues_t82x_r0p0; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): ++ issues = base_hw_issues_t82x_r0p1; ++ break; ++ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): ++ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): ++ issues = base_hw_issues_t82x_r1p0; ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "Unknown GPU ID %x", gpu_id); ++ return -EINVAL; ++ } ++ } ++ } else { ++ /* Software model */ ++ if (GPU_ID_IS_NEW_FORMAT(product_id)) { ++ switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { ++ case GPU_ID2_PRODUCT_TMIX: ++ issues = base_hw_issues_model_tMIx; ++ break; ++ case GPU_ID2_PRODUCT_THEX: ++ issues = base_hw_issues_model_tHEx; ++ break; ++ case GPU_ID2_PRODUCT_TSIX: ++ issues = base_hw_issues_model_tSIx; ++ break; ++#ifdef MALI_INCLUDE_TKAX ++ case GPU_ID2_PRODUCT_TKAX: ++ issues = base_hw_issues_model_tKAx; ++ break; ++#endif /* MALI_INCLUDE_TKAX */ ++#ifdef MALI_INCLUDE_TTRX ++ case GPU_ID2_PRODUCT_TTRX: ++ issues = base_hw_issues_model_tTRx; ++ break; ++#endif /* MALI_INCLUDE_TTRX */ ++ default: ++ dev_err(kbdev->dev, ++ "Unknown GPU ID %x", gpu_id); ++ return -EINVAL; ++ } ++ } else { ++ switch (product_id) { ++ case GPU_ID_PI_T60X: ++ issues = base_hw_issues_model_t60x; ++ break; ++ case GPU_ID_PI_T62X: ++ issues = base_hw_issues_model_t62x; ++ break; ++ case GPU_ID_PI_T72X: ++ issues = base_hw_issues_model_t72x; ++ break; ++ case GPU_ID_PI_T76X: ++ issues = base_hw_issues_model_t76x; ++ break; ++ case GPU_ID_PI_TFRX: ++ issues = base_hw_issues_model_tFRx; ++ break; ++ case GPU_ID_PI_T86X: ++ issues = base_hw_issues_model_t86x; ++ break; ++ case GPU_ID_PI_T83X: ++ issues = base_hw_issues_model_t83x; ++ break; ++ case GPU_ID_PI_T82X: ++ issues = base_hw_issues_model_t82x; ++ break; ++ default: ++ dev_err(kbdev->dev, "Unknown GPU ID %x", ++ gpu_id); ++ return -EINVAL; ++ } ++ } ++ } ++ ++ if (GPU_ID_IS_NEW_FORMAT(product_id)) { ++ dev_info(kbdev->dev, ++ "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", ++ (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> ++ GPU_ID2_PRODUCT_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_MAJOR) >> ++ GPU_ID2_ARCH_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_MINOR) >> ++ GPU_ID2_ARCH_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_ARCH_REV) >> ++ GPU_ID2_ARCH_REV_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MAJOR) >> ++ GPU_ID2_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_MINOR) >> ++ GPU_ID2_VERSION_MINOR_SHIFT, ++ (gpu_id & GPU_ID2_VERSION_STATUS) >> ++ GPU_ID2_VERSION_STATUS_SHIFT); ++ } else { ++ dev_info(kbdev->dev, ++ "GPU identified as 0x%04x r%dp%d status %d", ++ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ (gpu_id & GPU_ID_VERSION_MAJOR) >> ++ GPU_ID_VERSION_MAJOR_SHIFT, ++ (gpu_id & GPU_ID_VERSION_MINOR) >> ++ GPU_ID_VERSION_MINOR_SHIFT, ++ (gpu_id & GPU_ID_VERSION_STATUS) >> ++ GPU_ID_VERSION_STATUS_SHIFT); ++ } ++ ++ for (; *issues != 
BASE_HW_ISSUE_END; issues++) ++ set_bit(*issues, &kbdev->hw_issues_mask[0]); ++ ++ return 0; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h +new file mode 100644 +index 000000000..754250ce9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h +@@ -0,0 +1,65 @@ ++/* + * -+ * Unlike retaining a context in the runpool, this function \b cannot be called -+ * from IRQ context. ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * -+ * It is a programming error to call this on a \a kctx that is not currently -+ * scheduled, or that already has a zero refcount. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold the hwaccess_lock, because it will be used internally. -+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be -+ * obtained internally) ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * + */ -+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+/** -+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional -+ * actions from completing an atom. -+ * -+ * This is usually called as part of completing an atom and releasing the -+ * refcount on the context held by the atom. -+ * -+ * Therefore, the extra actions carried out are part of handling actions queued -+ * on a completed atom, namely: -+ * - Releasing the atom's context attributes -+ * - Retrying the submission on a particular slot, because we couldn't submit -+ * on that slot from an IRQ handler. -+ * -+ * The locking conditions of this function are the same as those for -+ * kbasep_js_runpool_release_ctx() -+ */ -+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + -+/** -+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that -+ * kbasep_js_device_data::runpool_mutex and -+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not -+ * attempt to schedule new contexts. -+ */ -+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, -+ struct kbase_context *kctx); ++ ++ + +/** -+ * @brief Schedule in a privileged context -+ * -+ * This schedules a context in regardless of the context priority. -+ * If the runpool is full, a context will be forced out of the runpool and the function will wait -+ * for the new context to be scheduled in. -+ * The context will be kept scheduled in (and the corresponding address space reserved) until -+ * kbasep_js_release_privileged_ctx is called). -+ * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold the hwaccess_lock, because it will be used internally. 
-+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). -+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will -+ * be used internally. -+ * ++ * @file ++ * Run-time work-arounds helpers + */ -+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++ ++#ifndef _KBASE_HW_H_ ++#define _KBASE_HW_H_ ++ ++#include "mali_kbase_defs.h" + +/** -+ * @brief Release a privileged context, allowing it to be scheduled out. -+ * -+ * See kbasep_js_runpool_release_ctx for potential side effects. -+ * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold the hwaccess_lock, because it will be used internally. -+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be -+ * obtained internally) -+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be -+ * obtained internally) -+ * ++ * @brief Tell whether a work-around should be enabled + */ -+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++#define kbase_hw_has_issue(kbdev, issue)\ ++ test_bit(issue, &(kbdev)->hw_issues_mask[0]) + +/** -+ * @brief Try to submit the next job on each slot -+ * -+ * The following locks may be used: -+ * - kbasep_js_device_data::runpool_mutex -+ * - hwaccess_lock ++ * @brief Tell whether a feature is supported + */ -+void kbase_js_try_run_jobs(struct kbase_device *kbdev); ++#define kbase_hw_has_feature(kbdev, feature)\ ++ test_bit(feature, &(kbdev)->hw_features_mask[0]) + +/** -+ * @brief Suspend the job scheduler during a Power Management Suspend event. -+ * -+ * Causes all contexts to be removed from the runpool, and prevents any -+ * contexts from (re)entering the runpool. ++ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID ++ * @kbdev: Device pointer + * -+ * This does not handle suspending the one privileged context: the caller must -+ * instead do this by by suspending the GPU HW Counter Instrumentation. ++ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. + * -+ * This will eventually cause all Power Management active references held by -+ * contexts on the runpool to be released, without running any more atoms. ++ * The GPU ID is read from the @kbdev. + * -+ * The caller must then wait for all Power Mangement active refcount to become -+ * zero before completing the suspend. ++ * In debugging versions of the driver, unknown versions of a known GPU with a ++ * new-format ID will be treated as the most recent known version not later ++ * than the actual version. In such circumstances, the GPU ID in @kbdev will ++ * also be replaced with the most recent known version. + * -+ * The emptying mechanism may take some time to complete, since it can wait for -+ * jobs to complete naturally instead of forcing them to end quickly. However, -+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this -+ * function is guaranteed to complete in a finite time. ++ * Note: The GPU configuration must have been read by ++ * kbase_gpuprops_get_props() before calling this function. 
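++ *
++ * Illustrative call pattern (a sketch only; the surrounding probe code and
++ * its error handling are assumptions, not part of this interface):
++ *
++ *   err = kbase_hw_set_issues_mask(kbdev);
++ *   if (err)
++ *           return err;   (the GPU ID was not recognized)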
+ */ -+void kbasep_js_suspend(struct kbase_device *kbdev); ++int kbase_hw_set_issues_mask(struct kbase_device *kbdev); + +/** -+ * @brief Resume the Job Scheduler after a Power Management Resume event. -+ * -+ * This restores the actions from kbasep_js_suspend(): -+ * - Schedules contexts back into the runpool -+ * - Resumes running atoms on the GPU ++ * @brief Set the features mask depending on the GPU ID + */ -+void kbasep_js_resume(struct kbase_device *kbdev); ++void kbase_hw_set_features_mask(struct kbase_device *kbdev); + -+/** -+ * @brief Submit an atom to the job scheduler. ++#endif /* _KBASE_HW_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h +new file mode 100644 +index 000000000..b09be99e6 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h +@@ -0,0 +1,54 @@ ++/* + * -+ * The atom is enqueued on the context's ringbuffer. The caller must have -+ * ensured that all dependencies can be represented in the ringbuffer. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * -+ * Caller must hold jctx->lock ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * @param[in] kctx Context pointer -+ * @param[in] atom Pointer to the atom to submit ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * @return Whether the context requires to be enqueued. */ -+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++ */ + -+/** -+ * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. -+ * @kctx: Context Pointer -+ * @prio: Priority (specifies the queue together with js). -+ * @js: Job slot (specifies the queue together with prio). -+ * -+ * Pushes all possible atoms from the linked list to the ringbuffer. -+ * Number of atoms are limited to free space in the ringbuffer and -+ * number of available atoms in the linked list. -+ * -+ */ -+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); -+/** -+ * @brief Pull an atom from a context in the job scheduler for execution. -+ * -+ * The atom will not be removed from the ringbuffer at this stage. -+ * -+ * The HW access lock must be held when calling this function. -+ * -+ * @param[in] kctx Context to pull from -+ * @param[in] js Job slot to pull from -+ * @return Pointer to an atom, or NULL if there are no atoms for this -+ * slot that can be currently run. ++ ++ ++ ++/* ++ * HW access backend common APIs + */ -+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); ++ ++#ifndef _KBASE_HWACCESS_BACKEND_H_ ++#define _KBASE_HWACCESS_BACKEND_H_ + +/** -+ * @brief Return an atom to the job scheduler ringbuffer. -+ * -+ * An atom is 'unpulled' if execution is stopped but intended to be returned to -+ * later. The most common reason for this is that the atom has been -+ * soft-stopped. -+ * -+ * Note that if multiple atoms are to be 'unpulled', they must be returned in -+ * the reverse order to which they were originally pulled. It is a programming -+ * error to return atoms in any other order. -+ * -+ * The HW access lock must be held when calling this function. 
++ * kbase_backend_early_init - Perform any backend-specific initialization. ++ * @kbdev: Device pointer + * -+ * @param[in] kctx Context pointer -+ * @param[in] atom Pointer to the atom to unpull ++ * Return: 0 on success, or an error code on failure. + */ -+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); ++int kbase_backend_early_init(struct kbase_device *kbdev); + +/** -+ * @brief Complete an atom from jd_done_worker(), removing it from the job -+ * scheduler ringbuffer. -+ * -+ * If the atom failed then all dependee atoms marked for failure propagation -+ * will also fail. ++ * kbase_backend_late_init - Perform any backend-specific initialization. ++ * @kbdev: Device pointer + * -+ * @param[in] kctx Context pointer -+ * @param[in] katom Pointer to the atom to complete -+ * @return true if the context is now idle (no jobs pulled) -+ * false otherwise ++ * Return: 0 on success, or an error code on failure. + */ -+bool kbase_js_complete_atom_wq(struct kbase_context *kctx, -+ struct kbase_jd_atom *katom); ++int kbase_backend_late_init(struct kbase_device *kbdev); + +/** -+ * @brief Complete an atom. -+ * -+ * Most of the work required to complete an atom will be performed by -+ * jd_done_worker(). -+ * -+ * The HW access lock must be held when calling this function. -+ * -+ * @param[in] katom Pointer to the atom to complete -+ * @param[in] end_timestamp The time that the atom completed (may be NULL) -+ * -+ * Return: Atom that has now been unblocked and can now be run, or NULL if none ++ * kbase_backend_early_term - Perform any backend-specific termination. ++ * @kbdev: Device pointer + */ -+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, -+ ktime_t *end_timestamp); ++void kbase_backend_early_term(struct kbase_device *kbdev); + +/** -+ * @brief Submit atoms from all available contexts. -+ * -+ * This will attempt to submit as many jobs as possible to the provided job -+ * slots. It will exit when either all job slots are full, or all contexts have -+ * been used. -+ * -+ * @param[in] kbdev Device pointer -+ * @param[in] js_mask Mask of job slots to submit to ++ * kbase_backend_late_term - Perform any backend-specific termination. ++ * @kbdev: Device pointer + */ -+void kbase_js_sched(struct kbase_device *kbdev, int js_mask); ++void kbase_backend_late_term(struct kbase_device *kbdev); + -+/** -+ * kbase_jd_zap_context - Attempt to deschedule a context that is being -+ * destroyed -+ * @kctx: Context pointer ++#endif /* _KBASE_HWACCESS_BACKEND_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +new file mode 100644 +index 000000000..0acf29719 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +@@ -0,0 +1,36 @@ ++/* + * -+ * This will attempt to remove a context from any internal job scheduler queues -+ * and perform any other actions to ensure a context will not be submitted -+ * from. ++ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * -+ * If the context is currently scheduled, then the caller must wait for all -+ * pending jobs to complete before taking any further action. 
-+ */ -+void kbase_js_zap_context(struct kbase_context *kctx); -+ -+/** -+ * @brief Validate an atom ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * This will determine whether the atom can be scheduled onto the GPU. Atoms -+ * with invalid combinations of core requirements will be rejected. ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * @param[in] kbdev Device pointer -+ * @param[in] katom Atom to validate -+ * @return true if atom is valid -+ * false otherwise + */ -+bool kbase_js_is_atom_valid(struct kbase_device *kbdev, -+ struct kbase_jd_atom *katom); ++ ++ ++ + +/** -+ * kbase_js_set_timeouts - update all JS timeouts with user specified data -+ * @kbdev: Device pointer -+ * -+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is -+ * set to a positive number then that becomes the new value used, if a timeout -+ * is negative then the default is set. ++ * @file mali_kbase_hwaccess_gpu_defs.h ++ * HW access common definitions + */ -+void kbase_js_set_timeouts(struct kbase_device *kbdev); + -+/* -+ * Helpers follow -+ */ ++#ifndef _KBASE_HWACCESS_DEFS_H_ ++#define _KBASE_HWACCESS_DEFS_H_ + -+/** -+ * @brief Check that a context is allowed to submit jobs on this policy ++#include ++ ++/* The hwaccess_lock (a spinlock) must be held when accessing this structure */ ++struct kbase_hwaccess_data { ++ struct kbase_context *active_kctx; ++ ++ struct kbase_backend_data backend; ++}; ++ ++#endif /* _KBASE_HWACCESS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +new file mode 100644 +index 000000000..cf8a8131c +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +@@ -0,0 +1,47 @@ ++/* + * -+ * The purpose of this abstraction is to hide the underlying data size, and wrap up -+ * the long repeated line of code. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * -+ * As with any bool, never test the return value with true. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * The caller must hold hwaccess_lock. + */ -+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -+{ -+ u16 test_bit; + -+ /* Ensure context really is scheduled in */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ test_bit = (u16) (1u << kctx->as_nr); + -+ return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); -+} + +/** -+ * @brief Allow a context to submit jobs on this policy -+ * -+ * The purpose of this abstraction is to hide the underlying data size, and wrap up -+ * the long repeated line of code. -+ * -+ * The caller must hold hwaccess_lock. 
++ * Base kernel property query backend APIs + */ -+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -+{ -+ u16 set_bit; + -+ /* Ensure context really is scheduled in */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++#ifndef _KBASE_HWACCESS_GPUPROPS_H_ ++#define _KBASE_HWACCESS_GPUPROPS_H_ + -+ set_bit = (u16) (1u << kctx->as_nr); ++/** ++ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from ++ * GPU ++ * @kbdev: Device pointer ++ * @regdump: Pointer to struct kbase_gpuprops_regdump structure ++ */ ++void kbase_backend_gpuprops_get(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump); + -+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); ++/** ++ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU ++ * @kbdev: Device pointer ++ * @regdump: Pointer to struct kbase_gpuprops_regdump structure ++ * ++ * This function reads GPU properties that are dependent on the hardware ++ * features bitmask ++ */ ++void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, ++ struct kbase_gpuprops_regdump *regdump); + -+ js_devdata->runpool_irq.submit_allowed |= set_bit; -+} + -+/** -+ * @brief Prevent a context from submitting more jobs on this policy ++#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +new file mode 100644 +index 000000000..5de2b7535 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +@@ -0,0 +1,116 @@ ++/* + * -+ * The purpose of this abstraction is to hide the underlying data size, and wrap up -+ * the long repeated line of code. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * The caller must hold hwaccess_lock. + */ -+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -+{ -+ u16 clear_bit; -+ u16 clear_mask; + -+ /* Ensure context really is scheduled in */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ clear_bit = (u16) (1u << kctx->as_nr); -+ clear_mask = ~clear_bit; + -+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + -+ js_devdata->runpool_irq.submit_allowed &= clear_mask; -+} + -+/** -+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom ++/* ++ * HW Access instrumentation common APIs + */ -+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom) -+{ -+ atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; -+} ++ ++#ifndef _KBASE_HWACCESS_INSTR_H_ ++#define _KBASE_HWACCESS_INSTR_H_ ++ ++#include + +/** -+ * Mark a slot as requiring resubmission by carrying that information on a -+ * completing atom. 
++ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection ++ * @kbdev: Kbase device ++ * @kctx: Kbase context ++ * @setup: HW counter setup parameters + * -+ * @note This can ASSERT in debug builds if the submit slot has been set to -+ * something other than the current value for @a js. This is because you might -+ * be unintentionally stopping more jobs being submitted on the old submit -+ * slot, and that might cause a scheduling-hang. ++ * Context: might sleep, waiting for reset to complete + * -+ * @note If you can guarantee that the atoms for the original slot will be -+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit() -+ * first to silence the ASSERT. ++ * Return: 0 on success + */ -+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js) -+{ -+ KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS); -+ KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot == -+ KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID) -+ || (atom->retry_submit_on_slot == js)); -+ -+ atom->retry_submit_on_slot = js; -+} ++int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_uk_hwcnt_setup *setup); + +/** -+ * Create an initial 'invalid' atom retained state, that requires no -+ * atom-related work to be done on releasing with -+ * kbasep_js_runpool_release_ctx_and_katom_retained_state() ++ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection ++ * @kctx: Kbase context ++ * ++ * Context: might sleep, waiting for an ongoing dump to complete ++ * ++ * Return: 0 on success + */ -+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) -+{ -+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; -+ retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; -+ retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; -+} ++int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); + +/** -+ * Copy atom state that can be made available after jd_done_nolock() is called -+ * on that atom. ++ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU ++ * @kctx: Kbase context ++ * ++ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, ++ * of call kbase_instr_hwcnt_wait_for_dump(). ++ * ++ * Return: 0 on success + */ -+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) -+{ -+ retained_state->event_code = katom->event_code; -+ retained_state->core_req = katom->core_req; -+ retained_state->retry_submit_on_slot = katom->retry_submit_on_slot; -+ retained_state->sched_priority = katom->sched_priority; -+ retained_state->device_nr = katom->device_nr; -+} ++int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); + +/** -+ * @brief Determine whether an atom has finished (given its retained state), -+ * and so should be given back to userspace/removed from the system. ++ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has ++ * completed. 
++ * @kctx: Kbase context + * -+ * Reasons for an atom not finishing include: -+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later) ++ * Context: will sleep, waiting for dump to complete + * -+ * @param[in] katom_retained_state the retained state of the atom to check -+ * @return false if the atom has not finished -+ * @return !=false if the atom has finished ++ * Return: 0 on success + */ -+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) -+{ -+ return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); -+} ++int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); + +/** -+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid ++ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has ++ * completed ++ * @kctx: Kbase context ++ * @success: Set to true if successful + * -+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the -+ * code should just ignore it. ++ * Context: does not sleep. + * -+ * @param[in] katom_retained_state the atom's retained state to check -+ * @return false if the retained state is invalid, and can be ignored -+ * @return !=false if the retained state is valid ++ * Return: true if the dump is complete + */ -+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) -+{ -+ return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); -+} -+ -+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res) -+{ -+ int js = katom_retained_state->retry_submit_on_slot; -+ -+ *res = js; -+ return (bool) (js >= 0); -+} ++bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, ++ bool * const success); + +/** -+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the -+ * context is guaranteed to be already previously retained. -+ * -+ * It is a programming error to supply the \a as_nr of a context that has not -+ * been previously retained/has a busy refcount of zero. The only exception is -+ * when there is no ctx in \a as_nr (NULL returned). ++ * kbase_instr_hwcnt_clear() - Clear HW counters ++ * @kctx: Kbase context + * -+ * The following locking conditions are made on the caller: -+ * - it must \em not hold the hwaccess_lock, because it will be used internally. ++ * Context: might sleep, waiting for reset to complete + * -+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed -+ * to be non-zero and unmodified by this function. -+ * @return NULL on failure, indicating that no context was found in \a as_nr -+ */ -+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr) -+{ -+ struct kbase_context *found_kctx; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); -+ -+ found_kctx = kbdev->as_to_kctx[as_nr]; -+ KBASE_DEBUG_ASSERT(found_kctx == NULL || -+ atomic_read(&found_kctx->refcount) > 0); -+ -+ return found_kctx; -+} -+ -+/* -+ * The following locking conditions are made on the caller: -+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
-+ * - The caller must hold the kbasep_js_device_data::runpool_mutex -+ */ -+static inline void kbase_js_runpool_inc_context_count( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ -+ /* Track total contexts */ -+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); -+ ++(js_devdata->nr_all_contexts_running); -+ -+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ /* Track contexts that can submit jobs */ -+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < -+ S8_MAX); -+ ++(js_devdata->nr_user_contexts_running); -+ } -+} -+ -+/* -+ * The following locking conditions are made on the caller: -+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. -+ * - The caller must hold the kbasep_js_device_data::runpool_mutex ++ * Return: 0 on success + */ -+static inline void kbase_js_runpool_dec_context_count( -+ struct kbase_device *kbdev, -+ struct kbase_context *kctx) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&js_devdata->runpool_mutex); -+ -+ /* Track total contexts */ -+ --(js_devdata->nr_all_contexts_running); -+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); -+ -+ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ /* Track contexts that can submit jobs */ -+ --(js_devdata->nr_user_contexts_running); -+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); -+ } -+} -+ ++int kbase_instr_hwcnt_clear(struct kbase_context *kctx); + +/** -+ * @brief Submit atoms from all available contexts to all job slots. ++ * kbase_instr_backend_init() - Initialise the instrumentation backend ++ * @kbdev: Kbase device + * -+ * This will attempt to submit as many jobs as possible. It will exit when -+ * either all job slots are full, or all contexts have been used. ++ * This function should be called during driver initialization. + * -+ * @param[in] kbdev Device pointer ++ * Return: 0 on success + */ -+static inline void kbase_js_sched_all(struct kbase_device *kbdev) -+{ -+ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -+} -+ -+extern const int -+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; -+ -+extern const base_jd_prio -+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++int kbase_instr_backend_init(struct kbase_device *kbdev); + +/** -+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) -+ * to relative ordering -+ * @atom_prio: Priority ID to translate. -+ * -+ * Atom priority values for @ref base_jd_prio cannot be compared directly to -+ * find out which are higher or lower. -+ * -+ * This function will convert base_jd_prio values for successively lower -+ * priorities into a monotonically increasing sequence. That is, the lower the -+ * base_jd_prio priority, the higher the value produced by this function. This -+ * is in accordance with how the rest of the kernel treates priority. 
-+ * -+ * The mapping is 1:1 and the size of the valid input range is the same as the -+ * size of the valid output range, i.e. -+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS -+ * -+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions ++ * kbase_instr_backend_init() - Terminate the instrumentation backend ++ * @kbdev: Kbase device + * -+ * Return: On success: a value in the inclusive range -+ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: -+ * KBASE_JS_ATOM_SCHED_PRIO_INVALID ++ * This function should be called during driver termination. + */ -+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) -+{ -+ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) -+ return KBASE_JS_ATOM_SCHED_PRIO_INVALID; -+ -+ return kbasep_js_atom_priority_to_relative[atom_prio]; -+} -+ -+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) -+{ -+ unsigned int prio_idx; -+ -+ KBASE_DEBUG_ASSERT(0 <= sched_prio -+ && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); -+ -+ prio_idx = (unsigned int)sched_prio; -+ -+ return kbasep_js_relative_priority_to_atom[prio_idx]; -+} -+ -+ /** @} *//* end group kbase_js */ -+ /** @} *//* end group base_kbase_api */ -+ /** @} *//* end group base_api */ ++void kbase_instr_backend_term(struct kbase_device *kbdev); + -+#endif /* _KBASE_JS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c ++#endif /* _KBASE_HWACCESS_INSTR_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h new file mode 100644 -index 000000000..321506ada +index 000000000..750fda2cd --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c -@@ -0,0 +1,301 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +@@ -0,0 +1,381 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -377684,462 +379211,378 @@ index 000000000..321506ada + + + -+#include -+#include -+ +/* -+ * Private functions follow ++ * HW access job manager common APIs + */ + ++#ifndef _KBASE_HWACCESS_JM_H_ ++#define _KBASE_HWACCESS_JM_H_ ++ +/** -+ * @brief Check whether a ctx has a certain attribute, and if so, retain that -+ * attribute on the runpool. -+ * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx is scheduled on the runpool ++ * kbase_backend_run_atom() - Run an atom on the GPU ++ * @kbdev: Device pointer ++ * @atom: Atom to run + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * @return false indicates no change in ctx attributes state of the runpool. 
++ * Caller must hold the HW access lock + */ -+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { -+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); -+ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); -+ -+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { -+ /* First refcount indicates a state change */ -+ runpool_state_changed = true; -+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); -+ } -+ } -+ -+ return runpool_state_changed; -+} ++void kbase_backend_run_atom(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + +/** -+ * @brief Check whether a ctx has a certain attribute, and if so, release that -+ * attribute on the runpool. ++ * kbase_backend_slot_update - Update state based on slot ringbuffers + * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx is scheduled on the runpool ++ * @kbdev: Device pointer + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * @return false indicates no change in ctx attributes state of the runpool. ++ * Inspect the jobs in the slot ringbuffers and update state. ++ * ++ * This will cause jobs to be submitted to hardware if they are unblocked + */ -+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); -+ -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { -+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); -+ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); -+ -+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { -+ /* Last de-refcount indicates a state change */ -+ runpool_state_changed = true; -+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); -+ } -+ } -+ -+ return runpool_state_changed; -+} ++void kbase_backend_slot_update(struct kbase_device *kbdev); + +/** -+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool -+ * if the context is scheduled. 
++ * kbase_backend_find_and_release_free_address_space() - Release a free AS ++ * @kbdev: Device pointer ++ * @kctx: Context pointer + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * This function can evict an idle context from the runpool, freeing up the ++ * address space it was using. + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * This may allow the scheduler to submit more jobs than previously. -+ * @return false indicates no change in ctx attributes state of the runpool. ++ * The address space is marked as in use. The caller must either assign a ++ * context using kbase_gpu_use_ctx(), or release it using ++ * kbase_ctx_sched_release() ++ * ++ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none ++ * available + */ -+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); -+ -+ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); -+ -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { -+ /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ -+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); -+ } -+ -+ return runpool_state_changed; -+} ++int kbase_backend_find_and_release_free_address_space( ++ struct kbase_device *kbdev, struct kbase_context *kctx); + -+/* -+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool -+ * if the context is scheduled. ++/** ++ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the ++ * provided address space. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer. May be NULL ++ * @as_nr: Free address space to use + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * kbase_gpu_next_job() will pull atoms from the active context. + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * This may allow the scheduler to submit more jobs than previously. -+ * @return false indicates no change in ctx attributes state of the runpool. ++ * Return: true if successful, false if ASID not assigned. 
+ */ -+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ bool runpool_state_changed = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_kctx_info = &kctx->jctx.sched_info; -+ -+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); -+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); -+ -+ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); -+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); -+ } -+ -+ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ -+ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); -+ -+ return runpool_state_changed; -+} ++bool kbase_backend_use_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int as_nr); + -+/* -+ * More commonly used public functions ++/** ++ * kbase_backend_use_ctx_sched() - Activate a context. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * ++ * kbase_gpu_next_job() will pull atoms from the active context. ++ * ++ * The context must already be scheduled and assigned to an address space. If ++ * the context is not scheduled, then kbase_gpu_use_ctx() should be used ++ * instead. ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if context is now active, false otherwise (ie if context does ++ * not have an address space assigned) + */ ++bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ bool runpool_state_changed = false; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ -+ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { -+ /* This context never submits, so don't track any scheduling attributes */ -+ return; -+ } -+ -+ /* Transfer attributes held in the context flags for contexts that have submit enabled */ -+ -+ /* ... More attributes can be added here ... 
*/ -+ -+ /* The context should not have been scheduled yet, so ASSERT if this caused -+ * runpool state changes (note that other threads *can't* affect the value -+ * of runpool_state_changed, due to how it's calculated) */ -+ KBASE_DEBUG_ASSERT(runpool_state_changed == false); -+ CSTD_UNUSED(runpool_state_changed); -+} -+ -+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ bool runpool_state_changed; -+ int i; -+ -+ /* Retain any existing attributes */ -+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { -+ /* The context is being scheduled in, so update the runpool with the new attributes */ -+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); -+ -+ /* We don't need to know about state changed, because retaining a -+ * context occurs on scheduling it, and that itself will also try -+ * to run new atoms */ -+ CSTD_UNUSED(runpool_state_changed); -+ } -+ } -+} -+ -+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -+{ -+ bool runpool_state_changed = false; -+ int i; -+ -+ /* Release any existing attributes */ -+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { -+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { -+ /* The context is being scheduled out, so update the runpool on the removed attributes */ -+ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); -+ } -+ } -+ -+ return runpool_state_changed; -+} -+ -+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ bool runpool_state_changed = false; -+ base_jd_core_req core_req; -+ -+ KBASE_DEBUG_ASSERT(katom); -+ core_req = katom->core_req; -+ -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); -+ else -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); -+ -+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { -+ /* Atom that can run on slot1 or slot2, and can use all cores */ -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); -+ } -+ -+ /* We don't need to know about state changed, because retaining an -+ * atom occurs on adding it, and that itself will also try to run -+ * new atoms */ -+ CSTD_UNUSED(runpool_state_changed); -+} -+ -+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) -+{ -+ bool runpool_state_changed = false; -+ base_jd_core_req core_req; -+ -+ KBASE_DEBUG_ASSERT(katom_retained_state); -+ core_req = katom_retained_state->core_req; ++/** ++ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will ++ * de-assign the assigned address space. 
++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * ++ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock ++ */ ++void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ /* No-op for invalid atoms */ -+ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) -+ return false; ++/** ++ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will ++ * de-assign the assigned address space. ++ * @kbdev: Device pointer ++ * @kctx: Context pointer ++ * ++ * Caller must hold kbase_device->mmu_hw_mutex ++ * ++ * This function must perform any operations that could not be performed in IRQ ++ * context by kbase_backend_release_ctx_irq(). ++ */ ++void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, ++ struct kbase_context *kctx); + -+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); -+ else -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); ++/** ++ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires ++ * one ++ * @kbdev: Device pointer ++ * @katom: Pointer to the failed atom ++ * ++ * On some GPUs, the GPU cache must be cleaned following a failed atom. This ++ * function performs a clean if it is required by @katom. ++ */ ++void kbase_backend_cacheclean(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { -+ /* Atom that can run on slot1 or slot2, and can use all cores */ -+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); -+ } + -+ return runpool_state_changed; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h -new file mode 100644 -index 000000000..ce9183326 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h -@@ -0,0 +1,158 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++/** ++ * kbase_backend_complete_wq() - Perform backend-specific actions required on ++ * completing an atom. ++ * @kbdev: Device pointer ++ * @katom: Pointer to the atom to complete + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * This function should only be called from kbase_jd_done_worker() or ++ * js_return_worker(). + * ++ * Return: true if atom has completed, false if atom should be re-submitted + */ ++void kbase_backend_complete_wq(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + ++/** ++ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions ++ * required on completing an atom, after ++ * any scheduling has taken place. 
++ * @kbdev: Device pointer ++ * @core_req: Core requirements of atom ++ * @affinity: Affinity of atom ++ * @coreref_state: Coreref state of atom ++ * ++ * This function should only be called from kbase_jd_done_worker() or ++ * js_return_worker(). ++ */ ++void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, ++ base_jd_core_req core_req, u64 affinity, ++ enum kbase_atom_coreref_state coreref_state); + ++/** ++ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU ++ * and remove any others from the ringbuffers. ++ * @kbdev: Device pointer ++ * @end_timestamp: Timestamp of reset ++ */ ++void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); + -+ ++/** ++ * kbase_backend_inspect_head() - Return the atom currently at the head of slot ++ * @js ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return : Atom currently at the head of slot @js, or NULL ++ */ ++struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, ++ int js); + +/** -+ * @file mali_kbase_js_ctx_attr.h -+ * Job Scheduler Context Attribute APIs ++ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot ++ * @js ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return : Atom currently at the head of slot @js, or NULL + */ ++struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, ++ int js); + -+#ifndef _KBASE_JS_CTX_ATTR_H_ -+#define _KBASE_JS_CTX_ATTR_H_ ++/** ++ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a ++ * slot. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return : Number of atoms currently on slot ++ */ ++int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); + +/** -+ * @addtogroup base_api -+ * @{ ++ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot ++ * that are currently on the GPU. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect ++ * ++ * Return : Number of atoms currently on slot @js that are currently on the GPU. + */ ++int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); + +/** -+ * @addtogroup base_kbase_api -+ * @{ ++ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs ++ * has changed. ++ * @kbdev: Device pointer ++ * ++ * Perform any required backend-specific actions (eg starting/stopping ++ * scheduling timers). + */ ++void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); + +/** -+ * @addtogroup kbase_js -+ * @{ ++ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. ++ * @kbdev: Device pointer ++ * ++ * Perform any required backend-specific actions (eg updating timeouts of ++ * currently running atoms). + */ ++void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + +/** -+ * Set the initial attributes of a context (when context create flags are set) ++ * kbase_backend_slot_free() - Return the number of jobs that can be currently ++ * submitted to slot @js. ++ * @kbdev: Device pointer ++ * @js: Job slot to inspect + * -+ * Requires: -+ * - Hold the jsctx_mutex ++ * Return : Number of jobs that can be submitted. 
+ */ -+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx); ++int kbase_backend_slot_free(struct kbase_device *kbdev, int js); + +/** -+ * Retain all attributes of a context ++ * kbase_job_check_enter_disjoint - potentially leave disjoint state ++ * @kbdev: kbase device ++ * @target_katom: atom which is finishing + * -+ * This occurs on scheduling in the context on the runpool (but after -+ * is_scheduled is set) ++ * Work out whether to leave disjoint state when finishing an atom that was ++ * originated by kbase_job_check_enter_disjoint(). ++ */ ++void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, ++ struct kbase_jd_atom *target_katom); ++ ++/** ++ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently ++ * running from a context ++ * @kctx: Context pointer + * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx->is_scheduled is true ++ * This is used in response to a page fault to remove all jobs from the faulting ++ * context from the hardware. + */ -+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx); + +/** -+ * Release all attributes of a context ++ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and ++ * to be descheduled. ++ * @kctx: Context pointer + * -+ * This occurs on scheduling out the context from the runpool (but before -+ * is_scheduled is cleared) ++ * This should be called following kbase_js_zap_context(), to ensure the context ++ * can be safely destroyed. ++ */ ++void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); ++ ++/** ++ * kbase_backend_get_current_flush_id - Return the current flush ID + * -+ * Requires: -+ * - jsctx mutex -+ * - runpool_irq spinlock -+ * - ctx->is_scheduled is true ++ * @kbdev: Device pointer + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * @return false indicates no change in ctx attributes state of the runpool. ++ * Return: the current flush ID to be recorded for each job chain + */ -+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); ++u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); + ++#if KBASE_GPU_RESET_EN +/** -+ * Retain all attributes of an atom ++ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. ++ * @kbdev: Device pointer + * -+ * This occurs on adding an atom to a context ++ * This function just soft-stops all the slots to ensure that as many jobs as ++ * possible are saved. + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * Return: a boolean which should be interpreted as follows: ++ * - true - Prepared for reset, kbase_reset_gpu should be called. ++ * - false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. + */ -+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); + +/** -+ * Release all attributes of an atom, given its retained state. 
++ * kbase_reset_gpu - Reset the GPU ++ * @kbdev: Device pointer + * -+ * This occurs after (permanently) removing an atom from a context ++ * This function should be called after kbase_prepare_to_reset_gpu if it returns ++ * true. It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu. + * -+ * Requires: -+ * - jsctx mutex -+ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * After this function is called (or not called if kbase_prepare_to_reset_gpu ++ * returned false), the caller should wait for kbdev->reset_waitq to be ++ * signalled to know when the reset has completed. ++ */ ++void kbase_reset_gpu(struct kbase_device *kbdev); ++ ++/** ++ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. ++ * @kbdev: Device pointer + * -+ * This is a no-op when \a katom_retained_state is invalid. ++ * This function just soft-stops all the slots to ensure that as many jobs as ++ * possible are saved. + * -+ * @return true indicates a change in ctx attributes state of the runpool. -+ * In this state, the scheduler might be able to submit more jobs than -+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() -+ * or similar is called sometime later. -+ * @return false indicates no change in ctx attributes state of the runpool. ++ * Return: a boolean which should be interpreted as follows: ++ * - true - Prepared for reset, kbase_reset_gpu should be called. ++ * - false - Another thread is performing a reset, kbase_reset_gpu should ++ * not be called. + */ -+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); + +/** -+ * Requires: -+ * - runpool_irq spinlock ++ * kbase_reset_gpu_locked - Reset the GPU ++ * @kbdev: Device pointer ++ * ++ * This function should be called after kbase_prepare_to_reset_gpu if it ++ * returns true. It should never be called without a corresponding call to ++ * kbase_prepare_to_reset_gpu. ++ * ++ * After this function is called (or not called if kbase_prepare_to_reset_gpu ++ * returned false), the caller should wait for kbdev->reset_waitq to be ++ * signalled to know when the reset has completed. + */ -+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_device_data *js_devdata; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_devdata = &kbdev->js_data; -+ -+ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; -+} ++void kbase_reset_gpu_locked(struct kbase_device *kbdev); + +/** -+ * Requires: -+ * - runpool_irq spinlock ++ * kbase_reset_gpu_silent - Reset the GPU silently ++ * @kbdev: Device pointer ++ * ++ * Reset the GPU without trying to cancel jobs and don't emit messages into ++ * the kernel log while doing the reset. ++ * ++ * This function should be used in cases where we are doing a controlled reset ++ * of the GPU as part of normal processing (e.g. exiting protected mode) where ++ * the driver will have ensured the scheduler has been idled and all other ++ * users of the GPU (e.g. instrumentation) have been suspended. 
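++ *
++ * For contrast, a minimal sketch of the non-silent path documented above
++ * (illustrative only; it assumes reset_waitq is an ordinary wait queue and
++ * omits error handling):
++ *
++ *   if (kbase_prepare_to_reset_gpu(kbdev)) {
++ *           kbase_reset_gpu(kbdev);
++ *           wait_event(kbdev->reset_waitq,
++ *                      !kbase_reset_gpu_active(kbdev));
++ *   }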
+ */ -+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -+{ -+ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ -+ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); -+} ++void kbase_reset_gpu_silent(struct kbase_device *kbdev); + +/** -+ * Requires: -+ * - jsctx mutex ++ * kbase_reset_gpu_active - Reports if the GPU is being reset ++ * @kbdev: Device pointer ++ * ++ * Return: True if the GPU is in the process of being reset. + */ -+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -+{ -+ struct kbasep_js_kctx_info *js_kctx_info; -+ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); -+ js_kctx_info = &kctx->jctx.sched_info; ++bool kbase_reset_gpu_active(struct kbase_device *kbdev); ++#endif + -+ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ -+ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); -+} ++/** ++ * kbase_job_slot_hardstop - Hard-stop the specified job slot ++ * @kctx: The kbase context that contains the job(s) that should ++ * be hard-stopped ++ * @js: The job slot to hard-stop ++ * @target_katom: The job that should be hard-stopped (or NULL for all ++ * jobs from the context) ++ * Context: ++ * The job slot lock must be held when calling this function. ++ */ ++void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, ++ struct kbase_jd_atom *target_katom); + -+ /** @} *//* end group kbase_js */ -+ /** @} *//* end group base_kbase_api */ -+ /** @} *//* end group base_api */ ++extern struct protected_mode_ops kbase_native_protected_ops; + -+#endif /* _KBASE_JS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h ++#endif /* _KBASE_HWACCESS_JM_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h new file mode 100644 -index 000000000..ba8b64415 +index 000000000..71c7d495c --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h -@@ -0,0 +1,386 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +@@ -0,0 +1,209 @@ +/* + * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -378155,383 +379598,206 @@ index 000000000..ba8b64415 + + + -+ +/** -+ * @file mali_kbase_js.h -+ * Job Scheduler Type Definitions ++ * @file mali_kbase_hwaccess_pm.h ++ * HW access power manager common APIs + */ + -+#ifndef _KBASE_JS_DEFS_H_ -+#define _KBASE_JS_DEFS_H_ ++#ifndef _KBASE_HWACCESS_PM_H_ ++#define _KBASE_HWACCESS_PM_H_ ++ ++#include ++#include ++ ++#include ++ ++/* Forward definition - see mali_kbase.h */ ++struct kbase_device; ++ ++/* Functions common to all HW access backends */ + +/** -+ * @addtogroup base_api -+ * @{ ++ * Initialize the power management framework. ++ * ++ * Must be called before any other power management function ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ * ++ * @return 0 if the power management framework was successfully ++ * initialized. 
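++ *
++ * The expected pairing with the other calls in this header is sketched
++ * below (illustrative only; error paths and the actual flags are omitted):
++ *
++ *   err = kbase_hwaccess_pm_init(kbdev);
++ *   if (!err)
++ *           err = kbase_hwaccess_pm_powerup(kbdev, flags);
++ *   ...
++ *   kbase_hwaccess_pm_halt(kbdev);
++ *   kbase_hwaccess_pm_term(kbdev);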
+ */ ++int kbase_hwaccess_pm_init(struct kbase_device *kbdev); + +/** -+ * @addtogroup base_kbase_api -+ * @{ ++ * Terminate the power management framework. ++ * ++ * No power management functions may be called after this (except ++ * @ref kbase_pm_init) ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) + */ ++void kbase_hwaccess_pm_term(struct kbase_device *kbdev); + +/** -+ * @addtogroup kbase_js -+ * @{ ++ * kbase_hwaccess_pm_powerup - Power up the GPU. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @flags: Flags to pass on to kbase_pm_init_hw ++ * ++ * Power up GPU after all modules have been initialized and interrupt handlers ++ * installed. ++ * ++ * Return: 0 if powerup was successful. + */ -+/* Forward decls */ -+struct kbase_device; -+struct kbase_jd_atom; -+ -+ -+typedef u32 kbase_context_flags; -+ -+struct kbasep_atom_req { -+ base_jd_core_req core_req; -+ kbase_context_flags ctx_req; -+ u32 device_nr; -+}; -+ -+/** Callback function run on all of a context's jobs registered with the Job -+ * Scheduler */ -+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); ++int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, ++ unsigned int flags); + +/** -+ * @brief Maximum number of jobs that can be submitted to a job slot whilst -+ * inside the IRQ handler. ++ * Halt the power management framework. + * -+ * This is important because GPU NULL jobs can complete whilst the IRQ handler -+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL -+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency. ++ * Should ensure that no new interrupts are generated, but allow any currently ++ * running interrupt handlers to complete successfully. The GPU is forced off by ++ * the time this function returns, regardless of whether or not the active power ++ * policy asks for the GPU to be powered off. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) + */ -+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 ++void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); + +/** -+ * @brief Context attributes -+ * -+ * Each context attribute can be thought of as a boolean value that caches some -+ * state information about either the runpool, or the context: -+ * - In the case of the runpool, it is a cache of "Do any contexts owned by -+ * the runpool have attribute X?" -+ * - In the case of a context, it is a cache of "Do any atoms owned by the -+ * context have attribute X?" -+ * -+ * The boolean value of the context attributes often affect scheduling -+ * decisions, such as affinities to use and job slots to use. ++ * Perform any backend-specific actions to suspend the GPU + * -+ * To accomodate changes of state in the context, each attribute is refcounted -+ * in the context, and in the runpool for all running contexts. Specifically: -+ * - The runpool holds a refcount of how many contexts in the runpool have this -+ * attribute. -+ * - The context holds a refcount of how many atoms have this attribute. ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) + */ -+enum kbasep_js_ctx_attr { -+ /** Attribute indicating a context that contains Compute jobs. That is, -+ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE -+ * -+ * @note A context can be both 'Compute' and 'Non Compute' if it contains -+ * both types of jobs. 
-+ */ -+ KBASEP_JS_CTX_ATTR_COMPUTE, -+ -+ /** Attribute indicating a context that contains Non-Compute jobs. That is, -+ * the context has some jobs that are \b not of type @ref -+ * BASE_JD_REQ_ONLY_COMPUTE. -+ * -+ * @note A context can be both 'Compute' and 'Non Compute' if it contains -+ * both types of jobs. -+ */ -+ KBASEP_JS_CTX_ATTR_NON_COMPUTE, ++void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); + -+ /** Attribute indicating that a context contains compute-job atoms that -+ * aren't restricted to a coherent group, and can run on all cores. -+ * -+ * Specifically, this is when the atom's \a core_req satisfy: -+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 -+ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups -+ * -+ * Such atoms could be blocked from running if one of the coherent groups -+ * is being used by another job slot, so tracking this context attribute -+ * allows us to prevent such situations. -+ * -+ * @note This doesn't take into account the 1-coregroup case, where all -+ * compute atoms would effectively be able to run on 'all cores', but -+ * contexts will still not always get marked with this attribute. Instead, -+ * it is the caller's responsibility to take into account the number of -+ * coregroups when interpreting this attribute. -+ * -+ * @note Whilst Tiler atoms are normally combined with -+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without -+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy -+ * enough to handle anyway. -+ */ -+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, ++/** ++ * Perform any backend-specific actions to resume the GPU from a suspend ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ */ ++void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + -+ /** Must be the last in the enum */ -+ KBASEP_JS_CTX_ATTR_COUNT -+}; ++/** ++ * Perform any required actions for activating the GPU. Called when the first ++ * context goes active. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ */ ++void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + -+enum { -+ /** Bit indicating that new atom should be started because this atom completed */ -+ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), -+ /** Bit indicating that the atom was evicted from the JS_NEXT registers */ -+ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) -+}; ++/** ++ * Perform any required actions for idling the GPU. Called when the last ++ * context goes idle. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ */ ++void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); + -+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ -+typedef u32 kbasep_js_atom_done_code; + +/** -+ * @brief KBase Device Data Job Scheduler sub-structure ++ * Set the debug core mask. + * -+ * This encapsulates the current context of the Job Scheduler on a particular -+ * device. This context is global to the device, and is not tied to any -+ * particular struct kbase_context running on the device. ++ * This determines which cores the power manager is allowed to use. + * -+ * nr_contexts_running and as_free are optimized for packing together (by making -+ * them smaller types than u32). The operations on them should rarely involve -+ * masking. 
The use of signed types for arithmetic indicates to the compiler that -+ * the value will not rollover (which would be undefined behavior), and so under -+ * the Total License model, it is free to make optimizations based on that (i.e. -+ * to remove masking). ++ * @param kbdev The kbase device structure for the device (must be a ++ * valid pointer) ++ * @param new_core_mask_js0 The core mask to use for job slot 0 ++ * @param new_core_mask_js0 The core mask to use for job slot 1 ++ * @param new_core_mask_js0 The core mask to use for job slot 2 + */ -+struct kbasep_js_device_data { -+ /* Sub-structure to collect together Job Scheduling data used in IRQ -+ * context. The hwaccess_lock must be held when accessing. */ -+ struct runpool_irq { -+ /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. -+ * When bit 'N' is set in this, it indicates whether the context bound to address space -+ * 'N' is allowed to submit jobs. -+ */ -+ u16 submit_allowed; -+ -+ /** Context Attributes: -+ * Each is large enough to hold a refcount of the number of contexts -+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS -+ * -+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store -+ * the refcount. Hence, it's not worthwhile reducing this to -+ * bit-manipulation on u32s to save space (where in contrast, 4 bit -+ * sub-fields would be easy to do and would save space). -+ * -+ * Whilst this must not become negative, the sign bit is used for: -+ * - error detection in debug builds -+ * - Optimization: it is undefined for a signed int to overflow, and so -+ * the compiler can optimize for that never happening (thus, no masking -+ * is required on updating the variable) */ -+ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; -+ -+ /* -+ * Affinity management and tracking -+ */ -+ /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates -+ * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ -+ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; -+ /** Refcount for each core owned by each slot. Used to generate the -+ * slot_affinities array of bitvectors -+ * -+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, -+ * because it is refcounted only when a job is definitely about to be -+ * submitted to a slot, and is de-refcounted immediately after a job -+ * finishes */ -+ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; -+ } runpool_irq; -+ -+ /** -+ * Run Pool mutex, for managing contexts within the runpool. -+ * Unless otherwise specified, you must hold this lock whilst accessing any -+ * members that follow -+ * -+ * In addition, this is used to access: -+ * - the kbasep_js_kctx_info::runpool substructure -+ */ -+ struct mutex runpool_mutex; -+ -+ /** -+ * Queue Lock, used to access the Policy's queue of contexts independently -+ * of the Run Pool. -+ * -+ * Of course, you don't need the Run Pool lock to access this. -+ */ -+ struct mutex queue_mutex; -+ -+ /** -+ * Scheduling semaphore. This must be held when calling -+ * kbase_jm_kick() -+ */ -+ struct semaphore schedule_sem; -+ -+ /** -+ * List of contexts that can currently be pulled from -+ */ -+ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]; -+ /** -+ * List of contexts that can not currently be pulled from, but have -+ * jobs currently running. 
-+ */ -+ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]; -+ -+ /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ -+ s8 nr_user_contexts_running; -+ /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ -+ s8 nr_all_contexts_running; -+ -+ /** Core Requirements to match up with base_js_atom's core_req memeber -+ * @note This is a write-once member, and so no locking is required to read */ -+ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; -+ -+ u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ -+ u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ -+ u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ -+ u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ -+ u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ -+ u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ -+ u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ -+ u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ -+ u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ -+ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ -+ -+ /**< Value for JS_SOFT_JOB_TIMEOUT */ -+ atomic_t soft_job_timeout_ms; -+ -+ /** List of suspended soft jobs */ -+ struct list_head suspended_soft_jobs_list; -+ -+#ifdef CONFIG_MALI_DEBUG -+ /* Support soft-stop on a single context */ -+ bool softstop_always; -+#endif /* CONFIG_MALI_DEBUG */ -+ -+ /** The initalized-flag is placed at the end, to avoid cache-pollution (we should -+ * only be using this during init/term paths). -+ * @note This is a write-once member, and so no locking is required to read */ -+ int init_status; -+ -+ /* Number of contexts that can currently be pulled from */ -+ u32 nr_contexts_pullable; ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask_js0, u64 new_core_mask_js1, ++ u64 new_core_mask_js2); + -+ /* Number of contexts that can either be pulled from or are currently -+ * running */ -+ atomic_t nr_contexts_runnable; -+}; + +/** -+ * @brief KBase Context Job Scheduling information structure ++ * Get the current policy. + * -+ * This is a substructure in the struct kbase_context that encapsulates all the -+ * scheduling information. ++ * Returns the policy that is currently active. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ * ++ * @return The current policy + */ -+struct kbasep_js_kctx_info { -+ -+ /** -+ * Job Scheduler Context information sub-structure. These members are -+ * accessed regardless of whether the context is: -+ * - In the Policy's Run Pool -+ * - In the Policy's Queue -+ * - Not queued nor in the Run Pool. -+ * -+ * You must obtain the jsctx_mutex before accessing any other members of -+ * this substructure. -+ * -+ * You may not access any of these members from IRQ context. -+ */ -+ struct kbase_jsctx { -+ struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ -+ -+ /** Number of jobs ready to run - does \em not include the jobs waiting in -+ * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr -+ * for such jobs*/ -+ u32 nr_jobs; -+ -+ /** Context Attributes: -+ * Each is large enough to hold a refcount of the number of atoms on -+ * the context. **/ -+ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; -+ -+ /** -+ * Wait queue to wait for KCTX_SHEDULED flag state changes. 
-+ * */ -+ wait_queue_head_t is_scheduled_wait; -+ -+ /** Link implementing JS queues. Context can be present on one -+ * list per job slot -+ */ -+ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; -+ } ctx; -+ -+ /* The initalized-flag is placed at the end, to avoid cache-pollution (we should -+ * only be using this during init/term paths) */ -+ int init_status; -+}; -+ -+/** Subset of atom state that can be available after jd_done_nolock() is called -+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), -+ * because the original atom could disappear. */ -+struct kbasep_js_atom_retained_state { -+ /** Event code - to determine whether the atom has finished */ -+ enum base_jd_event_code event_code; -+ /** core requirements */ -+ base_jd_core_req core_req; -+ /* priority */ -+ int sched_priority; -+ /** Job Slot to retry submitting to if submission from IRQ handler failed */ -+ int retry_submit_on_slot; -+ /* Core group atom was executed on */ -+ u32 device_nr; -+ -+}; ++const struct kbase_pm_ca_policy ++*kbase_pm_ca_get_policy(struct kbase_device *kbdev); + +/** -+ * Value signifying 'no retry on a slot required' for: -+ * - kbase_js_atom_retained_state::retry_submit_on_slot -+ * - kbase_jd_atom::retry_submit_on_slot ++ * Change the policy to the one specified. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ * @param policy The policy to change to (valid pointer returned from ++ * @ref kbase_pm_ca_list_policies) + */ -+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) ++void kbase_pm_ca_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_ca_policy *policy); + +/** -+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. ++ * Retrieve a static list of the available policies. + * -+ * @see kbase_atom_retained_state_is_valid() ++ * @param[out] policies An array pointer to take the list of policies. This may ++ * be NULL. The contents of this array must not be ++ * modified. ++ * ++ * @return The number of policies + */ -+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP ++int ++kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); ++ + +/** -+ * @brief The JS timer resolution, in microseconds ++ * Get the current policy. + * -+ * Any non-zero difference in time will be at least this size. ++ * Returns the policy that is currently active. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ * ++ * @return The current policy + */ -+#define KBASEP_JS_TICK_RESOLUTION_US 1 ++const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); + -+/* -+ * Internal atom priority defines for kbase_jd_atom::sched_prio ++/** ++ * Change the policy to the one specified. 
++ * ++ * @param kbdev The kbase device structure for the device (must be a valid ++ * pointer) ++ * @param policy The policy to change to (valid pointer returned from ++ * @ref kbase_pm_list_policies) + */ -+enum { -+ KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, -+ KBASE_JS_ATOM_SCHED_PRIO_MED, -+ KBASE_JS_ATOM_SCHED_PRIO_LOW, -+ KBASE_JS_ATOM_SCHED_PRIO_COUNT, -+}; -+ -+/* Invalid priority for kbase_jd_atom::sched_prio */ -+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 -+ -+/* Default priority in the case of contexts with no atoms, or being lenient -+ * about invalid priorities from userspace */ -+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED ++void kbase_pm_set_policy(struct kbase_device *kbdev, ++ const struct kbase_pm_policy *policy); + -+ /** @} *//* end group kbase_js */ -+ /** @} *//* end group base_kbase_api */ -+ /** @} *//* end group base_api */ ++/** ++ * Retrieve a static list of the available policies. ++ * ++ * @param[out] policies An array pointer to take the list of policies. This may ++ * be NULL. The contents of this array must not be ++ * modified. ++ * ++ * @return The number of policies ++ */ ++int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies); + -+#endif /* _KBASE_JS_DEFS_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h ++#endif /* _KBASE_HWACCESS_PM_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h new file mode 100644 -index 000000000..6d1e61fd4 +index 000000000..10b65798e --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h -@@ -0,0 +1,43 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h +@@ -0,0 +1,53 @@ +/* + * -+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -378547,40 +379813,50 @@ index 000000000..6d1e61fd4 + + + -+ +/** -+ * @file mali_kbase_linux.h -+ * Base kernel APIs, Linux implementation. ++ * + */ + -+#ifndef _KBASE_LINUX_H_ -+#define _KBASE_LINUX_H_ ++#ifndef _KBASE_BACKEND_TIME_H_ ++#define _KBASE_BACKEND_TIME_H_ + -+/* All things that are needed for the Linux port. */ -+#include -+#include -+#include -+#include -+#include ++/** ++ * kbase_backend_get_gpu_time() - Get current GPU time ++ * @kbdev: Device pointer ++ * @cycle_counter: Pointer to u64 to store cycle counter in ++ * @system_time: Pointer to u64 to store system time in ++ * @ts: Pointer to struct timespec64 to store current monotonic ++ * time in ++ */ ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts); + -+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) -+ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) -+#else -+ #define KBASE_EXPORT_TEST_API(func) ++/** ++ * kbase_wait_write_flush() - Wait for GPU write flush ++ * @kctx: Context pointer ++ * ++ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush ++ * its write buffer. ++ * ++ * If GPU resets occur then the counters are reset to zero, the delay may not be ++ * as expected. 
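++ *
++ * A related helper declared above, kbase_backend_get_gpu_time(), samples
++ * the GPU cycle counter directly; a minimal sketch (illustrative only):
++ *
++ *   u64 cycles, sys_time;
++ *   struct timespec64 ts;
++ *
++ *   kbase_backend_get_gpu_time(kbdev, &cycles, &sys_time, &ts);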
++ * ++ * This function is only in use for BASE_HW_ISSUE_6367 ++ */ ++#ifndef CONFIG_MALI_NO_MALI ++void kbase_wait_write_flush(struct kbase_context *kctx); +#endif + -+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) -+ -+#endif /* _KBASE_LINUX_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c ++#endif /* _KBASE_BACKEND_TIME_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h new file mode 100644 -index 000000000..f372e02ef +index 000000000..cf7bf1b35 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c -@@ -0,0 +1,2657 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +@@ -0,0 +1,66 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -378595,2655 +379871,2874 @@ index 000000000..f372e02ef + + + ++#ifndef _KBASE_HWCNT_READER_H_ ++#define _KBASE_HWCNT_READER_H_ + ++/* The ids of ioctl commands. */ ++#define KBASE_HWCNT_READER 0xBE ++#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) ++#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) ++#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) ++#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) ++#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ ++ struct kbase_hwcnt_reader_metadata) ++#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ ++ struct kbase_hwcnt_reader_metadata) ++#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) ++#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) ++#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) ++#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) + +/** -+ * @file mali_kbase_mem.c -+ * Base kernel memory APIs ++ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata ++ * @timestamp: time when sample was collected ++ * @event_id: id of an event that triggered sample collection ++ * @buffer_idx: position in sampling area where sample buffer was stored + */ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+#include -+#endif /* CONFIG_DMA_SHARED_BUFFER */ -+#ifdef CONFIG_UMP -+#include -+#endif /* CONFIG_UMP */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++struct kbase_hwcnt_reader_metadata { ++ u64 timestamp; ++ u32 event_id; ++ u32 buffer_idx; ++}; + -+/* This function finds out which RB tree the given GPU VA region belongs to -+ * based on the region zone */ -+static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx, -+ struct kbase_va_region *reg) -+{ -+ struct rb_root *rbtree = NULL; ++/** ++ * enum base_hwcnt_reader_event - hwcnt dumping events ++ * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump ++ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump ++ * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request ++ * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request ++ * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events ++ */ ++enum base_hwcnt_reader_event { ++ BASE_HWCNT_READER_EVENT_MANUAL, ++ BASE_HWCNT_READER_EVENT_PERIODIC, ++ BASE_HWCNT_READER_EVENT_PREJOB, ++ 
BASE_HWCNT_READER_EVENT_POSTJOB, + -+ switch (reg->flags & KBASE_REG_ZONE_MASK) { -+ case KBASE_REG_ZONE_CUSTOM_VA: -+ rbtree = &kctx->reg_rbtree_custom; -+ break; -+ case KBASE_REG_ZONE_EXEC: -+ rbtree = &kctx->reg_rbtree_exec; -+ break; -+ case KBASE_REG_ZONE_SAME_VA: -+ rbtree = &kctx->reg_rbtree_same; -+ /* fall through */ -+ default: -+ rbtree = &kctx->reg_rbtree_same; -+ break; -+ } ++ BASE_HWCNT_READER_EVENT_COUNT ++}; + -+ return rbtree; -+} ++#endif /* _KBASE_HWCNT_READER_H_ */ + -+/* This function finds out which RB tree the given pfn from the GPU VA belongs -+ * to based on the memory zone the pfn refers to */ -+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, -+ u64 gpu_pfn) -+{ -+ struct rb_root *rbtree = NULL; +diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h +new file mode 100644 +index 000000000..dcbed9c77 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h +@@ -0,0 +1,656 @@ ++/* ++ * ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+#ifdef CONFIG_64BIT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+#endif /* CONFIG_64BIT */ -+ if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE) -+ rbtree = &kctx->reg_rbtree_custom; -+ else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE) -+ rbtree = &kctx->reg_rbtree_exec; -+ else -+ rbtree = &kctx->reg_rbtree_same; -+#ifdef CONFIG_64BIT -+ } else { -+ if (gpu_pfn >= kctx->same_va_end) -+ rbtree = &kctx->reg_rbtree_custom; -+ else -+ rbtree = &kctx->reg_rbtree_same; -+ } -+#endif /* CONFIG_64BIT */ + -+ return rbtree; -+} + -+/* This function inserts a region into the tree. */ -+static void kbase_region_tracker_insert(struct kbase_context *kctx, -+ struct kbase_va_region *new_reg) -+{ -+ u64 start_pfn = new_reg->start_pfn; -+ struct rb_node **link = NULL; -+ struct rb_node *parent = NULL; -+ struct rb_root *rbtree = NULL; ++#ifndef _KBASE_IOCTL_H_ ++#define _KBASE_IOCTL_H_ + -+ rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg); ++#ifdef __cpluscplus ++extern "C" { ++#endif + -+ link = &(rbtree->rb_node); -+ /* Find the right place in the tree using tree search */ -+ while (*link) { -+ struct kbase_va_region *old_reg; ++#include + -+ parent = *link; -+ old_reg = rb_entry(parent, struct kbase_va_region, rblink); ++#define KBASE_IOCTL_TYPE 0x80 + -+ /* RBTree requires no duplicate entries. */ -+ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); ++#ifdef ANDROID ++/* Android's definition of ioctl is incorrect, specifying the type argument as ++ * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work ++ * round this by redefining _IOC to include a case to 'int'. 
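++ *
++ * For instance, KBASE_IOCTL_VERSION_CHECK below is built with _IOWR, which
++ * places the direction bits in the topmost bits of the request number, so
++ * the encoded value does not fit in a positive 'int'; the cast added here
++ * keeps such requests usable with Android's ioctl() prototype.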
++ */ ++#undef _IOC ++#define _IOC(dir, type, nr, size) \ ++ ((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \ ++ ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))) ++#endif + -+ if (old_reg->start_pfn > start_pfn) -+ link = &(*link)->rb_left; -+ else -+ link = &(*link)->rb_right; -+ } ++/** ++ * struct kbase_ioctl_version_check - Check version compatibility with kernel ++ * ++ * @major: Major version number ++ * @minor: Minor version number ++ */ ++struct kbase_ioctl_version_check { ++ __u16 major; ++ __u16 minor; ++}; + -+ /* Put the new node there, and rebalance tree */ -+ rb_link_node(&(new_reg->rblink), parent, link); ++#define KBASE_IOCTL_VERSION_CHECK \ ++ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) + -+ rb_insert_color(&(new_reg->rblink), rbtree); -+} ++/** ++ * struct kbase_ioctl_set_flags - Set kernel context creation flags ++ * ++ * @create_flags: Flags - see base_context_create_flags ++ */ ++struct kbase_ioctl_set_flags { ++ __u32 create_flags; ++}; + -+/* Find allocated region enclosing free range. */ -+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free( -+ struct kbase_context *kctx, u64 start_pfn, size_t nr_pages) -+{ -+ struct rb_node *rbnode = NULL; -+ struct kbase_va_region *reg = NULL; -+ struct rb_root *rbtree = NULL; ++#define KBASE_IOCTL_SET_FLAGS \ ++ _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) + -+ u64 end_pfn = start_pfn + nr_pages; ++/** ++ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel ++ * ++ * @addr: Memory address of an array of struct base_jd_atom_v2 ++ * @nr_atoms: Number of entries in the array ++ * @stride: sizeof(struct base_jd_atom_v2) ++ */ ++struct kbase_ioctl_job_submit { ++ union kbase_pointer addr; ++ __u32 nr_atoms; ++ __u32 stride; ++}; + -+ rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn); ++#define KBASE_IOCTL_JOB_SUBMIT \ ++ _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) + -+ rbnode = rbtree->rb_node; ++/** ++ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel ++ * ++ * @buffer: Pointer to the buffer to store properties into ++ * @size: Size of the buffer ++ * @flags: Flags - must be zero for now ++ * ++ * The ioctl will return the number of bytes stored into @buffer or an error ++ * on failure (e.g. @size is too small). If @size is specified as 0 then no ++ * data will be written but the return value will be the number of bytes needed ++ * for all the properties. ++ * ++ * @flags may be used in the future to request a different format for the ++ * buffer. With @flags == 0 the following format is used. ++ * ++ * The buffer will be filled with pairs of values, a u32 key identifying the ++ * property followed by the value. The size of the value is identified using ++ * the bottom bits of the key. The value then immediately followed the key and ++ * is tightly packed (there is no padding). All keys and values are ++ * little-endian. ++ * ++ * 00 = u8 ++ * 01 = u16 ++ * 10 = u32 ++ * 11 = u64 ++ */ ++struct kbase_ioctl_get_gpuprops { ++ union kbase_pointer buffer; ++ __u32 size; ++ __u32 flags; ++}; + -+ while (rbnode) { -+ u64 tmp_start_pfn, tmp_end_pfn; ++#define KBASE_IOCTL_GET_GPUPROPS \ ++ _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) + -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ tmp_start_pfn = reg->start_pfn; -+ tmp_end_pfn = reg->start_pfn + reg->nr_pages; ++#define KBASE_IOCTL_POST_TERM \ ++ _IO(KBASE_IOCTL_TYPE, 4) + -+ /* If start is lower than this, go left. 
*/ -+ if (start_pfn < tmp_start_pfn) -+ rbnode = rbnode->rb_left; -+ /* If end is higher than this, then go right. */ -+ else if (end_pfn > tmp_end_pfn) -+ rbnode = rbnode->rb_right; -+ else /* Enclosing */ -+ return reg; -+ } ++/** ++ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU ++ * ++ * @va_pages: The number of pages of virtual address space to reserve ++ * @commit_pages: The number of physical pages to allocate ++ * @extent: The number of extra pages to allocate on each GPU fault which grows ++ * the region ++ * @flags: Flags ++ * @gpu_va: The GPU virtual address which is allocated ++ * ++ * @in: Input parameters ++ * @out: Output parameters ++ */ ++union kbase_ioctl_mem_alloc { ++ struct { ++ __u64 va_pages; ++ __u64 commit_pages; ++ __u64 extent; ++ __u64 flags; ++ } in; ++ struct { ++ __u64 flags; ++ __u64 gpu_va; ++ } out; ++}; + -+ return NULL; -+} ++#define KBASE_IOCTL_MEM_ALLOC \ ++ _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) + -+/* Find region enclosing given address. */ -+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr) -+{ -+ struct rb_node *rbnode; -+ struct kbase_va_region *reg; -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_root *rbtree = NULL; ++/** ++ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region ++ * @gpu_addr: A GPU address contained within the region ++ * @query: The type of query ++ * @value: The result of the query ++ * ++ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. ++ * ++ * @in: Input parameters ++ * @out: Output parameters ++ */ ++union kbase_ioctl_mem_query { ++ struct { ++ __u64 gpu_addr; ++ __u64 query; ++ } in; ++ struct { ++ __u64 value; ++ } out; ++}; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); ++#define KBASE_IOCTL_MEM_QUERY \ ++ _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) + -+ lockdep_assert_held(&kctx->reg_lock); ++#define KBASE_MEM_QUERY_COMMIT_SIZE 1 ++#define KBASE_MEM_QUERY_VA_SIZE 2 ++#define KBASE_MEM_QUERY_FLAGS 3 + -+ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); ++/** ++ * struct kbase_ioctl_mem_free - Free a memory region ++ * @gpu_addr: Handle to the region to free ++ */ ++struct kbase_ioctl_mem_free { ++ __u64 gpu_addr; ++}; + -+ rbnode = rbtree->rb_node; ++#define KBASE_IOCTL_MEM_FREE \ ++ _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) + -+ while (rbnode) { -+ u64 tmp_start_pfn, tmp_end_pfn; ++/** ++ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader ++ * @buffer_count: requested number of dumping buffers ++ * @jm_bm: counters selection bitmask (JM) ++ * @shader_bm: counters selection bitmask (Shader) ++ * @tiler_bm: counters selection bitmask (Tiler) ++ * @mmu_l2_bm: counters selection bitmask (MMU_L2) ++ * ++ * A fd is returned from the ioctl if successful, or a negative value on error ++ */ ++struct kbase_ioctl_hwcnt_reader_setup { ++ __u32 buffer_count; ++ __u32 jm_bm; ++ __u32 shader_bm; ++ __u32 tiler_bm; ++ __u32 mmu_l2_bm; ++}; + -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ tmp_start_pfn = reg->start_pfn; -+ tmp_end_pfn = reg->start_pfn + reg->nr_pages; ++#define KBASE_IOCTL_HWCNT_READER_SETUP \ ++ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) + -+ /* If start is lower than this, go left. */ -+ if (gpu_pfn < tmp_start_pfn) -+ rbnode = rbnode->rb_left; -+ /* If end is higher than this, then go right. 
*/ -+ else if (gpu_pfn >= tmp_end_pfn) -+ rbnode = rbnode->rb_right; -+ else /* Enclosing */ -+ return reg; -+ } ++/** ++ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection ++ * @dump_buffer: GPU address to write counters to ++ * @jm_bm: counters selection bitmask (JM) ++ * @shader_bm: counters selection bitmask (Shader) ++ * @tiler_bm: counters selection bitmask (Tiler) ++ * @mmu_l2_bm: counters selection bitmask (MMU_L2) ++ */ ++struct kbase_ioctl_hwcnt_enable { ++ __u64 dump_buffer; ++ __u32 jm_bm; ++ __u32 shader_bm; ++ __u32 tiler_bm; ++ __u32 mmu_l2_bm; ++}; + -+ return NULL; -+} ++#define KBASE_IOCTL_HWCNT_ENABLE \ ++ _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) + -+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); ++#define KBASE_IOCTL_HWCNT_DUMP \ ++ _IO(KBASE_IOCTL_TYPE, 10) + -+/* Find region with given base address */ -+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr) -+{ -+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; -+ struct rb_node *rbnode = NULL; -+ struct kbase_va_region *reg = NULL; -+ struct rb_root *rbtree = NULL; ++#define KBASE_IOCTL_HWCNT_CLEAR \ ++ _IO(KBASE_IOCTL_TYPE, 11) + -+ KBASE_DEBUG_ASSERT(NULL != kctx); ++/** ++ * struct kbase_ioctl_disjoint_query - Query the disjoint counter ++ * @counter: A counter of disjoint events in the kernel ++ */ ++struct kbase_ioctl_disjoint_query { ++ __u32 counter; ++}; + -+ lockdep_assert_held(&kctx->reg_lock); ++#define KBASE_IOCTL_DISJOINT_QUERY \ ++ _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) + -+ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); ++/** ++ * struct kbase_ioctl_get_ddk_version - Query the kernel version ++ * @version_buffer: Buffer to receive the kernel version string ++ * @size: Size of the buffer ++ * ++ * The ioctl will return the number of bytes written into version_buffer ++ * (which includes a NULL byte) or a negative error code ++ */ ++struct kbase_ioctl_get_ddk_version { ++ union kbase_pointer version_buffer; ++ __u32 size; ++}; + -+ rbnode = rbtree->rb_node; ++#define KBASE_IOCTL_GET_DDK_VERSION \ ++ _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) + -+ while (rbnode) { -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ if (reg->start_pfn > gpu_pfn) -+ rbnode = rbnode->rb_left; -+ else if (reg->start_pfn < gpu_pfn) -+ rbnode = rbnode->rb_right; -+ else -+ return reg; ++/** ++ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator ++ * ++ * @va_pages: Number of VA pages to reserve for JIT ++ * ++ * Note that depending on the VA size of the application and GPU, the value ++ * specified in @va_pages may be ignored. ++ */ ++struct kbase_ioctl_mem_jit_init { ++ __u64 va_pages; ++}; + -+ } ++#define KBASE_IOCTL_MEM_JIT_INIT \ ++ _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) + -+ return NULL; -+} ++/** ++ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory ++ * ++ * @handle: GPU memory handle (GPU VA) ++ * @user_addr: The address where it is mapped in user space ++ * @size: The number of bytes to synchronise ++ * @type: The direction to synchronise: 0 is sync to memory (clean), ++ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. 
++ * @padding: Padding to round up to a multiple of 8 bytes, must be zero ++ */ ++struct kbase_ioctl_mem_sync { ++ __u64 handle; ++ __u64 user_addr; ++ __u64 size; ++ __u8 type; ++ __u8 padding[7]; ++}; + -+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); ++#define KBASE_IOCTL_MEM_SYNC \ ++ _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) + -+/* Find region meeting given requirements */ -+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align) -+{ -+ struct rb_node *rbnode = NULL; -+ struct kbase_va_region *reg = NULL; -+ struct rb_root *rbtree = NULL; ++/** ++ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer ++ * ++ * @gpu_addr: The GPU address of the memory region ++ * @cpu_addr: The CPU address to locate ++ * @size: A size in bytes to validate is contained within the region ++ * @offset: The offset from the start of the memory region to @cpu_addr ++ * ++ * @in: Input parameters ++ * @out: Output parameters ++ */ ++union kbase_ioctl_mem_find_cpu_offset { ++ struct { ++ __u64 gpu_addr; ++ __u64 cpu_addr; ++ __u64 size; ++ } in; ++ struct { ++ __u64 offset; ++ } out; ++}; + -+ /* Note that this search is a linear search, as we do not have a target -+ address in mind, so does not benefit from the rbtree search */ ++#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ ++ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) + -+ rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs); ++/** ++ * struct kbase_ioctl_get_context_id - Get the kernel context ID ++ * ++ * @id: The kernel context ID ++ */ ++struct kbase_ioctl_get_context_id { ++ int id; /* This should really be __u32, but see GPUCORE-10048 */ ++}; + -+ rbnode = rb_first(rbtree); ++#define KBASE_IOCTL_GET_CONTEXT_ID \ ++ _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) + -+ while (rbnode) { -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ if ((reg->nr_pages >= nr_pages) && -+ (reg->flags & KBASE_REG_FREE)) { -+ /* Check alignment */ -+ u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1); ++/** ++ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd ++ * ++ * @flags: Flags ++ * ++ * The ioctl returns a file descriptor when successful ++ */ ++struct kbase_ioctl_tlstream_acquire { ++ __u32 flags; ++}; + -+ if ((start_pfn >= reg->start_pfn) && -+ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && -+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) -+ return reg; -+ } -+ rbnode = rb_next(rbnode); -+ } ++#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ ++ _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) + -+ return NULL; -+} ++#define KBASE_IOCTL_TLSTREAM_FLUSH \ ++ _IO(KBASE_IOCTL_TYPE, 19) + +/** -+ * @brief Remove a region object from the global list. ++ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region + * -+ * The region reg is removed, possibly by merging with other free and -+ * compatible adjacent regions. It must be called with the context -+ * region lock held. The associated memory is not released (see -+ * kbase_free_alloced_region). Internal use only. 
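++ *
++ * Informally, the possible outcomes look like this (illustration only;
++ * when both neighbours are free the three blocks collapse into one):
++ *
++ *   [free][reg][used]  ->  [ free     ][used]    merged into the previous
++ *   [used][reg][free]  ->  [used][ free     ]    merged into the next
++ *   [used][reg][used]  ->  [used][free][used]    reg replaced by a new
++ *                                                free placeholder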
++ * @gpu_addr: The memory region to modify ++ * @pages: The number of physical pages that should be present ++ * ++ * The ioctl may return on the following error codes or 0 for success: ++ * -ENOMEM: Out of memory ++ * -EINVAL: Invalid arguments + */ -+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg) -+{ -+ struct rb_node *rbprev; -+ struct kbase_va_region *prev = NULL; -+ struct rb_node *rbnext; -+ struct kbase_va_region *next = NULL; -+ struct rb_root *reg_rbtree = NULL; -+ -+ int merged_front = 0; -+ int merged_back = 0; -+ int err = 0; ++struct kbase_ioctl_mem_commit { ++ __u64 gpu_addr; ++ __u64 pages; ++}; + -+ reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg); ++#define KBASE_IOCTL_MEM_COMMIT \ ++ _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) + -+ /* Try to merge with the previous block first */ -+ rbprev = rb_prev(&(reg->rblink)); -+ if (rbprev) { -+ prev = rb_entry(rbprev, struct kbase_va_region, rblink); -+ if (prev->flags & KBASE_REG_FREE) { -+ /* We're compatible with the previous VMA, -+ * merge with it */ -+ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != -+ (reg->flags & KBASE_REG_ZONE_MASK)); -+ prev->nr_pages += reg->nr_pages; -+ rb_erase(&(reg->rblink), reg_rbtree); -+ reg = prev; -+ merged_front = 1; -+ } -+ } ++/** ++ * union kbase_ioctl_mem_alias - Create an alias of memory regions ++ * @flags: Flags, see BASE_MEM_xxx ++ * @stride: Bytes between start of each memory region ++ * @nents: The number of regions to pack together into the alias ++ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info ++ * @gpu_va: Address of the new alias ++ * @va_pages: Size of the new alias ++ * ++ * @in: Input parameters ++ * @out: Output parameters ++ */ ++union kbase_ioctl_mem_alias { ++ struct { ++ __u64 flags; ++ __u64 stride; ++ __u64 nents; ++ union kbase_pointer aliasing_info; ++ } in; ++ struct { ++ __u64 flags; ++ __u64 gpu_va; ++ __u64 va_pages; ++ } out; ++}; + -+ /* Try to merge with the next block second */ -+ /* Note we do the lookup here as the tree may have been rebalanced. */ -+ rbnext = rb_next(&(reg->rblink)); -+ if (rbnext) { -+ /* We're compatible with the next VMA, merge with it */ -+ next = rb_entry(rbnext, struct kbase_va_region, rblink); -+ if (next->flags & KBASE_REG_FREE) { -+ WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != -+ (reg->flags & KBASE_REG_ZONE_MASK)); -+ next->start_pfn = reg->start_pfn; -+ next->nr_pages += reg->nr_pages; -+ rb_erase(&(reg->rblink), reg_rbtree); -+ merged_back = 1; -+ if (merged_front) { -+ /* We already merged with prev, free it */ -+ kbase_free_alloced_region(reg); -+ } -+ } -+ } ++#define KBASE_IOCTL_MEM_ALIAS \ ++ _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) + -+ /* If we failed to merge then we need to add a new block */ -+ if (!(merged_front || merged_back)) { -+ /* -+ * We didn't merge anything. Add a new free -+ * placeholder and remove the original one. 
-+ */ -+ struct kbase_va_region *free_reg; ++/** ++ * union kbase_ioctl_mem_import - Import memory for use by the GPU ++ * @flags: Flags, see BASE_MEM_xxx ++ * @phandle: Handle to the external memory ++ * @type: Type of external memory, see base_mem_import_type ++ * @padding: Amount of extra VA pages to append to the imported buffer ++ * @gpu_va: Address of the new alias ++ * @va_pages: Size of the new alias ++ * ++ * @in: Input parameters ++ * @out: Output parameters ++ */ ++union kbase_ioctl_mem_import { ++ struct { ++ __u64 flags; ++ union kbase_pointer phandle; ++ __u32 type; ++ __u32 padding; ++ } in; ++ struct { ++ __u64 flags; ++ __u64 gpu_va; ++ __u64 va_pages; ++ } out; ++}; + -+ free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK); -+ if (!free_reg) { -+ err = -ENOMEM; -+ goto out; -+ } -+ rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); -+ } ++#define KBASE_IOCTL_MEM_IMPORT \ ++ _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) + -+ out: -+ return err; -+} ++/** ++ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region ++ * @gpu_va: The GPU region to modify ++ * @flags: The new flags to set ++ * @mask: Mask of the flags to modify ++ */ ++struct kbase_ioctl_mem_flags_change { ++ __u64 gpu_va; ++ __u64 flags; ++ __u64 mask; ++}; + -+KBASE_EXPORT_TEST_API(kbase_remove_va_region); ++#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ ++ _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) + +/** -+ * @brief Insert a VA region to the list, replacing the current at_reg. ++ * struct kbase_ioctl_stream_create - Create a synchronisation stream ++ * @name: A name to identify this stream. Must be NULL-terminated. ++ * ++ * Note that this is also called a "timeline", but is named stream to avoid ++ * confusion with other uses of the word. ++ * ++ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. ++ * ++ * The ioctl returns a file descriptor. + */ -+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) -+{ -+ struct rb_root *reg_rbtree = NULL; -+ int err = 0; -+ -+ reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg); ++struct kbase_ioctl_stream_create { ++ char name[32]; ++}; + -+ /* Must be a free region */ -+ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); -+ /* start_pfn should be contained within at_reg */ -+ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); -+ /* at least nr_pages from start_pfn should be contained within at_reg */ -+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); ++#define KBASE_IOCTL_STREAM_CREATE \ ++ _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) + -+ new_reg->start_pfn = start_pfn; -+ new_reg->nr_pages = nr_pages; ++/** ++ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence ++ * @fd: The file descriptor to validate ++ */ ++struct kbase_ioctl_fence_validate { ++ int fd; ++}; + -+ /* Regions are a whole use, so swap and delete old one. */ -+ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { -+ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), -+ reg_rbtree); -+ kbase_free_alloced_region(at_reg); -+ } -+ /* New region replaces the start of the old one, so insert before. 
*/ -+ else if (at_reg->start_pfn == start_pfn) { -+ at_reg->start_pfn += nr_pages; -+ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); -+ at_reg->nr_pages -= nr_pages; ++#define KBASE_IOCTL_FENCE_VALIDATE \ ++ _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) + -+ kbase_region_tracker_insert(kctx, new_reg); -+ } -+ /* New region replaces the end of the old one, so insert after. */ -+ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { -+ at_reg->nr_pages -= nr_pages; ++/** ++ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls ++ * @count: The size of @buffer in u32 words ++ * @buffer: The buffer to receive the profiling controls ++ */ ++struct kbase_ioctl_get_profiling_controls { ++ union kbase_pointer buffer; ++ __u32 count; ++}; + -+ kbase_region_tracker_insert(kctx, new_reg); -+ } -+ /* New region splits the old one, so insert and create new */ -+ else { -+ struct kbase_va_region *new_front_reg; ++#define KBASE_IOCTL_GET_PROFILING_CONTROLS \ ++ _IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls) + -+ new_front_reg = kbase_alloc_free_region(kctx, -+ at_reg->start_pfn, -+ start_pfn - at_reg->start_pfn, -+ at_reg->flags & KBASE_REG_ZONE_MASK); ++/** ++ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel ++ * @buffer: Pointer to the information ++ * @len: Length ++ * @padding: Padding ++ * ++ * The data provided is accessible through a debugfs file ++ */ ++struct kbase_ioctl_mem_profile_add { ++ union kbase_pointer buffer; ++ __u32 len; ++ __u32 padding; ++}; + -+ if (new_front_reg) { -+ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; -+ at_reg->start_pfn = start_pfn + nr_pages; ++#define KBASE_IOCTL_MEM_PROFILE_ADD \ ++ _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) + -+ kbase_region_tracker_insert(kctx, new_front_reg); -+ kbase_region_tracker_insert(kctx, new_reg); -+ } else { -+ err = -ENOMEM; -+ } -+ } ++/** ++ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event ++ * @event: GPU address of the event which has been updated ++ * @new_status: The new status to set ++ * @flags: Flags for future expansion ++ */ ++struct kbase_ioctl_soft_event_update { ++ __u64 event; ++ __u32 new_status; ++ __u32 flags; ++}; + -+ return err; -+} ++#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ ++ _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + -+/** -+ * @brief Add a VA region to the list. 
++/*************** ++ * test ioctls * ++ ***************/ ++#if MALI_UNIT_TEST ++/* These ioctls are purely for test purposes and are not used in the production ++ * driver, they therefore may change without notice + */ -+int kbase_add_va_region(struct kbase_context *kctx, -+ struct kbase_va_region *reg, u64 addr, -+ size_t nr_pages, size_t align) -+{ -+ struct kbase_va_region *tmp; -+ u64 gpu_pfn = addr >> PAGE_SHIFT; -+ int err = 0; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != reg); ++#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) + -+ lockdep_assert_held(&kctx->reg_lock); ++/** ++ * struct kbase_ioctl_tlstream_test - Start a timeline stream test ++ * ++ * @tpw_count: number of trace point writers in each context ++ * @msg_delay: time delay between tracepoints from one writer in milliseconds ++ * @msg_count: number of trace points written by one writer ++ * @aux_msg: if non-zero aux messages will be included ++ */ ++struct kbase_ioctl_tlstream_test { ++ __u32 tpw_count; ++ __u32 msg_delay; ++ __u32 msg_count; ++ __u32 aux_msg; ++}; + -+ if (!align) -+ align = 1; ++#define KBASE_IOCTL_TLSTREAM_TEST \ ++ _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) + -+ /* must be a power of 2 */ -+ KBASE_DEBUG_ASSERT((align & (align - 1)) == 0); -+ KBASE_DEBUG_ASSERT(nr_pages > 0); ++/** ++ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes ++ * @bytes_collected: number of bytes read by user ++ * @bytes_generated: number of bytes generated by tracepoints ++ */ ++struct kbase_ioctl_tlstream_stats { ++ __u32 bytes_collected; ++ __u32 bytes_generated; ++}; + -+ /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */ -+ if (gpu_pfn) { -+ struct device *dev = kctx->kbdev->dev; ++#define KBASE_IOCTL_TLSTREAM_STATS \ ++ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) + -+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); ++#endif + -+ tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages); -+ if (!tmp) { -+ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); -+ err = -ENOMEM; -+ goto exit; -+ } -+ if (!(tmp->flags & KBASE_REG_FREE)) { -+ dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK); -+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); -+ dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align); -+ err = -ENOMEM; -+ goto exit; -+ } ++/********************************** ++ * Definitions for GPU properties * ++ **********************************/ ++#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) ++#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) ++#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) ++#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) + -+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages); -+ if (err) { -+ dev_warn(dev, "Failed to insert va region"); -+ err = -ENOMEM; -+ goto exit; -+ } ++#define KBASE_GPUPROP_PRODUCT_ID 1 ++#define KBASE_GPUPROP_VERSION_STATUS 2 ++#define KBASE_GPUPROP_MINOR_REVISION 3 ++#define KBASE_GPUPROP_MAJOR_REVISION 4 ++#define KBASE_GPUPROP_GPU_SPEED_MHZ 5 ++#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 ++#define KBASE_GPUPROP_GPU_FREQ_KHZ_MIN 7 ++#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 ++#define 
KBASE_GPUPROP_TEXTURE_FEATURES_0 9 ++#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 ++#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 ++#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 + -+ goto exit; -+ } ++#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 ++#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 ++#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 + -+ /* Path 2: Map any free address which meets the requirements. */ -+ { -+ u64 start_pfn; ++#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 ++#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 + -+ /* -+ * Depending on the zone the allocation request is for -+ * we might need to retry it. -+ */ -+ do { -+ tmp = kbase_region_tracker_find_region_meeting_reqs( -+ kctx, reg, nr_pages, align); -+ if (tmp) { -+ start_pfn = (tmp->start_pfn + align - 1) & -+ ~(align - 1); -+ err = kbase_insert_va_region_nolock(kctx, reg, -+ tmp, start_pfn, nr_pages); -+ break; -+ } ++#define KBASE_GPUPROP_MAX_THREADS 18 ++#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 ++#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 ++#define KBASE_GPUPROP_MAX_REGISTERS 21 ++#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 ++#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 ++#define KBASE_GPUPROP_IMPL_TECH 24 + -+ /* -+ * If the allocation is not from the same zone as JIT -+ * then don't retry, we're out of VA and there is -+ * nothing which can be done about it. -+ */ -+ if ((reg->flags & KBASE_REG_ZONE_MASK) != -+ KBASE_REG_ZONE_CUSTOM_VA) -+ break; -+ } while (kbase_jit_evict(kctx)); ++#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 ++#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 ++#define KBASE_GPUPROP_RAW_L2_PRESENT 27 ++#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 ++#define KBASE_GPUPROP_RAW_L2_FEATURES 29 ++#define KBASE_GPUPROP_RAW_SUSPEND_SIZE 30 ++#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 ++#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 ++#define KBASE_GPUPROP_RAW_AS_PRESENT 33 ++#define KBASE_GPUPROP_RAW_JS_PRESENT 34 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 ++#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 ++#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 ++#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 ++#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 ++#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 ++#define KBASE_GPUPROP_RAW_GPU_ID 55 ++#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 ++#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 ++#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 ++#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 ++#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 + -+ if (!tmp) -+ err = -ENOMEM; -+ } ++#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 ++#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 ++#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 ++#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 ++#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 ++#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 ++#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 ++#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 ++#define 
KBASE_GPUPROP_COHERENCY_GROUP_5 69 ++#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 ++#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 ++#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 ++#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 ++#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 ++#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 ++#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 ++#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 ++#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 ++#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 + -+ exit: -+ return err; ++#ifdef __cpluscplus +} ++#endif + -+KBASE_EXPORT_TEST_API(kbase_add_va_region); -+ -+/** -+ * @brief Initialize the internal region tracker data structure. ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c +new file mode 100644 +index 000000000..d9d8658d3 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c +@@ -0,0 +1,1903 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, -+ struct kbase_va_region *same_va_reg, -+ struct kbase_va_region *exec_reg, -+ struct kbase_va_region *custom_va_reg) -+{ -+ kctx->reg_rbtree_same = RB_ROOT; -+ kbase_region_tracker_insert(kctx, same_va_reg); + -+ /* Although exec and custom_va_reg don't always exist, -+ * initialize unconditionally because of the mem_view debugfs -+ * implementation which relies on these being empty */ -+ kctx->reg_rbtree_exec = RB_ROOT; -+ kctx->reg_rbtree_custom = RB_ROOT; + -+ if (exec_reg) -+ kbase_region_tracker_insert(kctx, exec_reg); -+ if (custom_va_reg) -+ kbase_region_tracker_insert(kctx, custom_va_reg); -+} + -+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) -+{ -+ struct rb_node *rbnode; -+ struct kbase_va_region *reg; + -+ do { -+ rbnode = rb_first(rbtree); -+ if (rbnode) { -+ rb_erase(rbnode, rbtree); -+ reg = rb_entry(rbnode, struct kbase_va_region, rblink); -+ kbase_free_alloced_region(reg); -+ } -+ } while (rbnode); -+} + -+void kbase_region_tracker_term(struct kbase_context *kctx) -+{ -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); -+} ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include ++#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ ++#ifdef CONFIG_COMPAT ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include + -+/** -+ * Initialize the region tracker data structure. 
-+ */ -+int kbase_region_tracker_init(struct kbase_context *kctx) -+{ -+ struct kbase_va_region *same_va_reg; -+ struct kbase_va_region *exec_reg = NULL; -+ struct kbase_va_region *custom_va_reg = NULL; -+ size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; -+ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; -+ u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; -+ u64 same_va_pages; -+ int err; ++#include ++#include ++#include + -+ /* Take the lock as kbase_free_alloced_region requires it */ -+ kbase_gpu_vm_lock(kctx); ++#include "mali_kbase_dma_fence.h" + -+#if defined(CONFIG_ARM64) -+ same_va_bits = VA_BITS; -+#elif defined(CONFIG_X86_64) -+ same_va_bits = 47; -+#elif defined(CONFIG_64BIT) -+#error Unsupported 64-bit architecture ++#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) ++/* random32 was renamed to prandom_u32 in 3.8 */ ++#define prandom_u32 random32 +#endif + -+#ifdef CONFIG_64BIT ++/* Return whether katom will run on the GPU or not. Currently only soft jobs and ++ * dependency-only atoms do not run on the GPU */ ++#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ ++ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ ++ BASE_JD_REQ_DEP))) ++/* ++ * This is the kernel side of the API. Only entry points are: ++ * - kbase_jd_submit(): Called from userspace to submit a single bag ++ * - kbase_jd_done(): Called from interrupt context to track the ++ * completion of a job. ++ * Callouts: ++ * - to the job manager (enqueue a job) ++ * - to the event subsystem (signals the completion/failure of bag/job-chains). ++ */ ++ ++static void __user * ++get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p) ++{ ++#ifdef CONFIG_COMPAT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ same_va_bits = 32; -+ else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) -+ same_va_bits = 33; ++ return compat_ptr(p->compat_value); +#endif ++ return p->value; ++} + -+ if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { -+ err = -EINVAL; -+ goto fail_unlock; -+ } ++/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs ++ * ++ * Returns whether the JS needs a reschedule. 
++ * ++ * Note that the caller must also check the atom status and ++ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock ++ */ ++static int jd_run_atom(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; -+ /* all have SAME_VA */ -+ same_va_reg = kbase_alloc_free_region(kctx, 1, -+ same_va_pages, -+ KBASE_REG_ZONE_SAME_VA); ++ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + -+ if (!same_va_reg) { -+ err = -ENOMEM; -+ goto fail_unlock; ++ if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { ++ /* Dependency only atom */ ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ return 0; ++ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { ++ /* Soft-job */ ++ if (katom->will_fail_event_code) { ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ return 0; ++ } ++ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ++ == BASE_JD_REQ_SOFT_REPLAY) { ++ if (!kbase_replay_process(katom)) ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ } else if (kbase_process_soft_job(katom) == 0) { ++ kbase_finish_soft_job(katom); ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ } ++ return 0; + } + -+#ifdef CONFIG_64BIT -+ /* 32-bit clients have exec and custom VA zones */ -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+#endif -+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { -+ err = -EINVAL; -+ goto fail_free_same_va; -+ } -+ /* If the current size of TMEM is out of range of the -+ * virtual address space addressable by the MMU then -+ * we should shrink it to fit -+ */ -+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) -+ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ /* Queue an action about whether we should try scheduling a context */ ++ return kbasep_js_add_job(kctx, katom); ++} + -+ exec_reg = kbase_alloc_free_region(kctx, -+ KBASE_REG_ZONE_EXEC_BASE, -+ KBASE_REG_ZONE_EXEC_SIZE, -+ KBASE_REG_ZONE_EXEC); ++#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) ++void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) ++{ ++ struct kbase_device *kbdev; + -+ if (!exec_reg) { -+ err = -ENOMEM; -+ goto fail_free_same_va; -+ } ++ KBASE_DEBUG_ASSERT(katom); ++ kbdev = katom->kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ custom_va_reg = kbase_alloc_free_region(kctx, -+ KBASE_REG_ZONE_CUSTOM_VA_BASE, -+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); ++ /* Check whether the atom's other dependencies were already met. If ++ * katom is a GPU atom then the job scheduler may be able to represent ++ * the dependencies, hence we may attempt to submit it before they are ++ * met. Other atoms must have had both dependencies resolved. 
++ */ ++ if (IS_GPU_ATOM(katom) || ++ (!kbase_jd_katom_dep_atom(&katom->dep[0]) && ++ !kbase_jd_katom_dep_atom(&katom->dep[1]))) { ++ /* katom dep complete, attempt to run it */ ++ bool resched = false; + -+ if (!custom_va_reg) { -+ err = -ENOMEM; -+ goto fail_free_exec; ++ resched = jd_run_atom(katom); ++ ++ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ /* The atom has already finished */ ++ resched |= jd_done_nolock(katom, NULL); + } -+#ifdef CONFIG_64BIT ++ ++ if (resched) ++ kbase_js_sched_all(kbdev); + } ++} +#endif + -+ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); -+ -+ kctx->same_va_end = same_va_pages + 1; -+ -+ kbase_gpu_vm_unlock(kctx); -+ return 0; ++#ifdef CONFIG_KDS + -+fail_free_exec: -+ kbase_free_alloced_region(exec_reg); -+fail_free_same_va: -+ kbase_free_alloced_region(same_va_reg); -+fail_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+} ++/* Add the katom to the kds waiting list. ++ * Atoms must be added to the waiting list after a successful call to kds_async_waitall. ++ * The caller must hold the kbase_jd_context.lock */ + -+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) ++static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom) +{ -+#ifdef CONFIG_64BIT -+ struct kbase_va_region *same_va; -+ struct kbase_va_region *custom_va_reg; -+ u64 same_va_bits; -+ u64 total_va_size; -+ int err; -+ -+ /* -+ * Nothing to do for 32-bit clients, JIT uses the existing -+ * custom VA zone. -+ */ -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ return 0; ++ struct kbase_context *kctx; + -+#if defined(CONFIG_ARM64) -+ same_va_bits = VA_BITS; -+#elif defined(CONFIG_X86_64) -+ same_va_bits = 47; -+#elif defined(CONFIG_64BIT) -+#error Unsupported 64-bit architecture -+#endif ++ KBASE_DEBUG_ASSERT(katom); + -+ if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) -+ same_va_bits = 33; ++ kctx = katom->kctx; + -+ total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; ++ list_add_tail(&katom->node, &kctx->waiting_kds_resource); ++} + -+ kbase_gpu_vm_lock(kctx); ++/* Remove the katom from the kds waiting list. ++ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync. ++ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add. ++ * The caller must hold the kbase_jd_context.lock */ + -+ /* -+ * Modify the same VA free region after creation. Be careful to ensure -+ * that allocations haven't been made as they could cause an overlap -+ * to happen with existing same VA allocations and the custom VA zone. -+ */ -+ same_va = kbase_region_tracker_find_region_base_address(kctx, -+ PAGE_SIZE); -+ if (!same_va) { -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom) ++{ ++ KBASE_DEBUG_ASSERT(katom); ++ list_del(&katom->node); ++} + -+ /* The region flag or region size has changed since creation so bail. 
*/ -+ if ((!(same_va->flags & KBASE_REG_FREE)) || -+ (same_va->nr_pages != total_va_size)) { -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter) ++{ ++ struct kbase_jd_atom *katom; ++ struct kbase_jd_context *ctx; + -+ if (same_va->nr_pages < jit_va_pages || -+ kctx->same_va_end < jit_va_pages) { -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++ katom = (struct kbase_jd_atom *)callback_parameter; ++ KBASE_DEBUG_ASSERT(katom); + -+ /* It's safe to adjust the same VA zone now */ -+ same_va->nr_pages -= jit_va_pages; -+ kctx->same_va_end -= jit_va_pages; ++ ctx = &katom->kctx->jctx; + -+ /* -+ * Create a custom VA zone at the end of the VA for allocations which -+ * JIT can use so it doesn't have to allocate VA from the kernel. ++ /* If KDS resource has already been satisfied (e.g. due to zapping) ++ * do nothing. + */ -+ custom_va_reg = kbase_alloc_free_region(kctx, -+ kctx->same_va_end, -+ jit_va_pages, -+ KBASE_REG_ZONE_CUSTOM_VA); -+ -+ if (!custom_va_reg) { -+ /* -+ * The context will be destroyed if we fail here so no point -+ * reverting the change we made to same_va. -+ */ -+ err = -ENOMEM; -+ goto fail_unlock; ++ mutex_lock(&ctx->lock); ++ if (!katom->kds_dep_satisfied) { ++ katom->kds_dep_satisfied = true; ++ kbase_jd_dep_clear_locked(katom); + } ++ mutex_unlock(&ctx->lock); ++} + -+ kbase_region_tracker_insert(kctx, custom_va_reg); ++static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) ++{ ++ KBASE_DEBUG_ASSERT(katom); + -+ kbase_gpu_vm_unlock(kctx); -+ return 0; ++ /* Prevent job_done_nolock from being called twice on an atom when ++ * there is a race between job completion and cancellation */ + -+fail_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+#else -+ return 0; -+#endif ++ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { ++ /* Wait was cancelled - zap the atom */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(katom->kctx->kbdev); ++ } +} ++#endif /* CONFIG_KDS */ + -+int kbase_mem_init(struct kbase_device *kbdev) ++void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) +{ -+ struct kbasep_mem_device *memdev; ++#ifdef CONFIG_KDS ++ if (katom->kds_rset) { ++ struct kbase_jd_context *jctx = &katom->kctx->jctx; + -+ KBASE_DEBUG_ASSERT(kbdev); ++ /* ++ * As the atom is no longer waiting, remove it from ++ * the waiting list. ++ */ + -+ memdev = &kbdev->memdev; -+ kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX; ++ mutex_lock(&jctx->lock); ++ kbase_jd_kds_waiters_remove(katom); ++ mutex_unlock(&jctx->lock); + -+ /* Initialize memory usage */ -+ atomic_set(&memdev->used_pages, 0); ++ /* Release the kds resource or cancel if zapping */ ++ kds_resource_set_release_sync(&katom->kds_rset); ++ } ++#endif /* CONFIG_KDS */ + -+ return kbase_mem_pool_init(&kbdev->mem_pool, -+ KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL); ++#ifdef CONFIG_MALI_DMA_FENCE ++ /* Flush dma-fence workqueue to ensure that any callbacks that may have ++ * been queued are done before continuing. ++ * Any successfully completed atom would have had all it's callbacks ++ * completed before the atom was run, so only flush for failed atoms. 
++ */ ++ if (katom->event_code != BASE_JD_EVENT_DONE) ++ flush_workqueue(katom->kctx->dma_fence.wq); ++#endif /* CONFIG_MALI_DMA_FENCE */ +} + -+void kbase_mem_halt(struct kbase_device *kbdev) ++static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) +{ -+ CSTD_UNUSED(kbdev); -+} ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + -+void kbase_mem_term(struct kbase_device *kbdev) -+{ -+ struct kbasep_mem_device *memdev; -+ int pages; ++#ifdef CONFIG_KDS ++ /* Prevent the KDS resource from triggering the atom in case of zapping */ ++ if (katom->kds_rset) ++ katom->kds_dep_satisfied = true; ++#endif /* CONFIG_KDS */ + -+ KBASE_DEBUG_ASSERT(kbdev); ++#ifdef CONFIG_MALI_DMA_FENCE ++ kbase_dma_fence_signal(katom); ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+ memdev = &kbdev->memdev; ++ kbase_gpu_vm_lock(katom->kctx); ++ /* only roll back if extres is non-NULL */ ++ if (katom->extres) { ++ u32 res_no; + -+ pages = atomic_read(&memdev->used_pages); -+ if (pages != 0) -+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); ++ res_no = katom->nr_extres; ++ while (res_no-- > 0) { ++ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; ++ struct kbase_va_region *reg; + -+ kbase_mem_pool_term(&kbdev->mem_pool); ++ reg = kbase_region_tracker_find_region_base_address( ++ katom->kctx, ++ katom->extres[res_no].gpu_address); ++ kbase_unmap_external_resource(katom->kctx, reg, alloc); ++ } ++ kfree(katom->extres); ++ katom->extres = NULL; ++ } ++ kbase_gpu_vm_unlock(katom->kctx); +} + -+KBASE_EXPORT_TEST_API(kbase_mem_term); -+ -+ -+ -+ -+/** -+ * @brief Allocate a free region object. -+ * -+ * The allocated object is not part of any list yet, and is flagged as -+ * KBASE_REG_FREE. No mapping is allocated yet. -+ * -+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC ++/* ++ * Set up external resources needed by this job. + * ++ * jctx.lock must be held when this is called. + */ -+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) ++ ++static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) +{ -+ struct kbase_va_region *new_reg; ++ int err_ret_val = -EINVAL; ++ u32 res_no; ++#ifdef CONFIG_KDS ++ u32 kds_res_count = 0; ++ struct kds_resource **kds_resources = NULL; ++ unsigned long *kds_access_bitmap = NULL; ++#endif /* CONFIG_KDS */ ++#ifdef CONFIG_MALI_DMA_FENCE ++ struct kbase_dma_fence_resv_info info = { ++ .dma_fence_resv_count = 0, ++ }; ++#ifdef CONFIG_SYNC ++ /* ++ * When both dma-buf fence and Android native sync is enabled, we ++ * disable dma-buf fence for contexts that are using Android native ++ * fences. 
++ */ ++ const bool implicit_sync = !kbase_ctx_flag(katom->kctx, ++ KCTX_NO_IMPLICIT_SYNC); ++#else /* CONFIG_SYNC */ ++ const bool implicit_sync = true; ++#endif /* CONFIG_SYNC */ ++#endif /* CONFIG_MALI_DMA_FENCE */ ++ struct base_external_resource *input_extres; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + -+ /* zone argument should only contain zone related region flags */ -+ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); -+ KBASE_DEBUG_ASSERT(nr_pages > 0); -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); ++ /* no resources encoded, early out */ ++ if (!katom->nr_extres) ++ return -EINVAL; + -+ new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); ++ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); ++ if (NULL == katom->extres) { ++ err_ret_val = -ENOMEM; ++ goto early_err_out; ++ } + -+ if (!new_reg) -+ return NULL; ++ /* copy user buffer to the end of our real buffer. ++ * Make sure the struct sizes haven't changed in a way ++ * we don't support */ ++ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); ++ input_extres = (struct base_external_resource *) ++ (((unsigned char *)katom->extres) + ++ (sizeof(*katom->extres) - sizeof(*input_extres)) * ++ katom->nr_extres); + -+ new_reg->cpu_alloc = NULL; /* no alloc bound yet */ -+ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ -+ new_reg->kctx = kctx; -+ new_reg->flags = zone | KBASE_REG_FREE; ++ if (copy_from_user(input_extres, ++ get_compat_pointer(katom->kctx, &user_atom->extres_list), ++ sizeof(*input_extres) * katom->nr_extres) != 0) { ++ err_ret_val = -EINVAL; ++ goto early_err_out; ++ } ++#ifdef CONFIG_KDS ++ /* assume we have to wait for all */ ++ KBASE_DEBUG_ASSERT(0 != katom->nr_extres); ++ kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL); + -+ new_reg->flags |= KBASE_REG_GROWABLE; ++ if (!kds_resources) { ++ err_ret_val = -ENOMEM; ++ goto early_err_out; ++ } + -+ new_reg->start_pfn = start_pfn; -+ new_reg->nr_pages = nr_pages; ++ KBASE_DEBUG_ASSERT(0 != katom->nr_extres); ++ kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), ++ sizeof(unsigned long), ++ GFP_KERNEL); ++ if (!kds_access_bitmap) { ++ err_ret_val = -ENOMEM; ++ goto early_err_out; ++ } ++#endif /* CONFIG_KDS */ + -+ return new_reg; -+} ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (implicit_sync) { ++ info.resv_objs = kmalloc_array(katom->nr_extres, ++ sizeof(struct reservation_object *), ++ GFP_KERNEL); ++ if (!info.resv_objs) { ++ err_ret_val = -ENOMEM; ++ goto early_err_out; ++ } + -+KBASE_EXPORT_TEST_API(kbase_alloc_free_region); ++ info.dma_fence_excl_bitmap = ++ kcalloc(BITS_TO_LONGS(katom->nr_extres), ++ sizeof(unsigned long), GFP_KERNEL); ++ if (!info.dma_fence_excl_bitmap) { ++ err_ret_val = -ENOMEM; ++ goto early_err_out; ++ } ++ } ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+/** -+ * @brief Free a region object. -+ * -+ * The described region must be freed of any mapping. -+ * -+ * If the region is not flagged as KBASE_REG_FREE, the region's -+ * alloc object will be released. -+ * It is a bug if no alloc object exists for non-free regions. -+ * -+ */ -+void kbase_free_alloced_region(struct kbase_va_region *reg) -+{ -+ if (!(reg->flags & KBASE_REG_FREE)) { -+ /* -+ * The physical allocation should have been removed from the -+ * eviction list before this function is called. 
However, in the -+ * case of abnormal process termination or the app leaking the -+ * memory kbase_mem_free_region is not called so it can still be -+ * on the list at termination time of the region tracker. -+ */ -+ if (!list_empty(®->gpu_alloc->evict_node)) { -+ /* -+ * Unlink the physical allocation before unmaking it -+ * evictable so that the allocation isn't grown back to -+ * its last backed size as we're going to unmap it -+ * anyway. -+ */ -+ reg->cpu_alloc->reg = NULL; -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ reg->gpu_alloc->reg = NULL; ++ /* Take the processes mmap lock */ ++ down_read(¤t->mm->mmap_lock); + -+ /* -+ * If a region has been made evictable then we must -+ * unmake it before trying to free it. -+ * If the memory hasn't been reclaimed it will be -+ * unmapped and freed below, if it has been reclaimed -+ * then the operations below are no-ops. -+ */ -+ if (reg->flags & KBASE_REG_DONT_NEED) { -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == -+ KBASE_MEM_TYPE_NATIVE); -+ kbase_mem_evictable_unmake(reg->gpu_alloc); -+ } -+ } ++ /* need to keep the GPU VM locked while we set up UMM buffers */ ++ kbase_gpu_vm_lock(katom->kctx); ++ for (res_no = 0; res_no < katom->nr_extres; res_no++) { ++ struct base_external_resource *res; ++ struct kbase_va_region *reg; ++ struct kbase_mem_phy_alloc *alloc; ++ bool exclusive; + -+ /* -+ * Remove the region from the sticky resource metadata -+ * list should it be there. -+ */ -+ kbase_sticky_resource_release(reg->kctx, NULL, -+ reg->start_pfn << PAGE_SHIFT); ++ res = &input_extres[res_no]; ++ exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) ++ ? true : false; ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ katom->kctx, ++ res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ /* did we find a matching region object? 
*/ ++ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { ++ /* roll back */ ++ goto failed_loop; ++ } + -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ /* To detect use-after-free in debug builds */ -+ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); -+ } -+ kfree(reg); -+} ++ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && ++ (reg->flags & KBASE_REG_SECURE)) { ++ katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; ++ } + -+KBASE_EXPORT_TEST_API(kbase_free_alloced_region); ++ alloc = kbase_map_external_resource(katom->kctx, reg, ++ current->mm ++#ifdef CONFIG_KDS ++ , &kds_res_count, kds_resources, ++ kds_access_bitmap, exclusive ++#endif ++ ); ++ if (!alloc) { ++ err_ret_val = -EINVAL; ++ goto failed_loop; ++ } + -+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) -+{ -+ int err; -+ size_t i = 0; -+ unsigned long attr; -+ unsigned long mask = ~KBASE_REG_MEMATTR_MASK; ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (implicit_sync && ++ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { ++ struct reservation_object *resv; + -+ if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && -+ (reg->flags & KBASE_REG_SHARE_BOTH)) -+ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); -+ else -+ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); ++ resv = reg->gpu_alloc->imported.umm.dma_buf->resv; ++ if (resv) ++ kbase_dma_fence_add_reservation(resv, &info, ++ exclusive); ++ } ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != reg); ++ /* finish with updating out array with the data we found */ ++ /* NOTE: It is important that this is the last thing we do (or ++ * at least not before the first write) as we overwrite elements ++ * as we loop and could be overwriting ourself, so no writes ++ * until the last read for an element. 
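The copy-to-tail-then-expand pattern used around this loop can be shown in isolation. The sketch below is a self-contained model of the technique only (stand-in struct and field names, not driver code): the small user records are copied to the tail of the larger array and expanded front to back, so each small record is read in full before its bytes can be overwritten.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct small_res {		/* stands in for the user-supplied record */
	uint64_t handle;
};

struct big_res {		/* stands in for the expanded kernel entry */
	uint64_t gpu_address;
	void *alloc;
};

int main(void)
{
	const size_t n = 4;
	const struct small_res user[4] = {
		{ 0x1000 }, { 0x2000 }, { 0x3000 }, { 0x4000 }
	};
	struct big_res *out = malloc(n * sizeof(*out));
	struct small_res *in;
	size_t i;

	if (!out)
		return 1;

	/* Place the small records at the tail of the big allocation. */
	in = (struct small_res *)((unsigned char *)out +
				  n * sizeof(*out) - n * sizeof(*in));
	memcpy(in, user, n * sizeof(*in));

	/* Expand front to back: read in[i] fully before writing out[i];
	 * because the big entries are at least as large as the small ones,
	 * out[i] never overwrites an unread in[j] with j > i. */
	for (i = 0; i < n; i++) {
		uint64_t handle = in[i].handle;

		out[i].gpu_address = handle;
		out[i].alloc = NULL;	/* real code stores the mapping here */
	}

	for (i = 0; i < n; i++)
		printf("res %zu -> 0x%llx\n", i,
		       (unsigned long long)out[i].gpu_address);
	free(out);
	return 0;
}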
++ * */ ++ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ ++ katom->extres[res_no].alloc = alloc; ++ } ++ /* successfully parsed the extres array */ ++ /* drop the vm lock before we call into kds */ ++ kbase_gpu_vm_unlock(katom->kctx); + -+ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); -+ if (err) -+ return err; ++ /* Release the processes mmap lock */ ++ up_read(¤t->mm->mmap_lock); + -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { -+ u64 stride; -+ struct kbase_mem_phy_alloc *alloc; ++#ifdef CONFIG_KDS ++ if (kds_res_count) { ++ int wait_failed; + -+ alloc = reg->gpu_alloc; -+ stride = alloc->imported.alias.stride; -+ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); -+ for (i = 0; i < alloc->imported.alias.nents; i++) { -+ if (alloc->imported.alias.aliased[i].alloc) { -+ err = kbase_mmu_insert_pages(kctx, -+ reg->start_pfn + (i * stride), -+ alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, -+ alloc->imported.alias.aliased[i].length, -+ reg->flags); -+ if (err) -+ goto bad_insert; ++ /* We have resources to wait for with kds */ ++ katom->kds_dep_satisfied = false; + -+ kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); -+ } else { -+ err = kbase_mmu_insert_single_page(kctx, -+ reg->start_pfn + i * stride, -+ page_to_phys(kctx->aliasing_sink_page), -+ alloc->imported.alias.aliased[i].length, -+ (reg->flags & mask) | attr); ++ wait_failed = kds_async_waitall(&katom->kds_rset, ++ &katom->kctx->jctx.kds_cb, katom, NULL, ++ kds_res_count, kds_access_bitmap, ++ kds_resources); + -+ if (err) -+ goto bad_insert; -+ } -+ } ++ if (wait_failed) ++ goto failed_kds_setup; ++ else ++ kbase_jd_kds_waiters_add(katom); + } else { -+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ kbase_reg_current_backed_size(reg), -+ reg->flags); -+ if (err) -+ goto bad_insert; -+ kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc); ++ /* Nothing to wait for, so kds dep met */ ++ katom->kds_dep_satisfied = true; + } ++ kfree(kds_resources); ++ kfree(kds_access_bitmap); ++#endif /* CONFIG_KDS */ + -+ return err; ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (implicit_sync) { ++ if (info.dma_fence_resv_count) { ++ int ret; + -+bad_insert: -+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { -+ u64 stride; ++ ret = kbase_dma_fence_wait(katom, &info); ++ if (ret < 0) ++ goto failed_dma_fence_setup; ++ } + -+ stride = reg->gpu_alloc->imported.alias.stride; -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); -+ while (i--) -+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) { -+ kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length); -+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); -+ } ++ kfree(info.resv_objs); ++ kfree(info.dma_fence_excl_bitmap); + } ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+ kbase_remove_va_region(kctx, reg); -+ -+ return err; -+} -+ -+KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -+ -+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc, bool writeable); -+ -+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) -+{ -+ int err; ++ /* all done OK */ ++ return 0; + -+ if (reg->start_pfn == 0) -+ return 0; ++/* error handling section */ + -+ if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { -+ size_t i; ++#ifdef 
CONFIG_MALI_DMA_FENCE ++failed_dma_fence_setup: ++#ifdef CONFIG_KDS ++ /* If we are here, dma_fence setup failed but KDS didn't. ++ * Revert KDS setup if any. ++ */ ++ if (kds_res_count) { ++ mutex_unlock(&katom->kctx->jctx.lock); ++ kds_resource_set_release_sync(&katom->kds_rset); ++ mutex_lock(&katom->kctx->jctx.lock); + -+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); -+ for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) -+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) -+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); -+ } else { -+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); -+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); ++ kbase_jd_kds_waiters_remove(katom); ++ katom->kds_dep_satisfied = true; + } ++#endif /* CONFIG_KDS */ ++#endif /* CONFIG_MALI_DMA_FENCE */ ++#ifdef CONFIG_KDS ++failed_kds_setup: ++#endif ++#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) ++ /* Lock the processes mmap lock */ ++ down_read(¤t->mm->mmap_lock); + -+ if (reg->gpu_alloc && reg->gpu_alloc->type == -+ KBASE_MEM_TYPE_IMPORTED_USER_BUF) { -+ struct kbase_alloc_import_user_buf *user_buf = -+ ®->gpu_alloc->imported.user_buf; ++ /* lock before we unmap */ ++ kbase_gpu_vm_lock(katom->kctx); ++#endif + -+ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { -+ user_buf->current_mapping_usage_count &= -+ ~PINNED_ON_IMPORT; ++ failed_loop: ++ /* undo the loop work */ ++ while (res_no-- > 0) { ++ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + -+ kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, -+ (reg->flags & KBASE_REG_GPU_WR)); -+ } ++ kbase_unmap_external_resource(katom->kctx, NULL, alloc); + } ++ kbase_gpu_vm_unlock(katom->kctx); + -+ if (err) -+ return err; ++ /* Release the processes mmap lock */ ++ up_read(¤t->mm->mmap_lock); + -+ err = kbase_remove_va_region(kctx, reg); -+ return err; ++ early_err_out: ++ kfree(katom->extres); ++ katom->extres = NULL; ++#ifdef CONFIG_KDS ++ kfree(kds_resources); ++ kfree(kds_access_bitmap); ++#endif /* CONFIG_KDS */ ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (implicit_sync) { ++ kfree(info.resv_objs); ++ kfree(info.dma_fence_excl_bitmap); ++ } ++#endif ++ return err_ret_val; +} + -+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset) ++static inline void jd_resolve_dep(struct list_head *out_list, ++ struct kbase_jd_atom *katom, ++ u8 d, bool ctx_is_dying) +{ -+ struct vm_area_struct *vma; -+ struct kbase_cpu_mapping *map; -+ unsigned long vm_pgoff_in_region; -+ unsigned long vm_off_in_region; -+ unsigned long map_start; -+ size_t map_size; ++ u8 other_d = !d; + -+ lockdep_assert_held(¤t->mm->mmap_lock); ++ while (!list_empty(&katom->dep_head[d])) { ++ struct kbase_jd_atom *dep_atom; ++ struct kbase_jd_atom *other_dep_atom; ++ u8 dep_type; + -+ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ -+ return NULL; ++ dep_atom = list_entry(katom->dep_head[d].next, ++ struct kbase_jd_atom, dep_item[d]); ++ list_del(katom->dep_head[d].next); + -+ vma = find_vma_intersection(current->mm, uaddr, uaddr+size); ++ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); ++ kbase_jd_katom_dep_clear(&dep_atom->dep[d]); + -+ if (!vma || vma->vm_start > uaddr) -+ return NULL; -+ if (vma->vm_ops != &kbase_vm_ops) -+ /* Not ours! 
*/ -+ return NULL; ++ if (katom->event_code != BASE_JD_EVENT_DONE && ++ (dep_type != BASE_JD_DEP_TYPE_ORDER)) { ++#ifdef CONFIG_KDS ++ if (!dep_atom->kds_dep_satisfied) { ++ /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and ++ * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up ++ */ ++ dep_atom->kds_dep_satisfied = true; ++ } ++#endif + -+ map = vma->vm_private_data; ++#ifdef CONFIG_MALI_DMA_FENCE ++ kbase_dma_fence_cancel_callbacks(dep_atom); ++#endif + -+ if (map->kctx != kctx) -+ /* Not from this context! */ -+ return NULL; ++ dep_atom->event_code = katom->event_code; ++ KBASE_DEBUG_ASSERT(dep_atom->status != ++ KBASE_JD_ATOM_STATE_UNUSED); + -+ vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; -+ vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; -+ map_start = vma->vm_start - vm_off_in_region; -+ map_size = map->region->nr_pages << PAGE_SHIFT; ++ if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) ++ != BASE_JD_REQ_SOFT_REPLAY) { ++ dep_atom->will_fail_event_code = ++ dep_atom->event_code; ++ } else { ++ dep_atom->status = ++ KBASE_JD_ATOM_STATE_COMPLETED; ++ } ++ } ++ other_dep_atom = (struct kbase_jd_atom *) ++ kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + -+ if ((uaddr + size) > (map_start + map_size)) -+ /* Not within the CPU mapping */ -+ return NULL; ++ if (!dep_atom->in_jd_list && (!other_dep_atom || ++ (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && ++ !dep_atom->will_fail_event_code && ++ !other_dep_atom->will_fail_event_code))) { ++ bool dep_satisfied = true; ++#ifdef CONFIG_MALI_DMA_FENCE ++ int dep_count; + -+ *offset = (uaddr - vma->vm_start) + vm_off_in_region; ++ dep_count = kbase_fence_dep_count_read(dep_atom); ++ if (likely(dep_count == -1)) { ++ dep_satisfied = true; ++ } else { ++ /* ++ * There are either still active callbacks, or ++ * all fences for this @dep_atom has signaled, ++ * but the worker that will queue the atom has ++ * not yet run. ++ * ++ * Wait for the fences to signal and the fence ++ * worker to run and handle @dep_atom. If ++ * @dep_atom was completed due to error on ++ * @katom, then the fence worker will pick up ++ * the complete status and error code set on ++ * @dep_atom above. ++ */ ++ dep_satisfied = false; ++ } ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+ return map; ++#ifdef CONFIG_KDS ++ dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; ++#endif ++ ++ if (dep_satisfied) { ++ dep_atom->in_jd_list = true; ++ list_add_tail(&dep_atom->jd_item, out_list); ++ } ++ } ++ } +} + -+int kbasep_find_enclosing_cpu_mapping_offset( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset) ++KBASE_EXPORT_TEST_API(jd_resolve_dep); ++ ++#if MALI_CUSTOMER_RELEASE == 0 ++static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ -+ struct kbase_cpu_mapping *map; ++ kbdev->force_replay_count++; + -+ kbase_os_mem_map_lock(kctx); ++ if (kbdev->force_replay_count >= kbdev->force_replay_limit) { ++ kbdev->force_replay_count = 0; ++ katom->event_code = BASE_JD_EVENT_FORCE_REPLAY; + -+ map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); ++ if (kbdev->force_replay_random) ++ kbdev->force_replay_limit = ++ (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1; + -+ kbase_os_mem_map_unlock(kctx); ++ dev_info(kbdev->dev, "force_replay : promoting to error\n"); ++ } ++} + -+ if (!map) -+ return -EINVAL; ++/** Test to see if atom should be forced to fail. 
++ * ++ * This function will check if an atom has a replay job as a dependent. If so ++ * then it will be considered for forced failure. */ ++static void jd_check_force_failure(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int i; + -+ return 0; -+} ++ if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) || ++ (katom->core_req & BASEP_JD_REQ_EVENT_NEVER)) ++ return; + -+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); ++ for (i = 1; i < BASE_JD_ATOM_COUNT; i++) { ++ if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom || ++ kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { ++ struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; + -+void kbase_sync_single(struct kbase_context *kctx, -+ phys_addr_t cpu_pa, phys_addr_t gpu_pa, -+ off_t offset, size_t size, enum kbase_sync_type sync_fn) ++ if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == ++ BASE_JD_REQ_SOFT_REPLAY && ++ (dep_atom->core_req & kbdev->force_replay_core_req) ++ == kbdev->force_replay_core_req) { ++ jd_force_failure(kbdev, katom); ++ return; ++ } ++ } ++ } ++} ++#endif ++ ++/** ++ * is_dep_valid - Validate that a dependency is valid for early dependency ++ * submission ++ * @katom: Dependency atom to validate ++ * ++ * A dependency is valid if any of the following are true : ++ * - It does not exist (a non-existent dependency does not block submission) ++ * - It is in the job scheduler ++ * - It has completed, does not have a failure event code, and has not been ++ * marked to fail in the future ++ * ++ * Return: true if valid, false otherwise ++ */ ++static bool is_dep_valid(struct kbase_jd_atom *katom) +{ -+ struct page *cpu_page; ++ /* If there's no dependency then this is 'valid' from the perspective of ++ * early dependency submission */ ++ if (!katom) ++ return true; + -+ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); ++ /* Dependency must have reached the job scheduler */ ++ if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) ++ return false; + -+ if (likely(cpu_pa == gpu_pa)) { -+ dma_addr_t dma_addr; ++ /* If dependency has completed and has failed or will fail then it is ++ * not valid */ ++ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && ++ (katom->event_code != BASE_JD_EVENT_DONE || ++ katom->will_fail_event_code)) ++ return false; + -+ BUG_ON(!cpu_page); -+ BUG_ON(offset + size > PAGE_SIZE); ++ return true; ++} + -+ dma_addr = kbase_dma_addr(cpu_page) + offset; -+ if (sync_fn == KBASE_SYNC_TO_CPU) -+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, -+ size, DMA_BIDIRECTIONAL); -+ else if (sync_fn == KBASE_SYNC_TO_DEVICE) -+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, -+ size, DMA_BIDIRECTIONAL); -+ } else { -+ void *src = NULL; -+ void *dst = NULL; -+ struct page *gpu_page; ++static void jd_try_submitting_deps(struct list_head *out_list, ++ struct kbase_jd_atom *node) ++{ ++ int i; + -+ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) -+ return; ++ for (i = 0; i < 2; i++) { ++ struct list_head *pos; + -+ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); ++ list_for_each(pos, &node->dep_head[i]) { ++ struct kbase_jd_atom *dep_atom = list_entry(pos, ++ struct kbase_jd_atom, dep_item[i]); + -+ if (sync_fn == KBASE_SYNC_TO_DEVICE) { -+ src = ((unsigned char *)kmap(cpu_page)) + offset; -+ dst = ((unsigned char *)kmap(gpu_page)) + offset; -+ } else if (sync_fn == KBASE_SYNC_TO_CPU) { -+ dma_sync_single_for_cpu(kctx->kbdev->dev, -+ kbase_dma_addr(gpu_page) + offset, -+ 
size, DMA_BIDIRECTIONAL); -+ src = ((unsigned char *)kmap(gpu_page)) + offset; -+ dst = ((unsigned char *)kmap(cpu_page)) + offset; ++ if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { ++ /*Check if atom deps look sane*/ ++ bool dep0_valid = is_dep_valid( ++ dep_atom->dep[0].atom); ++ bool dep1_valid = is_dep_valid( ++ dep_atom->dep[1].atom); ++ bool dep_satisfied = true; ++#ifdef CONFIG_MALI_DMA_FENCE ++ int dep_count; ++ ++ dep_count = kbase_fence_dep_count_read( ++ dep_atom); ++ if (likely(dep_count == -1)) { ++ dep_satisfied = true; ++ } else { ++ /* ++ * There are either still active callbacks, or ++ * all fences for this @dep_atom has signaled, ++ * but the worker that will queue the atom has ++ * not yet run. ++ * ++ * Wait for the fences to signal and the fence ++ * worker to run and handle @dep_atom. If ++ * @dep_atom was completed due to error on ++ * @katom, then the fence worker will pick up ++ * the complete status and error code set on ++ * @dep_atom above. ++ */ ++ dep_satisfied = false; ++ } ++#endif /* CONFIG_MALI_DMA_FENCE */ ++#ifdef CONFIG_KDS ++ dep_satisfied = dep_satisfied && ++ dep_atom->kds_dep_satisfied; ++#endif ++ ++ if (dep0_valid && dep1_valid && dep_satisfied) { ++ dep_atom->in_jd_list = true; ++ list_add(&dep_atom->jd_item, out_list); ++ } ++ } + } -+ memcpy(dst, src, size); -+ kunmap(gpu_page); -+ kunmap(cpu_page); -+ if (sync_fn == KBASE_SYNC_TO_DEVICE) -+ dma_sync_single_for_device(kctx->kbdev->dev, -+ kbase_dma_addr(gpu_page) + offset, -+ size, DMA_BIDIRECTIONAL); + } +} + -+static int kbase_do_syncset(struct kbase_context *kctx, -+ struct basep_syncset *sset, enum kbase_sync_type sync_fn) ++/* ++ * Perform the necessary handling of an atom that has finished running ++ * on the GPU. ++ * ++ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller ++ * is responsible for calling kbase_finish_soft_job *before* calling this function. ++ * ++ * The caller must hold the kbase_jd_context.lock. 
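A sketch of the calling pattern this locking contract implies, mirroring how kds_dep_clear() and kbase_cancel_kds_wait_job() above take the context lock and act on the return value; the wrapper name is illustrative and not part of the driver.

/* Illustrative caller only: complete a submitted atom with an error while
 * honouring the jd_done_nolock() contract described above. */
static void cancel_atom_example(struct kbase_jd_atom *katom)
{
	struct kbase_context *kctx = katom->kctx;
	bool need_resched;

	mutex_lock(&kctx->jctx.lock);		/* contract: hold jctx.lock */
	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
	need_resched = jd_done_nolock(katom, NULL);
	mutex_unlock(&kctx->jctx.lock);

	if (need_resched)			/* kick the scheduler only if asked */
		kbase_js_sched_all(kctx->kbdev);
}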
++ */ ++bool jd_done_nolock(struct kbase_jd_atom *katom, ++ struct list_head *completed_jobs_ctx) +{ -+ int err = 0; -+ struct kbase_va_region *reg; -+ struct kbase_cpu_mapping *map; -+ unsigned long start; -+ size_t size; -+ phys_addr_t *cpu_pa; -+ phys_addr_t *gpu_pa; -+ u64 page_off, page_count; -+ u64 i; -+ u64 offset; -+ -+ kbase_os_mem_map_lock(kctx); -+ kbase_gpu_vm_lock(kctx); ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct list_head completed_jobs; ++ struct list_head runnable_jobs; ++ bool need_to_try_schedule_context = false; ++ int i; + -+ /* find the region where the virtual address is contained */ -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ sset->mem_handle.basep.handle); -+ if (!reg) { -+ dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", -+ sset->mem_handle.basep.handle); -+ err = -EINVAL; -+ goto out_unlock; -+ } ++ INIT_LIST_HEAD(&completed_jobs); ++ INIT_LIST_HEAD(&runnable_jobs); + -+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) -+ goto out_unlock; ++ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + -+ start = (uintptr_t)sset->user_addr; -+ size = (size_t)sset->size; ++#if MALI_CUSTOMER_RELEASE == 0 ++ jd_check_force_failure(katom); ++#endif + -+ map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); -+ if (!map) { -+ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", -+ start, sset->mem_handle.basep.handle); -+ err = -EINVAL; -+ goto out_unlock; ++ /* This is needed in case an atom is failed due to being invalid, this ++ * can happen *before* the jobs that the atom depends on have completed */ ++ for (i = 0; i < 2; i++) { ++ if (kbase_jd_katom_dep_atom(&katom->dep[i])) { ++ list_del(&katom->dep_item[i]); ++ kbase_jd_katom_dep_clear(&katom->dep[i]); ++ } + } + -+ page_off = offset >> PAGE_SHIFT; -+ offset &= ~PAGE_MASK; -+ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; -+ cpu_pa = kbase_get_cpu_phy_pages(reg); -+ gpu_pa = kbase_get_gpu_phy_pages(reg); ++ /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with ++ * BASE_JD_EVENT_TILE_RANGE_FAULT. 
++ * ++ * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the ++ * error code to BASE_JD_EVENT_DONE ++ */ + -+ if (page_off > reg->nr_pages || -+ page_off + page_count > reg->nr_pages) { -+ /* Sync overflows the region */ -+ err = -EINVAL; -+ goto out_unlock; ++ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) && ++ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) { ++ if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) { ++ /* Promote the failure to job done */ ++ katom->event_code = BASE_JD_EVENT_DONE; ++ katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED); ++ } + } + -+ /* Sync first page */ -+ if (cpu_pa[page_off]) { -+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ list_add_tail(&katom->jd_item, &completed_jobs); + -+ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], -+ offset, sz, sync_fn); -+ } ++ while (!list_empty(&completed_jobs)) { ++ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); ++ list_del(completed_jobs.prev); ++ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + -+ /* Sync middle pages (if any) */ -+ for (i = 1; page_count > 2 && i < page_count - 1; i++) { -+ /* we grow upwards, so bail on first non-present page */ -+ if (!cpu_pa[page_off + i]) -+ break; ++ for (i = 0; i < 2; i++) ++ jd_resolve_dep(&runnable_jobs, katom, i, ++ kbase_ctx_flag(kctx, KCTX_DYING)); + -+ kbase_sync_single(kctx, cpu_pa[page_off + i], -+ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); -+ } ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ++ kbase_jd_post_external_resources(katom); + -+ /* Sync last page (if any) */ -+ if (page_count > 1 && cpu_pa[page_off + page_count - 1]) { -+ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; ++ while (!list_empty(&runnable_jobs)) { ++ struct kbase_jd_atom *node; + -+ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], -+ gpu_pa[page_off + page_count - 1], 0, sz, -+ sync_fn); -+ } ++ node = list_entry(runnable_jobs.next, ++ struct kbase_jd_atom, jd_item); ++ list_del(runnable_jobs.next); ++ node->in_jd_list = false; + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ kbase_os_mem_map_unlock(kctx); -+ return err; -+} ++ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + -+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) -+{ -+ int err = -EINVAL; ++ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ need_to_try_schedule_context |= jd_run_atom(node); ++ } else { ++ node->event_code = katom->event_code; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(sset != NULL); ++ if ((node->core_req & ++ BASE_JD_REQ_SOFT_JOB_TYPE) == ++ BASE_JD_REQ_SOFT_REPLAY) { ++ if (kbase_replay_process(node)) ++ /* Don't complete this atom */ ++ continue; ++ } else if (node->core_req & ++ BASE_JD_REQ_SOFT_JOB) { ++ /* If this is a fence wait soft job ++ * then remove it from the list of sync ++ * waiters. 
++ */ ++ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) ++ kbasep_remove_waiting_soft_job(node); + -+ if (sset->mem_handle.basep.handle & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, -+ "mem_handle: passed parameter is invalid"); -+ return -EINVAL; -+ } ++ kbase_finish_soft_job(node); ++ } ++ node->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ } + -+ switch (sset->type) { -+ case BASE_SYNCSET_OP_MSYNC: -+ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); -+ break; ++ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { ++ list_add_tail(&node->jd_item, &completed_jobs); ++ } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && ++ !node->will_fail_event_code) { ++ /* Node successfully submitted, try submitting ++ * dependencies as they may now be representable ++ * in JS */ ++ jd_try_submitting_deps(&runnable_jobs, node); ++ } ++ } + -+ case BASE_SYNCSET_OP_CSYNC: -+ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); -+ break; ++ /* Register a completed job as a disjoint event when the GPU ++ * is in a disjoint state (ie. being reset or replaying jobs). ++ */ ++ kbase_disjoint_event_potential(kctx->kbdev); ++ if (completed_jobs_ctx) ++ list_add_tail(&katom->jd_item, completed_jobs_ctx); ++ else ++ kbase_event_post(kctx, katom); + -+ default: -+ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); -+ break; ++ /* Decrement and check the TOTAL number of jobs. This includes ++ * those not tracked by the scheduler: 'not ready to run' and ++ * 'dependency-only' jobs. */ ++ if (--kctx->jctx.job_nr == 0) ++ wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter ++ * that we've got no more jobs (so we can be safely terminated) */ + } + -+ return err; ++ return need_to_try_schedule_context; +} + -+KBASE_EXPORT_TEST_API(kbase_sync_now); ++KBASE_EXPORT_TEST_API(jd_done_nolock); + -+/* vm lock must be held */ -+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) ++#ifdef CONFIG_GPU_TRACEPOINTS ++enum { ++ CORE_REQ_DEP_ONLY, ++ CORE_REQ_SOFT, ++ CORE_REQ_COMPUTE, ++ CORE_REQ_FRAGMENT, ++ CORE_REQ_VERTEX, ++ CORE_REQ_TILER, ++ CORE_REQ_FRAGMENT_VERTEX, ++ CORE_REQ_FRAGMENT_VERTEX_TILER, ++ CORE_REQ_FRAGMENT_TILER, ++ CORE_REQ_VERTEX_TILER, ++ CORE_REQ_UNKNOWN ++}; ++static const char * const core_req_strings[] = { ++ "Dependency Only Job", ++ "Soft Job", ++ "Compute Shader Job", ++ "Fragment Shader Job", ++ "Vertex/Geometry Shader Job", ++ "Tiler Job", ++ "Fragment Shader + Vertex/Geometry Shader Job", ++ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", ++ "Fragment Shader + Tiler Job", ++ "Vertex/Geometry Shader Job + Tiler Job", ++ "Unknown Job" ++}; ++static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) +{ -+ int err; ++ if (core_req & BASE_JD_REQ_SOFT_JOB) ++ return core_req_strings[CORE_REQ_SOFT]; ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ return core_req_strings[CORE_REQ_COMPUTE]; ++ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { ++ case BASE_JD_REQ_DEP: ++ return core_req_strings[CORE_REQ_DEP_ONLY]; ++ case BASE_JD_REQ_FS: ++ return core_req_strings[CORE_REQ_FRAGMENT]; ++ case BASE_JD_REQ_CS: ++ return core_req_strings[CORE_REQ_VERTEX]; ++ case BASE_JD_REQ_T: ++ return core_req_strings[CORE_REQ_TILER]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): ++ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_T): ++ return core_req_strings[CORE_REQ_FRAGMENT_TILER]; ++ case (BASE_JD_REQ_CS | BASE_JD_REQ_T): 
++ return core_req_strings[CORE_REQ_VERTEX_TILER]; ++ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): ++ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; ++ } ++ return core_req_strings[CORE_REQ_UNKNOWN]; ++} ++#endif + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != reg); -+ lockdep_assert_held(&kctx->reg_lock); ++bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) ++{ ++ struct kbase_jd_context *jctx = &kctx->jctx; ++ int queued = 0; ++ int i; ++ int sched_prio; ++ bool ret; ++ bool will_fail = false; + -+ /* -+ * Unlink the physical allocation before unmaking it evictable so -+ * that the allocation isn't grown back to its last backed size -+ * as we're going to unmap it anyway. -+ */ -+ reg->cpu_alloc->reg = NULL; -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ reg->gpu_alloc->reg = NULL; ++ /* Update the TOTAL number of jobs. This includes those not tracked by ++ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ ++ jctx->job_nr++; + -+ /* -+ * If a region has been made evictable then we must unmake it -+ * before trying to free it. -+ * If the memory hasn't been reclaimed it will be unmapped and freed -+ * below, if it has been reclaimed then the operations below are no-ops. -+ */ -+ if (reg->flags & KBASE_REG_DONT_NEED) { -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == -+ KBASE_MEM_TYPE_NATIVE); -+ kbase_mem_evictable_unmake(reg->gpu_alloc); -+ } ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ katom->start_timestamp.tv64 = 0; ++#else ++ katom->start_timestamp = 0; ++#endif ++ katom->udata = user_atom->udata; ++ katom->kctx = kctx; ++ katom->nr_extres = user_atom->nr_extres; ++ katom->extres = NULL; ++ katom->device_nr = user_atom->device_nr; ++ katom->affinity = 0; ++ katom->jc = user_atom->jc; ++ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; ++ katom->core_req = user_atom->core_req; ++ katom->atom_flags = 0; ++ katom->retry_count = 0; ++ katom->need_cache_flush_cores_retained = 0; ++ katom->pre_dep = NULL; ++ katom->post_dep = NULL; ++ katom->x_pre_dep = NULL; ++ katom->x_post_dep = NULL; ++ katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; + -+ err = kbase_gpu_munmap(kctx, reg); -+ if (err) { -+ dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); -+ goto out; -+ } ++ /* Implicitly sets katom->protected_state.enter as well. */ ++ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + -+ /* This will also free the physical pages */ -+ kbase_free_alloced_region(reg); ++ katom->age = kctx->age_count++; + -+ out: -+ return err; -+} ++ INIT_LIST_HEAD(&katom->jd_item); ++#ifdef CONFIG_KDS ++ /* Start by assuming that the KDS dependencies are satisfied, ++ * kbase_jd_pre_external_resources will correct this if there are dependencies */ ++ katom->kds_dep_satisfied = true; ++ katom->kds_rset = NULL; ++#endif /* CONFIG_KDS */ ++#ifdef CONFIG_MALI_DMA_FENCE ++ kbase_fence_dep_count_set(katom, -1); ++#endif + -+KBASE_EXPORT_TEST_API(kbase_mem_free_region); ++ /* Don't do anything if there is a mess up with dependencies. ++ This is done in a separate cycle to check both the dependencies at ones, otherwise ++ it will be extra complexity to deal with 1st dependency ( just added to the list ) ++ if only the 2nd one has invalid config. 
++ */ ++ for (i = 0; i < 2; i++) { ++ int dep_atom_number = user_atom->pre_dep[i].atom_id; ++ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; + -+/** -+ * @brief Free the region from the GPU and unregister it. -+ * -+ * This function implements the free operation on a memory segment. -+ * It will loudly fail if called with outstanding mappings. -+ */ -+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) -+{ -+ int err = 0; -+ struct kbase_va_region *reg; ++ if (dep_atom_number) { ++ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && ++ dep_atom_type != BASE_JD_DEP_TYPE_DATA) { ++ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); ++ /* Wrong dependency setup. Atom will be sent ++ * back to user space. Do not record any ++ * dependencies. */ ++ KBASE_TLSTREAM_TL_NEW_ATOM( ++ katom, ++ kbase_jd_atom_id(kctx, katom)); ++ KBASE_TLSTREAM_TL_RET_ATOM_CTX( ++ katom, kctx); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, ++ TL_ATOM_STATE_IDLE); + -+ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { -+ dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); -+ return -EINVAL; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } ++ } + } + -+ if (0 == gpu_addr) { -+ dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); -+ return -EINVAL; -+ } -+ kbase_gpu_vm_lock(kctx); ++ /* Add dependencies */ ++ for (i = 0; i < 2; i++) { ++ int dep_atom_number = user_atom->pre_dep[i].atom_id; ++ base_jd_dep_type dep_atom_type; ++ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + -+ if (gpu_addr >= BASE_MEM_COOKIE_BASE && -+ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { -+ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); ++ dep_atom_type = user_atom->pre_dep[i].dependency_type; ++ kbase_jd_katom_dep_clear(&katom->dep[i]); + -+ reg = kctx->pending_regions[cookie]; -+ if (!reg) { -+ err = -EINVAL; -+ goto out_unlock; -+ } ++ if (!dep_atom_number) ++ continue; + -+ /* ask to unlink the cookie as we'll free it */ ++ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || ++ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + -+ kctx->pending_regions[cookie] = NULL; -+ kctx->cookies |= (1UL << cookie); ++ if (dep_atom->event_code == BASE_JD_EVENT_DONE) ++ continue; ++ /* don't stop this atom if it has an order dependency ++ * only to the failed one, try to submit it through ++ * the normal path ++ */ ++ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && ++ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { ++ continue; ++ } + -+ kbase_free_alloced_region(reg); -+ } else { -+ /* A real GPU va */ -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (!reg || (reg->flags & KBASE_REG_FREE)) { -+ dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", -+ gpu_addr); -+ err = -EINVAL; -+ goto out_unlock; -+ } ++ /* Atom has completed, propagate the error code if any */ ++ katom->event_code = dep_atom->event_code; ++ katom->status = KBASE_JD_ATOM_STATE_QUEUED; + -+ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { -+ /* SAME_VA must be freed through munmap */ -+ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, -+ gpu_addr); -+ err = -EINVAL; -+ goto out_unlock; -+ } -+ err = kbase_mem_free_region(kctx, reg); -+ } ++ /* This atom is going through soft replay or ++ * 
will be sent back to user space. Do not record any ++ * dependencies. */ ++ KBASE_TLSTREAM_TL_NEW_ATOM( ++ katom, ++ kbase_jd_atom_id(kctx, katom)); ++ KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, ++ TL_ATOM_STATE_IDLE); + -+ out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return err; -+} ++ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ++ == BASE_JD_REQ_SOFT_REPLAY) { ++ if (kbase_replay_process(katom)) { ++ ret = false; ++ goto out; ++ } ++ } ++ will_fail = true; + -+KBASE_EXPORT_TEST_API(kbase_mem_free); ++ } else { ++ /* Atom is in progress, add this atom to the list */ ++ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); ++ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); ++ queued = 1; ++ } ++ } + -+int kbase_update_region_flags(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned long flags) -+{ -+ KBASE_DEBUG_ASSERT(NULL != reg); -+ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); ++ if (will_fail) { ++ if (!queued) { ++ ret = jd_done_nolock(katom, NULL); + -+ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); -+ /* all memory is now growable */ -+ reg->flags |= KBASE_REG_GROWABLE; ++ goto out; ++ } else { ++ katom->will_fail_event_code = katom->event_code; ++ ret = false; + -+ if (flags & BASE_MEM_GROW_ON_GPF) -+ reg->flags |= KBASE_REG_PF_GROW; ++ goto out; ++ } ++ } else { ++ /* These must occur after the above loop to ensure that an atom ++ * that depends on a previous atom with the same number behaves ++ * as expected */ ++ katom->event_code = BASE_JD_EVENT_DONE; ++ katom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ } + -+ if (flags & BASE_MEM_PROT_CPU_WR) -+ reg->flags |= KBASE_REG_CPU_WR; ++ /* For invalid priority, be most lenient and choose the default */ ++ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); ++ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) ++ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; ++ katom->sched_priority = sched_prio; + -+ if (flags & BASE_MEM_PROT_CPU_RD) -+ reg->flags |= KBASE_REG_CPU_RD; ++ /* Create a new atom recording all dependencies it was set up with. */ ++ KBASE_TLSTREAM_TL_NEW_ATOM( ++ katom, ++ kbase_jd_atom_id(kctx, katom)); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority); ++ KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); ++ for (i = 0; i < 2; i++) ++ if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( ++ &katom->dep[i])) { ++ KBASE_TLSTREAM_TL_DEP_ATOM_ATOM( ++ (void *)kbase_jd_katom_dep_atom( ++ &katom->dep[i]), ++ (void *)katom); ++ } else if (BASE_JD_DEP_TYPE_INVALID != ++ user_atom->pre_dep[i].dependency_type) { ++ /* Resolved dependency. 
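/*
 * Editorial sketch of the "be most lenient" priority policy above: the
 * user-supplied priority is mapped to a scheduler level, and an
 * unrecognised value silently falls back to the default instead of
 * failing the atom. The constants are illustrative assumptions, not
 * the actual KBASE_JS_ATOM_SCHED_PRIO_* values.
 */
#include <stdio.h>

#define SCHED_PRIO_HIGH    0
#define SCHED_PRIO_MEDIUM  1
#define SCHED_PRIO_LOW     2
#define SCHED_PRIO_INVALID (-1)
#define SCHED_PRIO_DEFAULT SCHED_PRIO_MEDIUM

static int user_prio_to_sched_prio(int user_prio)
{
    switch (user_prio) {
    case 0:  return SCHED_PRIO_MEDIUM;
    case 1:  return SCHED_PRIO_HIGH;
    case -1: return SCHED_PRIO_LOW;
    default: return SCHED_PRIO_INVALID; /* unrecognised value */
    }
}

int main(void)
{
    int raw[] = { 1, 0, -1, 42 };
    int i;

    for (i = 0; i < 4; i++) {
        int prio = user_prio_to_sched_prio(raw[i]);

        if (prio == SCHED_PRIO_INVALID)
            prio = SCHED_PRIO_DEFAULT;  /* lenient fallback */
        printf("user %d -> sched %d\n", raw[i], prio);
    }
    return 0;
}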
*/ ++ int dep_atom_number = ++ user_atom->pre_dep[i].atom_id; ++ struct kbase_jd_atom *dep_atom = ++ &jctx->atoms[dep_atom_number]; + -+ if (flags & BASE_MEM_PROT_GPU_WR) -+ reg->flags |= KBASE_REG_GPU_WR; ++ KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM( ++ (void *)dep_atom, ++ (void *)katom); ++ } + -+ if (flags & BASE_MEM_PROT_GPU_RD) -+ reg->flags |= KBASE_REG_GPU_RD; ++ /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ ++ if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { ++ dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + -+ if (0 == (flags & BASE_MEM_PROT_GPU_EX)) -+ reg->flags |= KBASE_REG_GPU_NX; ++ /* Reject atoms with an invalid device_nr */ ++ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && ++ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { ++ dev_warn(kctx->kbdev->dev, ++ "Rejecting atom with invalid device_nr %d", ++ katom->device_nr); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + -+ if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) -+ return -EINVAL; -+ } else if (flags & (BASE_MEM_COHERENT_SYSTEM | -+ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { -+ reg->flags |= KBASE_REG_SHARE_BOTH; ++ /* Reject atoms with invalid core requirements */ ++ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && ++ (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { ++ dev_warn(kctx->kbdev->dev, ++ "Rejecting atom with invalid core requirements"); ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; + } + -+ if (!(reg->flags & KBASE_REG_SHARE_BOTH) && -+ flags & BASE_MEM_COHERENT_LOCAL) { -+ reg->flags |= KBASE_REG_SHARE_IN; ++ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++ /* handle what we need to do to access the external resources */ ++ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { ++ /* setup failed (no access, bad resource, unknown resource types, etc.) */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + } + -+ /* Set up default MEMATTR usage */ -+ if (kctx->kbdev->system_coherency == COHERENCY_ACE && -+ (reg->flags & KBASE_REG_SHARE_BOTH)) { -+ reg->flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ /* Validate the atom. Function will return error if the atom is ++ * malformed. ++ * ++ * Soft-jobs never enter the job scheduler but have their own initialize method. ++ * ++ * If either fail then we immediately complete the atom with an error. 
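/*
 * Editorial sketch of the early-rejection checks above: an atom whose
 * request combines features that cannot be used together, or that
 * lacks a job chain while claiming to be a real job, is refused before
 * it ever reaches the scheduler. The REQ_* flag names are illustrative
 * assumptions, not the BASE_JD_REQ_* values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REQ_EXTERNAL_RESOURCES (1u << 0)
#define REQ_EVENT_COALESCE     (1u << 1)
#define REQ_DEP_ONLY           (1u << 2)

static bool core_req_is_valid(uint32_t req, uint64_t job_chain)
{
    /* a real job must carry a job-chain address */
    if (!job_chain && !(req & REQ_DEP_ONLY))
        return false;

    /* external resources and event coalescing are mutually exclusive */
    if ((req & REQ_EXTERNAL_RESOURCES) && (req & REQ_EVENT_COALESCE))
        return false;

    return true;
}

int main(void)
{
    printf("%d\n", core_req_is_valid(REQ_EXTERNAL_RESOURCES, 0x1000)); /* 1 */
    printf("%d\n", core_req_is_valid(REQ_EXTERNAL_RESOURCES |
                                     REQ_EVENT_COALESCE, 0x1000));     /* 0 */
    printf("%d\n", core_req_is_valid(0, 0));                           /* 0 */
    return 0;
}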
++ */ ++ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { ++ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + } else { -+ reg->flags |= -+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); ++ /* Soft-job */ ++ if (kbase_prepare_soft_job(katom) != 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + } + -+ return 0; -+} -+ -+int kbase_alloc_phy_pages_helper( -+ struct kbase_mem_phy_alloc *alloc, -+ size_t nr_pages_requested) -+{ -+ int new_page_count __maybe_unused; -+ size_t old_page_count = alloc->nents; -+ -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); -+ KBASE_DEBUG_ASSERT(alloc->imported.kctx); -+ -+ if (nr_pages_requested == 0) -+ goto done; /*nothing to do*/ ++#ifdef CONFIG_GPU_TRACEPOINTS ++ katom->work_id = atomic_inc_return(&jctx->work_id); ++ trace_gpu_job_enqueue((u32)kctx->id, katom->work_id, ++ kbasep_map_core_reqs_to_string(katom->core_req)); ++#endif + -+ new_page_count = kbase_atomic_add_pages( -+ nr_pages_requested, &alloc->imported.kctx->used_pages); -+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); ++ if (queued && !IS_GPU_ATOM(katom)) { ++ ret = false; ++ goto out; ++ } ++#ifdef CONFIG_KDS ++ if (!katom->kds_dep_satisfied) { ++ /* Queue atom due to KDS dependency */ ++ ret = false; ++ goto out; ++ } ++#endif /* CONFIG_KDS */ + -+ /* Increase mm counters before we allocate pages so that this -+ * allocation is visible to the OOM killer */ -+ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); + -+ if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool, -+ nr_pages_requested, alloc->pages + old_page_count) != 0) -+ goto no_alloc; ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (kbase_fence_dep_count_read(katom) != -1) { ++ ret = false; ++ goto out; ++ } ++#endif /* CONFIG_MALI_DMA_FENCE */ + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)alloc->imported.kctx->id, -+ (u64)new_page_count); ++ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ++ == BASE_JD_REQ_SOFT_REPLAY) { ++ if (kbase_replay_process(katom)) ++ ret = false; ++ else ++ ret = jd_done_nolock(katom, NULL); + -+ alloc->nents += nr_pages_requested; -+done: -+ return 0; ++ goto out; ++ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { ++ if (kbase_process_soft_job(katom) == 0) { ++ kbase_finish_soft_job(katom); ++ ret = jd_done_nolock(katom, NULL); ++ goto out; ++ } + -+no_alloc: -+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested); -+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); -+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); ++ ret = false; ++ } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ ret = kbasep_js_add_job(kctx, katom); ++ /* If job was cancelled then resolve immediately */ ++ if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) ++ ret = jd_done_nolock(katom, NULL); ++ } else { ++ /* This is a pure dependency. 
Resolve it immediately */ ++ ret = jd_done_nolock(katom, NULL); ++ } + -+ return -ENOMEM; ++ out: ++ return ret; +} + -+int kbase_free_phy_pages_helper( -+ struct kbase_mem_phy_alloc *alloc, -+ size_t nr_pages_to_free) ++int kbase_jd_submit(struct kbase_context *kctx, ++ void __user *user_addr, u32 nr_atoms, u32 stride, ++ bool uk6_atom) +{ -+ struct kbase_context *kctx = alloc->imported.kctx; -+ bool syncback; -+ bool reclaimed = (alloc->evicted != 0); -+ phys_addr_t *start_free; -+ int new_page_count __maybe_unused; ++ struct kbase_jd_context *jctx = &kctx->jctx; ++ int err = 0; ++ int i; ++ bool need_to_try_schedule_context = false; ++ struct kbase_device *kbdev; ++ u32 latest_flush; + -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); -+ KBASE_DEBUG_ASSERT(alloc->imported.kctx); -+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); ++ /* ++ * kbase_jd_submit isn't expected to fail and so all errors with the ++ * jobs are reported by immediately failing them (through event system) ++ */ ++ kbdev = kctx->kbdev; + -+ /* early out if nothing to do */ -+ if (0 == nr_pages_to_free) -+ return 0; ++ beenthere(kctx, "%s", "Enter"); + -+ start_free = alloc->pages + alloc->nents - nr_pages_to_free; ++ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); ++ return -EINVAL; ++ } + -+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ if (stride != sizeof(base_jd_atom_v2)) { ++ dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); ++ return -EINVAL; ++ } + -+ kbase_mem_pool_free_pages(&kctx->mem_pool, -+ nr_pages_to_free, -+ start_free, -+ syncback, -+ reclaimed); ++ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms, ++ &kctx->timeline.jd_atoms_in_flight)); + -+ alloc->nents -= nr_pages_to_free; ++ /* All atoms submitted in this call have the same flush ID */ ++ latest_flush = kbase_backend_get_current_flush_id(kbdev); + -+ /* -+ * If the allocation was not evicted (i.e. evicted == 0) then -+ * the page accounting needs to be done. 
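/*
 * Editorial sketch of the stride check at the top of the submit path
 * above: userspace passes the record size it was built against, and
 * the receiver refuses the whole batch if that size does not match its
 * own structure, catching user/kernel ABI mismatches up front. This is
 * a userspace analogue only; memcpy() stands in for copy_from_user()
 * and the struct/function names are assumptions.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct atom_v2 {
    unsigned long long jc;
    unsigned int atom_number;
    unsigned int core_req;
};

static int submit_atoms(const void *user_buf, unsigned int nr, size_t stride)
{
    unsigned int i;

    if (stride != sizeof(struct atom_v2))
        return -1;  /* caller was built against a different ABI */

    for (i = 0; i < nr; i++) {
        struct atom_v2 atom;

        /* stand-in for copy_from_user(): one record per stride */
        memcpy(&atom, (const char *)user_buf + i * stride, sizeof(atom));
        printf("atom %u: jc=0x%llx\n", atom.atom_number, atom.jc);
    }
    return 0;
}

int main(void)
{
    struct atom_v2 batch[2] = {
        { 0x1000, 0, 0 },
        { 0x2000, 1, 0 },
    };

    return submit_atoms(batch, 2, sizeof(struct atom_v2));
}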
-+ */ -+ if (!reclaimed) { -+ kbase_process_page_usage_dec(kctx, nr_pages_to_free); -+ new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, -+ &kctx->used_pages); -+ kbase_atomic_sub_pages(nr_pages_to_free, -+ &kctx->kbdev->memdev.used_pages); ++ for (i = 0; i < nr_atoms; i++) { ++ struct base_jd_atom_v2 user_atom; ++ struct kbase_jd_atom *katom; + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)kctx->id, -+ (u64)new_page_count); -+ } ++#ifdef BASE_LEGACY_UK6_SUPPORT ++ BUILD_BUG_ON(sizeof(struct base_jd_atom_v2_uk6) != ++ sizeof(base_jd_atom_v2)); + -+ return 0; -+} ++ if (uk6_atom) { ++ struct base_jd_atom_v2_uk6 user_atom_v6; ++ base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA}; + -+void kbase_mem_kref_free(struct kref *kref) -+{ -+ struct kbase_mem_phy_alloc *alloc; ++ if (copy_from_user(&user_atom_v6, user_addr, ++ sizeof(user_atom_v6))) { ++ err = -EINVAL; ++ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, ++ atomic_sub_return( ++ nr_atoms - i, ++ &kctx->timeline.jd_atoms_in_flight)); ++ break; ++ } ++ /* Convert from UK6 atom format to UK7 format */ ++ user_atom.jc = user_atom_v6.jc; ++ user_atom.udata = user_atom_v6.udata; ++ user_atom.extres_list = user_atom_v6.extres_list; ++ user_atom.nr_extres = user_atom_v6.nr_extres; ++ user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff); + -+ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); ++ /* atom number 0 is used for no dependency atoms */ ++ if (!user_atom_v6.pre_dep[0]) ++ dep_types[0] = BASE_JD_DEP_TYPE_INVALID; + -+ switch (alloc->type) { -+ case KBASE_MEM_TYPE_NATIVE: { -+ WARN_ON(!alloc->imported.kctx); -+ /* -+ * The physical allocation must have been removed from the -+ * eviction list before trying to free it. -+ */ -+ WARN_ON(!list_empty(&alloc->evict_node)); -+ kbase_free_phy_pages_helper(alloc, alloc->nents); -+ break; -+ } -+ case KBASE_MEM_TYPE_ALIAS: { -+ /* just call put on the underlying phy allocs */ -+ size_t i; -+ struct kbase_aliased *aliased; ++ base_jd_atom_dep_set(&user_atom.pre_dep[0], ++ user_atom_v6.pre_dep[0], ++ dep_types[0]); + -+ aliased = alloc->imported.alias.aliased; -+ if (aliased) { -+ for (i = 0; i < alloc->imported.alias.nents; i++) -+ if (aliased[i].alloc) -+ kbase_mem_phy_alloc_put(aliased[i].alloc); -+ vfree(aliased); ++ /* atom number 0 is used for no dependency atoms */ ++ if (!user_atom_v6.pre_dep[1]) ++ dep_types[1] = BASE_JD_DEP_TYPE_INVALID; ++ ++ base_jd_atom_dep_set(&user_atom.pre_dep[1], ++ user_atom_v6.pre_dep[1], ++ dep_types[1]); ++ ++ user_atom.atom_number = user_atom_v6.atom_number; ++ user_atom.prio = user_atom_v6.prio; ++ user_atom.device_nr = user_atom_v6.device_nr; ++ } else { ++#endif /* BASE_LEGACY_UK6_SUPPORT */ ++ if (copy_from_user(&user_atom, user_addr, ++ sizeof(user_atom)) != 0) { ++ err = -EINVAL; ++ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, ++ atomic_sub_return(nr_atoms - i, ++ &kctx->timeline.jd_atoms_in_flight)); ++ break; ++ } ++#ifdef BASE_LEGACY_UK6_SUPPORT + } -+ break; -+ } -+ case KBASE_MEM_TYPE_RAW: -+ /* raw pages, external cleanup */ -+ break; -+ #ifdef CONFIG_UMP -+ case KBASE_MEM_TYPE_IMPORTED_UMP: -+ ump_dd_release(alloc->imported.ump_handle); -+ break; +#endif -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case KBASE_MEM_TYPE_IMPORTED_UMM: -+ dma_buf_detach(alloc->imported.umm.dma_buf, -+ alloc->imported.umm.dma_attachment); -+ dma_buf_put(alloc->imported.umm.dma_buf); -+ break; -+#endif -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ if (alloc->imported.user_buf.mm) -+ mmdrop(alloc->imported.user_buf.mm); -+ 
kfree(alloc->imported.user_buf.pages); -+ break; -+ case KBASE_MEM_TYPE_TB:{ -+ void *tb; + -+ tb = alloc->imported.kctx->jctx.tb; -+ kbase_device_trace_buffer_uninstall(alloc->imported.kctx); -+ vfree(tb); -+ break; -+ } -+ default: -+ WARN(1, "Unexecpted free of type %d\n", alloc->type); -+ break; -+ } ++#ifdef BASE_LEGACY_UK10_2_SUPPORT ++ if (KBASE_API_VERSION(10, 3) > kctx->api_version) ++ user_atom.core_req = (u32)(user_atom.compat_core_req ++ & 0x7fff); ++#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + -+ /* Free based on allocation type */ -+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) -+ vfree(alloc); -+ else -+ kfree(alloc); -+} ++ user_addr = (void __user *)((uintptr_t) user_addr + stride); + -+KBASE_EXPORT_TEST_API(kbase_mem_kref_free); ++ mutex_lock(&jctx->lock); ++#ifndef compiletime_assert ++#define compiletime_assert_defined ++#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ ++while (false) ++#endif ++ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) >= ++ BASE_JD_ATOM_COUNT, ++ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); ++ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == ++ sizeof(user_atom.atom_number), ++ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); ++#ifdef compiletime_assert_defined ++#undef compiletime_assert ++#undef compiletime_assert_defined ++#endif ++ if (user_atom.atom_number >= BASE_JD_ATOM_COUNT) { ++ err = -EINVAL; ++ break; ++ } ++ user_atom.atom_number = ++ array_index_nospec(user_atom.atom_number, ++ BASE_JD_ATOM_COUNT); ++ katom = &jctx->atoms[user_atom.atom_number]; + -+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) -+{ -+ KBASE_DEBUG_ASSERT(NULL != reg); -+ KBASE_DEBUG_ASSERT(vsize > 0); ++ /* Record the flush ID for the cache flush optimisation */ ++ katom->flush_id = latest_flush; + -+ /* validate user provided arguments */ -+ if (size > vsize || vsize > reg->nr_pages) -+ goto out_term; ++ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { ++ /* Atom number is already in use, wait for the atom to ++ * complete ++ */ ++ mutex_unlock(&jctx->lock); + -+ /* Prevent vsize*sizeof from wrapping around. -+ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. -+ */ -+ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) -+ goto out_term; ++ /* This thread will wait for the atom to complete. Due ++ * to thread scheduling we are not sure that the other ++ * thread that owns the atom will also schedule the ++ * context, so we force the scheduler to be active and ++ * hence eventually schedule this context at some point ++ * later. ++ */ ++ kbase_js_sched_all(kbdev); + -+ KBASE_DEBUG_ASSERT(0 != vsize); ++ if (wait_event_killable(katom->completed, ++ katom->status == ++ KBASE_JD_ATOM_STATE_UNUSED) != 0) { ++ /* We're being killed so the result code ++ * doesn't really matter ++ */ ++ return 0; ++ } ++ mutex_lock(&jctx->lock); ++ } + -+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) -+ goto out_term; ++ need_to_try_schedule_context |= ++ jd_submit_atom(kctx, &user_atom, katom); + -+ reg->cpu_alloc->reg = reg; -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) -+ goto out_rollback; -+ reg->gpu_alloc->reg = reg; ++ /* Register a completed job as a disjoint event when the GPU is in a disjoint state ++ * (ie. being reset or replaying jobs). 
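/*
 * Editorial sketch: the locally defined compiletime_assert() fallback
 * above relies on a classic C trick; "case 0:" and "case (x):" collide
 * when x evaluates to 0, so a false compile-time condition produces a
 * duplicate-case-label error instead of a runtime check. Minimal
 * standalone demonstration (BUILD_CHECK is an assumed name, not a
 * kernel macro).
 */
#include <stdio.h>

#define BUILD_CHECK(cond) \
    do { switch (0) { case 0: case (cond):; } } while (0)

struct atom { unsigned char atom_number; };

int main(void)
{
    /* Passes: 256 distinct atom numbers fit in an unsigned char index. */
    BUILD_CHECK((1 << (8 * sizeof(((struct atom *)0)->atom_number))) >= 256);

    /* Changing 256 to 257 above would fail to compile: the condition
     * becomes 0 and the two case labels collide.
     */
    printf("compile-time checks passed\n");
    return 0;
}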
++ */ ++ kbase_disjoint_event_potential(kbdev); ++ ++ mutex_unlock(&jctx->lock); + } + -+ return 0; ++ if (need_to_try_schedule_context) ++ kbase_js_sched_all(kbdev); + -+out_rollback: -+ kbase_free_phy_pages_helper(reg->cpu_alloc, size); -+out_term: -+ return -1; ++ return err; +} + -+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); ++KBASE_EXPORT_TEST_API(kbase_jd_submit); + -+bool kbase_check_alloc_flags(unsigned long flags) ++void kbase_jd_done_worker(struct work_struct *data) +{ -+ /* Only known input flags should be set. */ -+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) -+ return false; ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); ++ struct kbase_jd_context *jctx; ++ struct kbase_context *kctx; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; ++ u64 cache_jc = katom->jc; ++ struct kbasep_js_atom_retained_state katom_retained_state; ++ bool context_idle; ++ base_jd_core_req core_req = katom->core_req; ++ u64 affinity = katom->affinity; ++ enum kbase_atom_coreref_state coreref_state = katom->coreref_state; + -+ /* At least one flag should be set */ -+ if (flags == 0) -+ return false; ++ /* Soft jobs should never reach this function */ ++ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + -+ /* Either the GPU or CPU must be reading from the allocated memory */ -+ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) -+ return false; ++ kctx = katom->kctx; ++ jctx = &kctx->jctx; ++ kbdev = kctx->kbdev; ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ /* Either the GPU or CPU must be writing to the allocated memory */ -+ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + -+ /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */ -+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) -+ return false; ++ kbase_backend_complete_wq(kbdev, katom); + -+ /* GPU should have at least read or write access otherwise there is no -+ reason for allocating. */ -+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++ /* ++ * Begin transaction on JD context and JS context ++ */ ++ mutex_lock(&jctx->lock); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE); ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ -+ if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) -+ return false; ++ /* This worker only gets called on contexts that are scheduled *in*. This is ++ * because it only happens in response to an IRQ from a job that was ++ * running. ++ */ ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ return true; -+} ++ if (katom->event_code == BASE_JD_EVENT_STOPPED) { ++ /* Atom has been promoted to stopped */ ++ unsigned long flags; + -+bool kbase_check_import_flags(unsigned long flags) -+{ -+ /* Only known input flags should be set. 
*/ -+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) -+ return false; ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+ /* At least one flag should be set */ -+ if (flags == 0) -+ return false; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Imported memory cannot be GPU executable */ -+ if (flags & BASE_MEM_PROT_GPU_EX) -+ return false; ++ katom->status = KBASE_JD_ATOM_STATE_IN_JS; ++ kbase_js_unpull(kctx, katom); + -+ /* Imported memory cannot grow on page fault */ -+ if (flags & BASE_MEM_GROW_ON_GPF) -+ return false; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&jctx->lock); + -+ /* GPU should have at least read or write access otherwise there is no -+ reason for importing. */ -+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) -+ return false; ++ return; ++ } + -+ /* Secure memory cannot be read by the CPU */ -+ if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD)) -+ return false; ++ if (katom->event_code != BASE_JD_EVENT_DONE) ++ dev_err(kbdev->dev, ++ "t6xx: GPU fault 0x%02lx from job slot %d\n", ++ (unsigned long)katom->event_code, ++ katom->slot_nr); + -+ return true; -+} ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) ++ kbase_as_poking_timer_release_atom(kbdev, kctx, katom); + -+/** -+ * @brief Acquire the per-context region list lock -+ */ -+void kbase_gpu_vm_lock(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ mutex_lock(&kctx->reg_lock); -+} ++ /* Retain state before the katom disappears */ ++ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + -+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); ++ context_idle = kbase_js_complete_atom_wq(kctx, katom); + -+/** -+ * @brief Release the per-context region list lock -+ */ -+void kbase_gpu_vm_unlock(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ mutex_unlock(&kctx->reg_lock); -+} ++ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); + -+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); ++ kbasep_js_remove_job(kbdev, kctx, katom); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; ++ /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ ++ jd_done_nolock(katom, &kctx->completed_jobs); + -+#ifdef CONFIG_DEBUG_FS -+struct kbase_jit_debugfs_data { -+ int (*func)(struct kbase_jit_debugfs_data *); -+ struct mutex lock; -+ struct kbase_context *kctx; -+ u64 active_value; -+ u64 pool_value; -+ u64 destroy_value; -+ char buffer[50]; -+}; ++ /* katom may have been freed now, do not use! */ + -+static int kbase_jit_debugfs_common_open(struct inode *inode, -+ struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) -+{ -+ struct kbase_jit_debugfs_data *data; ++ if (context_idle) { ++ unsigned long flags; + -+ data = kzalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return -ENOMEM; ++ context_idle = false; ++ mutex_lock(&js_devdata->queue_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ data->func = func; -+ mutex_init(&data->lock); -+ data->kctx = (struct kbase_context *) inode->i_private; ++ /* If kbase_sched() has scheduled this context back in then ++ * KCTX_ACTIVE will have been set after we marked it as ++ * inactive, and another pm reference will have been taken, so ++ * drop our reference. But do not call kbase_jm_idle_ctx(), as ++ * the context is active and fast-starting is allowed. 
++ * ++ * If an atom has been fast-started then kctx->atoms_pulled will ++ * be non-zero but KCTX_ACTIVE will still be false (as the ++ * previous pm reference has been inherited). Do NOT drop our ++ * reference, as it has been re-used, and leave the context as ++ * active. ++ * ++ * If no new atoms have been started then KCTX_ACTIVE will still ++ * be false and atoms_pulled will be zero, so drop the reference ++ * and call kbase_jm_idle_ctx(). ++ * ++ * As the checks are done under both the queue_mutex and ++ * hwaccess_lock is should be impossible for this to race ++ * with the scheduler code. ++ */ ++ if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || ++ !atomic_read(&kctx->atoms_pulled)) { ++ /* Calling kbase_jm_idle_ctx() here will ensure that ++ * atoms are not fast-started when we drop the ++ * hwaccess_lock. This is not performed if ++ * KCTX_ACTIVE is set as in that case another pm ++ * reference has been taken and a fast-start would be ++ * valid. ++ */ ++ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) ++ kbase_jm_idle_ctx(kbdev, kctx); ++ context_idle = true; ++ } else { ++ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->queue_mutex); ++ } + -+ file->private_data = data; ++ /* ++ * Transaction complete ++ */ ++ mutex_unlock(&jctx->lock); + -+ return nonseekable_open(inode, file); -+} ++ /* Job is now no longer running, so can now safely release the context ++ * reference, and handle any actions that were logged against the atom's retained state */ + -+static ssize_t kbase_jit_debugfs_common_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) -+{ -+ struct kbase_jit_debugfs_data *data; -+ size_t size; -+ int ret; ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + -+ data = (struct kbase_jit_debugfs_data *) file->private_data; -+ mutex_lock(&data->lock); ++ kbase_js_sched_all(kbdev); + -+ if (*ppos) { -+ size = strnlen(data->buffer, sizeof(data->buffer)); -+ } else { -+ if (!data->func) { -+ ret = -EACCES; -+ goto out_unlock; -+ } ++ if (!atomic_dec_return(&kctx->work_count)) { ++ /* If worker now idle then post all events that jd_done_nolock() ++ * has queued */ ++ mutex_lock(&jctx->lock); ++ while (!list_empty(&kctx->completed_jobs)) { ++ struct kbase_jd_atom *atom = list_entry( ++ kctx->completed_jobs.next, ++ struct kbase_jd_atom, jd_item); ++ list_del(kctx->completed_jobs.next); + -+ if (data->func(data)) { -+ ret = -EACCES; -+ goto out_unlock; ++ kbase_event_post(kctx, atom); + } -+ -+ size = scnprintf(data->buffer, sizeof(data->buffer), -+ "%llu,%llu,%llu", data->active_value, -+ data->pool_value, data->destroy_value); ++ mutex_unlock(&jctx->lock); + } + -+ ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); -+ -+out_unlock: -+ mutex_unlock(&data->lock); -+ return ret; -+} ++ kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, ++ coreref_state); + -+static int kbase_jit_debugfs_common_release(struct inode *inode, -+ struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} ++ if (context_idle) ++ kbase_pm_context_idle(kbdev); + -+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ -+static int __fops ## _open(struct inode *inode, struct file *file) \ -+{ \ -+ return kbase_jit_debugfs_common_open(inode, file, __func); \ -+} \ -+static const struct file_operations __fops = { \ -+ .owner = THIS_MODULE, \ -+ .open = __fops ## _open, \ -+ .release = kbase_jit_debugfs_common_release, \ -+ .read = 
kbase_jit_debugfs_common_read, \ -+ .write = NULL, \ -+ .llseek = generic_file_llseek, \ ++ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); +} + -+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) ++/** ++ * jd_cancel_worker - Work queue job cancel function. ++ * @data: a &struct work_struct ++ * ++ * Only called as part of 'Zapping' a context (which occurs on termination). ++ * Operates serially with the kbase_jd_done_worker() on the work queue. ++ * ++ * This can only be called on contexts that aren't scheduled. ++ * ++ * We don't need to release most of the resources that would occur on ++ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be ++ * running (by virtue of only being called on contexts that aren't ++ * scheduled). ++ */ ++static void jd_cancel_worker(struct work_struct *data) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct list_head *tmp; ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); ++ struct kbase_jd_context *jctx; ++ struct kbase_context *kctx; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool need_to_try_schedule_context; ++ bool attr_state_changed; ++ struct kbase_device *kbdev; + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each(tmp, &kctx->jit_active_head) { -+ data->active_value++; -+ } ++ /* Soft jobs should never reach this function */ ++ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + -+ list_for_each(tmp, &kctx->jit_pool_head) { -+ data->pool_value++; -+ } ++ kctx = katom->kctx; ++ kbdev = kctx->kbdev; ++ jctx = &kctx->jctx; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ list_for_each(tmp, &kctx->jit_destroy_head) { -+ data->destroy_value++; -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + -+ return 0; -+} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, -+ kbase_jit_debugfs_count_get); ++ /* This only gets called on contexts that are scheduled out. Hence, we must ++ * make sure we don't de-ref the number of running jobs (there aren't ++ * any), nor must we try to schedule out the context (it's already ++ * scheduled out). ++ */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) -+{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ /* Scheduler: Remove the job from the system */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ data->active_value += reg->nr_pages; -+ } ++ mutex_lock(&jctx->lock); + -+ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { -+ data->pool_value += reg->nr_pages; -+ } ++ need_to_try_schedule_context = jd_done_nolock(katom, NULL); ++ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to ++ * schedule the context. There's also no need for the jsctx_mutex to have been taken ++ * around this too. */ ++ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + -+ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { -+ data->destroy_value += reg->nr_pages; -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++ /* katom may have been freed now, do not use! 
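/*
 * Editorial sketch: the context-idle bookkeeping in
 * kbase_jd_done_worker() above reduces to a small truth table over two
 * facts, whether the scheduler re-activated the context (KCTX_ACTIVE)
 * and whether atoms were fast-started (atoms_pulled != 0). Only the
 * "atoms pulled but context not re-activated" case keeps the
 * power-management reference. Standalone illustration with assumed
 * names.
 */
#include <stdbool.h>
#include <stdio.h>

struct idle_decision {
    bool drop_pm_ref;   /* release our power-management reference */
    bool mark_active;   /* context keeps running on an inherited ref */
};

static struct idle_decision decide(bool ctx_active, int atoms_pulled)
{
    struct idle_decision d = { false, false };

    if (ctx_active || atoms_pulled == 0)
        d.drop_pm_ref = true;   /* our reference is no longer needed */
    else
        d.mark_active = true;   /* reference was inherited, keep it */

    return d;
}

int main(void)
{
    int active, pulled;

    for (active = 0; active <= 1; active++)
        for (pulled = 0; pulled <= 1; pulled++) {
            struct idle_decision d = decide(active, pulled);

            printf("active=%d pulled=%d -> drop=%d mark=%d\n",
                   active, pulled, d.drop_pm_ref, d.mark_active);
        }
    return 0;
}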
*/ ++ mutex_unlock(&jctx->lock); + -+ return 0; ++ if (attr_state_changed) ++ kbase_js_sched_all(kbdev); +} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, -+ kbase_jit_debugfs_vm_get); + -+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) ++/** ++ * kbase_jd_done - Complete a job that has been removed from the Hardware ++ * @katom: atom which has been completed ++ * @slot_nr: slot the atom was on ++ * @end_timestamp: completion time ++ * @done_code: completion code ++ * ++ * This must be used whenever a job has been removed from the Hardware, e.g.: ++ * An IRQ indicates that the job finished (for both error and 'done' codes), or ++ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. ++ * ++ * Some work is carried out immediately, and the rest is deferred onto a ++ * workqueue ++ * ++ * Context: ++ * This can be called safely from atomic context. ++ * The caller must hold kbdev->hwaccess_lock ++ */ ++void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ++ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +{ -+ struct kbase_context *kctx = data->kctx; -+ struct kbase_va_region *reg; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; + -+ mutex_lock(&kctx->jit_evict_lock); -+ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { -+ data->active_value += reg->gpu_alloc->nents; -+ } ++ KBASE_DEBUG_ASSERT(katom); ++ kctx = katom->kctx; ++ KBASE_DEBUG_ASSERT(kctx); ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { -+ data->pool_value += reg->gpu_alloc->nents; -+ } ++ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) ++ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + -+ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { -+ data->destroy_value += reg->gpu_alloc->nents; -+ } -+ mutex_unlock(&kctx->jit_evict_lock); ++ KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + -+ return 0; -+} -+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, -+ kbase_jit_debugfs_phys_get); ++ kbase_job_check_leave_disjoint(kbdev, katom); + -+void kbase_jit_debugfs_init(struct kbase_context *kctx) -+{ -+ /* Debugfs entry for getting the number of JIT allocations. */ -+ debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_count_fops); ++ katom->slot_nr = slot_nr; + -+ /* -+ * Debugfs entry for getting the total number of virtual pages -+ * used by JIT allocations. -+ */ -+ debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_vm_fops); ++ atomic_inc(&kctx->work_count); + -+ /* -+ * Debugfs entry for getting the number of physical pages used -+ * by JIT allocations. -+ */ -+ debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, -+ kctx, &kbase_jit_debugfs_phys_fops); ++#ifdef CONFIG_DEBUG_FS ++ /* a failed job happened and is waiting for dumping*/ ++ if (!katom->will_fail_event_code && ++ kbase_debug_job_fault_process(katom, katom->event_code)) ++ return; ++#endif ++ ++ WARN_ON(work_pending(&katom->work)); ++ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); ++ INIT_WORK(&katom->work, kbase_jd_done_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} -+#endif /* CONFIG_DEBUG_FS */ + -+/** -+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations -+ * @work: Work item -+ * -+ * This function does the work of freeing JIT allocations whose physical -+ * backing has been released. 
-+ */ -+static void kbase_jit_destroy_worker(struct work_struct *work) ++KBASE_EXPORT_TEST_API(kbase_jd_done); ++ ++void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; -+ struct kbase_va_region *reg; + -+ kctx = container_of(work, struct kbase_context, jit_work); -+ do { -+ mutex_lock(&kctx->jit_evict_lock); -+ if (list_empty(&kctx->jit_destroy_head)) { -+ mutex_unlock(&kctx->jit_evict_lock); -+ break; -+ } ++ KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(NULL != katom); ++ kctx = katom->kctx; ++ KBASE_DEBUG_ASSERT(NULL != kctx); + -+ reg = list_first_entry(&kctx->jit_destroy_head, -+ struct kbase_va_region, jit_node); ++ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + -+ list_del(®->jit_node); -+ mutex_unlock(&kctx->jit_evict_lock); ++ /* This should only be done from a context that is not scheduled */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ kbase_gpu_vm_lock(kctx); -+ kbase_mem_free_region(kctx, reg); -+ kbase_gpu_vm_unlock(kctx); -+ } while (1); ++ WARN_ON(work_pending(&katom->work)); ++ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ ++ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); ++ INIT_WORK(&katom->work, jd_cancel_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} + -+int kbase_jit_init(struct kbase_context *kctx) ++ ++void kbase_jd_zap_context(struct kbase_context *kctx) +{ -+ INIT_LIST_HEAD(&kctx->jit_active_head); -+ INIT_LIST_HEAD(&kctx->jit_pool_head); -+ INIT_LIST_HEAD(&kctx->jit_destroy_head); -+ INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); ++ struct kbase_jd_atom *katom; ++ struct list_head *entry, *tmp; ++ struct kbase_device *kbdev; + -+ INIT_LIST_HEAD(&kctx->jit_pending_alloc); -+ INIT_LIST_HEAD(&kctx->jit_atoms_head); ++ KBASE_DEBUG_ASSERT(kctx); + -+ return 0; -+} ++ kbdev = kctx->kbdev; + -+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, -+ struct base_jit_alloc_info *info) -+{ -+ struct kbase_va_region *reg = NULL; -+ struct kbase_va_region *walker; -+ struct kbase_va_region *temp; -+ size_t current_diff = SIZE_MAX; ++ KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + -+ int ret; ++ kbase_js_zap_context(kctx); ++ ++ mutex_lock(&kctx->jctx.lock); + -+ mutex_lock(&kctx->jit_evict_lock); + /* -+ * Scan the pool for an existing allocation which meets our -+ * requirements and remove it. ++ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are ++ * queued outside the job scheduler. + */ -+ list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + -+ if (walker->nr_pages >= info->va_pages) { -+ size_t min_size, max_size, diff; ++ del_timer_sync(&kctx->soft_job_timeout); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ katom = list_entry(entry, struct kbase_jd_atom, queue); ++ kbase_cancel_soft_job(katom); ++ } + -+ /* -+ * The JIT allocations VA requirements have been -+ * meet, it's suitable but other allocations -+ * might be a better fit. -+ */ -+ min_size = min_t(size_t, walker->gpu_alloc->nents, -+ info->commit_pages); -+ max_size = max_t(size_t, walker->gpu_alloc->nents, -+ info->commit_pages); -+ diff = max_size - min_size; + -+ if (current_diff > diff) { -+ current_diff = diff; -+ reg = walker; -+ } -+ -+ /* The allocation is an exact match, stop looking */ -+ if (current_diff == 0) -+ break; -+ } -+ } -+ -+ if (reg) { -+ /* -+ * Remove the found region from the pool and add it to the -+ * active list. 
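/*
 * Editorial sketch of the pool scan in kbase_jit_allocate() above:
 * among cached regions that are large enough for the requested VA
 * range, pick the one whose committed size is closest to the request,
 * stopping early on an exact match. Plain arrays stand in for the
 * kernel list of struct kbase_va_region; names are assumptions.
 */
#include <stddef.h>
#include <stdio.h>

struct pooled_region {
    size_t va_pages;    /* virtual size of the region */
    size_t committed;   /* physically backed pages */
};

static int pick_region(const struct pooled_region *pool, size_t n,
                       size_t want_va, size_t want_commit)
{
    size_t best_diff = (size_t)-1;
    int best = -1;
    size_t i;

    for (i = 0; i < n; i++) {
        size_t diff;

        if (pool[i].va_pages < want_va)
            continue;   /* cannot satisfy the VA requirement */

        diff = pool[i].committed > want_commit ?
            pool[i].committed - want_commit :
            want_commit - pool[i].committed;

        if (diff < best_diff) {
            best_diff = diff;
            best = (int)i;
        }
        if (best_diff == 0)
            break;      /* exact match, stop looking */
    }
    return best;
}

int main(void)
{
    struct pooled_region pool[] = {
        { 64, 16 }, { 256, 64 }, { 128, 32 },
    };

    printf("best index: %d\n", pick_region(pool, 3, 100, 30));
    return 0;
}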
-+ */ -+ list_move(®->jit_node, &kctx->jit_active_head); -+ -+ /* -+ * Remove the allocation from the eviction list as it's no -+ * longer eligible for eviction. This must be done before -+ * dropping the jit_evict_lock -+ */ -+ list_del_init(®->gpu_alloc->evict_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ kbase_gpu_vm_lock(kctx); -+ -+ /* Make the physical backing no longer reclaimable */ -+ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) -+ goto update_failed; -+ -+ /* Grow the backing if required */ -+ if (reg->gpu_alloc->nents < info->commit_pages) { -+ size_t delta; -+ size_t old_size = reg->gpu_alloc->nents; -+ -+ /* Allocate some more pages */ -+ delta = info->commit_pages - reg->gpu_alloc->nents; -+ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) -+ != 0) -+ goto update_failed; -+ -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ if (kbase_alloc_phy_pages_helper( -+ reg->cpu_alloc, delta) != 0) { -+ kbase_free_phy_pages_helper( -+ reg->gpu_alloc, delta); -+ goto update_failed; -+ } -+ } -+ -+ ret = kbase_mem_grow_gpu_mapping(kctx, reg, -+ info->commit_pages, old_size); -+ /* -+ * The grow failed so put the allocation back in the -+ * pool and return failure. -+ */ -+ if (ret) -+ goto update_failed; -+ } -+ kbase_gpu_vm_unlock(kctx); -+ } else { -+ /* No suitable JIT allocation was found so create a new one */ -+ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | -+ BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | -+ BASE_MEM_COHERENT_LOCAL; -+ u64 gpu_addr; -+ -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, -+ info->extent, &flags, &gpu_addr); -+ if (!reg) -+ goto out_unlocked; -+ -+ mutex_lock(&kctx->jit_evict_lock); -+ list_add(®->jit_node, &kctx->jit_active_head); -+ mutex_unlock(&kctx->jit_evict_lock); -+ } -+ -+ return reg; -+ -+update_failed: -+ /* -+ * An update to an allocation from the pool failed, chances -+ * are slim a new allocation would fair any better so return -+ * the allocation to the pool and return the function with failure. -+ */ -+ kbase_gpu_vm_unlock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); -+ list_move(®->jit_node, &kctx->jit_pool_head); -+ mutex_unlock(&kctx->jit_evict_lock); -+out_unlocked: -+ return NULL; -+} -+ -+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) -+{ -+ /* The physical backing of memory in the pool is always reclaimable */ -+ kbase_gpu_vm_lock(kctx); -+ kbase_mem_evictable_make(reg->gpu_alloc); -+ kbase_gpu_vm_unlock(kctx); -+ -+ mutex_lock(&kctx->jit_evict_lock); -+ list_move(®->jit_node, &kctx->jit_pool_head); -+ mutex_unlock(&kctx->jit_evict_lock); -+} -+ -+void kbase_jit_backing_lost(struct kbase_va_region *reg) -+{ -+ struct kbase_context *kctx = reg->kctx; -+ -+ lockdep_assert_held(&kctx->jit_evict_lock); -+ -+ /* -+ * JIT allocations will always be on a list, if the region -+ * is not on a list then it's not a JIT allocation. -+ */ -+ if (list_empty(®->jit_node)) -+ return; ++#ifdef CONFIG_KDS + -+ /* -+ * Freeing the allocation requires locks we might not be able -+ * to take now, so move the allocation to the free list and kick -+ * the worker which will do the freeing. ++ /* For each job waiting on a kds resource, cancel the wait and force the job to ++ * complete early, this is done so that we don't leave jobs outstanding waiting ++ * on kds resources which may never be released when contexts are zapped, resulting ++ * in a hang. 
++ * ++ * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held, ++ * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job. + */ -+ list_move(®->jit_node, &kctx->jit_destroy_head); -+ -+ schedule_work(&kctx->jit_work); -+} -+ -+bool kbase_jit_evict(struct kbase_context *kctx) -+{ -+ struct kbase_va_region *reg = NULL; + -+ lockdep_assert_held(&kctx->reg_lock); ++ list_for_each(entry, &kctx->waiting_kds_resource) { ++ katom = list_entry(entry, struct kbase_jd_atom, node); + -+ /* Free the oldest allocation from the pool */ -+ mutex_lock(&kctx->jit_evict_lock); -+ if (!list_empty(&kctx->jit_pool_head)) { -+ reg = list_entry(kctx->jit_pool_head.prev, -+ struct kbase_va_region, jit_node); -+ list_del(®->jit_node); ++ kbase_cancel_kds_wait_job(katom); + } -+ mutex_unlock(&kctx->jit_evict_lock); -+ -+ if (reg) -+ kbase_mem_free_region(kctx, reg); -+ -+ return (reg != NULL); -+} ++#endif + -+void kbase_jit_term(struct kbase_context *kctx) -+{ -+ struct kbase_va_region *walker; ++#ifdef CONFIG_MALI_DMA_FENCE ++ kbase_dma_fence_cancel_all_atoms(kctx); ++#endif + -+ /* Free all allocations for this context */ ++ mutex_unlock(&kctx->jctx.lock); + -+ /* -+ * Flush the freeing of allocations whose backing has been freed -+ * (i.e. everything in jit_destroy_head). ++#ifdef CONFIG_MALI_DMA_FENCE ++ /* Flush dma-fence workqueue to ensure that any callbacks that may have ++ * been queued are done before continuing. + */ -+ cancel_work_sync(&kctx->jit_work); -+ -+ kbase_gpu_vm_lock(kctx); -+ mutex_lock(&kctx->jit_evict_lock); -+ /* Free all allocations from the pool */ -+ while (!list_empty(&kctx->jit_pool_head)) { -+ walker = list_first_entry(&kctx->jit_pool_head, -+ struct kbase_va_region, jit_node); -+ list_del(&walker->jit_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_mem_free_region(kctx, walker); -+ mutex_lock(&kctx->jit_evict_lock); -+ } -+ -+ /* Free all allocations from active list */ -+ while (!list_empty(&kctx->jit_active_head)) { -+ walker = list_first_entry(&kctx->jit_active_head, -+ struct kbase_va_region, jit_node); -+ list_del(&walker->jit_node); -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_mem_free_region(kctx, walker); -+ mutex_lock(&kctx->jit_evict_lock); -+ } -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_gpu_vm_unlock(kctx); -+} -+ -+static int kbase_jd_user_buf_map(struct kbase_context *kctx, -+ struct kbase_va_region *reg) -+{ -+ long pinned_pages; -+ struct kbase_mem_phy_alloc *alloc; -+ struct page **pages; -+ phys_addr_t *pa; -+ long i; -+ int err = -ENOMEM; -+ unsigned long address; -+ struct mm_struct *mm; -+ struct device *dev; -+ unsigned long offset; -+ unsigned long local_size; -+ -+ alloc = reg->gpu_alloc; -+ pa = kbase_get_gpu_phy_pages(reg); -+ address = alloc->imported.user_buf.address; -+ mm = alloc->imported.user_buf.mm; -+ -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); -+ -+ pages = alloc->imported.user_buf.pages; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) -+ pinned_pages = get_user_pages(NULL, mm, -+ address, -+ alloc->imported.user_buf.nr_pages, -+ reg->flags & KBASE_REG_GPU_WR, -+ 0, pages, NULL); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -+ pinned_pages = get_user_pages_remote(NULL, mm, -+ address, -+ alloc->imported.user_buf.nr_pages, -+ reg->flags & KBASE_REG_GPU_WR, -+ 0, pages, NULL); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) -+ pinned_pages = get_user_pages_remote(NULL, mm, -+ address, -+ 
alloc->imported.user_buf.nr_pages, -+ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, -+ pages, NULL); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0) -+ pinned_pages = get_user_pages_remote(NULL, mm, -+ address, -+ alloc->imported.user_buf.nr_pages, -+ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, -+ pages, NULL, NULL); -+#else -+ pinned_pages = get_user_pages_remote(mm, -+ address, -+ alloc->imported.user_buf.nr_pages, -+ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, -+ pages, NULL); ++ flush_workqueue(kctx->dma_fence.wq); +#endif + -+ if (pinned_pages <= 0) -+ return pinned_pages; -+ -+ if (pinned_pages != alloc->imported.user_buf.nr_pages) { -+ for (i = 0; i < pinned_pages; i++) -+ put_page(pages[i]); -+ return -ENOMEM; -+ } -+ -+ dev = kctx->kbdev->dev; -+ offset = address & ~PAGE_MASK; -+ local_size = alloc->imported.user_buf.size; -+ -+ for (i = 0; i < pinned_pages; i++) { -+ dma_addr_t dma_addr; -+ unsigned long min; -+ -+ min = MIN(PAGE_SIZE - offset, local_size); -+ dma_addr = dma_map_page(dev, pages[i], -+ offset, min, -+ DMA_BIDIRECTIONAL); -+ if (dma_mapping_error(dev, dma_addr)) -+ goto unwind; -+ -+ alloc->imported.user_buf.dma_addrs[i] = dma_addr; -+ pa[i] = page_to_phys(pages[i]); -+ -+ local_size -= min; -+ offset = 0; -+ } -+ -+ alloc->nents = pinned_pages; -+ -+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, -+ kbase_reg_current_backed_size(reg), -+ reg->flags); -+ if (err == 0) -+ return 0; -+ -+ alloc->nents = 0; -+ /* fall down */ -+unwind: -+ while (i--) { -+ dma_unmap_page(kctx->kbdev->dev, -+ alloc->imported.user_buf.dma_addrs[i], -+ PAGE_SIZE, DMA_BIDIRECTIONAL); -+ put_page(pages[i]); -+ pages[i] = NULL; -+ } -+ -+ return err; -+} -+ -+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc, bool writeable) -+{ -+ long i; -+ struct page **pages; -+ unsigned long size = alloc->imported.user_buf.size; -+ -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); -+ pages = alloc->imported.user_buf.pages; -+ for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { -+ unsigned long local_size; -+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; -+ -+ local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); -+ dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, -+ DMA_BIDIRECTIONAL); -+ if (writeable) -+ set_page_dirty_lock(pages[i]); -+ put_page(pages[i]); -+ pages[i] = NULL; -+ -+ size -= local_size; -+ } -+ alloc->nents = 0; ++ kbase_jm_wait_for_zero_jobs(kctx); +} + ++KBASE_EXPORT_TEST_API(kbase_jd_zap_context); + -+/* to replace sg_dma_len. 
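/*
 * Editorial sketch: the user-buffer mapping code above is an
 * all-or-nothing acquisition; if only some pages could be pinned, or a
 * later DMA-mapping step fails, everything acquired so far is released
 * before returning an error. Minimal userspace illustration of the
 * same unwind pattern, with malloc() standing in for page pinning.
 */
#include <stdio.h>
#include <stdlib.h>

static int acquire_all(void **slots, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        slots[i] = malloc(64);      /* stand-in for pinning one page */
        if (!slots[i])
            goto unwind;
    }
    return 0;

unwind:
    /* release everything acquired before the failure, newest first */
    while (i--) {
        free(slots[i]);
        slots[i] = NULL;
    }
    return -1;
}

int main(void)
{
    void *slots[4] = { 0 };

    if (acquire_all(slots, 4) == 0) {
        int i;

        printf("all resources acquired\n");
        for (i = 0; i < 4; i++)
            free(slots[i]);
    }
    return 0;
}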
*/ -+#define MALI_SG_DMA_LEN(sg) ((sg)->length) -+ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+static int kbase_jd_umm_map(struct kbase_context *kctx, -+ struct kbase_va_region *reg) ++int kbase_jd_init(struct kbase_context *kctx) +{ -+ struct sg_table *sgt; -+ struct scatterlist *s; + int i; -+ phys_addr_t *pa; ++ int mali_err = 0; ++#ifdef CONFIG_KDS + int err; -+ size_t count = 0; -+ struct kbase_mem_phy_alloc *alloc; -+ -+ alloc = reg->gpu_alloc; -+ -+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); -+ KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); -+ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, -+ DMA_BIDIRECTIONAL); -+ -+ if (IS_ERR_OR_NULL(sgt)) -+ return -EINVAL; ++#endif /* CONFIG_KDS */ + -+ /* save for later */ -+ alloc->imported.umm.sgt = sgt; ++ KBASE_DEBUG_ASSERT(kctx); + -+ pa = kbase_get_gpu_phy_pages(reg); -+ KBASE_DEBUG_ASSERT(pa); ++ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", ++ WQ_HIGHPRI | WQ_UNBOUND, 1); ++ if (NULL == kctx->jctx.job_done_wq) { ++ mali_err = -ENOMEM; ++ goto out1; ++ } + -+ for_each_sg(sgt->sgl, s, sgt->nents, i) { -+ int j; -+ size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); ++ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { ++ init_waitqueue_head(&kctx->jctx.atoms[i].completed); + -+ WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), -+ "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", -+ MALI_SG_DMA_LEN(s)); ++ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); ++ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + -+ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), -+ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", -+ (unsigned long long) sg_dma_address(s)); ++ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ ++ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; ++ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + -+ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, -+ count++) -+ *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); -+ WARN_ONCE(j < pages, -+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", -+ alloc->imported.umm.dma_buf->size); ++#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) ++ kctx->jctx.atoms[i].dma_fence.context = ++ dma_fence_context_alloc(1); ++ atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); ++ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); ++#endif + } + -+ if (!(reg->flags & KBASE_REG_IMPORT_PAD) && -+ WARN_ONCE(count < reg->nr_pages, -+ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", -+ alloc->imported.umm.dma_buf->size)) { -+ err = -EINVAL; -+ goto err_unmap_attachment; -+ } ++ mutex_init(&kctx->jctx.lock); + -+ /* Update nents as we now have pages to map */ -+ alloc->nents = reg->nr_pages; ++ init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + -+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, -+ kbase_get_gpu_phy_pages(reg), -+ count, -+ reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); -+ if (err) -+ goto err_unmap_attachment; ++ spin_lock_init(&kctx->jctx.tb_lock); + -+ if (reg->flags & KBASE_REG_IMPORT_PAD) { -+ err = kbase_mmu_insert_single_page(kctx, -+ reg->start_pfn + count, -+ page_to_phys(kctx->aliasing_sink_page), -+ reg->nr_pages - count, -+ (reg->flags | KBASE_REG_GPU_RD) & -+ ~KBASE_REG_GPU_WR); -+ if (err) -+ goto err_teardown_orig_pages; ++#ifdef CONFIG_KDS ++ err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear); ++ if (0 != err) { ++ mali_err = -EINVAL; ++ goto out2; + } ++#endif /* CONFIG_KDS */ + -+ return 0; ++ kctx->jctx.job_nr = 0; 
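/*
 * Editorial sketch of the scatterlist walk in kbase_jd_umm_map()
 * above: each DMA segment (address plus length) is expanded into one
 * entry per 4 KiB page, and the walk stops once the destination table
 * is full. Fixed-size arrays stand in for the sg_table and the GPU
 * page array; names are assumptions.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

struct dma_segment {
    uint64_t addr;  /* page-aligned bus address */
    size_t len;     /* multiple of PAGE_SIZE in the well-formed case */
};

static size_t expand_segments(const struct dma_segment *seg, size_t nsegs,
                              uint64_t *pages, size_t max_pages)
{
    size_t count = 0, i;

    for (i = 0; i < nsegs; i++) {
        size_t npages = (seg[i].len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        size_t j;

        for (j = 0; j < npages && count < max_pages; j++, count++)
            pages[count] = seg[i].addr + ((uint64_t)j << PAGE_SHIFT);
    }
    return count;   /* number of page entries actually produced */
}

int main(void)
{
    struct dma_segment segs[] = {
        { 0x80000000ull, 2 * PAGE_SIZE },
        { 0x90000000ull, 1 * PAGE_SIZE },
    };
    uint64_t pages[8];
    size_t n = expand_segments(segs, 2, pages, 8), i;

    for (i = 0; i < n; i++)
        printf("page %zu: 0x%llx\n", i, (unsigned long long)pages[i]);
    return 0;
}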
++ INIT_LIST_HEAD(&kctx->completed_jobs); ++ atomic_set(&kctx->work_count, 0); + -+err_teardown_orig_pages: -+ kbase_mmu_teardown_pages(kctx, reg->start_pfn, count); -+err_unmap_attachment: -+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, -+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); -+ alloc->imported.umm.sgt = NULL; ++ return 0; + -+ return err; ++#ifdef CONFIG_KDS ++ out2: ++ destroy_workqueue(kctx->jctx.job_done_wq); ++#endif /* CONFIG_KDS */ ++ out1: ++ return mali_err; +} + -+static void kbase_jd_umm_unmap(struct kbase_context *kctx, -+ struct kbase_mem_phy_alloc *alloc) ++KBASE_EXPORT_TEST_API(kbase_jd_init); ++ ++void kbase_jd_exit(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(alloc); -+ KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); -+ KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); -+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, -+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); -+ alloc->imported.umm.sgt = NULL; -+ alloc->nents = 0; ++ ++#ifdef CONFIG_KDS ++ kds_callback_term(&kctx->jctx.kds_cb); ++#endif /* CONFIG_KDS */ ++ /* Work queue is emptied by this */ ++ destroy_workqueue(kctx->jctx.job_done_wq); +} -+#endif /* CONFIG_DMA_SHARED_BUFFER */ + -+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ -+ || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) -+static void add_kds_resource(struct kds_resource *kds_res, -+ struct kds_resource **kds_resources, u32 *kds_res_count, -+ unsigned long *kds_access_bitmap, bool exclusive) -+{ -+ u32 i; ++KBASE_EXPORT_TEST_API(kbase_jd_exit); +diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +new file mode 100644 +index 000000000..44643abf8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +@@ -0,0 +1,233 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ for (i = 0; i < *kds_res_count; i++) { -+ /* Duplicate resource, ignore */ -+ if (kds_resources[i] == kds_res) -+ return; -+ } + -+ kds_resources[*kds_res_count] = kds_res; -+ if (exclusive) -+ set_bit(*kds_res_count, kds_access_bitmap); -+ (*kds_res_count)++; -+} -+#endif + -+struct kbase_mem_phy_alloc *kbase_map_external_resource( -+ struct kbase_context *kctx, struct kbase_va_region *reg, -+ struct mm_struct *locked_mm -+#ifdef CONFIG_KDS -+ , u32 *kds_res_count, struct kds_resource **kds_resources, -+ unsigned long *kds_access_bitmap, bool exclusive ++#ifdef CONFIG_DEBUG_FS ++ ++#include ++#include ++#include ++#include ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#include +#endif -+ ) -+{ -+ int err; + -+ /* decide what needs to happen for this resource */ -+ switch (reg->gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) -+ goto exit; ++struct kbase_jd_debugfs_depinfo { ++ u8 id; ++ char type; ++}; + -+ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; -+ if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { -+ err = kbase_jd_user_buf_map(kctx, reg); -+ if (err) { -+ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; -+ goto exit; -+ } -+ } -+ } -+ break; -+ case KBASE_MEM_TYPE_IMPORTED_UMP: { -+#if defined(CONFIG_KDS) && defined(CONFIG_UMP) -+ if (kds_res_count) { -+ struct kds_resource *kds_res; ++static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, ++ struct seq_file *sfile) ++{ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ struct kbase_sync_fence_info info; ++ int res; + -+ kds_res = ump_dd_kds_resource_get( -+ reg->gpu_alloc->imported.ump_handle); -+ if (kds_res) -+ add_kds_resource(kds_res, kds_resources, -+ kds_res_count, -+ kds_access_bitmap, exclusive); -+ } -+#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ ++ switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ res = kbase_sync_fence_out_info_get(atom, &info); ++ if (res == 0) ++ seq_printf(sfile, "Sa([%p]%d) ", ++ info.fence, info.status); + break; -+ } -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS -+ if (kds_res_count) { -+ struct kds_resource *kds_res; -+ -+ kds_res = get_dma_buf_kds_resource( -+ reg->gpu_alloc->imported.umm.dma_buf); -+ if (kds_res) -+ add_kds_resource(kds_res, kds_resources, -+ kds_res_count, -+ kds_access_bitmap, exclusive); -+ } -+#endif -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count++; -+ if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { -+ err = kbase_jd_umm_map(kctx, reg); -+ if (err) { -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count--; -+ goto exit; -+ } -+ } ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ res = kbase_sync_fence_in_info_get(atom, &info); ++ if (res == 0) ++ seq_printf(sfile, "Wa([%p]%d) ", ++ info.fence, info.status); + break; -+ } -+#endif + default: -+ goto exit; ++ break; + } ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + -+ return kbase_mem_phy_alloc_get(reg->gpu_alloc); -+exit: -+ return NULL; -+} -+ -+void kbase_unmap_external_resource(struct kbase_context *kctx, -+ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) -+{ -+ switch (alloc->type) { -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ alloc->imported.umm.current_mapping_usage_count--; -+ -+ if (0 == alloc->imported.umm.current_mapping_usage_count) 
{ -+ if (reg && reg->gpu_alloc == alloc) { -+ int err; ++#ifdef CONFIG_MALI_DMA_FENCE ++ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++ struct kbase_fence_cb *cb; + -+ err = kbase_mmu_teardown_pages( -+ kctx, -+ reg->start_pfn, -+ alloc->nents); -+ WARN_ON(err); -+ } ++ if (atom->dma_fence.fence) { ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence = atom->dma_fence.fence; ++#else ++ struct dma_fence *fence = atom->dma_fence.fence; ++#endif + -+ kbase_jd_umm_unmap(kctx, alloc); ++ seq_printf(sfile, ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++ "Sd(%u#%u: %s) ", ++#else ++ "Sd(%llu#%u: %s) ", ++#endif ++ fence->context, ++ fence->seqno, ++ dma_fence_is_signaled(fence) ? ++ "signaled" : "active"); + } -+ } -+ break; -+#endif /* CONFIG_DMA_SHARED_BUFFER */ -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { -+ alloc->imported.user_buf.current_mapping_usage_count--; -+ -+ if (0 == alloc->imported.user_buf.current_mapping_usage_count) { -+ bool writeable = true; + -+ if (reg && reg->gpu_alloc == alloc) -+ kbase_mmu_teardown_pages( -+ kctx, -+ reg->start_pfn, -+ kbase_reg_current_backed_size(reg)); -+ -+ if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) -+ writeable = false; ++ list_for_each_entry(cb, &atom->dma_fence.callbacks, ++ node) { ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence = cb->fence; ++#else ++ struct dma_fence *fence = cb->fence; ++#endif + -+ kbase_jd_user_buf_unmap(kctx, alloc, writeable); ++ seq_printf(sfile, ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++ "Wd(%u#%u: %s) ", ++#else ++ "Wd(%llu#%u: %s) ", ++#endif ++ fence->context, ++ fence->seqno, ++ dma_fence_is_signaled(fence) ? ++ "signaled" : "active"); + } + } -+ break; -+ default: -+ break; -+ } -+ kbase_mem_phy_alloc_put(alloc); ++#endif /* CONFIG_MALI_DMA_FENCE */ ++ +} + -+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( -+ struct kbase_context *kctx, u64 gpu_addr) ++static void kbasep_jd_debugfs_atom_deps( ++ struct kbase_jd_debugfs_depinfo *deps, ++ struct kbase_jd_atom *atom) +{ -+ struct kbase_ctx_ext_res_meta *meta = NULL; -+ struct kbase_ctx_ext_res_meta *walker; ++ struct kbase_context *kctx = atom->kctx; ++ int i; + -+ lockdep_assert_held(&kctx->reg_lock); ++ for (i = 0; i < 2; i++) { ++ deps[i].id = (unsigned)(atom->dep[i].atom ? ++ kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); + -+ /* -+ * Walk the per context external resource metadata list for the -+ * metadata which matches the region which is being acquired. -+ */ -+ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { -+ if (walker->gpu_addr == gpu_addr) { -+ meta = walker; ++ switch (atom->dep[i].dep_type) { ++ case BASE_JD_DEP_TYPE_INVALID: ++ deps[i].type = ' '; ++ break; ++ case BASE_JD_DEP_TYPE_DATA: ++ deps[i].type = 'D'; ++ break; ++ case BASE_JD_DEP_TYPE_ORDER: ++ deps[i].type = '>'; ++ break; ++ default: ++ deps[i].type = '?'; + break; + } + } ++} ++/** ++ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry ++ * ++ * This function is called to get the contents of the JD atoms debugfs file. 
++ * This is a report of all atoms managed by kbase_jd_context.atoms ++ * ++ * Return: 0 if successfully prints data in debugfs entry file, failure ++ * otherwise ++ */ ++static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) ++{ ++ struct kbase_context *kctx = sfile->private; ++ struct kbase_jd_atom *atoms; ++ unsigned long irq_flags; ++ int i; + -+ /* No metadata exists so create one. */ -+ if (!meta) { -+ struct kbase_va_region *reg; ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ /* Find the region */ -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ kctx, gpu_addr); -+ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) -+ goto failed; ++ /* Print version */ ++ seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); + -+ /* Allocate the metadata object */ -+ meta = kzalloc(sizeof(*meta), GFP_KERNEL); -+ if (!meta) -+ goto failed; ++ /* Print U/K API version */ ++ seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, ++ BASE_UK_VERSION_MINOR); + -+ /* -+ * Fill in the metadata object and acquire a reference -+ * for the physical resource. -+ */ -+ meta->alloc = kbase_map_external_resource(kctx, reg, NULL -+#ifdef CONFIG_KDS -+ , NULL, NULL, -+ NULL, false -+#endif -+ ); ++ /* Print table heading */ ++ seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); + -+ if (!meta->alloc) -+ goto fail_map; ++ atoms = kctx->jctx.atoms; ++ /* General atom states */ ++ mutex_lock(&kctx->jctx.lock); ++ /* JS-related states */ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); ++ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { ++ struct kbase_jd_atom *atom = &atoms[i]; ++ s64 start_timestamp = 0; ++ struct kbase_jd_debugfs_depinfo deps[2]; + -+ meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; ++ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) ++ continue; + -+ list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); -+ } ++ /* start_timestamp is cleared as soon as the atom leaves UNUSED state ++ * and set before a job is submitted to the h/w, a non-zero value means ++ * it is valid */ ++ if (ktime_to_ns(atom->start_timestamp)) ++ start_timestamp = ktime_to_ns( ++ ktime_sub(ktime_get(), atom->start_timestamp)); + -+ return meta; ++ kbasep_jd_debugfs_atom_deps(deps, atom); + -+fail_map: -+ kfree(meta); -+failed: -+ return NULL; -+} ++ seq_printf(sfile, ++ "%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ", ++ i, atom->core_req, atom->status, ++ atom->coreref_state, ++ deps[0].type, deps[0].id, ++ deps[1].type, deps[1].id, ++ start_timestamp); + -+bool kbase_sticky_resource_release(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) -+{ -+ struct kbase_ctx_ext_res_meta *walker; -+ struct kbase_va_region *reg; + -+ lockdep_assert_held(&kctx->reg_lock); ++ kbase_jd_debugfs_fence_info(atom, sfile); + -+ /* Search of the metadata if one isn't provided. */ -+ if (!meta) { -+ /* -+ * Walk the per context external resource metadata list for the -+ * metadata which matches the region which is being released. -+ */ -+ list_for_each_entry(walker, &kctx->ext_res_meta_head, -+ ext_res_node) { -+ if (walker->gpu_addr == gpu_addr) { -+ meta = walker; -+ break; -+ } -+ } ++ seq_puts(sfile, "\n"); + } ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kctx->jctx.lock); + -+ /* No metadata so just return. */ -+ if (!meta) -+ return false; -+ -+ /* Drop the physical memory reference and free the metadata. 
*/ -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ kctx, -+ meta->gpu_addr); -+ -+ kbase_unmap_external_resource(kctx, reg, meta->alloc); -+ list_del(&meta->ext_res_node); -+ kfree(meta); -+ -+ return true; ++ return 0; +} + -+int kbase_sticky_resource_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->ext_res_meta_head); + -+ return 0; ++/** ++ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file ++ * @in: &struct inode pointer ++ * @file: &struct file pointer ++ * ++ * Return: file descriptor ++ */ ++static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); +} + -+void kbase_sticky_resource_term(struct kbase_context *kctx) -+{ -+ struct kbase_ctx_ext_res_meta *walker; ++static const struct file_operations kbasep_jd_debugfs_atoms_fops = { ++ .open = kbasep_jd_debugfs_atoms_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; + -+ lockdep_assert_held(&kctx->reg_lock); ++void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ /* -+ * Free any sticky resources which haven't been unmapped. -+ * -+ * Note: -+ * We don't care about refcounts at this point as no future -+ * references to the meta data will be made. -+ * Region termination would find these if we didn't free them -+ * here, but it's more efficient if we do the clean up here. -+ */ -+ while (!list_empty(&kctx->ext_res_meta_head)) { -+ walker = list_first_entry(&kctx->ext_res_meta_head, -+ struct kbase_ctx_ext_res_meta, ext_res_node); ++ /* Expose all atoms */ ++ debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, ++ &kbasep_jd_debugfs_atoms_fops); + -+ kbase_sticky_resource_release(kctx, walker, 0); -+ } +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h new file mode 100644 -index 000000000..3f3eaa3fd +index 000000000..0935f1db7 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h -@@ -0,0 +1,1068 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +@@ -0,0 +1,39 @@ +/* + * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -381258,3646 +382753,3130 @@ index 000000000..3f3eaa3fd + + + -+ -+ +/** -+ * @file mali_kbase_mem.h -+ * Base kernel memory APIs ++ * @file mali_kbase_jd_debugfs.h ++ * Header file for job dispatcher-related entries in debugfs + */ + -+#ifndef _KBASE_MEM_H_ -+#define _KBASE_MEM_H_ -+ -+#ifndef _KBASE_H_ -+#error "Don't include this file directly, use mali_kbase.h instead" -+#endif -+ -+#include -+#ifdef CONFIG_KDS -+#include -+#endif /* CONFIG_KDS */ -+#ifdef CONFIG_UMP -+#include -+#endif /* CONFIG_UMP */ -+#include "mali_base_kernel.h" -+#include -+#include "mali_kbase_pm.h" -+#include "mali_kbase_defs.h" -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+#include "mali_kbase_gator.h" -+#endif -+/* Required for kbase_mem_evictable_unmake */ -+#include "mali_kbase_mem_linux.h" ++#ifndef _KBASE_JD_DEBUGFS_H ++#define _KBASE_JD_DEBUGFS_H + -+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ ++#include + -+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. -+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and -+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table -+updates and generates duplicate page faults as the page table information used by the MMU is not valid. */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ ++#include + -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ ++#define MALI_JD_DEBUGFS_VERSION 2 + -+/* This must always be a power of 2 */ -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) -+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** -+ * A CPU mapping ++ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system ++ * ++ * @kctx Pointer to kbase_context + */ -+struct kbase_cpu_mapping { -+ struct list_head mappings_list; -+ struct kbase_mem_phy_alloc *alloc; -+ struct kbase_context *kctx; -+ struct kbase_va_region *region; -+ int count; -+ int free_on_close; -+}; -+ -+enum kbase_memory_type { -+ KBASE_MEM_TYPE_NATIVE, -+ KBASE_MEM_TYPE_IMPORTED_UMP, -+ KBASE_MEM_TYPE_IMPORTED_UMM, -+ KBASE_MEM_TYPE_IMPORTED_USER_BUF, -+ KBASE_MEM_TYPE_ALIAS, -+ KBASE_MEM_TYPE_TB, -+ KBASE_MEM_TYPE_RAW -+}; -+ -+/* internal structure, mirroring base_mem_aliasing_info, -+ * but with alloc instead of a gpu va (handle) */ -+struct kbase_aliased { -+ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ -+ u64 offset; /* in pages */ -+ u64 length; /* in pages */ -+}; -+ -+/** -+ * @brief Physical pages tracking object properties -+ */ -+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0) -+#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1) ++void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); + -+/* physical pages tracking object. -+ * Set up to track N pages. -+ * N not stored here, the creator holds that info. 
-+ * This object only tracks how many elements are actually valid (present). -+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not -+ * shared with another region or client. CPU mappings are OK to exist when changing, as -+ * long as the tracked mappings objects are updated as part of the change. ++#endif /*_KBASE_JD_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c +new file mode 100644 +index 000000000..0c5c6a6f7 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c +@@ -0,0 +1,131 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+struct kbase_mem_phy_alloc { -+ struct kref kref; /* number of users of this alloc */ -+ atomic_t gpu_mappings; -+ size_t nents; /* 0..N */ -+ phys_addr_t *pages; /* N elements, only 0..nents are valid */ -+ -+ /* kbase_cpu_mappings */ -+ struct list_head mappings; -+ -+ /* Node used to store this allocation on the eviction list */ -+ struct list_head evict_node; -+ /* Physical backing size when the pages where evicted */ -+ size_t evicted; -+ /* -+ * Back reference to the region structure which created this -+ * allocation, or NULL if it has been freed. -+ */ -+ struct kbase_va_region *reg; + -+ /* type of buffer */ -+ enum kbase_memory_type type; + -+ unsigned long properties; + -+ /* member in union valid based on @a type */ -+ union { -+#ifdef CONFIG_UMP -+ ump_dd_handle ump_handle; -+#endif /* CONFIG_UMP */ -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+ struct { -+ struct dma_buf *dma_buf; -+ struct dma_buf_attachment *dma_attachment; -+ unsigned int current_mapping_usage_count; -+ struct sg_table *sgt; -+ } umm; -+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -+ struct { -+ u64 stride; -+ size_t nents; -+ struct kbase_aliased *aliased; -+ } alias; -+ /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ -+ struct kbase_context *kctx; -+ struct kbase_alloc_import_user_buf { -+ unsigned long address; -+ unsigned long size; -+ unsigned long nr_pages; -+ struct page **pages; -+ /* top bit (1<<31) of current_mapping_usage_count -+ * specifies that this import was pinned on import -+ * See PINNED_ON_IMPORT -+ */ -+ u32 current_mapping_usage_count; -+ struct mm_struct *mm; -+ dma_addr_t *dma_addrs; -+ } user_buf; -+ } imported; -+}; + -+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is -+ * used to signify that a buffer was pinned when it was imported. Since the -+ * reference count is limited by the number of atoms that can be submitted at -+ * once there should be no danger of overflowing into this bit. -+ * Stealing the top bit also has the benefit that -+ * current_mapping_usage_count != 0 if and only if the buffer is mapped. 
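A minimal pair of helpers, written purely to illustrate the bit layout described above (hypothetical, not part of the driver; PINNED_ON_IMPORT is the define that follows):

static inline bool user_buf_pinned_on_import(struct kbase_mem_phy_alloc *alloc)
{
	/* Top bit: the pages were pinned when the buffer was imported */
	return alloc->imported.user_buf.current_mapping_usage_count &
			PINNED_ON_IMPORT;
}

static inline u32 user_buf_map_count(struct kbase_mem_phy_alloc *alloc)
{
	/* Remaining 31 bits: the actual mapping reference count */
	return alloc->imported.user_buf.current_mapping_usage_count &
			~PINNED_ON_IMPORT;
}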
++/* ++ * HW access job manager common APIs + */ -+#define PINNED_ON_IMPORT (1<<31) -+ -+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ KBASE_DEBUG_ASSERT(alloc); -+ /* we only track mappings of NATIVE buffers */ -+ if (alloc->type == KBASE_MEM_TYPE_NATIVE) -+ atomic_inc(&alloc->gpu_mappings); -+} -+ -+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) -+{ -+ KBASE_DEBUG_ASSERT(alloc); -+ /* we only track mappings of NATIVE buffers */ -+ if (alloc->type == KBASE_MEM_TYPE_NATIVE) -+ if (0 > atomic_dec_return(&alloc->gpu_mappings)) { -+ pr_err("Mismatched %s:\n", __func__); -+ dump_stack(); -+ } -+} -+ -+void kbase_mem_kref_free(struct kref *kref); -+ -+int kbase_mem_init(struct kbase_device *kbdev); -+void kbase_mem_halt(struct kbase_device *kbdev); -+void kbase_mem_term(struct kbase_device *kbdev); -+ -+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) -+{ -+ kref_get(&alloc->kref); -+ return alloc; -+} + -+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) -+{ -+ kref_put(&alloc->kref, kbase_mem_kref_free); -+ return NULL; -+} ++#include ++#include "mali_kbase_hwaccess_jm.h" ++#include "mali_kbase_jm.h" + +/** -+ * A GPU memory region, and attributes for CPU mappings. ++ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot ++ * @js on the active context. ++ * @kbdev: Device pointer ++ * @js: Job slot to run on ++ * @nr_jobs_to_submit: Number of jobs to attempt to submit ++ * ++ * Return: true if slot can still be submitted on, false if slot is now full. + */ -+struct kbase_va_region { -+ struct rb_node rblink; -+ struct list_head link; -+ -+ struct kbase_context *kctx; /* Backlink to base context */ -+ -+ u64 start_pfn; /* The PFN in GPU space */ -+ size_t nr_pages; -+ -+/* Free region */ -+#define KBASE_REG_FREE (1ul << 0) -+/* CPU write access */ -+#define KBASE_REG_CPU_WR (1ul << 1) -+/* GPU write access */ -+#define KBASE_REG_GPU_WR (1ul << 2) -+/* No eXecute flag */ -+#define KBASE_REG_GPU_NX (1ul << 3) -+/* Is CPU cached? */ -+#define KBASE_REG_CPU_CACHED (1ul << 4) -+/* Is GPU cached? */ -+#define KBASE_REG_GPU_CACHED (1ul << 5) -+ -+#define KBASE_REG_GROWABLE (1ul << 6) -+/* Can grow on pf? */ -+#define KBASE_REG_PF_GROW (1ul << 7) -+ -+/* VA managed by us */ -+#define KBASE_REG_CUSTOM_VA (1ul << 8) -+ -+/* inner shareable coherency */ -+#define KBASE_REG_SHARE_IN (1ul << 9) -+/* inner & outer shareable coherency */ -+#define KBASE_REG_SHARE_BOTH (1ul << 10) -+ -+/* Space for 4 different zones */ -+#define KBASE_REG_ZONE_MASK (3ul << 11) -+#define KBASE_REG_ZONE(x) (((x) & 3) << 11) -+ -+/* GPU read access */ -+#define KBASE_REG_GPU_RD (1ul<<13) -+/* CPU read access */ -+#define KBASE_REG_CPU_RD (1ul<<14) -+ -+/* Index of chosen MEMATTR for this region (0..7) */ -+#define KBASE_REG_MEMATTR_MASK (7ul << 16) -+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) -+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) -+ -+#define KBASE_REG_SECURE (1ul << 19) -+ -+#define KBASE_REG_DONT_NEED (1ul << 20) ++static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, ++ int nr_jobs_to_submit) ++{ ++ struct kbase_context *kctx; ++ int i; + -+/* Imported buffer is padded? 
*/ -+#define KBASE_REG_IMPORT_PAD (1ul << 21) ++ kctx = kbdev->hwaccess.active_kctx; + -+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) ++ if (!kctx) ++ return true; + -+/* only used with 32-bit clients */ -+/* -+ * On a 32bit platform, custom VA should be wired from (4GB + shader region) -+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface -+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). -+ * So we put the default limit to the maximum possible on Linux and shrink -+ * it down, if required by the GPU, during initialization. -+ */ ++ for (i = 0; i < nr_jobs_to_submit; i++) { ++ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); + -+/* -+ * Dedicated 16MB region for shader code: -+ * VA range 0x101000000-0x102000000 -+ */ -+#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) -+#define KBASE_REG_ZONE_EXEC_BASE (0x101000000ULL >> PAGE_SHIFT) -+#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) ++ if (!katom) ++ return true; /* Context has no jobs on this slot */ + -+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) -+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */ -+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -+/* end 32-bit clients only */ ++ kbase_backend_run_atom(kbdev, katom); ++ } + -+ unsigned long flags; ++ return false; /* Slot ringbuffer should now be full */ ++} + -+ size_t extent; /* nr of pages alloc'd on PF */ ++u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) ++{ ++ u32 ret_mask = 0; + -+ struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ -+ struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* non-NULL if this memory object is a kds_resource */ -+ struct kds_resource *kds_res; ++ while (js_mask) { ++ int js = ffs(js_mask) - 1; ++ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); + -+ /* List head used to store the region in the JIT allocation pool */ -+ struct list_head jit_node; -+}; ++ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) ++ ret_mask |= (1 << js); + -+/* Common functions */ -+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg) -+{ -+ KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ js_mask &= ~(1 << js); ++ } + -+ return reg->cpu_alloc->pages; ++ return ret_mask; +} + -+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg) ++void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) +{ -+ KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ return reg->gpu_alloc->pages; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (!down_trylock(&js_devdata->schedule_sem)) { ++ kbase_jm_kick(kbdev, js_mask); ++ up(&js_devdata->schedule_sem); ++ } +} + -+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) ++void kbase_jm_try_kick_all(struct kbase_device *kbdev) +{ -+ KBASE_DEBUG_ASSERT(reg); -+ /* if no alloc object the backed size naturally is 0 */ -+ if (!reg->cpu_alloc) -+ 
return 0; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return reg->cpu_alloc->nents; ++ if (!down_trylock(&js_devdata->schedule_sem)) { ++ kbase_jm_kick_all(kbdev); ++ up(&js_devdata->schedule_sem); ++ } +} + -+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ -+ -+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) ++void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ struct kbase_mem_phy_alloc *alloc; -+ size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; -+ size_t per_page_size = sizeof(*alloc->pages); -+ -+ /* Imported pages may have page private data already in use */ -+ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { -+ alloc_size += nr_pages * -+ sizeof(*alloc->imported.user_buf.dma_addrs); -+ per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); -+ } -+ -+ /* -+ * Prevent nr_pages*per_page_size + sizeof(*alloc) from -+ * wrapping around. -+ */ -+ if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) -+ / per_page_size)) -+ return ERR_PTR(-ENOMEM); -+ -+ /* Allocate based on the size to reduce internal fragmentation of vmem */ -+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) -+ alloc = vzalloc(alloc_size); -+ else -+ alloc = kzalloc(alloc_size, GFP_KERNEL); -+ -+ if (!alloc) -+ return ERR_PTR(-ENOMEM); -+ -+ /* Store allocation method */ -+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) -+ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; -+ -+ kref_init(&alloc->kref); -+ atomic_set(&alloc->gpu_mappings, 0); -+ alloc->nents = 0; -+ alloc->pages = (void *)(alloc + 1); -+ INIT_LIST_HEAD(&alloc->mappings); -+ alloc->type = type; -+ -+ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) -+ alloc->imported.user_buf.dma_addrs = -+ (void *) (alloc->pages + nr_pages); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ return alloc; ++ if (kbdev->hwaccess.active_kctx == kctx) ++ kbdev->hwaccess.active_kctx = NULL; +} + -+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, -+ struct kbase_context *kctx) ++struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ KBASE_DEBUG_ASSERT(reg); -+ KBASE_DEBUG_ASSERT(!reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(!reg->gpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ reg->cpu_alloc = kbase_alloc_create(reg->nr_pages, -+ KBASE_MEM_TYPE_NATIVE); -+ if (IS_ERR(reg->cpu_alloc)) -+ return PTR_ERR(reg->cpu_alloc); -+ else if (!reg->cpu_alloc) -+ return -ENOMEM; -+ reg->cpu_alloc->imported.kctx = kctx; -+ INIT_LIST_HEAD(®->cpu_alloc->evict_node); -+ if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) -+ && (reg->flags & KBASE_REG_CPU_CACHED)) { -+ reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, -+ KBASE_MEM_TYPE_NATIVE); -+ reg->gpu_alloc->imported.kctx = kctx; -+ INIT_LIST_HEAD(®->gpu_alloc->evict_node); ++ if (katom->event_code != BASE_JD_EVENT_STOPPED && ++ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { ++ return kbase_js_complete_atom(katom, NULL); + } else { -+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ kbase_js_unpull(katom->kctx, katom); ++ return NULL; + } -+ -+ INIT_LIST_HEAD(®->jit_node); -+ 
reg->flags &= ~KBASE_REG_FREE; -+ return 0; +} + -+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) ++struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ktime_t *end_timestamp) +{ -+ int new_val = atomic_add_return(num_pages, used_pages); -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -+#endif -+ return new_val; -+} ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) -+{ -+ int new_val = atomic_sub_return(num_pages, used_pages); -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -+#endif -+ return new_val; ++ return kbase_js_complete_atom(katom, end_timestamp); +} + +diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h +new file mode 100644 +index 000000000..a74ee24c8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h +@@ -0,0 +1,110 @@ +/* -+ * Max size for kbdev memory pool (in pages) -+ */ -+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) -+ -+/* -+ * Max size for kctx memory pool (in pages) -+ */ -+#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) -+ -+/** -+ * kbase_mem_pool_init - Create a memory pool for a kbase device -+ * @pool: Memory pool to initialize -+ * @max_size: Maximum number of free pages the pool can hold -+ * @kbdev: Kbase device where memory is used -+ * @next_pool: Pointer to the next pool or NULL. + * -+ * Allocations from @pool are in whole pages. Each @pool has a free list where -+ * pages can be quickly allocated from. The free list is initially empty and -+ * filled whenever pages are freed back to the pool. The number of free pages -+ * in the pool will in general not exceed @max_size, but the pool may in -+ * certain corner cases grow above @max_size. ++ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * -+ * If @next_pool is not NULL, we will allocate from @next_pool before going to -+ * the kernel allocator. Similarily pages can spill over to @next_pool when -+ * @pool is full. Pages are zeroed before they spill over to another pool, to -+ * prevent leaking information between applications. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as -+ * needed. ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Return: 0 on success, negative -errno on error + */ -+int kbase_mem_pool_init(struct kbase_mem_pool *pool, -+ size_t max_size, -+ struct kbase_device *kbdev, -+ struct kbase_mem_pool *next_pool); + -+/** -+ * kbase_mem_pool_term - Destroy a memory pool -+ * @pool: Memory pool to destroy -+ * -+ * Pages in the pool will spill over to @next_pool (if available) or freed to -+ * the kernel. 
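For orientation, a condensed lifecycle sketch of the pool API documented above (illustrative only; kbdev is assumed to be an already-initialised struct kbase_device, and no next_pool chaining is used):

	struct kbase_mem_pool pool;
	struct page *p;
	int err;

	err = kbase_mem_pool_init(&pool, KBASE_MEM_POOL_MAX_SIZE_KCTX,
			kbdev, NULL);
	if (err)
		return err;			/* negative errno from init */

	p = kbase_mem_pool_alloc(&pool);	/* NULL if pool and next_pool are empty */
	if (p)
		kbase_mem_pool_free(&pool, p, false);	/* page not dirty in cache */

	kbase_mem_pool_term(&pool);	/* remaining pages spill to next_pool or the kernel */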
++ ++ ++ ++/* ++ * Job manager common APIs + */ -+void kbase_mem_pool_term(struct kbase_mem_pool *pool); ++ ++#ifndef _KBASE_JM_H_ ++#define _KBASE_JM_H_ + +/** -+ * kbase_mem_pool_alloc - Allocate a page from memory pool -+ * @pool: Memory pool to allocate from ++ * kbase_jm_kick() - Indicate that there are jobs ready to run. ++ * @kbdev: Device pointer ++ * @js_mask: Mask of the job slots that can be pulled from. + * -+ * Allocations from the pool are made as follows: -+ * 1. If there are free pages in the pool, allocate a page from @pool. -+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page -+ * from @next_pool. -+ * 3. Return NULL if no memory in the pool ++ * Caller must hold the hwaccess_lock and schedule_sem semaphore + * -+ * Return: Pointer to allocated page, or NULL if allocation failed. ++ * Return: Mask of the job slots that can still be submitted to. + */ -+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); ++u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); + +/** -+ * kbase_mem_pool_free - Free a page to memory pool -+ * @pool: Memory pool where page should be freed -+ * @page: Page to free to the pool -+ * @dirty: Whether some of the page may be dirty in the cache. ++ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job ++ * slots. ++ * @kbdev: Device pointer + * -+ * Pages are freed to the pool as follows: -+ * 1. If @pool is not full, add @page to @pool. -+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to -+ * @next_pool. -+ * 3. Finally, free @page to the kernel. ++ * Caller must hold the hwaccess_lock and schedule_sem semaphore ++ * ++ * Return: Mask of the job slots that can still be submitted to. + */ -+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, -+ bool dirty); ++static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) ++{ ++ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); ++} + +/** -+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool -+ * @pool: Memory pool to allocate from -+ * @nr_pages: Number of pages to allocate -+ * @pages: Pointer to array where the physical address of the allocated -+ * pages will be stored. -+ * -+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. ++ * kbase_jm_try_kick - Attempt to call kbase_jm_kick ++ * @kbdev: Device pointer ++ * @js_mask: Mask of the job slots that can be pulled from ++ * Context: Caller must hold hwaccess_lock + * -+ * Return: 0 on success, negative -errno on error ++ * If schedule_sem can be immediately obtained then this function will call ++ * kbase_jm_kick() otherwise it will do nothing. + */ -+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ phys_addr_t *pages); ++void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + +/** -+ * kbase_mem_pool_free_pages - Free pages to memory pool -+ * @pool: Memory pool where pages should be freed -+ * @nr_pages: Number of pages to free -+ * @pages: Pointer to array holding the physical addresses of the pages to -+ * free. -+ * @dirty: Whether any pages may be dirty in the cache. -+ * @reclaimed: Whether the pages where reclaimable and thus should bypass -+ * the pool and go straight to the kernel. ++ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all ++ * @kbdev: Device pointer ++ * Context: Caller must hold hwaccess_lock + * -+ * Like kbase_mem_pool_free() but optimized for freeing many pages. 
++ * If schedule_sem can be immediately obtained then this function will call ++ * kbase_jm_kick_all() otherwise it will do nothing. + */ -+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ phys_addr_t *pages, bool dirty, bool reclaimed); ++void kbase_jm_try_kick_all(struct kbase_device *kbdev); + +/** -+ * kbase_mem_pool_size - Get number of free pages in memory pool -+ * @pool: Memory pool to inspect ++ * kbase_jm_idle_ctx() - Mark a context as idle. ++ * @kbdev: Device pointer ++ * @kctx: Context to mark as idle + * -+ * Note: the size of the pool may in certain corner cases exceed @max_size! ++ * No more atoms will be pulled from this context until it is marked as active ++ * by kbase_js_use_ctx(). + * -+ * Return: Number of free pages in the pool ++ * The context should have no atoms currently pulled from it ++ * (kctx->atoms_pulled == 0). ++ * ++ * Caller must hold the hwaccess_lock + */ -+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) -+{ -+ return READ_ONCE(pool->cur_size); -+} ++void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** -+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool -+ * @pool: Memory pool to inspect ++ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has ++ * been soft-stopped or will fail due to a ++ * dependency ++ * @kbdev: Device pointer ++ * @katom: Atom that has been stopped or will be failed + * -+ * Return: Maximum number of free pages in the pool ++ * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ -+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) -+{ -+ return pool->max_size; -+} -+ ++struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + +/** -+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool -+ * @pool: Memory pool to inspect -+ * @max_size: Maximum number of free pages the pool can hold ++ * kbase_jm_complete() - Complete an atom ++ * @kbdev: Device pointer ++ * @katom: Atom that has completed ++ * @end_timestamp: Timestamp of atom completion + * -+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. -+ * For details see kbase_mem_pool_shrink(). ++ * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ -+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); ++struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom, ktime_t *end_timestamp); + -+/** -+ * kbase_mem_pool_grow - Grow the pool -+ * @pool: Memory pool to grow -+ * @nr_to_grow: Number of pages to add to the pool ++#endif /* _KBASE_JM_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c +new file mode 100644 +index 000000000..10a1d5909 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_js.c +@@ -0,0 +1,2834 @@ ++/* + * -+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to -+ * become larger than the maximum size specified. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages + */ -+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); + -+/** -+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size -+ * @pool: Memory pool to trim -+ * @new_size: New number of pages in the pool -+ * -+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but -+ * not above the max_size for the pool. -+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. ++/* #define ENABLE_DEBUG_LOG */ ++#include "./platform/rk/custom_log.h" ++ ++/* ++ * Job Scheduler Implementation + */ -+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); ++#include ++#include ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++#include ++#endif ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "mali_kbase_jm.h" ++#include "mali_kbase_hwaccess_jm.h" + +/* -+ * kbase_mem_alloc_page - Allocate a new page for a device -+ * @kbdev: The kbase device -+ * -+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that -+ * function can fail in the event the pool is empty. -+ * -+ * Return: A new page or NULL if no memory ++ * Private types + */ -+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev); + -+int kbase_region_tracker_init(struct kbase_context *kctx); -+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); -+void kbase_region_tracker_term(struct kbase_context *kctx); ++/* Bitpattern indicating the result of releasing a context */ ++enum { ++ /* The context was descheduled - caller should try scheduling in a new ++ * one to keep the runpool full */ ++ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), ++ /* Ctx attributes were changed - caller should try scheduling all ++ * contexts */ ++ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) ++}; + -+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); ++typedef u32 kbasep_js_release_result; + -+/** -+ * @brief Check that a pointer is actually a valid region. -+ * -+ * Must be called with context lock held. 
++const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { ++ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ ++ KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ ++}; ++ ++const base_jd_prio ++kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { ++ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ ++ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ ++ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ ++}; ++ ++ ++/* ++ * Private function prototypes + */ -+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr); ++static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( ++ struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state); + -+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone); -+void kbase_free_alloced_region(struct kbase_va_region *reg); -+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); ++static int kbase_js_get_slot(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+bool kbase_check_alloc_flags(unsigned long flags); -+bool kbase_check_import_flags(unsigned long flags); ++static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, ++ kbasep_js_ctx_job_cb callback); ++ ++/* Helper for trace subcodes */ ++#if KBASE_TRACE_ENABLE ++static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ return atomic_read(&kctx->refcount); ++} ++#else /* KBASE_TRACE_ENABLE */ ++static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ CSTD_UNUSED(kbdev); ++ CSTD_UNUSED(kctx); ++ return 0; ++} ++#endif /* KBASE_TRACE_ENABLE */ ++ ++/* ++ * Private functions ++ */ + +/** -+ * kbase_update_region_flags - Convert user space flags to kernel region flags -+ * -+ * @kctx: kbase context -+ * @reg: The region to update the flags on -+ * @flags: The flags passed from user space ++ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements ++ * @features: JSn_FEATURE register value + * -+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and -+ * this function will fail if the system does not support system coherency. 
++ * Given a JSn_FEATURE register value returns the core requirements that match + * -+ * Return: 0 if successful, -EINVAL if the flags are not supported ++ * Return: Core requirement bit mask + */ -+int kbase_update_region_flags(struct kbase_context *kctx, -+ struct kbase_va_region *reg, unsigned long flags); ++static base_jd_core_req core_reqs_from_jsn_features(u16 features) ++{ ++ base_jd_core_req core_req = 0u; + -+void kbase_gpu_vm_lock(struct kbase_context *kctx); -+void kbase_gpu_vm_unlock(struct kbase_context *kctx); ++ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) ++ core_req |= BASE_JD_REQ_V; + -+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); ++ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) ++ core_req |= BASE_JD_REQ_CF; + -+int kbase_mmu_init(struct kbase_context *kctx); -+void kbase_mmu_term(struct kbase_context *kctx); ++ if ((features & JS_FEATURE_COMPUTE_JOB) != 0) ++ core_req |= BASE_JD_REQ_CS; + -+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); -+void kbase_mmu_free_pgd(struct kbase_context *kctx); -+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t *phys, size_t nr, -+ unsigned long flags); -+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t *phys, size_t nr, -+ unsigned long flags); -+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t phys, size_t nr, -+ unsigned long flags); ++ if ((features & JS_FEATURE_TILER_JOB) != 0) ++ core_req |= BASE_JD_REQ_T; + -+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr); -+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); ++ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) ++ core_req |= BASE_JD_REQ_FS; + -+/** -+ * @brief Register region and map it on the GPU. -+ * -+ * Call kbase_add_va_region() and map the region on the GPU. -+ */ -+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); ++ return core_req; ++} + -+/** -+ * @brief Remove the region from the GPU and unregister it. -+ * -+ * Must be called with context lock held. -+ */ -+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); ++static void kbase_js_sync_timers(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&kbdev->js_data.runpool_mutex); ++} + -+/** -+ * The caller has the following locking conditions: -+ * - It must hold kbase_device->mmu_hw_mutex -+ * - It must hold the hwaccess_lock -+ */ -+void kbase_mmu_update(struct kbase_context *kctx); ++/* Hold the mmu_hw_mutex and hwaccess_lock for this */ ++bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ bool result = false; ++ int as_nr; + -+/** -+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. -+ * @kctx: Kbase context -+ * -+ * Disable and perform the required cache maintenance to remove the all -+ * data from provided kbase context from the GPU caches. 
-+ * -+ * The caller has the following locking conditions: -+ * - It must hold kbase_device->mmu_hw_mutex -+ * - It must hold the hwaccess_lock -+ */ -+void kbase_mmu_disable(struct kbase_context *kctx); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_devdata = &kbdev->js_data; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ as_nr = kctx->as_nr; ++ if (atomic_read(&kctx->refcount) > 0) { ++ KBASE_DEBUG_ASSERT(as_nr >= 0); ++ ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, ++ NULL, 0u, atomic_read(&kctx->refcount)); ++ result = true; ++ } ++ ++ return result; ++} + +/** -+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified -+ * address space. -+ * @kbdev: Kbase device -+ * @as_nr: The address space number to set to unmapped. ++ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. ++ * @prio: Priority to check. + * -+ * This function must only be called during reset/power-up and it used to -+ * ensure the registers are in a known state. ++ * Return true if there are no atoms to pull. There may be running atoms in the ++ * ring buffer even if there are no atoms to pull. It is also possible for the ++ * ring buffer to be full (with running atoms) when this functions returns ++ * true. + * -+ * The caller must hold kbdev->mmu_hw_mutex. ++ * Return: true if there are no atoms to pull, false otherwise. + */ -+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); ++static inline bool ++jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) ++{ ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + -+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+/** Dump the MMU tables to a buffer -+ * -+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the -+ * buffer is too small then the return value will be NULL. -+ * -+ * The GPU vm lock must be held when calling this function. -+ * -+ * The buffer returned should be freed with @ref vfree when it is no longer required. -+ * -+ * @param[in] kctx The kbase context to dump -+ * @param[in] nr_pages The number of pages to allocate for the buffer. -+ * -+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too -+ * small) -+ */ -+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); ++ return RB_EMPTY_ROOT(&rb->runnable_tree); ++} + +/** -+ * kbase_sync_now - Perform cache maintenance on a memory region ++ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no ++ * pullable atoms ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. + * -+ * @kctx: The kbase context of the region -+ * @sset: A syncset structure describing the region and direction of the -+ * synchronisation required ++ * Caller must hold hwaccess_lock + * -+ * Return: 0 on success or error code ++ * Return: true if the ring buffers for all priorities have no pullable atoms, ++ * false otherwise. 
+ */ -+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); -+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa, -+ phys_addr_t gpu_pa, off_t offset, size_t size, -+ enum kbase_sync_type sync_fn); -+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); -+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); ++static inline bool ++jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) ++{ ++ int prio; + -+/* OS specific functions */ -+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); -+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); -+void kbase_os_mem_map_lock(struct kbase_context *kctx); -+void kbase_os_mem_map_unlock(struct kbase_context *kctx); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) ++ return false; ++ } ++ ++ return true; ++} + +/** -+ * @brief Update the memory allocation counters for the current process ++ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. ++ * @kctx: Pointer to kbase context with the queue. ++ * @js: Job slot id to iterate. ++ * @prio: Priority id to iterate. ++ * @callback: Function pointer to callback. + * -+ * OS specific call to updates the current memory allocation counters for the current process with -+ * the supplied delta. ++ * Iterate over a queue and invoke @callback for each entry in the queue, and ++ * remove the entry from the queue. + * -+ * @param[in] kctx The kbase context -+ * @param[in] pages The desired delta to apply to the memory usage counters. ++ * If entries are added to the queue while this is running those entries may, or ++ * may not be covered. To ensure that all entries in the buffer have been ++ * enumerated when this function returns jsctx->lock must be held when calling ++ * this function. ++ * ++ * The HW access lock must always be held when calling this function. + */ ++static void ++jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, ++ kbasep_js_ctx_job_cb callback) ++{ ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + -+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { ++ struct rb_node *node = rb_first(&queue->runnable_tree); ++ struct kbase_jd_atom *entry = rb_entry(node, ++ struct kbase_jd_atom, runnable_tree_node); ++ ++ rb_erase(node, &queue->runnable_tree); ++ callback(kctx->kbdev, entry); ++ } ++ ++ while (!list_empty(&queue->x_dep_head)) { ++ struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, ++ struct kbase_jd_atom, queue); ++ ++ list_del(queue->x_dep_head.next); ++ ++ callback(kctx->kbdev, entry); ++ } ++} + +/** -+ * @brief Add to the memory allocation counters for the current process -+ * -+ * OS specific call to add to the current memory allocation counters for the current process by -+ * the supplied amount. ++ * jsctx_queue_foreach(): - Execute callback for each entry in every queue ++ * @kctx: Pointer to kbase context with queue. ++ * @js: Job slot id to iterate. ++ * @callback: Function pointer to callback. + * -+ * @param[in] kctx The kernel base context used for the allocation. -+ * @param[in] pages The desired delta to apply to the memory usage counters. 
++ * Iterate over all the different priorities, and for each call ++ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback ++ * for each entry, and remove the entry from the queue. + */ -+ -+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) ++static inline void ++jsctx_queue_foreach(struct kbase_context *kctx, int js, ++ kbasep_js_ctx_job_cb callback) +{ -+ kbasep_os_process_page_usage_update(kctx, pages); ++ int prio; ++ ++ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) ++ jsctx_queue_foreach_prio(kctx, js, prio, callback); +} + +/** -+ * @brief Subtract from the memory allocation counters for the current process ++ * jsctx_rb_peek_prio(): - Check buffer and get next atom ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. ++ * @prio: Priority id to check. + * -+ * OS specific call to subtract from the current memory allocation counters for the current process by -+ * the supplied amount. ++ * Check the ring buffer for the specified @js and @prio and return a pointer to ++ * the next atom, unless the ring buffer is empty. + * -+ * @param[in] kctx The kernel base context used for the allocation. -+ * @param[in] pages The desired delta to apply to the memory usage counters. ++ * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ -+ -+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) ++static inline struct kbase_jd_atom * ++jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +{ -+ kbasep_os_process_page_usage_update(kctx, 0 - pages); ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; ++ struct rb_node *node; ++ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ node = rb_first(&rb->runnable_tree); ++ if (!node) ++ return NULL; ++ ++ return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); +} + +/** -+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU -+ * mapping of a memory allocation containing a given address range ++ * jsctx_rb_peek(): - Check all priority buffers and get next atom ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @js: Job slot id to check. + * -+ * Searches for a CPU mapping of any part of any region that fully encloses the -+ * CPU virtual address range specified by @uaddr and @size. Returns a failure -+ * indication if only part of the address range lies within a CPU mapping. ++ * Check the ring buffers for all priorities, starting from ++ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a ++ * pointer to the next atom, unless all the priority's ring buffers are empty. + * -+ * @kctx: The kernel base context used for the allocation. -+ * @uaddr: Start of the CPU virtual address range. -+ * @size: Size of the CPU virtual address range (in bytes). -+ * @offset: The offset from the start of the allocation to the specified CPU -+ * virtual address. ++ * Caller must hold the hwaccess_lock. + * -+ * Return: 0 if offset was obtained successfully. Error code otherwise. ++ * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
+ */ -+int kbasep_find_enclosing_cpu_mapping_offset( -+ struct kbase_context *kctx, -+ unsigned long uaddr, size_t size, u64 *offset); -+ -+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer); -+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); -+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); ++static inline struct kbase_jd_atom * ++jsctx_rb_peek(struct kbase_context *kctx, int js) ++{ ++ int prio; + -+/** -+* @brief Allocates physical pages. -+* -+* Allocates \a nr_pages_requested and updates the alloc object. -+* -+* @param[in] alloc allocation object to add pages to -+* @param[in] nr_pages_requested number of physical pages to allocate -+* -+* @return 0 if all pages have been successfully allocated. Error code otherwise -+*/ -+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+/** -+* @brief Free physical pages. -+* -+* Frees \a nr_pages and updates the alloc object. -+* -+* @param[in] alloc allocation object to free pages from -+* @param[in] nr_pages_to_free number of physical pages to free -+*/ -+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); ++ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { ++ struct kbase_jd_atom *katom; + -+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) -+{ -+ SetPagePrivate(p); -+ if (sizeof(dma_addr_t) > sizeof(p->private)) { -+ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the -+ * private field stays the same. So we have to be clever and -+ * use the fact that we only store DMA addresses of whole pages, -+ * so the low bits should be zero */ -+ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); -+ set_page_private(p, dma_addr >> PAGE_SHIFT); -+ } else { -+ set_page_private(p, dma_addr); ++ katom = jsctx_rb_peek_prio(kctx, js, prio); ++ if (katom) ++ return katom; + } ++ ++ return NULL; +} + -+static inline dma_addr_t kbase_dma_addr(struct page *p) ++/** ++ * jsctx_rb_pull(): - Mark atom in list as running ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @katom: Pointer to katom to pull. ++ * ++ * Mark an atom previously obtained from jsctx_rb_peek() as running. ++ * ++ * @katom must currently be at the head of the ring buffer. ++ */ ++static inline void ++jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ if (sizeof(dma_addr_t) > sizeof(p->private)) -+ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; ++ int prio = katom->sched_priority; ++ int js = katom->slot_nr; ++ struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + -+ return (dma_addr_t)page_private(p); -+} ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+static inline void kbase_clear_dma_addr(struct page *p) -+{ -+ ClearPagePrivate(p); ++ /* Atoms must be pulled in the correct order. */ ++ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); ++ ++ rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); +} + -+/** -+* @brief Process a bus or page fault. -+* -+* This function will process a fault on a specific address space -+* -+* @param[in] kbdev The @ref kbase_device the fault happened on -+* @param[in] kctx The @ref kbase_context for the faulting address space if -+* one was found. 
-+* @param[in] as The address space that has the fault -+*/ -+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, -+ struct kbase_context *kctx, struct kbase_as *as); -+ -+/** -+ * @brief Process a page fault. -+ * -+ * @param[in] data work_struct passed by queue_work() -+ */ -+void page_fault_worker(struct work_struct *data); -+ -+/** -+ * @brief Process a bus fault. -+ * -+ * @param[in] data work_struct passed by queue_work() -+ */ -+void bus_fault_worker(struct work_struct *data); -+ -+/** -+ * @brief Flush MMU workqueues. -+ * -+ * This function will cause any outstanding page or bus faults to be processed. -+ * It should be called prior to powering off the GPU. -+ * -+ * @param[in] kbdev Device pointer -+ */ -+void kbase_flush_mmu_wqs(struct kbase_device *kbdev); -+ -+/** -+ * kbase_sync_single_for_device - update physical memory and give GPU ownership -+ * @kbdev: Device pointer -+ * @handle: DMA address of region -+ * @size: Size of region to sync -+ * @dir: DMA data direction -+ */ ++#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) + -+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir); ++static void ++jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++{ ++ int prio = katom->sched_priority; ++ int js = katom->slot_nr; ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; ++ struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; + -+/** -+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership -+ * @kbdev: Device pointer -+ * @handle: DMA address of region -+ * @size: Size of region to sync -+ * @dir: DMA data direction -+ */ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, -+ size_t size, enum dma_data_direction dir); ++ while (*new) { ++ struct kbase_jd_atom *entry = container_of(*new, ++ struct kbase_jd_atom, runnable_tree_node); + -+#ifdef CONFIG_DEBUG_FS -+/** -+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. -+ * @kctx: kbase context -+ */ -+void kbase_jit_debugfs_init(struct kbase_context *kctx); -+#endif /* CONFIG_DEBUG_FS */ ++ parent = *new; ++ if (LESS_THAN_WRAP(katom->age, entry->age)) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } + -+/** -+ * kbase_jit_init - Initialize the JIT memory pool management -+ * @kctx: kbase context -+ * -+ * Returns zero on success or negative error number on failure. -+ */ -+int kbase_jit_init(struct kbase_context *kctx); ++ /* Add new node and rebalance tree. */ ++ rb_link_node(&katom->runnable_tree_node, parent, new); ++ rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); ++} + +/** -+ * kbase_jit_allocate - Allocate JIT memory -+ * @kctx: kbase context -+ * @info: JIT allocation information ++ * jsctx_rb_unpull(): - Undo marking of atom in list as running ++ * @kctx: Pointer to kbase context with ring buffer. ++ * @katom: Pointer to katom to unpull. + * -+ * Return: JIT allocation on success or NULL on failure. -+ */ -+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, -+ struct base_jit_alloc_info *info); -+ -+/** -+ * kbase_jit_free - Free a JIT allocation -+ * @kctx: kbase context -+ * @reg: JIT allocation ++ * Undo jsctx_rb_pull() and put @katom back in the queue. + * -+ * Frees a JIT allocation and places it into the free pool for later reuse. 
++ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were ++ * pulled. + */ -+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); ++static inline void ++jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) ++{ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+/** -+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing -+ * @reg: JIT allocation -+ */ -+void kbase_jit_backing_lost(struct kbase_va_region *reg); ++ jsctx_tree_add(kctx, katom); ++} + -+/** -+ * kbase_jit_evict - Evict a JIT allocation from the pool -+ * @kctx: kbase context -+ * -+ * Evict the least recently used JIT allocation from the pool. This can be -+ * required if normal VA allocations are failing due to VA exhaustion. -+ * -+ * Return: True if a JIT allocation was freed, false otherwise. -+ */ -+bool kbase_jit_evict(struct kbase_context *kctx); ++static bool kbase_js_ctx_pullable(struct kbase_context *kctx, ++ int js, ++ bool is_scheduled); ++static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js); ++static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js); + -+/** -+ * kbase_jit_term - Terminate the JIT memory pool management -+ * @kctx: kbase context ++/* ++ * Functions private to KBase ('Protected' functions) + */ -+void kbase_jit_term(struct kbase_context *kctx); ++int kbasep_js_devdata_init(struct kbase_device * const kbdev) ++{ ++ struct kbasep_js_device_data *jsdd; ++ int i; + -+/** -+ * kbase_map_external_resource - Map an external resource to the GPU. -+ * @kctx: kbase context. -+ * @reg: The region to map. -+ * @locked_mm: The mm_struct which has been locked for this operation. -+ * @kds_res_count: The number of KDS resources. -+ * @kds_resources: Array of KDS resources. -+ * @kds_access_bitmap: Access bitmap for KDS. -+ * @exclusive: If the KDS resource requires exclusive access. -+ * -+ * Return: The physical allocation which backs the region on success or NULL -+ * on failure. -+ */ -+struct kbase_mem_phy_alloc *kbase_map_external_resource( -+ struct kbase_context *kctx, struct kbase_va_region *reg, -+ struct mm_struct *locked_mm -+#ifdef CONFIG_KDS -+ , u32 *kds_res_count, struct kds_resource **kds_resources, -+ unsigned long *kds_access_bitmap, bool exclusive -+#endif -+ ); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+/** -+ * kbase_unmap_external_resource - Unmap an external resource from the GPU. -+ * @kctx: kbase context. -+ * @reg: The region to unmap or NULL if it has already been released. -+ * @alloc: The physical allocation being unmapped. -+ */ -+void kbase_unmap_external_resource(struct kbase_context *kctx, -+ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); ++ jsdd = &kbdev->js_data; + -+/** -+ * kbase_sticky_resource_init - Initialize sticky resource management. -+ * @kctx: kbase context -+ * -+ * Returns zero on success or negative error number on failure. 
-+ */ -+int kbase_sticky_resource_init(struct kbase_context *kctx); ++#ifdef CONFIG_MALI_DEBUG ++ /* Soft-stop will be disabled on a single context by default unless ++ * softstop_always is set */ ++ jsdd->softstop_always = false; ++#endif /* CONFIG_MALI_DEBUG */ ++ jsdd->nr_all_contexts_running = 0; ++ jsdd->nr_user_contexts_running = 0; ++ jsdd->nr_contexts_pullable = 0; ++ atomic_set(&jsdd->nr_contexts_runnable, 0); ++ /* No ctx allowed to submit */ ++ jsdd->runpool_irq.submit_allowed = 0u; ++ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, ++ sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); ++ memset(jsdd->runpool_irq.slot_affinities, 0, ++ sizeof(jsdd->runpool_irq.slot_affinities)); ++ memset(jsdd->runpool_irq.slot_affinity_refcount, 0, ++ sizeof(jsdd->runpool_irq.slot_affinity_refcount)); ++ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); + -+/** -+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. -+ * @kctx: kbase context. -+ * @gpu_addr: The GPU address of the external resource. -+ * -+ * Return: The metadata object which represents the binding between the -+ * external resource and the kbase context on success or NULL on failure. -+ */ -+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( -+ struct kbase_context *kctx, u64 gpu_addr); ++ /* Config attributes */ ++ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; ++ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; ++ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) ++ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408; ++ else ++ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; ++ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; ++ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) ++ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408; ++ else ++ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; ++ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; ++ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; ++ jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; ++ atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); + -+/** -+ * kbase_sticky_resource_release - Release a reference on a sticky resource. -+ * @kctx: kbase context. -+ * @meta: Binding metadata. -+ * @gpu_addr: GPU address of the external resource. -+ * -+ * If meta is NULL then gpu_addr will be used to scan the metadata list and -+ * find the matching metadata (if any), otherwise the provided meta will be -+ * used and gpu_addr will be ignored. -+ * -+ * Return: True if the release found the metadata and the reference was dropped. 
-+ */ -+bool kbase_sticky_resource_release(struct kbase_context *kctx, -+ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); ++ dev_dbg(kbdev->dev, "JS Config Attribs: "); ++ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", ++ jsdd->scheduling_period_ns); ++ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", ++ jsdd->soft_stop_ticks); ++ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", ++ jsdd->soft_stop_ticks_cl); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", ++ jsdd->hard_stop_ticks_ss); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", ++ jsdd->hard_stop_ticks_cl); ++ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", ++ jsdd->hard_stop_ticks_dumping); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", ++ jsdd->gpu_reset_ticks_ss); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", ++ jsdd->gpu_reset_ticks_cl); ++ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", ++ jsdd->gpu_reset_ticks_dumping); ++ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", ++ jsdd->ctx_timeslice_ns); ++ dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", ++ atomic_read(&jsdd->soft_job_timeout_ms)); + -+/** -+ * kbase_sticky_resource_term - Terminate sticky resource management. -+ * @kctx: kbase context -+ */ -+void kbase_sticky_resource_term(struct kbase_context *kctx); ++ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && ++ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && ++ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && ++ jsdd->hard_stop_ticks_dumping < ++ jsdd->gpu_reset_ticks_dumping)) { ++ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); ++ return -EINVAL; ++ } + -+#endif /* _KBASE_MEM_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c -new file mode 100644 -index 000000000..de12cdf76 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c -@@ -0,0 +1,2574 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS ++ dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", ++ jsdd->soft_stop_ticks, ++ jsdd->scheduling_period_ns); ++#endif ++#if KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. 
Other hard-stops may still occur.", ++ jsdd->hard_stop_ticks_ss, ++ jsdd->hard_stop_ticks_dumping, ++ jsdd->scheduling_period_ns); ++#endif ++#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS ++ dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); ++#endif + ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) ++ jsdd->js_reqs[i] = core_reqs_from_jsn_features( ++ kbdev->gpu_props.props.raw_props.js_features[i]); + ++ /* On error, we could continue on: providing none of the below resources ++ * rely on the ones above */ + ++ mutex_init(&jsdd->runpool_mutex); ++ mutex_init(&jsdd->queue_mutex); ++ spin_lock_init(&kbdev->hwaccess_lock); ++ sema_init(&jsdd->schedule_sem, 1); + ++ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { ++ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]); ++ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]); ++ } + -+/** -+ * @file mali_kbase_mem_linux.c -+ * Base kernel memory APIs, Linux implementation. -+ */ ++ return 0; ++} + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ -+ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+#include -+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+#include -+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -+#include -+#include -+#include ++void kbasep_js_devdata_halt(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} + -+#include -+#include -+#include -+#include -+#include ++void kbasep_js_devdata_term(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + -+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+/** -+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region -+ * @new_pages: The number of pages after the shrink -+ * @old_pages: The number of pages before the shrink -+ * -+ * Shrink (or completely remove) all CPU mappings which reference the shrunk -+ * part of the allocation. -+ * -+ * Note: Caller must be holding the processes mmap_lock lock. -+ */ -+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages); ++ js_devdata = &kbdev->js_data; + -+/** -+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region or NULL if there isn't one -+ * @new_pages: The number of pages after the shrink -+ * @old_pages: The number of pages before the shrink -+ * -+ * Return: 0 on success, negative -errno on error -+ * -+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region -+ * itself is unmodified as we still need to reserve the VA, only the page tables -+ * will be modified by this function. 
-+ */ -+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages); ++ /* The caller must de-register all contexts before calling this ++ */ ++ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); ++ KBASE_DEBUG_ASSERT(memcmp( ++ js_devdata->runpool_irq.ctx_attr_ref_count, ++ zero_ctx_attr_ref_count, ++ sizeof(zero_ctx_attr_ref_count)) == 0); ++ CSTD_UNUSED(zero_ctx_attr_ref_count); ++} + -+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, -+ u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, -+ u64 *gpu_va) ++int kbasep_js_kctx_init(struct kbase_context * const kctx) +{ -+ int zone; -+ int gpu_pc_bits; -+ int cpu_va_bits; -+ struct kbase_va_region *reg; -+ struct device *dev; ++ struct kbase_device *kbdev; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ int i, j; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(flags); -+ KBASE_DEBUG_ASSERT(gpu_va); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ dev = kctx->kbdev->dev; -+ *gpu_va = 0; /* return 0 on failure */ ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; -+ cpu_va_bits = BITS_PER_LONG; ++ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) ++ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); + -+ if (0 == va_pages) { -+ dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!"); -+ goto bad_size; -+ } ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (va_pages > (U64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ js_kctx_info->ctx.nr_jobs = 0; ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); ++ kbase_ctx_flag_clear(kctx, KCTX_DYING); ++ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, ++ sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + -+#if defined(CONFIG_64BIT) -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ cpu_va_bits = 32; -+#endif ++ /* Initially, the context is disabled from submission until the create ++ * flags are set */ ++ kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + -+ if (!kbase_check_alloc_flags(*flags)) { -+ dev_warn(dev, -+ "kbase_mem_alloc called with bad flags (%llx)", -+ (unsigned long long)*flags); -+ goto bad_flags; -+ } ++ /* On error, we could continue on: providing none of the below resources ++ * rely on the ones above */ ++ mutex_init(&js_kctx_info->ctx.jsctx_mutex); + -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); -+ goto bad_flags; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM; ++ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); ++ ++ for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { ++ for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { ++ INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); ++ kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; ++ } + } + -+ /* Limit GPU executable allocs to GPU PC size */ -+ if ((*flags & BASE_MEM_PROT_GPU_EX) && -+ (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT))) -+ goto bad_ex_size; ++ return 0; ++} + -+ /* find out which VA zone to use */ -+ if (*flags & BASE_MEM_SAME_VA) -+ zone = KBASE_REG_ZONE_SAME_VA; -+ else if (*flags & BASE_MEM_PROT_GPU_EX) -+ zone = KBASE_REG_ZONE_EXEC; -+ else -+ zone = KBASE_REG_ZONE_CUSTOM_VA; 
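++
++ /*
++ * The loops above give each (priority, job slot) pair its own
++ * jsctx_queue: runnable_tree is an rbtree of runnable atoms ordered by
++ * age (see jsctx_tree_add() and LESS_THAN_WRAP()), while x_dep_head is
++ * the list used while an atom is still held back by a cross-slot
++ * dependency (cf. KBASE_KATOM_FLAG_X_DEP_BLOCKED in
++ * kbase_js_dep_validate()).
++ */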
++void kbasep_js_kctx_term(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ int js; ++ bool update_ctx_count = false; + -+ reg = kbase_alloc_free_region(kctx, 0, va_pages, zone); -+ if (!reg) { -+ dev_err(dev, "Failed to allocate free region"); -+ goto no_region; -+ } ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ kbdev = kctx->kbdev; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ if (kbase_reg_prepare_native(reg, kctx) != 0) { -+ dev_err(dev, "Failed to prepare region"); -+ goto prepare_failed; -+ } ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (*flags & BASE_MEM_GROW_ON_GPF) -+ reg->extent = extent; -+ else -+ reg->extent = 0; ++ /* The caller must de-register all jobs before calling this */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + -+ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { -+ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", -+ (unsigned long long)commit_pages, -+ (unsigned long long)va_pages); -+ goto no_mem; -+ } ++ mutex_lock(&kbdev->js_data.queue_mutex); ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + -+ kbase_gpu_vm_lock(kctx); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ /* mmap needed to setup VA? */ -+ if (*flags & BASE_MEM_SAME_VA) { -+ unsigned long prot = PROT_NONE; -+ unsigned long va_size = va_pages << PAGE_SHIFT; -+ unsigned long va_map = va_size; -+ unsigned long cookie, cookie_nr; -+ unsigned long cpu_addr; ++ if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { ++ WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ update_ctx_count = true; ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ } + -+ /* Bind to a cookie */ -+ if (!kctx->cookies) { -+ dev_err(dev, "No cookies available for allocation!"); -+ kbase_gpu_vm_unlock(kctx); -+ goto no_cookie; -+ } -+ /* return a cookie */ -+ cookie_nr = __ffs(kctx->cookies); -+ kctx->cookies &= ~(1UL << cookie_nr); -+ BUG_ON(kctx->pending_regions[cookie_nr]); -+ kctx->pending_regions[cookie_nr] = reg; ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&kbdev->js_data.queue_mutex); + -+ kbase_gpu_vm_unlock(kctx); ++ if (update_ctx_count) { ++ mutex_lock(&kbdev->js_data.runpool_mutex); ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&kbdev->js_data.runpool_mutex); ++ } ++} + -+ /* relocate to correct base */ -+ cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ cookie <<= PAGE_SHIFT; ++/** ++ * kbase_js_ctx_list_add_pullable_nolock - Variant of ++ * kbase_jd_ctx_list_add_pullable() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js) ++{ ++ bool ret = false; + -+ /* -+ * 10.1-10.4 UKU userland relies on the kernel to call mmap. 
-+ * For all other versions we can just return the cookie -+ */ -+ if (kctx->api_version < KBASE_API_VERSION(10, 1) || -+ kctx->api_version > KBASE_API_VERSION(10, 4)) { -+ *gpu_va = (u64) cookie; -+ return reg; -+ } -+ if (*flags & BASE_MEM_PROT_CPU_RD) -+ prot |= PROT_READ; -+ if (*flags & BASE_MEM_PROT_CPU_WR) -+ prot |= PROT_WRITE; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, -+ MAP_SHARED, cookie); ++ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ if (IS_ERR_VALUE(cpu_addr)) { -+ kbase_gpu_vm_lock(kctx); -+ kctx->pending_regions[cookie_nr] = NULL; -+ kctx->cookies |= (1UL << cookie_nr); -+ kbase_gpu_vm_unlock(kctx); -+ goto no_mmap; -+ } ++ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_pullable[js]); + -+ *gpu_va = (u64) cpu_addr; -+ } else /* we control the VA */ { -+ if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { -+ dev_warn(dev, "Failed to map memory on GPU"); -+ kbase_gpu_vm_unlock(kctx); -+ goto no_mmap; ++ if (!kctx->slots_pullable) { ++ kbdev->js_data.nr_contexts_pullable++; ++ ret = true; ++ if (!atomic_read(&kctx->atoms_pulled)) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; -+ -+ kbase_gpu_vm_unlock(kctx); + } ++ kctx->slots_pullable |= (1 << js); + -+ return reg; -+ -+no_mmap: -+no_cookie: -+no_mem: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+invalid_flags: -+prepare_failed: -+ kfree(reg); -+no_region: -+bad_ex_size: -+bad_flags: -+bad_size: -+ return NULL; ++ return ret; +} -+KBASE_EXPORT_TEST_API(kbase_mem_alloc); + -+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out) ++/** ++ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of ++ * kbase_js_ctx_list_add_pullable_head() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool kbase_js_ctx_list_add_pullable_head_nolock( ++ struct kbase_device *kbdev, struct kbase_context *kctx, int js) +{ -+ struct kbase_va_region *reg; -+ int ret = -EINVAL; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(out); ++ bool ret = false; + -+ if (gpu_addr & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); -+ return -EINVAL; -+ } ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ kbase_gpu_vm_lock(kctx); ++ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (!reg || (reg->flags & KBASE_REG_FREE)) -+ goto out_unlock; ++ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_pullable[js]); + -+ switch (query) { -+ case KBASE_MEM_QUERY_COMMIT_SIZE: -+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { -+ *out = kbase_reg_current_backed_size(reg); -+ } else { -+ size_t i; -+ struct kbase_aliased *aliased; -+ *out = 0; -+ aliased = reg->cpu_alloc->imported.alias.aliased; -+ for (i = 0; i < 
reg->cpu_alloc->imported.alias.nents; i++) -+ *out += aliased[i].length; ++ if (!kctx->slots_pullable) { ++ kbdev->js_data.nr_contexts_pullable++; ++ ret = true; ++ if (!atomic_read(&kctx->atoms_pulled)) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } -+ break; -+ case KBASE_MEM_QUERY_VA_SIZE: -+ *out = reg->nr_pages; -+ break; -+ case KBASE_MEM_QUERY_FLAGS: -+ { -+ *out = 0; -+ if (KBASE_REG_CPU_WR & reg->flags) -+ *out |= BASE_MEM_PROT_CPU_WR; -+ if (KBASE_REG_CPU_RD & reg->flags) -+ *out |= BASE_MEM_PROT_CPU_RD; -+ if (KBASE_REG_CPU_CACHED & reg->flags) -+ *out |= BASE_MEM_CACHED_CPU; -+ if (KBASE_REG_GPU_WR & reg->flags) -+ *out |= BASE_MEM_PROT_GPU_WR; -+ if (KBASE_REG_GPU_RD & reg->flags) -+ *out |= BASE_MEM_PROT_GPU_RD; -+ if (!(KBASE_REG_GPU_NX & reg->flags)) -+ *out |= BASE_MEM_PROT_GPU_EX; -+ if (KBASE_REG_SHARE_BOTH & reg->flags) -+ *out |= BASE_MEM_COHERENT_SYSTEM; -+ if (KBASE_REG_SHARE_IN & reg->flags) -+ *out |= BASE_MEM_COHERENT_LOCAL; -+ break; -+ } -+ default: -+ *out = 0; -+ goto out_unlock; + } ++ kctx->slots_pullable |= (1 << js); + -+ ret = 0; -+ -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); + return ret; +} + +/** -+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the -+ * Ephemeral memory eviction list. -+ * @s: Shrinker -+ * @sc: Shrinker control ++ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the ++ * per-slot pullable context queue ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use + * -+ * Return: Number of pages which can be freed. ++ * If the context is on either the pullable or unpullable queues, then it is ++ * removed before being added to the head. ++ * ++ * This function should be used when a context has been scheduled, but no jobs ++ * can currently be pulled from it. ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() + */ -+static -+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, -+ struct shrink_control *sc) ++static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js) +{ -+ struct kbase_context *kctx; -+ struct kbase_mem_phy_alloc *alloc; -+ unsigned long pages = 0; -+ -+ kctx = container_of(s, struct kbase_context, reclaim); -+ -+ mutex_lock(&kctx->jit_evict_lock); ++ bool ret; ++ unsigned long flags; + -+ list_for_each_entry(alloc, &kctx->evict_list, evict_node) -+ pages += alloc->nents; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ mutex_unlock(&kctx->jit_evict_lock); -+ return pages; ++ return ret; +} + +/** -+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction -+ * list for pages and try to reclaim them. -+ * @s: Shrinker -+ * @sc: Shrinker control ++ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the ++ * per-slot unpullable context queue ++ * @kbdev: Device pointer ++ * @kctx: Context to add to queue ++ * @js: Job slot to use + * -+ * Return: Number of pages freed (can be less then requested) or -1 if the -+ * shrinker failed to free pages in its pool. ++ * The context must already be on the per-slot pullable queue. It will be ++ * removed from the pullable queue before being added to the unpullable queue. 
+ * -+ * Note: -+ * This function accesses region structures without taking the region lock, -+ * this is required as the OOM killer can call the shrinker after the region -+ * lock has already been held. -+ * This is safe as we can guarantee that a region on the eviction list will -+ * not be freed (kbase_mem_free_region removes the allocation from the list -+ * before destroying it), or modified by other parts of the driver. -+ * The eviction list itself is guarded by the eviction lock and the MMU updates -+ * are protected by their own lock. ++ * This function should be used when a context has been pulled from, and there ++ * are no jobs remaining on the specified slot. ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() + */ -+static -+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, -+ struct shrink_control *sc) ++static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js) +{ -+ struct kbase_context *kctx; -+ struct kbase_mem_phy_alloc *alloc; -+ struct kbase_mem_phy_alloc *tmp; -+ unsigned long freed = 0; ++ bool ret = false; + -+ kctx = container_of(s, struct kbase_context, reclaim); -+ mutex_lock(&kctx->jit_evict_lock); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { -+ int err; ++ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], ++ &kbdev->js_data.ctx_list_unpullable[js]); + -+ err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, -+ 0, alloc->nents); -+ if (err != 0) { -+ /* -+ * Failed to remove GPU mapping, tell the shrinker -+ * to stop trying to shrink our slab even though we -+ * have pages in it. -+ */ -+ freed = -1; -+ goto out_unlock; ++ if (kctx->slots_pullable == (1 << js)) { ++ kbdev->js_data.nr_contexts_pullable--; ++ ret = true; ++ if (!atomic_read(&kctx->atoms_pulled)) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } ++ } ++ kctx->slots_pullable &= ~(1 << js); + -+ /* -+ * Update alloc->evicted before freeing the backing so the -+ * helper can determine that it needs to bypass the accounting -+ * and memory pool. -+ */ -+ alloc->evicted = alloc->nents; ++ return ret; ++} + -+ kbase_free_phy_pages_helper(alloc, alloc->evicted); -+ freed += alloc->evicted; -+ list_del_init(&alloc->evict_node); ++/** ++ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable ++ * or unpullable context queues ++ * @kbdev: Device pointer ++ * @kctx: Context to remove from queue ++ * @js: Job slot to use ++ * ++ * The context must already be on one of the queues. ++ * ++ * This function should be used when a context has no jobs on the GPU, and no ++ * jobs remaining for the specified slot. ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if caller should call kbase_backend_ctx_count_changed() ++ */ ++static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ int js) ++{ ++ bool ret = false; + -+ /* -+ * Inform the JIT allocator this region has lost backing -+ * as it might need to free the allocation. 
-+ */ -+ kbase_jit_backing_lost(alloc->reg); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* Enough pages have been freed so stop now */ -+ if (freed > sc->nr_to_scan) -+ break; ++ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); ++ ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ ++ if (kctx->slots_pullable == (1 << js)) { ++ kbdev->js_data.nr_contexts_pullable--; ++ ret = true; ++ if (!atomic_read(&kctx->atoms_pulled)) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ } + } -+out_unlock: -+ mutex_unlock(&kctx->jit_evict_lock); ++ kctx->slots_pullable &= ~(1 << js); + -+ return freed; ++ return ret; +} + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, -+ struct shrink_control *sc) ++/** ++ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() ++ * where the caller must hold ++ * hwaccess_lock ++ * @kbdev: Device pointer ++ * @js: Job slot to use ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: Context to use for specified slot. ++ * NULL if no contexts present for specified slot ++ */ ++static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( ++ struct kbase_device *kbdev, ++ int js) +{ -+ if (sc->nr_to_scan == 0) -+ return kbase_mem_evictable_reclaim_count_objects(s, sc); ++ struct kbase_context *kctx; + -+ return kbase_mem_evictable_reclaim_scan_objects(s, sc); -+} -+#endif ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+int kbase_mem_evictable_init(struct kbase_context *kctx) -+{ -+ INIT_LIST_HEAD(&kctx->evict_list); -+ mutex_init(&kctx->jit_evict_lock); ++ if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) ++ return NULL; + -+ /* Register shrinker */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+ kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; -+#else -+ kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; -+ kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; -+#endif -+ kctx->reclaim.seeks = DEFAULT_SEEKS; -+ /* Kernel versions prior to 3.1 : -+ * struct shrinker does not define batch */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) -+ kctx->reclaim.batch = 0; -+#endif -+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE -+ register_shrinker(&kctx->reclaim); -+#else -+ register_shrinker(&kctx->reclaim, "mali-mem"); -+#endif -+ return 0; -+} ++ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next, ++ struct kbase_context, ++ jctx.sched_info.ctx.ctx_list_entry[js]); + -+void kbase_mem_evictable_deinit(struct kbase_context *kctx) -+{ -+ unregister_shrinker(&kctx->reclaim); ++ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); ++ ++ return kctx; +} + +/** -+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. -+ * @alloc: The physical allocation ++ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable ++ * queue. ++ * @kbdev: Device pointer ++ * @js: Job slot to use ++ * ++ * Return: Context to use for specified slot. 
++ * NULL if no contexts present for specified slot + */ -+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) ++static struct kbase_context *kbase_js_ctx_list_pop_head( ++ struct kbase_device *kbdev, int js) +{ -+ struct kbase_context *kctx = alloc->imported.kctx; -+ int __maybe_unused new_page_count; ++ struct kbase_context *kctx; ++ unsigned long flags; + -+ kbase_process_page_usage_dec(kctx, alloc->nents); -+ new_page_count = kbase_atomic_sub_pages(alloc->nents, -+ &kctx->used_pages); -+ kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)kctx->id, -+ (u64)new_page_count); ++ return kctx; +} + +/** -+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. -+ * @alloc: The physical allocation ++ * kbase_js_ctx_pullable - Return if a context can be pulled from on the ++ * specified slot ++ * @kctx: Context pointer ++ * @js: Job slot to use ++ * @is_scheduled: true if the context is currently scheduled ++ * ++ * Caller must hold hwaccess_lock ++ * ++ * Return: true if context can be pulled from on specified slot ++ * false otherwise + */ -+static -+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) ++static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, ++ bool is_scheduled) +{ -+ struct kbase_context *kctx = alloc->imported.kctx; -+ int __maybe_unused new_page_count; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_jd_atom *katom; + -+ new_page_count = kbase_atomic_add_pages(alloc->nents, -+ &kctx->used_pages); -+ kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* Increase mm counters so that the allocation is accounted for -+ * against the process and thus is visible to the OOM killer. -+ */ -+ kbase_process_page_usage_inc(kctx, alloc->nents); ++ js_devdata = &kctx->kbdev->js_data; + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)kctx->id, -+ (u64)new_page_count); ++ if (is_scheduled) { ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) ++ return false; ++ } ++ katom = jsctx_rb_peek(kctx, js); ++ if (!katom) ++ return false; /* No pullable atoms */ ++ if (kctx->blocked_js[js][katom->sched_priority]) ++ return false; ++ if (atomic_read(&katom->blocked)) ++ return false; /* next atom blocked */ ++ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { ++ if (katom->x_pre_dep->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || ++ katom->x_pre_dep->will_fail_event_code) ++ return false; ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && ++ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) ++ return false; ++ } ++ ++ return true; +} + -+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) ++static bool kbase_js_dep_validate(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+ struct kbase_context *kctx = gpu_alloc->imported.kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool ret = true; ++ bool has_dep = false, has_x_dep = false; ++ int js = kbase_js_get_slot(kbdev, katom); ++ int prio = katom->sched_priority; ++ int i; + -+ lockdep_assert_held(&kctx->reg_lock); ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + -+ /* This alloction can't already be on a list. 
*/ -+ WARN_ON(!list_empty(&gpu_alloc->evict_node)); ++ if (dep_atom) { ++ int dep_js = kbase_js_get_slot(kbdev, dep_atom); ++ int dep_prio = dep_atom->sched_priority; + -+ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, -+ 0, gpu_alloc->nents); ++ /* Dependent atom must already have been submitted */ ++ if (!(dep_atom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { ++ ret = false; ++ break; ++ } + -+ /* -+ * Add the allocation to the eviction list, after this point the shrink -+ * can reclaim it. -+ */ -+ mutex_lock(&kctx->jit_evict_lock); -+ list_add(&gpu_alloc->evict_node, &kctx->evict_list); -+ mutex_unlock(&kctx->jit_evict_lock); -+ kbase_mem_evictable_mark_reclaim(gpu_alloc); ++ /* Dependencies with different priorities can't ++ be represented in the ringbuffer */ ++ if (prio != dep_prio) { ++ ret = false; ++ break; ++ } + -+ gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; -+ return 0; -+} ++ if (js == dep_js) { ++ /* Only one same-slot dependency can be ++ * represented in the ringbuffer */ ++ if (has_dep) { ++ ret = false; ++ break; ++ } ++ /* Each dependee atom can only have one ++ * same-slot dependency */ ++ if (dep_atom->post_dep) { ++ ret = false; ++ break; ++ } ++ has_dep = true; ++ } else { ++ /* Only one cross-slot dependency can be ++ * represented in the ringbuffer */ ++ if (has_x_dep) { ++ ret = false; ++ break; ++ } ++ /* Each dependee atom can only have one ++ * cross-slot dependency */ ++ if (dep_atom->x_post_dep) { ++ ret = false; ++ break; ++ } ++ /* The dependee atom can not already be in the ++ * HW access ringbuffer */ ++ if (dep_atom->gpu_rb_state != ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { ++ ret = false; ++ break; ++ } ++ /* The dependee atom can not already have ++ * completed */ ++ if (dep_atom->status != ++ KBASE_JD_ATOM_STATE_IN_JS) { ++ ret = false; ++ break; ++ } ++ /* Cross-slot dependencies must not violate ++ * PRLAM-8987 affinity restrictions */ ++ if (kbase_hw_has_issue(kbdev, ++ BASE_HW_ISSUE_8987) && ++ (js == 2 || dep_js == 2)) { ++ ret = false; ++ break; ++ } ++ has_x_dep = true; ++ } + -+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) -+{ -+ struct kbase_context *kctx = gpu_alloc->imported.kctx; -+ int err = 0; ++ /* Dependency can be represented in ringbuffers */ ++ } ++ } + -+ lockdep_assert_held(&kctx->reg_lock); ++ /* If dependencies can be represented by ringbuffer then clear them from ++ * atom structure */ ++ if (ret) { ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + -+ /* -+ * First remove the allocation from the eviction list as it's no -+ * longer eligible for eviction. -+ */ -+ list_del_init(&gpu_alloc->evict_node); ++ if (dep_atom) { ++ int dep_js = kbase_js_get_slot(kbdev, dep_atom); + -+ if (gpu_alloc->evicted == 0) { -+ /* -+ * The backing is still present, update the VM stats as it's -+ * in use again. -+ */ -+ kbase_mem_evictable_unmark_reclaim(gpu_alloc); -+ } else { -+ /* If the region is still alive ... */ -+ if (gpu_alloc->reg) { -+ /* ... allocate replacement backing ... */ -+ err = kbase_alloc_phy_pages_helper(gpu_alloc, -+ gpu_alloc->evicted); ++ if ((js != dep_js) && ++ (dep_atom->status != ++ KBASE_JD_ATOM_STATE_COMPLETED) ++ && (dep_atom->status != ++ KBASE_JD_ATOM_STATE_HW_COMPLETED) ++ && (dep_atom->status != ++ KBASE_JD_ATOM_STATE_UNUSED)) { + -+ /* -+ * ... and grow the mapping back to its -+ * pre-eviction size. 
-+ */ -+ if (!err) -+ err = kbase_mem_grow_gpu_mapping(kctx, -+ gpu_alloc->reg, -+ gpu_alloc->evicted, 0); ++ katom->atom_flags |= ++ KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++ katom->x_pre_dep = dep_atom; ++ dep_atom->x_post_dep = katom; ++ if (kbase_jd_katom_dep_type( ++ &katom->dep[i]) == ++ BASE_JD_DEP_TYPE_DATA) ++ katom->atom_flags |= ++ KBASE_KATOM_FLAG_FAIL_BLOCKER; ++ } ++ if ((kbase_jd_katom_dep_type(&katom->dep[i]) ++ == BASE_JD_DEP_TYPE_DATA) && ++ (js == dep_js)) { ++ katom->pre_dep = dep_atom; ++ dep_atom->post_dep = katom; ++ } + -+ gpu_alloc->evicted = 0; ++ list_del(&katom->dep_item[i]); ++ kbase_jd_katom_dep_clear(&katom->dep[i]); ++ } + } + } + -+ /* If the region is still alive remove the DONT_NEED attribute. */ -+ if (gpu_alloc->reg) -+ gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; -+ -+ return (err == 0); ++ return ret; +} + -+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) ++bool kbasep_js_add_job(struct kbase_context *kctx, ++ struct kbase_jd_atom *atom) +{ -+ struct kbase_va_region *reg; -+ int ret = -EINVAL; -+ unsigned int real_flags = 0; -+ unsigned int prev_flags = 0; -+ bool prev_needed, new_needed; -+ -+ KBASE_DEBUG_ASSERT(kctx); ++ unsigned long flags; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; + -+ if (!gpu_addr) -+ return -EINVAL; ++ bool enqueue_required = false; ++ bool timer_sync = false; + -+ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) -+ return -EINVAL; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(atom != NULL); ++ lockdep_assert_held(&kctx->jctx.lock); + -+ /* nuke other bits */ -+ flags &= mask; ++ kbdev = kctx->kbdev; ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* check for only supported flags */ -+ if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) -+ goto out; ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ /* mask covers bits we don't support? */ -+ if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) -+ goto out; ++ /* ++ * Begin Runpool transaction ++ */ ++ mutex_lock(&js_devdata->runpool_mutex); + -+ /* convert flags */ -+ if (BASE_MEM_COHERENT_SYSTEM & flags) -+ real_flags |= KBASE_REG_SHARE_BOTH; -+ else if (BASE_MEM_COHERENT_LOCAL & flags) -+ real_flags |= KBASE_REG_SHARE_IN; ++ /* Refcount ctx.nr_jobs */ ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++ ++(js_kctx_info->ctx.nr_jobs); + -+ /* now we can lock down the context, and find the region */ -+ down_write(¤t->mm->mmap_lock); -+ kbase_gpu_vm_lock(kctx); ++ /* Setup any scheduling information */ ++ kbasep_js_clear_job_retry_submit(atom); + -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (!reg || (reg->flags & KBASE_REG_FREE)) -+ goto out_unlock; ++ /* Lock for state available during IRQ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Is the region being transitioning between not needed and needed? 
*/ -+ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; -+ new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; -+ if (prev_needed != new_needed) { -+ /* Aliased allocations can't be made ephemeral */ -+ if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) -+ goto out_unlock; ++ if (!kbase_js_dep_validate(kctx, atom)) { ++ /* Dependencies could not be represented */ ++ --(js_kctx_info->ctx.nr_jobs); + -+ if (new_needed) { -+ /* Only native allocations can be marked not needed */ -+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ ret = kbase_mem_evictable_make(reg->gpu_alloc); -+ if (ret) -+ goto out_unlock; -+ } else { -+ kbase_mem_evictable_unmake(reg->gpu_alloc); -+ } -+ } ++ /* Setting atom status back to queued as it still has unresolved ++ * dependencies */ ++ atom->status = KBASE_JD_ATOM_STATE_QUEUED; + -+ /* limit to imported memory */ -+ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && -+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) -+ goto out_unlock; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ /* no change? */ -+ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { -+ ret = 0; + goto out_unlock; + } + -+ /* save for roll back */ -+ prev_flags = reg->flags; -+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); -+ reg->flags |= real_flags; ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY); ++ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); + -+ /* Currently supporting only imported memory */ -+ switch (reg->gpu_alloc->type) { -+#ifdef CONFIG_UMP -+ case KBASE_MEM_TYPE_IMPORTED_UMP: -+ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags); -+ break; -+#endif -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case KBASE_MEM_TYPE_IMPORTED_UMM: -+ /* Future use will use the new flags, existing mapping will NOT be updated -+ * as memory should not be in use by the GPU when updating the flags. -+ */ -+ ret = 0; -+ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); -+ break; -+#endif -+ default: -+ break; ++ enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); ++ ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, ++ kbasep_js_trace_get_refcnt(kbdev, kctx)); ++ ++ /* Context Attribute Refcounting */ ++ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); ++ ++ if (enqueue_required) { ++ if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) ++ timer_sync = kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, atom->slot_nr); ++ else ++ timer_sync = kbase_js_ctx_list_add_unpullable_nolock( ++ kbdev, kctx, atom->slot_nr); + } ++ /* If this context is active and the atom is the first on its slot, ++ * kick the job manager to attempt to fast-start the atom */ ++ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx) ++ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); + -+ /* roll back on error, i.e. 
not UMP */ -+ if (ret) -+ reg->flags = prev_flags; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ /* End runpool transaction */ + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ up_write(¤t->mm->mmap_lock); -+out: -+ return ret; -+} ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* A job got added while/after kbase_job_zap_context() ++ * was called on a non-scheduled context (e.g. KDS ++ * dependency resolved). Kill that job by killing the ++ * context. */ ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, ++ false); ++ } else if (js_kctx_info->ctx.nr_jobs == 1) { ++ /* Handle Refcount going from 0 to 1: schedule the ++ * context on the Queue */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + -+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) ++ /* Queue was updated - caller must try to ++ * schedule the head context */ ++ WARN_ON(!enqueue_required); ++ } ++ } ++out_unlock: ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+#ifdef CONFIG_UMP -+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags) -+{ -+ struct kbase_va_region *reg; -+ ump_dd_handle umph; -+ u64 block_count; -+ const ump_dd_physical_block_64 *block_array; -+ u64 i, j; -+ int page = 0; -+ ump_alloc_flags ump_flags; -+ ump_alloc_flags cpu_flags; -+ ump_alloc_flags gpu_flags; ++ mutex_unlock(&js_devdata->queue_mutex); + -+ if (*flags & BASE_MEM_SECURE) -+ goto bad_flags; ++ return enqueue_required; ++} + -+ umph = ump_dd_from_secure_id(id); -+ if (UMP_DD_INVALID_MEMORY_HANDLE == umph) -+ goto bad_id; ++void kbasep_js_remove_job(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_jd_atom *atom) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbasep_js_device_data *js_devdata; + -+ ump_flags = ump_dd_allocation_flags_get(umph); -+ cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK; -+ gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) & -+ UMP_DEVICE_MASK; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(atom != NULL); + -+ *va_pages = ump_dd_size_get_64(umph); -+ *va_pages >>= PAGE_SHIFT; ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (!*va_pages) -+ goto bad_size; ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, ++ kbasep_js_trace_get_refcnt(kbdev, kctx)); + -+ if (*va_pages > (U64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ /* De-refcount ctx.nr_jobs */ ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); ++ --(js_kctx_info->ctx.nr_jobs); ++} + -+ if (*flags & BASE_MEM_SAME_VA) -+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); -+ else -+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); ++bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_jd_atom *katom) ++{ ++ unsigned long flags; ++ struct kbasep_js_atom_retained_state katom_retained_state; ++ struct kbasep_js_device_data *js_devdata; ++ bool attr_state_changed; + -+ if (!reg) -+ goto no_region; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); + -+ /* we've got pages to map now, and 
support SAME_VA */ -+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; ++ js_devdata = &kbdev->js_data; + -+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); ++ kbasep_js_remove_job(kbdev, kctx, katom); + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ reg->gpu_alloc->imported.ump_handle = umph; ++ /* The atom has 'finished' (will not be re-run), so no need to call ++ * kbasep_js_has_atom_finished(). ++ * ++ * This is because it returns false for soft-stopped atoms, but we ++ * want to override that, because we're cancelling an atom regardless of ++ * whether it was soft-stopped or not */ ++ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, ++ &katom_retained_state); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */ -+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */ ++ return attr_state_changed; ++} + -+ /* Override import flags based on UMP flags */ -+ *flags &= ~(BASE_MEM_CACHED_CPU); -+ *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR); -+ *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR); ++bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ bool result; + -+ if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == -+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) { -+ reg->flags |= KBASE_REG_CPU_CACHED; -+ *flags |= BASE_MEM_CACHED_CPU; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ js_devdata = &kbdev->js_data; + -+ if (cpu_flags & UMP_PROT_CPU_WR) { -+ reg->flags |= KBASE_REG_CPU_WR; -+ *flags |= BASE_MEM_PROT_CPU_WR; -+ } ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ if (cpu_flags & UMP_PROT_CPU_RD) { -+ reg->flags |= KBASE_REG_CPU_RD; -+ *flags |= BASE_MEM_PROT_CPU_RD; -+ } ++ return result; ++} + -+ if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == -+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) -+ reg->flags |= KBASE_REG_GPU_CACHED; ++struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, ++ int as_nr) ++{ ++ int ret = 0; ++ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_context *found_kctx = NULL; + -+ if (gpu_flags & UMP_PROT_DEVICE_WR) { -+ reg->flags |= KBASE_REG_GPU_WR; -+ *flags |= BASE_MEM_PROT_GPU_WR; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); ++ js_devdata = &kbdev->js_data; + -+ if (gpu_flags & UMP_PROT_DEVICE_RD) { -+ reg->flags |= KBASE_REG_GPU_RD; -+ *flags |= BASE_MEM_PROT_GPU_RD; -+ } ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* ump phys block query */ -+ ump_dd_phys_blocks_get_64(umph, &block_count, &block_array); ++ found_kctx = kbdev->as_to_kctx[as_nr]; + -+ for (i = 0; i < block_count; i++) { -+ for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) { -+ reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT); -+ page++; ++ if (found_kctx != NULL) { ++ ret = kbase_ctx_sched_retain_ctx_refcount(found_kctx); ++ if 
(ret != 0) { ++ E("fail to retain ctx_refcount, ret : %d.", ret); ++ found_kctx = NULL; + } + } -+ reg->gpu_alloc->nents = *va_pages; -+ reg->extent = 0; + -+ return reg; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+no_alloc_obj: -+ kfree(reg); -+no_region: -+bad_size: -+ ump_dd_release(umph); -+bad_id: -+bad_flags: -+ return NULL; ++ return found_kctx; +} -+#endif /* CONFIG_UMP */ + -+#ifdef CONFIG_DMA_SHARED_BUFFER -+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, -+ int fd, u64 *va_pages, u64 *flags, u32 padding) ++/** ++ * kbasep_js_release_result - Try running more jobs after releasing a context ++ * and/or atom ++ * ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @katom_retained_state: Retained state from the atom ++ * @runpool_ctx_attr_change: True if the runpool context attributes have changed ++ * ++ * This collates a set of actions that must happen whilst hwaccess_lock is held. ++ * ++ * This includes running more jobs when: ++ * - The previously released kctx caused a ctx attribute change, ++ * - The released atom caused a ctx attribute change, ++ * - Slots were previously blocked due to affinity restrictions, ++ * - Submission during IRQ handling failed. ++ * ++ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were ++ * changed. The caller should try scheduling all contexts ++ */ ++static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state, ++ bool runpool_ctx_attr_change) +{ -+ struct kbase_va_region *reg; -+ struct dma_buf *dma_buf; -+ struct dma_buf_attachment *dma_attachment; -+ bool shared_zone = false; -+ -+ dma_buf = dma_buf_get(fd); -+ if (IS_ERR_OR_NULL(dma_buf)) -+ goto no_buf; -+ -+ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); -+ if (!dma_attachment) -+ goto no_attachment; ++ struct kbasep_js_device_data *js_devdata; ++ kbasep_js_release_result result = 0; + -+ *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; -+ if (!*va_pages) -+ goto bad_size; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(katom_retained_state != NULL); ++ js_devdata = &kbdev->js_data; + -+ if (*va_pages > (U64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ /* ignore SAME_VA */ -+ *flags &= ~BASE_MEM_SAME_VA; ++ if (js_devdata->nr_user_contexts_running != 0) { ++ bool retry_submit = false; ++ int retry_jobslot = 0; + -+ if (*flags & BASE_MEM_IMPORT_SHARED) -+ shared_zone = true; ++ if (katom_retained_state) ++ retry_submit = kbasep_js_get_atom_retry_submit_slot( ++ katom_retained_state, &retry_jobslot); + -+#ifdef CONFIG_64BIT -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ /* -+ * 64-bit tasks require us to reserve VA on the CPU that we use -+ * on the GPU. 
-+ */ -+ shared_zone = true; -+ } -+#endif ++ if (runpool_ctx_attr_change || retry_submit) { ++ /* A change in runpool ctx attributes might mean we can ++ * run more jobs than before */ ++ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + -+ if (shared_zone) { -+ *flags |= BASE_MEM_NEED_MMAP; -+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); -+ } else { -+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); ++ KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, ++ kctx, NULL, 0u, retry_jobslot); ++ } + } ++ return result; ++} + -+ if (!reg) -+ goto no_region; -+ -+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++/* ++ * Internal function to release the reference on a ctx and an atom's "retained ++ * state", only taking the runpool and as transaction mutexes ++ * ++ * This also starts more jobs running in the case of an ctx-attribute state ++ * change ++ * ++ * This does none of the followup actions for scheduling: ++ * - It does not schedule in a new context ++ * - It does not requeue or handle dying contexts ++ * ++ * For those tasks, just call kbasep_js_runpool_release_ctx() instead ++ * ++ * Requires: ++ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr ++ * - Context has a non-zero refcount ++ * - Caller holds js_kctx_info->ctx.jsctx_mutex ++ * - Caller holds js_devdata->runpool_mutex ++ */ ++static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state) ++{ ++ unsigned long flags; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ kbasep_js_release_result release_result = 0u; ++ bool runpool_ctx_attr_change = false; ++ int kctx_as_nr; ++ struct kbase_as *current_as; ++ int new_ref_count; + -+ /* No pages to map yet */ -+ reg->gpu_alloc->nents = 0; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ /* Ensure context really is scheduled in */ ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ -+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ -+ reg->flags |= KBASE_REG_GPU_CACHED; ++ kctx_as_nr = kctx->as_nr; ++ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + -+ if (*flags & BASE_MEM_SECURE) -+ reg->flags |= KBASE_REG_SECURE; ++ /* ++ * Transaction begins on AS and runpool_irq ++ * ++ * Assert about out calling contract ++ */ ++ current_as = &kbdev->as[kctx_as_nr]; ++ mutex_lock(&kbdev->pm.lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ if (padding) -+ reg->flags |= KBASE_REG_IMPORT_PAD; ++ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); ++ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + -+ reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; -+ reg->gpu_alloc->imported.umm.sgt = NULL; -+ reg->gpu_alloc->imported.umm.dma_buf = dma_buf; -+ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; -+ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; -+ reg->extent = 0; ++ /* Update refcount 
*/ ++ kbase_ctx_sched_release_ctx(kctx); ++ new_ref_count = atomic_read(&kctx->refcount); + -+ return reg; ++ /* Release the atom if it finished (i.e. wasn't soft-stopped) */ ++ if (kbasep_js_has_atom_finished(katom_retained_state)) ++ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( ++ kbdev, kctx, katom_retained_state); + -+invalid_flags: -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc_obj: -+ kfree(reg); -+no_region: -+bad_size: -+ dma_buf_detach(dma_buf, dma_attachment); -+no_attachment: -+ dma_buf_put(dma_buf); -+no_buf: -+ return NULL; -+} -+#endif /* CONFIG_DMA_SHARED_BUFFER */ ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, ++ new_ref_count); + -+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx) -+{ -+ u32 cpu_cache_line_size = cache_line_size(); -+ u32 gpu_cache_line_size = -+ (1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); ++ if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && ++ !kbase_pm_is_suspending(kbdev)) { ++ /* Context is kept scheduled into an address space even when ++ * there are no jobs, in this case we have to handle the ++ * situation where all jobs have been evicted from the GPU and ++ * submission is disabled. ++ * ++ * At this point we re-enable submission to allow further jobs ++ * to be executed ++ */ ++ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ } + -+ return ((cpu_cache_line_size > gpu_cache_line_size) ? -+ cpu_cache_line_size : -+ gpu_cache_line_size); -+} ++ /* Make a set of checks to see if the context should be scheduled out. ++ * Note that there'll always be at least 1 reference to the context ++ * which was previously acquired by kbasep_js_schedule_ctx(). */ ++ if (new_ref_count == 1 && ++ (!kbasep_js_is_submit_allowed(js_devdata, kctx) || ++ kbdev->pm.suspending)) { ++ int num_slots = kbdev->gpu_props.num_job_slots; ++ int slot; + -+static struct kbase_va_region *kbase_mem_from_user_buffer( -+ struct kbase_context *kctx, unsigned long address, -+ unsigned long size, u64 *va_pages, u64 *flags) -+{ -+ long i; -+ struct kbase_va_region *reg; -+ long faulted_pages; -+ int zone = KBASE_REG_ZONE_CUSTOM_VA; -+ bool shared_zone = false; -+ u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx); -+ struct kbase_alloc_import_user_buf *user_buf; -+ struct page **pages = NULL; ++ /* Last reference, and we've been told to remove this context ++ * from the Run Pool */ ++ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", ++ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, ++ kbasep_js_is_submit_allowed(js_devdata, kctx)); + -+ if ((address & (cache_line_alignment - 1)) != 0 || -+ (size & (cache_line_alignment - 1)) != 0) { -+ /* Coherency must be enabled to handle partial cache lines */ -+ if (*flags & (BASE_MEM_COHERENT_SYSTEM | -+ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { -+ /* Force coherent system required flag, import will -+ * then fail if coherency isn't available -+ */ -+ *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; -+ } else { -+ dev_warn(kctx->kbdev->dev, -+ "User buffer is not cache line aligned and no coherency enabled\n"); -+ goto bad_size; -+ } -+ } ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_mmu_as_released(kctx->as_nr); ++#endif ++ KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); + -+ *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - -+ PFN_DOWN(address); -+ if (!*va_pages) -+ goto bad_size; ++ kbase_backend_release_ctx_irq(kbdev, kctx); + -+ if (*va_pages > (UINT64_MAX 
/ PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ if (kbdev->hwaccess.active_kctx == kctx) ++ kbdev->hwaccess.active_kctx = NULL; + -+ /* SAME_VA generally not supported with imported memory (no known use cases) */ -+ *flags &= ~BASE_MEM_SAME_VA; ++ /* Ctx Attribute handling ++ * ++ * Releasing atoms attributes must either happen before this, or ++ * after the KCTX_SHEDULED flag is changed, otherwise we ++ * double-decount the attributes ++ */ ++ runpool_ctx_attr_change |= ++ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + -+ if (*flags & BASE_MEM_IMPORT_SHARED) -+ shared_zone = true; ++ /* Releasing the context and katom retained state can allow ++ * more jobs to run */ ++ release_result |= ++ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, ++ kctx, katom_retained_state, ++ runpool_ctx_attr_change); + -+#ifdef CONFIG_64BIT -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* -+ * 64-bit tasks require us to reserve VA on the CPU that we use -+ * on the GPU. ++ * Transaction ends on AS and runpool_irq: ++ * ++ * By this point, the AS-related data is now clear and ready ++ * for re-use. ++ * ++ * Since releases only occur once for each previous successful ++ * retain, and no more retains are allowed on this context, no ++ * other thread will be operating in this ++ * code whilst we are + */ -+ shared_zone = true; -+ } -+#endif -+ -+ if (shared_zone) { -+ *flags |= BASE_MEM_NEED_MMAP; -+ zone = KBASE_REG_ZONE_SAME_VA; -+ } -+ -+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); + -+ if (!reg) -+ goto no_region; ++ /* Recalculate pullable status for all slots */ ++ for (slot = 0; slot < num_slots; slot++) { ++ if (kbase_js_ctx_pullable(kctx, slot, false)) ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, ++ kctx, slot); ++ } + -+ reg->gpu_alloc = kbase_alloc_create(*va_pages, -+ KBASE_MEM_TYPE_IMPORTED_USER_BUF); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ kbase_backend_release_ctx_noirq(kbdev, kctx); + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ mutex_unlock(&kbdev->pm.lock); + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ -+ reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ -+ reg->flags &= ~KBASE_REG_CPU_CACHED; ++ /* Note: Don't reuse kctx_as_nr now */ + -+ user_buf = ®->gpu_alloc->imported.user_buf; ++ /* Synchronize with any timers */ ++ kbase_backend_ctx_count_changed(kbdev); + -+ user_buf->size = size; -+ user_buf->address = address; -+ user_buf->nr_pages = *va_pages; -+ user_buf->mm = current->mm; -+ user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *), -+ GFP_KERNEL); ++ /* update book-keeping info */ ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); ++ /* Signal any waiter that the context is not scheduled, so is ++ * safe for termination - once the jsctx_mutex is also dropped, ++ * and jobs have finished. 
*/ ++ wake_up(&js_kctx_info->ctx.is_scheduled_wait); + -+ if (!user_buf->pages) -+ goto no_page_array; ++ /* Queue an action to occur after we've dropped the lock */ ++ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | ++ KBASEP_JS_RELEASE_RESULT_SCHED_ALL; ++ } else { ++ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, ++ katom_retained_state, runpool_ctx_attr_change); + -+ /* If the region is coherent with the CPU then the memory is imported -+ * and mapped onto the GPU immediately. -+ * Otherwise get_user_pages is called as a sanity check, but with -+ * NULL as the pages argument which will fault the pages, but not -+ * pin them. The memory will then be pinned only around the jobs that -+ * specify the region as an external resource. -+ */ -+ if (reg->flags & KBASE_REG_SHARE_BOTH) { -+ pages = user_buf->pages; -+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->pm.lock); + } + -+ down_read(¤t->mm->mmap_lock); ++ return release_result; ++} + -+ faulted_pages = -+ kbase_get_user_pages(address, *va_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL); ++void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_atom_retained_state katom_retained_state; + -+ up_read(¤t->mm->mmap_lock); ++ /* Setup a dummy katom_retained_state */ ++ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + -+ if (faulted_pages != *va_pages) -+ goto fault_mismatch; ++ kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ &katom_retained_state); ++} + -+ atomic_inc(¤t->mm->mm_count); ++void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx, bool has_pm_ref) ++{ ++ struct kbasep_js_device_data *js_devdata; + -+ reg->gpu_alloc->nents = 0; -+ reg->extent = 0; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_devdata = &kbdev->js_data; + -+ if (pages) { -+ struct device *dev = kctx->kbdev->dev; -+ unsigned long local_size = user_buf->size; -+ unsigned long offset = user_buf->address & ~PAGE_MASK; -+ phys_addr_t *pa = kbase_get_gpu_phy_pages(reg); ++ /* This is called if and only if you've you've detached the context from ++ * the Runpool Queue, and not added it back to the Runpool ++ */ ++ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ /* Top bit signifies that this was pinned on import */ -+ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* Dying: don't requeue, but kill all jobs on the context. 
This ++ * happens asynchronously */ ++ dev_dbg(kbdev->dev, ++ "JS: ** Killing Context %p on RunPool Remove **", kctx); ++ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); ++ } ++} + -+ for (i = 0; i < faulted_pages; i++) { -+ dma_addr_t dma_addr; -+ unsigned long min; ++void kbasep_js_runpool_release_ctx_and_katom_retained_state( ++ struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbasep_js_atom_retained_state *katom_retained_state) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ kbasep_js_release_result release_result; + -+ min = MIN(PAGE_SIZE - offset, local_size); -+ dma_addr = dma_map_page(dev, pages[i], -+ offset, min, -+ DMA_BIDIRECTIONAL); -+ if (dma_mapping_error(dev, dma_addr)) -+ goto unwind_dma_map; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ user_buf->dma_addrs[i] = dma_addr; -+ pa[i] = page_to_phys(pages[i]); ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ local_size -= min; -+ offset = 0; -+ } ++ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ katom_retained_state); + -+ reg->gpu_alloc->nents = faulted_pages; -+ } ++ /* Drop the runpool mutex to allow requeing kctx */ ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ return reg; ++ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + -+unwind_dma_map: -+ while (i--) { -+ dma_unmap_page(kctx->kbdev->dev, -+ user_buf->dma_addrs[i], -+ PAGE_SIZE, DMA_BIDIRECTIONAL); -+ } -+fault_mismatch: -+ if (pages) { -+ for (i = 0; i < faulted_pages; i++) -+ put_page(pages[i]); -+ } -+ kfree(user_buf->pages); -+no_page_array: -+invalid_flags: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc_obj: -+ kfree(reg); -+no_region: -+bad_size: -+ return NULL; ++ /* Drop the jsctx_mutex to allow scheduling in a new context */ + -+} ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + ++ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) ++ kbase_js_sched_all(kbdev); ++} + -+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, -+ u64 nents, struct base_mem_aliasing_info *ai, -+ u64 *num_pages) ++void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ struct kbase_va_region *reg; -+ u64 gpu_va; -+ size_t i; -+ bool coherent; ++ struct kbasep_js_atom_retained_state katom_retained_state; + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(flags); -+ KBASE_DEBUG_ASSERT(ai); -+ KBASE_DEBUG_ASSERT(num_pages); ++ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + -+ /* mask to only allowed flags */ -+ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | -+ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | -+ BASE_MEM_COHERENT_SYSTEM_REQUIRED); ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, ++ &katom_retained_state); ++} + -+ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { -+ dev_warn(kctx->kbdev->dev, -+ "kbase_mem_alias called with bad flags (%llx)", -+ (unsigned long long)*flags); -+ goto bad_flags; -+ } -+ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || -+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; -+ -+ if (!stride) -+ goto bad_stride; ++/* Variant of 
kbasep_js_runpool_release_ctx() that doesn't call into ++ * kbase_js_sched_all() */ ++static void kbasep_js_runpool_release_ctx_no_schedule( ++ struct kbase_device *kbdev, struct kbase_context *kctx) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ kbasep_js_release_result release_result; ++ struct kbasep_js_atom_retained_state katom_retained_state_struct; ++ struct kbasep_js_atom_retained_state *katom_retained_state = ++ &katom_retained_state_struct; + -+ if (!nents) -+ goto bad_nents; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; ++ kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + -+ if ((nents * stride) > (U64_MAX / PAGE_SIZE)) -+ /* 64-bit address range is the max */ -+ goto bad_size; ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); + -+ /* calculate the number of pages this alias will cover */ -+ *num_pages = nents * stride; ++ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, ++ katom_retained_state); + -+#ifdef CONFIG_64BIT -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ /* 64-bit tasks must MMAP anyway, but not expose this address to -+ * clients */ -+ *flags |= BASE_MEM_NEED_MMAP; -+ reg = kbase_alloc_free_region(kctx, 0, *num_pages, -+ KBASE_REG_ZONE_SAME_VA); -+ } else { -+#else -+ if (1) { -+#endif -+ reg = kbase_alloc_free_region(kctx, 0, *num_pages, -+ KBASE_REG_ZONE_CUSTOM_VA); -+ } ++ /* Drop the runpool mutex to allow requeing kctx */ ++ mutex_unlock(&js_devdata->runpool_mutex); ++ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + -+ if (!reg) -+ goto no_reg; ++ /* Drop the jsctx_mutex to allow scheduling in a new context */ ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ /* zero-sized page array, as we don't need one/can support one */ -+ reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS); -+ if (IS_ERR_OR_NULL(reg->gpu_alloc)) -+ goto no_alloc_obj; ++ /* NOTE: could return release_result if the caller would like to know ++ * whether it should schedule a new context, but currently no callers do ++ */ ++} + -+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++void kbase_js_set_timeouts(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (kbase_update_region_flags(kctx, reg, *flags) != 0) -+ goto invalid_flags; ++ kbase_backend_timeouts_changed(kbdev); ++} + -+ reg->gpu_alloc->imported.alias.nents = nents; -+ reg->gpu_alloc->imported.alias.stride = stride; -+ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); -+ if (!reg->gpu_alloc->imported.alias.aliased) -+ goto no_aliased_array; ++static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbase_as *new_address_space = NULL; ++ unsigned long flags; ++ bool kctx_suspended = false; ++ int as_nr; + -+ kbase_gpu_vm_lock(kctx); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* validate and add src handles */ -+ for (i = 0; i < nents; i++) { -+ if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { -+ if (ai[i].handle.basep.handle != -+ BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) -+ goto bad_handle; /* unsupported magic handle */ -+ if 
(!ai[i].length) -+ goto bad_handle; /* must be > 0 */ -+ if (ai[i].length > stride) -+ goto bad_handle; /* can't be larger than the -+ stride */ -+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; -+ } else { -+ struct kbase_va_region *aliasing_reg; -+ struct kbase_mem_phy_alloc *alloc; ++ /* Pick available address space for this context */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ as_nr = kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ if (as_nr == KBASEP_AS_NR_INVALID) { ++ as_nr = kbase_backend_find_and_release_free_address_space( ++ kbdev, kctx); ++ if (as_nr != KBASEP_AS_NR_INVALID) { ++ /* Attempt to retain the context again, this should ++ * succeed */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ as_nr = kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ aliasing_reg = kbase_region_tracker_find_region_base_address( -+ kctx, -+ (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); ++ WARN_ON(as_nr == KBASEP_AS_NR_INVALID); ++ } ++ } ++ if (as_nr == KBASEP_AS_NR_INVALID) ++ return false; /* No address spaces currently available */ + -+ /* validate found region */ -+ if (!aliasing_reg) -+ goto bad_handle; /* Not found */ -+ if (aliasing_reg->flags & KBASE_REG_FREE) -+ goto bad_handle; /* Free region */ -+ if (aliasing_reg->flags & KBASE_REG_DONT_NEED) -+ goto bad_handle; /* Ephemeral region */ -+ if (!aliasing_reg->gpu_alloc) -+ goto bad_handle; /* No alloc */ -+ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ goto bad_handle; /* Not a native alloc */ -+ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) -+ goto bad_handle; -+ /* Non-coherent memory cannot alias -+ coherent memory, and vice versa.*/ ++ new_address_space = &kbdev->as[as_nr]; + -+ /* check size against stride */ -+ if (!ai[i].length) -+ goto bad_handle; /* must be > 0 */ -+ if (ai[i].length > stride) -+ goto bad_handle; /* can't be larger than the -+ stride */ ++ /* ++ * Atomic transaction on the Context and Run Pool begins ++ */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ alloc = aliasing_reg->gpu_alloc; ++ /* Check to see if context is dying due to kbase_job_zap_context() */ ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ /* Roll back the transaction so far and return */ ++ kbase_ctx_sched_release_ctx(kctx); + -+ /* check against the alloc's size */ -+ if (ai[i].offset > alloc->nents) -+ goto bad_handle; /* beyond end */ -+ if (ai[i].offset + ai[i].length > alloc->nents) -+ goto bad_handle; /* beyond end */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); -+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; -+ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; -+ } ++ return false; + } + -+#ifdef CONFIG_64BIT -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { -+ /* Bind to a cookie */ -+ if (!kctx->cookies) { -+ dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); -+ goto no_cookie; -+ } -+ /* return a cookie 
*/ -+ gpu_va = __ffs(kctx->cookies); -+ kctx->cookies &= ~(1UL << gpu_va); -+ BUG_ON(kctx->pending_regions[gpu_va]); -+ kctx->pending_regions[gpu_va] = reg; ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, ++ 0u, ++ kbasep_js_trace_get_refcnt(kbdev, kctx)); + -+ /* relocate to correct base */ -+ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ gpu_va <<= PAGE_SHIFT; -+ } else /* we control the VA */ { -+#else -+ if (1) { -+#endif -+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { -+ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); -+ goto no_mmap; -+ } -+ /* return real GPU VA */ -+ gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } ++ kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); + -+ reg->flags &= ~KBASE_REG_FREE; -+ reg->flags &= ~KBASE_REG_GROWABLE; ++ /* Assign context to previously chosen address space */ ++ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { ++ /* Roll back the transaction so far and return */ ++ kbase_ctx_sched_release_ctx(kctx); ++ kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + -+ kbase_gpu_vm_unlock(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ return gpu_va; ++ return false; ++ } + -+#ifdef CONFIG_64BIT -+no_cookie: ++ kbdev->hwaccess.active_kctx = kctx; ++ ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_mmu_as_in_use(kctx->as_nr); +#endif -+no_mmap: -+bad_handle: -+ kbase_gpu_vm_unlock(kctx); -+no_aliased_array: -+invalid_flags: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc_obj: -+ kfree(reg); -+no_reg: -+bad_size: -+bad_nents: -+bad_stride: -+bad_flags: -+ return 0; -+} ++ KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); + -+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, -+ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, -+ u64 *flags) -+{ -+ struct kbase_va_region *reg; ++ /* Cause any future waiter-on-termination to wait until the context is ++ * descheduled */ ++ wake_up(&js_kctx_info->ctx.is_scheduled_wait); + -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(gpu_va); -+ KBASE_DEBUG_ASSERT(va_pages); -+ KBASE_DEBUG_ASSERT(flags); ++ /* Re-check for suspending: a suspend could've occurred, and all the ++ * contexts could've been removed from the runpool before we took this ++ * lock. In this case, we don't want to allow this context to run jobs, ++ * we just want it out immediately. ++ * ++ * The DMB required to read the suspend flag was issued recently as part ++ * of the hwaccess_lock locking. If a suspend occurs *after* that lock ++ * was taken (i.e. 
this condition doesn't execute), then the ++ * kbasep_js_suspend() code will cleanup this context instead (by virtue ++ * of it being called strictly after the suspend flag is set, and will ++ * wait for this lock to drop) */ ++ if (kbase_pm_is_suspending(kbdev)) { ++ /* Cause it to leave at some later point */ ++ bool retained; + -+#ifdef CONFIG_64BIT -+ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ *flags |= BASE_MEM_SAME_VA; -+#endif ++ retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); ++ KBASE_DEBUG_ASSERT(retained); + -+ if (!kbase_check_import_flags(*flags)) { -+ dev_warn(kctx->kbdev->dev, -+ "kbase_mem_import called with bad flags (%llx)", -+ (unsigned long long)*flags); -+ goto bad_flags; ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ kctx_suspended = true; + } + -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ dev_warn(kctx->kbdev->dev, -+ "kbase_mem_import call required coherent mem when unavailable"); -+ goto bad_flags; -+ } -+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && -+ !kbase_device_is_cpu_coherent(kctx->kbdev)) { -+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ -+ *flags &= ~BASE_MEM_COHERENT_SYSTEM; -+ } ++ /* Transaction complete */ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { -+ dev_warn(kctx->kbdev->dev, -+ "padding is only supported for UMM"); -+ goto bad_flags; -+ } ++ /* Synchronize with any timers */ ++ kbase_backend_ctx_count_changed(kbdev); + -+ switch (type) { -+#ifdef CONFIG_UMP -+ case BASE_MEM_IMPORT_TYPE_UMP: { -+ ump_secure_id id; ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ /* Note: after this point, the context could potentially get scheduled ++ * out immediately */ + -+ if (get_user(id, (ump_secure_id __user *)phandle)) -+ reg = NULL; -+ else -+ reg = kbase_mem_from_ump(kctx, id, va_pages, flags); ++ if (kctx_suspended) { ++ /* Finishing forcing out the context due to a suspend. 
Use a ++ * variant of kbasep_js_runpool_release_ctx() that doesn't ++ * schedule a new context, to prevent a risk of recursion back ++ * into this function */ ++ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); ++ return false; + } -+ break; -+#endif /* CONFIG_UMP */ -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case BASE_MEM_IMPORT_TYPE_UMM: { -+ int fd; ++ return true; ++} + -+ if (get_user(fd, (int __user *)phandle)) -+ reg = NULL; -+ else -+ reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, -+ padding); -+ } -+ break; -+#endif /* CONFIG_DMA_SHARED_BUFFER */ -+ case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { -+ struct base_mem_import_user_buffer user_buffer; -+ void __user *uptr; ++static bool kbase_js_use_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ unsigned long flags; + -+ if (copy_from_user(&user_buffer, phandle, -+ sizeof(user_buffer))) { -+ reg = NULL; -+ } else { -+#ifdef CONFIG_COMPAT -+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -+ uptr = compat_ptr(user_buffer.ptr.compat_value); -+ else -+#endif -+ uptr = user_buffer.ptr.value; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ reg = kbase_mem_from_user_buffer(kctx, -+ (unsigned long)uptr, user_buffer.length, -+ va_pages, flags); -+ } -+ break; -+ } -+ default: { -+ reg = NULL; -+ break; -+ } ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && ++ kbase_backend_use_ctx_sched(kbdev, kctx)) { ++ /* Context already has ASID - mark as active */ ++ kbdev->hwaccess.active_kctx = kctx; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return true; /* Context already scheduled */ + } + -+ if (!reg) -+ goto no_reg; -+ -+ kbase_gpu_vm_lock(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* mmap needed to setup VA? */ -+ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { -+ /* Bind to a cookie */ -+ if (!kctx->cookies) -+ goto no_cookie; -+ /* return a cookie */ -+ *gpu_va = __ffs(kctx->cookies); -+ kctx->cookies &= ~(1UL << *gpu_va); -+ BUG_ON(kctx->pending_regions[*gpu_va]); -+ kctx->pending_regions[*gpu_va] = reg; ++ return kbasep_js_schedule_ctx(kbdev, kctx); ++} + -+ /* relocate to correct base */ -+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ *gpu_va <<= PAGE_SHIFT; ++void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbasep_js_device_data *js_devdata; ++ bool is_scheduled; + -+ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { -+ /* we control the VA, mmap now to the GPU */ -+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) -+ goto no_gpu_va; -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } else { -+ /* we control the VA, but nothing to mmap yet */ -+ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) -+ goto no_gpu_va; -+ /* return real GPU VA */ -+ *gpu_va = reg->start_pfn << PAGE_SHIFT; -+ } ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ /* clear out private flags */ -+ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ kbase_gpu_vm_unlock(kctx); ++ /* This must never be attempted whilst suspending - i.e. 
it should only ++ * happen in response to a syscall from a user-space thread */ ++ BUG_ON(kbase_pm_is_suspending(kbdev)); + -+ return 0; ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+no_gpu_va: -+no_cookie: -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ kfree(reg); -+no_reg: -+bad_flags: -+ *gpu_va = 0; -+ *va_pages = 0; -+ *flags = 0; -+ return -ENOMEM; -+} ++ /* Mark the context as privileged */ ++ kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); + -+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages) -+{ -+ phys_addr_t *phy_pages; -+ u64 delta = new_pages - old_pages; -+ int ret = 0; ++ is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); ++ if (!is_scheduled) { ++ /* Add the context to the pullable list */ ++ if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) ++ kbase_js_sync_timers(kbdev); + -+ lockdep_assert_held(&kctx->reg_lock); ++ /* Fast-starting requires the jsctx_mutex to be dropped, ++ * because it works on multiple ctxs */ ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+ /* Map the new pages into the GPU */ -+ phy_pages = kbase_get_gpu_phy_pages(reg); -+ ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, -+ phy_pages + old_pages, delta, reg->flags); ++ /* Try to schedule the context in */ ++ kbase_js_sched_all(kbdev); + -+ return ret; ++ /* Wait for the context to be scheduled in */ ++ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, ++ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ } else { ++ /* Already scheduled in - We need to retain it to keep the ++ * corresponding address space */ ++ kbasep_js_runpool_retain_ctx(kbdev, kctx); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ } +} ++KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); + -+static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages) ++void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ u64 gpu_va_start = reg->start_pfn; -+ -+ if (new_pages == old_pages) -+ /* Nothing to do */ -+ return; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ unmap_mapping_range(kctx->filp->f_inode->i_mapping, -+ (gpu_va_start + new_pages)<jctx.sched_info; + -+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages) -+{ -+ u64 delta = old_pages - new_pages; -+ int ret = 0; ++ /* We don't need to use the address space anymore */ ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + -+ ret = kbase_mmu_teardown_pages(kctx, -+ reg->start_pfn + new_pages, delta); ++ /* Release the context - it will be scheduled out */ ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + -+ return ret; ++ kbase_js_sched_all(kbdev); +} ++KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + -+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) ++void kbasep_js_suspend(struct kbase_device *kbdev) +{ -+ u64 old_pages; -+ u64 delta; -+ int res = -EINVAL; -+ struct kbase_va_region *reg; -+ bool read_locked = false; -+ -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(gpu_addr != 0); ++ unsigned long flags; ++ struct kbasep_js_device_data 
*js_devdata; ++ int i; ++ u16 retained = 0u; ++ int nr_privileged_ctx = 0; + -+ if (gpu_addr & ~PAGE_MASK) { -+ dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid"); -+ return -EINVAL; -+ } ++ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); ++ js_devdata = &kbdev->js_data; + -+ down_write(¤t->mm->mmap_lock); -+ kbase_gpu_vm_lock(kctx); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Validate the region */ -+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); -+ if (!reg || (reg->flags & KBASE_REG_FREE)) -+ goto out_unlock; ++ /* Prevent all contexts from submitting */ ++ js_devdata->runpool_irq.submit_allowed = 0; + -+ KBASE_DEBUG_ASSERT(reg->cpu_alloc); -+ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ /* Retain each of the contexts, so we can cause it to leave even if it ++ * had no refcount to begin with */ ++ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { ++ struct kbase_context *kctx = kbdev->as_to_kctx[i]; + -+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) -+ goto out_unlock; ++ retained = retained << 1; + -+ if (0 == (reg->flags & KBASE_REG_GROWABLE)) -+ goto out_unlock; ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ retained |= 1u; ++ /* We can only cope with up to 1 privileged context - ++ * the instrumented context. It'll be suspended by ++ * disabling instrumentation */ ++ if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { ++ ++nr_privileged_ctx; ++ WARN_ON(nr_privileged_ctx != 1); ++ } ++ } ++ } ++ CSTD_UNUSED(nr_privileged_ctx); + -+ /* Would overflow the VA region */ -+ if (new_pages > reg->nr_pages) -+ goto out_unlock; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ /* can't be mapped more than once on the GPU */ -+ if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) -+ goto out_unlock; -+ /* can't grow regions which are ephemeral */ -+ if (reg->flags & KBASE_REG_DONT_NEED) -+ goto out_unlock; ++ /* De-ref the previous retain to ensure each context gets pulled out ++ * sometime later. */ ++ for (i = 0; ++ i < BASE_MAX_NR_AS; ++ ++i, retained = retained >> 1) { ++ struct kbase_context *kctx = kbdev->as_to_kctx[i]; + -+ if (new_pages == reg->gpu_alloc->nents) { -+ /* no change */ -+ res = 0; -+ goto out_unlock; ++ if (retained & 1u) ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + } + -+ old_pages = kbase_reg_current_backed_size(reg); -+ if (new_pages > old_pages) { -+ delta = new_pages - old_pages; ++ /* Caller must wait for all Power Manager active references to be ++ * dropped */ ++} + -+ /* -+ * No update to the mm so downgrade the writer lock to a read -+ * lock so other readers aren't blocked after this point. -+ */ -+ downgrade_write(¤t->mm->mmap_lock); -+ read_locked = true; ++void kbasep_js_resume(struct kbase_device *kbdev) ++{ ++ struct kbasep_js_device_data *js_devdata; ++ int js; + -+ /* Allocate some more pages */ -+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { -+ res = -ENOMEM; -+ goto out_unlock; -+ } -+ if (reg->cpu_alloc != reg->gpu_alloc) { -+ if (kbase_alloc_phy_pages_helper( -+ reg->gpu_alloc, delta) != 0) { -+ res = -ENOMEM; -+ kbase_free_phy_pages_helper(reg->cpu_alloc, -+ delta); -+ goto out_unlock; -+ } -+ } ++ KBASE_DEBUG_ASSERT(kbdev); ++ js_devdata = &kbdev->js_data; ++ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + -+ /* No update required for CPU mappings, that's done on fault. 
*/ ++ mutex_lock(&js_devdata->queue_mutex); ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ struct kbase_context *kctx, *n; + -+ /* Update GPU mapping. */ -+ res = kbase_mem_grow_gpu_mapping(kctx, reg, -+ new_pages, old_pages); ++ list_for_each_entry_safe(kctx, n, ++ &kbdev->js_data.ctx_list_unpullable[js], ++ jctx.sched_info.ctx.ctx_list_entry[js]) { ++ struct kbasep_js_kctx_info *js_kctx_info; ++ unsigned long flags; ++ bool timer_sync = false; + -+ /* On error free the new pages */ -+ if (res) { -+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ kbase_free_phy_pages_helper(reg->gpu_alloc, -+ delta); -+ res = -ENOMEM; -+ goto out_unlock; -+ } -+ } else { -+ delta = old_pages - new_pages; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ /* Update all CPU mapping(s) */ -+ kbase_mem_shrink_cpu_mapping(kctx, reg, -+ new_pages, old_pages); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* Update the GPU mapping */ -+ res = kbase_mem_shrink_gpu_mapping(kctx, reg, -+ new_pages, old_pages); -+ if (res) { -+ res = -ENOMEM; -+ goto out_unlock; ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && ++ kbase_js_ctx_pullable(kctx, js, false)) ++ timer_sync = ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } -+ -+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); -+ if (reg->cpu_alloc != reg->gpu_alloc) -+ kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } ++ mutex_unlock(&js_devdata->queue_mutex); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ if (read_locked) -+ up_read(¤t->mm->mmap_lock); -+ else -+ up_write(¤t->mm->mmap_lock); ++ /* Restart atom processing */ ++ kbase_js_sched_all(kbdev); + -+ return res; ++ /* JS Resume complete */ +} + -+static void kbase_cpu_vm_open(struct vm_area_struct *vma) ++bool kbase_js_is_atom_valid(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ if ((katom->core_req & BASE_JD_REQ_FS) && ++ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | ++ BASE_JD_REQ_T))) ++ return false; + -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); -+ /* non-atomic as we're under Linux' mm lock */ -+ map->count++; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) && ++ (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) && ++ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T))) ++ return false; ++ ++ return true; +} + -+static void kbase_cpu_vm_close(struct vm_area_struct *vma) ++static int kbase_js_get_slot(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom) +{ -+ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ if (katom->core_req & BASE_JD_REQ_FS) ++ return 0; + -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); ++ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { ++ if (katom->device_nr == 1 && ++ kbdev->gpu_props.num_core_groups == 2) ++ return 2; ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) ++ return 2; ++ } + -+ /* non-atomic as we're under Linux' mm lock */ -+ if (--map->count) -+ return; ++ return 1; ++} + -+ KBASE_DEBUG_ASSERT(map->kctx); -+ KBASE_DEBUG_ASSERT(map->alloc); ++bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, ++ struct 
kbase_jd_atom *katom) ++{ ++ bool enqueue_required; + -+ kbase_gpu_vm_lock(map->kctx); ++ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); + -+ if (map->free_on_close) { -+ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == -+ KBASE_REG_ZONE_SAME_VA); -+ /* Avoid freeing memory on the process death which results in -+ * GPU Page Fault. Memory will be freed in kbase_destroy_context -+ */ -+ if (!(current->flags & PF_EXITING)) -+ kbase_mem_free_region(map->kctx, map->region); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->jctx.lock); ++ ++ /* If slot will transition from unpullable to pullable then add to ++ * pullable list */ ++ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { ++ enqueue_required = true; ++ } else { ++ enqueue_required = false; + } ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || ++ (katom->pre_dep && (katom->pre_dep->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { ++ int prio = katom->sched_priority; ++ int js = katom->slot_nr; ++ struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + -+ list_del(&map->mappings_list); ++ list_add_tail(&katom->queue, &queue->x_dep_head); ++ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; ++ enqueue_required = false; ++ } else { ++ /* Check if there are lower priority jobs to soft stop */ ++ kbase_job_slot_ctx_priority_check_locked(kctx, katom); + -+ kbase_gpu_vm_unlock(map->kctx); ++ /* Add atom to ring buffer. */ ++ jsctx_tree_add(kctx, katom); ++ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; ++ } + -+ kbase_mem_phy_alloc_put(map->alloc); -+ kfree(map); ++ return enqueue_required; +} + -+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); ++/** ++ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the ++ * runnable_tree, ready for execution ++ * @katom: Atom to submit ++ * ++ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, ++ * but is still present in the x_dep list. If @katom has a same-slot dependent ++ * atom then that atom (and any dependents) will also be moved. ++ */ ++static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) ++{ ++ lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); + ++ while (katom) { ++ WARN_ON(!(katom->atom_flags & ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) -+static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -+{ -+#else -+static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) -+{ -+ struct vm_area_struct *vma = vmf->vma; -+#endif -+ struct kbase_cpu_mapping *map = vma->vm_private_data; -+ pgoff_t rel_pgoff; -+ size_t i; -+ pgoff_t addr; -+ vm_fault_t ret = VM_FAULT_SIGBUS; ++ if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { ++ list_del(&katom->queue); ++ katom->atom_flags &= ++ ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; ++ jsctx_tree_add(katom->kctx, katom); ++ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; ++ } else { ++ break; ++ } + -+ KBASE_DEBUG_ASSERT(map); -+ KBASE_DEBUG_ASSERT(map->count > 0); -+ KBASE_DEBUG_ASSERT(map->kctx); -+ KBASE_DEBUG_ASSERT(map->alloc); ++ katom = katom->post_dep; ++ } ++} + -+ rel_pgoff = vmf->pgoff - map->region->start_pfn; + -+ kbase_gpu_vm_lock(map->kctx); -+ if (rel_pgoff >= map->alloc->nents) -+ goto locked_bad_fault; ++/** ++ * kbase_js_evict_deps - Evict dependencies of a failed atom. ++ * @kctx: Context pointer ++ * @katom: Pointer to the atom that has failed. ++ * @js: The job slot the katom was run on. 
++ * @prio: Priority of the katom. ++ * ++ * Remove all post dependencies of an atom from the context ringbuffers. ++ * ++ * The original atom's event_code will be propogated to all dependent atoms. ++ * ++ * Context: Caller must hold the HW access lock ++ */ ++static void kbase_js_evict_deps(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js, int prio) ++{ ++ struct kbase_jd_atom *x_dep = katom->x_post_dep; ++ struct kbase_jd_atom *next_katom = katom->post_dep; + -+ /* Fault on access to DONT_NEED regions */ -+ if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) -+ goto locked_bad_fault; ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* insert all valid pages from the fault location */ -+ i = rel_pgoff; -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT); -+#else -+ addr = (pgoff_t)(vmf->address >> PAGE_SHIFT); -+#endif -+ while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) { -+ ret = vmf_insert_pfn(vma, addr << PAGE_SHIFT, -+ PFN_DOWN(map->alloc->pages[i])); -+ if (ret != VM_FAULT_NOPAGE) -+ goto locked_bad_fault; ++ if (next_katom) { ++ KBASE_DEBUG_ASSERT(next_katom->status != ++ KBASE_JD_ATOM_STATE_HW_COMPLETED); ++ next_katom->will_fail_event_code = katom->event_code; + -+ i++; addr++; + } + -+ kbase_gpu_vm_unlock(map->kctx); -+ /* we resolved it, nothing for VM to do */ -+ return VM_FAULT_NOPAGE; ++ /* Has cross slot depenency. */ ++ if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | ++ KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { ++ /* Remove dependency.*/ ++ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + -+locked_bad_fault: -+ kbase_gpu_vm_unlock(map->kctx); -+ return ret; ++ /* Fail if it had a data dependency. */ ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { ++ x_dep->will_fail_event_code = katom->event_code; ++ } ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) ++ kbase_js_move_to_tree(x_dep); ++ } +} + -+const struct vm_operations_struct kbase_vm_ops = { -+ .open = kbase_cpu_vm_open, -+ .close = kbase_cpu_vm_close, -+ .fault = kbase_cpu_vm_fault -+}; -+ -+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close) ++struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +{ -+ struct kbase_cpu_mapping *map; -+ phys_addr_t *page_array; -+ int err = 0; -+ int i; -+ -+ map = kzalloc(sizeof(*map), GFP_KERNEL); -+ -+ if (!map) { -+ WARN_ON(1); -+ err = -ENOMEM; -+ goto out; -+ } ++ struct kbase_jd_atom *katom; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_device *kbdev; ++ int pulled; + -+ /* -+ * VM_DONTCOPY - don't make this mapping available in fork'ed processes -+ * VM_DONTEXPAND - disable mremap on this region -+ * VM_IO - disables paging -+ * VM_DONTDUMP - Don't include in core dumps (3.7 only) -+ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. -+ * This is needed to support using the dedicated and -+ * the OS based memory backends together. 
-+ */ -+ /* -+ * This will need updating to propagate coherency flags -+ * See MIDBASE-1057 -+ */ ++ KBASE_DEBUG_ASSERT(kctx); + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) -+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); -+#else -+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; -+#endif -+ vma->vm_ops = &kbase_vm_ops; -+ vma->vm_private_data = map; ++ kbdev = kctx->kbdev; + -+ page_array = kbase_get_cpu_phy_pages(reg); ++ js_devdata = &kbdev->js_data; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ if (!(reg->flags & KBASE_REG_CPU_CACHED) && -+ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { -+ /* We can't map vmalloc'd memory uncached. -+ * Other memory will have been returned from -+ * kbase_mem_pool which would be -+ * suitable for mapping uncached. -+ */ -+ BUG_ON(kaddr); -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ } ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) ++ return NULL; ++ if (kbase_pm_is_suspending(kbdev)) ++ return NULL; + -+ if (!kaddr) { -+ unsigned long addr = vma->vm_start + aligned_offset; -+ u64 start_off = vma->vm_pgoff - reg->start_pfn + -+ (aligned_offset>>PAGE_SHIFT); ++ katom = jsctx_rb_peek(kctx, js); ++ if (!katom) ++ return NULL; ++ if (kctx->blocked_js[js][katom->sched_priority]) ++ return NULL; ++ if (atomic_read(&katom->blocked)) ++ return NULL; + -+ vm_flags_set(vma, VM_PFNMAP); -+ for (i = 0; i < nr_pages; i++) { -+ unsigned long pfn = PFN_DOWN(page_array[i + start_off]); -+ vm_fault_t ret; ++ /* Due to ordering restrictions when unpulling atoms on failure, we do ++ * not allow multiple runs of fail-dep atoms from the same context to be ++ * present on the same slot */ ++ if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { ++ struct kbase_jd_atom *prev_atom = ++ kbase_backend_inspect_tail(kbdev, js); + -+ ret = vmf_insert_pfn(vma, addr, pfn); -+ if (WARN_ON(ret != VM_FAULT_NOPAGE)) { -+ if (ret == VM_FAULT_OOM) -+ err = -ENOMEM; -+ else -+ err = -EFAULT; -+ break; -+ } ++ if (prev_atom && prev_atom->kctx != kctx) ++ return NULL; ++ } + -+ addr += PAGE_SIZE; -+ } -+ } else { -+ WARN_ON(aligned_offset); -+ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ -+ vm_flags_set(vma, VM_MIXEDMAP); -+ /* vmalloc remaping is easy... 
*/ -+ err = remap_vmalloc_range(vma, kaddr, 0); -+ WARN_ON(err); ++ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { ++ if (katom->x_pre_dep->gpu_rb_state == ++ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || ++ katom->x_pre_dep->will_fail_event_code) ++ return NULL; ++ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && ++ kbase_backend_nr_atoms_on_slot(kbdev, js)) ++ return NULL; + } + -+ if (err) { -+ kfree(map); -+ goto out; ++ kbase_ctx_flag_set(kctx, KCTX_PULLED); ++ ++ pulled = atomic_inc_return(&kctx->atoms_pulled); ++ if (pulled == 1 && !kctx->slots_pullable) { ++ WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); ++ atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } ++ atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); ++ kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; ++ jsctx_rb_pull(kctx, katom); + -+ map->region = reg; -+ map->free_on_close = free_on_close; -+ map->kctx = reg->kctx; -+ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ map->count = 1; /* start with one ref */ ++ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + -+ if (reg->flags & KBASE_REG_CPU_CACHED) -+ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; ++ katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; + -+ list_add(&map->mappings_list, &map->alloc->mappings); ++ katom->ticks = 0; + -+ out: -+ return err; ++ return katom; +} + -+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr) -+{ -+ struct kbase_va_region *new_reg; -+ u32 nr_pages; -+ size_t size; -+ int err = 0; -+ u32 *tb; -+ int owns_tb = 1; + -+ dev_dbg(kctx->kbdev->dev, "in %s\n", __func__); -+ size = (vma->vm_end - vma->vm_start); -+ nr_pages = size >> PAGE_SHIFT; ++static void js_return_worker(struct work_struct *data) ++{ ++ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; ++ struct kbasep_js_atom_retained_state retained_state; ++ int js = katom->slot_nr; ++ int prio = katom->sched_priority; ++ bool timer_sync = false; ++ bool context_idle = false; ++ unsigned long flags; ++ base_jd_core_req core_req = katom->core_req; ++ u64 affinity = katom->affinity; ++ enum kbase_atom_coreref_state coreref_state = katom->coreref_state; + -+ if (!kctx->jctx.tb) { -+ KBASE_DEBUG_ASSERT(0 != size); -+ tb = vmalloc_user(size); ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); + -+ if (NULL == tb) { -+ err = -ENOMEM; -+ goto out; -+ } ++ kbase_backend_complete_wq(kbdev, katom); + -+ err = kbase_device_trace_buffer_install(kctx, tb, size); -+ if (err) { -+ vfree(tb); -+ goto out; -+ } -+ } else { -+ err = -EINVAL; -+ goto out; -+ } ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) ++ kbase_as_poking_timer_release_atom(kbdev, kctx, katom); + -+ *kaddr = kctx->jctx.tb; ++ kbasep_js_atom_retained_state_copy(&retained_state, katom); + -+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); -+ if (!new_reg) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out_no_region; -+ } ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + -+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB); -+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { -+ err = -ENOMEM; -+ 
new_reg->cpu_alloc = NULL; -+ WARN_ON(1); -+ goto out_no_alloc; -+ } ++ atomic_dec(&kctx->atoms_pulled); ++ atomic_dec(&kctx->atoms_pulled_slot[js]); + -+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); ++ atomic_dec(&katom->blocked); + -+ new_reg->cpu_alloc->imported.kctx = kctx; -+ new_reg->flags &= ~KBASE_REG_FREE; -+ new_reg->flags |= KBASE_REG_CPU_CACHED; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* alloc now owns the tb */ -+ owns_tb = 0; ++ kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--; + -+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out_no_va_region; -+ } ++ if (!atomic_read(&kctx->atoms_pulled_slot[js]) && ++ jsctx_rb_none_to_pull(kctx, js)) ++ timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + -+ *reg = new_reg; ++ /* If this slot has been blocked due to soft-stopped atoms, and all ++ * atoms have now been processed, then unblock the slot */ ++ if (!kctx->atoms_pulled_slot_pri[js][prio] && ++ kctx->blocked_js[js][prio]) { ++ kctx->blocked_js[js][prio] = false; + -+ /* map read only, noexec */ -+ vm_flags_clear(vma, (VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); -+ /* the rest of the flags is added by the cpu_mmap handler */ ++ /* Only mark the slot as pullable if the context is not idle - ++ * that case is handled below */ ++ if (atomic_read(&kctx->atoms_pulled) && ++ kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } + -+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); -+ return 0; ++ if (!atomic_read(&kctx->atoms_pulled)) { ++ if (!kctx->slots_pullable) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ timer_sync = true; ++ } + -+out_no_va_region: -+out_no_alloc: -+ kbase_free_alloced_region(new_reg); -+out_no_region: -+ if (owns_tb) { -+ kbase_device_trace_buffer_uninstall(kctx); -+ vfree(tb); -+ } -+out: -+ return err; -+} ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ int num_slots = kbdev->gpu_props.num_job_slots; ++ int slot; + -+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr) -+{ -+ struct kbase_va_region *new_reg; -+ void *kaddr; -+ u32 nr_pages; -+ size_t size; -+ int err = 0; ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) ++ kbasep_js_set_submit_allowed(js_devdata, kctx); + -+ dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); -+ size = (vma->vm_end - vma->vm_start); -+ nr_pages = size >> PAGE_SHIFT; ++ for (slot = 0; slot < num_slots; slot++) { ++ if (kbase_js_ctx_pullable(kctx, slot, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, slot); ++ } ++ } + -+ kaddr = kbase_mmu_dump(kctx, nr_pages); ++ kbase_jm_idle_ctx(kbdev, kctx); + -+ if (!kaddr) { -+ err = -ENOMEM; -+ goto out; ++ context_idle = true; + } + -+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); -+ if (!new_reg) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out; -+ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW); -+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { -+ err = -ENOMEM; -+ new_reg->cpu_alloc = NULL; -+ WARN_ON(1); -+ goto out_no_alloc; ++ if (context_idle) { ++ WARN_ON(!kbase_ctx_flag(kctx, 
KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); + } + -+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); ++ if (timer_sync) ++ kbase_js_sync_timers(kbdev); + -+ new_reg->flags &= ~KBASE_REG_FREE; -+ new_reg->flags |= KBASE_REG_CPU_CACHED; -+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { -+ err = -ENOMEM; -+ WARN_ON(1); -+ goto out_va_region; -+ } ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); + -+ *kmap_addr = kaddr; -+ *reg = new_reg; ++ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; ++ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, ++ &retained_state); + -+ dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); -+ return 0; ++ kbase_js_sched_all(kbdev); + -+out_no_alloc: -+out_va_region: -+ kbase_free_alloced_region(new_reg); -+out: -+ return err; ++ kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, ++ coreref_state); +} + -+ -+void kbase_os_mem_map_lock(struct kbase_context *kctx) ++void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ struct mm_struct *mm = current->mm; -+ (void)kctx; -+ down_read(&mm->mmap_lock); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ ++ jsctx_rb_unpull(kctx, katom); ++ ++ WARN_ON(work_pending(&katom->work)); ++ ++ /* Block re-submission until workqueue has run */ ++ atomic_inc(&katom->blocked); ++ ++ kbase_job_check_leave_disjoint(kctx->kbdev, katom); ++ ++ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); ++ INIT_WORK(&katom->work, js_return_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); +} + -+void kbase_os_mem_map_unlock(struct kbase_context *kctx) ++bool kbase_js_complete_atom_wq(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom) +{ -+ struct mm_struct *mm = current->mm; -+ (void)kctx; -+ up_read(&mm->mmap_lock); -+} ++ struct kbasep_js_kctx_info *js_kctx_info; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ bool timer_sync = false; ++ int atom_slot; ++ bool context_idle = false; ++ int prio = katom->sched_priority; + -+static int kbasep_reg_mmap(struct kbase_context *kctx, -+ struct vm_area_struct *vma, -+ struct kbase_va_region **regm, -+ size_t *nr_pages, size_t *aligned_offset) ++ kbdev = kctx->kbdev; ++ atom_slot = katom->slot_nr; + -+{ -+ int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); -+ struct kbase_va_region *reg; -+ int err = 0; ++ js_kctx_info = &kctx->jctx.sched_info; ++ js_devdata = &kbdev->js_data; + -+ *aligned_offset = 0; ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + -+ dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); ++ mutex_lock(&js_devdata->runpool_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ /* SAME_VA stuff, fetch the right region */ -+ reg = kctx->pending_regions[cookie]; -+ if (!reg) { -+ err = -ENOMEM; -+ goto out; -+ } ++ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { ++ context_idle = !atomic_dec_return(&kctx->atoms_pulled); ++ atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); ++ kctx->atoms_pulled_slot_pri[atom_slot][prio]--; + -+ if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { -+ /* incorrect mmap size */ -+ /* leave the cookie for a potential later -+ * mapping, or to be reclaimed later when the -+ * context is freed */ -+ err = -ENOMEM; -+ goto out; ++ if (!atomic_read(&kctx->atoms_pulled) && ++ !kctx->slots_pullable) { ++ 
WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); ++ kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); ++ atomic_dec(&kbdev->js_data.nr_contexts_runnable); ++ timer_sync = true; ++ } ++ ++ /* If this slot has been blocked due to soft-stopped atoms, and ++ * all atoms have now been processed, then unblock the slot */ ++ if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] ++ && kctx->blocked_js[atom_slot][prio]) { ++ kctx->blocked_js[atom_slot][prio] = false; ++ if (kbase_js_ctx_pullable(kctx, atom_slot, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, atom_slot); ++ } + } ++ WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + -+ if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || -+ (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { -+ /* VM flags inconsistent with region flags */ -+ err = -EPERM; -+ dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", -+ __FILE__, __LINE__); -+ goto out; ++ if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && ++ jsctx_rb_none_to_pull(kctx, atom_slot)) { ++ if (!list_empty( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) ++ timer_sync |= kbase_js_ctx_list_remove_nolock( ++ kctx->kbdev, kctx, atom_slot); + } + -+ /* adjust down nr_pages to what we have physically */ -+ *nr_pages = kbase_reg_current_backed_size(reg); ++ /* ++ * If submission is disabled on this context (most likely due to an ++ * atom failure) and there are now no atoms left in the system then ++ * re-enable submission so that context can be scheduled again. ++ */ ++ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && ++ !atomic_read(&kctx->atoms_pulled) && ++ !kbase_ctx_flag(kctx, KCTX_DYING)) { ++ int js; + -+ if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, -+ reg->nr_pages, 1) != 0) { -+ dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); -+ /* Unable to map in GPU space. */ -+ WARN_ON(1); -+ err = -ENOMEM; -+ goto out; ++ kbasep_js_set_submit_allowed(js_devdata, kctx); ++ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } ++ } else if (katom->x_post_dep && ++ kbasep_js_is_submit_allowed(js_devdata, kctx)) { ++ int js; ++ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kbdev, kctx, js); ++ } + } -+ /* no need for the cookie anymore */ -+ kctx->pending_regions[cookie] = NULL; -+ kctx->cookies |= (1UL << cookie); + -+ /* -+ * Overwrite the offset with the region start_pfn, so we effectively -+ * map from offset 0 in the region. However subtract the aligned -+ * offset so that when user space trims the mapping the beginning of -+ * the trimmed VMA has the correct vm_pgoff; ++ /* Mark context as inactive. The pm reference will be dropped later in ++ * jd_done_worker(). 
+ */ -+ vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); -+out: -+ *regm = reg; -+ dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); ++ if (context_idle) ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + -+ return err; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (timer_sync) ++ kbase_backend_ctx_count_changed(kbdev); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ return context_idle; +} + -+int kbase_mmap(struct file *file, struct vm_area_struct *vma) ++struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp) +{ -+ struct kbase_context *kctx = file->private_data; -+ struct kbase_va_region *reg = NULL; -+ void *kaddr = NULL; -+ size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; -+ int err = 0; -+ int free_on_close = 0; -+ struct device *dev = kctx->kbdev->dev; -+ size_t aligned_offset = 0; ++ u64 microseconds_spent = 0; ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_jd_atom *x_dep = katom->x_post_dep; + -+ dev_dbg(dev, "kbase_mmap\n"); ++ kbdev = kctx->kbdev; + -+ /* strip away corresponding VM_MAY% flags to the VM_% flags requested */ -+ vm_flags_clear(vma, ((vma->vm_flags & (VM_READ | VM_WRITE)) << 4)); + -+ if (0 == nr_pages) { -+ err = -EINVAL; -+ goto out; -+ } ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ if (!(vma->vm_flags & VM_SHARED)) { -+ err = -EINVAL; -+ goto out; -+ } ++ if (katom->will_fail_event_code) ++ katom->event_code = katom->will_fail_event_code; + -+ kbase_gpu_vm_lock(kctx); ++ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + -+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { -+ /* The non-mapped tracking helper page */ -+ err = kbase_tracking_page_setup(kctx, vma); -+ goto out_unlock; ++ if (katom->event_code != BASE_JD_EVENT_DONE) { ++ kbase_js_evict_deps(kctx, katom, katom->slot_nr, ++ katom->sched_priority); + } + -+ /* if not the MTP, verify that the MTP has been mapped */ -+ rcu_read_lock(); -+ /* catches both when the special page isn't present or -+ * when we've forked */ -+ if (rcu_dereference(kctx->process_mm) != current->mm) { -+ err = -EINVAL; -+ rcu_read_unlock(); -+ goto out_unlock; -+ } -+ rcu_read_unlock(); ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, ++ katom->slot_nr), NULL, 0); ++#endif + -+ switch (vma->vm_pgoff) { -+ case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): -+ case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): -+ /* Illegal handle for direct map */ -+ err = -EINVAL; -+ goto out_unlock; -+ case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE): -+ err = kbase_trace_buffer_mmap(kctx, vma, ®, &kaddr); -+ if (0 != err) -+ goto out_unlock; -+ dev_dbg(dev, "kbase_trace_buffer_mmap ok\n"); -+ /* free the region on munmap */ -+ free_on_close = 1; -+ break; -+ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): -+ /* MMU dump */ -+ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); -+ if (0 != err) -+ goto out_unlock; -+ /* free the region on munmap */ -+ free_on_close = 1; -+ break; -+ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... 
-+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { -+ err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, -+ &aligned_offset); -+ if (0 != err) -+ goto out_unlock; -+ /* free the region on munmap */ -+ free_on_close = 1; -+ break; -+ } -+ default: { -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, -+ (u64)vma->vm_pgoff << PAGE_SHIFT); ++ /* Calculate the job's time used */ ++ if (end_timestamp != NULL) { ++ /* Only calculating it for jobs that really run on the HW (e.g. ++ * removed from next jobs never actually ran, so really did take ++ * zero time) */ ++ ktime_t tick_diff = ktime_sub(*end_timestamp, ++ katom->start_timestamp); + -+ if (reg && !(reg->flags & KBASE_REG_FREE)) { -+ /* will this mapping overflow the size of the region? */ -+ if (nr_pages > (reg->nr_pages - -+ (vma->vm_pgoff - reg->start_pfn))) { -+ err = -ENOMEM; -+ goto out_unlock; -+ } ++ microseconds_spent = ktime_to_ns(tick_diff); + -+ if ((vma->vm_flags & VM_READ && -+ !(reg->flags & KBASE_REG_CPU_RD)) || -+ (vma->vm_flags & VM_WRITE && -+ !(reg->flags & KBASE_REG_CPU_WR))) { -+ /* VM flags inconsistent with region flags */ -+ err = -EPERM; -+ dev_err(dev, "%s:%d inconsistent VM flags\n", -+ __FILE__, __LINE__); -+ goto out_unlock; -+ } ++ do_div(microseconds_spent, 1000); + -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ if (KBASE_MEM_TYPE_IMPORTED_UMM == -+ reg->cpu_alloc->type) { -+ err = dma_buf_mmap( -+ reg->cpu_alloc->imported.umm.dma_buf, -+ vma, vma->vm_pgoff - reg->start_pfn); -+ goto out_unlock; -+ } -+#endif /* CONFIG_DMA_SHARED_BUFFER */ ++ /* Round up time spent to the minimum timer resolution */ ++ if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US) ++ microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US; ++ } + -+ /* limit what we map to the amount currently backed */ -+ if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { -+ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) -+ nr_pages = 0; -+ else -+ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); -+ } -+ } else { -+ err = -ENOMEM; -+ goto out_unlock; -+ } -+ } /* default */ -+ } /* switch */ + -+ err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); ++ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); + -+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { -+ /* MMU dump - userspace should now have a reference on -+ * the pages, so we can now free the kernel mapping */ -+ vfree(kaddr); -+ } ++ /* Unblock cross dependency if present */ ++ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || ++ !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && ++ (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { ++ bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, ++ false); ++ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; ++ kbase_js_move_to_tree(x_dep); ++ if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, ++ false)) ++ kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, ++ x_dep->slot_nr); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+out: -+ if (err) -+ dev_err(dev, "mmap failed %d\n", err); ++ if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) ++ return x_dep; ++ } + -+ return err; ++ return NULL; +} + -+KBASE_EXPORT_TEST_API(kbase_mmap); -+ -+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ unsigned long prot_request, struct kbase_vmap_struct *map) ++void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +{ -+ struct kbase_va_region *reg; -+ unsigned long page_index; -+ 
unsigned int offset = gpu_addr & ~PAGE_MASK; -+ size_t page_count = PFN_UP(offset + size); -+ phys_addr_t *page_array; -+ struct page **pages; -+ void *cpu_addr = NULL; -+ pgprot_t prot; -+ size_t i; -+ bool sync_needed; -+ -+ if (!size || !map) -+ return NULL; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbase_context *last_active; ++ bool timer_sync = false; ++ bool ctx_waiting = false; + -+ /* check if page_count calculation will wrap */ -+ if (size > ((size_t)-1 / PAGE_SIZE)) -+ return NULL; ++ js_devdata = &kbdev->js_data; + -+ kbase_gpu_vm_lock(kctx); ++ down(&js_devdata->schedule_sem); ++ mutex_lock(&js_devdata->queue_mutex); + -+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); -+ if (!reg || (reg->flags & KBASE_REG_FREE)) -+ goto out_unlock; ++ last_active = kbdev->hwaccess.active_kctx; + -+ page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn; ++ while (js_mask) { ++ int js; + -+ /* check if page_index + page_count will wrap */ -+ if (-1UL - page_count < page_index) -+ goto out_unlock; ++ js = ffs(js_mask) - 1; + -+ if (page_index + page_count > kbase_reg_current_backed_size(reg)) -+ goto out_unlock; ++ while (1) { ++ struct kbase_context *kctx; ++ unsigned long flags; ++ bool context_idle = false; + -+ if (reg->flags & KBASE_REG_DONT_NEED) -+ goto out_unlock; ++ kctx = kbase_js_ctx_list_pop_head(kbdev, js); + -+ /* check access permissions can be satisfied -+ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ -+ if ((reg->flags & prot_request) != prot_request) -+ goto out_unlock; ++ if (!kctx) { ++ js_mask &= ~(1 << js); ++ break; /* No contexts on pullable list */ ++ } + -+ page_array = kbase_get_cpu_phy_pages(reg); -+ if (!page_array) -+ goto out_unlock; ++ if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { ++ context_idle = true; + -+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); -+ if (!pages) -+ goto out_unlock; ++ if (kbase_pm_context_active_handle_suspend( ++ kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ /* Suspend pending - return context to ++ * queue and stop scheduling */ ++ mutex_lock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ if (kbase_js_ctx_list_add_pullable_head( ++ kctx->kbdev, kctx, js)) ++ kbase_js_sync_timers(kbdev); ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ up(&js_devdata->schedule_sem); ++ return; ++ } ++ kbase_ctx_flag_set(kctx, KCTX_ACTIVE); ++ } + -+ for (i = 0; i < page_count; i++) -+ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i])); ++ if (!kbase_js_use_ctx(kbdev, kctx)) { ++ mutex_lock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ /* Context can not be used at this time */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbase_js_ctx_pullable(kctx, js, false) ++ || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_head_nolock( ++ kctx->kbdev, kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, kctx, js); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); ++ if (context_idle) { ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); ++ } + -+ prot = PAGE_KERNEL; -+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) { -+ /* Map uncached */ -+ prot = pgprot_writecombine(prot); -+ } -+ /* Note: enforcing a RO prot_request onto prot is not done, since: -+ * - CPU-arch-specific 
integration required -+ * - kbase_vmap() requires no access checks to be made/enforced */ ++ /* No more jobs can be submitted on this slot */ ++ js_mask &= ~(1 << js); ++ break; ++ } ++ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ cpu_addr = vmap(pages, page_count, VM_MAP, prot); ++ kbase_ctx_flag_clear(kctx, KCTX_PULLED); + -+ kfree(pages); ++ if (!kbase_jm_kick(kbdev, 1 << js)) ++ /* No more jobs can be submitted on this slot */ ++ js_mask &= ~(1 << js); + -+ if (!cpu_addr) -+ goto out_unlock; ++ if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { ++ bool pullable = kbase_js_ctx_pullable(kctx, js, ++ true); + -+ map->gpu_addr = gpu_addr; -+ map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; -+ map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); -+ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; -+ map->addr = (void *)((uintptr_t)cpu_addr + offset); -+ map->size = size; -+ map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; -+ sync_needed = map->is_cached; ++ /* Failed to pull jobs - push to head of list. ++ * Unless this context is already 'active', in ++ * which case it's effectively already scheduled ++ * so push it to the back of the list. */ ++ if (pullable && kctx == last_active) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kctx->kbdev, ++ kctx, js); ++ else if (pullable) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_head_nolock( ++ kctx->kbdev, ++ kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, ++ kctx, js); + -+#ifdef CONFIG_MALI_COH_KERN -+ /* kernel can use coherent memory if supported */ -+ if (kctx->kbdev->system_coherency == COHERENCY_ACE) -+ sync_needed = false; -+#endif ++ /* If this context is not the active context, ++ * but the active context is pullable on this ++ * slot, then we need to remove the active ++ * marker to prevent it from submitting atoms in ++ * the IRQ handler, which would prevent this ++ * context from making progress. 
*/ ++ if (last_active && kctx != last_active && ++ kbase_js_ctx_pullable( ++ last_active, js, true)) ++ ctx_waiting = true; + -+ if (sync_needed) { -+ /* Sync first page */ -+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); -+ phys_addr_t cpu_pa = map->cpu_pages[0]; -+ phys_addr_t gpu_pa = map->gpu_pages[0]; ++ if (context_idle) { ++ kbase_jm_idle_ctx(kbdev, kctx); ++ spin_unlock_irqrestore( ++ &kbdev->hwaccess_lock, ++ flags); ++ WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); ++ kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); ++ kbase_pm_context_idle(kbdev); ++ } else { ++ spin_unlock_irqrestore( ++ &kbdev->hwaccess_lock, ++ flags); ++ } ++ mutex_unlock( ++ &kctx->jctx.sched_info.ctx.jsctx_mutex); + -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, -+ KBASE_SYNC_TO_CPU); ++ js_mask &= ~(1 << js); ++ break; /* Could not run atoms on this slot */ ++ } + -+ /* Sync middle pages (if any) */ -+ for (i = 1; page_count > 2 && i < page_count - 1; i++) { -+ cpu_pa = map->cpu_pages[i]; -+ gpu_pa = map->gpu_pages[i]; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, -+ KBASE_SYNC_TO_CPU); -+ } ++ /* Push to back of list */ ++ if (kbase_js_ctx_pullable(kctx, js, true)) ++ timer_sync |= ++ kbase_js_ctx_list_add_pullable_nolock( ++ kctx->kbdev, kctx, js); ++ else ++ timer_sync |= ++ kbase_js_ctx_list_add_unpullable_nolock( ++ kctx->kbdev, kctx, js); + -+ /* Sync last page (if any) */ -+ if (page_count > 1) { -+ cpu_pa = map->cpu_pages[page_count - 1]; -+ gpu_pa = map->gpu_pages[page_count - 1]; -+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, -+ KBASE_SYNC_TO_CPU); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + } + } -+ kbase_gpu_vm_unlock(kctx); + -+ return map->addr; ++ if (timer_sync) ++ kbase_js_sync_timers(kbdev); + -+out_unlock: -+ kbase_gpu_vm_unlock(kctx); -+ return NULL; -+} ++ if (kbdev->hwaccess.active_kctx == last_active && ctx_waiting) ++ kbdev->hwaccess.active_kctx = NULL; + -+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ struct kbase_vmap_struct *map) -+{ -+ /* 0 is specified for prot_request to indicate no access checks should -+ * be made. 
-+ * -+ * As mentioned in kbase_vmap_prot() this means that a kernel-side -+ * CPU-RO mapping is not enforced to allow this to work */ -+ return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); ++ mutex_unlock(&js_devdata->queue_mutex); ++ up(&js_devdata->schedule_sem); +} -+KBASE_EXPORT_TEST_API(kbase_vmap); + -+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) ++void kbase_js_zap_context(struct kbase_context *kctx) +{ -+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); -+ bool sync_needed = map->is_cached; -+ vunmap(addr); -+#ifdef CONFIG_MALI_COH_KERN -+ /* kernel can use coherent memory if supported */ -+ if (kctx->kbdev->system_coherency == COHERENCY_ACE) -+ sync_needed = false; -+#endif -+ if (sync_needed) { -+ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; -+ size_t size = map->size; -+ size_t page_count = PFN_UP(offset + size); -+ size_t i; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; ++ int js; + -+ /* Sync first page */ -+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); -+ phys_addr_t cpu_pa = map->cpu_pages[0]; -+ phys_addr_t gpu_pa = map->gpu_pages[0]; ++ /* ++ * Critical assumption: No more submission is possible outside of the ++ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) ++ * whilst the struct kbase_context is terminating. ++ */ + -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, -+ KBASE_SYNC_TO_DEVICE); ++ /* First, atomically do the following: ++ * - mark the context as dying ++ * - try to evict it from the queue */ ++ mutex_lock(&kctx->jctx.lock); ++ mutex_lock(&js_devdata->queue_mutex); ++ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); ++ kbase_ctx_flag_set(kctx, KCTX_DYING); + -+ /* Sync middle pages (if any) */ -+ for (i = 1; page_count > 2 && i < page_count - 1; i++) { -+ cpu_pa = map->cpu_pages[i]; -+ gpu_pa = map->gpu_pages[i]; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, -+ KBASE_SYNC_TO_DEVICE); -+ } ++ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + -+ /* Sync last page (if any) */ -+ if (page_count > 1) { -+ cpu_pa = map->cpu_pages[page_count - 1]; -+ gpu_pa = map->gpu_pages[page_count - 1]; -+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1; -+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, -+ KBASE_SYNC_TO_DEVICE); ++ /* ++ * At this point we know: ++ * - If eviction succeeded, it was in the queue, but now no ++ * longer is ++ * - We must cancel the jobs here. No Power Manager active reference to ++ * release. ++ * - This happens asynchronously - kbase_jd_zap_context() will wait for ++ * those jobs to be killed. ++ * - If eviction failed, then it wasn't in the queue. It is one ++ * of the following: ++ * - a. it didn't have any jobs, and so is not in the Queue or ++ * the Run Pool (not scheduled) ++ * - Hence, no more work required to cancel jobs. No Power Manager ++ * active reference to release. ++ * - b. it was in the middle of a scheduling transaction (and thus must ++ * have at least 1 job). This can happen from a syscall or a ++ * kernel thread. We still hold the jsctx_mutex, and so the thread ++ * must be waiting inside kbasep_js_try_schedule_head_ctx(), ++ * before checking whether the runpool is full. That thread will ++ * continue after we drop the mutex, and will notice the context ++ * is dying. It will rollback the transaction, killing all jobs at ++ * the same time. 
kbase_jd_zap_context() will wait for those jobs ++ * to be killed. ++ * - Hence, no more work required to cancel jobs, or to release the ++ * Power Manager active reference. ++ * - c. it is scheduled, and may or may not be running jobs ++ * - We must cause it to leave the runpool by stopping it from ++ * submitting any more jobs. When it finally does leave, ++ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs ++ * (because it is dying), release the Power Manager active reference, ++ * and will not requeue the context in the queue. ++ * kbase_jd_zap_context() will wait for those jobs to be killed. ++ * - Hence, work required just to make it leave the runpool. Cancelling ++ * jobs and releasing the Power manager active reference will be ++ * handled when it leaves the runpool. ++ */ ++ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { ++ if (!list_empty( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) ++ list_del_init( ++ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + } -+ } -+ map->gpu_addr = 0; -+ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); -+ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); -+ map->cpu_pages = NULL; -+ map->gpu_pages = NULL; -+ map->addr = NULL; -+ map->size = 0; -+ map->is_cached = false; -+} -+KBASE_EXPORT_TEST_API(kbase_vunmap); -+ -+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) -+{ -+ struct mm_struct *mm; + -+ rcu_read_lock(); -+ mm = rcu_dereference(kctx->process_mm); -+ if (mm) { -+ atomic_add(pages, &kctx->nonmapped_pages); -+#ifdef SPLIT_RSS_COUNTING -+ add_mm_counter(mm, MM_FILEPAGES, pages); -+#else -+ spin_lock(&mm->page_table_lock); -+ add_mm_counter(mm, MM_FILEPAGES, pages); -+ spin_unlock(&mm->page_table_lock); -+#endif -+ } -+ rcu_read_unlock(); -+} ++ /* The following events require us to kill off remaining jobs ++ * and update PM book-keeping: ++ * - we evicted it correctly (it must have jobs to be in the ++ * Queue) ++ * ++ * These events need no action, but take this path anyway: ++ * - Case a: it didn't have any jobs, and was never in the Queue ++ * - Case b: scheduling transaction will be partially rolled- ++ * back (this already cancels the jobs) ++ */ + -+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -+{ -+ int pages; -+ struct mm_struct *mm; ++ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, ++ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ spin_lock(&kctx->mm_update_lock); -+ mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); -+ if (!mm) { -+ spin_unlock(&kctx->mm_update_lock); -+ return; -+ } ++ dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + -+ rcu_assign_pointer(kctx->process_mm, NULL); -+ spin_unlock(&kctx->mm_update_lock); -+ synchronize_rcu(); ++ /* Only cancel jobs when we evicted from the ++ * queue. No Power Manager active reference was held. 
++ * ++ * Having is_dying set ensures that this kills, and ++ * doesn't requeue */ ++ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + -+ pages = atomic_xchg(&kctx->nonmapped_pages, 0); -+#ifdef SPLIT_RSS_COUNTING -+ add_mm_counter(mm, MM_FILEPAGES, -pages); -+#else -+ spin_lock(&mm->page_table_lock); -+ add_mm_counter(mm, MM_FILEPAGES, -pages); -+ spin_unlock(&mm->page_table_lock); -+#endif -+} ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ mutex_unlock(&kctx->jctx.lock); ++ } else { ++ unsigned long flags; ++ bool was_retained; + -+static void kbase_special_vm_close(struct vm_area_struct *vma) -+{ -+ struct kbase_context *kctx; ++ /* Case c: didn't evict, but it is scheduled - it's in the Run ++ * Pool */ ++ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, ++ kbase_ctx_flag(kctx, KCTX_SCHEDULED)); ++ dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + -+ kctx = vma->vm_private_data; -+ kbasep_os_process_page_usage_drain(kctx); -+} ++ /* Disable the ctx from submitting any more jobs */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+static const struct vm_operations_struct kbase_vm_special_ops = { -+ .close = kbase_special_vm_close, -+}; ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); + -+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) -+{ -+ /* check that this is the only tracking page */ -+ spin_lock(&kctx->mm_update_lock); -+ if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { -+ spin_unlock(&kctx->mm_update_lock); -+ return -EFAULT; -+ } ++ /* Retain and (later) release the context whilst it is is now ++ * disallowed from submitting jobs - ensures that someone ++ * somewhere will be removing the context later on */ ++ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + -+ rcu_assign_pointer(kctx->process_mm, current->mm); ++ /* Since it's scheduled and we have the jsctx_mutex, it must be ++ * retained successfully */ ++ KBASE_DEBUG_ASSERT(was_retained); + -+ spin_unlock(&kctx->mm_update_lock); ++ dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + -+ /* no real access */ -+ vm_flags_clear(vma, (VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0)) -+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); -+#else -+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; -+#endif -+ vma->vm_ops = &kbase_vm_special_ops; -+ vma->vm_private_data = kctx; ++ /* Cancel any remaining running jobs for this kctx - if any. ++ * Submit is disallowed which takes effect immediately, so no ++ * more new jobs will appear after we do this. 
*/ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ kbase_job_slot_hardstop(kctx, js, NULL); + -+ return 0; -+} -+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle) -+{ -+ int i; -+ int res; -+ void *va; -+ dma_addr_t dma_pa; -+ struct kbase_va_region *reg; -+ phys_addr_t *page_array; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ unsigned long attrs = DMA_ATTR_WRITE_COMBINE; -+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) -+ DEFINE_DMA_ATTRS(attrs); -+#endif ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++ mutex_unlock(&js_devdata->queue_mutex); ++ mutex_unlock(&kctx->jctx.lock); + -+ u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; -+ u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | -+ BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; ++ dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", ++ kctx); + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(0 != size); -+ KBASE_DEBUG_ASSERT(0 != pages); ++ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ } + -+ if (size == 0) -+ goto err; ++ KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + -+ /* All the alloc calls return zeroed memory */ -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, -+ attrs); -+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) -+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); -+ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, -+ &attrs); -+#else -+ va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); -+#endif -+ if (!va) -+ goto err; ++ /* After this, you must wait on both the ++ * kbase_jd_context::zero_jobs_wait and the ++ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs ++ * to be destroyed, and the context to be de-scheduled (if it was on the ++ * runpool). ++ * ++ * kbase_jd_zap_context() will do this. */ ++} + -+ /* Store the state so we can free it later. 
*/ -+ handle->cpu_va = va; -+ handle->dma_pa = dma_pa; -+ handle->size = size; -+ -+ -+ reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA); -+ if (!reg) -+ goto no_reg; -+ -+ reg->flags &= ~KBASE_REG_FREE; -+ if (kbase_update_region_flags(kctx, reg, flags) != 0) -+ goto invalid_flags; -+ -+ reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); -+ if (IS_ERR_OR_NULL(reg->cpu_alloc)) -+ goto no_alloc; -+ -+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); -+ -+ page_array = kbase_get_cpu_phy_pages(reg); -+ -+ for (i = 0; i < pages; i++) -+ page_array[i] = dma_pa + (i << PAGE_SHIFT); -+ -+ reg->cpu_alloc->nents = pages; -+ -+ kbase_gpu_vm_lock(kctx); -+ res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); -+ kbase_gpu_vm_unlock(kctx); -+ if (res) -+ goto no_mmap; -+ -+ return va; -+ -+no_mmap: -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+no_alloc: -+invalid_flags: -+ kfree(reg); -+no_reg: -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); -+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) -+ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); -+#else -+ dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); -+#endif -+err: -+ return NULL; ++static inline int trace_get_refcnt(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ return atomic_read(&kctx->refcount); +} -+KBASE_EXPORT_SYMBOL(kbase_va_alloc); + -+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle) ++/** ++ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context ++ * @kctx: Pointer to context. ++ * @callback: Pointer to function to call for each job. ++ * ++ * Call a function on all jobs belonging to a non-queued, non-running ++ * context, and detach the jobs from the context as it goes. ++ * ++ * Due to the locks that might be held at the time of the call, the callback ++ * may need to defer work on a workqueue to complete its actions (e.g. when ++ * cancelling jobs) ++ * ++ * Atoms will be removed from the queue, so this must only be called when ++ * cancelling jobs (which occurs as part of context destruction). ++ * ++ * The locking conditions on the caller are as follows: ++ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. 
++ */ ++static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, ++ kbasep_js_ctx_job_cb callback) +{ -+ struct kbase_va_region *reg; -+ int err; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ -+ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+ DEFINE_DMA_ATTRS(attrs); -+#endif ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; ++ unsigned long flags; ++ u32 js; + -+ KBASE_DEBUG_ASSERT(kctx != NULL); -+ KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); ++ kbdev = kctx->kbdev; ++ js_devdata = &kbdev->js_data; + -+ kbase_gpu_vm_lock(kctx); -+ reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); -+ KBASE_DEBUG_ASSERT(reg); -+ err = kbase_gpu_munmap(kctx, reg); -+ kbase_gpu_vm_unlock(kctx); -+ KBASE_DEBUG_ASSERT(!err); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ kbase_mem_phy_alloc_put(reg->cpu_alloc); -+ kbase_mem_phy_alloc_put(reg->gpu_alloc); -+ kfree(reg); ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, ++ 0u, trace_get_refcnt(kbdev, kctx)); + -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) -+ dma_free_attrs(kctx->kbdev->dev, handle->size, -+ handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); -+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) -+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); -+ dma_free_attrs(kctx->kbdev->dev, handle->size, -+ handle->cpu_va, handle->dma_pa, &attrs); -+#else -+ dma_free_writecombine(kctx->kbdev->dev, handle->size, -+ handle->cpu_va, handle->dma_pa); -+#endif -+} -+KBASE_EXPORT_SYMBOL(kbase_va_free); ++ /* Invoke callback on jobs on each slot in turn */ ++ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) ++ jsctx_queue_foreach(kctx, js, callback); + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h new file mode 100644 -index 000000000..33b3554f9 +index 000000000..ddada8e46 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h -@@ -0,0 +1,231 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_js.h +@@ -0,0 +1,925 @@ +/* + * -+ * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -384915,943 +385894,1227 @@ index 000000000..33b3554f9 + + +/** -+ * @file mali_kbase_mem_linux.h -+ * Base kernel memory APIs, Linux implementation. ++ * @file mali_kbase_js.h ++ * Job Scheduler APIs. 
+ */ + -+#ifndef _KBASE_MEM_LINUX_H_ -+#define _KBASE_MEM_LINUX_H_ ++#ifndef _KBASE_JS_H_ ++#define _KBASE_JS_H_ + -+/** A HWC dump mapping */ -+struct kbase_hwc_dma_mapping { -+ void *cpu_va; -+ dma_addr_t dma_pa; -+ size_t size; -+}; ++#include "mali_kbase_js_defs.h" ++#include "mali_kbase_context.h" ++#include "mali_kbase_defs.h" ++#include "mali_kbase_debug.h" + -+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, -+ u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, -+ u64 *gpu_va); -+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages); -+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, -+ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, -+ u64 *flags); -+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); -+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); ++#include "mali_kbase_js_ctx_attr.h" + +/** -+ * kbase_mem_commit - Change the physical backing size of a region -+ * -+ * @kctx: The kernel context -+ * @gpu_addr: Handle to the memory region -+ * @new_pages: Number of physical pages to back the region with -+ * -+ * Return: 0 on success or error code ++ * @addtogroup base_api ++ * @{ + */ -+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); + -+int kbase_mmap(struct file *file, struct vm_area_struct *vma); ++/** ++ * @addtogroup base_kbase_api ++ * @{ ++ */ + +/** -+ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction -+ * mechanism. -+ * @kctx: The kbase context to initialize. ++ * @addtogroup kbase_js Job Scheduler Internal APIs ++ * @{ + * -+ * Return: Zero on success or -errno on failure. ++ * These APIs are Internal to KBase. + */ -+int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** -+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction -+ * mechanism. -+ * @kctx: The kbase context to de-initialize. ++ * @brief Initialize the Job Scheduler ++ * ++ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero ++ * initialized before passing to the kbasep_js_devdata_init() function. This is ++ * to give efficient error path code. + */ -+void kbase_mem_evictable_deinit(struct kbase_context *kctx); ++int kbasep_js_devdata_init(struct kbase_device * const kbdev); + +/** -+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation -+ * @kctx: Context the region belongs to -+ * @reg: The GPU region -+ * @new_pages: The number of pages after the grow -+ * @old_pages: The number of pages before the grow ++ * @brief Halt the Job Scheduler. + * -+ * Return: 0 on success, -errno on error. ++ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data ++ * sub-structure was never initialized/failed initialization, to give efficient ++ * error-path code. + * -+ * Expand the GPU mapping to encompass the new psychical pages which have -+ * been added to the allocation. ++ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must ++ * be zero initialized before passing to the kbasep_js_devdata_init() ++ * function. This is to give efficient error path code. ++ * ++ * It is a Programming Error to call this whilst there are still kbase_context ++ * structures registered with this scheduler. + * -+ * Note: Caller must be holding the region lock. 
+ */ -+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, -+ struct kbase_va_region *reg, -+ u64 new_pages, u64 old_pages); ++void kbasep_js_devdata_halt(struct kbase_device *kbdev); + +/** -+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction -+ * @gpu_alloc: The physical allocation to make evictable ++ * @brief Terminate the Job Scheduler + * -+ * Return: 0 on success, -errno on error. ++ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data ++ * sub-structure was never initialized/failed initialization, to give efficient ++ * error-path code. + * -+ * Take the provided region and make all the physical pages within it -+ * reclaimable by the kernel, updating the per-process VM stats as well. -+ * Remove any CPU mappings (as these can't be removed in the shrinker callback -+ * as mmap_lock might already be taken) but leave the GPU mapping intact as -+ * and until the shrinker reclaims the allocation. ++ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must ++ * be zero initialized before passing to the kbasep_js_devdata_init() ++ * function. This is to give efficient error path code. + * -+ * Note: Must be called with the region lock of the containing context. ++ * It is a Programming Error to call this whilst there are still kbase_context ++ * structures registered with this scheduler. + */ -+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); ++void kbasep_js_devdata_term(struct kbase_device *kbdev); + +/** -+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for -+ * eviction. -+ * @alloc: The physical allocation to remove eviction eligibility from. ++ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. + * -+ * Return: True if the allocation had its backing restored and false if -+ * it hasn't. ++ * This effectively registers a struct kbase_context with a Job Scheduler. + * -+ * Make the physical pages in the region no longer reclaimable and update the -+ * per-process stats, if the shrinker has already evicted the memory then -+ * re-allocate it if the region is still alive. ++ * It does not register any jobs owned by the struct kbase_context with the scheduler. ++ * Those must be separately registered by kbasep_js_add_job(). + * -+ * Note: Must be called with the region lock of the containing context. ++ * The struct kbase_context must be zero intitialized before passing to the ++ * kbase_js_init() function. This is to give efficient error path code. + */ -+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); -+ -+struct kbase_vmap_struct { -+ u64 gpu_addr; -+ struct kbase_mem_phy_alloc *cpu_alloc; -+ struct kbase_mem_phy_alloc *gpu_alloc; -+ phys_addr_t *cpu_pages; -+ phys_addr_t *gpu_pages; -+ void *addr; -+ size_t size; -+ bool is_cached; -+}; ++int kbasep_js_kctx_init(struct kbase_context * const kctx); + ++/** ++ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler ++ * ++ * This effectively de-registers a struct kbase_context from its Job Scheduler ++ * ++ * It is safe to call this on a struct kbase_context that has never had or failed ++ * initialization of its jctx.sched_info member, to give efficient error-path ++ * code. ++ * ++ * For this to work, the struct kbase_context must be zero intitialized before passing ++ * to the kbase_js_init() function. 
++ * ++ * It is a Programming Error to call this whilst there are still jobs ++ * registered with this context. ++ */ ++void kbasep_js_kctx_term(struct kbase_context *kctx); + +/** -+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the -+ * requested access permissions are supported -+ * @kctx: Context the VA range belongs to -+ * @gpu_addr: Start address of VA range -+ * @size: Size of VA range -+ * @prot_request: Flags indicating how the caller will then access the memory -+ * @map: Structure to be given to kbase_vunmap() on freeing ++ * @brief Add a job chain to the Job Scheduler, and take necessary actions to ++ * schedule the context/run the job. + * -+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error ++ * This atomically does the following: ++ * - Update the numbers of jobs information ++ * - Add the job to the run pool if necessary (part of init_job) + * -+ * Map a GPU VA Range into the kernel. The VA range must be contained within a -+ * GPU memory region. Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. ++ * Once this is done, then an appropriate action is taken: ++ * - If the ctx is scheduled, it attempts to start the next job (which might be ++ * this added job) ++ * - Otherwise, and if this is the first job on the context, it enqueues it on ++ * the Policy Queue + * -+ * This is safer than using kmap() on the pages directly, -+ * because the pages here are refcounted to prevent freeing (and hence reuse -+ * elsewhere in the system) until an kbase_vunmap() ++ * The Policy's Queue can be updated by this in the following ways: ++ * - In the above case that this is the first job on the context ++ * - If the context is high priority and the context is not scheduled, then it ++ * could cause the Policy to schedule out a low-priority context, allowing ++ * this context to be scheduled in. + * -+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check -+ * whether the region should allow the intended access, and return an error if -+ * disallowed. This is essential for security of imported memory, particularly -+ * a user buf from SHM mapped into the process as RO. In that case, write -+ * access must be checked if the intention is for kernel to write to the -+ * memory. ++ * If the context is already scheduled on the RunPool, then adding a job to it ++ * is guarenteed not to update the Policy Queue. And so, the caller is ++ * guarenteed to not need to try scheduling a context from the Run Pool - it ++ * can safely assert that the result is false. + * -+ * The checks are also there to help catch access errors on memory where -+ * security is not a concern: imported memory that is always RW, and memory -+ * that was allocated and owned by the process attached to @kctx. In this case, -+ * it helps to identify memory that was was mapped with the wrong access type. ++ * It is a programming error to have more than U32_MAX jobs in flight at a time. + * -+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases -+ * where either the security of memory is solely dependent on those flags, or -+ * when userspace code was expecting only the GPU to access the memory (e.g. HW -+ * workarounds). ++ * The following locking conditions are made on the caller: ++ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
++ * - it must \em not hold hwaccess_lock (as this will be obtained internally) ++ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be ++ * obtained internally) ++ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). + * ++ * @return true indicates that the Policy Queue was updated, and so the ++ * caller will need to try scheduling a context onto the Run Pool. ++ * @return false indicates that no updates were made to the Policy Queue, ++ * so no further action is required from the caller. This is \b always returned ++ * when the context is currently scheduled. + */ -+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ unsigned long prot_request, struct kbase_vmap_struct *map); ++bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** -+ * kbase_vmap - Map a GPU VA range into the kernel safely -+ * @kctx: Context the VA range belongs to -+ * @gpu_addr: Start address of VA range -+ * @size: Size of VA range -+ * @map: Structure to be given to kbase_vunmap() on freeing ++ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. + * -+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error ++ * Completely removing a job requires several calls: ++ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of ++ * the atom ++ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler ++ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the ++ * remaining state held as part of the job having been run. + * -+ * Map a GPU VA Range into the kernel. The VA range must be contained within a -+ * GPU memory region. Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. ++ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. + * -+ * This is safer than using kmap() on the pages directly, -+ * because the pages here are refcounted to prevent freeing (and hence reuse -+ * elsewhere in the system) until an kbase_vunmap() ++ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. ++ * ++ * It is a programming error to call this when: ++ * - \a atom is not a job belonging to kctx. ++ * - \a atom has already been removed from the Job Scheduler. ++ * - \a atom is still in the runpool ++ * ++ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use ++ * kbasep_js_remove_cancelled_job() instead. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * -+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no -+ * checks to ensure the security of e.g. imported user bufs from RO SHM. + */ -+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, -+ struct kbase_vmap_struct *map); ++void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** -+ * kbase_vunmap - Unmap a GPU VA range from the kernel -+ * @kctx: Context the VA range belongs to -+ * @map: Structure describing the mapping from the corresponding kbase_vmap() -+ * call ++ * @brief Completely remove a job chain from the Job Scheduler, in the case ++ * where the job chain was cancelled. 
+ * -+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained -+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as -+ * required, dependent on the CPU mapping for the memory region. ++ * This is a variant of kbasep_js_remove_job() that takes care of removing all ++ * of the retained state too. This is generally useful for cancelled atoms, ++ * which need not be handled in an optimal way. + * -+ * The reference taken on pages during kbase_vmap() is released. ++ * It is a programming error to call this when: ++ * - \a atom is not a job belonging to kctx. ++ * - \a atom has already been removed from the Job Scheduler. ++ * - \a atom is still in the runpool: ++ * - it is not being killed with kbasep_jd_cancel() ++ * ++ * The following locking conditions are made on the caller: ++ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * - it must \em not hold the hwaccess_lock, (as this will be obtained ++ * internally) ++ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be ++ * obtained internally) ++ * ++ * @return true indicates that ctx attributes have changed and the caller ++ * should call kbase_js_sched_all() to try to run more jobs ++ * @return false otherwise + */ -+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); ++bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, ++ struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + -+/** @brief Allocate memory from kernel space and map it onto the GPU ++/** ++ * @brief Refcount a context as being busy, preventing it from being scheduled ++ * out. + * -+ * @param kctx The context used for the allocation/mapping -+ * @param size The size of the allocation in bytes -+ * @param handle An opaque structure used to contain the state needed to free the memory -+ * @return the VA for kernel space and GPU MMU ++ * @note This function can safely be called from IRQ context. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be ++ * used internally. ++ * ++ * @return value != false if the retain succeeded, and the context will not be scheduled out. ++ * @return false if the retain failed (because the context is being/has been scheduled out). + */ -+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle); ++bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+/** @brief Free/unmap memory allocated by kbase_va_alloc ++/** ++ * @brief Refcount a context as being busy, preventing it from being scheduled ++ * out. + * -+ * @param kctx The context used for the allocation/mapping -+ * @param handle An opaque structure returned by the kbase_va_alloc function. ++ * @note This function can safely be called from IRQ context. ++ * ++ * The following locks must be held by the caller: ++ * - mmu_hw_mutex, hwaccess_lock ++ * ++ * @return value != false if the retain succeeded, and the context will not be scheduled out. ++ * @return false if the retain failed (because the context is being/has been scheduled out). 
+ */ -+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle); ++bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); + -+extern const struct vm_operations_struct kbase_vm_ops; ++/** ++ * @brief Lookup a context in the Run Pool based upon its current address space ++ * and ensure that is stays scheduled in. ++ * ++ * The context is refcounted as being busy to prevent it from scheduling ++ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no ++ * longer required to stay scheduled in. ++ * ++ * @note This function can safely be called from IRQ context. ++ * ++ * The following locking conditions are made on the caller: ++ * - it must \em not hold the hwaccess_lock, because it will be used internally. ++ * If the hwaccess_lock is already held, then the caller should use ++ * kbasep_js_runpool_lookup_ctx_nolock() instead. ++ * ++ * @return a valid struct kbase_context on success, which has been refcounted as being busy. ++ * @return NULL on failure, indicating that no context was found in \a as_nr ++ */ ++struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); + -+#endif /* _KBASE_MEM_LINUX_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h -new file mode 100644 -index 000000000..9725fd3f0 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h -@@ -0,0 +1,45 @@ -+/* ++/** ++ * @brief Handling the requeuing/killing of a context that was evicted from the ++ * policy queue or runpool. + * -+ * (C) COPYRIGHT 2012-2014, 2017 ARM Limited. All rights reserved. ++ * This should be used whenever handing off a context that has been evicted ++ * from the policy queue or the runpool: ++ * - If the context is not dying and has jobs, it gets re-added to the policy ++ * queue ++ * - Otherwise, it is not added + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * In addition, if the context is dying the jobs are killed asynchronously. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * In all cases, the Power Manager active reference is released ++ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a ++ * has_pm_ref must be set to false whenever the context was not previously in ++ * the runpool and does not hold a Power Manager active refcount. Note that ++ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an ++ * active refcount even though they weren't in the runpool. + * ++ * The following locking conditions are made on the caller: ++ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be ++ * obtained internally) + */ ++void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); + ++/** ++ * @brief Release a refcount of a context being busy, allowing it to be ++ * scheduled out. 
++ *
++ * When the refcount reaches zero, the context \em might be scheduled out
++ * (depending on whether the Scheduling Policy has deemed it so, or if it has run
++ * out of jobs).
++ *
++ * If the context does get scheduled out, then the following actions will be
++ * taken as part of descheduling a context:
++ * - For the context being descheduled:
++ * - If the context is in the process of dying (all the jobs are being
++ * removed from it), then descheduling also kills off any jobs remaining in the
++ * context.
++ * - If the context is not dying, and any jobs remain after descheduling the
++ * context then it is re-enqueued to the Policy's Queue.
++ * - Otherwise, the context is still known to the scheduler, but remains absent
++ * from the Policy Queue until a job is next added to it.
++ * - In all descheduling cases, the Power Manager active reference (obtained
++ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
++ *
++ * Whilst the context is being descheduled, this also handles actions that
++ * cause more atoms to be run:
++ * - Attempt submitting atoms when the Context Attributes on the Runpool have
++ * changed. This is because the context being scheduled out could mean that
++ * there are more opportunities to run atoms.
++ * - Attempt submitting to a slot that was previously blocked due to affinity
++ * restrictions. This is usually only necessary when releasing a context
++ * happens as part of completing a previous job, but is harmless nonetheless.
++ * - Attempt scheduling in a new context (if one is available), and if necessary,
++ * running a job from that new context.
++ *
++ * Unlike retaining a context in the runpool, this function \b cannot be called
++ * from IRQ context.
++ *
++ * It is a programming error to call this on a \a kctx that is not currently
++ * scheduled, or that already has a zero refcount.
++ *
++ * The following locking conditions are made on the caller:
++ * - it must \em not hold the hwaccess_lock, because it will be used internally.
++ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
++ * obtained internally)
++ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
++ * obtained internally)
++ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
++ * obtained internally)
++ *
++ */
++void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
++/**
++ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
++ * actions from completing an atom.
++ *
++ * This is usually called as part of completing an atom and releasing the
++ * refcount on the context held by the atom.
++ *
++ * Therefore, the extra actions carried out are part of handling actions queued
++ * on a completed atom, namely:
++ * - Releasing the atom's context attributes
++ * - Retrying the submission on a particular slot, because we couldn't submit
++ * on that slot from an IRQ handler.
++ *
++ * The locking conditions of this function are the same as those for
++ * kbasep_js_runpool_release_ctx()
++ */
++void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
++/**
++ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
++ * kbasep_js_device_data::runpool_mutex and
++ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
++ * attempt to schedule new contexts.
++ */
++void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
++		struct kbase_context *kctx);
+
++/**
++ * @brief Schedule in a privileged context
++ *
++ * This schedules a context in regardless of the context priority.
++ * If the runpool is full, a context will be forced out of the runpool and the function will wait
++ * for the new context to be scheduled in.
++ * The context will be kept scheduled in (and the corresponding address space reserved) until
++ * kbasep_js_release_privileged_ctx is called.
++ *
++ * The following locking conditions are made on the caller:
++ * - it must \em not hold the hwaccess_lock, because it will be used internally.
++ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
++ * obtained internally)
++ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
++ * obtained internally)
++ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
++ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
++ * be used internally.
++ *
++ */
++void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
-+#ifndef _KBASE_MEM_LOWLEVEL_H
-+#define _KBASE_MEM_LOWLEVEL_H
++/**
++ * @brief Release a privileged context, allowing it to be scheduled out.
++ *
++ * See kbasep_js_runpool_release_ctx for potential side effects.
++ *
++ * The following locking conditions are made on the caller:
++ * - it must \em not hold the hwaccess_lock, because it will be used internally.
++ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
++ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
++ * obtained internally)
++ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
++ * obtained internally)
++ *
++ */
++void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
-+#ifndef _KBASE_H_
-+#error "Don't include this file directly, use mali_kbase.h instead"
-+#endif
++/**
++ * @brief Try to submit the next job on each slot
++ *
++ * The following locks may be used:
++ * - kbasep_js_device_data::runpool_mutex
++ * - hwaccess_lock
++ */
++void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
-+#include 
++/**
++ * @brief Suspend the job scheduler during a Power Management Suspend event.
++ *
++ * Causes all contexts to be removed from the runpool, and prevents any
++ * contexts from (re)entering the runpool.
++ *
++ * This does not handle suspending the one privileged context: the caller must
++ * instead do this by suspending the GPU HW Counter Instrumentation.
++ *
++ * This will eventually cause all Power Management active references held by
++ * contexts on the runpool to be released, without running any more atoms.
++ *
++ * The caller must then wait for all Power Management active refcounts to become
++ * zero before completing the suspend.
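/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * pairing of kbasep_js_schedule_privileged_ctx() with
 * kbasep_js_release_privileged_ctx() documented above; none of the listed
 * locks may be held around this bracket, and dump_counters_for_ctx() is a
 * hypothetical placeholder for the privileged work.
 */
static void example_privileged_section(struct kbase_device *kbdev,
		struct kbase_context *kctx)
{
	/* Force the context in and keep its address space reserved */
	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	dump_counters_for_ctx(kctx); /* hypothetical privileged work */

	/* Allow the context to be scheduled out again */
	kbasep_js_release_privileged_ctx(kbdev, kctx);
}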
++ * ++ * The emptying mechanism may take some time to complete, since it can wait for ++ * jobs to complete naturally instead of forcing them to end quickly. However, ++ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this ++ * function is guaranteed to complete in a finite time. ++ */ ++void kbasep_js_suspend(struct kbase_device *kbdev); + +/** -+ * @brief Flags for kbase_phy_allocator_pages_alloc ++ * @brief Resume the Job Scheduler after a Power Management Resume event. ++ * ++ * This restores the actions from kbasep_js_suspend(): ++ * - Schedules contexts back into the runpool ++ * - Resumes running atoms on the GPU + */ -+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ -+#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ -+#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ ++void kbasep_js_resume(struct kbase_device *kbdev); + -+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) ++/** ++ * @brief Submit an atom to the job scheduler. ++ * ++ * The atom is enqueued on the context's ringbuffer. The caller must have ++ * ensured that all dependencies can be represented in the ringbuffer. ++ * ++ * Caller must hold jctx->lock ++ * ++ * @param[in] kctx Context pointer ++ * @param[in] atom Pointer to the atom to submit ++ * ++ * @return Whether the context requires to be enqueued. */ ++bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); + -+#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ ++/** ++ * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. ++ * @kctx: Context Pointer ++ * @prio: Priority (specifies the queue together with js). ++ * @js: Job slot (specifies the queue together with prio). ++ * ++ * Pushes all possible atoms from the linked list to the ringbuffer. ++ * Number of atoms are limited to free space in the ringbuffer and ++ * number of available atoms in the linked list. ++ * ++ */ ++void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); ++/** ++ * @brief Pull an atom from a context in the job scheduler for execution. ++ * ++ * The atom will not be removed from the ringbuffer at this stage. ++ * ++ * The HW access lock must be held when calling this function. ++ * ++ * @param[in] kctx Context to pull from ++ * @param[in] js Job slot to pull from ++ * @return Pointer to an atom, or NULL if there are no atoms for this ++ * slot that can be currently run. ++ */ ++struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + -+enum kbase_sync_type { -+ KBASE_SYNC_TO_CPU, -+ KBASE_SYNC_TO_DEVICE -+}; ++/** ++ * @brief Return an atom to the job scheduler ringbuffer. ++ * ++ * An atom is 'unpulled' if execution is stopped but intended to be returned to ++ * later. The most common reason for this is that the atom has been ++ * soft-stopped. ++ * ++ * Note that if multiple atoms are to be 'unpulled', they must be returned in ++ * the reverse order to which they were originally pulled. It is a programming ++ * error to return atoms in any other order. ++ * ++ * The HW access lock must be held when calling this function. 
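/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * ordering rule documented above: atoms pulled with kbase_js_pull() must be
 * returned with kbase_js_unpull() in the reverse order of pulling. The
 * hwaccess_lock that the caller must hold is not shown.
 */
static void example_pull_then_unpull(struct kbase_context *kctx, int js)
{
	struct kbase_jd_atom *first = kbase_js_pull(kctx, js);
	struct kbase_jd_atom *second = kbase_js_pull(kctx, js);

	/* The most recently pulled atom goes back first */
	if (second)
		kbase_js_unpull(kctx, second);
	if (first)
		kbase_js_unpull(kctx, first);
}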
++ * ++ * @param[in] kctx Context pointer ++ * @param[in] atom Pointer to the atom to unpull ++ */ ++void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + -+#endif /* _KBASE_LOWLEVEL_H */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c -new file mode 100644 -index 000000000..0cafb64ee ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c -@@ -0,0 +1,573 @@ -+/* ++/** ++ * @brief Complete an atom from jd_done_worker(), removing it from the job ++ * scheduler ringbuffer. + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * If the atom failed then all dependee atoms marked for failure propagation ++ * will also fail. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * @param[in] kctx Context pointer ++ * @param[in] katom Pointer to the atom to complete ++ * @return true if the context is now idle (no jobs pulled) ++ * false otherwise ++ */ ++bool kbase_js_complete_atom_wq(struct kbase_context *kctx, ++ struct kbase_jd_atom *katom); ++ ++/** ++ * @brief Complete an atom. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Most of the work required to complete an atom will be performed by ++ * jd_done_worker(). ++ * ++ * The HW access lock must be held when calling this function. ++ * ++ * @param[in] katom Pointer to the atom to complete ++ * @param[in] end_timestamp The time that the atom completed (may be NULL) + * ++ * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ ++struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ++ ktime_t *end_timestamp); + ++/** ++ * @brief Submit atoms from all available contexts. ++ * ++ * This will attempt to submit as many jobs as possible to the provided job ++ * slots. It will exit when either all job slots are full, or all contexts have ++ * been used. ++ * ++ * @param[in] kbdev Device pointer ++ * @param[in] js_mask Mask of job slots to submit to ++ */ ++void kbase_js_sched(struct kbase_device *kbdev, int js_mask); + ++/** ++ * kbase_jd_zap_context - Attempt to deschedule a context that is being ++ * destroyed ++ * @kctx: Context pointer ++ * ++ * This will attempt to remove a context from any internal job scheduler queues ++ * and perform any other actions to ensure a context will not be submitted ++ * from. ++ * ++ * If the context is currently scheduled, then the caller must wait for all ++ * pending jobs to complete before taking any further action. ++ */ ++void kbase_js_zap_context(struct kbase_context *kctx); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++/** ++ * @brief Validate an atom ++ * ++ * This will determine whether the atom can be scheduled onto the GPU. Atoms ++ * with invalid combinations of core requirements will be rejected. ++ * ++ * @param[in] kbdev Device pointer ++ * @param[in] katom Atom to validate ++ * @return true if atom is valid ++ * false otherwise ++ */ ++bool kbase_js_is_atom_valid(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + -+#define pool_dbg(pool, format, ...) 
\ -+ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ -+ (pool->next_pool) ? "kctx" : "kbdev", \ -+ kbase_mem_pool_size(pool), \ -+ kbase_mem_pool_max_size(pool), \ -+ ##__VA_ARGS__) ++/** ++ * kbase_js_set_timeouts - update all JS timeouts with user specified data ++ * @kbdev: Device pointer ++ * ++ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is ++ * set to a positive number then that becomes the new value used, if a timeout ++ * is negative then the default is set. ++ */ ++void kbase_js_set_timeouts(struct kbase_device *kbdev); + -+#define NOT_DIRTY false -+#define NOT_RECLAIMED false ++/* ++ * Helpers follow ++ */ + -+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) ++/** ++ * @brief Check that a context is allowed to submit jobs on this policy ++ * ++ * The purpose of this abstraction is to hide the underlying data size, and wrap up ++ * the long repeated line of code. ++ * ++ * As with any bool, never test the return value with true. ++ * ++ * The caller must hold hwaccess_lock. ++ */ ++static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ -+ spin_lock(&pool->pool_lock); -+} ++ u16 test_bit; + -+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) -+{ -+ spin_unlock(&pool->pool_lock); -+} ++ /* Ensure context really is scheduled in */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) -+{ -+ ssize_t max_size = kbase_mem_pool_max_size(pool); -+ ssize_t cur_size = kbase_mem_pool_size(pool); ++ test_bit = (u16) (1u << kctx->as_nr); + -+ return max(max_size - cur_size, (ssize_t)0); ++ return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); +} + -+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) ++/** ++ * @brief Allow a context to submit jobs on this policy ++ * ++ * The purpose of this abstraction is to hide the underlying data size, and wrap up ++ * the long repeated line of code. ++ * ++ * The caller must hold hwaccess_lock. ++ */ ++static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ -+ return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); -+} ++ u16 set_bit; + -+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) -+{ -+ return kbase_mem_pool_size(pool) == 0; -+} ++ /* Ensure context really is scheduled in */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, -+ struct page *p) -+{ -+ lockdep_assert_held(&pool->pool_lock); ++ set_bit = (u16) (1u << kctx->as_nr); + -+ list_add(&p->lru, &pool->page_list); -+ pool->cur_size++; ++ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + -+ pool_dbg(pool, "added page\n"); ++ js_devdata->runpool_irq.submit_allowed |= set_bit; +} + -+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) ++/** ++ * @brief Prevent a context from submitting more jobs on this policy ++ * ++ * The purpose of this abstraction is to hide the underlying data size, and wrap up ++ * the long repeated line of code. ++ * ++ * The caller must hold hwaccess_lock. 
++ */ ++static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ -+ kbase_mem_pool_lock(pool); -+ kbase_mem_pool_add_locked(pool, p); -+ kbase_mem_pool_unlock(pool); -+} ++ u16 clear_bit; ++ u16 clear_mask; + -+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, -+ struct list_head *page_list, size_t nr_pages) -+{ -+ lockdep_assert_held(&pool->pool_lock); ++ /* Ensure context really is scheduled in */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ list_splice(page_list, &pool->page_list); -+ pool->cur_size += nr_pages; ++ clear_bit = (u16) (1u << kctx->as_nr); ++ clear_mask = ~clear_bit; + -+ pool_dbg(pool, "added %zu pages\n", nr_pages); -+} ++ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + -+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, -+ struct list_head *page_list, size_t nr_pages) -+{ -+ kbase_mem_pool_lock(pool); -+ kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); -+ kbase_mem_pool_unlock(pool); ++ js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + -+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) ++/** ++ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom ++ */ ++static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom) +{ -+ struct page *p; -+ -+ lockdep_assert_held(&pool->pool_lock); -+ -+ if (kbase_mem_pool_is_empty(pool)) -+ return NULL; -+ -+ p = list_first_entry(&pool->page_list, struct page, lru); -+ list_del_init(&p->lru); -+ pool->cur_size--; -+ -+ pool_dbg(pool, "removed page\n"); -+ -+ return p; ++ atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + -+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) ++/** ++ * Mark a slot as requiring resubmission by carrying that information on a ++ * completing atom. ++ * ++ * @note This can ASSERT in debug builds if the submit slot has been set to ++ * something other than the current value for @a js. This is because you might ++ * be unintentionally stopping more jobs being submitted on the old submit ++ * slot, and that might cause a scheduling-hang. ++ * ++ * @note If you can guarantee that the atoms for the original slot will be ++ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit() ++ * first to silence the ASSERT. 
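/*
 * Editor's note: illustrative worked example, not part of the patch. The
 * kbasep_js_is/set/clear_submit_allowed() helpers above are plain bit
 * operations on the u16 submit_allowed mask, one bit per address space.
 * For a context bound to address space 3:
 *
 *     bit = 1u << 3 = 0x0008
 *     set:   submit_allowed |=  0x0008
 *     clear: submit_allowed &= ~0x0008
 *     test:  (submit_allowed & 0x0008) != 0
 */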
++ */ ++static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js) +{ -+ struct page *p; -+ -+ kbase_mem_pool_lock(pool); -+ p = kbase_mem_pool_remove_locked(pool); -+ kbase_mem_pool_unlock(pool); ++ KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS); ++ KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot == ++ KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID) ++ || (atom->retry_submit_on_slot == js)); + -+ return p; ++ atom->retry_submit_on_slot = js; +} + -+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, -+ struct page *p) ++/** ++ * Create an initial 'invalid' atom retained state, that requires no ++ * atom-related work to be done on releasing with ++ * kbasep_js_runpool_release_ctx_and_katom_retained_state() ++ */ ++static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) +{ -+ struct device *dev = pool->kbdev->dev; -+ -+ dma_sync_single_for_device(dev, kbase_dma_addr(p), -+ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; ++ retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; ++ retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + -+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, -+ struct page *p) ++/** ++ * Copy atom state that can be made available after jd_done_nolock() is called ++ * on that atom. ++ */ ++static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) +{ -+ clear_highpage(p); -+ kbase_mem_pool_sync_page(pool, p); ++ retained_state->event_code = katom->event_code; ++ retained_state->core_req = katom->core_req; ++ retained_state->retry_submit_on_slot = katom->retry_submit_on_slot; ++ retained_state->sched_priority = katom->sched_priority; ++ retained_state->device_nr = katom->device_nr; +} + -+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, -+ struct page *p) ++/** ++ * @brief Determine whether an atom has finished (given its retained state), ++ * and so should be given back to userspace/removed from the system. ++ * ++ * Reasons for an atom not finishing include: ++ * - Being soft-stopped (and so, the atom should be resubmitted sometime later) ++ * ++ * @param[in] katom_retained_state the retained state of the atom to check ++ * @return false if the atom has not finished ++ * @return !=false if the atom has finished ++ */ ++static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+ /* Zero page before spilling */ -+ kbase_mem_pool_zero_page(next_pool, p); -+ -+ kbase_mem_pool_add(next_pool, p); ++ return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); +} + -+struct page *kbase_mem_alloc_page(struct kbase_device *kbdev) ++/** ++ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid ++ * ++ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the ++ * code should just ignore it. 
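/*
 * Editor's note: illustrative sketch only, not part of the patch. It shows the
 * intended life cycle of the retained state: snapshot the atom with
 * kbasep_js_atom_retained_state_copy() before the atom is returned to user
 * space, then release the context from the snapshot.
 * complete_and_free_atom() is a hypothetical placeholder for the real
 * completion path, after which the atom must no longer be dereferenced.
 */
static void example_release_with_retained_state(struct kbase_device *kbdev,
		struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
	struct kbasep_js_atom_retained_state retained;

	kbasep_js_atom_retained_state_copy(&retained, katom);

	complete_and_free_atom(katom); /* hypothetical; katom is gone after this */

	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
			&retained);
}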
++ * ++ * @param[in] katom_retained_state the atom's retained state to check ++ * @return false if the retained state is invalid, and can be ignored ++ * @return !=false if the retained state is valid ++ */ ++static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+ struct page *p; -+ gfp_t gfp; -+ struct device *dev = kbdev->dev; -+ dma_addr_t dma_addr; -+ -+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ -+ LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) -+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ -+ gfp = GFP_USER | __GFP_ZERO; -+#else -+ gfp = GFP_HIGHUSER | __GFP_ZERO; -+#endif -+ -+ if (current->flags & PF_KTHREAD) { -+ /* Don't trigger OOM killer from kernel threads, e.g. when -+ * growing memory on GPU page fault */ -+ gfp |= __GFP_NORETRY; -+ } -+ -+ p = alloc_page(gfp); -+ if (!p) -+ return NULL; -+ -+ dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ if (dma_mapping_error(dev, dma_addr)) { -+ __free_page(p); -+ return NULL; -+ } -+ -+ WARN_ON(dma_addr != page_to_phys(p)); -+ -+ kbase_set_dma_addr(p, dma_addr); -+ -+ return p; ++ return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + -+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, -+ struct page *p) ++static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res) +{ -+ struct device *dev = pool->kbdev->dev; -+ dma_addr_t dma_addr = kbase_dma_addr(p); -+ -+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); -+ kbase_clear_dma_addr(p); -+ __free_page(p); ++ int js = katom_retained_state->retry_submit_on_slot; + -+ pool_dbg(pool, "freed page to kernel\n"); ++ *res = js; ++ return (bool) (js >= 0); +} + -+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, -+ size_t nr_to_shrink) ++/** ++ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the ++ * context is guaranteed to be already previously retained. ++ * ++ * It is a programming error to supply the \a as_nr of a context that has not ++ * been previously retained/has a busy refcount of zero. The only exception is ++ * when there is no ctx in \a as_nr (NULL returned). ++ * ++ * The following locking conditions are made on the caller: ++ * - it must \em not hold the hwaccess_lock, because it will be used internally. ++ * ++ * @return a valid struct kbase_context on success, with a refcount that is guaranteed ++ * to be non-zero and unmodified by this function. 
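/*
 * Editor's note: illustrative sketch only, not part of the patch. It combines
 * the retained-state predicates defined above the way a completion path
 * typically would: only a valid snapshot is inspected, and a retry slot is
 * acted on only when one was recorded. resubmit_on_slot() is a hypothetical
 * placeholder.
 */
static void example_check_retained_state(
		const struct kbasep_js_atom_retained_state *rs)
{
	int js;

	if (!kbasep_js_atom_retained_state_is_valid(rs))
		return; /* nothing to do for an invalid snapshot */

	if (kbasep_js_get_atom_retry_submit_slot(rs, &js))
		resubmit_on_slot(js); /* hypothetical */
}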
++ * @return NULL on failure, indicating that no context was found in \a as_nr ++ */ ++static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr) +{ -+ struct page *p; -+ size_t i; ++ struct kbase_context *found_kctx; + -+ lockdep_assert_held(&pool->pool_lock); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); + -+ for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { -+ p = kbase_mem_pool_remove_locked(pool); -+ kbase_mem_pool_free_page(pool, p); -+ } ++ found_kctx = kbdev->as_to_kctx[as_nr]; ++ KBASE_DEBUG_ASSERT(found_kctx == NULL || ++ atomic_read(&found_kctx->refcount) > 0); + -+ return i; ++ return found_kctx; +} + -+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, -+ size_t nr_to_shrink) ++/* ++ * The following locking conditions are made on the caller: ++ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. ++ * - The caller must hold the kbasep_js_device_data::runpool_mutex ++ */ ++static inline void kbase_js_runpool_inc_context_count( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ size_t nr_freed; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ kbase_mem_pool_lock(pool); -+ nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); -+ kbase_mem_pool_unlock(pool); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ return nr_freed; -+} ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, -+ size_t nr_to_grow) -+{ -+ struct page *p; -+ size_t i; ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+ for (i = 0; i < nr_to_grow; i++) { -+ p = kbase_mem_alloc_page(pool->kbdev); -+ if (!p) -+ return -ENOMEM; -+ kbase_mem_pool_add(pool, p); -+ } ++ /* Track total contexts */ ++ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); ++ ++(js_devdata->nr_all_contexts_running); + -+ return 0; ++ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ /* Track contexts that can submit jobs */ ++ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < ++ S8_MAX); ++ ++(js_devdata->nr_user_contexts_running); ++ } +} + -+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) ++/* ++ * The following locking conditions are made on the caller: ++ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
++ * - The caller must hold the kbasep_js_device_data::runpool_mutex ++ */ ++static inline void kbase_js_runpool_dec_context_count( ++ struct kbase_device *kbdev, ++ struct kbase_context *kctx) +{ -+ size_t cur_size; -+ -+ cur_size = kbase_mem_pool_size(pool); -+ -+ if (new_size > pool->max_size) -+ new_size = pool->max_size; ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ if (new_size < cur_size) -+ kbase_mem_pool_shrink(pool, cur_size - new_size); -+ else if (new_size > cur_size) -+ kbase_mem_pool_grow(pool, new_size - cur_size); -+} ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) -+{ -+ size_t cur_size; -+ size_t nr_to_shrink; ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ kbase_mem_pool_lock(pool); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&js_devdata->runpool_mutex); + -+ pool->max_size = max_size; ++ /* Track total contexts */ ++ --(js_devdata->nr_all_contexts_running); ++ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + -+ cur_size = kbase_mem_pool_size(pool); -+ if (max_size < cur_size) { -+ nr_to_shrink = cur_size - max_size; -+ kbase_mem_pool_shrink_locked(pool, nr_to_shrink); ++ if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ /* Track contexts that can submit jobs */ ++ --(js_devdata->nr_user_contexts_running); ++ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } -+ -+ kbase_mem_pool_unlock(pool); +} + + -+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, -+ struct shrink_control *sc) ++/** ++ * @brief Submit atoms from all available contexts to all job slots. ++ * ++ * This will attempt to submit as many jobs as possible. It will exit when ++ * either all job slots are full, or all contexts have been used. ++ * ++ * @param[in] kbdev Device pointer ++ */ ++static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ -+ struct kbase_mem_pool *pool; -+ -+ pool = container_of(s, struct kbase_mem_pool, reclaim); -+ pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool)); -+ return kbase_mem_pool_size(pool); ++ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + -+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, -+ struct shrink_control *sc) -+{ -+ struct kbase_mem_pool *pool; -+ unsigned long freed; -+ -+ pool = container_of(s, struct kbase_mem_pool, reclaim); -+ -+ pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); -+ -+ freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan); -+ -+ pool_dbg(pool, "reclaim freed %ld pages\n", freed); ++extern const int ++kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + -+ return freed; -+} ++extern const base_jd_prio ++kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, -+ struct shrink_control *sc) ++/** ++ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) ++ * to relative ordering ++ * @atom_prio: Priority ID to translate. ++ * ++ * Atom priority values for @ref base_jd_prio cannot be compared directly to ++ * find out which are higher or lower. ++ * ++ * This function will convert base_jd_prio values for successively lower ++ * priorities into a monotonically increasing sequence. 
That is, the lower the ++ * base_jd_prio priority, the higher the value produced by this function. This ++ * is in accordance with how the rest of the kernel treates priority. ++ * ++ * The mapping is 1:1 and the size of the valid input range is the same as the ++ * size of the valid output range, i.e. ++ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS ++ * ++ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions ++ * ++ * Return: On success: a value in the inclusive range ++ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: ++ * KBASE_JS_ATOM_SCHED_PRIO_INVALID ++ */ ++static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ -+ if (sc->nr_to_scan == 0) -+ return kbase_mem_pool_reclaim_count_objects(s, sc); ++ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) ++ return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + -+ return kbase_mem_pool_reclaim_scan_objects(s, sc); ++ return kbasep_js_atom_priority_to_relative[atom_prio]; +} -+#endif + -+int kbase_mem_pool_init(struct kbase_mem_pool *pool, -+ size_t max_size, -+ struct kbase_device *kbdev, -+ struct kbase_mem_pool *next_pool) ++static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) +{ -+ pool->cur_size = 0; -+ pool->max_size = max_size; -+ pool->kbdev = kbdev; -+ pool->next_pool = next_pool; -+ -+ spin_lock_init(&pool->pool_lock); -+ INIT_LIST_HEAD(&pool->page_list); ++ unsigned int prio_idx; + -+ /* Register shrinker */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -+ pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; -+#else -+ pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; -+ pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; -+#endif -+ pool->reclaim.seeks = DEFAULT_SEEKS; -+ /* Kernel versions prior to 3.1 : -+ * struct shrinker does not define batch */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) -+ pool->reclaim.batch = 0; -+#endif -+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE -+ register_shrinker(&pool->reclaim); -+#else -+ register_shrinker(&pool->reclaim, "mali-mem-pool"); -+#endif ++ KBASE_DEBUG_ASSERT(0 <= sched_prio ++ && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); + -+ pool_dbg(pool, "initialized\n"); ++ prio_idx = (unsigned int)sched_prio; + -+ return 0; ++ return kbasep_js_relative_priority_to_atom[prio_idx]; +} + -+void kbase_mem_pool_term(struct kbase_mem_pool *pool) -+{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; -+ struct page *p; -+ size_t nr_to_spill = 0; -+ LIST_HEAD(spill_list); -+ int i; -+ -+ pool_dbg(pool, "terminate()\n"); -+ -+ unregister_shrinker(&pool->reclaim); -+ -+ kbase_mem_pool_lock(pool); -+ pool->max_size = 0; -+ -+ if (next_pool && !kbase_mem_pool_is_full(next_pool)) { -+ /* Spill to next pool (may overspill) */ -+ nr_to_spill = kbase_mem_pool_capacity(next_pool); -+ nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); ++ /** @} *//* end group kbase_js */ ++ /** @} *//* end group base_kbase_api */ ++ /** @} *//* end group base_api */ + -+ /* Zero pages first without holding the next_pool lock */ -+ for (i = 0; i < nr_to_spill; i++) { -+ p = kbase_mem_pool_remove_locked(pool); -+ kbase_mem_pool_zero_page(pool, p); -+ list_add(&p->lru, &spill_list); -+ } -+ } ++#endif /* _KBASE_JS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +new file mode 100644 +index 000000000..321506ada +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +@@ -0,0 +1,301 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016 
ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ while (!kbase_mem_pool_is_empty(pool)) { -+ /* Free remaining pages to kernel */ -+ p = kbase_mem_pool_remove_locked(pool); -+ kbase_mem_pool_free_page(pool, p); -+ } + -+ kbase_mem_pool_unlock(pool); + -+ if (next_pool && nr_to_spill) { -+ /* Add new page list to next_pool */ -+ kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + -+ pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); -+ } ++#include ++#include + -+ pool_dbg(pool, "terminated\n"); -+} ++/* ++ * Private functions follow ++ */ + -+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) ++/** ++ * @brief Check whether a ctx has a certain attribute, and if so, retain that ++ * attribute on the runpool. ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx is scheduled on the runpool ++ * ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * @return false indicates no change in ctx attributes state of the runpool. ++ */ ++static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ struct page *p; -+ -+ do { -+ pool_dbg(pool, "alloc()\n"); -+ p = kbase_mem_pool_remove(pool); -+ -+ if (p) -+ return p; -+ -+ pool = pool->next_pool; -+ } while (pool); ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ return NULL; -+} ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, -+ bool dirty) -+{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ pool_dbg(pool, "free()\n"); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ if (!kbase_mem_pool_is_full(pool)) { -+ /* Add to our own pool */ -+ if (dirty) -+ kbase_mem_pool_sync_page(pool, p); ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { ++ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); ++ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + -+ kbase_mem_pool_add(pool, p); -+ } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { -+ /* Spill to next pool */ -+ kbase_mem_pool_spill(next_pool, p); -+ } else { -+ /* Free page */ -+ kbase_mem_pool_free_page(pool, p); ++ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { ++ /* First refcount indicates a state change */ ++ runpool_state_changed = true; ++ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); ++ } + } ++ ++ return 
runpool_state_changed; +} + -+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ phys_addr_t *pages) ++/** ++ * @brief Check whether a ctx has a certain attribute, and if so, release that ++ * attribute on the runpool. ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx is scheduled on the runpool ++ * ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * @return false indicates no change in ctx attributes state of the runpool. ++ */ ++static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ struct page *p; -+ size_t nr_from_pool; -+ size_t i; -+ int err = -ENOMEM; -+ -+ pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages); -+ -+ /* Get pages from this pool */ -+ kbase_mem_pool_lock(pool); -+ nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool)); -+ for (i = 0; i < nr_from_pool; i++) { -+ p = kbase_mem_pool_remove_locked(pool); -+ pages[i] = page_to_phys(p); -+ } -+ kbase_mem_pool_unlock(pool); ++ struct kbasep_js_device_data *js_devdata; ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ if (i != nr_pages && pool->next_pool) { -+ /* Allocate via next pool */ -+ err = kbase_mem_pool_alloc_pages(pool->next_pool, -+ nr_pages - i, pages + i); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (err) -+ goto err_rollback; ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + -+ i += nr_pages - i; -+ } ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { ++ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); ++ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + -+ /* Get any remaining pages from kernel */ -+ for (; i < nr_pages; i++) { -+ p = kbase_mem_alloc_page(pool->kbdev); -+ if (!p) -+ goto err_rollback; -+ pages[i] = page_to_phys(p); ++ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { ++ /* Last de-refcount indicates a state change */ ++ runpool_state_changed = true; ++ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); ++ } + } + -+ pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages); -+ -+ return 0; -+ -+err_rollback: -+ kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); -+ return err; ++ return runpool_state_changed; +} + -+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, -+ size_t nr_pages, phys_addr_t *pages, bool zero, bool sync) ++/** ++ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool ++ * if the context is scheduled. ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * This may allow the scheduler to submit more jobs than previously. ++ * @return false indicates no change in ctx attributes state of the runpool. 
++ */ ++static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ struct page *p; -+ size_t nr_to_pool = 0; -+ LIST_HEAD(new_page_list); -+ size_t i; -+ -+ if (!nr_pages) -+ return; -+ -+ pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", -+ nr_pages, zero, sync); ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ /* Zero/sync pages first without holding the pool lock */ -+ for (i = 0; i < nr_pages; i++) { -+ if (unlikely(!pages[i])) -+ continue; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ p = phys_to_page(pages[i]); ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); + -+ if (zero) -+ kbase_mem_pool_zero_page(pool, p); -+ else if (sync) -+ kbase_mem_pool_sync_page(pool, p); ++ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + -+ list_add(&p->lru, &new_page_list); -+ nr_to_pool++; -+ pages[i] = 0; ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { ++ /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ ++ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); + } + -+ /* Add new page list to pool */ -+ kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); -+ -+ pool_dbg(pool, "add_array(%zu) added %zu pages\n", -+ nr_pages, nr_to_pool); ++ return runpool_state_changed; +} + -+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, -+ phys_addr_t *pages, bool dirty, bool reclaimed) ++/* ++ * @brief Release a certain attribute on a ctx, also releasing it from the runpool ++ * if the context is scheduled. ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * This may allow the scheduler to submit more jobs than previously. ++ * @return false indicates no change in ctx attributes state of the runpool. 
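/*
 * Editor's note: illustrative worked example, not part of the patch. The
 * retain/release helpers in this file only report a runpool state change on
 * the first and last reference to an attribute, i.e. for one attribute:
 *
 *     refcount 0 -> 1 : attribute turns on,  runpool_state_changed = true
 *     refcount 1 -> 2 : no change reported
 *     refcount 2 -> 1 : no change reported
 *     refcount 1 -> 0 : attribute turns off, runpool_state_changed = true
 */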
++ */ ++static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ struct kbase_mem_pool *next_pool = pool->next_pool; -+ struct page *p; -+ size_t nr_to_pool; -+ LIST_HEAD(to_pool_list); -+ size_t i = 0; -+ -+ pool_dbg(pool, "free_pages(%zu):\n", nr_pages); -+ -+ if (!reclaimed) { -+ /* Add to this pool */ -+ nr_to_pool = kbase_mem_pool_capacity(pool); -+ nr_to_pool = min(nr_pages, nr_to_pool); -+ -+ kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); ++ struct kbasep_js_kctx_info *js_kctx_info; ++ bool runpool_state_changed = false; + -+ i += nr_to_pool; ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ if (i != nr_pages && next_pool) { -+ /* Spill to next pool (may overspill) */ -+ nr_to_pool = kbase_mem_pool_capacity(next_pool); -+ nr_to_pool = min(nr_pages - i, nr_to_pool); ++ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); ++ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); + -+ kbase_mem_pool_add_array(next_pool, nr_to_pool, -+ pages + i, true, dirty); -+ i += nr_to_pool; -+ } ++ if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); ++ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); + } + -+ /* Free any remaining pages to kernel */ -+ for (; i < nr_pages; i++) { -+ if (unlikely(!pages[i])) -+ continue; -+ -+ p = phys_to_page(pages[i]); -+ -+ kbase_mem_pool_free_page(pool, p); -+ pages[i] = 0; -+ } ++ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ ++ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + -+ pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); ++ return runpool_state_changed; +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c -new file mode 100644 -index 000000000..585fba036 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c -@@ -0,0 +1,81 @@ ++ +/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * More commonly used public functions + */ + ++void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx) ++{ ++ bool runpool_state_changed = false; + ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+#include -+#include ++ if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { ++ /* This context never submits, so don't track any scheduling attributes */ ++ return; ++ } + -+#include ++ /* Transfer attributes held in the context flags for contexts that have submit enabled */ + -+#ifdef CONFIG_DEBUG_FS ++ /* ... 
More attributes can be added here ... */ + -+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val) ++ /* The context should not have been scheduled yet, so ASSERT if this caused ++ * runpool state changes (note that other threads *can't* affect the value ++ * of runpool_state_changed, due to how it's calculated) */ ++ KBASE_DEBUG_ASSERT(runpool_state_changed == false); ++ CSTD_UNUSED(runpool_state_changed); ++} ++ ++void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; ++ bool runpool_state_changed; ++ int i; + -+ *val = kbase_mem_pool_size(pool); ++ /* Retain any existing attributes */ ++ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { ++ /* The context is being scheduled in, so update the runpool with the new attributes */ ++ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + -+ return 0; ++ /* We don't need to know about state changed, because retaining a ++ * context occurs on scheduling it, and that itself will also try ++ * to run new atoms */ ++ CSTD_UNUSED(runpool_state_changed); ++ } ++ } +} + -+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val) ++bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ -+ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; ++ bool runpool_state_changed = false; ++ int i; + -+ kbase_mem_pool_trim(pool, val); ++ /* Release any existing attributes */ ++ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { ++ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { ++ /* The context is being scheduled out, so update the runpool on the removed attributes */ ++ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); ++ } ++ } + -+ return 0; ++ return runpool_state_changed; +} + -+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops, -+ kbase_mem_pool_debugfs_size_get, -+ kbase_mem_pool_debugfs_size_set, -+ "%llu\n"); -+ -+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val) ++void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; ++ bool runpool_state_changed = false; ++ base_jd_core_req core_req; + -+ *val = kbase_mem_pool_max_size(pool); ++ KBASE_DEBUG_ASSERT(katom); ++ core_req = katom->core_req; + -+ return 0; ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); ++ else ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); ++ ++ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { ++ /* Atom that can run on slot1 or slot2, and can use all cores */ ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); ++ } ++ ++ /* We don't need to know about state changed, because retaining an ++ * atom occurs on adding it, and that itself will also try to run ++ * new atoms */ ++ CSTD_UNUSED(runpool_state_changed); +} + -+static int kbase_mem_pool_debugfs_max_size_set(void *data, 
u64 val) ++bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) +{ -+ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; ++ bool runpool_state_changed = false; ++ base_jd_core_req core_req; + -+ kbase_mem_pool_set_max_size(pool, val); ++ KBASE_DEBUG_ASSERT(katom_retained_state); ++ core_req = katom_retained_state->core_req; + -+ return 0; -+} ++ /* No-op for invalid atoms */ ++ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) ++ return false; + -+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, -+ kbase_mem_pool_debugfs_max_size_get, -+ kbase_mem_pool_debugfs_max_size_set, -+ "%llu\n"); ++ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); ++ else ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + -+void kbase_mem_pool_debugfs_init(struct dentry *parent, -+ struct kbase_mem_pool *pool) -+{ -+ debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, -+ pool, &kbase_mem_pool_debugfs_size_fops); ++ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { ++ /* Atom that can run on slot1 or slot2, and can use all cores */ ++ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); ++ } + -+ debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, -+ pool, &kbase_mem_pool_debugfs_max_size_fops); ++ return runpool_state_changed; +} -+ -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h new file mode 100644 -index 000000000..1442854e8 +index 000000000..ce9183326 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h -@@ -0,0 +1,36 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h +@@ -0,0 +1,158 @@ +/* + * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -385866,161 +387129,156 @@ index 000000000..1442854e8 + + + -+#ifndef _KBASE_MEM_POOL_DEBUGFS_H -+#define _KBASE_MEM_POOL_DEBUGFS_H + -+#include + +/** -+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool -+ * @parent: Parent debugfs dentry -+ * @pool: Memory pool to control -+ * -+ * Adds two debugfs files under @parent: -+ * - mem_pool_size: get/set the current size of @pool -+ * - mem_pool_max_size: get/set the max size of @pool ++ * @file mali_kbase_js_ctx_attr.h ++ * Job Scheduler Context Attribute APIs + */ -+void kbase_mem_pool_debugfs_init(struct dentry *parent, -+ struct kbase_mem_pool *pool); + -+#endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ ++#ifndef _KBASE_JS_CTX_ATTR_H_ ++#define _KBASE_JS_CTX_ATTR_H_ + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c -new file mode 100644 -index 000000000..d58fd8d62 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c -@@ -0,0 +1,121 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++/** ++ * @addtogroup base_api ++ * @{ + */ + ++/** ++ * @addtogroup base_kbase_api ++ * @{ ++ */ + ++/** ++ * @addtogroup kbase_js ++ * @{ ++ */ + -+#include ++/** ++ * Set the initial attributes of a context (when context create flags are set) ++ * ++ * Requires: ++ * - Hold the jsctx_mutex ++ */ ++void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx); + -+#ifdef CONFIG_DEBUG_FS ++/** ++ * Retain all attributes of a context ++ * ++ * This occurs on scheduling in the context on the runpool (but after ++ * is_scheduled is set) ++ * ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx->is_scheduled is true ++ */ ++void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+/** Show callback for the @c mem_profile debugfs file. ++/** ++ * Release all attributes of a context + * -+ * This function is called to get the contents of the @c mem_profile debugfs -+ * file. This is a report of current memory usage and distribution in userspace. ++ * This occurs on scheduling out the context from the runpool (but before ++ * is_scheduled is cleared) + * -+ * @param sfile The debugfs entry -+ * @param data Data associated with the entry ++ * Requires: ++ * - jsctx mutex ++ * - runpool_irq spinlock ++ * - ctx->is_scheduled is true + * -+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * @return false indicates no change in ctx attributes state of the runpool. 
+ */ -+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_context *kctx = sfile->private; ++bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + -+ mutex_lock(&kctx->mem_profile_lock); ++/** ++ * Retain all attributes of an atom ++ * ++ * This occurs on adding an atom to a context ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ */ ++void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + -+ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); ++/** ++ * Release all attributes of an atom, given its retained state. ++ * ++ * This occurs after (permanently) removing an atom from a context ++ * ++ * Requires: ++ * - jsctx mutex ++ * - If the context is scheduled, then runpool_irq spinlock must also be held ++ * ++ * This is a no-op when \a katom_retained_state is invalid. ++ * ++ * @return true indicates a change in ctx attributes state of the runpool. ++ * In this state, the scheduler might be able to submit more jobs than ++ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() ++ * or similar is called sometime later. ++ * @return false indicates no change in ctx attributes state of the runpool. ++ */ ++bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + -+ seq_putc(sfile, '\n'); ++/** ++ * Requires: ++ * - runpool_irq spinlock ++ */ ++static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) ++{ ++ struct kbasep_js_device_data *js_devdata; + -+ mutex_unlock(&kctx->mem_profile_lock); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_devdata = &kbdev->js_data; + -+ return 0; ++ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; +} + -+/* -+ * File operations related to debugfs entry for mem_profile ++/** ++ * Requires: ++ * - runpool_irq spinlock + */ -+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) -+{ -+ return single_open(file, kbasep_mem_profile_seq_show, in->i_private); -+} -+ -+static const struct file_operations kbasep_mem_profile_debugfs_fops = { -+ .open = kbasep_mem_profile_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size) ++static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ -+ int err = 0; -+ -+ mutex_lock(&kctx->mem_profile_lock); -+ -+ dev_dbg(kctx->kbdev->dev, "initialised: %d", -+ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); -+ -+ if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { -+ if (!debugfs_create_file("mem_profile", S_IRUGO, -+ kctx->kctx_dentry, kctx, -+ &kbasep_mem_profile_debugfs_fops)) { -+ err = -EAGAIN; -+ } else { -+ kbase_ctx_flag_set(kctx, -+ KCTX_MEM_PROFILE_INITIALIZED); -+ } -+ } -+ -+ if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { -+ kfree(kctx->mem_profile_data); -+ kctx->mem_profile_data = data; -+ kctx->mem_profile_size = size; -+ } else { -+ kfree(data); -+ } -+ -+ dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", -+ err, kbase_ctx_flag(kctx, 
KCTX_MEM_PROFILE_INITIALIZED)); -+ -+ mutex_unlock(&kctx->mem_profile_lock); -+ -+ return err; ++ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ ++ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); +} + -+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) ++/** ++ * Requires: ++ * - jsctx mutex ++ */ ++static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ -+ mutex_lock(&kctx->mem_profile_lock); -+ -+ dev_dbg(kctx->kbdev->dev, "initialised: %d", -+ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); ++ struct kbasep_js_kctx_info *js_kctx_info; + -+ kfree(kctx->mem_profile_data); -+ kctx->mem_profile_data = NULL; -+ kctx->mem_profile_size = 0; ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); ++ js_kctx_info = &kctx->jctx.sched_info; + -+ mutex_unlock(&kctx->mem_profile_lock); ++ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ ++ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); +} + -+#else /* CONFIG_DEBUG_FS */ ++ /** @} *//* end group kbase_js */ ++ /** @} *//* end group base_kbase_api */ ++ /** @} *//* end group base_api */ + -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size) -+{ -+ kfree(data); -+ return 0; -+} -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h ++#endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h new file mode 100644 -index 000000000..a1dc2e0b1 +index 000000000..ba8b64415 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h -@@ -0,0 +1,59 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +@@ -0,0 +1,386 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -386038,3479 +387296,3093 @@ index 000000000..a1dc2e0b1 + + +/** -+ * @file mali_kbase_mem_profile_debugfs.h -+ * Header file for mem profiles entries in debugfs -+ * ++ * @file mali_kbase_js.h ++ * Job Scheduler Type Definitions + */ + -+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H -+#define _KBASE_MEM_PROFILE_DEBUGFS_H ++#ifndef _KBASE_JS_DEFS_H_ ++#define _KBASE_JS_DEFS_H_ + -+#include -+#include ++/** ++ * @addtogroup base_api ++ * @{ ++ */ + +/** -+ * @brief Remove entry from Mali memory profile debugfs ++ * @addtogroup base_kbase_api ++ * @{ + */ -+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + +/** -+ * @brief Insert @p data to the debugfs file so it can be read by userspace -+ * -+ * The function takes ownership of @p data and frees it later when new data -+ * is inserted. -+ * -+ * If the debugfs entry corresponding to the @p kctx doesn't exist, -+ * an attempt will be made to create it. 
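The inline helpers above treat a context attribute as "on" whenever its reference count is non-zero. Below is a minimal sketch of that refcounting pattern in plain C (illustrative types only; the real driver keeps one count per context and one per runpool, guarded by the jsctx mutex and runpool_irq spinlock as the comments note). The retain/release helpers report whether the 0 <-> 1 transition happened, which is what the runpool_state_changed return values above propagate.

#include <stdbool.h>
#include <stdio.h>

enum attr { ATTR_COMPUTE, ATTR_NON_COMPUTE, ATTR_COMPUTE_ALL_CORES, ATTR_COUNT };

struct attr_state {
        int ref_count[ATTR_COUNT];      /* attribute is "on" while count != 0 */
};

/* Returns true only on the off -> on transition (0 -> 1). */
static bool attr_retain(struct attr_state *s, enum attr a)
{
        return ++s->ref_count[a] == 1;
}

/* Returns true only on the on -> off transition (1 -> 0). */
static bool attr_release(struct attr_state *s, enum attr a)
{
        return --s->ref_count[a] == 0;
}

static bool attr_is_on(const struct attr_state *s, enum attr a)
{
        return s->ref_count[a] != 0;
}

int main(void)
{
        struct attr_state s = { {0} };

        printf("%d\n", attr_retain(&s, ATTR_COMPUTE));  /* 1: state changed */
        printf("%d\n", attr_retain(&s, ATTR_COMPUTE));  /* 0: already on    */
        printf("%d\n", attr_is_on(&s, ATTR_COMPUTE));   /* 1                */
        printf("%d\n", attr_release(&s, ATTR_COMPUTE)); /* 0: still on      */
        printf("%d\n", attr_release(&s, ATTR_COMPUTE)); /* 1: state changed */
        return 0;
}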
-+ * -+ * @param kctx The context whose debugfs file @p data should be inserted to -+ * @param data A NULL-terminated string to be inserted to the debugfs file, -+ * without the trailing new line character -+ * @param size The length of the @p data string -+ * @return 0 if @p data inserted correctly -+ * -EAGAIN in case of error -+ * @post @ref mem_profile_initialized will be set to @c true -+ * the first time this function succeeds. ++ * @addtogroup kbase_js ++ * @{ + */ -+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, -+ size_t size); ++/* Forward decls */ ++struct kbase_device; ++struct kbase_jd_atom; + -+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h -new file mode 100644 -index 000000000..82f070297 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h -@@ -0,0 +1,33 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++typedef u32 kbase_context_flags; + ++struct kbasep_atom_req { ++ base_jd_core_req core_req; ++ kbase_context_flags ctx_req; ++ u32 device_nr; ++}; + ++/** Callback function run on all of a context's jobs registered with the Job ++ * Scheduler */ ++typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); + +/** -+ * @file mali_kbase_mem_profile_debugfs_buf_size.h -+ * Header file for the size of the buffer to accumulate the histogram report text in ++ * @brief Maximum number of jobs that can be submitted to a job slot whilst ++ * inside the IRQ handler. ++ * ++ * This is important because GPU NULL jobs can complete whilst the IRQ handler ++ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL ++ * jobs to be submitted inside the IRQ handler, which increases IRQ latency. + */ -+ -+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ -+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ ++#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + +/** -+ * The size of the buffer to accumulate the histogram report text in -+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT -+ */ -+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56)) -+ -+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ -+ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c -new file mode 100644 -index 000000000..26144850a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c -@@ -0,0 +1,2088 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * @brief Context attributes + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
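As a side note, the KBASE_MEM_PROFILE_MAX_BUF_SIZE expression quoted above works out to 64 + (80 + 56 * 64) * 15 + 56 = 64 + 3664 * 15 + 56 = 55080 bytes.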
++ * Each context attribute can be thought of as a boolean value that caches some ++ * state information about either the runpool, or the context: ++ * - In the case of the runpool, it is a cache of "Do any contexts owned by ++ * the runpool have attribute X?" ++ * - In the case of a context, it is a cache of "Do any atoms owned by the ++ * context have attribute X?" + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * The boolean value of the context attributes often affect scheduling ++ * decisions, such as affinities to use and job slots to use. + * ++ * To accomodate changes of state in the context, each attribute is refcounted ++ * in the context, and in the runpool for all running contexts. Specifically: ++ * - The runpool holds a refcount of how many contexts in the runpool have this ++ * attribute. ++ * - The context holds a refcount of how many atoms have this attribute. + */ ++enum kbasep_js_ctx_attr { ++ /** Attribute indicating a context that contains Compute jobs. That is, ++ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE ++ * ++ * @note A context can be both 'Compute' and 'Non Compute' if it contains ++ * both types of jobs. ++ */ ++ KBASEP_JS_CTX_ATTR_COMPUTE, + ++ /** Attribute indicating a context that contains Non-Compute jobs. That is, ++ * the context has some jobs that are \b not of type @ref ++ * BASE_JD_REQ_ONLY_COMPUTE. ++ * ++ * @note A context can be both 'Compute' and 'Non Compute' if it contains ++ * both types of jobs. ++ */ ++ KBASEP_JS_CTX_ATTR_NON_COMPUTE, + ++ /** Attribute indicating that a context contains compute-job atoms that ++ * aren't restricted to a coherent group, and can run on all cores. ++ * ++ * Specifically, this is when the atom's \a core_req satisfy: ++ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 ++ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups ++ * ++ * Such atoms could be blocked from running if one of the coherent groups ++ * is being used by another job slot, so tracking this context attribute ++ * allows us to prevent such situations. ++ * ++ * @note This doesn't take into account the 1-coregroup case, where all ++ * compute atoms would effectively be able to run on 'all cores', but ++ * contexts will still not always get marked with this attribute. Instead, ++ * it is the caller's responsibility to take into account the number of ++ * coregroups when interpreting this attribute. ++ * ++ * @note Whilst Tiler atoms are normally combined with ++ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without ++ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy ++ * enough to handle anyway. ++ */ ++ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, + ++ /** Must be the last in the enum */ ++ KBASEP_JS_CTX_ATTR_COUNT ++}; + ++enum { ++ /** Bit indicating that new atom should be started because this atom completed */ ++ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), ++ /** Bit indicating that the atom was evicted from the JS_NEXT registers */ ++ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) ++}; + -+/** -+ * @file mali_kbase_mmu.c -+ * Base kernel MMU management. 
-+ */ -+ -+/* #define DEBUG 1 */ -+#include -+#include -+#include -+#include -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+#include -+#endif -+#include -+#include -+#include -+ -+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define KBASE_MMU_PAGE_ENTRIES 512 ++/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ ++typedef u32 kbasep_js_atom_done_code; + +/** -+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. -+ * @kctx: The KBase context. -+ * @vpfn: The virtual page frame number to start the flush on. -+ * @nr: The number of pages to flush. -+ * @sync: Set if the operation should be synchronous or not. -+ * -+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. ++ * @brief KBase Device Data Job Scheduler sub-structure + * -+ * If sync is not set then transactions still in flight when the flush is issued -+ * may use the old page tables and the data they write will not be written out -+ * to memory, this function returns after the flush has been issued but -+ * before all accesses which might effect the flushed region have completed. ++ * This encapsulates the current context of the Job Scheduler on a particular ++ * device. This context is global to the device, and is not tied to any ++ * particular struct kbase_context running on the device. + * -+ * If sync is set then accesses in the flushed region will be drained -+ * before data is flush and invalidated through L1, L2 and into memory, -+ * after which point this function will return. ++ * nr_contexts_running and as_free are optimized for packing together (by making ++ * them smaller types than u32). The operations on them should rarely involve ++ * masking. The use of signed types for arithmetic indicates to the compiler that ++ * the value will not rollover (which would be undefined behavior), and so under ++ * the Total License model, it is free to make optimizations based on that (i.e. ++ * to remove masking). + */ -+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, -+ u64 vpfn, size_t nr, bool sync); ++struct kbasep_js_device_data { ++ /* Sub-structure to collect together Job Scheduling data used in IRQ ++ * context. The hwaccess_lock must be held when accessing. */ ++ struct runpool_irq { ++ /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. ++ * When bit 'N' is set in this, it indicates whether the context bound to address space ++ * 'N' is allowed to submit jobs. ++ */ ++ u16 submit_allowed; + -+/** -+ * kbase_mmu_sync_pgd - sync page directory to memory -+ * @kbdev: Device pointer. -+ * @handle: Address of DMA region. -+ * @size: Size of the region to sync. -+ * -+ * This should be called after each page directory update. -+ */ ++ /** Context Attributes: ++ * Each is large enough to hold a refcount of the number of contexts ++ * that can fit into the runpool. This is currently BASE_MAX_NR_AS ++ * ++ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store ++ * the refcount. Hence, it's not worthwhile reducing this to ++ * bit-manipulation on u32s to save space (where in contrast, 4 bit ++ * sub-fields would be easy to do and would save space). 
++ * ++ * Whilst this must not become negative, the sign bit is used for: ++ * - error detection in debug builds ++ * - Optimization: it is undefined for a signed int to overflow, and so ++ * the compiler can optimize for that never happening (thus, no masking ++ * is required on updating the variable) */ ++ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + -+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, -+ dma_addr_t handle, size_t size) -+{ -+ /* If page table is not coherent then ensure the gpu can read -+ * the pages from memory ++ /* ++ * Affinity management and tracking ++ */ ++ /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates ++ * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ ++ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; ++ /** Refcount for each core owned by each slot. Used to generate the ++ * slot_affinities array of bitvectors ++ * ++ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, ++ * because it is refcounted only when a job is definitely about to be ++ * submitted to a slot, and is de-refcounted immediately after a job ++ * finishes */ ++ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; ++ } runpool_irq; ++ ++ /** ++ * Run Pool mutex, for managing contexts within the runpool. ++ * Unless otherwise specified, you must hold this lock whilst accessing any ++ * members that follow ++ * ++ * In addition, this is used to access: ++ * - the kbasep_js_kctx_info::runpool substructure + */ -+ if (kbdev->system_coherency != COHERENCY_ACE) -+ dma_sync_single_for_device(kbdev->dev, handle, size, -+ DMA_TO_DEVICE); -+} ++ struct mutex runpool_mutex; + -+/* -+ * Definitions: -+ * - PGD: Page Directory. -+ * - PTE: Page Table Entry. A 64bit value pointing to the next -+ * level of translation -+ * - ATE: Address Transation Entry. A 64bit value pointing to -+ * a 4kB physical page. -+ */ ++ /** ++ * Queue Lock, used to access the Policy's queue of contexts independently ++ * of the Run Pool. ++ * ++ * Of course, you don't need the Run Pool lock to access this. ++ */ ++ struct mutex queue_mutex; + -+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, const char *reason_str); ++ /** ++ * Scheduling semaphore. This must be held when calling ++ * kbase_jm_kick() ++ */ ++ struct semaphore schedule_sem; + ++ /** ++ * List of contexts that can currently be pulled from ++ */ ++ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]; ++ /** ++ * List of contexts that can not currently be pulled from, but have ++ * jobs currently running. 
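The submit_allowed field described in the runpool_irq sub-structure above is a plain bitvector, with bit N covering the context bound to address space N. A standalone sketch of the implied bit manipulation (helper names are illustrative, not the driver's API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bit N of the mask corresponds to the context bound to address space N. */
static void allow_submit(uint16_t *mask, unsigned int as_nr)
{
        *mask |= (uint16_t)(1u << as_nr);
}

static void disallow_submit(uint16_t *mask, unsigned int as_nr)
{
        *mask &= (uint16_t)~(1u << as_nr);
}

static bool submit_allowed(uint16_t mask, unsigned int as_nr)
{
        return (mask >> as_nr) & 1u;
}

int main(void)
{
        uint16_t mask = 0;

        allow_submit(&mask, 3);
        printf("AS3: %d\n", submit_allowed(mask, 3));    /* 1 */
        disallow_submit(&mask, 3);
        printf("AS3: %d\n", submit_allowed(mask, 3));    /* 0 */
        return 0;
}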
++ */ ++ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]; + -+static size_t make_multiple(size_t minimum, size_t multiple) -+{ -+ size_t remainder = minimum % multiple; ++ /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ ++ s8 nr_user_contexts_running; ++ /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ ++ s8 nr_all_contexts_running; + -+ if (remainder == 0) -+ return minimum; ++ /** Core Requirements to match up with base_js_atom's core_req memeber ++ * @note This is a write-once member, and so no locking is required to read */ ++ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + -+ return minimum + multiple - remainder; -+} ++ u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ ++ u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ ++ u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ ++ u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ ++ u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ ++ u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ ++ u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ ++ u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ ++ u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ ++ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ + -+void page_fault_worker(struct work_struct *data) -+{ -+ u64 fault_pfn; -+ u32 fault_status; -+ size_t new_pages; -+ size_t fault_rel_pfn; -+ struct kbase_as *faulting_as; -+ int as_no; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+ struct kbase_va_region *region; -+ int err; -+ bool grown = false; ++ /**< Value for JS_SOFT_JOB_TIMEOUT */ ++ atomic_t soft_job_timeout_ms; + -+ faulting_as = container_of(data, struct kbase_as, work_pagefault); -+ fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; -+ as_no = faulting_as->number; ++ /** List of suspended soft jobs */ ++ struct list_head suspended_soft_jobs_list; + -+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++#ifdef CONFIG_MALI_DEBUG ++ /* Support soft-stop on a single context */ ++ bool softstop_always; ++#endif /* CONFIG_MALI_DEBUG */ + -+ /* Grab the context that was already refcounted in kbase_mmu_interrupt(). -+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it -+ */ -+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); -+ if (WARN_ON(!kctx)) { -+ atomic_dec(&kbdev->faults_pending); -+ return; -+ } ++ /** The initalized-flag is placed at the end, to avoid cache-pollution (we should ++ * only be using this during init/term paths). ++ * @note This is a write-once member, and so no locking is required to read */ ++ int init_status; + -+ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); ++ /* Number of contexts that can currently be pulled from */ ++ u32 nr_contexts_pullable; + -+ if (unlikely(faulting_as->protected_mode)) -+ { -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Protected mode fault"); -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ /* Number of contexts that can either be pulled from or are currently ++ * running */ ++ atomic_t nr_contexts_runnable; ++}; + -+ goto fault_done; -+ } ++/** ++ * @brief KBase Context Job Scheduling information structure ++ * ++ * This is a substructure in the struct kbase_context that encapsulates all the ++ * scheduling information. 
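A rough illustration of how the tick-based values above relate to wall-clock time, assuming (as the field names suggest) that one tick corresponds to one scheduling period; the numeric defaults used here are illustrative assumptions, not taken from this patch.

#include <stdint.h>
#include <stdio.h>

/* Assumed, illustrative defaults - the real values come from the driver's
 * configuration, not from this sketch. */
#define SCHEDULING_PERIOD_NS    100000000u      /* one scheduling tick = 100 ms */
#define SOFT_STOP_TICKS         1u
#define HARD_STOP_TICKS_SS      50u

static uint64_t ticks_to_ms(uint32_t ticks, uint32_t period_ns)
{
        return ((uint64_t)ticks * period_ns) / 1000000u;
}

int main(void)
{
        printf("soft stop after ~%llu ms\n", (unsigned long long)
               ticks_to_ms(SOFT_STOP_TICKS, SCHEDULING_PERIOD_NS));
        printf("hard stop (SS) after ~%llu ms\n", (unsigned long long)
               ticks_to_ms(HARD_STOP_TICKS_SS, SCHEDULING_PERIOD_NS));
        return 0;
}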
++ */ ++struct kbasep_js_kctx_info { + -+ fault_status = faulting_as->fault_status; -+ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { ++ /** ++ * Job Scheduler Context information sub-structure. These members are ++ * accessed regardless of whether the context is: ++ * - In the Policy's Run Pool ++ * - In the Policy's Queue ++ * - Not queued nor in the Run Pool. ++ * ++ * You must obtain the jsctx_mutex before accessing any other members of ++ * this substructure. ++ * ++ * You may not access any of these members from IRQ context. ++ */ ++ struct kbase_jsctx { ++ struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: -+ /* need to check against the region to handle this one */ -+ break; ++ /** Number of jobs ready to run - does \em not include the jobs waiting in ++ * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr ++ * for such jobs*/ ++ u32 nr_jobs; + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Permission failure"); -+ goto fault_done; ++ /** Context Attributes: ++ * Each is large enough to hold a refcount of the number of atoms on ++ * the context. **/ ++ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Translation table bus fault"); -+ goto fault_done; ++ /** ++ * Wait queue to wait for KCTX_SHEDULED flag state changes. ++ * */ ++ wait_queue_head_t is_scheduled_wait; + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: -+ /* nothing to do, but we don't expect this fault currently */ -+ dev_warn(kbdev->dev, "Access flag unexpectedly set"); -+ goto fault_done; ++ /** Link implementing JS queues. Context can be present on one ++ * list per job slot ++ */ ++ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; ++ } ctx; + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Address size fault"); -+ else -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Unknown fault code"); -+ goto fault_done; ++ /* The initalized-flag is placed at the end, to avoid cache-pollution (we should ++ * only be using this during init/term paths) */ ++ int init_status; ++}; + -+ case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory attributes fault"); -+ else -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Unknown fault code"); -+ goto fault_done; ++/** Subset of atom state that can be available after jd_done_nolock() is called ++ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), ++ * because the original atom could disappear. 
*/ ++struct kbasep_js_atom_retained_state { ++ /** Event code - to determine whether the atom has finished */ ++ enum base_jd_event_code event_code; ++ /** core requirements */ ++ base_jd_core_req core_req; ++ /* priority */ ++ int sched_priority; ++ /** Job Slot to retry submitting to if submission from IRQ handler failed */ ++ int retry_submit_on_slot; ++ /* Core group atom was executed on */ ++ u32 device_nr; + -+ default: -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Unknown fault code"); -+ goto fault_done; -+ } ++}; + -+ /* so we have a translation fault, let's see if it is for growable -+ * memory */ -+ kbase_gpu_vm_lock(kctx); ++/** ++ * Value signifying 'no retry on a slot required' for: ++ * - kbase_js_atom_retained_state::retry_submit_on_slot ++ * - kbase_jd_atom::retry_submit_on_slot ++ */ ++#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + -+ region = kbase_region_tracker_find_region_enclosing_address(kctx, -+ faulting_as->fault_addr); -+ if (!region || region->flags & KBASE_REG_FREE) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory is not mapped on the GPU"); -+ goto fault_done; -+ } ++/** ++ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. ++ * ++ * @see kbase_atom_retained_state_is_valid() ++ */ ++#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP + -+ if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "DMA-BUF is not mapped on the GPU"); -+ goto fault_done; -+ } ++/** ++ * @brief The JS timer resolution, in microseconds ++ * ++ * Any non-zero difference in time will be at least this size. ++ */ ++#define KBASEP_JS_TICK_RESOLUTION_US 1 + -+ if ((region->flags & GROWABLE_FLAGS_REQUIRED) -+ != GROWABLE_FLAGS_REQUIRED) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Memory is not growable"); -+ goto fault_done; -+ } ++/* ++ * Internal atom priority defines for kbase_jd_atom::sched_prio ++ */ ++enum { ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, ++ KBASE_JS_ATOM_SCHED_PRIO_MED, ++ KBASE_JS_ATOM_SCHED_PRIO_LOW, ++ KBASE_JS_ATOM_SCHED_PRIO_COUNT, ++}; + -+ if ((region->flags & KBASE_REG_DONT_NEED)) { -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Don't need memory can't be grown"); -+ goto fault_done; -+ } ++/* Invalid priority for kbase_jd_atom::sched_prio */ ++#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 + -+ /* find the size we need to grow it by */ -+ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address -+ * validating the fault_adress to be within a size_t from the start_pfn */ -+ fault_rel_pfn = fault_pfn - region->start_pfn; ++/* Default priority in the case of contexts with no atoms, or being lenient ++ * about invalid priorities from userspace */ ++#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + -+ if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { -+ dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", -+ faulting_as->fault_addr, region->start_pfn, -+ region->start_pfn + -+ kbase_reg_current_backed_size(region)); ++ /** @} *//* end group kbase_js */ ++ /** @} *//* end group base_kbase_api */ ++ /** @} *//* end group base_api */ + -+ mutex_lock(&kbdev->mmu_hw_mutex); ++#endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h 
b/drivers/gpu/arm/midgard/mali_kbase_linux.h +new file mode 100644 +index 000000000..6d1e61fd4 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h +@@ -0,0 +1,43 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ /* [1] in case another page fault occurred while we were -+ * handling the (duplicate) page fault we need to ensure we -+ * don't loose the other page fault as result of us clearing -+ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send -+ * an UNLOCK command that will retry any stalled memory -+ * transaction (which should cause the other page fault to be -+ * raised again). -+ */ -+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, -+ AS_COMMAND_UNLOCK, 1); + -+ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ kbase_gpu_vm_unlock(kctx); + -+ goto fault_done; -+ } + -+ new_pages = make_multiple(fault_rel_pfn - -+ kbase_reg_current_backed_size(region) + 1, -+ region->extent); ++/** ++ * @file mali_kbase_linux.h ++ * Base kernel APIs, Linux implementation. ++ */ + -+ /* cap to max vsize */ -+ if (new_pages + kbase_reg_current_backed_size(region) > -+ region->nr_pages) -+ new_pages = region->nr_pages - -+ kbase_reg_current_backed_size(region); ++#ifndef _KBASE_LINUX_H_ ++#define _KBASE_LINUX_H_ + -+ if (0 == new_pages) { -+ mutex_lock(&kbdev->mmu_hw_mutex); ++/* All things that are needed for the Linux port. */ ++#include ++#include ++#include ++#include ++#include + -+ /* Duplicate of a fault we've already handled, nothing to do */ -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ /* See comment [1] about UNLOCK usage */ -+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, -+ AS_COMMAND_UNLOCK, 1); ++#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) ++ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) ++#else ++ #define KBASE_EXPORT_TEST_API(func) ++#endif + -+ mutex_unlock(&kbdev->mmu_hw_mutex); ++#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) + -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ kbase_gpu_vm_unlock(kctx); -+ goto fault_done; -+ } ++#endif /* _KBASE_LINUX_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c +new file mode 100644 +index 000000000..f372e02ef +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c +@@ -0,0 +1,2657 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) { -+ if (region->gpu_alloc != region->cpu_alloc) { -+ if (kbase_alloc_phy_pages_helper( -+ region->cpu_alloc, new_pages) == 0) { -+ grown = true; -+ } else { -+ kbase_free_phy_pages_helper(region->gpu_alloc, -+ new_pages); -+ } -+ } else { -+ grown = true; -+ } -+ } + + -+ if (grown) { -+ u64 pfn_offset; -+ u32 op; + -+ /* alloc success */ -+ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + -+ /* set up the new pages */ -+ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; -+ /* -+ * Note: -+ * Issuing an MMU operation will unlock the MMU and cause the -+ * translation to be replayed. If the page insertion fails then -+ * rather then trying to continue the context should be killed -+ * so the no_flush version of insert_pages is used which allows -+ * us to unlock the MMU as we see fit. -+ */ -+ err = kbase_mmu_insert_pages_no_flush(kctx, -+ region->start_pfn + pfn_offset, -+ &kbase_get_gpu_phy_pages(region)[pfn_offset], -+ new_pages, region->flags); -+ if (err) { -+ kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); -+ if (region->gpu_alloc != region->cpu_alloc) -+ kbase_free_phy_pages_helper(region->cpu_alloc, -+ new_pages); -+ kbase_gpu_vm_unlock(kctx); -+ /* The locked VA region will be unlocked and the cache invalidated in here */ -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Page table update failure"); -+ goto fault_done; -+ } -+#if defined(CONFIG_MALI_GATOR_SUPPORT) -+ kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); -+#endif -+ KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages); ++/** ++ * @file mali_kbase_mem.c ++ * Base kernel memory APIs ++ */ ++#ifdef CONFIG_DMA_SHARED_BUFFER ++#include ++#endif /* CONFIG_DMA_SHARED_BUFFER */ ++#ifdef CONFIG_UMP ++#include ++#endif /* CONFIG_UMP */ ++#include ++#include ++#include ++#include ++#include + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+ /* flush L2 and unlock the VA (resumes the MMU) */ -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) -+ op = AS_COMMAND_FLUSH; -+ else -+ op = AS_COMMAND_FLUSH_PT; ++/* This function finds out which RB tree the given GPU VA region belongs to ++ * based on the region zone */ ++static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ struct rb_root *rbtree = NULL; + -+ /* clear MMU interrupt - this needs to be done after updating -+ * the page tables but before issuing a FLUSH command. The -+ * FLUSH cmd has a side effect that it restarts stalled memory -+ * transactions in other address spaces which may cause -+ * another fault to occur. If we didn't clear the interrupt at -+ * this stage a new IRQ might not be raised when the GPU finds -+ * a MMU IRQ is already pending. 
-+ */ -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); ++ switch (reg->flags & KBASE_REG_ZONE_MASK) { ++ case KBASE_REG_ZONE_CUSTOM_VA: ++ rbtree = &kctx->reg_rbtree_custom; ++ break; ++ case KBASE_REG_ZONE_EXEC: ++ rbtree = &kctx->reg_rbtree_exec; ++ break; ++ case KBASE_REG_ZONE_SAME_VA: ++ rbtree = &kctx->reg_rbtree_same; ++ /* fall through */ ++ default: ++ rbtree = &kctx->reg_rbtree_same; ++ break; ++ } + -+ kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, -+ faulting_as->fault_addr >> PAGE_SHIFT, -+ new_pages, -+ op, 1); ++ return rbtree; ++} + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++/* This function finds out which RB tree the given pfn from the GPU VA belongs ++ * to based on the memory zone the pfn refers to */ ++static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, ++ u64 gpu_pfn) ++{ ++ struct rb_root *rbtree = NULL; + -+ /* reenable this in the mask */ -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE); -+ kbase_gpu_vm_unlock(kctx); ++#ifdef CONFIG_64BIT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++#endif /* CONFIG_64BIT */ ++ if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE) ++ rbtree = &kctx->reg_rbtree_custom; ++ else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE) ++ rbtree = &kctx->reg_rbtree_exec; ++ else ++ rbtree = &kctx->reg_rbtree_same; ++#ifdef CONFIG_64BIT + } else { -+ /* failed to extend, handle as a normal PF */ -+ kbase_gpu_vm_unlock(kctx); -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Page allocation failure"); ++ if (gpu_pfn >= kctx->same_va_end) ++ rbtree = &kctx->reg_rbtree_custom; ++ else ++ rbtree = &kctx->reg_rbtree_same; + } ++#endif /* CONFIG_64BIT */ + -+fault_done: -+ /* -+ * By this point, the fault was handled in some way, -+ * so release the ctx refcount -+ */ -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+ -+ atomic_dec(&kbdev->faults_pending); ++ return rbtree; +} + -+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) ++/* This function inserts a region into the tree. */ ++static void kbase_region_tracker_insert(struct kbase_context *kctx, ++ struct kbase_va_region *new_reg) +{ -+ u64 *page; -+ int i; -+ struct page *p; -+ int new_page_count __maybe_unused; -+ -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages); -+ kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); -+ -+ p = kbase_mem_pool_alloc(&kctx->mem_pool); -+ if (!p) -+ goto sub_pages; -+ -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)kctx->id, -+ (u64)new_page_count); ++ u64 start_pfn = new_reg->start_pfn; ++ struct rb_node **link = NULL; ++ struct rb_node *parent = NULL; ++ struct rb_root *rbtree = NULL; + -+ page = kmap(p); -+ if (NULL == page) -+ goto alloc_free; ++ rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg); + -+ kbase_process_page_usage_inc(kctx, 1); ++ link = &(rbtree->rb_node); ++ /* Find the right place in the tree using tree search */ ++ while (*link) { ++ struct kbase_va_region *old_reg; + -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) -+ kctx->kbdev->mmu_mode->entry_invalidate(&page[i]); ++ parent = *link; ++ old_reg = rb_entry(parent, struct kbase_va_region, rblink); + -+ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ /* RBTree requires no duplicate entries. 
*/ ++ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + -+ kunmap(p); -+ return page_to_phys(p); ++ if (old_reg->start_pfn > start_pfn) ++ link = &(*link)->rb_left; ++ else ++ link = &(*link)->rb_right; ++ } + -+alloc_free: -+ kbase_mem_pool_free(&kctx->mem_pool, p, false); -+sub_pages: -+ kbase_atomic_sub_pages(1, &kctx->used_pages); -+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); ++ /* Put the new node there, and rebalance tree */ ++ rb_link_node(&(new_reg->rblink), parent, link); + -+ return 0; ++ rb_insert_color(&(new_reg->rblink), rbtree); +} + -+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); -+ -+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the -+ * new table from the pool if needed and possible -+ */ -+static int mmu_get_next_pgd(struct kbase_context *kctx, -+ phys_addr_t *pgd, u64 vpfn, int level) ++/* Find allocated region enclosing free range. */ ++static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free( ++ struct kbase_context *kctx, u64 start_pfn, size_t nr_pages) +{ -+ u64 *page; -+ phys_addr_t target_pgd; -+ struct page *p; -+ -+ KBASE_DEBUG_ASSERT(*pgd); -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ -+ lockdep_assert_held(&kctx->mmu_lock); -+ -+ /* -+ * Architecture spec defines level-0 as being the top-most. -+ * This is a bit unfortunate here, but we keep the same convention. -+ */ -+ vpfn >>= (3 - level) * 9; -+ vpfn &= 0x1FF; -+ -+ p = pfn_to_page(PFN_DOWN(*pgd)); -+ page = kmap(p); -+ if (NULL == page) { -+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); -+ return -EINVAL; -+ } -+ -+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); -+ -+ if (!target_pgd) { -+ target_pgd = kbase_mmu_alloc_pgd(kctx); -+ if (!target_pgd) { -+ dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); -+ kunmap(p); -+ return -ENOMEM; -+ } -+ -+ kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); ++ struct rb_node *rbnode = NULL; ++ struct kbase_va_region *reg = NULL; ++ struct rb_root *rbtree = NULL; + -+ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); -+ /* Rely on the caller to update the address space flags. */ -+ } ++ u64 end_pfn = start_pfn + nr_pages; + -+ kunmap(p); -+ *pgd = target_pgd; ++ rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn); + -+ return 0; -+} ++ rbnode = rbtree->rb_node; + -+static int mmu_get_bottom_pgd(struct kbase_context *kctx, -+ u64 vpfn, phys_addr_t *out_pgd) -+{ -+ phys_addr_t pgd; -+ int l; ++ while (rbnode) { ++ u64 tmp_start_pfn, tmp_end_pfn; + -+ lockdep_assert_held(&kctx->mmu_lock); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ tmp_start_pfn = reg->start_pfn; ++ tmp_end_pfn = reg->start_pfn + reg->nr_pages; + -+ pgd = kctx->pgd; -+ for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { -+ int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l); -+ /* Handle failure condition */ -+ if (err) { -+ dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); -+ return err; -+ } ++ /* If start is lower than this, go left. */ ++ if (start_pfn < tmp_start_pfn) ++ rbnode = rbnode->rb_left; ++ /* If end is higher than this, then go right. */ ++ else if (end_pfn > tmp_end_pfn) ++ rbnode = rbnode->rb_right; ++ else /* Enclosing */ ++ return reg; + } + -+ *out_pgd = pgd; -+ -+ return 0; ++ return NULL; +} + -+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) ++/* Find region enclosing given address. 
*/ ++struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr) +{ -+ u64 *page; -+ phys_addr_t target_pgd; ++ struct rb_node *rbnode; ++ struct kbase_va_region *reg; ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_root *rbtree = NULL; + -+ KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(NULL != kctx); + -+ lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); + -+ /* -+ * Architecture spec defines level-0 as being the top-most. -+ * This is a bit unfortunate here, but we keep the same convention. -+ */ -+ vpfn >>= (3 - level) * 9; -+ vpfn &= 0x1FF; -+ -+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); -+ /* kmap_atomic should NEVER fail */ -+ KBASE_DEBUG_ASSERT(NULL != page); -+ -+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); -+ /* As we are recovering from what has already been set up, we should have a target_pgd */ -+ KBASE_DEBUG_ASSERT(0 != target_pgd); -+ kunmap_atomic(page); -+ return target_pgd; -+} ++ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + -+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) -+{ -+ phys_addr_t pgd; -+ int l; ++ rbnode = rbtree->rb_node; + -+ lockdep_assert_held(&kctx->mmu_lock); ++ while (rbnode) { ++ u64 tmp_start_pfn, tmp_end_pfn; + -+ pgd = kctx->pgd; ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ tmp_start_pfn = reg->start_pfn; ++ tmp_end_pfn = reg->start_pfn + reg->nr_pages; + -+ for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { -+ pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l); -+ /* Should never fail */ -+ KBASE_DEBUG_ASSERT(0 != pgd); ++ /* If start is lower than this, go left. */ ++ if (gpu_pfn < tmp_start_pfn) ++ rbnode = rbnode->rb_left; ++ /* If end is higher than this, then go right. 
*/ ++ else if (gpu_pfn >= tmp_end_pfn) ++ rbnode = rbnode->rb_right; ++ else /* Enclosing */ ++ return reg; + } + -+ return pgd; ++ return NULL; +} + -+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn, -+ size_t nr) ++KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); ++ ++/* Find region with given base address */ ++struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ struct kbase_mmu_mode const *mmu_mode; ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct rb_node *rbnode = NULL; ++ struct kbase_va_region *reg = NULL; ++ struct rb_root *rbtree = NULL; + + KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(0 != vpfn); -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); + -+ mmu_mode = kctx->kbdev->mmu_mode; ++ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + -+ while (nr) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; ++ rbnode = rbtree->rb_node; + -+ if (count > nr) -+ count = nr; ++ while (rbnode) { ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ if (reg->start_pfn > gpu_pfn) ++ rbnode = rbnode->rb_left; ++ else if (reg->start_pfn < gpu_pfn) ++ rbnode = rbnode->rb_right; ++ else ++ return reg; + -+ pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn); -+ KBASE_DEBUG_ASSERT(0 != pgd); ++ } + -+ p = pfn_to_page(PFN_DOWN(pgd)); ++ return NULL; ++} + -+ pgd_page = kmap_atomic(p); -+ KBASE_DEBUG_ASSERT(NULL != pgd_page); ++KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); + -+ /* Invalidate the entries we added */ -+ for (i = 0; i < count; i++) -+ mmu_mode->entry_invalidate(&pgd_page[index + i]); ++/* Find region meeting given requirements */ ++static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align) ++{ ++ struct rb_node *rbnode = NULL; ++ struct kbase_va_region *reg = NULL; ++ struct rb_root *rbtree = NULL; + -+ vpfn += count; -+ nr -= count; ++ /* Note that this search is a linear search, as we do not have a target ++ address in mind, so does not benefit from the rbtree search */ + -+ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs); + -+ kunmap_atomic(pgd_page); ++ rbnode = rb_first(rbtree); ++ ++ while (rbnode) { ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ if ((reg->nr_pages >= nr_pages) && ++ (reg->flags & KBASE_REG_FREE)) { ++ /* Check alignment */ ++ u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1); ++ ++ if ((start_pfn >= reg->start_pfn) && ++ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && ++ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) ++ return reg; ++ } ++ rbnode = rb_next(rbnode); + } ++ ++ return NULL; +} + -+/* -+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' ++/** ++ * @brief Remove a region object from the global list. ++ * ++ * The region reg is removed, possibly by merging with other free and ++ * compatible adjacent regions. It must be called with the context ++ * region lock held. The associated memory is not released (see ++ * kbase_free_alloced_region). Internal use only. 
+ */ -+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t phys, size_t nr, -+ unsigned long flags) ++static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ /* In case the insert_single_page only partially completes we need to be -+ * able to recover */ -+ bool recover_required = false; -+ u64 recover_vpfn = vpfn; -+ size_t recover_count = 0; -+ size_t remain = nr; -+ int err; -+ -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(0 != vpfn); -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; -+ -+ mutex_lock(&kctx->mmu_lock); ++ struct rb_node *rbprev; ++ struct kbase_va_region *prev = NULL; ++ struct rb_node *rbnext; ++ struct kbase_va_region *next = NULL; ++ struct rb_root *reg_rbtree = NULL; + -+ while (remain) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; ++ int merged_front = 0; ++ int merged_back = 0; ++ int err = 0; + -+ if (count > remain) -+ count = remain; ++ reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg); + -+ /* -+ * Repeatedly calling mmu_get_bottom_pte() is clearly -+ * suboptimal. We don't have to re-parse the whole tree -+ * each time (just cache the l0-l2 sequence). -+ * On the other hand, it's only a gain when we map more than -+ * 256 pages at once (on average). Do we really care? -+ */ -+ do { -+ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); -+ if (err != -ENOMEM) -+ break; -+ /* Fill the memory pool with enough pages for -+ * the page walk to succeed -+ */ -+ mutex_unlock(&kctx->mmu_lock); -+ err = kbase_mem_pool_grow(&kctx->mem_pool, -+ MIDGARD_MMU_BOTTOMLEVEL); -+ mutex_lock(&kctx->mmu_lock); -+ } while (!err); -+ if (err) { -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); -+ if (recover_required) { -+ /* Invalidate the pages we have partially -+ * completed */ -+ mmu_insert_pages_failure_recovery(kctx, -+ recover_vpfn, -+ recover_count); -+ } -+ goto fail_unlock; ++ /* Try to merge with the previous block first */ ++ rbprev = rb_prev(&(reg->rblink)); ++ if (rbprev) { ++ prev = rb_entry(rbprev, struct kbase_va_region, rblink); ++ if (prev->flags & KBASE_REG_FREE) { ++ /* We're compatible with the previous VMA, ++ * merge with it */ ++ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != ++ (reg->flags & KBASE_REG_ZONE_MASK)); ++ prev->nr_pages += reg->nr_pages; ++ rb_erase(&(reg->rblink), reg_rbtree); ++ reg = prev; ++ merged_front = 1; + } ++ } + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); -+ if (recover_required) { -+ /* Invalidate the pages we have partially -+ * completed */ -+ mmu_insert_pages_failure_recovery(kctx, -+ recover_vpfn, -+ recover_count); ++ /* Try to merge with the next block second */ ++ /* Note we do the lookup here as the tree may have been rebalanced. 
*/ ++ rbnext = rb_next(&(reg->rblink)); ++ if (rbnext) { ++ /* We're compatible with the next VMA, merge with it */ ++ next = rb_entry(rbnext, struct kbase_va_region, rblink); ++ if (next->flags & KBASE_REG_FREE) { ++ WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != ++ (reg->flags & KBASE_REG_ZONE_MASK)); ++ next->start_pfn = reg->start_pfn; ++ next->nr_pages += reg->nr_pages; ++ rb_erase(&(reg->rblink), reg_rbtree); ++ merged_back = 1; ++ if (merged_front) { ++ /* We already merged with prev, free it */ ++ kbase_free_alloced_region(reg); + } -+ err = -ENOMEM; -+ goto fail_unlock; + } ++ } + -+ for (i = 0; i < count; i++) { -+ unsigned int ofs = index + i; ++ /* If we failed to merge then we need to add a new block */ ++ if (!(merged_front || merged_back)) { ++ /* ++ * We didn't merge anything. Add a new free ++ * placeholder and remove the original one. ++ */ ++ struct kbase_va_region *free_reg; + -+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); -+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], -+ phys, flags); ++ free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK); ++ if (!free_reg) { ++ err = -ENOMEM; ++ goto out; + } -+ -+ vpfn += count; -+ remain -= count; -+ -+ kbase_mmu_sync_pgd(kctx->kbdev, -+ kbase_dma_addr(p) + (index * sizeof(u64)), -+ count * sizeof(u64)); -+ -+ kunmap(p); -+ /* We have started modifying the page table. -+ * If further pages need inserting and fail we need to undo what -+ * has already taken place */ -+ recover_required = true; -+ recover_count += count; ++ rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); + } -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); -+ return 0; + -+fail_unlock: -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); ++ out: + return err; +} + -+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t *phys, size_t nr, -+ unsigned long flags) ++KBASE_EXPORT_TEST_API(kbase_remove_va_region); ++ ++/** ++ * @brief Insert a VA region to the list, replacing the current at_reg. 
++ */ ++static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ /* In case the insert_pages only partially completes we need to be able -+ * to recover */ -+ bool recover_required = false; -+ u64 recover_vpfn = vpfn; -+ size_t recover_count = 0; -+ size_t remain = nr; -+ int err; ++ struct rb_root *reg_rbtree = NULL; ++ int err = 0; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(0 != vpfn); -+ /* 64-bit address range is the max */ -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); ++ reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg); + -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return 0; ++ /* Must be a free region */ ++ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); ++ /* start_pfn should be contained within at_reg */ ++ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); ++ /* at least nr_pages from start_pfn should be contained within at_reg */ ++ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); + -+ mutex_lock(&kctx->mmu_lock); ++ new_reg->start_pfn = start_pfn; ++ new_reg->nr_pages = nr_pages; + -+ while (remain) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; ++ /* Regions are a whole use, so swap and delete old one. */ ++ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { ++ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), ++ reg_rbtree); ++ kbase_free_alloced_region(at_reg); ++ } ++ /* New region replaces the start of the old one, so insert before. */ ++ else if (at_reg->start_pfn == start_pfn) { ++ at_reg->start_pfn += nr_pages; ++ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); ++ at_reg->nr_pages -= nr_pages; + -+ if (count > remain) -+ count = remain; ++ kbase_region_tracker_insert(kctx, new_reg); ++ } ++ /* New region replaces the end of the old one, so insert after. */ ++ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { ++ at_reg->nr_pages -= nr_pages; + -+ /* -+ * Repeatedly calling mmu_get_bottom_pte() is clearly -+ * suboptimal. We don't have to re-parse the whole tree -+ * each time (just cache the l0-l2 sequence). -+ * On the other hand, it's only a gain when we map more than -+ * 256 pages at once (on average). Do we really care? 
-+ */ -+ do { -+ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); -+ if (err != -ENOMEM) -+ break; -+ /* Fill the memory pool with enough pages for -+ * the page walk to succeed -+ */ -+ mutex_unlock(&kctx->mmu_lock); -+ err = kbase_mem_pool_grow(&kctx->mem_pool, -+ MIDGARD_MMU_BOTTOMLEVEL); -+ mutex_lock(&kctx->mmu_lock); -+ } while (!err); -+ if (err) { -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); -+ if (recover_required) { -+ /* Invalidate the pages we have partially -+ * completed */ -+ mmu_insert_pages_failure_recovery(kctx, -+ recover_vpfn, -+ recover_count); -+ } -+ goto fail_unlock; -+ } ++ kbase_region_tracker_insert(kctx, new_reg); ++ } ++ /* New region splits the old one, so insert and create new */ ++ else { ++ struct kbase_va_region *new_front_reg; + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); -+ if (recover_required) { -+ /* Invalidate the pages we have partially -+ * completed */ -+ mmu_insert_pages_failure_recovery(kctx, -+ recover_vpfn, -+ recover_count); -+ } -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++ new_front_reg = kbase_alloc_free_region(kctx, ++ at_reg->start_pfn, ++ start_pfn - at_reg->start_pfn, ++ at_reg->flags & KBASE_REG_ZONE_MASK); + -+ for (i = 0; i < count; i++) { -+ unsigned int ofs = index + i; ++ if (new_front_reg) { ++ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; ++ at_reg->start_pfn = start_pfn + nr_pages; + -+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); -+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], -+ phys[i], flags); ++ kbase_region_tracker_insert(kctx, new_front_reg); ++ kbase_region_tracker_insert(kctx, new_reg); ++ } else { ++ err = -ENOMEM; + } -+ -+ phys += count; -+ vpfn += count; -+ remain -= count; -+ -+ kbase_mmu_sync_pgd(kctx->kbdev, -+ kbase_dma_addr(p) + (index * sizeof(u64)), -+ count * sizeof(u64)); -+ -+ kunmap(p); -+ /* We have started modifying the page table. If further pages -+ * need inserting and fail we need to undo what has already -+ * taken place */ -+ recover_required = true; -+ recover_count += count; + } + -+ mutex_unlock(&kctx->mmu_lock); -+ return 0; -+ -+fail_unlock: -+ mutex_unlock(&kctx->mmu_lock); -+ return err; -+} -+ -+/* -+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' -+ */ -+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, -+ phys_addr_t *phys, size_t nr, -+ unsigned long flags) -+{ -+ int err; -+ -+ err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); -+ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + return err; +} + -+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -+ +/** -+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches -+ * without retaining the kbase context. -+ * @kctx: The KBase context. -+ * @vpfn: The virtual page frame number to start the flush on. -+ * @nr: The number of pages to flush. -+ * @sync: Set if the operation should be synchronous or not. -+ * -+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any -+ * other locking. ++ * @brief Add a VA region to the list. 
+ */ -+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, -+ u64 vpfn, size_t nr, bool sync) ++int kbase_add_va_region(struct kbase_context *kctx, ++ struct kbase_va_region *reg, u64 addr, ++ size_t nr_pages, size_t align) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ int err; -+ u32 op; -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return; -+ -+ if (sync) -+ op = AS_COMMAND_FLUSH_MEM; -+ else -+ op = AS_COMMAND_FLUSH_PT; -+ -+ err = kbase_mmu_hw_do_operation(kbdev, -+ &kbdev->as[kctx->as_nr], -+ kctx, vpfn, nr, op, 0); -+#if KBASE_GPU_RESET_EN -+ if (err) { -+ /* Flush failed to complete, assume the -+ * GPU has hung and perform a reset to -+ * recover */ -+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); -+ -+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) -+ kbase_reset_gpu_locked(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ -+ -+#ifndef CONFIG_MALI_NO_MALI -+ /* -+ * As this function could be called in interrupt context the sync -+ * request can't block. Instead log the request and the next flush -+ * request will pick it up. -+ */ -+ if ((!err) && sync && -+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) -+ atomic_set(&kctx->drain_pending, 1); -+#endif /* !CONFIG_MALI_NO_MALI */ -+} ++ struct kbase_va_region *tmp; ++ u64 gpu_pfn = addr >> PAGE_SHIFT; ++ int err = 0; + -+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, -+ u64 vpfn, size_t nr, bool sync) -+{ -+ struct kbase_device *kbdev; -+ bool ctx_is_in_runpool; -+#ifndef CONFIG_MALI_NO_MALI -+ bool drain_pending = false; ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != reg); + -+ if (atomic_xchg(&kctx->drain_pending, 0)) -+ drain_pending = true; -+#endif /* !CONFIG_MALI_NO_MALI */ ++ lockdep_assert_held(&kctx->reg_lock); + -+ /* Early out if there is nothing to do */ -+ if (nr == 0) -+ return; ++ if (!align) ++ align = 1; + -+ kbdev = kctx->kbdev; -+ mutex_lock(&kbdev->js_data.queue_mutex); -+ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); -+ mutex_unlock(&kbdev->js_data.queue_mutex); ++ /* must be a power of 2 */ ++ KBASE_DEBUG_ASSERT((align & (align - 1)) == 0); ++ KBASE_DEBUG_ASSERT(nr_pages > 0); + -+ if (ctx_is_in_runpool) { -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. 
*/ ++ if (gpu_pfn) { ++ struct device *dev = kctx->kbdev->dev; + -+ if (!kbase_pm_context_active_handle_suspend(kbdev, -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ int err; -+ u32 op; ++ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages); ++ if (!tmp) { ++ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); ++ err = -ENOMEM; ++ goto exit; ++ } ++ if (!(tmp->flags & KBASE_REG_FREE)) { ++ dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK); ++ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); ++ dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align); ++ err = -ENOMEM; ++ goto exit; ++ } + -+ if (sync) -+ op = AS_COMMAND_FLUSH_MEM; -+ else -+ op = AS_COMMAND_FLUSH_PT; ++ err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages); ++ if (err) { ++ dev_warn(dev, "Failed to insert va region"); ++ err = -ENOMEM; ++ goto exit; ++ } + -+ err = kbase_mmu_hw_do_operation(kbdev, -+ &kbdev->as[kctx->as_nr], -+ kctx, vpfn, nr, op, 0); ++ goto exit; ++ } + -+#if KBASE_GPU_RESET_EN -+ if (err) { -+ /* Flush failed to complete, assume the -+ * GPU has hung and perform a reset to -+ * recover */ -+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); ++ /* Path 2: Map any free address which meets the requirements. */ ++ { ++ u64 start_pfn; + -+ if (kbase_prepare_to_reset_gpu(kbdev)) -+ kbase_reset_gpu(kbdev); ++ /* ++ * Depending on the zone the allocation request is for ++ * we might need to retry it. ++ */ ++ do { ++ tmp = kbase_region_tracker_find_region_meeting_reqs( ++ kctx, reg, nr_pages, align); ++ if (tmp) { ++ start_pfn = (tmp->start_pfn + align - 1) & ++ ~(align - 1); ++ err = kbase_insert_va_region_nolock(kctx, reg, ++ tmp, start_pfn, nr_pages); ++ break; + } -+#endif /* KBASE_GPU_RESET_EN */ -+ -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ + -+#ifndef CONFIG_MALI_NO_MALI + /* -+ * The transaction lock must be dropped before here -+ * as kbase_wait_write_flush could take it if -+ * the GPU was powered down (static analysis doesn't -+ * know this can't happen). ++ * If the allocation is not from the same zone as JIT ++ * then don't retry, we're out of VA and there is ++ * nothing which can be done about it. + */ -+ drain_pending |= (!err) && sync && -+ kbase_hw_has_issue(kctx->kbdev, -+ BASE_HW_ISSUE_6367); -+ if (drain_pending) { -+ /* Wait for GPU to flush write buffer */ -+ kbase_wait_write_flush(kctx); -+ } -+#endif /* !CONFIG_MALI_NO_MALI */ ++ if ((reg->flags & KBASE_REG_ZONE_MASK) != ++ KBASE_REG_ZONE_CUSTOM_VA) ++ break; ++ } while (kbase_jit_evict(kctx)); + -+ kbase_pm_context_idle(kbdev); -+ } -+ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ if (!tmp) ++ err = -ENOMEM; + } ++ ++ exit: ++ return err; +} + -+void kbase_mmu_update(struct kbase_context *kctx) ++KBASE_EXPORT_TEST_API(kbase_add_va_region); ++ ++/** ++ * @brief Initialize the internal region tracker data structure. 
++ */ ++static void kbase_region_tracker_ds_init(struct kbase_context *kctx, ++ struct kbase_va_region *same_va_reg, ++ struct kbase_va_region *exec_reg, ++ struct kbase_va_region *custom_va_reg) +{ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); -+ /* ASSERT that the context has a valid as_nr, which is only the case -+ * when it's scheduled in. -+ * -+ * as_nr won't change because the caller has the hwaccess_lock */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ kctx->reg_rbtree_same = RB_ROOT; ++ kbase_region_tracker_insert(kctx, same_va_reg); + -+ kctx->kbdev->mmu_mode->update(kctx); ++ /* Although exec and custom_va_reg don't always exist, ++ * initialize unconditionally because of the mem_view debugfs ++ * implementation which relies on these being empty */ ++ kctx->reg_rbtree_exec = RB_ROOT; ++ kctx->reg_rbtree_custom = RB_ROOT; ++ ++ if (exec_reg) ++ kbase_region_tracker_insert(kctx, exec_reg); ++ if (custom_va_reg) ++ kbase_region_tracker_insert(kctx, custom_va_reg); +} -+KBASE_EXPORT_TEST_API(kbase_mmu_update); + -+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) ++static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ lockdep_assert_held(&kbdev->mmu_hw_mutex); ++ struct rb_node *rbnode; ++ struct kbase_va_region *reg; + -+ kbdev->mmu_mode->disable_as(kbdev, as_nr); ++ do { ++ rbnode = rb_first(rbtree); ++ if (rbnode) { ++ rb_erase(rbnode, rbtree); ++ reg = rb_entry(rbnode, struct kbase_va_region, rblink); ++ kbase_free_alloced_region(reg); ++ } ++ } while (rbnode); +} + -+void kbase_mmu_disable(struct kbase_context *kctx) ++void kbase_region_tracker_term(struct kbase_context *kctx) +{ -+ /* ASSERT that the context has a valid as_nr, which is only the case -+ * when it's scheduled in. -+ * -+ * as_nr won't change because the caller has the hwaccess_lock */ -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ -+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); -+ -+ /* -+ * The address space is being disabled, drain all knowledge of it out -+ * from the caches as pages and page tables might be freed after this. -+ * -+ * The job scheduler code will already be holding the locks and context -+ * so just do the flush. -+ */ -+ kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); -+ -+ kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); ++ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); +} -+KBASE_EXPORT_TEST_API(kbase_mmu_disable); + -+/* -+ * We actually only discard the ATE, and not the page table -+ * pages. There is a potential DoS here, as we'll leak memory by -+ * having PTEs that are potentially unused. Will require physical -+ * page accounting, so MMU pages are part of the process allocation. -+ * -+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is -+ * currently scheduled into the runpool, and so potentially uses a lot of locks. -+ * These locks must be taken in the correct order with respect to others -+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more -+ * information. ++/** ++ * Initialize the region tracker data structure. 
+ */ -+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) ++int kbase_region_tracker_init(struct kbase_context *kctx) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ struct kbase_device *kbdev; -+ size_t requested_nr = nr; -+ struct kbase_mmu_mode const *mmu_mode; ++ struct kbase_va_region *same_va_reg; ++ struct kbase_va_region *exec_reg = NULL; ++ struct kbase_va_region *custom_va_reg = NULL; ++ size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; ++ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; ++ u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; ++ u64 same_va_pages; + int err; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); ++ /* Take the lock as kbase_free_alloced_region requires it */ ++ kbase_gpu_vm_lock(kctx); + -+ if (0 == nr) { -+ /* early out if nothing to do */ -+ return 0; -+ } ++#if defined(CONFIG_ARM64) ++ same_va_bits = VA_BITS; ++#elif defined(CONFIG_X86_64) ++ same_va_bits = 47; ++#elif defined(CONFIG_64BIT) ++#error Unsupported 64-bit architecture ++#endif + -+ mutex_lock(&kctx->mmu_lock); ++#ifdef CONFIG_64BIT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ same_va_bits = 32; ++ else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) ++ same_va_bits = 33; ++#endif + -+ kbdev = kctx->kbdev; -+ mmu_mode = kbdev->mmu_mode; ++ if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { ++ err = -EINVAL; ++ goto fail_unlock; ++ } + -+ while (nr) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; ++ same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; ++ /* all have SAME_VA */ ++ same_va_reg = kbase_alloc_free_region(kctx, 1, ++ same_va_pages, ++ KBASE_REG_ZONE_SAME_VA); + -+ if (count > nr) -+ count = nr; ++ if (!same_va_reg) { ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); -+ if (err) { -+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); ++#ifdef CONFIG_64BIT ++ /* 32-bit clients have exec and custom VA zones */ ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++#endif ++ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { + err = -EINVAL; -+ goto fail_unlock; ++ goto fail_free_same_va; + } ++ /* If the current size of TMEM is out of range of the ++ * virtual address space addressable by the MMU then ++ * we should shrink it to fit ++ */ ++ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) ++ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); ++ exec_reg = kbase_alloc_free_region(kctx, ++ KBASE_REG_ZONE_EXEC_BASE, ++ KBASE_REG_ZONE_EXEC_SIZE, ++ KBASE_REG_ZONE_EXEC); ++ ++ if (!exec_reg) { + err = -ENOMEM; -+ goto fail_unlock; ++ goto fail_free_same_va; + } + -+ for (i = 0; i < count; i++) -+ mmu_mode->entry_invalidate(&pgd_page[index + i]); ++ custom_va_reg = kbase_alloc_free_region(kctx, ++ KBASE_REG_ZONE_CUSTOM_VA_BASE, ++ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + -+ vpfn += count; -+ nr -= count; ++ if (!custom_va_reg) { ++ err = -ENOMEM; ++ goto fail_free_exec; ++ } ++#ifdef CONFIG_64BIT ++ } ++#endif + -+ kbase_mmu_sync_pgd(kctx->kbdev, -+ kbase_dma_addr(p) + (index * sizeof(u64)), -+ count * sizeof(u64)); ++ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, 
custom_va_reg); + -+ kunmap(p); -+ } ++ kctx->same_va_end = same_va_pages + 1; + -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ kbase_gpu_vm_unlock(kctx); + return 0; + ++fail_free_exec: ++ kbase_free_alloced_region(exec_reg); ++fail_free_same_va: ++ kbase_free_alloced_region(same_va_reg); +fail_unlock: -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ kbase_gpu_vm_unlock(kctx); + return err; +} + -+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); -+ -+/** -+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'. -+ * This call is being triggered as a response to the changes of the mem attributes -+ * -+ * @pre : The caller is responsible for validating the memory attributes -+ * -+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is -+ * currently scheduled into the runpool, and so potentially uses a lot of locks. -+ * These locks must be taken in the correct order with respect to others -+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more -+ * information. -+ */ -+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) ++int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) +{ -+ phys_addr_t pgd; -+ u64 *pgd_page; -+ size_t requested_nr = nr; -+ struct kbase_mmu_mode const *mmu_mode; ++#ifdef CONFIG_64BIT ++ struct kbase_va_region *same_va; ++ struct kbase_va_region *custom_va_reg; ++ u64 same_va_bits; ++ u64 total_va_size; + int err; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(0 != vpfn); -+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); -+ -+ /* Early out if there is nothing to do */ -+ if (nr == 0) ++ /* ++ * Nothing to do for 32-bit clients, JIT uses the existing ++ * custom VA zone. ++ */ ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + return 0; + -+ mutex_lock(&kctx->mmu_lock); -+ -+ mmu_mode = kctx->kbdev->mmu_mode; ++#if defined(CONFIG_ARM64) ++ same_va_bits = VA_BITS; ++#elif defined(CONFIG_X86_64) ++ same_va_bits = 47; ++#elif defined(CONFIG_64BIT) ++#error Unsupported 64-bit architecture ++#endif + -+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages", -+ vpfn, phys, nr); ++ if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) ++ same_va_bits = 33; + -+ while (nr) { -+ unsigned int i; -+ unsigned int index = vpfn & 0x1FF; -+ size_t count = KBASE_MMU_PAGE_ENTRIES - index; -+ struct page *p; ++ total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + -+ if (count > nr) -+ count = nr; ++ kbase_gpu_vm_lock(kctx); + -+ do { -+ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); -+ if (err != -ENOMEM) -+ break; -+ /* Fill the memory pool with enough pages for -+ * the page walk to succeed -+ */ -+ mutex_unlock(&kctx->mmu_lock); -+ err = kbase_mem_pool_grow(&kctx->mem_pool, -+ MIDGARD_MMU_BOTTOMLEVEL); -+ mutex_lock(&kctx->mmu_lock); -+ } while (!err); -+ if (err) { -+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); -+ goto fail_unlock; -+ } ++ /* ++ * Modify the same VA free region after creation. Be careful to ensure ++ * that allocations haven't been made as they could cause an overlap ++ * to happen with existing same VA allocations and the custom VA zone. 
++ */ ++ same_va = kbase_region_tracker_find_region_base_address(kctx, ++ PAGE_SIZE); ++ if (!same_va) { ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ p = pfn_to_page(PFN_DOWN(pgd)); -+ pgd_page = kmap(p); -+ if (!pgd_page) { -+ dev_warn(kctx->kbdev->dev, "kmap failure\n"); -+ err = -ENOMEM; -+ goto fail_unlock; -+ } ++ /* The region flag or region size has changed since creation so bail. */ ++ if ((!(same_va->flags & KBASE_REG_FREE)) || ++ (same_va->nr_pages != total_va_size)) { ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ for (i = 0; i < count; i++) -+ mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i], -+ flags); ++ if (same_va->nr_pages < jit_va_pages || ++ kctx->same_va_end < jit_va_pages) { ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ phys += count; -+ vpfn += count; -+ nr -= count; ++ /* It's safe to adjust the same VA zone now */ ++ same_va->nr_pages -= jit_va_pages; ++ kctx->same_va_end -= jit_va_pages; + -+ kbase_mmu_sync_pgd(kctx->kbdev, -+ kbase_dma_addr(p) + (index * sizeof(u64)), -+ count * sizeof(u64)); ++ /* ++ * Create a custom VA zone at the end of the VA for allocations which ++ * JIT can use so it doesn't have to allocate VA from the kernel. ++ */ ++ custom_va_reg = kbase_alloc_free_region(kctx, ++ kctx->same_va_end, ++ jit_va_pages, ++ KBASE_REG_ZONE_CUSTOM_VA); + -+ kunmap(pfn_to_page(PFN_DOWN(pgd))); ++ if (!custom_va_reg) { ++ /* ++ * The context will be destroyed if we fail here so no point ++ * reverting the change we made to same_va. ++ */ ++ err = -ENOMEM; ++ goto fail_unlock; + } + -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ kbase_region_tracker_insert(kctx, custom_va_reg); ++ ++ kbase_gpu_vm_unlock(kctx); + return 0; + +fail_unlock: -+ mutex_unlock(&kctx->mmu_lock); -+ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ kbase_gpu_vm_unlock(kctx); + return err; ++#else ++ return 0; ++#endif +} + -+/* This is a debug feature only */ -+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd) ++int kbase_mem_init(struct kbase_device *kbdev) +{ -+ u64 *page; -+ int i; ++ struct kbasep_mem_device *memdev; + -+ lockdep_assert_held(&kctx->reg_lock); ++ KBASE_DEBUG_ASSERT(kbdev); + -+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); -+ /* kmap_atomic should NEVER fail. */ -+ KBASE_DEBUG_ASSERT(NULL != page); ++ memdev = &kbdev->memdev; ++ kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX; + -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { -+ if (kctx->kbdev->mmu_mode->ate_is_valid(page[i])) -+ beenthere(kctx, "live pte %016lx", (unsigned long)page[i]); -+ } -+ kunmap_atomic(page); ++ /* Initialize memory usage */ ++ atomic_set(&memdev->used_pages, 0); ++ ++ return kbase_mem_pool_init(&kbdev->mem_pool, ++ KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL); +} + -+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer) ++void kbase_mem_halt(struct kbase_device *kbdev) +{ -+ phys_addr_t target_pgd; -+ u64 *pgd_page; -+ int i; -+ struct kbase_mmu_mode const *mmu_mode; -+ -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ lockdep_assert_held(&kctx->mmu_lock); -+ lockdep_assert_held(&kctx->reg_lock); -+ -+ pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); -+ /* kmap_atomic should NEVER fail. 
*/ -+ KBASE_DEBUG_ASSERT(NULL != pgd_page); -+ /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */ -+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); -+ kunmap_atomic(pgd_page); -+ pgd_page = pgd_page_buffer; ++ CSTD_UNUSED(kbdev); ++} + -+ mmu_mode = kctx->kbdev->mmu_mode; ++void kbase_mem_term(struct kbase_device *kbdev) ++{ ++ struct kbasep_mem_device *memdev; ++ int pages; + -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { -+ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); ++ KBASE_DEBUG_ASSERT(kbdev); + -+ if (target_pgd) { -+ if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) { -+ mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64))); -+ } else { -+ /* -+ * So target_pte is a level-3 page. -+ * As a leaf, it is safe to free it. -+ * Unless we have live pages attached to it! -+ */ -+ mmu_check_unused(kctx, target_pgd); -+ } ++ memdev = &kbdev->memdev; + -+ beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1); -+ if (zap) { -+ struct page *p = phys_to_page(target_pgd); ++ pages = atomic_read(&memdev->used_pages); ++ if (pages != 0) ++ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + -+ kbase_mem_pool_free(&kctx->mem_pool, p, true); -+ kbase_process_page_usage_dec(kctx, 1); -+ kbase_atomic_sub_pages(1, &kctx->used_pages); -+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); -+ } -+ } -+ } ++ kbase_mem_pool_term(&kbdev->mem_pool); +} + -+int kbase_mmu_init(struct kbase_context *kctx) -+{ -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); -+ -+ mutex_init(&kctx->mmu_lock); ++KBASE_EXPORT_TEST_API(kbase_mem_term); + -+ /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ -+ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + -+ if (NULL == kctx->mmu_teardown_pages) -+ return -ENOMEM; + -+ return 0; -+} + -+void kbase_mmu_term(struct kbase_context *kctx) ++/** ++ * @brief Allocate a free region object. ++ * ++ * The allocated object is not part of any list yet, and is flagged as ++ * KBASE_REG_FREE. No mapping is allocated yet. 
++ * ++ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC ++ * ++ */ ++struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) +{ -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); ++ struct kbase_va_region *new_reg; + -+ kfree(kctx->mmu_teardown_pages); -+ kctx->mmu_teardown_pages = NULL; -+} ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+void kbase_mmu_free_pgd(struct kbase_context *kctx) -+{ -+ int new_page_count __maybe_unused; ++ /* zone argument should only contain zone related region flags */ ++ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); ++ KBASE_DEBUG_ASSERT(nr_pages > 0); ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); ++ new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); + -+ mutex_lock(&kctx->mmu_lock); -+ mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); -+ mutex_unlock(&kctx->mmu_lock); ++ if (!new_reg) ++ return NULL; + -+ beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); -+ kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); -+ kbase_process_page_usage_dec(kctx, 1); -+ new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); -+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); ++ new_reg->cpu_alloc = NULL; /* no alloc bound yet */ ++ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ ++ new_reg->kctx = kctx; ++ new_reg->flags = zone | KBASE_REG_FREE; + -+ KBASE_TLSTREAM_AUX_PAGESALLOC( -+ (u32)kctx->id, -+ (u64)new_page_count); ++ new_reg->flags |= KBASE_REG_GROWABLE; ++ ++ new_reg->start_pfn = start_pfn; ++ new_reg->nr_pages = nr_pages; ++ ++ return new_reg; +} + -+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); ++KBASE_EXPORT_TEST_API(kbase_alloc_free_region); + -+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) ++/** ++ * @brief Free a region object. ++ * ++ * The described region must be freed of any mapping. ++ * ++ * If the region is not flagged as KBASE_REG_FREE, the region's ++ * alloc object will be released. ++ * It is a bug if no alloc object exists for non-free regions. ++ * ++ */ ++void kbase_free_alloced_region(struct kbase_va_region *reg) +{ -+ phys_addr_t target_pgd; -+ u64 *pgd_page; -+ int i; -+ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); -+ size_t dump_size; -+ struct kbase_mmu_mode const *mmu_mode; ++ if (!(reg->flags & KBASE_REG_FREE)) { ++ /* ++ * The physical allocation should have been removed from the ++ * eviction list before this function is called. However, in the ++ * case of abnormal process termination or the app leaking the ++ * memory kbase_mem_free_region is not called so it can still be ++ * on the list at termination time of the region tracker. ++ */ ++ if (!list_empty(®->gpu_alloc->evict_node)) { ++ /* ++ * Unlink the physical allocation before unmaking it ++ * evictable so that the allocation isn't grown back to ++ * its last backed size as we're going to unmap it ++ * anyway. ++ */ ++ reg->cpu_alloc->reg = NULL; ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ reg->gpu_alloc->reg = NULL; + -+ KBASE_DEBUG_ASSERT(NULL != kctx); -+ lockdep_assert_held(&kctx->mmu_lock); ++ /* ++ * If a region has been made evictable then we must ++ * unmake it before trying to free it. 
++ * If the memory hasn't been reclaimed it will be ++ * unmapped and freed below, if it has been reclaimed ++ * then the operations below are no-ops. ++ */ ++ if (reg->flags & KBASE_REG_DONT_NEED) { ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == ++ KBASE_MEM_TYPE_NATIVE); ++ kbase_mem_evictable_unmake(reg->gpu_alloc); ++ } ++ } + -+ mmu_mode = kctx->kbdev->mmu_mode; ++ /* ++ * Remove the region from the sticky resource metadata ++ * list should it be there. ++ */ ++ kbase_sticky_resource_release(reg->kctx, NULL, ++ reg->start_pfn << PAGE_SHIFT); + -+ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); -+ if (!pgd_page) { -+ dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n"); -+ return 0; ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ /* To detect use-after-free in debug builds */ ++ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); + } ++ kfree(reg); ++} + -+ if (*size_left >= size) { -+ /* A modified physical address that contains the page table level */ -+ u64 m_pgd = pgd | level; -+ -+ /* Put the modified physical address in the output buffer */ -+ memcpy(*buffer, &m_pgd, sizeof(m_pgd)); -+ *buffer += sizeof(m_pgd); -+ -+ /* Followed by the page table itself */ -+ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); -+ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; ++KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + -+ *size_left -= size; -+ } ++int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) ++{ ++ int err; ++ size_t i = 0; ++ unsigned long attr; ++ unsigned long mask = ~KBASE_REG_MEMATTR_MASK; + -+ if (level < MIDGARD_MMU_BOTTOMLEVEL) { -+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { -+ if (mmu_mode->pte_is_valid(pgd_page[i])) { -+ target_pgd = mmu_mode->pte_to_phy_addr( -+ pgd_page[i]); ++ if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && ++ (reg->flags & KBASE_REG_SHARE_BOTH)) ++ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); ++ else ++ attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + -+ dump_size = kbasep_mmu_dump_level(kctx, -+ target_pgd, level + 1, -+ buffer, size_left); -+ if (!dump_size) { -+ kunmap(pfn_to_page(PFN_DOWN(pgd))); -+ return 0; -+ } -+ size += dump_size; -+ } -+ } -+ } ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != reg); + -+ kunmap(pfn_to_page(PFN_DOWN(pgd))); ++ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); ++ if (err) ++ return err; + -+ return size; -+} ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { ++ u64 stride; ++ struct kbase_mem_phy_alloc *alloc; + -+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) -+{ -+ void *kaddr; -+ size_t size_left; ++ alloc = reg->gpu_alloc; ++ stride = alloc->imported.alias.stride; ++ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); ++ for (i = 0; i < alloc->imported.alias.nents; i++) { ++ if (alloc->imported.alias.aliased[i].alloc) { ++ err = kbase_mmu_insert_pages(kctx, ++ reg->start_pfn + (i * stride), ++ alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, ++ alloc->imported.alias.aliased[i].length, ++ reg->flags); ++ if (err) ++ goto bad_insert; + -+ KBASE_DEBUG_ASSERT(kctx); ++ kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); ++ } else { ++ err = kbase_mmu_insert_single_page(kctx, ++ reg->start_pfn + i * stride, ++ page_to_phys(kctx->aliasing_sink_page), ++ alloc->imported.alias.aliased[i].length, ++ (reg->flags & mask) | attr); + -+ if (0 
== nr_pages) { -+ /* can't dump in a 0 sized buffer, early out */ -+ return NULL; ++ if (err) ++ goto bad_insert; ++ } ++ } ++ } else { ++ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ kbase_reg_current_backed_size(reg), ++ reg->flags); ++ if (err) ++ goto bad_insert; ++ kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc); + } + -+ size_left = nr_pages * PAGE_SIZE; ++ return err; + -+ KBASE_DEBUG_ASSERT(0 != size_left); -+ kaddr = vmalloc_user(size_left); ++bad_insert: ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { ++ u64 stride; + -+ mutex_lock(&kctx->mmu_lock); ++ stride = reg->gpu_alloc->imported.alias.stride; ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); ++ while (i--) ++ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) { ++ kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length); ++ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); ++ } ++ } + -+ if (kaddr) { -+ u64 end_marker = 0xFFULL; -+ char *buffer; -+ char *mmu_dump_buffer; -+ u64 config[3]; -+ size_t size; ++ kbase_remove_va_region(kctx, reg); + -+ buffer = (char *)kaddr; -+ mmu_dump_buffer = buffer; ++ return err; ++} + -+ if (kctx->api_version >= KBASE_API_VERSION(8, 4)) { -+ struct kbase_mmu_setup as_setup; ++KBASE_EXPORT_TEST_API(kbase_gpu_mmap); + -+ kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); -+ config[0] = as_setup.transtab; -+ config[1] = as_setup.memattr; -+ config[2] = as_setup.transcfg; -+ memcpy(buffer, &config, sizeof(config)); -+ mmu_dump_buffer += sizeof(config); -+ size_left -= sizeof(config); -+ } ++static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc, bool writeable); + ++int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) ++{ ++ int err; + ++ if (reg->start_pfn == 0) ++ return 0; + -+ size = kbasep_mmu_dump_level(kctx, -+ kctx->pgd, -+ MIDGARD_MMU_TOPLEVEL, -+ &mmu_dump_buffer, -+ &size_left); ++ if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { ++ size_t i; + -+ if (!size) -+ goto fail_free; ++ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); ++ for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) ++ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) ++ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); ++ } else { ++ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); ++ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); ++ } + -+ /* Add on the size for the end marker */ -+ size += sizeof(u64); -+ /* Add on the size for the config */ -+ if (kctx->api_version >= KBASE_API_VERSION(8, 4)) -+ size += sizeof(config); ++ if (reg->gpu_alloc && reg->gpu_alloc->type == ++ KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ struct kbase_alloc_import_user_buf *user_buf = ++ ®->gpu_alloc->imported.user_buf; + ++ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { ++ user_buf->current_mapping_usage_count &= ++ ~PINNED_ON_IMPORT; + -+ if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { -+ /* The buffer isn't big enough - free the memory and return failure */ -+ goto fail_free; ++ kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, ++ (reg->flags & KBASE_REG_GPU_WR)); + } -+ -+ /* Add the end marker */ -+ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); + } + -+ mutex_unlock(&kctx->mmu_lock); -+ return 
kaddr; ++ if (err) ++ return err; + -+fail_free: -+ vfree(kaddr); -+ mutex_unlock(&kctx->mmu_lock); -+ return NULL; ++ err = kbase_remove_va_region(kctx, reg); ++ return err; +} -+KBASE_EXPORT_TEST_API(kbase_mmu_dump); + -+void bus_fault_worker(struct work_struct *data) ++static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset) +{ -+ struct kbase_as *faulting_as; -+ int as_no; -+ struct kbase_context *kctx; -+ struct kbase_device *kbdev; -+#if KBASE_GPU_RESET_EN -+ bool reset_status = false; -+#endif /* KBASE_GPU_RESET_EN */ ++ struct vm_area_struct *vma; ++ struct kbase_cpu_mapping *map; ++ unsigned long vm_pgoff_in_region; ++ unsigned long vm_off_in_region; ++ unsigned long map_start; ++ size_t map_size; + -+ faulting_as = container_of(data, struct kbase_as, work_busfault); ++ lockdep_assert_held(¤t->mm->mmap_lock); + -+ as_no = faulting_as->number; ++ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ ++ return NULL; + -+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); ++ vma = find_vma_intersection(current->mm, uaddr, uaddr+size); + -+ /* Grab the context that was already refcounted in kbase_mmu_interrupt(). -+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it -+ */ -+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); -+ if (WARN_ON(!kctx)) { -+ atomic_dec(&kbdev->faults_pending); -+ return; -+ } ++ if (!vma || vma->vm_start > uaddr) ++ return NULL; ++ if (vma->vm_ops != &kbase_vm_ops) ++ /* Not ours! */ ++ return NULL; + -+ if (unlikely(faulting_as->protected_mode)) -+ { -+ kbase_mmu_report_fault_and_kill(kctx, faulting_as, -+ "Permission failure"); -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ kbasep_js_runpool_release_ctx(kbdev, kctx); -+ atomic_dec(&kbdev->faults_pending); -+ return; ++ map = vma->vm_private_data; + -+ } ++ if (map->kctx != kctx) ++ /* Not from this context! */ ++ return NULL; + -+#if KBASE_GPU_RESET_EN -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { -+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. -+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs -+ * are evicted from the GPU before the switch. -+ */ -+ dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); -+ reset_status = kbase_prepare_to_reset_gpu(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ -+ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ -+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { -+ unsigned long flags; ++ vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; ++ vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; ++ map_start = vma->vm_start - vm_off_in_region; ++ map_size = map->region->nr_pages << PAGE_SHIFT; + -+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); ++ if ((uaddr + size) > (map_start + map_size)) ++ /* Not within the CPU mapping */ ++ return NULL; + -+ /* Set the MMU into unmapped mode */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ *offset = (uaddr - vma->vm_start) + vm_off_in_region; + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++ return map; ++} + -+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++int kbasep_find_enclosing_cpu_mapping_offset( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset) ++{ ++ struct kbase_cpu_mapping *map; + -+ kbase_pm_context_idle(kbdev); -+ } ++ kbase_os_mem_map_lock(kctx); + -+#if KBASE_GPU_RESET_EN -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) -+ kbase_reset_gpu(kbdev); -+#endif /* KBASE_GPU_RESET_EN */ ++ map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); + -+ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ kbase_os_mem_map_unlock(kctx); + -+ atomic_dec(&kbdev->faults_pending); ++ if (!map) ++ return -EINVAL; ++ ++ return 0; +} + -+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) ++KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); ++ ++void kbase_sync_single(struct kbase_context *kctx, ++ phys_addr_t cpu_pa, phys_addr_t gpu_pa, ++ off_t offset, size_t size, enum kbase_sync_type sync_fn) +{ -+ const char *e; ++ struct page *cpu_page; + -+ switch (exception_code) { -+ /* Non-Fault Status code */ -+ case 0x00: -+ e = "NOT_STARTED/IDLE/OK"; -+ break; -+ case 0x01: -+ e = "DONE"; -+ break; -+ case 0x02: -+ e = "INTERRUPTED"; -+ break; -+ case 0x03: -+ e = "STOPPED"; -+ break; -+ case 0x04: -+ e = "TERMINATED"; -+ break; -+ case 0x08: -+ e = "ACTIVE"; -+ break; -+ /* Job exceptions */ -+ case 0x40: -+ e = "JOB_CONFIG_FAULT"; -+ break; -+ case 0x41: -+ e = "JOB_POWER_FAULT"; -+ break; -+ case 0x42: -+ e = "JOB_READ_FAULT"; -+ break; -+ case 0x43: -+ e = "JOB_WRITE_FAULT"; -+ break; -+ case 0x44: -+ e = "JOB_AFFINITY_FAULT"; -+ break; -+ case 0x48: -+ e = "JOB_BUS_FAULT"; -+ break; -+ case 0x50: -+ e = "INSTR_INVALID_PC"; -+ break; -+ case 0x51: -+ e = "INSTR_INVALID_ENC"; -+ break; -+ case 0x52: -+ e = "INSTR_TYPE_MISMATCH"; -+ break; -+ case 0x53: -+ e = "INSTR_OPERAND_FAULT"; -+ break; -+ case 0x54: -+ e = "INSTR_TLS_FAULT"; -+ break; -+ case 0x55: -+ e = "INSTR_BARRIER_FAULT"; -+ break; -+ case 0x56: -+ e = "INSTR_ALIGN_FAULT"; -+ break; -+ case 0x58: -+ e = "DATA_INVALID_FAULT"; -+ break; -+ case 0x59: -+ e = "TILE_RANGE_FAULT"; -+ break; -+ case 0x5A: -+ e = "ADDR_RANGE_FAULT"; -+ break; -+ 
case 0x60: -+ e = "OUT_OF_MEMORY"; -+ break; -+ /* GPU exceptions */ -+ case 0x80: -+ e = "DELAYED_BUS_FAULT"; -+ break; -+ case 0x88: -+ e = "SHAREABILITY_FAULT"; -+ break; -+ /* MMU exceptions */ -+ case 0xC0: -+ case 0xC1: -+ case 0xC2: -+ case 0xC3: -+ case 0xC4: -+ case 0xC5: -+ case 0xC6: -+ case 0xC7: -+ e = "TRANSLATION_FAULT"; -+ break; -+ case 0xC8: -+ e = "PERMISSION_FAULT"; -+ break; -+ case 0xC9: -+ case 0xCA: -+ case 0xCB: -+ case 0xCC: -+ case 0xCD: -+ case 0xCE: -+ case 0xCF: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ e = "PERMISSION_FAULT"; -+ else -+ e = "UNKNOWN"; -+ break; -+ case 0xD0: -+ case 0xD1: -+ case 0xD2: -+ case 0xD3: -+ case 0xD4: -+ case 0xD5: -+ case 0xD6: -+ case 0xD7: -+ e = "TRANSTAB_BUS_FAULT"; -+ break; -+ case 0xD8: -+ e = "ACCESS_FLAG"; -+ break; -+ case 0xD9: -+ case 0xDA: -+ case 0xDB: -+ case 0xDC: -+ case 0xDD: -+ case 0xDE: -+ case 0xDF: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ e = "ACCESS_FLAG"; -+ else -+ e = "UNKNOWN"; -+ break; -+ case 0xE0: -+ case 0xE1: -+ case 0xE2: -+ case 0xE3: -+ case 0xE4: -+ case 0xE5: -+ case 0xE6: -+ case 0xE7: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ e = "ADDRESS_SIZE_FAULT"; -+ else -+ e = "UNKNOWN"; -+ break; -+ case 0xE8: -+ case 0xE9: -+ case 0xEA: -+ case 0xEB: -+ case 0xEC: -+ case 0xED: -+ case 0xEE: -+ case 0xEF: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ e = "MEMORY_ATTRIBUTES_FAULT"; -+ else -+ e = "UNKNOWN"; -+ break; -+ default: -+ e = "UNKNOWN"; -+ break; -+ }; ++ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); + -+ return e; -+} ++ if (likely(cpu_pa == gpu_pa)) { ++ dma_addr_t dma_addr; + -+static const char *access_type_name(struct kbase_device *kbdev, -+ u32 fault_status) -+{ -+ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { -+ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ return "ATOMIC"; -+ else -+ return "UNKNOWN"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_READ: -+ return "READ"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: -+ return "WRITE"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_EX: -+ return "EXECUTE"; -+ default: -+ WARN_ON(1); -+ return NULL; ++ BUG_ON(!cpu_page); ++ BUG_ON(offset + size > PAGE_SIZE); ++ ++ dma_addr = kbase_dma_addr(cpu_page) + offset; ++ if (sync_fn == KBASE_SYNC_TO_CPU) ++ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, ++ size, DMA_BIDIRECTIONAL); ++ else if (sync_fn == KBASE_SYNC_TO_DEVICE) ++ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, ++ size, DMA_BIDIRECTIONAL); ++ } else { ++ void *src = NULL; ++ void *dst = NULL; ++ struct page *gpu_page; ++ ++ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) ++ return; ++ ++ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); ++ ++ if (sync_fn == KBASE_SYNC_TO_DEVICE) { ++ src = ((unsigned char *)kmap(cpu_page)) + offset; ++ dst = ((unsigned char *)kmap(gpu_page)) + offset; ++ } else if (sync_fn == KBASE_SYNC_TO_CPU) { ++ dma_sync_single_for_cpu(kctx->kbdev->dev, ++ kbase_dma_addr(gpu_page) + offset, ++ size, DMA_BIDIRECTIONAL); ++ src = ((unsigned char *)kmap(gpu_page)) + offset; ++ dst = ((unsigned char *)kmap(cpu_page)) + offset; ++ } ++ memcpy(dst, src, size); ++ kunmap(gpu_page); ++ kunmap(cpu_page); ++ if (sync_fn == KBASE_SYNC_TO_DEVICE) ++ dma_sync_single_for_device(kctx->kbdev->dev, ++ kbase_dma_addr(gpu_page) + offset, ++ size, DMA_BIDIRECTIONAL); + } +} + -+/** -+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out 
whilst it's being worked on. -+ */ -+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, -+ struct kbase_as *as, const char *reason_str) ++static int kbase_do_syncset(struct kbase_context *kctx, ++ struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ -+ unsigned long flags; -+ int exception_type; -+ int access_type; -+ int source_id; -+ int as_no; -+ struct kbase_device *kbdev; -+ struct kbasep_js_device_data *js_devdata; ++ int err = 0; ++ struct kbase_va_region *reg; ++ struct kbase_cpu_mapping *map; ++ unsigned long start; ++ size_t size; ++ phys_addr_t *cpu_pa; ++ phys_addr_t *gpu_pa; ++ u64 page_off, page_count; ++ u64 i; ++ u64 offset; + -+#if KBASE_GPU_RESET_EN -+ bool reset_status = false; -+#endif ++ kbase_os_mem_map_lock(kctx); ++ kbase_gpu_vm_lock(kctx); + -+ as_no = as->number; -+ kbdev = kctx->kbdev; -+ js_devdata = &kbdev->js_data; ++ /* find the region where the virtual address is contained */ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ sset->mem_handle.basep.handle); ++ if (!reg) { ++ dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", ++ sset->mem_handle.basep.handle); ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ /* ASSERT that the context won't leave the runpool */ -+ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); ++ if (!(reg->flags & KBASE_REG_CPU_CACHED)) ++ goto out_unlock; + -+ /* decode the fault status */ -+ exception_type = as->fault_status & 0xFF; -+ access_type = (as->fault_status >> 8) & 0x3; -+ source_id = (as->fault_status >> 16); ++ start = (uintptr_t)sset->user_addr; ++ size = (size_t)sset->size; + -+ /* terminal fault, print info about the fault */ -+ dev_err(kbdev->dev, -+ "Unhandled Page fault in AS%d at VA 0x%016llX\n" -+ "Reason: %s\n" -+ "raw fault status: 0x%X\n" -+ "decoded fault status: %s\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n" -+ "pid: %d\n", -+ as_no, as->fault_addr, -+ reason_str, -+ as->fault_status, -+ (as->fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), -+ exception_type, kbase_exception_name(kbdev, exception_type), -+ access_type, access_type_name(kbdev, as->fault_status), -+ source_id, -+ kctx->pid); ++ map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); ++ if (!map) { ++ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", ++ start, sset->mem_handle.basep.handle); ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ /* hardware counters dump fault handling */ -+ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && -+ (kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_DUMPING)) { -+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; ++ page_off = offset >> PAGE_SHIFT; ++ offset &= ~PAGE_MASK; ++ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; ++ cpu_pa = kbase_get_cpu_phy_pages(reg); ++ gpu_pa = kbase_get_gpu_phy_pages(reg); + -+ if ((as->fault_addr >= kbdev->hwcnt.addr) && -+ (as->fault_addr < (kbdev->hwcnt.addr + -+ (num_core_groups * 2048)))) -+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++ if (page_off > reg->nr_pages || ++ page_off + page_count > reg->nr_pages) { ++ /* Sync overflows the region */ ++ err = -EINVAL; ++ goto out_unlock; + } + -+ /* Stop the kctx from submitting more jobs and cause it to be scheduled -+ * out/rescheduled - this will occur on releasing the context's refcount */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ /* Sync first page */ ++ if (cpu_pa[page_off]) { ++ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + -+ /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this -+ * context can appear in the job slots from this point on */ -+ kbase_backend_jm_kill_jobs_from_kctx(kctx); -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+#if KBASE_GPU_RESET_EN -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { -+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. -+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs -+ * are evicted from the GPU before the switch. -+ */ -+ dev_err(kbdev->dev, "Unhandled page fault. 
For this GPU version we now soft-reset the GPU as part of page fault recovery."); -+ reset_status = kbase_prepare_to_reset_gpu(kbdev); ++ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], ++ offset, sz, sync_fn); + } -+#endif /* KBASE_GPU_RESET_EN */ -+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_mmu_disable(kctx); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ -+ /* Clear down the fault */ -+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ /* Sync middle pages (if any) */ ++ for (i = 1; page_count > 2 && i < page_count - 1; i++) { ++ /* we grow upwards, so bail on first non-present page */ ++ if (!cpu_pa[page_off + i]) ++ break; + -+#if KBASE_GPU_RESET_EN -+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) -+ kbase_reset_gpu(kbdev); -+#endif /* KBASE_GPU_RESET_EN */ -+} ++ kbase_sync_single(kctx, cpu_pa[page_off + i], ++ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); ++ } + -+void kbasep_as_do_poke(struct work_struct *work) -+{ -+ struct kbase_as *as; -+ struct kbase_device *kbdev; -+ struct kbase_context *kctx; -+ unsigned long flags; ++ /* Sync last page (if any) */ ++ if (page_count > 1 && cpu_pa[page_off + page_count - 1]) { ++ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; + -+ KBASE_DEBUG_ASSERT(work); -+ as = container_of(work, struct kbase_as, poke_work); -+ kbdev = container_of(as, struct kbase_device, as[as->number]); -+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); ++ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], ++ gpu_pa[page_off + page_count - 1], 0, sz, ++ sync_fn); ++ } + -+ /* GPU power will already be active by virtue of the caller holding a JS -+ * reference on the address space, and will not release it until this worker -+ * has finished */ ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ kbase_os_mem_map_unlock(kctx); ++ return err; ++} + -+ /* Further to the comment above, we know that while this function is running -+ * the AS will not be released as before the atom is released this workqueue -+ * is flushed (in kbase_as_poking_timer_release_atom) -+ */ -+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); ++int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) ++{ ++ int err = -EINVAL; + -+ /* AS transaction begin */ -+ mutex_lock(&kbdev->mmu_hw_mutex); -+ /* Force a uTLB invalidate */ -+ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, -+ AS_COMMAND_UNLOCK, 0); -+ mutex_unlock(&kbdev->mmu_hw_mutex); -+ /* AS transaction end */ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(sset != NULL); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ if (as->poke_refcount && -+ !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { -+ /* Only queue up the timer if we need it, and we're not trying to kill it */ -+ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); ++ if (sset->mem_handle.basep.handle & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, ++ "mem_handle: passed parameter is invalid"); ++ return -EINVAL; + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+} + -+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) -+{ -+ struct kbase_as *as; -+ int queue_work_ret; ++ switch (sset->type) { ++ case 
BASE_SYNCSET_OP_MSYNC: ++ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); ++ break; + -+ KBASE_DEBUG_ASSERT(NULL != timer); -+ as = container_of(timer, struct kbase_as, poke_timer); -+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); ++ case BASE_SYNCSET_OP_CSYNC: ++ err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); ++ break; + -+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work); -+ KBASE_DEBUG_ASSERT(queue_work_ret); -+ return HRTIMER_NORESTART; ++ default: ++ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); ++ break; ++ } ++ ++ return err; +} + -+/** -+ * Retain the poking timer on an atom's context (if the atom hasn't already -+ * done so), and start the timer (if it's not already started). -+ * -+ * This must only be called on a context that's scheduled in, and an atom -+ * that's running on the GPU. -+ * -+ * The caller must hold hwaccess_lock -+ * -+ * This can be called safely from atomic context -+ */ -+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) -+{ -+ struct kbase_as *as; ++KBASE_EXPORT_TEST_API(kbase_sync_now); + -+ KBASE_DEBUG_ASSERT(kbdev); -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++/* vm lock must be held */ ++int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) ++{ ++ int err; + -+ if (katom->poking) -+ return; ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != reg); ++ lockdep_assert_held(&kctx->reg_lock); + -+ katom->poking = 1; ++ /* ++ * Unlink the physical allocation before unmaking it evictable so ++ * that the allocation isn't grown back to its last backed size ++ * as we're going to unmap it anyway. ++ */ ++ reg->cpu_alloc->reg = NULL; ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ reg->gpu_alloc->reg = NULL; + -+ /* It's safe to work on the as/as_nr without an explicit reference, -+ * because the caller holds the hwaccess_lock, and the atom itself -+ * was also running and had already taken a reference */ -+ as = &kbdev->as[kctx->as_nr]; ++ /* ++ * If a region has been made evictable then we must unmake it ++ * before trying to free it. ++ * If the memory hasn't been reclaimed it will be unmapped and freed ++ * below, if it has been reclaimed then the operations below are no-ops. ++ */ ++ if (reg->flags & KBASE_REG_DONT_NEED) { ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == ++ KBASE_MEM_TYPE_NATIVE); ++ kbase_mem_evictable_unmake(reg->gpu_alloc); ++ } + -+ if (++(as->poke_refcount) == 1) { -+ /* First refcount for poke needed: check if not already in flight */ -+ if (!as->poke_state) { -+ /* need to start poking */ -+ as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT; -+ queue_work(as->poke_wq, &as->poke_work); -+ } ++ err = kbase_gpu_munmap(kctx, reg); ++ if (err) { ++ dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); ++ goto out; + } ++ ++ /* This will also free the physical pages */ ++ kbase_free_alloced_region(reg); ++ ++ out: ++ return err; +} + ++KBASE_EXPORT_TEST_API(kbase_mem_free_region); ++ +/** -+ * If an atom holds a poking timer, release it and wait for it to finish -+ * -+ * This must only be called on a context that's scheduled in, and an atom -+ * that still has a JS reference on the context ++ * @brief Free the region from the GPU and unregister it. + * -+ * This must \b not be called from atomic context, since it can sleep. 
++ * This function implements the free operation on a memory segment. ++ * It will loudly fail if called with outstanding mappings. + */ -+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) ++int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) +{ -+ struct kbase_as *as; -+ unsigned long flags; ++ int err = 0; ++ struct kbase_va_region *reg; + -+ KBASE_DEBUG_ASSERT(kbdev); -+ KBASE_DEBUG_ASSERT(kctx); -+ KBASE_DEBUG_ASSERT(katom); -+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + -+ if (!katom->poking) -+ return; ++ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { ++ dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); ++ return -EINVAL; ++ } + -+ as = &kbdev->as[kctx->as_nr]; ++ if (0 == gpu_addr) { ++ dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); ++ return -EINVAL; ++ } ++ kbase_gpu_vm_lock(kctx); + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ KBASE_DEBUG_ASSERT(as->poke_refcount > 0); -+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); ++ if (gpu_addr >= BASE_MEM_COOKIE_BASE && ++ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { ++ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + -+ if (--(as->poke_refcount) == 0) { -+ as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ reg = kctx->pending_regions[cookie]; ++ if (!reg) { ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ hrtimer_cancel(&as->poke_timer); -+ flush_workqueue(as->poke_wq); ++ /* ask to unlink the cookie as we'll free it */ + -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx->pending_regions[cookie] = NULL; ++ kctx->cookies |= (1UL << cookie); + -+ /* Re-check whether it's still needed */ -+ if (as->poke_refcount) { -+ int queue_work_ret; -+ /* Poking still needed: -+ * - Another retain will not be starting the timer or queueing work, -+ * because it's still marked as in-flight -+ * - The hrtimer has finished, and has not started a new timer or -+ * queued work because it's been marked as killing -+ * -+ * So whatever happens now, just queue the work again */ -+ as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE); -+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work); -+ KBASE_DEBUG_ASSERT(queue_work_ret); -+ } else { -+ /* It isn't - so mark it as not in flight, and not killing */ -+ as->poke_state = 0u; ++ kbase_free_alloced_region(reg); ++ } else { ++ /* A real GPU va */ ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (!reg || (reg->flags & KBASE_REG_FREE)) { ++ dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", ++ gpu_addr); ++ err = -EINVAL; ++ goto out_unlock; ++ } + -+ /* The poke associated with the atom has now finished. 
If this is -+ * also the last atom on the context, then we can guarentee no more -+ * pokes (and thus no more poking register accesses) will occur on -+ * the context until new atoms are run */ ++ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { ++ /* SAME_VA must be freed through munmap */ ++ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, ++ gpu_addr); ++ err = -EINVAL; ++ goto out_unlock; + } ++ err = kbase_mem_free_region(kctx, reg); + } -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ katom->poking = 0; ++ out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return err; +} + -+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as) ++KBASE_EXPORT_TEST_API(kbase_mem_free); ++ ++int kbase_update_region_flags(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned long flags) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); ++ /* all memory is now growable */ ++ reg->flags |= KBASE_REG_GROWABLE; + -+ if (!kctx) { -+ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n", -+ kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault", -+ as->number, as->fault_addr); ++ if (flags & BASE_MEM_GROW_ON_GPF) ++ reg->flags |= KBASE_REG_PF_GROW; + -+ /* Since no ctx was found, the MMU must be disabled. */ -+ WARN_ON(as->current_setup.transtab); ++ if (flags & BASE_MEM_PROT_CPU_WR) ++ reg->flags |= KBASE_REG_CPU_WR; + -+ if (kbase_as_has_bus_fault(as)) { -+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); -+ } else if (kbase_as_has_page_fault(as)) { -+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -+ } ++ if (flags & BASE_MEM_PROT_CPU_RD) ++ reg->flags |= KBASE_REG_CPU_RD; + -+#if KBASE_GPU_RESET_EN -+ if (kbase_as_has_bus_fault(as) && -+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { -+ bool reset_status; -+ /* -+ * Reset the GPU, like in bus_fault_worker, in case an -+ * earlier error hasn't been properly cleared by this -+ * point. -+ */ -+ dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); -+ reset_status = kbase_prepare_to_reset_gpu_locked(kbdev); -+ if (reset_status) -+ kbase_reset_gpu_locked(kbdev); -+ } -+#endif /* KBASE_GPU_RESET_EN */ ++ if (flags & BASE_MEM_PROT_GPU_WR) ++ reg->flags |= KBASE_REG_GPU_WR; + -+ return; -+ } ++ if (flags & BASE_MEM_PROT_GPU_RD) ++ reg->flags |= KBASE_REG_GPU_RD; + -+ if (kbase_as_has_bus_fault(as)) { -+ /* -+ * hw counters dumping in progress, signal the -+ * other thread that it failed -+ */ -+ if ((kbdev->hwcnt.kctx == kctx) && -+ (kbdev->hwcnt.backend.state == -+ KBASE_INSTR_STATE_DUMPING)) -+ kbdev->hwcnt.backend.state = -+ KBASE_INSTR_STATE_FAULT; ++ if (0 == (flags & BASE_MEM_PROT_GPU_EX)) ++ reg->flags |= KBASE_REG_GPU_NX; + -+ /* -+ * Stop the kctx from submitting more jobs and cause it -+ * to be scheduled out/rescheduled when all references -+ * to it are released -+ */ -+ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) ++ return -EINVAL; ++ } else if (flags & (BASE_MEM_COHERENT_SYSTEM | ++ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { ++ reg->flags |= KBASE_REG_SHARE_BOTH; ++ } + -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) -+ dev_warn(kbdev->dev, -+ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", -+ as->number, as->fault_addr, -+ as->fault_extra_addr); -+ else -+ dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", -+ as->number, as->fault_addr); ++ if (!(reg->flags & KBASE_REG_SHARE_BOTH) && ++ flags & BASE_MEM_COHERENT_LOCAL) { ++ reg->flags |= KBASE_REG_SHARE_IN; ++ } + -+ /* -+ * We need to switch to UNMAPPED mode - but we do this in a -+ * worker so that we can sleep -+ */ -+ kbdev->kbase_group_error++; -+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault)); -+ WARN_ON(work_pending(&as->work_busfault)); -+ queue_work(as->pf_wq, &as->work_busfault); -+ atomic_inc(&kbdev->faults_pending); ++ /* Set up default MEMATTR usage */ ++ if (kctx->kbdev->system_coherency == COHERENCY_ACE && ++ (reg->flags & KBASE_REG_SHARE_BOTH)) { ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { -+ kbdev->kbase_group_error++; -+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault)); -+ WARN_ON(work_pending(&as->work_pagefault)); -+ queue_work(as->pf_wq, &as->work_pagefault); -+ atomic_inc(&kbdev->faults_pending); ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + } ++ ++ return 0; +} + -+void kbase_flush_mmu_wqs(struct kbase_device *kbdev) ++int kbase_alloc_phy_pages_helper( ++ struct kbase_mem_phy_alloc *alloc, ++ size_t nr_pages_requested) +{ -+ int i; -+ -+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { -+ struct kbase_as *as = &kbdev->as[i]; -+ -+ flush_workqueue(as->pf_wq); -+ } -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h -new file mode 100644 -index 000000000..986e959e9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h -@@ -0,0 +1,123 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
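kbase_update_region_flags() in this hunk translates the user-visible BASE_MEM_* allocation flags into the driver's internal KBASE_REG_* region bits, with execute permission expressed negatively as a no-execute bit. A minimal userspace model of that translation follows; the bit positions are invented for the example and do not match the real headers.

#include <stdint.h>

/* Illustrative bit values only. */
#define BASE_MEM_PROT_CPU_RD  (1u << 0)
#define BASE_MEM_PROT_CPU_WR  (1u << 1)
#define BASE_MEM_PROT_GPU_RD  (1u << 2)
#define BASE_MEM_PROT_GPU_WR  (1u << 3)
#define BASE_MEM_PROT_GPU_EX  (1u << 4)

#define KBASE_REG_CPU_RD      (1u << 0)
#define KBASE_REG_CPU_WR      (1u << 1)
#define KBASE_REG_GPU_RD      (1u << 2)
#define KBASE_REG_GPU_WR      (1u << 3)
#define KBASE_REG_GPU_NX      (1u << 4)

uint32_t region_flags_from_base(uint32_t base_flags)
{
	uint32_t reg_flags = 0;

	if (base_flags & BASE_MEM_PROT_CPU_RD)
		reg_flags |= KBASE_REG_CPU_RD;
	if (base_flags & BASE_MEM_PROT_CPU_WR)
		reg_flags |= KBASE_REG_CPU_WR;
	if (base_flags & BASE_MEM_PROT_GPU_RD)
		reg_flags |= KBASE_REG_GPU_RD;
	if (base_flags & BASE_MEM_PROT_GPU_WR)
		reg_flags |= KBASE_REG_GPU_WR;
	/* Executable is the exception: no-execute is set unless asked for. */
	if (!(base_flags & BASE_MEM_PROT_GPU_EX))
		reg_flags |= KBASE_REG_GPU_NX;

	return reg_flags;
}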
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ int new_page_count __maybe_unused; ++ size_t old_page_count = alloc->nents; + ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); ++ KBASE_DEBUG_ASSERT(alloc->imported.kctx); + ++ if (nr_pages_requested == 0) ++ goto done; /*nothing to do*/ + -+/** -+ * @file -+ * Interface file for accessing MMU hardware functionality -+ */ ++ new_page_count = kbase_atomic_add_pages( ++ nr_pages_requested, &alloc->imported.kctx->used_pages); ++ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + -+/** -+ * @page mali_kbase_mmu_hw_page MMU hardware interface -+ * -+ * @section mali_kbase_mmu_hw_intro_sec Introduction -+ * This module provides an abstraction for accessing the functionality provided -+ * by the midgard MMU and thus allows all MMU HW access to be contained within -+ * one common place and allows for different backends (implementations) to -+ * be provided. -+ */ ++ /* Increase mm counters before we allocate pages so that this ++ * allocation is visible to the OOM killer */ ++ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); + -+#ifndef _MALI_KBASE_MMU_HW_H_ -+#define _MALI_KBASE_MMU_HW_H_ ++ if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool, ++ nr_pages_requested, alloc->pages + old_page_count) != 0) ++ goto no_alloc; + -+/* Forward declarations */ -+struct kbase_device; -+struct kbase_as; -+struct kbase_context; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)alloc->imported.kctx->id, ++ (u64)new_page_count); + -+/** -+ * @addtogroup base_kbase_api -+ * @{ -+ */ ++ alloc->nents += nr_pages_requested; ++done: ++ return 0; + -+/** -+ * @addtogroup mali_kbase_mmu_hw MMU access APIs -+ * @{ -+ */ ++no_alloc: ++ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested); ++ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); ++ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + -+/** @brief MMU fault type descriptor. -+ */ -+enum kbase_mmu_fault_type { -+ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, -+ KBASE_MMU_FAULT_TYPE_PAGE, -+ KBASE_MMU_FAULT_TYPE_BUS, -+ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, -+ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED -+}; ++ return -ENOMEM; ++} + -+/** @brief Configure an address space for use. -+ * -+ * Configure the MMU using the address space details setup in the -+ * @ref kbase_context structure. -+ * -+ * @param[in] kbdev kbase device to configure. -+ * @param[in] as address space to configure. -+ * @param[in] kctx kbase context to configure. -+ */ -+void kbase_mmu_hw_configure(struct kbase_device *kbdev, -+ struct kbase_as *as, struct kbase_context *kctx); ++int kbase_free_phy_pages_helper( ++ struct kbase_mem_phy_alloc *alloc, ++ size_t nr_pages_to_free) ++{ ++ struct kbase_context *kctx = alloc->imported.kctx; ++ bool syncback; ++ bool reclaimed = (alloc->evicted != 0); ++ phys_addr_t *start_free; ++ int new_page_count __maybe_unused; + -+/** @brief Issue an operation to the MMU. -+ * -+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that -+ * is associated with the provided @ref kbase_context over the specified range -+ * -+ * @param[in] kbdev kbase device to issue the MMU operation on. -+ * @param[in] as address space to issue the MMU operation on. 
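kbase_alloc_phy_pages_helper() charges the per-context and per-device page counters before it asks the memory pool for pages, so the allocation is already visible to out-of-memory accounting while the pool works, and it rolls the charge back if the pool allocation fails. A simplified, single-threaded model of that pattern:

#include <stddef.h>
#include <stdbool.h>

struct page_accounting {
	size_t ctx_used_pages;     /* per-context counter */
	size_t dev_used_pages;     /* device-wide counter */
};

/* Stand-in for the memory pool; fails above an arbitrary limit. */
static bool pool_alloc_pages(size_t nr_pages)
{
	return nr_pages < 1024;
}

int alloc_pages_accounted(struct page_accounting *acct, size_t nr_pages)
{
	if (nr_pages == 0)
		return 0;

	/* Charge the counters first so the allocation is visible to any
	 * out-of-memory accounting before the pool does its work. */
	acct->ctx_used_pages += nr_pages;
	acct->dev_used_pages += nr_pages;

	if (!pool_alloc_pages(nr_pages)) {
		/* Roll the charge back on failure. */
		acct->ctx_used_pages -= nr_pages;
		acct->dev_used_pages -= nr_pages;
		return -1;
	}
	return 0;
}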
-+ * @param[in] kctx kbase context to issue the MMU operation on. -+ * @param[in] vpfn MMU Virtual Page Frame Number to start the -+ * operation on. -+ * @param[in] nr Number of pages to work on. -+ * @param[in] type Operation type (written to ASn_COMMAND). -+ * @param[in] handling_irq Is this operation being called during the handling -+ * of an interrupt? -+ * -+ * @return Zero if the operation was successful, non-zero otherwise. -+ */ -+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type, -+ unsigned int handling_irq); ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); ++ KBASE_DEBUG_ASSERT(alloc->imported.kctx); ++ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + -+/** @brief Clear a fault that has been previously reported by the MMU. -+ * -+ * Clear a bus error or page fault that has been reported by the MMU. -+ * -+ * @param[in] kbdev kbase device to clear the fault from. -+ * @param[in] as address space to clear the fault from. -+ * @param[in] kctx kbase context to clear the fault from or NULL. -+ * @param[in] type The type of fault that needs to be cleared. -+ */ -+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, enum kbase_mmu_fault_type type); ++ /* early out if nothing to do */ ++ if (0 == nr_pages_to_free) ++ return 0; + -+/** @brief Enable fault that has been previously reported by the MMU. -+ * -+ * After a page fault or bus error has been reported by the MMU these -+ * will be disabled. After these are handled this function needs to be -+ * called to enable the page fault or bus error fault again. -+ * -+ * @param[in] kbdev kbase device to again enable the fault from. -+ * @param[in] as address space to again enable the fault from. -+ * @param[in] kctx kbase context to again enable the fault from. -+ * @param[in] type The type of fault that needs to be enabled again. -+ */ -+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, -+ struct kbase_context *kctx, enum kbase_mmu_fault_type type); ++ start_free = alloc->pages + alloc->nents - nr_pages_to_free; + -+/** @} *//* end group mali_kbase_mmu_hw */ -+/** @} *//* end group base_kbase_api */ ++ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + -+#endif /* _MALI_KBASE_MMU_HW_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h -new file mode 100644 -index 000000000..b487c0042 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h -@@ -0,0 +1,47 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ kbase_mem_pool_free_pages(&kctx->mem_pool, ++ nr_pages_to_free, ++ start_free, ++ syncback, ++ reclaimed); + ++ alloc->nents -= nr_pages_to_free; + ++ /* ++ * If the allocation was not evicted (i.e. evicted == 0) then ++ * the page accounting needs to be done. 
++ */ ++ if (!reclaimed) { ++ kbase_process_page_usage_dec(kctx, nr_pages_to_free); ++ new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, ++ &kctx->used_pages); ++ kbase_atomic_sub_pages(nr_pages_to_free, ++ &kctx->kbdev->memdev.used_pages); + ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)kctx->id, ++ (u64)new_page_count); ++ } + ++ return 0; ++} + -+#ifndef _MALI_KBASE_MMU_MODE_ -+#define _MALI_KBASE_MMU_MODE_ ++void kbase_mem_kref_free(struct kref *kref) ++{ ++ struct kbase_mem_phy_alloc *alloc; + -+#include ++ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + -+/* Forward declarations */ -+struct kbase_context; -+struct kbase_device; -+struct kbase_as; -+struct kbase_mmu_setup; ++ switch (alloc->type) { ++ case KBASE_MEM_TYPE_NATIVE: { ++ WARN_ON(!alloc->imported.kctx); ++ /* ++ * The physical allocation must have been removed from the ++ * eviction list before trying to free it. ++ */ ++ WARN_ON(!list_empty(&alloc->evict_node)); ++ kbase_free_phy_pages_helper(alloc, alloc->nents); ++ break; ++ } ++ case KBASE_MEM_TYPE_ALIAS: { ++ /* just call put on the underlying phy allocs */ ++ size_t i; ++ struct kbase_aliased *aliased; + -+struct kbase_mmu_mode { -+ void (*update)(struct kbase_context *kctx); -+ void (*get_as_setup)(struct kbase_context *kctx, -+ struct kbase_mmu_setup * const setup); -+ void (*disable_as)(struct kbase_device *kbdev, int as_nr); -+ phys_addr_t (*pte_to_phy_addr)(u64 entry); -+ int (*ate_is_valid)(u64 ate); -+ int (*pte_is_valid)(u64 pte); -+ void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags); -+ void (*entry_set_pte)(u64 *entry, phys_addr_t phy); -+ void (*entry_invalidate)(u64 *entry); -+}; ++ aliased = alloc->imported.alias.aliased; ++ if (aliased) { ++ for (i = 0; i < alloc->imported.alias.nents; i++) ++ if (aliased[i].alloc) ++ kbase_mem_phy_alloc_put(aliased[i].alloc); ++ vfree(aliased); ++ } ++ break; ++ } ++ case KBASE_MEM_TYPE_RAW: ++ /* raw pages, external cleanup */ ++ break; ++ #ifdef CONFIG_UMP ++ case KBASE_MEM_TYPE_IMPORTED_UMP: ++ ump_dd_release(alloc->imported.ump_handle); ++ break; ++#endif ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case KBASE_MEM_TYPE_IMPORTED_UMM: ++ dma_buf_detach(alloc->imported.umm.dma_buf, ++ alloc->imported.umm.dma_attachment); ++ dma_buf_put(alloc->imported.umm.dma_buf); ++ break; ++#endif ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ if (alloc->imported.user_buf.mm) ++ mmdrop(alloc->imported.user_buf.mm); ++ kfree(alloc->imported.user_buf.pages); ++ break; ++ case KBASE_MEM_TYPE_TB:{ ++ void *tb; + -+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); -+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); ++ tb = alloc->imported.kctx->jctx.tb; ++ kbase_device_trace_buffer_uninstall(alloc->imported.kctx); ++ vfree(tb); ++ break; ++ } ++ default: ++ WARN(1, "Unexecpted free of type %d\n", alloc->type); ++ break; ++ } + -+#endif /* _MALI_KBASE_MMU_MODE_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c -new file mode 100644 -index 000000000..60df17116 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c -@@ -0,0 +1,200 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2014, 2016, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
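kbase_alloc_phy_pages() rejects any vsize whose page-array byte size would wrap around, instead of letting vsize * sizeof(*pages) overflow into a too-small allocation. The same guard can be written as a small helper, shown here against a hypothetical 8-byte element type:

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

/* True if nelems * elem_size would not fit in size_t. */
static bool mul_would_overflow(size_t nelems, size_t elem_size)
{
	return nelems > SIZE_MAX / elem_size;
}

/* Example check for an array of 8-byte entries: anything above
 * SIZE_MAX / 8 elements is refused before the multiplication happens. */
bool page_array_size_ok(size_t vsize)
{
	return !mul_would_overflow(vsize, sizeof(uint64_t));
}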
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ /* Free based on allocation type */ ++ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) ++ vfree(alloc); ++ else ++ kfree(alloc); ++} + ++KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + ++int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) ++{ ++ KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT(vsize > 0); + ++ /* validate user provided arguments */ ++ if (size > vsize || vsize > reg->nr_pages) ++ goto out_term; + ++ /* Prevent vsize*sizeof from wrapping around. ++ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. ++ */ ++ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) ++ goto out_term; + -+#include "mali_kbase_mmu_mode.h" ++ KBASE_DEBUG_ASSERT(0 != vsize); + -+#include "mali_kbase.h" -+#include "mali_midg_regmap.h" ++ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) ++ goto out_term; + -+#define ENTRY_TYPE_MASK 3ULL -+/* For valid ATEs bit 1 = (level == 3) ? 1 : 0. -+ * The MMU is only ever configured by the driver so that ATEs -+ * are at level 3, so bit 1 should always be set -+ */ -+#define ENTRY_IS_ATE 3ULL -+#define ENTRY_IS_INVAL 2ULL -+#define ENTRY_IS_PTE 3ULL ++ reg->cpu_alloc->reg = reg; ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) ++ goto out_rollback; ++ reg->gpu_alloc->reg = reg; ++ } + -+#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -+#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ -+#define ENTRY_ACCESS_RO (3ULL << 6) -+#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -+#define ENTRY_ACCESS_BIT (1ULL << 10) -+#define ENTRY_NX_BIT (1ULL << 54) ++ return 0; + -+/* Helper Function to perform assignment of page table entries, to -+ * ensure the use of strd, which is required on LPAE systems. -+ */ -+static inline void page_table_entry_set(u64 *pte, u64 phy) -+{ -+#ifdef CONFIG_64BIT -+ *pte = phy; -+#elif defined(CONFIG_ARM) -+ /* -+ * In order to prevent the compiler keeping cached copies of -+ * memory, we have to explicitly say that we have updated memory. -+ * -+ * Note: We could manually move the data ourselves into R0 and -+ * R1 by specifying register variables that are explicitly -+ * given registers assignments, the down side of this is that -+ * we have to assume cpu endianness. To avoid this we can use -+ * the ldrd to read the data from memory into R0 and R1 which -+ * will respect the cpu endianness, we then use strd to make -+ * the 64 bit assignment to the page table entry. -+ */ -+ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" -+ "strd r0, r1, [%[pte]]\n\t" -+ : "=m" (*pte) -+ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) -+ : "r0", "r1"); -+#else -+#error "64-bit atomic write must be implemented for your architecture" -+#endif ++out_rollback: ++ kbase_free_phy_pages_helper(reg->cpu_alloc, size); ++out_term: ++ return -1; +} + -+static void mmu_get_as_setup(struct kbase_context *kctx, -+ struct kbase_mmu_setup * const setup) -+{ -+ /* Set up the required caching policies at the correct indices -+ * in the memattr register. 
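When the CPU and GPU sides of a region use separate physical allocations, kbase_alloc_phy_pages() has to undo the CPU-side allocation if the GPU-side one fails. A compact sketch of that rollback ordering, with stub allocators standing in for the real helpers:

#include <stddef.h>
#include <stdbool.h>

/* Stubs standing in for kbase_alloc/free_phy_pages_helper(). */
static bool alloc_backing(size_t pages) { return pages <= 4096; }
static void free_backing(size_t pages)  { (void)pages; }

int alloc_cpu_and_gpu_backing(size_t pages, bool split_alloc)
{
	if (!alloc_backing(pages))                   /* CPU-side backing */
		return -1;

	if (split_alloc && !alloc_backing(pages)) {  /* GPU-side backing */
		free_backing(pages);                 /* roll back the CPU side */
		return -1;
	}
	return 0;
}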
-+ */ -+ setup->memattr = -+ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << -+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | -+ (AS_MEMATTR_FORCE_TO_CACHE_ALL << -+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | -+ (AS_MEMATTR_WRITE_ALLOC << -+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << -+ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | -+ (AS_MEMATTR_AARCH64_OUTER_WA << -+ (AS_MEMATTR_INDEX_OUTER_WA * 8)); -+ -+ setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK; -+ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; -+} ++KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); + -+static void mmu_update(struct kbase_context *kctx) ++bool kbase_check_alloc_flags(unsigned long flags) +{ -+ struct kbase_device * const kbdev = kctx->kbdev; -+ struct kbase_as * const as = &kbdev->as[kctx->as_nr]; -+ struct kbase_mmu_setup * const current_setup = &as->current_setup; -+ -+ mmu_get_as_setup(kctx, current_setup); ++ /* Only known input flags should be set. */ ++ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) ++ return false; + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as, kctx); -+} ++ /* At least one flag should be set */ ++ if (flags == 0) ++ return false; + -+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -+{ -+ struct kbase_as * const as = &kbdev->as[as_nr]; -+ struct kbase_mmu_setup * const current_setup = &as->current_setup; ++ /* Either the GPU or CPU must be reading from the allocated memory */ ++ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) ++ return false; + -+ current_setup->transtab = 0ULL; -+ current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; ++ /* Either the GPU or CPU must be writing to the allocated memory */ ++ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as, NULL); -+} ++ /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */ ++ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) ++ return false; + -+static phys_addr_t pte_to_phy_addr(u64 entry) -+{ -+ if (!(entry & 1)) -+ return 0; ++ /* GPU should have at least read or write access otherwise there is no ++ reason for allocating. */ ++ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+ return entry & ~0xFFF; -+} ++ /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ ++ if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) ++ return false; + -+static int ate_is_valid(u64 ate) -+{ -+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); ++ return true; +} + -+static int pte_is_valid(u64 pte) ++bool kbase_check_import_flags(unsigned long flags) +{ -+ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -+} ++ /* Only known input flags should be set. 
*/ ++ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) ++ return false; + -+/* -+ * Map KBASE_REG flags to MMU flags -+ */ -+static u64 get_mmu_flags(unsigned long flags) -+{ -+ u64 mmu_flags; ++ /* At least one flag should be set */ ++ if (flags == 0) ++ return false; + -+ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ -+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; ++ /* Imported memory cannot be GPU executable */ ++ if (flags & BASE_MEM_PROT_GPU_EX) ++ return false; + -+ /* Set access flags - note that AArch64 stage 1 does not support -+ * write-only access, so we use read/write instead -+ */ -+ if (flags & KBASE_REG_GPU_WR) -+ mmu_flags |= ENTRY_ACCESS_RW; -+ else if (flags & KBASE_REG_GPU_RD) -+ mmu_flags |= ENTRY_ACCESS_RO; ++ /* Imported memory cannot grow on page fault */ ++ if (flags & BASE_MEM_GROW_ON_GPF) ++ return false; + -+ /* nx if requested */ -+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; ++ /* GPU should have at least read or write access otherwise there is no ++ reason for importing. */ ++ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) ++ return false; + -+ if (flags & KBASE_REG_SHARE_BOTH) { -+ /* inner and outer shareable */ -+ mmu_flags |= SHARE_BOTH_BITS; -+ } else if (flags & KBASE_REG_SHARE_IN) { -+ /* inner shareable coherency */ -+ mmu_flags |= SHARE_INNER_BITS; -+ } ++ /* Secure memory cannot be read by the CPU */ ++ if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD)) ++ return false; + -+ return mmu_flags; ++ return true; +} + -+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) ++/** ++ * @brief Acquire the per-context region list lock ++ */ ++void kbase_gpu_vm_lock(struct kbase_context *kctx) +{ -+ page_table_entry_set(entry, (phy & ~0xFFF) | -+ get_mmu_flags(flags) | -+ ENTRY_ACCESS_BIT | ENTRY_IS_ATE); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ mutex_lock(&kctx->reg_lock); +} + -+static void entry_set_pte(u64 *entry, phys_addr_t phy) -+{ -+ page_table_entry_set(entry, (phy & ~0xFFF) | -+ ENTRY_ACCESS_BIT | ENTRY_IS_PTE); -+} ++KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); + -+static void entry_invalidate(u64 *entry) ++/** ++ * @brief Release the per-context region list lock ++ */ ++void kbase_gpu_vm_unlock(struct kbase_context *kctx) +{ -+ page_table_entry_set(entry, ENTRY_IS_INVAL); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ mutex_unlock(&kctx->reg_lock); +} + -+static struct kbase_mmu_mode const aarch64_mode = { -+ .update = mmu_update, -+ .get_as_setup = mmu_get_as_setup, -+ .disable_as = mmu_disable_as, -+ .pte_to_phy_addr = pte_to_phy_addr, -+ .ate_is_valid = ate_is_valid, -+ .pte_is_valid = pte_is_valid, -+ .entry_set_ate = entry_set_ate, -+ .entry_set_pte = entry_set_pte, -+ .entry_invalidate = entry_invalidate ++KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); ++ ++#ifdef CONFIG_DEBUG_FS ++struct kbase_jit_debugfs_data { ++ int (*func)(struct kbase_jit_debugfs_data *); ++ struct mutex lock; ++ struct kbase_context *kctx; ++ u64 active_value; ++ u64 pool_value; ++ u64 destroy_value; ++ char buffer[50]; +}; + -+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) ++static int kbase_jit_debugfs_common_open(struct inode *inode, ++ struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ -+ return &aarch64_mode; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c -new file mode 100644 -index 000000000..53fbbc73a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c -@@ 
-0,0 +1,198 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ struct kbase_jit_debugfs_data *data; + ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; + ++ data->func = func; ++ mutex_init(&data->lock); ++ data->kctx = (struct kbase_context *) inode->i_private; + ++ file->private_data = data; + ++ return nonseekable_open(inode, file); ++} + -+#include "mali_kbase_mmu_mode.h" ++static ssize_t kbase_jit_debugfs_common_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) ++{ ++ struct kbase_jit_debugfs_data *data; ++ size_t size; ++ int ret; + -+#include "mali_kbase.h" -+#include "mali_midg_regmap.h" ++ data = (struct kbase_jit_debugfs_data *) file->private_data; ++ mutex_lock(&data->lock); + -+#define ENTRY_TYPE_MASK 3ULL -+#define ENTRY_IS_ATE 1ULL -+#define ENTRY_IS_INVAL 2ULL -+#define ENTRY_IS_PTE 3ULL ++ if (*ppos) { ++ size = strnlen(data->buffer, sizeof(data->buffer)); ++ } else { ++ if (!data->func) { ++ ret = -EACCES; ++ goto out_unlock; ++ } + -+#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -+#define ENTRY_RD_BIT (1ULL << 6) -+#define ENTRY_WR_BIT (1ULL << 7) -+#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -+#define ENTRY_ACCESS_BIT (1ULL << 10) -+#define ENTRY_NX_BIT (1ULL << 54) ++ if (data->func(data)) { ++ ret = -EACCES; ++ goto out_unlock; ++ } + -+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ -+ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) ++ size = scnprintf(data->buffer, sizeof(data->buffer), ++ "%llu,%llu,%llu", data->active_value, ++ data->pool_value, data->destroy_value); ++ } + -+/* Helper Function to perform assignment of page table entries, to -+ * ensure the use of strd, which is required on LPAE systems. -+ */ -+static inline void page_table_entry_set(u64 *pte, u64 phy) -+{ -+#ifdef CONFIG_64BIT -+ *pte = phy; -+#elif defined(CONFIG_ARM) -+ /* -+ * In order to prevent the compiler keeping cached copies of -+ * memory, we have to explicitly say that we have updated -+ * memory. -+ * -+ * Note: We could manually move the data ourselves into R0 and -+ * R1 by specifying register variables that are explicitly -+ * given registers assignments, the down side of this is that -+ * we have to assume cpu endianness. To avoid this we can use -+ * the ldrd to read the data from memory into R0 and R1 which -+ * will respect the cpu endianness, we then use strd to make -+ * the 64 bit assignment to the page table entry. 
-+ */ -+ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" -+ "strd r0, r1, [%[pte]]\n\t" -+ : "=m" (*pte) -+ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) -+ : "r0", "r1"); -+#else -+#error "64-bit atomic write must be implemented for your architecture" -+#endif ++ ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); ++ ++out_unlock: ++ mutex_unlock(&data->lock); ++ return ret; +} + -+static void mmu_get_as_setup(struct kbase_context *kctx, -+ struct kbase_mmu_setup * const setup) ++static int kbase_jit_debugfs_common_release(struct inode *inode, ++ struct file *file) +{ -+ /* Set up the required caching policies at the correct indices -+ * in the memattr register. */ -+ setup->memattr = -+ (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << -+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | -+ (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << -+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | -+ (AS_MEMATTR_LPAE_WRITE_ALLOC << -+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | -+ (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << -+ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | -+ (AS_MEMATTR_LPAE_OUTER_WA << -+ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | -+ 0; /* The other indices are unused for now */ -+ -+ setup->transtab = ((u64)kctx->pgd & -+ ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | -+ AS_TRANSTAB_LPAE_ADRMODE_TABLE | -+ AS_TRANSTAB_LPAE_READ_INNER; ++ kfree(file->private_data); ++ return 0; ++} + -+ setup->transcfg = 0; ++#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ ++static int __fops ## _open(struct inode *inode, struct file *file) \ ++{ \ ++ return kbase_jit_debugfs_common_open(inode, file, __func); \ ++} \ ++static const struct file_operations __fops = { \ ++ .owner = THIS_MODULE, \ ++ .open = __fops ## _open, \ ++ .release = kbase_jit_debugfs_common_release, \ ++ .read = kbase_jit_debugfs_common_read, \ ++ .write = NULL, \ ++ .llseek = generic_file_llseek, \ +} + -+static void mmu_update(struct kbase_context *kctx) ++static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ -+ struct kbase_device * const kbdev = kctx->kbdev; -+ struct kbase_as * const as = &kbdev->as[kctx->as_nr]; -+ struct kbase_mmu_setup * const current_setup = &as->current_setup; -+ -+ mmu_get_as_setup(kctx, current_setup); ++ struct kbase_context *kctx = data->kctx; ++ struct list_head *tmp; + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as, kctx); -+} ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each(tmp, &kctx->jit_active_head) { ++ data->active_value++; ++ } + -+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -+{ -+ struct kbase_as * const as = &kbdev->as[as_nr]; -+ struct kbase_mmu_setup * const current_setup = &as->current_setup; ++ list_for_each(tmp, &kctx->jit_pool_head) { ++ data->pool_value++; ++ } + -+ current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; ++ list_for_each(tmp, &kctx->jit_destroy_head) { ++ data->destroy_value++; ++ } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ /* Apply the address space setting */ -+ kbase_mmu_hw_configure(kbdev, as, NULL); ++ return 0; +} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, ++ kbase_jit_debugfs_count_get); + -+static phys_addr_t pte_to_phy_addr(u64 entry) ++static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ -+ if (!(entry & 1)) -+ return 0; ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; + -+ return entry & ~0xFFF; -+} ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ 
data->active_value += reg->nr_pages; ++ } + -+static int ate_is_valid(u64 ate) -+{ -+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); -+} ++ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { ++ data->pool_value += reg->nr_pages; ++ } + -+static int pte_is_valid(u64 pte) -+{ -+ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); ++ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { ++ data->destroy_value += reg->nr_pages; ++ } ++ mutex_unlock(&kctx->jit_evict_lock); ++ ++ return 0; +} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, ++ kbase_jit_debugfs_vm_get); + -+/* -+ * Map KBASE_REG flags to MMU flags -+ */ -+static u64 get_mmu_flags(unsigned long flags) ++static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ -+ u64 mmu_flags; ++ struct kbase_context *kctx = data->kctx; ++ struct kbase_va_region *reg; + -+ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ -+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; ++ mutex_lock(&kctx->jit_evict_lock); ++ list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { ++ data->active_value += reg->gpu_alloc->nents; ++ } + -+ /* write perm if requested */ -+ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; -+ /* read perm if requested */ -+ mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; -+ /* nx if requested */ -+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; ++ list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { ++ data->pool_value += reg->gpu_alloc->nents; ++ } + -+ if (flags & KBASE_REG_SHARE_BOTH) { -+ /* inner and outer shareable */ -+ mmu_flags |= SHARE_BOTH_BITS; -+ } else if (flags & KBASE_REG_SHARE_IN) { -+ /* inner shareable coherency */ -+ mmu_flags |= SHARE_INNER_BITS; ++ list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { ++ data->destroy_value += reg->gpu_alloc->nents; + } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ return mmu_flags; ++ return 0; +} ++KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, ++ kbase_jit_debugfs_phys_get); + -+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) ++void kbase_jit_debugfs_init(struct kbase_context *kctx) +{ -+ page_table_entry_set(entry, (phy & ~0xFFF) | -+ get_mmu_flags(flags) | -+ ENTRY_IS_ATE); -+} ++ /* Debugfs entry for getting the number of JIT allocations. */ ++ debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_count_fops); + -+static void entry_set_pte(u64 *entry, phys_addr_t phy) -+{ -+ page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); -+} ++ /* ++ * Debugfs entry for getting the total number of virtual pages ++ * used by JIT allocations. ++ */ ++ debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_vm_fops); + -+static void entry_invalidate(u64 *entry) -+{ -+ page_table_entry_set(entry, ENTRY_IS_INVAL); ++ /* ++ * Debugfs entry for getting the number of physical pages used ++ * by JIT allocations. 
++ */ ++ debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, ++ kctx, &kbase_jit_debugfs_phys_fops); +} ++#endif /* CONFIG_DEBUG_FS */ + -+static struct kbase_mmu_mode const lpae_mode = { -+ .update = mmu_update, -+ .get_as_setup = mmu_get_as_setup, -+ .disable_as = mmu_disable_as, -+ .pte_to_phy_addr = pte_to_phy_addr, -+ .ate_is_valid = ate_is_valid, -+ .pte_is_valid = pte_is_valid, -+ .entry_set_ate = entry_set_ate, -+ .entry_set_pte = entry_set_pte, -+ .entry_invalidate = entry_invalidate -+}; -+ -+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) -+{ -+ return &lpae_mode; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c -new file mode 100644 -index 000000000..1a44957fe ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c -@@ -0,0 +1,124 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++/** ++ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations ++ * @work: Work item + * ++ * This function does the work of freeing JIT allocations whose physical ++ * backing has been released. + */ ++static void kbase_jit_destroy_worker(struct work_struct *work) ++{ ++ struct kbase_context *kctx; ++ struct kbase_va_region *reg; + ++ kctx = container_of(work, struct kbase_context, jit_work); ++ do { ++ mutex_lock(&kctx->jit_evict_lock); ++ if (list_empty(&kctx->jit_destroy_head)) { ++ mutex_unlock(&kctx->jit_evict_lock); ++ break; ++ } + ++ reg = list_first_entry(&kctx->jit_destroy_head, ++ struct kbase_va_region, jit_node); + -+#ifdef CONFIG_MALI_PLATFORM_FAKE -+ -+#include -+#include -+#include -+#include -+#include ++ list_del(®->jit_node); ++ mutex_unlock(&kctx->jit_evict_lock); + ++ kbase_gpu_vm_lock(kctx); ++ kbase_mem_free_region(kctx, reg); ++ kbase_gpu_vm_unlock(kctx); ++ } while (1); ++} + -+/* -+ * This file is included only for type definitions and functions belonging to -+ * specific platform folders. Do not add dependencies with symbols that are -+ * defined somewhere else. -+ */ -+#include ++int kbase_jit_init(struct kbase_context *kctx) ++{ ++ INIT_LIST_HEAD(&kctx->jit_active_head); ++ INIT_LIST_HEAD(&kctx->jit_pool_head); ++ INIT_LIST_HEAD(&kctx->jit_destroy_head); ++ INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + -+#define PLATFORM_CONFIG_RESOURCE_COUNT 4 -+#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 ++ INIT_LIST_HEAD(&kctx->jit_pending_alloc); ++ INIT_LIST_HEAD(&kctx->jit_atoms_head); + -+static struct platform_device *mali_device; ++ return 0; ++} + -+#ifndef CONFIG_OF -+/** -+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources -+ * -+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function -+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. -+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
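kbase_jit_destroy_worker() drains jit_destroy_head by detaching one region at a time under jit_evict_lock and doing the expensive free outside the lock. The loop shape, reduced to a single-threaded userspace model with the locking indicated only in comments:

#include <stddef.h>

struct node { struct node *next; };

static struct node *destroy_head;          /* regions waiting to be freed */

void destroy_list_push(struct node *n)     /* producer side: defer a free */
{
	/* lock(list) */
	n->next = destroy_head;
	destroy_head = n;
	/* unlock(list) */
}

void drain_destroy_list(void (*free_fn)(struct node *))
{
	for (;;) {
		struct node *n;

		/* lock(list) */
		n = destroy_head;
		if (!n) {
			/* unlock(list) */
			break;
		}
		destroy_head = n->next;    /* detach while the lock is held */
		/* unlock(list) */

		free_fn(n);                /* heavyweight free done unlocked */
	}
}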
-+ * -+ * @param[in] io_resource Input IO resource data -+ * @param[out] linux_resources Pointer to output array of Linux resource structures -+ */ -+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) ++struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, ++ struct base_jit_alloc_info *info) +{ -+ if (!io_resources || !linux_resources) { -+ pr_err("%s: couldn't find proper resources\n", __func__); -+ return; -+ } -+ -+ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); ++ struct kbase_va_region *reg = NULL; ++ struct kbase_va_region *walker; ++ struct kbase_va_region *temp; ++ size_t current_diff = SIZE_MAX; + -+ linux_resources[0].start = io_resources->io_memory_region.start; -+ linux_resources[0].end = io_resources->io_memory_region.end; -+ linux_resources[0].flags = IORESOURCE_MEM; ++ int ret; + -+ linux_resources[1].start = io_resources->job_irq_number; -+ linux_resources[1].end = io_resources->job_irq_number; -+ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ mutex_lock(&kctx->jit_evict_lock); ++ /* ++ * Scan the pool for an existing allocation which meets our ++ * requirements and remove it. ++ */ ++ list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + -+ linux_resources[2].start = io_resources->mmu_irq_number; -+ linux_resources[2].end = io_resources->mmu_irq_number; -+ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ if (walker->nr_pages >= info->va_pages) { ++ size_t min_size, max_size, diff; + -+ linux_resources[3].start = io_resources->gpu_irq_number; -+ linux_resources[3].end = io_resources->gpu_irq_number; -+ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; -+} -+#endif /* CONFIG_OF */ -+ -+int kbase_platform_fake_register(void) -+{ -+ struct kbase_platform_config *config; -+#ifndef CONFIG_OF -+ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; -+#endif -+ int err; -+ -+ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ -+ if (config == NULL) { -+ pr_err("%s: couldn't get platform config\n", __func__); -+ return -ENODEV; -+ } ++ /* ++ * The JIT allocations VA requirements have been ++ * meet, it's suitable but other allocations ++ * might be a better fit. ++ */ ++ min_size = min_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ max_size = max_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ diff = max_size - min_size; + -+ mali_device = platform_device_alloc("mali", 0); -+ if (mali_device == NULL) -+ return -ENOMEM; ++ if (current_diff > diff) { ++ current_diff = diff; ++ reg = walker; ++ } + -+#ifndef CONFIG_OF -+ kbasep_config_parse_io_resources(config->io_resources, resources); -+ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); -+ if (err) { -+ platform_device_put(mali_device); -+ mali_device = NULL; -+ return err; ++ /* The allocation is an exact match, stop looking */ ++ if (current_diff == 0) ++ break; ++ } + } -+#endif /* CONFIG_OF */ + -+ err = platform_device_add(mali_device); -+ if (err) { -+ platform_device_unregister(mali_device); -+ mali_device = NULL; -+ return err; -+ } ++ if (reg) { ++ /* ++ * Remove the found region from the pool and add it to the ++ * active list. 
++ */ ++ list_move(®->jit_node, &kctx->jit_active_head); + -+ return 0; -+} -+EXPORT_SYMBOL(kbase_platform_fake_register); ++ /* ++ * Remove the allocation from the eviction list as it's no ++ * longer eligible for eviction. This must be done before ++ * dropping the jit_evict_lock ++ */ ++ list_del_init(®->gpu_alloc->evict_node); ++ mutex_unlock(&kctx->jit_evict_lock); + -+void kbase_platform_fake_unregister(void) -+{ -+ if (mali_device) -+ platform_device_unregister(mali_device); -+} -+EXPORT_SYMBOL(kbase_platform_fake_unregister); ++ kbase_gpu_vm_lock(kctx); + -+#endif /* CONFIG_MALI_PLATFORM_FAKE */ ++ /* Make the physical backing no longer reclaimable */ ++ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) ++ goto update_failed; + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c -new file mode 100644 -index 000000000..97d543464 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c -@@ -0,0 +1,205 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ /* Grow the backing if required */ ++ if (reg->gpu_alloc->nents < info->commit_pages) { ++ size_t delta; ++ size_t old_size = reg->gpu_alloc->nents; + ++ /* Allocate some more pages */ ++ delta = info->commit_pages - reg->gpu_alloc->nents; ++ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) ++ != 0) ++ goto update_failed; + ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ if (kbase_alloc_phy_pages_helper( ++ reg->cpu_alloc, delta) != 0) { ++ kbase_free_phy_pages_helper( ++ reg->gpu_alloc, delta); ++ goto update_failed; ++ } ++ } + ++ ret = kbase_mem_grow_gpu_mapping(kctx, reg, ++ info->commit_pages, old_size); ++ /* ++ * The grow failed so put the allocation back in the ++ * pool and return failure. ++ */ ++ if (ret) ++ goto update_failed; ++ } ++ kbase_gpu_vm_unlock(kctx); ++ } else { ++ /* No suitable JIT allocation was found so create a new one */ ++ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | ++ BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | ++ BASE_MEM_COHERENT_LOCAL; ++ u64 gpu_addr; + ++ mutex_unlock(&kctx->jit_evict_lock); + -+/** -+ * @file mali_kbase_pm.c -+ * Base kernel power management APIs -+ */ ++ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, ++ info->extent, &flags, &gpu_addr); ++ if (!reg) ++ goto out_unlocked; + -+#include -+#include -+#include ++ mutex_lock(&kctx->jit_evict_lock); ++ list_add(®->jit_node, &kctx->jit_active_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ } + -+#include ++ return reg; + -+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) -+{ -+ return kbase_hwaccess_pm_powerup(kbdev, flags); ++update_failed: ++ /* ++ * An update to an allocation from the pool failed, chances ++ * are slim a new allocation would fair any better so return ++ * the allocation to the pool and return the function with failure. 
++ */ ++ kbase_gpu_vm_unlock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); ++ list_move(®->jit_node, &kctx->jit_pool_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++out_unlocked: ++ return NULL; +} + -+void kbase_pm_halt(struct kbase_device *kbdev) ++void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ -+ kbase_hwaccess_pm_halt(kbdev); -+} ++ /* The physical backing of memory in the pool is always reclaimable */ ++ kbase_gpu_vm_lock(kctx); ++ kbase_mem_evictable_make(reg->gpu_alloc); ++ kbase_gpu_vm_unlock(kctx); + -+void kbase_pm_context_active(struct kbase_device *kbdev) -+{ -+ (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); ++ mutex_lock(&kctx->jit_evict_lock); ++ list_move(®->jit_node, &kctx->jit_pool_head); ++ mutex_unlock(&kctx->jit_evict_lock); +} + -+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) ++void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ int c; -+ int old_count; -+ -+ KBASE_DEBUG_ASSERT(kbdev != NULL); -+ -+ /* Trace timeline information about how long it took to handle the decision -+ * to powerup. Sometimes the event might be missed due to reading the count -+ * outside of mutex, but this is necessary to get the trace timing -+ * correct. */ -+ old_count = kbdev->pm.active_count; -+ if (old_count == 0) -+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); -+ -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); -+ if (kbase_pm_is_suspending(kbdev)) { -+ switch (suspend_handler) { -+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: -+ if (kbdev->pm.active_count != 0) -+ break; -+ /* FALLTHROUGH */ -+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); -+ if (old_count == 0) -+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); -+ return 1; -+ -+ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: -+ /* FALLTHROUGH */ -+ default: -+ KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); -+ break; -+ } -+ } -+ c = ++kbdev->pm.active_count; -+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); ++ struct kbase_context *kctx = reg->kctx; + -+ /* Trace the event being handled */ -+ if (old_count == 0) -+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); ++ lockdep_assert_held(&kctx->jit_evict_lock); + -+ if (c == 1) -+ /* First context active: Power on the GPU and any cores requested by -+ * the policy */ -+ kbase_hwaccess_pm_gpu_active(kbdev); ++ /* ++ * JIT allocations will always be on a list, if the region ++ * is not on a list then it's not a JIT allocation. ++ */ ++ if (list_empty(®->jit_node)) ++ return; + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ /* ++ * Freeing the allocation requires locks we might not be able ++ * to take now, so move the allocation to the free list and kick ++ * the worker which will do the freeing. 
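kbase_jit_allocate() first scans the pool of cached JIT regions for the entry whose committed size is closest to the request, considering only regions whose virtual size is large enough and stopping early on an exact match. The selection logic on its own, as a standalone function over a plain array:

#include <stddef.h>
#include <stdint.h>

struct jit_region {
	size_t va_pages;       /* virtual range of the cached region */
	size_t committed;      /* pages currently backed */
};

/* Returns the index of the best candidate, or -1 to allocate fresh. */
int pick_best_fit(const struct jit_region *pool, size_t n,
		  size_t want_va, size_t want_commit)
{
	size_t best_diff = SIZE_MAX;
	int best = -1;
	size_t i;

	for (i = 0; i < n; i++) {
		size_t diff;

		if (pool[i].va_pages < want_va)
			continue;                        /* too small to reuse */

		diff = pool[i].committed > want_commit ?
			pool[i].committed - want_commit :
			want_commit - pool[i].committed;

		if (diff < best_diff) {
			best_diff = diff;
			best = (int)i;
		}
		if (best_diff == 0)
			break;                           /* exact match, stop scanning */
	}
	return best;
}

A candidate that is returned still has to be made non-evictable and grown to the requested commit size; any failure there sends it back to the pool.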
++ */ ++ list_move(®->jit_node, &kctx->jit_destroy_head); + -+ return 0; ++ schedule_work(&kctx->jit_work); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_context_active); -+ -+void kbase_pm_context_idle(struct kbase_device *kbdev) ++bool kbase_jit_evict(struct kbase_context *kctx) +{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ int c; -+ int old_count; ++ struct kbase_va_region *reg = NULL; + -+ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ lockdep_assert_held(&kctx->reg_lock); + -+ /* Trace timeline information about how long it took to handle the decision -+ * to powerdown. Sometimes the event might be missed due to reading the -+ * count outside of mutex, but this is necessary to get the trace timing -+ * correct. */ -+ old_count = kbdev->pm.active_count; -+ if (old_count == 0) -+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); ++ /* Free the oldest allocation from the pool */ ++ mutex_lock(&kctx->jit_evict_lock); ++ if (!list_empty(&kctx->jit_pool_head)) { ++ reg = list_entry(kctx->jit_pool_head.prev, ++ struct kbase_va_region, jit_node); ++ list_del(®->jit_node); ++ } ++ mutex_unlock(&kctx->jit_evict_lock); + -+ mutex_lock(&js_devdata->runpool_mutex); -+ mutex_lock(&kbdev->pm.lock); ++ if (reg) ++ kbase_mem_free_region(kctx, reg); + -+ c = --kbdev->pm.active_count; -+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); -+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); ++ return (reg != NULL); ++} + -+ KBASE_DEBUG_ASSERT(c >= 0); ++void kbase_jit_term(struct kbase_context *kctx) ++{ ++ struct kbase_va_region *walker; + -+ /* Trace the event being handled */ -+ if (old_count == 0) -+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); ++ /* Free all allocations for this context */ + -+ if (c == 0) { -+ /* Last context has gone idle */ -+ kbase_hwaccess_pm_gpu_idle(kbdev); ++ /* ++ * Flush the freeing of allocations whose backing has been freed ++ * (i.e. everything in jit_destroy_head). ++ */ ++ cancel_work_sync(&kctx->jit_work); + -+ /* Wake up anyone waiting for this to become 0 (e.g. suspend). 
The -+ * waiters must synchronize with us by locking the pm.lock after -+ * waiting */ -+ wake_up(&kbdev->pm.zero_active_count_wait); ++ kbase_gpu_vm_lock(kctx); ++ mutex_lock(&kctx->jit_evict_lock); ++ /* Free all allocations from the pool */ ++ while (!list_empty(&kctx->jit_pool_head)) { ++ walker = list_first_entry(&kctx->jit_pool_head, ++ struct kbase_va_region, jit_node); ++ list_del(&walker->jit_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_mem_free_region(kctx, walker); ++ mutex_lock(&kctx->jit_evict_lock); + } + -+ mutex_unlock(&kbdev->pm.lock); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ /* Free all allocations from active list */ ++ while (!list_empty(&kctx->jit_active_head)) { ++ walker = list_first_entry(&kctx->jit_active_head, ++ struct kbase_va_region, jit_node); ++ list_del(&walker->jit_node); ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_mem_free_region(kctx, walker); ++ mutex_lock(&kctx->jit_evict_lock); ++ } ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_gpu_vm_unlock(kctx); +} + -+KBASE_EXPORT_TEST_API(kbase_pm_context_idle); -+ -+void kbase_pm_suspend(struct kbase_device *kbdev) ++static int kbase_jd_user_buf_map(struct kbase_context *kctx, ++ struct kbase_va_region *reg) +{ -+ KBASE_DEBUG_ASSERT(kbdev); ++ long pinned_pages; ++ struct kbase_mem_phy_alloc *alloc; ++ struct page **pages; ++ phys_addr_t *pa; ++ long i; ++ int err = -ENOMEM; ++ unsigned long address; ++ struct mm_struct *mm; ++ struct device *dev; ++ unsigned long offset; ++ unsigned long local_size; + -+ /* Suspend vinstr. -+ * This call will block until vinstr is suspended. */ -+ kbase_vinstr_suspend(kbdev->vinstr_ctx); ++ alloc = reg->gpu_alloc; ++ pa = kbase_get_gpu_phy_pages(reg); ++ address = alloc->imported.user_buf.address; ++ mm = alloc->imported.user_buf.mm; + -+ mutex_lock(&kbdev->pm.lock); -+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); -+ kbdev->pm.suspending = true; -+ mutex_unlock(&kbdev->pm.lock); ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + -+ /* From now on, the active count will drop towards zero. Sometimes, it'll -+ * go up briefly before going down again. However, once it reaches zero it -+ * will stay there - guaranteeing that we've idled all pm references */ ++ pages = alloc->imported.user_buf.pages; + -+ /* Suspend job scheduler and associated components, so that it releases all -+ * the PM active count references */ -+ kbasep_js_suspend(kbdev); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++ pinned_pages = get_user_pages(NULL, mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR, ++ 0, pages, NULL); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++ pinned_pages = get_user_pages_remote(NULL, mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR, ++ 0, pages, NULL); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++ pinned_pages = get_user_pages_remote(NULL, mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, ++ pages, NULL); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0) ++ pinned_pages = get_user_pages_remote(NULL, mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, ++ pages, NULL, NULL); ++#else ++ pinned_pages = get_user_pages_remote(mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, ++ pages, NULL); ++#endif + -+ /* Wait for the active count to reach zero. 
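kbase_jit_free() moves a region to the head of the pool list and kbase_jit_evict() removes from the tail, so the pool behaves as a least-recently-freed cache. A toy model of that ordering using a small fixed-size array instead of a kernel list; unlike the driver, the toy pool simply drops the oldest entry when it is full.

#include <stddef.h>

#define POOL_SLOTS 8

struct jit_pool {
	int entries[POOL_SLOTS];   /* most recently freed at index 0 */
	size_t count;
};

void pool_put(struct jit_pool *p, int region_id)
{
	size_t i;

	if (p->count < POOL_SLOTS)
		p->count++;
	for (i = p->count - 1; i > 0; i--)   /* shift everything towards the tail */
		p->entries[i] = p->entries[i - 1];
	p->entries[0] = region_id;           /* newest at the head */
}

int pool_evict_oldest(struct jit_pool *p)
{
	if (p->count == 0)
		return -1;                   /* nothing to evict */
	return p->entries[--p->count];       /* oldest lives at the tail */
}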
This is not the same as -+ * waiting for a power down, since not all policies power down when this -+ * reaches zero. */ -+ wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); ++ if (pinned_pages <= 0) ++ return pinned_pages; + -+ /* NOTE: We synchronize with anything that was just finishing a -+ * kbase_pm_context_idle() call by locking the pm.lock below */ ++ if (pinned_pages != alloc->imported.user_buf.nr_pages) { ++ for (i = 0; i < pinned_pages; i++) ++ put_page(pages[i]); ++ return -ENOMEM; ++ } + -+ kbase_hwaccess_pm_suspend(kbdev); -+} ++ dev = kctx->kbdev->dev; ++ offset = address & ~PAGE_MASK; ++ local_size = alloc->imported.user_buf.size; + -+void kbase_pm_resume(struct kbase_device *kbdev) -+{ -+ /* MUST happen before any pm_context_active calls occur */ -+ kbase_hwaccess_pm_resume(kbdev); ++ for (i = 0; i < pinned_pages; i++) { ++ dma_addr_t dma_addr; ++ unsigned long min; + -+ /* Initial active call, to power on the GPU/cores if needed */ -+ kbase_pm_context_active(kbdev); ++ min = MIN(PAGE_SIZE - offset, local_size); ++ dma_addr = dma_map_page(dev, pages[i], ++ offset, min, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, dma_addr)) ++ goto unwind; + -+ /* Resume any blocked atoms (which may cause contexts to be scheduled in -+ * and dependent atoms to run) */ -+ kbase_resume_suspended_soft_jobs(kbdev); ++ alloc->imported.user_buf.dma_addrs[i] = dma_addr; ++ pa[i] = page_to_phys(pages[i]); + -+ /* Resume the Job Scheduler and associated components, and start running -+ * atoms */ -+ kbasep_js_resume(kbdev); ++ local_size -= min; ++ offset = 0; ++ } + -+ /* Matching idle call, to power off the GPU/cores if we didn't actually -+ * need it and the policy doesn't want it on */ -+ kbase_pm_context_idle(kbdev); ++ alloc->nents = pinned_pages; + -+ /* Resume vinstr operation */ -+ kbase_vinstr_resume(kbdev->vinstr_ctx); ++ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, ++ kbase_reg_current_backed_size(reg), ++ reg->flags); ++ if (err == 0) ++ return 0; ++ ++ alloc->nents = 0; ++ /* fall down */ ++unwind: ++ while (i--) { ++ dma_unmap_page(kctx->kbdev->dev, ++ alloc->imported.user_buf.dma_addrs[i], ++ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ put_page(pages[i]); ++ pages[i] = NULL; ++ } ++ ++ return err; +} + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h -new file mode 100644 -index 000000000..37fa2479d ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h -@@ -0,0 +1,171 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
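kbase_jd_user_buf_map() treats pinning as all-or-nothing: a short pin count is not usable, so every page that was pinned has to be released before the function reports failure. The rule in isolation, written against a hypothetical pin/unpin ops table rather than the kernel's get_user_pages API:

#include <stddef.h>

struct pin_ops {
	long (*pin)(void **pages, size_t want);   /* returns number of pages pinned */
	void (*unpin)(void *page);
};

int pin_all_or_nothing(const struct pin_ops *ops, void **pages, size_t want)
{
	long pinned = ops->pin(pages, want);
	long i;

	if (pinned <= 0)
		return (int)pinned;          /* nothing held, propagate the error */

	if ((size_t)pinned != want) {
		for (i = 0; i < pinned; i++) /* release the partial pin */
			ops->unpin(pages[i]);
		return -1;                   /* the driver returns -ENOMEM here */
	}
	return 0;
}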
-+ * -+ */ ++static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc, bool writeable) ++{ ++ long i; ++ struct page **pages; ++ unsigned long size = alloc->imported.user_buf.size; + ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); ++ pages = alloc->imported.user_buf.pages; ++ for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { ++ unsigned long local_size; ++ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + ++ local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); ++ dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, ++ DMA_BIDIRECTIONAL); ++ if (writeable) ++ set_page_dirty_lock(pages[i]); ++ put_page(pages[i]); ++ pages[i] = NULL; + ++ size -= local_size; ++ } ++ alloc->nents = 0; ++} + + -+/** -+ * @file mali_kbase_pm.h -+ * Power management API definitions -+ */ ++/* to replace sg_dma_len. */ ++#define MALI_SG_DMA_LEN(sg) ((sg)->length) + -+#ifndef _KBASE_PM_H_ -+#define _KBASE_PM_H_ ++#ifdef CONFIG_DMA_SHARED_BUFFER ++static int kbase_jd_umm_map(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ struct sg_table *sgt; ++ struct scatterlist *s; ++ int i; ++ phys_addr_t *pa; ++ int err; ++ size_t count = 0; ++ struct kbase_mem_phy_alloc *alloc; + -+#include "mali_kbase_hwaccess_pm.h" ++ alloc = reg->gpu_alloc; + -+#define PM_ENABLE_IRQS 0x01 -+#define PM_HW_ISSUES_DETECT 0x02 ++ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); ++ KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); ++ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, ++ DMA_BIDIRECTIONAL); + ++ if (IS_ERR_OR_NULL(sgt)) ++ return -EINVAL; + -+/** Initialize the power management framework. -+ * -+ * Must be called before any other power management function -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ * -+ * @return 0 if the power management framework was successfully initialized. -+ */ -+int kbase_pm_init(struct kbase_device *kbdev); ++ /* save for later */ ++ alloc->imported.umm.sgt = sgt; + -+/** Power up GPU after all modules have been initialized and interrupt handlers installed. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ * -+ * @param flags Flags to pass on to kbase_pm_init_hw -+ * -+ * @return 0 if powerup was successful. -+ */ -+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); ++ pa = kbase_get_gpu_phy_pages(reg); ++ KBASE_DEBUG_ASSERT(pa); + -+/** -+ * Halt the power management framework. -+ * Should ensure that no new interrupts are generated, -+ * but allow any currently running interrupt handlers to complete successfully. -+ * The GPU is forced off by the time this function returns, regardless of -+ * whether or not the active power policy asks for the GPU to be powered off. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_halt(struct kbase_device *kbdev); ++ for_each_sg(sgt->sgl, s, sgt->nents, i) { ++ int j; ++ size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); + -+/** Terminate the power management framework. 
-+ * -+ * No power management functions may be called after this -+ * (except @ref kbase_pm_init) -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_term(struct kbase_device *kbdev); ++ WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), ++ "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", ++ MALI_SG_DMA_LEN(s)); + -+/** Increment the count of active contexts. -+ * -+ * This function should be called when a context is about to submit a job. It informs the active power policy that the -+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU. -+ * -+ * This function will block until the GPU is available. -+ * -+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is -+ * in use. Use kbase_pm_contect_active_unless_suspending() instead. -+ * -+ * @note a Suspend is only visible to Kernel threads; user-space threads in a -+ * syscall cannot witness a suspend, because they are frozen before the suspend -+ * begins. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_context_active(struct kbase_device *kbdev); ++ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), ++ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", ++ (unsigned long long) sg_dma_address(s)); + ++ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, ++ count++) ++ *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); ++ WARN_ONCE(j < pages, ++ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", ++ alloc->imported.umm.dma_buf->size); ++ } + -+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ -+enum kbase_pm_suspend_handler { -+ /** A suspend is not expected/not possible - this is the same as -+ * kbase_pm_context_active() */ -+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, -+ /** If we're suspending, fail and don't increase the active count */ -+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, -+ /** If we're suspending, succeed and allow the active count to increase iff -+ * it didn't go from 0->1 (i.e., we didn't re-activate the GPU). -+ * -+ * This should only be used when there is a bounded time on the activation -+ * (e.g. guarantee it's going to be idled very soon after) */ -+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE -+}; ++ if (!(reg->flags & KBASE_REG_IMPORT_PAD) && ++ WARN_ONCE(count < reg->nr_pages, ++ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", ++ alloc->imported.umm.dma_buf->size)) { ++ err = -EINVAL; ++ goto err_unmap_attachment; ++ } + -+/** Suspend 'safe' variant of kbase_pm_context_active() -+ * -+ * If a suspend is in progress, this allows for various different ways of -+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. -+ * -+ * We returns a status code indicating whether we're allowed to keep the GPU -+ * active during the suspend, depending on the handler code. If the status code -+ * indicates a failure, the caller must abort whatever operation it was -+ * attempting, and potentially queue it up for after the OS has resumed. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ * @param suspend_handler The handler code for how to handle a suspend that might occur -+ * @return zero Indicates success -+ * @return non-zero Indicates failure due to the system being suspending/suspended. 
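-+ *
-+ * Illustrative sketch only (the surrounding driver code is assumed): a
-+ * kernel-side caller that must not re-activate the GPU during a suspend
-+ * could defer its work until resume when the call fails:
-+ *
-+ *   if (kbase_pm_context_active_handle_suspend(kbdev,
-+ *           KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE))
-+ *           return;
-+ *   (access the GPU)
-+ *   kbase_pm_context_idle(kbdev);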
-+ */ -+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); ++ /* Update nents as we now have pages to map */ ++ alloc->nents = reg->nr_pages; + -+/** Decrement the reference count of active contexts. -+ * -+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power -+ * policy so the calling code should ensure that it does not access the GPU's registers. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_context_idle(struct kbase_device *kbdev); ++ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, ++ kbase_get_gpu_phy_pages(reg), ++ count, ++ reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); ++ if (err) ++ goto err_unmap_attachment; + -+/** -+ * Suspend the GPU and prevent any further register accesses to it from Kernel -+ * threads. -+ * -+ * This is called in response to an OS suspend event, and calls into the various -+ * kbase components to complete the suspend. -+ * -+ * @note the mechanisms used here rely on all user-space threads being frozen -+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up -+ * the GPU e.g. via atom submission. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_suspend(struct kbase_device *kbdev); ++ if (reg->flags & KBASE_REG_IMPORT_PAD) { ++ err = kbase_mmu_insert_single_page(kctx, ++ reg->start_pfn + count, ++ page_to_phys(kctx->aliasing_sink_page), ++ reg->nr_pages - count, ++ (reg->flags | KBASE_REG_GPU_RD) & ++ ~KBASE_REG_GPU_WR); ++ if (err) ++ goto err_teardown_orig_pages; ++ } + -+/** -+ * Resume the GPU, allow register accesses to it, and resume running atoms on -+ * the GPU. -+ * -+ * This is called in response to an OS resume event, and calls into the various -+ * kbase components to complete the resume. -+ * -+ * @param kbdev The kbase device structure for the device (must be a valid pointer) -+ */ -+void kbase_pm_resume(struct kbase_device *kbdev); ++ return 0; + -+/** -+ * kbase_pm_vsync_callback - vsync callback -+ * -+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise -+ * @data: Pointer to the kbase device as returned by kbase_find_device() -+ * -+ * Callback function used to notify the power management code that a vsync has -+ * occurred on the display. -+ */ -+void kbase_pm_vsync_callback(int buffer_updated, void *data); ++err_teardown_orig_pages: ++ kbase_mmu_teardown_pages(kctx, reg->start_pfn, count); ++err_unmap_attachment: ++ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, ++ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); ++ alloc->imported.umm.sgt = NULL; + -+#endif /* _KBASE_PM_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h -new file mode 100644 -index 000000000..7fb674ede ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h -@@ -0,0 +1,40 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ return err; ++} + ++static void kbase_jd_umm_unmap(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) ++{ ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(alloc); ++ KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); ++ KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); ++ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, ++ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); ++ alloc->imported.umm.sgt = NULL; ++ alloc->nents = 0; ++} ++#endif /* CONFIG_DMA_SHARED_BUFFER */ + ++#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ ++ || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) ++static void add_kds_resource(struct kds_resource *kds_res, ++ struct kds_resource **kds_resources, u32 *kds_res_count, ++ unsigned long *kds_access_bitmap, bool exclusive) ++{ ++ u32 i; + -+/** -+ * @file mali_kbase_profiling_gator_api.h -+ * Model interface -+ */ ++ for (i = 0; i < *kds_res_count; i++) { ++ /* Duplicate resource, ignore */ ++ if (kds_resources[i] == kds_res) ++ return; ++ } + -+#ifndef _KBASE_PROFILING_GATOR_API_H_ -+#define _KBASE_PROFILING_GATOR_API_H_ ++ kds_resources[*kds_res_count] = kds_res; ++ if (exclusive) ++ set_bit(*kds_res_count, kds_access_bitmap); ++ (*kds_res_count)++; ++} ++#endif + -+/* -+ * List of possible actions to be controlled by Streamline. -+ * The following numbers are used by gator to control -+ * the frame buffer dumping and s/w counter reporting. -+ */ -+#define FBDUMP_CONTROL_ENABLE (1) -+#define FBDUMP_CONTROL_RATE (2) -+#define SW_COUNTER_ENABLE (3) -+#define FBDUMP_CONTROL_RESIZE_FACTOR (4) -+#define FBDUMP_CONTROL_MAX (5) -+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE ++struct kbase_mem_phy_alloc *kbase_map_external_resource( ++ struct kbase_context *kctx, struct kbase_va_region *reg, ++ struct mm_struct *locked_mm ++#ifdef CONFIG_KDS ++ , u32 *kds_res_count, struct kds_resource **kds_resources, ++ unsigned long *kds_access_bitmap, bool exclusive ++#endif ++ ) ++{ ++ int err; + -+void _mali_profiling_control(u32 action, u32 value); ++ /* decide what needs to happen for this resource */ ++ switch (reg->gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) ++ goto exit; + -+#endif /* _KBASE_PROFILING_GATOR_API */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c -new file mode 100644 -index 000000000..c97065006 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c -@@ -0,0 +1,130 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; ++ if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { ++ err = kbase_jd_user_buf_map(kctx, reg); ++ if (err) { ++ reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; ++ goto exit; ++ } ++ } ++ } ++ break; ++ case KBASE_MEM_TYPE_IMPORTED_UMP: { ++#if defined(CONFIG_KDS) && defined(CONFIG_UMP) ++ if (kds_res_count) { ++ struct kds_resource *kds_res; ++ ++ kds_res = ump_dd_kds_resource_get( ++ reg->gpu_alloc->imported.ump_handle); ++ if (kds_res) ++ add_kds_resource(kds_res, kds_resources, ++ kds_res_count, ++ kds_access_bitmap, exclusive); ++ } ++#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ ++ break; ++ } ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS ++ if (kds_res_count) { ++ struct kds_resource *kds_res; + ++ kds_res = get_dma_buf_kds_resource( ++ reg->gpu_alloc->imported.umm.dma_buf); ++ if (kds_res) ++ add_kds_resource(kds_res, kds_resources, ++ kds_res_count, ++ kds_access_bitmap, exclusive); ++ } ++#endif ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count++; ++ if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { ++ err = kbase_jd_umm_map(kctx, reg); ++ if (err) { ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count--; ++ goto exit; ++ } ++ } ++ break; ++ } ++#endif ++ default: ++ goto exit; ++ } + ++ return kbase_mem_phy_alloc_get(reg->gpu_alloc); ++exit: ++ return NULL; ++} + -+#include "mali_kbase.h" ++void kbase_unmap_external_resource(struct kbase_context *kctx, ++ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) ++{ ++ switch (alloc->type) { ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ alloc->imported.umm.current_mapping_usage_count--; + -+#include "mali_kbase_regs_history_debugfs.h" ++ if (0 == alloc->imported.umm.current_mapping_usage_count) { ++ if (reg && reg->gpu_alloc == alloc) { ++ int err; + -+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++ err = kbase_mmu_teardown_pages( ++ kctx, ++ reg->start_pfn, ++ alloc->nents); ++ WARN_ON(err); ++ } + -+#include ++ kbase_jd_umm_unmap(kctx, alloc); ++ } ++ } ++ break; ++#endif /* CONFIG_DMA_SHARED_BUFFER */ ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { ++ alloc->imported.user_buf.current_mapping_usage_count--; + ++ if (0 == alloc->imported.user_buf.current_mapping_usage_count) { ++ bool writeable = true; + -+static int regs_history_size_get(void *data, u64 *val) -+{ -+ struct kbase_io_history *const h = data; ++ if (reg && reg->gpu_alloc == alloc) ++ kbase_mmu_teardown_pages( ++ kctx, ++ reg->start_pfn, ++ kbase_reg_current_backed_size(reg)); + -+ *val = h->size; ++ if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) ++ writeable = false; + -+ return 0; ++ kbase_jd_user_buf_unmap(kctx, alloc, writeable); ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ kbase_mem_phy_alloc_put(alloc); +} + -+static int regs_history_size_set(void *data, u64 val) ++struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( ++ struct kbase_context *kctx, u64 gpu_addr) +{ -+ struct kbase_io_history *const h = data; ++ struct kbase_ctx_ext_res_meta *meta = NULL; ++ struct kbase_ctx_ext_res_meta *walker; + -+ return kbase_io_history_resize(h, (u16)val); -+} ++ lockdep_assert_held(&kctx->reg_lock); + ++ /* ++ * Walk the per context external resource metadata list for the ++ * metadata which matches the region which is being acquired. 
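++	 * The walk is done under kctx->reg_lock (asserted above), so the
++	 * list cannot change underneath us.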
++ */ ++ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { ++ if (walker->gpu_addr == gpu_addr) { ++ meta = walker; ++ break; ++ } ++ } + -+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, -+ regs_history_size_get, -+ regs_history_size_set, -+ "%llu\n"); ++ /* No metadata exists so create one. */ ++ if (!meta) { ++ struct kbase_va_region *reg; + ++ /* Find the region */ ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ kctx, gpu_addr); ++ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) ++ goto failed; + -+/** -+ * regs_history_show - show callback for the register access history file. -+ * -+ * @sfile: The debugfs entry -+ * @data: Data associated with the entry -+ * -+ * This function is called to dump all recent accesses to the GPU registers. -+ * -+ * @return 0 if successfully prints data in debugfs entry file, failure -+ * otherwise -+ */ -+static int regs_history_show(struct seq_file *sfile, void *data) -+{ -+ struct kbase_io_history *const h = sfile->private; -+ u16 i; -+ size_t iters; -+ unsigned long flags; ++ /* Allocate the metadata object */ ++ meta = kzalloc(sizeof(*meta), GFP_KERNEL); ++ if (!meta) ++ goto failed; + -+ if (!h->enabled) { -+ seq_puts(sfile, "The register access history is disabled\n"); -+ goto out; -+ } ++ /* ++ * Fill in the metadata object and acquire a reference ++ * for the physical resource. ++ */ ++ meta->alloc = kbase_map_external_resource(kctx, reg, NULL ++#ifdef CONFIG_KDS ++ , NULL, NULL, ++ NULL, false ++#endif ++ ); + -+ spin_lock_irqsave(&h->lock, flags); ++ if (!meta->alloc) ++ goto fail_map; + -+ iters = (h->size > h->count) ? h->count : h->size; -+ seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, -+ h->count); -+ for (i = 0; i < iters; ++i) { -+ struct kbase_io_access *io = -+ &h->buf[(h->count - iters + i) % h->size]; -+ char const access = (io->addr & 1) ? 'w' : 'r'; ++ meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + -+ seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, -+ (void *)(io->addr & ~0x1), io->value); ++ list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } + -+ spin_unlock_irqrestore(&h->lock, flags); ++ return meta; + -+out: -+ return 0; ++fail_map: ++ kfree(meta); ++failed: ++ return NULL; +} + -+ -+/** -+ * regs_history_open - open operation for regs_history debugfs file -+ * -+ * @in: &struct inode pointer -+ * @file: &struct file pointer -+ * -+ * @return file descriptor -+ */ -+static int regs_history_open(struct inode *in, struct file *file) ++bool kbase_sticky_resource_release(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ -+ return single_open(file, ®s_history_show, in->i_private); -+} ++ struct kbase_ctx_ext_res_meta *walker; ++ struct kbase_va_region *reg; + ++ lockdep_assert_held(&kctx->reg_lock); + -+static const struct file_operations regs_history_fops = { -+ .open = ®s_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ /* Search of the metadata if one isn't provided. */ ++ if (!meta) { ++ /* ++ * Walk the per context external resource metadata list for the ++ * metadata which matches the region which is being released. ++ */ ++ list_for_each_entry(walker, &kctx->ext_res_meta_head, ++ ext_res_node) { ++ if (walker->gpu_addr == gpu_addr) { ++ meta = walker; ++ break; ++ } ++ } ++ } ++ ++ /* No metadata so just return. */ ++ if (!meta) ++ return false; + ++ /* Drop the physical memory reference and free the metadata. 
*/ ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ kctx, ++ meta->gpu_addr); + -+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) ++ kbase_unmap_external_resource(kctx, reg, meta->alloc); ++ list_del(&meta->ext_res_node); ++ kfree(meta); ++ ++ return true; ++} ++ ++int kbase_sticky_resource_init(struct kbase_context *kctx) +{ -+ debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, -+ kbdev->mali_debugfs_directory, -+ &kbdev->io_history.enabled); -+ debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, -+ kbdev->mali_debugfs_directory, -+ &kbdev->io_history, ®s_history_size_fops); -+ debugfs_create_file("regs_history", S_IRUGO, -+ kbdev->mali_debugfs_directory, &kbdev->io_history, -+ ®s_history_fops); ++ INIT_LIST_HEAD(&kctx->ext_res_meta_head); ++ ++ return 0; +} + ++void kbase_sticky_resource_term(struct kbase_context *kctx) ++{ ++ struct kbase_ctx_ext_res_meta *walker; + -+#endif /* CONFIG_DEBUG_FS */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* ++ * Free any sticky resources which haven't been unmapped. ++ * ++ * Note: ++ * We don't care about refcounts at this point as no future ++ * references to the meta data will be made. ++ * Region termination would find these if we didn't free them ++ * here, but it's more efficient if we do the clean up here. ++ */ ++ while (!list_empty(&kctx->ext_res_meta_head)) { ++ walker = list_first_entry(&kctx->ext_res_meta_head, ++ struct kbase_ctx_ext_res_meta, ext_res_node); ++ ++ kbase_sticky_resource_release(kctx, walker, 0); ++ } ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h new file mode 100644 -index 000000000..f10837002 +index 000000000..3f3eaa3fd --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h -@@ -0,0 +1,50 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h +@@ -0,0 +1,1068 @@ +/* + * -+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -389525,3204 +390397,3934 @@ index 000000000..f10837002 + + + -+/** -+ * Header file for register access history support via debugfs -+ * -+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*. -+ * -+ * Usage: -+ * - regs_history_enabled: whether recording of register accesses is enabled. -+ * Write 'y' to enable, 'n' to disable. -+ * - regs_history_size: size of the register history buffer, must be > 0 -+ * - regs_history: return the information about last accesses to the registers. 
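-+ *
-+ * Example (illustrative only; the exact debugfs directory name depends on
-+ * the probed device instance, "mali0" is assumed here):
-+ *   echo y > /sys/kernel/debug/mali0/regs_history_enabled
-+ *   echo 1024 > /sys/kernel/debug/mali0/regs_history_size
-+ *   cat /sys/kernel/debug/mali0/regs_history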
-+ */ -+ -+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H -+#define _KBASE_REGS_HISTORY_DEBUGFS_H -+ -+struct kbase_device; + -+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/** -+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history -+ * -+ * @kbdev: Pointer to kbase_device containing the register history ++ * @file mali_kbase_mem.h ++ * Base kernel memory APIs + */ -+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + -+#else /* CONFIG_DEBUG_FS */ ++#ifndef _KBASE_MEM_H_ ++#define _KBASE_MEM_H_ + -+#define kbasep_regs_history_debugfs_init CSTD_NOP ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" ++#endif + -+#endif /* CONFIG_DEBUG_FS */ ++#include ++#ifdef CONFIG_KDS ++#include ++#endif /* CONFIG_KDS */ ++#ifdef CONFIG_UMP ++#include ++#endif /* CONFIG_UMP */ ++#include "mali_base_kernel.h" ++#include ++#include "mali_kbase_pm.h" ++#include "mali_kbase_defs.h" ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++#include "mali_kbase_gator.h" ++#endif ++/* Required for kbase_mem_evictable_unmake */ ++#include "mali_kbase_mem_linux.h" + -+#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c -new file mode 100644 -index 000000000..84aa3316e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c -@@ -0,0 +1,1166 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + ++/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. ++The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and ++page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table ++updates and generates duplicate page faults as the page table information used by the MMU is not valid. 
*/ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + ++/* This must always be a power of 2 */ ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) ++#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** -+ * @file mali_kbase_replay.c -+ * Replay soft job handlers ++ * A CPU mapping + */ ++struct kbase_cpu_mapping { ++ struct list_head mappings_list; ++ struct kbase_mem_phy_alloc *alloc; ++ struct kbase_context *kctx; ++ struct kbase_va_region *region; ++ int count; ++ int free_on_close; ++}; + -+#include -+#include -+#include -+#include -+#include -+ -+#define JOB_NOT_STARTED 0 -+#define JOB_TYPE_NULL (1) -+#define JOB_TYPE_VERTEX (5) -+#define JOB_TYPE_TILER (7) -+#define JOB_TYPE_FUSED (8) -+#define JOB_TYPE_FRAGMENT (9) -+ -+#define JOB_HEADER_32_FBD_OFFSET (31*4) -+#define JOB_HEADER_64_FBD_OFFSET (44*4) -+ -+#define FBD_POINTER_MASK (~0x3f) -+ -+#define SFBD_TILER_OFFSET (48*4) ++enum kbase_memory_type { ++ KBASE_MEM_TYPE_NATIVE, ++ KBASE_MEM_TYPE_IMPORTED_UMP, ++ KBASE_MEM_TYPE_IMPORTED_UMM, ++ KBASE_MEM_TYPE_IMPORTED_USER_BUF, ++ KBASE_MEM_TYPE_ALIAS, ++ KBASE_MEM_TYPE_TB, ++ KBASE_MEM_TYPE_RAW ++}; + -+#define MFBD_TILER_OFFSET (14*4) ++/* internal structure, mirroring base_mem_aliasing_info, ++ * but with alloc instead of a gpu va (handle) */ ++struct kbase_aliased { ++ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ ++ u64 offset; /* in pages */ ++ u64 length; /* in pages */ ++}; + -+#define FBD_HIERARCHY_WEIGHTS 8 -+#define FBD_HIERARCHY_MASK_MASK 0x1fff ++/** ++ * @brief Physical pages tracking object properties ++ */ ++#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0) ++#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1) + -+#define FBD_TYPE 1 ++/* physical pages tracking object. ++ * Set up to track N pages. ++ * N not stored here, the creator holds that info. ++ * This object only tracks how many elements are actually valid (present). ++ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not ++ * shared with another region or client. CPU mappings are OK to exist when changing, as ++ * long as the tracked mappings objects are updated as part of the change. ++ */ ++struct kbase_mem_phy_alloc { ++ struct kref kref; /* number of users of this alloc */ ++ atomic_t gpu_mappings; ++ size_t nents; /* 0..N */ ++ phys_addr_t *pages; /* N elements, only 0..nents are valid */ + -+#define HIERARCHY_WEIGHTS 13 ++ /* kbase_cpu_mappings */ ++ struct list_head mappings; + -+#define JOB_HEADER_ID_MAX 0xffff ++ /* Node used to store this allocation on the eviction list */ ++ struct list_head evict_node; ++ /* Physical backing size when the pages where evicted */ ++ size_t evicted; ++ /* ++ * Back reference to the region structure which created this ++ * allocation, or NULL if it has been freed. 
++ */ ++ struct kbase_va_region *reg; + -+#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) -+#define JOB_POLYGON_LIST (0x03) ++ /* type of buffer */ ++ enum kbase_memory_type type; + -+struct fragment_job { -+ struct job_descriptor_header header; ++ unsigned long properties; + -+ u32 x[2]; ++ /* member in union valid based on @a type */ + union { -+ u64 _64; -+ u32 _32; -+ } fragment_fbd; ++#ifdef CONFIG_UMP ++ ump_dd_handle ump_handle; ++#endif /* CONFIG_UMP */ ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++ struct { ++ struct dma_buf *dma_buf; ++ struct dma_buf_attachment *dma_attachment; ++ unsigned int current_mapping_usage_count; ++ struct sg_table *sgt; ++ } umm; ++#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ ++ struct { ++ u64 stride; ++ size_t nents; ++ struct kbase_aliased *aliased; ++ } alias; ++ /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ ++ struct kbase_context *kctx; ++ struct kbase_alloc_import_user_buf { ++ unsigned long address; ++ unsigned long size; ++ unsigned long nr_pages; ++ struct page **pages; ++ /* top bit (1<<31) of current_mapping_usage_count ++ * specifies that this import was pinned on import ++ * See PINNED_ON_IMPORT ++ */ ++ u32 current_mapping_usage_count; ++ struct mm_struct *mm; ++ dma_addr_t *dma_addrs; ++ } user_buf; ++ } imported; +}; + -+static void dump_job_head(struct kbase_context *kctx, char *head_str, -+ struct job_descriptor_header *job) -+{ -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(kctx->kbdev->dev, "%s\n", head_str); -+ dev_dbg(kctx->kbdev->dev, -+ "addr = %p\n" -+ "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" -+ "first_incomplete_task = %x\n" -+ "fault_pointer = %llx\n" -+ "job_descriptor_size = %x\n" -+ "job_type = %x\n" -+ "job_barrier = %x\n" -+ "_reserved_01 = %x\n" -+ "_reserved_02 = %x\n" -+ "_reserved_03 = %x\n" -+ "_reserved_04/05 = %x,%x\n" -+ "job_index = %x\n" -+ "dependencies = %x,%x\n", -+ job, job->exception_status, -+ JOB_SOURCE_ID(job->exception_status), -+ (job->exception_status >> 8) & 0x3, -+ job->exception_status & 0xFF, -+ job->first_incomplete_task, -+ job->fault_pointer, job->job_descriptor_size, -+ job->job_type, job->job_barrier, job->_reserved_01, -+ job->_reserved_02, job->_reserved_03, -+ job->_reserved_04, job->_reserved_05, -+ job->job_index, -+ job->job_dependency_index_1, -+ job->job_dependency_index_2); ++/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is ++ * used to signify that a buffer was pinned when it was imported. Since the ++ * reference count is limited by the number of atoms that can be submitted at ++ * once there should be no danger of overflowing into this bit. ++ * Stealing the top bit also has the benefit that ++ * current_mapping_usage_count != 0 if and only if the buffer is mapped. 
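++ * For example, (count & PINNED_ON_IMPORT) tests whether the buffer was
++ * pinned at import time, while (count & ~PINNED_ON_IMPORT) yields the
++ * plain mapping usage count.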
++ */ ++#define PINNED_ON_IMPORT (1<<31) + -+ if (job->job_descriptor_size) -+ dev_dbg(kctx->kbdev->dev, "next = %llx\n", -+ job->next_job._64); -+ else -+ dev_dbg(kctx->kbdev->dev, "next = %x\n", -+ job->next_job._32); -+#endif ++static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) ++{ ++ KBASE_DEBUG_ASSERT(alloc); ++ /* we only track mappings of NATIVE buffers */ ++ if (alloc->type == KBASE_MEM_TYPE_NATIVE) ++ atomic_inc(&alloc->gpu_mappings); +} + -+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx, -+ u64 fbd_address, u64 tiler_heap_free, -+ u16 hierarchy_mask, u32 default_weight) ++static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) +{ -+ struct { -+ u32 padding_1[1]; -+ u32 flags; -+ u64 padding_2[2]; -+ u64 heap_free_address; -+ u32 padding[8]; -+ u32 weights[FBD_HIERARCHY_WEIGHTS]; -+ } *fbd_tiler; -+ struct kbase_vmap_struct map; -+ -+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); -+ -+ fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET, -+ sizeof(*fbd_tiler), &map); -+ if (!fbd_tiler) { -+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); -+ return -EINVAL; -+ } -+ -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(kctx->kbdev->dev, -+ "FBD tiler:\n" -+ "flags = %x\n" -+ "heap_free_address = %llx\n", -+ fbd_tiler->flags, fbd_tiler->heap_free_address); -+#endif -+ if (hierarchy_mask) { -+ u32 weights[HIERARCHY_WEIGHTS]; -+ u16 old_hierarchy_mask = fbd_tiler->flags & -+ FBD_HIERARCHY_MASK_MASK; -+ int i, j = 0; -+ -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { -+ if (old_hierarchy_mask & (1 << i)) { -+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); -+ weights[i] = fbd_tiler->weights[j++]; -+ } else { -+ weights[i] = default_weight; -+ } -+ } -+ -+ -+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", -+ old_hierarchy_mask, hierarchy_mask); -+ -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) -+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", -+ i, weights[i]); -+ -+ j = 0; -+ -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { -+ if (hierarchy_mask & (1 << i)) { -+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); -+ -+ dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n", -+ i, weights[i], j); -+ -+ fbd_tiler->weights[j++] = weights[i]; -+ } ++ KBASE_DEBUG_ASSERT(alloc); ++ /* we only track mappings of NATIVE buffers */ ++ if (alloc->type == KBASE_MEM_TYPE_NATIVE) ++ if (0 > atomic_dec_return(&alloc->gpu_mappings)) { ++ pr_err("Mismatched %s:\n", __func__); ++ dump_stack(); + } ++} + -+ for (; j < FBD_HIERARCHY_WEIGHTS; j++) -+ fbd_tiler->weights[j] = 0; -+ -+ fbd_tiler->flags = hierarchy_mask | (1 << 16); -+ } -+ -+ fbd_tiler->heap_free_address = tiler_heap_free; -+ -+ dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n", -+ fbd_tiler->heap_free_address, fbd_tiler->flags); ++void kbase_mem_kref_free(struct kref *kref); + -+ kbase_vunmap(kctx, &map); ++int kbase_mem_init(struct kbase_device *kbdev); ++void kbase_mem_halt(struct kbase_device *kbdev); ++void kbase_mem_term(struct kbase_device *kbdev); + -+ return 0; ++static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) ++{ ++ kref_get(&alloc->kref); ++ return alloc; +} + -+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx, -+ u64 fbd_address, u64 tiler_heap_free, -+ u16 hierarchy_mask, u32 default_weight) ++static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) +{ 
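++	/* Drop one reference; kbase_mem_kref_free() tears down the allocation
++	 * when the last reference is released. */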
-+ struct kbase_vmap_struct map; -+ struct { -+ u32 padding_0; -+ u32 flags; -+ u64 padding_1[2]; -+ u64 heap_free_address; -+ u64 padding_2; -+ u32 weights[FBD_HIERARCHY_WEIGHTS]; -+ } *fbd_tiler; -+ -+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); ++ kref_put(&alloc->kref, kbase_mem_kref_free); ++ return NULL; ++} + -+ fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET, -+ sizeof(*fbd_tiler), &map); -+ if (!fbd_tiler) { -+ dev_err(kctx->kbdev->dev, -+ "kbasep_replay_reset_fbd: failed to map fbd\n"); -+ return -EINVAL; -+ } ++/** ++ * A GPU memory region, and attributes for CPU mappings. ++ */ ++struct kbase_va_region { ++ struct rb_node rblink; ++ struct list_head link; + -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(kctx->kbdev->dev, "FBD tiler:\n" -+ "flags = %x\n" -+ "heap_free_address = %llx\n", -+ fbd_tiler->flags, -+ fbd_tiler->heap_free_address); -+#endif -+ if (hierarchy_mask) { -+ u32 weights[HIERARCHY_WEIGHTS]; -+ u16 old_hierarchy_mask = (fbd_tiler->flags) & -+ FBD_HIERARCHY_MASK_MASK; -+ int i, j = 0; ++ struct kbase_context *kctx; /* Backlink to base context */ + -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { -+ if (old_hierarchy_mask & (1 << i)) { -+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); -+ weights[i] = fbd_tiler->weights[j++]; -+ } else { -+ weights[i] = default_weight; -+ } -+ } ++ u64 start_pfn; /* The PFN in GPU space */ ++ size_t nr_pages; + ++/* Free region */ ++#define KBASE_REG_FREE (1ul << 0) ++/* CPU write access */ ++#define KBASE_REG_CPU_WR (1ul << 1) ++/* GPU write access */ ++#define KBASE_REG_GPU_WR (1ul << 2) ++/* No eXecute flag */ ++#define KBASE_REG_GPU_NX (1ul << 3) ++/* Is CPU cached? */ ++#define KBASE_REG_CPU_CACHED (1ul << 4) ++/* Is GPU cached? */ ++#define KBASE_REG_GPU_CACHED (1ul << 5) + -+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", -+ old_hierarchy_mask, hierarchy_mask); ++#define KBASE_REG_GROWABLE (1ul << 6) ++/* Can grow on pf? */ ++#define KBASE_REG_PF_GROW (1ul << 7) + -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) -+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", -+ i, weights[i]); ++/* VA managed by us */ ++#define KBASE_REG_CUSTOM_VA (1ul << 8) + -+ j = 0; ++/* inner shareable coherency */ ++#define KBASE_REG_SHARE_IN (1ul << 9) ++/* inner & outer shareable coherency */ ++#define KBASE_REG_SHARE_BOTH (1ul << 10) + -+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { -+ if (hierarchy_mask & (1 << i)) { -+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); ++/* Space for 4 different zones */ ++#define KBASE_REG_ZONE_MASK (3ul << 11) ++#define KBASE_REG_ZONE(x) (((x) & 3) << 11) + -+ dev_dbg(kctx->kbdev->dev, -+ " Writing hierarchy level %02d (%08x) to %d\n", -+ i, weights[i], j); ++/* GPU read access */ ++#define KBASE_REG_GPU_RD (1ul<<13) ++/* CPU read access */ ++#define KBASE_REG_CPU_RD (1ul<<14) + -+ fbd_tiler->weights[j++] = weights[i]; -+ } -+ } ++/* Index of chosen MEMATTR for this region (0..7) */ ++#define KBASE_REG_MEMATTR_MASK (7ul << 16) ++#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) ++#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + -+ for (; j < FBD_HIERARCHY_WEIGHTS; j++) -+ fbd_tiler->weights[j] = 0; ++#define KBASE_REG_SECURE (1ul << 19) + -+ fbd_tiler->flags = hierarchy_mask | (1 << 16); -+ } ++#define KBASE_REG_DONT_NEED (1ul << 20) + -+ fbd_tiler->heap_free_address = tiler_heap_free; ++/* Imported buffer is padded? 
*/ ++#define KBASE_REG_IMPORT_PAD (1ul << 21) + -+ kbase_vunmap(kctx, &map); ++#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + -+ return 0; -+} ++/* only used with 32-bit clients */ ++/* ++ * On a 32bit platform, custom VA should be wired from (4GB + shader region) ++ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface ++ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). ++ * So we put the default limit to the maximum possible on Linux and shrink ++ * it down, if required by the GPU, during initialization. ++ */ + -+/** -+ * @brief Reset the status of an FBD pointed to by a tiler job -+ * -+ * This performs two functions : -+ * - Set the hierarchy mask -+ * - Reset the tiler free heap address -+ * -+ * @param[in] kctx Context pointer -+ * @param[in] job_header Address of job header to reset. -+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to -+ * @param[in] hierarchy_mask The hierarchy mask to use -+ * @param[in] default_weight Default hierarchy weight to write when no other -+ * weight is given in the FBD -+ * @param[in] job_64 true if this job is using 64-bit -+ * descriptors -+ * -+ * @return 0 on success, error code on failure ++/* ++ * Dedicated 16MB region for shader code: ++ * VA range 0x101000000-0x102000000 + */ -+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx, -+ u64 job_header, u64 tiler_heap_free, -+ u16 hierarchy_mask, u32 default_weight, bool job_64) -+{ -+ struct kbase_vmap_struct map; -+ u64 fbd_address; ++#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) ++#define KBASE_REG_ZONE_EXEC_BASE (0x101000000ULL >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) + -+ if (job_64) { -+ u64 *job_ext; ++#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) ++#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */ ++#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) ++/* end 32-bit clients only */ + -+ job_ext = kbase_vmap(kctx, -+ job_header + JOB_HEADER_64_FBD_OFFSET, -+ sizeof(*job_ext), &map); ++ unsigned long flags; + -+ if (!job_ext) { -+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); -+ return -EINVAL; -+ } ++ size_t extent; /* nr of pages alloc'd on PF */ + -+ fbd_address = *job_ext; ++ struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ ++ struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ + -+ kbase_vunmap(kctx, &map); -+ } else { -+ u32 *job_ext; ++ /* non-NULL if this memory object is a kds_resource */ ++ struct kds_resource *kds_res; + -+ job_ext = kbase_vmap(kctx, -+ job_header + JOB_HEADER_32_FBD_OFFSET, -+ sizeof(*job_ext), &map); ++ /* List head used to store the region in the JIT allocation pool */ ++ struct list_head jit_node; ++}; + -+ if (!job_ext) { -+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); -+ return -EINVAL; -+ } ++/* Common functions */ ++static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg) ++{ ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ fbd_address = *job_ext; ++ return reg->cpu_alloc->pages; ++} + -+ kbase_vunmap(kctx, &map); -+ } ++static inline phys_addr_t 
*kbase_get_gpu_phy_pages(struct kbase_va_region *reg) ++{ ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ if (fbd_address & FBD_TYPE) { -+ return kbasep_replay_reset_mfbd(kctx, -+ fbd_address & FBD_POINTER_MASK, -+ tiler_heap_free, -+ hierarchy_mask, -+ default_weight); -+ } else { -+ return kbasep_replay_reset_sfbd(kctx, -+ fbd_address & FBD_POINTER_MASK, -+ tiler_heap_free, -+ hierarchy_mask, -+ default_weight); -+ } ++ return reg->gpu_alloc->pages; +} + -+/** -+ * @brief Reset the status of a job -+ * -+ * This performs the following functions : -+ * -+ * - Reset the Job Status field of each job to NOT_STARTED. -+ * - Set the Job Type field of any Vertex Jobs to Null Job. -+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of -+ * the tiler_heap_free parameter, and set the hierarchy level mask to the -+ * hier_mask parameter. -+ * - Offset HW dependencies by the hw_job_id_offset parameter -+ * - Set the Perform Job Barrier flag if this job is the first in the chain -+ * - Read the address of the next job header -+ * -+ * @param[in] kctx Context pointer -+ * @param[in,out] job_header Address of job header to reset. Set to address -+ * of next job header on exit. -+ * @param[in] prev_jc Previous job chain to link to, if this job is -+ * the last in the chain. -+ * @param[in] hw_job_id_offset Offset for HW job IDs -+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to -+ * @param[in] hierarchy_mask The hierarchy mask to use -+ * @param[in] default_weight Default hierarchy weight to write when no other -+ * weight is given in the FBD -+ * @param[in] first_in_chain true if this job is the first in the chain -+ * @param[in] fragment_chain true if this job is in the fragment chain -+ * -+ * @return 0 on success, error code on failure -+ */ -+static int kbasep_replay_reset_job(struct kbase_context *kctx, -+ u64 *job_header, u64 prev_jc, -+ u64 tiler_heap_free, u16 hierarchy_mask, -+ u32 default_weight, u16 hw_job_id_offset, -+ bool first_in_chain, bool fragment_chain) ++static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) +{ -+ struct fragment_job *frag_job; -+ struct job_descriptor_header *job; -+ u64 new_job_header; -+ struct kbase_vmap_struct map; ++ KBASE_DEBUG_ASSERT(reg); ++ /* if no alloc object the backed size naturally is 0 */ ++ if (!reg->cpu_alloc) ++ return 0; + -+ frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); -+ if (!frag_job) { -+ dev_err(kctx->kbdev->dev, -+ "kbasep_replay_parse_jc: failed to map jc\n"); -+ return -EINVAL; -+ } -+ job = &frag_job->header; ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + -+ dump_job_head(kctx, "Job header:", job); ++ return reg->cpu_alloc->nents; ++} + -+ if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { -+ dev_err(kctx->kbdev->dev, "Job already not started\n"); -+ goto out_unmap; -+ } -+ job->exception_status = JOB_NOT_STARTED; ++#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ + -+ if (job->job_type == JOB_TYPE_VERTEX) -+ job->job_type = JOB_TYPE_NULL; ++static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) ++{ ++ struct kbase_mem_phy_alloc *alloc; ++ size_t alloc_size = sizeof(*alloc) + 
sizeof(*alloc->pages) * nr_pages; ++ size_t per_page_size = sizeof(*alloc->pages); + -+ if (job->job_type == JOB_TYPE_FUSED) { -+ dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); -+ goto out_unmap; ++ /* Imported pages may have page private data already in use */ ++ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ alloc_size += nr_pages * ++ sizeof(*alloc->imported.user_buf.dma_addrs); ++ per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); + } + -+ if (first_in_chain) -+ job->job_barrier = 1; -+ -+ if ((job->job_dependency_index_1 + hw_job_id_offset) > -+ JOB_HEADER_ID_MAX || -+ (job->job_dependency_index_2 + hw_job_id_offset) > -+ JOB_HEADER_ID_MAX || -+ (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { -+ dev_err(kctx->kbdev->dev, -+ "Job indicies/dependencies out of valid range\n"); -+ goto out_unmap; -+ } ++ /* ++ * Prevent nr_pages*per_page_size + sizeof(*alloc) from ++ * wrapping around. ++ */ ++ if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) ++ / per_page_size)) ++ return ERR_PTR(-ENOMEM); + -+ if (job->job_dependency_index_1) -+ job->job_dependency_index_1 += hw_job_id_offset; -+ if (job->job_dependency_index_2) -+ job->job_dependency_index_2 += hw_job_id_offset; ++ /* Allocate based on the size to reduce internal fragmentation of vmem */ ++ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) ++ alloc = vzalloc(alloc_size); ++ else ++ alloc = kzalloc(alloc_size, GFP_KERNEL); + -+ job->job_index += hw_job_id_offset; ++ if (!alloc) ++ return ERR_PTR(-ENOMEM); + -+ if (job->job_descriptor_size) { -+ new_job_header = job->next_job._64; -+ if (!job->next_job._64) -+ job->next_job._64 = prev_jc; -+ } else { -+ new_job_header = job->next_job._32; -+ if (!job->next_job._32) -+ job->next_job._32 = prev_jc; -+ } -+ dump_job_head(kctx, "Updated to:", job); ++ /* Store allocation method */ ++ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) ++ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; + -+ if (job->job_type == JOB_TYPE_TILER) { -+ bool job_64 = job->job_descriptor_size != 0; ++ kref_init(&alloc->kref); ++ atomic_set(&alloc->gpu_mappings, 0); ++ alloc->nents = 0; ++ alloc->pages = (void *)(alloc + 1); ++ INIT_LIST_HEAD(&alloc->mappings); ++ alloc->type = type; + -+ if (kbasep_replay_reset_tiler_job(kctx, *job_header, -+ tiler_heap_free, hierarchy_mask, -+ default_weight, job_64) != 0) -+ goto out_unmap; ++ if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) ++ alloc->imported.user_buf.dma_addrs = ++ (void *) (alloc->pages + nr_pages); + -+ } else if (job->job_type == JOB_TYPE_FRAGMENT) { -+ u64 fbd_address; ++ return alloc; ++} + -+ if (job->job_descriptor_size) -+ fbd_address = frag_job->fragment_fbd._64; -+ else -+ fbd_address = (u64)frag_job->fragment_fbd._32; ++static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, ++ struct kbase_context *kctx) ++{ ++ KBASE_DEBUG_ASSERT(reg); ++ KBASE_DEBUG_ASSERT(!reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(!reg->gpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); + -+ if (fbd_address & FBD_TYPE) { -+ if (kbasep_replay_reset_mfbd(kctx, -+ fbd_address & FBD_POINTER_MASK, -+ tiler_heap_free, -+ hierarchy_mask, -+ default_weight) != 0) -+ goto out_unmap; -+ } else { -+ if (kbasep_replay_reset_sfbd(kctx, -+ fbd_address & FBD_POINTER_MASK, -+ tiler_heap_free, -+ hierarchy_mask, -+ default_weight) != 0) -+ goto out_unmap; -+ } ++ reg->cpu_alloc = kbase_alloc_create(reg->nr_pages, ++ KBASE_MEM_TYPE_NATIVE); ++ if (IS_ERR(reg->cpu_alloc)) ++ return PTR_ERR(reg->cpu_alloc); ++ else if 
(!reg->cpu_alloc) ++ return -ENOMEM; ++ reg->cpu_alloc->imported.kctx = kctx; ++ INIT_LIST_HEAD(®->cpu_alloc->evict_node); ++ if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) ++ && (reg->flags & KBASE_REG_CPU_CACHED)) { ++ reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, ++ KBASE_MEM_TYPE_NATIVE); ++ reg->gpu_alloc->imported.kctx = kctx; ++ INIT_LIST_HEAD(®->gpu_alloc->evict_node); ++ } else { ++ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + } + -+ kbase_vunmap(kctx, &map); -+ -+ *job_header = new_job_header; -+ ++ INIT_LIST_HEAD(®->jit_node); ++ reg->flags &= ~KBASE_REG_FREE; + return 0; -+ -+out_unmap: -+ kbase_vunmap(kctx, &map); -+ return -EINVAL; +} + -+/** -+ * @brief Find the highest job ID in a job chain -+ * -+ * @param[in] kctx Context pointer -+ * @param[in] jc Job chain start address -+ * @param[out] hw_job_id Highest job ID in chain -+ * -+ * @return 0 on success, error code on failure -+ */ -+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, -+ u64 jc, u16 *hw_job_id) ++static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) +{ -+ while (jc) { -+ struct job_descriptor_header *job; -+ struct kbase_vmap_struct map; -+ -+ dev_dbg(kctx->kbdev->dev, -+ "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc); -+ -+ job = kbase_vmap(kctx, jc, sizeof(*job), &map); -+ if (!job) { -+ dev_err(kctx->kbdev->dev, "failed to map jc\n"); -+ -+ return -EINVAL; -+ } -+ -+ if (job->job_index > *hw_job_id) -+ *hw_job_id = job->job_index; ++ int new_val = atomic_add_return(num_pages, used_pages); ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_total_alloc_pages_change((long long int)new_val); ++#endif ++ return new_val; ++} + -+ if (job->job_descriptor_size) -+ jc = job->next_job._64; -+ else -+ jc = job->next_job._32; ++static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) ++{ ++ int new_val = atomic_sub_return(num_pages, used_pages); ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_total_alloc_pages_change((long long int)new_val); ++#endif ++ return new_val; ++} + -+ kbase_vunmap(kctx, &map); -+ } ++/* ++ * Max size for kbdev memory pool (in pages) ++ */ ++#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) + -+ return 0; -+} ++/* ++ * Max size for kctx memory pool (in pages) ++ */ ++#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) + +/** -+ * @brief Reset the status of a number of jobs ++ * kbase_mem_pool_init - Create a memory pool for a kbase device ++ * @pool: Memory pool to initialize ++ * @max_size: Maximum number of free pages the pool can hold ++ * @kbdev: Kbase device where memory is used ++ * @next_pool: Pointer to the next pool or NULL. + * -+ * This function walks the provided job chain, and calls -+ * kbasep_replay_reset_job for each job. It also links the job chain to the -+ * provided previous job chain. ++ * Allocations from @pool are in whole pages. Each @pool has a free list where ++ * pages can be quickly allocated from. The free list is initially empty and ++ * filled whenever pages are freed back to the pool. The number of free pages ++ * in the pool will in general not exceed @max_size, but the pool may in ++ * certain corner cases grow above @max_size. + * -+ * The function will fail if any of the jobs passed already have status of -+ * NOT_STARTED. ++ * If @next_pool is not NULL, we will allocate from @next_pool before going to ++ * the kernel allocator. Similarily pages can spill over to @next_pool when ++ * @pool is full. 
Pages are zeroed before they spill over to another pool, to ++ * prevent leaking information between applications. + * -+ * @param[in] kctx Context pointer -+ * @param[in] jc Job chain to be processed -+ * @param[in] prev_jc Job chain to be added to. May be NULL -+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to -+ * @param[in] hierarchy_mask The hierarchy mask to use -+ * @param[in] default_weight Default hierarchy weight to write when no other -+ * weight is given in the FBD -+ * @param[in] hw_job_id_offset Offset for HW job IDs -+ * @param[in] fragment_chain true if this chain is the fragment chain ++ * A shrinker is registered so that Linux mm can reclaim pages from the pool as ++ * needed. + * -+ * @return 0 on success, error code otherwise ++ * Return: 0 on success, negative -errno on error + */ -+static int kbasep_replay_parse_jc(struct kbase_context *kctx, -+ u64 jc, u64 prev_jc, -+ u64 tiler_heap_free, u16 hierarchy_mask, -+ u32 default_weight, u16 hw_job_id_offset, -+ bool fragment_chain) -+{ -+ bool first_in_chain = true; -+ int nr_jobs = 0; -+ -+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n", -+ jc, hw_job_id_offset); -+ -+ while (jc) { -+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc); -+ -+ if (kbasep_replay_reset_job(kctx, &jc, prev_jc, -+ tiler_heap_free, hierarchy_mask, -+ default_weight, hw_job_id_offset, -+ first_in_chain, fragment_chain) != 0) -+ return -EINVAL; -+ -+ first_in_chain = false; -+ -+ nr_jobs++; -+ if (fragment_chain && -+ nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) { -+ dev_err(kctx->kbdev->dev, -+ "Exceeded maximum number of jobs in fragment chain\n"); -+ return -EINVAL; -+ } -+ } -+ -+ return 0; -+} ++int kbase_mem_pool_init(struct kbase_mem_pool *pool, ++ size_t max_size, ++ struct kbase_device *kbdev, ++ struct kbase_mem_pool *next_pool); + +/** -+ * @brief Reset the status of a replay job, and set up dependencies -+ * -+ * This performs the actions to allow the replay job to be re-run following -+ * completion of the passed dependency. ++ * kbase_mem_pool_term - Destroy a memory pool ++ * @pool: Memory pool to destroy + * -+ * @param[in] katom The atom to be reset -+ * @param[in] dep_atom The dependency to be attached to the atom ++ * Pages in the pool will spill over to @next_pool (if available) or freed to ++ * the kernel. + */ -+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom, -+ struct kbase_jd_atom *dep_atom) -+{ -+ katom->status = KBASE_JD_ATOM_STATE_QUEUED; -+ kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA); -+ list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]); -+} ++void kbase_mem_pool_term(struct kbase_mem_pool *pool); + +/** -+ * @brief Allocate an unused katom -+ * -+ * This will search the provided context for an unused katom, and will mark it -+ * as KBASE_JD_ATOM_STATE_QUEUED. ++ * kbase_mem_pool_alloc - Allocate a page from memory pool ++ * @pool: Memory pool to allocate from + * -+ * If no atoms are available then the function will fail. ++ * Allocations from the pool are made as follows: ++ * 1. If there are free pages in the pool, allocate a page from @pool. ++ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page ++ * from @next_pool. ++ * 3. Return NULL if no memory in the pool + * -+ * @param[in] kctx Context pointer -+ * @return An atom ID, or -1 on failure ++ * Return: Pointer to allocated page, or NULL if allocation failed. 
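++ *
++ * Minimal usage sketch (illustrative only; assumes @pool and the owning
++ * kbdev are already set up by the caller):
++ *
++ *   struct page *p = kbase_mem_pool_alloc(pool);
++ *   if (!p)
++ *           p = kbase_mem_alloc_page(kbdev);   (pool empty, fall back)
++ *   ...
++ *   kbase_mem_pool_free(pool, p, false);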
+ */ -+static int kbasep_allocate_katom(struct kbase_context *kctx) -+{ -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ int i; -+ -+ for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { -+ if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { -+ jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED; -+ dev_dbg(kctx->kbdev->dev, -+ "kbasep_allocate_katom: Allocated atom %d\n", -+ i); -+ return i; -+ } -+ } -+ -+ return -1; -+} ++struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); + +/** -+ * @brief Release a katom ++ * kbase_mem_pool_free - Free a page to memory pool ++ * @pool: Memory pool where page should be freed ++ * @page: Page to free to the pool ++ * @dirty: Whether some of the page may be dirty in the cache. + * -+ * This will mark the provided atom as available, and remove any dependencies. ++ * Pages are freed to the pool as follows: ++ * 1. If @pool is not full, add @page to @pool. ++ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to ++ * @next_pool. ++ * 3. Finally, free @page to the kernel. ++ */ ++void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, ++ bool dirty); ++ ++/** ++ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool ++ * @pool: Memory pool to allocate from ++ * @nr_pages: Number of pages to allocate ++ * @pages: Pointer to array where the physical address of the allocated ++ * pages will be stored. + * -+ * For use on error path. ++ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. + * -+ * @param[in] kctx Context pointer -+ * @param[in] atom_id ID of atom to release ++ * Return: 0 on success, negative -errno on error + */ -+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id) -+{ -+ struct kbase_jd_context *jctx = &kctx->jctx; -+ -+ dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n", -+ atom_id); -+ -+ while (!list_empty(&jctx->atoms[atom_id].dep_head[0])) -+ list_del(jctx->atoms[atom_id].dep_head[0].next); ++int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ phys_addr_t *pages); + -+ while (!list_empty(&jctx->atoms[atom_id].dep_head[1])) -+ list_del(jctx->atoms[atom_id].dep_head[1].next); ++/** ++ * kbase_mem_pool_free_pages - Free pages to memory pool ++ * @pool: Memory pool where pages should be freed ++ * @nr_pages: Number of pages to free ++ * @pages: Pointer to array holding the physical addresses of the pages to ++ * free. ++ * @dirty: Whether any pages may be dirty in the cache. ++ * @reclaimed: Whether the pages where reclaimable and thus should bypass ++ * the pool and go straight to the kernel. ++ * ++ * Like kbase_mem_pool_free() but optimized for freeing many pages. ++ */ ++void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ phys_addr_t *pages, bool dirty, bool reclaimed); + -+ jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED; ++/** ++ * kbase_mem_pool_size - Get number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * ++ * Note: the size of the pool may in certain corner cases exceed @max_size! 
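++ * (For instance after kbase_mem_pool_grow(), which may add pages beyond
++ * the configured maximum.)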
++ * ++ * Return: Number of free pages in the pool ++ */ ++static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) ++{ ++ return READ_ONCE(pool->cur_size); +} + -+static void kbasep_replay_create_atom(struct kbase_context *kctx, -+ struct base_jd_atom_v2 *atom, -+ int atom_nr, -+ base_jd_prio prio) ++/** ++ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * ++ * Return: Maximum number of free pages in the pool ++ */ ++static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) +{ -+ atom->nr_extres = 0; -+ atom->extres_list.value = NULL; -+ atom->device_nr = 0; -+ atom->prio = prio; -+ atom->atom_number = atom_nr; -+ -+ base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID); -+ base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID); -+ -+ atom->udata.blob[0] = 0; -+ atom->udata.blob[1] = 0; ++ return pool->max_size; +} + ++ +/** -+ * @brief Create two atoms for the purpose of replaying jobs ++ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool ++ * @pool: Memory pool to inspect ++ * @max_size: Maximum number of free pages the pool can hold + * -+ * Two atoms are allocated and created. The jc pointer is not set at this -+ * stage. The second atom has a dependency on the first. The remaining fields -+ * are set up as follows : ++ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. ++ * For details see kbase_mem_pool_shrink(). ++ */ ++void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); ++ ++/** ++ * kbase_mem_pool_grow - Grow the pool ++ * @pool: Memory pool to grow ++ * @nr_to_grow: Number of pages to add to the pool + * -+ * - No external resources. Any required external resources will be held by the -+ * replay atom. -+ * - device_nr is set to 0. This is not relevant as -+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. -+ * - Priority is inherited from the replay job. ++ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to ++ * become larger than the maximum size specified. + * -+ * @param[out] t_atom Atom to use for tiler jobs -+ * @param[out] f_atom Atom to use for fragment jobs -+ * @param[in] prio Priority of new atom (inherited from replay soft -+ * job) -+ * @return 0 on success, error code on failure ++ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages + */ -+static int kbasep_replay_create_atoms(struct kbase_context *kctx, -+ struct base_jd_atom_v2 *t_atom, -+ struct base_jd_atom_v2 *f_atom, -+ base_jd_prio prio) -+{ -+ int t_atom_nr, f_atom_nr; ++int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); + -+ t_atom_nr = kbasep_allocate_katom(kctx); -+ if (t_atom_nr < 0) { -+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); -+ return -EINVAL; -+ } ++/** ++ * kbase_mem_pool_trim - Grow or shrink the pool to a new size ++ * @pool: Memory pool to trim ++ * @new_size: New number of pages in the pool ++ * ++ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but ++ * not above the max_size for the pool. ++ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. 
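++ *
++ * For example, kbase_mem_pool_trim(pool, 0) releases every free page
++ * currently held by the pool back to the kernel.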
++ */ ++void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); + -+ f_atom_nr = kbasep_allocate_katom(kctx); -+ if (f_atom_nr < 0) { -+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); -+ kbasep_release_katom(kctx, t_atom_nr); -+ return -EINVAL; -+ } ++/* ++ * kbase_mem_alloc_page - Allocate a new page for a device ++ * @kbdev: The kbase device ++ * ++ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that ++ * function can fail in the event the pool is empty. ++ * ++ * Return: A new page or NULL if no memory ++ */ ++struct page *kbase_mem_alloc_page(struct kbase_device *kbdev); + -+ kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); -+ kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); ++int kbase_region_tracker_init(struct kbase_context *kctx); ++int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); ++void kbase_region_tracker_term(struct kbase_context *kctx); + -+ base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA); ++struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); + -+ return 0; -+} ++/** ++ * @brief Check that a pointer is actually a valid region. ++ * ++ * Must be called with context lock held. ++ */ ++struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr); + -+#ifdef CONFIG_MALI_DEBUG -+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload) -+{ -+ u64 next; ++struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone); ++void kbase_free_alloced_region(struct kbase_va_region *reg); ++int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + -+ dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n"); -+ next = payload->tiler_jc_list; ++bool kbase_check_alloc_flags(unsigned long flags); ++bool kbase_check_import_flags(unsigned long flags); + -+ while (next) { -+ struct kbase_vmap_struct map; -+ base_jd_replay_jc *jc_struct; ++/** ++ * kbase_update_region_flags - Convert user space flags to kernel region flags ++ * ++ * @kctx: kbase context ++ * @reg: The region to update the flags on ++ * @flags: The flags passed from user space ++ * ++ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and ++ * this function will fail if the system does not support system coherency. 
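++ *
++ * This mirrors the order used by kbase_mem_alloc() in
++ * mali_kbase_mem_linux.c: reserve a region, then resolve its flags.
++ * Sketch only (reg, err, va_pages and flags are the caller's locals):
++ *
++ *   reg = kbase_alloc_free_region(kctx, 0, va_pages, KBASE_REG_ZONE_CUSTOM_VA);
++ *   err = kbase_update_region_flags(kctx, reg, flags);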
++ * ++ * Return: 0 if successful, -EINVAL if the flags are not supported ++ */ ++int kbase_update_region_flags(struct kbase_context *kctx, ++ struct kbase_va_region *reg, unsigned long flags); + -+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map); ++void kbase_gpu_vm_lock(struct kbase_context *kctx); ++void kbase_gpu_vm_unlock(struct kbase_context *kctx); + -+ if (!jc_struct) -+ return; ++int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + -+ dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n", -+ jc_struct, jc_struct->jc, jc_struct->next); ++int kbase_mmu_init(struct kbase_context *kctx); ++void kbase_mmu_term(struct kbase_context *kctx); + -+ next = jc_struct->next; ++phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); ++void kbase_mmu_free_pgd(struct kbase_context *kctx); ++int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t *phys, size_t nr, ++ unsigned long flags); ++int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t *phys, size_t nr, ++ unsigned long flags); ++int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t phys, size_t nr, ++ unsigned long flags); + -+ kbase_vunmap(kctx, &map); -+ } -+} -+#endif ++int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr); ++int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); + +/** -+ * @brief Parse a base_jd_replay_payload provided by userspace -+ * -+ * This will read the payload from userspace, and parse the job chains. ++ * @brief Register region and map it on the GPU. + * -+ * @param[in] kctx Context pointer -+ * @param[in] replay_atom Replay soft job atom -+ * @param[in] t_atom Atom to use for tiler jobs -+ * @param[in] f_atom Atom to use for fragment jobs -+ * @return 0 on success, error code on failure ++ * Call kbase_add_va_region() and map the region on the GPU. + */ -+static int kbasep_replay_parse_payload(struct kbase_context *kctx, -+ struct kbase_jd_atom *replay_atom, -+ struct base_jd_atom_v2 *t_atom, -+ struct base_jd_atom_v2 *f_atom) -+{ -+ base_jd_replay_payload *payload = NULL; -+ u64 next; -+ u64 prev_jc = 0; -+ u16 hw_job_id_offset = 0; -+ int ret = -EINVAL; -+ struct kbase_vmap_struct map; -+ -+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n", -+ replay_atom->jc, sizeof(payload)); -+ -+ payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); -+ if (!payload) { -+ dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); -+ return -EINVAL; -+ } -+ -+#ifdef BASE_LEGACY_UK10_2_SUPPORT -+ if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) { -+ base_jd_replay_payload_uk10_2 *payload_uk10_2; -+ u16 tiler_core_req; -+ u16 fragment_core_req; ++int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + -+ payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload; -+ memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req, -+ sizeof(tiler_core_req)); -+ memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req, -+ sizeof(fragment_core_req)); -+ payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff); -+ payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff); -+ } -+#endif /* BASE_LEGACY_UK10_2_SUPPORT */ ++/** ++ * @brief Remove the region from the GPU and unregister it. 
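++ *
++ * The inverse of kbase_gpu_mmap(). A typical teardown, sketch only
++ * (err is an illustrative local, error handling elided):
++ *
++ *   kbase_gpu_vm_lock(kctx);
++ *   err = kbase_gpu_munmap(kctx, reg);
++ *   kbase_gpu_vm_unlock(kctx);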
++ * ++ * Must be called with context lock held. ++ */ ++int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); + -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); -+ dev_dbg(kctx->kbdev->dev, "Payload structure:\n" -+ "tiler_jc_list = %llx\n" -+ "fragment_jc = %llx\n" -+ "tiler_heap_free = %llx\n" -+ "fragment_hierarchy_mask = %x\n" -+ "tiler_hierarchy_mask = %x\n" -+ "hierarchy_default_weight = %x\n" -+ "tiler_core_req = %x\n" -+ "fragment_core_req = %x\n", -+ payload->tiler_jc_list, -+ payload->fragment_jc, -+ payload->tiler_heap_free, -+ payload->fragment_hierarchy_mask, -+ payload->tiler_hierarchy_mask, -+ payload->hierarchy_default_weight, -+ payload->tiler_core_req, -+ payload->fragment_core_req); -+ payload_dump(kctx, payload); -+#endif -+ t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; -+ f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; ++/** ++ * The caller has the following locking conditions: ++ * - It must hold kbase_device->mmu_hw_mutex ++ * - It must hold the hwaccess_lock ++ */ ++void kbase_mmu_update(struct kbase_context *kctx); + -+ /* Sanity check core requirements*/ -+ if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || -+ (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || -+ t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || -+ f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { ++/** ++ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. ++ * @kctx: Kbase context ++ * ++ * Disable and perform the required cache maintenance to remove the all ++ * data from provided kbase context from the GPU caches. ++ * ++ * The caller has the following locking conditions: ++ * - It must hold kbase_device->mmu_hw_mutex ++ * - It must hold the hwaccess_lock ++ */ ++void kbase_mmu_disable(struct kbase_context *kctx); + -+ int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; -+ int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; -+ int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; -+ int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; ++/** ++ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified ++ * address space. ++ * @kbdev: Kbase device ++ * @as_nr: The address space number to set to unmapped. ++ * ++ * This function must only be called during reset/power-up and it used to ++ * ensure the registers are in a known state. ++ * ++ * The caller must hold kbdev->mmu_hw_mutex. ++ */ ++void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + -+ if (t_atom_type != BASE_JD_REQ_T) { -+ dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x", -+ t_atom_type, BASE_JD_REQ_T); -+ } -+ if (f_atom_type != BASE_JD_REQ_FS) { -+ dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. 
Was 0x%x Expected: 0x%x\n", -+ f_atom_type, BASE_JD_REQ_FS); -+ } -+ if (t_has_ex_res) { -+ dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); -+ } -+ if (f_has_ex_res) { -+ dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); -+ } ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + -+ goto out; -+ } ++/** Dump the MMU tables to a buffer ++ * ++ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the ++ * buffer is too small then the return value will be NULL. ++ * ++ * The GPU vm lock must be held when calling this function. ++ * ++ * The buffer returned should be freed with @ref vfree when it is no longer required. ++ * ++ * @param[in] kctx The kbase context to dump ++ * @param[in] nr_pages The number of pages to allocate for the buffer. ++ * ++ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too ++ * small) ++ */ ++void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); + -+ /* Process tiler job chains */ -+ next = payload->tiler_jc_list; -+ if (!next) { -+ dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n"); -+ goto out; -+ } ++/** ++ * kbase_sync_now - Perform cache maintenance on a memory region ++ * ++ * @kctx: The kbase context of the region ++ * @sset: A syncset structure describing the region and direction of the ++ * synchronisation required ++ * ++ * Return: 0 on success or error code ++ */ ++int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); ++void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa, ++ phys_addr_t gpu_pa, off_t offset, size_t size, ++ enum kbase_sync_type sync_fn); ++void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); ++void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); + -+ while (next) { -+ base_jd_replay_jc *jc_struct; -+ struct kbase_vmap_struct jc_map; -+ u64 jc; ++/* OS specific functions */ ++int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); ++int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); ++void kbase_os_mem_map_lock(struct kbase_context *kctx); ++void kbase_os_mem_map_unlock(struct kbase_context *kctx); + -+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map); ++/** ++ * @brief Update the memory allocation counters for the current process ++ * ++ * OS specific call to updates the current memory allocation counters for the current process with ++ * the supplied delta. ++ * ++ * @param[in] kctx The kbase context ++ * @param[in] pages The desired delta to apply to the memory usage counters. ++ */ + -+ if (!jc_struct) { -+ dev_err(kctx->kbdev->dev, "Failed to map jc struct\n"); -+ goto out; -+ } ++void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + -+ jc = jc_struct->jc; -+ next = jc_struct->next; -+ if (next) -+ jc_struct->jc = 0; ++/** ++ * @brief Add to the memory allocation counters for the current process ++ * ++ * OS specific call to add to the current memory allocation counters for the current process by ++ * the supplied amount. ++ * ++ * @param[in] kctx The kernel base context used for the allocation. ++ * @param[in] pages The desired delta to apply to the memory usage counters. 
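++ *
++ * The eviction helpers in mali_kbase_mem_linux.c use this pair
++ * symmetrically, e.g. (sketch):
++ *
++ *   kbase_process_page_usage_dec(kctx, alloc->nents);  when pages become reclaimable
++ *   kbase_process_page_usage_inc(kctx, alloc->nents);  when the backing is used again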
++ */ + -+ kbase_vunmap(kctx, &jc_map); ++static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) ++{ ++ kbasep_os_process_page_usage_update(kctx, pages); ++} + -+ if (jc) { -+ u16 max_hw_job_id = 0; ++/** ++ * @brief Subtract from the memory allocation counters for the current process ++ * ++ * OS specific call to subtract from the current memory allocation counters for the current process by ++ * the supplied amount. ++ * ++ * @param[in] kctx The kernel base context used for the allocation. ++ * @param[in] pages The desired delta to apply to the memory usage counters. ++ */ + -+ if (kbasep_replay_find_hw_job_id(kctx, jc, -+ &max_hw_job_id) != 0) -+ goto out; ++static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) ++{ ++ kbasep_os_process_page_usage_update(kctx, 0 - pages); ++} + -+ if (kbasep_replay_parse_jc(kctx, jc, prev_jc, -+ payload->tiler_heap_free, -+ payload->tiler_hierarchy_mask, -+ payload->hierarchy_default_weight, -+ hw_job_id_offset, false) != 0) { -+ goto out; -+ } ++/** ++ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU ++ * mapping of a memory allocation containing a given address range ++ * ++ * Searches for a CPU mapping of any part of any region that fully encloses the ++ * CPU virtual address range specified by @uaddr and @size. Returns a failure ++ * indication if only part of the address range lies within a CPU mapping. ++ * ++ * @kctx: The kernel base context used for the allocation. ++ * @uaddr: Start of the CPU virtual address range. ++ * @size: Size of the CPU virtual address range (in bytes). ++ * @offset: The offset from the start of the allocation to the specified CPU ++ * virtual address. ++ * ++ * Return: 0 if offset was obtained successfully. Error code otherwise. ++ */ ++int kbasep_find_enclosing_cpu_mapping_offset( ++ struct kbase_context *kctx, ++ unsigned long uaddr, size_t size, u64 *offset); + -+ hw_job_id_offset += max_hw_job_id; ++enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer); ++void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); ++void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + -+ prev_jc = jc; -+ } -+ } -+ t_atom->jc = prev_jc; ++/** ++* @brief Allocates physical pages. ++* ++* Allocates \a nr_pages_requested and updates the alloc object. ++* ++* @param[in] alloc allocation object to add pages to ++* @param[in] nr_pages_requested number of physical pages to allocate ++* ++* @return 0 if all pages have been successfully allocated. Error code otherwise ++*/ ++int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); + -+ /* Process fragment job chain */ -+ f_atom->jc = payload->fragment_jc; -+ if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0, -+ payload->tiler_heap_free, -+ payload->fragment_hierarchy_mask, -+ payload->hierarchy_default_weight, 0, -+ true) != 0) { -+ goto out; -+ } ++/** ++* @brief Free physical pages. ++* ++* Frees \a nr_pages and updates the alloc object. 
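++*
++* The eviction path in mali_kbase_mem_linux.c pairs this with
++* kbase_alloc_phy_pages_helper(), e.g. (sketch):
++*
++*   kbase_free_phy_pages_helper(alloc, alloc->evicted);   release the backing
++*   kbase_alloc_phy_pages_helper(alloc, alloc->evicted);  restore the backing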
++* ++* @param[in] alloc allocation object to free pages from ++* @param[in] nr_pages_to_free number of physical pages to free ++*/ ++int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + -+ if (!t_atom->jc || !f_atom->jc) { -+ dev_err(kctx->kbdev->dev, "Invalid payload\n"); -+ goto out; ++static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) ++{ ++ SetPagePrivate(p); ++ if (sizeof(dma_addr_t) > sizeof(p->private)) { ++ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the ++ * private field stays the same. So we have to be clever and ++ * use the fact that we only store DMA addresses of whole pages, ++ * so the low bits should be zero */ ++ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); ++ set_page_private(p, dma_addr >> PAGE_SHIFT); ++ } else { ++ set_page_private(p, dma_addr); + } ++} + -+ dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n", -+ t_atom->jc, f_atom->jc); -+ ret = 0; -+ -+out: -+ kbase_vunmap(kctx, &map); ++static inline dma_addr_t kbase_dma_addr(struct page *p) ++{ ++ if (sizeof(dma_addr_t) > sizeof(p->private)) ++ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; + -+ return ret; ++ return (dma_addr_t)page_private(p); +} + -+static void kbase_replay_process_worker(struct work_struct *data) ++static inline void kbase_clear_dma_addr(struct page *p) +{ -+ struct kbase_jd_atom *katom; -+ struct kbase_context *kctx; -+ struct kbase_jd_context *jctx; -+ bool need_to_try_schedule_context = false; ++ ClearPagePrivate(p); ++} + -+ struct base_jd_atom_v2 t_atom, f_atom; -+ struct kbase_jd_atom *t_katom, *f_katom; -+ base_jd_prio atom_prio; ++/** ++* @brief Process a bus or page fault. ++* ++* This function will process a fault on a specific address space ++* ++* @param[in] kbdev The @ref kbase_device the fault happened on ++* @param[in] kctx The @ref kbase_context for the faulting address space if ++* one was found. ++* @param[in] as The address space that has the fault ++*/ ++void kbase_mmu_interrupt_process(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_as *as); + -+ katom = container_of(data, struct kbase_jd_atom, work); -+ kctx = katom->kctx; -+ jctx = &kctx->jctx; ++/** ++ * @brief Process a page fault. ++ * ++ * @param[in] data work_struct passed by queue_work() ++ */ ++void page_fault_worker(struct work_struct *data); + -+ mutex_lock(&jctx->lock); ++/** ++ * @brief Process a bus fault. ++ * ++ * @param[in] data work_struct passed by queue_work() ++ */ ++void bus_fault_worker(struct work_struct *data); + -+ atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority); ++/** ++ * @brief Flush MMU workqueues. ++ * ++ * This function will cause any outstanding page or bus faults to be processed. ++ * It should be called prior to powering off the GPU. 
++ * ++ * @param[in] kbdev Device pointer ++ */ ++void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + -+ if (kbasep_replay_create_atoms( -+ kctx, &t_atom, &f_atom, atom_prio) != 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ goto out; -+ } ++/** ++ * kbase_sync_single_for_device - update physical memory and give GPU ownership ++ * @kbdev: Device pointer ++ * @handle: DMA address of region ++ * @size: Size of region to sync ++ * @dir: DMA data direction ++ */ + -+ t_katom = &jctx->atoms[t_atom.atom_number]; -+ f_katom = &jctx->atoms[f_atom.atom_number]; ++void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir); + -+ if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) { -+ kbasep_release_katom(kctx, t_atom.atom_number); -+ kbasep_release_katom(kctx, f_atom.atom_number); -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ goto out; -+ } ++/** ++ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership ++ * @kbdev: Device pointer ++ * @handle: DMA address of region ++ * @size: Size of region to sync ++ * @dir: DMA data direction ++ */ + -+ kbasep_replay_reset_softjob(katom, f_katom); ++void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, ++ size_t size, enum dma_data_direction dir); + -+ need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom); -+ if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { -+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); -+ kbasep_release_katom(kctx, f_atom.atom_number); -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ goto out; -+ } -+ need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom); -+ if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { -+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ goto out; -+ } ++#ifdef CONFIG_DEBUG_FS ++/** ++ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. ++ * @kctx: kbase context ++ */ ++void kbase_jit_debugfs_init(struct kbase_context *kctx); ++#endif /* CONFIG_DEBUG_FS */ + -+ katom->event_code = BASE_JD_EVENT_DONE; ++/** ++ * kbase_jit_init - Initialize the JIT memory pool management ++ * @kctx: kbase context ++ * ++ * Returns zero on success or negative error number on failure. ++ */ ++int kbase_jit_init(struct kbase_context *kctx); + -+out: -+ if (katom->event_code != BASE_JD_EVENT_DONE) { -+ kbase_disjoint_state_down(kctx->kbdev); ++/** ++ * kbase_jit_allocate - Allocate JIT memory ++ * @kctx: kbase context ++ * @info: JIT allocation information ++ * ++ * Return: JIT allocation on success or NULL on failure. ++ */ ++struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, ++ struct base_jit_alloc_info *info); + -+ need_to_try_schedule_context |= jd_done_nolock(katom, NULL); -+ } ++/** ++ * kbase_jit_free - Free a JIT allocation ++ * @kctx: kbase context ++ * @reg: JIT allocation ++ * ++ * Frees a JIT allocation and places it into the free pool for later reuse. 
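++ *
++ * Typical pairing with kbase_jit_allocate(), sketch only (info and the
++ * use_jit_region() consumer are illustrative, not taken from this driver):
++ *
++ *   reg = kbase_jit_allocate(kctx, &info);
++ *   if (reg) {
++ *           use_jit_region(reg);
++ *           kbase_jit_free(kctx, reg);
++ *   }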
++ */ ++void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + -+ if (need_to_try_schedule_context) -+ kbase_js_sched_all(kctx->kbdev); ++/** ++ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing ++ * @reg: JIT allocation ++ */ ++void kbase_jit_backing_lost(struct kbase_va_region *reg); + -+ mutex_unlock(&jctx->lock); -+} ++/** ++ * kbase_jit_evict - Evict a JIT allocation from the pool ++ * @kctx: kbase context ++ * ++ * Evict the least recently used JIT allocation from the pool. This can be ++ * required if normal VA allocations are failing due to VA exhaustion. ++ * ++ * Return: True if a JIT allocation was freed, false otherwise. ++ */ ++bool kbase_jit_evict(struct kbase_context *kctx); + +/** -+ * @brief Check job replay fault ++ * kbase_jit_term - Terminate the JIT memory pool management ++ * @kctx: kbase context ++ */ ++void kbase_jit_term(struct kbase_context *kctx); ++ ++/** ++ * kbase_map_external_resource - Map an external resource to the GPU. ++ * @kctx: kbase context. ++ * @reg: The region to map. ++ * @locked_mm: The mm_struct which has been locked for this operation. ++ * @kds_res_count: The number of KDS resources. ++ * @kds_resources: Array of KDS resources. ++ * @kds_access_bitmap: Access bitmap for KDS. ++ * @exclusive: If the KDS resource requires exclusive access. + * -+ * This will read the job payload, checks fault type and source, then decides -+ * whether replay is required. ++ * Return: The physical allocation which backs the region on success or NULL ++ * on failure. ++ */ ++struct kbase_mem_phy_alloc *kbase_map_external_resource( ++ struct kbase_context *kctx, struct kbase_va_region *reg, ++ struct mm_struct *locked_mm ++#ifdef CONFIG_KDS ++ , u32 *kds_res_count, struct kds_resource **kds_resources, ++ unsigned long *kds_access_bitmap, bool exclusive ++#endif ++ ); ++ ++/** ++ * kbase_unmap_external_resource - Unmap an external resource from the GPU. ++ * @kctx: kbase context. ++ * @reg: The region to unmap or NULL if it has already been released. ++ * @alloc: The physical allocation being unmapped. ++ */ ++void kbase_unmap_external_resource(struct kbase_context *kctx, ++ struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); ++ ++/** ++ * kbase_sticky_resource_init - Initialize sticky resource management. ++ * @kctx: kbase context + * -+ * @param[in] katom The atom to be processed -+ * @return true (success) if replay required or false on failure. ++ * Returns zero on success or negative error number on failure. + */ -+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct device *dev = kctx->kbdev->dev; -+ base_jd_replay_payload *payload; -+ u64 job_header; -+ u64 job_loop_detect; -+ struct job_descriptor_header *job; -+ struct kbase_vmap_struct job_map; -+ struct kbase_vmap_struct map; -+ bool err = false; ++int kbase_sticky_resource_init(struct kbase_context *kctx); + -+ /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or -+ * if force_replay is enabled. -+ */ -+ if (BASE_JD_EVENT_TERMINATED == katom->event_code) { -+ return false; -+ } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) { -+ return true; -+ } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) { -+ katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT; -+ return true; -+ } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) { -+ /* No replay for faults of type other than -+ * BASE_JD_EVENT_DATA_INVALID_FAULT. 
-+ */ -+ return false; -+ } ++/** ++ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. ++ * @kctx: kbase context. ++ * @gpu_addr: The GPU address of the external resource. ++ * ++ * Return: The metadata object which represents the binding between the ++ * external resource and the kbase context on success or NULL on failure. ++ */ ++struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( ++ struct kbase_context *kctx, u64 gpu_addr); + -+ /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc -+ * to find out whether the source of exception is POLYGON_LIST. Replay -+ * is required if the source of fault is POLYGON_LIST. -+ */ -+ payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map); -+ if (!payload) { -+ dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n"); -+ return false; -+ } ++/** ++ * kbase_sticky_resource_release - Release a reference on a sticky resource. ++ * @kctx: kbase context. ++ * @meta: Binding metadata. ++ * @gpu_addr: GPU address of the external resource. ++ * ++ * If meta is NULL then gpu_addr will be used to scan the metadata list and ++ * find the matching metadata (if any), otherwise the provided meta will be ++ * used and gpu_addr will be ignored. ++ * ++ * Return: True if the release found the metadata and the reference was dropped. ++ */ ++bool kbase_sticky_resource_release(struct kbase_context *kctx, ++ struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + -+#ifdef CONFIG_MALI_DEBUG -+ dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload); -+ dev_dbg(dev, "\nPayload structure:\n" -+ "fragment_jc = 0x%llx\n" -+ "fragment_hierarchy_mask = 0x%x\n" -+ "fragment_core_req = 0x%x\n", -+ payload->fragment_jc, -+ payload->fragment_hierarchy_mask, -+ payload->fragment_core_req); -+#endif -+ /* Process fragment job chain */ -+ job_header = (u64) payload->fragment_jc; -+ job_loop_detect = job_header; -+ while (job_header) { -+ job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map); -+ if (!job) { -+ dev_err(dev, "failed to map jc\n"); -+ /* unmap payload*/ -+ kbase_vunmap(kctx, &map); -+ return false; -+ } ++/** ++ * kbase_sticky_resource_term - Terminate sticky resource management. ++ * @kctx: kbase context ++ */ ++void kbase_sticky_resource_term(struct kbase_context *kctx); + ++#endif /* _KBASE_MEM_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +new file mode 100644 +index 000000000..de12cdf76 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +@@ -0,0 +1,2574 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ dump_job_head(kctx, "\njob_head structure:\n", job); + -+ /* Replay only when the polygon list reader caused the -+ * DATA_INVALID_FAULT */ -+ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && -+ (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { -+ err = true; -+ kbase_vunmap(kctx, &job_map); -+ break; -+ } + -+ /* Move on to next fragment job in the list */ -+ if (job->job_descriptor_size) -+ job_header = job->next_job._64; -+ else -+ job_header = job->next_job._32; + -+ kbase_vunmap(kctx, &job_map); + -+ /* Job chain loop detected */ -+ if (job_header == job_loop_detect) -+ break; -+ } ++/** ++ * @file mali_kbase_mem_linux.c ++ * Base kernel memory APIs, Linux implementation. ++ */ + -+ /* unmap payload*/ -+ kbase_vunmap(kctx, &map); ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ ++ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++#include ++#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ ++#ifdef CONFIG_DMA_SHARED_BUFFER ++#include ++#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ ++#include ++#include ++#include + -+ return err; -+} ++#include ++#include ++#include ++#include ++#include + ++static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); + +/** -+ * @brief Process a replay job -+ * -+ * Called from kbase_process_soft_job. ++ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region ++ * @new_pages: The number of pages after the shrink ++ * @old_pages: The number of pages before the shrink + * -+ * On exit, if the job has completed, katom->event_code will have been updated. -+ * If the job has not completed, and is replaying jobs, then the atom status -+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED. ++ * Shrink (or completely remove) all CPU mappings which reference the shrunk ++ * part of the allocation. + * -+ * @param[in] katom The atom to be processed -+ * @return false if the atom has completed -+ * true if the atom is replaying jobs ++ * Note: Caller must be holding the processes mmap_lock lock. + */ -+bool kbase_replay_process(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; ++static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages); + -+ /* Don't replay this atom if these issues are not present in the -+ * hardware */ -+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) && -+ !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) { -+ dev_dbg(kbdev->dev, "Hardware does not need replay workaround"); ++/** ++ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region or NULL if there isn't one ++ * @new_pages: The number of pages after the shrink ++ * @old_pages: The number of pages before the shrink ++ * ++ * Return: 0 on success, negative -errno on error ++ * ++ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region ++ * itself is unmodified as we still need to reserve the VA, only the page tables ++ * will be modified by this function. 
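++ *
++ * For example, the eviction shrinker below unmaps an allocation completely
++ * by passing a new size of zero:
++ *
++ *   err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, 0, alloc->nents);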
++ */ ++static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages); + -+ /* Signal failure to userspace */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, ++ u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, ++ u64 *gpu_va) ++{ ++ int zone; ++ int gpu_pc_bits; ++ int cpu_va_bits; ++ struct kbase_va_region *reg; ++ struct device *dev; + -+ return false; -+ } ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(flags); ++ KBASE_DEBUG_ASSERT(gpu_va); + -+ if (katom->event_code == BASE_JD_EVENT_DONE) { -+ dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n"); ++ dev = kctx->kbdev->dev; ++ *gpu_va = 0; /* return 0 on failure */ + -+ if (katom->retry_count) -+ kbase_disjoint_state_down(kbdev); ++ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; ++ cpu_va_bits = BITS_PER_LONG; + -+ return false; ++ if (0 == va_pages) { ++ dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!"); ++ goto bad_size; + } + -+ if (kbase_ctx_flag(kctx, KCTX_DYING)) { -+ dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); ++ if (va_pages > (U64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+ if (katom->retry_count) -+ kbase_disjoint_state_down(kbdev); ++#if defined(CONFIG_64BIT) ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ cpu_va_bits = 32; ++#endif + -+ return false; ++ if (!kbase_check_alloc_flags(*flags)) { ++ dev_warn(dev, ++ "kbase_mem_alloc called with bad flags (%llx)", ++ (unsigned long long)*flags); ++ goto bad_flags; + } + -+ /* Check job exception type and source before replaying. */ -+ if (!kbase_replay_fault_check(katom)) { -+ dev_dbg(kbdev->dev, -+ "Replay cancelled on event %x\n", katom->event_code); -+ /* katom->event_code is already set to the failure code of the -+ * previous job. 
-+ */ -+ return false; ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); ++ goto bad_flags; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + -+ dev_warn(kbdev->dev, "Replaying jobs retry=%d\n", -+ katom->retry_count); ++ /* Limit GPU executable allocs to GPU PC size */ ++ if ((*flags & BASE_MEM_PROT_GPU_EX) && ++ (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT))) ++ goto bad_ex_size; + -+ katom->retry_count++; ++ /* find out which VA zone to use */ ++ if (*flags & BASE_MEM_SAME_VA) ++ zone = KBASE_REG_ZONE_SAME_VA; ++ else if (*flags & BASE_MEM_PROT_GPU_EX) ++ zone = KBASE_REG_ZONE_EXEC; ++ else ++ zone = KBASE_REG_ZONE_CUSTOM_VA; + -+ if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { -+ dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n"); ++ reg = kbase_alloc_free_region(kctx, 0, va_pages, zone); ++ if (!reg) { ++ dev_err(dev, "Failed to allocate free region"); ++ goto no_region; ++ } + -+ kbase_disjoint_state_down(kbdev); ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+ /* katom->event_code is already set to the failure code of the -+ previous job */ -+ return false; ++ if (kbase_reg_prepare_native(reg, kctx) != 0) { ++ dev_err(dev, "Failed to prepare region"); ++ goto prepare_failed; + } + -+ /* only enter the disjoint state once for the whole time while the replay is ongoing */ -+ if (katom->retry_count == 1) -+ kbase_disjoint_state_up(kbdev); ++ if (*flags & BASE_MEM_GROW_ON_GPF) ++ reg->extent = extent; ++ else ++ reg->extent = 0; + -+ INIT_WORK(&katom->work, kbase_replay_process_worker); -+ queue_work(kctx->event_workq, &katom->work); ++ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { ++ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", ++ (unsigned long long)commit_pages, ++ (unsigned long long)va_pages); ++ goto no_mem; ++ } + -+ return true; -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c -new file mode 100644 -index 000000000..6c8cf73ae ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c -@@ -0,0 +1,86 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ kbase_gpu_vm_lock(kctx); + ++ /* mmap needed to setup VA? 
*/ ++ if (*flags & BASE_MEM_SAME_VA) { ++ unsigned long prot = PROT_NONE; ++ unsigned long va_size = va_pages << PAGE_SHIFT; ++ unsigned long va_map = va_size; ++ unsigned long cookie, cookie_nr; ++ unsigned long cpu_addr; + ++ /* Bind to a cookie */ ++ if (!kctx->cookies) { ++ dev_err(dev, "No cookies available for allocation!"); ++ kbase_gpu_vm_unlock(kctx); ++ goto no_cookie; ++ } ++ /* return a cookie */ ++ cookie_nr = __ffs(kctx->cookies); ++ kctx->cookies &= ~(1UL << cookie_nr); ++ BUG_ON(kctx->pending_regions[cookie_nr]); ++ kctx->pending_regions[cookie_nr] = reg; + -+#ifdef CONFIG_ARM64 ++ kbase_gpu_vm_unlock(kctx); + -+#include -+#include ++ /* relocate to correct base */ ++ cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ cookie <<= PAGE_SHIFT; + -+#include ++ /* ++ * 10.1-10.4 UKU userland relies on the kernel to call mmap. ++ * For all other versions we can just return the cookie ++ */ ++ if (kctx->api_version < KBASE_API_VERSION(10, 1) || ++ kctx->api_version > KBASE_API_VERSION(10, 4)) { ++ *gpu_va = (u64) cookie; ++ return reg; ++ } ++ if (*flags & BASE_MEM_PROT_CPU_RD) ++ prot |= PROT_READ; ++ if (*flags & BASE_MEM_PROT_CPU_WR) ++ prot |= PROT_WRITE; + -+/* __asmeq is not available on Kernel versions >= 4.20 */ -+#ifndef __asmeq -+/* -+ * This is used to ensure the compiler did actually allocate the register we -+ * asked it for some inline assembly sequences. Apparently we can't trust the -+ * compiler from one version to another so a bit of paranoia won't hurt. This -+ * string is meant to be concatenated with the inline asm string and will -+ * cause compilation to stop on mismatch. (for details, see gcc PR 15089) -+ */ -+#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" -+#endif ++ cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, ++ MAP_SHARED, cookie); + -+static noinline u64 invoke_smc_fid(u64 function_id, -+ u64 arg0, u64 arg1, u64 arg2) -+{ -+ register u64 x0 asm("x0") = function_id; -+ register u64 x1 asm("x1") = arg0; -+ register u64 x2 asm("x2") = arg1; -+ register u64 x3 asm("x3") = arg2; ++ if (IS_ERR_VALUE(cpu_addr)) { ++ kbase_gpu_vm_lock(kctx); ++ kctx->pending_regions[cookie_nr] = NULL; ++ kctx->cookies |= (1UL << cookie_nr); ++ kbase_gpu_vm_unlock(kctx); ++ goto no_mmap; ++ } + -+ asm volatile( -+ __asmeq("%0", "x0") -+ __asmeq("%1", "x1") -+ __asmeq("%2", "x2") -+ __asmeq("%3", "x3") -+ "smc #0\n" -+ : "+r" (x0) -+ : "r" (x1), "r" (x2), "r" (x3)); ++ *gpu_va = (u64) cpu_addr; ++ } else /* we control the VA */ { ++ if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { ++ dev_warn(dev, "Failed to map memory on GPU"); ++ kbase_gpu_vm_unlock(kctx); ++ goto no_mmap; ++ } ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; + -+ return x0; -+} ++ kbase_gpu_vm_unlock(kctx); ++ } + -+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) -+{ -+ /* Is fast call (bit 31 set) */ -+ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); -+ /* bits 16-23 must be zero for fast calls */ -+ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); ++ return reg; + -+ return invoke_smc_fid(fid, arg0, arg1, arg2); ++no_mmap: ++no_cookie: ++no_mem: ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++invalid_flags: ++prepare_failed: ++ kfree(reg); ++no_region: ++bad_ex_size: ++bad_flags: ++bad_size: ++ return NULL; +} ++KBASE_EXPORT_TEST_API(kbase_mem_alloc); + -+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, -+ u64 arg0, u64 arg1, u64 arg2) ++int kbase_mem_query(struct kbase_context *kctx, u64 
gpu_addr, int query, u64 * const out) +{ -+ u32 fid = 0; -+ -+ /* Only the six bits allowed should be used. */ -+ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); -+ -+ fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ -+ if (smc64) -+ fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ -+ fid |= oen; /* Bit 29:24: OEN */ -+ /* Bit 23:16: Must be zero for fast calls */ -+ fid |= (function_number); /* Bit 15:0: function number */ -+ -+ return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); -+} -+ -+#endif /* CONFIG_ARM64 */ ++ struct kbase_va_region *reg; ++ int ret = -EINVAL; + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h -new file mode 100644 -index 000000000..9bff3d2e8 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h -@@ -0,0 +1,67 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(out); + ++ if (gpu_addr & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); ++ return -EINVAL; ++ } + ++ kbase_gpu_vm_lock(kctx); + ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (!reg || (reg->flags & KBASE_REG_FREE)) ++ goto out_unlock; + ++ switch (query) { ++ case KBASE_MEM_QUERY_COMMIT_SIZE: ++ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { ++ *out = kbase_reg_current_backed_size(reg); ++ } else { ++ size_t i; ++ struct kbase_aliased *aliased; ++ *out = 0; ++ aliased = reg->cpu_alloc->imported.alias.aliased; ++ for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) ++ *out += aliased[i].length; ++ } ++ break; ++ case KBASE_MEM_QUERY_VA_SIZE: ++ *out = reg->nr_pages; ++ break; ++ case KBASE_MEM_QUERY_FLAGS: ++ { ++ *out = 0; ++ if (KBASE_REG_CPU_WR & reg->flags) ++ *out |= BASE_MEM_PROT_CPU_WR; ++ if (KBASE_REG_CPU_RD & reg->flags) ++ *out |= BASE_MEM_PROT_CPU_RD; ++ if (KBASE_REG_CPU_CACHED & reg->flags) ++ *out |= BASE_MEM_CACHED_CPU; ++ if (KBASE_REG_GPU_WR & reg->flags) ++ *out |= BASE_MEM_PROT_GPU_WR; ++ if (KBASE_REG_GPU_RD & reg->flags) ++ *out |= BASE_MEM_PROT_GPU_RD; ++ if (!(KBASE_REG_GPU_NX & reg->flags)) ++ *out |= BASE_MEM_PROT_GPU_EX; ++ if (KBASE_REG_SHARE_BOTH & reg->flags) ++ *out |= BASE_MEM_COHERENT_SYSTEM; ++ if (KBASE_REG_SHARE_IN & reg->flags) ++ *out |= BASE_MEM_COHERENT_LOCAL; ++ break; ++ } ++ default: ++ *out = 0; ++ goto out_unlock; ++ } + -+#ifndef _KBASE_SMC_H_ -+#define _KBASE_SMC_H_ ++ ret = 0; + -+#ifdef CONFIG_ARM64 ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return ret; ++} + -+#include ++/** ++ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the ++ * Ephemeral memory eviction list. ++ * @s: Shrinker ++ * @sc: Shrinker control ++ * ++ * Return: Number of pages which can be freed. 
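++ *
++ * Together with kbase_mem_evictable_reclaim_scan_objects() this forms the
++ * shrinker pair wired up by kbase_mem_evictable_init() below:
++ *
++ *   kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
++ *   kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
++ *   register_shrinker(&kctx->reclaim);   plus a name argument on kernels >= 6.0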
++ */ ++static ++unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, ++ struct shrink_control *sc) ++{ ++ struct kbase_context *kctx; ++ struct kbase_mem_phy_alloc *alloc; ++ unsigned long pages = 0; + -+#define SMC_FAST_CALL (1 << 31) -+#define SMC_64 (1 << 30) ++ kctx = container_of(s, struct kbase_context, reclaim); + -+#define SMC_OEN_OFFSET 24 -+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ -+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) -+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) ++ mutex_lock(&kctx->jit_evict_lock); + ++ list_for_each_entry(alloc, &kctx->evict_list, evict_node) ++ pages += alloc->nents; + -+/** -+ * kbase_invoke_smc_fid - Perform a secure monitor call -+ * @fid: The SMC function to call, see SMC Calling convention. -+ * @arg0: First argument to the SMC. -+ * @arg1: Second argument to the SMC. -+ * @arg2: Third argument to the SMC. -+ * -+ * See SMC Calling Convention for details. -+ * -+ * Return: the return value from the SMC. -+ */ -+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); ++ mutex_unlock(&kctx->jit_evict_lock); ++ return pages; ++} + +/** -+ * kbase_invoke_smc_fid - Perform a secure monitor call -+ * @oen: Owning Entity number (SIP, STD etc). -+ * @function_number: The function number within the OEN. -+ * @smc64: use SMC64 calling convention instead of SMC32. -+ * @arg0: First argument to the SMC. -+ * @arg1: Second argument to the SMC. -+ * @arg2: Third argument to the SMC. -+ * -+ * See SMC Calling Convention for details. -+ * -+ * Return: the return value from the SMC call. -+ */ -+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, -+ u64 arg0, u64 arg1, u64 arg2); -+ -+#endif /* CONFIG_ARM64 */ -+ -+#endif /* _KBASE_SMC_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c -new file mode 100644 -index 000000000..396953e78 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c -@@ -0,0 +1,1549 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction ++ * list for pages and try to reclaim them. ++ * @s: Shrinker ++ * @sc: Shrinker control + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Return: Number of pages freed (can be less then requested) or -1 if the ++ * shrinker failed to free pages in its pool. + * ++ * Note: ++ * This function accesses region structures without taking the region lock, ++ * this is required as the OOM killer can call the shrinker after the region ++ * lock has already been held. ++ * This is safe as we can guarantee that a region on the eviction list will ++ * not be freed (kbase_mem_free_region removes the allocation from the list ++ * before destroying it), or modified by other parts of the driver. ++ * The eviction list itself is guarded by the eviction lock and the MMU updates ++ * are protected by their own lock. 
+ */ ++static ++unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, ++ struct shrink_control *sc) ++{ ++ struct kbase_context *kctx; ++ struct kbase_mem_phy_alloc *alloc; ++ struct kbase_mem_phy_alloc *tmp; ++ unsigned long freed = 0; + ++ kctx = container_of(s, struct kbase_context, reclaim); ++ mutex_lock(&kctx->jit_evict_lock); + ++ list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { ++ int err; + ++ err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, ++ 0, alloc->nents); ++ if (err != 0) { ++ /* ++ * Failed to remove GPU mapping, tell the shrinker ++ * to stop trying to shrink our slab even though we ++ * have pages in it. ++ */ ++ freed = -1; ++ goto out_unlock; ++ } + ++ /* ++ * Update alloc->evicted before freeing the backing so the ++ * helper can determine that it needs to bypass the accounting ++ * and memory pool. ++ */ ++ alloc->evicted = alloc->nents; + -+#include ++ kbase_free_phy_pages_helper(alloc, alloc->evicted); ++ freed += alloc->evicted; ++ list_del_init(&alloc->evict_node); + -+#if defined(CONFIG_DMA_SHARED_BUFFER) -+#include -+#include -+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ /* ++ * Inform the JIT allocator this region has lost backing ++ * as it might need to free the allocation. ++ */ ++ kbase_jit_backing_lost(alloc->reg); + -+/* Mask to check cache alignment of data structures */ -+#define KBASE_CACHE_ALIGNMENT_MASK ((1< sc->nr_to_scan) ++ break; ++ } ++out_unlock: ++ mutex_unlock(&kctx->jit_evict_lock); + -+/** -+ * @file mali_kbase_softjobs.c -+ * -+ * This file implements the logic behind software only jobs that are -+ * executed within the driver rather than being handed over to the GPU. 
-+ */ ++ return freed; ++} + -+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned long lflags; ++ if (sc->nr_to_scan == 0) ++ return kbase_mem_evictable_reclaim_count_objects(s, sc); + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} ++#endif + -+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) ++int kbase_mem_evictable_init(struct kbase_context *kctx) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned long lflags; ++ INIT_LIST_HEAD(&kctx->evict_list); ++ mutex_init(&kctx->jit_evict_lock); + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_del(&katom->queue); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ /* Register shrinker */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++ kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; ++#else ++ kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; ++ kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; ++#endif ++ kctx->reclaim.seeks = DEFAULT_SEEKS; ++ /* Kernel versions prior to 3.1 : ++ * struct shrinker does not define batch */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) ++ kctx->reclaim.batch = 0; ++#endif ++#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE ++ register_shrinker(&kctx->reclaim); ++#else ++ register_shrinker(&kctx->reclaim, "mali-mem"); ++#endif ++ return 0; +} + -+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) ++void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ -+ struct kbase_context *kctx = katom->kctx; ++ unregister_shrinker(&kctx->reclaim); ++} + -+ /* Record the start time of this atom so we could cancel it at -+ * the right time. -+ */ -+ katom->start_timestamp = ktime_get(); ++/** ++ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. ++ * @alloc: The physical allocation ++ */ ++static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) ++{ ++ struct kbase_context *kctx = alloc->imported.kctx; ++ int __maybe_unused new_page_count; + -+ /* Add the atom to the waiting list before the timer is -+ * (re)started to make sure that it gets processed. -+ */ -+ kbasep_add_waiting_soft_job(katom); ++ kbase_process_page_usage_dec(kctx, alloc->nents); ++ new_page_count = kbase_atomic_sub_pages(alloc->nents, ++ &kctx->used_pages); ++ kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + -+ /* Schedule timeout of this atom after a period if it is not active */ -+ if (!timer_pending(&kctx->soft_job_timeout)) { -+ int timeout_ms = atomic_read( -+ &kctx->kbdev->js_data.soft_job_timeout_ms); -+ mod_timer(&kctx->soft_job_timeout, -+ jiffies + msecs_to_jiffies(timeout_ms)); -+ } ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)kctx->id, ++ (u64)new_page_count); +} + -+static int kbasep_read_soft_event_status( -+ struct kbase_context *kctx, u64 evt, unsigned char *status) ++/** ++ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. 
++ * @alloc: The physical allocation ++ */ ++static ++void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ -+ unsigned char *mapped_evt; -+ struct kbase_vmap_struct map; -+ -+ mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); -+ if (!mapped_evt) -+ return -EFAULT; ++ struct kbase_context *kctx = alloc->imported.kctx; ++ int __maybe_unused new_page_count; + -+ *status = *mapped_evt; ++ new_page_count = kbase_atomic_add_pages(alloc->nents, ++ &kctx->used_pages); ++ kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + -+ kbase_vunmap(kctx, &map); ++ /* Increase mm counters so that the allocation is accounted for ++ * against the process and thus is visible to the OOM killer. ++ */ ++ kbase_process_page_usage_inc(kctx, alloc->nents); + -+ return 0; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)kctx->id, ++ (u64)new_page_count); +} + -+static int kbasep_write_soft_event_status( -+ struct kbase_context *kctx, u64 evt, unsigned char new_status) ++int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ -+ unsigned char *mapped_evt; -+ struct kbase_vmap_struct map; ++ struct kbase_context *kctx = gpu_alloc->imported.kctx; + -+ if ((new_status != BASE_JD_SOFT_EVENT_SET) && -+ (new_status != BASE_JD_SOFT_EVENT_RESET)) -+ return -EINVAL; ++ lockdep_assert_held(&kctx->reg_lock); + -+ mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); -+ if (!mapped_evt) -+ return -EFAULT; ++ /* This alloction can't already be on a list. */ ++ WARN_ON(!list_empty(&gpu_alloc->evict_node)); + -+ *mapped_evt = new_status; ++ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, ++ 0, gpu_alloc->nents); + -+ kbase_vunmap(kctx, &map); ++ /* ++ * Add the allocation to the eviction list, after this point the shrink ++ * can reclaim it. ++ */ ++ mutex_lock(&kctx->jit_evict_lock); ++ list_add(&gpu_alloc->evict_node, &kctx->evict_list); ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_mem_evictable_mark_reclaim(gpu_alloc); + ++ gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + -+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) ++bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ -+ struct kbase_vmap_struct map; -+ void *user_result; -+ struct timespec64 ts; -+ struct base_dump_cpu_gpu_counters data; -+ u64 system_time; -+ u64 cycle_counter; -+ u64 jc = katom->jc; -+ struct kbase_context *kctx = katom->kctx; -+ int pm_active_err; ++ struct kbase_context *kctx = gpu_alloc->imported.kctx; ++ int err = 0; + -+ memset(&data, 0, sizeof(data)); ++ lockdep_assert_held(&kctx->reg_lock); + -+ /* Take the PM active reference as late as possible - otherwise, it could -+ * delay suspend until we process the atom (which may be at the end of a -+ * long chain of dependencies */ -+ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); -+ if (pm_active_err) { -+ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; ++ /* ++ * First remove the allocation from the eviction list as it's no ++ * longer eligible for eviction. ++ */ ++ list_del_init(&gpu_alloc->evict_node); + -+ /* We're suspended - queue this on the list of suspended jobs -+ * Use dep_item[1], because dep_item[0] was previously in use -+ * for 'waiting_soft_jobs'. ++ if (gpu_alloc->evicted == 0) { ++ /* ++ * The backing is still present, update the VM stats as it's ++ * in use again. 
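++		 * This is reached, for example, when kbase_mem_flags_change()
++		 * below clears BASE_MEM_DONT_NEED before the shrinker has
++		 * reclaimed the backing, so only the counters are restored.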
+ */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ kbase_mem_evictable_unmark_reclaim(gpu_alloc); ++ } else { ++ /* If the region is still alive ... */ ++ if (gpu_alloc->reg) { ++ /* ... allocate replacement backing ... */ ++ err = kbase_alloc_phy_pages_helper(gpu_alloc, ++ gpu_alloc->evicted); + -+ /* Also adding this to the list of waiting soft job */ -+ kbasep_add_waiting_soft_job(katom); ++ /* ++ * ... and grow the mapping back to its ++ * pre-eviction size. ++ */ ++ if (!err) ++ err = kbase_mem_grow_gpu_mapping(kctx, ++ gpu_alloc->reg, ++ gpu_alloc->evicted, 0); + -+ return pm_active_err; ++ gpu_alloc->evicted = 0; ++ } + } + -+ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, -+ &ts); ++ /* If the region is still alive remove the DONT_NEED attribute. */ ++ if (gpu_alloc->reg) ++ gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + -+ kbase_pm_context_idle(kctx->kbdev); ++ return (err == 0); ++} + -+ data.sec = ts.tv_sec; -+ data.usec = ts.tv_nsec / 1000; -+ data.system_time = system_time; -+ data.cycle_counter = cycle_counter; ++int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) ++{ ++ struct kbase_va_region *reg; ++ int ret = -EINVAL; ++ unsigned int real_flags = 0; ++ unsigned int prev_flags = 0; ++ bool prev_needed, new_needed; + -+ /* Assume this atom will be cancelled until we know otherwise */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ KBASE_DEBUG_ASSERT(kctx); + -+ /* GPU_WR access is checked on the range for returning the result to -+ * userspace for the following reasons: -+ * - security, this is currently how imported user bufs are checked. -+ * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ -+ user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); -+ if (!user_result) -+ return 0; ++ if (!gpu_addr) ++ return -EINVAL; + -+ memcpy(user_result, &data, sizeof(data)); ++ if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) ++ return -EINVAL; + -+ kbase_vunmap(kctx, &map); ++ /* nuke other bits */ ++ flags &= mask; + -+ /* Atom was fine - mark it as done */ -+ katom->event_code = BASE_JD_EVENT_DONE; ++ /* check for only supported flags */ ++ if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) ++ goto out; + -+ return 0; -+} ++ /* mask covers bits we don't support? 
*/ ++ if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) ++ goto out; + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+/* Called by the explicit fence mechanism when a fence wait has completed */ -+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; ++ /* convert flags */ ++ if (BASE_MEM_COHERENT_SYSTEM & flags) ++ real_flags |= KBASE_REG_SHARE_BOTH; ++ else if (BASE_MEM_COHERENT_LOCAL & flags) ++ real_flags |= KBASE_REG_SHARE_IN; + -+ mutex_lock(&kctx->jctx.lock); -+ kbasep_remove_waiting_soft_job(katom); -+ kbase_finish_soft_job(katom); -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(kctx->kbdev); -+ mutex_unlock(&kctx->jctx.lock); -+} -+#endif ++ /* now we can lock down the context, and find the region */ ++ down_write(¤t->mm->mmap_lock); ++ kbase_gpu_vm_lock(kctx); + -+static void kbasep_soft_event_complete_job(struct work_struct *work) -+{ -+ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ int resched; ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (!reg || (reg->flags & KBASE_REG_FREE)) ++ goto out_unlock; + -+ mutex_lock(&kctx->jctx.lock); -+ resched = jd_done_nolock(katom, NULL); -+ mutex_unlock(&kctx->jctx.lock); ++ /* Is the region being transitioning between not needed and needed? */ ++ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; ++ new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; ++ if (prev_needed != new_needed) { ++ /* Aliased allocations can't be made ephemeral */ ++ if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) ++ goto out_unlock; + -+ if (resched) -+ kbase_js_sched_all(kctx->kbdev); -+} ++ if (new_needed) { ++ /* Only native allocations can be marked not needed */ ++ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ret = kbase_mem_evictable_make(reg->gpu_alloc); ++ if (ret) ++ goto out_unlock; ++ } else { ++ kbase_mem_evictable_unmake(reg->gpu_alloc); ++ } ++ } + -+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) -+{ -+ int cancel_timer = 1; -+ struct list_head *entry, *tmp; -+ unsigned long lflags; ++ /* limit to imported memory */ ++ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && ++ (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) ++ goto out_unlock; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ struct kbase_jd_atom *katom = list_entry( -+ entry, struct kbase_jd_atom, queue); ++ /* no change? */ ++ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { ++ ret = 0; ++ goto out_unlock; ++ } + -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ if (katom->jc == evt) { -+ list_del(&katom->queue); ++ /* save for roll back */ ++ prev_flags = reg->flags; ++ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); ++ reg->flags |= real_flags; + -+ katom->event_code = BASE_JD_EVENT_DONE; -+ INIT_WORK(&katom->work, -+ kbasep_soft_event_complete_job); -+ queue_work(kctx->jctx.job_done_wq, -+ &katom->work); -+ } else { -+ /* There are still other waiting jobs, we cannot -+ * cancel the timer yet. 
-+ */ -+ cancel_timer = 0; -+ } -+ break; -+#ifdef CONFIG_MALI_FENCE_DEBUG -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ /* Keep the timer running if fence debug is enabled and -+ * there are waiting fence jobs. -+ */ -+ cancel_timer = 0; -+ break; ++ /* Currently supporting only imported memory */ ++ switch (reg->gpu_alloc->type) { ++#ifdef CONFIG_UMP ++ case KBASE_MEM_TYPE_IMPORTED_UMP: ++ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags); ++ break; +#endif -+ } ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case KBASE_MEM_TYPE_IMPORTED_UMM: ++ /* Future use will use the new flags, existing mapping will NOT be updated ++ * as memory should not be in use by the GPU when updating the flags. ++ */ ++ ret = 0; ++ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); ++ break; ++#endif ++ default: ++ break; + } + -+ if (cancel_timer) -+ del_timer(&kctx->soft_job_timeout); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ /* roll back on error, i.e. not UMP */ ++ if (ret) ++ reg->flags = prev_flags; ++ ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ up_write(¤t->mm->mmap_lock); ++out: ++ return ret; +} + -+#ifdef CONFIG_MALI_FENCE_DEBUG -+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct device *dev = kctx->kbdev->dev; -+ int i; ++#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) + -+ for (i = 0; i < 2; i++) { -+ struct kbase_jd_atom *dep; ++#ifdef CONFIG_UMP ++static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags) ++{ ++ struct kbase_va_region *reg; ++ ump_dd_handle umph; ++ u64 block_count; ++ const ump_dd_physical_block_64 *block_array; ++ u64 i, j; ++ int page = 0; ++ ump_alloc_flags ump_flags; ++ ump_alloc_flags cpu_flags; ++ ump_alloc_flags gpu_flags; + -+ list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { -+ if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || -+ dep->status == KBASE_JD_ATOM_STATE_COMPLETED) -+ continue; ++ if (*flags & BASE_MEM_SECURE) ++ goto bad_flags; + -+ if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) -+ == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { -+ /* Found blocked trigger fence. 
*/ -+ struct kbase_sync_fence_info info; ++ umph = ump_dd_from_secure_id(id); ++ if (UMP_DD_INVALID_MEMORY_HANDLE == umph) ++ goto bad_id; + -+ if (!kbase_sync_fence_in_info_get(dep, &info)) { -+ dev_warn(dev, -+ "\tVictim trigger atom %d fence [%p] %s: %s\n", -+ kbase_jd_atom_id(kctx, dep), -+ info.fence, -+ info.name, -+ kbase_sync_status_string(info.status)); -+ } -+ } ++ ump_flags = ump_dd_allocation_flags_get(umph); ++ cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK; ++ gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) & ++ UMP_DEVICE_MASK; + -+ kbase_fence_debug_check_atom(dep); -+ } -+ } -+} ++ *va_pages = ump_dd_size_get_64(umph); ++ *va_pages >>= PAGE_SHIFT; + -+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) -+{ -+ struct kbase_context *kctx = katom->kctx; -+ struct device *dev = katom->kctx->kbdev->dev; -+ int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); -+ unsigned long lflags; -+ struct kbase_sync_fence_info info; ++ if (!*va_pages) ++ goto bad_size; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ if (*va_pages > (U64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+ if (kbase_sync_fence_in_info_get(katom, &info)) { -+ /* Fence must have signaled just after timeout. */ -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -+ return; -+ } ++ if (*flags & BASE_MEM_SAME_VA) ++ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); ++ else ++ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + -+ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", -+ kctx->tgid, kctx->id, -+ kbase_jd_atom_id(kctx, katom), -+ info.fence, timeout_ms); -+ dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", -+ info.fence, info.name, -+ kbase_sync_status_string(info.status)); ++ if (!reg) ++ goto no_region; + -+ /* Search for blocked trigger atoms */ -+ kbase_fence_debug_check_atom(katom); ++ /* we've got pages to map now, and support SAME_VA */ ++ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+ kbase_sync_fence_in_dump(katom); -+} ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+struct kbase_fence_debug_work { -+ struct kbase_jd_atom *katom; -+ struct work_struct work; -+}; ++ reg->gpu_alloc->imported.ump_handle = umph; + -+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) -+{ -+ struct kbase_fence_debug_work *w = container_of(work, -+ struct kbase_fence_debug_work, work); -+ struct kbase_jd_atom *katom = w->katom; -+ struct kbase_context *kctx = katom->kctx; ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */ ++ reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */ + -+ mutex_lock(&kctx->jctx.lock); -+ kbase_fence_debug_wait_timeout(katom); -+ mutex_unlock(&kctx->jctx.lock); ++ /* Override import flags based on UMP flags */ ++ *flags &= ~(BASE_MEM_CACHED_CPU); ++ *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR); ++ *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR); + -+ kfree(w); -+} ++ if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == ++ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) { ++ reg->flags |= KBASE_REG_CPU_CACHED; ++ *flags |= BASE_MEM_CACHED_CPU; ++ } + -+static void 
kbase_fence_debug_timeout(struct kbase_jd_atom *katom) -+{ -+ struct kbase_fence_debug_work *work; -+ struct kbase_context *kctx = katom->kctx; ++ if (cpu_flags & UMP_PROT_CPU_WR) { ++ reg->flags |= KBASE_REG_CPU_WR; ++ *flags |= BASE_MEM_PROT_CPU_WR; ++ } + -+ /* Enqueue fence debug worker. Use job_done_wq to get -+ * debug print ordered with job completion. -+ */ -+ work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); -+ /* Ignore allocation failure. */ -+ if (work) { -+ work->katom = katom; -+ INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); -+ queue_work(kctx->jctx.job_done_wq, &work->work); ++ if (cpu_flags & UMP_PROT_CPU_RD) { ++ reg->flags |= KBASE_REG_CPU_RD; ++ *flags |= BASE_MEM_PROT_CPU_RD; + } -+} -+#endif /* CONFIG_MALI_FENCE_DEBUG */ + -+void kbasep_soft_job_timeout_worker(struct timer_list *t) -+{ -+ struct kbase_context *kctx = from_timer(kctx, t, soft_job_timeout); -+ u32 timeout_ms = (u32)atomic_read( -+ &kctx->kbdev->js_data.soft_job_timeout_ms); -+ struct timer_list *timer = &kctx->soft_job_timeout; -+ ktime_t cur_time = ktime_get(); -+ bool restarting = false; -+ unsigned long lflags; -+ struct list_head *entry, *tmp; ++ if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == ++ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ++ reg->flags |= KBASE_REG_GPU_CACHED; + -+ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); -+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { -+ struct kbase_jd_atom *katom = list_entry(entry, -+ struct kbase_jd_atom, queue); -+ s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, -+ katom->start_timestamp)); ++ if (gpu_flags & UMP_PROT_DEVICE_WR) { ++ reg->flags |= KBASE_REG_GPU_WR; ++ *flags |= BASE_MEM_PROT_GPU_WR; ++ } + -+ if (elapsed_time < (s64)timeout_ms) { -+ restarting = true; -+ continue; -+ } ++ if (gpu_flags & UMP_PROT_DEVICE_RD) { ++ reg->flags |= KBASE_REG_GPU_RD; ++ *flags |= BASE_MEM_PROT_GPU_RD; ++ } + -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ /* Take it out of the list to ensure that it -+ * will be cancelled in all cases -+ */ -+ list_del(&katom->queue); ++ /* ump phys block query */ ++ ump_dd_phys_blocks_get_64(umph, &block_count, &block_array); + -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ INIT_WORK(&katom->work, kbasep_soft_event_complete_job); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+ break; -+#ifdef CONFIG_MALI_FENCE_DEBUG -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ kbase_fence_debug_timeout(katom); -+ break; -+#endif ++ for (i = 0; i < block_count; i++) { ++ for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) { ++ reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT); ++ page++; + } + } ++ reg->gpu_alloc->nents = *va_pages; ++ reg->extent = 0; + -+ if (restarting) -+ mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); -+ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ return reg; ++ ++no_alloc_obj: ++ kfree(reg); ++no_region: ++bad_size: ++ ump_dd_release(umph); ++bad_id: ++bad_flags: ++ return NULL; +} ++#endif /* CONFIG_UMP */ + -+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) ++#ifdef CONFIG_DMA_SHARED_BUFFER ++static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, ++ int fd, u64 *va_pages, u64 *flags, u32 padding) +{ -+ struct kbase_context *kctx = katom->kctx; -+ unsigned char status; ++ struct kbase_va_region *reg; ++ struct dma_buf *dma_buf; ++ struct dma_buf_attachment *dma_attachment; ++ bool shared_zone 
= false; + -+ /* The status of this soft-job is stored in jc */ -+ if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ return 0; -+ } ++ dma_buf = dma_buf_get(fd); ++ if (IS_ERR_OR_NULL(dma_buf)) ++ goto no_buf; + -+ if (status == BASE_JD_SOFT_EVENT_SET) -+ return 0; /* Event already set, nothing to do */ ++ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); ++ if (!dma_attachment) ++ goto no_attachment; + -+ kbasep_add_waiting_with_timeout(katom); ++ *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; ++ if (!*va_pages) ++ goto bad_size; + -+ return 1; -+} ++ if (*va_pages > (U64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, -+ unsigned char new_status) -+{ -+ /* Complete jobs waiting on the same event */ -+ struct kbase_context *kctx = katom->kctx; ++ /* ignore SAME_VA */ ++ *flags &= ~BASE_MEM_SAME_VA; + -+ if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ return; ++ if (*flags & BASE_MEM_IMPORT_SHARED) ++ shared_zone = true; ++ ++#ifdef CONFIG_64BIT ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* ++ * 64-bit tasks require us to reserve VA on the CPU that we use ++ * on the GPU. ++ */ ++ shared_zone = true; + } ++#endif + -+ if (new_status == BASE_JD_SOFT_EVENT_SET) -+ kbasep_complete_triggered_soft_events(kctx, katom->jc); -+} ++ if (shared_zone) { ++ *flags |= BASE_MEM_NEED_MMAP; ++ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); ++ } else { ++ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); ++ } + -+/** -+ * kbase_soft_event_update() - Update soft event state -+ * @kctx: Pointer to context -+ * @event: Event to update -+ * @new_status: New status value of event -+ * -+ * Update the event, and wake up any atoms waiting for the event. -+ * -+ * Return: 0 on success, a negative error code on failure. 
-+ */ -+int kbase_soft_event_update(struct kbase_context *kctx, -+ u64 event, -+ unsigned char new_status) -+{ -+ int err = 0; ++ if (!reg) ++ goto no_region; + -+ mutex_lock(&kctx->jctx.lock); ++ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+ if (kbasep_write_soft_event_status(kctx, event, new_status)) { -+ err = -ENOENT; -+ goto out; -+ } ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+ if (new_status == BASE_JD_SOFT_EVENT_SET) -+ kbasep_complete_triggered_soft_events(kctx, event); ++ /* No pages to map yet */ ++ reg->gpu_alloc->nents = 0; + -+out: -+ mutex_unlock(&kctx->jctx.lock); ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+ return err; -+} ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ ++ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ ++ reg->flags |= KBASE_REG_GPU_CACHED; + -+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) -+{ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(katom->kctx->kbdev); -+} ++ if (*flags & BASE_MEM_SECURE) ++ reg->flags |= KBASE_REG_SECURE; + -+struct kbase_debug_copy_buffer { -+ size_t size; -+ struct page **pages; -+ int nr_pages; -+ size_t offset; -+ struct kbase_mem_phy_alloc *gpu_alloc; ++ if (padding) ++ reg->flags |= KBASE_REG_IMPORT_PAD; + -+ struct page **extres_pages; -+ int nr_extres_pages; -+}; ++ reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; ++ reg->gpu_alloc->imported.umm.sgt = NULL; ++ reg->gpu_alloc->imported.umm.dma_buf = dma_buf; ++ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; ++ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; ++ reg->extent = 0; + -+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) ++ return reg; ++ ++invalid_flags: ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc_obj: ++ kfree(reg); ++no_region: ++bad_size: ++ dma_buf_detach(dma_buf, dma_attachment); ++no_attachment: ++ dma_buf_put(dma_buf); ++no_buf: ++ return NULL; ++} ++#endif /* CONFIG_DMA_SHARED_BUFFER */ ++ ++static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx) +{ -+ struct page **pages = buffer->extres_pages; -+ int nr_pages = buffer->nr_extres_pages; ++ u32 cpu_cache_line_size = cache_line_size(); ++ u32 gpu_cache_line_size = ++ (1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + -+ if (pages) { -+ int i; ++ return ((cpu_cache_line_size > gpu_cache_line_size) ? 
++ cpu_cache_line_size : ++ gpu_cache_line_size); ++} + -+ for (i = 0; i < nr_pages; i++) { -+ struct page *pg = pages[i]; ++static struct kbase_va_region *kbase_mem_from_user_buffer( ++ struct kbase_context *kctx, unsigned long address, ++ unsigned long size, u64 *va_pages, u64 *flags) ++{ ++ long i; ++ struct kbase_va_region *reg; ++ long faulted_pages; ++ int zone = KBASE_REG_ZONE_CUSTOM_VA; ++ bool shared_zone = false; ++ u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx); ++ struct kbase_alloc_import_user_buf *user_buf; ++ struct page **pages = NULL; + -+ if (pg) -+ put_page(pg); ++ if ((address & (cache_line_alignment - 1)) != 0 || ++ (size & (cache_line_alignment - 1)) != 0) { ++ /* Coherency must be enabled to handle partial cache lines */ ++ if (*flags & (BASE_MEM_COHERENT_SYSTEM | ++ BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { ++ /* Force coherent system required flag, import will ++ * then fail if coherency isn't available ++ */ ++ *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; ++ } else { ++ dev_warn(kctx->kbdev->dev, ++ "User buffer is not cache line aligned and no coherency enabled\n"); ++ goto bad_size; + } -+ kfree(pages); + } -+} + -+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) -+{ -+ struct kbase_debug_copy_buffer *buffers = -+ (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; -+ unsigned int i; -+ unsigned int nr = katom->nr_extres; ++ *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - ++ PFN_DOWN(address); ++ if (!*va_pages) ++ goto bad_size; + -+ if (!buffers) -+ return; ++ if (*va_pages > (UINT64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+ kbase_gpu_vm_lock(katom->kctx); -+ for (i = 0; i < nr; i++) { -+ int p; -+ struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; ++ /* SAME_VA generally not supported with imported memory (no known use cases) */ ++ *flags &= ~BASE_MEM_SAME_VA; + -+ if (!buffers[i].pages) -+ break; -+ for (p = 0; p < buffers[i].nr_pages; p++) { -+ struct page *pg = buffers[i].pages[p]; ++ if (*flags & BASE_MEM_IMPORT_SHARED) ++ shared_zone = true; + -+ if (pg) -+ put_page(pg); -+ } -+ kfree(buffers[i].pages); -+ if (gpu_alloc) { -+ switch (gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ free_user_buffer(&buffers[i]); -+ break; -+ } -+ default: -+ /* Nothing to be done. */ -+ break; -+ } -+ kbase_mem_phy_alloc_put(gpu_alloc); -+ } ++#ifdef CONFIG_64BIT ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* ++ * 64-bit tasks require us to reserve VA on the CPU that we use ++ * on the GPU. 
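++		 * Setting shared_zone makes the code below choose the SAME_VA
++		 * zone and set BASE_MEM_NEED_MMAP, so user space must mmap()
++		 * the import and the same virtual address can then be used on
++		 * both the CPU and the GPU.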
++ */ ++ shared_zone = true; + } -+ kbase_gpu_vm_unlock(katom->kctx); -+ kfree(buffers); ++#endif + -+ katom->jc = 0; -+} ++ if (shared_zone) { ++ *flags |= BASE_MEM_NEED_MMAP; ++ zone = KBASE_REG_ZONE_SAME_VA; ++ } + -+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) -+{ -+ struct kbase_debug_copy_buffer *buffers; -+ struct base_jd_debug_copy_buffer *user_buffers = NULL; -+ unsigned int i; -+ unsigned int nr = katom->nr_extres; -+ int ret = 0; -+ void __user *user_structs = (void __user *)(uintptr_t)katom->jc; ++ reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); + -+ if (!user_structs) -+ return -EINVAL; ++ if (!reg) ++ goto no_region; + -+ buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); -+ if (!buffers) { -+ ret = -ENOMEM; -+ katom->jc = 0; -+ goto out_cleanup; -+ } -+ katom->jc = (u64)(uintptr_t)buffers; ++ reg->gpu_alloc = kbase_alloc_create(*va_pages, ++ KBASE_MEM_TYPE_IMPORTED_USER_BUF); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+ user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+ if (!user_buffers) { -+ ret = -ENOMEM; -+ goto out_cleanup; -+ } ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+ ret = copy_from_user(user_buffers, user_structs, -+ sizeof(*user_buffers)*nr); -+ if (ret) -+ goto out_cleanup; ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ ++ reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ ++ reg->flags &= ~KBASE_REG_CPU_CACHED; + -+ for (i = 0; i < nr; i++) { -+ u64 addr = user_buffers[i].address; -+ u64 page_addr = addr & PAGE_MASK; -+ u64 end_page_addr = addr + user_buffers[i].size - 1; -+ u64 last_page_addr = end_page_addr & PAGE_MASK; -+ int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; -+ int pinned_pages; -+ struct kbase_va_region *reg; -+ struct base_external_resource user_extres; ++ user_buf = ®->gpu_alloc->imported.user_buf; + -+ if (!addr) -+ continue; ++ user_buf->size = size; ++ user_buf->address = address; ++ user_buf->nr_pages = *va_pages; ++ user_buf->mm = current->mm; ++ user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *), ++ GFP_KERNEL); + -+ buffers[i].nr_pages = nr_pages; -+ buffers[i].offset = addr & ~PAGE_MASK; -+ if (buffers[i].offset >= PAGE_SIZE) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } -+ buffers[i].size = user_buffers[i].size; ++ if (!user_buf->pages) ++ goto no_page_array; + -+ buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), -+ GFP_KERNEL); -+ if (!buffers[i].pages) { -+ ret = -ENOMEM; -+ goto out_cleanup; -+ } ++ /* If the region is coherent with the CPU then the memory is imported ++ * and mapped onto the GPU immediately. ++ * Otherwise get_user_pages is called as a sanity check, but with ++ * NULL as the pages argument which will fault the pages, but not ++ * pin them. The memory will then be pinned only around the jobs that ++ * specify the region as an external resource. 
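++	 * In the coherent (KBASE_REG_SHARE_BOTH) case the pages array is
++	 * populated here, the pages are DMA-mapped below, and PINNED_ON_IMPORT
++	 * is recorded in current_mapping_usage_count to note that the pinning
++	 * happened at import time.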
++ */ ++ if (reg->flags & KBASE_REG_SHARE_BOTH) { ++ pages = user_buf->pages; ++ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; ++ } + -+ pinned_pages = get_user_pages_fast(page_addr, -+ nr_pages, -+ 1, /* Write */ -+ buffers[i].pages); -+ if (pinned_pages < 0) { -+ ret = pinned_pages; -+ goto out_cleanup; -+ } -+ if (pinned_pages != nr_pages) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } ++ down_read(¤t->mm->mmap_lock); + -+ user_extres = user_buffers[i].extres; -+ if (user_extres.ext_resource == 0ULL) { -+ ret = -EINVAL; -+ goto out_cleanup; -+ } ++ faulted_pages = ++ kbase_get_user_pages(address, *va_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL); + -+ kbase_gpu_vm_lock(katom->kctx); -+ reg = kbase_region_tracker_find_region_enclosing_address( -+ katom->kctx, user_extres.ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE); ++ up_read(¤t->mm->mmap_lock); + -+ if (NULL == reg || NULL == reg->gpu_alloc || -+ (reg->flags & KBASE_REG_FREE)) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } ++ if (faulted_pages != *va_pages) ++ goto fault_mismatch; + -+ buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); -+ buffers[i].nr_extres_pages = reg->nr_pages; ++ atomic_inc(¤t->mm->mm_count); + -+ if (reg->nr_pages*PAGE_SIZE != buffers[i].size) -+ dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); ++ reg->gpu_alloc->nents = 0; ++ reg->extent = 0; + -+ switch (reg->gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; -+ unsigned long nr_pages = -+ alloc->imported.user_buf.nr_pages; ++ if (pages) { ++ struct device *dev = kctx->kbdev->dev; ++ unsigned long local_size = user_buf->size; ++ unsigned long offset = user_buf->address & ~PAGE_MASK; ++ phys_addr_t *pa = kbase_get_gpu_phy_pages(reg); + -+ if (alloc->imported.user_buf.mm != current->mm) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ buffers[i].extres_pages = kcalloc(nr_pages, -+ sizeof(struct page *), GFP_KERNEL); -+ if (!buffers[i].extres_pages) { -+ ret = -ENOMEM; -+ goto out_unlock; -+ } ++ /* Top bit signifies that this was pinned on import */ ++ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + -+ ret = get_user_pages_fast( -+ alloc->imported.user_buf.address, -+ nr_pages, 0, -+ buffers[i].extres_pages); -+ if (ret != nr_pages) -+ goto out_unlock; -+ ret = 0; -+ break; -+ } -+ case KBASE_MEM_TYPE_IMPORTED_UMP: -+ { -+ dev_warn(katom->kctx->kbdev->dev, -+ "UMP is not supported for debug_copy jobs\n"); -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ default: -+ /* Nothing to be done. 
*/ -+ break; ++ for (i = 0; i < faulted_pages; i++) { ++ dma_addr_t dma_addr; ++ unsigned long min; ++ ++ min = MIN(PAGE_SIZE - offset, local_size); ++ dma_addr = dma_map_page(dev, pages[i], ++ offset, min, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, dma_addr)) ++ goto unwind_dma_map; ++ ++ user_buf->dma_addrs[i] = dma_addr; ++ pa[i] = page_to_phys(pages[i]); ++ ++ local_size -= min; ++ offset = 0; + } -+ kbase_gpu_vm_unlock(katom->kctx); -+ } -+ kfree(user_buffers); + -+ return ret; ++ reg->gpu_alloc->nents = faulted_pages; ++ } + -+out_unlock: -+ kbase_gpu_vm_unlock(katom->kctx); ++ return reg; + -+out_cleanup: -+ kfree(buffers); -+ kfree(user_buffers); ++unwind_dma_map: ++ while (i--) { ++ dma_unmap_page(kctx->kbdev->dev, ++ user_buf->dma_addrs[i], ++ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ } ++fault_mismatch: ++ if (pages) { ++ for (i = 0; i < faulted_pages; i++) ++ put_page(pages[i]); ++ } ++ kfree(user_buf->pages); ++no_page_array: ++invalid_flags: ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc_obj: ++ kfree(reg); ++no_region: ++bad_size: ++ return NULL; + -+ /* Frees allocated memory for kbase_debug_copy_job struct, including -+ * members, and sets jc to 0 */ -+ kbase_debug_copy_finish(katom); -+ return ret; +} + -+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, -+ void *extres_page, struct page **pages, unsigned int nr_pages, -+ unsigned int *target_page_nr, size_t offset, size_t *to_copy) ++ ++u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, ++ u64 nents, struct base_mem_aliasing_info *ai, ++ u64 *num_pages) +{ -+ void *target_page = kmap(pages[*target_page_nr]); -+ size_t chunk = PAGE_SIZE-offset; ++ struct kbase_va_region *reg; ++ u64 gpu_va; ++ size_t i; ++ bool coherent; + -+ lockdep_assert_held(&kctx->reg_lock); ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(flags); ++ KBASE_DEBUG_ASSERT(ai); ++ KBASE_DEBUG_ASSERT(num_pages); + -+ if (!target_page) { -+ *target_page_nr += 1; -+ dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); -+ return; ++ /* mask to only allowed flags */ ++ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | ++ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | ++ BASE_MEM_COHERENT_SYSTEM_REQUIRED); ++ ++ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { ++ dev_warn(kctx->kbdev->dev, ++ "kbase_mem_alias called with bad flags (%llx)", ++ (unsigned long long)*flags); ++ goto bad_flags; + } ++ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || ++ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + -+ chunk = min(chunk, *to_copy); ++ if (!stride) ++ goto bad_stride; + -+ memcpy(target_page + offset, extres_page, chunk); -+ *to_copy -= chunk; ++ if (!nents) ++ goto bad_nents; + -+ kunmap(pages[*target_page_nr]); ++ if ((nents * stride) > (U64_MAX / PAGE_SIZE)) ++ /* 64-bit address range is the max */ ++ goto bad_size; + -+ *target_page_nr += 1; -+ if (*target_page_nr >= nr_pages) -+ return; ++ /* calculate the number of pages this alias will cover */ ++ *num_pages = nents * stride; + -+ target_page = kmap(pages[*target_page_nr]); -+ if (!target_page) { -+ *target_page_nr += 1; -+ dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); -+ return; ++#ifdef CONFIG_64BIT ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* 64-bit tasks must MMAP anyway, but not expose this address to ++ * clients */ ++ *flags |= BASE_MEM_NEED_MMAP; ++ reg = kbase_alloc_free_region(kctx, 0, *num_pages, ++ KBASE_REG_ZONE_SAME_VA); ++ } 
else { ++#else ++ if (1) { ++#endif ++ reg = kbase_alloc_free_region(kctx, 0, *num_pages, ++ KBASE_REG_ZONE_CUSTOM_VA); + } + -+ KBASE_DEBUG_ASSERT(target_page); -+ -+ chunk = min(offset, *to_copy); -+ memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); -+ *to_copy -= chunk; -+ -+ kunmap(pages[*target_page_nr]); -+} -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) -+static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, -+ unsigned long page_num, struct page **page) -+{ -+ struct sg_table *sgt = gpu_alloc->imported.umm.sgt; -+ struct sg_page_iter sg_iter; -+ unsigned long page_index = 0; ++ if (!reg) ++ goto no_reg; + -+ if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) -+ return NULL; ++ /* zero-sized page array, as we don't need one/can support one */ ++ reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS); ++ if (IS_ERR_OR_NULL(reg->gpu_alloc)) ++ goto no_alloc_obj; + -+ if (!sgt) -+ return NULL; ++ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + -+ if (WARN_ON(page_num >= gpu_alloc->nents)) -+ return NULL; ++ if (kbase_update_region_flags(kctx, reg, *flags) != 0) ++ goto invalid_flags; + -+ for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { -+ if (page_index == page_num) { -+ *page = sg_page_iter_page(&sg_iter); ++ reg->gpu_alloc->imported.alias.nents = nents; ++ reg->gpu_alloc->imported.alias.stride = stride; ++ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); ++ if (!reg->gpu_alloc->imported.alias.aliased) ++ goto no_aliased_array; + -+ return kmap(*page); -+ } -+ page_index++; -+ } ++ kbase_gpu_vm_lock(kctx); + -+ return NULL; -+} -+#endif ++ /* validate and add src handles */ ++ for (i = 0; i < nents; i++) { ++ if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { ++ if (ai[i].handle.basep.handle != ++ BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) ++ goto bad_handle; /* unsupported magic handle */ ++ if (!ai[i].length) ++ goto bad_handle; /* must be > 0 */ ++ if (ai[i].length > stride) ++ goto bad_handle; /* can't be larger than the ++ stride */ ++ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; ++ } else { ++ struct kbase_va_region *aliasing_reg; ++ struct kbase_mem_phy_alloc *alloc; + -+static int kbase_mem_copy_from_extres(struct kbase_context *kctx, -+ struct kbase_debug_copy_buffer *buf_data) -+{ -+ unsigned int i; -+ unsigned int target_page_nr = 0; -+ struct page **pages = buf_data->pages; -+ u64 offset = buf_data->offset; -+ size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; -+ size_t to_copy = min(extres_size, buf_data->size); -+ struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; -+ int ret = 0; ++ aliasing_reg = kbase_region_tracker_find_region_base_address( ++ kctx, ++ (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + -+ KBASE_DEBUG_ASSERT(pages != NULL); ++ /* validate found region */ ++ if (!aliasing_reg) ++ goto bad_handle; /* Not found */ ++ if (aliasing_reg->flags & KBASE_REG_FREE) ++ goto bad_handle; /* Free region */ ++ if (aliasing_reg->flags & KBASE_REG_DONT_NEED) ++ goto bad_handle; /* Ephemeral region */ ++ if (!aliasing_reg->gpu_alloc) ++ goto bad_handle; /* No alloc */ ++ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ goto bad_handle; /* Not a native alloc */ ++ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) ++ goto bad_handle; ++ /* Non-coherent memory cannot alias ++ coherent memory, and vice versa.*/ + -+ kbase_gpu_vm_lock(kctx); -+ if (!gpu_alloc) { -+ ret 
= -EINVAL; -+ goto out_unlock; -+ } ++ /* check size against stride */ ++ if (!ai[i].length) ++ goto bad_handle; /* must be > 0 */ ++ if (ai[i].length > stride) ++ goto bad_handle; /* can't be larger than the ++ stride */ + -+ switch (gpu_alloc->type) { -+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: -+ { -+ for (i = 0; i < buf_data->nr_extres_pages; i++) { -+ struct page *pg = buf_data->extres_pages[i]; -+ void *extres_page = kmap(pg); ++ alloc = aliasing_reg->gpu_alloc; + -+ if (extres_page) -+ kbase_mem_copy_from_extres_page(kctx, -+ extres_page, pages, -+ buf_data->nr_pages, -+ &target_page_nr, -+ offset, &to_copy); ++ /* check against the alloc's size */ ++ if (ai[i].offset > alloc->nents) ++ goto bad_handle; /* beyond end */ ++ if (ai[i].offset + ai[i].length > alloc->nents) ++ goto bad_handle; /* beyond end */ + -+ kunmap(pg); -+ if (target_page_nr >= buf_data->nr_pages) -+ break; ++ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); ++ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; ++ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + } -+ break; + } -+ break; -+#ifdef CONFIG_DMA_SHARED_BUFFER -+ case KBASE_MEM_TYPE_IMPORTED_UMM: { -+ struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; + -+ KBASE_DEBUG_ASSERT(dma_buf != NULL); -+ KBASE_DEBUG_ASSERT(dma_buf->size == -+ buf_data->nr_extres_pages * PAGE_SIZE); ++#ifdef CONFIG_64BIT ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++ /* Bind to a cookie */ ++ if (!kctx->cookies) { ++ dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); ++ goto no_cookie; ++ } ++ /* return a cookie */ ++ gpu_va = __ffs(kctx->cookies); ++ kctx->cookies &= ~(1UL << gpu_va); ++ BUG_ON(kctx->pending_regions[gpu_va]); ++ kctx->pending_regions[gpu_va] = reg; + -+ ret = dma_buf_begin_cpu_access(dma_buf, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) -+ 0, buf_data->nr_extres_pages*PAGE_SIZE, ++ /* relocate to correct base */ ++ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ gpu_va <<= PAGE_SHIFT; ++ } else /* we control the VA */ { ++#else ++ if (1) { +#endif -+ DMA_FROM_DEVICE); -+ if (ret) -+ goto out_unlock; ++ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { ++ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); ++ goto no_mmap; ++ } ++ /* return real GPU VA */ ++ gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } + -+ for (i = 0; i < buf_data->nr_extres_pages; i++) { ++ reg->flags &= ~KBASE_REG_FREE; ++ reg->flags &= ~KBASE_REG_GROWABLE; + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) -+ struct page *pg; -+ void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); -+#else -+ void *extres_page = dma_buf_kmap(dma_buf, i); -+#endif ++ kbase_gpu_vm_unlock(kctx); + -+ if (extres_page) -+ kbase_mem_copy_from_extres_page(kctx, -+ extres_page, pages, -+ buf_data->nr_pages, -+ &target_page_nr, -+ offset, &to_copy); ++ return gpu_va; + -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) -+ kunmap(pg); -+#else -+ dma_buf_kunmap(dma_buf, i, extres_page); -+#endif -+ if (target_page_nr >= buf_data->nr_pages) -+ break; -+ } -+ dma_buf_end_cpu_access(dma_buf, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) -+ 0, buf_data->nr_extres_pages*PAGE_SIZE, -+#endif -+ DMA_FROM_DEVICE); -+ break; -+ } ++#ifdef CONFIG_64BIT ++no_cookie: +#endif -+ default: -+ ret = -EINVAL; -+ } -+out_unlock: ++no_mmap: ++bad_handle: + kbase_gpu_vm_unlock(kctx); -+ return ret; -+ ++no_aliased_array: ++invalid_flags: ++ 
kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc_obj: ++ kfree(reg); ++no_reg: ++bad_size: ++bad_nents: ++bad_stride: ++bad_flags: ++ return 0; +} + -+static int kbase_debug_copy(struct kbase_jd_atom *katom) ++int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, ++ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, ++ u64 *flags) +{ -+ struct kbase_debug_copy_buffer *buffers = -+ (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; -+ unsigned int i; -+ -+ for (i = 0; i < katom->nr_extres; i++) { -+ int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); ++ struct kbase_va_region *reg; + -+ if (res) -+ return res; -+ } ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(gpu_va); ++ KBASE_DEBUG_ASSERT(va_pages); ++ KBASE_DEBUG_ASSERT(flags); + -+ return 0; -+} ++#ifdef CONFIG_64BIT ++ if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ *flags |= BASE_MEM_SAME_VA; ++#endif + -+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) -+{ -+ __user void *data = (__user void *)(uintptr_t) katom->jc; -+ struct base_jit_alloc_info *info; -+ struct kbase_context *kctx = katom->kctx; -+ int ret; ++ if (!kbase_check_import_flags(*flags)) { ++ dev_warn(kctx->kbdev->dev, ++ "kbase_mem_import called with bad flags (%llx)", ++ (unsigned long long)*flags); ++ goto bad_flags; ++ } + -+ /* Fail the job if there is no info structure */ -+ if (!data) { -+ ret = -EINVAL; -+ goto fail; ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ dev_warn(kctx->kbdev->dev, ++ "kbase_mem_import call required coherent mem when unavailable"); ++ goto bad_flags; ++ } ++ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && ++ !kbase_device_is_cpu_coherent(kctx->kbdev)) { ++ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ ++ *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + -+ /* Copy the information for safe access and future storage */ -+ info = kzalloc(sizeof(*info), GFP_KERNEL); -+ if (!info) { -+ ret = -ENOMEM; -+ goto fail; ++ if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { ++ dev_warn(kctx->kbdev->dev, ++ "padding is only supported for UMM"); ++ goto bad_flags; + } + -+ if (copy_from_user(info, data, sizeof(*info)) != 0) { -+ ret = -EINVAL; -+ goto free_info; ++ switch (type) { ++#ifdef CONFIG_UMP ++ case BASE_MEM_IMPORT_TYPE_UMP: { ++ ump_secure_id id; ++ ++ if (get_user(id, (ump_secure_id __user *)phandle)) ++ reg = NULL; ++ else ++ reg = kbase_mem_from_ump(kctx, id, va_pages, flags); + } ++ break; ++#endif /* CONFIG_UMP */ ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case BASE_MEM_IMPORT_TYPE_UMM: { ++ int fd; + -+ /* If the ID is zero then fail the job */ -+ if (info->id == 0) { -+ ret = -EINVAL; -+ goto free_info; ++ if (get_user(fd, (int __user *)phandle)) ++ reg = NULL; ++ else ++ reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, ++ padding); + } ++ break; ++#endif /* CONFIG_DMA_SHARED_BUFFER */ ++ case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { ++ struct base_mem_import_user_buffer user_buffer; ++ void __user *uptr; + -+ /* Sanity check that the PA fits within the VA */ -+ if (info->va_pages < info->commit_pages) { -+ ret = -EINVAL; -+ goto free_info; ++ if (copy_from_user(&user_buffer, phandle, ++ sizeof(user_buffer))) { ++ reg = NULL; ++ } else { ++#ifdef CONFIG_COMPAT ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++ uptr = compat_ptr(user_buffer.ptr.compat_value); ++ else ++#endif ++ uptr = user_buffer.ptr.value; ++ ++ reg = 
kbase_mem_from_user_buffer(kctx, ++ (unsigned long)uptr, user_buffer.length, ++ va_pages, flags); ++ } ++ break; ++ } ++ default: { ++ reg = NULL; ++ break; ++ } + } + -+ /* Ensure the GPU address is correctly aligned */ -+ if ((info->gpu_alloc_addr & 0x7) != 0) { -+ ret = -EINVAL; -+ goto free_info; ++ if (!reg) ++ goto no_reg; ++ ++ kbase_gpu_vm_lock(kctx); ++ ++ /* mmap needed to setup VA? */ ++ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { ++ /* Bind to a cookie */ ++ if (!kctx->cookies) ++ goto no_cookie; ++ /* return a cookie */ ++ *gpu_va = __ffs(kctx->cookies); ++ kctx->cookies &= ~(1UL << *gpu_va); ++ BUG_ON(kctx->pending_regions[*gpu_va]); ++ kctx->pending_regions[*gpu_va] = reg; ++ ++ /* relocate to correct base */ ++ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ *gpu_va <<= PAGE_SHIFT; ++ ++ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { ++ /* we control the VA, mmap now to the GPU */ ++ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) ++ goto no_gpu_va; ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; ++ } else { ++ /* we control the VA, but nothing to mmap yet */ ++ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) ++ goto no_gpu_va; ++ /* return real GPU VA */ ++ *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + -+ /* Replace the user pointer with our kernel allocated info structure */ -+ katom->jc = (u64)(uintptr_t) info; -+ katom->jit_blocked = false; ++ /* clear out private flags */ ++ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); + -+ lockdep_assert_held(&kctx->jctx.lock); -+ list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); ++ kbase_gpu_vm_unlock(kctx); + -+ /* -+ * Note: -+ * The provided info->gpu_alloc_addr isn't validated here as -+ * userland can cache allocations which means that even -+ * though the region is valid it doesn't represent the -+ * same thing it used to. -+ * -+ * Complete validation of va_pages, commit_pages and extent -+ * isn't done here as it will be done during the call to -+ * kbase_mem_alloc. 
-+ */ + return 0; + -+free_info: -+ kfree(info); -+fail: -+ katom->jc = 0; ++no_gpu_va: ++no_cookie: ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ kfree(reg); ++no_reg: ++bad_flags: ++ *gpu_va = 0; ++ *va_pages = 0; ++ *flags = 0; ++ return -ENOMEM; ++} ++ ++int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages) ++{ ++ phys_addr_t *phy_pages; ++ u64 delta = new_pages - old_pages; ++ int ret = 0; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ /* Map the new pages into the GPU */ ++ phy_pages = kbase_get_gpu_phy_pages(reg); ++ ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, ++ phy_pages + old_pages, delta, reg->flags); ++ + return ret; +} + -+static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom) ++static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages) +{ -+ if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE)) -+ return 0; ++ u64 gpu_va_start = reg->start_pfn; + -+ return (u8) katom->jc; ++ if (new_pages == old_pages) ++ /* Nothing to do */ ++ return; ++ ++ unmap_mapping_range(kctx->filp->f_inode->i_mapping, ++ (gpu_va_start + new_pages)<kctx; -+ struct base_jit_alloc_info *info; -+ struct kbase_va_region *reg; -+ struct kbase_vmap_struct mapping; -+ u64 *ptr, new_addr; ++ u64 delta = old_pages - new_pages; ++ int ret = 0; + -+ if (katom->jit_blocked) { -+ list_del(&katom->queue); -+ katom->jit_blocked = false; -+ } ++ ret = kbase_mmu_teardown_pages(kctx, ++ reg->start_pfn + new_pages, delta); + -+ info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; ++ return ret; ++} + -+ /* The JIT ID is still in use so fail the allocation */ -+ if (kctx->jit_alloc[info->id]) { -+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; -+ return 0; -+ } ++int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) ++{ ++ u64 old_pages; ++ u64 delta; ++ int res = -EINVAL; ++ struct kbase_va_region *reg; ++ bool read_locked = false; + -+ /* Create a JIT allocation */ -+ reg = kbase_jit_allocate(kctx, info); -+ if (!reg) { -+ struct kbase_jd_atom *jit_atom; -+ bool can_block = false; ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(gpu_addr != 0); + -+ lockdep_assert_held(&kctx->jctx.lock); ++ if (gpu_addr & ~PAGE_MASK) { ++ dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid"); ++ return -EINVAL; ++ } + -+ jit_atom = list_first_entry(&kctx->jit_atoms_head, -+ struct kbase_jd_atom, jit_node); ++ down_write(¤t->mm->mmap_lock); ++ kbase_gpu_vm_lock(kctx); + -+ list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { -+ if (jit_atom == katom) -+ break; -+ if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) { -+ u8 free_id = kbase_jit_free_get_id(jit_atom); ++ /* Validate the region */ ++ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); ++ if (!reg || (reg->flags & KBASE_REG_FREE)) ++ goto out_unlock; + -+ if (free_id && kctx->jit_alloc[free_id]) { -+ /* A JIT free which is active and -+ * submitted before this atom -+ */ -+ can_block = true; -+ break; -+ } -+ } -+ } ++ KBASE_DEBUG_ASSERT(reg->cpu_alloc); ++ KBASE_DEBUG_ASSERT(reg->gpu_alloc); + -+ if (!can_block) { -+ /* Mark the allocation so we know it's in use even if -+ * the allocation itself fails. 
-+ */ -+ kctx->jit_alloc[info->id] = -+ (struct kbase_va_region *) -1; ++ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ++ goto out_unlock; + -+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; -+ return 0; -+ } ++ if (0 == (reg->flags & KBASE_REG_GROWABLE)) ++ goto out_unlock; + -+ /* There are pending frees for an active allocation -+ * so we should wait to see whether they free the memory. -+ * Add to the beginning of the list to ensure that the atom is -+ * processed only once in kbase_jit_free_finish -+ */ -+ list_add(&katom->queue, &kctx->jit_pending_alloc); -+ katom->jit_blocked = true; ++ /* Would overflow the VA region */ ++ if (new_pages > reg->nr_pages) ++ goto out_unlock; + -+ return 1; ++ /* can't be mapped more than once on the GPU */ ++ if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) ++ goto out_unlock; ++ /* can't grow regions which are ephemeral */ ++ if (reg->flags & KBASE_REG_DONT_NEED) ++ goto out_unlock; ++ ++ if (new_pages == reg->gpu_alloc->nents) { ++ /* no change */ ++ res = 0; ++ goto out_unlock; + } + -+ /* -+ * Write the address of the JIT allocation to the user provided -+ * GPU allocation. -+ */ -+ ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), -+ &mapping); -+ if (!ptr) { ++ old_pages = kbase_reg_current_backed_size(reg); ++ if (new_pages > old_pages) { ++ delta = new_pages - old_pages; ++ + /* -+ * Leave the allocation "live" as the JIT free jit will be -+ * submitted anyway. ++ * No update to the mm so downgrade the writer lock to a read ++ * lock so other readers aren't blocked after this point. + */ -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ return 0; -+ } -+ -+ new_addr = reg->start_pfn << PAGE_SHIFT; -+ *ptr = new_addr; -+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( -+ katom, info->gpu_alloc_addr, new_addr); -+ kbase_vunmap(kctx, &mapping); ++ downgrade_write(¤t->mm->mmap_lock); ++ read_locked = true; + -+ katom->event_code = BASE_JD_EVENT_DONE; ++ /* Allocate some more pages */ ++ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { ++ res = -ENOMEM; ++ goto out_unlock; ++ } ++ if (reg->cpu_alloc != reg->gpu_alloc) { ++ if (kbase_alloc_phy_pages_helper( ++ reg->gpu_alloc, delta) != 0) { ++ res = -ENOMEM; ++ kbase_free_phy_pages_helper(reg->cpu_alloc, ++ delta); ++ goto out_unlock; ++ } ++ } + -+ /* -+ * Bind it to the user provided ID. Do this last so we can check for -+ * the JIT free racing this JIT alloc job. -+ */ -+ kctx->jit_alloc[info->id] = reg; ++ /* No update required for CPU mappings, that's done on fault. */ + -+ return 0; -+} ++ /* Update GPU mapping. 
*/ ++ res = kbase_mem_grow_gpu_mapping(kctx, reg, ++ new_pages, old_pages); + -+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) -+{ -+ struct base_jit_alloc_info *info; ++ /* On error free the new pages */ ++ if (res) { ++ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ kbase_free_phy_pages_helper(reg->gpu_alloc, ++ delta); ++ res = -ENOMEM; ++ goto out_unlock; ++ } ++ } else { ++ delta = old_pages - new_pages; + -+ lockdep_assert_held(&katom->kctx->jctx.lock); ++ /* Update all CPU mapping(s) */ ++ kbase_mem_shrink_cpu_mapping(kctx, reg, ++ new_pages, old_pages); + -+ /* Remove atom from jit_atoms_head list */ -+ list_del(&katom->jit_node); ++ /* Update the GPU mapping */ ++ res = kbase_mem_shrink_gpu_mapping(kctx, reg, ++ new_pages, old_pages); ++ if (res) { ++ res = -ENOMEM; ++ goto out_unlock; ++ } + -+ if (katom->jit_blocked) { -+ list_del(&katom->queue); -+ katom->jit_blocked = false; ++ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); ++ if (reg->cpu_alloc != reg->gpu_alloc) ++ kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } + -+ info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; -+ /* Free the info structure */ -+ kfree(info); ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ if (read_locked) ++ up_read(¤t->mm->mmap_lock); ++ else ++ up_write(¤t->mm->mmap_lock); ++ ++ return res; +} + -+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) ++static void kbase_cpu_vm_open(struct vm_area_struct *vma) +{ -+ struct kbase_context *kctx = katom->kctx; -+ -+ lockdep_assert_held(&kctx->jctx.lock); -+ list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); ++ struct kbase_cpu_mapping *map = vma->vm_private_data; + -+ return 0; ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); ++ /* non-atomic as we're under Linux' mm lock */ ++ map->count++; +} + -+static void kbase_jit_free_process(struct kbase_jd_atom *katom) ++static void kbase_cpu_vm_close(struct vm_area_struct *vma) +{ -+ struct kbase_context *kctx = katom->kctx; -+ u8 id = kbase_jit_free_get_id(katom); ++ struct kbase_cpu_mapping *map = vma->vm_private_data; + -+ /* -+ * If the ID is zero or it is not in use yet then fail the job. -+ */ -+ if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); ++ ++ /* non-atomic as we're under Linux' mm lock */ ++ if (--map->count) + return; ++ ++ KBASE_DEBUG_ASSERT(map->kctx); ++ KBASE_DEBUG_ASSERT(map->alloc); ++ ++ kbase_gpu_vm_lock(map->kctx); ++ ++ if (map->free_on_close) { ++ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == ++ KBASE_REG_ZONE_SAME_VA); ++ /* Avoid freeing memory on the process death which results in ++ * GPU Page Fault. Memory will be freed in kbase_destroy_context ++ */ ++ if (!(current->flags & PF_EXITING)) ++ kbase_mem_free_region(map->kctx, map->region); + } + -+ /* -+ * If the ID is valid but the allocation request failed still succeed -+ * this soft job but don't try and free the allocation. 
-+ */ -+ if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) -+ kbase_jit_free(kctx, kctx->jit_alloc[id]); ++ list_del(&map->mappings_list); + -+ kctx->jit_alloc[id] = NULL; ++ kbase_gpu_vm_unlock(map->kctx); ++ ++ kbase_mem_phy_alloc_put(map->alloc); ++ kfree(map); +} + -+static void kbasep_jit_free_finish_worker(struct work_struct *work) ++KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); ++ ++ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) ++static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ -+ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, -+ work); -+ struct kbase_context *kctx = katom->kctx; -+ int resched; ++#else ++static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct kbase_cpu_mapping *map = vma->vm_private_data; ++ pgoff_t rel_pgoff; ++ size_t i; ++ pgoff_t addr; ++ vm_fault_t ret = VM_FAULT_SIGBUS; + -+ mutex_lock(&kctx->jctx.lock); -+ kbase_finish_soft_job(katom); -+ resched = jd_done_nolock(katom, NULL); -+ mutex_unlock(&kctx->jctx.lock); ++ KBASE_DEBUG_ASSERT(map); ++ KBASE_DEBUG_ASSERT(map->count > 0); ++ KBASE_DEBUG_ASSERT(map->kctx); ++ KBASE_DEBUG_ASSERT(map->alloc); + -+ if (resched) -+ kbase_js_sched_all(kctx->kbdev); -+} ++ rel_pgoff = vmf->pgoff - map->region->start_pfn; + -+static void kbase_jit_free_finish(struct kbase_jd_atom *katom) -+{ -+ struct list_head *i, *tmp; -+ struct kbase_context *kctx = katom->kctx; ++ kbase_gpu_vm_lock(map->kctx); ++ if (rel_pgoff >= map->alloc->nents) ++ goto locked_bad_fault; + -+ lockdep_assert_held(&kctx->jctx.lock); -+ /* Remove this atom from the kctx->jit_atoms_head list */ -+ list_del(&katom->jit_node); ++ /* Fault on access to DONT_NEED regions */ ++ if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) ++ goto locked_bad_fault; + -+ list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) { -+ struct kbase_jd_atom *pending_atom = list_entry(i, -+ struct kbase_jd_atom, queue); -+ if (kbase_jit_allocate_process(pending_atom) == 0) { -+ /* Atom has completed */ -+ INIT_WORK(&pending_atom->work, -+ kbasep_jit_free_finish_worker); -+ queue_work(kctx->jctx.job_done_wq, &pending_atom->work); -+ } ++ /* insert all valid pages from the fault location */ ++ i = rel_pgoff; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT); ++#else ++ addr = (pgoff_t)(vmf->address >> PAGE_SHIFT); ++#endif ++ while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) { ++ ret = vmf_insert_pfn(vma, addr << PAGE_SHIFT, ++ PFN_DOWN(map->alloc->pages[i])); ++ if (ret != VM_FAULT_NOPAGE) ++ goto locked_bad_fault; ++ ++ i++; addr++; + } ++ ++ kbase_gpu_vm_unlock(map->kctx); ++ /* we resolved it, nothing for VM to do */ ++ return VM_FAULT_NOPAGE; ++ ++locked_bad_fault: ++ kbase_gpu_vm_unlock(map->kctx); ++ return ret; +} + -+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) ++const struct vm_operations_struct kbase_vm_ops = { ++ .open = kbase_cpu_vm_open, ++ .close = kbase_cpu_vm_close, ++ .fault = kbase_cpu_vm_fault ++}; ++ ++static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close) +{ -+ __user struct base_external_resource_list *user_ext_res; -+ struct base_external_resource_list *ext_res; -+ u64 count = 0; -+ size_t copy_size; -+ int ret; ++ struct kbase_cpu_mapping *map; ++ phys_addr_t *page_array; ++ int err = 0; 
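++	/* Loop index for the PFN-insertion path below. */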
++ int i; + -+ user_ext_res = (__user struct base_external_resource_list *) -+ (uintptr_t) katom->jc; ++ map = kzalloc(sizeof(*map), GFP_KERNEL); + -+ /* Fail the job if there is no info structure */ -+ if (!user_ext_res) { -+ ret = -EINVAL; -+ goto fail; ++ if (!map) { ++ WARN_ON(1); ++ err = -ENOMEM; ++ goto out; + } + -+ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { -+ ret = -EINVAL; -+ goto fail; -+ } ++ /* ++ * VM_DONTCOPY - don't make this mapping available in fork'ed processes ++ * VM_DONTEXPAND - disable mremap on this region ++ * VM_IO - disables paging ++ * VM_DONTDUMP - Don't include in core dumps (3.7 only) ++ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. ++ * This is needed to support using the dedicated and ++ * the OS based memory backends together. ++ */ ++ /* ++ * This will need updating to propagate coherency flags ++ * See MIDBASE-1057 ++ */ + -+ /* Is the number of external resources in range? */ -+ if (!count || count > BASE_EXT_RES_COUNT_MAX) { -+ ret = -EINVAL; -+ goto fail; ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) ++ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); ++#else ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; ++#endif ++ vma->vm_ops = &kbase_vm_ops; ++ vma->vm_private_data = map; ++ ++ page_array = kbase_get_cpu_phy_pages(reg); ++ ++ if (!(reg->flags & KBASE_REG_CPU_CACHED) && ++ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { ++ /* We can't map vmalloc'd memory uncached. ++ * Other memory will have been returned from ++ * kbase_mem_pool which would be ++ * suitable for mapping uncached. ++ */ ++ BUG_ON(kaddr); ++ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + } + -+ /* Copy the information for safe access and future storage */ -+ copy_size = sizeof(*ext_res); -+ copy_size += sizeof(struct base_external_resource) * (count - 1); -+ ext_res = kzalloc(copy_size, GFP_KERNEL); -+ if (!ext_res) { -+ ret = -ENOMEM; -+ goto fail; ++ if (!kaddr) { ++ unsigned long addr = vma->vm_start + aligned_offset; ++ u64 start_off = vma->vm_pgoff - reg->start_pfn + ++ (aligned_offset>>PAGE_SHIFT); ++ ++ vm_flags_set(vma, VM_PFNMAP); ++ for (i = 0; i < nr_pages; i++) { ++ unsigned long pfn = PFN_DOWN(page_array[i + start_off]); ++ vm_fault_t ret; ++ ++ ret = vmf_insert_pfn(vma, addr, pfn); ++ if (WARN_ON(ret != VM_FAULT_NOPAGE)) { ++ if (ret == VM_FAULT_OOM) ++ err = -ENOMEM; ++ else ++ err = -EFAULT; ++ break; ++ } ++ ++ addr += PAGE_SIZE; ++ } ++ } else { ++ WARN_ON(aligned_offset); ++ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ ++ vm_flags_set(vma, VM_MIXEDMAP); ++ /* vmalloc remaping is easy... */ ++ err = remap_vmalloc_range(vma, kaddr, 0); ++ WARN_ON(err); + } + -+ if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { -+ ret = -EINVAL; -+ goto free_info; ++ if (err) { ++ kfree(map); ++ goto out; + } + -+ /* -+ * Overwrite the count with the first value incase it was changed -+ * after the fact. -+ */ -+ ext_res->count = count; ++ map->region = reg; ++ map->free_on_close = free_on_close; ++ map->kctx = reg->kctx; ++ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ map->count = 1; /* start with one ref */ + -+ /* -+ * Replace the user pointer with our kernel allocated -+ * ext_res structure. 
-+ */ -+ katom->jc = (u64)(uintptr_t) ext_res; ++ if (reg->flags & KBASE_REG_CPU_CACHED) ++ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + -+ return 0; ++ list_add(&map->mappings_list, &map->alloc->mappings); + -+free_info: -+ kfree(ext_res); -+fail: -+ return ret; ++ out: ++ return err; +} + -+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) ++static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr) +{ -+ struct base_external_resource_list *ext_res; -+ int i; -+ bool failed = false; ++ struct kbase_va_region *new_reg; ++ u32 nr_pages; ++ size_t size; ++ int err = 0; ++ u32 *tb; ++ int owns_tb = 1; + -+ ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; -+ if (!ext_res) -+ goto failed_jc; ++ dev_dbg(kctx->kbdev->dev, "in %s\n", __func__); ++ size = (vma->vm_end - vma->vm_start); ++ nr_pages = size >> PAGE_SHIFT; + -+ kbase_gpu_vm_lock(katom->kctx); ++ if (!kctx->jctx.tb) { ++ KBASE_DEBUG_ASSERT(0 != size); ++ tb = vmalloc_user(size); + -+ for (i = 0; i < ext_res->count; i++) { -+ u64 gpu_addr; ++ if (NULL == tb) { ++ err = -ENOMEM; ++ goto out; ++ } + -+ gpu_addr = ext_res->ext_res[i].ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; -+ if (map) { -+ if (!kbase_sticky_resource_acquire(katom->kctx, -+ gpu_addr)) -+ goto failed_loop; -+ } else -+ if (!kbase_sticky_resource_release(katom->kctx, NULL, -+ gpu_addr)) -+ failed = true; ++ err = kbase_device_trace_buffer_install(kctx, tb, size); ++ if (err) { ++ vfree(tb); ++ goto out; ++ } ++ } else { ++ err = -EINVAL; ++ goto out; + } + -+ /* -+ * In the case of unmap we continue unmapping other resources in the -+ * case of failure but will always report failure if _any_ unmap -+ * request fails. 
-+ */ -+ if (failed) -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ else -+ katom->event_code = BASE_JD_EVENT_DONE; ++ *kaddr = kctx->jctx.tb; + -+ kbase_gpu_vm_unlock(katom->kctx); ++ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); ++ if (!new_reg) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out_no_region; ++ } + -+ return; ++ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB); ++ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { ++ err = -ENOMEM; ++ new_reg->cpu_alloc = NULL; ++ WARN_ON(1); ++ goto out_no_alloc; ++ } + -+failed_loop: -+ while (--i > 0) { -+ u64 gpu_addr; ++ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); + -+ gpu_addr = ext_res->ext_res[i].ext_resource & -+ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; ++ new_reg->cpu_alloc->imported.kctx = kctx; ++ new_reg->flags &= ~KBASE_REG_FREE; ++ new_reg->flags |= KBASE_REG_CPU_CACHED; + -+ kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); ++ /* alloc now owns the tb */ ++ owns_tb = 0; ++ ++ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out_no_va_region; + } + -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ kbase_gpu_vm_unlock(katom->kctx); ++ *reg = new_reg; + -+failed_jc: -+ return; -+} ++ /* map read only, noexec */ ++ vm_flags_clear(vma, (VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); ++ /* the rest of the flags is added by the cpu_mmap handler */ + -+static void kbase_ext_res_finish(struct kbase_jd_atom *katom) -+{ -+ struct base_external_resource_list *ext_res; ++ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); ++ return 0; + -+ ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; -+ /* Free the info structure */ -+ kfree(ext_res); ++out_no_va_region: ++out_no_alloc: ++ kbase_free_alloced_region(new_reg); ++out_no_region: ++ if (owns_tb) { ++ kbase_device_trace_buffer_uninstall(kctx); ++ vfree(tb); ++ } ++out: ++ return err; +} + -+int kbase_process_soft_job(struct kbase_jd_atom *katom) ++static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr) +{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ return kbase_dump_cpu_gpu_time(katom); ++ struct kbase_va_region *new_reg; ++ void *kaddr; ++ u32 nr_pages; ++ size_t size; ++ int err = 0; + -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ katom->event_code = kbase_sync_fence_out_trigger(katom, -+ katom->event_code == BASE_JD_EVENT_DONE ? 
-+ 0 : -EFAULT); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ { -+ int ret = kbase_sync_fence_in_wait(katom); ++ dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); ++ size = (vma->vm_end - vma->vm_start); ++ nr_pages = size >> PAGE_SHIFT; + -+ if (ret == 1) { -+#ifdef CONFIG_MALI_FENCE_DEBUG -+ kbasep_add_waiting_with_timeout(katom); -+#else -+ kbasep_add_waiting_soft_job(katom); -+#endif -+ } -+ return ret; ++ kaddr = kbase_mmu_dump(kctx, nr_pages); ++ ++ if (!kaddr) { ++ err = -ENOMEM; ++ goto out; + } -+#endif + -+ case BASE_JD_REQ_SOFT_REPLAY: -+ return kbase_replay_process(katom); -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ return kbasep_soft_event_wait(katom); -+ case BASE_JD_REQ_SOFT_EVENT_SET: -+ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); -+ break; -+ case BASE_JD_REQ_SOFT_EVENT_RESET: -+ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); -+ break; -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ { -+ int res = kbase_debug_copy(katom); ++ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); ++ if (!new_reg) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out; ++ } + -+ if (res) -+ katom->event_code = BASE_JD_EVENT_JOB_INVALID; -+ break; ++ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW); ++ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { ++ err = -ENOMEM; ++ new_reg->cpu_alloc = NULL; ++ WARN_ON(1); ++ goto out_no_alloc; + } -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ return kbase_jit_allocate_process(katom); -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ kbase_jit_free_process(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ kbase_ext_res_process(katom, true); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ kbase_ext_res_process(katom, false); -+ break; ++ ++ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); ++ ++ new_reg->flags &= ~KBASE_REG_FREE; ++ new_reg->flags |= KBASE_REG_CPU_CACHED; ++ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { ++ err = -ENOMEM; ++ WARN_ON(1); ++ goto out_va_region; + } + -+ /* Atom is complete */ ++ *kmap_addr = kaddr; ++ *reg = new_reg; ++ ++ dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + return 0; ++ ++out_no_alloc: ++out_va_region: ++ kbase_free_alloced_region(new_reg); ++out: ++ return err; +} + -+void kbase_cancel_soft_job(struct kbase_jd_atom *katom) ++ ++void kbase_os_mem_map_lock(struct kbase_context *kctx) +{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ kbase_sync_fence_in_cancel_wait(katom); -+ break; -+#endif -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ kbasep_soft_event_cancel_job(katom); -+ break; -+ default: -+ /* This soft-job doesn't support cancellation! 
*/ -+ KBASE_DEBUG_ASSERT(0); -+ } ++ struct mm_struct *mm = current->mm; ++ (void)kctx; ++ down_read(&mm->mmap_lock); +} + -+int kbase_prepare_soft_job(struct kbase_jd_atom *katom) ++void kbase_os_mem_map_unlock(struct kbase_context *kctx) +{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ { -+ if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) -+ return -EINVAL; -+ } -+ break; -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ { -+ struct base_fence fence; -+ int fd; -+ -+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) -+ return -EINVAL; ++ struct mm_struct *mm = current->mm; ++ (void)kctx; ++ up_read(&mm->mmap_lock); ++} + -+ fd = kbase_sync_fence_out_create(katom, -+ fence.basep.stream_fd); -+ if (fd < 0) -+ return -EINVAL; ++static int kbasep_reg_mmap(struct kbase_context *kctx, ++ struct vm_area_struct *vma, ++ struct kbase_va_region **regm, ++ size_t *nr_pages, size_t *aligned_offset) + -+ fence.basep.fd = fd; -+ if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { -+ kbase_sync_fence_out_remove(katom); -+ kbase_sync_fence_close_fd(fd); -+ fence.basep.fd = -EINVAL; -+ return -EINVAL; -+ } -+ } -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ { -+ struct base_fence fence; -+ int ret; ++{ ++ int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); ++ struct kbase_va_region *reg; ++ int err = 0; + -+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) -+ return -EINVAL; ++ *aligned_offset = 0; + -+ /* Get a reference to the fence object */ -+ ret = kbase_sync_fence_in_from_fd(katom, -+ fence.basep.fd); -+ if (ret < 0) -+ return ret; ++ dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); + -+#ifdef CONFIG_MALI_DMA_FENCE -+ /* -+ * Set KCTX_NO_IMPLICIT_FENCE in the context the first -+ * time a soft fence wait job is observed. This will -+ * prevent the implicit dma-buf fence to conflict with -+ * the Android native sync fences. -+ */ -+ if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) -+ kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); -+#endif /* CONFIG_MALI_DMA_FENCE */ -+ } -+ break; -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ return kbase_jit_allocate_prepare(katom); -+ case BASE_JD_REQ_SOFT_REPLAY: -+ break; -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ return kbase_jit_free_prepare(katom); -+ case BASE_JD_REQ_SOFT_EVENT_WAIT: -+ case BASE_JD_REQ_SOFT_EVENT_SET: -+ case BASE_JD_REQ_SOFT_EVENT_RESET: -+ if (katom->jc == 0) -+ return -EINVAL; -+ break; -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ return kbase_debug_copy_prepare(katom); -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ return kbase_ext_res_prepare(katom); -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ return kbase_ext_res_prepare(katom); -+ default: -+ /* Unsupported soft-job */ -+ return -EINVAL; ++ /* SAME_VA stuff, fetch the right region */ ++ reg = kctx->pending_regions[cookie]; ++ if (!reg) { ++ err = -ENOMEM; ++ goto out; + } -+ return 0; -+} + -+void kbase_finish_soft_job(struct kbase_jd_atom *katom) -+{ -+ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: -+ /* Nothing to do */ -+ break; -+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: -+ /* If fence has not yet been signaled, do it now */ -+ kbase_sync_fence_out_trigger(katom, katom->event_code == -+ BASE_JD_EVENT_DONE ? 
0 : -EFAULT); -+ break; -+ case BASE_JD_REQ_SOFT_FENCE_WAIT: -+ /* Release katom's reference to fence object */ -+ kbase_sync_fence_in_remove(katom); -+ break; -+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -+ case BASE_JD_REQ_SOFT_DEBUG_COPY: -+ kbase_debug_copy_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_JIT_ALLOC: -+ kbase_jit_allocate_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_MAP: -+ kbase_ext_res_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: -+ kbase_ext_res_finish(katom); -+ break; -+ case BASE_JD_REQ_SOFT_JIT_FREE: -+ kbase_jit_free_finish(katom); -+ break; ++ if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { ++ /* incorrect mmap size */ ++ /* leave the cookie for a potential later ++ * mapping, or to be reclaimed later when the ++ * context is freed */ ++ err = -ENOMEM; ++ goto out; + } -+} -+ -+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) -+{ -+ LIST_HEAD(local_suspended_soft_jobs); -+ struct kbase_jd_atom *tmp_iter; -+ struct kbase_jd_atom *katom_iter; -+ struct kbasep_js_device_data *js_devdata; -+ bool resched = false; + -+ KBASE_DEBUG_ASSERT(kbdev); ++ if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || ++ (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { ++ /* VM flags inconsistent with region flags */ ++ err = -EPERM; ++ dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", ++ __FILE__, __LINE__); ++ goto out; ++ } + -+ js_devdata = &kbdev->js_data; ++ /* adjust down nr_pages to what we have physically */ ++ *nr_pages = kbase_reg_current_backed_size(reg); + -+ /* Move out the entire list */ -+ mutex_lock(&js_devdata->runpool_mutex); -+ list_splice_init(&js_devdata->suspended_soft_jobs_list, -+ &local_suspended_soft_jobs); -+ mutex_unlock(&js_devdata->runpool_mutex); ++ if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, ++ reg->nr_pages, 1) != 0) { ++ dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); ++ /* Unable to map in GPU space. */ ++ WARN_ON(1); ++ err = -ENOMEM; ++ goto out; ++ } ++ /* no need for the cookie anymore */ ++ kctx->pending_regions[cookie] = NULL; ++ kctx->cookies |= (1UL << cookie); + + /* -+ * Each atom must be detached from the list and ran separately - -+ * it could be re-added to the old list, but this is unlikely ++ * Overwrite the offset with the region start_pfn, so we effectively ++ * map from offset 0 in the region. 
However subtract the aligned ++ * offset so that when user space trims the mapping the beginning of ++ * the trimmed VMA has the correct vm_pgoff; + */ -+ list_for_each_entry_safe(katom_iter, tmp_iter, -+ &local_suspended_soft_jobs, dep_item[1]) { -+ struct kbase_context *kctx = katom_iter->kctx; ++ vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); ++out: ++ *regm = reg; ++ dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); + -+ mutex_lock(&kctx->jctx.lock); ++ return err; ++} + -+ /* Remove from the global list */ -+ list_del(&katom_iter->dep_item[1]); -+ /* Remove from the context's list of waiting soft jobs */ -+ kbasep_remove_waiting_soft_job(katom_iter); ++int kbase_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ struct kbase_context *kctx = file->private_data; ++ struct kbase_va_region *reg = NULL; ++ void *kaddr = NULL; ++ size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; ++ int err = 0; ++ int free_on_close = 0; ++ struct device *dev = kctx->kbdev->dev; ++ size_t aligned_offset = 0; + -+ if (kbase_process_soft_job(katom_iter) == 0) { -+ kbase_finish_soft_job(katom_iter); -+ resched |= jd_done_nolock(katom_iter, NULL); -+ } else { -+ KBASE_DEBUG_ASSERT((katom_iter->core_req & -+ BASE_JD_REQ_SOFT_JOB_TYPE) -+ != BASE_JD_REQ_SOFT_REPLAY); -+ } ++ dev_dbg(dev, "kbase_mmap\n"); + -+ mutex_unlock(&kctx->jctx.lock); ++ /* strip away corresponding VM_MAY% flags to the VM_% flags requested */ ++ vm_flags_clear(vma, ((vma->vm_flags & (VM_READ | VM_WRITE)) << 4)); ++ ++ if (0 == nr_pages) { ++ err = -EINVAL; ++ goto out; + } + -+ if (resched) -+ kbase_js_sched_all(kbdev); -+} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.c b/drivers/gpu/arm/midgard/mali_kbase_strings.c -new file mode 100644 -index 000000000..c98762cec ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_strings.c -@@ -0,0 +1,23 @@ -+ /* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ if (!(vma->vm_flags & VM_SHARED)) { ++ err = -EINVAL; ++ goto out; ++ } + ++ kbase_gpu_vm_lock(kctx); + -+#include "mali_kbase_strings.h" ++ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { ++ /* The non-mapped tracking helper page */ ++ err = kbase_tracking_page_setup(kctx, vma); ++ goto out_unlock; ++ } + -+#define KBASE_DRV_NAME "mali" -+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" ++ /* if not the MTP, verify that the MTP has been mapped */ ++ rcu_read_lock(); ++ /* catches both when the special page isn't present or ++ * when we've forked */ ++ if (rcu_dereference(kctx->process_mm) != current->mm) { ++ err = -EINVAL; ++ rcu_read_unlock(); ++ goto out_unlock; ++ } ++ rcu_read_unlock(); + -+const char kbase_drv_name[] = KBASE_DRV_NAME; -+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; -diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.h b/drivers/gpu/arm/midgard/mali_kbase_strings.h -new file mode 100644 -index 000000000..41b8fdbec ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_strings.h -@@ -0,0 +1,19 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2016 ARM Limited. 
All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ switch (vma->vm_pgoff) { ++ case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): ++ case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): ++ /* Illegal handle for direct map */ ++ err = -EINVAL; ++ goto out_unlock; ++ case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE): ++ err = kbase_trace_buffer_mmap(kctx, vma, ®, &kaddr); ++ if (0 != err) ++ goto out_unlock; ++ dev_dbg(dev, "kbase_trace_buffer_mmap ok\n"); ++ /* free the region on munmap */ ++ free_on_close = 1; ++ break; ++ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): ++ /* MMU dump */ ++ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); ++ if (0 != err) ++ goto out_unlock; ++ /* free the region on munmap */ ++ free_on_close = 1; ++ break; ++ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... ++ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { ++ err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, ++ &aligned_offset); ++ if (0 != err) ++ goto out_unlock; ++ /* free the region on munmap */ ++ free_on_close = 1; ++ break; ++ } ++ default: { ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ (u64)vma->vm_pgoff << PAGE_SHIFT); + ++ if (reg && !(reg->flags & KBASE_REG_FREE)) { ++ /* will this mapping overflow the size of the region? */ ++ if (nr_pages > (reg->nr_pages - ++ (vma->vm_pgoff - reg->start_pfn))) { ++ err = -ENOMEM; ++ goto out_unlock; ++ } + ++ if ((vma->vm_flags & VM_READ && ++ !(reg->flags & KBASE_REG_CPU_RD)) || ++ (vma->vm_flags & VM_WRITE && ++ !(reg->flags & KBASE_REG_CPU_WR))) { ++ /* VM flags inconsistent with region flags */ ++ err = -EPERM; ++ dev_err(dev, "%s:%d inconsistent VM flags\n", ++ __FILE__, __LINE__); ++ goto out_unlock; ++ } + -+extern const char kbase_drv_name[]; -+extern const char kbase_timeline_name[]; -diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h -new file mode 100644 -index 000000000..2cb8c1820 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h -@@ -0,0 +1,204 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ if (KBASE_MEM_TYPE_IMPORTED_UMM == ++ reg->cpu_alloc->type) { ++ err = dma_buf_mmap( ++ reg->cpu_alloc->imported.umm.dma_buf, ++ vma, vma->vm_pgoff - reg->start_pfn); ++ goto out_unlock; ++ } ++#endif /* CONFIG_DMA_SHARED_BUFFER */ + ++ /* limit what we map to the amount currently backed */ ++ if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { ++ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) ++ nr_pages = 0; ++ else ++ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); ++ } ++ } else { ++ err = -ENOMEM; ++ goto out_unlock; ++ } ++ } /* default */ ++ } /* switch */ + ++ err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); + -+/** -+ * @file mali_kbase_sync.h -+ * -+ * This file contains our internal "API" for explicit fences. -+ * It hides the implementation details of the actual explicit fence mechanism -+ * used (Android fences or sync file with DMA fences). -+ */ ++ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { ++ /* MMU dump - userspace should now have a reference on ++ * the pages, so we can now free the kernel mapping */ ++ vfree(kaddr); ++ } + -+#ifndef MALI_KBASE_SYNC_H -+#define MALI_KBASE_SYNC_H ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++out: ++ if (err) ++ dev_err(dev, "mmap failed %d\n", err); + -+#include -+#include -+#ifdef CONFIG_SYNC -+#include -+#endif -+#ifdef CONFIG_SYNC_FILE -+#include "mali_kbase_fence_defs.h" -+#include -+#endif ++ return err; ++} + -+#include "mali_kbase.h" ++KBASE_EXPORT_TEST_API(kbase_mmap); + -+/** -+ * struct kbase_sync_fence_info - Information about a fence -+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ) -+ * @name: The name given to this fence when it was created -+ * @status: < 0 means error, 0 means active, 1 means signaled -+ * -+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() -+ * to get the information. 
-+ */ -+struct kbase_sync_fence_info { -+ void *fence; -+ char name[32]; -+ int status; ++void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ unsigned long prot_request, struct kbase_vmap_struct *map) ++{ ++ struct kbase_va_region *reg; ++ unsigned long page_index; ++ unsigned int offset = gpu_addr & ~PAGE_MASK; ++ size_t page_count = PFN_UP(offset + size); ++ phys_addr_t *page_array; ++ struct page **pages; ++ void *cpu_addr = NULL; ++ pgprot_t prot; ++ size_t i; ++ bool sync_needed; ++ ++ if (!size || !map) ++ return NULL; ++ ++ /* check if page_count calculation will wrap */ ++ if (size > ((size_t)-1 / PAGE_SIZE)) ++ return NULL; ++ ++ kbase_gpu_vm_lock(kctx); ++ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); ++ if (!reg || (reg->flags & KBASE_REG_FREE)) ++ goto out_unlock; ++ ++ page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn; ++ ++ /* check if page_index + page_count will wrap */ ++ if (-1UL - page_count < page_index) ++ goto out_unlock; ++ ++ if (page_index + page_count > kbase_reg_current_backed_size(reg)) ++ goto out_unlock; ++ ++ if (reg->flags & KBASE_REG_DONT_NEED) ++ goto out_unlock; ++ ++ /* check access permissions can be satisfied ++ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ ++ if ((reg->flags & prot_request) != prot_request) ++ goto out_unlock; ++ ++ page_array = kbase_get_cpu_phy_pages(reg); ++ if (!page_array) ++ goto out_unlock; ++ ++ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); ++ if (!pages) ++ goto out_unlock; ++ ++ for (i = 0; i < page_count; i++) ++ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i])); ++ ++ prot = PAGE_KERNEL; ++ if (!(reg->flags & KBASE_REG_CPU_CACHED)) { ++ /* Map uncached */ ++ prot = pgprot_writecombine(prot); ++ } ++ /* Note: enforcing a RO prot_request onto prot is not done, since: ++ * - CPU-arch-specific integration required ++ * - kbase_vmap() requires no access checks to be made/enforced */ ++ ++ cpu_addr = vmap(pages, page_count, VM_MAP, prot); ++ ++ kfree(pages); ++ ++ if (!cpu_addr) ++ goto out_unlock; ++ ++ map->gpu_addr = gpu_addr; ++ map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; ++ map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; ++ map->addr = (void *)((uintptr_t)cpu_addr + offset); ++ map->size = size; ++ map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; ++ sync_needed = map->is_cached; ++ ++#ifdef CONFIG_MALI_COH_KERN ++ /* kernel can use coherent memory if supported */ ++ if (kctx->kbdev->system_coherency == COHERENCY_ACE) ++ sync_needed = false; ++#endif ++ ++ if (sync_needed) { ++ /* Sync first page */ ++ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); ++ phys_addr_t cpu_pa = map->cpu_pages[0]; ++ phys_addr_t gpu_pa = map->gpu_pages[0]; ++ ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, ++ KBASE_SYNC_TO_CPU); ++ ++ /* Sync middle pages (if any) */ ++ for (i = 1; page_count > 2 && i < page_count - 1; i++) { ++ cpu_pa = map->cpu_pages[i]; ++ gpu_pa = map->gpu_pages[i]; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, ++ KBASE_SYNC_TO_CPU); ++ } ++ ++ /* Sync last page (if any) */ ++ if (page_count > 1) { ++ cpu_pa = map->cpu_pages[page_count - 1]; ++ gpu_pa = map->gpu_pages[page_count - 1]; ++ sz = ((offset + size - 1) & ~PAGE_MASK) + 1; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, ++ KBASE_SYNC_TO_CPU); ++ } ++ } ++ 
kbase_gpu_vm_unlock(kctx); ++ ++ return map->addr; ++ ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return NULL; ++} ++ ++void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ struct kbase_vmap_struct *map) ++{ ++ /* 0 is specified for prot_request to indicate no access checks should ++ * be made. ++ * ++ * As mentioned in kbase_vmap_prot() this means that a kernel-side ++ * CPU-RO mapping is not enforced to allow this to work */ ++ return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); ++} ++KBASE_EXPORT_TEST_API(kbase_vmap); ++ ++void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) ++{ ++ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); ++ bool sync_needed = map->is_cached; ++ vunmap(addr); ++#ifdef CONFIG_MALI_COH_KERN ++ /* kernel can use coherent memory if supported */ ++ if (kctx->kbdev->system_coherency == COHERENCY_ACE) ++ sync_needed = false; ++#endif ++ if (sync_needed) { ++ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; ++ size_t size = map->size; ++ size_t page_count = PFN_UP(offset + size); ++ size_t i; ++ ++ /* Sync first page */ ++ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); ++ phys_addr_t cpu_pa = map->cpu_pages[0]; ++ phys_addr_t gpu_pa = map->gpu_pages[0]; ++ ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, ++ KBASE_SYNC_TO_DEVICE); ++ ++ /* Sync middle pages (if any) */ ++ for (i = 1; page_count > 2 && i < page_count - 1; i++) { ++ cpu_pa = map->cpu_pages[i]; ++ gpu_pa = map->gpu_pages[i]; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, ++ KBASE_SYNC_TO_DEVICE); ++ } ++ ++ /* Sync last page (if any) */ ++ if (page_count > 1) { ++ cpu_pa = map->cpu_pages[page_count - 1]; ++ gpu_pa = map->gpu_pages[page_count - 1]; ++ sz = ((offset + size - 1) & ~PAGE_MASK) + 1; ++ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, ++ KBASE_SYNC_TO_DEVICE); ++ } ++ } ++ map->gpu_addr = 0; ++ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); ++ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); ++ map->cpu_pages = NULL; ++ map->gpu_pages = NULL; ++ map->addr = NULL; ++ map->size = 0; ++ map->is_cached = false; ++} ++KBASE_EXPORT_TEST_API(kbase_vunmap); ++ ++void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) ++{ ++ struct mm_struct *mm; ++ ++ rcu_read_lock(); ++ mm = rcu_dereference(kctx->process_mm); ++ if (mm) { ++ atomic_add(pages, &kctx->nonmapped_pages); ++#ifdef SPLIT_RSS_COUNTING ++ add_mm_counter(mm, MM_FILEPAGES, pages); ++#else ++ spin_lock(&mm->page_table_lock); ++ add_mm_counter(mm, MM_FILEPAGES, pages); ++ spin_unlock(&mm->page_table_lock); ++#endif ++ } ++ rcu_read_unlock(); ++} ++ ++static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) ++{ ++ int pages; ++ struct mm_struct *mm; ++ ++ spin_lock(&kctx->mm_update_lock); ++ mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); ++ if (!mm) { ++ spin_unlock(&kctx->mm_update_lock); ++ return; ++ } ++ ++ rcu_assign_pointer(kctx->process_mm, NULL); ++ spin_unlock(&kctx->mm_update_lock); ++ synchronize_rcu(); ++ ++ pages = atomic_xchg(&kctx->nonmapped_pages, 0); ++#ifdef SPLIT_RSS_COUNTING ++ add_mm_counter(mm, MM_FILEPAGES, -pages); ++#else ++ spin_lock(&mm->page_table_lock); ++ add_mm_counter(mm, MM_FILEPAGES, -pages); ++ spin_unlock(&mm->page_table_lock); ++#endif ++} ++ ++static void kbase_special_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_context *kctx; ++ ++ kctx = vma->vm_private_data; ++ kbasep_os_process_page_usage_drain(kctx); ++} ++ 
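For reference, the kbase_vmap_prot()/kbase_vunmap() pair introduced above is intended to be used as a scoped kernel-side mapping of a GPU VA range. The sketch below is illustrative only and is not part of the patch; the helper name example_poke_gpu_word is hypothetical, and kctx/gpu_addr are assumed to come from an earlier kbase allocation.

	/* Illustrative sketch, not part of the patch: map one word of a GPU
	 * buffer into the kernel, write it, and unmap again. */
	static int example_poke_gpu_word(struct kbase_context *kctx, u64 gpu_addr)
	{
		struct kbase_vmap_struct map;
		u32 *ptr;

		/* Request CPU write permission so kbase_vmap_prot() can
		 * reject regions that were not mapped CPU-writable. */
		ptr = kbase_vmap_prot(kctx, gpu_addr, sizeof(*ptr),
				      KBASE_REG_CPU_WR, &map);
		if (!ptr)
			return -EINVAL;

		*ptr = 0xdeadbeef;	/* flushed back to the GPU on unmap if CPU-cached */

		kbase_vunmap(kctx, &map);
		return 0;
	}

kbase_vmap() behaves the same way but performs no permission check, which is why the in-tree comments recommend kbase_vmap_prot() when dealing with imported memory.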
++static const struct vm_operations_struct kbase_vm_special_ops = { ++ .close = kbase_special_vm_close, +}; + -+/** -+ * kbase_sync_fence_stream_create() - Create a stream object -+ * @name: Name of stream (only used to ease debugging/visualization) -+ * @out_fd: A file descriptor representing the created stream object ++static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) ++{ ++ /* check that this is the only tracking page */ ++ spin_lock(&kctx->mm_update_lock); ++ if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { ++ spin_unlock(&kctx->mm_update_lock); ++ return -EFAULT; ++ } ++ ++ rcu_assign_pointer(kctx->process_mm, current->mm); ++ ++ spin_unlock(&kctx->mm_update_lock); ++ ++ /* no real access */ ++ vm_flags_clear(vma, (VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC)); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0)) ++ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); ++#else ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; ++#endif ++ vma->vm_ops = &kbase_vm_special_ops; ++ vma->vm_private_data = kctx; ++ ++ return 0; ++} ++void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle) ++{ ++ int i; ++ int res; ++ void *va; ++ dma_addr_t dma_pa; ++ struct kbase_va_region *reg; ++ phys_addr_t *page_array; ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ unsigned long attrs = DMA_ATTR_WRITE_COMBINE; ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++ DEFINE_DMA_ATTRS(attrs); ++#endif ++ ++ u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; ++ u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | ++ BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; ++ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(0 != size); ++ KBASE_DEBUG_ASSERT(0 != pages); ++ ++ if (size == 0) ++ goto err; ++ ++ /* All the alloc calls return zeroed memory */ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, ++ attrs); ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); ++ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, ++ &attrs); ++#else ++ va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); ++#endif ++ if (!va) ++ goto err; ++ ++ /* Store the state so we can free it later. 
*/ ++ handle->cpu_va = va; ++ handle->dma_pa = dma_pa; ++ handle->size = size; ++ ++ ++ reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA); ++ if (!reg) ++ goto no_reg; ++ ++ reg->flags &= ~KBASE_REG_FREE; ++ if (kbase_update_region_flags(kctx, reg, flags) != 0) ++ goto invalid_flags; ++ ++ reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); ++ if (IS_ERR_OR_NULL(reg->cpu_alloc)) ++ goto no_alloc; ++ ++ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); ++ ++ page_array = kbase_get_cpu_phy_pages(reg); ++ ++ for (i = 0; i < pages; i++) ++ page_array[i] = dma_pa + (i << PAGE_SHIFT); ++ ++ reg->cpu_alloc->nents = pages; ++ ++ kbase_gpu_vm_lock(kctx); ++ res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); ++ kbase_gpu_vm_unlock(kctx); ++ if (res) ++ goto no_mmap; ++ ++ return va; ++ ++no_mmap: ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++no_alloc: ++invalid_flags: ++ kfree(reg); ++no_reg: ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); ++#else ++ dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); ++#endif ++err: ++ return NULL; ++} ++KBASE_EXPORT_SYMBOL(kbase_va_alloc); ++ ++void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle) ++{ ++ struct kbase_va_region *reg; ++ int err; ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ ++ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++ DEFINE_DMA_ATTRS(attrs); ++#endif ++ ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); ++ ++ kbase_gpu_vm_lock(kctx); ++ reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); ++ KBASE_DEBUG_ASSERT(reg); ++ err = kbase_gpu_munmap(kctx, reg); ++ kbase_gpu_vm_unlock(kctx); ++ KBASE_DEBUG_ASSERT(!err); ++ ++ kbase_mem_phy_alloc_put(reg->cpu_alloc); ++ kbase_mem_phy_alloc_put(reg->gpu_alloc); ++ kfree(reg); ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++ dma_free_attrs(kctx->kbdev->dev, handle->size, ++ handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); ++ dma_free_attrs(kctx->kbdev->dev, handle->size, ++ handle->cpu_va, handle->dma_pa, &attrs); ++#else ++ dma_free_writecombine(kctx->kbdev->dev, handle->size, ++ handle->cpu_va, handle->dma_pa); ++#endif ++} ++KBASE_EXPORT_SYMBOL(kbase_va_free); ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +new file mode 100644 +index 000000000..33b3554f9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +@@ -0,0 +1,231 @@ ++/* + * -+ * Can map down to a timeline implementation in some implementations. -+ * Exposed as a file descriptor. -+ * Life-time controlled via the file descriptor: -+ * - dup to add a ref -+ * - close to remove a ref ++ * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * return: 0 on success, < 0 on error + */ -+int kbase_sync_fence_stream_create(const char *name, int *const out_fd); ++ ++ ++ ++ + +/** -+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom -+ * @katom: Atom to assign the new explicit fence to -+ * @stream_fd: File descriptor for stream object to create fence on -+ * -+ * return: Valid file descriptor to fence or < 0 on error ++ * @file mali_kbase_mem_linux.h ++ * Base kernel memory APIs, Linux implementation. + */ -+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); ++ ++#ifndef _KBASE_MEM_LINUX_H_ ++#define _KBASE_MEM_LINUX_H_ ++ ++/** A HWC dump mapping */ ++struct kbase_hwc_dma_mapping { ++ void *cpu_va; ++ dma_addr_t dma_pa; ++ size_t size; ++}; ++ ++struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, ++ u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, ++ u64 *gpu_va); ++int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages); ++int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, ++ void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, ++ u64 *flags); ++u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); ++int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); + +/** -+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom -+ * @katom: Atom to assign the existing explicit fence to -+ * @fd: File descriptor to an existing fence ++ * kbase_mem_commit - Change the physical backing size of a region + * -+ * Assigns an explicit input fence to atom. -+ * This can later be waited for by calling @kbase_sync_fence_in_wait ++ * @kctx: The kernel context ++ * @gpu_addr: Handle to the memory region ++ * @new_pages: Number of physical pages to back the region with + * -+ * return: 0 on success, < 0 on error ++ * Return: 0 on success or error code + */ -+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); ++int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); ++ ++int kbase_mmap(struct file *file, struct vm_area_struct *vma); + +/** -+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence -+ * @fd: File descriptor to check -+ * -+ * This function is only usable to catch unintentional user errors early, -+ * it does not stop malicious code changing the fd after this function returns. ++ * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction ++ * mechanism. ++ * @kctx: The kbase context to initialize. + * -+ * return 0: if fd is for a valid fence, < 0 if invalid ++ * Return: Zero on success or -errno on failure. + */ -+int kbase_sync_fence_validate(int fd); ++int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** -+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom -+ * @katom: Atom with an explicit fence to signal -+ * @result: < 0 means signal with error, 0 >= indicates success -+ * -+ * Signal output fence attached on katom and remove the fence from the atom. 
-+ * -+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE ++ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction ++ * mechanism. ++ * @kctx: The kbase context to de-initialize. + */ -+enum base_jd_event_code -+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); ++void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** -+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled -+ * @katom: Atom with explicit fence to wait for ++ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation ++ * @kctx: Context the region belongs to ++ * @reg: The GPU region ++ * @new_pages: The number of pages after the grow ++ * @old_pages: The number of pages before the grow + * -+ * If the fence is already signaled, then 0 is returned, and the caller must -+ * continue processing of the katom. ++ * Return: 0 on success, -errno on error. + * -+ * If the fence isn't already signaled, then this kbase_sync framework will -+ * take responsibility to continue the processing once the fence is signaled. ++ * Expand the GPU mapping to encompass the new psychical pages which have ++ * been added to the allocation. + * -+ * return: 0 if already signaled, otherwise 1 ++ * Note: Caller must be holding the region lock. + */ -+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); ++int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, ++ struct kbase_va_region *reg, ++ u64 new_pages, u64 old_pages); + +/** -+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits -+ * @katom: Atom to cancel wait for ++ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction ++ * @gpu_alloc: The physical allocation to make evictable + * -+ * This function is fully responsible for continuing processing of this atom -+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) ++ * Return: 0 on success, -errno on error. ++ * ++ * Take the provided region and make all the physical pages within it ++ * reclaimable by the kernel, updating the per-process VM stats as well. ++ * Remove any CPU mappings (as these can't be removed in the shrinker callback ++ * as mmap_lock might already be taken) but leave the GPU mapping intact as ++ * and until the shrinker reclaims the allocation. ++ * ++ * Note: Must be called with the region lock of the containing context. + */ -+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); ++int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** -+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom -+ * @katom: Atom to remove explicit input fence for ++ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for ++ * eviction. ++ * @alloc: The physical allocation to remove eviction eligibility from. + * -+ * This will also release the corresponding reference. ++ * Return: True if the allocation had its backing restored and false if ++ * it hasn't. ++ * ++ * Make the physical pages in the region no longer reclaimable and update the ++ * per-process stats, if the shrinker has already evicted the memory then ++ * re-allocate it if the region is still alive. ++ * ++ * Note: Must be called with the region lock of the containing context. 
+ */ -+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); ++bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); ++ ++struct kbase_vmap_struct { ++ u64 gpu_addr; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++ struct kbase_mem_phy_alloc *gpu_alloc; ++ phys_addr_t *cpu_pages; ++ phys_addr_t *gpu_pages; ++ void *addr; ++ size_t size; ++ bool is_cached; ++}; ++ + +/** -+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom -+ * @katom: Atom to remove explicit output fence for ++ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the ++ * requested access permissions are supported ++ * @kctx: Context the VA range belongs to ++ * @gpu_addr: Start address of VA range ++ * @size: Size of VA range ++ * @prot_request: Flags indicating how the caller will then access the memory ++ * @map: Structure to be given to kbase_vunmap() on freeing ++ * ++ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error ++ * ++ * Map a GPU VA Range into the kernel. The VA range must be contained within a ++ * GPU memory region. Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. ++ * ++ * This is safer than using kmap() on the pages directly, ++ * because the pages here are refcounted to prevent freeing (and hence reuse ++ * elsewhere in the system) until an kbase_vunmap() ++ * ++ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check ++ * whether the region should allow the intended access, and return an error if ++ * disallowed. This is essential for security of imported memory, particularly ++ * a user buf from SHM mapped into the process as RO. In that case, write ++ * access must be checked if the intention is for kernel to write to the ++ * memory. ++ * ++ * The checks are also there to help catch access errors on memory where ++ * security is not a concern: imported memory that is always RW, and memory ++ * that was allocated and owned by the process attached to @kctx. In this case, ++ * it helps to identify memory that was was mapped with the wrong access type. ++ * ++ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases ++ * where either the security of memory is solely dependent on those flags, or ++ * when userspace code was expecting only the GPU to access the memory (e.g. HW ++ * workarounds). + * -+ * This will also release the corresponding reference. + */ -+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); ++void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ unsigned long prot_request, struct kbase_vmap_struct *map); + +/** -+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence -+ * @fd: File descriptor to close ++ * kbase_vmap - Map a GPU VA range into the kernel safely ++ * @kctx: Context the VA range belongs to ++ * @gpu_addr: Start address of VA range ++ * @size: Size of VA range ++ * @map: Structure to be given to kbase_vunmap() on freeing ++ * ++ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error ++ * ++ * Map a GPU VA Range into the kernel. The VA range must be contained within a ++ * GPU memory region. Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. 
++ * ++ * This is safer than using kmap() on the pages directly, ++ * because the pages here are refcounted to prevent freeing (and hence reuse ++ * elsewhere in the system) until an kbase_vunmap() ++ * ++ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no ++ * checks to ensure the security of e.g. imported user bufs from RO SHM. + */ -+static inline void kbase_sync_fence_close_fd(int fd) -+{ -+ close_fd(fd); -+} ++void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, ++ struct kbase_vmap_struct *map); + +/** -+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence -+ * @katom: Atom to get fence information from -+ * @info: Struct to be filled with fence information ++ * kbase_vunmap - Unmap a GPU VA range from the kernel ++ * @kctx: Context the VA range belongs to ++ * @map: Structure describing the mapping from the corresponding kbase_vmap() ++ * call + * -+ * return: 0 on success, < 0 on error ++ * Unmaps a GPU VA range from the kernel, given its @map structure obtained ++ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as ++ * required, dependent on the CPU mapping for the memory region. ++ * ++ * The reference taken on pages during kbase_vmap() is released. + */ -+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info); ++void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); + -+/** -+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence -+ * @katom: Atom to get fence information from -+ * @info: Struct to be filled with fence information ++/** @brief Allocate memory from kernel space and map it onto the GPU + * -+ * return: 0 on success, < 0 on error ++ * @param kctx The context used for the allocation/mapping ++ * @param size The size of the allocation in bytes ++ * @param handle An opaque structure used to contain the state needed to free the memory ++ * @return the VA for kernel space and GPU MMU + */ -+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info); ++void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle); + -+/** -+ * kbase_sync_status_string() - Get string matching @status -+ * @status: Value of fence status. ++/** @brief Free/unmap memory allocated by kbase_va_alloc + * -+ * return: Pointer to string describing @status. ++ * @param kctx The context used for the allocation/mapping ++ * @param handle An opaque structure returned by the kbase_va_alloc function. + */ -+const char *kbase_sync_status_string(int status); ++void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle); ++ ++extern const struct vm_operations_struct kbase_vm_ops; + ++#endif /* _KBASE_MEM_LINUX_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +new file mode 100644 +index 000000000..9725fd3f0 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +@@ -0,0 +1,45 @@ +/* -+ * Internal worker used to continue processing of atom. ++ * ++ * (C) COPYRIGHT 2012-2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+void kbase_sync_fence_wait_worker(struct work_struct *data); + -+#ifdef CONFIG_MALI_FENCE_DEBUG ++ ++ ++ ++ ++#ifndef _KBASE_MEM_LOWLEVEL_H ++#define _KBASE_MEM_LOWLEVEL_H ++ ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" ++#endif ++ ++#include ++ +/** -+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state -+ * @katom: Atom to trigger fence debug dump for ++ * @brief Flags for kbase_phy_allocator_pages_alloc + */ -+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); -+#endif ++#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ ++#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ ++#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ + -+#endif /* MALI_KBASE_SYNC_H */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c ++#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) ++ ++#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ ++ ++enum kbase_sync_type { ++ KBASE_SYNC_TO_CPU, ++ KBASE_SYNC_TO_DEVICE ++}; ++ ++#endif /* _KBASE_LOWLEVEL_H */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c new file mode 100644 -index 000000000..d7349dcae +index 000000000..0cafb64ee --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c -@@ -0,0 +1,537 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +@@ -0,0 +1,573 @@ +/* + * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -392737,584 +394339,571 @@ index 000000000..d7349dcae + + + -+/* -+ * Code for supporting explicit Android fences (CONFIG_SYNC) -+ * Known to be good for kernels 4.5 and earlier. -+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels -+ * (see mali_kbase_sync_file.c) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "sync.h" +#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+struct mali_sync_timeline { -+ struct sync_timeline timeline; -+ atomic_t counter; -+ atomic_t signaled; -+}; ++#define pool_dbg(pool, format, ...) \ ++ dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ ++ (pool->next_pool) ? "kctx" : "kbdev", \ ++ kbase_mem_pool_size(pool), \ ++ kbase_mem_pool_max_size(pool), \ ++ ##__VA_ARGS__) + -+struct mali_sync_pt { -+ struct sync_pt pt; -+ int order; -+ int result; -+}; ++#define NOT_DIRTY false ++#define NOT_RECLAIMED false + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+/* For backwards compatibility with kernels before 3.17. After 3.17 -+ * sync_pt_parent is included in the kernel. 
*/ -+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) ++static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) +{ -+ return pt->parent; ++ spin_lock(&pool->pool_lock); +} -+#endif + -+static struct mali_sync_timeline *to_mali_sync_timeline( -+ struct sync_timeline *timeline) ++static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) +{ -+ return container_of(timeline, struct mali_sync_timeline, timeline); ++ spin_unlock(&pool->pool_lock); +} + -+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) ++static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) +{ -+ return container_of(pt, struct mali_sync_pt, pt); ++ ssize_t max_size = kbase_mem_pool_max_size(pool); ++ ssize_t cur_size = kbase_mem_pool_size(pool); ++ ++ return max(max_size - cur_size, (ssize_t)0); +} + -+static struct sync_pt *timeline_dup(struct sync_pt *pt) ++static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) +{ -+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); -+ struct mali_sync_pt *new_mpt; -+ struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), -+ sizeof(struct mali_sync_pt)); ++ return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); ++} + -+ if (!new_pt) -+ return NULL; ++static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) ++{ ++ return kbase_mem_pool_size(pool) == 0; ++} + -+ new_mpt = to_mali_sync_pt(new_pt); -+ new_mpt->order = mpt->order; -+ new_mpt->result = mpt->result; ++static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, ++ struct page *p) ++{ ++ lockdep_assert_held(&pool->pool_lock); + -+ return new_pt; ++ list_add(&p->lru, &pool->page_list); ++ pool->cur_size++; ++ ++ pool_dbg(pool, "added page\n"); +} + -+static int timeline_has_signaled(struct sync_pt *pt) ++static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) +{ -+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); -+ struct mali_sync_timeline *mtl = to_mali_sync_timeline( -+ sync_pt_parent(pt)); -+ int result = mpt->result; ++ kbase_mem_pool_lock(pool); ++ kbase_mem_pool_add_locked(pool, p); ++ kbase_mem_pool_unlock(pool); ++} + -+ int diff = atomic_read(&mtl->signaled) - mpt->order; ++static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, ++ struct list_head *page_list, size_t nr_pages) ++{ ++ lockdep_assert_held(&pool->pool_lock); + -+ if (diff >= 0) -+ return (result < 0) ? result : 1; ++ list_splice(page_list, &pool->page_list); ++ pool->cur_size += nr_pages; + -+ return 0; ++ pool_dbg(pool, "added %zu pages\n", nr_pages); +} + -+static int timeline_compare(struct sync_pt *a, struct sync_pt *b) ++static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, ++ struct list_head *page_list, size_t nr_pages) +{ -+ struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); -+ struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); ++ kbase_mem_pool_lock(pool); ++ kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); ++ kbase_mem_pool_unlock(pool); ++} + -+ int diff = ma->order - mb->order; ++static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) ++{ ++ struct page *p; + -+ if (diff == 0) -+ return 0; ++ lockdep_assert_held(&pool->pool_lock); + -+ return (diff < 0) ? 
-1 : 1; ++ if (kbase_mem_pool_is_empty(pool)) ++ return NULL; ++ ++ p = list_first_entry(&pool->page_list, struct page, lru); ++ list_del_init(&p->lru); ++ pool->cur_size--; ++ ++ pool_dbg(pool, "removed page\n"); ++ ++ return p; +} + -+static void timeline_value_str(struct sync_timeline *timeline, char *str, -+ int size) ++static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) +{ -+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); ++ struct page *p; + -+ snprintf(str, size, "%d", atomic_read(&mtl->signaled)); ++ kbase_mem_pool_lock(pool); ++ p = kbase_mem_pool_remove_locked(pool); ++ kbase_mem_pool_unlock(pool); ++ ++ return p; +} + -+static void pt_value_str(struct sync_pt *pt, char *str, int size) ++static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, ++ struct page *p) +{ -+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); ++ struct device *dev = pool->kbdev->dev; + -+ snprintf(str, size, "%d(%d)", mpt->order, mpt->result); ++ dma_sync_single_for_device(dev, kbase_dma_addr(p), ++ PAGE_SIZE, DMA_BIDIRECTIONAL); +} + -+static struct sync_timeline_ops mali_timeline_ops = { -+ .driver_name = "Mali", -+ .dup = timeline_dup, -+ .has_signaled = timeline_has_signaled, -+ .compare = timeline_compare, -+ .timeline_value_str = timeline_value_str, -+ .pt_value_str = pt_value_str, -+}; ++static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, ++ struct page *p) ++{ ++ clear_highpage(p); ++ kbase_mem_pool_sync_page(pool, p); ++} + -+/* Allocates a timeline for Mali -+ * -+ * One timeline should be allocated per API context. -+ */ -+static struct sync_timeline *mali_sync_timeline_alloc(const char *name) ++static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, ++ struct page *p) +{ -+ struct sync_timeline *tl; -+ struct mali_sync_timeline *mtl; ++ /* Zero page before spilling */ ++ kbase_mem_pool_zero_page(next_pool, p); + -+ tl = sync_timeline_create(&mali_timeline_ops, -+ sizeof(struct mali_sync_timeline), name); -+ if (!tl) ++ kbase_mem_pool_add(next_pool, p); ++} ++ ++struct page *kbase_mem_alloc_page(struct kbase_device *kbdev) ++{ ++ struct page *p; ++ gfp_t gfp; ++ struct device *dev = kbdev->dev; ++ dma_addr_t dma_addr; ++ ++#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ ++ LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) ++ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ ++ gfp = GFP_USER | __GFP_ZERO; ++#else ++ gfp = GFP_HIGHUSER | __GFP_ZERO; ++#endif ++ ++ if (current->flags & PF_KTHREAD) { ++ /* Don't trigger OOM killer from kernel threads, e.g. 
when ++ * growing memory on GPU page fault */ ++ gfp |= __GFP_NORETRY; ++ } ++ ++ p = alloc_page(gfp); ++ if (!p) + return NULL; + -+ /* Set the counter in our private struct */ -+ mtl = to_mali_sync_timeline(tl); -+ atomic_set(&mtl->counter, 0); -+ atomic_set(&mtl->signaled, 0); ++ dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, dma_addr)) { ++ __free_page(p); ++ return NULL; ++ } + -+ return tl; ++ WARN_ON(dma_addr != page_to_phys(p)); ++ ++ kbase_set_dma_addr(p, dma_addr); ++ ++ return p; +} + -+static int kbase_stream_close(struct inode *inode, struct file *file) ++static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, ++ struct page *p) +{ -+ struct sync_timeline *tl; ++ struct device *dev = pool->kbdev->dev; ++ dma_addr_t dma_addr = kbase_dma_addr(p); + -+ tl = (struct sync_timeline *)file->private_data; -+ sync_timeline_destroy(tl); -+ return 0; ++ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); ++ kbase_clear_dma_addr(p); ++ __free_page(p); ++ ++ pool_dbg(pool, "freed page to kernel\n"); +} + -+static const struct file_operations stream_fops = { -+ .owner = THIS_MODULE, -+ .release = kbase_stream_close, -+}; ++static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, ++ size_t nr_to_shrink) ++{ ++ struct page *p; ++ size_t i; + -+int kbase_sync_fence_stream_create(const char *name, int *const out_fd) ++ lockdep_assert_held(&pool->pool_lock); ++ ++ for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { ++ p = kbase_mem_pool_remove_locked(pool); ++ kbase_mem_pool_free_page(pool, p); ++ } ++ ++ return i; ++} ++ ++static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, ++ size_t nr_to_shrink) +{ -+ struct sync_timeline *tl; ++ size_t nr_freed; + -+ if (!out_fd) -+ return -EINVAL; ++ kbase_mem_pool_lock(pool); ++ nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); ++ kbase_mem_pool_unlock(pool); + -+ tl = mali_sync_timeline_alloc(name); -+ if (!tl) -+ return -EINVAL; ++ return nr_freed; ++} + -+ *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC); ++int kbase_mem_pool_grow(struct kbase_mem_pool *pool, ++ size_t nr_to_grow) ++{ ++ struct page *p; ++ size_t i; + -+ if (*out_fd < 0) { -+ sync_timeline_destroy(tl); -+ return -EINVAL; ++ for (i = 0; i < nr_to_grow; i++) { ++ p = kbase_mem_alloc_page(pool->kbdev); ++ if (!p) ++ return -ENOMEM; ++ kbase_mem_pool_add(pool, p); + } + + return 0; +} + -+/* Allocates a sync point within the timeline. -+ * -+ * The timeline must be the one allocated by kbase_sync_timeline_alloc -+ * -+ * Sync points must be triggered in *exactly* the same order as they are -+ * allocated. 
-+ */ -+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) ++void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +{ -+ struct sync_pt *pt = sync_pt_create(parent, -+ sizeof(struct mali_sync_pt)); -+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); -+ struct mali_sync_pt *mpt; ++ size_t cur_size; + -+ if (!pt) -+ return NULL; ++ cur_size = kbase_mem_pool_size(pool); + -+ mpt = to_mali_sync_pt(pt); -+ mpt->order = atomic_inc_return(&mtl->counter); -+ mpt->result = 0; ++ if (new_size > pool->max_size) ++ new_size = pool->max_size; + -+ return pt; ++ if (new_size < cur_size) ++ kbase_mem_pool_shrink(pool, cur_size - new_size); ++ else if (new_size > cur_size) ++ kbase_mem_pool_grow(pool, new_size - cur_size); +} + -+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) ++void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) +{ -+ struct sync_timeline *tl; -+ struct sync_pt *pt; -+ struct sync_fence *fence; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) -+ struct files_struct *files; -+ struct fdtable *fdt; -+#endif -+ int fd; -+ struct file *tl_file; ++ size_t cur_size; ++ size_t nr_to_shrink; + -+ tl_file = fget(tl_fd); -+ if (tl_file == NULL) -+ return -EBADF; ++ kbase_mem_pool_lock(pool); + -+ if (tl_file->f_op != &stream_fops) { -+ fd = -EBADF; -+ goto out; ++ pool->max_size = max_size; ++ ++ cur_size = kbase_mem_pool_size(pool); ++ if (max_size < cur_size) { ++ nr_to_shrink = cur_size - max_size; ++ kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + } + -+ tl = tl_file->private_data; ++ kbase_mem_pool_unlock(pool); ++} + -+ pt = kbase_sync_pt_alloc(tl); -+ if (!pt) { -+ fd = -EFAULT; -+ goto out; -+ } + -+ fence = sync_fence_create("mali_fence", pt); -+ if (!fence) { -+ sync_pt_free(pt); -+ fd = -EFAULT; -+ goto out; -+ } ++static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, ++ struct shrink_control *sc) ++{ ++ struct kbase_mem_pool *pool; + -+ /* from here the fence owns the sync_pt */ ++ pool = container_of(s, struct kbase_mem_pool, reclaim); ++ pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool)); ++ return kbase_mem_pool_size(pool); ++} + -+ /* create a fd representing the fence */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) -+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); -+ if (fd < 0) { -+ sync_fence_put(fence); -+ goto out; -+ } -+#else -+ fd = get_unused_fd(); -+ if (fd < 0) { -+ sync_fence_put(fence); -+ goto out; -+ } ++static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, ++ struct shrink_control *sc) ++{ ++ struct kbase_mem_pool *pool; ++ unsigned long freed; + -+ files = current->files; -+ spin_lock(&files->file_lock); -+ fdt = files_fdtable(files); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) -+ __set_close_on_exec(fd, fdt); -+#else -+ FD_SET(fd, fdt->close_on_exec); -+#endif -+ spin_unlock(&files->file_lock); -+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ ++ pool = container_of(s, struct kbase_mem_pool, reclaim); + -+ /* bind fence to the new fd */ -+ sync_fence_install(fence, fd); ++ pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); + -+ katom->fence = sync_fence_fdget(fd); -+ if (katom->fence == NULL) { -+ /* The only way the fence can be NULL is if userspace closed it -+ * for us, so we don't need to clear it up */ -+ fd = -EINVAL; -+ goto out; -+ } ++ freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan); + -+out: -+ fput(tl_file); ++ pool_dbg(pool, "reclaim freed %ld 
pages\n", freed); + -+ return fd; ++ return freed; +} + -+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, ++ struct shrink_control *sc) +{ -+ katom->fence = sync_fence_fdget(fd); -+ return katom->fence ? 0 : -ENOENT; ++ if (sc->nr_to_scan == 0) ++ return kbase_mem_pool_reclaim_count_objects(s, sc); ++ ++ return kbase_mem_pool_reclaim_scan_objects(s, sc); +} ++#endif + -+int kbase_sync_fence_validate(int fd) ++int kbase_mem_pool_init(struct kbase_mem_pool *pool, ++ size_t max_size, ++ struct kbase_device *kbdev, ++ struct kbase_mem_pool *next_pool) +{ -+ struct sync_fence *fence; ++ pool->cur_size = 0; ++ pool->max_size = max_size; ++ pool->kbdev = kbdev; ++ pool->next_pool = next_pool; + -+ fence = sync_fence_fdget(fd); -+ if (!fence) -+ return -EINVAL; ++ spin_lock_init(&pool->pool_lock); ++ INIT_LIST_HEAD(&pool->page_list); ++ ++ /* Register shrinker */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) ++ pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; ++#else ++ pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; ++ pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; ++#endif ++ pool->reclaim.seeks = DEFAULT_SEEKS; ++ /* Kernel versions prior to 3.1 : ++ * struct shrinker does not define batch */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) ++ pool->reclaim.batch = 0; ++#endif ++#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE ++ register_shrinker(&pool->reclaim); ++#else ++ register_shrinker(&pool->reclaim, "mali-mem-pool"); ++#endif ++ ++ pool_dbg(pool, "initialized\n"); + -+ sync_fence_put(fence); + return 0; +} + -+/* Returns true if the specified timeline is allocated by Mali */ -+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) ++void kbase_mem_pool_term(struct kbase_mem_pool *pool) +{ -+ return timeline->ops == &mali_timeline_ops; -+} ++ struct kbase_mem_pool *next_pool = pool->next_pool; ++ struct page *p; ++ size_t nr_to_spill = 0; ++ LIST_HEAD(spill_list); ++ int i; + -+/* Signals a particular sync point -+ * -+ * Sync points must be triggered in *exactly* the same order as they are -+ * allocated. -+ * -+ * If they are signaled in the wrong order then a message will be printed in -+ * debug builds and otherwise attempts to signal order sync_pts will be ignored. -+ * -+ * result can be negative to indicate error, any other value is interpreted as -+ * success. -+ */ -+static void kbase_sync_signal_pt(struct sync_pt *pt, int result) -+{ -+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); -+ struct mali_sync_timeline *mtl = to_mali_sync_timeline( -+ sync_pt_parent(pt)); -+ int signaled; -+ int diff; ++ pool_dbg(pool, "terminate()\n"); + -+ mpt->result = result; ++ unregister_shrinker(&pool->reclaim); + -+ do { -+ signaled = atomic_read(&mtl->signaled); ++ kbase_mem_pool_lock(pool); ++ pool->max_size = 0; + -+ diff = signaled - mpt->order; ++ if (next_pool && !kbase_mem_pool_is_full(next_pool)) { ++ /* Spill to next pool (may overspill) */ ++ nr_to_spill = kbase_mem_pool_capacity(next_pool); ++ nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); + -+ if (diff > 0) { -+ /* The timeline is already at or ahead of this point. -+ * This should not happen unless userspace has been -+ * signaling fences out of order, so warn but don't -+ * violate the sync_pt API. -+ * The warning is only in debug builds to prevent -+ * a malicious user being able to spam dmesg. 
-+ */ -+#ifdef CONFIG_MALI_DEBUG -+ pr_err("Fences were triggered in a different order to allocation!"); -+#endif /* CONFIG_MALI_DEBUG */ -+ return; ++ /* Zero pages first without holding the next_pool lock */ ++ for (i = 0; i < nr_to_spill; i++) { ++ p = kbase_mem_pool_remove_locked(pool); ++ kbase_mem_pool_zero_page(pool, p); ++ list_add(&p->lru, &spill_list); + } -+ } while (atomic_cmpxchg(&mtl->signaled, -+ signaled, mpt->order) != signaled); -+} -+ -+enum base_jd_event_code -+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) -+{ -+ struct sync_pt *pt; -+ struct sync_timeline *timeline; -+ -+ if (!katom->fence) -+ return BASE_JD_EVENT_JOB_CANCELLED; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ if (!list_is_singular(&katom->fence->pt_list_head)) { -+#else -+ if (katom->fence->num_fences != 1) { -+#endif -+ /* Not exactly one item in the list - so it didn't (directly) -+ * come from us */ -+ return BASE_JD_EVENT_JOB_CANCELLED; + } + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ pt = list_first_entry(&katom->fence->pt_list_head, -+ struct sync_pt, pt_list); -+#else -+ pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); -+#endif -+ timeline = sync_pt_parent(pt); -+ -+ if (!kbase_sync_timeline_is_ours(timeline)) { -+ /* Fence has a sync_pt which isn't ours! */ -+ return BASE_JD_EVENT_JOB_CANCELLED; ++ while (!kbase_mem_pool_is_empty(pool)) { ++ /* Free remaining pages to kernel */ ++ p = kbase_mem_pool_remove_locked(pool); ++ kbase_mem_pool_free_page(pool, p); + } + -+ kbase_sync_signal_pt(pt, result); ++ kbase_mem_pool_unlock(pool); + -+ sync_timeline_signal(timeline); ++ if (next_pool && nr_to_spill) { ++ /* Add new page list to next_pool */ ++ kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + -+ kbase_sync_fence_out_remove(katom); ++ pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); ++ } + -+ return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; ++ pool_dbg(pool, "terminated\n"); +} + -+static inline int kbase_fence_get_status(struct sync_fence *fence) ++struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) +{ -+ if (!fence) -+ return -ENOENT; ++ struct page *p; + -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -+ return fence->status; -+#else -+ return atomic_read(&fence->status); -+#endif ++ do { ++ pool_dbg(pool, "alloc()\n"); ++ p = kbase_mem_pool_remove(pool); ++ ++ if (p) ++ return p; ++ ++ pool = pool->next_pool; ++ } while (pool); ++ ++ return NULL; +} + -+static void kbase_fence_wait_callback(struct sync_fence *fence, -+ struct sync_fence_waiter *waiter) ++void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, ++ bool dirty) +{ -+ struct kbase_jd_atom *katom = container_of(waiter, -+ struct kbase_jd_atom, sync_waiter); -+ struct kbase_context *kctx = katom->kctx; ++ struct kbase_mem_pool *next_pool = pool->next_pool; + -+ /* Propagate the fence status to the atom. -+ * If negative then cancel this atom and its dependencies. -+ */ -+ if (kbase_fence_get_status(fence) < 0) -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ pool_dbg(pool, "free()\n"); + -+ /* To prevent a potential deadlock we schedule the work onto the -+ * job_done_wq workqueue -+ * -+ * The issue is that we may signal the timeline while holding -+ * kctx->jctx.lock and the callbacks are run synchronously from -+ * sync_timeline_signal. So we simply defer the work. 
-+ */ ++ if (!kbase_mem_pool_is_full(pool)) { ++ /* Add to our own pool */ ++ if (dirty) ++ kbase_mem_pool_sync_page(pool, p); + -+ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ kbase_mem_pool_add(pool, p); ++ } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { ++ /* Spill to next pool */ ++ kbase_mem_pool_spill(next_pool, p); ++ } else { ++ /* Free page */ ++ kbase_mem_pool_free_page(pool, p); ++ } +} + -+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) ++int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ phys_addr_t *pages) +{ -+ int ret; ++ struct page *p; ++ size_t nr_from_pool; ++ size_t i; ++ int err = -ENOMEM; + -+ sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); ++ pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages); + -+ ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); ++ /* Get pages from this pool */ ++ kbase_mem_pool_lock(pool); ++ nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool)); ++ for (i = 0; i < nr_from_pool; i++) { ++ p = kbase_mem_pool_remove_locked(pool); ++ pages[i] = page_to_phys(p); ++ } ++ kbase_mem_pool_unlock(pool); + -+ if (ret == 1) { -+ /* Already signaled */ -+ return 0; ++ if (i != nr_pages && pool->next_pool) { ++ /* Allocate via next pool */ ++ err = kbase_mem_pool_alloc_pages(pool->next_pool, ++ nr_pages - i, pages + i); ++ ++ if (err) ++ goto err_rollback; ++ ++ i += nr_pages - i; + } + -+ if (ret < 0) { -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ /* We should cause the dependent jobs in the bag to be failed, -+ * to do this we schedule the work queue to complete this job */ -+ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); -+ queue_work(katom->kctx->jctx.job_done_wq, &katom->work); ++ /* Get any remaining pages from kernel */ ++ for (; i < nr_pages; i++) { ++ p = kbase_mem_alloc_page(pool->kbdev); ++ if (!p) ++ goto err_rollback; ++ pages[i] = page_to_phys(p); + } + -+ return 1; ++ pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages); ++ ++ return 0; ++ ++err_rollback: ++ kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); ++ return err; +} + -+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) ++static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, ++ size_t nr_pages, phys_addr_t *pages, bool zero, bool sync) +{ -+ if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { -+ /* The wait wasn't cancelled - leave the cleanup for -+ * kbase_fence_wait_callback */ ++ struct page *p; ++ size_t nr_to_pool = 0; ++ LIST_HEAD(new_page_list); ++ size_t i; ++ ++ if (!nr_pages) + return; -+ } + -+ /* Wait was cancelled - zap the atoms */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", ++ nr_pages, zero, sync); + -+ kbasep_remove_waiting_soft_job(katom); -+ kbase_finish_soft_job(katom); ++ /* Zero/sync pages first without holding the pool lock */ ++ for (i = 0; i < nr_pages; i++) { ++ if (unlikely(!pages[i])) ++ continue; + -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(katom->kctx->kbdev); -+} ++ p = phys_to_page(pages[i]); + -+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -+{ -+ if (katom->fence) { -+ sync_fence_put(katom->fence); -+ katom->fence = NULL; -+ } -+} ++ if (zero) ++ kbase_mem_pool_zero_page(pool, p); ++ else if (sync) ++ kbase_mem_pool_sync_page(pool, p); + -+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) -+{ -+ if 
(katom->fence) { -+ sync_fence_put(katom->fence); -+ katom->fence = NULL; ++ list_add(&p->lru, &new_page_list); ++ nr_to_pool++; ++ pages[i] = 0; + } -+} + -+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) -+{ -+ if (!katom->fence) -+ return -ENOENT; -+ -+ info->fence = katom->fence; -+ info->status = kbase_fence_get_status(katom->fence); -+ strlcpy(info->name, katom->fence->name, sizeof(info->name)); ++ /* Add new page list to pool */ ++ kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); + -+ return 0; ++ pool_dbg(pool, "add_array(%zu) added %zu pages\n", ++ nr_pages, nr_to_pool); +} + -+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) ++void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, ++ phys_addr_t *pages, bool dirty, bool reclaimed) +{ -+ if (!katom->fence) -+ return -ENOENT; ++ struct kbase_mem_pool *next_pool = pool->next_pool; ++ struct page *p; ++ size_t nr_to_pool; ++ LIST_HEAD(to_pool_list); ++ size_t i = 0; + -+ info->fence = katom->fence; -+ info->status = kbase_fence_get_status(katom->fence); -+ strlcpy(info->name, katom->fence->name, sizeof(info->name)); ++ pool_dbg(pool, "free_pages(%zu):\n", nr_pages); + -+ return 0; -+} ++ if (!reclaimed) { ++ /* Add to this pool */ ++ nr_to_pool = kbase_mem_pool_capacity(pool); ++ nr_to_pool = min(nr_pages, nr_to_pool); + -+#ifdef CONFIG_MALI_FENCE_DEBUG -+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) -+{ -+ /* Dump out the full state of all the Android sync fences. -+ * The function sync_dump() isn't exported to modules, so force -+ * sync_fence_wait() to time out to trigger sync_dump(). -+ */ -+ if (katom->fence) -+ sync_fence_wait(katom->fence, 1); -+} -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c -new file mode 100644 -index 000000000..457def296 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c -@@ -0,0 +1,43 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + ++ i += nr_to_pool; + ++ if (i != nr_pages && next_pool) { ++ /* Spill to next pool (may overspill) */ ++ nr_to_pool = kbase_mem_pool_capacity(next_pool); ++ nr_to_pool = min(nr_pages - i, nr_to_pool); + -+/* -+ * @file mali_kbase_sync_common.c -+ * -+ * Common code for our explicit fence functionality -+ */ ++ kbase_mem_pool_add_array(next_pool, nr_to_pool, ++ pages + i, true, dirty); ++ i += nr_to_pool; ++ } ++ } + -+#include -+#include "mali_kbase.h" ++ /* Free any remaining pages to kernel */ ++ for (; i < nr_pages; i++) { ++ if (unlikely(!pages[i])) ++ continue; + -+void kbase_sync_fence_wait_worker(struct work_struct *data) -+{ -+ struct kbase_jd_atom *katom; ++ p = phys_to_page(pages[i]); + -+ katom = container_of(data, struct kbase_jd_atom, work); -+ kbase_soft_event_wait_callback(katom); -+} ++ kbase_mem_pool_free_page(pool, p); ++ pages[i] = 0; ++ } + -+const char *kbase_sync_status_string(int status) -+{ -+ if (status == 0) -+ return "signaled"; -+ else if (status > 0) -+ return "active"; -+ else -+ return "error"; ++ pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c new file mode 100644 -index 000000000..60b5d74db +index 000000000..585fba036 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c -@@ -0,0 +1,359 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +@@ -0,0 +1,81 @@ +/* + * -+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -393329,357 +394918,248 @@ index 000000000..60b5d74db + + + -+/* -+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) -+ * Introduced in kernel 4.9. -+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels -+ * (see mali_kbase_sync_android.c) -+ */ ++#include ++#include + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mali_kbase_fence_defs.h" -+#include "mali_kbase_sync.h" -+#include "mali_kbase_fence.h" -+#include "mali_kbase.h" ++#include + -+static const struct file_operations stream_fops = { -+ .owner = THIS_MODULE -+}; ++#ifdef CONFIG_DEBUG_FS + -+int kbase_sync_fence_stream_create(const char *name, int *const out_fd) ++static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val) +{ -+ if (!out_fd) -+ return -EINVAL; ++ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + -+ *out_fd = anon_inode_getfd(name, &stream_fops, NULL, -+ O_RDONLY | O_CLOEXEC); -+ if (*out_fd < 0) -+ return -EINVAL; ++ *val = kbase_mem_pool_size(pool); + + return 0; +} + -+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) ++static int kbase_mem_pool_debugfs_size_set(void *data, u64 val) +{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ struct sync_file *sync_file; -+ int fd; -+ -+ fence = kbase_fence_out_new(katom); -+ if (!fence) -+ return -ENOMEM; -+ -+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) -+ /* Take an extra reference to the fence on behalf of the sync_file. 
-+ * This is only needed on older kernels where sync_file_create() -+ * does not take its own reference. This was changed in v4.9.68, -+ * where sync_file_create() now takes its own reference. -+ */ -+ dma_fence_get(fence); -+#endif -+ -+ /* create a sync_file fd representing the fence */ -+ sync_file = sync_file_create(fence); -+ if (!sync_file) { -+ dma_fence_put(fence); -+ kbase_fence_out_remove(katom); -+ return -ENOMEM; -+ } -+ -+ fd = get_unused_fd_flags(O_CLOEXEC); -+ if (fd < 0) { -+ fput(sync_file->file); -+ kbase_fence_out_remove(katom); -+ return fd; -+ } ++ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + -+ fd_install(fd, sync_file->file); ++ kbase_mem_pool_trim(pool, val); + -+ return fd; ++ return 0; +} + -+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) -+{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence = sync_file_get_fence(fd); -+#else -+ struct dma_fence *fence = sync_file_get_fence(fd); -+#endif ++DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops, ++ kbase_mem_pool_debugfs_size_get, ++ kbase_mem_pool_debugfs_size_set, ++ "%llu\n"); + -+ if (!fence) -+ return -ENOENT; ++static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val) ++{ ++ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + -+ kbase_fence_fence_in_set(katom, fence); ++ *val = kbase_mem_pool_max_size(pool); + + return 0; +} + -+int kbase_sync_fence_validate(int fd) ++static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val) +{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence = sync_file_get_fence(fd); -+#else -+ struct dma_fence *fence = sync_file_get_fence(fd); -+#endif -+ -+ if (!fence) -+ return -EINVAL; ++ struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + -+ dma_fence_put(fence); ++ kbase_mem_pool_set_max_size(pool, val); + -+ return 0; /* valid */ ++ return 0; +} + -+enum base_jd_event_code -+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) -+{ -+ int res; -+ -+ if (!kbase_fence_out_is_ours(katom)) { -+ /* Not our fence */ -+ return BASE_JD_EVENT_JOB_CANCELLED; -+ } -+ -+ res = kbase_fence_out_signal(katom, result); -+ if (unlikely(res < 0)) { -+ dev_warn(katom->kctx->kbdev->dev, -+ "fence_signal() failed with %d\n", res); -+ } -+ -+ kbase_sync_fence_out_remove(katom); -+ -+ return (result != 0) ? 
BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; -+} ++DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, ++ kbase_mem_pool_debugfs_max_size_get, ++ kbase_mem_pool_debugfs_max_size_set, ++ "%llu\n"); + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+static void kbase_fence_wait_callback(struct fence *fence, -+ struct fence_cb *cb) -+#else -+static void kbase_fence_wait_callback(struct dma_fence *fence, -+ struct dma_fence_cb *cb) -+#endif ++void kbase_mem_pool_debugfs_init(struct dentry *parent, ++ struct kbase_mem_pool *pool) +{ -+ struct kbase_fence_cb *kcb = container_of(cb, -+ struct kbase_fence_cb, -+ fence_cb); -+ struct kbase_jd_atom *katom = kcb->katom; -+ struct kbase_context *kctx = katom->kctx; -+ -+ /* Cancel atom if fence is erroneous */ -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ -+ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) -+ if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) -+#else -+ if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) -+#endif -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; -+ -+ if (kbase_fence_dep_count_dec_and_test(katom)) { -+ /* We take responsibility of handling this */ -+ kbase_fence_dep_count_set(katom, -1); ++ debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, ++ pool, &kbase_mem_pool_debugfs_size_fops); + -+ /* To prevent a potential deadlock we schedule the work onto the -+ * job_done_wq workqueue -+ * -+ * The issue is that we may signal the timeline while holding -+ * kctx->jctx.lock and the callbacks are run synchronously from -+ * sync_timeline_signal. So we simply defer the work. -+ */ -+ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); -+ queue_work(kctx->jctx.job_done_wq, &katom->work); -+ } ++ debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, ++ pool, &kbase_mem_pool_debugfs_max_size_fops); +} + -+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) -+{ -+ int err; -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +new file mode 100644 +index 000000000..1442854e8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +@@ -0,0 +1,36 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ fence = kbase_fence_in_get(katom); -+ if (!fence) -+ return 0; /* no input fence to wait for, good to go! 
*/ + -+ kbase_fence_dep_count_set(katom, 1); + -+ err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback); ++#ifndef _KBASE_MEM_POOL_DEBUGFS_H ++#define _KBASE_MEM_POOL_DEBUGFS_H + -+ kbase_fence_put(fence); ++#include + -+ if (likely(!err)) { -+ /* Test if the callbacks are already triggered */ -+ if (kbase_fence_dep_count_dec_and_test(katom)) { -+ kbase_fence_free_callbacks(katom); -+ kbase_fence_dep_count_set(katom, -1); -+ return 0; /* Already signaled, good to go right now */ -+ } ++/** ++ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool ++ * @parent: Parent debugfs dentry ++ * @pool: Memory pool to control ++ * ++ * Adds two debugfs files under @parent: ++ * - mem_pool_size: get/set the current size of @pool ++ * - mem_pool_max_size: get/set the max size of @pool ++ */ ++void kbase_mem_pool_debugfs_init(struct dentry *parent, ++ struct kbase_mem_pool *pool); + -+ /* Callback installed, so we just need to wait for it... */ -+ } else { -+ /* Failure */ -+ kbase_fence_free_callbacks(katom); -+ kbase_fence_dep_count_set(katom, -1); ++#endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ + -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +new file mode 100644 +index 000000000..d58fd8d62 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +@@ -0,0 +1,121 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* We should cause the dependent jobs in the bag to be failed, -+ * to do this we schedule the work queue to complete this job */ + -+ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); -+ queue_work(katom->kctx->jctx.job_done_wq, &katom->work); -+ } + -+ return 1; /* completion to be done later by callback/worker */ -+} ++#include + -+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) ++#ifdef CONFIG_DEBUG_FS ++ ++/** Show callback for the @c mem_profile debugfs file. ++ * ++ * This function is called to get the contents of the @c mem_profile debugfs ++ * file. This is a report of current memory usage and distribution in userspace. 
++ * ++ * @param sfile The debugfs entry ++ * @param data Data associated with the entry ++ * ++ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise ++ */ ++static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) +{ -+ if (!kbase_fence_free_callbacks(katom)) { -+ /* The wait wasn't cancelled - -+ * leave the cleanup for kbase_fence_wait_callback */ -+ return; -+ } ++ struct kbase_context *kctx = sfile->private; + -+ /* Take responsibility of completion */ -+ kbase_fence_dep_count_set(katom, -1); ++ mutex_lock(&kctx->mem_profile_lock); + -+ /* Wait was cancelled - zap the atoms */ -+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + -+ kbasep_remove_waiting_soft_job(katom); -+ kbase_finish_soft_job(katom); ++ seq_putc(sfile, '\n'); + -+ if (jd_done_nolock(katom, NULL)) -+ kbase_js_sched_all(katom->kctx->kbdev); -+} ++ mutex_unlock(&kctx->mem_profile_lock); + -+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -+{ -+ kbase_fence_out_remove(katom); ++ return 0; +} + -+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) ++/* ++ * File operations related to debugfs entry for mem_profile ++ */ ++static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) +{ -+ kbase_fence_free_callbacks(katom); -+ kbase_fence_in_remove(katom); ++ return single_open(file, kbasep_mem_profile_seq_show, in->i_private); +} + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+static void kbase_sync_fence_info_get(struct fence *fence, -+ struct kbase_sync_fence_info *info) -+#else -+static void kbase_sync_fence_info_get(struct dma_fence *fence, -+ struct kbase_sync_fence_info *info) -+#endif ++static const struct file_operations kbasep_mem_profile_debugfs_fops = { ++ .open = kbasep_mem_profile_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size) +{ -+ info->fence = fence; ++ int err = 0; + -+ /* translate into CONFIG_SYNC status: -+ * < 0 : error -+ * 0 : active -+ * 1 : signaled -+ */ -+ if (dma_fence_is_signaled(fence)) { -+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ -+ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ -+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) -+ int status = fence->error; -+#else -+ int status = fence->status; -+#endif -+ if (status < 0) -+ info->status = status; /* signaled with error */ -+ else -+ info->status = 1; /* signaled with success */ -+ } else { -+ info->status = 0; /* still active (unsignaled) */ -+ } ++ mutex_lock(&kctx->mem_profile_lock); + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -+ scnprintf(info->name, sizeof(info->name), "%u#%u", -+ fence->context, fence->seqno); -+#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)) -+ scnprintf(info->name, sizeof(info->name), "%llu#%u", -+ fence->context, fence->seqno); -+#else -+ scnprintf(info->name, sizeof(info->name), "%llu#%llu", -+ fence->context, fence->seqno); -+#endif -+} ++ dev_dbg(kctx->kbdev->dev, "initialised: %d", ++ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) -+{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif ++ if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { ++ if 
(!debugfs_create_file("mem_profile", S_IRUGO, ++ kctx->kctx_dentry, kctx, ++ &kbasep_mem_profile_debugfs_fops)) { ++ err = -EAGAIN; ++ } else { ++ kbase_ctx_flag_set(kctx, ++ KCTX_MEM_PROFILE_INITIALIZED); ++ } ++ } + -+ fence = kbase_fence_in_get(katom); -+ if (!fence) -+ return -ENOENT; ++ if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { ++ kfree(kctx->mem_profile_data); ++ kctx->mem_profile_data = data; ++ kctx->mem_profile_size = size; ++ } else { ++ kfree(data); ++ } + -+ kbase_sync_fence_info_get(fence, info); ++ dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", ++ err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+ kbase_fence_put(fence); ++ mutex_unlock(&kctx->mem_profile_lock); + -+ return 0; ++ return err; +} + -+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, -+ struct kbase_sync_fence_info *info) ++void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) +{ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -+ struct fence *fence; -+#else -+ struct dma_fence *fence; -+#endif -+ -+ fence = kbase_fence_out_get(katom); -+ if (!fence) -+ return -ENOENT; ++ mutex_lock(&kctx->mem_profile_lock); + -+ kbase_sync_fence_info_get(fence, info); ++ dev_dbg(kctx->kbdev->dev, "initialised: %d", ++ kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + -+ kbase_fence_put(fence); ++ kfree(kctx->mem_profile_data); ++ kctx->mem_profile_data = NULL; ++ kctx->mem_profile_size = 0; + -+ return 0; ++ mutex_unlock(&kctx->mem_profile_lock); +} + ++#else /* CONFIG_DEBUG_FS */ + -+#ifdef CONFIG_MALI_FENCE_DEBUG -+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size) +{ -+ /* Not implemented */ ++ kfree(data); ++ return 0; +} -+#endif -diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h new file mode 100644 -index 000000000..c8310c45f +index 000000000..a1dc2e0b1 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c -@@ -0,0 +1,2572 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +@@ -0,0 +1,59 @@ +/* + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -393694,2570 +395174,2190 @@ index 000000000..c8310c45f + + + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include + -+/*****************************************************************************/ + -+/* The version of swtrace protocol used in timeline stream. */ -+#define SWTRACE_VERSION 3 ++/** ++ * @file mali_kbase_mem_profile_debugfs.h ++ * Header file for mem profiles entries in debugfs ++ * ++ */ + -+/* The maximum expected length of string in tracepoint descriptor. */ -+#define STRLEN_MAX 64 /* bytes */ ++#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H ++#define _KBASE_MEM_PROFILE_DEBUGFS_H + -+/* The number of nanoseconds in a second. */ -+#define NSECS_IN_SEC 1000000000ull /* ns */ ++#include ++#include + -+/* The period of autoflush checker execution in milliseconds. 
*/ -+#define AUTOFLUSH_INTERVAL 1000 /* ms */ ++/** ++ * @brief Remove entry from Mali memory profile debugfs ++ */ ++void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + -+/* The maximum size of a single packet used by timeline. */ -+#define PACKET_SIZE 4096 /* bytes */ ++/** ++ * @brief Insert @p data to the debugfs file so it can be read by userspace ++ * ++ * The function takes ownership of @p data and frees it later when new data ++ * is inserted. ++ * ++ * If the debugfs entry corresponding to the @p kctx doesn't exist, ++ * an attempt will be made to create it. ++ * ++ * @param kctx The context whose debugfs file @p data should be inserted to ++ * @param data A NULL-terminated string to be inserted to the debugfs file, ++ * without the trailing new line character ++ * @param size The length of the @p data string ++ * @return 0 if @p data inserted correctly ++ * -EAGAIN in case of error ++ * @post @ref mem_profile_initialized will be set to @c true ++ * the first time this function succeeds. ++ */ ++int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, ++ size_t size); + -+/* The number of packets used by one timeline stream. */ -+#define PACKET_COUNT 16 ++#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + -+/* The number of bytes reserved for packet header. -+ * These value must be defined according to MIPE documentation. */ -+#define PACKET_HEADER_SIZE 8 /* bytes */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +new file mode 100644 +index 000000000..82f070297 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +@@ -0,0 +1,33 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+/* The number of bytes reserved for packet sequence number. -+ * These value must be defined according to MIPE documentation. */ -+#define PACKET_NUMBER_SIZE 4 /* bytes */ + -+/* Packet header - first word. -+ * These values must be defined according to MIPE documentation. */ -+#define PACKET_STREAMID_POS 0 -+#define PACKET_STREAMID_LEN 8 -+#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) -+#define PACKET_RSVD1_LEN 8 -+#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) -+#define PACKET_TYPE_LEN 3 -+#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) -+#define PACKET_CLASS_LEN 7 -+#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) -+#define PACKET_FAMILY_LEN 6 + -+/* Packet header - second word -+ * These values must be defined according to MIPE documentation. */ -+#define PACKET_LENGTH_POS 0 -+#define PACKET_LENGTH_LEN 24 -+#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) -+#define PACKET_SEQBIT_LEN 1 -+#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) -+#define PACKET_RSVD2_LEN 7 ++/** ++ * @file mali_kbase_mem_profile_debugfs_buf_size.h ++ * Header file for the size of the buffer to accumulate the histogram report text in ++ */ + -+/* Types of streams generated by timeline. 
-+ * Order is significant! Header streams must precede respective body streams. */ -+enum tl_stream_type { -+ TL_STREAM_TYPE_OBJ_HEADER, -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ TL_STREAM_TYPE_OBJ, -+ TL_STREAM_TYPE_AUX_HEADER, -+ TL_STREAM_TYPE_AUX, ++#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ ++#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ + -+ TL_STREAM_TYPE_COUNT -+}; ++/** ++ * The size of the buffer to accumulate the histogram report text in ++ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT ++ */ ++#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56)) + -+/* Timeline packet family ids. -+ * Values are significant! Check MIPE documentation. */ -+enum tl_packet_family { -+ TL_PACKET_FAMILY_CTRL = 0, /* control packets */ -+ TL_PACKET_FAMILY_TL = 1, /* timeline packets */ ++#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ + -+ TL_PACKET_FAMILY_COUNT -+}; +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c +new file mode 100644 +index 000000000..26144850a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c +@@ -0,0 +1,2088 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+/* Packet classes used in timeline streams. -+ * Values are significant! Check MIPE documentation. */ -+enum tl_packet_class { -+ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ -+ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ -+}; + -+/* Packet types used in timeline streams. -+ * Values are significant! Check MIPE documentation. */ -+enum tl_packet_type { -+ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ -+ TL_PACKET_TYPE_BODY = 1, /* stream's body */ -+ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ -+}; + -+/* Message ids of trace events that are recorded in the timeline stream. */ -+enum tl_msg_id_obj { -+ /* Timeline object events. */ -+ KBASE_TL_NEW_CTX, -+ KBASE_TL_NEW_GPU, -+ KBASE_TL_NEW_LPU, -+ KBASE_TL_NEW_ATOM, -+ KBASE_TL_NEW_AS, -+ KBASE_TL_DEL_CTX, -+ KBASE_TL_DEL_ATOM, -+ KBASE_TL_LIFELINK_LPU_GPU, -+ KBASE_TL_LIFELINK_AS_GPU, -+ KBASE_TL_RET_CTX_LPU, -+ KBASE_TL_RET_ATOM_CTX, -+ KBASE_TL_RET_ATOM_LPU, -+ KBASE_TL_NRET_CTX_LPU, -+ KBASE_TL_NRET_ATOM_CTX, -+ KBASE_TL_NRET_ATOM_LPU, -+ KBASE_TL_RET_AS_CTX, -+ KBASE_TL_NRET_AS_CTX, -+ KBASE_TL_RET_ATOM_AS, -+ KBASE_TL_NRET_ATOM_AS, -+ KBASE_TL_DEP_ATOM_ATOM, -+ KBASE_TL_NDEP_ATOM_ATOM, -+ KBASE_TL_RDEP_ATOM_ATOM, -+ KBASE_TL_ATTRIB_ATOM_CONFIG, -+ KBASE_TL_ATTRIB_ATOM_PRIORITY, -+ KBASE_TL_ATTRIB_ATOM_STATE, -+ KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, -+ KBASE_TL_ATTRIB_ATOM_JIT, -+ KBASE_TL_ATTRIB_AS_CONFIG, -+ KBASE_TL_EVENT_LPU_SOFTSTOP, -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + -+ /* Job dump specific events. */ -+ KBASE_JD_GPU_SOFT_RESET -+}; + -+/* Message ids of trace events that are recorded in the auxiliary stream. 
*/ -+enum tl_msg_id_aux { -+ KBASE_AUX_PM_STATE, -+ KBASE_AUX_PAGEFAULT, -+ KBASE_AUX_PAGESALLOC, -+ KBASE_AUX_DEVFREQ_TARGET, -+ KBASE_AUX_PROTECTED_ENTER_START, -+ KBASE_AUX_PROTECTED_ENTER_END, -+ KBASE_AUX_PROTECTED_LEAVE_START, -+ KBASE_AUX_PROTECTED_LEAVE_END -+}; ++/** ++ * @file mali_kbase_mmu.c ++ * Base kernel MMU management. ++ */ + -+/*****************************************************************************/ ++/* #define DEBUG 1 */ ++#include ++#include ++#include ++#include ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++#include ++#endif ++#include ++#include ++#include ++ ++#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define KBASE_MMU_PAGE_ENTRIES 512 + +/** -+ * struct tl_stream - timeline stream structure -+ * @lock: message order lock -+ * @buffer: array of buffers -+ * @wbi: write buffer index -+ * @rbi: read buffer index -+ * @numbered: if non-zero stream's packets are sequentially numbered -+ * @autoflush_counter: counter tracking stream's autoflush state ++ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. ++ * @kctx: The KBase context. ++ * @vpfn: The virtual page frame number to start the flush on. ++ * @nr: The number of pages to flush. ++ * @sync: Set if the operation should be synchronous or not. + * -+ * This structure holds information needed to construct proper packets in the -+ * timeline stream. Each message in sequence must bear timestamp that is greater -+ * to one in previous message in the same stream. For this reason lock is held -+ * throughout the process of message creation. Each stream contains set of -+ * buffers. Each buffer will hold one MIPE packet. In case there is no free -+ * space required to store incoming message the oldest buffer is discarded. -+ * Each packet in timeline body stream has sequence number embedded (this value -+ * must increment monotonically and is used by packets receiver to discover -+ * buffer overflows. -+ * Autoflush counter is set to negative number when there is no data pending -+ * for flush and it is set to zero on every update of the buffer. Autoflush -+ * timer will increment the counter by one on every expiry. In case there will -+ * be no activity on the buffer during two consecutive timer expiries, stream -+ * buffer will be flushed. ++ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. ++ * ++ * If sync is not set then transactions still in flight when the flush is issued ++ * may use the old page tables and the data they write will not be written out ++ * to memory, this function returns after the flush has been issued but ++ * before all accesses which might effect the flushed region have completed. ++ * ++ * If sync is set then accesses in the flushed region will be drained ++ * before data is flush and invalidated through L1, L2 and into memory, ++ * after which point this function will return. + */ -+struct tl_stream { -+ spinlock_t lock; -+ -+ struct { -+ atomic_t size; /* number of bytes in buffer */ -+ char data[PACKET_SIZE]; /* buffer's data */ -+ } buffer[PACKET_COUNT]; ++static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, ++ u64 vpfn, size_t nr, bool sync); + -+ atomic_t wbi; -+ atomic_t rbi; ++/** ++ * kbase_mmu_sync_pgd - sync page directory to memory ++ * @kbdev: Device pointer. ++ * @handle: Address of DMA region. ++ * @size: Size of the region to sync. ++ * ++ * This should be called after each page directory update. 
++ */ + -+ int numbered; -+ atomic_t autoflush_counter; -+}; ++static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, ++ dma_addr_t handle, size_t size) ++{ ++ /* If page table is not coherent then ensure the gpu can read ++ * the pages from memory ++ */ ++ if (kbdev->system_coherency != COHERENCY_ACE) ++ dma_sync_single_for_device(kbdev->dev, handle, size, ++ DMA_TO_DEVICE); ++} + -+/** -+ * struct tp_desc - tracepoint message descriptor structure -+ * @id: tracepoint ID identifying message in stream -+ * @id_str: human readable version of tracepoint ID -+ * @name: tracepoint description -+ * @arg_types: tracepoint's arguments types declaration -+ * @arg_names: comma separated list of tracepoint's arguments names ++/* ++ * Definitions: ++ * - PGD: Page Directory. ++ * - PTE: Page Table Entry. A 64bit value pointing to the next ++ * level of translation ++ * - ATE: Address Transation Entry. A 64bit value pointing to ++ * a 4kB physical page. + */ -+struct tp_desc { -+ u32 id; -+ const char *id_str; -+ const char *name; -+ const char *arg_types; -+ const char *arg_names; -+}; + -+/*****************************************************************************/ ++static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str); + -+/* Configuration of timeline streams generated by kernel. -+ * Kernel emit only streams containing either timeline object events or -+ * auxiliary events. All streams have stream id value of 1 (as opposed to user -+ * space streams that have value of 0). */ -+static const struct { -+ enum tl_packet_family pkt_family; -+ enum tl_packet_class pkt_class; -+ enum tl_packet_type pkt_type; -+ unsigned int stream_id; -+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { -+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1}, -+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, -+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, -+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1}, -+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} -+}; + -+/* The timeline streams generated by kernel. */ -+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT]; ++static size_t make_multiple(size_t minimum, size_t multiple) ++{ ++ size_t remainder = minimum % multiple; + -+/* Autoflush timer. */ -+static struct timer_list autoflush_timer; ++ if (remainder == 0) ++ return minimum; + -+/* If non-zero autoflush timer is active. */ -+static atomic_t autoflush_timer_active; ++ return minimum + multiple - remainder; ++} + -+/* Reader lock. Only one reader is allowed to have access to the timeline -+ * streams at any given time. */ -+static DEFINE_MUTEX(tl_reader_lock); ++void page_fault_worker(struct work_struct *data) ++{ ++ u64 fault_pfn; ++ u32 fault_status; ++ size_t new_pages; ++ size_t fault_rel_pfn; ++ struct kbase_as *faulting_as; ++ int as_no; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct kbase_va_region *region; ++ int err; ++ bool grown = false; + -+/* Timeline stream event queue. */ -+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); ++ faulting_as = container_of(data, struct kbase_as, work_pagefault); ++ fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; ++ as_no = faulting_as->number; + -+/* The timeline stream file operations functions. 
*/ -+static ssize_t kbasep_tlstream_read( -+ struct file *filp, -+ char __user *buffer, -+ size_t size, -+ loff_t *f_pos); -+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait); -+static int kbasep_tlstream_release(struct inode *inode, struct file *filp); ++ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + -+/* The timeline stream file operations structure. */ -+static const struct file_operations kbasep_tlstream_fops = { -+ .release = kbasep_tlstream_release, -+ .read = kbasep_tlstream_read, -+ .poll = kbasep_tlstream_poll, -+}; ++ /* Grab the context that was already refcounted in kbase_mmu_interrupt(). ++ * Therefore, it cannot be scheduled out of this AS until we explicitly release it ++ */ ++ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); ++ if (WARN_ON(!kctx)) { ++ atomic_dec(&kbdev->faults_pending); ++ return; ++ } + -+/* Descriptors of timeline messages transmitted in object events stream. */ -+static const struct tp_desc tp_desc_obj[] = { -+ { -+ KBASE_TL_NEW_CTX, -+ __stringify(KBASE_TL_NEW_CTX), -+ "object ctx is created", -+ "@pII", -+ "ctx,ctx_nr,tgid" -+ }, -+ { -+ KBASE_TL_NEW_GPU, -+ __stringify(KBASE_TL_NEW_GPU), -+ "object gpu is created", -+ "@pII", -+ "gpu,gpu_id,core_count" -+ }, -+ { -+ KBASE_TL_NEW_LPU, -+ __stringify(KBASE_TL_NEW_LPU), -+ "object lpu is created", -+ "@pII", -+ "lpu,lpu_nr,lpu_fn" -+ }, -+ { -+ KBASE_TL_NEW_ATOM, -+ __stringify(KBASE_TL_NEW_ATOM), -+ "object atom is created", -+ "@pI", -+ "atom,atom_nr" -+ }, -+ { -+ KBASE_TL_NEW_AS, -+ __stringify(KBASE_TL_NEW_AS), -+ "address space object is created", -+ "@pI", -+ "address_space,as_nr" -+ }, -+ { -+ KBASE_TL_DEL_CTX, -+ __stringify(KBASE_TL_DEL_CTX), -+ "context is destroyed", -+ "@p", -+ "ctx" -+ }, -+ { -+ KBASE_TL_DEL_ATOM, -+ __stringify(KBASE_TL_DEL_ATOM), -+ "atom is destroyed", -+ "@p", -+ "atom" -+ }, -+ { -+ KBASE_TL_LIFELINK_LPU_GPU, -+ __stringify(KBASE_TL_LIFELINK_LPU_GPU), -+ "lpu is deleted with gpu", -+ "@pp", -+ "lpu,gpu" -+ }, -+ { -+ KBASE_TL_LIFELINK_AS_GPU, -+ __stringify(KBASE_TL_LIFELINK_AS_GPU), -+ "address space is deleted with gpu", -+ "@pp", -+ "address_space,gpu" -+ }, -+ { -+ KBASE_TL_RET_CTX_LPU, -+ __stringify(KBASE_TL_RET_CTX_LPU), -+ "context is retained by lpu", -+ "@pp", -+ "ctx,lpu" -+ }, -+ { -+ KBASE_TL_RET_ATOM_CTX, -+ __stringify(KBASE_TL_RET_ATOM_CTX), -+ "atom is retained by context", -+ "@pp", -+ "atom,ctx" -+ }, -+ { -+ KBASE_TL_RET_ATOM_LPU, -+ __stringify(KBASE_TL_RET_ATOM_LPU), -+ "atom is retained by lpu", -+ "@pps", -+ "atom,lpu,attrib_match_list" -+ }, -+ { -+ KBASE_TL_NRET_CTX_LPU, -+ __stringify(KBASE_TL_NRET_CTX_LPU), -+ "context is released by lpu", -+ "@pp", -+ "ctx,lpu" -+ }, -+ { -+ KBASE_TL_NRET_ATOM_CTX, -+ __stringify(KBASE_TL_NRET_ATOM_CTX), -+ "atom is released by context", -+ "@pp", -+ "atom,ctx" -+ }, -+ { -+ KBASE_TL_NRET_ATOM_LPU, -+ __stringify(KBASE_TL_NRET_ATOM_LPU), -+ "atom is released by lpu", -+ "@pp", -+ "atom,lpu" -+ }, -+ { -+ KBASE_TL_RET_AS_CTX, -+ __stringify(KBASE_TL_RET_AS_CTX), -+ "address space is retained by context", -+ "@pp", -+ "address_space,ctx" -+ }, -+ { -+ KBASE_TL_NRET_AS_CTX, -+ __stringify(KBASE_TL_NRET_AS_CTX), -+ "address space is released by context", -+ "@pp", -+ "address_space,ctx" -+ }, -+ { -+ KBASE_TL_RET_ATOM_AS, -+ __stringify(KBASE_TL_RET_ATOM_AS), -+ "atom is retained by address space", -+ "@pp", -+ "atom,address_space" -+ }, -+ { -+ KBASE_TL_NRET_ATOM_AS, -+ __stringify(KBASE_TL_NRET_ATOM_AS), -+ "atom is released by address space", -+ 
"@pp", -+ "atom,address_space" -+ }, -+ { -+ KBASE_TL_DEP_ATOM_ATOM, -+ __stringify(KBASE_TL_DEP_ATOM_ATOM), -+ "atom2 depends on atom1", -+ "@pp", -+ "atom1,atom2" -+ }, -+ { -+ KBASE_TL_NDEP_ATOM_ATOM, -+ __stringify(KBASE_TL_NDEP_ATOM_ATOM), -+ "atom2 no longer depends on atom1", -+ "@pp", -+ "atom1,atom2" -+ }, -+ { -+ KBASE_TL_RDEP_ATOM_ATOM, -+ __stringify(KBASE_TL_RDEP_ATOM_ATOM), -+ "resolved dependecy of atom2 depending on atom1", -+ "@pp", -+ "atom1,atom2" -+ }, -+ { -+ KBASE_TL_ATTRIB_ATOM_CONFIG, -+ __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), -+ "atom job slot attributes", -+ "@pLLI", -+ "atom,descriptor,affinity,config" -+ }, -+ { -+ KBASE_TL_ATTRIB_ATOM_PRIORITY, -+ __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY), -+ "atom priority", -+ "@pI", -+ "atom,prio" -+ }, -+ { -+ KBASE_TL_ATTRIB_ATOM_STATE, -+ __stringify(KBASE_TL_ATTRIB_ATOM_STATE), -+ "atom state", -+ "@pI", -+ "atom,state" -+ }, -+ { -+ KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, -+ __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE), -+ "atom caused priority change", -+ "@p", -+ "atom" -+ }, -+ { -+ KBASE_TL_ATTRIB_ATOM_JIT, -+ __stringify(KBASE_TL_ATTRIB_ATOM_JIT), -+ "jit done for atom", -+ "@pLL", -+ "atom,edit_addr,new_addr" -+ }, -+ { -+ KBASE_TL_ATTRIB_AS_CONFIG, -+ __stringify(KBASE_TL_ATTRIB_AS_CONFIG), -+ "address space attributes", -+ "@pLLL", -+ "address_space,transtab,memattr,transcfg" -+ }, -+ { -+ KBASE_TL_EVENT_LPU_SOFTSTOP, -+ __stringify(KBASE_TL_EVENT_LPU_SOFTSTOP), -+ "softstop event on given lpu", -+ "@p", -+ "lpu" -+ }, -+ { -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, -+ __stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX), -+ "atom softstopped", -+ "@p", -+ "atom" -+ }, -+ { -+ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, -+ __stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE), -+ "atom softstop issued", -+ "@p", -+ "atom" -+ }, -+ { -+ KBASE_JD_GPU_SOFT_RESET, -+ __stringify(KBASE_JD_GPU_SOFT_RESET), -+ "gpu soft reset", -+ "@p", -+ "gpu" -+ }, -+}; ++ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + -+/* Descriptors of timeline messages transmitted in auxiliary events stream. */ -+static const struct tp_desc tp_desc_aux[] = { -+ { -+ KBASE_AUX_PM_STATE, -+ __stringify(KBASE_AUX_PM_STATE), -+ "PM state", -+ "@IL", -+ "core_type,core_state_bitset" -+ }, -+ { -+ KBASE_AUX_PAGEFAULT, -+ __stringify(KBASE_AUX_PAGEFAULT), -+ "Page fault", -+ "@IL", -+ "ctx_nr,page_cnt_change" -+ }, -+ { -+ KBASE_AUX_PAGESALLOC, -+ __stringify(KBASE_AUX_PAGESALLOC), -+ "Total alloc pages change", -+ "@IL", -+ "ctx_nr,page_cnt" -+ }, -+ { -+ KBASE_AUX_DEVFREQ_TARGET, -+ __stringify(KBASE_AUX_DEVFREQ_TARGET), -+ "New device frequency target", -+ "@L", -+ "target_freq" -+ }, -+ { -+ KBASE_AUX_PROTECTED_ENTER_START, -+ __stringify(KBASE_AUX_PROTECTED_ENTER_START), -+ "enter protected mode start", -+ "@p", -+ "gpu" -+ }, -+ { -+ KBASE_AUX_PROTECTED_ENTER_END, -+ __stringify(KBASE_AUX_PROTECTED_ENTER_END), -+ "enter protected mode end", -+ "@p", -+ "gpu" -+ }, -+ { -+ KBASE_AUX_PROTECTED_LEAVE_START, -+ __stringify(KBASE_AUX_PROTECTED_LEAVE_START), -+ "leave protected mode start", -+ "@p", -+ "gpu" -+ }, ++ if (unlikely(faulting_as->protected_mode)) + { -+ KBASE_AUX_PROTECTED_LEAVE_END, -+ __stringify(KBASE_AUX_PROTECTED_LEAVE_END), -+ "leave protected mode end", -+ "@p", -+ "gpu" ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Protected mode fault"); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ ++ goto fault_done; + } -+}; + -+#if MALI_UNIT_TEST -+/* Number of bytes read by user. 
*/ -+static atomic_t tlstream_bytes_collected = {0}; ++ fault_status = faulting_as->fault_status; ++ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { + -+/* Number of bytes generated by tracepoint messages. */ -+static atomic_t tlstream_bytes_generated = {0}; -+#endif /* MALI_UNIT_TEST */ ++ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: ++ /* need to check against the region to handle this one */ ++ break; + -+/*****************************************************************************/ ++ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Permission failure"); ++ goto fault_done; + -+/* Indicator of whether the timeline stream file descriptor is used. */ -+atomic_t kbase_tlstream_enabled = {0}; ++ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Translation table bus fault"); ++ goto fault_done; + -+/*****************************************************************************/ ++ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: ++ /* nothing to do, but we don't expect this fault currently */ ++ dev_warn(kbdev->dev, "Access flag unexpectedly set"); ++ goto fault_done; + -+/** -+ * kbasep_tlstream_get_timestamp - return timestamp -+ * -+ * Function returns timestamp value based on raw monotonic timer. Value will -+ * wrap around zero in case of overflow. -+ * Return: timestamp value -+ */ -+static u64 kbasep_tlstream_get_timestamp(void) -+{ -+ struct timespec64 ts; -+ u64 timestamp; ++ case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Address size fault"); ++ else ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Unknown fault code"); ++ goto fault_done; + -+ ktime_get_raw_ts64(&ts); -+ timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; -+ return timestamp; -+} ++ case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory attributes fault"); ++ else ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Unknown fault code"); ++ goto fault_done; + -+/** -+ * kbasep_tlstream_write_bytes - write data to message buffer -+ * @buffer: buffer where data will be written -+ * @pos: position in the buffer where to place data -+ * @bytes: pointer to buffer holding data -+ * @len: length of data to be written -+ * -+ * Return: updated position in the buffer -+ */ -+static size_t kbasep_tlstream_write_bytes( -+ char *buffer, -+ size_t pos, -+ const void *bytes, -+ size_t len) -+{ -+ KBASE_DEBUG_ASSERT(buffer); -+ KBASE_DEBUG_ASSERT(bytes); ++ default: ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Unknown fault code"); ++ goto fault_done; ++ } + -+ memcpy(&buffer[pos], bytes, len); ++ /* so we have a translation fault, let's see if it is for growable ++ * memory */ ++ kbase_gpu_vm_lock(kctx); + -+ return pos + len; -+} ++ region = kbase_region_tracker_find_region_enclosing_address(kctx, ++ faulting_as->fault_addr); ++ if (!region || region->flags & KBASE_REG_FREE) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory is not mapped on the GPU"); ++ goto fault_done; ++ } + -+/** -+ * kbasep_tlstream_write_string - write string to message buffer -+ * @buffer: buffer where data will be written -+ * @pos: position in the buffer where to place 
data -+ * @string: pointer to buffer holding the source string -+ * @max_write_size: number of bytes that can be stored in buffer -+ * -+ * Return: updated position in the buffer -+ */ -+static size_t kbasep_tlstream_write_string( -+ char *buffer, -+ size_t pos, -+ const char *string, -+ size_t max_write_size) -+{ -+ u32 string_len; ++ if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "DMA-BUF is not mapped on the GPU"); ++ goto fault_done; ++ } + -+ KBASE_DEBUG_ASSERT(buffer); -+ KBASE_DEBUG_ASSERT(string); -+ /* Timeline string consists of at least string length and nul -+ * terminator. */ -+ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); -+ max_write_size -= sizeof(string_len); ++ if ((region->flags & GROWABLE_FLAGS_REQUIRED) ++ != GROWABLE_FLAGS_REQUIRED) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory is not growable"); ++ goto fault_done; ++ } + -+ string_len = strlcpy( -+ &buffer[pos + sizeof(string_len)], -+ string, -+ max_write_size); -+ string_len += sizeof(char); ++ if ((region->flags & KBASE_REG_DONT_NEED)) { ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Don't need memory can't be grown"); ++ goto fault_done; ++ } + -+ /* Make sure that the source string fit into the buffer. */ -+ KBASE_DEBUG_ASSERT(string_len <= max_write_size); ++ /* find the size we need to grow it by */ ++ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address ++ * validating the fault_adress to be within a size_t from the start_pfn */ ++ fault_rel_pfn = fault_pfn - region->start_pfn; + -+ /* Update string length. */ -+ memcpy(&buffer[pos], &string_len, sizeof(string_len)); ++ if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { ++ dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", ++ faulting_as->fault_addr, region->start_pfn, ++ region->start_pfn + ++ kbase_reg_current_backed_size(region)); + -+ return pos + sizeof(string_len) + string_len; -+} ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+/** -+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer -+ * @buffer: buffer where data will be written -+ * @pos: position in the buffer where to place data -+ * -+ * Return: updated position in the buffer -+ */ -+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos) -+{ -+ u64 timestamp = kbasep_tlstream_get_timestamp(); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ /* [1] in case another page fault occurred while we were ++ * handling the (duplicate) page fault we need to ensure we ++ * don't loose the other page fault as result of us clearing ++ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send ++ * an UNLOCK command that will retry any stalled memory ++ * transaction (which should cause the other page fault to be ++ * raised again). 
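++	 * In short: the fault was already cleared above, the UNLOCK issued
++	 * below only replays any stalled transactions, and the page fault
++	 * interrupt is then re-enabled in the mask before the region lock
++	 * is dropped and this duplicate fault is treated as done.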
++ */ ++ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, ++ AS_COMMAND_UNLOCK, 1); + -+ return kbasep_tlstream_write_bytes( -+ buffer, pos, -+ ×tamp, sizeof(timestamp)); -+} ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+/** -+ * kbasep_tlstream_put_bits - put bits in a word -+ * @word: pointer to the words being modified -+ * @value: value that shall be written to given position -+ * @bitpos: position where value shall be written (in bits) -+ * @bitlen: length of value (in bits) -+ */ -+static void kbasep_tlstream_put_bits( -+ u32 *word, -+ u32 value, -+ unsigned int bitpos, -+ unsigned int bitlen) -+{ -+ const u32 mask = ((1 << bitlen) - 1) << bitpos; ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ kbase_gpu_vm_unlock(kctx); + -+ KBASE_DEBUG_ASSERT(word); -+ KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen)); -+ KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32); ++ goto fault_done; ++ } + -+ *word &= ~mask; -+ *word |= ((value << bitpos) & mask); -+} ++ new_pages = make_multiple(fault_rel_pfn - ++ kbase_reg_current_backed_size(region) + 1, ++ region->extent); + -+/** -+ * kbasep_tlstream_packet_header_setup - setup the packet header -+ * @buffer: pointer to the buffer -+ * @pkt_family: packet's family -+ * @pkt_type: packet's type -+ * @pkt_class: packet's class -+ * @stream_id: stream id -+ * @numbered: non-zero if this stream is numbered -+ * -+ * Function sets up immutable part of packet header in the given buffer. -+ */ -+static void kbasep_tlstream_packet_header_setup( -+ char *buffer, -+ enum tl_packet_family pkt_family, -+ enum tl_packet_class pkt_class, -+ enum tl_packet_type pkt_type, -+ unsigned int stream_id, -+ int numbered) -+{ -+ u32 word0 = 0; -+ u32 word1 = 0; ++ /* cap to max vsize */ ++ if (new_pages + kbase_reg_current_backed_size(region) > ++ region->nr_pages) ++ new_pages = region->nr_pages - ++ kbase_reg_current_backed_size(region); + -+ KBASE_DEBUG_ASSERT(buffer); -+ KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL); -+ KBASE_DEBUG_ASSERT( -+ (pkt_type == TL_PACKET_TYPE_HEADER) || -+ (pkt_type == TL_PACKET_TYPE_SUMMARY) || -+ (pkt_type == TL_PACKET_TYPE_BODY)); -+ KBASE_DEBUG_ASSERT( -+ (pkt_class == TL_PACKET_CLASS_OBJ) || -+ (pkt_class == TL_PACKET_CLASS_AUX)); ++ if (0 == new_pages) { ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ kbasep_tlstream_put_bits( -+ &word0, pkt_family, -+ PACKET_FAMILY_POS, PACKET_FAMILY_LEN); -+ kbasep_tlstream_put_bits( -+ &word0, pkt_class, -+ PACKET_CLASS_POS, PACKET_CLASS_LEN); -+ kbasep_tlstream_put_bits( -+ &word0, pkt_type, -+ PACKET_TYPE_POS, PACKET_TYPE_LEN); -+ kbasep_tlstream_put_bits( -+ &word0, stream_id, -+ PACKET_STREAMID_POS, PACKET_STREAMID_LEN); ++ /* Duplicate of a fault we've already handled, nothing to do */ ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ /* See comment [1] about UNLOCK usage */ ++ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, ++ AS_COMMAND_UNLOCK, 1); + -+ if (numbered) -+ kbasep_tlstream_put_bits( -+ &word1, 1, -+ PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN); ++ mutex_unlock(&kbdev->mmu_hw_mutex); + -+ memcpy(&buffer[0], &word0, sizeof(word0)); -+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); -+} ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ kbase_gpu_vm_unlock(kctx); ++ goto fault_done; ++ } + -+/** -+ * kbasep_tlstream_packet_header_update - update the packet header -+ * @buffer: pointer to the buffer -+ * @data_size: amount of data carried in this packet -+ 
* -+ * Function updates mutable part of packet header in the given buffer. -+ * Note that value of data_size must not including size of the header. -+ */ -+static void kbasep_tlstream_packet_header_update( -+ char *buffer, -+ size_t data_size) -+{ -+ u32 word0; -+ u32 word1; ++ if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) { ++ if (region->gpu_alloc != region->cpu_alloc) { ++ if (kbase_alloc_phy_pages_helper( ++ region->cpu_alloc, new_pages) == 0) { ++ grown = true; ++ } else { ++ kbase_free_phy_pages_helper(region->gpu_alloc, ++ new_pages); ++ } ++ } else { ++ grown = true; ++ } ++ } + -+ KBASE_DEBUG_ASSERT(buffer); -+ CSTD_UNUSED(word0); + -+ memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1)); ++ if (grown) { ++ u64 pfn_offset; ++ u32 op; + -+ kbasep_tlstream_put_bits( -+ &word1, data_size, -+ PACKET_LENGTH_POS, PACKET_LENGTH_LEN); ++ /* alloc success */ ++ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + -+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); -+} ++ /* set up the new pages */ ++ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; ++ /* ++ * Note: ++ * Issuing an MMU operation will unlock the MMU and cause the ++ * translation to be replayed. If the page insertion fails then ++ * rather then trying to continue the context should be killed ++ * so the no_flush version of insert_pages is used which allows ++ * us to unlock the MMU as we see fit. ++ */ ++ err = kbase_mmu_insert_pages_no_flush(kctx, ++ region->start_pfn + pfn_offset, ++ &kbase_get_gpu_phy_pages(region)[pfn_offset], ++ new_pages, region->flags); ++ if (err) { ++ kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); ++ if (region->gpu_alloc != region->cpu_alloc) ++ kbase_free_phy_pages_helper(region->cpu_alloc, ++ new_pages); ++ kbase_gpu_vm_unlock(kctx); ++ /* The locked VA region will be unlocked and the cache invalidated in here */ ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Page table update failure"); ++ goto fault_done; ++ } ++#if defined(CONFIG_MALI_GATOR_SUPPORT) ++ kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); ++#endif ++ KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages); + -+/** -+ * kbasep_tlstream_packet_number_update - update the packet number -+ * @buffer: pointer to the buffer -+ * @counter: value of packet counter for this packet's stream -+ * -+ * Function updates packet number embedded within the packet placed in the -+ * given buffer. -+ */ -+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter) -+{ -+ KBASE_DEBUG_ASSERT(buffer); ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); -+} ++ /* flush L2 and unlock the VA (resumes the MMU) */ ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) ++ op = AS_COMMAND_FLUSH; ++ else ++ op = AS_COMMAND_FLUSH_PT; + -+/** -+ * kbasep_timeline_stream_reset - reset stream -+ * @stream: pointer to the stream structure -+ * -+ * Function discards all pending messages and resets packet counters. -+ */ -+static void kbasep_timeline_stream_reset(struct tl_stream *stream) -+{ -+ unsigned int i; ++ /* clear MMU interrupt - this needs to be done after updating ++ * the page tables but before issuing a FLUSH command. The ++ * FLUSH cmd has a side effect that it restarts stalled memory ++ * transactions in other address spaces which may cause ++ * another fault to occur. 
If we didn't clear the interrupt at ++ * this stage a new IRQ might not be raised when the GPU finds ++ * a MMU IRQ is already pending. ++ */ ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); + -+ for (i = 0; i < PACKET_COUNT; i++) { -+ if (stream->numbered) -+ atomic_set( -+ &stream->buffer[i].size, -+ PACKET_HEADER_SIZE + -+ PACKET_NUMBER_SIZE); -+ else -+ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); ++ kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, ++ faulting_as->fault_addr >> PAGE_SHIFT, ++ new_pages, ++ op, 1); ++ ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ ++ ++ /* reenable this in the mask */ ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE); ++ kbase_gpu_vm_unlock(kctx); ++ } else { ++ /* failed to extend, handle as a normal PF */ ++ kbase_gpu_vm_unlock(kctx); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Page allocation failure"); + } + -+ atomic_set(&stream->wbi, 0); -+ atomic_set(&stream->rbi, 0); ++fault_done: ++ /* ++ * By this point, the fault was handled in some way, ++ * so release the ctx refcount ++ */ ++ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ ++ atomic_dec(&kbdev->faults_pending); +} + -+/** -+ * kbasep_timeline_stream_init - initialize timeline stream -+ * @stream: pointer to the stream structure -+ * @stream_type: stream type -+ */ -+static void kbasep_timeline_stream_init( -+ struct tl_stream *stream, -+ enum tl_stream_type stream_type) ++phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) +{ -+ unsigned int i; ++ u64 *page; ++ int i; ++ struct page *p; ++ int new_page_count __maybe_unused; + -+ KBASE_DEBUG_ASSERT(stream); -+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages); ++ kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); + -+ spin_lock_init(&stream->lock); ++ p = kbase_mem_pool_alloc(&kctx->mem_pool); ++ if (!p) ++ goto sub_pages; + -+ /* All packets carrying tracepoints shall be numbered. 
*/ -+ if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) -+ stream->numbered = 1; -+ else -+ stream->numbered = 0; ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)kctx->id, ++ (u64)new_page_count); + -+ for (i = 0; i < PACKET_COUNT; i++) -+ kbasep_tlstream_packet_header_setup( -+ stream->buffer[i].data, -+ tl_stream_cfg[stream_type].pkt_family, -+ tl_stream_cfg[stream_type].pkt_class, -+ tl_stream_cfg[stream_type].pkt_type, -+ tl_stream_cfg[stream_type].stream_id, -+ stream->numbered); ++ page = kmap(p); ++ if (NULL == page) ++ goto alloc_free; + -+ kbasep_timeline_stream_reset(tl_stream[stream_type]); -+} ++ kbase_process_page_usage_inc(kctx, 1); + -+/** -+ * kbasep_timeline_stream_term - terminate timeline stream -+ * @stream: pointer to the stream structure -+ */ -+static void kbasep_timeline_stream_term(struct tl_stream *stream) -+{ -+ KBASE_DEBUG_ASSERT(stream); ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) ++ kctx->kbdev->mmu_mode->entry_invalidate(&page[i]); ++ ++ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ ++ kunmap(p); ++ return page_to_phys(p); ++ ++alloc_free: ++ kbase_mem_pool_free(&kctx->mem_pool, p, false); ++sub_pages: ++ kbase_atomic_sub_pages(1, &kctx->used_pages); ++ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); ++ ++ return 0; +} + -+/** -+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space -+ * @stream: pointer to the stream structure -+ * @wb_idx_raw: write buffer index -+ * @wb_size: length of data stored in current buffer -+ * -+ * Function updates currently written buffer with packet header. Then write -+ * index is incremented and buffer is handled to user space. Parameters -+ * of new buffer are returned using provided arguments. -+ * -+ * Return: length of data in new buffer -+ * -+ * Warning: User must update the stream structure with returned value. ++KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); ++ ++/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the ++ * new table from the pool if needed and possible + */ -+static size_t kbasep_tlstream_msgbuf_submit( -+ struct tl_stream *stream, -+ unsigned int wb_idx_raw, -+ unsigned int wb_size) ++static int mmu_get_next_pgd(struct kbase_context *kctx, ++ phys_addr_t *pgd, u64 vpfn, int level) +{ -+ unsigned int rb_idx_raw = atomic_read(&stream->rbi); -+ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; ++ u64 *page; ++ phys_addr_t target_pgd; ++ struct page *p; + -+ /* Set stream as flushed. */ -+ atomic_set(&stream->autoflush_counter, -1); ++ KBASE_DEBUG_ASSERT(*pgd); ++ KBASE_DEBUG_ASSERT(NULL != kctx); + -+ kbasep_tlstream_packet_header_update( -+ stream->buffer[wb_idx].data, -+ wb_size - PACKET_HEADER_SIZE); ++ lockdep_assert_held(&kctx->mmu_lock); + -+ if (stream->numbered) -+ kbasep_tlstream_packet_number_update( -+ stream->buffer[wb_idx].data, -+ wb_idx_raw); ++ /* ++ * Architecture spec defines level-0 as being the top-most. ++ * This is a bit unfortunate here, but we keep the same convention. ++ */ ++ vpfn >>= (3 - level) * 9; ++ vpfn &= 0x1FF; + -+ /* Increasing write buffer index will expose this packet to the reader. -+ * As stream->lock is not taken on reader side we must make sure memory -+ * is updated correctly before this will happen. 
*/ -+ smp_wmb(); -+ wb_idx_raw++; -+ atomic_set(&stream->wbi, wb_idx_raw); ++ p = pfn_to_page(PFN_DOWN(*pgd)); ++ page = kmap(p); ++ if (NULL == page) { ++ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); ++ return -EINVAL; ++ } + -+ /* Inform user that packets are ready for reading. */ -+ wake_up_interruptible(&tl_event_queue); ++ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + -+ /* Detect and mark overflow in this stream. */ -+ if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) { -+ /* Reader side depends on this increment to correctly handle -+ * overflows. The value shall be updated only if it was not -+ * modified by the reader. The data holding buffer will not be -+ * updated before stream->lock is released, however size of the -+ * buffer will. Make sure this increment is globally visible -+ * before information about selected write buffer size. */ -+ atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1); ++ if (!target_pgd) { ++ target_pgd = kbase_mmu_alloc_pgd(kctx); ++ if (!target_pgd) { ++ dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); ++ kunmap(p); ++ return -ENOMEM; ++ } ++ ++ kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); ++ ++ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ /* Rely on the caller to update the address space flags. */ + } + -+ wb_size = PACKET_HEADER_SIZE; -+ if (stream->numbered) -+ wb_size += PACKET_NUMBER_SIZE; ++ kunmap(p); ++ *pgd = target_pgd; + -+ return wb_size; ++ return 0; +} + -+/** -+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer -+ * @stream_type: type of the stream that shall be locked -+ * @msg_size: message size -+ * @flags: pointer to store flags passed back on stream release -+ * -+ * Function will lock the stream and reserve the number of bytes requested -+ * in msg_size for the user. -+ * -+ * Return: pointer to the buffer where message can be stored -+ * -+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). -+ * Only atomic operations are allowed while stream is locked -+ * (i.e. do not use any operation that may sleep). -+ */ -+static char *kbasep_tlstream_msgbuf_acquire( -+ enum tl_stream_type stream_type, -+ size_t msg_size, -+ unsigned long *flags) __acquires(&stream->lock) ++static int mmu_get_bottom_pgd(struct kbase_context *kctx, ++ u64 vpfn, phys_addr_t *out_pgd) +{ -+ struct tl_stream *stream; -+ unsigned int wb_idx_raw; -+ unsigned int wb_idx; -+ size_t wb_size; ++ phys_addr_t pgd; ++ int l; + -+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); -+ KBASE_DEBUG_ASSERT( -+ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= -+ msg_size); ++ lockdep_assert_held(&kctx->mmu_lock); + -+ stream = tl_stream[stream_type]; ++ pgd = kctx->pgd; ++ for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { ++ int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l); ++ /* Handle failure condition */ ++ if (err) { ++ dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); ++ return err; ++ } ++ } + -+ spin_lock_irqsave(&stream->lock, *flags); ++ *out_pgd = pgd; + -+ wb_idx_raw = atomic_read(&stream->wbi); -+ wb_idx = wb_idx_raw % PACKET_COUNT; -+ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ return 0; ++} + -+ /* Select next buffer if data will not fit into current one. 
*/ -+ if (PACKET_SIZE < wb_size + msg_size) { -+ wb_size = kbasep_tlstream_msgbuf_submit( -+ stream, wb_idx_raw, wb_size); -+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; -+ } ++static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) ++{ ++ u64 *page; ++ phys_addr_t target_pgd; + -+ /* Reserve space in selected buffer. */ -+ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); ++ KBASE_DEBUG_ASSERT(pgd); ++ KBASE_DEBUG_ASSERT(NULL != kctx); + -+#if MALI_UNIT_TEST -+ atomic_add(msg_size, &tlstream_bytes_generated); -+#endif /* MALI_UNIT_TEST */ ++ lockdep_assert_held(&kctx->mmu_lock); ++ lockdep_assert_held(&kctx->reg_lock); + -+ return &stream->buffer[wb_idx].data[wb_size]; ++ /* ++ * Architecture spec defines level-0 as being the top-most. ++ * This is a bit unfortunate here, but we keep the same convention. ++ */ ++ vpfn >>= (3 - level) * 9; ++ vpfn &= 0x1FF; ++ ++ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); ++ /* kmap_atomic should NEVER fail */ ++ KBASE_DEBUG_ASSERT(NULL != page); ++ ++ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); ++ /* As we are recovering from what has already been set up, we should have a target_pgd */ ++ KBASE_DEBUG_ASSERT(0 != target_pgd); ++ kunmap_atomic(page); ++ return target_pgd; +} + -+/** -+ * kbasep_tlstream_msgbuf_release - unlock selected stream -+ * @stream_type: type of the stream that shall be locked -+ * @flags: value obtained during stream acquire -+ * -+ * Function releases stream that has been previously locked with a call to -+ * kbasep_tlstream_msgbuf_acquire(). -+ */ -+static void kbasep_tlstream_msgbuf_release( -+ enum tl_stream_type stream_type, -+ unsigned long flags) __releases(&stream->lock) ++static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) +{ -+ struct tl_stream *stream; ++ phys_addr_t pgd; ++ int l; + -+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); ++ lockdep_assert_held(&kctx->mmu_lock); + -+ stream = tl_stream[stream_type]; ++ pgd = kctx->pgd; + -+ /* Mark stream as containing unflushed data. */ -+ atomic_set(&stream->autoflush_counter, 0); ++ for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { ++ pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l); ++ /* Should never fail */ ++ KBASE_DEBUG_ASSERT(0 != pgd); ++ } + -+ spin_unlock_irqrestore(&stream->lock, flags); ++ return pgd; +} + -+/*****************************************************************************/ -+ -+/** -+ * kbasep_tlstream_flush_stream - flush stream -+ * @stype: type of stream to be flushed -+ * -+ * Flush pending data in timeline stream. 
-+ */ -+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) ++static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn, ++ size_t nr) +{ -+ struct tl_stream *stream = tl_stream[stype]; -+ unsigned long flags; -+ unsigned int wb_idx_raw; -+ unsigned int wb_idx; -+ size_t wb_size; -+ size_t min_size = PACKET_HEADER_SIZE; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ struct kbase_mmu_mode const *mmu_mode; + -+ if (stream->numbered) -+ min_size += PACKET_NUMBER_SIZE; ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(0 != vpfn); ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ spin_lock_irqsave(&stream->lock, flags); ++ lockdep_assert_held(&kctx->mmu_lock); ++ lockdep_assert_held(&kctx->reg_lock); + -+ wb_idx_raw = atomic_read(&stream->wbi); -+ wb_idx = wb_idx_raw % PACKET_COUNT; -+ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ mmu_mode = kctx->kbdev->mmu_mode; + -+ if (wb_size > min_size) { -+ wb_size = kbasep_tlstream_msgbuf_submit( -+ stream, wb_idx_raw, wb_size); -+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; -+ atomic_set(&stream->buffer[wb_idx].size, wb_size); ++ while (nr) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; ++ ++ if (count > nr) ++ count = nr; ++ ++ pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn); ++ KBASE_DEBUG_ASSERT(0 != pgd); ++ ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ ++ pgd_page = kmap_atomic(p); ++ KBASE_DEBUG_ASSERT(NULL != pgd_page); ++ ++ /* Invalidate the entries we added */ ++ for (i = 0; i < count; i++) ++ mmu_mode->entry_invalidate(&pgd_page[index + i]); ++ ++ vpfn += count; ++ nr -= count; ++ ++ kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); ++ ++ kunmap_atomic(pgd_page); + } -+ spin_unlock_irqrestore(&stream->lock, flags); +} + -+/** -+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback -+ * @data: unused -+ * -+ * Timer is executed periodically to check if any of the stream contains -+ * buffer ready to be submitted to user space. ++/* ++ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' + */ -+static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *t) ++int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t phys, size_t nr, ++ unsigned long flags) +{ -+ enum tl_stream_type stype; -+ int rcode; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ /* In case the insert_single_page only partially completes we need to be ++ * able to recover */ ++ bool recover_required = false; ++ u64 recover_vpfn = vpfn; ++ size_t recover_count = 0; ++ size_t remain = nr; ++ int err; + -+ CSTD_UNUSED(t); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(0 != vpfn); ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { -+ struct tl_stream *stream = tl_stream[stype]; -+ unsigned long flags; -+ unsigned int wb_idx_raw; -+ unsigned int wb_idx; -+ size_t wb_size; -+ size_t min_size = PACKET_HEADER_SIZE; ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ int af_cnt = atomic_read(&stream->autoflush_counter); ++ mutex_lock(&kctx->mmu_lock); + -+ /* Check if stream contain unflushed data. 
*/ -+ if (0 > af_cnt) -+ continue; ++ while (remain) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; + -+ /* Check if stream should be flushed now. */ -+ if (af_cnt != atomic_cmpxchg( -+ &stream->autoflush_counter, -+ af_cnt, -+ af_cnt + 1)) -+ continue; -+ if (!af_cnt) -+ continue; ++ if (count > remain) ++ count = remain; + -+ /* Autoflush this stream. */ -+ if (stream->numbered) -+ min_size += PACKET_NUMBER_SIZE; ++ /* ++ * Repeatedly calling mmu_get_bottom_pte() is clearly ++ * suboptimal. We don't have to re-parse the whole tree ++ * each time (just cache the l0-l2 sequence). ++ * On the other hand, it's only a gain when we map more than ++ * 256 pages at once (on average). Do we really care? ++ */ ++ do { ++ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); ++ if (err != -ENOMEM) ++ break; ++ /* Fill the memory pool with enough pages for ++ * the page walk to succeed ++ */ ++ mutex_unlock(&kctx->mmu_lock); ++ err = kbase_mem_pool_grow(&kctx->mem_pool, ++ MIDGARD_MMU_BOTTOMLEVEL); ++ mutex_lock(&kctx->mmu_lock); ++ } while (!err); ++ if (err) { ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); ++ if (recover_required) { ++ /* Invalidate the pages we have partially ++ * completed */ ++ mmu_insert_pages_failure_recovery(kctx, ++ recover_vpfn, ++ recover_count); ++ } ++ goto fail_unlock; ++ } + -+ spin_lock_irqsave(&stream->lock, flags); ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); ++ if (recover_required) { ++ /* Invalidate the pages we have partially ++ * completed */ ++ mmu_insert_pages_failure_recovery(kctx, ++ recover_vpfn, ++ recover_count); ++ } ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ wb_idx_raw = atomic_read(&stream->wbi); -+ wb_idx = wb_idx_raw % PACKET_COUNT; -+ wb_size = atomic_read(&stream->buffer[wb_idx].size); ++ for (i = 0; i < count; i++) { ++ unsigned int ofs = index + i; + -+ if (wb_size > min_size) { -+ wb_size = kbasep_tlstream_msgbuf_submit( -+ stream, wb_idx_raw, wb_size); -+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; -+ atomic_set(&stream->buffer[wb_idx].size, -+ wb_size); ++ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); ++ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], ++ phys, flags); + } -+ spin_unlock_irqrestore(&stream->lock, flags); -+ } -+ -+ if (atomic_read(&autoflush_timer_active)) -+ rcode = mod_timer( -+ &autoflush_timer, -+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); -+ CSTD_UNUSED(rcode); -+} + -+/** -+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets -+ * @stype: pointer to variable where stream type will be placed -+ * @rb_idx_raw: pointer to variable where read buffer index will be placed -+ * -+ * Function checks all streams for pending packets. It will stop as soon as -+ * packet ready to be submitted to user space is detected. Variables under -+ * pointers, passed as the parameters to this function will be updated with -+ * values pointing to right stream and buffer. 
-+ * -+ * Return: non-zero if any of timeline streams has at last one packet ready -+ */ -+static int kbasep_tlstream_packet_pending( -+ enum tl_stream_type *stype, -+ unsigned int *rb_idx_raw) -+{ -+ int pending = 0; ++ vpfn += count; ++ remain -= count; + -+ KBASE_DEBUG_ASSERT(stype); -+ KBASE_DEBUG_ASSERT(rb_idx_raw); ++ kbase_mmu_sync_pgd(kctx->kbdev, ++ kbase_dma_addr(p) + (index * sizeof(u64)), ++ count * sizeof(u64)); + -+ for ( -+ *stype = 0; -+ (*stype < TL_STREAM_TYPE_COUNT) && !pending; -+ (*stype)++) { -+ if (NULL != tl_stream[*stype]) { -+ *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi); -+ /* Read buffer index may be updated by writer in case of -+ * overflow. Read and write buffer indexes must be -+ * loaded in correct order. */ -+ smp_rmb(); -+ if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw) -+ pending = 1; -+ } ++ kunmap(p); ++ /* We have started modifying the page table. ++ * If further pages need inserting and fail we need to undo what ++ * has already taken place */ ++ recover_required = true; ++ recover_count += count; + } -+ (*stype)--; ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); ++ return 0; + -+ return pending; ++fail_unlock: ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); ++ return err; +} + -+/** -+ * kbasep_tlstream_read - copy data from streams to buffer provided by user -+ * @filp: pointer to file structure (unused) -+ * @buffer: pointer to the buffer provided by user -+ * @size: maximum amount of data that can be stored in the buffer -+ * @f_pos: pointer to file offset (unused) -+ * -+ * Return: number of bytes stored in the buffer -+ */ -+static ssize_t kbasep_tlstream_read( -+ struct file *filp, -+ char __user *buffer, -+ size_t size, -+ loff_t *f_pos) ++int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t *phys, size_t nr, ++ unsigned long flags) +{ -+ ssize_t copy_len = 0; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ /* In case the insert_pages only partially completes we need to be able ++ * to recover */ ++ bool recover_required = false; ++ u64 recover_vpfn = vpfn; ++ size_t recover_count = 0; ++ size_t remain = nr; ++ int err; + -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(f_pos); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(0 != vpfn); ++ /* 64-bit address range is the max */ ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ if (!buffer) -+ return -EINVAL; ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ if ((0 > *f_pos) || (PACKET_SIZE > size)) -+ return -EINVAL; ++ mutex_lock(&kctx->mmu_lock); + -+ mutex_lock(&tl_reader_lock); ++ while (remain) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; + -+ while (copy_len < size) { -+ enum tl_stream_type stype; -+ unsigned int rb_idx_raw = 0; -+ unsigned int rb_idx; -+ size_t rb_size; ++ if (count > remain) ++ count = remain; + -+ /* If we don't have any data yet, wait for packet to be -+ * submitted. If we already read some packets and there is no -+ * packet pending return back to user. */ -+ if (0 < copy_len) { -+ if (!kbasep_tlstream_packet_pending( -+ &stype, -+ &rb_idx_raw)) -+ break; -+ } else { -+ if (wait_event_interruptible( -+ tl_event_queue, -+ kbasep_tlstream_packet_pending( -+ &stype, -+ &rb_idx_raw))) { -+ copy_len = -ERESTARTSYS; ++ /* ++ * Repeatedly calling mmu_get_bottom_pte() is clearly ++ * suboptimal. 
We don't have to re-parse the whole tree ++ * each time (just cache the l0-l2 sequence). ++ * On the other hand, it's only a gain when we map more than ++ * 256 pages at once (on average). Do we really care? ++ */ ++ do { ++ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); ++ if (err != -ENOMEM) + break; ++ /* Fill the memory pool with enough pages for ++ * the page walk to succeed ++ */ ++ mutex_unlock(&kctx->mmu_lock); ++ err = kbase_mem_pool_grow(&kctx->mem_pool, ++ MIDGARD_MMU_BOTTOMLEVEL); ++ mutex_lock(&kctx->mmu_lock); ++ } while (!err); ++ if (err) { ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); ++ if (recover_required) { ++ /* Invalidate the pages we have partially ++ * completed */ ++ mmu_insert_pages_failure_recovery(kctx, ++ recover_vpfn, ++ recover_count); + } ++ goto fail_unlock; + } + -+ /* Check if this packet fits into the user buffer. -+ * If so copy its content. */ -+ rb_idx = rb_idx_raw % PACKET_COUNT; -+ rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size); -+ if (rb_size > size - copy_len) -+ break; -+ if (copy_to_user( -+ &buffer[copy_len], -+ tl_stream[stype]->buffer[rb_idx].data, -+ rb_size)) { -+ copy_len = -EFAULT; -+ break; ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); ++ if (recover_required) { ++ /* Invalidate the pages we have partially ++ * completed */ ++ mmu_insert_pages_failure_recovery(kctx, ++ recover_vpfn, ++ recover_count); ++ } ++ err = -ENOMEM; ++ goto fail_unlock; + } + -+ /* If the rbi still points to the packet we just processed -+ * then there was no overflow so we add the copied size to -+ * copy_len and move rbi on to the next packet -+ */ -+ smp_rmb(); -+ if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) { -+ copy_len += rb_size; -+ atomic_inc(&tl_stream[stype]->rbi); ++ for (i = 0; i < count; i++) { ++ unsigned int ofs = index + i; + -+#if MALI_UNIT_TEST -+ atomic_add(rb_size, &tlstream_bytes_collected); -+#endif /* MALI_UNIT_TEST */ ++ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); ++ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], ++ phys[i], flags); + } ++ ++ phys += count; ++ vpfn += count; ++ remain -= count; ++ ++ kbase_mmu_sync_pgd(kctx->kbdev, ++ kbase_dma_addr(p) + (index * sizeof(u64)), ++ count * sizeof(u64)); ++ ++ kunmap(p); ++ /* We have started modifying the page table. 
If further pages ++ * need inserting and fail we need to undo what has already ++ * taken place */ ++ recover_required = true; ++ recover_count += count; + } + -+ mutex_unlock(&tl_reader_lock); ++ mutex_unlock(&kctx->mmu_lock); ++ return 0; + -+ return copy_len; ++fail_unlock: ++ mutex_unlock(&kctx->mmu_lock); ++ return err; +} + -+/** -+ * kbasep_tlstream_poll - poll timeline stream for packets -+ * @filp: pointer to file structure -+ * @wait: pointer to poll table -+ * Return: POLLIN if data can be read without blocking, otherwise zero ++/* ++ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ -+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait) ++int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, ++ phys_addr_t *phys, size_t nr, ++ unsigned long flags) +{ -+ enum tl_stream_type stream_type; -+ unsigned int rb_idx; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(wait); ++ int err; + -+ poll_wait(filp, &tl_event_queue, wait); -+ if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx)) -+ return POLLIN; -+ return 0; ++ err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); ++ kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); ++ return err; +} + ++KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); ++ +/** -+ * kbasep_tlstream_release - release timeline stream descriptor -+ * @inode: pointer to inode structure -+ * @filp: pointer to file structure ++ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches ++ * without retaining the kbase context. ++ * @kctx: The KBase context. ++ * @vpfn: The virtual page frame number to start the flush on. ++ * @nr: The number of pages to flush. ++ * @sync: Set if the operation should be synchronous or not. + * -+ * Return always return zero ++ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any ++ * other locking. + */ -+static int kbasep_tlstream_release(struct inode *inode, struct file *filp) ++static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, ++ u64 vpfn, size_t nr, bool sync) +{ -+ KBASE_DEBUG_ASSERT(inode); -+ KBASE_DEBUG_ASSERT(filp); -+ CSTD_UNUSED(inode); -+ CSTD_UNUSED(filp); ++ struct kbase_device *kbdev = kctx->kbdev; ++ int err; ++ u32 op; + -+ /* Stop autoflush timer before releasing access to streams. */ -+ atomic_set(&autoflush_timer_active, 0); -+ del_timer_sync(&autoflush_timer); ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return; + -+ atomic_set(&kbase_tlstream_enabled, 0); -+ return 0; ++ if (sync) ++ op = AS_COMMAND_FLUSH_MEM; ++ else ++ op = AS_COMMAND_FLUSH_PT; ++ ++ err = kbase_mmu_hw_do_operation(kbdev, ++ &kbdev->as[kctx->as_nr], ++ kctx, vpfn, nr, op, 0); ++#if KBASE_GPU_RESET_EN ++ if (err) { ++ /* Flush failed to complete, assume the ++ * GPU has hung and perform a reset to ++ * recover */ ++ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); ++ ++ if (kbase_prepare_to_reset_gpu_locked(kbdev)) ++ kbase_reset_gpu_locked(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ ++#ifndef CONFIG_MALI_NO_MALI ++ /* ++ * As this function could be called in interrupt context the sync ++ * request can't block. Instead log the request and the next flush ++ * request will pick it up. 
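++	 * (The flag set here is consumed by kbase_mmu_flush_invalidate(),
++	 * which atomically exchanges kctx->drain_pending back to zero and,
++	 * if it was set, waits for the GPU write buffer to drain.)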
++ */ ++ if ((!err) && sync && ++ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) ++ atomic_set(&kctx->drain_pending, 1); ++#endif /* !CONFIG_MALI_NO_MALI */ +} + -+/** -+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet -+ * @stream_type: type of the stream that will carry header data -+ * @tp_desc: pointer to array with tracepoint descriptors -+ * @tp_count: number of descriptors in the given array -+ * -+ * Functions fills in information about tracepoints stored in body stream -+ * associated with this header stream. -+ */ -+static void kbasep_tlstream_timeline_header( -+ enum tl_stream_type stream_type, -+ const struct tp_desc *tp_desc, -+ u32 tp_count) ++static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, ++ u64 vpfn, size_t nr, bool sync) +{ -+ const u8 tv = SWTRACE_VERSION; /* protocol version */ -+ const u8 ps = sizeof(void *); /* pointer size */ -+ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count); -+ char *buffer; -+ size_t pos = 0; -+ unsigned long flags; -+ unsigned int i; ++ struct kbase_device *kbdev; ++ bool ctx_is_in_runpool; ++#ifndef CONFIG_MALI_NO_MALI ++ bool drain_pending = false; + -+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); -+ KBASE_DEBUG_ASSERT(tp_desc); ++ if (atomic_xchg(&kctx->drain_pending, 0)) ++ drain_pending = true; ++#endif /* !CONFIG_MALI_NO_MALI */ + -+ /* Calculate the size of the timeline message. */ -+ for (i = 0; i < tp_count; i++) { -+ msg_size += sizeof(tp_desc[i].id); -+ msg_size += -+ strnlen(tp_desc[i].id_str, STRLEN_MAX) + -+ sizeof(char) + sizeof(u32); -+ msg_size += -+ strnlen(tp_desc[i].name, STRLEN_MAX) + -+ sizeof(char) + sizeof(u32); -+ msg_size += -+ strnlen(tp_desc[i].arg_types, STRLEN_MAX) + -+ sizeof(char) + sizeof(u32); -+ msg_size += -+ strnlen(tp_desc[i].arg_names, STRLEN_MAX) + -+ sizeof(char) + sizeof(u32); -+ } ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return; + -+ KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size); ++ kbdev = kctx->kbdev; ++ mutex_lock(&kbdev->js_data.queue_mutex); ++ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); ++ mutex_unlock(&kbdev->js_data.queue_mutex); + -+ buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ if (ctx_is_in_runpool) { ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv)); -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &tp_count, sizeof(tp_count)); ++ if (!kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ int err; ++ u32 op; + -+ for (i = 0; i < tp_count; i++) { -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, -+ &tp_desc[i].id, sizeof(tp_desc[i].id)); -+ pos = kbasep_tlstream_write_string( -+ buffer, pos, -+ tp_desc[i].id_str, msg_size - pos); -+ pos = kbasep_tlstream_write_string( -+ buffer, pos, -+ tp_desc[i].name, msg_size - pos); -+ pos = kbasep_tlstream_write_string( -+ buffer, pos, -+ tp_desc[i].arg_types, msg_size - pos); -+ pos = kbasep_tlstream_write_string( -+ buffer, pos, -+ tp_desc[i].arg_names, msg_size - pos); -+ } ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (sync) ++ op = AS_COMMAND_FLUSH_MEM; ++ else ++ op = AS_COMMAND_FLUSH_PT; + -+ kbasep_tlstream_msgbuf_release(stream_type, flags); ++ err = kbase_mmu_hw_do_operation(kbdev, ++ 
&kbdev->as[kctx->as_nr], ++ kctx, vpfn, nr, op, 0); + -+ /* We don't expect any more data to be read in this stream. -+ * As header stream must be read before its associated body stream, -+ * make this packet visible to the user straightaway. */ -+ kbasep_tlstream_flush_stream(stream_type); -+} ++#if KBASE_GPU_RESET_EN ++ if (err) { ++ /* Flush failed to complete, assume the ++ * GPU has hung and perform a reset to ++ * recover */ ++ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + -+/*****************************************************************************/ ++ if (kbase_prepare_to_reset_gpu(kbdev)) ++ kbase_reset_gpu(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ + -+int kbase_tlstream_init(void) -+{ -+ enum tl_stream_type i; ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ + -+ /* Prepare stream structures. */ -+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { -+ tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL); -+ if (!tl_stream[i]) -+ break; -+ kbasep_timeline_stream_init(tl_stream[i], i); -+ } -+ if (TL_STREAM_TYPE_COUNT > i) { -+ for (; i > 0; i--) { -+ kbasep_timeline_stream_term(tl_stream[i - 1]); -+ kfree(tl_stream[i - 1]); ++#ifndef CONFIG_MALI_NO_MALI ++ /* ++ * The transaction lock must be dropped before here ++ * as kbase_wait_write_flush could take it if ++ * the GPU was powered down (static analysis doesn't ++ * know this can't happen). ++ */ ++ drain_pending |= (!err) && sync && ++ kbase_hw_has_issue(kctx->kbdev, ++ BASE_HW_ISSUE_6367); ++ if (drain_pending) { ++ /* Wait for GPU to flush write buffer */ ++ kbase_wait_write_flush(kctx); ++ } ++#endif /* !CONFIG_MALI_NO_MALI */ ++ ++ kbase_pm_context_idle(kbdev); + } -+ return -ENOMEM; ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + } ++} + -+ /* Initialize autoflush timer. */ -+ atomic_set(&autoflush_timer_active, 0); -+ timer_setup(&autoflush_timer, -+ kbasep_tlstream_autoflush_timer_callback, -+ 0); ++void kbase_mmu_update(struct kbase_context *kctx) ++{ ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); ++ /* ASSERT that the context has a valid as_nr, which is only the case ++ * when it's scheduled in. ++ * ++ * as_nr won't change because the caller has the hwaccess_lock */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+ return 0; ++ kctx->kbdev->mmu_mode->update(kctx); +} ++KBASE_EXPORT_TEST_API(kbase_mmu_update); + -+void kbase_tlstream_term(void) ++void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ -+ enum tl_stream_type i; ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->mmu_hw_mutex); + -+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { -+ kbasep_timeline_stream_term(tl_stream[i]); -+ kfree(tl_stream[i]); -+ } ++ kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + -+static void kbase_create_timeline_objects(struct kbase_context *kctx) ++void kbase_mmu_disable(struct kbase_context *kctx) +{ -+ struct kbase_device *kbdev = kctx->kbdev; -+ unsigned int lpu_id; -+ unsigned int as_nr; -+ struct kbasep_kctx_list_element *element; -+ -+ /* Create LPU objects. */ -+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { -+ u32 *lpu = -+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; -+ KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu); -+ } ++ /* ASSERT that the context has a valid as_nr, which is only the case ++ * when it's scheduled in. 
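++	 * (Unlike kbase_mmu_disable_as() above, which takes an explicit
++	 * as_nr, this variant also drains the caches for the context via
++	 * the noretain flush below before disabling the address space.)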
++ * ++ * as_nr won't change because the caller has the hwaccess_lock */ ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+ /* Create Address Space objects. */ -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr); ++ lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + -+ /* Create GPU object and make it retain all LPUs and address spaces. */ -+ KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU( -+ kbdev, -+ kbdev->gpu_props.props.raw_props.gpu_id, -+ kbdev->gpu_props.num_cores); -+ -+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { -+ void *lpu = -+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; -+ KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev); -+ } -+ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) -+ KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU( -+ &kbdev->as[as_nr], -+ kbdev); -+ -+ /* Create object for each known context. */ -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_for_each_entry(element, &kbdev->kctx_list, link) { -+ KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX( -+ element->kctx, -+ (u32)(element->kctx->id), -+ (u32)(element->kctx->tgid)); -+ } -+ /* Before releasing the lock, reset body stream buffers. -+ * This will prevent context creation message to be directed to both -+ * summary and body stream. -+ */ -+ kbase_tlstream_reset_body_streams(); -+ mutex_unlock(&kbdev->kctx_list_lock); -+ /* Static object are placed into summary packet that needs to be -+ * transmitted first. Flush all streams to make it available to -+ * user space. ++ /* ++ * The address space is being disabled, drain all knowledge of it out ++ * from the caches as pages and page tables might be freed after this. ++ * ++ * The job scheduler code will already be holding the locks and context ++ * so just do the flush. + */ -+ kbase_tlstream_flush_streams(); ++ kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); ++ ++ kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} ++KBASE_EXPORT_TEST_API(kbase_mmu_disable); + -+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) ++/* ++ * We actually only discard the ATE, and not the page table ++ * pages. There is a potential DoS here, as we'll leak memory by ++ * having PTEs that are potentially unused. Will require physical ++ * page accounting, so MMU pages are part of the process allocation. ++ * ++ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is ++ * currently scheduled into the runpool, and so potentially uses a lot of locks. ++ * These locks must be taken in the correct order with respect to others ++ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more ++ * information. 
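++ *
++ * The body below walks the requested range in chunks of at most
++ * KBASE_MMU_PAGE_ENTRIES entries, invalidates each bottom-level ATE,
++ * syncs the touched part of the PGD, and finally flushes/invalidates
++ * the GPU caches for the whole range.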
++ */ ++int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) +{ -+ int ret; -+ u32 tlstream_enabled = TLSTREAM_ENABLED | flags; ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ struct kbase_device *kbdev; ++ size_t requested_nr = nr; ++ struct kbase_mmu_mode const *mmu_mode; ++ int err; + -+ if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) { -+ int rcode; ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); + -+ ret = anon_inode_getfd( -+ "[mali_tlstream]", -+ &kbasep_tlstream_fops, -+ kctx, -+ O_RDONLY | O_CLOEXEC); -+ if (ret < 0) { -+ atomic_set(&kbase_tlstream_enabled, 0); -+ return ret; -+ } ++ if (0 == nr) { ++ /* early out if nothing to do */ ++ return 0; ++ } + -+ /* Reset and initialize header streams. */ -+ kbasep_timeline_stream_reset( -+ tl_stream[TL_STREAM_TYPE_OBJ_HEADER]); -+ kbasep_timeline_stream_reset( -+ tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]); -+ kbasep_timeline_stream_reset( -+ tl_stream[TL_STREAM_TYPE_AUX_HEADER]); -+ kbasep_tlstream_timeline_header( -+ TL_STREAM_TYPE_OBJ_HEADER, -+ tp_desc_obj, -+ ARRAY_SIZE(tp_desc_obj)); -+ kbasep_tlstream_timeline_header( -+ TL_STREAM_TYPE_AUX_HEADER, -+ tp_desc_aux, -+ ARRAY_SIZE(tp_desc_aux)); ++ mutex_lock(&kctx->mmu_lock); + -+ /* Start autoflush timer. */ -+ atomic_set(&autoflush_timer_active, 1); -+ rcode = mod_timer( -+ &autoflush_timer, -+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); -+ CSTD_UNUSED(rcode); ++ kbdev = kctx->kbdev; ++ mmu_mode = kbdev->mmu_mode; + -+ /* If job dumping is enabled, readjust the software event's -+ * timeout as the default value of 3 seconds is often -+ * insufficient. */ -+ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { -+ dev_info(kctx->kbdev->dev, -+ "Job dumping is enabled, readjusting the software event's timeout\n"); -+ atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms, -+ 1800000); ++ while (nr) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; ++ ++ if (count > nr) ++ count = nr; ++ ++ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); ++ if (err) { ++ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); ++ err = -EINVAL; ++ goto fail_unlock; + } + -+ /* Summary stream was cleared during acquire. -+ * Create static timeline objects that will be -+ * read by client. 
-+ */ -+ kbase_create_timeline_objects(kctx); ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); ++ err = -ENOMEM; ++ goto fail_unlock; ++ } + -+ } else { -+ ret = -EBUSY; -+ } ++ for (i = 0; i < count; i++) ++ mmu_mode->entry_invalidate(&pgd_page[index + i]); + -+ return ret; -+} ++ vpfn += count; ++ nr -= count; + -+void kbase_tlstream_flush_streams(void) -+{ -+ enum tl_stream_type stype; ++ kbase_mmu_sync_pgd(kctx->kbdev, ++ kbase_dma_addr(p) + (index * sizeof(u64)), ++ count * sizeof(u64)); + -+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) -+ kbasep_tlstream_flush_stream(stype); -+} ++ kunmap(p); ++ } + -+void kbase_tlstream_reset_body_streams(void) -+{ -+ kbasep_timeline_stream_reset( -+ tl_stream[TL_STREAM_TYPE_OBJ]); -+ kbasep_timeline_stream_reset( -+ tl_stream[TL_STREAM_TYPE_AUX]); -+} ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ return 0; + -+#if MALI_UNIT_TEST -+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) -+{ -+ KBASE_DEBUG_ASSERT(bytes_collected); -+ KBASE_DEBUG_ASSERT(bytes_generated); -+ *bytes_collected = atomic_read(&tlstream_bytes_collected); -+ *bytes_generated = atomic_read(&tlstream_bytes_generated); ++fail_unlock: ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ return err; +} -+#endif /* MALI_UNIT_TEST */ + -+/*****************************************************************************/ ++KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + -+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) ++/** ++ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'. ++ * This call is being triggered as a response to the changes of the mem attributes ++ * ++ * @pre : The caller is responsible for validating the memory attributes ++ * ++ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is ++ * currently scheduled into the runpool, and so potentially uses a lot of locks. ++ * These locks must be taken in the correct order with respect to others ++ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more ++ * information. 
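++ *
++ * Unlike kbase_mmu_teardown_pages(), the affected entries are rewritten
++ * in place with the new flags via entry_set_ate(); the memory pool is
++ * grown and the walk retried if allocating an intermediate page table
++ * fails with -ENOMEM.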
++ */ ++int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) +{ -+ const u32 msg_id = KBASE_TL_NEW_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + -+ sizeof(tgid); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ phys_addr_t pgd; ++ u64 *pgd_page; ++ size_t requested_nr = nr; ++ struct kbase_mmu_mode const *mmu_mode; ++ int err; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &nr, sizeof(nr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &tgid, sizeof(tgid)); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(0 != vpfn); ++ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* Early out if there is nothing to do */ ++ if (nr == 0) ++ return 0; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -+} ++ mutex_lock(&kctx->mmu_lock); + -+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) -+{ -+ const u32 msg_id = KBASE_TL_NEW_GPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) + -+ sizeof(core_count); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ mmu_mode = kctx->kbdev->mmu_mode; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages", ++ vpfn, phys, nr); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &id, sizeof(id)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &core_count, sizeof(core_count)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ while (nr) { ++ unsigned int i; ++ unsigned int index = vpfn & 0x1FF; ++ size_t count = KBASE_MMU_PAGE_ENTRIES - index; ++ struct page *p; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -+} ++ if (count > nr) ++ count = nr; + -+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) -+{ -+ const u32 msg_id = KBASE_TL_NEW_LPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) + -+ sizeof(fn); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ do { ++ err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); ++ if (err != -ENOMEM) ++ break; ++ /* Fill the memory pool with enough pages for ++ * the page walk to succeed ++ */ ++ mutex_unlock(&kctx->mmu_lock); ++ err = kbase_mem_pool_grow(&kctx->mem_pool, ++ MIDGARD_MMU_BOTTOMLEVEL); ++ mutex_lock(&kctx->mmu_lock); ++ } while (!err); ++ if (err) { ++ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); ++ goto fail_unlock; ++ } + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ p = pfn_to_page(PFN_DOWN(pgd)); ++ pgd_page = kmap(p); ++ if (!pgd_page) { ++ dev_warn(kctx->kbdev->dev, "kmap failure\n"); ++ err = 
-ENOMEM; ++ goto fail_unlock; ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &nr, sizeof(nr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &fn, sizeof(fn)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ for (i = 0; i < count; i++) ++ mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i], ++ flags); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -+} ++ phys += count; ++ vpfn += count; ++ nr -= count; + -+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) -+{ -+ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ kbase_mmu_sync_pgd(kctx->kbdev, ++ kbase_dma_addr(p) + (index * sizeof(u64)), ++ count * sizeof(u64)); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ kunmap(pfn_to_page(PFN_DOWN(pgd))); ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ return 0; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); ++fail_unlock: ++ mutex_unlock(&kctx->mmu_lock); ++ kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); ++ return err; +} + -+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) ++/* This is a debug feature only */ ++static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd) +{ -+ const u32 msg_id = KBASE_TL_NEW_AS; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ u64 *page; ++ int i; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ lockdep_assert_held(&kctx->reg_lock); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &nr, sizeof(nr)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); ++ /* kmap_atomic should NEVER fail. 
*/ ++ KBASE_DEBUG_ASSERT(NULL != page); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { ++ if (kctx->kbdev->mmu_mode->ate_is_valid(page[i])) ++ beenthere(kctx, "live pte %016lx", (unsigned long)page[i]); ++ } ++ kunmap_atomic(page); +} + -+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) ++static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer) +{ -+ const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ_SUMMARY, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ phys_addr_t target_pgd; ++ u64 *pgd_page; ++ int i; ++ struct kbase_mmu_mode const *mmu_mode; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ lockdep_assert_held(&kctx->mmu_lock); ++ lockdep_assert_held(&kctx->reg_lock); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -+} ++ pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); ++ /* kmap_atomic should NEVER fail. */ ++ KBASE_DEBUG_ASSERT(NULL != pgd_page); ++ /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */ ++ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); ++ kunmap_atomic(pgd_page); ++ pgd_page = pgd_page_buffer; + -+/*****************************************************************************/ ++ mmu_mode = kctx->kbdev->mmu_mode; + -+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) -+{ -+ const u32 msg_id = KBASE_TL_NEW_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + -+ sizeof(tgid); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { ++ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ if (target_pgd) { ++ if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) { ++ mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64))); ++ } else { ++ /* ++ * So target_pte is a level-3 page. ++ * As a leaf, it is safe to free it. ++ * Unless we have live pages attached to it! 
++ */ ++ mmu_check_unused(kctx, target_pgd); ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &nr, sizeof(nr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &tgid, sizeof(tgid)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1); ++ if (zap) { ++ struct page *p = phys_to_page(target_pgd); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ kbase_mem_pool_free(&kctx->mem_pool, p, true); ++ kbase_process_page_usage_dec(kctx, 1); ++ kbase_atomic_sub_pages(1, &kctx->used_pages); ++ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); ++ } ++ } ++ } +} + -+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) ++int kbase_mmu_init(struct kbase_context *kctx) +{ -+ const u32 msg_id = KBASE_TL_NEW_ATOM; -+ const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) + -+ sizeof(nr); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); -+ -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &nr, sizeof(nr)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); -+ -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + -+void __kbase_tlstream_tl_del_ctx(void *context) -+{ -+ const u32 msg_id = KBASE_TL_DEL_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(context); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ mutex_init(&kctx->mmu_lock); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ ++ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (NULL == kctx->mmu_teardown_pages) ++ return -ENOMEM; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ return 0; +} + -+void __kbase_tlstream_tl_del_atom(void *atom) ++void kbase_mmu_term(struct kbase_context *kctx) +{ -+ const u32 msg_id = KBASE_TL_DEL_ATOM; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); -+ -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ kfree(kctx->mmu_teardown_pages); 
++ kctx->mmu_teardown_pages = NULL; +} + -+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) ++void kbase_mmu_free_pgd(struct kbase_context *kctx) +{ -+ const u32 msg_id = KBASE_TL_RET_CTX_LPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); -+ -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); -+ -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ int new_page_count __maybe_unused; + -+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) -+{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ mutex_lock(&kctx->mmu_lock); ++ mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); ++ mutex_unlock(&kctx->mmu_lock); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); ++ kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); ++ kbase_process_page_usage_dec(kctx, 1); ++ new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); ++ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ KBASE_TLSTREAM_AUX_PAGESALLOC( ++ (u32)kctx->id, ++ (u64)new_page_count); +} + -+void __kbase_tlstream_tl_ret_atom_lpu( -+ void *atom, void *lpu, const char *attrib_match_list) ++KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); ++ ++static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) +{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_LPU; -+ const size_t msg_s0 = sizeof(u32) + sizeof(char) + -+ strnlen(attrib_match_list, STRLEN_MAX); -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + -+ sizeof(atom) + sizeof(lpu) + msg_s0; -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ phys_addr_t target_pgd; ++ u64 *pgd_page; ++ int i; ++ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); ++ size_t dump_size; ++ struct kbase_mmu_mode const *mmu_mode; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(NULL != kctx); ++ lockdep_assert_held(&kctx->mmu_lock); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = 
kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ pos = kbasep_tlstream_write_string( -+ buffer, pos, attrib_match_list, msg_s0); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ mmu_mode = kctx->kbdev->mmu_mode; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); ++ if (!pgd_page) { ++ dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n"); ++ return 0; ++ } + -+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) -+{ -+ const u32 msg_id = KBASE_TL_NRET_CTX_LPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (*size_left >= size) { ++ /* A modified physical address that contains the page table level */ ++ u64 m_pgd = pgd | level; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Put the modified physical address in the output buffer */ ++ memcpy(*buffer, &m_pgd, sizeof(m_pgd)); ++ *buffer += sizeof(m_pgd); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* Followed by the page table itself */ ++ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); ++ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ *size_left -= size; ++ } + -+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) -+{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (level < MIDGARD_MMU_BOTTOMLEVEL) { ++ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { ++ if (mmu_mode->pte_is_valid(pgd_page[i])) { ++ target_pgd = mmu_mode->pte_to_phy_addr( ++ pgd_page[i]); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ dump_size = kbasep_mmu_dump_level(kctx, ++ target_pgd, level + 1, ++ buffer, size_left); ++ if (!dump_size) { ++ kunmap(pfn_to_page(PFN_DOWN(pgd))); ++ return 0; ++ } ++ size += dump_size; ++ } ++ } ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &context, sizeof(context)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ kunmap(pfn_to_page(PFN_DOWN(pgd))); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ return size; +} + -+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) ++void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) +{ -+ const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ void *kaddr; ++ size_t size_left; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(kctx); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ 
pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom1, sizeof(atom1)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom2, sizeof(atom2)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (0 == nr_pages) { ++ /* can't dump in a 0 sized buffer, early out */ ++ return NULL; ++ } + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ size_left = nr_pages * PAGE_SIZE; + -+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) -+{ -+ const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ KBASE_DEBUG_ASSERT(0 != size_left); ++ kaddr = vmalloc_user(size_left); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ mutex_lock(&kctx->mmu_lock); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom1, sizeof(atom1)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom2, sizeof(atom2)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (kaddr) { ++ u64 end_marker = 0xFFULL; ++ char *buffer; ++ char *mmu_dump_buffer; ++ u64 config[3]; ++ size_t size; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ buffer = (char *)kaddr; ++ mmu_dump_buffer = buffer; + -+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) -+{ -+ const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (kctx->api_version >= KBASE_API_VERSION(8, 4)) { ++ struct kbase_mmu_setup as_setup; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); ++ config[0] = as_setup.transtab; ++ config[1] = as_setup.memattr; ++ config[2] = as_setup.transcfg; ++ memcpy(buffer, &config, sizeof(config)); ++ mmu_dump_buffer += sizeof(config); ++ size_left -= sizeof(config); ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom1, sizeof(atom1)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom2, sizeof(atom2)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} + -+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) -+{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ size = kbasep_mmu_dump_level(kctx, ++ kctx->pgd, ++ MIDGARD_MMU_TOPLEVEL, ++ &mmu_dump_buffer, ++ &size_left); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ if (!size) ++ goto fail_free; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* Add 
on the size for the end marker */ ++ size += sizeof(u64); ++ /* Add on the size for the config */ ++ if (kctx->api_version >= KBASE_API_VERSION(8, 4)) ++ size += sizeof(config); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} + -+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) -+{ -+ const u32 msg_id = KBASE_TL_RET_AS_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { ++ /* The buffer isn't big enough - free the memory and return failure */ ++ goto fail_free; ++ } + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Add the end marker */ ++ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &ctx, sizeof(ctx)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ mutex_unlock(&kctx->mmu_lock); ++ return kaddr; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++fail_free: ++ vfree(kaddr); ++ mutex_unlock(&kctx->mmu_lock); ++ return NULL; +} ++KBASE_EXPORT_TEST_API(kbase_mmu_dump); + -+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) ++void bus_fault_worker(struct work_struct *data) +{ -+ const u32 msg_id = KBASE_TL_NRET_AS_CTX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ struct kbase_as *faulting_as; ++ int as_no; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++#if KBASE_GPU_RESET_EN ++ bool reset_status = false; ++#endif /* KBASE_GPU_RESET_EN */ + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ faulting_as = container_of(data, struct kbase_as, work_busfault); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &ctx, sizeof(ctx)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ as_no = faulting_as->number; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + -+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) -+{ -+ const u32 msg_id = KBASE_TL_RET_ATOM_AS; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* Grab the context that was already refcounted in kbase_mmu_interrupt(). 
++ * Therefore, it cannot be scheduled out of this AS until we explicitly release it ++ */ ++ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); ++ if (WARN_ON(!kctx)) { ++ atomic_dec(&kbdev->faults_pending); ++ return; ++ } + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ if (unlikely(faulting_as->protected_mode)) ++ { ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Permission failure"); ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ kbasep_js_runpool_release_ctx(kbdev, kctx); ++ atomic_dec(&kbdev->faults_pending); ++ return; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ } + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++#if KBASE_GPU_RESET_EN ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { ++ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. ++ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs ++ * are evicted from the GPU before the switch. ++ */ ++ dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n"); ++ reset_status = kbase_prepare_to_reset_gpu(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ ++ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ unsigned long flags; + -+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) -+{ -+ const u32 msg_id = KBASE_TL_NRET_ATOM_AS; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Set the MMU into unmapped mode */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + -+void __kbase_tlstream_tl_attrib_atom_config( -+ void *atom, u64 jd, u64 affinity, u32 config) -+{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + -+ sizeof(jd) + sizeof(affinity) + sizeof(config); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ kbase_pm_context_idle(kbdev); ++ } + -+ buffer = 
kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++#if KBASE_GPU_RESET_EN ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) ++ kbase_reset_gpu(kbdev); ++#endif /* KBASE_GPU_RESET_EN */ + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &jd, sizeof(jd)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &affinity, sizeof(affinity)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &config, sizeof(config)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ kbasep_js_runpool_release_ctx(kbdev, kctx); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ atomic_dec(&kbdev->faults_pending); +} + -+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) ++const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ const char *e; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &prio, sizeof(prio)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ switch (exception_code) { ++ /* Non-Fault Status code */ ++ case 0x00: ++ e = "NOT_STARTED/IDLE/OK"; ++ break; ++ case 0x01: ++ e = "DONE"; ++ break; ++ case 0x02: ++ e = "INTERRUPTED"; ++ break; ++ case 0x03: ++ e = "STOPPED"; ++ break; ++ case 0x04: ++ e = "TERMINATED"; ++ break; ++ case 0x08: ++ e = "ACTIVE"; ++ break; ++ /* Job exceptions */ ++ case 0x40: ++ e = "JOB_CONFIG_FAULT"; ++ break; ++ case 0x41: ++ e = "JOB_POWER_FAULT"; ++ break; ++ case 0x42: ++ e = "JOB_READ_FAULT"; ++ break; ++ case 0x43: ++ e = "JOB_WRITE_FAULT"; ++ break; ++ case 0x44: ++ e = "JOB_AFFINITY_FAULT"; ++ break; ++ case 0x48: ++ e = "JOB_BUS_FAULT"; ++ break; ++ case 0x50: ++ e = "INSTR_INVALID_PC"; ++ break; ++ case 0x51: ++ e = "INSTR_INVALID_ENC"; ++ break; ++ case 0x52: ++ e = "INSTR_TYPE_MISMATCH"; ++ break; ++ case 0x53: ++ e = "INSTR_OPERAND_FAULT"; ++ break; ++ case 0x54: ++ e = "INSTR_TLS_FAULT"; ++ break; ++ case 0x55: ++ e = "INSTR_BARRIER_FAULT"; ++ break; ++ case 0x56: ++ e = "INSTR_ALIGN_FAULT"; ++ break; ++ case 0x58: ++ e = "DATA_INVALID_FAULT"; ++ break; ++ case 0x59: ++ e = "TILE_RANGE_FAULT"; ++ break; ++ case 0x5A: ++ e = "ADDR_RANGE_FAULT"; ++ break; ++ case 0x60: ++ e = "OUT_OF_MEMORY"; ++ break; ++ /* GPU exceptions */ ++ case 0x80: ++ e = "DELAYED_BUS_FAULT"; ++ break; ++ case 0x88: ++ e = "SHAREABILITY_FAULT"; ++ break; ++ /* MMU exceptions */ ++ case 0xC0: ++ case 0xC1: ++ case 0xC2: ++ case 0xC3: ++ case 0xC4: ++ case 0xC5: ++ case 0xC6: ++ case 0xC7: ++ e = "TRANSLATION_FAULT"; ++ break; ++ case 0xC8: ++ e = "PERMISSION_FAULT"; ++ break; ++ case 0xC9: ++ case 0xCA: ++ case 0xCB: ++ case 0xCC: ++ case 0xCD: ++ case 0xCE: ++ case 0xCF: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ e = "PERMISSION_FAULT"; ++ else ++ e = "UNKNOWN"; ++ break; ++ case 0xD0: ++ case 
0xD1: ++ case 0xD2: ++ case 0xD3: ++ case 0xD4: ++ case 0xD5: ++ case 0xD6: ++ case 0xD7: ++ e = "TRANSTAB_BUS_FAULT"; ++ break; ++ case 0xD8: ++ e = "ACCESS_FLAG"; ++ break; ++ case 0xD9: ++ case 0xDA: ++ case 0xDB: ++ case 0xDC: ++ case 0xDD: ++ case 0xDE: ++ case 0xDF: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ e = "ACCESS_FLAG"; ++ else ++ e = "UNKNOWN"; ++ break; ++ case 0xE0: ++ case 0xE1: ++ case 0xE2: ++ case 0xE3: ++ case 0xE4: ++ case 0xE5: ++ case 0xE6: ++ case 0xE7: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ e = "ADDRESS_SIZE_FAULT"; ++ else ++ e = "UNKNOWN"; ++ break; ++ case 0xE8: ++ case 0xE9: ++ case 0xEA: ++ case 0xEB: ++ case 0xEC: ++ case 0xED: ++ case 0xEE: ++ case 0xEF: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ e = "MEMORY_ATTRIBUTES_FAULT"; ++ else ++ e = "UNKNOWN"; ++ break; ++ default: ++ e = "UNKNOWN"; ++ break; ++ }; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ return e; +} + -+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) ++static const char *access_type_name(struct kbase_device *kbdev, ++ u32 fault_status) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); -+ -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &state, sizeof(state)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); -+ -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ return "ATOMIC"; ++ else ++ return "UNKNOWN"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ return "READ"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ return "WRITE"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ return "EXECUTE"; ++ default: ++ WARN_ON(1); ++ return NULL; ++ } +} + -+void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom) ++/** ++ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. 
++ */ ++static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str) +{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom); + unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ int exception_type; ++ int access_type; ++ int source_id; ++ int as_no; ++ struct kbase_device *kbdev; ++ struct kbasep_js_device_data *js_devdata; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++#if KBASE_GPU_RESET_EN ++ bool reset_status = false; ++#endif + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ as_no = as->number; ++ kbdev = kctx->kbdev; ++ js_devdata = &kbdev->js_data; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ /* ASSERT that the context won't leave the runpool */ ++ KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + -+void __kbase_tlstream_tl_attrib_atom_jit( -+ void *atom, u64 edit_addr, u64 new_addr) -+{ -+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) -+ + sizeof(edit_addr) + sizeof(new_addr); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* decode the fault status */ ++ exception_type = as->fault_status & 0xFF; ++ access_type = (as->fault_status >> 8) & 0x3; ++ source_id = (as->fault_status >> 16); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "Reason: %s\n" ++ "raw fault status: 0x%X\n" ++ "decoded fault status: %s\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, as->fault_addr, ++ reason_str, ++ as->fault_status, ++ (as->fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), ++ exception_type, kbase_exception_name(kbdev, exception_type), ++ access_type, access_type_name(kbdev, as->fault_status), ++ source_id, ++ kctx->pid); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &edit_addr, sizeof(edit_addr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &new_addr, sizeof(new_addr)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* hardware counters dump fault handling */ ++ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && ++ (kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_DUMPING)) { ++ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ if ((as->fault_addr >= kbdev->hwcnt.addr) && ++ (as->fault_addr < (kbdev->hwcnt.addr + ++ (num_core_groups * 2048)))) ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; ++ } + -+void __kbase_tlstream_tl_attrib_as_config( -+ void *as, u64 transtab, u64 memattr, u64 transcfg) -+{ -+ const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(as) + -+ sizeof(transtab) + sizeof(memattr) + sizeof(transcfg); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* Stop the kctx from submitting more jobs and cause it to be scheduled ++ * out/rescheduled - this will occur on releasing the context's refcount */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this ++ * context can appear in the job slots from this point on */ ++ kbase_backend_jm_kill_jobs_from_kctx(kctx); ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++#if KBASE_GPU_RESET_EN ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { ++ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. ++ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs ++ * are evicted from the GPU before the switch. ++ */ ++ dev_err(kbdev->dev, "Unhandled page fault. 
For this GPU version we now soft-reset the GPU as part of page fault recovery."); ++ reset_status = kbase_prepare_to_reset_gpu(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ ++ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &as, sizeof(as)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &transtab, sizeof(transtab)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &memattr, sizeof(memattr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &transcfg, sizeof(transcfg)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ ++ /* Clear down the fault */ ++ kbase_mmu_hw_clear_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++#if KBASE_GPU_RESET_EN ++ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) ++ kbase_reset_gpu(kbdev); ++#endif /* KBASE_GPU_RESET_EN */ +} + -+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) ++void kbasep_as_do_poke(struct work_struct *work) +{ -+ const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu); ++ struct kbase_as *as; ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx; + unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &lpu, sizeof(lpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); -+ -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ KBASE_DEBUG_ASSERT(work); ++ as = container_of(work, struct kbase_as, poke_work); ++ kbdev = container_of(as, struct kbase_device, as[as->number]); ++ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + -+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) -+{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* GPU power will already be active by virtue of the caller holding a JS ++ * reference on the address space, and will not release it until this worker ++ * has finished */ + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Further to the comment above, we know that while this function is running ++ * the AS will not be released as before the atom is released this workqueue ++ * is flushed (in kbase_as_poking_timer_release_atom) ++ */ ++ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* AS transaction begin */ 
++ mutex_lock(&kbdev->mmu_hw_mutex); ++ /* Force a uTLB invalidate */ ++ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, ++ AS_COMMAND_UNLOCK, 0); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ /* AS transaction end */ + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (as->poke_refcount && ++ !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { ++ /* Only queue up the timer if we need it, and we're not trying to kill it */ ++ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + -+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) ++enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) +{ -+ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(atom); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ struct kbase_as *as; ++ int queue_work_ret; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &atom, sizeof(atom)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ KBASE_DEBUG_ASSERT(NULL != timer); ++ as = container_of(timer, struct kbase_as, poke_timer); ++ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++ queue_work_ret = queue_work(as->poke_wq, &as->poke_work); ++ KBASE_DEBUG_ASSERT(queue_work_ret); ++ return HRTIMER_NORESTART; +} + -+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) ++/** ++ * Retain the poking timer on an atom's context (if the atom hasn't already ++ * done so), and start the timer (if it's not already started). ++ * ++ * This must only be called on a context that's scheduled in, and an atom ++ * that's running on the GPU. 
++ * ++ * The caller must hold hwaccess_lock ++ * ++ * This can be called safely from atomic context ++ */ ++void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_OBJ, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); -+ -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); -+ -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -+} ++ struct kbase_as *as; + -+/*****************************************************************************/ ++ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) -+{ -+ const u32 msg_id = KBASE_AUX_PM_STATE; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + -+ sizeof(state); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (katom->poking) ++ return; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ katom->poking = 1; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &core_type, sizeof(core_type)); -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* It's safe to work on the as/as_nr without an explicit reference, ++ * because the caller holds the hwaccess_lock, and the atom itself ++ * was also running and had already taken a reference */ ++ as = &kbdev->as[kctx->as_nr]; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++ if (++(as->poke_refcount) == 1) { ++ /* First refcount for poke needed: check if not already in flight */ ++ if (!as->poke_state) { ++ /* need to start poking */ ++ as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT; ++ queue_work(as->poke_wq, &as->poke_work); ++ } ++ } +} + -+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) ++/** ++ * If an atom holds a poking timer, release it and wait for it to finish ++ * ++ * This must only be called on a context that's scheduled in, and an atom ++ * that still has a JS reference on the context ++ * ++ * This must \b not be called from atomic context, since it can sleep. 
++ */ ++void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ -+ const u32 msg_id = KBASE_AUX_PAGEFAULT; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + -+ sizeof(page_count_change); ++ struct kbase_as *as; + unsigned long flags; -+ char *buffer; -+ size_t pos = 0; -+ -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, -+ &page_count_change, sizeof(page_count_change)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ KBASE_DEBUG_ASSERT(kbdev); ++ KBASE_DEBUG_ASSERT(kctx); ++ KBASE_DEBUG_ASSERT(katom); ++ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -+} ++ if (!katom->poking) ++ return; + -+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) -+{ -+ const u32 msg_id = KBASE_AUX_PAGESALLOC; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + -+ sizeof(page_count); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ as = &kbdev->as[kctx->as_nr]; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ KBASE_DEBUG_ASSERT(as->poke_refcount > 0); ++ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &page_count, sizeof(page_count)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (--(as->poke_refcount) == 0) { ++ as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -+} ++ hrtimer_cancel(&as->poke_timer); ++ flush_workqueue(as->poke_wq); + -+void __kbase_tlstream_aux_devfreq_target(u64 target_freq) -+{ -+ const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(target_freq); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* Re-check whether it's still needed */ ++ if (as->poke_refcount) { ++ int queue_work_ret; ++ /* Poking still needed: ++ * - Another retain will not be starting the timer or queueing work, ++ * because it's still marked as in-flight ++ * - The hrtimer has finished, and has not started a new timer or ++ * queued work because it's been marked as killing ++ * ++ * So whatever happens now, just queue the work again */ ++ as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE); ++ queue_work_ret = queue_work(as->poke_wq, &as->poke_work); ++ KBASE_DEBUG_ASSERT(queue_work_ret); ++ } else { ++ /* It isn't - so mark it as not in flight, and not killing */ ++ as->poke_state = 0u; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = 
kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &target_freq, sizeof(target_freq)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ /* The poke associated with the atom has now finished. If this is ++ * also the last atom on the context, then we can guarentee no more ++ * pokes (and thus no more poking register accesses) will occur on ++ * the context until new atoms are run */ ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++ katom->poking = 0; +} + -+void __kbase_tlstream_aux_protected_enter_start(void *gpu) ++void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as) +{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (!kctx) { ++ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n", ++ kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault", ++ as->number, as->fault_addr); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -+} -+void __kbase_tlstream_aux_protected_enter_end(void *gpu) -+{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ /* Since no ctx was found, the MMU must be disabled. */ ++ WARN_ON(as->current_setup.transtab); + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ if (kbase_as_has_bus_fault(as)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); ++ } else if (kbase_as_has_page_fault(as)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, kctx, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ } + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++#if KBASE_GPU_RESET_EN ++ if (kbase_as_has_bus_fault(as) && ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { ++ bool reset_status; ++ /* ++ * Reset the GPU, like in bus_fault_worker, in case an ++ * earlier error hasn't been properly cleared by this ++ * point. ++ */ ++ dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); ++ reset_status = kbase_prepare_to_reset_gpu_locked(kbdev); ++ if (reset_status) ++ kbase_reset_gpu_locked(kbdev); ++ } ++#endif /* KBASE_GPU_RESET_EN */ + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -+} ++ return; ++ } + -+void __kbase_tlstream_aux_protected_leave_start(void *gpu) -+{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; ++ if (kbase_as_has_bus_fault(as)) { ++ /* ++ * hw counters dumping in progress, signal the ++ * other thread that it failed ++ */ ++ if ((kbdev->hwcnt.kctx == kctx) && ++ (kbdev->hwcnt.backend.state == ++ KBASE_INSTR_STATE_DUMPING)) ++ kbdev->hwcnt.backend.state = ++ KBASE_INSTR_STATE_FAULT; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++ /* ++ * Stop the kctx from submitting more jobs and cause it ++ * to be scheduled out/rescheduled when all references ++ * to it are released ++ */ ++ kbasep_js_clear_submit_allowed(js_devdata, kctx); + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) ++ dev_warn(kbdev->dev, ++ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", ++ as->number, as->fault_addr, ++ as->fault_extra_addr); ++ else ++ dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", ++ as->number, as->fault_addr); + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++ /* ++ * We need to switch to UNMAPPED mode - but we do this in a ++ * worker so that we can sleep ++ */ ++ kbdev->kbase_group_error++; ++ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault)); ++ WARN_ON(work_pending(&as->work_busfault)); ++ queue_work(as->pf_wq, &as->work_busfault); ++ atomic_inc(&kbdev->faults_pending); ++ } else { ++ kbdev->kbase_group_error++; ++ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault)); ++ WARN_ON(work_pending(&as->work_pagefault)); ++ queue_work(as->pf_wq, &as->work_pagefault); ++ atomic_inc(&kbdev->faults_pending); ++ } +} -+void __kbase_tlstream_aux_protected_leave_end(void *gpu) -+{ -+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; -+ const size_t msg_size = -+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); -+ unsigned long flags; -+ char *buffer; -+ size_t pos = 0; + -+ buffer = kbasep_tlstream_msgbuf_acquire( -+ TL_STREAM_TYPE_AUX, -+ msg_size, &flags); -+ KBASE_DEBUG_ASSERT(buffer); ++void kbase_flush_mmu_wqs(struct kbase_device *kbdev) ++{ ++ int i; + -+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); -+ pos = kbasep_tlstream_write_timestamp(buffer, pos); -+ pos = kbasep_tlstream_write_bytes( -+ buffer, pos, &gpu, sizeof(gpu)); -+ KBASE_DEBUG_ASSERT(msg_size == pos); ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ struct kbase_as *as = &kbdev->as[i]; + -+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++ flush_workqueue(as->pf_wq); ++ } +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h new file mode 100644 -index 000000000..c0a1117d5 +index 
000000000..986e959e9 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h -@@ -0,0 +1,623 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h +@@ -0,0 +1,123 @@ +/* + * -+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -396272,621 +397372,584 @@ index 000000000..c0a1117d5 + + + -+#if !defined(_KBASE_TLSTREAM_H) -+#define _KBASE_TLSTREAM_H ++/** ++ * @file ++ * Interface file for accessing MMU hardware functionality ++ */ + -+#include ++/** ++ * @page mali_kbase_mmu_hw_page MMU hardware interface ++ * ++ * @section mali_kbase_mmu_hw_intro_sec Introduction ++ * This module provides an abstraction for accessing the functionality provided ++ * by the midgard MMU and thus allows all MMU HW access to be contained within ++ * one common place and allows for different backends (implementations) to ++ * be provided. ++ */ + -+/*****************************************************************************/ ++#ifndef _MALI_KBASE_MMU_HW_H_ ++#define _MALI_KBASE_MMU_HW_H_ ++ ++/* Forward declarations */ ++struct kbase_device; ++struct kbase_as; ++struct kbase_context; + +/** -+ * kbase_tlstream_init - initialize timeline infrastructure in kernel -+ * Return: zero on success, negative number on error ++ * @addtogroup base_kbase_api ++ * @{ + */ -+int kbase_tlstream_init(void); + +/** -+ * kbase_tlstream_term - terminate timeline infrastructure in kernel -+ * -+ * Timeline need have to been previously enabled with kbase_tlstream_init(). ++ * @addtogroup mali_kbase_mmu_hw MMU access APIs ++ * @{ + */ -+void kbase_tlstream_term(void); + -+/** -+ * kbase_tlstream_acquire - acquire timeline stream file descriptor -+ * @kctx: kernel common context -+ * @flags: timeline stream flags ++/** @brief MMU fault type descriptor. ++ */ ++enum kbase_mmu_fault_type { ++ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, ++ KBASE_MMU_FAULT_TYPE_PAGE, ++ KBASE_MMU_FAULT_TYPE_BUS, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, ++ KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED ++}; ++ ++/** @brief Configure an address space for use. + * -+ * This descriptor is meant to be used by userspace timeline to gain access to -+ * kernel timeline stream. This stream is later broadcasted by user space to the -+ * timeline client. -+ * Only one entity can own the descriptor at any given time. Descriptor shall be -+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already -+ * being used) return will be a negative value. ++ * Configure the MMU using the address space details setup in the ++ * @ref kbase_context structure. + * -+ * Return: file descriptor on success, negative number on error ++ * @param[in] kbdev kbase device to configure. ++ * @param[in] as address space to configure. ++ * @param[in] kctx kbase context to configure. + */ -+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags); ++void kbase_mmu_hw_configure(struct kbase_device *kbdev, ++ struct kbase_as *as, struct kbase_context *kctx); + -+/** -+ * kbase_tlstream_flush_streams - flush timeline streams. ++/** @brief Issue an operation to the MMU. + * -+ * Function will flush pending data in all timeline streams. 
++ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that ++ * is associated with the provided @ref kbase_context over the specified range ++ * ++ * @param[in] kbdev kbase device to issue the MMU operation on. ++ * @param[in] as address space to issue the MMU operation on. ++ * @param[in] kctx kbase context to issue the MMU operation on. ++ * @param[in] vpfn MMU Virtual Page Frame Number to start the ++ * operation on. ++ * @param[in] nr Number of pages to work on. ++ * @param[in] type Operation type (written to ASn_COMMAND). ++ * @param[in] handling_irq Is this operation being called during the handling ++ * of an interrupt? ++ * ++ * @return Zero if the operation was successful, non-zero otherwise. + */ -+void kbase_tlstream_flush_streams(void); ++int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type, ++ unsigned int handling_irq); + -+/** -+ * kbase_tlstream_reset_body_streams - reset timeline body streams. ++/** @brief Clear a fault that has been previously reported by the MMU. + * -+ * Function will discard pending data in all timeline body streams. ++ * Clear a bus error or page fault that has been reported by the MMU. ++ * ++ * @param[in] kbdev kbase device to clear the fault from. ++ * @param[in] as address space to clear the fault from. ++ * @param[in] kctx kbase context to clear the fault from or NULL. ++ * @param[in] type The type of fault that needs to be cleared. + */ -+void kbase_tlstream_reset_body_streams(void); ++void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, enum kbase_mmu_fault_type type); + -+#if MALI_UNIT_TEST -+/** -+ * kbase_tlstream_test - start timeline stream data generator -+ * @tpw_count: number of trace point writers in each context -+ * @msg_delay: time delay in milliseconds between trace points written by one -+ * writer -+ * @msg_count: number of trace points written by one writer -+ * @aux_msg: if non-zero aux messages will be included ++/** @brief Enable fault that has been previously reported by the MMU. + * -+ * This test starts a requested number of asynchronous writers in both IRQ and -+ * thread context. Each writer will generate required number of test -+ * tracepoints (tracepoints with embedded information about writer that -+ * should be verified by user space reader). Tracepoints will be emitted in -+ * all timeline body streams. If aux_msg is non-zero writer will also -+ * generate not testable tracepoints (tracepoints without information about -+ * writer). These tracepoints are used to check correctness of remaining -+ * timeline message generating functions. Writer will wait requested time -+ * between generating another set of messages. This call blocks until all -+ * writers finish. ++ * After a page fault or bus error has been reported by the MMU these ++ * will be disabled. After these are handled this function needs to be ++ * called to enable the page fault or bus error fault again. ++ * ++ * @param[in] kbdev kbase device to again enable the fault from. ++ * @param[in] as address space to again enable the fault from. ++ * @param[in] kctx kbase context to again enable the fault from. ++ * @param[in] type The type of fault that needs to be enabled again. 
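The clear_fault/enable_fault comments above imply a fixed lifecycle: once the MMU reports a fault, further faults stay masked until the handler clears the fault and explicitly re-enables it. A stand-alone model of that sequence, assuming the ordering suggested by the interface comments; the struct and helper names are invented for illustration:

/* Illustrative only: handle -> clear -> re-enable, as described above. */
#include <stdbool.h>
#include <stdio.h>

struct as_model {
	int  number;
	bool fault_irq_enabled;
	bool fault_pending;
};

static void report_fault(struct as_model *as)
{
	as->fault_pending = true;
	as->fault_irq_enabled = false;	/* reporting masks further faults */
}

static void clear_fault(struct as_model *as)  { as->fault_pending = false; }
static void enable_fault(struct as_model *as) { as->fault_irq_enabled = true; }

static void handle_fault(struct as_model *as)
{
	/* ... fix up the mapping, or tear the faulting context down ... */
	clear_fault(as);	/* analogous to kbase_mmu_hw_clear_fault()  */
	enable_fault(as);	/* analogous to kbase_mmu_hw_enable_fault() */
}

int main(void)
{
	struct as_model as = { .number = 0, .fault_irq_enabled = true };

	report_fault(&as);
	handle_fault(&as);
	printf("AS%d: pending=%d irq_enabled=%d\n",
	       as.number, as.fault_pending, as.fault_irq_enabled);
	return 0;
}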
+ */ -+void kbase_tlstream_test( -+ unsigned int tpw_count, -+ unsigned int msg_delay, -+ unsigned int msg_count, -+ int aux_msg); ++void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, ++ struct kbase_context *kctx, enum kbase_mmu_fault_type type); + -+/** -+ * kbase_tlstream_stats - read timeline stream statistics -+ * @bytes_collected: will hold number of bytes read by the user -+ * @bytes_generated: will hold number of bytes generated by trace points ++/** @} *//* end group mali_kbase_mmu_hw */ ++/** @} *//* end group base_kbase_api */ ++ ++#endif /* _MALI_KBASE_MMU_HW_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h +new file mode 100644 +index 000000000..b487c0042 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h +@@ -0,0 +1,47 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); -+#endif /* MALI_UNIT_TEST */ + -+/*****************************************************************************/ + -+#define TL_ATOM_STATE_IDLE 0 -+#define TL_ATOM_STATE_READY 1 -+#define TL_ATOM_STATE_DONE 2 -+#define TL_ATOM_STATE_POSTED 3 + -+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); -+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); -+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); -+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); -+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); -+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); -+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); -+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); -+void __kbase_tlstream_tl_del_ctx(void *context); -+void __kbase_tlstream_tl_del_atom(void *atom); -+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); -+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); -+void __kbase_tlstream_tl_ret_atom_lpu( -+ void *atom, void *lpu, const char *attrib_match_list); -+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); -+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); -+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); -+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); -+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); -+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); -+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); -+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); -+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); -+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); -+void __kbase_tlstream_tl_attrib_atom_config( -+ void *atom, u64 jd, u64 affinity, u32 config); -+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); -+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); -+void 
__kbase_tlstream_tl_attrib_atom_priority_change(void *atom); -+void __kbase_tlstream_tl_attrib_atom_jit( -+ void *atom, u64 edit_addr, u64 new_addr); -+void __kbase_tlstream_tl_attrib_as_config( -+ void *as, u64 transtab, u64 memattr, u64 transcfg); -+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); -+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); -+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); -+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); -+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); -+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); -+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); -+void __kbase_tlstream_aux_devfreq_target(u64 target_freq); -+void __kbase_tlstream_aux_protected_enter_start(void *gpu); -+void __kbase_tlstream_aux_protected_enter_end(void *gpu); -+void __kbase_tlstream_aux_protected_leave_start(void *gpu); -+void __kbase_tlstream_aux_protected_leave_end(void *gpu); + -+#define TLSTREAM_ENABLED (1 << 31) + -+extern atomic_t kbase_tlstream_enabled; ++#ifndef _MALI_KBASE_MMU_MODE_ ++#define _MALI_KBASE_MMU_MODE_ + -+#define __TRACE_IF_ENABLED(trace_name, ...) \ -+ do { \ -+ int enabled = atomic_read(&kbase_tlstream_enabled); \ -+ if (enabled & TLSTREAM_ENABLED) \ -+ __kbase_tlstream_##trace_name(__VA_ARGS__); \ -+ } while (0) ++#include + -+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ -+ do { \ -+ int enabled = atomic_read(&kbase_tlstream_enabled); \ -+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ -+ __kbase_tlstream_##trace_name(__VA_ARGS__); \ -+ } while (0) ++/* Forward declarations */ ++struct kbase_context; ++struct kbase_device; ++struct kbase_as; ++struct kbase_mmu_setup; + -+#define __TRACE_IF_ENABLED_JD(trace_name, ...) \ -+ do { \ -+ int enabled = atomic_read(&kbase_tlstream_enabled); \ -+ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ -+ __kbase_tlstream_##trace_name(__VA_ARGS__); \ -+ } while (0) ++struct kbase_mmu_mode { ++ void (*update)(struct kbase_context *kctx); ++ void (*get_as_setup)(struct kbase_context *kctx, ++ struct kbase_mmu_setup * const setup); ++ void (*disable_as)(struct kbase_device *kbdev, int as_nr); ++ phys_addr_t (*pte_to_phy_addr)(u64 entry); ++ int (*ate_is_valid)(u64 ate); ++ int (*pte_is_valid)(u64 pte); ++ void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags); ++ void (*entry_set_pte)(u64 *entry, phys_addr_t phy); ++ void (*entry_invalidate)(u64 *entry); ++}; + -+/*****************************************************************************/ ++struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); ++struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); + -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline -+ * summary -+ * @context: name of the context object -+ * @nr: context number -+ * @tgid: thread Group Id ++#endif /* _MALI_KBASE_MMU_MODE_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c +new file mode 100644 +index 000000000..60df17116 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c +@@ -0,0 +1,200 @@ ++/* + * -+ * Function emits a timeline message informing about context creation. Context -+ * is created with context number (its attribute), that can be used to link -+ * kbase context with userspace context. -+ * This message is directed to timeline summary stream. 
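struct kbase_mmu_mode above is a per-format table of function pointers, letting the driver select the LPAE or AArch64 page-table backend at run time through kbase_mmu_mode_get_lpae()/kbase_mmu_mode_get_aarch64(). A stand-alone sketch of the same pattern, reduced to one operation per backend; the struct and function names here are invented, only the ENTRY_IS_ATE encodings are taken from the two backend files:

/* Illustrative only: a minimal "mode table" in the style of kbase_mmu_mode. */
#include <stdint.h>
#include <stdio.h>

struct pt_mode {
	const char *name;
	int (*ate_is_valid)(uint64_t entry);
};

static int lpae_ate_is_valid(uint64_t e)    { return (e & 3u) == 1u; }	/* ENTRY_IS_ATE = 1 (LPAE)    */
static int aarch64_ate_is_valid(uint64_t e) { return (e & 3u) == 3u; }	/* ENTRY_IS_ATE = 3 (AArch64) */

static const struct pt_mode pt_lpae    = { "lpae",    lpae_ate_is_valid };
static const struct pt_mode pt_aarch64 = { "aarch64", aarch64_ate_is_valid };

static const struct pt_mode *pt_mode_get(int use_aarch64)
{
	return use_aarch64 ? &pt_aarch64 : &pt_lpae;
}

int main(void)
{
	const struct pt_mode *mode = pt_mode_get(1);

	printf("%s: entry 0x3 valid ATE? %d\n", mode->name, mode->ate_is_valid(0x3));
	return 0;
}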
-+ */ -+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \ -+ __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) -+ -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary -+ * @gpu: name of the GPU object -+ * @id: id value of this GPU -+ * @core_count: number of cores this GPU hosts ++ * (C) COPYRIGHT 2010-2014, 2016, 2017 ARM Limited. All rights reserved. + * -+ * Function emits a timeline message informing about GPU creation. GPU is -+ * created with two attributes: id and core count. -+ * This message is directed to timeline summary stream. -+ */ -+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \ -+ __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) -+ -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary -+ * @lpu: name of the Logical Processing Unit object -+ * @nr: sequential number assigned to this LPU -+ * @fn: property describing this LPU's functional abilities ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * Function emits a timeline message informing about LPU creation. LPU is -+ * created with two attributes: number linking this LPU with GPU's job slot -+ * and function bearing information about this LPU abilities. -+ * This message is directed to timeline summary stream. -+ */ -+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \ -+ __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) -+ -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU -+ * @lpu: name of the Logical Processing Unit object -+ * @gpu: name of the GPU object ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Function emits a timeline message informing that LPU object shall be deleted -+ * along with GPU object. -+ * This message is directed to timeline summary stream. + */ -+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \ -+ __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) + -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary -+ * @as: name of the address space object -+ * @nr: sequential number assigned to this address space -+ * -+ * Function emits a timeline message informing about address space creation. -+ * Address space is created with one attribute: number identifying this -+ * address space. -+ * This message is directed to timeline summary stream. -+ */ -+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \ -+ __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) + -+/** -+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU -+ * @as: name of the address space object -+ * @gpu: name of the GPU object -+ * -+ * Function emits a timeline message informing that address space object -+ * shall be deleted along with GPU object. -+ * This message is directed to timeline summary stream. 
-+ */ -+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \ -+ __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) + -+/** -+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline -+ * @context: name of the context object -+ * @nr: context number -+ * @tgid: thread Group Id -+ * -+ * Function emits a timeline message informing about context creation. Context -+ * is created with context number (its attribute), that can be used to link -+ * kbase context with userspace context. -+ */ -+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \ -+ __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) + -+/** -+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline -+ * @atom: name of the atom object -+ * @nr: sequential number assigned to this atom -+ * -+ * Function emits a timeline message informing about atom creation. Atom is -+ * created with atom number (its attribute) that links it with actual work -+ * bucket id understood by hardware. -+ */ -+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \ -+ __TRACE_IF_ENABLED(tl_new_atom, atom, nr) + -+/** -+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline -+ * @context: name of the context object -+ * -+ * Function emits a timeline message informing that context object ceased to -+ * exist. -+ */ -+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \ -+ __TRACE_IF_ENABLED(tl_del_ctx, context) ++#include "mali_kbase_mmu_mode.h" + -+/** -+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline -+ * @atom: name of the atom object -+ * -+ * Function emits a timeline message informing that atom object ceased to -+ * exist. -+ */ -+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \ -+ __TRACE_IF_ENABLED(tl_del_atom, atom) ++#include "mali_kbase.h" ++#include "mali_midg_regmap.h" + -+/** -+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU -+ * @context: name of the context object -+ * @lpu: name of the Logical Processing Unit object -+ * -+ * Function emits a timeline message informing that context is being held -+ * by LPU and must not be deleted unless it is released. ++#define ENTRY_TYPE_MASK 3ULL ++/* For valid ATEs bit 1 = (level == 3) ? 1 : 0. ++ * The MMU is only ever configured by the driver so that ATEs ++ * are at level 3, so bit 1 should always be set + */ -+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \ -+ __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) ++#define ENTRY_IS_ATE 3ULL ++#define ENTRY_IS_INVAL 2ULL ++#define ENTRY_IS_PTE 3ULL + -+/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context -+ * @atom: name of the atom object -+ * @context: name of the context object -+ * -+ * Function emits a timeline message informing that atom object is being held -+ * by context and must not be deleted unless it is released. -+ */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \ -+ __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) ++#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ ++#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ ++#define ENTRY_ACCESS_RO (3ULL << 6) ++#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ ++#define ENTRY_ACCESS_BIT (1ULL << 10) ++#define ENTRY_NX_BIT (1ULL << 54) + -+/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU -+ * @atom: name of the atom object -+ * @lpu: name of the Logical Processing Unit object -+ * @attrib_match_list: list containing match operator attributes -+ * -+ * Function emits a timeline message informing that atom object is being held -+ * by LPU and must not be deleted unless it is released. 
++/* Helper Function to perform assignment of page table entries, to ++ * ensure the use of strd, which is required on LPAE systems. + */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \ -+ __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) ++static inline void page_table_entry_set(u64 *pte, u64 phy) ++{ ++#ifdef CONFIG_64BIT ++ *pte = phy; ++#elif defined(CONFIG_ARM) ++ /* ++ * In order to prevent the compiler keeping cached copies of ++ * memory, we have to explicitly say that we have updated memory. ++ * ++ * Note: We could manually move the data ourselves into R0 and ++ * R1 by specifying register variables that are explicitly ++ * given registers assignments, the down side of this is that ++ * we have to assume cpu endianness. To avoid this we can use ++ * the ldrd to read the data from memory into R0 and R1 which ++ * will respect the cpu endianness, we then use strd to make ++ * the 64 bit assignment to the page table entry. ++ */ ++ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" ++ "strd r0, r1, [%[pte]]\n\t" ++ : "=m" (*pte) ++ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) ++ : "r0", "r1"); ++#else ++#error "64-bit atomic write must be implemented for your architecture" ++#endif ++} + -+/** -+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU -+ * @context: name of the context object -+ * @lpu: name of the Logical Processing Unit object -+ * -+ * Function emits a timeline message informing that context is being released -+ * by LPU object. -+ */ -+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \ -+ __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) ++static void mmu_get_as_setup(struct kbase_context *kctx, ++ struct kbase_mmu_setup * const setup) ++{ ++ /* Set up the required caching policies at the correct indices ++ * in the memattr register. ++ */ ++ setup->memattr = ++ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << ++ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | ++ (AS_MEMATTR_FORCE_TO_CACHE_ALL << ++ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | ++ (AS_MEMATTR_WRITE_ALLOC << ++ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << ++ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_WA << ++ (AS_MEMATTR_INDEX_OUTER_WA * 8)); + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context -+ * @atom: name of the atom object -+ * @context: name of the context object -+ * -+ * Function emits a timeline message informing that atom object is being -+ * released by context. -+ */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \ -+ __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) ++ setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK; ++ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; ++} + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU -+ * @atom: name of the atom object -+ * @lpu: name of the Logical Processing Unit object -+ * -+ * Function emits a timeline message informing that atom object is being -+ * released by LPU. 
-+ */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \ -+ __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) ++static void mmu_update(struct kbase_context *kctx) ++{ ++ struct kbase_device * const kbdev = kctx->kbdev; ++ struct kbase_as * const as = &kbdev->as[kctx->as_nr]; ++ struct kbase_mmu_setup * const current_setup = &as->current_setup; + -+/** -+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context -+ * @as: name of the address space object -+ * @ctx: name of the context object -+ * -+ * Function emits a timeline message informing that address space object -+ * is being held by the context object. -+ */ -+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \ -+ __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) ++ mmu_get_as_setup(kctx, current_setup); + -+/** -+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context -+ * @as: name of the address space object -+ * @ctx: name of the context object -+ * -+ * Function emits a timeline message informing that address space object -+ * is being released by atom. -+ */ -+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \ -+ __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as, kctx); ++} + -+/** -+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space -+ * @atom: name of the atom object -+ * @as: name of the address space object -+ * -+ * Function emits a timeline message informing that atom object is being held -+ * by address space and must not be deleted unless it is released. -+ */ -+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \ -+ __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) ++static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) ++{ ++ struct kbase_as * const as = &kbdev->as[as_nr]; ++ struct kbase_mmu_setup * const current_setup = &as->current_setup; + -+/** -+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space -+ * @atom: name of the atom object -+ * @as: name of the address space object -+ * -+ * Function emits a timeline message informing that atom object is being -+ * released by address space. -+ */ -+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \ -+ __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) ++ current_setup->transtab = 0ULL; ++ current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + -+/** -+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom -+ * @atom1: name of the child atom object -+ * @atom2: name of the parent atom object that depends on child atom -+ * -+ * Function emits a timeline message informing that parent atom waits for -+ * child atom object to be completed before start its execution. -+ */ -+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \ -+ __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as, NULL); ++} + -+/** -+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved -+ * @atom1: name of the child atom object -+ * @atom2: name of the parent atom object that depended on child atom -+ * -+ * Function emits a timeline message informing that parent atom execution -+ * dependency on child atom has been resolved. 
-+ */ -+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \ -+ __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) ++static phys_addr_t pte_to_phy_addr(u64 entry) ++{ ++ if (!(entry & 1)) ++ return 0; + -+/** -+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms -+ * @atom1: name of the child atom object -+ * @atom2: name of the parent atom object that depended on child atom -+ * -+ * Function emits a timeline message informing that parent atom execution -+ * dependency on child atom has been resolved. -+ */ -+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \ -+ __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) ++ return entry & ~0xFFF; ++} + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes -+ * @atom: name of the atom object -+ * @jd: job descriptor address -+ * @affinity: job affinity -+ * @config: job config -+ * -+ * Function emits a timeline message containing atom attributes. -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \ -+ __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) ++static int ate_is_valid(u64 ate) ++{ ++ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); ++} + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority -+ * @atom: name of the atom object -+ * @prio: atom priority -+ * -+ * Function emits a timeline message containing atom priority. -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \ -+ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) ++static int pte_is_valid(u64 pte) ++{ ++ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); ++} + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state -+ * @atom: name of the atom object -+ * @state: atom state -+ * -+ * Function emits a timeline message containing atom state. ++/* ++ * Map KBASE_REG flags to MMU flags + */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \ -+ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) ++static u64 get_mmu_flags(unsigned long flags) ++{ ++ u64 mmu_flags; + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change -+ * @atom: name of the atom object -+ * -+ * Function emits a timeline message signalling priority change -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \ -+ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom) ++ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ ++ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom -+ * @atom: atom identifier -+ * @edit_addr: address edited by jit -+ * @new_addr: address placed into the edited location -+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \ -+ __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr) ++ /* Set access flags - note that AArch64 stage 1 does not support ++ * write-only access, so we use read/write instead ++ */ ++ if (flags & KBASE_REG_GPU_WR) ++ mmu_flags |= ENTRY_ACCESS_RW; ++ else if (flags & KBASE_REG_GPU_RD) ++ mmu_flags |= ENTRY_ACCESS_RO; + -+/** -+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes -+ * @as: assigned address space -+ * @transtab: configuration of the TRANSTAB register -+ * @memattr: configuration of the MEMATTR register -+ * @transcfg: configuration of the TRANSCFG register (or zero if not present) -+ * -+ * Function emits a timeline message containing address space attributes. 
-+ */ -+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \ -+ __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) ++ /* nx if requested */ ++ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; + -+/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex -+ * @atom: atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \ -+ __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) ++ if (flags & KBASE_REG_SHARE_BOTH) { ++ /* inner and outer shareable */ ++ mmu_flags |= SHARE_BOTH_BITS; ++ } else if (flags & KBASE_REG_SHARE_IN) { ++ /* inner shareable coherency */ ++ mmu_flags |= SHARE_INNER_BITS; ++ } + -+/** -+ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop -+ * @lpu: name of the LPU object -+ */ -+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \ -+ __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) ++ return mmu_flags; ++} + -+/** -+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue -+ * @atom: atom identifier -+ */ -+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \ -+ __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) ++static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) ++{ ++ page_table_entry_set(entry, (phy & ~0xFFF) | ++ get_mmu_flags(flags) | ++ ENTRY_ACCESS_BIT | ENTRY_IS_ATE); ++} + -+/** -+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset -+ * @gpu: name of the GPU object ++static void entry_set_pte(u64 *entry, phys_addr_t phy) ++{ ++ page_table_entry_set(entry, (phy & ~0xFFF) | ++ ENTRY_ACCESS_BIT | ENTRY_IS_PTE); ++} ++ ++static void entry_invalidate(u64 *entry) ++{ ++ page_table_entry_set(entry, ENTRY_IS_INVAL); ++} ++ ++static struct kbase_mmu_mode const aarch64_mode = { ++ .update = mmu_update, ++ .get_as_setup = mmu_get_as_setup, ++ .disable_as = mmu_disable_as, ++ .pte_to_phy_addr = pte_to_phy_addr, ++ .ate_is_valid = ate_is_valid, ++ .pte_is_valid = pte_is_valid, ++ .entry_set_ate = entry_set_ate, ++ .entry_set_pte = entry_set_pte, ++ .entry_invalidate = entry_invalidate ++}; ++ ++struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) ++{ ++ return &aarch64_mode; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +new file mode 100644 +index 000000000..53fbbc73a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +@@ -0,0 +1,198 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * This imperative tracepoint is specific to job dumping. -+ * Function emits a timeline message indicating GPU soft reset. 
+ */ -+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \ -+ __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) + + -+/** -+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state -+ * @core_type: core type (shader, tiler, l2 cache, l3 cache) -+ * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) -+ */ -+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \ -+ __TRACE_IF_ENABLED(aux_pm_state, core_type, state) + -+/** -+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event -+ * resulting in new pages being mapped -+ * @ctx_nr: kernel context number -+ * @page_count_change: number of pages to be added -+ */ -+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \ -+ __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) + -+/** -+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated -+ * pages is changed -+ * @ctx_nr: kernel context number -+ * @page_count: number of pages used by the context -+ */ -+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \ -+ __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) + -+/** -+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS -+ * frequency -+ * @target_freq: new target frequency -+ */ -+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \ -+ __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) ++#include "mali_kbase_mmu_mode.h" + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning -+ * to protected mode -+ * @gpu: name of the GPU object -+ * -+ * Function emits a timeline message indicating the GPU is starting to -+ * transition to protected mode. -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \ -+ __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu) ++#include "mali_kbase.h" ++#include "mali_midg_regmap.h" + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning -+ * to protected mode -+ * @gpu: name of the GPU object -+ * -+ * Function emits a timeline message indicating the GPU has finished -+ * transitioning to protected mode. -+ */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \ -+ __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu) ++#define ENTRY_TYPE_MASK 3ULL ++#define ENTRY_IS_ATE 1ULL ++#define ENTRY_IS_INVAL 2ULL ++#define ENTRY_IS_PTE 3ULL + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning -+ * to non-protected mode -+ * @gpu: name of the GPU object -+ * -+ * Function emits a timeline message indicating the GPU is starting to -+ * transition to non-protected mode. ++#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ ++#define ENTRY_RD_BIT (1ULL << 6) ++#define ENTRY_WR_BIT (1ULL << 7) ++#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ ++#define ENTRY_ACCESS_BIT (1ULL << 10) ++#define ENTRY_NX_BIT (1ULL << 54) ++ ++#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ ++ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) ++ ++/* Helper Function to perform assignment of page table entries, to ++ * ensure the use of strd, which is required on LPAE systems. + */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \ -+ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu) ++static inline void page_table_entry_set(u64 *pte, u64 phy) ++{ ++#ifdef CONFIG_64BIT ++ *pte = phy; ++#elif defined(CONFIG_ARM) ++ /* ++ * In order to prevent the compiler keeping cached copies of ++ * memory, we have to explicitly say that we have updated ++ * memory. 
++ * ++ * Note: We could manually move the data ourselves into R0 and ++ * R1 by specifying register variables that are explicitly ++ * given registers assignments, the down side of this is that ++ * we have to assume cpu endianness. To avoid this we can use ++ * the ldrd to read the data from memory into R0 and R1 which ++ * will respect the cpu endianness, we then use strd to make ++ * the 64 bit assignment to the page table entry. ++ */ ++ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" ++ "strd r0, r1, [%[pte]]\n\t" ++ : "=m" (*pte) ++ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) ++ : "r0", "r1"); ++#else ++#error "64-bit atomic write must be implemented for your architecture" ++#endif ++} + -+/** -+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning -+ * to non-protected mode -+ * @gpu: name of the GPU object -+ * -+ * Function emits a timeline message indicating the GPU has finished -+ * transitioning to non-protected mode. ++static void mmu_get_as_setup(struct kbase_context *kctx, ++ struct kbase_mmu_setup * const setup) ++{ ++ /* Set up the required caching policies at the correct indices ++ * in the memattr register. */ ++ setup->memattr = ++ (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << ++ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | ++ (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << ++ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | ++ (AS_MEMATTR_LPAE_WRITE_ALLOC << ++ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | ++ (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << ++ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | ++ (AS_MEMATTR_LPAE_OUTER_WA << ++ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | ++ 0; /* The other indices are unused for now */ ++ ++ setup->transtab = ((u64)kctx->pgd & ++ ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | ++ AS_TRANSTAB_LPAE_ADRMODE_TABLE | ++ AS_TRANSTAB_LPAE_READ_INNER; ++ ++ setup->transcfg = 0; ++} ++ ++static void mmu_update(struct kbase_context *kctx) ++{ ++ struct kbase_device * const kbdev = kctx->kbdev; ++ struct kbase_as * const as = &kbdev->as[kctx->as_nr]; ++ struct kbase_mmu_setup * const current_setup = &as->current_setup; ++ ++ mmu_get_as_setup(kctx, current_setup); ++ ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as, kctx); ++} ++ ++static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) ++{ ++ struct kbase_as * const as = &kbdev->as[as_nr]; ++ struct kbase_mmu_setup * const current_setup = &as->current_setup; ++ ++ current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; ++ ++ /* Apply the address space setting */ ++ kbase_mmu_hw_configure(kbdev, as, NULL); ++} ++ ++static phys_addr_t pte_to_phy_addr(u64 entry) ++{ ++ if (!(entry & 1)) ++ return 0; ++ ++ return entry & ~0xFFF; ++} ++ ++static int ate_is_valid(u64 ate) ++{ ++ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); ++} ++ ++static int pte_is_valid(u64 pte) ++{ ++ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); ++} ++ ++/* ++ * Map KBASE_REG flags to MMU flags + */ -+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ -+ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) ++static u64 get_mmu_flags(unsigned long flags) ++{ ++ u64 mmu_flags; + -+#endif /* _KBASE_TLSTREAM_H */ ++ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ ++ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h ++ /* write perm if requested */ ++ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? 
ENTRY_WR_BIT : 0; ++ /* read perm if requested */ ++ mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; ++ /* nx if requested */ ++ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; ++ ++ if (flags & KBASE_REG_SHARE_BOTH) { ++ /* inner and outer shareable */ ++ mmu_flags |= SHARE_BOTH_BITS; ++ } else if (flags & KBASE_REG_SHARE_IN) { ++ /* inner shareable coherency */ ++ mmu_flags |= SHARE_INNER_BITS; ++ } ++ ++ return mmu_flags; ++} ++ ++static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) ++{ ++ page_table_entry_set(entry, (phy & ~0xFFF) | ++ get_mmu_flags(flags) | ++ ENTRY_IS_ATE); ++} ++ ++static void entry_set_pte(u64 *entry, phys_addr_t phy) ++{ ++ page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); ++} ++ ++static void entry_invalidate(u64 *entry) ++{ ++ page_table_entry_set(entry, ENTRY_IS_INVAL); ++} ++ ++static struct kbase_mmu_mode const lpae_mode = { ++ .update = mmu_update, ++ .get_as_setup = mmu_get_as_setup, ++ .disable_as = mmu_disable_as, ++ .pte_to_phy_addr = pte_to_phy_addr, ++ .ate_is_valid = ate_is_valid, ++ .pte_is_valid = pte_is_valid, ++ .entry_set_ate = entry_set_ate, ++ .entry_set_pte = entry_set_pte, ++ .entry_invalidate = entry_invalidate ++}; ++ ++struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) ++{ ++ return &lpae_mode; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c new file mode 100644 -index 000000000..e2e054420 +index 000000000..1a44957fe --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h -@@ -0,0 +1,264 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +@@ -0,0 +1,124 @@ +/* + * -+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -396901,262 +397964,122 @@ index 000000000..e2e054420 + + + ++#ifdef CONFIG_MALI_PLATFORM_FAKE + ++#include ++#include ++#include ++#include ++#include + -+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** -+ * ***** DO NOT INCLUDE DIRECTLY ***** -+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + +/* -+ * The purpose of this header file is just to contain a list of trace code idenitifers -+ * -+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created -+ * -+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block. -+ * -+ * This allows automatic creation of an enum and a corresponding array of strings -+ * -+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE. -+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE. 
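Putting the LPAE definitions above together, an address translation entry is the 4 KiB-aligned physical address combined with the type bits, the memattr index in bits 4:2, the RD/WR permission bits and the NX bit. A stand-alone sketch that assembles such an entry; the physical address and attribute values are made up for the example:

/* Illustrative only: builds an LPAE-style ATE from the bit layout above. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ATE_TYPE_ATE	1ULL		/* ENTRY_IS_ATE (LPAE) */
#define ATE_RD		(1ULL << 6)	/* ENTRY_RD_BIT        */
#define ATE_WR		(1ULL << 7)	/* ENTRY_WR_BIT        */
#define ATE_NX		(1ULL << 54)	/* ENTRY_NX_BIT        */

static uint64_t make_lpae_ate(uint64_t phys, unsigned memattr_idx,
			      int readable, int writable, int no_execute)
{
	uint64_t e = (phys & ~0xFFFULL) | ATE_TYPE_ATE;

	e |= (uint64_t)(memattr_idx & 7u) << 2;	/* memattr index, bits 4:2 */
	if (readable)
		e |= ATE_RD;
	if (writable)
		e |= ATE_WR;
	if (no_execute)
		e |= ATE_NX;
	return e;
}

int main(void)
{
	/* dummy 2 MiB-aligned physical address, read/write, no-execute */
	uint64_t ate = make_lpae_ate(0x80200000ULL, 0, 1, 1, 1);

	printf("ATE = 0x%016" PRIx64 "\n", ate);
	return 0;
}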
-+ * -+ * e.g.: -+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X -+ * typedef enum -+ * { -+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) -+ * #include "mali_kbase_trace_defs.h" -+ * #undef KBASE_TRACE_CODE_MAKE_CODE -+ * } kbase_trace_code; -+ * -+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE -+ * -+ * -+ * The use of the macro here is: -+ * - KBASE_TRACE_CODE_MAKE_CODE( X ) -+ * -+ * Which produces: -+ * - For an enum, KBASE_TRACE_CODE_X -+ * - For a string, "X" -+ * -+ * -+ * For example: -+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: -+ * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum -+ * - "JM_JOB_COMPLETE" for the string -+ * - To use it to trace an event, do: -+ * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); ++ * This file is included only for type definitions and functions belonging to ++ * specific platform folders. Do not add dependencies with symbols that are ++ * defined somewhere else. + */ ++#include + -+#if 0 /* Dummy section to avoid breaking formatting */ -+int dummy_array[] = { -+#endif ++#define PLATFORM_CONFIG_RESOURCE_COUNT 4 ++#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 + -+/* -+ * Core events -+ */ -+ /* no info_val, no gpu_addr, no atom */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), -+ /* no info_val, no gpu_addr, no atom */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), -+ /* info_val == GPU_IRQ_STATUS register */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), -+ /* info_val == bits cleared */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), -+ /* info_val == GPU_IRQ_STATUS register */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), -+ /* GPU addr==dump address */ -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), -+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), -+/* -+ * Job Slot management events -+ */ -+ /* info_val==irq rawstat at start */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), -+ /* info_val==jobs processed */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), -+/* In the following: ++static struct platform_device *mali_device; ++ ++#ifndef CONFIG_OF ++/** ++ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources + * -+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some soft-stop cases) -+ * - uatom==kernel-side mapped uatom address (for correlation with user-side) -+ */ -+ /* info_val==exit code; gpu_addr==chain gpuaddr */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), -+ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), -+ /* gpu_addr is as follows: -+ * - If JS_STATUS active after soft-stop, val==gpu addr written to -+ * JS_HEAD on submit -+ * - otherwise gpu_addr==0 */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), -+ /* gpu_addr==JS_TAIL read */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), -+/* gpu_addr is as follows: -+ * - If JS_STATUS active before soft-stop, val==JS_HEAD -+ * - otherwise gpu_addr==0 -+ */ -+ /* gpu_addr==JS_HEAD read */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), -+ /* info_val == is_scheduled */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), -+ /* info_val == is_scheduled */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), -+ /* info_val == nr jobs submitted */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), -+ /* gpu_addr==JS_HEAD_NEXT last written */ -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), -+ KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), -+/* -+ * Job dispatch events -+ */ -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), -+ /* gpu_addr==0, info_val==0, uatom==0 */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), -+/* -+ * Scheduler Core events -+ */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), -+ /* gpu_addr==last value written/would be written to JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), -+ /* gpu_addr==value to write into JS_HEAD */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), -+ /* kctx is the one being evicted, info_val == kctx to put in */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), -+ /* info_val == 
lower 32 bits of affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), -+ /* info_val == lower 32 bits of rechecked affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), -+ /* info_val == lower 32 bits of rechecked affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), -+ /* info_val == lower 32 bits of affinity */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), -+ /* info_val == the ctx attribute now on ctx */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), -+ /* info_val == the ctx attribute now on runpool */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), -+ /* info_val == the ctx attribute now off ctx */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), -+ /* info_val == the ctx attribute now off runpool */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), -+/* -+ * Scheduler Policy events -+ */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), -+ /* info_val == whether it was evicted */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), -+ /* gpu_addr==JS_HEAD to write if the job were run */ -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), -+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), -+/* -+ * Power Management Events ++ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function ++ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. ++ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
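The platform-fake comment above fixes the resource order as register region first, then job, MMU and GPU IRQs. A stand-alone sketch of that layout; the struct and the numbers are invented for the example, while the real code fills struct resource entries with IORESOURCE_MEM and IORESOURCE_IRQ flags:

/* Illustrative only: the fixed 4-entry resource ordering described above. */
#include <stdio.h>

enum res_kind { RES_MEM, RES_IRQ };

struct fake_resource {
	enum res_kind kind;
	unsigned long start;
	unsigned long end;
};

int main(void)
{
	struct fake_resource res[4];

	res[0] = (struct fake_resource){ RES_MEM, 0xfde60000UL, 0xfde63fffUL };	/* register region (made-up address) */
	res[1] = (struct fake_resource){ RES_IRQ, 92, 92 };	/* job IRQ */
	res[2] = (struct fake_resource){ RES_IRQ, 93, 93 };	/* MMU IRQ */
	res[3] = (struct fake_resource){ RES_IRQ, 94, 94 };	/* GPU IRQ */

	for (int i = 0; i < 4; i++)
		printf("res[%d]: kind=%d start=%#lx end=%#lx\n",
		       i, res[i].kind, res[i].start, res[i].end);
	return 0;
}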
++ * ++ * @param[in] io_resource Input IO resource data ++ * @param[out] linux_resources Pointer to output array of Linux resource structures + */ -+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), -+ /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ -+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), -+ /* info_val == policy number, or -1 for "Already changing" */ -+ KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), -+ /* info_val == policy number */ -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), -+ /* info_val == policy number */ -+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), -+/* Unused code just to make it easier to not have a comma at the end. 
-+ * All other codes MUST come before this */ -+ KBASE_TRACE_CODE_MAKE_CODE(DUMMY) ++static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) ++{ ++ if (!io_resources || !linux_resources) { ++ pr_err("%s: couldn't find proper resources\n", __func__); ++ return; ++ } + -+#if 0 /* Dummy section to avoid breaking formatting */ -+}; ++ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); ++ ++ linux_resources[0].start = io_resources->io_memory_region.start; ++ linux_resources[0].end = io_resources->io_memory_region.end; ++ linux_resources[0].flags = IORESOURCE_MEM; ++ ++ linux_resources[1].start = io_resources->job_irq_number; ++ linux_resources[1].end = io_resources->job_irq_number; ++ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ ++ linux_resources[2].start = io_resources->mmu_irq_number; ++ linux_resources[2].end = io_resources->mmu_irq_number; ++ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++ ++ linux_resources[3].start = io_resources->gpu_irq_number; ++ linux_resources[3].end = io_resources->gpu_irq_number; ++ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; ++} ++#endif /* CONFIG_OF */ ++ ++int kbase_platform_fake_register(void) ++{ ++ struct kbase_platform_config *config; ++#ifndef CONFIG_OF ++ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; +#endif ++ int err; + -+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c ++ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ ++ if (config == NULL) { ++ pr_err("%s: couldn't get platform config\n", __func__); ++ return -ENODEV; ++ } ++ ++ mali_device = platform_device_alloc("mali", 0); ++ if (mali_device == NULL) ++ return -ENOMEM; ++ ++#ifndef CONFIG_OF ++ kbasep_config_parse_io_resources(config->io_resources, resources); ++ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); ++ if (err) { ++ platform_device_put(mali_device); ++ mali_device = NULL; ++ return err; ++ } ++#endif /* CONFIG_OF */ ++ ++ err = platform_device_add(mali_device); ++ if (err) { ++ platform_device_unregister(mali_device); ++ mali_device = NULL; ++ return err; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL(kbase_platform_fake_register); ++ ++void kbase_platform_fake_unregister(void) ++{ ++ if (mali_device) ++ platform_device_unregister(mali_device); ++} ++EXPORT_SYMBOL(kbase_platform_fake_unregister); ++ ++#endif /* CONFIG_MALI_PLATFORM_FAKE */ ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c new file mode 100644 -index 000000000..5830e87f0 +index 000000000..97d543464 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c -@@ -0,0 +1,236 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c +@@ -0,0 +1,205 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -397173,232 +398096,201 @@ index 000000000..5830e87f0 + + + -+#include -+#include -+#include -+ -+#define CREATE_TRACE_POINTS -+ -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+#include "mali_timeline.h" -+ -+#include -+#include -+ -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans); -+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active); -+ -+struct kbase_trace_timeline_desc { -+ char *enum_str; -+ char *desc; -+ char *format; -+ char *format_desc; -+}; ++/** ++ * @file mali_kbase_pm.c ++ * Base kernel power management APIs ++ */ + -+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = { -+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc } -+ #include "mali_kbase_trace_timeline_defs.h" -+ #undef KBASE_TIMELINE_TRACE_CODE -+}; ++#include ++#include ++#include + -+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table) ++#include + -+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos) ++int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) +{ -+ if (*pos >= KBASE_NR_TRACE_CODES) -+ return NULL; -+ -+ return &kbase_trace_timeline_desc_table[*pos]; ++ return kbase_hwaccess_pm_powerup(kbdev, flags); +} + -+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data) ++void kbase_pm_halt(struct kbase_device *kbdev) +{ ++ kbase_hwaccess_pm_halt(kbdev); +} + -+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos) ++void kbase_pm_context_active(struct kbase_device *kbdev) +{ -+ (*pos)++; -+ -+ if (*pos == KBASE_NR_TRACE_CODES) -+ return NULL; -+ -+ return &kbase_trace_timeline_desc_table[*pos]; ++ (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + -+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data) ++int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) +{ -+ struct kbase_trace_timeline_desc *trace_desc = data; ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ int c; ++ int old_count; + -+ seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc); -+ return 0; -+} ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + ++ /* Trace timeline information about how long it took to handle the decision ++ * to powerup. Sometimes the event might be missed due to reading the count ++ * outside of mutex, but this is necessary to get the trace timing ++ * correct. 
*/ ++ old_count = kbdev->pm.active_count; ++ if (old_count == 0) ++ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + -+static const struct seq_operations kbasep_trace_timeline_seq_ops = { -+ .start = kbasep_trace_timeline_seq_start, -+ .next = kbasep_trace_timeline_seq_next, -+ .stop = kbasep_trace_timeline_seq_stop, -+ .show = kbasep_trace_timeline_seq_show, -+}; ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); ++ if (kbase_pm_is_suspending(kbdev)) { ++ switch (suspend_handler) { ++ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: ++ if (kbdev->pm.active_count != 0) ++ break; ++ /* FALLTHROUGH */ ++ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ if (old_count == 0) ++ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); ++ return 1; + -+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &kbasep_trace_timeline_seq_ops); -+} ++ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: ++ /* FALLTHROUGH */ ++ default: ++ KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); ++ break; ++ } ++ } ++ c = ++kbdev->pm.active_count; ++ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); + -+static const struct file_operations kbasep_trace_timeline_debugfs_fops = { -+ .open = kbasep_trace_timeline_debugfs_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; ++ /* Trace the event being handled */ ++ if (old_count == 0) ++ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + -+#ifdef CONFIG_DEBUG_FS ++ if (c == 1) ++ /* First context active: Power on the GPU and any cores requested by ++ * the policy */ ++ kbase_hwaccess_pm_gpu_active(kbdev); + -+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) -+{ -+ debugfs_create_file("mali_timeline_defs", -+ S_IRUGO, kbdev->mali_debugfs_directory, NULL, -+ &kbasep_trace_timeline_debugfs_fops); ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ return 0; +} + -+#endif /* CONFIG_DEBUG_FS */ ++KBASE_EXPORT_TEST_API(kbase_pm_context_active); + -+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js) ++void kbase_pm_context_idle(struct kbase_device *kbdev) +{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ int c; ++ int old_count; + -+ if (kbdev->timeline.slot_atoms_submitted[js] > 0) { -+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); -+ } else { -+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + -+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1); -+ KBASE_TIMELINE_JOB_START(kctx, js, atom_number); -+ } -+ ++kbdev->timeline.slot_atoms_submitted[js]; ++ /* Trace timeline information about how long it took to handle the decision ++ * to powerdown. Sometimes the event might be missed due to reading the ++ * count outside of mutex, but this is necessary to get the trace timing ++ * correct. 
*/ ++ old_count = kbdev->pm.active_count; ++ if (old_count == 0) ++ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + -+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); -+} ++ mutex_lock(&js_devdata->runpool_mutex); ++ mutex_lock(&kbdev->pm.lock); + -+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js, -+ kbasep_js_atom_done_code done_code) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); ++ c = --kbdev->pm.active_count; ++ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); ++ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); + -+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { -+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); -+ } else { -+ /* Job finished in JS_HEAD */ -+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); ++ KBASE_DEBUG_ASSERT(c >= 0); + -+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0); -+ KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number); ++ /* Trace the event being handled */ ++ if (old_count == 0) ++ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + -+ /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */ -+ if (kbase_backend_nr_atoms_submitted(kbdev, js)) { -+ struct kbase_jd_atom *next_katom; -+ struct kbase_context *next_kctx; ++ if (c == 0) { ++ /* Last context has gone idle */ ++ kbase_hwaccess_pm_gpu_idle(kbdev); + -+ /* Peek the next atom - note that the atom in JS_HEAD will already -+ * have been dequeued */ -+ next_katom = kbase_backend_inspect_head(kbdev, js); -+ WARN_ON(!next_katom); -+ next_kctx = next_katom->kctx; -+ KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0); -+ KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1); -+ KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom)); -+ } ++ /* Wake up anyone waiting for this to become 0 (e.g. suspend). The ++ * waiters must synchronize with us by locking the pm.lock after ++ * waiting */ ++ wake_up(&kbdev->pm.zero_active_count_wait); + } + -+ --kbdev->timeline.slot_atoms_submitted[js]; -+ -+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); ++ mutex_unlock(&kbdev->pm.lock); ++ mutex_unlock(&js_devdata->runpool_mutex); +} + -+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) ++KBASE_EXPORT_TEST_API(kbase_pm_context_idle); ++ ++void kbase_pm_suspend(struct kbase_device *kbdev) +{ -+ int uid = 0; -+ int old_uid; ++ KBASE_DEBUG_ASSERT(kbdev); + -+ /* If a producer already exists for the event, try to use their UID (multiple-producers) */ -+ uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]); -+ old_uid = uid; ++ /* Suspend vinstr. ++ * This call will block until vinstr is suspended. */ ++ kbase_vinstr_suspend(kbdev->vinstr_ctx); + -+ /* Get a new non-zero UID if we don't have one yet */ -+ while (!uid) -+ uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter); ++ mutex_lock(&kbdev->pm.lock); ++ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); ++ kbdev->pm.suspending = true; ++ mutex_unlock(&kbdev->pm.lock); + -+ /* Try to use this UID */ -+ if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid)) -+ /* If it changed, raced with another producer: we've lost this UID */ -+ uid = 0; ++ /* From now on, the active count will drop towards zero. Sometimes, it'll ++ * go up briefly before going down again. 
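++	 * (A caller using KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE may still bump
++	 * a non-zero count while the suspend is in progress.)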
However, once it reaches zero it ++ * will stay there - guaranteeing that we've idled all pm references */ + -+ KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid); -+} ++ /* Suspend job scheduler and associated components, so that it releases all ++ * the PM active count references */ ++ kbasep_js_suspend(kbdev); + -+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -+{ -+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); ++ /* Wait for the active count to reach zero. This is not the same as ++ * waiting for a power down, since not all policies power down when this ++ * reaches zero. */ ++ wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); + -+ if (uid != 0) { -+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) -+ /* If it changed, raced with another consumer: we've lost this UID */ -+ uid = 0; ++ /* NOTE: We synchronize with anything that was just finishing a ++ * kbase_pm_context_idle() call by locking the pm.lock below */ + -+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); -+ } ++ kbase_hwaccess_pm_suspend(kbdev); +} + -+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) ++void kbase_pm_resume(struct kbase_device *kbdev) +{ -+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); ++ /* MUST happen before any pm_context_active calls occur */ ++ kbase_hwaccess_pm_resume(kbdev); + -+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) -+ /* If it changed, raced with another consumer: we've lost this UID */ -+ uid = 0; ++ /* Initial active call, to power on the GPU/cores if needed */ ++ kbase_pm_context_active(kbdev); + -+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); -+} ++ /* Resume any blocked atoms (which may cause contexts to be scheduled in ++ * and dependent atoms to run) */ ++ kbase_resume_suspended_soft_jobs(kbdev); + -+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ /* Simply log the start of the transition */ -+ kbdev->timeline.l2_transitioning = true; -+ KBASE_TIMELINE_POWERING_L2(kbdev); -+} ++ /* Resume the Job Scheduler and associated components, and start running ++ * atoms */ ++ kbasep_js_resume(kbdev); + -+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+ /* Simply log the end of the transition */ -+ if (kbdev->timeline.l2_transitioning) { -+ kbdev->timeline.l2_transitioning = false; -+ KBASE_TIMELINE_POWERED_L2(kbdev); -+ } ++ /* Matching idle call, to power off the GPU/cores if we didn't actually ++ * need it and the policy doesn't want it on */ ++ kbase_pm_context_idle(kbdev); ++ ++ /* Resume vinstr operation */ ++ kbase_vinstr_resume(kbdev->vinstr_ctx); +} + -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h new file mode 100644 -index 000000000..a04f7c142 +index 000000000..37fa2479d --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h -@@ -0,0 +1,363 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h +@@ -0,0 +1,171 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -397415,359 +398307,349 @@ index 000000000..a04f7c142 + + + -+#if !defined(_KBASE_TRACE_TIMELINE_H) -+#define _KBASE_TRACE_TIMELINE_H -+ -+#ifdef CONFIG_MALI_TRACE_TIMELINE -+ -+enum kbase_trace_timeline_code { -+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val -+ #include "mali_kbase_trace_timeline_defs.h" -+ #undef KBASE_TIMELINE_TRACE_CODE -+}; -+ -+#ifdef CONFIG_DEBUG_FS -+ -+/** Initialize Timeline DebugFS entries */ -+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); -+ -+#else /* CONFIG_DEBUG_FS */ -+ -+#define kbasep_trace_timeline_debugfs_init CSTD_NOP ++/** ++ * @file mali_kbase_pm.h ++ * Power management API definitions ++ */ + -+#endif /* CONFIG_DEBUG_FS */ ++#ifndef _KBASE_PM_H_ ++#define _KBASE_PM_H_ + -+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE -+ * functions. -+ * Output is timestamped by either sched_clock() (default), local_clock(), or -+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */ -+#include "mali_timeline.h" ++#include "mali_kbase_hwaccess_pm.h" + -+/* Trace number of atoms in flight for kctx (atoms either not completed, or in -+ process of being returned to user */ -+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \ -+ (int)kctx->timeline.owner_tgid, \ -+ count); \ -+ } while (0) ++#define PM_ENABLE_IRQS 0x01 ++#define PM_HW_ISSUES_DETECT 0x02 + -+/* Trace atom_id being Ready to Run */ -+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \ -+ CTX_FLOW_ATOM_READY, \ -+ (int)kctx->timeline.owner_tgid, \ -+ atom_id); \ -+ } while (0) + -+/* Trace number of atoms submitted to job slot js ++/** Initialize the power management framework. 
+ * -+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions, -+ * so that those actions can be filtered out separately from this ++ * Must be called before any other power management function + * -+ * This is because this is more useful, as we can use it to calculate general -+ * utilization easily and accurately */ -+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_SLOT_ACTIVE, \ -+ (int)kctx->timeline.owner_tgid, \ -+ js, count); \ -+ } while (0) -+ -+ -+/* Trace atoms present in JS_NEXT */ -+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_SLOT_NEXT, \ -+ (int)kctx->timeline.owner_tgid, \ -+ js, count); \ -+ } while (0) -+ -+/* Trace atoms present in JS_HEAD */ -+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_SLOT_HEAD, \ -+ (int)kctx->timeline.owner_tgid, \ -+ js, count); \ -+ } while (0) -+ -+/* Trace that a soft stop/evict from next is being attempted on a slot */ -+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_SLOT_STOPPING, \ -+ (kctx) ? (int)kctx->timeline.owner_tgid : 0, \ -+ js, count); \ -+ } while (0) -+ -+ -+ -+/* Trace state of overall GPU power */ -+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_POWER_ACTIVE, active); \ -+ } while (0) -+ -+/* Trace state of tiler power */ -+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_POWER_TILER_ACTIVE, \ -+ hweight64(bitmap)); \ -+ } while (0) -+ -+/* Trace number of shaders currently powered */ -+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_POWER_SHADER_ACTIVE, \ -+ hweight64(bitmap)); \ -+ } while (0) -+ -+/* Trace state of L2 power */ -+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_SET_GPU_POWER_L2_ACTIVE, \ -+ hweight64(bitmap)); \ -+ } while (0) -+ -+/* Trace state of L2 cache*/ -+#define KBASE_TIMELINE_POWERING_L2(kbdev) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_FLOW_GPU_POWER_L2_POWERING, \ -+ 1); \ -+ } while (0) ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ * ++ * @return 0 if the power management framework was successfully initialized. 
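++ *
++ * Rough bring-up sketch (illustrative only; error handling and the actual
++ * flag choice depend on the caller):
++ *   err = kbase_pm_init(kbdev);
++ *   if (!err)
++ *           err = kbase_pm_powerup(kbdev, PM_HW_ISSUES_DETECT | PM_ENABLE_IRQS);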
++ */ ++int kbase_pm_init(struct kbase_device *kbdev); + -+#define KBASE_TIMELINE_POWERED_L2(kbdev) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ -+ SW_FLOW_GPU_POWER_L2_ACTIVE, \ -+ 1); \ -+ } while (0) ++/** Power up GPU after all modules have been initialized and interrupt handlers installed. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ * ++ * @param flags Flags to pass on to kbase_pm_init_hw ++ * ++ * @return 0 if powerup was successful. ++ */ ++int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); + -+/* Trace kbase_pm_send_event message send */ -+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ -+ SW_FLOW_PM_SEND_EVENT, \ -+ event_type, pm_event_id); \ -+ } while (0) ++/** ++ * Halt the power management framework. ++ * Should ensure that no new interrupts are generated, ++ * but allow any currently running interrupt handlers to complete successfully. ++ * The GPU is forced off by the time this function returns, regardless of ++ * whether or not the active power policy asks for the GPU to be powered off. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_halt(struct kbase_device *kbdev); + -+/* Trace kbase_pm_worker message receive */ -+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ -+ SW_FLOW_PM_HANDLE_EVENT, \ -+ event_type, pm_event_id); \ -+ } while (0) ++/** Terminate the power management framework. ++ * ++ * No power management functions may be called after this ++ * (except @ref kbase_pm_init) ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_term(struct kbase_device *kbdev); + ++/** Increment the count of active contexts. ++ * ++ * This function should be called when a context is about to submit a job. It informs the active power policy that the ++ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU. ++ * ++ * This function will block until the GPU is available. ++ * ++ * This function ASSERTS if a suspend is occuring/has occurred whilst this is ++ * in use. Use kbase_pm_contect_active_unless_suspending() instead. ++ * ++ * @note a Suspend is only visible to Kernel threads; user-space threads in a ++ * syscall cannot witness a suspend, because they are frozen before the suspend ++ * begins. 
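++ *
++ * Typical pairing (sketch, not taken from a specific caller):
++ *   kbase_pm_context_active(kbdev);
++ *   ... touch GPU registers / submit work ...
++ *   kbase_pm_context_idle(kbdev);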
++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_context_active(struct kbase_device *kbdev); + -+/* Trace atom_id starting in JS_HEAD */ -+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ -+ HW_START_GPU_JOB_CHAIN_SW_APPROX, \ -+ (int)kctx->timeline.owner_tgid, \ -+ js, _consumerof_atom_number); \ -+ } while (0) + -+/* Trace atom_id stopping on JS_HEAD */ -+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ -+ HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \ -+ (int)kctx->timeline.owner_tgid, \ -+ js, _producerof_atom_number_completed); \ -+ } while (0) ++/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ ++enum kbase_pm_suspend_handler { ++ /** A suspend is not expected/not possible - this is the same as ++ * kbase_pm_context_active() */ ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, ++ /** If we're suspending, fail and don't increase the active count */ ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, ++ /** If we're suspending, succeed and allow the active count to increase iff ++ * it didn't go from 0->1 (i.e., we didn't re-activate the GPU). ++ * ++ * This should only be used when there is a bounded time on the activation ++ * (e.g. guarantee it's going to be idled very soon after) */ ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE ++}; + -+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a -+ * certin caller */ -+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \ -+ trace_code, 1); \ -+ } while (0) ++/** Suspend 'safe' variant of kbase_pm_context_active() ++ * ++ * If a suspend is in progress, this allows for various different ways of ++ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. ++ * ++ * We returns a status code indicating whether we're allowed to keep the GPU ++ * active during the suspend, depending on the handler code. If the status code ++ * indicates a failure, the caller must abort whatever operation it was ++ * attempting, and potentially queue it up for after the OS has resumed. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ * @param suspend_handler The handler code for how to handle a suspend that might occur ++ * @return zero Indicates success ++ * @return non-zero Indicates failure due to the system being suspending/suspended. ++ */ ++int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); + -+/* Trace number of contexts active */ -+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \ -+ do { \ -+ struct timespec64 ts; \ -+ ktime_get_raw_ts64(&ts); \ -+ trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \ -+ count); \ -+ } while (0) ++/** Decrement the reference count of active contexts. ++ * ++ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power ++ * policy so the calling code should ensure that it does not access the GPU's registers. 
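++ *
++ * Every successful kbase_pm_context_active() or
++ * kbase_pm_context_active_handle_suspend() call must be balanced by exactly
++ * one call to this function.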
++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_context_idle(struct kbase_device *kbdev); + -+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ ++/** ++ * Suspend the GPU and prevent any further register accesses to it from Kernel ++ * threads. ++ * ++ * This is called in response to an OS suspend event, and calls into the various ++ * kbase components to complete the suspend. ++ * ++ * @note the mechanisms used here rely on all user-space threads being frozen ++ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up ++ * the GPU e.g. via atom submission. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) ++ */ ++void kbase_pm_suspend(struct kbase_device *kbdev); + +/** -+ * Trace that an atom is starting on a job slot ++ * Resume the GPU, allow register accesses to it, and resume running atoms on ++ * the GPU. + * -+ * The caller must be holding hwaccess_lock ++ * This is called in response to an OS resume event, and calls into the various ++ * kbase components to complete the resume. ++ * ++ * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ -+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js); ++void kbase_pm_resume(struct kbase_device *kbdev); + +/** -+ * Trace that an atom has done on a job slot ++ * kbase_pm_vsync_callback - vsync callback + * -+ * 'Done' in this sense can occur either because: -+ * - the atom in JS_HEAD finished -+ * - the atom in JS_NEXT was evicted ++ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise ++ * @data: Pointer to the kbase device as returned by kbase_find_device() + * -+ * Whether the atom finished or was evicted is passed in @a done_code ++ * Callback function used to notify the power management code that a vsync has ++ * occurred on the display. ++ */ ++void kbase_pm_vsync_callback(int buffer_updated, void *data); ++ ++#endif /* _KBASE_PM_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h +new file mode 100644 +index 000000000..7fb674ede +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h +@@ -0,0 +1,40 @@ ++/* + * -+ * It is assumed that the atom has already been removed from the submit slot, -+ * with either: -+ * - kbasep_jm_dequeue_submit_slot() -+ * - kbasep_jm_dequeue_tail_submit_slot() ++ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
+ * -+ * The caller must be holding hwaccess_lock + */ -+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js, -+ kbasep_js_atom_done_code done_code); + + -+/** Trace a pm event starting */ -+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, -+ enum kbase_timeline_pm_event event_sent); + -+/** Trace a pm event finishing */ -+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); ++/** ++ * @file mali_kbase_profiling_gator_api.h ++ * Model interface ++ */ + -+/** Check whether a pm event was present, and if so trace finishing it */ -+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); ++#ifndef _KBASE_PROFILING_GATOR_API_H_ ++#define _KBASE_PROFILING_GATOR_API_H_ + -+/** Trace L2 power-up start */ -+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev); ++/* ++ * List of possible actions to be controlled by Streamline. ++ * The following numbers are used by gator to control ++ * the frame buffer dumping and s/w counter reporting. ++ */ ++#define FBDUMP_CONTROL_ENABLE (1) ++#define FBDUMP_CONTROL_RATE (2) ++#define SW_COUNTER_ENABLE (3) ++#define FBDUMP_CONTROL_RESIZE_FACTOR (4) ++#define FBDUMP_CONTROL_MAX (5) ++#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE + -+/** Trace L2 power-up done */ -+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); ++void _mali_profiling_control(u32 action, u32 value); + -+#else ++#endif /* _KBASE_PROFILING_GATOR_API */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +new file mode 100644 +index 000000000..c97065006 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +@@ -0,0 +1,130 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP() + -+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP() + -+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP() ++#include "mali_kbase.h" + -+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP() ++#include "mali_kbase_regs_history_debugfs.h" + -+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP() ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + -+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP() ++#include + -+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP() + -+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP() ++static int regs_history_size_get(void *data, u64 *val) ++{ ++ struct kbase_io_history *const h = data; + -+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP() ++ *val = h->size; + -+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP() ++ return 0; ++} + -+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP() ++static int regs_history_size_set(void *data, u64 val) ++{ ++ struct kbase_io_history *const h = data; + -+#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP() ++ return kbase_io_history_resize(h, (u16)val); ++} + -+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() + -+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() ++DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, ++ regs_history_size_get, ++ regs_history_size_set, ++ "%llu\n"); + -+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP() + -+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP() ++/** ++ * regs_history_show - show callback for the register access history file. ++ * ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry ++ * ++ * This function is called to dump all recent accesses to the GPU registers. ++ * ++ * @return 0 if successfully prints data in debugfs entry file, failure ++ * otherwise ++ */ ++static int regs_history_show(struct seq_file *sfile, void *data) ++{ ++ struct kbase_io_history *const h = sfile->private; ++ u16 i; ++ size_t iters; ++ unsigned long flags; + -+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP() ++ if (!h->enabled) { ++ seq_puts(sfile, "The register access history is disabled\n"); ++ goto out; ++ } + -+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() ++ spin_lock_irqsave(&h->lock, flags); + -+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+} ++ iters = (h->size > h->count) ? h->count : h->size; ++ seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, ++ h->count); ++ for (i = 0; i < iters; ++i) { ++ struct kbase_io_access *io = ++ &h->buf[(h->count - iters + i) % h->size]; ++ char const access = (io->addr & 1) ? 
'w' : 'r'; + -+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, -+ struct kbase_jd_atom *katom, int js, -+ kbasep_js_atom_done_code done_code) -+{ -+ lockdep_assert_held(&kbdev->hwaccess_lock); -+} ++ seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, ++ (void *)(io->addr & ~0x1), io->value); ++ } + -+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) -+{ -+} ++ spin_unlock_irqrestore(&h->lock, flags); + -+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -+{ ++out: ++ return 0; +} + -+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -+{ -+} + -+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) ++/** ++ * regs_history_open - open operation for regs_history debugfs file ++ * ++ * @in: &struct inode pointer ++ * @file: &struct file pointer ++ * ++ * @return file descriptor ++ */ ++static int regs_history_open(struct inode *in, struct file *file) +{ ++ return single_open(file, ®s_history_show, in->i_private); +} + -+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) ++ ++static const struct file_operations regs_history_fops = { ++ .open = ®s_history_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) +{ ++ debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, ++ kbdev->mali_debugfs_directory, ++ &kbdev->io_history.enabled); ++ debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, ++ kbdev->mali_debugfs_directory, ++ &kbdev->io_history, ®s_history_size_fops); ++ debugfs_create_file("regs_history", S_IRUGO, ++ kbdev->mali_debugfs_directory, &kbdev->io_history, ++ ®s_history_fops); +} -+#endif /* CONFIG_MALI_TRACE_TIMELINE */ + -+#endif /* _KBASE_TRACE_TIMELINE_H */ + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h new file mode 100644 -index 000000000..156a95a67 +index 000000000..f10837002 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h -@@ -0,0 +1,140 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h +@@ -0,0 +1,50 @@ +/* + * -+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -397782,138 +398664,48 @@ index 000000000..156a95a67 + + + ++/** ++ * Header file for register access history support via debugfs ++ * ++ * This interface is made available via /sys/kernel/debug/mali#/regs_history*. ++ * ++ * Usage: ++ * - regs_history_enabled: whether recording of register accesses is enabled. ++ * Write 'y' to enable, 'n' to disable. ++ * - regs_history_size: size of the register history buffer, must be > 0 ++ * - regs_history: return the information about last accesses to the registers. 
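++ *
++ * Example session (illustrative; "mali0" stands for the per-device mali#
++ * directory mentioned above):
++ *   echo y   > /sys/kernel/debug/mali0/regs_history_enabled
++ *   echo 256 > /sys/kernel/debug/mali0/regs_history_size
++ *   cat /sys/kernel/debug/mali0/regs_history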
++ */ ++ ++#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H ++#define _KBASE_REGS_HISTORY_DEBUGFS_H + ++struct kbase_device; + -+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** -+ * ***** DO NOT INCLUDE DIRECTLY ***** -+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + -+/* -+ * Conventions on Event Names: ++/** ++ * kbasep_regs_history_debugfs_init - add debugfs entries for register history + * -+ * - The prefix determines something about how the timeline should be -+ * displayed, and is split up into various parts, separated by underscores: -+ * - 'SW' and 'HW' as the first part will be used to determine whether a -+ * timeline is to do with Software or Hardware - effectively, separate -+ * 'channels' for Software and Hardware -+ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and -+ * signify related pairs of events - these are optional. -+ * - 'FLOW' indicates a generic event, which can use dependencies -+ * - This gives events such as: -+ * - 'SW_ENTER_FOO' -+ * - 'SW_LEAVE_FOO' -+ * - 'SW_FLOW_BAR_1' -+ * - 'SW_FLOW_BAR_2' -+ * - 'HW_START_BAZ' -+ * - 'HW_STOP_BAZ' -+ * - And an unadorned HW event: -+ * - 'HW_BAZ_FROZBOZ' -+ */ -+ -+/* -+ * Conventions on parameter names: -+ * - anything with 'instance' in the name will have a separate timeline based -+ * on that instances. -+ * - underscored-prefixed parameters will by hidden by default on timelines -+ * -+ * Hence: -+ * - Different job slots have their own 'instance', based on the instance value -+ * - Per-context info (e.g. atoms on a context) have their own 'instance' -+ * (i.e. each context should be on a different timeline) -+ * -+ * Note that globally-shared resources can be tagged with a tgid, but we don't -+ * want an instance per context: -+ * - There's no point having separate Job Slot timelines for each context, that -+ * would be confusing - there's only really 3 job slots! -+ * - There's no point having separate Shader-powered timelines for each -+ * context, that would be confusing - all shader cores (whether it be 4, 8, -+ * etc) are shared in the system. 
++ * @kbdev: Pointer to kbase_device containing the register history + */ ++void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + -+ /* -+ * CTX events -+ */ -+ /* Separate timelines for each context 'instance'*/ -+ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"), -+ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"), -+ -+ /* -+ * SW Events -+ */ -+ /* Separate timelines for each slot 'instance' */ -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"), -+ /* Shader and overall power is shared - can't have separate instances of -+ * it, just tagging with the context */ -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"), -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"), -+ -+ /* SW Power event messaging. 
_event_type is one from the kbase_pm_event enum */ -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"), -+ /* SW L2 power events */ -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"), -+ -+ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"), ++#else /* CONFIG_DEBUG_FS */ + -+ /* -+ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock() -+ */ -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"), ++#define kbasep_regs_history_debugfs_init CSTD_NOP + -+ /* -+ * Significant Indirect callers of kbase_pm_check_transitions_nolock() -+ */ -+ /* kbase_pm_request_cores */ -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"), -+ /* kbase_pm_release_cores */ -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", 
"%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"), -+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"), -+ /* -+ * END: SW Functions that call kbase_pm_check_transitions_nolock() -+ */ ++#endif /* CONFIG_DEBUG_FS */ + -+ /* -+ * HW Events -+ */ -+ KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT, -+"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"), -+ KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, -+"HW: Job Chain start (SW approximated)", "%d,%d,%d", -+"_tgid,job_slot,_consumerof_atom_number_ready"), -+ KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, -+"HW: Job Chain stop (SW approximated)", "%d,%d,%d", -+"_tgid,job_slot,_producerof_atom_number_completed") -diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h ++#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c new file mode 100644 -index 000000000..c22a59324 +index 000000000..84aa3316e --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h -@@ -0,0 +1,545 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c +@@ -0,0 +1,1166 @@ +/* + * -+ * (C) COPYRIGHT 2008-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -397928,2862 +398720,2884 @@ index 000000000..c22a59324 + + + ++/** ++ * @file mali_kbase_replay.c ++ * Replay soft job handlers ++ */ + ++#include ++#include ++#include ++#include ++#include + -+#ifndef _KBASE_UKU_H_ -+#define _KBASE_UKU_H_ ++#define JOB_NOT_STARTED 0 ++#define JOB_TYPE_NULL (1) ++#define JOB_TYPE_VERTEX (5) ++#define JOB_TYPE_TILER (7) ++#define JOB_TYPE_FUSED (8) ++#define JOB_TYPE_FRAGMENT (9) + -+#include "mali_uk.h" -+#include "mali_base_kernel.h" ++#define JOB_HEADER_32_FBD_OFFSET (31*4) ++#define JOB_HEADER_64_FBD_OFFSET (44*4) + -+/* This file needs to support being included from kernel and userside (which use different defines) */ -+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON -+#define SUPPORT_MALI_ERROR_INJECT -+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */ -+#if defined(CONFIG_MALI_NO_MALI) -+#define SUPPORT_MALI_NO_MALI -+#elif defined(MALI_NO_MALI) -+#if MALI_NO_MALI -+#define SUPPORT_MALI_NO_MALI -+#endif -+#endif ++#define FBD_POINTER_MASK (~0x3f) + -+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT) -+#include "backend/gpu/mali_kbase_model_dummy.h" -+#endif ++#define SFBD_TILER_OFFSET (48*4) + -+#include "mali_kbase_gpuprops_types.h" ++#define MFBD_TILER_OFFSET (14*4) + -+/* -+ * 10.1: -+ * - Do mmap in kernel for SAME_VA memory allocations rather then -+ * calling back into the kernel as a 2nd stage of the allocation request. -+ * -+ * 10.2: -+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA -+ * region for use with JIT (ignored on 32-bit platforms) -+ * -+ * 10.3: -+ * - base_jd_core_req typedef-ed to u32 (instead of to u16) -+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END -+ * -+ * 10.4: -+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests -+ * -+ * 10.5: -+ * - Reverted to performing mmap in user space so that tools like valgrind work. 
-+ * -+ * 10.6: -+ * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE -+ */ -+#define BASE_UK_VERSION_MAJOR 10 -+#define BASE_UK_VERSION_MINOR 6 ++#define FBD_HIERARCHY_WEIGHTS 8 ++#define FBD_HIERARCHY_MASK_MASK 0x1fff + -+#define LINUX_UK_BASE_MAGIC 0x80 ++#define FBD_TYPE 1 + -+struct kbase_uk_mem_alloc { -+ union uk_header header; -+ /* IN */ -+ u64 va_pages; -+ u64 commit_pages; -+ u64 extent; -+ /* IN/OUT */ -+ u64 flags; -+ /* OUT */ -+ u64 gpu_va; -+ u16 va_alignment; -+ u8 padding[6]; -+}; ++#define HIERARCHY_WEIGHTS 13 + -+struct kbase_uk_mem_free { -+ union uk_header header; -+ /* IN */ -+ u64 gpu_addr; -+ /* OUT */ -+}; ++#define JOB_HEADER_ID_MAX 0xffff + -+struct kbase_uk_mem_alias { -+ union uk_header header; -+ /* IN/OUT */ -+ u64 flags; -+ /* IN */ -+ u64 stride; -+ u64 nents; -+ union kbase_pointer ai; -+ /* OUT */ -+ u64 gpu_va; -+ u64 va_pages; -+}; ++#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) ++#define JOB_POLYGON_LIST (0x03) + -+struct kbase_uk_mem_import { -+ union uk_header header; -+ /* IN */ -+ union kbase_pointer phandle; -+ u32 type; -+ u32 padding; -+ /* IN/OUT */ -+ u64 flags; -+ /* OUT */ -+ u64 gpu_va; -+ u64 va_pages; -+}; ++struct fragment_job { ++ struct job_descriptor_header header; + -+struct kbase_uk_mem_flags_change { -+ union uk_header header; -+ /* IN */ -+ u64 gpu_va; -+ u64 flags; -+ u64 mask; ++ u32 x[2]; ++ union { ++ u64 _64; ++ u32 _32; ++ } fragment_fbd; +}; + -+struct kbase_uk_job_submit { -+ union uk_header header; -+ /* IN */ -+ union kbase_pointer addr; -+ u32 nr_atoms; -+ u32 stride; /* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */ -+ /* OUT */ -+}; ++static void dump_job_head(struct kbase_context *kctx, char *head_str, ++ struct job_descriptor_header *job) ++{ ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kctx->kbdev->dev, "%s\n", head_str); ++ dev_dbg(kctx->kbdev->dev, ++ "addr = %p\n" ++ "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" ++ "first_incomplete_task = %x\n" ++ "fault_pointer = %llx\n" ++ "job_descriptor_size = %x\n" ++ "job_type = %x\n" ++ "job_barrier = %x\n" ++ "_reserved_01 = %x\n" ++ "_reserved_02 = %x\n" ++ "_reserved_03 = %x\n" ++ "_reserved_04/05 = %x,%x\n" ++ "job_index = %x\n" ++ "dependencies = %x,%x\n", ++ job, job->exception_status, ++ JOB_SOURCE_ID(job->exception_status), ++ (job->exception_status >> 8) & 0x3, ++ job->exception_status & 0xFF, ++ job->first_incomplete_task, ++ job->fault_pointer, job->job_descriptor_size, ++ job->job_type, job->job_barrier, job->_reserved_01, ++ job->_reserved_02, job->_reserved_03, ++ job->_reserved_04, job->_reserved_05, ++ job->job_index, ++ job->job_dependency_index_1, ++ job->job_dependency_index_2); + -+struct kbase_uk_post_term { -+ union uk_header header; -+}; ++ if (job->job_descriptor_size) ++ dev_dbg(kctx->kbdev->dev, "next = %llx\n", ++ job->next_job._64); ++ else ++ dev_dbg(kctx->kbdev->dev, "next = %x\n", ++ job->next_job._32); ++#endif ++} + -+struct kbase_uk_sync_now { -+ union uk_header header; ++static int kbasep_replay_reset_sfbd(struct kbase_context *kctx, ++ u64 fbd_address, u64 tiler_heap_free, ++ u16 hierarchy_mask, u32 default_weight) ++{ ++ struct { ++ u32 padding_1[1]; ++ u32 flags; ++ u64 padding_2[2]; ++ u64 heap_free_address; ++ u32 padding[8]; ++ u32 weights[FBD_HIERARCHY_WEIGHTS]; ++ } *fbd_tiler; ++ struct kbase_vmap_struct map; + -+ /* IN */ -+ struct base_syncset sset; ++ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); + -+ /* OUT */ -+}; ++ fbd_tiler = kbase_vmap(kctx, fbd_address + 
SFBD_TILER_OFFSET, ++ sizeof(*fbd_tiler), &map); ++ if (!fbd_tiler) { ++ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); ++ return -EINVAL; ++ } + -+struct kbase_uk_hwcnt_setup { -+ union uk_header header; ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kctx->kbdev->dev, ++ "FBD tiler:\n" ++ "flags = %x\n" ++ "heap_free_address = %llx\n", ++ fbd_tiler->flags, fbd_tiler->heap_free_address); ++#endif ++ if (hierarchy_mask) { ++ u32 weights[HIERARCHY_WEIGHTS]; ++ u16 old_hierarchy_mask = fbd_tiler->flags & ++ FBD_HIERARCHY_MASK_MASK; ++ int i, j = 0; + -+ /* IN */ -+ u64 dump_buffer; -+ u32 jm_bm; -+ u32 shader_bm; -+ u32 tiler_bm; -+ u32 unused_1; /* keep for backwards compatibility */ -+ u32 mmu_l2_bm; -+ u32 padding; -+ /* OUT */ -+}; ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { ++ if (old_hierarchy_mask & (1 << i)) { ++ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); ++ weights[i] = fbd_tiler->weights[j++]; ++ } else { ++ weights[i] = default_weight; ++ } ++ } + -+/** -+ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @buffer_count: requested number of dumping buffers -+ * @jm_bm: counters selection bitmask (JM) -+ * @shader_bm: counters selection bitmask (Shader) -+ * @tiler_bm: counters selection bitmask (Tiler) -+ * @mmu_l2_bm: counters selection bitmask (MMU_L2) -+ * @fd: dumping notification file descriptor -+ * -+ * This structure sets up HWC dumper/reader for this context. -+ * Multiple instances can be created for single context. -+ */ -+struct kbase_uk_hwcnt_reader_setup { -+ union uk_header header; + -+ /* IN */ -+ u32 buffer_count; -+ u32 jm_bm; -+ u32 shader_bm; -+ u32 tiler_bm; -+ u32 mmu_l2_bm; ++ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", ++ old_hierarchy_mask, hierarchy_mask); + -+ /* OUT */ -+ s32 fd; -+}; ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) ++ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", ++ i, weights[i]); + -+struct kbase_uk_hwcnt_dump { -+ union uk_header header; -+}; ++ j = 0; + -+struct kbase_uk_hwcnt_clear { -+ union uk_header header; -+}; ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { ++ if (hierarchy_mask & (1 << i)) { ++ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + -+struct kbase_uk_fence_validate { -+ union uk_header header; -+ /* IN */ -+ s32 fd; -+ u32 padding; -+ /* OUT */ -+}; ++ dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n", ++ i, weights[i], j); + -+struct kbase_uk_stream_create { -+ union uk_header header; -+ /* IN */ -+ char name[32]; -+ /* OUT */ -+ s32 fd; -+ u32 padding; -+}; ++ fbd_tiler->weights[j++] = weights[i]; ++ } ++ } + -+struct kbase_uk_gpuprops { -+ union uk_header header; ++ for (; j < FBD_HIERARCHY_WEIGHTS; j++) ++ fbd_tiler->weights[j] = 0; + -+ /* IN */ -+ struct mali_base_gpu_props props; -+ /* OUT */ -+}; ++ fbd_tiler->flags = hierarchy_mask | (1 << 16); ++ } + -+struct kbase_uk_mem_query { -+ union uk_header header; -+ /* IN */ -+ u64 gpu_addr; -+#define KBASE_MEM_QUERY_COMMIT_SIZE 1 -+#define KBASE_MEM_QUERY_VA_SIZE 2 -+#define KBASE_MEM_QUERY_FLAGS 3 -+ u64 query; -+ /* OUT */ -+ u64 value; -+}; ++ fbd_tiler->heap_free_address = tiler_heap_free; + -+struct kbase_uk_mem_commit { -+ union uk_header header; -+ /* IN */ -+ u64 gpu_addr; -+ u64 pages; -+ /* OUT */ -+ u32 result_subcode; -+ u32 padding; -+}; ++ dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n", ++ fbd_tiler->heap_free_address, fbd_tiler->flags); + -+struct kbase_uk_find_cpu_offset { -+ union 
uk_header header; -+ /* IN */ -+ u64 gpu_addr; -+ u64 cpu_addr; -+ u64 size; -+ /* OUT */ -+ u64 offset; -+}; ++ kbase_vunmap(kctx, &map); + -+#define KBASE_GET_VERSION_BUFFER_SIZE 64 -+struct kbase_uk_get_ddk_version { -+ union uk_header header; -+ /* OUT */ -+ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE]; -+ u32 version_string_size; -+ u32 padding; -+ u32 rk_version; -+}; ++ return 0; ++} + -+struct kbase_uk_disjoint_query { -+ union uk_header header; -+ /* OUT */ -+ u32 counter; -+ u32 padding; -+}; ++static int kbasep_replay_reset_mfbd(struct kbase_context *kctx, ++ u64 fbd_address, u64 tiler_heap_free, ++ u16 hierarchy_mask, u32 default_weight) ++{ ++ struct kbase_vmap_struct map; ++ struct { ++ u32 padding_0; ++ u32 flags; ++ u64 padding_1[2]; ++ u64 heap_free_address; ++ u64 padding_2; ++ u32 weights[FBD_HIERARCHY_WEIGHTS]; ++ } *fbd_tiler; + -+struct kbase_uk_set_flags { -+ union uk_header header; -+ /* IN */ -+ u32 create_flags; -+ u32 padding; -+}; ++ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); + -+#if MALI_UNIT_TEST -+#define TEST_ADDR_COUNT 4 -+#define KBASE_TEST_BUFFER_SIZE 128 -+struct kbase_exported_test_data { -+ u64 test_addr[TEST_ADDR_COUNT]; /**< memory address */ -+ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */ -+ union kbase_pointer kctx; /**< base context created by process */ -+ union kbase_pointer mm; /**< pointer to process address space */ -+ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ -+ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ -+}; ++ fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET, ++ sizeof(*fbd_tiler), &map); ++ if (!fbd_tiler) { ++ dev_err(kctx->kbdev->dev, ++ "kbasep_replay_reset_fbd: failed to map fbd\n"); ++ return -EINVAL; ++ } + -+struct kbase_uk_set_test_data { -+ union uk_header header; -+ /* IN */ -+ struct kbase_exported_test_data test_data; -+}; ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kctx->kbdev->dev, "FBD tiler:\n" ++ "flags = %x\n" ++ "heap_free_address = %llx\n", ++ fbd_tiler->flags, ++ fbd_tiler->heap_free_address); ++#endif ++ if (hierarchy_mask) { ++ u32 weights[HIERARCHY_WEIGHTS]; ++ u16 old_hierarchy_mask = (fbd_tiler->flags) & ++ FBD_HIERARCHY_MASK_MASK; ++ int i, j = 0; + -+#endif /* MALI_UNIT_TEST */ ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { ++ if (old_hierarchy_mask & (1 << i)) { ++ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); ++ weights[i] = fbd_tiler->weights[j++]; ++ } else { ++ weights[i] = default_weight; ++ } ++ } + -+#ifdef SUPPORT_MALI_ERROR_INJECT -+struct kbase_uk_error_params { -+ union uk_header header; -+ /* IN */ -+ struct kbase_error_params params; -+}; -+#endif /* SUPPORT_MALI_ERROR_INJECT */ + -+#ifdef SUPPORT_MALI_NO_MALI -+struct kbase_uk_model_control_params { -+ union uk_header header; -+ /* IN */ -+ struct kbase_model_control_params params; -+}; -+#endif /* SUPPORT_MALI_NO_MALI */ ++ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", ++ old_hierarchy_mask, hierarchy_mask); + -+#ifdef BASE_LEGACY_UK8_SUPPORT -+struct kbase_uk_keep_gpu_powered { -+ union uk_header header; -+ u32 enabled; -+ u32 padding; -+}; -+#endif /* BASE_LEGACY_UK8_SUPPORT */ ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) ++ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", ++ i, weights[i]); + -+struct kbase_uk_profiling_controls { -+ union uk_header header; -+ u32 profiling_controls[FBDUMP_CONTROL_MAX]; -+}; ++ j = 0; + -+struct kbase_uk_debugfs_mem_profile_add { -+ union uk_header header; -+ 
u32 len; -+ u32 padding; -+ union kbase_pointer buf; -+}; ++ for (i = 0; i < HIERARCHY_WEIGHTS; i++) { ++ if (hierarchy_mask & (1 << i)) { ++ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + -+struct kbase_uk_context_id { -+ union uk_header header; -+ /* OUT */ -+ int id; -+}; ++ dev_dbg(kctx->kbdev->dev, ++ " Writing hierarchy level %02d (%08x) to %d\n", ++ i, weights[i], j); + -+/** -+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @flags: timeline stream flags -+ * @fd: timeline stream file descriptor -+ * -+ * This structure is used when performing a call to acquire kernel side timeline -+ * stream file descriptor. -+ */ -+struct kbase_uk_tlstream_acquire { -+ union uk_header header; -+ /* IN */ -+ u32 flags; -+ /* OUT */ -+ s32 fd; -+}; ++ fbd_tiler->weights[j++] = weights[i]; ++ } ++ } + -+/** -+ * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange -+ * structure -+ * @header: UK structure header -+ * @fd: timeline stream file descriptor -+ * -+ * This structure is used when performing a call to acquire kernel side timeline -+ * stream file descriptor. -+ */ -+struct kbase_uk_tlstream_acquire_v10_4 { -+ union uk_header header; -+ /* IN */ -+ /* OUT */ -+ s32 fd; -+}; ++ for (; j < FBD_HIERARCHY_WEIGHTS; j++) ++ fbd_tiler->weights[j] = 0; + -+/** -+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * -+ * This structure is used when performing a call to flush kernel side -+ * timeline streams. -+ */ -+struct kbase_uk_tlstream_flush { -+ union uk_header header; -+ /* IN */ -+ /* OUT */ -+}; ++ fbd_tiler->flags = hierarchy_mask | (1 << 16); ++ } + -+#if MALI_UNIT_TEST -+/** -+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @tpw_count: number of trace point writers in each context -+ * @msg_delay: time delay between tracepoints from one writer in milliseconds -+ * @msg_count: number of trace points written by one writer -+ * @aux_msg: if non-zero aux messages will be included -+ * -+ * This structure is used when performing a call to start timeline stream test -+ * embedded in kernel. -+ */ -+struct kbase_uk_tlstream_test { -+ union uk_header header; -+ /* IN */ -+ u32 tpw_count; -+ u32 msg_delay; -+ u32 msg_count; -+ u32 aux_msg; -+ /* OUT */ -+}; ++ fbd_tiler->heap_free_address = tiler_heap_free; + -+/** -+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @bytes_collected: number of bytes read by user -+ * @bytes_generated: number of bytes generated by tracepoints -+ * -+ * This structure is used when performing a call to obtain timeline stream -+ * statistics. -+ */ -+struct kbase_uk_tlstream_stats { -+ union uk_header header; /**< UK structure header. 
*/ -+ /* IN */ -+ /* OUT */ -+ u32 bytes_collected; -+ u32 bytes_generated; -+}; -+#endif /* MALI_UNIT_TEST */ ++ kbase_vunmap(kctx, &map); + -+/** -+ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl -+ * @header: UK structure header -+ * @data: Counter samples for the dummy model -+ * @size:............Size of the counter sample data -+ */ -+struct kbase_uk_prfcnt_values { -+ union uk_header header; -+ /* IN */ -+ u32 *data; -+ u32 size; -+}; ++ return 0; ++} + +/** -+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @evt: the GPU address containing the event -+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or -+ * BASE_JD_SOFT_EVENT_RESET -+ * @flags: reserved for future uses, must be set to 0 ++ * @brief Reset the status of an FBD pointed to by a tiler job + * -+ * This structure is used to update the status of a software event. If the -+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting -+ * on this event will complete. ++ * This performs two functions : ++ * - Set the hierarchy mask ++ * - Reset the tiler free heap address ++ * ++ * @param[in] kctx Context pointer ++ * @param[in] job_header Address of job header to reset. ++ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to ++ * @param[in] hierarchy_mask The hierarchy mask to use ++ * @param[in] default_weight Default hierarchy weight to write when no other ++ * weight is given in the FBD ++ * @param[in] job_64 true if this job is using 64-bit ++ * descriptors ++ * ++ * @return 0 on success, error code on failure + */ -+struct kbase_uk_soft_event_update { -+ union uk_header header; -+ /* IN */ -+ u64 evt; -+ u32 new_status; -+ u32 flags; -+}; ++static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx, ++ u64 job_header, u64 tiler_heap_free, ++ u16 hierarchy_mask, u32 default_weight, bool job_64) ++{ ++ struct kbase_vmap_struct map; ++ u64 fbd_address; ++ ++ if (job_64) { ++ u64 *job_ext; ++ ++ job_ext = kbase_vmap(kctx, ++ job_header + JOB_HEADER_64_FBD_OFFSET, ++ sizeof(*job_ext), &map); ++ ++ if (!job_ext) { ++ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); ++ return -EINVAL; ++ } ++ ++ fbd_address = *job_ext; ++ ++ kbase_vunmap(kctx, &map); ++ } else { ++ u32 *job_ext; ++ ++ job_ext = kbase_vmap(kctx, ++ job_header + JOB_HEADER_32_FBD_OFFSET, ++ sizeof(*job_ext), &map); ++ ++ if (!job_ext) { ++ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); ++ return -EINVAL; ++ } ++ ++ fbd_address = *job_ext; ++ ++ kbase_vunmap(kctx, &map); ++ } ++ ++ if (fbd_address & FBD_TYPE) { ++ return kbasep_replay_reset_mfbd(kctx, ++ fbd_address & FBD_POINTER_MASK, ++ tiler_heap_free, ++ hierarchy_mask, ++ default_weight); ++ } else { ++ return kbasep_replay_reset_sfbd(kctx, ++ fbd_address & FBD_POINTER_MASK, ++ tiler_heap_free, ++ hierarchy_mask, ++ default_weight); ++ } ++} + +/** -+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure -+ * @header: UK structure header -+ * @va_pages: Number of virtual pages required for JIT ++ * @brief Reset the status of a job + * -+ * This structure is used when requesting initialization of JIT. ++ * This performs the following functions : ++ * ++ * - Reset the Job Status field of each job to NOT_STARTED. ++ * - Set the Job Type field of any Vertex Jobs to Null Job. 
++ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of ++ * the tiler_heap_free parameter, and set the hierarchy level mask to the ++ * hier_mask parameter. ++ * - Offset HW dependencies by the hw_job_id_offset parameter ++ * - Set the Perform Job Barrier flag if this job is the first in the chain ++ * - Read the address of the next job header ++ * ++ * @param[in] kctx Context pointer ++ * @param[in,out] job_header Address of job header to reset. Set to address ++ * of next job header on exit. ++ * @param[in] prev_jc Previous job chain to link to, if this job is ++ * the last in the chain. ++ * @param[in] hw_job_id_offset Offset for HW job IDs ++ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to ++ * @param[in] hierarchy_mask The hierarchy mask to use ++ * @param[in] default_weight Default hierarchy weight to write when no other ++ * weight is given in the FBD ++ * @param[in] first_in_chain true if this job is the first in the chain ++ * @param[in] fragment_chain true if this job is in the fragment chain ++ * ++ * @return 0 on success, error code on failure + */ -+struct kbase_uk_mem_jit_init { -+ union uk_header header; -+ /* IN */ -+ u64 va_pages; -+}; -+ -+enum kbase_uk_function_id { -+ KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), -+ KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1), -+ KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2), -+ KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3), -+ KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4), -+ KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5), -+ KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6), -+ -+#ifdef BASE_LEGACY_UK6_SUPPORT -+ KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7), -+#endif /* BASE_LEGACY_UK6_SUPPORT */ ++static int kbasep_replay_reset_job(struct kbase_context *kctx, ++ u64 *job_header, u64 prev_jc, ++ u64 tiler_heap_free, u16 hierarchy_mask, ++ u32 default_weight, u16 hw_job_id_offset, ++ bool first_in_chain, bool fragment_chain) ++{ ++ struct fragment_job *frag_job; ++ struct job_descriptor_header *job; ++ u64 new_job_header; ++ struct kbase_vmap_struct map; + -+ KBASE_FUNC_SYNC = (UK_FUNC_ID + 8), ++ frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); ++ if (!frag_job) { ++ dev_err(kctx->kbdev->dev, ++ "kbasep_replay_parse_jc: failed to map jc\n"); ++ return -EINVAL; ++ } ++ job = &frag_job->header; + -+ KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9), ++ dump_job_head(kctx, "Job header:", job); + -+ KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10), -+ KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11), -+ KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12), ++ if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { ++ dev_err(kctx->kbdev->dev, "Job already not started\n"); ++ goto out_unmap; ++ } ++ job->exception_status = JOB_NOT_STARTED; + -+ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14), ++ if (job->job_type == JOB_TYPE_VERTEX) ++ job->job_type = JOB_TYPE_NULL; + -+ KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), ++ if (job->job_type == JOB_TYPE_FUSED) { ++ dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); ++ goto out_unmap; ++ } + -+ KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), -+ KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), ++ if (first_in_chain) ++ job->job_barrier = 1; + -+ KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), -+ KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20), -+ KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21), ++ if ((job->job_dependency_index_1 + hw_job_id_offset) > ++ JOB_HEADER_ID_MAX || ++ (job->job_dependency_index_2 + hw_job_id_offset) > ++ JOB_HEADER_ID_MAX || ++ (job->job_index + hw_job_id_offset) > 
JOB_HEADER_ID_MAX) { ++ dev_err(kctx->kbdev->dev, ++ "Job indicies/dependencies out of valid range\n"); ++ goto out_unmap; ++ } + -+#ifdef BASE_LEGACY_UK8_SUPPORT -+ KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22), -+#endif /* BASE_LEGACY_UK8_SUPPORT */ ++ if (job->job_dependency_index_1) ++ job->job_dependency_index_1 += hw_job_id_offset; ++ if (job->job_dependency_index_2) ++ job->job_dependency_index_2 += hw_job_id_offset; + -+ KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23), -+ KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24), -+ KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25), -+ KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26), -+ /* to be used only for testing -+ * purposes, otherwise these controls -+ * are set through gator API */ ++ job->job_index += hw_job_id_offset; + -+ KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27), -+ KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28), -+ KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29), ++ if (job->job_descriptor_size) { ++ new_job_header = job->next_job._64; ++ if (!job->next_job._64) ++ job->next_job._64 = prev_jc; ++ } else { ++ new_job_header = job->next_job._32; ++ if (!job->next_job._32) ++ job->next_job._32 = prev_jc; ++ } ++ dump_job_head(kctx, "Updated to:", job); + -+ KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), ++ if (job->job_type == JOB_TYPE_TILER) { ++ bool job_64 = job->job_descriptor_size != 0; + -+ KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32), -+#if MALI_UNIT_TEST -+ KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), -+ KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), -+#endif /* MALI_UNIT_TEST */ -+ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), ++ if (kbasep_replay_reset_tiler_job(kctx, *job_header, ++ tiler_heap_free, hierarchy_mask, ++ default_weight, job_64) != 0) ++ goto out_unmap; + -+ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), ++ } else if (job->job_type == JOB_TYPE_FRAGMENT) { ++ u64 fbd_address; + -+#ifdef SUPPORT_MALI_NO_MALI -+ KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), -+#endif ++ if (job->job_descriptor_size) ++ fbd_address = frag_job->fragment_fbd._64; ++ else ++ fbd_address = (u64)frag_job->fragment_fbd._32; + -+ KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), ++ if (fbd_address & FBD_TYPE) { ++ if (kbasep_replay_reset_mfbd(kctx, ++ fbd_address & FBD_POINTER_MASK, ++ tiler_heap_free, ++ hierarchy_mask, ++ default_weight) != 0) ++ goto out_unmap; ++ } else { ++ if (kbasep_replay_reset_sfbd(kctx, ++ fbd_address & FBD_POINTER_MASK, ++ tiler_heap_free, ++ hierarchy_mask, ++ default_weight) != 0) ++ goto out_unmap; ++ } ++ } + -+ KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), ++ kbase_vunmap(kctx, &map); + -+ KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40), ++ *job_header = new_job_header; + -+ KBASE_FUNC_MAX -+}; ++ return 0; + -+#endif /* _KBASE_UKU_H_ */ ++out_unmap: ++ kbase_vunmap(kctx, &map); ++ return -EINVAL; ++} + -diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c -new file mode 100644 -index 000000000..be474ff87 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c -@@ -0,0 +1,33 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
++/** ++ * @brief Find the highest job ID in a job chain + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @param[in] kctx Context pointer ++ * @param[in] jc Job chain start address ++ * @param[out] hw_job_id Highest job ID in chain + * ++ * @return 0 on success, error code on failure + */ ++static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, ++ u64 jc, u16 *hw_job_id) ++{ ++ while (jc) { ++ struct job_descriptor_header *job; ++ struct kbase_vmap_struct map; + ++ dev_dbg(kctx->kbdev->dev, ++ "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc); + ++ job = kbase_vmap(kctx, jc, sizeof(*job), &map); ++ if (!job) { ++ dev_err(kctx->kbdev->dev, "failed to map jc\n"); + ++ return -EINVAL; ++ } + ++ if (job->job_index > *hw_job_id) ++ *hw_job_id = job->job_index; + -+#include -+ -+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) -+{ -+ struct list_head *pos = base->next; -+ -+ while (pos != base) { -+ if (pos == entry) -+ return true; ++ if (job->job_descriptor_size) ++ jc = job->next_job._64; ++ else ++ jc = job->next_job._32; + -+ pos = pos->next; ++ kbase_vunmap(kctx, &map); + } -+ return false; ++ ++ return 0; +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h -new file mode 100644 -index 000000000..fd7252dab ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h -@@ -0,0 +1,37 @@ -+/* ++ ++/** ++ * @brief Reset the status of a number of jobs + * -+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. ++ * This function walks the provided job chain, and calls ++ * kbasep_replay_reset_job for each job. It also links the job chain to the ++ * provided previous job chain. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * The function will fail if any of the jobs passed already have status of ++ * NOT_STARTED. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @param[in] kctx Context pointer ++ * @param[in] jc Job chain to be processed ++ * @param[in] prev_jc Job chain to be added to. 
May be NULL ++ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to ++ * @param[in] hierarchy_mask The hierarchy mask to use ++ * @param[in] default_weight Default hierarchy weight to write when no other ++ * weight is given in the FBD ++ * @param[in] hw_job_id_offset Offset for HW job IDs ++ * @param[in] fragment_chain true if this chain is the fragment chain + * ++ * @return 0 on success, error code otherwise + */ ++static int kbasep_replay_parse_jc(struct kbase_context *kctx, ++ u64 jc, u64 prev_jc, ++ u64 tiler_heap_free, u16 hierarchy_mask, ++ u32 default_weight, u16 hw_job_id_offset, ++ bool fragment_chain) ++{ ++ bool first_in_chain = true; ++ int nr_jobs = 0; + ++ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n", ++ jc, hw_job_id_offset); + ++ while (jc) { ++ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc); + ++ if (kbasep_replay_reset_job(kctx, &jc, prev_jc, ++ tiler_heap_free, hierarchy_mask, ++ default_weight, hw_job_id_offset, ++ first_in_chain, fragment_chain) != 0) ++ return -EINVAL; + ++ first_in_chain = false; + -+#ifndef _KBASE_UTILITY_H -+#define _KBASE_UTILITY_H ++ nr_jobs++; ++ if (fragment_chain && ++ nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) { ++ dev_err(kctx->kbdev->dev, ++ "Exceeded maximum number of jobs in fragment chain\n"); ++ return -EINVAL; ++ } ++ } + -+#ifndef _KBASE_H_ -+#error "Don't include this file directly, use mali_kbase.h instead" -+#endif ++ return 0; ++} + -+/** Test whether the given list entry is a member of the given list. ++/** ++ * @brief Reset the status of a replay job, and set up dependencies + * -+ * @param base The head of the list to be tested -+ * @param entry The list entry to be tested ++ * This performs the actions to allow the replay job to be re-run following ++ * completion of the passed dependency. + * -+ * @return true if entry is a member of base -+ * false otherwise ++ * @param[in] katom The atom to be reset ++ * @param[in] dep_atom The dependency to be attached to the atom + */ -+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); ++static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom, ++ struct kbase_jd_atom *dep_atom) ++{ ++ katom->status = KBASE_JD_ATOM_STATE_QUEUED; ++ kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA); ++ list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]); ++} + -+#endif /* _KBASE_UTILITY_H */ -diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c -new file mode 100644 -index 000000000..8395568d0 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c -@@ -0,0 +1,2070 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++/** ++ * @brief Allocate an unused katom + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * This will search the provided context for an unused katom, and will mark it ++ * as KBASE_JD_ATOM_STATE_QUEUED. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * If no atoms are available then the function will fail. 
+ * ++ * @param[in] kctx Context pointer ++ * @return An atom ID, or -1 on failure + */ ++static int kbasep_allocate_katom(struct kbase_context *kctx) ++{ ++ struct kbase_jd_context *jctx = &kctx->jctx; ++ int i; + ++ for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { ++ if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { ++ jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED; ++ dev_dbg(kctx->kbdev->dev, ++ "kbasep_allocate_katom: Allocated atom %d\n", ++ i); ++ return i; ++ } ++ } + ++ return -1; ++} + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/*****************************************************************************/ -+ -+/* Hwcnt reader API version */ -+#define HWCNT_READER_API 1 ++/** ++ * @brief Release a katom ++ * ++ * This will mark the provided atom as available, and remove any dependencies. ++ * ++ * For use on error path. ++ * ++ * @param[in] kctx Context pointer ++ * @param[in] atom_id ID of atom to release ++ */ ++static void kbasep_release_katom(struct kbase_context *kctx, int atom_id) ++{ ++ struct kbase_jd_context *jctx = &kctx->jctx; + -+/* The number of nanoseconds in a second. */ -+#define NSECS_IN_SEC 1000000000ull /* ns */ ++ dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n", ++ atom_id); + -+/* The time resolution of dumping service. */ -+#define DUMPING_RESOLUTION 500000ull /* ns */ ++ while (!list_empty(&jctx->atoms[atom_id].dep_head[0])) ++ list_del(jctx->atoms[atom_id].dep_head[0].next); + -+/* The maximal supported number of dumping buffers. */ -+#define MAX_BUFFER_COUNT 32 ++ while (!list_empty(&jctx->atoms[atom_id].dep_head[1])) ++ list_del(jctx->atoms[atom_id].dep_head[1].next); + -+/* Size and number of hw counters blocks. 
*/ -+#define NR_CNT_BLOCKS_PER_GROUP 8 -+#define NR_CNT_PER_BLOCK 64 -+#define NR_BYTES_PER_CNT 4 -+#define NR_BYTES_PER_HDR 16 -+#define PRFCNT_EN_MASK_OFFSET 0x8 ++ jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED; ++} + -+/*****************************************************************************/ ++static void kbasep_replay_create_atom(struct kbase_context *kctx, ++ struct base_jd_atom_v2 *atom, ++ int atom_nr, ++ base_jd_prio prio) ++{ ++ atom->nr_extres = 0; ++ atom->extres_list.value = NULL; ++ atom->device_nr = 0; ++ atom->prio = prio; ++ atom->atom_number = atom_nr; + -+enum { -+ SHADER_HWCNT_BM, -+ TILER_HWCNT_BM, -+ MMU_L2_HWCNT_BM, -+ JM_HWCNT_BM -+}; ++ base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID); ++ base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID); + -+enum vinstr_state { -+ VINSTR_IDLE, -+ VINSTR_DUMPING, -+ VINSTR_SUSPENDING, -+ VINSTR_SUSPENDED, -+ VINSTR_RESUMING -+}; ++ atom->udata.blob[0] = 0; ++ atom->udata.blob[1] = 0; ++} + +/** -+ * struct kbase_vinstr_context - vinstr context per device -+ * @lock: protects the entire vinstr context -+ * @kbdev: pointer to kbase device -+ * @kctx: pointer to kbase context -+ * @vmap: vinstr vmap for mapping hwcnt dump buffer -+ * @gpu_va: GPU hwcnt dump buffer address -+ * @cpu_va: the CPU side mapping of the hwcnt dump buffer -+ * @dump_size: size of the dump buffer in bytes -+ * @bitmap: current set of counters monitored, not always in sync -+ * with hardware -+ * @reprogram: when true, reprogram hwcnt block with the new set of -+ * counters -+ * @state: vinstr state -+ * @state_lock: protects information about vinstr state -+ * @suspend_waitq: notification queue to trigger state re-validation -+ * @suspend_cnt: reference counter of vinstr's suspend state -+ * @suspend_work: worker to execute on entering suspended state -+ * @resume_work: worker to execute on leaving suspended state -+ * @nclients: number of attached clients, pending or otherwise -+ * @waiting_clients: head of list of clients being periodically sampled -+ * @idle_clients: head of list of clients being idle -+ * @suspended_clients: head of list of clients being suspended -+ * @thread: periodic sampling thread -+ * @waitq: notification queue of sampling thread -+ * @request_pending: request for action for sampling thread ++ * @brief Create two atoms for the purpose of replaying jobs ++ * ++ * Two atoms are allocated and created. The jc pointer is not set at this ++ * stage. The second atom has a dependency on the first. The remaining fields ++ * are set up as follows : ++ * ++ * - No external resources. Any required external resources will be held by the ++ * replay atom. ++ * - device_nr is set to 0. This is not relevant as ++ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. ++ * - Priority is inherited from the replay job. 
++ * ++ * @param[out] t_atom Atom to use for tiler jobs ++ * @param[out] f_atom Atom to use for fragment jobs ++ * @param[in] prio Priority of new atom (inherited from replay soft ++ * job) ++ * @return 0 on success, error code on failure + */ -+struct kbase_vinstr_context { -+ struct mutex lock; -+ struct kbase_device *kbdev; -+ struct kbase_context *kctx; ++static int kbasep_replay_create_atoms(struct kbase_context *kctx, ++ struct base_jd_atom_v2 *t_atom, ++ struct base_jd_atom_v2 *f_atom, ++ base_jd_prio prio) ++{ ++ int t_atom_nr, f_atom_nr; + -+ struct kbase_vmap_struct vmap; -+ u64 gpu_va; -+ void *cpu_va; -+ size_t dump_size; -+ u32 bitmap[4]; -+ bool reprogram; ++ t_atom_nr = kbasep_allocate_katom(kctx); ++ if (t_atom_nr < 0) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); ++ return -EINVAL; ++ } + -+ enum vinstr_state state; -+ struct spinlock state_lock; -+ wait_queue_head_t suspend_waitq; -+ unsigned int suspend_cnt; -+ struct work_struct suspend_work; -+ struct work_struct resume_work; ++ f_atom_nr = kbasep_allocate_katom(kctx); ++ if (f_atom_nr < 0) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); ++ kbasep_release_katom(kctx, t_atom_nr); ++ return -EINVAL; ++ } + -+ u32 nclients; -+ struct list_head waiting_clients; -+ struct list_head idle_clients; -+ struct list_head suspended_clients; ++ kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); ++ kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); + -+ struct task_struct *thread; -+ wait_queue_head_t waitq; -+ atomic_t request_pending; -+}; ++ base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA); + -+/** -+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context -+ * @vinstr_ctx: vinstr context client is attached to -+ * @list: node used to attach this client to list in vinstr context -+ * @buffer_count: number of buffers this client is using -+ * @event_mask: events this client reacts to -+ * @dump_size: size of one dump buffer in bytes -+ * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters -+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) -+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) -+ * @accum_buffer: temporary accumulation buffer for preserving counters -+ * @dump_time: next time this clients shall request hwcnt dump -+ * @dump_interval: interval between periodic hwcnt dumps -+ * @dump_buffers: kernel hwcnt dump buffers allocated by this client -+ * @dump_buffers_meta: metadata of dump buffers -+ * @meta_idx: index of metadata being accessed by userspace -+ * @read_idx: index of buffer read by userspace -+ * @write_idx: index of buffer being written by dumping service -+ * @waitq: client's notification queue -+ * @pending: when true, client has attached but hwcnt not yet updated -+ */ -+struct kbase_vinstr_client { -+ struct kbase_vinstr_context *vinstr_ctx; -+ struct list_head list; -+ unsigned int buffer_count; -+ u32 event_mask; -+ size_t dump_size; -+ u32 bitmap[4]; -+ void __user *legacy_buffer; -+ void *kernel_buffer; -+ void *accum_buffer; -+ u64 dump_time; -+ u32 dump_interval; -+ char *dump_buffers; -+ struct kbase_hwcnt_reader_metadata *dump_buffers_meta; -+ atomic_t meta_idx; -+ atomic_t read_idx; -+ atomic_t write_idx; -+ wait_queue_head_t waitq; -+ bool pending; -+}; ++ return 0; ++} + -+/** -+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer -+ * @hrtimer: high resolution timer -+ * @vinstr_ctx: vinstr context -+ */ -+struct 
kbasep_vinstr_wake_up_timer { -+ struct hrtimer hrtimer; -+ struct kbase_vinstr_context *vinstr_ctx; -+}; ++#ifdef CONFIG_MALI_DEBUG ++static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload) ++{ ++ u64 next; + -+/*****************************************************************************/ ++ dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n"); ++ next = payload->tiler_jc_list; + -+static int kbasep_vinstr_service_task(void *data); ++ while (next) { ++ struct kbase_vmap_struct map; ++ base_jd_replay_jc *jc_struct; + -+static unsigned int kbasep_vinstr_hwcnt_reader_poll( -+ struct file *filp, -+ poll_table *wait); -+static long kbasep_vinstr_hwcnt_reader_ioctl( -+ struct file *filp, -+ unsigned int cmd, -+ unsigned long arg); -+static int kbasep_vinstr_hwcnt_reader_mmap( -+ struct file *filp, -+ struct vm_area_struct *vma); -+static int kbasep_vinstr_hwcnt_reader_release( -+ struct inode *inode, -+ struct file *filp); ++ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map); + -+/* The timeline stream file operations structure. */ -+static const struct file_operations vinstr_client_fops = { -+ .poll = kbasep_vinstr_hwcnt_reader_poll, -+ .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, -+ .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, -+ .mmap = kbasep_vinstr_hwcnt_reader_mmap, -+ .release = kbasep_vinstr_hwcnt_reader_release, -+}; ++ if (!jc_struct) ++ return; + -+/*****************************************************************************/ ++ dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n", ++ jc_struct, jc_struct->jc, jc_struct->next); + -+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) ++ next = jc_struct->next; ++ ++ kbase_vunmap(kctx, &map); ++ } ++} ++#endif ++ ++/** ++ * @brief Parse a base_jd_replay_payload provided by userspace ++ * ++ * This will read the payload from userspace, and parse the job chains. ++ * ++ * @param[in] kctx Context pointer ++ * @param[in] replay_atom Replay soft job atom ++ * @param[in] t_atom Atom to use for tiler jobs ++ * @param[in] f_atom Atom to use for fragment jobs ++ * @return 0 on success, error code on failure ++ */ ++static int kbasep_replay_parse_payload(struct kbase_context *kctx, ++ struct kbase_jd_atom *replay_atom, ++ struct base_jd_atom_v2 *t_atom, ++ struct base_jd_atom_v2 *f_atom) +{ -+ struct kbase_context *kctx = vinstr_ctx->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ struct kbase_uk_hwcnt_setup setup; -+ int err; ++ base_jd_replay_payload *payload = NULL; ++ u64 next; ++ u64 prev_jc = 0; ++ u16 hw_job_id_offset = 0; ++ int ret = -EINVAL; ++ struct kbase_vmap_struct map; + -+ setup.dump_buffer = vinstr_ctx->gpu_va; -+ setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; -+ setup.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; -+ setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; -+ setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; ++ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n", ++ replay_atom->jc, sizeof(payload)); + -+ /* Mark the context as active so the GPU is kept turned on */ -+ /* A suspend won't happen here, because we're in a syscall from a -+ * userspace thread. 
*/ -+ kbase_pm_context_active(kbdev); ++ payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); ++ if (!payload) { ++ dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); ++ return -EINVAL; ++ } + -+ /* Schedule the context in */ -+ kbasep_js_schedule_privileged_ctx(kbdev, kctx); -+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); -+ if (err) { -+ /* Release the context. This had its own Power Manager Active -+ * reference */ -+ kbasep_js_release_privileged_ctx(kbdev, kctx); ++#ifdef BASE_LEGACY_UK10_2_SUPPORT ++ if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) { ++ base_jd_replay_payload_uk10_2 *payload_uk10_2; ++ u16 tiler_core_req; ++ u16 fragment_core_req; + -+ /* Also release our Power Manager Active reference */ -+ kbase_pm_context_idle(kbdev); ++ payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload; ++ memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req, ++ sizeof(tiler_core_req)); ++ memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req, ++ sizeof(fragment_core_req)); ++ payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff); ++ payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff); + } ++#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + -+ return err; -+} ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); ++ dev_dbg(kctx->kbdev->dev, "Payload structure:\n" ++ "tiler_jc_list = %llx\n" ++ "fragment_jc = %llx\n" ++ "tiler_heap_free = %llx\n" ++ "fragment_hierarchy_mask = %x\n" ++ "tiler_hierarchy_mask = %x\n" ++ "hierarchy_default_weight = %x\n" ++ "tiler_core_req = %x\n" ++ "fragment_core_req = %x\n", ++ payload->tiler_jc_list, ++ payload->fragment_jc, ++ payload->tiler_heap_free, ++ payload->fragment_hierarchy_mask, ++ payload->tiler_hierarchy_mask, ++ payload->hierarchy_default_weight, ++ payload->tiler_core_req, ++ payload->fragment_core_req); ++ payload_dump(kctx, payload); ++#endif ++ t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; ++ f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; + -+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) -+{ -+ struct kbase_context *kctx = vinstr_ctx->kctx; -+ struct kbase_device *kbdev = kctx->kbdev; -+ int err; ++ /* Sanity check core requirements*/ ++ if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || ++ (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || ++ t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || ++ f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + -+ err = kbase_instr_hwcnt_disable_internal(kctx); -+ if (err) { -+ dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", -+ kctx); -+ return; -+ } ++ int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; ++ int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; ++ int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; ++ int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; + -+ /* Release the context. This had its own Power Manager Active reference. */ -+ kbasep_js_release_privileged_ctx(kbdev, kctx); ++ if (t_atom_type != BASE_JD_REQ_T) { ++ dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. 
Was: 0x%x\n Expected: 0x%x", ++ t_atom_type, BASE_JD_REQ_T); ++ } ++ if (f_atom_type != BASE_JD_REQ_FS) { ++ dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n", ++ f_atom_type, BASE_JD_REQ_FS); ++ } ++ if (t_has_ex_res) { ++ dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); ++ } ++ if (f_has_ex_res) { ++ dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); ++ } + -+ /* Also release our Power Manager Active reference. */ -+ kbase_pm_context_idle(kbdev); ++ goto out; ++ } + -+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); -+} ++ /* Process tiler job chains */ ++ next = payload->tiler_jc_list; ++ if (!next) { ++ dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n"); ++ goto out; ++ } + -+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) -+{ -+ disable_hwcnt(vinstr_ctx); -+ return enable_hwcnt(vinstr_ctx); -+} ++ while (next) { ++ base_jd_replay_jc *jc_struct; ++ struct kbase_vmap_struct jc_map; ++ u64 jc; + -+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4]) -+{ -+ dst[JM_HWCNT_BM] = src[JM_HWCNT_BM]; -+ dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM]; -+ dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM]; -+ dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM]; -+} ++ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map); + -+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) -+{ -+ dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM]; -+ dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM]; -+ dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM]; -+ dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; -+} ++ if (!jc_struct) { ++ dev_err(kctx->kbdev->dev, "Failed to map jc struct\n"); ++ goto out; ++ } + -+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) -+{ -+ size_t dump_size; ++ jc = jc_struct->jc; ++ next = jc_struct->next; ++ if (next) ++ jc_struct->jc = 0; + -+#ifndef CONFIG_MALI_NO_MALI -+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { -+ u32 nr_cg; ++ kbase_vunmap(kctx, &jc_map); + -+ nr_cg = kbdev->gpu_props.num_core_groups; -+ dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * -+ NR_CNT_PER_BLOCK * -+ NR_BYTES_PER_CNT; -+ } else -+#endif /* CONFIG_MALI_NO_MALI */ -+ { -+ /* assume v5 for now */ -+ base_gpu_props *props = &kbdev->gpu_props.props; -+ u32 nr_l2 = props->l2_props.num_l2_slices; -+ u64 core_mask = props->coherency_info.group[0].core_mask; -+ u32 nr_blocks = fls64(core_mask); ++ if (jc) { ++ u16 max_hw_job_id = 0; + -+ /* JM and tiler counter blocks are always present */ -+ dump_size = (2 + nr_l2 + nr_blocks) * -+ NR_CNT_PER_BLOCK * -+ NR_BYTES_PER_CNT; -+ } -+ return dump_size; -+} -+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); ++ if (kbasep_replay_find_hw_job_id(kctx, jc, ++ &max_hw_job_id) != 0) ++ goto out; + -+static size_t kbasep_vinstr_dump_size_ctx( -+ struct kbase_vinstr_context *vinstr_ctx) -+{ -+ return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); -+} ++ if (kbasep_replay_parse_jc(kctx, jc, prev_jc, ++ payload->tiler_heap_free, ++ payload->tiler_hierarchy_mask, ++ payload->hierarchy_default_weight, ++ hw_job_id_offset, false) != 0) { ++ goto out; ++ } + -+static int kbasep_vinstr_map_kernel_dump_buffer( -+ struct kbase_vinstr_context *vinstr_ctx) -+{ -+ struct kbase_va_region *reg; -+ struct kbase_context *kctx = vinstr_ctx->kctx; -+ u64 flags, nr_pages; ++ hw_job_id_offset += max_hw_job_id; + -+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; -+ vinstr_ctx->dump_size = 
kbasep_vinstr_dump_size_ctx(vinstr_ctx); -+ nr_pages = PFN_UP(vinstr_ctx->dump_size); ++ prev_jc = jc; ++ } ++ } ++ t_atom->jc = prev_jc; + -+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, -+ &vinstr_ctx->gpu_va); -+ if (!reg) -+ return -ENOMEM; ++ /* Process fragment job chain */ ++ f_atom->jc = payload->fragment_jc; ++ if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0, ++ payload->tiler_heap_free, ++ payload->fragment_hierarchy_mask, ++ payload->hierarchy_default_weight, 0, ++ true) != 0) { ++ goto out; ++ } + -+ vinstr_ctx->cpu_va = kbase_vmap( -+ kctx, -+ vinstr_ctx->gpu_va, -+ vinstr_ctx->dump_size, -+ &vinstr_ctx->vmap); -+ if (!vinstr_ctx->cpu_va) { -+ kbase_mem_free(kctx, vinstr_ctx->gpu_va); -+ return -ENOMEM; ++ if (!t_atom->jc || !f_atom->jc) { ++ dev_err(kctx->kbdev->dev, "Invalid payload\n"); ++ goto out; + } + -+ return 0; -+} ++ dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n", ++ t_atom->jc, f_atom->jc); ++ ret = 0; + -+static void kbasep_vinstr_unmap_kernel_dump_buffer( -+ struct kbase_vinstr_context *vinstr_ctx) -+{ -+ struct kbase_context *kctx = vinstr_ctx->kctx; ++out: ++ kbase_vunmap(kctx, &map); + -+ kbase_vunmap(kctx, &vinstr_ctx->vmap); -+ kbase_mem_free(kctx, vinstr_ctx->gpu_va); ++ return ret; +} + -+/** -+ * kbasep_vinstr_create_kctx - create kernel context for vinstr -+ * @vinstr_ctx: vinstr context -+ * Return: zero on success -+ */ -+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) ++static void kbase_replay_process_worker(struct work_struct *data) +{ -+ struct kbase_device *kbdev = vinstr_ctx->kbdev; -+ struct kbasep_kctx_list_element *element; -+ unsigned long flags; -+ bool enable_backend = false; -+ int err; ++ struct kbase_jd_atom *katom; ++ struct kbase_context *kctx; ++ struct kbase_jd_context *jctx; ++ bool need_to_try_schedule_context = false; + -+ vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); -+ if (!vinstr_ctx->kctx) -+ return -ENOMEM; ++ struct base_jd_atom_v2 t_atom, f_atom; ++ struct kbase_jd_atom *t_katom, *f_katom; ++ base_jd_prio atom_prio; + -+ /* Map the master kernel dump buffer. The HW dumps the counters -+ * into this memory region. */ -+ err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx); -+ if (err) { -+ kbase_destroy_context(vinstr_ctx->kctx); -+ vinstr_ctx->kctx = NULL; -+ return err; -+ } ++ katom = container_of(data, struct kbase_jd_atom, work); ++ kctx = katom->kctx; ++ jctx = &kctx->jctx; + -+ /* Add kernel context to list of contexts associated with device. */ -+ element = kzalloc(sizeof(*element), GFP_KERNEL); -+ if (element) { -+ element->kctx = vinstr_ctx->kctx; -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_add(&element->link, &kbdev->kctx_list); ++ mutex_lock(&jctx->lock); + -+ /* Inform timeline client about new context. -+ * Do this while holding the lock to avoid tracepoint -+ * being created in both body and summary stream. */ -+ KBASE_TLSTREAM_TL_NEW_CTX( -+ vinstr_ctx->kctx, -+ (u32)(vinstr_ctx->kctx->id), -+ (u32)(vinstr_ctx->kctx->tgid)); ++ atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority); + -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } else { -+ /* Don't treat this as a fail - just warn about it. */ -+ dev_warn(kbdev->dev, -+ "couldn't add kctx to kctx_list\n"); ++ if (kbasep_replay_create_atoms( ++ kctx, &t_atom, &f_atom, atom_prio) != 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ goto out; + } + -+ /* Don't enable hardware counters if vinstr is suspended. 
-+ * Note that vinstr resume code is run under vinstr context lock, -+ * lower layer will be enabled as needed on resume. */ -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ if (VINSTR_IDLE == vinstr_ctx->state) -+ enable_backend = true; -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -+ if (enable_backend) -+ err = enable_hwcnt(vinstr_ctx); ++ t_katom = &jctx->atoms[t_atom.atom_number]; ++ f_katom = &jctx->atoms[f_atom.atom_number]; + -+ if (err) { -+ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); -+ kbase_destroy_context(vinstr_ctx->kctx); -+ if (element) { -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_del(&element->link); -+ kfree(element); -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } -+ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); -+ vinstr_ctx->kctx = NULL; -+ return err; ++ if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) { ++ kbasep_release_katom(kctx, t_atom.atom_number); ++ kbasep_release_katom(kctx, f_atom.atom_number); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ goto out; + } + -+ vinstr_ctx->thread = kthread_run( -+ kbasep_vinstr_service_task, -+ vinstr_ctx, -+ "mali_vinstr_service"); -+ if (!vinstr_ctx->thread) { -+ disable_hwcnt(vinstr_ctx); -+ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); -+ kbase_destroy_context(vinstr_ctx->kctx); -+ if (element) { -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_del(&element->link); -+ kfree(element); -+ mutex_unlock(&kbdev->kctx_list_lock); -+ } -+ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); -+ vinstr_ctx->kctx = NULL; -+ return -EFAULT; -+ } ++ kbasep_replay_reset_softjob(katom, f_katom); + -+ return 0; -+} ++ need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom); ++ if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { ++ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); ++ kbasep_release_katom(kctx, f_atom.atom_number); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ goto out; ++ } ++ need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom); ++ if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { ++ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ goto out; ++ } + -+/** -+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context -+ * @vinstr_ctx: vinstr context -+ */ -+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) -+{ -+ struct kbase_device *kbdev = vinstr_ctx->kbdev; -+ struct kbasep_kctx_list_element *element; -+ struct kbasep_kctx_list_element *tmp; -+ bool found = false; ++ katom->event_code = BASE_JD_EVENT_DONE; + -+ /* Release hw counters dumping resources. */ -+ vinstr_ctx->thread = NULL; -+ disable_hwcnt(vinstr_ctx); -+ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); -+ kbase_destroy_context(vinstr_ctx->kctx); ++out: ++ if (katom->event_code != BASE_JD_EVENT_DONE) { ++ kbase_disjoint_state_down(kctx->kbdev); + -+ /* Remove kernel context from the device's contexts list. */ -+ mutex_lock(&kbdev->kctx_list_lock); -+ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { -+ if (element->kctx == vinstr_ctx->kctx) { -+ list_del(&element->link); -+ kfree(element); -+ found = true; -+ } ++ need_to_try_schedule_context |= jd_done_nolock(katom, NULL); + } -+ mutex_unlock(&kbdev->kctx_list_lock); -+ -+ if (!found) -+ dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + -+ /* Inform timeline client about context destruction. 
*/ -+ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); ++ if (need_to_try_schedule_context) ++ kbase_js_sched_all(kctx->kbdev); + -+ vinstr_ctx->kctx = NULL; ++ mutex_unlock(&jctx->lock); +} + +/** -+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core -+ * @vinstr_ctx: vinstr context -+ * @buffer_count: requested number of dump buffers -+ * @bitmap: bitmaps describing which counters should be enabled -+ * @argp: pointer where notification descriptor shall be stored -+ * @kernel_buffer: pointer to kernel side buffer ++ * @brief Check job replay fault + * -+ * Return: vinstr opaque client handle or NULL on failure ++ * This will read the job payload, checks fault type and source, then decides ++ * whether replay is required. ++ * ++ * @param[in] katom The atom to be processed ++ * @return true (success) if replay required or false on failure. + */ -+static struct kbase_vinstr_client *kbasep_vinstr_attach_client( -+ struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, -+ u32 bitmap[4], void *argp, void *kernel_buffer) ++static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) +{ -+ struct task_struct *thread = NULL; -+ struct kbase_vinstr_client *cli; ++ struct kbase_context *kctx = katom->kctx; ++ struct device *dev = kctx->kbdev->dev; ++ base_jd_replay_payload *payload; ++ u64 job_header; ++ u64 job_loop_detect; ++ struct job_descriptor_header *job; ++ struct kbase_vmap_struct job_map; ++ struct kbase_vmap_struct map; ++ bool err = false; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or ++ * if force_replay is enabled. ++ */ ++ if (BASE_JD_EVENT_TERMINATED == katom->event_code) { ++ return false; ++ } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) { ++ return true; ++ } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) { ++ katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT; ++ return true; ++ } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) { ++ /* No replay for faults of type other than ++ * BASE_JD_EVENT_DATA_INVALID_FAULT. ++ */ ++ return false; ++ } + -+ if (buffer_count > MAX_BUFFER_COUNT -+ || (buffer_count & (buffer_count - 1))) -+ return NULL; ++ /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc ++ * to find out whether the source of exception is POLYGON_LIST. Replay ++ * is required if the source of fault is POLYGON_LIST. 
++ */ ++ payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map); ++ if (!payload) { ++ dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n"); ++ return false; ++ } + -+ cli = kzalloc(sizeof(*cli), GFP_KERNEL); -+ if (!cli) -+ return NULL; ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload); ++ dev_dbg(dev, "\nPayload structure:\n" ++ "fragment_jc = 0x%llx\n" ++ "fragment_hierarchy_mask = 0x%x\n" ++ "fragment_core_req = 0x%x\n", ++ payload->fragment_jc, ++ payload->fragment_hierarchy_mask, ++ payload->fragment_core_req); ++#endif ++ /* Process fragment job chain */ ++ job_header = (u64) payload->fragment_jc; ++ job_loop_detect = job_header; ++ while (job_header) { ++ job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map); ++ if (!job) { ++ dev_err(dev, "failed to map jc\n"); ++ /* unmap payload*/ ++ kbase_vunmap(kctx, &map); ++ return false; ++ } + -+ cli->vinstr_ctx = vinstr_ctx; -+ cli->buffer_count = buffer_count; -+ cli->event_mask = -+ (1 << BASE_HWCNT_READER_EVENT_MANUAL) | -+ (1 << BASE_HWCNT_READER_EVENT_PERIODIC); -+ cli->pending = true; + -+ hwcnt_bitmap_set(cli->bitmap, bitmap); ++ dump_job_head(kctx, "\njob_head structure:\n", job); + -+ mutex_lock(&vinstr_ctx->lock); ++ /* Replay only when the polygon list reader caused the ++ * DATA_INVALID_FAULT */ ++ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && ++ (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { ++ err = true; ++ kbase_vunmap(kctx, &job_map); ++ break; ++ } + -+ hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); -+ vinstr_ctx->reprogram = true; ++ /* Move on to next fragment job in the list */ ++ if (job->job_descriptor_size) ++ job_header = job->next_job._64; ++ else ++ job_header = job->next_job._32; + -+ /* If this is the first client, create the vinstr kbase -+ * context. This context is permanently resident until the -+ * last client exits. */ -+ if (!vinstr_ctx->nclients) { -+ hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); -+ if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) -+ goto error; ++ kbase_vunmap(kctx, &job_map); + -+ vinstr_ctx->reprogram = false; -+ cli->pending = false; ++ /* Job chain loop detected */ ++ if (job_header == job_loop_detect) ++ break; + } + -+ /* The GPU resets the counter block every time there is a request -+ * to dump it. We need a per client kernel buffer for accumulating -+ * the counters. */ -+ cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); -+ cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); -+ if (!cli->accum_buffer) -+ goto error; ++ /* unmap payload*/ ++ kbase_vunmap(kctx, &map); + -+ /* Prepare buffers. */ -+ if (cli->buffer_count) { -+ int *fd = (int *)argp; -+ size_t tmp; ++ return err; ++} + -+ /* Allocate area for buffers metadata storage. */ -+ tmp = sizeof(struct kbase_hwcnt_reader_metadata) * -+ cli->buffer_count; -+ cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL); -+ if (!cli->dump_buffers_meta) -+ goto error; + -+ /* Allocate required number of dumping buffers. */ -+ cli->dump_buffers = (char *)__get_free_pages( -+ GFP_KERNEL | __GFP_ZERO, -+ get_order(cli->dump_size * cli->buffer_count)); -+ if (!cli->dump_buffers) -+ goto error; ++/** ++ * @brief Process a replay job ++ * ++ * Called from kbase_process_soft_job. ++ * ++ * On exit, if the job has completed, katom->event_code will have been updated. ++ * If the job has not completed, and is replaying jobs, then the atom status ++ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED. 
++ * ++ * @param[in] katom The atom to be processed ++ * @return false if the atom has completed ++ * true if the atom is replaying jobs ++ */ ++bool kbase_replay_process(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; + -+ /* Create descriptor for user-kernel data exchange. */ -+ *fd = anon_inode_getfd( -+ "[mali_vinstr_desc]", -+ &vinstr_client_fops, -+ cli, -+ O_RDONLY | O_CLOEXEC); -+ if (0 > *fd) -+ goto error; -+ } else if (kernel_buffer) { -+ cli->kernel_buffer = kernel_buffer; -+ } else { -+ cli->legacy_buffer = (void __user *)argp; -+ } ++ /* Don't replay this atom if these issues are not present in the ++ * hardware */ ++ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) && ++ !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) { ++ dev_dbg(kbdev->dev, "Hardware does not need replay workaround"); + -+ atomic_set(&cli->read_idx, 0); -+ atomic_set(&cli->meta_idx, 0); -+ atomic_set(&cli->write_idx, 0); -+ init_waitqueue_head(&cli->waitq); ++ /* Signal failure to userspace */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; + -+ vinstr_ctx->nclients++; -+ list_add(&cli->list, &vinstr_ctx->idle_clients); ++ return false; ++ } + -+ mutex_unlock(&vinstr_ctx->lock); ++ if (katom->event_code == BASE_JD_EVENT_DONE) { ++ dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n"); + -+ return cli; ++ if (katom->retry_count) ++ kbase_disjoint_state_down(kbdev); + -+error: -+ kfree(cli->dump_buffers_meta); -+ if (cli->dump_buffers) -+ free_pages( -+ (unsigned long)cli->dump_buffers, -+ get_order(cli->dump_size * cli->buffer_count)); -+ kfree(cli->accum_buffer); -+ if (!vinstr_ctx->nclients && vinstr_ctx->kctx) { -+ thread = vinstr_ctx->thread; -+ kbasep_vinstr_destroy_kctx(vinstr_ctx); ++ return false; + } -+ kfree(cli); -+ -+ mutex_unlock(&vinstr_ctx->lock); + -+ /* Thread must be stopped after lock is released. */ -+ if (thread) -+ kthread_stop(thread); ++ if (kbase_ctx_flag(kctx, KCTX_DYING)) { ++ dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); + -+ return NULL; -+} ++ if (katom->retry_count) ++ kbase_disjoint_state_down(kbdev); + -+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) -+{ -+ struct kbase_vinstr_context *vinstr_ctx; -+ struct kbase_vinstr_client *iter, *tmp; -+ struct task_struct *thread = NULL; -+ u32 zerobitmap[4] = { 0 }; -+ int cli_found = 0; ++ return false; ++ } + -+ KBASE_DEBUG_ASSERT(cli); -+ vinstr_ctx = cli->vinstr_ctx; -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ -+ mutex_lock(&vinstr_ctx->lock); -+ -+ list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { -+ if (iter == cli) { -+ vinstr_ctx->reprogram = true; -+ cli_found = 1; -+ list_del(&iter->list); -+ break; -+ } -+ } -+ if (!cli_found) { -+ list_for_each_entry_safe( -+ iter, tmp, &vinstr_ctx->waiting_clients, list) { -+ if (iter == cli) { -+ vinstr_ctx->reprogram = true; -+ cli_found = 1; -+ list_del(&iter->list); -+ break; -+ } -+ } -+ } -+ KBASE_DEBUG_ASSERT(cli_found); -+ -+ kfree(cli->dump_buffers_meta); -+ free_pages( -+ (unsigned long)cli->dump_buffers, -+ get_order(cli->dump_size * cli->buffer_count)); -+ kfree(cli->accum_buffer); -+ kfree(cli); -+ -+ vinstr_ctx->nclients--; -+ if (!vinstr_ctx->nclients) { -+ thread = vinstr_ctx->thread; -+ kbasep_vinstr_destroy_kctx(vinstr_ctx); ++ /* Check job exception type and source before replaying. 
*/ ++ if (!kbase_replay_fault_check(katom)) { ++ dev_dbg(kbdev->dev, ++ "Replay cancelled on event %x\n", katom->event_code); ++ /* katom->event_code is already set to the failure code of the ++ * previous job. ++ */ ++ return false; + } + -+ /* Rebuild context bitmap now that the client has detached */ -+ hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); -+ list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) -+ hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); -+ list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) -+ hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); ++ dev_warn(kbdev->dev, "Replaying jobs retry=%d\n", ++ katom->retry_count); + -+ mutex_unlock(&vinstr_ctx->lock); ++ katom->retry_count++; + -+ /* Thread must be stopped after lock is released. */ -+ if (thread) -+ kthread_stop(thread); -+} -+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); ++ if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { ++ dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n"); + -+/* Accumulate counters in the dump buffer */ -+static void accum_dump_buffer(void *dst, void *src, size_t dump_size) -+{ -+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; -+ u32 *d = dst; -+ u32 *s = src; -+ size_t i, j; ++ kbase_disjoint_state_down(kbdev); + -+ for (i = 0; i < dump_size; i += block_size) { -+ /* skip over the header block */ -+ d += NR_BYTES_PER_HDR / sizeof(u32); -+ s += NR_BYTES_PER_HDR / sizeof(u32); -+ for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) { -+ /* saturate result if addition would result in wraparound */ -+ if (U32_MAX - *d < *s) -+ *d = U32_MAX; -+ else -+ *d += *s; -+ d++; -+ s++; -+ } ++ /* katom->event_code is already set to the failure code of the ++ previous job */ ++ return false; + } -+} + -+/* This is the Midgard v4 patch function. It copies the headers for each -+ * of the defined blocks from the master kernel buffer and then patches up -+ * the performance counter enable mask for each of the blocks to exclude -+ * counters that were not requested by the client. 
*/ -+static void patch_dump_buffer_hdr_v4( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_vinstr_client *cli) -+{ -+ u32 *mask; -+ u8 *dst = cli->accum_buffer; -+ u8 *src = vinstr_ctx->cpu_va; -+ u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups; -+ size_t i, group_size, group; -+ enum { -+ SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, -+ JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT -+ }; ++ /* only enter the disjoint state once for the whole time while the replay is ongoing */ ++ if (katom->retry_count == 1) ++ kbase_disjoint_state_up(kbdev); + -+ group_size = NR_CNT_BLOCKS_PER_GROUP * -+ NR_CNT_PER_BLOCK * -+ NR_BYTES_PER_CNT; -+ for (i = 0; i < nr_cg; i++) { -+ group = i * group_size; -+ /* copy shader core headers */ -+ memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE], -+ NR_BYTES_PER_HDR); -+ memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE], -+ NR_BYTES_PER_HDR); -+ memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE], -+ NR_BYTES_PER_HDR); -+ memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE], -+ NR_BYTES_PER_HDR); ++ INIT_WORK(&katom->work, kbase_replay_process_worker); ++ queue_work(kctx->event_workq, &katom->work); + -+ /* copy tiler header */ -+ memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE], -+ NR_BYTES_PER_HDR); ++ return true; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c +new file mode 100644 +index 000000000..6c8cf73ae +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c +@@ -0,0 +1,86 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ /* copy mmu header */ -+ memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE], -+ NR_BYTES_PER_HDR); + -+ /* copy job manager header */ -+ memcpy(&dst[group + JM_BASE], &src[group + JM_BASE], -+ NR_BYTES_PER_HDR); + -+ /* patch the shader core enable mask */ -+ mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[SHADER_HWCNT_BM]; -+ mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[SHADER_HWCNT_BM]; -+ mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[SHADER_HWCNT_BM]; -+ mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[SHADER_HWCNT_BM]; ++#ifdef CONFIG_ARM64 + -+ /* patch the tiler core enable mask */ -+ mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[TILER_HWCNT_BM]; ++#include ++#include + -+ /* patch the mmu core enable mask */ -+ mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; ++#include + -+ /* patch the job manager enable mask */ -+ mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[JM_HWCNT_BM]; -+ } -+} ++/* __asmeq is not available on Kernel versions >= 4.20 */ ++#ifndef __asmeq ++/* ++ * This is used to ensure the compiler did actually allocate the register we ++ * asked it for some inline assembly sequences. Apparently we can't trust the ++ * compiler from one version to another so a bit of paranoia won't hurt. This ++ * string is meant to be concatenated with the inline asm string and will ++ * cause compilation to stop on mismatch. (for details, see gcc PR 15089) ++ */ ++#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" ++#endif + -+/* This is the Midgard v5 patch function. It copies the headers for each -+ * of the defined blocks from the master kernel buffer and then patches up -+ * the performance counter enable mask for each of the blocks to exclude -+ * counters that were not requested by the client. 
*/ -+static void patch_dump_buffer_hdr_v5( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_vinstr_client *cli) ++static noinline u64 invoke_smc_fid(u64 function_id, ++ u64 arg0, u64 arg1, u64 arg2) +{ -+ struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; -+ u32 i, nr_l2; -+ u64 core_mask; -+ u32 *mask; -+ u8 *dst = cli->accum_buffer; -+ u8 *src = vinstr_ctx->cpu_va; -+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; -+ -+ /* copy and patch job manager header */ -+ memcpy(dst, src, NR_BYTES_PER_HDR); -+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[JM_HWCNT_BM]; -+ dst += block_size; -+ src += block_size; ++ register u64 x0 asm("x0") = function_id; ++ register u64 x1 asm("x1") = arg0; ++ register u64 x2 asm("x2") = arg1; ++ register u64 x3 asm("x3") = arg2; + -+ /* copy and patch tiler header */ -+ memcpy(dst, src, NR_BYTES_PER_HDR); -+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[TILER_HWCNT_BM]; -+ dst += block_size; -+ src += block_size; ++ asm volatile( ++ __asmeq("%0", "x0") ++ __asmeq("%1", "x1") ++ __asmeq("%2", "x2") ++ __asmeq("%3", "x3") ++ "smc #0\n" ++ : "+r" (x0) ++ : "r" (x1), "r" (x2), "r" (x3)); + -+ /* copy and patch MMU/L2C headers */ -+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; -+ for (i = 0; i < nr_l2; i++) { -+ memcpy(dst, src, NR_BYTES_PER_HDR); -+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; -+ dst += block_size; -+ src += block_size; -+ } ++ return x0; ++} + -+ /* copy and patch shader core headers */ -+ core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; -+ while (0ull != core_mask) { -+ memcpy(dst, src, NR_BYTES_PER_HDR); -+ if (0ull != (core_mask & 1ull)) { -+ /* if block is not reserved update header */ -+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; -+ *mask &= cli->bitmap[SHADER_HWCNT_BM]; -+ } -+ dst += block_size; -+ src += block_size; ++u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) ++{ ++ /* Is fast call (bit 31 set) */ ++ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); ++ /* bits 16-23 must be zero for fast calls */ ++ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); + -+ core_mask >>= 1; -+ } ++ return invoke_smc_fid(fid, arg0, arg1, arg2); +} + -+/** -+ * accum_clients - accumulate dumped hw counters for all known clients -+ * @vinstr_ctx: vinstr context -+ */ -+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) ++u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, ++ u64 arg0, u64 arg1, u64 arg2) +{ -+ struct kbase_vinstr_client *iter; -+ int v4 = 0; ++ u32 fid = 0; + -+#ifndef CONFIG_MALI_NO_MALI -+ v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); -+#endif ++ /* Only the six bits allowed should be used. */ ++ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); + -+ list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { -+ /* Don't bother accumulating clients whose hwcnt requests -+ * have not yet been honoured. */ -+ if (iter->pending) -+ continue; -+ if (v4) -+ patch_dump_buffer_hdr_v4(vinstr_ctx, iter); -+ else -+ patch_dump_buffer_hdr_v5(vinstr_ctx, iter); -+ accum_dump_buffer( -+ iter->accum_buffer, -+ vinstr_ctx->cpu_va, -+ iter->dump_size); -+ } -+ list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) { -+ /* Don't bother accumulating clients whose hwcnt requests -+ * have not yet been honoured. 
*/ -+ if (iter->pending) -+ continue; -+ if (v4) -+ patch_dump_buffer_hdr_v4(vinstr_ctx, iter); -+ else -+ patch_dump_buffer_hdr_v5(vinstr_ctx, iter); -+ accum_dump_buffer( -+ iter->accum_buffer, -+ vinstr_ctx->cpu_va, -+ iter->dump_size); -+ } ++ fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ ++ if (smc64) ++ fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ ++ fid |= oen; /* Bit 29:24: OEN */ ++ /* Bit 23:16: Must be zero for fast calls */ ++ fid |= (function_number); /* Bit 15:0: function number */ ++ ++ return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); +} + -+/*****************************************************************************/ ++#endif /* CONFIG_ARM64 */ + -+/** -+ * kbasep_vinstr_get_timestamp - return timestamp +diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h +new file mode 100644 +index 000000000..9bff3d2e8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h +@@ -0,0 +1,67 @@ ++/* + * -+ * Function returns timestamp value based on raw monotonic timer. Value will -+ * wrap around zero in case of overflow. ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * Return: timestamp value + */ -+static u64 kbasep_vinstr_get_timestamp(void) -+{ -+ struct timespec64 ts; -+ -+ ktime_get_raw_ts64(&ts); -+ return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; -+} + -+/** -+ * kbasep_vinstr_add_dump_request - register client's dumping request -+ * @cli: requesting client -+ * @waiting_clients: list of pending dumping requests -+ */ -+static void kbasep_vinstr_add_dump_request( -+ struct kbase_vinstr_client *cli, -+ struct list_head *waiting_clients) -+{ -+ struct kbase_vinstr_client *tmp; + -+ if (list_empty(waiting_clients)) { -+ list_add(&cli->list, waiting_clients); -+ return; -+ } -+ list_for_each_entry(tmp, waiting_clients, list) { -+ if (tmp->dump_time > cli->dump_time) { -+ list_add_tail(&cli->list, &tmp->list); -+ return; -+ } -+ } -+ list_add_tail(&cli->list, waiting_clients); -+} + -+/** -+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level -+ * dump and accumulate them for known -+ * clients -+ * @vinstr_ctx: vinstr context -+ * @timestamp: pointer where collection timestamp will be recorded -+ * -+ * Return: zero on success -+ */ -+static int kbasep_vinstr_collect_and_accumulate( -+ struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) -+{ -+ unsigned long flags; -+ int rcode; + -+#ifdef CONFIG_MALI_NO_MALI -+ /* The dummy model needs the CPU mapping. */ -+ gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); -+#endif + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ if (VINSTR_IDLE != vinstr_ctx->state) { -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -+ return -EAGAIN; -+ } else { -+ vinstr_ctx->state = VINSTR_DUMPING; -+ } -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++#ifndef _KBASE_SMC_H_ ++#define _KBASE_SMC_H_ + -+ /* Request HW counters dump. -+ * Disable preemption to make dump timestamp more accurate. 
*/ -+ preempt_disable(); -+ *timestamp = kbasep_vinstr_get_timestamp(); -+ rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx); -+ preempt_enable(); ++#ifdef CONFIG_ARM64 + -+ if (!rcode) -+ rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); -+ WARN_ON(rcode); ++#include + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ switch (vinstr_ctx->state) -+ { -+ case VINSTR_SUSPENDING: -+ schedule_work(&vinstr_ctx->suspend_work); -+ break; -+ case VINSTR_DUMPING: -+ vinstr_ctx->state = VINSTR_IDLE; -+ wake_up_all(&vinstr_ctx->suspend_waitq); -+ break; -+ default: -+ break; -+ } -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++#define SMC_FAST_CALL (1 << 31) ++#define SMC_64 (1 << 30) + -+ /* Accumulate values of collected counters. */ -+ if (!rcode) -+ accum_clients(vinstr_ctx); ++#define SMC_OEN_OFFSET 24 ++#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ ++#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) ++#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) + -+ return rcode; -+} + +/** -+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel -+ * buffer -+ * @cli: requesting client -+ * @timestamp: timestamp when counters were collected -+ * @event_id: id of event that caused triggered counters collection -+ * -+ * Return: zero on success -+ */ -+static int kbasep_vinstr_fill_dump_buffer( -+ struct kbase_vinstr_client *cli, u64 timestamp, -+ enum base_hwcnt_reader_event event_id) -+{ -+ unsigned int write_idx = atomic_read(&cli->write_idx); -+ unsigned int read_idx = atomic_read(&cli->read_idx); -+ -+ struct kbase_hwcnt_reader_metadata *meta; -+ void *buffer; ++ * kbase_invoke_smc_fid - Perform a secure monitor call ++ * @fid: The SMC function to call, see SMC Calling convention. ++ * @arg0: First argument to the SMC. ++ * @arg1: Second argument to the SMC. ++ * @arg2: Third argument to the SMC. ++ * ++ * See SMC Calling Convention for details. ++ * ++ * Return: the return value from the SMC. ++ */ ++u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); + -+ /* Check if there is a place to copy HWC block into. */ -+ if (write_idx - read_idx == cli->buffer_count) -+ return -1; -+ write_idx %= cli->buffer_count; ++/** ++ * kbase_invoke_smc_fid - Perform a secure monitor call ++ * @oen: Owning Entity number (SIP, STD etc). ++ * @function_number: The function number within the OEN. ++ * @smc64: use SMC64 calling convention instead of SMC32. ++ * @arg0: First argument to the SMC. ++ * @arg1: Second argument to the SMC. ++ * @arg2: Third argument to the SMC. ++ * ++ * See SMC Calling Convention for details. ++ * ++ * Return: the return value from the SMC call. ++ */ ++u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, ++ u64 arg0, u64 arg1, u64 arg2); + -+ /* Fill in dump buffer and its metadata. 
*/ -+ buffer = &cli->dump_buffers[write_idx * cli->dump_size]; -+ meta = &cli->dump_buffers_meta[write_idx]; -+ meta->timestamp = timestamp; -+ meta->event_id = event_id; -+ meta->buffer_idx = write_idx; -+ memcpy(buffer, cli->accum_buffer, cli->dump_size); -+ return 0; -+} ++#endif /* CONFIG_ARM64 */ + -+/** -+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer -+ * allocated in userspace -+ * @cli: requesting client ++#endif /* _KBASE_SMC_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +new file mode 100644 +index 000000000..396953e78 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +@@ -0,0 +1,1549 @@ ++/* + * -+ * Return: zero on success ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * -+ * This is part of legacy ioctl interface. -+ */ -+static int kbasep_vinstr_fill_dump_buffer_legacy( -+ struct kbase_vinstr_client *cli) -+{ -+ void __user *buffer = cli->legacy_buffer; -+ int rcode; -+ -+ /* Copy data to user buffer. */ -+ rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size); -+ if (rcode) -+ pr_warn("error while copying buffer to user\n"); -+ return rcode; -+} -+ -+/** -+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer -+ * allocated in kernel space -+ * @cli: requesting client ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. + * -+ * Return: zero on success ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * This is part of the kernel client interface. + */ -+static int kbasep_vinstr_fill_dump_buffer_kernel( -+ struct kbase_vinstr_client *cli) -+{ -+ memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); + -+ return 0; -+} + -+/** -+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst -+ * @vinstr_ctx: vinstr context -+ */ -+static void kbasep_vinstr_reprogram( -+ struct kbase_vinstr_context *vinstr_ctx) -+{ -+ unsigned long flags; -+ bool suspended = false; + -+ /* Don't enable hardware counters if vinstr is suspended. */ -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ if (VINSTR_IDLE != vinstr_ctx->state) -+ suspended = true; -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -+ if (suspended) -+ return; + -+ /* Change to suspended state is done while holding vinstr context -+ * lock. Below code will then no re-enable the instrumentation. 
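To make the function-ID layout in mali_kbase_smc.h above concrete, here is a minimal sketch of a SiP-owned SMC64 fast call; the function number 0x0001 is made up for the example.

u64 example_sip_call(void)
{
	/* 0x80000000 | 0x40000000 | 0x02000000 | 0x0001 == 0xc2000001 */
	u32 fid = SMC_FAST_CALL | SMC_64 | SMC_OEN_SIP | 0x0001;

	/* Equivalent to kbase_invoke_smc(SMC_OEN_SIP, 0x0001, true, 0, 0, 0) */
	return kbase_invoke_smc_fid(fid, 0, 0, 0);
}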
*/ + -+ if (vinstr_ctx->reprogram) { -+ struct kbase_vinstr_client *iter; ++#include + -+ if (!reprogram_hwcnt(vinstr_ctx)) { -+ vinstr_ctx->reprogram = false; -+ list_for_each_entry( -+ iter, -+ &vinstr_ctx->idle_clients, -+ list) -+ iter->pending = false; -+ list_for_each_entry( -+ iter, -+ &vinstr_ctx->waiting_clients, -+ list) -+ iter->pending = false; -+ } -+ } -+} ++#if defined(CONFIG_DMA_SHARED_BUFFER) ++#include ++#include ++#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++#include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Mask to check cache alignment of data structures */ ++#define KBASE_CACHE_ALIGNMENT_MASK ((1<kctx; ++ unsigned long lflags; + -+ /* Copy collected counters to user readable buffer. */ -+ if (cli->buffer_count) -+ rcode = kbasep_vinstr_fill_dump_buffer( -+ cli, timestamp, event_id); -+ else if (cli->kernel_buffer) -+ rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); -+ else -+ rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++} + -+ if (rcode) -+ goto exit; ++void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ unsigned long lflags; + ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_del(&katom->queue); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++} + -+ /* Notify client. Make sure all changes to memory are visible. */ -+ wmb(); -+ atomic_inc(&cli->write_idx); -+ wake_up_interruptible(&cli->waitq); ++static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+ /* Prepare for next request. */ -+ memset(cli->accum_buffer, 0, cli->dump_size); ++ /* Record the start time of this atom so we could cancel it at ++ * the right time. ++ */ ++ katom->start_timestamp = ktime_get(); + -+exit: -+ return rcode; ++ /* Add the atom to the waiting list before the timer is ++ * (re)started to make sure that it gets processed. ++ */ ++ kbasep_add_waiting_soft_job(katom); ++ ++ /* Schedule timeout of this atom after a period if it is not active */ ++ if (!timer_pending(&kctx->soft_job_timeout)) { ++ int timeout_ms = atomic_read( ++ &kctx->kbdev->js_data.soft_job_timeout_ms); ++ mod_timer(&kctx->soft_job_timeout, ++ jiffies + msecs_to_jiffies(timeout_ms)); ++ } +} + -+/** -+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function -+ * -+ * @hrtimer: high resolution timer -+ * -+ * Return: High resolution timer restart enum. 
-+ */ -+static enum hrtimer_restart kbasep_vinstr_wake_up_callback( -+ struct hrtimer *hrtimer) ++static int kbasep_read_soft_event_status( ++ struct kbase_context *kctx, u64 evt, unsigned char *status) +{ -+ struct kbasep_vinstr_wake_up_timer *timer = -+ container_of( -+ hrtimer, -+ struct kbasep_vinstr_wake_up_timer, -+ hrtimer); ++ unsigned char *mapped_evt; ++ struct kbase_vmap_struct map; + -+ KBASE_DEBUG_ASSERT(timer); ++ mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); ++ if (!mapped_evt) ++ return -EFAULT; + -+ atomic_set(&timer->vinstr_ctx->request_pending, 1); -+ wake_up_all(&timer->vinstr_ctx->waitq); ++ *status = *mapped_evt; + -+ return HRTIMER_NORESTART; -+} ++ kbase_vunmap(kctx, &map); + -+#ifdef CONFIG_DEBUG_OBJECT_TIMERS -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) -+/** -+ * kbase_destroy_hrtimer_on_stack - kernel's destroy_hrtimer_on_stack(), -+ * rewritten -+ * -+ * @timer: high resolution timer -+ * -+ * destroy_hrtimer_on_stack() was exported only for 4.7.0 kernel so for -+ * earlier kernel versions it is not possible to call it explicitly. -+ * Since this function must accompany hrtimer_init_on_stack(), which -+ * has to be used for hrtimer initialization if CONFIG_DEBUG_OBJECT_TIMERS -+ * is defined in order to avoid the warning about object on stack not being -+ * annotated, we rewrite it here to be used for earlier kernel versions. -+ */ -+static void kbase_destroy_hrtimer_on_stack(struct hrtimer *timer) -+{ -+ debug_object_free(timer, &hrtimer_debug_descr); ++ return 0; +} -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) */ -+#endif /* CONFIG_DEBUG_OBJECT_TIMERS */ + -+/** -+ * kbasep_vinstr_service_task - HWC dumping service thread -+ * -+ * @data: Pointer to vinstr context structure. -+ * -+ * Return: Always returns zero. -+ */ -+static int kbasep_vinstr_service_task(void *data) ++static int kbasep_write_soft_event_status( ++ struct kbase_context *kctx, u64 evt, unsigned char new_status) +{ -+ struct kbase_vinstr_context *vinstr_ctx = data; -+ struct kbasep_vinstr_wake_up_timer timer; -+ -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ -+ hrtimer_init_on_stack(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ unsigned char *mapped_evt; ++ struct kbase_vmap_struct map; + -+ timer.hrtimer.function = kbasep_vinstr_wake_up_callback; -+ timer.vinstr_ctx = vinstr_ctx; ++ if ((new_status != BASE_JD_SOFT_EVENT_SET) && ++ (new_status != BASE_JD_SOFT_EVENT_RESET)) ++ return -EINVAL; + -+ while (!kthread_should_stop()) { -+ struct kbase_vinstr_client *cli = NULL; -+ struct kbase_vinstr_client *tmp; -+ int rcode; ++ mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); ++ if (!mapped_evt) ++ return -EFAULT; + -+ u64 timestamp = kbasep_vinstr_get_timestamp(); -+ u64 dump_time = 0; -+ struct list_head expired_requests; ++ *mapped_evt = new_status; + -+ /* Hold lock while performing operations on lists of clients. */ -+ mutex_lock(&vinstr_ctx->lock); ++ kbase_vunmap(kctx, &map); + -+ /* Closing thread must not interact with client requests. 
*/ -+ if (current == vinstr_ctx->thread) { -+ atomic_set(&vinstr_ctx->request_pending, 0); ++ return 0; ++} + -+ if (!list_empty(&vinstr_ctx->waiting_clients)) { -+ cli = list_first_entry( -+ &vinstr_ctx->waiting_clients, -+ struct kbase_vinstr_client, -+ list); -+ dump_time = cli->dump_time; -+ } -+ } ++static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) ++{ ++ struct kbase_vmap_struct map; ++ void *user_result; ++ struct timespec64 ts; ++ struct base_dump_cpu_gpu_counters data; ++ u64 system_time; ++ u64 cycle_counter; ++ u64 jc = katom->jc; ++ struct kbase_context *kctx = katom->kctx; ++ int pm_active_err; + -+ if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { -+ mutex_unlock(&vinstr_ctx->lock); ++ memset(&data, 0, sizeof(data)); + -+ /* Sleep until next dumping event or service request. */ -+ if (cli) { -+ u64 diff = dump_time - timestamp; ++ /* Take the PM active reference as late as possible - otherwise, it could ++ * delay suspend until we process the atom (which may be at the end of a ++ * long chain of dependencies */ ++ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); ++ if (pm_active_err) { ++ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + -+ hrtimer_start( -+ &timer.hrtimer, -+ ns_to_ktime(diff), -+ HRTIMER_MODE_REL); -+ } -+ wait_event( -+ vinstr_ctx->waitq, -+ atomic_read( -+ &vinstr_ctx->request_pending) || -+ kthread_should_stop()); -+ hrtimer_cancel(&timer.hrtimer); -+ continue; -+ } ++ /* We're suspended - queue this on the list of suspended jobs ++ * Use dep_item[1], because dep_item[0] was previously in use ++ * for 'waiting_soft_jobs'. ++ */ ++ mutex_lock(&js_devdata->runpool_mutex); ++ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); ++ mutex_unlock(&js_devdata->runpool_mutex); + -+ rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, -+ ×tamp); ++ /* Also adding this to the list of waiting soft job */ ++ kbasep_add_waiting_soft_job(katom); + -+ INIT_LIST_HEAD(&expired_requests); ++ return pm_active_err; ++ } + -+ /* Find all expired requests. */ -+ list_for_each_entry_safe( -+ cli, -+ tmp, -+ &vinstr_ctx->waiting_clients, -+ list) { -+ s64 tdiff = -+ (s64)(timestamp + DUMPING_RESOLUTION) - -+ (s64)cli->dump_time; -+ if (tdiff >= 0ll) { -+ list_del(&cli->list); -+ list_add(&cli->list, &expired_requests); -+ } else { -+ break; -+ } -+ } ++ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, ++ &ts); + -+ /* Fill data for each request found. */ -+ list_for_each_entry_safe(cli, tmp, &expired_requests, list) { -+ /* Ensure that legacy buffer will not be used from -+ * this kthread context. */ -+ BUG_ON(0 == cli->buffer_count); -+ /* Expect only periodically sampled clients. */ -+ BUG_ON(0 == cli->dump_interval); ++ kbase_pm_context_idle(kctx->kbdev); + -+ if (!rcode) -+ kbasep_vinstr_update_client( -+ cli, -+ timestamp, -+ BASE_HWCNT_READER_EVENT_PERIODIC); ++ data.sec = ts.tv_sec; ++ data.usec = ts.tv_nsec / 1000; ++ data.system_time = system_time; ++ data.cycle_counter = cycle_counter; + -+ /* Set new dumping time. Drop missed probing times. 
*/ -+ do { -+ cli->dump_time += cli->dump_interval; -+ } while (cli->dump_time < timestamp); ++ /* Assume this atom will be cancelled until we know otherwise */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ list_del(&cli->list); -+ kbasep_vinstr_add_dump_request( -+ cli, -+ &vinstr_ctx->waiting_clients); -+ } ++ /* GPU_WR access is checked on the range for returning the result to ++ * userspace for the following reasons: ++ * - security, this is currently how imported user bufs are checked. ++ * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ ++ user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); ++ if (!user_result) ++ return 0; + -+ /* Reprogram counters set if required. */ -+ kbasep_vinstr_reprogram(vinstr_ctx); ++ memcpy(user_result, &data, sizeof(data)); + -+ mutex_unlock(&vinstr_ctx->lock); -+ } ++ kbase_vunmap(kctx, &map); + -+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) -+ kbase_destroy_hrtimer_on_stack(&timer.hrtimer); -+#else -+ destroy_hrtimer_on_stack(&timer.hrtimer); -+#endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) */ -+#endif /* CONFIG_DEBUG_OBJECTS_TIMERS */ ++ /* Atom was fine - mark it as done */ ++ katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + -+/*****************************************************************************/ -+ -+/** -+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers -+ * @cli: pointer to vinstr client structure -+ * -+ * Return: non-zero if client has at least one dumping buffer filled that was -+ * not notified to user yet -+ */ -+static int kbasep_vinstr_hwcnt_reader_buffer_ready( -+ struct kbase_vinstr_client *cli) ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++/* Called by the explicit fence mechanism when a fence wait has completed */ ++void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) +{ -+ KBASE_DEBUG_ASSERT(cli); -+ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); ++ struct kbase_context *kctx = katom->kctx; ++ ++ mutex_lock(&kctx->jctx.lock); ++ kbasep_remove_waiting_soft_job(katom); ++ kbase_finish_soft_job(katom); ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(kctx->kbdev); ++ mutex_unlock(&kctx->jctx.lock); +} ++#endif + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @buffer: pointer to userspace buffer -+ * @size: size of buffer -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( -+ struct kbase_vinstr_client *cli, void __user *buffer, -+ size_t size) ++static void kbasep_soft_event_complete_job(struct work_struct *work) +{ -+ unsigned int meta_idx = atomic_read(&cli->meta_idx); -+ unsigned int idx = meta_idx % cli->buffer_count; -+ -+ struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx]; ++ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ int resched; + -+ /* Metadata sanity check. */ -+ KBASE_DEBUG_ASSERT(idx == meta->buffer_idx); ++ mutex_lock(&kctx->jctx.lock); ++ resched = jd_done_nolock(katom, NULL); ++ mutex_unlock(&kctx->jctx.lock); + -+ if (sizeof(struct kbase_hwcnt_reader_metadata) != size) -+ return -EINVAL; ++ if (resched) ++ kbase_js_sched_all(kctx->kbdev); ++} + -+ /* Check if there is any buffer available. 
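Returning to the dump_time catch-up loop in the service thread above, a worked example with invented numbers shows how missed periods are dropped rather than delivered late in a burst:

	/*
	 * dump_interval = 10 ms, scheduled dump_time = T, thread wakes at T + 35 ms:
	 * the loop advances dump_time T+10 -> T+20 -> T+30 -> T+40 and stops at
	 * T+40 (the first value past the current timestamp), so the three samples
	 * missed while the thread was delayed are simply skipped.
	 */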
*/ -+ if (atomic_read(&cli->write_idx) == meta_idx) -+ return -EAGAIN; ++void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) ++{ ++ int cancel_timer = 1; ++ struct list_head *entry, *tmp; ++ unsigned long lflags; + -+ /* Check if previously taken buffer was put back. */ -+ if (atomic_read(&cli->read_idx) != meta_idx) -+ return -EBUSY; ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ struct kbase_jd_atom *katom = list_entry( ++ entry, struct kbase_jd_atom, queue); + -+ /* Copy next available buffer's metadata to user. */ -+ if (copy_to_user(buffer, meta, size)) -+ return -EFAULT; ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ if (katom->jc == evt) { ++ list_del(&katom->queue); + -+ atomic_inc(&cli->meta_idx); ++ katom->event_code = BASE_JD_EVENT_DONE; ++ INIT_WORK(&katom->work, ++ kbasep_soft_event_complete_job); ++ queue_work(kctx->jctx.job_done_wq, ++ &katom->work); ++ } else { ++ /* There are still other waiting jobs, we cannot ++ * cancel the timer yet. ++ */ ++ cancel_timer = 0; ++ } ++ break; ++#ifdef CONFIG_MALI_FENCE_DEBUG ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ /* Keep the timer running if fence debug is enabled and ++ * there are waiting fence jobs. ++ */ ++ cancel_timer = 0; ++ break; ++#endif ++ } ++ } + -+ return 0; ++ if (cancel_timer) ++ del_timer(&kctx->soft_job_timeout); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @buffer: pointer to userspace buffer -+ * @size: size of buffer -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( -+ struct kbase_vinstr_client *cli, void __user *buffer, -+ size_t size) ++#ifdef CONFIG_MALI_FENCE_DEBUG ++static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) +{ -+ unsigned int read_idx = atomic_read(&cli->read_idx); -+ unsigned int idx = read_idx % cli->buffer_count; -+ -+ struct kbase_hwcnt_reader_metadata meta; ++ struct kbase_context *kctx = katom->kctx; ++ struct device *dev = kctx->kbdev->dev; ++ int i; + -+ if (sizeof(struct kbase_hwcnt_reader_metadata) != size) -+ return -EINVAL; ++ for (i = 0; i < 2; i++) { ++ struct kbase_jd_atom *dep; + -+ /* Check if any buffer was taken. */ -+ if (atomic_read(&cli->meta_idx) == read_idx) -+ return -EPERM; ++ list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { ++ if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || ++ dep->status == KBASE_JD_ATOM_STATE_COMPLETED) ++ continue; + -+ /* Check if correct buffer is put back. */ -+ if (copy_from_user(&meta, buffer, size)) -+ return -EFAULT; -+ if (idx != meta.buffer_idx) -+ return -EINVAL; ++ if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ++ == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { ++ /* Found blocked trigger fence. 
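A minimal sketch of how userspace is expected to drive the reader ioctls above; reader_fd and the uapi header providing the KBASE_HWCNT_READER_* definitions are assumed to be available, and error handling is omitted.

	struct kbase_hwcnt_reader_metadata meta;
	struct pollfd pfd = { .fd = reader_fd, .events = POLLIN };

	poll(&pfd, 1, -1);                                      /* a sample is ready */
	ioctl(reader_fd, KBASE_HWCNT_READER_GET_BUFFER, &meta); /* advances meta_idx */
	/* counters live in the mmap()ed area at meta.buffer_idx * dump_size */
	ioctl(reader_fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta); /* advances read_idx */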
*/ ++ struct kbase_sync_fence_info info; + -+ atomic_inc(&cli->read_idx); ++ if (!kbase_sync_fence_in_info_get(dep, &info)) { ++ dev_warn(dev, ++ "\tVictim trigger atom %d fence [%p] %s: %s\n", ++ kbase_jd_atom_id(kctx, dep), ++ info.fence, ++ info.name, ++ kbase_sync_status_string(info.status)); ++ } ++ } + -+ return 0; ++ kbase_fence_debug_check_atom(dep); ++ } ++ } +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @interval: periodic dumping interval (disable periodic dumping if zero) -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( -+ struct kbase_vinstr_client *cli, u32 interval) ++static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; ++ struct kbase_context *kctx = katom->kctx; ++ struct device *dev = katom->kctx->kbdev->dev; ++ int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); ++ unsigned long lflags; ++ struct kbase_sync_fence_info info; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + -+ mutex_lock(&vinstr_ctx->lock); ++ if (kbase_sync_fence_in_info_get(katom, &info)) { ++ /* Fence must have signaled just after timeout. */ ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); ++ return; ++ } + -+ list_del(&cli->list); ++ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", ++ kctx->tgid, kctx->id, ++ kbase_jd_atom_id(kctx, katom), ++ info.fence, timeout_ms); ++ dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", ++ info.fence, info.name, ++ kbase_sync_status_string(info.status)); + -+ cli->dump_interval = interval; ++ /* Search for blocked trigger atoms */ ++ kbase_fence_debug_check_atom(katom); + -+ /* If interval is non-zero, enable periodic dumping for this client. */ -+ if (cli->dump_interval) { -+ if (DUMPING_RESOLUTION > cli->dump_interval) -+ cli->dump_interval = DUMPING_RESOLUTION; -+ cli->dump_time = -+ kbasep_vinstr_get_timestamp() + cli->dump_interval; ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + -+ kbasep_vinstr_add_dump_request( -+ cli, &vinstr_ctx->waiting_clients); ++ kbase_sync_fence_in_dump(katom); ++} + -+ atomic_set(&vinstr_ctx->request_pending, 1); -+ wake_up_all(&vinstr_ctx->waitq); -+ } else { -+ list_add(&cli->list, &vinstr_ctx->idle_clients); -+ } ++struct kbase_fence_debug_work { ++ struct kbase_jd_atom *katom; ++ struct work_struct work; ++}; + -+ mutex_unlock(&vinstr_ctx->lock); ++static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) ++{ ++ struct kbase_fence_debug_work *w = container_of(work, ++ struct kbase_fence_debug_work, work); ++ struct kbase_jd_atom *katom = w->katom; ++ struct kbase_context *kctx = katom->kctx; + -+ return 0; ++ mutex_lock(&kctx->jctx.lock); ++ kbase_fence_debug_wait_timeout(katom); ++ mutex_unlock(&kctx->jctx.lock); ++ ++ kfree(w); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id -+ * @event_id: id of event -+ * Return: event_mask or zero if event is not supported or maskable -+ */ -+static u32 kbasep_vinstr_hwcnt_reader_event_mask( -+ enum base_hwcnt_reader_event event_id) ++static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) +{ -+ u32 event_mask = 0; -+ -+ switch (event_id) { -+ case BASE_HWCNT_READER_EVENT_PREJOB: -+ case BASE_HWCNT_READER_EVENT_POSTJOB: -+ /* These event are maskable. 
*/ -+ event_mask = (1 << event_id); -+ break; ++ struct kbase_fence_debug_work *work; ++ struct kbase_context *kctx = katom->kctx; + -+ case BASE_HWCNT_READER_EVENT_MANUAL: -+ case BASE_HWCNT_READER_EVENT_PERIODIC: -+ /* These event are non-maskable. */ -+ default: -+ /* These event are not supported. */ -+ break; ++ /* Enqueue fence debug worker. Use job_done_wq to get ++ * debug print ordered with job completion. ++ */ ++ work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); ++ /* Ignore allocation failure. */ ++ if (work) { ++ work->katom = katom; ++ INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); ++ queue_work(kctx->jctx.job_done_wq, &work->work); + } -+ -+ return event_mask; +} ++#endif /* CONFIG_MALI_FENCE_DEBUG */ + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @event_id: id of event to enable -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( -+ struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id) ++void kbasep_soft_job_timeout_worker(struct timer_list *t) +{ -+ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; -+ u32 event_mask; ++ struct kbase_context *kctx = from_timer(kctx, t, soft_job_timeout); ++ u32 timeout_ms = (u32)atomic_read( ++ &kctx->kbdev->js_data.soft_job_timeout_ms); ++ struct timer_list *timer = &kctx->soft_job_timeout; ++ ktime_t cur_time = ktime_get(); ++ bool restarting = false; ++ unsigned long lflags; ++ struct list_head *entry, *tmp; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); ++ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { ++ struct kbase_jd_atom *katom = list_entry(entry, ++ struct kbase_jd_atom, queue); ++ s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, ++ katom->start_timestamp)); + -+ event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); -+ if (!event_mask) -+ return -EINVAL; ++ if (elapsed_time < (s64)timeout_ms) { ++ restarting = true; ++ continue; ++ } + -+ mutex_lock(&vinstr_ctx->lock); -+ cli->event_mask |= event_mask; -+ mutex_unlock(&vinstr_ctx->lock); ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ /* Take it out of the list to ensure that it ++ * will be cancelled in all cases ++ */ ++ list_del(&katom->queue); + -+ return 0; ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ INIT_WORK(&katom->work, kbasep_soft_event_complete_job); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ break; ++#ifdef CONFIG_MALI_FENCE_DEBUG ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ kbase_fence_debug_timeout(katom); ++ break; ++#endif ++ } ++ } ++ ++ if (restarting) ++ mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); ++ spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @event_id: id of event to disable -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( -+ struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id) ++static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; -+ u32 event_mask; ++ struct kbase_context *kctx = katom->kctx; ++ unsigned char status; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ /* The status of this soft-job is 
stored in jc */ ++ if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ return 0; ++ } + -+ event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); -+ if (!event_mask) -+ return -EINVAL; ++ if (status == BASE_JD_SOFT_EVENT_SET) ++ return 0; /* Event already set, nothing to do */ + -+ mutex_lock(&vinstr_ctx->lock); -+ cli->event_mask &= ~event_mask; -+ mutex_unlock(&vinstr_ctx->lock); ++ kbasep_add_waiting_with_timeout(katom); + -+ return 0; ++ return 1; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command -+ * @cli: pointer to vinstr client structure -+ * @hwver: pointer to user buffer where hw version will be stored -+ * -+ * Return: zero on success -+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( -+ struct kbase_vinstr_client *cli, u32 __user *hwver) ++static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, ++ unsigned char new_status) +{ -+#ifndef CONFIG_MALI_NO_MALI -+ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; -+#endif -+ -+ u32 ver = 5; ++ /* Complete jobs waiting on the same event */ ++ struct kbase_context *kctx = katom->kctx; + -+#ifndef CONFIG_MALI_NO_MALI -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) -+ ver = 4; -+#endif ++ if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ return; ++ } + -+ return put_user(ver, hwver); ++ if (new_status == BASE_JD_SOFT_EVENT_SET) ++ kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + +/** -+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl -+ * @filp: pointer to file structure -+ * @cmd: user command -+ * @arg: command's argument ++ * kbase_soft_event_update() - Update soft event state ++ * @kctx: Pointer to context ++ * @event: Event to update ++ * @new_status: New status value of event + * -+ * Return: zero on success ++ * Update the event, and wake up any atoms waiting for the event. ++ * ++ * Return: 0 on success, a negative error code on failure. 
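For example, a kernel-side signaller would flag the event and wake any waiting atoms as sketched below; kctx and evt_gpu_va (the event's GPU virtual address) are assumed to be in hand.

	int err = kbase_soft_event_update(kctx, evt_gpu_va, BASE_JD_SOFT_EVENT_SET);

	if (err)
		dev_warn(kctx->kbdev->dev, "soft event %llx could not be updated\n",
			 (unsigned long long)evt_gpu_va);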
+ */ -+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, -+ unsigned int cmd, unsigned long arg) ++int kbase_soft_event_update(struct kbase_context *kctx, ++ u64 event, ++ unsigned char new_status) +{ -+ long rcode = 0; -+ struct kbase_vinstr_client *cli; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ -+ cli = filp->private_data; -+ KBASE_DEBUG_ASSERT(cli); ++ int err = 0; + -+ if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd))) -+ return -EINVAL; ++ mutex_lock(&kctx->jctx.lock); + -+ switch (cmd) { -+ case KBASE_HWCNT_READER_GET_API_VERSION: -+ rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); -+ break; -+ case KBASE_HWCNT_READER_GET_HWVER: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( -+ cli, (u32 __user *)arg); -+ break; -+ case KBASE_HWCNT_READER_GET_BUFFER_SIZE: -+ KBASE_DEBUG_ASSERT(cli->vinstr_ctx); -+ rcode = put_user( -+ (u32)cli->vinstr_ctx->dump_size, -+ (u32 __user *)arg); -+ break; -+ case KBASE_HWCNT_READER_DUMP: -+ rcode = kbase_vinstr_hwc_dump( -+ cli, BASE_HWCNT_READER_EVENT_MANUAL); -+ break; -+ case KBASE_HWCNT_READER_CLEAR: -+ rcode = kbase_vinstr_hwc_clear(cli); -+ break; -+ case KBASE_HWCNT_READER_GET_BUFFER: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( -+ cli, (void __user *)arg, _IOC_SIZE(cmd)); -+ break; -+ case KBASE_HWCNT_READER_PUT_BUFFER: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( -+ cli, (void __user *)arg, _IOC_SIZE(cmd)); -+ break; -+ case KBASE_HWCNT_READER_SET_INTERVAL: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( -+ cli, (u32)arg); -+ break; -+ case KBASE_HWCNT_READER_ENABLE_EVENT: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( -+ cli, (enum base_hwcnt_reader_event)arg); -+ break; -+ case KBASE_HWCNT_READER_DISABLE_EVENT: -+ rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( -+ cli, (enum base_hwcnt_reader_event)arg); -+ break; -+ default: -+ rcode = -EINVAL; -+ break; ++ if (kbasep_write_soft_event_status(kctx, event, new_status)) { ++ err = -ENOENT; ++ goto out; + } + -+ return rcode; -+} -+ -+/** -+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll -+ * @filp: pointer to file structure -+ * @wait: pointer to poll table -+ * Return: POLLIN if data can be read without blocking, otherwise zero -+ */ -+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, -+ poll_table *wait) -+{ -+ struct kbase_vinstr_client *cli; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(wait); ++ if (new_status == BASE_JD_SOFT_EVENT_SET) ++ kbasep_complete_triggered_soft_events(kctx, event); + -+ cli = filp->private_data; -+ KBASE_DEBUG_ASSERT(cli); ++out: ++ mutex_unlock(&kctx->jctx.lock); + -+ poll_wait(filp, &cli->waitq, wait); -+ if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) -+ return POLLIN; -+ return 0; ++ return err; +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap -+ * @filp: pointer to file structure -+ * @vma: pointer to vma structure -+ * Return: zero on success -+ */ -+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, -+ struct vm_area_struct *vma) ++static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_client *cli; -+ unsigned long size, addr, pfn, offset; -+ unsigned long vm_size = vma->vm_end - vma->vm_start; -+ -+ KBASE_DEBUG_ASSERT(filp); -+ KBASE_DEBUG_ASSERT(vma); ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(katom->kctx->kbdev); ++} + -+ cli = filp->private_data; -+ KBASE_DEBUG_ASSERT(cli); ++struct kbase_debug_copy_buffer { 
++ size_t size; ++ struct page **pages; ++ int nr_pages; ++ size_t offset; ++ struct kbase_mem_phy_alloc *gpu_alloc; + -+ size = cli->buffer_count * cli->dump_size; ++ struct page **extres_pages; ++ int nr_extres_pages; ++}; + -+ if (vma->vm_pgoff > (size >> PAGE_SHIFT)) -+ return -EINVAL; ++static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) ++{ ++ struct page **pages = buffer->extres_pages; ++ int nr_pages = buffer->nr_extres_pages; + -+ offset = vma->vm_pgoff << PAGE_SHIFT; -+ if (vm_size > size - offset) -+ return -EINVAL; ++ if (pages) { ++ int i; + -+ addr = __pa((unsigned long)cli->dump_buffers + offset); -+ pfn = addr >> PAGE_SHIFT; ++ for (i = 0; i < nr_pages; i++) { ++ struct page *pg = pages[i]; + -+ return remap_pfn_range( -+ vma, -+ vma->vm_start, -+ pfn, -+ vm_size, -+ vma->vm_page_prot); ++ if (pg) ++ put_page(pg); ++ } ++ kfree(pages); ++ } +} + -+/** -+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release -+ * @inode: pointer to inode structure -+ * @filp: pointer to file structure -+ * Return always return zero -+ */ -+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, -+ struct file *filp) ++static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_client *cli; -+ -+ KBASE_DEBUG_ASSERT(inode); -+ KBASE_DEBUG_ASSERT(filp); ++ struct kbase_debug_copy_buffer *buffers = ++ (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; ++ unsigned int i; ++ unsigned int nr = katom->nr_extres; + -+ cli = filp->private_data; -+ KBASE_DEBUG_ASSERT(cli); ++ if (!buffers) ++ return; + -+ kbase_vinstr_detach_client(cli); -+ return 0; -+} ++ kbase_gpu_vm_lock(katom->kctx); ++ for (i = 0; i < nr; i++) { ++ int p; ++ struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; + -+/*****************************************************************************/ ++ if (!buffers[i].pages) ++ break; ++ for (p = 0; p < buffers[i].nr_pages; p++) { ++ struct page *pg = buffers[i].pages[p]; + -+/** -+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle -+ * @kbdev: pointer to kbase device structure -+ */ -+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) -+{ -+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; -+ unsigned long flags; ++ if (pg) ++ put_page(pg); ++ } ++ kfree(buffers[i].pages); ++ if (gpu_alloc) { ++ switch (gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ free_user_buffer(&buffers[i]); ++ break; ++ } ++ default: ++ /* Nothing to be done. 
*/ ++ break; ++ } ++ kbase_mem_phy_alloc_put(gpu_alloc); ++ } ++ } ++ kbase_gpu_vm_unlock(katom->kctx); ++ kfree(buffers); + -+ down(&js_devdata->schedule_sem); -+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -+ kbase_backend_slot_update(kbdev); -+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -+ up(&js_devdata->schedule_sem); ++ katom->jc = 0; +} + -+/** -+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module -+ * @data: pointer to work structure -+ */ -+static void kbasep_vinstr_suspend_worker(struct work_struct *data) ++static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_context *vinstr_ctx; -+ unsigned long flags; ++ struct kbase_debug_copy_buffer *buffers; ++ struct base_jd_debug_copy_buffer *user_buffers = NULL; ++ unsigned int i; ++ unsigned int nr = katom->nr_extres; ++ int ret = 0; ++ void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + -+ vinstr_ctx = container_of(data, struct kbase_vinstr_context, -+ suspend_work); ++ if (!user_structs) ++ return -EINVAL; + -+ mutex_lock(&vinstr_ctx->lock); ++ buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); ++ if (!buffers) { ++ ret = -ENOMEM; ++ katom->jc = 0; ++ goto out_cleanup; ++ } ++ katom->jc = (u64)(uintptr_t)buffers; + -+ if (vinstr_ctx->kctx) -+ disable_hwcnt(vinstr_ctx); ++ user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ vinstr_ctx->state = VINSTR_SUSPENDED; -+ wake_up_all(&vinstr_ctx->suspend_waitq); -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ if (!user_buffers) { ++ ret = -ENOMEM; ++ goto out_cleanup; ++ } + -+ mutex_unlock(&vinstr_ctx->lock); ++ ret = copy_from_user(user_buffers, user_structs, ++ sizeof(*user_buffers)*nr); ++ if (ret) ++ goto out_cleanup; + -+ /* Kick GPU scheduler to allow entering protected mode. -+ * This must happen after vinstr was suspended. 
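As a worked example of the pin-range arithmetic in kbase_debug_copy_prepare above (invented numbers, 4 KiB pages): a copy buffer at address 0x5880 with size 0x2000 starts mid-page and therefore spans three pages.

	/*
	 *   page_addr      = 0x5880 & PAGE_MASK            = 0x5000
	 *   end_page_addr  = 0x5880 + 0x2000 - 1            = 0x787f
	 *   last_page_addr = 0x787f & PAGE_MASK             = 0x7000
	 *   nr_pages       = (0x7000 - 0x5000)/0x1000 + 1   = 3
	 *   offset         = 0x5880 & ~PAGE_MASK            = 0x880
	 */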
*/ -+ kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); -+} ++ for (i = 0; i < nr; i++) { ++ u64 addr = user_buffers[i].address; ++ u64 page_addr = addr & PAGE_MASK; ++ u64 end_page_addr = addr + user_buffers[i].size - 1; ++ u64 last_page_addr = end_page_addr & PAGE_MASK; ++ int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; ++ int pinned_pages; ++ struct kbase_va_region *reg; ++ struct base_external_resource user_extres; + -+/** -+ * kbasep_vinstr_suspend_worker - worker resuming vinstr module -+ * @data: pointer to work structure -+ */ -+static void kbasep_vinstr_resume_worker(struct work_struct *data) -+{ -+ struct kbase_vinstr_context *vinstr_ctx; -+ unsigned long flags; ++ if (!addr) ++ continue; + -+ vinstr_ctx = container_of(data, struct kbase_vinstr_context, -+ resume_work); ++ buffers[i].nr_pages = nr_pages; ++ buffers[i].offset = addr & ~PAGE_MASK; ++ if (buffers[i].offset >= PAGE_SIZE) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } ++ buffers[i].size = user_buffers[i].size; + -+ mutex_lock(&vinstr_ctx->lock); ++ buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), ++ GFP_KERNEL); ++ if (!buffers[i].pages) { ++ ret = -ENOMEM; ++ goto out_cleanup; ++ } + -+ if (vinstr_ctx->kctx) -+ enable_hwcnt(vinstr_ctx); ++ pinned_pages = get_user_pages_fast(page_addr, ++ nr_pages, ++ 1, /* Write */ ++ buffers[i].pages); ++ if (pinned_pages < 0) { ++ ret = pinned_pages; ++ goto out_cleanup; ++ } ++ if (pinned_pages != nr_pages) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ vinstr_ctx->state = VINSTR_IDLE; -+ wake_up_all(&vinstr_ctx->suspend_waitq); -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ user_extres = user_buffers[i].extres; ++ if (user_extres.ext_resource == 0ULL) { ++ ret = -EINVAL; ++ goto out_cleanup; ++ } + -+ mutex_unlock(&vinstr_ctx->lock); ++ kbase_gpu_vm_lock(katom->kctx); ++ reg = kbase_region_tracker_find_region_enclosing_address( ++ katom->kctx, user_extres.ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + -+ /* Kick GPU scheduler to allow entering protected mode. -+ * Note that scheduler state machine might requested re-entry to -+ * protected mode before vinstr was resumed. -+ * This must happen after vinstr was release. 
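To illustrate the page-straddling copy in kbase_mem_copy_from_extres_page above with invented numbers (4 KiB pages, offset = 0x300, at least one full page left to copy):

	/*
	 *   chunk = PAGE_SIZE - 0x300 = 0xd00
	 *   memcpy(target_page + 0x300, extres_page, 0xd00);
	 *
	 *   chunk = min(0x300, to_copy) = 0x300
	 *   memcpy(next_target_page, extres_page + 0xd00, 0x300);
	 *
	 * i.e. whenever offset != 0 each source page is split across two
	 * consecutive target pages.
	 */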
*/ -+ kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); -+} ++ if (NULL == reg || NULL == reg->gpu_alloc || ++ (reg->flags & KBASE_REG_FREE)) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+/*****************************************************************************/ ++ buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); ++ buffers[i].nr_extres_pages = reg->nr_pages; + -+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) -+{ -+ struct kbase_vinstr_context *vinstr_ctx; ++ if (reg->nr_pages*PAGE_SIZE != buffers[i].size) ++ dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); + -+ vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL); -+ if (!vinstr_ctx) -+ return NULL; ++ switch (reg->gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; ++ unsigned long nr_pages = ++ alloc->imported.user_buf.nr_pages; + -+ INIT_LIST_HEAD(&vinstr_ctx->idle_clients); -+ INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); -+ mutex_init(&vinstr_ctx->lock); -+ spin_lock_init(&vinstr_ctx->state_lock); -+ vinstr_ctx->kbdev = kbdev; -+ vinstr_ctx->thread = NULL; -+ vinstr_ctx->state = VINSTR_IDLE; -+ vinstr_ctx->suspend_cnt = 0; -+ INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); -+ INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); -+ init_waitqueue_head(&vinstr_ctx->suspend_waitq); ++ if (alloc->imported.user_buf.mm != current->mm) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ buffers[i].extres_pages = kcalloc(nr_pages, ++ sizeof(struct page *), GFP_KERNEL); ++ if (!buffers[i].extres_pages) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } + -+ atomic_set(&vinstr_ctx->request_pending, 0); -+ init_waitqueue_head(&vinstr_ctx->waitq); ++ ret = get_user_pages_fast( ++ alloc->imported.user_buf.address, ++ nr_pages, 0, ++ buffers[i].extres_pages); ++ if (ret != nr_pages) ++ goto out_unlock; ++ ret = 0; ++ break; ++ } ++ case KBASE_MEM_TYPE_IMPORTED_UMP: ++ { ++ dev_warn(katom->kctx->kbdev->dev, ++ "UMP is not supported for debug_copy jobs\n"); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ default: ++ /* Nothing to be done. */ ++ break; ++ } ++ kbase_gpu_vm_unlock(katom->kctx); ++ } ++ kfree(user_buffers); + -+ return vinstr_ctx; -+} ++ return ret; + -+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) -+{ -+ struct kbase_vinstr_client *cli; ++out_unlock: ++ kbase_gpu_vm_unlock(katom->kctx); + -+ /* Stop service thread first. */ -+ if (vinstr_ctx->thread) -+ kthread_stop(vinstr_ctx->thread); ++out_cleanup: ++ kfree(buffers); ++ kfree(user_buffers); + -+ /* Wait for workers. 
*/ -+ flush_work(&vinstr_ctx->suspend_work); -+ flush_work(&vinstr_ctx->resume_work); ++ /* Frees allocated memory for kbase_debug_copy_job struct, including ++ * members, and sets jc to 0 */ ++ kbase_debug_copy_finish(katom); ++ return ret; ++} + -+ while (1) { -+ struct list_head *list = &vinstr_ctx->idle_clients; ++static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, ++ void *extres_page, struct page **pages, unsigned int nr_pages, ++ unsigned int *target_page_nr, size_t offset, size_t *to_copy) ++{ ++ void *target_page = kmap(pages[*target_page_nr]); ++ size_t chunk = PAGE_SIZE-offset; + -+ if (list_empty(list)) { -+ list = &vinstr_ctx->waiting_clients; -+ if (list_empty(list)) -+ break; -+ } ++ lockdep_assert_held(&kctx->reg_lock); + -+ cli = list_first_entry(list, struct kbase_vinstr_client, list); -+ list_del(&cli->list); -+ kfree(cli->accum_buffer); -+ kfree(cli); -+ vinstr_ctx->nclients--; ++ if (!target_page) { ++ *target_page_nr += 1; ++ dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); ++ return; + } -+ KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); -+ if (vinstr_ctx->kctx) -+ kbasep_vinstr_destroy_kctx(vinstr_ctx); -+ kfree(vinstr_ctx); -+} + -+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_uk_hwcnt_reader_setup *setup) -+{ -+ struct kbase_vinstr_client *cli; -+ u32 bitmap[4]; ++ chunk = min(chunk, *to_copy); + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ KBASE_DEBUG_ASSERT(setup); -+ KBASE_DEBUG_ASSERT(setup->buffer_count); ++ memcpy(target_page + offset, extres_page, chunk); ++ *to_copy -= chunk; + -+ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; -+ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; -+ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; -+ bitmap[JM_HWCNT_BM] = setup->jm_bm; ++ kunmap(pages[*target_page_nr]); + -+ cli = kbasep_vinstr_attach_client( -+ vinstr_ctx, -+ setup->buffer_count, -+ bitmap, -+ &setup->fd, -+ NULL); ++ *target_page_nr += 1; ++ if (*target_page_nr >= nr_pages) ++ return; + -+ if (!cli) -+ return -ENOMEM; ++ target_page = kmap(pages[*target_page_nr]); ++ if (!target_page) { ++ *target_page_nr += 1; ++ dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); ++ return; ++ } + -+ return 0; ++ KBASE_DEBUG_ASSERT(target_page); ++ ++ chunk = min(offset, *to_copy); ++ memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); ++ *to_copy -= chunk; ++ ++ kunmap(pages[*target_page_nr]); +} + -+int kbase_vinstr_legacy_hwc_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_vinstr_client **cli, -+ struct kbase_uk_hwcnt_setup *setup) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, ++ unsigned long page_num, struct page **page) +{ -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ KBASE_DEBUG_ASSERT(setup); -+ KBASE_DEBUG_ASSERT(cli); -+ -+ if (setup->dump_buffer) { -+ u32 bitmap[4]; ++ struct sg_table *sgt = gpu_alloc->imported.umm.sgt; ++ struct sg_page_iter sg_iter; ++ unsigned long page_index = 0; + -+ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; -+ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; -+ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; -+ bitmap[JM_HWCNT_BM] = setup->jm_bm; ++ if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) ++ return NULL; + -+ if (*cli) -+ return -EBUSY; ++ if (!sgt) ++ return NULL; + -+ *cli = kbasep_vinstr_attach_client( -+ vinstr_ctx, -+ 0, -+ bitmap, -+ (void *)(long)setup->dump_buffer, -+ NULL); ++ if (WARN_ON(page_num >= gpu_alloc->nents)) ++ return NULL; + -+ if (!(*cli)) -+ 
return -ENOMEM; -+ } else { -+ if (!*cli) -+ return -EINVAL; ++ for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { ++ if (page_index == page_num) { ++ *page = sg_page_iter_page(&sg_iter); + -+ kbase_vinstr_detach_client(*cli); -+ *cli = NULL; ++ return kmap(*page); ++ } ++ page_index++; + } + -+ return 0; ++ return NULL; +} ++#endif + -+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_uk_hwcnt_reader_setup *setup, -+ void *kernel_buffer) ++static int kbase_mem_copy_from_extres(struct kbase_context *kctx, ++ struct kbase_debug_copy_buffer *buf_data) +{ -+ u32 bitmap[4]; ++ unsigned int i; ++ unsigned int target_page_nr = 0; ++ struct page **pages = buf_data->pages; ++ u64 offset = buf_data->offset; ++ size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; ++ size_t to_copy = min(extres_size, buf_data->size); ++ struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; ++ int ret = 0; + -+ if (!vinstr_ctx || !setup || !kernel_buffer) -+ return NULL; ++ KBASE_DEBUG_ASSERT(pages != NULL); + -+ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; -+ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; -+ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; -+ bitmap[JM_HWCNT_BM] = setup->jm_bm; ++ kbase_gpu_vm_lock(kctx); ++ if (!gpu_alloc) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } + -+ return kbasep_vinstr_attach_client( -+ vinstr_ctx, -+ 0, -+ bitmap, -+ NULL, -+ kernel_buffer); -+} -+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); ++ switch (gpu_alloc->type) { ++ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++ { ++ for (i = 0; i < buf_data->nr_extres_pages; i++) { ++ struct page *pg = buf_data->extres_pages[i]; ++ void *extres_page = kmap(pg); + -+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id) -+{ -+ int rcode = 0; -+ struct kbase_vinstr_context *vinstr_ctx; -+ u64 timestamp; -+ u32 event_mask; ++ if (extres_page) ++ kbase_mem_copy_from_extres_page(kctx, ++ extres_page, pages, ++ buf_data->nr_pages, ++ &target_page_nr, ++ offset, &to_copy); + -+ if (!cli) -+ return -EINVAL; ++ kunmap(pg); ++ if (target_page_nr >= buf_data->nr_pages) ++ break; ++ } ++ break; ++ } ++ break; ++#ifdef CONFIG_DMA_SHARED_BUFFER ++ case KBASE_MEM_TYPE_IMPORTED_UMM: { ++ struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; + -+ vinstr_ctx = cli->vinstr_ctx; -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ KBASE_DEBUG_ASSERT(dma_buf != NULL); ++ KBASE_DEBUG_ASSERT(dma_buf->size == ++ buf_data->nr_extres_pages * PAGE_SIZE); + -+ KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT); -+ event_mask = 1 << event_id; ++ ret = dma_buf_begin_cpu_access(dma_buf, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) ++ 0, buf_data->nr_extres_pages*PAGE_SIZE, ++#endif ++ DMA_FROM_DEVICE); ++ if (ret) ++ goto out_unlock; + -+ mutex_lock(&vinstr_ctx->lock); ++ for (i = 0; i < buf_data->nr_extres_pages; i++) { + -+ if (event_mask & cli->event_mask) { -+ rcode = kbasep_vinstr_collect_and_accumulate( -+ vinstr_ctx, -+ ×tamp); -+ if (rcode) -+ goto exit; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++ struct page *pg; ++ void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); ++#else ++ void *extres_page = dma_buf_kmap(dma_buf, i); ++#endif + -+ rcode = kbasep_vinstr_update_client(cli, timestamp, event_id); -+ if (rcode) -+ goto exit; ++ if (extres_page) ++ kbase_mem_copy_from_extres_page(kctx, ++ extres_page, pages, ++ buf_data->nr_pages, ++ &target_page_nr, ++ offset, &to_copy); + -+ 
kbasep_vinstr_reprogram(vinstr_ctx); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++ kunmap(pg); ++#else ++ dma_buf_kunmap(dma_buf, i, extres_page); ++#endif ++ if (target_page_nr >= buf_data->nr_pages) ++ break; ++ } ++ dma_buf_end_cpu_access(dma_buf, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) ++ 0, buf_data->nr_extres_pages*PAGE_SIZE, ++#endif ++ DMA_FROM_DEVICE); ++ break; + } ++#endif ++ default: ++ ret = -EINVAL; ++ } ++out_unlock: ++ kbase_gpu_vm_unlock(kctx); ++ return ret; + -+exit: -+ mutex_unlock(&vinstr_ctx->lock); -+ -+ return rcode; +} -+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); + -+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) ++static int kbase_debug_copy(struct kbase_jd_atom *katom) +{ -+ struct kbase_vinstr_context *vinstr_ctx; -+ int rcode; -+ u64 unused; -+ -+ if (!cli) -+ return -EINVAL; -+ -+ vinstr_ctx = cli->vinstr_ctx; -+ KBASE_DEBUG_ASSERT(vinstr_ctx); -+ -+ mutex_lock(&vinstr_ctx->lock); -+ -+ rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); -+ if (rcode) -+ goto exit; -+ rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx); -+ if (rcode) -+ goto exit; -+ memset(cli->accum_buffer, 0, cli->dump_size); ++ struct kbase_debug_copy_buffer *buffers = ++ (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; ++ unsigned int i; + -+ kbasep_vinstr_reprogram(vinstr_ctx); ++ for (i = 0; i < katom->nr_extres; i++) { ++ int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); + -+exit: -+ mutex_unlock(&vinstr_ctx->lock); ++ if (res) ++ return res; ++ } + -+ return rcode; ++ return 0; +} + -+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) ++static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ -+ unsigned long flags; -+ int ret = -EAGAIN; ++ __user void *data = (__user void *)(uintptr_t) katom->jc; ++ struct base_jit_alloc_info *info; ++ struct kbase_context *kctx = katom->kctx; ++ int ret; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ /* Fail the job if there is no info structure */ ++ if (!data) { ++ ret = -EINVAL; ++ goto fail; ++ } + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ switch (vinstr_ctx->state) { -+ case VINSTR_SUSPENDED: -+ vinstr_ctx->suspend_cnt++; -+ /* overflow shall not happen */ -+ BUG_ON(0 == vinstr_ctx->suspend_cnt); -+ ret = 0; -+ break; ++ /* Copy the information for safe access and future storage */ ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) { ++ ret = -ENOMEM; ++ goto fail; ++ } + -+ case VINSTR_IDLE: -+ vinstr_ctx->state = VINSTR_SUSPENDING; -+ schedule_work(&vinstr_ctx->suspend_work); -+ break; ++ if (copy_from_user(info, data, sizeof(*info)) != 0) { ++ ret = -EINVAL; ++ goto free_info; ++ } + -+ case VINSTR_DUMPING: -+ vinstr_ctx->state = VINSTR_SUSPENDING; -+ break; ++ /* If the ID is zero then fail the job */ ++ if (info->id == 0) { ++ ret = -EINVAL; ++ goto free_info; ++ } + -+ case VINSTR_SUSPENDING: -+ /* fall through */ -+ case VINSTR_RESUMING: -+ break; ++ /* Sanity check that the PA fits within the VA */ ++ if (info->va_pages < info->commit_pages) { ++ ret = -EINVAL; ++ goto free_info; ++ } + -+ default: -+ BUG(); -+ break; ++ /* Ensure the GPU address is correctly aligned */ ++ if ((info->gpu_alloc_addr & 0x7) != 0) { ++ ret = -EINVAL; ++ goto free_info; + } -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + -+ return ret; -+} ++ /* Replace the user pointer with our kernel allocated info structure */ ++ katom->jc = (u64)(uintptr_t) info; ++ katom->jit_blocked = false; + -+void 
kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) -+{ -+ wait_event(vinstr_ctx->suspend_waitq, -+ (0 == kbase_vinstr_try_suspend(vinstr_ctx))); -+} ++ lockdep_assert_held(&kctx->jctx.lock); ++ list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); + -+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) ++ /* ++ * Note: ++ * The provided info->gpu_alloc_addr isn't validated here as ++ * userland can cache allocations which means that even ++ * though the region is valid it doesn't represent the ++ * same thing it used to. ++ * ++ * Complete validation of va_pages, commit_pages and extent ++ * isn't done here as it will be done during the call to ++ * kbase_mem_alloc. ++ */ ++ return 0; ++ ++free_info: ++ kfree(info); ++fail: ++ katom->jc = 0; ++ return ret; ++} ++ ++static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom) +{ -+ unsigned long flags; ++ if (WARN_ON(katom->core_req != BASE_JD_REQ_SOFT_JIT_FREE)) ++ return 0; + -+ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ return (u8) katom->jc; ++} + -+ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); -+ BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); -+ if (VINSTR_SUSPENDED == vinstr_ctx->state) { -+ BUG_ON(0 == vinstr_ctx->suspend_cnt); -+ vinstr_ctx->suspend_cnt--; -+ if (0 == vinstr_ctx->suspend_cnt) { -+ vinstr_ctx->state = VINSTR_RESUMING; -+ schedule_work(&vinstr_ctx->resume_work); ++static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ struct base_jit_alloc_info *info; ++ struct kbase_va_region *reg; ++ struct kbase_vmap_struct mapping; ++ u64 *ptr, new_addr; ++ ++ if (katom->jit_blocked) { ++ list_del(&katom->queue); ++ katom->jit_blocked = false; ++ } ++ ++ info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; ++ ++ /* The JIT ID is still in use so fail the allocation */ ++ if (kctx->jit_alloc[info->id]) { ++ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; ++ return 0; ++ } ++ ++ /* Create a JIT allocation */ ++ reg = kbase_jit_allocate(kctx, info); ++ if (!reg) { ++ struct kbase_jd_atom *jit_atom; ++ bool can_block = false; ++ ++ lockdep_assert_held(&kctx->jctx.lock); ++ ++ jit_atom = list_first_entry(&kctx->jit_atoms_head, ++ struct kbase_jd_atom, jit_node); ++ ++ list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { ++ if (jit_atom == katom) ++ break; ++ if (jit_atom->core_req == BASE_JD_REQ_SOFT_JIT_FREE) { ++ u8 free_id = kbase_jit_free_get_id(jit_atom); ++ ++ if (free_id && kctx->jit_alloc[free_id]) { ++ /* A JIT free which is active and ++ * submitted before this atom ++ */ ++ can_block = true; ++ break; ++ } ++ } ++ } ++ ++ if (!can_block) { ++ /* Mark the allocation so we know it's in use even if ++ * the allocation itself fails. ++ */ ++ kctx->jit_alloc[info->id] = ++ (struct kbase_va_region *) -1; ++ ++ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; ++ return 0; + } ++ ++ /* There are pending frees for an active allocation ++ * so we should wait to see whether they free the memory. ++ * Add to the beginning of the list to ensure that the atom is ++ * processed only once in kbase_jit_free_finish ++ */ ++ list_add(&katom->queue, &kctx->jit_pending_alloc); ++ katom->jit_blocked = true; ++ ++ return 1; + } -+ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ ++ /* ++ * Write the address of the JIT allocation to the user provided ++ * GPU allocation. 
++ */ ++ ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), ++ &mapping); ++ if (!ptr) { ++ /* ++ * Leave the allocation "live" as the JIT free jit will be ++ * submitted anyway. ++ */ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return 0; ++ } ++ ++ new_addr = reg->start_pfn << PAGE_SHIFT; ++ *ptr = new_addr; ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( ++ katom, info->gpu_alloc_addr, new_addr); ++ kbase_vunmap(kctx, &mapping); ++ ++ katom->event_code = BASE_JD_EVENT_DONE; ++ ++ /* ++ * Bind it to the user provided ID. Do this last so we can check for ++ * the JIT free racing this JIT alloc job. ++ */ ++ kctx->jit_alloc[info->id] = reg; ++ ++ return 0; +} -diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h -new file mode 100644 -index 000000000..6207d25ae ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h -@@ -0,0 +1,155 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ + ++static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) ++{ ++ struct base_jit_alloc_info *info; + ++ lockdep_assert_held(&katom->kctx->jctx.lock); + -+#ifndef _KBASE_VINSTR_H_ -+#define _KBASE_VINSTR_H_ ++ /* Remove atom from jit_atoms_head list */ ++ list_del(&katom->jit_node); + -+#include -+#include ++ if (katom->jit_blocked) { ++ list_del(&katom->queue); ++ katom->jit_blocked = false; ++ } + -+/*****************************************************************************/ ++ info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; ++ /* Free the info structure */ ++ kfree(info); ++} + -+struct kbase_vinstr_context; -+struct kbase_vinstr_client; ++static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; + -+/*****************************************************************************/ ++ lockdep_assert_held(&kctx->jctx.lock); ++ list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); + -+/** -+ * kbase_vinstr_init() - initialize the vinstr core -+ * @kbdev: kbase device -+ * -+ * Return: pointer to the vinstr context on success or NULL on failure -+ */ -+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev); ++ return 0; ++} + -+/** -+ * kbase_vinstr_term() - terminate the vinstr core -+ * @vinstr_ctx: vinstr context -+ */ -+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); ++static void kbase_jit_free_process(struct kbase_jd_atom *katom) ++{ ++ struct kbase_context *kctx = katom->kctx; ++ u8 id = kbase_jit_free_get_id(katom); + -+/** -+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader -+ * @vinstr_ctx: vinstr context -+ * @setup: reader's configuration -+ * -+ * Return: zero on success -+ */ -+int kbase_vinstr_hwcnt_reader_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_uk_hwcnt_reader_setup *setup); ++ /* ++ * If the ID is zero or it is not in use yet then fail the job. 
++ */ ++ if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ return; ++ } + -+/** -+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping -+ * @vinstr_ctx: vinstr context -+ * @cli: pointer where to store pointer to new vinstr client structure -+ * @setup: hwc configuration -+ * -+ * Return: zero on success -+ */ -+int kbase_vinstr_legacy_hwc_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_vinstr_client **cli, -+ struct kbase_uk_hwcnt_setup *setup); ++ /* ++ * If the ID is valid but the allocation request failed still succeed ++ * this soft job but don't try and free the allocation. ++ */ ++ if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) ++ kbase_jit_free(kctx, kctx->jit_alloc[id]); + -+/** -+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side -+ * client -+ * @vinstr_ctx: vinstr context -+ * @setup: reader's configuration -+ * @kernel_buffer: pointer to dump buffer -+ * -+ * setup->buffer_count and setup->fd are not used for kernel side clients. -+ * -+ * Return: pointer to client structure, or NULL on failure -+ */ -+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( -+ struct kbase_vinstr_context *vinstr_ctx, -+ struct kbase_uk_hwcnt_reader_setup *setup, -+ void *kernel_buffer); ++ kctx->jit_alloc[id] = NULL; ++} + -+/** -+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client -+ * @cli: pointer to vinstr client -+ * @event_id: id of event that triggered hwcnt dump -+ * -+ * Return: zero on success -+ */ -+int kbase_vinstr_hwc_dump( -+ struct kbase_vinstr_client *cli, -+ enum base_hwcnt_reader_event event_id); ++static void kbasep_jit_free_finish_worker(struct work_struct *work) ++{ ++ struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, ++ work); ++ struct kbase_context *kctx = katom->kctx; ++ int resched; + -+/** -+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for -+ * a given kbase context -+ * @cli: pointer to vinstr client -+ * -+ * Return: zero on success -+ */ -+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); ++ mutex_lock(&kctx->jctx.lock); ++ kbase_finish_soft_job(katom); ++ resched = jd_done_nolock(katom, NULL); ++ mutex_unlock(&kctx->jctx.lock); + -+/** -+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context -+ * @vinstr_ctx: vinstr context -+ * -+ * Return: 0 on success, or negative if state change is in progress -+ * -+ * Warning: This API call is non-generic. It is meant to be used only by -+ * job scheduler state machine. -+ * -+ * Function initiates vinstr switch to suspended state. Once it was called -+ * vinstr enters suspending state. If function return non-zero value, it -+ * indicates that state switch is not complete and function must be called -+ * again. On state switch vinstr will trigger job scheduler state machine -+ * cycle. -+ */ -+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); ++ if (resched) ++ kbase_js_sched_all(kctx->kbdev); ++} + -+/** -+ * kbase_vinstr_suspend - suspends operation of a given vinstr context -+ * @vinstr_ctx: vinstr context -+ * -+ * Function initiates vinstr switch to suspended state. Then it blocks until -+ * operation is completed. 
-+ */ -+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); ++static void kbase_jit_free_finish(struct kbase_jd_atom *katom) ++{ ++ struct list_head *i, *tmp; ++ struct kbase_context *kctx = katom->kctx; + -+/** -+ * kbase_vinstr_resume - resumes operation of a given vinstr context -+ * @vinstr_ctx: vinstr context -+ * -+ * Function can be called only if it was preceded by a successful call -+ * to kbase_vinstr_suspend. -+ */ -+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); ++ lockdep_assert_held(&kctx->jctx.lock); ++ /* Remove this atom from the kctx->jit_atoms_head list */ ++ list_del(&katom->jit_node); + -+/** -+ * kbase_vinstr_dump_size - Return required size of dump buffer -+ * @kbdev: device pointer -+ * -+ * Return : buffer size in bytes -+ */ -+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); ++ list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) { ++ struct kbase_jd_atom *pending_atom = list_entry(i, ++ struct kbase_jd_atom, queue); ++ if (kbase_jit_allocate_process(pending_atom) == 0) { ++ /* Atom has completed */ ++ INIT_WORK(&pending_atom->work, ++ kbasep_jit_free_finish_worker); ++ queue_work(kctx->jctx.job_done_wq, &pending_atom->work); ++ } ++ } ++} + -+/** -+ * kbase_vinstr_detach_client - Detach a client from the vinstr core -+ * @cli: pointer to vinstr client -+ */ -+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); ++static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) ++{ ++ __user struct base_external_resource_list *user_ext_res; ++ struct base_external_resource_list *ext_res; ++ u64 count = 0; ++ size_t copy_size; ++ int ret; + -+#endif /* _KBASE_VINSTR_H_ */ ++ user_ext_res = (__user struct base_external_resource_list *) ++ (uintptr_t) katom->jc; + -diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h ++ /* Fail the job if there is no info structure */ ++ if (!user_ext_res) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* Is the number of external resources in range? */ ++ if (!count || count > BASE_EXT_RES_COUNT_MAX) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* Copy the information for safe access and future storage */ ++ copy_size = sizeof(*ext_res); ++ copy_size += sizeof(struct base_external_resource) * (count - 1); ++ ext_res = kzalloc(copy_size, GFP_KERNEL); ++ if (!ext_res) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { ++ ret = -EINVAL; ++ goto free_info; ++ } ++ ++ /* ++ * Overwrite the count with the first value incase it was changed ++ * after the fact. ++ */ ++ ext_res->count = count; ++ ++ /* ++ * Replace the user pointer with our kernel allocated ++ * ext_res structure. 
++ */ ++ katom->jc = (u64)(uintptr_t) ext_res; ++ ++ return 0; ++ ++free_info: ++ kfree(ext_res); ++fail: ++ return ret; ++} ++ ++static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) ++{ ++ struct base_external_resource_list *ext_res; ++ int i; ++ bool failed = false; ++ ++ ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; ++ if (!ext_res) ++ goto failed_jc; ++ ++ kbase_gpu_vm_lock(katom->kctx); ++ ++ for (i = 0; i < ext_res->count; i++) { ++ u64 gpu_addr; ++ ++ gpu_addr = ext_res->ext_res[i].ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; ++ if (map) { ++ if (!kbase_sticky_resource_acquire(katom->kctx, ++ gpu_addr)) ++ goto failed_loop; ++ } else ++ if (!kbase_sticky_resource_release(katom->kctx, NULL, ++ gpu_addr)) ++ failed = true; ++ } ++ ++ /* ++ * In the case of unmap we continue unmapping other resources in the ++ * case of failure but will always report failure if _any_ unmap ++ * request fails. ++ */ ++ if (failed) ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ else ++ katom->event_code = BASE_JD_EVENT_DONE; ++ ++ kbase_gpu_vm_unlock(katom->kctx); ++ ++ return; ++ ++failed_loop: ++ while (--i > 0) { ++ u64 gpu_addr; ++ ++ gpu_addr = ext_res->ext_res[i].ext_resource & ++ ~BASE_EXT_RES_ACCESS_EXCLUSIVE; ++ ++ kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); ++ } ++ ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ kbase_gpu_vm_unlock(katom->kctx); ++ ++failed_jc: ++ return; ++} ++ ++static void kbase_ext_res_finish(struct kbase_jd_atom *katom) ++{ ++ struct base_external_resource_list *ext_res; ++ ++ ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; ++ /* Free the info structure */ ++ kfree(ext_res); ++} ++ ++int kbase_process_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ return kbase_dump_cpu_gpu_time(katom); ++ ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ katom->event_code = kbase_sync_fence_out_trigger(katom, ++ katom->event_code == BASE_JD_EVENT_DONE ? 
++ 0 : -EFAULT); ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ { ++ int ret = kbase_sync_fence_in_wait(katom); ++ ++ if (ret == 1) { ++#ifdef CONFIG_MALI_FENCE_DEBUG ++ kbasep_add_waiting_with_timeout(katom); ++#else ++ kbasep_add_waiting_soft_job(katom); ++#endif ++ } ++ return ret; ++ } ++#endif ++ ++ case BASE_JD_REQ_SOFT_REPLAY: ++ return kbase_replay_process(katom); ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ return kbasep_soft_event_wait(katom); ++ case BASE_JD_REQ_SOFT_EVENT_SET: ++ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); ++ break; ++ case BASE_JD_REQ_SOFT_EVENT_RESET: ++ kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); ++ break; ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ { ++ int res = kbase_debug_copy(katom); ++ ++ if (res) ++ katom->event_code = BASE_JD_EVENT_JOB_INVALID; ++ break; ++ } ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ return kbase_jit_allocate_process(katom); ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ kbase_jit_free_process(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ kbase_ext_res_process(katom, true); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ kbase_ext_res_process(katom, false); ++ break; ++ } ++ ++ /* Atom is complete */ ++ return 0; ++} ++ ++void kbase_cancel_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ kbase_sync_fence_in_cancel_wait(katom); ++ break; ++#endif ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ kbasep_soft_event_cancel_job(katom); ++ break; ++ default: ++ /* This soft-job doesn't support cancellation! */ ++ KBASE_DEBUG_ASSERT(0); ++ } ++} ++ ++int kbase_prepare_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ { ++ if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) ++ return -EINVAL; ++ } ++ break; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ { ++ struct base_fence fence; ++ int fd; ++ ++ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) ++ return -EINVAL; ++ ++ fd = kbase_sync_fence_out_create(katom, ++ fence.basep.stream_fd); ++ if (fd < 0) ++ return -EINVAL; ++ ++ fence.basep.fd = fd; ++ if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { ++ kbase_sync_fence_out_remove(katom); ++ kbase_sync_fence_close_fd(fd); ++ fence.basep.fd = -EINVAL; ++ return -EINVAL; ++ } ++ } ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ { ++ struct base_fence fence; ++ int ret; ++ ++ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) ++ return -EINVAL; ++ ++ /* Get a reference to the fence object */ ++ ret = kbase_sync_fence_in_from_fd(katom, ++ fence.basep.fd); ++ if (ret < 0) ++ return ret; ++ ++#ifdef CONFIG_MALI_DMA_FENCE ++ /* ++ * Set KCTX_NO_IMPLICIT_FENCE in the context the first ++ * time a soft fence wait job is observed. This will ++ * prevent the implicit dma-buf fence to conflict with ++ * the Android native sync fences. 
++ */ ++ if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) ++ kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); ++#endif /* CONFIG_MALI_DMA_FENCE */ ++ } ++ break; ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ return kbase_jit_allocate_prepare(katom); ++ case BASE_JD_REQ_SOFT_REPLAY: ++ break; ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ return kbase_jit_free_prepare(katom); ++ case BASE_JD_REQ_SOFT_EVENT_WAIT: ++ case BASE_JD_REQ_SOFT_EVENT_SET: ++ case BASE_JD_REQ_SOFT_EVENT_RESET: ++ if (katom->jc == 0) ++ return -EINVAL; ++ break; ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ return kbase_debug_copy_prepare(katom); ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ return kbase_ext_res_prepare(katom); ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ return kbase_ext_res_prepare(katom); ++ default: ++ /* Unsupported soft-job */ ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++void kbase_finish_soft_job(struct kbase_jd_atom *katom) ++{ ++ switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { ++ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ++ /* Nothing to do */ ++ break; ++#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) ++ case BASE_JD_REQ_SOFT_FENCE_TRIGGER: ++ /* If fence has not yet been signaled, do it now */ ++ kbase_sync_fence_out_trigger(katom, katom->event_code == ++ BASE_JD_EVENT_DONE ? 0 : -EFAULT); ++ break; ++ case BASE_JD_REQ_SOFT_FENCE_WAIT: ++ /* Release katom's reference to fence object */ ++ kbase_sync_fence_in_remove(katom); ++ break; ++#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ ++ case BASE_JD_REQ_SOFT_DEBUG_COPY: ++ kbase_debug_copy_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_JIT_ALLOC: ++ kbase_jit_allocate_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_MAP: ++ kbase_ext_res_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: ++ kbase_ext_res_finish(katom); ++ break; ++ case BASE_JD_REQ_SOFT_JIT_FREE: ++ kbase_jit_free_finish(katom); ++ break; ++ } ++} ++ ++void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) ++{ ++ LIST_HEAD(local_suspended_soft_jobs); ++ struct kbase_jd_atom *tmp_iter; ++ struct kbase_jd_atom *katom_iter; ++ struct kbasep_js_device_data *js_devdata; ++ bool resched = false; ++ ++ KBASE_DEBUG_ASSERT(kbdev); ++ ++ js_devdata = &kbdev->js_data; ++ ++ /* Move out the entire list */ ++ mutex_lock(&js_devdata->runpool_mutex); ++ list_splice_init(&js_devdata->suspended_soft_jobs_list, ++ &local_suspended_soft_jobs); ++ mutex_unlock(&js_devdata->runpool_mutex); ++ ++ /* ++ * Each atom must be detached from the list and ran separately - ++ * it could be re-added to the old list, but this is unlikely ++ */ ++ list_for_each_entry_safe(katom_iter, tmp_iter, ++ &local_suspended_soft_jobs, dep_item[1]) { ++ struct kbase_context *kctx = katom_iter->kctx; ++ ++ mutex_lock(&kctx->jctx.lock); ++ ++ /* Remove from the global list */ ++ list_del(&katom_iter->dep_item[1]); ++ /* Remove from the context's list of waiting soft jobs */ ++ kbasep_remove_waiting_soft_job(katom_iter); ++ ++ if (kbase_process_soft_job(katom_iter) == 0) { ++ kbase_finish_soft_job(katom_iter); ++ resched |= jd_done_nolock(katom_iter, NULL); ++ } else { ++ KBASE_DEBUG_ASSERT((katom_iter->core_req & ++ BASE_JD_REQ_SOFT_JOB_TYPE) ++ != BASE_JD_REQ_SOFT_REPLAY); ++ } ++ ++ mutex_unlock(&kctx->jctx.lock); ++ } ++ ++ if (resched) ++ kbase_js_sched_all(kbdev); ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.c b/drivers/gpu/arm/midgard/mali_kbase_strings.c new file mode 100644 -index 000000000..5d6b4021d +index 
000000000..c98762cec --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h -@@ -0,0 +1,201 @@ -+/* ++++ b/drivers/gpu/arm/midgard/mali_kbase_strings.c +@@ -0,0 +1,23 @@ ++ /* + * -+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -400797,200 +401611,47 @@ index 000000000..5d6b4021d + */ + + ++#include "mali_kbase_strings.h" + ++#define KBASE_DRV_NAME "mali" ++#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + -+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_MALI_KBASE_H -+ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM mali -+ -+#include -+ -+DECLARE_EVENT_CLASS(mali_slot_template, -+ TP_PROTO(int jobslot, unsigned int info_val), -+ TP_ARGS(jobslot, info_val), -+ TP_STRUCT__entry( -+ __field(unsigned int, jobslot) -+ __field(unsigned int, info_val) -+ ), -+ TP_fast_assign( -+ __entry->jobslot = jobslot; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val) -+); -+ -+#define DEFINE_MALI_SLOT_EVENT(name) \ -+DEFINE_EVENT(mali_slot_template, mali_##name, \ -+ TP_PROTO(int jobslot, unsigned int info_val), \ -+ TP_ARGS(jobslot, info_val)) -+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT); -+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE); -+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD); -+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD); -+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP); -+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0); -+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1); -+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP); -+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0); -+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1); -+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); -+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT); -+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER); -+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER); -+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); -+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); -+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT); -+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); -+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); -+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); -+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); -+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); -+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); -+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); -+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); -+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); -+#undef DEFINE_MALI_SLOT_EVENT -+ -+DECLARE_EVENT_CLASS(mali_refcount_template, -+ TP_PROTO(int refcount, unsigned int info_val), -+ TP_ARGS(refcount, info_val), -+ TP_STRUCT__entry( -+ __field(unsigned int, refcount) -+ __field(unsigned int, info_val) -+ ), -+ TP_fast_assign( -+ __entry->refcount = refcount; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val) -+); -+ -+#define DEFINE_MALI_REFCOUNT_EVENT(name) \ -+DEFINE_EVENT(mali_refcount_template, mali_##name, \ -+ TP_PROTO(int refcount, unsigned int info_val), \ -+ TP_ARGS(refcount, info_val)) -+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK); -+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB); -+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB); -+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX); 
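Each DEFINE_MALI_REFCOUNT_EVENT() entry above and below this note expands, through DEFINE_EVENT(), into a trace_mali_<name>() helper taking the (int refcount, unsigned int info_val) arguments of mali_refcount_template. A minimal sketch of how a separate driver source file would emit one of these events follows; the example function, its argument names and the nr_jobs value are hypothetical placeholders, while the CREATE_TRACE_POINTS-before-include step is the standard tracepoint convention and must appear in exactly one translation unit.

/* Illustrative sketch only -- not part of the kernel.patch hunks above. */
#define CREATE_TRACE_POINTS
#include "mali_linux_kbase_trace.h"

static void example_trace_ctx_retain(int refcount, unsigned int nr_jobs)
{
	/* Record a JS_RETAIN_CTX event: the context refcount plus an
	 * implementation-specific info word (here, a queued-job count).
	 */
	trace_mali_JS_RETAIN_CTX(refcount, nr_jobs);
}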
-+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); -+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); -+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE); -+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE); -+#undef DEFINE_MALI_REFCOUNT_EVENT -+ -+DECLARE_EVENT_CLASS(mali_add_template, -+ TP_PROTO(int gpu_addr, unsigned int info_val), -+ TP_ARGS(gpu_addr, info_val), -+ TP_STRUCT__entry( -+ __field(unsigned int, gpu_addr) -+ __field(unsigned int, info_val) -+ ), -+ TP_fast_assign( -+ __entry->gpu_addr = gpu_addr; -+ __entry->info_val = info_val; -+ ), -+ TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val) -+); -+ -+#define DEFINE_MALI_ADD_EVENT(name) \ -+DEFINE_EVENT(mali_add_template, mali_##name, \ -+ TP_PROTO(int gpu_addr, unsigned int info_val), \ -+ TP_ARGS(gpu_addr, info_val)) -+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); -+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); -+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); -+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER); -+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END); -+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER); -+DEFINE_MALI_ADD_EVENT(JD_DONE); -+DEFINE_MALI_ADD_EVENT(JD_CANCEL); -+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT); -+DEFINE_MALI_ADD_EVENT(JM_IRQ); -+DEFINE_MALI_ADD_EVENT(JM_IRQ_END); -+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS); -+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE); -+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED); -+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); -+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); -+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); -+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); -+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX); -+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); -+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); -+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); -+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); -+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END); -+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START); -+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); -+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); -+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); -+DEFINE_MALI_ADD_EVENT(PM_PWRON); -+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); -+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); -+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); -+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); -+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); -+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); -+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED); -+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE); 
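The mali_add_template events in this block follow the same pattern with an (int gpu_addr, unsigned int info_val) signature, and because TRACE_SYSTEM is "mali" they surface in tracefs under events/mali/ (for example events/mali/mali_JM_IRQ) when tracing is enabled. A minimal sketch of emitting one ADD-class event from an interrupt path is shown below; the function name and irq_status parameter are placeholders, not part of the driver.

/* Illustrative sketch only -- not part of the kernel.patch hunks above. */
#include "mali_linux_kbase_trace.h"

static void example_trace_job_irq(unsigned int irq_status)
{
	/* ADD-class events take a gpu_addr/context value first; it is not
	 * meaningful in this example, so pass 0 and log the IRQ status.
	 */
	trace_mali_JM_IRQ(0, irq_status);
}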
-+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE); -+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); -+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); -+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); -+DEFINE_MALI_ADD_EVENT(PM_GPU_ON); -+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); -+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); -+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); -+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); -+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); -+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); -+#undef DEFINE_MALI_ADD_EVENT ++const char kbase_drv_name[] = KBASE_DRV_NAME; ++const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; +diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.h b/drivers/gpu/arm/midgard/mali_kbase_strings.h +new file mode 100644 +index 000000000..41b8fdbec +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_strings.h +@@ -0,0 +1,19 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+#endif /* _TRACE_MALI_KBASE_H */ + -+#undef TRACE_INCLUDE_PATH -+#undef linux -+#define TRACE_INCLUDE_PATH . -+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace + -+/* This part must be outside protection */ -+#include -diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h ++extern const char kbase_drv_name[]; ++extern const char kbase_timeline_name[]; +diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h new file mode 100644 -index 000000000..2be06a552 +index 000000000..2cb8c1820 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h -@@ -0,0 +1,189 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h +@@ -0,0 +1,204 @@ +/* + * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -401005,324 +401666,202 @@ index 000000000..2be06a552 + + + -+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_MALI_H -+ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM mali -+#define TRACE_INCLUDE_FILE mali_linux_trace ++/** ++ * @file mali_kbase_sync.h ++ * ++ * This file contains our internal "API" for explicit fences. ++ * It hides the implementation details of the actual explicit fence mechanism ++ * used (Android fences or sync file with DMA fences). ++ */ + -+#include ++#ifndef MALI_KBASE_SYNC_H ++#define MALI_KBASE_SYNC_H + -+#define MALI_JOB_SLOTS_EVENT_CHANGED ++#include ++#include ++#ifdef CONFIG_SYNC ++#include ++#endif ++#ifdef CONFIG_SYNC_FILE ++#include "mali_kbase_fence_defs.h" ++#include ++#endif + -+/** -+ * mali_job_slots_event - called from mali_kbase_core_linux.c -+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro. 
-+ */ -+TRACE_EVENT(mali_job_slots_event, -+ TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, -+ unsigned char job_id), -+ TP_ARGS(event_id, tgid, pid, job_id), -+ TP_STRUCT__entry( -+ __field(unsigned int, event_id) -+ __field(unsigned int, tgid) -+ __field(unsigned int, pid) -+ __field(unsigned char, job_id) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->tgid = tgid; -+ __entry->pid = pid; -+ __entry->job_id = job_id; -+ ), -+ TP_printk("event=%u tgid=%u pid=%u job_id=%u", -+ __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id) -+); ++#include "mali_kbase.h" + +/** -+ * mali_pm_status - Called by mali_kbase_pm_driver.c -+ * @event_id: core type (shader, tiler, l2 cache) -+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF) ++ * struct kbase_sync_fence_info - Information about a fence ++ * @fence: Pointer to fence (type is void*, as underlaying struct can differ) ++ * @name: The name given to this fence when it was created ++ * @status: < 0 means error, 0 means active, 1 means signaled ++ * ++ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() ++ * to get the information. + */ -+TRACE_EVENT(mali_pm_status, -+ TP_PROTO(unsigned int event_id, unsigned long long value), -+ TP_ARGS(event_id, value), -+ TP_STRUCT__entry( -+ __field(unsigned int, event_id) -+ __field(unsigned long long, value) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("event %u = %llu", __entry->event_id, __entry->value) -+); ++struct kbase_sync_fence_info { ++ void *fence; ++ char name[32]; ++ int status; ++}; + +/** -+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c -+ * @event_id: core type (shader, tiler, l2 cache) -+ * @value: 64bits bitmask reporting the cores to power up ++ * kbase_sync_fence_stream_create() - Create a stream object ++ * @name: Name of stream (only used to ease debugging/visualization) ++ * @out_fd: A file descriptor representing the created stream object ++ * ++ * Can map down to a timeline implementation in some implementations. ++ * Exposed as a file descriptor. 
++ * Life-time controlled via the file descriptor: ++ * - dup to add a ref ++ * - close to remove a ref ++ * ++ * return: 0 on success, < 0 on error + */ -+TRACE_EVENT(mali_pm_power_on, -+ TP_PROTO(unsigned int event_id, unsigned long long value), -+ TP_ARGS(event_id, value), -+ TP_STRUCT__entry( -+ __field(unsigned int, event_id) -+ __field(unsigned long long, value) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("event %u = %llu", __entry->event_id, __entry->value) -+); ++int kbase_sync_fence_stream_create(const char *name, int *const out_fd); + +/** -+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c -+ * @event_id: core type (shader, tiler, l2 cache) -+ * @value: 64bits bitmask reporting the cores to power down ++ * kbase_sync_fence_out_create Create an explicit output fence to specified atom ++ * @katom: Atom to assign the new explicit fence to ++ * @stream_fd: File descriptor for stream object to create fence on ++ * ++ * return: Valid file descriptor to fence or < 0 on error + */ -+TRACE_EVENT(mali_pm_power_off, -+ TP_PROTO(unsigned int event_id, unsigned long long value), -+ TP_ARGS(event_id, value), -+ TP_STRUCT__entry( -+ __field(unsigned int, event_id) -+ __field(unsigned long long, value) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("event %u = %llu", __entry->event_id, __entry->value) -+); ++int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); + +/** -+ * mali_page_fault_insert_pages - Called by page_fault_worker() -+ * it reports an MMU page fault resulting in new pages being mapped. -+ * @event_id: MMU address space number. -+ * @value: number of newly allocated pages ++ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom ++ * @katom: Atom to assign the existing explicit fence to ++ * @fd: File descriptor to an existing fence ++ * ++ * Assigns an explicit input fence to atom. ++ * This can later be waited for by calling @kbase_sync_fence_in_wait ++ * ++ * return: 0 on success, < 0 on error + */ -+TRACE_EVENT(mali_page_fault_insert_pages, -+ TP_PROTO(int event_id, unsigned long value), -+ TP_ARGS(event_id, value), -+ TP_STRUCT__entry( -+ __field(int, event_id) -+ __field(unsigned long, value) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ __entry->value = value; -+ ), -+ TP_printk("event %d = %lu", __entry->event_id, __entry->value) -+); ++int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); + +/** -+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space() -+ * it reports that a certain MMU address space is in use now. -+ * @event_id: MMU address space number. ++ * kbase_sync_fence_validate() - Validate a fd to be a valid fence ++ * @fd: File descriptor to check ++ * ++ * This function is only usable to catch unintentional user errors early, ++ * it does not stop malicious code changing the fd after this function returns. ++ * ++ * return 0: if fd is for a valid fence, < 0 if invalid + */ -+TRACE_EVENT(mali_mmu_as_in_use, -+ TP_PROTO(int event_id), -+ TP_ARGS(event_id), -+ TP_STRUCT__entry( -+ __field(int, event_id) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ ), -+ TP_printk("event=%d", __entry->event_id) -+); ++int kbase_sync_fence_validate(int fd); + +/** -+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal() -+ * it reports that a certain MMU address space has been released now. -+ * @event_id: MMU address space number. 
++ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom ++ * @katom: Atom with an explicit fence to signal ++ * @result: < 0 means signal with error, 0 >= indicates success ++ * ++ * Signal output fence attached on katom and remove the fence from the atom. ++ * ++ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE + */ -+TRACE_EVENT(mali_mmu_as_released, -+ TP_PROTO(int event_id), -+ TP_ARGS(event_id), -+ TP_STRUCT__entry( -+ __field(int, event_id) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ ), -+ TP_printk("event=%d", __entry->event_id) -+); ++enum base_jd_event_code ++kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); + +/** -+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages() -+ * and by kbase_atomic_sub_pages() -+ * it reports that the total number of allocated pages is changed. -+ * @event_id: number of pages to be added or subtracted (according to the sign). -+ */ -+TRACE_EVENT(mali_total_alloc_pages_change, -+ TP_PROTO(long long int event_id), -+ TP_ARGS(event_id), -+ TP_STRUCT__entry( -+ __field(long long int, event_id) -+ ), -+ TP_fast_assign( -+ __entry->event_id = event_id; -+ ), -+ TP_printk("event=%lld", __entry->event_id) -+); -+ -+#endif /* _TRACE_MALI_H */ -+ -+#undef TRACE_INCLUDE_PATH -+#undef linux -+#define TRACE_INCLUDE_PATH . -+ -+/* This part must be outside protection */ -+#include -diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h -new file mode 100644 -index 000000000..99452933e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_malisw.h -@@ -0,0 +1,131 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled ++ * @katom: Atom with explicit fence to wait for + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * If the fence is already signaled, then 0 is returned, and the caller must ++ * continue processing of the katom. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * If the fence isn't already signaled, then this kbase_sync framework will ++ * take responsibility to continue the processing once the fence is signaled. + * ++ * return: 0 if already signaled, otherwise 1 + */ -+ -+ ++int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); + +/** -+ * Kernel-wide include for common macros and types. 
++ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits ++ * @katom: Atom to cancel wait for ++ * ++ * This function is fully responsible for continuing processing of this atom ++ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) + */ -+ -+#ifndef _MALISW_H_ -+#define _MALISW_H_ -+ -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) -+#define U8_MAX ((u8)~0U) -+#define S8_MAX ((s8)(U8_MAX>>1)) -+#define S8_MIN ((s8)(-S8_MAX - 1)) -+#define U16_MAX ((u16)~0U) -+#define S16_MAX ((s16)(U16_MAX>>1)) -+#define S16_MIN ((s16)(-S16_MAX - 1)) -+#define U32_MAX ((u32)~0U) -+#define S32_MAX ((s32)(U32_MAX>>1)) -+#define S32_MIN ((s32)(-S32_MAX - 1)) -+#define U64_MAX ((u64)~0ULL) -+#define S64_MAX ((s64)(U64_MAX>>1)) -+#define S64_MIN ((s64)(-S64_MAX - 1)) -+#endif /* LINUX_VERSION_CODE */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) -+#define SIZE_MAX (~(size_t)0) -+#endif /* LINUX_VERSION_CODE */ ++void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); + +/** -+ * MIN - Return the lesser of two values. ++ * kbase_sync_fence_in_remove() - Remove the input fence from the katom ++ * @katom: Atom to remove explicit input fence for + * -+ * As a macro it may evaluate its arguments more than once. -+ * Refer to MAX macro for more details ++ * This will also release the corresponding reference. + */ -+#define MIN(x, y) ((x) < (y) ? (x) : (y)) ++void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); + +/** -+ * MAX - Return the greater of two values. ++ * kbase_sync_fence_out_remove() - Remove the output fence from the katom ++ * @katom: Atom to remove explicit output fence for + * -+ * As a macro it may evaluate its arguments more than once. -+ * If called on the same two arguments as MIN it is guaranteed to return -+ * the one that MIN didn't return. This is significant for types where not -+ * all values are comparable e.g. NaNs in floating-point types. But if you want -+ * to retrieve the min and max of two values, consider using a conditional swap -+ * instead. ++ * This will also release the corresponding reference. + */ -+#define MAX(x, y) ((x) < (y) ? (y) : (x)) ++void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); + +/** -+ * @hideinitializer -+ * Function-like macro for suppressing unused variable warnings. Where possible -+ * such variables should be removed; this macro is present for cases where we -+ * much support API backwards compatibility. ++ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence ++ * @fd: File descriptor to close + */ -+#define CSTD_UNUSED(x) ((void)(x)) ++static inline void kbase_sync_fence_close_fd(int fd) ++{ ++ close_fd(fd); ++} + +/** -+ * @hideinitializer -+ * Function-like macro for use where "no behavior" is desired. This is useful -+ * when compile time macros turn a function-like macro in to a no-op, but -+ * where having no statement is otherwise invalid. ++ * kbase_sync_fence_in_info_get() - Retrieves information about input fence ++ * @katom: Atom to get fence information from ++ * @info: Struct to be filled with fence information ++ * ++ * return: 0 on success, < 0 on error + */ -+#define CSTD_NOP(...) ((void)#__VA_ARGS__) ++int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info); + +/** -+ * Function-like macro for converting a pointer in to a u64 for storing into -+ * an external data structure. This is commonly used when pairing a 32-bit -+ * CPU with a 64-bit peripheral, such as a Midgard GPU. 
C's type promotion -+ * is complex and a straight cast does not work reliably as pointers are -+ * often considered as signed. ++ * kbase_sync_fence_out_info_get() - Retrieves information about output fence ++ * @katom: Atom to get fence information from ++ * @info: Struct to be filled with fence information ++ * ++ * return: 0 on success, < 0 on error + */ -+#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x))) ++int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info); + +/** -+ * @hideinitializer -+ * Function-like macro for stringizing a single level macro. -+ * @code -+ * #define MY_MACRO 32 -+ * CSTD_STR1( MY_MACRO ) -+ * > "MY_MACRO" -+ * @endcode ++ * kbase_sync_status_string() - Get string matching @status ++ * @status: Value of fence status. ++ * ++ * return: Pointer to string describing @status. + */ -+#define CSTD_STR1(x) #x ++const char *kbase_sync_status_string(int status); + -+/** -+ * @hideinitializer -+ * Function-like macro for stringizing a macro's value. This should not be used -+ * if the macro is defined in a way which may have no value; use the -+ * alternative @c CSTD_STR2N macro should be used instead. -+ * @code -+ * #define MY_MACRO 32 -+ * CSTD_STR2( MY_MACRO ) -+ * > "32" -+ * @endcode ++/* ++ * Internal worker used to continue processing of atom. + */ -+#define CSTD_STR2(x) CSTD_STR1(x) ++void kbase_sync_fence_wait_worker(struct work_struct *data); + ++#ifdef CONFIG_MALI_FENCE_DEBUG +/** -+ * Specify an assertion value which is evaluated at compile time. Recommended -+ * usage is specification of a @c static @c INLINE function containing all of -+ * the assertions thus: -+ * -+ * @code -+ * static INLINE [module]_compile_time_assertions( void ) -+ * { -+ * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) ); -+ * } -+ * @endcode -+ * -+ * @note Use @c static not @c STATIC. We never want to turn off this @c static -+ * specification for testing purposes. ++ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state ++ * @katom: Atom to trigger fence debug dump for + */ -+#define CSTD_COMPILE_TIME_ASSERT(expr) \ -+ do { switch (0) { case 0: case (expr):; } } while (false) ++void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); ++#endif + -+#endif /* _MALISW_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_midg_coherency.h b/drivers/gpu/arm/midgard/mali_midg_coherency.h ++#endif /* MALI_KBASE_SYNC_H */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c new file mode 100644 -index 000000000..a509cbd5f +index 000000000..d7349dcae --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_midg_coherency.h -@@ -0,0 +1,26 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c +@@ -0,0 +1,537 @@ +/* + * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -401337,641 +401876,535 @@ index 000000000..a509cbd5f + + + -+#ifndef _MIDG_COHERENCY_H_ -+#define _MIDG_COHERENCY_H_ -+ -+#define COHERENCY_ACE_LITE 0 -+#define COHERENCY_ACE 1 -+#define COHERENCY_NONE 31 -+#define COHERENCY_FEATURE_BIT(x) (1 << (x)) -+ -+#endif /* _MIDG_COHERENCY_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h -new file mode 100644 -index 000000000..7d7b7bcd3 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h -@@ -0,0 +1,611 @@ +/* -+ * -+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * ++ * Code for supporting explicit Android fences (CONFIG_SYNC) ++ * Known to be good for kernels 4.5 and earlier. ++ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels ++ * (see mali_kbase_sync_file.c) + */ + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "sync.h" ++#include ++#include + ++struct mali_sync_timeline { ++ struct sync_timeline timeline; ++ atomic_t counter; ++ atomic_t signaled; ++}; + -+#ifndef _MIDGARD_REGMAP_H_ -+#define _MIDGARD_REGMAP_H_ -+ -+#include "mali_midg_coherency.h" -+#include "mali_kbase_gpu_id.h" ++struct mali_sync_pt { ++ struct sync_pt pt; ++ int order; ++ int result; ++}; + -+/* -+ * Begin Register Offsets -+ */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++/* For backwards compatibility with kernels before 3.17. After 3.17 ++ * sync_pt_parent is included in the kernel. */ ++static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) ++{ ++ return pt->parent; ++} ++#endif + -+#define GPU_CONTROL_BASE 0x0000 -+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -+#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer -+ size */ -+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -+#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -+#define MMU_FEATURES 0x014 /* (RO) MMU features */ -+#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -+#define JS_PRESENT 0x01C /* (RO) Job slots present */ -+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -+#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -+#define GPU_IRQ_MASK 0x028 /* (RW) */ -+#define GPU_IRQ_STATUS 0x02C /* (RO) */ ++static struct mali_sync_timeline *to_mali_sync_timeline( ++ struct sync_timeline *timeline) ++{ ++ return container_of(timeline, struct mali_sync_timeline, timeline); ++} + -+/* IRQ flags */ -+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET -+ commands which may take time. */ -+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. 
*/ -+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down -+ and the power manager is idle. */ ++static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) ++{ ++ return container_of(pt, struct mali_sync_pt, pt); ++} + -+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ ++static struct sync_pt *timeline_dup(struct sync_pt *pt) ++{ ++ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); ++ struct mali_sync_pt *new_mpt; ++ struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), ++ sizeof(struct mali_sync_pt)); + -+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ -+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) ++ if (!new_pt) ++ return NULL; + -+#define GPU_COMMAND 0x030 /* (WO) */ -+#define GPU_STATUS 0x034 /* (RO) */ -+#define LATEST_FLUSH 0x038 /* (RO) */ ++ new_mpt = to_mali_sync_pt(new_pt); ++ new_mpt->order = mpt->order; ++ new_mpt->result = mpt->result; + -+#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -+#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ ++ return new_pt; ++} + -+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ ++static int timeline_has_signaled(struct sync_pt *pt) ++{ ++ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); ++ struct mali_sync_timeline *mtl = to_mali_sync_timeline( ++ sync_pt_parent(pt)); ++ int result = mpt->result; + -+#define PWR_KEY 0x050 /* (WO) Power manager key register */ -+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ ++ int diff = atomic_read(&mtl->signaled) - mpt->order; + -+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ -+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ -+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ -+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ -+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ -+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ -+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ ++ if (diff >= 0) ++ return (result < 0) ? 
result : 1; + -+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ ++ return 0; ++} + -+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ ++static int timeline_compare(struct sync_pt *a, struct sync_pt *b) ++{ ++ struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); ++ struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); + -+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ ++ int diff = ma->order - mb->order; + -+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) ++ if (diff == 0) ++ return 0; + -+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ -+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ ++ return (diff < 0) ? 
-1 : 1; ++} + -+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) ++static void timeline_value_str(struct sync_timeline *timeline, char *str, ++ int size) ++{ ++ struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); + -+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ ++ snprintf(str, size, "%d", atomic_read(&mtl->signaled)); ++} + -+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ ++static void pt_value_str(struct sync_pt *pt, char *str, int size) ++{ ++ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + -+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ ++ snprintf(str, size, "%d(%d)", mpt->order, mpt->result); ++} + -+#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -+#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ ++static struct sync_timeline_ops mali_timeline_ops = { ++ .driver_name = "Mali", ++ .dup = timeline_dup, ++ .has_signaled = timeline_has_signaled, ++ .compare = timeline_compare, ++ .timeline_value_str = timeline_value_str, ++ .pt_value_str = pt_value_str, ++}; + ++/* Allocates a timeline for Mali ++ * ++ * One timeline should be allocated per API context. ++ */ ++static struct sync_timeline *mali_sync_timeline_alloc(const char *name) ++{ ++ struct sync_timeline *tl; ++ struct mali_sync_timeline *mtl; + -+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ ++ tl = sync_timeline_create(&mali_timeline_ops, ++ sizeof(struct mali_sync_timeline), name); ++ if (!tl) ++ return NULL; + -+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ ++ /* Set the counter in our private struct */ ++ mtl = to_mali_sync_timeline(tl); ++ atomic_set(&mtl->counter, 0); ++ atomic_set(&mtl->signaled, 0); + -+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ ++ return tl; ++} + -+#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -+#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ ++static int kbase_stream_close(struct inode *inode, struct file *file) ++{ ++ struct sync_timeline *tl; + ++ tl = (struct sync_timeline *)file->private_data; ++ sync_timeline_destroy(tl); ++ return 0; ++} + -+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ ++static const struct file_operations stream_fops = { ++ .owner = THIS_MODULE, ++ .release = kbase_stream_close, ++}; + -+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ ++int kbase_sync_fence_stream_create(const char *name, int *const out_fd) ++{ ++ struct sync_timeline *tl; + -+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ ++ if (!out_fd) ++ return -EINVAL; + -+#define STACK_PWRON_LO 
0xE20 /* (RO) Core stack power on bitmap, low word */ -+#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ ++ tl = mali_sync_timeline_alloc(name); ++ if (!tl) ++ return -EINVAL; + ++ *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC); + -+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ ++ if (*out_fd < 0) { ++ sync_timeline_destroy(tl); ++ return -EINVAL; ++ } + -+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ ++ return 0; ++} + -+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ ++/* Allocates a sync point within the timeline. ++ * ++ * The timeline must be the one allocated by kbase_sync_timeline_alloc ++ * ++ * Sync points must be triggered in *exactly* the same order as they are ++ * allocated. ++ */ ++static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) ++{ ++ struct sync_pt *pt = sync_pt_create(parent, ++ sizeof(struct mali_sync_pt)); ++ struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); ++ struct mali_sync_pt *mpt; + -+#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -+#define STACK_PRWOFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ ++ if (!pt) ++ return NULL; + ++ mpt = to_mali_sync_pt(pt); ++ mpt->order = atomic_inc_return(&mtl->counter); ++ mpt->result = 0; + -+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ ++ return pt; ++} + -+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ ++int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) ++{ ++ struct sync_timeline *tl; ++ struct sync_pt *pt; ++ struct sync_fence *fence; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) ++ struct files_struct *files; ++ struct fdtable *fdt; ++#endif ++ int fd; ++ struct file *tl_file; + -+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ ++ tl_file = fget(tl_fd); ++ if (tl_file == NULL) ++ return -EBADF; + -+#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ -+#define STACK_PRWTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ ++ if (tl_file->f_op != &stream_fops) { ++ fd = -EBADF; ++ goto out; ++ } + ++ tl = tl_file->private_data; + -+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ ++ pt = kbase_sync_pt_alloc(tl); ++ if (!pt) { ++ fd = -EFAULT; ++ goto out; ++ } + -+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ ++ fence = sync_fence_create("mali_fence", pt); ++ if (!fence) { ++ sync_pt_free(pt); ++ fd = -EFAULT; ++ goto out; ++ } + -+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -+#define L2_PWRACTIVE_HI 0x264 /* (RO) 
Level 2 cache active bitmap, high word */ ++ /* from here the fence owns the sync_pt */ + -+#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -+#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ ++ /* create a fd representing the fence */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) ++ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); ++ if (fd < 0) { ++ sync_fence_put(fence); ++ goto out; ++ } ++#else ++ fd = get_unused_fd(); ++ if (fd < 0) { ++ sync_fence_put(fence); ++ goto out; ++ } + -+#define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ -+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ -+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ -+#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ ++ files = current->files; ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) ++ __set_close_on_exec(fd, fdt); ++#else ++ FD_SET(fd, fdt->close_on_exec); ++#endif ++ spin_unlock(&files->file_lock); ++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + -+#define JOB_CONTROL_BASE 0x1000 ++ /* bind fence to the new fd */ ++ sync_fence_install(fence, fd); + -+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) ++ katom->fence = sync_fence_fdget(fd); ++ if (katom->fence == NULL) { ++ /* The only way the fence can be NULL is if userspace closed it ++ * for us, so we don't need to clear it up */ ++ fd = -EINVAL; ++ goto out; ++ } + -+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ -+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ ++out: ++ fput(tl_file); + -+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ -+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ ++ return fd; ++} + -+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) ++int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) ++{ ++ katom->fence = sync_fence_fdget(fd); ++ return katom->fence ? 0 : -ENOENT; ++} + -+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ -+#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job -+ slot n */ ++int kbase_sync_fence_validate(int fd) ++{ ++ struct sync_fence *fence; + -+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ ++ fence = sync_fence_fdget(fd); ++ if (!fence) ++ return -EINVAL; + -+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ ++ sync_fence_put(fence); ++ return 0; ++} + -+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -+#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for -+ job slot n */ ++/* Returns true if the specified timeline is allocated by Mali */ ++static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) ++{ ++ return timeline->ops == &mali_timeline_ops; ++} + -+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ ++/* Signals a particular sync point ++ * ++ * Sync points must be triggered in *exactly* the same order as they are ++ * allocated. 
++ * ++ * If they are signaled in the wrong order then a message will be printed in ++ * debug builds and otherwise attempts to signal order sync_pts will be ignored. ++ * ++ * result can be negative to indicate error, any other value is interpreted as ++ * success. ++ */ ++static void kbase_sync_signal_pt(struct sync_pt *pt, int result) ++{ ++ struct mali_sync_pt *mpt = to_mali_sync_pt(pt); ++ struct mali_sync_timeline *mtl = to_mali_sync_timeline( ++ sync_pt_parent(pt)); ++ int signaled; ++ int diff; + -+#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ ++ mpt->result = result; + -+#define MEMORY_MANAGEMENT_BASE 0x2000 -+#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) ++ do { ++ signaled = atomic_read(&mtl->signaled); + -+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ ++ diff = signaled - mpt->order; + -+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ ++ if (diff > 0) { ++ /* The timeline is already at or ahead of this point. ++ * This should not happen unless userspace has been ++ * signaling fences out of order, so warn but don't ++ * violate the sync_pt API. ++ * The warning is only in debug builds to prevent ++ * a malicious user being able to spam dmesg. ++ */ ++#ifdef CONFIG_MALI_DEBUG ++ pr_err("Fences were triggered in a different order to allocation!"); ++#endif /* CONFIG_MALI_DEBUG */ ++ return; ++ } ++ } while (atomic_cmpxchg(&mtl->signaled, ++ signaled, mpt->order) != signaled); ++} + -+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) ++enum base_jd_event_code ++kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) ++{ ++ struct sync_pt *pt; ++ struct sync_timeline *timeline; + -+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ -+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ -+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ ++ if (!katom->fence) ++ return BASE_JD_EVENT_JOB_CANCELLED; + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ if (!list_is_singular(&katom->fence->pt_list_head)) { ++#else ++ if (katom->fence->num_fences != 1) { ++#endif ++ /* Not exactly one item in the list - so it didn't (directly) ++ * come from us */ ++ return BASE_JD_EVENT_JOB_CANCELLED; ++ } + -+/* (RW) Translation table configuration for address space n, low word */ -+#define AS_TRANSCFG_LO 0x30 -+/* (RW) Translation table configuration for address space n, high word */ -+#define AS_TRANSCFG_HI 0x34 -+/* (RO) Secondary fault address for address space n, low word */ -+#define AS_FAULTEXTRA_LO 0x38 -+/* (RO) Secondary fault address for address space n, high word */ -+#define AS_FAULTEXTRA_HI 0x3C ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ pt = list_first_entry(&katom->fence->pt_list_head, ++ struct sync_pt, pt_list); ++#else ++ pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); ++#endif ++ timeline = sync_pt_parent(pt); + -+/* End Register Offsets */ ++ if (!kbase_sync_timeline_is_ours(timeline)) { ++ /* Fence has a sync_pt which isn't ours! */ ++ return BASE_JD_EVENT_JOB_CANCELLED; ++ } + -+/* -+ * MMU_IRQ_RAWSTAT register values. Values are valid also for -+ MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. -+ */ ++ kbase_sync_signal_pt(pt, result); + -+#define MMU_PAGE_FAULT_FLAGS 16 ++ sync_timeline_signal(timeline); + -+/* Macros returning a bitmask to retrieve page fault or bus error flags from -+ * MMU registers */ -+#define MMU_PAGE_FAULT(n) (1UL << (n)) -+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) ++ kbase_sync_fence_out_remove(katom); + -+/* -+ * Begin LPAE MMU TRANSTAB register values -+ */ -+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 -+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) -+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) -+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) -+#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) -+#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) ++ return (result < 0) ? 
BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; ++} + -+#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 ++static inline int kbase_fence_get_status(struct sync_fence *fence) ++{ ++ if (!fence) ++ return -ENOENT; + -+/* -+ * Begin AARCH64 MMU TRANSTAB register values -+ */ -+#define MMU_HW_OUTA_BITS 40 -+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) ++ return fence->status; ++#else ++ return atomic_read(&fence->status); ++#endif ++} + -+/* -+ * Begin MMU STATUS register values -+ */ -+#define AS_STATUS_AS_ACTIVE 0x01 ++static void kbase_fence_wait_callback(struct sync_fence *fence, ++ struct sync_fence_waiter *waiter) ++{ ++ struct kbase_jd_atom *katom = container_of(waiter, ++ struct kbase_jd_atom, sync_waiter); ++ struct kbase_context *kctx = katom->kctx; + -+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) ++ /* Propagate the fence status to the atom. ++ * If negative then cancel this atom and its dependencies. ++ */ ++ if (kbase_fence_get_status(fence) < 0) ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) ++ /* To prevent a potential deadlock we schedule the work onto the ++ * job_done_wq workqueue ++ * ++ * The issue is that we may signal the timeline while holding ++ * kctx->jctx.lock and the callbacks are run synchronously from ++ * sync_timeline_signal. So we simply defer the work. 
++ */ + -+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) ++ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); ++} + -+/* -+ * Begin MMU TRANSCFG register values -+ */ ++int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) ++{ ++ int ret; + -+#define AS_TRANSCFG_ADRMODE_LEGACY 0 -+#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -+#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -+#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 ++ sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); + -+#define AS_TRANSCFG_ADRMODE_MASK 0xF ++ ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); + ++ if (ret == 1) { ++ /* Already signaled */ ++ return 0; ++ } + -+/* -+ * Begin TRANSCFG register values -+ */ -+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) -+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) -+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) ++ if (ret < 0) { ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; ++ /* We should cause the dependent jobs in the bag to be failed, ++ * to do this we schedule the work queue to complete this job */ ++ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); ++ queue_work(katom->kctx->jctx.job_done_wq, &katom->work); ++ } + -+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) -+#define AS_TRANSCFG_PTW_SH_OS (2 << 28) -+#define AS_TRANSCFG_PTW_SH_IS (3 << 28) ++ return 1; ++} + -+/* -+ * Begin Command Values -+ */ ++void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) ++{ ++ if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { ++ /* The wait wasn't cancelled - leave the cleanup for ++ * kbase_fence_wait_callback */ ++ return; ++ } + -+/* JS_COMMAND register commands */ -+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -+#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ -+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ ++ /* Wait was cancelled - zap the atoms */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ ++ kbasep_remove_waiting_soft_job(katom); ++ kbase_finish_soft_job(katom); + -+/* AS_COMMAND register commands */ -+#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -+#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs -+ (deprecated - only for use with T60x) */ -+#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ -+#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then -+ flush all L2 caches then issue a flush region command to all MMUs */ ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(katom->kctx->kbdev); ++} + -+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -+#define JS_CONFIG_START_MMU (1u << 10) -+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) -+ -+/* JS_XAFFINITY register values */ -+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -+#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -+#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) -+ -+/* JS_STATUS register values */ -+ -+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. -+ * The values are separated to avoid dependency of userspace and kernel code. 
-+ */ -+ -+/* Group of values representing the job status insead a particular fault */ -+#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ -+ -+/* General fault values */ -+#define JS_STATUS_FAULT_BASE 0x40 -+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ -+ -+/* Instruction or data faults */ -+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -+/* NOTE: No fault with 0x57 code defined in spec. */ -+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ -+ -+/* Other faults */ -+#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ -+ -+/* GPU_COMMAND values */ -+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
*/ -+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ -+ -+/* End Command Values */ -+ -+/* GPU_STATUS values */ -+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -+#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ -+ -+/* PRFCNT_CONFIG register values */ -+#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -+#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ -+ -+#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ -+#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ -+#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ -+ -+/* AS_MEMATTR values: */ -+/* Use GPU implementation-defined caching policy. */ -+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -+/* The attribute set to force all resources to be cached. */ -+#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -+/* Inner write-alloc cache setup, no outer caching */ -+#define AS_MEMATTR_WRITE_ALLOC 0x8Dull -+ -+/* Set to implementation defined, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -+/* Set to write back memory, outer caching */ -+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -+ -+/* Use GPU implementation-defined caching policy. */ -+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -+/* The attribute set to force all resources to be cached. 
*/ -+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -+/* Inner write-alloc cache setup, no outer caching */ -+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -+/* Set to implementation defined, outer caching */ -+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -+/* Set to write back memory, outer caching */ -+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -+ -+/* Symbol for default MEMATTR to use */ -+ -+/* Default is - HW implementation defined caching */ -+#define AS_MEMATTR_INDEX_DEFAULT 0 -+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 -+ -+/* HW implementation defined caching */ -+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -+/* Force cache on */ -+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -+/* Write-alloc */ -+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -+/* Outer coherent, inner implementation defined policy */ -+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -+/* Outer coherent, write alloc inner */ -+#define AS_MEMATTR_INDEX_OUTER_WA 4 -+ -+/* JS_FEATURES register */ -+ -+#define JS_FEATURE_NULL_JOB (1u << 1) -+#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -+#define JS_FEATURE_COMPUTE_JOB (1u << 4) -+#define JS_FEATURE_VERTEX_JOB (1u << 5) -+#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -+#define JS_FEATURE_TILER_JOB (1u << 7) -+#define JS_FEATURE_FUSED_JOB (1u << 8) -+#define JS_FEATURE_FRAGMENT_JOB (1u << 9) -+ -+/* End JS_FEATURES register */ -+ -+/* L2_MMU_CONFIG register */ -+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -+ -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -+/* End L2_MMU_CONFIG register */ -+ -+/* THREAD_* registers */ -+ -+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -+#define IMPLEMENTATION_UNSPECIFIED 0 -+#define IMPLEMENTATION_SILICON 1 -+#define IMPLEMENTATION_FPGA 2 -+#define IMPLEMENTATION_MODEL 3 -+ -+/* Default values when registers are not supported by the implemented hardware */ -+#define THREAD_MT_DEFAULT 256 -+#define THREAD_MWS_DEFAULT 256 -+#define THREAD_MBS_DEFAULT 256 -+#define THREAD_MR_DEFAULT 1024 -+#define THREAD_MTQ_DEFAULT 4 -+#define THREAD_MTGS_DEFAULT 10 -+ -+/* End THREAD_* registers */ -+ -+/* SHADER_CONFIG register */ ++void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) ++{ ++ if (katom->fence) { ++ sync_fence_put(katom->fence); ++ katom->fence = NULL; ++ } ++} + -+#define SC_ALT_COUNTERS (1ul << 3) -+#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) -+#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) -+#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -+#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16) -+#define SC_LS_ATTR_CHECK_DISABLE (1ul << 
18) -+#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25) -+/* End SHADER_CONFIG register */ ++void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) ++{ ++ if (katom->fence) { ++ sync_fence_put(katom->fence); ++ katom->fence = NULL; ++ } ++} + -+/* TILER_CONFIG register */ ++int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) ++{ ++ if (!katom->fence) ++ return -ENOENT; + -+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) ++ info->fence = katom->fence; ++ info->status = kbase_fence_get_status(katom->fence); ++ strlcpy(info->name, katom->fence->name, sizeof(info->name)); + -+/* End TILER_CONFIG register */ ++ return 0; ++} + -+/* JM_CONFIG register */ ++int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) ++{ ++ if (!katom->fence) ++ return -ENOENT; + -+#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -+#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) -+#define JM_IDVS_GROUP_SIZE_SHIFT (16) -+#define JM_MAX_IDVS_GROUP_SIZE (0x3F) -+/* End JM_CONFIG register */ ++ info->fence = katom->fence; ++ info->status = kbase_fence_get_status(katom->fence); ++ strlcpy(info->name, katom->fence->name, sizeof(info->name)); + ++ return 0; ++} + -+#endif /* _MIDGARD_REGMAP_H_ */ -diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h ++#ifdef CONFIG_MALI_FENCE_DEBUG ++void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) ++{ ++ /* Dump out the full state of all the Android sync fences. ++ * The function sync_dump() isn't exported to modules, so force ++ * sync_fence_wait() to time out to trigger sync_dump(). ++ */ ++ if (katom->fence) ++ sync_fence_wait(katom->fence, 1); ++} ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c new file mode 100644 -index 000000000..bd5f6614b +index 000000000..457def296 --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_timeline.h -@@ -0,0 +1,396 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c +@@ -0,0 +1,43 @@ +/* + * -+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -401986,394 +402419,406 @@ index 000000000..bd5f6614b + + + ++/* ++ * @file mali_kbase_sync_common.c ++ * ++ * Common code for our explicit fence functionality ++ */ + ++#include ++#include "mali_kbase.h" + -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM mali_timeline -+ -+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _MALI_TIMELINE_H -+ -+#include -+ -+TRACE_EVENT(mali_timeline_atoms_in_flight, ++void kbase_sync_fence_wait_worker(struct work_struct *data) ++{ ++ struct kbase_jd_atom *katom; + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int tgid, -+ int count), ++ katom = container_of(data, struct kbase_jd_atom, work); ++ kbase_soft_event_wait_callback(katom); ++} + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ tgid, -+ count), ++const char *kbase_sync_status_string(int status) ++{ ++ if (status == 0) ++ return "signaled"; ++ else if (status > 0) ++ return "active"; ++ else ++ return "error"; ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +new file mode 100644 +index 000000000..60b5d74db +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +@@ -0,0 +1,359 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, tgid) -+ __field(int, count) -+ ), + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->tgid = tgid; -+ __entry->count = count; -+ ), + -+ TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->tgid, -+ __entry->count) -+); ++/* ++ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) ++ * Introduced in kernel 4.9. 
++ * Android explicit fences (CONFIG_SYNC) can be used for older kernels ++ * (see mali_kbase_sync_android.c) ++ */ + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mali_kbase_fence_defs.h" ++#include "mali_kbase_sync.h" ++#include "mali_kbase_fence.h" ++#include "mali_kbase.h" + -+TRACE_EVENT(mali_timeline_atom, ++static const struct file_operations stream_fops = { ++ .owner = THIS_MODULE ++}; + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int tgid, -+ int atom_id), ++int kbase_sync_fence_stream_create(const char *name, int *const out_fd) ++{ ++ if (!out_fd) ++ return -EINVAL; + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ tgid, -+ atom_id), ++ *out_fd = anon_inode_getfd(name, &stream_fops, NULL, ++ O_RDONLY | O_CLOEXEC); ++ if (*out_fd < 0) ++ return -EINVAL; + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, tgid) -+ __field(int, atom_id) -+ ), ++ return 0; ++} + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->tgid = tgid; -+ __entry->atom_id = atom_id; -+ ), ++int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ struct sync_file *sync_file; ++ int fd; + -+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->tgid, -+ __entry->atom_id, -+ __entry->atom_id) -+); ++ fence = kbase_fence_out_new(katom); ++ if (!fence) ++ return -ENOMEM; + -+TRACE_EVENT(mali_timeline_gpu_slot_active, ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ /* Take an extra reference to the fence on behalf of the sync_file. ++ * This is only needed on older kernels where sync_file_create() ++ * does not take its own reference. This was changed in v4.9.68, ++ * where sync_file_create() now takes its own reference. 
++ */ ++ dma_fence_get(fence); ++#endif + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int tgid, -+ int js, -+ int count), ++ /* create a sync_file fd representing the fence */ ++ sync_file = sync_file_create(fence); ++ if (!sync_file) { ++ dma_fence_put(fence); ++ kbase_fence_out_remove(katom); ++ return -ENOMEM; ++ } + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ tgid, -+ js, -+ count), ++ fd = get_unused_fd_flags(O_CLOEXEC); ++ if (fd < 0) { ++ fput(sync_file->file); ++ kbase_fence_out_remove(katom); ++ return fd; ++ } + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, tgid) -+ __field(int, js) -+ __field(int, count) -+ ), ++ fd_install(fd, sync_file->file); + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->tgid = tgid; -+ __entry->js = js; -+ __entry->count = count; -+ ), ++ return fd; ++} + -+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->tgid, -+ __entry->js, -+ __entry->count) -+); ++int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence = sync_file_get_fence(fd); ++#else ++ struct dma_fence *fence = sync_file_get_fence(fd); ++#endif + -+TRACE_EVENT(mali_timeline_gpu_slot_action, ++ if (!fence) ++ return -ENOENT; + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int tgid, -+ int js, -+ int count), ++ kbase_fence_fence_in_set(katom, fence); + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ tgid, -+ js, -+ count), ++ return 0; ++} + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, tgid) -+ __field(int, js) -+ __field(int, count) -+ ), ++int kbase_sync_fence_validate(int fd) ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence = sync_file_get_fence(fd); ++#else ++ struct dma_fence *fence = sync_file_get_fence(fd); ++#endif + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->tgid = tgid; -+ __entry->js = js; -+ __entry->count = count; -+ ), ++ if (!fence) ++ return -EINVAL; + -+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->tgid, -+ __entry->js, -+ __entry->count) -+); ++ dma_fence_put(fence); + -+TRACE_EVENT(mali_timeline_gpu_power_active, ++ return 0; /* valid */ ++} + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int active), ++enum base_jd_event_code ++kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) ++{ ++ int res; + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ active), ++ if (!kbase_fence_out_is_ours(katom)) { ++ /* Not our fence */ ++ return BASE_JD_EVENT_JOB_CANCELLED; ++ } + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, active) -+ ), ++ res = kbase_fence_out_signal(katom, result); ++ if (unlikely(res < 0)) { ++ dev_warn(katom->kctx->kbdev->dev, ++ "fence_signal() failed with %d\n", res); ++ } + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->active = active; -+ ), ++ kbase_sync_fence_out_remove(katom); + -+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ 
__entry->active) ++ return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; ++} + -+); ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++static void kbase_fence_wait_callback(struct fence *fence, ++ struct fence_cb *cb) ++#else ++static void kbase_fence_wait_callback(struct dma_fence *fence, ++ struct dma_fence_cb *cb) ++#endif ++{ ++ struct kbase_fence_cb *kcb = container_of(cb, ++ struct kbase_fence_cb, ++ fence_cb); ++ struct kbase_jd_atom *katom = kcb->katom; ++ struct kbase_context *kctx = katom->kctx; + -+TRACE_EVENT(mali_timeline_l2_power_active, ++ /* Cancel atom if fence is erroneous */ ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ ++ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) ++ if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) ++#else ++ if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) ++#endif ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int state), ++ if (kbase_fence_dep_count_dec_and_test(katom)) { ++ /* We take responsibility of handling this */ ++ kbase_fence_dep_count_set(katom, -1); + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ state), ++ /* To prevent a potential deadlock we schedule the work onto the ++ * job_done_wq workqueue ++ * ++ * The issue is that we may signal the timeline while holding ++ * kctx->jctx.lock and the callbacks are run synchronously from ++ * sync_timeline_signal. So we simply defer the work. ++ */ ++ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); ++ queue_work(kctx->jctx.job_done_wq, &katom->work); ++ } ++} + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, state) -+ ), ++int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) ++{ ++ int err; ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->state = state; -+ ), ++ fence = kbase_fence_in_get(katom); ++ if (!fence) ++ return 0; /* no input fence to wait for, good to go! */ + -+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->state) ++ kbase_fence_dep_count_set(katom, 1); + -+); -+TRACE_EVENT(mali_timeline_pm_event, ++ err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback); + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int pm_event_type, -+ unsigned int pm_event_id), ++ kbase_fence_put(fence); + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ pm_event_type, -+ pm_event_id), ++ if (likely(!err)) { ++ /* Test if the callbacks are already triggered */ ++ if (kbase_fence_dep_count_dec_and_test(katom)) { ++ kbase_fence_free_callbacks(katom); ++ kbase_fence_dep_count_set(katom, -1); ++ return 0; /* Already signaled, good to go right now */ ++ } + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, pm_event_type) -+ __field(unsigned int, pm_event_id) -+ ), ++ /* Callback installed, so we just need to wait for it... 
*/ ++ } else { ++ /* Failure */ ++ kbase_fence_free_callbacks(katom); ++ kbase_fence_dep_count_set(katom, -1); + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->pm_event_type = pm_event_type; -+ __entry->pm_event_id = pm_event_id; -+ ), ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->pm_event_type, __entry->pm_event_id) ++ /* We should cause the dependent jobs in the bag to be failed, ++ * to do this we schedule the work queue to complete this job */ + -+); ++ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); ++ queue_work(katom->kctx->jctx.job_done_wq, &katom->work); ++ } + -+TRACE_EVENT(mali_timeline_slot_atom, ++ return 1; /* completion to be done later by callback/worker */ ++} + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int event_type, -+ int tgid, -+ int js, -+ int atom_id), ++void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) ++{ ++ if (!kbase_fence_free_callbacks(katom)) { ++ /* The wait wasn't cancelled - ++ * leave the cleanup for kbase_fence_wait_callback */ ++ return; ++ } + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ event_type, -+ tgid, -+ js, -+ atom_id), ++ /* Take responsibility of completion */ ++ kbase_fence_dep_count_set(katom, -1); + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, event_type) -+ __field(int, tgid) -+ __field(int, js) -+ __field(int, atom_id) -+ ), ++ /* Wait was cancelled - zap the atoms */ ++ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->event_type = event_type; -+ __entry->tgid = tgid; -+ __entry->js = js; -+ __entry->atom_id = atom_id; -+ ), ++ kbasep_remove_waiting_soft_job(katom); ++ kbase_finish_soft_job(katom); + -+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->tgid, -+ __entry->js, -+ __entry->atom_id) -+); ++ if (jd_done_nolock(katom, NULL)) ++ kbase_js_sched_all(katom->kctx->kbdev); ++} + -+TRACE_EVENT(mali_timeline_pm_checktrans, ++void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) ++{ ++ kbase_fence_out_remove(katom); ++} + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int trans_code, -+ int trans_id), ++void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) ++{ ++ kbase_fence_free_callbacks(katom); ++ kbase_fence_in_remove(katom); ++} + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ trans_code, -+ trans_id), ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++static void kbase_sync_fence_info_get(struct fence *fence, ++ struct kbase_sync_fence_info *info) ++#else ++static void kbase_sync_fence_info_get(struct dma_fence *fence, ++ struct kbase_sync_fence_info *info) ++#endif ++{ ++ info->fence = fence; + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, trans_code) -+ __field(int, trans_id) -+ ), ++ /* translate into CONFIG_SYNC status: ++ * < 0 : error ++ * 0 : active ++ * 1 : signaled ++ */ ++ if (dma_fence_is_signaled(fence)) { ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ ++ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) ++ int status = fence->error; ++#else ++ int status = fence->status; ++#endif ++ if (status < 0) ++ info->status = status; /* signaled with error */ ++ else ++ info->status = 1; /* signaled with 
success */ ++ } else { ++ info->status = 0; /* still active (unsignaled) */ ++ } + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->trans_code = trans_code; -+ __entry->trans_id = trans_id; -+ ), ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++ scnprintf(info->name, sizeof(info->name), "%u#%u", ++ fence->context, fence->seqno); ++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0)) ++ scnprintf(info->name, sizeof(info->name), "%llu#%u", ++ fence->context, fence->seqno); ++#else ++ scnprintf(info->name, sizeof(info->name), "%llu#%llu", ++ fence->context, fence->seqno); ++#endif ++} + -+ TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->trans_id) ++int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+); ++ fence = kbase_fence_in_get(katom); ++ if (!fence) ++ return -ENOENT; + -+TRACE_EVENT(mali_timeline_context_active, ++ kbase_sync_fence_info_get(fence, info); + -+ TP_PROTO(u64 ts_sec, -+ u32 ts_nsec, -+ int count), ++ kbase_fence_put(fence); + -+ TP_ARGS(ts_sec, -+ ts_nsec, -+ count), ++ return 0; ++} + -+ TP_STRUCT__entry( -+ __field(u64, ts_sec) -+ __field(u32, ts_nsec) -+ __field(int, count) -+ ), ++int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, ++ struct kbase_sync_fence_info *info) ++{ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif + -+ TP_fast_assign( -+ __entry->ts_sec = ts_sec; -+ __entry->ts_nsec = ts_nsec; -+ __entry->count = count; -+ ), ++ fence = kbase_fence_out_get(katom); ++ if (!fence) ++ return -ENOENT; + -+ TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE, -+ (int)__entry->ts_sec, -+ (int)__entry->ts_nsec, -+ __entry->count) -+); ++ kbase_sync_fence_info_get(fence, info); + -+#endif /* _MALI_TIMELINE_H */ ++ kbase_fence_put(fence); + -+#undef TRACE_INCLUDE_PATH -+#define TRACE_INCLUDE_PATH . ++ return 0; ++} + -+/* This part must be outside protection */ -+#include + -diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h ++#ifdef CONFIG_MALI_FENCE_DEBUG ++void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) ++{ ++ /* Not implemented */ ++} ++#endif +diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c new file mode 100644 -index 000000000..841d03fb5 +index 000000000..c8310c45f --- /dev/null -+++ b/drivers/gpu/arm/midgard/mali_uk.h -@@ -0,0 +1,141 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +@@ -0,0 +1,2572 @@ +/* + * -+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -402388,1872 +402833,2570 @@ index 000000000..841d03fb5 + + + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/*****************************************************************************/ ++ ++/* The version of swtrace protocol used in timeline stream. */ ++#define SWTRACE_VERSION 3 + ++/* The maximum expected length of string in tracepoint descriptor. 
*/ ++#define STRLEN_MAX 64 /* bytes */ + -+/** -+ * @file mali_uk.h -+ * Types and definitions that are common across OSs for both the user -+ * and kernel side of the User-Kernel interface. -+ */ ++/* The number of nanoseconds in a second. */ ++#define NSECS_IN_SEC 1000000000ull /* ns */ + -+#ifndef _UK_H_ -+#define _UK_H_ ++/* The period of autoflush checker execution in milliseconds. */ ++#define AUTOFLUSH_INTERVAL 1000 /* ms */ + -+#ifdef __cplusplus -+extern "C" { -+#endif /* __cplusplus */ ++/* The maximum size of a single packet used by timeline. */ ++#define PACKET_SIZE 4096 /* bytes */ + -+/** -+ * @addtogroup base_api -+ * @{ -+ */ ++/* The number of packets used by one timeline stream. */ ++#define PACKET_COUNT 16 + -+/** -+ * @defgroup uk_api User-Kernel Interface API -+ * -+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device -+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver. -+ * -+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent -+ * kernel-side API (UKK) via an OS-specific communication mechanism. -+ * -+ * This API is internal to the Midgard DDK and is not exposed to any applications. -+ * -+ * @{ -+ */ ++/* The number of bytes reserved for packet header. ++ * These value must be defined according to MIPE documentation. */ ++#define PACKET_HEADER_SIZE 8 /* bytes */ + -+/** -+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The -+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this -+ * identifier to select a UKK client to the uku_open() function. -+ * -+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id -+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to -+ * provide a mapping of the identifier to the OS specific device name. -+ * -+ */ -+enum uk_client_id { -+ /** -+ * Value used to identify the Base driver UK client. -+ */ -+ UK_CLIENT_MALI_T600_BASE, ++/* The number of bytes reserved for packet sequence number. ++ * These value must be defined according to MIPE documentation. */ ++#define PACKET_NUMBER_SIZE 4 /* bytes */ + -+ /** The number of uk clients supported. This must be the last member of the enum */ -+ UK_CLIENT_COUNT ++/* Packet header - first word. ++ * These values must be defined according to MIPE documentation. */ ++#define PACKET_STREAMID_POS 0 ++#define PACKET_STREAMID_LEN 8 ++#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) ++#define PACKET_RSVD1_LEN 8 ++#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) ++#define PACKET_TYPE_LEN 3 ++#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) ++#define PACKET_CLASS_LEN 7 ++#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) ++#define PACKET_FAMILY_LEN 6 ++ ++/* Packet header - second word ++ * These values must be defined according to MIPE documentation. */ ++#define PACKET_LENGTH_POS 0 ++#define PACKET_LENGTH_LEN 24 ++#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) ++#define PACKET_SEQBIT_LEN 1 ++#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) ++#define PACKET_RSVD2_LEN 7 ++ ++/* Types of streams generated by timeline. ++ * Order is significant! Header streams must precede respective body streams. 
*/ ++enum tl_stream_type { ++ TL_STREAM_TYPE_OBJ_HEADER, ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ TL_STREAM_TYPE_OBJ, ++ TL_STREAM_TYPE_AUX_HEADER, ++ TL_STREAM_TYPE_AUX, ++ ++ TL_STREAM_TYPE_COUNT +}; + -+/** -+ * Each function callable through the UK interface has a unique number. -+ * Functions provided by UK clients start from number UK_FUNC_ID. -+ * Numbers below UK_FUNC_ID are used for internal UK functions. -+ */ -+enum uk_func { -+ UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */ -+ /** -+ * Each UK client numbers the functions they provide starting from -+ * number UK_FUNC_ID. This number is then eventually assigned to the -+ * id field of the union uk_header structure when preparing to make a -+ * UK call. See your UK client for a list of their function numbers. -+ */ -+ UK_FUNC_ID = 512 ++/* Timeline packet family ids. ++ * Values are significant! Check MIPE documentation. */ ++enum tl_packet_family { ++ TL_PACKET_FAMILY_CTRL = 0, /* control packets */ ++ TL_PACKET_FAMILY_TL = 1, /* timeline packets */ ++ ++ TL_PACKET_FAMILY_COUNT ++}; ++ ++/* Packet classes used in timeline streams. ++ * Values are significant! Check MIPE documentation. */ ++enum tl_packet_class { ++ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ ++ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ ++}; ++ ++/* Packet types used in timeline streams. ++ * Values are significant! Check MIPE documentation. */ ++enum tl_packet_type { ++ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ ++ TL_PACKET_TYPE_BODY = 1, /* stream's body */ ++ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ ++}; ++ ++/* Message ids of trace events that are recorded in the timeline stream. */ ++enum tl_msg_id_obj { ++ /* Timeline object events. */ ++ KBASE_TL_NEW_CTX, ++ KBASE_TL_NEW_GPU, ++ KBASE_TL_NEW_LPU, ++ KBASE_TL_NEW_ATOM, ++ KBASE_TL_NEW_AS, ++ KBASE_TL_DEL_CTX, ++ KBASE_TL_DEL_ATOM, ++ KBASE_TL_LIFELINK_LPU_GPU, ++ KBASE_TL_LIFELINK_AS_GPU, ++ KBASE_TL_RET_CTX_LPU, ++ KBASE_TL_RET_ATOM_CTX, ++ KBASE_TL_RET_ATOM_LPU, ++ KBASE_TL_NRET_CTX_LPU, ++ KBASE_TL_NRET_ATOM_CTX, ++ KBASE_TL_NRET_ATOM_LPU, ++ KBASE_TL_RET_AS_CTX, ++ KBASE_TL_NRET_AS_CTX, ++ KBASE_TL_RET_ATOM_AS, ++ KBASE_TL_NRET_ATOM_AS, ++ KBASE_TL_DEP_ATOM_ATOM, ++ KBASE_TL_NDEP_ATOM_ATOM, ++ KBASE_TL_RDEP_ATOM_ATOM, ++ KBASE_TL_ATTRIB_ATOM_CONFIG, ++ KBASE_TL_ATTRIB_ATOM_PRIORITY, ++ KBASE_TL_ATTRIB_ATOM_STATE, ++ KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, ++ KBASE_TL_ATTRIB_ATOM_JIT, ++ KBASE_TL_ATTRIB_AS_CONFIG, ++ KBASE_TL_EVENT_LPU_SOFTSTOP, ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, ++ ++ /* Job dump specific events. */ ++ KBASE_JD_GPU_SOFT_RESET ++}; ++ ++/* Message ids of trace events that are recorded in the auxiliary stream. */ ++enum tl_msg_id_aux { ++ KBASE_AUX_PM_STATE, ++ KBASE_AUX_PAGEFAULT, ++ KBASE_AUX_PAGESALLOC, ++ KBASE_AUX_DEVFREQ_TARGET, ++ KBASE_AUX_PROTECTED_ENTER_START, ++ KBASE_AUX_PROTECTED_ENTER_END, ++ KBASE_AUX_PROTECTED_LEAVE_START, ++ KBASE_AUX_PROTECTED_LEAVE_END +}; + ++/*****************************************************************************/ ++ +/** -+ * Arguments for a UK call are stored in a structure. This structure consists -+ * of a fixed size header and a payload. The header carries a 32-bit number -+ * identifying the UK function to be called (see uk_func). When the UKK client -+ * receives this header and executed the requested UK function, it will use -+ * the same header to store the result of the function in the form of a -+ * int return code. 
The size of this structure is such that the -+ * first member of the payload following the header can be accessed efficiently -+ * on a 32 and 64-bit kernel and the structure has the same size regardless -+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined -+ * accordingly in the OS specific mali_uk_os.h header file. ++ * struct tl_stream - timeline stream structure ++ * @lock: message order lock ++ * @buffer: array of buffers ++ * @wbi: write buffer index ++ * @rbi: read buffer index ++ * @numbered: if non-zero stream's packets are sequentially numbered ++ * @autoflush_counter: counter tracking stream's autoflush state ++ * ++ * This structure holds information needed to construct proper packets in the ++ * timeline stream. Each message in sequence must bear timestamp that is greater ++ * to one in previous message in the same stream. For this reason lock is held ++ * throughout the process of message creation. Each stream contains set of ++ * buffers. Each buffer will hold one MIPE packet. In case there is no free ++ * space required to store incoming message the oldest buffer is discarded. ++ * Each packet in timeline body stream has sequence number embedded (this value ++ * must increment monotonically and is used by packets receiver to discover ++ * buffer overflows. ++ * Autoflush counter is set to negative number when there is no data pending ++ * for flush and it is set to zero on every update of the buffer. Autoflush ++ * timer will increment the counter by one on every expiry. In case there will ++ * be no activity on the buffer during two consecutive timer expiries, stream ++ * buffer will be flushed. + */ -+union uk_header { -+ /** -+ * 32-bit number identifying the UK function to be called. -+ * Also see uk_func. -+ */ -+ u32 id; -+ /** -+ * The int return code returned by the called UK function. -+ * See the specification of the particular UK function you are -+ * calling for the meaning of the error codes returned. All -+ * UK functions return 0 on success. -+ */ -+ u32 ret; -+ /* -+ * Used to ensure 64-bit alignment of this union. Do not remove. -+ * This field is used for padding and does not need to be initialized. -+ */ -+ u64 sizer; ++struct tl_stream { ++ spinlock_t lock; ++ ++ struct { ++ atomic_t size; /* number of bytes in buffer */ ++ char data[PACKET_SIZE]; /* buffer's data */ ++ } buffer[PACKET_COUNT]; ++ ++ atomic_t wbi; ++ atomic_t rbi; ++ ++ int numbered; ++ atomic_t autoflush_counter; +}; + +/** -+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call -+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side. ++ * struct tp_desc - tracepoint message descriptor structure ++ * @id: tracepoint ID identifying message in stream ++ * @id_str: human readable version of tracepoint ID ++ * @name: tracepoint description ++ * @arg_types: tracepoint's arguments types declaration ++ * @arg_names: comma separated list of tracepoint's arguments names + */ -+struct uku_version_check_args { -+ union uk_header header; -+ /**< UK call header */ -+ u16 major; -+ /**< This field carries the user-side major version on input and the kernel-side major version on output */ -+ u16 minor; -+ /**< This field carries the user-side minor version on input and the kernel-side minor version on output. 
*/ -+ u8 padding[4]; ++struct tp_desc { ++ u32 id; ++ const char *id_str; ++ const char *name; ++ const char *arg_types; ++ const char *arg_names; +}; + -+/** @} end group uk_api */ -+ -+/** @} *//* end group base_api */ -+ -+#ifdef __cplusplus -+} -+#endif /* __cplusplus */ -+#endif /* _UK_H_ */ -diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig -new file mode 100644 -index 000000000..8fb4e917c ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/Kconfig -@@ -0,0 +1,24 @@ -+# -+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/*****************************************************************************/ + ++/* Configuration of timeline streams generated by kernel. ++ * Kernel emit only streams containing either timeline object events or ++ * auxiliary events. All streams have stream id value of 1 (as opposed to user ++ * space streams that have value of 0). */ ++static const struct { ++ enum tl_packet_family pkt_family; ++ enum tl_packet_class pkt_class; ++ enum tl_packet_type pkt_type; ++ unsigned int stream_id; ++} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { ++ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1}, ++ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, ++ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, ++ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1}, ++ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} ++}; + ++/* The timeline streams generated by kernel. */ ++static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT]; + ++/* Autoflush timer. */ ++static struct timer_list autoflush_timer; + -+# Add your platform specific Kconfig file here -+# -+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig" -+# -+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME -+# ++/* If non-zero autoflush timer is active. */ ++static atomic_t autoflush_timer_active; + -diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild -new file mode 100755 -index 000000000..e888a42fc ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild -@@ -0,0 +1,18 @@ -+# -+# (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/* Reader lock. Only one reader is allowed to have access to the timeline ++ * streams at any given time. */ ++static DEFINE_MUTEX(tl_reader_lock); + ++/* Timeline stream event queue. 
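/*
 * Minimal user-space model (editorial, not part of the patch) of the
 * autoflush protocol used with the timer declared above: -1 means the stream
 * is already flushed, a writer drops the counter back to 0 whenever it
 * touches a buffer, and every timer expiry increments it.  A buffer is
 * therefore flushed only after two consecutive expiries with no writer
 * activity in between, which keeps idle streams cheap.
 */
#include <stdio.h>

static int autoflush_counter = -1;

static void writer_touches_buffer(void)
{
	autoflush_counter = 0;		/* stream now holds unflushed data */
}

static int timer_expiry(void)		/* returns 1 when a flush would occur */
{
	if (autoflush_counter < 0)
		return 0;		/* nothing pending */
	if (autoflush_counter++ == 0)
		return 0;		/* first idle interval: only mark it */
	autoflush_counter = -1;		/* second idle interval: flush */
	return 1;
}

int main(void)
{
	writer_touches_buffer();
	printf("%d %d %d\n", timer_expiry(), timer_expiry(), timer_expiry());
	return 0;			/* prints "0 1 0" */
}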
*/ ++static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); + -+mali_kbase-y += \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_devicetree.o \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_runtime_pm.o -diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c -new file mode 100644 -index 000000000..b2a7c93f1 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c -@@ -0,0 +1,31 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++/* The timeline stream file operations functions. */ ++static ssize_t kbasep_tlstream_read( ++ struct file *filp, ++ char __user *buffer, ++ size_t size, ++ loff_t *f_pos); ++static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait); ++static int kbasep_tlstream_release(struct inode *inode, struct file *filp); + ++/* The timeline stream file operations structure. */ ++static const struct file_operations kbasep_tlstream_fops = { ++ .release = kbasep_tlstream_release, ++ .read = kbasep_tlstream_read, ++ .poll = kbasep_tlstream_poll, ++}; + ++/* Descriptors of timeline messages transmitted in object events stream. */ ++static const struct tp_desc tp_desc_obj[] = { ++ { ++ KBASE_TL_NEW_CTX, ++ __stringify(KBASE_TL_NEW_CTX), ++ "object ctx is created", ++ "@pII", ++ "ctx,ctx_nr,tgid" ++ }, ++ { ++ KBASE_TL_NEW_GPU, ++ __stringify(KBASE_TL_NEW_GPU), ++ "object gpu is created", ++ "@pII", ++ "gpu,gpu_id,core_count" ++ }, ++ { ++ KBASE_TL_NEW_LPU, ++ __stringify(KBASE_TL_NEW_LPU), ++ "object lpu is created", ++ "@pII", ++ "lpu,lpu_nr,lpu_fn" ++ }, ++ { ++ KBASE_TL_NEW_ATOM, ++ __stringify(KBASE_TL_NEW_ATOM), ++ "object atom is created", ++ "@pI", ++ "atom,atom_nr" ++ }, ++ { ++ KBASE_TL_NEW_AS, ++ __stringify(KBASE_TL_NEW_AS), ++ "address space object is created", ++ "@pI", ++ "address_space,as_nr" ++ }, ++ { ++ KBASE_TL_DEL_CTX, ++ __stringify(KBASE_TL_DEL_CTX), ++ "context is destroyed", ++ "@p", ++ "ctx" ++ }, ++ { ++ KBASE_TL_DEL_ATOM, ++ __stringify(KBASE_TL_DEL_ATOM), ++ "atom is destroyed", ++ "@p", ++ "atom" ++ }, ++ { ++ KBASE_TL_LIFELINK_LPU_GPU, ++ __stringify(KBASE_TL_LIFELINK_LPU_GPU), ++ "lpu is deleted with gpu", ++ "@pp", ++ "lpu,gpu" ++ }, ++ { ++ KBASE_TL_LIFELINK_AS_GPU, ++ __stringify(KBASE_TL_LIFELINK_AS_GPU), ++ "address space is deleted with gpu", ++ "@pp", ++ "address_space,gpu" ++ }, ++ { ++ KBASE_TL_RET_CTX_LPU, ++ __stringify(KBASE_TL_RET_CTX_LPU), ++ "context is retained by lpu", ++ "@pp", ++ "ctx,lpu" ++ }, ++ { ++ KBASE_TL_RET_ATOM_CTX, ++ __stringify(KBASE_TL_RET_ATOM_CTX), ++ "atom is retained by context", ++ "@pp", ++ "atom,ctx" ++ }, ++ { ++ KBASE_TL_RET_ATOM_LPU, ++ __stringify(KBASE_TL_RET_ATOM_LPU), ++ "atom is retained by lpu", ++ "@pps", ++ "atom,lpu,attrib_match_list" ++ }, ++ { ++ KBASE_TL_NRET_CTX_LPU, ++ __stringify(KBASE_TL_NRET_CTX_LPU), ++ "context is released by lpu", ++ "@pp", ++ "ctx,lpu" ++ }, ++ { ++ KBASE_TL_NRET_ATOM_CTX, ++ __stringify(KBASE_TL_NRET_ATOM_CTX), ++ "atom is 
released by context", ++ "@pp", ++ "atom,ctx" ++ }, ++ { ++ KBASE_TL_NRET_ATOM_LPU, ++ __stringify(KBASE_TL_NRET_ATOM_LPU), ++ "atom is released by lpu", ++ "@pp", ++ "atom,lpu" ++ }, ++ { ++ KBASE_TL_RET_AS_CTX, ++ __stringify(KBASE_TL_RET_AS_CTX), ++ "address space is retained by context", ++ "@pp", ++ "address_space,ctx" ++ }, ++ { ++ KBASE_TL_NRET_AS_CTX, ++ __stringify(KBASE_TL_NRET_AS_CTX), ++ "address space is released by context", ++ "@pp", ++ "address_space,ctx" ++ }, ++ { ++ KBASE_TL_RET_ATOM_AS, ++ __stringify(KBASE_TL_RET_ATOM_AS), ++ "atom is retained by address space", ++ "@pp", ++ "atom,address_space" ++ }, ++ { ++ KBASE_TL_NRET_ATOM_AS, ++ __stringify(KBASE_TL_NRET_ATOM_AS), ++ "atom is released by address space", ++ "@pp", ++ "atom,address_space" ++ }, ++ { ++ KBASE_TL_DEP_ATOM_ATOM, ++ __stringify(KBASE_TL_DEP_ATOM_ATOM), ++ "atom2 depends on atom1", ++ "@pp", ++ "atom1,atom2" ++ }, ++ { ++ KBASE_TL_NDEP_ATOM_ATOM, ++ __stringify(KBASE_TL_NDEP_ATOM_ATOM), ++ "atom2 no longer depends on atom1", ++ "@pp", ++ "atom1,atom2" ++ }, ++ { ++ KBASE_TL_RDEP_ATOM_ATOM, ++ __stringify(KBASE_TL_RDEP_ATOM_ATOM), ++ "resolved dependecy of atom2 depending on atom1", ++ "@pp", ++ "atom1,atom2" ++ }, ++ { ++ KBASE_TL_ATTRIB_ATOM_CONFIG, ++ __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), ++ "atom job slot attributes", ++ "@pLLI", ++ "atom,descriptor,affinity,config" ++ }, ++ { ++ KBASE_TL_ATTRIB_ATOM_PRIORITY, ++ __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY), ++ "atom priority", ++ "@pI", ++ "atom,prio" ++ }, ++ { ++ KBASE_TL_ATTRIB_ATOM_STATE, ++ __stringify(KBASE_TL_ATTRIB_ATOM_STATE), ++ "atom state", ++ "@pI", ++ "atom,state" ++ }, ++ { ++ KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, ++ __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE), ++ "atom caused priority change", ++ "@p", ++ "atom" ++ }, ++ { ++ KBASE_TL_ATTRIB_ATOM_JIT, ++ __stringify(KBASE_TL_ATTRIB_ATOM_JIT), ++ "jit done for atom", ++ "@pLL", ++ "atom,edit_addr,new_addr" ++ }, ++ { ++ KBASE_TL_ATTRIB_AS_CONFIG, ++ __stringify(KBASE_TL_ATTRIB_AS_CONFIG), ++ "address space attributes", ++ "@pLLL", ++ "address_space,transtab,memattr,transcfg" ++ }, ++ { ++ KBASE_TL_EVENT_LPU_SOFTSTOP, ++ __stringify(KBASE_TL_EVENT_LPU_SOFTSTOP), ++ "softstop event on given lpu", ++ "@p", ++ "lpu" ++ }, ++ { ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, ++ __stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX), ++ "atom softstopped", ++ "@p", ++ "atom" ++ }, ++ { ++ KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, ++ __stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE), ++ "atom softstop issued", ++ "@p", ++ "atom" ++ }, ++ { ++ KBASE_JD_GPU_SOFT_RESET, ++ __stringify(KBASE_JD_GPU_SOFT_RESET), ++ "gpu soft reset", ++ "@p", ++ "gpu" ++ }, ++}; + -+#include ++/* Descriptors of timeline messages transmitted in auxiliary events stream. 
*/ ++static const struct tp_desc tp_desc_aux[] = { ++ { ++ KBASE_AUX_PM_STATE, ++ __stringify(KBASE_AUX_PM_STATE), ++ "PM state", ++ "@IL", ++ "core_type,core_state_bitset" ++ }, ++ { ++ KBASE_AUX_PAGEFAULT, ++ __stringify(KBASE_AUX_PAGEFAULT), ++ "Page fault", ++ "@IL", ++ "ctx_nr,page_cnt_change" ++ }, ++ { ++ KBASE_AUX_PAGESALLOC, ++ __stringify(KBASE_AUX_PAGESALLOC), ++ "Total alloc pages change", ++ "@IL", ++ "ctx_nr,page_cnt" ++ }, ++ { ++ KBASE_AUX_DEVFREQ_TARGET, ++ __stringify(KBASE_AUX_DEVFREQ_TARGET), ++ "New device frequency target", ++ "@L", ++ "target_freq" ++ }, ++ { ++ KBASE_AUX_PROTECTED_ENTER_START, ++ __stringify(KBASE_AUX_PROTECTED_ENTER_START), ++ "enter protected mode start", ++ "@p", ++ "gpu" ++ }, ++ { ++ KBASE_AUX_PROTECTED_ENTER_END, ++ __stringify(KBASE_AUX_PROTECTED_ENTER_END), ++ "enter protected mode end", ++ "@p", ++ "gpu" ++ }, ++ { ++ KBASE_AUX_PROTECTED_LEAVE_START, ++ __stringify(KBASE_AUX_PROTECTED_LEAVE_START), ++ "leave protected mode start", ++ "@p", ++ "gpu" ++ }, ++ { ++ KBASE_AUX_PROTECTED_LEAVE_END, ++ __stringify(KBASE_AUX_PROTECTED_LEAVE_END), ++ "leave protected mode end", ++ "@p", ++ "gpu" ++ } ++}; + -+int kbase_platform_early_init(void) -+{ -+ /* Nothing needed at this stage */ -+ return 0; -+} ++#if MALI_UNIT_TEST ++/* Number of bytes read by user. */ ++static atomic_t tlstream_bytes_collected = {0}; + -+static struct kbase_platform_config dummy_platform_config; ++/* Number of bytes generated by tracepoint messages. */ ++static atomic_t tlstream_bytes_generated = {0}; ++#endif /* MALI_UNIT_TEST */ + -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &dummy_platform_config; -+} -diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..49e107f98 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h -@@ -0,0 +1,73 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++/*****************************************************************************/ + ++/* Indicator of whether the timeline stream file descriptor is used. */ ++atomic_t kbase_tlstream_enabled = {0}; + ++/*****************************************************************************/ + +/** -+ * Maximum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. ++ * kbasep_tlstream_get_timestamp - return timestamp + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * Function returns timestamp value based on raw monotonic timer. Value will ++ * wrap around zero in case of overflow. ++ * Return: timestamp value + */ -+#define GPU_FREQ_KHZ_MAX (5000) ++static u64 kbasep_tlstream_get_timestamp(void) ++{ ++ struct timespec64 ts; ++ u64 timestamp; ++ ++ ktime_get_raw_ts64(&ts); ++ timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; ++ return timestamp; ++} ++ +/** -+ * Minimum frequency GPU will be clocked at. Given in kHz. 
-+ * This must be specified as there is no default value. ++ * kbasep_tlstream_write_bytes - write data to message buffer ++ * @buffer: buffer where data will be written ++ * @pos: position in the buffer where to place data ++ * @bytes: pointer to buffer holding data ++ * @len: length of data to be written + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * Return: updated position in the buffer + */ -+#define GPU_FREQ_KHZ_MIN (5000) ++static size_t kbasep_tlstream_write_bytes( ++ char *buffer, ++ size_t pos, ++ const void *bytes, ++ size_t len) ++{ ++ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(bytes); ++ ++ memcpy(&buffer[pos], bytes, len); ++ ++ return pos + len; ++} + +/** -+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock -+ * -+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func -+ * for the function prototype. ++ * kbasep_tlstream_write_string - write string to message buffer ++ * @buffer: buffer where data will be written ++ * @pos: position in the buffer where to place data ++ * @string: pointer to buffer holding the source string ++ * @max_write_size: number of bytes that can be stored in buffer + * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA ++ * Return: updated position in the buffer + */ -+#define CPU_SPEED_FUNC (NULL) ++static size_t kbasep_tlstream_write_string( ++ char *buffer, ++ size_t pos, ++ const char *string, ++ size_t max_write_size) ++{ ++ u32 string_len; ++ ++ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(string); ++ /* Timeline string consists of at least string length and nul ++ * terminator. */ ++ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); ++ max_write_size -= sizeof(string_len); ++ ++ string_len = strlcpy( ++ &buffer[pos + sizeof(string_len)], ++ string, ++ max_write_size); ++ string_len += sizeof(char); ++ ++ /* Make sure that the source string fit into the buffer. */ ++ KBASE_DEBUG_ASSERT(string_len <= max_write_size); ++ ++ /* Update string length. */ ++ memcpy(&buffer[pos], &string_len, sizeof(string_len)); ++ ++ return pos + sizeof(string_len) + string_len; ++} + +/** -+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock -+ * -+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func -+ * for the function prototype. ++ * kbasep_tlstream_write_timestamp - write timestamp to message buffer ++ * @buffer: buffer where data will be written ++ * @pos: position in the buffer where to place data + * -+ * Attached value: A kbase_gpu_clk_speed_func. 
-+ * Default Value: NA ++ * Return: updated position in the buffer + */ -+#define GPU_SPEED_FUNC (NULL) ++static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos) ++{ ++ u64 timestamp = kbasep_tlstream_get_timestamp(); ++ ++ return kbasep_tlstream_write_bytes( ++ buffer, pos, ++ ×tamp, sizeof(timestamp)); ++} + +/** -+ * Power management configuration -+ * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * kbasep_tlstream_put_bits - put bits in a word ++ * @word: pointer to the words being modified ++ * @value: value that shall be written to given position ++ * @bitpos: position where value shall be written (in bits) ++ * @bitlen: length of value (in bits) + */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++static void kbasep_tlstream_put_bits( ++ u32 *word, ++ u32 value, ++ unsigned int bitpos, ++ unsigned int bitlen) ++{ ++ const u32 mask = ((1 << bitlen) - 1) << bitpos; ++ ++ KBASE_DEBUG_ASSERT(word); ++ KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen)); ++ KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32); ++ ++ *word &= ~mask; ++ *word |= ((value << bitpos) & mask); ++} + +/** -+ * Platform specific configuration functions ++ * kbasep_tlstream_packet_header_setup - setup the packet header ++ * @buffer: pointer to the buffer ++ * @pkt_family: packet's family ++ * @pkt_type: packet's type ++ * @pkt_class: packet's class ++ * @stream_id: stream id ++ * @numbered: non-zero if this stream is numbered + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * Function sets up immutable part of packet header in the given buffer. + */ -+#define PLATFORM_FUNCS (NULL) ++static void kbasep_tlstream_packet_header_setup( ++ char *buffer, ++ enum tl_packet_family pkt_family, ++ enum tl_packet_class pkt_class, ++ enum tl_packet_type pkt_type, ++ unsigned int stream_id, ++ int numbered) ++{ ++ u32 word0 = 0; ++ u32 word1 = 0; + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c -new file mode 100644 -index 000000000..aa4376afd ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c -@@ -0,0 +1,100 @@ -+/* -+ * -+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ KBASE_DEBUG_ASSERT(buffer); ++ KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL); ++ KBASE_DEBUG_ASSERT( ++ (pkt_type == TL_PACKET_TYPE_HEADER) || ++ (pkt_type == TL_PACKET_TYPE_SUMMARY) || ++ (pkt_type == TL_PACKET_TYPE_BODY)); ++ KBASE_DEBUG_ASSERT( ++ (pkt_class == TL_PACKET_CLASS_OBJ) || ++ (pkt_class == TL_PACKET_CLASS_AUX)); + ++ kbasep_tlstream_put_bits( ++ &word0, pkt_family, ++ PACKET_FAMILY_POS, PACKET_FAMILY_LEN); ++ kbasep_tlstream_put_bits( ++ &word0, pkt_class, ++ PACKET_CLASS_POS, PACKET_CLASS_LEN); ++ kbasep_tlstream_put_bits( ++ &word0, pkt_type, ++ PACKET_TYPE_POS, PACKET_TYPE_LEN); ++ kbasep_tlstream_put_bits( ++ &word0, stream_id, ++ PACKET_STREAMID_POS, PACKET_STREAMID_LEN); + ++ if (numbered) ++ kbasep_tlstream_put_bits( ++ &word1, 1, ++ PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN); + -+#include -+#include -+#include -+#include ++ memcpy(&buffer[0], &word0, sizeof(word0)); ++ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); ++} + -+static int pm_callback_power_on(struct kbase_device *kbdev) ++/** ++ * kbasep_tlstream_packet_header_update - update the packet header ++ * @buffer: pointer to the buffer ++ * @data_size: amount of data carried in this packet ++ * ++ * Function updates mutable part of packet header in the given buffer. ++ * Note that value of data_size must not including size of the header. ++ */ ++static void kbasep_tlstream_packet_header_update( ++ char *buffer, ++ size_t data_size) +{ -+ int ret; ++ u32 word0; ++ u32 word1; + -+ dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", -+ (void *)kbdev->dev->pm_domain); ++ KBASE_DEBUG_ASSERT(buffer); ++ CSTD_UNUSED(word0); + -+ ret = pm_runtime_get_sync(kbdev->dev); ++ memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1)); + -+ dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret); ++ kbasep_tlstream_put_bits( ++ &word1, data_size, ++ PACKET_LENGTH_POS, PACKET_LENGTH_LEN); + -+ return 1; ++ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); +} + -+static void pm_callback_power_off(struct kbase_device *kbdev) ++/** ++ * kbasep_tlstream_packet_number_update - update the packet number ++ * @buffer: pointer to the buffer ++ * @counter: value of packet counter for this packet's stream ++ * ++ * Function updates packet number embedded within the packet placed in the ++ * given buffer. ++ */ ++static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter) +{ -+ dev_dbg(kbdev->dev, "pm_callback_power_off\n"); ++ KBASE_DEBUG_ASSERT(buffer); + -+ pm_runtime_put_autosuspend(kbdev->dev); ++ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); +} + -+int kbase_device_runtime_init(struct kbase_device *kbdev) ++/** ++ * kbasep_timeline_stream_reset - reset stream ++ * @stream: pointer to the stream structure ++ * ++ * Function discards all pending messages and resets packet counters. 
++ */ ++static void kbasep_timeline_stream_reset(struct tl_stream *stream) +{ -+ dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); -+ pm_runtime_enable(kbdev->dev); ++ unsigned int i; + -+ return 0; -+} ++ for (i = 0; i < PACKET_COUNT; i++) { ++ if (stream->numbered) ++ atomic_set( ++ &stream->buffer[i].size, ++ PACKET_HEADER_SIZE + ++ PACKET_NUMBER_SIZE); ++ else ++ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); ++ } + -+void kbase_device_runtime_disable(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); -+ pm_runtime_disable(kbdev->dev); ++ atomic_set(&stream->wbi, 0); ++ atomic_set(&stream->rbi, 0); +} + -+static int pm_callback_runtime_on(struct kbase_device *kbdev) ++/** ++ * kbasep_timeline_stream_init - initialize timeline stream ++ * @stream: pointer to the stream structure ++ * @stream_type: stream type ++ */ ++static void kbasep_timeline_stream_init( ++ struct tl_stream *stream, ++ enum tl_stream_type stream_type) +{ -+ dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); ++ unsigned int i; + -+ return 0; -+} ++ KBASE_DEBUG_ASSERT(stream); ++ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + -+static void pm_callback_runtime_off(struct kbase_device *kbdev) -+{ -+ dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); -+} ++ spin_lock_init(&stream->lock); + -+static void pm_callback_resume(struct kbase_device *kbdev) -+{ -+ int ret = pm_callback_runtime_on(kbdev); ++ /* All packets carrying tracepoints shall be numbered. */ ++ if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) ++ stream->numbered = 1; ++ else ++ stream->numbered = 0; + -+ WARN_ON(ret); ++ for (i = 0; i < PACKET_COUNT; i++) ++ kbasep_tlstream_packet_header_setup( ++ stream->buffer[i].data, ++ tl_stream_cfg[stream_type].pkt_family, ++ tl_stream_cfg[stream_type].pkt_class, ++ tl_stream_cfg[stream_type].pkt_type, ++ tl_stream_cfg[stream_type].stream_id, ++ stream->numbered); ++ ++ kbasep_timeline_stream_reset(tl_stream[stream_type]); +} + -+static void pm_callback_suspend(struct kbase_device *kbdev) ++/** ++ * kbasep_timeline_stream_term - terminate timeline stream ++ * @stream: pointer to the stream structure ++ */ ++static void kbasep_timeline_stream_term(struct tl_stream *stream) +{ -+ pm_callback_runtime_off(kbdev); ++ KBASE_DEBUG_ASSERT(stream); +} + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = pm_callback_suspend, -+ .power_resume_callback = pm_callback_resume, -+#ifdef KBASE_PM_RUNTIME -+ .power_runtime_init_callback = kbase_device_runtime_init, -+ .power_runtime_term_callback = kbase_device_runtime_disable, -+ .power_runtime_on_callback = pm_callback_runtime_on, -+ .power_runtime_off_callback = pm_callback_runtime_off, -+#else /* KBASE_PM_RUNTIME */ -+ .power_runtime_init_callback = NULL, -+ .power_runtime_term_callback = NULL, -+ .power_runtime_on_callback = NULL, -+ .power_runtime_off_callback = NULL, -+#endif /* KBASE_PM_RUNTIME */ -+}; -+ -+ -diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h -new file mode 100644 -index 000000000..c11085af5 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h -@@ -0,0 +1,28 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. 
++/** ++ * kbasep_tlstream_msgbuf_submit - submit packet to the user space ++ * @stream: pointer to the stream structure ++ * @wb_idx_raw: write buffer index ++ * @wb_size: length of data stored in current buffer + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Function updates currently written buffer with packet header. Then write ++ * index is incremented and buffer is handled to user space. Parameters ++ * of new buffer are returned using provided arguments. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Return: length of data in new buffer + * ++ * Warning: User must update the stream structure with returned value. + */ ++static size_t kbasep_tlstream_msgbuf_submit( ++ struct tl_stream *stream, ++ unsigned int wb_idx_raw, ++ unsigned int wb_size) ++{ ++ unsigned int rb_idx_raw = atomic_read(&stream->rbi); ++ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + -+#include ++ /* Set stream as flushed. */ ++ atomic_set(&stream->autoflush_counter, -1); ++ ++ kbasep_tlstream_packet_header_update( ++ stream->buffer[wb_idx].data, ++ wb_size - PACKET_HEADER_SIZE); ++ ++ if (stream->numbered) ++ kbasep_tlstream_packet_number_update( ++ stream->buffer[wb_idx].data, ++ wb_idx_raw); ++ ++ /* Increasing write buffer index will expose this packet to the reader. ++ * As stream->lock is not taken on reader side we must make sure memory ++ * is updated correctly before this will happen. */ ++ smp_wmb(); ++ wb_idx_raw++; ++ atomic_set(&stream->wbi, wb_idx_raw); ++ ++ /* Inform user that packets are ready for reading. */ ++ wake_up_interruptible(&tl_event_queue); + ++ /* Detect and mark overflow in this stream. */ ++ if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) { ++ /* Reader side depends on this increment to correctly handle ++ * overflows. The value shall be updated only if it was not ++ * modified by the reader. The data holding buffer will not be ++ * updated before stream->lock is released, however size of the ++ * buffer will. Make sure this increment is globally visible ++ * before information about selected write buffer size. */ ++ atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1); ++ } ++ ++ wb_size = PACKET_HEADER_SIZE; ++ if (stream->numbered) ++ wb_size += PACKET_NUMBER_SIZE; ++ ++ return wb_size; ++} + +/** -+ * @brief Entry point to transfer control to a platform for early initialization ++ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer ++ * @stream_type: type of the stream that shall be locked ++ * @msg_size: message size ++ * @flags: pointer to store flags passed back on stream release + * -+ * This function is called early on in the initialization during execution of -+ * @ref kbase_driver_init. ++ * Function will lock the stream and reserve the number of bytes requested ++ * in msg_size for the user. + * -+ * @return Zero to indicate success non-zero for failure. ++ * Return: pointer to the buffer where message can be stored ++ * ++ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). ++ * Only atomic operations are allowed while stream is locked ++ * (i.e. do not use any operation that may sleep). 
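/*
 * Worked illustration (editorial, not part of the patch) of the overflow
 * handling in kbasep_tlstream_msgbuf_submit() above.  The raw wbi/rbi indices
 * only ever grow; the live slot is index % PACKET_COUNT, and the writer has
 * lapped the reader exactly when wbi - rbi == PACKET_COUNT.  At that point
 * the reader index is nudged forward (cmpxchg in the real code) so the oldest
 * packet is silently dropped rather than overwritten under the reader.
 */
#include <stdio.h>

#define PACKET_COUNT 16

int main(void)
{
	unsigned int rbi = 3;			/* reader consumed 3 packets */
	unsigned int wbi = rbi + PACKET_COUNT;	/* writer just filled all 16 */

	if (wbi - rbi == PACKET_COUNT)
		rbi++;				/* drop the oldest packet */

	printf("reader resumes at slot %u\n", rbi % PACKET_COUNT);
	return 0;
}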
+ */ -+int kbase_platform_early_init(void); -+int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev); -diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h -new file mode 100644 -index 000000000..01f9dfce9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h -@@ -0,0 +1,38 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ -+ -+ -+ -+#ifdef CONFIG_MALI_PLATFORM_FAKE ++static char *kbasep_tlstream_msgbuf_acquire( ++ enum tl_stream_type stream_type, ++ size_t msg_size, ++ unsigned long *flags) __acquires(&stream->lock) ++{ ++ struct tl_stream *stream; ++ unsigned int wb_idx_raw; ++ unsigned int wb_idx; ++ size_t wb_size; + -+/** -+ * kbase_platform_fake_register - Entry point for fake platform registration -+ * -+ * This function is called early on in the initialization during execution of -+ * kbase_driver_init. -+ * -+ * Return: 0 to indicate success, non-zero for failure. -+ */ -+int kbase_platform_fake_register(void); ++ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); ++ KBASE_DEBUG_ASSERT( ++ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= ++ msg_size); + -+/** -+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration -+ * -+ * This function is called in the termination during execution of -+ * kbase_driver_exit. -+ */ -+void kbase_platform_fake_unregister(void); ++ stream = tl_stream[stream_type]; + -+#endif /* CONFIG_MALI_PLATFORM_FAKE */ -diff --git a/drivers/gpu/arm/midgard/platform/rk/Kbuild b/drivers/gpu/arm/midgard/platform/rk/Kbuild -new file mode 100755 -index 000000000..db993487e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/rk/Kbuild -@@ -0,0 +1,17 @@ -+# -+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ spin_lock_irqsave(&stream->lock, *flags); + -+midgard_kbase-y += \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_rk.o ++ wb_idx_raw = atomic_read(&stream->wbi); ++ wb_idx = wb_idx_raw % PACKET_COUNT; ++ wb_size = atomic_read(&stream->buffer[wb_idx].size); + -diff --git a/drivers/gpu/arm/midgard/platform/rk/custom_log.h b/drivers/gpu/arm/midgard/platform/rk/custom_log.h -new file mode 100644 -index 000000000..fe5e12241 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/rk/custom_log.h -@@ -0,0 +1,209 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* ---------------------------------------------------------------------------- -+ * File: custom_log.h -+ * -+ * Desc: ChenZhen å好的 log 输出的定制实现. 
-+ * -+ * -------------------------------------------------------------------- -+ * < 习语 å’Œ 缩略语 > : -+ * -+ * -------------------------------------------------------------------- -+ * Usage: -+ * -+ * Note: -+ * -+ * Author: ChenZhen -+ * -+ * ---------------------------------------------------------------------------- -+ * Version: -+ * v1.0 -+ * ---------------------------------------------------------------------------- -+ * Log: -+ ----Fri Nov 19 15:20:28 2010 v1.0 -+ * -+ * ---------------------------------------------------------------------------- -+ */ ++ /* Select next buffer if data will not fit into current one. */ ++ if (PACKET_SIZE < wb_size + msg_size) { ++ wb_size = kbasep_tlstream_msgbuf_submit( ++ stream, wb_idx_raw, wb_size); ++ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; ++ } + -+#ifndef __CUSTOM_LOG_H__ -+#define __CUSTOM_LOG_H__ ++ /* Reserve space in selected buffer. */ ++ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); + -+#ifdef __cplusplus -+extern "C" { -+#endif ++#if MALI_UNIT_TEST ++ atomic_add(msg_size, &tlstream_bytes_generated); ++#endif /* MALI_UNIT_TEST */ + -+/* ----------------------------------------------------------------------------- -+ * Include Files -+ * ----------------------------------------------------------------------------- -+ */ -+#include -+#include ++ return &stream->buffer[wb_idx].data[wb_size]; ++} + -+/* ----------------------------------------------------------------------------- -+ * Macros Definition -+ * ----------------------------------------------------------------------------- ++/** ++ * kbasep_tlstream_msgbuf_release - unlock selected stream ++ * @stream_type: type of the stream that shall be locked ++ * @flags: value obtained during stream acquire ++ * ++ * Function releases stream that has been previously locked with a call to ++ * kbasep_tlstream_msgbuf_acquire(). + */ ++static void kbasep_tlstream_msgbuf_release( ++ enum tl_stream_type stream_type, ++ unsigned long flags) __releases(&stream->lock) ++{ ++ struct tl_stream *stream; + -+/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. */ -+/* #define ENABLE_DEBUG_LOG */ -+ -+/*----------------------------------------------------------------------------*/ -+ -+#ifdef ENABLE_VERBOSE_LOG -+/** Verbose log. */ -+#define V(fmt, args...) \ -+ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define V(...) ((void)0) -+#endif ++ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + -+#ifdef ENABLE_DEBUG_LOG -+/** Debug log. */ -+#define D(fmt, args...) \ -+ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) -+#else -+#define D(...) ((void)0) -+#endif ++ stream = tl_stream[stream_type]; + -+#define I(fmt, args...) \ -+ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ /* Mark stream as containing unflushed data. */ ++ atomic_set(&stream->autoflush_counter, 0); + -+#define W(fmt, args...) \ -+ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ -+ fmt "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++ spin_unlock_irqrestore(&stream->lock, flags); ++} + -+#define E(fmt, args...) 
\ -+ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ -+ "\n", \ -+ __FILE__, \ -+ __LINE__, \ -+ __func__, \ -+ ## args) ++/*****************************************************************************/ + -+/*-------------------------------------------------------*/ ++/** ++ * kbasep_tlstream_flush_stream - flush stream ++ * @stype: type of stream to be flushed ++ * ++ * Flush pending data in timeline stream. ++ */ ++static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) ++{ ++ struct tl_stream *stream = tl_stream[stype]; ++ unsigned long flags; ++ unsigned int wb_idx_raw; ++ unsigned int wb_idx; ++ size_t wb_size; ++ size_t min_size = PACKET_HEADER_SIZE; + -+/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_DEC(var) D(#var " = %d.", var) ++ if (stream->numbered) ++ min_size += PACKET_NUMBER_SIZE; + -+#define E_DEC(var) E(#var " = %d.", var) ++ spin_lock_irqsave(&stream->lock, flags); + -+/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ -+#define D_HEX(var) D(#var " = 0x%x.", var) ++ wb_idx_raw = atomic_read(&stream->wbi); ++ wb_idx = wb_idx_raw % PACKET_COUNT; ++ wb_size = atomic_read(&stream->buffer[wb_idx].size); + -+#define E_HEX(var) E(#var " = 0x%x.", var) ++ if (wb_size > min_size) { ++ wb_size = kbasep_tlstream_msgbuf_submit( ++ stream, wb_idx_raw, wb_size); ++ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; ++ atomic_set(&stream->buffer[wb_idx].size, wb_size); ++ } ++ spin_unlock_irqrestore(&stream->lock, flags); ++} + +/** -+ * 使用 D(), 以å六进制的形å¼, -+ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. ++ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback ++ * @data: unused ++ * ++ * Timer is executed periodically to check if any of the stream contains ++ * buffer ready to be submitted to user space. + */ -+#define D_PTR(ptr) D(#ptr " = %p.", ptr) ++static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *t) ++{ ++ enum tl_stream_type stype; ++ int rcode; + -+#define E_PTR(ptr) E(#ptr " = %p.", ptr) ++ CSTD_UNUSED(t); + -+/** 使用 D(), æ‰“å° char 字串. */ -+#define D_STR(p_str) \ -+do { \ -+ if (!p_str) { \ -+ D(#p_str " = NULL."); \ -+ else \ -+ D(#p_str " = '%s'.", p_str); \ -+} while (0) ++ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { ++ struct tl_stream *stream = tl_stream[stype]; ++ unsigned long flags; ++ unsigned int wb_idx_raw; ++ unsigned int wb_idx; ++ size_t wb_size; ++ size_t min_size = PACKET_HEADER_SIZE; + -+#define E_STR(p_str) \ -+do { \ -+ if (!p_str) \ -+ E(#p_str " = NULL."); \ -+ else \ -+ E(#p_str " = '%s'.", p_str); \ -+} while (0) ++ int af_cnt = atomic_read(&stream->autoflush_counter); + -+#ifdef ENABLE_DEBUG_LOG -+/** -+ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. -+ */ -+#define D_MEM(p_start, len) \ -+do { \ -+ int i = 0; \ -+ char *p = (char *)(p_start); \ -+ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ -+ (p_start), \ -+ (len)); \ -+ pr_debug("\t\t"); \ -+ for (i = 0; i < (len); i++) \ -+ pr_debug("0x%02x, ", p[i]); \ -+ pr_debug("\n"); \ -+} while (0) -+#else -+#define D_MEM(...) ((void)0) -+#endif ++ /* Check if stream contain unflushed data. */ ++ if (0 > af_cnt) ++ continue; + -+/*-------------------------------------------------------*/ ++ /* Check if stream should be flushed now. 
*/ ++ if (af_cnt != atomic_cmpxchg( ++ &stream->autoflush_counter, ++ af_cnt, ++ af_cnt + 1)) ++ continue; ++ if (!af_cnt) ++ continue; + -+/** -+ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, -+ * å°†å˜é‡ 'ret_var' 设置 'err_code', -+ * log 输出对应的 Error Caution, -+ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. -+ * @param msg -+ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. -+ * @param ret_var -+ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, -+ * 将被设置具体的 Error Code. -+ * 通常是 'ret' or 'result'. -+ * @param err_code -+ * 表å¾ç‰¹å®š error 的常数标识, -+ * 通常是 å®çš„å½¢æ€. -+ * @param label -+ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, -+ * 通常就是 'EXIT'. -+ * @param args... -+ * 对应 'msg_fmt' 实å‚中, -+ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. -+ */ -+#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) \ -+do { \ -+ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ -+ (err_code), \ -+ ## args); \ -+ (ret_var) = (err_code); \ -+ goto label; \ -+} while (0) ++ /* Autoflush this stream. */ ++ if (stream->numbered) ++ min_size += PACKET_NUMBER_SIZE; + -+/* ----------------------------------------------------------------------------- -+ * Types and Structures Definition -+ * ----------------------------------------------------------------------------- -+ */ ++ spin_lock_irqsave(&stream->lock, flags); + -+/* ----------------------------------------------------------------------------- -+ * Global Functions' Prototype -+ * ----------------------------------------------------------------------------- -+ */ ++ wb_idx_raw = atomic_read(&stream->wbi); ++ wb_idx = wb_idx_raw % PACKET_COUNT; ++ wb_size = atomic_read(&stream->buffer[wb_idx].size); + -+/* ----------------------------------------------------------------------------- -+ * Inline Functions Implementation -+ * ----------------------------------------------------------------------------- -+ */ ++ if (wb_size > min_size) { ++ wb_size = kbasep_tlstream_msgbuf_submit( ++ stream, wb_idx_raw, wb_size); ++ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; ++ atomic_set(&stream->buffer[wb_idx].size, ++ wb_size); ++ } ++ spin_unlock_irqrestore(&stream->lock, flags); ++ } + -+#ifdef __cplusplus ++ if (atomic_read(&autoflush_timer_active)) ++ rcode = mod_timer( ++ &autoflush_timer, ++ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); ++ CSTD_UNUSED(rcode); +} -+#endif + -+#endif /* __CUSTOM_LOG_H__ */ -diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..07c5b6f8a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h -@@ -0,0 +1,88 @@ -+/* ++/** ++ * kbasep_tlstream_packet_pending - check timeline streams for pending packets ++ * @stype: pointer to variable where stream type will be placed ++ * @rb_idx_raw: pointer to variable where read buffer index will be placed + * -+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * Function checks all streams for pending packets. It will stop as soon as ++ * packet ready to be submitted to user space is detected. Variables under ++ * pointers, passed as the parameters to this function will be updated with ++ * values pointing to right stream and buffer. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
++ * Return: non-zero if any of timeline streams has at last one packet ready + */ ++static int kbasep_tlstream_packet_pending( ++ enum tl_stream_type *stype, ++ unsigned int *rb_idx_raw) ++{ ++ int pending = 0; + -+/** -+ * @file mali_kbase_config_platform.h -+ * 声明 platform_config_of_rk (platform_rk çš„ platform_config). -+ */ ++ KBASE_DEBUG_ASSERT(stype); ++ KBASE_DEBUG_ASSERT(rb_idx_raw); + -+/** -+ * Maximum frequency GPU will be clocked at. -+ * Given in kHz. -+ * This must be specified as there is no default value. -+ * -+ * Attached value: number in kHz -+ * Default value: NA -+ */ -+#define GPU_FREQ_KHZ_MAX (5000) ++ for ( ++ *stype = 0; ++ (*stype < TL_STREAM_TYPE_COUNT) && !pending; ++ (*stype)++) { ++ if (NULL != tl_stream[*stype]) { ++ *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi); ++ /* Read buffer index may be updated by writer in case of ++ * overflow. Read and write buffer indexes must be ++ * loaded in correct order. */ ++ smp_rmb(); ++ if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw) ++ pending = 1; ++ } ++ } ++ (*stype)--; + -+/** -+ * Minimum frequency GPU will be clocked at. -+ * Given in kHz. -+ * This must be specified as there is no default value. -+ * -+ * Attached value: number in kHz -+ * Default value: NA -+ */ -+#define GPU_FREQ_KHZ_MIN (5000) ++ return pending; ++} + +/** -+ * CPU_SPEED_FUNC -+ * - A pointer to a function that calculates the CPU clock -+ * -+ * CPU clock speed of the platform is in MHz -+ * - see kbase_cpu_clk_speed_func for the function prototype. ++ * kbasep_tlstream_read - copy data from streams to buffer provided by user ++ * @filp: pointer to file structure (unused) ++ * @buffer: pointer to the buffer provided by user ++ * @size: maximum amount of data that can be stored in the buffer ++ * @f_pos: pointer to file offset (unused) + * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA ++ * Return: number of bytes stored in the buffer + */ -+#define CPU_SPEED_FUNC (NULL) ++static ssize_t kbasep_tlstream_read( ++ struct file *filp, ++ char __user *buffer, ++ size_t size, ++ loff_t *f_pos) ++{ ++ ssize_t copy_len = 0; + -+/** -+ * GPU_SPEED_FUNC -+ * - A pointer to a function that calculates the GPU clock -+ * -+ * GPU clock speed of the platform in MHz -+ * - see kbase_gpu_clk_speed_func for the function prototype. -+ * -+ * Attached value: A kbase_gpu_clk_speed_func. 
-+ * Default Value: NA -+ */ -+#define GPU_SPEED_FUNC (NULL) ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(f_pos); + -+/** -+ * Power management configuration -+ * -+ * Attached value: -+ * pointer to @ref kbase_pm_callback_conf -+ * Default value: -+ * See @ref kbase_pm_callback_conf -+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) -+extern struct kbase_pm_callback_conf pm_callbacks; ++ if (!buffer) ++ return -EINVAL; + -+/** -+ * Platform specific configuration functions -+ * -+ * Attached value: -+ * pointer to @ref kbase_platform_funcs_conf -+ * Default value: -+ * See @ref kbase_platform_funcs_conf -+ */ -+#define PLATFORM_FUNCS (&platform_funcs) -+extern struct kbase_platform_funcs_conf platform_funcs; ++ if ((0 > *f_pos) || (PACKET_SIZE > size)) ++ return -EINVAL; + -+/** -+ * Secure mode switch -+ * -+ * Attached value: pointer to @ref kbase_secure_ops -+ */ -+#define SECURE_CALLBACKS (NULL) ++ mutex_lock(&tl_reader_lock); + -diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c -new file mode 100644 -index 000000000..8ad910c12 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c -@@ -0,0 +1,492 @@ -+/* -+ * -+ * (C) COPYRIGHT ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ */ ++ while (copy_len < size) { ++ enum tl_stream_type stype; ++ unsigned int rb_idx_raw = 0; ++ unsigned int rb_idx; ++ size_t rb_size; + -+/* #define ENABLE_DEBUG_LOG */ -+#include "custom_log.h" ++ /* If we don't have any data yet, wait for packet to be ++ * submitted. If we already read some packets and there is no ++ * packet pending return back to user. */ ++ if (0 < copy_len) { ++ if (!kbasep_tlstream_packet_pending( ++ &stype, ++ &rb_idx_raw)) ++ break; ++ } else { ++ if (wait_event_interruptible( ++ tl_event_queue, ++ kbasep_tlstream_packet_pending( ++ &stype, ++ &rb_idx_raw))) { ++ copy_len = -ERESTARTSYS; ++ break; ++ } ++ } + -+#include -+#include -+#include -+#include ++ /* Check if this packet fits into the user buffer. ++ * If so copy its content. */ ++ rb_idx = rb_idx_raw % PACKET_COUNT; ++ rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size); ++ if (rb_size > size - copy_len) ++ break; ++ if (copy_to_user( ++ &buffer[copy_len], ++ tl_stream[stype]->buffer[rb_idx].data, ++ rb_size)) { ++ copy_len = -EFAULT; ++ break; ++ } + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ /* If the rbi still points to the packet we just processed ++ * then there was no overflow so we add the copied size to ++ * copy_len and move rbi on to the next packet ++ */ ++ smp_rmb(); ++ if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) { ++ copy_len += rb_size; ++ atomic_inc(&tl_stream[stype]->rbi); + -+#include "mali_kbase_rk.h" ++#if MALI_UNIT_TEST ++ atomic_add(rb_size, &tlstream_bytes_collected); ++#endif /* MALI_UNIT_TEST */ ++ } ++ } ++ ++ mutex_unlock(&tl_reader_lock); ++ ++ return copy_len; ++} + +/** -+ * @file mali_kbase_config_rk.c -+ * 对 platform_config_of_rk 的具体实现. 
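/*
 * Hedged user-space sketch (editorial, not part of the patch) of a reader for
 * the timeline file descriptor created by kbase_tlstream_acquire() further
 * below; how that descriptor reaches user space is assumed here.  The read
 * contract follows the fops above: the buffer must be at least PACKET_SIZE
 * (4096) bytes or read() fails with -EINVAL, each read() returns only whole
 * packets, and poll() reports POLLIN once a packet is ready.
 */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

#define PACKET_SIZE 4096

static void drain_timeline(int tl_fd)
{
	char buf[PACKET_SIZE * 4];	/* room for a few whole packets */
	struct pollfd pfd = { .fd = tl_fd, .events = POLLIN };

	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		ssize_t n = read(tl_fd, buf, sizeof(buf));

		if (n <= 0)
			break;
		fprintf(stderr, "read %zd bytes of MIPE packets\n", n);
		/* decode packet headers and payload here */
	}
}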
-+ * -+ * mali_device_driver 包å«ä¸¤éƒ¨åˆ† : -+ * .DP : platform_dependent_part_in_mdd : -+ * ä¾èµ– platform 部分, -+ * æºç åœ¨ /platform// -+ * 在 mali_device_driver 内部, -+ * 记为 platform_dependent_part, -+ * 也被记为 platform_specific_code. -+ * .DP : common_parts_in_mdd : -+ * arm 实现的通用的部分, -+ * æºç åœ¨ / 下. -+ * 在 mali_device_driver 内部, 记为 common_parts. ++ * kbasep_tlstream_poll - poll timeline stream for packets ++ * @filp: pointer to file structure ++ * @wait: pointer to poll table ++ * Return: POLLIN if data can be read without blocking, otherwise zero + */ ++static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait) ++{ ++ enum tl_stream_type stream_type; ++ unsigned int rb_idx; + -+/*---------------------------------------------------------------------------*/ ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(wait); + -+#ifdef CONFIG_REGULATOR -+static int rk_pm_enable_regulator(struct kbase_device *kbdev); -+static void rk_pm_disable_regulator(struct kbase_device *kbdev); -+#else -+static inline int rk_pm_enable_regulator(struct kbase_device *kbdev) -+{ ++ poll_wait(filp, &tl_event_queue, wait); ++ if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx)) ++ return POLLIN; + return 0; +} + -+static inline void rk_pm_disable_regulator(struct kbase_device *kbdev) ++/** ++ * kbasep_tlstream_release - release timeline stream descriptor ++ * @inode: pointer to inode structure ++ * @filp: pointer to file structure ++ * ++ * Return always return zero ++ */ ++static int kbasep_tlstream_release(struct inode *inode, struct file *filp) +{ -+} -+#endif -+ -+static int rk_pm_enable_clk(struct kbase_device *kbdev); -+ -+static void rk_pm_disable_clk(struct kbase_device *kbdev); -+ -+static int kbase_platform_rk_create_sysfs_files(struct device *dev); ++ KBASE_DEBUG_ASSERT(inode); ++ KBASE_DEBUG_ASSERT(filp); ++ CSTD_UNUSED(inode); ++ CSTD_UNUSED(filp); + -+static void kbase_platform_rk_remove_sysfs_files(struct device *dev); ++ /* Stop autoflush timer before releasing access to streams. */ ++ atomic_set(&autoflush_timer_active, 0); ++ del_timer_sync(&autoflush_timer); + -+/*---------------------------------------------------------------------------*/ ++ atomic_set(&kbase_tlstream_enabled, 0); ++ return 0; ++} + -+static void rk_pm_power_off_delay_work(struct work_struct *work) ++/** ++ * kbasep_tlstream_timeline_header - prepare timeline header stream packet ++ * @stream_type: type of the stream that will carry header data ++ * @tp_desc: pointer to array with tracepoint descriptors ++ * @tp_count: number of descriptors in the given array ++ * ++ * Functions fills in information about tracepoints stored in body stream ++ * associated with this header stream. 
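/*
 * Editorial sketch of the header-packet payload laid out by the function
 * below, derived from its write_bytes/write_string calls rather than from
 * MIPE documentation:
 *
 *   u8  version; u8 pointer_size; u32 tp_count;
 *   then, for each tracepoint descriptor:
 *     u32 id;
 *     u32 len; char id_str[len];     (len counts the NUL terminator)
 *     u32 len; char name[len];
 *     u32 len; char arg_types[len];
 *     u32 len; char arg_names[len];
 *
 * A quick size check for one descriptor taken from tp_desc_obj[]:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *id_str = "KBASE_TL_NEW_CTX";
	const char *name = "object ctx is created";
	const char *arg_types = "@pII";
	const char *arg_names = "ctx,ctx_nr,tgid";
	size_t msg = 2 * sizeof(uint8_t) + sizeof(uint32_t);	/* preamble */

	msg += sizeof(uint32_t);				/* tracepoint id */
	msg += sizeof(uint32_t) + strlen(id_str) + 1;
	msg += sizeof(uint32_t) + strlen(name) + 1;
	msg += sizeof(uint32_t) + strlen(arg_types) + 1;
	msg += sizeof(uint32_t) + strlen(arg_names) + 1;

	printf("header payload for one tracepoint: %zu bytes\n", msg);
	return 0;
}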
++ */ ++static void kbasep_tlstream_timeline_header( ++ enum tl_stream_type stream_type, ++ const struct tp_desc *tp_desc, ++ u32 tp_count) +{ -+ struct rk_context *platform = -+ container_of(to_delayed_work(work), struct rk_context, work); -+ struct kbase_device *kbdev = platform->kbdev; ++ const u8 tv = SWTRACE_VERSION; /* protocol version */ ++ const u8 ps = sizeof(void *); /* pointer size */ ++ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count); ++ char *buffer; ++ size_t pos = 0; ++ unsigned long flags; ++ unsigned int i; + -+ if (!platform->is_powered) { -+ D("mali_dev is already powered off."); -+ return; -+ } ++ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); ++ KBASE_DEBUG_ASSERT(tp_desc); + -+ if (pm_runtime_enabled(kbdev->dev)) { -+ D("to put_sync_suspend mali_dev."); -+ pm_runtime_put_sync_suspend(kbdev->dev); ++ /* Calculate the size of the timeline message. */ ++ for (i = 0; i < tp_count; i++) { ++ msg_size += sizeof(tp_desc[i].id); ++ msg_size += ++ strnlen(tp_desc[i].id_str, STRLEN_MAX) + ++ sizeof(char) + sizeof(u32); ++ msg_size += ++ strnlen(tp_desc[i].name, STRLEN_MAX) + ++ sizeof(char) + sizeof(u32); ++ msg_size += ++ strnlen(tp_desc[i].arg_types, STRLEN_MAX) + ++ sizeof(char) + sizeof(u32); ++ msg_size += ++ strnlen(tp_desc[i].arg_names, STRLEN_MAX) + ++ sizeof(char) + sizeof(u32); + } + -+ rk_pm_disable_regulator(kbdev); ++ KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size); + -+ platform->is_powered = false; -+ KBASE_TIMELINE_GPU_POWER(kbdev, 0); -+ wake_unlock(&platform->wake_lock); -+} ++ buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+static int kbase_platform_rk_init(struct kbase_device *kbdev) -+{ -+ int ret = 0; -+ struct rk_context *platform; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv)); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &tp_count, sizeof(tp_count)); + -+ platform = kzalloc(sizeof(*platform), GFP_KERNEL); -+ if (!platform) { -+ E("err."); -+ return -ENOMEM; ++ for (i = 0; i < tp_count; i++) { ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, ++ &tp_desc[i].id, sizeof(tp_desc[i].id)); ++ pos = kbasep_tlstream_write_string( ++ buffer, pos, ++ tp_desc[i].id_str, msg_size - pos); ++ pos = kbasep_tlstream_write_string( ++ buffer, pos, ++ tp_desc[i].name, msg_size - pos); ++ pos = kbasep_tlstream_write_string( ++ buffer, pos, ++ tp_desc[i].arg_types, msg_size - pos); ++ pos = kbasep_tlstream_write_string( ++ buffer, pos, ++ tp_desc[i].arg_names, msg_size - pos); + } + -+ platform->is_powered = false; -+ platform->kbdev = kbdev; ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+ platform->delay_ms = 200; -+ if (of_property_read_u32(kbdev->dev->of_node, "power-off-delay-ms", -+ &platform->delay_ms)) -+ W("power-off-delay-ms not available."); ++ kbasep_tlstream_msgbuf_release(stream_type, flags); + -+ platform->power_off_wq = create_freezable_workqueue("gpu_power_off_wq"); -+ if (!platform->power_off_wq) { -+ E("couldn't create workqueue"); -+ ret = -ENOMEM; -+ goto err_wq; -+ } -+ INIT_DEFERRABLE_WORK(&platform->work, rk_pm_power_off_delay_work); ++ /* We don't expect any more data to be read in this stream. ++ * As header stream must be read before its associated body stream, ++ * make this packet visible to the user straightaway. 
*/ ++ kbasep_tlstream_flush_stream(stream_type); ++} + -+ wake_lock_init(&platform->wake_lock, WAKE_LOCK_SUSPEND, "gpu"); ++/*****************************************************************************/ + -+ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; ++int kbase_tlstream_init(void) ++{ ++ enum tl_stream_type i; + -+ ret = kbase_platform_rk_create_sysfs_files(kbdev->dev); -+ if (ret) { -+ E("fail to create sysfs_files. ret = %d.", ret); -+ goto err_sysfs_files; ++ /* Prepare stream structures. */ ++ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { ++ tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL); ++ if (!tl_stream[i]) ++ break; ++ kbasep_timeline_stream_init(tl_stream[i], i); ++ } ++ if (TL_STREAM_TYPE_COUNT > i) { ++ for (; i > 0; i--) { ++ kbasep_timeline_stream_term(tl_stream[i - 1]); ++ kfree(tl_stream[i - 1]); ++ } ++ return -ENOMEM; + } + -+ kbdev->platform_context = (void *)platform; -+ pm_runtime_enable(kbdev->dev); ++ /* Initialize autoflush timer. */ ++ atomic_set(&autoflush_timer_active, 0); ++ timer_setup(&autoflush_timer, ++ kbasep_tlstream_autoflush_timer_callback, ++ 0); + + return 0; -+ -+err_sysfs_files: -+ wake_lock_destroy(&platform->wake_lock); -+ destroy_workqueue(platform->power_off_wq); -+err_wq: -+ return ret; +} + -+static void kbase_platform_rk_term(struct kbase_device *kbdev) ++void kbase_tlstream_term(void) +{ -+ struct rk_context *platform = -+ (struct rk_context *)kbdev->platform_context; -+ -+ pm_runtime_disable(kbdev->dev); -+ kbdev->platform_context = NULL; ++ enum tl_stream_type i; + -+ if (platform) { -+ cancel_delayed_work_sync(&platform->work); -+ wake_lock_destroy(&platform->wake_lock); -+ destroy_workqueue(platform->power_off_wq); -+ platform->is_powered = false; -+ platform->kbdev = NULL; -+ kfree(platform); ++ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { ++ kbasep_timeline_stream_term(tl_stream[i]); ++ kfree(tl_stream[i]); + } -+ kbase_platform_rk_remove_sysfs_files(kbdev->dev); +} + -+struct kbase_platform_funcs_conf platform_funcs = { -+ .platform_init_func = &kbase_platform_rk_init, -+ .platform_term_func = &kbase_platform_rk_term, -+}; ++static void kbase_create_timeline_objects(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ unsigned int lpu_id; ++ unsigned int as_nr; ++ struct kbasep_kctx_list_element *element; + -+/*---------------------------------------------------------------------------*/ ++ /* Create LPU objects. */ ++ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { ++ u32 *lpu = ++ &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; ++ KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu); ++ } + -+static int rk_pm_callback_runtime_on(struct kbase_device *kbdev) -+{ -+ return 0; -+} ++ /* Create Address Space objects. */ ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr); + -+static void rk_pm_callback_runtime_off(struct kbase_device *kbdev) -+{ ++ /* Create GPU object and make it retain all LPUs and address spaces. 
*/
++ KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
++ kbdev,
++ kbdev->gpu_props.props.raw_props.gpu_id,
++ kbdev->gpu_props.num_cores);
++
++ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
++ void *lpu =
++ &kbdev->gpu_props.props.raw_props.js_features[lpu_id];
++ KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
++ }
++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
++ KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
++ &kbdev->as[as_nr],
++ kbdev);
++
++ /* Create object for each known context. */
++ mutex_lock(&kbdev->kctx_list_lock);
++ list_for_each_entry(element, &kbdev->kctx_list, link) {
++ KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
++ element->kctx,
++ (u32)(element->kctx->id),
++ (u32)(element->kctx->tgid));
++ }
++ /* Before releasing the lock, reset body stream buffers.
++ * This will prevent context creation messages from being directed to
++ * both the summary and body streams.
++ */
++ kbase_tlstream_reset_body_streams();
++ mutex_unlock(&kbdev->kctx_list_lock);
++ /* Static objects are placed into the summary packet that needs to be
++ * transmitted first. Flush all streams to make it available to
++ * user space.
++ */
++ kbase_tlstream_flush_streams();
+}
+
-+static int rk_pm_callback_power_on(struct kbase_device *kbdev)
++int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
+{
-+ int ret = 1; /* Assume GPU has been powered off */
-+ int err = 0;
-+ struct rk_context *platform = get_rk_context(kbdev);
++ int ret;
++ u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
-+ cancel_delayed_work_sync(&platform->work);
++ if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
++ int rcode;
+
-+ err = rk_pm_enable_clk(kbdev);
-+ if (err) {
-+ E("failed to enable clk: %d", err);
-+ return err;
-+ }
++ ret = anon_inode_getfd(
++ "[mali_tlstream]",
++ &kbasep_tlstream_fops,
++ kctx,
++ O_RDONLY | O_CLOEXEC);
++ if (ret < 0) {
++ atomic_set(&kbase_tlstream_enabled, 0);
++ return ret;
++ }
+
-+ if (platform->is_powered) {
-+ D("mali_device is already powered.");
-+ return 0;
-+ }
++ /* Reset and initialize header streams. */
++ kbasep_timeline_stream_reset(
++ tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
++ kbasep_timeline_stream_reset(
++ tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
++ kbasep_timeline_stream_reset(
++ tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
++ kbasep_tlstream_timeline_header(
++ TL_STREAM_TYPE_OBJ_HEADER,
++ tp_desc_obj,
++ ARRAY_SIZE(tp_desc_obj));
++ kbasep_tlstream_timeline_header(
++ TL_STREAM_TYPE_AUX_HEADER,
++ tp_desc_aux,
++ ARRAY_SIZE(tp_desc_aux));
+
-+ /* we must enable vdd_gpu before pd_gpu_in_chip. */
-+ err = rk_pm_enable_regulator(kbdev);
-+ if (err) {
-+ E("fail to enable regulator, err : %d.", err);
-+ return err;
-+ }
++ /* Start autoflush timer. */
++ atomic_set(&autoflush_timer_active, 1);
++ rcode = mod_timer(
++ &autoflush_timer,
++ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
++ CSTD_UNUSED(rcode);
+
-+ /* If runtime_pm of mali_dev is enabled, then... */
-+ if (pm_runtime_enabled(kbdev->dev)) {
-+ D("to resume mali_dev syncly.");
-+ /* The 'on' operation on pd_in_chip
-+ * will be completed in the runtime_pm_callbacks of pm_domain.
-+ */
-+ err = pm_runtime_get_sync(kbdev->dev);
-+ if (err < 0) {
-+ E("failed to runtime resume device: %d.", err);
-+ return err;
-+ } else if (err == 1) { /* runtime_pm_status is still active */
-+ D("chip has NOT been powered off, no need to re-init.");
-+ ret = 0;
++ /* If job dumping is enabled, readjust the software event's
++ * timeout as the default value of 3 seconds is often
++ * insufficient.
*/ ++ if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { ++ dev_info(kctx->kbdev->dev, ++ "Job dumping is enabled, readjusting the software event's timeout\n"); ++ atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms, ++ 1800000); + } -+ } + -+ platform->is_powered = true; -+ KBASE_TIMELINE_GPU_POWER(kbdev, 1); -+ wake_lock(&platform->wake_lock); ++ /* Summary stream was cleared during acquire. ++ * Create static timeline objects that will be ++ * read by client. ++ */ ++ kbase_create_timeline_objects(kctx); ++ ++ } else { ++ ret = -EBUSY; ++ } + + return ret; +} + -+static void rk_pm_callback_power_off(struct kbase_device *kbdev) ++void kbase_tlstream_flush_streams(void) +{ -+ struct rk_context *platform = get_rk_context(kbdev); ++ enum tl_stream_type stype; + -+ rk_pm_disable_clk(kbdev); -+ queue_delayed_work(platform->power_off_wq, &platform->work, -+ msecs_to_jiffies(platform->delay_ms)); ++ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) ++ kbasep_tlstream_flush_stream(stype); +} + -+int rk_kbase_device_runtime_init(struct kbase_device *kbdev) ++void kbase_tlstream_reset_body_streams(void) +{ -+ return 0; ++ kbasep_timeline_stream_reset( ++ tl_stream[TL_STREAM_TYPE_OBJ]); ++ kbasep_timeline_stream_reset( ++ tl_stream[TL_STREAM_TYPE_AUX]); +} + -+void rk_kbase_device_runtime_disable(struct kbase_device *kbdev) ++#if MALI_UNIT_TEST ++void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) +{ ++ KBASE_DEBUG_ASSERT(bytes_collected); ++ KBASE_DEBUG_ASSERT(bytes_generated); ++ *bytes_collected = atomic_read(&tlstream_bytes_collected); ++ *bytes_generated = atomic_read(&tlstream_bytes_generated); +} ++#endif /* MALI_UNIT_TEST */ + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = rk_pm_callback_power_on, -+ .power_off_callback = rk_pm_callback_power_off, -+#ifdef CONFIG_PM -+ .power_runtime_init_callback = rk_kbase_device_runtime_init, -+ .power_runtime_term_callback = rk_kbase_device_runtime_disable, -+ .power_runtime_on_callback = rk_pm_callback_runtime_on, -+ .power_runtime_off_callback = rk_pm_callback_runtime_off, -+#else /* CONFIG_PM */ -+ .power_runtime_init_callback = NULL, -+ .power_runtime_term_callback = NULL, -+ .power_runtime_on_callback = NULL, -+ .power_runtime_off_callback = NULL, -+#endif /* CONFIG_PM */ -+}; ++/*****************************************************************************/ + -+int kbase_platform_early_init(void) ++void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) +{ -+ /* Nothing needed at this stage */ -+ return 0; -+} ++ const u32 msg_id = KBASE_TL_NEW_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + ++ sizeof(tgid); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+/*---------------------------------------------------------------------------*/ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+void kbase_platform_rk_shutdown(struct kbase_device *kbdev) -+{ -+ I("to make vdd_gpu enabled for turning off pd_gpu in pm_framework."); -+ rk_pm_enable_regulator(kbdev); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &nr, sizeof(nr)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &tgid, sizeof(tgid)); ++ ++ KBASE_DEBUG_ASSERT(msg_size 
== pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + -+/*---------------------------------------------------------------------------*/ ++void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) ++{ ++ const u32 msg_id = KBASE_TL_NEW_GPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) + ++ sizeof(core_count); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+#ifdef CONFIG_REGULATOR -+static int rk_pm_enable_regulator(struct kbase_device *kbdev) ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &id, sizeof(id)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &core_count, sizeof(core_count)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); ++} ++ ++void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +{ -+ int ret = 0; ++ const u32 msg_id = KBASE_TL_NEW_LPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) + ++ sizeof(fn); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ if (!kbdev->regulator) { -+ W("no mali regulator control, no need to enable."); -+ goto EXIT; -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ D("to enable regulator."); -+ ret = regulator_enable(kbdev->regulator); -+ if (ret) { -+ E("fail to enable regulator, ret : %d.", ret); -+ goto EXIT; -+ } ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &nr, sizeof(nr)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &fn, sizeof(fn)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+EXIT: -+ return ret; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + -+static void rk_pm_disable_regulator(struct kbase_device *kbdev) ++void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +{ -+ if (!(kbdev->regulator)) { -+ W("no mali regulator control, no need to disable."); -+ return; -+ } ++ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ D("to disable regulator."); -+ regulator_disable(kbdev->regulator); ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} -+#endif + -+static int rk_pm_enable_clk(struct kbase_device *kbdev) ++void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +{ -+ int err = 0; ++ const u32 msg_id = 
KBASE_TL_NEW_AS; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ if (!(kbdev->clock)) { -+ W("no mali clock control, no need to enable."); -+ } else { -+ D("to enable clk."); -+ err = clk_enable(kbdev->clock); -+ if (err) -+ E("failed to enable clk: %d.", err); -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ return err; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &nr, sizeof(nr)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + -+static void rk_pm_disable_clk(struct kbase_device *kbdev) ++void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +{ -+ if (!(kbdev->clock)) { -+ W("no mali clock control, no need to disable."); -+ } else { -+ D("to disable clk."); -+ clk_disable(kbdev->clock); -+ } ++ const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ_SUMMARY, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + -+/*---------------------------------------------------------------------------*/ ++/*****************************************************************************/ + -+static ssize_t utilisation_period_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context *platform = get_rk_context(kbdev); -+ ssize_t ret = 0; ++ const u32 msg_id = KBASE_TL_NEW_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + ++ sizeof(tgid); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ return ret; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &nr, sizeof(nr)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &tgid, sizeof(tgid)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static ssize_t utilisation_period_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, -+ size_t count) ++void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context 
*platform = get_rk_context(kbdev); -+ int ret = 0; ++ const u32 msg_id = KBASE_TL_NEW_ATOM; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) + ++ sizeof(nr); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ ret = kstrtouint(buf, 0, &platform->utilisation_period); -+ if (ret) { -+ E("invalid input period : %s.", buf); -+ return ret; -+ } -+ D("set utilisation_period to '%d'.", platform->utilisation_period); ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ return count; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &nr, sizeof(nr)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static ssize_t utilisation_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) ++void __kbase_tlstream_tl_del_ctx(void *context) +{ -+ struct kbase_device *kbdev = dev_get_drvdata(dev); -+ struct rk_context *platform = get_rk_context(kbdev); -+ ssize_t ret = 0; -+ unsigned long period_in_us = platform->utilisation_period * 1000; -+ unsigned long total_time; -+ unsigned long busy_time; -+ unsigned long utilisation; ++ const u32 msg_id = KBASE_TL_DEL_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(context); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ kbase_pm_reset_dvfs_utilisation(kbdev); -+ usleep_range(period_in_us, period_in_us + 100); -+ kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time); -+ /* 'devfreq_dev_profile' instance registered to devfreq -+ * also uses kbase_pm_reset_dvfs_utilisation -+ * and kbase_pm_get_dvfs_utilisation. -+ * it's better to cat this file when DVFS is disabled. 
-+ */ -+ D("total_time : %lu, busy_time : %lu.", total_time, busy_time); ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ utilisation = busy_time * 100 / total_time; -+ ret += snprintf(buf, PAGE_SIZE, "%ld\n", utilisation); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+ return ret; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static DEVICE_ATTR_RW(utilisation_period); -+static DEVICE_ATTR_RO(utilisation); -+ -+static int kbase_platform_rk_create_sysfs_files(struct device *dev) ++void __kbase_tlstream_tl_del_atom(void *atom) +{ -+ int ret = 0; -+ -+ ret = device_create_file(dev, &dev_attr_utilisation_period); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation_period'."); -+ goto out; -+ } ++ const u32 msg_id = KBASE_TL_DEL_ATOM; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ ret = device_create_file(dev, &dev_attr_utilisation); -+ if (ret) { -+ E("fail to create sysfs file 'utilisation'."); -+ goto remove_utilisation_period; -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ return 0; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+remove_utilisation_period: -+ device_remove_file(dev, &dev_attr_utilisation_period); -+out: -+ return ret; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static void kbase_platform_rk_remove_sysfs_files(struct device *dev) ++void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) +{ -+ device_remove_file(dev, &dev_attr_utilisation_period); -+ device_remove_file(dev, &dev_attr_utilisation); ++ const u32 msg_id = KBASE_TL_RET_CTX_LPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static int rk3288_get_soc_info(struct device *dev, struct device_node *np, -+ int *bin, int *process) ++void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +{ -+ int ret = -EINVAL; -+ u8 value = 0; -+ char *name; ++ const u32 msg_id = KBASE_TL_RET_ATOM_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ if (!bin) -+ goto out; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ if (soc_is_rk3288w()) -+ name = "performance-w"; -+ else -+ name = "performance"; -+ if 
(of_property_match_string(np, "nvmem-cell-names", name) >= 0) { -+ ret = rockchip_nvmem_cell_read_u8(np, name, &value); -+ if (ret) { -+ dev_err(dev, "Failed to get soc performance value\n"); -+ goto out; -+ } -+ if (value & 0x2) -+ *bin = 3; -+ else if (value & 0x01) -+ *bin = 2; -+ else -+ *bin = 0; -+ } else { -+ dev_err(dev, "Failed to get bin config\n"); -+ } -+ if (*bin >= 0) -+ dev_info(dev, "bin=%d\n", *bin); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+out: -+ return ret; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static const struct rockchip_opp_data rk3288_gpu_opp_data = { -+ .get_soc_info = rk3288_get_soc_info, -+}; ++void __kbase_tlstream_tl_ret_atom_lpu( ++ void *atom, void *lpu, const char *attrib_match_list) ++{ ++ const u32 msg_id = KBASE_TL_RET_ATOM_LPU; ++ const size_t msg_s0 = sizeof(u32) + sizeof(char) + ++ strnlen(attrib_match_list, STRLEN_MAX); ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + ++ sizeof(atom) + sizeof(lpu) + msg_s0; ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+static const struct of_device_id rockchip_mali_of_match[] = { -+ { -+ .compatible = "rockchip,rk3288", -+ .data = (void *)&rk3288_gpu_opp_data, -+ }, -+ { -+ .compatible = "rockchip,rk3288w", -+ .data = (void *)&rk3288_gpu_opp_data, -+ }, -+ {}, -+}; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev) -+{ -+ rockchip_get_opp_data(rockchip_mali_of_match, &kbdev->opp_info); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ pos = kbasep_tlstream_write_string( ++ buffer, pos, attrib_match_list, msg_s0); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+ return rockchip_init_opp_table(kbdev->dev, &kbdev->opp_info, -+ "gpu_leakage", "mali"); ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} -diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h -new file mode 100644 -index 000000000..6eab25014 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h -@@ -0,0 +1,62 @@ -+/* drivers/gpu/t6xx/kbase/src/platform/rk/mali_kbase_platform.h -+ * Rockchip SoC Mali-Midgard platform-dependent codes -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software FoundatIon. -+ */ + -+/** -+ * @file mali_kbase_rk.h -+ * -+ * defines work_context type of platform_dependent_part. 
-+ */ ++void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) ++{ ++ const u32 msg_id = KBASE_TL_NRET_CTX_LPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+#ifndef _MALI_KBASE_RK_H_ -+#define _MALI_KBASE_RK_H_ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+#include ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+/*---------------------------------------------------------------------------*/ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) ++void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) ++{ ++ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+/*---------------------------------------------------------------------------*/ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+/* -+ * struct rk_context - work_context of platform_dependent_part_of_rk. -+ */ -+struct rk_context { -+ /* -+ * record the status of common_parts calling 'power_on_callback' -+ * and 'power_off_callback'. -+ */ -+ bool is_powered; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &context, sizeof(context)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+ struct kbase_device *kbdev; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+ struct workqueue_struct *power_off_wq; -+ /* delayed_work_to_power_off_gpu. */ -+ struct delayed_work work; -+ unsigned int delay_ms; ++void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) ++{ ++ const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ /* -+ * WAKE_LOCK_SUSPEND for ensuring to run -+ * delayed_work_to_power_off_gpu before suspend. -+ */ -+ struct wake_lock wake_lock; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ /* debug only, the period in ms to count gpu_utilisation. 
*/ -+ unsigned int utilisation_period; -+}; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom1, sizeof(atom1)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom2, sizeof(atom2)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+/*---------------------------------------------------------------------------*/ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+static inline struct rk_context *get_rk_context( -+ const struct kbase_device *kbdev) ++void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) +{ -+ return (struct rk_context *)(kbdev->platform_context); ++ const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom1, sizeof(atom1)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom2, sizeof(atom2)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+#endif /* _MALI_KBASE_RK_H_ */ ++void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) ++{ ++ const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild -new file mode 100755 -index 000000000..1caa29366 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild -@@ -0,0 +1,18 @@ -+# -+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom1, sizeof(atom1)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom2, sizeof(atom2)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+mali_kbase-y += \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_cpu_vexpress.o -diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..02835f129 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h -@@ -0,0 +1,75 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. 
-+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + ++void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) ++{ ++ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+#include "mali_kbase_cpu_vexpress.h" ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &lpu, sizeof(lpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+/** -+ * Maximum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. -+ * -+ * Attached value: number in kHz -+ * Default value: NA -+ */ -+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq() -+/** -+ * Minimum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. -+ * -+ * Attached value: number in kHz -+ * Default value: NA -+ */ -+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq() ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+/** -+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock -+ * -+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func -+ * for the function prototype. -+ * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA -+ */ -+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) ++void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) ++{ ++ const u32 msg_id = KBASE_TL_RET_AS_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+/** -+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock -+ * -+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func -+ * for the function prototype. -+ * -+ * Attached value: A kbase_gpu_clk_speed_func. 
-+ * Default Value: NA -+ */ -+#define GPU_SPEED_FUNC (NULL) ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+/** -+ * Power management configuration -+ * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf -+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &ctx, sizeof(ctx)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+/** -+ * Platform specific configuration functions -+ * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf -+ */ -+#define PLATFORM_FUNCS (NULL) ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c -new file mode 100644 -index 000000000..15ce2bc5e ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c -@@ -0,0 +1,85 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) ++{ ++ const u32 msg_id = KBASE_TL_NRET_AS_CTX; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &ctx, sizeof(ctx)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + ++void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) ++{ ++ const u32 msg_id = KBASE_TL_RET_ATOM_AS; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+#include -+#include -+#include -+#include -+#include "mali_kbase_cpu_vexpress.h" -+#include "mali_kbase_config_platform.h" ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+#define HARD_RESET_AT_POWER_OFF 0 ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 68, -+ .mmu_irq_number = 69, -+ .gpu_irq_number = 70, -+ .io_memory_region = { -+ .start = 0xFC010000, -+ .end = 0xFC010000 + (4096 * 4) - 1 -+ } -+}; -+#endif /* CONFIG_OF */ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+static int pm_callback_power_on(struct kbase_device *kbdev) ++void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; ++ const u32 msg_id = KBASE_TL_NRET_ATOM_AS; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &as, sizeof(as)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+static void pm_callback_power_off(struct kbase_device *kbdev) ++void __kbase_tlstream_tl_attrib_atom_config( ++ void *atom, u64 jd, u64 affinity, u32 config) +{ -+#if HARD_RESET_AT_POWER_OFF -+ /* Cause a GPU hard reset to test whether we have actually idled the GPU -+ * and that we properly reconfigure the GPU on power up. -+ * Usually this would be dangerous, but if the GPU is working correctly it should -+ * be completely safe as the GPU should not be active at this point. -+ * However this is disabled normally because it will most likely interfere with -+ * bus logging etc. 
-+ */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); -+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); -+#endif -+} ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + ++ sizeof(jd) + sizeof(affinity) + sizeof(config); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL -+}; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources -+#endif -+}; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &jd, sizeof(jd)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &affinity, sizeof(affinity)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &config, sizeof(config)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+ -+int kbase_platform_early_init(void) ++void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) +{ -+ /* Nothing needed at this stage */ -+ return 0; -+} -diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c -new file mode 100644 -index 000000000..4665f98cb ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c -@@ -0,0 +1,279 @@ -+/* -+ * -+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. 
-+ * -+ */ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &prio, sizeof(prio)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+#include -+#include -+#include "mali_kbase_cpu_vexpress.h" ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+#define HZ_IN_MHZ (1000000) ++void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) -+#define MOTHERBOARD_SYS_CFG_START (0x10000000) -+#define SYS_CFGDATA_OFFSET (0x000000A0) -+#define SYS_CFGCTRL_OFFSET (0x000000A4) -+#define SYS_CFGSTAT_OFFSET (0x000000A8) ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) -+#define READ_REG_BIT_VALUE (0 << 30) -+#define DCC_DEFAULT_BIT_VALUE (0 << 26) -+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) -+#define SITE_DEFAULT_BIT_VALUE (1 << 16) -+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) -+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) -+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) -+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &state, sizeof(state)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+#define FEED_REG_BIT_MASK (0x0F) -+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) -+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) -+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) -+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) -+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+/* the following three values used for reading -+ * HBI value of the LogicTile daughterboard */ -+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000) -+#define VE_SYS_PROC_ID1_OFFSET (0x00000088) -+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF) ++void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom) ++{ ++ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<> -+ FCLK_PA_DIVIDE_BIT_SHIFT); -+ /* CFGRW0[10:7] */ -+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << -+ FCLK_PB_DIVIDE_BIT_SHIFT)) >> -+ FCLK_PB_DIVIDE_BIT_SHIFT); -+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); -+ } else if (IS_SINGLE_BIT_SET(reg_val, 1)) { -+ /* CFGRW0[1] - CLKOC */ -+ /* CFGRW0[6:3] */ -+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << -+ FCLK_PA_DIVIDE_BIT_SHIFT)) >> -+ FCLK_PA_DIVIDE_BIT_SHIFT); -+ /* CFGRW0[14:11] */ -+ pc_divide = ((reg_val & 
(FEED_REG_BIT_MASK << -+ FCLK_PC_DIVIDE_BIT_SHIFT)) >> -+ FCLK_PC_DIVIDE_BIT_SHIFT); -+ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1); -+ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { -+ /* CFGRW0[2] - FACLK */ -+ /* CFGRW0[18:15] */ -+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << -+ AXICLK_PA_DIVIDE_BIT_SHIFT)) >> -+ AXICLK_PA_DIVIDE_BIT_SHIFT); -+ /* CFGRW0[22:19] */ -+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << -+ AXICLK_PB_DIVIDE_BIT_SHIFT)) >> -+ AXICLK_PB_DIVIDE_BIT_SHIFT); -+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); -+ } else { -+ err = -EIO; -+ } ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+set_reg_error: -+ongoing_request: -+ raw_spin_unlock(&syscfg_lock); -+ *cpu_clock /= HZ_IN_MHZ; ++void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) ++{ ++ const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(atom); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ if (!err) -+ cpu_clock_speed = *cpu_clock; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ iounmap(scc_reg); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &atom, sizeof(atom)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); + -+scc_reg_map_failed: -+ iounmap(syscfg_reg); ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); ++} + -+syscfg_reg_map_failed: ++void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) ++{ ++ const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ return err; ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_OBJ, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + -+/** -+ * kbase_get_platform_logic_tile_type - determines which LogicTile type -+ * is used by Versatile Express -+ * -+ * When platform_config build parameter is specified as vexpress, i.e., -+ * platform_config=vexpress, GPU frequency may vary dependent on the -+ * particular platform. The GPU frequency depends on the LogicTile type. 
-+ * -+ * This function determines which LogicTile type is used by the platform by -+ * reading the HBI value of the daughterboard which holds the LogicTile: -+ * -+ * 0x217 HBI0217 Virtex-6 -+ * 0x192 HBI0192 Virtex-5 -+ * 0x247 HBI0247 Virtex-7 -+ * -+ * Return: HBI value of the logic tile daughterboard, zero if not accessible -+ */ -+static u32 kbase_get_platform_logic_tile_type(void) ++/*****************************************************************************/ ++ ++void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +{ -+ void __iomem *syscfg_reg = NULL; -+ u32 sys_procid1 = 0; ++ const u32 msg_id = KBASE_AUX_PM_STATE; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + ++ sizeof(state); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4); -+ if (NULL != syscfg_reg) { -+ sys_procid1 = readl(syscfg_reg); -+ iounmap(syscfg_reg); -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); + -+ return sys_procid1 & VE_LOGIC_TILE_HBI_MASK; ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &core_type, sizeof(core_type)); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + -+u32 kbase_get_platform_min_freq(void) ++void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +{ -+ u32 ve_logic_tile = kbase_get_platform_logic_tile_type(); ++ const u32 msg_id = KBASE_AUX_PAGEFAULT; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + ++ sizeof(page_count_change); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ switch (ve_logic_tile) { -+ case 0x217: -+ /* Virtex 6, HBI0217 */ -+ return VE_VIRTEX6_GPU_FREQ_MIN; -+ case 0x247: -+ /* Virtex 7, HBI0247 */ -+ return VE_VIRTEX7_GPU_FREQ_MIN; -+ default: -+ /* all other logic tiles, i.e., Virtex 5 HBI0192 -+ * or unsuccessful reading from the platform - -+ * fall back to some default value */ -+ return VE_DEFAULT_GPU_FREQ_MIN; -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, ++ &page_count_change, sizeof(page_count_change)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + -+u32 kbase_get_platform_max_freq(void) ++void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +{ -+ u32 ve_logic_tile = kbase_get_platform_logic_tile_type(); ++ const u32 msg_id = KBASE_AUX_PAGESALLOC; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + ++ sizeof(page_count); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; + -+ switch (ve_logic_tile) { -+ case 0x217: -+ /* Virtex 6, HBI0217 */ -+ return VE_VIRTEX6_GPU_FREQ_MAX; -+ case 0x247: -+ /* Virtex 7, HBI0247 */ -+ return VE_VIRTEX7_GPU_FREQ_MAX; -+ default: -+ /* all other logic tiles, i.e., Virtex 5 HBI0192 -+ * or unsuccessful reading from the platform - -+ * fall back 
to some default value */ -+ return VE_DEFAULT_GPU_FREQ_MAX; -+ } ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &page_count, sizeof(page_count)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} -diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h ++ ++void __kbase_tlstream_aux_devfreq_target(u64 target_freq) ++{ ++ const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(target_freq); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &target_freq, sizeof(target_freq)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++} ++ ++void __kbase_tlstream_aux_protected_enter_start(void *gpu) ++{ ++ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++} ++void __kbase_tlstream_aux_protected_enter_end(void *gpu) ++{ ++ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++} ++ ++void __kbase_tlstream_aux_protected_leave_start(void *gpu) ++{ ++ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++} ++void 
__kbase_tlstream_aux_protected_leave_end(void *gpu) ++{ ++ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; ++ const size_t msg_size = ++ sizeof(msg_id) + sizeof(u64) + sizeof(gpu); ++ unsigned long flags; ++ char *buffer; ++ size_t pos = 0; ++ ++ buffer = kbasep_tlstream_msgbuf_acquire( ++ TL_STREAM_TYPE_AUX, ++ msg_size, &flags); ++ KBASE_DEBUG_ASSERT(buffer); ++ ++ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_tlstream_write_timestamp(buffer, pos); ++ pos = kbasep_tlstream_write_bytes( ++ buffer, pos, &gpu, sizeof(gpu)); ++ KBASE_DEBUG_ASSERT(msg_size == pos); ++ ++ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); ++} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h new file mode 100644 -index 000000000..da8656981 +index 000000000..c0a1117d5 --- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h -@@ -0,0 +1,38 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +@@ -0,0 +1,623 @@ +/* + * -+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -404268,324 +405411,618 @@ index 000000000..da8656981 + + + ++#if !defined(_KBASE_TLSTREAM_H) ++#define _KBASE_TLSTREAM_H + ++#include + -+#ifndef _KBASE_CPU_VEXPRESS_H_ -+#define _KBASE_CPU_VEXPRESS_H_ ++/*****************************************************************************/ + +/** -+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func. ++ * kbase_tlstream_init - initialize timeline infrastructure in kernel ++ * Return: zero on success, negative number on error + */ -+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); ++int kbase_tlstream_init(void); + +/** -+ * Get the minimum GPU frequency for the attached logic tile ++ * kbase_tlstream_term - terminate timeline infrastructure in kernel ++ * ++ * Timeline need have to been previously enabled with kbase_tlstream_init(). + */ -+u32 kbase_get_platform_min_freq(void); ++void kbase_tlstream_term(void); + +/** -+ * Get the maximum GPU frequency for the attached logic tile ++ * kbase_tlstream_acquire - acquire timeline stream file descriptor ++ * @kctx: kernel common context ++ * @flags: timeline stream flags ++ * ++ * This descriptor is meant to be used by userspace timeline to gain access to ++ * kernel timeline stream. This stream is later broadcasted by user space to the ++ * timeline client. ++ * Only one entity can own the descriptor at any given time. Descriptor shall be ++ * closed if unused. If descriptor cannot be obtained (i.e. when it is already ++ * being used) return will be a negative value. ++ * ++ * Return: file descriptor on success, negative number on error + */ -+u32 kbase_get_platform_max_freq(void); -+ -+#endif /* _KBASE_CPU_VEXPRESS_H_ */ -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild -new file mode 100755 -index 000000000..7efe8fa42 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild -@@ -0,0 +1,16 @@ -+# -+# (C) COPYRIGHT 2013-2014, 2016 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# -+ ++int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags); + -+mali_kbase-y += $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..0efbf3962 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h -@@ -0,0 +1,73 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++/** ++ * kbase_tlstream_flush_streams - flush timeline streams. + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Function will flush pending data in all timeline streams. ++ */ ++void kbase_tlstream_flush_streams(void); ++ ++/** ++ * kbase_tlstream_reset_body_streams - reset timeline body streams. + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Function will discard pending data in all timeline body streams. ++ */ ++void kbase_tlstream_reset_body_streams(void); ++ ++#if MALI_UNIT_TEST ++/** ++ * kbase_tlstream_test - start timeline stream data generator ++ * @tpw_count: number of trace point writers in each context ++ * @msg_delay: time delay in milliseconds between trace points written by one ++ * writer ++ * @msg_count: number of trace points written by one writer ++ * @aux_msg: if non-zero aux messages will be included + * ++ * This test starts a requested number of asynchronous writers in both IRQ and ++ * thread context. Each writer will generate required number of test ++ * tracepoints (tracepoints with embedded information about writer that ++ * should be verified by user space reader). Tracepoints will be emitted in ++ * all timeline body streams. If aux_msg is non-zero writer will also ++ * generate not testable tracepoints (tracepoints without information about ++ * writer). These tracepoints are used to check correctness of remaining ++ * timeline message generating functions. Writer will wait requested time ++ * between generating another set of messages. This call blocks until all ++ * writers finish. 
++ */ ++void kbase_tlstream_test( ++ unsigned int tpw_count, ++ unsigned int msg_delay, ++ unsigned int msg_count, ++ int aux_msg); ++ ++/** ++ * kbase_tlstream_stats - read timeline stream statistics ++ * @bytes_collected: will hold number of bytes read by the user ++ * @bytes_generated: will hold number of bytes generated by trace points + */ ++void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); ++#endif /* MALI_UNIT_TEST */ ++ ++/*****************************************************************************/ ++ ++#define TL_ATOM_STATE_IDLE 0 ++#define TL_ATOM_STATE_READY 1 ++#define TL_ATOM_STATE_DONE 2 ++#define TL_ATOM_STATE_POSTED 3 ++ ++void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); ++void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); ++void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); ++void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); ++void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); ++void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); ++void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); ++void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); ++void __kbase_tlstream_tl_del_ctx(void *context); ++void __kbase_tlstream_tl_del_atom(void *atom); ++void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); ++void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); ++void __kbase_tlstream_tl_ret_atom_lpu( ++ void *atom, void *lpu, const char *attrib_match_list); ++void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); ++void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); ++void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); ++void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); ++void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); ++void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); ++void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); ++void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); ++void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); ++void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); ++void __kbase_tlstream_tl_attrib_atom_config( ++ void *atom, u64 jd, u64 affinity, u32 config); ++void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); ++void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); ++void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom); ++void __kbase_tlstream_tl_attrib_atom_jit( ++ void *atom, u64 edit_addr, u64 new_addr); ++void __kbase_tlstream_tl_attrib_as_config( ++ void *as, u64 transtab, u64 memattr, u64 transcfg); ++void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); ++void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); ++void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); ++void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); ++void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); ++void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); ++void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); ++void __kbase_tlstream_aux_devfreq_target(u64 target_freq); ++void __kbase_tlstream_aux_protected_enter_start(void *gpu); ++void __kbase_tlstream_aux_protected_enter_end(void *gpu); ++void __kbase_tlstream_aux_protected_leave_start(void *gpu); ++void __kbase_tlstream_aux_protected_leave_end(void *gpu); ++ ++#define TLSTREAM_ENABLED (1 << 31) ++ ++extern atomic_t kbase_tlstream_enabled; ++ 
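The declarations above expose the raw __kbase_tlstream_* emitters together with the kbase_tlstream_enabled atomic and its TLSTREAM_ENABLED bit; the __TRACE_IF_ENABLED wrappers defined just below only forward to those emitters when that bit is set. A minimal sketch of how a driver-side caller might sequence the public API declared earlier in this header, assuming a hypothetical probe/teardown pair (my_probe, my_teardown and the example frequency value are illustrative only and are not part of the patch):

	/* Illustrative sketch only -- not part of kernel.patch. Builds inside the
	 * kbase driver; my_probe()/my_teardown() are hypothetical callers. */
	#include "mali_kbase_tlstream.h"

	static int my_probe(void)
	{
		/* zero on success, negative value on error (see kernel-doc above) */
		int err = kbase_tlstream_init();

		if (err)
			return err;

		/* The wrapper macros are near no-ops until the TLSTREAM_ENABLED bit
		 * is set in kbase_tlstream_enabled (see __TRACE_IF_ENABLED below). */
		KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(200000000ull);

		return 0;
	}

	static void my_teardown(void)
	{
		kbase_tlstream_flush_streams();	/* push out any buffered events */
		kbase_tlstream_term();
	}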
++#define __TRACE_IF_ENABLED(trace_name, ...) \ ++ do { \ ++ int enabled = atomic_read(&kbase_tlstream_enabled); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_##trace_name(__VA_ARGS__); \ ++ } while (0) ++ ++#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ ++ do { \ ++ int enabled = atomic_read(&kbase_tlstream_enabled); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ ++ __kbase_tlstream_##trace_name(__VA_ARGS__); \ ++ } while (0) + ++#define __TRACE_IF_ENABLED_JD(trace_name, ...) \ ++ do { \ ++ int enabled = atomic_read(&kbase_tlstream_enabled); \ ++ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ ++ __kbase_tlstream_##trace_name(__VA_ARGS__); \ ++ } while (0) + ++/*****************************************************************************/ + +/** -+ * Maximum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. ++ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline ++ * summary ++ * @context: name of the context object ++ * @nr: context number ++ * @tgid: thread Group Id + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * Function emits a timeline message informing about context creation. Context ++ * is created with context number (its attribute), that can be used to link ++ * kbase context with userspace context. ++ * This message is directed to timeline summary stream. + */ -+#define GPU_FREQ_KHZ_MAX 5000 ++#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \ ++ __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) ++ +/** -+ * Minimum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. ++ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary ++ * @gpu: name of the GPU object ++ * @id: id value of this GPU ++ * @core_count: number of cores this GPU hosts + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * Function emits a timeline message informing about GPU creation. GPU is ++ * created with two attributes: id and core count. ++ * This message is directed to timeline summary stream. + */ -+#define GPU_FREQ_KHZ_MIN 5000 ++#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \ ++ __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) + +/** -+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock ++ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary ++ * @lpu: name of the Logical Processing Unit object ++ * @nr: sequential number assigned to this LPU ++ * @fn: property describing this LPU's functional abilities + * -+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func -+ * for the function prototype. ++ * Function emits a timeline message informing about LPU creation. LPU is ++ * created with two attributes: number linking this LPU with GPU's job slot ++ * and function bearing information about this LPU abilities. ++ * This message is directed to timeline summary stream. ++ */ ++#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \ ++ __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) ++ ++/** ++ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU ++ * @lpu: name of the Logical Processing Unit object ++ * @gpu: name of the GPU object + * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA ++ * Function emits a timeline message informing that LPU object shall be deleted ++ * along with GPU object. 
++ * This message is directed to timeline summary stream. + */ -+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed) ++#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \ ++ __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) + +/** -+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary ++ * @as: name of the address space object ++ * @nr: sequential number assigned to this address space + * -+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func -+ * for the function prototype. ++ * Function emits a timeline message informing about address space creation. ++ * Address space is created with one attribute: number identifying this ++ * address space. ++ * This message is directed to timeline summary stream. ++ */ ++#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \ ++ __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) ++ ++/** ++ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU ++ * @as: name of the address space object ++ * @gpu: name of the GPU object + * -+ * Attached value: A kbase_gpu_clk_speed_func. -+ * Default Value: NA ++ * Function emits a timeline message informing that address space object ++ * shall be deleted along with GPU object. ++ * This message is directed to timeline summary stream. + */ -+#define GPU_SPEED_FUNC (NULL) ++#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \ ++ __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) + +/** -+ * Power management configuration ++ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline ++ * @context: name of the context object ++ * @nr: context number ++ * @tgid: thread Group Id + * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * Function emits a timeline message informing about context creation. Context ++ * is created with context number (its attribute), that can be used to link ++ * kbase context with userspace context. + */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \ ++ __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) + +/** -+ * Platform specific configuration functions ++ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline ++ * @atom: name of the atom object ++ * @nr: sequential number assigned to this atom + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * Function emits a timeline message informing about atom creation. Atom is ++ * created with atom number (its attribute) that links it with actual work ++ * bucket id understood by hardware. + */ -+#define PLATFORM_FUNCS (NULL) ++#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \ ++ __TRACE_IF_ENABLED(tl_new_atom, atom, nr) + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c -new file mode 100644 -index 000000000..3ff0930fb ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c -@@ -0,0 +1,79 @@ -+/* ++/** ++ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline ++ * @context: name of the context object + * -+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. 
++ * Function emits a timeline message informing that context object ceased to ++ * exist. ++ */ ++#define KBASE_TLSTREAM_TL_DEL_CTX(context) \ ++ __TRACE_IF_ENABLED(tl_del_ctx, context) ++ ++/** ++ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline ++ * @atom: name of the atom object + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Function emits a timeline message informing that atom object ceased to ++ * exist. ++ */ ++#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \ ++ __TRACE_IF_ENABLED(tl_del_atom, atom) ++ ++/** ++ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU ++ * @context: name of the context object ++ * @lpu: name of the Logical Processing Unit object + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Function emits a timeline message informing that context is being held ++ * by LPU and must not be deleted unless it is released. ++ */ ++#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \ ++ __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) ++ ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context ++ * @atom: name of the atom object ++ * @context: name of the context object + * ++ * Function emits a timeline message informing that atom object is being held ++ * by context and must not be deleted unless it is released. + */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \ ++ __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) + ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU ++ * @atom: name of the atom object ++ * @lpu: name of the Logical Processing Unit object ++ * @attrib_match_list: list containing match operator attributes ++ * ++ * Function emits a timeline message informing that atom object is being held ++ * by LPU and must not be deleted unless it is released. ++ */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \ ++ __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) + ++/** ++ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU ++ * @context: name of the context object ++ * @lpu: name of the Logical Processing Unit object ++ * ++ * Function emits a timeline message informing that context is being released ++ * by LPU object. ++ */ ++#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \ ++ __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) + -+#include -+#include -+#include -+#include ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context ++ * @atom: name of the atom object ++ * @context: name of the context object ++ * ++ * Function emits a timeline message informing that atom object is being ++ * released by context. ++ */ ++#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \ ++ __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) + -+#define HARD_RESET_AT_POWER_OFF 0 ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU ++ * @atom: name of the atom object ++ * @lpu: name of the Logical Processing Unit object ++ * ++ * Function emits a timeline message informing that atom object is being ++ * released by LPU. 
++ */ ++#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \ ++ __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) + -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 68, -+ .mmu_irq_number = 69, -+ .gpu_irq_number = 70, -+ .io_memory_region = { -+ .start = 0x2f010000, -+ .end = 0x2f010000 + (4096 * 4) - 1} -+}; -+#endif ++/** ++ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context ++ * @as: name of the address space object ++ * @ctx: name of the context object ++ * ++ * Function emits a timeline message informing that address space object ++ * is being held by the context object. ++ */ ++#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \ ++ __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; -+} ++/** ++ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context ++ * @as: name of the address space object ++ * @ctx: name of the context object ++ * ++ * Function emits a timeline message informing that address space object ++ * is being released by atom. ++ */ ++#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \ ++ __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) + -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+#if HARD_RESET_AT_POWER_OFF -+ /* Cause a GPU hard reset to test whether we have actually idled the GPU -+ * and that we properly reconfigure the GPU on power up. -+ * Usually this would be dangerous, but if the GPU is working correctly it should -+ * be completely safe as the GPU should not be active at this point. -+ * However this is disabled normally because it will most likely interfere with -+ * bus logging etc. -+ */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); -+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); -+#endif -+} ++/** ++ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space ++ * @atom: name of the atom object ++ * @as: name of the address space object ++ * ++ * Function emits a timeline message informing that atom object is being held ++ * by address space and must not be deleted unless it is released. ++ */ ++#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \ ++ __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL -+}; ++/** ++ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space ++ * @atom: name of the atom object ++ * @as: name of the address space object ++ * ++ * Function emits a timeline message informing that atom object is being ++ * released by address space. ++ */ ++#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \ ++ __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) + -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources -+#endif -+}; ++/** ++ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom ++ * @atom1: name of the child atom object ++ * @atom2: name of the parent atom object that depends on child atom ++ * ++ * Function emits a timeline message informing that parent atom waits for ++ * child atom object to be completed before start its execution. 
++ */ ++#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \ ++ __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) + -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; -+} ++/** ++ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved ++ * @atom1: name of the child atom object ++ * @atom2: name of the parent atom object that depended on child atom ++ * ++ * Function emits a timeline message informing that parent atom execution ++ * dependency on child atom has been resolved. ++ */ ++#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \ ++ __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) + -+int kbase_platform_early_init(void) -+{ -+ /* Nothing needed at this stage */ -+ return 0; -+} -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild -new file mode 100755 -index 000000000..1caa29366 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild -@@ -0,0 +1,18 @@ -+# -+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/** ++ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms ++ * @atom1: name of the child atom object ++ * @atom2: name of the parent atom object that depended on child atom ++ * ++ * Function emits a timeline message informing that parent atom execution ++ * dependency on child atom has been resolved. ++ */ ++#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \ ++ __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) + ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes ++ * @atom: name of the atom object ++ * @jd: job descriptor address ++ * @affinity: job affinity ++ * @config: job config ++ * ++ * Function emits a timeline message containing atom attributes. ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \ ++ __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) + -+mali_kbase-y += \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o \ -+ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_cpu_vexpress.o -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h -new file mode 100644 -index 000000000..dbdf21e00 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h -@@ -0,0 +1,75 @@ -+/* ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority ++ * @atom: name of the atom object ++ * @prio: atom priority + * -+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * Function emits a timeline message containing atom priority. 
++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \ ++ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) ++ ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state ++ * @atom: name of the atom object ++ * @state: atom state + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++ * Function emits a timeline message containing atom state. ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \ ++ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) ++ ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE - atom caused priority change ++ * @atom: name of the atom object + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Function emits a timeline message signalling priority change ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(atom) \ ++ __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom) ++ ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom ++ * @atom: atom identifier ++ * @edit_addr: address edited by jit ++ * @new_addr: address placed into the edited location ++ */ ++#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \ ++ __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr) ++ ++/** ++ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes ++ * @as: assigned address space ++ * @transtab: configuration of the TRANSTAB register ++ * @memattr: configuration of the MEMATTR register ++ * @transcfg: configuration of the TRANSCFG register (or zero if not present) + * ++ * Function emits a timeline message containing address space attributes. + */ ++#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \ ++ __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) + ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex ++ * @atom: atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \ ++ __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) + ++/** ++ * KBASE_TLSTREAM_TL_EVENT_LPU_softstop ++ * @lpu: name of the LPU object ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \ ++ __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) + -+#include "mali_kbase_cpu_vexpress.h" ++/** ++ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue ++ * @atom: atom identifier ++ */ ++#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \ ++ __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) + +/** -+ * Maximum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. ++ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset ++ * @gpu: name of the GPU object + * -+ * Attached value: number in kHz -+ * Default value: NA ++ * This imperative tracepoint is specific to job dumping. ++ * Function emits a timeline message indicating GPU soft reset. + */ -+#define GPU_FREQ_KHZ_MAX 10000 ++#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \ ++ __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) ++ ++ +/** -+ * Minimum frequency GPU will be clocked at. Given in kHz. -+ * This must be specified as there is no default value. 
-+ * -+ * Attached value: number in kHz -+ * Default value: NA ++ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state ++ * @core_type: core type (shader, tiler, l2 cache, l3 cache) ++ * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) + */ -+#define GPU_FREQ_KHZ_MIN 10000 ++#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \ ++ __TRACE_IF_ENABLED(aux_pm_state, core_type, state) + +/** -+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock -+ * -+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func -+ * for the function prototype. -+ * -+ * Attached value: A kbase_cpu_clk_speed_func. -+ * Default Value: NA ++ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event ++ * resulting in new pages being mapped ++ * @ctx_nr: kernel context number ++ * @page_count_change: number of pages to be added + */ -+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) ++#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \ ++ __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) + +/** -+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated ++ * pages is changed ++ * @ctx_nr: kernel context number ++ * @page_count: number of pages used by the context ++ */ ++#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \ ++ __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) ++ ++/** ++ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS ++ * frequency ++ * @target_freq: new target frequency ++ */ ++#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \ ++ __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) ++ ++/** ++ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning ++ * to protected mode ++ * @gpu: name of the GPU object + * -+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func -+ * for the function prototype. ++ * Function emits a timeline message indicating the GPU is starting to ++ * transition to protected mode. ++ */ ++#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \ ++ __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu) ++ ++/** ++ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning ++ * to protected mode ++ * @gpu: name of the GPU object + * -+ * Attached value: A kbase_gpu_clk_speed_func. -+ * Default Value: NA ++ * Function emits a timeline message indicating the GPU has finished ++ * transitioning to protected mode. + */ -+#define GPU_SPEED_FUNC (NULL) ++#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \ ++ __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu) + +/** -+ * Power management configuration ++ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning ++ * to non-protected mode ++ * @gpu: name of the GPU object + * -+ * Attached value: pointer to @ref kbase_pm_callback_conf -+ * Default value: See @ref kbase_pm_callback_conf ++ * Function emits a timeline message indicating the GPU is starting to ++ * transition to non-protected mode. 
+ */ -+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \ ++ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu) + +/** -+ * Platform specific configuration functions ++ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning ++ * to non-protected mode ++ * @gpu: name of the GPU object + * -+ * Attached value: pointer to @ref kbase_platform_funcs_conf -+ * Default value: See @ref kbase_platform_funcs_conf ++ * Function emits a timeline message indicating the GPU has finished ++ * transitioning to non-protected mode. + */ -+#define PLATFORM_FUNCS (NULL) ++#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ ++ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) + -+extern struct kbase_pm_callback_conf pm_callbacks; -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c ++#endif /* _KBASE_TLSTREAM_H */ ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h new file mode 100644 -index 000000000..76ffe4a1e +index 000000000..e2e054420 --- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c -@@ -0,0 +1,83 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h +@@ -0,0 +1,264 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. @@ -404605,79 +406042,260 @@ index 000000000..76ffe4a1e + + + -+#include -+#include -+#include -+#include -+#include "mali_kbase_cpu_vexpress.h" ++/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + -+#define HARD_RESET_AT_POWER_OFF 0 ++/* ++ * The purpose of this header file is just to contain a list of trace code idenitifers ++ * ++ * Each identifier is wrapped in a macro, so that its string form and enum form can be created ++ * ++ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block. ++ * ++ * This allows automatic creation of an enum and a corresponding array of strings ++ * ++ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE. ++ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE. 
++ * ++ * e.g.: ++ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X ++ * typedef enum ++ * { ++ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) ++ * #include "mali_kbase_trace_defs.h" ++ * #undef KBASE_TRACE_CODE_MAKE_CODE ++ * } kbase_trace_code; ++ * ++ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE ++ * ++ * ++ * The use of the macro here is: ++ * - KBASE_TRACE_CODE_MAKE_CODE( X ) ++ * ++ * Which produces: ++ * - For an enum, KBASE_TRACE_CODE_X ++ * - For a string, "X" ++ * ++ * ++ * For example: ++ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: ++ * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum ++ * - "JM_JOB_COMPLETE" for the string ++ * - To use it to trace an event, do: ++ * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); ++ */ + -+#ifndef CONFIG_OF -+static struct kbase_io_resources io_resources = { -+ .job_irq_number = 75, -+ .mmu_irq_number = 76, -+ .gpu_irq_number = 77, -+ .io_memory_region = { -+ .start = 0x2F000000, -+ .end = 0x2F000000 + (4096 * 4) - 1} -+}; ++#if 0 /* Dummy section to avoid breaking formatting */ ++int dummy_array[] = { +#endif + -+static int pm_callback_power_on(struct kbase_device *kbdev) -+{ -+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ -+ return 1; -+} -+ -+static void pm_callback_power_off(struct kbase_device *kbdev) -+{ -+#if HARD_RESET_AT_POWER_OFF -+ /* Cause a GPU hard reset to test whether we have actually idled the GPU -+ * and that we properly reconfigure the GPU on power up. -+ * Usually this would be dangerous, but if the GPU is working correctly it should -+ * be completely safe as the GPU should not be active at this point. -+ * However this is disabled normally because it will most likely interfere with -+ * bus logging etc. -+ */ -+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); -+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); -+#endif -+} ++/* ++ * Core events ++ */ ++ /* no info_val, no gpu_addr, no atom */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), ++ /* no info_val, no gpu_addr, no atom */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), ++ /* info_val == GPU_IRQ_STATUS register */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), ++ /* info_val == bits cleared */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), ++ /* info_val == GPU_IRQ_STATUS register */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), ++ /* GPU addr==dump address */ ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), ++ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), ++/* ++ * Job Slot management events ++ */ ++ /* info_val==irq rawstat at start */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), ++ /* info_val==jobs processed */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), ++/* In the following: ++ * ++ * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some soft-stop cases) ++ * - uatom==kernel-side mapped uatom address (for correlation with user-side) ++ */ ++ /* info_val==exit code; gpu_addr==chain gpuaddr */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), ++ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), ++ /* gpu_addr is as follows: ++ * - If JS_STATUS active after soft-stop, val==gpu addr written to ++ * JS_HEAD on submit ++ * - otherwise gpu_addr==0 */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), ++ /* gpu_addr==JS_TAIL read */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), ++/* gpu_addr is as follows: ++ * - If JS_STATUS active before soft-stop, val==JS_HEAD ++ * - otherwise gpu_addr==0 ++ */ ++ /* gpu_addr==JS_HEAD read */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), ++ /* info_val == is_scheduled */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), ++ /* info_val == is_scheduled */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), ++ /* info_val == nr jobs submitted */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), ++ /* gpu_addr==JS_HEAD_NEXT last written */ ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), ++ KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), ++/* ++ * Job dispatch events ++ */ ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), ++ /* gpu_addr==0, info_val==0, uatom==0 */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), ++/* ++ * Scheduler Core events ++ */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), ++ /* gpu_addr==last value written/would be written to JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), ++ /* gpu_addr==value to write into JS_HEAD */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), ++ /* kctx is the one being evicted, info_val == kctx to put in */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), ++ /* info_val == 
lower 32 bits of affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), ++ /* info_val == lower 32 bits of rechecked affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), ++ /* info_val == lower 32 bits of rechecked affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), ++ /* info_val == lower 32 bits of affinity */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), ++ /* info_val == the ctx attribute now on ctx */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), ++ /* info_val == the ctx attribute now on runpool */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), ++ /* info_val == the ctx attribute now off ctx */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), ++ /* info_val == the ctx attribute now off runpool */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), ++/* ++ * Scheduler Policy events ++ */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), ++ /* info_val == whether it was evicted */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), ++ /* gpu_addr==JS_HEAD to write if the job were run */ ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), ++ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), ++/* ++ * Power Management Events ++ */ ++ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), ++ /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ ++ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), ++ 
KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), ++ /* info_val == policy number, or -1 for "Already changing" */ ++ KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), ++ /* info_val == policy number */ ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), ++ /* info_val == policy number */ ++ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), ++/* Unused code just to make it easier to not have a comma at the end. ++ * All other codes MUST come before this */ ++ KBASE_TRACE_CODE_MAKE_CODE(DUMMY) + -+struct kbase_pm_callback_conf pm_callbacks = { -+ .power_on_callback = pm_callback_power_on, -+ .power_off_callback = pm_callback_power_off, -+ .power_suspend_callback = NULL, -+ .power_resume_callback = NULL ++#if 0 /* Dummy section to avoid breaking formatting */ +}; -+ -+static struct kbase_platform_config versatile_platform_config = { -+#ifndef CONFIG_OF -+ .io_resources = &io_resources +#endif -+}; -+ -+struct kbase_platform_config *kbase_get_platform_config(void) -+{ -+ return &versatile_platform_config; -+} -+ -+int kbase_platform_early_init(void) -+{ -+ /* Nothing needed at this stage */ -+ return 0; -+} + -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c ++/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c new file mode 100644 -index 000000000..816dff498 +index 000000000..5830e87f0 --- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c -@@ -0,0 +1,71 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +@@ -0,0 +1,236 @@ +/* + * -+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -404694,160 +406312,232 @@ index 000000000..816dff498 + + + -+#include +#include -+#include "mali_kbase_cpu_vexpress.h" ++#include ++#include + -+#define HZ_IN_MHZ (1000000) ++#define CREATE_TRACE_POINTS + -+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) -+#define MOTHERBOARD_SYS_CFG_START (0x10000000) -+#define SYS_CFGDATA_OFFSET (0x000000A0) -+#define SYS_CFGCTRL_OFFSET (0x000000A4) -+#define SYS_CFGSTAT_OFFSET (0x000000A8) ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++#include "mali_timeline.h" + -+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) -+#define READ_REG_BIT_VALUE (0 << 30) -+#define DCC_DEFAULT_BIT_VALUE (0 << 26) -+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) -+#define SITE_DEFAULT_BIT_VALUE (1 << 16) -+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) -+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) -+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) -+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) ++#include ++#include + -+#define FEED_REG_BIT_MASK (0x0F) -+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) -+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) -+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) -+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) -+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans); ++EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active); + -+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<= KBASE_NR_TRACE_CODES) ++ return NULL; + -+static DEFINE_RAW_SPINLOCK(syscfg_lock); -+/** -+ * kbase_get_vendor_specific_cpu_clock_speed -+ * @brief Retrieves the CPU clock speed. -+ * The implementation is platform specific. 
-+ * @param[out] cpu_clock - the value of CPU clock speed in MHz -+ * @return 0 on success, 1 otherwise -+*/ -+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock) ++ return &kbase_trace_timeline_desc_table[*pos]; ++} ++ ++static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data) +{ -+ /* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */ -+ *cpu_clock = CPU_CLOCK_SPEED_6XV7; ++} ++ ++static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos) ++{ ++ (*pos)++; + ++ if (*pos == KBASE_NR_TRACE_CODES) ++ return NULL; ++ ++ return &kbase_trace_timeline_desc_table[*pos]; ++} ++ ++static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data) ++{ ++ struct kbase_trace_timeline_desc *trace_desc = data; ++ ++ seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc); + return 0; +} -diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h -new file mode 100644 -index 000000000..23647ccb0 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h -@@ -0,0 +1,28 @@ -+/* -+ * -+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ + + ++static const struct seq_operations kbasep_trace_timeline_seq_ops = { ++ .start = kbasep_trace_timeline_seq_start, ++ .next = kbasep_trace_timeline_seq_next, ++ .stop = kbasep_trace_timeline_seq_stop, ++ .show = kbasep_trace_timeline_seq_show, ++}; + ++static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &kbasep_trace_timeline_seq_ops); ++} + ++static const struct file_operations kbasep_trace_timeline_debugfs_fops = { ++ .open = kbasep_trace_timeline_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; + -+#ifndef _KBASE_CPU_VEXPRESS_H_ -+#define _KBASE_CPU_VEXPRESS_H_ ++#ifdef CONFIG_DEBUG_FS + -+/** -+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func. -+ */ -+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); ++void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("mali_timeline_defs", ++ S_IRUGO, kbdev->mali_debugfs_directory, NULL, ++ &kbasep_trace_timeline_debugfs_fops); ++} + -+#endif /* _KBASE_CPU_VEXPRESS_H_ */ -diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h -new file mode 100644 -index 000000000..5fa9b39c4 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h -@@ -0,0 +1,53 @@ -+/* -+ * -+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
-+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++#endif /* CONFIG_DEBUG_FS */ + ++void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + ++ if (kbdev->timeline.slot_atoms_submitted[js] > 0) { ++ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); ++ } else { ++ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + ++ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1); ++ KBASE_TIMELINE_JOB_START(kctx, js, atom_number); ++ } ++ ++kbdev->timeline.slot_atoms_submitted[js]; + ++ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); ++} + -+/** -+ * @file mali_ukk_os.h -+ * Types and definitions that are common for Linux OSs for the kernel side of the -+ * User-Kernel interface. -+ */ ++void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js, ++ kbasep_js_atom_done_code done_code) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); + -+#ifndef _UKK_OS_H_ /* Linux version */ -+#define _UKK_OS_H_ ++ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { ++ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); ++ } else { ++ /* Job finished in JS_HEAD */ ++ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + -+#include ++ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0); ++ KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number); + -+/** -+ * @addtogroup uk_api User-Kernel Interface API -+ * @{ -+ */ ++ /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */ ++ if (kbase_backend_nr_atoms_submitted(kbdev, js)) { ++ struct kbase_jd_atom *next_katom; ++ struct kbase_context *next_kctx; + -+/** -+ * @addtogroup uk_api_kernel UKK (Kernel side) -+ * @{ -+ */ ++ /* Peek the next atom - note that the atom in JS_HEAD will already ++ * have been dequeued */ ++ next_katom = kbase_backend_inspect_head(kbdev, js); ++ WARN_ON(!next_katom); ++ next_kctx = next_katom->kctx; ++ KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0); ++ KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1); ++ KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom)); ++ } ++ } + -+/** -+ * Internal OS specific data structure associated with each UKK session. Part -+ * of a ukk_session object. 
-+ */ -+typedef struct ukkp_session { -+ int dummy; /**< No internal OS specific data at this time */ -+} ukkp_session; ++ --kbdev->timeline.slot_atoms_submitted[js]; + -+/** @} end group uk_api_kernel */ ++ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); ++} + -+/** @} end group uk_api */ ++void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) ++{ ++ int uid = 0; ++ int old_uid; + -+#endif /* _UKK_OS_H__ */ -diff --git a/drivers/gpu/arm/midgard/protected_mode_switcher.h b/drivers/gpu/arm/midgard/protected_mode_switcher.h ++ /* If a producer already exists for the event, try to use their UID (multiple-producers) */ ++ uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]); ++ old_uid = uid; ++ ++ /* Get a new non-zero UID if we don't have one yet */ ++ while (!uid) ++ uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter); ++ ++ /* Try to use this UID */ ++ if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid)) ++ /* If it changed, raced with another producer: we've lost this UID */ ++ uid = 0; ++ ++ KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid); ++} ++ ++void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) ++{ ++ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); ++ ++ if (uid != 0) { ++ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) ++ /* If it changed, raced with another consumer: we've lost this UID */ ++ uid = 0; ++ ++ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); ++ } ++} ++ ++void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) ++{ ++ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); ++ ++ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) ++ /* If it changed, raced with another consumer: we've lost this UID */ ++ uid = 0; ++ ++ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); ++} ++ ++void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Simply log the start of the transition */ ++ kbdev->timeline.l2_transitioning = true; ++ KBASE_TIMELINE_POWERING_L2(kbdev); ++} ++ ++void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ /* Simply log the end of the transition */ ++ if (kbdev->timeline.l2_transitioning) { ++ kbdev->timeline.l2_transitioning = false; ++ KBASE_TIMELINE_POWERED_L2(kbdev); ++ } ++} ++ ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h new file mode 100644 -index 000000000..5dc2f3ba8 +index 000000000..a04f7c142 --- /dev/null -+++ b/drivers/gpu/arm/midgard/protected_mode_switcher.h -@@ -0,0 +1,64 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +@@ -0,0 +1,363 @@ +/* + * -+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -404862,709 +406552,361 @@ index 000000000..5dc2f3ba8 + + + -+#ifndef _PROTECTED_MODE_SWITCH_H_ -+#define _PROTECTED_MODE_SWITCH_H_ + -+struct protected_mode_device; + -+/** -+ * struct protected_mode_ops - Callbacks for protected mode switch operations -+ * -+ * @protected_mode_enable: Callback to enable protected mode for device -+ * @protected_mode_disable: Callback to disable protected mode for device -+ */ -+struct protected_mode_ops { -+ /** -+ * protected_mode_enable() - Enable protected mode on device -+ * @dev: The struct device -+ * -+ * Return: 0 on success, non-zero on error -+ */ -+ int (*protected_mode_enable)( -+ struct protected_mode_device *protected_dev); ++#if !defined(_KBASE_TRACE_TIMELINE_H) ++#define _KBASE_TRACE_TIMELINE_H + -+ /** -+ * protected_mode_disable() - Disable protected mode on device, and -+ * reset device -+ * @dev: The struct device -+ * -+ * Return: 0 on success, non-zero on error -+ */ -+ int (*protected_mode_disable)( -+ struct protected_mode_device *protected_dev); ++#ifdef CONFIG_MALI_TRACE_TIMELINE ++ ++enum kbase_trace_timeline_code { ++ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val ++ #include "mali_kbase_trace_timeline_defs.h" ++ #undef KBASE_TIMELINE_TRACE_CODE +}; + -+/** -+ * struct protected_mode_device - Device structure for protected mode devices ++#ifdef CONFIG_DEBUG_FS ++ ++/** Initialize Timeline DebugFS entries */ ++void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); ++ ++#else /* CONFIG_DEBUG_FS */ ++ ++#define kbasep_trace_timeline_debugfs_init CSTD_NOP ++ ++#endif /* CONFIG_DEBUG_FS */ ++ ++/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE ++ * functions. ++ * Output is timestamped by either sched_clock() (default), local_clock(), or ++ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */ ++#include "mali_timeline.h" ++ ++/* Trace number of atoms in flight for kctx (atoms either not completed, or in ++ process of being returned to user */ ++#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \ ++ (int)kctx->timeline.owner_tgid, \ ++ count); \ ++ } while (0) ++ ++/* Trace atom_id being Ready to Run */ ++#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \ ++ CTX_FLOW_ATOM_READY, \ ++ (int)kctx->timeline.owner_tgid, \ ++ atom_id); \ ++ } while (0) ++ ++/* Trace number of atoms submitted to job slot js + * -+ * @ops - Callbacks associated with this device -+ * @data - Pointer to device private data ++ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions, ++ * so that those actions can be filtered out separately from this + * -+ * This structure should be registered with the platform device using -+ * platform_set_drvdata(). 
-+ */ -+struct protected_mode_device { -+ struct protected_mode_ops ops; -+ void *data; -+}; ++ * This is because this is more useful, as we can use it to calculate general ++ * utilization easily and accurately */ ++#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_SLOT_ACTIVE, \ ++ (int)kctx->timeline.owner_tgid, \ ++ js, count); \ ++ } while (0) + -+#endif /* _PROTECTED_MODE_SWITCH_H_ */ -diff --git a/drivers/gpu/arm/midgard/rename.h b/drivers/gpu/arm/midgard/rename.h -new file mode 100644 -index 000000000..821866618 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/rename.h -@@ -0,0 +1,426 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _RENAME_H -+#define _RENAME_H -+#define __crc_kbase_create_context midgard___crc_kbase_create_context -+#define __crc_kbase_destroy_context midgard___crc_kbase_destroy_context -+#define __crc_kbase_find_device midgard___crc_kbase_find_device -+#define __crc_kbase_instr_hwcnt_clear midgard___crc_kbase_instr_hwcnt_clear -+#define __crc_kbase_instr_hwcnt_dump_complete midgard___crc_kbase_instr_hwcnt_dump_complete -+#define __crc_kbase_instr_hwcnt_request_dump midgard___crc_kbase_instr_hwcnt_request_dump -+#define __crc_kbase_release_device midgard___crc_kbase_release_device -+#define jd_done_nolock midgard_jd_done_nolock -+#define kbase_add_va_region midgard_kbase_add_va_region -+#define kbase_alloc_free_region midgard_kbase_alloc_free_region -+#define kbase_alloc_phy_pages_helper midgard_kbase_alloc_phy_pages_helper -+#define kbase_alloc_phy_pages midgard_kbase_alloc_phy_pages -+#define kbase_as_fault_debugfs_init midgard_kbase_as_fault_debugfs_init -+#define kbase_backend_complete_wq midgard_kbase_backend_complete_wq -+#define kbase_backend_complete_wq_post_sched midgard_kbase_backend_complete_wq_post_sched -+#define kbase_backend_ctx_count_changed midgard_kbase_backend_ctx_count_changed -+#define kbase_backend_find_and_release_free_address_space midgard_kbase_backend_find_and_release_free_address_space -+#define kbase_backend_get_current_flush_id midgard_kbase_backend_get_current_flush_id -+#define kbase_backend_get_gpu_time midgard_kbase_backend_get_gpu_time -+#define kbase_backend_gpuprops_get_features midgard_kbase_backend_gpuprops_get_features -+#define kbase_backend_gpuprops_get midgard_kbase_backend_gpuprops_get -+#define kbase_backend_inspect_tail midgard_kbase_backend_inspect_tail -+#define kbase_backend_nr_atoms_on_slot midgard_kbase_backend_nr_atoms_on_slot -+#define kbase_backend_nr_atoms_submitted midgard_kbase_backend_nr_atoms_submitted -+#define kbase_backend_release_ctx_irq midgard_kbase_backend_release_ctx_irq -+#define kbase_backend_release_ctx_noirq midgard_kbase_backend_release_ctx_noirq -+#define kbase_backend_reset midgard_kbase_backend_reset -+#define kbase_backend_run_atom midgard_kbase_backend_run_atom -+#define kbase_backend_slot_free midgard_kbase_backend_slot_free -+#define kbase_backend_slot_update midgard_kbase_backend_slot_update -+#define kbase_backend_soft_hard_stop_slot midgard_kbase_backend_soft_hard_stop_slot -+#define kbase_backend_timeouts_changed midgard_kbase_backend_timeouts_changed -+#define kbase_backend_timer_init midgard_kbase_backend_timer_init -+#define kbase_backend_timer_resume midgard_kbase_backend_timer_resume -+#define kbase_backend_timer_suspend midgard_kbase_backend_timer_suspend -+#define kbase_backend_timer_term midgard_kbase_backend_timer_term 
-+#define kbase_backend_use_ctx midgard_kbase_backend_use_ctx -+#define kbase_backend_use_ctx_sched midgard_kbase_backend_use_ctx_sched -+#define kbase_cache_enabled midgard_kbase_cache_enabled -+#define kbase_cache_set_coherency_mode midgard_kbase_cache_set_coherency_mode -+#define kbase_cancel_soft_job midgard_kbase_cancel_soft_job -+#define kbase_check_alloc_flags midgard_kbase_check_alloc_flags -+#define kbase_check_import_flags midgard_kbase_check_import_flags -+#define kbase_clean_caches_done midgard_kbase_clean_caches_done -+#define kbase_create_context midgard_kbase_create_context -+#define kbase_ctx_sched_init midgard_kbase_ctx_sched_init -+#define kbase_ctx_sched_release_ctx midgard_kbase_ctx_sched_release_ctx -+#define kbase_ctx_sched_remove_ctx midgard_kbase_ctx_sched_remove_ctx -+#define kbase_ctx_sched_restore_all_as midgard_kbase_ctx_sched_restore_all_as -+#define kbase_ctx_sched_retain_ctx midgard_kbase_ctx_sched_retain_ctx -+#define kbase_ctx_sched_retain_ctx_refcount midgard_kbase_ctx_sched_retain_ctx_refcount -+#define kbase_ctx_sched_term midgard_kbase_ctx_sched_term -+#define kbase_debug_assert_register_hook midgard_kbase_debug_assert_register_hook -+#define kbase_debug_job_fault_context_init midgard_kbase_debug_job_fault_context_init -+#define kbase_debug_job_fault_context_term midgard_kbase_debug_job_fault_context_term -+#define kbase_debug_job_fault_debugfs_init midgard_kbase_debug_job_fault_debugfs_init -+#define kbase_debug_job_fault_dev_init midgard_kbase_debug_job_fault_dev_init -+#define kbase_debug_job_fault_dev_term midgard_kbase_debug_job_fault_dev_term -+#define kbase_debug_job_fault_process midgard_kbase_debug_job_fault_process -+#define kbase_debug_job_fault_reg_snapshot_init midgard_kbase_debug_job_fault_reg_snapshot_init -+#define kbase_debug_mem_view_init midgard_kbase_debug_mem_view_init -+#define kbase_destroy_context midgard_kbase_destroy_context -+#define kbase_devfreq_init midgard_kbase_devfreq_init -+#define kbase_devfreq_set_core_mask midgard_kbase_devfreq_set_core_mask -+#define kbase_devfreq_term midgard_kbase_devfreq_term -+#define kbase_device_alloc midgard_kbase_device_alloc -+#define kbase_device_free midgard_kbase_device_free -+#define kbase_device_init midgard_kbase_device_init -+#define kbase_device_term midgard_kbase_device_term -+#define kbase_disjoint_event_get midgard_kbase_disjoint_event_get -+#define kbase_disjoint_event midgard_kbase_disjoint_event -+#define kbase_disjoint_event_potential midgard_kbase_disjoint_event_potential -+#define kbase_disjoint_init midgard_kbase_disjoint_init -+#define kbase_disjoint_state_down midgard_kbase_disjoint_state_down -+#define kbase_disjoint_state_up midgard_kbase_disjoint_state_up -+#define kbase_drv_name midgard_kbase_drv_name -+#define kbase_event_cleanup midgard_kbase_event_cleanup -+#define kbase_event_close midgard_kbase_event_close -+#define kbase_event_dequeue midgard_kbase_event_dequeue -+#define kbase_event_init midgard_kbase_event_init -+#define kbase_event_pending midgard_kbase_event_pending -+#define kbase_event_post midgard_kbase_event_post -+#define kbase_event_wakeup midgard_kbase_event_wakeup -+#define kbase_fence_add_callback midgard_kbase_fence_add_callback -+#define kbase_fence_free_callbacks midgard_kbase_fence_free_callbacks -+#define kbase_fence_ops midgard_kbase_fence_ops -+#define kbase_fence_out_new midgard_kbase_fence_out_new -+#define kbase_find_device midgard_kbase_find_device -+#define kbase_finish_soft_job midgard_kbase_finish_soft_job -+#define 
kbase_flush_mmu_wqs midgard_kbase_flush_mmu_wqs -+#define kbase_free_alloced_region midgard_kbase_free_alloced_region -+#define kbase_free_phy_pages_helper midgard_kbase_free_phy_pages_helper -+#define kbase_get_real_power midgard_kbase_get_real_power -+#define kbase_gpu_complete_hw midgard_kbase_gpu_complete_hw -+#define kbase_gpu_dump_slots midgard_kbase_gpu_dump_slots -+#define kbase_gpu_inspect midgard_kbase_gpu_inspect -+#define kbase_gpu_interrupt midgard_kbase_gpu_interrupt -+#define kbase_gpu_irq_evict midgard_kbase_gpu_irq_evict -+#define kbase_gpu_mmap midgard_kbase_gpu_mmap -+#define kbase_gpu_munmap midgard_kbase_gpu_munmap -+#define kbase_gpuprops_populate_user_buffer midgard_kbase_gpuprops_populate_user_buffer -+#define kbase_gpuprops_set_features midgard_kbase_gpuprops_set_features -+#define kbase_gpuprops_set midgard_kbase_gpuprops_set -+#define kbase_gpuprops_update_core_props_gpu_id midgard_kbase_gpuprops_update_core_props_gpu_id -+#define kbase_gpu_vm_lock midgard_kbase_gpu_vm_lock -+#define kbase_gpu_vm_unlock midgard_kbase_gpu_vm_unlock -+#define kbase_hwaccess_pm_gpu_active midgard_kbase_hwaccess_pm_gpu_active -+#define kbase_hwaccess_pm_gpu_idle midgard_kbase_hwaccess_pm_gpu_idle -+#define kbase_hwaccess_pm_halt midgard_kbase_hwaccess_pm_halt -+#define kbase_hwaccess_pm_init midgard_kbase_hwaccess_pm_init -+#define kbase_hwaccess_pm_powerup midgard_kbase_hwaccess_pm_powerup -+#define kbase_hwaccess_pm_resume midgard_kbase_hwaccess_pm_resume -+#define kbase_hwaccess_pm_suspend midgard_kbase_hwaccess_pm_suspend -+#define kbase_hwaccess_pm_term midgard_kbase_hwaccess_pm_term -+#define kbase_hw_set_features_mask midgard_kbase_hw_set_features_mask -+#define kbase_hw_set_issues_mask midgard_kbase_hw_set_issues_mask -+#define kbase_install_interrupts midgard_kbase_install_interrupts -+#define kbase_instr_backend_init midgard_kbase_instr_backend_init -+#define kbase_instr_backend_term midgard_kbase_instr_backend_term -+#define kbase_instr_hwcnt_clear midgard_kbase_instr_hwcnt_clear -+#define kbase_instr_hwcnt_disable_internal midgard_kbase_instr_hwcnt_disable_internal -+#define kbase_instr_hwcnt_dump_complete midgard_kbase_instr_hwcnt_dump_complete -+#define kbase_instr_hwcnt_enable_internal midgard_kbase_instr_hwcnt_enable_internal -+#define kbase_instr_hwcnt_request_dump midgard_kbase_instr_hwcnt_request_dump -+#define kbase_instr_hwcnt_sample_done midgard_kbase_instr_hwcnt_sample_done -+#define kbase_instr_hwcnt_wait_for_dump midgard_kbase_instr_hwcnt_wait_for_dump -+#define kbase_invoke_smc_fid midgard_kbase_invoke_smc_fid -+#define kbase_invoke_smc midgard_kbase_invoke_smc -+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) -+#define kbase_io_history_dump midgard_kbase_io_history_dump -+#define kbase_io_history_init midgard_kbase_io_history_init -+#define kbase_io_history_term midgard_kbase_io_history_term -+#endif /* CONFIG_DEBUG_FS */ -+#define kbase_ipa_debugfs_init midgard_kbase_ipa_debugfs_init -+#define kbase_ipa_init midgard_kbase_ipa_init -+#define kbase_ipa_init_model midgard_kbase_ipa_init_model -+#define kbase_ipa_model_add_param_s32 midgard_kbase_ipa_model_add_param_s32 -+#define kbase_ipa_model_add_param_string midgard_kbase_ipa_model_add_param_string -+#define kbase_ipa_model_name_from_id midgard_kbase_ipa_model_name_from_id -+#define kbase_ipa_model_param_add midgard_kbase_ipa_model_param_add -+#define kbase_ipa_model_param_free_all midgard_kbase_ipa_model_param_free_all -+#define kbase_ipa_model_recalculate midgard_kbase_ipa_model_recalculate 
-+#define kbase_ipa_power_model_ops midgard_kbase_ipa_power_model_ops -+#define kbase_ipa_term midgard_kbase_ipa_term -+#define kbase_ipa_term_model midgard_kbase_ipa_term_model -+#define kbase_jd_cancel midgard_kbase_jd_cancel -+#define kbase_jd_done midgard_kbase_jd_done -+#define kbase_jd_done_worker midgard_kbase_jd_done_worker -+#define kbase_jd_exit midgard_kbase_jd_exit -+#define kbase_jd_free_external_resources midgard_kbase_jd_free_external_resources -+#define kbase_jd_init midgard_kbase_jd_init -+#define kbase_jd_submit midgard_kbase_jd_submit -+#define kbase_jd_zap_context midgard_kbase_jd_zap_context -+#define kbase_jit_allocate midgard_kbase_jit_allocate -+#define kbase_jit_backing_lost midgard_kbase_jit_backing_lost -+#define kbase_jit_debugfs_init midgard_kbase_jit_debugfs_init -+#define kbase_jit_evict midgard_kbase_jit_evict -+#define kbase_jit_free midgard_kbase_jit_free -+#define kbase_jit_init midgard_kbase_jit_init -+#define kbase_jit_term midgard_kbase_jit_term -+#define kbase_jm_complete midgard_kbase_jm_complete -+#define kbase_jm_idle_ctx midgard_kbase_jm_idle_ctx -+#define kbase_jm_kick midgard_kbase_jm_kick -+#define kbase_jm_return_atom_to_js midgard_kbase_jm_return_atom_to_js -+#define kbase_jm_try_kick_all midgard_kbase_jm_try_kick_all -+#define kbase_jm_try_kick midgard_kbase_jm_try_kick -+#define kbase_jm_wait_for_zero_jobs midgard_kbase_jm_wait_for_zero_jobs -+#define kbase_job_check_enter_disjoint midgard_kbase_job_check_enter_disjoint -+#define kbase_job_check_leave_disjoint midgard_kbase_job_check_leave_disjoint -+#define kbase_job_done midgard_kbase_job_done -+#define kbase_job_fault_get_reg_snapshot midgard_kbase_job_fault_get_reg_snapshot -+#define kbase_job_hw_submit midgard_kbase_job_hw_submit -+#define kbase_job_slot_ctx_priority_check_locked midgard_kbase_job_slot_ctx_priority_check_locked -+#define kbase_job_slot_halt midgard_kbase_job_slot_halt -+#define kbase_job_slot_hardstop midgard_kbase_job_slot_hardstop -+#define kbase_job_slot_init midgard_kbase_job_slot_init -+#define kbase_job_slot_softstop midgard_kbase_job_slot_softstop -+#define kbase_job_slot_softstop_swflags midgard_kbase_job_slot_softstop_swflags -+#define kbase_job_slot_term midgard_kbase_job_slot_term -+#define kbase_js_complete_atom midgard_kbase_js_complete_atom -+#define kbase_js_complete_atom_wq midgard_kbase_js_complete_atom_wq -+#define kbase_js_dep_resolved_submit midgard_kbase_js_dep_resolved_submit -+#define kbase_js_is_atom_valid midgard_kbase_js_is_atom_valid -+#define kbase_js_pull midgard_kbase_js_pull -+#define kbase_js_sched midgard_kbase_js_sched -+#define kbase_js_set_timeouts midgard_kbase_js_set_timeouts -+#define kbase_js_unpull midgard_kbase_js_unpull -+#define kbase_js_zap_context midgard_kbase_js_zap_context -+#define kbase_map_external_resource midgard_kbase_map_external_resource -+#define kbase_mem_alias midgard_kbase_mem_alias -+#define kbase_mem_alloc midgard_kbase_mem_alloc -+#define kbase_mem_alloc_page midgard_kbase_mem_alloc_page -+#define kbase_mem_commit midgard_kbase_mem_commit -+#define kbase_mem_evictable_deinit midgard_kbase_mem_evictable_deinit -+#define kbase_mem_evictable_init midgard_kbase_mem_evictable_init -+#define kbase_mem_evictable_make midgard_kbase_mem_evictable_make -+#define kbase_mem_evictable_unmake midgard_kbase_mem_evictable_unmake -+#define kbase_mem_flags_change midgard_kbase_mem_flags_change -+#define kbase_mem_free midgard_kbase_mem_free -+#define kbase_mem_free_region midgard_kbase_mem_free_region -+#define 
kbase_mem_grow_gpu_mapping midgard_kbase_mem_grow_gpu_mapping -+#define kbase_mem_halt midgard_kbase_mem_halt -+#define kbase_mem_import midgard_kbase_mem_import -+#define kbase_mem_init midgard_kbase_mem_init -+#define kbase_mem_kref_free midgard_kbase_mem_kref_free -+#define kbase_mem_pool_alloc midgard_kbase_mem_pool_alloc -+#define kbase_mem_pool_alloc_pages midgard_kbase_mem_pool_alloc_pages -+#define kbase_mem_pool_debugfs_init midgard_kbase_mem_pool_debugfs_init -+#define kbase_mem_pool_free midgard_kbase_mem_pool_free -+#define kbase_mem_pool_free_pages midgard_kbase_mem_pool_free_pages -+#define kbase_mem_pool_grow midgard_kbase_mem_pool_grow -+#define kbase_mem_pool_init midgard_kbase_mem_pool_init -+#define kbase_mem_pool_set_max_size midgard_kbase_mem_pool_set_max_size -+#define kbase_mem_pool_term midgard_kbase_mem_pool_term -+#define kbase_mem_pool_trim midgard_kbase_mem_pool_trim -+#define kbase_mem_query midgard_kbase_mem_query -+#define kbase_mem_term midgard_kbase_mem_term -+#define kbase_mmu_disable_as midgard_kbase_mmu_disable_as -+#define kbase_mmu_disable midgard_kbase_mmu_disable -+#define kbase_mmu_dump midgard_kbase_mmu_dump -+#define kbase_mmu_hw_clear_fault midgard_kbase_mmu_hw_clear_fault -+#define kbase_mmu_hw_configure midgard_kbase_mmu_hw_configure -+#define kbase_mmu_hw_do_operation midgard_kbase_mmu_hw_do_operation -+#define kbase_mmu_hw_enable_fault midgard_kbase_mmu_hw_enable_fault -+#define kbase_mmu_init midgard_kbase_mmu_init -+#define kbase_mmu_insert_pages midgard_kbase_mmu_insert_pages -+#define kbase_mmu_insert_pages_no_flush midgard_kbase_mmu_insert_pages_no_flush -+#define kbase_mmu_insert_single_page midgard_kbase_mmu_insert_single_page -+#define kbase_mmu_interrupt midgard_kbase_mmu_interrupt -+#define kbase_mmu_mode_get_aarch64 midgard_kbase_mmu_mode_get_aarch64 -+#define kbase_mmu_mode_get_lpae midgard_kbase_mmu_mode_get_lpae -+#define kbase_mmu_teardown_pages midgard_kbase_mmu_teardown_pages -+#define kbase_mmu_term midgard_kbase_mmu_term -+#define kbase_mmu_update midgard_kbase_mmu_update -+#define kbase_mmu_update_pages midgard_kbase_mmu_update_pages -+#define kbase_os_mem_map_lock midgard_kbase_os_mem_map_lock -+#define kbase_os_mem_map_unlock midgard_kbase_os_mem_map_unlock -+#define kbasep_cache_clean_worker midgard_kbasep_cache_clean_worker -+#define kbasep_common_test_interrupt_handlers midgard_kbasep_common_test_interrupt_handlers -+#define kbasep_complete_triggered_soft_events midgard_kbasep_complete_triggered_soft_events -+#define kbasep_debug_assert_call_hook midgard_kbasep_debug_assert_call_hook -+#define kbasep_find_enclosing_cpu_mapping_offset midgard_kbasep_find_enclosing_cpu_mapping_offset -+#define kbasep_gpu_memory_debugfs_init midgard_kbasep_gpu_memory_debugfs_init -+#define kbasep_jd_debugfs_ctx_init midgard_kbasep_jd_debugfs_ctx_init -+#define kbasep_job_slot_soft_or_hard_stop_do_action midgard_kbasep_job_slot_soft_or_hard_stop_do_action -+#define kbasep_js_add_job midgard_kbasep_js_add_job -+#define kbasep_js_atom_priority_to_relative midgard_kbasep_js_atom_priority_to_relative -+#define kbasep_js_ctx_attr_ctx_release_atom midgard_kbasep_js_ctx_attr_ctx_release_atom -+#define kbasep_js_ctx_attr_ctx_retain_atom midgard_kbasep_js_ctx_attr_ctx_retain_atom -+#define kbasep_js_ctx_attr_runpool_release_ctx midgard_kbasep_js_ctx_attr_runpool_release_ctx -+#define kbasep_js_ctx_attr_runpool_retain_ctx midgard_kbasep_js_ctx_attr_runpool_retain_ctx -+#define kbasep_js_devdata_halt midgard_kbasep_js_devdata_halt -+#define 
kbasep_js_devdata_init midgard_kbasep_js_devdata_init -+#define kbasep_js_devdata_term midgard_kbasep_js_devdata_term -+#define kbasep_js_kctx_init midgard_kbasep_js_kctx_init -+#define kbasep_js_kctx_term midgard_kbasep_js_kctx_term -+#define kbasep_js_relative_priority_to_atom midgard_kbasep_js_relative_priority_to_atom -+#define kbasep_js_release_privileged_ctx midgard_kbasep_js_release_privileged_ctx -+#define kbasep_js_remove_cancelled_job midgard_kbasep_js_remove_cancelled_job -+#define kbasep_js_remove_job midgard_kbasep_js_remove_job -+#define kbasep_js_resume midgard_kbasep_js_resume -+#define kbasep_js_runpool_release_ctx_and_katom_retained_state midgard_kbasep_js_runpool_release_ctx_and_katom_retained_state -+#define kbasep_js_runpool_release_ctx midgard_kbasep_js_runpool_release_ctx -+#define kbasep_js_runpool_release_ctx_nolock midgard_kbasep_js_runpool_release_ctx_nolock -+#define kbasep_js_runpool_requeue_or_kill_ctx midgard_kbasep_js_runpool_requeue_or_kill_ctx -+#define kbasep_js_schedule_privileged_ctx midgard_kbasep_js_schedule_privileged_ctx -+#define kbasep_js_suspend midgard_kbasep_js_suspend -+#define kbase_platform_early_init midgard_kbase_platform_early_init -+#define kbase_platform_rk_init_opp_table midgard_kbase_platform_rk_init_opp_table -+#define kbase_platform_rk_shutdown midgard_kbase_platform_rk_shutdown -+#define kbase_pm_always_on_policy_ops midgard_kbase_pm_always_on_policy_ops -+#define kbase_pm_cache_snoop_disable midgard_kbase_pm_cache_snoop_disable -+#define kbase_pm_cache_snoop_enable midgard_kbase_pm_cache_snoop_enable -+#define kbase_pm_ca_get_core_mask midgard_kbase_pm_ca_get_core_mask -+#define kbase_pm_ca_init midgard_kbase_pm_ca_init -+#define kbase_pm_ca_term midgard_kbase_pm_ca_term -+#define kbase_pm_clock_off midgard_kbase_pm_clock_off -+#define kbase_pm_clock_on midgard_kbase_pm_clock_on -+#define kbase_pm_coarse_demand_policy_ops midgard_kbase_pm_coarse_demand_policy_ops -+#define kbase_pm_context_active_handle_suspend midgard_kbase_pm_context_active_handle_suspend -+#define kbase_pm_context_active midgard_kbase_pm_context_active -+#define kbase_pm_context_idle midgard_kbase_pm_context_idle -+#define kbase_pm_disable_interrupts midgard_kbase_pm_disable_interrupts -+#define kbase_pm_disable_interrupts_nolock midgard_kbase_pm_disable_interrupts_nolock -+#define kbase_pm_do_poweroff midgard_kbase_pm_do_poweroff -+#define kbase_pm_do_poweron midgard_kbase_pm_do_poweron -+#define kbasep_mem_profile_debugfs_insert midgard_kbasep_mem_profile_debugfs_insert -+#define kbasep_mem_profile_debugfs_remove midgard_kbasep_mem_profile_debugfs_remove -+#define kbase_pm_enable_interrupts midgard_kbase_pm_enable_interrupts -+#define kbase_pm_get_active_cores midgard_kbase_pm_get_active_cores -+#define kbase_pm_get_policy midgard_kbase_pm_get_policy -+#define kbase_pm_get_present_cores midgard_kbase_pm_get_present_cores -+#define kbase_pm_get_ready_cores midgard_kbase_pm_get_ready_cores -+#define kbase_pm_get_trans_cores midgard_kbase_pm_get_trans_cores -+#define kbase_pm_halt midgard_kbase_pm_halt -+#define kbase_pm_init_hw midgard_kbase_pm_init_hw -+#define kbase_pm_list_policies midgard_kbase_pm_list_policies -+#define kbase_pm_metrics_update midgard_kbase_pm_metrics_update -+#define kbase_pm_policy_init midgard_kbase_pm_policy_init -+#define kbase_pm_policy_term midgard_kbase_pm_policy_term -+#define kbase_pm_power_changed midgard_kbase_pm_power_changed -+#define kbase_pm_powerup midgard_kbase_pm_powerup -+#define kbase_pm_register_access_disable 
midgard_kbase_pm_register_access_disable -+#define kbase_pm_register_access_enable midgard_kbase_pm_register_access_enable -+#define kbase_pm_release_gpu_cycle_counter midgard_kbase_pm_release_gpu_cycle_counter -+#define kbase_pm_release_gpu_cycle_counter_nolock midgard_kbase_pm_release_gpu_cycle_counter_nolock -+#define kbase_pm_request_gpu_cycle_counter_l2_is_on midgard_kbase_pm_request_gpu_cycle_counter_l2_is_on -+#define kbase_pm_request_gpu_cycle_counter midgard_kbase_pm_request_gpu_cycle_counter -+#define kbase_pm_reset_done midgard_kbase_pm_reset_done -+#define kbase_pm_resume midgard_kbase_pm_resume -+#define kbase_pm_set_debug_core_mask midgard_kbase_pm_set_debug_core_mask -+#define kbase_pm_set_policy midgard_kbase_pm_set_policy -+#define kbase_pm_suspend midgard_kbase_pm_suspend -+#define kbase_pm_update_active midgard_kbase_pm_update_active -+#define kbase_pm_update_cores_state midgard_kbase_pm_update_cores_state -+#define kbase_pm_update_cores_state_nolock midgard_kbase_pm_update_cores_state_nolock -+#define kbase_pm_wait_for_poweroff_complete midgard_kbase_pm_wait_for_poweroff_complete -+#define kbasep_os_process_page_usage_update midgard_kbasep_os_process_page_usage_update -+#define kbasep_platform_device_init midgard_kbasep_platform_device_init -+#define kbasep_platform_device_term midgard_kbasep_platform_device_term -+#define kbasep_pm_metrics_init midgard_kbasep_pm_metrics_init -+#define kbasep_pm_metrics_term midgard_kbasep_pm_metrics_term -+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) -+#define kbasep_regs_history_debugfs_init midgard_kbasep_regs_history_debugfs_init -+#endif /* CONFIG_DEBUG_FS */ -+#define kbasep_remove_waiting_soft_job midgard_kbasep_remove_waiting_soft_job -+#define kbase_prepare_soft_job midgard_kbase_prepare_soft_job -+#define kbase_prepare_to_reset_gpu_locked midgard_kbase_prepare_to_reset_gpu_locked -+#define kbase_prepare_to_reset_gpu midgard_kbase_prepare_to_reset_gpu -+#define kbase_process_soft_job midgard_kbase_process_soft_job -+#define kbasep_soft_job_timeout_worker midgard_kbasep_soft_job_timeout_worker -+#define kbase_region_tracker_find_region_base_address midgard_kbase_region_tracker_find_region_base_address -+#define kbase_region_tracker_find_region_enclosing_address midgard_kbase_region_tracker_find_region_enclosing_address -+#define kbase_region_tracker_init_jit midgard_kbase_region_tracker_init_jit -+#define kbase_region_tracker_init midgard_kbase_region_tracker_init -+#define kbase_region_tracker_term midgard_kbase_region_tracker_term -+#define kbase_reg_read midgard_kbase_reg_read -+#define kbase_reg_write midgard_kbase_reg_write -+#define kbase_release_device midgard_kbase_release_device -+#define kbase_release_interrupts midgard_kbase_release_interrupts -+#define kbase_reset_gpu_locked midgard_kbase_reset_gpu_locked -+#define kbase_reset_gpu midgard_kbase_reset_gpu -+#define kbase_reset_gpu_silent midgard_kbase_reset_gpu_silent -+#define kbase_resume_suspended_soft_jobs midgard_kbase_resume_suspended_soft_jobs -+#define kbase_scale_static_power midgard_kbase_scale_static_power -+#define kbase_set_custom_irq_handler midgard_kbase_set_custom_irq_handler -+#define kbase_simple_ipa_model_ops midgard_kbase_simple_ipa_model_ops -+#define kbase_soft_event_update midgard_kbase_soft_event_update -+#define kbase_soft_event_wait_callback midgard_kbase_soft_event_wait_callback -+#define kbase_sticky_resource_acquire midgard_kbase_sticky_resource_acquire -+#define kbase_sticky_resource_init 
midgard_kbase_sticky_resource_init -+#define kbase_sticky_resource_release midgard_kbase_sticky_resource_release -+#define kbase_sticky_resource_term midgard_kbase_sticky_resource_term -+#define kbase_sync_fence_in_cancel_wait midgard_kbase_sync_fence_in_cancel_wait -+#define kbase_sync_fence_in_dump midgard_kbase_sync_fence_in_dump -+#define kbase_sync_fence_in_from_fd midgard_kbase_sync_fence_in_from_fd -+#define kbase_sync_fence_in_info_get midgard_kbase_sync_fence_in_info_get -+#define kbase_sync_fence_in_remove midgard_kbase_sync_fence_in_remove -+#define kbase_sync_fence_in_wait midgard_kbase_sync_fence_in_wait -+#define kbase_sync_fence_out_create midgard_kbase_sync_fence_out_create -+#define kbase_sync_fence_out_info_get midgard_kbase_sync_fence_out_info_get -+#define kbase_sync_fence_out_remove midgard_kbase_sync_fence_out_remove -+#define kbase_sync_fence_out_trigger midgard_kbase_sync_fence_out_trigger -+#define kbase_sync_fence_stream_create midgard_kbase_sync_fence_stream_create -+#define kbase_sync_fence_validate midgard_kbase_sync_fence_validate -+#define kbase_sync_fence_wait_worker midgard_kbase_sync_fence_wait_worker -+#define kbase_synchronize_irqs midgard_kbase_synchronize_irqs -+#define kbase_sync_now midgard_kbase_sync_now -+#define kbase_sync_single_for_cpu midgard_kbase_sync_single_for_cpu -+#define kbase_sync_single_for_device midgard_kbase_sync_single_for_device -+#define kbase_sync_single midgard_kbase_sync_single -+#define kbase_sync_status_string midgard_kbase_sync_status_string -+#define kbase_timeline_name midgard_kbase_timeline_name -+#define __kbase_tlstream_aux_devfreq_target midgard___kbase_tlstream_aux_devfreq_target -+#define __kbase_tlstream_aux_pagefault midgard___kbase_tlstream_aux_pagefault -+#define __kbase_tlstream_aux_pagesalloc midgard___kbase_tlstream_aux_pagesalloc -+#define __kbase_tlstream_aux_pm_state midgard___kbase_tlstream_aux_pm_state -+#define __kbase_tlstream_aux_protected_enter_end midgard___kbase_tlstream_aux_protected_enter_end -+#define __kbase_tlstream_aux_protected_enter_start midgard___kbase_tlstream_aux_protected_enter_start -+#define __kbase_tlstream_aux_protected_leave_end midgard___kbase_tlstream_aux_protected_leave_end -+#define __kbase_tlstream_aux_protected_leave_start midgard___kbase_tlstream_aux_protected_leave_start -+#define kbase_tlstream_init midgard_kbase_tlstream_init -+#define __kbase_tlstream_jd_gpu_soft_reset midgard___kbase_tlstream_jd_gpu_soft_reset -+#define kbase_tlstream_term midgard_kbase_tlstream_term -+#define __kbase_tlstream_tl_attrib_as_config midgard___kbase_tlstream_tl_attrib_as_config -+#define __kbase_tlstream_tl_attrib_atom_config midgard___kbase_tlstream_tl_attrib_atom_config -+#define __kbase_tlstream_tl_attrib_atom_jit midgard___kbase_tlstream_tl_attrib_atom_jit -+#define __kbase_tlstream_tl_attrib_atom_priority midgard___kbase_tlstream_tl_attrib_atom_priority -+#define __kbase_tlstream_tl_attrib_atom_state midgard___kbase_tlstream_tl_attrib_atom_state -+#define __kbase_tlstream_tl_del_atom midgard___kbase_tlstream_tl_del_atom -+#define __kbase_tlstream_tl_del_ctx midgard___kbase_tlstream_tl_del_ctx -+#define __kbase_tlstream_tl_event_atom_softstop_ex midgard___kbase_tlstream_tl_event_atom_softstop_ex -+#define __kbase_tlstream_tl_event_atom_softstop_issue midgard___kbase_tlstream_tl_event_atom_softstop_issue -+#define __kbase_tlstream_tl_event_lpu_softstop midgard___kbase_tlstream_tl_event_lpu_softstop -+#define __kbase_tlstream_tl_new_atom midgard___kbase_tlstream_tl_new_atom -+#define 
__kbase_tlstream_tl_new_ctx midgard___kbase_tlstream_tl_new_ctx -+#define __kbase_tlstream_tl_nret_as_ctx midgard___kbase_tlstream_tl_nret_as_ctx -+#define __kbase_tlstream_tl_nret_atom_as midgard___kbase_tlstream_tl_nret_atom_as -+#define __kbase_tlstream_tl_nret_atom_ctx midgard___kbase_tlstream_tl_nret_atom_ctx -+#define __kbase_tlstream_tl_nret_atom_lpu midgard___kbase_tlstream_tl_nret_atom_lpu -+#define __kbase_tlstream_tl_nret_ctx_lpu midgard___kbase_tlstream_tl_nret_ctx_lpu -+#define __kbase_tlstream_tl_ret_as_ctx midgard___kbase_tlstream_tl_ret_as_ctx -+#define __kbase_tlstream_tl_ret_atom_as midgard___kbase_tlstream_tl_ret_atom_as -+#define __kbase_tlstream_tl_ret_atom_ctx midgard___kbase_tlstream_tl_ret_atom_ctx -+#define __kbase_tlstream_tl_ret_atom_lpu midgard___kbase_tlstream_tl_ret_atom_lpu -+#define __kbase_tlstream_tl_ret_ctx_lpu midgard___kbase_tlstream_tl_ret_ctx_lpu -+#define kbase_unmap_external_resource midgard_kbase_unmap_external_resource -+#define kbase_update_region_flags midgard_kbase_update_region_flags -+#define kbase_vinstr_hwcnt_reader_setup midgard_kbase_vinstr_hwcnt_reader_setup -+#define kbase_vinstr_init midgard_kbase_vinstr_init -+#define kbase_vinstr_resume midgard_kbase_vinstr_resume -+#define kbase_vinstr_suspend midgard_kbase_vinstr_suspend -+#define kbase_vinstr_term midgard_kbase_vinstr_term -+#define kbase_vmap midgard_kbase_vmap -+#define kbase_vmap_prot midgard_kbase_vmap_prot -+#define kbase_vm_ops midgard_kbase_vm_ops -+#define kbase_vunmap midgard_kbase_vunmap -+#define _mali_profiling_control midgard__mali_profiling_control -+#define platform_funcs midgard_platform_funcs -+#define pm_callbacks midgard_pm_callbacks -+#define rk_kbase_device_runtime_disable midgard_rk_kbase_device_runtime_disable -+#define rk_kbase_device_runtime_init midgard_rk_kbase_device_runtime_init -+#endif -diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript -new file mode 100755 -index 000000000..ff23d7aeb ---- /dev/null -+++ b/drivers/gpu/arm/midgard/sconscript -@@ -0,0 +1,92 @@ -+# -+# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. 
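The rename.h table being dropped from the patch above relies on nothing more exotic than the preprocessor: every exported kbase symbol is #define-renamed to a midgard_-prefixed name before compilation, so a second Mali driver built into the same kernel cannot clash with its exports. A minimal self-contained sketch of the idea follows; widget_create/widget_destroy are made-up symbols used only to show the mechanism.

/* Every public symbol gets a vendor prefix at compile time, so two builds of
 * the same driver can coexist without exported-symbol collisions. */
#include <stdio.h>

#define widget_create  midgard_widget_create   /* same trick as rename.h */
#define widget_destroy midgard_widget_destroy

int widget_create(void)        /* actually compiles as midgard_widget_create() */
{
    puts("created");
    return 0;
}

int main(void)
{
    return widget_create();    /* every call site is rewritten the same way */
}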
-+# -+# + ++/* Trace atoms present in JS_NEXT */ ++#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_SLOT_NEXT, \ ++ (int)kctx->timeline.owner_tgid, \ ++ js, count); \ ++ } while (0) + -+import sys -+Import('env') ++/* Trace atoms present in JS_HEAD */ ++#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_SLOT_HEAD, \ ++ (int)kctx->timeline.owner_tgid, \ ++ js, count); \ ++ } while (0) + -+SConscript( 'tests/sconscript' ) ++/* Trace that a soft stop/evict from next is being attempted on a slot */ ++#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_SLOT_STOPPING, \ ++ (kctx) ? (int)kctx->timeline.owner_tgid : 0, \ ++ js, count); \ ++ } while (0) + -+mock_test = 0 + -+# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data. -+# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0. -+if env['platform_config']=='devicetree' or env['platform_config']=='juno_soc': -+ fake_platform_device = 0 -+else: -+ fake_platform_device = 1 + -+# Source files required for kbase. -+kbase_src = [ -+ Glob('*.c'), -+ Glob('backend/*/*.c'), -+ Glob('internal/*/*.c'), -+ Glob('ipa/*.c') -+] ++/* Trace state of overall GPU power */ ++#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_POWER_ACTIVE, active); \ ++ } while (0) + -+if env['platform_config']=='juno_soc': -+ kbase_src += [Glob('platform/devicetree/*.c')] -+else: -+ kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])] ++/* Trace state of tiler power */ ++#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_POWER_TILER_ACTIVE, \ ++ hweight64(bitmap)); \ ++ } while (0) + -+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': -+ kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] -+ mock_test = 1 ++/* Trace number of shaders currently powered */ ++#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_POWER_SHADER_ACTIVE, \ ++ hweight64(bitmap)); \ ++ } while (0) + -+# we need platform config for GPL version using fake platform -+if fake_platform_device==1: -+ # Check if we are compiling for PBX -+ if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \ -+ env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}: -+ sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n") -+ # if the file platform config file is in the tpip directory then use that, otherwise use the default config directory -+ if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])): -+ kbase_src += 
Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])) -+ else: -+ kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config'])) ++/* Trace state of L2 power */ ++#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_SET_GPU_POWER_L2_ACTIVE, \ ++ hweight64(bitmap)); \ ++ } while (0) + -+make_args = env.kernel_get_config_defines(ret_list = True, -+ fake = fake_platform_device) + [ -+ 'PLATFORM=%s' % env['platform'], -+ 'MALI_ERROR_INJECT_ON=%s' % env['error_inject'], -+ 'MALI_KERNEL_TEST_API=%s' % env['debug'], -+ 'MALI_UNIT_TEST=%s' % env['unit'], -+ 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], -+ 'MALI_MOCK_TEST=%s' % mock_test, -+ 'MALI_CUSTOMER_RELEASE=%s' % env['release'], -+ 'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'], -+ 'MALI_COVERAGE=%s' % env['coverage'], -+ 'MALI_BUS_LOG=%s' % env['buslog'] -+] ++/* Trace state of L2 cache*/ ++#define KBASE_TIMELINE_POWERING_L2(kbdev) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_FLOW_GPU_POWER_L2_POWERING, \ ++ 1); \ ++ } while (0) + -+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, -+ make_args = make_args) ++#define KBASE_TIMELINE_POWERED_L2(kbdev) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ ++ SW_FLOW_GPU_POWER_L2_ACTIVE, \ ++ 1); \ ++ } while (0) + -+# Add a dependency on kds.ko. -+# Only necessary when KDS is not built into the kernel. -+# -+if env['os'] != 'android': -+ if not env.KernelConfigEnabled("CONFIG_KDS"): -+ env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko') ++/* Trace kbase_pm_send_event message send */ ++#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ ++ SW_FLOW_PM_SEND_EVENT, \ ++ event_type, pm_event_id); \ ++ } while (0) + -+# need Module.symvers from ump.ko build -+if int(env['ump']) == 1: -+ env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko') ++/* Trace kbase_pm_worker message receive */ ++#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ ++ SW_FLOW_PM_HANDLE_EVENT, \ ++ event_type, pm_event_id); \ ++ } while (0) + -+if 'smc_protected_mode_switcher' in env: -+ env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko') + -+env.KernelObjTarget('kbase', kbase) ++/* Trace atom_id starting in JS_HEAD */ ++#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ ++ HW_START_GPU_JOB_CHAIN_SW_APPROX, \ ++ (int)kctx->timeline.owner_tgid, \ ++ js, _consumerof_atom_number); \ ++ } while (0) + -+env.AppendUnique(BASE=['cutils_linked_list']) -diff --git a/drivers/gpu/arm/midgard/tests/Kbuild b/drivers/gpu/arm/midgard/tests/Kbuild -new file mode 100755 -index 000000000..b4bed0473 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/Kbuild -@@ -0,0 +1,17 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
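Note that the power macros in this hunk (KBASE_TIMELINE_POWER_TILER/SHADER/L2) do not trace the raw core mask; they trace hweight64(bitmap), i.e. the number of set bits, which is what the timeline viewer plots as "cores powered". A small user-space sketch of the same reduction is below; the 0x0F mask and the core_count() name are illustrative only, and __builtin_popcountll is the GCC/Clang counterpart of the kernel's hweight64().

#include <stdint.h>
#include <stdio.h>

/* User-space counterpart of hweight64(): count the set bits in a core bitmap. */
static unsigned int core_count(uint64_t bitmap)
{
    return (unsigned int)__builtin_popcountll(bitmap);
}

int main(void)
{
    uint64_t shader_ready = 0x0F;   /* hypothetical: shader cores 0-3 powered */

    /* The timeline records this count, not which cores, so general
     * utilization can be plotted directly. */
    printf("%u shader cores powered\n", core_count(shader_ready));
    return 0;
}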
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/* Trace atom_id stopping on JS_HEAD */ ++#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ ++ HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \ ++ (int)kctx->timeline.owner_tgid, \ ++ js, _producerof_atom_number_completed); \ ++ } while (0) + ++/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a ++ * certin caller */ ++#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \ ++ trace_code, 1); \ ++ } while (0) + -+obj-$(CONFIG_MALI_KUTF) += kutf/ -+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ -diff --git a/drivers/gpu/arm/midgard/tests/Kconfig b/drivers/gpu/arm/midgard/tests/Kconfig -new file mode 100644 -index 000000000..da0515c06 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/Kconfig -@@ -0,0 +1,17 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/* Trace number of contexts active */ ++#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \ ++ do { \ ++ struct timespec64 ts; \ ++ ktime_get_raw_ts64(&ts); \ ++ trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \ ++ count); \ ++ } while (0) + ++/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ + -+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" -+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" -diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h -new file mode 100644 -index 000000000..0d145e42a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h -@@ -0,0 +1,65 @@ -+/* ++/** ++ * Trace that an atom is starting on a job slot + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * The caller must be holding hwaccess_lock ++ */ ++void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js); ++ ++/** ++ * Trace that an atom has done on a job slot + * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. 
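Everything the timeline header exposes is declared twice: once for CONFIG_MALI_TRACE_TIMELINE builds, and once (in the #else branch of this hunk) as CSTD_NOP() macros and empty static inline stubs, so call sites compile away cleanly without any #ifdefs of their own. Below is a compact standalone sketch of that dual-definition pattern; CONFIG_DEMO_TRACE, TRACE_COUNT and trace_flush are placeholders for this illustration, not driver symbols.

#include <stdio.h>

#define CONFIG_DEMO_TRACE 1     /* comment out to build the no-op variant */

#ifdef CONFIG_DEMO_TRACE
#define TRACE_COUNT(name, count) printf("%s=%d\n", (name), (count))
static inline void trace_flush(void) { fflush(stdout); }
#else
#define TRACE_COUNT(name, count) ((void)0)   /* same role as CSTD_NOP() */
static inline void trace_flush(void) { }     /* empty stub, still callable */
#endif

int main(void)
{
    TRACE_COUNT("atoms_in_flight", 3);  /* compiles either way, with no #ifdef here */
    trace_flush();
    return 0;
}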
++ * 'Done' in this sense can occur either because: ++ * - the atom in JS_HEAD finished ++ * - the atom in JS_NEXT was evicted + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * Whether the atom finished or was evicted is passed in @a done_code ++ * ++ * It is assumed that the atom has already been removed from the submit slot, ++ * with either: ++ * - kbasep_jm_dequeue_submit_slot() ++ * - kbasep_jm_dequeue_tail_submit_slot() + * ++ * The caller must be holding hwaccess_lock + */ ++void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js, ++ kbasep_js_atom_done_code done_code); + + ++/** Trace a pm event starting */ ++void kbase_timeline_pm_send_event(struct kbase_device *kbdev, ++ enum kbase_timeline_pm_event event_sent); + -+#ifndef _KERNEL_UTF_MEM_H_ -+#define _KERNEL_UTF_MEM_H_ ++/** Trace a pm event finishing */ ++void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); + -+/* kutf_mem.h -+ * Functions for management of memory pools in the kernel. -+ * -+ * This module implements a memory pool allocator, allowing a test -+ * implementation to allocate linked allocations which can then be freed by a -+ * single free which releases all of the resources held by the entire pool. -+ * -+ * Note that it is not possible to free single resources within the pool once -+ * allocated. -+ */ ++/** Check whether a pm event was present, and if so trace finishing it */ ++void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); + -+#include ++/** Trace L2 power-up start */ ++void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev); + -+/** -+ * struct kutf_mempool - the memory pool context management structure -+ * @head: list head on which the allocations in this context are added to -+ * -+ */ -+struct kutf_mempool { -+ struct list_head head; -+}; ++/** Trace L2 power-up done */ ++void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); + -+/** -+ * kutf_mempool_init() - Initialize a memory pool. -+ * @pool: Memory pool structure to initialize, provided by the user -+ * -+ * Return: zero on success -+ */ -+int kutf_mempool_init(struct kutf_mempool *pool); ++#else + -+/** -+ * kutf_mempool_alloc() - Allocate memory from a pool -+ * @pool: Memory pool to allocate from -+ * @size: Size of memory wanted in number of bytes -+ * -+ * Return: Pointer to memory on success, NULL on failure. -+ */ -+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); ++#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP() + -+/** -+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. 
-+ * @pool: The memory pool to free -+ */ -+void kutf_mempool_destroy(struct kutf_mempool *pool); -+#endif /* _KERNEL_UTF_MEM_H_ */ -diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h ++#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP() ++ ++#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP() ++ ++#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP() ++ ++#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP() ++ ++#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP() ++ ++#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP() ++ ++#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP() ++ ++#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP() ++ ++#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP() ++ ++#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP() ++ ++#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP() ++ ++#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() ++ ++#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() ++ ++#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP() ++ ++#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP() ++ ++#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP() ++ ++#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() ++ ++static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++} ++ ++static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, ++ struct kbase_jd_atom *katom, int js, ++ kbasep_js_atom_done_code done_code) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++} ++ ++static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) ++{ ++} ++ ++static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) ++{ ++} ++ ++static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) ++{ ++} ++ ++static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) ++{ ++} ++ ++static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) ++{ ++} ++#endif /* CONFIG_MALI_TRACE_TIMELINE */ ++ ++#endif /* _KBASE_TRACE_TIMELINE_H */ ++ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h new file mode 100644 -index 000000000..1cc85f1b7 +index 000000000..156a95a67 --- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h -@@ -0,0 +1,121 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h +@@ -0,0 +1,140 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -405579,119 +406921,138 @@ index 000000000..1cc85f1b7 + + + -+#ifndef _KERNEL_UTF_RESULTSET_H_ -+#define _KERNEL_UTF_RESULTSET_H_ -+ -+/* kutf_resultset.h -+ * Functions and structures for handling test results and result sets. 
-+ * -+ * This section of the kernel UTF contains structures and functions used for the -+ * management of Results and Result Sets. -+ */ -+ -+/** -+ * enum kutf_result_status - Status values for a single Test error. -+ * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark -+ * results. -+ * @KUTF_RESULT_SKIP: The test was skipped. -+ * @KUTF_RESULT_UNKNOWN: The test has an unknown result. -+ * @KUTF_RESULT_PASS: The test result passed. -+ * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug -+ * message. -+ * @KUTF_RESULT_INFO: The test result passed, but raised -+ * an informative message. -+ * @KUTF_RESULT_WARN: The test result passed, but raised a warning -+ * message. -+ * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. -+ * @KUTF_RESULT_FATAL: The test result failed with a fatal error. -+ * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF -+ * assertion failure. -+ * @KUTF_RESULT_COUNT: The current number of possible status messages. -+ */ -+enum kutf_result_status { -+ KUTF_RESULT_BENCHMARK = -3, -+ KUTF_RESULT_SKIP = -2, -+ KUTF_RESULT_UNKNOWN = -1, -+ -+ KUTF_RESULT_PASS = 0, -+ KUTF_RESULT_DEBUG = 1, -+ KUTF_RESULT_INFO = 2, -+ KUTF_RESULT_WARN = 3, -+ KUTF_RESULT_FAIL = 4, -+ KUTF_RESULT_FATAL = 5, -+ KUTF_RESULT_ABORT = 6, -+ -+ KUTF_RESULT_COUNT -+}; -+ -+/* The maximum size of a kutf_result_status result when -+ * converted to a string -+ */ -+#define KUTF_ERROR_MAX_NAME_SIZE 21 -+ -+#ifdef __KERNEL__ + -+#include + -+/** -+ * struct kutf_result - Represents a single test result. -+ * @node: Next result in the list of results. -+ * @status: The status summary (pass / warn / fail / etc). -+ * @message: A more verbose status message. -+ */ -+struct kutf_result { -+ struct list_head node; -+ enum kutf_result_status status; -+ const char *message; -+}; ++/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + -+/** -+ * kutf_create_result_set() - Create a new result set -+ * to which results can be added. ++/* ++ * Conventions on Event Names: + * -+ * Return: The created resultset. ++ * - The prefix determines something about how the timeline should be ++ * displayed, and is split up into various parts, separated by underscores: ++ * - 'SW' and 'HW' as the first part will be used to determine whether a ++ * timeline is to do with Software or Hardware - effectively, separate ++ * 'channels' for Software and Hardware ++ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and ++ * signify related pairs of events - these are optional. ++ * - 'FLOW' indicates a generic event, which can use dependencies ++ * - This gives events such as: ++ * - 'SW_ENTER_FOO' ++ * - 'SW_LEAVE_FOO' ++ * - 'SW_FLOW_BAR_1' ++ * - 'SW_FLOW_BAR_2' ++ * - 'HW_START_BAZ' ++ * - 'HW_STOP_BAZ' ++ * - And an unadorned HW event: ++ * - 'HW_BAZ_FROZBOZ' + */ -+struct kutf_result_set *kutf_create_result_set(void); + -+/** -+ * kutf_add_result() - Add a result to the end of an existing resultset. ++/* ++ * Conventions on parameter names: ++ * - anything with 'instance' in the name will have a separate timeline based ++ * on that instances. ++ * - underscored-prefixed parameters will by hidden by default on timelines + * -+ * @mempool: The memory pool to allocate the result storage from. -+ * @set: The resultset to add the result to. -+ * @status: The result status to add. -+ * @message: The result message to add. 
-+ */ -+void kutf_add_result(struct kutf_mempool *mempool, struct kutf_result_set *set, -+ enum kutf_result_status status, const char *message); -+ -+/** -+ * kutf_remove_result() - Remove a result from the head of a resultset. -+ * @set: The resultset. ++ * Hence: ++ * - Different job slots have their own 'instance', based on the instance value ++ * - Per-context info (e.g. atoms on a context) have their own 'instance' ++ * (i.e. each context should be on a different timeline) + * -+ * Return: result or NULL if there are no further results in the resultset. ++ * Note that globally-shared resources can be tagged with a tgid, but we don't ++ * want an instance per context: ++ * - There's no point having separate Job Slot timelines for each context, that ++ * would be confusing - there's only really 3 job slots! ++ * - There's no point having separate Shader-powered timelines for each ++ * context, that would be confusing - all shader cores (whether it be 4, 8, ++ * etc) are shared in the system. + */ -+struct kutf_result *kutf_remove_result( -+ struct kutf_result_set *set); + -+/** -+ * kutf_destroy_result_set() - Free a previously created resultset. -+ * -+ * @results: The result set whose resources to free. -+ */ -+void kutf_destroy_result_set(struct kutf_result_set *results); ++ /* ++ * CTX events ++ */ ++ /* Separate timelines for each context 'instance'*/ ++ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"), ++ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"), + -+#endif /* __KERNEL__ */ ++ /* ++ * SW Events ++ */ ++ /* Separate timelines for each slot 'instance' */ ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"), ++ /* Shader and overall power is shared - can't have separate instances of ++ * it, just tagging with the context */ ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"), ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"), + -+#endif /* _KERNEL_UTF_RESULTSET_H_ */ -diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h ++ /* SW Power event messaging. 
_event_type is one from the kbase_pm_event enum */ ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"), ++ /* SW L2 power events */ ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"), ++ ++ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"), ++ ++ /* ++ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock() ++ */ ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"), ++ ++ /* ++ * Significant Indirect callers of kbase_pm_check_transitions_nolock() ++ */ ++ /* kbase_pm_request_cores */ ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"), ++ /* kbase_pm_release_cores */ ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"), ++ 
KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"), ++ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"), ++ /* ++ * END: SW Functions that call kbase_pm_check_transitions_nolock() ++ */ ++ ++ /* ++ * HW Events ++ */ ++ KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT, ++"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"), ++ KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, ++"HW: Job Chain start (SW approximated)", "%d,%d,%d", ++"_tgid,job_slot,_consumerof_atom_number_ready"), ++ KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, ++"HW: Job Chain stop (SW approximated)", "%d,%d,%d", ++"_tgid,job_slot,_producerof_atom_number_completed") +diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h new file mode 100644 -index 000000000..754c3adb1 +index 000000000..c22a59324 --- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h -@@ -0,0 +1,508 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h +@@ -0,0 +1,545 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2008-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -405706,656 +407067,543 @@ index 000000000..754c3adb1 + + + -+#ifndef _KERNEL_UTF_SUITE_H_ -+#define _KERNEL_UTF_SUITE_H_ + -+/* kutf_suite.h -+ * Functions for management of test suites. -+ * -+ * This collection of data structures, macros, and functions are used to -+ * create Test Suites, Tests within those Test Suites, and Fixture variants -+ * of each test. -+ */ + -+#include -+#include ++#ifndef _KBASE_UKU_H_ ++#define _KBASE_UKU_H_ + -+/** -+ * Pseudo-flag indicating an absence of any specified test class. Note that -+ * tests should not be annotated with this constant as it is simply a zero -+ * value; tests without a more specific class must be marked with the flag -+ * KUTF_F_TEST_GENERIC. 
-+ */ -+#define KUTF_F_TEST_NONE ((unsigned int)(0)) ++#include "mali_uk.h" ++#include "mali_base_kernel.h" + -+/** -+ * Class indicating this test is a smoke test. -+ * A given set of smoke tests should be quick to run, enabling rapid turn-around -+ * of "regress-on-commit" test runs. -+ */ -+#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) ++/* This file needs to support being included from kernel and userside (which use different defines) */ ++#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON ++#define SUPPORT_MALI_ERROR_INJECT ++#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */ ++#if defined(CONFIG_MALI_NO_MALI) ++#define SUPPORT_MALI_NO_MALI ++#elif defined(MALI_NO_MALI) ++#if MALI_NO_MALI ++#define SUPPORT_MALI_NO_MALI ++#endif ++#endif + -+/** -+ * Class indicating this test is a performance test. -+ * These tests typically produce a performance metric, such as "time to run" or -+ * "frames per second", -+ */ -+#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) ++#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT) ++#include "backend/gpu/mali_kbase_model_dummy.h" ++#endif + -+/** -+ * Class indicating that this test is a deprecated test. -+ * These tests have typically been replaced by an alternative test which is -+ * more efficient, or has better coverage. -+ */ -+#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) ++#include "mali_kbase_gpuprops_types.h" + -+/** -+ * Class indicating that this test is a known failure. -+ * These tests have typically been run and failed, but marking them as a known -+ * failure means it is easier to triage results. ++/* ++ * 10.1: ++ * - Do mmap in kernel for SAME_VA memory allocations rather then ++ * calling back into the kernel as a 2nd stage of the allocation request. + * -+ * It is typically more convenient to triage known failures using the -+ * results database and web UI, as this means there is no need to modify the -+ * test code. ++ * 10.2: ++ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA ++ * region for use with JIT (ignored on 32-bit platforms) ++ * ++ * 10.3: ++ * - base_jd_core_req typedef-ed to u32 (instead of to u16) ++ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END ++ * ++ * 10.4: ++ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests ++ * ++ * 10.5: ++ * - Reverted to performing mmap in user space so that tools like valgrind work. ++ * ++ * 10.6: ++ * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE + */ -+#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) ++#define BASE_UK_VERSION_MAJOR 10 ++#define BASE_UK_VERSION_MINOR 6 + -+/** -+ * Class indicating that this test is a generic test, which is not a member of -+ * a more specific test class. Tests which are not created with a specific set -+ * of filter flags by the user are assigned this test class by default. -+ */ -+#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) ++#define LINUX_UK_BASE_MAGIC 0x80 + -+/** -+ * Class indicating this test is a resource allocation failure test. -+ * A resource allocation failure test will test that an error code is -+ * correctly propagated when an allocation fails. 
-+ */ -+#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) ++struct kbase_uk_mem_alloc { ++ union uk_header header; ++ /* IN */ ++ u64 va_pages; ++ u64 commit_pages; ++ u64 extent; ++ /* IN/OUT */ ++ u64 flags; ++ /* OUT */ ++ u64 gpu_va; ++ u16 va_alignment; ++ u8 padding[6]; ++}; + -+/** -+ * Additional flag indicating that this test is an expected failure when -+ * run in resource failure mode. These tests are never run when running -+ * the low resource mode. -+ */ -+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) ++struct kbase_uk_mem_free { ++ union uk_header header; ++ /* IN */ ++ u64 gpu_addr; ++ /* OUT */ ++}; + -+/** -+ * Flag reserved for user-defined filter zero. -+ */ -+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) ++struct kbase_uk_mem_alias { ++ union uk_header header; ++ /* IN/OUT */ ++ u64 flags; ++ /* IN */ ++ u64 stride; ++ u64 nents; ++ union kbase_pointer ai; ++ /* OUT */ ++ u64 gpu_va; ++ u64 va_pages; ++}; + -+/** -+ * Flag reserved for user-defined filter one. -+ */ -+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) ++struct kbase_uk_mem_import { ++ union uk_header header; ++ /* IN */ ++ union kbase_pointer phandle; ++ u32 type; ++ u32 padding; ++ /* IN/OUT */ ++ u64 flags; ++ /* OUT */ ++ u64 gpu_va; ++ u64 va_pages; ++}; + -+/** -+ * Flag reserved for user-defined filter two. -+ */ -+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) ++struct kbase_uk_mem_flags_change { ++ union uk_header header; ++ /* IN */ ++ u64 gpu_va; ++ u64 flags; ++ u64 mask; ++}; + -+/** -+ * Flag reserved for user-defined filter three. -+ */ -+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) ++struct kbase_uk_job_submit { ++ union uk_header header; ++ /* IN */ ++ union kbase_pointer addr; ++ u32 nr_atoms; ++ u32 stride; /* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */ ++ /* OUT */ ++}; + -+/** -+ * Flag reserved for user-defined filter four. -+ */ -+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) ++struct kbase_uk_post_term { ++ union uk_header header; ++}; + -+/** -+ * Flag reserved for user-defined filter five. -+ */ -+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) ++struct kbase_uk_sync_now { ++ union uk_header header; + -+/** -+ * Flag reserved for user-defined filter six. -+ */ -+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) ++ /* IN */ ++ struct base_syncset sset; + -+/** -+ * Flag reserved for user-defined filter seven. -+ */ -+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) ++ /* OUT */ ++}; + -+/** -+ * Pseudo-flag indicating that all test classes should be executed. -+ */ -+#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) ++struct kbase_uk_hwcnt_setup { ++ union uk_header header; + -+/** -+ * union kutf_callback_data - Union used to store test callback data -+ * @ptr_value: pointer to the location where test callback data -+ * are stored -+ * @u32_value: a number which represents test callback data -+ */ -+union kutf_callback_data { -+ void *ptr_value; -+ u32 u32_value; ++ /* IN */ ++ u64 dump_buffer; ++ u32 jm_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 unused_1; /* keep for backwards compatibility */ ++ u32 mmu_l2_bm; ++ u32 padding; ++ /* OUT */ +}; + +/** -+ * struct kutf_context - Structure representing a kernel test context -+ * @suite: Convenience pointer to the suite this context -+ * is running -+ * @test_fix: The fixture that is being run in this context -+ * @fixture_pool: The memory pool used for the duration of -+ * the fixture/text context. -+ * @fixture: The user provided fixture structure. 
-+ * @fixture_index: The index (id) of the current fixture. -+ * @fixture_name: The name of the current fixture (or NULL if unnamed). -+ * @test_data: Any user private data associated with this test -+ * @result_set: All the results logged by this test context -+ * @status: The status of the currently running fixture. -+ * @expected_status: The expected status on exist of the currently -+ * running fixture. ++ * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @buffer_count: requested number of dumping buffers ++ * @jm_bm: counters selection bitmask (JM) ++ * @shader_bm: counters selection bitmask (Shader) ++ * @tiler_bm: counters selection bitmask (Tiler) ++ * @mmu_l2_bm: counters selection bitmask (MMU_L2) ++ * @fd: dumping notification file descriptor ++ * ++ * This structure sets up HWC dumper/reader for this context. ++ * Multiple instances can be created for single context. + */ -+struct kutf_context { -+ struct kutf_suite *suite; -+ struct kutf_test_fixture *test_fix; -+ struct kutf_mempool fixture_pool; -+ void *fixture; -+ unsigned int fixture_index; -+ const char *fixture_name; -+ union kutf_callback_data test_data; -+ struct kutf_result_set *result_set; -+ enum kutf_result_status status; -+ enum kutf_result_status expected_status; ++struct kbase_uk_hwcnt_reader_setup { ++ union uk_header header; ++ ++ /* IN */ ++ u32 buffer_count; ++ u32 jm_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 mmu_l2_bm; ++ ++ /* OUT */ ++ s32 fd; +}; + -+/** -+ * struct kutf_suite - Structure representing a kernel test suite -+ * @app: The application this suite belongs to. -+ * @name: The name of this suite. -+ * @suite_data: Any user private data associated with this -+ * suite. -+ * @create_fixture: Function used to create a new fixture instance -+ * @remove_fixture: Function used to destroy a new fixture instance -+ * @fixture_variants: The number of variants (must be at least 1). -+ * @suite_default_flags: Suite global filter flags which are set on -+ * all tests. -+ * @node: List node for suite_list -+ * @dir: The debugfs directory for this suite -+ * @test_list: List head to store all the tests which are -+ * part of this suite -+ */ -+struct kutf_suite { -+ struct kutf_application *app; -+ const char *name; -+ union kutf_callback_data suite_data; -+ void *(*create_fixture)(struct kutf_context *context); -+ void (*remove_fixture)(struct kutf_context *context); -+ unsigned int fixture_variants; -+ unsigned int suite_default_flags; -+ struct list_head node; -+ struct dentry *dir; -+ struct list_head test_list; ++struct kbase_uk_hwcnt_dump { ++ union uk_header header; +}; + -+/* ============================================================================ -+ Application functions -+============================================================================ */ ++struct kbase_uk_hwcnt_clear { ++ union uk_header header; ++}; + -+/** -+ * kutf_create_application() - Create an in kernel test application. -+ * @name: The name of the test application. -+ * -+ * Return: pointer to the kutf_application on success or NULL -+ * on failure -+ */ -+struct kutf_application *kutf_create_application(const char *name); ++struct kbase_uk_fence_validate { ++ union uk_header header; ++ /* IN */ ++ s32 fd; ++ u32 padding; ++ /* OUT */ ++}; + -+/** -+ * kutf_destroy_application() - Destroy an in kernel test application. -+ * -+ * @app: The test application to destroy. 
-+ */ -+void kutf_destroy_application(struct kutf_application *app); ++struct kbase_uk_stream_create { ++ union uk_header header; ++ /* IN */ ++ char name[32]; ++ /* OUT */ ++ s32 fd; ++ u32 padding; ++}; + -+/* ============================================================================ -+ Suite functions -+============================================================================ */ ++struct kbase_uk_gpuprops { ++ union uk_header header; + -+/** -+ * kutf_create_suite() - Create a kernel test suite. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. -+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * -+ * Suite names must be unique. Should two suites with the same name be -+ * registered with the same application then this function will fail, if they -+ * are registered with different applications then the function will not detect -+ * this and the call will succeed. -+ * -+ * Return: pointer to the created kutf_suite on success or NULL -+ * on failure -+ */ -+struct kutf_suite *kutf_create_suite( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context)); ++ /* IN */ ++ struct mali_base_gpu_props props; ++ /* OUT */ ++}; + -+/** -+ * kutf_create_suite_with_filters() - Create a kernel test suite with user -+ * defined default filters. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. -+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * @filters: Filters to apply to a test if it doesn't provide its own -+ * -+ * Suite names must be unique. Should two suites with the same name be -+ * registered with the same application then this function will fail, if they -+ * are registered with different applications then the function will not detect -+ * this and the call will succeed. -+ * -+ * Return: pointer to the created kutf_suite on success or NULL on failure -+ */ -+struct kutf_suite *kutf_create_suite_with_filters( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters); ++struct kbase_uk_mem_query { ++ union uk_header header; ++ /* IN */ ++ u64 gpu_addr; ++#define KBASE_MEM_QUERY_COMMIT_SIZE 1 ++#define KBASE_MEM_QUERY_VA_SIZE 2 ++#define KBASE_MEM_QUERY_FLAGS 3 ++ u64 query; ++ /* OUT */ ++ u64 value; ++}; + -+/** -+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with -+ * user defined default filters. -+ * @app: The test application to create the suite in. -+ * @name: The name of the suite. -+ * @fixture_count: The number of fixtures to run over the test -+ * functions in this suite -+ * @create_fixture: Callback used to create a fixture. 
The returned value -+ * is stored in the fixture pointer in the context for -+ * use in the test functions. -+ * @remove_fixture: Callback used to remove a previously created fixture. -+ * @filters: Filters to apply to a test if it doesn't provide its own -+ * @suite_data: Suite specific callback data, provided during the -+ * running of the test in the kutf_context -+ * -+ * Return: pointer to the created kutf_suite on success or NULL -+ * on failure -+ */ -+struct kutf_suite *kutf_create_suite_with_filters_and_data( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data suite_data); ++struct kbase_uk_mem_commit { ++ union uk_header header; ++ /* IN */ ++ u64 gpu_addr; ++ u64 pages; ++ /* OUT */ ++ u32 result_subcode; ++ u32 padding; ++}; + -+/** -+ * kutf_add_test() - Add a test to a kernel test suite. -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * -+ * Note: As no filters are provided the test will use the suite filters instead -+ */ -+void kutf_add_test(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context)); ++struct kbase_uk_find_cpu_offset { ++ union uk_header header; ++ /* IN */ ++ u64 gpu_addr; ++ u64 cpu_addr; ++ u64 size; ++ /* OUT */ ++ u64 offset; ++}; + -+/** -+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * @filters: A set of filtering flags, assigning test categories. -+ */ -+void kutf_add_test_with_filters(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters); ++#define KBASE_GET_VERSION_BUFFER_SIZE 64 ++struct kbase_uk_get_ddk_version { ++ union uk_header header; ++ /* OUT */ ++ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE]; ++ u32 version_string_size; ++ u32 padding; ++ u32 rk_version; ++}; + -+/** -+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite -+ * with filters. -+ * @suite: The suite to add the test to. -+ * @id: The ID of the test. -+ * @name: The name of the test. -+ * @execute: Callback to the test function to run. -+ * @filters: A set of filtering flags, assigning test categories. -+ * @test_data: Test specific callback data, provoided during the -+ * running of the test in the kutf_context -+ */ -+void kutf_add_test_with_filters_and_data( -+ struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data test_data); ++struct kbase_uk_disjoint_query { ++ union uk_header header; ++ /* OUT */ ++ u32 counter; ++ u32 padding; ++}; + -+/* ============================================================================ -+ Test functions -+============================================================================ */ -+/** -+ * kutf_test_log_result_external() - Log a result which has been created -+ * externally into a in a standard form -+ * recognized by the log parser. 
-+ * @context: The test context the test is running in -+ * @message: The message for this result -+ * @new_status: The result status of this log message -+ */ -+void kutf_test_log_result_external( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status); ++struct kbase_uk_set_flags { ++ union uk_header header; ++ /* IN */ ++ u32 create_flags; ++ u32 padding; ++}; + -+/** -+ * kutf_test_expect_abort() - Tell the kernel that you expect the current -+ * fixture to produce an abort. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_abort(struct kutf_context *context); ++#if MALI_UNIT_TEST ++#define TEST_ADDR_COUNT 4 ++#define KBASE_TEST_BUFFER_SIZE 128 ++struct kbase_exported_test_data { ++ u64 test_addr[TEST_ADDR_COUNT]; /**< memory address */ ++ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */ ++ union kbase_pointer kctx; /**< base context created by process */ ++ union kbase_pointer mm; /**< pointer to process address space */ ++ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ ++ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ ++}; + -+/** -+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current -+ * fixture to produce a fatal error. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_fatal(struct kutf_context *context); ++struct kbase_uk_set_test_data { ++ union uk_header header; ++ /* IN */ ++ struct kbase_exported_test_data test_data; ++}; + -+/** -+ * kutf_test_expect_fail() - Tell the kernel that you expect the current -+ * fixture to fail. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_fail(struct kutf_context *context); ++#endif /* MALI_UNIT_TEST */ + -+/** -+ * kutf_test_expect_warn() - Tell the kernel that you expect the current -+ * fixture to produce a warning. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_warn(struct kutf_context *context); ++#ifdef SUPPORT_MALI_ERROR_INJECT ++struct kbase_uk_error_params { ++ union uk_header header; ++ /* IN */ ++ struct kbase_error_params params; ++}; ++#endif /* SUPPORT_MALI_ERROR_INJECT */ + -+/** -+ * kutf_test_expect_pass() - Tell the kernel that you expect the current -+ * fixture to pass. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_expect_pass(struct kutf_context *context); ++#ifdef SUPPORT_MALI_NO_MALI ++struct kbase_uk_model_control_params { ++ union uk_header header; ++ /* IN */ ++ struct kbase_model_control_params params; ++}; ++#endif /* SUPPORT_MALI_NO_MALI */ + -+/** -+ * kutf_test_skip() - Tell the kernel that the test should be skipped. -+ * @context: The test context this test is running in. -+ */ -+void kutf_test_skip(struct kutf_context *context); ++#ifdef BASE_LEGACY_UK8_SUPPORT ++struct kbase_uk_keep_gpu_powered { ++ union uk_header header; ++ u32 enabled; ++ u32 padding; ++}; ++#endif /* BASE_LEGACY_UK8_SUPPORT */ ++ ++struct kbase_uk_profiling_controls { ++ union uk_header header; ++ u32 profiling_controls[FBDUMP_CONTROL_MAX]; ++}; ++ ++struct kbase_uk_debugfs_mem_profile_add { ++ union uk_header header; ++ u32 len; ++ u32 padding; ++ union kbase_pointer buf; ++}; ++ ++struct kbase_uk_context_id { ++ union uk_header header; ++ /* OUT */ ++ int id; ++}; + +/** -+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, -+ * supplying a reason string. 
-+ * @context: The test context this test is running in. -+ * @message: A message string containing the reason for the skip. ++ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @flags: timeline stream flags ++ * @fd: timeline stream file descriptor + * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a prebaked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used when performing a call to acquire kernel side timeline ++ * stream file descriptor. + */ -+void kutf_test_skip_msg(struct kutf_context *context, const char *message); ++struct kbase_uk_tlstream_acquire { ++ union uk_header header; ++ /* IN */ ++ u32 flags; ++ /* OUT */ ++ s32 fd; ++}; + +/** -+ * kutf_test_pass() - Tell the kernel that this test has passed. -+ * @context: The test context this test is running in. -+ * @message: A message string containing the reason for the pass. ++ * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange ++ * structure ++ * @header: UK structure header ++ * @fd: timeline stream file descriptor + * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used when performing a call to acquire kernel side timeline ++ * stream file descriptor. + */ -+void kutf_test_pass(struct kutf_context *context, char const *message); ++struct kbase_uk_tlstream_acquire_v10_4 { ++ union uk_header header; ++ /* IN */ ++ /* OUT */ ++ s32 fd; ++}; + +/** -+ * kutf_test_debug() - Send a debug message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the debug information. ++ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure ++ * @header: UK structure header + * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used when performing a call to flush kernel side ++ * timeline streams. + */ -+void kutf_test_debug(struct kutf_context *context, char const *message); ++struct kbase_uk_tlstream_flush { ++ union uk_header header; ++ /* IN */ ++ /* OUT */ ++}; + ++#if MALI_UNIT_TEST +/** -+ * kutf_test_info() - Send an information message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the information message. ++ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @tpw_count: number of trace point writers in each context ++ * @msg_delay: time delay between tracepoints from one writer in milliseconds ++ * @msg_count: number of trace points written by one writer ++ * @aux_msg: if non-zero aux messages will be included + * -+ * Note: The message must not be freed during the lifetime of the test run. 
-+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used when performing a call to start timeline stream test ++ * embedded in kernel. + */ -+void kutf_test_info(struct kutf_context *context, char const *message); ++struct kbase_uk_tlstream_test { ++ union uk_header header; ++ /* IN */ ++ u32 tpw_count; ++ u32 msg_delay; ++ u32 msg_count; ++ u32 aux_msg; ++ /* OUT */ ++}; + +/** -+ * kutf_test_warn() - Send a warning message -+ * @context: The test context this test is running in. -+ * @message: A message string containing the warning message. ++ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @bytes_collected: number of bytes read by user ++ * @bytes_generated: number of bytes generated by tracepoints + * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used when performing a call to obtain timeline stream ++ * statistics. + */ -+void kutf_test_warn(struct kutf_context *context, char const *message); ++struct kbase_uk_tlstream_stats { ++ union uk_header header; /**< UK structure header. */ ++ /* IN */ ++ /* OUT */ ++ u32 bytes_collected; ++ u32 bytes_generated; ++}; ++#endif /* MALI_UNIT_TEST */ + +/** -+ * kutf_test_fail() - Tell the kernel that a test has failed -+ * @context: The test context this test is running in. -+ * @message: A message string containing the failure message. -+ * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl ++ * @header: UK structure header ++ * @data: Counter samples for the dummy model ++ * @size:............Size of the counter sample data + */ -+void kutf_test_fail(struct kutf_context *context, char const *message); ++struct kbase_uk_prfcnt_values { ++ union uk_header header; ++ /* IN */ ++ u32 *data; ++ u32 size; ++}; + +/** -+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error -+ * @context: The test context this test is running in. -+ * @message: A message string containing the fatal error message. ++ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @evt: the GPU address containing the event ++ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or ++ * BASE_JD_SOFT_EVENT_RESET ++ * @flags: reserved for future uses, must be set to 0 + * -+ * Note: The message must not be freed during the lifetime of the test run. -+ * This means it should either be a pre-baked string, or if a dynamic string -+ * is required it must be created with kutf_dsprintf which will store -+ * the resultant string in a buffer who's lifetime is the same as the test run. ++ * This structure is used to update the status of a software event. 
If the ++ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting ++ * on this event will complete. + */ -+void kutf_test_fatal(struct kutf_context *context, char const *message); ++struct kbase_uk_soft_event_update { ++ union uk_header header; ++ /* IN */ ++ u64 evt; ++ u32 new_status; ++ u32 flags; ++}; + +/** -+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test ++ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure ++ * @header: UK structure header ++ * @va_pages: Number of virtual pages required for JIT + * -+ * @context: The test context this test is running in. ++ * This structure is used when requesting initialization of JIT. + */ -+void kutf_test_abort(struct kutf_context *context); ++struct kbase_uk_mem_jit_init { ++ union uk_header header; ++ /* IN */ ++ u64 va_pages; ++}; + -+#endif /* _KERNEL_UTF_SUITE_H_ */ -diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h -new file mode 100644 -index 000000000..c458c1f73 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h -@@ -0,0 +1,55 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. -+ * -+ */ ++enum kbase_uk_function_id { ++ KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), ++ KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1), ++ KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2), ++ KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3), ++ KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4), ++ KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5), ++ KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6), + ++#ifdef BASE_LEGACY_UK6_SUPPORT ++ KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7), ++#endif /* BASE_LEGACY_UK6_SUPPORT */ + ++ KBASE_FUNC_SYNC = (UK_FUNC_ID + 8), + -+#ifndef _KERNEL_UTF_UTILS_H_ -+#define _KERNEL_UTF_UTILS_H_ ++ KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9), + -+/* kutf_utils.h -+ * Utilities for the kernel UTF test infrastructure. -+ * -+ * This collection of library functions are provided for use by kernel UTF -+ * and users of kernel UTF which don't directly fit within the other -+ * code modules. -+ */ ++ KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10), ++ KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11), ++ KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12), + -+#include ++ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14), + -+/** -+ * Maximum size of the message strings within kernel UTF, messages longer then -+ * this will be truncated. -+ */ -+#define KUTF_MAX_DSPRINTF_LEN 1024 ++ KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), + -+/** -+ * kutf_dsprintf() - dynamic sprintf -+ * @pool: memory pool to allocate from -+ * @fmt: The format string describing the string to document. -+ * @... The parameters to feed in to the format string. -+ * -+ * This function implements sprintf which dynamically allocates memory to store -+ * the string. The library will free the memory containing the string when the -+ * result set is cleared or destroyed. 
-+ * -+ * Note The returned string may be truncated to fit an internal temporary -+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. -+ * -+ * Return: Returns pointer to allocated string, or NULL on error. -+ */ -+const char *kutf_dsprintf(struct kutf_mempool *pool, -+ const char *fmt, ...); ++ KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), ++ KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), + -+#endif /* _KERNEL_UTF_UTILS_H_ */ -diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/drivers/gpu/arm/midgard/tests/kutf/Kbuild -new file mode 100755 -index 000000000..6b840c2ef ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/Kbuild -@@ -0,0 +1,20 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), ++ KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20), ++ KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21), + ++#ifdef BASE_LEGACY_UK8_SUPPORT ++ KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22), ++#endif /* BASE_LEGACY_UK8_SUPPORT */ + -+ccflags-y += -I$(src)/../include ++ KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23), ++ KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24), ++ KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25), ++ KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26), ++ /* to be used only for testing ++ * purposes, otherwise these controls ++ * are set through gator API */ + -+obj-$(CONFIG_MALI_KUTF) += kutf.o ++ KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27), ++ KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28), ++ KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29), + -+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o -diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/drivers/gpu/arm/midgard/tests/kutf/Kconfig -new file mode 100644 -index 000000000..84364716a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/Kconfig -@@ -0,0 +1,22 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), + ++ KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32), ++#if MALI_UNIT_TEST ++ KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), ++ KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), ++#endif /* MALI_UNIT_TEST */ ++ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), + ++ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), + -+config MALI_KUTF -+ tristate "Mali Kernel Unit Test Framework" -+ default n -+ help -+ Enables MALI testing framework. To compile it as a module, -+ choose M here - this will generate a single module called kutf. 
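The KUTF headers and sources added above expose a small in-kernel test API: an application (kutf_create_application()) owns suites (kutf_create_suite()), suites own tests (kutf_add_test()), each test receives a struct kutf_context, and results are reported with kutf_test_pass()/kutf_test_fail(), with kutf_dsprintf() and kutf_mempool_alloc() providing pool-backed strings and allocations that live for the duration of the run. The sketch below is illustrative only and is not part of the patch: it shows how a hypothetical out-of-tree test module might register one suite with a single unnamed fixture. The module, application, suite and test names, and the <kutf/...> include paths (assumed from the tests/include/kutf layout and the Kbuild ccflags above), are assumptions for the example.

/*
 * Illustrative sketch only -- not part of the patch. Exercises the KUTF
 * API declared in kutf_suite.h / kutf_utils.h / kutf_mem.h above.
 * Module, suite and test names here are invented for the example.
 */
#include <linux/module.h>
#include <linux/errno.h>

#include <kutf/kutf_suite.h>
#include <kutf/kutf_utils.h>

static struct kutf_application *example_app;

/* Test body: report the result through the supplied test context. */
static void example_test(struct kutf_context *context)
{
	void *buf;

	/* Per-test allocations can come from the fixture memory pool;
	 * they are freed when the fixture/test context is torn down. */
	buf = kutf_mempool_alloc(&context->fixture_pool, 64);
	if (!buf) {
		kutf_test_fail(context, "mempool allocation failed");
		return;
	}

	/* kutf_dsprintf() keeps the message alive for the test run. */
	kutf_test_pass(context, kutf_dsprintf(&context->fixture_pool,
			"allocated %d bytes from the fixture pool", 64));
}

static int __init example_kutf_test_init(void)
{
	struct kutf_suite *suite;

	example_app = kutf_create_application("example_app");
	if (!example_app)
		return -ENOMEM;

	/* One fixture variant; no create/remove fixture callbacks needed. */
	suite = kutf_create_suite(example_app, "example_suite", 1, NULL, NULL);
	if (!suite) {
		kutf_destroy_application(example_app);
		return -ENOMEM;
	}

	kutf_add_test(suite, 0x0, "example_test", example_test);
	return 0;
}

static void __exit example_kutf_test_exit(void)
{
	kutf_destroy_application(example_app);
}

module_init(example_kutf_test_init);
module_exit(example_kutf_test_exit);
MODULE_LICENSE("GPL");

With CONFIG_MALI_KUTF=m (per the Kconfig entry above) the framework itself is built as the single kutf module, whose exported kutf_* symbols a test module like this would use; the tests would then be driven from user space through the debugfs entries created by kutf_suite.c.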
-diff --git a/drivers/gpu/arm/midgard/tests/kutf/Makefile b/drivers/gpu/arm/midgard/tests/kutf/Makefile -new file mode 100644 -index 000000000..010c92ca3 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/Makefile -@@ -0,0 +1,29 @@ -+# -+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++#ifdef SUPPORT_MALI_NO_MALI ++ KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), ++#endif + ++ KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), + -+# linux build system bootstrap for out-of-tree module ++ KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), + -+# default to building for the host -+ARCH ?= $(shell uname -m) ++ KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40), + -+ifeq ($(KDIR),) -+$(error Must specify KDIR to point to the kernel to target)) -+endif ++ KBASE_FUNC_MAX ++}; + -+all: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules ++#endif /* _KBASE_UKU_H_ */ + -+clean: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean -diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c new file mode 100644 -index 000000000..5408e57d4 +index 000000000..be474ff87 --- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c -@@ -0,0 +1,94 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c +@@ -0,0 +1,33 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -406370,92 +407618,31 @@ index 000000000..5408e57d4 + + + -+/* Kernel UTF memory management functions */ -+ -+#include -+#include -+#include -+ -+#include -+ -+ -+/** -+ * struct kutf_alloc_entry - Structure representing an allocation. -+ * @node: List node for use with kutf_mempool. 
-+ * @data: Data area of the allocation -+ */ -+struct kutf_alloc_entry { -+ struct list_head node; -+ u8 data[0]; -+}; -+ -+int kutf_mempool_init(struct kutf_mempool *pool) -+{ -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ return -1; -+ } -+ -+ INIT_LIST_HEAD(&pool->head); -+ -+ return 0; -+} -+EXPORT_SYMBOL(kutf_mempool_init); -+ -+void kutf_mempool_destroy(struct kutf_mempool *pool) -+{ -+ struct list_head *remove; -+ struct list_head *tmp; -+ -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ return; -+ } + -+ list_for_each_safe(remove, tmp, &pool->head) { -+ struct kutf_alloc_entry *remove_alloc; + -+ remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); -+ list_del(&remove_alloc->node); -+ kfree(remove_alloc); -+ } -+} -+EXPORT_SYMBOL(kutf_mempool_destroy); ++#include + -+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) ++bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) +{ -+ struct kutf_alloc_entry *ret; ++ struct list_head *pos = base->next; + -+ if (!pool) { -+ pr_err("NULL pointer passed to %s\n", __func__); -+ goto fail_pool; -+ } ++ while (pos != base) { ++ if (pos == entry) ++ return true; + -+ ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); -+ if (!ret) { -+ pr_err("Failed to allocate memory\n"); -+ goto fail_alloc; ++ pos = pos->next; + } -+ -+ INIT_LIST_HEAD(&ret->node); -+ list_add(&ret->node, &pool->head); -+ -+ return &ret->data[0]; -+ -+fail_alloc: -+fail_pool: -+ return NULL; ++ return false; +} -+EXPORT_SYMBOL(kutf_mempool_alloc); -diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h new file mode 100644 -index 000000000..5bd04969f +index 000000000..fd7252dab --- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c -@@ -0,0 +1,95 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h +@@ -0,0 +1,37 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -406470,93 +407657,35 @@ index 000000000..5bd04969f + + + -+/* Kernel UTF result management functions */ + -+#include -+#include -+#include + -+#include ++#ifndef _KBASE_UTILITY_H ++#define _KBASE_UTILITY_H + -+/** -+ * struct kutf_result_set - Represents a set of results. -+ * @results: Pointer to the linked list where the results are stored. ++#ifndef _KBASE_H_ ++#error "Don't include this file directly, use mali_kbase.h instead" ++#endif ++ ++/** Test whether the given list entry is a member of the given list. 
++ * ++ * @param base The head of the list to be tested ++ * @param entry The list entry to be tested ++ * ++ * @return true if entry is a member of base ++ * false otherwise + */ -+struct kutf_result_set { -+ struct list_head results; -+}; ++bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); + -+struct kutf_result_set *kutf_create_result_set(void) -+{ -+ struct kutf_result_set *set; -+ -+ set = kmalloc(sizeof(*set), GFP_KERNEL); -+ if (!set) { -+ pr_err("Failed to allocate resultset"); -+ goto fail_alloc; -+ } -+ -+ INIT_LIST_HEAD(&set->results); -+ -+ return set; -+ -+fail_alloc: -+ return NULL; -+} -+ -+void kutf_add_result(struct kutf_mempool *mempool, -+ struct kutf_result_set *set, -+ enum kutf_result_status status, -+ const char *message) -+{ -+ /* Create the new result */ -+ struct kutf_result *new_result; -+ -+ BUG_ON(set == NULL); -+ -+ new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); -+ if (!new_result) { -+ pr_err("Result allocation failed\n"); -+ return; -+ } -+ -+ INIT_LIST_HEAD(&new_result->node); -+ new_result->status = status; -+ new_result->message = message; -+ -+ list_add_tail(&new_result->node, &set->results); -+} -+ -+void kutf_destroy_result_set(struct kutf_result_set *set) -+{ -+ if (!list_empty(&set->results)) -+ pr_err("kutf_destroy_result_set: Unread results from test\n"); -+ -+ kfree(set); -+} -+ -+struct kutf_result *kutf_remove_result(struct kutf_result_set *set) -+{ -+ if (!list_empty(&set->results)) { -+ struct kutf_result *ret; -+ -+ ret = list_first_entry(&set->results, struct kutf_result, node); -+ list_del(&ret->node); -+ return ret; -+ } -+ -+ return NULL; -+} -+ -diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c ++#endif /* _KBASE_UTILITY_H */ +diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c new file mode 100644 -index 000000000..a7cfd3be9 +index 000000000..8395568d0 --- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c -@@ -0,0 +1,1041 @@ ++++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +@@ -0,0 +1,2070 @@ +/* + * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software @@ -406571,42804 +407700,50814 @@ index 000000000..a7cfd3be9 + + + -+/* Kernel UTF suite, test and fixture management including user to kernel -+ * interaction */ -+ ++#include ++#include ++#include ++#include ++#include +#include ++#include ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include -+#include ++#include + -+#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#include -+#include -+#include ++/*****************************************************************************/ + -+#if defined(CONFIG_DEBUG_FS) ++/* Hwcnt reader API version */ ++#define HWCNT_READER_API 1 + -+/** -+ * struct kutf_application - Structure which represents kutf application -+ * @name: The name of this test application. -+ * @dir: The debugfs directory for this test -+ * @suite_list: List head to store all the suites which are part of this -+ * application -+ */ -+struct kutf_application { -+ const char *name; -+ struct dentry *dir; -+ struct list_head suite_list; -+}; ++/* The number of nanoseconds in a second. 
*/ ++#define NSECS_IN_SEC 1000000000ull /* ns */ + -+/** -+ * struct kutf_test_function - Structure which represents kutf test function -+ * @suite: Back reference to the suite this test function -+ * belongs to -+ * @filters: Filters that apply to this test function -+ * @test_id: Test ID -+ * @execute: Function to run for this test -+ * @test_data: Static data for this test -+ * @node: List node for test_list -+ * @variant_list: List head to store all the variants which can run on -+ * this function -+ * @dir: debugfs directory for this test function -+ */ -+struct kutf_test_function { -+ struct kutf_suite *suite; -+ unsigned int filters; -+ unsigned int test_id; -+ void (*execute)(struct kutf_context *context); -+ union kutf_callback_data test_data; -+ struct list_head node; -+ struct list_head variant_list; -+ struct dentry *dir; -+}; ++/* The time resolution of dumping service. */ ++#define DUMPING_RESOLUTION 500000ull /* ns */ + -+/** -+ * struct kutf_test_fixture - Structure which holds information on the kutf -+ * test fixture -+ * @test_func: Test function this fixture belongs to -+ * @fixture_index: Index of this fixture -+ * @node: List node for variant_list -+ * @dir: debugfs directory for this test fixture -+ */ -+struct kutf_test_fixture { -+ struct kutf_test_function *test_func; -+ unsigned int fixture_index; -+ struct list_head node; -+ struct dentry *dir; -+}; ++/* The maximal supported number of dumping buffers. */ ++#define MAX_BUFFER_COUNT 32 + -+struct dentry *base_dir; ++/* Size and number of hw counters blocks. */ ++#define NR_CNT_BLOCKS_PER_GROUP 8 ++#define NR_CNT_PER_BLOCK 64 ++#define NR_BYTES_PER_CNT 4 ++#define NR_BYTES_PER_HDR 16 ++#define PRFCNT_EN_MASK_OFFSET 0x8 + -+/** -+ * struct kutf_convert_table - Structure which keeps test results -+ * @result_name: Status of the test result -+ * @result: Status value for a single test -+ */ -+struct kutf_convert_table { -+ char result_name[50]; -+ enum kutf_result_status result; -+}; ++/*****************************************************************************/ + -+struct kutf_convert_table kutf_convert[] = { -+#define ADD_UTF_RESULT(_name) \ -+{ \ -+ #_name, \ -+ _name, \ -+}, -+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) -+ADD_UTF_RESULT(KUTF_RESULT_SKIP) -+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) -+ADD_UTF_RESULT(KUTF_RESULT_PASS) -+ADD_UTF_RESULT(KUTF_RESULT_DEBUG) -+ADD_UTF_RESULT(KUTF_RESULT_INFO) -+ADD_UTF_RESULT(KUTF_RESULT_WARN) -+ADD_UTF_RESULT(KUTF_RESULT_FAIL) -+ADD_UTF_RESULT(KUTF_RESULT_FATAL) -+ADD_UTF_RESULT(KUTF_RESULT_ABORT) ++enum { ++ SHADER_HWCNT_BM, ++ TILER_HWCNT_BM, ++ MMU_L2_HWCNT_BM, ++ JM_HWCNT_BM +}; + -+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) ++enum vinstr_state { ++ VINSTR_IDLE, ++ VINSTR_DUMPING, ++ VINSTR_SUSPENDING, ++ VINSTR_SUSPENDED, ++ VINSTR_RESUMING ++}; + +/** -+ * kutf_create_context() - Create a test context in which a specific fixture -+ * of an application will be run and its results -+ * reported back to the user -+ * @test_fix: Test fixture to be run. 
-+ * -+ * Return: Returns the created test context on success or NULL on failure ++ * struct kbase_vinstr_context - vinstr context per device ++ * @lock: protects the entire vinstr context ++ * @kbdev: pointer to kbase device ++ * @kctx: pointer to kbase context ++ * @vmap: vinstr vmap for mapping hwcnt dump buffer ++ * @gpu_va: GPU hwcnt dump buffer address ++ * @cpu_va: the CPU side mapping of the hwcnt dump buffer ++ * @dump_size: size of the dump buffer in bytes ++ * @bitmap: current set of counters monitored, not always in sync ++ * with hardware ++ * @reprogram: when true, reprogram hwcnt block with the new set of ++ * counters ++ * @state: vinstr state ++ * @state_lock: protects information about vinstr state ++ * @suspend_waitq: notification queue to trigger state re-validation ++ * @suspend_cnt: reference counter of vinstr's suspend state ++ * @suspend_work: worker to execute on entering suspended state ++ * @resume_work: worker to execute on leaving suspended state ++ * @nclients: number of attached clients, pending or otherwise ++ * @waiting_clients: head of list of clients being periodically sampled ++ * @idle_clients: head of list of clients being idle ++ * @suspended_clients: head of list of clients being suspended ++ * @thread: periodic sampling thread ++ * @waitq: notification queue of sampling thread ++ * @request_pending: request for action for sampling thread + */ -+static struct kutf_context *kutf_create_context( -+ struct kutf_test_fixture *test_fix); ++struct kbase_vinstr_context { ++ struct mutex lock; ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx; + -+/** -+ * kutf_destroy_context() - Destroy a previously created test context -+ * @context: Test context to destroy -+ */ -+static void kutf_destroy_context(struct kutf_context *context); ++ struct kbase_vmap_struct vmap; ++ u64 gpu_va; ++ void *cpu_va; ++ size_t dump_size; ++ u32 bitmap[4]; ++ bool reprogram; + -+/** -+ * kutf_set_result() - Set the test result against the specified test context -+ * @context: Test context -+ * @status: Result status -+ */ -+static void kutf_set_result(struct kutf_context *context, -+ enum kutf_result_status status); ++ enum vinstr_state state; ++ struct spinlock state_lock; ++ wait_queue_head_t suspend_waitq; ++ unsigned int suspend_cnt; ++ struct work_struct suspend_work; ++ struct work_struct resume_work; + -+/** -+ * kutf_set_expected_result() - Set the expected test result for the specified -+ * test context -+ * @context: Test context -+ * @expected_status: Expected result status -+ */ -+static void kutf_set_expected_result(struct kutf_context *context, -+ enum kutf_result_status expected_status); ++ u32 nclients; ++ struct list_head waiting_clients; ++ struct list_head idle_clients; ++ struct list_head suspended_clients; + -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)) -+/* Pre 3.4.0 kernels don't have the simple_open helper */ ++ struct task_struct *thread; ++ wait_queue_head_t waitq; ++ atomic_t request_pending; ++}; + +/** -+ * simple_open() - Helper for file opening which stores the inode private data -+ * into the file private data -+ * @inode: File entry representation -+ * @file: A specific opening of the file -+ * -+ * Return: always 0; if inode private data do not exist, the file will not -+ * be assigned private data ++ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context ++ * @vinstr_ctx: vinstr context client is attached to ++ * @list: node used to attach this client to list in vinstr context ++ * @buffer_count: number of 
buffers this client is using ++ * @event_mask: events this client reacts to ++ * @dump_size: size of one dump buffer in bytes ++ * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters ++ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) ++ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) ++ * @accum_buffer: temporary accumulation buffer for preserving counters ++ * @dump_time: next time this clients shall request hwcnt dump ++ * @dump_interval: interval between periodic hwcnt dumps ++ * @dump_buffers: kernel hwcnt dump buffers allocated by this client ++ * @dump_buffers_meta: metadata of dump buffers ++ * @meta_idx: index of metadata being accessed by userspace ++ * @read_idx: index of buffer read by userspace ++ * @write_idx: index of buffer being written by dumping service ++ * @waitq: client's notification queue ++ * @pending: when true, client has attached but hwcnt not yet updated + */ -+static int simple_open(struct inode *inode, struct file *file) -+{ -+ if (inode->i_private) -+ file->private_data = inode->i_private; -+ return 0; -+} -+#endif ++struct kbase_vinstr_client { ++ struct kbase_vinstr_context *vinstr_ctx; ++ struct list_head list; ++ unsigned int buffer_count; ++ u32 event_mask; ++ size_t dump_size; ++ u32 bitmap[4]; ++ void __user *legacy_buffer; ++ void *kernel_buffer; ++ void *accum_buffer; ++ u64 dump_time; ++ u32 dump_interval; ++ char *dump_buffers; ++ struct kbase_hwcnt_reader_metadata *dump_buffers_meta; ++ atomic_t meta_idx; ++ atomic_t read_idx; ++ atomic_t write_idx; ++ wait_queue_head_t waitq; ++ bool pending; ++}; + +/** -+ * kutf_result_to_string() - Converts a KUTF result into a string -+ * @result_str: Output result string -+ * @result: Result status to convert -+ * -+ * Return: 1 if test result was successfully converted to string, 0 otherwise ++ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer ++ * @hrtimer: high resolution timer ++ * @vinstr_ctx: vinstr context + */ -+static int kutf_result_to_string(char **result_str, -+ enum kutf_result_status result) -+{ -+ int i; -+ int ret = 0; ++struct kbasep_vinstr_wake_up_timer { ++ struct hrtimer hrtimer; ++ struct kbase_vinstr_context *vinstr_ctx; ++}; + -+ for (i = 0; i < UTF_CONVERT_SIZE; i++) { -+ if (result == kutf_convert[i].result) { -+ *result_str = kutf_convert[i].result_name; -+ ret = 1; -+ } -+ } -+ return ret; -+} ++/*****************************************************************************/ + -+/** -+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which -+ * returns a constant string -+ * @file: Opened file to read from -+ * @buf: User buffer to write the data into -+ * @len: Amount of data to read -+ * @ppos: Offset into file to read from -+ * -+ * Return: On success, the number of bytes read and offset @ppos advanced by -+ * this number; on error, negative value -+ */ -+static ssize_t kutf_debugfs_const_string_read(struct file *file, -+ char __user *buf, size_t len, loff_t *ppos) -+{ -+ char *str = file->private_data; ++static int kbasep_vinstr_service_task(void *data); + -+ return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); -+} ++static unsigned int kbasep_vinstr_hwcnt_reader_poll( ++ struct file *filp, ++ poll_table *wait); ++static long kbasep_vinstr_hwcnt_reader_ioctl( ++ struct file *filp, ++ unsigned int cmd, ++ unsigned long arg); ++static int kbasep_vinstr_hwcnt_reader_mmap( ++ struct file *filp, ++ struct vm_area_struct *vma); ++static int kbasep_vinstr_hwcnt_reader_release( ++ 
struct inode *inode, ++ struct file *filp); + -+static const struct file_operations kutf_debugfs_const_string_ops = { -+ .owner = THIS_MODULE, -+ .open = simple_open, -+ .read = kutf_debugfs_const_string_read, -+ .llseek = default_llseek, ++/* The timeline stream file operations structure. */ ++static const struct file_operations vinstr_client_fops = { ++ .poll = kbasep_vinstr_hwcnt_reader_poll, ++ .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, ++ .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, ++ .mmap = kbasep_vinstr_hwcnt_reader_mmap, ++ .release = kbasep_vinstr_hwcnt_reader_release, +}; + -+/** -+ * kutf_add_explicit_result() - Check if an explicit result needs to be added -+ * @context: KUTF test context -+ */ -+static void kutf_add_explicit_result(struct kutf_context *context) ++/*****************************************************************************/ ++ ++static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +{ -+ switch (context->expected_status) { -+ case KUTF_RESULT_UNKNOWN: -+ if (context->status == KUTF_RESULT_UNKNOWN) -+ kutf_test_pass(context, "(implicit pass)"); -+ break; ++ struct kbase_context *kctx = vinstr_ctx->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_uk_hwcnt_setup setup; ++ int err; + -+ case KUTF_RESULT_WARN: -+ if (context->status == KUTF_RESULT_WARN) -+ kutf_test_pass(context, -+ "Pass (expected warn occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected warn missing)"); -+ break; ++ setup.dump_buffer = vinstr_ctx->gpu_va; ++ setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; ++ setup.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; ++ setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; ++ setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; + -+ case KUTF_RESULT_FAIL: -+ if (context->status == KUTF_RESULT_FAIL) -+ kutf_test_pass(context, -+ "Pass (expected fail occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) { -+ /* Force the expected status so the fail gets logged */ -+ context->expected_status = KUTF_RESULT_PASS; -+ kutf_test_fail(context, -+ "Fail (expected fail missing)"); -+ } -+ break; ++ /* Mark the context as active so the GPU is kept turned on */ ++ /* A suspend won't happen here, because we're in a syscall from a ++ * userspace thread. */ ++ kbase_pm_context_active(kbdev); + -+ case KUTF_RESULT_FATAL: -+ if (context->status == KUTF_RESULT_FATAL) -+ kutf_test_pass(context, -+ "Pass (expected fatal occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected fatal missing)"); -+ break; ++ /* Schedule the context in */ ++ kbasep_js_schedule_privileged_ctx(kbdev, kctx); ++ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); ++ if (err) { ++ /* Release the context. This had its own Power Manager Active ++ * reference */ ++ kbasep_js_release_privileged_ctx(kbdev, kctx); + -+ case KUTF_RESULT_ABORT: -+ if (context->status == KUTF_RESULT_ABORT) -+ kutf_test_pass(context, -+ "Pass (expected abort occurred)"); -+ else if (context->status != KUTF_RESULT_SKIP) -+ kutf_test_fail(context, -+ "Fail (expected abort missing)"); -+ break; -+ default: -+ break; ++ /* Also release our Power Manager Active reference */ ++ kbase_pm_context_idle(kbdev); + } ++ ++ return err; +} + -+/** -+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry. 
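enable_hwcnt() above does little more than forward the four per-block counter bitmaps (job manager, tiler, shader, MMU/L2) into the hardware-counter setup structure before scheduling the privileged context. A minimal standalone C sketch of that packing step follows; the struct and field names only mirror the driver and are illustrative, not the real kbase API.

/* Illustrative model only: how the four per-block bitmaps feed the setup.
 * Builds with any C99 compiler; none of this is the kernel interface. */
#include <stdint.h>
#include <stdio.h>

enum { JM_BM, TILER_BM, SHADER_BM, MMU_L2_BM, NUM_BM };

struct hwcnt_setup {			/* stand-in for kbase_uk_hwcnt_setup */
	uint64_t dump_buffer_gpu_va;
	uint32_t jm_bm, tiler_bm, shader_bm, mmu_l2_bm;
};

static void fill_setup(struct hwcnt_setup *s, uint64_t gpu_va,
		       const uint32_t bitmap[NUM_BM])
{
	s->dump_buffer_gpu_va = gpu_va;
	s->jm_bm     = bitmap[JM_BM];
	s->tiler_bm  = bitmap[TILER_BM];
	s->shader_bm = bitmap[SHADER_BM];
	s->mmu_l2_bm = bitmap[MMU_L2_BM];
}

int main(void)
{
	uint32_t bm[NUM_BM] = { 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu };
	struct hwcnt_setup s;

	fill_setup(&s, 0x1000, bm);
	printf("shader bitmap: 0x%x\n", s.shader_bm);
	return 0;
}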
-+ * @inode: inode of the opened file -+ * @file: Opened file to read from -+ * -+ * This function retrieves the test fixture data that is associated with the -+ * opened file and works back to get the test, suite and application so -+ * it can then run the test that is associated with the file entry. -+ * -+ * Return: 0 on success -+ */ -+static int kutf_debugfs_run_open(struct inode *inode, struct file *file) ++static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +{ -+ struct kutf_test_fixture *test_fix = inode->i_private; -+ struct kutf_test_function *test_func = test_fix->test_func; -+ struct kutf_suite *suite = test_func->suite; -+ struct kutf_context *test_context; ++ struct kbase_context *kctx = vinstr_ctx->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ int err; + -+ test_context = kutf_create_context(test_fix); -+ if (!test_context) -+ return -ENODEV; ++ err = kbase_instr_hwcnt_disable_internal(kctx); ++ if (err) { ++ dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", ++ kctx); ++ return; ++ } + -+ file->private_data = test_context; ++ /* Release the context. This had its own Power Manager Active reference. */ ++ kbasep_js_release_privileged_ctx(kbdev, kctx); + -+ /* -+ * Call the create fixture function if required before the -+ * fixture is run -+ */ -+ if (suite->create_fixture) -+ test_context->fixture = suite->create_fixture(test_context); ++ /* Also release our Power Manager Active reference. */ ++ kbase_pm_context_idle(kbdev); + -+ /* Only run the test if the fixture was created (if required) */ -+ if ((suite->create_fixture && test_context->fixture) || -+ (!suite->create_fixture)) { -+ /* Run this fixture */ -+ test_func->execute(test_context); ++ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); ++} + -+ if (suite->remove_fixture) -+ suite->remove_fixture(test_context); ++static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) ++{ ++ disable_hwcnt(vinstr_ctx); ++ return enable_hwcnt(vinstr_ctx); ++} + -+ kutf_add_explicit_result(test_context); -+ } -+ return 0; ++static void hwcnt_bitmap_set(u32 dst[4], u32 src[4]) ++{ ++ dst[JM_HWCNT_BM] = src[JM_HWCNT_BM]; ++ dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM]; ++ dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM]; ++ dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM]; +} + -+/** -+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. -+ * @file: Opened file to read from -+ * @buf: User buffer to write the data into -+ * @len: Amount of data to read -+ * @ppos: Offset into file to read from -+ * -+ * This function emits the results which where logged during the opening of -+ * the file kutf_debugfs_run_open. -+ * Results will be emitted one at a time, once all the results have been read -+ * 0 will be returned to indicate there is no more data. -+ * -+ * Return: Number of bytes read. 
-+ */ -+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) ++static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) +{ -+ struct kutf_context *test_context = file->private_data; -+ struct kutf_result *res; -+ unsigned long bytes_not_copied; -+ ssize_t bytes_copied = 0; ++ dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM]; ++ dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM]; ++ dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM]; ++ dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; ++} + -+ /* Note: This code assumes a result is read completely */ -+ res = kutf_remove_result(test_context->result_set); -+ if (res) { -+ char *kutf_str_ptr = NULL; -+ unsigned int kutf_str_len = 0; -+ unsigned int message_len = 0; -+ char separator = ':'; -+ char terminator = '\n'; ++size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) ++{ ++ size_t dump_size; + -+ kutf_result_to_string(&kutf_str_ptr, res->status); -+ if (kutf_str_ptr) -+ kutf_str_len = strlen(kutf_str_ptr); ++#ifndef CONFIG_MALI_NO_MALI ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { ++ u32 nr_cg; + -+ if (res->message) -+ message_len = strlen(res->message); ++ nr_cg = kbdev->gpu_props.num_core_groups; ++ dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * ++ NR_CNT_PER_BLOCK * ++ NR_BYTES_PER_CNT; ++ } else ++#endif /* CONFIG_MALI_NO_MALI */ ++ { ++ /* assume v5 for now */ ++ base_gpu_props *props = &kbdev->gpu_props.props; ++ u32 nr_l2 = props->l2_props.num_l2_slices; ++ u64 core_mask = props->coherency_info.group[0].core_mask; ++ u32 nr_blocks = fls64(core_mask); + -+ if ((kutf_str_len + 1 + message_len + 1) > len) { -+ pr_err("Not enough space in user buffer for a single result"); -+ return 0; -+ } ++ /* JM and tiler counter blocks are always present */ ++ dump_size = (2 + nr_l2 + nr_blocks) * ++ NR_CNT_PER_BLOCK * ++ NR_BYTES_PER_CNT; ++ } ++ return dump_size; ++} ++KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); + -+ /* First copy the result string */ -+ if (kutf_str_ptr) { -+ bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, -+ kutf_str_len); -+ bytes_copied += kutf_str_len - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; -+ } ++static size_t kbasep_vinstr_dump_size_ctx( ++ struct kbase_vinstr_context *vinstr_ctx) ++{ ++ return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); ++} + -+ /* Then the separator */ -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ &separator, 1); -+ bytes_copied += 1 - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; ++static int kbasep_vinstr_map_kernel_dump_buffer( ++ struct kbase_vinstr_context *vinstr_ctx) ++{ ++ struct kbase_va_region *reg; ++ struct kbase_context *kctx = vinstr_ctx->kctx; ++ u64 flags, nr_pages; + -+ /* Finally Next copy the result string */ -+ if (res->message) { -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ res->message, message_len); -+ bytes_copied += message_len - bytes_not_copied; -+ if (bytes_not_copied) -+ goto exit; -+ } ++ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; ++ vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); ++ nr_pages = PFN_UP(vinstr_ctx->dump_size); + -+ /* Finally the terminator */ -+ bytes_not_copied = copy_to_user(&buf[bytes_copied], -+ &terminator, 1); -+ bytes_copied += 1 - bytes_not_copied; ++ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, ++ &vinstr_ctx->gpu_va); ++ if (!reg) ++ return -ENOMEM; ++ ++ vinstr_ctx->cpu_va = kbase_vmap( ++ kctx, ++ vinstr_ctx->gpu_va, ++ vinstr_ctx->dump_size, ++ &vinstr_ctx->vmap); ++ if (!vinstr_ctx->cpu_va) { ++ 
kbase_mem_free(kctx, vinstr_ctx->gpu_va); ++ return -ENOMEM; + } -+exit: -+ return bytes_copied; ++ ++ return 0; +} + -+/** -+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. -+ * @inode: File entry representation -+ * @file: A specific opening of the file -+ * -+ * Release any resources that where created during the opening of the file -+ * -+ * Return: 0 on success -+ */ -+static int kutf_debugfs_run_release(struct inode *inode, struct file *file) ++static void kbasep_vinstr_unmap_kernel_dump_buffer( ++ struct kbase_vinstr_context *vinstr_ctx) +{ -+ struct kutf_context *test_context = file->private_data; ++ struct kbase_context *kctx = vinstr_ctx->kctx; + -+ kutf_destroy_context(test_context); -+ return 0; ++ kbase_vunmap(kctx, &vinstr_ctx->vmap); ++ kbase_mem_free(kctx, vinstr_ctx->gpu_va); +} + -+static const struct file_operations kutf_debugfs_run_ops = { -+ .owner = THIS_MODULE, -+ .open = kutf_debugfs_run_open, -+ .read = kutf_debugfs_run_read, -+ .release = kutf_debugfs_run_release, -+ .llseek = default_llseek, -+}; -+ +/** -+ * create_fixture_variant() - Creates a fixture variant for the specified -+ * test function and index and the debugfs entries -+ * that represent it. -+ * @test_func: Test function -+ * @fixture_index: Fixture index -+ * -+ * Return: 0 on success, negative value corresponding to error code in failure ++ * kbasep_vinstr_create_kctx - create kernel context for vinstr ++ * @vinstr_ctx: vinstr context ++ * Return: zero on success + */ -+static int create_fixture_variant(struct kutf_test_function *test_func, -+ unsigned int fixture_index) ++static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) +{ -+ struct kutf_test_fixture *test_fix; -+ char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ -+ struct dentry *tmp; ++ struct kbase_device *kbdev = vinstr_ctx->kbdev; ++ struct kbasep_kctx_list_element *element; ++ unsigned long flags; ++ bool enable_backend = false; + int err; + -+ test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); -+ if (!test_fix) { -+ pr_err("Failed to create debugfs directory when adding fixture\n"); -+ err = -ENOMEM; -+ goto fail_alloc; ++ vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); ++ if (!vinstr_ctx->kctx) ++ return -ENOMEM; ++ ++ /* Map the master kernel dump buffer. The HW dumps the counters ++ * into this memory region. */ ++ err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx); ++ if (err) { ++ kbase_destroy_context(vinstr_ctx->kctx); ++ vinstr_ctx->kctx = NULL; ++ return err; + } + -+ test_fix->test_func = test_func; -+ test_fix->fixture_index = fixture_index; ++ /* Add kernel context to list of contexts associated with device. */ ++ element = kzalloc(sizeof(*element), GFP_KERNEL); ++ if (element) { ++ element->kctx = vinstr_ctx->kctx; ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_add(&element->link, &kbdev->kctx_list); + -+ snprintf(name, sizeof(name), "%d", fixture_index); -+ test_fix->dir = debugfs_create_dir(name, test_func->dir); -+ if (!test_func->dir) { -+ pr_err("Failed to create debugfs directory when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_dir; ++ /* Inform timeline client about new context. ++ * Do this while holding the lock to avoid tracepoint ++ * being created in both body and summary stream. 
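kbase_vinstr_dump_size() above derives the dump-buffer size from the counter block layout: on v5 hardware it is the JM and tiler blocks plus one block per L2 slice plus one block per shader-core position (fls64 of the core mask, so holes in the mask still occupy a block), and kbasep_vinstr_map_kernel_dump_buffer() then rounds that size up to whole pages with PFN_UP. A standalone arithmetic sketch, with example constants standing in for the NR_* defines declared earlier in this file:

/* Example values only; the real NR_CNT_PER_BLOCK / NR_BYTES_PER_CNT constants
 * live elsewhere in mali_kbase_vinstr.c. */
#include <stdint.h>
#include <stdio.h>

#define CNT_PER_BLOCK	64u
#define BYTES_PER_CNT	4u
#define PAGE_SZ		4096u

static unsigned fls64_sketch(uint64_t x)	/* position of highest set bit */
{
	unsigned n = 0;

	while (x) {
		n++;
		x >>= 1;
	}
	return n;
}

static size_t dump_size_v5(unsigned nr_l2, uint64_t core_mask)
{
	unsigned nr_blocks = fls64_sketch(core_mask);

	/* JM + tiler + L2 slices + shader-core positions */
	return (size_t)(2 + nr_l2 + nr_blocks) * CNT_PER_BLOCK * BYTES_PER_CNT;
}

int main(void)
{
	/* 2 L2 slices, shader mask 0b1011: 4 block positions despite 3 cores */
	size_t size = dump_size_v5(2, 0xbULL);
	size_t pages = (size + PAGE_SZ - 1) / PAGE_SZ;	/* PFN_UP equivalent */

	printf("%zu bytes -> %zu page(s)\n", size, pages);
	return 0;
}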
*/ ++ KBASE_TLSTREAM_TL_NEW_CTX( ++ vinstr_ctx->kctx, ++ (u32)(vinstr_ctx->kctx->id), ++ (u32)(vinstr_ctx->kctx->tgid)); ++ ++ mutex_unlock(&kbdev->kctx_list_lock); ++ } else { ++ /* Don't treat this as a fail - just warn about it. */ ++ dev_warn(kbdev->dev, ++ "couldn't add kctx to kctx_list\n"); + } + -+ tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n", -+ &kutf_debugfs_const_string_ops); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_file; ++ /* Don't enable hardware counters if vinstr is suspended. ++ * Note that vinstr resume code is run under vinstr context lock, ++ * lower layer will be enabled as needed on resume. */ ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ if (VINSTR_IDLE == vinstr_ctx->state) ++ enable_backend = true; ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ if (enable_backend) ++ err = enable_hwcnt(vinstr_ctx); ++ ++ if (err) { ++ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); ++ kbase_destroy_context(vinstr_ctx->kctx); ++ if (element) { ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_del(&element->link); ++ kfree(element); ++ mutex_unlock(&kbdev->kctx_list_lock); ++ } ++ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); ++ vinstr_ctx->kctx = NULL; ++ return err; + } + -+ tmp = debugfs_create_file("run", S_IROTH, test_fix->dir, test_fix, -+ &kutf_debugfs_run_ops); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); -+ /* Might not be the right error, we don't get it passed back to us */ -+ err = -EEXIST; -+ goto fail_file; ++ vinstr_ctx->thread = kthread_run( ++ kbasep_vinstr_service_task, ++ vinstr_ctx, ++ "mali_vinstr_service"); ++ if (!vinstr_ctx->thread) { ++ disable_hwcnt(vinstr_ctx); ++ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); ++ kbase_destroy_context(vinstr_ctx->kctx); ++ if (element) { ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_del(&element->link); ++ kfree(element); ++ mutex_unlock(&kbdev->kctx_list_lock); ++ } ++ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); ++ vinstr_ctx->kctx = NULL; ++ return -EFAULT; + } + -+ list_add(&test_fix->node, &test_func->variant_list); + return 0; -+ -+fail_file: -+ debugfs_remove_recursive(test_fix->dir); -+fail_dir: -+ kfree(test_fix); -+fail_alloc: -+ return err; +} + +/** -+ * kutf_remove_test_variant() - Destroy a previously created fixture variant. -+ * @test_fix: Test fixture ++ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context ++ * @vinstr_ctx: vinstr context + */ -+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) ++static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) +{ -+ debugfs_remove_recursive(test_fix->dir); -+ kfree(test_fix); -+} ++ struct kbase_device *kbdev = vinstr_ctx->kbdev; ++ struct kbasep_kctx_list_element *element; ++ struct kbasep_kctx_list_element *tmp; ++ bool found = false; + -+void kutf_add_test_with_filters_and_data( -+ struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data test_data) -+{ -+ struct kutf_test_function *test_func; -+ struct dentry *tmp; -+ unsigned int i; ++ /* Release hw counters dumping resources. 
*/ ++ vinstr_ctx->thread = NULL; ++ disable_hwcnt(vinstr_ctx); ++ kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); ++ kbase_destroy_context(vinstr_ctx->kctx); + -+ test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); -+ if (!test_func) { -+ pr_err("Failed to allocate memory when adding test %s\n", name); -+ goto fail_alloc; ++ /* Remove kernel context from the device's contexts list. */ ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { ++ if (element->kctx == vinstr_ctx->kctx) { ++ list_del(&element->link); ++ kfree(element); ++ found = true; ++ } + } ++ mutex_unlock(&kbdev->kctx_list_lock); + -+ INIT_LIST_HEAD(&test_func->variant_list); ++ if (!found) ++ dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + -+ test_func->dir = debugfs_create_dir(name, suite->dir); -+ if (!test_func->dir) { -+ pr_err("Failed to create debugfs directory when adding test %s\n", name); -+ goto fail_dir; -+ } ++ /* Inform timeline client about context destruction. */ ++ KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); + -+ tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n", -+ &kutf_debugfs_const_string_ops); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); -+ goto fail_file; -+ } ++ vinstr_ctx->kctx = NULL; ++} + -+ test_func->filters = filters; -+ tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, -+ &test_func->filters); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); -+ goto fail_file; -+ } ++/** ++ * kbasep_vinstr_attach_client - Attach a client to the vinstr core ++ * @vinstr_ctx: vinstr context ++ * @buffer_count: requested number of dump buffers ++ * @bitmap: bitmaps describing which counters should be enabled ++ * @argp: pointer where notification descriptor shall be stored ++ * @kernel_buffer: pointer to kernel side buffer ++ * ++ * Return: vinstr opaque client handle or NULL on failure ++ */ ++static struct kbase_vinstr_client *kbasep_vinstr_attach_client( ++ struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, ++ u32 bitmap[4], void *argp, void *kernel_buffer) ++{ ++ struct task_struct *thread = NULL; ++ struct kbase_vinstr_client *cli; + -+ test_func->test_id = id; -+ tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, -+ &test_func->test_id); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); -+ goto fail_file; -+ } ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ for (i = 0; i < suite->fixture_variants; i++) { -+ if (create_fixture_variant(test_func, i)) { -+ pr_err("Failed to create fixture %d when adding test %s\n", i, name); -+ goto fail_file; -+ } -+ } ++ if (buffer_count > MAX_BUFFER_COUNT ++ || (buffer_count & (buffer_count - 1))) ++ return NULL; + -+ test_func->suite = suite; -+ test_func->execute = execute; -+ test_func->test_data = test_data; ++ cli = kzalloc(sizeof(*cli), GFP_KERNEL); ++ if (!cli) ++ return NULL; + -+ list_add(&test_func->node, &suite->test_list); -+ return; ++ cli->vinstr_ctx = vinstr_ctx; ++ cli->buffer_count = buffer_count; ++ cli->event_mask = ++ (1 << BASE_HWCNT_READER_EVENT_MANUAL) | ++ (1 << BASE_HWCNT_READER_EVENT_PERIODIC); ++ cli->pending = true; + -+fail_file: -+ debugfs_remove_recursive(test_func->dir); -+fail_dir: -+ kfree(test_func); -+fail_alloc: -+ return; -+} -+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); ++ hwcnt_bitmap_set(cli->bitmap, bitmap); + -+void kutf_add_test_with_filters( -+ struct kutf_suite *suite, -+ 
unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context), -+ unsigned int filters) -+{ -+ union kutf_callback_data data; ++ mutex_lock(&vinstr_ctx->lock); + -+ data.ptr_value = NULL; ++ hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); ++ vinstr_ctx->reprogram = true; + -+ kutf_add_test_with_filters_and_data(suite, -+ id, -+ name, -+ execute, -+ suite->suite_default_flags, -+ data); -+} -+EXPORT_SYMBOL(kutf_add_test_with_filters); ++ /* If this is the first client, create the vinstr kbase ++ * context. This context is permanently resident until the ++ * last client exits. */ ++ if (!vinstr_ctx->nclients) { ++ hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); ++ if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) ++ goto error; + -+void kutf_add_test(struct kutf_suite *suite, -+ unsigned int id, -+ const char *name, -+ void (*execute)(struct kutf_context *context)) -+{ -+ union kutf_callback_data data; ++ vinstr_ctx->reprogram = false; ++ cli->pending = false; ++ } + -+ data.ptr_value = NULL; ++ /* The GPU resets the counter block every time there is a request ++ * to dump it. We need a per client kernel buffer for accumulating ++ * the counters. */ ++ cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); ++ cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); ++ if (!cli->accum_buffer) ++ goto error; + -+ kutf_add_test_with_filters_and_data(suite, -+ id, -+ name, -+ execute, -+ suite->suite_default_flags, -+ data); -+} -+EXPORT_SYMBOL(kutf_add_test); ++ /* Prepare buffers. */ ++ if (cli->buffer_count) { ++ int *fd = (int *)argp; ++ size_t tmp; + -+/** -+ * kutf_remove_test(): Remove a previously added test function. -+ * @test_func: Test function -+ */ -+static void kutf_remove_test(struct kutf_test_function *test_func) -+{ -+ struct list_head *pos; -+ struct list_head *tmp; ++ /* Allocate area for buffers metadata storage. */ ++ tmp = sizeof(struct kbase_hwcnt_reader_metadata) * ++ cli->buffer_count; ++ cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL); ++ if (!cli->dump_buffers_meta) ++ goto error; + -+ list_for_each_safe(pos, tmp, &test_func->variant_list) { -+ struct kutf_test_fixture *test_fix; ++ /* Allocate required number of dumping buffers. */ ++ cli->dump_buffers = (char *)__get_free_pages( ++ GFP_KERNEL | __GFP_ZERO, ++ get_order(cli->dump_size * cli->buffer_count)); ++ if (!cli->dump_buffers) ++ goto error; + -+ test_fix = list_entry(pos, struct kutf_test_fixture, node); -+ kutf_remove_test_variant(test_fix); ++ /* Create descriptor for user-kernel data exchange. 
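The attach path above accepts buffer_count only when it is zero (legacy or kernel-buffer clients) or a power of two no larger than MAX_BUFFER_COUNT, and then sizes the shared dump area with get_order(dump_size * buffer_count). A small userspace model of those two calculations, with a 4 KiB page size assumed for the sketch:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SZ 4096u	/* assumption for the sketch */

/* Same test as in the attach path: zero passes (no reader buffers), anything
 * else must be a power of two so the ring indices can wrap with a modulo. */
static bool buffer_count_ok(unsigned n, unsigned max)
{
	return !(n > max || (n & (n - 1)));
}

/* Minimal model of get_order(): smallest order with PAGE_SZ << order >= size */
static unsigned order_for(size_t size)
{
	unsigned order = 0;

	while (((size_t)PAGE_SZ << order) < size)
		order++;
	return order;
}

int main(void)
{
	size_t dump_size = 2048, count = 16;

	if (buffer_count_ok(count, 32))
		printf("allocation order %u for %zu bytes\n",
		       order_for(dump_size * count), dump_size * count);
	return 0;
}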
*/ ++ *fd = anon_inode_getfd( ++ "[mali_vinstr_desc]", ++ &vinstr_client_fops, ++ cli, ++ O_RDONLY | O_CLOEXEC); ++ if (0 > *fd) ++ goto error; ++ } else if (kernel_buffer) { ++ cli->kernel_buffer = kernel_buffer; ++ } else { ++ cli->legacy_buffer = (void __user *)argp; + } + -+ list_del(&test_func->node); -+ debugfs_remove_recursive(test_func->dir); -+ kfree(test_func); -+} ++ atomic_set(&cli->read_idx, 0); ++ atomic_set(&cli->meta_idx, 0); ++ atomic_set(&cli->write_idx, 0); ++ init_waitqueue_head(&cli->waitq); + -+struct kutf_suite *kutf_create_suite_with_filters_and_data( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters, -+ union kutf_callback_data suite_data) -+{ -+ struct kutf_suite *suite; -+ struct dentry *tmp; ++ vinstr_ctx->nclients++; ++ list_add(&cli->list, &vinstr_ctx->idle_clients); + -+ suite = kmalloc(sizeof(*suite), GFP_KERNEL); -+ if (!suite) { -+ pr_err("Failed to allocate memory when creating suite %s\n", name); -+ goto fail_kmalloc; -+ } ++ mutex_unlock(&vinstr_ctx->lock); + -+ suite->dir = debugfs_create_dir(name, app->dir); -+ if (!suite->dir) { -+ pr_err("Failed to create debugfs directory when adding test %s\n", name); -+ goto fail_debugfs; -+ } ++ return cli; + -+ tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n", -+ &kutf_debugfs_const_string_ops); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); -+ goto fail_file; ++error: ++ kfree(cli->dump_buffers_meta); ++ if (cli->dump_buffers) ++ free_pages( ++ (unsigned long)cli->dump_buffers, ++ get_order(cli->dump_size * cli->buffer_count)); ++ kfree(cli->accum_buffer); ++ if (!vinstr_ctx->nclients && vinstr_ctx->kctx) { ++ thread = vinstr_ctx->thread; ++ kbasep_vinstr_destroy_kctx(vinstr_ctx); + } ++ kfree(cli); + -+ INIT_LIST_HEAD(&suite->test_list); -+ suite->app = app; -+ suite->name = name; -+ suite->fixture_variants = fixture_count; -+ suite->create_fixture = create_fixture; -+ suite->remove_fixture = remove_fixture; -+ suite->suite_default_flags = filters; -+ suite->suite_data = suite_data; -+ -+ list_add(&suite->node, &app->suite_list); ++ mutex_unlock(&vinstr_ctx->lock); + -+ return suite; ++ /* Thread must be stopped after lock is released. 
*/ ++ if (thread) ++ kthread_stop(thread); + -+fail_file: -+ debugfs_remove_recursive(suite->dir); -+fail_debugfs: -+ kfree(suite); -+fail_kmalloc: + return NULL; +} -+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); + -+struct kutf_suite *kutf_create_suite_with_filters( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context), -+ unsigned int filters) ++void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) +{ -+ union kutf_callback_data data; -+ -+ data.ptr_value = NULL; -+ return kutf_create_suite_with_filters_and_data(app, -+ name, -+ fixture_count, -+ create_fixture, -+ remove_fixture, -+ filters, -+ data); -+} -+EXPORT_SYMBOL(kutf_create_suite_with_filters); ++ struct kbase_vinstr_context *vinstr_ctx; ++ struct kbase_vinstr_client *iter, *tmp; ++ struct task_struct *thread = NULL; ++ u32 zerobitmap[4] = { 0 }; ++ int cli_found = 0; + -+struct kutf_suite *kutf_create_suite( -+ struct kutf_application *app, -+ const char *name, -+ unsigned int fixture_count, -+ void *(*create_fixture)(struct kutf_context *context), -+ void (*remove_fixture)(struct kutf_context *context)) -+{ -+ union kutf_callback_data data; ++ KBASE_DEBUG_ASSERT(cli); ++ vinstr_ctx = cli->vinstr_ctx; ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ data.ptr_value = NULL; -+ return kutf_create_suite_with_filters_and_data(app, -+ name, -+ fixture_count, -+ create_fixture, -+ remove_fixture, -+ KUTF_F_TEST_GENERIC, -+ data); -+} -+EXPORT_SYMBOL(kutf_create_suite); ++ mutex_lock(&vinstr_ctx->lock); + -+/** -+ * kutf_destroy_suite() - Destroy a previously added test suite. -+ * @suite: Test suite -+ */ -+static void kutf_destroy_suite(struct kutf_suite *suite) -+{ -+ struct list_head *pos; -+ struct list_head *tmp; ++ list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { ++ if (iter == cli) { ++ vinstr_ctx->reprogram = true; ++ cli_found = 1; ++ list_del(&iter->list); ++ break; ++ } ++ } ++ if (!cli_found) { ++ list_for_each_entry_safe( ++ iter, tmp, &vinstr_ctx->waiting_clients, list) { ++ if (iter == cli) { ++ vinstr_ctx->reprogram = true; ++ cli_found = 1; ++ list_del(&iter->list); ++ break; ++ } ++ } ++ } ++ KBASE_DEBUG_ASSERT(cli_found); + -+ list_for_each_safe(pos, tmp, &suite->test_list) { -+ struct kutf_test_function *test_func; ++ kfree(cli->dump_buffers_meta); ++ free_pages( ++ (unsigned long)cli->dump_buffers, ++ get_order(cli->dump_size * cli->buffer_count)); ++ kfree(cli->accum_buffer); ++ kfree(cli); + -+ test_func = list_entry(pos, struct kutf_test_function, node); -+ kutf_remove_test(test_func); ++ vinstr_ctx->nclients--; ++ if (!vinstr_ctx->nclients) { ++ thread = vinstr_ctx->thread; ++ kbasep_vinstr_destroy_kctx(vinstr_ctx); + } + -+ list_del(&suite->node); -+ debugfs_remove_recursive(suite->dir); -+ kfree(suite); ++ /* Rebuild context bitmap now that the client has detached */ ++ hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); ++ list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) ++ hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); ++ list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) ++ hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); ++ ++ mutex_unlock(&vinstr_ctx->lock); ++ ++ /* Thread must be stopped after lock is released. 
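On detach the driver above does not try to subtract the departing client's counters from the context bitmap; it resets the bitmap to zero and ORs back in the request of every client still on the idle and waiting lists. The same rebuild rule in a standalone sketch:

#include <stdint.h>
#include <stdio.h>

enum { JM, TILER, SHADER, MMU_L2, NBM };

/* Same idea as hwcnt_bitmap_set()/hwcnt_bitmap_union() above: the context
 * bitmap is the OR of every remaining client's request, recomputed from a
 * zeroed bitmap after a client detaches. */
static void bitmap_union(uint32_t dst[NBM], const uint32_t src[NBM])
{
	int i;

	for (i = 0; i < NBM; i++)
		dst[i] |= src[i];
}

int main(void)
{
	uint32_t remaining[2][NBM] = {
		{ 0x0f, 0x01, 0xff, 0x03 },
		{ 0xf0, 0x00, 0x0f, 0x00 },
	};
	uint32_t ctx[NBM] = { 0 };	/* start from the zeroed bitmap */
	int c;

	for (c = 0; c < 2; c++)
		bitmap_union(ctx, remaining[c]);

	printf("shader mask after detach: 0x%x\n", ctx[SHADER]);	/* 0xff */
	return 0;
}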
*/ ++ if (thread) ++ kthread_stop(thread); +} ++KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); + -+struct kutf_application *kutf_create_application(const char *name) ++/* Accumulate counters in the dump buffer */ ++static void accum_dump_buffer(void *dst, void *src, size_t dump_size) +{ -+ struct kutf_application *app; -+ struct dentry *tmp; ++ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; ++ u32 *d = dst; ++ u32 *s = src; ++ size_t i, j; + -+ app = kmalloc(sizeof(*app), GFP_KERNEL); -+ if (!app) { -+ pr_err("Failed to create allocate memory when creating application %s\n", name); -+ goto fail_kmalloc; ++ for (i = 0; i < dump_size; i += block_size) { ++ /* skip over the header block */ ++ d += NR_BYTES_PER_HDR / sizeof(u32); ++ s += NR_BYTES_PER_HDR / sizeof(u32); ++ for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) { ++ /* saturate result if addition would result in wraparound */ ++ if (U32_MAX - *d < *s) ++ *d = U32_MAX; ++ else ++ *d += *s; ++ d++; ++ s++; ++ } + } ++} + -+ app->dir = debugfs_create_dir(name, base_dir); -+ if (!app->dir) { -+ pr_err("Failed to create debugfs direcotry when creating application %s\n", name); -+ goto fail_debugfs; -+ } ++/* This is the Midgard v4 patch function. It copies the headers for each ++ * of the defined blocks from the master kernel buffer and then patches up ++ * the performance counter enable mask for each of the blocks to exclude ++ * counters that were not requested by the client. */ ++static void patch_dump_buffer_hdr_v4( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_vinstr_client *cli) ++{ ++ u32 *mask; ++ u8 *dst = cli->accum_buffer; ++ u8 *src = vinstr_ctx->cpu_va; ++ u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups; ++ size_t i, group_size, group; ++ enum { ++ SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, ++ JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT ++ }; + -+ tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n", -+ &kutf_debugfs_const_string_ops); -+ if (!tmp) { -+ pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); -+ goto fail_file; -+ } ++ group_size = NR_CNT_BLOCKS_PER_GROUP * ++ NR_CNT_PER_BLOCK * ++ NR_BYTES_PER_CNT; ++ for (i = 0; i < nr_cg; i++) { ++ group = i * group_size; ++ /* copy shader core headers */ ++ memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE], ++ NR_BYTES_PER_HDR); ++ memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE], ++ NR_BYTES_PER_HDR); ++ memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE], ++ NR_BYTES_PER_HDR); ++ memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE], ++ NR_BYTES_PER_HDR); + -+ INIT_LIST_HEAD(&app->suite_list); -+ app->name = name; ++ /* copy tiler header */ ++ memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE], ++ NR_BYTES_PER_HDR); + -+ return app; ++ /* copy mmu header */ ++ memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE], ++ NR_BYTES_PER_HDR); + -+fail_file: -+ debugfs_remove_recursive(app->dir); -+fail_debugfs: -+ kfree(app); -+fail_kmalloc: -+ return NULL; -+} -+EXPORT_SYMBOL(kutf_create_application); ++ /* copy job manager header */ ++ memcpy(&dst[group + JM_BASE], &src[group + JM_BASE], ++ NR_BYTES_PER_HDR); + -+void kutf_destroy_application(struct 
kutf_application *app) -+{ -+ struct list_head *pos; -+ struct list_head *tmp; ++ /* patch the shader core enable mask */ ++ mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[SHADER_HWCNT_BM]; ++ mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[SHADER_HWCNT_BM]; ++ mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[SHADER_HWCNT_BM]; ++ mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[SHADER_HWCNT_BM]; + -+ list_for_each_safe(pos, tmp, &app->suite_list) { -+ struct kutf_suite *suite; ++ /* patch the tiler core enable mask */ ++ mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[TILER_HWCNT_BM]; + -+ suite = list_entry(pos, struct kutf_suite, node); -+ kutf_destroy_suite(suite); -+ } ++ /* patch the mmu core enable mask */ ++ mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; + -+ debugfs_remove_recursive(app->dir); -+ kfree(app); ++ /* patch the job manager enable mask */ ++ mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[JM_HWCNT_BM]; ++ } +} -+EXPORT_SYMBOL(kutf_destroy_application); + -+static struct kutf_context *kutf_create_context( -+ struct kutf_test_fixture *test_fix) ++/* This is the Midgard v5 patch function. It copies the headers for each ++ * of the defined blocks from the master kernel buffer and then patches up ++ * the performance counter enable mask for each of the blocks to exclude ++ * counters that were not requested by the client. */ ++static void patch_dump_buffer_hdr_v5( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_vinstr_client *cli) +{ -+ struct kutf_context *new_context; -+ -+ new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); -+ if (!new_context) { -+ pr_err("Failed to allocate test context"); -+ goto fail_alloc; -+ } ++ struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; ++ u32 i, nr_l2; ++ u64 core_mask; ++ u32 *mask; ++ u8 *dst = cli->accum_buffer; ++ u8 *src = vinstr_ctx->cpu_va; ++ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + -+ new_context->result_set = kutf_create_result_set(); -+ if (!new_context->result_set) { -+ pr_err("Failed to create resultset"); -+ goto fail_result_set; -+ } ++ /* copy and patch job manager header */ ++ memcpy(dst, src, NR_BYTES_PER_HDR); ++ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[JM_HWCNT_BM]; ++ dst += block_size; ++ src += block_size; + -+ new_context->test_fix = test_fix; -+ /* Save the pointer to the suite as the callbacks will require it */ -+ new_context->suite = test_fix->test_func->suite; -+ new_context->status = KUTF_RESULT_UNKNOWN; -+ new_context->expected_status = KUTF_RESULT_UNKNOWN; ++ /* copy and patch tiler header */ ++ memcpy(dst, src, NR_BYTES_PER_HDR); ++ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[TILER_HWCNT_BM]; ++ dst += block_size; ++ src += block_size; + -+ kutf_mempool_init(&new_context->fixture_pool); -+ new_context->fixture = NULL; -+ new_context->fixture_index = test_fix->fixture_index; -+ new_context->fixture_name = NULL; -+ new_context->test_data = test_fix->test_func->test_data; ++ /* copy and patch MMU/L2C headers */ ++ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; ++ for (i = 0; i < nr_l2; i++) { ++ memcpy(dst, src, NR_BYTES_PER_HDR); ++ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; ++ dst += 
block_size; ++ src += block_size; ++ } + -+ return new_context; ++ /* copy and patch shader core headers */ ++ core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; ++ while (0ull != core_mask) { ++ memcpy(dst, src, NR_BYTES_PER_HDR); ++ if (0ull != (core_mask & 1ull)) { ++ /* if block is not reserved update header */ ++ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; ++ *mask &= cli->bitmap[SHADER_HWCNT_BM]; ++ } ++ dst += block_size; ++ src += block_size; + -+fail_result_set: -+ kfree(new_context); -+fail_alloc: -+ return NULL; ++ core_mask >>= 1; ++ } +} + -+static void kutf_destroy_context(struct kutf_context *context) ++/** ++ * accum_clients - accumulate dumped hw counters for all known clients ++ * @vinstr_ctx: vinstr context ++ */ ++static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) +{ -+ kutf_destroy_result_set(context->result_set); -+ kutf_mempool_destroy(&context->fixture_pool); -+ kfree(context); -+} ++ struct kbase_vinstr_client *iter; ++ int v4 = 0; + -+static void kutf_set_result(struct kutf_context *context, -+ enum kutf_result_status status) -+{ -+ context->status = status; -+} ++#ifndef CONFIG_MALI_NO_MALI ++ v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); ++#endif + -+static void kutf_set_expected_result(struct kutf_context *context, -+ enum kutf_result_status expected_status) -+{ -+ context->expected_status = expected_status; ++ list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { ++ /* Don't bother accumulating clients whose hwcnt requests ++ * have not yet been honoured. */ ++ if (iter->pending) ++ continue; ++ if (v4) ++ patch_dump_buffer_hdr_v4(vinstr_ctx, iter); ++ else ++ patch_dump_buffer_hdr_v5(vinstr_ctx, iter); ++ accum_dump_buffer( ++ iter->accum_buffer, ++ vinstr_ctx->cpu_va, ++ iter->dump_size); ++ } ++ list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) { ++ /* Don't bother accumulating clients whose hwcnt requests ++ * have not yet been honoured. */ ++ if (iter->pending) ++ continue; ++ if (v4) ++ patch_dump_buffer_hdr_v4(vinstr_ctx, iter); ++ else ++ patch_dump_buffer_hdr_v5(vinstr_ctx, iter); ++ accum_dump_buffer( ++ iter->accum_buffer, ++ vinstr_ctx->cpu_va, ++ iter->dump_size); ++ } +} + ++/*****************************************************************************/ ++ +/** -+ * kutf_test_log_result() - Log a result for the specified test context -+ * @context: Test context -+ * @message: Result string -+ * @new_status: Result status ++ * kbasep_vinstr_get_timestamp - return timestamp ++ * ++ * Function returns timestamp value based on raw monotonic timer. Value will ++ * wrap around zero in case of overflow. 
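accum_dump_buffer() above walks the dump in block-sized strides, leaves the 16-byte block headers alone and saturates each 32-bit counter at U32_MAX instead of letting it wrap, which is what preserves monotonic counter values across successive hardware dumps. A standalone sketch of the per-block step; the constants are example values standing in for the NR_* defines:

#include <stdint.h>
#include <stdio.h>

#define CNT_PER_BLOCK	64u	/* example geometry, not the driver defines */
#define BYTES_PER_CNT	4u
#define BYTES_PER_HDR	16u

/* Same rule as accum_dump_buffer(): header words are skipped, and counter
 * additions saturate at UINT32_MAX rather than wrapping. */
static void accumulate_block(uint32_t *dst, const uint32_t *src)
{
	size_t hdr_words = BYTES_PER_HDR / sizeof(uint32_t);
	size_t total_words = (CNT_PER_BLOCK * BYTES_PER_CNT) / sizeof(uint32_t);
	size_t i;

	for (i = hdr_words; i < total_words; i++)
		dst[i] = (UINT32_MAX - dst[i] < src[i]) ?
				UINT32_MAX : dst[i] + src[i];
}

int main(void)
{
	uint32_t acc[64] = { 0 }, sample[64] = { 0 };

	acc[4] = UINT32_MAX - 1;	/* an almost-saturated counter */
	sample[4] = 10;
	accumulate_block(acc, sample);
	printf("counter 4 = 0x%x\n", acc[4]);	/* stays at 0xffffffff */
	return 0;
}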
++ * ++ * Return: timestamp value + */ -+static void kutf_test_log_result( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status) ++static u64 kbasep_vinstr_get_timestamp(void) +{ -+ if (context->status < new_status) -+ context->status = new_status; -+ -+ if (context->expected_status != new_status) -+ kutf_add_result(&context->fixture_pool, context->result_set, -+ new_status, message); -+} ++ struct timespec64 ts; + -+void kutf_test_log_result_external( -+ struct kutf_context *context, -+ const char *message, -+ enum kutf_result_status new_status) -+{ -+ kutf_test_log_result(context, message, new_status); ++ ktime_get_raw_ts64(&ts); ++ return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; +} -+EXPORT_SYMBOL(kutf_test_log_result_external); + -+void kutf_test_expect_abort(struct kutf_context *context) ++/** ++ * kbasep_vinstr_add_dump_request - register client's dumping request ++ * @cli: requesting client ++ * @waiting_clients: list of pending dumping requests ++ */ ++static void kbasep_vinstr_add_dump_request( ++ struct kbase_vinstr_client *cli, ++ struct list_head *waiting_clients) +{ -+ kutf_set_expected_result(context, KUTF_RESULT_ABORT); -+} -+EXPORT_SYMBOL(kutf_test_expect_abort); ++ struct kbase_vinstr_client *tmp; + -+void kutf_test_expect_fatal(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_FATAL); ++ if (list_empty(waiting_clients)) { ++ list_add(&cli->list, waiting_clients); ++ return; ++ } ++ list_for_each_entry(tmp, waiting_clients, list) { ++ if (tmp->dump_time > cli->dump_time) { ++ list_add_tail(&cli->list, &tmp->list); ++ return; ++ } ++ } ++ list_add_tail(&cli->list, waiting_clients); +} -+EXPORT_SYMBOL(kutf_test_expect_fatal); + -+void kutf_test_expect_fail(struct kutf_context *context) ++/** ++ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level ++ * dump and accumulate them for known ++ * clients ++ * @vinstr_ctx: vinstr context ++ * @timestamp: pointer where collection timestamp will be recorded ++ * ++ * Return: zero on success ++ */ ++static int kbasep_vinstr_collect_and_accumulate( ++ struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) +{ -+ kutf_set_expected_result(context, KUTF_RESULT_FAIL); -+} -+EXPORT_SYMBOL(kutf_test_expect_fail); ++ unsigned long flags; ++ int rcode; + -+void kutf_test_expect_warn(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_WARN); -+} -+EXPORT_SYMBOL(kutf_test_expect_warn); ++#ifdef CONFIG_MALI_NO_MALI ++ /* The dummy model needs the CPU mapping. */ ++ gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); ++#endif + -+void kutf_test_expect_pass(struct kutf_context *context) -+{ -+ kutf_set_expected_result(context, KUTF_RESULT_PASS); -+} -+EXPORT_SYMBOL(kutf_test_expect_pass); ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ if (VINSTR_IDLE != vinstr_ctx->state) { ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ return -EAGAIN; ++ } else { ++ vinstr_ctx->state = VINSTR_DUMPING; ++ } ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + -+void kutf_test_skip(struct kutf_context *context) -+{ -+ kutf_set_result(context, KUTF_RESULT_SKIP); -+ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); ++ /* Request HW counters dump. ++ * Disable preemption to make dump timestamp more accurate. 
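kbasep_vinstr_add_dump_request() above keeps waiting_clients sorted by dump_time so that the service thread always finds the earliest deadline at the head of the list. The same insertion rule, reduced to a plain singly linked list in a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the ordering rule: insert each request before the first entry
 * with a later dump_time, keeping the list earliest-deadline-first. */
struct req {
	uint64_t dump_time;
	struct req *next;
};

static void add_request(struct req **head, struct req *r)
{
	while (*head && (*head)->dump_time <= r->dump_time)
		head = &(*head)->next;
	r->next = *head;
	*head = r;
}

int main(void)
{
	struct req a = { 300, NULL }, b = { 100, NULL }, c = { 200, NULL };
	struct req *head = NULL, *it;

	add_request(&head, &a);
	add_request(&head, &b);
	add_request(&head, &c);
	for (it = head; it; it = it->next)
		printf("%llu ", (unsigned long long)it->dump_time);
	printf("\n");	/* prints: 100 200 300 */
	return 0;
}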
*/ ++ preempt_disable(); ++ *timestamp = kbasep_vinstr_get_timestamp(); ++ rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx); ++ preempt_enable(); + -+ kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); -+} -+EXPORT_SYMBOL(kutf_test_skip); ++ if (!rcode) ++ rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); ++ WARN_ON(rcode); + -+void kutf_test_skip_msg(struct kutf_context *context, const char *message) -+{ -+ kutf_set_result(context, KUTF_RESULT_SKIP); -+ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ switch (vinstr_ctx->state) ++ { ++ case VINSTR_SUSPENDING: ++ schedule_work(&vinstr_ctx->suspend_work); ++ break; ++ case VINSTR_DUMPING: ++ vinstr_ctx->state = VINSTR_IDLE; ++ wake_up_all(&vinstr_ctx->suspend_waitq); ++ break; ++ default: ++ break; ++ } ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + -+ kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, -+ "Test skipped: %s", message), KUTF_RESULT_SKIP); -+ kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); -+} -+EXPORT_SYMBOL(kutf_test_skip_msg); ++ /* Accumulate values of collected counters. */ ++ if (!rcode) ++ accum_clients(vinstr_ctx); + -+void kutf_test_debug(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); ++ return rcode; +} -+EXPORT_SYMBOL(kutf_test_debug); + -+void kutf_test_pass(struct kutf_context *context, char const *message) ++/** ++ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel ++ * buffer ++ * @cli: requesting client ++ * @timestamp: timestamp when counters were collected ++ * @event_id: id of event that caused triggered counters collection ++ * ++ * Return: zero on success ++ */ ++static int kbasep_vinstr_fill_dump_buffer( ++ struct kbase_vinstr_client *cli, u64 timestamp, ++ enum base_hwcnt_reader_event event_id) +{ -+ static const char explicit_message[] = "(explicit pass)"; ++ unsigned int write_idx = atomic_read(&cli->write_idx); ++ unsigned int read_idx = atomic_read(&cli->read_idx); + -+ if (!message) -+ message = explicit_message; ++ struct kbase_hwcnt_reader_metadata *meta; ++ void *buffer; + -+ kutf_test_log_result(context, message, KUTF_RESULT_PASS); -+} -+EXPORT_SYMBOL(kutf_test_pass); ++ /* Check if there is a place to copy HWC block into. */ ++ if (write_idx - read_idx == cli->buffer_count) ++ return -1; ++ write_idx %= cli->buffer_count; + -+void kutf_test_info(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_INFO); ++ /* Fill in dump buffer and its metadata. */ ++ buffer = &cli->dump_buffers[write_idx * cli->dump_size]; ++ meta = &cli->dump_buffers_meta[write_idx]; ++ meta->timestamp = timestamp; ++ meta->event_id = event_id; ++ meta->buffer_idx = write_idx; ++ memcpy(buffer, cli->accum_buffer, cli->dump_size); ++ return 0; +} -+EXPORT_SYMBOL(kutf_test_info); + -+void kutf_test_warn(struct kutf_context *context, char const *message) ++/** ++ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer ++ * allocated in userspace ++ * @cli: requesting client ++ * ++ * Return: zero on success ++ * ++ * This is part of legacy ioctl interface. 
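The reader interface above relies on three free-running indices per client: write_idx advances as the service thread fills buffers, meta_idx as userspace takes buffer metadata, and read_idx as buffers are handed back, so occupancy is just an index difference and the slot is the index modulo the power-of-two buffer_count. The GET_BUFFER/PUT_BUFFER ioctl handlers further down apply the matching checks on the consumer side. A compact standalone model of the index rules:

#include <stdio.h>

/* Free-running index model: indices only increase; unsigned subtraction keeps
 * working across wraparound, and slot = index % buffer_count (power of two). */
struct ring {
	unsigned write_idx;	/* advanced by the dumping service */
	unsigned meta_idx;	/* advanced when userspace takes a buffer */
	unsigned read_idx;	/* advanced when userspace puts it back */
	unsigned buffer_count;
};

static int producer_write(struct ring *r)
{
	if (r->write_idx - r->read_idx == r->buffer_count)
		return -1;	/* every slot still owned by the consumer */
	/* the slot r->write_idx % r->buffer_count would be filled here */
	r->write_idx++;
	return 0;
}

static int consumer_take(struct ring *r)
{
	if (r->write_idx == r->meta_idx)
		return -1;	/* nothing new to hand out (EAGAIN) */
	if (r->read_idx != r->meta_idx)
		return -2;	/* previous buffer not returned yet (EBUSY) */
	r->meta_idx++;
	return 0;
}

static void consumer_put(struct ring *r)
{
	r->read_idx++;
}

int main(void)
{
	struct ring r = { 0, 0, 0, 2 };

	producer_write(&r);
	producer_write(&r);
	printf("third write %s\n",
	       producer_write(&r) == 0 ? "accepted" : "rejected");	/* rejected */
	consumer_take(&r);
	consumer_put(&r);
	printf("after one take/put, write %s\n",
	       producer_write(&r) == 0 ? "accepted" : "rejected");	/* accepted */
	return 0;
}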
++ */ ++static int kbasep_vinstr_fill_dump_buffer_legacy( ++ struct kbase_vinstr_client *cli) +{ -+ kutf_test_log_result(context, message, KUTF_RESULT_WARN); -+} -+EXPORT_SYMBOL(kutf_test_warn); ++ void __user *buffer = cli->legacy_buffer; ++ int rcode; + -+void kutf_test_fail(struct kutf_context *context, char const *message) -+{ -+ kutf_test_log_result(context, message, KUTF_RESULT_FAIL); ++ /* Copy data to user buffer. */ ++ rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size); ++ if (rcode) ++ pr_warn("error while copying buffer to user\n"); ++ return rcode; +} -+EXPORT_SYMBOL(kutf_test_fail); + -+void kutf_test_fatal(struct kutf_context *context, char const *message) ++/** ++ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer ++ * allocated in kernel space ++ * @cli: requesting client ++ * ++ * Return: zero on success ++ * ++ * This is part of the kernel client interface. ++ */ ++static int kbasep_vinstr_fill_dump_buffer_kernel( ++ struct kbase_vinstr_client *cli) +{ -+ kutf_test_log_result(context, message, KUTF_RESULT_FATAL); -+} -+EXPORT_SYMBOL(kutf_test_fatal); ++ memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); + -+void kutf_test_abort(struct kutf_context *context) -+{ -+ kutf_test_log_result(context, "", KUTF_RESULT_ABORT); ++ return 0; +} -+EXPORT_SYMBOL(kutf_test_abort); + +/** -+ * init_kutf_core() - Module entry point. -+ * -+ * Create the base entry point in debugfs. ++ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst ++ * @vinstr_ctx: vinstr context + */ -+static int __init init_kutf_core(void) ++static void kbasep_vinstr_reprogram( ++ struct kbase_vinstr_context *vinstr_ctx) +{ -+ int ret; ++ unsigned long flags; ++ bool suspended = false; + -+ base_dir = debugfs_create_dir("kutf_tests", NULL); -+ if (!base_dir) { -+ ret = -ENODEV; -+ goto exit_dir; -+ } ++ /* Don't enable hardware counters if vinstr is suspended. */ ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ if (VINSTR_IDLE != vinstr_ctx->state) ++ suspended = true; ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); ++ if (suspended) ++ return; + -+ return 0; ++ /* Change to suspended state is done while holding vinstr context ++ * lock. Below code will then no re-enable the instrumentation. */ + -+exit_dir: -+ return ret; ++ if (vinstr_ctx->reprogram) { ++ struct kbase_vinstr_client *iter; ++ ++ if (!reprogram_hwcnt(vinstr_ctx)) { ++ vinstr_ctx->reprogram = false; ++ list_for_each_entry( ++ iter, ++ &vinstr_ctx->idle_clients, ++ list) ++ iter->pending = false; ++ list_for_each_entry( ++ iter, ++ &vinstr_ctx->waiting_clients, ++ list) ++ iter->pending = false; ++ } ++ } +} + +/** -+ * exit_kutf_core() - Module exit point. ++ * kbasep_vinstr_update_client - copy accumulated counters to user readable ++ * buffer and notify the user ++ * @cli: requesting client ++ * @timestamp: timestamp when counters were collected ++ * @event_id: id of event that caused triggered counters collection + * -+ * Remove the base entry point in debugfs. ++ * Return: zero on success + */ -+static void __exit exit_kutf_core(void) ++static int kbasep_vinstr_update_client( ++ struct kbase_vinstr_client *cli, u64 timestamp, ++ enum base_hwcnt_reader_event event_id) +{ -+ debugfs_remove_recursive(base_dir); -+} ++ int rcode = 0; + -+#else /* defined(CONFIG_DEBUG_FS) */ ++ /* Copy collected counters to user readable buffer. 
*/ ++ if (cli->buffer_count) ++ rcode = kbasep_vinstr_fill_dump_buffer( ++ cli, timestamp, event_id); ++ else if (cli->kernel_buffer) ++ rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); ++ else ++ rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); ++ ++ if (rcode) ++ goto exit; ++ ++ ++ /* Notify client. Make sure all changes to memory are visible. */ ++ wmb(); ++ atomic_inc(&cli->write_idx); ++ wake_up_interruptible(&cli->waitq); ++ ++ /* Prepare for next request. */ ++ memset(cli->accum_buffer, 0, cli->dump_size); ++ ++exit: ++ return rcode; ++} + +/** -+ * init_kutf_core() - Module entry point. ++ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function + * -+ * Stub for when build against a kernel without debugfs support ++ * @hrtimer: high resolution timer ++ * ++ * Return: High resolution timer restart enum. + */ -+static int __init init_kutf_core(void) ++static enum hrtimer_restart kbasep_vinstr_wake_up_callback( ++ struct hrtimer *hrtimer) +{ -+ pr_debug("KUTF requires a kernel with debug fs support"); ++ struct kbasep_vinstr_wake_up_timer *timer = ++ container_of( ++ hrtimer, ++ struct kbasep_vinstr_wake_up_timer, ++ hrtimer); + -+ return -ENODEV; ++ KBASE_DEBUG_ASSERT(timer); ++ ++ atomic_set(&timer->vinstr_ctx->request_pending, 1); ++ wake_up_all(&timer->vinstr_ctx->waitq); ++ ++ return HRTIMER_NORESTART; +} + ++#ifdef CONFIG_DEBUG_OBJECT_TIMERS ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) +/** -+ * exit_kutf_core() - Module exit point. ++ * kbase_destroy_hrtimer_on_stack - kernel's destroy_hrtimer_on_stack(), ++ * rewritten + * -+ * Stub for when build against a kernel without debugfs support ++ * @timer: high resolution timer ++ * ++ * destroy_hrtimer_on_stack() was exported only for 4.7.0 kernel so for ++ * earlier kernel versions it is not possible to call it explicitly. ++ * Since this function must accompany hrtimer_init_on_stack(), which ++ * has to be used for hrtimer initialization if CONFIG_DEBUG_OBJECT_TIMERS ++ * is defined in order to avoid the warning about object on stack not being ++ * annotated, we rewrite it here to be used for earlier kernel versions. + */ -+static void __exit exit_kutf_core(void) ++static void kbase_destroy_hrtimer_on_stack(struct hrtimer *timer) +{ ++ debug_object_free(timer, &hrtimer_debug_descr); +} -+#endif /* defined(CONFIG_DEBUG_FS) */ -+ -+MODULE_LICENSE("GPL"); ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) */ ++#endif /* CONFIG_DEBUG_OBJECT_TIMERS */ + -+module_init(init_kutf_core); -+module_exit(exit_kutf_core); -diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c -new file mode 100644 -index 000000000..a429a2dbf ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c -@@ -0,0 +1,71 @@ -+/* -+ * -+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. ++/** ++ * kbasep_vinstr_service_task - HWC dumping service thread + * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ * @data: Pointer to vinstr context structure. + * ++ * Return: Always returns zero. 
+ */ ++static int kbasep_vinstr_service_task(void *data) ++{ ++ struct kbase_vinstr_context *vinstr_ctx = data; ++ struct kbasep_vinstr_wake_up_timer timer; + ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + ++ hrtimer_init_on_stack(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + -+/* Kernel UTF utility functions */ ++ timer.hrtimer.function = kbasep_vinstr_wake_up_callback; ++ timer.vinstr_ctx = vinstr_ctx; + -+#include -+#include -+#include -+#include ++ while (!kthread_should_stop()) { ++ struct kbase_vinstr_client *cli = NULL; ++ struct kbase_vinstr_client *tmp; ++ int rcode; + -+#include -+#include ++ u64 timestamp = kbasep_vinstr_get_timestamp(); ++ u64 dump_time = 0; ++ struct list_head expired_requests; + -+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; ++ /* Hold lock while performing operations on lists of clients. */ ++ mutex_lock(&vinstr_ctx->lock); + -+DEFINE_MUTEX(buffer_lock); ++ /* Closing thread must not interact with client requests. */ ++ if (current == vinstr_ctx->thread) { ++ atomic_set(&vinstr_ctx->request_pending, 0); + -+const char *kutf_dsprintf(struct kutf_mempool *pool, -+ const char *fmt, ...) -+{ -+ va_list args; -+ int len; -+ int size; -+ void *buffer; ++ if (!list_empty(&vinstr_ctx->waiting_clients)) { ++ cli = list_first_entry( ++ &vinstr_ctx->waiting_clients, ++ struct kbase_vinstr_client, ++ list); ++ dump_time = cli->dump_time; ++ } ++ } + -+ mutex_lock(&buffer_lock); -+ va_start(args, fmt); -+ len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); -+ va_end(args); ++ if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { ++ mutex_unlock(&vinstr_ctx->lock); + -+ if (len < 0) { -+ pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt); -+ goto fail_format; -+ } ++ /* Sleep until next dumping event or service request. */ ++ if (cli) { ++ u64 diff = dump_time - timestamp; + -+ if (len >= sizeof(tmp_buffer)) { -+ pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt); -+ size = sizeof(tmp_buffer); -+ } else { -+ size = len + 1; -+ } ++ hrtimer_start( ++ &timer.hrtimer, ++ ns_to_ktime(diff), ++ HRTIMER_MODE_REL); ++ } ++ wait_event( ++ vinstr_ctx->waitq, ++ atomic_read( ++ &vinstr_ctx->request_pending) || ++ kthread_should_stop()); ++ hrtimer_cancel(&timer.hrtimer); ++ continue; ++ } + -+ buffer = kutf_mempool_alloc(pool, size); -+ if (!buffer) -+ goto fail_alloc; ++ rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, ++ ×tamp); + -+ memcpy(buffer, tmp_buffer, size); -+ mutex_unlock(&buffer_lock); ++ INIT_LIST_HEAD(&expired_requests); + -+ return buffer; ++ /* Find all expired requests. */ ++ list_for_each_entry_safe( ++ cli, ++ tmp, ++ &vinstr_ctx->waiting_clients, ++ list) { ++ s64 tdiff = ++ (s64)(timestamp + DUMPING_RESOLUTION) - ++ (s64)cli->dump_time; ++ if (tdiff >= 0ll) { ++ list_del(&cli->list); ++ list_add(&cli->list, &expired_requests); ++ } else { ++ break; ++ } ++ } + -+fail_alloc: -+fail_format: -+ mutex_unlock(&buffer_lock); -+ return NULL; -+} -+EXPORT_SYMBOL(kutf_dsprintf); -diff --git a/drivers/gpu/arm/midgard/tests/kutf/sconscript b/drivers/gpu/arm/midgard/tests/kutf/sconscript -new file mode 100755 -index 000000000..d7f112448 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/kutf/sconscript -@@ -0,0 +1,21 @@ -+# -+# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved. 
-+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ /* Fill data for each request found. */ ++ list_for_each_entry_safe(cli, tmp, &expired_requests, list) { ++ /* Ensure that legacy buffer will not be used from ++ * this kthread context. */ ++ BUG_ON(0 == cli->buffer_count); ++ /* Expect only periodically sampled clients. */ ++ BUG_ON(0 == cli->dump_interval); + ++ if (!rcode) ++ kbasep_vinstr_update_client( ++ cli, ++ timestamp, ++ BASE_HWCNT_READER_EVENT_PERIODIC); + -+Import('kutf_env') ++ /* Set new dumping time. Drop missed probing times. */ ++ do { ++ cli->dump_time += cli->dump_interval; ++ } while (cli->dump_time < timestamp); + -+make_args = kutf_env.kernel_get_config_defines(ret_list = True) ++ list_del(&cli->list); ++ kbasep_vinstr_add_dump_request( ++ cli, ++ &vinstr_ctx->waiting_clients); ++ } + -+mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args) -+kutf_env.KernelObjTarget('kutf', mod) -diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild -new file mode 100755 -index 000000000..0cd9cebe9 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild -@@ -0,0 +1,20 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ /* Reprogram counters set if required. */ ++ kbasep_vinstr_reprogram(vinstr_ctx); + ++ mutex_unlock(&vinstr_ctx->lock); ++ } + -+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android ++#ifdef CONFIG_DEBUG_OBJECTS_TIMERS ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) ++ kbase_destroy_hrtimer_on_stack(&timer.hrtimer); ++#else ++ destroy_hrtimer_on_stack(&timer.hrtimer); ++#endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)) */ ++#endif /* CONFIG_DEBUG_OBJECTS_TIMERS */ + -+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o ++ return 0; ++} + -+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o -diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig -new file mode 100644 -index 000000000..16f68d15c ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig -@@ -0,0 +1,23 @@ -+# -+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. 
-+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/*****************************************************************************/ + ++/** ++ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers ++ * @cli: pointer to vinstr client structure ++ * ++ * Return: non-zero if client has at least one dumping buffer filled that was ++ * not notified to user yet ++ */ ++static int kbasep_vinstr_hwcnt_reader_buffer_ready( ++ struct kbase_vinstr_client *cli) ++{ ++ KBASE_DEBUG_ASSERT(cli); ++ return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); ++} + -+config MALI_IRQ_LATENCY -+ tristate "Mali GPU IRQ latency measurement" -+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF -+ default n -+ help -+ This option will build a test module mali_kutf_irq_test that -+ can determine the latency of the Mali GPU IRQ on your system. -+ Choosing M here will generate a single module called mali_kutf_irq_test. -diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile -new file mode 100644 -index 000000000..4e948767a ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile -@@ -0,0 +1,51 @@ -+# -+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @buffer: pointer to userspace buffer ++ * @size: size of buffer ++ * ++ * Return: zero on success ++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( ++ struct kbase_vinstr_client *cli, void __user *buffer, ++ size_t size) ++{ ++ unsigned int meta_idx = atomic_read(&cli->meta_idx); ++ unsigned int idx = meta_idx % cli->buffer_count; ++ ++ struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx]; + ++ /* Metadata sanity check. */ ++ KBASE_DEBUG_ASSERT(idx == meta->buffer_idx); + -+# linux build system bootstrap for out-of-tree module ++ if (sizeof(struct kbase_hwcnt_reader_metadata) != size) ++ return -EINVAL; + -+# default to building for the host -+ARCH ?= $(shell uname -m) ++ /* Check if there is any buffer available. */ ++ if (atomic_read(&cli->write_idx) == meta_idx) ++ return -EAGAIN; + -+ifeq ($(KDIR),) -+$(error Must specify KDIR to point to the kernel to target)) -+endif ++ /* Check if previously taken buffer was put back. 
*/ ++ if (atomic_read(&cli->read_idx) != meta_idx) ++ return -EBUSY; + -+TEST_CCFLAGS := \ -+ -DMALI_DEBUG=$(MALI_DEBUG) \ -+ -DMALI_BACKEND_KERNEL=$(MALI_BACKEND_KERNEL) \ -+ -DMALI_MODEL=$(MALI_MODEL) \ -+ -DMALI_NO_MALI=$(MALI_NO_MALI) \ -+ -DMALI_BASE_QA_LEAK=$(MALI_BASE_QA_LEAK) \ -+ -DMALI_BASE_QA_RESFAIL=$(MALI_BASE_QA_RESFAIL) \ -+ -DMALI_BASE_QA_USE_AFTER_FREE=$(MALI_BASE_QA_USE_AFTER_FREE) \ -+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -+ -DMALI_USE_UMP=$(MALI_USE_UMP) \ -+ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ -+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ -+ $(SCONS_CFLAGS) \ -+ -I$(CURDIR)/../include \ -+ -I$(CURDIR)/../../../../../../include \ -+ -I$(CURDIR)/../../../ \ -+ -I$(CURDIR)/../../ \ -+ -I$(CURDIR)/../../backend/gpu \ -+ -I$(CURDIR)/ \ -+ -I$(srctree)/drivers/staging/android \ -+ -I$(srctree)/include/linux ++ /* Copy next available buffer's metadata to user. */ ++ if (copy_to_user(buffer, meta, size)) ++ return -EFAULT; + -+all: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules ++ atomic_inc(&cli->meta_idx); + -+clean: -+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean -diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c -new file mode 100644 -index 000000000..e2ff4432b ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c -@@ -0,0 +1,257 @@ -+/* -+ * -+ * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved. -+ * -+ * This program is free software and is provided to you under the terms of the -+ * GNU General Public License version 2 as published by the Free Software -+ * Foundation, and any use by you of this program is subject to the terms -+ * of such GNU licence. -+ * -+ * A copy of the licence is included with the program, and can also be obtained -+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+ * Boston, MA 02110-1301, USA. ++ return 0; ++} ++ ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @buffer: pointer to userspace buffer ++ * @size: size of buffer + * ++ * Return: zero on success + */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( ++ struct kbase_vinstr_client *cli, void __user *buffer, ++ size_t size) ++{ ++ unsigned int read_idx = atomic_read(&cli->read_idx); ++ unsigned int idx = read_idx % cli->buffer_count; + ++ struct kbase_hwcnt_reader_metadata meta; + ++ if (sizeof(struct kbase_hwcnt_reader_metadata) != size) ++ return -EINVAL; + -+#include -+#include -+#include -+ -+#include "mali_kbase.h" -+#include ++ /* Check if any buffer was taken. */ ++ if (atomic_read(&cli->meta_idx) == read_idx) ++ return -EPERM; + -+#include -+#include ++ /* Check if correct buffer is put back. */ ++ if (copy_from_user(&meta, buffer, size)) ++ return -EFAULT; ++ if (idx != meta.buffer_idx) ++ return -EINVAL; + -+/* -+ * This file contains the code which is used for measuring interrupt latency -+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is -+ * used with this purpose and it is called within KUTF framework - a kernel -+ * unit test framework. The measured latency provided by this test should -+ * be representative for the latency of the Mali JOB/MMU IRQs as well. 
-+ */ ++ atomic_inc(&cli->read_idx); + -+/* KUTF test application pointer for this test */ -+struct kutf_application *irq_app; ++ return 0; ++} + +/** -+ * struct kutf_irq_fixture data - test fixture used by the test functions. -+ * @kbdev: kbase device for the GPU. ++ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @interval: periodic dumping interval (disable periodic dumping if zero) + * ++ * Return: zero on success + */ -+struct kutf_irq_fixture_data { -+ struct kbase_device *kbdev; -+}; ++static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( ++ struct kbase_vinstr_client *cli, u32 interval) ++{ ++ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + -+#define SEC_TO_NANO(s) ((s)*1000000000LL) ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+/* ID for the GPU IRQ */ -+#define GPU_IRQ_HANDLER 2 ++ mutex_lock(&vinstr_ctx->lock); + -+#define NR_TEST_IRQS 1000000 ++ list_del(&cli->list); + -+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not -+ * expect to see this in normal use (e.g., when Android is running). */ -+#define TEST_IRQ MULTIPLE_GPU_FAULTS ++ cli->dump_interval = interval; + -+#define IRQ_TIMEOUT HZ ++ /* If interval is non-zero, enable periodic dumping for this client. */ ++ if (cli->dump_interval) { ++ if (DUMPING_RESOLUTION > cli->dump_interval) ++ cli->dump_interval = DUMPING_RESOLUTION; ++ cli->dump_time = ++ kbasep_vinstr_get_timestamp() + cli->dump_interval; + -+/* Kernel API for setting irq throttle hook callback and irq time in us*/ -+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, -+ irq_handler_t custom_handler, -+ int irq_type); -+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data); ++ kbasep_vinstr_add_dump_request( ++ cli, &vinstr_ctx->waiting_clients); + -+static DECLARE_WAIT_QUEUE_HEAD(wait); -+static bool triggered; -+static u64 irq_time; ++ atomic_set(&vinstr_ctx->request_pending, 1); ++ wake_up_all(&vinstr_ctx->waitq); ++ } else { ++ list_add(&cli->list, &vinstr_ctx->idle_clients); ++ } + -+static void *kbase_untag(void *ptr) -+{ -+ return (void *)(((uintptr_t) ptr) & ~3); ++ mutex_unlock(&vinstr_ctx->lock); ++ ++ return 0; +} + +/** -+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler -+ * @irq: IRQ number -+ * @data: Data associated with this IRQ -+ * -+ * Return: state of the IRQ ++ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id ++ * @event_id: id of event ++ * Return: event_mask or zero if event is not supported or maskable + */ -+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) ++static u32 kbasep_vinstr_hwcnt_reader_event_mask( ++ enum base_hwcnt_reader_event event_id) +{ -+ struct kbase_device *kbdev = kbase_untag(data); -+ u32 val; ++ u32 event_mask = 0; + -+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); -+ if (val & TEST_IRQ) { -+ struct timespec64 tval; ++ switch (event_id) { ++ case BASE_HWCNT_READER_EVENT_PREJOB: ++ case BASE_HWCNT_READER_EVENT_POSTJOB: ++ /* These event are maskable. */ ++ event_mask = (1 << event_id); ++ break; + -+ ktime_get_real_ts64(&tval); -+ irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); ++ case BASE_HWCNT_READER_EVENT_MANUAL: ++ case BASE_HWCNT_READER_EVENT_PERIODIC: ++ /* These event are non-maskable. */ ++ default: ++ /* These event are not supported. 
*/ ++ break; ++ } + -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, -+ NULL); ++ return event_mask; ++} + -+ triggered = true; -+ wake_up(&wait); ++/** ++ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @event_id: id of event to enable ++ * ++ * Return: zero on success ++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( ++ struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id) ++{ ++ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; ++ u32 event_mask; + -+ return IRQ_HANDLED; -+ } ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ /* Trigger main irq handler */ -+ return kbase_gpu_irq_handler(irq, data); ++ event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); ++ if (!event_mask) ++ return -EINVAL; ++ ++ mutex_lock(&vinstr_ctx->lock); ++ cli->event_mask |= event_mask; ++ mutex_unlock(&vinstr_ctx->lock); ++ ++ return 0; +} + +/** -+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required -+ * for all the tests in the irq suite. -+ * @context: KUTF context. ++ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @event_id: id of event to disable + * -+ * Return: Fixture data created on success or NULL on failure ++ * Return: zero on success + */ -+static void *mali_kutf_irq_default_create_fixture( -+ struct kutf_context *context) ++static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( ++ struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id) +{ -+ struct kutf_irq_fixture_data *data; -+ -+ data = kutf_mempool_alloc(&context->fixture_pool, -+ sizeof(struct kutf_irq_fixture_data)); ++ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; ++ u32 event_mask; + -+ if (!data) -+ goto fail; ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ /* Acquire the kbase device */ -+ data->kbdev = kbase_find_device(-1); -+ if (data->kbdev == NULL) { -+ kutf_test_fail(context, "Failed to find kbase device"); -+ goto fail; -+ } ++ event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); ++ if (!event_mask) ++ return -EINVAL; + -+ return data; ++ mutex_lock(&vinstr_ctx->lock); ++ cli->event_mask &= ~event_mask; ++ mutex_unlock(&vinstr_ctx->lock); + -+fail: -+ return NULL; ++ return 0; +} + +/** -+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously -+ * created by mali_kutf_irq_default_create_fixture. ++ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command ++ * @cli: pointer to vinstr client structure ++ * @hwver: pointer to user buffer where hw version will be stored + * -+ * @context: KUTF context. 
++ * Return: zero on success + */ -+static void mali_kutf_irq_default_remove_fixture( -+ struct kutf_context *context) ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( ++ struct kbase_vinstr_client *cli, u32 __user *hwver) +{ -+ struct kutf_irq_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; ++#ifndef CONFIG_MALI_NO_MALI ++ struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; ++#endif + -+ kbase_release_device(kbdev); ++ u32 ver = 5; ++ ++#ifndef CONFIG_MALI_NO_MALI ++ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) ++ ver = 4; ++#endif ++ ++ return put_user(ver, hwver); +} + +/** -+ * mali_kutf_irq_latency() - measure GPU IRQ latency -+ * @context: kutf context within which to perform the test -+ * -+ * The test triggers IRQs manually, and measures the -+ * time between triggering the IRQ and the IRQ handler being executed. ++ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl ++ * @filp: pointer to file structure ++ * @cmd: user command ++ * @arg: command's argument + * -+ * This is not a traditional test, in that the pass/fail status has little -+ * meaning (other than indicating that the IRQ handler executed at all). Instead -+ * the results are in the latencies provided with the test result. There is no -+ * meaningful pass/fail result that can be obtained here, instead the latencies -+ * are provided for manual analysis only. ++ * Return: zero on success + */ -+static void mali_kutf_irq_latency(struct kutf_context *context) ++static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, ++ unsigned int cmd, unsigned long arg) +{ -+ struct kutf_irq_fixture_data *data = context->fixture; -+ struct kbase_device *kbdev = data->kbdev; -+ u64 min_time = U64_MAX, max_time = 0, average_time = 0; -+ int i; -+ bool test_failed = false; ++ long rcode = 0; ++ struct kbase_vinstr_client *cli; + -+ /* Force GPU to be powered */ -+ kbase_pm_context_active(kbdev); ++ KBASE_DEBUG_ASSERT(filp); + -+ kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, -+ GPU_IRQ_HANDLER); ++ cli = filp->private_data; ++ KBASE_DEBUG_ASSERT(cli); + -+ for (i = 0; i < NR_TEST_IRQS; i++) { -+ struct timespec64 tval; -+ u64 start_time; -+ int ret; ++ if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd))) ++ return -EINVAL; + -+ triggered = false; -+ ktime_get_real_ts64(&tval); -+ start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); ++ switch (cmd) { ++ case KBASE_HWCNT_READER_GET_API_VERSION: ++ rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); ++ break; ++ case KBASE_HWCNT_READER_GET_HWVER: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( ++ cli, (u32 __user *)arg); ++ break; ++ case KBASE_HWCNT_READER_GET_BUFFER_SIZE: ++ KBASE_DEBUG_ASSERT(cli->vinstr_ctx); ++ rcode = put_user( ++ (u32)cli->vinstr_ctx->dump_size, ++ (u32 __user *)arg); ++ break; ++ case KBASE_HWCNT_READER_DUMP: ++ rcode = kbase_vinstr_hwc_dump( ++ cli, BASE_HWCNT_READER_EVENT_MANUAL); ++ break; ++ case KBASE_HWCNT_READER_CLEAR: ++ rcode = kbase_vinstr_hwc_clear(cli); ++ break; ++ case KBASE_HWCNT_READER_GET_BUFFER: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( ++ cli, (void __user *)arg, _IOC_SIZE(cmd)); ++ break; ++ case KBASE_HWCNT_READER_PUT_BUFFER: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( ++ cli, (void __user *)arg, _IOC_SIZE(cmd)); ++ break; ++ case KBASE_HWCNT_READER_SET_INTERVAL: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( ++ cli, (u32)arg); ++ break; ++ case 
KBASE_HWCNT_READER_ENABLE_EVENT: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( ++ cli, (enum base_hwcnt_reader_event)arg); ++ break; ++ case KBASE_HWCNT_READER_DISABLE_EVENT: ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( ++ cli, (enum base_hwcnt_reader_event)arg); ++ break; ++ default: ++ rcode = -EINVAL; ++ break; ++ } + -+ /* Trigger fake IRQ */ -+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), -+ TEST_IRQ, NULL); ++ return rcode; ++} + -+ ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT); ++/** ++ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll ++ * @filp: pointer to file structure ++ * @wait: pointer to poll table ++ * Return: POLLIN if data can be read without blocking, otherwise zero ++ */ ++static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, ++ poll_table *wait) ++{ ++ struct kbase_vinstr_client *cli; + -+ if (ret == 0) { -+ kutf_test_fail(context, "Timed out waiting for IRQ\n"); -+ test_failed = true; -+ break; -+ } ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(wait); + -+ if ((irq_time - start_time) < min_time) -+ min_time = irq_time - start_time; -+ if ((irq_time - start_time) > max_time) -+ max_time = irq_time - start_time; -+ average_time += irq_time - start_time; ++ cli = filp->private_data; ++ KBASE_DEBUG_ASSERT(cli); + -+ udelay(10); -+ } ++ poll_wait(filp, &cli->waitq, wait); ++ if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) ++ return POLLIN; ++ return 0; ++} + -+ /* Go back to default handler */ -+ kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); ++/** ++ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap ++ * @filp: pointer to file structure ++ * @vma: pointer to vma structure ++ * Return: zero on success ++ */ ++static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, ++ struct vm_area_struct *vma) ++{ ++ struct kbase_vinstr_client *cli; ++ unsigned long size, addr, pfn, offset; ++ unsigned long vm_size = vma->vm_end - vma->vm_start; + -+ kbase_pm_context_idle(kbdev); ++ KBASE_DEBUG_ASSERT(filp); ++ KBASE_DEBUG_ASSERT(vma); + -+ if (!test_failed) { -+ const char *results; ++ cli = filp->private_data; ++ KBASE_DEBUG_ASSERT(cli); + -+ do_div(average_time, NR_TEST_IRQS); -+ results = kutf_dsprintf(&context->fixture_pool, -+ "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", -+ min_time, max_time, average_time); -+ kutf_test_pass(context, results); -+ } ++ size = cli->buffer_count * cli->dump_size; ++ ++ if (vma->vm_pgoff > (size >> PAGE_SHIFT)) ++ return -EINVAL; ++ ++ offset = vma->vm_pgoff << PAGE_SHIFT; ++ if (vm_size > size - offset) ++ return -EINVAL; ++ ++ addr = __pa((unsigned long)cli->dump_buffers + offset); ++ pfn = addr >> PAGE_SHIFT; ++ ++ return remap_pfn_range( ++ vma, ++ vma->vm_start, ++ pfn, ++ vm_size, ++ vma->vm_page_prot); +} + +/** -+ * Module entry point for this test. 
++ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release ++ * @inode: pointer to inode structure ++ * @filp: pointer to file structure ++ * Return always return zero + */ -+int mali_kutf_irq_test_main_init(void) ++static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, ++ struct file *filp) +{ -+ struct kutf_suite *suite; ++ struct kbase_vinstr_client *cli; + -+ irq_app = kutf_create_application("irq"); -+ suite = kutf_create_suite(irq_app, "irq_default", -+ 1, mali_kutf_irq_default_create_fixture, -+ mali_kutf_irq_default_remove_fixture); ++ KBASE_DEBUG_ASSERT(inode); ++ KBASE_DEBUG_ASSERT(filp); + -+ kutf_add_test(suite, 0x0, "irq_latency", -+ mali_kutf_irq_latency); ++ cli = filp->private_data; ++ KBASE_DEBUG_ASSERT(cli); ++ ++ kbase_vinstr_detach_client(cli); + return 0; +} + ++/*****************************************************************************/ ++ +/** -+ * Module exit point for this test. ++ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle ++ * @kbdev: pointer to kbase device structure + */ -+void mali_kutf_irq_test_main_exit(void) ++static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) +{ -+ kutf_destroy_application(irq_app); -+} ++ struct kbasep_js_device_data *js_devdata = &kbdev->js_data; ++ unsigned long flags; + -+module_init(mali_kutf_irq_test_main_init); -+module_exit(mali_kutf_irq_test_main_exit); ++ down(&js_devdata->schedule_sem); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_backend_slot_update(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ up(&js_devdata->schedule_sem); ++} + -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("ARM Ltd."); -+MODULE_VERSION("1.0"); -diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript -new file mode 100755 -index 000000000..ec837f164 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript -@@ -0,0 +1,30 @@ -+# -+# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. 
-+# -+# ++/** ++ * kbasep_vinstr_suspend_worker - worker suspending vinstr module ++ * @data: pointer to work structure ++ */ ++static void kbasep_vinstr_suspend_worker(struct work_struct *data) ++{ ++ struct kbase_vinstr_context *vinstr_ctx; ++ unsigned long flags; + ++ vinstr_ctx = container_of(data, struct kbase_vinstr_context, ++ suspend_work); + -+import os -+Import('env') ++ mutex_lock(&vinstr_ctx->lock); + -+src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')] ++ if (vinstr_ctx->kctx) ++ disable_hwcnt(vinstr_ctx); + -+if env.GetOption('clean') : -+ env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test')) -+ cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, []) -+ env.KernelObjTarget('mali_kutf_irq_test', cmd) -+else: -+ makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} MALI_BACKEND_KERNEL=1 MALI_ERROR_INJECT_ON=${error_inject} MALI_MODEL=${mali_model} MALI_NO_MALI=${no_mali} MALI_HW_VERSION=${hwver} MALI_UNIT_TEST=${unit} MALI_USE_UMP=${ump} MALI_CUSTOMER_RELEASE=${release} %s %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % (env.base_get_qa_settings(), env.kernel_get_config_defines()), '$MAKECOMSTR') -+ cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction]) -+ env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko') -+ env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko') -+ env.KernelObjTarget('mali_kutf_irq_test', cmd) -diff --git a/drivers/gpu/arm/midgard/tests/sconscript b/drivers/gpu/arm/midgard/tests/sconscript -new file mode 100755 -index 000000000..5337e1078 ---- /dev/null -+++ b/drivers/gpu/arm/midgard/tests/sconscript -@@ -0,0 +1,37 @@ -+# -+# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# A copy of the licence is included with the program, and can also be obtained -+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -+# Boston, MA 02110-1301, USA. -+# -+# ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ vinstr_ctx->state = VINSTR_SUSPENDED; ++ wake_up_all(&vinstr_ctx->suspend_waitq); ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + ++ mutex_unlock(&vinstr_ctx->lock); + -+Import ('env') ++ /* Kick GPU scheduler to allow entering protected mode. ++ * This must happen after vinstr was suspended. 
*/ ++ kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); ++} + -+kutf_env = env.Clone() -+kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include') -+Export('kutf_env') ++/** ++ * kbasep_vinstr_suspend_worker - worker resuming vinstr module ++ * @data: pointer to work structure ++ */ ++static void kbasep_vinstr_resume_worker(struct work_struct *data) ++{ ++ struct kbase_vinstr_context *vinstr_ctx; ++ unsigned long flags; + -+if Glob('internal/sconscript'): -+ SConscript('internal/sconscript') ++ vinstr_ctx = container_of(data, struct kbase_vinstr_context, ++ resume_work); + -+if kutf_env['debug'] == '1': -+ SConscript('kutf/sconscript') -+ SConscript('mali_kutf_irq_test/sconscript') ++ mutex_lock(&vinstr_ctx->lock); + -+ if Glob('kutf_test/sconscript'): -+ SConscript('kutf_test/sconscript') ++ if (vinstr_ctx->kctx) ++ enable_hwcnt(vinstr_ctx); + -+ if Glob('kutf_test_runner/sconscript'): -+ SConscript('kutf_test_runner/sconscript') ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ vinstr_ctx->state = VINSTR_IDLE; ++ wake_up_all(&vinstr_ctx->suspend_waitq); ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + -+if env['unit'] == '1': -+ SConscript('mali_kutf_ipa_test/sconscript') -+ SConscript('mali_kutf_vinstr_test/sconscript') -diff --git a/drivers/gpu/arm/sconscript b/drivers/gpu/arm/sconscript -new file mode 100755 -index 000000000..a06092bd5 ---- /dev/null -+++ b/drivers/gpu/arm/sconscript -@@ -0,0 +1,25 @@ -+# -+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. -+# -+# This program is free software and is provided to you under the terms of the -+# GNU General Public License version 2 as published by the Free Software -+# Foundation, and any use by you of this program is subject to the terms -+# of such GNU licence. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, you can access it online at -+# http://www.gnu.org/licenses/gpl-2.0.html. -+# -+# SPDX-License-Identifier: GPL-2.0 -+# -+# ++ mutex_unlock(&vinstr_ctx->lock); + -+import glob ++ /* Kick GPU scheduler to allow entering protected mode. ++ * Note that scheduler state machine might requested re-entry to ++ * protected mode before vinstr was resumed. ++ * This must happen after vinstr was release. */ ++ kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); ++} + ++/*****************************************************************************/ + -+SConscript('midgard/sconscript') -diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig -index ec4abf9ff..10ce5db97 100644 ---- a/drivers/gpu/drm/Kconfig -+++ b/drivers/gpu/drm/Kconfig -@@ -35,6 +35,18 @@ config DRM_MIPI_DBI - depends on DRM - select DRM_KMS_HELPER - -+config DRM_EDID -+ bool "EDID function for DRM" -+ depends on DRM -+ select HDMI -+ default y if !ROCKCHIP_MINI_KERNEL -+ help -+ DRM EDID read and parse function. 
++struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) ++{ ++ struct kbase_vinstr_context *vinstr_ctx; + -+config DRM_IGNORE_IOTCL_PERMIT -+ bool "Ignore drm ioctl permission" -+ depends on DRM && NO_GKI ++ vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL); ++ if (!vinstr_ctx) ++ return NULL; + - config DRM_MIPI_DSI - bool - depends on DRM -@@ -296,7 +308,7 @@ config DRM_VKMS - - source "drivers/gpu/drm/exynos/Kconfig" - --source "drivers/gpu/drm/rockchip/Kconfig" -+source "drivers/gpu/drm/rockchip-oh/Kconfig" - - source "drivers/gpu/drm/vmwgfx/Kconfig" - -diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile -index 215e78e79..dc748c528 100644 ---- a/drivers/gpu/drm/Makefile -+++ b/drivers/gpu/drm/Makefile -@@ -154,7 +154,7 @@ obj-$(CONFIG_DRM_VGEM) += vgem/ - obj-$(CONFIG_DRM_VKMS) += vkms/ - obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/ - obj-$(CONFIG_DRM_EXYNOS) +=exynos/ --obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ -+obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip-oh/ - obj-$(CONFIG_DRM_GMA500) += gma500/ - obj-$(CONFIG_DRM_UDL) += udl/ - obj-$(CONFIG_DRM_AST) += ast/ -diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c -index df9370e0f..00127137c 100644 ---- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c -+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c -@@ -8,11 +8,13 @@ - - #include - #include -+#include - #include - #include - #include - #include - #include -+#include - #include - #include - #include -@@ -31,16 +33,52 @@ - - #include "analogix_dp_core.h" - #include "analogix_dp_reg.h" -+#include "../../rockchip-oh/rockchip_drm_drv.h" - - #define to_dp(nm) container_of(nm, struct analogix_dp_device, nm) - - static const bool verify_fast_training; - -+#ifdef CONFIG_NO_GKI -+#undef EXTCON_DISP_DP -+#define EXTCON_DISP_DP EXTCON_DISP_EDP -+#endif ++ INIT_LIST_HEAD(&vinstr_ctx->idle_clients); ++ INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); ++ mutex_init(&vinstr_ctx->lock); ++ spin_lock_init(&vinstr_ctx->state_lock); ++ vinstr_ctx->kbdev = kbdev; ++ vinstr_ctx->thread = NULL; ++ vinstr_ctx->state = VINSTR_IDLE; ++ vinstr_ctx->suspend_cnt = 0; ++ INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); ++ INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); ++ init_waitqueue_head(&vinstr_ctx->suspend_waitq); + -+static const unsigned int analogix_dp_cable[] = { -+ EXTCON_DISP_DP, -+ EXTCON_NONE, -+}; ++ atomic_set(&vinstr_ctx->request_pending, 0); ++ init_waitqueue_head(&vinstr_ctx->waitq); + - struct bridge_init { - struct i2c_client *client; - struct device_node *node; - }; - -+static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, -+ const struct drm_display_mode *adj_mode); ++ return vinstr_ctx; ++} + -+static bool analogix_dp_bandwidth_ok(struct analogix_dp_device *dp, -+ const struct drm_display_mode *mode, -+ unsigned int rate, unsigned int lanes) ++void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) +{ -+ const struct drm_display_info *info; -+ u32 max_bw, req_bw, bpp = 24; -+ -+ if (dp->plat_data->skip_connector) -+ return true; ++ struct kbase_vinstr_client *cli; + -+ info = &dp->connector.display_info; -+ if (info->bpc) -+ bpp = 3 * info->bpc; ++ /* Stop service thread first. */ ++ if (vinstr_ctx->thread) ++ kthread_stop(vinstr_ctx->thread); + -+ req_bw = mode->clock * bpp / 8; -+ max_bw = lanes * rate; -+ if (req_bw > max_bw) -+ return false; ++ /* Wait for workers. 
*/ ++ flush_work(&vinstr_ctx->suspend_work); ++ flush_work(&vinstr_ctx->resume_work); + -+ return true; -+} ++ while (1) { ++ struct list_head *list = &vinstr_ctx->idle_clients; + - static int analogix_dp_init_dp(struct analogix_dp_device *dp) - { - int ret; -@@ -65,42 +103,56 @@ static int analogix_dp_init_dp(struct analogix_dp_device *dp) - return 0; - } - --static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) -+static int analogix_dp_panel_prepare(struct analogix_dp_device *dp) - { -- int timeout_loop = 0; -+ int ret; - -- while (timeout_loop < DP_TIMEOUT_LOOP_COUNT) { -- if (analogix_dp_get_plug_in_status(dp) == 0) -- return 0; -+ mutex_lock(&dp->panel_lock); - -- timeout_loop++; -- usleep_range(1000, 1100); -- } -+ if (dp->panel_is_prepared) -+ goto out; - -- /* -- * Some edp screen do not have hpd signal, so we can't just -- * return failed when hpd plug in detect failed, DT property -- * "force-hpd" would indicate whether driver need this. -- */ -- if (!dp->force_hpd) -- return -ETIMEDOUT; -+ ret = drm_panel_prepare(dp->plat_data->panel); -+ if (ret) -+ goto out; - -- /* -- * The eDP TRM indicate that if HPD_STATUS(RO) is 0, AUX CH -- * will not work, so we need to give a force hpd action to -- * set HPD_STATUS manually. -- */ -- dev_dbg(dp->dev, "failed to get hpd plug status, try to force hpd\n"); -+ dp->panel_is_prepared = true; ++ if (list_empty(list)) { ++ list = &vinstr_ctx->waiting_clients; ++ if (list_empty(list)) ++ break; ++ } + -+out: -+ mutex_unlock(&dp->panel_lock); -+ return 0; ++ cli = list_first_entry(list, struct kbase_vinstr_client, list); ++ list_del(&cli->list); ++ kfree(cli->accum_buffer); ++ kfree(cli); ++ vinstr_ctx->nclients--; ++ } ++ KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); ++ if (vinstr_ctx->kctx) ++ kbasep_vinstr_destroy_kctx(vinstr_ctx); ++ kfree(vinstr_ctx); +} + -+static int analogix_dp_panel_unprepare(struct analogix_dp_device *dp) ++int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_uk_hwcnt_reader_setup *setup) +{ -+ int ret; ++ struct kbase_vinstr_client *cli; ++ u32 bitmap[4]; + -+ mutex_lock(&dp->panel_lock); ++ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ KBASE_DEBUG_ASSERT(setup); ++ KBASE_DEBUG_ASSERT(setup->buffer_count); + -+ if (!dp->panel_is_prepared) -+ goto out; ++ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; ++ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; ++ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; ++ bitmap[JM_HWCNT_BM] = setup->jm_bm; + -+ ret = drm_panel_unprepare(dp->plat_data->panel); -+ if (ret) -+ goto out; ++ cli = kbasep_vinstr_attach_client( ++ vinstr_ctx, ++ setup->buffer_count, ++ bitmap, ++ &setup->fd, ++ NULL); + -+ dp->panel_is_prepared = false; ++ if (!cli) ++ return -ENOMEM; + -+out: -+ mutex_unlock(&dp->panel_lock); + return 0; +} - -- analogix_dp_force_hpd(dp); -+static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) ++ ++int kbase_vinstr_legacy_hwc_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_vinstr_client **cli, ++ struct kbase_uk_hwcnt_setup *setup) +{ -+ if (dp->force_hpd) -+ analogix_dp_force_hpd(dp); - - if (analogix_dp_get_plug_in_status(dp) != 0) { - dev_err(dp->dev, "failed to get hpd plug in status\n"); - return -EINVAL; - } - -- dev_dbg(dp->dev, "success to get plug in status after force hpd\n"); -- - return 0; - } - -@@ -109,6 +161,9 @@ static bool analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) - unsigned char psr_version; - int ret; - -+ if (!device_property_read_bool(dp->dev, "support-psr")) -+ return 0; ++ 
KBASE_DEBUG_ASSERT(vinstr_ctx); ++ KBASE_DEBUG_ASSERT(setup); ++ KBASE_DEBUG_ASSERT(cli); + - ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); - if (ret != 1) { - dev_err(dp->dev, "failed to get PSR version, disable it\n"); -@@ -217,8 +272,24 @@ static int analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) - if (ret < 0) - return ret; - -+ if (!data) { -+ /* -+ * A setting of 1 indicates that this is an eDP device that -+ * uses only Enhanced Framing, independently of the setting by -+ * the source of ENHANCED_FRAME_EN -+ */ -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_EDP_CONFIGURATION_CAP, -+ &data); -+ if (ret < 0) -+ return ret; ++ if (setup->dump_buffer) { ++ u32 bitmap[4]; + -+ data = !!(data & DP_FRAMING_CHANGE_CAP); -+ } ++ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; ++ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; ++ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; ++ bitmap[JM_HWCNT_BM] = setup->jm_bm; + - analogix_dp_enable_enhanced_mode(dp, data); - -+ dp->link_train.enhanced_framing = data; ++ if (*cli) ++ return -EBUSY; + - return 0; - } - -@@ -234,32 +305,27 @@ static int analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) - return ret < 0 ? ret : 0; - } - --static void --analogix_dp_set_lane_lane_pre_emphasis(struct analogix_dp_device *dp, -- int pre_emphasis, int lane) -+static bool analogix_dp_get_vrr_capable(struct analogix_dp_device *dp) - { -- switch (lane) { -- case 0: -- analogix_dp_set_lane0_pre_emphasis(dp, pre_emphasis); -- break; -- case 1: -- analogix_dp_set_lane1_pre_emphasis(dp, pre_emphasis); -- break; -+ struct drm_connector *connector = &dp->connector; -+ struct drm_display_info *info = &connector->display_info; - -- case 2: -- analogix_dp_set_lane2_pre_emphasis(dp, pre_emphasis); -- break; -+ if (!info->monitor_range.max_vfreq) -+ return false; -+ if (!info->monitor_range.min_vfreq) -+ return false; -+ if (info->monitor_range.max_vfreq < info->monitor_range.min_vfreq) -+ return false; -+ if (!drm_dp_sink_can_do_video_without_timing_msa(dp->dpcd)) -+ return false; - -- case 3: -- analogix_dp_set_lane3_pre_emphasis(dp, pre_emphasis); -- break; -- } -+ return true; - } - - static int analogix_dp_link_start(struct analogix_dp_device *dp) - { - u8 buf[4]; -- int lane, lane_count, pll_tries, retval; -+ int lane, lane_count, retval; - - lane_count = dp->link_train.lane_count; - -@@ -279,6 +345,16 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) - retval = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, 2); - if (retval < 0) - return retval; ++ *cli = kbasep_vinstr_attach_client( ++ vinstr_ctx, ++ 0, ++ bitmap, ++ (void *)(long)setup->dump_buffer, ++ NULL); + -+ /* Spread AMP if required, enable 8b/10b coding */ -+ buf[0] = analogix_dp_ssc_supported(dp) ? 
DP_SPREAD_AMP_0_5 : 0; -+ if (analogix_dp_get_vrr_capable(dp)) -+ buf[0] |= DP_MSA_TIMING_PAR_IGNORE_EN; -+ buf[1] = DP_SET_ANSI_8B10B; -+ retval = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, 2); -+ if (retval < 0) -+ return retval; ++ if (!(*cli)) ++ return -ENOMEM; ++ } else { ++ if (!*cli) ++ return -EINVAL; + - /* set enhanced mode if available */ - retval = analogix_dp_set_enhanced_mode(dp); - if (retval < 0) { -@@ -286,22 +362,12 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) - return retval; - } - -- /* Set TX pre-emphasis to minimum */ -+ /* Set TX voltage-swing and pre-emphasis to minimum */ - for (lane = 0; lane < lane_count; lane++) -- analogix_dp_set_lane_lane_pre_emphasis(dp, -- PRE_EMPHASIS_LEVEL_0, lane); -- -- /* Wait for PLL lock */ -- pll_tries = 0; -- while (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { -- if (pll_tries == DP_TIMEOUT_LOOP_COUNT) { -- dev_err(dp->dev, "Wait for PLL lock timed out\n"); -- return -ETIMEDOUT; -- } -- -- pll_tries++; -- usleep_range(90, 120); -- } -+ dp->link_train.training_lane[lane] = -+ DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | -+ DP_TRAIN_PRE_EMPH_LEVEL_0; -+ analogix_dp_set_lane_link_training(dp); - - /* Set training pattern 1 */ - analogix_dp_set_training_pattern(dp, TRAINING_PTN1); -@@ -384,54 +450,6 @@ static unsigned char analogix_dp_get_adjust_request_pre_emphasis( - return ((link_value >> shift) & 0xc) >> 2; - } - --static void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp, -- u8 training_lane_set, int lane) --{ -- switch (lane) { -- case 0: -- analogix_dp_set_lane0_link_training(dp, training_lane_set); -- break; -- case 1: -- analogix_dp_set_lane1_link_training(dp, training_lane_set); -- break; -- -- case 2: -- analogix_dp_set_lane2_link_training(dp, training_lane_set); -- break; -- -- case 3: -- analogix_dp_set_lane3_link_training(dp, training_lane_set); -- break; -- } --} -- --static unsigned int --analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, -- int lane) --{ -- u32 reg; -- -- switch (lane) { -- case 0: -- reg = analogix_dp_get_lane0_link_training(dp); -- break; -- case 1: -- reg = analogix_dp_get_lane1_link_training(dp); -- break; -- case 2: -- reg = analogix_dp_get_lane2_link_training(dp); -- break; -- case 3: -- reg = analogix_dp_get_lane3_link_training(dp); -- break; -- default: -- WARN_ON(1); -- return 0; -- } -- -- return reg; --} -- - static void analogix_dp_reduce_link_rate(struct analogix_dp_device *dp) - { - analogix_dp_training_pattern_dis(dp); -@@ -464,13 +482,27 @@ static void analogix_dp_get_adjust_training_lane(struct analogix_dp_device *dp, - } - } - -+static bool analogix_dp_tps3_supported(struct analogix_dp_device *dp) ++ kbase_vinstr_detach_client(*cli); ++ *cli = NULL; ++ } ++ ++ return 0; ++} ++ ++struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_uk_hwcnt_reader_setup *setup, ++ void *kernel_buffer) +{ -+ bool source_tps3_supported, sink_tps3_supported; -+ u8 dpcd = 0; ++ u32 bitmap[4]; + -+ source_tps3_supported = -+ dp->video_info.max_link_rate == DP_LINK_BW_5_4; -+ drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &dpcd); -+ sink_tps3_supported = dpcd & DP_TPS3_SUPPORTED; ++ if (!vinstr_ctx || !setup || !kernel_buffer) ++ return NULL; + -+ return source_tps3_supported && sink_tps3_supported; ++ bitmap[SHADER_HWCNT_BM] = setup->shader_bm; ++ bitmap[TILER_HWCNT_BM] = setup->tiler_bm; ++ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; ++ bitmap[JM_HWCNT_BM] = setup->jm_bm; ++ 
++ return kbasep_vinstr_attach_client( ++ vinstr_ctx, ++ 0, ++ bitmap, ++ NULL, ++ kernel_buffer); +} ++KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); + - static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) - { - int lane, lane_count, retval; - u8 voltage_swing, pre_emphasis, training_lane; - u8 link_status[2], adjust_request[2]; -+ u8 training_pattern = TRAINING_PTN2; - -- usleep_range(100, 101); -+ drm_dp_link_train_clock_recovery_delay(&dp->aux, dp->dpcd); - - lane_count = dp->link_train.lane_count; - -@@ -478,24 +510,30 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) - if (retval < 0) - return retval; - -- retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, -- adjust_request, 2); -- if (retval < 0) -- return retval; -- - if (analogix_dp_clock_recovery_ok(link_status, lane_count) == 0) { -- /* set training pattern 2 for EQ */ -- analogix_dp_set_training_pattern(dp, TRAINING_PTN2); -+ if (analogix_dp_tps3_supported(dp)) -+ training_pattern = TRAINING_PTN3; ++int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id) ++{ ++ int rcode = 0; ++ struct kbase_vinstr_context *vinstr_ctx; ++ u64 timestamp; ++ u32 event_mask; + -+ /* set training pattern for EQ */ -+ analogix_dp_set_training_pattern(dp, training_pattern); - - retval = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, - DP_LINK_SCRAMBLING_DISABLE | -- DP_TRAINING_PATTERN_2); -+ (training_pattern == TRAINING_PTN3 ? -+ DP_TRAINING_PATTERN_3 : DP_TRAINING_PATTERN_2)); - if (retval < 0) - return retval; - - dev_dbg(dp->dev, "Link Training Clock Recovery success\n"); - dp->link_train.lt_state = EQUALIZER_TRAINING; ++ if (!cli) ++ return -EINVAL; + -+ return 0; - } else { -+ retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, -+ adjust_request, 2); -+ if (retval < 0) -+ return retval; ++ vinstr_ctx = cli->vinstr_ctx; ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + - for (lane = 0; lane < lane_count; lane++) { - training_lane = analogix_dp_get_lane_link_training( - dp, lane); -@@ -523,10 +561,7 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) - } - - analogix_dp_get_adjust_training_lane(dp, adjust_request); -- -- for (lane = 0; lane < lane_count; lane++) -- analogix_dp_set_lane_link_training(dp, -- dp->link_train.training_lane[lane], lane); -+ analogix_dp_set_lane_link_training(dp); - - retval = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, - dp->link_train.training_lane, lane_count); -@@ -538,11 +573,11 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) - - static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) - { -- int lane, lane_count, retval; -+ int lane_count, retval; - u32 reg; - u8 link_align, link_status[2], adjust_request[2]; - -- usleep_range(400, 401); -+ drm_dp_link_train_channel_eq_delay(&dp->aux, dp->dpcd); - - lane_count = dp->link_train.lane_count; - -@@ -555,18 +590,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) - return -EIO; - } - -- retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, -- adjust_request, 2); -- if (retval < 0) -- return retval; -- - retval = drm_dp_dpcd_readb(&dp->aux, DP_LANE_ALIGN_STATUS_UPDATED, - &link_align); - if (retval < 0) - return retval; - -- analogix_dp_get_adjust_training_lane(dp, adjust_request); -- - if (!analogix_dp_channel_eq_ok(link_status, link_align, lane_count)) { - /* traing pattern Set to Normal */ - retval = 
analogix_dp_training_pattern_dis(dp); -@@ -598,9 +626,13 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) - return -EIO; - } - -- for (lane = 0; lane < lane_count; lane++) -- analogix_dp_set_lane_link_training(dp, -- dp->link_train.training_lane[lane], lane); -+ retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, -+ adjust_request, 2); -+ if (retval < 0) -+ return retval; ++ KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT); ++ event_mask = 1 << event_id; + -+ analogix_dp_get_adjust_training_lane(dp, adjust_request); -+ analogix_dp_set_lane_link_training(dp); - - retval = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, - dp->link_train.training_lane, lane_count); -@@ -610,10 +642,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) - return 0; - } - --static void analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, -- u8 *bandwidth) -+static int analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, -+ u8 *bandwidth) - { - u8 data; -+ int ret; - - /* - * For DP rev.1.1, Maximum link rate of Main Link lanes -@@ -621,28 +654,41 @@ static void analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, - * For DP rev.1.2, Maximum link rate of Main Link lanes - * 0x06 = 1.62 Gbps, 0x0a = 2.7 Gbps, 0x14 = 5.4Gbps - */ -- drm_dp_dpcd_readb(&dp->aux, DP_MAX_LINK_RATE, &data); -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LINK_RATE, &data); -+ if (ret < 0) -+ return ret; ++ mutex_lock(&vinstr_ctx->lock); + - *bandwidth = data; ++ if (event_mask & cli->event_mask) { ++ rcode = kbasep_vinstr_collect_and_accumulate( ++ vinstr_ctx, ++ ×tamp); ++ if (rcode) ++ goto exit; + -+ return 0; - } - --static void analogix_dp_get_max_rx_lane_count(struct analogix_dp_device *dp, -- u8 *lane_count) -+static int analogix_dp_get_max_rx_lane_count(struct analogix_dp_device *dp, -+ u8 *lane_count) - { - u8 data; -+ int ret; - - /* - * For DP rev.1.1, Maximum number of Main Link lanes - * 0x01 = 1 lane, 0x02 = 2 lanes, 0x04 = 4 lanes - */ -- drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); -+ if (ret < 0) -+ return ret; ++ rcode = kbasep_vinstr_update_client(cli, timestamp, event_id); ++ if (rcode) ++ goto exit; + - *lane_count = DPCD_MAX_LANE_COUNT(data); ++ kbasep_vinstr_reprogram(vinstr_ctx); ++ } + -+ return 0; - } - - static int analogix_dp_full_link_train(struct analogix_dp_device *dp, - u32 max_lanes, u32 max_rate) - { -+ struct video_info *video = &dp->video_info; - int retval = 0; - bool training_finished = false; -+ u8 dpcd; - - /* - * MACRO_RST must be applied after the PLL_LOCK to avoid -@@ -654,25 +700,19 @@ static int analogix_dp_full_link_train(struct analogix_dp_device *dp, - analogix_dp_get_max_rx_bandwidth(dp, &dp->link_train.link_rate); - analogix_dp_get_max_rx_lane_count(dp, &dp->link_train.lane_count); - -- if ((dp->link_train.link_rate != DP_LINK_BW_1_62) && -- (dp->link_train.link_rate != DP_LINK_BW_2_7) && -- (dp->link_train.link_rate != DP_LINK_BW_5_4)) { -- dev_err(dp->dev, "Rx Max Link Rate is abnormal :%x !\n", -- dp->link_train.link_rate); -- dp->link_train.link_rate = DP_LINK_BW_1_62; -- } -+ /* Setup TX lane count & rate */ -+ dp->link_train.lane_count = min_t(u32, dp->link_train.lane_count, max_lanes); -+ dp->link_train.link_rate = min_t(u32, dp->link_train.link_rate, max_rate); - -- if (dp->link_train.lane_count == 0) { -- dev_err(dp->dev, "Rx Max Lane count is abnormal :%x !\n", -- 
dp->link_train.lane_count); -- dp->link_train.lane_count = (u8)LANE_COUNT1; -+ if (!analogix_dp_bandwidth_ok(dp, &video->mode, -+ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate), -+ dp->link_train.lane_count)) { -+ dev_err(dp->dev, "bandwidth overflow\n"); ++exit: ++ mutex_unlock(&vinstr_ctx->lock); ++ ++ return rcode; ++} ++KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); ++ ++int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) ++{ ++ struct kbase_vinstr_context *vinstr_ctx; ++ int rcode; ++ u64 unused; ++ ++ if (!cli) + return -EINVAL; - } - -- /* Setup TX lane count & rate */ -- if (dp->link_train.lane_count > max_lanes) -- dp->link_train.lane_count = max_lanes; -- if (dp->link_train.link_rate > max_rate) -- dp->link_train.link_rate = max_rate; -+ drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &dpcd); -+ dp->link_train.ssc = !!(dpcd & DP_MAX_DOWNSPREAD_0_5); - - /* All DP analog module power up */ - analogix_dp_set_analog_power_down(dp, POWER_ALL, 0); -@@ -712,27 +752,15 @@ static int analogix_dp_full_link_train(struct analogix_dp_device *dp, - - static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) - { -- int i, ret; -+ int ret; - u8 link_align, link_status[2]; -- enum pll_status status; - - analogix_dp_reset_macro(dp); - - analogix_dp_set_link_bandwidth(dp, dp->link_train.link_rate); - analogix_dp_set_lane_count(dp, dp->link_train.lane_count); -- -- for (i = 0; i < dp->link_train.lane_count; i++) { -- analogix_dp_set_lane_link_training(dp, -- dp->link_train.training_lane[i], i); -- } -- -- ret = readx_poll_timeout(analogix_dp_get_pll_lock_status, dp, status, -- status != PLL_UNLOCKED, 120, -- 120 * DP_TIMEOUT_LOOP_COUNT); -- if (ret) { -- DRM_DEV_ERROR(dp->dev, "Wait for pll lock failed %d\n", ret); -- return ret; -- } -+ analogix_dp_set_lane_link_training(dp); -+ analogix_dp_enable_enhanced_mode(dp, dp->link_train.enhanced_framing); - - /* source Set training pattern 1 */ - analogix_dp_set_training_pattern(dp, TRAINING_PTN1); -@@ -743,7 +771,6 @@ static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) - /* From DP spec, pattern must be on-screen for a minimum 500us */ - usleep_range(500, 600); - -- /* TODO: enhanced_mode?*/ - analogix_dp_set_training_pattern(dp, DP_NONE); - - /* -@@ -823,7 +850,7 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) - analogix_dp_set_video_cr_mn(dp, CALCULATED_M, 0, 0); - - /* For video bist, Video timing must be generated by register */ -- analogix_dp_set_video_timing_mode(dp, VIDEO_TIMING_FROM_CAPTURE); -+ analogix_dp_set_video_timing_mode(dp, VIDEO_TIMING_FROM_REGISTER); - - /* Disable video mute */ - analogix_dp_enable_video_mute(dp, 0); -@@ -885,123 +912,309 @@ static int analogix_dp_enable_scramble(struct analogix_dp_device *dp, - return ret < 0 ? 
ret : 0; - } - --static irqreturn_t analogix_dp_hardirq(int irq, void *arg) -+static u8 analogix_dp_autotest_phy_pattern(struct analogix_dp_device *dp) - { -- struct analogix_dp_device *dp = arg; -- irqreturn_t ret = IRQ_NONE; -- enum dp_irq_type irq_type; -+ struct drm_dp_phy_test_params *data = &dp->compliance.phytest; - -- irq_type = analogix_dp_get_irq_type(dp); -- if (irq_type != DP_IRQ_TYPE_UNKNOWN) { -- analogix_dp_mute_hpd_interrupt(dp); -- ret = IRQ_WAKE_THREAD; -+ if (drm_dp_get_phy_test_pattern(&dp->aux, data)) { -+ dev_err(dp->dev, "DP Phy Test pattern AUX read failure\n"); -+ return DP_TEST_NAK; - } - -- return ret; -+ if (data->link_rate > drm_dp_bw_code_to_link_rate(dp->video_info.max_link_rate)) { -+ dev_err(dp->dev, "invalid link rate = 0x%x\n", data->link_rate); -+ return DP_TEST_NAK; -+ } + -+ /* Set test active flag here so userspace doesn't interrupt things */ -+ dp->compliance.test_active = true; ++ vinstr_ctx = cli->vinstr_ctx; ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ return DP_TEST_ACK; - } - --static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) -+static void analogix_dp_handle_test_request(struct analogix_dp_device *dp) - { -- struct analogix_dp_device *dp = arg; -- enum dp_irq_type irq_type; -+ u8 response = DP_TEST_NAK; -+ u8 request = 0; -+ int ret; - -- irq_type = analogix_dp_get_irq_type(dp); -- if (irq_type & DP_IRQ_TYPE_HP_CABLE_IN || -- irq_type & DP_IRQ_TYPE_HP_CABLE_OUT) { -- dev_dbg(dp->dev, "Detected cable status changed!\n"); -- if (dp->drm_dev) -- drm_helper_hpd_irq_event(dp->drm_dev); -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_TEST_REQUEST, &request); -+ if (ret < 0) { -+ dev_err(dp->dev, "Could not read test request from sink\n"); -+ goto update_status; - } - -- if (irq_type != DP_IRQ_TYPE_UNKNOWN) { -- analogix_dp_clear_hotplug_interrupts(dp); -- analogix_dp_unmute_hpd_interrupt(dp); -+ switch (request) { -+ case DP_TEST_LINK_PHY_TEST_PATTERN: -+ dev_info(dp->dev, "PHY_PATTERN test requested\n"); -+ response = analogix_dp_autotest_phy_pattern(dp); -+ break; -+ default: -+ dev_err(dp->dev, "Invalid test request '%02x'\n", request); -+ break; - } - -- return IRQ_HANDLED; -+ if (response & DP_TEST_ACK) -+ dp->compliance.test_type = request; ++ mutex_lock(&vinstr_ctx->lock); + -+update_status: -+ ret = drm_dp_dpcd_writeb(&dp->aux, DP_TEST_RESPONSE, response); -+ if (ret < 0) -+ dev_err(dp->dev, "Could not write test response to sink\n"); - } - --static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) -+void analogix_dp_check_device_service_irq(struct analogix_dp_device *dp) - { -+ u8 val; - int ret; -- u8 spread; - -- ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); -- if (ret != 1) { -- dev_err(dp->dev, "failed to read downspread %d\n", ret); -- return ret; -- } -- dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); -- dev_dbg(dp->dev, "fast link training %s\n", -- dp->fast_train_enable ? 
"supported" : "unsupported"); -- return 0; -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, &val); -+ if (ret < 0 || !val) -+ return; ++ rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); ++ if (rcode) ++ goto exit; ++ rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx); ++ if (rcode) ++ goto exit; ++ memset(cli->accum_buffer, 0, cli->dump_size); + -+ ret = drm_dp_dpcd_writeb(&dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, val); -+ if (ret < 0) -+ return; ++ kbasep_vinstr_reprogram(vinstr_ctx); + -+ if (val & DP_AUTOMATED_TEST_REQUEST) -+ analogix_dp_handle_test_request(dp); - } -+EXPORT_SYMBOL_GPL(analogix_dp_check_device_service_irq); - --static int analogix_dp_commit(struct analogix_dp_device *dp) -+static void analogix_dp_process_phy_request(struct analogix_dp_device *dp) - { -+ struct drm_dp_phy_test_params *data = &dp->compliance.phytest; -+ u8 spread, adjust_request[2]; - int ret; - -- /* Keep the panel disabled while we configure video */ -- if (dp->plat_data->panel) { -- if (drm_panel_disable(dp->plat_data->panel)) -- DRM_ERROR("failed to disable the panel\n"); -- } -- -- ret = analogix_dp_train_link(dp); -- if (ret) { -- dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); -- return ret; -- } -+ dp->link_train.link_rate = drm_dp_link_rate_to_bw_code(data->link_rate); -+ dp->link_train.lane_count = data->num_lanes; - -- ret = analogix_dp_enable_scramble(dp, 1); -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); - if (ret < 0) { -- dev_err(dp->dev, "can not enable scramble\n"); -- return ret; -+ dev_err(dp->dev, "Could not read ssc from sink\n"); -+ return; - } - -- analogix_dp_init_video(dp); -- ret = analogix_dp_config_video(dp); -- if (ret) { -- dev_err(dp->dev, "unable to config video\n"); -- return ret; -- } -+ dp->link_train.ssc = !!(spread & DP_MAX_DOWNSPREAD_0_5); - -- /* Safe to enable the panel now */ -- if (dp->plat_data->panel) { -- ret = drm_panel_enable(dp->plat_data->panel); -- if (ret) { -- DRM_ERROR("failed to enable the panel\n"); -- return ret; -- } -+ ret = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, -+ adjust_request, 2); -+ if (ret < 0) { -+ dev_err(dp->dev, "Could not read swing/pre-emphasis\n"); -+ return; - } - -- /* Check whether panel supports fast training */ -- ret = analogix_dp_fast_link_train_detection(dp); -- if (ret) -- return ret; -+ analogix_dp_set_link_bandwidth(dp, dp->link_train.link_rate); -+ analogix_dp_set_lane_count(dp, dp->link_train.lane_count); -+ analogix_dp_get_adjust_training_lane(dp, adjust_request); -+ analogix_dp_set_lane_link_training(dp); - -- if (analogix_dp_detect_sink_psr(dp)) { -- ret = analogix_dp_enable_sink_psr(dp); -- if (ret) -- return ret; -+ switch (data->phy_pattern) { -+ case DP_PHY_TEST_PATTERN_NONE: -+ dev_info(dp->dev, "Disable Phy Test Pattern\n"); -+ analogix_dp_set_training_pattern(dp, DP_NONE); -+ break; -+ case DP_PHY_TEST_PATTERN_D10_2: -+ dev_info(dp->dev, "Set D10.2 Phy Test Pattern\n"); -+ analogix_dp_set_training_pattern(dp, D10_2); ++exit: ++ mutex_unlock(&vinstr_ctx->lock); ++ ++ return rcode; ++} ++ ++int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) ++{ ++ unsigned long flags; ++ int ret = -EAGAIN; ++ ++ KBASE_DEBUG_ASSERT(vinstr_ctx); ++ ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ switch (vinstr_ctx->state) { ++ case VINSTR_SUSPENDED: ++ vinstr_ctx->suspend_cnt++; ++ /* overflow shall not happen */ ++ BUG_ON(0 == vinstr_ctx->suspend_cnt); ++ ret = 0; + break; -+ case DP_PHY_TEST_PATTERN_PRBS7: -+ 
dev_info(dp->dev, "Set PRBS7 Phy Test Pattern\n"); -+ analogix_dp_set_training_pattern(dp, PRBS7); ++ ++ case VINSTR_IDLE: ++ vinstr_ctx->state = VINSTR_SUSPENDING; ++ schedule_work(&vinstr_ctx->suspend_work); + break; -+ case DP_PHY_TEST_PATTERN_80BIT_CUSTOM: -+ dev_info(dp->dev, "Set 80Bit Custom Phy Test Pattern\n"); -+ analogix_dp_set_training_pattern(dp, TEST_PATTERN_80BIT); ++ ++ case VINSTR_DUMPING: ++ vinstr_ctx->state = VINSTR_SUSPENDING; + break; -+ case DP_PHY_TEST_PATTERN_CP2520: -+ dev_info(dp->dev, "Set HBR2 compliance Phy Test Pattern\n"); -+ analogix_dp_set_training_pattern(dp, TEST_PATTERN_HBR2); ++ ++ case VINSTR_SUSPENDING: ++ /* fall through */ ++ case VINSTR_RESUMING: + break; -+ default: -+ dev_err(dp->dev, "Invalid Phy Test Pattern: %d\n", data->phy_pattern); -+ return; - } - -- return ret; -+ drm_dp_set_phy_test_pattern(&dp->aux, data, 0x11); - } - --static int analogix_dp_enable_psr(struct analogix_dp_device *dp) -+void analogix_dp_phy_test(struct analogix_dp_device *dp) - { -- struct dp_sdp psr_vsc; -+ struct drm_device *dev = dp->drm_dev; -+ struct drm_modeset_acquire_ctx ctx; - int ret; -- u8 sink; - -- ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_STATUS, &sink); -- if (ret != 1) -- DRM_DEV_ERROR(dp->dev, "Failed to read psr status %d\n", ret); -- else if (sink == DP_PSR_SINK_ACTIVE_RFB) -- return 0; -+ DRM_DEV_INFO(dp->dev, "PHY test\n"); - -- /* Prepare VSC packet as per EDP 1.4 spec, Table 6.9 */ -+ drm_modeset_acquire_init(&ctx, 0); -+ for (;;) { -+ ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); -+ if (ret != -EDEADLK) -+ break; + -+ drm_modeset_backoff(&ctx); ++ default: ++ BUG(); ++ break; + } ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + -+ analogix_dp_process_phy_request(dp); -+ drm_modeset_drop_locks(&ctx); -+ drm_modeset_acquire_fini(&ctx); ++ return ret; +} -+EXPORT_SYMBOL_GPL(analogix_dp_phy_test); + -+static irqreturn_t analogix_dp_hpd_irq_handler(int irq, void *arg) ++void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) +{ -+ struct analogix_dp_device *dp = arg; -+ -+ if (dp->drm_dev) -+ drm_helper_hpd_irq_event(dp->drm_dev); -+ -+ return IRQ_HANDLED; ++ wait_event(vinstr_ctx->suspend_waitq, ++ (0 == kbase_vinstr_try_suspend(vinstr_ctx))); +} + -+static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) ++void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) +{ -+ struct analogix_dp_device *dp = arg; -+ -+ analogix_dp_irq_handler(dp); -+ -+ return IRQ_HANDLED; -+} ++ unsigned long flags; + -+static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) -+{ -+ int ret; -+ u8 spread; ++ KBASE_DEBUG_ASSERT(vinstr_ctx); + -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); -+ if (ret != 1) { -+ dev_err(dp->dev, "failed to read downspread %d\n", ret); -+ return ret; ++ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); ++ BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); ++ if (VINSTR_SUSPENDED == vinstr_ctx->state) { ++ BUG_ON(0 == vinstr_ctx->suspend_cnt); ++ vinstr_ctx->suspend_cnt--; ++ if (0 == vinstr_ctx->suspend_cnt) { ++ vinstr_ctx->state = VINSTR_RESUMING; ++ schedule_work(&vinstr_ctx->resume_work); ++ } + } -+ dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); -+ dev_dbg(dp->dev, "fast link training %s\n", -+ dp->fast_train_enable ? 
"supported" : "unsupported"); -+ return 0; ++ spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); +} +diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +new file mode 100644 +index 000000000..6207d25ae +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +@@ -0,0 +1,155 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static int analogix_dp_link_power_up(struct analogix_dp_device *dp) -+{ -+ u8 value; -+ int ret; + -+ if (dp->dpcd[DP_DPCD_REV] < 0x11) -+ return 0; + -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_SET_POWER, &value); -+ if (ret < 0) -+ return ret; ++#ifndef _KBASE_VINSTR_H_ ++#define _KBASE_VINSTR_H_ + -+ value &= ~DP_SET_POWER_MASK; -+ value |= DP_SET_POWER_D0; ++#include ++#include + -+ ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, value); -+ if (ret < 0) -+ return ret; ++/*****************************************************************************/ + -+ usleep_range(1000, 2000); ++struct kbase_vinstr_context; ++struct kbase_vinstr_client; + -+ return 0; -+} ++/*****************************************************************************/ + -+static int analogix_dp_link_power_down(struct analogix_dp_device *dp) -+{ -+ u8 value; -+ int ret; ++/** ++ * kbase_vinstr_init() - initialize the vinstr core ++ * @kbdev: kbase device ++ * ++ * Return: pointer to the vinstr context on success or NULL on failure ++ */ ++struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev); + -+ if (dp->dpcd[DP_DPCD_REV] < 0x11) -+ return 0; ++/** ++ * kbase_vinstr_term() - terminate the vinstr core ++ * @vinstr_ctx: vinstr context ++ */ ++void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); + -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_SET_POWER, &value); -+ if (ret < 0) -+ return ret; ++/** ++ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader ++ * @vinstr_ctx: vinstr context ++ * @setup: reader's configuration ++ * ++ * Return: zero on success ++ */ ++int kbase_vinstr_hwcnt_reader_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_uk_hwcnt_reader_setup *setup); + -+ value &= ~DP_SET_POWER_MASK; -+ value |= DP_SET_POWER_D3; ++/** ++ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping ++ * @vinstr_ctx: vinstr context ++ * @cli: pointer where to store pointer to new vinstr client structure ++ * @setup: hwc configuration ++ * ++ * Return: zero on success ++ */ ++int kbase_vinstr_legacy_hwc_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_vinstr_client **cli, ++ struct kbase_uk_hwcnt_setup *setup); + -+ ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, value); -+ if (ret < 0) -+ return ret; ++/** ++ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side ++ * client ++ * @vinstr_ctx: vinstr context ++ * @setup: reader's configuration ++ * @kernel_buffer: pointer to dump buffer ++ * ++ * setup->buffer_count and setup->fd are not used for kernel side clients. 
++ * ++ * Return: pointer to client structure, or NULL on failure ++ */ ++struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( ++ struct kbase_vinstr_context *vinstr_ctx, ++ struct kbase_uk_hwcnt_reader_setup *setup, ++ void *kernel_buffer); + -+ return 0; -+} ++/** ++ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client ++ * @cli: pointer to vinstr client ++ * @event_id: id of event that triggered hwcnt dump ++ * ++ * Return: zero on success ++ */ ++int kbase_vinstr_hwc_dump( ++ struct kbase_vinstr_client *cli, ++ enum base_hwcnt_reader_event event_id); + -+static int analogix_dp_commit(struct analogix_dp_device *dp) -+{ -+ struct video_info *video = &dp->video_info; -+ int ret; ++/** ++ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for ++ * a given kbase context ++ * @cli: pointer to vinstr client ++ * ++ * Return: zero on success ++ */ ++int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); + -+ ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); -+ if (ret < 0) { -+ dev_err(dp->dev, "failed to read dpcd caps: %d\n", ret); -+ return ret; -+ } ++/** ++ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context ++ * @vinstr_ctx: vinstr context ++ * ++ * Return: 0 on success, or negative if state change is in progress ++ * ++ * Warning: This API call is non-generic. It is meant to be used only by ++ * job scheduler state machine. ++ * ++ * Function initiates vinstr switch to suspended state. Once it was called ++ * vinstr enters suspending state. If function return non-zero value, it ++ * indicates that state switch is not complete and function must be called ++ * again. On state switch vinstr will trigger job scheduler state machine ++ * cycle. ++ */ ++int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); + -+ ret = analogix_dp_link_power_up(dp); -+ if (ret) { -+ dev_err(dp->dev, "failed to power up link: %d\n", ret); -+ return ret; -+ } ++/** ++ * kbase_vinstr_suspend - suspends operation of a given vinstr context ++ * @vinstr_ctx: vinstr context ++ * ++ * Function initiates vinstr switch to suspended state. Then it blocks until ++ * operation is completed. ++ */ ++void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); + -+ if (device_property_read_bool(dp->dev, "panel-self-test")) -+ return drm_dp_dpcd_writeb(&dp->aux, DP_EDP_CONFIGURATION_SET, -+ DP_PANEL_SELF_TEST_ENABLE); ++/** ++ * kbase_vinstr_resume - resumes operation of a given vinstr context ++ * @vinstr_ctx: vinstr context ++ * ++ * Function can be called only if it was preceded by a successful call ++ * to kbase_vinstr_suspend. 
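For orientation only, here is a sketch of how a kernel-side client could drive the API declared just above; it also uses kbase_vinstr_dump_size() and kbase_vinstr_detach_client(), which are declared a few lines further down in the same header. The bitmap field names in struct kbase_uk_hwcnt_reader_setup and the BASE_HWCNT_READER_EVENT_MANUAL event id are assumptions taken from the rest of this patch, not guaranteed by this header.

#include <linux/slab.h>

#include "mali_kbase.h"
#include "mali_kbase_vinstr.h"

/* Sketch: create a kernel-side counter client, take one manual dump, detach. */
static int example_kernel_hwcnt_dump(struct kbase_device *kbdev,
				     struct kbase_vinstr_context *vinstr_ctx)
{
	struct kbase_uk_hwcnt_reader_setup setup = {
		.jm_bm     = ~0u,	/* assumed field names; enable every counter block */
		.shader_bm = ~0u,
		.tiler_bm  = ~0u,
		.mmu_l2_bm = ~0u,
	};
	struct kbase_vinstr_client *cli;
	void *buf;
	int err;

	buf = kzalloc(kbase_vinstr_dump_size(kbdev), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* setup.fd and setup.buffer_count are ignored for kernel-side clients. */
	cli = kbase_vinstr_hwcnt_kernel_setup(vinstr_ctx, &setup, buf);
	if (!cli) {
		kfree(buf);
		return -ENOMEM;
	}

	err = kbase_vinstr_hwc_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL);

	kbase_vinstr_detach_client(cli);
	kfree(buf);
	return err;
}

Around GPU resets the core quiesces such clients with kbase_vinstr_suspend()/kbase_vinstr_resume(); those calls are reference counted and must be balanced, while kbase_vinstr_try_suspend() only initiates the transition and keeps returning non-zero until the worker reaches the suspended state.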
++ */ ++void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); + -+ ret = analogix_dp_train_link(dp); -+ if (ret) { -+ dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); -+ return ret; -+ } ++/** ++ * kbase_vinstr_dump_size - Return required size of dump buffer ++ * @kbdev: device pointer ++ * ++ * Return : buffer size in bytes ++ */ ++size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); + -+ ret = analogix_dp_enable_scramble(dp, 1); -+ if (ret < 0) { -+ dev_err(dp->dev, "can not enable scramble\n"); -+ return ret; -+ } ++/** ++ * kbase_vinstr_detach_client - Detach a client from the vinstr core ++ * @cli: pointer to vinstr client ++ */ ++void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); + -+ analogix_dp_init_video(dp); -+ analogix_dp_set_video_format(dp); ++#endif /* _KBASE_VINSTR_H_ */ + -+ if (video->video_bist_enable) -+ analogix_dp_video_bist_enable(dp); +diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h +new file mode 100644 +index 000000000..5d6b4021d +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h +@@ -0,0 +1,201 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ ret = analogix_dp_config_video(dp); -+ if (ret) { -+ dev_err(dp->dev, "unable to config video\n"); -+ return ret; -+ } + -+ /* Check whether panel supports fast training */ -+ ret = analogix_dp_fast_link_train_detection(dp); -+ if (ret) -+ return ret; + -+ if (analogix_dp_detect_sink_psr(dp)) { -+ ret = analogix_dp_enable_sink_psr(dp); -+ if (ret) -+ return ret; -+ } + -+ return ret; -+} ++#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_MALI_KBASE_H + -+static int analogix_dp_enable_psr(struct analogix_dp_device *dp) -+{ -+ struct dp_sdp psr_vsc; -+ int ret; -+ u8 sink; ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM mali + -+ ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_STATUS, &sink); -+ if (ret != 1) -+ DRM_DEV_ERROR(dp->dev, "Failed to read psr status %d\n", ret); -+ else if (sink == DP_PSR_SINK_ACTIVE_RFB) -+ return 0; ++#include + -+ /* Prepare VSC packet as per EDP 1.4 spec, Table 6.9 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; -@@ -1011,9 +1224,20 @@ static int analogix_dp_enable_psr(struct analogix_dp_device *dp) - psr_vsc.db[1] = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID; - - ret = analogix_dp_send_psr_spd(dp, &psr_vsc, true); -- if (!ret) -+ if (!ret) { - analogix_dp_set_analog_power_down(dp, POWER_ALL, true); - -+ if (dp->phy) { -+ union phy_configure_opts phy_cfg = {0}; ++DECLARE_EVENT_CLASS(mali_slot_template, ++ TP_PROTO(int jobslot, unsigned int info_val), ++ TP_ARGS(jobslot, info_val), ++ TP_STRUCT__entry( ++ __field(unsigned int, jobslot) ++ __field(unsigned int, info_val) ++ ), ++ TP_fast_assign( ++ __entry->jobslot = jobslot; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val) ++); + -+ phy_cfg.dp.lanes = 0; -+ 
phy_cfg.dp.set_lanes = true; -+ ret = phy_configure(dp->phy, &phy_cfg); -+ if (ret) -+ return ret; -+ } -+ } ++#define DEFINE_MALI_SLOT_EVENT(name) \ ++DEFINE_EVENT(mali_slot_template, mali_##name, \ ++ TP_PROTO(int jobslot, unsigned int info_val), \ ++ TP_ARGS(jobslot, info_val)) ++DEFINE_MALI_SLOT_EVENT(JM_SUBMIT); ++DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE); ++DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD); ++DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD); ++DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP); ++DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0); ++DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1); ++DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP); ++DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0); ++DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1); ++DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); ++DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT); ++DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER); ++DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER); ++DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); ++DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); ++DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT); ++DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); ++DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); ++DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); ++DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); ++DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); ++DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); ++DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); ++DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); ++DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); ++#undef DEFINE_MALI_SLOT_EVENT + - return ret; - } - -@@ -1059,66 +1283,34 @@ static int analogix_dp_disable_psr(struct analogix_dp_device *dp) - return analogix_dp_send_psr_spd(dp, &psr_vsc, true); - } - --/* -- * This function is a bit of a catch-all for panel preparation, hopefully -- * simplifying the logic of functions that need to prepare/unprepare the panel -- * below. -- * -- * If @prepare is true, this function will prepare the panel. Conversely, if it -- * is false, the panel will be unprepared. -- * -- * If @is_modeset_prepare is true, the function will disregard the current state -- * of the panel and either prepare/unprepare the panel based on @prepare. Once -- * it finishes, it will update dp->panel_is_modeset to reflect the current state -- * of the panel. -- */ --static int analogix_dp_prepare_panel(struct analogix_dp_device *dp, -- bool prepare, bool is_modeset_prepare) --{ -- int ret = 0; -- -- if (!dp->plat_data->panel) -- return 0; -- -- mutex_lock(&dp->panel_lock); -- -- /* -- * Exit early if this is a temporary prepare/unprepare and we're already -- * modeset (since we neither want to prepare twice or unprepare early). 
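As a side note on the PSR-entry step above, which hands the lanes back by configuring the PHY with zero DP lanes: the sketch below shows the inverse direction using the same generic PHY API. The function, the phy pointer and the lane count are illustrative only, not taken from the patch.

#include <linux/phy/phy.h>

/* Sketch: restore the DP lane count on the PHY when leaving self refresh. */
static int example_psr_exit_restore_lanes(struct phy *phy, u8 lane_count)
{
	union phy_configure_opts opts = { 0 };

	opts.dp.lanes = lane_count;
	opts.dp.set_lanes = true;

	return phy_configure(phy, &opts);
}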
-- */ -- if (dp->panel_is_modeset && !is_modeset_prepare) -- goto out; -- -- if (prepare) -- ret = drm_panel_prepare(dp->plat_data->panel); -- else -- ret = drm_panel_unprepare(dp->plat_data->panel); -- -- if (ret) -- goto out; -- -- if (is_modeset_prepare) -- dp->panel_is_modeset = prepare; -- --out: -- mutex_unlock(&dp->panel_lock); -- return ret; --} -- - static int analogix_dp_get_modes(struct drm_connector *connector) - { - struct analogix_dp_device *dp = to_dp(connector); - struct edid *edid; - int ret, num_modes = 0; - -- if (dp->plat_data->panel) { -+ if (dp->plat_data->right && dp->plat_data->right->plat_data->bridge) { -+ struct drm_bridge *bridge = dp->plat_data->right->plat_data->bridge; ++DECLARE_EVENT_CLASS(mali_refcount_template, ++ TP_PROTO(int refcount, unsigned int info_val), ++ TP_ARGS(refcount, info_val), ++ TP_STRUCT__entry( ++ __field(unsigned int, refcount) ++ __field(unsigned int, info_val) ++ ), ++ TP_fast_assign( ++ __entry->refcount = refcount; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val) ++); + -+ if (bridge->ops & DRM_BRIDGE_OP_MODES) { -+ if (!drm_bridge_get_modes(bridge, connector)) -+ return 0; -+ } -+ } ++#define DEFINE_MALI_REFCOUNT_EVENT(name) \ ++DEFINE_EVENT(mali_refcount_template, mali_##name, \ ++ TP_PROTO(int refcount, unsigned int info_val), \ ++ TP_ARGS(refcount, info_val)) ++DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK); ++DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB); ++DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB); ++DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); ++DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); ++DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE); ++DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE); ++#undef DEFINE_MALI_REFCOUNT_EVENT + -+ if (dp->plat_data->panel) - num_modes += drm_panel_get_modes(dp->plat_data->panel, connector); -- } else { -- ret = analogix_dp_prepare_panel(dp, true, false); -- if (ret) { -- DRM_ERROR("Failed to prepare panel (%d)\n", ret); ++DECLARE_EVENT_CLASS(mali_add_template, ++ TP_PROTO(int gpu_addr, unsigned int info_val), ++ TP_ARGS(gpu_addr, info_val), ++ TP_STRUCT__entry( ++ __field(unsigned int, gpu_addr) ++ __field(unsigned int, info_val) ++ ), ++ TP_fast_assign( ++ __entry->gpu_addr = gpu_addr; ++ __entry->info_val = info_val; ++ ), ++ TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val) ++); + -+ if (dp->plat_data->bridge) -+ num_modes += drm_bridge_get_modes(dp->plat_data->bridge, connector); ++#define DEFINE_MALI_ADD_EVENT(name) \ ++DEFINE_EVENT(mali_add_template, mali_##name, \ ++ TP_PROTO(int gpu_addr, unsigned int info_val), \ ++ TP_ARGS(gpu_addr, info_val)) ++DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); ++DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); 
++DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); ++DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); ++DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER); ++DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END); ++DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER); ++DEFINE_MALI_ADD_EVENT(JD_DONE); ++DEFINE_MALI_ADD_EVENT(JD_CANCEL); ++DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT); ++DEFINE_MALI_ADD_EVENT(JM_IRQ); ++DEFINE_MALI_ADD_EVENT(JM_IRQ_END); ++DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS); ++DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE); ++DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED); ++DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); ++DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); ++DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); ++DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); ++DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX); ++DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); ++DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); ++DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); ++DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); ++DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END); ++DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START); ++DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); ++DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); ++DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); ++DEFINE_MALI_ADD_EVENT(PM_PWRON); ++DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); ++DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); ++DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); ++DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); ++DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); ++DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED); ++DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE); ++DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE); ++DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); ++DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); ++DEFINE_MALI_ADD_EVENT(PM_GPU_ON); ++DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); ++DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); ++DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); ++DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); ++DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); ++DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); ++#undef DEFINE_MALI_ADD_EVENT + -+ if (!num_modes) { -+ ret = analogix_dp_phy_power_on(dp); -+ if (ret) - return 0; -- } ++#endif /* _TRACE_MALI_KBASE_H */ + -+ if (dp->plat_data->panel) -+ analogix_dp_panel_prepare(dp); - - edid = drm_get_edid(connector, &dp->aux.ddc); - if (edid) { -@@ -1128,14 +1320,19 @@ static int analogix_dp_get_modes(struct drm_connector *connector) - kfree(edid); - } - -- ret = analogix_dp_prepare_panel(dp, false, false); -- if (ret) -- DRM_ERROR("Failed to unprepare panel (%d)\n", ret); -+ analogix_dp_phy_power_off(dp); - } - - if (dp->plat_data->get_modes) - num_modes += dp->plat_data->get_modes(dp->plat_data, connector); - -+ if (num_modes > 0 && dp->plat_data->split_mode) { -+ struct drm_display_mode *mode; ++#undef TRACE_INCLUDE_PATH ++#undef linux ++#define TRACE_INCLUDE_PATH . 
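As context for the event definitions above: like any Linux tracepoint header, this file is instantiated by defining CREATE_TRACE_POINTS before including it in exactly one compilation unit, after which each DEFINE_MALI_SLOT_EVENT(name) provides a trace_mali_<name>() helper. A minimal sketch (the call site below is illustrative, not part of the patch):

/* In exactly one .c file of the driver: */
#define CREATE_TRACE_POINTS
#include "mali_linux_kbase_trace.h"

/* Emit the slot event defined above; when enabled it appears under
 * /sys/kernel/debug/tracing/events/mali/mali_JM_SUBMIT. */
static void example_trace_submit(int js, unsigned int job_head)
{
	trace_mali_JM_SUBMIT(js, job_head);
}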
++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE mali_linux_kbase_trace + -+ list_for_each_entry(mode, &connector->probed_modes, head) -+ dp->plat_data->convert_to_split_mode(mode); -+ } ++/* This part must be outside protection */ ++#include +diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h +new file mode 100644 +index 000000000..2be06a552 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_linux_trace.h +@@ -0,0 +1,189 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + - return num_modes; - } - -@@ -1181,38 +1378,110 @@ static const struct drm_connector_helper_funcs analogix_dp_connector_helper_func - }; - - static enum drm_connector_status --analogix_dp_detect(struct drm_connector *connector, bool force) -+analogix_dp_detect(struct analogix_dp_device *dp) - { -- struct analogix_dp_device *dp = to_dp(connector); - enum drm_connector_status status = connector_status_disconnected; - int ret; - -- if (dp->plat_data->panel) -- return connector_status_connected; -- -- ret = analogix_dp_prepare_panel(dp, true, false); -+ ret = analogix_dp_phy_power_on(dp); - if (ret) { -- DRM_ERROR("Failed to prepare panel (%d)\n", ret); -+ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); - return connector_status_disconnected; - } - -- if (!analogix_dp_detect_hpd(dp)) -+ if (dp->plat_data->panel) -+ analogix_dp_panel_prepare(dp); + -+ if (!analogix_dp_detect_hpd(dp)) { -+ ret = analogix_dp_get_max_rx_bandwidth(dp, &dp->link_train.link_rate); -+ if (ret) { -+ dev_err(dp->dev, "failed to read max link rate\n"); -+ goto out; -+ } + -+ ret = analogix_dp_get_max_rx_lane_count(dp, &dp->link_train.lane_count); -+ if (ret) { -+ dev_err(dp->dev, "failed to read max lane count\n"); -+ goto out; -+ } ++#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_MALI_H + - status = connector_status_connected; -+ } - -- ret = analogix_dp_prepare_panel(dp, false, false); -- if (ret) -- DRM_ERROR("Failed to unprepare panel (%d)\n", ret); -+ if (dp->plat_data->bridge) { -+ struct drm_bridge *next_bridge = dp->plat_data->bridge; ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM mali ++#define TRACE_INCLUDE_FILE mali_linux_trace + -+ if (next_bridge->ops & DRM_BRIDGE_OP_DETECT) -+ status = drm_bridge_detect(next_bridge); -+ } ++#include + -+out: -+ analogix_dp_phy_power_off(dp); ++#define MALI_JOB_SLOTS_EVENT_CHANGED + -+ if (status == connector_status_connected) -+ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, true); -+ else -+ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); - - return status; - } - -+static enum drm_connector_status -+analogix_dp_connector_detect(struct drm_connector *connector, bool force) -+{ -+ struct analogix_dp_device *dp = to_dp(connector); ++/** ++ * mali_job_slots_event - called from mali_kbase_core_linux.c ++ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro. 
++ */ ++TRACE_EVENT(mali_job_slots_event, ++ TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, ++ unsigned char job_id), ++ TP_ARGS(event_id, tgid, pid, job_id), ++ TP_STRUCT__entry( ++ __field(unsigned int, event_id) ++ __field(unsigned int, tgid) ++ __field(unsigned int, pid) ++ __field(unsigned char, job_id) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->tgid = tgid; ++ __entry->pid = pid; ++ __entry->job_id = job_id; ++ ), ++ TP_printk("event=%u tgid=%u pid=%u job_id=%u", ++ __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id) ++); + -+ if (dp->plat_data->right && analogix_dp_detect(dp->plat_data->right) != connector_status_connected) -+ return connector_status_disconnected; ++/** ++ * mali_pm_status - Called by mali_kbase_pm_driver.c ++ * @event_id: core type (shader, tiler, l2 cache) ++ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF) ++ */ ++TRACE_EVENT(mali_pm_status, ++ TP_PROTO(unsigned int event_id, unsigned long long value), ++ TP_ARGS(event_id, value), ++ TP_STRUCT__entry( ++ __field(unsigned int, event_id) ++ __field(unsigned long long, value) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("event %u = %llu", __entry->event_id, __entry->value) ++); + -+ return analogix_dp_detect(dp); -+} ++/** ++ * mali_pm_power_on - Called by mali_kbase_pm_driver.c ++ * @event_id: core type (shader, tiler, l2 cache) ++ * @value: 64bits bitmask reporting the cores to power up ++ */ ++TRACE_EVENT(mali_pm_power_on, ++ TP_PROTO(unsigned int event_id, unsigned long long value), ++ TP_ARGS(event_id, value), ++ TP_STRUCT__entry( ++ __field(unsigned int, event_id) ++ __field(unsigned long long, value) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("event %u = %llu", __entry->event_id, __entry->value) ++); + -+static void analogix_dp_connector_force(struct drm_connector *connector) -+{ -+ struct analogix_dp_device *dp = to_dp(connector); ++/** ++ * mali_pm_power_off - Called by mali_kbase_pm_driver.c ++ * @event_id: core type (shader, tiler, l2 cache) ++ * @value: 64bits bitmask reporting the cores to power down ++ */ ++TRACE_EVENT(mali_pm_power_off, ++ TP_PROTO(unsigned int event_id, unsigned long long value), ++ TP_ARGS(event_id, value), ++ TP_STRUCT__entry( ++ __field(unsigned int, event_id) ++ __field(unsigned long long, value) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("event %u = %llu", __entry->event_id, __entry->value) ++); + -+ if (connector->status == connector_status_connected) -+ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, true); -+ else -+ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); -+} ++/** ++ * mali_page_fault_insert_pages - Called by page_fault_worker() ++ * it reports an MMU page fault resulting in new pages being mapped. ++ * @event_id: MMU address space number. 
++ * @value: number of newly allocated pages ++ */ ++TRACE_EVENT(mali_page_fault_insert_pages, ++ TP_PROTO(int event_id, unsigned long value), ++ TP_ARGS(event_id, value), ++ TP_STRUCT__entry( ++ __field(int, event_id) ++ __field(unsigned long, value) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ __entry->value = value; ++ ), ++ TP_printk("event %d = %lu", __entry->event_id, __entry->value) ++); + -+static int -+analogix_dp_atomic_connector_get_property(struct drm_connector *connector, -+ const struct drm_connector_state *state, -+ struct drm_property *property, -+ uint64_t *val) -+{ -+ struct rockchip_drm_private *private = connector->dev->dev_private; -+ struct analogix_dp_device *dp = to_dp(connector); ++/** ++ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space() ++ * it reports that a certain MMU address space is in use now. ++ * @event_id: MMU address space number. ++ */ ++TRACE_EVENT(mali_mmu_as_in_use, ++ TP_PROTO(int event_id), ++ TP_ARGS(event_id), ++ TP_STRUCT__entry( ++ __field(int, event_id) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ ), ++ TP_printk("event=%d", __entry->event_id) ++); + -+ if (property == private->split_area_prop) { -+ switch (dp->split_area) { -+ case 1: -+ *val = ROCKCHIP_DRM_SPLIT_LEFT_SIDE; -+ break; -+ case 2: -+ *val = ROCKCHIP_DRM_SPLIT_RIGHT_SIDE; -+ break; -+ default: -+ *val = ROCKCHIP_DRM_SPLIT_UNSET; -+ break; -+ } -+ } ++/** ++ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal() ++ * it reports that a certain MMU address space has been released now. ++ * @event_id: MMU address space number. ++ */ ++TRACE_EVENT(mali_mmu_as_released, ++ TP_PROTO(int event_id), ++ TP_ARGS(event_id), ++ TP_STRUCT__entry( ++ __field(int, event_id) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ ), ++ TP_printk("event=%d", __entry->event_id) ++); + -+ return 0; -+} ++/** ++ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages() ++ * and by kbase_atomic_sub_pages() ++ * it reports that the total number of allocated pages is changed. ++ * @event_id: number of pages to be added or subtracted (according to the sign). 
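A small illustrative call site for the MMU events defined above; the function and its arguments are hypothetical and not taken from the patch.

#include "mali_linux_trace.h"

/* Sketch: report that address space 'as_no' is active and that a fault was
 * resolved by mapping 'new_pages' pages. */
static void example_report_page_fault(int as_no, unsigned long new_pages)
{
	trace_mali_mmu_as_in_use(as_no);
	trace_mali_page_fault_insert_pages(as_no, new_pages);
}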
++ */ ++TRACE_EVENT(mali_total_alloc_pages_change, ++ TP_PROTO(long long int event_id), ++ TP_ARGS(event_id), ++ TP_STRUCT__entry( ++ __field(long long int, event_id) ++ ), ++ TP_fast_assign( ++ __entry->event_id = event_id; ++ ), ++ TP_printk("event=%lld", __entry->event_id) ++); + - static const struct drm_connector_funcs analogix_dp_connector_funcs = { - .fill_modes = drm_helper_probe_single_connector_modes, -- .detect = analogix_dp_detect, -+ .detect = analogix_dp_connector_detect, - .destroy = drm_connector_cleanup, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -+ .force = analogix_dp_connector_force, -+ .atomic_get_property = analogix_dp_atomic_connector_get_property, - }; - - static int analogix_dp_bridge_attach(struct drm_bridge *bridge, -@@ -1223,28 +1492,53 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, - struct drm_connector *connector = NULL; - int ret = 0; - -- if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) { -- DRM_ERROR("Fix bridge driver to make connector optional!"); -- return -EINVAL; -- } -- - if (!bridge->encoder) { - DRM_ERROR("Parent encoder object not found"); - return -ENODEV; - } - -+ if (dp->plat_data->bridge) { -+ ret = drm_bridge_attach(bridge->encoder, dp->plat_data->bridge, bridge, -+ dp->plat_data->skip_connector ? -+ 0 : DRM_BRIDGE_ATTACH_NO_CONNECTOR); -+ if (ret) { -+ DRM_ERROR("Failed to attach external bridge: %d\n", ret); -+ return ret; -+ } -+ } ++#endif /* _TRACE_MALI_H */ + -+ if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) -+ return 0; ++#undef TRACE_INCLUDE_PATH ++#undef linux ++#define TRACE_INCLUDE_PATH . + - if (!dp->plat_data->skip_connector) { -+ int connector_type = DRM_MODE_CONNECTOR_eDP; -+ struct rockchip_drm_private *private; ++/* This part must be outside protection */ ++#include +diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h +new file mode 100644 +index 000000000..99452933e +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_malisw.h +@@ -0,0 +1,131 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ if (dp->plat_data->bridge && -+ dp->plat_data->bridge->type != DRM_MODE_CONNECTOR_Unknown) -+ connector_type = dp->plat_data->bridge->type; + - connector = &dp->connector; - connector->polled = DRM_CONNECTOR_POLL_HPD; -+ if (dp->plat_data->bridge && dp->plat_data->bridge->ops & DRM_BRIDGE_OP_DETECT) -+ connector->polled = DRM_CONNECTOR_POLL_CONNECT | -+ DRM_CONNECTOR_POLL_DISCONNECT; - - ret = drm_connector_init(dp->drm_dev, connector, - &analogix_dp_connector_funcs, -- DRM_MODE_CONNECTOR_eDP); -+ connector_type); - if (ret) { - DRM_ERROR("Failed to initialize connector with drm\n"); - return ret; - } - -+ private = connector->dev->dev_private; + -+ if (dp->split_area) -+ drm_object_attach_property(&connector->base, -+ private->split_area_prop, -+ dp->split_area); ++/** ++ * Kernel-wide include for common macros and types. 
++ */ + - drm_connector_helper_add(connector, - &analogix_dp_connector_helper_funcs); - drm_connector_attach_encoder(connector, encoder); -@@ -1267,6 +1561,14 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, - return 0; - } - -+static void analogix_dp_bridge_detach(struct drm_bridge *bridge) -+{ -+ struct analogix_dp_device *dp = bridge->driver_private; ++#ifndef _MALISW_H_ ++#define _MALISW_H_ + -+ if (dp->plat_data->detach) -+ dp->plat_data->detach(dp->plat_data, bridge); -+} ++#include ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) ++#define U8_MAX ((u8)~0U) ++#define S8_MAX ((s8)(U8_MAX>>1)) ++#define S8_MIN ((s8)(-S8_MAX - 1)) ++#define U16_MAX ((u16)~0U) ++#define S16_MAX ((s16)(U16_MAX>>1)) ++#define S16_MIN ((s16)(-S16_MAX - 1)) ++#define U32_MAX ((u32)~0U) ++#define S32_MAX ((s32)(U32_MAX>>1)) ++#define S32_MIN ((s32)(-S32_MAX - 1)) ++#define U64_MAX ((u64)~0ULL) ++#define S64_MAX ((s64)(U64_MAX>>1)) ++#define S64_MIN ((s64)(-S64_MAX - 1)) ++#endif /* LINUX_VERSION_CODE */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) ++#define SIZE_MAX (~(size_t)0) ++#endif /* LINUX_VERSION_CODE */ + - static - struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp, - struct drm_atomic_state *state) -@@ -1290,7 +1592,8 @@ static - struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, - struct drm_atomic_state *state) - { -- struct drm_encoder *encoder = dp->encoder; -+ struct drm_bridge *bridge = &dp->bridge; -+ struct drm_encoder *encoder = bridge->encoder; - struct drm_connector *connector; - struct drm_connector_state *conn_state; - -@@ -1312,39 +1615,35 @@ analogix_dp_bridge_atomic_pre_enable(struct drm_bridge *bridge, - struct drm_atomic_state *old_state = old_bridge_state->base.state; - struct analogix_dp_device *dp = bridge->driver_private; - struct drm_crtc *crtc; -- struct drm_crtc_state *old_crtc_state; -- int ret; -+ struct drm_crtc_state *old_crtc_state, *new_crtc_state; - - crtc = analogix_dp_get_new_crtc(dp, old_state); - if (!crtc) - return; - - old_crtc_state = drm_atomic_get_old_crtc_state(old_state, crtc); ++/** ++ * MIN - Return the lesser of two values. ++ * ++ * As a macro it may evaluate its arguments more than once. ++ * Refer to MAX macro for more details ++ */ ++#define MIN(x, y) ((x) < (y) ? (x) : (y)) + -+ new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc); -+ analogix_dp_bridge_mode_set(bridge, &new_crtc_state->adjusted_mode); ++/** ++ * MAX - Return the greater of two values. ++ * ++ * As a macro it may evaluate its arguments more than once. ++ * If called on the same two arguments as MIN it is guaranteed to return ++ * the one that MIN didn't return. This is significant for types where not ++ * all values are comparable e.g. NaNs in floating-point types. But if you want ++ * to retrieve the min and max of two values, consider using a conditional swap ++ * instead. ++ */ ++#define MAX(x, y) ((x) < (y) ? 
(y) : (x)) + - /* Don't touch the panel if we're coming back from PSR */ - if (old_crtc_state && old_crtc_state->self_refresh_active) - return; - -- ret = analogix_dp_prepare_panel(dp, true, true); -- if (ret) -- DRM_ERROR("failed to setup the panel ret = %d\n", ret); -+ if (dp->plat_data->panel) -+ analogix_dp_panel_prepare(dp); - } - - static int analogix_dp_set_bridge(struct analogix_dp_device *dp) - { - int ret; - -- pm_runtime_get_sync(dp->dev); -- -- ret = clk_prepare_enable(dp->clock); -- if (ret < 0) { -- DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); -- goto out_dp_clk_pre; -- } -- - if (dp->plat_data->power_on_start) - dp->plat_data->power_on_start(dp->plat_data); - -- phy_power_on(dp->phy); -+ ret = analogix_dp_phy_power_on(dp); -+ if (ret) -+ return ret; - - ret = analogix_dp_init_dp(dp); - if (ret) -@@ -1362,28 +1661,35 @@ static int analogix_dp_set_bridge(struct analogix_dp_device *dp) - } - - ret = analogix_dp_commit(dp); -- if (ret) { -+ if (ret < 0) { - DRM_ERROR("dp commit error, ret = %d\n", ret); - goto out_dp_init; - } - -+ if (dp->plat_data->panel) -+ drm_panel_enable(dp->plat_data->panel); ++/** ++ * @hideinitializer ++ * Function-like macro for suppressing unused variable warnings. Where possible ++ * such variables should be removed; this macro is present for cases where we ++ * much support API backwards compatibility. ++ */ ++#define CSTD_UNUSED(x) ((void)(x)) + - if (dp->plat_data->power_on_end) - dp->plat_data->power_on_end(dp->plat_data); - -- enable_irq(dp->irq); - return 0; - - out_dp_init: -- phy_power_off(dp->phy); -+ analogix_dp_phy_power_off(dp); - if (dp->plat_data->power_off) - dp->plat_data->power_off(dp->plat_data); -- clk_disable_unprepare(dp->clock); --out_dp_clk_pre: -- pm_runtime_put_sync(dp->dev); -- - return ret; - } - -+static void analogix_dp_modeset_retry_work_fn(struct work_struct *work) -+{ -+ struct analogix_dp_device *dp = -+ container_of(work, typeof(*dp), modeset_retry_work); ++/** ++ * @hideinitializer ++ * Function-like macro for use where "no behavior" is desired. This is useful ++ * when compile time macros turn a function-like macro in to a no-op, but ++ * where having no statement is otherwise invalid. ++ */ ++#define CSTD_NOP(...) ((void)#__VA_ARGS__) + -+ /* Send Hotplug uevent so userspace can reprobe */ -+ drm_kms_helper_hotplug_event(dp->bridge.dev); -+} ++/** ++ * Function-like macro for converting a pointer in to a u64 for storing into ++ * an external data structure. This is commonly used when pairing a 32-bit ++ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion ++ * is complex and a straight cast does not work reliably as pointers are ++ * often considered as signed. ++ */ ++#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x))) + - static void - analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, - struct drm_bridge_state *old_bridge_state) -@@ -1422,12 +1728,14 @@ analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, - usleep_range(10, 11); - } - dev_err(dp->dev, "too many times retry set bridge, give it up\n"); ++/** ++ * @hideinitializer ++ * Function-like macro for stringizing a single level macro. 
++ * @code ++ * #define MY_MACRO 32 ++ * CSTD_STR1( MY_MACRO ) ++ * > "MY_MACRO" ++ * @endcode ++ */ ++#define CSTD_STR1(x) #x + -+ /* Schedule a Hotplug Uevent to userspace to start modeset */ -+ schedule_work(&dp->modeset_retry_work); - } - - static void analogix_dp_bridge_disable(struct drm_bridge *bridge) - { - struct analogix_dp_device *dp = bridge->driver_private; -- int ret; - - if (dp->dpms_mode != DRM_MODE_DPMS_ON) - return; -@@ -1439,27 +1747,29 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) - } - } - -- disable_irq(dp->irq); -+ if (!analogix_dp_get_plug_in_status(dp)) -+ analogix_dp_link_power_down(dp); - - if (dp->plat_data->power_off) - dp->plat_data->power_off(dp->plat_data); - - analogix_dp_set_analog_power_down(dp, POWER_ALL, 1); -- phy_power_off(dp->phy); -- -- clk_disable_unprepare(dp->clock); -+ analogix_dp_phy_power_off(dp); - -- pm_runtime_put_sync(dp->dev); -- -- ret = analogix_dp_prepare_panel(dp, false, true); -- if (ret) -- DRM_ERROR("failed to setup the panel ret = %d\n", ret); -+ if (dp->plat_data->panel) -+ analogix_dp_panel_unprepare(dp); - - dp->fast_train_enable = false; - dp->psr_supported = false; - dp->dpms_mode = DRM_MODE_DPMS_OFF; - } - -+void analogix_dp_disable(struct analogix_dp_device *dp) -+{ -+ analogix_dp_bridge_disable(&dp->bridge); -+} -+EXPORT_SYMBOL_GPL(analogix_dp_disable); ++/** ++ * @hideinitializer ++ * Function-like macro for stringizing a macro's value. This should not be used ++ * if the macro is defined in a way which may have no value; use the ++ * alternative @c CSTD_STR2N macro should be used instead. ++ * @code ++ * #define MY_MACRO 32 ++ * CSTD_STR2( MY_MACRO ) ++ * > "32" ++ * @endcode ++ */ ++#define CSTD_STR2(x) CSTD_STR1(x) + - static void - analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, - struct drm_bridge_state *old_bridge_state) -@@ -1524,33 +1834,38 @@ analogix_dp_bridge_atomic_post_disable(struct drm_bridge *bridge, - } - - static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, -- const struct drm_display_mode *orig_mode, -- const struct drm_display_mode *mode) -+ const struct drm_display_mode *adj_mode) - { - struct analogix_dp_device *dp = bridge->driver_private; - struct drm_display_info *display_info = &dp->connector.display_info; - struct video_info *video = &dp->video_info; -+ struct drm_display_mode *mode = &video->mode; - struct device_node *dp_node = dp->dev->of_node; - int vic; - -+ drm_mode_copy(mode, adj_mode); -+ if (dp->plat_data->split_mode) -+ dp->plat_data->convert_to_origin_mode(mode); ++/** ++ * Specify an assertion value which is evaluated at compile time. Recommended ++ * usage is specification of a @c static @c INLINE function containing all of ++ * the assertions thus: ++ * ++ * @code ++ * static INLINE [module]_compile_time_assertions( void ) ++ * { ++ * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) ); ++ * } ++ * @endcode ++ * ++ * @note Use @c static not @c STATIC. We never want to turn off this @c static ++ * specification for testing purposes. 
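A compact, purely illustrative use of the helpers in this header, including the compile-time assert whose definition follows just below; EXAMPLE_VERSION is a made-up macro.

#include <linux/types.h>

#include "mali_malisw.h"

#define EXAMPLE_VERSION 3

static inline void example_compile_time_checks(void)
{
	/* CSTD_STR2 stringizes the macro's value ("3");
	 * CSTD_STR1 would yield its name ("EXAMPLE_VERSION"). */
	const char *ver = CSTD_STR2(EXAMPLE_VERSION);

	CSTD_UNUSED(ver);

	/* Rejected at compile time if a pointer cannot round-trip through PTR_TO_U64(). */
	CSTD_COMPILE_TIME_ASSERT(sizeof(uintptr_t) <= sizeof(u64));
}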
++ */ ++#define CSTD_COMPILE_TIME_ASSERT(expr) \ ++ do { switch (0) { case 0: case (expr):; } } while (false) + - /* Input video interlaces & hsync pol & vsync pol */ - video->interlaced = !!(mode->flags & DRM_MODE_FLAG_INTERLACE); -- video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); -- video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); -+ if (dp->plat_data->dev_type == RK3588_EDP) { -+ video->v_sync_polarity = true; -+ video->h_sync_polarity = true; -+ } else { -+ video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); -+ video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); -+ } - - /* Input video dynamic_range & colorimetry */ - vic = drm_match_cea_mode(mode); - if ((vic == 6) || (vic == 7) || (vic == 21) || (vic == 22) || -- (vic == 2) || (vic == 3) || (vic == 17) || (vic == 18)) { -+ (vic == 2) || (vic == 3) || (vic == 17) || (vic == 18)) - video->dynamic_range = CEA; -- video->ycbcr_coeff = COLOR_YCBCR601; -- } else if (vic) { -+ else if (vic) - video->dynamic_range = CEA; -- video->ycbcr_coeff = COLOR_YCBCR709; -- } else { -+ else - video->dynamic_range = VESA; -- video->ycbcr_coeff = COLOR_YCBCR709; -- } - - /* Input vide bpc and color_formats */ - switch (display_info->bpc) { -@@ -1570,12 +1885,16 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, - video->color_depth = COLOR_8; - break; - } -- if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR444) -+ if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR444) { - video->color_space = COLOR_YCBCR444; -- else if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422) -+ video->ycbcr_coeff = COLOR_YCBCR709; -+ } else if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422) { - video->color_space = COLOR_YCBCR422; -- else -+ video->ycbcr_coeff = COLOR_YCBCR709; -+ } else { - video->color_space = COLOR_RGB; -+ video->ycbcr_coeff = COLOR_YCBCR601; -+ } - - /* - * NOTE: those property parsing code is used for providing backward -@@ -1600,6 +1919,56 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, - video->interlaced = true; - } - -+static bool analogix_dp_link_config_validate(u8 link_rate, u8 lane_count) -+{ -+ switch (link_rate) { -+ case DP_LINK_BW_1_62: -+ case DP_LINK_BW_2_7: -+ case DP_LINK_BW_5_4: -+ break; -+ default: -+ return false; -+ } ++#endif /* _MALISW_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_midg_coherency.h b/drivers/gpu/arm/midgard/mali_midg_coherency.h +new file mode 100644 +index 000000000..a509cbd5f +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_midg_coherency.h +@@ -0,0 +1,26 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ switch (lane_count) { -+ case 1: -+ case 2: -+ case 4: -+ break; -+ default: -+ return false; -+ } + -+ return true; -+} + -+static enum drm_mode_status -+analogix_dp_bridge_mode_valid(struct drm_bridge *bridge, -+ const struct drm_display_info *info, -+ const struct drm_display_mode *mode) -+{ -+ struct analogix_dp_device *dp = bridge->driver_private; -+ struct drm_display_mode m; -+ u32 max_link_rate, max_lane_count; ++#ifndef _MIDG_COHERENCY_H_ ++#define _MIDG_COHERENCY_H_ + -+ drm_mode_copy(&m, mode); ++#define COHERENCY_ACE_LITE 0 ++#define COHERENCY_ACE 1 ++#define COHERENCY_NONE 31 ++#define COHERENCY_FEATURE_BIT(x) (1 << (x)) + -+ if (dp->plat_data->split_mode || dp->plat_data->dual_connector_split) -+ dp->plat_data->convert_to_origin_mode(&m); ++#endif /* _MIDG_COHERENCY_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h +new file mode 100644 +index 000000000..7d7b7bcd3 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h +@@ -0,0 +1,611 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ max_link_rate = min_t(u32, dp->video_info.max_link_rate, -+ dp->link_train.link_rate); -+ max_lane_count = min_t(u32, dp->video_info.max_lane_count, -+ dp->link_train.lane_count); -+ if (analogix_dp_link_config_validate(max_link_rate, max_lane_count) && -+ !analogix_dp_bandwidth_ok(dp, &m, -+ drm_dp_bw_code_to_link_rate(max_link_rate), -+ max_lane_count)) -+ return MODE_BAD; + -+ return MODE_OK; -+} + - static const struct drm_bridge_funcs analogix_dp_bridge_funcs = { - .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, -@@ -1608,37 +1977,85 @@ static const struct drm_bridge_funcs analogix_dp_bridge_funcs = { - .atomic_enable = analogix_dp_bridge_atomic_enable, - .atomic_disable = analogix_dp_bridge_atomic_disable, - .atomic_post_disable = analogix_dp_bridge_atomic_post_disable, -- .mode_set = analogix_dp_bridge_mode_set, - .attach = analogix_dp_bridge_attach, -+ .detach = analogix_dp_bridge_detach, -+ .mode_valid = analogix_dp_bridge_mode_valid, - }; - --static int analogix_dp_create_bridge(struct drm_device *drm_dev, -- struct analogix_dp_device *dp) -+static int analogix_dp_bridge_init(struct analogix_dp_device *dp) - { -- struct drm_bridge *bridge; -+ struct drm_bridge *bridge = &dp->bridge; -+ int ret; ++#ifndef _MIDGARD_REGMAP_H_ ++#define _MIDGARD_REGMAP_H_ + -+ if (!dp->plat_data->left) { -+ ret = drm_bridge_attach(dp->encoder, bridge, NULL, 0); -+ if (ret) { -+ DRM_ERROR("failed to attach drm bridge\n"); -+ return ret; -+ } -+ } - -- bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL); -- if (!bridge) { -- DRM_ERROR("failed to allocate for drm bridge\n"); -- return -ENOMEM; -+ if (dp->plat_data->right) { -+ struct analogix_dp_device *secondary = dp->plat_data->right; -+ struct drm_bridge *last_bridge = -+ list_last_entry(&bridge->encoder->bridge_chain, -+ struct drm_bridge, chain_node); ++#include "mali_midg_coherency.h" 
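For reference, a sketch of how the COHERENCY_* values defined in mali_midg_coherency.h above are typically consumed: a supported-coherency bitmask reported by the GPU is tested with COHERENCY_FEATURE_BIT(). The function and its coherency_features argument are illustrative only.

#include <linux/types.h>

#include "mali_midg_coherency.h"

/* Pick the strongest supported protocol from a coherency feature bitmask. */
static u32 example_select_coherency(u32 coherency_features)
{
	if (coherency_features & COHERENCY_FEATURE_BIT(COHERENCY_ACE))
		return COHERENCY_ACE;
	if (coherency_features & COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE))
		return COHERENCY_ACE_LITE;
	return COHERENCY_NONE;
}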
++#include "mali_kbase_gpu_id.h" + -+ ret = drm_bridge_attach(dp->encoder, &secondary->bridge, last_bridge, -+ DRM_BRIDGE_ATTACH_NO_CONNECTOR); -+ if (ret) -+ return ret; - } - -- dp->bridge = bridge; -+ return 0; -+} ++/* ++ * Begin Register Offsets ++ */ + -+static u32 analogix_dp_parse_link_frequencies(struct analogix_dp_device *dp) -+{ -+ struct device_node *node = dp->dev->of_node; -+ struct device_node *endpoint; -+ u64 frequency = 0; -+ int cnt; ++#define GPU_CONTROL_BASE 0x0000 ++#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) ++#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ ++#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ ++#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer ++ size */ ++#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ ++#define MEM_FEATURES 0x010 /* (RO) Memory system features */ ++#define MMU_FEATURES 0x014 /* (RO) MMU features */ ++#define AS_PRESENT 0x018 /* (RO) Address space slots present */ ++#define JS_PRESENT 0x01C /* (RO) Job slots present */ ++#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ ++#define GPU_IRQ_CLEAR 0x024 /* (WO) */ ++#define GPU_IRQ_MASK 0x028 /* (RW) */ ++#define GPU_IRQ_STATUS 0x02C /* (RO) */ + -+ endpoint = of_graph_get_endpoint_by_regs(node, 1, 0); -+ if (!endpoint) -+ return 0; ++/* IRQ flags */ ++#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ ++#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ ++#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET ++ commands which may take time. */ ++#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ ++#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down ++ and the power manager is idle. */ + -+ cnt = of_property_count_u64_elems(endpoint, "link-frequencies"); -+ if (cnt > 0) -+ of_property_read_u64_index(endpoint, "link-frequencies", -+ cnt - 1, &frequency); -+ of_node_put(endpoint); ++#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ ++#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + -+ if (!frequency) -+ return 0; ++#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ ++ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + -+ do_div(frequency, 10 * 1000); /* symbol rate kbytes */ - -- bridge->driver_private = dp; -- bridge->funcs = &analogix_dp_bridge_funcs; -+ switch (frequency) { -+ case 162000: -+ case 270000: -+ case 540000: -+ break; -+ default: -+ dev_err(dp->dev, "invalid link frequency value: %lld\n", frequency); -+ return 0; -+ } - -- return drm_bridge_attach(dp->encoder, bridge, NULL, 0); -+ return frequency; - } - - static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) - { - struct device_node *dp_node = dp->dev->of_node; - struct video_info *video_info = &dp->video_info; -+ struct property *prop; -+ int ret, len, num_lanes; -+ u32 max_link_rate; - - switch (dp->plat_data->dev_type) { - case RK3288_DP: -- case RK3399_EDP: -+ case RK3568_EDP: - /* - * Like Rk3288 DisplayPort TRM indicate that "Main link - * containing 4 physical lanes of 2.7/1.62 Gbps/lane". 
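To show how the register offsets above are meant to be composed: GPU_CONTROL_REG() adds the control-block base to a register offset, and the result is applied to the GPU's ioremapped MMIO window. The raw readl()/writel() access and the regs pointer below are illustrative; kbase itself goes through its own register-access wrappers.

#include <linux/io.h>
#include <linux/types.h>

#include "mali_midg_regmap.h"

/* Sketch: read the GPU identity register and acknowledge pending GPU IRQs. */
static u32 example_read_gpu_id(void __iomem *regs)
{
	return readl(regs + GPU_CONTROL_REG(GPU_ID));
}

static void example_ack_gpu_irqs(void __iomem *regs)
{
	u32 status = readl(regs + GPU_CONTROL_REG(GPU_IRQ_STATUS));

	writel(status, regs + GPU_CONTROL_REG(GPU_IRQ_CLEAR));
}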
-@@ -1646,6 +2063,11 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) - video_info->max_link_rate = 0x0A; - video_info->max_lane_count = 0x04; - break; -+ case RK3399_EDP: -+ case RK3588_EDP: -+ video_info->max_link_rate = 0x14; -+ video_info->max_lane_count = 0x04; -+ break; - case EXYNOS_DP: - /* - * NOTE: those property parseing code is used for -@@ -1658,6 +2080,44 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) - break; - } - -+ max_link_rate = analogix_dp_parse_link_frequencies(dp); -+ if (max_link_rate && max_link_rate < drm_dp_bw_code_to_link_rate(video_info->max_link_rate)) -+ video_info->max_link_rate = drm_dp_link_rate_to_bw_code(max_link_rate); ++#define GPU_COMMAND 0x030 /* (WO) */ ++#define GPU_STATUS 0x034 /* (RO) */ ++#define LATEST_FLUSH 0x038 /* (RO) */ + -+ video_info->video_bist_enable = -+ of_property_read_bool(dp_node, "analogix,video-bist-enable"); -+ video_info->force_stream_valid = -+ of_property_read_bool(dp_node, "analogix,force-stream-valid"); ++#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ ++#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + -+ prop = of_find_property(dp_node, "data-lanes", &len); -+ if (!prop) { -+ video_info->lane_map[0] = 0; -+ video_info->lane_map[1] = 1; -+ video_info->lane_map[2] = 2; -+ video_info->lane_map[3] = 3; -+ DRM_DEV_DEBUG(dp->dev, "failed to find data lane mapping, using default\n"); -+ return 0; -+ } ++#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ ++#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ ++#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + -+ num_lanes = len / sizeof(u32); ++#define PWR_KEY 0x050 /* (WO) Power manager key register */ ++#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ ++#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ + -+ if (num_lanes < 1 || num_lanes > 4 || num_lanes == 3) { -+ DRM_DEV_ERROR(dp->dev, "bad number of data lanes\n"); -+ return -EINVAL; -+ } ++#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ ++#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ ++#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ ++#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ ++#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ ++#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ ++#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ + -+ video_info->max_lane_count = num_lanes; ++#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ ++#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ ++#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ ++#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + -+ ret = of_property_read_u32_array(dp_node, "data-lanes", -+ video_info->lane_map, num_lanes); -+ if (ret) { -+ DRM_DEV_ERROR(dp->dev, "failed to read lane data\n"); -+ return ret; -+ } ++#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ ++#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ ++#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ ++#define THREAD_FEATURES 0x0AC /* (RO) Thread features 
*/ + -+ if (device_property_read_u32(dp->dev, "split-area", &dp->split_area)) -+ dp->split_area = 0; ++#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ ++#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ ++#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ + - return 0; - } - -@@ -1681,13 +2141,116 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, - return ret; - } - -+int analogix_dp_audio_hw_params(struct analogix_dp_device *dp, -+ struct hdmi_codec_daifmt *daifmt, -+ struct hdmi_codec_params *params) -+{ -+ switch (daifmt->fmt) { -+ case HDMI_SPDIF: -+ analogix_dp_audio_config_spdif(dp); -+ break; -+ case HDMI_I2S: -+ analogix_dp_audio_config_i2s(dp); -+ break; -+ default: -+ DRM_DEV_ERROR(dp->dev, "invalid daifmt %d\n", daifmt->fmt); -+ return -EINVAL; -+ } ++#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(analogix_dp_audio_hw_params); ++#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ ++#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ ++#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ ++#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ ++#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ ++#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ ++#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ ++#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ ++#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ ++#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ ++#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ ++#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ ++#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ ++#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ ++#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ ++#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + -+void analogix_dp_audio_shutdown(struct analogix_dp_device *dp) -+{ -+ analogix_dp_audio_disable(dp); -+} -+EXPORT_SYMBOL_GPL(analogix_dp_audio_shutdown); ++#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + -+int analogix_dp_audio_startup(struct analogix_dp_device *dp) -+{ -+ analogix_dp_audio_enable(dp); ++#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ ++#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(analogix_dp_audio_startup); ++#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ ++#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + -+int analogix_dp_audio_get_eld(struct analogix_dp_device *dp, u8 *buf, size_t len) -+{ -+ memcpy(buf, dp->connector.eld, min(sizeof(dp->connector.eld), len)); ++#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ ++#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(analogix_dp_audio_get_eld); ++#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ ++#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + -+static void analogix_dp_link_train_restore(struct analogix_dp_device *dp) -+{ -+ u32 link_rate, lane_count; -+ u8 lane, spread; + -+ analogix_dp_get_link_bandwidth(dp, &link_rate); -+ 
analogix_dp_get_lane_count(dp, &lane_count); -+ drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); ++#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ ++#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + -+ dp->link_train.link_rate = link_rate; -+ dp->link_train.lane_count = lane_count; -+ dp->link_train.enhanced_framing = analogix_dp_get_enhanced_mode(dp); -+ dp->link_train.ssc = !!(spread & DP_MAX_DOWNSPREAD_0_5); ++#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ ++#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + -+ for (lane = 0; lane < 4; lane++) -+ dp->link_train.training_lane[lane] = -+ analogix_dp_get_lane_link_training(dp, lane); -+} ++#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ ++#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + -+int analogix_dp_loader_protect(struct analogix_dp_device *dp) -+{ -+ u8 link_status[DP_LINK_STATUS_SIZE]; -+ int ret; ++#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ ++#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + -+ ret = analogix_dp_phy_power_on(dp); -+ if (ret) -+ return ret; + -+ dp->dpms_mode = DRM_MODE_DPMS_ON; ++#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ ++#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + -+ analogix_dp_link_train_restore(dp); ++#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ ++#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + -+ ret = analogix_dp_fast_link_train_detection(dp); -+ if (ret) -+ goto err_disable; ++#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ ++#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + -+ if (analogix_dp_detect_sink_psr(dp)) { -+ ret = analogix_dp_enable_sink_psr(dp); -+ if (ret) -+ goto err_disable; -+ } ++#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ ++#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + -+ ret = drm_dp_dpcd_read_link_status(&dp->aux, link_status); -+ if (ret < 0) { -+ dev_err(dp->dev, "Failed to read link status\n"); -+ goto err_disable; -+ } + -+ if (!drm_dp_channel_eq_ok(link_status, dp->link_train.lane_count)) { -+ dev_err(dp->dev, "Channel EQ or CR not ok\n"); -+ ret = -EINVAL; -+ goto err_disable; -+ } ++#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ ++#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + -+ return 0; ++#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ ++#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + -+err_disable: -+ analogix_dp_disable(dp); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(analogix_dp_loader_protect); ++#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ ++#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + - struct analogix_dp_device * - analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) - { - struct platform_device *pdev = to_platform_device(dev); - struct analogix_dp_device *dp; - struct resource *res; -- unsigned int irq_flags; - int ret; - - if (!plat_data) { -@@ -1701,9 +2264,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) - - dp->dev = 
&pdev->dev; - dp->dpms_mode = DRM_MODE_DPMS_OFF; -+ INIT_WORK(&dp->modeset_retry_work, analogix_dp_modeset_retry_work_fn); - - mutex_init(&dp->panel_lock); -- dp->panel_is_modeset = false; -+ dp->panel_is_prepared = false; - - /* - * platform dp driver need containor_of the plat_data to get -@@ -1732,21 +2296,19 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) - } - } - -- dp->clock = devm_clk_get(&pdev->dev, "dp"); -- if (IS_ERR(dp->clock)) { -- dev_err(&pdev->dev, "failed to get clock\n"); -- return ERR_CAST(dp->clock); -+ ret = devm_clk_bulk_get_all(dev, &dp->clks); -+ if (ret < 0) { -+ dev_err(dev, "failed to get clocks %d\n", ret); -+ return ERR_PTR(ret); - } - -- clk_prepare_enable(dp->clock); -+ dp->nr_clks = ret; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - dp->reg_base = devm_ioremap_resource(&pdev->dev, res); -- if (IS_ERR(dp->reg_base)) { -- ret = PTR_ERR(dp->reg_base); -- goto err_disable_clk; -- } -+ if (IS_ERR(dp->reg_base)) -+ return ERR_CAST(dp->reg_base); - - dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); - -@@ -1758,46 +2320,55 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) - if (IS_ERR(dp->hpd_gpiod)) { - dev_err(dev, "error getting HDP GPIO: %ld\n", - PTR_ERR(dp->hpd_gpiod)); -- ret = PTR_ERR(dp->hpd_gpiod); -- goto err_disable_clk; -+ return ERR_CAST(dp->hpd_gpiod); - } - - if (dp->hpd_gpiod) { -- /* -- * Set up the hotplug GPIO from the device tree as an interrupt. -- * Simply specifying a different interrupt in the device tree -- * doesn't work since we handle hotplug rather differently when -- * using a GPIO. We also need the actual GPIO specifier so -- * that we can get the current state of the GPIO. -- */ -- dp->irq = gpiod_to_irq(dp->hpd_gpiod); -- irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; -- } else { -- dp->irq = platform_get_irq(pdev, 0); -- irq_flags = 0; -+ ret = devm_request_threaded_irq(dev, -+ gpiod_to_irq(dp->hpd_gpiod), -+ NULL, -+ analogix_dp_hpd_irq_handler, -+ IRQF_TRIGGER_RISING | -+ IRQF_TRIGGER_FALLING | -+ IRQF_ONESHOT, -+ "analogix-hpd", dp); -+ if (ret) { -+ dev_err(dev, "failed to request hpd IRQ: %d\n", ret); -+ return ERR_PTR(ret); -+ } - } - -+ dp->irq = platform_get_irq(pdev, 0); - if (dp->irq == -ENXIO) { - dev_err(&pdev->dev, "failed to get irq\n"); -- ret = -ENODEV; -- goto err_disable_clk; -+ return ERR_PTR(-ENODEV); - } - -- ret = devm_request_threaded_irq(&pdev->dev, dp->irq, -- analogix_dp_hardirq, -+ irq_set_status_flags(dp->irq, IRQ_NOAUTOEN); -+ ret = devm_request_threaded_irq(dev, dp->irq, NULL, - analogix_dp_irq_thread, -- irq_flags, "analogix-dp", dp); -+ IRQF_ONESHOT, dev_name(dev), dp); - if (ret) { - dev_err(&pdev->dev, "failed to request irq\n"); -- goto err_disable_clk; -+ return ERR_PTR(ret); - } -- disable_irq(dp->irq); - -- return dp; -+ dp->extcon = devm_extcon_dev_allocate(dev, analogix_dp_cable); -+ if (IS_ERR(dp->extcon)) { -+ dev_err(dev, "failed to allocate extcon device\n"); -+ return ERR_CAST(dp->extcon); -+ } ++#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ ++#define STACK_PRWOFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + -+ ret = devm_extcon_dev_register(dev, dp->extcon); -+ if (ret) { -+ dev_err(dev, "failed to register extcon device\n"); -+ return ERR_PTR(ret); -+ } + -+ dp->bridge.driver_private = dp; -+ dp->bridge.funcs = &analogix_dp_bridge_funcs; - --err_disable_clk: -- clk_disable_unprepare(dp->clock); -- return ERR_PTR(ret); -+ return 
dp; - } - EXPORT_SYMBOL_GPL(analogix_dp_probe); - -@@ -1820,16 +2391,21 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) - pm_runtime_use_autosuspend(dp->dev); - pm_runtime_set_autosuspend_delay(dp->dev, 100); - pm_runtime_enable(dp->dev); -+ pm_runtime_get_sync(dp->dev); -+ analogix_dp_init(dp); - -- ret = analogix_dp_create_bridge(drm_dev, dp); -+ ret = analogix_dp_bridge_init(dp); - if (ret) { -- DRM_ERROR("failed to create bridge (%d)\n", ret); -+ DRM_ERROR("failed to init bridge (%d)\n", ret); - goto err_disable_pm_runtime; - } - -+ enable_irq(dp->irq); ++#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ ++#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + - return 0; - - err_disable_pm_runtime: -+ pm_runtime_put(dp->dev); - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - drm_dp_aux_unregister(&dp->aux); -@@ -1840,15 +2416,11 @@ EXPORT_SYMBOL_GPL(analogix_dp_bind); - - void analogix_dp_unbind(struct analogix_dp_device *dp) - { -- analogix_dp_bridge_disable(dp->bridge); -- dp->connector.funcs->destroy(&dp->connector); -- -- if (dp->plat_data->panel) { -- if (drm_panel_unprepare(dp->plat_data->panel)) -- DRM_ERROR("failed to turnoff the panel\n"); -- } -- -+ disable_irq(dp->irq); -+ if (dp->connector.funcs->destroy) -+ dp->connector.funcs->destroy(&dp->connector); - drm_dp_aux_unregister(&dp->aux); -+ pm_runtime_put(dp->dev); - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } -@@ -1856,32 +2428,40 @@ EXPORT_SYMBOL_GPL(analogix_dp_unbind); - - void analogix_dp_remove(struct analogix_dp_device *dp) - { -- clk_disable_unprepare(dp->clock); -+ cancel_work_sync(&dp->modeset_retry_work); - } - EXPORT_SYMBOL_GPL(analogix_dp_remove); - --#ifdef CONFIG_PM - int analogix_dp_suspend(struct analogix_dp_device *dp) - { -- clk_disable_unprepare(dp->clock); -+ pm_runtime_force_suspend(dp->dev); ++#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ ++#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + - return 0; - } - EXPORT_SYMBOL_GPL(analogix_dp_suspend); - - int analogix_dp_resume(struct analogix_dp_device *dp) - { -- int ret; -- -- ret = clk_prepare_enable(dp->clock); -- if (ret < 0) { -- DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); -- return ret; -- } -+ pm_runtime_force_resume(dp->dev); -+ analogix_dp_init(dp); - - return 0; - } - EXPORT_SYMBOL_GPL(analogix_dp_resume); --#endif ++#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ ++#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + -+int analogix_dp_runtime_suspend(struct analogix_dp_device *dp) -+{ -+ clk_bulk_disable_unprepare(dp->nr_clks, dp->clks); ++#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ ++#define STACK_PRWTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(analogix_dp_runtime_suspend); + -+int analogix_dp_runtime_resume(struct analogix_dp_device *dp) -+{ -+ return clk_bulk_prepare_enable(dp->nr_clks, dp->clks); -+} -+EXPORT_SYMBOL_GPL(analogix_dp_runtime_resume); - - int analogix_dp_start_crc(struct drm_connector *connector) - { -diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h -index 433f2d7ef..87c9cefbc 100644 ---- 
a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h -+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h -@@ -11,6 +11,7 @@ - - #include - #include -+#include - - #define DP_TIMEOUT_LOOP_COUNT 100 - #define MAX_CR_LOOP 5 -@@ -69,6 +70,9 @@ enum pattern_set { - D10_2, - TRAINING_PTN1, - TRAINING_PTN2, -+ TRAINING_PTN3, -+ TEST_PATTERN_80BIT, -+ TEST_PATTERN_HBR2, - DP_NONE - }; - -@@ -120,15 +124,9 @@ enum analog_power_block { - POWER_ALL - }; - --enum dp_irq_type { -- DP_IRQ_TYPE_HP_CABLE_IN = BIT(0), -- DP_IRQ_TYPE_HP_CABLE_OUT = BIT(1), -- DP_IRQ_TYPE_HP_CHANGE = BIT(2), -- DP_IRQ_TYPE_UNKNOWN = BIT(3), --}; -- - struct video_info { - char *name; -+ struct drm_display_mode mode; - - bool h_sync_polarity; - bool v_sync_polarity; -@@ -141,6 +139,10 @@ struct video_info { - - int max_link_rate; - enum link_lane_count_type max_lane_count; -+ u32 lane_map[4]; ++#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ ++#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + -+ bool video_bist_enable; -+ bool force_stream_valid; - }; - - struct link_train { -@@ -150,18 +152,29 @@ struct link_train { - u8 link_rate; - u8 lane_count; - u8 training_lane[4]; -+ bool ssc; -+ bool enhanced_framing; - - enum link_training_state lt_state; - }; - -+struct analogix_dp_compliance { -+ struct drm_dp_phy_test_params phytest; -+ int test_link_rate; -+ u8 test_lane_count; -+ unsigned long test_type; -+ bool test_active; -+}; ++#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ ++#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + - struct analogix_dp_device { - struct drm_encoder *encoder; - struct device *dev; - struct drm_device *drm_dev; - struct drm_connector connector; -- struct drm_bridge *bridge; -+ struct drm_bridge bridge; - struct drm_dp_aux aux; -- struct clk *clock; -+ struct clk_bulk_data *clks; -+ int nr_clks; - unsigned int irq; - void __iomem *reg_base; - -@@ -173,17 +186,22 @@ struct analogix_dp_device { - bool force_hpd; - bool fast_train_enable; - bool psr_supported; -+ struct work_struct modeset_retry_work; - - struct mutex panel_lock; -- bool panel_is_modeset; -+ bool panel_is_prepared; - -+ u8 dpcd[DP_RECEIVER_CAP_SIZE]; - struct analogix_dp_plat_data *plat_data; -+ struct extcon_dev *extcon; -+ struct analogix_dp_compliance compliance; ++#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ ++#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + -+ u32 split_area; - }; - - /* analogix_dp_reg.c */ - void analogix_dp_enable_video_mute(struct analogix_dp_device *dp, bool enable); - void analogix_dp_stop_video(struct analogix_dp_device *dp); --void analogix_dp_lane_swap(struct analogix_dp_device *dp, bool enable); - void analogix_dp_init_analog_param(struct analogix_dp_device *dp); - void analogix_dp_init_interrupt(struct analogix_dp_device *dp); - void analogix_dp_reset(struct analogix_dp_device *dp); -@@ -199,7 +217,6 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, - int analogix_dp_init_analog_func(struct analogix_dp_device *dp); - void analogix_dp_init_hpd(struct analogix_dp_device *dp); - void analogix_dp_force_hpd(struct analogix_dp_device *dp); --enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp); - void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp); - void analogix_dp_reset_aux(struct analogix_dp_device *dp); - void analogix_dp_init_aux(struct 
analogix_dp_device *dp); -@@ -211,28 +228,11 @@ void analogix_dp_set_lane_count(struct analogix_dp_device *dp, u32 count); - void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count); - void analogix_dp_enable_enhanced_mode(struct analogix_dp_device *dp, - bool enable); -+bool analogix_dp_get_enhanced_mode(struct analogix_dp_device *dp); - void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, - enum pattern_set pattern); --void analogix_dp_set_lane0_pre_emphasis(struct analogix_dp_device *dp, -- u32 level); --void analogix_dp_set_lane1_pre_emphasis(struct analogix_dp_device *dp, -- u32 level); --void analogix_dp_set_lane2_pre_emphasis(struct analogix_dp_device *dp, -- u32 level); --void analogix_dp_set_lane3_pre_emphasis(struct analogix_dp_device *dp, -- u32 level); --void analogix_dp_set_lane0_link_training(struct analogix_dp_device *dp, -- u32 training_lane); --void analogix_dp_set_lane1_link_training(struct analogix_dp_device *dp, -- u32 training_lane); --void analogix_dp_set_lane2_link_training(struct analogix_dp_device *dp, -- u32 training_lane); --void analogix_dp_set_lane3_link_training(struct analogix_dp_device *dp, -- u32 training_lane); --u32 analogix_dp_get_lane0_link_training(struct analogix_dp_device *dp); --u32 analogix_dp_get_lane1_link_training(struct analogix_dp_device *dp); --u32 analogix_dp_get_lane2_link_training(struct analogix_dp_device *dp); --u32 analogix_dp_get_lane3_link_training(struct analogix_dp_device *dp); -+void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp); -+u32 analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, u8 lane); - void analogix_dp_reset_macro(struct analogix_dp_device *dp); - void analogix_dp_init_video(struct analogix_dp_device *dp); - -@@ -255,5 +255,18 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - struct dp_sdp *vsc, bool blocking); - ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - struct drm_dp_aux_msg *msg); -+void analogix_dp_set_video_format(struct analogix_dp_device *dp); -+void analogix_dp_video_bist_enable(struct analogix_dp_device *dp); -+bool analogix_dp_ssc_supported(struct analogix_dp_device *dp); -+int analogix_dp_phy_power_on(struct analogix_dp_device *dp); -+void analogix_dp_phy_power_off(struct analogix_dp_device *dp); -+void analogix_dp_audio_config_spdif(struct analogix_dp_device *dp); -+void analogix_dp_audio_config_i2s(struct analogix_dp_device *dp); -+void analogix_dp_audio_enable(struct analogix_dp_device *dp); -+void analogix_dp_audio_disable(struct analogix_dp_device *dp); -+void analogix_dp_init(struct analogix_dp_device *dp); -+void analogix_dp_irq_handler(struct analogix_dp_device *dp); -+void analogix_dp_phy_test(struct analogix_dp_device *dp); -+void analogix_dp_check_device_service_irq(struct analogix_dp_device *dp); - - #endif /* _ANALOGIX_DP_CORE_H */ -diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c -index 7b0bc9704..b110018b8 100644 ---- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c -+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c -@@ -11,30 +11,44 @@ - #include - #include - #include -+#include - - #include -+#include - - #include "analogix_dp_core.h" - #include "analogix_dp_reg.h" - --#define COMMON_INT_MASK_1 0 --#define COMMON_INT_MASK_2 0 --#define COMMON_INT_MASK_3 0 --#define COMMON_INT_MASK_4 (HOTPLUG_CHG | HPD_LOST | PLUG) --#define INT_STA_MASK INT_HPD -+static void analogix_dp_write(struct analogix_dp_device *dp, u32 reg, 
u32 val) -+{ -+ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { -+ readl(dp->reg_base); -+ writel(val, dp->reg_base + reg); -+ } ++#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ ++#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + -+ writel(val, dp->reg_base + reg); -+} ++#define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ ++#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ ++#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ ++#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ + -+static u32 analogix_dp_read(struct analogix_dp_device *dp, u32 reg) -+{ -+ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) -+ readl(dp->reg_base + reg); ++#define JOB_CONTROL_BASE 0x1000 + -+ return readl(dp->reg_base + reg); -+} - - void analogix_dp_enable_video_mute(struct analogix_dp_device *dp, bool enable) - { - u32 reg; - - if (enable) { -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); - reg |= HDCP_VIDEO_MUTE; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); - reg &= ~HDCP_VIDEO_MUTE; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); - } - } - -@@ -42,23 +56,20 @@ void analogix_dp_stop_video(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); - reg &= ~VIDEO_EN; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); - } - --void analogix_dp_lane_swap(struct analogix_dp_device *dp, bool enable) -+static void analogix_dp_set_lane_map(struct analogix_dp_device *dp) - { -- u32 reg; -+ struct video_info *video_info = &dp->video_info; -+ u32 i, reg = 0; - -- if (enable) -- reg = LANE3_MAP_LOGIC_LANE_0 | LANE2_MAP_LOGIC_LANE_1 | -- LANE1_MAP_LOGIC_LANE_2 | LANE0_MAP_LOGIC_LANE_3; -- else -- reg = LANE3_MAP_LOGIC_LANE_3 | LANE2_MAP_LOGIC_LANE_2 | -- LANE1_MAP_LOGIC_LANE_1 | LANE0_MAP_LOGIC_LANE_0; -+ for (i = 0; i < video_info->max_lane_count; i++) -+ reg |= video_info->lane_map[i] << (2 * i); - -- writel(reg, dp->reg_base + ANALOGIX_DP_LANE_MAP); -+ analogix_dp_write(dp, ANALOGIX_DP_LANE_MAP, reg); - } - - void analogix_dp_init_analog_param(struct analogix_dp_device *dp) -@@ -66,53 +77,54 @@ void analogix_dp_init_analog_param(struct analogix_dp_device *dp) - u32 reg; - - reg = TX_TERMINAL_CTRL_50_OHM; -- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_1, reg); - - reg = SEL_24M | TX_DVDD_BIT_1_0625V; -- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_2); -+ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_2, reg); - - if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { - reg = REF_CLK_24M; - if (dp->plat_data->dev_type == RK3288_DP) - reg ^= REF_CLK_MASK; - -- writel(reg, dp->reg_base + ANALOGIX_DP_PLL_REG_1); -- writel(0x95, dp->reg_base + ANALOGIX_DP_PLL_REG_2); -- writel(0x40, dp->reg_base + ANALOGIX_DP_PLL_REG_3); -- writel(0x58, dp->reg_base + ANALOGIX_DP_PLL_REG_4); -- writel(0x22, dp->reg_base + 
ANALOGIX_DP_PLL_REG_5); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_1, reg); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_2, 0x99); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_3, 0x40); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_4, 0x58); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_5, 0x22); -+ analogix_dp_write(dp, ANALOGIX_DP_BIAS, 0x44); - } - - reg = DRIVE_DVDD_BIT_1_0625V | VCO_BIT_600_MICRO; -- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_3); -+ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_3, reg); - - reg = PD_RING_OSC | AUX_TERMINAL_CTRL_50_OHM | - TX_CUR1_2X | TX_CUR_16_MA; -- writel(reg, dp->reg_base + ANALOGIX_DP_PLL_FILTER_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_PLL_FILTER_CTL_1, reg); - - reg = CH3_AMP_400_MV | CH2_AMP_400_MV | - CH1_AMP_400_MV | CH0_AMP_400_MV; -- writel(reg, dp->reg_base + ANALOGIX_DP_TX_AMP_TUNING_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_TX_AMP_TUNING_CTL, reg); - } - - void analogix_dp_init_interrupt(struct analogix_dp_device *dp) - { - /* Set interrupt pin assertion polarity as high */ -- writel(INT_POL1 | INT_POL0, dp->reg_base + ANALOGIX_DP_INT_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_CTL, INT_POL1 | INT_POL0); - - /* Clear pending regisers */ -- writel(0xff, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); -- writel(0x4f, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_2); -- writel(0xe0, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_3); -- writel(0xe7, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); -- writel(0x63, dp->reg_base + ANALOGIX_DP_INT_STA); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, 0xff); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_2, 0x4f); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_3, 0xe0); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, 0xe7); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, 0x63); - - /* 0:mask,1: unmask */ -- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_1); -- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_2); -- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_3); -- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); -- writel(0x00, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_1, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_2, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_3, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, 0x00); - } - - void analogix_dp_reset(struct analogix_dp_device *dp) -@@ -130,65 +142,54 @@ void analogix_dp_reset(struct analogix_dp_device *dp) - AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | - HDCP_FUNC_EN_N | SW_FUNC_EN_N; - -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); - - reg = SSC_FUNC_EN_N | AUX_FUNC_EN_N | - SERDES_FIFO_FUNC_EN_N | - LS_CLK_DOMAIN_FUNC_EN_N; -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); - - usleep_range(20, 30); - -- analogix_dp_lane_swap(dp, 0); -+ analogix_dp_set_lane_map(dp); - -- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); -- writel(0x40, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); -- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, 0x0); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, 0x40); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, 0x0); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, 0x0); - -- writel(0x0, dp->reg_base 
+ ANALOGIX_DP_PKT_SEND_CTL); -- writel(0x0, dp->reg_base + ANALOGIX_DP_HDCP_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, 0x0); -+ analogix_dp_write(dp, ANALOGIX_DP_HDCP_CTL, 0x0); - -- writel(0x5e, dp->reg_base + ANALOGIX_DP_HPD_DEGLITCH_L); -- writel(0x1a, dp->reg_base + ANALOGIX_DP_HPD_DEGLITCH_H); -+ analogix_dp_write(dp, ANALOGIX_DP_LINK_DEBUG_CTL, 0x10); - -- writel(0x10, dp->reg_base + ANALOGIX_DP_LINK_DEBUG_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, 0x0); - -- writel(0x0, dp->reg_base + ANALOGIX_DP_PHY_TEST); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_FIFO_THRD, 0x0); -+ analogix_dp_write(dp, ANALOGIX_DP_AUDIO_MARGIN, 0x20); - -- writel(0x0, dp->reg_base + ANALOGIX_DP_VIDEO_FIFO_THRD); -- writel(0x20, dp->reg_base + ANALOGIX_DP_AUDIO_MARGIN); -+ analogix_dp_write(dp, ANALOGIX_DP_M_VID_GEN_FILTER_TH, 0x4); -+ analogix_dp_write(dp, ANALOGIX_DP_M_AUD_GEN_FILTER_TH, 0x2); - -- writel(0x4, dp->reg_base + ANALOGIX_DP_M_VID_GEN_FILTER_TH); -- writel(0x2, dp->reg_base + ANALOGIX_DP_M_AUD_GEN_FILTER_TH); -- -- writel(0x00000101, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, 0x00000101); - } - - void analogix_dp_swreset(struct analogix_dp_device *dp) - { -- writel(RESET_DP_TX, dp->reg_base + ANALOGIX_DP_TX_SW_RESET); -+ analogix_dp_write(dp, ANALOGIX_DP_TX_SW_RESET, RESET_DP_TX); - } - - void analogix_dp_config_interrupt(struct analogix_dp_device *dp) - { -- u32 reg; -- - /* 0: mask, 1: unmask */ -- reg = COMMON_INT_MASK_1; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_1); -- -- reg = COMMON_INT_MASK_2; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_2); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_1, 0); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_2, 0); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_3, 0); - -- reg = COMMON_INT_MASK_3; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_3); -- -- reg = COMMON_INT_MASK_4; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); -- -- reg = INT_STA_MASK; -- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); -+ if (dp->force_hpd || dp->hpd_gpiod) -+ analogix_dp_mute_hpd_interrupt(dp); -+ else -+ analogix_dp_unmute_hpd_interrupt(dp); - } - - void analogix_dp_mute_hpd_interrupt(struct analogix_dp_device *dp) -@@ -196,13 +197,13 @@ void analogix_dp_mute_hpd_interrupt(struct analogix_dp_device *dp) - u32 reg; - - /* 0: mask, 1: unmask */ -- reg = readl(dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); -- reg &= ~COMMON_INT_MASK_4; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_MASK_4); -+ reg &= ~HOTPLUG_CHG; -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA_MASK); -- reg &= ~INT_STA_MASK; -- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA_MASK); -+ reg &= ~INT_HPD; -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, reg); - } - - void analogix_dp_unmute_hpd_interrupt(struct analogix_dp_device *dp) -@@ -210,18 +211,20 @@ void analogix_dp_unmute_hpd_interrupt(struct analogix_dp_device *dp) - u32 reg; - - /* 0: mask, 1: unmask */ -- reg = COMMON_INT_MASK_4; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_MASK_4); -+ reg |= HOTPLUG_CHG; -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, reg); - -- reg = INT_STA_MASK; -- writel(reg, dp->reg_base + 
ANALOGIX_DP_INT_STA_MASK); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA_MASK); -+ reg |= INT_HPD; -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, reg); - } - - enum pll_status analogix_dp_get_pll_lock_status(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_DEBUG_CTL); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_DEBUG_CTL); - if (reg & PLL_LOCK) - return PLL_LOCKED; - else -@@ -239,12 +242,12 @@ void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable) - mask = RK_PLL_PD; - } - -- reg = readl(dp->reg_base + pd_addr); -+ reg = analogix_dp_read(dp, pd_addr); - if (enable) - reg |= mask; - else - reg &= ~mask; -- writel(reg, dp->reg_base + pd_addr); -+ analogix_dp_write(dp, pd_addr, reg); - } - - void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, -@@ -265,52 +268,54 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, - else - mask = AUX_PD; - -- reg = readl(dp->reg_base + phy_pd_addr); -- if (enable) -+ reg = analogix_dp_read(dp, phy_pd_addr); -+ if (enable) { -+ reg &= ~(DP_INC_BG | DP_EXP_BG); - reg |= mask; -- else -+ } else { - reg &= ~mask; -- writel(reg, dp->reg_base + phy_pd_addr); -+ } -+ analogix_dp_write(dp, phy_pd_addr, reg); - break; - case CH0_BLOCK: - mask = CH0_PD; -- reg = readl(dp->reg_base + phy_pd_addr); -+ reg = analogix_dp_read(dp, phy_pd_addr); - - if (enable) - reg |= mask; - else - reg &= ~mask; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - break; - case CH1_BLOCK: - mask = CH1_PD; -- reg = readl(dp->reg_base + phy_pd_addr); -+ reg = analogix_dp_read(dp, phy_pd_addr); - - if (enable) - reg |= mask; - else - reg &= ~mask; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - break; - case CH2_BLOCK: - mask = CH2_PD; -- reg = readl(dp->reg_base + phy_pd_addr); -+ reg = analogix_dp_read(dp, phy_pd_addr); - - if (enable) - reg |= mask; - else - reg &= ~mask; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - break; - case CH3_BLOCK: - mask = CH3_PD; -- reg = readl(dp->reg_base + phy_pd_addr); -+ reg = analogix_dp_read(dp, phy_pd_addr); - - if (enable) - reg |= mask; - else - reg &= ~mask; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - break; - case ANALOG_TOTAL: - /* -@@ -323,29 +328,29 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, - else - mask = DP_PHY_PD; - -- reg = readl(dp->reg_base + phy_pd_addr); -+ reg = analogix_dp_read(dp, phy_pd_addr); - if (enable) - reg |= mask; - else - reg &= ~mask; - -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) - usleep_range(10, 15); - break; - case POWER_ALL: - if (enable) { - reg = DP_ALL_PD; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - } else { - reg = DP_ALL_PD; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - usleep_range(10, 15); - reg &= ~DP_INC_BG; -- writel(reg, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, reg); - usleep_range(10, 15); - -- writel(0x00, dp->reg_base + phy_pd_addr); -+ analogix_dp_write(dp, phy_pd_addr, 0x00); - } - break; - default: -@@ -356,36 +361,24 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, - int analogix_dp_init_analog_func(struct analogix_dp_device *dp) - { - u32 reg; -- int 
timeout_loop = 0; - - analogix_dp_set_analog_power_down(dp, POWER_ALL, 0); - - reg = PLL_LOCK_CHG; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_DEBUG_CTL); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_DEBUG_CTL); - reg &= ~(F_PLL_LOCK | PLL_LOCK_CTRL); -- writel(reg, dp->reg_base + ANALOGIX_DP_DEBUG_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_DEBUG_CTL, reg); - - /* Power up PLL */ -- if (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { -- analogix_dp_set_pll_power_down(dp, 0); -- -- while (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { -- timeout_loop++; -- if (DP_TIMEOUT_LOOP_COUNT < timeout_loop) { -- dev_err(dp->dev, "failed to get pll lock status\n"); -- return -ETIMEDOUT; -- } -- usleep_range(10, 20); -- } -- } -+ analogix_dp_set_pll_power_down(dp, 0); - - /* Enable Serdes FIFO function and Link symbol clock domain module */ -- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); - reg &= ~(SERDES_FIFO_FUNC_EN_N | LS_CLK_DOMAIN_FUNC_EN_N - | AUX_FUNC_EN_N); -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); - return 0; - } - -@@ -397,10 +390,10 @@ void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp) - return; - - reg = HOTPLUG_CHG | HPD_LOST | PLUG; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, reg); - - reg = INT_HPD; -- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, reg); - } - - void analogix_dp_init_hpd(struct analogix_dp_device *dp) -@@ -410,47 +403,56 @@ void analogix_dp_init_hpd(struct analogix_dp_device *dp) - if (dp->hpd_gpiod) - return; - -- analogix_dp_clear_hotplug_interrupts(dp); -+ analogix_dp_write(dp, ANALOGIX_DP_HPD_DEGLITCH_H, 0xbb); -+ analogix_dp_write(dp, ANALOGIX_DP_HPD_DEGLITCH_L, 0x80); - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); - reg &= ~(F_HPD | HPD_CTRL); -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); - } - - void analogix_dp_force_hpd(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -- reg = (F_HPD | HPD_CTRL); -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); -+ reg |= (F_HPD | HPD_CTRL); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); - } - --enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp) -+static void analogix_dp_handle_hpd_event(struct analogix_dp_device *dp) - { -+ bool changed = false; - u32 reg; - -- if (dp->hpd_gpiod) { -- reg = gpiod_get_value(dp->hpd_gpiod); -- if (reg) -- return DP_IRQ_TYPE_HP_CABLE_IN; -- else -- return DP_IRQ_TYPE_HP_CABLE_OUT; -- } else { -- /* Parse hotplug interrupt status register */ -- reg = readl(dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA); -+ if (reg & INT_HPD) { -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, INT_HPD); - -- if (reg & PLUG) -- return DP_IRQ_TYPE_HP_CABLE_IN; -+ memset(&dp->compliance, 0, sizeof(dp->compliance)); - -- if (reg & HPD_LOST) -- return DP_IRQ_TYPE_HP_CABLE_OUT; -+ analogix_dp_check_device_service_irq(dp); - -- if (reg & HOTPLUG_CHG) -- return DP_IRQ_TYPE_HP_CHANGE; -+ if (dp->compliance.test_active && -+ 
dp->compliance.test_type == DP_TEST_LINK_PHY_TEST_PATTERN) { -+ analogix_dp_phy_test(dp); -+ return; -+ } -+ } - -- return DP_IRQ_TYPE_UNKNOWN; -+ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_STA_4); -+ if (reg & HOTPLUG_CHG) { -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, HOTPLUG_CHG); -+ changed = true; - } ++#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + -+ if (changed) -+ drm_helper_hpd_irq_event(dp->drm_dev); -+} ++#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ ++#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ ++#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ ++#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ ++#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ ++#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ + -+void analogix_dp_irq_handler(struct analogix_dp_device *dp) -+{ -+ analogix_dp_handle_hpd_event(dp); - } - - void analogix_dp_reset_aux(struct analogix_dp_device *dp) -@@ -458,9 +460,9 @@ void analogix_dp_reset_aux(struct analogix_dp_device *dp) - u32 reg; - - /* Disable AUX channel module */ -- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); - reg |= AUX_FUNC_EN_N; -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); - } - - void analogix_dp_init_aux(struct analogix_dp_device *dp) -@@ -469,7 +471,7 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) - - /* Clear inerrupts related to AUX channel */ - reg = RPLY_RECEIV | AUX_ERR; -- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, reg); - - analogix_dp_set_analog_power_down(dp, AUX_BLOCK, true); - usleep_range(10, 11); -@@ -487,16 +489,17 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) - reg |= AUX_HW_RETRY_COUNT_SEL(0) | - AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; - -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_HW_RETRY_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_HW_RETRY_CTL, reg); - - /* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */ - reg = DEFER_CTRL_EN | DEFER_COUNT(1); -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_DEFER_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_DEFER_CTL, reg); - - /* Enable AUX channel module */ -- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ analogix_dp_enable_sw_function(dp); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); - reg &= ~AUX_FUNC_EN_N; -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); - } - - int analogix_dp_get_plug_in_status(struct analogix_dp_device *dp) -@@ -507,7 +510,7 @@ int analogix_dp_get_plug_in_status(struct analogix_dp_device *dp) - if (gpiod_get_value(dp->hpd_gpiod)) - return 0; - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); - if (reg & HPD_STATUS) - return 0; - } -@@ -519,60 +522,193 @@ void analogix_dp_enable_sw_function(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); - reg &= ~SW_FUNC_EN_N; -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); - } - --void analogix_dp_set_link_bandwidth(struct analogix_dp_device *dp, u32 bwtype) 
-+static void analogix_dp_ssc_enable(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = bwtype; -- if ((bwtype == DP_LINK_BW_2_7) || (bwtype == DP_LINK_BW_1_62)) -- writel(reg, dp->reg_base + ANALOGIX_DP_LINK_BW_SET); -+ /* 4500ppm */ -+ writel(0x19, dp->reg_base + ANALOIGX_DP_SSC_REG); -+ /* -+ * To apply updated SSC parameters into SSC operation, -+ * firmware must disable and enable this bit. -+ */ -+ reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ reg |= SSC_FUNC_EN_N; -+ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ reg &= ~SSC_FUNC_EN_N; -+ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+} ++#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ ++#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ ++#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ ++#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ ++#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ ++#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ ++#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ ++#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ ++#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ ++#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ ++#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ ++#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ ++#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ ++#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ ++#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ ++#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + -+static void analogix_dp_ssc_disable(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + -+ reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+ reg |= SSC_FUNC_EN_N; -+ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); -+} ++#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ ++#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ ++#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ ++#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ ++#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ ++#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ ++#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ ++#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job ++ slot n */ + -+bool analogix_dp_ssc_supported(struct analogix_dp_device *dp) -+{ -+ /* Check if SSC is supported by both sides */ -+ return dp->plat_data->ssc && dp->link_train.ssc; -+} ++#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ ++#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + -+void analogix_dp_set_link_bandwidth(struct analogix_dp_device *dp, u32 bwtype) -+{ -+ u32 status; -+ int ret; ++#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ ++#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + -+ analogix_dp_write(dp, ANALOGIX_DP_LINK_BW_SET, bwtype); ++#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ ++#define JS_AFFINITY_NEXT_HI 0x54 
/* (RW) Next core affinity mask for job slot n, high word */ ++#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ ++#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for ++ job slot n */ + -+ if (dp->phy) { -+ union phy_configure_opts phy_cfg = {0}; ++#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + -+ phy_cfg.dp.lanes = dp->link_train.lane_count; -+ phy_cfg.dp.link_rate = -+ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate) / 100; -+ phy_cfg.dp.ssc = analogix_dp_ssc_supported(dp); -+ phy_cfg.dp.set_lanes = false; -+ phy_cfg.dp.set_rate = true; -+ phy_cfg.dp.set_voltages = false; -+ ret = phy_configure(dp->phy, &phy_cfg); -+ if (ret && ret != -EOPNOTSUPP) { -+ dev_err(dp->dev, "%s: phy_configure failed: %d\n", -+ __func__, ret); -+ return; -+ } -+ } else { -+ if (analogix_dp_ssc_supported(dp)) -+ analogix_dp_ssc_enable(dp); -+ else -+ analogix_dp_ssc_disable(dp); -+ } ++#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + -+ ret = readx_poll_timeout(analogix_dp_get_pll_lock_status, dp, status, -+ status != PLL_UNLOCKED, 120, -+ 120 * DP_TIMEOUT_LOOP_COUNT); -+ if (ret) { -+ dev_err(dp->dev, "Wait for pll lock failed %d\n", ret); -+ return; -+ } - } - - void analogix_dp_get_link_bandwidth(struct analogix_dp_device *dp, u32 *bwtype) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_LINK_BW_SET); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_LINK_BW_SET); - *bwtype = reg; - } - - void analogix_dp_set_lane_count(struct analogix_dp_device *dp, u32 count) - { - u32 reg; -+ int ret; - - reg = count; -- writel(reg, dp->reg_base + ANALOGIX_DP_LANE_COUNT_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_LANE_COUNT_SET, reg); ++#define MEMORY_MANAGEMENT_BASE 0x2000 ++#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + -+ if (dp->phy) { -+ union phy_configure_opts phy_cfg = {0}; ++#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ ++#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ ++#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ ++#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + -+ phy_cfg.dp.lanes = dp->link_train.lane_count; -+ phy_cfg.dp.set_lanes = true; -+ phy_cfg.dp.set_rate = false; -+ phy_cfg.dp.set_voltages = false; -+ ret = phy_configure(dp->phy, &phy_cfg); -+ if (ret && ret != -EOPNOTSUPP) { -+ dev_err(dp->dev, "%s: phy_configure() failed: %d\n", -+ __func__, ret); -+ return; -+ } -+ } - } - - void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_LANE_COUNT_SET); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_LANE_COUNT_SET); - *count = reg; - } - -+void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp) -+{ -+ u8 lane; -+ int ret; ++#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ ++#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ ++#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ ++#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ ++#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ ++#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ ++#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ ++#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ ++#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ ++#define MMU_AS9 0x640 /* Configuration registers 
for address space 9 */ ++#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ ++#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ ++#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ ++#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ ++#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ ++#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + -+ for (lane = 0; lane < dp->link_train.lane_count; lane++) -+ analogix_dp_write(dp, -+ ANALOGIX_DP_LN0_LINK_TRAINING_CTL + 4 * lane, -+ dp->link_train.training_lane[lane]); ++#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + -+ if (dp->phy) { -+ union phy_configure_opts phy_cfg = {0}; ++#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ ++#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ ++#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ ++#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ ++#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ ++#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ ++#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ ++#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ ++#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ ++#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ ++#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + -+ for (lane = 0; lane < dp->link_train.lane_count; lane++) { -+ u8 training_lane = dp->link_train.training_lane[lane]; -+ u8 vs, pe; + -+ vs = (training_lane & DP_TRAIN_VOLTAGE_SWING_MASK) >> -+ DP_TRAIN_VOLTAGE_SWING_SHIFT; -+ pe = (training_lane & DP_TRAIN_PRE_EMPHASIS_MASK) >> -+ DP_TRAIN_PRE_EMPHASIS_SHIFT; -+ phy_cfg.dp.voltage[lane] = vs; -+ phy_cfg.dp.pre[lane] = pe; -+ } ++/* (RW) Translation table configuration for address space n, low word */ ++#define AS_TRANSCFG_LO 0x30 ++/* (RW) Translation table configuration for address space n, high word */ ++#define AS_TRANSCFG_HI 0x34 ++/* (RO) Secondary fault address for address space n, low word */ ++#define AS_FAULTEXTRA_LO 0x38 ++/* (RO) Secondary fault address for address space n, high word */ ++#define AS_FAULTEXTRA_HI 0x3C + -+ phy_cfg.dp.lanes = dp->link_train.lane_count; -+ phy_cfg.dp.link_rate = -+ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate) / 100; -+ phy_cfg.dp.set_lanes = false; -+ phy_cfg.dp.set_rate = false; -+ phy_cfg.dp.set_voltages = true; -+ ret = phy_configure(dp->phy, &phy_cfg); -+ if (ret && ret != -EOPNOTSUPP) { -+ dev_err(dp->dev, "%s: phy_configure() failed: %d\n", -+ __func__, ret); -+ return; -+ } -+ } -+} ++/* End Register Offsets */ + -+u32 analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, u8 lane) -+{ -+ return analogix_dp_read(dp, -+ ANALOGIX_DP_LN0_LINK_TRAINING_CTL + 4 * lane); -+} ++/* ++ * MMU_IRQ_RAWSTAT register values. Values are valid also for ++ MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
++ */ + - void analogix_dp_enable_enhanced_mode(struct analogix_dp_device *dp, - bool enable) - { - u32 reg; - - if (enable) { -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); - reg |= ENHANCED; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); - reg &= ~ENHANCED; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); - } - } - -+bool analogix_dp_get_enhanced_mode(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++#define MMU_PAGE_FAULT_FLAGS 16 + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); ++/* Macros returning a bitmask to retrieve page fault or bus error flags from ++ * MMU registers */ ++#define MMU_PAGE_FAULT(n) (1UL << (n)) ++#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + -+ return !!(reg & ENHANCED); -+} ++/* ++ * Begin LPAE MMU TRANSTAB register values ++ */ ++#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 ++#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) ++#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) ++#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) ++#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) ++#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) + - void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, - enum pattern_set pattern) - { -@@ -581,144 +717,64 @@ void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, - switch (pattern) { - case PRBS7: - reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_PRBS7; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - break; - case D10_2: - reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_D10_2; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - break; - case TRAINING_PTN1: - reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN1; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - break; - case TRAINING_PTN2: - reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN2; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); -+ break; -+ case TRAINING_PTN3: -+ reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN3; -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); -+ break; -+ case TEST_PATTERN_80BIT: -+ reg = 0x3e0f83e0; -+ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN0, reg); -+ reg = 0x0f83e0f8; -+ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN1, reg); -+ reg = 0x0000f83e; -+ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN2, reg); -+ reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_80BIT; -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); -+ break; -+ case TEST_PATTERN_HBR2: -+ reg = 0xfb; -+ analogix_dp_write(dp, ANALOGIX_DP_TEST_HBR2_PATTERN, reg); -+ reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_HBR2; -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - break; - case DP_NONE: - reg = SCRAMBLING_ENABLE | - LINK_QUAL_PATTERN_SET_DISABLE | - SW_TRAINING_PATTERN_SET_NORMAL; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - break; - default: - break; - } - } - --void 
analogix_dp_set_lane0_pre_emphasis(struct analogix_dp_device *dp, -- u32 level) --{ -- u32 reg; -- -- reg = readl(dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); -- reg &= ~PRE_EMPHASIS_SET_MASK; -- reg |= level << PRE_EMPHASIS_SET_SHIFT; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane1_pre_emphasis(struct analogix_dp_device *dp, -- u32 level) --{ -- u32 reg; -- -- reg = readl(dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); -- reg &= ~PRE_EMPHASIS_SET_MASK; -- reg |= level << PRE_EMPHASIS_SET_SHIFT; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane2_pre_emphasis(struct analogix_dp_device *dp, -- u32 level) --{ -- u32 reg; -- -- reg = readl(dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); -- reg &= ~PRE_EMPHASIS_SET_MASK; -- reg |= level << PRE_EMPHASIS_SET_SHIFT; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane3_pre_emphasis(struct analogix_dp_device *dp, -- u32 level) --{ -- u32 reg; -- -- reg = readl(dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); -- reg &= ~PRE_EMPHASIS_SET_MASK; -- reg |= level << PRE_EMPHASIS_SET_SHIFT; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane0_link_training(struct analogix_dp_device *dp, -- u32 training_lane) --{ -- u32 reg; -- -- reg = training_lane; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane1_link_training(struct analogix_dp_device *dp, -- u32 training_lane) --{ -- u32 reg; -- -- reg = training_lane; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane2_link_training(struct analogix_dp_device *dp, -- u32 training_lane) --{ -- u32 reg; -- -- reg = training_lane; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); --} -- --void analogix_dp_set_lane3_link_training(struct analogix_dp_device *dp, -- u32 training_lane) --{ -- u32 reg; -- -- reg = training_lane; -- writel(reg, dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); --} -- --u32 analogix_dp_get_lane0_link_training(struct analogix_dp_device *dp) --{ -- return readl(dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); --} -- --u32 analogix_dp_get_lane1_link_training(struct analogix_dp_device *dp) --{ -- return readl(dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); --} -- --u32 analogix_dp_get_lane2_link_training(struct analogix_dp_device *dp) --{ -- return readl(dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); --} -- --u32 analogix_dp_get_lane3_link_training(struct analogix_dp_device *dp) --{ -- return readl(dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); --} -- - void analogix_dp_reset_macro(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_PHY_TEST); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_PHY_TEST); - reg |= MACRO_RST; -- writel(reg, dp->reg_base + ANALOGIX_DP_PHY_TEST); -+ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, reg); - - /* 10 us is the minimum reset time. 
*/ - usleep_range(10, 20); - - reg &= ~MACRO_RST; -- writel(reg, dp->reg_base + ANALOGIX_DP_PHY_TEST); -+ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, reg); - } - - void analogix_dp_init_video(struct analogix_dp_device *dp) -@@ -726,19 +782,22 @@ void analogix_dp_init_video(struct analogix_dp_device *dp) - u32 reg; - - reg = VSYNC_DET | VID_FORMAT_CHG | VID_CLK_CHG; -- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); -+ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, reg); - - reg = 0x0; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, reg); - - reg = CHA_CRI(4) | CHA_CTRL; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, reg); - -- reg = 0x0; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ if (dp->video_info.force_stream_valid) { -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); -+ reg |= VALID_CTRL | F_VALID; -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); -+ } - - reg = VID_HRES_TH(2) | VID_VRES_TH(0); -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_8); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_8, reg); - } - - void analogix_dp_set_video_color_format(struct analogix_dp_device *dp) -@@ -749,36 +808,36 @@ void analogix_dp_set_video_color_format(struct analogix_dp_device *dp) - reg = (dp->video_info.dynamic_range << IN_D_RANGE_SHIFT) | - (dp->video_info.color_depth << IN_BPC_SHIFT) | - (dp->video_info.color_space << IN_COLOR_F_SHIFT); -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_2); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_2, reg); - - /* Set Input Color YCbCr Coefficients to ITU601 or ITU709 */ -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_3); - reg &= ~IN_YC_COEFFI_MASK; - if (dp->video_info.ycbcr_coeff) - reg |= IN_YC_COEFFI_ITU709; - else - reg |= IN_YC_COEFFI_ITU601; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_3, reg); - } - - int analogix_dp_is_slave_video_stream_clock_on(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_1); -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_1); - - if (!(reg & DET_STA)) { - dev_dbg(dp->dev, "Input stream clock not detected.\n"); - return -EINVAL; - } - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_2); -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_2); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_2); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_2); - dev_dbg(dp->dev, "wait SYS_CTL_2.\n"); - - if (reg & CHA_STA) { -@@ -796,30 +855,30 @@ void analogix_dp_set_video_cr_mn(struct analogix_dp_device *dp, - u32 reg; - - if (type == REGISTER_M) { -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); - reg |= FIX_M_VID; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); - reg = m_value & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_0); -+ analogix_dp_write(dp, ANALOGIX_DP_M_VID_0, reg); - reg = (m_value >> 8) & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_1); -+ 
analogix_dp_write(dp, ANALOGIX_DP_M_VID_1, reg); - reg = (m_value >> 16) & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_2); -+ analogix_dp_write(dp, ANALOGIX_DP_M_VID_2, reg); - - reg = n_value & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_0); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_0, reg); - reg = (n_value >> 8) & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_1); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_1, reg); - reg = (n_value >> 16) & 0xff; -- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_2); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_2, reg); - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); - reg &= ~FIX_M_VID; -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); - -- writel(0x00, dp->reg_base + ANALOGIX_DP_N_VID_0); -- writel(0x80, dp->reg_base + ANALOGIX_DP_N_VID_1); -- writel(0x00, dp->reg_base + ANALOGIX_DP_N_VID_2); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_0, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_1, 0x80); -+ analogix_dp_write(dp, ANALOGIX_DP_N_VID_2, 0x00); - } - } - -@@ -828,13 +887,13 @@ void analogix_dp_set_video_timing_mode(struct analogix_dp_device *dp, u32 type) - u32 reg; - - if (type == VIDEO_TIMING_FROM_CAPTURE) { -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); - reg &= ~FORMAT_SEL; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); - reg |= FORMAT_SEL; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - } - } - -@@ -843,15 +902,15 @@ void analogix_dp_enable_video_master(struct analogix_dp_device *dp, bool enable) - u32 reg; - - if (enable) { -- reg = readl(dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SOC_GENERAL_CTL); - reg &= ~VIDEO_MODE_MASK; - reg |= VIDEO_MASTER_MODE_EN | VIDEO_MODE_MASTER_MODE; -- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); - } else { -- reg = readl(dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SOC_GENERAL_CTL); - reg &= ~VIDEO_MODE_MASK; - reg |= VIDEO_MODE_SLAVE_MODE; -- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); - } - } - -@@ -859,19 +918,19 @@ void analogix_dp_start_video(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); - reg |= VIDEO_EN; -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); - } - - int analogix_dp_is_video_stream_on(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); - if (!(reg & STRM_VALID)) { - dev_dbg(dp->dev, "Input video stream is not detected.\n"); - return -EINVAL; -@@ -884,55 +943,55 @@ void 
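analogix_dp_set_video_cr_mn() in the hunk above programs the 24-bit M/N values one byte at a time into the *_VID_0/1/2 registers. A minimal sketch of that bit arithmetic follows, with hypothetical demo_* names and no hardware access.

/* Illustrative sketch only: splitting a 24-bit M/N value into three
 * byte-wide registers, as analogix_dp_set_video_cr_mn() does above. */
#include <stdint.h>
#include <stdio.h>

static void demo_split_mn(uint32_t value, uint8_t out[3])
{
        out[0] = value & 0xff;          /* *_VID_0: bits 7:0   */
        out[1] = (value >> 8) & 0xff;   /* *_VID_1: bits 15:8  */
        out[2] = (value >> 16) & 0xff;  /* *_VID_2: bits 23:16 */
}

int main(void)
{
        uint8_t m[3];

        demo_split_mn(0x012345, m);
        printf("M_VID_0=0x%02x M_VID_1=0x%02x M_VID_2=0x%02x\n", m[0], m[1], m[2]);
        return 0;
}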
analogix_dp_config_video_slave_mode(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); - if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { - reg &= ~(RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N); - } else { - reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); - reg |= MASTER_VID_FUNC_EN_N; - } -- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); - reg &= ~INTERACE_SCAN_CFG; - reg |= (dp->video_info.interlaced << 2); -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); - reg &= ~VSYNC_POLARITY_CFG; - reg |= (dp->video_info.v_sync_polarity << 1); -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - -- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); - reg &= ~HSYNC_POLARITY_CFG; - reg |= (dp->video_info.h_sync_polarity << 0); -- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - - reg = AUDIO_MODE_SPDIF_MODE | VIDEO_MODE_SLAVE_MODE; -- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); - } - - void analogix_dp_enable_scrambling(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_TRAINING_PTN_SET); - reg &= ~SCRAMBLING_DISABLE; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - } - - void analogix_dp_disable_scrambling(struct analogix_dp_device *dp) - { - u32 reg; - -- reg = readl(dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_TRAINING_PTN_SET); - reg |= SCRAMBLING_DISABLE; -- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); -+ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); - } - - void analogix_dp_enable_psr_crc(struct analogix_dp_device *dp) - { -- writel(PSR_VID_CRC_ENABLE, dp->reg_base + ANALOGIX_DP_CRC_CON); -+ analogix_dp_write(dp, ANALOGIX_DP_CRC_CON, PSR_VID_CRC_ENABLE); - } - - static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp) -@@ -948,6 +1007,24 @@ static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp) - return status; - } - -+static void analogix_dp_reuse_spd(struct analogix_dp_device *dp) -+{ -+ u32 reg, val; ++#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 + -+ switch (dp->plat_data->dev_type) { -+ case RK3588_EDP: -+ reg = ANALOGIX_DP_SPDIF_AUDIO_CTL_0; -+ break; -+ default: -+ reg = ANALOGIX_DP_VIDEO_CTL_3; -+ break; -+ } ++/* ++ * Begin AARCH64 MMU TRANSTAB register values ++ */ ++#define MMU_HW_OUTA_BITS 40 ++#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + -+ val = analogix_dp_read(dp, reg); -+ val |= REUSE_SPD_EN; -+ analogix_dp_write(dp, reg, val); -+} ++/* ++ * Begin MMU STATUS register values ++ */ ++#define AS_STATUS_AS_ACTIVE 0x01 + - int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - struct dp_sdp *vsc, bool blocking) - { -@@ -956,44 +1033,47 @@ int 
analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - ssize_t psr_status; - - /* don't send info frame */ -- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); - val &= ~IF_EN; -- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); - - /* configure single frame update mode */ -- writel(PSR_FRAME_UP_TYPE_BURST | PSR_CRC_SEL_HARDWARE, -- dp->reg_base + ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL); -+ analogix_dp_write(dp, ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL, -+ PSR_FRAME_UP_TYPE_BURST | PSR_CRC_SEL_HARDWARE); - - /* configure VSC HB0~HB3 */ -- writel(vsc->sdp_header.HB0, dp->reg_base + ANALOGIX_DP_SPD_HB0); -- writel(vsc->sdp_header.HB1, dp->reg_base + ANALOGIX_DP_SPD_HB1); -- writel(vsc->sdp_header.HB2, dp->reg_base + ANALOGIX_DP_SPD_HB2); -- writel(vsc->sdp_header.HB3, dp->reg_base + ANALOGIX_DP_SPD_HB3); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB0, vsc->sdp_header.HB0); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB1, vsc->sdp_header.HB1); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB2, vsc->sdp_header.HB2); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB3, vsc->sdp_header.HB3); - - /* configure reused VSC PB0~PB3, magic number from vendor */ -- writel(0x00, dp->reg_base + ANALOGIX_DP_SPD_PB0); -- writel(0x16, dp->reg_base + ANALOGIX_DP_SPD_PB1); -- writel(0xCE, dp->reg_base + ANALOGIX_DP_SPD_PB2); -- writel(0x5D, dp->reg_base + ANALOGIX_DP_SPD_PB3); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB0, 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB1, 0x16); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB2, 0xCE); -+ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB3, 0x5D); - - /* configure DB0 / DB1 values */ -- writel(vsc->db[0], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0); -- writel(vsc->db[1], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1); -+ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_DB0, vsc->db[0]); -+ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_DB1, vsc->db[1]); ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) + -+ /* configure PB0 / PB1 values */ -+ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_PB0, -+ vsc->db[1] ? 
0x8d : 0x00); -+ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_PB1, 0x00); - - /* set reuse spd inforframe */ -- val = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); -- val |= REUSE_SPD_EN; -- writel(val, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); -+ analogix_dp_reuse_spd(dp); - - /* mark info frame update */ -- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); - val = (val | IF_UP) & ~IF_EN; -- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); - - /* send info frame */ -- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); - val |= IF_EN; -- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); - - if (!blocking) - return 0; -@@ -1020,11 +1100,46 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - return 0; - } - -+int analogix_dp_phy_power_on(struct analogix_dp_device *dp) -+{ -+ int ret; ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + -+ ret = phy_set_mode(dp->phy, PHY_MODE_DP); -+ if (ret) { -+ dev_err(dp->dev, "phy_set_mode failed: %d\n", ret); -+ return ret; -+ } ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) + -+ ret = phy_power_on(dp->phy); -+ if (ret) { -+ dev_err(dp->dev, "phy_power_on failed: %d\n", ret); -+ return ret; -+ } ++/* ++ * Begin MMU TRANSCFG register values ++ */ + -+ return ret; -+} ++#define AS_TRANSCFG_ADRMODE_LEGACY 0 ++#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 ++#define AS_TRANSCFG_ADRMODE_IDENTITY 2 ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + -+void analogix_dp_phy_power_off(struct analogix_dp_device *dp) -+{ -+ phy_power_off(dp->phy); -+} ++#define AS_TRANSCFG_ADRMODE_MASK 0xF + -+enum { -+ AUX_STATUS_OK, -+ AUX_STATUS_NACK_ERROR, -+ AUX_STATUS_TIMEOUT_ERROR, -+ AUX_STATUS_UNKNOWN_ERROR, -+ AUX_STATUS_MUCH_DEFER_ERROR, -+ AUX_STATUS_TX_SHORT_ERROR, -+ AUX_STATUS_RX_SHORT_ERROR, -+ AUX_STATUS_NACK_WITHOUT_M_ERROR, -+ AUX_STATUS_I2C_NACK_ERROR -+}; + - ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - struct drm_dp_aux_msg *msg) - { - u32 reg; -- u32 status_reg; - u8 *buffer = msg->buffer; - unsigned int i; - int ret; -@@ -1035,7 +1150,7 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - - /* Clear AUX CH data buffer */ - reg = BUF_CLR; -- writel(reg, dp->reg_base + ANALOGIX_DP_BUFFER_DATA_CTL); -+ analogix_dp_write(dp, ANALOGIX_DP_BUFFER_DATA_CTL, reg); - - switch (msg->request & ~DP_AUX_I2C_MOT) { - case DP_AUX_I2C_WRITE: -@@ -1063,21 +1178,21 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - } - - reg |= AUX_LENGTH(msg->size); -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_1); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_CTL_1, reg); - - /* Select DPCD device address */ - reg = AUX_ADDR_7_0(msg->address); -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_ADDR_7_0); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_7_0, reg); - reg = AUX_ADDR_15_8(msg->address); -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_ADDR_15_8); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_15_8, reg); - reg = AUX_ADDR_19_16(msg->address); -- writel(reg, dp->reg_base 
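The AUX_STATUS_* enumeration introduced above encodes the result of an AUX channel transaction, and the transfer hunk only special-cases the timeout status (returning -ETIMEDOUT). The sketch below maps the same codes to errno-style results; treating every other non-zero status as -EIO is an illustrative policy, and the DEMO_* names are placeholders rather than the driver's.

/* Illustrative sketch only: mapping AUX channel status codes to errno-style
 * results.  Only the timeout case mirrors the driver; the -EIO fallback is a
 * policy chosen for this example. */
#include <errno.h>
#include <stdio.h>

enum {
        DEMO_AUX_STATUS_OK,
        DEMO_AUX_STATUS_NACK_ERROR,
        DEMO_AUX_STATUS_TIMEOUT_ERROR,
        DEMO_AUX_STATUS_UNKNOWN_ERROR,
        DEMO_AUX_STATUS_MUCH_DEFER_ERROR,
        DEMO_AUX_STATUS_TX_SHORT_ERROR,
        DEMO_AUX_STATUS_RX_SHORT_ERROR,
        DEMO_AUX_STATUS_NACK_WITHOUT_M_ERROR,
        DEMO_AUX_STATUS_I2C_NACK_ERROR
};

static int demo_aux_status_to_errno(unsigned int status)
{
        switch (status) {
        case DEMO_AUX_STATUS_OK:
                return 0;
        case DEMO_AUX_STATUS_TIMEOUT_ERROR:
                return -ETIMEDOUT;
        default:
                return -EIO;
        }
}

int main(void)
{
        printf("timeout -> %d\n", demo_aux_status_to_errno(DEMO_AUX_STATUS_TIMEOUT_ERROR));
        printf("nack    -> %d\n", demo_aux_status_to_errno(DEMO_AUX_STATUS_NACK_ERROR));
        return 0;
}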
+ ANALOGIX_DP_AUX_ADDR_19_16); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_19_16, reg); - - if (!(msg->request & DP_AUX_I2C_READ)) { - for (i = 0; i < msg->size; i++) { - reg = buffer[i]; -- writel(reg, dp->reg_base + ANALOGIX_DP_BUF_DATA_0 + -- 4 * i); -+ analogix_dp_write(dp, ANALOGIX_DP_BUF_DATA_0 + 4 * i, -+ reg); - } - } - -@@ -1088,7 +1203,7 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - if (msg->size < 1) - reg |= ADDR_ONLY; - -- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2); -+ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_CTL_2, reg); - - ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2, - reg, !(reg & AUX_EN), 25, 500 * 1000); -@@ -1107,29 +1222,29 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - } - - /* Clear interrupt source for AUX CH command reply */ -- writel(RPLY_RECEIV, dp->reg_base + ANALOGIX_DP_INT_STA); -+ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, RPLY_RECEIV); - -- /* Clear interrupt source for AUX CH access error */ -- reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); -- status_reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); -- if ((reg & AUX_ERR) || (status_reg & AUX_STATUS_MASK)) { -- writel(AUX_ERR, dp->reg_base + ANALOGIX_DP_INT_STA); -- -- dev_warn(dp->dev, "AUX CH error happened: %#x (%d)\n", -- status_reg & AUX_STATUS_MASK, !!(reg & AUX_ERR)); -- goto aux_error; -- } -+ reg = analogix_dp_read(dp, ANALOGIX_DP_AUX_CH_STA); -+ if ((reg & AUX_STATUS_MASK) == AUX_STATUS_TIMEOUT_ERROR) -+ return -ETIMEDOUT; - - if (msg->request & DP_AUX_I2C_READ) { -+ size_t buf_data_count; ++/* ++ * Begin TRANSCFG register values ++ */ ++#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_BUFFER_DATA_CTL); -+ buf_data_count = BUF_DATA_COUNT(reg); ++#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) ++#define AS_TRANSCFG_PTW_SH_OS (2 << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3 << 28) + -+ if (buf_data_count != msg->size) -+ return -EBUSY; - for (i = 0; i < msg->size; i++) { -- reg = readl(dp->reg_base + ANALOGIX_DP_BUF_DATA_0 + -- 4 * i); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_BUF_DATA_0 + -+ 4 * i); - buffer[i] = (unsigned char)reg; - } - } - - /* Check if Rx sends defer */ -- reg = readl(dp->reg_base + ANALOGIX_DP_AUX_RX_COMM); -+ reg = analogix_dp_read(dp, ANALOGIX_DP_AUX_RX_COMM); - if (reg == AUX_RX_COMM_AUX_DEFER) - msg->reply = DP_AUX_NATIVE_REPLY_DEFER; - else if (reg == AUX_RX_COMM_I2C_DEFER) -@@ -1149,3 +1264,127 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, - - return -EREMOTEIO; - } ++/* ++ * Begin Command Values ++ */ + -+void analogix_dp_set_video_format(struct analogix_dp_device *dp) -+{ -+ struct video_info *video = &dp->video_info; -+ const struct drm_display_mode *mode = &video->mode; -+ unsigned int hsw, hfp, hbp, vsw, vfp, vbp; ++/* JS_COMMAND register commands */ ++#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ ++#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
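The analogix_dp_set_video_format() helper added by this patch derives sync width and front/back porch values from the display mode before writing them to the *_CFG registers. A standalone sketch of that arithmetic, using arbitrary 1920x1080-style sample timings:

/* Illustrative sketch only: the timing derivation used by
 * analogix_dp_set_video_format() in this patch.  The mode numbers are samples. */
#include <stdio.h>

struct demo_mode {
        int hdisplay, hsync_start, hsync_end, htotal;
        int vdisplay, vsync_start, vsync_end, vtotal;
};

int main(void)
{
        struct demo_mode m = {
                .hdisplay = 1920, .hsync_start = 2008, .hsync_end = 2052, .htotal = 2200,
                .vdisplay = 1080, .vsync_start = 1084, .vsync_end = 1089, .vtotal = 1125,
        };
        int hsw = m.hsync_end - m.hsync_start;  /* horizontal sync width  */
        int hfp = m.hsync_start - m.hdisplay;   /* horizontal front porch */
        int hbp = m.htotal - m.hsync_end;       /* horizontal back porch  */
        int vsw = m.vsync_end - m.vsync_start;  /* vertical sync width    */
        int vfp = m.vsync_start - m.vdisplay;   /* vertical front porch   */
        int vbp = m.vtotal - m.vsync_end;       /* vertical back porch    */

        printf("hsw=%d hfp=%d hbp=%d\n", hsw, hfp, hbp);
        printf("vsw=%d vfp=%d vbp=%d\n", vsw, vfp, vbp);
        return 0;
}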
Writing this value is ignored */ ++#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ ++#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ ++#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ ++#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ ++#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ ++#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + -+ hsw = mode->hsync_end - mode->hsync_start; -+ hfp = mode->hsync_start - mode->hdisplay; -+ hbp = mode->htotal - mode->hsync_end; -+ vsw = mode->vsync_end - mode->vsync_start; -+ vfp = mode->vsync_start - mode->vdisplay; -+ vbp = mode->vtotal - mode->vsync_end; ++#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + -+ /* Set Video Format Parameters */ -+ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_LINE_CFG_L, -+ TOTAL_LINE_CFG_L(mode->vtotal)); -+ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_LINE_CFG_H, -+ TOTAL_LINE_CFG_H(mode->vtotal >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_LINE_CFG_L, -+ ACTIVE_LINE_CFG_L(mode->vdisplay)); -+ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_LINE_CFG_H, -+ ACTIVE_LINE_CFG_H(mode->vdisplay >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_V_F_PORCH_CFG, -+ V_F_PORCH_CFG(vfp)); -+ analogix_dp_write(dp, ANALOGIX_DP_V_SYNC_WIDTH_CFG, -+ V_SYNC_WIDTH_CFG(vsw)); -+ analogix_dp_write(dp, ANALOGIX_DP_V_B_PORCH_CFG, -+ V_B_PORCH_CFG(vbp)); -+ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_L, -+ TOTAL_PIXEL_CFG_L(mode->htotal)); -+ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_H, -+ TOTAL_PIXEL_CFG_H(mode->htotal >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_PIXEL_CFG_L, -+ ACTIVE_PIXEL_CFG_L(mode->hdisplay)); -+ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_PIXEL_CFG_H, -+ ACTIVE_PIXEL_CFG_H(mode->hdisplay >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_F_PORCH_CFG_L, -+ H_F_PORCH_CFG_L(hfp)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_F_PORCH_CFG_H, -+ H_F_PORCH_CFG_H(hfp >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_SYNC_CFG_L, -+ H_SYNC_CFG_L(hsw)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_SYNC_CFG_H, -+ H_SYNC_CFG_H(hsw >> 8)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_B_PORCH_CFG_L, -+ H_B_PORCH_CFG_L(hbp)); -+ analogix_dp_write(dp, ANALOGIX_DP_H_B_PORCH_CFG_H, -+ H_B_PORCH_CFG_H(hbp >> 8)); -+} ++/* AS_COMMAND register commands */ ++#define AS_COMMAND_NOP 0x00 /* NOP Operation */ ++#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ ++#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ ++#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ ++#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs ++ (deprecated - only for use with T60x) */ ++#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ ++#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then ++ flush all L2 caches then issue a flush region command to all MMUs */ + -+void analogix_dp_video_bist_enable(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ ++#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) ++#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) ++#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) ++#define JS_CONFIG_START_MMU (1u << 10) ++#define 
JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) ++#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION ++#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) ++#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) ++#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) ++#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) ++#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + -+ /* Enable Video BIST */ -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_4, BIST_EN); ++/* JS_XAFFINITY register values */ ++#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) ++#define JS_XAFFINITY_TILER_ENABLE (1u << 8) ++#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + -+ /* -+ * Note that if BIST_EN is set to 1, F_SEL must be cleared to 0 -+ * although video format information comes from registers set by user. -+ */ -+ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); -+ reg &= ~FORMAT_SEL; -+ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); -+} ++/* JS_STATUS register values */ + -+void analogix_dp_audio_config_i2s(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. ++ * The values are separated to avoid dependency of userspace and kernel code. ++ */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); -+ reg &= ~FIX_M_AUD; -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); ++/* Group of values representing the job status insead a particular fault */ ++#define JS_STATUS_NO_EXCEPTION_BASE 0x00 ++#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ ++#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ ++#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_I2S_CTRL); -+ reg |= I2S_EN; -+ analogix_dp_write(dp, ANALOGIX_DP_I2S_CTRL, reg); -+} ++/* General fault values */ ++#define JS_STATUS_FAULT_BASE 0x40 ++#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ ++#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ ++#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ ++#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ ++#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ ++#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + -+void analogix_dp_audio_config_spdif(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++/* Instruction or data faults */ ++#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 ++#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ ++#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ ++#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ ++#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ ++#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ ++#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ ++#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ ++/* NOTE: No fault with 0x57 code defined in spec. 
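The JS_STATUS_* values above are grouped by base: 0x00 for non-fault completion states, 0x40 for general faults, 0x50 for instruction/data faults, 0x60 for memory faults, and 0x7F for unknown. The small classification sketch below is built only on those bases; the category strings are editorial, not part of the register specification.

/* Illustrative sketch only: coarse classification of a JS_STATUS value by its
 * base range.  Codes between the documented values fall into the nearest class. */
#include <stdio.h>

static const char *demo_js_status_class(unsigned int status)
{
        if (status == 0x7F)
                return "unknown";
        if (status >= 0x60)
                return "memory fault";
        if (status >= 0x50)
                return "instruction/data fault";
        if (status >= 0x40)
                return "general fault";
        return "no exception (done/interrupted/stopped/terminated)";
}

int main(void)
{
        printf("0x03 -> %s\n", demo_js_status_class(0x03));
        printf("0x48 -> %s\n", demo_js_status_class(0x48));
        printf("0x58 -> %s\n", demo_js_status_class(0x58));
        printf("0x60 -> %s\n", demo_js_status_class(0x60));
        return 0;
}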
*/ ++#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ ++#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ ++#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); -+ reg &= ~FIX_M_AUD; -+ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); ++/* Other faults */ ++#define JS_STATUS_MEMORY_FAULT_BASE 0x60 ++#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ ++#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_SPDIF_AUDIO_CTL_0); -+ reg |= AUD_SPDIF_EN; -+ analogix_dp_write(dp, ANALOGIX_DP_SPDIF_AUDIO_CTL_0, reg); -+} ++/* GPU_COMMAND values */ ++#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ ++#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ ++#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ ++#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ ++#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ ++#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ ++#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ ++#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + -+void analogix_dp_audio_enable(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++/* End Command Values */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); -+ reg &= ~(AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N); -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); ++/* GPU_STATUS values */ ++#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ ++#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_AUD_CTL); -+ reg |= MISC_CTRL_RESET | DP_AUDIO_EN; -+ analogix_dp_write(dp, ANALOGIX_DP_AUD_CTL, reg); -+} ++/* PRFCNT_CONFIG register values */ ++#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ ++#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ ++#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + -+void analogix_dp_audio_disable(struct analogix_dp_device *dp) -+{ -+ u32 reg; ++#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ ++#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ ++#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ + -+ analogix_dp_write(dp, ANALOGIX_DP_AUD_CTL, 0); ++/* AS_MEMATTR values: */ ++/* Use GPU implementation-defined caching policy. */ ++#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull ++/* The attribute set to force all resources to be cached. 
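PRFCNT_CONFIG is assembled from a counter mode at bit 0, an address-space bitmap at bit 4, and a set-select field at bit 8, per the shifts defined above. The sketch below composes such a value; the header does not spell out field widths, so no masking is applied here, and the DEMO_* names are placeholders.

/* Illustrative sketch only: packing a PRFCNT_CONFIG value from the documented
 * shift positions.  Field widths are assumed wide enough for the sample inputs. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PRFCNT_CONFIG_MODE_SHIFT      0
#define DEMO_PRFCNT_CONFIG_AS_SHIFT        4
#define DEMO_PRFCNT_CONFIG_SETSELECT_SHIFT 8

#define DEMO_PRFCNT_CONFIG_MODE_MANUAL     1

static uint32_t demo_prfcnt_config(uint32_t mode, uint32_t as_bits, uint32_t setsel)
{
        return (mode << DEMO_PRFCNT_CONFIG_MODE_SHIFT) |
               (as_bits << DEMO_PRFCNT_CONFIG_AS_SHIFT) |
               (setsel << DEMO_PRFCNT_CONFIG_SETSELECT_SHIFT);
}

int main(void)
{
        /* manual sampling mode, address space 0 selected, counter set 0 */
        printf("PRFCNT_CONFIG = 0x%03x\n",
               (unsigned int)demo_prfcnt_config(DEMO_PRFCNT_CONFIG_MODE_MANUAL, 0x1, 0));
        return 0;
}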
*/ ++#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + -+ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); -+ reg |= AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N; -+ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); -+} ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull + -+void analogix_dp_init(struct analogix_dp_device *dp) -+{ -+ analogix_dp_init_interrupt(dp); -+ analogix_dp_config_interrupt(dp); -+ analogix_dp_init_hpd(dp); -+ analogix_dp_init_aux(dp); -+} -diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h -index e284ee8da..0a368b172 100644 ---- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h -+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h -@@ -15,9 +15,27 @@ - #define ANALOGIX_DP_VIDEO_CTL_1 0x20 - #define ANALOGIX_DP_VIDEO_CTL_2 0x24 - #define ANALOGIX_DP_VIDEO_CTL_3 0x28 -+#define ANALOGIX_DP_VIDEO_CTL_4 0x2C - - #define ANALOGIX_DP_VIDEO_CTL_8 0x3C - #define ANALOGIX_DP_VIDEO_CTL_10 0x44 -+#define ANALOGIX_DP_TOTAL_LINE_CFG_L 0x48 -+#define ANALOGIX_DP_TOTAL_LINE_CFG_H 0x4C -+#define ANALOGIX_DP_ACTIVE_LINE_CFG_L 0x50 -+#define ANALOGIX_DP_ACTIVE_LINE_CFG_H 0x54 -+#define ANALOGIX_DP_V_F_PORCH_CFG 0x58 -+#define ANALOGIX_DP_V_SYNC_WIDTH_CFG 0x5C -+#define ANALOGIX_DP_V_B_PORCH_CFG 0x60 -+#define ANALOGIX_DP_TOTAL_PIXEL_CFG_L 0x64 -+#define ANALOGIX_DP_TOTAL_PIXEL_CFG_H 0x68 -+#define ANALOGIX_DP_ACTIVE_PIXEL_CFG_L 0x6C -+#define ANALOGIX_DP_ACTIVE_PIXEL_CFG_H 0x70 -+#define ANALOGIX_DP_H_F_PORCH_CFG_L 0x74 -+#define ANALOGIX_DP_H_F_PORCH_CFG_H 0x78 -+#define ANALOGIX_DP_H_SYNC_CFG_L 0x7C -+#define ANALOGIX_DP_H_SYNC_CFG_H 0x80 -+#define ANALOGIX_DP_H_B_PORCH_CFG_L 0x84 -+#define ANALOGIX_DP_H_B_PORCH_CFG_H 0x88 - - #define ANALOGIX_DP_SPDIF_AUDIO_CTL_0 0xD8 - -@@ -27,6 +45,8 @@ - #define ANALOGIX_DP_PLL_REG_4 0x9ec - #define ANALOGIX_DP_PLL_REG_5 0xa00 - -+#define ANALOIGX_DP_SSC_REG 0x104 -+#define ANALOGIX_DP_BIAS 0x124 - #define ANALOGIX_DP_PD 0x12c - - #define ANALOGIX_DP_IF_TYPE 0x244 -@@ -43,6 +63,8 @@ - #define ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL 0x318 - #define ANALOGIX_DP_VSC_SHADOW_DB0 0x31C - #define ANALOGIX_DP_VSC_SHADOW_DB1 0x320 -+#define ANALOGIX_DP_VSC_SHADOW_PB0 0x33C -+#define ANALOGIX_DP_VSC_SHADOW_PB1 0x340 - - #define ANALOGIX_DP_LANE_MAP 0x35C - -@@ -70,7 +92,7 @@ - #define ANALOGIX_DP_SYS_CTL_2 0x604 - #define ANALOGIX_DP_SYS_CTL_3 0x608 - #define ANALOGIX_DP_SYS_CTL_4 0x60C -- -+#define ANALOGIX_DP_AUD_CTL 0x618 - #define ANALOGIX_DP_PKT_SEND_CTL 0x640 - #define ANALOGIX_DP_HDCP_CTL 0x648 - -@@ -116,8 +138,13 @@ - #define ANALOGIX_DP_BUF_DATA_0 0x7C0 - - #define ANALOGIX_DP_SOC_GENERAL_CTL 0x800 -- -+#define ANALOGIX_DP_TEST_80B_PATTERN0 0x81C -+#define ANALOGIX_DP_TEST_80B_PATTERN1 0x820 -+#define ANALOGIX_DP_TEST_80B_PATTERN2 0x824 -+#define ANALOGIX_DP_TEST_HBR2_PATTERN 0x828 -+#define ANALOGIX_DP_AUD_CHANNEL_CTL 0x834 - #define ANALOGIX_DP_CRC_CON 0x890 -+#define ANALOGIX_DP_I2S_CTRL 0x9C8 - - /* ANALOGIX_DP_TX_SW_RESET */ - #define RESET_DP_TX (0x1 << 0) -@@ -171,6 +198,11 @@ - #define VID_CHK_UPDATE_TYPE_0 (0x0 << 4) - #define REUSE_SPD_EN (0x1 << 3) - -+/* ANALOGIX_DP_VIDEO_CTL_4 */ -+#define BIST_EN (0x1 << 3) -+#define BIST_WIDTH(x) (((x) & 0x1) << 2) -+#define BIST_TYPE(x) (((x) & 0x3) << 0) ++/* Use GPU implementation-defined caching 
policy. */ ++#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull ++/* The attribute set to force all resources to be cached. */ ++#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull + - /* ANALOGIX_DP_VIDEO_CTL_8 */ - #define VID_HRES_TH(x) (((x) & 0xf) << 4) - #define VID_VRES_TH(x) (((x) & 0xf) << 0) -@@ -181,6 +213,60 @@ - #define VSYNC_POLARITY_CFG (0x1 << 1) - #define HSYNC_POLARITY_CFG (0x1 << 0) - -+/* ANALOGIX_DP_TOTAL_LINE_CFG_L */ -+#define TOTAL_LINE_CFG_L(x) (((x) & 0xff) << 0) ++/* Symbol for default MEMATTR to use */ + -+/* ANALOGIX_DP_TOTAL_LINE_CFG_H */ -+#define TOTAL_LINE_CFG_H(x) (((x) & 0xf) << 0) ++/* Default is - HW implementation defined caching */ ++#define AS_MEMATTR_INDEX_DEFAULT 0 ++#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + -+/* ANALOGIX_DP_ACTIVE_LINE_CFG_L */ -+#define ACTIVE_LINE_CFG_L(x) (((x) & 0xff) << 0) ++/* HW implementation defined caching */ ++#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 ++/* Force cache on */ ++#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 ++/* Write-alloc */ ++#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 ++/* Outer coherent, inner implementation defined policy */ ++#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 ++/* Outer coherent, write alloc inner */ ++#define AS_MEMATTR_INDEX_OUTER_WA 4 + -+/* ANALOGIX_DP_ACTIVE_LINE_CFG_H */ -+#define ACTIVE_LINE_CFG_H(x) (((x) & 0xf) << 0) ++/* JS_FEATURES register */ + -+/* ANALOGIX_DP_V_F_PORCH_CFG */ -+#define V_F_PORCH_CFG(x) (((x) & 0xff) << 0) ++#define JS_FEATURE_NULL_JOB (1u << 1) ++#define JS_FEATURE_SET_VALUE_JOB (1u << 2) ++#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) ++#define JS_FEATURE_COMPUTE_JOB (1u << 4) ++#define JS_FEATURE_VERTEX_JOB (1u << 5) ++#define JS_FEATURE_GEOMETRY_JOB (1u << 6) ++#define JS_FEATURE_TILER_JOB (1u << 7) ++#define JS_FEATURE_FUSED_JOB (1u << 8) ++#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + -+/* ANALOGIX_DP_V_SYNC_WIDTH_CFG */ -+#define V_SYNC_WIDTH_CFG(x) (((x) & 0xff) << 0) ++/* End JS_FEATURES register */ + -+/* ANALOGIX_DP_V_B_PORCH_CFG */ -+#define V_B_PORCH_CFG(x) (((x) & 0xff) << 0) ++/* L2_MMU_CONFIG register */ ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + -+/* ANALOGIX_DP_TOTAL_PIXEL_CFG_L */ -+#define TOTAL_PIXEL_CFG_L(x) (((x) & 0xff) << 0) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) ++#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) ++/* End 
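The LIMIT_EXTERNAL_READS/WRITES controls of L2_MMU_CONFIG above are 2-bit fields at shifts 24 and 26, with octant/quarter/half encodings 1/2/3. The standalone sketch below inserts and reads back those fields; the starting register value and DEMO_* names are illustrative only.

/* Illustrative sketch only: inserting and extracting the 2-bit external
 * read/write limit fields of L2_MMU_CONFIG. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_READS_SHIFT    24
#define DEMO_READS_MASK     (0x3u << DEMO_READS_SHIFT)
#define DEMO_READS_QUARTER  (0x2u << DEMO_READS_SHIFT)
#define DEMO_WRITES_SHIFT   26
#define DEMO_WRITES_MASK    (0x3u << DEMO_WRITES_SHIFT)
#define DEMO_WRITES_HALF    (0x3u << DEMO_WRITES_SHIFT)

int main(void)
{
        uint32_t cfg = 0x00000000;      /* arbitrary starting value */

        /* clear both fields, then limit reads to a quarter and writes to half */
        cfg &= ~(DEMO_READS_MASK | DEMO_WRITES_MASK);
        cfg |= DEMO_READS_QUARTER | DEMO_WRITES_HALF;

        printf("L2_MMU_CONFIG = 0x%08x\n", (unsigned int)cfg);
        printf("reads field   = %u\n", (unsigned int)((cfg & DEMO_READS_MASK) >> DEMO_READS_SHIFT));
        printf("writes field  = %u\n", (unsigned int)((cfg & DEMO_WRITES_MASK) >> DEMO_WRITES_SHIFT));
        return 0;
}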
L2_MMU_CONFIG register */ + -+/* ANALOGIX_DP_TOTAL_PIXEL_CFG_H */ -+#define TOTAL_PIXEL_CFG_H(x) (((x) & 0x3f) << 0) ++/* THREAD_* registers */ + -+/* ANALOGIX_DP_ACTIVE_PIXEL_CFG_L */ -+#define ACTIVE_PIXEL_CFG_L(x) (((x) & 0xff) << 0) ++/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ ++#define IMPLEMENTATION_UNSPECIFIED 0 ++#define IMPLEMENTATION_SILICON 1 ++#define IMPLEMENTATION_FPGA 2 ++#define IMPLEMENTATION_MODEL 3 + -+/* ANALOGIX_DP_ACTIVE_PIXEL_CFG_H */ -+#define ACTIVE_PIXEL_CFG_H(x) (((x) & 0x3f) << 0) ++/* Default values when registers are not supported by the implemented hardware */ ++#define THREAD_MT_DEFAULT 256 ++#define THREAD_MWS_DEFAULT 256 ++#define THREAD_MBS_DEFAULT 256 ++#define THREAD_MR_DEFAULT 1024 ++#define THREAD_MTQ_DEFAULT 4 ++#define THREAD_MTGS_DEFAULT 10 + -+/* ANALOGIX_DP_H_F_PORCH_CFG_L */ -+#define H_F_PORCH_CFG_L(x) (((x) & 0xff) << 0) ++/* End THREAD_* registers */ + -+/* ANALOGIX_DP_H_F_PORCH_CFG_H */ -+#define H_F_PORCH_CFG_H(x) (((x) & 0xf) << 0) ++/* SHADER_CONFIG register */ + -+/* ANALOGIX_DP_H_SYNC_CFG_L */ -+#define H_SYNC_CFG_L(x) (((x) & 0xff) << 0) ++#define SC_ALT_COUNTERS (1ul << 3) ++#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) ++#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) ++#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) ++#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16) ++#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) ++#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25) ++/* End SHADER_CONFIG register */ + -+/* ANALOGIX_DP_H_SYNC_CFG_H */ -+#define H_SYNC_CFG_H(x) (((x) & 0xf) << 0) ++/* TILER_CONFIG register */ + -+/* ANALOGIX_DP_H_B_PORCH_CFG_L */ -+#define H_B_PORCH_CFG_L(x) (((x) & 0xff) << 0) ++#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) + -+/* ANALOGIX_DP_H_B_PORCH_CFG_H */ -+#define H_B_PORCH_CFG_H(x) (((x) & 0xf) << 0) ++/* End TILER_CONFIG register */ + -+/* ANALOGIX_DP_SPDIF_AUDIO_CTL_0 */ -+#define AUD_SPDIF_EN (0x1 << 7) ++/* JM_CONFIG register */ + - /* ANALOGIX_DP_PLL_REG_1 */ - #define REF_CLK_24M (0x1 << 0) - #define REF_CLK_27M (0x0 << 0) -@@ -309,16 +395,23 @@ - #define FIX_M_VID (0x1 << 2) - #define M_VID_UPDATE_CTRL (0x3 << 0) - -+/* ANALOGIX_DP_AUD_CTL */ -+#define MISC_CTRL_RESET (0x1 << 4) -+#define DP_AUDIO_EN (0x1 << 0) ++#define JM_TIMESTAMP_OVERRIDE (1ul << 0) ++#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) ++#define JM_JOB_THROTTLE_ENABLE (1ul << 2) ++#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) ++#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) ++#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) ++#define JM_IDVS_GROUP_SIZE_SHIFT (16) ++#define JM_MAX_IDVS_GROUP_SIZE (0x3F) ++/* End JM_CONFIG register */ + - /* ANALOGIX_DP_TRAINING_PTN_SET */ - #define SCRAMBLER_TYPE (0x1 << 9) - #define HW_LINK_TRAINING_PATTERN (0x1 << 8) - #define SCRAMBLING_DISABLE (0x1 << 5) - #define SCRAMBLING_ENABLE (0x0 << 5) --#define LINK_QUAL_PATTERN_SET_MASK (0x3 << 2) -+#define LINK_QUAL_PATTERN_SET_MASK (0x7 << 2) -+#define LINK_QUAL_PATTERN_SET_HBR2 (0x5 << 2) -+#define LINK_QUAL_PATTERN_SET_80BIT (0x4 << 2) - #define LINK_QUAL_PATTERN_SET_PRBS7 (0x3 << 2) - #define LINK_QUAL_PATTERN_SET_D10_2 (0x1 << 2) - #define LINK_QUAL_PATTERN_SET_DISABLE (0x0 << 2) - #define SW_TRAINING_PATTERN_SET_MASK (0x3 << 0) -+#define SW_TRAINING_PATTERN_SET_PTN3 (0x3 << 0) - #define SW_TRAINING_PATTERN_SET_PTN2 (0x2 << 0) - #define SW_TRAINING_PATTERN_SET_PTN1 (0x1 << 0) - #define SW_TRAINING_PATTERN_SET_NORMAL (0x0 << 0) -@@ -406,6 +499,11 @@ - #define VIDEO_MODE_SLAVE_MODE (0x1 << 0) - #define VIDEO_MODE_MASTER_MODE (0x0 << 0) - -+/* ANALOGIX_DP_AUD_CHANNEL_CTL */ -+#define 
AUD_CHANNEL_COUNT_6 (0x5 << 0) -+#define AUD_CHANNEL_COUNT_4 (0x3 << 0) -+#define AUD_CHANNEL_COUNT_2 (0x1 << 0) + - /* ANALOGIX_DP_PKT_SEND_CTL */ - #define IF_UP (0x1 << 4) - #define IF_EN (0x1 << 0) -@@ -414,4 +512,7 @@ - #define PSR_VID_CRC_FLUSH (0x1 << 2) - #define PSR_VID_CRC_ENABLE (0x1 << 0) - -+/* ANALOGIX_DP_I2S_CTRL */ -+#define I2S_EN (0x1 << 4) ++#endif /* _MIDGARD_REGMAP_H_ */ +diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h +new file mode 100644 +index 000000000..bd5f6614b +--- /dev/null ++++ b/drivers/gpu/arm/midgard/mali_timeline.h +@@ -0,0 +1,396 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + - #endif /* _ANALOGIX_DP_REG_H */ -diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile -index ce715562e..f4c6cf628 100644 ---- a/drivers/gpu/drm/bridge/synopsys/Makefile -+++ b/drivers/gpu/drm/bridge/synopsys/Makefile -@@ -1,8 +1,9 @@ - # SPDX-License-Identifier: GPL-2.0-only --obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o -+obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o dw-hdmi-hdcp.o \ -+ dw-hdmi-qp.o dw-hdmi-qp-hdcp.o - obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o - obj-$(CONFIG_DRM_DW_HDMI_GP_AUDIO) += dw-hdmi-gp-audio.o --obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o --obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o -+obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o dw-hdmi-qp-i2s-audio.o -+obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o dw-hdmi-qp-cec.o - - obj-$(CONFIG_DRM_DW_MIPI_DSI) += dw-mipi-dsi.o -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h -index f72d27208..966d297ef 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h -@@ -17,6 +17,7 @@ struct dw_hdmi_i2s_audio_data { - - void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); - u8 (*read)(struct dw_hdmi *hdmi, int offset); -+ void (*mod)(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned int reg); - u8 *(*get_eld)(struct dw_hdmi *hdmi); - }; - -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c -index be21c11de..414d601bf 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c -@@ -4,14 +4,17 @@ - * - * Copyright (C) 2015-2017 Russell King. 
- */ -+#include - #include - #include -+#include - #include - #include - #include - #include - - #include -+#include - - #include - #include -@@ -21,8 +24,11 @@ - enum { - HDMI_IH_CEC_STAT0 = 0x0106, - HDMI_IH_MUTE_CEC_STAT0 = 0x0186, -+ HDMI_IH_MUTE = 0x01ff, - - HDMI_CEC_CTRL = 0x7d00, -+ CEC_TRANS_MASK = 0x7, -+ CEC_CTRL_STANDBY = BIT(4), - CEC_CTRL_START = BIT(0), - CEC_CTRL_FRAME_TYP = 3 << 1, - CEC_CTRL_RETRY = 0 << 1, -@@ -47,12 +53,15 @@ enum { - HDMI_CEC_RX_CNT = 0x7d08, - HDMI_CEC_TX_DATA0 = 0x7d10, - HDMI_CEC_RX_DATA0 = 0x7d20, -+ HDMI_CEC_RX_DATA1 = 0x7d21, - HDMI_CEC_LOCK = 0x7d30, - HDMI_CEC_WKUPCTRL = 0x7d31, - }; - - struct dw_hdmi_cec { -+ struct device *dev; - struct dw_hdmi *hdmi; -+ struct miscdevice misc_dev; - const struct dw_hdmi_cec_ops *ops; - u32 addresses; - struct cec_adapter *adap; -@@ -61,11 +70,12 @@ struct dw_hdmi_cec { - bool tx_done; - bool rx_done; - struct cec_notifier *notify; -+ struct input_dev *devinput; - int irq; -- -- u8 regs_polarity; -- u8 regs_mask; -- u8 regs_mute_stat0; -+ int wake_irq; -+ bool wake_en; -+ bool standby_en; -+ struct mutex wake_lock; - }; - - static void dw_hdmi_write(struct dw_hdmi_cec *cec, u8 val, int offset) -@@ -78,6 +88,11 @@ static u8 dw_hdmi_read(struct dw_hdmi_cec *cec, int offset) - return cec->ops->read(cec->hdmi, offset); - } - -+static void dw_hdmi_mod(struct dw_hdmi_cec *cec, unsigned int offset, u8 mask, u8 val) -+{ -+ cec->ops->mod(cec->hdmi, val, mask, offset); -+} + - static int dw_hdmi_cec_log_addr(struct cec_adapter *adap, u8 logical_addr) - { - struct dw_hdmi_cec *cec = cec_get_drvdata(adap); -@@ -116,7 +131,7 @@ static int dw_hdmi_cec_transmit(struct cec_adapter *adap, u8 attempts, - dw_hdmi_write(cec, msg->msg[i], HDMI_CEC_TX_DATA0 + i); - - dw_hdmi_write(cec, msg->len, HDMI_CEC_TX_CNT); -- dw_hdmi_write(cec, ctrl | CEC_CTRL_START, HDMI_CEC_CTRL); -+ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_TRANS_MASK, ctrl | CEC_CTRL_START); - - return 0; - } -@@ -192,20 +207,28 @@ static int dw_hdmi_cec_enable(struct cec_adapter *adap, bool enable) - struct dw_hdmi_cec *cec = cec_get_drvdata(adap); - - if (!enable) { -- dw_hdmi_write(cec, ~0, HDMI_CEC_MASK); -- dw_hdmi_write(cec, ~0, HDMI_IH_MUTE_CEC_STAT0); - dw_hdmi_write(cec, 0, HDMI_CEC_POLARITY); - -- cec->ops->disable(cec->hdmi); -+ if (cec->wake_en && cec->standby_en) { -+ dw_hdmi_write(cec, 0xff, HDMI_IH_CEC_STAT0); -+ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_CTRL_STANDBY, CEC_CTRL_STANDBY); -+ dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); -+ dw_hdmi_write(cec, 0xff, HDMI_CEC_WKUPCTRL); -+ dw_hdmi_write(cec, ~(1 << 6), HDMI_CEC_MASK); -+ dw_hdmi_write(cec, ~(1 << 6), HDMI_IH_MUTE_CEC_STAT0); -+ dw_hdmi_write(cec, 0x01, HDMI_IH_MUTE); -+ } else { -+ cec->ops->disable(cec->hdmi); -+ } - } else { - unsigned int irqs; - -- dw_hdmi_write(cec, 0, HDMI_CEC_CTRL); -+ dw_hdmi_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); -+ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_CTRL_STANDBY, 0); -+ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); - dw_hdmi_write(cec, ~0, HDMI_IH_CEC_STAT0); - dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); - -- dw_hdmi_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); -- - cec->ops->enable(cec->hdmi); - - irqs = CEC_STAT_ERROR_INIT | CEC_STAT_NACK | CEC_STAT_EOM | -@@ -230,6 +253,173 @@ static void dw_hdmi_cec_del(void *data) - cec_delete_adapter(cec->adap); - } - -+static irqreturn_t dw_hdmi_cec_wake_irq(int irq, void *data) -+{ -+ struct cec_adapter *adap = data; -+ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); -+ u8 cec_int; + -+ cec_int = dw_hdmi_read(cec, HDMI_IH_CEC_STAT0); -+ if 
(!cec_int) -+ return IRQ_NONE; + -+ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); -+ dw_hdmi_write(cec, cec_int, HDMI_IH_CEC_STAT0); -+ dw_hdmi_write(cec, 0x00, HDMI_CEC_WKUPCTRL); + -+ if (!cec->wake_en) -+ return IRQ_HANDLED; ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM mali_timeline + -+ return IRQ_WAKE_THREAD; -+} ++#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _MALI_TIMELINE_H + -+static irqreturn_t dw_hdmi_cec_wake_thread(int irq, void *data) -+{ -+ struct cec_adapter *adap = data; -+ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); ++#include + -+ mutex_lock(&cec->wake_lock); ++TRACE_EVENT(mali_timeline_atoms_in_flight, + -+ if (!cec->standby_en) { -+ mutex_unlock(&cec->wake_lock); -+ return IRQ_HANDLED; -+ } -+ cec->standby_en = false; ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int tgid, ++ int count), + -+ dev_dbg(cec->dev, "wakeup opcode:0x%x\n", dw_hdmi_read(cec, HDMI_CEC_RX_DATA1)); -+ input_event(cec->devinput, EV_KEY, KEY_POWER, 1); -+ input_sync(cec->devinput); -+ input_event(cec->devinput, EV_KEY, KEY_POWER, 0); -+ input_sync(cec->devinput); -+ mutex_unlock(&cec->wake_lock); ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ tgid, ++ count), + -+ return IRQ_HANDLED; -+} ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, tgid) ++ __field(int, count) ++ ), + -+static int rockchip_hdmi_cec_input_init(struct dw_hdmi_cec *cec) -+{ -+ int err; ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->tgid = tgid; ++ __entry->count = count; ++ ), + -+ cec->devinput = devm_input_allocate_device(cec->dev); -+ if (!cec->devinput) -+ return -EPERM; ++ TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->tgid, ++ __entry->count) ++); + -+ cec->devinput->name = "hdmi_cec_key"; -+ cec->devinput->phys = "hdmi_cec_key/input0"; -+ cec->devinput->id.bustype = BUS_HOST; -+ cec->devinput->id.vendor = 0x0001; -+ cec->devinput->id.product = 0x0001; -+ cec->devinput->id.version = 0x0100; + -+ err = input_register_device(cec->devinput); -+ if (err < 0) { -+ input_free_device(cec->devinput); -+ return err; -+ } -+ input_set_capability(cec->devinput, EV_KEY, KEY_POWER); ++TRACE_EVENT(mali_timeline_atom, + -+ return 0; -+} ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int tgid, ++ int atom_id), + -+static long cec_standby(struct cec_adapter *adap, __u8 __user *parg) -+{ -+ u8 en; -+ int ret; -+ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ tgid, ++ atom_id), + -+ mutex_lock(&cec->wake_lock); -+ if (copy_from_user(&en, parg, sizeof(en))) { -+ mutex_unlock(&cec->wake_lock); -+ return -EFAULT; -+ } ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, tgid) ++ __field(int, atom_id) ++ ), + -+ cec->standby_en = !en; -+ ret = adap->ops->adap_enable(adap, en); -+ mutex_unlock(&cec->wake_lock); ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->tgid = tgid; ++ __entry->atom_id = atom_id; ++ ), + -+ return ret; -+} ++ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->tgid, ++ __entry->atom_id, ++ __entry->atom_id) ++); + -+static long cec_func_en(struct dw_hdmi_cec *cec, int __user *parg) -+{ -+ int en_mask; ++TRACE_EVENT(mali_timeline_gpu_slot_active, + -+ if (copy_from_user(&en_mask, parg, 
sizeof(en_mask))) -+ return -EFAULT; ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int tgid, ++ int js, ++ int count), + -+ cec->wake_en = (en_mask & CEC_EN) && (en_mask & CEC_WAKE); ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ tgid, ++ js, ++ count), + -+ return 0; -+} ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, tgid) ++ __field(int, js) ++ __field(int, count) ++ ), + -+static long dw_hdmi_cec_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -+{ -+ struct dw_hdmi_cec *cec; -+ struct miscdevice *misc_dev; -+ void __user *data; ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->tgid = tgid; ++ __entry->js = js; ++ __entry->count = count; ++ ), + -+ if (!f) -+ return -EFAULT; ++ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->tgid, ++ __entry->js, ++ __entry->count) ++); + -+ misc_dev = f->private_data; -+ cec = container_of(misc_dev, struct dw_hdmi_cec, misc_dev); -+ data = (void __user *)arg; ++TRACE_EVENT(mali_timeline_gpu_slot_action, + -+ switch (cmd) { -+ case CEC_STANDBY: -+ return cec_standby(cec->adap, data); -+ case CEC_FUNC_EN: -+ return cec_func_en(cec, data); -+ default: -+ return -EINVAL; -+ } ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int tgid, ++ int js, ++ int count), + -+ return -ENOTTY; -+} ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ tgid, ++ js, ++ count), + -+static int dw_hdmi_cec_open(struct inode *inode, struct file *f) -+{ -+ return 0; -+} ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, tgid) ++ __field(int, js) ++ __field(int, count) ++ ), + -+static int dw_hdmi_cec_release(struct inode *inode, struct file *f) -+{ -+ return 0; -+} ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->tgid = tgid; ++ __entry->js = js; ++ __entry->count = count; ++ ), + -+static const struct file_operations dw_hdmi_cec_file_operations = { -+ .compat_ioctl = dw_hdmi_cec_ioctl, -+ .unlocked_ioctl = dw_hdmi_cec_ioctl, -+ .open = dw_hdmi_cec_open, -+ .release = dw_hdmi_cec_release, -+ .owner = THIS_MODULE, -+}; ++ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->tgid, ++ __entry->js, ++ __entry->count) ++); + -+static void dw_hdmi_cec_hpd_wake_up(struct platform_device *pdev) -+{ -+ struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); ++TRACE_EVENT(mali_timeline_gpu_power_active, + -+ mutex_lock(&cec->wake_lock); ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int active), + -+ if (!cec->standby_en) { -+ mutex_unlock(&cec->wake_lock); -+ return; -+ } -+ cec->standby_en = false; ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ active), + -+ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, active) ++ ), + -+ input_event(cec->devinput, EV_KEY, KEY_POWER, 1); -+ input_sync(cec->devinput); -+ input_event(cec->devinput, EV_KEY, KEY_POWER, 0); -+ input_sync(cec->devinput); -+ mutex_unlock(&cec->wake_lock); -+} ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->active = active; ++ ), + -+static const struct dw_hdmi_cec_wake_ops 
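cec_func_en() above arms CEC wake-up only when the user-supplied mask has both the enable and wake bits set; the bit positions (BIT(0) and BIT(1)) come from the dw-hdmi-cec.h hunk of this patch. A minimal userspace sketch of that check, with DEMO_* placeholders:

/* Illustrative sketch only: the CEC_EN/CEC_WAKE mask test performed by
 * cec_func_en().  Wake-up is reported enabled only when both bits are set. */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_CEC_EN    (1u << 0)
#define DEMO_CEC_WAKE  (1u << 1)

static bool demo_wake_enabled(unsigned int en_mask)
{
        return (en_mask & DEMO_CEC_EN) && (en_mask & DEMO_CEC_WAKE);
}

int main(void)
{
        printf("mask 0x0 -> %d\n", demo_wake_enabled(0x0)); /* CEC off          */
        printf("mask 0x1 -> %d\n", demo_wake_enabled(0x1)); /* CEC on, no wake  */
        printf("mask 0x3 -> %d\n", demo_wake_enabled(0x3)); /* CEC on with wake */
        return 0;
}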
cec_ops = { -+ .hpd_wake_up = dw_hdmi_cec_hpd_wake_up, -+}; ++ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->active) + - static int dw_hdmi_cec_probe(struct platform_device *pdev) - { - struct dw_hdmi_cec_data *data = dev_get_platdata(&pdev->dev); -@@ -248,10 +438,14 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) - if (!cec) - return -ENOMEM; - -+ cec->dev = &pdev->dev; - cec->irq = data->irq; -+ cec->wake_irq = data->wake_irq; - cec->ops = data->ops; - cec->hdmi = data->hdmi; - -+ mutex_init(&cec->wake_lock); ++); + - platform_set_drvdata(pdev, cec); - - dw_hdmi_write(cec, 0, HDMI_CEC_TX_CNT); -@@ -266,20 +460,40 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) - if (IS_ERR(cec->adap)) - return PTR_ERR(cec->adap); - -+ dw_hdmi_set_cec_adap(cec->hdmi, cec->adap); ++TRACE_EVENT(mali_timeline_l2_power_active, + - /* override the module pointer */ - cec->adap->owner = THIS_MODULE; - -- ret = devm_add_action_or_reset(&pdev->dev, dw_hdmi_cec_del, cec); -- if (ret) -+ ret = devm_add_action(&pdev->dev, dw_hdmi_cec_del, cec); -+ if (ret) { -+ cec_delete_adapter(cec->adap); - return ret; -+ } - - ret = devm_request_threaded_irq(&pdev->dev, cec->irq, - dw_hdmi_cec_hardirq, -- dw_hdmi_cec_thread, IRQF_SHARED, -+ dw_hdmi_cec_thread, IRQF_SHARED | IRQF_ONESHOT, - "dw-hdmi-cec", cec->adap); - if (ret < 0) - return ret; - -+ if (cec->wake_irq > 0) { -+ ret = devm_request_threaded_irq(&pdev->dev, cec->wake_irq, -+ dw_hdmi_cec_wake_irq, -+ dw_hdmi_cec_wake_thread, -+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT, -+ "cec-wakeup", cec->adap); -+ if (ret) { -+ dev_err(&pdev->dev, -+ "hdmi_cec request_irq failed (%d).\n", -+ ret); -+ return ret; -+ } -+ device_init_wakeup(&pdev->dev, 1); -+ enable_irq_wake(cec->wake_irq); -+ } ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int state), + - cec->notify = cec_notifier_cec_adap_register(pdev->dev.parent, - NULL, cec->adap); - if (!cec->notify) -@@ -297,55 +511,38 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) - */ - devm_remove_action(&pdev->dev, dw_hdmi_cec_del, cec); - -- return 0; --} -- --static void dw_hdmi_cec_remove(struct platform_device *pdev) --{ -- struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); -- -- cec_notifier_cec_adap_unregister(cec->notify, cec->adap); -- cec_unregister_adapter(cec->adap); --} -+ rockchip_hdmi_cec_input_init(cec); - --static int __maybe_unused dw_hdmi_cec_resume(struct device *dev) --{ -- struct dw_hdmi_cec *cec = dev_get_drvdata(dev); -+ cec->misc_dev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "rk_cec"); -+ if (!cec->misc_dev.name) -+ return -ENOMEM; -+ cec->misc_dev.minor = MISC_DYNAMIC_MINOR; -+ cec->misc_dev.fops = &dw_hdmi_cec_file_operations; -+ cec->misc_dev.mode = 0666; - -- /* Restore logical address */ -- dw_hdmi_write(cec, cec->addresses & 255, HDMI_CEC_ADDR_L); -- dw_hdmi_write(cec, cec->addresses >> 8, HDMI_CEC_ADDR_H); -+ ret = misc_register(&cec->misc_dev); - -- /* Restore interrupt status/mask registers */ -- dw_hdmi_write(cec, cec->regs_polarity, HDMI_CEC_POLARITY); -- dw_hdmi_write(cec, cec->regs_mask, HDMI_CEC_MASK); -- dw_hdmi_write(cec, cec->regs_mute_stat0, HDMI_IH_MUTE_CEC_STAT0); -+ dw_hdmi_cec_wake_ops_register(cec->hdmi, &cec_ops); - -- return 0; -+ return ret; - } - --static int __maybe_unused dw_hdmi_cec_suspend(struct device *dev) -+static int dw_hdmi_cec_remove(struct platform_device *pdev) - { -- struct dw_hdmi_cec *cec = dev_get_drvdata(dev); -+ struct dw_hdmi_cec *cec = 
platform_get_drvdata(pdev); - -- /* store interrupt status/mask registers */ -- cec->regs_polarity = dw_hdmi_read(cec, HDMI_CEC_POLARITY); -- cec->regs_mask = dw_hdmi_read(cec, HDMI_CEC_MASK); -- cec->regs_mute_stat0 = dw_hdmi_read(cec, HDMI_IH_MUTE_CEC_STAT0); -+ cec_notifier_cec_adap_unregister(cec->notify, cec->adap); -+ cec_unregister_adapter(cec->adap); -+ misc_deregister(&cec->misc_dev); - - return 0; - } - --static const struct dev_pm_ops dw_hdmi_cec_pm = { -- SET_SYSTEM_SLEEP_PM_OPS(dw_hdmi_cec_suspend, dw_hdmi_cec_resume) --}; -- - static struct platform_driver dw_hdmi_cec_driver = { - .probe = dw_hdmi_cec_probe, -- .remove_new = dw_hdmi_cec_remove, -+ .remove = dw_hdmi_cec_remove, - .driver = { - .name = "dw-hdmi-cec", -- .pm = &dw_hdmi_cec_pm, - }, - }; - module_platform_driver(dw_hdmi_cec_driver); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h -index cf4dc121a..ec10660a7 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h -@@ -3,17 +3,24 @@ - - struct dw_hdmi; - -+#define CEC_EN BIT(0) -+#define CEC_WAKE BIT(1) ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ state), + - struct dw_hdmi_cec_ops { - void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); - u8 (*read)(struct dw_hdmi *hdmi, int offset); - void (*enable)(struct dw_hdmi *hdmi); - void (*disable)(struct dw_hdmi *hdmi); -+ void (*mod)(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned int reg); - }; - - struct dw_hdmi_cec_data { - struct dw_hdmi *hdmi; - const struct dw_hdmi_cec_ops *ops; - int irq; -+ int wake_irq; - }; - -+void dw_hdmi_hpd_wake_up(struct platform_device *pdev); ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, state) ++ ), + - #endif -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->state = state; ++ ), ++ ++ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->state) ++ ++); ++TRACE_EVENT(mali_timeline_pm_event, ++ ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int pm_event_type, ++ unsigned int pm_event_id), ++ ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ pm_event_type, ++ pm_event_id), ++ ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, pm_event_type) ++ __field(unsigned int, pm_event_id) ++ ), ++ ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->pm_event_type = pm_event_type; ++ __entry->pm_event_id = pm_event_id; ++ ), ++ ++ TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->pm_event_type, __entry->pm_event_id) ++ ++); ++ ++TRACE_EVENT(mali_timeline_slot_atom, ++ ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int event_type, ++ int tgid, ++ int js, ++ int atom_id), ++ ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ event_type, ++ tgid, ++ js, ++ atom_id), ++ ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, event_type) ++ __field(int, tgid) ++ __field(int, js) ++ __field(int, atom_id) ++ ), ++ ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->event_type = event_type; ++ __entry->tgid = 
tgid; ++ __entry->js = js; ++ __entry->atom_id = atom_id; ++ ), ++ ++ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->tgid, ++ __entry->js, ++ __entry->atom_id) ++); ++ ++TRACE_EVENT(mali_timeline_pm_checktrans, ++ ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int trans_code, ++ int trans_id), ++ ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ trans_code, ++ trans_id), ++ ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, trans_code) ++ __field(int, trans_id) ++ ), ++ ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->trans_code = trans_code; ++ __entry->trans_id = trans_id; ++ ), ++ ++ TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->trans_id) ++ ++); ++ ++TRACE_EVENT(mali_timeline_context_active, ++ ++ TP_PROTO(u64 ts_sec, ++ u32 ts_nsec, ++ int count), ++ ++ TP_ARGS(ts_sec, ++ ts_nsec, ++ count), ++ ++ TP_STRUCT__entry( ++ __field(u64, ts_sec) ++ __field(u32, ts_nsec) ++ __field(int, count) ++ ), ++ ++ TP_fast_assign( ++ __entry->ts_sec = ts_sec; ++ __entry->ts_nsec = ts_nsec; ++ __entry->count = count; ++ ), ++ ++ TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE, ++ (int)__entry->ts_sec, ++ (int)__entry->ts_nsec, ++ __entry->count) ++); ++ ++#endif /* _MALI_TIMELINE_H */ ++ ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . ++ ++/* This part must be outside protection */ ++#include ++ +diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h new file mode 100644 -index 000000000..5906b7cac +index 000000000..841d03fb5 --- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c -@@ -0,0 +1,748 @@ ++++ b/drivers/gpu/arm/midgard/mali_uk.h +@@ -0,0 +1,141 @@ +/* -+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd -+ * Author Huicong Xu + * -+ * This software is licensed under the terms of the GNU General Public -+ * License version 2, as published by the Free Software Foundation, and -+ * may be copied, distributed, and modified under those terms. ++ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. + * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. + */ + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#include "dw-hdmi.h" -+#include "dw-hdmi-hdcp.h" + -+#define HDCP_KEY_SIZE 308 -+#define HDCP_KEY_SEED_SIZE 2 + -+#define KSV_LEN 5 -+#define HEADER 10 -+#define SHAMAX 20 + -+#define MAX_DOWNSTREAM_DEVICE_NUM 5 -+#define DPK_WR_OK_TIMEOUT_US 30000 -+#define HDMI_HDCP1X_ID 5 ++/** ++ * @file mali_uk.h ++ * Types and definitions that are common across OSs for both the user ++ * and kernel side of the User-Kernel interface. 
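/*
 * Editorial aside, not part of the patch above: each TRACE_EVENT() in the
 * mali_timeline header generates a trace_<name>() call taking the TP_PROTO()
 * arguments, and TP_printk() renders it as "event_type,sec.nanosec,...".
 * A minimal sketch of emitting mali_timeline_context_active follows; the
 * wrapper name and the way the timestamp is obtained are assumptions, and the
 * trace header itself (whose include target is elided in this extract) must
 * also be included.
 */
#include <linux/ktime.h>

static void example_trace_context_active(int nr_active_contexts)
{
        struct timespec64 ts;

        ktime_get_real_ts64(&ts);
        trace_mali_timeline_context_active((u64)ts.tv_sec, (u32)ts.tv_nsec,
                                           nr_active_contexts);
}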
++ */ + -+/* HDCP Registers */ -+#define HDMI_HDCPREG_RMCTL 0x780e -+#define HDMI_HDCPREG_RMSTS 0x780f -+#define HDMI_HDCPREG_SEED0 0x7810 -+#define HDMI_HDCPREG_SEED1 0x7811 -+#define HDMI_HDCPREG_DPK0 0x7812 -+#define HDMI_HDCPREG_DPK1 0x7813 -+#define HDMI_HDCPREG_DPK2 0x7814 -+#define HDMI_HDCPREG_DPK3 0x7815 -+#define HDMI_HDCPREG_DPK4 0x7816 -+#define HDMI_HDCPREG_DPK5 0x7817 -+#define HDMI_HDCPREG_DPK6 0x7818 -+#define HDMI_HDCP2REG_CTRL 0x7904 -+#define HDMI_HDCP2REG_MASK 0x790c -+#define HDMI_HDCP2REG_MUTE 0x790e ++#ifndef _UK_H_ ++#define _UK_H_ + -+enum dw_hdmi_hdcp_state { -+ DW_HDCP_DISABLED, -+ DW_HDCP_AUTH_START, -+ DW_HDCP_AUTH_SUCCESS, -+ DW_HDCP_AUTH_FAIL, ++#ifdef __cplusplus ++extern "C" { ++#endif /* __cplusplus */ ++ ++/** ++ * @addtogroup base_api ++ * @{ ++ */ ++ ++/** ++ * @defgroup uk_api User-Kernel Interface API ++ * ++ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device ++ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver. ++ * ++ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent ++ * kernel-side API (UKK) via an OS-specific communication mechanism. ++ * ++ * This API is internal to the Midgard DDK and is not exposed to any applications. ++ * ++ * @{ ++ */ ++ ++/** ++ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The ++ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this ++ * identifier to select a UKK client to the uku_open() function. ++ * ++ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id ++ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to ++ * provide a mapping of the identifier to the OS specific device name. ++ * ++ */ ++enum uk_client_id { ++ /** ++ * Value used to identify the Base driver UK client. ++ */ ++ UK_CLIENT_MALI_T600_BASE, ++ ++ /** The number of uk clients supported. This must be the last member of the enum */ ++ UK_CLIENT_COUNT +}; + -+enum { -+ DW_HDMI_HDCP_KSV_LEN = 8, -+ DW_HDMI_HDCP_SHA_LEN = 20, -+ DW_HDMI_HDCP_DPK_LEN = 280, -+ DW_HDMI_HDCP_KEY_LEN = 308, -+ DW_HDMI_HDCP_SEED_LEN = 2, ++/** ++ * Each function callable through the UK interface has a unique number. ++ * Functions provided by UK clients start from number UK_FUNC_ID. ++ * Numbers below UK_FUNC_ID are used for internal UK functions. ++ */ ++enum uk_func { ++ UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */ ++ /** ++ * Each UK client numbers the functions they provide starting from ++ * number UK_FUNC_ID. This number is then eventually assigned to the ++ * id field of the union uk_header structure when preparing to make a ++ * UK call. See your UK client for a list of their function numbers. ++ */ ++ UK_FUNC_ID = 512 +}; + -+enum { -+ HDMI_MC_CLKDIS_HDCPCLK_MASK = 0x40, -+ HDMI_MC_CLKDIS_HDCPCLK_ENABLE = 0x00, ++/** ++ * Arguments for a UK call are stored in a structure. This structure consists ++ * of a fixed size header and a payload. The header carries a 32-bit number ++ * identifying the UK function to be called (see uk_func). When the UKK client ++ * receives this header and executed the requested UK function, it will use ++ * the same header to store the result of the function in the form of a ++ * int return code. 
The size of this structure is such that the ++ * first member of the payload following the header can be accessed efficiently ++ * on a 32 and 64-bit kernel and the structure has the same size regardless ++ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined ++ * accordingly in the OS specific mali_uk_os.h header file. ++ */ ++union uk_header { ++ /** ++ * 32-bit number identifying the UK function to be called. ++ * Also see uk_func. ++ */ ++ u32 id; ++ /** ++ * The int return code returned by the called UK function. ++ * See the specification of the particular UK function you are ++ * calling for the meaning of the error codes returned. All ++ * UK functions return 0 on success. ++ */ ++ u32 ret; ++ /* ++ * Used to ensure 64-bit alignment of this union. Do not remove. ++ * This field is used for padding and does not need to be initialized. ++ */ ++ u64 sizer; ++}; + -+ HDMI_A_SRMCTRL_SHA1_FAIL_MASK = 0X08, -+ HDMI_A_SRMCTRL_SHA1_FAIL_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_SHA1_FAIL_ENABLE = 0X08, ++/** ++ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call ++ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side. ++ */ ++struct uku_version_check_args { ++ union uk_header header; ++ /**< UK call header */ ++ u16 major; ++ /**< This field carries the user-side major version on input and the kernel-side major version on output */ ++ u16 minor; ++ /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */ ++ u8 padding[4]; ++}; + -+ HDMI_A_SRMCTRL_KSV_UPDATE_MASK = 0X04, -+ HDMI_A_SRMCTRL_KSV_UPDATE_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_UPDATE_ENABLE = 0X04, ++/** @} end group uk_api */ + -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_MASK = 0X01, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_ENABLE = 0X01, ++/** @} *//* end group base_api */ + -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_MASK = 0X02, -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_ENABLE = 0X02, ++#ifdef __cplusplus ++} ++#endif /* __cplusplus */ ++#endif /* _UK_H_ */ +diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig +new file mode 100644 +index 000000000..8fb4e917c +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/Kconfig +@@ -0,0 +1,24 @@ ++# ++# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
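/*
 * Editorial aside, not part of the patch above: a sketch of how a UK call
 * argument structure uses union uk_header as the comments describe (id set by
 * the caller, ret filled in by the kernel side, sizer keeping the union
 * 64-bit wide).  The argument struct, function number and return handling
 * below are invented for illustration.
 */
#include <linux/types.h>

struct uku_example_args {
        union uk_header header; /* header.id on input, header.ret on output */
        u64 payload;            /* first payload member stays 64-bit aligned */
};

static int uku_example_call(struct uku_example_args *args)
{
        /* UK clients number their own functions upwards from UK_FUNC_ID (512) */
        args->header.id = UK_FUNC_ID + 1;       /* hypothetical function number */

        /* ...the call would then be routed to the kernel-side UKK handler... */

        /* every UK function reports 0 in header.ret on success */
        return (int)args->header.ret;
}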
++# ++# + -+ HDMI_A_SRM_BASE_MAX_DEVS_EXCEEDED = 0x80, -+ HDMI_A_SRM_BASE_DEVICE_COUNT = 0x7f, + -+ HDMI_A_SRM_BASE_MAX_CASCADE_EXCEEDED = 0x08, + -+ HDMI_A_APIINTSTAT_KSVSHA1_CALC_INT = 0x02, + -+ /* HDCPREG_RMSTS field values */ -+ DPK_WR_OK_STS = 0x40, ++# Add your platform specific Kconfig file here ++# ++# "drivers/gpu/arm/midgard/platform/xxx/Kconfig" ++# ++# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME ++# + -+ HDMI_A_HDCP22_MASK = 0x40, +diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +new file mode 100755 +index 000000000..e888a42fc +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +@@ -0,0 +1,18 @@ ++# ++# (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ HDMI_HDCP2_OVR_EN_MASK = 0x02, -+ HDMI_HDCP2_OVR_ENABLE = 0x02, -+ HDMI_HDCP2_OVR_DISABLE = 0x00, + -+ HDMI_HDCP2_FORCE_MASK = 0x04, -+ HDMI_HDCP2_FORCE_ENABLE = 0x04, -+ HDMI_HDCP2_FORCE_DISABLE = 0x00, -+}; ++mali_kbase-y += \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_devicetree.o \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_runtime_pm.o +diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +new file mode 100644 +index 000000000..b2a7c93f1 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +@@ -0,0 +1,31 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+struct sha_t { -+ u8 mlength[8]; -+ u8 mblock[64]; -+ int mindex; -+ int mcomputed; -+ int mcorrupted; -+ unsigned int mdigest[5]; -+}; + -+static struct dw_hdcp *g_hdcp; + -+static inline unsigned int shacircularshift(unsigned int bits, -+ unsigned int word) ++#include ++ ++int kbase_platform_early_init(void) +{ -+ return (((word << bits) & 0xFFFFFFFF) | (word >> (32 - bits))); ++ /* Nothing needed at this stage */ ++ return 0; +} + -+static void hdcp_modb(struct dw_hdcp *hdcp, u8 data, u8 mask, unsigned int reg) -+{ -+ struct dw_hdmi *hdmi = hdcp->hdmi; -+ u8 val = hdcp->read(hdmi, reg) & ~mask; ++static struct kbase_platform_config dummy_platform_config; + -+ val |= data & mask; -+ hdcp->write(hdmi, val, reg); ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &dummy_platform_config; +} +diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..49e107f98 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +@@ -0,0 +1,73 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static void sha_reset(struct sha_t *sha) -+{ -+ u32 i = 0; + -+ sha->mindex = 0; -+ sha->mcomputed = false; -+ sha->mcorrupted = false; -+ for (i = 0; i < sizeof(sha->mlength); i++) -+ sha->mlength[i] = 0; + -+ sha1_init(sha->mdigest); -+} ++/** ++ * Maximum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MAX (5000) ++/** ++ * Minimum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN (5000) + -+static void sha_processblock(struct sha_t *sha) -+{ -+ u32 array[SHA1_WORKSPACE_WORDS]; ++/** ++ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock ++ * ++ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define CPU_SPEED_FUNC (NULL) + -+ sha1_transform(sha->mdigest, sha->mblock, array); -+ sha->mindex = 0; -+} ++/** ++ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * ++ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. 
++ * Default Value: NA ++ */ ++#define GPU_SPEED_FUNC (NULL) + -+static void sha_padmessage(struct sha_t *sha) ++/** ++ * Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++ ++/** ++ * Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) ++ ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +new file mode 100644 +index 000000000..aa4376afd +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +@@ -0,0 +1,100 @@ ++/* ++ * ++ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++ ++ ++#include ++#include ++#include ++#include ++ ++static int pm_callback_power_on(struct kbase_device *kbdev) +{ -+ /* -+ * Check to see if the current message block is too small to hold -+ * the initial padding bits and length. If so, we will pad the -+ * block, process it, and then continue padding into a second -+ * block. 
-+ */ -+ if (sha->mindex > 55) { -+ sha->mblock[sha->mindex++] = 0x80; -+ while (sha->mindex < 64) -+ sha->mblock[sha->mindex++] = 0; ++ int ret; + -+ sha_processblock(sha); -+ while (sha->mindex < 56) -+ sha->mblock[sha->mindex++] = 0; -+ } else { -+ sha->mblock[sha->mindex++] = 0x80; -+ while (sha->mindex < 56) -+ sha->mblock[sha->mindex++] = 0; -+ } ++ dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", ++ (void *)kbdev->dev->pm_domain); + -+ /* Store the message length as the last 8 octets */ -+ sha->mblock[56] = sha->mlength[7]; -+ sha->mblock[57] = sha->mlength[6]; -+ sha->mblock[58] = sha->mlength[5]; -+ sha->mblock[59] = sha->mlength[4]; -+ sha->mblock[60] = sha->mlength[3]; -+ sha->mblock[61] = sha->mlength[2]; -+ sha->mblock[62] = sha->mlength[1]; -+ sha->mblock[63] = sha->mlength[0]; ++ ret = pm_runtime_get_sync(kbdev->dev); + -+ sha_processblock(sha); ++ dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret); ++ ++ return 1; +} + -+static int sha_result(struct sha_t *sha) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ if (sha->mcorrupted) -+ return false; ++ dev_dbg(kbdev->dev, "pm_callback_power_off\n"); + -+ if (sha->mcomputed == 0) { -+ sha_padmessage(sha); -+ sha->mcomputed = true; -+ } -+ return true; ++ pm_runtime_put_autosuspend(kbdev->dev); +} + -+static void sha_input(struct sha_t *sha, const u8 *data, u32 size) ++int kbase_device_runtime_init(struct kbase_device *kbdev) +{ -+ int i = 0; -+ unsigned int j = 0; -+ int rc = true; -+ -+ if (data == 0 || size == 0) -+ return; ++ dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); ++ pm_runtime_enable(kbdev->dev); + -+ if (sha->mcomputed || sha->mcorrupted) { -+ sha->mcorrupted = true; -+ return; -+ } -+ while (size-- && !sha->mcorrupted) { -+ sha->mblock[sha->mindex++] = *data; ++ return 0; ++} + -+ for (i = 0; i < 8; i++) { -+ rc = true; -+ for (j = 0; j < sizeof(sha->mlength); j++) { -+ sha->mlength[j]++; -+ if (sha->mlength[j] != 0) { -+ rc = false; -+ break; -+ } -+ } -+ sha->mcorrupted = (sha->mcorrupted || -+ rc) ? 
true : false; -+ } -+ /* if corrupted then message is too long */ -+ if (sha->mindex == 64) -+ sha_processblock(sha); -+ data++; -+ } ++void kbase_device_runtime_disable(struct kbase_device *kbdev) ++{ ++ dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); ++ pm_runtime_disable(kbdev->dev); +} + -+static int hdcp_verify_ksv(const u8 *data, u32 size) ++static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ -+ u32 i = 0; -+ struct sha_t sha; ++ dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); + -+ if ((!data) || (size < (HEADER + SHAMAX))) -+ return false; ++ return 0; ++} + -+ sha_reset(&sha); -+ sha_input(&sha, data, size - SHAMAX); -+ if (sha_result(&sha) == false) -+ return false; ++static void pm_callback_runtime_off(struct kbase_device *kbdev) ++{ ++ dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); ++} + -+ for (i = 0; i < SHAMAX; i++) { -+ if (data[size - SHAMAX + i] != (u8)(sha.mdigest[i / 4] -+ >> ((i % 4) * 8))) -+ return false; -+ } -+ return true; ++static void pm_callback_resume(struct kbase_device *kbdev) ++{ ++ int ret = pm_callback_runtime_on(kbdev); ++ ++ WARN_ON(ret); +} + -+static int hdcp_load_keys_cb(struct dw_hdcp *hdcp) ++static void pm_callback_suspend(struct kbase_device *kbdev) +{ -+ u32 size; -+ u8 hdcp_vendor_data[320]; ++ pm_callback_runtime_off(kbdev); ++} + -+ hdcp->keys = kmalloc(HDCP_KEY_SIZE, GFP_KERNEL); -+ if (!hdcp->keys) -+ return -ENOMEM; ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = pm_callback_suspend, ++ .power_resume_callback = pm_callback_resume, ++#ifdef KBASE_PM_RUNTIME ++ .power_runtime_init_callback = kbase_device_runtime_init, ++ .power_runtime_term_callback = kbase_device_runtime_disable, ++ .power_runtime_on_callback = pm_callback_runtime_on, ++ .power_runtime_off_callback = pm_callback_runtime_off, ++#else /* KBASE_PM_RUNTIME */ ++ .power_runtime_init_callback = NULL, ++ .power_runtime_term_callback = NULL, ++ .power_runtime_on_callback = NULL, ++ .power_runtime_off_callback = NULL, ++#endif /* KBASE_PM_RUNTIME */ ++}; + -+ hdcp->seeds = kmalloc(HDCP_KEY_SEED_SIZE, GFP_KERNEL); -+ if (!hdcp->seeds) { -+ kfree(hdcp->keys); -+ return -ENOMEM; -+ } + -+ size = rk_vendor_read(HDMI_HDCP1X_ID, hdcp_vendor_data, 314); -+ if (size < (HDCP_KEY_SIZE + HDCP_KEY_SEED_SIZE)) { -+ dev_dbg(hdcp->dev, "HDCP: read size %d\n", size); -+ memset(hdcp->keys, 0, HDCP_KEY_SIZE); -+ memset(hdcp->seeds, 0, HDCP_KEY_SEED_SIZE); -+ } else { -+ memcpy(hdcp->keys, hdcp_vendor_data, HDCP_KEY_SIZE); -+ memcpy(hdcp->seeds, hdcp_vendor_data + HDCP_KEY_SIZE, -+ HDCP_KEY_SEED_SIZE); -+ } -+ return 0; -+} +diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h +new file mode 100644 +index 000000000..c11085af5 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h +@@ -0,0 +1,28 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+static int dw_hdmi_hdcp_load_key(struct dw_hdcp *hdcp) -+{ -+ int i, j; -+ int ret, val; -+ void __iomem *reg_rmsts_addr; -+ struct hdcp_keys *hdcp_keys; -+ struct dw_hdmi *hdmi = hdcp->hdmi; ++#include + -+ if (!hdcp->keys) { -+ ret = hdcp_load_keys_cb(hdcp); -+ if (ret) -+ return ret; -+ } -+ hdcp_keys = hdcp->keys; + -+ if (hdcp->reg_io_width == 4) -+ reg_rmsts_addr = hdcp->regs + (HDMI_HDCPREG_RMSTS << 2); -+ else if (hdcp->reg_io_width == 1) -+ reg_rmsts_addr = hdcp->regs + HDMI_HDCPREG_RMSTS; -+ else -+ return -EPERM; ++/** ++ * @brief Entry point to transfer control to a platform for early initialization ++ * ++ * This function is called early on in the initialization during execution of ++ * @ref kbase_driver_init. ++ * ++ * @return Zero to indicate success non-zero for failure. ++ */ ++int kbase_platform_early_init(void); ++int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev); +diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h +new file mode 100644 +index 000000000..01f9dfce9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h +@@ -0,0 +1,38 @@ ++/* ++ * ++ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Disable decryption logic */ -+ hdcp->write(hdmi, 0, HDMI_HDCPREG_RMCTL); -+ ret = readx_poll_timeout(readl, reg_rmsts_addr, val, -+ val & DPK_WR_OK_STS, 1000, -+ DPK_WR_OK_TIMEOUT_US); -+ if (ret) -+ return ret; -+ hdcp->write(hdmi, 0, HDMI_HDCPREG_DPK6); -+ hdcp->write(hdmi, 0, HDMI_HDCPREG_DPK5); + -+ /* The useful data in ksv should be 5 byte */ -+ for (i = 4; i >= 0; i--) -+ hdcp->write(hdmi, hdcp_keys->KSV[i], HDMI_HDCPREG_DPK0 + i); -+ ret = readx_poll_timeout(readl, reg_rmsts_addr, val, -+ val & DPK_WR_OK_STS, 1000, -+ DPK_WR_OK_TIMEOUT_US); + -+ if (ret) -+ return ret; ++#ifdef CONFIG_MALI_PLATFORM_FAKE + -+ /* Enable decryption logic */ -+ if (hdcp->seeds) { -+ hdcp->write(hdmi, 1, HDMI_HDCPREG_RMCTL); -+ hdcp->write(hdmi, hdcp->seeds[0], HDMI_HDCPREG_SEED1); -+ hdcp->write(hdmi, hdcp->seeds[1], HDMI_HDCPREG_SEED0); -+ } else { -+ hdcp->write(hdmi, 0, HDMI_HDCPREG_RMCTL); -+ } ++/** ++ * kbase_platform_fake_register - Entry point for fake platform registration ++ * ++ * This function is called early on in the initialization during execution of ++ * kbase_driver_init. ++ * ++ * Return: 0 to indicate success, non-zero for failure. ++ */ ++int kbase_platform_fake_register(void); + -+ /* Write encrypt device private key */ -+ for (i = 0; i < DW_HDMI_HDCP_DPK_LEN - 6; i += 7) { -+ for (j = 6; j >= 0; j--) -+ hdcp->write(hdmi, hdcp_keys->devicekey[i + j], -+ HDMI_HDCPREG_DPK0 + j); -+ ret = readx_poll_timeout(readl, reg_rmsts_addr, val, -+ val & DPK_WR_OK_STS, 1000, -+ DPK_WR_OK_TIMEOUT_US); ++/** ++ * kbase_platform_fake_unregister - Entry point for fake platform unregistration ++ * ++ * This function is called in the termination during execution of ++ * kbase_driver_exit. 
++ */ ++void kbase_platform_fake_unregister(void); + -+ if (ret) -+ return ret; -+ } -+ return 0; -+} ++#endif /* CONFIG_MALI_PLATFORM_FAKE */ +diff --git a/drivers/gpu/arm/midgard/platform/rk/Kbuild b/drivers/gpu/arm/midgard/platform/rk/Kbuild +new file mode 100755 +index 000000000..db993487e +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/rk/Kbuild +@@ -0,0 +1,17 @@ ++# ++# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+static int dw_hdmi_hdcp_start(struct dw_hdcp *hdcp) -+{ -+ struct dw_hdmi *hdmi = hdcp->hdmi; ++midgard_kbase-y += \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_rk.o + -+ if (!hdcp->enable) -+ return -EPERM; +diff --git a/drivers/gpu/arm/midgard/platform/rk/custom_log.h b/drivers/gpu/arm/midgard/platform/rk/custom_log.h +new file mode 100644 +index 000000000..fe5e12241 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/rk/custom_log.h +@@ -0,0 +1,209 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ---------------------------------------------------------------------------- ++ * File: custom_log.h ++ * ++ * Desc: ChenZhen å好的 log 输出的定制实现. ++ * ++ * -------------------------------------------------------------------- ++ * < 习语 å’Œ 缩略语 > : ++ * ++ * -------------------------------------------------------------------- ++ * Usage: ++ * ++ * Note: ++ * ++ * Author: ChenZhen ++ * ++ * ---------------------------------------------------------------------------- ++ * Version: ++ * v1.0 ++ * ---------------------------------------------------------------------------- ++ * Log: ++ ----Fri Nov 19 15:20:28 2010 v1.0 ++ * ++ * ---------------------------------------------------------------------------- ++ */ + -+ if (!(hdcp->read(hdmi, HDMI_HDCPREG_RMSTS) & 0x3f)) -+ dw_hdmi_hdcp_load_key(hdcp); ++#ifndef __CUSTOM_LOG_H__ ++#define __CUSTOM_LOG_H__ + -+ hdcp_modb(hdcp, HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE, -+ HDMI_FC_INVIDCONF_HDCP_KEEPOUT_MASK, -+ HDMI_FC_INVIDCONF); ++#ifdef __cplusplus ++extern "C" { ++#endif + -+ hdcp->remaining_times = hdcp->retry_times; -+ if (hdcp->read(hdmi, HDMI_CONFIG1_ID) & HDMI_A_HDCP22_MASK) { -+ if (hdcp->hdcp2_enable == 0) { -+ hdcp_modb(hdcp, HDMI_HDCP2_OVR_ENABLE | -+ HDMI_HDCP2_FORCE_DISABLE, -+ HDMI_HDCP2_OVR_EN_MASK | -+ HDMI_HDCP2_FORCE_MASK, -+ HDMI_HDCP2REG_CTRL); -+ hdcp->write(hdmi, 0xff, HDMI_HDCP2REG_MASK); -+ hdcp->write(hdmi, 0xff, HDMI_HDCP2REG_MUTE); -+ } else { -+ hdcp_modb(hdcp, HDMI_HDCP2_OVR_DISABLE | -+ HDMI_HDCP2_FORCE_DISABLE, -+ HDMI_HDCP2_OVR_EN_MASK | -+ HDMI_HDCP2_FORCE_MASK, -+ HDMI_HDCP2REG_CTRL); -+ hdcp->write(hdmi, 0x00, HDMI_HDCP2REG_MASK); -+ hdcp->write(hdmi, 0x00, HDMI_HDCP2REG_MUTE); -+ } -+ } ++/* ----------------------------------------------------------------------------- ++ * Include Files ++ * ----------------------------------------------------------------------------- ++ */ ++#include ++#include + -+ hdcp->write(hdmi, 0x40, HDMI_A_OESSWCFG); -+ hdcp_modb(hdcp, HDMI_A_HDCPCFG0_BYPENCRYPTION_DISABLE | -+ HDMI_A_HDCPCFG0_EN11FEATURE_DISABLE | -+ HDMI_A_HDCPCFG0_SYNCRICHECK_ENABLE, -+ HDMI_A_HDCPCFG0_BYPENCRYPTION_MASK | -+ 
HDMI_A_HDCPCFG0_EN11FEATURE_MASK | -+ HDMI_A_HDCPCFG0_SYNCRICHECK_MASK, HDMI_A_HDCPCFG0); ++/* ----------------------------------------------------------------------------- ++ * Macros Definition ++ * ----------------------------------------------------------------------------- ++ */ + -+ hdcp_modb(hdcp, HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_ENABLE | -+ HDMI_A_HDCPCFG1_PH2UPSHFTENC_ENABLE, -+ HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_MASK | -+ HDMI_A_HDCPCFG1_PH2UPSHFTENC_MASK, HDMI_A_HDCPCFG1); ++/** 若下列 macro 有被定义, æ‰ ä½¿èƒ½ log 输出. */ ++/* #define ENABLE_DEBUG_LOG */ + -+ /* Reset HDCP Engine */ -+ if (hdcp->read(hdmi, HDMI_MC_CLKDIS) & HDMI_MC_CLKDIS_HDCPCLK_MASK) { -+ hdcp_modb(hdcp, HDMI_A_HDCPCFG1_SWRESET_ASSERT, -+ HDMI_A_HDCPCFG1_SWRESET_MASK, HDMI_A_HDCPCFG1); -+ } ++/*----------------------------------------------------------------------------*/ + -+ hdcp->write(hdmi, 0x00, HDMI_A_APIINTMSK); -+ hdcp_modb(hdcp, HDMI_A_HDCPCFG0_RXDETECT_ENABLE, -+ HDMI_A_HDCPCFG0_RXDETECT_MASK, HDMI_A_HDCPCFG0); ++#ifdef ENABLE_VERBOSE_LOG ++/** Verbose log. */ ++#define V(fmt, args...) \ ++ pr_debug("V : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define V(...) ((void)0) ++#endif + -+ /* -+ * XXX: to sleep 100ms here between output hdmi and enable hdcpclk, -+ * otherwise hdcp auth fail when Connect to repeater -+ */ -+ msleep(100); -+ hdcp_modb(hdcp, HDMI_MC_CLKDIS_HDCPCLK_ENABLE, -+ HDMI_MC_CLKDIS_HDCPCLK_MASK, HDMI_MC_CLKDIS); ++#ifdef ENABLE_DEBUG_LOG ++/** Debug log. */ ++#define D(fmt, args...) \ ++ pr_info("D : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) ++#else ++#define D(...) ((void)0) ++#endif + -+ hdcp->status = DW_HDCP_AUTH_START; -+ dev_dbg(hdcp->dev, "%s success\n", __func__); -+ return 0; -+} ++#define I(fmt, args...) \ ++ pr_info("I : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+static int dw_hdmi_hdcp_stop(struct dw_hdcp *hdcp) -+{ -+ struct dw_hdmi *hdmi = hdcp->hdmi; ++#define W(fmt, args...) \ ++ pr_warn("W : [File] : %s; [Line] : %d; [Func] : %s(); " \ ++ fmt "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+ if (!hdcp->enable) -+ return -EPERM; ++#define E(fmt, args...) \ ++ pr_err("E : [File] : %s; [Line] : %d; [Func] : %s(); " fmt \ ++ "\n", \ ++ __FILE__, \ ++ __LINE__, \ ++ __func__, \ ++ ## args) + -+ hdcp_modb(hdcp, HDMI_MC_CLKDIS_HDCPCLK_DISABLE, -+ HDMI_MC_CLKDIS_HDCPCLK_MASK, HDMI_MC_CLKDIS); -+ hdcp->write(hdmi, 0xff, HDMI_A_APIINTMSK); ++/*-------------------------------------------------------*/ + -+ hdcp_modb(hdcp, HDMI_A_HDCPCFG0_RXDETECT_DISABLE, -+ HDMI_A_HDCPCFG0_RXDETECT_MASK, HDMI_A_HDCPCFG0); ++/** 使用 D(), 以åè¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. */ ++#define D_DEC(var) D(#var " = %d.", var) + -+ hdcp_modb(hdcp, HDMI_A_SRMCTRL_SHA1_FAIL_DISABLE | -+ HDMI_A_SRMCTRL_KSV_UPDATE_DISABLE, -+ HDMI_A_SRMCTRL_SHA1_FAIL_MASK | -+ HDMI_A_SRMCTRL_KSV_UPDATE_MASK, HDMI_A_SRMCTRL); ++#define E_DEC(var) E(#var " = %d.", var) + -+ hdcp->status = DW_HDCP_DISABLED; -+ return 0; -+} ++/** 使用 D(), 以åå…­è¿›åˆ¶çš„å½¢å¼æ‰“å°å˜é‡ 'var' çš„ value. 
*/ ++#define D_HEX(var) D(#var " = 0x%x.", var) + -+static int dw_hdmi_hdcp_ksvsha1(struct dw_hdcp *hdcp) -+{ -+ int rc = 0, value, list, i; -+ char bstaus0, bstaus1; -+ char *ksvlistbuf; -+ struct dw_hdmi *hdmi = hdcp->hdmi; ++#define E_HEX(var) E(#var " = 0x%x.", var) + -+ hdcp_modb(hdcp, HDMI_A_SRMCTRL_KSV_MEM_REQ_ENABLE, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_MASK, HDMI_A_SRMCTRL); ++/** ++ * 使用 D(), 以å六进制的形å¼, ++ * æ‰“å°æŒ‡é’ˆç±»åž‹å˜é‡ 'ptr' çš„ value. ++ */ ++#define D_PTR(ptr) D(#ptr " = %p.", ptr) + -+ list = 20; -+ do { -+ value = hdcp->read(hdmi, HDMI_A_SRMCTRL); -+ usleep_range(500, 1000); -+ } while ((value & HDMI_A_SRMCTRL_KSV_MEM_ACCESS_MASK) == 0 && --list); ++#define E_PTR(ptr) E(#ptr " = %p.", ptr) + -+ if ((value & HDMI_A_SRMCTRL_KSV_MEM_ACCESS_MASK) == 0) { -+ dev_err(hdcp->dev, "KSV memory can not access\n"); -+ rc = -EPERM; -+ goto out; -+ } ++/** 使用 D(), æ‰“å° char 字串. */ ++#define D_STR(p_str) \ ++do { \ ++ if (!p_str) { \ ++ D(#p_str " = NULL."); \ ++ else \ ++ D(#p_str " = '%s'.", p_str); \ ++} while (0) + -+ hdcp->read(hdmi, HDMI_A_SRM_BASE); -+ bstaus0 = hdcp->read(hdmi, HDMI_A_SRM_BASE + 1); -+ bstaus1 = hdcp->read(hdmi, HDMI_A_SRM_BASE + 2); ++#define E_STR(p_str) \ ++do { \ ++ if (!p_str) \ ++ E(#p_str " = NULL."); \ ++ else \ ++ E(#p_str " = '%s'.", p_str); \ ++} while (0) + -+ if (bstaus0 & HDMI_A_SRM_BASE_MAX_DEVS_EXCEEDED) { -+ dev_err(hdcp->dev, "MAX_DEVS_EXCEEDED\n"); -+ rc = -EPERM; -+ goto out; -+ } ++#ifdef ENABLE_DEBUG_LOG ++/** ++ * log 从 'p_start' 地å€å¼€å§‹çš„ 'len' 个字节的数æ®. ++ */ ++#define D_MEM(p_start, len) \ ++do { \ ++ int i = 0; \ ++ char *p = (char *)(p_start); \ ++ D("dump memory from addr of '" #p_start "', from %p, length %d' : ", \ ++ (p_start), \ ++ (len)); \ ++ pr_debug("\t\t"); \ ++ for (i = 0; i < (len); i++) \ ++ pr_debug("0x%02x, ", p[i]); \ ++ pr_debug("\n"); \ ++} while (0) ++#else ++#define D_MEM(...) ((void)0) ++#endif + -+ list = bstaus0 & HDMI_A_SRM_BASE_DEVICE_COUNT; -+ if (list > MAX_DOWNSTREAM_DEVICE_NUM) { -+ dev_err(hdcp->dev, "MAX_DOWNSTREAM_DEVICE_NUM\n"); -+ rc = -EPERM; -+ goto out; -+ } -+ if (bstaus1 & HDMI_A_SRM_BASE_MAX_CASCADE_EXCEEDED) { -+ dev_err(hdcp->dev, "MAX_CASCADE_EXCEEDED\n"); -+ rc = -EPERM; -+ goto out; -+ } ++/*-------------------------------------------------------*/ + -+ value = (list * KSV_LEN) + HEADER + SHAMAX; -+ ksvlistbuf = kmalloc(value, GFP_KERNEL); -+ if (!ksvlistbuf) { -+ rc = -ENOMEM; -+ goto out; -+ } ++/** ++ * 在特定æ¡ä»¶ä¸‹, 判定 error å‘生, ++ * å°†å˜é‡ 'ret_var' 设置 'err_code', ++ * log 输出对应的 Error Caution, ++ * ç„¶åŽè·³è½¬ 'label' 指定的代ç å¤„执行. ++ * @param msg ++ * 纯字串形å¼çš„æç¤ºä¿¡æ¯. ++ * @param ret_var ++ * æ ‡è¯†å‡½æ•°æ‰§è¡ŒçŠ¶æ€æˆ–者结果的å˜é‡, ++ * 将被设置具体的 Error Code. ++ * 通常是 'ret' or 'result'. ++ * @param err_code ++ * 表å¾ç‰¹å®š error 的常数标识, ++ * 通常是 å®çš„å½¢æ€. ++ * @param label ++ * 程åºå°†è¦è·³è½¬åˆ°çš„错误处ç†ä»£ç çš„æ ‡å·, ++ * 通常就是 'EXIT'. ++ * @param args... ++ * 对应 'msg_fmt' 实å‚中, ++ * '%s', '%d', ... 等转æ¢è¯´æ˜Žç¬¦çš„具体å¯å˜é•¿å®žå‚. ++ */ ++#define SET_ERROR_AND_JUMP(msg_fmt, ret_var, err_code, label, args...) 
\ ++do { \ ++ E("To set '" #ret_var "' to %d('" #err_code "'), because : " msg_fmt, \ ++ (err_code), \ ++ ## args); \ ++ (ret_var) = (err_code); \ ++ goto label; \ ++} while (0) + -+ ksvlistbuf[(list * KSV_LEN)] = bstaus0; -+ ksvlistbuf[(list * KSV_LEN) + 1] = bstaus1; -+ for (i = 2; i < value; i++) { -+ if (i < HEADER) /* BSTATUS & M0 */ -+ ksvlistbuf[(list * KSV_LEN) + i] = -+ hdcp->read(hdmi, HDMI_A_SRM_BASE + i + 1); -+ else if (i < (HEADER + (list * KSV_LEN))) /* KSV list */ -+ ksvlistbuf[i - HEADER] = -+ hdcp->read(hdmi, HDMI_A_SRM_BASE + i + 1); -+ else /* SHA */ -+ ksvlistbuf[i] = -+ hdcp->read(hdmi, HDMI_A_SRM_BASE + i + 1); -+ } -+ if (hdcp_verify_ksv(ksvlistbuf, value) == true) { -+ rc = 0; -+ dev_dbg(hdcp->dev, "ksv check valid\n"); -+ } else { -+ dev_err(hdcp->dev, "ksv check invalid\n"); -+ rc = -1; -+ } -+ kfree(ksvlistbuf); -+out: -+ hdcp_modb(hdcp, HDMI_A_SRMCTRL_KSV_MEM_REQ_DISABLE, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_MASK, HDMI_A_SRMCTRL); -+ return rc; -+} ++/* ----------------------------------------------------------------------------- ++ * Types and Structures Definition ++ * ----------------------------------------------------------------------------- ++ */ + -+static void dw_hdmi_hdcp_2nd_auth(struct dw_hdcp *hdcp) -+{ -+ if (dw_hdmi_hdcp_ksvsha1(hdcp)) -+ hdcp_modb(hdcp, HDMI_A_SRMCTRL_SHA1_FAIL_ENABLE | -+ HDMI_A_SRMCTRL_KSV_UPDATE_ENABLE, -+ HDMI_A_SRMCTRL_SHA1_FAIL_MASK | -+ HDMI_A_SRMCTRL_KSV_UPDATE_MASK, HDMI_A_SRMCTRL); -+ else -+ hdcp_modb(hdcp, HDMI_A_SRMCTRL_SHA1_FAIL_DISABLE | -+ HDMI_A_SRMCTRL_KSV_UPDATE_ENABLE, -+ HDMI_A_SRMCTRL_SHA1_FAIL_MASK | -+ HDMI_A_SRMCTRL_KSV_UPDATE_MASK, HDMI_A_SRMCTRL); -+} ++/* ----------------------------------------------------------------------------- ++ * Global Functions' Prototype ++ * ----------------------------------------------------------------------------- ++ */ + -+static void dw_hdmi_hdcp_isr(struct dw_hdcp *hdcp, int hdcp_int) -+{ -+ dev_dbg(hdcp->dev, "hdcp_int is 0x%02x\n", hdcp_int); -+ if (hdcp_int & HDMI_A_APIINTSTAT_KSVSHA1_CALC_INT) { -+ dev_dbg(hdcp->dev, "hdcp sink is a repeater\n"); -+ dw_hdmi_hdcp_2nd_auth(hdcp); -+ } -+ if (hdcp_int & 0x40) { -+ hdcp->status = DW_HDCP_AUTH_FAIL; -+ if (hdcp->remaining_times > 1) -+ hdcp->remaining_times--; -+ else if (hdcp->remaining_times == 1) -+ hdcp_modb(hdcp, -+ HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_DISABLE, -+ HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_MASK, -+ HDMI_A_HDCPCFG1); -+ } -+ if (hdcp_int & 0x80) { -+ dev_dbg(hdcp->dev, "hdcp auth success\n"); -+ hdcp->status = DW_HDCP_AUTH_SUCCESS; -+ } ++/* ----------------------------------------------------------------------------- ++ * Inline Functions Implementation ++ * ----------------------------------------------------------------------------- ++ */ ++ ++#ifdef __cplusplus +} ++#endif + -+static ssize_t hdcp_enable_read(struct device *device, -+ struct device_attribute *attr, char *buf) -+{ -+ bool enable = 0; -+ struct dw_hdcp *hdcp = g_hdcp; ++#endif /* __CUSTOM_LOG_H__ */ +diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..07c5b6f8a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_platform.h +@@ -0,0 +1,88 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
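/*
 * Editorial aside, not part of the patch above: typical use of the
 * custom_log.h helpers in a platform routine.  The function, parameter and
 * error value are invented for illustration; custom_log.h and
 * <linux/regulator/consumer.h> are assumed to be included.
 */
static int example_enable_vdd_gpu(struct regulator *vdd_gpu)
{
        int ret = 0;

        D_PTR(vdd_gpu);

        if (regulator_enable(vdd_gpu))
                SET_ERROR_AND_JUMP("could not enable vdd_gpu.",
                                   ret, -EINVAL, EXIT);

        I("vdd_gpu enabled.");
EXIT:
        return ret;
}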
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ */ + -+ if (hdcp) -+ enable = hdcp->enable; ++/** ++ * @file mali_kbase_config_platform.h ++ * 声明 platform_config_of_rk (platform_rk çš„ platform_config). ++ */ + -+ return snprintf(buf, PAGE_SIZE, "%d\n", enable); -+} ++/** ++ * Maximum frequency GPU will be clocked at. ++ * Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MAX (5000) + -+static ssize_t hdcp_enable_write(struct device *device, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ bool enable; -+ struct dw_hdcp *hdcp = g_hdcp; ++/** ++ * Minimum frequency GPU will be clocked at. ++ * Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN (5000) + -+ if (!hdcp) -+ return -EINVAL; ++/** ++ * CPU_SPEED_FUNC ++ * - A pointer to a function that calculates the CPU clock ++ * ++ * CPU clock speed of the platform is in MHz ++ * - see kbase_cpu_clk_speed_func for the function prototype. ++ * ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define CPU_SPEED_FUNC (NULL) + -+ if (kstrtobool(buf, &enable)) -+ return -EINVAL; ++/** ++ * GPU_SPEED_FUNC ++ * - A pointer to a function that calculates the GPU clock ++ * ++ * GPU clock speed of the platform in MHz ++ * - see kbase_gpu_clk_speed_func for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define GPU_SPEED_FUNC (NULL) + -+ if (hdcp->enable != enable) { -+ if (enable) { -+ hdcp->enable = enable; -+ if (hdcp->read(hdcp->hdmi, HDMI_PHY_STAT0) & -+ HDMI_PHY_HPD) -+ dw_hdmi_hdcp_start(hdcp); -+ } else { -+ dw_hdmi_hdcp_stop(hdcp); -+ hdcp->enable = enable; -+ } -+ } ++/** ++ * Power management configuration ++ * ++ * Attached value: ++ * pointer to @ref kbase_pm_callback_conf ++ * Default value: ++ * See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) ++extern struct kbase_pm_callback_conf pm_callbacks; + -+ return count; -+} ++/** ++ * Platform specific configuration functions ++ * ++ * Attached value: ++ * pointer to @ref kbase_platform_funcs_conf ++ * Default value: ++ * See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (&platform_funcs) ++extern struct kbase_platform_funcs_conf platform_funcs; + -+static DEVICE_ATTR(enable, 0644, hdcp_enable_read, hdcp_enable_write); ++/** ++ * Secure mode switch ++ * ++ * Attached value: pointer to @ref kbase_secure_ops ++ */ ++#define SECURE_CALLBACKS (NULL) + -+static ssize_t hdcp_trytimes_read(struct device *device, -+ struct device_attribute *attr, char *buf) -+{ -+ int trytimes = 0; -+ struct dw_hdcp *hdcp = g_hdcp; +diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c +new file mode 100644 +index 000000000..8ad910c12 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c +@@ -0,0 +1,492 @@ ++/* ++ * ++ * (C) COPYRIGHT ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ */ + -+ if (hdcp) -+ trytimes = hdcp->retry_times; ++/* #define ENABLE_DEBUG_LOG */ ++#include "custom_log.h" + -+ return snprintf(buf, PAGE_SIZE, "%d\n", trytimes); ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "mali_kbase_rk.h" ++ ++/** ++ * @file mali_kbase_config_rk.c ++ * 对 platform_config_of_rk 的具体实现. ++ * ++ * mali_device_driver 包å«ä¸¤éƒ¨åˆ† : ++ * .DP : platform_dependent_part_in_mdd : ++ * ä¾èµ– platform 部分, ++ * æºç åœ¨ /platform// ++ * 在 mali_device_driver 内部, ++ * 记为 platform_dependent_part, ++ * 也被记为 platform_specific_code. ++ * .DP : common_parts_in_mdd : ++ * arm 实现的通用的部分, ++ * æºç åœ¨ / 下. ++ * 在 mali_device_driver 内部, 记为 common_parts. ++ */ ++ ++/*---------------------------------------------------------------------------*/ ++ ++#ifdef CONFIG_REGULATOR ++static int rk_pm_enable_regulator(struct kbase_device *kbdev); ++static void rk_pm_disable_regulator(struct kbase_device *kbdev); ++#else ++static inline int rk_pm_enable_regulator(struct kbase_device *kbdev) ++{ ++ return 0; +} + -+static ssize_t hdcp_trytimes_write(struct device *device, -+ struct device_attribute *attr, -+ const char *buf, size_t count) ++static inline void rk_pm_disable_regulator(struct kbase_device *kbdev) +{ -+ int trytimes; -+ struct dw_hdcp *hdcp = g_hdcp; ++} ++#endif + -+ if (!hdcp) -+ return -EINVAL; ++static int rk_pm_enable_clk(struct kbase_device *kbdev); + -+ if (kstrtoint(buf, 0, &trytimes)) -+ return -EINVAL; ++static void rk_pm_disable_clk(struct kbase_device *kbdev); + -+ if (hdcp->retry_times != trytimes) { -+ hdcp->retry_times = trytimes; -+ hdcp->remaining_times = hdcp->retry_times; -+ } ++static int kbase_platform_rk_create_sysfs_files(struct device *dev); + -+ return count; -+} ++static void kbase_platform_rk_remove_sysfs_files(struct device *dev); + -+static DEVICE_ATTR(trytimes, 0644, hdcp_trytimes_read, hdcp_trytimes_write); ++/*---------------------------------------------------------------------------*/ + -+static ssize_t hdcp_status_read(struct device *device, -+ struct device_attribute *attr, char *buf) ++static void rk_pm_power_off_delay_work(struct work_struct *work) +{ -+ int status = DW_HDCP_DISABLED; -+ struct dw_hdcp *hdcp = g_hdcp; ++ struct rk_context *platform = ++ container_of(to_delayed_work(work), struct rk_context, work); ++ struct kbase_device *kbdev = platform->kbdev; + -+ if (hdcp) -+ status = hdcp->status; ++ if (!platform->is_powered) { ++ D("mali_dev is already powered off."); ++ return; ++ } + -+ if (status == DW_HDCP_DISABLED) -+ return snprintf(buf, PAGE_SIZE, "hdcp disable\n"); -+ else if (status == DW_HDCP_AUTH_START) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_start\n"); -+ else if (status == DW_HDCP_AUTH_SUCCESS) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_success\n"); -+ else if (status == DW_HDCP_AUTH_FAIL) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_fail\n"); -+ else -+ return snprintf(buf, PAGE_SIZE, "unknown status\n"); -+} ++ if (pm_runtime_enabled(kbdev->dev)) { ++ D("to put_sync_suspend mali_dev."); ++ pm_runtime_put_sync_suspend(kbdev->dev); ++ } + -+static DEVICE_ATTR(status, 0444, hdcp_status_read, NULL); ++ rk_pm_disable_regulator(kbdev); + -+static int 
dw_hdmi_hdcp_probe(struct platform_device *pdev) ++ platform->is_powered = false; ++ KBASE_TIMELINE_GPU_POWER(kbdev, 0); ++ wake_unlock(&platform->wake_lock); ++} ++ ++static int kbase_platform_rk_init(struct kbase_device *kbdev) +{ + int ret = 0; -+ struct dw_hdcp *hdcp = pdev->dev.platform_data; -+ -+ g_hdcp = hdcp; -+ hdcp->mdev.minor = MISC_DYNAMIC_MINOR; -+ hdcp->mdev.name = "hdmi_hdcp1x"; -+ hdcp->mdev.mode = 0666; ++ struct rk_context *platform; + -+ if (misc_register(&hdcp->mdev)) { -+ dev_err(&pdev->dev, "HDCP: Could not add character driver\n"); -+ return -EINVAL; ++ platform = kzalloc(sizeof(*platform), GFP_KERNEL); ++ if (!platform) { ++ E("err."); ++ return -ENOMEM; + } + -+ ret = device_create_file(hdcp->mdev.this_device, &dev_attr_enable); -+ if (ret) { -+ dev_err(&pdev->dev, "HDCP: Could not add sys file enable\n"); -+ ret = -EINVAL; -+ goto error0; -+ } ++ platform->is_powered = false; ++ platform->kbdev = kbdev; + -+ ret = device_create_file(hdcp->mdev.this_device, &dev_attr_trytimes); -+ if (ret) { -+ dev_err(&pdev->dev, "HDCP: Could not add sys file trytimes\n"); -+ ret = -EINVAL; -+ goto error1; ++ platform->delay_ms = 200; ++ if (of_property_read_u32(kbdev->dev->of_node, "power-off-delay-ms", ++ &platform->delay_ms)) ++ W("power-off-delay-ms not available."); ++ ++ platform->power_off_wq = create_freezable_workqueue("gpu_power_off_wq"); ++ if (!platform->power_off_wq) { ++ E("couldn't create workqueue"); ++ ret = -ENOMEM; ++ goto err_wq; + } ++ INIT_DEFERRABLE_WORK(&platform->work, rk_pm_power_off_delay_work); + -+ ret = device_create_file(hdcp->mdev.this_device, &dev_attr_status); ++ wake_lock_init(&platform->wake_lock, WAKE_LOCK_SUSPEND, "gpu"); ++ ++ platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS; ++ ++ ret = kbase_platform_rk_create_sysfs_files(kbdev->dev); + if (ret) { -+ dev_err(&pdev->dev, "HDCP: Could not add sys file status\n"); -+ ret = -EINVAL; -+ goto error2; ++ E("fail to create sysfs_files. ret = %d.", ret); ++ goto err_sysfs_files; + } + -+ /* retry time if hdcp auth fail. 
unlimited time if set 0 */ -+ hdcp->retry_times = 0; -+ hdcp->dev = &pdev->dev; -+ hdcp->hdcp_start = dw_hdmi_hdcp_start; -+ hdcp->hdcp_stop = dw_hdmi_hdcp_stop; -+ hdcp->hdcp_isr = dw_hdmi_hdcp_isr; -+ dev_dbg(hdcp->dev, "%s success\n", __func__); ++ kbdev->platform_context = (void *)platform; ++ pm_runtime_enable(kbdev->dev); ++ + return 0; + -+error2: -+ device_remove_file(hdcp->mdev.this_device, &dev_attr_trytimes); -+error1: -+ device_remove_file(hdcp->mdev.this_device, &dev_attr_enable); -+error0: -+ misc_deregister(&hdcp->mdev); ++err_sysfs_files: ++ wake_lock_destroy(&platform->wake_lock); ++ destroy_workqueue(platform->power_off_wq); ++err_wq: + return ret; +} + -+static int dw_hdmi_hdcp_remove(struct platform_device *pdev) ++static void kbase_platform_rk_term(struct kbase_device *kbdev) +{ -+ struct dw_hdcp *hdcp = pdev->dev.platform_data; ++ struct rk_context *platform = ++ (struct rk_context *)kbdev->platform_context; + -+ device_remove_file(hdcp->mdev.this_device, &dev_attr_trytimes); -+ device_remove_file(hdcp->mdev.this_device, &dev_attr_enable); -+ device_remove_file(hdcp->mdev.this_device, &dev_attr_status); -+ misc_deregister(&hdcp->mdev); ++ pm_runtime_disable(kbdev->dev); ++ kbdev->platform_context = NULL; + -+ kfree(hdcp->keys); -+ kfree(hdcp->seeds); ++ if (platform) { ++ cancel_delayed_work_sync(&platform->work); ++ wake_lock_destroy(&platform->wake_lock); ++ destroy_workqueue(platform->power_off_wq); ++ platform->is_powered = false; ++ platform->kbdev = NULL; ++ kfree(platform); ++ } ++ kbase_platform_rk_remove_sysfs_files(kbdev->dev); ++} ++ ++struct kbase_platform_funcs_conf platform_funcs = { ++ .platform_init_func = &kbase_platform_rk_init, ++ .platform_term_func = &kbase_platform_rk_term, ++}; ++ ++/*---------------------------------------------------------------------------*/ + ++static int rk_pm_callback_runtime_on(struct kbase_device *kbdev) ++{ + return 0; +} + -+static struct platform_driver dw_hdmi_hdcp_driver = { -+ .probe = dw_hdmi_hdcp_probe, -+ .remove = dw_hdmi_hdcp_remove, -+ .driver = { -+ .name = DW_HDCP_DRIVER_NAME, -+ }, -+}; ++static void rk_pm_callback_runtime_off(struct kbase_device *kbdev) ++{ ++} + -+module_platform_driver(dw_hdmi_hdcp_driver); -+MODULE_DESCRIPTION("DW HDMI transmitter HDCP driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.h -new file mode 100644 -index 000000000..d138f91f3 ---- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.h -@@ -0,0 +1,54 @@ -+/* -+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd -+ * Author Huicong Xu -+ * -+ * This software is licensed under the terms of the GNU General Public -+ * License version 2, as published by the Free Software Foundation, and -+ * may be copied, distributed, and modified under those terms. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#ifndef DW_HDMI_HDCP_H -+#define DW_HDMI_HDCP_H -+ -+#include -+ -+#define DW_HDCP_DRIVER_NAME "dw-hdmi-hdcp" -+#define HDCP_PRIVATE_KEY_SIZE 280 -+#define HDCP_KEY_SHA_SIZE 20 -+ -+struct hdcp_keys { -+ u8 KSV[8]; -+ u8 devicekey[HDCP_PRIVATE_KEY_SIZE]; -+ u8 sha1[HDCP_KEY_SHA_SIZE]; -+}; -+ -+struct dw_hdcp { -+ bool enable; -+ int retry_times; -+ int remaining_times; -+ char *seeds; -+ int invalidkey; -+ char *invalidkeys; -+ int hdcp2_enable; -+ int status; -+ u32 reg_io_width; -+ -+ struct miscdevice mdev; -+ struct hdcp_keys *keys; -+ struct device *dev; -+ struct dw_hdmi *hdmi; -+ void __iomem *regs; -+ -+ void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); -+ u8 (*read)(struct dw_hdmi *hdmi, int offset); -+ int (*hdcp_start)(struct dw_hdcp *hdcp); -+ int (*hdcp_stop)(struct dw_hdcp *hdcp); -+ void (*hdcp_isr)(struct dw_hdcp *hdcp, int hdcp_int); -+}; -+ -+#endif -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c -index 26c187d20..194c96faa 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c -@@ -34,6 +34,14 @@ static inline u8 hdmi_read(struct dw_hdmi_i2s_audio_data *audio, int offset) - return audio->read(hdmi, offset); - } - -+static inline void hdmi_update_bits(struct dw_hdmi_i2s_audio_data *audio, -+ u8 data, u8 mask, unsigned int reg) ++static int rk_pm_callback_power_on(struct kbase_device *kbdev) +{ -+ struct dw_hdmi *hdmi = audio->hdmi; ++ int ret = 1; /* Assume GPU has been powered off */ ++ int err = 0; ++ struct rk_context *platform = get_rk_context(kbdev); + -+ audio->mod(hdmi, data, mask, reg); -+} ++ cancel_delayed_work_sync(&platform->work); + - static int dw_hdmi_i2s_hw_params(struct device *dev, void *data, - struct hdmi_codec_daifmt *fmt, - struct hdmi_codec_params *hparms) -@@ -42,6 +50,7 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data, - struct dw_hdmi *hdmi = audio->hdmi; - u8 conf0 = 0; - u8 conf1 = 0; -+ u8 conf2 = 0; - u8 inputclkfs = 0; - - /* it cares I2S only */ -@@ -51,7 +60,8 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data, - } - - /* Reset the FIFOs before applying new params */ -- hdmi_write(audio, HDMI_AUD_CONF0_SW_RESET, HDMI_AUD_CONF0); -+ hdmi_update_bits(audio, HDMI_AUD_CONF0_SW_RESET, -+ HDMI_AUD_CONF0_SW_RESET, HDMI_AUD_CONF0); - hdmi_write(audio, (u8)~HDMI_MC_SWRSTZ_I2SSWRST_REQ, HDMI_MC_SWRSTZ); - - inputclkfs = HDMI_AUD_INPUTCLKFS_64FS; -@@ -101,6 +111,23 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data, - return -EINVAL; - } - -+ switch (fmt->bit_fmt) { -+ case SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE: -+ conf1 = HDMI_AUD_CONF1_WIDTH_21; -+ conf2 = (hparms->channels == 8) ? HDMI_AUD_CONF2_HBR : HDMI_AUD_CONF2_NLPCM; -+ break; -+ default: -+ /* -+ * dw-hdmi introduced insert_pcuv bit in version 2.10a. 
-+ * When set (1'b1), this bit enables the insertion of the PCUV -+ * (Parity, Channel Status, User bit and Validity) bits on the -+ * incoming audio stream (support limited to Linear PCM audio) -+ */ -+ if (hdmi_read(audio, HDMI_DESIGN_ID) >= 0x21) -+ conf2 = HDMI_AUD_CONF2_INSERT_PCUV; -+ break; ++ err = rk_pm_enable_clk(kbdev); ++ if (err) { ++ E("failed to enable clk: %d", err); ++ return err; + } + - dw_hdmi_set_sample_rate(hdmi, hparms->sample_rate); - dw_hdmi_set_channel_status(hdmi, hparms->iec.status); - dw_hdmi_set_channel_count(hdmi, hparms->channels); -@@ -109,10 +136,18 @@ static int dw_hdmi_i2s_hw_params(struct device *dev, void *data, - hdmi_write(audio, inputclkfs, HDMI_AUD_INPUTCLKFS); - hdmi_write(audio, conf0, HDMI_AUD_CONF0); - hdmi_write(audio, conf1, HDMI_AUD_CONF1); -+ hdmi_write(audio, conf2, HDMI_AUD_CONF2); - - return 0; - } - -+static int dw_hdmi_i2s_prepare(struct device *dev, void *data, -+ struct hdmi_codec_daifmt *fmt, -+ struct hdmi_codec_params *hparms) -+{ -+ return dw_hdmi_i2s_hw_params(dev, data, fmt, hparms); -+} -+ - static int dw_hdmi_i2s_audio_startup(struct device *dev, void *data) - { - struct dw_hdmi_i2s_audio_data *audio = data; -@@ -179,6 +214,7 @@ static int dw_hdmi_i2s_hook_plugged_cb(struct device *dev, void *data, - - static const struct hdmi_codec_ops dw_hdmi_i2s_ops = { - .hw_params = dw_hdmi_i2s_hw_params, -+ .prepare = dw_hdmi_i2s_prepare, - .audio_startup = dw_hdmi_i2s_audio_startup, - .audio_shutdown = dw_hdmi_i2s_audio_shutdown, - .get_eld = dw_hdmi_i2s_get_eld, -@@ -193,7 +229,6 @@ static int snd_dw_hdmi_probe(struct platform_device *pdev) - struct hdmi_codec_pdata pdata; - struct platform_device *platform; - -- memset(&pdata, 0, sizeof(pdata)); - pdata.ops = &dw_hdmi_i2s_ops; - pdata.i2s = 1; - pdata.max_i2s_channels = 8; -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-audio.h -new file mode 100644 -index 000000000..93f1a4295 ---- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-audio.h -@@ -0,0 +1,29 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. 
-+ * Author: Sugar Zhang -+ */ -+ -+#ifndef DW_HDMI_QP_AUDIO_H -+#define DW_HDMI_QP_AUDIO_H -+ -+struct dw_hdmi_qp; -+ -+struct dw_hdmi_qp_audio_data { -+ phys_addr_t phys; -+ void __iomem *base; -+ int irq; -+ struct dw_hdmi_qp *hdmi; -+ u8 *eld; -+}; -+ -+struct dw_hdmi_qp_i2s_audio_data { -+ struct dw_hdmi_qp *hdmi; -+ u8 *eld; -+ -+ void (*write)(struct dw_hdmi_qp *hdmi, u32 val, int offset); -+ u32 (*read)(struct dw_hdmi_qp *hdmi, int offset); -+ void (*mod)(struct dw_hdmi_qp *hdmi, u32 val, u32 mask, u32 reg); -+}; -+ -+#endif -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.c -new file mode 100644 -index 000000000..2d2320ed0 ---- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.c -@@ -0,0 +1,336 @@ -+// SPDX-License-Identifier: GPL-2.0+ -+/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+#include "dw-hdmi-qp-cec.h" -+ -+enum { -+ CEC_TX_CONTROL = 0x1000, -+ CEC_CTRL_CLEAR = BIT(0), -+ CEC_CTRL_START = BIT(0), -+ -+ CEC_STAT_DONE = BIT(0), -+ CEC_STAT_NACK = BIT(1), -+ CEC_STAT_ARBLOST = BIT(2), -+ CEC_STAT_LINE_ERR = BIT(3), -+ CEC_STAT_RETRANS_FAIL = BIT(4), -+ CEC_STAT_DISCARD = BIT(5), -+ CEC_STAT_TX_BUSY = BIT(8), -+ CEC_STAT_RX_BUSY = BIT(9), -+ CEC_STAT_DRIVE_ERR = BIT(10), -+ CEC_STAT_EOM = BIT(11), -+ CEC_STAT_NOTIFY_ERR = BIT(12), ++ if (platform->is_powered) { ++ D("mali_device is already powered."); ++ return 0; ++ } + -+ CEC_CONFIG = 0x1008, -+ CEC_ADDR = 0x100c, -+ CEC_TX_CNT = 0x1020, -+ CEC_RX_CNT = 0x1040, -+ CEC_TX_DATA3_0 = 0x1024, -+ CEC_RX_DATA3_0 = 0x1044, -+ CEC_LOCK_CONTROL = 0x1054, ++ /* we must enable vdd_gpu before pd_gpu_in_chip. */ ++ err = rk_pm_enable_regulator(kbdev); ++ if (err) { ++ E("fail to enable regulator, err : %d.", err); ++ return err; ++ } + -+ CEC_INT_STATUS = 0x4000, -+ CEC_INT_MASK_N = 0x4004, -+ CEC_INT_CLEAR = 0x4008, -+}; ++ /* è‹¥ mali_dev çš„ runtime_pm 是 enabled çš„, 则... */ ++ if (pm_runtime_enabled(kbdev->dev)) { ++ D("to resume mali_dev syncly."); ++ /* 对 pd_in_chip çš„ on æ“作, ++ * 将在 pm_domain çš„ runtime_pm_callbacks 中完æˆ. 
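++		 * i.e. the "on" operation for pd_in_chip is completed in the
++		 * runtime_pm callbacks of the pm_domain.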
++ */ ++ err = pm_runtime_get_sync(kbdev->dev); ++ if (err < 0) { ++ E("failed to runtime resume device: %d.", err); ++ return err; ++ } else if (err == 1) { /* runtime_pm_status is still active */ ++ D("chip has NOT been powered off, no need to re-init."); ++ ret = 0; ++ } ++ } + -+struct dw_hdmi_qp_cec { -+ struct dw_hdmi_qp *hdmi; -+ const struct dw_hdmi_qp_cec_ops *ops; -+ u32 addresses; -+ struct cec_adapter *adap; -+ struct cec_msg rx_msg; -+ unsigned int tx_status; -+ bool tx_done; -+ bool rx_done; -+ struct cec_notifier *notify; -+ int irq; -+}; ++ platform->is_powered = true; ++ KBASE_TIMELINE_GPU_POWER(kbdev, 1); ++ wake_lock(&platform->wake_lock); + -+static void dw_hdmi_qp_write(struct dw_hdmi_qp_cec *cec, u32 val, int offset) -+{ -+ cec->ops->write(cec->hdmi, val, offset); ++ return ret; +} + -+static u32 dw_hdmi_qp_read(struct dw_hdmi_qp_cec *cec, int offset) ++static void rk_pm_callback_power_off(struct kbase_device *kbdev) +{ -+ return cec->ops->read(cec->hdmi, offset); ++ struct rk_context *platform = get_rk_context(kbdev); ++ ++ rk_pm_disable_clk(kbdev); ++ queue_delayed_work(platform->power_off_wq, &platform->work, ++ msecs_to_jiffies(platform->delay_ms)); +} + -+static int dw_hdmi_qp_cec_log_addr(struct cec_adapter *adap, u8 logical_addr) ++int rk_kbase_device_runtime_init(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp_cec *cec = cec_get_drvdata(adap); -+ -+ if (logical_addr == CEC_LOG_ADDR_INVALID) -+ cec->addresses = 0; -+ else -+ cec->addresses |= BIT(logical_addr) | BIT(15); -+ -+ dw_hdmi_qp_write(cec, cec->addresses, CEC_ADDR); -+ + return 0; +} + -+static int dw_hdmi_qp_cec_transmit(struct cec_adapter *adap, u8 attempts, -+ u32 signal_free_time, struct cec_msg *msg) ++void rk_kbase_device_runtime_disable(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp_cec *cec = cec_get_drvdata(adap); -+ unsigned int i; -+ u32 val; -+ -+ for (i = 0; i < msg->len; i++) { -+ if (!(i % 4)) -+ val = msg->msg[i]; -+ if ((i % 4) == 1) -+ val |= msg->msg[i] << 8; -+ if ((i % 4) == 2) -+ val |= msg->msg[i] << 16; -+ if ((i % 4) == 3) -+ val |= msg->msg[i] << 24; -+ -+ if (i == (msg->len - 1) || (i % 4) == 3) -+ dw_hdmi_qp_write(cec, val, CEC_TX_DATA3_0 + (i / 4) * 4); -+ } ++} + -+ dw_hdmi_qp_write(cec, msg->len - 1, CEC_TX_CNT); -+ dw_hdmi_qp_write(cec, CEC_CTRL_START, CEC_TX_CONTROL); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = rk_pm_callback_power_on, ++ .power_off_callback = rk_pm_callback_power_off, ++#ifdef CONFIG_PM ++ .power_runtime_init_callback = rk_kbase_device_runtime_init, ++ .power_runtime_term_callback = rk_kbase_device_runtime_disable, ++ .power_runtime_on_callback = rk_pm_callback_runtime_on, ++ .power_runtime_off_callback = rk_pm_callback_runtime_off, ++#else /* CONFIG_PM */ ++ .power_runtime_init_callback = NULL, ++ .power_runtime_term_callback = NULL, ++ .power_runtime_on_callback = NULL, ++ .power_runtime_off_callback = NULL, ++#endif /* CONFIG_PM */ ++}; + ++int kbase_platform_early_init(void) ++{ ++ /* Nothing needed at this stage */ + return 0; +} + -+static irqreturn_t dw_hdmi_qp_cec_hardirq(int irq, void *data) ++/*---------------------------------------------------------------------------*/ ++ ++void kbase_platform_rk_shutdown(struct kbase_device *kbdev) +{ -+ struct cec_adapter *adap = data; -+ struct dw_hdmi_qp_cec *cec = cec_get_drvdata(adap); -+ u32 stat = dw_hdmi_qp_read(cec, CEC_INT_STATUS); -+ irqreturn_t ret = IRQ_HANDLED; ++ I("to make vdd_gpu enabled for turning off pd_gpu in pm_framework."); ++ 
rk_pm_enable_regulator(kbdev); ++} + -+ if (stat == 0) -+ return IRQ_NONE; ++/*---------------------------------------------------------------------------*/ + -+ dw_hdmi_qp_write(cec, stat, CEC_INT_CLEAR); ++#ifdef CONFIG_REGULATOR ++static int rk_pm_enable_regulator(struct kbase_device *kbdev) ++{ ++ int ret = 0; + -+ if (stat & CEC_STAT_LINE_ERR) { -+ cec->tx_status = CEC_TX_STATUS_ERROR; -+ cec->tx_done = true; -+ ret = IRQ_WAKE_THREAD; -+ } else if (stat & CEC_STAT_DONE) { -+ cec->tx_status = CEC_TX_STATUS_OK; -+ cec->tx_done = true; -+ ret = IRQ_WAKE_THREAD; -+ } else if (stat & CEC_STAT_NACK) { -+ cec->tx_status = CEC_TX_STATUS_NACK; -+ cec->tx_done = true; -+ ret = IRQ_WAKE_THREAD; ++ if (!kbdev->regulator) { ++ W("no mali regulator control, no need to enable."); ++ goto EXIT; + } + -+ if (stat & CEC_STAT_EOM) { -+ unsigned int len, i, val; -+ -+ val = dw_hdmi_qp_read(cec, CEC_RX_CNT); -+ len = (val & 0xf) + 1; -+ -+ if (len > sizeof(cec->rx_msg.msg)) -+ len = sizeof(cec->rx_msg.msg); -+ -+ for (i = 0; i < 4; i++) { -+ val = dw_hdmi_qp_read(cec, CEC_RX_DATA3_0 + i * 4); -+ cec->rx_msg.msg[i * 4] = val & 0xff; -+ cec->rx_msg.msg[i * 4 + 1] = (val >> 8) & 0xff; -+ cec->rx_msg.msg[i * 4 + 2] = (val >> 16) & 0xff; -+ cec->rx_msg.msg[i * 4 + 3] = (val >> 24) & 0xff; -+ } -+ -+ dw_hdmi_qp_write(cec, 1, CEC_LOCK_CONTROL); -+ -+ cec->rx_msg.len = len; -+ cec->rx_done = true; -+ -+ ret = IRQ_WAKE_THREAD; ++ D("to enable regulator."); ++ ret = regulator_enable(kbdev->regulator); ++ if (ret) { ++ E("fail to enable regulator, ret : %d.", ret); ++ goto EXIT; + } + ++EXIT: + return ret; +} + -+static irqreturn_t dw_hdmi_qp_cec_thread(int irq, void *data) ++static void rk_pm_disable_regulator(struct kbase_device *kbdev) +{ -+ struct cec_adapter *adap = data; -+ struct dw_hdmi_qp_cec *cec = cec_get_drvdata(adap); -+ -+ if (cec->tx_done) { -+ cec->tx_done = false; -+ cec_transmit_attempt_done(adap, cec->tx_status); -+ } -+ if (cec->rx_done) { -+ cec->rx_done = false; -+ cec_received_msg(adap, &cec->rx_msg); ++ if (!(kbdev->regulator)) { ++ W("no mali regulator control, no need to disable."); ++ return; + } -+ return IRQ_HANDLED; ++ ++ D("to disable regulator."); ++ regulator_disable(kbdev->regulator); +} ++#endif + -+static int dw_hdmi_qp_cec_enable(struct cec_adapter *adap, bool enable) ++static int rk_pm_enable_clk(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp_cec *cec = cec_get_drvdata(adap); ++ int err = 0; + -+ if (!enable) { -+ dw_hdmi_qp_write(cec, 0, CEC_INT_MASK_N); -+ dw_hdmi_qp_write(cec, ~0, CEC_INT_CLEAR); -+ cec->ops->disable(cec->hdmi); ++ if (!(kbdev->clock)) { ++ W("no mali clock control, no need to enable."); + } else { -+ unsigned int irqs; -+ -+ cec->ops->enable(cec->hdmi); -+ -+ dw_hdmi_qp_write(cec, ~0, CEC_INT_CLEAR); -+ dw_hdmi_qp_write(cec, 1, CEC_LOCK_CONTROL); ++ D("to enable clk."); ++ err = clk_enable(kbdev->clock); ++ if (err) ++ E("failed to enable clk: %d.", err); ++ } + -+ dw_hdmi_qp_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); ++ return err; ++} + -+ irqs = CEC_STAT_LINE_ERR | CEC_STAT_NACK | CEC_STAT_EOM | -+ CEC_STAT_DONE; -+ dw_hdmi_qp_write(cec, ~0, CEC_INT_CLEAR); -+ dw_hdmi_qp_write(cec, irqs, CEC_INT_MASK_N); ++static void rk_pm_disable_clk(struct kbase_device *kbdev) ++{ ++ if (!(kbdev->clock)) { ++ W("no mali clock control, no need to disable."); ++ } else { ++ D("to disable clk."); ++ clk_disable(kbdev->clock); + } -+ return 0; +} + -+static const struct cec_adap_ops dw_hdmi_qp_cec_ops = { -+ .adap_enable = dw_hdmi_qp_cec_enable, -+ .adap_log_addr 
= dw_hdmi_qp_cec_log_addr, -+ .adap_transmit = dw_hdmi_qp_cec_transmit, -+}; ++/*---------------------------------------------------------------------------*/ + -+static void dw_hdmi_qp_cec_del(void *data) ++static ssize_t utilisation_period_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) +{ -+ struct dw_hdmi_qp_cec *cec = data; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ ssize_t ret = 0; + -+ cec_delete_adapter(cec->adap); ++ ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period); ++ ++ return ret; +} + -+static int dw_hdmi_qp_cec_probe(struct platform_device *pdev) ++static ssize_t utilisation_period_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) +{ -+ struct dw_hdmi_qp_cec_data *data = dev_get_platdata(&pdev->dev); -+ struct dw_hdmi_qp_cec *cec; -+ int ret; ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ int ret = 0; + -+ if (!data) { -+ dev_err(&pdev->dev, "can't get data\n"); -+ return -ENXIO; ++ ret = kstrtouint(buf, 0, &platform->utilisation_period); ++ if (ret) { ++ E("invalid input period : %s.", buf); ++ return ret; + } ++ D("set utilisation_period to '%d'.", platform->utilisation_period); + -+ /* -+ * Our device is just a convenience - we want to link to the real -+ * hardware device here, so that userspace can see the association -+ * between the HDMI hardware and its associated CEC chardev. -+ */ -+ cec = devm_kzalloc(&pdev->dev, sizeof(*cec), GFP_KERNEL); -+ if (!cec) -+ return -ENOMEM; ++ return count; ++} + -+ cec->ops = data->ops; -+ cec->hdmi = data->hdmi; -+ cec->irq = data->irq; ++static ssize_t utilisation_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct rk_context *platform = get_rk_context(kbdev); ++ ssize_t ret = 0; ++ unsigned long period_in_us = platform->utilisation_period * 1000; ++ unsigned long total_time; ++ unsigned long busy_time; ++ unsigned long utilisation; + -+ platform_set_drvdata(pdev, cec); ++ kbase_pm_reset_dvfs_utilisation(kbdev); ++ usleep_range(period_in_us, period_in_us + 100); ++ kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time); ++ /* 'devfreq_dev_profile' instance registered to devfreq ++ * also uses kbase_pm_reset_dvfs_utilisation ++ * and kbase_pm_get_dvfs_utilisation. ++ * it's better to cat this file when DVFS is disabled. 
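++	 * Both paths reset the same utilisation counters, so concurrent
++	 * devfreq sampling would distort the value reported here.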
++ */ ++ D("total_time : %lu, busy_time : %lu.", total_time, busy_time); + -+ dw_hdmi_qp_write(cec, 0, CEC_TX_CNT); -+ dw_hdmi_qp_write(cec, ~0, CEC_INT_CLEAR); -+ dw_hdmi_qp_write(cec, 0, CEC_INT_MASK_N); ++ utilisation = busy_time * 100 / total_time; ++ ret += snprintf(buf, PAGE_SIZE, "%ld\n", utilisation); + -+ cec->adap = cec_allocate_adapter(&dw_hdmi_qp_cec_ops, cec, "dw_hdmi_qp", -+ CEC_CAP_LOG_ADDRS | CEC_CAP_TRANSMIT | -+ CEC_CAP_RC | CEC_CAP_PASSTHROUGH, -+ CEC_MAX_LOG_ADDRS); -+ if (IS_ERR(cec->adap)) { -+ dev_err(&pdev->dev, "cec allocate adapter failed\n"); -+ return PTR_ERR(cec->adap); -+ } ++ return ret; ++} + -+ dw_hdmi_qp_set_cec_adap(cec->hdmi, cec->adap); ++static DEVICE_ATTR_RW(utilisation_period); ++static DEVICE_ATTR_RO(utilisation); + -+ /* override the module pointer */ -+ cec->adap->owner = THIS_MODULE; ++static int kbase_platform_rk_create_sysfs_files(struct device *dev) ++{ ++ int ret = 0; + -+ ret = devm_add_action(&pdev->dev, dw_hdmi_qp_cec_del, cec); ++ ret = device_create_file(dev, &dev_attr_utilisation_period); + if (ret) { -+ dev_err(&pdev->dev, "cec add action failed\n"); -+ cec_delete_adapter(cec->adap); -+ return ret; -+ } -+ -+ if (cec->irq < 0) { -+ ret = cec->irq; -+ dev_err(&pdev->dev, "cec get irq failed\n"); -+ return ret; -+ } -+ -+ ret = devm_request_threaded_irq(&pdev->dev, cec->irq, -+ dw_hdmi_qp_cec_hardirq, -+ dw_hdmi_qp_cec_thread, IRQF_SHARED, -+ "dw-hdmi-qp-cec", cec->adap); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cec request irq thread failed\n"); -+ return ret; ++ E("fail to create sysfs file 'utilisation_period'."); ++ goto out; + } + -+ cec->notify = cec_notifier_cec_adap_register(pdev->dev.parent, -+ NULL, cec->adap); -+ if (!cec->notify) { -+ dev_err(&pdev->dev, "cec notifier adap register failed\n"); -+ return -ENOMEM; ++ ret = device_create_file(dev, &dev_attr_utilisation); ++ if (ret) { ++ E("fail to create sysfs file 'utilisation'."); ++ goto remove_utilisation_period; + } + -+ ret = cec_register_adapter(cec->adap, pdev->dev.parent); -+ if (ret < 0) { -+ dev_err(&pdev->dev, "cec adap register failed\n"); -+ cec_notifier_cec_adap_unregister(cec->notify, cec->adap); -+ return ret; -+ } ++ return 0; + -+ /* -+ * CEC documentation says we must not call cec_delete_adapter -+ * after a successful call to cec_register_adapter(). 
-+ */ -+ devm_remove_action(&pdev->dev, dw_hdmi_qp_cec_del, cec); ++remove_utilisation_period: ++ device_remove_file(dev, &dev_attr_utilisation_period); ++out: ++ return ret; ++} + -+ return 0; ++static void kbase_platform_rk_remove_sysfs_files(struct device *dev) ++{ ++ device_remove_file(dev, &dev_attr_utilisation_period); ++ device_remove_file(dev, &dev_attr_utilisation); +} + -+static int dw_hdmi_qp_cec_remove(struct platform_device *pdev) ++static int rk3288_get_soc_info(struct device *dev, struct device_node *np, ++ int *bin, int *process) +{ -+ struct dw_hdmi_qp_cec *cec = platform_get_drvdata(pdev); ++ int ret = -EINVAL; ++ u8 value = 0; ++ char *name; + -+ cec_notifier_cec_adap_unregister(cec->notify, cec->adap); -+ cec_unregister_adapter(cec->adap); ++ if (!bin) ++ goto out; + -+ return 0; ++ if (soc_is_rk3288w()) ++ name = "performance-w"; ++ else ++ name = "performance"; ++ if (of_property_match_string(np, "nvmem-cell-names", name) >= 0) { ++ ret = rockchip_nvmem_cell_read_u8(np, name, &value); ++ if (ret) { ++ dev_err(dev, "Failed to get soc performance value\n"); ++ goto out; ++ } ++ if (value & 0x2) ++ *bin = 3; ++ else if (value & 0x01) ++ *bin = 2; ++ else ++ *bin = 0; ++ } else { ++ dev_err(dev, "Failed to get bin config\n"); ++ } ++ if (*bin >= 0) ++ dev_info(dev, "bin=%d\n", *bin); ++ ++out: ++ return ret; +} + -+static struct platform_driver dw_hdmi_qp_cec_driver = { -+ .probe = dw_hdmi_qp_cec_probe, -+ .remove = dw_hdmi_qp_cec_remove, -+ .driver = { -+ .name = "dw-hdmi-qp-cec", ++static const struct rockchip_opp_data rk3288_gpu_opp_data = { ++ .get_soc_info = rk3288_get_soc_info, ++}; ++ ++static const struct of_device_id rockchip_mali_of_match[] = { ++ { ++ .compatible = "rockchip,rk3288", ++ .data = (void *)&rk3288_gpu_opp_data, ++ }, ++ { ++ .compatible = "rockchip,rk3288w", ++ .data = (void *)&rk3288_gpu_opp_data, + }, ++ {}, +}; -+module_platform_driver(dw_hdmi_qp_cec_driver); + -+MODULE_AUTHOR("Algea Cao "); -+MODULE_DESCRIPTION("Synopsys Designware HDMI QP CEC driver"); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS(PLATFORM_MODULE_PREFIX "dw-hdmi-qp-cec"); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.h ++int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev) ++{ ++ rockchip_get_opp_data(rockchip_mali_of_match, &kbdev->opp_info); ++ ++ return rockchip_init_opp_table(kbdev->dev, &kbdev->opp_info, ++ "gpu_leakage", "mali"); ++} +diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h new file mode 100644 -index 000000000..c0977c612 +index 000000000..6eab25014 --- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-cec.h -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao ++++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h +@@ -0,0 +1,62 @@ ++/* drivers/gpu/t6xx/kbase/src/platform/rk/mali_kbase_platform.h ++ * Rockchip SoC Mali-Midgard platform-dependent codes ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software FoundatIon. + */ -+#ifndef DW_HDMI_QP_CEC_H -+#define DW_HDMI_QP_CEC_H + -+struct dw_hdmi_qp; ++/** ++ * @file mali_kbase_rk.h ++ * ++ * defines work_context type of platform_dependent_part. 
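++ * That type is struct rk_context, defined below.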
++ */ + -+struct dw_hdmi_qp_cec_ops { -+ void (*enable)(struct dw_hdmi_qp *hdmi); -+ void (*disable)(struct dw_hdmi_qp *hdmi); -+ void (*write)(struct dw_hdmi_qp *hdmi, u32 val, int offset); -+ u32 (*read)(struct dw_hdmi_qp *hdmi, int offset); -+}; ++#ifndef _MALI_KBASE_RK_H_ ++#define _MALI_KBASE_RK_H_ + -+struct dw_hdmi_qp_cec_data { -+ struct dw_hdmi_qp *hdmi; -+ const struct dw_hdmi_qp_cec_ops *ops; -+ int irq; -+}; ++#include ++ ++/*---------------------------------------------------------------------------*/ ++ ++#define DEFAULT_UTILISATION_PERIOD_IN_MS (100) ++ ++/*---------------------------------------------------------------------------*/ + -+#endif -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.c -new file mode 100644 -index 000000000..388890d60 ---- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.c -@@ -0,0 +1,650 @@ -+// SPDX-License-Identifier: GPL-2.0+ +/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao ++ * struct rk_context - work_context of platform_dependent_part_of_rk. + */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++struct rk_context { ++ /* ++ * record the status of common_parts calling 'power_on_callback' ++ * and 'power_off_callback'. ++ */ ++ bool is_powered; + -+#include "dw-hdmi-qp.h" -+#include "dw-hdmi-qp-hdcp.h" ++ struct kbase_device *kbdev; + -+#define HDCP_KEY_SIZE 308 -+#define HDCP_KEY_SEED_SIZE 2 ++ struct workqueue_struct *power_off_wq; ++ /* delayed_work_to_power_off_gpu. */ ++ struct delayed_work work; ++ unsigned int delay_ms; + -+#define KSV_LEN 5 -+#define HEADER 10 -+#define SHAMAX 20 ++ /* ++ * WAKE_LOCK_SUSPEND for ensuring to run ++ * delayed_work_to_power_off_gpu before suspend. ++ */ ++ struct wake_lock wake_lock; + -+#define MAX_DOWNSTREAM_DEVICE_NUM 5 -+#define DPK_WR_OK_TIMEOUT_US 30000 -+#define HDMI_HDCP1X_ID 5 ++ /* debug only, the period in ms to count gpu_utilisation. 
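++	 * It is read and written through the 'utilisation_period' sysfs file.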
*/ ++ unsigned int utilisation_period; ++}; + -+/* HDCP Registers */ -+#define HDMI_HDCPREG_RMCTL 0x780e -+#define HDMI_HDCPREG_RMSTS 0x780f -+#define HDMI_HDCPREG_SEED0 0x7810 -+#define HDMI_HDCPREG_SEED1 0x7811 -+#define HDMI_HDCPREG_DPK0 0x7812 -+#define HDMI_HDCPREG_DPK1 0x7813 -+#define HDMI_HDCPREG_DPK2 0x7814 -+#define HDMI_HDCPREG_DPK3 0x7815 -+#define HDMI_HDCPREG_DPK4 0x7816 -+#define HDMI_HDCPREG_DPK5 0x7817 -+#define HDMI_HDCPREG_DPK6 0x7818 -+#define HDMI_HDCP2REG_CTRL 0x7904 -+#define HDMI_HDCP2REG_MASK 0x790c -+#define HDMI_HDCP2REG_MUTE 0x790e ++/*---------------------------------------------------------------------------*/ + -+enum dw_hdmi_hdcp_state { -+ DW_HDCP_DISABLED, -+ DW_HDCP_AUTH_START, -+ DW_HDCP_AUTH_SUCCESS, -+ DW_HDCP_AUTH_FAIL, -+}; ++static inline struct rk_context *get_rk_context( ++ const struct kbase_device *kbdev) ++{ ++ return (struct rk_context *)(kbdev->platform_context); ++} + -+enum { -+ DW_HDMI_HDCP_KSV_LEN = 8, -+ DW_HDMI_HDCP_SHA_LEN = 20, -+ DW_HDMI_HDCP_DPK_LEN = 280, -+ DW_HDMI_HDCP_KEY_LEN = 308, -+ DW_HDMI_HDCP_SEED_LEN = 2, -+}; ++#endif /* _MALI_KBASE_RK_H_ */ + -+enum { -+ HDCP14_R0_TIMER_OVR_EN_MASK = 0x01, -+ HDCP14_R0_TIMER_OVR_EN = 0x01, -+ HDCP14_R0_TIMER_OVR_DISABLE = 0x00, +diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild +new file mode 100755 +index 000000000..1caa29366 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild +@@ -0,0 +1,18 @@ ++# ++# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ HDCP14_RI_TIMER_OVR_EN_MASK = 0x80, -+ HDCP14_RI_TIMER_OVR_EN = 0x80, -+ HDCP14_RI_TIMER_OVR_DISABLE = 0x00, + -+ HDCP14_R0_TIMER_OVR_VALUE_MASK = 0x1e, -+ HDCP14_RI_TIMER_OVR_VALUE_MASK = 0xff00, ++mali_kbase-y += \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_cpu_vexpress.o +diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..02835f129 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +@@ -0,0 +1,75 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ HDCP14_KEY_WR_OK = 0x100, + -+ HDCP14_HPD_MASK = 0x01, -+ HDCP14_HPD_EN = 0x01, -+ HDCP14_HPD_DISABLE = 0x00, + -+ HDCP14_ENCRYPTION_ENABLE_MASK = 0x04, -+ HDCP14_ENCRYPTION_ENABLE = 0x04, -+ HDCP14_ENCRYPTION_DISABLE = 0x04, ++#include "mali_kbase_cpu_vexpress.h" + -+ HDCP14_KEY_DECRYPT_EN_MASK = 0x400, -+ HDCP14_KEY_DECRYPT_EN = 0x400, -+ HDCP14_KEY_DECRYPT_DISABLE = 0x00, ++/** ++ * Maximum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq() ++/** ++ * Minimum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq() + -+ HDMI_A_SRMCTRL_SHA1_FAIL_MASK = 0X08, -+ HDMI_A_SRMCTRL_SHA1_FAIL_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_SHA1_FAIL_ENABLE = 0X08, ++/** ++ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock ++ * ++ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) + -+ HDMI_A_SRMCTRL_KSV_UPDATE_MASK = 0X04, -+ HDMI_A_SRMCTRL_KSV_UPDATE_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_UPDATE_ENABLE = 0X04, ++/** ++ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * ++ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define GPU_SPEED_FUNC (NULL) + -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_MASK = 0X01, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_MEM_REQ_ENABLE = 0X01, ++/** ++ * Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_MASK = 0X02, -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_DISABLE = 0X00, -+ HDMI_A_SRMCTRL_KSV_MEM_ACCESS_ENABLE = 0X02, ++/** ++ * Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+ HDMI_A_SRM_BASE_MAX_DEVS_EXCEEDED = 0x80, -+ HDMI_A_SRM_BASE_DEVICE_COUNT = 0x7f, ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +new file mode 100644 +index 000000000..15ce2bc5e +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +@@ -0,0 +1,85 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ HDMI_A_SRM_BASE_MAX_CASCADE_EXCEEDED = 0x08, + -+ HDMI_A_APIINTSTAT_KSVSHA1_CALC_INT = 0x02, + -+ /* HDCPREG_RMSTS field values */ -+ DPK_WR_OK_STS = 0x40, + -+ HDMI_A_HDCP22_MASK = 0x40, + -+ HDMI_HDCP2_OVR_EN_MASK = 0x02, -+ HDMI_HDCP2_OVR_ENABLE = 0x02, -+ HDMI_HDCP2_OVR_DISABLE = 0x00, ++#include ++#include ++#include ++#include ++#include "mali_kbase_cpu_vexpress.h" ++#include "mali_kbase_config_platform.h" + -+ HDMI_HDCP2_FORCE_MASK = 0x04, -+ HDMI_HDCP2_FORCE_ENABLE = 0x04, -+ HDMI_HDCP2_FORCE_DISABLE = 0x00, -+}; ++#define HARD_RESET_AT_POWER_OFF 0 + -+struct sha_t { -+ u8 mlength[8]; -+ u8 mblock[64]; -+ int mindex; -+ int mcomputed; -+ int mcorrupted; -+ unsigned int mdigest[5]; ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 68, ++ .mmu_irq_number = 69, ++ .gpu_irq_number = 70, ++ .io_memory_region = { ++ .start = 0xFC010000, ++ .end = 0xFC010000 + (4096 * 4) - 1 ++ } +}; ++#endif /* CONFIG_OF */ + -+static inline unsigned int shacircularshift(unsigned int bits, -+ unsigned int word) ++static int pm_callback_power_on(struct kbase_device *kbdev) +{ -+ return (((word << bits) & 0xFFFFFFFF) | (word >> (32 - bits))); ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+static void hdcp_modb(struct dw_qp_hdcp *hdcp, u32 data, u32 mask, u32 reg) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp *hdmi = hdcp->hdmi; -+ u32 val = hdcp->read(hdmi, reg) & ~mask; -+ -+ val |= data & mask; -+ hdcp->write(hdmi, val, reg); ++#if HARD_RESET_AT_POWER_OFF ++ /* Cause a GPU hard reset to test whether we have actually idled the GPU ++ * and that we properly reconfigure the GPU on power up. ++ * Usually this would be dangerous, but if the GPU is working correctly it should ++ * be completely safe as the GPU should not be active at this point. ++ * However this is disabled normally because it will most likely interfere with ++ * bus logging etc. 
++ */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); ++ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); ++#endif +} + -+static int hdcp_load_keys_cb(struct dw_qp_hdcp *hdcp) -+{ -+ u32 size; -+ u8 hdcp_vendor_data[320]; ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ hdcp->keys = kmalloc(HDCP_KEY_SIZE, GFP_KERNEL); -+ if (!hdcp->keys) -+ return -ENOMEM; ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+ hdcp->seeds = kmalloc(HDCP_KEY_SEED_SIZE, GFP_KERNEL); -+ if (!hdcp->seeds) { -+ kfree(hdcp->keys); -+ return -ENOMEM; -+ } ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &versatile_platform_config; ++} + -+ size = rk_vendor_read(HDMI_HDCP1X_ID, hdcp_vendor_data, 314); -+ if (size < (HDCP_KEY_SIZE + HDCP_KEY_SEED_SIZE)) { -+ dev_err(hdcp->dev, "HDCP: read size %d\n", size); -+ memset(hdcp->keys, 0, HDCP_KEY_SIZE); -+ memset(hdcp->seeds, 0, HDCP_KEY_SEED_SIZE); -+ } else { -+ memcpy(hdcp->keys, hdcp_vendor_data, HDCP_KEY_SIZE); -+ memcpy(hdcp->seeds, hdcp_vendor_data + HDCP_KEY_SIZE, -+ HDCP_KEY_SEED_SIZE); -+ } + ++int kbase_platform_early_init(void) ++{ ++ /* Nothing needed at this stage */ + return 0; +} +diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c +new file mode 100644 +index 000000000..4665f98cb +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c +@@ -0,0 +1,279 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+static int dw_hdcp_qp_hdcp_load_key(struct dw_qp_hdcp *hdcp) -+{ -+ int i, j; -+ int ret, val; -+ void __iomem *reg_rmsts_addr; -+ struct dw_hdmi_qp_hdcp_keys *hdcp_keys; -+ struct dw_hdmi_qp *hdmi = hdcp->hdmi; -+ u32 ksv, dkl, dkh; -+ -+ if (!hdcp->keys) { -+ ret = hdcp_load_keys_cb(hdcp); -+ if (ret) -+ return ret; -+ } -+ hdcp_keys = hdcp->keys; -+ -+ reg_rmsts_addr = hdcp->regs + HDCP14_KEY_STATUS; -+ -+ /* hdcp key has been written */ -+ if (hdcp->read(hdmi, HDCP14_KEY_STATUS) & 0x3f) { -+ dev_info(hdcp->dev, "hdcp key has been written\n"); -+ return 0; -+ } -+ -+ ksv = hdcp_keys->KSV[0] | hdcp_keys->KSV[1] << 8 | -+ hdcp_keys->KSV[2] << 16 | hdcp_keys->KSV[3] << 24; -+ hdcp->write(hdmi, ksv, HDCP14_AKSV_L); + -+ ksv = hdcp_keys->KSV[4]; -+ hdcp->write(hdmi, ksv, HDCP14_AKSV_H); + -+ if (hdcp->seeds) { -+ hdcp_modb(hdcp, HDCP14_KEY_DECRYPT_EN, -+ HDCP14_KEY_DECRYPT_EN_MASK, -+ HDCP14_CONFIG0); -+ hdcp->write(hdmi, (hdcp->seeds[0] << 8) | hdcp->seeds[1], -+ HDCP14_KEY_SEED); -+ } else { -+ hdcp_modb(hdcp, HDCP14_KEY_DECRYPT_DISABLE, -+ HDCP14_KEY_DECRYPT_EN_MASK, -+ HDCP14_CONFIG0); -+ } ++#include ++#include ++#include "mali_kbase_cpu_vexpress.h" + -+ for (i = 0; i < DW_HDMI_HDCP_DPK_LEN - 6; i += 7) { -+ dkl = 0; -+ dkh = 0; -+ for (j = 0; j < 4; j++) -+ dkl |= hdcp_keys->devicekey[i + j] << (j * 8); -+ for (j = 4; j < 7; j++) -+ dkh |= hdcp_keys->devicekey[i + j] << ((j - 4) * 8); ++#define HZ_IN_MHZ (1000000) + -+ hdcp->write(hdmi, dkh, HDCP14_KEY_H); -+ hdcp->write(hdmi, dkl, HDCP14_KEY_L); ++#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) ++#define MOTHERBOARD_SYS_CFG_START (0x10000000) ++#define SYS_CFGDATA_OFFSET (0x000000A0) ++#define SYS_CFGCTRL_OFFSET (0x000000A4) ++#define SYS_CFGSTAT_OFFSET (0x000000A8) + -+ ret = readx_poll_timeout(readl, reg_rmsts_addr, val, -+ val & HDCP14_KEY_WR_OK, 1000, -+ DPK_WR_OK_TIMEOUT_US); -+ if (ret) { -+ dev_err(hdcp->dev, "hdcp key write err\n"); -+ return ret; -+ } -+ } ++#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) ++#define READ_REG_BIT_VALUE (0 << 30) ++#define DCC_DEFAULT_BIT_VALUE (0 << 26) ++#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) ++#define SITE_DEFAULT_BIT_VALUE (1 << 16) ++#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) ++#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) ++#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) ++#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + -+ return 0; -+} ++#define FEED_REG_BIT_MASK (0x0F) ++#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) ++#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) ++#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) ++#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) ++#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + -+static void dw_hdcp_qp_hdcp_restart(struct dw_qp_hdcp *hdcp) -+{ -+ mutex_lock(&hdcp->mutex); ++/* the following three values used for reading ++ * HBI value of the LogicTile daughterboard */ ++#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000) ++#define VE_SYS_PROC_ID1_OFFSET (0x00000088) ++#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF) + -+ if (!hdcp->remaining_times) { -+ mutex_unlock(&hdcp->mutex); -+ return; -+ } ++#define IS_SINGLE_BIT_SET(val, pos) (val&(1<write(hdcp->hdmi, 1, HDCP14_CONFIG1); -+ mdelay(50); -+ hdcp->write(hdcp->hdmi, HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, -+ AVP_1_INT_CLEAR); -+ hdcp_modb(hdcp, HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, -+ HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, AVP_1_INT_MASK_N); + -+ hdcp_modb(hdcp, HDCP14_ENCRYPTION_ENABLE_MASK | HDCP14_HPD_MASK, -+ HDCP14_ENCRYPTION_ENABLE_MASK | HDCP14_HPD_MASK, -+ 
HDCP14_CONFIG0); ++#define CPU_CLOCK_SPEED_UNDEFINED (0) + -+ hdcp->remaining_times--; -+ mutex_unlock(&hdcp->mutex); -+} ++static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED; + -+static int dw_hdcp_qp_hdcp_start(struct dw_qp_hdcp *hdcp) ++static DEFINE_RAW_SPINLOCK(syscfg_lock); ++/** ++ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed ++ * @cpu_clock - the value of CPU clock speed in MHz ++ * ++ * Returns 0 on success, error code otherwise. ++ * ++ * The implementation is platform specific. ++*/ ++int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock) +{ -+ struct dw_hdmi_qp *hdmi = hdcp->hdmi; -+ -+ dw_hdcp_qp_hdcp_load_key(hdcp); -+ -+ mutex_lock(&hdcp->mutex); -+ hdcp->remaining_times = hdcp->retry_times; -+ -+ hdcp->write(hdmi, HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, AVP_1_INT_CLEAR); -+ hdcp_modb(hdcp, HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, -+ HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, AVP_1_INT_MASK_N); -+ -+ mdelay(50); ++ int err = 0; ++ u32 reg_val = 0; ++ u32 osc2_value = 0; ++ u32 pa_divide = 0; ++ u32 pb_divide = 0; ++ u32 pc_divide = 0; ++ void __iomem *syscfg_reg = NULL; ++ void __iomem *scc_reg = NULL; + -+ hdcp_modb(hdcp, HDCP14_ENCRYPTION_ENABLE | HDCP14_HPD_EN, -+ HDCP14_ENCRYPTION_ENABLE_MASK | HDCP14_HPD_MASK, -+ HDCP14_CONFIG0); ++ if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) { ++ *cpu_clock = cpu_clock_speed; ++ return 0; ++ } + -+ hdcp->status = DW_HDCP_AUTH_START; -+ dev_info(hdcp->dev, "start hdcp\n"); -+ mutex_unlock(&hdcp->mutex); ++ /* Init the value in case something goes wrong */ ++ *cpu_clock = 0; + -+ queue_work(hdcp->workqueue, &hdcp->work); -+ return 0; -+} ++ /* Map CPU register into virtual memory */ ++ syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000); ++ if (syscfg_reg == NULL) { ++ err = -EIO; ++ goto syscfg_reg_map_failed; ++ } + -+static int dw_hdcp_qp_hdcp_stop(struct dw_qp_hdcp *hdcp) -+{ -+ mutex_lock(&hdcp->mutex); -+ hdcp_modb(hdcp, 0, HDCP14_ENCRYPTION_ENABLE_MASK | HDCP14_HPD_MASK, -+ HDCP14_CONFIG0); ++ scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000); ++ if (scc_reg == NULL) { ++ err = -EIO; ++ goto scc_reg_map_failed; ++ } + -+ hdcp_modb(hdcp, 0, HDCP14_AUTH_CHG_MASK_N | HDCP14_KSV_LIST_DONE_MASK_N, AVP_1_INT_MASK_N); -+ hdcp->write(hdcp->hdmi, 0, HDCP14_CONFIG1); -+ hdcp->status = DW_HDCP_DISABLED; -+ mutex_unlock(&hdcp->mutex); -+ return 0; -+} ++ raw_spin_lock(&syscfg_lock); + -+static void sha_reset(struct sha_t *sha) -+{ -+ u32 i = 0; ++ /* Read SYS regs - OSC2 */ ++ reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET); + -+ sha->mindex = 0; -+ sha->mcomputed = false; -+ sha->mcorrupted = false; -+ for (i = 0; i < sizeof(sha->mlength); i++) -+ sha->mlength[i] = 0; ++ /* Check if there is any other undergoing request */ ++ if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) { ++ err = -EBUSY; ++ goto ongoing_request; ++ } ++ /* Reset the CGFGSTAT reg */ ++ writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET)); + -+ sha1_init(sha->mdigest); -+} ++ writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE | ++ DCC_DEFAULT_BIT_VALUE | ++ SYS_CFG_OSC_FUNC_BIT_VALUE | ++ SITE_DEFAULT_BIT_VALUE | ++ BOARD_STACK_POS_DEFAULT_BIT_VALUE | ++ DEVICE_DEFAULT_BIT_VALUE, ++ (syscfg_reg + SYS_CFGCTRL_OFFSET)); ++ /* Wait for the transaction to complete */ ++ while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) & ++ SYS_CFG_COMPLETE_BIT_VALUE)) ++ ; ++ /* Read SYS_CFGSTAT Register to get the status of submitted ++ * transaction */ ++ reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET); + 
-+static void sha_processblock(struct sha_t *sha) -+{ -+ u32 array[SHA1_WORKSPACE_WORDS]; ++ if (reg_val & SYS_CFG_ERROR_BIT_VALUE) { ++ /* Error while setting register */ ++ err = -EIO; ++ goto set_reg_error; ++ } + -+ sha1_transform(sha->mdigest, sha->mblock, array); -+ sha->mindex = 0; -+} ++ osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET); ++ /* Read the SCC CFGRW0 register */ ++ reg_val = readl(scc_reg); + -+static void sha_padmessage(struct sha_t *sha) -+{ + /* -+ * Check to see if the current message block is too small to hold -+ * the initial padding bits and length. If so, we will pad the -+ * block, process it, and then continue padding into a second -+ * block. ++ * Select the appropriate feed: ++ * CFGRW0[0] - CLKOB ++ * CFGRW0[1] - CLKOC ++ * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL) + */ -+ if (sha->mindex > 55) { -+ sha->mblock[sha->mindex++] = 0x80; -+ while (sha->mindex < 64) -+ sha->mblock[sha->mindex++] = 0; -+ -+ sha_processblock(sha); -+ while (sha->mindex < 56) -+ sha->mblock[sha->mindex++] = 0; ++ /* Calculate the FCLK */ ++ if (IS_SINGLE_BIT_SET(reg_val, 0)) { ++ /* CFGRW0[0] - CLKOB */ ++ /* CFGRW0[6:3] */ ++ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ FCLK_PA_DIVIDE_BIT_SHIFT)) >> ++ FCLK_PA_DIVIDE_BIT_SHIFT); ++ /* CFGRW0[10:7] */ ++ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ FCLK_PB_DIVIDE_BIT_SHIFT)) >> ++ FCLK_PB_DIVIDE_BIT_SHIFT); ++ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); ++ } else if (IS_SINGLE_BIT_SET(reg_val, 1)) { ++ /* CFGRW0[1] - CLKOC */ ++ /* CFGRW0[6:3] */ ++ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ FCLK_PA_DIVIDE_BIT_SHIFT)) >> ++ FCLK_PA_DIVIDE_BIT_SHIFT); ++ /* CFGRW0[14:11] */ ++ pc_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ FCLK_PC_DIVIDE_BIT_SHIFT)) >> ++ FCLK_PC_DIVIDE_BIT_SHIFT); ++ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1); ++ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { ++ /* CFGRW0[2] - FACLK */ ++ /* CFGRW0[18:15] */ ++ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ AXICLK_PA_DIVIDE_BIT_SHIFT)) >> ++ AXICLK_PA_DIVIDE_BIT_SHIFT); ++ /* CFGRW0[22:19] */ ++ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << ++ AXICLK_PB_DIVIDE_BIT_SHIFT)) >> ++ AXICLK_PB_DIVIDE_BIT_SHIFT); ++ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { -+ sha->mblock[sha->mindex++] = 0x80; -+ while (sha->mindex < 56) -+ sha->mblock[sha->mindex++] = 0; ++ err = -EIO; + } + -+ /* Store the message length as the last 8 octets */ -+ sha->mblock[56] = sha->mlength[7]; -+ sha->mblock[57] = sha->mlength[6]; -+ sha->mblock[58] = sha->mlength[5]; -+ sha->mblock[59] = sha->mlength[4]; -+ sha->mblock[60] = sha->mlength[3]; -+ sha->mblock[61] = sha->mlength[2]; -+ sha->mblock[62] = sha->mlength[1]; -+ sha->mblock[63] = sha->mlength[0]; -+ -+ sha_processblock(sha); -+} -+ -+static int sha_result(struct sha_t *sha) -+{ -+ if (sha->mcorrupted) -+ return false; ++set_reg_error: ++ongoing_request: ++ raw_spin_unlock(&syscfg_lock); ++ *cpu_clock /= HZ_IN_MHZ; + -+ if (sha->mcomputed == 0) { -+ sha_padmessage(sha); -+ sha->mcomputed = true; -+ } -+ return true; -+} ++ if (!err) ++ cpu_clock_speed = *cpu_clock; + -+static void sha_input(struct sha_t *sha, const u8 *data, u32 size) -+{ -+ int i = 0; -+ unsigned int j = 0; -+ int rc = true; ++ iounmap(scc_reg); + -+ if (data == 0 || size == 0) -+ return; ++scc_reg_map_failed: ++ iounmap(syscfg_reg); + -+ if (sha->mcomputed || sha->mcorrupted) { -+ sha->mcorrupted = true; -+ return; -+ } -+ while (size-- && !sha->mcorrupted) { -+ sha->mblock[sha->mindex++] = 
*data; ++syscfg_reg_map_failed: + -+ for (i = 0; i < 8; i++) { -+ rc = true; -+ for (j = 0; j < sizeof(sha->mlength); j++) { -+ sha->mlength[j]++; -+ if (sha->mlength[j] != 0) { -+ rc = false; -+ break; -+ } -+ } -+ sha->mcorrupted = (sha->mcorrupted || -+ rc) ? true : false; -+ } -+ /* if corrupted then message is too long */ -+ if (sha->mindex == 64) -+ sha_processblock(sha); -+ data++; -+ } ++ return err; +} + -+static int hdcp_verify_ksv(const u8 *data, u32 size) ++/** ++ * kbase_get_platform_logic_tile_type - determines which LogicTile type ++ * is used by Versatile Express ++ * ++ * When platform_config build parameter is specified as vexpress, i.e., ++ * platform_config=vexpress, GPU frequency may vary dependent on the ++ * particular platform. The GPU frequency depends on the LogicTile type. ++ * ++ * This function determines which LogicTile type is used by the platform by ++ * reading the HBI value of the daughterboard which holds the LogicTile: ++ * ++ * 0x217 HBI0217 Virtex-6 ++ * 0x192 HBI0192 Virtex-5 ++ * 0x247 HBI0247 Virtex-7 ++ * ++ * Return: HBI value of the logic tile daughterboard, zero if not accessible ++ */ ++static u32 kbase_get_platform_logic_tile_type(void) +{ -+ u32 i = 0; -+ struct sha_t sha; -+ -+ if ((!data) || (size < (HEADER + SHAMAX))) -+ return false; -+ -+ sha_reset(&sha); -+ sha_input(&sha, data, size - SHAMAX); -+ if (sha_result(&sha) == false) -+ return false; ++ void __iomem *syscfg_reg = NULL; ++ u32 sys_procid1 = 0; + -+ for (i = 0; i < SHAMAX; i++) { -+ if (data[size - SHAMAX + i] != (u8)(sha.mdigest[i / 4] >> ((i % 4) * 8))) -+ return false; ++ syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4); ++ if (NULL != syscfg_reg) { ++ sys_procid1 = readl(syscfg_reg); ++ iounmap(syscfg_reg); + } -+ return true; ++ ++ return sys_procid1 & VE_LOGIC_TILE_HBI_MASK; +} + -+static void dw_hdcp_qp_hdcp_2nd_auth(struct dw_qp_hdcp *hdcp) ++u32 kbase_get_platform_min_freq(void) +{ -+ u8 *data; -+ u32 len; -+ -+ len = (hdcp->read(hdcp->hdmi, HDCP14_STATUS0) & HDCP14_RPT_DEVICE_COUNT) >> 9; -+ len = len * KSV_LEN + BSTATUS_LEN + M0_LEN + SHAMAX; -+ -+ data = kmalloc(len, GFP_KERNEL); -+ if (!data) -+ return; -+ -+ hdcp->get_mem(hdcp->hdmi, data, len); ++ u32 ve_logic_tile = kbase_get_platform_logic_tile_type(); + -+ if (hdcp_verify_ksv(data, len)) -+ hdcp->write(hdcp->hdmi, HDCP14_SHA1_MSG_CORRECT_P, HDCP14_CONFIG1); -+ else -+ dw_hdcp_qp_hdcp_restart(hdcp); ++ switch (ve_logic_tile) { ++ case 0x217: ++ /* Virtex 6, HBI0217 */ ++ return VE_VIRTEX6_GPU_FREQ_MIN; ++ case 0x247: ++ /* Virtex 7, HBI0247 */ ++ return VE_VIRTEX7_GPU_FREQ_MIN; ++ default: ++ /* all other logic tiles, i.e., Virtex 5 HBI0192 ++ * or unsuccessful reading from the platform - ++ * fall back to some default value */ ++ return VE_DEFAULT_GPU_FREQ_MIN; ++ } +} + -+static void dw_hdcp_qp_hdcp_auth(struct dw_qp_hdcp *hdcp, u32 hdcp_status) ++u32 kbase_get_platform_max_freq(void) +{ -+ if (!(hdcp_status & BIT(2))) { -+ mutex_lock(&hdcp->mutex); -+ if (hdcp->status == DW_HDCP_DISABLED) { -+ mutex_unlock(&hdcp->mutex); -+ return; -+ } -+ dev_err(hdcp->dev, "hdcp auth failed\n"); -+ hdcp_modb(hdcp, 0, HDCP14_ENCRYPTION_ENABLE_MASK | HDCP14_HPD_MASK, -+ HDCP14_CONFIG0); -+ hdcp->status = DW_HDCP_AUTH_FAIL; -+ mutex_unlock(&hdcp->mutex); ++ u32 ve_logic_tile = kbase_get_platform_logic_tile_type(); + -+ dw_hdcp_qp_hdcp_restart(hdcp); -+ } else { -+ mutex_lock(&hdcp->mutex); -+ dev_info(hdcp->dev, "hdcp auth success\n"); -+ hdcp->status = DW_HDCP_AUTH_SUCCESS; -+ 
mutex_unlock(&hdcp->mutex); ++ switch (ve_logic_tile) { ++ case 0x217: ++ /* Virtex 6, HBI0217 */ ++ return VE_VIRTEX6_GPU_FREQ_MAX; ++ case 0x247: ++ /* Virtex 7, HBI0247 */ ++ return VE_VIRTEX7_GPU_FREQ_MAX; ++ default: ++ /* all other logic tiles, i.e., Virtex 5 HBI0192 ++ * or unsuccessful reading from the platform - ++ * fall back to some default value */ ++ return VE_DEFAULT_GPU_FREQ_MAX; + } +} +diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h +new file mode 100644 +index 000000000..da8656981 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h +@@ -0,0 +1,38 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static void dw_hdcp_qp_hdcp_isr(struct dw_qp_hdcp *hdcp, u32 avp_int, u32 hdcp_status) -+{ -+ if (hdcp->status == DW_HDCP_DISABLED) -+ return; + -+ dev_info(hdcp->dev, "hdcp_int is 0x%02x\n", hdcp_status); + -+ if (avp_int & HDCP14_KSV_LIST_DONE_MASK_N) -+ dw_hdcp_qp_hdcp_2nd_auth(hdcp); + -+ if (avp_int & HDCP14_AUTH_CHG_MASK_N) -+ dw_hdcp_qp_hdcp_auth(hdcp, hdcp_status); -+} + -+static ssize_t trytimes_show(struct device *device, -+ struct device_attribute *attr, char *buf) -+{ -+ int trytimes = 0; -+ struct dw_qp_hdcp *hdcp = dev_get_drvdata(device); ++#ifndef _KBASE_CPU_VEXPRESS_H_ ++#define _KBASE_CPU_VEXPRESS_H_ + -+ if (hdcp) -+ trytimes = hdcp->retry_times; ++/** ++ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func. ++ */ ++int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); + -+ return snprintf(buf, PAGE_SIZE, "%d\n", trytimes); -+} ++/** ++ * Get the minimum GPU frequency for the attached logic tile ++ */ ++u32 kbase_get_platform_min_freq(void); + -+static ssize_t trytimes_store(struct device *device, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int trytimes; -+ struct dw_qp_hdcp *hdcp = dev_get_drvdata(device); ++/** ++ * Get the maximum GPU frequency for the attached logic tile ++ */ ++u32 kbase_get_platform_max_freq(void); + -+ if (!hdcp) -+ return -EINVAL; ++#endif /* _KBASE_CPU_VEXPRESS_H_ */ +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild +new file mode 100755 +index 000000000..7efe8fa42 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild +@@ -0,0 +1,16 @@ ++# ++# (C) COPYRIGHT 2013-2014, 2016 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
++# ++# + -+ if (kstrtoint(buf, 0, &trytimes)) -+ return -EINVAL; + -+ if (hdcp->retry_times != trytimes) { -+ hdcp->retry_times = trytimes; -+ hdcp->remaining_times = hdcp->retry_times; -+ } ++mali_kbase-y += $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +new file mode 100644 +index 000000000..0efbf3962 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +@@ -0,0 +1,73 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ return count; -+} + -+static DEVICE_ATTR_RW(trytimes); + -+static ssize_t status_show(struct device *device, -+ struct device_attribute *attr, char *buf) -+{ -+ int status = DW_HDCP_DISABLED; -+ struct dw_qp_hdcp *hdcp = dev_get_drvdata(device); ++/** ++ * Maximum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MAX 5000 ++/** ++ * Minimum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN 5000 + -+ if (hdcp) -+ status = hdcp->status; ++/** ++ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock ++ * ++ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed) + -+ if (status == DW_HDCP_DISABLED) -+ return snprintf(buf, PAGE_SIZE, "hdcp disable\n"); -+ else if (status == DW_HDCP_AUTH_START) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_start\n"); -+ else if (status == DW_HDCP_AUTH_SUCCESS) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_success\n"); -+ else if (status == DW_HDCP_AUTH_FAIL) -+ return snprintf(buf, PAGE_SIZE, "hdcp_auth_fail\n"); -+ else -+ return snprintf(buf, PAGE_SIZE, "unknown status\n"); -+} ++/** ++ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * ++ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. 
++ * Default Value: NA ++ */ ++#define GPU_SPEED_FUNC (NULL) + -+static DEVICE_ATTR_RO(status); ++/** ++ * Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+static struct attribute *dw_hdmi_qp_hdcp_attrs[] = { -+ &dev_attr_trytimes.attr, -+ &dev_attr_status.attr, -+ NULL -+}; -+ATTRIBUTE_GROUPS(dw_hdmi_qp_hdcp); ++/** ++ * Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+/* If sink is a repeater, we need to wait ksv list ready */ -+static void dw_hdmi_qp_hdcp(struct work_struct *p_work) -+{ -+ struct dw_qp_hdcp *hdcp = container_of(p_work, struct dw_qp_hdcp, work); -+ u32 val; -+ int i = 500; ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +new file mode 100644 +index 000000000..3ff0930fb +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +@@ -0,0 +1,79 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ while (i--) { -+ usleep_range(7000, 8000); + -+ mutex_lock(&hdcp->mutex); -+ if (hdcp->status == DW_HDCP_DISABLED) { -+ dev_dbg(hdcp->dev, "hdcp is disabled, don't wait repeater ready\n"); -+ mutex_unlock(&hdcp->mutex); -+ return; -+ } + -+ val = hdcp->read(hdcp->hdmi, HDCP14_STATUS1); ++#include ++#include ++#include ++#include + -+ /* sink isn't repeater or ksv fifo ready, stop waiting */ -+ if (!(val & HDCP14_RCV_REPEATER) || (val & HDCP14_RCV_KSV_FIFO_READY)) { -+ dev_dbg(hdcp->dev, "wait ksv fifo finished\n"); -+ mutex_unlock(&hdcp->mutex); -+ return; -+ } ++#define HARD_RESET_AT_POWER_OFF 0 + -+ mutex_unlock(&hdcp->mutex); -+ } ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 68, ++ .mmu_irq_number = 69, ++ .gpu_irq_number = 70, ++ .io_memory_region = { ++ .start = 0x2f010000, ++ .end = 0x2f010000 + (4096 * 4) - 1} ++}; ++#endif + -+ if (i < 0) { -+ dev_err(hdcp->dev, "wait repeater ready time out\n"); -+ dw_hdcp_qp_hdcp_restart(hdcp); -+ } ++static int pm_callback_power_on(struct kbase_device *kbdev) ++{ ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+static int dw_hdcp_qp_hdcp_probe(struct platform_device *pdev) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ int ret = 0; -+ struct dw_qp_hdcp *hdcp = pdev->dev.platform_data; -+ -+ /* retry time if hdcp auth fail. 
unlimited time if set 0 */ -+ hdcp->dev = &pdev->dev; -+ hdcp->hdcp_start = dw_hdcp_qp_hdcp_start; -+ hdcp->hdcp_stop = dw_hdcp_qp_hdcp_stop; -+ hdcp->hdcp_isr = dw_hdcp_qp_hdcp_isr; -+ -+ ret = device_add_groups(hdcp->dev, dw_hdmi_qp_hdcp_groups); -+ if (ret) { -+ dev_err(hdcp->dev, "Failed to add sysfs files group\n"); -+ return ret; -+ } -+ -+ platform_set_drvdata(pdev, hdcp); ++#if HARD_RESET_AT_POWER_OFF ++ /* Cause a GPU hard reset to test whether we have actually idled the GPU ++ * and that we properly reconfigure the GPU on power up. ++ * Usually this would be dangerous, but if the GPU is working correctly it should ++ * be completely safe as the GPU should not be active at this point. ++ * However this is disabled normally because it will most likely interfere with ++ * bus logging etc. ++ */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); ++ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); ++#endif ++} + -+ hdcp->workqueue = create_workqueue("hdcp_queue"); -+ INIT_WORK(&hdcp->work, dw_hdmi_qp_hdcp); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ hdcp->retry_times = 3; -+ mutex_init(&hdcp->mutex); ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+ dev_info(hdcp->dev, "%s success\n", __func__); -+ return 0; ++struct kbase_platform_config *kbase_get_platform_config(void) ++{ ++ return &versatile_platform_config; +} + -+static int dw_hdcp_qp_hdcp_remove(struct platform_device *pdev) ++int kbase_platform_early_init(void) +{ -+ struct dw_qp_hdcp *hdcp = pdev->dev.platform_data; -+ -+ cancel_work_sync(&hdcp->work); -+ flush_workqueue(hdcp->workqueue); -+ destroy_workqueue(hdcp->workqueue); -+ -+ device_remove_groups(hdcp->dev, dw_hdmi_qp_hdcp_groups); -+ kfree(hdcp->keys); -+ kfree(hdcp->seeds); -+ ++ /* Nothing needed at this stage */ + return 0; +} +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +new file mode 100755 +index 000000000..1caa29366 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +@@ -0,0 +1,18 @@ ++# ++# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
++# ++# + -+static struct platform_driver dw_hdcp_qp_hdcp_driver = { -+ .probe = dw_hdcp_qp_hdcp_probe, -+ .remove = dw_hdcp_qp_hdcp_remove, -+ .driver = { -+ .name = DW_HDCP_QP_DRIVER_NAME, -+ }, -+}; + -+module_platform_driver(dw_hdcp_qp_hdcp_driver); -+MODULE_DESCRIPTION("DW HDMI QP transmitter HDCP driver"); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.h ++mali_kbase-y += \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_config_vexpress.o \ ++ $(MALI_PLATFORM_THIRDPARTY_DIR)/mali_kbase_cpu_vexpress.o +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h new file mode 100644 -index 000000000..48c3a4843 +index 000000000..dbdf21e00 --- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-hdcp.h -@@ -0,0 +1,55 @@ -+/* SPDX-License-Identifier: GPL-2.0+ */ ++++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +@@ -0,0 +1,75 @@ +/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao ++ * ++ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * + */ -+#ifndef DW_HDMI_QP_HDCP_H -+#define DW_HDMI_QP_HDCP_H -+ -+#include + -+#define DW_HDCP_QP_DRIVER_NAME "dw-hdmi-qp-hdcp" -+#define PRIVATE_KEY_SIZE 280 -+#define KEY_SHA_SIZE 20 + -+#define KSV_LEN 5 -+#define BSTATUS_LEN 2 -+#define M0_LEN 8 -+#define SHAMAX 20 + -+struct dw_hdmi_qp_hdcp_keys { -+ u8 KSV[8]; -+ u8 devicekey[PRIVATE_KEY_SIZE]; -+ u8 sha1[KEY_SHA_SIZE]; -+}; ++#include "mali_kbase_cpu_vexpress.h" + -+struct dw_qp_hdcp { -+ int retry_times; -+ int remaining_times; -+ char *seeds; -+ int invalidkey; -+ char *invalidkeys; -+ int hdcp2_enable; -+ int status; -+ u32 reg_io_width; ++/** ++ * Maximum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MAX 10000 ++/** ++ * Minimum frequency GPU will be clocked at. Given in kHz. ++ * This must be specified as there is no default value. ++ * ++ * Attached value: number in kHz ++ * Default value: NA ++ */ ++#define GPU_FREQ_KHZ_MIN 10000 + -+ struct dw_hdmi_qp_hdcp_keys *keys; -+ struct device *dev; -+ struct dw_hdmi_qp *hdmi; -+ void __iomem *regs; ++/** ++ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock ++ * ++ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_cpu_clk_speed_func. ++ * Default Value: NA ++ */ ++#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) + -+ struct mutex mutex; ++/** ++ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock ++ * ++ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func ++ * for the function prototype. ++ * ++ * Attached value: A kbase_gpu_clk_speed_func. 
++ * Default Value: NA ++ */ ++#define GPU_SPEED_FUNC (NULL) + -+ struct work_struct work; -+ struct workqueue_struct *workqueue; ++/** ++ * Power management configuration ++ * ++ * Attached value: pointer to @ref kbase_pm_callback_conf ++ * Default value: See @ref kbase_pm_callback_conf ++ */ ++#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + -+ void (*write)(struct dw_hdmi_qp *hdmi, u32 val, int offset); -+ u32 (*read)(struct dw_hdmi_qp *hdmi, int offset); -+ void (*get_mem)(struct dw_hdmi_qp *hdmi, u8 *data, u32 len); -+ int (*hdcp_start)(struct dw_qp_hdcp *hdcp); -+ int (*hdcp_stop)(struct dw_qp_hdcp *hdcp); -+ void (*hdcp_isr)(struct dw_qp_hdcp *hdcp, u32 avp_int, u32 hdcp_status); -+}; ++/** ++ * Platform specific configuration functions ++ * ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++#define PLATFORM_FUNCS (NULL) + -+#endif -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-i2s-audio.c ++extern struct kbase_pm_callback_conf pm_callbacks; +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c new file mode 100644 -index 000000000..e07c2b423 +index 000000000..76ffe4a1e --- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp-i2s-audio.c -@@ -0,0 +1,188 @@ -+// SPDX-License-Identifier: GPL-2.0 ++++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +@@ -0,0 +1,83 @@ +/* -+ * dw-hdmi-qp-i2s-audio.c + * -+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd. -+ * Author: Sugar Zhang ++ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * + */ + -+#include -+#include + -+#include -+#include + -+#include + -+#include "dw-hdmi-qp.h" -+#include "dw-hdmi-qp-audio.h" + -+#define DRIVER_NAME "dw-hdmi-qp-i2s-audio" ++#include ++#include ++#include ++#include ++#include "mali_kbase_cpu_vexpress.h" + -+static inline void hdmi_write(struct dw_hdmi_qp_i2s_audio_data *audio, -+ u32 val, int offset) -+{ -+ struct dw_hdmi_qp *hdmi = audio->hdmi; ++#define HARD_RESET_AT_POWER_OFF 0 + -+ audio->write(hdmi, val, offset); -+} ++#ifndef CONFIG_OF ++static struct kbase_io_resources io_resources = { ++ .job_irq_number = 75, ++ .mmu_irq_number = 76, ++ .gpu_irq_number = 77, ++ .io_memory_region = { ++ .start = 0x2F000000, ++ .end = 0x2F000000 + (4096 * 4) - 1} ++}; ++#endif + -+static inline u32 hdmi_read(struct dw_hdmi_qp_i2s_audio_data *audio, int offset) ++static int pm_callback_power_on(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp *hdmi = audio->hdmi; -+ -+ return audio->read(hdmi, offset); ++ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ ++ return 1; +} + -+static inline void hdmi_mod(struct dw_hdmi_qp_i2s_audio_data *audio, -+ u32 data, u32 mask, u32 reg) ++static void pm_callback_power_off(struct kbase_device *kbdev) +{ -+ struct dw_hdmi_qp *hdmi = audio->hdmi; -+ -+ return audio->mod(hdmi, data, mask, reg); ++#if HARD_RESET_AT_POWER_OFF ++ /* Cause a GPU hard reset to test whether we have actually idled the GPU ++ * and that we properly reconfigure the GPU on power up. ++ * Usually this would be dangerous, but if the GPU is working correctly it should ++ * be completely safe as the GPU should not be active at this point. ++ * However this is disabled normally because it will most likely interfere with ++ * bus logging etc. 
++ */ ++ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); ++ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); ++#endif +} + -+static int dw_hdmi_qp_i2s_hw_params(struct device *dev, void *data, -+ struct hdmi_codec_daifmt *fmt, -+ struct hdmi_codec_params *hparms) -+{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = data; -+ struct dw_hdmi_qp *hdmi = audio->hdmi; -+ bool ref2stream = false; -+ -+ if (fmt->bit_clk_provider | fmt->frame_clk_provider) { -+ dev_err(dev, "unsupported clock settings\n"); -+ return -EINVAL; -+ } -+ -+ if (fmt->bit_fmt == SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE) -+ ref2stream = true; -+ -+ dw_hdmi_qp_set_audio_interface(hdmi, fmt, hparms); -+ dw_hdmi_qp_set_sample_rate(hdmi, hparms->sample_rate); -+ dw_hdmi_qp_set_channel_status(hdmi, hparms->iec.status, ref2stream); -+ dw_hdmi_qp_set_channel_count(hdmi, hparms->channels); -+ dw_hdmi_qp_set_channel_allocation(hdmi, hparms->cea.channel_allocation); -+ dw_hdmi_qp_set_audio_infoframe(hdmi, hparms); ++struct kbase_pm_callback_conf pm_callbacks = { ++ .power_on_callback = pm_callback_power_on, ++ .power_off_callback = pm_callback_power_off, ++ .power_suspend_callback = NULL, ++ .power_resume_callback = NULL ++}; + -+ return 0; -+} ++static struct kbase_platform_config versatile_platform_config = { ++#ifndef CONFIG_OF ++ .io_resources = &io_resources ++#endif ++}; + -+static int dw_hdmi_qp_i2s_audio_startup(struct device *dev, void *data) ++struct kbase_platform_config *kbase_get_platform_config(void) +{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = data; -+ struct dw_hdmi_qp *hdmi = audio->hdmi; -+ -+ dw_hdmi_qp_audio_enable(hdmi); -+ -+ return 0; ++ return &versatile_platform_config; +} + -+static void dw_hdmi_qp_i2s_audio_shutdown(struct device *dev, void *data) ++int kbase_platform_early_init(void) +{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = data; -+ struct dw_hdmi_qp *hdmi = audio->hdmi; -+ -+ dw_hdmi_qp_audio_disable(hdmi); ++ /* Nothing needed at this stage */ ++ return 0; +} + -+static int dw_hdmi_qp_i2s_get_eld(struct device *dev, void *data, uint8_t *buf, -+ size_t len) -+{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = data; +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c +new file mode 100644 +index 000000000..816dff498 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c +@@ -0,0 +1,71 @@ ++/* ++ * ++ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ + -+ memcpy(buf, audio->eld, min_t(size_t, MAX_ELD_BYTES, len)); + -+ return 0; -+} + -+static int dw_hdmi_qp_i2s_get_dai_id(struct snd_soc_component *component, -+ struct device_node *endpoint) -+{ -+ struct of_endpoint of_ep; -+ int ret; + -+ ret = of_graph_parse_endpoint(endpoint, &of_ep); -+ if (ret < 0) -+ return ret; + -+ /* -+ * HDMI sound should be located as reg = <2> -+ * Then, it is sound port 0 -+ */ -+ if (of_ep.port == 2) -+ return 0; ++#include ++#include ++#include "mali_kbase_cpu_vexpress.h" + -+ return -EINVAL; -+} ++#define HZ_IN_MHZ (1000000) + -+static int dw_hdmi_qp_i2s_hook_plugged_cb(struct device *dev, void *data, -+ hdmi_codec_plugged_cb fn, -+ struct device *codec_dev) -+{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = data; -+ struct dw_hdmi_qp *hdmi = audio->hdmi; ++#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) ++#define MOTHERBOARD_SYS_CFG_START (0x10000000) ++#define SYS_CFGDATA_OFFSET (0x000000A0) ++#define SYS_CFGCTRL_OFFSET (0x000000A4) ++#define SYS_CFGSTAT_OFFSET (0x000000A8) + -+ return dw_hdmi_qp_set_plugged_cb(hdmi, fn, codec_dev); -+} ++#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) ++#define READ_REG_BIT_VALUE (0 << 30) ++#define DCC_DEFAULT_BIT_VALUE (0 << 26) ++#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) ++#define SITE_DEFAULT_BIT_VALUE (1 << 16) ++#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) ++#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) ++#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) ++#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + -+static struct hdmi_codec_ops dw_hdmi_qp_i2s_ops = { -+ .hw_params = dw_hdmi_qp_i2s_hw_params, -+ .audio_startup = dw_hdmi_qp_i2s_audio_startup, -+ .audio_shutdown = dw_hdmi_qp_i2s_audio_shutdown, -+ .get_eld = dw_hdmi_qp_i2s_get_eld, -+ .get_dai_id = dw_hdmi_qp_i2s_get_dai_id, -+ .hook_plugged_cb = dw_hdmi_qp_i2s_hook_plugged_cb, -+}; ++#define FEED_REG_BIT_MASK (0x0F) ++#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) ++#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) ++#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) ++#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) ++#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + -+static int snd_dw_hdmi_qp_probe(struct platform_device *pdev) -+{ -+ struct dw_hdmi_qp_i2s_audio_data *audio = pdev->dev.platform_data; -+ struct platform_device_info pdevinfo; -+ struct hdmi_codec_pdata pdata; -+ struct platform_device *platform; ++#define IS_SINGLE_BIT_SET(val, pos) (val&(1<dev.parent; -+ pdevinfo.id = PLATFORM_DEVID_AUTO; -+ pdevinfo.name = HDMI_CODEC_DRV_NAME; -+ pdevinfo.data = &pdata; -+ pdevinfo.size_data = sizeof(pdata); -+ pdevinfo.dma_mask = DMA_BIT_MASK(32); ++#define CPU_CLOCK_SPEED_6XV7 50 + -+ platform = platform_device_register_full(&pdevinfo); -+ if (IS_ERR(platform)) -+ return PTR_ERR(platform); ++static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED; + -+ dev_set_drvdata(&pdev->dev, platform); ++static DEFINE_RAW_SPINLOCK(syscfg_lock); ++/** ++ * kbase_get_vendor_specific_cpu_clock_speed ++ * @brief Retrieves the CPU clock speed. ++ * The implementation is platform specific. 
++ * @param[out] cpu_clock - the value of CPU clock speed in MHz ++ * @return 0 on success, 1 otherwise ++*/ ++int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock) ++{ ++ /* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */ ++ *cpu_clock = CPU_CLOCK_SPEED_6XV7; + + return 0; +} +diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h +new file mode 100644 +index 000000000..23647ccb0 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h +@@ -0,0 +1,28 @@ ++/* ++ * ++ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static int snd_dw_hdmi_qp_remove(struct platform_device *pdev) -+{ -+ struct platform_device *platform = dev_get_drvdata(&pdev->dev); + -+ platform_device_unregister(platform); + -+ return 0; -+} + -+static struct platform_driver snd_dw_hdmi_qp_driver = { -+ .probe = snd_dw_hdmi_qp_probe, -+ .remove = snd_dw_hdmi_qp_remove, -+ .driver = { -+ .name = DRIVER_NAME, -+ }, -+}; -+module_platform_driver(snd_dw_hdmi_qp_driver); + -+MODULE_AUTHOR("Sugar Zhang "); -+MODULE_DESCRIPTION("Synopsis Designware HDMI QP I2S ALSA SoC interface"); -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS("platform:" DRIVER_NAME); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c ++#ifndef _KBASE_CPU_VEXPRESS_H_ ++#define _KBASE_CPU_VEXPRESS_H_ ++ ++/** ++ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func. ++ */ ++int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); ++ ++#endif /* _KBASE_CPU_VEXPRESS_H_ */ +diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h new file mode 100644 -index 000000000..f6803bc2a +index 000000000..5fa9b39c4 --- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c -@@ -0,0 +1,4121 @@ -+// SPDX-License-Identifier: GPL-2.0+ ++++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h +@@ -0,0 +1,53 @@ +/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao ++ * ++ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * + */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + -+#include -+#include + -+#include "dw-hdmi-qp-audio.h" -+#include "dw-hdmi-qp.h" -+#include "dw-hdmi-qp-cec.h" -+#include "dw-hdmi-qp-hdcp.h" + -+#include + -+#define DDC_CI_ADDR 0x37 -+#define DDC_SEGMENT_ADDR 0x30 + -+#define HDMI_EDID_LEN 512 ++/** ++ * @file mali_ukk_os.h ++ * Types and definitions that are common for Linux OSs for the kernel side of the ++ * User-Kernel interface. ++ */ + -+/* DW-HDMI Controller >= 0x200a are at least compliant with SCDC version 1 */ -+#define SCDC_MIN_SOURCE_VERSION 0x1 ++#ifndef _UKK_OS_H_ /* Linux version */ ++#define _UKK_OS_H_ + -+#define HDMI14_MAX_TMDSCLK 340000000 -+#define HDMI20_MAX_TMDSCLK_KHZ 600000 ++#include + -+#define HDMI_VH0 0x20 -+#define HDMI_HDCP_ADDR 0x3a -+#define HDMI_BCAPS 0x40 -+#define HDMI_HDCP14_SUPPORT BIT(7) -+#define HDMI_HDCP2_VERSION 0x50 -+#define HDMI_HDCP2_SUPPORT BIT(2) ++/** ++ * @addtogroup uk_api User-Kernel Interface API ++ * @{ ++ */ + -+#define SINK_CAP_HDCP14 BIT(0) -+#define SINK_CAP_HDCP2 BIT(1) ++/** ++ * @addtogroup uk_api_kernel UKK (Kernel side) ++ * @{ ++ */ + -+#define HDMI_HDCP2_AUTH BIT(1) -+#define HDMI_HDCP14_AUTH BIT(0) ++/** ++ * Internal OS specific data structure associated with each UKK session. Part ++ * of a ukk_session object. ++ */ ++typedef struct ukkp_session { ++ int dummy; /**< No internal OS specific data at this time */ ++} ukkp_session; + -+static const unsigned int dw_hdmi_cable[] = { -+ EXTCON_DISP_HDMI, -+ EXTCON_NONE, -+}; ++/** @} end group uk_api_kernel */ + -+/* -+ * Recommended N and Expected CTS Values in FRL Mode in chapter 9.2.2 -+ * of HDMI Specification 2.1. -+ */ -+static const struct dw_hdmi_audio_frl_n common_frl_n_table[] = { -+ { .r_bit = 3, .n_32k = 4224, .n_44k1 = 5292, .n_48k = 5760, }, -+ { .r_bit = 6, .n_32k = 4032, .n_44k1 = 5292, .n_48k = 6048, }, -+ { .r_bit = 8, .n_32k = 4032, .n_44k1 = 3969, .n_48k = 6048, }, -+ { .r_bit = 10, .n_32k = 3456, .n_44k1 = 3969, .n_48k = 5184, }, -+ { .r_bit = 12, .n_32k = 3072, .n_44k1 = 3969, .n_48k = 4752, }, -+}; ++/** @} end group uk_api */ + ++#endif /* _UKK_OS_H__ */ +diff --git a/drivers/gpu/arm/midgard/protected_mode_switcher.h b/drivers/gpu/arm/midgard/protected_mode_switcher.h +new file mode 100644 +index 000000000..5dc2f3ba8 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/protected_mode_switcher.h +@@ -0,0 +1,64 @@ +/* -+ * Unless otherwise noted, entries in this table are 100% optimization. -+ * Values can be obtained from hdmi_compute_n() but that function is -+ * slow so we pre-compute values we expect to see. + * -+ * All 32k and 48k values are expected to be the same (due to the way -+ * the math works) for any rate that's an exact kHz. ++ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * + */ -+static const struct dw_hdmi_audio_tmds_n common_tmds_n_table[] = { -+ { .tmds = 25175000, .n_32k = 4096, .n_44k1 = 12854, .n_48k = 6144, }, -+ { .tmds = 25200000, .n_32k = 4096, .n_44k1 = 5656, .n_48k = 6144, }, -+ { .tmds = 27000000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 28320000, .n_32k = 4096, .n_44k1 = 5586, .n_48k = 6144, }, -+ { .tmds = 30240000, .n_32k = 4096, .n_44k1 = 5642, .n_48k = 6144, }, -+ { .tmds = 31500000, .n_32k = 4096, .n_44k1 = 5600, .n_48k = 6144, }, -+ { .tmds = 32000000, .n_32k = 4096, .n_44k1 = 5733, .n_48k = 6144, }, -+ { .tmds = 33750000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 36000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 40000000, .n_32k = 4096, .n_44k1 = 5733, .n_48k = 6144, }, -+ { .tmds = 49500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 50000000, .n_32k = 4096, .n_44k1 = 5292, .n_48k = 6144, }, -+ { .tmds = 54000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 65000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 68250000, .n_32k = 4096, .n_44k1 = 5376, .n_48k = 6144, }, -+ { .tmds = 71000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 72000000, .n_32k = 4096, .n_44k1 = 5635, .n_48k = 6144, }, -+ { .tmds = 73250000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 74250000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 75000000, .n_32k = 4096, .n_44k1 = 5880, .n_48k = 6144, }, -+ { .tmds = 78750000, .n_32k = 4096, .n_44k1 = 5600, .n_48k = 6144, }, -+ { .tmds = 78800000, .n_32k = 4096, .n_44k1 = 5292, .n_48k = 6144, }, -+ { .tmds = 79500000, .n_32k = 4096, .n_44k1 = 4704, .n_48k = 6144, }, -+ { .tmds = 83500000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 85500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 88750000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 97750000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 101000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 106500000, .n_32k = 4096, .n_44k1 = 4704, .n_48k = 6144, }, -+ { .tmds = 108000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 115500000, .n_32k = 4096, .n_44k1 = 5712, .n_48k = 6144, }, -+ { .tmds = 119000000, .n_32k = 4096, .n_44k1 = 5544, .n_48k = 6144, }, -+ { .tmds = 135000000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 146250000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 148500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 154000000, .n_32k = 4096, .n_44k1 = 5544, .n_48k = 6144, }, -+ { .tmds = 162000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, + -+ /* For 297 MHz+ HDMI spec have some other rule for setting N */ -+ { .tmds = 297000000, .n_32k = 3073, .n_44k1 = 4704, .n_48k = 5120, }, -+ { .tmds = 594000000, .n_32k = 3073, .n_44k1 = 9408, .n_48k = 10240, }, + -+ /* End of table */ -+ { .tmds = 0, .n_32k = 0, .n_44k1 = 0, .n_48k = 0, }, -+}; + -+static const struct drm_display_mode dw_hdmi_default_modes[] = { -+ /* 16 - 1920x1080@60Hz 16:9 */ -+ { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2008, -+ 2052, 2200, 0, 1080, 1084, 1089, 1125, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 2 - 720x480@60Hz 4:3 */ -+ { DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 27000, 720, 736, -+ 798, 858, 0, 480, 489, 495, 525, 0, -+ DRM_MODE_FLAG_NHSYNC | 
DRM_MODE_FLAG_NVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, }, -+ /* 4 - 1280x720@60Hz 16:9 */ -+ { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1390, -+ 1430, 1650, 0, 720, 725, 730, 750, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 31 - 1920x1080@50Hz 16:9 */ -+ { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2448, -+ 2492, 2640, 0, 1080, 1084, 1089, 1125, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 19 - 1280x720@50Hz 16:9 */ -+ { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1720, -+ 1760, 1980, 0, 720, 725, 730, 750, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 17 - 720x576@50Hz 4:3 */ -+ { DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 27000, 720, 732, -+ 796, 864, 0, 576, 581, 586, 625, 0, -+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, }, -+ /* 2 - 720x480@60Hz 4:3 */ -+ { DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 27000, 720, 736, -+ 798, 858, 0, 480, 489, 495, 525, 0, -+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, }, -+}; ++#ifndef _PROTECTED_MODE_SWITCH_H_ ++#define _PROTECTED_MODE_SWITCH_H_ + -+enum frl_mask { -+ FRL_3GBPS_3LANE = 1, -+ FRL_6GBPS_3LANE, -+ FRL_6GBPS_4LANE, -+ FRL_8GBPS_4LANE, -+ FRL_10GBPS_4LANE, -+ FRL_12GBPS_4LANE, -+}; ++struct protected_mode_device; + -+struct hdmi_vmode_qp { -+ bool mdataenablepolarity; ++/** ++ * struct protected_mode_ops - Callbacks for protected mode switch operations ++ * ++ * @protected_mode_enable: Callback to enable protected mode for device ++ * @protected_mode_disable: Callback to disable protected mode for device ++ */ ++struct protected_mode_ops { ++ /** ++ * protected_mode_enable() - Enable protected mode on device ++ * @dev: The struct device ++ * ++ * Return: 0 on success, non-zero on error ++ */ ++ int (*protected_mode_enable)( ++ struct protected_mode_device *protected_dev); + -+ unsigned int previous_pixelclock; -+ unsigned long mpixelclock; -+ unsigned int mpixelrepetitioninput; -+ unsigned int mpixelrepetitionoutput; -+ unsigned long previous_tmdsclock; -+ unsigned int mtmdsclock; ++ /** ++ * protected_mode_disable() - Disable protected mode on device, and ++ * reset device ++ * @dev: The struct device ++ * ++ * Return: 0 on success, non-zero on error ++ */ ++ int (*protected_mode_disable)( ++ struct protected_mode_device *protected_dev); +}; + -+struct hdmi_qp_data_info { -+ unsigned int enc_in_bus_format; -+ unsigned int enc_out_bus_format; -+ unsigned int enc_in_encoding; -+ unsigned int enc_out_encoding; -+ unsigned int quant_range; -+ unsigned int pix_repet_factor; -+ struct hdmi_vmode_qp video_mode; -+ bool update; ++/** ++ * struct protected_mode_device - Device structure for protected mode devices ++ * ++ * @ops - Callbacks associated with this device ++ * @data - Pointer to device private data ++ * ++ * This structure should be registered with the platform device using ++ * platform_set_drvdata(). 
++ */ ++struct protected_mode_device { ++ struct protected_mode_ops ops; ++ void *data; +}; + -+struct dw_hdmi_qp_i2c { -+ struct i2c_adapter adap; -+ -+ struct mutex lock; /* used to serialize data transfers */ -+ struct completion cmp; -+ u32 stat; ++#endif /* _PROTECTED_MODE_SWITCH_H_ */ +diff --git a/drivers/gpu/arm/midgard/rename.h b/drivers/gpu/arm/midgard/rename.h +new file mode 100644 +index 000000000..821866618 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/rename.h +@@ -0,0 +1,426 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _RENAME_H ++#define _RENAME_H ++#define __crc_kbase_create_context midgard___crc_kbase_create_context ++#define __crc_kbase_destroy_context midgard___crc_kbase_destroy_context ++#define __crc_kbase_find_device midgard___crc_kbase_find_device ++#define __crc_kbase_instr_hwcnt_clear midgard___crc_kbase_instr_hwcnt_clear ++#define __crc_kbase_instr_hwcnt_dump_complete midgard___crc_kbase_instr_hwcnt_dump_complete ++#define __crc_kbase_instr_hwcnt_request_dump midgard___crc_kbase_instr_hwcnt_request_dump ++#define __crc_kbase_release_device midgard___crc_kbase_release_device ++#define jd_done_nolock midgard_jd_done_nolock ++#define kbase_add_va_region midgard_kbase_add_va_region ++#define kbase_alloc_free_region midgard_kbase_alloc_free_region ++#define kbase_alloc_phy_pages_helper midgard_kbase_alloc_phy_pages_helper ++#define kbase_alloc_phy_pages midgard_kbase_alloc_phy_pages ++#define kbase_as_fault_debugfs_init midgard_kbase_as_fault_debugfs_init ++#define kbase_backend_complete_wq midgard_kbase_backend_complete_wq ++#define kbase_backend_complete_wq_post_sched midgard_kbase_backend_complete_wq_post_sched ++#define kbase_backend_ctx_count_changed midgard_kbase_backend_ctx_count_changed ++#define kbase_backend_find_and_release_free_address_space midgard_kbase_backend_find_and_release_free_address_space ++#define kbase_backend_get_current_flush_id midgard_kbase_backend_get_current_flush_id ++#define kbase_backend_get_gpu_time midgard_kbase_backend_get_gpu_time ++#define kbase_backend_gpuprops_get_features midgard_kbase_backend_gpuprops_get_features ++#define kbase_backend_gpuprops_get midgard_kbase_backend_gpuprops_get ++#define kbase_backend_inspect_tail midgard_kbase_backend_inspect_tail ++#define kbase_backend_nr_atoms_on_slot midgard_kbase_backend_nr_atoms_on_slot ++#define kbase_backend_nr_atoms_submitted midgard_kbase_backend_nr_atoms_submitted ++#define kbase_backend_release_ctx_irq midgard_kbase_backend_release_ctx_irq ++#define kbase_backend_release_ctx_noirq midgard_kbase_backend_release_ctx_noirq ++#define kbase_backend_reset midgard_kbase_backend_reset ++#define kbase_backend_run_atom midgard_kbase_backend_run_atom ++#define kbase_backend_slot_free midgard_kbase_backend_slot_free ++#define kbase_backend_slot_update midgard_kbase_backend_slot_update ++#define kbase_backend_soft_hard_stop_slot midgard_kbase_backend_soft_hard_stop_slot ++#define kbase_backend_timeouts_changed midgard_kbase_backend_timeouts_changed ++#define kbase_backend_timer_init midgard_kbase_backend_timer_init ++#define kbase_backend_timer_resume midgard_kbase_backend_timer_resume ++#define kbase_backend_timer_suspend midgard_kbase_backend_timer_suspend ++#define kbase_backend_timer_term midgard_kbase_backend_timer_term ++#define kbase_backend_use_ctx midgard_kbase_backend_use_ctx ++#define kbase_backend_use_ctx_sched midgard_kbase_backend_use_ctx_sched ++#define kbase_cache_enabled midgard_kbase_cache_enabled ++#define kbase_cache_set_coherency_mode 
midgard_kbase_cache_set_coherency_mode ++#define kbase_cancel_soft_job midgard_kbase_cancel_soft_job ++#define kbase_check_alloc_flags midgard_kbase_check_alloc_flags ++#define kbase_check_import_flags midgard_kbase_check_import_flags ++#define kbase_clean_caches_done midgard_kbase_clean_caches_done ++#define kbase_create_context midgard_kbase_create_context ++#define kbase_ctx_sched_init midgard_kbase_ctx_sched_init ++#define kbase_ctx_sched_release_ctx midgard_kbase_ctx_sched_release_ctx ++#define kbase_ctx_sched_remove_ctx midgard_kbase_ctx_sched_remove_ctx ++#define kbase_ctx_sched_restore_all_as midgard_kbase_ctx_sched_restore_all_as ++#define kbase_ctx_sched_retain_ctx midgard_kbase_ctx_sched_retain_ctx ++#define kbase_ctx_sched_retain_ctx_refcount midgard_kbase_ctx_sched_retain_ctx_refcount ++#define kbase_ctx_sched_term midgard_kbase_ctx_sched_term ++#define kbase_debug_assert_register_hook midgard_kbase_debug_assert_register_hook ++#define kbase_debug_job_fault_context_init midgard_kbase_debug_job_fault_context_init ++#define kbase_debug_job_fault_context_term midgard_kbase_debug_job_fault_context_term ++#define kbase_debug_job_fault_debugfs_init midgard_kbase_debug_job_fault_debugfs_init ++#define kbase_debug_job_fault_dev_init midgard_kbase_debug_job_fault_dev_init ++#define kbase_debug_job_fault_dev_term midgard_kbase_debug_job_fault_dev_term ++#define kbase_debug_job_fault_process midgard_kbase_debug_job_fault_process ++#define kbase_debug_job_fault_reg_snapshot_init midgard_kbase_debug_job_fault_reg_snapshot_init ++#define kbase_debug_mem_view_init midgard_kbase_debug_mem_view_init ++#define kbase_destroy_context midgard_kbase_destroy_context ++#define kbase_devfreq_init midgard_kbase_devfreq_init ++#define kbase_devfreq_set_core_mask midgard_kbase_devfreq_set_core_mask ++#define kbase_devfreq_term midgard_kbase_devfreq_term ++#define kbase_device_alloc midgard_kbase_device_alloc ++#define kbase_device_free midgard_kbase_device_free ++#define kbase_device_init midgard_kbase_device_init ++#define kbase_device_term midgard_kbase_device_term ++#define kbase_disjoint_event_get midgard_kbase_disjoint_event_get ++#define kbase_disjoint_event midgard_kbase_disjoint_event ++#define kbase_disjoint_event_potential midgard_kbase_disjoint_event_potential ++#define kbase_disjoint_init midgard_kbase_disjoint_init ++#define kbase_disjoint_state_down midgard_kbase_disjoint_state_down ++#define kbase_disjoint_state_up midgard_kbase_disjoint_state_up ++#define kbase_drv_name midgard_kbase_drv_name ++#define kbase_event_cleanup midgard_kbase_event_cleanup ++#define kbase_event_close midgard_kbase_event_close ++#define kbase_event_dequeue midgard_kbase_event_dequeue ++#define kbase_event_init midgard_kbase_event_init ++#define kbase_event_pending midgard_kbase_event_pending ++#define kbase_event_post midgard_kbase_event_post ++#define kbase_event_wakeup midgard_kbase_event_wakeup ++#define kbase_fence_add_callback midgard_kbase_fence_add_callback ++#define kbase_fence_free_callbacks midgard_kbase_fence_free_callbacks ++#define kbase_fence_ops midgard_kbase_fence_ops ++#define kbase_fence_out_new midgard_kbase_fence_out_new ++#define kbase_find_device midgard_kbase_find_device ++#define kbase_finish_soft_job midgard_kbase_finish_soft_job ++#define kbase_flush_mmu_wqs midgard_kbase_flush_mmu_wqs ++#define kbase_free_alloced_region midgard_kbase_free_alloced_region ++#define kbase_free_phy_pages_helper midgard_kbase_free_phy_pages_helper ++#define kbase_get_real_power midgard_kbase_get_real_power 
++#define kbase_gpu_complete_hw midgard_kbase_gpu_complete_hw ++#define kbase_gpu_dump_slots midgard_kbase_gpu_dump_slots ++#define kbase_gpu_inspect midgard_kbase_gpu_inspect ++#define kbase_gpu_interrupt midgard_kbase_gpu_interrupt ++#define kbase_gpu_irq_evict midgard_kbase_gpu_irq_evict ++#define kbase_gpu_mmap midgard_kbase_gpu_mmap ++#define kbase_gpu_munmap midgard_kbase_gpu_munmap ++#define kbase_gpuprops_populate_user_buffer midgard_kbase_gpuprops_populate_user_buffer ++#define kbase_gpuprops_set_features midgard_kbase_gpuprops_set_features ++#define kbase_gpuprops_set midgard_kbase_gpuprops_set ++#define kbase_gpuprops_update_core_props_gpu_id midgard_kbase_gpuprops_update_core_props_gpu_id ++#define kbase_gpu_vm_lock midgard_kbase_gpu_vm_lock ++#define kbase_gpu_vm_unlock midgard_kbase_gpu_vm_unlock ++#define kbase_hwaccess_pm_gpu_active midgard_kbase_hwaccess_pm_gpu_active ++#define kbase_hwaccess_pm_gpu_idle midgard_kbase_hwaccess_pm_gpu_idle ++#define kbase_hwaccess_pm_halt midgard_kbase_hwaccess_pm_halt ++#define kbase_hwaccess_pm_init midgard_kbase_hwaccess_pm_init ++#define kbase_hwaccess_pm_powerup midgard_kbase_hwaccess_pm_powerup ++#define kbase_hwaccess_pm_resume midgard_kbase_hwaccess_pm_resume ++#define kbase_hwaccess_pm_suspend midgard_kbase_hwaccess_pm_suspend ++#define kbase_hwaccess_pm_term midgard_kbase_hwaccess_pm_term ++#define kbase_hw_set_features_mask midgard_kbase_hw_set_features_mask ++#define kbase_hw_set_issues_mask midgard_kbase_hw_set_issues_mask ++#define kbase_install_interrupts midgard_kbase_install_interrupts ++#define kbase_instr_backend_init midgard_kbase_instr_backend_init ++#define kbase_instr_backend_term midgard_kbase_instr_backend_term ++#define kbase_instr_hwcnt_clear midgard_kbase_instr_hwcnt_clear ++#define kbase_instr_hwcnt_disable_internal midgard_kbase_instr_hwcnt_disable_internal ++#define kbase_instr_hwcnt_dump_complete midgard_kbase_instr_hwcnt_dump_complete ++#define kbase_instr_hwcnt_enable_internal midgard_kbase_instr_hwcnt_enable_internal ++#define kbase_instr_hwcnt_request_dump midgard_kbase_instr_hwcnt_request_dump ++#define kbase_instr_hwcnt_sample_done midgard_kbase_instr_hwcnt_sample_done ++#define kbase_instr_hwcnt_wait_for_dump midgard_kbase_instr_hwcnt_wait_for_dump ++#define kbase_invoke_smc_fid midgard_kbase_invoke_smc_fid ++#define kbase_invoke_smc midgard_kbase_invoke_smc ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++#define kbase_io_history_dump midgard_kbase_io_history_dump ++#define kbase_io_history_init midgard_kbase_io_history_init ++#define kbase_io_history_term midgard_kbase_io_history_term ++#endif /* CONFIG_DEBUG_FS */ ++#define kbase_ipa_debugfs_init midgard_kbase_ipa_debugfs_init ++#define kbase_ipa_init midgard_kbase_ipa_init ++#define kbase_ipa_init_model midgard_kbase_ipa_init_model ++#define kbase_ipa_model_add_param_s32 midgard_kbase_ipa_model_add_param_s32 ++#define kbase_ipa_model_add_param_string midgard_kbase_ipa_model_add_param_string ++#define kbase_ipa_model_name_from_id midgard_kbase_ipa_model_name_from_id ++#define kbase_ipa_model_param_add midgard_kbase_ipa_model_param_add ++#define kbase_ipa_model_param_free_all midgard_kbase_ipa_model_param_free_all ++#define kbase_ipa_model_recalculate midgard_kbase_ipa_model_recalculate ++#define kbase_ipa_power_model_ops midgard_kbase_ipa_power_model_ops ++#define kbase_ipa_term midgard_kbase_ipa_term ++#define kbase_ipa_term_model midgard_kbase_ipa_term_model ++#define kbase_jd_cancel midgard_kbase_jd_cancel ++#define kbase_jd_done 
midgard_kbase_jd_done ++#define kbase_jd_done_worker midgard_kbase_jd_done_worker ++#define kbase_jd_exit midgard_kbase_jd_exit ++#define kbase_jd_free_external_resources midgard_kbase_jd_free_external_resources ++#define kbase_jd_init midgard_kbase_jd_init ++#define kbase_jd_submit midgard_kbase_jd_submit ++#define kbase_jd_zap_context midgard_kbase_jd_zap_context ++#define kbase_jit_allocate midgard_kbase_jit_allocate ++#define kbase_jit_backing_lost midgard_kbase_jit_backing_lost ++#define kbase_jit_debugfs_init midgard_kbase_jit_debugfs_init ++#define kbase_jit_evict midgard_kbase_jit_evict ++#define kbase_jit_free midgard_kbase_jit_free ++#define kbase_jit_init midgard_kbase_jit_init ++#define kbase_jit_term midgard_kbase_jit_term ++#define kbase_jm_complete midgard_kbase_jm_complete ++#define kbase_jm_idle_ctx midgard_kbase_jm_idle_ctx ++#define kbase_jm_kick midgard_kbase_jm_kick ++#define kbase_jm_return_atom_to_js midgard_kbase_jm_return_atom_to_js ++#define kbase_jm_try_kick_all midgard_kbase_jm_try_kick_all ++#define kbase_jm_try_kick midgard_kbase_jm_try_kick ++#define kbase_jm_wait_for_zero_jobs midgard_kbase_jm_wait_for_zero_jobs ++#define kbase_job_check_enter_disjoint midgard_kbase_job_check_enter_disjoint ++#define kbase_job_check_leave_disjoint midgard_kbase_job_check_leave_disjoint ++#define kbase_job_done midgard_kbase_job_done ++#define kbase_job_fault_get_reg_snapshot midgard_kbase_job_fault_get_reg_snapshot ++#define kbase_job_hw_submit midgard_kbase_job_hw_submit ++#define kbase_job_slot_ctx_priority_check_locked midgard_kbase_job_slot_ctx_priority_check_locked ++#define kbase_job_slot_halt midgard_kbase_job_slot_halt ++#define kbase_job_slot_hardstop midgard_kbase_job_slot_hardstop ++#define kbase_job_slot_init midgard_kbase_job_slot_init ++#define kbase_job_slot_softstop midgard_kbase_job_slot_softstop ++#define kbase_job_slot_softstop_swflags midgard_kbase_job_slot_softstop_swflags ++#define kbase_job_slot_term midgard_kbase_job_slot_term ++#define kbase_js_complete_atom midgard_kbase_js_complete_atom ++#define kbase_js_complete_atom_wq midgard_kbase_js_complete_atom_wq ++#define kbase_js_dep_resolved_submit midgard_kbase_js_dep_resolved_submit ++#define kbase_js_is_atom_valid midgard_kbase_js_is_atom_valid ++#define kbase_js_pull midgard_kbase_js_pull ++#define kbase_js_sched midgard_kbase_js_sched ++#define kbase_js_set_timeouts midgard_kbase_js_set_timeouts ++#define kbase_js_unpull midgard_kbase_js_unpull ++#define kbase_js_zap_context midgard_kbase_js_zap_context ++#define kbase_map_external_resource midgard_kbase_map_external_resource ++#define kbase_mem_alias midgard_kbase_mem_alias ++#define kbase_mem_alloc midgard_kbase_mem_alloc ++#define kbase_mem_alloc_page midgard_kbase_mem_alloc_page ++#define kbase_mem_commit midgard_kbase_mem_commit ++#define kbase_mem_evictable_deinit midgard_kbase_mem_evictable_deinit ++#define kbase_mem_evictable_init midgard_kbase_mem_evictable_init ++#define kbase_mem_evictable_make midgard_kbase_mem_evictable_make ++#define kbase_mem_evictable_unmake midgard_kbase_mem_evictable_unmake ++#define kbase_mem_flags_change midgard_kbase_mem_flags_change ++#define kbase_mem_free midgard_kbase_mem_free ++#define kbase_mem_free_region midgard_kbase_mem_free_region ++#define kbase_mem_grow_gpu_mapping midgard_kbase_mem_grow_gpu_mapping ++#define kbase_mem_halt midgard_kbase_mem_halt ++#define kbase_mem_import midgard_kbase_mem_import ++#define kbase_mem_init midgard_kbase_mem_init ++#define kbase_mem_kref_free 
midgard_kbase_mem_kref_free ++#define kbase_mem_pool_alloc midgard_kbase_mem_pool_alloc ++#define kbase_mem_pool_alloc_pages midgard_kbase_mem_pool_alloc_pages ++#define kbase_mem_pool_debugfs_init midgard_kbase_mem_pool_debugfs_init ++#define kbase_mem_pool_free midgard_kbase_mem_pool_free ++#define kbase_mem_pool_free_pages midgard_kbase_mem_pool_free_pages ++#define kbase_mem_pool_grow midgard_kbase_mem_pool_grow ++#define kbase_mem_pool_init midgard_kbase_mem_pool_init ++#define kbase_mem_pool_set_max_size midgard_kbase_mem_pool_set_max_size ++#define kbase_mem_pool_term midgard_kbase_mem_pool_term ++#define kbase_mem_pool_trim midgard_kbase_mem_pool_trim ++#define kbase_mem_query midgard_kbase_mem_query ++#define kbase_mem_term midgard_kbase_mem_term ++#define kbase_mmu_disable_as midgard_kbase_mmu_disable_as ++#define kbase_mmu_disable midgard_kbase_mmu_disable ++#define kbase_mmu_dump midgard_kbase_mmu_dump ++#define kbase_mmu_hw_clear_fault midgard_kbase_mmu_hw_clear_fault ++#define kbase_mmu_hw_configure midgard_kbase_mmu_hw_configure ++#define kbase_mmu_hw_do_operation midgard_kbase_mmu_hw_do_operation ++#define kbase_mmu_hw_enable_fault midgard_kbase_mmu_hw_enable_fault ++#define kbase_mmu_init midgard_kbase_mmu_init ++#define kbase_mmu_insert_pages midgard_kbase_mmu_insert_pages ++#define kbase_mmu_insert_pages_no_flush midgard_kbase_mmu_insert_pages_no_flush ++#define kbase_mmu_insert_single_page midgard_kbase_mmu_insert_single_page ++#define kbase_mmu_interrupt midgard_kbase_mmu_interrupt ++#define kbase_mmu_mode_get_aarch64 midgard_kbase_mmu_mode_get_aarch64 ++#define kbase_mmu_mode_get_lpae midgard_kbase_mmu_mode_get_lpae ++#define kbase_mmu_teardown_pages midgard_kbase_mmu_teardown_pages ++#define kbase_mmu_term midgard_kbase_mmu_term ++#define kbase_mmu_update midgard_kbase_mmu_update ++#define kbase_mmu_update_pages midgard_kbase_mmu_update_pages ++#define kbase_os_mem_map_lock midgard_kbase_os_mem_map_lock ++#define kbase_os_mem_map_unlock midgard_kbase_os_mem_map_unlock ++#define kbasep_cache_clean_worker midgard_kbasep_cache_clean_worker ++#define kbasep_common_test_interrupt_handlers midgard_kbasep_common_test_interrupt_handlers ++#define kbasep_complete_triggered_soft_events midgard_kbasep_complete_triggered_soft_events ++#define kbasep_debug_assert_call_hook midgard_kbasep_debug_assert_call_hook ++#define kbasep_find_enclosing_cpu_mapping_offset midgard_kbasep_find_enclosing_cpu_mapping_offset ++#define kbasep_gpu_memory_debugfs_init midgard_kbasep_gpu_memory_debugfs_init ++#define kbasep_jd_debugfs_ctx_init midgard_kbasep_jd_debugfs_ctx_init ++#define kbasep_job_slot_soft_or_hard_stop_do_action midgard_kbasep_job_slot_soft_or_hard_stop_do_action ++#define kbasep_js_add_job midgard_kbasep_js_add_job ++#define kbasep_js_atom_priority_to_relative midgard_kbasep_js_atom_priority_to_relative ++#define kbasep_js_ctx_attr_ctx_release_atom midgard_kbasep_js_ctx_attr_ctx_release_atom ++#define kbasep_js_ctx_attr_ctx_retain_atom midgard_kbasep_js_ctx_attr_ctx_retain_atom ++#define kbasep_js_ctx_attr_runpool_release_ctx midgard_kbasep_js_ctx_attr_runpool_release_ctx ++#define kbasep_js_ctx_attr_runpool_retain_ctx midgard_kbasep_js_ctx_attr_runpool_retain_ctx ++#define kbasep_js_devdata_halt midgard_kbasep_js_devdata_halt ++#define kbasep_js_devdata_init midgard_kbasep_js_devdata_init ++#define kbasep_js_devdata_term midgard_kbasep_js_devdata_term ++#define kbasep_js_kctx_init midgard_kbasep_js_kctx_init ++#define kbasep_js_kctx_term midgard_kbasep_js_kctx_term ++#define 
kbasep_js_relative_priority_to_atom midgard_kbasep_js_relative_priority_to_atom ++#define kbasep_js_release_privileged_ctx midgard_kbasep_js_release_privileged_ctx ++#define kbasep_js_remove_cancelled_job midgard_kbasep_js_remove_cancelled_job ++#define kbasep_js_remove_job midgard_kbasep_js_remove_job ++#define kbasep_js_resume midgard_kbasep_js_resume ++#define kbasep_js_runpool_release_ctx_and_katom_retained_state midgard_kbasep_js_runpool_release_ctx_and_katom_retained_state ++#define kbasep_js_runpool_release_ctx midgard_kbasep_js_runpool_release_ctx ++#define kbasep_js_runpool_release_ctx_nolock midgard_kbasep_js_runpool_release_ctx_nolock ++#define kbasep_js_runpool_requeue_or_kill_ctx midgard_kbasep_js_runpool_requeue_or_kill_ctx ++#define kbasep_js_schedule_privileged_ctx midgard_kbasep_js_schedule_privileged_ctx ++#define kbasep_js_suspend midgard_kbasep_js_suspend ++#define kbase_platform_early_init midgard_kbase_platform_early_init ++#define kbase_platform_rk_init_opp_table midgard_kbase_platform_rk_init_opp_table ++#define kbase_platform_rk_shutdown midgard_kbase_platform_rk_shutdown ++#define kbase_pm_always_on_policy_ops midgard_kbase_pm_always_on_policy_ops ++#define kbase_pm_cache_snoop_disable midgard_kbase_pm_cache_snoop_disable ++#define kbase_pm_cache_snoop_enable midgard_kbase_pm_cache_snoop_enable ++#define kbase_pm_ca_get_core_mask midgard_kbase_pm_ca_get_core_mask ++#define kbase_pm_ca_init midgard_kbase_pm_ca_init ++#define kbase_pm_ca_term midgard_kbase_pm_ca_term ++#define kbase_pm_clock_off midgard_kbase_pm_clock_off ++#define kbase_pm_clock_on midgard_kbase_pm_clock_on ++#define kbase_pm_coarse_demand_policy_ops midgard_kbase_pm_coarse_demand_policy_ops ++#define kbase_pm_context_active_handle_suspend midgard_kbase_pm_context_active_handle_suspend ++#define kbase_pm_context_active midgard_kbase_pm_context_active ++#define kbase_pm_context_idle midgard_kbase_pm_context_idle ++#define kbase_pm_disable_interrupts midgard_kbase_pm_disable_interrupts ++#define kbase_pm_disable_interrupts_nolock midgard_kbase_pm_disable_interrupts_nolock ++#define kbase_pm_do_poweroff midgard_kbase_pm_do_poweroff ++#define kbase_pm_do_poweron midgard_kbase_pm_do_poweron ++#define kbasep_mem_profile_debugfs_insert midgard_kbasep_mem_profile_debugfs_insert ++#define kbasep_mem_profile_debugfs_remove midgard_kbasep_mem_profile_debugfs_remove ++#define kbase_pm_enable_interrupts midgard_kbase_pm_enable_interrupts ++#define kbase_pm_get_active_cores midgard_kbase_pm_get_active_cores ++#define kbase_pm_get_policy midgard_kbase_pm_get_policy ++#define kbase_pm_get_present_cores midgard_kbase_pm_get_present_cores ++#define kbase_pm_get_ready_cores midgard_kbase_pm_get_ready_cores ++#define kbase_pm_get_trans_cores midgard_kbase_pm_get_trans_cores ++#define kbase_pm_halt midgard_kbase_pm_halt ++#define kbase_pm_init_hw midgard_kbase_pm_init_hw ++#define kbase_pm_list_policies midgard_kbase_pm_list_policies ++#define kbase_pm_metrics_update midgard_kbase_pm_metrics_update ++#define kbase_pm_policy_init midgard_kbase_pm_policy_init ++#define kbase_pm_policy_term midgard_kbase_pm_policy_term ++#define kbase_pm_power_changed midgard_kbase_pm_power_changed ++#define kbase_pm_powerup midgard_kbase_pm_powerup ++#define kbase_pm_register_access_disable midgard_kbase_pm_register_access_disable ++#define kbase_pm_register_access_enable midgard_kbase_pm_register_access_enable ++#define kbase_pm_release_gpu_cycle_counter midgard_kbase_pm_release_gpu_cycle_counter ++#define 
kbase_pm_release_gpu_cycle_counter_nolock midgard_kbase_pm_release_gpu_cycle_counter_nolock ++#define kbase_pm_request_gpu_cycle_counter_l2_is_on midgard_kbase_pm_request_gpu_cycle_counter_l2_is_on ++#define kbase_pm_request_gpu_cycle_counter midgard_kbase_pm_request_gpu_cycle_counter ++#define kbase_pm_reset_done midgard_kbase_pm_reset_done ++#define kbase_pm_resume midgard_kbase_pm_resume ++#define kbase_pm_set_debug_core_mask midgard_kbase_pm_set_debug_core_mask ++#define kbase_pm_set_policy midgard_kbase_pm_set_policy ++#define kbase_pm_suspend midgard_kbase_pm_suspend ++#define kbase_pm_update_active midgard_kbase_pm_update_active ++#define kbase_pm_update_cores_state midgard_kbase_pm_update_cores_state ++#define kbase_pm_update_cores_state_nolock midgard_kbase_pm_update_cores_state_nolock ++#define kbase_pm_wait_for_poweroff_complete midgard_kbase_pm_wait_for_poweroff_complete ++#define kbasep_os_process_page_usage_update midgard_kbasep_os_process_page_usage_update ++#define kbasep_platform_device_init midgard_kbasep_platform_device_init ++#define kbasep_platform_device_term midgard_kbasep_platform_device_term ++#define kbasep_pm_metrics_init midgard_kbasep_pm_metrics_init ++#define kbasep_pm_metrics_term midgard_kbasep_pm_metrics_term ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++#define kbasep_regs_history_debugfs_init midgard_kbasep_regs_history_debugfs_init ++#endif /* CONFIG_DEBUG_FS */ ++#define kbasep_remove_waiting_soft_job midgard_kbasep_remove_waiting_soft_job ++#define kbase_prepare_soft_job midgard_kbase_prepare_soft_job ++#define kbase_prepare_to_reset_gpu_locked midgard_kbase_prepare_to_reset_gpu_locked ++#define kbase_prepare_to_reset_gpu midgard_kbase_prepare_to_reset_gpu ++#define kbase_process_soft_job midgard_kbase_process_soft_job ++#define kbasep_soft_job_timeout_worker midgard_kbasep_soft_job_timeout_worker ++#define kbase_region_tracker_find_region_base_address midgard_kbase_region_tracker_find_region_base_address ++#define kbase_region_tracker_find_region_enclosing_address midgard_kbase_region_tracker_find_region_enclosing_address ++#define kbase_region_tracker_init_jit midgard_kbase_region_tracker_init_jit ++#define kbase_region_tracker_init midgard_kbase_region_tracker_init ++#define kbase_region_tracker_term midgard_kbase_region_tracker_term ++#define kbase_reg_read midgard_kbase_reg_read ++#define kbase_reg_write midgard_kbase_reg_write ++#define kbase_release_device midgard_kbase_release_device ++#define kbase_release_interrupts midgard_kbase_release_interrupts ++#define kbase_reset_gpu_locked midgard_kbase_reset_gpu_locked ++#define kbase_reset_gpu midgard_kbase_reset_gpu ++#define kbase_reset_gpu_silent midgard_kbase_reset_gpu_silent ++#define kbase_resume_suspended_soft_jobs midgard_kbase_resume_suspended_soft_jobs ++#define kbase_scale_static_power midgard_kbase_scale_static_power ++#define kbase_set_custom_irq_handler midgard_kbase_set_custom_irq_handler ++#define kbase_simple_ipa_model_ops midgard_kbase_simple_ipa_model_ops ++#define kbase_soft_event_update midgard_kbase_soft_event_update ++#define kbase_soft_event_wait_callback midgard_kbase_soft_event_wait_callback ++#define kbase_sticky_resource_acquire midgard_kbase_sticky_resource_acquire ++#define kbase_sticky_resource_init midgard_kbase_sticky_resource_init ++#define kbase_sticky_resource_release midgard_kbase_sticky_resource_release ++#define kbase_sticky_resource_term midgard_kbase_sticky_resource_term ++#define kbase_sync_fence_in_cancel_wait 
midgard_kbase_sync_fence_in_cancel_wait ++#define kbase_sync_fence_in_dump midgard_kbase_sync_fence_in_dump ++#define kbase_sync_fence_in_from_fd midgard_kbase_sync_fence_in_from_fd ++#define kbase_sync_fence_in_info_get midgard_kbase_sync_fence_in_info_get ++#define kbase_sync_fence_in_remove midgard_kbase_sync_fence_in_remove ++#define kbase_sync_fence_in_wait midgard_kbase_sync_fence_in_wait ++#define kbase_sync_fence_out_create midgard_kbase_sync_fence_out_create ++#define kbase_sync_fence_out_info_get midgard_kbase_sync_fence_out_info_get ++#define kbase_sync_fence_out_remove midgard_kbase_sync_fence_out_remove ++#define kbase_sync_fence_out_trigger midgard_kbase_sync_fence_out_trigger ++#define kbase_sync_fence_stream_create midgard_kbase_sync_fence_stream_create ++#define kbase_sync_fence_validate midgard_kbase_sync_fence_validate ++#define kbase_sync_fence_wait_worker midgard_kbase_sync_fence_wait_worker ++#define kbase_synchronize_irqs midgard_kbase_synchronize_irqs ++#define kbase_sync_now midgard_kbase_sync_now ++#define kbase_sync_single_for_cpu midgard_kbase_sync_single_for_cpu ++#define kbase_sync_single_for_device midgard_kbase_sync_single_for_device ++#define kbase_sync_single midgard_kbase_sync_single ++#define kbase_sync_status_string midgard_kbase_sync_status_string ++#define kbase_timeline_name midgard_kbase_timeline_name ++#define __kbase_tlstream_aux_devfreq_target midgard___kbase_tlstream_aux_devfreq_target ++#define __kbase_tlstream_aux_pagefault midgard___kbase_tlstream_aux_pagefault ++#define __kbase_tlstream_aux_pagesalloc midgard___kbase_tlstream_aux_pagesalloc ++#define __kbase_tlstream_aux_pm_state midgard___kbase_tlstream_aux_pm_state ++#define __kbase_tlstream_aux_protected_enter_end midgard___kbase_tlstream_aux_protected_enter_end ++#define __kbase_tlstream_aux_protected_enter_start midgard___kbase_tlstream_aux_protected_enter_start ++#define __kbase_tlstream_aux_protected_leave_end midgard___kbase_tlstream_aux_protected_leave_end ++#define __kbase_tlstream_aux_protected_leave_start midgard___kbase_tlstream_aux_protected_leave_start ++#define kbase_tlstream_init midgard_kbase_tlstream_init ++#define __kbase_tlstream_jd_gpu_soft_reset midgard___kbase_tlstream_jd_gpu_soft_reset ++#define kbase_tlstream_term midgard_kbase_tlstream_term ++#define __kbase_tlstream_tl_attrib_as_config midgard___kbase_tlstream_tl_attrib_as_config ++#define __kbase_tlstream_tl_attrib_atom_config midgard___kbase_tlstream_tl_attrib_atom_config ++#define __kbase_tlstream_tl_attrib_atom_jit midgard___kbase_tlstream_tl_attrib_atom_jit ++#define __kbase_tlstream_tl_attrib_atom_priority midgard___kbase_tlstream_tl_attrib_atom_priority ++#define __kbase_tlstream_tl_attrib_atom_state midgard___kbase_tlstream_tl_attrib_atom_state ++#define __kbase_tlstream_tl_del_atom midgard___kbase_tlstream_tl_del_atom ++#define __kbase_tlstream_tl_del_ctx midgard___kbase_tlstream_tl_del_ctx ++#define __kbase_tlstream_tl_event_atom_softstop_ex midgard___kbase_tlstream_tl_event_atom_softstop_ex ++#define __kbase_tlstream_tl_event_atom_softstop_issue midgard___kbase_tlstream_tl_event_atom_softstop_issue ++#define __kbase_tlstream_tl_event_lpu_softstop midgard___kbase_tlstream_tl_event_lpu_softstop ++#define __kbase_tlstream_tl_new_atom midgard___kbase_tlstream_tl_new_atom ++#define __kbase_tlstream_tl_new_ctx midgard___kbase_tlstream_tl_new_ctx ++#define __kbase_tlstream_tl_nret_as_ctx midgard___kbase_tlstream_tl_nret_as_ctx ++#define __kbase_tlstream_tl_nret_atom_as midgard___kbase_tlstream_tl_nret_atom_as 
++#define __kbase_tlstream_tl_nret_atom_ctx midgard___kbase_tlstream_tl_nret_atom_ctx ++#define __kbase_tlstream_tl_nret_atom_lpu midgard___kbase_tlstream_tl_nret_atom_lpu ++#define __kbase_tlstream_tl_nret_ctx_lpu midgard___kbase_tlstream_tl_nret_ctx_lpu ++#define __kbase_tlstream_tl_ret_as_ctx midgard___kbase_tlstream_tl_ret_as_ctx ++#define __kbase_tlstream_tl_ret_atom_as midgard___kbase_tlstream_tl_ret_atom_as ++#define __kbase_tlstream_tl_ret_atom_ctx midgard___kbase_tlstream_tl_ret_atom_ctx ++#define __kbase_tlstream_tl_ret_atom_lpu midgard___kbase_tlstream_tl_ret_atom_lpu ++#define __kbase_tlstream_tl_ret_ctx_lpu midgard___kbase_tlstream_tl_ret_ctx_lpu ++#define kbase_unmap_external_resource midgard_kbase_unmap_external_resource ++#define kbase_update_region_flags midgard_kbase_update_region_flags ++#define kbase_vinstr_hwcnt_reader_setup midgard_kbase_vinstr_hwcnt_reader_setup ++#define kbase_vinstr_init midgard_kbase_vinstr_init ++#define kbase_vinstr_resume midgard_kbase_vinstr_resume ++#define kbase_vinstr_suspend midgard_kbase_vinstr_suspend ++#define kbase_vinstr_term midgard_kbase_vinstr_term ++#define kbase_vmap midgard_kbase_vmap ++#define kbase_vmap_prot midgard_kbase_vmap_prot ++#define kbase_vm_ops midgard_kbase_vm_ops ++#define kbase_vunmap midgard_kbase_vunmap ++#define _mali_profiling_control midgard__mali_profiling_control ++#define platform_funcs midgard_platform_funcs ++#define pm_callbacks midgard_pm_callbacks ++#define rk_kbase_device_runtime_disable midgard_rk_kbase_device_runtime_disable ++#define rk_kbase_device_runtime_init midgard_rk_kbase_device_runtime_init ++#endif +diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript +new file mode 100755 +index 000000000..ff23d7aeb +--- /dev/null ++++ b/drivers/gpu/arm/midgard/sconscript +@@ -0,0 +1,92 @@ ++# ++# (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ u8 slave_reg; -+ bool is_regaddr; -+ bool is_segment; + -+ unsigned int scl_high_ns; -+ unsigned int scl_low_ns; -+}; ++import sys ++Import('env') + -+struct dw_hdmi_phy_data { -+ enum dw_hdmi_phy_type type; -+ const char *name; -+ unsigned int gen; -+ bool has_svsret; -+ int (*configure)(struct dw_hdmi_qp *hdmi, -+ const struct dw_hdmi_plat_data *pdata, -+ unsigned long mpixelclock); -+}; ++SConscript( 'tests/sconscript' ) + -+struct dw_hdmi_qp { -+ struct drm_connector connector; -+ struct drm_bridge bridge; -+ struct drm_bridge *next_bridge; -+ struct drm_panel *panel; -+ struct platform_device *hdcp_dev; -+ struct platform_device *audio; -+ struct platform_device *cec; -+ struct device *dev; -+ struct dw_hdmi_qp_i2c *i2c; ++mock_test = 0 + -+ struct hdmi_qp_data_info hdmi_data; -+ const struct dw_hdmi_plat_data *plat_data; -+ struct dw_qp_hdcp *hdcp; -+ int vic; -+ int main_irq; -+ int avp_irq; -+ int earc_irq; ++# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data. ++# For such kernels fake_platform_device should be set to 1. 
For kernels providing platform data fake_platform_device should be set to 0. ++if env['platform_config']=='devicetree' or env['platform_config']=='juno_soc': ++ fake_platform_device = 0 ++else: ++ fake_platform_device = 1 + -+ u8 edid[HDMI_EDID_LEN]; ++# Source files required for kbase. ++kbase_src = [ ++ Glob('*.c'), ++ Glob('backend/*/*.c'), ++ Glob('internal/*/*.c'), ++ Glob('ipa/*.c') ++] + -+ struct { -+ const struct dw_hdmi_qp_phy_ops *ops; -+ const char *name; -+ void *data; -+ bool enabled; -+ } phy; ++if env['platform_config']=='juno_soc': ++ kbase_src += [Glob('platform/devicetree/*.c')] ++else: ++ kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])] + -+ struct drm_display_mode previous_mode; ++if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': ++ kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] ++ mock_test = 1 + -+ struct i2c_adapter *ddc; -+ void __iomem *regs; -+ void __iomem *hdcp14_mem; -+ bool sink_is_hdmi; -+ bool sink_has_audio; -+ bool dclk_en; -+ bool frl_switch; -+ bool cec_enable; -+ bool allm_enable; -+ bool support_hdmi; -+ bool skip_connector; -+ int force_output; -+ int vp_id; -+ int old_vp_id; ++# we need platform config for GPL version using fake platform ++if fake_platform_device==1: ++ # Check if we are compiling for PBX ++ if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \ ++ env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}: ++ sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n") ++ # if the file platform config file is in the tpip directory then use that, otherwise use the default config directory ++ if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])): ++ kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])) ++ else: ++ kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config'])) + -+ struct mutex mutex; /* for state below and previous_mode */ -+ struct drm_connector *curr_conn;/* current connector (only valid when !disabled) */ -+ enum drm_connector_force force; /* mutex-protected force state */ -+ bool disabled; /* DRM has disabled our bridge */ -+ bool bridge_is_on; /* indicates the bridge is on */ -+ bool rxsense; /* rxsense state */ -+ u8 phy_mask; /* desired phy int mask settings */ -+ u8 mc_clkdis; /* clock disable register */ -+ u8 hdcp_caps; -+ u8 hdcp_status; ++make_args = env.kernel_get_config_defines(ret_list = True, ++ fake = fake_platform_device) + [ ++ 'PLATFORM=%s' % env['platform'], ++ 'MALI_ERROR_INJECT_ON=%s' % env['error_inject'], ++ 'MALI_KERNEL_TEST_API=%s' % env['debug'], ++ 'MALI_UNIT_TEST=%s' % env['unit'], ++ 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], ++ 'MALI_MOCK_TEST=%s' % mock_test, ++ 'MALI_CUSTOMER_RELEASE=%s' % env['release'], ++ 'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'], ++ 'MALI_COVERAGE=%s' % env['coverage'], ++ 'MALI_BUS_LOG=%s' % env['buslog'] ++] + -+ bool update; -+ bool hdr2sdr; ++kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, ++ make_args = make_args) + -+ u32 scdc_intr; -+ u32 flt_intr; -+ u32 earc_intr; ++# Add a dependency on kds.ko. ++# Only necessary when KDS is not built into the kernel. 
++# ++if env['os'] != 'android': ++ if not env.KernelConfigEnabled("CONFIG_KDS"): ++ env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko') + -+ struct mutex audio_mutex; -+ unsigned int sample_rate; -+ unsigned int audio_cts; -+ unsigned int audio_n; -+ bool audio_enable; -+ void (*enable_audio)(struct dw_hdmi_qp *hdmi); -+ void (*disable_audio)(struct dw_hdmi_qp *hdmi); ++# need Module.symvers from ump.ko build ++if int(env['ump']) == 1: ++ env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko') + -+ struct dentry *debugfs_dir; -+ bool scramble_low_rates; ++if 'smc_protected_mode_switcher' in env: ++ env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko') + -+ struct extcon_dev *extcon; ++env.KernelObjTarget('kbase', kbase) + -+ struct regmap *regm; ++env.AppendUnique(BASE=['cutils_linked_list']) +diff --git a/drivers/gpu/arm/midgard/tests/Kbuild b/drivers/gpu/arm/midgard/tests/Kbuild +new file mode 100755 +index 000000000..b4bed0473 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/Kbuild +@@ -0,0 +1,17 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ bool initialized; /* hdmi is enabled before bind */ -+ bool logo_plug_out; /* hdmi is plug out when kernel logo */ -+ struct completion flt_cmp; -+ struct completion earc_cmp; + -+ struct cec_notifier *cec_notifier; -+ struct cec_adapter *cec_adap; -+ struct mutex cec_notifier_mutex; ++obj-$(CONFIG_MALI_KUTF) += kutf/ ++obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ +diff --git a/drivers/gpu/arm/midgard/tests/Kconfig b/drivers/gpu/arm/midgard/tests/Kconfig +new file mode 100644 +index 000000000..da0515c06 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/Kconfig +@@ -0,0 +1,17 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ hdmi_codec_plugged_cb plugged_cb; -+ struct device *codec_dev; -+ enum drm_connector_status last_connector_result; -+}; + -+static inline void hdmi_writel(struct dw_hdmi_qp *hdmi, u32 val, int offset) -+{ -+ regmap_write(hdmi->regm, offset, val); -+} ++source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" ++source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" +diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h +new file mode 100644 +index 000000000..0d145e42a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h +@@ -0,0 +1,65 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static inline u32 hdmi_readl(struct dw_hdmi_qp *hdmi, int offset) -+{ -+ unsigned int val = 0; + -+ regmap_read(hdmi->regm, offset, &val); + -+ return val; -+} ++#ifndef _KERNEL_UTF_MEM_H_ ++#define _KERNEL_UTF_MEM_H_ + -+static void handle_plugged_change(struct dw_hdmi_qp *hdmi, bool plugged) -+{ -+ if (hdmi->plugged_cb && hdmi->codec_dev) -+ hdmi->plugged_cb(hdmi->codec_dev, plugged); -+} ++/* kutf_mem.h ++ * Functions for management of memory pools in the kernel. ++ * ++ * This module implements a memory pool allocator, allowing a test ++ * implementation to allocate linked allocations which can then be freed by a ++ * single free which releases all of the resources held by the entire pool. ++ * ++ * Note that it is not possible to free single resources within the pool once ++ * allocated. ++ */ + -+int dw_hdmi_qp_set_plugged_cb(struct dw_hdmi_qp *hdmi, hdmi_codec_plugged_cb fn, -+ struct device *codec_dev) -+{ -+ bool plugged; ++#include + -+ mutex_lock(&hdmi->mutex); -+ hdmi->plugged_cb = fn; -+ hdmi->codec_dev = codec_dev; -+ plugged = hdmi->last_connector_result == connector_status_connected; -+ handle_plugged_change(hdmi, plugged); -+ mutex_unlock(&hdmi->mutex); ++/** ++ * struct kutf_mempool - the memory pool context management structure ++ * @head: list head on which the allocations in this context are added to ++ * ++ */ ++struct kutf_mempool { ++ struct list_head head; ++}; + -+ return 0; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_plugged_cb); ++/** ++ * kutf_mempool_init() - Initialize a memory pool. ++ * @pool: Memory pool structure to initialize, provided by the user ++ * ++ * Return: zero on success ++ */ ++int kutf_mempool_init(struct kutf_mempool *pool); + -+static void hdmi_modb(struct dw_hdmi_qp *hdmi, u32 data, u32 mask, u32 reg) -+{ -+ regmap_update_bits(hdmi->regm, reg, mask, data); -+} ++/** ++ * kutf_mempool_alloc() - Allocate memory from a pool ++ * @pool: Memory pool to allocate from ++ * @size: Size of memory wanted in number of bytes ++ * ++ * Return: Pointer to memory on success, NULL on failure. ++ */ ++void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); + -+static void hdmi_set_cts_n(struct dw_hdmi_qp *hdmi, unsigned int cts, -+ unsigned int n) -+{ -+ /* Set N */ -+ hdmi_modb(hdmi, n, AUDPKT_ACR_N_VALUE, AUDPKT_ACR_CONTROL0); ++/** ++ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. ++ * @pool: The memory pool to free ++ */ ++void kutf_mempool_destroy(struct kutf_mempool *pool); ++#endif /* _KERNEL_UTF_MEM_H_ */ +diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h +new file mode 100644 +index 000000000..1cc85f1b7 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h +@@ -0,0 +1,121 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. 
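For reference, the pool allocator declared in kutf_mem.h above is meant to be used roughly as in the following minimal sketch. The wrapping function and the example_state payload are hypothetical; the <kutf/kutf_mem.h> include form assumes the -I$(src)/../include flag added by the kutf Kbuild later in this patch.

#include <linux/errno.h>
#include <linux/list.h>
#include <kutf/kutf_mem.h>

struct example_state {          /* hypothetical payload type */
        int value;
};

static int example_use_pool(void)
{
        struct kutf_mempool pool;
        struct example_state *state;

        if (kutf_mempool_init(&pool))           /* zero on success */
                return -ENOMEM;

        state = kutf_mempool_alloc(&pool, sizeof(*state));
        if (!state) {
                kutf_mempool_destroy(&pool);
                return -ENOMEM;
        }
        state->value = 42;

        /*
         * Individual allocations cannot be freed on their own; the whole
         * pool is released in one call.
         */
        kutf_mempool_destroy(&pool);
        return 0;
}

Every allocation is tracked on the pool's list head and released together by kutf_mempool_destroy(), trading fine-grained freeing for a simple cleanup path.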
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Set CTS */ -+ if (cts) -+ hdmi_modb(hdmi, AUDPKT_ACR_CTS_OVR_EN, AUDPKT_ACR_CTS_OVR_EN_MSK, -+ AUDPKT_ACR_CONTROL1); -+ else -+ hdmi_modb(hdmi, 0, AUDPKT_ACR_CTS_OVR_EN_MSK, -+ AUDPKT_ACR_CONTROL1); + -+ hdmi_modb(hdmi, AUDPKT_ACR_CTS_OVR_VAL(cts), AUDPKT_ACR_CTS_OVR_VAL_MSK, -+ AUDPKT_ACR_CONTROL1); -+} + -+static int hdmi_match_frl_n_table(struct dw_hdmi_qp *hdmi, -+ unsigned long r_bit, -+ unsigned long freq) -+{ -+ const struct dw_hdmi_audio_frl_n *frl_n = NULL; -+ int i = 0, n = 0; ++#ifndef _KERNEL_UTF_RESULTSET_H_ ++#define _KERNEL_UTF_RESULTSET_H_ + -+ for (i = 0; ARRAY_SIZE(common_frl_n_table); i++) { -+ if (r_bit == common_frl_n_table[i].r_bit) { -+ frl_n = &common_frl_n_table[i]; -+ break; -+ } -+ } ++/* kutf_resultset.h ++ * Functions and structures for handling test results and result sets. ++ * ++ * This section of the kernel UTF contains structures and functions used for the ++ * management of Results and Result Sets. ++ */ + -+ if (!frl_n) -+ goto err; ++/** ++ * enum kutf_result_status - Status values for a single Test error. ++ * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark ++ * results. ++ * @KUTF_RESULT_SKIP: The test was skipped. ++ * @KUTF_RESULT_UNKNOWN: The test has an unknown result. ++ * @KUTF_RESULT_PASS: The test result passed. ++ * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug ++ * message. ++ * @KUTF_RESULT_INFO: The test result passed, but raised ++ * an informative message. ++ * @KUTF_RESULT_WARN: The test result passed, but raised a warning ++ * message. ++ * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. ++ * @KUTF_RESULT_FATAL: The test result failed with a fatal error. ++ * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF ++ * assertion failure. ++ * @KUTF_RESULT_COUNT: The current number of possible status messages. 
++ */ ++enum kutf_result_status { ++ KUTF_RESULT_BENCHMARK = -3, ++ KUTF_RESULT_SKIP = -2, ++ KUTF_RESULT_UNKNOWN = -1, + -+ switch (freq) { -+ case 32000: -+ case 64000: -+ case 128000: -+ n = (freq / 32000) * frl_n->n_32k; -+ break; -+ case 44100: -+ case 88200: -+ case 176400: -+ n = (freq / 44100) * frl_n->n_44k1; -+ break; -+ case 48000: -+ case 96000: -+ case 192000: -+ n = (freq / 48000) * frl_n->n_48k; -+ break; -+ default: -+ goto err; -+ } ++ KUTF_RESULT_PASS = 0, ++ KUTF_RESULT_DEBUG = 1, ++ KUTF_RESULT_INFO = 2, ++ KUTF_RESULT_WARN = 3, ++ KUTF_RESULT_FAIL = 4, ++ KUTF_RESULT_FATAL = 5, ++ KUTF_RESULT_ABORT = 6, + -+ return n; -+err: -+ dev_err(hdmi->dev, "FRL; unexpected Rbit: %lu Gbps\n", r_bit); ++ KUTF_RESULT_COUNT ++}; + -+ return 0; -+} ++/* The maximum size of a kutf_result_status result when ++ * converted to a string ++ */ ++#define KUTF_ERROR_MAX_NAME_SIZE 21 + -+static int hdmi_match_tmds_n_table(struct dw_hdmi_qp *hdmi, -+ unsigned long pixel_clk, -+ unsigned long freq) -+{ -+ const struct dw_hdmi_plat_data *plat_data = hdmi->plat_data; -+ const struct dw_hdmi_audio_tmds_n *tmds_n = NULL; -+ int i; ++#ifdef __KERNEL__ + -+ if (plat_data->tmds_n_table) { -+ for (i = 0; plat_data->tmds_n_table[i].tmds != 0; i++) { -+ if (pixel_clk == plat_data->tmds_n_table[i].tmds) { -+ tmds_n = &plat_data->tmds_n_table[i]; -+ break; -+ } -+ } -+ } ++#include + -+ if (tmds_n == NULL) { -+ for (i = 0; common_tmds_n_table[i].tmds != 0; i++) { -+ if (pixel_clk == common_tmds_n_table[i].tmds) { -+ tmds_n = &common_tmds_n_table[i]; -+ break; -+ } -+ } -+ } ++/** ++ * struct kutf_result - Represents a single test result. ++ * @node: Next result in the list of results. ++ * @status: The status summary (pass / warn / fail / etc). ++ * @message: A more verbose status message. ++ */ ++struct kutf_result { ++ struct list_head node; ++ enum kutf_result_status status; ++ const char *message; ++}; + -+ if (tmds_n == NULL) -+ return -ENOENT; ++/** ++ * kutf_create_result_set() - Create a new result set ++ * to which results can be added. ++ * ++ * Return: The created resultset. ++ */ ++struct kutf_result_set *kutf_create_result_set(void); + -+ switch (freq) { -+ case 32000: -+ return tmds_n->n_32k; -+ case 44100: -+ case 88200: -+ case 176400: -+ return (freq / 44100) * tmds_n->n_44k1; -+ case 48000: -+ case 96000: -+ case 192000: -+ return (freq / 48000) * tmds_n->n_48k; -+ default: -+ return -ENOENT; -+ } -+} ++/** ++ * kutf_add_result() - Add a result to the end of an existing resultset. ++ * ++ * @mempool: The memory pool to allocate the result storage from. ++ * @set: The resultset to add the result to. ++ * @status: The result status to add. ++ * @message: The result message to add. ++ */ ++void kutf_add_result(struct kutf_mempool *mempool, struct kutf_result_set *set, ++ enum kutf_result_status status, const char *message); + -+static u64 hdmi_audio_math_diff(unsigned int freq, unsigned int n, -+ unsigned int pixel_clk) -+{ -+ u64 final, diff; -+ u64 cts; ++/** ++ * kutf_remove_result() - Remove a result from the head of a resultset. ++ * @set: The resultset. ++ * ++ * Return: result or NULL if there are no further results in the resultset. ++ */ ++struct kutf_result *kutf_remove_result( ++ struct kutf_result_set *set); + -+ final = (u64)pixel_clk * n; ++/** ++ * kutf_destroy_result_set() - Free a previously created resultset. ++ * ++ * @results: The result set whose resources to free. 
++ */ ++void kutf_destroy_result_set(struct kutf_result_set *results); + -+ cts = final; -+ do_div(cts, 128 * freq); ++#endif /* __KERNEL__ */ + -+ diff = final - (u64)cts * (128 * freq); ++#endif /* _KERNEL_UTF_RESULTSET_H_ */ +diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h +new file mode 100644 +index 000000000..754c3adb1 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h +@@ -0,0 +1,508 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ return diff; -+} + -+static unsigned int hdmi_compute_n(struct dw_hdmi_qp *hdmi, -+ unsigned long pixel_clk, -+ unsigned long freq) -+{ -+ unsigned int min_n = DIV_ROUND_UP((128 * freq), 1500); -+ unsigned int max_n = (128 * freq) / 300; -+ unsigned int ideal_n = (128 * freq) / 1000; -+ unsigned int best_n_distance = ideal_n; -+ unsigned int best_n = 0; -+ u64 best_diff = U64_MAX; -+ int n; + -+ /* If the ideal N could satisfy the audio math, then just take it */ -+ if (hdmi_audio_math_diff(freq, ideal_n, pixel_clk) == 0) -+ return ideal_n; ++#ifndef _KERNEL_UTF_SUITE_H_ ++#define _KERNEL_UTF_SUITE_H_ + -+ for (n = min_n; n <= max_n; n++) { -+ u64 diff = hdmi_audio_math_diff(freq, n, pixel_clk); ++/* kutf_suite.h ++ * Functions for management of test suites. ++ * ++ * This collection of data structures, macros, and functions are used to ++ * create Test Suites, Tests within those Test Suites, and Fixture variants ++ * of each test. ++ */ + -+ if (diff < best_diff || (diff == best_diff && -+ abs(n - ideal_n) < best_n_distance)) { -+ best_n = n; -+ best_diff = diff; -+ best_n_distance = abs(best_n - ideal_n); -+ } ++#include ++#include + -+ /* -+ * The best N already satisfy the audio math, and also be -+ * the closest value to ideal N, so just cut the loop. -+ */ -+ if ((best_diff == 0) && (abs(n - ideal_n) > best_n_distance)) -+ break; -+ } ++/** ++ * Pseudo-flag indicating an absence of any specified test class. Note that ++ * tests should not be annotated with this constant as it is simply a zero ++ * value; tests without a more specific class must be marked with the flag ++ * KUTF_F_TEST_GENERIC. ++ */ ++#define KUTF_F_TEST_NONE ((unsigned int)(0)) + -+ return best_n; -+} ++/** ++ * Class indicating this test is a smoke test. ++ * A given set of smoke tests should be quick to run, enabling rapid turn-around ++ * of "regress-on-commit" test runs. ++ */ ++#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) + -+static unsigned int hdmi_find_n(struct dw_hdmi_qp *hdmi, unsigned long pixel_clk, -+ unsigned long sample_rate) -+{ -+ struct dw_hdmi_link_config *link_cfg = NULL; -+ void *data = hdmi->plat_data->phy_data; -+ int n; ++/** ++ * Class indicating this test is a performance test. 
++ * These tests typically produce a performance metric, such as "time to run" or ++ * "frames per second", ++ */ ++#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) + -+ if (hdmi->plat_data->get_link_cfg) { -+ link_cfg = hdmi->plat_data->get_link_cfg(data); -+ if (link_cfg && link_cfg->frl_mode) -+ return hdmi_match_frl_n_table(hdmi, link_cfg->rate_per_lane, sample_rate); -+ } ++/** ++ * Class indicating that this test is a deprecated test. ++ * These tests have typically been replaced by an alternative test which is ++ * more efficient, or has better coverage. ++ */ ++#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) + -+ n = hdmi_match_tmds_n_table(hdmi, pixel_clk, sample_rate); -+ if (n > 0) -+ return n; ++/** ++ * Class indicating that this test is a known failure. ++ * These tests have typically been run and failed, but marking them as a known ++ * failure means it is easier to triage results. ++ * ++ * It is typically more convenient to triage known failures using the ++ * results database and web UI, as this means there is no need to modify the ++ * test code. ++ */ ++#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) + -+ dev_dbg(hdmi->dev, "Rate %lu missing; compute N dynamically\n", -+ pixel_clk); ++/** ++ * Class indicating that this test is a generic test, which is not a member of ++ * a more specific test class. Tests which are not created with a specific set ++ * of filter flags by the user are assigned this test class by default. ++ */ ++#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) + -+ return hdmi_compute_n(hdmi, pixel_clk, sample_rate); -+} ++/** ++ * Class indicating this test is a resource allocation failure test. ++ * A resource allocation failure test will test that an error code is ++ * correctly propagated when an allocation fails. ++ */ ++#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) + -+void dw_hdmi_qp_set_audio_interface(struct dw_hdmi_qp *hdmi, -+ struct hdmi_codec_daifmt *fmt, -+ struct hdmi_codec_params *hparms) -+{ -+ u32 conf0 = 0; ++/** ++ * Additional flag indicating that this test is an expected failure when ++ * run in resource failure mode. These tests are never run when running ++ * the low resource mode. ++ */ ++#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) + -+ mutex_lock(&hdmi->audio_mutex); -+ if (!hdmi->dclk_en) { -+ mutex_unlock(&hdmi->audio_mutex); -+ return; -+ } ++/** ++ * Flag reserved for user-defined filter zero. ++ */ ++#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) + -+ /* Reset the audio data path of the AVP */ -+ hdmi_writel(hdmi, AVP_DATAPATH_PACKET_AUDIO_SWINIT_P, GLOBAL_SWRESET_REQUEST); ++/** ++ * Flag reserved for user-defined filter one. ++ */ ++#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) + -+ /* Disable AUDS, ACR, AUDI */ -+ hdmi_modb(hdmi, 0, -+ PKTSCHED_ACR_TX_EN | PKTSCHED_AUDS_TX_EN | PKTSCHED_AUDI_TX_EN, -+ PKTSCHED_PKT_EN); ++/** ++ * Flag reserved for user-defined filter two. ++ */ ++#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) + -+ /* Clear the audio FIFO */ -+ hdmi_writel(hdmi, AUDIO_FIFO_CLR_P, AUDIO_INTERFACE_CONTROL0); ++/** ++ * Flag reserved for user-defined filter three. ++ */ ++#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) + -+ /* Select I2S interface as the audio source */ -+ hdmi_modb(hdmi, AUD_IF_I2S, AUD_IF_SEL_MSK, AUDIO_INTERFACE_CONFIG0); ++/** ++ * Flag reserved for user-defined filter four. 
++ */ ++#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) + -+ /* Enable the active i2s lanes */ -+ switch (hparms->channels) { -+ case 7 ... 8: -+ conf0 |= I2S_LINES_EN(3); -+ fallthrough; -+ case 5 ... 6: -+ conf0 |= I2S_LINES_EN(2); -+ fallthrough; -+ case 3 ... 4: -+ conf0 |= I2S_LINES_EN(1); -+ fallthrough; -+ default: -+ conf0 |= I2S_LINES_EN(0); -+ break; -+ } ++/** ++ * Flag reserved for user-defined filter five. ++ */ ++#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) + -+ hdmi_modb(hdmi, conf0, I2S_LINES_EN_MSK, AUDIO_INTERFACE_CONFIG0); ++/** ++ * Flag reserved for user-defined filter six. ++ */ ++#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) + -+ /* -+ * Enable bpcuv generated internally for L-PCM, or received -+ * from stream for NLPCM/HBR. -+ */ -+ switch (fmt->bit_fmt) { -+ case SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE: -+ conf0 = (hparms->channels == 8) ? AUD_HBR : AUD_ASP; -+ conf0 |= I2S_BPCUV_RCV_EN; -+ break; -+ default: -+ conf0 = AUD_ASP | I2S_BPCUV_RCV_DIS; -+ break; -+ } ++/** ++ * Flag reserved for user-defined filter seven. ++ */ ++#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) + -+ hdmi_modb(hdmi, conf0, I2S_BPCUV_RCV_MSK | AUD_FORMAT_MSK, -+ AUDIO_INTERFACE_CONFIG0); ++/** ++ * Pseudo-flag indicating that all test classes should be executed. ++ */ ++#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) + -+ /* Enable audio FIFO auto clear when overflow */ -+ hdmi_modb(hdmi, AUD_FIFO_INIT_ON_OVF_EN, AUD_FIFO_INIT_ON_OVF_MSK, -+ AUDIO_INTERFACE_CONFIG0); ++/** ++ * union kutf_callback_data - Union used to store test callback data ++ * @ptr_value: pointer to the location where test callback data ++ * are stored ++ * @u32_value: a number which represents test callback data ++ */ ++union kutf_callback_data { ++ void *ptr_value; ++ u32 u32_value; ++}; + -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_audio_interface); ++/** ++ * struct kutf_context - Structure representing a kernel test context ++ * @suite: Convenience pointer to the suite this context ++ * is running ++ * @test_fix: The fixture that is being run in this context ++ * @fixture_pool: The memory pool used for the duration of ++ * the fixture/text context. ++ * @fixture: The user provided fixture structure. ++ * @fixture_index: The index (id) of the current fixture. ++ * @fixture_name: The name of the current fixture (or NULL if unnamed). ++ * @test_data: Any user private data associated with this test ++ * @result_set: All the results logged by this test context ++ * @status: The status of the currently running fixture. ++ * @expected_status: The expected status on exist of the currently ++ * running fixture. ++ */ ++struct kutf_context { ++ struct kutf_suite *suite; ++ struct kutf_test_fixture *test_fix; ++ struct kutf_mempool fixture_pool; ++ void *fixture; ++ unsigned int fixture_index; ++ const char *fixture_name; ++ union kutf_callback_data test_data; ++ struct kutf_result_set *result_set; ++ enum kutf_result_status status; ++ enum kutf_result_status expected_status; ++}; + -+/* -+ * When transmitting IEC60958 linear PCM audio, these registers allow to -+ * configure the channel status information of all the channel status -+ * bits in the IEC60958 frame. For the moment this configuration is only -+ * used when the I2S audio interface, General Purpose Audio (GPA), -+ * or AHB audio DMA (AHBAUDDMA) interface is active -+ * (for S/PDIF interface this information comes from the stream). 
++/** ++ * struct kutf_suite - Structure representing a kernel test suite ++ * @app: The application this suite belongs to. ++ * @name: The name of this suite. ++ * @suite_data: Any user private data associated with this ++ * suite. ++ * @create_fixture: Function used to create a new fixture instance ++ * @remove_fixture: Function used to destroy a new fixture instance ++ * @fixture_variants: The number of variants (must be at least 1). ++ * @suite_default_flags: Suite global filter flags which are set on ++ * all tests. ++ * @node: List node for suite_list ++ * @dir: The debugfs directory for this suite ++ * @test_list: List head to store all the tests which are ++ * part of this suite + */ -+void dw_hdmi_qp_set_channel_status(struct dw_hdmi_qp *hdmi, -+ u8 *channel_status, bool ref2stream) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (!hdmi->dclk_en) { -+ mutex_unlock(&hdmi->audio_mutex); -+ return; -+ } ++struct kutf_suite { ++ struct kutf_application *app; ++ const char *name; ++ union kutf_callback_data suite_data; ++ void *(*create_fixture)(struct kutf_context *context); ++ void (*remove_fixture)(struct kutf_context *context); ++ unsigned int fixture_variants; ++ unsigned int suite_default_flags; ++ struct list_head node; ++ struct dentry *dir; ++ struct list_head test_list; ++}; + -+ /* -+ * AUDPKT_CHSTATUS_OVR0: { RSV, RSV, CS1, CS0 } -+ * AUDPKT_CHSTATUS_OVR1: { CS6, CS5, CS4, CS3 } -+ * -+ * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | -+ * CS0: | Mode | d | c | b | a | -+ * CS1: | Category Code | -+ * CS2: | Channel Number | Source Number | -+ * CS3: | Clock Accuracy | Sample Freq | -+ * CS4: | Ori Sample Freq | Word Length | -+ * CS5: | | CGMS-A | -+ * CS6~CS23: Reserved -+ * -+ * a: use of channel status block -+ * b: linear PCM identification: 0 for lpcm, 1 for nlpcm -+ * c: copyright information -+ * d: additional format information -+ */ ++/* ============================================================================ ++ Application functions ++============================================================================ */ + -+ if (ref2stream) -+ channel_status[0] |= IEC958_AES0_NONAUDIO; ++/** ++ * kutf_create_application() - Create an in kernel test application. ++ * @name: The name of the test application. ++ * ++ * Return: pointer to the kutf_application on success or NULL ++ * on failure ++ */ ++struct kutf_application *kutf_create_application(const char *name); + -+ if ((hdmi_readl(hdmi, AUDIO_INTERFACE_CONFIG0) & GENMASK(25, 24)) == AUD_HBR) { -+ /* fixup cs for HBR */ -+ channel_status[3] = (channel_status[3] & 0xf0) | IEC958_AES3_CON_FS_768000; -+ channel_status[4] = (channel_status[4] & 0x0f) | IEC958_AES4_CON_ORIGFS_NOTID; -+ } ++/** ++ * kutf_destroy_application() - Destroy an in kernel test application. ++ * ++ * @app: The test application to destroy. ++ */ ++void kutf_destroy_application(struct kutf_application *app); + -+ hdmi_writel(hdmi, channel_status[0] | (channel_status[1] << 8), -+ AUDPKT_CHSTATUS_OVR0); ++/* ============================================================================ ++ Suite functions ++============================================================================ */ + -+ regmap_bulk_write(hdmi->regm, AUDPKT_CHSTATUS_OVR1, &channel_status[3], 1); ++/** ++ * kutf_create_suite() - Create a kernel test suite. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. ++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. 
The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. ++ * ++ * Suite names must be unique. Should two suites with the same name be ++ * registered with the same application then this function will fail, if they ++ * are registered with different applications then the function will not detect ++ * this and the call will succeed. ++ * ++ * Return: pointer to the created kutf_suite on success or NULL ++ * on failure ++ */ ++struct kutf_suite *kutf_create_suite( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context)); + -+ if (ref2stream) -+ hdmi_modb(hdmi, 0, -+ AUDPKT_PBIT_FORCE_EN_MASK | AUDPKT_CHSTATUS_OVR_EN_MASK, -+ AUDPKT_CONTROL0); -+ else -+ hdmi_modb(hdmi, AUDPKT_PBIT_FORCE_EN | AUDPKT_CHSTATUS_OVR_EN, -+ AUDPKT_PBIT_FORCE_EN_MASK | AUDPKT_CHSTATUS_OVR_EN_MASK, -+ AUDPKT_CONTROL0); ++/** ++ * kutf_create_suite_with_filters() - Create a kernel test suite with user ++ * defined default filters. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. ++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. ++ * @filters: Filters to apply to a test if it doesn't provide its own ++ * ++ * Suite names must be unique. Should two suites with the same name be ++ * registered with the same application then this function will fail, if they ++ * are registered with different applications then the function will not detect ++ * this and the call will succeed. ++ * ++ * Return: pointer to the created kutf_suite on success or NULL on failure ++ */ ++struct kutf_suite *kutf_create_suite_with_filters( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters); + -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_channel_status); ++/** ++ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with ++ * user defined default filters. ++ * @app: The test application to create the suite in. ++ * @name: The name of the suite. ++ * @fixture_count: The number of fixtures to run over the test ++ * functions in this suite ++ * @create_fixture: Callback used to create a fixture. The returned value ++ * is stored in the fixture pointer in the context for ++ * use in the test functions. ++ * @remove_fixture: Callback used to remove a previously created fixture. 
++ * @filters: Filters to apply to a test if it doesn't provide its own ++ * @suite_data: Suite specific callback data, provided during the ++ * running of the test in the kutf_context ++ * ++ * Return: pointer to the created kutf_suite on success or NULL ++ * on failure ++ */ ++struct kutf_suite *kutf_create_suite_with_filters_and_data( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data suite_data); + -+static void hdmi_set_clk_regenerator(struct dw_hdmi_qp *hdmi, -+ unsigned long pixel_clk, unsigned int sample_rate) -+{ -+ unsigned int n = 0, cts = 0; ++/** ++ * kutf_add_test() - Add a test to a kernel test suite. ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * ++ * Note: As no filters are provided the test will use the suite filters instead ++ */ ++void kutf_add_test(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context)); + -+ n = hdmi_find_n(hdmi, pixel_clk, sample_rate); ++/** ++ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * @filters: A set of filtering flags, assigning test categories. ++ */ ++void kutf_add_test_with_filters(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters); + -+ hdmi->audio_n = n; -+ hdmi->audio_cts = cts; -+ hdmi_set_cts_n(hdmi, cts, hdmi->audio_enable ? n : 0); -+} ++/** ++ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite ++ * with filters. ++ * @suite: The suite to add the test to. ++ * @id: The ID of the test. ++ * @name: The name of the test. ++ * @execute: Callback to the test function to run. ++ * @filters: A set of filtering flags, assigning test categories. ++ * @test_data: Test specific callback data, provoided during the ++ * running of the test in the kutf_context ++ */ ++void kutf_add_test_with_filters_and_data( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data test_data); + -+static void hdmi_init_clk_regenerator(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->dclk_en) -+ hdmi_set_clk_regenerator(hdmi, 74250000, hdmi->sample_rate); -+ mutex_unlock(&hdmi->audio_mutex); -+} ++/* ============================================================================ ++ Test functions ++============================================================================ */ ++/** ++ * kutf_test_log_result_external() - Log a result which has been created ++ * externally into a in a standard form ++ * recognized by the log parser. 
++ * @context: The test context the test is running in ++ * @message: The message for this result ++ * @new_status: The result status of this log message ++ */ ++void kutf_test_log_result_external( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status); + -+static void hdmi_clk_regenerator_update_pixel_clock(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->dclk_en) -+ hdmi_set_clk_regenerator(hdmi, hdmi->hdmi_data.video_mode.mtmdsclock, -+ hdmi->sample_rate); -+ mutex_unlock(&hdmi->audio_mutex); -+} ++/** ++ * kutf_test_expect_abort() - Tell the kernel that you expect the current ++ * fixture to produce an abort. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_abort(struct kutf_context *context); + -+void dw_hdmi_qp_set_sample_rate(struct dw_hdmi_qp *hdmi, unsigned int rate) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->dclk_en) { -+ hdmi->sample_rate = rate; -+ hdmi_set_clk_regenerator(hdmi, hdmi->hdmi_data.video_mode.mtmdsclock, -+ hdmi->sample_rate); -+ } -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_sample_rate); ++/** ++ * kutf_test_expect_fatal() - Tell the kernel that you expect the current ++ * fixture to produce a fatal error. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_fatal(struct kutf_context *context); + -+void dw_hdmi_qp_set_channel_count(struct dw_hdmi_qp *hdmi, unsigned int cnt) -+{ -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_channel_count); ++/** ++ * kutf_test_expect_fail() - Tell the kernel that you expect the current ++ * fixture to fail. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_fail(struct kutf_context *context); + -+void dw_hdmi_qp_set_channel_allocation(struct dw_hdmi_qp *hdmi, unsigned int ca) -+{ -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_channel_allocation); ++/** ++ * kutf_test_expect_warn() - Tell the kernel that you expect the current ++ * fixture to produce a warning. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_warn(struct kutf_context *context); + -+static int dw_hdmi_qp_init_audio_infoframe(struct dw_hdmi_qp *hdmi) -+{ -+ struct hdmi_audio_infoframe frame; -+ u8 infoframe_buf[HDMI_INFOFRAME_SIZE(AUDIO)]; -+ int ret = 0; ++/** ++ * kutf_test_expect_pass() - Tell the kernel that you expect the current ++ * fixture to pass. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_expect_pass(struct kutf_context *context); + -+ hdmi_audio_infoframe_init(&frame); ++/** ++ * kutf_test_skip() - Tell the kernel that the test should be skipped. ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_skip(struct kutf_context *context); + -+ frame.coding_type = HDMI_AUDIO_CODING_TYPE_STREAM; -+ frame.sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM; -+ frame.sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM; -+ frame.channels = 2; ++/** ++ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, ++ * supplying a reason string. ++ * @context: The test context this test is running in. ++ * @message: A message string containing the reason for the skip. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. 
++ * This means it should either be a prebaked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_skip_msg(struct kutf_context *context, const char *message); + -+ ret = hdmi_audio_infoframe_pack(&frame, infoframe_buf, -+ sizeof(infoframe_buf)); -+ if (ret < 0) { -+ dev_err(hdmi->dev, "%s: Failed to pack audio infoframe: %d\n", -+ __func__, ret); -+ return ret; -+ } ++/** ++ * kutf_test_pass() - Tell the kernel that this test has passed. ++ * @context: The test context this test is running in. ++ * @message: A message string containing the reason for the pass. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_pass(struct kutf_context *context, char const *message); + -+ regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS1, &infoframe_buf[3], 2); -+ hdmi_modb(hdmi, -+ PKTSCHED_ACR_TX_EN | PKTSCHED_AUDI_TX_EN, -+ PKTSCHED_ACR_TX_EN | PKTSCHED_AUDI_TX_EN, -+ PKTSCHED_PKT_EN); ++/** ++ * kutf_test_debug() - Send a debug message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the debug information. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_debug(struct kutf_context *context, char const *message); + -+ return 0; -+} ++/** ++ * kutf_test_info() - Send an information message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the information message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_info(struct kutf_context *context, char const *message); + -+void dw_hdmi_qp_set_audio_infoframe(struct dw_hdmi_qp *hdmi, -+ struct hdmi_codec_params *hparms) -+{ -+ u8 infoframe_buf[HDMI_INFOFRAME_SIZE(AUDIO)]; -+ int ret = 0; ++/** ++ * kutf_test_warn() - Send a warning message ++ * @context: The test context this test is running in. ++ * @message: A message string containing the warning message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. 
++ */ ++void kutf_test_warn(struct kutf_context *context, char const *message); + -+ ret = hdmi_audio_infoframe_pack(&hparms->cea, infoframe_buf, -+ sizeof(infoframe_buf)); -+ if (!ret) { -+ dev_err(hdmi->dev, "%s: Failed to pack audio infoframe: %d\n", -+ __func__, ret); -+ return; -+ } ++/** ++ * kutf_test_fail() - Tell the kernel that a test has failed ++ * @context: The test context this test is running in. ++ * @message: A message string containing the failure message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_fail(struct kutf_context *context, char const *message); + -+ mutex_lock(&hdmi->audio_mutex); -+ if (!hdmi->dclk_en) { -+ mutex_unlock(&hdmi->audio_mutex); -+ return; -+ } ++/** ++ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error ++ * @context: The test context this test is running in. ++ * @message: A message string containing the fatal error message. ++ * ++ * Note: The message must not be freed during the lifetime of the test run. ++ * This means it should either be a pre-baked string, or if a dynamic string ++ * is required it must be created with kutf_dsprintf which will store ++ * the resultant string in a buffer who's lifetime is the same as the test run. ++ */ ++void kutf_test_fatal(struct kutf_context *context, char const *message); + -+ /* -+ * AUDI_CONTENTS0: { RSV, HB2, HB1, RSV } -+ * AUDI_CONTENTS1: { PB3, PB2, PB1, PB0 } -+ * AUDI_CONTENTS2: { PB7, PB6, PB5, PB4 } -+ * -+ * PB0: CheckSum -+ * PB1: | CT3 | CT2 | CT1 | CT0 | F13 | CC2 | CC1 | CC0 | -+ * PB2: | F27 | F26 | F25 | SF2 | SF1 | SF0 | SS1 | SS0 | -+ * PB3: | F37 | F36 | F35 | F34 | F33 | F32 | F31 | F30 | -+ * PB4: | CA7 | CA6 | CA5 | CA4 | CA3 | CA2 | CA1 | CA0 | -+ * PB5: | DM_INH | LSV3 | LSV2 | LSV1 | LSV0 | F52 | F51 | F50 | -+ * PB6~PB10: Reserved -+ * -+ * AUDI_CONTENTS0 default value defined by HDMI specification, -+ * and shall only be changed for debug purposes. -+ * So, we only configure payload byte from PB0~PB7(2 word total). -+ */ -+ regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS1, &infoframe_buf[3], 2); ++/** ++ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test ++ * ++ * @context: The test context this test is running in. ++ */ ++void kutf_test_abort(struct kutf_context *context); + -+ /* Enable ACR, AUDI */ -+ hdmi_modb(hdmi, PKTSCHED_ACR_TX_EN | PKTSCHED_AUDI_TX_EN, -+ PKTSCHED_ACR_TX_EN | PKTSCHED_AUDI_TX_EN, -+ PKTSCHED_PKT_EN); ++#endif /* _KERNEL_UTF_SUITE_H_ */ +diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h +new file mode 100644 +index 000000000..c458c1f73 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h +@@ -0,0 +1,55 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. 
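Taken together, the kutf_suite.h declarations above are used roughly as sketched below: a module creates an application, creates a suite with fixture create/remove callbacks, and registers test functions against it. All example_* names are hypothetical; allocating the fixture from context->fixture_pool follows the fixture_pool description in struct kutf_context, and the include form assumes the -I$(src)/../include flag from the kutf Kbuild.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <kutf/kutf_mem.h>
#include <kutf/kutf_suite.h>

struct example_fixture {                /* hypothetical per-fixture data */
        unsigned int variant;
};

static struct kutf_application *example_app;

static void *example_create_fixture(struct kutf_context *context)
{
        /*
         * Allocations from fixture_pool last for the fixture/test context
         * and are released by the framework, so remove_fixture has no work
         * to do here.
         */
        struct example_fixture *fix =
                kutf_mempool_alloc(&context->fixture_pool, sizeof(*fix));

        if (!fix)
                return NULL;

        fix->variant = context->fixture_index;
        return fix;
}

static void example_remove_fixture(struct kutf_context *context)
{
        /* Nothing to release: the fixture came from fixture_pool. */
}

static void example_smoke_test(struct kutf_context *context)
{
        struct example_fixture *fix = context->fixture;

        if (fix->variant == 0)
                kutf_test_pass(context, "variant 0 ran");
        else
                kutf_test_skip_msg(context, "only variant 0 is exercised");
}

static int __init example_kutf_init(void)
{
        struct kutf_suite *suite;

        example_app = kutf_create_application("example");
        if (!example_app)
                return -ENOMEM;

        suite = kutf_create_suite(example_app, "basic", 2,
                                  example_create_fixture,
                                  example_remove_fixture);
        if (!suite) {
                kutf_destroy_application(example_app);
                return -ENOMEM;
        }

        /* No explicit filters, so the test inherits the suite defaults. */
        kutf_add_test(suite, 0x0, "smoke", example_smoke_test);
        return 0;
}

static void __exit example_kutf_exit(void)
{
        kutf_destroy_application(example_app);
}

module_init(example_kutf_init);
module_exit(example_kutf_exit);
MODULE_LICENSE("GPL");

With fixture_count set to 2, the framework should run each registered test once per fixture variant, with context->fixture_index distinguishing the variants.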
++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Enable AUDS */ -+ hdmi_modb(hdmi, PKTSCHED_AUDS_TX_EN, PKTSCHED_AUDS_TX_EN, PKTSCHED_PKT_EN); -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_audio_infoframe); + -+static void hdmi_enable_audio_clk(struct dw_hdmi_qp *hdmi, bool enable) -+{ -+ if (enable) -+ hdmi_modb(hdmi, 0, -+ AVP_DATAPATH_PACKET_AUDIO_SWDISABLE, GLOBAL_SWDISABLE); -+ else -+ hdmi_modb(hdmi, AVP_DATAPATH_PACKET_AUDIO_SWDISABLE, -+ AVP_DATAPATH_PACKET_AUDIO_SWDISABLE, GLOBAL_SWDISABLE); -+} + -+static void dw_hdmi_i2s_audio_enable(struct dw_hdmi_qp *hdmi) -+{ -+ hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n); -+ hdmi_enable_audio_clk(hdmi, true); -+} ++#ifndef _KERNEL_UTF_UTILS_H_ ++#define _KERNEL_UTF_UTILS_H_ + -+static void dw_hdmi_i2s_audio_disable(struct dw_hdmi_qp *hdmi) -+{ -+ /* -+ * Keep ACR, AUDI, AUDS packet always on to make SINK device -+ * active for better compatibility and user experience. -+ * -+ * This also fix POP sound on some SINK devices which wakeup -+ * from suspend to active. -+ */ -+ hdmi_modb(hdmi, I2S_BPCUV_RCV_DIS, I2S_BPCUV_RCV_MSK, -+ AUDIO_INTERFACE_CONFIG0); -+ hdmi_modb(hdmi, AUDPKT_PBIT_FORCE_EN | AUDPKT_CHSTATUS_OVR_EN, -+ AUDPKT_PBIT_FORCE_EN_MASK | AUDPKT_CHSTATUS_OVR_EN_MASK, -+ AUDPKT_CONTROL0); -+} ++/* kutf_utils.h ++ * Utilities for the kernel UTF test infrastructure. ++ * ++ * This collection of library functions are provided for use by kernel UTF ++ * and users of kernel UTF which don't directly fit within the other ++ * code modules. ++ */ + -+void dw_hdmi_qp_audio_enable(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->dclk_en) { -+ hdmi->audio_enable = true; -+ if (hdmi->enable_audio) -+ hdmi->enable_audio(hdmi); -+ } -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_audio_enable); ++#include + -+void dw_hdmi_qp_audio_disable(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->dclk_en) { -+ hdmi->audio_enable = false; -+ if (hdmi->disable_audio) -+ hdmi->disable_audio(hdmi); -+ } -+ mutex_unlock(&hdmi->audio_mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_audio_disable); ++/** ++ * Maximum size of the message strings within kernel UTF, messages longer then ++ * this will be truncated. ++ */ ++#define KUTF_MAX_DSPRINTF_LEN 1024 + -+static bool hdmi_bus_fmt_is_rgb(unsigned int bus_format) -+{ -+ switch (bus_format) { -+ case MEDIA_BUS_FMT_RGB888_1X24: -+ case MEDIA_BUS_FMT_RGB101010_1X30: -+ case MEDIA_BUS_FMT_RGB121212_1X36: -+ case MEDIA_BUS_FMT_RGB161616_1X48: -+ return true; ++/** ++ * kutf_dsprintf() - dynamic sprintf ++ * @pool: memory pool to allocate from ++ * @fmt: The format string describing the string to document. ++ * @... The parameters to feed in to the format string. ++ * ++ * This function implements sprintf which dynamically allocates memory to store ++ * the string. The library will free the memory containing the string when the ++ * result set is cleared or destroyed. ++ * ++ * Note The returned string may be truncated to fit an internal temporary ++ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. ++ * ++ * Return: Returns pointer to allocated string, or NULL on error. 
++ */ ++const char *kutf_dsprintf(struct kutf_mempool *pool, ++ const char *fmt, ...); + -+ default: -+ return false; -+ } -+} ++#endif /* _KERNEL_UTF_UTILS_H_ */ +diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/drivers/gpu/arm/midgard/tests/kutf/Kbuild +new file mode 100755 +index 000000000..6b840c2ef +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/Kbuild +@@ -0,0 +1,20 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+static bool hdmi_bus_fmt_is_yuv444(unsigned int bus_format) -+{ -+ switch (bus_format) { -+ case MEDIA_BUS_FMT_YUV8_1X24: -+ case MEDIA_BUS_FMT_YUV10_1X30: -+ case MEDIA_BUS_FMT_YUV12_1X36: -+ case MEDIA_BUS_FMT_YUV16_1X48: -+ return true; + -+ default: -+ return false; -+ } -+} ++ccflags-y += -I$(src)/../include + -+static bool hdmi_bus_fmt_is_yuv422(unsigned int bus_format) -+{ -+ switch (bus_format) { -+ case MEDIA_BUS_FMT_UYVY8_1X16: -+ case MEDIA_BUS_FMT_UYVY10_1X20: -+ case MEDIA_BUS_FMT_UYVY12_1X24: -+ case MEDIA_BUS_FMT_YUYV8_1X16: -+ case MEDIA_BUS_FMT_YUYV10_1X20: -+ case MEDIA_BUS_FMT_YUYV12_1X24: -+ return true; ++obj-$(CONFIG_MALI_KUTF) += kutf.o + -+ default: -+ return false; -+ } -+} ++kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o +diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/drivers/gpu/arm/midgard/tests/kutf/Kconfig +new file mode 100644 +index 000000000..84364716a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/Kconfig +@@ -0,0 +1,22 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+static bool hdmi_bus_fmt_is_yuv420(unsigned int bus_format) -+{ -+ switch (bus_format) { -+ case MEDIA_BUS_FMT_UYYVYY8_0_5X24: -+ case MEDIA_BUS_FMT_UYYVYY10_0_5X30: -+ case MEDIA_BUS_FMT_UYYVYY12_0_5X36: -+ case MEDIA_BUS_FMT_UYYVYY16_0_5X48: -+ return true; + -+ default: -+ return false; -+ } -+} + -+static int hdmi_bus_fmt_color_depth(unsigned int bus_format) -+{ -+ switch (bus_format) { -+ case MEDIA_BUS_FMT_RGB888_1X24: -+ case MEDIA_BUS_FMT_YUV8_1X24: -+ case MEDIA_BUS_FMT_UYVY8_1X16: -+ case MEDIA_BUS_FMT_YUYV8_1X16: -+ case MEDIA_BUS_FMT_UYYVYY8_0_5X24: -+ return 8; ++config MALI_KUTF ++ tristate "Mali Kernel Unit Test Framework" ++ default n ++ help ++ Enables MALI testing framework. To compile it as a module, ++ choose M here - this will generate a single module called kutf. +diff --git a/drivers/gpu/arm/midgard/tests/kutf/Makefile b/drivers/gpu/arm/midgard/tests/kutf/Makefile +new file mode 100644 +index 000000000..010c92ca3 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/Makefile +@@ -0,0 +1,29 @@ ++# ++# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. 
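As a short illustration of kutf_dsprintf() from kutf_utils.h above: a result message has to stay valid for the whole test run, so a dynamically formatted message is built in a pool owned by the context rather than in a stack buffer. The helper below is hypothetical and assumes the kutf_context passed to a test's execute callback; using fixture_pool as the backing pool is an assumption based on its description in kutf_suite.h.

#include <kutf/kutf_suite.h>
#include <kutf/kutf_utils.h>

/* Hypothetical helper: report an integer comparison with a formatted message. */
static void example_check_value(struct kutf_context *context,
                                int expected, int actual)
{
        const char *msg;

        if (actual == expected) {
                kutf_test_pass(context, "value matches");
                return;
        }

        /*
         * The returned string lives in the pool, so it stays valid for the
         * rest of the run; it may be truncated to KUTF_MAX_DSPRINTF_LEN
         * bytes and is NULL if the allocation fails.
         */
        msg = kutf_dsprintf(&context->fixture_pool,
                            "expected %d, got %d", expected, actual);

        kutf_test_fail(context, msg ? msg : "value mismatch");
}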
++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ case MEDIA_BUS_FMT_RGB101010_1X30: -+ case MEDIA_BUS_FMT_YUV10_1X30: -+ case MEDIA_BUS_FMT_UYVY10_1X20: -+ case MEDIA_BUS_FMT_YUYV10_1X20: -+ case MEDIA_BUS_FMT_UYYVYY10_0_5X30: -+ return 10; + -+ case MEDIA_BUS_FMT_RGB121212_1X36: -+ case MEDIA_BUS_FMT_YUV12_1X36: -+ case MEDIA_BUS_FMT_UYVY12_1X24: -+ case MEDIA_BUS_FMT_YUYV12_1X24: -+ case MEDIA_BUS_FMT_UYYVYY12_0_5X36: -+ return 12; ++# linux build system bootstrap for out-of-tree module + -+ case MEDIA_BUS_FMT_RGB161616_1X48: -+ case MEDIA_BUS_FMT_YUV16_1X48: -+ case MEDIA_BUS_FMT_UYYVYY16_0_5X48: -+ return 16; ++# default to building for the host ++ARCH ?= $(shell uname -m) + -+ default: -+ return 0; -+ } -+} ++ifeq ($(KDIR),) ++$(error Must specify KDIR to point to the kernel to target)) ++endif + -+static void dw_hdmi_i2c_init(struct dw_hdmi_qp *hdmi) -+{ -+ u32 ddc_i2c_rxfilter; ++all: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules + -+ /* Software reset */ -+ hdmi_writel(hdmi, 0x01, I2CM_CONTROL0); ++clean: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +new file mode 100644 +index 000000000..5408e57d4 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +@@ -0,0 +1,94 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ /* Configure I2CM hold time and rxfilter */ -+ if (device_property_read_u32(hdmi->dev, "ddc-i2c-rxfilter", &ddc_i2c_rxfilter) == 0) -+ hdmi_writel(hdmi, ddc_i2c_rxfilter, I2CM_CONFIG0); + -+ hdmi_modb(hdmi, 0, I2CM_FM_EN, I2CM_INTERFACE_CONTROL0); + -+ /* Clear DONE and ERROR interrupts */ -+ hdmi_writel(hdmi, I2CM_OP_DONE_CLEAR | I2CM_NACK_RCVD_CLEAR, -+ MAINUNIT_1_INT_CLEAR); -+} ++/* Kernel UTF memory management functions */ + -+static int dw_hdmi_i2c_read(struct dw_hdmi_qp *hdmi, -+ unsigned char *buf, unsigned int length) -+{ -+ struct dw_hdmi_qp_i2c *i2c = hdmi->i2c; -+ int stat; ++#include ++#include ++#include + -+ if (!i2c->is_regaddr) { -+ dev_dbg(hdmi->dev, "set read register address to 0\n"); -+ i2c->slave_reg = 0x00; -+ i2c->is_regaddr = true; -+ } ++#include + -+ while (length--) { -+ reinit_completion(&i2c->cmp); + -+ hdmi_modb(hdmi, i2c->slave_reg++ << 12, I2CM_ADDR, -+ I2CM_INTERFACE_CONTROL0); ++/** ++ * struct kutf_alloc_entry - Structure representing an allocation. ++ * @node: List node for use with kutf_mempool. 
++ * @data: Data area of the allocation ++ */ ++struct kutf_alloc_entry { ++ struct list_head node; ++ u8 data[0]; ++}; + -+ if (i2c->is_segment) -+ hdmi_modb(hdmi, I2CM_EXT_READ, I2CM_WR_MASK, -+ I2CM_INTERFACE_CONTROL0); -+ else -+ hdmi_modb(hdmi, I2CM_FM_READ, I2CM_WR_MASK, -+ I2CM_INTERFACE_CONTROL0); ++int kutf_mempool_init(struct kutf_mempool *pool) ++{ ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ return -1; ++ } + -+ stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10); -+ if (!stat) { -+ dev_err(hdmi->dev, "i2c read time out!\n"); -+ hdmi_writel(hdmi, 0x01, I2CM_CONTROL0); -+ return -EAGAIN; -+ } ++ INIT_LIST_HEAD(&pool->head); + -+ /* Check for error condition on the bus */ -+ if (i2c->stat & I2CM_NACK_RCVD_IRQ) { -+ dev_err(hdmi->dev, "i2c read err!\n"); -+ hdmi_writel(hdmi, 0x01, I2CM_CONTROL0); -+ return -EIO; -+ } ++ return 0; ++} ++EXPORT_SYMBOL(kutf_mempool_init); + -+ *buf++ = hdmi_readl(hdmi, I2CM_INTERFACE_RDDATA_0_3) & 0xff; -+ dev_dbg(hdmi->dev, "i2c read done! i2c->stat:%02x 0x%02x\n", -+ i2c->stat, hdmi_readl(hdmi, I2CM_INTERFACE_RDDATA_0_3)); -+ hdmi_modb(hdmi, 0, I2CM_WR_MASK, I2CM_INTERFACE_CONTROL0); ++void kutf_mempool_destroy(struct kutf_mempool *pool) ++{ ++ struct list_head *remove; ++ struct list_head *tmp; ++ ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ return; + } -+ i2c->is_segment = false; + -+ return 0; ++ list_for_each_safe(remove, tmp, &pool->head) { ++ struct kutf_alloc_entry *remove_alloc; ++ ++ remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); ++ list_del(&remove_alloc->node); ++ kfree(remove_alloc); ++ } +} ++EXPORT_SYMBOL(kutf_mempool_destroy); + -+static int dw_hdmi_i2c_write(struct dw_hdmi_qp *hdmi, -+ unsigned char *buf, unsigned int length) ++void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) +{ -+ struct dw_hdmi_qp_i2c *i2c = hdmi->i2c; -+ int stat; ++ struct kutf_alloc_entry *ret; + -+ if (!i2c->is_regaddr) { -+ /* Use the first write byte as register address */ -+ i2c->slave_reg = buf[0]; -+ length--; -+ buf++; -+ i2c->is_regaddr = true; ++ if (!pool) { ++ pr_err("NULL pointer passed to %s\n", __func__); ++ goto fail_pool; + } + -+ while (length--) { -+ reinit_completion(&i2c->cmp); ++ ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); ++ if (!ret) { ++ pr_err("Failed to allocate memory\n"); ++ goto fail_alloc; ++ } + -+ hdmi_writel(hdmi, *buf++, I2CM_INTERFACE_WRDATA_0_3); -+ hdmi_modb(hdmi, i2c->slave_reg++ << 12, I2CM_ADDR, -+ I2CM_INTERFACE_CONTROL0); -+ hdmi_modb(hdmi, I2CM_FM_WRITE, I2CM_WR_MASK, -+ I2CM_INTERFACE_CONTROL0); ++ INIT_LIST_HEAD(&ret->node); ++ list_add(&ret->node, &pool->head); + -+ stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10); -+ if (!stat) { -+ dev_err(hdmi->dev, "i2c write time out!\n"); -+ hdmi_writel(hdmi, 0x01, I2CM_CONTROL0); -+ return -EAGAIN; -+ } ++ return &ret->data[0]; + -+ /* Check for error condition on the bus */ -+ if (i2c->stat & I2CM_NACK_RCVD_IRQ) { -+ dev_err(hdmi->dev, "i2c write nack!\n"); -+ hdmi_writel(hdmi, 0x01, I2CM_CONTROL0); -+ return -EIO; -+ } -+ hdmi_modb(hdmi, 0, I2CM_WR_MASK, I2CM_INTERFACE_CONTROL0); -+ } -+ dev_dbg(hdmi->dev, "i2c write done!\n"); -+ return 0; ++fail_alloc: ++fail_pool: ++ return NULL; +} ++EXPORT_SYMBOL(kutf_mempool_alloc); +diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +new file mode 100644 +index 000000000..5bd04969f +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +@@ -0,0 +1,95 
@@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, -+ struct i2c_msg *msgs, int num) -+{ -+ struct dw_hdmi_qp *hdmi = i2c_get_adapdata(adap); -+ struct dw_hdmi_qp_i2c *i2c = hdmi->i2c; -+ u8 addr = msgs[0].addr; -+ int i, ret = 0; + -+ if (addr == DDC_CI_ADDR) -+ /* -+ * The internal I2C controller does not support the multi-byte -+ * read and write operations needed for DDC/CI. -+ * TOFIX: Blacklist the DDC/CI address until we filter out -+ * unsupported I2C operations. -+ */ -+ return -EOPNOTSUPP; + -+ dev_dbg(hdmi->dev, "i2c xfer: num: %d, addr: %#x\n", num, addr); ++/* Kernel UTF result management functions */ + -+ for (i = 0; i < num; i++) { -+ if (msgs[i].len == 0) { -+ dev_err(hdmi->dev, -+ "unsupported transfer %d/%d, no data\n", -+ i + 1, num); -+ return -EOPNOTSUPP; -+ } -+ } ++#include ++#include ++#include + -+ mutex_lock(&i2c->lock); ++#include + -+ /* Unmute DONE and ERROR interrupts */ -+ hdmi_modb(hdmi, I2CM_NACK_RCVD_MASK_N | I2CM_OP_DONE_MASK_N, -+ I2CM_NACK_RCVD_MASK_N | I2CM_OP_DONE_MASK_N, -+ MAINUNIT_1_INT_MASK_N); ++/** ++ * struct kutf_result_set - Represents a set of results. ++ * @results: Pointer to the linked list where the results are stored. ++ */ ++struct kutf_result_set { ++ struct list_head results; ++}; + -+ /* Set slave device address taken from the first I2C message */ -+ if (addr == DDC_SEGMENT_ADDR && msgs[0].len == 1) -+ addr = DDC_ADDR; ++struct kutf_result_set *kutf_create_result_set(void) ++{ ++ struct kutf_result_set *set; + -+ hdmi_modb(hdmi, addr << 5, I2CM_SLVADDR, I2CM_INTERFACE_CONTROL0); ++ set = kmalloc(sizeof(*set), GFP_KERNEL); ++ if (!set) { ++ pr_err("Failed to allocate resultset"); ++ goto fail_alloc; ++ } + -+ /* Set slave device register address on transfer */ -+ i2c->is_regaddr = false; ++ INIT_LIST_HEAD(&set->results); + -+ /* Set segment pointer for I2C extended read mode operation */ -+ i2c->is_segment = false; ++ return set; + -+ for (i = 0; i < num; i++) { -+ dev_dbg(hdmi->dev, "xfer: num: %d/%d, len: %d, flags: %#x\n", -+ i + 1, num, msgs[i].len, msgs[i].flags); ++fail_alloc: ++ return NULL; ++} + -+ if (msgs[i].addr == DDC_SEGMENT_ADDR && msgs[i].len == 1) { -+ i2c->is_segment = true; -+ hdmi_modb(hdmi, DDC_SEGMENT_ADDR, I2CM_SEG_ADDR, -+ I2CM_INTERFACE_CONTROL1); -+ hdmi_modb(hdmi, *msgs[i].buf << 7, I2CM_SEG_PTR, -+ I2CM_INTERFACE_CONTROL1); -+ } else { -+ if (msgs[i].flags & I2C_M_RD) -+ ret = dw_hdmi_i2c_read(hdmi, msgs[i].buf, -+ msgs[i].len); -+ else -+ ret = dw_hdmi_i2c_write(hdmi, msgs[i].buf, -+ msgs[i].len); -+ } -+ if (ret < 0) -+ break; -+ } ++void kutf_add_result(struct kutf_mempool *mempool, ++ struct kutf_result_set *set, ++ enum kutf_result_status status, ++ const char *message) ++{ ++ /* Create the new result */ ++ struct kutf_result *new_result; + -+ if (!ret) -+ ret = num; ++ BUG_ON(set == NULL); + -+ /* Mute DONE and ERROR interrupts */ -+ hdmi_modb(hdmi, 0, I2CM_OP_DONE_MASK_N | I2CM_NACK_RCVD_MASK_N, -+ MAINUNIT_1_INT_MASK_N); ++ new_result = 
kutf_mempool_alloc(mempool, sizeof(*new_result)); ++ if (!new_result) { ++ pr_err("Result allocation failed\n"); ++ return; ++ } + -+ mutex_unlock(&i2c->lock); ++ INIT_LIST_HEAD(&new_result->node); ++ new_result->status = status; ++ new_result->message = message; + -+ return ret; ++ list_add_tail(&new_result->node, &set->results); +} + -+static u32 dw_hdmi_i2c_func(struct i2c_adapter *adapter) ++void kutf_destroy_result_set(struct kutf_result_set *set) +{ -+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -+} ++ if (!list_empty(&set->results)) ++ pr_err("kutf_destroy_result_set: Unread results from test\n"); + -+static const struct i2c_algorithm dw_hdmi_algorithm = { -+ .master_xfer = dw_hdmi_i2c_xfer, -+ .functionality = dw_hdmi_i2c_func, -+}; ++ kfree(set); ++} + -+static struct i2c_adapter *dw_hdmi_i2c_adapter(struct dw_hdmi_qp *hdmi) ++struct kutf_result *kutf_remove_result(struct kutf_result_set *set) +{ -+ struct i2c_adapter *adap; -+ struct dw_hdmi_qp_i2c *i2c; -+ int ret; ++ if (!list_empty(&set->results)) { ++ struct kutf_result *ret; + -+ i2c = devm_kzalloc(hdmi->dev, sizeof(*i2c), GFP_KERNEL); -+ if (!i2c) -+ return ERR_PTR(-ENOMEM); ++ ret = list_first_entry(&set->results, struct kutf_result, node); ++ list_del(&ret->node); ++ return ret; ++ } + -+ mutex_init(&i2c->lock); -+ init_completion(&i2c->cmp); ++ return NULL; ++} + -+ adap = &i2c->adap; -+ adap->class = I2C_CLASS_DDC; -+ adap->owner = THIS_MODULE; -+ adap->dev.parent = hdmi->dev; -+ adap->algo = &dw_hdmi_algorithm; -+ strscpy(adap->name, "ddc", sizeof(adap->name)); -+ i2c_set_adapdata(adap, hdmi); +diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +new file mode 100644 +index 000000000..a7cfd3be9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +@@ -0,0 +1,1041 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ ret = i2c_add_adapter(adap); -+ if (ret) { -+ dev_warn(hdmi->dev, "cannot add %s I2C adapter\n", adap->name); -+ devm_kfree(hdmi->dev, i2c); -+ return ERR_PTR(ret); -+ } + -+ hdmi->i2c = i2c; + -+ dev_info(hdmi->dev, "registered %s I2C bus driver\n", adap->name); ++/* Kernel UTF suite, test and fixture management including user to kernel ++ * interaction */ + -+ return adap; -+} ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#define HDMI_PHY_EARC_MASK BIT(29) ++#include + -+int dw_hdmi_qp_set_earc(struct dw_hdmi_qp *hdmi) -+{ -+ u32 stat, ret; ++#include ++#include ++#include + -+ /* set hdmi phy earc mode */ -+ hdmi->phy.ops->set_mode(hdmi, hdmi->phy.data, HDMI_PHY_EARC_MASK, -+ true); ++#if defined(CONFIG_DEBUG_FS) + -+ ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, -+ &hdmi->previous_mode); -+ if (ret) -+ return ret; ++/** ++ * struct kutf_application - Structure which represents kutf application ++ * @name: The name of this test application. 
++ * @dir: The debugfs directory for this test ++ * @suite_list: List head to store all the suites which are part of this ++ * application ++ */ ++struct kutf_application { ++ const char *name; ++ struct dentry *dir; ++ struct list_head suite_list; ++}; + -+ hdmi->disabled = false; ++/** ++ * struct kutf_test_function - Structure which represents kutf test function ++ * @suite: Back reference to the suite this test function ++ * belongs to ++ * @filters: Filters that apply to this test function ++ * @test_id: Test ID ++ * @execute: Function to run for this test ++ * @test_data: Static data for this test ++ * @node: List node for test_list ++ * @variant_list: List head to store all the variants which can run on ++ * this function ++ * @dir: debugfs directory for this test function ++ */ ++struct kutf_test_function { ++ struct kutf_suite *suite; ++ unsigned int filters; ++ unsigned int test_id; ++ void (*execute)(struct kutf_context *context); ++ union kutf_callback_data test_data; ++ struct list_head node; ++ struct list_head variant_list; ++ struct dentry *dir; ++}; + -+ reinit_completion(&hdmi->earc_cmp); ++/** ++ * struct kutf_test_fixture - Structure which holds information on the kutf ++ * test fixture ++ * @test_func: Test function this fixture belongs to ++ * @fixture_index: Index of this fixture ++ * @node: List node for variant_list ++ * @dir: debugfs directory for this test fixture ++ */ ++struct kutf_test_fixture { ++ struct kutf_test_function *test_func; ++ unsigned int fixture_index; ++ struct list_head node; ++ struct dentry *dir; ++}; + -+ hdmi_modb(hdmi, EARCRX_CMDC_DISCOVERY_TIMEOUT_IRQ | -+ EARCRX_CMDC_DISCOVERY_DONE_IRQ, -+ EARCRX_CMDC_DISCOVERY_TIMEOUT_IRQ | -+ EARCRX_CMDC_DISCOVERY_DONE_IRQ, EARCRX_0_INT_MASK_N); ++struct dentry *base_dir; + -+ /* start discovery */ -+ hdmi_modb(hdmi, EARCRX_CMDC_DISCOVERY_EN, EARCRX_CMDC_DISCOVERY_EN, -+ EARCRX_CMDC_CONTROL); ++/** ++ * struct kutf_convert_table - Structure which keeps test results ++ * @result_name: Status of the test result ++ * @result: Status value for a single test ++ */ ++struct kutf_convert_table { ++ char result_name[50]; ++ enum kutf_result_status result; ++}; + -+ /* -+ * The eARC TX device drives a logic-high-voltage-level -+ * pulse on the physical HPD connector pin, after -+ * at least 100 ms of low voltage level to start the -+ * eARC Discovery process. 
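/*
 * Editorial sketch, not part of the patch: how the helpers above (kutf_mem.c,
 * kutf_resultset.c and kutf_utils.h) fit together.  Results are allocated out
 * of a kutf_mempool so everything recorded for one run can be released with a
 * single kutf_mempool_destroy(); the result set must be drained with
 * kutf_remove_result() before kutf_destroy_result_set(), which otherwise
 * complains about unread results.  The kutf/ header names are assumptions
 * based on this patch's tests/include layout, and kutf_dsprintf() is assumed
 * to return a pool-backed formatted string.
 */
#include <linux/errno.h>
#include <linux/printk.h>

#include <kutf/kutf_mem.h>
#include <kutf/kutf_resultset.h>
#include <kutf/kutf_utils.h>

static int kutf_resultset_demo(void)
{
	struct kutf_mempool pool;
	struct kutf_result_set *set;
	struct kutf_result *res;
	const char *msg;

	if (kutf_mempool_init(&pool))
		return -ENOMEM;

	set = kutf_create_result_set();
	if (!set) {
		kutf_mempool_destroy(&pool);
		return -ENOMEM;
	}

	/* Message storage comes from the pool, like the result nodes. */
	msg = kutf_dsprintf(&pool, "demo pass %d", 1);
	kutf_add_result(&pool, set, KUTF_RESULT_PASS, msg);

	/* Drain the set before destroying it. */
	while ((res = kutf_remove_result(set)) != NULL)
		pr_info("kutf demo: %d: %s\n", res->status, res->message);

	kutf_destroy_result_set(set);
	kutf_mempool_destroy(&pool);	/* frees results and message in one go */
	return 0;
}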
-+ */ -+ hdmi_modb(hdmi, EARCRX_CONNECTOR_HPD, EARCRX_CONNECTOR_HPD, -+ EARCRX_CMDC_CONTROL); ++struct kutf_convert_table kutf_convert[] = { ++#define ADD_UTF_RESULT(_name) \ ++{ \ ++ #_name, \ ++ _name, \ ++}, ++ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) ++ADD_UTF_RESULT(KUTF_RESULT_SKIP) ++ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) ++ADD_UTF_RESULT(KUTF_RESULT_PASS) ++ADD_UTF_RESULT(KUTF_RESULT_DEBUG) ++ADD_UTF_RESULT(KUTF_RESULT_INFO) ++ADD_UTF_RESULT(KUTF_RESULT_WARN) ++ADD_UTF_RESULT(KUTF_RESULT_FAIL) ++ADD_UTF_RESULT(KUTF_RESULT_FATAL) ++ADD_UTF_RESULT(KUTF_RESULT_ABORT) ++}; + -+ stat = wait_for_completion_timeout(&hdmi->earc_cmp, HZ / 10); -+ if (!stat) -+ return -EAGAIN; ++#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) + -+ if (hdmi->earc_intr & EARCRX_CMDC_DISCOVERY_TIMEOUT_IRQ) { -+ dev_err(hdmi->dev, "discovery timeout\n"); -+ return -ETIMEDOUT; -+ } else if (hdmi->earc_intr & EARCRX_CMDC_DISCOVERY_DONE_IRQ) { -+ dev_info(hdmi->dev, "discovery done\n"); -+ } else { -+ dev_err(hdmi->dev, "discovery failed\n"); -+ return -EINVAL; -+ } ++/** ++ * kutf_create_context() - Create a test context in which a specific fixture ++ * of an application will be run and its results ++ * reported back to the user ++ * @test_fix: Test fixture to be run. ++ * ++ * Return: Returns the created test context on success or NULL on failure ++ */ ++static struct kutf_context *kutf_create_context( ++ struct kutf_test_fixture *test_fix); + -+ hdmi_writel(hdmi, 1, EARCRX_DMAC_PHY_CONTROL); -+ hdmi_modb(hdmi, EARCRX_CMDC_SWINIT_P, EARCRX_CMDC_SWINIT_P, -+ EARCRX_CMDC_CONFIG0); ++/** ++ * kutf_destroy_context() - Destroy a previously created test context ++ * @context: Test context to destroy ++ */ ++static void kutf_destroy_context(struct kutf_context *context); + -+ hdmi_writel(hdmi, 0xf3, EARCRX_DMAC_CONFIG); -+ hdmi_writel(hdmi, 0x63, EARCRX_DMAC_CONTROL0); -+ hdmi_writel(hdmi, 0xff, EARCRX_DMAC_CONTROL1); ++/** ++ * kutf_set_result() - Set the test result against the specified test context ++ * @context: Test context ++ * @status: Result status ++ */ ++static void kutf_set_result(struct kutf_context *context, ++ enum kutf_result_status status); + -+ hdmi_modb(hdmi, EARCRX_XACTREAD_STOP_CFG | EARCRX_XACTREAD_RETRY_CFG | -+ EARCRX_CMDC_DSCVR_EARCVALID0_TO_DISC1 | EARCRX_CMDC_XACT_RESTART_EN, -+ EARCRX_XACTREAD_STOP_CFG | EARCRX_XACTREAD_RETRY_CFG | -+ EARCRX_CMDC_DSCVR_EARCVALID0_TO_DISC1 | EARCRX_CMDC_XACT_RESTART_EN, -+ EARCRX_CMDC_CONFIG0); ++/** ++ * kutf_set_expected_result() - Set the expected test result for the specified ++ * test context ++ * @context: Test context ++ * @expected_status: Expected result status ++ */ ++static void kutf_set_expected_result(struct kutf_context *context, ++ enum kutf_result_status expected_status); + -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER0); -+ hdmi_writel(hdmi, 0x1b0e, EARCRX_DMAC_CHSTATUS_STREAMER1); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER2); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER3); -+ hdmi_writel(hdmi, 0xf2000000, EARCRX_DMAC_CHSTATUS_STREAMER4); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER5); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER6); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER7); -+ hdmi_writel(hdmi, 0, EARCRX_DMAC_CHSTATUS_STREAMER8); ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)) ++/* Pre 3.4.0 kernels don't have the simple_open helper */ + ++/** ++ * simple_open() - Helper for file opening which stores the inode private data ++ * into the file private data ++ * @inode: File entry 
representation ++ * @file: A specific opening of the file ++ * ++ * Return: always 0; if inode private data do not exist, the file will not ++ * be assigned private data ++ */ ++static int simple_open(struct inode *inode, struct file *file) ++{ ++ if (inode->i_private) ++ file->private_data = inode->i_private; + return 0; +} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_earc); ++#endif + -+/* ----------------------------------------------------------------------------- -+ * HDMI TX Setup ++/** ++ * kutf_result_to_string() - Converts a KUTF result into a string ++ * @result_str: Output result string ++ * @result: Result status to convert ++ * ++ * Return: 1 if test result was successfully converted to string, 0 otherwise + */ -+ -+static void hdmi_infoframe_set_checksum(u8 *ptr, int size) ++static int kutf_result_to_string(char **result_str, ++ enum kutf_result_status result) +{ -+ u8 csum = 0; + int i; ++ int ret = 0; + -+ ptr[3] = 0; -+ /* compute checksum */ -+ for (i = 0; i < size; i++) -+ csum += ptr[i]; -+ -+ ptr[3] = 256 - csum; ++ for (i = 0; i < UTF_CONVERT_SIZE; i++) { ++ if (result == kutf_convert[i].result) { ++ *result_str = kutf_convert[i].result_name; ++ ret = 1; ++ } ++ } ++ return ret; +} + -+static bool is_hdmi2_sink(const struct drm_connector *connector) ++/** ++ * kutf_debugfs_const_string_read() - Simple debugfs read callback which ++ * returns a constant string ++ * @file: Opened file to read from ++ * @buf: User buffer to write the data into ++ * @len: Amount of data to read ++ * @ppos: Offset into file to read from ++ * ++ * Return: On success, the number of bytes read and offset @ppos advanced by ++ * this number; on error, negative value ++ */ ++static ssize_t kutf_debugfs_const_string_read(struct file *file, ++ char __user *buf, size_t len, loff_t *ppos) +{ -+ if (!connector) -+ return true; ++ char *str = file->private_data; + -+ return connector->display_info.hdmi.scdc.supported || -+ connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR420; ++ return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); +} + -+static void hdmi_config_AVI(struct dw_hdmi_qp *hdmi, -+ const struct drm_connector *connector, -+ const struct drm_display_mode *mode) -+{ -+ struct hdmi_avi_infoframe frame; -+ u32 val, i, j; -+ u8 buff[17]; -+ enum hdmi_quantization_range rgb_quant_range = -+ hdmi->hdmi_data.quant_range; ++static const struct file_operations kutf_debugfs_const_string_ops = { ++ .owner = THIS_MODULE, ++ .open = simple_open, ++ .read = kutf_debugfs_const_string_read, ++ .llseek = default_llseek, ++}; + -+ /* Initialise info frame from DRM mode */ -+ drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); ++/** ++ * kutf_add_explicit_result() - Check if an explicit result needs to be added ++ * @context: KUTF test context ++ */ ++static void kutf_add_explicit_result(struct kutf_context *context) ++{ ++ switch (context->expected_status) { ++ case KUTF_RESULT_UNKNOWN: ++ if (context->status == KUTF_RESULT_UNKNOWN) ++ kutf_test_pass(context, "(implicit pass)"); ++ break; + -+ /* -+ * Ignore monitor selectable quantization, use quantization set -+ * by the user -+ */ -+ drm_hdmi_avi_infoframe_quant_range(&frame, connector, mode, rgb_quant_range); -+ if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format)) -+ frame.colorspace = HDMI_COLORSPACE_YUV444; -+ else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) -+ frame.colorspace = HDMI_COLORSPACE_YUV422; -+ else if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ frame.colorspace 
= HDMI_COLORSPACE_YUV420; -+ else -+ frame.colorspace = HDMI_COLORSPACE_RGB; ++ case KUTF_RESULT_WARN: ++ if (context->status == KUTF_RESULT_WARN) ++ kutf_test_pass(context, ++ "Pass (expected warn occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected warn missing)"); ++ break; + -+ /* Set up colorimetry and quant range */ -+ if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { -+ switch (hdmi->hdmi_data.enc_out_encoding) { -+ case V4L2_YCBCR_ENC_601: -+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ else -+ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; -+ break; -+ case V4L2_YCBCR_ENC_709: -+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ else -+ frame.colorimetry = HDMI_COLORIMETRY_ITU_709; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; -+ break; -+ case V4L2_YCBCR_ENC_BT2020: -+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_BT2020) -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ else -+ frame.colorimetry = HDMI_COLORIMETRY_ITU_709; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_BT2020; -+ break; -+ default: /* Carries no data */ -+ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; -+ break; ++ case KUTF_RESULT_FAIL: ++ if (context->status == KUTF_RESULT_FAIL) ++ kutf_test_pass(context, ++ "Pass (expected fail occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) { ++ /* Force the expected status so the fail gets logged */ ++ context->expected_status = KUTF_RESULT_PASS; ++ kutf_test_fail(context, ++ "Fail (expected fail missing)"); + } ++ break; + -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; -+ } else { -+ if (hdmi->hdmi_data.enc_out_encoding == V4L2_YCBCR_ENC_BT2020) { -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_BT2020; -+ } else { -+ frame.colorimetry = HDMI_COLORIMETRY_NONE; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; -+ } ++ case KUTF_RESULT_FATAL: ++ if (context->status == KUTF_RESULT_FATAL) ++ kutf_test_pass(context, ++ "Pass (expected fatal occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected fatal missing)"); ++ break; + -+ if (is_hdmi2_sink(connector) && -+ frame.quantization_range == HDMI_QUANTIZATION_RANGE_FULL) -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_FULL; -+ else -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; ++ case KUTF_RESULT_ABORT: ++ if (context->status == KUTF_RESULT_ABORT) ++ kutf_test_pass(context, ++ "Pass (expected abort occurred)"); ++ else if (context->status != KUTF_RESULT_SKIP) ++ kutf_test_fail(context, ++ "Fail (expected abort missing)"); ++ break; ++ default: ++ break; + } ++} + -+ frame.scan_mode = HDMI_SCAN_MODE_NONE; ++/** ++ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry. ++ * @inode: inode of the opened file ++ * @file: Opened file to read from ++ * ++ * This function retrieves the test fixture data that is associated with the ++ * opened file and works back to get the test, suite and application so ++ * it can then run the test that is associated with the file entry. 
++ * ++ * Return: 0 on success ++ */ ++static int kutf_debugfs_run_open(struct inode *inode, struct file *file) ++{ ++ struct kutf_test_fixture *test_fix = inode->i_private; ++ struct kutf_test_function *test_func = test_fix->test_func; ++ struct kutf_suite *suite = test_func->suite; ++ struct kutf_context *test_context; + -+ hdmi_avi_infoframe_pack_only(&frame, buff, 17); ++ test_context = kutf_create_context(test_fix); ++ if (!test_context) ++ return -ENODEV; + -+ /* mode which vic >= 128 must use avi version 3 */ -+ if (hdmi->vic >= 128) { -+ frame.version = 3; -+ buff[1] = frame.version; -+ buff[4] &= 0x1f; -+ buff[4] |= ((frame.colorspace & 0x7) << 5); -+ buff[7] = hdmi->vic; -+ hdmi_infoframe_set_checksum(buff, 17); -+ } else if (is_hdmi2_sink(connector)) { -+ buff[7] = hdmi->vic; -+ } ++ file->private_data = test_context; + + /* -+ * The Designware IP uses a different byte format from standard -+ * AVI info frames, though generally the bits are in the correct -+ * bytes. ++ * Call the create fixture function if required before the ++ * fixture is run + */ ++ if (suite->create_fixture) ++ test_context->fixture = suite->create_fixture(test_context); + -+ val = (frame.version << 8) | (frame.length << 16); -+ hdmi_writel(hdmi, val, PKT_AVI_CONTENTS0); ++ /* Only run the test if the fixture was created (if required) */ ++ if ((suite->create_fixture && test_context->fixture) || ++ (!suite->create_fixture)) { ++ /* Run this fixture */ ++ test_func->execute(test_context); + -+ for (i = 0; i < 4; i++) { -+ for (j = 0; j < 4; j++) { -+ if (i * 4 + j >= 14) -+ break; -+ if (!j) -+ val = buff[i * 4 + j + 3]; -+ val |= buff[i * 4 + j + 3] << (8 * j); -+ } ++ if (suite->remove_fixture) ++ suite->remove_fixture(test_context); + -+ hdmi_writel(hdmi, val, PKT_AVI_CONTENTS1 + i * 4); ++ kutf_add_explicit_result(test_context); + } -+ -+ hdmi_modb(hdmi, 0, PKTSCHED_AVI_FIELDRATE, PKTSCHED_PKT_CONFIG1); -+ -+ hdmi_modb(hdmi, PKTSCHED_AVI_TX_EN, PKTSCHED_AVI_TX_EN, PKTSCHED_PKT_EN); ++ return 0; +} + -+#define VSI_PKT_TYPE 0x81 -+#define VSI_PKT_VERSION 1 -+#define HDMI_FORUM_OUI 0xc45dd8 -+#define ALLM_MODE BIT(1) -+#define HDMI_FORUM_LEN 9 -+ -+static void hdmi_config_vendor_specific_infoframe(struct dw_hdmi_qp *hdmi, -+ const struct drm_connector *connector, -+ const struct drm_display_mode *mode) ++/** ++ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. ++ * @file: Opened file to read from ++ * @buf: User buffer to write the data into ++ * @len: Amount of data to read ++ * @ppos: Offset into file to read from ++ * ++ * This function emits the results which where logged during the opening of ++ * the file kutf_debugfs_run_open. ++ * Results will be emitted one at a time, once all the results have been read ++ * 0 will be returned to indicate there is no more data. ++ * ++ * Return: Number of bytes read. 
++ */ ++static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, ++ size_t len, loff_t *ppos) +{ -+ struct hdmi_vendor_infoframe frame; -+ u8 buffer[10]; -+ u32 val; -+ ssize_t err; -+ int i, reg; -+ struct dw_hdmi_link_config *link_cfg = NULL; -+ void *data = hdmi->plat_data->phy_data; -+ -+ if (hdmi->plat_data->get_link_cfg) -+ link_cfg = hdmi->plat_data->get_link_cfg(data); ++ struct kutf_context *test_context = file->private_data; ++ struct kutf_result *res; ++ unsigned long bytes_not_copied; ++ ssize_t bytes_copied = 0; + -+ hdmi_modb(hdmi, 0, PKTSCHED_VSI_TX_EN, PKTSCHED_PKT_EN); -+ for (i = 0; i <= 7; i++) -+ hdmi_writel(hdmi, 0, PKT_VSI_CONTENTS0 + i * 4); ++ /* Note: This code assumes a result is read completely */ ++ res = kutf_remove_result(test_context->result_set); ++ if (res) { ++ char *kutf_str_ptr = NULL; ++ unsigned int kutf_str_len = 0; ++ unsigned int message_len = 0; ++ char separator = ':'; ++ char terminator = '\n'; + -+ if (hdmi->allm_enable && (link_cfg->add_func & SUPPORT_HDMI_ALLM)) { -+ buffer[0] = VSI_PKT_TYPE; -+ buffer[1] = VSI_PKT_VERSION; -+ buffer[2] = 5; -+ buffer[4] = HDMI_FORUM_OUI & 0xff; -+ buffer[5] = (HDMI_FORUM_OUI >> 8) & 0xff; -+ buffer[6] = (HDMI_FORUM_OUI >> 16) & 0xff; -+ buffer[7] = VSI_PKT_VERSION; -+ buffer[8] = ALLM_MODE; ++ kutf_result_to_string(&kutf_str_ptr, res->status); ++ if (kutf_str_ptr) ++ kutf_str_len = strlen(kutf_str_ptr); + -+ hdmi_infoframe_set_checksum(buffer, HDMI_FORUM_LEN); ++ if (res->message) ++ message_len = strlen(res->message); + -+ err = 9; -+ } else { -+ if (is_hdmi2_sink(connector)) { -+ hdmi_modb(hdmi, 0, PKTSCHED_VSI_TX_EN, PKTSCHED_PKT_EN); -+ return; ++ if ((kutf_str_len + 1 + message_len + 1) > len) { ++ pr_err("Not enough space in user buffer for a single result"); ++ return 0; + } -+ err = drm_hdmi_vendor_infoframe_from_display_mode(&frame, connector, -+ mode); -+ if (err < 0) -+ /* -+ * Going into that statement does not means vendor infoframe -+ * fails. It just informed us that vendor infoframe is not -+ * needed for the selected mode. Only 4k or stereoscopic 3D -+ * mode requires vendor infoframe. So just simply return. 
-+ */ -+ return; + -+ err = hdmi_vendor_infoframe_pack(&frame, buffer, sizeof(buffer)); -+ if (err < 0) { -+ dev_err(hdmi->dev, "Failed to pack vendor infoframe: %zd\n", -+ err); -+ return; ++ /* First copy the result string */ ++ if (kutf_str_ptr) { ++ bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, ++ kutf_str_len); ++ bytes_copied += kutf_str_len - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; + } -+ } + -+ /* vsi header */ -+ val = (buffer[2] << 16) | (buffer[1] << 8) | buffer[0]; -+ hdmi_writel(hdmi, val, PKT_VSI_CONTENTS0); -+ -+ reg = PKT_VSI_CONTENTS1; -+ for (i = 3; i < err; i++) { -+ if (i % 4 == 3) -+ val = buffer[i]; -+ if (i % 4 == 0) -+ val |= buffer[i] << 8; -+ if (i % 4 == 1) -+ val |= buffer[i] << 16; -+ if (i % 4 == 2) -+ val |= buffer[i] << 24; ++ /* Then the separator */ ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ &separator, 1); ++ bytes_copied += 1 - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; + -+ if ((i % 4 == 2) || (i == (err - 1))) { -+ hdmi_writel(hdmi, val, reg); -+ reg += 4; ++ /* Finally Next copy the result string */ ++ if (res->message) { ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ res->message, message_len); ++ bytes_copied += message_len - bytes_not_copied; ++ if (bytes_not_copied) ++ goto exit; + } -+ } -+ -+ hdmi_writel(hdmi, 0, PKT_VSI_CONTENTS7); + -+ hdmi_modb(hdmi, 0, PKTSCHED_VSI_FIELDRATE, PKTSCHED_PKT_CONFIG1); -+ hdmi_modb(hdmi, PKTSCHED_VSI_TX_EN, PKTSCHED_VSI_TX_EN, -+ PKTSCHED_PKT_EN); ++ /* Finally the terminator */ ++ bytes_not_copied = copy_to_user(&buf[bytes_copied], ++ &terminator, 1); ++ bytes_copied += 1 - bytes_not_copied; ++ } ++exit: ++ return bytes_copied; +} + -+static void hdmi_config_CVTEM(struct dw_hdmi_qp *hdmi) ++/** ++ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. ++ * @inode: File entry representation ++ * @file: A specific opening of the file ++ * ++ * Release any resources that where created during the opening of the file ++ * ++ * Return: 0 on success ++ */ ++static int kutf_debugfs_run_release(struct inode *inode, struct file *file) +{ -+ u8 ds_type = 0; -+ u8 sync = 1; -+ u8 vfr = 1; -+ u8 afr = 0; -+ u8 new = 1; -+ u8 end = 0; -+ u8 data_set_length = 136; -+ u8 hb1[6] = { 0x80, 0, 0, 0, 0, 0x40 }; -+ u8 *pps_body; -+ u32 val, i, reg; -+ struct drm_display_mode *mode = &hdmi->previous_mode; -+ int hsync, hfront, hback; -+ struct dw_hdmi_link_config *link_cfg; -+ void *data = hdmi->plat_data->phy_data; -+ -+ hdmi_modb(hdmi, 0, PKTSCHED_EMP_CVTEM_TX_EN, PKTSCHED_PKT_EN); -+ -+ if (hdmi->plat_data->get_link_cfg) { -+ link_cfg = hdmi->plat_data->get_link_cfg(data); -+ } else { -+ dev_err(hdmi->dev, "can't get frl link cfg\n"); -+ return; -+ } ++ struct kutf_context *test_context = file->private_data; + -+ if (!link_cfg->dsc_mode) { -+ dev_info(hdmi->dev, "don't use dsc mode\n"); -+ return; -+ } ++ kutf_destroy_context(test_context); ++ return 0; ++} + -+ pps_body = link_cfg->pps_payload; ++static const struct file_operations kutf_debugfs_run_ops = { ++ .owner = THIS_MODULE, ++ .open = kutf_debugfs_run_open, ++ .read = kutf_debugfs_run_read, ++ .release = kutf_debugfs_run_release, ++ .llseek = default_llseek, ++}; + -+ hsync = mode->hsync_end - mode->hsync_start; -+ hback = mode->htotal - mode->hsync_end; -+ hfront = mode->hsync_start - mode->hdisplay; ++/** ++ * create_fixture_variant() - Creates a fixture variant for the specified ++ * test function and index and the debugfs entries ++ * that represent it. 
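/*
 * Editorial sketch, not part of the patch: what a userspace harness sees when
 * it opens one of the per-fixture "run" files wired up below.  The open()
 * itself runs the fixture (kutf_debugfs_run_open() above), and every read()
 * then returns one record formatted by kutf_debugfs_run_read() as
 * "<KUTF_RESULT_...>:<message>\n" until EOF.  Only that record format comes
 * from the code above; the debugfs root name "kutf_tests" and the example
 * application/suite/test names are assumptions used for illustration.
 */
#include <stdio.h>

int main(void)
{
	/* <debugfs>/<root>/<application>/<suite>/<test>/<fixture index>/run */
	const char *path =
		"/sys/kernel/debug/kutf_tests/example_app/example_suite/simple_test/0/run";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;

	/* e.g. "KUTF_RESULT_PASS:fixture carries the expected value" */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}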
++ * @test_func: Test function ++ * @fixture_index: Fixture index ++ * ++ * Return: 0 on success, negative value corresponding to error code in failure ++ */ ++static int create_fixture_variant(struct kutf_test_function *test_func, ++ unsigned int fixture_index) ++{ ++ struct kutf_test_fixture *test_fix; ++ char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ ++ struct dentry *tmp; ++ int err; + -+ for (i = 0; i < 6; i++) { -+ val = i << 16 | hb1[i] << 8; -+ hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS0 + i * 0x20); ++ test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); ++ if (!test_fix) { ++ pr_err("Failed to create debugfs directory when adding fixture\n"); ++ err = -ENOMEM; ++ goto fail_alloc; + } + -+ val = new << 7 | end << 6 | ds_type << 4 | afr << 3 | -+ vfr << 2 | sync << 1; -+ hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS1); -+ -+ val = data_set_length << 16 | pps_body[0] << 24; -+ hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS2); ++ test_fix->test_func = test_func; ++ test_fix->fixture_index = fixture_index; + -+ reg = PKT0_EMP_CVTEM_CONTENTS3; -+ for (i = 1; i < 125; i++) { -+ if (reg == PKT1_EMP_CVTEM_CONTENTS0 || -+ reg == PKT2_EMP_CVTEM_CONTENTS0 || -+ reg == PKT3_EMP_CVTEM_CONTENTS0 || -+ reg == PKT4_EMP_CVTEM_CONTENTS0 || -+ reg == PKT5_EMP_CVTEM_CONTENTS0) { -+ reg += 4; -+ i--; -+ continue; -+ } -+ if (i % 4 == 1) -+ val = pps_body[i]; -+ if (i % 4 == 2) -+ val |= pps_body[i] << 8; -+ if (i % 4 == 3) -+ val |= pps_body[i] << 16; -+ if (!(i % 4)) { -+ val |= pps_body[i] << 24; -+ hdmi_writel(hdmi, val, reg); -+ reg += 4; -+ } ++ snprintf(name, sizeof(name), "%d", fixture_index); ++ test_fix->dir = debugfs_create_dir(name, test_func->dir); ++ if (!test_func->dir) { ++ pr_err("Failed to create debugfs directory when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_dir; + } + -+ val = (hfront & 0xff) << 24 | pps_body[127] << 16 | -+ pps_body[126] << 8 | pps_body[125]; -+ hdmi_writel(hdmi, val, PKT4_EMP_CVTEM_CONTENTS6); ++ tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n", ++ &kutf_debugfs_const_string_ops); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_file; ++ } + -+ val = (hback & 0xff) << 24 | ((hsync >> 8) & 0xff) << 16 | -+ (hsync & 0xff) << 8 | ((hfront >> 8) & 0xff); -+ hdmi_writel(hdmi, val, PKT4_EMP_CVTEM_CONTENTS7); ++ tmp = debugfs_create_file("run", S_IROTH, test_fix->dir, test_fix, ++ &kutf_debugfs_run_ops); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); ++ /* Might not be the right error, we don't get it passed back to us */ ++ err = -EEXIST; ++ goto fail_file; ++ } + -+ val = link_cfg->hcactive << 8 | ((hback >> 8) & 0xff); -+ hdmi_writel(hdmi, val, PKT5_EMP_CVTEM_CONTENTS1); ++ list_add(&test_fix->node, &test_func->variant_list); ++ return 0; + -+ for (i = PKT5_EMP_CVTEM_CONTENTS2; i <= PKT5_EMP_CVTEM_CONTENTS7; i += 4) -+ hdmi_writel(hdmi, 0, i); ++fail_file: ++ debugfs_remove_recursive(test_fix->dir); ++fail_dir: ++ kfree(test_fix); ++fail_alloc: ++ return err; ++} + -+ hdmi_modb(hdmi, PKTSCHED_EMP_CVTEM_TX_EN, PKTSCHED_EMP_CVTEM_TX_EN, -+ PKTSCHED_PKT_EN); ++/** ++ * kutf_remove_test_variant() - Destroy a previously created fixture variant. 
++ * @test_fix: Test fixture ++ */ ++static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) ++{ ++ debugfs_remove_recursive(test_fix->dir); ++ kfree(test_fix); +} + -+static void hdmi_config_drm_infoframe(struct dw_hdmi_qp *hdmi, -+ const struct drm_connector *connector) ++void kutf_add_test_with_filters_and_data( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data test_data) +{ -+ const struct drm_connector_state *conn_state = connector->state; -+ struct hdr_output_metadata *hdr_metadata; -+ struct hdmi_drm_infoframe frame; -+ u8 buffer[30]; -+ ssize_t err; -+ int i; -+ u32 val; ++ struct kutf_test_function *test_func; ++ struct dentry *tmp; ++ unsigned int i; + -+ if (!hdmi->plat_data->use_drm_infoframe) -+ return; ++ test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); ++ if (!test_func) { ++ pr_err("Failed to allocate memory when adding test %s\n", name); ++ goto fail_alloc; ++ } + -+ hdmi_modb(hdmi, 0, PKTSCHED_DRMI_TX_EN, PKTSCHED_PKT_EN); ++ INIT_LIST_HEAD(&test_func->variant_list); + -+ if (!hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf) { -+ DRM_DEBUG("No need to set HDR metadata in infoframe\n"); -+ return; ++ test_func->dir = debugfs_create_dir(name, suite->dir); ++ if (!test_func->dir) { ++ pr_err("Failed to create debugfs directory when adding test %s\n", name); ++ goto fail_dir; + } + -+ if (!conn_state->hdr_output_metadata) { -+ DRM_DEBUG("source metadata not set yet\n"); -+ return; ++ tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n", ++ &kutf_debugfs_const_string_ops); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); ++ goto fail_file; + } + -+ hdr_metadata = (struct hdr_output_metadata *) -+ conn_state->hdr_output_metadata->data; -+ -+ if (!(hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf & -+ BIT(hdr_metadata->hdmi_metadata_type1.eotf))) { -+ DRM_ERROR("Not support EOTF %d\n", -+ hdr_metadata->hdmi_metadata_type1.eotf); -+ return; ++ test_func->filters = filters; ++ tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, ++ &test_func->filters); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); ++ goto fail_file; + } + -+ err = drm_hdmi_infoframe_set_hdr_metadata(&frame, conn_state); -+ if (err < 0) -+ return; ++ test_func->test_id = id; ++ tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, ++ &test_func->test_id); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); ++ goto fail_file; ++ } + -+ err = hdmi_drm_infoframe_pack(&frame, buffer, sizeof(buffer)); -+ if (err < 0) { -+ dev_err(hdmi->dev, "Failed to pack drm infoframe: %zd\n", err); -+ return; ++ for (i = 0; i < suite->fixture_variants; i++) { ++ if (create_fixture_variant(test_func, i)) { ++ pr_err("Failed to create fixture %d when adding test %s\n", i, name); ++ goto fail_file; ++ } + } + -+ val = (frame.version << 8) | (frame.length << 16); -+ hdmi_writel(hdmi, val, PKT_DRMI_CONTENTS0); ++ test_func->suite = suite; ++ test_func->execute = execute; ++ test_func->test_data = test_data; + -+ for (i = 0; i <= frame.length; i++) { -+ if (i % 4 == 0) -+ val = buffer[3 + i]; -+ val |= buffer[3 + i] << ((i % 4) * 8); ++ list_add(&test_func->node, &suite->test_list); ++ return; + -+ if (i % 4 == 3 || (i == (frame.length))) -+ hdmi_writel(hdmi, val, PKT_DRMI_CONTENTS1 + ((i / 4) * 4)); -+ } ++fail_file: ++ 
debugfs_remove_recursive(test_func->dir); ++fail_dir: ++ kfree(test_func); ++fail_alloc: ++ return; ++} ++EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); + -+ hdmi_modb(hdmi, 0, PKTSCHED_DRMI_FIELDRATE, PKTSCHED_PKT_CONFIG1); ++void kutf_add_test_with_filters( ++ struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context), ++ unsigned int filters) ++{ ++ union kutf_callback_data data; + -+ /* -+ * avi and hdr infoframe cannot be sent at the same time -+ * for compatibility with Huawei TV -+ */ -+ mdelay(50); -+ hdmi_modb(hdmi, PKTSCHED_DRMI_TX_EN, PKTSCHED_DRMI_TX_EN, PKTSCHED_PKT_EN); ++ data.ptr_value = NULL; + -+ DRM_DEBUG("%s eotf %d end\n", __func__, -+ hdr_metadata->hdmi_metadata_type1.eotf); ++ kutf_add_test_with_filters_and_data(suite, ++ id, ++ name, ++ execute, ++ suite->suite_default_flags, ++ data); +} ++EXPORT_SYMBOL(kutf_add_test_with_filters); + -+/* Filter out invalid setups to avoid configuring SCDC and scrambling */ -+static bool dw_hdmi_support_scdc(struct dw_hdmi_qp *hdmi, -+ const struct drm_display_info *display) ++void kutf_add_test(struct kutf_suite *suite, ++ unsigned int id, ++ const char *name, ++ void (*execute)(struct kutf_context *context)) +{ -+ /* Disable if no DDC bus */ -+ if (!hdmi->ddc) -+ return false; -+ -+ /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */ -+ if (!display->hdmi.scdc.supported || -+ !display->hdmi.scdc.scrambling.supported) -+ return false; ++ union kutf_callback_data data; + -+ /* -+ * Disable if display only support low TMDS rates and scrambling -+ * for low rates is not supported either -+ */ -+ if (!display->hdmi.scdc.scrambling.low_rates && -+ display->max_tmds_clock <= 340000) -+ return false; ++ data.ptr_value = NULL; + -+ return true; ++ kutf_add_test_with_filters_and_data(suite, ++ id, ++ name, ++ execute, ++ suite->suite_default_flags, ++ data); +} ++EXPORT_SYMBOL(kutf_add_test); + -+static int hdmi_set_frl_mask(int frl_rate) ++/** ++ * kutf_remove_test(): Remove a previously added test function. 
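/*
 * Editorial sketch, not part of the patch: registering tests against a suite
 * with the kutf_add_test*() helpers above.  The execute callback receives the
 * kutf_context and can reach the per-run fixture through context->fixture;
 * kutf_test_pass()/kutf_test_fail() record the outcome.  Note that, as written
 * above, kutf_add_test_with_filters() forwards suite->suite_default_flags
 * rather than its own filters argument, so the explicit mask is currently
 * ignored.  "example_suite" is assumed to come from kutf_create_suite() (see
 * the module sketch further below).
 */
static void example_simple_test(struct kutf_context *context)
{
	int *value = context->fixture;	/* whatever create_fixture() returned */

	if (value && *value == 42)
		kutf_test_pass(context, "fixture carries the expected value");
	else
		kutf_test_fail(context, "fixture missing or wrong value");
}

static void example_register_tests(struct kutf_suite *example_suite)
{
	/* Runs under the suite's default filter flags ... */
	kutf_add_test(example_suite, 0x0, "simple_test", example_simple_test);

	/* ... or with an explicit filter mask (see the note above). */
	kutf_add_test_with_filters(example_suite, 0x1, "filtered_test",
				   example_simple_test, KUTF_F_TEST_GENERIC);
}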
++ * @test_func: Test function ++ */ ++static void kutf_remove_test(struct kutf_test_function *test_func) +{ -+ switch (frl_rate) { -+ case 48: -+ return FRL_12GBPS_4LANE; -+ case 40: -+ return FRL_10GBPS_4LANE; -+ case 32: -+ return FRL_8GBPS_4LANE; -+ case 24: -+ return FRL_6GBPS_4LANE; -+ case 18: -+ return FRL_6GBPS_3LANE; -+ case 9: -+ return FRL_3GBPS_3LANE; ++ struct list_head *pos; ++ struct list_head *tmp; ++ ++ list_for_each_safe(pos, tmp, &test_func->variant_list) { ++ struct kutf_test_fixture *test_fix; ++ ++ test_fix = list_entry(pos, struct kutf_test_fixture, node); ++ kutf_remove_test_variant(test_fix); + } + -+ return 0; ++ list_del(&test_func->node); ++ debugfs_remove_recursive(test_func->dir); ++ kfree(test_func); +} + -+static int hdmi_start_flt(struct dw_hdmi_qp *hdmi, u8 rate) ++struct kutf_suite *kutf_create_suite_with_filters_and_data( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters, ++ union kutf_callback_data suite_data) +{ -+ u8 val; -+ u32 value; -+ u8 ffe_lv = 0; -+ int i = 0; -+ bool ltsp = false; -+ -+ hdmi_modb(hdmi, AVP_DATAPATH_VIDEO_SWDISABLE, -+ AVP_DATAPATH_VIDEO_SWDISABLE, GLOBAL_SWDISABLE); -+ -+ /* reset avp data path */ -+ hdmi_writel(hdmi, BIT(6), GLOBAL_SWRESET_REQUEST); ++ struct kutf_suite *suite; ++ struct dentry *tmp; + -+ /* FLT_READY & FFE_LEVELS read */ -+ for (i = 0; i < 20; i++) { -+ drm_scdc_readb(hdmi->ddc, SCDC_STATUS_FLAGS_0, &val); -+ if (val & BIT(6)) -+ break; -+ msleep(20); ++ suite = kmalloc(sizeof(*suite), GFP_KERNEL); ++ if (!suite) { ++ pr_err("Failed to allocate memory when creating suite %s\n", name); ++ goto fail_kmalloc; + } + -+ if (i == 20) { -+ dev_err(hdmi->dev, "sink flt isn't ready\n"); -+ return -EINVAL; ++ suite->dir = debugfs_create_dir(name, app->dir); ++ if (!suite->dir) { ++ pr_err("Failed to create debugfs directory when adding test %s\n", name); ++ goto fail_debugfs; + } + -+ /* clear flt flags */ -+ drm_scdc_readb(hdmi->ddc, 0x10, &val); -+ if (val & BIT(5)) -+ drm_scdc_writeb(hdmi->ddc, 0x10, BIT(5)); ++ tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n", ++ &kutf_debugfs_const_string_ops); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); ++ goto fail_file; ++ } + -+ /* max ffe level 3 */ -+ val = 0 << 4 | hdmi_set_frl_mask(rate); -+ drm_scdc_writeb(hdmi->ddc, 0x31, val); ++ INIT_LIST_HEAD(&suite->test_list); ++ suite->app = app; ++ suite->name = name; ++ suite->fixture_variants = fixture_count; ++ suite->create_fixture = create_fixture; ++ suite->remove_fixture = remove_fixture; ++ suite->suite_default_flags = filters; ++ suite->suite_data = suite_data; + -+ /* select FRL_RATE & FFE_LEVELS */ -+ hdmi_writel(hdmi, ffe_lv, FLT_CONFIG0); ++ list_add(&suite->node, &app->suite_list); + -+ /* we set max 2s timeout */ -+ i = 4000; -+ while (i--) { -+ /* source should poll update flag every 2ms or less */ -+ usleep_range(400, 500); -+ drm_scdc_readb(hdmi->ddc, 0x10, &val); ++ return suite; + -+ if (!(val & 0x30)) -+ continue; ++fail_file: ++ debugfs_remove_recursive(suite->dir); ++fail_debugfs: ++ kfree(suite); ++fail_kmalloc: ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); + -+ if (val & BIT(5)) { -+ u8 reg_val, ln0, ln1, ln2, ln3; ++struct kutf_suite *kutf_create_suite_with_filters( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int 
fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context), ++ unsigned int filters) ++{ ++ union kutf_callback_data data; + -+ drm_scdc_readb(hdmi->ddc, 0x41, ®_val); -+ ln0 = reg_val & 0xf; -+ ln1 = (reg_val >> 4) & 0xf; ++ data.ptr_value = NULL; ++ return kutf_create_suite_with_filters_and_data(app, ++ name, ++ fixture_count, ++ create_fixture, ++ remove_fixture, ++ filters, ++ data); ++} ++EXPORT_SYMBOL(kutf_create_suite_with_filters); + -+ drm_scdc_readb(hdmi->ddc, 0x42, ®_val); -+ ln2 = reg_val & 0xf; -+ ln3 = (reg_val >> 4) & 0xf; ++struct kutf_suite *kutf_create_suite( ++ struct kutf_application *app, ++ const char *name, ++ unsigned int fixture_count, ++ void *(*create_fixture)(struct kutf_context *context), ++ void (*remove_fixture)(struct kutf_context *context)) ++{ ++ union kutf_callback_data data; + -+ if (!ln0 && !ln1 && !ln2 && !ln3) { -+ dev_info(hdmi->dev, "goto ltsp\n"); -+ ltsp = true; -+ hdmi_writel(hdmi, 0, FLT_CONFIG1); -+ } else if ((ln0 == 0xf) | (ln1 == 0xf) | (ln2 == 0xf) | (ln3 == 0xf)) { -+ dev_err(hdmi->dev, "goto lts4\n"); -+ break; -+ } else if ((ln0 == 0xe) | (ln1 == 0xe) | (ln2 == 0xe) | (ln3 == 0xe)) { -+ dev_info(hdmi->dev, "goto ffe\n"); -+ break; -+ } else { -+ value = (ln3 << 16) | (ln2 << 12) | (ln1 << 8) | (ln0 << 4) | 0xf; -+ hdmi_writel(hdmi, value, FLT_CONFIG1); -+ } -+ } ++ data.ptr_value = NULL; ++ return kutf_create_suite_with_filters_and_data(app, ++ name, ++ fixture_count, ++ create_fixture, ++ remove_fixture, ++ KUTF_F_TEST_GENERIC, ++ data); ++} ++EXPORT_SYMBOL(kutf_create_suite); + -+ /* only clear frl_start and flt_update */ -+ drm_scdc_writeb(hdmi->ddc, 0x10, val & 0x30); ++/** ++ * kutf_destroy_suite() - Destroy a previously added test suite. 
++ * @suite: Test suite ++ */ ++static void kutf_destroy_suite(struct kutf_suite *suite) ++{ ++ struct list_head *pos; ++ struct list_head *tmp; + -+ if ((val & BIT(4)) && ltsp) { -+ hdmi_modb(hdmi, 0, AVP_DATAPATH_VIDEO_SWDISABLE, GLOBAL_SWDISABLE); -+ dev_info(hdmi->dev, "flt success\n"); -+ break; -+ } -+ } ++ list_for_each_safe(pos, tmp, &suite->test_list) { ++ struct kutf_test_function *test_func; + -+ if (i < 0) { -+ dev_err(hdmi->dev, "flt time out\n"); -+ return -ETIMEDOUT; ++ test_func = list_entry(pos, struct kutf_test_function, node); ++ kutf_remove_test(test_func); + } + -+ return 0; ++ list_del(&suite->node); ++ debugfs_remove_recursive(suite->dir); ++ kfree(suite); +} + -+#define HDMI_MODE_FRL_MASK BIT(30) -+ -+static int hdmi_set_op_mode(struct dw_hdmi_qp *hdmi, -+ struct dw_hdmi_link_config *link_cfg, -+ const struct drm_connector *connector) ++struct kutf_application *kutf_create_application(const char *name) +{ -+ int frl_rate; -+ int i, ret = 0; -+ -+ if (hdmi->frl_switch) -+ return 0; -+ -+ if (!link_cfg->frl_mode) { -+ dev_info(hdmi->dev, "dw hdmi qp use tmds mode\n"); -+ hdmi_modb(hdmi, 0, OPMODE_FRL, LINK_CONFIG0); -+ hdmi_modb(hdmi, 0, OPMODE_FRL_4LANES, LINK_CONFIG0); -+ if (!hdmi->update) { -+ ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, &hdmi->previous_mode); -+ if (!ret) -+ hdmi->disabled = false; -+ } ++ struct kutf_application *app; ++ struct dentry *tmp; + -+ return ret; ++ app = kmalloc(sizeof(*app), GFP_KERNEL); ++ if (!app) { ++ pr_err("Failed to create allocate memory when creating application %s\n", name); ++ goto fail_kmalloc; + } + -+ if (hdmi->update) -+ return 0; ++ app->dir = debugfs_create_dir(name, base_dir); ++ if (!app->dir) { ++ pr_err("Failed to create debugfs direcotry when creating application %s\n", name); ++ goto fail_debugfs; ++ } + -+ if (link_cfg->frl_lanes == 4) -+ hdmi_modb(hdmi, OPMODE_FRL_4LANES, OPMODE_FRL_4LANES, -+ LINK_CONFIG0); -+ else -+ hdmi_modb(hdmi, 0, OPMODE_FRL_4LANES, LINK_CONFIG0); ++ tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n", ++ &kutf_debugfs_const_string_ops); ++ if (!tmp) { ++ pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); ++ goto fail_file; ++ } + -+ hdmi_modb(hdmi, 1, OPMODE_FRL, LINK_CONFIG0); ++ INIT_LIST_HEAD(&app->suite_list); ++ app->name = name; + -+ frl_rate = link_cfg->frl_lanes * link_cfg->rate_per_lane; ++ return app; + -+ ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, &hdmi->previous_mode); -+ if (ret) -+ return ret; -+ hdmi->disabled = false; ++fail_file: ++ debugfs_remove_recursive(app->dir); ++fail_debugfs: ++ kfree(app); ++fail_kmalloc: ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_create_application); + -+ msleep(50); ++void kutf_destroy_application(struct kutf_application *app) ++{ ++ struct list_head *pos; ++ struct list_head *tmp; + -+ ret = hdmi_start_flt(hdmi, frl_rate); -+ if (ret) { -+ hdmi_writel(hdmi, 0, FLT_CONFIG0); -+ drm_scdc_writeb(hdmi->ddc, 0x31, 0); -+ hdmi_modb(hdmi, 0, AVP_DATAPATH_VIDEO_SWDISABLE, GLOBAL_SWDISABLE); -+ return ret; -+ } ++ list_for_each_safe(pos, tmp, &app->suite_list) { ++ struct kutf_suite *suite; + -+ for (i = 0; i < 200; i++) { -+ hdmi_modb(hdmi, PKTSCHED_NULL_TX_EN, PKTSCHED_NULL_TX_EN, PKTSCHED_PKT_EN); -+ usleep_range(50, 60); -+ hdmi_modb(hdmi, 0, PKTSCHED_NULL_TX_EN, PKTSCHED_PKT_EN); -+ usleep_range(50, 60); ++ suite = list_entry(pos, struct kutf_suite, node); ++ kutf_destroy_suite(suite); + } + -+ return 0; ++ debugfs_remove_recursive(app->dir); ++ kfree(app); +} 
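/*
 * Editorial sketch, not part of the patch: a minimal out-of-tree test module
 * built on the API above.  An application owns suites, a suite owns tests and
 * supplies optional create_fixture()/remove_fixture() callbacks, and fixture
 * memory taken from context->fixture_pool is released automatically when the
 * context is destroyed.  Header names, the module boilerplate and the
 * visibility of struct kutf_context members are assumptions; only the kutf_*
 * calls themselves come from this file.  example_register_tests() is the
 * helper from the sketch further above.
 */
#include <linux/errno.h>
#include <linux/module.h>

#include <kutf/kutf_suite.h>
#include <kutf/kutf_mem.h>

static struct kutf_application *example_app;

static void *example_create_fixture(struct kutf_context *context)
{
	int *value = kutf_mempool_alloc(&context->fixture_pool, sizeof(*value));

	if (value)
		*value = 42;
	return value;	/* returning NULL makes run_open() skip the test */
}

static void example_remove_fixture(struct kutf_context *context)
{
	/* Nothing to free: the fixture lives in context->fixture_pool. */
}

static int __init example_kutf_init(void)
{
	struct kutf_suite *suite;

	example_app = kutf_create_application("example_app");
	if (!example_app)
		return -ENOMEM;

	/* One fixture variant -> debugfs .../example_suite/<test>/0/run */
	suite = kutf_create_suite(example_app, "example_suite", 1,
				  example_create_fixture,
				  example_remove_fixture);
	if (!suite) {
		kutf_destroy_application(example_app);
		return -ENOMEM;
	}

	example_register_tests(suite);
	return 0;
}

static void __exit example_kutf_exit(void)
{
	kutf_destroy_application(example_app);
}

module_init(example_kutf_init);
module_exit(example_kutf_exit);
MODULE_LICENSE("GPL");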
++EXPORT_SYMBOL(kutf_destroy_application); + -+static unsigned long -+hdmi_get_tmdsclock(struct dw_hdmi_qp *hdmi, unsigned long mpixelclock) ++static struct kutf_context *kutf_create_context( ++ struct kutf_test_fixture *test_fix) +{ -+ unsigned long tmdsclock = mpixelclock; -+ unsigned int depth = -+ hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format); ++ struct kutf_context *new_context; + -+ if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) { -+ switch (depth) { -+ case 16: -+ tmdsclock = mpixelclock * 2; -+ break; -+ case 12: -+ tmdsclock = mpixelclock * 3 / 2; -+ break; -+ case 10: -+ tmdsclock = mpixelclock * 5 / 4; -+ break; -+ default: -+ break; -+ } ++ new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); ++ if (!new_context) { ++ pr_err("Failed to allocate test context"); ++ goto fail_alloc; + } + -+ return tmdsclock; -+} ++ new_context->result_set = kutf_create_result_set(); ++ if (!new_context->result_set) { ++ pr_err("Failed to create resultset"); ++ goto fail_result_set; ++ } + -+static void dw_hdmi_qp_hdcp_enable(struct dw_hdmi_qp *hdmi, -+ struct drm_connector *connector) -+{ -+ int ret, val; -+ const struct drm_connector_state *conn_state = connector->state; -+ void *data = hdmi->plat_data->phy_data; ++ new_context->test_fix = test_fix; ++ /* Save the pointer to the suite as the callbacks will require it */ ++ new_context->suite = test_fix->test_func->suite; ++ new_context->status = KUTF_RESULT_UNKNOWN; ++ new_context->expected_status = KUTF_RESULT_UNKNOWN; + -+ if (conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_DESIRED) -+ return; ++ kutf_mempool_init(&new_context->fixture_pool); ++ new_context->fixture = NULL; ++ new_context->fixture_index = test_fix->fixture_index; ++ new_context->fixture_name = NULL; ++ new_context->test_data = test_fix->test_func->test_data; + -+ /* sink support hdcp2.x */ -+ if (hdmi->hdcp_caps & SINK_CAP_HDCP2) { -+ hdmi_writel(hdmi, HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ, AVP_3_INT_CLEAR); -+ hdmi_modb(hdmi, HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ, -+ HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ, AVP_3_INT_MASK_N); ++ return new_context; + -+ hdmi_writel(hdmi, 0x35, HDCP2LOGIC_ESM_GPIO_IN); -+ hdmi_modb(hdmi, 0, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0); -+ if (hdmi->plat_data->set_hdcp2_enable) -+ hdmi->plat_data->set_hdcp2_enable(data, true); ++fail_result_set: ++ kfree(new_context); ++fail_alloc: ++ return NULL; ++} + -+ /* wait hdcp2.X auth success */ -+ ret = regmap_read_poll_timeout(hdmi->regm, HDCP2LOGIC_ESM_GPIO_OUT, val, -+ FIELD_GET(HDCP2_AUTHENTICATION_SUCCESS, val), -+ 10000, 2000000); -+ if (ret) { -+ hdmi->hdcp_status &= ~HDMI_HDCP2_AUTH; -+ dev_info(hdmi->dev, "hdcp2 auth failed,start hdcp1.4\n"); -+ -+ hdmi_writel(hdmi, 0, HDCP2LOGIC_ESM_GPIO_IN); -+ hdmi_modb(hdmi, HDCP2_BYPASS, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0); -+ -+ if (hdmi->plat_data->set_hdcp2_enable) -+ hdmi->plat_data->set_hdcp2_enable(data, false); -+ -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_start) -+ hdmi->hdcp->hdcp_start(hdmi->hdcp); -+ goto exit; -+ } -+ -+ hdmi->hdcp_status |= HDMI_HDCP2_AUTH; -+ drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_ENABLED); -+ dev_info(hdmi->dev, "HDCP2 authentication succeed\n"); -+ } else { -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_start) -+ hdmi->hdcp->hdcp_start(hdmi->hdcp); -+ } -+exit: -+ if (hdmi->plat_data->set_hdcp_status) -+ hdmi->plat_data->set_hdcp_status(data, hdmi->hdcp_status); ++static void kutf_destroy_context(struct kutf_context *context) ++{ ++ kutf_destroy_result_set(context->result_set); ++ 
kutf_mempool_destroy(&context->fixture_pool); ++ kfree(context); +} + -+static int dw_hdmi_qp_setup(struct dw_hdmi_qp *hdmi, -+ const struct drm_connector *connector, -+ struct drm_display_mode *mode) ++static void kutf_set_result(struct kutf_context *context, ++ enum kutf_result_status status) +{ -+ void *data = hdmi->plat_data->phy_data; -+ struct hdmi_vmode_qp *vmode = &hdmi->hdmi_data.video_mode; -+ struct dw_hdmi_link_config *link_cfg; -+ u8 bytes = 0; -+ -+ hdmi->vic = drm_match_cea_mode(mode); -+ if (!hdmi->vic) -+ dev_dbg(hdmi->dev, "Non-CEA mode used in HDMI\n"); -+ else -+ dev_dbg(hdmi->dev, "CEA mode used vic=%d\n", hdmi->vic); -+ -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ else if ((hdmi->vic == 6) || (hdmi->vic == 7) || -+ (hdmi->vic == 21) || (hdmi->vic == 22) || -+ (hdmi->vic == 2) || (hdmi->vic == 3) || -+ (hdmi->vic == 17) || (hdmi->vic == 18)) -+ hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_601; -+ else -+ hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_709; -+ -+ if (mode->flags & DRM_MODE_FLAG_DBLCLK) { -+ hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 1; -+ hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 1; -+ } else { -+ hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 0; -+ hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 0; -+ } -+ -+ /* Get input format from plat data or fallback to RGB888 */ -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ else if (hdmi->plat_data->input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->input_bus_format; -+ else -+ hdmi->hdmi_data.enc_in_bus_format = MEDIA_BUS_FMT_RGB888_1X24; -+ -+ /* Default to RGB888 output format */ -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); -+ else -+ hdmi->hdmi_data.enc_out_bus_format = MEDIA_BUS_FMT_RGB888_1X24; -+ -+ if (hdmi->plat_data->set_prev_bus_format) -+ hdmi->plat_data->set_prev_bus_format(data, hdmi->hdmi_data.enc_out_bus_format); -+ -+ /* Get input encoding from plat data or fallback to none */ -+ if (hdmi->plat_data->get_enc_in_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ else if (hdmi->plat_data->input_bus_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->input_bus_encoding; -+ else -+ hdmi->hdmi_data.enc_in_encoding = V4L2_YCBCR_ENC_DEFAULT; -+ -+ if (hdmi->plat_data->get_quant_range) -+ hdmi->hdmi_data.quant_range = -+ hdmi->plat_data->get_quant_range(data); -+ else -+ hdmi->hdmi_data.quant_range = HDMI_QUANTIZATION_RANGE_DEFAULT; -+ -+ if (hdmi->plat_data->get_link_cfg) -+ link_cfg = hdmi->plat_data->get_link_cfg(data); -+ else -+ return -EINVAL; -+ -+ hdmi->phy.ops->set_mode(hdmi, hdmi->phy.data, HDMI_MODE_FRL_MASK, -+ link_cfg->frl_mode); -+ -+ if (!hdmi->update && !hdmi->frl_switch && hdmi->plat_data->link_clk_set) -+ hdmi->plat_data->link_clk_set(data, true); -+ -+ /* -+ * According to the dw-hdmi specification 6.4.2 -+ * vp_pr_cd[3:0]: -+ * 0000b: No pixel repetition (pixel sent only once) -+ * 0001b: Pixel sent two times (pixel repeated once) -+ */ -+ hdmi->hdmi_data.pix_repet_factor = -+ (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 
1 : 0; -+ hdmi->hdmi_data.video_mode.mdataenablepolarity = true; -+ -+ vmode->previous_pixelclock = vmode->mpixelclock; -+ if (hdmi->plat_data->split_mode) -+ mode->crtc_clock /= 2; -+ vmode->mpixelclock = mode->crtc_clock * 1000; -+ if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) -+ vmode->mpixelclock *= 2; -+ dev_dbg(hdmi->dev, "final pixclk = %ld\n", vmode->mpixelclock); -+ vmode->previous_tmdsclock = vmode->mtmdsclock; -+ vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi, vmode->mpixelclock); -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ vmode->mtmdsclock /= 2; -+ dev_info(hdmi->dev, "final tmdsclk = %d\n", vmode->mtmdsclock); -+ -+ if (hdmi->plat_data->set_grf_cfg) -+ hdmi->plat_data->set_grf_cfg(data); -+ -+ if (hdmi->sink_has_audio) { -+ dev_dbg(hdmi->dev, "sink has audio support\n"); -+ -+ /* HDMI Initialization Step E - Configure audio */ -+ hdmi_clk_regenerator_update_pixel_clock(hdmi); -+ hdmi_enable_audio_clk(hdmi, hdmi->audio_enable); -+ } -+ -+ /* not for DVI mode */ -+ if (hdmi->sink_is_hdmi) { -+ int ret; -+ -+ dev_dbg(hdmi->dev, "%s HDMI mode\n", __func__); -+ hdmi_modb(hdmi, 0, OPMODE_DVI, LINK_CONFIG0); -+ hdmi_modb(hdmi, HDCP2_BYPASS, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0); -+ hdmi_modb(hdmi, KEEPOUT_REKEY_ALWAYS, KEEPOUT_REKEY_CFG, FRAME_COMPOSER_CONFIG9); -+ -+ if (!link_cfg->frl_mode && dw_hdmi_support_scdc(hdmi, &connector->display_info) && -+ !hdmi->update) { -+ if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK) { -+ drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION, &bytes); -+ drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION, -+ min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); -+ drm_scdc_set_high_tmds_clock_ratio((struct drm_connector *)connector, 1); -+ drm_scdc_set_scrambling((struct drm_connector *)connector, 1); -+ hdmi_writel(hdmi, 1, SCRAMB_CONFIG0); -+ /* Wait for resuming transmission of TMDS clock and data */ -+ msleep(100); -+ } else { -+ drm_scdc_set_high_tmds_clock_ratio((struct drm_connector *)connector, 0); -+ drm_scdc_set_scrambling((struct drm_connector *)connector, 0); -+ hdmi_writel(hdmi, 0, SCRAMB_CONFIG0); -+ } -+ } -+ /* HDMI Initialization Step F - Configure AVI InfoFrame */ -+ hdmi_config_AVI(hdmi, connector, mode); -+ hdmi_config_vendor_specific_infoframe(hdmi, connector, mode); -+ hdmi_config_CVTEM(hdmi); -+ hdmi_config_drm_infoframe(hdmi, connector); -+ ret = hdmi_set_op_mode(hdmi, link_cfg, connector); -+ if (ret) { -+ dev_err(hdmi->dev, "%s hdmi set operation mode failed\n", __func__); -+ hdmi->frl_switch = false; -+ return ret; -+ } -+ } else { -+ hdmi_modb(hdmi, HDCP2_BYPASS, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0); -+ hdmi_modb(hdmi, OPMODE_DVI, OPMODE_DVI, LINK_CONFIG0); -+ hdmi_writel(hdmi, 2, PKTSCHED_PKT_CONTROL0); -+ hdmi_modb(hdmi, PKTSCHED_GCP_TX_EN, PKTSCHED_GCP_TX_EN, PKTSCHED_PKT_EN); -+ hdmi->phy.ops->init(hdmi, hdmi->phy.data, &hdmi->previous_mode); -+ dev_info(hdmi->dev, "%s DVI mode\n", __func__); -+ } -+ -+ dw_hdmi_qp_hdcp_enable(hdmi, hdmi->curr_conn); -+ hdmi->frl_switch = false; -+ return 0; ++ context->status = status; +} + -+static enum drm_connector_status -+dw_hdmi_connector_detect(struct drm_connector *connector, bool force) ++static void kutf_set_expected_result(struct kutf_context *context, ++ enum kutf_result_status expected_status) +{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ struct dw_hdmi_qp *secondary = NULL; -+ enum drm_connector_status result, result_secondary; -+ -+ mutex_lock(&hdmi->mutex); -+ hdmi->force = DRM_FORCE_UNSPECIFIED; -+ 
mutex_unlock(&hdmi->mutex); -+ -+ if (hdmi->panel) -+ return connector_status_connected; -+ -+ if (hdmi->next_bridge && hdmi->next_bridge->ops & DRM_BRIDGE_OP_DETECT) -+ return drm_bridge_detect(hdmi->next_bridge); -+ -+ if (hdmi->plat_data->left) -+ secondary = hdmi->plat_data->left; -+ else if (hdmi->plat_data->right) -+ secondary = hdmi->plat_data->right; -+ -+ result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data); -+ -+ if (secondary) { -+ result_secondary = secondary->phy.ops->read_hpd(secondary, secondary->phy.data); -+ if (result == connector_status_connected && -+ result_secondary == connector_status_connected) -+ result = connector_status_connected; -+ else -+ result = connector_status_disconnected; -+ } -+ -+ return result; ++ context->expected_status = expected_status; +} + -+static int -+dw_hdmi_update_hdr_property(struct drm_connector *connector) ++/** ++ * kutf_test_log_result() - Log a result for the specified test context ++ * @context: Test context ++ * @message: Result string ++ * @new_status: Result status ++ */ ++static void kutf_test_log_result( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status) +{ -+ struct drm_device *dev = connector->dev; -+ struct dw_hdmi_qp *hdmi = container_of(connector, struct dw_hdmi_qp, -+ connector); -+ void *data = hdmi->plat_data->phy_data; -+ const struct hdr_static_metadata *metadata = -+ &connector->hdr_sink_metadata.hdmi_type1; -+ size_t size = sizeof(*metadata); -+ struct drm_property *property; -+ struct drm_property_blob *blob; -+ int ret; -+ -+ if (hdmi->plat_data->get_hdr_property) -+ property = hdmi->plat_data->get_hdr_property(data); -+ else -+ return -EINVAL; -+ -+ if (hdmi->plat_data->get_hdr_blob) -+ blob = hdmi->plat_data->get_hdr_blob(data); -+ else -+ return -EINVAL; ++ if (context->status < new_status) ++ context->status = new_status; + -+ ret = drm_property_replace_global_blob(dev, &blob, size, metadata, -+ &connector->base, property); -+ return ret; ++ if (context->expected_status != new_status) ++ kutf_add_result(&context->fixture_pool, context->result_set, ++ new_status, message); +} + -+static bool dw_hdmi_qp_check_output_type_changed(struct dw_hdmi_qp *hdmi) ++void kutf_test_log_result_external( ++ struct kutf_context *context, ++ const char *message, ++ enum kutf_result_status new_status) +{ -+ bool sink_hdmi; -+ -+ sink_hdmi = hdmi->sink_is_hdmi; -+ -+ if (hdmi->force_output == 1) -+ hdmi->sink_is_hdmi = true; -+ else if (hdmi->force_output == 2) -+ hdmi->sink_is_hdmi = false; -+ else -+ hdmi->sink_is_hdmi = hdmi->support_hdmi; -+ -+ if (sink_hdmi != hdmi->sink_is_hdmi) -+ return true; -+ -+ return false; ++ kutf_test_log_result(context, message, new_status); +} ++EXPORT_SYMBOL(kutf_test_log_result_external); + -+static ssize_t hdcp_ddc_read(struct i2c_adapter *adapter, u8 address, -+ u8 offset, void *buffer) ++void kutf_test_expect_abort(struct kutf_context *context) +{ -+ int ret; -+ struct i2c_msg msgs[2] = { -+ { -+ .addr = address, -+ .flags = 0, -+ .len = 1, -+ .buf = &offset, -+ }, { -+ .addr = address, -+ .flags = I2C_M_RD, -+ .len = 1, -+ .buf = buffer, -+ } -+ }; -+ -+ ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); -+ if (ret < 0) -+ return ret; -+ if (ret != ARRAY_SIZE(msgs)) -+ return -EPROTO; -+ -+ return 0; ++ kutf_set_expected_result(context, KUTF_RESULT_ABORT); +} ++EXPORT_SYMBOL(kutf_test_expect_abort); + -+static u8 dw_hdmi_qp_hdcp_capable(struct dw_hdmi_qp *hdmi) ++void kutf_test_expect_fatal(struct kutf_context *context) +{ -+ u8 version = 0; 
-+ u8 bcaps; -+ int ret; -+ -+ ret = hdcp_ddc_read(hdmi->ddc, HDMI_HDCP_ADDR, HDMI_BCAPS, &bcaps); -+ if (ret < 0) { -+ dev_err(hdmi->dev, "get hdcp1.4 capable failed:%d\n", ret); -+ return 0; -+ } -+ if (bcaps & HDMI_HDCP14_SUPPORT) -+ version |= SINK_CAP_HDCP14; -+ -+ ret = hdcp_ddc_read(hdmi->ddc, HDMI_HDCP_ADDR, HDMI_HDCP2_VERSION, &bcaps); -+ if (ret < 0) { -+ dev_err(hdmi->dev, "get hdcp2.x capable failed:%d\n", ret); -+ return 0; -+ } -+ if (bcaps & HDMI_HDCP2_SUPPORT) -+ version |= SINK_CAP_HDCP2; -+ -+ return version; ++ kutf_set_expected_result(context, KUTF_RESULT_FATAL); +} ++EXPORT_SYMBOL(kutf_test_expect_fatal); + -+static int dw_hdmi_connector_get_modes(struct drm_connector *connector) ++void kutf_test_expect_fail(struct kutf_context *context) +{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ struct hdr_static_metadata *metedata = -+ &connector->hdr_sink_metadata.hdmi_type1; -+ struct edid *edid; -+ struct drm_display_mode *mode; -+ struct drm_display_info *info = &connector->display_info; -+ void *data = hdmi->plat_data->phy_data; -+ struct drm_property_blob *edid_blob_ptr = connector->edid_blob_ptr; -+ int i, ret = 0; -+ -+ if (hdmi->plat_data->right && hdmi->plat_data->right->next_bridge) { -+ struct drm_bridge *bridge = hdmi->plat_data->right->next_bridge; -+ -+ if (bridge->ops & DRM_BRIDGE_OP_MODES) { -+ if (!drm_bridge_get_modes(bridge, connector)) -+ return 0; -+ } -+ } -+ -+ if (hdmi->panel) -+ return drm_panel_get_modes(hdmi->panel, connector); -+ -+ if (hdmi->next_bridge && hdmi->next_bridge->ops & DRM_BRIDGE_OP_MODES) -+ return drm_bridge_get_modes(hdmi->next_bridge, connector); -+ -+ if (!hdmi->ddc) -+ return 0; -+ -+ memset(metedata, 0, sizeof(*metedata)); -+ -+ if (edid_blob_ptr && edid_blob_ptr->length) { -+ edid = kmalloc(edid_blob_ptr->length, GFP_KERNEL); -+ if (!edid) -+ return -ENOMEM; -+ memcpy(edid, edid_blob_ptr->data, edid_blob_ptr->length); -+ } else { -+ edid = drm_get_edid(connector, hdmi->ddc); -+ hdmi->hdcp_caps = dw_hdmi_qp_hdcp_capable(hdmi); -+ } -+ -+ if (edid) { -+ dev_dbg(hdmi->dev, "got edid: width[%d] x height[%d]\n", -+ edid->width_cm, edid->height_cm); -+ -+ hdmi->support_hdmi = drm_detect_hdmi_monitor(edid); -+ hdmi->sink_has_audio = drm_detect_monitor_audio(edid); -+ drm_connector_update_edid_property(connector, edid); -+ if (hdmi->cec_notifier) -+ cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid); -+ if (hdmi->plat_data->get_edid_dsc_info) -+ hdmi->plat_data->get_edid_dsc_info(data, edid); -+ ret = drm_add_edid_modes(connector, edid); -+ if (hdmi->plat_data->get_colorimetry) -+ hdmi->plat_data->get_colorimetry(data, edid); -+ if (hdmi->plat_data->get_yuv422_format) -+ hdmi->plat_data->get_yuv422_format(connector, edid); -+ dw_hdmi_update_hdr_property(connector); -+ hdmi->hdcp_caps = dw_hdmi_qp_hdcp_capable(hdmi); -+ if (ret > 0 && hdmi->plat_data->split_mode) { -+ struct dw_hdmi_qp *secondary = NULL; -+ void *secondary_data; -+ -+ if (hdmi->plat_data->left) -+ secondary = hdmi->plat_data->left; -+ else if (hdmi->plat_data->right) -+ secondary = hdmi->plat_data->right; -+ -+ if (!secondary) { -+ kfree(edid); -+ return -ENOMEM; -+ } -+ secondary_data = secondary->plat_data->phy_data; -+ -+ list_for_each_entry(mode, &connector->probed_modes, head) -+ hdmi->plat_data->convert_to_split_mode(mode); -+ -+ secondary->sink_is_hdmi = drm_detect_hdmi_monitor(edid); -+ secondary->sink_has_audio = drm_detect_monitor_audio(edid); -+ if (secondary->cec_notifier) -+ 
cec_notifier_set_phys_addr_from_edid(secondary->cec_notifier, -+ edid); -+ if (secondary->plat_data->get_edid_dsc_info) -+ secondary->plat_data->get_edid_dsc_info(secondary_data, edid); -+ } -+ kfree(edid); -+ } else { -+ hdmi->support_hdmi = true; -+ hdmi->sink_has_audio = true; -+ -+ if (hdmi->plat_data->split_mode) { -+ if (hdmi->plat_data->left) { -+ hdmi->plat_data->left->sink_is_hdmi = true; -+ hdmi->plat_data->left->sink_has_audio = true; -+ } else if (hdmi->plat_data->right) { -+ hdmi->plat_data->right->sink_is_hdmi = true; -+ hdmi->plat_data->right->sink_has_audio = true; -+ } -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(dw_hdmi_default_modes); i++) { -+ const struct drm_display_mode *ptr = -+ &dw_hdmi_default_modes[i]; -+ -+ mode = drm_mode_duplicate(connector->dev, ptr); -+ if (mode) { -+ if (!i) -+ mode->type = DRM_MODE_TYPE_PREFERRED; -+ drm_mode_probed_add(connector, mode); -+ ret++; -+ } -+ } -+ if (ret > 0 && hdmi->plat_data->split_mode) { -+ struct drm_display_mode *mode; -+ -+ list_for_each_entry(mode, &connector->probed_modes, head) -+ hdmi->plat_data->convert_to_split_mode(mode); -+ } -+ info->edid_hdmi_rgb444_dc_modes = 0; -+ info->edid_hdmi_ycbcr444_dc_modes = 0; -+ info->hdmi.y420_dc_modes = 0; -+ info->color_formats = 0; -+ -+ dev_info(hdmi->dev, "failed to get edid\n"); -+ } -+ -+ return ret; ++ kutf_set_expected_result(context, KUTF_RESULT_FAIL); +} ++EXPORT_SYMBOL(kutf_test_expect_fail); + -+void dw_hdmi_qp_set_allm_enable(struct dw_hdmi_qp *hdmi, bool enable) ++void kutf_test_expect_warn(struct kutf_context *context) +{ -+ struct dw_hdmi_link_config *link_cfg = NULL; -+ void *data; -+ -+ if (!hdmi || !hdmi->curr_conn) -+ return; -+ -+ data = hdmi->plat_data->phy_data; -+ -+ if (hdmi->plat_data->get_link_cfg) -+ link_cfg = hdmi->plat_data->get_link_cfg(data); -+ -+ if (!link_cfg) -+ return; -+ -+ if (enable == hdmi->allm_enable) -+ return; -+ -+ hdmi->allm_enable = enable; -+ -+ if (enable && !(link_cfg->add_func & SUPPORT_HDMI_ALLM)) { -+ hdmi->allm_enable = false; -+ dev_err(hdmi->dev, "sink don't support allm, allm won't be enabled\n"); -+ return; -+ } ++ kutf_set_expected_result(context, KUTF_RESULT_WARN); ++} ++EXPORT_SYMBOL(kutf_test_expect_warn); + -+ hdmi_config_vendor_specific_infoframe(hdmi, hdmi->curr_conn, &hdmi->previous_mode); ++void kutf_test_expect_pass(struct kutf_context *context) ++{ ++ kutf_set_expected_result(context, KUTF_RESULT_PASS); +} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_allm_enable); ++EXPORT_SYMBOL(kutf_test_expect_pass); + -+static int -+dw_hdmi_atomic_connector_set_property(struct drm_connector *connector, -+ struct drm_connector_state *state, -+ struct drm_property *property, -+ uint64_t val) ++void kutf_test_skip(struct kutf_context *context) +{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ const struct dw_hdmi_property_ops *ops = hdmi->plat_data->property_ops; ++ kutf_set_result(context, KUTF_RESULT_SKIP); ++ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + -+ if (ops && ops->set_property) -+ return ops->set_property(connector, state, property, -+ val, hdmi->plat_data->phy_data); -+ else -+ return -EINVAL; ++ kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); +} ++EXPORT_SYMBOL(kutf_test_skip); + -+static int -+dw_hdmi_atomic_connector_get_property(struct drm_connector *connector, -+ const struct drm_connector_state *state, -+ struct drm_property *property, -+ uint64_t *val) ++void kutf_test_skip_msg(struct kutf_context *context, const char *message) +{ -+ struct dw_hdmi_qp 
*hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ const struct dw_hdmi_property_ops *ops = hdmi->plat_data->property_ops; ++ kutf_set_result(context, KUTF_RESULT_SKIP); ++ kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + -+ if (ops && ops->get_property) -+ return ops->get_property(connector, state, property, -+ val, hdmi->plat_data->phy_data); -+ else -+ return -EINVAL; ++ kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, ++ "Test skipped: %s", message), KUTF_RESULT_SKIP); ++ kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); +} ++EXPORT_SYMBOL(kutf_test_skip_msg); + -+static int -+dw_hdmi_connector_set_property(struct drm_connector *connector, -+ struct drm_property *property, uint64_t val) ++void kutf_test_debug(struct kutf_context *context, char const *message) +{ -+ return dw_hdmi_atomic_connector_set_property(connector, NULL, -+ property, val); ++ kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); +} ++EXPORT_SYMBOL(kutf_test_debug); + -+static void dw_hdmi_attach_properties(struct dw_hdmi_qp *hdmi) ++void kutf_test_pass(struct kutf_context *context, char const *message) +{ -+ u32 val; -+ u64 color = MEDIA_BUS_FMT_YUV8_1X24; -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; -+ void *data = hdmi->plat_data->phy_data; -+ enum drm_connector_status connect_status = -+ hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data); -+ -+ if ((connect_status == connector_status_connected) && -+ hdmi->initialized) { -+ if (hdmi->plat_data->get_grf_color_fmt) -+ color = hdmi->plat_data->get_grf_color_fmt(data); ++ static const char explicit_message[] = "(explicit pass)"; + -+ val = (hdmi_readl(hdmi, PKT_VSI_CONTENTS1) >> 8) & 0xffffff; -+ if (val == HDMI_FORUM_OUI) -+ hdmi->allm_enable = true; -+ else -+ hdmi->allm_enable = false; -+ } ++ if (!message) ++ message = explicit_message; + -+ /* -+ * Because all hdmi registers are configured the same value -+ * between yuv422 8/10 bit. We set a useless bit in uboot to mark -+ * yuv422 10bit. 
-+ */ -+ if (color == MEDIA_BUS_FMT_YUYV10_1X20 && -+ !(hdmi_readl(hdmi, VIDEO_INTERFACE_CONFIG0) & BIT(20))) -+ color = MEDIA_BUS_FMT_YUYV8_1X16; ++ kutf_test_log_result(context, message, KUTF_RESULT_PASS); ++} ++EXPORT_SYMBOL(kutf_test_pass); + -+ if (ops && ops->attach_properties) -+ return ops->attach_properties(&hdmi->connector, color, 0, -+ hdmi->plat_data->phy_data, hdmi->allm_enable); ++void kutf_test_info(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_INFO); +} ++EXPORT_SYMBOL(kutf_test_info); + -+static void dw_hdmi_destroy_properties(struct dw_hdmi_qp *hdmi) ++void kutf_test_warn(struct kutf_context *context, char const *message) +{ -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; ++ kutf_test_log_result(context, message, KUTF_RESULT_WARN); ++} ++EXPORT_SYMBOL(kutf_test_warn); + -+ if (ops && ops->destroy_properties) -+ return ops->destroy_properties(&hdmi->connector, -+ hdmi->plat_data->phy_data); ++void kutf_test_fail(struct kutf_context *context, char const *message) ++{ ++ kutf_test_log_result(context, message, KUTF_RESULT_FAIL); +} ++EXPORT_SYMBOL(kutf_test_fail); + -+static struct drm_encoder * -+dw_hdmi_connector_best_encoder(struct drm_connector *connector) ++void kutf_test_fatal(struct kutf_context *context, char const *message) +{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); ++ kutf_test_log_result(context, message, KUTF_RESULT_FATAL); ++} ++EXPORT_SYMBOL(kutf_test_fatal); + -+ return hdmi->bridge.encoder; ++void kutf_test_abort(struct kutf_context *context) ++{ ++ kutf_test_log_result(context, "", KUTF_RESULT_ABORT); +} ++EXPORT_SYMBOL(kutf_test_abort); + -+static bool dw_hdmi_color_changed(struct drm_connector *connector, -+ struct drm_atomic_state *state) ++/** ++ * init_kutf_core() - Module entry point. ++ * ++ * Create the base entry point in debugfs. ++ */ ++static int __init init_kutf_core(void) +{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ void *data = hdmi->plat_data->phy_data; -+ struct drm_connector_state *old_state = -+ drm_atomic_get_old_connector_state(state, connector); -+ struct drm_connector_state *new_state = -+ drm_atomic_get_new_connector_state(state, connector); -+ bool ret = false; ++ int ret; + -+ if (hdmi->plat_data->get_color_changed) -+ ret = hdmi->plat_data->get_color_changed(data); ++ base_dir = debugfs_create_dir("kutf_tests", NULL); ++ if (!base_dir) { ++ ret = -ENODEV; ++ goto exit_dir; ++ } + -+ if (new_state->colorspace != old_state->colorspace) -+ ret = true; ++ return 0; + ++exit_dir: + return ret; +} + -+static bool hdr_metadata_equal(struct dw_hdmi_qp *hdmi, const struct drm_connector_state *old_state, -+ const struct drm_connector_state *new_state) ++/** ++ * exit_kutf_core() - Module exit point. ++ * ++ * Remove the base entry point in debugfs. 
++ */ ++static void __exit exit_kutf_core(void) +{ -+ struct drm_property_blob *old_blob = old_state->hdr_output_metadata; -+ struct drm_property_blob *new_blob = new_state->hdr_output_metadata; -+ int i, ret; -+ u8 *data; -+ -+ hdmi->hdr2sdr = false; -+ -+ if (!old_blob && !new_blob) -+ return true; -+ -+ if (!old_blob) { -+ data = (u8 *)new_blob->data; -+ -+ for (i = 0; i < new_blob->length; i++) -+ if (data[i]) -+ return false; ++ debugfs_remove_recursive(base_dir); ++} + -+ return true; -+ } ++#else /* defined(CONFIG_DEBUG_FS) */ + -+ if (!new_blob) { -+ data = (u8 *)old_blob->data; ++/** ++ * init_kutf_core() - Module entry point. ++ * ++ * Stub for when build against a kernel without debugfs support ++ */ ++static int __init init_kutf_core(void) ++{ ++ pr_debug("KUTF requires a kernel with debug fs support"); + -+ for (i = 0; i < old_blob->length; i++) -+ if (data[i]) -+ return false; ++ return -ENODEV; ++} + -+ return true; -+ } ++/** ++ * exit_kutf_core() - Module exit point. ++ * ++ * Stub for when build against a kernel without debugfs support ++ */ ++static void __exit exit_kutf_core(void) ++{ ++} ++#endif /* defined(CONFIG_DEBUG_FS) */ + -+ if (old_blob->length != new_blob->length) -+ return false; ++MODULE_LICENSE("GPL"); + -+ ret = !memcmp(old_blob->data, new_blob->data, old_blob->length); ++module_init(init_kutf_core); ++module_exit(exit_kutf_core); +diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c +new file mode 100644 +index 000000000..a429a2dbf +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c +@@ -0,0 +1,71 @@ ++/* ++ * ++ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+ if (!ret && new_blob) { -+ data = (u8 *)new_blob->data; + -+ for (i = 0; i < new_blob->length; i++) -+ if (data[i]) -+ break; + -+ if (i == new_blob->length) -+ hdmi->hdr2sdr = true; -+ } ++/* Kernel UTF utility functions */ + -+ return ret; -+} ++#include ++#include ++#include ++#include + -+static bool check_hdr_color_change(struct drm_connector_state *old_state, -+ struct drm_connector_state *new_state, -+ struct dw_hdmi_qp *hdmi) -+{ -+ void *data = hdmi->plat_data->phy_data; ++#include ++#include + -+ if (!hdr_metadata_equal(hdmi, old_state, new_state)) { -+ hdmi->plat_data->check_hdr_color_change(new_state, data); -+ return true; -+ } ++static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; + -+ return false; -+} ++DEFINE_MUTEX(buffer_lock); + -+static bool check_dw_hdcp_state_changed(struct drm_connector *conn, -+ struct drm_atomic_state *state) ++const char *kutf_dsprintf(struct kutf_mempool *pool, ++ const char *fmt, ...) 
+{ -+ struct drm_connector_state *old_state, *new_state; -+ u64 old_cp, new_cp; ++ va_list args; ++ int len; ++ int size; ++ void *buffer; + -+ old_state = drm_atomic_get_old_connector_state(state, conn); -+ new_state = drm_atomic_get_new_connector_state(state, conn); -+ old_cp = old_state->content_protection; -+ new_cp = new_state->content_protection; ++ mutex_lock(&buffer_lock); ++ va_start(args, fmt); ++ len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); ++ va_end(args); + -+ if (old_state->hdcp_content_type != new_state->hdcp_content_type && -+ new_cp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { -+ new_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; -+ return true; ++ if (len < 0) { ++ pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt); ++ goto fail_format; + } + -+ if (!new_state->crtc) { -+ if (old_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED) -+ new_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; -+ return false; ++ if (len >= sizeof(tmp_buffer)) { ++ pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt); ++ size = sizeof(tmp_buffer); ++ } else { ++ size = len + 1; + } + -+ if (old_cp == new_cp || -+ (old_cp == DRM_MODE_CONTENT_PROTECTION_DESIRED && -+ new_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED)) -+ return false; -+ -+ return true; -+} ++ buffer = kutf_mempool_alloc(pool, size); ++ if (!buffer) ++ goto fail_alloc; + -+static int dw_hdmi_connector_atomic_check(struct drm_connector *connector, -+ struct drm_atomic_state *state) -+{ -+ struct drm_connector_state *old_state = -+ drm_atomic_get_old_connector_state(state, connector); -+ struct drm_connector_state *new_state = -+ drm_atomic_get_new_connector_state(state, connector); -+ struct drm_crtc *crtc = new_state->crtc; -+ struct drm_crtc *old_crtc = old_state->crtc; -+ struct drm_crtc_state *crtc_state, *old_crtc_state; -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); -+ struct drm_display_mode mode; -+ void *data = hdmi->plat_data->phy_data; -+ struct hdmi_vmode_qp *vmode = &hdmi->hdmi_data.video_mode; ++ memcpy(buffer, tmp_buffer, size); ++ mutex_unlock(&buffer_lock); + -+ if (old_crtc) { -+ old_crtc_state = drm_atomic_get_crtc_state(state, old_crtc); -+ if (IS_ERR(old_crtc_state)) -+ return PTR_ERR(old_crtc_state); ++ return buffer; + -+ if (hdmi->plat_data->get_vp_id) -+ hdmi->old_vp_id = hdmi->plat_data->get_vp_id(old_crtc_state); -+ } ++fail_alloc: ++fail_format: ++ mutex_unlock(&buffer_lock); ++ return NULL; ++} ++EXPORT_SYMBOL(kutf_dsprintf); +diff --git a/drivers/gpu/arm/midgard/tests/kutf/sconscript b/drivers/gpu/arm/midgard/tests/kutf/sconscript +new file mode 100755 +index 000000000..d7f112448 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/kutf/sconscript +@@ -0,0 +1,21 @@ ++# ++# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
++# ++# + -+ if (!crtc) -+ return 0; + -+ crtc_state = drm_atomic_get_crtc_state(state, crtc); -+ if (IS_ERR(crtc_state)) -+ return PTR_ERR(crtc_state); ++Import('kutf_env') + -+ if (hdmi->plat_data->get_vp_id) -+ hdmi->vp_id = hdmi->plat_data->get_vp_id(crtc_state); ++make_args = kutf_env.kernel_get_config_defines(ret_list = True) + -+ drm_mode_copy(&mode, &crtc_state->mode); -+ /* -+ * If HDMI is enabled in uboot, it's need to record -+ * drm_display_mode and set phy status to enabled. -+ */ -+ if (!vmode->mpixelclock) { -+ struct dw_hdmi_qp *secondary = NULL; -+ u8 val; ++mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args) ++kutf_env.KernelObjTarget('kutf', mod) +diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild +new file mode 100755 +index 000000000..0cd9cebe9 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild +@@ -0,0 +1,20 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ if (hdmi->plat_data->left) -+ secondary = hdmi->plat_data->left; -+ else if (hdmi->plat_data->right) -+ secondary = hdmi->plat_data->right; -+ hdmi->curr_conn = connector; -+ if (secondary) -+ secondary->curr_conn = connector; -+ if (hdmi->plat_data->get_enc_in_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); + -+ if (hdmi->plat_data->split_mode) { -+ hdmi->plat_data->convert_to_origin_mode(&mode); -+ mode.crtc_clock /= 2; -+ } -+ drm_mode_copy(&hdmi->previous_mode, &mode); -+ vmode->mpixelclock = mode.crtc_clock * 1000; -+ vmode->previous_pixelclock = mode.clock; -+ vmode->previous_tmdsclock = mode.clock; -+ vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi, -+ vmode->mpixelclock); -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ vmode->mtmdsclock /= 2; ++ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + -+ /* -+ * If uboot logo enabled, atomic_enable won't be called, -+ * but atomic_disable will be called when hdmi plug out. -+ * That will cause dclk enable count is incorrect. So -+ * we should check ipi/link/video clk to determine whether -+ * uboot logo is enabled. 
-+ */ -+ if (hdmi->initialized && !hdmi->dclk_en) { -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(data, true, hdmi->vp_id); -+ hdmi->dclk_en = true; -+ mutex_unlock(&hdmi->audio_mutex); -+ hdmi->curr_conn = connector; -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, true); -+ } ++obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o + -+ drm_scdc_readb(hdmi->ddc, SCDC_TMDS_CONFIG, &val); -+ /* if plug out before hdmi bind, reset hdmi */ -+ if (vmode->mtmdsclock >= 340000000 && vmode->mpixelclock <= 600000000 && -+ !(val & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40)) -+ hdmi->logo_plug_out = true; -+ } ++mali_kutf_irq_test-y := mali_kutf_irq_test_main.o +diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig +new file mode 100644 +index 000000000..16f68d15c +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig +@@ -0,0 +1,23 @@ ++# ++# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ if (check_hdr_color_change(old_state, new_state, hdmi) || hdmi->logo_plug_out || -+ dw_hdmi_color_changed(connector, state) || -+ dw_hdmi_qp_check_output_type_changed(hdmi)) { -+ u32 mtmdsclk; + -+ crtc_state = drm_atomic_get_crtc_state(state, crtc); -+ if (IS_ERR(crtc_state)) -+ return PTR_ERR(crtc_state); ++config MALI_IRQ_LATENCY ++ tristate "Mali GPU IRQ latency measurement" ++ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF ++ default n ++ help ++ This option will build a test module mali_kutf_irq_test that ++ can determine the latency of the Mali GPU IRQ on your system. ++ Choosing M here will generate a single module called mali_kutf_irq_test. +diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile +new file mode 100644 +index 000000000..4e948767a +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile +@@ -0,0 +1,51 @@ ++# ++# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. 
++# ++# + -+ if (hdmi->plat_data->update_color_format) -+ hdmi->plat_data->update_color_format(new_state, data); -+ if (hdmi->plat_data->get_enc_in_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); + -+ mtmdsclk = hdmi_get_tmdsclock(hdmi, mode.clock); -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ mtmdsclk /= 2; ++# linux build system bootstrap for out-of-tree module + -+ if (hdmi->hdmi_data.video_mode.mpixelclock == (mode.clock * 1000) && -+ hdmi->hdmi_data.video_mode.mtmdsclock == (mtmdsclk * 1000) && -+ mode.clock <= 600000 && !hdmi->disabled && !hdmi->logo_plug_out) { -+ hdmi->update = true; -+ hdmi_writel(hdmi, 1, PKTSCHED_PKT_CONTROL0); -+ hdmi_modb(hdmi, PKTSCHED_GCP_TX_EN, PKTSCHED_GCP_TX_EN, PKTSCHED_PKT_EN); -+ mdelay(50); -+ } else if (!hdmi->disabled) { -+ if (hdmi->previous_mode.clock > 600000 && mode.clock > 600000) -+ hdmi->frl_switch = true; -+ hdmi->update = false; -+ crtc_state->mode_changed = true; -+ hdmi->logo_plug_out = false; -+ } -+ } ++# default to building for the host ++ARCH ?= $(shell uname -m) + -+ if (check_dw_hdcp_state_changed(connector, state)) -+ crtc_state->mode_changed = true; ++ifeq ($(KDIR),) ++$(error Must specify KDIR to point to the kernel to target)) ++endif + -+ return 0; -+} ++TEST_CCFLAGS := \ ++ -DMALI_DEBUG=$(MALI_DEBUG) \ ++ -DMALI_BACKEND_KERNEL=$(MALI_BACKEND_KERNEL) \ ++ -DMALI_MODEL=$(MALI_MODEL) \ ++ -DMALI_NO_MALI=$(MALI_NO_MALI) \ ++ -DMALI_BASE_QA_LEAK=$(MALI_BASE_QA_LEAK) \ ++ -DMALI_BASE_QA_RESFAIL=$(MALI_BASE_QA_RESFAIL) \ ++ -DMALI_BASE_QA_USE_AFTER_FREE=$(MALI_BASE_QA_USE_AFTER_FREE) \ ++ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ ++ -DMALI_USE_UMP=$(MALI_USE_UMP) \ ++ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ ++ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ ++ $(SCONS_CFLAGS) \ ++ -I$(CURDIR)/../include \ ++ -I$(CURDIR)/../../../../../../include \ ++ -I$(CURDIR)/../../../ \ ++ -I$(CURDIR)/../../ \ ++ -I$(CURDIR)/../../backend/gpu \ ++ -I$(CURDIR)/ \ ++ -I$(srctree)/drivers/staging/android \ ++ -I$(srctree)/include/linux + -+static void dw_hdmi_connector_atomic_commit(struct drm_connector *connector, -+ struct drm_atomic_state *state) -+{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); ++all: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules + -+ if (hdmi->update) { -+ dw_hdmi_qp_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode); -+ msleep(50); -+ hdmi_writel(hdmi, 2, PKTSCHED_PKT_CONTROL0); -+ hdmi->update = false; -+ } -+} ++clean: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +new file mode 100644 +index 000000000..e2ff4432b +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +@@ -0,0 +1,257 @@ ++/* ++ * ++ * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ + -+void dw_hdmi_qp_set_output_type(struct dw_hdmi_qp *hdmi, u64 val) -+{ -+ hdmi->force_output = val; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_output_type); + -+bool dw_hdmi_qp_get_output_whether_hdmi(struct dw_hdmi_qp *hdmi) -+{ -+ return hdmi->sink_is_hdmi; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_get_output_whether_hdmi); + -+int dw_hdmi_qp_get_output_type_cap(struct dw_hdmi_qp *hdmi) -+{ -+ return hdmi->support_hdmi; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_get_output_type_cap); ++#include ++#include ++#include + -+static void dw_hdmi_connector_force(struct drm_connector *connector) -+{ -+ struct dw_hdmi_qp *hdmi = -+ container_of(connector, struct dw_hdmi_qp, connector); ++#include "mali_kbase.h" ++#include + -+ mutex_lock(&hdmi->mutex); ++#include ++#include + -+ if (hdmi->force != connector->force) { -+ if (!hdmi->disabled && connector->force == DRM_FORCE_OFF) -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, -+ false); -+ else if (hdmi->disabled && connector->force == DRM_FORCE_ON) -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, -+ true); -+ } ++/* ++ * This file contains the code which is used for measuring interrupt latency ++ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is ++ * used with this purpose and it is called within KUTF framework - a kernel ++ * unit test framework. The measured latency provided by this test should ++ * be representative for the latency of the Mali JOB/MMU IRQs as well. ++ */ + -+ hdmi->force = connector->force; -+ mutex_unlock(&hdmi->mutex); -+} ++/* KUTF test application pointer for this test */ ++struct kutf_application *irq_app; + -+static const struct drm_connector_funcs dw_hdmi_connector_funcs = { -+ .fill_modes = drm_helper_probe_single_connector_modes, -+ .detect = dw_hdmi_connector_detect, -+ .destroy = drm_connector_cleanup, -+ .force = dw_hdmi_connector_force, -+ .reset = drm_atomic_helper_connector_reset, -+ .set_property = dw_hdmi_connector_set_property, -+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, -+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -+ .atomic_set_property = dw_hdmi_atomic_connector_set_property, -+ .atomic_get_property = dw_hdmi_atomic_connector_get_property, ++/** ++ * struct kutf_irq_fixture data - test fixture used by the test functions. ++ * @kbdev: kbase device for the GPU. 
++ * ++ */ ++struct kutf_irq_fixture_data { ++ struct kbase_device *kbdev; +}; + -+static const struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = { -+ .get_modes = dw_hdmi_connector_get_modes, -+ .best_encoder = dw_hdmi_connector_best_encoder, -+ .atomic_check = dw_hdmi_connector_atomic_check, -+ .atomic_commit = dw_hdmi_connector_atomic_commit, -+}; ++#define SEC_TO_NANO(s) ((s)*1000000000LL) + -+static int dw_hdmi_qp_bridge_attach(struct drm_bridge *bridge, -+ enum drm_bridge_attach_flags flags) -+{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; -+ struct drm_encoder *encoder = bridge->encoder; -+ struct drm_connector *connector = &hdmi->connector; -+ struct cec_connector_info conn_info; -+ struct cec_notifier *notifier; -+ bool skip_connector = false; ++/* ID for the GPU IRQ */ ++#define GPU_IRQ_HANDLER 2 + -+ if (hdmi->next_bridge) { -+ struct drm_bridge *next_bridge = hdmi->next_bridge; -+ int ret; ++#define NR_TEST_IRQS 1000000 + -+ ret = drm_bridge_attach(bridge->encoder, next_bridge, bridge, -+ next_bridge->ops & DRM_BRIDGE_OP_MODES ? -+ DRM_BRIDGE_ATTACH_NO_CONNECTOR : 0); -+ if (ret) { -+ DRM_ERROR("failed to attach next bridge: %d\n", ret); -+ return ret; -+ } ++/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not ++ * expect to see this in normal use (e.g., when Android is running). */ ++#define TEST_IRQ MULTIPLE_GPU_FAULTS + -+ skip_connector = !(next_bridge->ops & DRM_BRIDGE_OP_MODES); -+ } ++#define IRQ_TIMEOUT HZ + -+ hdmi->skip_connector = skip_connector; -+ if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR || skip_connector) -+ return 0; ++/* Kernel API for setting irq throttle hook callback and irq time in us*/ ++extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, ++ irq_handler_t custom_handler, ++ int irq_type); ++extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data); + -+ connector->interlace_allowed = 1; -+ connector->polled = DRM_CONNECTOR_POLL_HPD; -+ if (hdmi->next_bridge && hdmi->next_bridge->ops & DRM_BRIDGE_OP_DETECT) -+ connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; -+ drm_connector_helper_add(connector, &dw_hdmi_connector_helper_funcs); ++static DECLARE_WAIT_QUEUE_HEAD(wait); ++static bool triggered; ++static u64 irq_time; + -+ drm_connector_init(bridge->dev, connector, &dw_hdmi_connector_funcs, -+ DRM_MODE_CONNECTOR_HDMIA); ++static void *kbase_untag(void *ptr) ++{ ++ return (void *)(((uintptr_t) ptr) & ~3); ++} + -+ drm_connector_attach_encoder(connector, encoder); -+ dw_hdmi_attach_properties(hdmi); ++/** ++ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler ++ * @irq: IRQ number ++ * @data: Data associated with this IRQ ++ * ++ * Return: state of the IRQ ++ */ ++static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) ++{ ++ struct kbase_device *kbdev = kbase_untag(data); ++ u32 val; + -+ if (hdmi->cec_enable) { -+ cec_fill_conn_info_from_drm(&conn_info, connector); -+ notifier = cec_notifier_conn_register(hdmi->dev, NULL, &conn_info); -+ if (!notifier) -+ return -ENOMEM; ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); ++ if (val & TEST_IRQ) { ++ struct timespec64 tval; + -+ mutex_lock(&hdmi->cec_notifier_mutex); -+ hdmi->cec_notifier = notifier; -+ mutex_unlock(&hdmi->cec_notifier_mutex); -+ } ++ ktime_get_real_ts64(&tval); ++ irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); + -+ return 0; -+} ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, ++ NULL); + -+static void 
dw_hdmi_qp_bridge_detach(struct drm_bridge *bridge) -+{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; ++ triggered = true; ++ wake_up(&wait); + -+ if (hdmi->cec_notifier) { -+ mutex_lock(&hdmi->cec_notifier_mutex); -+ cec_notifier_conn_unregister(hdmi->cec_notifier); -+ hdmi->cec_notifier = NULL; -+ mutex_unlock(&hdmi->cec_notifier_mutex); ++ return IRQ_HANDLED; + } ++ ++ /* Trigger main irq handler */ ++ return kbase_gpu_irq_handler(irq, data); +} + -+static enum drm_mode_status -+dw_hdmi_qp_bridge_mode_valid(struct drm_bridge *bridge, -+ const struct drm_display_info *info, -+ const struct drm_display_mode *mode) ++/** ++ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required ++ * for all the tests in the irq suite. ++ * @context: KUTF context. ++ * ++ * Return: Fixture data created on success or NULL on failure ++ */ ++static void *mali_kutf_irq_default_create_fixture( ++ struct kutf_context *context) +{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; -+ const struct dw_hdmi_plat_data *pdata = hdmi->plat_data; ++ struct kutf_irq_fixture_data *data; + -+ if (mode->clock <= 25000) -+ return MODE_CLOCK_RANGE; ++ data = kutf_mempool_alloc(&context->fixture_pool, ++ sizeof(struct kutf_irq_fixture_data)); + -+ if (!hdmi->sink_is_hdmi && mode->clock > 340000) -+ return MODE_BAD; ++ if (!data) ++ goto fail; + -+ if (pdata->mode_valid) -+ return pdata->mode_valid(NULL, pdata->priv_data, info, -+ mode); ++ /* Acquire the kbase device */ ++ data->kbdev = kbase_find_device(-1); ++ if (data->kbdev == NULL) { ++ kutf_test_fail(context, "Failed to find kbase device"); ++ goto fail; ++ } + -+ return MODE_OK; ++ return data; ++ ++fail: ++ return NULL; +} + -+static void dw_hdmi_qp_bridge_mode_set(struct drm_bridge *bridge, -+ const struct drm_display_mode *orig_mode, -+ const struct drm_display_mode *mode) ++/** ++ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously ++ * created by mali_kutf_irq_default_create_fixture. ++ * ++ * @context: KUTF context. ++ */ ++static void mali_kutf_irq_default_remove_fixture( ++ struct kutf_context *context) +{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; -+ -+ mutex_lock(&hdmi->mutex); -+ -+ if (!drm_mode_equal(orig_mode, mode)) -+ hdmi->frl_switch = false; -+ /* Store the display mode for plugin/DKMS poweron events */ -+ drm_mode_copy(&hdmi->previous_mode, mode); -+ if (hdmi->plat_data->split_mode) -+ hdmi->plat_data->convert_to_origin_mode(&hdmi->previous_mode); ++ struct kutf_irq_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; + -+ mutex_unlock(&hdmi->mutex); ++ kbase_release_device(kbdev); +} + -+static void dw_hdmi_qp_bridge_atomic_disable(struct drm_bridge *bridge, -+ struct drm_bridge_state *old_state) ++/** ++ * mali_kutf_irq_latency() - measure GPU IRQ latency ++ * @context: kutf context within which to perform the test ++ * ++ * The test triggers IRQs manually, and measures the ++ * time between triggering the IRQ and the IRQ handler being executed. ++ * ++ * This is not a traditional test, in that the pass/fail status has little ++ * meaning (other than indicating that the IRQ handler executed at all). Instead ++ * the results are in the latencies provided with the test result. There is no ++ * meaningful pass/fail result that can be obtained here, instead the latencies ++ * are provided for manual analysis only. 
++ */ ++static void mali_kutf_irq_latency(struct kutf_context *context) +{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; -+ void *data = hdmi->plat_data->phy_data; -+ const struct drm_connector_state *conn_state = hdmi->curr_conn->state; ++ struct kutf_irq_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ u64 min_time = U64_MAX, max_time = 0, average_time = 0; ++ int i; ++ bool test_failed = false; + -+ if (hdmi->panel) -+ drm_panel_disable(hdmi->panel); ++ /* Force GPU to be powered */ ++ kbase_pm_context_active(kbdev); + -+ /* set avmute */ -+ hdmi_writel(hdmi, 1, PKTSCHED_PKT_CONTROL0); -+ mdelay(50); ++ kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, ++ GPU_IRQ_HANDLER); + -+ hdmi_modb(hdmi, 0, HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ, -+ AVP_3_INT_MASK_N); -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_stop) -+ hdmi->hdcp->hdcp_stop(hdmi->hdcp); ++ for (i = 0; i < NR_TEST_IRQS; i++) { ++ struct timespec64 tval; ++ u64 start_time; ++ int ret; + -+ hdmi_writel(hdmi, 0, HDCP2LOGIC_ESM_GPIO_IN); -+ if (conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) -+ drm_hdcp_update_content_protection(hdmi->curr_conn, -+ DRM_MODE_CONTENT_PROTECTION_DESIRED); ++ triggered = false; ++ ktime_get_real_ts64(&tval); ++ start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); + -+ if (hdmi->plat_data->set_hdcp_status) -+ hdmi->plat_data->set_hdcp_status(data, hdmi->hdcp_status); ++ /* Trigger fake IRQ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), ++ TEST_IRQ, NULL); + -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, false); -+ handle_plugged_change(hdmi, false); -+ mutex_lock(&hdmi->mutex); ++ ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT); + -+ if (hdmi->dclk_en) { -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(data, false, hdmi->old_vp_id); -+ hdmi->dclk_en = false; -+ mutex_unlock(&hdmi->audio_mutex); -+ }; ++ if (ret == 0) { ++ kutf_test_fail(context, "Timed out waiting for IRQ\n"); ++ test_failed = true; ++ break; ++ } + -+ if (hdmi->phy.ops->disable && !hdmi->frl_switch) { -+ hdmi_writel(hdmi, 0, FLT_CONFIG0); -+ hdmi_writel(hdmi, 0, SCRAMB_CONFIG0); -+ /* set sink frl mode disable */ -+ if (dw_hdmi_support_scdc(hdmi, &hdmi->curr_conn->display_info)) -+ drm_scdc_writeb(hdmi->ddc, 0x31, 0); ++ if ((irq_time - start_time) < min_time) ++ min_time = irq_time - start_time; ++ if ((irq_time - start_time) > max_time) ++ max_time = irq_time - start_time; ++ average_time += irq_time - start_time; + -+ hdmi->phy.ops->disable(hdmi, hdmi->phy.data); -+ hdmi->disabled = true; -+ if (hdmi->plat_data->link_clk_set) -+ hdmi->plat_data->link_clk_set(data, false); ++ udelay(10); + } + -+ hdmi->curr_conn = NULL; -+ mutex_unlock(&hdmi->mutex); -+ -+ if (hdmi->panel) -+ drm_panel_unprepare(hdmi->panel); -+} -+ -+static void dw_hdmi_qp_bridge_atomic_enable(struct drm_bridge *bridge, -+ struct drm_bridge_state *old_state) -+{ -+ struct dw_hdmi_qp *hdmi = bridge->driver_private; -+ struct drm_atomic_state *state = old_state->base.state; -+ struct drm_connector *connector; -+ void *data = hdmi->plat_data->phy_data; -+ -+ if (hdmi->panel) -+ drm_panel_prepare(hdmi->panel); -+ -+ connector = drm_atomic_get_new_connector_for_encoder(state, -+ bridge->encoder); ++ /* Go back to default handler */ ++ kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); + -+ mutex_lock(&hdmi->mutex); -+ hdmi->curr_conn = connector; ++ kbase_pm_context_idle(kbdev); + -+ 
dw_hdmi_qp_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode); -+ hdmi_writel(hdmi, 2, PKTSCHED_PKT_CONTROL0); -+ hdmi_modb(hdmi, PKTSCHED_GCP_TX_EN, PKTSCHED_GCP_TX_EN, PKTSCHED_PKT_EN); -+ mutex_unlock(&hdmi->mutex); ++ if (!test_failed) { ++ const char *results; + -+ if (!hdmi->dclk_en) { -+ mutex_lock(&hdmi->audio_mutex); -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(data, true, hdmi->vp_id); -+ hdmi->dclk_en = true; -+ mutex_unlock(&hdmi->audio_mutex); ++ do_div(average_time, NR_TEST_IRQS); ++ results = kutf_dsprintf(&context->fixture_pool, ++ "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", ++ min_time, max_time, average_time); ++ kutf_test_pass(context, results); + } -+ dw_hdmi_qp_init_audio_infoframe(hdmi); -+ dw_hdmi_qp_audio_enable(hdmi); -+ hdmi_clk_regenerator_update_pixel_clock(hdmi); -+ -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, true); -+ handle_plugged_change(hdmi, true); -+ -+ if (hdmi->panel) -+ drm_panel_enable(hdmi->panel); -+} -+ -+static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = { -+ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, -+ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, -+ .atomic_reset = drm_atomic_helper_bridge_reset, -+ .attach = dw_hdmi_qp_bridge_attach, -+ .detach = dw_hdmi_qp_bridge_detach, -+ .mode_set = dw_hdmi_qp_bridge_mode_set, -+ .mode_valid = dw_hdmi_qp_bridge_mode_valid, -+ .atomic_enable = dw_hdmi_qp_bridge_atomic_enable, -+ .atomic_disable = dw_hdmi_qp_bridge_atomic_disable, -+}; -+ -+void dw_hdmi_qp_set_cec_adap(struct dw_hdmi_qp *hdmi, struct cec_adapter *adap) -+{ -+ hdmi->cec_adap = adap; +} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_set_cec_adap); + -+static irqreturn_t dw_hdmi_qp_main_hardirq(int irq, void *dev_id) ++/** ++ * Module entry point for this test. ++ */ ++int mali_kutf_irq_test_main_init(void) +{ -+ struct dw_hdmi_qp *hdmi = dev_id; -+ struct dw_hdmi_qp_i2c *i2c = hdmi->i2c; -+ u32 stat; ++ struct kutf_suite *suite; + -+ stat = hdmi_readl(hdmi, MAINUNIT_1_INT_STATUS); ++ irq_app = kutf_create_application("irq"); ++ suite = kutf_create_suite(irq_app, "irq_default", ++ 1, mali_kutf_irq_default_create_fixture, ++ mali_kutf_irq_default_remove_fixture); + -+ i2c->stat = stat & (I2CM_OP_DONE_IRQ | I2CM_READ_REQUEST_IRQ | -+ I2CM_NACK_RCVD_IRQ); -+ hdmi->scdc_intr = stat & (SCDC_UPD_FLAGS_RD_IRQ | -+ SCDC_UPD_FLAGS_CHG_IRQ | -+ SCDC_UPD_FLAGS_CLR_IRQ | -+ SCDC_RR_REPLY_STOP_IRQ | -+ SCDC_NACK_RCVD_IRQ); -+ hdmi->flt_intr = stat & (FLT_EXIT_TO_LTSP_IRQ | -+ FLT_EXIT_TO_LTS4_IRQ | -+ FLT_EXIT_TO_LTSL_IRQ); ++ kutf_add_test(suite, 0x0, "irq_latency", ++ mali_kutf_irq_latency); ++ return 0; ++} + -+ dev_dbg(hdmi->dev, "i2c main unit irq:%#x\n", stat); -+ if (i2c->stat) { -+ hdmi_writel(hdmi, i2c->stat, MAINUNIT_1_INT_CLEAR); -+ complete(&i2c->cmp); -+ } ++/** ++ * Module exit point for this test. 
++ */ ++void mali_kutf_irq_test_main_exit(void) ++{ ++ kutf_destroy_application(irq_app); ++} + -+ if (hdmi->flt_intr) { -+ dev_dbg(hdmi->dev, "i2c flt irq:%#x\n", hdmi->flt_intr); -+ hdmi_writel(hdmi, hdmi->flt_intr, MAINUNIT_1_INT_CLEAR); -+ complete(&hdmi->flt_cmp); -+ } ++module_init(mali_kutf_irq_test_main_init); ++module_exit(mali_kutf_irq_test_main_exit); + -+ if (hdmi->scdc_intr) { -+ u8 val; ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION("1.0"); +diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript +new file mode 100755 +index 000000000..ec837f164 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript +@@ -0,0 +1,30 @@ ++# ++# (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ dev_dbg(hdmi->dev, "i2c scdc irq:%#x\n", hdmi->scdc_intr); -+ hdmi_writel(hdmi, hdmi->scdc_intr, MAINUNIT_1_INT_CLEAR); -+ val = hdmi_readl(hdmi, SCDC_STATUS0); + -+ /* frl start */ -+ if (val & BIT(4)) { -+ hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_POLL_EN | -+ SCDC_UPD_FLAGS_AUTO_CLR, SCDC_CONFIG0); -+ hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_RD_IRQ, -+ MAINUNIT_1_INT_MASK_N); -+ dev_info(hdmi->dev, "frl start\n"); -+ } ++import os ++Import('env') + -+ } ++src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')] + -+ if (stat) -+ return IRQ_HANDLED; ++if env.GetOption('clean') : ++ env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test')) ++ cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, []) ++ env.KernelObjTarget('mali_kutf_irq_test', cmd) ++else: ++ makeAction=Action("cd ${SOURCE.dir} && make MALI_DEBUG=${debug} MALI_BACKEND_KERNEL=1 MALI_ERROR_INJECT_ON=${error_inject} MALI_MODEL=${mali_model} MALI_NO_MALI=${no_mali} MALI_HW_VERSION=${hwver} MALI_UNIT_TEST=${unit} MALI_USE_UMP=${ump} MALI_CUSTOMER_RELEASE=${release} %s %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % (env.base_get_qa_settings(), env.kernel_get_config_defines()), '$MAKECOMSTR') ++ cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction]) ++ env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko') ++ env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko') ++ env.KernelObjTarget('mali_kutf_irq_test', cmd) +diff --git a/drivers/gpu/arm/midgard/tests/sconscript b/drivers/gpu/arm/midgard/tests/sconscript +new file mode 100755 +index 000000000..5337e1078 +--- /dev/null ++++ b/drivers/gpu/arm/midgard/tests/sconscript +@@ -0,0 +1,37 @@ ++# ++# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. 
++# ++# A copy of the licence is included with the program, and can also be obtained ++# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++# Boston, MA 02110-1301, USA. ++# ++# + -+ return IRQ_NONE; -+} + -+static irqreturn_t dw_hdmi_qp_avp_hardirq(int irq, void *dev_id) -+{ -+ struct dw_hdmi_qp *hdmi = dev_id; -+ u32 stat1, stat3; ++Import ('env') + -+ stat1 = hdmi_readl(hdmi, AVP_1_INT_STATUS); -+ stat3 = hdmi_readl(hdmi, AVP_3_INT_STATUS); ++kutf_env = env.Clone() ++kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include') ++Export('kutf_env') + -+ if (!stat1 && !stat3) -+ return IRQ_NONE; ++if Glob('internal/sconscript'): ++ SConscript('internal/sconscript') + -+ return IRQ_WAKE_THREAD; -+} ++if kutf_env['debug'] == '1': ++ SConscript('kutf/sconscript') ++ SConscript('mali_kutf_irq_test/sconscript') + -+static irqreturn_t dw_hdmi_qp_avp_irq(int irq, void *dev_id) -+{ -+ struct dw_hdmi_qp *hdmi = dev_id; -+ struct drm_connector_state *conn_state; -+ void *data = hdmi->plat_data->phy_data; -+ u32 stat1, stat3, val; ++ if Glob('kutf_test/sconscript'): ++ SConscript('kutf_test/sconscript') + -+ stat1 = hdmi_readl(hdmi, AVP_1_INT_STATUS); -+ stat3 = hdmi_readl(hdmi, AVP_3_INT_STATUS); ++ if Glob('kutf_test_runner/sconscript'): ++ SConscript('kutf_test_runner/sconscript') + -+ hdmi_writel(hdmi, stat1, AVP_1_INT_CLEAR); -+ hdmi_writel(hdmi, stat3, AVP_3_INT_CLEAR); ++if env['unit'] == '1': ++ SConscript('mali_kutf_ipa_test/sconscript') ++ SConscript('mali_kutf_vinstr_test/sconscript') +diff --git a/drivers/gpu/arm/sconscript b/drivers/gpu/arm/sconscript +new file mode 100755 +index 000000000..a06092bd5 +--- /dev/null ++++ b/drivers/gpu/arm/sconscript +@@ -0,0 +1,25 @@ ++# ++# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU licence. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# SPDX-License-Identifier: GPL-2.0 ++# ++# + -+ if (!hdmi->curr_conn || !hdmi->curr_conn->state) -+ return IRQ_HANDLED; ++import glob + -+ conn_state = hdmi->curr_conn->state; -+ val = conn_state->content_protection; + -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_isr) { -+ u32 hdcp_status = hdmi_readl(hdmi, HDCP14_STATUS0); ++SConscript('midgard/sconscript') +diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig +index a2b4c97bc..879b58908 100644 +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -35,6 +35,18 @@ config DRM_MIPI_DBI + depends on DRM + select DRM_KMS_HELPER + ++config DRM_EDID ++ bool "EDID function for DRM" ++ depends on DRM ++ select HDMI ++ default y if !ROCKCHIP_MINI_KERNEL ++ help ++ DRM EDID read and parse function. 
+ -+ if (stat1 & HDCP14_AUTH_CHG_MASK_N) { -+ /* hdcp14 auth success */ -+ if (hdcp_status & BIT(2)) { -+ hdmi->hdcp_status |= HDMI_HDCP14_AUTH; -+ if (conn_state->content_protection != -+ DRM_MODE_CONTENT_PROTECTION_UNDESIRED) -+ val = DRM_MODE_CONTENT_PROTECTION_ENABLED; -+ } else if (!(hdcp_status & BIT(2))) { -+ hdmi->hdcp_status &= ~HDMI_HDCP14_AUTH; -+ if (conn_state->content_protection != -+ DRM_MODE_CONTENT_PROTECTION_UNDESIRED) -+ val = DRM_MODE_CONTENT_PROTECTION_DESIRED; -+ } -+ conn_state->content_protection = val; -+ } -+ hdmi->hdcp->hdcp_isr(hdmi->hdcp, stat1, hdcp_status); -+ } ++config DRM_IGNORE_IOTCL_PERMIT ++ bool "Ignore drm ioctl permission" ++ depends on DRM && NO_GKI + -+ if (stat3 & HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ) { -+ stat3 = hdmi_readl(hdmi, HDCP2LOGIC_ESM_GPIO_OUT); -+ if (stat3 & HDCP2_AUTHENTICATION_SUCCESS) { -+ hdmi->hdcp_status |= HDMI_HDCP2_AUTH; -+ if (conn_state->content_protection != -+ DRM_MODE_CONTENT_PROTECTION_UNDESIRED) -+ val = DRM_MODE_CONTENT_PROTECTION_ENABLED; -+ } else if (!(stat3 & HDCP2_AUTHENTICATION_SUCCESS)) { -+ hdmi->hdcp_status &= ~HDMI_HDCP2_AUTH; -+ if (conn_state->content_protection != -+ DRM_MODE_CONTENT_PROTECTION_UNDESIRED) -+ val = DRM_MODE_CONTENT_PROTECTION_DESIRED; -+ } -+ conn_state->content_protection = val; -+ } + config DRM_MIPI_DSI + bool + depends on DRM +@@ -296,7 +308,7 @@ config DRM_VKMS + + source "drivers/gpu/drm/exynos/Kconfig" + +-source "drivers/gpu/drm/rockchip/Kconfig" ++source "drivers/gpu/drm/rockchip-oh/Kconfig" + + source "drivers/gpu/drm/vmwgfx/Kconfig" + +diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile +index 215e78e79..dc748c528 100644 +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -154,7 +154,7 @@ obj-$(CONFIG_DRM_VGEM) += vgem/ + obj-$(CONFIG_DRM_VKMS) += vkms/ + obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/ + obj-$(CONFIG_DRM_EXYNOS) +=exynos/ +-obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ ++obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip-oh/ + obj-$(CONFIG_DRM_GMA500) += gma500/ + obj-$(CONFIG_DRM_UDL) += udl/ + obj-$(CONFIG_DRM_AST) += ast/ +diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +index df9370e0f..00127137c 100644 +--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c ++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +@@ -8,11 +8,13 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -31,16 +33,52 @@ + + #include "analogix_dp_core.h" + #include "analogix_dp_reg.h" ++#include "../../rockchip-oh/rockchip_drm_drv.h" + + #define to_dp(nm) container_of(nm, struct analogix_dp_device, nm) + + static const bool verify_fast_training; + ++#ifdef CONFIG_NO_GKI ++#undef EXTCON_DISP_DP ++#define EXTCON_DISP_DP EXTCON_DISP_EDP ++#endif + -+ if (hdmi->plat_data->set_hdcp_status) -+ hdmi->plat_data->set_hdcp_status(data, hdmi->hdcp_status); ++static const unsigned int analogix_dp_cable[] = { ++ EXTCON_DISP_DP, ++ EXTCON_NONE, ++}; + -+ return IRQ_HANDLED; -+} + struct bridge_init { + struct i2c_client *client; + struct device_node *node; + }; + ++static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, ++ const struct drm_display_mode *adj_mode); + -+static irqreturn_t dw_hdmi_qp_earc_hardirq(int irq, void *dev_id) ++static bool analogix_dp_bandwidth_ok(struct analogix_dp_device *dp, ++ const struct drm_display_mode *mode, ++ unsigned int rate, unsigned int lanes) +{ -+ struct dw_hdmi_qp *hdmi = dev_id; -+ u32 stat; 
-+ -+ stat = hdmi_readl(hdmi, EARCRX_0_INT_STATUS); -+ if (stat) { -+ dev_dbg(hdmi->dev, "earc irq %#x\n", stat); -+ stat &= ~stat; -+ hdmi_writel(hdmi, stat, EARCRX_0_INT_MASK_N); -+ return IRQ_WAKE_THREAD; -+ } -+ -+ return IRQ_NONE; -+} ++ const struct drm_display_info *info; ++ u32 max_bw, req_bw, bpp = 24; + -+static irqreturn_t dw_hdmi_qp_earc_irq(int irq, void *dev_id) -+{ -+ struct dw_hdmi_qp *hdmi = dev_id; -+ u32 stat; ++ if (dp->plat_data->skip_connector) ++ return true; + -+ stat = hdmi_readl(hdmi, EARCRX_0_INT_STATUS); ++ info = &dp->connector.display_info; ++ if (info->bpc) ++ bpp = 3 * info->bpc; + -+ if (!stat) -+ return IRQ_NONE; ++ req_bw = mode->clock * bpp / 8; ++ max_bw = lanes * rate; ++ if (req_bw > max_bw) ++ return false; + -+ hdmi_writel(hdmi, stat, EARCRX_0_INT_CLEAR); ++ return true; ++} + -+ hdmi->earc_intr = stat; -+ complete(&hdmi->earc_cmp); + static int analogix_dp_init_dp(struct analogix_dp_device *dp) + { + int ret; +@@ -65,42 +103,56 @@ static int analogix_dp_init_dp(struct analogix_dp_device *dp) + return 0; + } + +-static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) ++static int analogix_dp_panel_prepare(struct analogix_dp_device *dp) + { +- int timeout_loop = 0; ++ int ret; + +- while (timeout_loop < DP_TIMEOUT_LOOP_COUNT) { +- if (analogix_dp_get_plug_in_status(dp) == 0) +- return 0; ++ mutex_lock(&dp->panel_lock); + +- timeout_loop++; +- usleep_range(1000, 1100); +- } ++ if (dp->panel_is_prepared) ++ goto out; + +- /* +- * Some edp screen do not have hpd signal, so we can't just +- * return failed when hpd plug in detect failed, DT property +- * "force-hpd" would indicate whether driver need this. +- */ +- if (!dp->force_hpd) +- return -ETIMEDOUT; ++ ret = drm_panel_prepare(dp->plat_data->panel); ++ if (ret) ++ goto out; + +- /* +- * The eDP TRM indicate that if HPD_STATUS(RO) is 0, AUX CH +- * will not work, so we need to give a force hpd action to +- * set HPD_STATUS manually. +- */ +- dev_dbg(dp->dev, "failed to get hpd plug status, try to force hpd\n"); ++ dp->panel_is_prepared = true; + -+ return IRQ_HANDLED; ++out: ++ mutex_unlock(&dp->panel_lock); ++ return 0; +} + -+static int dw_hdmi_detect_phy(struct dw_hdmi_qp *hdmi) ++static int analogix_dp_panel_unprepare(struct analogix_dp_device *dp) +{ -+ u8 phy_type; ++ int ret; + -+ phy_type = hdmi->plat_data->phy_force_vendor ? -+ DW_HDMI_PHY_VENDOR_PHY : 0; ++ mutex_lock(&dp->panel_lock); + -+ if (phy_type == DW_HDMI_PHY_VENDOR_PHY) { -+ /* Vendor PHYs require support from the glue layer. */ -+ if (!hdmi->plat_data->qp_phy_ops || !hdmi->plat_data->phy_name) { -+ dev_err(hdmi->dev, -+ "Vendor HDMI PHY not supported by glue layer\n"); -+ return -ENODEV; -+ } ++ if (!dp->panel_is_prepared) ++ goto out; + -+ hdmi->phy.ops = hdmi->plat_data->qp_phy_ops; -+ hdmi->phy.data = hdmi->plat_data->phy_data; -+ hdmi->phy.name = hdmi->plat_data->phy_name; -+ } ++ ret = drm_panel_unprepare(dp->plat_data->panel); ++ if (ret) ++ goto out; ++ ++ dp->panel_is_prepared = false; + ++out: ++ mutex_unlock(&dp->panel_lock); + return 0; +} -+ -+void dw_hdmi_qp_cec_set_hpd(struct dw_hdmi_qp *hdmi, bool plug_in, bool change) + +- analogix_dp_force_hpd(dp); ++static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) +{ -+ enum drm_connector_status status = plug_in ? 
-+ connector_status_connected : connector_status_disconnected; -+ -+ if (!hdmi->cec_notifier) -+ return; ++ if (dp->force_hpd) ++ analogix_dp_force_hpd(dp); + + if (analogix_dp_get_plug_in_status(dp) != 0) { + dev_err(dp->dev, "failed to get hpd plug in status\n"); + return -EINVAL; + } + +- dev_dbg(dp->dev, "success to get plug in status after force hpd\n"); +- + return 0; + } + +@@ -109,6 +161,9 @@ static bool analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) + unsigned char psr_version; + int ret; + ++ if (!device_property_read_bool(dp->dev, "support-psr")) ++ return 0; + -+ if (!plug_in) -+ cec_notifier_set_phys_addr(hdmi->cec_notifier, -+ CEC_PHYS_ADDR_INVALID); -+ else if (hdmi->ddc) { -+ struct edid *edid = drm_get_edid(&hdmi->connector, hdmi->ddc); -+ if (edid) { -+ if (hdmi->cec_notifier) -+ cec_notifier_set_phys_addr_from_edid( -+ hdmi->cec_notifier, edid); -+ kfree(edid); -+ } -+ } + ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); + if (ret != 1) { + dev_err(dp->dev, "failed to get PSR version, disable it\n"); +@@ -217,8 +272,24 @@ static int analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) + if (ret < 0) + return ret; + ++ if (!data) { ++ /* ++ * A setting of 1 indicates that this is an eDP device that ++ * uses only Enhanced Framing, independently of the setting by ++ * the source of ENHANCED_FRAME_EN ++ */ ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_EDP_CONFIGURATION_CAP, ++ &data); ++ if (ret < 0) ++ return ret; + -+ if (hdmi->bridge.dev) { -+ if (change && hdmi->cec_adap && hdmi->cec_adap->devnode.registered) -+ cec_queue_pin_hpd_event(hdmi->cec_adap, plug_in, ktime_get()); -+ drm_bridge_hpd_notify(&hdmi->bridge, status); ++ data = !!(data & DP_FRAMING_CHANGE_CAP); + } -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_cec_set_hpd); + -+static void dw_hdmi_qp_cec_enable(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->mutex); -+ hdmi_modb(hdmi, 0, CEC_SWDISABLE, GLOBAL_SWDISABLE); -+ mutex_unlock(&hdmi->mutex); -+} + analogix_dp_enable_enhanced_mode(dp, data); + ++ dp->link_train.enhanced_framing = data; + -+static void dw_hdmi_qp_cec_disable(struct dw_hdmi_qp *hdmi) -+{ -+ mutex_lock(&hdmi->mutex); -+ hdmi_modb(hdmi, CEC_SWDISABLE, CEC_SWDISABLE, GLOBAL_SWDISABLE); -+ mutex_unlock(&hdmi->mutex); -+} + return 0; + } + +@@ -234,32 +305,27 @@ static int analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) + return ret < 0 ? 
ret : 0; + } + +-static void +-analogix_dp_set_lane_lane_pre_emphasis(struct analogix_dp_device *dp, +- int pre_emphasis, int lane) ++static bool analogix_dp_get_vrr_capable(struct analogix_dp_device *dp) + { +- switch (lane) { +- case 0: +- analogix_dp_set_lane0_pre_emphasis(dp, pre_emphasis); +- break; +- case 1: +- analogix_dp_set_lane1_pre_emphasis(dp, pre_emphasis); +- break; ++ struct drm_connector *connector = &dp->connector; ++ struct drm_display_info *info = &connector->display_info; + +- case 2: +- analogix_dp_set_lane2_pre_emphasis(dp, pre_emphasis); +- break; ++ if (!info->monitor_range.max_vfreq) ++ return false; ++ if (!info->monitor_range.min_vfreq) ++ return false; ++ if (info->monitor_range.max_vfreq < info->monitor_range.min_vfreq) ++ return false; ++ if (!drm_dp_sink_can_do_video_without_timing_msa(dp->dpcd)) ++ return false; + +- case 3: +- analogix_dp_set_lane3_pre_emphasis(dp, pre_emphasis); +- break; +- } ++ return true; + } + + static int analogix_dp_link_start(struct analogix_dp_device *dp) + { + u8 buf[4]; +- int lane, lane_count, pll_tries, retval; ++ int lane, lane_count, retval; + + lane_count = dp->link_train.lane_count; + +@@ -279,6 +345,16 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) + retval = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, 2); + if (retval < 0) + return retval; + -+static const struct dw_hdmi_qp_cec_ops dw_hdmi_qp_cec_ops = { -+ .enable = dw_hdmi_qp_cec_enable, -+ .disable = dw_hdmi_qp_cec_disable, -+ .write = hdmi_writel, -+ .read = hdmi_readl, -+}; -+ -+static const struct regmap_config hdmi_regmap_config = { -+ .reg_bits = 32, -+ .val_bits = 32, -+ .reg_stride = 4, -+ .max_register = EARCRX_1_INT_FORCE, -+}; -+ -+struct dw_hdmi_qp_reg_table { -+ int reg_base; -+ int reg_end; -+}; -+ -+static const struct dw_hdmi_qp_reg_table hdmi_reg_table[] = { -+ {0x0, 0xc}, -+ {0x14, 0x1c}, -+ {0x44, 0x48}, -+ {0x50, 0x58}, -+ {0x80, 0x84}, -+ {0xa0, 0xc4}, -+ {0xe0, 0xe8}, -+ {0xf0, 0x118}, -+ {0x140, 0x140}, -+ {0x150, 0x150}, -+ {0x160, 0x168}, -+ {0x180, 0x180}, -+ {0x800, 0x800}, -+ {0x808, 0x808}, -+ {0x814, 0x814}, -+ {0x81c, 0x824}, -+ {0x834, 0x834}, -+ {0x840, 0x864}, -+ {0x86c, 0x86c}, -+ {0x880, 0x89c}, -+ {0x8e0, 0x8e8}, -+ {0x900, 0x900}, -+ {0x908, 0x90c}, -+ {0x920, 0x938}, -+ {0x920, 0x938}, -+ {0x960, 0x960}, -+ {0x968, 0x968}, -+ {0xa20, 0xa20}, -+ {0xa30, 0xa30}, -+ {0xa40, 0xa40}, -+ {0xa54, 0xa54}, -+ {0xa80, 0xaac}, -+ {0xab4, 0xab8}, -+ {0xb00, 0xcbc}, -+ {0xce0, 0xce0}, -+ {0xd00, 0xddc}, -+ {0xe20, 0xe24}, -+ {0xe40, 0xe44}, -+ {0xe4c, 0xe4c}, -+ {0xe60, 0xe80}, -+ {0xea0, 0xf24}, -+ {0x1004, 0x100c}, -+ {0x1020, 0x1030}, -+ {0x1040, 0x1050}, -+ {0x1060, 0x1068}, -+ {0x1800, 0x1820}, -+ {0x182c, 0x182c}, -+ {0x1840, 0x1940}, -+ {0x1960, 0x1a60}, -+ {0x1b00, 0x1b00}, -+ {0x1c00, 0x1c00}, -+ {0x3000, 0x3000}, -+ {0x3010, 0x3014}, -+ {0x3020, 0x3024}, -+ {0x3800, 0x3800}, -+ {0x3810, 0x3814}, -+ {0x3820, 0x3824}, -+ {0x3830, 0x3834}, -+ {0x3840, 0x3844}, -+ {0x3850, 0x3854}, -+ {0x3860, 0x3864}, -+ {0x3870, 0x3874}, -+ {0x4000, 0x4004}, -+ {0x4800, 0x4800}, -+ {0x4810, 0x4814}, -+}; ++ /* Spread AMP if required, enable 8b/10b coding */ ++ buf[0] = analogix_dp_ssc_supported(dp) ? 
DP_SPREAD_AMP_0_5 : 0; ++ if (analogix_dp_get_vrr_capable(dp)) ++ buf[0] |= DP_MSA_TIMING_PAR_IGNORE_EN; ++ buf[1] = DP_SET_ANSI_8B10B; ++ retval = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, 2); ++ if (retval < 0) ++ return retval; + -+static int dw_hdmi_ctrl_show(struct seq_file *s, void *v) + /* set enhanced mode if available */ + retval = analogix_dp_set_enhanced_mode(dp); + if (retval < 0) { +@@ -286,22 +362,12 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) + return retval; + } + +- /* Set TX pre-emphasis to minimum */ ++ /* Set TX voltage-swing and pre-emphasis to minimum */ + for (lane = 0; lane < lane_count; lane++) +- analogix_dp_set_lane_lane_pre_emphasis(dp, +- PRE_EMPHASIS_LEVEL_0, lane); +- +- /* Wait for PLL lock */ +- pll_tries = 0; +- while (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { +- if (pll_tries == DP_TIMEOUT_LOOP_COUNT) { +- dev_err(dp->dev, "Wait for PLL lock timed out\n"); +- return -ETIMEDOUT; +- } +- +- pll_tries++; +- usleep_range(90, 120); +- } ++ dp->link_train.training_lane[lane] = ++ DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | ++ DP_TRAIN_PRE_EMPH_LEVEL_0; ++ analogix_dp_set_lane_link_training(dp); + + /* Set training pattern 1 */ + analogix_dp_set_training_pattern(dp, TRAINING_PTN1); +@@ -384,54 +450,6 @@ static unsigned char analogix_dp_get_adjust_request_pre_emphasis( + return ((link_value >> shift) & 0xc) >> 2; + } + +-static void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp, +- u8 training_lane_set, int lane) +-{ +- switch (lane) { +- case 0: +- analogix_dp_set_lane0_link_training(dp, training_lane_set); +- break; +- case 1: +- analogix_dp_set_lane1_link_training(dp, training_lane_set); +- break; +- +- case 2: +- analogix_dp_set_lane2_link_training(dp, training_lane_set); +- break; +- +- case 3: +- analogix_dp_set_lane3_link_training(dp, training_lane_set); +- break; +- } +-} +- +-static unsigned int +-analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, +- int lane) +-{ +- u32 reg; +- +- switch (lane) { +- case 0: +- reg = analogix_dp_get_lane0_link_training(dp); +- break; +- case 1: +- reg = analogix_dp_get_lane1_link_training(dp); +- break; +- case 2: +- reg = analogix_dp_get_lane2_link_training(dp); +- break; +- case 3: +- reg = analogix_dp_get_lane3_link_training(dp); +- break; +- default: +- WARN_ON(1); +- return 0; +- } +- +- return reg; +-} +- + static void analogix_dp_reduce_link_rate(struct analogix_dp_device *dp) + { + analogix_dp_training_pattern_dis(dp); +@@ -464,13 +482,27 @@ static void analogix_dp_get_adjust_training_lane(struct analogix_dp_device *dp, + } + } + ++static bool analogix_dp_tps3_supported(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi_qp *hdmi = s->private; -+ u32 i = 0, j = 0, val = 0; -+ -+ if (hdmi->disabled) { -+ dev_err(hdmi->dev, "hdmi is disabled\n"); -+ return -EACCES; -+ } -+ -+ seq_puts(s, "\n---------------------------------------------------"); -+ -+ for (i = 0; i < ARRAY_SIZE(hdmi_reg_table); i++) { -+ for (j = hdmi_reg_table[i].reg_base; -+ j <= hdmi_reg_table[i].reg_end; j += 4) { -+ val = hdmi_readl(hdmi, j); ++ bool source_tps3_supported, sink_tps3_supported; ++ u8 dpcd = 0; + -+ if ((j - hdmi_reg_table[i].reg_base) % 16 == 0) -+ seq_printf(s, "\n>>>hdmi_ctl %04x:", j); -+ seq_printf(s, " %08x", val); -+ } -+ } -+ seq_puts(s, "\n---------------------------------------------------\n"); ++ source_tps3_supported = ++ dp->video_info.max_link_rate == DP_LINK_BW_5_4; ++ drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &dpcd); ++ sink_tps3_supported 
= dpcd & DP_TPS3_SUPPORTED; + -+ return 0; ++ return source_tps3_supported && sink_tps3_supported; +} + -+static int dw_hdmi_ctrl_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, dw_hdmi_ctrl_show, inode->i_private); -+} + static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) + { + int lane, lane_count, retval; + u8 voltage_swing, pre_emphasis, training_lane; + u8 link_status[2], adjust_request[2]; ++ u8 training_pattern = TRAINING_PTN2; + +- usleep_range(100, 101); ++ drm_dp_link_train_clock_recovery_delay(&dp->aux, dp->dpcd); + + lane_count = dp->link_train.lane_count; + +@@ -478,24 +510,30 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) + if (retval < 0) + return retval; + +- retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, +- adjust_request, 2); +- if (retval < 0) +- return retval; +- + if (analogix_dp_clock_recovery_ok(link_status, lane_count) == 0) { +- /* set training pattern 2 for EQ */ +- analogix_dp_set_training_pattern(dp, TRAINING_PTN2); ++ if (analogix_dp_tps3_supported(dp)) ++ training_pattern = TRAINING_PTN3; + -+static ssize_t -+dw_hdmi_ctrl_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct dw_hdmi_qp *hdmi = -+ ((struct seq_file *)file->private_data)->private; -+ u32 reg, val; -+ char kbuf[25]; ++ /* set training pattern for EQ */ ++ analogix_dp_set_training_pattern(dp, training_pattern); + + retval = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + DP_LINK_SCRAMBLING_DISABLE | +- DP_TRAINING_PATTERN_2); ++ (training_pattern == TRAINING_PTN3 ? ++ DP_TRAINING_PATTERN_3 : DP_TRAINING_PATTERN_2)); + if (retval < 0) + return retval; + + dev_dbg(dp->dev, "Link Training Clock Recovery success\n"); + dp->link_train.lt_state = EQUALIZER_TRAINING; + -+ if (hdmi->disabled) { -+ dev_err(hdmi->dev, "hdmi is disabled\n"); -+ return -EACCES; -+ } ++ return 0; + } else { ++ retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, ++ adjust_request, 2); ++ if (retval < 0) ++ return retval; + -+ if (count > 24) { -+ dev_err(hdmi->dev, "out of buf range\n"); -+ return count; -+ } + for (lane = 0; lane < lane_count; lane++) { + training_lane = analogix_dp_get_lane_link_training( + dp, lane); +@@ -523,10 +561,7 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) + } + + analogix_dp_get_adjust_training_lane(dp, adjust_request); +- +- for (lane = 0; lane < lane_count; lane++) +- analogix_dp_set_lane_link_training(dp, +- dp->link_train.training_lane[lane], lane); ++ analogix_dp_set_lane_link_training(dp); + + retval = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, + dp->link_train.training_lane, lane_count); +@@ -538,11 +573,11 @@ static int analogix_dp_process_clock_recovery(struct analogix_dp_device *dp) + + static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) + { +- int lane, lane_count, retval; ++ int lane_count, retval; + u32 reg; + u8 link_align, link_status[2], adjust_request[2]; + +- usleep_range(400, 401); ++ drm_dp_link_train_channel_eq_delay(&dp->aux, dp->dpcd); + + lane_count = dp->link_train.lane_count; + +@@ -555,18 +590,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) + return -EIO; + } + +- retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, +- adjust_request, 2); +- if (retval < 0) +- return retval; +- + retval = drm_dp_dpcd_readb(&dp->aux, DP_LANE_ALIGN_STATUS_UPDATED, + &link_align); + if (retval < 0) + return retval; 
+ +- analogix_dp_get_adjust_training_lane(dp, adjust_request); +- + if (!analogix_dp_channel_eq_ok(link_status, link_align, lane_count)) { + /* traing pattern Set to Normal */ + retval = analogix_dp_training_pattern_dis(dp); +@@ -598,9 +626,13 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) + return -EIO; + } + +- for (lane = 0; lane < lane_count; lane++) +- analogix_dp_set_lane_link_training(dp, +- dp->link_train.training_lane[lane], lane); ++ retval = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, ++ adjust_request, 2); ++ if (retval < 0) ++ return retval; + -+ if (copy_from_user(kbuf, buf, count)) -+ return -EFAULT; -+ kbuf[count - 1] = '\0'; ++ analogix_dp_get_adjust_training_lane(dp, adjust_request); ++ analogix_dp_set_lane_link_training(dp); + + retval = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, + dp->link_train.training_lane, lane_count); +@@ -610,10 +642,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) + return 0; + } + +-static void analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, +- u8 *bandwidth) ++static int analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, ++ u8 *bandwidth) + { + u8 data; ++ int ret; + + /* + * For DP rev.1.1, Maximum link rate of Main Link lanes +@@ -621,28 +654,41 @@ static void analogix_dp_get_max_rx_bandwidth(struct analogix_dp_device *dp, + * For DP rev.1.2, Maximum link rate of Main Link lanes + * 0x06 = 1.62 Gbps, 0x0a = 2.7 Gbps, 0x14 = 5.4Gbps + */ +- drm_dp_dpcd_readb(&dp->aux, DP_MAX_LINK_RATE, &data); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LINK_RATE, &data); ++ if (ret < 0) ++ return ret; + -+ if (sscanf(kbuf, "%x %x", ®, &val) == -1) -+ return -EFAULT; -+ if (reg > EARCRX_1_INT_FORCE) { -+ dev_err(hdmi->dev, "it is no a hdmi register\n"); -+ return count; -+ } -+ dev_info(hdmi->dev, "/**********hdmi register config******/"); -+ dev_info(hdmi->dev, "\n reg=%x val=%x\n", reg, val); -+ hdmi_writel(hdmi, val, reg); -+ return count; -+} + *bandwidth = data; + -+static const struct file_operations dw_hdmi_ctrl_fops = { -+ .owner = THIS_MODULE, -+ .open = dw_hdmi_ctrl_open, -+ .read = seq_read, -+ .write = dw_hdmi_ctrl_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ return 0; + } + +-static void analogix_dp_get_max_rx_lane_count(struct analogix_dp_device *dp, +- u8 *lane_count) ++static int analogix_dp_get_max_rx_lane_count(struct analogix_dp_device *dp, ++ u8 *lane_count) + { + u8 data; ++ int ret; + + /* + * For DP rev.1.1, Maximum number of Main Link lanes + * 0x01 = 1 lane, 0x02 = 2 lanes, 0x04 = 4 lanes + */ +- drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); ++ if (ret < 0) ++ return ret; + -+static int dw_hdmi_status_show(struct seq_file *s, void *v) -+{ -+ struct dw_hdmi_qp *hdmi = s->private; -+ u32 val; + *lane_count = DPCD_MAX_LANE_COUNT(data); + -+ seq_puts(s, "PHY: "); -+ if (hdmi->disabled) { -+ seq_puts(s, "disabled\n"); -+ return 0; ++ return 0; + } + + static int analogix_dp_full_link_train(struct analogix_dp_device *dp, + u32 max_lanes, u32 max_rate) + { ++ struct video_info *video = &dp->video_info; + int retval = 0; + bool training_finished = false; ++ u8 dpcd; + + /* + * MACRO_RST must be applied after the PLL_LOCK to avoid +@@ -654,25 +700,19 @@ static int analogix_dp_full_link_train(struct analogix_dp_device *dp, + analogix_dp_get_max_rx_bandwidth(dp, &dp->link_train.link_rate); + analogix_dp_get_max_rx_lane_count(dp, 
&dp->link_train.lane_count); + +- if ((dp->link_train.link_rate != DP_LINK_BW_1_62) && +- (dp->link_train.link_rate != DP_LINK_BW_2_7) && +- (dp->link_train.link_rate != DP_LINK_BW_5_4)) { +- dev_err(dp->dev, "Rx Max Link Rate is abnormal :%x !\n", +- dp->link_train.link_rate); +- dp->link_train.link_rate = DP_LINK_BW_1_62; +- } ++ /* Setup TX lane count & rate */ ++ dp->link_train.lane_count = min_t(u32, dp->link_train.lane_count, max_lanes); ++ dp->link_train.link_rate = min_t(u32, dp->link_train.link_rate, max_rate); + +- if (dp->link_train.lane_count == 0) { +- dev_err(dp->dev, "Rx Max Lane count is abnormal :%x !\n", +- dp->link_train.lane_count); +- dp->link_train.lane_count = (u8)LANE_COUNT1; ++ if (!analogix_dp_bandwidth_ok(dp, &video->mode, ++ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate), ++ dp->link_train.lane_count)) { ++ dev_err(dp->dev, "bandwidth overflow\n"); ++ return -EINVAL; + } + +- /* Setup TX lane count & rate */ +- if (dp->link_train.lane_count > max_lanes) +- dp->link_train.lane_count = max_lanes; +- if (dp->link_train.link_rate > max_rate) +- dp->link_train.link_rate = max_rate; ++ drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &dpcd); ++ dp->link_train.ssc = !!(dpcd & DP_MAX_DOWNSPREAD_0_5); + + /* All DP analog module power up */ + analogix_dp_set_analog_power_down(dp, POWER_ALL, 0); +@@ -712,27 +752,15 @@ static int analogix_dp_full_link_train(struct analogix_dp_device *dp, + + static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) + { +- int i, ret; ++ int ret; + u8 link_align, link_status[2]; +- enum pll_status status; + + analogix_dp_reset_macro(dp); + + analogix_dp_set_link_bandwidth(dp, dp->link_train.link_rate); + analogix_dp_set_lane_count(dp, dp->link_train.lane_count); +- +- for (i = 0; i < dp->link_train.lane_count; i++) { +- analogix_dp_set_lane_link_training(dp, +- dp->link_train.training_lane[i], i); +- } +- +- ret = readx_poll_timeout(analogix_dp_get_pll_lock_status, dp, status, +- status != PLL_UNLOCKED, 120, +- 120 * DP_TIMEOUT_LOOP_COUNT); +- if (ret) { +- DRM_DEV_ERROR(dp->dev, "Wait for pll lock failed %d\n", ret); +- return ret; +- } ++ analogix_dp_set_lane_link_training(dp); ++ analogix_dp_enable_enhanced_mode(dp, dp->link_train.enhanced_framing); + + /* source Set training pattern 1 */ + analogix_dp_set_training_pattern(dp, TRAINING_PTN1); +@@ -743,7 +771,6 @@ static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) + /* From DP spec, pattern must be on-screen for a minimum 500us */ + usleep_range(500, 600); + +- /* TODO: enhanced_mode?*/ + analogix_dp_set_training_pattern(dp, DP_NONE); + + /* +@@ -823,7 +850,7 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) + analogix_dp_set_video_cr_mn(dp, CALCULATED_M, 0, 0); + + /* For video bist, Video timing must be generated by register */ +- analogix_dp_set_video_timing_mode(dp, VIDEO_TIMING_FROM_CAPTURE); ++ analogix_dp_set_video_timing_mode(dp, VIDEO_TIMING_FROM_REGISTER); + + /* Disable video mute */ + analogix_dp_enable_video_mute(dp, 0); +@@ -885,123 +912,309 @@ static int analogix_dp_enable_scramble(struct analogix_dp_device *dp, + return ret < 0 ? 
ret : 0; + } + +-static irqreturn_t analogix_dp_hardirq(int irq, void *arg) ++static u8 analogix_dp_autotest_phy_pattern(struct analogix_dp_device *dp) + { +- struct analogix_dp_device *dp = arg; +- irqreturn_t ret = IRQ_NONE; +- enum dp_irq_type irq_type; ++ struct drm_dp_phy_test_params *data = &dp->compliance.phytest; + +- irq_type = analogix_dp_get_irq_type(dp); +- if (irq_type != DP_IRQ_TYPE_UNKNOWN) { +- analogix_dp_mute_hpd_interrupt(dp); +- ret = IRQ_WAKE_THREAD; ++ if (drm_dp_get_phy_test_pattern(&dp->aux, data)) { ++ dev_err(dp->dev, "DP Phy Test pattern AUX read failure\n"); ++ return DP_TEST_NAK; + } + +- return ret; ++ if (data->link_rate > drm_dp_bw_code_to_link_rate(dp->video_info.max_link_rate)) { ++ dev_err(dp->dev, "invalid link rate = 0x%x\n", data->link_rate); ++ return DP_TEST_NAK; + } -+ seq_puts(s, "enabled\t\t\tMode: "); -+ if (hdmi->sink_is_hdmi) -+ seq_puts(s, "HDMI\n"); -+ else -+ seq_puts(s, "DVI\n"); + -+ if (hdmi->hdmi_data.video_mode.mpixelclock > 600000000) { -+ seq_printf(s, "FRL Mode Pixel Clk: %luHz\n", -+ hdmi->hdmi_data.video_mode.mpixelclock); -+ } else { -+ if (hdmi->hdmi_data.video_mode.mtmdsclock > 340000000) -+ val = hdmi->hdmi_data.video_mode.mtmdsclock / 4; -+ else -+ val = hdmi->hdmi_data.video_mode.mtmdsclock; -+ seq_printf(s, "TMDS Mode Pixel Clk: %luHz\t\tTMDS Clk: %uHz\n", -+ hdmi->hdmi_data.video_mode.mpixelclock, val); -+ } -+ seq_printf(s, "ALLM: %d\n", hdmi->allm_enable); -+ seq_puts(s, "Color Format: "); -+ if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "RGB"); -+ else if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV444"); -+ else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV422"); -+ else if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV420"); -+ else -+ seq_puts(s, "UNKNOWN"); -+ val = hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format); -+ seq_printf(s, "\t\tColor Depth: %d bit\n", val); -+ seq_puts(s, "Colorimetry: "); -+ switch (hdmi->hdmi_data.enc_out_encoding) { -+ case V4L2_YCBCR_ENC_601: -+ seq_puts(s, "ITU.BT601"); -+ break; -+ case V4L2_YCBCR_ENC_709: -+ seq_puts(s, "ITU.BT709"); -+ break; -+ case V4L2_YCBCR_ENC_BT2020: -+ seq_puts(s, "ITU.BT2020"); ++ /* Set test active flag here so userspace doesn't interrupt things */ ++ dp->compliance.test_active = true; ++ ++ return DP_TEST_ACK; + } + +-static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) ++static void analogix_dp_handle_test_request(struct analogix_dp_device *dp) + { +- struct analogix_dp_device *dp = arg; +- enum dp_irq_type irq_type; ++ u8 response = DP_TEST_NAK; ++ u8 request = 0; ++ int ret; + +- irq_type = analogix_dp_get_irq_type(dp); +- if (irq_type & DP_IRQ_TYPE_HP_CABLE_IN || +- irq_type & DP_IRQ_TYPE_HP_CABLE_OUT) { +- dev_dbg(dp->dev, "Detected cable status changed!\n"); +- if (dp->drm_dev) +- drm_helper_hpd_irq_event(dp->drm_dev); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_TEST_REQUEST, &request); ++ if (ret < 0) { ++ dev_err(dp->dev, "Could not read test request from sink\n"); ++ goto update_status; + } + +- if (irq_type != DP_IRQ_TYPE_UNKNOWN) { +- analogix_dp_clear_hotplug_interrupts(dp); +- analogix_dp_unmute_hpd_interrupt(dp); ++ switch (request) { ++ case DP_TEST_LINK_PHY_TEST_PATTERN: ++ dev_info(dp->dev, "PHY_PATTERN test requested\n"); ++ response = analogix_dp_autotest_phy_pattern(dp); + break; -+ default: /* Carries no data */ -+ seq_puts(s, "ITU.BT601"); ++ default: ++ dev_err(dp->dev, "Invalid 
test request '%02x'\n", request); + break; -+ } + } + +- return IRQ_HANDLED; ++ if (response & DP_TEST_ACK) ++ dp->compliance.test_type = request; + -+ seq_puts(s, "\t\tEOTF: "); ++update_status: ++ ret = drm_dp_dpcd_writeb(&dp->aux, DP_TEST_RESPONSE, response); ++ if (ret < 0) ++ dev_err(dp->dev, "Could not write test response to sink\n"); + } + +-static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) ++void analogix_dp_check_device_service_irq(struct analogix_dp_device *dp) + { ++ u8 val; + int ret; +- u8 spread; + +- ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); +- if (ret != 1) { +- dev_err(dp->dev, "failed to read downspread %d\n", ret); +- return ret; +- } +- dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); +- dev_dbg(dp->dev, "fast link training %s\n", +- dp->fast_train_enable ? "supported" : "unsupported"); +- return 0; ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, &val); ++ if (ret < 0 || !val) ++ return; + -+ val = hdmi_readl(hdmi, PKTSCHED_PKT_EN); -+ if (!(val & PKTSCHED_DRMI_TX_EN)) { -+ seq_puts(s, "Off\n"); -+ return 0; -+ } ++ ret = drm_dp_dpcd_writeb(&dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, val); ++ if (ret < 0) ++ return; + -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS1); -+ val = (val >> 8) & 0x7; -+ switch (val) { -+ case HDMI_EOTF_TRADITIONAL_GAMMA_SDR: -+ seq_puts(s, "SDR"); ++ if (val & DP_AUTOMATED_TEST_REQUEST) ++ analogix_dp_handle_test_request(dp); + } ++EXPORT_SYMBOL_GPL(analogix_dp_check_device_service_irq); + +-static int analogix_dp_commit(struct analogix_dp_device *dp) ++static void analogix_dp_process_phy_request(struct analogix_dp_device *dp) + { ++ struct drm_dp_phy_test_params *data = &dp->compliance.phytest; ++ u8 spread, adjust_request[2]; + int ret; + +- /* Keep the panel disabled while we configure video */ +- if (dp->plat_data->panel) { +- if (drm_panel_disable(dp->plat_data->panel)) +- DRM_ERROR("failed to disable the panel\n"); +- } +- +- ret = analogix_dp_train_link(dp); +- if (ret) { +- dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); +- return ret; +- } ++ dp->link_train.link_rate = drm_dp_link_rate_to_bw_code(data->link_rate); ++ dp->link_train.lane_count = data->num_lanes; + +- ret = analogix_dp_enable_scramble(dp, 1); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); + if (ret < 0) { +- dev_err(dp->dev, "can not enable scramble\n"); +- return ret; ++ dev_err(dp->dev, "Could not read ssc from sink\n"); ++ return; + } + +- analogix_dp_init_video(dp); +- ret = analogix_dp_config_video(dp); +- if (ret) { +- dev_err(dp->dev, "unable to config video\n"); +- return ret; +- } ++ dp->link_train.ssc = !!(spread & DP_MAX_DOWNSPREAD_0_5); + +- /* Safe to enable the panel now */ +- if (dp->plat_data->panel) { +- ret = drm_panel_enable(dp->plat_data->panel); +- if (ret) { +- DRM_ERROR("failed to enable the panel\n"); +- return ret; +- } ++ ret = drm_dp_dpcd_read(&dp->aux, DP_ADJUST_REQUEST_LANE0_1, ++ adjust_request, 2); ++ if (ret < 0) { ++ dev_err(dp->dev, "Could not read swing/pre-emphasis\n"); ++ return; + } + +- /* Check whether panel supports fast training */ +- ret = analogix_dp_fast_link_train_detection(dp); +- if (ret) +- return ret; ++ analogix_dp_set_link_bandwidth(dp, dp->link_train.link_rate); ++ analogix_dp_set_lane_count(dp, dp->link_train.lane_count); ++ analogix_dp_get_adjust_training_lane(dp, adjust_request); ++ analogix_dp_set_lane_link_training(dp); + +- if (analogix_dp_detect_sink_psr(dp)) { +- ret = 
analogix_dp_enable_sink_psr(dp); +- if (ret) +- return ret; ++ switch (data->phy_pattern) { ++ case DP_PHY_TEST_PATTERN_NONE: ++ dev_info(dp->dev, "Disable Phy Test Pattern\n"); ++ analogix_dp_set_training_pattern(dp, DP_NONE); + break; -+ case HDMI_EOTF_TRADITIONAL_GAMMA_HDR: -+ seq_puts(s, "HDR"); ++ case DP_PHY_TEST_PATTERN_D10_2: ++ dev_info(dp->dev, "Set D10.2 Phy Test Pattern\n"); ++ analogix_dp_set_training_pattern(dp, D10_2); + break; -+ case HDMI_EOTF_SMPTE_ST2084: -+ seq_puts(s, "ST2084"); ++ case DP_PHY_TEST_PATTERN_PRBS7: ++ dev_info(dp->dev, "Set PRBS7 Phy Test Pattern\n"); ++ analogix_dp_set_training_pattern(dp, PRBS7); + break; -+ case HDMI_EOTF_BT_2100_HLG: -+ seq_puts(s, "HLG"); ++ case DP_PHY_TEST_PATTERN_80BIT_CUSTOM: ++ dev_info(dp->dev, "Set 80Bit Custom Phy Test Pattern\n"); ++ analogix_dp_set_training_pattern(dp, TEST_PATTERN_80BIT); ++ break; ++ case DP_PHY_TEST_PATTERN_CP2520: ++ dev_info(dp->dev, "Set HBR2 compliance Phy Test Pattern\n"); ++ analogix_dp_set_training_pattern(dp, TEST_PATTERN_HBR2); + break; + default: -+ seq_puts(s, "Not Defined\n"); -+ return 0; ++ dev_err(dp->dev, "Invalid Phy Test Pattern: %d\n", data->phy_pattern); ++ return; + } + +- return ret; ++ drm_dp_set_phy_test_pattern(&dp->aux, data, 0x11); + } + +-static int analogix_dp_enable_psr(struct analogix_dp_device *dp) ++void analogix_dp_phy_test(struct analogix_dp_device *dp) + { +- struct dp_sdp psr_vsc; ++ struct drm_device *dev = dp->drm_dev; ++ struct drm_modeset_acquire_ctx ctx; + int ret; +- u8 sink; + +- ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_STATUS, &sink); +- if (ret != 1) +- DRM_DEV_ERROR(dp->dev, "Failed to read psr status %d\n", ret); +- else if (sink == DP_PSR_SINK_ACTIVE_RFB) +- return 0; ++ DRM_DEV_INFO(dp->dev, "PHY test\n"); + +- /* Prepare VSC packet as per EDP 1.4 spec, Table 6.9 */ ++ drm_modeset_acquire_init(&ctx, 0); ++ for (;;) { ++ ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); ++ if (ret != -EDEADLK) ++ break; ++ ++ drm_modeset_backoff(&ctx); + } + -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS1); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "\nx0: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS2); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\t\ty0: %d\n", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS2); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "x1: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS3); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\t\ty1: %d\n", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS3); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "x2: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS4); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\t\ty2: %d\n", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS4); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "white x: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS5); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\twhite y: %d\n", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS5); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "max lum: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS6); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\tmin lum: %d\n", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS6); -+ val = (val >> 16) & 0xffff; -+ seq_printf(s, "max cll: %d", val); -+ val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS7); -+ val = val & 0xffff; -+ seq_printf(s, "\t\t\tmax fall: %d\n", val); -+ return 0; ++ analogix_dp_process_phy_request(dp); ++ drm_modeset_drop_locks(&ctx); ++ drm_modeset_acquire_fini(&ctx); +} 
++EXPORT_SYMBOL_GPL(analogix_dp_phy_test); + -+static int dw_hdmi_status_open(struct inode *inode, struct file *file) ++static irqreturn_t analogix_dp_hpd_irq_handler(int irq, void *arg) +{ -+ return single_open(file, dw_hdmi_status_show, inode->i_private); -+} ++ struct analogix_dp_device *dp = arg; + -+static const struct file_operations dw_hdmi_status_fops = { -+ .owner = THIS_MODULE, -+ .open = dw_hdmi_status_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ if (dp->drm_dev) ++ drm_helper_hpd_irq_event(dp->drm_dev); + -+static void dw_hdmi_register_debugfs(struct device *dev, struct dw_hdmi_qp *hdmi) ++ return IRQ_HANDLED; ++} ++ ++static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) +{ -+ u8 buf[11]; ++ struct analogix_dp_device *dp = arg; + -+ snprintf(buf, sizeof(buf), "dw-hdmi%d", hdmi->plat_data->id); -+ hdmi->debugfs_dir = debugfs_create_dir(buf, NULL); -+ if (IS_ERR(hdmi->debugfs_dir)) { -+ dev_err(dev, "failed to create debugfs dir!\n"); -+ return; -+ } ++ analogix_dp_irq_handler(dp); + -+ debugfs_create_file("status", 0400, hdmi->debugfs_dir, -+ hdmi, &dw_hdmi_status_fops); -+ debugfs_create_file("ctrl", 0600, hdmi->debugfs_dir, -+ hdmi, &dw_hdmi_ctrl_fops); ++ return IRQ_HANDLED; +} + -+static void dw_hdmi_qp_hdcp14_get_mem(struct dw_hdmi_qp *hdmi, u8 *data, u32 len) ++static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) +{ -+ u32 ksv_len, i, val; -+ void *hdmi_data = hdmi->plat_data->phy_data; -+ -+ if (hdmi->plat_data->set_hdcp14_mem) -+ hdmi->plat_data->set_hdcp14_mem(hdmi_data, true); -+ -+ ksv_len = len - BSTATUS_LEN - M0_LEN - SHAMAX; -+ for (i = 0; i < len; i++) { -+ /* read ksv list */ -+ if (i < ksv_len) -+ val = readl(hdmi->hdcp14_mem + HDMI_HDCP14_MEM_KSV0 + i * 4); -+ /* read bstatus */ -+ else if (i < len - SHAMAX - M0_LEN) -+ val = readl(hdmi->hdcp14_mem + HDMI_HDCP14_MEM_BSTATUS0 + -+ (i - ksv_len) * 4); -+ /* read M0 */ -+ else if (i < len - SHAMAX) -+ val = readl(hdmi->hdcp14_mem + HDMI_HDCP14_MEM_M0_1 + -+ (i - ksv_len - BSTATUS_LEN) * 4); -+ else -+ /* VH0 save in external memory is error, we need to read VH0 via ddc */ -+ hdcp_ddc_read(hdmi->ddc, HDMI_HDCP_ADDR, HDMI_VH0 + i - (len - SHAMAX), -+ &val); ++ int ret; ++ u8 spread; + -+ data[i] = val; ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); ++ if (ret != 1) { ++ dev_err(dp->dev, "failed to read downspread %d\n", ret); ++ return ret; + } -+ -+ if (hdmi->plat_data->set_hdcp14_mem) -+ hdmi->plat_data->set_hdcp14_mem(hdmi_data, false); ++ dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); ++ dev_dbg(dp->dev, "fast link training %s\n", ++ dp->fast_train_enable ? 
"supported" : "unsupported"); ++ return 0; +} + -+static int dw_hdmi_qp_register_hdcp(struct device *dev, -+ struct dw_hdmi_qp *hdmi) ++static int analogix_dp_link_power_up(struct analogix_dp_device *dp) +{ -+ struct dw_qp_hdcp hdmi_hdcp = { -+ .hdmi = hdmi, -+ .write = hdmi_writel, -+ .read = hdmi_readl, -+ .regs = hdmi->regs, -+ .get_mem = dw_hdmi_qp_hdcp14_get_mem, -+ }; -+ struct platform_device_info hdcp_device_info = { -+ .parent = dev, -+ .id = PLATFORM_DEVID_AUTO, -+ .res = NULL, -+ .num_res = 0, -+ .name = DW_HDCP_QP_DRIVER_NAME, -+ .data = &hdmi_hdcp, -+ .size_data = sizeof(hdmi_hdcp), -+ .dma_mask = DMA_BIT_MASK(32), -+ }; -+ hdmi->hdcp_dev = platform_device_register_full(&hdcp_device_info); -+ if (IS_ERR(hdmi->hdcp_dev)) { -+ dev_err(dev, "failed to register hdcp!\n"); -+ return -ENOMEM; -+ } ++ u8 value; ++ int ret; + -+ hdmi->hdcp = hdmi->hdcp_dev->dev.platform_data; ++ if (dp->dpcd[DP_DPCD_REV] < 0x11) ++ return 0; ++ ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_SET_POWER, &value); ++ if (ret < 0) ++ return ret; ++ ++ value &= ~DP_SET_POWER_MASK; ++ value |= DP_SET_POWER_D0; ++ ++ ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, value); ++ if (ret < 0) ++ return ret; ++ ++ usleep_range(1000, 2000); + + return 0; +} + -+static struct dw_hdmi_qp * -+__dw_hdmi_probe(struct platform_device *pdev, -+ const struct dw_hdmi_plat_data *plat_data) ++static int analogix_dp_link_power_down(struct analogix_dp_device *dp) +{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = dev->of_node; -+ struct device_node *ddc_node; -+ struct dw_hdmi_qp *hdmi; -+ struct dw_hdmi_qp_i2s_audio_data audio; -+ struct platform_device_info pdevinfo; -+ struct dw_hdmi_qp_cec_data cec; -+ struct resource *iores = NULL; -+ struct drm_panel *panel = NULL; -+ struct drm_bridge *bridge = NULL; -+ int irq; ++ u8 value; + int ret; + -+ ret = drm_of_find_panel_or_bridge(np, 1, -1, &panel, &bridge); -+ if (ret < 0 && ret != -ENODEV) -+ return ERR_PTR(ret); ++ if (dp->dpcd[DP_DPCD_REV] < 0x11) ++ return 0; + -+ hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL); -+ if (!hdmi) -+ return ERR_PTR(-ENOMEM); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_SET_POWER, &value); ++ if (ret < 0) ++ return ret; + -+ hdmi->panel = panel; -+ hdmi->next_bridge = bridge; -+ hdmi->connector.stereo_allowed = 1; -+ hdmi->plat_data = plat_data; -+ hdmi->dev = dev; -+ hdmi->sample_rate = 48000; -+ hdmi->disabled = true; ++ value &= ~DP_SET_POWER_MASK; ++ value |= DP_SET_POWER_D3; + -+ mutex_init(&hdmi->mutex); -+ mutex_init(&hdmi->audio_mutex); -+ mutex_init(&hdmi->cec_notifier_mutex); ++ ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, value); ++ if (ret < 0) ++ return ret; + -+ ddc_node = of_parse_phandle(np, "ddc-i2c-bus", 0); -+ if (ddc_node) { -+ hdmi->ddc = of_get_i2c_adapter_by_node(ddc_node); -+ of_node_put(ddc_node); -+ if (!hdmi->ddc) { -+ dev_dbg(hdmi->dev, "failed to read ddc node\n"); -+ return ERR_PTR(-EPROBE_DEFER); -+ } ++ return 0; ++} + -+ } else { -+ dev_dbg(hdmi->dev, "no ddc property found\n"); ++static int analogix_dp_commit(struct analogix_dp_device *dp) ++{ ++ struct video_info *video = &dp->video_info; ++ int ret; ++ ++ ret = drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd); ++ if (ret < 0) { ++ dev_err(dp->dev, "failed to read dpcd caps: %d\n", ret); ++ return ret; + } + -+ if (!plat_data->regm) { -+ const struct regmap_config *reg_config; ++ ret = analogix_dp_link_power_up(dp); ++ if (ret) { ++ dev_err(dp->dev, "failed to power up link: %d\n", ret); ++ return ret; ++ } + -+ reg_config = &hdmi_regmap_config; ++ if 
(device_property_read_bool(dp->dev, "panel-self-test")) ++ return drm_dp_dpcd_writeb(&dp->aux, DP_EDP_CONFIGURATION_SET, ++ DP_PANEL_SELF_TEST_ENABLE); + -+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); -+ hdmi->regs = devm_ioremap_resource(dev, iores); -+ if (IS_ERR(hdmi->regs)) { -+ ret = PTR_ERR(hdmi->regs); -+ goto err_ddc; -+ } ++ ret = analogix_dp_train_link(dp); ++ if (ret) { ++ dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); ++ return ret; ++ } + -+ hdmi->regm = devm_regmap_init_mmio(dev, hdmi->regs, reg_config); -+ if (IS_ERR(hdmi->regm)) { -+ dev_err(dev, "Failed to configure regmap\n"); -+ ret = PTR_ERR(hdmi->regm); -+ goto err_ddc; -+ } -+ } else { -+ hdmi->regm = plat_data->regm; ++ ret = analogix_dp_enable_scramble(dp, 1); ++ if (ret < 0) { ++ dev_err(dp->dev, "can not enable scramble\n"); ++ return ret; + } + -+ ret = dw_hdmi_detect_phy(hdmi); -+ if (ret < 0) -+ goto err_ddc; ++ analogix_dp_init_video(dp); ++ analogix_dp_set_video_format(dp); + -+ hdmi_writel(hdmi, 0, MAINUNIT_0_INT_MASK_N); -+ hdmi_writel(hdmi, 0, MAINUNIT_1_INT_MASK_N); -+ hdmi_writel(hdmi, 428571429, TIMER_BASE_CONFIG0); -+ hdmi->logo_plug_out = false; -+ if (hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data) == connector_status_connected && -+ hdmi_readl(hdmi, I2CM_INTERFACE_CONTROL0)) { -+ hdmi->initialized = true; -+ hdmi->disabled = false; ++ if (video->video_bist_enable) ++ analogix_dp_video_bist_enable(dp); ++ ++ ret = analogix_dp_config_video(dp); ++ if (ret) { ++ dev_err(dp->dev, "unable to config video\n"); ++ return ret; + } + -+ hdmi->sink_is_hdmi = true; ++ /* Check whether panel supports fast training */ ++ ret = analogix_dp_fast_link_train_detection(dp); ++ if (ret) ++ return ret; + -+ /* If DDC bus is not specified, try to register HDMI I2C bus */ -+ if (!hdmi->ddc) { -+ hdmi->ddc = dw_hdmi_i2c_adapter(hdmi); -+ if (IS_ERR(hdmi->ddc)) -+ hdmi->ddc = NULL; -+ /* -+ * Read high and low time from device tree. If not available use -+ * the default timing scl clock rate is about 99.6KHz. 
-+ */ -+ if (of_property_read_u32(np, "ddc-i2c-scl-high-time-ns", -+ &hdmi->i2c->scl_high_ns)) -+ hdmi->i2c->scl_high_ns = 4708; -+ if (of_property_read_u32(np, "ddc-i2c-scl-low-time-ns", -+ &hdmi->i2c->scl_low_ns)) -+ hdmi->i2c->scl_low_ns = 4916; ++ if (analogix_dp_detect_sink_psr(dp)) { ++ ret = analogix_dp_enable_sink_psr(dp); ++ if (ret) ++ return ret; + } + -+ /* Reset HDMI DDC I2C master controller and mute I2CM interrupts */ -+ if (hdmi->i2c) -+ dw_hdmi_i2c_init(hdmi); -+ -+ init_completion(&hdmi->flt_cmp); -+ init_completion(&hdmi->earc_cmp); ++ return ret; ++} + -+ if (of_property_read_bool(np, "scramble-low-rates")) -+ hdmi->scramble_low_rates = true; ++static int analogix_dp_enable_psr(struct analogix_dp_device *dp) ++{ ++ struct dp_sdp psr_vsc; ++ int ret; ++ u8 sink; + -+ hdmi_init_clk_regenerator(hdmi); ++ ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_STATUS, &sink); ++ if (ret != 1) ++ DRM_DEV_ERROR(dp->dev, "Failed to read psr status %d\n", ret); ++ else if (sink == DP_PSR_SINK_ACTIVE_RFB) ++ return 0; + -+ hdmi->bridge.driver_private = hdmi; -+ hdmi->bridge.funcs = &dw_hdmi_bridge_funcs; -+#ifdef CONFIG_OF -+ hdmi->bridge.of_node = pdev->dev.of_node; -+#endif ++ /* Prepare VSC packet as per EDP 1.4 spec, Table 6.9 */ + memset(&psr_vsc, 0, sizeof(psr_vsc)); + psr_vsc.sdp_header.HB0 = 0; + psr_vsc.sdp_header.HB1 = 0x7; +@@ -1011,9 +1224,20 @@ static int analogix_dp_enable_psr(struct analogix_dp_device *dp) + psr_vsc.db[1] = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID; + + ret = analogix_dp_send_psr_spd(dp, &psr_vsc, true); +- if (!ret) ++ if (!ret) { + analogix_dp_set_analog_power_down(dp, POWER_ALL, true); + ++ if (dp->phy) { ++ union phy_configure_opts phy_cfg = {0}; + -+ if (hdmi->phy.ops->setup_hpd) -+ hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data); ++ phy_cfg.dp.lanes = 0; ++ phy_cfg.dp.set_lanes = true; ++ ret = phy_configure(dp->phy, &phy_cfg); ++ if (ret) ++ return ret; ++ } ++ } + -+ hdmi->connector.ycbcr_420_allowed = hdmi->plat_data->ycbcr_420_allowed; + return ret; + } + +@@ -1059,66 +1283,34 @@ static int analogix_dp_disable_psr(struct analogix_dp_device *dp) + return analogix_dp_send_psr_spd(dp, &psr_vsc, true); + } + +-/* +- * This function is a bit of a catch-all for panel preparation, hopefully +- * simplifying the logic of functions that need to prepare/unprepare the panel +- * below. +- * +- * If @prepare is true, this function will prepare the panel. Conversely, if it +- * is false, the panel will be unprepared. +- * +- * If @is_modeset_prepare is true, the function will disregard the current state +- * of the panel and either prepare/unprepare the panel based on @prepare. Once +- * it finishes, it will update dp->panel_is_modeset to reflect the current state +- * of the panel. +- */ +-static int analogix_dp_prepare_panel(struct analogix_dp_device *dp, +- bool prepare, bool is_modeset_prepare) +-{ +- int ret = 0; +- +- if (!dp->plat_data->panel) +- return 0; +- +- mutex_lock(&dp->panel_lock); +- +- /* +- * Exit early if this is a temporary prepare/unprepare and we're already +- * modeset (since we neither want to prepare twice or unprepare early). 
+- */ +- if (dp->panel_is_modeset && !is_modeset_prepare) +- goto out; +- +- if (prepare) +- ret = drm_panel_prepare(dp->plat_data->panel); +- else +- ret = drm_panel_unprepare(dp->plat_data->panel); +- +- if (ret) +- goto out; +- +- if (is_modeset_prepare) +- dp->panel_is_modeset = prepare; +- +-out: +- mutex_unlock(&dp->panel_lock); +- return ret; +-} +- + static int analogix_dp_get_modes(struct drm_connector *connector) + { + struct analogix_dp_device *dp = to_dp(connector); + struct edid *edid; + int ret, num_modes = 0; + +- if (dp->plat_data->panel) { ++ if (dp->plat_data->right && dp->plat_data->right->plat_data->bridge) { ++ struct drm_bridge *bridge = dp->plat_data->right->plat_data->bridge; + -+ audio.hdmi = hdmi; -+ audio.eld = hdmi->connector.eld; -+ audio.write = hdmi_writel; -+ audio.read = hdmi_readl; -+ audio.mod = hdmi_modb; -+ hdmi->enable_audio = dw_hdmi_i2s_audio_enable; -+ hdmi->disable_audio = dw_hdmi_i2s_audio_disable; ++ if (bridge->ops & DRM_BRIDGE_OP_MODES) { ++ if (!drm_bridge_get_modes(bridge, connector)) ++ return 0; ++ } ++ } + -+ memset(&pdevinfo, 0, sizeof(pdevinfo)); -+ pdevinfo.parent = dev; -+ pdevinfo.id = PLATFORM_DEVID_AUTO; -+ pdevinfo.name = "dw-hdmi-qp-i2s-audio"; -+ pdevinfo.data = &audio; -+ pdevinfo.size_data = sizeof(audio); -+ pdevinfo.dma_mask = DMA_BIT_MASK(32); -+ hdmi->audio = platform_device_register_full(&pdevinfo); ++ if (dp->plat_data->panel) + num_modes += drm_panel_get_modes(dp->plat_data->panel, connector); +- } else { +- ret = analogix_dp_prepare_panel(dp, true, false); +- if (ret) { +- DRM_ERROR("Failed to prepare panel (%d)\n", ret); + -+ hdmi->extcon = devm_extcon_dev_allocate(hdmi->dev, dw_hdmi_cable); -+ if (IS_ERR(hdmi->extcon)) { -+ dev_err(hdmi->dev, "allocate extcon failed\n"); -+ ret = PTR_ERR(hdmi->extcon); -+ goto err_aud; -+ } ++ if (dp->plat_data->bridge) ++ num_modes += drm_bridge_get_modes(dp->plat_data->bridge, connector); + -+ ret = devm_extcon_dev_register(hdmi->dev, hdmi->extcon); -+ if (ret) { -+ dev_err(hdmi->dev, "failed to register extcon: %d\n", ret); -+ goto err_aud; -+ } ++ if (!num_modes) { ++ ret = analogix_dp_phy_power_on(dp); ++ if (ret) + return 0; +- } + -+ ret = extcon_set_property_capability(hdmi->extcon, EXTCON_DISP_HDMI, -+ EXTCON_PROP_DISP_HPD); -+ if (ret) { -+ dev_err(hdmi->dev, -+ "failed to set USB property capability: %d\n", ret); -+ goto err_aud; -+ } ++ if (dp->plat_data->panel) ++ analogix_dp_panel_prepare(dp); + + edid = drm_get_edid(connector, &dp->aux.ddc); + if (edid) { +@@ -1128,14 +1320,19 @@ static int analogix_dp_get_modes(struct drm_connector *connector) + kfree(edid); + } + +- ret = analogix_dp_prepare_panel(dp, false, false); +- if (ret) +- DRM_ERROR("Failed to unprepare panel (%d)\n", ret); ++ analogix_dp_phy_power_off(dp); + } + + if (dp->plat_data->get_modes) + num_modes += dp->plat_data->get_modes(dp->plat_data, connector); + ++ if (num_modes > 0 && dp->plat_data->split_mode) { ++ struct drm_display_mode *mode; + -+ irq = platform_get_irq(pdev, 0); -+ if (irq < 0) { -+ ret = irq; -+ goto err_aud; ++ list_for_each_entry(mode, &connector->probed_modes, head) ++ dp->plat_data->convert_to_split_mode(mode); + } + -+ hdmi->avp_irq = irq; -+ ret = devm_request_threaded_irq(dev, hdmi->avp_irq, -+ dw_hdmi_qp_avp_hardirq, -+ dw_hdmi_qp_avp_irq, IRQF_ONESHOT, -+ dev_name(dev), hdmi); -+ if (ret) -+ goto err_aud; + return num_modes; + } + +@@ -1181,38 +1378,110 @@ static const struct drm_connector_helper_funcs analogix_dp_connector_helper_func + }; + + static enum drm_connector_status 
+-analogix_dp_detect(struct drm_connector *connector, bool force) ++analogix_dp_detect(struct analogix_dp_device *dp) + { +- struct analogix_dp_device *dp = to_dp(connector); + enum drm_connector_status status = connector_status_disconnected; + int ret; + +- if (dp->plat_data->panel) +- return connector_status_connected; +- +- ret = analogix_dp_prepare_panel(dp, true, false); ++ ret = analogix_dp_phy_power_on(dp); + if (ret) { +- DRM_ERROR("Failed to prepare panel (%d)\n", ret); ++ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); + return connector_status_disconnected; + } + +- if (!analogix_dp_detect_hpd(dp)) ++ if (dp->plat_data->panel) ++ analogix_dp_panel_prepare(dp); + -+ irq = platform_get_irq(pdev, 1); -+ if (irq < 0) { -+ ret = irq; -+ goto err_aud; -+ } ++ if (!analogix_dp_detect_hpd(dp)) { ++ ret = analogix_dp_get_max_rx_bandwidth(dp, &dp->link_train.link_rate); ++ if (ret) { ++ dev_err(dp->dev, "failed to read max link rate\n"); ++ goto out; ++ } + -+ cec.irq = irq; ++ ret = analogix_dp_get_max_rx_lane_count(dp, &dp->link_train.lane_count); ++ if (ret) { ++ dev_err(dp->dev, "failed to read max lane count\n"); ++ goto out; ++ } + -+ if (of_property_read_bool(np, "cec-enable")) { -+ hdmi->cec_enable = true; -+ cec.hdmi = hdmi; -+ cec.ops = &dw_hdmi_qp_cec_ops; -+ pdevinfo.name = "dw-hdmi-qp-cec"; -+ pdevinfo.data = &cec; -+ pdevinfo.size_data = sizeof(cec); -+ pdevinfo.dma_mask = 0; -+ hdmi->cec = platform_device_register_full(&pdevinfo); + status = connector_status_connected; + } + +- ret = analogix_dp_prepare_panel(dp, false, false); +- if (ret) +- DRM_ERROR("Failed to unprepare panel (%d)\n", ret); ++ if (dp->plat_data->bridge) { ++ struct drm_bridge *next_bridge = dp->plat_data->bridge; + -+ irq = platform_get_irq(pdev, 2); -+ if (irq < 0) { -+ ret = irq; -+ goto err_cec; ++ if (next_bridge->ops & DRM_BRIDGE_OP_DETECT) ++ status = drm_bridge_detect(next_bridge); + } + -+ hdmi->earc_irq = irq; -+ ret = devm_request_threaded_irq(dev, hdmi->earc_irq, -+ dw_hdmi_qp_earc_hardirq, -+ dw_hdmi_qp_earc_irq, IRQF_SHARED, -+ dev_name(dev), hdmi); -+ if (ret) -+ goto err_cec; ++out: ++ analogix_dp_phy_power_off(dp); + -+ irq = platform_get_irq(pdev, 3); -+ if (irq < 0) { -+ ret = irq; -+ goto err_cec; -+ } ++ if (status == connector_status_connected) ++ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, true); ++ else ++ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); + + return status; + } + ++static enum drm_connector_status ++analogix_dp_connector_detect(struct drm_connector *connector, bool force) ++{ ++ struct analogix_dp_device *dp = to_dp(connector); + -+ hdmi->main_irq = irq; -+ ret = devm_request_threaded_irq(dev, hdmi->main_irq, -+ dw_hdmi_qp_main_hardirq, NULL, -+ IRQF_SHARED, dev_name(dev), hdmi); -+ if (ret) -+ goto err_cec; ++ if (dp->plat_data->right && analogix_dp_detect(dp->plat_data->right) != connector_status_connected) ++ return connector_status_disconnected; + -+ dw_hdmi_register_debugfs(dev, hdmi); ++ return analogix_dp_detect(dp); ++} + -+ if (hdmi_readl(hdmi, CONFIG_REG) & CONFIG_HDCP14) { -+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 1); -+ hdmi->hdcp14_mem = devm_ioremap_resource(dev, iores); ++static void analogix_dp_connector_force(struct drm_connector *connector) ++{ ++ struct analogix_dp_device *dp = to_dp(connector); + -+ if (IS_ERR(hdmi->hdcp14_mem)) { -+ ret = PTR_ERR(hdmi->hdcp14_mem); -+ goto err_cec; -+ } ++ if (connector->status == connector_status_connected) ++ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, true); ++ else 
++ extcon_set_state_sync(dp->extcon, EXTCON_DISP_DP, false); ++} + -+ ret = dw_hdmi_qp_register_hdcp(dev, hdmi); -+ if (ret) -+ goto err_cec; ++static int ++analogix_dp_atomic_connector_get_property(struct drm_connector *connector, ++ const struct drm_connector_state *state, ++ struct drm_property *property, ++ uint64_t *val) ++{ ++ struct rockchip_drm_private *private = connector->dev->dev_private; ++ struct analogix_dp_device *dp = to_dp(connector); ++ ++ if (property == private->split_area_prop) { ++ switch (dp->split_area) { ++ case 1: ++ *val = ROCKCHIP_DRM_SPLIT_LEFT_SIDE; ++ break; ++ case 2: ++ *val = ROCKCHIP_DRM_SPLIT_RIGHT_SIDE; ++ break; ++ default: ++ *val = ROCKCHIP_DRM_SPLIT_UNSET; ++ break; ++ } + } + -+ return hdmi; ++ return 0; ++} + -+err_cec: -+ if (!IS_ERR(hdmi->cec)) -+ platform_device_unregister(hdmi->cec); + static const struct drm_connector_funcs analogix_dp_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, +- .detect = analogix_dp_detect, ++ .detect = analogix_dp_connector_detect, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, ++ .force = analogix_dp_connector_force, ++ .atomic_get_property = analogix_dp_atomic_connector_get_property, + }; + + static int analogix_dp_bridge_attach(struct drm_bridge *bridge, +@@ -1223,28 +1492,53 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, + struct drm_connector *connector = NULL; + int ret = 0; + +- if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) { +- DRM_ERROR("Fix bridge driver to make connector optional!"); +- return -EINVAL; +- } +- + if (!bridge->encoder) { + DRM_ERROR("Parent encoder object not found"); + return -ENODEV; + } + ++ if (dp->plat_data->bridge) { ++ ret = drm_bridge_attach(bridge->encoder, dp->plat_data->bridge, bridge, ++ dp->plat_data->skip_connector ? 
++ 0 : DRM_BRIDGE_ATTACH_NO_CONNECTOR); ++ if (ret) { ++ DRM_ERROR("Failed to attach external bridge: %d\n", ret); ++ return ret; ++ } ++ } + -+err_aud: -+ if (hdmi->audio && !IS_ERR(hdmi->audio)) -+ platform_device_unregister(hdmi->audio); ++ if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) ++ return 0; + -+err_ddc: -+ if (hdmi->i2c) -+ i2c_del_adapter(&hdmi->i2c->adap); -+ else -+ i2c_put_adapter(hdmi->ddc); + if (!dp->plat_data->skip_connector) { ++ int connector_type = DRM_MODE_CONNECTOR_eDP; ++ struct rockchip_drm_private *private; + -+ if (!hdmi->plat_data->first_screen) { -+ dw_hdmi_destroy_properties(hdmi); -+ hdmi->connector.funcs->destroy(&hdmi->connector); -+ } ++ if (dp->plat_data->bridge && ++ dp->plat_data->bridge->type != DRM_MODE_CONNECTOR_Unknown) ++ connector_type = dp->plat_data->bridge->type; + -+ if (hdmi->bridge.encoder && !hdmi->plat_data->first_screen) -+ hdmi->bridge.encoder->funcs->destroy(hdmi->bridge.encoder); + connector = &dp->connector; + connector->polled = DRM_CONNECTOR_POLL_HPD; ++ if (dp->plat_data->bridge && dp->plat_data->bridge->ops & DRM_BRIDGE_OP_DETECT) ++ connector->polled = DRM_CONNECTOR_POLL_CONNECT | ++ DRM_CONNECTOR_POLL_DISCONNECT; + + ret = drm_connector_init(dp->drm_dev, connector, + &analogix_dp_connector_funcs, +- DRM_MODE_CONNECTOR_eDP); ++ connector_type); + if (ret) { + DRM_ERROR("Failed to initialize connector with drm\n"); + return ret; + } + ++ private = connector->dev->dev_private; + -+ return ERR_PTR(ret); -+} ++ if (dp->split_area) ++ drm_object_attach_property(&connector->base, ++ private->split_area_prop, ++ dp->split_area); + -+static void __dw_hdmi_remove(struct dw_hdmi_qp *hdmi) + drm_connector_helper_add(connector, + &analogix_dp_connector_helper_funcs); + drm_connector_attach_encoder(connector, encoder); +@@ -1267,6 +1561,14 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, + return 0; + } + ++static void analogix_dp_bridge_detach(struct drm_bridge *bridge) +{ -+ if (hdmi->avp_irq) -+ disable_irq(hdmi->avp_irq); ++ struct analogix_dp_device *dp = bridge->driver_private; + -+ if (hdmi->main_irq) -+ disable_irq(hdmi->main_irq); ++ if (dp->plat_data->detach) ++ dp->plat_data->detach(dp->plat_data, bridge); ++} + -+ if (hdmi->earc_irq) -+ disable_irq(hdmi->earc_irq); + static + struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp, + struct drm_atomic_state *state) +@@ -1290,7 +1592,8 @@ static + struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, + struct drm_atomic_state *state) + { +- struct drm_encoder *encoder = dp->encoder; ++ struct drm_bridge *bridge = &dp->bridge; ++ struct drm_encoder *encoder = bridge->encoder; + struct drm_connector *connector; + struct drm_connector_state *conn_state; + +@@ -1312,39 +1615,35 @@ analogix_dp_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *old_state = old_bridge_state->base.state; + struct analogix_dp_device *dp = bridge->driver_private; + struct drm_crtc *crtc; +- struct drm_crtc_state *old_crtc_state; +- int ret; ++ struct drm_crtc_state *old_crtc_state, *new_crtc_state; + + crtc = analogix_dp_get_new_crtc(dp, old_state); + if (!crtc) + return; + + old_crtc_state = drm_atomic_get_old_crtc_state(old_state, crtc); + -+ debugfs_remove_recursive(hdmi->debugfs_dir); ++ new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc); ++ analogix_dp_bridge_mode_set(bridge, &new_crtc_state->adjusted_mode); + -+ if (!hdmi->plat_data->first_screen) { -+ dw_hdmi_destroy_properties(hdmi); -+ 
hdmi->connector.funcs->destroy(&hdmi->connector); -+ } + /* Don't touch the panel if we're coming back from PSR */ + if (old_crtc_state && old_crtc_state->self_refresh_active) + return; + +- ret = analogix_dp_prepare_panel(dp, true, true); +- if (ret) +- DRM_ERROR("failed to setup the panel ret = %d\n", ret); ++ if (dp->plat_data->panel) ++ analogix_dp_panel_prepare(dp); + } + + static int analogix_dp_set_bridge(struct analogix_dp_device *dp) + { + int ret; + +- pm_runtime_get_sync(dp->dev); +- +- ret = clk_prepare_enable(dp->clock); +- if (ret < 0) { +- DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); +- goto out_dp_clk_pre; +- } +- + if (dp->plat_data->power_on_start) + dp->plat_data->power_on_start(dp->plat_data); + +- phy_power_on(dp->phy); ++ ret = analogix_dp_phy_power_on(dp); ++ if (ret) ++ return ret; + + ret = analogix_dp_init_dp(dp); + if (ret) +@@ -1362,28 +1661,35 @@ static int analogix_dp_set_bridge(struct analogix_dp_device *dp) + } + + ret = analogix_dp_commit(dp); +- if (ret) { ++ if (ret < 0) { + DRM_ERROR("dp commit error, ret = %d\n", ret); + goto out_dp_init; + } + ++ if (dp->plat_data->panel) ++ drm_panel_enable(dp->plat_data->panel); + -+ if (hdmi->audio && !IS_ERR(hdmi->audio)) -+ platform_device_unregister(hdmi->audio); + if (dp->plat_data->power_on_end) + dp->plat_data->power_on_end(dp->plat_data); + +- enable_irq(dp->irq); + return 0; + + out_dp_init: +- phy_power_off(dp->phy); ++ analogix_dp_phy_power_off(dp); + if (dp->plat_data->power_off) + dp->plat_data->power_off(dp->plat_data); +- clk_disable_unprepare(dp->clock); +-out_dp_clk_pre: +- pm_runtime_put_sync(dp->dev); +- + return ret; + } + ++static void analogix_dp_modeset_retry_work_fn(struct work_struct *work) ++{ ++ struct analogix_dp_device *dp = ++ container_of(work, typeof(*dp), modeset_retry_work); + -+ if (hdmi->bridge.encoder && !hdmi->plat_data->first_screen) -+ hdmi->bridge.encoder->funcs->destroy(hdmi->bridge.encoder); -+ if (!IS_ERR(hdmi->cec)) -+ platform_device_unregister(hdmi->cec); -+ if (!IS_ERR(hdmi->hdcp_dev)) -+ platform_device_unregister(hdmi->hdcp_dev); -+ if (hdmi->i2c) -+ i2c_del_adapter(&hdmi->i2c->adap); -+ else -+ i2c_put_adapter(hdmi->ddc); ++ /* Send Hotplug uevent so userspace can reprobe */ ++ drm_kms_helper_hotplug_event(dp->bridge.dev); +} + -+/* ----------------------------------------------------------------------------- -+ * Bind/unbind API, used from platforms based on the component framework. 
-+ */ -+struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev, -+ struct drm_encoder *encoder, -+ struct dw_hdmi_plat_data *plat_data) -+{ -+ struct dw_hdmi_qp *hdmi; -+ int ret; + static void + analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +@@ -1422,12 +1728,14 @@ analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, + usleep_range(10, 11); + } + dev_err(dp->dev, "too many times retry set bridge, give it up\n"); + -+ hdmi = __dw_hdmi_probe(pdev, plat_data); -+ if (IS_ERR(hdmi)) -+ return hdmi; ++ /* Schedule a Hotplug Uevent to userspace to start modeset */ ++ schedule_work(&dp->modeset_retry_work); + } + + static void analogix_dp_bridge_disable(struct drm_bridge *bridge) + { + struct analogix_dp_device *dp = bridge->driver_private; +- int ret; + + if (dp->dpms_mode != DRM_MODE_DPMS_ON) + return; +@@ -1439,27 +1747,29 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) + } + } + +- disable_irq(dp->irq); ++ if (!analogix_dp_get_plug_in_status(dp)) ++ analogix_dp_link_power_down(dp); + + if (dp->plat_data->power_off) + dp->plat_data->power_off(dp->plat_data); + + analogix_dp_set_analog_power_down(dp, POWER_ALL, 1); +- phy_power_off(dp->phy); +- +- clk_disable_unprepare(dp->clock); ++ analogix_dp_phy_power_off(dp); + +- pm_runtime_put_sync(dp->dev); +- +- ret = analogix_dp_prepare_panel(dp, false, true); +- if (ret) +- DRM_ERROR("failed to setup the panel ret = %d\n", ret); ++ if (dp->plat_data->panel) ++ analogix_dp_panel_unprepare(dp); + + dp->fast_train_enable = false; + dp->psr_supported = false; + dp->dpms_mode = DRM_MODE_DPMS_OFF; + } + ++void analogix_dp_disable(struct analogix_dp_device *dp) ++{ ++ analogix_dp_bridge_disable(&dp->bridge); ++} ++EXPORT_SYMBOL_GPL(analogix_dp_disable); + -+ if (!plat_data->first_screen) { -+ ret = drm_bridge_attach(encoder, &hdmi->bridge, NULL, 0); -+ if (ret) { -+ __dw_hdmi_remove(hdmi); -+ dev_err(hdmi->dev, "Failed to initialize bridge with drm\n"); -+ return ERR_PTR(ret); -+ } + static void + analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +@@ -1524,33 +1834,38 @@ analogix_dp_bridge_atomic_post_disable(struct drm_bridge *bridge, + } + + static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, +- const struct drm_display_mode *orig_mode, +- const struct drm_display_mode *mode) ++ const struct drm_display_mode *adj_mode) + { + struct analogix_dp_device *dp = bridge->driver_private; + struct drm_display_info *display_info = &dp->connector.display_info; + struct video_info *video = &dp->video_info; ++ struct drm_display_mode *mode = &video->mode; + struct device_node *dp_node = dp->dev->of_node; + int vic; + ++ drm_mode_copy(mode, adj_mode); ++ if (dp->plat_data->split_mode) ++ dp->plat_data->convert_to_origin_mode(mode); + -+ plat_data->connector = &hdmi->connector; -+ if (hdmi->skip_connector && hdmi->next_bridge) -+ plat_data->bridge = hdmi->next_bridge; -+ else -+ plat_data->bridge = NULL; + /* Input video interlaces & hsync pol & vsync pol */ + video->interlaced = !!(mode->flags & DRM_MODE_FLAG_INTERLACE); +- video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); +- video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); ++ if (dp->plat_data->dev_type == RK3588_EDP) { ++ video->v_sync_polarity = true; ++ video->h_sync_polarity = true; ++ } else { ++ video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); ++ video->h_sync_polarity = !!(mode->flags & 
DRM_MODE_FLAG_NHSYNC); ++ } + + /* Input video dynamic_range & colorimetry */ + vic = drm_match_cea_mode(mode); + if ((vic == 6) || (vic == 7) || (vic == 21) || (vic == 22) || +- (vic == 2) || (vic == 3) || (vic == 17) || (vic == 18)) { ++ (vic == 2) || (vic == 3) || (vic == 17) || (vic == 18)) + video->dynamic_range = CEA; +- video->ycbcr_coeff = COLOR_YCBCR601; +- } else if (vic) { ++ else if (vic) + video->dynamic_range = CEA; +- video->ycbcr_coeff = COLOR_YCBCR709; +- } else { ++ else + video->dynamic_range = VESA; +- video->ycbcr_coeff = COLOR_YCBCR709; +- } + + /* Input vide bpc and color_formats */ + switch (display_info->bpc) { +@@ -1570,12 +1885,16 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, + video->color_depth = COLOR_8; + break; + } +- if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR444) ++ if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR444) { + video->color_space = COLOR_YCBCR444; +- else if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422) ++ video->ycbcr_coeff = COLOR_YCBCR709; ++ } else if (display_info->color_formats & DRM_COLOR_FORMAT_YCBCR422) { + video->color_space = COLOR_YCBCR422; +- else ++ video->ycbcr_coeff = COLOR_YCBCR709; ++ } else { + video->color_space = COLOR_RGB; ++ video->ycbcr_coeff = COLOR_YCBCR601; ++ } + + /* + * NOTE: those property parsing code is used for providing backward +@@ -1600,6 +1919,56 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, + video->interlaced = true; + } + ++static bool analogix_dp_link_config_validate(u8 link_rate, u8 lane_count) ++{ ++ switch (link_rate) { ++ case DP_LINK_BW_1_62: ++ case DP_LINK_BW_2_7: ++ case DP_LINK_BW_5_4: ++ break; ++ default: ++ return false; + } + -+ if (plat_data->split_mode && !hdmi->plat_data->first_screen) { -+ struct dw_hdmi_qp *secondary = NULL; -+ -+ if (hdmi->plat_data->left) -+ secondary = hdmi->plat_data->left; -+ else if (hdmi->plat_data->right) -+ secondary = hdmi->plat_data->right; -+ -+ if (!secondary) -+ return ERR_PTR(-ENOMEM); -+ ret = drm_bridge_attach(encoder, &secondary->bridge, &hdmi->bridge, -+ DRM_BRIDGE_ATTACH_NO_CONNECTOR); -+ if (ret) -+ return ERR_PTR(ret); ++ switch (lane_count) { ++ case 1: ++ case 2: ++ case 4: ++ break; ++ default: ++ return false; + } + -+ return hdmi; ++ return true; +} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_bind); + -+void dw_hdmi_qp_unbind(struct dw_hdmi_qp *hdmi) ++static enum drm_mode_status ++analogix_dp_bridge_mode_valid(struct drm_bridge *bridge, ++ const struct drm_display_info *info, ++ const struct drm_display_mode *mode) +{ -+ __dw_hdmi_remove(hdmi); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_unbind); ++ struct analogix_dp_device *dp = bridge->driver_private; ++ struct drm_display_mode m; ++ u32 max_link_rate, max_lane_count; + -+void dw_hdmi_qp_suspend(struct device *dev, struct dw_hdmi_qp *hdmi) -+{ -+ if (!hdmi) { -+ dev_warn(dev, "Hdmi has not been initialized\n"); -+ return; -+ } ++ drm_mode_copy(&m, mode); + -+ mutex_lock(&hdmi->mutex); ++ if (dp->plat_data->split_mode || dp->plat_data->dual_connector_split) ++ dp->plat_data->convert_to_origin_mode(&m); + -+ /* -+ * When system shutdown, hdmi should be disabled. -+ * When system suspend, dw_hdmi_qp_bridge_disable will disable hdmi first. -+ * To prevent duplicate operation, we should determine whether hdmi -+ * has been disabled. 
-+ */ -+ if (!hdmi->disabled) -+ hdmi->disabled = true; -+ mutex_unlock(&hdmi->mutex); ++ max_link_rate = min_t(u32, dp->video_info.max_link_rate, ++ dp->link_train.link_rate); ++ max_lane_count = min_t(u32, dp->video_info.max_lane_count, ++ dp->link_train.lane_count); ++ if (analogix_dp_link_config_validate(max_link_rate, max_lane_count) && ++ !analogix_dp_bandwidth_ok(dp, &m, ++ drm_dp_bw_code_to_link_rate(max_link_rate), ++ max_lane_count)) ++ return MODE_BAD; + -+ if (hdmi->avp_irq) -+ disable_irq(hdmi->avp_irq); ++ return MODE_OK; ++} + -+ if (hdmi->main_irq) -+ disable_irq(hdmi->main_irq); + static const struct drm_bridge_funcs analogix_dp_bridge_funcs = { + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, +@@ -1608,37 +1977,85 @@ static const struct drm_bridge_funcs analogix_dp_bridge_funcs = { + .atomic_enable = analogix_dp_bridge_atomic_enable, + .atomic_disable = analogix_dp_bridge_atomic_disable, + .atomic_post_disable = analogix_dp_bridge_atomic_post_disable, +- .mode_set = analogix_dp_bridge_mode_set, + .attach = analogix_dp_bridge_attach, ++ .detach = analogix_dp_bridge_detach, ++ .mode_valid = analogix_dp_bridge_mode_valid, + }; + +-static int analogix_dp_create_bridge(struct drm_device *drm_dev, +- struct analogix_dp_device *dp) ++static int analogix_dp_bridge_init(struct analogix_dp_device *dp) + { +- struct drm_bridge *bridge; ++ struct drm_bridge *bridge = &dp->bridge; ++ int ret; + -+ if (hdmi->earc_irq) -+ disable_irq(hdmi->earc_irq); ++ if (!dp->plat_data->left) { ++ ret = drm_bridge_attach(dp->encoder, bridge, NULL, 0); ++ if (ret) { ++ DRM_ERROR("failed to attach drm bridge\n"); ++ return ret; ++ } ++ } + +- bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL); +- if (!bridge) { +- DRM_ERROR("failed to allocate for drm bridge\n"); +- return -ENOMEM; ++ if (dp->plat_data->right) { ++ struct analogix_dp_device *secondary = dp->plat_data->right; ++ struct drm_bridge *last_bridge = ++ list_last_entry(&bridge->encoder->bridge_chain, ++ struct drm_bridge, chain_node); + -+ pinctrl_pm_select_sleep_state(dev); -+ drm_connector_update_edid_property(&hdmi->connector, NULL); ++ ret = drm_bridge_attach(dp->encoder, &secondary->bridge, last_bridge, ++ DRM_BRIDGE_ATTACH_NO_CONNECTOR); ++ if (ret) ++ return ret; + } + +- dp->bridge = bridge; ++ return 0; +} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_suspend); + -+void dw_hdmi_qp_resume(struct device *dev, struct dw_hdmi_qp *hdmi) ++static u32 analogix_dp_parse_link_frequencies(struct analogix_dp_device *dp) +{ -+ if (!hdmi) { -+ dev_warn(dev, "Hdmi has not been initialized\n"); -+ return; ++ struct device_node *node = dp->dev->of_node; ++ struct device_node *endpoint; ++ u64 frequency = 0; ++ int cnt; ++ ++ endpoint = of_graph_get_endpoint_by_regs(node, 1, 0); ++ if (!endpoint) ++ return 0; ++ ++ cnt = of_property_count_u64_elems(endpoint, "link-frequencies"); ++ if (cnt > 0) ++ of_property_read_u64_index(endpoint, "link-frequencies", ++ cnt - 1, &frequency); ++ of_node_put(endpoint); ++ ++ if (!frequency) ++ return 0; ++ ++ do_div(frequency, 10 * 1000); /* symbol rate kbytes */ + +- bridge->driver_private = dp; +- bridge->funcs = &analogix_dp_bridge_funcs; ++ switch (frequency) { ++ case 162000: ++ case 270000: ++ case 540000: ++ break; ++ default: ++ dev_err(dp->dev, "invalid link frequency value: %lld\n", frequency); ++ return 0; + } + +- return drm_bridge_attach(dp->encoder, bridge, NULL, 0); ++ return frequency; + } + + static int 
analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) + { + struct device_node *dp_node = dp->dev->of_node; + struct video_info *video_info = &dp->video_info; ++ struct property *prop; ++ int ret, len, num_lanes; ++ u32 max_link_rate; + + switch (dp->plat_data->dev_type) { + case RK3288_DP: +- case RK3399_EDP: ++ case RK3568_EDP: + /* + * Like Rk3288 DisplayPort TRM indicate that "Main link + * containing 4 physical lanes of 2.7/1.62 Gbps/lane". +@@ -1646,6 +2063,11 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) + video_info->max_link_rate = 0x0A; + video_info->max_lane_count = 0x04; + break; ++ case RK3399_EDP: ++ case RK3588_EDP: ++ video_info->max_link_rate = 0x14; ++ video_info->max_lane_count = 0x04; ++ break; + case EXYNOS_DP: + /* + * NOTE: those property parseing code is used for +@@ -1658,6 +2080,44 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) + break; + } + ++ max_link_rate = analogix_dp_parse_link_frequencies(dp); ++ if (max_link_rate && max_link_rate < drm_dp_bw_code_to_link_rate(video_info->max_link_rate)) ++ video_info->max_link_rate = drm_dp_link_rate_to_bw_code(max_link_rate); + -+ hdmi_writel(hdmi, 0, MAINUNIT_0_INT_MASK_N); -+ hdmi_writel(hdmi, 0, MAINUNIT_1_INT_MASK_N); -+ hdmi_writel(hdmi, 428571429, TIMER_BASE_CONFIG0); ++ video_info->video_bist_enable = ++ of_property_read_bool(dp_node, "analogix,video-bist-enable"); ++ video_info->force_stream_valid = ++ of_property_read_bool(dp_node, "analogix,force-stream-valid"); + -+ pinctrl_pm_select_default_state(dev); ++ prop = of_find_property(dp_node, "data-lanes", &len); ++ if (!prop) { ++ video_info->lane_map[0] = 0; ++ video_info->lane_map[1] = 1; ++ video_info->lane_map[2] = 2; ++ video_info->lane_map[3] = 3; ++ DRM_DEV_DEBUG(dp->dev, "failed to find data lane mapping, using default\n"); ++ return 0; ++ } + -+ if (hdmi->cec_adap) -+ hdmi->cec_adap->ops->adap_enable(hdmi->cec_adap, true); ++ num_lanes = len / sizeof(u32); + -+ mutex_lock(&hdmi->mutex); -+ if (hdmi->i2c) -+ dw_hdmi_i2c_init(hdmi); -+ if (hdmi->avp_irq) -+ enable_irq(hdmi->avp_irq); ++ if (num_lanes < 1 || num_lanes > 4 || num_lanes == 3) { ++ DRM_DEV_ERROR(dp->dev, "bad number of data lanes\n"); ++ return -EINVAL; ++ } + -+ if (hdmi->main_irq) -+ enable_irq(hdmi->main_irq); ++ video_info->max_lane_count = num_lanes; + -+ if (hdmi->earc_irq) -+ enable_irq(hdmi->earc_irq); ++ ret = of_property_read_u32_array(dp_node, "data-lanes", ++ video_info->lane_map, num_lanes); ++ if (ret) { ++ DRM_DEV_ERROR(dp->dev, "failed to read lane data\n"); ++ return ret; ++ } + -+ mutex_unlock(&hdmi->mutex); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_qp_resume); ++ if (device_property_read_u32(dp->dev, "split-area", &dp->split_area)) ++ dp->split_area = 0; + -+MODULE_AUTHOR("Algea Cao "); -+MODULE_DESCRIPTION("DW HDMI QP transmitter driver"); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("platform:dw-hdmi-qp"); -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h -new file mode 100644 -index 000000000..e9b5e19a3 ---- /dev/null -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h -@@ -0,0 +1,850 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) Rockchip Electronics Co.Ltd -+ * Author: -+ * Algea Cao -+ */ -+#ifndef __DW_HDMI_QP_H__ -+#define __DW_HDMI_QP_H__ -+/* Main Unit Registers */ -+#define CORE_ID 0x0 -+#define VER_NUMBER 0x4 -+#define VER_TYPE 0x8 -+#define CONFIG_REG 0xc -+#define CONFIG_CEC BIT(28) -+#define CONFIG_AUD_UD BIT(23) -+#define CONFIG_HDCP14 BIT(8) 
-+#define CORE_TIMESTAMP_HHMM 0x14 -+#define CORE_TIMESTAMP_MMDD 0x18 -+#define CORE_TIMESTAMP_YYYY 0x1c -+/* Reset Manager Registers */ -+#define GLOBAL_SWRESET_REQUEST 0x40 -+#define EARCRX_CMDC_SWINIT_P BIT(27) -+#define AVP_DATAPATH_PACKET_AUDIO_SWINIT_P BIT(10) -+#define GLOBAL_SWDISABLE 0x44 -+#define CEC_SWDISABLE BIT(17) -+#define AVP_DATAPATH_PACKET_AUDIO_SWDISABLE BIT(10) -+#define AVP_DATAPATH_VIDEO_SWDISABLE BIT(6) -+#define RESET_MANAGER_CONFIG0 0x48 -+#define RESET_MANAGER_STATUS0 0x50 -+#define RESET_MANAGER_STATUS1 0x54 -+#define RESET_MANAGER_STATUS2 0x58 -+/* Timer Base Registers */ -+#define TIMER_BASE_CONFIG0 0x80 -+#define TIMER_BASE_STATUS0 0x84 -+/* CMU Registers */ -+#define CMU_CONFIG0 0xa0 -+#define CMU_CONFIG1 0xa4 -+#define CMU_CONFIG2 0xa8 -+#define CMU_CONFIG3 0xac -+#define CMU_STATUS 0xb0 -+#define DISPLAY_CLK_MONITOR 0x3f -+#define DISPLAY_CLK_LOCKED 0X15 -+#define EARC_BPCLK_OFF BIT(9) -+#define AUDCLK_OFF BIT(7) -+#define LINKQPCLK_OFF BIT(5) -+#define VIDQPCLK_OFF BIT(3) -+#define IPI_CLK_OFF BIT(1) -+#define CMU_IPI_CLK_FREQ 0xb4 -+#define CMU_VIDQPCLK_FREQ 0xb8 -+#define CMU_LINKQPCLK_FREQ 0xbc -+#define CMU_AUDQPCLK_FREQ 0xc0 -+#define CMU_EARC_BPCLK_FREQ 0xc4 -+/* I2CM Registers */ -+#define I2CM_SM_SCL_CONFIG0 0xe0 -+#define I2CM_FM_SCL_CONFIG0 0xe4 -+#define I2CM_CONFIG0 0xe8 -+#define I2CM_CONTROL0 0xec -+#define I2CM_STATUS0 0xf0 -+#define I2CM_INTERFACE_CONTROL0 0xf4 -+#define I2CM_ADDR 0xff000 -+#define I2CM_SLVADDR 0xfe0 -+#define I2CM_WR_MASK 0x1e -+#define I2CM_EXT_READ BIT(4) -+#define I2CM_SHORT_READ BIT(3) -+#define I2CM_FM_READ BIT(2) -+#define I2CM_FM_WRITE BIT(1) -+#define I2CM_FM_EN BIT(0) -+#define I2CM_INTERFACE_CONTROL1 0xf8 -+#define I2CM_SEG_PTR 0x7f80 -+#define I2CM_SEG_ADDR 0x7f -+#define I2CM_INTERFACE_WRDATA_0_3 0xfc -+#define I2CM_INTERFACE_WRDATA_4_7 0x100 -+#define I2CM_INTERFACE_WRDATA_8_11 0x104 -+#define I2CM_INTERFACE_WRDATA_12_15 0x108 -+#define I2CM_INTERFACE_RDDATA_0_3 0x10c -+#define I2CM_INTERFACE_RDDATA_4_7 0x110 -+#define I2CM_INTERFACE_RDDATA_8_11 0x114 -+#define I2CM_INTERFACE_RDDATA_12_15 0x118 -+/* SCDC Registers */ -+#define SCDC_CONFIG0 0x140 -+#define SCDC_I2C_FM_EN BIT(12) -+#define SCDC_UPD_FLAGS_AUTO_CLR BIT(6) -+#define SCDC_UPD_FLAGS_POLL_EN BIT(4) -+#define SCDC_CONTROL0 0x148 -+#define SCDC_STATUS0 0x150 -+#define STATUS_UPDATE BIT(0) -+#define FRL_START BIT(4) -+#define FLT_UPDATE BIT(5) -+/* FLT Registers */ -+#define FLT_CONFIG0 0x160 -+#define FLT_CONFIG1 0x164 -+#define FLT_CONFIG2 0x168 -+#define FLT_CONTROL0 0x170 -+/* Main Unit 2 Registers */ -+#define MAINUNIT_STATUS0 0x180 -+/* Video Interface Registers */ -+#define VIDEO_INTERFACE_CONFIG0 0x800 -+#define VIDEO_INTERFACE_CONFIG1 0x804 -+#define VIDEO_INTERFACE_CONFIG2 0x808 -+#define VIDEO_INTERFACE_CONTROL0 0x80c -+#define VIDEO_INTERFACE_STATUS0 0x814 -+/* Video Packing Registers */ -+#define VIDEO_PACKING_CONFIG0 0x81c -+/* Audio Interface Registers */ -+#define AUDIO_INTERFACE_CONFIG0 0x820 -+#define AUD_IF_SEL_MSK 0x3 -+#define AUD_IF_SPDIF 0x2 -+#define AUD_IF_I2S 0x1 -+#define AUD_IF_PAI 0x0 -+#define AUD_FIFO_INIT_ON_OVF_MSK BIT(2) -+#define AUD_FIFO_INIT_ON_OVF_EN BIT(2) -+#define I2S_LINES_EN_MSK GENMASK(7, 4) -+#define I2S_LINES_EN(x) BIT(x + 4) -+#define I2S_BPCUV_RCV_MSK BIT(12) -+#define I2S_BPCUV_RCV_EN BIT(12) -+#define I2S_BPCUV_RCV_DIS 0 -+#define SPDIF_LINES_EN GENMASK(19, 16) -+#define AUD_FORMAT_MSK GENMASK(26, 24) -+#define AUD_3DOBA (0x7 << 24) -+#define AUD_3DASP (0x6 << 24) -+#define AUD_MSOBA (0x5 << 24) 
-+#define AUD_MSASP (0x4 << 24) -+#define AUD_HBR (0x3 << 24) -+#define AUD_DST (0x2 << 24) -+#define AUD_OBA (0x1 << 24) -+#define AUD_ASP (0x0 << 24) -+#define AUDIO_INTERFACE_CONFIG1 0x824 -+#define AUDIO_INTERFACE_CONTROL0 0x82c -+#define AUDIO_FIFO_CLR_P BIT(0) -+#define AUDIO_INTERFACE_STATUS0 0x834 -+/* Frame Composer Registers */ -+#define FRAME_COMPOSER_CONFIG0 0x840 -+#define FRAME_COMPOSER_CONFIG1 0x844 -+#define FRAME_COMPOSER_CONFIG2 0x848 -+#define FRAME_COMPOSER_CONFIG3 0x84c -+#define FRAME_COMPOSER_CONFIG4 0x850 -+#define FRAME_COMPOSER_CONFIG5 0x854 -+#define FRAME_COMPOSER_CONFIG6 0x858 -+#define FRAME_COMPOSER_CONFIG7 0x85c -+#define FRAME_COMPOSER_CONFIG8 0x860 -+#define FRAME_COMPOSER_CONFIG9 0x864 -+#define KEEPOUT_REKEY_CFG GENMASK(9, 8) -+#define KEEPOUT_REKEY_ALWAYS (0x2 << 8) -+#define FRAME_COMPOSER_CONTROL0 0x86c -+/* Video Monitor Registers */ -+#define VIDEO_MONITOR_CONFIG0 0x880 -+#define VIDEO_MONITOR_STATUS0 0x884 -+#define VIDEO_MONITOR_STATUS1 0x888 -+#define VIDEO_MONITOR_STATUS2 0x88c -+#define VIDEO_MONITOR_STATUS3 0x890 -+#define VIDEO_MONITOR_STATUS4 0x894 -+#define VIDEO_MONITOR_STATUS5 0x898 -+#define VIDEO_MONITOR_STATUS6 0x89c -+/* HDCP2 Logic Registers */ -+#define HDCP2LOGIC_CONFIG0 0x8e0 -+#define HDCP2_BYPASS BIT(0) -+#define HDCP2LOGIC_ESM_GPIO_IN 0x8e4 -+#define HDCP2LOGIC_ESM_GPIO_OUT 0x8e8 -+#define HDCP2_AUTHENTICATION_SUCCESS BIT(6) -+/* HDCP14 Registers */ -+#define HDCP14_CONFIG0 0x900 -+#define HDCP14_OESS_ESSS_OVR_VALUE BIT(14) -+#define HDCP14_OESS_ESSS_OVR_EN BIT(13) -+#define HDCP14_CONFIG1 0x904 -+#define HDCP14_SHA1_MSG_CORRECT_P BIT(3) -+#define HDCP14_CONFIG2 0x908 -+#define HDCP14_CONFIG3 0x90c -+#define HDCP14_KEY_SEED 0x914 -+#define HDCP14_KEY_H 0x918 -+#define HDCP14_KEY_L 0x91c -+#define HDCP14_KEY_STATUS 0x920 -+#define HDCP14_AKSV_H 0x924 -+#define HDCP14_AKSV_L 0x928 -+#define HDCP14_AN_H 0x92c -+#define HDCP14_AN_L 0x930 -+#define HDCP14_STATUS0 0x934 -+#define HDCP14_RPT_DEVICE_COUNT 0xFE00 -+#define HDCP14_STATUS1 0x938 -+#define HDCP14_RCV_REPEATER BIT(6) -+#define HDCP14_RCV_KSV_FIFO_READY BIT(5) -+/* Scrambler Registers */ -+#define SCRAMB_CONFIG0 0x960 -+/* Video Configuration Registers */ -+#define LINK_CONFIG0 0x968 -+#define OPMODE_FRL_4LANES BIT(8) -+#define OPMODE_DVI BIT(4) -+#define OPMODE_FRL BIT(0) -+/* TMDS FIFO Registers */ -+#define TMDS_FIFO_CONFIG0 0x970 -+#define TMDS_FIFO_CONTROL0 0x974 -+/* FRL RSFEC Registers */ -+#define FRL_RSFEC_CONFIG0 0xa20 -+#define FRL_RSFEC_STATUS0 0xa30 -+/* FRL Packetizer Registers */ -+#define FRL_PKTZ_CONFIG0 0xa40 -+#define FRL_PKTZ_CONTROL0 0xa44 -+#define FRL_PKTZ_CONTROL1 0xa50 -+#define FRL_PKTZ_STATUS1 0xa54 -+/* Packet Scheduler Registers */ -+#define PKTSCHED_CONFIG0 0xa80 -+#define PKTSCHED_PRQUEUE0_CONFIG0 0xa84 -+#define PKTSCHED_PRQUEUE1_CONFIG0 0xa88 -+#define PKTSCHED_PRQUEUE2_CONFIG0 0xa8c -+#define PKTSCHED_PRQUEUE2_CONFIG1 0xa90 -+#define PKTSCHED_PRQUEUE2_CONFIG2 0xa94 -+#define PKTSCHED_PKT_CONFIG0 0xa98 -+#define PKTSCHED_PKT_CONFIG1 0xa9c -+#define PKTSCHED_VSI_FIELDRATE BIT(14) -+#define PKTSCHED_DRMI_FIELDRATE BIT(13) -+#define PKTSCHED_AVI_FIELDRATE BIT(12) -+#define PKTSCHED_PKT_CONFIG2 0xaa0 -+#define PKTSCHED_PKT_CONFIG3 0xaa4 -+#define PKTSCHED_PKT_EN 0xaa8 -+#define PKTSCHED_DRMI_TX_EN BIT(17) -+#define PKTSCHED_AUDI_TX_EN BIT(15) -+#define PKTSCHED_AVI_TX_EN BIT(13) -+#define PKTSCHED_VSI_TX_EN BIT(12) -+#define PKTSCHED_EMP_CVTEM_TX_EN BIT(10) -+#define PKTSCHED_AMD_TX_EN BIT(8) -+#define PKTSCHED_GCP_TX_EN BIT(3) -+#define 
PKTSCHED_AUDS_TX_EN BIT(2) -+#define PKTSCHED_ACR_TX_EN BIT(1) -+#define PKTSCHED_NULL_TX_EN BIT(0) -+#define PKTSCHED_PKT_CONTROL0 0xaac -+#define PKTSCHED_PKT_SEND 0xab0 -+#define PKTSCHED_PKT_STATUS0 0xab4 -+#define PKTSCHED_PKT_STATUS1 0xab8 -+#define PKT_NULL_CONTENTS0 0xb00 -+#define PKT_NULL_CONTENTS1 0xb04 -+#define PKT_NULL_CONTENTS2 0xb08 -+#define PKT_NULL_CONTENTS3 0xb0c -+#define PKT_NULL_CONTENTS4 0xb10 -+#define PKT_NULL_CONTENTS5 0xb14 -+#define PKT_NULL_CONTENTS6 0xb18 -+#define PKT_NULL_CONTENTS7 0xb1c -+#define PKT_ACP_CONTENTS0 0xb20 -+#define PKT_ACP_CONTENTS1 0xb24 -+#define PKT_ACP_CONTENTS2 0xb28 -+#define PKT_ACP_CONTENTS3 0xb2c -+#define PKT_ACP_CONTENTS4 0xb30 -+#define PKT_ACP_CONTENTS5 0xb34 -+#define PKT_ACP_CONTENTS6 0xb38 -+#define PKT_ACP_CONTENTS7 0xb3c -+#define PKT_ISRC1_CONTENTS0 0xb40 -+#define PKT_ISRC1_CONTENTS1 0xb44 -+#define PKT_ISRC1_CONTENTS2 0xb48 -+#define PKT_ISRC1_CONTENTS3 0xb4c -+#define PKT_ISRC1_CONTENTS4 0xb50 -+#define PKT_ISRC1_CONTENTS5 0xb54 -+#define PKT_ISRC1_CONTENTS6 0xb58 -+#define PKT_ISRC1_CONTENTS7 0xb5c -+#define PKT_ISRC2_CONTENTS0 0xb60 -+#define PKT_ISRC2_CONTENTS1 0xb64 -+#define PKT_ISRC2_CONTENTS2 0xb68 -+#define PKT_ISRC2_CONTENTS3 0xb6c -+#define PKT_ISRC2_CONTENTS4 0xb70 -+#define PKT_ISRC2_CONTENTS5 0xb74 -+#define PKT_ISRC2_CONTENTS6 0xb78 -+#define PKT_ISRC2_CONTENTS7 0xb7c -+#define PKT_GMD_CONTENTS0 0xb80 -+#define PKT_GMD_CONTENTS1 0xb84 -+#define PKT_GMD_CONTENTS2 0xb88 -+#define PKT_GMD_CONTENTS3 0xb8c -+#define PKT_GMD_CONTENTS4 0xb90 -+#define PKT_GMD_CONTENTS5 0xb94 -+#define PKT_GMD_CONTENTS6 0xb98 -+#define PKT_GMD_CONTENTS7 0xb9c -+#define PKT_AMD_CONTENTS0 0xba0 -+#define PKT_AMD_CONTENTS1 0xba4 -+#define PKT_AMD_CONTENTS2 0xba8 -+#define PKT_AMD_CONTENTS3 0xbac -+#define PKT_AMD_CONTENTS4 0xbb0 -+#define PKT_AMD_CONTENTS5 0xbb4 -+#define PKT_AMD_CONTENTS6 0xbb8 -+#define PKT_AMD_CONTENTS7 0xbbc -+#define PKT_VSI_CONTENTS0 0xbc0 -+#define PKT_VSI_CONTENTS1 0xbc4 -+#define PKT_VSI_CONTENTS2 0xbc8 -+#define PKT_VSI_CONTENTS3 0xbcc -+#define PKT_VSI_CONTENTS4 0xbd0 -+#define PKT_VSI_CONTENTS5 0xbd4 -+#define PKT_VSI_CONTENTS6 0xbd8 -+#define PKT_VSI_CONTENTS7 0xbdc -+#define PKT_AVI_CONTENTS0 0xbe0 -+#define HDMI_FC_AVICONF0_ACTIVE_FMT_INFO_PRESENT BIT(4) -+#define HDMI_FC_AVICONF0_BAR_DATA_VERT_BAR 0x04 -+#define HDMI_FC_AVICONF0_BAR_DATA_HORIZ_BAR 0x08 -+#define HDMI_FC_AVICONF2_IT_CONTENT_VALID 0x80 -+#define PKT_AVI_CONTENTS1 0xbe4 -+#define PKT_AVI_CONTENTS2 0xbe8 -+#define PKT_AVI_CONTENTS3 0xbec -+#define PKT_AVI_CONTENTS4 0xbf0 -+#define PKT_AVI_CONTENTS5 0xbf4 -+#define PKT_AVI_CONTENTS6 0xbf8 -+#define PKT_AVI_CONTENTS7 0xbfc -+#define PKT_SPDI_CONTENTS0 0xc00 -+#define PKT_SPDI_CONTENTS1 0xc04 -+#define PKT_SPDI_CONTENTS2 0xc08 -+#define PKT_SPDI_CONTENTS3 0xc0c -+#define PKT_SPDI_CONTENTS4 0xc10 -+#define PKT_SPDI_CONTENTS5 0xc14 -+#define PKT_SPDI_CONTENTS6 0xc18 -+#define PKT_SPDI_CONTENTS7 0xc1c -+#define PKT_AUDI_CONTENTS0 0xc20 -+#define PKT_AUDI_CONTENTS1 0xc24 -+#define PKT_AUDI_CONTENTS2 0xc28 -+#define PKT_AUDI_CONTENTS3 0xc2c -+#define PKT_AUDI_CONTENTS4 0xc30 -+#define PKT_AUDI_CONTENTS5 0xc34 -+#define PKT_AUDI_CONTENTS6 0xc38 -+#define PKT_AUDI_CONTENTS7 0xc3c -+#define PKT_NVI_CONTENTS0 0xc40 -+#define PKT_NVI_CONTENTS1 0xc44 -+#define PKT_NVI_CONTENTS2 0xc48 -+#define PKT_NVI_CONTENTS3 0xc4c -+#define PKT_NVI_CONTENTS4 0xc50 -+#define PKT_NVI_CONTENTS5 0xc54 -+#define PKT_NVI_CONTENTS6 0xc58 -+#define PKT_NVI_CONTENTS7 0xc5c -+#define PKT_DRMI_CONTENTS0 0xc60 -+#define 
PKT_DRMI_CONTENTS1 0xc64 -+#define PKT_DRMI_CONTENTS2 0xc68 -+#define PKT_DRMI_CONTENTS3 0xc6c -+#define PKT_DRMI_CONTENTS4 0xc70 -+#define PKT_DRMI_CONTENTS5 0xc74 -+#define PKT_DRMI_CONTENTS6 0xc78 -+#define PKT_DRMI_CONTENTS7 0xc7c -+#define PKT_GHDMI1_CONTENTS0 0xc80 -+#define PKT_GHDMI1_CONTENTS1 0xc84 -+#define PKT_GHDMI1_CONTENTS2 0xc88 -+#define PKT_GHDMI1_CONTENTS3 0xc8c -+#define PKT_GHDMI1_CONTENTS4 0xc90 -+#define PKT_GHDMI1_CONTENTS5 0xc94 -+#define PKT_GHDMI1_CONTENTS6 0xc98 -+#define PKT_GHDMI1_CONTENTS7 0xc9c -+#define PKT_GHDMI2_CONTENTS0 0xca0 -+#define PKT_GHDMI2_CONTENTS1 0xca4 -+#define PKT_GHDMI2_CONTENTS2 0xca8 -+#define PKT_GHDMI2_CONTENTS3 0xcac -+#define PKT_GHDMI2_CONTENTS4 0xcb0 -+#define PKT_GHDMI2_CONTENTS5 0xcb4 -+#define PKT_GHDMI2_CONTENTS6 0xcb8 -+#define PKT_GHDMI2_CONTENTS7 0xcbc -+/* EMP Packetizer Registers */ -+#define PKT_EMP_CONFIG0 0xce0 -+#define PKT_EMP_CONTROL0 0xcec -+#define PKT_EMP_CONTROL1 0xcf0 -+#define PKT_EMP_CONTROL2 0xcf4 -+#define PKT_EMP_VTEM_CONTENTS0 0xd00 -+#define PKT_EMP_VTEM_CONTENTS1 0xd04 -+#define PKT_EMP_VTEM_CONTENTS2 0xd08 -+#define PKT_EMP_VTEM_CONTENTS3 0xd0c -+#define PKT_EMP_VTEM_CONTENTS4 0xd10 -+#define PKT_EMP_VTEM_CONTENTS5 0xd14 -+#define PKT_EMP_VTEM_CONTENTS6 0xd18 -+#define PKT_EMP_VTEM_CONTENTS7 0xd1c -+#define PKT0_EMP_CVTEM_CONTENTS0 0xd20 -+#define PKT0_EMP_CVTEM_CONTENTS1 0xd24 -+#define PKT0_EMP_CVTEM_CONTENTS2 0xd28 -+#define PKT0_EMP_CVTEM_CONTENTS3 0xd2c -+#define PKT0_EMP_CVTEM_CONTENTS4 0xd30 -+#define PKT0_EMP_CVTEM_CONTENTS5 0xd34 -+#define PKT0_EMP_CVTEM_CONTENTS6 0xd38 -+#define PKT0_EMP_CVTEM_CONTENTS7 0xd3c -+#define PKT1_EMP_CVTEM_CONTENTS0 0xd40 -+#define PKT1_EMP_CVTEM_CONTENTS1 0xd44 -+#define PKT1_EMP_CVTEM_CONTENTS2 0xd48 -+#define PKT1_EMP_CVTEM_CONTENTS3 0xd4c -+#define PKT1_EMP_CVTEM_CONTENTS4 0xd50 -+#define PKT1_EMP_CVTEM_CONTENTS5 0xd54 -+#define PKT1_EMP_CVTEM_CONTENTS6 0xd58 -+#define PKT1_EMP_CVTEM_CONTENTS7 0xd5c -+#define PKT2_EMP_CVTEM_CONTENTS0 0xd60 -+#define PKT2_EMP_CVTEM_CONTENTS1 0xd64 -+#define PKT2_EMP_CVTEM_CONTENTS2 0xd68 -+#define PKT2_EMP_CVTEM_CONTENTS3 0xd6c -+#define PKT2_EMP_CVTEM_CONTENTS4 0xd70 -+#define PKT2_EMP_CVTEM_CONTENTS5 0xd74 -+#define PKT2_EMP_CVTEM_CONTENTS6 0xd78 -+#define PKT2_EMP_CVTEM_CONTENTS7 0xd7c -+#define PKT3_EMP_CVTEM_CONTENTS0 0xd80 -+#define PKT3_EMP_CVTEM_CONTENTS1 0xd84 -+#define PKT3_EMP_CVTEM_CONTENTS2 0xd88 -+#define PKT3_EMP_CVTEM_CONTENTS3 0xd8c -+#define PKT3_EMP_CVTEM_CONTENTS4 0xd90 -+#define PKT3_EMP_CVTEM_CONTENTS5 0xd94 -+#define PKT3_EMP_CVTEM_CONTENTS6 0xd98 -+#define PKT3_EMP_CVTEM_CONTENTS7 0xd9c -+#define PKT4_EMP_CVTEM_CONTENTS0 0xda0 -+#define PKT4_EMP_CVTEM_CONTENTS1 0xda4 -+#define PKT4_EMP_CVTEM_CONTENTS2 0xda8 -+#define PKT4_EMP_CVTEM_CONTENTS3 0xdac -+#define PKT4_EMP_CVTEM_CONTENTS4 0xdb0 -+#define PKT4_EMP_CVTEM_CONTENTS5 0xdb4 -+#define PKT4_EMP_CVTEM_CONTENTS6 0xdb8 -+#define PKT4_EMP_CVTEM_CONTENTS7 0xdbc -+#define PKT5_EMP_CVTEM_CONTENTS0 0xdc0 -+#define PKT5_EMP_CVTEM_CONTENTS1 0xdc4 -+#define PKT5_EMP_CVTEM_CONTENTS2 0xdc8 -+#define PKT5_EMP_CVTEM_CONTENTS3 0xdcc -+#define PKT5_EMP_CVTEM_CONTENTS4 0xdd0 -+#define PKT5_EMP_CVTEM_CONTENTS5 0xdd4 -+#define PKT5_EMP_CVTEM_CONTENTS6 0xdd8 -+#define PKT5_EMP_CVTEM_CONTENTS7 0xddc -+/* Audio Packetizer Registers */ -+#define AUDPKT_CONTROL0 0xe20 -+#define AUDPKT_PBIT_FORCE_EN_MASK BIT(12) -+#define AUDPKT_PBIT_FORCE_EN BIT(12) -+#define AUDPKT_CHSTATUS_OVR_EN_MASK BIT(0) -+#define AUDPKT_CHSTATUS_OVR_EN BIT(0) -+#define AUDPKT_CONTROL1 0xe24 -+#define 
AUDPKT_ACR_CONTROL0 0xe40 -+#define AUDPKT_ACR_N_VALUE 0xfffff -+#define AUDPKT_ACR_CONTROL1 0xe44 -+#define AUDPKT_ACR_CTS_OVR_VAL_MSK GENMASK(23, 4) -+#define AUDPKT_ACR_CTS_OVR_VAL(x) ((x) << 4) -+#define AUDPKT_ACR_CTS_OVR_EN_MSK BIT(1) -+#define AUDPKT_ACR_CTS_OVR_EN BIT(1) -+#define AUDPKT_ACR_STATUS0 0xe4c -+#define AUDPKT_CHSTATUS_OVR0 0xe60 -+#define AUDPKT_CHSTATUS_OVR1 0xe64 -+/* IEC60958 Byte 3: Sampleing frenuency Bits 24 to 27 */ -+#define AUDPKT_CHSTATUS_SR_MASK GENMASK(3, 0) -+#define AUDPKT_CHSTATUS_SR_22050 0x4 -+#define AUDPKT_CHSTATUS_SR_24000 0x6 -+#define AUDPKT_CHSTATUS_SR_32000 0x3 -+#define AUDPKT_CHSTATUS_SR_44100 0x0 -+#define AUDPKT_CHSTATUS_SR_48000 0x2 -+#define AUDPKT_CHSTATUS_SR_88200 0x8 -+#define AUDPKT_CHSTATUS_SR_96000 0xa -+#define AUDPKT_CHSTATUS_SR_176400 0xc -+#define AUDPKT_CHSTATUS_SR_192000 0xe -+#define AUDPKT_CHSTATUS_SR_768000 0x9 -+#define AUDPKT_CHSTATUS_SR_NOT_INDICATED 0x1 -+/* IEC60958 Byte 4: Original Sampleing frenuency Bits 36 to 39 */ -+#define AUDPKT_CHSTATUS_0SR_MASK GENMASK(15, 12) -+#define AUDPKT_CHSTATUS_OSR_8000 0x6 -+#define AUDPKT_CHSTATUS_OSR_11025 0xa -+#define AUDPKT_CHSTATUS_OSR_12000 0x2 -+#define AUDPKT_CHSTATUS_OSR_16000 0x8 -+#define AUDPKT_CHSTATUS_OSR_22050 0xb -+#define AUDPKT_CHSTATUS_OSR_24000 0x9 -+#define AUDPKT_CHSTATUS_OSR_32000 0xc -+#define AUDPKT_CHSTATUS_OSR_44100 0xf -+#define AUDPKT_CHSTATUS_OSR_48000 0xd -+#define AUDPKT_CHSTATUS_OSR_88200 0x7 -+#define AUDPKT_CHSTATUS_OSR_96000 0x5 -+#define AUDPKT_CHSTATUS_OSR_176400 0x3 -+#define AUDPKT_CHSTATUS_OSR_192000 0x1 -+#define AUDPKT_CHSTATUS_OSR_NOT_INDICATED 0x0 -+#define AUDPKT_CHSTATUS_OVR2 0xe68 -+#define AUDPKT_CHSTATUS_OVR3 0xe6c -+#define AUDPKT_CHSTATUS_OVR4 0xe70 -+#define AUDPKT_CHSTATUS_OVR5 0xe74 -+#define AUDPKT_CHSTATUS_OVR6 0xe78 -+#define AUDPKT_CHSTATUS_OVR7 0xe7c -+#define AUDPKT_CHSTATUS_OVR8 0xe80 -+#define AUDPKT_CHSTATUS_OVR9 0xe84 -+#define AUDPKT_CHSTATUS_OVR10 0xe88 -+#define AUDPKT_CHSTATUS_OVR11 0xe8c -+#define AUDPKT_CHSTATUS_OVR12 0xe90 -+#define AUDPKT_CHSTATUS_OVR13 0xe94 -+#define AUDPKT_CHSTATUS_OVR14 0xe98 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC0 0xea0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC1 0xea4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC2 0xea8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC3 0xeac -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC4 0xeb0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC5 0xeb4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC6 0xeb8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC7 0xebc -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC8 0xec0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC9 0xec4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC10 0xec8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC11 0xecc -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC12 0xed0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC13 0xed4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC14 0xed8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC15 0xedc -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC16 0xee0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC17 0xee4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC18 0xee8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC19 0xeec -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC20 0xef0 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC21 0xef4 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC22 0xef8 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC23 0xefc -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC24 0xf00 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC25 0xf04 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC26 0xf08 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC27 0xf0c -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC28 0xf10 -+#define 
AUDPKT_USRDATA_OVR_MSG_GENERIC29 0xf14 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC30 0xf18 -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC31 0xf1c -+#define AUDPKT_USRDATA_OVR_MSG_GENERIC32 0xf20 -+#define AUDPKT_VBIT_OVR0 0xf24 -+/* CEC Registers */ -+#define CEC_TX_CONTROL 0x1000 -+#define CEC_STATUS 0x1004 -+#define CEC_CONFIG 0x1008 -+#define CEC_ADDR 0x100c -+#define CEC_TX_COUNT 0x1020 -+#define CEC_TX_DATA3_0 0x1024 -+#define CEC_TX_DATA7_4 0x1028 -+#define CEC_TX_DATA11_8 0x102c -+#define CEC_TX_DATA15_12 0x1030 -+#define CEC_RX_COUNT_STATUS 0x1040 -+#define CEC_RX_DATA3_0 0x1044 -+#define CEC_RX_DATA7_4 0x1048 -+#define CEC_RX_DATA11_8 0x104c -+#define CEC_RX_DATA15_12 0x1050 -+#define CEC_LOCK_CONTROL 0x1054 -+#define CEC_RXQUAL_BITTIME_CONFIG 0x1060 -+#define CEC_RX_BITTIME_CONFIG 0x1064 -+#define CEC_TX_BITTIME_CONFIG 0x1068 -+/* eARC RX CMDC Registers */ -+#define EARCRX_CMDC_CONFIG0 0x1800 -+#define EARCRX_XACTREAD_STOP_CFG BIT(26) -+#define EARCRX_XACTREAD_RETRY_CFG BIT(25) -+#define EARCRX_CMDC_DSCVR_EARCVALID0_TO_DISC1 BIT(24) -+#define EARCRX_CMDC_XACT_RESTART_EN BIT(18) -+#define EARCRX_CMDC_CONFIG1 0x1804 -+#define EARCRX_CMDC_CONTROL 0x1808 -+#define EARCRX_CMDC_HEARTBEAT_LOSS_EN BIT(4) -+#define EARCRX_CMDC_DISCOVERY_EN BIT(3) -+#define EARCRX_CONNECTOR_HPD BIT(1) -+#define EARCRX_CMDC_WHITELIST0_CONFIG 0x180c -+#define EARCRX_CMDC_WHITELIST1_CONFIG 0x1810 -+#define EARCRX_CMDC_WHITELIST2_CONFIG 0x1814 -+#define EARCRX_CMDC_WHITELIST3_CONFIG 0x1818 -+#define EARCRX_CMDC_STATUS 0x181c -+#define EARCRX_CMDC_XACT_INFO 0x1820 -+#define EARCRX_CMDC_XACT_ACTION 0x1824 -+#define EARCRX_CMDC_HEARTBEAT_RXSTAT_SE 0x1828 -+#define EARCRX_CMDC_HEARTBEAT_STATUS 0x182c -+#define EARCRX_CMDC_XACT_WR0 0x1840 -+#define EARCRX_CMDC_XACT_WR1 0x1844 -+#define EARCRX_CMDC_XACT_WR2 0x1848 -+#define EARCRX_CMDC_XACT_WR3 0x184c -+#define EARCRX_CMDC_XACT_WR4 0x1850 -+#define EARCRX_CMDC_XACT_WR5 0x1854 -+#define EARCRX_CMDC_XACT_WR6 0x1858 -+#define EARCRX_CMDC_XACT_WR7 0x185c -+#define EARCRX_CMDC_XACT_WR8 0x1860 -+#define EARCRX_CMDC_XACT_WR9 0x1864 -+#define EARCRX_CMDC_XACT_WR10 0x1868 -+#define EARCRX_CMDC_XACT_WR11 0x186c -+#define EARCRX_CMDC_XACT_WR12 0x1870 -+#define EARCRX_CMDC_XACT_WR13 0x1874 -+#define EARCRX_CMDC_XACT_WR14 0x1878 -+#define EARCRX_CMDC_XACT_WR15 0x187c -+#define EARCRX_CMDC_XACT_WR16 0x1880 -+#define EARCRX_CMDC_XACT_WR17 0x1884 -+#define EARCRX_CMDC_XACT_WR18 0x1888 -+#define EARCRX_CMDC_XACT_WR19 0x188c -+#define EARCRX_CMDC_XACT_WR20 0x1890 -+#define EARCRX_CMDC_XACT_WR21 0x1894 -+#define EARCRX_CMDC_XACT_WR22 0x1898 -+#define EARCRX_CMDC_XACT_WR23 0x189c -+#define EARCRX_CMDC_XACT_WR24 0x18a0 -+#define EARCRX_CMDC_XACT_WR25 0x18a4 -+#define EARCRX_CMDC_XACT_WR26 0x18a8 -+#define EARCRX_CMDC_XACT_WR27 0x18ac -+#define EARCRX_CMDC_XACT_WR28 0x18b0 -+#define EARCRX_CMDC_XACT_WR29 0x18b4 -+#define EARCRX_CMDC_XACT_WR30 0x18b8 -+#define EARCRX_CMDC_XACT_WR31 0x18bc -+#define EARCRX_CMDC_XACT_WR32 0x18c0 -+#define EARCRX_CMDC_XACT_WR33 0x18c4 -+#define EARCRX_CMDC_XACT_WR34 0x18c8 -+#define EARCRX_CMDC_XACT_WR35 0x18cc -+#define EARCRX_CMDC_XACT_WR36 0x18d0 -+#define EARCRX_CMDC_XACT_WR37 0x18d4 -+#define EARCRX_CMDC_XACT_WR38 0x18d8 -+#define EARCRX_CMDC_XACT_WR39 0x18dc -+#define EARCRX_CMDC_XACT_WR40 0x18e0 -+#define EARCRX_CMDC_XACT_WR41 0x18e4 -+#define EARCRX_CMDC_XACT_WR42 0x18e8 -+#define EARCRX_CMDC_XACT_WR43 0x18ec -+#define EARCRX_CMDC_XACT_WR44 0x18f0 -+#define EARCRX_CMDC_XACT_WR45 0x18f4 -+#define EARCRX_CMDC_XACT_WR46 0x18f8 -+#define EARCRX_CMDC_XACT_WR47 
0x18fc -+#define EARCRX_CMDC_XACT_WR48 0x1900 -+#define EARCRX_CMDC_XACT_WR49 0x1904 -+#define EARCRX_CMDC_XACT_WR50 0x1908 -+#define EARCRX_CMDC_XACT_WR51 0x190c -+#define EARCRX_CMDC_XACT_WR52 0x1910 -+#define EARCRX_CMDC_XACT_WR53 0x1914 -+#define EARCRX_CMDC_XACT_WR54 0x1918 -+#define EARCRX_CMDC_XACT_WR55 0x191c -+#define EARCRX_CMDC_XACT_WR56 0x1920 -+#define EARCRX_CMDC_XACT_WR57 0x1924 -+#define EARCRX_CMDC_XACT_WR58 0x1928 -+#define EARCRX_CMDC_XACT_WR59 0x192c -+#define EARCRX_CMDC_XACT_WR60 0x1930 -+#define EARCRX_CMDC_XACT_WR61 0x1934 -+#define EARCRX_CMDC_XACT_WR62 0x1938 -+#define EARCRX_CMDC_XACT_WR63 0x193c -+#define EARCRX_CMDC_XACT_WR64 0x1940 -+#define EARCRX_CMDC_XACT_RD0 0x1960 -+#define EARCRX_CMDC_XACT_RD1 0x1964 -+#define EARCRX_CMDC_XACT_RD2 0x1968 -+#define EARCRX_CMDC_XACT_RD3 0x196c -+#define EARCRX_CMDC_XACT_RD4 0x1970 -+#define EARCRX_CMDC_XACT_RD5 0x1974 -+#define EARCRX_CMDC_XACT_RD6 0x1978 -+#define EARCRX_CMDC_XACT_RD7 0x197c -+#define EARCRX_CMDC_XACT_RD8 0x1980 -+#define EARCRX_CMDC_XACT_RD9 0x1984 -+#define EARCRX_CMDC_XACT_RD10 0x1988 -+#define EARCRX_CMDC_XACT_RD11 0x198c -+#define EARCRX_CMDC_XACT_RD12 0x1990 -+#define EARCRX_CMDC_XACT_RD13 0x1994 -+#define EARCRX_CMDC_XACT_RD14 0x1998 -+#define EARCRX_CMDC_XACT_RD15 0x199c -+#define EARCRX_CMDC_XACT_RD16 0x19a0 -+#define EARCRX_CMDC_XACT_RD17 0x19a4 -+#define EARCRX_CMDC_XACT_RD18 0x19a8 -+#define EARCRX_CMDC_XACT_RD19 0x19ac -+#define EARCRX_CMDC_XACT_RD20 0x19b0 -+#define EARCRX_CMDC_XACT_RD21 0x19b4 -+#define EARCRX_CMDC_XACT_RD22 0x19b8 -+#define EARCRX_CMDC_XACT_RD23 0x19bc -+#define EARCRX_CMDC_XACT_RD24 0x19c0 -+#define EARCRX_CMDC_XACT_RD25 0x19c4 -+#define EARCRX_CMDC_XACT_RD26 0x19c8 -+#define EARCRX_CMDC_XACT_RD27 0x19cc -+#define EARCRX_CMDC_XACT_RD28 0x19d0 -+#define EARCRX_CMDC_XACT_RD29 0x19d4 -+#define EARCRX_CMDC_XACT_RD30 0x19d8 -+#define EARCRX_CMDC_XACT_RD31 0x19dc -+#define EARCRX_CMDC_XACT_RD32 0x19e0 -+#define EARCRX_CMDC_XACT_RD33 0x19e4 -+#define EARCRX_CMDC_XACT_RD34 0x19e8 -+#define EARCRX_CMDC_XACT_RD35 0x19ec -+#define EARCRX_CMDC_XACT_RD36 0x19f0 -+#define EARCRX_CMDC_XACT_RD37 0x19f4 -+#define EARCRX_CMDC_XACT_RD38 0x19f8 -+#define EARCRX_CMDC_XACT_RD39 0x19fc -+#define EARCRX_CMDC_XACT_RD40 0x1a00 -+#define EARCRX_CMDC_XACT_RD41 0x1a04 -+#define EARCRX_CMDC_XACT_RD42 0x1a08 -+#define EARCRX_CMDC_XACT_RD43 0x1a0c -+#define EARCRX_CMDC_XACT_RD44 0x1a10 -+#define EARCRX_CMDC_XACT_RD45 0x1a14 -+#define EARCRX_CMDC_XACT_RD46 0x1a18 -+#define EARCRX_CMDC_XACT_RD47 0x1a1c -+#define EARCRX_CMDC_XACT_RD48 0x1a20 -+#define EARCRX_CMDC_XACT_RD49 0x1a24 -+#define EARCRX_CMDC_XACT_RD50 0x1a28 -+#define EARCRX_CMDC_XACT_RD51 0x1a2c -+#define EARCRX_CMDC_XACT_RD52 0x1a30 -+#define EARCRX_CMDC_XACT_RD53 0x1a34 -+#define EARCRX_CMDC_XACT_RD54 0x1a38 -+#define EARCRX_CMDC_XACT_RD55 0x1a3c -+#define EARCRX_CMDC_XACT_RD56 0x1a40 -+#define EARCRX_CMDC_XACT_RD57 0x1a44 -+#define EARCRX_CMDC_XACT_RD58 0x1a48 -+#define EARCRX_CMDC_XACT_RD59 0x1a4c -+#define EARCRX_CMDC_XACT_RD60 0x1a50 -+#define EARCRX_CMDC_XACT_RD61 0x1a54 -+#define EARCRX_CMDC_XACT_RD62 0x1a58 -+#define EARCRX_CMDC_XACT_RD63 0x1a5c -+#define EARCRX_CMDC_XACT_RD64 0x1a60 -+#define EARCRX_CMDC_SYNC_CONFIG 0x1b00 -+/* eARC RX DMAC Registers */ -+#define EARCRX_DMAC_PHY_CONTROL 0x1c00 -+#define EARCRX_DMAC_CONFIG 0x1c08 -+#define EARCRX_DMAC_CONTROL0 0x1c0c -+#define EARCRX_DMAC_AUDIO_EN BIT(1) -+#define EARCRX_DMAC_EN BIT(0) -+#define EARCRX_DMAC_CONTROL1 0x1c10 -+#define EARCRX_DMAC_STATUS 0x1c14 -+#define 
EARCRX_DMAC_CHSTATUS0 0x1c18 -+#define EARCRX_DMAC_CHSTATUS1 0x1c1c -+#define EARCRX_DMAC_CHSTATUS2 0x1c20 -+#define EARCRX_DMAC_CHSTATUS3 0x1c24 -+#define EARCRX_DMAC_CHSTATUS4 0x1c28 -+#define EARCRX_DMAC_CHSTATUS5 0x1c2c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC0 0x1c30 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC1 0x1c34 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC2 0x1c38 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC3 0x1c3c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC4 0x1c40 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC5 0x1c44 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC6 0x1c48 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC7 0x1c4c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC8 0x1c50 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC9 0x1c54 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC10 0x1c58 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC11 0x1c5c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT0 0x1c60 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT1 0x1c64 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT2 0x1c68 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT3 0x1c6c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT4 0x1c70 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT5 0x1c74 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT6 0x1c78 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT7 0x1c7c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT8 0x1c80 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT9 0x1c84 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT10 0x1c88 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT11 0x1c8c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT0 0x1c90 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT1 0x1c94 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT2 0x1c98 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT3 0x1c9c -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT4 0x1ca0 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT5 0x1ca4 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT6 0x1ca8 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT7 0x1cac -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT8 0x1cb0 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT9 0x1cb4 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT10 0x1cb8 -+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT11 0x1cbc -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC0 0x1cc0 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC1 0x1cc4 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC2 0x1cc8 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC3 0x1ccc -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC4 0x1cd0 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC5 0x1cd4 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC6 0x1cd8 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC7 0x1cdc -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC8 0x1ce0 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC9 0x1ce4 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC10 0x1ce8 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC11 0x1cec -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC12 0x1cf0 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC13 0x1cf4 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC14 0x1cf8 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC15 0x1cfc -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC16 0x1d00 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC17 0x1d04 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC18 0x1d08 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC19 0x1d0c -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC20 0x1d10 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC21 0x1d14 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC22 0x1d18 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC23 0x1d1c -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC24 0x1d20 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC25 0x1d24 -+#define 
EARCRX_DMAC_USRDATA_MSG_GENERIC26 0x1d28 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC27 0x1d2c -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC28 0x1d30 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC29 0x1d34 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC30 0x1d38 -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC31 0x1d3c -+#define EARCRX_DMAC_USRDATA_MSG_GENERIC32 0x1d40 -+#define EARCRX_DMAC_CHSTATUS_STREAMER0 0x1d44 -+#define EARCRX_DMAC_CHSTATUS_STREAMER1 0x1d48 -+#define EARCRX_DMAC_CHSTATUS_STREAMER2 0x1d4c -+#define EARCRX_DMAC_CHSTATUS_STREAMER3 0x1d50 -+#define EARCRX_DMAC_CHSTATUS_STREAMER4 0x1d54 -+#define EARCRX_DMAC_CHSTATUS_STREAMER5 0x1d58 -+#define EARCRX_DMAC_CHSTATUS_STREAMER6 0x1d5c -+#define EARCRX_DMAC_CHSTATUS_STREAMER7 0x1d60 -+#define EARCRX_DMAC_CHSTATUS_STREAMER8 0x1d64 -+#define EARCRX_DMAC_CHSTATUS_STREAMER9 0x1d68 -+#define EARCRX_DMAC_CHSTATUS_STREAMER10 0x1d6c -+#define EARCRX_DMAC_CHSTATUS_STREAMER11 0x1d70 -+#define EARCRX_DMAC_CHSTATUS_STREAMER12 0x1d74 -+#define EARCRX_DMAC_CHSTATUS_STREAMER13 0x1d78 -+#define EARCRX_DMAC_CHSTATUS_STREAMER14 0x1d7c -+#define EARCRX_DMAC_USRDATA_STREAMER0 0x1d80 -+/* Main Unit Interrupt Registers */ -+#define MAIN_INTVEC_INDEX 0x3000 -+#define MAINUNIT_0_INT_STATUS 0x3010 -+#define MAINUNIT_0_INT_MASK_N 0x3014 -+#define MAINUNIT_0_INT_CLEAR 0x3018 -+#define MAINUNIT_0_INT_FORCE 0x301c -+#define MAINUNIT_1_INT_STATUS 0x3020 -+#define FLT_EXIT_TO_LTSL_IRQ BIT(22) -+#define FLT_EXIT_TO_LTS4_IRQ BIT(21) -+#define FLT_EXIT_TO_LTSP_IRQ BIT(20) -+#define SCDC_NACK_RCVD_IRQ BIT(12) -+#define SCDC_RR_REPLY_STOP_IRQ BIT(11) -+#define SCDC_UPD_FLAGS_CLR_IRQ BIT(10) -+#define SCDC_UPD_FLAGS_CHG_IRQ BIT(9) -+#define SCDC_UPD_FLAGS_RD_IRQ BIT(8) -+#define I2CM_NACK_RCVD_IRQ BIT(2) -+#define I2CM_READ_REQUEST_IRQ BIT(1) -+#define I2CM_OP_DONE_IRQ BIT(0) -+#define MAINUNIT_1_INT_MASK_N 0x3024 -+#define I2CM_NACK_RCVD_MASK_N BIT(2) -+#define I2CM_READ_REQUEST_MASK_N BIT(1) -+#define I2CM_OP_DONE_MASK_N BIT(0) -+#define MAINUNIT_1_INT_CLEAR 0x3028 -+#define I2CM_NACK_RCVD_CLEAR BIT(2) -+#define I2CM_READ_REQUEST_CLEAR BIT(1) -+#define I2CM_OP_DONE_CLEAR BIT(0) -+#define MAINUNIT_1_INT_FORCE 0x302c -+/* AVPUNIT Interrupt Registers */ -+#define AVP_INTVEC_INDEX 0x3800 -+#define AVP_0_INT_STATUS 0x3810 -+#define AVP_0_INT_MASK_N 0x3814 -+#define AVP_0_INT_CLEAR 0x3818 -+#define AVP_0_INT_FORCE 0x381c -+#define AVP_1_INT_STATUS 0x3820 -+#define AVP_1_INT_MASK_N 0x3824 -+#define HDCP14_AUTH_CHG_MASK_N BIT(6) -+#define HDCP14_KSV_LIST_DONE_MASK_N BIT(1) -+#define AVP_1_INT_CLEAR 0x3828 -+#define AVP_1_INT_FORCE 0x382c -+#define AVP_2_INT_STATUS 0x3830 -+#define AVP_2_INT_MASK_N 0x3834 -+#define AVP_2_INT_CLEAR 0x3838 -+#define AVP_2_INT_FORCE 0x383c -+#define AVP_3_INT_STATUS 0x3840 -+#define AVP_3_INT_MASK_N 0x3844 -+#define AVP_3_INT_CLEAR 0x3848 -+#define AVP_3_INT_FORCE 0x384c -+#define HDCP2_ESM_P0_GPIO_OUT_2_CHG_IRQ BIT(17) -+#define AVP_4_INT_STATUS 0x3850 -+#define AVP_4_INT_MASK_N 0x3854 -+#define AVP_4_INT_CLEAR 0x3858 -+#define AVP_4_INT_FORCE 0x385c -+#define AVP_5_INT_STATUS 0x3860 -+#define AVP_5_INT_MASK_N 0x3864 -+#define AVP_5_INT_CLEAR 0x3868 -+#define AVP_5_INT_FORCE 0x386c -+#define AVP_6_INT_STATUS 0x3870 -+#define AVP_6_INT_MASK_N 0x3874 -+#define AVP_6_INT_CLEAR 0x3878 -+#define AVP_6_INT_FORCE 0x387c -+/* CEC Interrupt Registers */ -+#define CEC_INT_STATUS 0x4000 -+#define CEC_INT_MASK_N 0x4004 -+#define CEC_INT_CLEAR 0x4008 -+#define CEC_INT_FORCE 0x400c -+/* eARC RX Interrupt Registers */ -+#define EARCRX_INTVEC_INDEX 0x4800 -+#define 
EARCRX_0_INT_STATUS 0x4810 -+#define EARCRX_CMDC_DISCOVERY_TIMEOUT_IRQ BIT(9) -+#define EARCRX_CMDC_DISCOVERY_DONE_IRQ BIT(8) -+#define EARCRX_0_INT_MASK_N 0x4814 -+#define EARCRX_0_INT_CLEAR 0x4818 -+#define EARCRX_0_INT_FORCE 0x481c -+#define EARCRX_1_INT_STATUS 0x4820 -+#define EARCRX_1_INT_MASK_N 0x4824 -+#define EARCRX_1_INT_CLEAR 0x4828 -+#define EARCRX_1_INT_FORCE 0x482c -+ -+#define HDMI_HDCP14_MEM_KSV0 0x4f08 -+#define HDMI_HDCP14_MEM_BSTATUS0 0x5958 -+#define HDMI_HDCP14_MEM_M0_1 0x5960 -+#define HDMI_HDCP14_MEM_M0_7 0x597c -+ -+#endif /* __DW_HDMI_QP_H__ */ -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c -index 6c1d79474..70d35c683 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c -@@ -9,16 +9,19 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include - #include - #include --#include -+#include - #include - #include - #include - #include -+#include - - #include - -@@ -37,18 +40,94 @@ - - #include "dw-hdmi-audio.h" - #include "dw-hdmi-cec.h" -+#include "dw-hdmi-hdcp.h" - #include "dw-hdmi.h" - - #define DDC_CI_ADDR 0x37 - #define DDC_SEGMENT_ADDR 0x30 - - #define HDMI_EDID_LEN 512 -+#define HDMI_EDID_BLOCK_LEN 128 - - /* DW-HDMI Controller >= 0x200a are at least compliant with SCDC version 1 */ - #define SCDC_MIN_SOURCE_VERSION 0x1 - - #define HDMI14_MAX_TMDSCLK 340000000 - -+static const unsigned int dw_hdmi_cable[] = { -+ EXTCON_DISP_HDMI, -+ EXTCON_NONE, -+}; -+ -+enum hdmi_datamap { -+ RGB444_8B = 0x01, -+ RGB444_10B = 0x03, -+ RGB444_12B = 0x05, -+ RGB444_16B = 0x07, -+ YCbCr444_8B = 0x09, -+ YCbCr444_10B = 0x0B, -+ YCbCr444_12B = 0x0D, -+ YCbCr444_16B = 0x0F, -+ YCbCr422_8B = 0x16, -+ YCbCr422_10B = 0x14, -+ YCbCr422_12B = 0x12, -+}; -+ -+/* -+ * Unless otherwise noted, entries in this table are 100% optimization. -+ * Values can be obtained from hdmi_compute_n() but that function is -+ * slow so we pre-compute values we expect to see. -+ * -+ * All 32k and 48k values are expected to be the same (due to the way -+ * the math works) for any rate that's an exact kHz. 
-+ */ -+static const struct dw_hdmi_audio_tmds_n common_tmds_n_table[] = { -+ { .tmds = 25175000, .n_32k = 4096, .n_44k1 = 12854, .n_48k = 6144, }, -+ { .tmds = 25200000, .n_32k = 4096, .n_44k1 = 5656, .n_48k = 6144, }, -+ { .tmds = 27000000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 28320000, .n_32k = 4096, .n_44k1 = 5586, .n_48k = 6144, }, -+ { .tmds = 30240000, .n_32k = 4096, .n_44k1 = 5642, .n_48k = 6144, }, -+ { .tmds = 31500000, .n_32k = 4096, .n_44k1 = 5600, .n_48k = 6144, }, -+ { .tmds = 32000000, .n_32k = 4096, .n_44k1 = 5733, .n_48k = 6144, }, -+ { .tmds = 33750000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 36000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 40000000, .n_32k = 4096, .n_44k1 = 5733, .n_48k = 6144, }, -+ { .tmds = 49500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 50000000, .n_32k = 4096, .n_44k1 = 5292, .n_48k = 6144, }, -+ { .tmds = 54000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 65000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 68250000, .n_32k = 4096, .n_44k1 = 5376, .n_48k = 6144, }, -+ { .tmds = 71000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 72000000, .n_32k = 4096, .n_44k1 = 5635, .n_48k = 6144, }, -+ { .tmds = 73250000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 74250000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 75000000, .n_32k = 4096, .n_44k1 = 5880, .n_48k = 6144, }, -+ { .tmds = 78750000, .n_32k = 4096, .n_44k1 = 5600, .n_48k = 6144, }, -+ { .tmds = 78800000, .n_32k = 4096, .n_44k1 = 5292, .n_48k = 6144, }, -+ { .tmds = 79500000, .n_32k = 4096, .n_44k1 = 4704, .n_48k = 6144, }, -+ { .tmds = 83500000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 85500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 88750000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 97750000, .n_32k = 4096, .n_44k1 = 14112, .n_48k = 6144, }, -+ { .tmds = 101000000, .n_32k = 4096, .n_44k1 = 7056, .n_48k = 6144, }, -+ { .tmds = 106500000, .n_32k = 4096, .n_44k1 = 4704, .n_48k = 6144, }, -+ { .tmds = 108000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ { .tmds = 115500000, .n_32k = 4096, .n_44k1 = 5712, .n_48k = 6144, }, -+ { .tmds = 119000000, .n_32k = 4096, .n_44k1 = 5544, .n_48k = 6144, }, -+ { .tmds = 135000000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 146250000, .n_32k = 4096, .n_44k1 = 6272, .n_48k = 6144, }, -+ { .tmds = 148500000, .n_32k = 4096, .n_44k1 = 5488, .n_48k = 6144, }, -+ { .tmds = 154000000, .n_32k = 4096, .n_44k1 = 5544, .n_48k = 6144, }, -+ { .tmds = 162000000, .n_32k = 4096, .n_44k1 = 5684, .n_48k = 6144, }, -+ -+ /* For 297 MHz+ HDMI spec have some other rule for setting N */ -+ { .tmds = 297000000, .n_32k = 3073, .n_44k1 = 4704, .n_48k = 5120, }, -+ { .tmds = 594000000, .n_32k = 3073, .n_44k1 = 9408, .n_48k = 10240, }, -+ -+ /* End of table */ -+ { .tmds = 0, .n_32k = 0, .n_44k1 = 0, .n_48k = 0, }, -+}; -+ - static const u16 csc_coeff_default[3][4] = { - { 0x2000, 0x0000, 0x0000, 0x0000 }, - { 0x0000, 0x2000, 0x0000, 0x0000 }, -@@ -85,12 +164,47 @@ static const u16 csc_coeff_rgb_full_to_rgb_limited[3][4] = { - { 0x0000, 0x0000, 0x1b7c, 0x0020 } - }; - -+static const struct drm_display_mode dw_hdmi_default_modes[] = { -+ /* 4 - 1280x720@60Hz 16:9 */ -+ { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1390, -+ 1430, 1650, 0, 720, 725, 730, 750, 0, -+ DRM_MODE_FLAG_PHSYNC | 
DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 16 - 1920x1080@60Hz 16:9 */ -+ { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2008, -+ 2052, 2200, 0, 1080, 1084, 1089, 1125, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 31 - 1920x1080@50Hz 16:9 */ -+ { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2448, -+ 2492, 2640, 0, 1080, 1084, 1089, 1125, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 19 - 1280x720@50Hz 16:9 */ -+ { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1720, -+ 1760, 1980, 0, 720, 725, 730, 750, 0, -+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, }, -+ /* 17 - 720x576@50Hz 4:3 */ -+ { DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 27000, 720, 732, -+ 796, 864, 0, 576, 581, 586, 625, 0, -+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, }, -+ /* 2 - 720x480@60Hz 4:3 */ -+ { DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 27000, 720, 736, -+ 798, 858, 0, 480, 489, 495, 525, 0, -+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), -+ .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, }, -+}; -+ - struct hdmi_vmode { - bool mdataenablepolarity; - -+ unsigned int previous_pixelclock; - unsigned int mpixelclock; - unsigned int mpixelrepetitioninput; - unsigned int mpixelrepetitionoutput; -+ unsigned int previous_tmdsclock; - unsigned int mtmdsclock; - }; - -@@ -99,8 +213,8 @@ struct hdmi_data_info { - unsigned int enc_out_bus_format; - unsigned int enc_in_encoding; - unsigned int enc_out_encoding; -+ unsigned int quant_range; - unsigned int pix_repet_factor; -- unsigned int hdcp_enable; - struct hdmi_vmode video_mode; - bool rgb_limited_range; - }; -@@ -115,6 +229,9 @@ struct dw_hdmi_i2c { - u8 slave_reg; - bool is_regaddr; - bool is_segment; -+ -+ unsigned int scl_high_ns; -+ unsigned int scl_low_ns; - }; - - struct dw_hdmi_phy_data { -@@ -131,6 +248,7 @@ struct dw_hdmi { - struct drm_connector connector; - struct drm_bridge bridge; - struct drm_bridge *next_bridge; -+ struct platform_device *hdcp_dev; - - unsigned int version; - -@@ -144,8 +262,11 @@ struct dw_hdmi { - - struct hdmi_data_info hdmi_data; - const struct dw_hdmi_plat_data *plat_data; -+ const struct dw_hdmi_cec_wake_ops *cec_ops; -+ struct dw_hdcp *hdcp; - - int vic; -+ int irq; - - u8 edid[HDMI_EDID_LEN]; - -@@ -162,6 +283,13 @@ struct dw_hdmi { - void __iomem *regs; - bool sink_is_hdmi; - bool sink_has_audio; -+ bool hpd_state; -+ bool support_hdmi; -+ bool force_logo; -+ int force_output; -+ -+ struct delayed_work work; -+ struct workqueue_struct *workqueue; - - struct pinctrl *pinctrl; - struct pinctrl_state *default_state; -@@ -178,6 +306,7 @@ struct dw_hdmi { - - spinlock_t audio_lock; - struct mutex audio_mutex; -+ struct dentry *debugfs_dir; - unsigned int sample_non_pcm; - unsigned int sample_width; - unsigned int sample_rate; -@@ -185,6 +314,9 @@ struct dw_hdmi { - unsigned int audio_cts; - unsigned int audio_n; - bool audio_enable; -+ bool scramble_low_rates; -+ -+ struct extcon_dev *extcon; - - unsigned int reg_shift; - struct regmap *regm; -@@ -193,10 +325,15 @@ struct dw_hdmi { - - struct mutex cec_notifier_mutex; - struct cec_notifier *cec_notifier; -+ struct cec_adapter *cec_adap; - - hdmi_codec_plugged_cb plugged_cb; - struct device *codec_dev; - enum drm_connector_status last_connector_result; -+ bool 
initialized; /* hdmi is enabled before bind */ -+ bool logo_plug_out; /* hdmi is plug out when kernel logo */ -+ bool update; -+ bool hdr2sdr; /* from hdr to sdr */ - }; + return 0; + } - #define HDMI_IH_PHY_STAT0_RX_SENSE \ -@@ -254,6 +391,129 @@ static void hdmi_mask_writeb(struct dw_hdmi *hdmi, u8 data, unsigned int reg, - hdmi_modb(hdmi, data << shift, mask, reg); +@@ -1681,13 +2141,116 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, + return ret; } -+static bool dw_hdmi_check_output_type_changed(struct dw_hdmi *hdmi) ++int analogix_dp_audio_hw_params(struct analogix_dp_device *dp, ++ struct hdmi_codec_daifmt *daifmt, ++ struct hdmi_codec_params *params) +{ -+ bool sink_hdmi; -+ -+ sink_hdmi = hdmi->sink_is_hdmi; -+ -+ if (hdmi->force_output == 1) -+ hdmi->sink_is_hdmi = true; -+ else if (hdmi->force_output == 2) -+ hdmi->sink_is_hdmi = false; -+ else -+ hdmi->sink_is_hdmi = hdmi->support_hdmi; -+ -+ if (sink_hdmi != hdmi->sink_is_hdmi) -+ return true; ++ switch (daifmt->fmt) { ++ case HDMI_SPDIF: ++ analogix_dp_audio_config_spdif(dp); ++ break; ++ case HDMI_I2S: ++ analogix_dp_audio_config_i2s(dp); ++ break; ++ default: ++ DRM_DEV_ERROR(dp->dev, "invalid daifmt %d\n", daifmt->fmt); ++ return -EINVAL; ++ } + -+ return false; ++ return 0; +} ++EXPORT_SYMBOL_GPL(analogix_dp_audio_hw_params); + -+static void repo_hpd_event(struct work_struct *p_work) ++void analogix_dp_audio_shutdown(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = container_of(p_work, struct dw_hdmi, work.work); -+ enum drm_connector_status status = hdmi->hpd_state ? -+ connector_status_connected : connector_status_disconnected; -+ u8 phy_stat = hdmi_readb(hdmi, HDMI_PHY_STAT0); ++ analogix_dp_audio_disable(dp); ++} ++EXPORT_SYMBOL_GPL(analogix_dp_audio_shutdown); + -+ mutex_lock(&hdmi->mutex); -+ if (!(phy_stat & HDMI_PHY_RX_SENSE)) -+ hdmi->rxsense = false; -+ if (phy_stat & HDMI_PHY_HPD) -+ hdmi->rxsense = true; -+ mutex_unlock(&hdmi->mutex); ++int analogix_dp_audio_startup(struct analogix_dp_device *dp) ++{ ++ analogix_dp_audio_enable(dp); + -+ if (hdmi->bridge.dev) { -+ bool change; -+ void *data = hdmi->plat_data->phy_data; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(analogix_dp_audio_startup); + -+ change = drm_helper_hpd_irq_event(hdmi->bridge.dev); ++int analogix_dp_audio_get_eld(struct analogix_dp_device *dp, u8 *buf, size_t len) ++{ ++ memcpy(buf, dp->connector.eld, min(sizeof(dp->connector.eld), len)); + -+ if (change) { -+ if (hdmi->plat_data->set_ddc_io) -+ hdmi->plat_data->set_ddc_io(data, hdmi->hpd_state); -+ if (hdmi->cec_adap->devnode.registered) -+ cec_queue_pin_hpd_event(hdmi->cec_adap, -+ hdmi->hpd_state, -+ ktime_get()); -+ } -+ drm_bridge_hpd_notify(&hdmi->bridge, status); -+ } ++ return 0; +} ++EXPORT_SYMBOL_GPL(analogix_dp_audio_get_eld); + -+static bool check_hdmi_irq(struct dw_hdmi *hdmi, int intr_stat, -+ int phy_int_pol) ++static void analogix_dp_link_train_restore(struct analogix_dp_device *dp) +{ -+ int msecs; ++ u32 link_rate, lane_count; ++ u8 lane, spread; + -+ /* To determine whether interrupt type is HPD */ -+ if (!(intr_stat & HDMI_IH_PHY_STAT0_HPD)) -+ return false; ++ analogix_dp_get_link_bandwidth(dp, &link_rate); ++ analogix_dp_get_lane_count(dp, &lane_count); ++ drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); + -+ if (phy_int_pol & HDMI_PHY_HPD) { -+ dev_dbg(hdmi->dev, "dw hdmi plug in\n"); -+ msecs = 150; -+ hdmi->hpd_state = true; -+ } else { -+ dev_dbg(hdmi->dev, "dw hdmi plug out\n"); -+ msecs = 20; -+ hdmi->hpd_state = false; -+ } -+ 
mod_delayed_work(hdmi->workqueue, &hdmi->work, msecs_to_jiffies(msecs)); ++ dp->link_train.link_rate = link_rate; ++ dp->link_train.lane_count = lane_count; ++ dp->link_train.enhanced_framing = analogix_dp_get_enhanced_mode(dp); ++ dp->link_train.ssc = !!(spread & DP_MAX_DOWNSPREAD_0_5); + -+ return true; ++ for (lane = 0; lane < 4; lane++) ++ dp->link_train.training_lane[lane] = ++ analogix_dp_get_lane_link_training(dp, lane); +} + -+static void init_hpd_work(struct dw_hdmi *hdmi) ++int analogix_dp_loader_protect(struct analogix_dp_device *dp) +{ -+ hdmi->workqueue = create_workqueue("hpd_queue"); -+ INIT_DELAYED_WORK(&hdmi->work, repo_hpd_event); -+} ++ u8 link_status[DP_LINK_STATUS_SIZE]; ++ int ret; + -+static void dw_hdmi_i2c_set_divs(struct dw_hdmi *hdmi) -+{ -+ unsigned long clk_rate_khz; -+ unsigned long low_ns, high_ns; -+ unsigned long div_low, div_high; ++ ret = analogix_dp_phy_power_on(dp); ++ if (ret) ++ return ret; + -+ /* Standard-mode */ -+ if (hdmi->i2c->scl_high_ns < 4000) -+ high_ns = 4708; -+ else -+ high_ns = hdmi->i2c->scl_high_ns; ++ dp->dpms_mode = DRM_MODE_DPMS_ON; + -+ if (hdmi->i2c->scl_low_ns < 4700) -+ low_ns = 4916; -+ else -+ low_ns = hdmi->i2c->scl_low_ns; ++ analogix_dp_link_train_restore(dp); + -+ /* Adjust to avoid overflow */ -+ clk_rate_khz = DIV_ROUND_UP(clk_get_rate(hdmi->isfr_clk), 1000); ++ ret = analogix_dp_fast_link_train_detection(dp); ++ if (ret) ++ goto err_disable; + -+ div_low = (clk_rate_khz * low_ns) / 1000000; -+ if ((clk_rate_khz * low_ns) % 1000000) -+ div_low++; ++ if (analogix_dp_detect_sink_psr(dp)) { ++ ret = analogix_dp_enable_sink_psr(dp); ++ if (ret) ++ goto err_disable; ++ } + -+ div_high = (clk_rate_khz * high_ns) / 1000000; -+ if ((clk_rate_khz * high_ns) % 1000000) -+ div_high++; ++ ret = drm_dp_dpcd_read_link_status(&dp->aux, link_status); ++ if (ret < 0) { ++ dev_err(dp->dev, "Failed to read link status\n"); ++ goto err_disable; ++ } + -+ /* Maximum divider supported by hw is 0xffff */ -+ if (div_low > 0xffff) -+ div_low = 0xffff; ++ if (!drm_dp_channel_eq_ok(link_status, dp->link_train.lane_count)) { ++ dev_err(dp->dev, "Channel EQ or CR not ok\n"); ++ ret = -EINVAL; ++ goto err_disable; ++ } + -+ if (div_high > 0xffff) -+ div_high = 0xffff; ++ return 0; + -+ hdmi_writeb(hdmi, div_high & 0xff, HDMI_I2CM_SS_SCL_HCNT_0_ADDR); -+ hdmi_writeb(hdmi, (div_high >> 8) & 0xff, -+ HDMI_I2CM_SS_SCL_HCNT_1_ADDR); -+ hdmi_writeb(hdmi, div_low & 0xff, HDMI_I2CM_SS_SCL_LCNT_0_ADDR); -+ hdmi_writeb(hdmi, (div_low >> 8) & 0xff, -+ HDMI_I2CM_SS_SCL_LCNT_1_ADDR); ++err_disable: ++ analogix_dp_disable(dp); ++ return ret; +} ++EXPORT_SYMBOL_GPL(analogix_dp_loader_protect); + - static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi) + struct analogix_dp_device * + analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) { - hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL, -@@ -267,7 +527,8 @@ static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi) - hdmi_writeb(hdmi, 0x00, HDMI_I2CM_SOFTRSTZ); + struct platform_device *pdev = to_platform_device(dev); + struct analogix_dp_device *dp; + struct resource *res; +- unsigned int irq_flags; + int ret; - /* Set Standard Mode speed (determined to be 100KHz on iMX6) */ -- hdmi_writeb(hdmi, 0x00, HDMI_I2CM_DIV); -+ hdmi_modb(hdmi, HDMI_I2CM_DIV_STD_MODE, -+ HDMI_I2CM_DIV_FAST_STD_MODE, HDMI_I2CM_DIV); + if (!plat_data) { +@@ -1701,9 +2264,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) - /* Set done, not acknowledged and arbitration interrupt polarities */ 
- hdmi_writeb(hdmi, HDMI_I2CM_INT_DONE_POL, HDMI_I2CM_INT); -@@ -281,6 +542,13 @@ static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi) - /* Mute DONE and ERROR interrupts */ - hdmi_writeb(hdmi, HDMI_IH_I2CM_STAT0_ERROR | HDMI_IH_I2CM_STAT0_DONE, - HDMI_IH_MUTE_I2CM_STAT0); -+ -+ /* Only configure when we use the internal I2C controller */ -+ if (hdmi->i2c) { -+ /* set SDA high level holding time */ -+ hdmi_writeb(hdmi, 0x48, HDMI_I2CM_SDA_HOLD); -+ dw_hdmi_i2c_set_divs(hdmi); -+ } - } + dp->dev = &pdev->dev; + dp->dpms_mode = DRM_MODE_DPMS_OFF; ++ INIT_WORK(&dp->modeset_retry_work, analogix_dp_modeset_retry_work_fn); - static bool dw_hdmi_i2c_unwedge(struct dw_hdmi *hdmi) -@@ -358,7 +626,8 @@ static int dw_hdmi_i2c_read(struct dw_hdmi *hdmi, - unsigned char *buf, unsigned int length) - { - struct dw_hdmi_i2c *i2c = hdmi->i2c; -- int ret; -+ int ret, retry, i; -+ bool read_edid = false; + mutex_init(&dp->panel_lock); +- dp->panel_is_modeset = false; ++ dp->panel_is_prepared = false; - if (!i2c->is_regaddr) { - dev_dbg(hdmi->dev, "set read register address to 0\n"); -@@ -366,23 +635,76 @@ static int dw_hdmi_i2c_read(struct dw_hdmi *hdmi, - i2c->is_regaddr = true; + /* + * platform dp driver need containor_of the plat_data to get +@@ -1732,21 +2296,19 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) + } } -- while (length--) { -- reinit_completion(&i2c->cmp); -+ /* edid reads are in 128 bytes. scdc reads are in 1 byte */ -+ if (length == HDMI_EDID_BLOCK_LEN) -+ read_edid = true; +- dp->clock = devm_clk_get(&pdev->dev, "dp"); +- if (IS_ERR(dp->clock)) { +- dev_err(&pdev->dev, "failed to get clock\n"); +- return ERR_CAST(dp->clock); ++ ret = devm_clk_bulk_get_all(dev, &dp->clks); ++ if (ret < 0) { ++ dev_err(dev, "failed to get clocks %d\n", ret); ++ return ERR_PTR(ret); + } -- hdmi_writeb(hdmi, i2c->slave_reg++, HDMI_I2CM_ADDRESS); -- if (i2c->is_segment) -- hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ_EXT, -- HDMI_I2CM_OPERATION); -- else -- hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ, -- HDMI_I2CM_OPERATION); -+ while (length > 0) { -+ retry = 100; -+ hdmi_writeb(hdmi, i2c->slave_reg, HDMI_I2CM_ADDRESS); +- clk_prepare_enable(dp->clock); ++ dp->nr_clks = ret; -- ret = dw_hdmi_i2c_wait(hdmi); -- if (ret) -- return ret; -+ if (read_edid) { -+ i2c->slave_reg += 8; -+ length -= 8; -+ } else { -+ i2c->slave_reg++; -+ length--; -+ } -+ -+ while (retry > 0) { -+ if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & HDMI_PHY_HPD)) { -+ dev_dbg(hdmi->dev, "hdmi disconnect, stop ddc read\n"); -+ return -EPERM; -+ } -+ -+ if (i2c->is_segment) { -+ if (read_edid) -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ8_EXT, -+ HDMI_I2CM_OPERATION); -+ else -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ_EXT, -+ HDMI_I2CM_OPERATION); -+ } else { -+ if (read_edid) -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ8, -+ HDMI_I2CM_OPERATION); -+ else -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ, -+ HDMI_I2CM_OPERATION); -+ } -+ -+ ret = dw_hdmi_i2c_wait(hdmi); -+ if (ret == -EAGAIN) { -+ dev_dbg(hdmi->dev, "ddc read time out\n"); -+ hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_BUS_CLEAR, -+ HDMI_I2CM_OPERATION); -+ retry -= 10; -+ continue; -+ } else if (ret == -EIO) { -+ dev_dbg(hdmi->dev, "ddc read err\n"); -+ hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_BUS_CLEAR, -+ HDMI_I2CM_OPERATION); -+ retry--; -+ usleep_range(10000, 11000); -+ continue; -+ } -+ /* read success */ -+ break; -+ } -+ if (retry <= 0) { -+ dev_err(hdmi->dev, 
"ddc read failed\n"); -+ return -EIO; -+ } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- *buf++ = hdmi_readb(hdmi, HDMI_I2CM_DATAI); -+ if (read_edid) -+ for (i = 0; i < 8; i++) -+ *buf++ = hdmi_readb(hdmi, HDMI_I2CM_READ_BUFF0 + i); -+ else -+ *buf++ = hdmi_readb(hdmi, HDMI_I2CM_DATAI); - } -+ - i2c->is_segment = false; + dp->reg_base = devm_ioremap_resource(&pdev->dev, res); +- if (IS_ERR(dp->reg_base)) { +- ret = PTR_ERR(dp->reg_base); +- goto err_disable_clk; +- } ++ if (IS_ERR(dp->reg_base)) ++ return ERR_CAST(dp->reg_base); - return 0; -@@ -392,7 +714,7 @@ static int dw_hdmi_i2c_write(struct dw_hdmi *hdmi, - unsigned char *buf, unsigned int length) - { - struct dw_hdmi_i2c *i2c = hdmi->i2c; -- int ret; -+ int ret, retry; + dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); - if (!i2c->is_regaddr) { - /* Use the first write byte as register address */ -@@ -403,16 +725,47 @@ static int dw_hdmi_i2c_write(struct dw_hdmi *hdmi, +@@ -1758,46 +2320,55 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) + if (IS_ERR(dp->hpd_gpiod)) { + dev_err(dev, "error getting HDP GPIO: %ld\n", + PTR_ERR(dp->hpd_gpiod)); +- ret = PTR_ERR(dp->hpd_gpiod); +- goto err_disable_clk; ++ return ERR_CAST(dp->hpd_gpiod); } - while (length--) { -- reinit_completion(&i2c->cmp); -+ retry = 100; - - hdmi_writeb(hdmi, *buf++, HDMI_I2CM_DATAO); - hdmi_writeb(hdmi, i2c->slave_reg++, HDMI_I2CM_ADDRESS); -- hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_WRITE, -- HDMI_I2CM_OPERATION); - -- ret = dw_hdmi_i2c_wait(hdmi); -- if (ret) -- return ret; -+ while (retry > 0) { -+ if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & HDMI_PHY_HPD)) { -+ dev_dbg(hdmi->dev, "hdmi disconnect, stop ddc write\n"); -+ return -EPERM; -+ } -+ -+ reinit_completion(&i2c->cmp); -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_WRITE, -+ HDMI_I2CM_OPERATION); -+ -+ ret = dw_hdmi_i2c_wait(hdmi); -+ if (ret == -EAGAIN) { -+ dev_dbg(hdmi->dev, "ddc write time out\n"); -+ hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_BUS_CLEAR, -+ HDMI_I2CM_OPERATION); -+ retry -= 10; -+ continue; -+ } else if (ret == -EIO) { -+ dev_dbg(hdmi->dev, "ddc write err\n"); -+ hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); -+ hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_BUS_CLEAR, -+ HDMI_I2CM_OPERATION); -+ retry--; -+ usleep_range(10000, 11000); -+ continue; -+ } -+ -+ /* write success */ -+ break; -+ } -+ -+ if (retry <= 0) { -+ dev_err(hdmi->dev, "ddc write failed\n"); -+ return -EIO; + if (dp->hpd_gpiod) { +- /* +- * Set up the hotplug GPIO from the device tree as an interrupt. +- * Simply specifying a different interrupt in the device tree +- * doesn't work since we handle hotplug rather differently when +- * using a GPIO. We also need the actual GPIO specifier so +- * that we can get the current state of the GPIO. 
+- */ +- dp->irq = gpiod_to_irq(dp->hpd_gpiod); +- irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; +- } else { +- dp->irq = platform_get_irq(pdev, 0); +- irq_flags = 0; ++ ret = devm_request_threaded_irq(dev, ++ gpiod_to_irq(dp->hpd_gpiod), ++ NULL, ++ analogix_dp_hpd_irq_handler, ++ IRQF_TRIGGER_RISING | ++ IRQF_TRIGGER_FALLING | ++ IRQF_ONESHOT, ++ "analogix-hpd", dp); ++ if (ret) { ++ dev_err(dev, "failed to request hpd IRQ: %d\n", ret); ++ return ERR_PTR(ret); + } } - return 0; -@@ -448,10 +801,15 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, ++ dp->irq = platform_get_irq(pdev, 0); + if (dp->irq == -ENXIO) { + dev_err(&pdev->dev, "failed to get irq\n"); +- ret = -ENODEV; +- goto err_disable_clk; ++ return ERR_PTR(-ENODEV); + } - mutex_lock(&i2c->lock); +- ret = devm_request_threaded_irq(&pdev->dev, dp->irq, +- analogix_dp_hardirq, ++ irq_set_status_flags(dp->irq, IRQ_NOAUTOEN); ++ ret = devm_request_threaded_irq(dev, dp->irq, NULL, + analogix_dp_irq_thread, +- irq_flags, "analogix-dp", dp); ++ IRQF_ONESHOT, dev_name(dev), dp); + if (ret) { + dev_err(&pdev->dev, "failed to request irq\n"); +- goto err_disable_clk; ++ return ERR_PTR(ret); + } +- disable_irq(dp->irq); -+ hdmi_writeb(hdmi, 0, HDMI_I2CM_SOFTRSTZ); -+ udelay(100); +- return dp; ++ dp->extcon = devm_extcon_dev_allocate(dev, analogix_dp_cable); ++ if (IS_ERR(dp->extcon)) { ++ dev_err(dev, "failed to allocate extcon device\n"); ++ return ERR_CAST(dp->extcon); ++ } + - /* Unmute DONE and ERROR interrupts */ - hdmi_writeb(hdmi, 0x00, HDMI_IH_MUTE_I2CM_STAT0); - - /* Set slave device address taken from the first I2C message */ -+ if (addr == DDC_SEGMENT_ADDR && msgs[0].len == 1) -+ addr = DDC_ADDR; - hdmi_writeb(hdmi, addr, HDMI_I2CM_SLAVE); - - /* Set slave device register address on transfer */ -@@ -519,7 +877,7 @@ static struct i2c_adapter *dw_hdmi_i2c_adapter(struct dw_hdmi *hdmi) - adap->owner = THIS_MODULE; - adap->dev.parent = hdmi->dev; - adap->algo = &dw_hdmi_algorithm; -- strscpy(adap->name, "DesignWare HDMI", sizeof(adap->name)); -+ strlcpy(adap->name, "DesignWare HDMI", sizeof(adap->name)); - i2c_set_adapdata(adap, hdmi); ++ ret = devm_extcon_dev_register(dev, dp->extcon); ++ if (ret) { ++ dev_err(dev, "failed to register extcon device\n"); ++ return ERR_PTR(ret); ++ } ++ ++ dp->bridge.driver_private = dp; ++ dp->bridge.funcs = &analogix_dp_bridge_funcs; - ret = i2c_add_adapter(adap); -@@ -561,66 +919,117 @@ static void hdmi_set_cts_n(struct dw_hdmi *hdmi, unsigned int cts, - hdmi_writeb(hdmi, n & 0xff, HDMI_AUD_N1); +-err_disable_clk: +- clk_disable_unprepare(dp->clock); +- return ERR_PTR(ret); ++ return dp; } + EXPORT_SYMBOL_GPL(analogix_dp_probe); --static unsigned int hdmi_compute_n(unsigned int freq, unsigned long pixel_clk) -+static int hdmi_match_tmds_n_table(struct dw_hdmi *hdmi, -+ unsigned long pixel_clk, -+ unsigned long freq) - { -- unsigned int n = (128 * freq) / 1000; -- unsigned int mult = 1; -+ const struct dw_hdmi_plat_data *plat_data = hdmi->plat_data; -+ const struct dw_hdmi_audio_tmds_n *tmds_n = NULL; -+ int i; -+ -+ if (plat_data->tmds_n_table) { -+ for (i = 0; plat_data->tmds_n_table[i].tmds != 0; i++) { -+ if (pixel_clk == plat_data->tmds_n_table[i].tmds) { -+ tmds_n = &plat_data->tmds_n_table[i]; -+ break; -+ } -+ } -+ } +@@ -1820,16 +2391,21 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); + pm_runtime_enable(dp->dev); ++ pm_runtime_get_sync(dp->dev); ++ 
analogix_dp_init(dp); -- while (freq > 48000) { -- mult *= 2; -- freq /= 2; -+ if (tmds_n == NULL) { -+ for (i = 0; common_tmds_n_table[i].tmds != 0; i++) { -+ if (pixel_clk == common_tmds_n_table[i].tmds) { -+ tmds_n = &common_tmds_n_table[i]; -+ break; -+ } -+ } +- ret = analogix_dp_create_bridge(drm_dev, dp); ++ ret = analogix_dp_bridge_init(dp); + if (ret) { +- DRM_ERROR("failed to create bridge (%d)\n", ret); ++ DRM_ERROR("failed to init bridge (%d)\n", ret); + goto err_disable_pm_runtime; } -+ if (tmds_n == NULL) -+ return -ENOENT; ++ enable_irq(dp->irq); + - switch (freq) { - case 32000: -- if (pixel_clk == 25175000) -- n = 4576; -- else if (pixel_clk == 27027000) -- n = 4096; -- else if (pixel_clk == 74176000 || pixel_clk == 148352000) -- n = 11648; -- else if (pixel_clk == 297000000) -- n = 3072; -- else -- n = 4096; -- n *= mult; -- break; -- -+ return tmds_n->n_32k; - case 44100: -- if (pixel_clk == 25175000) -- n = 7007; -- else if (pixel_clk == 74176000) -- n = 17836; -- else if (pixel_clk == 148352000) -- n = 8918; -- else if (pixel_clk == 297000000) -- n = 4704; -- else -- n = 6272; -- n *= mult; -- break; + return 0; + + err_disable_pm_runtime: ++ pm_runtime_put(dp->dev); + pm_runtime_dont_use_autosuspend(dp->dev); + pm_runtime_disable(dp->dev); + drm_dp_aux_unregister(&dp->aux); +@@ -1840,15 +2416,11 @@ EXPORT_SYMBOL_GPL(analogix_dp_bind); + + void analogix_dp_unbind(struct analogix_dp_device *dp) + { +- analogix_dp_bridge_disable(dp->bridge); +- dp->connector.funcs->destroy(&dp->connector); - -+ case 88200: -+ case 176400: -+ return (freq / 44100) * tmds_n->n_44k1; - case 48000: -- if (pixel_clk == 25175000) -- n = 6864; -- else if (pixel_clk == 27027000) -- n = 6144; -- else if (pixel_clk == 74176000) -- n = 11648; -- else if (pixel_clk == 148352000) -- n = 5824; -- else if (pixel_clk == 297000000) -- n = 5120; -- else -- n = 6144; -- n *= mult; -- break; +- if (dp->plat_data->panel) { +- if (drm_panel_unprepare(dp->plat_data->panel)) +- DRM_ERROR("failed to turnoff the panel\n"); +- } - -+ case 96000: -+ case 192000: -+ return (freq / 48000) * tmds_n->n_48k; - default: -- break; -+ return -ENOENT; -+ } -+} -+ -+static u64 hdmi_audio_math_diff(unsigned int freq, unsigned int n, -+ unsigned int pixel_clk) -+{ -+ u64 final, diff; -+ u64 cts; -+ -+ final = (u64)pixel_clk * n; -+ -+ cts = final; -+ do_div(cts, 128 * freq); -+ -+ diff = final - (u64)cts * (128 * freq); -+ -+ return diff; -+} -+ -+static unsigned int hdmi_compute_n(struct dw_hdmi *hdmi, -+ unsigned long pixel_clk, -+ unsigned long freq) -+{ -+ unsigned int min_n = DIV_ROUND_UP((128 * freq), 1500); -+ unsigned int max_n = (128 * freq) / 300; -+ unsigned int ideal_n = (128 * freq) / 1000; -+ unsigned int best_n_distance = ideal_n; -+ unsigned int best_n = 0; -+ u64 best_diff = U64_MAX; -+ int n; -+ -+ /* If the ideal N could satisfy the audio math, then just take it */ -+ if (hdmi_audio_math_diff(freq, ideal_n, pixel_clk) == 0) -+ return ideal_n; -+ -+ for (n = min_n; n <= max_n; n++) { -+ u64 diff = hdmi_audio_math_diff(freq, n, pixel_clk); -+ -+ if (diff < best_diff || (diff == best_diff && -+ abs(n - ideal_n) < best_n_distance)) { -+ best_n = n; -+ best_diff = diff; -+ best_n_distance = abs(best_n - ideal_n); -+ } -+ -+ /* -+ * The best N already satisfy the audio math, and also be -+ * the closest value to ideal N, so just cut the loop. 
-+ */ -+ if ((best_diff == 0) && (abs(n - ideal_n) > best_n_distance)) -+ break; - } - -- return n; -+ return best_n; -+} -+ -+static unsigned int hdmi_find_n(struct dw_hdmi *hdmi, unsigned long pixel_clk, -+ unsigned long sample_rate) -+{ -+ int n; -+ -+ n = hdmi_match_tmds_n_table(hdmi, pixel_clk, sample_rate); -+ if (n > 0) -+ return n; -+ -+ dev_warn(hdmi->dev, "Rate %lu missing; compute N dynamically\n", -+ pixel_clk); -+ -+ return hdmi_compute_n(hdmi, pixel_clk, sample_rate); ++ disable_irq(dp->irq); ++ if (dp->connector.funcs->destroy) ++ dp->connector.funcs->destroy(&dp->connector); + drm_dp_aux_unregister(&dp->aux); ++ pm_runtime_put(dp->dev); + pm_runtime_dont_use_autosuspend(dp->dev); + pm_runtime_disable(dp->dev); } +@@ -1856,32 +2428,40 @@ EXPORT_SYMBOL_GPL(analogix_dp_unbind); - /* -@@ -651,7 +1060,7 @@ static void hdmi_set_clk_regenerator(struct dw_hdmi *hdmi, - u8 config3; - u64 tmp; - -- n = hdmi_compute_n(sample_rate, pixel_clk); -+ n = hdmi_find_n(hdmi, pixel_clk, sample_rate); - - config3 = hdmi_readb(hdmi, HDMI_CONFIG3_ID); - -@@ -842,10 +1251,10 @@ static void dw_hdmi_gp_audio_enable(struct dw_hdmi *hdmi) - - if (pdata->enable_audio) - pdata->enable_audio(hdmi, -- hdmi->channels, -- hdmi->sample_width, -- hdmi->sample_rate, -- hdmi->sample_non_pcm); -+ hdmi->channels, -+ hdmi->sample_width, -+ hdmi->sample_rate, -+ hdmi->sample_non_pcm); + void analogix_dp_remove(struct analogix_dp_device *dp) + { +- clk_disable_unprepare(dp->clock); ++ cancel_work_sync(&dp->modeset_retry_work); } + EXPORT_SYMBOL_GPL(analogix_dp_remove); - static void dw_hdmi_gp_audio_disable(struct dw_hdmi *hdmi) -@@ -1110,6 +1519,15 @@ static bool is_csc_needed(struct dw_hdmi *hdmi) - is_color_space_interpolation(hdmi); +-#ifdef CONFIG_PM + int analogix_dp_suspend(struct analogix_dp_device *dp) + { +- clk_disable_unprepare(dp->clock); ++ pm_runtime_force_suspend(dp->dev); ++ + return 0; } + EXPORT_SYMBOL_GPL(analogix_dp_suspend); -+static bool is_rgb_full_to_limited_needed(struct dw_hdmi *hdmi) + int analogix_dp_resume(struct analogix_dp_device *dp) + { +- int ret; +- +- ret = clk_prepare_enable(dp->clock); +- if (ret < 0) { +- DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); +- return ret; +- } ++ pm_runtime_force_resume(dp->dev); ++ analogix_dp_init(dp); + + return 0; + } + EXPORT_SYMBOL_GPL(analogix_dp_resume); +-#endif ++ ++int analogix_dp_runtime_suspend(struct analogix_dp_device *dp) +{ -+ if (hdmi->hdmi_data.quant_range == HDMI_QUANTIZATION_RANGE_LIMITED || -+ (!hdmi->hdmi_data.quant_range && hdmi->hdmi_data.rgb_limited_range)) -+ return true; ++ clk_bulk_disable_unprepare(dp->nr_clks, dp->clks); + -+ return false; ++ return 0; +} ++EXPORT_SYMBOL_GPL(analogix_dp_runtime_suspend); + - static void dw_hdmi_update_csc_coeffs(struct dw_hdmi *hdmi) - { - const u16 (*csc_coeff)[3][4] = &csc_coeff_default; -@@ -1132,7 +1550,7 @@ static void dw_hdmi_update_csc_coeffs(struct dw_hdmi *hdmi) - csc_coeff = &csc_coeff_rgb_in_eitu709; - csc_scale = 0; - } else if (is_input_rgb && is_output_rgb && -- hdmi->hdmi_data.rgb_limited_range) { -+ is_rgb_full_to_limited_needed(hdmi)) { - csc_coeff = &csc_coeff_rgb_full_to_rgb_limited; - } - -@@ -1164,7 +1582,7 @@ static void hdmi_video_csc(struct dw_hdmi *hdmi) - if (is_color_space_interpolation(hdmi)) - interpolation = HDMI_CSC_CFG_INTMODE_CHROMA_INT_FORMULA1; - else if (is_color_space_decimation(hdmi)) -- decimation = HDMI_CSC_CFG_DECMODE_CHROMA_INT_FORMULA3; -+ decimation = HDMI_CSC_CFG_DECMODE_CHROMA_INT_FORMULA1; ++int 
analogix_dp_runtime_resume(struct analogix_dp_device *dp) ++{ ++ return clk_bulk_prepare_enable(dp->nr_clks, dp->clks); ++} ++EXPORT_SYMBOL_GPL(analogix_dp_runtime_resume); - switch (hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format)) { - case 8: -@@ -1213,7 +1631,7 @@ static void hdmi_video_packetize(struct dw_hdmi *hdmi) - switch (hdmi_bus_fmt_color_depth( - hdmi->hdmi_data.enc_out_bus_format)) { - case 8: -- color_depth = 4; -+ color_depth = 0; - output_select = HDMI_VP_CONF_OUTPUT_SELECTOR_BYPASS; - clear_gcp_auto = 1; - break; -@@ -1253,11 +1671,8 @@ static void hdmi_video_packetize(struct dw_hdmi *hdmi) - } + int analogix_dp_start_crc(struct drm_connector *connector) + { +diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +index 433f2d7ef..87c9cefbc 100644 +--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h ++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +@@ -11,6 +11,7 @@ - /* set the packetizer registers */ -- val = ((color_depth << HDMI_VP_PR_CD_COLOR_DEPTH_OFFSET) & -- HDMI_VP_PR_CD_COLOR_DEPTH_MASK) | -- ((hdmi_data->pix_repet_factor << -- HDMI_VP_PR_CD_DESIRED_PR_FACTOR_OFFSET) & -- HDMI_VP_PR_CD_DESIRED_PR_FACTOR_MASK); -+ val = (color_depth << HDMI_VP_PR_CD_COLOR_DEPTH_OFFSET) & -+ HDMI_VP_PR_CD_COLOR_DEPTH_MASK; - hdmi_writeb(hdmi, val, HDMI_VP_PR_CD); + #include + #include ++#include - /* HDMI1.4b specification section 6.5.3: -@@ -1277,7 +1692,7 @@ static void hdmi_video_packetize(struct dw_hdmi *hdmi) - HDMI_VP_STUFF_PR_STUFFING_MASK, HDMI_VP_STUFF); + #define DP_TIMEOUT_LOOP_COUNT 100 + #define MAX_CR_LOOP 5 +@@ -69,6 +70,9 @@ enum pattern_set { + D10_2, + TRAINING_PTN1, + TRAINING_PTN2, ++ TRAINING_PTN3, ++ TEST_PATTERN_80BIT, ++ TEST_PATTERN_HBR2, + DP_NONE + }; - /* Data from pixel repeater block */ -- if (hdmi_data->pix_repet_factor > 1) { -+ if (hdmi_data->pix_repet_factor > 0) { - vp_conf = HDMI_VP_CONF_PR_EN_ENABLE | - HDMI_VP_CONF_BYPASS_SELECT_PIX_REPEATER; - } else { /* data from packetizer block */ -@@ -1289,9 +1704,7 @@ static void hdmi_video_packetize(struct dw_hdmi *hdmi) - HDMI_VP_CONF_PR_EN_MASK | - HDMI_VP_CONF_BYPASS_SELECT_MASK, HDMI_VP_CONF); +@@ -120,15 +124,9 @@ enum analog_power_block { + POWER_ALL + }; -- hdmi_modb(hdmi, 1 << HDMI_VP_STUFF_IDEFAULT_PHASE_OFFSET, -- HDMI_VP_STUFF_IDEFAULT_PHASE_MASK, HDMI_VP_STUFF); +-enum dp_irq_type { +- DP_IRQ_TYPE_HP_CABLE_IN = BIT(0), +- DP_IRQ_TYPE_HP_CABLE_OUT = BIT(1), +- DP_IRQ_TYPE_HP_CHANGE = BIT(2), +- DP_IRQ_TYPE_UNKNOWN = BIT(3), +-}; - -+ hdmi_modb(hdmi, 0, HDMI_VP_STUFF_IDEFAULT_PHASE_MASK, HDMI_VP_STUFF); - hdmi_writeb(hdmi, remap_size, HDMI_VP_REMAP); + struct video_info { + char *name; ++ struct drm_display_mode mode; - if (output_select == HDMI_VP_CONF_OUTPUT_SELECTOR_PP) { -@@ -1391,6 +1804,23 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi, - return true; - } + bool h_sync_polarity; + bool v_sync_polarity; +@@ -141,6 +139,10 @@ struct video_info { -+static int hdmi_phy_i2c_read(struct dw_hdmi *hdmi, unsigned char addr) -+{ -+ int val; -+ -+ hdmi_writeb(hdmi, 0xFF, HDMI_IH_I2CMPHY_STAT0); -+ hdmi_writeb(hdmi, addr, HDMI_PHY_I2CM_ADDRESS_ADDR); -+ hdmi_writeb(hdmi, 0, HDMI_PHY_I2CM_DATAI_1_ADDR); -+ hdmi_writeb(hdmi, 0, HDMI_PHY_I2CM_DATAI_0_ADDR); -+ hdmi_writeb(hdmi, HDMI_PHY_I2CM_OPERATION_ADDR_READ, -+ HDMI_PHY_I2CM_OPERATION_ADDR); -+ hdmi_phy_wait_i2c_done(hdmi, 1000); -+ val = hdmi_readb(hdmi, HDMI_PHY_I2CM_DATAI_1_ADDR); -+ val = (val & 0xff) << 8; -+ val += hdmi_readb(hdmi, 
HDMI_PHY_I2CM_DATAI_0_ADDR) & 0xff; -+ return val; -+} -+ - /* - * HDMI2.0 Specifies the following procedure for High TMDS Bit Rates: - * - The Source shall suspend transmission of the TMDS clock and data -@@ -1576,6 +2006,13 @@ static int hdmi_phy_configure_dwc_hdmi_3d_tx(struct dw_hdmi *hdmi, - const struct dw_hdmi_mpll_config *mpll_config = pdata->mpll_cfg; - const struct dw_hdmi_curr_ctrl *curr_ctrl = pdata->cur_ctr; - const struct dw_hdmi_phy_config *phy_config = pdata->phy_config; -+ unsigned int tmdsclock = hdmi->hdmi_data.video_mode.mtmdsclock; -+ unsigned int depth = -+ hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format); + int max_link_rate; + enum link_lane_count_type max_lane_count; ++ u32 lane_map[4]; + -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format) && -+ pdata->mpll_cfg_420) -+ mpll_config = pdata->mpll_cfg_420; ++ bool video_bist_enable; ++ bool force_stream_valid; + }; - /* TOFIX Will need 420 specific PHY configuration tables */ + struct link_train { +@@ -150,18 +152,29 @@ struct link_train { + u8 link_rate; + u8 lane_count; + u8 training_lane[4]; ++ bool ssc; ++ bool enhanced_framing; -@@ -1585,11 +2022,11 @@ static int hdmi_phy_configure_dwc_hdmi_3d_tx(struct dw_hdmi *hdmi, - break; + enum link_training_state lt_state; + }; - for (; curr_ctrl->mpixelclock != ~0UL; curr_ctrl++) -- if (mpixelclock <= curr_ctrl->mpixelclock) -+ if (tmdsclock <= curr_ctrl->mpixelclock) - break; ++struct analogix_dp_compliance { ++ struct drm_dp_phy_test_params phytest; ++ int test_link_rate; ++ u8 test_lane_count; ++ unsigned long test_type; ++ bool test_active; ++}; ++ + struct analogix_dp_device { + struct drm_encoder *encoder; + struct device *dev; + struct drm_device *drm_dev; + struct drm_connector connector; +- struct drm_bridge *bridge; ++ struct drm_bridge bridge; + struct drm_dp_aux aux; +- struct clk *clock; ++ struct clk_bulk_data *clks; ++ int nr_clks; + unsigned int irq; + void __iomem *reg_base; - for (; phy_config->mpixelclock != ~0UL; phy_config++) -- if (mpixelclock <= phy_config->mpixelclock) -+ if (tmdsclock <= phy_config->mpixelclock) - break; +@@ -173,17 +186,22 @@ struct analogix_dp_device { + bool force_hpd; + bool fast_train_enable; + bool psr_supported; ++ struct work_struct modeset_retry_work; - if (mpll_config->mpixelclock == ~0UL || -@@ -1597,11 +2034,18 @@ static int hdmi_phy_configure_dwc_hdmi_3d_tx(struct dw_hdmi *hdmi, - phy_config->mpixelclock == ~0UL) - return -EINVAL; + struct mutex panel_lock; +- bool panel_is_modeset; ++ bool panel_is_prepared; -- dw_hdmi_phy_i2c_write(hdmi, mpll_config->res[0].cpce, -+ if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) -+ depth = fls(depth - 8); -+ else -+ depth = 0; -+ if (depth) -+ depth--; ++ u8 dpcd[DP_RECEIVER_CAP_SIZE]; + struct analogix_dp_plat_data *plat_data; ++ struct extcon_dev *extcon; ++ struct analogix_dp_compliance compliance; + -+ dw_hdmi_phy_i2c_write(hdmi, mpll_config->res[depth].cpce, - HDMI_3D_TX_PHY_CPCE_CTRL); -- dw_hdmi_phy_i2c_write(hdmi, mpll_config->res[0].gmp, -+ dw_hdmi_phy_i2c_write(hdmi, mpll_config->res[depth].gmp, - HDMI_3D_TX_PHY_GMPCTRL); -- dw_hdmi_phy_i2c_write(hdmi, curr_ctrl->curr[0], -+ dw_hdmi_phy_i2c_write(hdmi, curr_ctrl->curr[depth], - HDMI_3D_TX_PHY_CURRCTRL); ++ u32 split_area; + }; - dw_hdmi_phy_i2c_write(hdmi, 0, HDMI_3D_TX_PHY_PLLPHBYCTRL); -@@ -1614,10 +2058,6 @@ static int hdmi_phy_configure_dwc_hdmi_3d_tx(struct dw_hdmi *hdmi, - dw_hdmi_phy_i2c_write(hdmi, phy_config->vlev_ctr, - HDMI_3D_TX_PHY_VLEVCTRL); + /* analogix_dp_reg.c 
*/ + void analogix_dp_enable_video_mute(struct analogix_dp_device *dp, bool enable); + void analogix_dp_stop_video(struct analogix_dp_device *dp); +-void analogix_dp_lane_swap(struct analogix_dp_device *dp, bool enable); + void analogix_dp_init_analog_param(struct analogix_dp_device *dp); + void analogix_dp_init_interrupt(struct analogix_dp_device *dp); + void analogix_dp_reset(struct analogix_dp_device *dp); +@@ -199,7 +217,6 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, + int analogix_dp_init_analog_func(struct analogix_dp_device *dp); + void analogix_dp_init_hpd(struct analogix_dp_device *dp); + void analogix_dp_force_hpd(struct analogix_dp_device *dp); +-enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp); + void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp); + void analogix_dp_reset_aux(struct analogix_dp_device *dp); + void analogix_dp_init_aux(struct analogix_dp_device *dp); +@@ -211,28 +228,11 @@ void analogix_dp_set_lane_count(struct analogix_dp_device *dp, u32 count); + void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count); + void analogix_dp_enable_enhanced_mode(struct analogix_dp_device *dp, + bool enable); ++bool analogix_dp_get_enhanced_mode(struct analogix_dp_device *dp); + void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, + enum pattern_set pattern); +-void analogix_dp_set_lane0_pre_emphasis(struct analogix_dp_device *dp, +- u32 level); +-void analogix_dp_set_lane1_pre_emphasis(struct analogix_dp_device *dp, +- u32 level); +-void analogix_dp_set_lane2_pre_emphasis(struct analogix_dp_device *dp, +- u32 level); +-void analogix_dp_set_lane3_pre_emphasis(struct analogix_dp_device *dp, +- u32 level); +-void analogix_dp_set_lane0_link_training(struct analogix_dp_device *dp, +- u32 training_lane); +-void analogix_dp_set_lane1_link_training(struct analogix_dp_device *dp, +- u32 training_lane); +-void analogix_dp_set_lane2_link_training(struct analogix_dp_device *dp, +- u32 training_lane); +-void analogix_dp_set_lane3_link_training(struct analogix_dp_device *dp, +- u32 training_lane); +-u32 analogix_dp_get_lane0_link_training(struct analogix_dp_device *dp); +-u32 analogix_dp_get_lane1_link_training(struct analogix_dp_device *dp); +-u32 analogix_dp_get_lane2_link_training(struct analogix_dp_device *dp); +-u32 analogix_dp_get_lane3_link_training(struct analogix_dp_device *dp); ++void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp); ++u32 analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, u8 lane); + void analogix_dp_reset_macro(struct analogix_dp_device *dp); + void analogix_dp_init_video(struct analogix_dp_device *dp); -- /* Override and disable clock termination. 
*/ -- dw_hdmi_phy_i2c_write(hdmi, HDMI_3D_TX_PHY_CKCALCTRL_OVERRIDE, -- HDMI_3D_TX_PHY_CKCALCTRL); -- - return 0; - } +@@ -255,5 +255,18 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, + struct dp_sdp *vsc, bool blocking); + ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + struct drm_dp_aux_msg *msg); ++void analogix_dp_set_video_format(struct analogix_dp_device *dp); ++void analogix_dp_video_bist_enable(struct analogix_dp_device *dp); ++bool analogix_dp_ssc_supported(struct analogix_dp_device *dp); ++int analogix_dp_phy_power_on(struct analogix_dp_device *dp); ++void analogix_dp_phy_power_off(struct analogix_dp_device *dp); ++void analogix_dp_audio_config_spdif(struct analogix_dp_device *dp); ++void analogix_dp_audio_config_i2s(struct analogix_dp_device *dp); ++void analogix_dp_audio_enable(struct analogix_dp_device *dp); ++void analogix_dp_audio_disable(struct analogix_dp_device *dp); ++void analogix_dp_init(struct analogix_dp_device *dp); ++void analogix_dp_irq_handler(struct analogix_dp_device *dp); ++void analogix_dp_phy_test(struct analogix_dp_device *dp); ++void analogix_dp_check_device_service_irq(struct analogix_dp_device *dp); -@@ -1719,14 +2159,16 @@ void dw_hdmi_phy_setup_hpd(struct dw_hdmi *hdmi, void *data) - hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE, - HDMI_IH_PHY_STAT0); + #endif /* _ANALOGIX_DP_CORE_H */ +diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +index 7b0bc9704..b110018b8 100644 +--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c ++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +@@ -11,30 +11,44 @@ + #include + #include + #include ++#include -- /* Enable cable hot plug irq. */ -- hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0); -+ if (!hdmi->next_bridge) { -+ /* Enable cable hot plug irq. */ -+ hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0); + #include ++#include -- /* Clear and unmute interrupts. */ -- hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE, -- HDMI_IH_PHY_STAT0); -- hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE), -- HDMI_IH_MUTE_PHY_STAT0); -+ /* Clear and unmute interrupts. 
*/ -+ hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE, -+ HDMI_IH_PHY_STAT0); -+ hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE), -+ HDMI_IH_MUTE_PHY_STAT0); + #include "analogix_dp_core.h" + #include "analogix_dp_reg.h" + +-#define COMMON_INT_MASK_1 0 +-#define COMMON_INT_MASK_2 0 +-#define COMMON_INT_MASK_3 0 +-#define COMMON_INT_MASK_4 (HOTPLUG_CHG | HPD_LOST | PLUG) +-#define INT_STA_MASK INT_HPD ++static void analogix_dp_write(struct analogix_dp_device *dp, u32 reg, u32 val) ++{ ++ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { ++ readl(dp->reg_base); ++ writel(val, dp->reg_base + reg); + } ++ ++ writel(val, dp->reg_base + reg); ++} ++ ++static u32 analogix_dp_read(struct analogix_dp_device *dp, u32 reg) ++{ ++ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) ++ readl(dp->reg_base + reg); ++ ++ return readl(dp->reg_base + reg); ++} + + void analogix_dp_enable_video_mute(struct analogix_dp_device *dp, bool enable) + { + u32 reg; + + if (enable) { +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); + reg |= HDCP_VIDEO_MUTE; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); + reg &= ~HDCP_VIDEO_MUTE; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); + } } - EXPORT_SYMBOL_GPL(dw_hdmi_phy_setup_hpd); -@@ -1742,23 +2184,36 @@ static const struct dw_hdmi_phy_ops dw_hdmi_synopsys_phy_ops = { - * HDMI TX Setup - */ +@@ -42,23 +56,20 @@ void analogix_dp_stop_video(struct analogix_dp_device *dp) + { + u32 reg; --static void hdmi_tx_hdcp_config(struct dw_hdmi *hdmi) -+static void hdmi_tx_hdcp_config(struct dw_hdmi *hdmi, -+ const struct drm_display_mode *mode) +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); + reg &= ~VIDEO_EN; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); + } + +-void analogix_dp_lane_swap(struct analogix_dp_device *dp, bool enable) ++static void analogix_dp_set_lane_map(struct analogix_dp_device *dp) { -- u8 de; -- -- if (hdmi->hdmi_data.video_mode.mdataenablepolarity) -- de = HDMI_A_VIDPOLCFG_DATAENPOL_ACTIVE_HIGH; +- u32 reg; ++ struct video_info *video_info = &dp->video_info; ++ u32 i, reg = 0; + +- if (enable) +- reg = LANE3_MAP_LOGIC_LANE_0 | LANE2_MAP_LOGIC_LANE_1 | +- LANE1_MAP_LOGIC_LANE_2 | LANE0_MAP_LOGIC_LANE_3; - else -- de = HDMI_A_VIDPOLCFG_DATAENPOL_ACTIVE_LOW; -- -- /* disable rx detect */ -- hdmi_modb(hdmi, HDMI_A_HDCPCFG0_RXDETECT_DISABLE, -- HDMI_A_HDCPCFG0_RXDETECT_MASK, HDMI_A_HDCPCFG0); -- -- hdmi_modb(hdmi, de, HDMI_A_VIDPOLCFG_DATAENPOL_MASK, HDMI_A_VIDPOLCFG); -- -- hdmi_modb(hdmi, HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_DISABLE, -- HDMI_A_HDCPCFG1_ENCRYPTIONDISABLE_MASK, HDMI_A_HDCPCFG1); -+ struct hdmi_vmode *vmode = &hdmi->hdmi_data.video_mode; -+ u8 vsync_pol, hsync_pol, data_pol, hdmi_dvi; -+ -+ /* Configure the video polarity */ -+ vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ? -+ HDMI_A_VIDPOLCFG_VSYNCPOL_ACTIVE_HIGH : -+ HDMI_A_VIDPOLCFG_VSYNCPOL_ACTIVE_LOW; -+ hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ? -+ HDMI_A_VIDPOLCFG_HSYNCPOL_ACTIVE_HIGH : -+ HDMI_A_VIDPOLCFG_HSYNCPOL_ACTIVE_LOW; -+ data_pol = vmode->mdataenablepolarity ? 
-+ HDMI_A_VIDPOLCFG_DATAENPOL_ACTIVE_HIGH : -+ HDMI_A_VIDPOLCFG_DATAENPOL_ACTIVE_LOW; -+ hdmi_modb(hdmi, vsync_pol | hsync_pol | data_pol, -+ HDMI_A_VIDPOLCFG_VSYNCPOL_MASK | -+ HDMI_A_VIDPOLCFG_HSYNCPOL_MASK | -+ HDMI_A_VIDPOLCFG_DATAENPOL_MASK, -+ HDMI_A_VIDPOLCFG); -+ -+ /* Config the display mode */ -+ hdmi_dvi = hdmi->sink_is_hdmi ? HDMI_A_HDCPCFG0_HDMIDVI_HDMI : -+ HDMI_A_HDCPCFG0_HDMIDVI_DVI; -+ hdmi_modb(hdmi, hdmi_dvi, HDMI_A_HDCPCFG0_HDMIDVI_MASK, -+ HDMI_A_HDCPCFG0); -+ -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_start) -+ hdmi->hdcp->hdcp_start(hdmi->hdcp); +- reg = LANE3_MAP_LOGIC_LANE_3 | LANE2_MAP_LOGIC_LANE_2 | +- LANE1_MAP_LOGIC_LANE_1 | LANE0_MAP_LOGIC_LANE_0; ++ for (i = 0; i < video_info->max_lane_count; i++) ++ reg |= video_info->lane_map[i] << (2 * i); + +- writel(reg, dp->reg_base + ANALOGIX_DP_LANE_MAP); ++ analogix_dp_write(dp, ANALOGIX_DP_LANE_MAP, reg); } - static void hdmi_config_AVI(struct dw_hdmi *hdmi, -@@ -1767,15 +2222,24 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, - { - struct hdmi_avi_infoframe frame; - u8 val; -+ bool is_hdmi2; -+ const struct drm_display_info *info = &connector->display_info; -+ -+ is_hdmi2 = info->hdmi.scdc.supported || (info->color_formats & DRM_COLOR_FORMAT_YCBCR420); + void analogix_dp_init_analog_param(struct analogix_dp_device *dp) +@@ -66,53 +77,54 @@ void analogix_dp_init_analog_param(struct analogix_dp_device *dp) + u32 reg; - /* Initialise info frame from DRM mode */ - drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); + reg = TX_TERMINAL_CTRL_50_OHM; +- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_1, reg); - if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { -- drm_hdmi_avi_infoframe_quant_range(&frame, connector, mode, -- hdmi->hdmi_data.rgb_limited_range ? -- HDMI_QUANTIZATION_RANGE_LIMITED : -- HDMI_QUANTIZATION_RANGE_FULL); -+ /* default range */ -+ if (!hdmi->hdmi_data.quant_range) -+ drm_hdmi_avi_infoframe_quant_range(&frame, connector, mode, -+ hdmi->hdmi_data.rgb_limited_range ? 
-+ HDMI_QUANTIZATION_RANGE_LIMITED : -+ HDMI_QUANTIZATION_RANGE_FULL); -+ else -+ drm_hdmi_avi_infoframe_quant_range(&frame, connector, mode, -+ hdmi->hdmi_data.quant_range); - } else { - frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; - frame.ycc_quantization_range = -@@ -1810,16 +2274,36 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; - break; -+ case V4L2_YCBCR_ENC_BT2020: -+ if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_BT2020) -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ else -+ frame.colorimetry = HDMI_COLORIMETRY_ITU_709; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_BT2020; -+ break; - default: /* Carries no data */ - frame.colorimetry = HDMI_COLORIMETRY_ITU_601; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; - } -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; - } else { -- frame.colorimetry = HDMI_COLORIMETRY_NONE; -- frame.extended_colorimetry = -- HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; -+ if (hdmi->hdmi_data.enc_out_encoding == V4L2_YCBCR_ENC_BT2020) { -+ frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_BT2020; -+ } else { -+ frame.colorimetry = HDMI_COLORIMETRY_NONE; -+ frame.extended_colorimetry = -+ HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; -+ } -+ -+ if (is_hdmi2 && frame.quantization_range == HDMI_QUANTIZATION_RANGE_FULL) -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_FULL; -+ else -+ frame.ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; + reg = SEL_24M | TX_DVDD_BIT_1_0625V; +- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_2); ++ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_2, reg); + + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + reg = REF_CLK_24M; + if (dp->plat_data->dev_type == RK3288_DP) + reg ^= REF_CLK_MASK; + +- writel(reg, dp->reg_base + ANALOGIX_DP_PLL_REG_1); +- writel(0x95, dp->reg_base + ANALOGIX_DP_PLL_REG_2); +- writel(0x40, dp->reg_base + ANALOGIX_DP_PLL_REG_3); +- writel(0x58, dp->reg_base + ANALOGIX_DP_PLL_REG_4); +- writel(0x22, dp->reg_base + ANALOGIX_DP_PLL_REG_5); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_1, reg); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_2, 0x99); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_3, 0x40); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_4, 0x58); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_REG_5, 0x22); ++ analogix_dp_write(dp, ANALOGIX_DP_BIAS, 0x44); } - /* -@@ -1857,7 +2341,11 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, - hdmi_writeb(hdmi, val, HDMI_FC_AVICONF2); + reg = DRIVE_DVDD_BIT_1_0625V | VCO_BIT_600_MICRO; +- writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_3); ++ analogix_dp_write(dp, ANALOGIX_DP_ANALOG_CTL_3, reg); - /* AVI data byte 4 differences: none */ -- val = frame.video_code & 0x7f; -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format) || -+ hdmi->connector.display_info.hdmi.scdc.supported) -+ val = hdmi->vic; -+ else -+ val = frame.video_code & 0x7f; - hdmi_writeb(hdmi, val, HDMI_FC_AVIVID); + reg = PD_RING_OSC | AUX_TERMINAL_CTRL_50_OHM | + TX_CUR1_2X | TX_CUR_16_MA; +- writel(reg, dp->reg_base + ANALOGIX_DP_PLL_FILTER_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_PLL_FILTER_CTL_1, reg); - /* AVI Data Byte 5- set up input and output pixel repetition */ -@@ -1896,7 +2384,16 @@ static void hdmi_config_vendor_specific_infoframe(struct dw_hdmi *hdmi, - u8 buffer[10]; - ssize_t err; + reg = CH3_AMP_400_MV | 
CH2_AMP_400_MV | + CH1_AMP_400_MV | CH0_AMP_400_MV; +- writel(reg, dp->reg_base + ANALOGIX_DP_TX_AMP_TUNING_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_TX_AMP_TUNING_CTL, reg); + } -- err = drm_hdmi_vendor_infoframe_from_display_mode(&frame, connector, -+ /* if sink support hdmi2.0, don't send vsi */ -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format) || -+ hdmi->connector.display_info.hdmi.scdc.supported) { -+ hdmi_mask_writeb(hdmi, 0, HDMI_FC_DATAUTO0, HDMI_FC_DATAUTO0_VSD_OFFSET, -+ HDMI_FC_DATAUTO0_VSD_MASK); -+ return; -+ } -+ -+ err = drm_hdmi_vendor_infoframe_from_display_mode(&frame, -+ &hdmi->connector, - mode); - if (err < 0) - /* -@@ -1946,17 +2443,44 @@ static void hdmi_config_drm_infoframe(struct dw_hdmi *hdmi, - const struct drm_connector *connector) + void analogix_dp_init_interrupt(struct analogix_dp_device *dp) { - const struct drm_connector_state *conn_state = connector->state; -+ struct hdr_output_metadata *hdr_metadata; - struct hdmi_drm_infoframe frame; - u8 buffer[30]; - ssize_t err; - int i; + /* Set interrupt pin assertion polarity as high */ +- writel(INT_POL1 | INT_POL0, dp->reg_base + ANALOGIX_DP_INT_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_CTL, INT_POL1 | INT_POL0); -+ /* Dynamic Range and Mastering Infoframe is introduced in v2.11a. */ -+ if (hdmi->version < 0x211a) { -+ dev_dbg(hdmi->dev, "Not support DRM Infoframe\n"); -+ return; -+ } -+ - if (!hdmi->plat_data->use_drm_infoframe) - return; + /* Clear pending regisers */ +- writel(0xff, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); +- writel(0x4f, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_2); +- writel(0xe0, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_3); +- writel(0xe7, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); +- writel(0x63, dp->reg_base + ANALOGIX_DP_INT_STA); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, 0xff); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_2, 0x4f); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_3, 0xe0); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, 0xe7); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, 0x63); - hdmi_modb(hdmi, HDMI_FC_PACKET_TX_EN_DRM_DISABLE, - HDMI_FC_PACKET_TX_EN_DRM_MASK, HDMI_FC_PACKET_TX_EN); + /* 0:mask,1: unmask */ +- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_1); +- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_2); +- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_3); +- writel(0x00, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); +- writel(0x00, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_1, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_2, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_3, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, 0x00); + } -+ if (!hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf) { -+ DRM_DEBUG("No need to set HDR metadata in infoframe\n"); -+ return; -+ } -+ -+ if (!conn_state->hdr_output_metadata) { -+ DRM_DEBUG("source metadata not set yet\n"); -+ return; -+ } -+ -+ hdr_metadata = (struct hdr_output_metadata *) -+ conn_state->hdr_output_metadata->data; -+ -+ if (!(hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf & -+ BIT(hdr_metadata->hdmi_metadata_type1.eotf))) { -+ DRM_ERROR("Not support EOTF %d\n", -+ hdr_metadata->hdmi_metadata_type1.eotf); -+ return; -+ } -+ - err = drm_hdmi_infoframe_set_hdr_metadata(&frame, conn_state); - if (err < 0) - return; -@@ -1974,53 +2498,74 @@ static void 
hdmi_config_drm_infoframe(struct dw_hdmi *hdmi, - hdmi_writeb(hdmi, buffer[4 + i], HDMI_FC_DRM_PB0 + i); + void analogix_dp_reset(struct analogix_dp_device *dp) +@@ -130,65 +142,54 @@ void analogix_dp_reset(struct analogix_dp_device *dp) + AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | + HDCP_FUNC_EN_N | SW_FUNC_EN_N; - hdmi_writeb(hdmi, 1, HDMI_FC_DRM_UP); -+ /* -+ * avi and hdr infoframe cannot be sent at the same time -+ * for compatibility with Huawei TV -+ */ -+ msleep(300); - hdmi_modb(hdmi, HDMI_FC_PACKET_TX_EN_DRM_ENABLE, - HDMI_FC_PACKET_TX_EN_DRM_MASK, HDMI_FC_PACKET_TX_EN); -+ -+ DRM_DEBUG("%s eotf %d end\n", __func__, -+ hdr_metadata->hdmi_metadata_type1.eotf); +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); + + reg = SSC_FUNC_EN_N | AUX_FUNC_EN_N | + SERDES_FIFO_FUNC_EN_N | + LS_CLK_DOMAIN_FUNC_EN_N; +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); + + usleep_range(20, 30); + +- analogix_dp_lane_swap(dp, 0); ++ analogix_dp_set_lane_map(dp); + +- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); +- writel(0x40, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); +- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); +- writel(0x0, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, 0x0); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, 0x40); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, 0x0); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, 0x0); + +- writel(0x0, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); +- writel(0x0, dp->reg_base + ANALOGIX_DP_HDCP_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, 0x0); ++ analogix_dp_write(dp, ANALOGIX_DP_HDCP_CTL, 0x0); + +- writel(0x5e, dp->reg_base + ANALOGIX_DP_HPD_DEGLITCH_L); +- writel(0x1a, dp->reg_base + ANALOGIX_DP_HPD_DEGLITCH_H); ++ analogix_dp_write(dp, ANALOGIX_DP_LINK_DEBUG_CTL, 0x10); + +- writel(0x10, dp->reg_base + ANALOGIX_DP_LINK_DEBUG_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, 0x0); + +- writel(0x0, dp->reg_base + ANALOGIX_DP_PHY_TEST); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_FIFO_THRD, 0x0); ++ analogix_dp_write(dp, ANALOGIX_DP_AUDIO_MARGIN, 0x20); + +- writel(0x0, dp->reg_base + ANALOGIX_DP_VIDEO_FIFO_THRD); +- writel(0x20, dp->reg_base + ANALOGIX_DP_AUDIO_MARGIN); ++ analogix_dp_write(dp, ANALOGIX_DP_M_VID_GEN_FILTER_TH, 0x4); ++ analogix_dp_write(dp, ANALOGIX_DP_M_AUD_GEN_FILTER_TH, 0x2); + +- writel(0x4, dp->reg_base + ANALOGIX_DP_M_VID_GEN_FILTER_TH); +- writel(0x2, dp->reg_base + ANALOGIX_DP_M_AUD_GEN_FILTER_TH); +- +- writel(0x00000101, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, 0x00000101); } --static void hdmi_av_composer(struct dw_hdmi *hdmi, -- const struct drm_display_info *display, -- const struct drm_display_mode *mode) -+static unsigned int -+hdmi_get_tmdsclock(struct dw_hdmi *hdmi, unsigned long mpixelclock) + void analogix_dp_swreset(struct analogix_dp_device *dp) { -- u8 inv_val, bytes; -- const struct drm_hdmi_info *hdmi_info = &display->hdmi; -- struct hdmi_vmode *vmode = &hdmi->hdmi_data.video_mode; -- int hblank, vblank, h_de_hs, v_de_vs, hsync_len, vsync_len; -- unsigned int vdisplay, hdisplay; +- writel(RESET_DP_TX, dp->reg_base + ANALOGIX_DP_TX_SW_RESET); ++ analogix_dp_write(dp, ANALOGIX_DP_TX_SW_RESET, RESET_DP_TX); + } + + void analogix_dp_config_interrupt(struct analogix_dp_device *dp) + { +- u32 reg; - -- vmode->mpixelclock = mode->clock * 1000; + /* 0: mask, 1: unmask */ +- reg = 
COMMON_INT_MASK_1; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_1); - -- dev_dbg(hdmi->dev, "final pixclk = %d\n", vmode->mpixelclock); +- reg = COMMON_INT_MASK_2; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_2); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_1, 0); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_2, 0); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_3, 0); + +- reg = COMMON_INT_MASK_3; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_3); - -- vmode->mtmdsclock = vmode->mpixelclock; -+ unsigned int tmdsclock = mpixelclock; -+ unsigned int depth = -+ hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format); +- reg = COMMON_INT_MASK_4; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); +- +- reg = INT_STA_MASK; +- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); ++ if (dp->force_hpd || dp->hpd_gpiod) ++ analogix_dp_mute_hpd_interrupt(dp); ++ else ++ analogix_dp_unmute_hpd_interrupt(dp); + } - if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) { -- switch (hdmi_bus_fmt_color_depth( -- hdmi->hdmi_data.enc_out_bus_format)) { -+ switch (depth) { - case 16: -- vmode->mtmdsclock = vmode->mpixelclock * 2; -+ tmdsclock = mpixelclock * 2; - break; - case 12: -- vmode->mtmdsclock = vmode->mpixelclock * 3 / 2; -+ tmdsclock = mpixelclock * 3 / 2; - break; - case 10: -- vmode->mtmdsclock = vmode->mpixelclock * 5 / 4; -+ tmdsclock = mpixelclock * 5 / 4; -+ break; -+ default: - break; - } - } + void analogix_dp_mute_hpd_interrupt(struct analogix_dp_device *dp) +@@ -196,13 +197,13 @@ void analogix_dp_mute_hpd_interrupt(struct analogix_dp_device *dp) + u32 reg; -+ return tmdsclock; -+} -+ -+static void hdmi_av_composer(struct dw_hdmi *hdmi, -+ const struct drm_display_info *display, -+ const struct drm_display_mode *mode) -+{ -+ u8 inv_val, bytes; -+ const struct drm_hdmi_info *hdmi_info = &display->hdmi; -+ struct hdmi_vmode *vmode = &hdmi->hdmi_data.video_mode; -+ int hblank, vblank, h_de_hs, v_de_vs, hsync_len, vsync_len; -+ unsigned int vdisplay, hdisplay; -+ -+ vmode->previous_pixelclock = vmode->mpixelclock; -+ vmode->mpixelclock = mode->crtc_clock * 1000; -+ dev_dbg(hdmi->dev, "final pixclk = %d\n", vmode->mpixelclock); -+ -+ vmode->previous_tmdsclock = vmode->mtmdsclock; -+ vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi, vmode->mpixelclock); - if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) - vmode->mtmdsclock /= 2; + /* 0: mask, 1: unmask */ +- reg = readl(dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); +- reg &= ~COMMON_INT_MASK_4; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_MASK_4); ++ reg &= ~HOTPLUG_CHG; ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, reg); - dev_dbg(hdmi->dev, "final tmdsclock = %d\n", vmode->mtmdsclock); +- reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA_MASK); +- reg &= ~INT_STA_MASK; +- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA_MASK); ++ reg &= ~INT_HPD; ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, reg); + } -- /* Set up HDMI_FC_INVIDCONF */ -- inv_val = (hdmi->hdmi_data.hdcp_enable || -- (dw_hdmi_support_scdc(hdmi, display) && -- (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || -- hdmi_info->scdc.scrambling.low_rates)) ? 
-- HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE : -- HDMI_FC_INVIDCONF_HDCP_KEEPOUT_INACTIVE); -+ if (hdmi->update) -+ return; -+ -+ /* Set up HDMI_FC_INVIDCONF -+ * Some display equipments require that the interval -+ * between Video Data and Data island must be at least 58 pixels, -+ * and fc_invidconf.HDCP_keepout set (1'b1) can meet the requirement. -+ */ -+ inv_val = HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE; + void analogix_dp_unmute_hpd_interrupt(struct analogix_dp_device *dp) +@@ -210,18 +211,20 @@ void analogix_dp_unmute_hpd_interrupt(struct analogix_dp_device *dp) + u32 reg; - inv_val |= mode->flags & DRM_MODE_FLAG_PVSYNC ? - HDMI_FC_INVIDCONF_VSYNC_IN_POLARITY_ACTIVE_HIGH : -@@ -2086,7 +2631,8 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, - /* Scrambling Control */ - if (dw_hdmi_support_scdc(hdmi, display)) { - if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || -- hdmi_info->scdc.scrambling.low_rates) { -+ (hdmi_info->scdc.scrambling.low_rates && -+ hdmi->scramble_low_rates)) { - /* - * HDMI2.0 Specifies the following procedure: - * After the Source Device has determined that -@@ -2120,6 +2666,8 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, - HDMI_MC_SWRSTZ); - drm_scdc_set_scrambling(hdmi->curr_conn, 0); - } -+ } else { -+ hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); - } + /* 0: mask, 1: unmask */ +- reg = COMMON_INT_MASK_4; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_MASK_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_MASK_4); ++ reg |= HOTPLUG_CHG; ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_MASK_4, reg); - /* Set up horizontal active pixel width */ -@@ -2166,17 +2714,25 @@ static void dw_hdmi_enable_video_path(struct dw_hdmi *hdmi) - hdmi_writeb(hdmi, 0x21, HDMI_FC_CH2PREAM); +- reg = INT_STA_MASK; +- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA_MASK); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA_MASK); ++ reg |= INT_HPD; ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA_MASK, reg); + } - /* Enable pixel clock and tmds data path */ -- hdmi->mc_clkdis |= HDMI_MC_CLKDIS_HDCPCLK_DISABLE | -- HDMI_MC_CLKDIS_CSCCLK_DISABLE | -- HDMI_MC_CLKDIS_AUDCLK_DISABLE | -- HDMI_MC_CLKDIS_PREPCLK_DISABLE | -- HDMI_MC_CLKDIS_TMDSCLK_DISABLE; -+ -+ if (!hdmi->update) -+ hdmi->mc_clkdis |= HDMI_MC_CLKDIS_HDCPCLK_DISABLE | -+ HDMI_MC_CLKDIS_CSCCLK_DISABLE | -+ HDMI_MC_CLKDIS_AUDCLK_DISABLE | -+ HDMI_MC_CLKDIS_PREPCLK_DISABLE | -+ HDMI_MC_CLKDIS_TMDSCLK_DISABLE; - hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_PIXELCLK_DISABLE; - hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); + enum pll_status analogix_dp_get_pll_lock_status(struct analogix_dp_device *dp) + { + u32 reg; - hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_TMDSCLK_DISABLE; - hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); +- reg = readl(dp->reg_base + ANALOGIX_DP_DEBUG_CTL); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_DEBUG_CTL); + if (reg & PLL_LOCK) + return PLL_LOCKED; + else +@@ -239,12 +242,12 @@ void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable) + mask = RK_PLL_PD; + } -+ /* Enable pixel repetition path */ -+ if (hdmi->hdmi_data.video_mode.mpixelrepetitioninput) { -+ hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_PREPCLK_DISABLE; -+ hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); -+ } -+ - /* Enable csc path */ - if (is_csc_needed(hdmi)) { - hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_CSCCLK_DISABLE; -@@ -2238,11 +2794,44 @@ static void hdmi_disable_overflow_interrupts(struct dw_hdmi *hdmi) - HDMI_IH_MUTE_FC_STAT2); +- reg = readl(dp->reg_base + pd_addr); ++ reg = analogix_dp_read(dp, pd_addr); + if (enable) 
+ reg |= mask; + else + reg &= ~mask; +- writel(reg, dp->reg_base + pd_addr); ++ analogix_dp_write(dp, pd_addr, reg); } -+static void dw_hdmi_force_output_pattern(struct dw_hdmi *hdmi, const struct drm_display_mode *mode) -+{ -+ /* force output black */ -+ if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { -+ enum hdmi_quantization_range rgb_quant_range = drm_default_rgb_quant_range(mode); -+ -+ if (hdmi->hdmi_data.quant_range == HDMI_QUANTIZATION_RANGE_FULL) { -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS2); /*R*/ -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS1); /*G*/ -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS0); /*B*/ -+ } else if (hdmi->hdmi_data.quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) { -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS2); /*R*/ -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS1); /*G*/ -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS0); /*B*/ -+ } else if (hdmi->hdmi_data.quant_range == HDMI_QUANTIZATION_RANGE_DEFAULT) { -+ if (rgb_quant_range == HDMI_QUANTIZATION_RANGE_FULL) { -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS2); /*R*/ -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS1); /*G*/ -+ hdmi_writeb(hdmi, 0x00, HDMI_FC_DBGTMDS0); /*B*/ -+ } else if (rgb_quant_range == HDMI_QUANTIZATION_RANGE_LIMITED) { -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS2); /*R*/ -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS1); /*G*/ -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS0); /*B*/ -+ } + void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, +@@ -265,52 +268,54 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, + else + mask = AUX_PD; + +- reg = readl(dp->reg_base + phy_pd_addr); +- if (enable) ++ reg = analogix_dp_read(dp, phy_pd_addr); ++ if (enable) { ++ reg &= ~(DP_INC_BG | DP_EXP_BG); + reg |= mask; +- else ++ } else { + reg &= ~mask; +- writel(reg, dp->reg_base + phy_pd_addr); + } -+ } else { -+ hdmi_writeb(hdmi, 0x80, HDMI_FC_DBGTMDS2); /*Cr*/ -+ hdmi_writeb(hdmi, 0x10, HDMI_FC_DBGTMDS1); /*Y*/ -+ hdmi_writeb(hdmi, 0x80, HDMI_FC_DBGTMDS0); /*Cb*/ -+ } -+} -+ - static int dw_hdmi_setup(struct dw_hdmi *hdmi, - const struct drm_connector *connector, - const struct drm_display_mode *mode) - { - int ret; -+ void *data = hdmi->plat_data->phy_data; ++ analogix_dp_write(dp, phy_pd_addr, reg); + break; + case CH0_BLOCK: + mask = CH0_PD; +- reg = readl(dp->reg_base + phy_pd_addr); ++ reg = analogix_dp_read(dp, phy_pd_addr); - hdmi_disable_overflow_interrupts(hdmi); + if (enable) + reg |= mask; + else + reg &= ~mask; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + break; + case CH1_BLOCK: + mask = CH1_PD; +- reg = readl(dp->reg_base + phy_pd_addr); ++ reg = analogix_dp_read(dp, phy_pd_addr); -@@ -2254,49 +2843,83 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, - dev_dbg(hdmi->dev, "CEA mode used vic=%d\n", hdmi->vic); - } + if (enable) + reg |= mask; + else + reg &= ~mask; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + break; + case CH2_BLOCK: + mask = CH2_PD; +- reg = readl(dp->reg_base + phy_pd_addr); ++ reg = analogix_dp_read(dp, phy_pd_addr); -- if ((hdmi->vic == 6) || (hdmi->vic == 7) || -- (hdmi->vic == 21) || (hdmi->vic == 22) || -- (hdmi->vic == 2) || (hdmi->vic == 3) || -- (hdmi->vic == 17) || (hdmi->vic == 18)) -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ else if ((hdmi->vic == 6) || (hdmi->vic == 7) || -+ (hdmi->vic == 21) || (hdmi->vic == 22) || -+ (hdmi->vic == 2) || (hdmi->vic == 3) || 
-+ (hdmi->vic == 17) || (hdmi->vic == 18)) - hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_601; - else - hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_709; + if (enable) + reg |= mask; + else + reg &= ~mask; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + break; + case CH3_BLOCK: + mask = CH3_PD; +- reg = readl(dp->reg_base + phy_pd_addr); ++ reg = analogix_dp_read(dp, phy_pd_addr); -- hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 0; -- hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 0; -+ if (mode->flags & DRM_MODE_FLAG_DBLCLK) { -+ hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 1; -+ hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 1; -+ } else { -+ hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 0; -+ hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 0; -+ } -+ /* TOFIX: Get input format from plat data or fallback to RGB888 */ -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ else if (hdmi->plat_data->input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->input_bus_format; -+ else -+ hdmi->hdmi_data.enc_in_bus_format = -+ MEDIA_BUS_FMT_RGB888_1X24; + if (enable) + reg |= mask; + else + reg &= ~mask; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + break; + case ANALOG_TOTAL: + /* +@@ -323,29 +328,29 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, + else + mask = DP_PHY_PD; -- if (hdmi->hdmi_data.enc_in_bus_format == MEDIA_BUS_FMT_FIXED) -- hdmi->hdmi_data.enc_in_bus_format = MEDIA_BUS_FMT_RGB888_1X24; -+ /* TOFIX: Default to RGB888 output format */ -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); -+ else -+ hdmi->hdmi_data.enc_out_bus_format = -+ MEDIA_BUS_FMT_RGB888_1X24; -+ -+ if (hdmi->plat_data->set_prev_bus_format) -+ hdmi->plat_data->set_prev_bus_format(data, hdmi->hdmi_data.enc_out_bus_format); +- reg = readl(dp->reg_base + phy_pd_addr); ++ reg = analogix_dp_read(dp, phy_pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; - /* TOFIX: Get input encoding from plat data or fallback to none */ -- if (hdmi->plat_data->input_bus_encoding) -+ if (hdmi->plat_data->get_enc_in_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ else if (hdmi->plat_data->input_bus_encoding) - hdmi->hdmi_data.enc_in_encoding = - hdmi->plat_data->input_bus_encoding; - else - hdmi->hdmi_data.enc_in_encoding = V4L2_YCBCR_ENC_DEFAULT; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + usleep_range(10, 15); + break; + case POWER_ALL: + if (enable) { + reg = DP_ALL_PD; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + } else { + reg = DP_ALL_PD; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + usleep_range(10, 15); + reg &= ~DP_INC_BG; +- writel(reg, dp->reg_base + phy_pd_addr); ++ analogix_dp_write(dp, phy_pd_addr, reg); + usleep_range(10, 15); -- if (hdmi->hdmi_data.enc_out_bus_format == MEDIA_BUS_FMT_FIXED) -- hdmi->hdmi_data.enc_out_bus_format = MEDIA_BUS_FMT_RGB888_1X24; -+ -+ if (hdmi->plat_data->get_quant_range) -+ hdmi->hdmi_data.quant_range = -+ hdmi->plat_data->get_quant_range(data); +- writel(0x00, dp->reg_base + phy_pd_addr); ++ 
analogix_dp_write(dp, phy_pd_addr, 0x00); + } + break; + default: +@@ -356,36 +361,24 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, + int analogix_dp_init_analog_func(struct analogix_dp_device *dp) + { + u32 reg; +- int timeout_loop = 0; - hdmi->hdmi_data.rgb_limited_range = hdmi->sink_is_hdmi && - drm_default_rgb_quant_range(mode) == - HDMI_QUANTIZATION_RANGE_LIMITED; + analogix_dp_set_analog_power_down(dp, POWER_ALL, 0); -- hdmi->hdmi_data.pix_repet_factor = 0; -- hdmi->hdmi_data.hdcp_enable = 0; -+ if (!hdmi->sink_is_hdmi) -+ hdmi->hdmi_data.quant_range = HDMI_QUANTIZATION_RANGE_FULL; -+ -+ /* -+ * According to the dw-hdmi specification 6.4.2 -+ * vp_pr_cd[3:0]: -+ * 0000b: No pixel repetition (pixel sent only once) -+ * 0001b: Pixel sent two times (pixel repeated once) -+ */ -+ hdmi->hdmi_data.pix_repet_factor = -+ (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 1 : 0; - hdmi->hdmi_data.video_mode.mdataenablepolarity = true; + reg = PLL_LOCK_CHG; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, reg); -+ dw_hdmi_force_output_pattern(hdmi, mode); -+ - /* HDMI Initialization Step B.1 */ - hdmi_av_composer(hdmi, &connector->display_info, mode); +- reg = readl(dp->reg_base + ANALOGIX_DP_DEBUG_CTL); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_DEBUG_CTL); + reg &= ~(F_PLL_LOCK | PLL_LOCK_CTRL); +- writel(reg, dp->reg_base + ANALOGIX_DP_DEBUG_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_DEBUG_CTL, reg); -- /* HDMI Initializateion Step B.2 */ -- ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, -- &connector->display_info, -- &hdmi->previous_mode); -- if (ret) -- return ret; -- hdmi->phy.enabled = true; + /* Power up PLL */ +- if (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { +- analogix_dp_set_pll_power_down(dp, 0); - - /* HDMI Initialization Step B.3 */ - dw_hdmi_enable_video_path(hdmi); +- while (analogix_dp_get_pll_lock_status(dp) == PLL_UNLOCKED) { +- timeout_loop++; +- if (DP_TIMEOUT_LOOP_COUNT < timeout_loop) { +- dev_err(dp->dev, "failed to get pll lock status\n"); +- return -ETIMEDOUT; +- } +- usleep_range(10, 20); +- } +- } ++ analogix_dp_set_pll_power_down(dp, 0); -@@ -2323,10 +2946,34 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, - hdmi_video_packetize(hdmi); - hdmi_video_csc(hdmi); - hdmi_video_sample(hdmi); -- hdmi_tx_hdcp_config(hdmi); -+ hdmi_tx_hdcp_config(hdmi, mode); -+ -+ /* HDMI Enable phy output */ -+ if (!hdmi->phy.enabled || -+ hdmi->hdmi_data.video_mode.previous_pixelclock != -+ hdmi->hdmi_data.video_mode.mpixelclock || -+ hdmi->hdmi_data.video_mode.previous_tmdsclock != -+ hdmi->hdmi_data.video_mode.mtmdsclock) { -+ ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, -+ &connector->display_info, -+ &hdmi->previous_mode); -+ if (ret) -+ return ret; -+ hdmi->phy.enabled = true; -+ } + /* Enable Serdes FIFO function and Link symbol clock domain module */ +- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); + reg &= ~(SERDES_FIFO_FUNC_EN_N | LS_CLK_DOMAIN_FUNC_EN_N + | AUX_FUNC_EN_N); +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); + return 0; + } - dw_hdmi_clear_overflow(hdmi); +@@ -397,10 +390,10 @@ void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp) + return; -+ /* -+ * konka tv should switch pattern after set to yuv420 10bit or -+ * the TV might not recognize the signal. 
-+ */ -+ if (!hdmi->update) { -+ hdmi_writeb(hdmi, 1, HDMI_FC_DBGFORCE); -+ msleep(50); -+ hdmi_writeb(hdmi, 0, HDMI_FC_DBGFORCE); -+ } -+ - return 0; + reg = HOTPLUG_CHG | HPD_LOST | PLUG; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, reg); + + reg = INT_HPD; +- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, reg); } -@@ -2399,6 +3046,8 @@ static void dw_hdmi_poweroff(struct dw_hdmi *hdmi) - hdmi->phy.enabled = false; - } + void analogix_dp_init_hpd(struct analogix_dp_device *dp) +@@ -410,47 +403,56 @@ void analogix_dp_init_hpd(struct analogix_dp_device *dp) + if (dp->hpd_gpiod) + return; -+ if (hdmi->hdcp && hdmi->hdcp->hdcp_stop) -+ hdmi->hdcp->hdcp_stop(hdmi->hdcp); - hdmi->bridge_is_on = false; +- analogix_dp_clear_hotplug_interrupts(dp); ++ analogix_dp_write(dp, ANALOGIX_DP_HPD_DEGLITCH_H, 0xbb); ++ analogix_dp_write(dp, ANALOGIX_DP_HPD_DEGLITCH_L, 0x80); + +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); + reg &= ~(F_HPD | HPD_CTRL); +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); } -@@ -2416,6 +3065,11 @@ static void dw_hdmi_update_power(struct dw_hdmi *hdmi) - } + void analogix_dp_force_hpd(struct analogix_dp_device *dp) + { + u32 reg; - if (force == DRM_FORCE_OFF) { -+ if (hdmi->initialized) { -+ hdmi->initialized = false; -+ hdmi->disabled = true; -+ hdmi->logo_plug_out = true; -+ } - if (hdmi->bridge_is_on) - dw_hdmi_poweroff(hdmi); - } else { -@@ -2448,8 +3102,28 @@ static enum drm_connector_status dw_hdmi_detect(struct dw_hdmi *hdmi) +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); +- reg = (F_HPD | HPD_CTRL); +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); ++ reg |= (F_HPD | HPD_CTRL); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); + } + +-enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp) ++static void analogix_dp_handle_hpd_event(struct analogix_dp_device *dp) { - enum drm_connector_status result; ++ bool changed = false; + u32 reg; -+ if (!hdmi->force_logo) { -+ mutex_lock(&hdmi->mutex); -+ hdmi->force = DRM_FORCE_UNSPECIFIED; -+ dw_hdmi_update_power(hdmi); -+ dw_hdmi_update_phy_mask(hdmi); -+ mutex_unlock(&hdmi->mutex); +- if (dp->hpd_gpiod) { +- reg = gpiod_get_value(dp->hpd_gpiod); +- if (reg) +- return DP_IRQ_TYPE_HP_CABLE_IN; +- else +- return DP_IRQ_TYPE_HP_CABLE_OUT; +- } else { +- /* Parse hotplug interrupt status register */ +- reg = readl(dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_INT_STA); ++ if (reg & INT_HPD) { ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, INT_HPD); + +- if (reg & PLUG) +- return DP_IRQ_TYPE_HP_CABLE_IN; ++ memset(&dp->compliance, 0, sizeof(dp->compliance)); + +- if (reg & HPD_LOST) +- return DP_IRQ_TYPE_HP_CABLE_OUT; ++ analogix_dp_check_device_service_irq(dp); + +- if (reg & HOTPLUG_CHG) +- return DP_IRQ_TYPE_HP_CHANGE; ++ if (dp->compliance.test_active && ++ dp->compliance.test_type == DP_TEST_LINK_PHY_TEST_PATTERN) { ++ analogix_dp_phy_test(dp); ++ return; ++ } + } + +- return DP_IRQ_TYPE_UNKNOWN; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_COMMON_INT_STA_4); ++ if (reg & HOTPLUG_CHG) { ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_4, HOTPLUG_CHG); ++ changed = true; + } + - result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data); -- hdmi->last_connector_result = 
result; -+ mutex_lock(&hdmi->mutex); -+ if (result != hdmi->last_connector_result) { -+ dev_dbg(hdmi->dev, "read_hpd result: %d", result); -+ handle_plugged_change(hdmi, -+ result == connector_status_connected); -+ hdmi->last_connector_result = result; -+ } -+ mutex_unlock(&hdmi->mutex); ++ if (changed) ++ drm_helper_hpd_irq_event(dp->drm_dev); ++} + -+ if (result == connector_status_connected) -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, true); -+ else -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, false); - - return result; ++void analogix_dp_irq_handler(struct analogix_dp_device *dp) ++{ ++ analogix_dp_handle_hpd_event(dp); } -@@ -2471,7 +3145,7 @@ static struct edid *dw_hdmi_get_edid(struct dw_hdmi *hdmi, - dev_dbg(hdmi->dev, "got edid: width[%d] x height[%d]\n", - edid->width_cm, edid->height_cm); -- hdmi->sink_is_hdmi = drm_detect_hdmi_monitor(edid); -+ hdmi->support_hdmi = drm_detect_hdmi_monitor(edid); - hdmi->sink_has_audio = drm_detect_monitor_audio(edid); + void analogix_dp_reset_aux(struct analogix_dp_device *dp) +@@ -458,9 +460,9 @@ void analogix_dp_reset_aux(struct analogix_dp_device *dp) + u32 reg; - return edid; -@@ -2489,25 +3163,190 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force) - return dw_hdmi_detect(hdmi); + /* Disable AUX channel module */ +- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); + reg |= AUX_FUNC_EN_N; +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); } -+static int -+dw_hdmi_update_hdr_property(struct drm_connector *connector) -+{ -+ struct drm_device *dev = connector->dev; -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ void *data = hdmi->plat_data->phy_data; -+ const struct hdr_static_metadata *metadata = -+ &connector->hdr_sink_metadata.hdmi_type1; -+ size_t size = sizeof(*metadata); -+ struct drm_property *property; -+ struct drm_property_blob *blob; -+ int ret; -+ -+ if (hdmi->plat_data->get_hdr_property) -+ property = hdmi->plat_data->get_hdr_property(data); -+ else -+ return -EINVAL; -+ -+ if (hdmi->plat_data->get_hdr_blob) -+ blob = hdmi->plat_data->get_hdr_blob(data); -+ else -+ return -EINVAL; -+ -+ ret = drm_property_replace_global_blob(dev, &blob, size, metadata, -+ &connector->base, property); -+ return ret; -+} -+ - static int dw_hdmi_connector_get_modes(struct drm_connector *connector) - { - struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, - connector); -+ struct hdr_static_metadata *metedata = -+ &connector->hdr_sink_metadata.hdmi_type1; - struct edid *edid; -- int ret; -+ struct drm_display_mode *mode; -+ struct drm_display_info *info = &connector->display_info; -+ void *data = hdmi->plat_data->phy_data; -+ int i, ret = 0; + void analogix_dp_init_aux(struct analogix_dp_device *dp) +@@ -469,7 +471,7 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) -+ memset(metedata, 0, sizeof(*metedata)); - edid = dw_hdmi_get_edid(hdmi, connector); -- if (!edid) -- return 0; -+ if (edid) { -+ int vic = 0; -+ -+ dev_dbg(hdmi->dev, "got edid: width[%d] x height[%d]\n", -+ edid->width_cm, edid->height_cm); -+ drm_connector_update_edid_property(connector, edid); -+ cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid); -+ ret = drm_add_edid_modes(connector, edid); -+ if (hdmi->plat_data->get_color_changed) -+ hdmi->plat_data->get_yuv422_format(connector, edid); -+ if (hdmi->plat_data->get_colorimetry) -+ 
hdmi->plat_data->get_colorimetry(data, edid); -+ -+ list_for_each_entry(mode, &connector->probed_modes, head) { -+ vic = drm_match_cea_mode(mode); -+ -+ if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_NONE) { -+ if (vic >= 93 && vic <= 95) -+ mode->picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9; -+ else if (vic == 98) -+ mode->picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135; -+ } -+ } -+ -+ kfree(edid); -+ } else { -+ hdmi->support_hdmi = true; -+ hdmi->sink_has_audio = true; -+ for (i = 0; i < ARRAY_SIZE(dw_hdmi_default_modes); i++) { -+ const struct drm_display_mode *ptr = -+ &dw_hdmi_default_modes[i]; -+ -+ mode = drm_mode_duplicate(connector->dev, ptr); -+ if (mode) { -+ if (!i) -+ mode->type = DRM_MODE_TYPE_PREFERRED; -+ drm_mode_probed_add(connector, mode); -+ ret++; -+ } -+ } -+ info->edid_hdmi_rgb444_dc_modes = 0; -+ info->edid_hdmi_ycbcr444_dc_modes = 0; -+ info->hdmi.y420_dc_modes = 0; -+ info->color_formats = 0; + /* Clear inerrupts related to AUX channel */ + reg = RPLY_RECEIV | AUX_ERR; +- writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, reg); -- drm_connector_update_edid_property(connector, edid); -- cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid); -- ret = drm_add_edid_modes(connector, edid); -- kfree(edid); -+ dev_info(hdmi->dev, "failed to get edid\n"); -+ } -+ dw_hdmi_update_hdr_property(connector); -+ dw_hdmi_check_output_type_changed(hdmi); + analogix_dp_set_analog_power_down(dp, AUX_BLOCK, true); + usleep_range(10, 11); +@@ -487,16 +489,17 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) + reg |= AUX_HW_RETRY_COUNT_SEL(0) | + AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; - return ret; +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_HW_RETRY_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_HW_RETRY_CTL, reg); + + /* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */ + reg = DEFER_CTRL_EN | DEFER_COUNT(1); +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_DEFER_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_DEFER_CTL, reg); + + /* Enable AUX channel module */ +- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ analogix_dp_enable_sw_function(dp); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_2); + reg &= ~AUX_FUNC_EN_N; +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_2, reg); } -+static struct drm_encoder * -+dw_hdmi_connector_best_encoder(struct drm_connector *connector) -+{ -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ -+ return hdmi->bridge.encoder; + int analogix_dp_get_plug_in_status(struct analogix_dp_device *dp) +@@ -507,7 +510,7 @@ int analogix_dp_get_plug_in_status(struct analogix_dp_device *dp) + if (gpiod_get_value(dp->hpd_gpiod)) + return 0; + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); + if (reg & HPD_STATUS) + return 0; + } +@@ -519,60 +522,193 @@ void analogix_dp_enable_sw_function(struct analogix_dp_device *dp) + { + u32 reg; + +- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); + reg &= ~SW_FUNC_EN_N; +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); + } + +-void analogix_dp_set_link_bandwidth(struct analogix_dp_device *dp, u32 bwtype) ++static void analogix_dp_ssc_enable(struct analogix_dp_device *dp) + { + u32 reg; + +- reg = bwtype; +- if ((bwtype == DP_LINK_BW_2_7) || (bwtype == 
DP_LINK_BW_1_62)) +- writel(reg, dp->reg_base + ANALOGIX_DP_LINK_BW_SET); ++ /* 4500ppm */ ++ writel(0x19, dp->reg_base + ANALOIGX_DP_SSC_REG); ++ /* ++ * To apply updated SSC parameters into SSC operation, ++ * firmware must disable and enable this bit. ++ */ ++ reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ reg |= SSC_FUNC_EN_N; ++ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ reg &= ~SSC_FUNC_EN_N; ++ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); +} + -+static bool dw_hdmi_color_changed(struct drm_connector *connector) ++static void analogix_dp_ssc_disable(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ void *data = hdmi->plat_data->phy_data; -+ bool ret = false; -+ -+ if (hdmi->plat_data->get_color_changed) -+ ret = hdmi->plat_data->get_color_changed(data); ++ u32 reg; + -+ return ret; ++ reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_2); ++ reg |= SSC_FUNC_EN_N; ++ writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); +} + -+static bool hdr_metadata_equal(struct dw_hdmi *hdmi, const struct drm_connector_state *old_state, -+ const struct drm_connector_state *new_state) ++bool analogix_dp_ssc_supported(struct analogix_dp_device *dp) +{ -+ struct drm_property_blob *old_blob = old_state->hdr_output_metadata; -+ struct drm_property_blob *new_blob = new_state->hdr_output_metadata; -+ int i, ret; -+ u8 *data; -+ -+ hdmi->hdr2sdr = false; -+ -+ if (!old_blob && !new_blob) -+ return true; -+ -+ if (!old_blob) { -+ data = (u8 *)new_blob->data; -+ -+ for (i = 0; i < new_blob->length; i++) -+ if (data[i]) -+ return false; -+ -+ return true; -+ } -+ -+ if (!new_blob) { -+ data = (u8 *)old_blob->data; -+ -+ for (i = 0; i < old_blob->length; i++) -+ if (data[i]) -+ return false; -+ -+ return true; -+ } -+ -+ if (old_blob->length != new_blob->length) -+ return false; ++ /* Check if SSC is supported by both sides */ ++ return dp->plat_data->ssc && dp->link_train.ssc; ++} + -+ ret = !memcmp(old_blob->data, new_blob->data, old_blob->length); ++void analogix_dp_set_link_bandwidth(struct analogix_dp_device *dp, u32 bwtype) ++{ ++ u32 status; ++ int ret; + -+ if (!ret && new_blob) { -+ data = (u8 *)new_blob->data; ++ analogix_dp_write(dp, ANALOGIX_DP_LINK_BW_SET, bwtype); + -+ for (i = 0; i < new_blob->length; i++) -+ if (data[i]) -+ break; ++ if (dp->phy) { ++ union phy_configure_opts phy_cfg = {0}; + -+ if (i == new_blob->length) -+ hdmi->hdr2sdr = true; ++ phy_cfg.dp.lanes = dp->link_train.lane_count; ++ phy_cfg.dp.link_rate = ++ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate) / 100; ++ phy_cfg.dp.ssc = analogix_dp_ssc_supported(dp); ++ phy_cfg.dp.set_lanes = false; ++ phy_cfg.dp.set_rate = true; ++ phy_cfg.dp.set_voltages = false; ++ ret = phy_configure(dp->phy, &phy_cfg); ++ if (ret && ret != -EOPNOTSUPP) { ++ dev_err(dp->dev, "%s: phy_configure failed: %d\n", ++ __func__, ret); ++ return; ++ } ++ } else { ++ if (analogix_dp_ssc_supported(dp)) ++ analogix_dp_ssc_enable(dp); ++ else ++ analogix_dp_ssc_disable(dp); + } + -+ return ret; -+} -+ -+static bool check_hdr_color_change(struct drm_connector_state *old_state, -+ struct drm_connector_state *new_state, -+ struct dw_hdmi *hdmi) -+{ -+ void *data = hdmi->plat_data->phy_data; -+ -+ if (!hdr_metadata_equal(hdmi, old_state, new_state)) { -+ hdmi->plat_data->check_hdr_color_change(new_state, data); -+ return true; ++ ret = readx_poll_timeout(analogix_dp_get_pll_lock_status, dp, status, ++ status != PLL_UNLOCKED, 120, ++ 120 * DP_TIMEOUT_LOOP_COUNT); ++ if (ret) 
{ ++ dev_err(dp->dev, "Wait for pll lock failed %d\n", ret); ++ return; + } -+ -+ return false; -+} -+ - static int dw_hdmi_connector_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *state) + } + + void analogix_dp_get_link_bandwidth(struct analogix_dp_device *dp, u32 *bwtype) { -@@ -2517,27 +3356,231 @@ static int dw_hdmi_connector_atomic_check(struct drm_connector *connector, - drm_atomic_get_new_connector_state(state, connector); - struct drm_crtc *crtc = new_state->crtc; - struct drm_crtc_state *crtc_state; -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ struct drm_display_mode *mode = NULL; -+ void *data = hdmi->plat_data->phy_data; -+ struct hdmi_vmode *vmode = &hdmi->hdmi_data.video_mode; + u32 reg; - if (!crtc) - return 0; +- reg = readl(dp->reg_base + ANALOGIX_DP_LINK_BW_SET); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_LINK_BW_SET); + *bwtype = reg; + } -- if (!drm_connector_atomic_hdr_metadata_equal(old_state, new_state)) { -- crtc_state = drm_atomic_get_crtc_state(state, crtc); -- if (IS_ERR(crtc_state)) -- return PTR_ERR(crtc_state); -+ crtc_state = drm_atomic_get_crtc_state(state, crtc); -+ if (IS_ERR(crtc_state)) -+ return PTR_ERR(crtc_state); -+ -+ mode = &crtc_state->mode; -+ -+ /* -+ * If HDMI is enabled in uboot, it's need to record -+ * drm_display_mode and set phy status to enabled. -+ */ -+ if (!vmode->mpixelclock) { -+ u8 val; -+ -+ hdmi->curr_conn = connector; -+ -+ if (hdmi->plat_data->get_enc_in_encoding) -+ hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); -+ -+ drm_mode_copy(&hdmi->previous_mode, mode); -+ vmode->mpixelclock = mode->crtc_clock * 1000; -+ vmode->previous_pixelclock = mode->clock * 1000; -+ vmode->previous_tmdsclock = mode->clock * 1000; -+ vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi, -+ vmode->mpixelclock); -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ vmode->mtmdsclock /= 2; + void analogix_dp_set_lane_count(struct analogix_dp_device *dp, u32 count) + { + u32 reg; ++ int ret; + + reg = count; +- writel(reg, dp->reg_base + ANALOGIX_DP_LANE_COUNT_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_LANE_COUNT_SET, reg); + -+ dw_hdmi_force_output_pattern(hdmi, mode); -+ drm_scdc_readb(hdmi->ddc, SCDC_TMDS_CONFIG, &val); ++ if (dp->phy) { ++ union phy_configure_opts phy_cfg = {0}; + -+ /* if plug out before hdmi bind, reset hdmi */ -+ if (vmode->mtmdsclock >= 340000000 && !(val & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40)) -+ hdmi->logo_plug_out = true; ++ phy_cfg.dp.lanes = dp->link_train.lane_count; ++ phy_cfg.dp.set_lanes = true; ++ phy_cfg.dp.set_rate = false; ++ phy_cfg.dp.set_voltages = false; ++ ret = phy_configure(dp->phy, &phy_cfg); ++ if (ret && ret != -EOPNOTSUPP) { ++ dev_err(dp->dev, "%s: phy_configure() failed: %d\n", ++ __func__, ret); ++ return; ++ } + } -+ -+ if (check_hdr_color_change(old_state, new_state, hdmi) || hdmi->logo_plug_out || -+ dw_hdmi_color_changed(connector)) { -+ u32 mtmdsclk; -+ -+ if (hdmi->plat_data->update_color_format) -+ hdmi->plat_data->update_color_format(new_state, data); -+ if (hdmi->plat_data->get_enc_in_encoding) -+ 
hdmi->hdmi_data.enc_in_encoding = -+ hdmi->plat_data->get_enc_in_encoding(data); -+ if (hdmi->plat_data->get_enc_out_encoding) -+ hdmi->hdmi_data.enc_out_encoding = -+ hdmi->plat_data->get_enc_out_encoding(data); -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); -+ -+ mtmdsclk = hdmi_get_tmdsclock(hdmi, mode->clock); -+ -+ if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ mtmdsclk /= 2; -+ -+ if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & HDMI_PHY_HPD)) -+ return 0; + } -- crtc_state->mode_changed = true; -+ if (hdmi->hdmi_data.video_mode.mpixelclock == (mode->clock * 1000) && -+ hdmi->hdmi_data.video_mode.mtmdsclock == (mtmdsclk * 1000) && -+ !hdmi->logo_plug_out && !hdmi->disabled) { -+ hdmi->update = true; -+ hdmi_writeb(hdmi, HDMI_FC_GCP_SET_AVMUTE, HDMI_FC_GCP); -+ mdelay(180); -+ handle_plugged_change(hdmi, false); -+ } else { -+ hdmi->update = false; -+ crtc_state->mode_changed = true; -+ hdmi->logo_plug_out = false; -+ } - } + void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count) + { + u32 reg; - return 0; +- reg = readl(dp->reg_base + ANALOGIX_DP_LANE_COUNT_SET); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_LANE_COUNT_SET); + *count = reg; } -+static int -+dw_hdmi_atomic_connector_set_property(struct drm_connector *connector, -+ struct drm_connector_state *state, -+ struct drm_property *property, -+ uint64_t val) ++void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; -+ -+ if (ops && ops->set_property) -+ return ops->set_property(connector, state, property, -+ val, hdmi->plat_data->phy_data); -+ else -+ return -EINVAL; -+} ++ u8 lane; ++ int ret; + -+static int -+dw_hdmi_atomic_connector_get_property(struct drm_connector *connector, -+ const struct drm_connector_state *state, -+ struct drm_property *property, -+ uint64_t *val) -+{ -+ struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, -+ connector); -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; ++ for (lane = 0; lane < dp->link_train.lane_count; lane++) ++ analogix_dp_write(dp, ++ ANALOGIX_DP_LN0_LINK_TRAINING_CTL + 4 * lane, ++ dp->link_train.training_lane[lane]); + -+ if (ops && ops->get_property) -+ return ops->get_property(connector, state, property, -+ val, hdmi->plat_data->phy_data); -+ else -+ return -EINVAL; -+} ++ if (dp->phy) { ++ union phy_configure_opts phy_cfg = {0}; + -+static int -+dw_hdmi_connector_set_property(struct drm_connector *connector, -+ struct drm_property *property, uint64_t val) -+{ -+ return dw_hdmi_atomic_connector_set_property(connector, NULL, -+ property, val); -+} ++ for (lane = 0; lane < dp->link_train.lane_count; lane++) { ++ u8 training_lane = dp->link_train.training_lane[lane]; ++ u8 vs, pe; + -+static void dw_hdmi_connector_atomic_commit(struct drm_connector *connector, -+ struct drm_atomic_state *state) -+{ -+ struct dw_hdmi *hdmi = -+ container_of(connector, struct dw_hdmi, connector); ++ vs = (training_lane & DP_TRAIN_VOLTAGE_SWING_MASK) >> ++ DP_TRAIN_VOLTAGE_SWING_SHIFT; ++ pe = (training_lane & DP_TRAIN_PRE_EMPHASIS_MASK) >> ++ DP_TRAIN_PRE_EMPHASIS_SHIFT; ++ phy_cfg.dp.voltage[lane] = vs; ++ phy_cfg.dp.pre[lane] = pe; ++ } + 
-+ if (hdmi->update) { -+ dw_hdmi_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode); -+ mdelay(50); -+ handle_plugged_change(hdmi, true); -+ hdmi_writeb(hdmi, HDMI_FC_GCP_CLEAR_AVMUTE, HDMI_FC_GCP); -+ hdmi->update = false; ++ phy_cfg.dp.lanes = dp->link_train.lane_count; ++ phy_cfg.dp.link_rate = ++ drm_dp_bw_code_to_link_rate(dp->link_train.link_rate) / 100; ++ phy_cfg.dp.set_lanes = false; ++ phy_cfg.dp.set_rate = false; ++ phy_cfg.dp.set_voltages = true; ++ ret = phy_configure(dp->phy, &phy_cfg); ++ if (ret && ret != -EOPNOTSUPP) { ++ dev_err(dp->dev, "%s: phy_configure() failed: %d\n", ++ __func__, ret); ++ return; ++ } + } +} + -+void dw_hdmi_set_quant_range(struct dw_hdmi *hdmi) -+{ -+ if (!hdmi->bridge_is_on) -+ return; -+ -+ hdmi_writeb(hdmi, HDMI_FC_GCP_SET_AVMUTE, HDMI_FC_GCP); -+ dw_hdmi_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode); -+ hdmi_writeb(hdmi, HDMI_FC_GCP_CLEAR_AVMUTE, HDMI_FC_GCP); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_set_quant_range); -+ -+void dw_hdmi_set_output_type(struct dw_hdmi *hdmi, u64 val) -+{ -+ hdmi->force_output = val; -+ -+ if (!dw_hdmi_check_output_type_changed(hdmi)) -+ return; -+ -+ if (!hdmi->bridge_is_on) -+ return; -+ -+ hdmi_writeb(hdmi, HDMI_FC_GCP_SET_AVMUTE, HDMI_FC_GCP); -+ dw_hdmi_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode); -+ hdmi_writeb(hdmi, HDMI_FC_GCP_CLEAR_AVMUTE, HDMI_FC_GCP); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_set_output_type); -+ -+bool dw_hdmi_get_output_whether_hdmi(struct dw_hdmi *hdmi) -+{ -+ return hdmi->sink_is_hdmi; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_get_output_whether_hdmi); -+ -+int dw_hdmi_get_output_type_cap(struct dw_hdmi *hdmi) -+{ -+ return hdmi->support_hdmi; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_get_output_type_cap); -+ -+void dw_hdmi_set_hpd_wake(struct dw_hdmi *hdmi) ++u32 analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, u8 lane) +{ -+ if (!hdmi->cec) -+ return; -+ -+ if (!hdmi->cec_ops) -+ return; -+ -+ if (hdmi->cec_ops->hpd_wake_up) -+ hdmi->cec_ops->hpd_wake_up(hdmi->cec); ++ return analogix_dp_read(dp, ++ ANALOGIX_DP_LN0_LINK_TRAINING_CTL + 4 * lane); +} -+EXPORT_SYMBOL_GPL(dw_hdmi_set_hpd_wake); + - static void dw_hdmi_connector_force(struct drm_connector *connector) + void analogix_dp_enable_enhanced_mode(struct analogix_dp_device *dp, + bool enable) { - struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi, - connector); - - mutex_lock(&hdmi->mutex); -+ -+ if (hdmi->force != connector->force) { -+ if (!hdmi->disabled && connector->force == DRM_FORCE_OFF) -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, -+ false); -+ else if (hdmi->disabled && connector->force == DRM_FORCE_ON) -+ extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, -+ true); -+ } -+ - hdmi->force = connector->force; - dw_hdmi_update_power(hdmi); - dw_hdmi_update_phy_mask(hdmi); -@@ -2550,15 +3593,99 @@ static const struct drm_connector_funcs dw_hdmi_connector_funcs = { - .destroy = drm_connector_cleanup, - .force = dw_hdmi_connector_force, - .reset = drm_atomic_helper_connector_reset, -+ .set_property = dw_hdmi_connector_set_property, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -+ .atomic_set_property = dw_hdmi_atomic_connector_set_property, -+ .atomic_get_property = dw_hdmi_atomic_connector_get_property, - }; + u32 reg; - static const struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = { - .get_modes = dw_hdmi_connector_get_modes, -+ .best_encoder = dw_hdmi_connector_best_encoder, - .atomic_check = 
dw_hdmi_connector_atomic_check, -+ .atomic_commit = dw_hdmi_connector_atomic_commit, - }; + if (enable) { +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); + reg |= ENHANCED; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); + reg &= ~ENHANCED; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + } + } -+static void dw_hdmi_attach_properties(struct dw_hdmi *hdmi) ++bool analogix_dp_get_enhanced_mode(struct analogix_dp_device *dp) +{ -+ unsigned int color = MEDIA_BUS_FMT_RGB888_1X24; -+ int video_mapping, colorspace; -+ enum drm_connector_status connect_status = -+ hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data); -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; -+ -+ if (connect_status == connector_status_connected) { -+ video_mapping = (hdmi_readb(hdmi, HDMI_TX_INVID0) & -+ HDMI_TX_INVID0_VIDEO_MAPPING_MASK); -+ colorspace = (hdmi_readb(hdmi, HDMI_FC_AVICONF0) & -+ HDMI_FC_AVICONF0_PIX_FMT_MASK); -+ switch (video_mapping) { -+ case 0x01: -+ color = MEDIA_BUS_FMT_RGB888_1X24; -+ break; -+ case 0x03: -+ color = MEDIA_BUS_FMT_RGB101010_1X30; -+ break; -+ case 0x09: -+ if (colorspace == HDMI_COLORSPACE_YUV420) -+ color = MEDIA_BUS_FMT_UYYVYY8_0_5X24; -+ else if (colorspace == HDMI_COLORSPACE_YUV422) -+ color = MEDIA_BUS_FMT_UYVY8_1X16; -+ else -+ color = MEDIA_BUS_FMT_YUV8_1X24; -+ break; -+ case 0x0b: -+ if (colorspace == HDMI_COLORSPACE_YUV420) -+ color = MEDIA_BUS_FMT_UYYVYY10_0_5X30; -+ else if (colorspace == HDMI_COLORSPACE_YUV422) -+ color = MEDIA_BUS_FMT_UYVY10_1X20; -+ else -+ color = MEDIA_BUS_FMT_YUV10_1X30; -+ break; -+ case 0x14: -+ color = MEDIA_BUS_FMT_UYVY10_1X20; -+ break; -+ case 0x16: -+ color = MEDIA_BUS_FMT_UYVY8_1X16; -+ break; -+ default: -+ color = MEDIA_BUS_FMT_RGB888_1X24; -+ dev_err(hdmi->dev, "unexpected mapping: 0x%x\n", -+ video_mapping); -+ } -+ -+ hdmi->hdmi_data.enc_in_bus_format = color; -+ hdmi->hdmi_data.enc_out_bus_format = color; -+ /* -+ * input format will be set as yuv444 when output -+ * format is yuv420 -+ */ -+ if (color == MEDIA_BUS_FMT_UYVY10_1X20) -+ hdmi->hdmi_data.enc_in_bus_format = -+ MEDIA_BUS_FMT_YUV10_1X30; -+ else if (color == MEDIA_BUS_FMT_UYVY8_1X16) -+ hdmi->hdmi_data.enc_in_bus_format = -+ MEDIA_BUS_FMT_YUV8_1X24; -+ } -+ -+ if (ops && ops->attach_properties) -+ return ops->attach_properties(&hdmi->connector, -+ color, hdmi->version, -+ hdmi->plat_data->phy_data, 0); -+} ++ u32 reg; + -+static void dw_hdmi_destroy_properties(struct dw_hdmi *hdmi) -+{ -+ const struct dw_hdmi_property_ops *ops = -+ hdmi->plat_data->property_ops; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); + -+ if (ops && ops->destroy_properties) -+ return ops->destroy_properties(&hdmi->connector, -+ hdmi->plat_data->phy_data); ++ return !!(reg & ENHANCED); +} + - static int dw_hdmi_connector_create(struct dw_hdmi *hdmi) + void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, + enum pattern_set pattern) { - struct drm_connector *connector = &hdmi->connector; -@@ -2594,6 +3721,8 @@ static int dw_hdmi_connector_create(struct dw_hdmi *hdmi) - - drm_connector_attach_encoder(connector, hdmi->bridge.encoder); - -+ dw_hdmi_attach_properties(hdmi); -+ - cec_fill_conn_info_from_drm(&conn_info, connector); - - notifier = 
cec_notifier_conn_register(hdmi->dev, NULL, &conn_info); -@@ -2688,10 +3817,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, - /* Default 8bit fallback */ - output_fmts[i++] = MEDIA_BUS_FMT_UYYVYY8_0_5X24; - -- if (drm_mode_is_420_only(info, mode)) { -- *num_output_fmts = i; -- return output_fmts; -- } -+ *num_output_fmts = i; -+ -+ return output_fmts; +@@ -581,144 +717,64 @@ void analogix_dp_set_training_pattern(struct analogix_dp_device *dp, + switch (pattern) { + case PRBS7: + reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_PRBS7; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + break; + case D10_2: + reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_D10_2; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + break; + case TRAINING_PTN1: + reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN1; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + break; + case TRAINING_PTN2: + reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN2; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); ++ break; ++ case TRAINING_PTN3: ++ reg = SCRAMBLING_DISABLE | SW_TRAINING_PATTERN_SET_PTN3; ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); ++ break; ++ case TEST_PATTERN_80BIT: ++ reg = 0x3e0f83e0; ++ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN0, reg); ++ reg = 0x0f83e0f8; ++ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN1, reg); ++ reg = 0x0000f83e; ++ analogix_dp_write(dp, ANALOGIX_DP_TEST_80B_PATTERN2, reg); ++ reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_80BIT; ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); ++ break; ++ case TEST_PATTERN_HBR2: ++ reg = 0xfb; ++ analogix_dp_write(dp, ANALOGIX_DP_TEST_HBR2_PATTERN, reg); ++ reg = SCRAMBLING_ENABLE | LINK_QUAL_PATTERN_SET_HBR2; ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + break; + case DP_NONE: + reg = SCRAMBLING_ENABLE | + LINK_QUAL_PATTERN_SET_DISABLE | + SW_TRAINING_PATTERN_SET_NORMAL; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + break; + default: + break; } + } - /* -@@ -2870,16 +3998,36 @@ static int dw_hdmi_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_connector_state *conn_state) +-void analogix_dp_set_lane0_pre_emphasis(struct analogix_dp_device *dp, +- u32 level) +-{ +- u32 reg; +- +- reg = readl(dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); +- reg &= ~PRE_EMPHASIS_SET_MASK; +- reg |= level << PRE_EMPHASIS_SET_SHIFT; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane1_pre_emphasis(struct analogix_dp_device *dp, +- u32 level) +-{ +- u32 reg; +- +- reg = readl(dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); +- reg &= ~PRE_EMPHASIS_SET_MASK; +- reg |= level << PRE_EMPHASIS_SET_SHIFT; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane2_pre_emphasis(struct analogix_dp_device *dp, +- u32 level) +-{ +- u32 reg; +- +- reg = readl(dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); +- reg &= ~PRE_EMPHASIS_SET_MASK; +- reg |= level << PRE_EMPHASIS_SET_SHIFT; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane3_pre_emphasis(struct 
analogix_dp_device *dp, +- u32 level) +-{ +- u32 reg; +- +- reg = readl(dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); +- reg &= ~PRE_EMPHASIS_SET_MASK; +- reg |= level << PRE_EMPHASIS_SET_SHIFT; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane0_link_training(struct analogix_dp_device *dp, +- u32 training_lane) +-{ +- u32 reg; +- +- reg = training_lane; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane1_link_training(struct analogix_dp_device *dp, +- u32 training_lane) +-{ +- u32 reg; +- +- reg = training_lane; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane2_link_training(struct analogix_dp_device *dp, +- u32 training_lane) +-{ +- u32 reg; +- +- reg = training_lane; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); +-} +- +-void analogix_dp_set_lane3_link_training(struct analogix_dp_device *dp, +- u32 training_lane) +-{ +- u32 reg; +- +- reg = training_lane; +- writel(reg, dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); +-} +- +-u32 analogix_dp_get_lane0_link_training(struct analogix_dp_device *dp) +-{ +- return readl(dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL); +-} +- +-u32 analogix_dp_get_lane1_link_training(struct analogix_dp_device *dp) +-{ +- return readl(dp->reg_base + ANALOGIX_DP_LN1_LINK_TRAINING_CTL); +-} +- +-u32 analogix_dp_get_lane2_link_training(struct analogix_dp_device *dp) +-{ +- return readl(dp->reg_base + ANALOGIX_DP_LN2_LINK_TRAINING_CTL); +-} +- +-u32 analogix_dp_get_lane3_link_training(struct analogix_dp_device *dp) +-{ +- return readl(dp->reg_base + ANALOGIX_DP_LN3_LINK_TRAINING_CTL); +-} +- + void analogix_dp_reset_macro(struct analogix_dp_device *dp) { - struct dw_hdmi *hdmi = bridge->driver_private; -+ void *data = hdmi->plat_data->phy_data; - -- hdmi->hdmi_data.enc_out_bus_format = -- bridge_state->output_bus_cfg.format; -+ if (bridge_state->output_bus_cfg.format == MEDIA_BUS_FMT_FIXED) { -+ if (hdmi->plat_data->get_output_bus_format) -+ hdmi->hdmi_data.enc_out_bus_format = -+ hdmi->plat_data->get_output_bus_format(data); -+ else -+ hdmi->hdmi_data.enc_out_bus_format = -+ MEDIA_BUS_FMT_RGB888_1X24; -+ -+ if (hdmi->plat_data->get_input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->get_input_bus_format(data); -+ else if (hdmi->plat_data->input_bus_format) -+ hdmi->hdmi_data.enc_in_bus_format = -+ hdmi->plat_data->input_bus_format; -+ else -+ hdmi->hdmi_data.enc_in_bus_format = -+ MEDIA_BUS_FMT_RGB888_1X24; -+ } else { -+ hdmi->hdmi_data.enc_out_bus_format = -+ bridge_state->output_bus_cfg.format; + u32 reg; -- hdmi->hdmi_data.enc_in_bus_format = -- bridge_state->input_bus_cfg.format; -+ hdmi->hdmi_data.enc_in_bus_format = -+ bridge_state->input_bus_cfg.format; +- reg = readl(dp->reg_base + ANALOGIX_DP_PHY_TEST); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_PHY_TEST); + reg |= MACRO_RST; +- writel(reg, dp->reg_base + ANALOGIX_DP_PHY_TEST); ++ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, reg); -- dev_dbg(hdmi->dev, "input format 0x%04x, output format 0x%04x\n", -- bridge_state->input_bus_cfg.format, -- bridge_state->output_bus_cfg.format); -+ dev_dbg(hdmi->dev, "input format 0x%04x, output format 0x%04x\n", -+ bridge_state->input_bus_cfg.format, -+ bridge_state->output_bus_cfg.format); -+ } + /* 10 us is the minimum reset time. 
*/ + usleep_range(10, 20); - return 0; + reg &= ~MACRO_RST; +- writel(reg, dp->reg_base + ANALOGIX_DP_PHY_TEST); ++ analogix_dp_write(dp, ANALOGIX_DP_PHY_TEST, reg); } -@@ -2888,11 +4036,23 @@ static int dw_hdmi_bridge_attach(struct drm_bridge *bridge, - enum drm_bridge_attach_flags flags) - { - struct dw_hdmi *hdmi = bridge->driver_private; -+ int ret; - if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) - return drm_bridge_attach(bridge->encoder, hdmi->next_bridge, - bridge, flags); + void analogix_dp_init_video(struct analogix_dp_device *dp) +@@ -726,19 +782,22 @@ void analogix_dp_init_video(struct analogix_dp_device *dp) + u32 reg; -+ if (hdmi->next_bridge) { -+ hdmi->next_bridge->encoder = bridge->encoder; -+ ret = drm_bridge_attach(bridge->encoder, hdmi->next_bridge, bridge, flags); -+ if (ret) { -+ DRM_ERROR("Failed to attach bridge with dw-hdmi\n"); -+ return ret; -+ } -+ -+ return 0; + reg = VSYNC_DET | VID_FORMAT_CHG | VID_CLK_CHG; +- writel(reg, dp->reg_base + ANALOGIX_DP_COMMON_INT_STA_1); ++ analogix_dp_write(dp, ANALOGIX_DP_COMMON_INT_STA_1, reg); + + reg = 0x0; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, reg); + + reg = CHA_CRI(4) | CHA_CTRL; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, reg); + +- reg = 0x0; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ if (dp->video_info.force_stream_valid) { ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); ++ reg |= VALID_CTRL | F_VALID; ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); + } -+ - return dw_hdmi_connector_create(hdmi); + + reg = VID_HRES_TH(2) | VID_VRES_TH(0); +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_8); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_8, reg); } -@@ -2915,9 +4075,11 @@ dw_hdmi_bridge_mode_valid(struct drm_bridge *bridge, - const struct dw_hdmi_plat_data *pdata = hdmi->plat_data; - enum drm_mode_status mode_status = MODE_OK; + void analogix_dp_set_video_color_format(struct analogix_dp_device *dp) +@@ -749,36 +808,36 @@ void analogix_dp_set_video_color_format(struct analogix_dp_device *dp) + reg = (dp->video_info.dynamic_range << IN_D_RANGE_SHIFT) | + (dp->video_info.color_depth << IN_BPC_SHIFT) | + (dp->video_info.color_space << IN_COLOR_F_SHIFT); +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_2); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_2, reg); -- /* We don't support double-clocked modes */ -- if (mode->flags & DRM_MODE_FLAG_DBLCLK) -- return MODE_BAD; -+ if (hdmi->next_bridge) -+ return MODE_OK; -+ -+ if (!(hdmi_readb(hdmi, HDMI_PHY_STAT0) & HDMI_PHY_HPD) && hdmi->hdr2sdr) -+ return MODE_OK; + /* Set Input Color YCbCr Coefficients to ITU601 or ITU709 */ +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_3); + reg &= ~IN_YC_COEFFI_MASK; + if (dp->video_info.ycbcr_coeff) + reg |= IN_YC_COEFFI_ITU709; + else + reg |= IN_YC_COEFFI_ITU601; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_3, reg); + } - if (pdata->mode_valid) - mode_status = pdata->mode_valid(hdmi, pdata->priv_data, info, -@@ -2947,10 +4109,12 @@ static void dw_hdmi_bridge_atomic_disable(struct drm_bridge *bridge, + int analogix_dp_is_slave_video_stream_clock_on(struct analogix_dp_device *dp) + { + u32 reg; - mutex_lock(&hdmi->mutex); - hdmi->disabled = true; -+ handle_plugged_change(hdmi, false); - hdmi->curr_conn = NULL; - dw_hdmi_update_power(hdmi); - dw_hdmi_update_phy_mask(hdmi); -- 
handle_plugged_change(hdmi, false); -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(hdmi->plat_data->phy_data, false, 0); - mutex_unlock(&hdmi->mutex); +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_1); +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_1, reg); + +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_1); + + if (!(reg & DET_STA)) { + dev_dbg(dp->dev, "Input stream clock not detected.\n"); + return -EINVAL; + } + +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_2); +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_2); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_2); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_2, reg); + +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_2); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_2); + dev_dbg(dp->dev, "wait SYS_CTL_2.\n"); + + if (reg & CHA_STA) { +@@ -796,30 +855,30 @@ void analogix_dp_set_video_cr_mn(struct analogix_dp_device *dp, + u32 reg; + + if (type == REGISTER_M) { +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); + reg |= FIX_M_VID; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + reg = m_value & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_0); ++ analogix_dp_write(dp, ANALOGIX_DP_M_VID_0, reg); + reg = (m_value >> 8) & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_1); ++ analogix_dp_write(dp, ANALOGIX_DP_M_VID_1, reg); + reg = (m_value >> 16) & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_M_VID_2); ++ analogix_dp_write(dp, ANALOGIX_DP_M_VID_2, reg); + + reg = n_value & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_0); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_0, reg); + reg = (n_value >> 8) & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_1); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_1, reg); + reg = (n_value >> 16) & 0xff; +- writel(reg, dp->reg_base + ANALOGIX_DP_N_VID_2); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_2, reg); + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); + reg &= ~FIX_M_VID; +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_4); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + +- writel(0x00, dp->reg_base + ANALOGIX_DP_N_VID_0); +- writel(0x80, dp->reg_base + ANALOGIX_DP_N_VID_1); +- writel(0x00, dp->reg_base + ANALOGIX_DP_N_VID_2); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_0, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_1, 0x80); ++ analogix_dp_write(dp, ANALOGIX_DP_N_VID_2, 0x00); + } } -@@ -2967,6 +4131,8 @@ static void dw_hdmi_bridge_atomic_enable(struct drm_bridge *bridge, - mutex_lock(&hdmi->mutex); - hdmi->disabled = false; - hdmi->curr_conn = connector; -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(hdmi->plat_data->phy_data, true, 0); - dw_hdmi_update_power(hdmi); - dw_hdmi_update_phy_mask(hdmi); - handle_plugged_change(hdmi, true); -@@ -3005,6 +4171,12 @@ static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = { - .get_edid = dw_hdmi_bridge_get_edid, - }; +@@ -828,13 +887,13 @@ void analogix_dp_set_video_timing_mode(struct analogix_dp_device *dp, u32 type) + u32 reg; -+void dw_hdmi_set_cec_adap(struct dw_hdmi *hdmi, struct cec_adapter *adap) -+{ -+ hdmi->cec_adap = adap; -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_set_cec_adap); -+ - /* 
----------------------------------------------------------------------------- - * IRQ Handling - */ -@@ -3030,7 +4202,7 @@ static irqreturn_t dw_hdmi_i2c_irq(struct dw_hdmi *hdmi) - static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id) - { - struct dw_hdmi *hdmi = dev_id; -- u8 intr_stat; -+ u8 intr_stat, hdcp_stat; - irqreturn_t ret = IRQ_NONE; + if (type == VIDEO_TIMING_FROM_CAPTURE) { +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); + reg &= ~FORMAT_SEL; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); + reg |= FORMAT_SEL; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); + } + } - if (hdmi->i2c) -@@ -3042,6 +4214,13 @@ static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id) - return IRQ_WAKE_THREAD; +@@ -843,15 +902,15 @@ void analogix_dp_enable_video_master(struct analogix_dp_device *dp, bool enable) + u32 reg; + + if (enable) { +- reg = readl(dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SOC_GENERAL_CTL); + reg &= ~VIDEO_MODE_MASK; + reg |= VIDEO_MASTER_MODE_EN | VIDEO_MODE_MASTER_MODE; +- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); + } else { +- reg = readl(dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SOC_GENERAL_CTL); + reg &= ~VIDEO_MODE_MASK; + reg |= VIDEO_MODE_SLAVE_MODE; +- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); } + } -+ hdcp_stat = hdmi_readb(hdmi, HDMI_A_APIINTSTAT); -+ if (hdcp_stat) { -+ dev_dbg(hdmi->dev, "HDCP irq %#x\n", hdcp_stat); -+ hdmi_writeb(hdmi, 0xff, HDMI_A_APIINTMSK); -+ return IRQ_WAKE_THREAD; -+ } -+ - return ret; +@@ -859,19 +918,19 @@ void analogix_dp_start_video(struct analogix_dp_device *dp) + { + u32 reg; + +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_1); + reg |= VIDEO_EN; +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_1, reg); } -@@ -3049,7 +4228,7 @@ void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense) + int analogix_dp_is_video_stream_on(struct analogix_dp_device *dp) { - mutex_lock(&hdmi->mutex); + u32 reg; -- if (!hdmi->force) { -+ if (!hdmi->force && !hdmi->force_logo) { - /* - * If the RX sense status indicates we're disconnected, - * clear the software rxsense status. 
-@@ -3076,8 +4255,7 @@ EXPORT_SYMBOL_GPL(dw_hdmi_setup_rx_sense); - static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); +- writel(reg, dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_3, reg); + +- reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_3); + if (!(reg & STRM_VALID)) { + dev_dbg(dp->dev, "Input video stream is not detected.\n"); + return -EINVAL; +@@ -884,55 +943,55 @@ void analogix_dp_config_video_slave_mode(struct analogix_dp_device *dp) { - struct dw_hdmi *hdmi = dev_id; -- u8 intr_stat, phy_int_pol, phy_pol_mask, phy_stat; -- enum drm_connector_status status = connector_status_unknown; -+ u8 intr_stat, phy_int_pol, phy_pol_mask, phy_stat, hdcp_stat; + u32 reg; - intr_stat = hdmi_readb(hdmi, HDMI_IH_PHY_STAT0); - phy_int_pol = hdmi_readb(hdmi, HDMI_PHY_POL0); -@@ -3116,29 +4294,23 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) - cec_notifier_phys_addr_invalidate(hdmi->cec_notifier); - mutex_unlock(&hdmi->cec_notifier_mutex); - } -- -- if (phy_stat & HDMI_PHY_HPD) -- status = connector_status_connected; -- -- if (!(phy_stat & (HDMI_PHY_HPD | HDMI_PHY_RX_SENSE))) -- status = connector_status_disconnected; +- reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + reg &= ~(RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N); + } else { + reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); + reg |= MASTER_VID_FUNC_EN_N; } +- writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); -- if (status != connector_status_unknown) { -- dev_dbg(hdmi->dev, "EVENT=%s\n", -- status == connector_status_connected ? 
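The HDCP interrupt handling added around dw_hdmi_hardirq() and dw_hdmi_irq() follows the usual hard-IRQ/threaded-IRQ split: the hard handler only reads and masks the source and returns IRQ_WAKE_THREAD, and the threaded handler does the slower work before unmasking again. A condensed sketch of that flow; the example_* function names are illustrative, while the registers and byte accessors are the ones used elsewhere in this patch:

static irqreturn_t example_hardirq(int irq, void *dev_id)
{
	struct dw_hdmi *hdmi = dev_id;
	u8 stat = hdmi_readb(hdmi, HDMI_A_APIINTSTAT);

	if (!stat)
		return IRQ_NONE;

	/* Mask the source so the line stays quiet until the thread runs. */
	hdmi_writeb(hdmi, 0xff, HDMI_A_APIINTMSK);
	return IRQ_WAKE_THREAD;
}

static irqreturn_t example_thread(int irq, void *dev_id)
{
	struct dw_hdmi *hdmi = dev_id;
	u8 stat = hdmi_readb(hdmi, HDMI_A_APIINTSTAT);

	/* Handle, acknowledge, then unmask again. */
	hdmi_writeb(hdmi, stat, HDMI_A_APIINTCLR);
	hdmi_writeb(hdmi, 0x00, HDMI_A_APIINTMSK);
	return IRQ_HANDLED;
}

/*
 * Registered as a shared, oneshot threaded interrupt, matching the
 * dw_hdmi_probe() change in this patch:
 *
 *	devm_request_threaded_irq(dev, irq, example_hardirq, example_thread,
 *				  IRQF_SHARED | IRQF_ONESHOT,
 *				  dev_name(dev), hdmi);
 */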
-- "plugin" : "plugout"); -- -- if (hdmi->bridge.dev) { -- drm_helper_hpd_irq_event(hdmi->bridge.dev); -- drm_bridge_hpd_notify(&hdmi->bridge, status); -- } -- } -+ check_hdmi_irq(hdmi, intr_stat, phy_int_pol); +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); + reg &= ~INTERACE_SCAN_CFG; + reg |= (dp->video_info.interlaced << 2); +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); - hdmi_writeb(hdmi, intr_stat, HDMI_IH_PHY_STAT0); -- hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE), -- HDMI_IH_MUTE_PHY_STAT0); -- -+ if (!hdmi->next_bridge) -+ hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | -+ HDMI_IH_PHY_STAT0_RX_SENSE), -+ HDMI_IH_MUTE_PHY_STAT0); -+ -+ hdcp_stat = hdmi_readb(hdmi, HDMI_A_APIINTSTAT); -+ if (hdcp_stat) { -+ if (hdmi->hdcp) -+ hdmi->hdcp->hdcp_isr(hdmi->hdcp, hdcp_stat); -+ hdmi_writeb(hdmi, hdcp_stat, HDMI_A_APIINTCLR); -+ hdmi_writeb(hdmi, 0x00, HDMI_A_APIINTMSK); -+ } - return IRQ_HANDLED; +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); + reg &= ~VSYNC_POLARITY_CFG; + reg |= (dp->video_info.v_sync_polarity << 1); +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); + +- reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); + reg &= ~HSYNC_POLARITY_CFG; + reg |= (dp->video_info.h_sync_polarity << 0); +- writel(reg, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); + + reg = AUDIO_MODE_SPDIF_MODE | VIDEO_MODE_SLAVE_MODE; +- writel(reg, dp->reg_base + ANALOGIX_DP_SOC_GENERAL_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_SOC_GENERAL_CTL, reg); } -@@ -3245,6 +4417,7 @@ static void dw_hdmi_cec_disable(struct dw_hdmi *hdmi) - static const struct dw_hdmi_cec_ops dw_hdmi_cec_ops = { - .write = hdmi_writeb, - .read = hdmi_readb, -+ .mod = hdmi_modb, - .enable = dw_hdmi_cec_enable, - .disable = dw_hdmi_cec_disable, - }; -@@ -3253,14 +4426,14 @@ static const struct regmap_config hdmi_regmap_8bit_config = { - .reg_bits = 32, - .val_bits = 8, - .reg_stride = 1, -- .max_register = HDMI_I2CM_FS_SCL_LCNT_0_ADDR, -+ .max_register = HDMI_I2CM_SCDC_UPDATE1, - }; + void analogix_dp_enable_scrambling(struct analogix_dp_device *dp) + { + u32 reg; - static const struct regmap_config hdmi_regmap_32bit_config = { - .reg_bits = 32, - .val_bits = 32, - .reg_stride = 4, -- .max_register = HDMI_I2CM_FS_SCL_LCNT_0_ADDR << 2, -+ .max_register = HDMI_I2CM_SCDC_UPDATE1 << 2, - }; +- reg = readl(dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_TRAINING_PTN_SET); + reg &= ~SCRAMBLING_DISABLE; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); + } - static void dw_hdmi_init_hw(struct dw_hdmi *hdmi) -@@ -3278,6 +4451,367 @@ static void dw_hdmi_init_hw(struct dw_hdmi *hdmi) - hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data); + void analogix_dp_disable_scrambling(struct analogix_dp_device *dp) + { + u32 reg; + +- reg = readl(dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_TRAINING_PTN_SET); + reg |= SCRAMBLING_DISABLE; +- writel(reg, dp->reg_base + ANALOGIX_DP_TRAINING_PTN_SET); ++ analogix_dp_write(dp, ANALOGIX_DP_TRAINING_PTN_SET, reg); } -+static int dw_hdmi_status_show(struct seq_file *s, 
void *v) + void analogix_dp_enable_psr_crc(struct analogix_dp_device *dp) + { +- writel(PSR_VID_CRC_ENABLE, dp->reg_base + ANALOGIX_DP_CRC_CON); ++ analogix_dp_write(dp, ANALOGIX_DP_CRC_CON, PSR_VID_CRC_ENABLE); + } + + static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp) +@@ -948,6 +1007,24 @@ static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp) + return status; + } + ++static void analogix_dp_reuse_spd(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = s->private; -+ u32 val; ++ u32 reg, val; + -+ seq_puts(s, "PHY: "); -+ if (!hdmi->phy.enabled) { -+ seq_puts(s, "disabled\n"); -+ return 0; -+ } -+ seq_puts(s, "enabled\t\t\tMode: "); -+ if (hdmi->sink_is_hdmi) -+ seq_puts(s, "HDMI\n"); -+ else -+ seq_puts(s, "DVI\n"); -+ if (hdmi->hdmi_data.video_mode.mtmdsclock > 340000000) -+ val = hdmi->hdmi_data.video_mode.mtmdsclock / 4; -+ else -+ val = hdmi->hdmi_data.video_mode.mtmdsclock; -+ seq_printf(s, "Pixel Clk: %uHz\t\tTMDS Clk: %uHz\n", -+ hdmi->hdmi_data.video_mode.mpixelclock, val); -+ seq_puts(s, "Color Format: "); -+ if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "RGB"); -+ else if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV444"); -+ else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV422"); -+ else if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format)) -+ seq_puts(s, "YUV420"); -+ else -+ seq_puts(s, "UNKNOWN"); -+ val = hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format); -+ seq_printf(s, "\t\tColor Depth: %d bit\n", val); -+ seq_puts(s, "Colorimetry: "); -+ switch (hdmi->hdmi_data.enc_out_encoding) { -+ case V4L2_YCBCR_ENC_601: -+ seq_puts(s, "ITU.BT601"); -+ break; -+ case V4L2_YCBCR_ENC_709: -+ seq_puts(s, "ITU.BT709"); -+ break; -+ case V4L2_YCBCR_ENC_BT2020: -+ seq_puts(s, "ITU.BT2020"); ++ switch (dp->plat_data->dev_type) { ++ case RK3588_EDP: ++ reg = ANALOGIX_DP_SPDIF_AUDIO_CTL_0; + break; -+ default: /* Carries no data */ -+ seq_puts(s, "ITU.BT601"); ++ default: ++ reg = ANALOGIX_DP_VIDEO_CTL_3; + break; + } + -+ seq_puts(s, "\t\tEOTF: "); ++ val = analogix_dp_read(dp, reg); ++ val |= REUSE_SPD_EN; ++ analogix_dp_write(dp, reg, val); ++} + -+ if (hdmi->version < 0x211a) { -+ seq_puts(s, "Unsupported\n"); -+ return 0; -+ } + int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, + struct dp_sdp *vsc, bool blocking) + { +@@ -956,44 +1033,47 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, + ssize_t psr_status; + + /* don't send info frame */ +- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); + val &= ~IF_EN; +- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); + + /* configure single frame update mode */ +- writel(PSR_FRAME_UP_TYPE_BURST | PSR_CRC_SEL_HARDWARE, +- dp->reg_base + ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL); ++ analogix_dp_write(dp, ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL, ++ PSR_FRAME_UP_TYPE_BURST | PSR_CRC_SEL_HARDWARE); + + /* configure VSC HB0~HB3 */ +- writel(vsc->sdp_header.HB0, dp->reg_base + ANALOGIX_DP_SPD_HB0); +- writel(vsc->sdp_header.HB1, dp->reg_base + ANALOGIX_DP_SPD_HB1); +- writel(vsc->sdp_header.HB2, dp->reg_base + ANALOGIX_DP_SPD_HB2); +- writel(vsc->sdp_header.HB3, dp->reg_base + ANALOGIX_DP_SPD_HB3); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB0, vsc->sdp_header.HB0); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB1, vsc->sdp_header.HB1); ++ 
analogix_dp_write(dp, ANALOGIX_DP_SPD_HB2, vsc->sdp_header.HB2); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_HB3, vsc->sdp_header.HB3); + + /* configure reused VSC PB0~PB3, magic number from vendor */ +- writel(0x00, dp->reg_base + ANALOGIX_DP_SPD_PB0); +- writel(0x16, dp->reg_base + ANALOGIX_DP_SPD_PB1); +- writel(0xCE, dp->reg_base + ANALOGIX_DP_SPD_PB2); +- writel(0x5D, dp->reg_base + ANALOGIX_DP_SPD_PB3); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB0, 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB1, 0x16); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB2, 0xCE); ++ analogix_dp_write(dp, ANALOGIX_DP_SPD_PB3, 0x5D); + + /* configure DB0 / DB1 values */ +- writel(vsc->db[0], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0); +- writel(vsc->db[1], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1); ++ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_DB0, vsc->db[0]); ++ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_DB1, vsc->db[1]); + -+ val = hdmi_readb(hdmi, HDMI_FC_PACKET_TX_EN); -+ if (!(val & HDMI_FC_PACKET_TX_EN_DRM_MASK)) { -+ seq_puts(s, "Off\n"); -+ return 0; -+ } ++ /* configure PB0 / PB1 values */ ++ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_PB0, ++ vsc->db[1] ? 0x8d : 0x00); ++ analogix_dp_write(dp, ANALOGIX_DP_VSC_SHADOW_PB1, 0x00); + + /* set reuse spd inforframe */ +- val = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); +- val |= REUSE_SPD_EN; +- writel(val, dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); ++ analogix_dp_reuse_spd(dp); + + /* mark info frame update */ +- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); + val = (val | IF_UP) & ~IF_EN; +- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); + + /* send info frame */ +- val = readl(dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ val = analogix_dp_read(dp, ANALOGIX_DP_PKT_SEND_CTL); + val |= IF_EN; +- writel(val, dp->reg_base + ANALOGIX_DP_PKT_SEND_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_PKT_SEND_CTL, val); + + if (!blocking) + return 0; +@@ -1020,11 +1100,46 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, + return 0; + } + ++int analogix_dp_phy_power_on(struct analogix_dp_device *dp) ++{ ++ int ret; + -+ switch (hdmi_readb(hdmi, HDMI_FC_DRM_PB0)) { -+ case HDMI_EOTF_TRADITIONAL_GAMMA_SDR: -+ seq_puts(s, "SDR"); -+ break; -+ case HDMI_EOTF_TRADITIONAL_GAMMA_HDR: -+ seq_puts(s, "HDR"); -+ break; -+ case HDMI_EOTF_SMPTE_ST2084: -+ seq_puts(s, "ST2084"); -+ break; -+ case HDMI_EOTF_BT_2100_HLG: -+ seq_puts(s, "HLG"); -+ break; -+ default: -+ seq_puts(s, "Not Defined\n"); -+ return 0; -+ } ++ ret = phy_set_mode(dp->phy, PHY_MODE_DP); ++ if (ret) { ++ dev_err(dp->dev, "phy_set_mode failed: %d\n", ret); ++ return ret; ++ } + -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB3) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB2); -+ seq_printf(s, "\nx0: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB5) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB4); -+ seq_printf(s, "\t\t\t\ty0: %d\n", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB7) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB6); -+ seq_printf(s, "x1: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB9) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB8); -+ seq_printf(s, "\t\t\t\ty1: %d\n", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB11) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB10); -+ seq_printf(s, "x2: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB13) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB12); -+ seq_printf(s, "\t\t\t\ty2: %d\n", val); -+ val 
= hdmi_readb(hdmi, HDMI_FC_DRM_PB15) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB14); -+ seq_printf(s, "white x: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB17) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB16); -+ seq_printf(s, "\t\t\twhite y: %d\n", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB19) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB18); -+ seq_printf(s, "max lum: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB21) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB20); -+ seq_printf(s, "\t\t\tmin lum: %d\n", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB23) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB22); -+ seq_printf(s, "max cll: %d", val); -+ val = hdmi_readb(hdmi, HDMI_FC_DRM_PB25) << 8; -+ val |= hdmi_readb(hdmi, HDMI_FC_DRM_PB24); -+ seq_printf(s, "\t\t\tmax fall: %d\n", val); -+ return 0; ++ ret = phy_power_on(dp->phy); ++ if (ret) { ++ dev_err(dp->dev, "phy_power_on failed: %d\n", ret); ++ return ret; ++ } ++ ++ return ret; +} + -+static int dw_hdmi_status_open(struct inode *inode, struct file *file) ++void analogix_dp_phy_power_off(struct analogix_dp_device *dp) +{ -+ return single_open(file, dw_hdmi_status_show, inode->i_private); ++ phy_power_off(dp->phy); +} + -+static const struct file_operations dw_hdmi_status_fops = { -+ .owner = THIS_MODULE, -+ .open = dw_hdmi_status_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, ++enum { ++ AUX_STATUS_OK, ++ AUX_STATUS_NACK_ERROR, ++ AUX_STATUS_TIMEOUT_ERROR, ++ AUX_STATUS_UNKNOWN_ERROR, ++ AUX_STATUS_MUCH_DEFER_ERROR, ++ AUX_STATUS_TX_SHORT_ERROR, ++ AUX_STATUS_RX_SHORT_ERROR, ++ AUX_STATUS_NACK_WITHOUT_M_ERROR, ++ AUX_STATUS_I2C_NACK_ERROR +}; + -+#include -+#include -+#include + ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + struct drm_dp_aux_msg *msg) + { + u32 reg; +- u32 status_reg; + u8 *buffer = msg->buffer; + unsigned int i; + int ret; +@@ -1035,7 +1150,7 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + + /* Clear AUX CH data buffer */ + reg = BUF_CLR; +- writel(reg, dp->reg_base + ANALOGIX_DP_BUFFER_DATA_CTL); ++ analogix_dp_write(dp, ANALOGIX_DP_BUFFER_DATA_CTL, reg); + + switch (msg->request & ~DP_AUX_I2C_MOT) { + case DP_AUX_I2C_WRITE: +@@ -1063,21 +1178,21 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + } + + reg |= AUX_LENGTH(msg->size); +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_1); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_CTL_1, reg); + + /* Select DPCD device address */ + reg = AUX_ADDR_7_0(msg->address); +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_ADDR_7_0); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_7_0, reg); + reg = AUX_ADDR_15_8(msg->address); +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_ADDR_15_8); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_15_8, reg); + reg = AUX_ADDR_19_16(msg->address); +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_ADDR_19_16); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_ADDR_19_16, reg); + + if (!(msg->request & DP_AUX_I2C_READ)) { + for (i = 0; i < msg->size; i++) { + reg = buffer[i]; +- writel(reg, dp->reg_base + ANALOGIX_DP_BUF_DATA_0 + +- 4 * i); ++ analogix_dp_write(dp, ANALOGIX_DP_BUF_DATA_0 + 4 * i, ++ reg); + } + } + +@@ -1088,7 +1203,7 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + if (msg->size < 1) + reg |= ADDR_ONLY; + +- writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2); ++ analogix_dp_write(dp, ANALOGIX_DP_AUX_CH_CTL_2, reg); + + ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2, + reg, !(reg & AUX_EN), 25, 500 * 
1000); +@@ -1107,29 +1222,29 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + } + + /* Clear interrupt source for AUX CH command reply */ +- writel(RPLY_RECEIV, dp->reg_base + ANALOGIX_DP_INT_STA); ++ analogix_dp_write(dp, ANALOGIX_DP_INT_STA, RPLY_RECEIV); + +- /* Clear interrupt source for AUX CH access error */ +- reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); +- status_reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); +- if ((reg & AUX_ERR) || (status_reg & AUX_STATUS_MASK)) { +- writel(AUX_ERR, dp->reg_base + ANALOGIX_DP_INT_STA); +- +- dev_warn(dp->dev, "AUX CH error happened: %#x (%d)\n", +- status_reg & AUX_STATUS_MASK, !!(reg & AUX_ERR)); +- goto aux_error; +- } ++ reg = analogix_dp_read(dp, ANALOGIX_DP_AUX_CH_STA); ++ if ((reg & AUX_STATUS_MASK) == AUX_STATUS_TIMEOUT_ERROR) ++ return -ETIMEDOUT; + + if (msg->request & DP_AUX_I2C_READ) { ++ size_t buf_data_count; + -+struct dw_hdmi_reg_table { -+ int reg_base; -+ int reg_end; -+}; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_BUFFER_DATA_CTL); ++ buf_data_count = BUF_DATA_COUNT(reg); + -+static const struct dw_hdmi_reg_table hdmi_reg_table[] = { -+ {HDMI_DESIGN_ID, HDMI_CONFIG3_ID}, -+ {HDMI_IH_FC_STAT0, HDMI_IH_MUTE}, -+ {HDMI_TX_INVID0, HDMI_TX_BCBDATA1}, -+ {HDMI_VP_STATUS, HDMI_VP_POL}, -+ {HDMI_FC_INVIDCONF, HDMI_FC_DBGTMDS2}, -+ {HDMI_PHY_CONF0, HDMI_PHY_POL0}, -+ {HDMI_PHY_I2CM_SLAVE_ADDR, HDMI_PHY_I2CM_FS_SCL_LCNT_0_ADDR}, -+ {HDMI_AUD_CONF0, 0x3624}, -+ {HDMI_MC_SFRDIV, HDMI_MC_HEACPHY_RST}, -+ {HDMI_CSC_CFG, HDMI_CSC_COEF_C4_LSB}, -+ {HDMI_A_HDCPCFG0, 0x52bb}, -+ {0x7800, 0x7818}, -+ {0x7900, 0x790e}, -+ {HDMI_CEC_CTRL, HDMI_CEC_WKUPCTRL}, -+ {HDMI_I2CM_SLAVE, 0x7e31}, -+}; ++ if (buf_data_count != msg->size) ++ return -EBUSY; + for (i = 0; i < msg->size; i++) { +- reg = readl(dp->reg_base + ANALOGIX_DP_BUF_DATA_0 + +- 4 * i); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_BUF_DATA_0 + ++ 4 * i); + buffer[i] = (unsigned char)reg; + } + } + + /* Check if Rx sends defer */ +- reg = readl(dp->reg_base + ANALOGIX_DP_AUX_RX_COMM); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_AUX_RX_COMM); + if (reg == AUX_RX_COMM_AUX_DEFER) + msg->reply = DP_AUX_NATIVE_REPLY_DEFER; + else if (reg == AUX_RX_COMM_I2C_DEFER) +@@ -1149,3 +1264,127 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, + + return -EREMOTEIO; + } + -+static int dw_hdmi_ctrl_show(struct seq_file *s, void *v) ++void analogix_dp_set_video_format(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = s->private; -+ u32 i = 0, j = 0, val = 0; -+ -+ seq_puts(s, "\n>>>hdmi_ctl reg "); -+ for (i = 0; i < 16; i++) -+ seq_printf(s, " %2x", i); -+ seq_puts(s, "\n---------------------------------------------------"); ++ struct video_info *video = &dp->video_info; ++ const struct drm_display_mode *mode = &video->mode; ++ unsigned int hsw, hfp, hbp, vsw, vfp, vbp; + -+ for (i = 0; i < ARRAY_SIZE(hdmi_reg_table); i++) { -+ for (j = hdmi_reg_table[i].reg_base; -+ j <= hdmi_reg_table[i].reg_end; j++) { -+ val = hdmi_readb(hdmi, j); -+ if ((j - hdmi_reg_table[i].reg_base) % 16 == 0) -+ seq_printf(s, "\n>>>hdmi_ctl %04x:", j); -+ seq_printf(s, " %02x", val); -+ } -+ } -+ seq_puts(s, "\n---------------------------------------------------\n"); ++ hsw = mode->hsync_end - mode->hsync_start; ++ hfp = mode->hsync_start - mode->hdisplay; ++ hbp = mode->htotal - mode->hsync_end; ++ vsw = mode->vsync_end - mode->vsync_start; ++ vfp = mode->vsync_start - mode->vdisplay; ++ vbp = mode->vtotal - mode->vsync_end; + -+ return 0; ++ /* Set Video Format Parameters */ ++ 
analogix_dp_write(dp, ANALOGIX_DP_TOTAL_LINE_CFG_L, ++ TOTAL_LINE_CFG_L(mode->vtotal)); ++ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_LINE_CFG_H, ++ TOTAL_LINE_CFG_H(mode->vtotal >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_LINE_CFG_L, ++ ACTIVE_LINE_CFG_L(mode->vdisplay)); ++ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_LINE_CFG_H, ++ ACTIVE_LINE_CFG_H(mode->vdisplay >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_V_F_PORCH_CFG, ++ V_F_PORCH_CFG(vfp)); ++ analogix_dp_write(dp, ANALOGIX_DP_V_SYNC_WIDTH_CFG, ++ V_SYNC_WIDTH_CFG(vsw)); ++ analogix_dp_write(dp, ANALOGIX_DP_V_B_PORCH_CFG, ++ V_B_PORCH_CFG(vbp)); ++ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_L, ++ TOTAL_PIXEL_CFG_L(mode->htotal)); ++ analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_H, ++ TOTAL_PIXEL_CFG_H(mode->htotal >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_PIXEL_CFG_L, ++ ACTIVE_PIXEL_CFG_L(mode->hdisplay)); ++ analogix_dp_write(dp, ANALOGIX_DP_ACTIVE_PIXEL_CFG_H, ++ ACTIVE_PIXEL_CFG_H(mode->hdisplay >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_F_PORCH_CFG_L, ++ H_F_PORCH_CFG_L(hfp)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_F_PORCH_CFG_H, ++ H_F_PORCH_CFG_H(hfp >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_SYNC_CFG_L, ++ H_SYNC_CFG_L(hsw)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_SYNC_CFG_H, ++ H_SYNC_CFG_H(hsw >> 8)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_B_PORCH_CFG_L, ++ H_B_PORCH_CFG_L(hbp)); ++ analogix_dp_write(dp, ANALOGIX_DP_H_B_PORCH_CFG_H, ++ H_B_PORCH_CFG_H(hbp >> 8)); +} + -+static int dw_hdmi_ctrl_open(struct inode *inode, struct file *file) ++void analogix_dp_video_bist_enable(struct analogix_dp_device *dp) +{ -+ return single_open(file, dw_hdmi_ctrl_show, inode->i_private); -+} ++ u32 reg; + -+static ssize_t -+dw_hdmi_ctrl_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct dw_hdmi *hdmi = -+ ((struct seq_file *)file->private_data)->private; -+ u32 reg, val; -+ char kbuf[25]; ++ /* Enable Video BIST */ ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_4, BIST_EN); + -+ if (copy_from_user(kbuf, buf, count)) -+ return -EFAULT; -+ if (sscanf(kbuf, "%x%x", ®, &val) == -1) -+ return -EFAULT; -+ if (reg > HDMI_I2CM_FS_SCL_LCNT_0_ADDR) { -+ dev_err(hdmi->dev, "it is no a hdmi register\n"); -+ return count; -+ } -+ dev_info(hdmi->dev, "/**********hdmi register config******/"); -+ dev_info(hdmi->dev, "\n reg=%x val=%x\n", reg, val); -+ hdmi_writeb(hdmi, val, reg); -+ return count; ++ /* ++ * Note that if BIST_EN is set to 1, F_SEL must be cleared to 0 ++ * although video format information comes from registers set by user. 
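analogix_dp_set_video_format() above derives every sync width and porch from the struct drm_display_mode held in dp->video_info and then splits the wide values across _L/_H register pairs. The same derivation, condensed into an illustrative helper; the example_h_timing() name is not part of the patch:

/* Horizontal timing pulled out of a drm_display_mode, as done above. */
static void example_h_timing(const struct drm_display_mode *mode,
			     unsigned int *hsw, unsigned int *hfp,
			     unsigned int *hbp)
{
	*hsw = mode->hsync_end - mode->hsync_start;	/* sync width  */
	*hfp = mode->hsync_start - mode->hdisplay;	/* front porch */
	*hbp = mode->htotal - mode->hsync_end;		/* back porch  */
}

/*
 * Values wider than eight bits are then written as a low/high pair,
 * e.g. the total pixel count:
 *
 *	analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_L,
 *			  TOTAL_PIXEL_CFG_L(mode->htotal));
 *	analogix_dp_write(dp, ANALOGIX_DP_TOTAL_PIXEL_CFG_H,
 *			  TOTAL_PIXEL_CFG_H(mode->htotal >> 8));
 */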
++ */ ++ reg = analogix_dp_read(dp, ANALOGIX_DP_VIDEO_CTL_10); ++ reg &= ~FORMAT_SEL; ++ analogix_dp_write(dp, ANALOGIX_DP_VIDEO_CTL_10, reg); +} + -+static const struct file_operations dw_hdmi_ctrl_fops = { -+ .owner = THIS_MODULE, -+ .open = dw_hdmi_ctrl_open, -+ .read = seq_read, -+ .write = dw_hdmi_ctrl_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+static int dw_hdmi_phy_show(struct seq_file *s, void *v) ++void analogix_dp_audio_config_i2s(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = s->private; -+ u32 i; ++ u32 reg; + -+ seq_puts(s, "\n>>>hdmi_phy reg "); -+ for (i = 0; i < 0x28; i++) -+ seq_printf(s, "regs %02x val %04x\n", -+ i, hdmi_phy_i2c_read(hdmi, i)); -+ return 0; -+} ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); ++ reg &= ~FIX_M_AUD; ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + -+static int dw_hdmi_phy_open(struct inode *inode, struct file *file) -+{ -+ return single_open(file, dw_hdmi_phy_show, inode->i_private); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_I2S_CTRL); ++ reg |= I2S_EN; ++ analogix_dp_write(dp, ANALOGIX_DP_I2S_CTRL, reg); +} + -+static ssize_t -+dw_hdmi_phy_write(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) ++void analogix_dp_audio_config_spdif(struct analogix_dp_device *dp) +{ -+ struct dw_hdmi *hdmi = -+ ((struct seq_file *)file->private_data)->private; -+ u32 reg, val; -+ char kbuf[25]; -+ -+ if (copy_from_user(kbuf, buf, count)) -+ return -EFAULT; -+ if (sscanf(kbuf, "%x%x", ®, &val) == -1) -+ return -EFAULT; -+ if (reg > 0x28) { -+ dev_err(hdmi->dev, "it is not a hdmi phy register\n"); -+ return count; -+ } -+ dev_info(hdmi->dev, "/*******hdmi phy register config******/"); -+ dev_info(hdmi->dev, "\n reg=%x val=%x\n", reg, val); -+ dw_hdmi_phy_i2c_write(hdmi, val, reg); -+ return count; -+} ++ u32 reg; + -+static const struct file_operations dw_hdmi_phy_fops = { -+ .owner = THIS_MODULE, -+ .open = dw_hdmi_phy_open, -+ .read = seq_read, -+ .write = dw_hdmi_phy_write, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SYS_CTL_4); ++ reg &= ~FIX_M_AUD; ++ analogix_dp_write(dp, ANALOGIX_DP_SYS_CTL_4, reg); + -+static void dw_hdmi_register_debugfs(struct device *dev, struct dw_hdmi *hdmi) -+{ -+ hdmi->debugfs_dir = debugfs_create_dir("dw-hdmi", NULL); -+ if (IS_ERR(hdmi->debugfs_dir)) { -+ dev_err(dev, "failed to create debugfs dir!\n"); -+ return; -+ } -+ debugfs_create_file("status", 0400, hdmi->debugfs_dir, -+ hdmi, &dw_hdmi_status_fops); -+ debugfs_create_file("ctrl", 0400, hdmi->debugfs_dir, -+ hdmi, &dw_hdmi_ctrl_fops); -+ debugfs_create_file("phy", 0400, hdmi->debugfs_dir, -+ hdmi, &dw_hdmi_phy_fops); ++ reg = analogix_dp_read(dp, ANALOGIX_DP_SPDIF_AUDIO_CTL_0); ++ reg |= AUD_SPDIF_EN; ++ analogix_dp_write(dp, ANALOGIX_DP_SPDIF_AUDIO_CTL_0, reg); +} + -+static void dw_hdmi_register_hdcp(struct device *dev, struct dw_hdmi *hdmi, -+ u32 val, bool hdcp1x_enable) ++void analogix_dp_audio_enable(struct analogix_dp_device *dp) +{ -+ struct dw_hdcp hdmi_hdcp = { -+ .hdmi = hdmi, -+ .write = hdmi_writeb, -+ .read = hdmi_readb, -+ .regs = hdmi->regs, -+ .reg_io_width = val, -+ .enable = hdcp1x_enable, -+ }; -+ struct platform_device_info hdcp_device_info = { -+ .parent = dev, -+ .id = PLATFORM_DEVID_AUTO, -+ .res = NULL, -+ .num_res = 0, -+ .name = DW_HDCP_DRIVER_NAME, -+ .data = &hdmi_hdcp, -+ .size_data = sizeof(hdmi_hdcp), -+ .dma_mask = DMA_BIT_MASK(32), -+ }; ++ u32 reg; + -+ hdmi->hdcp_dev = 
platform_device_register_full(&hdcp_device_info); -+ if (IS_ERR(hdmi->hdcp_dev)) -+ dev_err(dev, "failed to register hdcp!\n"); -+ else -+ hdmi->hdcp = hdmi->hdcp_dev->dev.platform_data; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); ++ reg &= ~(AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N); ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); ++ ++ reg = analogix_dp_read(dp, ANALOGIX_DP_AUD_CTL); ++ reg |= MISC_CTRL_RESET | DP_AUDIO_EN; ++ analogix_dp_write(dp, ANALOGIX_DP_AUD_CTL, reg); +} + -+static int get_force_logo_property(struct dw_hdmi *hdmi) ++void analogix_dp_audio_disable(struct analogix_dp_device *dp) +{ -+ struct device_node *dss; -+ struct device_node *route; -+ struct device_node *route_hdmi; -+ -+ dss = of_find_node_by_name(NULL, "display-subsystem"); -+ if (!dss) { -+ dev_err(hdmi->dev, "can't find display-subsystem\n"); -+ return -ENODEV; -+ } -+ -+ route = of_find_node_by_name(dss, "route"); -+ if (!route) { -+ dev_err(hdmi->dev, "can't find route\n"); -+ of_node_put(dss); -+ return -ENODEV; -+ } -+ of_node_put(dss); -+ -+ route_hdmi = of_find_node_by_name(route, "route-hdmi"); -+ if (!route_hdmi) { -+ dev_err(hdmi->dev, "can't find route-hdmi\n"); -+ of_node_put(route); -+ return -ENODEV; -+ } -+ of_node_put(route); -+ -+ hdmi->force_logo = -+ of_property_read_bool(route_hdmi, "force-output"); ++ u32 reg; + -+ of_node_put(route_hdmi); ++ analogix_dp_write(dp, ANALOGIX_DP_AUD_CTL, 0); + -+ return 0; ++ reg = analogix_dp_read(dp, ANALOGIX_DP_FUNC_EN_1); ++ reg |= AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N; ++ analogix_dp_write(dp, ANALOGIX_DP_FUNC_EN_1, reg); +} + -+void -+dw_hdmi_cec_wake_ops_register(struct dw_hdmi *hdmi, const struct dw_hdmi_cec_wake_ops *cec_ops) ++void analogix_dp_init(struct analogix_dp_device *dp) +{ -+ if (!cec_ops || !hdmi) -+ return; -+ -+ hdmi->cec_ops = cec_ops; ++ analogix_dp_init_interrupt(dp); ++ analogix_dp_config_interrupt(dp); ++ analogix_dp_init_hpd(dp); ++ analogix_dp_init_aux(dp); +} -+EXPORT_SYMBOL_GPL(dw_hdmi_cec_wake_ops_register); -+ -+ - /* ----------------------------------------------------------------------------- - * Probe/remove API, used from platforms based on the DRM bridge API. 
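The debugfs entries added above open-code a single_open() wrapper and a file_operations struct for each node. For the read-only status node the registration can be sketched with DEFINE_SHOW_ATTRIBUTE(); this is an illustrative alternative under the assumption that only seq_file output is needed, not what the patch itself does:

static int example_status_show(struct seq_file *s, void *v)
{
	struct dw_hdmi *hdmi = s->private;

	seq_printf(s, "PHY: %s\n",
		   hdmi->phy.enabled ? "enabled" : "disabled");
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(example_status);

/*
 * DEFINE_SHOW_ATTRIBUTE() generates example_status_fops, so the
 * registration reduces to:
 *
 *	debugfs_create_file("status", 0400, hdmi->debugfs_dir, hdmi,
 *			    &example_status_fops);
 */

The ctrl and phy nodes still need hand-rolled file_operations because they also implement a write handler.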
- */ -@@ -3327,17 +4861,12 @@ static int dw_hdmi_parse_dt(struct dw_hdmi *hdmi) - return 0; - } +diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h +index e284ee8da..0a368b172 100644 +--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h ++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h +@@ -15,9 +15,27 @@ + #define ANALOGIX_DP_VIDEO_CTL_1 0x20 + #define ANALOGIX_DP_VIDEO_CTL_2 0x24 + #define ANALOGIX_DP_VIDEO_CTL_3 0x28 ++#define ANALOGIX_DP_VIDEO_CTL_4 0x2C --bool dw_hdmi_bus_fmt_is_420(struct dw_hdmi *hdmi) --{ -- return hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format); --} --EXPORT_SYMBOL_GPL(dw_hdmi_bus_fmt_is_420); -- - struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - const struct dw_hdmi_plat_data *plat_data) - { - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; -+ struct device_node *endpoint; - struct platform_device_info pdevinfo; - struct device_node *ddc_node; - struct dw_hdmi_cec_data cec; -@@ -3350,11 +4879,13 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - u8 prod_id1; - u8 config0; - u8 config3; -+ bool hdcp1x_enable = 0; + #define ANALOGIX_DP_VIDEO_CTL_8 0x3C + #define ANALOGIX_DP_VIDEO_CTL_10 0x44 ++#define ANALOGIX_DP_TOTAL_LINE_CFG_L 0x48 ++#define ANALOGIX_DP_TOTAL_LINE_CFG_H 0x4C ++#define ANALOGIX_DP_ACTIVE_LINE_CFG_L 0x50 ++#define ANALOGIX_DP_ACTIVE_LINE_CFG_H 0x54 ++#define ANALOGIX_DP_V_F_PORCH_CFG 0x58 ++#define ANALOGIX_DP_V_SYNC_WIDTH_CFG 0x5C ++#define ANALOGIX_DP_V_B_PORCH_CFG 0x60 ++#define ANALOGIX_DP_TOTAL_PIXEL_CFG_L 0x64 ++#define ANALOGIX_DP_TOTAL_PIXEL_CFG_H 0x68 ++#define ANALOGIX_DP_ACTIVE_PIXEL_CFG_L 0x6C ++#define ANALOGIX_DP_ACTIVE_PIXEL_CFG_H 0x70 ++#define ANALOGIX_DP_H_F_PORCH_CFG_L 0x74 ++#define ANALOGIX_DP_H_F_PORCH_CFG_H 0x78 ++#define ANALOGIX_DP_H_SYNC_CFG_L 0x7C ++#define ANALOGIX_DP_H_SYNC_CFG_H 0x80 ++#define ANALOGIX_DP_H_B_PORCH_CFG_L 0x84 ++#define ANALOGIX_DP_H_B_PORCH_CFG_H 0x88 - hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL); - if (!hdmi) - return ERR_PTR(-ENOMEM); + #define ANALOGIX_DP_SPDIF_AUDIO_CTL_0 0xD8 -+ hdmi->connector.stereo_allowed = 1; - hdmi->plat_data = plat_data; - hdmi->dev = dev; - hdmi->sample_rate = 48000; -@@ -3490,7 +5021,31 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - prod_id1 & HDMI_PRODUCT_ID1_HDCP ? 
"with" : "without", - hdmi->phy.name); +@@ -27,6 +45,8 @@ + #define ANALOGIX_DP_PLL_REG_4 0x9ec + #define ANALOGIX_DP_PLL_REG_5 0xa00 -- dw_hdmi_init_hw(hdmi); -+ ret = get_force_logo_property(hdmi); -+ if (ret) -+ goto err_iahb; -+ -+ hdmi->logo_plug_out = false; -+ hdmi->initialized = false; -+ ret = hdmi_readb(hdmi, HDMI_PHY_STAT0); -+ if (((ret & HDMI_PHY_TX_PHY_LOCK) && (ret & HDMI_PHY_HPD) && -+ hdmi_readb(hdmi, HDMI_FC_EXCTRLDUR)) || hdmi->force_logo) { -+ hdmi->mc_clkdis = hdmi_readb(hdmi, HDMI_MC_CLKDIS); -+ hdmi->disabled = false; -+ hdmi->bridge_is_on = true; -+ hdmi->phy.enabled = true; -+ hdmi->initialized = true; -+ if (hdmi->plat_data->set_ddc_io) -+ hdmi->plat_data->set_ddc_io(hdmi->plat_data->phy_data, true); -+ if (hdmi->plat_data->dclk_set) -+ hdmi->plat_data->dclk_set(hdmi->plat_data->phy_data, true, 0); -+ } else if (ret & HDMI_PHY_TX_PHY_LOCK) { -+ hdmi->phy.ops->disable(hdmi, hdmi->phy.data); -+ if (hdmi->plat_data->set_ddc_io) -+ hdmi->plat_data->set_ddc_io(hdmi->plat_data->phy_data, false); -+ } -+ -+ init_hpd_work(hdmi); ++#define ANALOIGX_DP_SSC_REG 0x104 ++#define ANALOGIX_DP_BIAS 0x124 + #define ANALOGIX_DP_PD 0x12c - irq = platform_get_irq(pdev, 0); - if (irq < 0) { -@@ -3498,8 +5053,9 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - goto err_iahb; - } + #define ANALOGIX_DP_IF_TYPE 0x244 +@@ -43,6 +63,8 @@ + #define ANALOGIX_DP_PSR_FRAME_UPDATE_CTRL 0x318 + #define ANALOGIX_DP_VSC_SHADOW_DB0 0x31C + #define ANALOGIX_DP_VSC_SHADOW_DB1 0x320 ++#define ANALOGIX_DP_VSC_SHADOW_PB0 0x33C ++#define ANALOGIX_DP_VSC_SHADOW_PB1 0x340 -+ hdmi->irq = irq; - ret = devm_request_threaded_irq(dev, irq, dw_hdmi_hardirq, -- dw_hdmi_irq, IRQF_SHARED, -+ dw_hdmi_irq, IRQF_SHARED | IRQF_ONESHOT, - dev_name(dev), hdmi); - if (ret) - goto err_iahb; -@@ -3533,18 +5089,53 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - hdmi->ddc = dw_hdmi_i2c_adapter(hdmi); - if (IS_ERR(hdmi->ddc)) - hdmi->ddc = NULL; -+ /* -+ * Read high and low time from device tree. If not available use -+ * the default timing scl clock rate is about 99.6KHz. 
-+ */ -+ if (of_property_read_u32(np, "ddc-i2c-scl-high-time-ns", -+ &hdmi->i2c->scl_high_ns)) -+ hdmi->i2c->scl_high_ns = 4708; -+ if (of_property_read_u32(np, "ddc-i2c-scl-low-time-ns", -+ &hdmi->i2c->scl_low_ns)) -+ hdmi->i2c->scl_low_ns = 4916; - } + #define ANALOGIX_DP_LANE_MAP 0x35C -+ dw_hdmi_init_hw(hdmi); -+ - hdmi->bridge.driver_private = hdmi; - hdmi->bridge.funcs = &dw_hdmi_bridge_funcs; - hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID - | DRM_BRIDGE_OP_HPD; - hdmi->bridge.interlace_allowed = true; -- hdmi->bridge.ddc = hdmi->ddc; - #ifdef CONFIG_OF - hdmi->bridge.of_node = pdev->dev.of_node; - #endif +@@ -70,7 +92,7 @@ + #define ANALOGIX_DP_SYS_CTL_2 0x604 + #define ANALOGIX_DP_SYS_CTL_3 0x608 + #define ANALOGIX_DP_SYS_CTL_4 0x60C +- ++#define ANALOGIX_DP_AUD_CTL 0x618 + #define ANALOGIX_DP_PKT_SEND_CTL 0x640 + #define ANALOGIX_DP_HDCP_CTL 0x648 -+ endpoint = of_graph_get_endpoint_by_regs(hdmi->dev->of_node, 1, -1); -+ if (endpoint && of_device_is_available(endpoint)) { -+ struct device_node *remote; -+ -+ remote = of_graph_get_remote_port_parent(endpoint); -+ of_node_put(endpoint); -+ if (!remote || !of_device_is_available(remote)) { -+ of_node_put(remote); -+ ret = -ENODEV; -+ goto err_iahb; -+ } -+ -+ hdmi->next_bridge = of_drm_find_bridge(remote); -+ of_node_put(remote); -+ if (!hdmi->next_bridge) { -+ dev_err(hdmi->dev, "can't find next bridge\n"); -+ ret = -EPROBE_DEFER; -+ goto err_iahb; -+ } -+ -+ hdmi->sink_is_hdmi = true; -+ hdmi->sink_has_audio = true; -+ } -+ - memset(&pdevinfo, 0, sizeof(pdevinfo)); - pdevinfo.parent = dev; - pdevinfo.id = PLATFORM_DEVID_AUTO; -@@ -3575,6 +5166,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - audio.get_eld = hdmi_audio_get_eld; - audio.write = hdmi_writeb; - audio.read = hdmi_readb; -+ audio.mod = hdmi_modb; - hdmi->enable_audio = dw_hdmi_i2s_audio_enable; - hdmi->disable_audio = dw_hdmi_i2s_audio_disable; +@@ -116,8 +138,13 @@ + #define ANALOGIX_DP_BUF_DATA_0 0x7C0 -@@ -3608,6 +5200,12 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - cec.ops = &dw_hdmi_cec_ops; - cec.irq = irq; + #define ANALOGIX_DP_SOC_GENERAL_CTL 0x800 +- ++#define ANALOGIX_DP_TEST_80B_PATTERN0 0x81C ++#define ANALOGIX_DP_TEST_80B_PATTERN1 0x820 ++#define ANALOGIX_DP_TEST_80B_PATTERN2 0x824 ++#define ANALOGIX_DP_TEST_HBR2_PATTERN 0x828 ++#define ANALOGIX_DP_AUD_CHANNEL_CTL 0x834 + #define ANALOGIX_DP_CRC_CON 0x890 ++#define ANALOGIX_DP_I2S_CTRL 0x9C8 -+ irq = platform_get_irq(pdev, 1); -+ if (irq < 0) -+ dev_dbg(hdmi->dev, "can't get cec wake up irq\n"); -+ -+ cec.wake_irq = irq; -+ - pdevinfo.name = "dw-hdmi-cec"; - pdevinfo.data = &cec; - pdevinfo.size_data = sizeof(cec); -@@ -3616,8 +5214,40 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - hdmi->cec = platform_device_register_full(&pdevinfo); - } + /* ANALOGIX_DP_TX_SW_RESET */ + #define RESET_DP_TX (0x1 << 0) +@@ -171,6 +198,11 @@ + #define VID_CHK_UPDATE_TYPE_0 (0x0 << 4) + #define REUSE_SPD_EN (0x1 << 3) -+ hdmi->extcon = devm_extcon_dev_allocate(hdmi->dev, dw_hdmi_cable); -+ if (IS_ERR(hdmi->extcon)) { -+ ret = PTR_ERR(hdmi->extcon); -+ dev_err(hdmi->dev, "allocate extcon failed: %d\n", ret); -+ goto err_iahb; -+ } -+ -+ ret = devm_extcon_dev_register(hdmi->dev, hdmi->extcon); -+ if (ret) { -+ dev_err(hdmi->dev, "failed to register extcon: %d\n", -+ ret); -+ goto err_iahb; -+ } -+ -+ ret = extcon_set_property_capability(hdmi->extcon, EXTCON_DISP_HDMI, -+ EXTCON_PROP_DISP_HPD); -+ if (ret) { -+ dev_err(hdmi->dev, -+ "failed to set USB 
property capability: %d\n", -+ ret); -+ goto err_iahb; -+ } ++/* ANALOGIX_DP_VIDEO_CTL_4 */ ++#define BIST_EN (0x1 << 3) ++#define BIST_WIDTH(x) (((x) & 0x1) << 2) ++#define BIST_TYPE(x) (((x) & 0x3) << 0) + - drm_bridge_add(&hdmi->bridge); + /* ANALOGIX_DP_VIDEO_CTL_8 */ + #define VID_HRES_TH(x) (((x) & 0xf) << 4) + #define VID_VRES_TH(x) (((x) & 0xf) << 0) +@@ -181,6 +213,60 @@ + #define VSYNC_POLARITY_CFG (0x1 << 1) + #define HSYNC_POLARITY_CFG (0x1 << 0) -+ dw_hdmi_register_debugfs(dev, hdmi); ++/* ANALOGIX_DP_TOTAL_LINE_CFG_L */ ++#define TOTAL_LINE_CFG_L(x) (((x) & 0xff) << 0) + -+ if (of_property_read_bool(np, "scramble-low-rates")) -+ hdmi->scramble_low_rates = true; ++/* ANALOGIX_DP_TOTAL_LINE_CFG_H */ ++#define TOTAL_LINE_CFG_H(x) (((x) & 0xf) << 0) + -+ if (of_property_read_bool(np, "hdcp1x-enable")) -+ hdcp1x_enable = 1; -+ dw_hdmi_register_hdcp(dev, hdmi, val, hdcp1x_enable); ++/* ANALOGIX_DP_ACTIVE_LINE_CFG_L */ ++#define ACTIVE_LINE_CFG_L(x) (((x) & 0xff) << 0) + - return hdmi; - - err_iahb: -@@ -3626,7 +5256,10 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, - err_isfr: - clk_disable_unprepare(hdmi->isfr_clk); - err_res: -- i2c_put_adapter(hdmi->ddc); -+ if (hdmi->i2c) -+ i2c_del_adapter(&hdmi->i2c->adap); -+ else -+ i2c_put_adapter(hdmi->ddc); - - return ERR_PTR(ret); - } -@@ -3634,16 +5267,35 @@ EXPORT_SYMBOL_GPL(dw_hdmi_probe); - - void dw_hdmi_remove(struct dw_hdmi *hdmi) - { -+ if (hdmi->irq) -+ disable_irq(hdmi->irq); ++/* ANALOGIX_DP_ACTIVE_LINE_CFG_H */ ++#define ACTIVE_LINE_CFG_H(x) (((x) & 0xf) << 0) + -+ cancel_delayed_work(&hdmi->work); -+ flush_workqueue(hdmi->workqueue); -+ destroy_workqueue(hdmi->workqueue); ++/* ANALOGIX_DP_V_F_PORCH_CFG */ ++#define V_F_PORCH_CFG(x) (((x) & 0xff) << 0) + -+ debugfs_remove_recursive(hdmi->debugfs_dir); ++/* ANALOGIX_DP_V_SYNC_WIDTH_CFG */ ++#define V_SYNC_WIDTH_CFG(x) (((x) & 0xff) << 0) + - drm_bridge_remove(&hdmi->bridge); - - if (hdmi->audio && !IS_ERR(hdmi->audio)) - platform_device_unregister(hdmi->audio); -+ if (hdmi->hdcp_dev && !IS_ERR(hdmi->hdcp_dev)) -+ platform_device_unregister(hdmi->hdcp_dev); - if (!IS_ERR(hdmi->cec)) - platform_device_unregister(hdmi->cec); - - /* Disable all interrupts */ - hdmi_writeb(hdmi, ~0, HDMI_IH_MUTE_PHY_STAT0); - -+ if (!hdmi->next_bridge) { -+ dw_hdmi_destroy_properties(hdmi); -+ hdmi->connector.funcs->destroy(&hdmi->connector); -+ } ++/* ANALOGIX_DP_V_B_PORCH_CFG */ ++#define V_B_PORCH_CFG(x) (((x) & 0xff) << 0) + -+ if (hdmi->bridge.encoder) -+ hdmi->bridge.encoder->funcs->destroy(hdmi->bridge.encoder); ++/* ANALOGIX_DP_TOTAL_PIXEL_CFG_L */ ++#define TOTAL_PIXEL_CFG_L(x) (((x) & 0xff) << 0) + - clk_disable_unprepare(hdmi->iahb_clk); - clk_disable_unprepare(hdmi->isfr_clk); - clk_disable_unprepare(hdmi->cec_clk); -@@ -3660,7 +5312,7 @@ EXPORT_SYMBOL_GPL(dw_hdmi_remove); - */ - struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev, - struct drm_encoder *encoder, -- const struct dw_hdmi_plat_data *plat_data) -+ struct dw_hdmi_plat_data *plat_data) - { - struct dw_hdmi *hdmi; - int ret; -@@ -3675,6 +5327,9 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev, - return ERR_PTR(ret); - } - -+ if (!hdmi->next_bridge) -+ plat_data->connector = &hdmi->connector; ++/* ANALOGIX_DP_TOTAL_PIXEL_CFG_H */ ++#define TOTAL_PIXEL_CFG_H(x) (((x) & 0x3f) << 0) + - return hdmi; - } - EXPORT_SYMBOL_GPL(dw_hdmi_bind); -@@ -3685,9 +5340,86 @@ void dw_hdmi_unbind(struct dw_hdmi *hdmi) - } - EXPORT_SYMBOL_GPL(dw_hdmi_unbind); - -+static void dw_hdmi_reg_initial(struct dw_hdmi 
*hdmi) -+{ -+ if (hdmi_readb(hdmi, HDMI_IH_MUTE)) { -+ initialize_hdmi_ih_mutes(hdmi); -+ /* unmute cec irq */ -+ hdmi_writeb(hdmi, 0x68, HDMI_IH_MUTE_CEC_STAT0); ++/* ANALOGIX_DP_ACTIVE_PIXEL_CFG_L */ ++#define ACTIVE_PIXEL_CFG_L(x) (((x) & 0xff) << 0) + -+ hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL, -+ HDMI_PHY_I2CM_INT_ADDR); ++/* ANALOGIX_DP_ACTIVE_PIXEL_CFG_H */ ++#define ACTIVE_PIXEL_CFG_H(x) (((x) & 0x3f) << 0) + -+ hdmi_writeb(hdmi, HDMI_PHY_I2CM_CTLINT_ADDR_NAC_POL | -+ HDMI_PHY_I2CM_CTLINT_ADDR_ARBITRATION_POL, -+ HDMI_PHY_I2CM_CTLINT_ADDR); ++/* ANALOGIX_DP_H_F_PORCH_CFG_L */ ++#define H_F_PORCH_CFG_L(x) (((x) & 0xff) << 0) + -+ if (!hdmi->next_bridge) { -+ hdmi_writeb(hdmi, HDMI_PHY_HPD | HDMI_PHY_RX_SENSE, -+ HDMI_PHY_POL0); -+ hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0); -+ hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | -+ HDMI_IH_PHY_STAT0_RX_SENSE), -+ HDMI_IH_MUTE_PHY_STAT0); -+ } -+ } -+} ++/* ANALOGIX_DP_H_F_PORCH_CFG_H */ ++#define H_F_PORCH_CFG_H(x) (((x) & 0xf) << 0) + -+void dw_hdmi_suspend(struct dw_hdmi *hdmi) -+{ -+ if (!hdmi) -+ return; ++/* ANALOGIX_DP_H_SYNC_CFG_L */ ++#define H_SYNC_CFG_L(x) (((x) & 0xff) << 0) + -+ mutex_lock(&hdmi->mutex); ++/* ANALOGIX_DP_H_SYNC_CFG_H */ ++#define H_SYNC_CFG_H(x) (((x) & 0xf) << 0) + -+ /* -+ * When system shutdown, hdmi should be disabled. -+ * When system suspend, dw_hdmi_bridge_disable will disable hdmi first. -+ * To prevent duplicate operation, we should determine whether hdmi -+ * has been disabled. -+ */ -+ if (!hdmi->disabled) { -+ hdmi->disabled = true; -+ dw_hdmi_update_power(hdmi); -+ dw_hdmi_update_phy_mask(hdmi); -+ } -+ mutex_unlock(&hdmi->mutex); ++/* ANALOGIX_DP_H_B_PORCH_CFG_L */ ++#define H_B_PORCH_CFG_L(x) (((x) & 0xff) << 0) + -+ if (hdmi->irq) -+ disable_irq(hdmi->irq); -+ cancel_delayed_work(&hdmi->work); -+ flush_workqueue(hdmi->workqueue); -+ pinctrl_pm_select_sleep_state(hdmi->dev); -+} -+EXPORT_SYMBOL_GPL(dw_hdmi_suspend); ++/* ANALOGIX_DP_H_B_PORCH_CFG_H */ ++#define H_B_PORCH_CFG_H(x) (((x) & 0xf) << 0) + - void dw_hdmi_resume(struct dw_hdmi *hdmi) - { -- dw_hdmi_init_hw(hdmi); -+ if (!hdmi) -+ return; ++/* ANALOGIX_DP_SPDIF_AUDIO_CTL_0 */ ++#define AUD_SPDIF_EN (0x1 << 7) + -+ pinctrl_pm_select_default_state(hdmi->dev); -+ mutex_lock(&hdmi->mutex); -+ dw_hdmi_reg_initial(hdmi); -+ dw_hdmi_i2c_init(hdmi); -+ if (hdmi->irq) -+ enable_irq(hdmi->irq); -+ /* -+ * HDMI status maybe incorrect in the following condition: -+ * HDMI plug in -> system sleep -> HDMI plug out -> system wake up. -+ * At this time, cat /sys/class/drm/card 0-HDMI-A-1/status is connected. -+ * There is no hpd interrupt, because HDMI is powerdown during suspend. -+ * So we need check the current HDMI status in this case. 
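dw_hdmi_probe() above allocates and registers an extcon device for EXTCON_DISP_HDMI and declares the EXTCON_PROP_DISP_HPD capability. The natural counterpart is to publish the cable state from the hot-plug path; whether and where this driver does so is not shown in these hunks, so the helper below is an assumption, not a quote from the patch:

/* Illustrative only: report HDMI cable state through hdmi->extcon. */
static void example_report_hpd(struct dw_hdmi *hdmi, bool connected)
{
	union extcon_property_value prop = { .intval = connected };

	extcon_set_property(hdmi->extcon, EXTCON_DISP_HDMI,
			    EXTCON_PROP_DISP_HPD, prop);
	extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, connected);
}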
-+ */ -+ if (hdmi->connector.status == connector_status_connected) { -+ if (hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data) == -+ connector_status_disconnected) { -+ hdmi->hpd_state = false; -+ mod_delayed_work(hdmi->workqueue, &hdmi->work, -+ msecs_to_jiffies(20)); -+ } -+ } -+ mutex_unlock(&hdmi->mutex); - } - EXPORT_SYMBOL_GPL(dw_hdmi_resume); - -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h -index af43a0414..5432d0246 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h -+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h -@@ -518,6 +518,51 @@ - #define HDMI_A_PRESETUP 0x501A - #define HDMI_A_SRM_BASE 0x5020 + /* ANALOGIX_DP_PLL_REG_1 */ + #define REF_CLK_24M (0x1 << 0) + #define REF_CLK_27M (0x0 << 0) +@@ -309,16 +395,23 @@ + #define FIX_M_VID (0x1 << 2) + #define M_VID_UPDATE_CTRL (0x3 << 0) -+/* CEC Engine Registers */ -+#define HDMI_CEC_CTRL 0x7D00 -+#define HDMI_CEC_STAT 0x7D01 -+#define HDMI_CEC_MASK 0x7D02 -+#define HDMI_CEC_POLARITY 0x7D03 -+#define HDMI_CEC_INT 0x7D04 -+#define HDMI_CEC_ADDR_L 0x7D05 -+#define HDMI_CEC_ADDR_H 0x7D06 -+#define HDMI_CEC_TX_CNT 0x7D07 -+#define HDMI_CEC_RX_CNT 0x7D08 -+#define HDMI_CEC_TX_DATA0 0x7D10 -+#define HDMI_CEC_TX_DATA1 0x7D11 -+#define HDMI_CEC_TX_DATA2 0x7D12 -+#define HDMI_CEC_TX_DATA3 0x7D13 -+#define HDMI_CEC_TX_DATA4 0x7D14 -+#define HDMI_CEC_TX_DATA5 0x7D15 -+#define HDMI_CEC_TX_DATA6 0x7D16 -+#define HDMI_CEC_TX_DATA7 0x7D17 -+#define HDMI_CEC_TX_DATA8 0x7D18 -+#define HDMI_CEC_TX_DATA9 0x7D19 -+#define HDMI_CEC_TX_DATA10 0x7D1a -+#define HDMI_CEC_TX_DATA11 0x7D1b -+#define HDMI_CEC_TX_DATA12 0x7D1c -+#define HDMI_CEC_TX_DATA13 0x7D1d -+#define HDMI_CEC_TX_DATA14 0x7D1e -+#define HDMI_CEC_TX_DATA15 0x7D1f -+#define HDMI_CEC_RX_DATA0 0x7D20 -+#define HDMI_CEC_RX_DATA1 0x7D21 -+#define HDMI_CEC_RX_DATA2 0x7D22 -+#define HDMI_CEC_RX_DATA3 0x7D23 -+#define HDMI_CEC_RX_DATA4 0x7D24 -+#define HDMI_CEC_RX_DATA5 0x7D25 -+#define HDMI_CEC_RX_DATA6 0x7D26 -+#define HDMI_CEC_RX_DATA7 0x7D27 -+#define HDMI_CEC_RX_DATA8 0x7D28 -+#define HDMI_CEC_RX_DATA9 0x7D29 -+#define HDMI_CEC_RX_DATA10 0x7D2a -+#define HDMI_CEC_RX_DATA11 0x7D2b -+#define HDMI_CEC_RX_DATA12 0x7D2c -+#define HDMI_CEC_RX_DATA13 0x7D2d -+#define HDMI_CEC_RX_DATA14 0x7D2e -+#define HDMI_CEC_RX_DATA15 0x7D2f -+#define HDMI_CEC_LOCK 0x7D30 -+#define HDMI_CEC_WKUPCTRL 0x7D31 ++/* ANALOGIX_DP_AUD_CTL */ ++#define MISC_CTRL_RESET (0x1 << 4) ++#define DP_AUDIO_EN (0x1 << 0) + - /* I2C Master Registers (E-DDC) */ - #define HDMI_I2CM_SLAVE 0x7E00 - #define HDMI_I2CM_ADDRESS 0x7E01 -@@ -538,6 +583,17 @@ - #define HDMI_I2CM_FS_SCL_HCNT_0_ADDR 0x7E10 - #define HDMI_I2CM_FS_SCL_LCNT_1_ADDR 0x7E11 - #define HDMI_I2CM_FS_SCL_LCNT_0_ADDR 0x7E12 -+#define HDMI_I2CM_SDA_HOLD 0x7E13 -+#define HDMI_I2CM_SCDC_READ_UPDATE 0x7E14 -+#define HDMI_I2CM_READ_REQ_EN_MSK BIT(4) -+#define HDMI_I2CM_READ_REQ_EN_OFFSET 4 -+#define HDMI_I2CM_READ_UPDATE_MSK BIT(0) -+#define HDMI_I2CM_READ_UPDATE_OFFSET 0 -+#define HDMI_I2CM_I2CM_UPRD_VSYNC_EN_MSK BIT(5) -+#define HDMI_I2CM_I2CM_UPRD_VSYNC_EN_OFFSET 5 -+#define HDMI_I2CM_READ_BUFF0 0x7E20 -+#define HDMI_I2CM_SCDC_UPDATE0 0x7E30 -+#define HDMI_I2CM_SCDC_UPDATE1 0x7E31 - - enum { - /* PRODUCT_ID0 field values */ -@@ -851,6 +907,10 @@ enum { - HDMI_FC_AVICONF3_QUANT_RANGE_LIMITED = 0x00, - HDMI_FC_AVICONF3_QUANT_RANGE_FULL = 0x04, + /* ANALOGIX_DP_TRAINING_PTN_SET */ + #define SCRAMBLER_TYPE (0x1 << 9) + #define HW_LINK_TRAINING_PATTERN (0x1 << 8) + #define SCRAMBLING_DISABLE (0x1 << 5) + #define 
SCRAMBLING_ENABLE (0x0 << 5) +-#define LINK_QUAL_PATTERN_SET_MASK (0x3 << 2) ++#define LINK_QUAL_PATTERN_SET_MASK (0x7 << 2) ++#define LINK_QUAL_PATTERN_SET_HBR2 (0x5 << 2) ++#define LINK_QUAL_PATTERN_SET_80BIT (0x4 << 2) + #define LINK_QUAL_PATTERN_SET_PRBS7 (0x3 << 2) + #define LINK_QUAL_PATTERN_SET_D10_2 (0x1 << 2) + #define LINK_QUAL_PATTERN_SET_DISABLE (0x0 << 2) + #define SW_TRAINING_PATTERN_SET_MASK (0x3 << 0) ++#define SW_TRAINING_PATTERN_SET_PTN3 (0x3 << 0) + #define SW_TRAINING_PATTERN_SET_PTN2 (0x2 << 0) + #define SW_TRAINING_PATTERN_SET_PTN1 (0x1 << 0) + #define SW_TRAINING_PATTERN_SET_NORMAL (0x0 << 0) +@@ -406,6 +499,11 @@ + #define VIDEO_MODE_SLAVE_MODE (0x1 << 0) + #define VIDEO_MODE_MASTER_MODE (0x0 << 0) -+/* HDMI_FC_GCP */ -+ HDMI_FC_GCP_SET_AVMUTE = 0x2, -+ HDMI_FC_GCP_CLEAR_AVMUTE = 0x1, ++/* ANALOGIX_DP_AUD_CHANNEL_CTL */ ++#define AUD_CHANNEL_COUNT_6 (0x5 << 0) ++#define AUD_CHANNEL_COUNT_4 (0x3 << 0) ++#define AUD_CHANNEL_COUNT_2 (0x1 << 0) + - /* FC_DBGFORCE field values */ - HDMI_FC_DBGFORCE_FORCEAUDIO = 0x10, - HDMI_FC_DBGFORCE_FORCEVIDEO = 0x1, -@@ -929,8 +989,14 @@ enum { - HDMI_AUD_CONF1_MODE_BURST_1 = 0x60, - HDMI_AUD_CONF1_MODE_BURST_2 = 0x80, - HDMI_AUD_CONF1_WIDTH_16 = 0x10, -+ HDMI_AUD_CONF1_WIDTH_21 = 0x15, - HDMI_AUD_CONF1_WIDTH_24 = 0x18, + /* ANALOGIX_DP_PKT_SEND_CTL */ + #define IF_UP (0x1 << 4) + #define IF_EN (0x1 << 0) +@@ -414,4 +512,7 @@ + #define PSR_VID_CRC_FLUSH (0x1 << 2) + #define PSR_VID_CRC_ENABLE (0x1 << 0) -+/* AUD_CONF2 filed values */ -+ HDMI_AUD_CONF2_HBR = 0x1, -+ HDMI_AUD_CONF2_NLPCM = 0x2, -+ HDMI_AUD_CONF2_INSERT_PCUV = 0x04, ++/* ANALOGIX_DP_I2S_CTRL */ ++#define I2S_EN (0x1 << 4) + - /* AUD_CTS3 field values */ - HDMI_AUD_CTS3_N_SHIFT_OFFSET = 5, - HDMI_AUD_CTS3_N_SHIFT_MASK = 0xe0, -@@ -1084,7 +1150,10 @@ enum { - HDMI_A_VIDPOLCFG_HSYNCPOL_ACTIVE_LOW = 0x0, + #endif /* _ANALOGIX_DP_REG_H */ +diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile +index ce715562e..f4c6cf628 100644 +--- a/drivers/gpu/drm/bridge/synopsys/Makefile ++++ b/drivers/gpu/drm/bridge/synopsys/Makefile +@@ -1,8 +1,9 @@ + # SPDX-License-Identifier: GPL-2.0-only +-obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o ++obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o dw-hdmi-hdcp.o \ ++ dw-hdmi-qp.o dw-hdmi-qp-hdcp.o + obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o + obj-$(CONFIG_DRM_DW_HDMI_GP_AUDIO) += dw-hdmi-gp-audio.o +-obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o +-obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o ++obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o dw-hdmi-qp-i2s-audio.o ++obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o dw-hdmi-qp-cec.o - /* I2CM_OPERATION field values */ -+ HDMI_I2CM_OPERATION_BUS_CLEAR = 0x20, - HDMI_I2CM_OPERATION_WRITE = 0x10, -+ HDMI_I2CM_OPERATION_READ8_EXT = 0x8, -+ HDMI_I2CM_OPERATION_READ8 = 0x4, - HDMI_I2CM_OPERATION_READ_EXT = 0x2, - HDMI_I2CM_OPERATION_READ = 0x1, + obj-$(CONFIG_DRM_DW_MIPI_DSI) += dw-mipi-dsi.o +diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h +index f72d27208..966d297ef 100644 +--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h ++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h +@@ -17,6 +17,7 @@ struct dw_hdmi_i2s_audio_data { -@@ -1097,6 +1166,11 @@ enum { - HDMI_I2CM_CTLINT_NAC_MASK = 0x40, - HDMI_I2CM_CTLINT_ARB_POL = 0x8, - HDMI_I2CM_CTLINT_ARB_MASK = 0x4, -+ -+/* I2CM_DIV field values */ -+ HDMI_I2CM_DIV_FAST_STD_MODE = 0x8, -+ HDMI_I2CM_DIV_FAST_MODE = 0x8, -+ 
HDMI_I2CM_DIV_STD_MODE = 0, + void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); + u8 (*read)(struct dw_hdmi *hdmi, int offset); ++ void (*mod)(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned int reg); + u8 *(*get_eld)(struct dw_hdmi *hdmi); }; - /* -diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c -index 04d4a1a10..bd99f785e 100644 ---- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c -+++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c -@@ -14,6 +14,7 @@ - #include +diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c +index be21c11de..414d601bf 100644 +--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c ++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c +@@ -4,14 +4,17 @@ + * + * Copyright (C) 2015-2017 Russell King. + */ ++#include + #include + #include ++#include #include #include -+#include - #include - #include - -@@ -26,6 +27,8 @@ - #include - #include - #include -+#include -+#include - #include - - #define HWVER_131 0x31333100 /* IP version 1.31 */ -@@ -83,8 +86,12 @@ - #define ENABLE_CMD_MODE BIT(0) - - #define DSI_VID_MODE_CFG 0x38 --#define ENABLE_LOW_POWER (0x3f << 8) --#define ENABLE_LOW_POWER_MASK (0x3f << 8) -+#define LP_HFP_EN BIT(13) -+#define LP_HBP_EN BIT(12) -+#define LP_VACT_EN BIT(11) -+#define LP_VFP_EN BIT(10) -+#define LP_VBP_EN BIT(9) -+#define LP_VSA_EN BIT(8) - #define VID_MODE_TYPE_NON_BURST_SYNC_PULSES 0x0 - #define VID_MODE_TYPE_NON_BURST_SYNC_EVENTS 0x1 - #define VID_MODE_TYPE_BURST 0x2 -@@ -239,17 +246,21 @@ struct debugfs_entries { - - struct dw_mipi_dsi { - struct drm_bridge bridge; -+ struct drm_connector connector; -+ struct drm_encoder *encoder; - struct mipi_dsi_host dsi_host; -- struct drm_bridge *panel_bridge; -+ struct drm_panel *panel; -+ struct drm_bridge *next_bridge; - struct device *dev; - void __iomem *base; + #include + #include -- struct clk *pclk; -+ struct reset_control *apb_rst; + #include ++#include - unsigned int lane_mbps; /* per lane */ - u32 channel; - u32 lanes; - u32 format; -+ struct drm_display_mode mode; - unsigned long mode_flags; + #include + #include +@@ -21,8 +24,11 @@ + enum { + HDMI_IH_CEC_STAT0 = 0x0106, + HDMI_IH_MUTE_CEC_STAT0 = 0x0186, ++ HDMI_IH_MUTE = 0x01ff, - #ifdef CONFIG_DEBUG_FS -@@ -265,7 +276,6 @@ struct dw_mipi_dsi { - struct dw_mipi_dsi *master; /* dual-dsi master ptr */ - struct dw_mipi_dsi *slave; /* dual-dsi slave ptr */ + HDMI_CEC_CTRL = 0x7d00, ++ CEC_TRANS_MASK = 0x7, ++ CEC_CTRL_STANDBY = BIT(4), + CEC_CTRL_START = BIT(0), + CEC_CTRL_FRAME_TYP = 3 << 1, + CEC_CTRL_RETRY = 0 << 1, +@@ -47,12 +53,15 @@ enum { + HDMI_CEC_RX_CNT = 0x7d08, + HDMI_CEC_TX_DATA0 = 0x7d10, + HDMI_CEC_RX_DATA0 = 0x7d20, ++ HDMI_CEC_RX_DATA1 = 0x7d21, + HDMI_CEC_LOCK = 0x7d30, + HDMI_CEC_WKUPCTRL = 0x7d31, + }; -- struct drm_display_mode mode; - const struct dw_mipi_dsi_plat_data *plat_data; + struct dw_hdmi_cec { ++ struct device *dev; + struct dw_hdmi *hdmi; ++ struct miscdevice misc_dev; + const struct dw_hdmi_cec_ops *ops; + u32 addresses; + struct cec_adapter *adap; +@@ -61,11 +70,12 @@ struct dw_hdmi_cec { + bool tx_done; + bool rx_done; + struct cec_notifier *notify; ++ struct input_dev *devinput; + int irq; +- +- u8 regs_polarity; +- u8 regs_mask; +- u8 regs_mute_stat0; ++ int wake_irq; ++ bool wake_en; ++ bool standby_en; ++ struct mutex wake_lock; }; -@@ -300,6 +310,11 @@ static inline struct dw_mipi_dsi *bridge_to_dsi(struct drm_bridge *bridge) - return container_of(bridge, struct dw_mipi_dsi, 
bridge); + static void dw_hdmi_write(struct dw_hdmi_cec *cec, u8 val, int offset) +@@ -78,6 +88,11 @@ static u8 dw_hdmi_read(struct dw_hdmi_cec *cec, int offset) + return cec->ops->read(cec->hdmi, offset); } -+static inline struct dw_mipi_dsi *con_to_dsi(struct drm_connector *con) ++static void dw_hdmi_mod(struct dw_hdmi_cec *cec, unsigned int offset, u8 mask, u8 val) +{ -+ return container_of(con, struct dw_mipi_dsi, connector); ++ cec->ops->mod(cec->hdmi, val, mask, offset); +} + - static inline void dsi_write(struct dw_mipi_dsi *dsi, u32 reg, u32 val) - { - writel(val, dsi->base + reg); -@@ -315,26 +330,20 @@ static int dw_mipi_dsi_host_attach(struct mipi_dsi_host *host, - { - struct dw_mipi_dsi *dsi = host_to_dsi(host); - const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; -- struct drm_bridge *bridge; -+ int max_data_lanes = dsi->plat_data->max_data_lanes; - int ret; - -- if (device->lanes > dsi->plat_data->max_data_lanes) { -- dev_err(dsi->dev, "the number of data lanes(%u) is too many\n", -- device->lanes); -- return -EINVAL; -- } -- -- dsi->lanes = device->lanes; -+ dsi->lanes = (device->lanes > max_data_lanes) ? device->lanes / 2 : device->lanes; - dsi->channel = device->channel; - dsi->format = device->format; - dsi->mode_flags = device->mode_flags; - -- bridge = devm_drm_of_get_bridge(dsi->dev, dsi->dev->of_node, 1, 0); -- if (IS_ERR(bridge)) -- return PTR_ERR(bridge); -- -- bridge->pre_enable_prev_first = true; -- dsi->panel_bridge = bridge; -+ ret = drm_of_find_panel_or_bridge(host->dev->of_node, 1, -1, -+ &dsi->panel, &dsi->next_bridge); -+ if (ret) { -+ DRM_DEV_ERROR(dsi->dev, "Failed to find panel or bridge: %d\n", ret); -+ return ret; -+ } - - drm_bridge_add(&dsi->bridge); - -@@ -372,6 +381,7 @@ static void dw_mipi_message_config(struct dw_mipi_dsi *dsi, + static int dw_hdmi_cec_log_addr(struct cec_adapter *adap, u8 logical_addr) { - bool lpm = msg->flags & MIPI_DSI_MSG_USE_LPM; - u32 val = 0; -+ u32 ctrl = 0; + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); +@@ -116,7 +131,7 @@ static int dw_hdmi_cec_transmit(struct cec_adapter *adap, u8 attempts, + dw_hdmi_write(cec, msg->msg[i], HDMI_CEC_TX_DATA0 + i); - /* - * TODO dw drv improvements -@@ -390,11 +400,17 @@ static void dw_mipi_message_config(struct dw_mipi_dsi *dsi, - dsi_write(dsi, DSI_CMD_MODE_CFG, val); + dw_hdmi_write(cec, msg->len, HDMI_CEC_TX_CNT); +- dw_hdmi_write(cec, ctrl | CEC_CTRL_START, HDMI_CEC_CTRL); ++ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_TRANS_MASK, ctrl | CEC_CTRL_START); - val = dsi_read(dsi, DSI_VID_MODE_CFG); -- if (lpm) -+ ctrl = dsi_read(dsi, DSI_LPCLK_CTRL); -+ if (lpm) { - val |= ENABLE_LOW_POWER_CMD; -- else -+ ctrl &= ~PHY_TXREQUESTCLKHS; -+ } else { - val &= ~ENABLE_LOW_POWER_CMD; -+ ctrl |= PHY_TXREQUESTCLKHS; -+ } -+ - dsi_write(dsi, DSI_VID_MODE_CFG, val); -+ dsi_write(dsi, DSI_LPCLK_CTRL, ctrl); + return 0; } +@@ -192,20 +207,28 @@ static int dw_hdmi_cec_enable(struct cec_adapter *adap, bool enable) + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); - static int dw_mipi_dsi_gen_pkt_hdr_write(struct dw_mipi_dsi *dsi, u32 hdr_val) -@@ -540,14 +556,14 @@ static const struct mipi_dsi_host_ops dw_mipi_dsi_host_ops = { - - static void dw_mipi_dsi_video_mode_config(struct dw_mipi_dsi *dsi) - { -- u32 val; -+ u32 val = LP_VSA_EN | LP_VBP_EN | LP_VFP_EN | -+ LP_VACT_EN | LP_HBP_EN | LP_HFP_EN; - -- /* -- * TODO dw drv improvements -- * enabling low power is panel-dependent, we should use the -- * panel configuration here... 
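The dw_mipi_dsi_get_hcomponent_lbcc() change above guards against a zero mode clock and replaces the manual remainder check with DIV_ROUND_CLOSEST_ULL(). A compact sketch of the computation with one worked number; the example_lbcc() name and the 64-bit intermediate are choices made here, not code from the patch:

/* Lane byte clock cycles for one horizontal component (sketch). */
static u32 example_lbcc(u32 hcomponent, unsigned int lane_mbps,
			const struct drm_display_mode *mode)
{
	u64 lbcc;

	if (!mode->clock)		/* mode->clock is in kHz */
		return 0;

	/* The 64-bit intermediate avoids overflow for large modes. */
	lbcc = (u64)hcomponent * lane_mbps * MSEC_PER_SEC / 8;
	return DIV_ROUND_CLOSEST_ULL(lbcc, mode->clock);
}

/*
 * Example: htotal = 2200, lane_mbps = 1000, mode->clock = 148500 kHz
 * gives DIV_ROUND_CLOSEST_ULL(275000000, 148500) = 1852 cycles.
 */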
-- */ -- val = ENABLE_LOW_POWER; -+ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP) -+ val &= ~LP_HFP_EN; -+ -+ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP) -+ val &= ~LP_HBP_EN; + if (!enable) { +- dw_hdmi_write(cec, ~0, HDMI_CEC_MASK); +- dw_hdmi_write(cec, ~0, HDMI_IH_MUTE_CEC_STAT0); + dw_hdmi_write(cec, 0, HDMI_CEC_POLARITY); - if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) - val |= VID_MODE_TYPE_BURST; -@@ -571,8 +587,6 @@ static void dw_mipi_dsi_video_mode_config(struct dw_mipi_dsi *dsi) - static void dw_mipi_dsi_set_mode(struct dw_mipi_dsi *dsi, - unsigned long mode_flags) - { -- u32 val; -- - dsi_write(dsi, DSI_PWR_UP, RESET); +- cec->ops->disable(cec->hdmi); ++ if (cec->wake_en && cec->standby_en) { ++ dw_hdmi_write(cec, 0xff, HDMI_IH_CEC_STAT0); ++ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_CTRL_STANDBY, CEC_CTRL_STANDBY); ++ dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); ++ dw_hdmi_write(cec, 0xff, HDMI_CEC_WKUPCTRL); ++ dw_hdmi_write(cec, ~(1 << 6), HDMI_CEC_MASK); ++ dw_hdmi_write(cec, ~(1 << 6), HDMI_IH_MUTE_CEC_STAT0); ++ dw_hdmi_write(cec, 0x01, HDMI_IH_MUTE); ++ } else { ++ cec->ops->disable(cec->hdmi); ++ } + } else { + unsigned int irqs; - if (mode_flags & MIPI_DSI_MODE_VIDEO) { -@@ -582,18 +596,16 @@ static void dw_mipi_dsi_set_mode(struct dw_mipi_dsi *dsi, - dsi_write(dsi, DSI_MODE_CFG, ENABLE_CMD_MODE); - } +- dw_hdmi_write(cec, 0, HDMI_CEC_CTRL); ++ dw_hdmi_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); ++ dw_hdmi_mod(cec, HDMI_CEC_CTRL, CEC_CTRL_STANDBY, 0); ++ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); + dw_hdmi_write(cec, ~0, HDMI_IH_CEC_STAT0); + dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); -- val = PHY_TXREQUESTCLKHS; -- if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) -- val |= AUTO_CLKLANE_CTRL; -- dsi_write(dsi, DSI_LPCLK_CTRL, val); +- dw_hdmi_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); - - dsi_write(dsi, DSI_PWR_UP, POWERUP); - } - - static void dw_mipi_dsi_disable(struct dw_mipi_dsi *dsi) - { -- dsi_write(dsi, DSI_PWR_UP, RESET); -- dsi_write(dsi, DSI_PHY_RSTZ, PHY_RSTZ); -+ dsi_write(dsi, DSI_LPCLK_CTRL, 0); -+ dsi_write(dsi, DSI_EDPI_CMD_SIZE, 0); -+ dw_mipi_dsi_set_mode(dsi, 0); -+ if (dsi->slave) -+ dw_mipi_dsi_disable(dsi->slave); - } - - static void dw_mipi_dsi_init(struct dw_mipi_dsi *dsi) -@@ -708,16 +720,16 @@ static u32 dw_mipi_dsi_get_hcomponent_lbcc(struct dw_mipi_dsi *dsi, - const struct drm_display_mode *mode, - u32 hcomponent) - { -- u32 frac, lbcc; -+ u32 lbcc; - - lbcc = hcomponent * dsi->lane_mbps * MSEC_PER_SEC / 8; - -- frac = lbcc % mode->clock; -- lbcc = lbcc / mode->clock; -- if (frac) -- lbcc++; -+ if (mode->clock == 0) { -+ DRM_ERROR("dsi mode clock is 0!\n"); -+ return 0; -+ } - -- return lbcc; -+ return DIV_ROUND_CLOSEST_ULL(lbcc, mode->clock); - } - - static void dw_mipi_dsi_line_timer_config(struct dw_mipi_dsi *dsi, -@@ -830,13 +842,13 @@ static void dw_mipi_dsi_dphy_enable(struct dw_mipi_dsi *dsi) - ret = readl_poll_timeout(dsi->base + DSI_PHY_STATUS, val, - val & PHY_LOCK, 1000, PHY_STATUS_TIMEOUT_US); - if (ret) -- DRM_DEBUG_DRIVER("failed to wait phy lock state\n"); -+ DRM_ERROR("failed to wait phy lock state\n"); - - ret = readl_poll_timeout(dsi->base + DSI_PHY_STATUS, - val, val & PHY_STOP_STATE_CLK_LANE, 1000, - PHY_STATUS_TIMEOUT_US); - if (ret) -- DRM_DEBUG_DRIVER("failed to wait phy clk lane stop state\n"); -+ DRM_ERROR("failed to wait phy clk lane stop state\n"); - } + cec->ops->enable(cec->hdmi); - static void dw_mipi_dsi_clear_err(struct dw_mipi_dsi *dsi) -@@ -847,32 +859,49 @@ static void dw_mipi_dsi_clear_err(struct 
dw_mipi_dsi *dsi) - dsi_write(dsi, DSI_INT_MSK1, 0); + irqs = CEC_STAT_ERROR_INIT | CEC_STAT_NACK | CEC_STAT_EOM | +@@ -230,6 +253,173 @@ static void dw_hdmi_cec_del(void *data) + cec_delete_adapter(cec->adap); } -+static void dw_mipi_dsi_post_disable(struct dw_mipi_dsi *dsi) ++static irqreturn_t dw_hdmi_cec_wake_irq(int irq, void *data) +{ -+ const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops; ++ struct cec_adapter *adap = data; ++ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); ++ u8 cec_int; + -+ if (phy_ops->power_off) -+ phy_ops->power_off(dsi->plat_data->priv_data); ++ cec_int = dw_hdmi_read(cec, HDMI_IH_CEC_STAT0); ++ if (!cec_int) ++ return IRQ_NONE; + -+ dsi_write(dsi, DSI_PWR_UP, RESET); -+ dsi_write(dsi, DSI_PHY_RSTZ, PHY_RSTZ); -+ pm_runtime_put(dsi->dev); ++ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); ++ dw_hdmi_write(cec, cec_int, HDMI_IH_CEC_STAT0); ++ dw_hdmi_write(cec, 0x00, HDMI_CEC_WKUPCTRL); + -+ if (dsi->slave) -+ dw_mipi_dsi_post_disable(dsi->slave); -+} ++ if (!cec->wake_en) ++ return IRQ_HANDLED; + - static void dw_mipi_dsi_bridge_post_atomic_disable(struct drm_bridge *bridge, - struct drm_bridge_state *old_bridge_state) - { - struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); -- const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops; -+ const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; - -- /* -- * Switch to command mode before panel-bridge post_disable & -- * panel unprepare. -- * Note: panel-bridge disable & panel disable has been called -- * before by the drm framework. -- */ -- dw_mipi_dsi_set_mode(dsi, 0); -+ if (dsi->panel) -+ drm_panel_unprepare(dsi->panel); - -- if (phy_ops->power_off) -- phy_ops->power_off(dsi->plat_data->priv_data); -+ dw_mipi_dsi_post_disable(dsi); - -- if (dsi->slave) { -- dw_mipi_dsi_disable(dsi->slave); -- clk_disable_unprepare(dsi->slave->pclk); -- pm_runtime_put(dsi->slave->dev); -- } -- dw_mipi_dsi_disable(dsi); -+ if (pdata->stream_standby) -+ pdata->stream_standby(pdata->priv_data, 0); ++ return IRQ_WAKE_THREAD; +} - -- clk_disable_unprepare(dsi->pclk); -- pm_runtime_put(dsi->dev); -+static void dw_mipi_dsi_bridge_atomic_disable(struct drm_bridge *bridge, -+ struct drm_bridge_state *old_bridge_state) ++ ++static irqreturn_t dw_hdmi_cec_wake_thread(int irq, void *data) +{ -+ struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); -+ const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; ++ struct cec_adapter *adap = data; ++ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + -+ if (dsi->panel) -+ drm_panel_disable(dsi->panel); ++ mutex_lock(&cec->wake_lock); + -+ if (pdata->stream_standby) -+ pdata->stream_standby(pdata->priv_data, 1); ++ if (!cec->standby_en) { ++ mutex_unlock(&cec->wake_lock); ++ return IRQ_HANDLED; ++ } ++ cec->standby_en = false; + -+ dw_mipi_dsi_disable(dsi); - } - - static unsigned int dw_mipi_dsi_get_lanes(struct dw_mipi_dsi *dsi) -@@ -889,15 +918,31 @@ static unsigned int dw_mipi_dsi_get_lanes(struct dw_mipi_dsi *dsi) - return dsi->lanes; - } - --static void dw_mipi_dsi_mode_set(struct dw_mipi_dsi *dsi, -- const struct drm_display_mode *adjusted_mode) -+static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, -+ const struct drm_display_mode *mode, -+ const struct drm_display_mode *adjusted_mode) ++ dev_dbg(cec->dev, "wakeup opcode:0x%x\n", dw_hdmi_read(cec, HDMI_CEC_RX_DATA1)); ++ input_event(cec->devinput, EV_KEY, KEY_POWER, 1); ++ input_sync(cec->devinput); ++ input_event(cec->devinput, EV_KEY, KEY_POWER, 0); ++ input_sync(cec->devinput); ++ 
mutex_unlock(&cec->wake_lock); ++ ++ return IRQ_HANDLED; ++} ++ ++static int rockchip_hdmi_cec_input_init(struct dw_hdmi_cec *cec) +{ -+ struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); ++ int err; + -+ drm_mode_copy(&dsi->mode, adjusted_mode); ++ cec->devinput = devm_input_allocate_device(cec->dev); ++ if (!cec->devinput) ++ return -EPERM; + -+ if (dsi->slave) -+ drm_mode_copy(&dsi->slave->mode, adjusted_mode); ++ cec->devinput->name = "hdmi_cec_key"; ++ cec->devinput->phys = "hdmi_cec_key/input0"; ++ cec->devinput->id.bustype = BUS_HOST; ++ cec->devinput->id.vendor = 0x0001; ++ cec->devinput->id.product = 0x0001; ++ cec->devinput->id.version = 0x0100; ++ ++ err = input_register_device(cec->devinput); ++ if (err < 0) { ++ input_free_device(cec->devinput); ++ return err; ++ } ++ input_set_capability(cec->devinput, EV_KEY, KEY_POWER); ++ ++ return 0; +} + -+static void dw_mipi_dsi_pre_enable(struct dw_mipi_dsi *dsi) - { - const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops; - void *priv_data = dsi->plat_data->priv_data; -+ const struct drm_display_mode *adjusted_mode = &dsi->mode; - int ret; - u32 lanes = dw_mipi_dsi_get_lanes(dsi); - -- clk_prepare_enable(dsi->pclk); -+ if (dsi->apb_rst) { -+ reset_control_assert(dsi->apb_rst); -+ usleep_range(10, 20); -+ reset_control_deassert(dsi->apb_rst); -+ } - - ret = phy_ops->get_lane_mbps(priv_data, adjusted_mode, dsi->mode_flags, - lanes, dsi->format, &dsi->lane_mbps); -@@ -924,6 +969,9 @@ static void dw_mipi_dsi_mode_set(struct dw_mipi_dsi *dsi, - if (ret) - DRM_DEBUG_DRIVER("Phy init() failed\n"); - -+ if (phy_ops->power_on) -+ phy_ops->power_on(dsi->plat_data->priv_data); -+ - dw_mipi_dsi_dphy_enable(dsi); - - dw_mipi_dsi_wait_for_two_frames(adjusted_mode); -@@ -931,40 +979,62 @@ static void dw_mipi_dsi_mode_set(struct dw_mipi_dsi *dsi, - /* Switch to cmd mode for panel-bridge pre_enable & panel prepare */ - dw_mipi_dsi_set_mode(dsi, 0); - -- if (phy_ops->power_on) -- phy_ops->power_on(dsi->plat_data->priv_data); -+ if (dsi->slave) -+ dw_mipi_dsi_pre_enable(dsi->slave); - } - - static void dw_mipi_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge, - struct drm_bridge_state *old_bridge_state) - { - struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); -+ const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; - -- /* Power up the dsi ctl into a command mode */ -- dw_mipi_dsi_mode_set(dsi, &dsi->mode); -- if (dsi->slave) -- dw_mipi_dsi_mode_set(dsi->slave, &dsi->mode); -+ if (pdata->stream_standby) -+ pdata->stream_standby(pdata->priv_data, 1); -+ -+ dw_mipi_dsi_pre_enable(dsi); -+ -+ if (dsi->panel) -+ drm_panel_prepare(dsi->panel); - } - --static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, -- const struct drm_display_mode *mode, -- const struct drm_display_mode *adjusted_mode) -+static void dw_mipi_dsi_enable(struct dw_mipi_dsi *dsi) - { -- struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); -+ u32 val; - -- /* Store the display mode for later use in pre_enable callback */ -- drm_mode_copy(&dsi->mode, adjusted_mode); -+ val = PHY_TXREQUESTCLKHS; -+ if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) -+ val |= AUTO_CLKLANE_CTRL; -+ -+ dsi_write(dsi, DSI_LPCLK_CTRL, val); ++static long cec_standby(struct cec_adapter *adap, __u8 __user *parg) ++{ ++ u8 en; ++ int ret; ++ struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + -+ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { -+ dw_mipi_dsi_set_mode(dsi, MIPI_DSI_MODE_VIDEO); -+ } else { -+ dsi_write(dsi, DSI_EDPI_CMD_SIZE, dsi->mode.hdisplay); -+ 
dw_mipi_dsi_set_mode(dsi, 0); ++ mutex_lock(&cec->wake_lock); ++ if (copy_from_user(&en, parg, sizeof(en))) { ++ mutex_unlock(&cec->wake_lock); ++ return -EFAULT; + } + -+ if (dsi->slave) -+ dw_mipi_dsi_enable(dsi->slave); - } - - static void dw_mipi_dsi_bridge_atomic_enable(struct drm_bridge *bridge, - struct drm_bridge_state *old_bridge_state) - { - struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); -+ const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; - -- /* Switch to video mode for panel-bridge enable & panel enable */ -- dw_mipi_dsi_set_mode(dsi, MIPI_DSI_MODE_VIDEO); -- if (dsi->slave) -- dw_mipi_dsi_set_mode(dsi->slave, MIPI_DSI_MODE_VIDEO); -+ dw_mipi_dsi_enable(dsi); ++ cec->standby_en = !en; ++ ret = adap->ops->adap_enable(adap, en); ++ mutex_unlock(&cec->wake_lock); + -+ if (pdata->stream_standby) -+ pdata->stream_standby(pdata->priv_data, 0); ++ return ret; ++} + -+ if (dsi->panel) -+ drm_panel_enable(dsi->panel); ++static long cec_func_en(struct dw_hdmi_cec *cec, int __user *parg) ++{ ++ int en_mask; + -+ DRM_DEV_INFO(dsi->dev, "final DSI-Link bandwidth: %u x %d Mbps\n", -+ dsi->lane_mbps, dsi->slave ? dsi->lanes * 2 : dsi->lanes); - } - - static enum drm_mode_status -@@ -998,9 +1068,12 @@ static int dw_mipi_dsi_bridge_attach(struct drm_bridge *bridge, - /* Set the encoder type as caller does not know it */ - bridge->encoder->encoder_type = DRM_MODE_ENCODER_DSI; - -- /* Attach the panel-bridge to the dsi bridge */ -- return drm_bridge_attach(bridge->encoder, dsi->panel_bridge, bridge, -- flags); -+ /* Attach the next-bridge to the dsi bridge */ -+ if (dsi->next_bridge) -+ return drm_bridge_attach(bridge->encoder, dsi->next_bridge, -+ bridge, flags); ++ if (copy_from_user(&en_mask, parg, sizeof(en_mask))) ++ return -EFAULT; ++ ++ cec->wake_en = (en_mask & CEC_EN) && (en_mask & CEC_WAKE); + + return 0; - } - - static const struct drm_bridge_funcs dw_mipi_dsi_bridge_funcs = { -@@ -1010,6 +1083,7 @@ static const struct drm_bridge_funcs dw_mipi_dsi_bridge_funcs = { - .atomic_pre_enable = dw_mipi_dsi_bridge_atomic_pre_enable, - .atomic_enable = dw_mipi_dsi_bridge_atomic_enable, - .atomic_post_disable = dw_mipi_dsi_bridge_post_atomic_disable, -+ .atomic_disable = dw_mipi_dsi_bridge_atomic_disable, - .mode_set = dw_mipi_dsi_bridge_mode_set, - .mode_valid = dw_mipi_dsi_bridge_mode_valid, - .attach = dw_mipi_dsi_bridge_attach, -@@ -1132,13 +1206,6 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, - dsi->base = plat_data->base; - } - -- dsi->pclk = devm_clk_get(dev, "pclk"); -- if (IS_ERR(dsi->pclk)) { -- ret = PTR_ERR(dsi->pclk); -- dev_err(dev, "Unable to get pclk: %d\n", ret); -- return ERR_PTR(ret); -- } -- - /* - * Note that the reset was not defined in the initial device tree, so - * we have to be prepared for it not being found. 
-@@ -1152,20 +1219,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, - - return ERR_PTR(ret); - } -- -- if (apb_rst) { -- ret = clk_prepare_enable(dsi->pclk); -- if (ret) { -- dev_err(dev, "%s: Failed to enable pclk\n", __func__); -- return ERR_PTR(ret); -- } -- -- reset_control_assert(apb_rst); -- usleep_range(10, 20); -- reset_control_deassert(apb_rst); -- -- clk_disable_unprepare(dsi->pclk); -- } -+ dsi->apb_rst = apb_rst; - - dw_mipi_dsi_debugfs_init(dsi); - pm_runtime_enable(dev); -@@ -1228,12 +1282,97 @@ void dw_mipi_dsi_remove(struct dw_mipi_dsi *dsi) - } - EXPORT_SYMBOL_GPL(dw_mipi_dsi_remove); - -+static int dw_mipi_dsi_connector_get_modes(struct drm_connector *connector) ++} ++ ++static long dw_hdmi_cec_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ -+ struct dw_mipi_dsi *dsi = con_to_dsi(connector); ++ struct dw_hdmi_cec *cec; ++ struct miscdevice *misc_dev; ++ void __user *data; + -+ if (dsi->next_bridge && (dsi->next_bridge->ops & DRM_BRIDGE_OP_MODES)) -+ return drm_bridge_get_modes(dsi->next_bridge, connector); ++ if (!f) ++ return -EFAULT; + -+ if (dsi->panel) -+ return drm_panel_get_modes(dsi->panel, connector); ++ misc_dev = f->private_data; ++ cec = container_of(misc_dev, struct dw_hdmi_cec, misc_dev); ++ data = (void __user *)arg; + -+ return -EINVAL; -+} ++ switch (cmd) { ++ case CEC_STANDBY: ++ return cec_standby(cec->adap, data); ++ case CEC_FUNC_EN: ++ return cec_func_en(cec, data); ++ default: ++ return -EINVAL; ++ } + -+static struct drm_connector_helper_funcs dw_mipi_dsi_connector_helper_funcs = { -+ .get_modes = dw_mipi_dsi_connector_get_modes, -+}; ++ return -ENOTTY; ++} + -+static enum drm_connector_status -+dw_mipi_dsi_connector_detect(struct drm_connector *connector, bool force) ++static int dw_hdmi_cec_open(struct inode *inode, struct file *f) +{ -+ struct dw_mipi_dsi *dsi = con_to_dsi(connector); -+ -+ if (dsi->next_bridge && (dsi->next_bridge->ops & DRM_BRIDGE_OP_DETECT)) -+ return drm_bridge_detect(dsi->next_bridge); -+ -+ return connector_status_connected; ++ return 0; +} + -+static void dw_mipi_dsi_drm_connector_destroy(struct drm_connector *connector) ++static int dw_hdmi_cec_release(struct inode *inode, struct file *f) +{ -+ drm_connector_unregister(connector); -+ drm_connector_cleanup(connector); ++ return 0; +} + -+static const struct drm_connector_funcs dw_mipi_dsi_atomic_connector_funcs = { -+ .fill_modes = drm_helper_probe_single_connector_modes, -+ .detect = dw_mipi_dsi_connector_detect, -+ .destroy = dw_mipi_dsi_drm_connector_destroy, -+ .reset = drm_atomic_helper_connector_reset, -+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, -+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, ++static const struct file_operations dw_hdmi_cec_file_operations = { ++ .compat_ioctl = dw_hdmi_cec_ioctl, ++ .unlocked_ioctl = dw_hdmi_cec_ioctl, ++ .open = dw_hdmi_cec_open, ++ .release = dw_hdmi_cec_release, ++ .owner = THIS_MODULE, +}; + -+static int dw_mipi_dsi_connector_init(struct dw_mipi_dsi *dsi) ++static void dw_hdmi_cec_hpd_wake_up(struct platform_device *pdev) +{ -+ struct drm_encoder *encoder = dsi->encoder; -+ struct drm_connector *connector = &dsi->connector; -+ struct drm_device *drm_dev = dsi->bridge.dev; -+ struct device *dev = dsi->dev; -+ int ret; ++ struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); + -+ ret = drm_connector_init(drm_dev, connector, -+ &dw_mipi_dsi_atomic_connector_funcs, -+ DRM_MODE_CONNECTOR_DSI); -+ if (ret) { -+ DRM_DEV_ERROR(dev, "Failed to initialize 
connector\n"); -+ return ret; -+ } ++ mutex_lock(&cec->wake_lock); + -+ drm_connector_helper_add(connector, -+ &dw_mipi_dsi_connector_helper_funcs); -+ ret = drm_connector_attach_encoder(connector, encoder); -+ if (ret < 0) { -+ DRM_DEV_ERROR(dev, "Failed to attach encoder: %d\n", ret); -+ goto connector_cleanup; ++ if (!cec->standby_en) { ++ mutex_unlock(&cec->wake_lock); ++ return; + } ++ cec->standby_en = false; + -+ return 0; -+ -+connector_cleanup: -+ connector->funcs->destroy(connector); ++ dw_hdmi_write(cec, 0x02, HDMI_IH_MUTE); + -+ return ret; ++ input_event(cec->devinput, EV_KEY, KEY_POWER, 1); ++ input_sync(cec->devinput); ++ input_event(cec->devinput, EV_KEY, KEY_POWER, 0); ++ input_sync(cec->devinput); ++ mutex_unlock(&cec->wake_lock); +} + - /* - * Bind/unbind API, used from platforms based on the component framework. - */ - int dw_mipi_dsi_bind(struct dw_mipi_dsi *dsi, struct drm_encoder *encoder) ++static const struct dw_hdmi_cec_wake_ops cec_ops = { ++ .hpd_wake_up = dw_hdmi_cec_hpd_wake_up, ++}; ++ + static int dw_hdmi_cec_probe(struct platform_device *pdev) { -- return drm_bridge_attach(encoder, &dsi->bridge, NULL, 0); -+ int ret; + struct dw_hdmi_cec_data *data = dev_get_platdata(&pdev->dev); +@@ -248,10 +438,14 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) + if (!cec) + return -ENOMEM; + ++ cec->dev = &pdev->dev; + cec->irq = data->irq; ++ cec->wake_irq = data->wake_irq; + cec->ops = data->ops; + cec->hdmi = data->hdmi; + ++ mutex_init(&cec->wake_lock); + -+ dsi->encoder = encoder; + platform_set_drvdata(pdev, cec); + + dw_hdmi_write(cec, 0, HDMI_CEC_TX_CNT); +@@ -266,20 +460,40 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) + if (IS_ERR(cec->adap)) + return PTR_ERR(cec->adap); + ++ dw_hdmi_set_cec_adap(cec->hdmi, cec->adap); + -+ ret = drm_bridge_attach(encoder, &dsi->bridge, NULL, 0); + /* override the module pointer */ + cec->adap->owner = THIS_MODULE; + +- ret = devm_add_action_or_reset(&pdev->dev, dw_hdmi_cec_del, cec); +- if (ret) ++ ret = devm_add_action(&pdev->dev, dw_hdmi_cec_del, cec); + if (ret) { -+ DRM_ERROR("Failed to initialize bridge with drm\n"); -+ return ret; ++ cec_delete_adapter(cec->adap); + return ret; + } -+ -+ return ret; - } - EXPORT_SYMBOL_GPL(dw_mipi_dsi_bind); -@@ -1242,6 +1381,38 @@ void dw_mipi_dsi_unbind(struct dw_mipi_dsi *dsi) - } - EXPORT_SYMBOL_GPL(dw_mipi_dsi_unbind); + ret = devm_request_threaded_irq(&pdev->dev, cec->irq, + dw_hdmi_cec_hardirq, +- dw_hdmi_cec_thread, IRQF_SHARED, ++ dw_hdmi_cec_thread, IRQF_SHARED | IRQF_ONESHOT, + "dw-hdmi-cec", cec->adap); + if (ret < 0) + return ret; -+struct drm_connector *dw_mipi_dsi_get_connector(struct dw_mipi_dsi *dsi) -+{ -+ struct drm_connector *connector = NULL; -+ enum drm_bridge_attach_flags flags = 0; -+ int ret; -+ -+ if (dsi->next_bridge) { -+ enum drm_bridge_attach_flags flags; -+ struct list_head *connector_list = -+ &dsi->next_bridge->dev->mode_config.connector_list; -+ -+ flags = dsi->next_bridge->ops & DRM_BRIDGE_OP_MODES ? 
-+ DRM_BRIDGE_ATTACH_NO_CONNECTOR : 0; -+ if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) -+ list_for_each_entry(connector, connector_list, head) -+ if (drm_connector_has_possible_encoder(connector, -+ dsi->encoder)) -+ break; -+ } -+ -+ if (dsi->panel || (dsi->next_bridge && (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR))) { -+ ret = dw_mipi_dsi_connector_init(dsi); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ connector = &dsi->connector; ++ if (cec->wake_irq > 0) { ++ ret = devm_request_threaded_irq(&pdev->dev, cec->wake_irq, ++ dw_hdmi_cec_wake_irq, ++ dw_hdmi_cec_wake_thread, ++ IRQF_TRIGGER_HIGH | IRQF_ONESHOT, ++ "cec-wakeup", cec->adap); ++ if (ret) { ++ dev_err(&pdev->dev, ++ "hdmi_cec request_irq failed (%d).\n", ++ ret); ++ return ret; ++ } ++ device_init_wakeup(&pdev->dev, 1); ++ enable_irq_wake(cec->wake_irq); + } + -+ return connector; -+} -+EXPORT_SYMBOL_GPL(dw_mipi_dsi_get_connector); -+ - MODULE_AUTHOR("Chris Zhong "); - MODULE_AUTHOR("Philippe Cornu "); - MODULE_DESCRIPTION("DW MIPI DSI host controller driver"); -diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig -index 09712b88a..6f10a437b 100644 ---- a/drivers/gpu/drm/display/Kconfig -+++ b/drivers/gpu/drm/display/Kconfig -@@ -1,5 +1,14 @@ - # SPDX-License-Identifier: MIT + cec->notify = cec_notifier_cec_adap_register(pdev->dev.parent, + NULL, cec->adap); + if (!cec->notify) +@@ -297,55 +511,38 @@ static int dw_hdmi_cec_probe(struct platform_device *pdev) + */ + devm_remove_action(&pdev->dev, dw_hdmi_cec_del, cec); -+config DRM_DP -+ bool "DRM DisplayPort support" -+ depends on DRM -+ depends on DRM_KMS_HELPER -+ default y if DRM_ANALOGIX_DP -+ default y if !ROCKCHIP_MINI_KERNEL -+ help -+ Choose this option to support DP interface. -+ - config DRM_DP_AUX_BUS - tristate - depends on DRM -diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c -index 77590b0f3..0dc2a587c 100644 ---- a/drivers/gpu/drm/drm_ioctl.c -+++ b/drivers/gpu/drm/drm_ioctl.c -@@ -245,7 +245,8 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ - req->value = 1; - return 0; - case DRM_CAP_PRIME: -- req->value = DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT; -+ req->value |= dev->driver->prime_fd_to_handle ? DRM_PRIME_CAP_IMPORT : 0; -+ req->value |= dev->driver->prime_handle_to_fd ? 
DRM_PRIME_CAP_EXPORT : 0; - return 0; - case DRM_CAP_SYNCOBJ: - req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ); -@@ -529,6 +530,7 @@ int drm_version(struct drm_device *dev, void *data, +- return 0; +-} +- +-static void dw_hdmi_cec_remove(struct platform_device *pdev) +-{ +- struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); +- +- cec_notifier_cec_adap_unregister(cec->notify, cec->adap); +- cec_unregister_adapter(cec->adap); +-} ++ rockchip_hdmi_cec_input_init(cec); - static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) +-static int __maybe_unused dw_hdmi_cec_resume(struct device *dev) +-{ +- struct dw_hdmi_cec *cec = dev_get_drvdata(dev); ++ cec->misc_dev.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "rk_cec"); ++ if (!cec->misc_dev.name) ++ return -ENOMEM; ++ cec->misc_dev.minor = MISC_DYNAMIC_MINOR; ++ cec->misc_dev.fops = &dw_hdmi_cec_file_operations; ++ cec->misc_dev.mode = 0666; + +- /* Restore logical address */ +- dw_hdmi_write(cec, cec->addresses & 255, HDMI_CEC_ADDR_L); +- dw_hdmi_write(cec, cec->addresses >> 8, HDMI_CEC_ADDR_H); ++ ret = misc_register(&cec->misc_dev); + +- /* Restore interrupt status/mask registers */ +- dw_hdmi_write(cec, cec->regs_polarity, HDMI_CEC_POLARITY); +- dw_hdmi_write(cec, cec->regs_mask, HDMI_CEC_MASK); +- dw_hdmi_write(cec, cec->regs_mute_stat0, HDMI_IH_MUTE_CEC_STAT0); ++ dw_hdmi_cec_wake_ops_register(cec->hdmi, &cec_ops); + +- return 0; ++ return ret; + } + +-static int __maybe_unused dw_hdmi_cec_suspend(struct device *dev) ++static int dw_hdmi_cec_remove(struct platform_device *pdev) { -+#ifndef CONFIG_DRM_IGNORE_IOTCL_PERMIT - /* ROOT_ONLY is only for CAP_SYS_ADMIN */ - if (unlikely((flags & DRM_ROOT_ONLY) && !capable(CAP_SYS_ADMIN))) - return -EACCES; -@@ -547,6 +549,7 @@ static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) - if (unlikely(!(flags & DRM_RENDER_ALLOW) && - drm_is_render_client(file_priv))) - return -EACCES; -+#endif +- struct dw_hdmi_cec *cec = dev_get_drvdata(dev); ++ struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); + +- /* store interrupt status/mask registers */ +- cec->regs_polarity = dw_hdmi_read(cec, HDMI_CEC_POLARITY); +- cec->regs_mask = dw_hdmi_read(cec, HDMI_CEC_MASK); +- cec->regs_mute_stat0 = dw_hdmi_read(cec, HDMI_IH_MUTE_CEC_STAT0); ++ cec_notifier_cec_adap_unregister(cec->notify, cec->adap); ++ cec_unregister_adapter(cec->adap); ++ misc_deregister(&cec->misc_dev); return 0; } -@@ -677,9 +680,9 @@ static const struct drm_ioctl_desc drm_ioctls[] = { - DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER), -- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0), -- DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, 0), -- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, 0), -+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_RENDER_ALLOW), -+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_RENDER_ALLOW), -+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, 0), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER), -diff --git a/drivers/gpu/drm/panel/Makefile 
b/drivers/gpu/drm/panel/Makefile -index 433e93d57..001ec7292 100644 ---- a/drivers/gpu/drm/panel/Makefile -+++ b/drivers/gpu/drm/panel/Makefile -@@ -8,7 +8,7 @@ obj-$(CONFIG_DRM_PANEL_BOE_HIMAX8279D) += panel-boe-himax8279d.o - obj-$(CONFIG_DRM_PANEL_BOE_TV101WUM_NL6) += panel-boe-tv101wum-nl6.o - obj-$(CONFIG_DRM_PANEL_DSI_CM) += panel-dsi-cm.o - obj-$(CONFIG_DRM_PANEL_LVDS) += panel-lvds.o --obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple.o -+obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple-rk.o - obj-$(CONFIG_DRM_PANEL_EDP) += panel-edp.o - obj-$(CONFIG_DRM_PANEL_EBBG_FT8719) += panel-ebbg-ft8719.o - obj-$(CONFIG_DRM_PANEL_ELIDA_KD35T133) += panel-elida-kd35t133.o -diff --git a/drivers/gpu/drm/panel/panel-simple-rk.c b/drivers/gpu/drm/panel/panel-simple-rk.c + +-static const struct dev_pm_ops dw_hdmi_cec_pm = { +- SET_SYSTEM_SLEEP_PM_OPS(dw_hdmi_cec_suspend, dw_hdmi_cec_resume) +-}; +- + static struct platform_driver dw_hdmi_cec_driver = { + .probe = dw_hdmi_cec_probe, +- .remove_new = dw_hdmi_cec_remove, ++ .remove = dw_hdmi_cec_remove, + .driver = { + .name = "dw-hdmi-cec", +- .pm = &dw_hdmi_cec_pm, + }, + }; + module_platform_driver(dw_hdmi_cec_driver); +diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h +index cf4dc121a..ec10660a7 100644 +--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h ++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h +@@ -3,17 +3,24 @@ + + struct dw_hdmi; + ++#define CEC_EN BIT(0) ++#define CEC_WAKE BIT(1) ++ + struct dw_hdmi_cec_ops { + void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); + u8 (*read)(struct dw_hdmi *hdmi, int offset); + void (*enable)(struct dw_hdmi *hdmi); + void (*disable)(struct dw_hdmi *hdmi); ++ void (*mod)(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned int reg); + }; + + struct dw_hdmi_cec_data { + struct dw_hdmi *hdmi; + const struct dw_hdmi_cec_ops *ops; + int irq; ++ int wake_irq; + }; + ++void dw_hdmi_hpd_wake_up(struct platform_device *pdev); ++ + #endif +diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c new file mode 100644 -index 000000000..06d5c91db +index 000000000..5906b7cac --- /dev/null -+++ b/drivers/gpu/drm/panel/panel-simple-rk.c -@@ -0,0 +1,5567 @@ ++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-hdcp.c +@@ -0,0 +1,748 @@ +/* -+ * Copyright (C) 2013, NVIDIA Corporation. All rights reserved. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sub license, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: ++ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd ++ * Author Huicong Xu + * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. ++ * This software is licensed under the terms of the GNU General Public ++ * License version 2, as published by the Free Software Foundation, and ++ * may be copied, distributed, and modified under those terms. 
+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + */ + -+#include ++#include +#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include ++#include ++#include +#include -+#include -+#include -+#include -+#include -+#include ++#include ++#include ++#include ++#include ++#include + -+#include